2 * Job control and execution for Storage Daemon
10 Copyright (C) 2000-2006 Kern Sibbald
12 This program is free software; you can redistribute it and/or
13 modify it under the terms of the GNU General Public License
14 version 2 as amended with additional clauses defined in the
15 file LICENSE in the main source directory.
17 This program is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 the file LICENSE for additional details.
/*
 * Mutex passed to pthread_cond_timedwait() in run_cmd().
 * NOTE(review): the lock/unlock calls around that wait are not visible in
 * this view of the file -- confirm they exist.
 */
27 static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
29 /* Imported variables */
/* job_cmd() copies this value into jcr->VolSessionTime. */
30 extern uint32_t VolSessionTime;
32 /* Imported functions */
/* job_cmd() stores the result of this call in jcr->VolSessionId. */
33 extern uint32_t newVolSessionId();
/* NOTE(review): no call to do_mac() is visible in this part of the file. */
34 extern bool do_mac(JCR *jcr);
36 /* Requests from the Director daemon */
/*
 * sscanf() template used by job_cmd() to parse the Director's job command.
 * It holds 13 conversions: integers via %d and strings via %127s, i.e. each
 * string field is limited to 127 characters plus the terminating NUL.
 */
37 static char jobcmd[] = "JobId=%d job=%127s job_name=%127s client_name=%127s "
38 "type=%d level=%d FileSet=%127s NoAttr=%d SpoolAttr=%d FileSetMD5=%127s "
39 "SpoolData=%d WritePartAfterJob=%d PreferMountedVols=%d\n";
42 /* Responses sent to Director daemon */
/* Reply sent by job_cmd(): VolSessionId, VolSessionTime and the session key. */
43 static char OKjob[] = "3000 OK Job SDid=%u SDtime=%u Authorization=%s\n";
/* Error reply sent by job_cmd(): the sscanf() result and the offending command. */
44 static char BAD_job[] = "3915 Bad Job command. stat=%d CMD: %s\n";
/*
 * NOTE(review): the three query replies below are commented out, yet
 * query_cmd() refers to OK_query, NO_query and BAD_query (and to query_device
 * and NO_device, which have no visible definition either).  Presumably
 * query_cmd() is compiled out or these are defined elsewhere -- confirm.
 */
45 //static char OK_query[] = "3001 OK query\n";
46 //static char NO_query[] = "3918 Query failed\n";
47 //static char BAD_query[] = "3917 Bad query command: %s\n";
50 * Director requests us to start a job
51 * Basic tasks done here:
52 * - We pick up the JobId to be run from the Director.
53 * - We pick up the device, media, and pool from the Director
54 * - Wait for a connection from the File Daemon (FD)
55 * - Accept commands from the FD (i.e. run the job)
56 * - Return when the connection is terminated or
/*
 * job_cmd -- handle the job command received from the Director.
 *
 * Scans jcr->dir_bsock->msg with the jobcmd template, stores the parsed
 * values in the JCR, creates a session key and returns it to the Director
 * in an OKjob reply.  The visible error path answers with BAD_job and sets
 * the job status to JS_ErrorTerminated.
 *
 *   jcr  job control record; its dir_bsock holds the Director's message.
 *
 * Returns bool.
 * NOTE(review): the return statements, the declarations of JobId, stat,
 * ojcr and auth_key, and all braces are absent from this view of the file
 * (the embedded line numbers have gaps), so the exact return values and
 * branch boundaries cannot be confirmed from here.
 */
59 bool job_cmd(JCR *jcr)
63 BSOCK *dir = jcr->dir_bsock;
/* Scratch buffers that receive the %127s fields of the command. */
64 POOL_MEM job_name, client_name, job, fileset_name, fileset_md5;
65 int JobType, level, spool_attributes, no_attributes, spool_data;
66 int write_part_after_job, PreferMountedVols;
71 * Get JobId and permissions from Director
73 Dmsg1(100, "<dird: %s", dir->msg);
/*
 * NOTE(review): jobcmd contains 13 conversions, but only 12 destinations are
 * visible below, and client_name is never passed although it is consumed
 * later.  The embedded numbering jumps from 74 to 76, so the argument is
 * probably on a line missing from this view -- confirm against the real
 * file, because a missing sscanf() argument is undefined behaviour.
 * NOTE(review): the %127s targets are POOL_MEM buffers; confirm their
 * initial capacity is at least 128 bytes.
 */
74 stat = sscanf(dir->msg, jobcmd, &JobId, job.c_str(), job_name.c_str(),
76 &JobType, &level, fileset_name.c_str(), &no_attributes,
77 &spool_attributes, fileset_md5.c_str(), &spool_data,
78 &write_part_after_job, &PreferMountedVols);
/*
 * Error path: the raw command is saved in jcr->errmsg before bnet_fsend()
 * is called, then echoed back together with the sscanf() result.
 * NOTE(review): the condition guarding these four lines is not visible;
 * presumably it compares stat with the expected field count.
 */
80 pm_strcpy(jcr->errmsg, dir->msg);
81 bnet_fsend(dir, BAD_job, stat, jcr->errmsg);
82 Dmsg1(100, ">dird: %s", dir->msg);
83 set_jcr_job_status(jcr, JS_ErrorTerminated);
87 * Since this job could be rescheduled, we
88 * check to see if we have it already. If so
89 * free the old jcr and use the new one.
91 ojcr = get_jcr_by_full_name(job.c_str());
/* Only an earlier JCR that was never authenticated is treated as stale. */
92 if (ojcr && !ojcr->authenticated) {
/*
 * NOTE(review): (unsigned)(long)ojcr printed with 0x%x drops the upper bits
 * of the pointer where pointers are wider than unsigned; debug output only.
 */
93 Dmsg2(100, "Found ojcr=0x%x Job %s\n", (unsigned)(long)ojcr, job.c_str());
/* Session identifiers for this job. */
97 jcr->VolSessionId = newVolSessionId();
98 jcr->VolSessionTime = VolSessionTime;
99 bstrncpy(jcr->Job, job, sizeof(jcr->Job));
/*
 * Each name is passed through unbash_spaces() and then copied into freshly
 * obtained pool memory owned by the JCR.
 * NOTE(review): the jcr->*_name pointers are overwritten without releasing
 * a previous value; confirm job_cmd() runs at most once per JCR.
 */
100 unbash_spaces(job_name);
101 jcr->job_name = get_pool_memory(PM_NAME);
102 pm_strcpy(jcr->job_name, job_name);
103 unbash_spaces(client_name);
104 jcr->client_name = get_pool_memory(PM_NAME);
105 pm_strcpy(jcr->client_name, client_name);
106 unbash_spaces(fileset_name);
107 jcr->fileset_name = get_pool_memory(PM_NAME);
108 pm_strcpy(jcr->fileset_name, fileset_name);
/* Plain integer options are copied as parsed. */
109 jcr->JobType = JobType;
110 jcr->JobLevel = level;
111 jcr->no_attributes = no_attributes;
112 jcr->spool_attributes = spool_attributes;
113 jcr->spool_data = spool_data;
114 jcr->write_part_after_job = write_part_after_job;
115 jcr->fileset_md5 = get_pool_memory(PM_NAME);
116 pm_strcpy(jcr->fileset_md5, fileset_md5);
117 jcr->PreferMountedVols = PreferMountedVols;
/* Cleared here; run_cmd() or handle_filed_connection() sets it to true. */
119 jcr->authenticated = false;
122 * Pass back an authorization key for the File daemon
124 make_session_key(auth_key, NULL, 1);
125 bnet_fsend(dir, OKjob, jcr->VolSessionId, jcr->VolSessionTime, auth_key);
126 Dmsg1(100, ">dird: %s", dir->msg);
/* The JCR keeps its own copy of the key; the local buffer is then wiped. */
127 jcr->sd_auth_key = bstrdup(auth_key);
128 memset(auth_key, 0, sizeof(auth_key));
129 generate_daemon_event(jcr, "JobStart");
/*
 * run_cmd -- wait for the File daemon and then run the job.
 *
 * Sets the job status to JS_WaitFD, reports it to the Director, and waits on
 * jcr->job_start_wait with a deadline 30 minutes in the future.  Afterwards
 * the stored session key is zeroed and, if the job is authenticated and not
 * canceled, run_job() is called.
 *
 *   jcr  job control record of the job to run.
 *
 * Returns bool.
 * NOTE(review): the switch cases, the loop exit, the declarations of tv, tz
 * and errstat, any locking of the file-scope mutex and the return statements
 * are absent from this view (gaps in the embedded line numbers), so those
 * details cannot be confirmed from here.
 */
133 bool run_cmd(JCR *jcr)
137 struct timespec timeout;
140 Dmsg1(200, "Run_cmd: %s\n", jcr->dir_bsock->msg);
141 /* The following jobs don't need the FD */
142 switch (jcr->JobType) {
/*
 * Marks the job authenticated without any File daemon handshake.
 * NOTE(review): which job types reach this line is not visible.
 */
146 jcr->authenticated = true;
151 set_jcr_job_status(jcr, JS_WaitFD); /* wait for FD to connect */
152 dir_send_job_status(jcr);
/*
 * Build an absolute deadline from the current time: microseconds are
 * converted to nanoseconds and 30 * 60 seconds are added.
 */
154 gettimeofday(&tv, &tz);
155 timeout.tv_nsec = tv.tv_usec * 1000;
156 timeout.tv_sec = tv.tv_sec + 30 * 60; /* wait 30 minutes */
158 Dmsg1(100, "%s waiting on FD to contact SD\n", jcr->Job);
160 * Wait for the File daemon to contact us to start the Job,
161 * when he does, we will be released, unless the 30 minutes
/* handle_filed_connection() signals job_start_wait to end this wait. */
165 for ( ;!job_canceled(jcr); ) {
166 errstat = pthread_cond_timedwait(&jcr->job_start_wait, &mutex, &timeout);
/*
 * Both a wakeup (0) and an expired deadline (ETIMEDOUT) are handled by the
 * same branch; presumably it leaves the loop -- body not visible, confirm.
 */
167 if (errstat == 0 || errstat == ETIMEDOUT) {
/*
 * Overwrite the session key with zero bytes; the buffer is not freed here.
 * NOTE(review): strlen() would dereference NULL if sd_auth_key was never
 * set; confirm job_cmd() always runs before run_cmd().
 */
173 memset(jcr->sd_auth_key, 0, strlen(jcr->sd_auth_key));
/* A wait that ended without authentication does not run the job. */
175 if (jcr->authenticated && !job_canceled(jcr)) {
176 Dmsg1(100, "Running job %s\n", jcr->Job);
177 run_job(jcr); /* Run the job */
183 * After receiving a connection (in job.c) if it is
184 * from the File daemon, this routine is called.
/*
 * handle_filed_connection -- attach a File daemon connection to its job.
 *
 * Looks up the JCR whose full job name is job_name, stores the socket in
 * it, authenticates the File daemon and signals jcr->job_start_wait, the
 * condition variable run_cmd() waits on.
 *
 *   fd        socket of the incoming File daemon connection.
 *   job_name  full job name used for the JCR lookup.
 *
 * NOTE(review): the declaration of jcr, the early returns, the else
 * branches and any release of the JCR reference are absent from this view
 * (gaps in the embedded line numbers) -- confirm against the real file.
 */
186 void handle_filed_connection(BSOCK *fd, char *job_name)
/*
 * NOTE(review): the reason for this delay is not stated; presumably it gives
 * the job thread time to reach its wait in run_cmd() -- confirm.
 */
190 bmicrosleep(0, 50000); /* wait 50 millisecs */
/* An unknown job name is reported both as a fatal job message and in the debug log. */
191 if (!(jcr=get_jcr_by_full_name(job_name))) {
192 Jmsg1(NULL, M_FATAL, 0, _("Job name not found: %s\n"), job_name);
193 Dmsg1(100, "Job name not found: %s\n", job_name);
/* Link the socket and the JCR to each other. */
197 jcr->file_bsock = fd;
198 jcr->file_bsock->jcr = jcr;
200 Dmsg1(110, "Found Job %s\n", job_name);
/* A job that is already authenticated must not get a second connection. */
202 if (jcr->authenticated) {
203 Jmsg2(jcr, M_FATAL, 0, _("Hey!!!! JobId %u Job %s already authenticated.\n"),
204 jcr->JobId, jcr->Job);
210 * Authenticate the File daemon
/*
 * NOTE(review): jcr->authenticated was already tested just above; if that
 * branch returns (not visible here), the first operand is always false.
 */
212 if (jcr->authenticated || !authenticate_filed(jcr)) {
213 Dmsg1(100, "Authentication failed Job %s\n", jcr->Job);
214 Jmsg(jcr, M_FATAL, 0, _("Unable to authenticate File daemon\n"));
216 jcr->authenticated = true;
217 Dmsg1(110, "OK Authentication Job %s\n", jcr->Job);
/* A job that is still unauthenticated at this point is marked as failed. */
220 if (!jcr->authenticated) {
221 set_jcr_job_status(jcr, JS_ErrorTerminated);
223 pthread_cond_signal(&jcr->job_start_wait); /* wake waiting job */
231 * Query Device command from Director
232 * Sends Storage Daemon's information on the device to the
233 * caller (presumably the Director).
234 * This command always returns "true" so that the line is
235 * not closed on an error.
/*
 * query_cmd -- answer a device query from the Director.
 *
 * Extracts a device name from jcr->dir_bsock->msg, then looks for a matching
 * device resource and, after that, a matching autochanger resource.  The
 * resource state is passed to dir_update_device() / dir_update_changer() and
 * an OK_query or NO_query reply is sent.  If nothing matches, NO_device is
 * sent; the final lines send BAD_query with the original command.
 *
 *   jcr  job control record; its dir_bsock carries the query and the replies.
 *
 * Returns bool.
 * NOTE(review): query_device, OK_query, NO_query, BAD_query and NO_device
 * have no visible definition (the *_query strings appear only as
 * commented-out lines), so this function is presumably compiled out or they
 * are defined elsewhere.  The declarations of device and ok, the branch
 * conditions and the return statements are also absent from this view.
 */
238 bool query_cmd(JCR *jcr)
240 POOL_MEM dev_name, VolumeName, MediaType, ChangerName;
241 BSOCK *dir = jcr->dir_bsock;
243 AUTOCHANGER *changer;
246 Dmsg1(100, "Query_cmd: %s", dir->msg);
/* ok is true only when exactly one field (the device name) was scanned. */
247 ok = sscanf(dir->msg, query_device, dev_name.c_str()) == 1;
248 Dmsg1(100, "<dird: %s\n", dir->msg);
250 unbash_spaces(dev_name);
251 foreach_res(device, R_DEVICE) {
252 /* Find resource, and make sure we were able to open it */
/* The requested name is the fnmatch() pattern, so it may contain wildcards. */
253 if (fnmatch(dev_name.c_str(), device->hdr.name, 0) == 0) {
/* NOTE(review): presumably only done when device->dev is not set yet. */
255 device->dev = init_dev(jcr, device);
260 ok = dir_update_device(jcr, device->dev);
262 ok = bnet_fsend(dir, OK_query);
264 bnet_fsend(dir, NO_query);
269 foreach_res(changer, R_AUTOCHANGER) {
270 /* Find resource, and make sure we were able to open it */
271 if (fnmatch(dev_name.c_str(), changer->hdr.name, 0) == 0) {
272 if (!changer->device || changer->device->size() == 0) {
273 continue; /* no devices */
275 ok = dir_update_changer(jcr, changer);
277 ok = bnet_fsend(dir, OK_query);
279 bnet_fsend(dir, NO_query);
284 /* If we get here, the device/autochanger was not found */
/* The command is saved in jcr->errmsg before bnet_fsend() is called. */
285 unbash_spaces(dir->msg);
286 pm_strcpy(jcr->errmsg, dir->msg);
287 bnet_fsend(dir, NO_device, dev_name.c_str());
288 Dmsg1(100, ">dird: %s\n", dir->msg);
/* Bad-command path: echo the saved command back inside BAD_query. */
290 unbash_spaces(dir->msg);
291 pm_strcpy(jcr->errmsg, dir->msg);
292 bnet_fsend(dir, BAD_query, jcr->errmsg);
293 Dmsg1(100, ">dird: %s\n", dir->msg);
303 * Destroy the Job Control Record and associated
304 * resources (sockets).
/*
 * stored_free_jcr -- release the Storage daemon resources held by a JCR.
 *
 * Closes the File daemon socket, frees the name strings, removes the
 * restore bootstrap file, destroys the job_start_wait condition variable,
 * frees the read DCR and deletes the read/write store lists.
 *
 *   jcr  job control record being destroyed.
 *
 * NOTE(review): several guards, the declaration of store and all braces are
 * absent from this view (gaps in the embedded line numbers), and the
 * function may continue past the last visible line.
 */
306 void stored_free_jcr(JCR *jcr)
308 Dmsg1(900, "stored_free_jcr JobId=%u\n", jcr->JobId);
309 if (jcr->file_bsock) {
310 bnet_close(jcr->file_bsock);
311 jcr->file_bsock = NULL;
/*
 * NOTE(review): job_name is released with free_pool_memory() while the
 * other strings use free_memory(); confirm the two are equivalent.  No NULL
 * guard for job_name is visible in this view.
 */
314 free_pool_memory(jcr->job_name);
316 if (jcr->client_name) {
317 free_memory(jcr->client_name);
318 jcr->client_name = NULL;
/*
 * NOTE(review): unlike client_name, no reset to NULL is visible for
 * fileset_name and fileset_md5 -- confirm.
 */
320 if (jcr->fileset_name) {
321 free_memory(jcr->fileset_name);
323 if (jcr->fileset_md5) {
324 free_memory(jcr->fileset_md5);
/* The bootstrap file is deleted from disk before its path string is freed. */
330 if (jcr->RestoreBootstrap) {
331 unlink(jcr->RestoreBootstrap);
332 free_pool_memory(jcr->RestoreBootstrap);
333 jcr->RestoreBootstrap = NULL;
/* A JCR that still has device links at this point is reported as a fatal error. */
335 if (jcr->next_dev || jcr->prev_dev) {
336 Emsg0(M_FATAL, 0, _("In free_jcr(), but still attached to device!!!!\n"));
338 pthread_cond_destroy(&jcr->job_start_wait);
/* NOTE(review): any guard around this call is not visible in this view. */
348 free_dcr(jcr->read_dcr);
349 jcr->read_dcr = NULL;
/* Each store entry's device list is deleted before the list itself. */
352 if (jcr->read_store) {
354 foreach_alist(store, jcr->read_store) {
355 delete store->device;
358 delete jcr->read_store;
359 jcr->read_store = NULL;
361 if (jcr->write_store) {
363 foreach_alist(store, jcr->write_store) {
364 delete store->device;
367 delete jcr->write_store;
368 jcr->write_store = NULL;