2 Bacula® - The Network Backup Solution
4 Copyright (C) 2000-2010 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from
7 many others, a complete list can be found in the file AUTHORS.
8 This program is Free Software; you can redistribute it and/or
9 modify it under the terms of version three of the GNU Affero General Public
10 License as published by the Free Software Foundation and included
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU Affero General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 Bacula® is a registered trademark of Kern Sibbald.
24 The licensor of Bacula is the Free Software Foundation Europe
25 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
26 Switzerland, email:ftf@fsfeurope.org.
29 * Job control and execution for Storage Daemon
/* File-scope mutex handed to pthread_cond_timedwait() in run_cmd() */
38 static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
40 /* Imported variables */
41 extern uint32_t VolSessionTime;
43 /* Imported functions */
44 extern uint32_t newVolSessionId();
45 extern bool do_mac(JCR *jcr);
47 /* Requests from the Director daemon */
48 /* Added in 3.1.4 14Sep09 KES */
/*
 * sscanf() templates for the Job command that job_cmd() reads from dir->msg.
 * job_cmd() applies jobcmd first, then oldjobcmd, then oldoldjobcmd.
 * oldjobcmd stops after SpoolSize; oldoldjobcmd stops after PreferMountedVols.
 * NOTE(review): SpoolSize=%s carries no field width while the other string
 * fields use %127s -- confirm the spool_size buffer in job_cmd() cannot be
 * overrun by a long value, or add a width.
 */
49 static char jobcmd[] = "JobId=%d job=%127s job_name=%127s client_name=%127s "
50 "type=%d level=%d FileSet=%127s NoAttr=%d SpoolAttr=%d FileSetMD5=%127s "
51 "SpoolData=%d WritePartAfterJob=%d PreferMountedVols=%d SpoolSize=%s "
53 static char oldjobcmd[] = "JobId=%d job=%127s job_name=%127s client_name=%127s "
54 "type=%d level=%d FileSet=%127s NoAttr=%d SpoolAttr=%d FileSetMD5=%127s "
55 "SpoolData=%d WritePartAfterJob=%d PreferMountedVols=%d SpoolSize=%s\n";
56 static char oldoldjobcmd[] = "JobId=%d job=%127s job_name=%127s client_name=%127s "
57 "type=%d level=%d FileSet=%127s NoAttr=%d SpoolAttr=%d FileSetMD5=%127s "
58 "SpoolData=%d WritePartAfterJob=%d PreferMountedVols=%d\n";
62 /* Responses sent to Director daemon */
/* OKjob: formatted in job_cmd() with VolSessionId, VolSessionTime and the new authorization key */
63 static char OKjob[] = "3000 OK Job SDid=%u SDtime=%u Authorization=%s\n";
/* BAD_job: formatted in job_cmd() with the sscanf() result and the received command text */
64 static char BAD_job[] = "3915 Bad Job command. stat=%d CMD: %s\n";
/*
 * NOTE(review): the three query replies below are commented out, yet
 * query_cmd() in this file passes OK_query, NO_query and BAD_query to
 * fsend() -- confirm that function is compiled out, or restore these.
 */
65 //static char OK_query[] = "3001 OK query\n";
66 //static char NO_query[] = "3918 Query failed\n";
67 //static char BAD_query[] = "3917 Bad query command: %s\n";
70 * Director requests us to start a job
71 * Basic tasks done here:
72 * - We pick up the JobId to be run from the Director.
73 * - We pick up the device, media, and pool from the Director
74 * - Wait for a connection from the File Daemon (FD)
75 * - Accept commands from the FD (i.e. run the job)
76 * - Return when the connection is terminated or
/*
 * Handle the Job command received on the Director socket (jcr->dir_bsock).
 *
 * The text in dir->msg is parsed with sscanf() using jobcmd, then
 * oldjobcmd, then oldoldjobcmd. The failure path copies dir->msg into
 * jcr->errmsg, replies with BAD_job and sets JS_ErrorTerminated.
 * NOTE(review): the field-count checks that select between these attempts
 * and the failure path are not visible here -- confirm against the full file.
 *
 * On the success path the parsed fields are copied into jcr, a session key
 * is generated and returned to the Director in the OKjob reply, a duplicate
 * of the key is kept in jcr->sd_auth_key, and the JobStart daemon event is
 * generated.
 */
79 bool job_cmd(JCR *jcr)
85 BSOCK *dir = jcr->dir_bsock;
86 POOL_MEM job_name, client_name, job, fileset_name, fileset_md5;
87 int JobType, level, spool_attributes, no_attributes, spool_data;
88 int write_part_after_job, PreferMountedVols;
94 * Get JobId and permissions from Director
96 Dmsg1(100, "<dird: %s", dir->msg);
/* Preset spool_size to 0; oldoldjobcmd has no SpoolSize field to fill it */
97 bstrncpy(spool_size, "0", sizeof(spool_size));
98 stat = sscanf(dir->msg, jobcmd, &JobId, job.c_str(), job_name.c_str(),
100 &JobType, &level, fileset_name.c_str(), &no_attributes,
101 &spool_attributes, fileset_md5.c_str(), &spool_data,
102 &write_part_after_job, &PreferMountedVols, spool_size,
105 /* Try old version */
106 stat = sscanf(dir->msg, oldjobcmd, &JobId, job.c_str(), job_name.c_str(),
108 &JobType, &level, fileset_name.c_str(), &no_attributes,
109 &spool_attributes, fileset_md5.c_str(), &spool_data,
110 &write_part_after_job, &PreferMountedVols, spool_size);
112 /* Try oldold version */
113 stat = sscanf(dir->msg, oldoldjobcmd, &JobId, job.c_str(), job_name.c_str(),
115 &JobType, &level, fileset_name.c_str(), &no_attributes,
116 &spool_attributes, fileset_md5.c_str(), &spool_data,
117 &write_part_after_job, &PreferMountedVols);
/* Failure reply: echo the received command and the sscanf() result to the Director */
119 pm_strcpy(jcr->errmsg, dir->msg);
120 dir->fsend(BAD_job, stat, jcr->errmsg);
121 Dmsg1(100, ">dird: %s", dir->msg);
122 set_jcr_job_status(jcr, JS_ErrorTerminated);
128 * Since this job could be rescheduled, we
129 * check to see if we have it already. If so
130 * free the old jcr and use the new one.
132 ojcr = get_jcr_by_full_name(job.c_str());
/* Only an existing jcr that has not been authenticated matches this test */
133 if (ojcr && !ojcr->authenticated) {
134 Dmsg2(100, "Found ojcr=0x%x Job %s\n", (unsigned)(long)ojcr, job.c_str());
138 Dmsg2(800, "Start JobId=%d %p\n", JobId, jcr);
139 jcr->VolSessionId = newVolSessionId();
140 jcr->VolSessionTime = VolSessionTime;
141 bstrncpy(jcr->Job, job, sizeof(jcr->Job));
/*
 * Each name is passed through unbash_spaces() and then copied into freshly
 * obtained PM_NAME pool memory owned by the jcr.
 * NOTE(review): unbash_spaces() presumably restores spaces that were
 * escaped for transport -- confirm against its definition.
 */
142 unbash_spaces(job_name);
143 jcr->job_name = get_pool_memory(PM_NAME);
144 pm_strcpy(jcr->job_name, job_name);
145 unbash_spaces(client_name);
146 jcr->client_name = get_pool_memory(PM_NAME);
147 pm_strcpy(jcr->client_name, client_name);
148 unbash_spaces(fileset_name);
149 jcr->fileset_name = get_pool_memory(PM_NAME);
150 pm_strcpy(jcr->fileset_name, fileset_name);
151 jcr->set_JobType(JobType);
152 jcr->set_JobLevel(level);
153 jcr->no_attributes = no_attributes;
154 jcr->spool_attributes = spool_attributes;
155 jcr->spool_data = spool_data;
/* spool_size was scanned as text; convert it to a 64 bit integer here */
156 jcr->spool_size = str_to_int64(spool_size);
157 jcr->write_part_after_job = write_part_after_job;
158 jcr->fileset_md5 = get_pool_memory(PM_NAME);
159 pm_strcpy(jcr->fileset_md5, fileset_md5);
160 jcr->PreferMountedVols = PreferMountedVols;
/* Starts unauthenticated; handle_filed_connection() sets this to true */
163 jcr->authenticated = false;
166 * Pass back an authorization key for the File daemon
/* The seed is the jcr address followed by the JobId */
168 bsnprintf(seed, sizeof(seed), "%p%d", jcr, JobId);
169 make_session_key(auth_key, seed, 1);
170 dir->fsend(OKjob, jcr->VolSessionId, jcr->VolSessionTime, auth_key);
171 Dmsg2(50, ">dird jid=%u: %s", (uint32_t)jcr->JobId, dir->msg);
/* Keep a duplicate of the key in the jcr, then zero the local buffer */
172 jcr->sd_auth_key = bstrdup(auth_key);
173 memset(auth_key, 0, sizeof(auth_key));
174 generate_daemon_event(jcr, "JobStart");
/*
 * Handle the run command: wait for the File daemon to authenticate, then
 * run the job.
 *
 * Sets the job status to JS_WaitFD, reports it to the Director, and waits on
 * jcr->job_start_wait until jcr->authenticated is set, the job is canceled,
 * or the wait returns ETIMEDOUT, EINVAL or EPERM. The deadline is the
 * current time plus me->client_wait seconds. After the wait the contents of
 * jcr->sd_auth_key are zeroed, and run_job() is called only when the job is
 * authenticated and not canceled.
 *
 * NOTE(review): the branch taken when jcr->no_client_used() is true, and the
 * return values, are not visible here -- confirm against the full file.
 */
178 bool run_cmd(JCR *jcr)
182 struct timespec timeout;
186 Dmsg1(200, "Run_cmd: %s\n", jcr->dir_bsock->msg);
188 /* If we do not need the FD, we are doing a migrate, copy, or virtual
191 if (jcr->no_client_used()) {
196 set_jcr_job_status(jcr, JS_WaitFD); /* wait for FD to connect */
197 dir_send_job_status(jcr);
/* Build an absolute deadline: current time plus me->client_wait seconds */
199 gettimeofday(&tv, &tz);
200 timeout.tv_nsec = tv.tv_usec * 1000;
201 timeout.tv_sec = tv.tv_sec + me->client_wait;
203 Dmsg3(50, "%s waiting %d sec for FD to contact SD key=%s\n",
204 jcr->Job, (int)(timeout.tv_sec-time(NULL)), jcr->sd_auth_key);
205 Dmsg2(800, "Wait FD for jid=%d %p\n", jcr->JobId, jcr);
208 * Wait for the File daemon to contact us to start the Job,
209 * when he does, we will be released, unless the 30 minutes
/*
 * NOTE(review): pthread_cond_timedwait() requires the caller to hold the
 * mutex; the lock and unlock around this loop are not visible here --
 * confirm they are present in the full file.
 */
213 while ( !jcr->authenticated && !job_canceled(jcr) ) {
214 errstat = pthread_cond_timedwait(&jcr->job_start_wait, &mutex, &timeout);
/* Timeout and the two error codes are handled by the same branch */
215 if (errstat == ETIMEDOUT || errstat == EINVAL || errstat == EPERM) {
218 Dmsg1(800, "=== Auth cond errstat=%d\n", errstat);
220 Dmsg3(50, "Auth=%d canceled=%d errstat=%d\n", jcr->authenticated,
221 job_canceled(jcr), errstat);
223 Dmsg2(800, "Auth fail or cancel for jid=%d %p\n", jcr->JobId, jcr);
/* Overwrite the stored key bytes with zeros; the buffer itself is kept */
225 memset(jcr->sd_auth_key, 0, strlen(jcr->sd_auth_key));
227 if (jcr->authenticated && !job_canceled(jcr)) {
228 Dmsg2(800, "Running jid=%d %p\n", jcr->JobId, jcr);
229 run_job(jcr); /* Run the job */
231 Dmsg2(800, "Done jid=%d %p\n", jcr->JobId, jcr);
236 * After receiving a connection (in dircmd.c) if it is
237 * from the File daemon, this routine is called.
/*
 * Attach an incoming File daemon socket to the job named job_name and
 * authenticate it.
 *
 * fd       - socket of the connecting File daemon
 * job_name - full job name used to look up the jcr
 *
 * An M_FATAL message is issued when no jcr has that name, and again when
 * the jcr found is already authenticated. Otherwise fd becomes
 * jcr->file_bsock, authenticate_filed() is called, and on success
 * jcr->authenticated is set to true. A jcr left unauthenticated gets status
 * JS_ErrorTerminated. jcr->job_start_wait is signalled to wake the job
 * waiting in run_cmd().
 * NOTE(review): the exits taken after the two M_FATAL messages are not
 * visible here -- confirm against the full file.
 */
239 void handle_filed_connection(BSOCK *fd, char *job_name)
244 * With the following bmicrosleep on, running the
245 * SD under the debugger fails.
247 // bmicrosleep(0, 50000); /* wait 50 millisecs */
248 if (!(jcr=get_jcr_by_full_name(job_name))) {
249 Jmsg1(NULL, M_FATAL, 0, _("FD connect failed: Job name not found: %s\n"), job_name);
250 Dmsg1(3, "**** Job \"%s\" not found.\n", job_name);
256 Dmsg1(50, "Found Job %s\n", job_name);
/* A jcr that is already authenticated is reported as a fatal condition */
258 if (jcr->authenticated) {
259 Jmsg2(jcr, M_FATAL, 0, _("Hey!!!! JobId %u Job %s already authenticated.\n"),
260 (uint32_t)jcr->JobId, jcr->Job);
261 Dmsg2(50, "Hey!!!! JobId %u Job %s already authenticated.\n",
262 (uint32_t)jcr->JobId, jcr->Job);
/* Bind the socket to the jcr in both directions */
268 jcr->file_bsock = fd;
269 jcr->file_bsock->set_jcr(jcr);
272 * Authenticate the File daemon
/* The authenticated flag is tested again together with the authenticate_filed() result */
274 if (jcr->authenticated || !authenticate_filed(jcr)) {
275 Dmsg1(50, "Authentication failed Job %s\n", jcr->Job);
276 Jmsg(jcr, M_FATAL, 0, _("Unable to authenticate File daemon\n"));
278 jcr->authenticated = true;
279 Dmsg2(50, "OK Authentication jid=%u Job %s\n", (uint32_t)jcr->JobId, jcr->Job);
282 if (!jcr->authenticated) {
283 set_jcr_job_status(jcr, JS_ErrorTerminated);
285 pthread_cond_signal(&jcr->job_start_wait); /* wake waiting job */
293 * Query Device command from Director
294 * Sends Storage Daemon's information on the device to the
295 * caller (presumably the Director).
296 * This command always returns "true" so that the line is
297 * not closed on an error.
/*
 * Handle the query device command from the Director.
 *
 * The device name is scanned from dir->msg with the query_device template
 * and passed through unbash_spaces(). The R_DEVICE resources are searched
 * for a matching name, then the R_AUTOCHANGER resources. A match is
 * reported through dir_update_device() or dir_update_changer(), and
 * OK_query or NO_query is sent. If nothing matches, NO_device is sent with
 * the device name. The final sequence sends BAD_query with the received
 * message text.
 *
 * NOTE(review): query_device and NO_device are not defined in the visible
 * part of this file, and OK_query, NO_query and BAD_query appear only as
 * commented-out definitions -- confirm whether this function is compiled.
 * NOTE(review): the conditions choosing between the OK, NO and BAD replies
 * are not visible here -- confirm against the full file.
 */
300 bool query_cmd(JCR *jcr)
302 POOL_MEM dev_name, VolumeName, MediaType, ChangerName;
303 BSOCK *dir = jcr->dir_bsock;
305 AUTOCHANGER *changer;
308 Dmsg1(100, "Query_cmd: %s", dir->msg);
/* ok is true only when exactly one field, the device name, was scanned */
309 ok = sscanf(dir->msg, query_device, dev_name.c_str()) == 1;
310 Dmsg1(100, "<dird: %s\n", dir->msg);
312 unbash_spaces(dev_name);
/* First pass: look for a device resource with this name */
313 foreach_res(device, R_DEVICE) {
314 /* Find resource, and make sure we were able to open it */
315 if (strcmp(dev_name.c_str(), device->hdr.name) == 0) {
317 device->dev = init_dev(jcr, device);
322 ok = dir_update_device(jcr, device->dev);
324 ok = dir->fsend(OK_query);
326 dir->fsend(NO_query);
/* Second pass: look for an autochanger resource with this name */
331 foreach_res(changer, R_AUTOCHANGER) {
332 /* Find resource, and make sure we were able to open it */
333 if (strcmp(dev_name.c_str(), changer->hdr.name) == 0) {
/* A matching changer with a missing or empty device list is passed over */
334 if (!changer->device || changer->device->size() == 0) {
335 continue; /* no devices */
337 ok = dir_update_changer(jcr, changer);
339 ok = dir->fsend(OK_query);
341 dir->fsend(NO_query);
346 /* If we get here, the device/autochanger was not found */
347 unbash_spaces(dir->msg);
348 pm_strcpy(jcr->errmsg, dir->msg);
349 dir->fsend(NO_device, dev_name.c_str());
350 Dmsg1(100, ">dird: %s\n", dir->msg);
/* Reply with BAD_query, echoing the received message text */
352 unbash_spaces(dir->msg);
353 pm_strcpy(jcr->errmsg, dir->msg);
354 dir->fsend(BAD_query, jcr->errmsg);
355 Dmsg1(100, ">dird: %s\n", dir->msg);
365 * Destroy the Job Control Record and associated
366 * resources (sockets).
/*
 * Release the Storage daemon resources attached to a jcr.
 *
 * In order: signals BNET_EOD and BNET_TERMINATE on the Director socket,
 * closes the File daemon socket, frees the job, client and fileset name
 * buffers and the fileset MD5 buffer, frees the restore volume list,
 * unlinks and frees the restore bootstrap file name, reports a fatal error
 * if the jcr is still linked to a device, destroys the job_start_wait
 * condition variable, frees jcr->read_dcr, deletes the read and write store
 * lists, and writes the daemon state file.
 *
 * NOTE(review): some guards and pointer resets are not visible here, for
 * example around the free of jcr->job_name -- confirm against the full file.
 */
368 void stored_free_jcr(JCR *jcr)
370 Dmsg2(800, "End Job JobId=%u %p\n", jcr->JobId, jcr);
371 if (jcr->dir_bsock) {
372 Dmsg2(800, "Send terminate jid=%d %p\n", jcr->JobId, jcr);
/* Two signals are sent to the Director: end of data, then terminate */
373 jcr->dir_bsock->signal(BNET_EOD);
374 jcr->dir_bsock->signal(BNET_TERMINATE);
376 if (jcr->file_bsock) {
377 jcr->file_bsock->close();
378 jcr->file_bsock = NULL;
381 free_pool_memory(jcr->job_name);
383 if (jcr->client_name) {
384 free_memory(jcr->client_name);
385 jcr->client_name = NULL;
387 if (jcr->fileset_name) {
388 free_memory(jcr->fileset_name);
390 if (jcr->fileset_md5) {
391 free_memory(jcr->fileset_md5);
397 /* Free any restore volume list created */
398 free_restore_volume_list(jcr);
399 if (jcr->RestoreBootstrap) {
/* Remove the bootstrap file from disk before freeing the buffer holding its name */
400 unlink(jcr->RestoreBootstrap);
401 free_pool_memory(jcr->RestoreBootstrap);
402 jcr->RestoreBootstrap = NULL;
/* A jcr that still has device list links at this point is reported as fatal */
404 if (jcr->next_dev || jcr->prev_dev) {
405 Emsg0(M_FATAL, 0, _("In free_jcr(), but still attached to device!!!!\n"));
407 pthread_cond_destroy(&jcr->job_start_wait);
413 /* Avoid a double free */
/* When dcr and read_dcr are the same object, read_dcr is cleared first */
414 if (jcr->dcr == jcr->read_dcr) {
415 jcr->read_dcr = NULL;
422 free_dcr(jcr->read_dcr);
423 jcr->read_dcr = NULL;
/* For each store list: delete the device member of every entry, then the list */
426 if (jcr->read_store) {
428 foreach_alist(store, jcr->read_store) {
429 delete store->device;
432 delete jcr->read_store;
433 jcr->read_store = NULL;
435 if (jcr->write_store) {
437 foreach_alist(store, jcr->write_store) {
438 delete store->device;
441 delete jcr->write_store;
442 jcr->write_store = NULL;
/* Write the state file into the working directory under the name bacula-sd */
447 write_state_file(me->working_directory, "bacula-sd", get_first_port_host_order(me->sdaddrs));