2 Bacula® - The Network Backup Solution
4 Copyright (C) 2000-2009 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from
7 many others, a complete list can be found in the file AUTHORS.
8 This program is Free Software; you can redistribute it and/or
9 modify it under the terms of version two of the GNU General Public
10 License as published by the Free Software Foundation and included
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 Bacula® is a registered trademark of Kern Sibbald.
24 The licensor of Bacula is the Free Software Foundation Europe
25 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
26 Switzerland, email:ftf@fsfeurope.org.
29 * Job control and execution for Storage Daemon
/*
 * File-scope mutex handed to pthread_cond_timedwait() in run_cmd() while a
 * job waits on jcr->job_start_wait for the File daemon to connect.
 */
38 static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
40 /* Imported variables (job_cmd() copies VolSessionTime into each new JCR) */
41 extern uint32_t VolSessionTime;
43 /* Imported functions (job_cmd() calls newVolSessionId() once per job) */
44 extern uint32_t newVolSessionId();
45 extern bool do_mac(JCR *jcr);
47 /* Requests from the Director daemon */
48 /* Added in 3.1.4 14Sep09 KES */
/*
 * sscanf() templates for the Director's Job command. job_cmd() tries them in
 * this order: jobcmd, then oldjobcmd, then oldoldjobcmd.
 *   - oldjobcmd ends at the SpoolSize field.
 *   - oldoldjobcmd ends at PreferMountedVols and has no SpoolSize field.
 * String fields are limited to 127 characters by the %127s conversions;
 * SpoolSize is scanned with an unbounded %s.
 */
49 static char jobcmd[] = "JobId=%d job=%127s job_name=%127s client_name=%127s "
50 "type=%d level=%d FileSet=%127s NoAttr=%d SpoolAttr=%d FileSetMD5=%127s "
51 "SpoolData=%d WritePartAfterJob=%d PreferMountedVols=%d SpoolSize=%s "
53 static char oldjobcmd[] = "JobId=%d job=%127s job_name=%127s client_name=%127s "
54 "type=%d level=%d FileSet=%127s NoAttr=%d SpoolAttr=%d FileSetMD5=%127s "
55 "SpoolData=%d WritePartAfterJob=%d PreferMountedVols=%d SpoolSize=%s\n";
56 static char oldoldjobcmd[] = "JobId=%d job=%127s job_name=%127s client_name=%127s "
57 "type=%d level=%d FileSet=%127s NoAttr=%d SpoolAttr=%d FileSetMD5=%127s "
58 "SpoolData=%d WritePartAfterJob=%d PreferMountedVols=%d\n";
62 /* Responses sent to Director daemon */
/*
 * OKjob  : success reply from job_cmd(); carries the session id, the session
 *          time and the authorization key generated for the File daemon.
 * BAD_job: failure reply from job_cmd(); carries the sscanf() result and the
 *          command text that could not be parsed.
 */
63 static char OKjob[] = "3000 OK Job SDid=%u SDtime=%u Authorization=%s\n";
64 static char BAD_job[] = "3915 Bad Job command. stat=%d CMD: %s\n";
/*
 * NOTE(review): the three query replies below are commented out, yet
 * query_cmd() refers to OK_query, NO_query and BAD_query. Presumably
 * query_cmd() is itself compiled out -- confirm before re-enabling either.
 */
65 //static char OK_query[] = "3001 OK query\n";
66 //static char NO_query[] = "3918 Query failed\n";
67 //static char BAD_query[] = "3917 Bad query command: %s\n";
70 * Director requests us to start a job
71 * Basic tasks done here:
72 * - We pickup the JobId to be run from the Director.
73 * - We pickup the device, media, and pool from the Director
74 * - Wait for a connection from the File Daemon (FD)
75 * - Accept commands from the FD (i.e. run the job)
76 * - Return when the connection is terminated or
/*
 * Handle the Director's Job command.
 *
 * Parses dir->msg with jobcmd, falling back to oldjobcmd and then
 * oldoldjobcmd. When parsing fails, a BAD_job reply carrying the sscanf()
 * result and the offending command is sent and the job status is set to
 * JS_ErrorTerminated. Otherwise the parsed values are stored in the JCR, a
 * fresh VolSessionId/VolSessionTime pair is assigned, an authorization key
 * is generated and returned to the Director in an OKjob reply, and the
 * "JobStart" daemon event is generated.
 *
 *   jcr  job control record whose dir_bsock holds the received command
 */
79 bool job_cmd(JCR *jcr)
85 BSOCK *dir = jcr->dir_bsock;
86 POOL_MEM job_name, client_name, job, fileset_name, fileset_md5;
87 int JobType, level, spool_attributes, no_attributes, spool_data;
88 int write_part_after_job, PreferMountedVols;
94 * Get JobId and permissions from Director
96 Dmsg1(100, "<dird: %s", dir->msg);
/* Default to "0" so spool_size is defined when the matching format has no SpoolSize field */
97 bstrncpy(spool_size, "0", sizeof(spool_size));
98 stat = sscanf(dir->msg, jobcmd, &JobId, job.c_str(), job_name.c_str(),
100 &JobType, &level, fileset_name.c_str(), &no_attributes,
101 &spool_attributes, fileset_md5.c_str(), &spool_data,
102 &write_part_after_job, &PreferMountedVols, spool_size,
105 /* Try old version */
106 stat = sscanf(dir->msg, oldjobcmd, &JobId, job.c_str(), job_name.c_str(),
108 &JobType, &level, fileset_name.c_str(), &no_attributes,
109 &spool_attributes, fileset_md5.c_str(), &spool_data,
110 &write_part_after_job, &PreferMountedVols, spool_size);
112 /* Try oldold version */
113 stat = sscanf(dir->msg, oldoldjobcmd, &JobId, job.c_str(), job_name.c_str(),
115 &JobType, &level, fileset_name.c_str(), &no_attributes,
116 &spool_attributes, fileset_md5.c_str(), &spool_data,
117 &write_part_after_job, &PreferMountedVols);
/*
 * Parse failure: report the command back to the Director. The text is copied
 * to jcr->errmsg first; NOTE(review): presumably fsend() formats into
 * dir->msg (it is logged as the outgoing message right after) -- confirm.
 */
119 pm_strcpy(jcr->errmsg, dir->msg);
120 dir->fsend(BAD_job, stat, jcr->errmsg);
121 Dmsg1(100, ">dird: %s", dir->msg);
122 set_jcr_job_status(jcr, JS_ErrorTerminated);
128 * Since this job could be rescheduled, we
129 * check to see if we have it already. If so
130 * free the old jcr and use the new one.
132 ojcr = get_jcr_by_full_name(job.c_str());
133 if (ojcr && !ojcr->authenticated) {
134 Dmsg2(100, "Found ojcr=0x%x Job %s\n", (unsigned)(long)ojcr, job.c_str());
/* Record the session identity and the parsed job parameters in the JCR */
138 jcr->VolSessionId = newVolSessionId();
139 jcr->VolSessionTime = VolSessionTime;
140 bstrncpy(jcr->Job, job, sizeof(jcr->Job));
/* NOTE(review): unbash_spaces() presumably restores spaces that were encoded for transmission -- confirm */
141 unbash_spaces(job_name);
142 jcr->job_name = get_pool_memory(PM_NAME);
143 pm_strcpy(jcr->job_name, job_name);
144 unbash_spaces(client_name);
145 jcr->client_name = get_pool_memory(PM_NAME);
146 pm_strcpy(jcr->client_name, client_name);
147 unbash_spaces(fileset_name);
148 jcr->fileset_name = get_pool_memory(PM_NAME);
149 pm_strcpy(jcr->fileset_name, fileset_name);
150 jcr->set_JobType(JobType);
151 jcr->set_JobLevel(level);
152 jcr->no_attributes = no_attributes;
153 jcr->spool_attributes = spool_attributes;
154 jcr->spool_data = spool_data;
155 jcr->spool_size = str_to_int64(spool_size);
156 jcr->write_part_after_job = write_part_after_job;
157 jcr->fileset_md5 = get_pool_memory(PM_NAME);
158 pm_strcpy(jcr->fileset_md5, fileset_md5);
159 jcr->PreferMountedVols = PreferMountedVols;
/* Not authenticated yet; handle_filed_connection() sets this once the File daemon has authenticated */
162 jcr->authenticated = false;
165 * Pass back an authorization key for the File daemon
167 bsnprintf(seed, sizeof(seed), "%p%d", jcr, JobId);
168 make_session_key(auth_key, seed, 1);
169 dir->fsend(OKjob, jcr->VolSessionId, jcr->VolSessionTime, auth_key);
170 Dmsg2(50, ">dird jid=%u: %s", (uint32_t)jcr->JobId, dir->msg);
/* Keep a private copy of the key in the JCR, then wipe the local buffer */
171 jcr->sd_auth_key = bstrdup(auth_key);
172 memset(auth_key, 0, sizeof(auth_key));
173 generate_daemon_event(jcr, "JobStart");
/*
 * Handle the Director's run command for a job set up by job_cmd().
 *
 * Jobs for which jcr->no_client_used() is true take a separate path.
 * NOTE(review): that path presumably dispatches to do_mac() -- confirm.
 *
 * Otherwise the job status is set to JS_WaitFD and reported to the Director,
 * and the thread waits on jcr->job_start_wait until the job is
 * authenticated, is canceled, or me->client_wait seconds have elapsed. The
 * contents of jcr->sd_auth_key are then overwritten with zeros, and
 * run_job() is called only if the job is authenticated and not canceled.
 *
 *   jcr  job control record for the job to run
 */
177 bool run_cmd(JCR *jcr)
181 struct timespec timeout;
185 Dmsg1(200, "Run_cmd: %s\n", jcr->dir_bsock->msg);
187 /* If we do not need the FD, we are doing a migrate, copy, or virtual
190 if (jcr->no_client_used()) {
195 set_jcr_job_status(jcr, JS_WaitFD); /* wait for FD to connect */
196 dir_send_job_status(jcr);
198 gettimeofday(&tv, &tz);
/*
 * pthread_cond_timedwait() takes an absolute deadline: current time plus
 * me->client_wait seconds, with microseconds converted to nanoseconds.
 */
199 timeout.tv_nsec = tv.tv_usec * 1000;
200 timeout.tv_sec = tv.tv_sec + me->client_wait;
202 Dmsg3(50, "%s waiting %d sec for FD to contact SD key=%s\n",
203 jcr->Job, (int)(timeout.tv_sec-time(NULL)), jcr->sd_auth_key);
206 * Wait for the File daemon to contact us to start the Job,
207 * when he does, we will be released, unless the 30 minutes
211 while ( !jcr->authenticated && !job_canceled(jcr) ) {
212 errstat = pthread_cond_timedwait(&jcr->job_start_wait, &mutex, &timeout);
/*
 * Deadline reached, or the wait itself failed.
 * NOTE(review): pthread_cond_timedwait() requires `mutex` to be locked by
 * this thread -- verify the lock/unlock pairing around this loop.
 */
213 if (errstat == ETIMEDOUT || errstat == EINVAL || errstat == EPERM) {
217 Dmsg3(50, "Auth=%d canceled=%d errstat=%d\n", jcr->authenticated,
218 job_canceled(jcr), errstat);
/* Zero the key text in place; the buffer itself is not freed here */
221 memset(jcr->sd_auth_key, 0, strlen(jcr->sd_auth_key));
223 if (jcr->authenticated && !job_canceled(jcr)) {
224 Dmsg1(50, "Running job %s\n", jcr->Job);
225 run_job(jcr); /* Run the job */
231 * After receiving a connection (in dircmd.c) if it is
232 * from the File daemon, this routine is called.
/*
 * Attach an incoming File daemon connection to its job.
 *
 * Looks up the JCR by full job name. An unknown job name, or a job that is
 * already authenticated, is reported with a fatal job message. Otherwise the
 * socket becomes jcr->file_bsock and the File daemon is authenticated; on
 * success jcr->authenticated is set, on failure the job status is set to
 * JS_ErrorTerminated. The thread waiting in run_cmd() is then signalled
 * through jcr->job_start_wait.
 *
 *   fd        socket of the connecting File daemon
 *   job_name  full job name used to find the JCR
 */
234 void handle_filed_connection(BSOCK *fd, char *job_name)
239 * With the following bmicrosleep on, running the
240 * SD under the debugger fails.
242 // bmicrosleep(0, 50000); /* wait 50 millisecs */
243 if (!(jcr=get_jcr_by_full_name(job_name))) {
244 Jmsg1(NULL, M_FATAL, 0, _("FD connect failed: Job name not found: %s\n"), job_name);
245 Dmsg1(3, "**** Job \"%s\" not found.\n", job_name);
251 Dmsg1(50, "Found Job %s\n", job_name);
253 if (jcr->authenticated) {
254 Jmsg2(jcr, M_FATAL, 0, _("Hey!!!! JobId %u Job %s already authenticated.\n"),
255 (uint32_t)jcr->JobId, jcr->Job);
256 Dmsg2(50, "Hey!!!! JobId %u Job %s already authenticated.\n",
257 (uint32_t)jcr->JobId, jcr->Job);
/* Bind the socket and the job to each other */
263 jcr->file_bsock = fd;
264 jcr->file_bsock->set_jcr(jcr);
267 * Authenticate the File daemon
269 if (jcr->authenticated || !authenticate_filed(jcr)) {
270 Dmsg1(50, "Authentication failed Job %s\n", jcr->Job);
271 Jmsg(jcr, M_FATAL, 0, _("Unable to authenticate File daemon\n"));
273 jcr->authenticated = true;
274 Dmsg2(50, "OK Authentication jid=%u Job %s\n", (uint32_t)jcr->JobId, jcr->Job);
/* Authentication did not succeed: mark the job as failed */
277 if (!jcr->authenticated) {
278 set_jcr_job_status(jcr, JS_ErrorTerminated);
/* run_cmd() is blocked in pthread_cond_timedwait() on this condition variable */
280 pthread_cond_signal(&jcr->job_start_wait); /* wake waiting job */
288 * Query Device command from Director
289 * Sends Storage Daemon's information on the device to the
290 * caller (presumably the Director).
291 * This command always returns "true" so that the line is
292 * not closed on an error.
/*
 * Handle the Director's device query.
 *
 * Scans the device name from dir->msg using the query_device format, then
 * looks for a matching name first among the R_DEVICE resources and then
 * among the R_AUTOCHANGER resources. A match is reported to the Director
 * through dir_update_device() or dir_update_changer(), followed by an
 * OK_query or NO_query reply. If nothing matches, NO_device is sent; if the
 * command cannot be scanned, BAD_query is sent.
 *
 * NOTE(review): query_device, NO_device and the *_query reply strings have
 * no live definition near the other protocol strings in this file --
 * presumably this function is compiled out; confirm.
 *
 *   jcr  job control record whose dir_bsock holds the received command
 */
295 bool query_cmd(JCR *jcr)
297 POOL_MEM dev_name, VolumeName, MediaType, ChangerName;
298 BSOCK *dir = jcr->dir_bsock;
300 AUTOCHANGER *changer;
303 Dmsg1(100, "Query_cmd: %s", dir->msg);
304 ok = sscanf(dir->msg, query_device, dev_name.c_str()) == 1;
305 Dmsg1(100, "<dird: %s\n", dir->msg);
307 unbash_spaces(dev_name);
308 foreach_res(device, R_DEVICE) {
309 /* Find resource, and make sure we were able to open it */
310 if (strcmp(dev_name.c_str(), device->hdr.name) == 0) {
/* The device object returned by init_dev() is stored on the resource */
312 device->dev = init_dev(jcr, device);
317 ok = dir_update_device(jcr, device->dev);
319 ok = dir->fsend(OK_query);
321 dir->fsend(NO_query);
326 foreach_res(changer, R_AUTOCHANGER) {
327 /* Find resource, and make sure we were able to open it */
328 if (strcmp(dev_name.c_str(), changer->hdr.name) == 0) {
/* An autochanger with no device list, or an empty one, is skipped */
329 if (!changer->device || changer->device->size() == 0) {
330 continue; /* no devices */
332 ok = dir_update_changer(jcr, changer);
334 ok = dir->fsend(OK_query);
336 dir->fsend(NO_query);
341 /* If we get here, the device/autochanger was not found */
342 unbash_spaces(dir->msg);
343 pm_strcpy(jcr->errmsg, dir->msg);
344 dir->fsend(NO_device, dev_name.c_str());
345 Dmsg1(100, ">dird: %s\n", dir->msg);
/* Command could not be scanned: echo it back to the Director in a BAD_query reply */
347 unbash_spaces(dir->msg);
348 pm_strcpy(jcr->errmsg, dir->msg);
349 dir->fsend(BAD_query, jcr->errmsg);
350 Dmsg1(100, ">dird: %s\n", dir->msg);
360 * Destroy the Job Control Record and associated
361 * resources (sockets).
/*
 * Release the Storage daemon specific parts of a JCR.
 *
 * Closes the File daemon socket, frees the name strings allocated by
 * job_cmd(), frees the restore volume list, removes and frees the restore
 * bootstrap file, destroys the job_start_wait condition variable, frees the
 * device control records, deletes the read and write storage lists together
 * with their device entries, and finally writes the daemon state file.
 *
 *   jcr  job control record being destroyed
 */
363 void stored_free_jcr(JCR *jcr)
365 Dmsg1(900, "stored_free_jcr JobId=%u\n", jcr->JobId);
366 if (jcr->file_bsock) {
367 jcr->file_bsock->close();
368 jcr->file_bsock = NULL;
/*
 * NOTE(review): job_name is released with free_pool_memory() while the other
 * strings below use free_memory(); all four come from get_pool_memory() in
 * job_cmd(). Presumably the two names are equivalent -- confirm.
 */
371 free_pool_memory(jcr->job_name);
373 if (jcr->client_name) {
374 free_memory(jcr->client_name);
375 jcr->client_name = NULL;
377 if (jcr->fileset_name) {
378 free_memory(jcr->fileset_name);
380 if (jcr->fileset_md5) {
381 free_memory(jcr->fileset_md5);
387 /* Free any restore volume list created */
388 free_restore_volume_list(jcr);
389 if (jcr->RestoreBootstrap) {
/* Remove the bootstrap file from disk before releasing its path string */
390 unlink(jcr->RestoreBootstrap);
391 free_pool_memory(jcr->RestoreBootstrap);
392 jcr->RestoreBootstrap = NULL;
394 if (jcr->next_dev || jcr->prev_dev) {
395 Emsg0(M_FATAL, 0, _("In free_jcr(), but still attached to device!!!!\n"));
397 pthread_cond_destroy(&jcr->job_start_wait);
403 /* Avoid a double free: when dcr and read_dcr are the same object, drop the read_dcr reference */
404 if (jcr->dcr == jcr->read_dcr) {
405 jcr->read_dcr = NULL;
412 free_dcr(jcr->read_dcr);
413 jcr->read_dcr = NULL;
416 if (jcr->read_store) {
418 foreach_alist(store, jcr->read_store) {
419 delete store->device;
422 delete jcr->read_store;
423 jcr->read_store = NULL;
425 if (jcr->write_store) {
427 foreach_alist(store, jcr->write_store) {
428 delete store->device;
431 delete jcr->write_store;
432 jcr->write_store = NULL;
/* Write the "bacula-sd" state file into the configured working directory */
437 write_state_file(me->working_directory, "bacula-sd", get_first_port_host_order(me->sdaddrs));