2 Bacula® - The Network Backup Solution
4 Copyright (C) 2000-2011 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from
7 many others, a complete list can be found in the file AUTHORS.
8 This program is Free Software; you can redistribute it and/or
9 modify it under the terms of version three of the GNU Affero General Public
10 License as published by the Free Software Foundation and included in the file LICENSE.
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU Affero General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
23 Bacula® is a registered trademark of Kern Sibbald.
24 The licensor of Bacula is the Free Software Foundation Europe
25 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
26 Switzerland, email:ftf@fsfeurope.org.
29 * Job control and execution for Storage Daemon
/* Mutex handed to pthread_cond_timedwait() in run_cmd() while a job waits for its File daemon. */
38 static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
40 /* Imported variables */
41 extern uint32_t VolSessionTime;
43 /* Imported functions */
44 extern uint32_t newVolSessionId();
45 extern bool do_mac(JCR *jcr);
47 /* Requests from the Director daemon */
48 /* Added in 3.1.4 14Sep09 KES */
/*
 * sscanf() template for the Director's job command, consumed by job_cmd().
 * It contains 17 conversions. Every string field is limited to 127
 * characters except SpoolSize, which uses a bare %s.
 * NOTE(review): confirm the spool_size buffer in job_cmd() is large enough
 * for whatever the Director can send in that field.
 */
49 static char jobcmd[] = "JobId=%d job=%127s job_name=%127s client_name=%127s "
50 "type=%d level=%d FileSet=%127s NoAttr=%d SpoolAttr=%d FileSetMD5=%127s "
51 "SpoolData=%d WritePartAfterJob=%d PreferMountedVols=%d SpoolSize=%s "
52 "rerunning=%d VolSessionId=%d VolSessionTime=%d\n";
54 /* Responses sent to Director daemon */
/* Success reply: carries VolSessionId, VolSessionTime and the authorization key (see job_cmd()). */
55 static char OKjob[] = "3000 OK Job SDid=%u SDtime=%u Authorization=%s\n";
/* Failure reply: carries the sscanf() result and the text of the rejected command. */
56 static char BAD_job[] = "3915 Bad Job command. stat=%d CMD: %s\n";
59 * Director requests us to start a job
60 * Basic tasks done here:
61 * - We pickup the JobId to be run from the Director.
62 * - We pickup the device, media, and pool from the Director
63 * - Wait for a connection from the File Daemon (FD)
64 * - Accept commands from the FD (i.e. run the job)
65 * - Return when the connection is terminated or there is an error.
/*
 * job_cmd -- handle the Director's request to start a job.
 *
 * Parses dir->msg with the jobcmd template, copies the parsed values into
 * jcr, creates an authorization key for the File daemon and returns it to
 * the Director in an OKjob reply.
 *
 * NOTE(review): this listing appears to be missing some source lines
 * (braces, several declarations, return statements), so the comments
 * below describe only the statements that are visible.
 */
68 bool job_cmd(JCR *jcr)
74 BSOCK *dir = jcr->dir_bsock;
/* Scratch buffers and integers that receive the fields scanned from the command. */
75 POOL_MEM job_name, client_name, job, fileset_name, fileset_md5;
76 int JobType, level, spool_attributes, no_attributes, spool_data;
77 int write_part_after_job, PreferMountedVols;
82 * Get JobId and permissions from Director
84 Dmsg1(100, "<dird: %s", dir->msg);
/* Default the spool size to "0" before scanning. */
85 bstrncpy(spool_size, "0", sizeof(spool_size));
/*
 * Scan the command; stat receives the number of fields sscanf() converted.
 * NOTE(review): rerunning, VolSessionId and VolSessionTime are scanned with
 * %d directly into jcr members -- verify their declared types match.
 */
86 stat = sscanf(dir->msg, jobcmd, &JobId, job.c_str(), job_name.c_str(),
88 &JobType, &level, fileset_name.c_str(), &no_attributes,
89 &spool_attributes, fileset_md5.c_str(), &spool_data,
90 &write_part_after_job, &PreferMountedVols, spool_size,
91 &jcr->rerunning, &jcr->VolSessionId, &jcr->VolSessionTime);
/*
 * Error path: save the command text in jcr->errmsg, send it back to the
 * Director in a BAD_job reply and mark the job as terminated in error.
 * NOTE(review): the condition guarding this path is not visible here;
 * presumably it compares stat with the 17 fields in jobcmd -- confirm.
 */
93 pm_strcpy(jcr->errmsg, dir->msg);
94 dir->fsend(BAD_job, stat, jcr->errmsg);
95 Dmsg1(100, ">dird: %s", dir->msg);
96 jcr->setJobStatus(JS_ErrorTerminated);
99 Dmsg3(100, "==== rerunning=%d VolSesId=%d VolSesTime=%d\n", jcr->rerunning,
100 jcr->VolSessionId, jcr->VolSessionTime);
102 * Since this job could be rescheduled, we
103 * check to see if we have it already. If so
104 * free the old jcr and use the new one.
/* Look up any JCR already registered under the same full job name. */
106 ojcr = get_jcr_by_full_name(job.c_str());
/*
 * NOTE(review): only the debug message for a matching, unauthenticated
 * ojcr is visible; how that old JCR is released is not shown here.
 */
107 if (ojcr && !ojcr->authenticated) {
108 Dmsg2(100, "Found ojcr=0x%x Job %s\n", (unsigned)(intptr_t)ojcr, job.c_str());
112 Dmsg2(800, "Start JobId=%d %p\n", JobId, jcr);
114 * If job rescheduled because previous was incomplete,
115 * the Resched flag is set and VolSessionId and VolSessionTime
116 * are given to us (same as restarted job).
/* A job that is not being rerun gets a new session id and the imported VolSessionTime. */
118 if (!jcr->rerunning) {
119 jcr->VolSessionId = newVolSessionId();
120 jcr->VolSessionTime = VolSessionTime;
/* Copy the scanned values into the JCR; names go through unbash_spaces() first. */
122 bstrncpy(jcr->Job, job, sizeof(jcr->Job));
123 unbash_spaces(job_name);
124 jcr->job_name = get_pool_memory(PM_NAME);
125 pm_strcpy(jcr->job_name, job_name);
126 unbash_spaces(client_name);
127 jcr->client_name = get_pool_memory(PM_NAME);
128 pm_strcpy(jcr->client_name, client_name);
129 unbash_spaces(fileset_name);
130 jcr->fileset_name = get_pool_memory(PM_NAME);
131 pm_strcpy(jcr->fileset_name, fileset_name);
132 jcr->setJobType(JobType);
133 jcr->setJobLevel(level);
134 jcr->no_attributes = no_attributes;
135 jcr->spool_attributes = spool_attributes;
136 jcr->spool_data = spool_data;
/* spool_size arrives as text and is converted to a 64-bit integer. */
137 jcr->spool_size = str_to_int64(spool_size);
138 jcr->write_part_after_job = write_part_after_job;
139 jcr->fileset_md5 = get_pool_memory(PM_NAME);
140 pm_strcpy(jcr->fileset_md5, fileset_md5);
141 jcr->PreferMountedVols = PreferMountedVols;
/* Start unauthenticated; run_cmd() loops until this flag becomes true or the job is canceled. */
144 jcr->authenticated = false;
147 * Pass back an authorization key for the File daemon
/* The seed for the session key is built from the jcr pointer value and the JobId. */
149 bsnprintf(seed, sizeof(seed), "%p%d", jcr, JobId);
150 make_session_key(auth_key, seed, 1);
151 dir->fsend(OKjob, jcr->VolSessionId, jcr->VolSessionTime, auth_key);
152 Dmsg2(50, ">dird jid=%u: %s", (uint32_t)jcr->JobId, dir->msg);
/* Keep a private copy of the key in the JCR, then clear the local buffer. */
153 jcr->sd_auth_key = bstrdup(auth_key);
154 memset(auth_key, 0, sizeof(auth_key));
155 generate_daemon_event(jcr, "JobStart");
/*
 * run_cmd -- wait for the File daemon to authenticate, then run the job.
 *
 * Sets the job status to JS_WaitFD, waits on jcr->job_start_wait until
 * jcr->authenticated is set or the job is canceled, clears the stored
 * authorization key and, if authenticated and not canceled, calls run_job().
 *
 * NOTE(review): this listing appears to be missing some source lines
 * (braces, declarations, any lock/unlock of `mutex`, return statements);
 * the comments below cover only what is visible.
 */
159 bool run_cmd(JCR *jcr)
163 struct timespec timeout;
167 Dmsg1(200, "Run_cmd: %s\n", jcr->dir_bsock->msg);
169 /* If we do not need the FD, we are doing a migrate, copy, or virtual
/* NOTE(review): the body of this branch is not visible in this listing. */
172 if (jcr->no_client_used()) {
177 jcr->setJobStatus(JS_WaitFD); /* wait for FD to connect */
178 dir_send_job_status(jcr);
/* Build the absolute deadline for the wait: current time plus me->client_wait seconds. */
180 gettimeofday(&tv, &tz);
181 timeout.tv_nsec = tv.tv_usec * 1000;
182 timeout.tv_sec = tv.tv_sec + me->client_wait;
184 Dmsg3(50, "%s waiting %d sec for FD to contact SD key=%s\n",
185 jcr->Job, (int)(timeout.tv_sec-time(NULL)), jcr->sd_auth_key);
186 Dmsg2(800, "Wait FD for jid=%d %p\n", jcr->JobId, jcr);
189 * Wait for the File daemon to contact us to start the Job,
190 * when he does, we will be released, unless the 30 minutes
/*
 * Loop until the job is authenticated or canceled. The condition variable
 * is signaled by handle_filed_connection().
 * NOTE(review): the action taken on ETIMEDOUT/EINVAL/EPERM is not visible;
 * presumably the loop is left -- confirm.
 */
194 while ( !jcr->authenticated && !job_canceled(jcr) ) {
195 errstat = pthread_cond_timedwait(&jcr->job_start_wait, &mutex, &timeout);
196 if (errstat == ETIMEDOUT || errstat == EINVAL || errstat == EPERM) {
199 Dmsg1(800, "=== Auth cond errstat=%d\n", errstat);
201 Dmsg3(50, "Auth=%d canceled=%d errstat=%d\n", jcr->authenticated,
202 job_canceled(jcr), errstat);
204 Dmsg2(800, "Auth fail or cancel for jid=%d %p\n", jcr->JobId, jcr);
/* Overwrite the stored authorization key with zero bytes over its current string length. */
206 memset(jcr->sd_auth_key, 0, strlen(jcr->sd_auth_key));
/* Only an authenticated job that has not been canceled is actually run. */
208 if (jcr->authenticated && !job_canceled(jcr)) {
209 Dmsg2(800, "Running jid=%d %p\n", jcr->JobId, jcr);
210 run_job(jcr); /* Run the job */
212 Dmsg2(800, "Done jid=%d %p\n", jcr->JobId, jcr);
217 * After receiving a connection (in dircmd.c) if it is
218 * from the File daemon, this routine is called.
/*
 * handle_filed_connection -- attach an incoming File daemon socket to its job.
 *
 *   fd        socket of the connecting File daemon
 *   job_name  full job name used to look up the JCR
 *
 * Finds the JCR by name, stores fd in jcr->file_bsock, authenticates the
 * File daemon and signals jcr->job_start_wait, the condition variable that
 * run_cmd() waits on.
 *
 * NOTE(review): this listing appears to be missing some source lines
 * (braces, else branches, returns/cleanup); the comments below cover only
 * what is visible.
 */
220 void handle_filed_connection(BSOCK *fd, char *job_name)
225 * With the following bmicrosleep on, running the
226 * SD under the debugger fails.
228 // bmicrosleep(0, 50000); /* wait 50 millisecs */
/* No JCR is registered under this job name: report a fatal error. */
229 if (!(jcr=get_jcr_by_full_name(job_name))) {
230 Jmsg1(NULL, M_FATAL, 0, _("FD connect failed: Job name not found: %s\n"), job_name);
231 Dmsg1(3, "**** Job \"%s\" not found.\n", job_name);
237 Dmsg1(50, "Found Job %s\n", job_name);
/* A job that is already authenticated is reported as a fatal error. */
239 if (jcr->authenticated) {
240 Jmsg2(jcr, M_FATAL, 0, _("Hey!!!! JobId %u Job %s already authenticated.\n"),
241 (uint32_t)jcr->JobId, jcr->Job);
242 Dmsg2(50, "Hey!!!! JobId %u Job %s already authenticated.\n",
243 (uint32_t)jcr->JobId, jcr->Job);
/* Record the File daemon socket in the JCR and point the socket back at the JCR. */
249 jcr->file_bsock = fd;
250 jcr->file_bsock->set_jcr(jcr);
253 * Authenticate the File daemon
/*
 * NOTE(review): the assignment of authenticated = true below presumably
 * sits in the else branch of this test; the branch structure is not
 * visible in this listing -- confirm.
 */
255 if (jcr->authenticated || !authenticate_filed(jcr)) {
256 Dmsg1(50, "Authentication failed Job %s\n", jcr->Job);
257 Jmsg(jcr, M_FATAL, 0, _("Unable to authenticate File daemon\n"));
259 jcr->authenticated = true;
260 Dmsg2(50, "OK Authentication jid=%u Job %s\n", (uint32_t)jcr->JobId, jcr->Job);
/* A job that is still unauthenticated is marked as terminated in error. */
263 if (!jcr->authenticated) {
264 jcr->setJobStatus(JS_ErrorTerminated);
266 pthread_cond_signal(&jcr->job_start_wait); /* wake waiting job */
274 * Query Device command from Director
275 * Sends Storage Daemon's information on the device to the
276 * caller (presumably the Director).
277 * This command always returns "true" so that the line is
278 * not closed on an error.
/*
 * query_cmd -- answer the Director's "query device" request.
 *
 * Scans the device name from dir->msg, searches the device resources and
 * then the autochanger resources for a resource with that name, and
 * reports the result back to the Director.
 *
 * NOTE(review): this listing appears to be missing some source lines
 * (braces, declarations, returns, the branches selecting between the
 * OK_query and NO_query replies); the comments below cover only what is
 * visible.
 */
281 bool query_cmd(JCR *jcr)
283 POOL_MEM dev_name, VolumeName, MediaType, ChangerName;
284 BSOCK *dir = jcr->dir_bsock;
286 AUTOCHANGER *changer;
289 Dmsg1(100, "Query_cmd: %s", dir->msg);
/* The command is well formed only if exactly one field (the device name) was scanned. */
290 ok = sscanf(dir->msg, query_device, dev_name.c_str()) == 1;
291 Dmsg1(100, "<dird: %s\n", dir->msg);
293 unbash_spaces(dev_name);
/* First pass: compare the requested name with every device resource. */
294 foreach_res(device, R_DEVICE) {
295 /* Find resource, and make sure we were able to open it */
296 if (strcmp(dev_name.c_str(), device->hdr.name) == 0) {
/* NOTE(review): presumably init_dev() runs only when device->dev is not yet set; the guard is not visible. */
298 device->dev = init_dev(jcr, device);
303 ok = dir_update_device(jcr, device->dev);
305 ok = dir->fsend(OK_query);
307 dir->fsend(NO_query);
/* Second pass: compare the requested name with every autochanger resource. */
312 foreach_res(changer, R_AUTOCHANGER) {
313 /* Find resource, and make sure we were able to open it */
314 if (strcmp(dev_name.c_str(), changer->hdr.name) == 0) {
315 if (!changer->device || changer->device->size() == 0) {
316 continue; /* no devices */
318 ok = dir_update_changer(jcr, changer);
320 ok = dir->fsend(OK_query);
322 dir->fsend(NO_query);
327 /* If we get here, the device/autochanger was not found */
328 unbash_spaces(dir->msg);
329 pm_strcpy(jcr->errmsg, dir->msg);
330 dir->fsend(NO_device, dev_name.c_str());
331 Dmsg1(100, ">dird: %s\n", dir->msg);
/*
 * Reply for a command that could not be parsed: send the command text back
 * in a BAD_query message.
 * NOTE(review): the else/branch line that separates this from the
 * "not found" reply above is not visible in this listing.
 */
333 unbash_spaces(dir->msg);
334 pm_strcpy(jcr->errmsg, dir->msg);
335 dir->fsend(BAD_query, jcr->errmsg);
336 Dmsg1(100, ">dird: %s\n", dir->msg);
346 * Destroy the Job Control Record and associated
347 * resources (sockets).
/*
 * stored_free_jcr -- release the Storage-daemon-specific parts of a JCR.
 *
 * Signals end-of-data and terminate on the Director socket, closes the File
 * daemon socket, frees the name strings, the restore volume list and the
 * bootstrap file, destroys the job_start_wait condition variable, frees the
 * device control records and store lists, and writes the state file.
 *
 * NOTE(review): this listing appears to be missing some source lines
 * (braces, several guards, declarations, the end of the function); the
 * comments below cover only what is visible.
 */
349 void stored_free_jcr(JCR *jcr)
351 Dmsg2(800, "End Job JobId=%u %p\n", jcr->JobId, jcr);
/* Tell the Director the conversation is finished: end of data, then terminate. */
352 if (jcr->dir_bsock) {
353 Dmsg2(800, "Send terminate jid=%d %p\n", jcr->JobId, jcr);
354 jcr->dir_bsock->signal(BNET_EOD);
355 jcr->dir_bsock->signal(BNET_TERMINATE);
/* Close the File daemon socket and clear the pointer. */
357 if (jcr->file_bsock) {
358 jcr->file_bsock->close();
359 jcr->file_bsock = NULL;
/* NOTE(review): any NULL guard around this free is not visible in this listing. */
362 free_pool_memory(jcr->job_name);
364 if (jcr->client_name) {
365 free_memory(jcr->client_name);
366 jcr->client_name = NULL;
368 if (jcr->fileset_name) {
369 free_memory(jcr->fileset_name);
371 if (jcr->fileset_md5) {
372 free_memory(jcr->fileset_md5);
378 /* Free any restore volume list created */
379 free_restore_volume_list(jcr);
/* The bootstrap file is removed from disk before its name buffer is freed. */
380 if (jcr->RestoreBootstrap) {
381 unlink(jcr->RestoreBootstrap);
382 free_pool_memory(jcr->RestoreBootstrap);
383 jcr->RestoreBootstrap = NULL;
/* Being linked into a device list at this point is reported as a fatal error. */
385 if (jcr->next_dev || jcr->prev_dev) {
386 Emsg0(M_FATAL, 0, _("In free_jcr(), but still attached to device!!!!\n"));
388 pthread_cond_destroy(&jcr->job_start_wait);
394 /* Avoid a double free */
/* When dcr and read_dcr are the same object, read_dcr is cleared here. */
395 if (jcr->dcr == jcr->read_dcr) {
396 jcr->read_dcr = NULL;
/* NOTE(review): the handling of jcr->dcr and any guard around this free_dcr() are not visible. */
403 free_dcr(jcr->read_dcr);
404 jcr->read_dcr = NULL;
/* Delete the device list held by each read store entry, then the read store list itself. */
407 if (jcr->read_store) {
409 foreach_alist(store, jcr->read_store) {
410 delete store->device;
413 delete jcr->read_store;
414 jcr->read_store = NULL;
/* Same teardown for the write store list. */
416 if (jcr->write_store) {
418 foreach_alist(store, jcr->write_store) {
419 delete store->device;
422 delete jcr->write_store;
423 jcr->write_store = NULL;
/* Write the daemon state file into the working directory. */
428 write_state_file(me->working_directory, "bacula-sd", get_first_port_host_order(me->sdaddrs));