2 Bacula® - The Network Backup Solution
4 Copyright (C) 2000-2011 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from
7 many others, a complete list can be found in the file AUTHORS.
8 This program is Free Software; you can redistribute it and/or
9 modify it under the terms of version three of the GNU Affero General Public
10 License as published by the Free Software Foundation and included in the file LICENSE.
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU Affero General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
23 Bacula® is a registered trademark of Kern Sibbald.
24 The licensor of Bacula is the Free Software Foundation Europe
25 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
26 Switzerland, email:ftf@fsfeurope.org.
29 * Job control and execution for Storage Daemon
/*
 * File-scope mutex. In this file it is passed to pthread_cond_timedwait()
 * in run_cmd() together with jcr->job_start_wait.
 */
38 static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
40 /* Imported variables */
41 extern uint32_t VolSessionTime;
43 /* Imported functions */
44 extern uint32_t newVolSessionId();
45 extern bool do_mac(JCR *jcr);
47 /* Requests from the Director daemon */
48 /* Added in 3.1.4 14Sep09 KES */
/*
 * sscanf() template that job_cmd() applies to the Director message.
 * It contains 17 conversions; the string fields are limited to 127
 * characters each, except SpoolSize which uses a bare %s.
 * NOTE(review): the SpoolSize destination in job_cmd() is a local buffer
 * (it is used with sizeof there) whose declaration is not visible here.
 * A bare %s can write past a fixed buffer; confirm the buffer size and
 * give this conversion a matching field width.
 */
49 static char jobcmd[] = "JobId=%d job=%127s job_name=%127s client_name=%127s "
50 "type=%d level=%d FileSet=%127s NoAttr=%d SpoolAttr=%d FileSetMD5=%127s "
51 "SpoolData=%d WritePartAfterJob=%d PreferMountedVols=%d SpoolSize=%s "
52 "rerunning=%d VolSessionId=%d VolSessionTime=%d\n";
54 /* Responses sent to Director daemon */
/*
 * OKjob: success reply from job_cmd(); carries jcr->VolSessionId,
 * jcr->VolSessionTime and the generated authorization key.
 */
55 static char OKjob[] = "3000 OK Job SDid=%u SDtime=%u Authorization=%s\n";
/*
 * BAD_job: failure reply from job_cmd(); carries the sscanf() result and
 * a copy of the offending command text.
 */
56 static char BAD_job[] = "3915 Bad Job command. stat=%d CMD: %s\n";
59 * Director requests us to start a job
60 * Basic tasks done here:
61 * - We pickup the JobId to be run from the Director.
62 * - We pickup the device, media, and pool from the Director
63 * - Wait for a connection from the File Daemon (FD)
64 * - Accept commands from the FD (i.e. run the job)
65 * - Return when the connection is terminated or there is an error.
/*
 * Handle the Job command received from the Director.
 *
 * Scans jcr->dir_bsock->msg with the jobcmd template, stores the parsed
 * values in the JCR and answers with OKjob, which carries the session id,
 * the session time and a newly generated authorization key. The error
 * path below answers with BAD_job and sets JS_ErrorTerminated.
 *
 * jcr: job control record whose dir_bsock holds the Director message.
 * Returns bool.
 * NOTE(review): the return statements, the test that selects the error
 * path and several local declarations (JobId, stat, ojcr, spool_size,
 * seed, auth_key) are not visible in this view of the file.
 */
68 bool job_cmd(JCR *jcr)
74 BSOCK *dir = jcr->dir_bsock;
75 POOL_MEM job_name, client_name, job, fileset_name, fileset_md5;
76 int32_t JobType, level, spool_attributes, no_attributes, spool_data;
77 int32_t write_part_after_job, PreferMountedVols;
82 * Get JobId and permissions from Director
84 Dmsg1(100, "<dird: %s", dir->msg);
/* Preset spool_size to the string 0 before the scan overwrites it. */
85 bstrncpy(spool_size, "0", sizeof(spool_size));
/*
 * String fields are scanned straight into the POOL_MEM buffers through
 * c_str(); rerunning, VolSessionId and VolSessionTime are scanned
 * directly into the JCR.
 * NOTE(review): spool_size is filled by the bare %s conversion of jobcmd;
 * confirm it cannot overflow the local buffer.
 */
86 stat = sscanf(dir->msg, jobcmd, &JobId, job.c_str(), job_name.c_str(),
88 &JobType, &level, fileset_name.c_str(), &no_attributes,
89 &spool_attributes, fileset_md5.c_str(), &spool_data,
90 &write_part_after_job, &PreferMountedVols, spool_size,
91 &jcr->rerunning, &jcr->VolSessionId, &jcr->VolSessionTime);
/* Error path: echo the command back in a BAD_job reply and fail the job. */
93 pm_strcpy(jcr->errmsg, dir->msg);
94 dir->fsend(BAD_job, stat, jcr->errmsg);
95 Dmsg1(100, ">dird: %s", dir->msg);
96 jcr->setJobStatus(JS_ErrorTerminated);
99 Dmsg3(100, "==== rerunning=%d VolSesId=%d VolSesTime=%d\n", jcr->rerunning,
100 jcr->VolSessionId, jcr->VolSessionTime);
102 * Since this job could be rescheduled, we
103 * check to see if we have it already. If so
104 * free the old jcr and use the new one.
/*
 * Look up an existing JCR with the same full job name that has not been
 * authenticated yet.
 * NOTE(review): only the debug message of this branch is visible here.
 * NOTE(review): the cast to unsigned keeps only part of the pointer
 * wherever unsigned is narrower than a pointer; other messages in this
 * file print JCR pointers with %p.
 */
106 ojcr = get_jcr_by_full_name(job.c_str());
107 if (ojcr && !ojcr->authenticated) {
108 Dmsg2(100, "Found ojcr=0x%x Job %s\n", (unsigned)(intptr_t)ojcr, job.c_str());
112 Dmsg2(800, "Start JobId=%d %p\n", JobId, jcr);
114 * If job rescheduled because previous was incomplete,
115 * the Resched flag is set and VolSessionId and VolSessionTime
116 * are given to us (same as restarted job).
/* A job that is not rerunning gets a fresh session id and session time. */
118 if (!jcr->rerunning) {
119 jcr->VolSessionId = newVolSessionId();
120 jcr->VolSessionTime = VolSessionTime;
/*
 * Copy the scanned values into the JCR. The three names are passed
 * through unbash_spaces() first, and each string gets its own PM_NAME
 * pool buffer.
 */
122 bstrncpy(jcr->Job, job, sizeof(jcr->Job));
123 unbash_spaces(job_name);
124 jcr->job_name = get_pool_memory(PM_NAME);
125 pm_strcpy(jcr->job_name, job_name);
126 unbash_spaces(client_name);
127 jcr->client_name = get_pool_memory(PM_NAME);
128 pm_strcpy(jcr->client_name, client_name);
129 unbash_spaces(fileset_name);
130 jcr->fileset_name = get_pool_memory(PM_NAME);
131 pm_strcpy(jcr->fileset_name, fileset_name);
132 jcr->setJobType(JobType);
133 jcr->setJobLevel(level);
134 jcr->no_attributes = no_attributes;
135 jcr->spool_attributes = spool_attributes;
136 jcr->spool_data = spool_data;
137 jcr->spool_size = str_to_int64(spool_size);
138 jcr->write_part_after_job = write_part_after_job;
139 jcr->fileset_md5 = get_pool_memory(PM_NAME);
140 pm_strcpy(jcr->fileset_md5, fileset_md5);
141 jcr->PreferMountedVols = PreferMountedVols;
/*
 * Cleared here; handle_filed_connection() sets it after a successful
 * File daemon authentication and run_cmd() waits for that.
 */
144 jcr->authenticated = false;
147 * Pass back an authorization key for the File daemon
/*
 * The seed is built from the JCR address and the JobId. The key is sent
 * to the Director, duplicated into jcr->sd_auth_key, and the local copy
 * is then zeroed.
 * NOTE(review): a plain memset() of a local that is not read afterwards
 * may be removed by the optimizer, and seed is not cleared in the
 * visible lines.
 */
149 bsnprintf(seed, sizeof(seed), "%p%d", jcr, JobId);
150 make_session_key(auth_key, seed, 1);
151 dir->fsend(OKjob, jcr->VolSessionId, jcr->VolSessionTime, auth_key);
152 Dmsg2(50, ">dird jid=%u: %s", (uint32_t)jcr->JobId, dir->msg);
153 jcr->sd_auth_key = bstrdup(auth_key);
154 memset(auth_key, 0, sizeof(auth_key));
/* Instantiate plugins and emit the JobStart daemon and plugin events. */
155 new_plugins(jcr); /* instantiate the plugins */
156 generate_daemon_event(jcr, "JobStart");
157 generate_plugin_event(jcr, bsdEventJobStart, (void *)"JobStart");
/*
 * Handle the run command from the Director.
 *
 * Waits on jcr->job_start_wait until the job is authenticated or
 * canceled, with an absolute deadline of now plus me->client_wait
 * seconds, then calls run_job() if the job is authenticated and not
 * canceled.
 *
 * jcr: job control record of the job to run.
 * Returns bool.
 * NOTE(review): the return statements, the body of the no_client_used()
 * branch and the declarations of tv, tz and errstat are not visible in
 * this view of the file.
 */
161 bool run_cmd(JCR *jcr)
165 struct timespec timeout;
169 Dmsg1(200, "Run_cmd: %s\n", jcr->dir_bsock->msg);
171 /* If we do not need the FD, we are doing a migrate, copy, or virtual
174 if (jcr->no_client_used()) {
179 jcr->sendJobStatus(JS_WaitFD); /* wait for FD to connect */
/* Build the absolute deadline: current time plus me->client_wait seconds. */
181 gettimeofday(&tv, &tz);
182 timeout.tv_nsec = tv.tv_usec * 1000;
183 timeout.tv_sec = tv.tv_sec + me->client_wait;
/* NOTE(review): this debug message writes jcr->sd_auth_key to the debug output. */
185 Dmsg3(50, "%s waiting %d sec for FD to contact SD key=%s\n",
186 jcr->Job, (int)(timeout.tv_sec-time(NULL)), jcr->sd_auth_key);
187 Dmsg2(800, "Wait FD for jid=%d %p\n", jcr->JobId, jcr);
190 * Wait for the File daemon to contact us to start the Job,
191 * when he does, we will be released, unless the 30 minutes
/*
 * Wait on the condition until the job is authenticated or canceled.
 * ETIMEDOUT, EINVAL and EPERM are singled out; the action taken for
 * them is not visible here.
 * NOTE(review): pthread_cond_timedwait() requires the caller to hold the
 * mutex; the lock and unlock of mutex are not visible in this view, so
 * confirm they surround this loop.
 */
195 while ( !jcr->authenticated && !job_canceled(jcr) ) {
196 errstat = pthread_cond_timedwait(&jcr->job_start_wait, &mutex, &timeout);
197 if (errstat == ETIMEDOUT || errstat == EINVAL || errstat == EPERM) {
200 Dmsg1(800, "=== Auth cond errstat=%d\n", errstat);
202 Dmsg3(50, "Auth=%d canceled=%d errstat=%d\n", jcr->authenticated,
203 job_canceled(jcr), errstat);
205 Dmsg2(800, "Auth fail or cancel for jid=%d %p\n", jcr->JobId, jcr);
/* Overwrite the stored authorization key in place with zero bytes. */
207 memset(jcr->sd_auth_key, 0, strlen(jcr->sd_auth_key));
/* Run the job only when authentication succeeded and no cancel arrived. */
209 if (jcr->authenticated && !job_canceled(jcr)) {
210 Dmsg2(800, "Running jid=%d %p\n", jcr->JobId, jcr);
211 run_job(jcr); /* Run the job */
213 Dmsg2(800, "Done jid=%d %p\n", jcr->JobId, jcr);
218 * After receiving a connection (in dircmd.c) if it is
219 * from the File daemon, this routine is called.
/*
 * Attach an incoming File daemon connection to its job and authenticate it.
 *
 * Looks up the JCR by full job name, stores fd as jcr->file_bsock, calls
 * authenticate_filed() and records the result in jcr->authenticated. A
 * failed authentication sets JS_ErrorTerminated. Finally the condition
 * jcr->job_start_wait is signaled, which is the condition run_cmd()
 * waits on.
 *
 * fd:       socket of the connecting File daemon.
 * job_name: full job name used to find the JCR.
 * NOTE(review): the declaration of jcr and the exits of the two early
 * error branches are not visible in this view of the file.
 */
221 void handle_filed_connection(BSOCK *fd, char *job_name)
226 * With the following bmicrosleep on, running the
227 * SD under the debugger fails.
229 // bmicrosleep(0, 50000); /* wait 50 millisecs */
/* No JCR with that name: report a fatal message. */
230 if (!(jcr=get_jcr_by_full_name(job_name))) {
231 Jmsg1(NULL, M_FATAL, 0, _("FD connect failed: Job name not found: %s\n"), job_name);
232 Dmsg1(3, "**** Job \"%s\" not found.\n", job_name);
238 Dmsg1(50, "Found Job %s\n", job_name);
/* A second connection for a job that is already authenticated is reported as fatal. */
240 if (jcr->authenticated) {
241 Jmsg2(jcr, M_FATAL, 0, _("Hey!!!! JobId %u Job %s already authenticated.\n"),
242 (uint32_t)jcr->JobId, jcr->Job);
243 Dmsg2(50, "Hey!!!! JobId %u Job %s already authenticated.\n",
244 (uint32_t)jcr->JobId, jcr->Job);
/* Bind the socket to the job in both directions. */
250 jcr->file_bsock = fd;
251 jcr->file_bsock->set_jcr(jcr);
254 * Authenticate the File daemon
/*
 * NOTE(review): jcr->authenticated was already tested above; whether this
 * second test can still be true depends on the exit of that branch,
 * which is not visible here.
 */
256 if (jcr->authenticated || !authenticate_filed(jcr)) {
257 Dmsg1(50, "Authentication failed Job %s\n", jcr->Job);
258 Jmsg(jcr, M_FATAL, 0, _("Unable to authenticate File daemon\n"));
260 jcr->authenticated = true;
261 Dmsg2(50, "OK Authentication jid=%u Job %s\n", (uint32_t)jcr->JobId, jcr->Job);
264 if (!jcr->authenticated) {
265 jcr->setJobStatus(JS_ErrorTerminated);
/*
 * NOTE(review): no lock on the file-scope mutex is visible around the
 * update of jcr->authenticated or around this signal; confirm the
 * waiter in run_cmd() cannot miss the wakeup.
 */
267 pthread_cond_signal(&jcr->job_start_wait); /* wake waiting job */
275 * Query Device command from Director
276 * Sends Storage Daemon's information on the device to the
277 * caller (presumably the Director).
278 * This command always returns "true" so that the line is
279 * not closed on an error.
/*
 * Handle the query device command from the Director.
 *
 * Scans the device name out of the Director message, then searches the
 * R_DEVICE resources and after that the R_AUTOCHANGER resources for an
 * entry with that name. A match is reported through dir_update_device()
 * or dir_update_changer() followed by an OK_query or NO_query reply. An
 * unknown name gets a NO_device reply and an unparsable command gets a
 * BAD_query reply.
 *
 * jcr: job control record whose dir_bsock holds the Director message.
 * Returns bool.
 * NOTE(review): query_device, OK_query, NO_query, NO_device and BAD_query
 * are not defined in the visible part of the file, and the declarations
 * of device and ok, the branch conditions and the return statements are
 * not visible either.
 */
282 bool query_cmd(JCR *jcr)
284 POOL_MEM dev_name, VolumeName, MediaType, ChangerName;
285 BSOCK *dir = jcr->dir_bsock;
287 AUTOCHANGER *changer;
290 Dmsg1(100, "Query_cmd: %s", dir->msg);
/*
 * NOTE(review): the name is scanned directly into the POOL_MEM buffer;
 * the field width of query_device is not visible here, so confirm it is
 * bounded.
 */
291 ok = sscanf(dir->msg, query_device, dev_name.c_str()) == 1;
292 Dmsg1(100, "<dird: %s\n", dir->msg);
294 unbash_spaces(dev_name);
/* First pass: plain device resources, matched by exact name. */
295 foreach_res(device, R_DEVICE) {
296 /* Find resource, and make sure we were able to open it */
297 if (strcmp(dev_name.c_str(), device->hdr.name) == 0) {
299 device->dev = init_dev(jcr, device);
304 ok = dir_update_device(jcr, device->dev);
306 ok = dir->fsend(OK_query);
308 dir->fsend(NO_query);
/* Second pass: autochanger resources, matched by exact name. */
313 foreach_res(changer, R_AUTOCHANGER) {
314 /* Find resource, and make sure we were able to open it */
315 if (strcmp(dev_name.c_str(), changer->hdr.name) == 0) {
316 if (!changer->device || changer->device->size() == 0) {
317 continue; /* no devices */
319 ok = dir_update_changer(jcr, changer);
321 ok = dir->fsend(OK_query);
323 dir->fsend(NO_query);
328 /* If we get here, the device/autochanger was not found */
329 unbash_spaces(dir->msg);
330 pm_strcpy(jcr->errmsg, dir->msg);
331 dir->fsend(NO_device, dev_name.c_str());
332 Dmsg1(100, ">dird: %s\n", dir->msg);
/* Command could not be parsed: echo it back in a BAD_query reply. */
334 unbash_spaces(dir->msg);
335 pm_strcpy(jcr->errmsg, dir->msg);
336 dir->fsend(BAD_query, jcr->errmsg);
337 Dmsg1(100, ">dird: %s\n", dir->msg);
347 * Destroy the Job Control Record and associated
348 * resources (sockets).
/*
 * Release the Storage daemon specific resources held by a JCR.
 *
 * In order: sends BNET_EOD and BNET_TERMINATE on the Director socket,
 * closes the File daemon socket, frees the name and MD5 buffers, frees
 * the restore volume list, unlinks and frees the restore bootstrap file
 * name, destroys the job_start_wait condition, frees the read dcr,
 * deletes the read and write store lists and writes the state file.
 *
 * jcr: job control record being destroyed.
 * NOTE(review): several guarding conditions, the declaration of store and
 * the end of the function are not visible in this view of the file.
 */
350 void stored_free_jcr(JCR *jcr)
352 Dmsg2(800, "End Job JobId=%u %p\n", jcr->JobId, jcr);
/* Tell the Director the conversation is over. */
353 if (jcr->dir_bsock) {
354 Dmsg2(800, "Send terminate jid=%d %p\n", jcr->JobId, jcr);
355 jcr->dir_bsock->signal(BNET_EOD);
356 jcr->dir_bsock->signal(BNET_TERMINATE);
358 if (jcr->file_bsock) {
359 jcr->file_bsock->close();
360 jcr->file_bsock = NULL;
/*
 * NOTE(review): job_name is released with free_pool_memory() while the
 * three buffers below use free_memory(), and only client_name is reset
 * to NULL in the visible lines; confirm the missing lines guard job_name
 * and clear the other pointers.
 */
363 free_pool_memory(jcr->job_name);
365 if (jcr->client_name) {
366 free_memory(jcr->client_name);
367 jcr->client_name = NULL;
369 if (jcr->fileset_name) {
370 free_memory(jcr->fileset_name);
372 if (jcr->fileset_md5) {
373 free_memory(jcr->fileset_md5);
379 /* Free any restore volume list created */
380 free_restore_volume_list(jcr);
/* The bootstrap file is removed from disk before its name buffer is freed. */
381 if (jcr->RestoreBootstrap) {
382 unlink(jcr->RestoreBootstrap);
383 free_pool_memory(jcr->RestoreBootstrap);
384 jcr->RestoreBootstrap = NULL;
/* A JCR that is still linked into a device list is reported as a fatal error. */
386 if (jcr->next_dev || jcr->prev_dev) {
387 Emsg0(M_FATAL, 0, _("In free_jcr(), but still attached to device!!!!\n"));
389 pthread_cond_destroy(&jcr->job_start_wait);
395 /* Avoid a double free */
/* When dcr and read_dcr are the same object, drop the read_dcr alias. */
396 if (jcr->dcr == jcr->read_dcr) {
397 jcr->read_dcr = NULL;
404 free_dcr(jcr->read_dcr);
405 jcr->read_dcr = NULL;
/* Each store list owns the device member of its entries; delete those, then the list. */
408 if (jcr->read_store) {
410 foreach_alist(store, jcr->read_store) {
411 delete store->device;
414 delete jcr->read_store;
415 jcr->read_store = NULL;
417 if (jcr->write_store) {
419 foreach_alist(store, jcr->write_store) {
420 delete store->device;
423 delete jcr->write_store;
424 jcr->write_store = NULL;
/* Write the daemon state file into the working directory. */
429 write_state_file(me->working_directory, "bacula-sd", get_first_port_host_order(me->sdaddrs));