2 Bacula® - The Network Backup Solution
4 Copyright (C) 2000-2009 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from
7 many others, a complete list can be found in the file AUTHORS.
8 This program is Free Software; you can redistribute it and/or
9 modify it under the terms of version two of the GNU General Public
10 License as published by the Free Software Foundation and included
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 Bacula® is a registered trademark of Kern Sibbald.
24 The licensor of Bacula is the Free Software Foundation Europe
25 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
26 Switzerland, email:ftf@fsfeurope.org.
29 * Job control and execution for Storage Daemon
/* Mutex handed to pthread_cond_timedwait in run_cmd while a job waits for its File daemon. */
40 static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
42 /* Imported variables */
43 extern uint32_t VolSessionTime;
45 /* Imported functions */
46 extern uint32_t newVolSessionId();
47 extern bool do_mac(JCR *jcr);
49 /* Requests from the Director daemon */
/*
 * jobcmd is the sscanf pattern job_cmd tries first. Its string fields are
 * limited to 127 characters, except for the trailing SpoolSize field.
 * NOTE(review): SpoolSize is scanned with an unbounded %s into the
 * spool_size buffer of job_cmd, whose size is not visible in this excerpt;
 * confirm that an over-long value cannot overflow it.
 */
50 static char jobcmd[] = "JobId=%d job=%127s job_name=%127s client_name=%127s "
51 "type=%d level=%d FileSet=%127s NoAttr=%d SpoolAttr=%d FileSetMD5=%127s "
52 "SpoolData=%d WritePartAfterJob=%d PreferMountedVols=%d SpoolSize=%s\n";
/* oldjobcmd is the same pattern without the SpoolSize field; job_cmd also scans with it (its Try old version step). */
53 static char oldjobcmd[] = "JobId=%d job=%127s job_name=%127s client_name=%127s "
54 "type=%d level=%d FileSet=%127s NoAttr=%d SpoolAttr=%d FileSetMD5=%127s "
55 "SpoolData=%d WritePartAfterJob=%d PreferMountedVols=%d\n";
59 /* Responses sent to Director daemon */
/* OKjob carries the volume session id, the volume session time and the authorization key. */
60 static char OKjob[] = "3000 OK Job SDid=%u SDtime=%u Authorization=%s\n";
/* BAD_job carries the sscanf result and the command text that was received. */
61 static char BAD_job[] = "3915 Bad Job command. stat=%d CMD: %s\n";
/*
 * NOTE(review): the three query replies below are commented out, yet
 * query_cmd in this file still uses OK_query, NO_query and BAD_query;
 * confirm whether query_cmd is compiled at all.
 */
62 //static char OK_query[] = "3001 OK query\n";
63 //static char NO_query[] = "3918 Query failed\n";
64 //static char BAD_query[] = "3917 Bad query command: %s\n";
67 * Director requests us to start a job
68 * Basic tasks done here:
69 * - We pick up the JobId to be run from the Director.
70 * - We pick up the device, media, and pool from the Director
71 * - Wait for a connection from the File Daemon (FD)
72 * - Accept commands from the FD (i.e. run the job)
73 * - Return when the connection is terminated or
/*
 * Handle the job command from the Director: scan the job parameters out
 * of dir->msg, store them in the JCR, and answer with the volume session
 * id, the volume session time and an authorization key.
 *
 * jcr  job control record; its dir_bsock holds the received command.
 *
 * NOTE(review): several lines of this function (local declarations, the
 * tests on the sscanf results, the return statements) are not visible in
 * this excerpt. The comments below describe only the visible statements.
 */
76 bool job_cmd(JCR *jcr)
82 BSOCK *dir = jcr->dir_bsock;
83 POOL_MEM job_name, client_name, job, fileset_name, fileset_md5;
84 int JobType, level, spool_attributes, no_attributes, spool_data;
85 int write_part_after_job, PreferMountedVols;
90 * Get JobId and permissions from Director
92 Dmsg1(100, "<dird: %s", dir->msg);
/* Preset the spool size to 0: the old command format has no SpoolSize field that could set it. */
93 bstrncpy(spool_size, "0", sizeof(spool_size));
/* Scan with the current format, which ends with the SpoolSize field. */
94 stat = sscanf(dir->msg, jobcmd, &JobId, job.c_str(), job_name.c_str(),
96 &JobType, &level, fileset_name.c_str(), &no_attributes,
97 &spool_attributes, fileset_md5.c_str(), &spool_data,
98 &write_part_after_job, &PreferMountedVols, spool_size);
100 /* Try old version */
101 stat = sscanf(dir->msg, oldjobcmd, &JobId, job.c_str(), job_name.c_str(),
103 &JobType, &level, fileset_name.c_str(), &no_attributes,
104 &spool_attributes, fileset_md5.c_str(), &spool_data,
105 &write_part_after_job, &PreferMountedVols);
/* Error reply: send the received command back together with the sscanf result, then mark the job as terminated in error. */
107 pm_strcpy(jcr->errmsg, dir->msg);
108 dir->fsend(BAD_job, stat, jcr->errmsg);
109 Dmsg1(100, ">dird: %s", dir->msg);
110 set_jcr_job_status(jcr, JS_ErrorTerminated);
115 * Since this job could be rescheduled, we
116 * check to see if we have it already. If so
117 * free the old jcr and use the new one.
119 ojcr = get_jcr_by_full_name(job.c_str());
120 if (ojcr && !ojcr->authenticated) {
/* NOTE(review): the pointer is narrowed to unsigned for the %x conversion, so the printed address may be truncated where pointers are wider than unsigned. */
121 Dmsg2(100, "Found ojcr=0x%x Job %s\n", (unsigned)(long)ojcr, job.c_str());
/* Give the job a new volume session id and the imported VolSessionTime. */
125 jcr->VolSessionId = newVolSessionId();
126 jcr->VolSessionTime = VolSessionTime;
127 bstrncpy(jcr->Job, job, sizeof(jcr->Job));
/* Each scanned name is passed through unbash_spaces and then copied into a PM_NAME pool buffer stored in the JCR. */
128 unbash_spaces(job_name);
129 jcr->job_name = get_pool_memory(PM_NAME);
130 pm_strcpy(jcr->job_name, job_name);
131 unbash_spaces(client_name);
132 jcr->client_name = get_pool_memory(PM_NAME);
133 pm_strcpy(jcr->client_name, client_name);
134 unbash_spaces(fileset_name);
135 jcr->fileset_name = get_pool_memory(PM_NAME);
136 pm_strcpy(jcr->fileset_name, fileset_name);
137 jcr->set_JobType(JobType);
138 jcr->set_JobLevel(level);
139 jcr->no_attributes = no_attributes;
140 jcr->spool_attributes = spool_attributes;
141 jcr->spool_data = spool_data;
142 jcr->spool_size = str_to_int64(spool_size);
143 jcr->write_part_after_job = write_part_after_job;
144 jcr->fileset_md5 = get_pool_memory(PM_NAME);
145 pm_strcpy(jcr->fileset_md5, fileset_md5);
146 jcr->PreferMountedVols = PreferMountedVols;
/* The job starts unauthenticated; handle_filed_connection sets this flag after a successful File daemon authentication. */
148 jcr->authenticated = false;
151 * Pass back an authorization key for the File daemon
/* NOTE(review): the seed is only the JCR address and the JobId; how much randomness make_session_key adds is not visible here - confirm. */
153 bsnprintf(seed, sizeof(seed), "%p%d", jcr, JobId);
154 make_session_key(auth_key, seed, 1);
155 dir->fsend(OKjob, jcr->VolSessionId, jcr->VolSessionTime, auth_key);
156 Dmsg2(50, ">dird jid=%u: %s", (uint32_t)jcr->JobId, dir->msg);
/* Keep a duplicate of the key on the JCR, then clear the working buffer. */
157 jcr->sd_auth_key = bstrdup(auth_key);
/* NOTE(review): a plain memset of a buffer that is not read afterwards may be removed by the compiler; consider an explicit zeroing primitive if this wipe matters. */
158 memset(auth_key, 0, sizeof(auth_key));
159 generate_daemon_event(jcr, "JobStart");
/*
 * Handle the run command from the Director: report the job as waiting
 * for the File daemon, wait on jcr->job_start_wait until the job is
 * authenticated or canceled, and call run_job when it is authenticated
 * and not canceled.
 *
 * jcr  job control record of the job to run.
 *
 * NOTE(review): the branch taken when no client is used, the mutex
 * lock and unlock calls, the loop exit and the return statements are not
 * visible in this excerpt.
 */
163 bool run_cmd(JCR *jcr)
167 struct timespec timeout;
171 Dmsg1(200, "Run_cmd: %s\n", jcr->dir_bsock->msg);
173 /* If we do not need the FD, we are doing a migrate, copy, or virtual
176 if (jcr->no_client_used()) {
181 set_jcr_job_status(jcr, JS_WaitFD); /* wait for FD to connect */
182 dir_send_job_status(jcr);
/* Build the absolute deadline for the wait: current time plus me->client_wait seconds. */
184 gettimeofday(&tv, &tz);
185 timeout.tv_nsec = tv.tv_usec * 1000;
186 timeout.tv_sec = tv.tv_sec + me->client_wait;
/* NOTE(review): this debug message prints the session authorization key. */
188 Dmsg3(50, "%s waiting %d sec for FD to contact SD key=%s\n",
189 jcr->Job, (int)(timeout.tv_sec-time(NULL)), jcr->sd_auth_key);
192 * Wait for the File daemon to contact us to start the Job,
193 * when he does, we will be released, unless the 30 minutes
/* The condition is re-tested after every wakeup of the condition variable. */
197 while ( !jcr->authenticated && !job_canceled(jcr) ) {
/* NOTE(review): pthread_cond_timedwait requires the caller to hold the mutex; the locking calls are not visible in this excerpt - confirm. */
198 errstat = pthread_cond_timedwait(&jcr->job_start_wait, &mutex, &timeout);
199 if (errstat == ETIMEDOUT || errstat == EINVAL || errstat == EPERM) {
203 Dmsg3(50, "Auth=%d canceled=%d errstat=%d\n", jcr->authenticated,
204 job_canceled(jcr), errstat);
/* Overwrite the stored key bytes in place; the buffer itself is not released here. */
207 memset(jcr->sd_auth_key, 0, strlen(jcr->sd_auth_key));
209 if (jcr->authenticated && !job_canceled(jcr)) {
210 Dmsg1(50, "Running job %s\n", jcr->Job);
211 run_job(jcr); /* Run the job */
217 * After receiving a connection (in dircmd.c) if it is
218 * from the File daemon, this routine is called.
/*
 * Attach an incoming File daemon connection to the job called job_name,
 * authenticate the File daemon, and signal jcr->job_start_wait.
 *
 * fd        socket of the connecting File daemon
 * job_name  full job name used to look up the JCR
 *
 * NOTE(review): braces, early returns and cleanup lines are not visible
 * in this excerpt, so which statements run on which path cannot be
 * confirmed from here.
 */
220 void handle_filed_connection(BSOCK *fd, char *job_name)
225 * With the following bmicrosleep on, running the
226 * SD under the debugger fails.
228 // bmicrosleep(0, 50000); /* wait 50 millisecs */
/* An unknown job name is reported as a fatal message without a JCR. */
229 if (!(jcr=get_jcr_by_full_name(job_name))) {
230 Jmsg1(NULL, M_FATAL, 0, _("FD connect failed: Job name not found: %s\n"), job_name);
231 Dmsg1(3, "**** Job \"%s\" not found.\n", job_name);
237 Dmsg1(50, "Found Job %s\n", job_name);
/* A job that is already authenticated is reported as a fatal error. */
239 if (jcr->authenticated) {
240 Jmsg2(jcr, M_FATAL, 0, _("Hey!!!! JobId %u Job %s already authenticated.\n"),
241 (uint32_t)jcr->JobId, jcr->Job);
242 Dmsg2(50, "Hey!!!! JobId %u Job %s already authenticated.\n",
243 (uint32_t)jcr->JobId, jcr->Job);
/* Store the socket in the JCR and point the socket back at the JCR before authenticating. */
249 jcr->file_bsock = fd;
250 jcr->file_bsock->set_jcr(jcr);
253 * Authenticate the File daemon
255 if (jcr->authenticated || !authenticate_filed(jcr)) {
256 Dmsg1(50, "Authentication failed Job %s\n", jcr->Job);
257 Jmsg(jcr, M_FATAL, 0, _("Unable to authenticate File daemon\n"));
259 jcr->authenticated = true;
260 Dmsg2(50, "OK Authentication jid=%u Job %s\n", (uint32_t)jcr->JobId, jcr->Job);
/* A job that is still unauthenticated at this point is marked as terminated in error. */
263 if (!jcr->authenticated) {
264 set_jcr_job_status(jcr, JS_ErrorTerminated);
/* run_cmd waits on this same condition variable. */
266 pthread_cond_signal(&jcr->job_start_wait); /* wake waiting job */
274 * Query Device command from Director
275 * Sends Storage Daemon's information on the device to the
276 * caller (presumably the Director).
277 * This command always returns "true" so that the line is
278 * not closed on an error.
/*
 * Handle the query device command from the Director: scan a device name
 * from dir->msg, look it up first among the device resources and then
 * among the autochanger resources, and send the matching status back.
 *
 * jcr  job control record; its dir_bsock holds the received command.
 *
 * NOTE(review): OK_query, NO_query and BAD_query are only visible as
 * commented-out declarations in this file, and query_device and NO_device
 * are not declared in the visible part at all; confirm whether this
 * function is compiled. Labels, braces and return statements are also
 * missing from this excerpt.
 */
281 bool query_cmd(JCR *jcr)
283 POOL_MEM dev_name, VolumeName, MediaType, ChangerName;
284 BSOCK *dir = jcr->dir_bsock;
286 AUTOCHANGER *changer;
289 Dmsg1(100, "Query_cmd: %s", dir->msg);
/* ok is true only when exactly one field, the device name, was scanned. */
290 ok = sscanf(dir->msg, query_device, dev_name.c_str()) == 1;
291 Dmsg1(100, "<dird: %s\n", dir->msg);
293 unbash_spaces(dev_name);
/* First pass: compare the name against every device resource. */
294 foreach_res(device, R_DEVICE) {
295 /* Find resource, and make sure we were able to open it */
296 if (strcmp(dev_name.c_str(), device->hdr.name) == 0) {
298 device->dev = init_dev(jcr, device);
303 ok = dir_update_device(jcr, device->dev);
305 ok = dir->fsend(OK_query);
307 dir->fsend(NO_query);
/* Second pass: compare the name against every autochanger resource. */
312 foreach_res(changer, R_AUTOCHANGER) {
313 /* Find resource, and make sure we were able to open it */
314 if (strcmp(dev_name.c_str(), changer->hdr.name) == 0) {
315 if (!changer->device || changer->device->size() == 0) {
316 continue; /* no devices */
318 ok = dir_update_changer(jcr, changer);
320 ok = dir->fsend(OK_query);
322 dir->fsend(NO_query);
327 /* If we get here, the device/autochanger was not found */
328 unbash_spaces(dir->msg);
329 pm_strcpy(jcr->errmsg, dir->msg);
330 dir->fsend(NO_device, dev_name.c_str());
331 Dmsg1(100, ">dird: %s\n", dir->msg);
/* Reply with BAD_query and the received text; presumably reached when the scan above failed - the condition is not visible here. */
333 unbash_spaces(dir->msg);
334 pm_strcpy(jcr->errmsg, dir->msg);
335 dir->fsend(BAD_query, jcr->errmsg);
336 Dmsg1(100, ">dird: %s\n", dir->msg);
346 * Destroy the Job Control Record and associated
347 * resources (sockets).
/*
 * Release the Storage daemon specific parts of a JCR: the File daemon
 * socket, the name buffers, the restore volume list and bootstrap file,
 * the condition variable, the read device context and the read and write
 * store lists. The last visible statement writes the daemon state file.
 *
 * jcr  job control record being destroyed.
 *
 * NOTE(review): guards, braces and some release calls are not visible in
 * this excerpt, and the end of the function is not shown.
 */
349 void stored_free_jcr(JCR *jcr)
351 Dmsg1(900, "stored_free_jcr JobId=%u\n", jcr->JobId);
/* Close the File daemon socket and clear the pointer. */
352 if (jcr->file_bsock) {
353 jcr->file_bsock->close();
354 jcr->file_bsock = NULL;
/* NOTE(review): job_name is released with free_pool_memory while the other name buffers use free_memory; confirm the two are equivalent. The guard for this call is not visible. */
357 free_pool_memory(jcr->job_name);
359 if (jcr->client_name) {
360 free_memory(jcr->client_name);
361 jcr->client_name = NULL;
363 if (jcr->fileset_name) {
364 free_memory(jcr->fileset_name);
366 if (jcr->fileset_md5) {
367 free_memory(jcr->fileset_md5);
373 /* Free any restore volume list created */
374 free_restore_volume_list(jcr);
/* The bootstrap file is removed from disk before its name buffer is released. */
375 if (jcr->RestoreBootstrap) {
376 unlink(jcr->RestoreBootstrap);
377 free_pool_memory(jcr->RestoreBootstrap);
378 jcr->RestoreBootstrap = NULL;
/* Still being linked into a device list at this point is reported as a fatal error. */
380 if (jcr->next_dev || jcr->prev_dev) {
381 Emsg0(M_FATAL, 0, _("In free_jcr(), but still attached to device!!!!\n"));
383 pthread_cond_destroy(&jcr->job_start_wait);
389 /* Avoid a double free */
/* When dcr and read_dcr are the same object, read_dcr is cleared so that only one release happens. */
390 if (jcr->dcr == jcr->read_dcr) {
391 jcr->read_dcr = NULL;
398 free_dcr(jcr->read_dcr);
399 jcr->read_dcr = NULL;
/* Each store list entry has its device member deleted before the list itself is deleted. */
402 if (jcr->read_store) {
404 foreach_alist(store, jcr->read_store) {
405 delete store->device;
408 delete jcr->read_store;
409 jcr->read_store = NULL;
411 if (jcr->write_store) {
413 foreach_alist(store, jcr->write_store) {
414 delete store->device;
417 delete jcr->write_store;
418 jcr->write_store = NULL;
/* Write the daemon state file into the working directory; any condition guarding this call is not visible here. */
423 write_state_file(me->working_directory, "bacula-sd", get_first_port_host_order(me->sdaddrs));