2 Bacula® - The Network Backup Solution
4 Copyright (C) 2000-2007 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from
7 many others, a complete list can be found in the file AUTHORS.
8 This program is Free Software; you can redistribute it and/or
9 modify it under the terms of version two of the GNU General Public
10 License as published by the Free Software Foundation and included in the file LICENSE.
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
23 Bacula® is a registered trademark of John Walker.
24 The licensor of Bacula is the Free Software Foundation Europe
25 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
26 Switzerland, email:ftf@fsfeurope.org.
29 * Job control and execution for Storage Daemon
/* Mutex passed to pthread_cond_timedwait() together with jcr->job_start_wait in run_cmd(). */
40 static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
42 /* Imported variables */
43 extern uint32_t VolSessionTime;
45 /* Imported functions */
46 extern uint32_t newVolSessionId();
47 extern bool do_mac(JCR *jcr);
49 /* Requests from the Director daemon */
/*
 * sscanf() template used by job_cmd() to parse the Director's job request.
 * It holds 13 conversions; every string field is limited to 127 characters
 * by its %127s width.
 */
50 static char jobcmd[] = "JobId=%d job=%127s job_name=%127s client_name=%127s "
51 "type=%d level=%d FileSet=%127s NoAttr=%d SpoolAttr=%d FileSetMD5=%127s "
52 "SpoolData=%d WritePartAfterJob=%d PreferMountedVols=%d\n";
55 /* Responses sent to Director daemon */
/* Reply sent by job_cmd() with jcr->VolSessionId, jcr->VolSessionTime and the generated key. */
56 static char OKjob[] = "3000 OK Job SDid=%u SDtime=%u Authorization=%s\n";
/* Reply sent by job_cmd() on its error path with the sscanf() result and the received message. */
57 static char BAD_job[] = "3915 Bad Job command. stat=%d CMD: %s\n";
/*
 * NOTE(review): the three query replies below are commented out, yet
 * query_cmd() further down still uses OK_query, NO_query and BAD_query.
 * Presumably query_cmd() is itself excluded from the build by a conditional
 * that is not shown here; confirm before re-enabling either side.
 */
58 //static char OK_query[] = "3001 OK query\n";
59 //static char NO_query[] = "3918 Query failed\n";
60 //static char BAD_query[] = "3917 Bad query command: %s\n";
63 * Director requests us to start a job
64 * Basic tasks done here:
65 * - We pickup the JobId to be run from the Director.
66 * - We pickup the device, media, and pool from the Director
67 * - Wait for a connection from the File Daemon (FD)
68 * - Accept commands from the FD (i.e. run the job)
69 * Return when the connection is terminated or there is an error.
/*
 * job_cmd: handle the job command received from the Director.
 *
 * Parses dir->msg with the jobcmd template, stores the parsed values in the
 * JCR, obtains a new VolSessionId, and replies with OKjob carrying a newly
 * generated authorization key; a copy of that key is kept in
 * jcr->sd_auth_key.  The error path replies with BAD_job and sets the job
 * status to JS_ErrorTerminated.
 *
 * NOTE(review): the local declarations, the test on the sscanf() result and
 * the return statements are not shown here, so the exact return values are
 * deliberately not documented.
 */
72 bool job_cmd(JCR *jcr)
76 BSOCK *dir = jcr->dir_bsock;
77 POOL_MEM job_name, client_name, job, fileset_name, fileset_md5;
78 int JobType, level, spool_attributes, no_attributes, spool_data;
79 int write_part_after_job, PreferMountedVols;
84 * Get JobId and permissions from Director
86 Dmsg1(100, "<dird: %s", dir->msg);
/* String fields are scanned directly into the POOL_MEM buffers; jobcmd caps each at 127 chars. */
87 stat = sscanf(dir->msg, jobcmd, &JobId, job.c_str(), job_name.c_str(),
89 &JobType, &level, fileset_name.c_str(), &no_attributes,
90 &spool_attributes, fileset_md5.c_str(), &spool_data,
91 &write_part_after_job, &PreferMountedVols);
/*
 * Error reply: the received message is copied to jcr->errmsg and echoed back
 * in BAD_job along with the sscanf() result, then the job is marked failed.
 * NOTE(review): the guarding condition is not shown; presumably a mismatch
 * in the number of parsed fields - confirm.
 */
93 pm_strcpy(jcr->errmsg, dir->msg);
94 dir->fsend(BAD_job, stat, jcr->errmsg);
95 Dmsg1(100, ">dird: %s", dir->msg);
96 set_jcr_job_status(jcr, JS_ErrorTerminated);
100 * Since this job could be rescheduled, we
101 * check to see if we have it already. If so
102 * free the old jcr and use the new one.
/* Look for an existing JCR with the same full job name that has not been authenticated. */
104 ojcr = get_jcr_by_full_name(job.c_str());
105 if (ojcr && !ojcr->authenticated) {
106 Dmsg2(100, "Found ojcr=0x%x Job %s\n", (unsigned)(long)ojcr, job.c_str());
/* New session identifiers for this job. */
110 jcr->VolSessionId = newVolSessionId();
111 jcr->VolSessionTime = VolSessionTime;
112 bstrncpy(jcr->Job, job, sizeof(jcr->Job));
/*
 * unbash_spaces() is applied to the job, client and fileset names before
 * each one is copied into newly obtained PM_NAME pool memory.
 */
113 unbash_spaces(job_name);
114 jcr->job_name = get_pool_memory(PM_NAME);
115 pm_strcpy(jcr->job_name, job_name);
116 unbash_spaces(client_name);
117 jcr->client_name = get_pool_memory(PM_NAME);
118 pm_strcpy(jcr->client_name, client_name);
119 unbash_spaces(fileset_name);
120 jcr->fileset_name = get_pool_memory(PM_NAME);
121 pm_strcpy(jcr->fileset_name, fileset_name);
/* Plain numeric options are copied straight from the parsed values. */
122 jcr->JobType = JobType;
123 jcr->JobLevel = level;
124 jcr->no_attributes = no_attributes;
125 jcr->spool_attributes = spool_attributes;
126 jcr->spool_data = spool_data;
127 jcr->write_part_after_job = write_part_after_job;
128 jcr->fileset_md5 = get_pool_memory(PM_NAME);
129 pm_strcpy(jcr->fileset_md5, fileset_md5);
130 jcr->PreferMountedVols = PreferMountedVols;
/* Starts unauthenticated; handle_filed_connection() sets it after authenticate_filed() succeeds. */
132 jcr->authenticated = false;
135 * Pass back an authorization key for the File daemon
/* Generate the key, send it in the OKjob reply, keep a copy in the JCR, then zero auth_key. */
137 make_session_key(auth_key, NULL, 1);
138 dir->fsend(OKjob, jcr->VolSessionId, jcr->VolSessionTime, auth_key);
/* At debug_level 3 the reply (which includes the key) is also printed at debug level 0. */
139 if (debug_level == 3) {
140 Dmsg2(000, ">dird jid=%u: %s", (uint32_t)jcr->JobId, dir->msg);
142 Dmsg2(100, ">dird jid=%u: %s", (uint32_t)jcr->JobId, dir->msg);
143 jcr->sd_auth_key = bstrdup(auth_key);
144 memset(auth_key, 0, sizeof(auth_key));
145 generate_daemon_event(jcr, "JobStart");
/*
 * run_cmd: wait for the File daemon and then run the job.
 *
 * Sets the job status to JS_WaitFD, reports it to the Director, and waits on
 * jcr->job_start_wait until it is signalled, the deadline of
 * me->client_wait seconds passes, or the job is canceled.  Afterwards the
 * stored authorization key is zeroed and, if the job is authenticated and
 * not canceled, run_job() is called.
 *
 * NOTE(review): the case labels of the switch, any locking around the wait
 * and the return statements are not shown here.
 */
149 bool run_cmd(JCR *jcr)
153 struct timespec timeout;
157 Dmsg1(200, "Run_cmd: %s\n", jcr->dir_bsock->msg);
158 /* The following jobs don't need the FD */
159 switch (jcr->JobType) {
/* Job types that need no FD are marked authenticated right away. */
163 jcr->authenticated = true;
168 set_jcr_job_status(jcr, JS_WaitFD); /* wait for FD to connect */
169 dir_send_job_status(jcr);
/* Absolute deadline for the wait: current time plus me->client_wait seconds. */
171 gettimeofday(&tv, &tz);
172 timeout.tv_nsec = tv.tv_usec * 1000;
173 timeout.tv_sec = tv.tv_sec + me->client_wait;
175 Dmsg2(100, "%s waiting %d sec for FD to contact SD\n",
176 jcr->Job, (int)me->client_wait);
178 * Wait for the File daemon to contact us to start the Job,
179 * when he does, we will be released, unless the 30 minutes
/*
 * handle_filed_connection() signals job_start_wait to release this wait.
 * NOTE(review): the body of the errstat test is not shown; presumably it
 * leaves the loop on a signal or a timeout - confirm.
 */
183 for ( ; !job_canceled(jcr); ) {
184 errstat = pthread_cond_timedwait(&jcr->job_start_wait, &mutex, &timeout);
185 if (errstat == 0 || errstat == ETIMEDOUT) {
191 if (debug_level == 3) {
192 Dmsg1(000, "jid=%u Zap sd_auth_key\n", (uint32_t)jcr->JobId);
/* Zero the characters of the stored authorization key in place. */
194 memset(jcr->sd_auth_key, 0, strlen(jcr->sd_auth_key));
/* Run only when authenticated and the job was not canceled in the meantime. */
196 if (jcr->authenticated && !job_canceled(jcr)) {
197 Dmsg1(100, "Running job %s\n", jcr->Job);
198 run_job(jcr); /* Run the job */
204 * After receiving a connection (in dircmd.c) if it is
205 * from the File daemon, this routine is called.
/*
 * handle_filed_connection: attach an incoming File daemon connection to its job.
 *
 *   fd        socket of the connecting File daemon
 *   job_name  full job name used to look up the JCR
 *
 * Looks up the JCR by job_name, stores fd in jcr->file_bsock, authenticates
 * the File daemon, and signals jcr->job_start_wait so the thread waiting in
 * run_cmd() continues.  If the job is still not authenticated at the end,
 * its status is set to JS_ErrorTerminated before the signal.
 *
 * NOTE(review): early-exit statements on the failure paths are not shown here.
 */
207 void handle_filed_connection(BSOCK *fd, char *job_name)
211 bmicrosleep(0, 50000); /* wait 50 millisecs */
/* Unknown job name: report it; no JCR is available, so the message is not tied to a job. */
212 if (!(jcr=get_jcr_by_full_name(job_name))) {
213 Jmsg1(NULL, M_FATAL, 0, _("FD connect failed: Job name not found: %s\n"), job_name);
214 Dmsg1(100, "Job name not found: %s\n", job_name);
/* Bind the FD socket and the JCR to each other. */
218 jcr->file_bsock = fd;
219 jcr->file_bsock->set_jcr(jcr);
221 Dmsg1(110, "Found Job %s\n", job_name);
/* A job that is already authenticated should not receive another FD connection. */
223 if (jcr->authenticated) {
224 Jmsg2(jcr, M_FATAL, 0, _("Hey!!!! JobId %u Job %s already authenticated.\n"),
225 (uint32_t)jcr->JobId, jcr->Job);
231 * Authenticate the File daemon
/* At debug_level 3 the stored key is printed at debug level 0. */
233 if (debug_level == 3) {
234 Dmsg2(000, "jid=%u sd_auth_key=%s\n", (uint32_t)jcr->JobId, jcr->sd_auth_key);
/* Treated as a failure both when already authenticated and when authenticate_filed() fails. */
236 if (jcr->authenticated || !authenticate_filed(jcr)) {
237 Dmsg1(100, "Authentication failed Job %s\n", jcr->Job);
238 Jmsg(jcr, M_FATAL, 0, _("Unable to authenticate File daemon\n"));
239 if (debug_level == 3) {
240 Dmsg1(000, "Authentication failed Job %s\n", jcr->Job);
243 jcr->authenticated = true;
244 Dmsg2(110, "OK Authentication jid=%u Job %s\n", (uint32_t)jcr->JobId, jcr->Job);
/* Mark the job failed before waking the waiter so that run_cmd() does not run it. */
247 if (!jcr->authenticated) {
248 set_jcr_job_status(jcr, JS_ErrorTerminated);
250 pthread_cond_signal(&jcr->job_start_wait); /* wake waiting job */
258 * Query Device command from Director
259 * Sends Storage Daemon's information on the device to the
260 * caller (presumably the Director).
261 * This command always returns "true" so that the line is
262 * not closed on an error.
/*
 * query_cmd: answer the Director's device query.
 *
 * Scans a device name out of dir->msg with the query_device template,
 * applies unbash_spaces() to it, and compares it first against every
 * R_DEVICE resource and then against every R_AUTOCHANGER resource.  A match
 * is reported through dir_update_device() / dir_update_changer() followed by
 * an OK_query or NO_query reply; with no match NO_device is sent, and a
 * command that does not scan gets BAD_query.
 *
 * NOTE(review): query_device and NO_device are not defined in the visible
 * part of the file, and OK_query / NO_query / BAD_query appear only as
 * commented-out definitions near the top.  Presumably this function is
 * excluded from the build by a conditional that is not shown - confirm.
 * The conditions selecting between the OK_query and NO_query replies, and
 * the return statements, are not shown either.
 */
265 bool query_cmd(JCR *jcr)
267 POOL_MEM dev_name, VolumeName, MediaType, ChangerName;
268 BSOCK *dir = jcr->dir_bsock;
270 AUTOCHANGER *changer;
273 Dmsg1(100, "Query_cmd: %s", dir->msg);
/* ok is true only when exactly one field (the device name) was scanned. */
274 ok = sscanf(dir->msg, query_device, dev_name.c_str()) == 1;
275 Dmsg1(100, "<dird: %s\n", dir->msg);
277 unbash_spaces(dev_name);
/* First pass: plain device resources, matched by exact name. */
278 foreach_res(device, R_DEVICE) {
279 /* Find resource, and make sure we were able to open it */
280 if (strcmp(dev_name.c_str(), device->hdr.name) == 0) {
282 device->dev = init_dev(jcr, device);
287 ok = dir_update_device(jcr, device->dev);
289 ok = dir->fsend(OK_query);
291 dir->fsend(NO_query);
/* Second pass: autochanger resources, matched by exact name. */
296 foreach_res(changer, R_AUTOCHANGER) {
297 /* Find resource, and make sure we were able to open it */
298 if (strcmp(dev_name.c_str(), changer->hdr.name) == 0) {
/* An autochanger without any device is skipped. */
299 if (!changer->device || changer->device->size() == 0) {
300 continue; /* no devices */
302 ok = dir_update_changer(jcr, changer);
304 ok = dir->fsend(OK_query);
306 dir->fsend(NO_query);
311 /* If we get here, the device/autochanger was not found */
312 unbash_spaces(dir->msg);
313 pm_strcpy(jcr->errmsg, dir->msg);
314 dir->fsend(NO_device, dev_name.c_str());
315 Dmsg1(100, ">dird: %s\n", dir->msg);
/* Reply for a command that could not be scanned: the received message is echoed in BAD_query. */
317 unbash_spaces(dir->msg);
318 pm_strcpy(jcr->errmsg, dir->msg);
319 dir->fsend(BAD_query, jcr->errmsg);
320 Dmsg1(100, ">dird: %s\n", dir->msg);
330 * Destroy the Job Control Record and associated
331 * resources (sockets).
/*
 * stored_free_jcr: release the Storage daemon resources attached to a JCR.
 *
 * Closes the File daemon socket, frees the name strings copied in by
 * job_cmd(), removes and frees the restore bootstrap file, destroys the
 * job_start_wait condition variable, frees the read DCR, and deletes the
 * read and write store lists together with each entry's device.
 *
 * NOTE(review): several statements of this function are not shown here, and
 * the function may continue past the last line shown.
 */
333 void stored_free_jcr(JCR *jcr)
335 Dmsg1(900, "stored_free_jcr JobId=%u\n", jcr->JobId);
336 if (jcr->file_bsock) {
337 jcr->file_bsock->close();
338 jcr->file_bsock = NULL;
341 free_pool_memory(jcr->job_name);
343 if (jcr->client_name) {
344 free_memory(jcr->client_name);
345 jcr->client_name = NULL;
/*
 * NOTE(review): unlike client_name above, no reset to NULL is visible after
 * freeing fileset_name and fileset_md5 - confirm whether that is intended.
 */
347 if (jcr->fileset_name) {
348 free_memory(jcr->fileset_name);
350 if (jcr->fileset_md5) {
351 free_memory(jcr->fileset_md5);
/* The bootstrap file is removed from disk before its name is freed. */
357 if (jcr->RestoreBootstrap) {
358 unlink(jcr->RestoreBootstrap);
359 free_pool_memory(jcr->RestoreBootstrap);
360 jcr->RestoreBootstrap = NULL;
/* A JCR that is still linked into a device list at this point is reported as fatal. */
362 if (jcr->next_dev || jcr->prev_dev) {
363 Emsg0(M_FATAL, 0, _("In free_jcr(), but still attached to device!!!!\n"));
365 pthread_cond_destroy(&jcr->job_start_wait);
371 /* Avoid a double free */
/* When dcr and read_dcr are the same object, read_dcr is cleared so it is not freed again below. */
372 if (jcr->dcr == jcr->read_dcr) {
373 jcr->read_dcr = NULL;
380 free_dcr(jcr->read_dcr);
381 jcr->read_dcr = NULL;
/* Delete each read store entry's device, then the list itself. */
384 if (jcr->read_store) {
386 foreach_alist(store, jcr->read_store) {
387 delete store->device;
390 delete jcr->read_store;
391 jcr->read_store = NULL;
/* Same teardown for the write store list. */
393 if (jcr->write_store) {
395 foreach_alist(store, jcr->write_store) {
396 delete store->device;
399 delete jcr->write_store;
400 jcr->write_store = NULL;