2 Bacula® - The Network Backup Solution
4 Copyright (C) 2000-2007 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from
7 many others, a complete list can be found in the file AUTHORS.
8 This program is Free Software; you can redistribute it and/or
9 modify it under the terms of version two of the GNU General Public
10 License as published by the Free Software Foundation and included
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 Bacula® is a registered trademark of John Walker.
24 The licensor of Bacula is the Free Software Foundation Europe
25 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
26 Switzerland, email:ftf@fsfeurope.org.
29 * Job control and execution for Storage Daemon
/*
 * File-scope declarations used by the job commands in this file.
 * mutex is the lock handed to pthread_cond_timedwait() in run_cmd().
 */
40 static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
42 /* Imported variables */
43 extern uint32_t VolSessionTime; /* copied into jcr->VolSessionTime by job_cmd() */
45 /* Imported functions */
46 extern uint32_t newVolSessionId(); /* supplies jcr->VolSessionId in job_cmd() */
47 extern bool do_mac(JCR *jcr); /* NOTE(review): no call is visible in this listing */
49 /* Requests from the Director daemon */
/*
 * sscanf() template that job_cmd() applies to the Director message.
 * It holds 13 conversions; each %127s caps the scanned text at 127 chars.
 */
50 static char jobcmd[] = "JobId=%d job=%127s job_name=%127s client_name=%127s "
51 "type=%d level=%d FileSet=%127s NoAttr=%d SpoolAttr=%d FileSetMD5=%127s "
52 "SpoolData=%d WritePartAfterJob=%d PreferMountedVols=%d\n";
55 /* Responses sent to Director daemon */
56 static char OKjob[] = "3000 OK Job SDid=%u SDtime=%u Authorization=%s\n"; /* success reply sent by job_cmd() */
57 static char BAD_job[] = "3915 Bad Job command. stat=%d CMD: %s\n"; /* failure reply sent by job_cmd() */
/*
 * NOTE(review): the three query replies below are commented out, yet
 * query_cmd() further down still names OK_query, NO_query and BAD_query --
 * presumably query_cmd() is excluded from the build; confirm.
 */
58 //static char OK_query[] = "3001 OK query\n";
59 //static char NO_query[] = "3918 Query failed\n";
60 //static char BAD_query[] = "3917 Bad query command: %s\n";
63 * Director requests us to start a job
64 * Basic tasks done here:
65 * - We pick up the JobId to be run from the Director.
66 * - We pick up the device, media, and pool from the Director
67 * - Wait for a connection from the File Daemon (FD)
68 * - Accept commands from the FD (i.e. run the job)
69 * - Return when the connection is terminated or
/*
 * job_cmd -- handle the Director job request held in jcr->dir_bsock->msg.
 *
 * Visible behaviour: the message is scanned with the jobcmd template, the
 * parsed job attributes are copied into the JCR, a new volume session id is
 * assigned, and a session key is generated, returned to the Director in the
 * OKjob reply and kept in jcr->sd_auth_key.
 *
 * jcr: job control record; its dir_bsock is the Director connection.
 * Returns bool. NOTE(review): no return statement is visible in this
 * listing, so the returned values cannot be confirmed from here.
 *
 * NOTE(review): this listing shows no braces and no declarations for JobId,
 * stat, ojcr, auth_key or seed -- lines appear to be missing. Check against
 * a complete copy of the file before relying on the flow described below.
 */
72 bool job_cmd(JCR *jcr)
77 BSOCK *dir = jcr->dir_bsock;
78 POOL_MEM job_name, client_name, job, fileset_name, fileset_md5;
79 int JobType, level, spool_attributes, no_attributes, spool_data;
80 int write_part_after_job, PreferMountedVols;
85 * Get JobId and permissions from Director
87 Dmsg1(100, "<dird: %s", dir->msg);
/*
 * NOTE(review): jobcmd contains 13 conversions, but only 12 destinations are
 * visible in this call. Nothing is passed for client_name= between job_name
 * and JobType, although client_name is read further down. As listed, every
 * later argument would be matched with the wrong conversion -- confirm that
 * a client_name.c_str() argument exists in the real file.
 */
88 stat = sscanf(dir->msg, jobcmd, &JobId, job.c_str(), job_name.c_str(),
90 &JobType, &level, fileset_name.c_str(), &no_attributes,
91 &spool_attributes, fileset_md5.c_str(), &spool_data,
92 &write_part_after_job, &PreferMountedVols);
/*
 * Failure reply: the raw command is saved in jcr->errmsg, echoed back in
 * BAD_job together with stat, and the job is marked JS_ErrorTerminated.
 * NOTE(review): the test on stat that should guard these statements is not
 * visible -- presumably stat is compared with the expected conversion
 * count and the function returns afterwards; confirm.
 */
94 pm_strcpy(jcr->errmsg, dir->msg);
95 dir->fsend(BAD_job, stat, jcr->errmsg);
96 Dmsg1(100, ">dird: %s", dir->msg);
97 set_jcr_job_status(jcr, JS_ErrorTerminated);
101 * Since this job could be rescheduled, we
102 * check to see if we have it already. If so
103 * free the old jcr and use the new one.
105 ojcr = get_jcr_by_full_name(job.c_str());
106 if (ojcr && !ojcr->authenticated) {
107 Dmsg2(100, "Found ojcr=0x%x Job %s\n", (unsigned)(long)ojcr, job.c_str());
/*
 * NOTE(review): only the trace is visible inside the if above; the release
 * of the old jcr announced by the preceding text and the closing brace are
 * not shown. Also, jcr->JobId is traced further down but no assignment from
 * the parsed JobId is visible -- confirm both.
 */
/* Record the parsed attributes and a fresh volume session in the JCR. */
111 jcr->VolSessionId = newVolSessionId();
112 jcr->VolSessionTime = VolSessionTime;
113 bstrncpy(jcr->Job, job, sizeof(jcr->Job));
/*
 * Each name is passed through unbash_spaces() and then copied into pool
 * memory obtained with get_pool_memory(PM_NAME).
 */
114 unbash_spaces(job_name);
115 jcr->job_name = get_pool_memory(PM_NAME);
116 pm_strcpy(jcr->job_name, job_name);
117 unbash_spaces(client_name);
118 jcr->client_name = get_pool_memory(PM_NAME);
119 pm_strcpy(jcr->client_name, client_name);
120 unbash_spaces(fileset_name);
121 jcr->fileset_name = get_pool_memory(PM_NAME);
122 pm_strcpy(jcr->fileset_name, fileset_name);
123 jcr->JobType = JobType;
124 jcr->JobLevel = level;
125 jcr->no_attributes = no_attributes;
126 jcr->spool_attributes = spool_attributes;
127 jcr->spool_data = spool_data;
128 jcr->write_part_after_job = write_part_after_job;
129 jcr->fileset_md5 = get_pool_memory(PM_NAME);
130 pm_strcpy(jcr->fileset_md5, fileset_md5);
131 jcr->PreferMountedVols = PreferMountedVols;
133 jcr->authenticated = false; /* set to true later by run_cmd() or handle_filed_connection() */
136 * Pass back an authorization key for the File daemon
138 bsnprintf(seed, sizeof(seed), "%p%d", jcr, JobId); /* seed text = JCR address followed by JobId */
139 make_session_key(auth_key, seed, 1);
140 dir->fsend(OKjob, jcr->VolSessionId, jcr->VolSessionTime, auth_key);
141 if (debug_level == 3) {
142 Dmsg2(000, ">dird jid=%u: %s", (uint32_t)jcr->JobId, dir->msg); /* extra level-0 trace when debug_level is exactly 3 */
144 Dmsg2(100, ">dird jid=%u: %s", (uint32_t)jcr->JobId, dir->msg);
145 jcr->sd_auth_key = bstrdup(auth_key); /* the JCR keeps its own copy of the key */
146 memset(auth_key, 0, sizeof(auth_key)); /* wipe the local copy of the key */
147 generate_daemon_event(jcr, "JobStart");
/*
 * run_cmd -- wait for the File daemon to connect, then run the job.
 *
 * Visible behaviour: the job is put in JS_WaitFD and that status is sent to
 * the Director; an absolute deadline of now + me->client_wait seconds is
 * computed; the thread then waits on jcr->job_start_wait until the wait
 * returns 0 or ETIMEDOUT, or the job is canceled. Afterwards the stored
 * sd_auth_key is overwritten with zeros and run_job() is called if the job
 * is authenticated and not canceled.
 *
 * jcr: job control record set up earlier by job_cmd().
 * Returns bool. NOTE(review): no return statement is visible here.
 *
 * NOTE(review): braces and the declarations of tv, tz and errstat are not
 * visible in this listing -- confirm against a complete copy of the file.
 */
151 bool run_cmd(JCR *jcr)
155 struct timespec timeout;
159 Dmsg1(200, "Run_cmd: %s\n", jcr->dir_bsock->msg);
160 /* The following jobs don't need the FD */
161 switch (jcr->JobType) {
/*
 * NOTE(review): the case labels and the rest of this branch are not
 * visible, so which job types skip the File daemon wait cannot be told
 * from here.
 */
165 jcr->authenticated = true;
170 set_jcr_job_status(jcr, JS_WaitFD); /* wait for FD to connect */
171 dir_send_job_status(jcr);
/* Build the absolute deadline expected by pthread_cond_timedwait(). */
173 gettimeofday(&tv, &tz);
174 timeout.tv_nsec = tv.tv_usec * 1000; /* microseconds to nanoseconds */
175 timeout.tv_sec = tv.tv_sec + me->client_wait;
177 if (debug_level == 3) {
178 Dmsg3(000, "%s waiting %d sec for FD to contact SD key=%s\n",
179 jcr->Job, (int)me->client_wait, jcr->sd_auth_key);
181 Dmsg3(100, "%s waiting %d sec for FD to contact SD key=%s\n",
182 jcr->Job, (int)me->client_wait, jcr->sd_auth_key);
186 * Wait for the File daemon to contact us to start the Job,
187 * when he does, we will be released, unless the 30 minutes
191 for ( ; !job_canceled(jcr); ) {
/*
 * NOTE(review): POSIX requires the mutex to be locked by the calling thread
 * when pthread_cond_timedwait() is entered. No lock or unlock of mutex is
 * visible in this listing -- confirm they surround this loop.
 */
192 errstat = pthread_cond_timedwait(&jcr->job_start_wait, &mutex, &timeout);
193 if (errstat == 0 || errstat == ETIMEDOUT) {
/* NOTE(review): the body of the if above is not visible; presumably it leaves the loop -- confirm. */
199 if (debug_level == 3) {
200 Dmsg0(000, "Zap sd_auth_key\n");
202 memset(jcr->sd_auth_key, 0, strlen(jcr->sd_auth_key)); /* overwrite the key in place; the buffer is not freed here */
204 if (jcr->authenticated && !job_canceled(jcr)) {
205 Dmsg1(100, "Running job %s\n", jcr->Job);
206 run_job(jcr); /* Run the job */
212 * After receiving a connection (in dircmd.c) if it is
213 * from the File daemon, this routine is called.
/*
 * handle_filed_connection -- attach an incoming File daemon socket to its
 * job and authenticate the File daemon.
 *
 * Visible behaviour: after a 50 ms sleep the JCR is looked up by job_name;
 * the socket is stored in jcr->file_bsock; a job that is already marked
 * authenticated is reported as a fatal error; otherwise authenticate_filed()
 * decides the outcome. A job that is still unauthenticated is set to
 * JS_ErrorTerminated, and jcr->job_start_wait is signalled so that the
 * thread waiting in run_cmd() continues.
 *
 * fd:       socket of the connecting File daemon.
 * job_name: full job name used for the JCR lookup.
 *
 * NOTE(review): braces, the declaration of jcr and every return are not
 * visible in this listing -- confirm against a complete copy of the file.
 */
215 void handle_filed_connection(BSOCK *fd, char *job_name)
219 bmicrosleep(0, 50000); /* wait 50 millisecs */
220 if (!(jcr=get_jcr_by_full_name(job_name))) {
221 Jmsg1(NULL, M_FATAL, 0, _("FD connect failed: Job name not found: %s\n"), job_name);
222 Dmsg1(3, "**** Job \"%s\" not found", job_name);
/*
 * NOTE(review): no return is visible after the failed lookup; as listed the
 * next statement would dereference a null jcr -- confirm an early return.
 */
226 jcr->file_bsock = fd;
227 jcr->file_bsock->set_jcr(jcr);
229 Dmsg1(110, "Found Job %s\n", job_name);
231 if (jcr->authenticated) {
232 Jmsg2(jcr, M_FATAL, 0, _("Hey!!!! JobId %u Job %s already authenticated.\n"),
233 (uint32_t)jcr->JobId, jcr->Job);
/* NOTE(review): the remainder of the already-authenticated branch is not visible. */
239 * Authenticate the File daemon
241 if (debug_level == 3) {
242 Dmsg2(000, "jid=%u sd_auth_key=%s\n", (uint32_t)jcr->JobId, jcr->sd_auth_key);
244 if (jcr->authenticated || !authenticate_filed(jcr)) {
245 Dmsg1(100, "Authentication failed Job %s\n", jcr->Job);
246 Jmsg(jcr, M_FATAL, 0, _("Unable to authenticate File daemon\n"));
247 if (debug_level == 3) {
248 Dmsg2(000, "**** Authentication failed jid=%u key=%s\n",
249 (uint32_t)jcr->JobId, jcr->sd_auth_key);
/*
 * NOTE(review): no else is visible before the next statement; as listed the
 * flag would also be set after a failed authentication. Presumably the two
 * lines below form the success branch -- confirm.
 */
252 jcr->authenticated = true;
253 Dmsg2(110, "OK Authentication jid=%u Job %s\n", (uint32_t)jcr->JobId, jcr->Job);
256 if (!jcr->authenticated) {
257 set_jcr_job_status(jcr, JS_ErrorTerminated);
259 pthread_cond_signal(&jcr->job_start_wait); /* wake waiting job */
267 * Query Device command from Director
268 * Sends Storage Daemon's information on the device to the
269 * caller (presumably the Director).
270 * This command always returns "true" so that the line is
271 * not closed on an error.
/*
 * query_cmd -- answer a Director query about a device or autochanger.
 *
 * Visible behaviour: the device name is scanned from dir->msg with the
 * query_device template and passed through unbash_spaces(). The R_DEVICE
 * resources are searched for that name; on a match the device state is
 * reported with dir_update_device() and OK_query or NO_query is sent. The
 * R_AUTOCHANGER resources are searched the same way with
 * dir_update_changer(). If neither list matches, NO_device is sent; if the
 * scan fails, BAD_query is sent.
 *
 * jcr: job control record; its dir_bsock is the Director connection.
 * Returns bool. NOTE(review): no return statement is visible here.
 *
 * NOTE(review): OK_query, NO_query and BAD_query are commented out near the
 * top of this file, and query_device and NO_device are not visible at all;
 * presumably this function is excluded from the build -- confirm.
 * NOTE(review): braces, the if/else structure and the declarations of
 * device and ok are not visible; VolumeName, MediaType and ChangerName are
 * not used in the visible lines.
 */
274 bool query_cmd(JCR *jcr)
276 POOL_MEM dev_name, VolumeName, MediaType, ChangerName;
277 BSOCK *dir = jcr->dir_bsock;
279 AUTOCHANGER *changer;
282 Dmsg1(100, "Query_cmd: %s", dir->msg);
283 ok = sscanf(dir->msg, query_device, dev_name.c_str()) == 1; /* ok only if exactly one field was converted */
284 Dmsg1(100, "<dird: %s\n", dir->msg);
286 unbash_spaces(dev_name);
287 foreach_res(device, R_DEVICE) {
288 /* Find resource, and make sure we were able to open it */
289 if (strcmp(dev_name.c_str(), device->hdr.name) == 0) {
291 device->dev = init_dev(jcr, device); /* NOTE(review): the condition guarding this call is not visible */
296 ok = dir_update_device(jcr, device->dev);
298 ok = dir->fsend(OK_query);
300 dir->fsend(NO_query);
305 foreach_res(changer, R_AUTOCHANGER) {
306 /* Find resource, and make sure we were able to open it */
307 if (strcmp(dev_name.c_str(), changer->hdr.name) == 0) {
308 if (!changer->device || changer->device->size() == 0) {
309 continue; /* no devices */
311 ok = dir_update_changer(jcr, changer);
313 ok = dir->fsend(OK_query);
315 dir->fsend(NO_query);
320 /* If we get here, the device/autochanger was not found */
321 unbash_spaces(dir->msg);
322 pm_strcpy(jcr->errmsg, dir->msg);
323 dir->fsend(NO_device, dev_name.c_str());
324 Dmsg1(100, ">dird: %s\n", dir->msg);
/* NOTE(review): presumably the four lines below are the branch taken when the scan failed -- confirm. */
326 unbash_spaces(dir->msg);
327 pm_strcpy(jcr->errmsg, dir->msg);
328 dir->fsend(BAD_query, jcr->errmsg);
329 Dmsg1(100, ">dird: %s\n", dir->msg);
339 * Destroy the Job Control Record and associated
340 * resources (sockets).
/*
 * stored_free_jcr -- release the Storage Daemon resources held by a JCR.
 *
 * Visible behaviour: the File daemon socket is closed and cleared; the name
 * strings are freed; the RestoreBootstrap file is unlinked and its name
 * freed; the job_start_wait condition variable is destroyed; read_dcr is
 * freed; and the read_store and write_store lists are deleted together
 * with the device member of each entry.
 *
 * jcr: job control record being destroyed.
 *
 * NOTE(review): braces, several guarding conditions, the declaration of
 * store and the end of the function are not visible in this listing, so
 * the function may continue beyond the last line shown -- confirm against
 * a complete copy of the file.
 */
342 void stored_free_jcr(JCR *jcr)
344 Dmsg1(900, "stored_free_jcr JobId=%u\n", jcr->JobId);
345 if (jcr->file_bsock) {
346 jcr->file_bsock->close();
347 jcr->file_bsock = NULL;
/*
 * NOTE(review): job_name is released with free_pool_memory() and no guard or
 * reset is visible, whereas client_name below uses free_memory() with a
 * guard and a reset. fileset_name and fileset_md5 show no reset either.
 * Confirm whether these differences are intended.
 */
350 free_pool_memory(jcr->job_name);
352 if (jcr->client_name) {
353 free_memory(jcr->client_name);
354 jcr->client_name = NULL;
356 if (jcr->fileset_name) {
357 free_memory(jcr->fileset_name);
359 if (jcr->fileset_md5) {
360 free_memory(jcr->fileset_md5);
366 if (jcr->RestoreBootstrap) {
367 unlink(jcr->RestoreBootstrap); /* remove the bootstrap file before its name is freed */
368 free_pool_memory(jcr->RestoreBootstrap);
369 jcr->RestoreBootstrap = NULL;
371 if (jcr->next_dev || jcr->prev_dev) {
372 Emsg0(M_FATAL, 0, _("In free_jcr(), but still attached to device!!!!\n"));
374 pthread_cond_destroy(&jcr->job_start_wait);
380 /* Avoid a double free */
381 if (jcr->dcr == jcr->read_dcr) {
382 jcr->read_dcr = NULL; /* both members name the same object, so read_dcr is dropped here */
/* NOTE(review): the release of jcr->dcr itself is not visible in this listing. */
389 free_dcr(jcr->read_dcr);
390 jcr->read_dcr = NULL;
393 if (jcr->read_store) {
395 foreach_alist(store, jcr->read_store) {
396 delete store->device;
/* NOTE(review): whether each store entry itself is deleted is not visible -- confirm. */
399 delete jcr->read_store;
400 jcr->read_store = NULL;
402 if (jcr->write_store) {
404 foreach_alist(store, jcr->write_store) {
405 delete store->device;
408 delete jcr->write_store;
409 jcr->write_store = NULL;