2 Bacula® - The Network Backup Solution
4 Copyright (C) 2000-2007 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from
7 many others, a complete list can be found in the file AUTHORS.
8 This program is Free Software; you can redistribute it and/or
9 modify it under the terms of version two of the GNU General Public
10 License as published by the Free Software Foundation and included
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 Bacula® is a registered trademark of John Walker.
24 The licensor of Bacula is the Free Software Foundation Europe
25 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
26 Switzerland, email:ftf@fsfeurope.org.
29 * Job control and execution for Storage Daemon
/*
 * File-scope state and wire-format strings for the job commands.
 *
 * mutex is the mutex handed to pthread_cond_timedwait() in run_cmd().
 */
40 static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
42 /* Imported variables */
/* Copied into jcr->VolSessionTime by job_cmd(). */
43 extern uint32_t VolSessionTime;
45 /* Imported functions */
/* newVolSessionId() supplies jcr->VolSessionId in job_cmd(). */
46 extern uint32_t newVolSessionId();
/* do_mac() is declared here but not called in the visible part of this file. */
47 extern bool do_mac(JCR *jcr);
49 /* Requests from the Director daemon */
/*
 * sscanf() format used by job_cmd() to parse the Director job request.
 * It contains 13 conversions: 8 integers (%d) and 5 strings, each string
 * limited to 127 characters (%127s).
 */
50 static char jobcmd[] = "JobId=%d job=%127s job_name=%127s client_name=%127s "
51 "type=%d level=%d FileSet=%127s NoAttr=%d SpoolAttr=%d FileSetMD5=%127s "
52 "SpoolData=%d WritePartAfterJob=%d PreferMountedVols=%d\n";
55 /* Responses sent to Director daemon */
/* Success reply from job_cmd(): session id, session time and the FD authorization key. */
56 static char OKjob[] = "3000 OK Job SDid=%u SDtime=%u Authorization=%s\n";
/* Failure reply from job_cmd(): the sscanf() result and the rejected command text. */
57 static char BAD_job[] = "3915 Bad Job command. stat=%d CMD: %s\n";
/*
 * NOTE(review): the three query replies below are commented out, yet
 * query_cmd() further down still references OK_query, NO_query and BAD_query.
 * Presumably query_cmd() is compiled out or these are defined elsewhere;
 * confirm before enabling either.
 */
58 //static char OK_query[] = "3001 OK query\n";
59 //static char NO_query[] = "3918 Query failed\n";
60 //static char BAD_query[] = "3917 Bad query command: %s\n";
63 * Director requests us to start a job
64 * Basic tasks done here:
65 * - We pick up the JobId to be run from the Director.
66 * - We pick up the device, media, and pool from the Director.
67 * - Wait for a connection from the File Daemon (FD).
68 * - Accept commands from the FD (i.e. run the job).
69 * - Return when the connection is terminated or an error occurs.
/*
 * Handle the Director job request held in jcr->dir_bsock->msg.
 *
 * The message is parsed with the jobcmd format. On the failure path the
 * command text is sent back with BAD_job and the job status is set to
 * JS_ErrorTerminated. Otherwise the parsed values are stored in the JCR,
 * a session key is generated and returned to the Director in an OKjob
 * reply, and a JobStart daemon event is generated.
 *
 * NOTE(review): JobId, stat, ojcr, auth_key and seed are used below but
 * their declarations, the test that selects the failure path, and all
 * return statements are not among the visible lines of this excerpt.
 */
72 bool job_cmd(JCR *jcr)
77 BSOCK *dir = jcr->dir_bsock;
/* Scratch buffers that receive the %127s string fields from sscanf(). */
78 POOL_MEM job_name, client_name, job, fileset_name, fileset_md5;
79 int JobType, level, spool_attributes, no_attributes, spool_data;
80 int write_part_after_job, PreferMountedVols;
85 * Get JobId and permissions from Director
87 Dmsg1(100, "<dird: %s", dir->msg);
/*
 * stat receives the number of fields sscanf() converted.
 * NOTE(review): jobcmd has 13 conversions but only 12 destinations are
 * visible here; the client_name destination is presumably on a line
 * missing from this excerpt. Confirm against the full file.
 */
88 stat = sscanf(dir->msg, jobcmd, &JobId, job.c_str(), job_name.c_str(),
90 &JobType, &level, fileset_name.c_str(), &no_attributes,
91 &spool_attributes, fileset_md5.c_str(), &spool_data,
92 &write_part_after_job, &PreferMountedVols);
/*
 * Failure path: keep a copy of the raw command in jcr->errmsg, report it
 * to the Director together with the sscanf() count, and mark the job as
 * terminated in error.
 */
94 pm_strcpy(jcr->errmsg, dir->msg);
95 dir->fsend(BAD_job, stat, jcr->errmsg);
96 Dmsg1(100, ">dird: %s", dir->msg);
97 set_jcr_job_status(jcr, JS_ErrorTerminated);
101 * Since this job could be rescheduled, we
102 * check to see if we have it already. If so
103 * free the old jcr and use the new one.
/* Only a previously known JCR that is not yet authenticated enters this branch. */
105 ojcr = get_jcr_by_full_name(job.c_str());
106 if (ojcr && !ojcr->authenticated) {
/*
 * NOTE(review): the pointer is cast through long to unsigned and printed
 * with %x, which drops the upper bits where pointers are wider than
 * unsigned int.
 */
107 Dmsg2(100, "Found ojcr=0x%x Job %s\n", (unsigned)(long)ojcr, job.c_str());
/* Session identity: a new session id plus the imported VolSessionTime global. */
111 jcr->VolSessionId = newVolSessionId();
112 jcr->VolSessionTime = VolSessionTime;
/* Bounded copy of the full job name into the fixed-size jcr->Job array. */
113 bstrncpy(jcr->Job, job, sizeof(jcr->Job));
/*
 * Each name is passed through unbash_spaces() and then copied into a newly
 * obtained PM_NAME pool buffer owned by the JCR.
 * NOTE(review): unbash_spaces() presumably undoes the space escaping used
 * on the wire; confirm. Any buffer already held in these JCR fields is
 * overwritten without a visible release; confirm the fields are empty on
 * entry.
 */
114 unbash_spaces(job_name);
115 jcr->job_name = get_pool_memory(PM_NAME);
116 pm_strcpy(jcr->job_name, job_name);
117 unbash_spaces(client_name);
118 jcr->client_name = get_pool_memory(PM_NAME);
119 pm_strcpy(jcr->client_name, client_name);
120 unbash_spaces(fileset_name);
121 jcr->fileset_name = get_pool_memory(PM_NAME);
122 pm_strcpy(jcr->fileset_name, fileset_name);
/* Plain integer options are copied as parsed, without range checks. */
123 jcr->JobType = JobType;
124 jcr->JobLevel = level;
125 jcr->no_attributes = no_attributes;
126 jcr->spool_attributes = spool_attributes;
127 jcr->spool_data = spool_data;
128 jcr->write_part_after_job = write_part_after_job;
129 jcr->fileset_md5 = get_pool_memory(PM_NAME);
130 pm_strcpy(jcr->fileset_md5, fileset_md5);
131 jcr->PreferMountedVols = PreferMountedVols;
/* Cleared here; set to true in handle_filed_connection() or run_cmd(). */
133 jcr->authenticated = false;
136 * Pass back an authorization key for the File daemon
/* The seed text is built from the JCR address and the JobId. */
138 bsnprintf(seed, sizeof(seed), "%p%d", jcr, JobId);
139 make_session_key(auth_key, seed, 1);
140 dir->fsend(OKjob, jcr->VolSessionId, jcr->VolSessionTime, auth_key);
/* NOTE(review): dir->msg is logged here right after the OKjob send; if it still holds that reply, the key reaches the level-100 debug output. */
141 Dmsg2(100, ">dird jid=%u: %s", (uint32_t)jcr->JobId, dir->msg);
/* The JCR keeps its own copy of the key; the local buffer is then zeroed. */
142 jcr->sd_auth_key = bstrdup(auth_key);
143 memset(auth_key, 0, sizeof(auth_key));
144 generate_daemon_event(jcr, "JobStart");
/*
 * Handle the Director run command for a job previously set up by job_cmd().
 *
 * For the job types selected by the switch the JCR is marked authenticated
 * directly. Otherwise the job status becomes JS_WaitFD and the thread waits
 * on jcr->job_start_wait until it is signalled, the deadline passes, or the
 * job is canceled. The stored authorization key is then zeroed, and the
 * job is run only if the JCR is authenticated and not canceled.
 *
 * NOTE(review): tv, tz and errstat are used below but their declarations,
 * the case labels of the switch, and all return statements are not among
 * the visible lines of this excerpt.
 */
148 bool run_cmd(JCR *jcr)
152 struct timespec timeout;
156 Dmsg1(200, "Run_cmd: %s\n", jcr->dir_bsock->msg);
157 /* The following jobs don't need the FD */
158 switch (jcr->JobType) {
/* No File daemon handshake for these job types: mark the JCR authenticated here. */
162 jcr->authenticated = true;
167 set_jcr_job_status(jcr, JS_WaitFD); /* wait for FD to connect */
168 dir_send_job_status(jcr);
/*
 * Build the absolute deadline for pthread_cond_timedwait(): current time
 * plus me->client_wait seconds, with microseconds converted to nanoseconds.
 */
170 gettimeofday(&tv, &tz);
171 timeout.tv_nsec = tv.tv_usec * 1000;
172 timeout.tv_sec = tv.tv_sec + me->client_wait;
/*
 * The same message is emitted at debug level 000 when debug_level is
 * exactly 3, and at level 100 in the second call.
 * NOTE(review): both messages include jcr->sd_auth_key, so the
 * authorization key is written to debug output.
 */
174 if (debug_level == 3) {
175 Dmsg3(000, "%s waiting %d sec for FD to contact SD key=%s\n",
176 jcr->Job, (int)me->client_wait, jcr->sd_auth_key);
178 Dmsg3(100, "%s waiting %d sec for FD to contact SD key=%s\n",
179 jcr->Job, (int)me->client_wait, jcr->sd_auth_key);
183 * Wait for the File daemon to contact us to start the Job,
184 * when he does, we will be released, unless the 30 minutes
/*
 * NOTE(review): the loop is left on any zero return from
 * pthread_cond_timedwait() without re-checking jcr->authenticated. POSIX
 * permits spurious wakeups, so a zero return does not prove that
 * handle_filed_connection() signalled; in that case the check after the
 * loop would skip run_job(). Consider looping on the predicate instead.
 * Also, no lock or unlock of the file-scope mutex is visible in this
 * excerpt; POSIX requires the mutex to be locked by the caller.
 */
188 for ( ; !job_canceled(jcr); ) {
189 errstat = pthread_cond_timedwait(&jcr->job_start_wait, &mutex, &timeout);
190 if (errstat == 0 || errstat == ETIMEDOUT) {
/* Overwrite the stored key with zeros over its current string length. */
196 memset(jcr->sd_auth_key, 0, strlen(jcr->sd_auth_key));
/* Run only when authentication succeeded and nobody canceled the job meanwhile. */
198 if (jcr->authenticated && !job_canceled(jcr)) {
199 Dmsg1(100, "Running job %s\n", jcr->Job);
200 run_job(jcr); /* Run the job */
206 * After receiving a connection (in dircmd.c) if it is
207 * from the File daemon, this routine is called.
/*
 * Attach an incoming File daemon socket to the job named job_name.
 *
 * fd       - socket of the connecting File daemon
 * job_name - full job name used to look up the JCR
 *
 * The JCR is looked up by name, the socket is stored in it, the File
 * daemon is authenticated, and the condition variable that run_cmd()
 * waits on is signalled. On authentication failure the job status is set
 * to JS_ErrorTerminated before the signal is sent.
 *
 * NOTE(review): the declaration of jcr, the return statements, and any
 * release of the JCR reference are not among the visible lines.
 */
209 void handle_filed_connection(BSOCK *fd, char *job_name)
/* NOTE(review): the reason for this fixed delay is not stated; presumably it lets the job thread reach its wait. Confirm. */
213 bmicrosleep(0, 50000); /* wait 50 millisecs */
214 if (!(jcr=get_jcr_by_full_name(job_name))) {
/* Unknown job name: reported without a JCR (first argument is NULL). */
215 Jmsg1(NULL, M_FATAL, 0, _("FD connect failed: Job name not found: %s\n"), job_name);
216 Dmsg1(3, "**** Job \"%s\" not found", job_name);
/*
 * NOTE(review): the socket is stored in the JCR before the
 * already-authenticated check below, so for a JCR that is already
 * authenticated its previous file_bsock value has been replaced by the
 * time the error is reported.
 */
220 jcr->file_bsock = fd;
221 jcr->file_bsock->set_jcr(jcr);
223 Dmsg1(110, "Found Job %s\n", job_name);
/* A second connection for a job that is already authenticated is a fatal error. */
225 if (jcr->authenticated) {
226 Jmsg2(jcr, M_FATAL, 0, _("Hey!!!! JobId %u Job %s already authenticated.\n"),
227 (uint32_t)jcr->JobId, jcr->Job);
233 * Authenticate the File daemon
/* NOTE(review): the jcr->authenticated test here looks redundant if the branch above returns; confirm. */
235 if (jcr->authenticated || !authenticate_filed(jcr)) {
236 Dmsg1(100, "Authentication failed Job %s\n", jcr->Job);
237 Jmsg(jcr, M_FATAL, 0, _("Unable to authenticate File daemon\n"));
239 jcr->authenticated = true;
240 Dmsg2(110, "OK Authentication jid=%u Job %s\n", (uint32_t)jcr->JobId, jcr->Job);
243 if (!jcr->authenticated) {
244 set_jcr_job_status(jcr, JS_ErrorTerminated);
/* Signalled on both the success and the failure path; run_cmd() checks jcr->authenticated after waking. */
246 pthread_cond_signal(&jcr->job_start_wait); /* wake waiting job */
254 * Query Device command from Director
255 * Sends Storage Daemon's information on the device to the
256 * caller (presumably the Director).
257 * This command always returns "true" so that the line is
258 * not closed on an error.
/*
 * Handle a device query from the Director.
 *
 * The device name is parsed from dir->msg, then searched for first among
 * the R_DEVICE resources and then among the R_AUTOCHANGER resources. A
 * match triggers a dir_update_device() or dir_update_changer() call
 * followed by an OK_query or NO_query reply. If nothing matches, NO_device
 * is sent; the final group of statements sends BAD_query with the command
 * text.
 *
 * NOTE(review): OK_query, NO_query and BAD_query are commented out at the
 * top of this file, and query_device and NO_device are not defined in the
 * visible lines. Presumably this function is compiled out or those strings
 * live elsewhere; confirm. The declarations of ok and device, the branch
 * conditions between the statements, and all return statements are not
 * among the visible lines.
 */
261 bool query_cmd(JCR *jcr)
/* NOTE(review): VolumeName, MediaType and ChangerName are not used in the visible lines. */
263 POOL_MEM dev_name, VolumeName, MediaType, ChangerName;
264 BSOCK *dir = jcr->dir_bsock;
266 AUTOCHANGER *changer;
269 Dmsg1(100, "Query_cmd: %s", dir->msg);
/* ok is true only when exactly one field (the device name) was converted. */
270 ok = sscanf(dir->msg, query_device, dev_name.c_str()) == 1;
271 Dmsg1(100, "<dird: %s\n", dir->msg);
273 unbash_spaces(dev_name);
/* First pass: exact name match against the device resources. */
274 foreach_res(device, R_DEVICE) {
275 /* Find resource, and make sure we were able to open it */
276 if (strcmp(dev_name.c_str(), device->hdr.name) == 0) {
/* The condition under which init_dev() is called is not visible in this excerpt. */
278 device->dev = init_dev(jcr, device);
283 ok = dir_update_device(jcr, device->dev);
285 ok = dir->fsend(OK_query);
287 dir->fsend(NO_query);
/* Second pass: exact name match against the autochanger resources. */
292 foreach_res(changer, R_AUTOCHANGER) {
/* NOTE(review): the next comment is copied from the device loop; this loop only checks that the changer has a non-empty device list. */
293 /* Find resource, and make sure we were able to open it */
294 if (strcmp(dev_name.c_str(), changer->hdr.name) == 0) {
295 if (!changer->device || changer->device->size() == 0) {
296 continue; /* no devices */
298 ok = dir_update_changer(jcr, changer);
300 ok = dir->fsend(OK_query);
302 dir->fsend(NO_query);
307 /* If we get here, the device/autochanger was not found */
308 unbash_spaces(dir->msg);
309 pm_strcpy(jcr->errmsg, dir->msg);
310 dir->fsend(NO_device, dev_name.c_str());
311 Dmsg1(100, ">dird: %s\n", dir->msg);
/* Bad-command path: echo the unescaped command text back in a BAD_query reply. */
313 unbash_spaces(dir->msg);
314 pm_strcpy(jcr->errmsg, dir->msg);
315 dir->fsend(BAD_query, jcr->errmsg);
316 Dmsg1(100, ">dird: %s\n", dir->msg);
326 * Destroy the Job Control Record and associated
327 * resources (sockets).
/*
 * Release the Storage daemon specific resources held by a JCR.
 *
 * Visible steps: close the File daemon socket, free the name buffers set
 * up in job_cmd(), remove and free the restore bootstrap file, destroy the
 * job_start_wait condition variable, free the read DCR, and delete the
 * read and write storage lists together with the device list of each
 * entry.
 *
 * NOTE(review): some guards, the declaration of store, and the handling of
 * jcr->dcr are not among the visible lines, and the function may continue
 * beyond the end of this excerpt.
 */
329 void stored_free_jcr(JCR *jcr)
331 Dmsg1(900, "stored_free_jcr JobId=%u\n", jcr->JobId);
332 if (jcr->file_bsock) {
333 jcr->file_bsock->close();
334 jcr->file_bsock = NULL;
/*
 * NOTE(review): job_name is released with free_pool_memory() while the
 * other name buffers below use free_memory(); all four come from
 * get_pool_memory() in job_cmd(). Confirm the two calls are equivalent.
 */
337 free_pool_memory(jcr->job_name);
339 if (jcr->client_name) {
340 free_memory(jcr->client_name);
341 jcr->client_name = NULL;
/* NOTE(review): unlike client_name, no reset to NULL is visible for fileset_name and fileset_md5 after they are freed. */
343 if (jcr->fileset_name) {
344 free_memory(jcr->fileset_name);
346 if (jcr->fileset_md5) {
347 free_memory(jcr->fileset_md5);
/* The bootstrap file is removed from disk before its path buffer is freed. */
353 if (jcr->RestoreBootstrap) {
354 unlink(jcr->RestoreBootstrap);
355 free_pool_memory(jcr->RestoreBootstrap);
356 jcr->RestoreBootstrap = NULL;
/* A JCR that is still linked through next_dev or prev_dev is reported as a fatal error. */
358 if (jcr->next_dev || jcr->prev_dev) {
359 Emsg0(M_FATAL, 0, _("In free_jcr(), but still attached to device!!!!\n"));
361 pthread_cond_destroy(&jcr->job_start_wait);
367 /* Avoid a double free */
/* When dcr and read_dcr refer to the same object, read_dcr is cleared first. */
368 if (jcr->dcr == jcr->read_dcr) {
369 jcr->read_dcr = NULL;
376 free_dcr(jcr->read_dcr);
377 jcr->read_dcr = NULL;
/* For each storage list: delete the device member of every entry, then the list itself. */
380 if (jcr->read_store) {
382 foreach_alist(store, jcr->read_store) {
383 delete store->device;
386 delete jcr->read_store;
387 jcr->read_store = NULL;
389 if (jcr->write_store) {
391 foreach_alist(store, jcr->write_store) {
392 delete store->device;
395 delete jcr->write_store;
396 jcr->write_store = NULL;