2 Bacula® - The Network Backup Solution
4 Copyright (C) 2000-2007 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from
7 many others, a complete list can be found in the file AUTHORS.
8 This program is Free Software; you can redistribute it and/or
9 modify it under the terms of version two of the GNU General Public
10 License as published by the Free Software Foundation and included
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 Bacula® is a registered trademark of John Walker.
24 The licensor of Bacula is the Free Software Foundation Europe
25 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
26 Switzerland, email:ftf@fsfeurope.org.
29 * Job control and execution for Storage Daemon
/* Mutex handed to pthread_cond_timedwait() together with jcr->job_start_wait in run_cmd() */
40 static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
42 /* Imported variables */
43 extern uint32_t VolSessionTime;
45 /* Imported functions */
46 extern uint32_t newVolSessionId();
47 extern bool do_mac(JCR *jcr);
49 /* Requests from the Director daemon */
/*
 * Current job request format: 14 conversions. Every string field is
 * limited to 127 characters except the trailing SpoolSize=%s, which
 * carries no field width.
 */
50 static char jobcmd[] = "JobId=%d job=%127s job_name=%127s client_name=%127s "
51 "type=%d level=%d FileSet=%127s NoAttr=%d SpoolAttr=%d FileSetMD5=%127s "
52 "SpoolData=%d WritePartAfterJob=%d PreferMountedVols=%d SpoolSize=%s\n";
/* Older job request format: same fields without SpoolSize (13 conversions) */
53 static char oldjobcmd[] = "JobId=%d job=%127s job_name=%127s client_name=%127s "
54 "type=%d level=%d FileSet=%127s NoAttr=%d SpoolAttr=%d FileSetMD5=%127s "
55 "SpoolData=%d WritePartAfterJob=%d PreferMountedVols=%d\n";
59 /* Responses sent to Director daemon */
/* Success reply: session id, session time and the authorization key */
60 static char OKjob[] = "3000 OK Job SDid=%u SDtime=%u Authorization=%s\n";
/* Failure reply: the sscanf result and the command text that was received */
61 static char BAD_job[] = "3915 Bad Job command. stat=%d CMD: %s\n";
/*
 * NOTE(review): the three query responses below are commented out, yet
 * query_cmd() later in this file still names OK_query, NO_query and
 * BAD_query. Presumably query_cmd() is compiled out -- confirm.
 */
62 //static char OK_query[] = "3001 OK query\n";
63 //static char NO_query[] = "3918 Query failed\n";
64 //static char BAD_query[] = "3917 Bad query command: %s\n";
67 * Director requests us to start a job
68 * Basic tasks done here:
69 * - We pick up the JobId to be run from the Director.
70 * - We pick up the device, media, and pool from the Director.
71 * - Wait for a connection from the File Daemon (FD)
72 * - Accept commands from the FD (i.e. run the job)
73 * - Return when the connection is terminated or an error occurs.
/*
 * job_cmd: handle the job request sent by the Director on jcr->dir_bsock.
 *
 * Visible steps:
 *  - parse dir->msg with the jobcmd format and, as a fallback, with
 *    oldjobcmd (which has no SpoolSize field);
 *  - on the failure path, echo the command back in a BAD_job reply and
 *    mark the job JS_ErrorTerminated;
 *  - look for an existing JCR with the same full job name;
 *  - copy the parsed values into jcr and assign a new volume session id;
 *  - create an authorization key, send it to the Director in the OKjob
 *    reply and keep a copy in jcr->sd_auth_key.
 *
 * NOTE(review): this excerpt is missing lines (local declarations, the
 * tests on the sscanf results, braces and return statements), so the
 * exact conditions and return values cannot be read from here.
 */
76 bool job_cmd(JCR *jcr)
82 BSOCK *dir = jcr->dir_bsock;
83 POOL_MEM job_name, client_name, job, fileset_name, fileset_md5;
84 int JobType, level, spool_attributes, no_attributes, spool_data;
85 int write_part_after_job, PreferMountedVols;
90 * Get JobId and permissions from Director
92 Dmsg1(100, "<dird: %s", dir->msg);
/* Default used when the old command format, which has no SpoolSize, is received */
93 bstrncpy(spool_size, "0", sizeof(spool_size));
/*
 * NOTE(review): the last conversion in jobcmd is an unbounded %s written
 * into spool_size, whose declaration is not visible here. Confirm that
 * an over-long SpoolSize value cannot overflow the buffer.
 */
94 stat = sscanf(dir->msg, jobcmd, &JobId, job.c_str(), job_name.c_str(),
96 &JobType, &level, fileset_name.c_str(), &no_attributes,
97 &spool_attributes, fileset_md5.c_str(), &spool_data,
98 &write_part_after_job, &PreferMountedVols, spool_size);
100 /* Try old version */
101 stat = sscanf(dir->msg, oldjobcmd, &JobId, job.c_str(), job_name.c_str(),
103 &JobType, &level, fileset_name.c_str(), &no_attributes,
104 &spool_attributes, fileset_md5.c_str(), &spool_data,
105 &write_part_after_job, &PreferMountedVols);
/* Failure path: report the sscanf result and the received command text */
107 pm_strcpy(jcr->errmsg, dir->msg);
108 dir->fsend(BAD_job, stat, jcr->errmsg);
109 Dmsg1(100, ">dird: %s", dir->msg);
110 set_jcr_job_status(jcr, JS_ErrorTerminated);
115 * Since this job could be rescheduled, we
116 * check to see if we have it already. If so
117 * free the old jcr and use the new one.
119 ojcr = get_jcr_by_full_name(job.c_str());
/* Only an earlier JCR that has not been authenticated is considered here */
120 if (ojcr && !ojcr->authenticated) {
/*
 * NOTE(review): the cast chain narrows the pointer to unsigned for %x,
 * so the printed value is truncated where pointers are wider than
 * unsigned int. This affects debug output only.
 */
121 Dmsg2(100, "Found ojcr=0x%x Job %s\n", (unsigned)(long)ojcr, job.c_str());
/* Record the session identity and the parsed job parameters in the JCR */
125 jcr->VolSessionId = newVolSessionId();
126 jcr->VolSessionTime = VolSessionTime;
127 bstrncpy(jcr->Job, job, sizeof(jcr->Job));
/* Each name gets spaces restored by unbash_spaces() and is copied into fresh pool memory */
128 unbash_spaces(job_name);
129 jcr->job_name = get_pool_memory(PM_NAME);
130 pm_strcpy(jcr->job_name, job_name);
131 unbash_spaces(client_name);
132 jcr->client_name = get_pool_memory(PM_NAME);
133 pm_strcpy(jcr->client_name, client_name);
134 unbash_spaces(fileset_name);
135 jcr->fileset_name = get_pool_memory(PM_NAME);
136 pm_strcpy(jcr->fileset_name, fileset_name);
137 jcr->JobType = JobType;
138 jcr->JobLevel = level;
139 jcr->no_attributes = no_attributes;
140 jcr->spool_attributes = spool_attributes;
141 jcr->spool_data = spool_data;
142 jcr->spool_size = str_to_int64(spool_size);
143 jcr->write_part_after_job = write_part_after_job;
144 jcr->fileset_md5 = get_pool_memory(PM_NAME);
145 pm_strcpy(jcr->fileset_md5, fileset_md5);
146 jcr->PreferMountedVols = PreferMountedVols;
/* Stays false until handle_filed_connection() or run_cmd() sets it */
148 jcr->authenticated = false;
151 * Pass back an authorization key for the File daemon
/* The seed is built from the JCR address and the JobId */
153 bsnprintf(seed, sizeof(seed), "%p%d", jcr, JobId);
154 make_session_key(auth_key, seed, 1);
155 dir->fsend(OKjob, jcr->VolSessionId, jcr->VolSessionTime, auth_key);
156 Dmsg2(100, ">dird jid=%u: %s", (uint32_t)jcr->JobId, dir->msg);
/* Keep a heap copy in the JCR, then wipe the local buffer holding the key */
157 jcr->sd_auth_key = bstrdup(auth_key);
158 memset(auth_key, 0, sizeof(auth_key));
159 generate_daemon_event(jcr, "JobStart");
/*
 * run_cmd: wait until the job is authenticated, then run it.
 *
 * Visible steps:
 *  - for some job types (case labels are not visible in this excerpt)
 *    jcr->authenticated is set directly, because no File daemon is needed;
 *  - otherwise the status becomes JS_WaitFD, the Director is informed,
 *    and the thread waits on jcr->job_start_wait with an absolute
 *    deadline of now + me->client_wait seconds;
 *  - the stored authorization key is overwritten with zeros afterwards;
 *  - run_job() is called only if the job is authenticated and not canceled.
 *
 * NOTE(review): declarations, braces, the mutex lock and unlock calls and
 * the return statements are missing from this excerpt.
 */
163 bool run_cmd(JCR *jcr)
167 struct timespec timeout;
171 Dmsg1(200, "Run_cmd: %s\n", jcr->dir_bsock->msg);
172 /* The following jobs don't need the FD */
173 switch (jcr->JobType) {
177 jcr->authenticated = true;
182 set_jcr_job_status(jcr, JS_WaitFD); /* wait for FD to connect */
183 dir_send_job_status(jcr);
/* Build the absolute deadline: current time plus me->client_wait seconds */
185 gettimeofday(&tv, &tz);
186 timeout.tv_nsec = tv.tv_usec * 1000;
187 timeout.tv_sec = tv.tv_sec + me->client_wait;
/*
 * NOTE(review): 050 is an octal literal, i.e. debug level 40 decimal.
 * NOTE(review): this debug message also prints the authorization key.
 */
189 Dmsg3(050, "%s waiting %d sec for FD to contact SD key=%s\n",
190 jcr->Job, (int)me->client_wait, jcr->sd_auth_key);
193 * Wait for the File daemon to contact us to start the Job,
194 * when he does, we will be released, unless the 30 minutes
/*
 * NOTE(review): the text above mentions 30 minutes, but the deadline
 * actually used is me->client_wait seconds.
 * The loop is re-entered after every wakeup until the job is either
 * authenticated or canceled. pthread_cond_timedwait() requires the
 * caller to hold the mutex; the locking call is not visible here.
 */
198 while ( !jcr->authenticated && !job_canceled(jcr) ) {
199 errstat = pthread_cond_timedwait(&jcr->job_start_wait, &mutex, &timeout);
/* Timeout and the listed error codes share one branch; its body is not visible -- presumably it leaves the loop, confirm */
200 if (errstat == ETIMEDOUT || errstat == EINVAL || errstat == EPERM) {
/* Wipe the key in place; the buffer itself stays allocated */
206 memset(jcr->sd_auth_key, 0, strlen(jcr->sd_auth_key));
208 if (jcr->authenticated && !job_canceled(jcr)) {
209 Dmsg1(100, "Running job %s\n", jcr->Job);
210 run_job(jcr); /* Run the job */
216 * After receiving a connection (in dircmd.c) if it is
217 * from the File daemon, this routine is called.
/*
 * handle_filed_connection: attach an incoming File daemon socket to the
 * job it names and authenticate it.
 *
 *  fd       - socket of the connecting File daemon
 *  job_name - full job name used to look up the JCR
 *
 * After the authentication attempt the thread waiting on
 * jcr->job_start_wait (see run_cmd) is signalled.
 *
 * NOTE(review): braces, returns and any JCR release calls are missing
 * from this excerpt.
 */
219 void handle_filed_connection(BSOCK *fd, char *job_name)
223 bmicrosleep(0, 50000); /* wait 50 millisecs */
/* Unknown job name: report it; no JCR is available for the message */
224 if (!(jcr=get_jcr_by_full_name(job_name))) {
225 Jmsg1(NULL, M_FATAL, 0, _("FD connect failed: Job name not found: %s\n"), job_name);
226 Dmsg1(3, "**** Job \"%s\" not found", job_name);
/* Bind the socket to the job and the job to the socket */
230 jcr->file_bsock = fd;
231 jcr->file_bsock->set_jcr(jcr);
233 Dmsg1(110, "Found Job %s\n", job_name);
/* A job that is already authenticated should not receive a second FD connection */
235 if (jcr->authenticated) {
236 Jmsg2(jcr, M_FATAL, 0, _("Hey!!!! JobId %u Job %s already authenticated.\n"),
237 (uint32_t)jcr->JobId, jcr->Job);
243 * Authenticate the File daemon
/* authenticate_filed() is only called when the job is not yet authenticated */
245 if (jcr->authenticated || !authenticate_filed(jcr)) {
246 Dmsg1(100, "Authentication failed Job %s\n", jcr->Job);
247 Jmsg(jcr, M_FATAL, 0, _("Unable to authenticate File daemon\n"));
249 jcr->authenticated = true;
250 Dmsg2(110, "OK Authentication jid=%u Job %s\n", (uint32_t)jcr->JobId, jcr->Job);
253 if (!jcr->authenticated) {
254 set_jcr_job_status(jcr, JS_ErrorTerminated);
256 pthread_cond_signal(&jcr->job_start_wait); /* wake waiting job */
264 * Query Device command from Director
265 * Sends Storage Daemon's information on the device to the
266 * caller (presumably the Director).
267 * This command always returns "true" so that the line is
268 * not closed on an error.
/*
 * query_cmd: answer a device query from the Director.
 *
 * The device name is parsed from dir->msg with the query_device format.
 * The name is compared first against every R_DEVICE resource and then
 * against every R_AUTOCHANGER resource; a match triggers an update to
 * the Director followed by an OK_query or NO_query reply. With no match
 * a NO_device reply is sent, and an unparsable command gets BAD_query.
 *
 * NOTE(review): query_device and NO_device are not defined in the
 * visible part of the file, and OK_query, NO_query and BAD_query are
 * commented out near the top, so this function presumably sits in a
 * disabled preprocessor region -- confirm. Braces, returns and several
 * conditions are missing from this excerpt.
 */
271 bool query_cmd(JCR *jcr)
273 POOL_MEM dev_name, VolumeName, MediaType, ChangerName;
274 BSOCK *dir = jcr->dir_bsock;
276 AUTOCHANGER *changer;
279 Dmsg1(100, "Query_cmd: %s", dir->msg);
/* ok is true only when exactly one field, the device name, was parsed */
280 ok = sscanf(dir->msg, query_device, dev_name.c_str()) == 1;
281 Dmsg1(100, "<dird: %s\n", dir->msg);
283 unbash_spaces(dev_name);
/* First pass: plain device resources, matched by resource name */
284 foreach_res(device, R_DEVICE) {
285 /* Find resource, and make sure we were able to open it */
286 if (strcmp(dev_name.c_str(), device->hdr.name) == 0) {
/* The condition guarding this initialization is not visible in the excerpt */
288 device->dev = init_dev(jcr, device);
293 ok = dir_update_device(jcr, device->dev);
295 ok = dir->fsend(OK_query);
297 dir->fsend(NO_query);
/* Second pass: autochanger resources, matched by resource name */
302 foreach_res(changer, R_AUTOCHANGER) {
303 /* Find resource, and make sure we were able to open it */
304 if (strcmp(dev_name.c_str(), changer->hdr.name) == 0) {
305 if (!changer->device || changer->device->size() == 0) {
306 continue; /* no devices */
308 ok = dir_update_changer(jcr, changer);
310 ok = dir->fsend(OK_query);
312 dir->fsend(NO_query);
317 /* If we get here, the device/autochanger was not found */
318 unbash_spaces(dir->msg);
319 pm_strcpy(jcr->errmsg, dir->msg);
320 dir->fsend(NO_device, dev_name.c_str());
321 Dmsg1(100, ">dird: %s\n", dir->msg);
/* Reply for a command that could not be parsed; the received text is echoed back */
323 unbash_spaces(dir->msg);
324 pm_strcpy(jcr->errmsg, dir->msg);
325 dir->fsend(BAD_query, jcr->errmsg);
326 Dmsg1(100, ">dird: %s\n", dir->msg);
336 * Destroy the Job Control Record and associated
337 * resources (sockets).
/*
 * stored_free_jcr: release the Storage Daemon specific parts of a JCR.
 *
 * Visible steps: close the File daemon socket, free the name and MD5
 * buffers, remove and free the restore bootstrap file, complain if the
 * JCR is still linked to a device, destroy the job_start_wait condition
 * variable, free the read DCR, and delete the read and write storage
 * lists together with the device list of every entry.
 *
 * NOTE(review): braces, some guards and possibly the tail of the
 * function are missing from this excerpt.
 */
339 void stored_free_jcr(JCR *jcr)
341 Dmsg1(900, "stored_free_jcr JobId=%u\n", jcr->JobId);
342 if (jcr->file_bsock) {
343 jcr->file_bsock->close();
344 jcr->file_bsock = NULL;
/*
 * NOTE(review): job_name is released with free_pool_memory() while
 * client_name, fileset_name and fileset_md5 use free_memory(), although
 * job_cmd() obtains all four from get_pool_memory(PM_NAME). Presumably
 * both calls are valid for such buffers -- confirm.
 */
347 free_pool_memory(jcr->job_name);
349 if (jcr->client_name) {
350 free_memory(jcr->client_name);
351 jcr->client_name = NULL;
353 if (jcr->fileset_name) {
354 free_memory(jcr->fileset_name);
356 if (jcr->fileset_md5) {
357 free_memory(jcr->fileset_md5);
/* The bootstrap file is deleted from disk before its name buffer is freed */
363 if (jcr->RestoreBootstrap) {
364 unlink(jcr->RestoreBootstrap);
365 free_pool_memory(jcr->RestoreBootstrap);
366 jcr->RestoreBootstrap = NULL;
/* A JCR being freed is expected to be detached from every device list */
368 if (jcr->next_dev || jcr->prev_dev) {
369 Emsg0(M_FATAL, 0, _("In free_jcr(), but still attached to device!!!!\n"));
371 pthread_cond_destroy(&jcr->job_start_wait);
377 /* Avoid a double free */
/* When both pointers name the same DCR, read_dcr is cleared so it is not freed a second time */
378 if (jcr->dcr == jcr->read_dcr) {
379 jcr->read_dcr = NULL;
386 free_dcr(jcr->read_dcr);
387 jcr->read_dcr = NULL;
/* Each storage entry owns a device list that is deleted before the list itself */
390 if (jcr->read_store) {
392 foreach_alist(store, jcr->read_store) {
393 delete store->device;
396 delete jcr->read_store;
397 jcr->read_store = NULL;
399 if (jcr->write_store) {
401 foreach_alist(store, jcr->write_store) {
402 delete store->device;
405 delete jcr->write_store;
406 jcr->write_store = NULL;