2 Bacula® - The Network Backup Solution
4 Copyright (C) 2000-2007 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from
7 many others, a complete list can be found in the file AUTHORS.
8 This program is Free Software; you can redistribute it and/or
9 modify it under the terms of version two of the GNU General Public
10 License as published by the Free Software Foundation and included
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 Bacula® is a registered trademark of John Walker.
24 The licensor of Bacula is the Free Software Foundation Europe
25 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
26 Switzerland, email:ftf@fsfeurope.org.
29 * Job control and execution for Storage Daemon
/* File-scope mutex; run_cmd() passes it to pthread_cond_timedwait() while
 * waiting on jcr->job_start_wait. */
40 static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
42 /* Imported variables */
43 extern uint32_t VolSessionTime;
45 /* Imported functions */
46 extern uint32_t newVolSessionId();
47 extern bool do_mac(JCR *jcr);
49 /* Requests from the Director daemon */
/* sscanf() template used by job_cmd() to parse the job request. It contains
 * 14 conversions. The string fields are limited to 127 characters, except
 * SpoolSize=%s which has no field width.
 * NOTE(review): the destination buffer for SpoolSize is declared on a line
 * that is not visible here -- confirm it cannot be overrun by a long value. */
50 static char jobcmd[] = "JobId=%d job=%127s job_name=%127s client_name=%127s "
51 "type=%d level=%d FileSet=%127s NoAttr=%d SpoolAttr=%d FileSetMD5=%127s "
52 "SpoolData=%d WritePartAfterJob=%d PreferMountedVols=%d SpoolSize=%s\n";
55 /* Responses sent to Director daemon */
/* OKjob: sent by job_cmd() with VolSessionId, VolSessionTime and the
 * generated authorization key. */
56 static char OKjob[] = "3000 OK Job SDid=%u SDtime=%u Authorization=%s\n";
/* BAD_job: sent by job_cmd() with the sscanf() result and the offending
 * command text. */
57 static char BAD_job[] = "3915 Bad Job command. stat=%d CMD: %s\n";
/* NOTE(review): the three query responses below are commented out, yet
 * query_cmd() in this file still references OK_query, NO_query and BAD_query.
 * Presumably query_cmd() is excluded from the build by a preprocessor guard
 * that is not visible in this view -- confirm. */
58 //static char OK_query[] = "3001 OK query\n";
59 //static char NO_query[] = "3918 Query failed\n";
60 //static char BAD_query[] = "3917 Bad query command: %s\n";
63 * Director requests us to start a job
64 * Basic tasks done here:
65 * - We pickup the JobId to be run from the Director.
66 * - We pickup the device, media, and pool from the Director
67 * - Wait for a connection from the File Daemon (FD)
68 * - Accept commands from the FD (i.e. run the job)
69 * - Return when the connection is terminated or
/*
 * job_cmd: handle the job request received from the Director.
 *
 * Parses dir->msg with the jobcmd template, copies the parsed values into
 * jcr, and replies with OKjob carrying a newly generated authorization key.
 * A second path replies with BAD_job and sets the job status to
 * JS_ErrorTerminated.
 *
 * NOTE(review): the embedded line numbers skip values throughout this
 * function. Braces, several local declarations (stat, JobId, ojcr,
 * spool_size, seed, auth_key), the condition guarding the BAD_job path and
 * the return statements are not visible, so the notes below cover only the
 * statements that are shown.
 */
72 bool job_cmd(JCR *jcr)
78 BSOCK *dir = jcr->dir_bsock;
79 POOL_MEM job_name, client_name, job, fileset_name, fileset_md5;
80 int JobType, level, spool_attributes, no_attributes, spool_data;
81 int write_part_after_job, PreferMountedVols;
86 * Get JobId and permissions from Director
88 Dmsg1(100, "<dird: %s", dir->msg);
/* NOTE(review): jobcmd has 14 conversions but only 13 destinations are
 * visible below; embedded line 90 is missing and presumably supplies
 * client_name.c_str() -- confirm against the full file. */
89 stat = sscanf(dir->msg, jobcmd, &JobId, job.c_str(), job_name.c_str(),
91 &JobType, &level, fileset_name.c_str(), &no_attributes,
92 &spool_attributes, fileset_md5.c_str(), &spool_data,
93 &write_part_after_job, &PreferMountedVols, spool_size);
/* Error reply: keep a copy of the received command in jcr->errmsg, report it
 * to the Director together with the sscanf() result, and mark the job as
 * failed. The test that selects this path is not visible. */
95 pm_strcpy(jcr->errmsg, dir->msg);
96 dir->fsend(BAD_job, stat, jcr->errmsg);
97 Dmsg1(100, ">dird: %s", dir->msg);
98 set_jcr_job_status(jcr, JS_ErrorTerminated);
102 * Since this job could be rescheduled, we
103 * check to see if we have it already. If so
104 * free the old jcr and use the new one.
/* Look up an existing JCR with the same full job name; the branch is taken
 * only when one exists and it has not been authenticated.
 * NOTE(review): the Dmsg2 below narrows the pointer to unsigned for %x, which
 * drops the upper bits on LP64 targets; this affects debug output only. */
106 ojcr = get_jcr_by_full_name(job.c_str());
107 if (ojcr && !ojcr->authenticated) {
108 Dmsg2(100, "Found ojcr=0x%x Job %s\n", (unsigned)(long)ojcr, job.c_str());
/* Record the session identifiers and the parsed request values in jcr. Each
 * name is passed through unbash_spaces() and then copied into pool memory
 * obtained with get_pool_memory(PM_NAME). */
112 jcr->VolSessionId = newVolSessionId();
113 jcr->VolSessionTime = VolSessionTime;
114 bstrncpy(jcr->Job, job, sizeof(jcr->Job));
115 unbash_spaces(job_name);
116 jcr->job_name = get_pool_memory(PM_NAME);
117 pm_strcpy(jcr->job_name, job_name);
118 unbash_spaces(client_name);
119 jcr->client_name = get_pool_memory(PM_NAME);
120 pm_strcpy(jcr->client_name, client_name);
121 unbash_spaces(fileset_name);
122 jcr->fileset_name = get_pool_memory(PM_NAME);
123 pm_strcpy(jcr->fileset_name, fileset_name);
124 jcr->JobType = JobType;
125 jcr->JobLevel = level;
126 jcr->no_attributes = no_attributes;
127 jcr->spool_attributes = spool_attributes;
128 jcr->spool_data = spool_data;
/* spool_size arrives as text and is converted to a 64 bit integer here. */
129 jcr->spool_size = str_to_int64(spool_size);
130 jcr->write_part_after_job = write_part_after_job;
131 jcr->fileset_md5 = get_pool_memory(PM_NAME);
132 pm_strcpy(jcr->fileset_md5, fileset_md5);
133 jcr->PreferMountedVols = PreferMountedVols;
/* The job starts unauthenticated; handle_filed_connection() sets this flag
 * once the File daemon has authenticated. */
135 jcr->authenticated = false;
138 * Pass back an authorization key for the File daemon
/* The seed is built from the jcr pointer value and the JobId. The generated
 * key is sent to the Director, duplicated into jcr->sd_auth_key, and the
 * local copy is then overwritten with zeros. */
140 bsnprintf(seed, sizeof(seed), "%p%d", jcr, JobId);
141 make_session_key(auth_key, seed, 1);
142 dir->fsend(OKjob, jcr->VolSessionId, jcr->VolSessionTime, auth_key);
143 Dmsg2(100, ">dird jid=%u: %s", (uint32_t)jcr->JobId, dir->msg);
144 jcr->sd_auth_key = bstrdup(auth_key);
145 memset(auth_key, 0, sizeof(auth_key));
146 generate_daemon_event(jcr, "JobStart");
/*
 * run_cmd: wait until the job is authenticated, then run it.
 *
 * Sets the job status to JS_WaitFD, waits on jcr->job_start_wait with a
 * deadline of me->client_wait seconds from now, clears the stored
 * authorization key, and calls run_job() if the job is authenticated and
 * has not been canceled.
 *
 * NOTE(review): the case labels of the switch, the declarations of tv, tz
 * and errstat, the body of the timeout branch and the return statements are
 * not visible in this view.
 */
150 bool run_cmd(JCR *jcr)
154 struct timespec timeout;
158 Dmsg1(200, "Run_cmd: %s\n", jcr->dir_bsock->msg);
159 /* The following jobs don't need the FD */
/* For the job types selected here (labels not visible) the job is marked
 * authenticated directly, so the wait loop below is not entered. */
160 switch (jcr->JobType) {
164 jcr->authenticated = true;
169 set_jcr_job_status(jcr, JS_WaitFD); /* wait for FD to connect */
170 dir_send_job_status(jcr);
/* Build the absolute deadline for pthread_cond_timedwait(): the current
 * time plus me->client_wait seconds, with microseconds converted to
 * nanoseconds. */
172 gettimeofday(&tv, &tz);
173 timeout.tv_nsec = tv.tv_usec * 1000;
174 timeout.tv_sec = tv.tv_sec + me->client_wait;
176 Dmsg3(050, "%s waiting %d sec for FD to contact SD key=%s\n",
177 jcr->Job, (int)me->client_wait, jcr->sd_auth_key);
180 * Wait for the File daemon to contact us to start the Job,
181 * when he does, we will be released, unless the 30 minutes
/* Loop until the job is authenticated or canceled. The condition variable is
 * signaled by handle_filed_connection().
 * NOTE(review): pthread_cond_timedwait() requires the caller to hold the
 * mutex; the lock and unlock calls are not visible here -- confirm they
 * surround this loop. The action taken for ETIMEDOUT, EINVAL or EPERM is
 * also not visible (presumably the loop is left). */
185 while ( !jcr->authenticated && !job_canceled(jcr) ) {
186 errstat = pthread_cond_timedwait(&jcr->job_start_wait, &mutex, &timeout);
187 if (errstat == ETIMEDOUT || errstat == EINVAL || errstat == EPERM) {
/* Overwrite the stored authorization key with zeros over its current string
 * length; the buffer itself is not freed here. */
193 memset(jcr->sd_auth_key, 0, strlen(jcr->sd_auth_key));
195 if (jcr->authenticated && !job_canceled(jcr)) {
196 Dmsg1(100, "Running job %s\n", jcr->Job);
197 run_job(jcr); /* Run the job */
203 * After receiving a connection (in dircmd.c) if it is
204 * from the File daemon, this routine is called.
/*
 * handle_filed_connection: attach an incoming File daemon connection to its
 * job and authenticate it.
 *
 *   fd        socket of the connecting File daemon
 *   job_name  full job name used to look up the JCR
 *
 * When authentication succeeds jcr->authenticated is set; in every visible
 * path that reaches the end, the thread waiting in run_cmd() on
 * jcr->job_start_wait is signaled.
 *
 * NOTE(review): braces, the declaration of jcr, and the statements that
 * follow the error messages (embedded lines 214-216, 225-229, 244-248) are
 * not visible in this view.
 */
206 void handle_filed_connection(BSOCK *fd, char *job_name)
/* NOTE(review): the reason for this fixed 50 ms delay is not stated in the
 * visible source. */
210 bmicrosleep(0, 50000); /* wait 50 millisecs */
/* No JCR with this name: report the failure. What happens to fd afterwards
 * is not visible. */
211 if (!(jcr=get_jcr_by_full_name(job_name))) {
212 Jmsg1(NULL, M_FATAL, 0, _("FD connect failed: Job name not found: %s\n"), job_name);
213 Dmsg1(3, "**** Job \"%s\" not found", job_name);
/* Store the socket in the JCR and associate the JCR with the socket. */
217 jcr->file_bsock = fd;
218 jcr->file_bsock->set_jcr(jcr);
220 Dmsg1(110, "Found Job %s\n", job_name);
/* A second connection for a job that is already authenticated is reported
 * as a fatal error. */
222 if (jcr->authenticated) {
223 Jmsg2(jcr, M_FATAL, 0, _("Hey!!!! JobId %u Job %s already authenticated.\n"),
224 (uint32_t)jcr->JobId, jcr->Job);
230 * Authenticate the File daemon
/* The first operand makes an already authenticated job take the failure
 * branch without calling authenticate_filed() again. */
232 if (jcr->authenticated || !authenticate_filed(jcr)) {
233 Dmsg1(100, "Authentication failed Job %s\n", jcr->Job);
234 Jmsg(jcr, M_FATAL, 0, _("Unable to authenticate File daemon\n"));
236 jcr->authenticated = true;
237 Dmsg2(110, "OK Authentication jid=%u Job %s\n", (uint32_t)jcr->JobId, jcr->Job);
240 if (!jcr->authenticated) {
241 set_jcr_job_status(jcr, JS_ErrorTerminated);
243 pthread_cond_signal(&jcr->job_start_wait); /* wake waiting job */
251 * Query Device command from Director
252 * Sends Storage Daemon's information on the device to the
253 * caller (presumably the Director).
254 * This command always returns "true" so that the line is
255 * not closed on an error.
/*
 * query_cmd: answer a device query from the Director.
 *
 * Parses a device name from dir->msg, searches the device resources and then
 * the autochanger resources for a matching name, sends the matching
 * resource's information with dir_update_device() or dir_update_changer(),
 * and replies with OK_query or NO_query. Replies with NO_device when nothing
 * matches and BAD_query when the command cannot be parsed.
 *
 * NOTE(review): query_device, OK_query, NO_query, NO_device and BAD_query
 * have no live definition in the visible source (three of them appear only
 * as commented-out declarations), so this function is presumably compiled
 * out by a guard that is not visible -- confirm. Braces, the declarations of
 * ok and device, several conditions and the return statements are also not
 * visible.
 */
258 bool query_cmd(JCR *jcr)
260 POOL_MEM dev_name, VolumeName, MediaType, ChangerName;
261 BSOCK *dir = jcr->dir_bsock;
263 AUTOCHANGER *changer;
266 Dmsg1(100, "Query_cmd: %s", dir->msg);
/* ok is true only when exactly one field (the device name) was converted. */
267 ok = sscanf(dir->msg, query_device, dev_name.c_str()) == 1;
268 Dmsg1(100, "<dird: %s\n", dir->msg);
270 unbash_spaces(dev_name);
/* First pass: search the device resources by exact name. */
271 foreach_res(device, R_DEVICE) {
272 /* Find resource, and make sure we were able to open it */
273 if (strcmp(dev_name.c_str(), device->hdr.name) == 0) {
/* NOTE(review): the condition under which init_dev() is called is not
 * visible (presumably only when device->dev is not yet set). */
275 device->dev = init_dev(jcr, device);
280 ok = dir_update_device(jcr, device->dev);
282 ok = dir->fsend(OK_query);
284 dir->fsend(NO_query);
/* Second pass: search the autochanger resources by exact name. */
289 foreach_res(changer, R_AUTOCHANGER) {
290 /* Find resource, and make sure we were able to open it */
291 if (strcmp(dev_name.c_str(), changer->hdr.name) == 0) {
292 if (!changer->device || changer->device->size() == 0) {
293 continue; /* no devices */
295 ok = dir_update_changer(jcr, changer);
297 ok = dir->fsend(OK_query);
299 dir->fsend(NO_query);
304 /* If we get here, the device/autochanger was not found */
305 unbash_spaces(dir->msg);
306 pm_strcpy(jcr->errmsg, dir->msg);
307 dir->fsend(NO_device, dev_name.c_str());
308 Dmsg1(100, ">dird: %s\n", dir->msg);
/* Unparsable command: echo the received text back inside BAD_query. */
310 unbash_spaces(dir->msg);
311 pm_strcpy(jcr->errmsg, dir->msg);
312 dir->fsend(BAD_query, jcr->errmsg);
313 Dmsg1(100, ">dird: %s\n", dir->msg);
323 * Destroy the Job Control Record and associated
324 * resources (sockets).
/*
 * stored_free_jcr: release the Storage daemon resources held by a JCR.
 *
 * Closes the File daemon socket, frees the name strings, removes the restore
 * bootstrap file and frees its name, destroys the job_start_wait condition
 * variable, frees the device control records, and deletes the read and write
 * store lists together with the device object of each entry.
 *
 * NOTE(review): braces, several guarding conditions, the declaration of
 * store and some statements (for example the one releasing jcr->dcr) are not
 * visible in this view.
 */
326 void stored_free_jcr(JCR *jcr)
328 Dmsg1(900, "stored_free_jcr JobId=%u\n", jcr->JobId);
329 if (jcr->file_bsock) {
330 jcr->file_bsock->close();
331 jcr->file_bsock = NULL;
/* NOTE(review): job_name is released with free_pool_memory() while the other
 * names below use free_memory(); all four were obtained with
 * get_pool_memory() in job_cmd(). Confirm the two calls are equivalent. The
 * guard for this call is not visible. */
334 free_pool_memory(jcr->job_name);
336 if (jcr->client_name) {
337 free_memory(jcr->client_name);
338 jcr->client_name = NULL;
/* NOTE(review): unlike client_name, no reset of fileset_name or fileset_md5
 * to NULL is visible after the free -- the reset may be on a missing line;
 * confirm. */
340 if (jcr->fileset_name) {
341 free_memory(jcr->fileset_name);
343 if (jcr->fileset_md5) {
344 free_memory(jcr->fileset_md5);
/* The bootstrap file is deleted from disk before its name is freed. */
350 if (jcr->RestoreBootstrap) {
351 unlink(jcr->RestoreBootstrap);
352 free_pool_memory(jcr->RestoreBootstrap);
353 jcr->RestoreBootstrap = NULL;
/* A JCR that is still linked to a device at this point is reported as a
 * fatal error. */
355 if (jcr->next_dev || jcr->prev_dev) {
356 Emsg0(M_FATAL, 0, _("In free_jcr(), but still attached to device!!!!\n"));
358 pthread_cond_destroy(&jcr->job_start_wait);
364 /* Avoid a double free */
/* When dcr and read_dcr refer to the same record, read_dcr is cleared so the
 * record is not passed to free_dcr() twice. */
365 if (jcr->dcr == jcr->read_dcr) {
366 jcr->read_dcr = NULL;
373 free_dcr(jcr->read_dcr);
374 jcr->read_dcr = NULL;
/* Each store list owns the device object of every entry: those are deleted
 * first, then the list itself. */
377 if (jcr->read_store) {
379 foreach_alist(store, jcr->read_store) {
380 delete store->device;
383 delete jcr->read_store;
384 jcr->read_store = NULL;
386 if (jcr->write_store) {
388 foreach_alist(store, jcr->write_store) {
389 delete store->device;
392 delete jcr->write_store;
393 jcr->write_store = NULL;