2 Bacula® - The Network Backup Solution
4 Copyright (C) 2000-2008 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from
7 many others, a complete list can be found in the file AUTHORS.
8 This program is Free Software; you can redistribute it and/or
9 modify it under the terms of version two of the GNU General Public
10 License as published by the Free Software Foundation and included
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
23 Bacula® is a registered trademark of Kern Sibbald.
24 The licensor of Bacula is the Free Software Foundation Europe
25 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
26 Switzerland, email:ftf@fsfeurope.org.
29 * Job control and execution for Storage Daemon
/*
 * File-scope mutex handed to pthread_cond_timedwait() in run_cmd() while
 * that function waits on jcr->job_start_wait.
 */
40 static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
42 /* Imported variables */
/* Copied into jcr->VolSessionTime by job_cmd(). */
43 extern uint32_t VolSessionTime;
45 /* Imported functions */
/* Supplies the value job_cmd() stores in jcr->VolSessionId. */
46 extern uint32_t newVolSessionId();
47 extern bool do_mac(JCR *jcr);
49 /* Requests from the Director daemon */
/*
 * sscanf() pattern for the Director's job request; job_cmd() parses
 * dir->msg with this pattern first.
 * NOTE(review): the string fields are limited with %127s, but SpoolSize
 * uses a bare %s with no width -- confirm the destination buffer in
 * job_cmd() cannot be overrun by a long value.
 */
50 static char jobcmd[] = "JobId=%d job=%127s job_name=%127s client_name=%127s "
51 "type=%d level=%d FileSet=%127s NoAttr=%d SpoolAttr=%d FileSetMD5=%127s "
52 "SpoolData=%d WritePartAfterJob=%d PreferMountedVols=%d SpoolSize=%s\n";
/*
 * Same request without the trailing SpoolSize field; job_cmd() uses it in
 * its "Try old version" step.
 */
53 static char oldjobcmd[] = "JobId=%d job=%127s job_name=%127s client_name=%127s "
54 "type=%d level=%d FileSet=%127s NoAttr=%d SpoolAttr=%d FileSetMD5=%127s "
55 "SpoolData=%d WritePartAfterJob=%d PreferMountedVols=%d\n";
59 /* Responses sent to Director daemon */
/*
 * OKjob:   sent by job_cmd() with the volume session id, the volume session
 *          time and the generated authorization key.
 * BAD_job: sent by job_cmd() with the sscanf() result and the text of the
 *          rejected command.
 */
60 static char OKjob[] = "3000 OK Job SDid=%u SDtime=%u Authorization=%s\n";
61 static char BAD_job[] = "3915 Bad Job command. stat=%d CMD: %s\n";
/*
 * NOTE(review): the three query responses below are commented out, yet
 * query_cmd() in this file still refers to OK_query, NO_query and
 * BAD_query -- presumably query_cmd() is compiled out or the names are
 * defined elsewhere; confirm.
 */
62 //static char OK_query[] = "3001 OK query\n";
63 //static char NO_query[] = "3918 Query failed\n";
64 //static char BAD_query[] = "3917 Bad query command: %s\n";
67 * Director requests us to start a job
68 * Basic tasks done here:
69 * - We pick up the JobId to be run from the Director.
70 * - We pick up the device, media, and pool from the Director
71 * - Wait for a connection from the File Daemon (FD)
72 * - Accept commands from the FD (i.e. run the job)
73 * - Return when the connection is terminated or an error occurs.
/*
 * job_cmd: handle the job request the Director sent on jcr->dir_bsock.
 *
 *  jcr  job control record for this Director connection; it receives the
 *       parsed job parameters and the generated authorization key.
 *
 * Visible steps:
 *  - parse dir->msg with jobcmd, and with oldjobcmd (no SpoolSize field);
 *  - look up an existing JCR with the same full job name;
 *  - assign a new VolSessionId/VolSessionTime and copy the parsed fields
 *    into the JCR (name fields go into freshly obtained pool memory);
 *  - build a session key, send it to the Director with OKjob, keep a copy
 *    in jcr->sd_auth_key and fire the "JobStart" daemon event.
 * A BAD_job reply followed by JS_ErrorTerminated is also present.
 *
 * NOTE(review): the conditions selecting the fallback parse and the BAD_job
 * reply, the return statements and the local declarations are not visible
 * here -- presumably false is returned when neither pattern matches and
 * true otherwise; confirm against the complete source.
 */
76 bool job_cmd(JCR *jcr)
82 BSOCK *dir = jcr->dir_bsock;
83 POOL_MEM job_name, client_name, job, fileset_name, fileset_md5;
84 int JobType, level, spool_attributes, no_attributes, spool_data;
85 int write_part_after_job, PreferMountedVols;
90 * Get JobId and permissions from Director
92 Dmsg1(100, "<dird: %s", dir->msg);
/*
 * Preset the spool size to "0": oldjobcmd has no SpoolSize field, so this
 * is the value str_to_int64() converts below when that pattern is used.
 */
93 bstrncpy(spool_size, "0", sizeof(spool_size));
94 stat = sscanf(dir->msg, jobcmd, &JobId, job.c_str(), job_name.c_str(),
96 &JobType, &level, fileset_name.c_str(), &no_attributes,
97 &spool_attributes, fileset_md5.c_str(), &spool_data,
98 &write_part_after_job, &PreferMountedVols, spool_size);
100 /* Try old version */
101 stat = sscanf(dir->msg, oldjobcmd, &JobId, job.c_str(), job_name.c_str(),
103 &JobType, &level, fileset_name.c_str(), &no_attributes,
104 &spool_attributes, fileset_md5.c_str(), &spool_data,
105 &write_part_after_job, &PreferMountedVols);
/*
 * Rejection reply: echo the received command and the sscanf() result back
 * to the Director, then mark the job as terminated in error.
 */
107 pm_strcpy(jcr->errmsg, dir->msg);
108 dir->fsend(BAD_job, stat, jcr->errmsg);
109 Dmsg1(100, ">dird: %s", dir->msg);
110 set_jcr_job_status(jcr, JS_ErrorTerminated);
115 * Since this job could be rescheduled, we
116 * check to see if we have it already. If so
117 * free the old jcr and use the new one.
119 ojcr = get_jcr_by_full_name(job.c_str());
/*
 * Only an earlier JCR that has not been authenticated is considered.
 * NOTE(review): (unsigned)(long)ojcr with %x drops the upper pointer bits
 * on 64-bit platforms; this affects the debug text only (%p would print
 * the full address).
 */
120 if (ojcr && !ojcr->authenticated) {
121 Dmsg2(100, "Found ojcr=0x%x Job %s\n", (unsigned)(long)ojcr, job.c_str());
125 jcr->VolSessionId = newVolSessionId();
126 jcr->VolSessionTime = VolSessionTime;
127 bstrncpy(jcr->Job, job, sizeof(jcr->Job));
/*
 * Each name is passed through unbash_spaces() (presumably restoring spaces
 * that were encoded for transmission -- confirm) and then copied into pool
 * memory held by the JCR; stored_free_jcr() releases these buffers.
 */
128 unbash_spaces(job_name);
129 jcr->job_name = get_pool_memory(PM_NAME);
130 pm_strcpy(jcr->job_name, job_name);
131 unbash_spaces(client_name);
132 jcr->client_name = get_pool_memory(PM_NAME);
133 pm_strcpy(jcr->client_name, client_name);
134 unbash_spaces(fileset_name);
135 jcr->fileset_name = get_pool_memory(PM_NAME);
136 pm_strcpy(jcr->fileset_name, fileset_name);
137 jcr->JobType = JobType;
138 jcr->JobLevel = level;
139 jcr->no_attributes = no_attributes;
140 jcr->spool_attributes = spool_attributes;
141 jcr->spool_data = spool_data;
142 jcr->spool_size = str_to_int64(spool_size);
143 jcr->write_part_after_job = write_part_after_job;
144 jcr->fileset_md5 = get_pool_memory(PM_NAME);
145 pm_strcpy(jcr->fileset_md5, fileset_md5);
146 jcr->PreferMountedVols = PreferMountedVols;
/*
 * Starts out unauthenticated; run_cmd() and handle_filed_connection() are
 * the places in this file that set the flag to true.
 */
148 jcr->authenticated = false;
151 * Pass back an authorization key for the File daemon
/* The seed is built from the JCR address and the JobId. */
153 bsnprintf(seed, sizeof(seed), "%p%d", jcr, JobId);
154 make_session_key(auth_key, seed, 1);
155 dir->fsend(OKjob, jcr->VolSessionId, jcr->VolSessionTime, auth_key);
156 Dmsg2(100, ">dird jid=%u: %s", (uint32_t)jcr->JobId, dir->msg);
/* Keep a duplicate in the JCR, then zero the local copy of the key. */
157 jcr->sd_auth_key = bstrdup(auth_key);
158 memset(auth_key, 0, sizeof(auth_key));
159 generate_daemon_event(jcr, "JobStart");
/*
 * run_cmd: wait for the File daemon to authenticate, then run the job.
 *
 *  jcr  job control record previously filled in by job_cmd().
 *
 * Visible steps:
 *  - a switch on jcr->JobType marks some job types as authenticated
 *    without a File daemon;
 *  - the job status is set to JS_WaitFD and reported to the Director;
 *  - an absolute deadline of "now + me->client_wait seconds" is computed;
 *  - the thread waits on jcr->job_start_wait until jcr->authenticated is
 *    set or the job is canceled; ETIMEDOUT, EINVAL and EPERM from the wait
 *    get special handling;
 *  - jcr->sd_auth_key is overwritten with zeros;
 *  - run_job() is called when the job is authenticated and not canceled.
 *
 * NOTE(review): the case labels, the lock/unlock of the file-scope mutex
 * around the wait, the body of the error check and the return statements
 * are not visible here; confirm against the complete source.
 */
163 bool run_cmd(JCR *jcr)
167 struct timespec timeout;
171 Dmsg1(200, "Run_cmd: %s\n", jcr->dir_bsock->msg);
172 /* The following jobs don't need the FD */
173 switch (jcr->JobType) {
177 jcr->authenticated = true;
182 set_jcr_job_status(jcr, JS_WaitFD); /* wait for FD to connect */
183 dir_send_job_status(jcr);
/*
 * Build the absolute deadline for pthread_cond_timedwait(): the current
 * time of day plus me->client_wait seconds (microseconds converted to
 * nanoseconds for tv_nsec).
 */
185 gettimeofday(&tv, &tz);
186 timeout.tv_nsec = tv.tv_usec * 1000;
187 timeout.tv_sec = tv.tv_sec + me->client_wait;
/*
 * NOTE(review): 050 is an octal literal (decimal 40), and this debug line
 * prints the authorization key -- confirm both are intended.
 */
189 Dmsg3(050, "%s waiting %d sec for FD to contact SD key=%s\n",
190 jcr->Job, (int)(timeout.tv_sec-time(NULL)), jcr->sd_auth_key);
193 * Wait for the File daemon to contact us to start the Job,
194 * when he does, we will be released, unless the 30 minutes
/*
 * The wait is re-checked in a loop, so a wakeup that leaves the job
 * neither authenticated nor canceled simply waits again.
 * handle_filed_connection() signals jcr->job_start_wait.
 */
198 while ( !jcr->authenticated && !job_canceled(jcr) ) {
199 errstat = pthread_cond_timedwait(&jcr->job_start_wait, &mutex, &timeout);
200 if (errstat == ETIMEDOUT || errstat == EINVAL || errstat == EPERM) {
204 Dmsg3(100, "Auth=%d canceled=%d errstat=%d\n", jcr->authenticated,
205 job_canceled(jcr), errstat);
/* Overwrite the key text with zeros; strlen() supplies the length. */
208 memset(jcr->sd_auth_key, 0, strlen(jcr->sd_auth_key));
210 if (jcr->authenticated && !job_canceled(jcr)) {
211 Dmsg1(100, "Running job %s\n", jcr->Job);
212 run_job(jcr); /* Run the job */
218 * After receiving a connection (in dircmd.c) if it is
219 * from the File daemon, this routine is called.
/*
 * handle_filed_connection: attach an incoming File daemon socket to its
 * job and authenticate the File daemon.
 *
 *  fd        socket of the connecting File daemon
 *  job_name  full job name used to look up the JCR
 *
 * Visible steps:
 *  - look the JCR up by full name; when none is found a fatal message
 *    naming the job is issued;
 *  - store fd in jcr->file_bsock and associate the JCR with that socket;
 *  - complain if the job is already authenticated;
 *  - call authenticate_filed(); jcr->authenticated is set to true in the
 *    success case;
 *  - set JS_ErrorTerminated when the job is not authenticated;
 *  - signal jcr->job_start_wait, the condition run_cmd() waits on.
 *
 * NOTE(review): braces, else branches and return statements are not visible
 * here, so which of these steps run on which path must be confirmed
 * against the complete source.
 */
221 void handle_filed_connection(BSOCK *fd, char *job_name)
226 * With the following bmicrosleep on, running the
227 * SD under the debugger fails.
229 // bmicrosleep(0, 50000); /* wait 50 millisecs */
230 if (!(jcr=get_jcr_by_full_name(job_name))) {
231 Jmsg1(NULL, M_FATAL, 0, _("FD connect failed: Job name not found: %s\n"), job_name);
232 Dmsg1(3, "**** Job \"%s\" not found.\n", job_name);
236 jcr->file_bsock = fd;
237 jcr->file_bsock->set_jcr(jcr);
239 Dmsg1(110, "Found Job %s\n", job_name);
/* A second connection for a job that is already authenticated is fatal. */
241 if (jcr->authenticated) {
242 Jmsg2(jcr, M_FATAL, 0, _("Hey!!!! JobId %u Job %s already authenticated.\n"),
243 (uint32_t)jcr->JobId, jcr->Job);
249 * Authenticate the File daemon
/*
 * Because || short-circuits, authenticate_filed() is only called when the
 * job is not yet authenticated; an already-authenticated job is reported as
 * an authentication failure as well.
 */
251 if (jcr->authenticated || !authenticate_filed(jcr)) {
252 Dmsg1(100, "Authentication failed Job %s\n", jcr->Job);
253 Jmsg(jcr, M_FATAL, 0, _("Unable to authenticate File daemon\n"));
255 jcr->authenticated = true;
256 Dmsg2(110, "OK Authentication jid=%u Job %s\n", (uint32_t)jcr->JobId, jcr->Job);
259 if (!jcr->authenticated) {
260 set_jcr_job_status(jcr, JS_ErrorTerminated);
262 pthread_cond_signal(&jcr->job_start_wait); /* wake waiting job */
270 * Query Device command from Director
271 * Sends Storage Daemon's information on the device to the
272 * caller (presumably the Director).
273 * This command always returns "true" so that the line is
274 * not closed on an error.
/*
 * query_cmd: answer a device query from the Director on jcr->dir_bsock.
 *
 *  jcr  job control record of the Director connection.
 *
 * Visible steps:
 *  - scan one device name from dir->msg using the query_device pattern;
 *  - search the R_DEVICE resources for that name; for a match,
 *    init_dev() and dir_update_device() are called and OK_query or
 *    NO_query is sent;
 *  - search the R_AUTOCHANGER resources for that name, skipping a changer
 *    that has no devices; for a match, dir_update_changer() is called and
 *    OK_query or NO_query is sent;
 *  - send NO_device when neither search matched;
 *  - send BAD_query with the received text (presumably when the scan of
 *    dir->msg failed -- confirm).
 *
 * NOTE(review): query_device, NO_device, OK_query, NO_query and BAD_query
 * are not defined in the visible part of this file (the three *_query
 * strings appear only as commented-out lines), so this function may be
 * compiled out. The conditions choosing between the replies and the return
 * statements are not visible either; confirm against the complete source.
 */
277 bool query_cmd(JCR *jcr)
279 POOL_MEM dev_name, VolumeName, MediaType, ChangerName;
280 BSOCK *dir = jcr->dir_bsock;
282 AUTOCHANGER *changer;
285 Dmsg1(100, "Query_cmd: %s", dir->msg);
/* ok is true only when exactly one field (the device name) was scanned. */
286 ok = sscanf(dir->msg, query_device, dev_name.c_str()) == 1;
287 Dmsg1(100, "<dird: %s\n", dir->msg);
289 unbash_spaces(dev_name);
290 foreach_res(device, R_DEVICE) {
291 /* Find resource, and make sure we were able to open it */
292 if (strcmp(dev_name.c_str(), device->hdr.name) == 0) {
294 device->dev = init_dev(jcr, device);
299 ok = dir_update_device(jcr, device->dev);
301 ok = dir->fsend(OK_query);
303 dir->fsend(NO_query);
308 foreach_res(changer, R_AUTOCHANGER) {
309 /* Find resource, and make sure we were able to open it */
310 if (strcmp(dev_name.c_str(), changer->hdr.name) == 0) {
311 if (!changer->device || changer->device->size() == 0) {
312 continue; /* no devices */
314 ok = dir_update_changer(jcr, changer);
316 ok = dir->fsend(OK_query);
318 dir->fsend(NO_query);
323 /* If we get here, the device/autochanger was not found */
324 unbash_spaces(dir->msg);
325 pm_strcpy(jcr->errmsg, dir->msg);
326 dir->fsend(NO_device, dev_name.c_str());
327 Dmsg1(100, ">dird: %s\n", dir->msg);
/* Reply carrying the text of the received command. */
329 unbash_spaces(dir->msg);
330 pm_strcpy(jcr->errmsg, dir->msg);
331 dir->fsend(BAD_query, jcr->errmsg);
332 Dmsg1(100, ">dird: %s\n", dir->msg);
342 * Destroy the Job Control Record and associated
343 * resources (sockets).
/*
 * stored_free_jcr: release the Storage-daemon-specific resources of a JCR.
 *
 *  jcr  job control record being destroyed.
 *
 * Visible steps: close the File daemon socket, free the name and FileSet
 * buffers, remove and free the restore bootstrap file, report a fatal
 * error if the JCR is still linked to a device, destroy the
 * job_start_wait condition variable, free the device control records,
 * delete the read/write storage lists and write the daemon state file.
 *
 * NOTE(review): several guarding conditions and closing braces are not
 * visible here; confirm details against the complete source.
 */
345 void stored_free_jcr(JCR *jcr)
347 Dmsg1(900, "stored_free_jcr JobId=%u\n", jcr->JobId);
348 if (jcr->file_bsock) {
349 jcr->file_bsock->close();
350 jcr->file_bsock = NULL;
/*
 * NOTE(review): job_name is released with free_pool_memory() while the
 * other buffers below use free_memory(); job_cmd() obtains all of them
 * from get_pool_memory() -- confirm the two calls are interchangeable.
 */
353 free_pool_memory(jcr->job_name);
355 if (jcr->client_name) {
356 free_memory(jcr->client_name);
357 jcr->client_name = NULL;
/*
 * NOTE(review): unlike client_name, no reset to NULL is visible after
 * fileset_name and fileset_md5 are freed.
 */
359 if (jcr->fileset_name) {
360 free_memory(jcr->fileset_name);
362 if (jcr->fileset_md5) {
363 free_memory(jcr->fileset_md5);
/* The bootstrap file is removed from disk before its path is freed. */
369 if (jcr->RestoreBootstrap) {
370 unlink(jcr->RestoreBootstrap);
371 free_pool_memory(jcr->RestoreBootstrap);
372 jcr->RestoreBootstrap = NULL;
/* A JCR that still has device list links at this point is reported. */
374 if (jcr->next_dev || jcr->prev_dev) {
375 Emsg0(M_FATAL, 0, _("In free_jcr(), but still attached to device!!!!\n"));
377 pthread_cond_destroy(&jcr->job_start_wait);
383 /* Avoid a double free */
/* When dcr and read_dcr are the same object, read_dcr is cleared first. */
384 if (jcr->dcr == jcr->read_dcr) {
385 jcr->read_dcr = NULL;
392 free_dcr(jcr->read_dcr);
393 jcr->read_dcr = NULL;
/* For each list: delete every entry's device member, then the list. */
396 if (jcr->read_store) {
398 foreach_alist(store, jcr->read_store) {
399 delete store->device;
402 delete jcr->read_store;
403 jcr->read_store = NULL;
405 if (jcr->write_store) {
407 foreach_alist(store, jcr->write_store) {
408 delete store->device;
411 delete jcr->write_store;
412 jcr->write_store = NULL;
/*
 * Write the "bacula-sd" state file into me->working_directory, passing
 * the first port from me->sdaddrs.
 */
417 write_state_file(me->working_directory, "bacula-sd", get_first_port_host_order(me->sdaddrs));