2 Bacula® - The Network Backup Solution
4 Copyright (C) 2000-2007 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from
7 many others, a complete list can be found in the file AUTHORS.
8 This program is Free Software; you can redistribute it and/or
9 modify it under the terms of version two of the GNU General Public
10 License as published by the Free Software Foundation and included
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 Bacula® is a registered trademark of John Walker.
24 The licensor of Bacula is the Free Software Foundation Europe
25 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
26 Switzerland, email:ftf@fsfeurope.org.
29 * Job control and execution for Storage Daemon
/* Mutex handed to pthread_cond_timedwait() in run_cmd() while a job waits for its File daemon. */
40 static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
42 /* Imported variables */
/* Copied into jcr->VolSessionTime by job_cmd(). */
43 extern uint32_t VolSessionTime;
45 /* Imported functions */
/* Supplies the value stored in jcr->VolSessionId by job_cmd(). */
46 extern uint32_t newVolSessionId();
47 extern bool do_mac(JCR *jcr);
49 /* Requests from the Director daemon */
/*
 * sscanf() format used by job_cmd() to parse the Director's job command.
 * Each string field is bounded to 127 characters (%127s).
 */
50 static char jobcmd[] = "JobId=%d job=%127s job_name=%127s client_name=%127s "
51 "type=%d level=%d FileSet=%127s NoAttr=%d SpoolAttr=%d FileSetMD5=%127s "
52 "SpoolData=%d WritePartAfterJob=%d PreferMountedVols=%d\n";
55 /* Responses sent to Director daemon */
/* Positive reply sent by job_cmd(): session id, session time and authorization key. */
56 static char OKjob[] = "3000 OK Job SDid=%u SDtime=%u Authorization=%s\n";
/* Negative reply sent by job_cmd(): the sscanf() result and the offending command text. */
57 static char BAD_job[] = "3915 Bad Job command. stat=%d CMD: %s\n";
/*
 * The three query replies below are commented out, yet query_cmd() still
 * references names spelled OK_query, NO_query and BAD_query.
 * NOTE(review): presumably query_cmd() is compiled out as well -- confirm.
 */
58 //static char OK_query[] = "3001 OK query\n";
59 //static char NO_query[] = "3918 Query failed\n";
60 //static char BAD_query[] = "3917 Bad query command: %s\n";
63 * Director requests us to start a job
64 * Basic tasks done here:
65 * - We pick up the JobId to be run from the Director.
66 * - We pick up the device, media, and pool from the Director
67 * - Wait for a connection from the File Daemon (FD)
68 * - Accept commands from the FD (i.e. run the job)
69 * - Return when the connection is terminated or
/*
 * Handle the Director's job command.
 *
 * Parses the request held in dir->msg with the jobcmd format, copies the
 * scanned values into the JCR, and answers with OKjob carrying the
 * VolSessionId, the VolSessionTime and a newly generated authorization
 * key.  A duplicate of that key is kept in jcr->sd_auth_key.
 *
 *   jcr - job control record; jcr->dir_bsock is the Director connection.
 */
72 bool job_cmd(JCR *jcr)
77 BSOCK *dir = jcr->dir_bsock;
/* Buffers for the string fields of the job command. */
78 POOL_MEM job_name, client_name, job, fileset_name, fileset_md5;
79 int JobType, level, spool_attributes, no_attributes, spool_data;
80 int write_part_after_job, PreferMountedVols;
85 * Get JobId and permissions from Director
87 Dmsg1(100, "<dird: %s", dir->msg);
/* stat receives the number of fields sscanf() converted; it is echoed in BAD_job. */
88 stat = sscanf(dir->msg, jobcmd, &JobId, job.c_str(), job_name.c_str(),
90 &JobType, &level, fileset_name.c_str(), &no_attributes,
91 &spool_attributes, fileset_md5.c_str(), &spool_data,
92 &write_part_after_job, &PreferMountedVols);
/*
 * Bad-command reply: the received text is saved in jcr->errmsg first,
 * because dir->fsend() formats the BAD_job response (the next Dmsg1 prints
 * dir->msg as the text sent), then the job is marked JS_ErrorTerminated.
 */
94 pm_strcpy(jcr->errmsg, dir->msg);
95 dir->fsend(BAD_job, stat, jcr->errmsg);
96 Dmsg1(100, ">dird: %s", dir->msg);
97 set_jcr_job_status(jcr, JS_ErrorTerminated);
101 * Since this job could be rescheduled, we
102 * check to see if we have it already. If so
103 * free the old jcr and use the new one.
/* Only an older JCR that has not been authenticated yet is considered here. */
105 ojcr = get_jcr_by_full_name(job.c_str());
106 if (ojcr && !ojcr->authenticated) {
/*
 * NOTE(review): the (unsigned)(long) cast keeps only the low-order bits of
 * the pointer where unsigned is narrower than a pointer; "%p" would print
 * the full value.
 */
107 Dmsg2(100, "Found ojcr=0x%x Job %s\n", (unsigned)(long)ojcr, job.c_str());
/* Record the session identifiers that are reported back in OKjob. */
111 jcr->VolSessionId = newVolSessionId();
112 jcr->VolSessionTime = VolSessionTime;
/* Copy bounded by the size of the jcr->Job array. */
113 bstrncpy(jcr->Job, job, sizeof(jcr->Job));
/*
 * Each name is passed through unbash_spaces() and then copied into pool
 * memory owned by the JCR; stored_free_jcr() releases these buffers.
 * NOTE(review): unbash_spaces() presumably restores blanks that were
 * encoded for transmission -- confirm against its definition.
 */
114 unbash_spaces(job_name);
115 jcr->job_name = get_pool_memory(PM_NAME);
116 pm_strcpy(jcr->job_name, job_name);
117 unbash_spaces(client_name);
118 jcr->client_name = get_pool_memory(PM_NAME);
119 pm_strcpy(jcr->client_name, client_name);
120 unbash_spaces(fileset_name);
121 jcr->fileset_name = get_pool_memory(PM_NAME);
122 pm_strcpy(jcr->fileset_name, fileset_name);
/* Plain copies of the numeric fields scanned from the command. */
123 jcr->JobType = JobType;
124 jcr->JobLevel = level;
125 jcr->no_attributes = no_attributes;
126 jcr->spool_attributes = spool_attributes;
127 jcr->spool_data = spool_data;
128 jcr->write_part_after_job = write_part_after_job;
129 jcr->fileset_md5 = get_pool_memory(PM_NAME);
130 pm_strcpy(jcr->fileset_md5, fileset_md5);
131 jcr->PreferMountedVols = PreferMountedVols;
/*
 * Starts out unauthenticated; handle_filed_connection() sets the flag after
 * a successful authenticate_filed(), and run_cmd() sets it directly in one
 * of its JobType switch branches.
 */
133 jcr->authenticated = false;
136 * Pass back an authorization key for the File daemon
/* The seed is built from the JCR address and the JobId. */
138 bsnprintf(seed, sizeof(seed), "%p%d", jcr, JobId);
139 make_session_key(auth_key, seed, 1);
140 dir->fsend(OKjob, jcr->VolSessionId, jcr->VolSessionTime, auth_key);
141 Dmsg2(100, ">dird jid=%u: %s", (uint32_t)jcr->JobId, dir->msg);
/* Keep a heap copy on the JCR, then clear the local buffer holding the key. */
142 jcr->sd_auth_key = bstrdup(auth_key);
143 memset(auth_key, 0, sizeof(auth_key));
144 generate_daemon_event(jcr, "JobStart");
/*
 * Handle the Director's run command.
 *
 * Sets the job status to JS_WaitFD, reports it to the Director, and waits
 * on jcr->job_start_wait until the job is authenticated, the job is
 * canceled, or the timed wait reports a timeout/error.  If the job ends up
 * authenticated and not canceled, run_job() is called.
 *
 *   jcr - job control record previously filled in by job_cmd().
 */
148 bool run_cmd(JCR *jcr)
152 struct timespec timeout;
156 Dmsg1(200, "Run_cmd: %s\n", jcr->dir_bsock->msg);
157 /* The following jobs don't need the FD */
158 switch (jcr->JobType) {
162 jcr->authenticated = true;
167 set_jcr_job_status(jcr, JS_WaitFD); /* wait for FD to connect */
168 dir_send_job_status(jcr);
/*
 * Build the absolute deadline for pthread_cond_timedwait(): current time
 * plus me->client_wait seconds.  tv_usec is microseconds, so * 1000 yields
 * the nanoseconds that struct timespec expects.
 */
170 gettimeofday(&tv, &tz);
171 timeout.tv_nsec = tv.tv_usec * 1000;
172 timeout.tv_sec = tv.tv_sec + me->client_wait;
/*
 * NOTE(review): 050 is an octal literal (decimal 40) -- confirm that is the
 * intended debug level.  This message also writes the authorization key
 * into the debug output.
 */
174 Dmsg3(050, "%s waiting %d sec for FD to contact SD key=%s\n",
175 jcr->Job, (int)me->client_wait, jcr->sd_auth_key);
/*
 * NOTE(review): the text below mentions 30 minutes, but the deadline
 * computed above uses me->client_wait.
 */
178 * Wait for the File daemon to contact us to start the Job,
179 * when he does, we will be released, unless the 30 minutes
/*
 * handle_filed_connection() signals job_start_wait after it has tried to
 * authenticate the File daemon.  The loop re-checks its condition after
 * every wakeup, so a wakeup that leaves the job neither authenticated nor
 * canceled simply waits again.
 */
183 while ( !jcr->authenticated && !job_canceled(jcr) ) {
184 errstat = pthread_cond_timedwait(&jcr->job_start_wait, &mutex, &timeout);
/* Deadline reached, or the wait itself failed (EINVAL / EPERM). */
185 if (errstat == ETIMEDOUT || errstat == EINVAL || errstat == EPERM) {
/* Overwrite the stored key with zero bytes, keeping its current length. */
191 memset(jcr->sd_auth_key, 0, strlen(jcr->sd_auth_key));
193 if (jcr->authenticated && !job_canceled(jcr)) {
194 Dmsg1(100, "Running job %s\n", jcr->Job);
195 run_job(jcr); /* Run the job */
201 * After receiving a connection (in dircmd.c) if it is
202 * from the File daemon, this routine is called.
/*
 * Attach an incoming File daemon connection to its job.
 *
 * Looks up the JCR by full job name, stores the socket in jcr->file_bsock,
 * authenticates the File daemon, and signals jcr->job_start_wait so that
 * the thread waiting in run_cmd() wakes up.
 *
 *   fd       - socket of the File daemon connection.
 *   job_name - full job name used for the JCR lookup.
 */
204 void handle_filed_connection(BSOCK *fd, char *job_name)
208 bmicrosleep(0, 50000); /* wait 50 millisecs */
/* No JCR with that name: a fatal message is logged with a NULL jcr. */
209 if (!(jcr=get_jcr_by_full_name(job_name))) {
210 Jmsg1(NULL, M_FATAL, 0, _("FD connect failed: Job name not found: %s\n"), job_name);
211 Dmsg1(3, "**** Job \"%s\" not found", job_name);
/*
 * NOTE(review): the socket is stored on the JCR before the
 * already-authenticated check below, so a second connection for the same
 * job replaces jcr->file_bsock before it is rejected -- confirm this is
 * intended.
 */
215 jcr->file_bsock = fd;
216 jcr->file_bsock->set_jcr(jcr);
218 Dmsg1(110, "Found Job %s\n", job_name);
/* A job that is already authenticated must not receive another FD connection. */
220 if (jcr->authenticated) {
221 Jmsg2(jcr, M_FATAL, 0, _("Hey!!!! JobId %u Job %s already authenticated.\n"),
222 (uint32_t)jcr->JobId, jcr->Job);
228 * Authenticate the File daemon
/* authenticate_filed() is only called when the job is not yet authenticated. */
230 if (jcr->authenticated || !authenticate_filed(jcr)) {
231 Dmsg1(100, "Authentication failed Job %s\n", jcr->Job);
232 Jmsg(jcr, M_FATAL, 0, _("Unable to authenticate File daemon\n"));
234 jcr->authenticated = true;
235 Dmsg2(110, "OK Authentication jid=%u Job %s\n", (uint32_t)jcr->JobId, jcr->Job);
/* A job left unauthenticated is marked as terminated in error. */
238 if (!jcr->authenticated) {
239 set_jcr_job_status(jcr, JS_ErrorTerminated);
241 pthread_cond_signal(&jcr->job_start_wait); /* wake waiting job */
249 * Query Device command from Director
250 * Sends Storage Daemon's information on the device to the
251 * caller (presumably the Director).
252 * This command always returns "true" so that the line is
253 * not closed on an error.
/*
 * Handle the Director's device query.
 *
 * Scans a device name out of dir->msg, then searches the R_DEVICE
 * resources and after that the R_AUTOCHANGER resources for an entry whose
 * hdr.name matches.  A match is reported with dir_update_device() or
 * dir_update_changer(); otherwise NO_device is sent, and BAD_query is used
 * for a command that could not be handled.
 *
 * NOTE(review): query_device, OK_query, NO_query, NO_device and BAD_query
 * are not defined in the visible part of this file (three of them appear
 * only as commented-out lines) -- presumably this function is compiled
 * out; confirm before relying on it.
 *
 *   jcr - job control record; jcr->dir_bsock is the Director connection.
 */
256 bool query_cmd(JCR *jcr)
258 POOL_MEM dev_name, VolumeName, MediaType, ChangerName;
259 BSOCK *dir = jcr->dir_bsock;
261 AUTOCHANGER *changer;
264 Dmsg1(100, "Query_cmd: %s", dir->msg);
/* ok is true only when exactly one field (the device name) was converted. */
265 ok = sscanf(dir->msg, query_device, dev_name.c_str()) == 1;
266 Dmsg1(100, "<dird: %s\n", dir->msg);
268 unbash_spaces(dev_name);
/* First pass: plain device resources, compared by exact name. */
269 foreach_res(device, R_DEVICE) {
270 /* Find resource, and make sure we were able to open it */
271 if (strcmp(dev_name.c_str(), device->hdr.name) == 0) {
273 device->dev = init_dev(jcr, device);
278 ok = dir_update_device(jcr, device->dev);
280 ok = dir->fsend(OK_query);
282 dir->fsend(NO_query);
/* Second pass: autochanger resources, compared by exact name. */
287 foreach_res(changer, R_AUTOCHANGER) {
288 /* Find resource, and make sure we were able to open it */
289 if (strcmp(dev_name.c_str(), changer->hdr.name) == 0) {
/* A changer without a device list, or with an empty one, is skipped. */
290 if (!changer->device || changer->device->size() == 0) {
291 continue; /* no devices */
293 ok = dir_update_changer(jcr, changer);
295 ok = dir->fsend(OK_query);
297 dir->fsend(NO_query);
302 /* If we get here, the device/autochanger was not found */
/* The received command is saved in jcr->errmsg before the reply is sent. */
303 unbash_spaces(dir->msg);
304 pm_strcpy(jcr->errmsg, dir->msg);
305 dir->fsend(NO_device, dev_name.c_str());
306 Dmsg1(100, ">dird: %s\n", dir->msg);
/* Bad-query reply: the received command text is echoed back. */
308 unbash_spaces(dir->msg);
309 pm_strcpy(jcr->errmsg, dir->msg);
310 dir->fsend(BAD_query, jcr->errmsg);
311 Dmsg1(100, ">dird: %s\n", dir->msg);
321 * Destroy the Job Control Record and associated
322 * resources (sockets).
/*
 * Release the Storage-daemon-specific resources held by a JCR: the File
 * daemon socket, the name buffers filled in by job_cmd(), the restore
 * bootstrap file, the job_start_wait condition variable, the read DCR and
 * the read/write storage lists.
 *
 *   jcr - job control record being torn down.
 */
324 void stored_free_jcr(JCR *jcr)
326 Dmsg1(900, "stored_free_jcr JobId=%u\n", jcr->JobId);
/* Close the File daemon connection and clear the pointer. */
327 if (jcr->file_bsock) {
328 jcr->file_bsock->close();
329 jcr->file_bsock = NULL;
/*
 * NOTE(review): job_name is released with free_pool_memory() while the
 * other name buffers below use free_memory(); all of them come from
 * get_pool_memory() in job_cmd().  Presumably the two calls are
 * equivalent -- confirm.
 */
332 free_pool_memory(jcr->job_name);
334 if (jcr->client_name) {
335 free_memory(jcr->client_name);
336 jcr->client_name = NULL;
338 if (jcr->fileset_name) {
339 free_memory(jcr->fileset_name);
341 if (jcr->fileset_md5) {
342 free_memory(jcr->fileset_md5);
/* The bootstrap file is removed from disk before its name buffer is freed. */
348 if (jcr->RestoreBootstrap) {
349 unlink(jcr->RestoreBootstrap);
350 free_pool_memory(jcr->RestoreBootstrap);
351 jcr->RestoreBootstrap = NULL;
/* A JCR that is still linked through next_dev/prev_dev is reported as a fatal error. */
353 if (jcr->next_dev || jcr->prev_dev) {
354 Emsg0(M_FATAL, 0, _("In free_jcr(), but still attached to device!!!!\n"));
356 pthread_cond_destroy(&jcr->job_start_wait);
362 /* Avoid a double free */
/* When dcr and read_dcr are the same object, read_dcr is cleared here. */
363 if (jcr->dcr == jcr->read_dcr) {
364 jcr->read_dcr = NULL;
371 free_dcr(jcr->read_dcr);
372 jcr->read_dcr = NULL;
/* For each storage list: delete every entry's device member, then the list itself. */
375 if (jcr->read_store) {
377 foreach_alist(store, jcr->read_store) {
378 delete store->device;
381 delete jcr->read_store;
382 jcr->read_store = NULL;
384 if (jcr->write_store) {
386 foreach_alist(store, jcr->write_store) {
387 delete store->device;
390 delete jcr->write_store;
391 jcr->write_store = NULL;