2 * Job control and execution for Storage Daemon
10 Bacula® - The Network Backup Solution
12 Copyright (C) 2000-2007 Free Software Foundation Europe e.V.
14 The main author of Bacula is Kern Sibbald, with contributions from
15 many others, a complete list can be found in the file AUTHORS.
16 This program is Free Software; you can redistribute it and/or
17 modify it under the terms of version two of the GNU General Public
18 License as published by the Free Software Foundation plus additions
19 that are listed in the file LICENSE.
21 This program is distributed in the hope that it will be useful, but
22 WITHOUT ANY WARRANTY; without even the implied warranty of
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 General Public License for more details.
26 You should have received a copy of the GNU General Public License
27 along with this program; if not, write to the Free Software
28 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
31 Bacula® is a registered trademark of John Walker.
32 The licensor of Bacula is the Free Software Foundation Europe
33 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
34 Switzerland, email:ftf@fsfeurope.org.
/*
 * File-scope state and the Director protocol strings used by this file.
 * mutex is the mutex passed to pthread_cond_timedwait() in run_cmd().
 */
40 static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
42 /* Imported variables */
43 extern uint32_t VolSessionTime;
45 /* Imported functions */
46 extern uint32_t newVolSessionId();
47 extern bool do_mac(JCR *jcr);
49 /* Requests from the Director daemon */
/*
 * sscanf() template that job_cmd() applies to the Director request.
 * It holds 13 conversions in total. Each %127s conversion may store up
 * to 127 characters plus the terminating NUL, so every string
 * destination must provide at least 128 bytes.
 */
50 static char jobcmd[] = "JobId=%d job=%127s job_name=%127s client_name=%127s "
51 "type=%d level=%d FileSet=%127s NoAttr=%d SpoolAttr=%d FileSetMD5=%127s "
52 "SpoolData=%d WritePartAfterJob=%d PreferMountedVols=%d\n";
55 /* Responses sent to Director daemon */
/* OKjob carries VolSessionId, VolSessionTime and the authorization key. */
56 static char OKjob[] = "3000 OK Job SDid=%u SDtime=%u Authorization=%s\n";
/* BAD_job carries the sscanf() result and the offending command text. */
57 static char BAD_job[] = "3915 Bad Job command. stat=%d CMD: %s\n";
/*
 * NOTE(review): the three query responses below are commented out, yet
 * query_cmd() in this file still references OK_query, NO_query and
 * BAD_query (and also query_device and NO_device, which are not defined
 * anywhere in view). Presumably query_cmd() is compiled out or these
 * strings are defined elsewhere - confirm.
 */
58 //static char OK_query[] = "3001 OK query\n";
59 //static char NO_query[] = "3918 Query failed\n";
60 //static char BAD_query[] = "3917 Bad query command: %s\n";
63 * Director requests us to start a job
64 * Basic tasks done here:
65 * - We pick up the JobId to be run from the Director.
66 * - We pick up the device, media, and pool from the Director
67 * - Wait for a connection from the File Daemon (FD)
68 * - Accept commands from the FD (i.e. run the job)
69 * - Return when the connection is terminated or
/*
 * Handle the Job command from the Director: parse the request held in
 * dir->msg with the jobcmd template, copy the parsed fields into the JCR,
 * and answer with OKjob, which carries a freshly made authorization key.
 *
 * NOTE(review): this listing omits lines. The declarations of stat, JobId,
 * ojcr and auth_key, the condition guarding the error reply, and the
 * return statements are not visible, so the notes below describe only the
 * statements that are shown.
 */
72 bool job_cmd(JCR *jcr)
76 BSOCK *dir = jcr->dir_bsock;
77 POOL_MEM job_name, client_name, job, fileset_name, fileset_md5;
78 int JobType, level, spool_attributes, no_attributes, spool_data;
79 int write_part_after_job, PreferMountedVols;
84 * Get JobId and permissions from Director
86 Dmsg1(100, "<dird: %s", dir->msg);
/*
 * NOTE(review): jobcmd has 13 conversions but only 12 destinations are
 * visible in this call; no client_name destination is shown, although
 * client_name is used further down. Presumably that argument sits on an
 * omitted line - confirm against the full file.
 * The %127s conversions need 128-byte destinations; this assumes the
 * default POOL_MEM buffers are at least that large - TODO confirm.
 */
87 stat = sscanf(dir->msg, jobcmd, &JobId, job.c_str(), job_name.c_str(),
89 &JobType, &level, fileset_name.c_str(), &no_attributes,
90 &spool_attributes, fileset_md5.c_str(), &spool_data,
91 &write_part_after_job, &PreferMountedVols);
/*
 * Error reply: echo the raw command and the sscanf() count back to the
 * Director and mark the job as terminated in error. Presumably this runs
 * only when stat is not the expected count; the guard is not visible.
 */
93 pm_strcpy(jcr->errmsg, dir->msg);
94 bnet_fsend(dir, BAD_job, stat, jcr->errmsg);
95 Dmsg1(100, ">dird: %s", dir->msg);
96 set_jcr_job_status(jcr, JS_ErrorTerminated);
100 * Since this job could be rescheduled, we
101 * check to see if we have it already. If so
102 * free the old jcr and use the new one.
/*
 * Only an older JCR that has not been authenticated is handled here.
 * The visible code just logs it; the release of ojcr is not shown.
 */
104 ojcr = get_jcr_by_full_name(job.c_str());
105 if (ojcr && !ojcr->authenticated) {
106 Dmsg2(100, "Found ojcr=0x%x Job %s\n", (unsigned)(long)ojcr, job.c_str());
/* Assign a new volume session id and record the imported session time. */
110 jcr->VolSessionId = newVolSessionId();
111 jcr->VolSessionTime = VolSessionTime;
/* The copy into jcr->Job is bounded by the size of that field. */
112 bstrncpy(jcr->Job, job, sizeof(jcr->Job));
/*
 * Each name is passed through unbash_spaces() and then copied into a
 * pool buffer obtained with get_pool_memory(PM_NAME).
 * NOTE(review): the buffers are assigned unconditionally, so any memory
 * already held in these JCR fields would be lost. This assumes job_cmd()
 * runs once per JCR - confirm.
 */
113 unbash_spaces(job_name);
114 jcr->job_name = get_pool_memory(PM_NAME);
115 pm_strcpy(jcr->job_name, job_name);
116 unbash_spaces(client_name);
117 jcr->client_name = get_pool_memory(PM_NAME);
118 pm_strcpy(jcr->client_name, client_name);
119 unbash_spaces(fileset_name);
120 jcr->fileset_name = get_pool_memory(PM_NAME);
121 pm_strcpy(jcr->fileset_name, fileset_name);
122 jcr->JobType = JobType;
123 jcr->JobLevel = level;
124 jcr->no_attributes = no_attributes;
125 jcr->spool_attributes = spool_attributes;
126 jcr->spool_data = spool_data;
127 jcr->write_part_after_job = write_part_after_job;
128 jcr->fileset_md5 = get_pool_memory(PM_NAME);
129 pm_strcpy(jcr->fileset_md5, fileset_md5);
130 jcr->PreferMountedVols = PreferMountedVols;
/* Cleared here; handle_filed_connection() and run_cmd() set it to true. */
132 jcr->authenticated = false;
135 * Pass back an authorization key for the File daemon
/*
 * The key is sent to the Director, duplicated into jcr->sd_auth_key and
 * then wiped from the local buffer.
 * NOTE(review): the Dmsg1 call prints dir->msg right after the send;
 * presumably dir->msg then holds the formatted reply, which would put
 * the key into level 100 debug output - confirm.
 * NOTE(review): a plain memset() on a buffer that is not read again may
 * be removed by the optimizer; a non-elidable wipe would be safer.
 */
137 make_session_key(auth_key, NULL, 1);
138 bnet_fsend(dir, OKjob, jcr->VolSessionId, jcr->VolSessionTime, auth_key);
139 Dmsg1(100, ">dird: %s", dir->msg);
140 jcr->sd_auth_key = bstrdup(auth_key);
141 memset(auth_key, 0, sizeof(auth_key));
142 generate_daemon_event(jcr, "JobStart");
/*
 * Handle the run command from the Director. Job types that do not need a
 * File daemon are marked authenticated straight away. Otherwise the job
 * is put into JS_WaitFD and this thread waits on jcr->job_start_wait,
 * with a 30 minute deadline, for the File daemon to connect. The job is
 * run only if it ended up authenticated and was not canceled.
 *
 * NOTE(review): this listing omits lines. The declarations of tv, tz and
 * errstat, the case labels of the switch, any locking of mutex, the body
 * of the loop test and the return statements are not visible.
 */
146 bool run_cmd(JCR *jcr)
150 struct timespec timeout;
153 Dmsg1(200, "Run_cmd: %s\n", jcr->dir_bsock->msg);
154 /* The following jobs don't need the FD */
155 switch (jcr->JobType) {
159 jcr->authenticated = true;
164 set_jcr_job_status(jcr, JS_WaitFD); /* wait for FD to connect */
165 dir_send_job_status(jcr);
/* Build the absolute deadline: current time of day plus 30 minutes. */
167 gettimeofday(&tv, &tz);
168 timeout.tv_nsec = tv.tv_usec * 1000;
169 timeout.tv_sec = tv.tv_sec + 30 * 60; /* wait 30 minutes */
171 Dmsg1(100, "%s waiting on FD to contact SD\n", jcr->Job);
173 * Wait for the File daemon to contact us to start the Job,
174 * when he does, we will be released, unless the 30 minutes
/*
 * NOTE(review): pthread_cond_timedwait() must be called with mutex locked
 * by the calling thread; no lock or unlock is visible here - confirm.
 * NOTE(review): a return of 0 is treated the same as ETIMEDOUT. POSIX
 * permits spurious wakeups that also return 0, and no predicate such as
 * jcr->authenticated is re-checked in the visible loop, so the wait may
 * presumably end before the File daemon has connected - confirm.
 */
178 for ( ;!job_canceled(jcr); ) {
179 errstat = pthread_cond_timedwait(&jcr->job_start_wait, &mutex, &timeout);
180 if (errstat == 0 || errstat == ETIMEDOUT) {
/*
 * Overwrite the stored authorization key with zeros now that the wait is
 * over. Assumes sd_auth_key is not NULL (job_cmd() sets it) - confirm.
 */
186 memset(jcr->sd_auth_key, 0, strlen(jcr->sd_auth_key));
188 if (jcr->authenticated && !job_canceled(jcr)) {
189 Dmsg1(100, "Running job %s\n", jcr->Job);
190 run_job(jcr); /* Run the job */
196 * After receiving a connection (in job.c) if it is
197 * from the File daemon, this routine is called.
/*
 * Attach an incoming File daemon connection to its job. The JCR is looked
 * up by job_name, the socket is stored in it, the File daemon is
 * authenticated, and the thread waiting on jcr->job_start_wait is woken.
 *
 * fd       - socket of the connecting File daemon
 * job_name - full job name used to look up the JCR
 *
 * NOTE(review): this listing omits lines. The declaration of jcr, the
 * early returns, the else branch and any release of the JCR obtained
 * from get_jcr_by_full_name() are not visible.
 */
199 void handle_filed_connection(BSOCK *fd, char *job_name)
203 bmicrosleep(0, 50000); /* wait 50 millisecs */
204 if (!(jcr=get_jcr_by_full_name(job_name))) {
205 Jmsg1(NULL, M_FATAL, 0, _("Job name not found: %s\n"), job_name);
206 Dmsg1(100, "Job name not found: %s\n", job_name);
/*
 * NOTE(review): the socket is stored in the JCR before the check for an
 * already authenticated job. If a second connection names a job that is
 * already authenticated, that job's file_bsock has been replaced by the
 * time the error is reported. Consider moving these two statements after
 * the check below.
 */
210 jcr->file_bsock = fd;
211 jcr->file_bsock->set_jcr(jcr);
213 Dmsg1(110, "Found Job %s\n", job_name);
/* A job that is already authenticated must not accept another connection. */
215 if (jcr->authenticated) {
216 Jmsg2(jcr, M_FATAL, 0, _("Hey!!!! JobId %u Job %s already authenticated.\n"),
217 jcr->JobId, jcr->Job);
223 * Authenticate the File daemon
/*
 * NOTE(review): the jcr->authenticated term is redundant if the branch
 * above returns; that return is not visible in this listing - confirm.
 */
225 if (jcr->authenticated || !authenticate_filed(jcr)) {
226 Dmsg1(100, "Authentication failed Job %s\n", jcr->Job);
227 Jmsg(jcr, M_FATAL, 0, _("Unable to authenticate File daemon\n"));
229 jcr->authenticated = true;
230 Dmsg1(110, "OK Authentication Job %s\n", jcr->Job);
/*
 * A failed authentication marks the job as terminated in error. The
 * waiting job is signalled in the visible code whether or not the
 * authentication succeeded.
 */
233 if (!jcr->authenticated) {
234 set_jcr_job_status(jcr, JS_ErrorTerminated);
236 pthread_cond_signal(&jcr->job_start_wait); /* wake waiting job */
244 * Query Device command from Director
245 * Sends Storage Daemon's information on the device to the
246 * caller (presumably the Director).
247 * This command always returns "true" so that the line is
248 * not closed on an error.
/*
 * Handle the device query from the Director: read a device name from
 * dir->msg, search the device resources and then the autochanger
 * resources for a match, and send the result back on dir.
 *
 * NOTE(review): this listing omits lines. The declarations of ok and
 * device, several conditions, the else branches and the return statements
 * are not visible. query_device, NO_device, OK_query, NO_query and
 * BAD_query are not defined in view either (the last three appear only
 * as commented-out definitions near the top of the file) - confirm
 * whether this function is actually compiled.
 */
251 bool query_cmd(JCR *jcr)
253 POOL_MEM dev_name, VolumeName, MediaType, ChangerName;
254 BSOCK *dir = jcr->dir_bsock;
256 AUTOCHANGER *changer;
259 Dmsg1(100, "Query_cmd: %s", dir->msg);
/* ok is true only when exactly one field was converted. */
260 ok = sscanf(dir->msg, query_device, dev_name.c_str()) == 1;
261 Dmsg1(100, "<dird: %s\n", dir->msg);
263 unbash_spaces(dev_name);
264 foreach_res(device, R_DEVICE) {
265 /* Find resource, and make sure we were able to open it */
/*
 * NOTE(review): the name received from the Director is used as the
 * fnmatch() pattern, so wildcard characters in it are interpreted and
 * the match is not a plain string comparison - confirm this is intended.
 */
266 if (fnmatch(dev_name.c_str(), device->hdr.name, 0) == 0) {
/* Presumably done only when device->dev is not yet set; guard not visible. */
268 device->dev = init_dev(jcr, device);
273 ok = dir_update_device(jcr, device->dev);
275 ok = bnet_fsend(dir, OK_query);
277 bnet_fsend(dir, NO_query);
282 foreach_res(changer, R_AUTOCHANGER) {
283 /* Find resource, and make sure we were able to open it */
284 if (fnmatch(dev_name.c_str(), changer->hdr.name, 0) == 0) {
/* An autochanger with no device list, or an empty one, is skipped. */
285 if (!changer->device || changer->device->size() == 0) {
286 continue; /* no devices */
288 ok = dir_update_changer(jcr, changer);
290 ok = bnet_fsend(dir, OK_query);
292 bnet_fsend(dir, NO_query);
297 /* If we get here, the device/autochanger was not found */
298 unbash_spaces(dir->msg);
299 pm_strcpy(jcr->errmsg, dir->msg);
300 bnet_fsend(dir, NO_device, dev_name.c_str());
301 Dmsg1(100, ">dird: %s\n", dir->msg);
/*
 * Reply that echoes the command text with BAD_query. Presumably this is
 * the branch taken when the sscanf() above failed; the branch structure
 * is not visible.
 */
303 unbash_spaces(dir->msg);
304 pm_strcpy(jcr->errmsg, dir->msg);
305 bnet_fsend(dir, BAD_query, jcr->errmsg);
306 Dmsg1(100, ">dird: %s\n", dir->msg);
316 * Destroy the Job Control Record and associated
317 * resources (sockets).
/*
 * Release the Storage daemon specific parts of a JCR: the File daemon
 * socket, the name buffers, the restore bootstrap file, the job start
 * condition variable, the read DCR and the read and write store lists.
 *
 * NOTE(review): this listing omits lines (several guarding conditions and
 * the declarations of store are not visible), and the function may
 * continue past the last line shown.
 */
319 void stored_free_jcr(JCR *jcr)
321 Dmsg1(900, "stored_free_jcr JobId=%u\n", jcr->JobId);
322 if (jcr->file_bsock) {
323 bnet_close(jcr->file_bsock);
324 jcr->file_bsock = NULL;
/*
 * NOTE(review): job_name is released with free_pool_memory() while
 * client_name, fileset_name and fileset_md5 use free_memory(), although
 * job_cmd() obtains all four with get_pool_memory(). Of the four, only
 * client_name is visibly reset to NULL here. Confirm that the two free
 * calls are equivalent, and consider clearing every freed pointer.
 */
327 free_pool_memory(jcr->job_name);
329 if (jcr->client_name) {
330 free_memory(jcr->client_name);
331 jcr->client_name = NULL;
333 if (jcr->fileset_name) {
334 free_memory(jcr->fileset_name);
336 if (jcr->fileset_md5) {
337 free_memory(jcr->fileset_md5);
/* The bootstrap file is removed from disk before its name is freed. */
343 if (jcr->RestoreBootstrap) {
344 unlink(jcr->RestoreBootstrap);
345 free_pool_memory(jcr->RestoreBootstrap);
346 jcr->RestoreBootstrap = NULL;
/* A JCR that is still linked to a device at this point is reported as fatal. */
348 if (jcr->next_dev || jcr->prev_dev) {
349 Emsg0(M_FATAL, 0, _("In free_jcr(), but still attached to device!!!!\n"));
351 pthread_cond_destroy(&jcr->job_start_wait);
362 free_dcr(jcr->read_dcr);
363 jcr->read_dcr = NULL;
/*
 * For each store list, the device member of every entry is deleted, then
 * the list itself is deleted and the pointer cleared. Whether the entries
 * themselves are also deleted is not visible in this listing.
 */
366 if (jcr->read_store) {
368 foreach_alist(store, jcr->read_store) {
369 delete store->device;
372 delete jcr->read_store;
373 jcr->read_store = NULL;
375 if (jcr->write_store) {
377 foreach_alist(store, jcr->write_store) {
378 delete store->device;
381 delete jcr->write_store;
382 jcr->write_store = NULL;