2 Bacula® - The Network Backup Solution
4 Copyright (C) 2000-2007 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from
7 many others, a complete list can be found in the file AUTHORS.
8 This program is Free Software; you can redistribute it and/or
9 modify it under the terms of version two of the GNU General Public
10 License as published by the Free Software Foundation and included
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 Bacula® is a registered trademark of John Walker.
24 The licensor of Bacula is the Free Software Foundation Europe
25 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
26 Switzerland, email:ftf@fsfeurope.org.
29 * Job control and execution for Storage Daemon
/* File-scope mutex; it is the mutex argument handed to pthread_cond_timedwait() in run_cmd(). */
40 static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
42 /* Imported variables */
43 extern uint32_t VolSessionTime;
45 /* Imported functions */
46 extern uint32_t newVolSessionId();
47 extern bool do_mac(JCR *jcr);
49 /* Requests from the Director daemon */
/*
 * sscanf() template for the Director's Job command, used by job_cmd().
 * It holds 13 conversions; every string field is limited to 127
 * characters by its %127s width.
 */
50 static char jobcmd[] = "JobId=%d job=%127s job_name=%127s client_name=%127s "
51 "type=%d level=%d FileSet=%127s NoAttr=%d SpoolAttr=%d FileSetMD5=%127s "
52 "SpoolData=%d WritePartAfterJob=%d PreferMountedVols=%d\n";
55 /* Responses sent to Director daemon */
/* OKjob carries the session id, session time and authorization key; BAD_job echoes the sscanf result and the offending command. */
56 static char OKjob[] = "3000 OK Job SDid=%u SDtime=%u Authorization=%s\n";
57 static char BAD_job[] = "3915 Bad Job command. stat=%d CMD: %s\n";
/*
 * NOTE(review): the three query responses below are commented out, yet
 * query_cmd() in this file still references OK_query, NO_query and
 * BAD_query (plus query_device and NO_device, which are not defined in
 * the visible text). Presumably query_cmd() is compiled out or these are
 * defined elsewhere -- confirm.
 */
58 //static char OK_query[] = "3001 OK query\n";
59 //static char NO_query[] = "3918 Query failed\n";
60 //static char BAD_query[] = "3917 Bad query command: %s\n";
63 * Director requests us to start a job
64 * Basic tasks done here:
65 * - We pick up the JobId to be run from the Director.
66 * - We pick up the device, media, and pool from the Director
67 * - Wait for a connection from the File Daemon (FD)
68 * - Accept commands from the FD (i.e. run the job)
69 * - Return when the connection is terminated or
/*
 * job_cmd: handle the Job command received from the Director.
 *
 * Parses jcr->dir_bsock->msg with the jobcmd template, stores the parsed
 * job parameters in the JCR, generates an authorization key and sends it
 * back to the Director in an OKjob reply. The error path sends BAD_job
 * and sets the job status to JS_ErrorTerminated.
 *
 * NOTE(review): the text of this function has gaps (braces, several local
 * declarations such as JobId/stat/ojcr/seed/auth_key, the condition that
 * guards the error path, and the return statements are not visible), so
 * the exact branch structure and return values must be confirmed against
 * the complete source.
 */
72 bool job_cmd(JCR *jcr)
77 BSOCK *dir = jcr->dir_bsock;
/* Scratch buffers that receive the %127s string fields of the command. */
78 POOL_MEM job_name, client_name, job, fileset_name, fileset_md5;
79 int JobType, level, spool_attributes, no_attributes, spool_data;
80 int write_part_after_job, PreferMountedVols;
85 * Get JobId and permissions from Director
87 Dmsg1(100, "<dird: %s", dir->msg);
/* Scan the command; string fields are written directly into the POOL_MEM buffers, and stat receives the sscanf() result. */
88 stat = sscanf(dir->msg, jobcmd, &JobId, job.c_str(), job_name.c_str(),
90 &JobType, &level, fileset_name.c_str(), &no_attributes,
91 &spool_attributes, fileset_md5.c_str(), &spool_data,
92 &write_part_after_job, &PreferMountedVols);
/*
 * Error reply: the received command is saved in jcr->errmsg and echoed
 * back in BAD_job together with stat. The copy is presumably needed
 * because fsend() reuses dir->msg for the outgoing text (the Dmsg1 that
 * follows prints dir->msg as the reply) -- confirm.
 */
94 pm_strcpy(jcr->errmsg, dir->msg);
95 dir->fsend(BAD_job, stat, jcr->errmsg);
96 Dmsg1(100, ">dird: %s", dir->msg);
97 set_jcr_job_status(jcr, JS_ErrorTerminated);
101 * Since this job could be rescheduled, we
102 * check to see if we have it already. If so
103 * free the old jcr and use the new one.
/* Look for an existing JCR with the same full job name; only one that is not yet authenticated is acted upon. */
105 ojcr = get_jcr_by_full_name(job.c_str());
106 if (ojcr && !ojcr->authenticated) {
/* NOTE(review): the pointer is cast through long to unsigned for %x, which drops the upper bits where pointers are wider than unsigned. */
107 Dmsg2(100, "Found ojcr=0x%x Job %s\n", (unsigned)(long)ojcr, job.c_str());
/* Assign a new volume session id and the imported session time. */
111 jcr->VolSessionId = newVolSessionId();
112 jcr->VolSessionTime = VolSessionTime;
/* Bounded copy of the full job name into the fixed-size jcr->Job array. */
113 bstrncpy(jcr->Job, job, sizeof(jcr->Job));
/* Each name has unbash_spaces() applied, then is copied into newly obtained PM_NAME pool memory owned by the JCR. */
114 unbash_spaces(job_name);
115 jcr->job_name = get_pool_memory(PM_NAME);
116 pm_strcpy(jcr->job_name, job_name);
117 unbash_spaces(client_name);
118 jcr->client_name = get_pool_memory(PM_NAME);
119 pm_strcpy(jcr->client_name, client_name);
120 unbash_spaces(fileset_name);
121 jcr->fileset_name = get_pool_memory(PM_NAME);
122 pm_strcpy(jcr->fileset_name, fileset_name);
/* Copy the scanned integer options into the JCR. */
123 jcr->JobType = JobType;
124 jcr->JobLevel = level;
125 jcr->no_attributes = no_attributes;
126 jcr->spool_attributes = spool_attributes;
127 jcr->spool_data = spool_data;
128 jcr->write_part_after_job = write_part_after_job;
129 jcr->fileset_md5 = get_pool_memory(PM_NAME);
130 pm_strcpy(jcr->fileset_md5, fileset_md5);
131 jcr->PreferMountedVols = PreferMountedVols;
/* Starts out unauthenticated; handle_filed_connection() sets this to true after authenticate_filed() succeeds. */
133 jcr->authenticated = false;
136 * Pass back an authorization key for the File daemon
/* The seed is the JCR address formatted with %p; make_session_key() fills auth_key from it. */
138 bsnprintf(seed, sizeof(seed), "%p", jcr);
139 make_session_key(auth_key, seed, 1);
140 dir->fsend(OKjob, jcr->VolSessionId, jcr->VolSessionTime, auth_key);
/* The reply is additionally traced at level 000 when debug_level is exactly 3. */
141 if (debug_level == 3) {
142 Dmsg2(000, ">dird jid=%u: %s", (uint32_t)jcr->JobId, dir->msg);
144 Dmsg2(100, ">dird jid=%u: %s", (uint32_t)jcr->JobId, dir->msg);
/* Keep a copy of the key in the JCR, then zero the local buffer. */
145 jcr->sd_auth_key = bstrdup(auth_key);
146 memset(auth_key, 0, sizeof(auth_key));
147 generate_daemon_event(jcr, "JobStart");
/*
 * run_cmd: wait for the File daemon and then run the job.
 *
 * Sets the job status to JS_WaitFD, reports it to the Director, and
 * waits on jcr->job_start_wait with an absolute deadline of now plus
 * me->client_wait seconds. After the wait, sd_auth_key is overwritten
 * with zeros, and run_job() is called only if the JCR is authenticated
 * and the job has not been canceled.
 *
 * NOTE(review): the text of this function has gaps (braces, the case
 * labels of the switch, declarations of tv/tz/errstat, and the return
 * statements are not visible); confirm details against the full source.
 */
151 bool run_cmd(JCR *jcr)
155 struct timespec timeout;
159 Dmsg1(200, "Run_cmd: %s\n", jcr->dir_bsock->msg);
160 /* The following jobs don't need the FD */
/* For the job types selected here (labels not visible) the JCR is marked authenticated directly. */
161 switch (jcr->JobType) {
165 jcr->authenticated = true;
170 set_jcr_job_status(jcr, JS_WaitFD); /* wait for FD to connect */
171 dir_send_job_status(jcr);
/* Build the absolute deadline: current time plus client_wait seconds, with microseconds converted to nanoseconds. */
173 gettimeofday(&tv, &tz);
174 timeout.tv_nsec = tv.tv_usec * 1000;
175 timeout.tv_sec = tv.tv_sec + me->client_wait;
177 Dmsg2(100, "%s waiting %d sec for FD to contact SD\n",
178 jcr->Job, (int)me->client_wait);
180 * Wait for the File daemon to contact us to start the Job,
181 * when he does, we will be released, unless the 30 minutes
/*
 * Loop until the job is canceled; a wait result of 0 (signaled) or
 * ETIMEDOUT is tested below -- presumably to leave the loop, the
 * statement inside the if is not visible.
 * NOTE(review): pthread_cond_timedwait() requires the file-scope mutex
 * to be locked by the calling thread; the lock/unlock calls are not
 * visible in this text -- confirm they surround the loop.
 */
185 for ( ; !job_canceled(jcr); ) {
186 errstat = pthread_cond_timedwait(&jcr->job_start_wait, &mutex, &timeout);
187 if (errstat == 0 || errstat == ETIMEDOUT) {
193 if (debug_level == 3) {
194 Dmsg1(000, "jid=%u Zap sd_auth_key\n", (uint32_t)jcr->JobId);
/* Overwrite the stored authorization key with zeros over its current string length. */
196 memset(jcr->sd_auth_key, 0, strlen(jcr->sd_auth_key));
/* Run the job only when authentication succeeded and the job was not canceled meanwhile. */
198 if (jcr->authenticated && !job_canceled(jcr)) {
199 Dmsg1(100, "Running job %s\n", jcr->Job);
200 run_job(jcr); /* Run the job */
206 * After receiving a connection (in dircmd.c) if it is
207 * from the File daemon, this routine is called.
/*
 * handle_filed_connection: attach an incoming File daemon connection to
 * its job.
 *
 *   fd        socket of the connecting File daemon
 *   job_name  full job name used to look up the JCR
 *
 * Looks up the JCR by job name, stores the socket in jcr->file_bsock,
 * authenticates the File daemon, and signals jcr->job_start_wait, the
 * condition variable run_cmd() waits on.
 *
 * NOTE(review): braces, the jcr declaration, else branches and return
 * statements are not visible in this text; confirm the exact control
 * flow against the full source.
 */
209 void handle_filed_connection(BSOCK *fd, char *job_name)
/* NOTE(review): the reason for this 50 ms delay is not stated in the visible code. */
213 bmicrosleep(0, 50000); /* wait 50 millisecs */
/* Unknown job name: report a fatal message (with no JCR) and log it. */
214 if (!(jcr=get_jcr_by_full_name(job_name))) {
215 Jmsg1(NULL, M_FATAL, 0, _("FD connect failed: Job name not found: %s\n"), job_name);
216 Dmsg1(100, "Job name not found: %s\n", job_name);
/* Attach the File daemon socket to the JCR and point the socket back at it. */
220 jcr->file_bsock = fd;
221 jcr->file_bsock->set_jcr(jcr);
223 Dmsg1(110, "Found Job %s\n", job_name);
/* A JCR that is already authenticated at this point is reported as a fatal error. */
225 if (jcr->authenticated) {
226 Jmsg2(jcr, M_FATAL, 0, _("Hey!!!! JobId %u Job %s already authenticated.\n"),
227 (uint32_t)jcr->JobId, jcr->Job);
233 * Authenticate the File daemon
/* NOTE(review): at debug_level 3 the authorization key is written to the debug output in clear text. */
235 if (debug_level == 3) {
236 Dmsg2(000, "jid=%u sd_auth_key=%s\n", (uint32_t)jcr->JobId, jcr->sd_auth_key);
/* Failure when the JCR was already authenticated or authenticate_filed() returns false. */
238 if (jcr->authenticated || !authenticate_filed(jcr)) {
239 Dmsg1(100, "Authentication failed Job %s\n", jcr->Job);
240 Jmsg(jcr, M_FATAL, 0, _("Unable to authenticate File daemon\n"));
241 if (debug_level == 3) {
242 Dmsg1(000, "Authentication failed Job %s\n", jcr->Job);
/* Success path (presumably the else branch -- the else line is not visible). */
245 jcr->authenticated = true;
246 Dmsg2(110, "OK Authentication jid=%u Job %s\n", (uint32_t)jcr->JobId, jcr->Job);
/* An unauthenticated job is marked as terminated in error before the waiter is woken. */
249 if (!jcr->authenticated) {
250 set_jcr_job_status(jcr, JS_ErrorTerminated);
252 pthread_cond_signal(&jcr->job_start_wait); /* wake waiting job */
260 * Query Device command from Director
261 * Sends Storage Daemon's information on the device to the
262 * caller (presumably the Director).
263 * This command always returns "true" so that the line is
264 * not closed on an error.
/*
 * query_cmd: answer a device query from the Director.
 *
 * Scans the device name out of jcr->dir_bsock->msg with query_device,
 * then searches the R_DEVICE resources and, after those, the
 * R_AUTOCHANGER resources for a matching name. For a match the
 * device/changer information is sent with dir_update_device() or
 * dir_update_changer(), followed by OK_query or NO_query. If nothing
 * matches NO_device is sent; the BAD_query reply at the end echoes the
 * received command.
 *
 * NOTE(review): query_device, OK_query, NO_query, NO_device and
 * BAD_query are not defined in the visible text (three of them appear
 * only as commented-out declarations near the top of the file), and the
 * braces, several conditions and the return statements of this function
 * are not visible. Presumably this function is conditionally compiled --
 * confirm against the full source.
 */
267 bool query_cmd(JCR *jcr)
269 POOL_MEM dev_name, VolumeName, MediaType, ChangerName;
270 BSOCK *dir = jcr->dir_bsock;
272 AUTOCHANGER *changer;
275 Dmsg1(100, "Query_cmd: %s", dir->msg);
/* ok is true only when exactly one field (the device name) was scanned. */
276 ok = sscanf(dir->msg, query_device, dev_name.c_str()) == 1;
277 Dmsg1(100, "<dird: %s\n", dir->msg);
279 unbash_spaces(dev_name);
/* First pass: plain device resources, compared by resource name. */
280 foreach_res(device, R_DEVICE) {
281 /* Find resource, and make sure we were able to open it */
282 if (strcmp(dev_name.c_str(), device->hdr.name) == 0) {
/* Initialize the device (the condition guarding this call is not visible). */
284 device->dev = init_dev(jcr, device);
/* Send the device information, then OK_query or NO_query (the selecting condition is not visible). */
289 ok = dir_update_device(jcr, device->dev);
291 ok = dir->fsend(OK_query);
293 dir->fsend(NO_query);
/* Second pass: autochanger resources, compared by resource name. */
298 foreach_res(changer, R_AUTOCHANGER) {
299 /* Find resource, and make sure we were able to open it */
300 if (strcmp(dev_name.c_str(), changer->hdr.name) == 0) {
/* A changer with no device list, or an empty one, is skipped. */
301 if (!changer->device || changer->device->size() == 0) {
302 continue; /* no devices */
304 ok = dir_update_changer(jcr, changer);
306 ok = dir->fsend(OK_query);
308 dir->fsend(NO_query);
313 /* If we get here, the device/autochanger was not found */
/* The received command is saved in jcr->errmsg before the NO_device reply is sent. */
314 unbash_spaces(dir->msg);
315 pm_strcpy(jcr->errmsg, dir->msg);
316 dir->fsend(NO_device, dev_name.c_str());
317 Dmsg1(100, ">dird: %s\n", dir->msg);
/* Bad-command reply: echo the received command in BAD_query (the guarding condition is not visible). */
319 unbash_spaces(dir->msg);
320 pm_strcpy(jcr->errmsg, dir->msg);
321 dir->fsend(BAD_query, jcr->errmsg);
322 Dmsg1(100, ">dird: %s\n", dir->msg);
332 * Destroy the Job Control Record and associated
333 * resources (sockets).
/*
 * stored_free_jcr: release the resources held by a JCR.
 *
 * In the visible text this closes the File daemon socket, frees the
 * name strings, removes and frees the restore bootstrap file name,
 * destroys the job_start_wait condition variable, frees read_dcr, and
 * deletes the read_store and write_store lists together with each
 * entry's device.
 *
 * NOTE(review): braces, several guarding conditions and the end of the
 * function are not visible in this text; confirm against the full
 * source before relying on the notes below.
 */
335 void stored_free_jcr(JCR *jcr)
337 Dmsg1(900, "stored_free_jcr JobId=%u\n", jcr->JobId);
/* Close the File daemon socket and clear the pointer. */
338 if (jcr->file_bsock) {
339 jcr->file_bsock->close();
340 jcr->file_bsock = NULL;
/*
 * NOTE(review): no NULL guard or reset to NULL is visible for job_name,
 * unlike client_name below; it may be on lines not shown. This call also
 * uses free_pool_memory() while the following ones use free_memory() --
 * confirm both release memory obtained with get_pool_memory().
 */
343 free_pool_memory(jcr->job_name);
345 if (jcr->client_name) {
346 free_memory(jcr->client_name);
347 jcr->client_name = NULL;
349 if (jcr->fileset_name) {
350 free_memory(jcr->fileset_name);
352 if (jcr->fileset_md5) {
353 free_memory(jcr->fileset_md5);
/* Remove the bootstrap file from disk before freeing its path string. */
359 if (jcr->RestoreBootstrap) {
360 unlink(jcr->RestoreBootstrap);
361 free_pool_memory(jcr->RestoreBootstrap);
362 jcr->RestoreBootstrap = NULL;
/* A JCR that still has next_dev/prev_dev links set is reported as a fatal error. */
364 if (jcr->next_dev || jcr->prev_dev) {
365 Emsg0(M_FATAL, 0, _("In free_jcr(), but still attached to device!!!!\n"));
367 pthread_cond_destroy(&jcr->job_start_wait);
373 /* Avoid a double free */
/* When dcr and read_dcr are the same object, read_dcr is cleared so it is not freed a second time. */
374 if (jcr->dcr == jcr->read_dcr) {
375 jcr->read_dcr = NULL;
382 free_dcr(jcr->read_dcr);
383 jcr->read_dcr = NULL;
/* Delete the device of every read_store entry, then the list itself. */
386 if (jcr->read_store) {
388 foreach_alist(store, jcr->read_store) {
389 delete store->device;
392 delete jcr->read_store;
393 jcr->read_store = NULL;
/* Same teardown for write_store. */
395 if (jcr->write_store) {
397 foreach_alist(store, jcr->write_store) {
398 delete store->device;
401 delete jcr->write_store;
402 jcr->write_store = NULL;