2 Bacula(R) - The Network Backup Solution
4 Copyright (C) 2000-2016 Kern Sibbald
6 The original author of Bacula is Kern Sibbald, with contributions
7 from many others, a complete list can be found in the file AUTHORS.
9 You may use this file and others of this release according to the
10 license defined in the LICENSE file, which includes the Affero General
11 Public License, v3.0 ("AGPLv3") and some additional permissions and
12 terms pursuant to its AGPLv3 Section 7.
14 This notice must be preserved when any source code is
15 conveyed and/or propagated.
17 Bacula(R) is a registered trademark of Kern Sibbald.
20 * Job control and execution for Storage Daemon
22 * Written by Kern Sibbald, MM
/* Mutex that run_cmd() hands to pthread_cond_timedwait() while it waits on jcr->job_start_wait. */
29 static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
31 /* Imported variables */
32 extern STORES *me; /* our Global resource */
33 extern uint32_t VolSessionTime;
35 /* Imported functions */
36 extern uint32_t newVolSessionId();
37 extern bool do_vbackup(JCR *jcr);
39 /* Requests from the Director daemon */
/*
 * sscanf() template that job_cmd() applies to the Director's job request.
 * The name-like fields are width-limited with %127s; SpoolSize=%s has no
 * field width.
 * NOTE(review): confirm the buffer that receives SpoolSize in job_cmd() is
 * large enough for anything the Director can send, or add a width here.
 */
40 static char jobcmd[] = "JobId=%d job=%127s job_name=%127s client_name=%127s "
41 "type=%d level=%d FileSet=%127s NoAttr=%d SpoolAttr=%d FileSetMD5=%127s "
42 "SpoolData=%d WritePartAfterJob=%d PreferMountedVols=%d SpoolSize=%s "
43 "rerunning=%d VolSessionId=%d VolSessionTime=%d sd_client=%d "
46 /* Responses sent to Director daemon */
/* Positive reply from job_cmd(): VolSessionId, VolSessionTime and the authorization key. */
47 static char OKjob[] = "3000 OK Job SDid=%u SDtime=%u Authorization=%s\n";
/* Negative reply from job_cmd(): the sscanf() result and the command text that was received. */
48 static char BAD_job[] = "3915 Bad Job command. stat=%d CMD: %s\n";
51 * Director requests us to start a job
52 * Basic tasks done here:
53 * - We pickup the JobId to be run from the Director.
54 * - We pickup the device, media, and pool from the Director
55 * - Wait for a connection from the File Daemon (FD)
56 * - Accept commands from the FD (i.e. run the job)
57 * - Return when the connection is terminated or an error occurs.
/*
 * job_cmd -- handle the Director's job request found in jcr->dir_bsock->msg.
 *
 * The request is scanned with the jobcmd template and the scanned values
 * are copied into the JCR: job/client/FileSet names (un-bashed), job type
 * and level, the attribute and spooling flags, spool size, FileSet MD5 and
 * PreferMountedVols. When the job is not being rerun, a fresh
 * VolSessionId/VolSessionTime pair is assigned. The Director is answered
 * with OKjob (session id, session time, authorization key); the BAD_job
 * reply carries the sscanf() result and sets JS_ErrorTerminated. Last, the
 * plugins are instantiated and the "JobStart" daemon and plugin events are
 * generated.
 *
 *  jcr  job control record; jcr->dir_bsock must be the Director connection.
 */
60 bool job_cmd(JCR *jcr)
63 char sd_auth_key[200];
66 BSOCK *dir = jcr->dir_bsock;
67 POOL_MEM job_name, client_name, job, fileset_name, fileset_md5;
68 int32_t JobType, level, spool_attributes, no_attributes, spool_data;
69 int32_t write_part_after_job, PreferMountedVols;
76 * Get JobId and permissions from Director
78 Dmsg1(100, "<dird: %s", dir->msg);
/* Default spool size in case the request does not override it. */
79 bstrncpy(spool_size, "0", sizeof(spool_size));
/*
 * sd_auth_key is passed as the array itself so that it decays to the
 * char * that %s requires; &sd_auth_key would have type char (*)[200].
 */
80 stat = sscanf(dir->msg, jobcmd, &JobId, job.c_str(), job_name.c_str(),
82 &JobType, &level, fileset_name.c_str(), &no_attributes,
83 &spool_attributes, fileset_md5.c_str(), &spool_data,
84 &write_part_after_job, &PreferMountedVols, spool_size,
85 &rerunning, &jcr->VolSessionId, &jcr->VolSessionTime,
86 &is_client, sd_auth_key);
/* Failure reply: echo the received command back together with the scan count. */
88 pm_strcpy(jcr->errmsg, dir->msg);
89 dir->fsend(BAD_job, stat, jcr->errmsg);
90 Dmsg1(100, ">dird: %s", dir->msg);
91 jcr->setJobStatus(JS_ErrorTerminated);
94 jcr->rerunning = rerunning;
95 jcr->sd_client = is_client;
/* Keep a heap copy of the key that came with the request. */
97 jcr->sd_auth_key = bstrdup(sd_auth_key);
99 Dmsg3(100, "rerunning=%d VolSesId=%d VolSesTime=%d\n", jcr->rerunning,
100 jcr->VolSessionId, jcr->VolSessionTime);
102 * Since this job could be rescheduled, we
103 * check to see if we have it already. If so
104 * free the old jcr and use the new one.
106 ojcr = get_jcr_by_full_name(job.c_str());
107 if (ojcr && !ojcr->authenticated) {
/* %p prints the whole pointer; narrowing it to unsigned would drop the upper bits on 64-bit targets. */
108 Dmsg2(100, "Found ojcr=%p Job %s\n", ojcr, job.c_str());
112 Dmsg2(800, "Start JobId=%d %p\n", JobId, jcr);
116 * If job rescheduled because previous was incomplete,
117 * the Resched flag is set and VolSessionId and VolSessionTime
118 * are given to us (same as restarted job).
120 if (!jcr->rerunning) {
121 jcr->VolSessionId = newVolSessionId();
122 jcr->VolSessionTime = VolSessionTime;
124 bstrncpy(jcr->Job, job, sizeof(jcr->Job));
/* Names arrive with spaces "bashed"; restore them before storing pool-memory copies in the JCR. */
125 unbash_spaces(job_name);
126 jcr->job_name = get_pool_memory(PM_NAME);
127 pm_strcpy(jcr->job_name, job_name);
128 unbash_spaces(client_name);
129 jcr->client_name = get_pool_memory(PM_NAME);
130 pm_strcpy(jcr->client_name, client_name);
131 unbash_spaces(fileset_name);
132 jcr->fileset_name = get_pool_memory(PM_NAME);
133 pm_strcpy(jcr->fileset_name, fileset_name);
134 jcr->setJobType(JobType);
135 jcr->setJobLevel(level);
136 jcr->no_attributes = no_attributes;
137 jcr->spool_attributes = spool_attributes;
138 jcr->spool_data = spool_data;
139 jcr->spool_size = str_to_int64(spool_size);
140 jcr->write_part_after_job = write_part_after_job;
141 jcr->fileset_md5 = get_pool_memory(PM_NAME);
142 pm_strcpy(jcr->fileset_md5, fileset_md5);
143 jcr->PreferMountedVols = PreferMountedVols;
/* run_cmd() is what later observes this flag becoming true. */
146 jcr->authenticated = false;
149 * Pass back an authorization key for the File daemon
151 if (jcr->sd_client) {
/*
 * NOTE(review): the size argument is 3 although "xxx" needs 4 bytes with
 * its terminator -- confirm what bstrncpy() leaves in sd_auth_key here.
 */
152 bstrncpy(sd_auth_key, "xxx", 3);
/* Otherwise derive a session key, seeded from the JCR address and the JobId. */
154 bsnprintf(seed, sizeof(seed), "%p%d", jcr, JobId);
155 make_session_key(sd_auth_key, seed, 1);
157 dir->fsend(OKjob, jcr->VolSessionId, jcr->VolSessionTime, sd_auth_key);
158 Dmsg2(150, ">dird jid=%u: %s", (uint32_t)jcr->JobId, dir->msg);
159 /* If not client, set key, otherwise it is already set */
160 if (!jcr->sd_client) {
161 jcr->sd_auth_key = bstrdup(sd_auth_key);
/* Wipe the stack copy of the key once it has been duplicated into the JCR. */
162 memset(sd_auth_key, 0, sizeof(sd_auth_key));
164 new_plugins(jcr); /* instantiate the plugins */
165 generate_daemon_event(jcr, "JobStart");
166 generate_plugin_event(jcr, bsdEventJobStart, (void *)"JobStart");
/*
 * run_cmd -- get the File daemon authenticated, then run the job.
 *
 * A job that needs no client is treated as a virtual backup. Otherwise
 * JS_WaitFD is reported and one of two paths is taken:
 *  - jcr->sd_calls_client: the client's hello is read and the File daemon
 *    is authenticated here;
 *  - otherwise, unless this SD is itself acting as the client: wait on
 *    jcr->job_start_wait until jcr->authenticated is set, the job is
 *    canceled, or the deadline of now + me->client_wait seconds passes.
 * Afterwards the authorization key stored in the JCR is overwritten with
 * zeros, and run_job() is called only if the job is authenticated and not
 * canceled.
 *
 *  jcr  job control record of the job to run.
 */
170 bool run_cmd(JCR *jcr)
174 struct timespec timeout;
178 Dmsg1(200, "Run_cmd: %s\n", jcr->dir_bsock->msg);
180 /* If we do not need the FD, we are doing a virtual backup. */
181 if (jcr->no_client_used()) {
186 jcr->sendJobStatus(JS_WaitFD); /* wait for FD to connect */
188 Dmsg2(050, "sd_calls_client=%d sd_client=%d\n", jcr->sd_calls_client, jcr->sd_client);
189 if (jcr->sd_calls_client) {
190 if (!read_client_hello(jcr)) {
194 * Authenticate the File daemon
196 Dmsg0(050, "=== Authenticate FD\n");
/* A job that is already marked authenticated at this point is treated like a failed authentication. */
197 if (jcr->authenticated || !authenticate_filed(jcr, jcr->file_bsock, jcr->FDVersion)) {
198 Dmsg1(050, "Authentication failed Job %s\n", jcr->Job);
199 Jmsg(jcr, M_FATAL, 0, _("Unable to authenticate File daemon\n"));
201 jcr->authenticated = true;
203 } else if (!jcr->sd_client) {
204 /* We wait to receive connection from Client */
/* Absolute deadline for pthread_cond_timedwait(): current time plus me->client_wait seconds. */
205 gettimeofday(&tv, &tz);
206 timeout.tv_nsec = tv.tv_usec * 1000;
207 timeout.tv_sec = tv.tv_sec + me->client_wait;
/* NOTE(review): this debug message prints the authorization key in clear text. */
209 Dmsg3(050, "%s waiting %d sec for FD to contact SD key=%s\n",
210 jcr->Job, (int)(timeout.tv_sec-time(NULL)), jcr->sd_auth_key);
212 Dmsg3(800, "=== Block Job=%s jid=%d %p\n", jcr->Job, jcr->JobId, jcr);
215 * Wait for the File daemon to contact us to start the Job,
216 * when it does, we will be released, unless the me->client_wait timeout expires first.
220 while ( !jcr->authenticated && !job_canceled(jcr) ) {
221 errstat = pthread_cond_timedwait(&jcr->job_start_wait, &mutex, &timeout);
222 if (errstat == ETIMEDOUT || errstat == EINVAL || errstat == EPERM) {
225 Dmsg1(800, "=== Auth cond errstat=%d\n", errstat);
/* Arguments follow the order of the labels: Auth, jid, canceled, errstat. */
227 Dmsg4(050, "=== Auth=%d jid=%d canceled=%d errstat=%d\n",
228 jcr->authenticated, jcr->JobId, job_canceled(jcr), errstat);
230 Dmsg2(800, "Auth fail or cancel for jid=%d %p\n", jcr->JobId, jcr);
/* The key is no longer needed past this point; overwrite it in place. */
233 memset(jcr->sd_auth_key, 0, strlen(jcr->sd_auth_key));
235 if (jcr->authenticated && !job_canceled(jcr)) {
236 Dmsg2(800, "Running jid=%d %p\n", jcr->JobId, jcr);
237 run_job(jcr); /* Run the job */
239 Dmsg2(800, "Done jid=%d %p\n", jcr->JobId, jcr);
246 * Query Device command from Director
247 * Sends Storage Daemon's information on the device to the
248 * caller (presumably the Director).
249 * This command always returns "true" so that the line is
250 * not closed on an error.
/*
 * query_cmd -- answer the Director's device query.
 *
 * The device name is scanned out of dir->msg with the query_device
 * template and un-bashed. It is compared first against the names of the
 * R_DEVICE resources and then against the names of the R_AUTOCHANGER
 * resources. For a match, the device or changer information is sent with
 * dir_update_device() / dir_update_changer(), and the reply is OK_query or
 * NO_query. NO_device is sent when no resource carried that name.
 * BAD_query echoes the received command back -- presumably the reply for a
 * command that could not be scanned (confirm against the full control flow).
 *
 *  jcr  job control record; jcr->dir_bsock is the Director connection.
 */
253 bool query_cmd(JCR *jcr)
255 POOL_MEM dev_name, VolumeName, MediaType, ChangerName;
256 BSOCK *dir = jcr->dir_bsock;
258 AUTOCHANGER *changer;
261 Dmsg1(100, "Query_cmd: %s", dir->msg);
/* ok is true only when exactly one field (the device name) was scanned. */
262 ok = sscanf(dir->msg, query_device, dev_name.c_str()) == 1;
263 Dmsg1(100, "<dird: %s\n", dir->msg);
265 unbash_spaces(dev_name);
266 foreach_res(device, R_DEVICE) {
267 /* Find resource, and make sure we were able to open it */
268 if (strcmp(dev_name.c_str(), device->hdr.name) == 0) {
/* The resource's device object is created here and stored back into the resource. */
270 device->dev = init_dev(jcr, device);
275 ok = dir_update_device(jcr, device->dev);
277 ok = dir->fsend(OK_query);
279 dir->fsend(NO_query);
284 foreach_res(changer, R_AUTOCHANGER) {
285 /* Find resource, and make sure we were able to open it */
286 if (strcmp(dev_name.c_str(), changer->hdr.name) == 0) {
/* A changer without any attached devices is skipped rather than reported. */
287 if (!changer->device || changer->device->size() == 0) {
288 continue; /* no devices */
290 ok = dir_update_changer(jcr, changer);
292 ok = dir->fsend(OK_query);
294 dir->fsend(NO_query);
299 /* If we get here, the device/autochanger was not found */
/* jcr->errmsg keeps the un-bashed command text; the reply itself names only the device. */
300 unbash_spaces(dir->msg);
301 pm_strcpy(jcr->errmsg, dir->msg);
302 dir->fsend(NO_device, dev_name.c_str());
303 Dmsg1(100, ">dird: %s\n", dir->msg);
/* The un-bashed command text is saved in jcr->errmsg and returned inside the BAD_query reply. */
305 unbash_spaces(dir->msg);
306 pm_strcpy(jcr->errmsg, dir->msg);
307 dir->fsend(BAD_query, jcr->errmsg);
308 Dmsg1(100, ">dird: %s\n", dir->msg);
318 * Destroy the Job Control Record and associated
319 * resources (sockets).
/*
 * stored_free_jcr -- release the Storage daemon specific parts of a JCR.
 *
 * In order, the visible code:
 *  - flushes and deletes the job-media queue;
 *  - signals BNET_EOD and BNET_TERMINATE on the Director socket and frees
 *    the File daemon socket;
 *  - frees the job, client, FileSet and FileSet-MD5 strings;
 *  - frees the restore volume list, and unlinks and frees the restore
 *    bootstrap file name;
 *  - reports a fatal error if the JCR is still linked to a device;
 *  - destroys the job_start_wait condition variable;
 *  - releases the read DCR, guarding against freeing one DCR twice;
 *  - deletes the read and write storage lists with their device lists;
 *  - writes the daemon state file.
 *
 *  jcr  job control record being destroyed.
 */
321 void stored_free_jcr(JCR *jcr)
323 Dmsg2(800, "End Job JobId=%u %p\n", jcr->JobId, jcr);
324 if (jcr->jobmedia_queue) {
/* Queued job-media records are flushed before the queue itself is deleted. */
325 flush_jobmedia_queue(jcr);
326 delete jcr->jobmedia_queue;
327 jcr->jobmedia_queue = NULL;
329 if (jcr->dir_bsock) {
330 Dmsg2(800, "Send terminate jid=%d %p\n", jcr->JobId, jcr);
331 jcr->dir_bsock->signal(BNET_EOD);
332 jcr->dir_bsock->signal(BNET_TERMINATE);
334 free_bsock(jcr->file_bsock);
/*
 * NOTE(review): of the four strings freed below only client_name is reset
 * to NULL afterwards, and job_name goes through free_pool_memory() while
 * the others go through free_memory() -- confirm both are intended.
 */
336 free_pool_memory(jcr->job_name);
338 if (jcr->client_name) {
339 free_memory(jcr->client_name);
340 jcr->client_name = NULL;
342 if (jcr->fileset_name) {
343 free_memory(jcr->fileset_name);
345 if (jcr->fileset_md5) {
346 free_memory(jcr->fileset_md5);
352 /* Free any restore volume list created */
353 free_restore_volume_list(jcr);
354 if (jcr->RestoreBootstrap) {
/* The bootstrap file is removed from disk before its name is freed. */
355 unlink(jcr->RestoreBootstrap);
356 free_pool_memory(jcr->RestoreBootstrap);
357 jcr->RestoreBootstrap = NULL;
/* A JCR that is still chained to a device at this point is reported as a fatal error. */
359 if (jcr->next_dev || jcr->prev_dev) {
360 Emsg0(M_FATAL, 0, _("In free_jcr(), but still attached to device!!!!\n"));
362 pthread_cond_destroy(&jcr->job_start_wait);
368 /* Avoid a double free */
/* When dcr and read_dcr refer to the same DCR, read_dcr is cleared so that free_dcr() below is not given it again. */
369 if (jcr->dcr == jcr->read_dcr) {
370 jcr->read_dcr = NULL;
377 free_dcr(jcr->read_dcr);
378 jcr->read_dcr = NULL;
381 if (jcr->read_store) {
/* Each store entry owns a device list that is deleted before the containing list. */
383 foreach_alist(store, jcr->read_store) {
384 delete store->device;
387 delete jcr->read_store;
388 jcr->read_store = NULL;
390 if (jcr->write_store) {
392 foreach_alist(store, jcr->write_store) {
393 delete store->device;
396 delete jcr->write_store;
397 jcr->write_store = NULL;
/* State file goes to the working directory under the base name "bacula-sd", keyed by the first SD port. */
402 write_state_file(me->working_directory, "bacula-sd", get_first_port_host_order(me->sdaddrs));