2 Bacula(R) - The Network Backup Solution
4 Copyright (C) 2000-2015 Kern Sibbald
5 Copyright (C) 2000-2014 Free Software Foundation Europe e.V.
7 The original author of Bacula is Kern Sibbald, with contributions
8 from many others, a complete list can be found in the file AUTHORS.
10 You may use this file and others of this release according to the
11 license defined in the LICENSE file, which includes the Affero General
12 Public License, v3.0 ("AGPLv3") and some additional permissions and
13 terms pursuant to its AGPLv3 Section 7.
15 This notice must be preserved when any source code is
16 conveyed and/or propagated.
18 Bacula(R) is a registered trademark of Kern Sibbald.
21 * Job control and execution for Storage Daemon
23 * Written by Kern Sibbald, MM
/* Mutex handed to pthread_cond_timedwait() in run_cmd() while a job waits on jcr->job_start_wait. */
30 static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
32 /* Imported variables */
33 extern STORES *me; /* our Global resource */
34 extern uint32_t VolSessionTime;
36 /* Imported functions */
37 extern uint32_t newVolSessionId();
38 extern bool do_vbackup(JCR *jcr);
40 /* Requests from the Director daemon */
/*
 * sscanf() template used by job_cmd() to parse the Director's job request.
 * NOTE(review): the end of this literal is not visible in this listing.
 * NOTE(review): "SpoolSize=%s" carries no field width, unlike the "%127s"
 * string fields; confirm the destination buffer in job_cmd() cannot be
 * overrun by a long value.
 */
41 static char jobcmd[] = "JobId=%d job=%127s job_name=%127s client_name=%127s "
42 "type=%d level=%d FileSet=%127s NoAttr=%d SpoolAttr=%d FileSetMD5=%127s "
43 "SpoolData=%d WritePartAfterJob=%d PreferMountedVols=%d SpoolSize=%s "
44 "rerunning=%d VolSessionId=%d VolSessionTime=%d sd_client=%d "
47 /* Responses sent to Director daemon */
/* Reply sent by job_cmd(): VolSessionId, VolSessionTime and the authorization key. */
48 static char OKjob[] = "3000 OK Job SDid=%u SDtime=%u Authorization=%s\n";
/* Error reply sent by job_cmd(): the sscanf() result and a copy of the received command. */
49 static char BAD_job[] = "3915 Bad Job command. stat=%d CMD: %s\n";
52 * Director requests us to start a job
53 * Basic tasks done here:
54 * - We pickup the JobId to be run from the Director.
55 * - We pickup the device, media, and pool from the Director
56 * - Wait for a connection from the File Daemon (FD)
57 * - Accept commands from the FD (i.e. run the job)
58 * - Return when the connection is terminated or
/*
 * job_cmd -- handle the Director's job request found in jcr->dir_bsock->msg.
 *
 * Visible steps: parse the request with the jobcmd template, copy the
 * parsed fields into the JCR, build (or reuse) the authorization key,
 * answer the Director with OKjob, then instantiate the plugins and raise
 * the "JobStart" daemon and plugin events.
 *
 * jcr: job control record; its dir_bsock carries the request and the reply.
 *
 * NOTE(review): this listing omits some original lines (braces, several
 * local declarations such as JobId/stat/spool_size/seed/rerunning/
 * is_client/ojcr, and the return statements), so the exact branch
 * structure and the return value cannot be confirmed from here.
 */
61 bool job_cmd(JCR *jcr)
64 char sd_auth_key[200];
67 BSOCK *dir = jcr->dir_bsock;
68 POOL_MEM job_name, client_name, job, fileset_name, fileset_md5;
69 int32_t JobType, level, spool_attributes, no_attributes, spool_data;
70 int32_t write_part_after_job, PreferMountedVols;
77 * Get JobId and permissions from Director
79 Dmsg1(100, "<dird: %s", dir->msg);
/* Default spool size to "0" before parsing. */
80 bstrncpy(spool_size, "0", sizeof(spool_size));
/*
 * Parse the request; string fields are written straight into the
 * POOL_MEM buffers through c_str().
 * NOTE(review): &sd_auth_key has type char (*)[200] rather than char *;
 * passing sd_auth_key itself would match a string conversion -- confirm.
 * NOTE(review): spool_size is filled by the width-less "SpoolSize=%s".
 */
81 stat = sscanf(dir->msg, jobcmd, &JobId, job.c_str(), job_name.c_str(),
83 &JobType, &level, fileset_name.c_str(), &no_attributes,
84 &spool_attributes, fileset_md5.c_str(), &spool_data,
85 &write_part_after_job, &PreferMountedVols, spool_size,
86 &rerunning, &jcr->VolSessionId, &jcr->VolSessionTime,
87 &is_client, &sd_auth_key);
/*
 * Error reply: echo the received command and the sscanf() count back to
 * the Director and mark the job as terminated in error. The condition
 * guarding these lines is not visible in this listing.
 */
89 pm_strcpy(jcr->errmsg, dir->msg);
90 dir->fsend(BAD_job, stat, jcr->errmsg);
91 Dmsg1(100, ">dird: %s", dir->msg);
92 jcr->setJobStatus(JS_ErrorTerminated);
95 jcr->rerunning = rerunning;
96 jcr->sd_client = is_client;
/* Keep a heap copy of the key received from the Director (guard, if any, not visible). */
98 jcr->sd_auth_key = bstrdup(sd_auth_key);
100 Dmsg3(100, "rerunning=%d VolSesId=%d VolSesTime=%d\n", jcr->rerunning,
101 jcr->VolSessionId, jcr->VolSessionTime);
103 * Since this job could be rescheduled, we
104 * check to see if we have it already. If so
105 * free the old jcr and use the new one.
/* Look up an existing JCR with the same full job name that has not authenticated. */
107 ojcr = get_jcr_by_full_name(job.c_str());
108 if (ojcr && !ojcr->authenticated) {
109 Dmsg2(100, "Found ojcr=0x%x Job %s\n", (unsigned)(intptr_t)ojcr, job.c_str());
113 Dmsg2(800, "Start JobId=%d %p\n", JobId, jcr);
117 * If job rescheduled because previous was incomplete,
118 * the Resched flag is set and VolSessionId and VolSessionTime
119 * are given to us (same as restarted job).
/* A job that is not rerunning gets a fresh session id and the daemon's session time. */
121 if (!jcr->rerunning) {
122 jcr->VolSessionId = newVolSessionId();
123 jcr->VolSessionTime = VolSessionTime;
/* Copy the parsed fields into the JCR; the names have unbash_spaces() applied first. */
125 bstrncpy(jcr->Job, job, sizeof(jcr->Job));
126 unbash_spaces(job_name);
127 jcr->job_name = get_pool_memory(PM_NAME);
128 pm_strcpy(jcr->job_name, job_name);
129 unbash_spaces(client_name);
130 jcr->client_name = get_pool_memory(PM_NAME);
131 pm_strcpy(jcr->client_name, client_name);
132 unbash_spaces(fileset_name);
133 jcr->fileset_name = get_pool_memory(PM_NAME);
134 pm_strcpy(jcr->fileset_name, fileset_name);
135 jcr->setJobType(JobType);
136 jcr->setJobLevel(level);
137 jcr->no_attributes = no_attributes;
138 jcr->spool_attributes = spool_attributes;
139 jcr->spool_data = spool_data;
140 jcr->spool_size = str_to_int64(spool_size);
141 jcr->write_part_after_job = write_part_after_job;
142 jcr->fileset_md5 = get_pool_memory(PM_NAME);
143 pm_strcpy(jcr->fileset_md5, fileset_md5);
144 jcr->PreferMountedVols = PreferMountedVols;
147 jcr->authenticated = false;
150 * Pass back an authorization key for the File daemon
/*
 * When acting as SD client a placeholder is put in the local key buffer.
 * NOTE(review): the size argument 3 equals strlen("xxx"); if bstrncpy()
 * reserves one byte for the terminator (as its sizeof() use above
 * suggests) only "xx" is stored -- confirm, sizeof(sd_auth_key) looks
 * like the intended bound.
 */
152 if (jcr->sd_client) {
153 bstrncpy(sd_auth_key, "xxx", 3);
/* Build a session key seeded from the JCR address and the JobId (enclosing branch not visible). */
155 bsnprintf(seed, sizeof(seed), "%p%d", jcr, JobId);
156 make_session_key(sd_auth_key, seed, 1);
158 dir->fsend(OKjob, jcr->VolSessionId, jcr->VolSessionTime, sd_auth_key);
159 Dmsg2(150, ">dird jid=%u: %s", (uint32_t)jcr->JobId, dir->msg);
160 /* If not client, set key, otherwise it is already set */
161 if (!jcr->sd_client) {
162 jcr->sd_auth_key = bstrdup(sd_auth_key);
/* Clear the stack copy of the key once it has been sent/stored. */
163 memset(sd_auth_key, 0, sizeof(sd_auth_key));
165 new_plugins(jcr); /* instantiate the plugins */
166 generate_daemon_event(jcr, "JobStart");
167 generate_plugin_event(jcr, bsdEventJobStart, (void *)"JobStart");
/*
 * run_cmd -- handle the Director's run request for the job in jcr.
 *
 * Visible steps: report JS_WaitFD, then either (a) when sd_calls_client
 * is set, read the client hello and authenticate the File daemon
 * directly, or (b) when not acting as SD client, block on
 * jcr->job_start_wait until the job is authenticated, canceled, or the
 * wait fails/times out (deadline = now + me->client_wait seconds).
 * Afterwards the authorization key is zeroed and run_job() is called if
 * the job is authenticated and not canceled.
 *
 * NOTE(review): this listing omits some original lines (braces, the
 * tv/tz/errstat declarations, the body of the no_client_used() branch,
 * any lock/unlock of `mutex`, and the return statements); those parts
 * cannot be confirmed from here.
 */
171 bool run_cmd(JCR *jcr)
175 struct timespec timeout;
179 Dmsg1(200, "Run_cmd: %s\n", jcr->dir_bsock->msg);
181 /* If we do not need the FD, we are doing a virtual backup. */
182 if (jcr->no_client_used()) {
187 jcr->sendJobStatus(JS_WaitFD); /* wait for FD to connect */
189 Dmsg2(050, "sd_calls_client=%d sd_client=%d\n", jcr->sd_calls_client, jcr->sd_client);
190 if (jcr->sd_calls_client) {
191 if (!read_client_hello(jcr)) {
195 * Authenticate the File daemon
197 Dmsg0(050, "=== Authenticate FD\n");
/* An already-authenticated JCR is treated the same as a failed authentication here. */
198 if (jcr->authenticated || !authenticate_filed(jcr, jcr->file_bsock, jcr->FDVersion)) {
199 Dmsg1(050, "Authentication failed Job %s\n", jcr->Job);
200 Jmsg(jcr, M_FATAL, 0, _("Unable to authenticate File daemon\n"));
/* Presumably the success branch of the test above; the `else` line is not visible. */
202 jcr->authenticated = true;
204 } else if (!jcr->sd_client) {
205 /* We wait to receive connection from Client */
/* Absolute deadline for the condition wait: current time plus me->client_wait seconds. */
206 gettimeofday(&tv, &tz);
207 timeout.tv_nsec = tv.tv_usec * 1000;
208 timeout.tv_sec = tv.tv_sec + me->client_wait;
210 Dmsg3(050, "%s waiting %d sec for FD to contact SD key=%s\n",
211 jcr->Job, (int)(timeout.tv_sec-time(NULL)), jcr->sd_auth_key);
213 Dmsg3(800, "=== Block Job=%s jid=%d %p\n", jcr->Job, jcr->JobId, jcr);
216 * Wait for the File daemon to contact us to start the Job,
217 * when he does, we will be released, unless the 30 minutes
/* Re-check the predicate after every wakeup; the action taken on ETIMEDOUT/EINVAL/EPERM is not visible. */
221 while ( !jcr->authenticated && !job_canceled(jcr) ) {
222 errstat = pthread_cond_timedwait(&jcr->job_start_wait, &mutex, &timeout);
223 if (errstat == ETIMEDOUT || errstat == EINVAL || errstat == EPERM) {
226 Dmsg1(800, "=== Auth cond errstat=%d\n", errstat);
/*
 * NOTE(review): the format reads "Auth=%d jid=%d" but the arguments are
 * passed as JobId, authenticated -- the first two values appear swapped
 * in the debug output.
 */
228 Dmsg4(050, "=== Auth=%d jid=%d canceled=%d errstat=%d\n",
229 jcr->JobId, jcr->authenticated, job_canceled(jcr), errstat);
231 Dmsg2(800, "Auth fail or cancel for jid=%d %p\n", jcr->JobId, jcr);
/*
 * Zero the stored authorization key in place.
 * NOTE(review): strlen() requires jcr->sd_auth_key to be non-NULL here;
 * confirm every path through job_cmd() sets it.
 */
234 memset(jcr->sd_auth_key, 0, strlen(jcr->sd_auth_key));
236 if (jcr->authenticated && !job_canceled(jcr)) {
237 Dmsg2(800, "Running jid=%d %p\n", jcr->JobId, jcr);
238 run_job(jcr); /* Run the job */
240 Dmsg2(800, "Done jid=%d %p\n", jcr->JobId, jcr);
247 * Query Device command from Director
248 * Sends Storage Daemon's information on the device to the
249 * caller (presumably the Director).
250 * This command always returns "true" so that the line is
251 * not closed on an error.
/*
 * query_cmd -- answer the Director's device query held in jcr->dir_bsock->msg.
 *
 * Visible steps: parse one device name with the query_device template,
 * then search the R_DEVICE resources and after that the R_AUTOCHANGER
 * resources for an entry whose hdr.name equals that name. A match is
 * reported through dir_update_device() / dir_update_changer() followed
 * by OK_query or NO_query. If nothing matches, NO_device is sent; a
 * BAD_query reply carrying the original message is also present.
 *
 * NOTE(review): query_device, OK_query, NO_query, NO_device and BAD_query
 * are not defined in the visible text, and this listing omits braces,
 * the `ok`/`device` declarations and the return statements, so which
 * branch leads to which reply cannot be fully confirmed from here.
 */
254 bool query_cmd(JCR *jcr)
256 POOL_MEM dev_name, VolumeName, MediaType, ChangerName;
257 BSOCK *dir = jcr->dir_bsock;
259 AUTOCHANGER *changer;
262 Dmsg1(100, "Query_cmd: %s", dir->msg);
/* ok is true only when exactly one field (the device name) was parsed. */
263 ok = sscanf(dir->msg, query_device, dev_name.c_str()) == 1;
264 Dmsg1(100, "<dird: %s\n", dir->msg);
266 unbash_spaces(dev_name);
/* First pass: plain device resources. */
267 foreach_res(device, R_DEVICE) {
268 /* Find resource, and make sure we were able to open it */
269 if (strcmp(dev_name.c_str(), device->hdr.name) == 0) {
/* Initialize the device; the condition guarding this call (if any) is not visible. */
271 device->dev = init_dev(jcr, device);
276 ok = dir_update_device(jcr, device->dev);
278 ok = dir->fsend(OK_query);
280 dir->fsend(NO_query);
/* Second pass: autochanger resources with the same name. */
285 foreach_res(changer, R_AUTOCHANGER) {
286 /* Find resource, and make sure we were able to open it */
287 if (strcmp(dev_name.c_str(), changer->hdr.name) == 0) {
288 if (!changer->device || changer->device->size() == 0) {
289 continue; /* no devices */
291 ok = dir_update_changer(jcr, changer);
293 ok = dir->fsend(OK_query);
295 dir->fsend(NO_query);
300 /* If we get here, the device/autochanger was not found */
301 unbash_spaces(dir->msg);
302 pm_strcpy(jcr->errmsg, dir->msg);
303 dir->fsend(NO_device, dev_name.c_str());
304 Dmsg1(100, ">dird: %s\n", dir->msg);
/* Bad-query reply: send the received message text back (presumably the parse-failure branch). */
306 unbash_spaces(dir->msg);
307 pm_strcpy(jcr->errmsg, dir->msg);
308 dir->fsend(BAD_query, jcr->errmsg);
309 Dmsg1(100, ">dird: %s\n", dir->msg);
319 * Destroy the Job Control Record and associated
320 * resources (sockets).
/*
 * stored_free_jcr -- release the Storage daemon resources attached to jcr.
 *
 * Visible steps, in order: flush and delete the jobmedia queue; signal
 * BNET_EOD and BNET_TERMINATE on the Director socket; free the File
 * daemon socket; free the job/client/fileset name and MD5 strings; free
 * the restore volume list and unlink + free the bootstrap file; report
 * a fatal message if the JCR is still linked to a device; destroy the
 * job_start_wait condition; free the DCR(s); delete the read/write
 * store lists together with each entry's device; write the state file.
 *
 * NOTE(review): this listing omits some original lines (braces, the
 * `store` declaration, the statements between the visible DCR lines),
 * and the end of the function may lie beyond the visible text.
 */
322 void stored_free_jcr(JCR *jcr)
324 Dmsg2(800, "End Job JobId=%u %p\n", jcr->JobId, jcr);
325 if (jcr->jobmedia_queue) {
326 flush_jobmedia_queue(jcr);
327 delete jcr->jobmedia_queue;
328 jcr->jobmedia_queue = NULL;
/* Tell the Director the conversation is over: end-of-data, then terminate. */
330 if (jcr->dir_bsock) {
331 Dmsg2(800, "Send terminate jid=%d %p\n", jcr->JobId, jcr);
332 jcr->dir_bsock->signal(BNET_EOD);
333 jcr->dir_bsock->signal(BNET_TERMINATE);
335 free_bsock(jcr->file_bsock);
/* Release the strings allocated in job_cmd(); the guard for job_name (if any) is not visible. */
337 free_pool_memory(jcr->job_name);
339 if (jcr->client_name) {
340 free_memory(jcr->client_name);
341 jcr->client_name = NULL;
343 if (jcr->fileset_name) {
344 free_memory(jcr->fileset_name);
346 if (jcr->fileset_md5) {
347 free_memory(jcr->fileset_md5);
353 /* Free any restore volume list created */
354 free_restore_volume_list(jcr);
/* The bootstrap file is removed from disk before its name buffer is freed. */
355 if (jcr->RestoreBootstrap) {
356 unlink(jcr->RestoreBootstrap);
357 free_pool_memory(jcr->RestoreBootstrap);
358 jcr->RestoreBootstrap = NULL;
360 if (jcr->next_dev || jcr->prev_dev) {
361 Emsg0(M_FATAL, 0, _("In free_jcr(), but still attached to device!!!!\n"));
363 pthread_cond_destroy(&jcr->job_start_wait);
369 /* Avoid a double free */
/* When dcr and read_dcr alias the same object, drop the read_dcr reference first. */
370 if (jcr->dcr == jcr->read_dcr) {
371 jcr->read_dcr = NULL;
378 free_dcr(jcr->read_dcr);
379 jcr->read_dcr = NULL;
/* Each store list entry's device is deleted before the list itself. */
382 if (jcr->read_store) {
384 foreach_alist(store, jcr->read_store) {
385 delete store->device;
388 delete jcr->read_store;
389 jcr->read_store = NULL;
391 if (jcr->write_store) {
393 foreach_alist(store, jcr->write_store) {
394 delete store->device;
397 delete jcr->write_store;
398 jcr->write_store = NULL;
/* Write the daemon state file "bacula-sd" into the working directory, keyed by the first SD port. */
403 write_state_file(me->working_directory, "bacula-sd", get_first_port_host_order(me->sdaddrs));