3 * Bacula Director -- backup.c -- responsible for doing backup jobs
5 * Kern Sibbald, March MM
7 * Basic tasks done here:
8 * Open DB and create records for this job.
9 * Open Message Channel with Storage daemon to tell him a job will be starting.
10 * Open connection with File daemon and pass him commands
12 * When the File daemon finishes the job, update the DB.
17 Bacula® - The Network Backup Solution
19 Copyright (C) 2000-2006 Free Software Foundation Europe e.V.
21 The main author of Bacula is Kern Sibbald, with contributions from
22 many others, a complete list can be found in the file AUTHORS.
23 This program is Free Software; you can redistribute it and/or
24 modify it under the terms of version two of the GNU General Public
25 License as published by the Free Software Foundation plus additions
26 that are listed in the file LICENSE.
28 This program is distributed in the hope that it will be useful, but
29 WITHOUT ANY WARRANTY; without even the implied warranty of
30 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
31 General Public License for more details.
33 You should have received a copy of the GNU General Public License
34 along with this program; if not, write to the Free Software
35 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
38 Bacula® is a registered trademark of John Walker.
39 The licensor of Bacula is the Free Software Foundation Europe
40 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
41 Switzerland, email:ftf@fsfeurope.org.
48 /* Commands sent to File daemon */
/* Sent by do_backup() after the Storage address exchange and run-script commands. */
49 static char backupcmd[] = "backup\n";
/* Format filled by do_backup() with store->address, store->SDDport and tls_need. */
50 static char storaddr[] = "storage address=%s port=%d ssl=%d\n";
52 /* Responses received from File daemon */
/* Reply that do_backup() requires after sending backupcmd. */
53 static char OKbackup[] = "2000 OK backup\n";
/* Reply that do_backup() requires after sending storaddr. */
54 static char OKstore[] = "2000 OK storage\n";
/*
 * Scan format used by wait_for_job_termination(); a message counts as the
 * end-of-job record only when all five fields convert.
 * NOTE(review): the two %lld fields are scanned into uint64_t variables by
 * that caller - confirm the scanner in use accepts the signedness mismatch.
 */
55 static char EndJob[] = "2800 End Job TermCode=%d JobFiles=%u "
56 "ReadBytes=%lld JobBytes=%lld Errors=%u\n";
59 * Called here before the job is run to do the job-specific setup.
/*
 * Pre-run setup for a backup job. In order, the visible steps are: release
 * the read storage, get or create the FileSet record, fix the job level and
 * since time, apply pool overrides, get or create the Pool record, copy the
 * Pool storage into the write storage, and start clone jobs.
 *
 * jcr - job control record of the backup being prepared.
 *
 * Returns bool.
 * NOTE(review): brace-only lines and the return statements are not present
 * in this listing; presumably each failed step returns false and the normal
 * path returns true - confirm against the full source.
 */
62 bool do_backup_init(JCR *jcr)
65 free_rstorage(jcr); /* we don't read so release */
/* Failure branch body is not visible in this listing. */
67 if (!get_or_create_fileset_record(jcr)) {
72 * Get definitive Job level and since time
/* jcr->since receives the since text; it is later printed in the job report. */
74 get_level_since_time(jcr, jcr->since, sizeof(jcr->since));
76 apply_pool_overrides(jcr);
/* A PoolId of 0 is treated as failure to obtain the Pool record. */
78 jcr->jr.PoolId = get_or_create_pool_record(jcr, jcr->pool->hdr.name);
79 if (jcr->jr.PoolId == 0) {
83 /* If pool storage specified, use it instead of job storage */
84 copy_wstorage(jcr, jcr->pool->storage, _("Pool resource"));
/* NOTE(review): the condition guarding this fatal message is not visible. */
87 Jmsg(jcr, M_FATAL, 0, _("No Storage specification found in Job or Pool.\n"));
91 create_clones(jcr); /* run any clone jobs */
97 * Do a backup of the specified FileSet
99 * Returns: false on failure
/*
 * Drives one backup job from the Director side. Visible sequence:
 *   1. log the start message and write the job start record;
 *   2. connect to the Storage daemon, start its job, send run on the SD
 *      socket, then start the SD message thread;
 *   3. connect to the File daemon and send include list, exclude list and
 *      level command;
 *   4. send the Storage address/port/TLS requirement and wait for OKstore;
 *   5. send run-script commands, refresh the start time in the job record;
 *   6. send the backup command, wait for OKbackup, then wait for job
 *      termination and call backup_cleanup() when it ends JS_Terminated.
 *
 * jcr - job control record of the job to run.
 *
 * Returns bool.
 * NOTE(review): brace-only lines, the failure branch bodies and the return
 * statements are absent from this listing - confirm which failures return
 * directly and which reach the error tail at the end of the function.
 */
102 bool do_backup(JCR *jcr)
/* Default TLS requirement sent to the FD; raised below if the Storage enables TLS. */
105 int tls_need = BNET_TLS_NONE;
111 /* Print Job Start message */
112 Jmsg(jcr, M_INFO, 0, _("Start Backup JobId %s, Job=%s\n"),
113 edit_uint64(jcr->JobId, ed1), jcr->Job);
115 set_jcr_job_status(jcr, JS_Running);
116 Dmsg2(100, "JobId=%d JobLevel=%c\n", jcr->jr.JobId, jcr->jr.JobLevel);
/* A failed catalog update is reported as fatal with the database error text. */
117 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
118 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
123 * Open a message channel connection with the Storage
124 * daemon. This is to let him know that our client
125 * will be contacting him for a backup session.
128 Dmsg0(110, "Open connection with storage daemon\n");
129 set_jcr_job_status(jcr, JS_WaitSD);
131 * Start conversation with Storage daemon
133 if (!connect_to_storage_daemon(jcr, 10, SDConnectTimeout, 1)) {
137 * Now start a job with the Storage daemon
/* The write storage list is passed; NULL is given for the second argument. */
139 if (!start_storage_daemon_job(jcr, NULL, jcr->wstorage)) {
144 * Start the job prior to starting the message thread below
145 * to avoid two threads from using the BSOCK structure at
148 if (!bnet_fsend(jcr->store_bsock, "run")) {
153 * Now start a Storage daemon message thread. Note,
154 * this thread is used to provide the catalog services
155 * for the backup job, including inserting the attributes
156 * into the catalog. See catalog_update() in catreq.c
158 if (!start_storage_daemon_message_thread(jcr)) {
161 Dmsg0(150, "Storage daemon connection OK\n");
/* Status shows JS_WaitFD while connecting, then returns to JS_Running. */
163 set_jcr_job_status(jcr, JS_WaitFD);
164 if (!connect_to_file_daemon(jcr, 10, FDConnectTimeout, 1)) {
168 set_jcr_job_status(jcr, JS_Running);
169 fd = jcr->file_bsock;
171 if (!send_include_list(jcr)) {
175 if (!send_exclude_list(jcr)) {
179 if (!send_level_command(jcr)) {
184 * send Storage daemon address to the File daemon
/* When SDDport is 0, fall back to SDport. Note this writes to the store object. */
187 if (store->SDDport == 0) {
188 store->SDDport = store->SDport;
191 /* TLS Requirement */
/*
 * tls_need keeps BNET_TLS_NONE unless tls_enable is set.
 * NOTE(review): the else line between the two assignments is not visible;
 * presumably tls_require selects BNET_TLS_REQUIRED and otherwise BNET_TLS_OK.
 */
192 if (store->tls_enable) {
193 if (store->tls_require) {
194 tls_need = BNET_TLS_REQUIRED;
196 tls_need = BNET_TLS_OK;
/*
 * NOTE(review): the result of bnet_fsend() is ignored here; presumably a
 * failed send surfaces as a failed response() check below - confirm.
 */
200 bnet_fsend(fd, storaddr, store->address, store->SDDport, tls_need);
201 if (!response(jcr, fd, OKstore, "Storage", DISPLAY_ERROR)) {
206 if (!send_runscripts_commands(jcr)) {
211 * We re-update the job start record so that the start
212 * time is set after the run before job. This avoids
213 * that any files created by the run before job will
214 * be saved twice. They will be backed up in the current
215 * job, but not in the next one unless they are changed.
216 * Without this, they will be backed up in this job and
217 * in the next job run because in that case, their date
218 * is after the start of this run.
220 jcr->start_time = time(NULL);
221 jcr->jr.StartTime = jcr->start_time;
222 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
223 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
226 /* Send backup command */
/* As with storaddr above, only the response() result is checked. */
227 bnet_fsend(fd, backupcmd);
228 if (!response(jcr, fd, OKbackup, "backup", DISPLAY_ERROR)) {
232 /* Pickup Job termination data */
/* backup_cleanup() is called from here only for JS_Terminated. */
233 stat = wait_for_job_termination(jcr);
234 if (stat == JS_Terminated) {
235 backup_cleanup(jcr, stat);
240 /* Come here only after starting SD thread */
/* Error tail: mark the job JS_ErrorTerminated and wait for the SD side to finish. */
242 set_jcr_job_status(jcr, JS_ErrorTerminated);
243 Dmsg1(400, "wait for sd. use=%d\n", jcr->use_count());
244 wait_for_storage_daemon_termination(jcr);
245 Dmsg1(400, "after wait for sd. use=%d\n", jcr->use_count());
251 * Here we wait for the File daemon to signal termination,
252 * then we wait for the Storage daemon. When both
253 * are done, we return the job status.
254 * Also used by restore.c
/*
 * Reads File daemon messages with bget_dirmsg() until it returns a negative
 * value, looking for the EndJob record, then sends BNET_TERMINATE on the FD
 * socket and waits for the Storage daemon to terminate.
 *
 * jcr - job control record; jcr->file_bsock is the File daemon socket.
 *
 * Returns the first status that is not JS_Terminated, checked in the order
 * jcr->JobStatus, then jcr->FDJobStatus; when both are JS_Terminated the
 * result is jcr->SDJobStatus. Before that check, FDJobStatus is forced to
 * JS_ErrorTerminated if no EndJob record was accepted or the FD socket is
 * in error.
 *
 * NOTE(review): brace-only lines and a few statements are absent from this
 * listing, so the exact nesting of the statements below must be confirmed
 * against the full source.
 */
256 int wait_for_job_termination(JCR *jcr)
259 BSOCK *fd = jcr->file_bsock;
/* Targets of the EndJob scan; copied into jcr further down. */
261 uint32_t JobFiles, Errors;
262 uint64_t ReadBytes, JobBytes;
264 set_jcr_job_status(jcr, JS_Running);
265 /* Wait for Client to terminate */
266 while ((n = bget_dirmsg(fd)) >= 0) {
/*
 * The scan is attempted only while fd_ok is false and succeeds only when
 * all five EndJob fields convert. NOTE(review): the assignment that sets
 * fd_ok is not visible in this listing.
 */
267 if (!fd_ok && sscanf(fd->msg, EndJob, &jcr->FDJobStatus, &JobFiles,
268 &ReadBytes, &JobBytes, &Errors) == 5) {
/* The status reported by the FD becomes the job status. */
270 set_jcr_job_status(jcr, jcr->FDJobStatus);
271 Dmsg1(100, "FDStatus=%c\n", (char)jcr->JobStatus);
/* NOTE(review): the message argument of this Jmsg call is on a line not shown. */
273 Jmsg(jcr, M_WARNING, 0, _("Unexpected Client Job message: %s\n"),
276 if (job_canceled(jcr)) {
280 if (is_bnet_error(fd)) {
281 Jmsg(jcr, M_FATAL, 0, _("Network error with FD during %s: ERR=%s\n"),
282 job_type_to_str(jcr->JobType), bnet_strerror(fd));
284 bnet_sig(fd, BNET_TERMINATE); /* tell Client we are terminating */
286 /* Note, the SD stores in jcr->JobFiles/ReadBytes/JobBytes/Errors */
287 wait_for_storage_daemon_termination(jcr);
290 /* Return values from FD */
/* Copy the counters parsed from EndJob into jcr. NOTE(review): guard not visible. */
292 jcr->JobFiles = JobFiles;
293 jcr->Errors = Errors;
294 jcr->ReadBytes = ReadBytes;
295 jcr->JobBytes = JobBytes;
297 Jmsg(jcr, M_FATAL, 0, _("No Job status returned from FD.\n"));
300 // Dmsg4(100, "fd_ok=%d FDJS=%d JS=%d SDJS=%d\n", fd_ok, jcr->FDJobStatus,
301 // jcr->JobStatus, jcr->SDJobStatus);
303 /* Return the first error status we find Dir, FD, or SD */
304 if (!fd_ok || is_bnet_error(fd)) {
305 jcr->FDJobStatus = JS_ErrorTerminated;
307 if (jcr->JobStatus != JS_Terminated) {
308 return jcr->JobStatus;
310 if (jcr->FDJobStatus != JS_Terminated) {
311 return jcr->FDJobStatus;
313 return jcr->SDJobStatus;
317 * Release resources allocated during backup.
/*
 * Closes out a backup job: stores the termination code, updates the job end
 * record, reloads the job, client and media records used for statistics,
 * updates the bootstrap file, and emits the job summary report with Jmsg().
 *
 * jcr      - job control record of the job being closed out.
 * TermCode - termination status; stored with set_jcr_job_status() and then
 *            read back through jcr->JobStatus to choose the termination
 *            message and message type.
 *
 * No return value.
 *
 * NOTE(review): brace-only lines, some declarations (mr, cr, msg_type,
 * RunTime), some case labels and part of the Jmsg argument list are absent
 * from this listing; every visible line is kept as it was except the
 * formatting of term_code, which is now bounded (see below).
 */
319 void backup_cleanup(JCR *jcr, int TermCode)
321 char sdt[50], edt[50], schedt[50];
322 char ec1[30], ec2[30], ec3[30], ec4[30], ec5[30], compress[50];
323 char ec6[30], ec7[30], ec8[30], elapsed[50];
324 char term_code[100], fd_term_msg[100], sd_term_msg[100];
325 const char *term_msg;
329 double kbps, compression;
332 Dmsg2(100, "Enter backup_cleanup %d %c\n", TermCode, TermCode);
333 dequeue_messages(jcr); /* display any queued messages */
/* Zero the media and client records before they are used as lookup keys. */
334 memset(&mr, 0, sizeof(mr));
335 memset(&cr, 0, sizeof(cr));
336 set_jcr_job_status(jcr, TermCode);
338 update_job_end_record(jcr); /* update database */
/* A failed job record lookup warns and downgrades the job to JS_ErrorTerminated. */
340 if (!db_get_job_record(jcr, jcr->db, &jcr->jr)) {
341 Jmsg(jcr, M_WARNING, 0, _("Error getting job record for stats: %s"),
342 db_strerror(jcr->db));
343 set_jcr_job_status(jcr, JS_ErrorTerminated);
/* Client record is looked up by name; no status change is visible on failure. */
346 bstrncpy(cr.Name, jcr->client->hdr.name, sizeof(cr.Name));
347 if (!db_get_client_record(jcr, jcr->db, &cr)) {
348 Jmsg(jcr, M_WARNING, 0, _("Error getting client record for stats: %s"),
349 db_strerror(jcr->db));
/* Media record is looked up by jcr->VolumeName; failure downgrades the job status. */
352 bstrncpy(mr.VolumeName, jcr->VolumeName, sizeof(mr.VolumeName));
353 if (!db_get_media_record(jcr, jcr->db, &mr)) {
354 Jmsg(jcr, M_WARNING, 0, _("Error getting Media record for Volume \"%s\": ERR=%s"),
355 mr.VolumeName, db_strerror(jcr->db));
356 set_jcr_job_status(jcr, JS_ErrorTerminated);
359 update_bootstrap_file(jcr);
361 msg_type = M_INFO; /* by default INFO message */
/* Pick the termination text (and message type) from the final job status. */
362 switch (jcr->JobStatus) {
364 if (jcr->Errors || jcr->SDErrors) {
365 term_msg = _("Backup OK -- with warnings");
367 term_msg = _("Backup OK");
371 case JS_ErrorTerminated:
372 term_msg = _("*** Backup Error ***");
373 msg_type = M_ERROR; /* Generate error message */
/* If an SD socket exists, signal termination and cancel the SD message thread. */
374 if (jcr->store_bsock) {
375 bnet_sig(jcr->store_bsock, BNET_TERMINATE);
376 if (jcr->SD_msg_chan) {
377 pthread_cancel(jcr->SD_msg_chan);
382 term_msg = _("Backup Canceled");
383 if (jcr->store_bsock) {
384 bnet_sig(jcr->store_bsock, BNET_TERMINATE);
385 if (jcr->SD_msg_chan) {
386 pthread_cancel(jcr->SD_msg_chan);
391 term_msg = term_code;
/*
 * Bounded formatting: the format string passes through _(), so its
 * translated length is not fixed and must not be able to overrun term_code.
 */
392 bsnprintf(term_code, sizeof(term_code), _("Inappropriate term code: %c\n"), jcr->JobStatus);
395 bstrftimes(schedt, sizeof(schedt), jcr->jr.SchedTime);
396 bstrftimes(sdt, sizeof(sdt), jcr->jr.StartTime);
397 bstrftimes(edt, sizeof(edt), jcr->jr.EndTime);
398 RunTime = jcr->jr.EndTime - jcr->jr.StartTime;
/*
 * Rate in units of 1000 bytes per second of run time.
 * NOTE(review): a guard for RunTime == 0 is not visible in this listing -
 * confirm one exists on the lines not shown.
 */
402 kbps = (double)jcr->jr.JobBytes / (1000 * RunTime);
404 if (!db_get_job_volume_names(jcr, jcr->db, jcr->jr.JobId, &jcr->VolumeName)) {
406 * Note, if the job has erred, most likely it did not write any
407 * tape, so suppress this "error" message since in that case
408 * it is normal. Or look at it the other way, only for a
409 * normal exit should we complain about this error.
411 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes) {
412 Jmsg(jcr, M_ERROR, 0, "%s", db_strerror(jcr->db));
414 jcr->VolumeName[0] = 0; /* none */
/*
 * Compression is 100 - 100 * JobBytes / ReadBytes; it is reported as None
 * when nothing was read or the result is below 0.5 percent.
 */
417 if (jcr->ReadBytes == 0) {
418 bstrncpy(compress, "None", sizeof(compress));
420 compression = (double)100 - 100.0 * ((double)jcr->JobBytes / (double)jcr->ReadBytes);
421 if (compression < 0.5) {
422 bstrncpy(compress, "None", sizeof(compress));
424 bsnprintf(compress, sizeof(compress), "%.1f %%", (float)compression);
427 jobstatus_to_ascii(jcr->FDJobStatus, fd_term_msg, sizeof(fd_term_msg));
428 jobstatus_to_ascii(jcr->SDJobStatus, sd_term_msg, sizeof(sd_term_msg));
430 // bmicrosleep(15, 0); /* for debugging SIGHUP */
/* Job summary report; msg_type is M_INFO unless the job ended in error. */
432 Jmsg(jcr, msg_type, 0, _("Bacula %s (%s): %s\n"
435 " Backup Level: %s%s\n"
436 " Client: \"%s\" %s\n"
437 " FileSet: \"%s\" %s\n"
438 " Pool: \"%s\" (From %s)\n"
439 " Storage: \"%s\" (From %s)\n"
440 " Scheduled time: %s\n"
443 " Elapsed time: %s\n"
445 " FD Files Written: %s\n"
446 " SD Files Written: %s\n"
447 " FD Bytes Written: %s (%sB)\n"
448 " SD Bytes Written: %s (%sB)\n"
450 " Software Compression: %s\n"
451 " Volume name(s): %s\n"
452 " Volume Session Id: %d\n"
453 " Volume Session Time: %d\n"
454 " Last Volume Bytes: %s (%sB)\n"
455 " Non-fatal FD errors: %d\n"
457 " FD termination status: %s\n"
458 " SD termination status: %s\n"
459 " Termination: %s\n\n"),
465 level_to_str(jcr->JobLevel), jcr->since,
466 jcr->client->name(), cr.Uname,
467 jcr->fileset->name(), jcr->FSCreateTime,
468 jcr->pool->name(), jcr->pool_source,
469 jcr->wstore->name(), jcr->storage_source,
473 edit_utime(RunTime, elapsed, sizeof(elapsed)),
475 edit_uint64_with_commas(jcr->jr.JobFiles, ec1),
476 edit_uint64_with_commas(jcr->SDJobFiles, ec2),
477 edit_uint64_with_commas(jcr->jr.JobBytes, ec3),
478 edit_uint64_with_suffix(jcr->jr.JobBytes, ec4),
479 edit_uint64_with_commas(jcr->SDJobBytes, ec5),
480 edit_uint64_with_suffix(jcr->SDJobBytes, ec6),
486 edit_uint64_with_commas(mr.VolBytes, ec7),
487 edit_uint64_with_suffix(mr.VolBytes, ec8),
494 Dmsg0(100, "Leave backup_cleanup()\n");
/*
 * Writes bootstrap records for this job to the target named by the job
 * resource WriteBootstrap setting. Nothing is written unless the job ended
 * JS_Terminated, jcr->jr.JobBytes is non-zero and WriteBootstrap is set.
 *
 * jcr - job control record of the finished job.
 *
 * No return value. Failure to get the volume parameters or to open the
 * target is reported with an M_ERROR message; the job status is set to
 * JS_ErrorTerminated on open failure, and on volume parameter failure when
 * jcr->SDJobFiles is non-zero.
 *
 * NOTE(review): brace-only lines, several declarations and the end of the
 * function are absent from this listing - confirm nesting, the closing of
 * fd/bpipe and the release of VolParams against the full source.
 */
497 void update_bootstrap_file(JCR *jcr)
499 /* Now update the bootstrap file if any */
500 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes &&
501 jcr->job->WriteBootstrap) {
/* The target name is WriteBootstrap after edit_job_codes() processing. */
505 POOLMEM *fname = get_pool_memory(PM_FNAME);
506 fname = edit_job_codes(jcr, fname, jcr->job->WriteBootstrap, "");
508 VOL_PARAMS *VolParams = NULL;
/*
 * Two ways of obtaining fd are visible: the write end of a pipe opened on
 * fname+1, or fopen() on fname. NOTE(review): the condition that selects
 * between them is not visible; the inline comment suggests a leading pipe
 * character selects the pipe.
 */
514 bpipe = open_bpipe(fname+1, 0, "w"); /* skip first char "|" */
515 fd = bpipe ? bpipe->wfd : NULL;
517 /* ***FIXME*** handle BASE */
/* An L_FULL job opens with w+b; any other level opens with a+b. */
518 fd = fopen(fname, jcr->JobLevel==L_FULL?"w+b":"a+b");
/* NOTE(review): the remaining arguments of this call are on a line not shown. */
521 VolCount = db_get_job_volume_parameters(jcr, jcr->db, jcr->JobId,
524 Jmsg(jcr, M_ERROR, 0, _("Could not get Job Volume Parameters to "
525 "update Bootstrap file. ERR=%s\n"), db_strerror(jcr->db));
/* Only downgrade the job status when the SD reported files for this job. */
526 if (jcr->SDJobFiles != 0) {
527 set_jcr_job_status(jcr, JS_ErrorTerminated);
531 /* Start output with when and who wrote it */
532 bstrftimes(edt, sizeof(edt), time(NULL));
533 fprintf(fd, "# %s - %s - %s%s\n", edt, jcr->jr.Job,
534 level_to_str(jcr->JobLevel), jcr->since);
/* One record per volume returned by db_get_job_volume_parameters(). */
535 for (int i=0; i < VolCount; i++) {
536 /* Write the record */
537 fprintf(fd, "Volume=\"%s\"\n", VolParams[i].VolumeName);
538 fprintf(fd, "MediaType=\"%s\"\n", VolParams[i].MediaType);
/* Session id and time come from jcr, not from the per-volume record. */
539 fprintf(fd, "VolSessionId=%u\n", jcr->VolSessionId);
540 fprintf(fd, "VolSessionTime=%u\n", jcr->VolSessionTime);
541 fprintf(fd, "VolFile=%u-%u\n", VolParams[i].StartFile,
542 VolParams[i].EndFile);
543 fprintf(fd, "VolBlock=%u-%u\n", VolParams[i].StartBlock,
544 VolParams[i].EndBlock);
545 fprintf(fd, "FileIndex=%d-%d\n", VolParams[i].FirstIndex,
546 VolParams[i].LastIndex);
/* NOTE(review): presumably reached when fd could not be opened - guard not visible. */
558 Jmsg(jcr, M_ERROR, 0, _("Could not open WriteBootstrap file:\n"
559 "%s: ERR=%s\n"), fname, be.strerror());
560 set_jcr_job_status(jcr, JS_ErrorTerminated);
562 free_pool_memory(fname);