2 Bacula® - The Network Backup Solution
4 Copyright (C) 2000-2009 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from
7 many others, a complete list can be found in the file AUTHORS.
8 This program is Free Software; you can redistribute it and/or
9 modify it under the terms of version two of the GNU General Public
10 License as published by the Free Software Foundation and included
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 Bacula® is a registered trademark of Kern Sibbald.
24 The licensor of Bacula is the Free Software Foundation Europe
25 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
26 Switzerland, email:ftf@fsfeurope.org.
30 * Bacula Director -- backup.c -- responsible for doing backup jobs
32 * Kern Sibbald, March MM
34 * Basic tasks done here:
35 * Open DB and create records for this job.
36 * Open Message Channel with Storage daemon to tell him a job will be starting.
37 * Open connection with File daemon and pass him commands
39 * When the File daemon finishes the job, update the DB.
/*
 * Message templates for the Director <-> File daemon conversation in this file.
 * backupcmd and storaddr are sent with fsend(); OKbackup and OKstore are the
 * replies checked with response(); EndJob and OldEndJob are the sscanf()
 * templates used in wait_for_job_termination().
 */
48 /* Commands sent to File daemon */
49 static char backupcmd[] = "backup\n";
/* storaddr is filled in do_backup() with store->address, store->SDDport and tls_need */
50 static char storaddr[] = "storage address=%s port=%d ssl=%d\n";
52 /* Responses received from File daemon */
53 static char OKbackup[] = "2000 OK backup\n";
54 static char OKstore[] = "2000 OK storage\n";
/* EndJob carries seven fields: TermCode, JobFiles, ReadBytes, JobBytes, Errors, VSS, Encrypt */
55 static char EndJob[] = "2800 End Job TermCode=%d JobFiles=%u "
56 "ReadBytes=%llu JobBytes=%llu Errors=%u "
57 "VSS=%d Encrypt=%d\n";
/* OldEndJob carries only the first five of those fields (no VSS / Encrypt) */
58 /* Pre 1.39.29 (04Dec06) EndJob */
59 static char OldEndJob[] = "2800 End Job TermCode=%d JobFiles=%u "
60 "ReadBytes=%llu JobBytes=%llu Errors=%u\n";
62 * Called here before the job is run to do the job
/*
 * do_backup_init -- set up a backup job before it is run.
 *
 * Visible steps, in order: hand Virtual Full jobs to do_vbackup_init(),
 * release the read storage, get or create the FileSet record, determine the
 * job level and "since" text, apply pool overrides, check for duplicate
 * jobs, get or create the Pool record, take the write storage from the Pool
 * resource, and start any clone jobs.
 *
 * NOTE(review): the bodies of the failure branches and the final return are
 * not visible in this listing; presumably every failure returns false and
 * the success path returns true -- confirm against the full file.
 */
65 bool do_backup_init(JCR *jcr)
/* Virtual Full jobs use their own init routine */
68 if (jcr->get_JobLevel() == L_VIRTUAL_FULL) {
69 return do_vbackup_init(jcr);
71 free_rstorage(jcr); /* we don't read so release */
73 if (!get_or_create_fileset_record(jcr)) {
78 * Get definitive Job level and since time
80 get_level_since_time(jcr, jcr->since, sizeof(jcr->since));
82 apply_pool_overrides(jcr);
/* Checked only after the level and pool overrides have been settled */
84 if (!allow_duplicate_job(jcr)) {
/* A PoolId of 0 is tested for right after the lookup below */
88 jcr->jr.PoolId = get_or_create_pool_record(jcr, jcr->pool->name());
89 if (jcr->jr.PoolId == 0) {
93 /* If pool storage specified, use it instead of job storage */
94 copy_wstorage(jcr, jcr->pool->storage, _("Pool resource"));
/* The condition guarding this fatal message is not visible in this listing */
97 Jmsg(jcr, M_FATAL, 0, _("No Storage specification found in Job or Pool.\n"));
101 create_clones(jcr); /* run any clone jobs */
106 /* Take all base jobs from job resource and find the
/*
 * get_base_jobids -- collect the base JobIds for the jobs listed in
 * jcr->job->base.
 *
 * jobids: output; ids are appended to jobids->list as text with "," between
 *         entries.
 * Returns false immediately when the job has no base list, otherwise
 * returns whether jobids->count is greater than zero.
 *
 * NOTE(review): the declarations of jr, job, id and str_jobid, the test that
 * decides when the "," separator is added, and the code that updates
 * jobids->count are not visible in this listing.
 */
109 static bool get_base_jobids(JCR *jcr, db_list_ctx *jobids)
116 if (!jcr->job->base) {
117 return false; /* no base job, stop accurate */
/* Lookup key: zeroed record carrying this job's StartTime plus each base job name */
120 memset(&jr, 0, sizeof(JOB_DBR));
121 jr.StartTime = jcr->jr.StartTime;
123 foreach_alist(job, jcr->job->base) {
124 bstrncpy(jr.Name, job->name(), sizeof(jr.Name));
/* The return value of db_get_base_jobid() is not checked on this line */
125 db_get_base_jobid(jcr, jcr->db, &jr, &id);
129 pm_strcat(jobids->list, ",");
131 pm_strcat(jobids->list, edit_uint64(id, str_jobid));
136 return jobids->count > 0;
140 * For each file in the current list, send "/path/fname\0LStat\0MD5" to FD
/*
 * accurate_list_handler -- per-row callback that forwards one file entry to
 * the File daemon over jcr->file_bsock.
 *
 * ctx:        the JCR of the running job (cast back from void *).
 * num_fields: number of columns in row; a sixth column is sent only when
 *             num_fields is exactly 6.
 * row:        row[0] and row[1] are sent concatenated, followed by a NUL
 *             byte and row[4]; row[5] is appended after another NUL when it
 *             is at least two characters long.
 *
 * NOTE(review): the return statements are not visible in this listing, so
 * the meaning of the int result cannot be stated from here.
 */
142 static int accurate_list_handler(void *ctx, int num_fields, char **row)
144 JCR *jcr = (JCR *)ctx;
146 if (job_canceled(jcr)) {
/*
 * NOTE(review): row is char **, so row[2] == 0 tests whether the column
 * pointer is NULL; it does not compare the column text with "0". Confirm
 * this matches the intent stated in the comment on the next line.
 */
150 if (row[2] == 0) { /* discard when file_index == 0 */
154 /* sending with checksum */
/* Requires two non-NUL characters in row[5], so a one-character value is skipped */
155 if (num_fields == 6 && row[5][0] && row[5][1]) { /* skip checksum = '0' */
156 jcr->file_bsock->fsend("%s%s%c%s%c%s",
157 row[0], row[1], 0, row[4], 0, row[5]);
/* Variant without the trailing row[5] field */
159 jcr->file_bsock->fsend("%s%s%c%s",
160 row[0], row[1], 0, row[4]);
166 * Send current file list to FD
167 * DIR -> FD : accurate files=xxxx
168 * DIR -> FD : /path/to/file\0Lstat\0MD5
169 * DIR -> FD : /path/to/dir/\0Lstat\0MD5
/*
 * send_accurate_current_files -- send the File daemon the list of files
 * already known to the catalog, for use by an accurate backup.
 *
 * Visible flow:
 *   - nothing is sent when jcr->accurate is off or the job is canceled;
 *   - Base level uses no previous job;
 *   - Full level uses the base JobIds from get_base_jobids();
 *   - other levels use db_accurate_get_jobids(), and an empty result is
 *     reported as M_FATAL;
 *   - the summed JobFiles of the selected jobs is announced with
 *     "accurate files=...", each catalog row is forwarded through
 *     accurate_list_handler(), and BNET_EOD ends the list.
 *
 * NOTE(review): the return statements, the local declarations (jobids, nb,
 * buf) and the condition choosing between the base-file list and the plain
 * file list are not visible in this listing.
 */
173 bool send_accurate_current_files(JCR *jcr)
180 if (!jcr->accurate || job_canceled(jcr)) {
183 /* In base level, no previous job is used */
184 if (jcr->get_JobLevel() == L_BASE) {
188 if (jcr->get_JobLevel() == L_FULL) {
189 /* On Full mode, if no previous base job, no accurate things */
190 if (!get_base_jobids(jcr, &jobids)) {
194 Jmsg(jcr, M_INFO, 0, _("Using BaseJobId(s): %s\n"), jobids.list);
197 /* For Incr/Diff level, we search for older jobs */
198 db_accurate_get_jobids(jcr, jcr->db, &jcr->jr, &jobids);
200 /* We are in Incr/Diff, but no Full to build the accurate list... */
201 if (jobids.count == 0) {
203 Jmsg(jcr, M_FATAL, 0, _("Cannot find previous jobids.\n"));
208 if (jcr->JobId) { /* display the message only for real jobs */
209 Jmsg(jcr, M_INFO, 0, _("Sending Accurate information.\n"));
212 /* to be able to allocate the right size for htable */
213 Mmsg(buf, "SELECT sum(JobFiles) FROM Job WHERE JobId IN (%s)", jobids.list);
/* The query result is collected as text in nb.list and forwarded unchanged */
214 db_sql_query(jcr->db, buf.c_str(), db_list_handler, &nb);
215 Dmsg2(200, "jobids=%s nb=%s\n", jobids.list, nb.list);
216 jcr->file_bsock->fsend("accurate files=%s\n", nb.list);
218 if (!db_open_batch_connexion(jcr, jcr->db)) {
219 Jmsg0(jcr, M_FATAL, 0, "Can't get batch sql connexion");
/* Base-file variant: record the file count, build the base list, then stream it */
224 jcr->nb_base_files = str_to_int64(nb.list);
225 db_create_base_file_list(jcr, jcr->db, jobids.list);
226 db_get_base_file_list(jcr, jcr->db,
227 accurate_list_handler, (void *)jcr);
/* Plain variant: stream the file list of the selected jobs over the batch connection */
230 db_get_file_list(jcr, jcr->db_batch, jobids.list,
231 accurate_list_handler, (void *)jcr);
234 /* TODO: close the batch connexion ? (can be used very soon) */
/* Marks the end of the file list for the File daemon */
236 jcr->file_bsock->signal(BNET_EOD);
243 * Do a backup of the specified FileSet
245 * Returns: false on failure
/*
 * do_backup -- run one backup job from the Director side.
 *
 * Visible sequence:
 *   1. Virtual Full jobs are handed to do_vbackup().
 *   2. The job start record is written and the status set to JS_Running.
 *   3. Storage daemon: connect, start the job, send "run", start the
 *      message thread.
 *   4. File daemon: connect, send include list, exclude list and level,
 *      then the storage address with the TLS requirement, then the
 *      runscript commands.
 *   5. The start time is refreshed and the start record updated again.
 *   6. The accurate file list is sent, followed by the backup command.
 *   7. wait_for_job_termination() collects the result, batch file records
 *      are written, and backup_cleanup() runs when the status is
 *      JS_Terminated.
 *
 * NOTE(review): the statements inside the failure branches, the return
 * statements and several local declarations (stat, fd, store, ed1) are not
 * visible in this listing.
 */
248 bool do_backup(JCR *jcr)
/* Stays BNET_TLS_NONE unless the storage resource has TLS enabled (see below) */
251 int tls_need = BNET_TLS_NONE;
256 if (jcr->get_JobLevel() == L_VIRTUAL_FULL) {
257 return do_vbackup(jcr);
260 /* Print Job Start message */
261 Jmsg(jcr, M_INFO, 0, _("Start Backup JobId %s, Job=%s\n"),
262 edit_uint64(jcr->JobId, ed1), jcr->Job);
264 set_jcr_job_status(jcr, JS_Running);
265 Dmsg2(100, "JobId=%d JobLevel=%c\n", jcr->jr.JobId, jcr->jr.JobLevel);
266 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
267 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
272 * Open a message channel connection with the Storage
273 * daemon. This is to let him know that our client
274 * will be contacting him for a backup session.
277 Dmsg0(110, "Open connection with storage daemon\n");
278 set_jcr_job_status(jcr, JS_WaitSD);
280 * Start conversation with Storage daemon
282 if (!connect_to_storage_daemon(jcr, 10, SDConnectTimeout, 1)) {
286 * Now start a job with the Storage daemon
288 if (!start_storage_daemon_job(jcr, NULL, jcr->wstorage)) {
293 * Start the job prior to starting the message thread below
294 * to avoid two threads from using the BSOCK structure at
297 if (!bnet_fsend(jcr->store_bsock, "run")) {
302 * Now start a Storage daemon message thread. Note,
303 * this thread is used to provide the catalog services
304 * for the backup job, including inserting the attributes
305 * into the catalog. See catalog_update() in catreq.c
307 if (!start_storage_daemon_message_thread(jcr)) {
310 Dmsg0(150, "Storage daemon connection OK\n");
312 set_jcr_job_status(jcr, JS_WaitFD);
313 if (!connect_to_file_daemon(jcr, 10, FDConnectTimeout, 1)) {
317 set_jcr_job_status(jcr, JS_Running);
318 fd = jcr->file_bsock;
320 if (!send_include_list(jcr)) {
324 if (!send_exclude_list(jcr)) {
328 if (!send_level_command(jcr)) {
333 * send Storage daemon address to the File daemon
/* When SDDport is unset (0) it is filled in from SDport on the storage resource itself */
336 if (store->SDDport == 0) {
337 store->SDDport = store->SDport;
340 /* TLS Requirement */
341 if (store->tls_enable) {
342 if (store->tls_require) {
343 tls_need = BNET_TLS_REQUIRED;
345 tls_need = BNET_TLS_OK;
349 fd->fsend(storaddr, store->address, store->SDDport, tls_need);
350 if (!response(jcr, fd, OKstore, "Storage", DISPLAY_ERROR)) {
354 if (!send_runscripts_commands(jcr)) {
359 * We re-update the job start record so that the start
360 * time is set after the run before job. This avoids
361 * that any files created by the run before job will
362 * be saved twice. They will be backed up in the current
363 * job, but not in the next one unless they are changed.
364 * Without this, they will be backed up in this job and
365 * in the next job run because in that case, their date
366 * is after the start of this run.
368 jcr->start_time = time(NULL);
369 jcr->jr.StartTime = jcr->start_time;
370 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
371 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
375 * If backup is in accurate mode, we send the list of
378 if (!send_accurate_current_files(jcr)) {
382 /* Send backup command */
383 fd->fsend(backupcmd);
384 if (!response(jcr, fd, OKbackup, "backup", DISPLAY_ERROR)) {
388 /* Pickup Job termination data */
389 stat = wait_for_job_termination(jcr);
390 db_write_batch_file_records(jcr); /* used by bulk batch file insert */
393 !db_commit_base_file_attributes_record(jcr, jcr->db))
395 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
/* The end-of-job report is produced here only for a JS_Terminated result */
398 if (stat == JS_Terminated) {
399 backup_cleanup(jcr, stat);
404 /* Come here only after starting SD thread */
405 /* Error path: mark the job failed, then wait again using FDConnectTimeout as the timeout */
406 set_jcr_job_status(jcr, JS_ErrorTerminated);
407 Dmsg1(400, "wait for sd. use=%d\n", jcr->use_count());
409 wait_for_job_termination(jcr, FDConnectTimeout);
410 Dmsg1(400, "after wait for sd. use=%d\n", jcr->use_count());
416 * Here we wait for the File daemon to signal termination,
417 * then we wait for the Storage daemon. When both
418 * are done, we return the job status.
419 * Also used by restore.c
/*
 * wait_for_job_termination -- wait for the File daemon's end-of-job message,
 * then for the Storage daemon, and return a job status.
 *
 * timeout: passed to start_bsock_timer() for the File daemon socket.
 *          NOTE(review): do_backup() also calls this with a single argument,
 *          so a default value is presumably declared with the prototype
 *          elsewhere -- confirm.
 *
 * Returns the first status that is not JS_Terminated, checked in the order
 * jcr->JobStatus, jcr->FDJobStatus; otherwise jcr->SDJobStatus.
 *
 * NOTE(review): the declarations of fd_ok, n, tid, VSS and Encrypt and
 * several branch headers are not visible in this listing.
 */
421 int wait_for_job_termination(JCR *jcr, int timeout)
424 BSOCK *fd = jcr->file_bsock;
426 uint32_t JobFiles, JobErrors;
/* NOTE(review): in the visible code JobWarnings is never assigned after this, so 0 is what gets stored in the JCR */
427 uint32_t JobWarnings = 0;
428 uint64_t ReadBytes = 0;
429 uint64_t JobBytes = 0;
434 set_jcr_job_status(jcr, JS_Running);
438 tid = start_bsock_timer(fd, timeout); /* TODO: New timeout directive??? */
440 /* Wait for Client to terminate */
441 while ((n = bget_dirmsg(fd)) >= 0) {
443 (sscanf(fd->msg, EndJob, &jcr->FDJobStatus, &JobFiles,
444 &ReadBytes, &JobBytes, &JobErrors, &VSS, &Encrypt) == 7 ||
445 sscanf(fd->msg, OldEndJob, &jcr->FDJobStatus, &JobFiles,
446 &ReadBytes, &JobBytes, &JobErrors) == 5)) {
/* A message matching either template (7 or 5 fields) supplies the File daemon's status */
448 set_jcr_job_status(jcr, jcr->FDJobStatus);
/* NOTE(review): the label says FDStatus but the value printed is jcr->JobStatus */
449 Dmsg1(100, "FDStatus=%c\n", (char)jcr->JobStatus);
451 Jmsg(jcr, M_WARNING, 0, _("Unexpected Client Job message: %s\n"),
454 if (job_canceled(jcr)) {
459 stop_bsock_timer(tid);
462 if (is_bnet_error(fd)) {
463 Jmsg(jcr, M_FATAL, 0, _("Network error with FD during %s: ERR=%s\n"),
464 job_type_to_str(jcr->get_JobType()), fd->bstrerror());
466 fd->signal(BNET_TERMINATE); /* tell Client we are terminating */
469 /* Force cancel in SD if failing */
470 if (job_canceled(jcr) || !fd_ok) {
471 cancel_storage_daemon_job(jcr);
474 /* Note, the SD stores in jcr->JobFiles/ReadBytes/JobBytes/JobErrors */
475 wait_for_storage_daemon_termination(jcr);
477 /* Return values from FD */
/* The File daemon's counters overwrite the JCR values, except JobErrors which is added */
479 jcr->JobFiles = JobFiles;
480 jcr->JobErrors += JobErrors; /* Keep total errors */
481 jcr->ReadBytes = ReadBytes;
482 jcr->JobBytes = JobBytes;
483 jcr->JobWarnings = JobWarnings;
485 jcr->Encrypt = Encrypt;
487 Jmsg(jcr, M_FATAL, 0, _("No Job status returned from FD.\n"));
490 // Dmsg4(100, "fd_ok=%d FDJS=%d JS=%d SDJS=%d\n", fd_ok, jcr->FDJobStatus,
491 // jcr->JobStatus, jcr->SDJobStatus);
493 /* Return the first error status we find Dir, FD, or SD */
494 if (!fd_ok || is_bnet_error(fd)) { /* if fd not set, that use !fd_ok */
495 jcr->FDJobStatus = JS_ErrorTerminated;
497 if (jcr->JobStatus != JS_Terminated) {
498 return jcr->JobStatus;
500 if (jcr->FDJobStatus != JS_Terminated) {
501 return jcr->FDJobStatus;
503 return jcr->SDJobStatus;
507 * Release resources allocated during backup.
/*
 * backup_cleanup -- finish a backup job and emit the end-of-job report.
 *
 * TermCode: termination code handed to update_job_end().
 *
 * Visible flow: Virtual Full jobs go to vbackup_cleanup(); otherwise the job
 * end is recorded, the Job, Client and Media catalog records are read back,
 * the bootstrap file is updated, a termination message is chosen from
 * jcr->JobStatus, rate and compression figures are computed, and a single
 * Jmsg() prints the report.
 *
 * NOTE(review): several case labels, guards and local declarations (mr, cr,
 * RunTime) are not visible in this listing.
 */
509 void backup_cleanup(JCR *jcr, int TermCode)
511 char sdt[50], edt[50], schedt[50];
512 char ec1[30], ec2[30], ec3[30], ec4[30], ec5[30], compress[50];
513 char ec6[30], ec7[30], ec8[30], elapsed[50];
514 char term_code[100], fd_term_msg[100], sd_term_msg[100];
515 const char *term_msg;
/* Severity of the final report; raised to M_ERROR for JS_ErrorTerminated below */
516 int msg_type = M_INFO;
519 double kbps, compression;
522 if (jcr->get_JobLevel() == L_VIRTUAL_FULL) {
523 vbackup_cleanup(jcr, TermCode);
527 Dmsg2(100, "Enter backup_cleanup %d %c\n", TermCode, TermCode);
528 memset(&mr, 0, sizeof(mr));
529 memset(&cr, 0, sizeof(cr));
531 update_job_end(jcr, TermCode);
/* A failed Job or Media lookup also marks the job JS_ErrorTerminated; a failed Client lookup only warns */
533 if (!db_get_job_record(jcr, jcr->db, &jcr->jr)) {
534 Jmsg(jcr, M_WARNING, 0, _("Error getting Job record for Job report: ERR=%s"),
535 db_strerror(jcr->db));
536 set_jcr_job_status(jcr, JS_ErrorTerminated);
539 bstrncpy(cr.Name, jcr->client->name(), sizeof(cr.Name));
540 if (!db_get_client_record(jcr, jcr->db, &cr)) {
541 Jmsg(jcr, M_WARNING, 0, _("Error getting Client record for Job report: ERR=%s"),
542 db_strerror(jcr->db));
545 bstrncpy(mr.VolumeName, jcr->VolumeName, sizeof(mr.VolumeName));
546 if (!db_get_media_record(jcr, jcr->db, &mr)) {
547 Jmsg(jcr, M_WARNING, 0, _("Error getting Media record for Volume \"%s\": ERR=%s"),
548 mr.VolumeName, db_strerror(jcr->db));
549 set_jcr_job_status(jcr, JS_ErrorTerminated);
552 update_bootstrap_file(jcr);
/* Choose the headline text from the job status as it stands after the lookups above */
554 switch (jcr->JobStatus) {
556 if (jcr->JobErrors || jcr->SDErrors) {
557 term_msg = _("Backup OK -- with warnings");
559 term_msg = _("Backup OK");
563 term_msg = _("Backup OK -- with warnings");
566 case JS_ErrorTerminated:
567 term_msg = _("*** Backup Error ***");
568 msg_type = M_ERROR; /* Generate error message */
/* Tell the Storage daemon to terminate and cancel its message thread if one exists */
569 if (jcr->store_bsock) {
570 jcr->store_bsock->signal(BNET_TERMINATE);
571 if (jcr->SD_msg_chan) {
572 pthread_cancel(jcr->SD_msg_chan);
577 term_msg = _("Backup Canceled");
578 if (jcr->store_bsock) {
579 jcr->store_bsock->signal(BNET_TERMINATE);
580 if (jcr->SD_msg_chan) {
581 pthread_cancel(jcr->SD_msg_chan);
586 term_msg = term_code;
/*
 * NOTE(review): sprintf() writes into term_code[100] without a bound and the
 * format string comes from the translation catalog; bsnprintf(), used for
 * compress further down, would keep the write within sizeof(term_code).
 */
587 sprintf(term_code, _("Inappropriate term code: %c\n"), jcr->JobStatus);
590 bstrftimes(schedt, sizeof(schedt), jcr->jr.SchedTime);
591 bstrftimes(sdt, sizeof(sdt), jcr->jr.StartTime);
592 bstrftimes(edt, sizeof(edt), jcr->jr.EndTime);
593 RunTime = jcr->jr.EndTime - jcr->jr.StartTime;
/* Rate is JobBytes / (1000 * RunTime). NOTE(review): any guard for RunTime == 0 is not visible here */
597 kbps = ((double)jcr->jr.JobBytes) / (1000.0 * (double)RunTime);
599 if (!db_get_job_volume_names(jcr, jcr->db, jcr->jr.JobId, &jcr->VolumeName)) {
601 * Note, if the job has erred, most likely it did not write any
602 * tape, so suppress this "error" message since in that case
603 * it is normal. Or look at it the other way, only for a
604 * normal exit should we complain about this error.
606 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes) {
607 Jmsg(jcr, M_ERROR, 0, "%s", db_strerror(jcr->db));
609 jcr->VolumeName[0] = 0; /* none */
/* Compression is 100 - 100 * JobBytes / ReadBytes; values under 0.5 are reported as "None" */
612 if (jcr->ReadBytes == 0) {
613 bstrncpy(compress, "None", sizeof(compress));
615 compression = (double)100 - 100.0 * ((double)jcr->JobBytes / (double)jcr->ReadBytes);
616 if (compression < 0.5) {
617 bstrncpy(compress, "None", sizeof(compress));
619 bsnprintf(compress, sizeof(compress), "%.1f %%", compression);
622 jobstatus_to_ascii(jcr->FDJobStatus, fd_term_msg, sizeof(fd_term_msg));
623 jobstatus_to_ascii(jcr->SDJobStatus, sd_term_msg, sizeof(sd_term_msg));
/* NOTE(review): divides by jcr->nb_base_files; the guard around this statement is not visible here */
626 Dmsg3(0, "Base files/Used files %lld/%lld=%.2f%%\n", jcr->nb_base_files,
627 jcr->nb_base_files_used,
628 jcr->nb_base_files_used*100.0/jcr->nb_base_files);
630 // bmicrosleep(15, 0); /* for debugging SIGHUP */
/* The whole report is one Jmsg() call; format lines and arguments must stay in step */
632 Jmsg(jcr, msg_type, 0, _("%s %s %s (%s): %s\n"
633 " Build OS: %s %s %s\n"
636 " Backup Level: %s%s\n"
637 " Client: \"%s\" %s\n"
638 " FileSet: \"%s\" %s\n"
639 " Pool: \"%s\" (From %s)\n"
640 " Catalog: \"%s\" (From %s)\n"
641 " Storage: \"%s\" (From %s)\n"
642 " Scheduled time: %s\n"
645 " Elapsed time: %s\n"
647 " FD Files Written: %s\n"
648 " SD Files Written: %s\n"
649 " FD Bytes Written: %s (%sB)\n"
650 " SD Bytes Written: %s (%sB)\n"
652 " Software Compression: %s\n"
656 " Volume name(s): %s\n"
657 " Volume Session Id: %d\n"
658 " Volume Session Time: %d\n"
659 " Last Volume Bytes: %s (%sB)\n"
660 " Non-fatal FD errors: %d\n"
662 " FD termination status: %s\n"
663 " SD termination status: %s\n"
664 " Termination: %s\n\n"),
665 BACULA, my_name, VERSION, LSMDATE, edt,
666 HOST_OS, DISTNAME, DISTVER,
669 level_to_str(jcr->get_JobLevel()), jcr->since,
670 jcr->client->name(), cr.Uname,
671 jcr->fileset->name(), jcr->FSCreateTime,
672 jcr->pool->name(), jcr->pool_source,
673 jcr->catalog->name(), jcr->catalog_source,
674 jcr->wstore->name(), jcr->wstore_source,
678 edit_utime(RunTime, elapsed, sizeof(elapsed)),
680 edit_uint64_with_commas(jcr->jr.JobFiles, ec1),
681 edit_uint64_with_commas(jcr->SDJobFiles, ec2),
682 edit_uint64_with_commas(jcr->jr.JobBytes, ec3),
683 edit_uint64_with_suffix(jcr->jr.JobBytes, ec4),
684 edit_uint64_with_commas(jcr->SDJobBytes, ec5),
685 edit_uint64_with_suffix(jcr->SDJobBytes, ec6),
688 jcr->VSS?_("yes"):_("no"),
689 jcr->Encrypt?_("yes"):_("no"),
690 jcr->accurate?_("yes"):_("no"),
694 edit_uint64_with_commas(mr.VolBytes, ec7),
695 edit_uint64_with_suffix(mr.VolBytes, ec8),
702 Dmsg0(100, "Leave backup_cleanup()\n");
/*
 * update_bootstrap_file -- write the job's volume information to the
 * destination named by the job's WriteBootstrap setting.
 *
 * Runs only when the job status is JS_Terminated, jcr->jr.JobBytes is
 * non-zero and jcr->job->WriteBootstrap is set. The destination name is
 * expanded with edit_job_codes(). Output goes either to a pipe opened with
 * open_bpipe() or to a file opened with fopen(); a header line is followed
 * by one record per volume. Failure to open the destination is reported as
 * M_ERROR and sets JS_ErrorTerminated.
 *
 * NOTE(review): the test that chooses pipe versus file, the remaining local
 * declarations (bpipe, fd, VolCount, be) and the code that closes the stream
 * are not visible in this listing.
 */
705 void update_bootstrap_file(JCR *jcr)
707 /* Now update the bootstrap file if any */
708 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes &&
709 jcr->job->WriteBootstrap) {
/* fname is pool memory; it is released with free_pool_memory() at the end */
713 POOLMEM *fname = get_pool_memory(PM_FNAME);
714 fname = edit_job_codes(jcr, fname, jcr->job->WriteBootstrap, "");
716 VOL_PARAMS *VolParams = NULL;
718 char edt[50], ed1[50], ed2[50];
/* Pipe variant: the command is the name with its first character removed */
722 bpipe = open_bpipe(fname+1, 0, "w"); /* skip first char "|" */
723 fd = bpipe ? bpipe->wfd : NULL;
725 /* ***FIXME*** handle BASE */
/* File variant: "w+b" for a Full level job, "a+b" for every other level */
726 fd = fopen(fname, jcr->get_JobLevel()==L_FULL?"w+b":"a+b");
729 VolCount = db_get_job_volume_parameters(jcr, jcr->db, jcr->JobId,
732 Jmsg(jcr, M_ERROR, 0, _("Could not get Job Volume Parameters to "
733 "update Bootstrap file. ERR=%s\n"), db_strerror(jcr->db));
/* The job is marked failed only when the Storage daemon reported files written */
734 if (jcr->SDJobFiles != 0) {
735 set_jcr_job_status(jcr, JS_ErrorTerminated);
739 /* Start output with when and who wrote it */
740 bstrftimes(edt, sizeof(edt), time(NULL));
741 fprintf(fd, "# %s - %s - %s%s\n", edt, jcr->jr.Job,
742 level_to_str(jcr->get_JobLevel()), jcr->since);
743 for (int i=0; i < VolCount; i++) {
744 /* Write the record */
745 fprintf(fd, "Volume=\"%s\"\n", VolParams[i].VolumeName);
746 fprintf(fd, "MediaType=\"%s\"\n", VolParams[i].MediaType);
/* Slot is written only when it is greater than zero */
747 if (VolParams[i].Slot > 0) {
748 fprintf(fd, "Slot=%d\n", VolParams[i].Slot);
/* Session id and time come from the JCR, so they are the same in every volume record */
750 fprintf(fd, "VolSessionId=%u\n", jcr->VolSessionId);
751 fprintf(fd, "VolSessionTime=%u\n", jcr->VolSessionTime);
752 fprintf(fd, "VolAddr=%s-%s\n",
753 edit_uint64(VolParams[i].StartAddr, ed1),
754 edit_uint64(VolParams[i].EndAddr, ed2));
755 fprintf(fd, "FileIndex=%d-%d\n", VolParams[i].FirstIndex,
756 VolParams[i].LastIndex);
768 Jmsg(jcr, M_ERROR, 0, _("Could not open WriteBootstrap file:\n"
769 "%s: ERR=%s\n"), fname, be.bstrerror());
770 set_jcr_job_status(jcr, JS_ErrorTerminated);
772 free_pool_memory(fname);