2 Bacula® - The Network Backup Solution
4 Copyright (C) 2000-2009 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from
7 many others, a complete list can be found in the file AUTHORS.
8 This program is Free Software; you can redistribute it and/or
9 modify it under the terms of version two of the GNU General Public
10 License as published by the Free Software Foundation and included
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 Bacula® is a registered trademark of Kern Sibbald.
24 The licensor of Bacula is the Free Software Foundation Europe
25 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
26 Switzerland, email:ftf@fsfeurope.org.
30 * Bacula Director -- backup.c -- responsible for doing backup jobs
32 * Kern Sibbald, March MM
34 * Basic tasks done here:
35 * Open DB and create records for this job.
36 * Open Message Channel with Storage daemon to tell him a job will be starting.
37 * Open connection with File daemon and pass him commands
39 * When the File daemon finishes the job, update the DB.
/* Text protocol exchanged with the File daemon (FD) during a backup job. */
48 /* Commands sent to File daemon */
49 static char backupcmd[] = "backup\n";
/* Format arguments where storaddr is sent: storage address, port, TLS requirement code */
50 static char storaddr[] = "storage address=%s port=%d ssl=%d\n";
52 /* Responses received from File daemon */
53 static char OKbackup[] = "2000 OK backup\n";
54 static char OKstore[] = "2000 OK storage\n";
/* End-of-job report parsed with sscanf() in wait_for_job_termination(): seven fields */
55 static char EndJob[] = "2800 End Job TermCode=%d JobFiles=%u "
56 "ReadBytes=%llu JobBytes=%llu Errors=%u "
57 "VSS=%d Encrypt=%d\n";
/* Older five-field form of the same report, tried when the seven-field parse does not match */
58 /* Pre 1.39.29 (04Dec06) EndJob */
59 static char OldEndJob[] = "2800 End Job TermCode=%d JobFiles=%u "
60 "ReadBytes=%llu JobBytes=%llu Errors=%u\n";
62 * Called here before the job is run to do the job
/*
 * do_backup_init: setup performed before a backup job runs.
 *
 * Virtual Full jobs are handed to do_vbackup_init() and its result is
 * returned. For other levels the function releases the read storage,
 * checks the FileSet record, fixes the job level and "since" time,
 * applies pool overrides, checks for duplicate jobs, resolves the Pool
 * record id, selects the write storage and finally runs clone jobs.
 *
 * NOTE(review): the bodies of the failure branches and the final return
 * are not visible here; presumably each failed check returns false.
 */
65 bool do_backup_init(JCR *jcr)
68 if (jcr->get_JobLevel() == L_VIRTUAL_FULL) {
69 return do_vbackup_init(jcr);
71 free_rstorage(jcr); /* we don't read so release */
/* Branch taken when get_or_create_fileset_record() reports failure */
73 if (!get_or_create_fileset_record(jcr)) {
78 * Get definitive Job level and since time
/* jcr->since is filled in here and later printed in the job report and bootstrap header */
80 get_level_since_time(jcr, jcr->since, sizeof(jcr->since));
82 apply_pool_overrides(jcr);
84 if (!allow_duplicate_job(jcr)) {
/* A PoolId of 0 is treated as "no pool record available" */
88 jcr->jr.PoolId = get_or_create_pool_record(jcr, jcr->pool->name());
89 if (jcr->jr.PoolId == 0) {
93 /* If pool storage specified, use it instead of job storage */
94 copy_wstorage(jcr, jcr->pool->storage, _("Pool resource"));
/* Fatal job message; the condition guarding it is not visible in this listing */
97 Jmsg(jcr, M_FATAL, 0, _("No Storage specification found in Job or Pool.\n"));
101 create_clones(jcr); /* run any clone jobs */
106 /* Take all base jobs from job resource and find the
/*
 * get_base_jobids: collect the job ids of the base jobs configured for
 * this job into the caller-supplied pool-memory string "jobids".
 *
 * For every entry of jcr->job->base a job id is looked up with
 * db_get_base_jobid(), using the entry name and the current job's
 * StartTime as search keys; ids are appended to jobids as decimal text,
 * with "," used as separator.
 *
 * Returns false when the job has no base list, otherwise true only if
 * jobids is non-empty on exit.
 *
 * NOTE(review): the conditions guarding the two pm_strcat() calls are not
 * visible here; presumably ids that were not found are skipped.
 */
109 static bool get_base_jobids(JCR *jcr, POOLMEM *jobids)
116 if (!jcr->job->base) {
117 return false; /* no base job, stop accurate */
/* jr is the search record: zeroed, then keyed by StartTime and (per entry) Name */
120 memset(&jr, 0, sizeof(JOB_DBR));
121 jr.StartTime = jcr->jr.StartTime;
123 foreach_alist(job, jcr->job->base) {
124 bstrncpy(jr.Name, job->name(), sizeof(jr.Name));
125 db_get_base_jobid(jcr, jcr->db, &jr, &id);
129 pm_strcat(jobids, ",");
131 pm_strcat(jobids, edit_uint64(id, str_jobid));
/* true when at least one character was written to jobids */
135 return *jobids != '\0';
139 * For each file in the current list, send "/path/fname\0LStat\0MD5" to FD
/*
 * accurate_list_handler: per-row callback used while streaming the
 * accurate file list to the File daemon.
 *
 *   ctx        - the JCR of the running job (cast back from void *)
 *   num_fields - number of columns in row
 *   row        - column values as C strings
 *
 * One message is sent on jcr->file_bsock per row: row[0] immediately
 * followed by row[1], a NUL byte, then row[4]. When the row has six
 * columns and row[5] is at least two characters long, a second NUL byte
 * and row[5] are appended as well.
 *
 * NOTE(review): return values are not visible in this listing.
 */
141 static int accurate_list_handler(void *ctx, int num_fields, char **row)
143 JCR *jcr = (JCR *)ctx;
145 if (job_canceled(jcr)) {
/*
 * NOTE(review): this compares the row[2] pointer with 0 (a NULL test), not
 * the column's text with "0"; confirm it matches the intent stated in the
 * trailing comment.
 */
149 if (row[2] == 0) { /* discard when file_index == 0 */
153 /* sending with checksum */
/* Requires a checksum string of two or more characters, so a lone "0" is not sent */
154 if (num_fields == 6 && row[5][0] && row[5][1]) { /* skip checksum = '0' */
155 jcr->file_bsock->fsend("%s%s%c%s%c%s",
156 row[0], row[1], 0, row[4], 0, row[5]);
/* Same record without the trailing checksum field */
158 jcr->file_bsock->fsend("%s%s%c%s",
159 row[0], row[1], 0, row[4]);
165 * Send current file list to FD
166 * DIR -> FD : accurate files=xxxx
167 * DIR -> FD : /path/to/file\0Lstat\0MD5
168 * DIR -> FD : /path/to/dir/\0Lstat\0MD5
/*
 * send_accurate_current_files: send the list of already backed-up files
 * to the File daemon for an accurate-mode job.
 *
 * The function does its work only when jcr->accurate is set and the job
 * is not canceled, and it has a separate early check for L_BASE level.
 * The job ids to build the list from come from get_base_jobids() for
 * Full level and from db_accurate_get_jobids() otherwise. The FD first
 * receives "accurate files=<nb>", then one record per file through
 * accurate_list_handler(), then a BNET_EOD signal.
 *
 * The two pool-memory buffers allocated here (nb, jobids) are released
 * before returning.
 *
 * NOTE(review): the return statements and the branch structure between
 * the base-file path and the plain file-list path are not visible in
 * this listing.
 */
172 bool send_accurate_current_files(JCR *jcr)
177 if (!jcr->accurate || job_canceled(jcr)) {
180 /* In base level, no previous job is used */
181 if (jcr->get_JobLevel() == L_BASE) {
/* nb receives the file count as text; jobids receives the job id list */
185 POOLMEM *nb = get_pool_memory(PM_FNAME);
186 POOLMEM *jobids = get_pool_memory(PM_FNAME);
187 nb[0] = jobids[0] = '\0';
189 if (jcr->get_JobLevel() == L_FULL) {
190 /* On Full mode, if no previous base job, no accurate things */
191 if (!get_base_jobids(jcr, jobids)) {
195 Jmsg(jcr, M_INFO, 0, _("Using BaseJobId(s): %s\n"), jobids);
198 /* For Incr/Diff level, we search for older jobs */
199 db_accurate_get_jobids(jcr, jcr->db, &jcr->jr, jobids);
201 /* We are in Incr/Diff, but no Full to build the accurate list... */
204 Jmsg(jcr, M_FATAL, 0, _("Cannot find previous jobids.\n"));
209 if (jcr->JobId) { /* display the message only for real jobs */
210 Jmsg(jcr, M_INFO, 0, _("Sending Accurate information.\n"));
213 /* to be able to allocate the right size for htable */
/* Total JobFiles over the selected jobs; presumably db_get_int_handler stores the result text in nb */
214 Mmsg(buf, "SELECT sum(JobFiles) FROM Job WHERE JobId IN (%s)",jobids);
215 db_sql_query(jcr->db, buf.c_str(), db_get_int_handler, nb);
/* NOTE(review): debug level 0 here, unlike the 100+ levels used elsewhere in this file; confirm this is intended */
216 Dmsg2(0, "jobids=%s nb=%s\n", jobids, nb);
217 jcr->file_bsock->fsend("accurate files=%s\n", nb);
/* The file list below is read through jcr->db_batch, so the batch connection must be open first */
219 if (!db_open_batch_connexion(jcr, jcr->db)) {
221 Jmsg0(jcr, M_FATAL, 0, "Can't get dedicate sql connexion");
/* Base-file path: remember the count and stream the base file list */
226 jcr->nb_base_files = str_to_int64(nb);
227 db_create_base_file_list(jcr, jcr->db, jobids);
228 db_get_base_file_list(jcr, jcr->db,
229 accurate_list_handler, (void *)jcr);
/* Plain path: stream the file list of the selected jobs over the batch connection */
232 db_get_file_list(jcr, jcr->db_batch, jobids,
233 accurate_list_handler, (void *)jcr);
236 /* TODO: close the batch connexion ? (can be used very soon) */
/* Signal the FD that the list is finished */
238 jcr->file_bsock->signal(BNET_EOD);
241 free_pool_memory(jobids);
242 free_pool_memory(nb);
248 * Do a backup of the specified FileSet
250 * Returns: false on failure
/*
 * do_backup: run a backup job from the Director's side.
 *
 * Virtual Full jobs are delegated to do_vbackup(). Otherwise, in order:
 *   1. log the start message and write the job start record;
 *   2. connect to the Storage daemon, start the SD job, send "run" and
 *      start the SD message thread;
 *   3. connect to the File daemon and send the include list, exclude
 *      list and level command;
 *   4. send the storage address/port/TLS requirement to the FD;
 *   5. send the runscript commands, then re-stamp the job start time;
 *   6. send the accurate file list (if applicable) and the backup command;
 *   7. wait for termination, flush batch file records and, on
 *      JS_Terminated, call backup_cleanup().
 *
 * NOTE(review): the failure branches, labels and return statements are
 * not visible in this listing; the header comment on the function states
 * only "Returns: false on failure".
 */
253 bool do_backup(JCR *jcr)
/* Default when the storage resource does not enable TLS */
256 int tls_need = BNET_TLS_NONE;
261 if (jcr->get_JobLevel() == L_VIRTUAL_FULL) {
262 return do_vbackup(jcr);
265 /* Print Job Start message */
266 Jmsg(jcr, M_INFO, 0, _("Start Backup JobId %s, Job=%s\n"),
267 edit_uint64(jcr->JobId, ed1), jcr->Job);
269 set_jcr_job_status(jcr, JS_Running);
270 Dmsg2(100, "JobId=%d JobLevel=%c\n", jcr->jr.JobId, jcr->jr.JobLevel);
/* A catalog failure here is reported as a fatal job message carrying the DB error text */
271 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
272 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
277 * Open a message channel connection with the Storage
278 * daemon. This is to let him know that our client
279 * will be contacting him for a backup session.
282 Dmsg0(110, "Open connection with storage daemon\n");
283 set_jcr_job_status(jcr, JS_WaitSD);
285 * Start conversation with Storage daemon
287 if (!connect_to_storage_daemon(jcr, 10, SDConnectTimeout, 1)) {
291 * Now start a job with the Storage daemon
/* Only write storage is passed; the read-storage argument is NULL */
293 if (!start_storage_daemon_job(jcr, NULL, jcr->wstorage)) {
298 * Start the job prior to starting the message thread below
299 * to avoid two threads from using the BSOCK structure at
302 if (!bnet_fsend(jcr->store_bsock, "run")) {
307 * Now start a Storage daemon message thread. Note,
308 * this thread is used to provide the catalog services
309 * for the backup job, including inserting the attributes
310 * into the catalog. See catalog_update() in catreq.c
312 if (!start_storage_daemon_message_thread(jcr)) {
315 Dmsg0(150, "Storage daemon connection OK\n");
317 set_jcr_job_status(jcr, JS_WaitFD);
318 if (!connect_to_file_daemon(jcr, 10, FDConnectTimeout, 1)) {
322 set_jcr_job_status(jcr, JS_Running);
/* fd is the File daemon socket used for the rest of the conversation */
323 fd = jcr->file_bsock;
325 if (!send_include_list(jcr)) {
329 if (!send_exclude_list(jcr)) {
333 if (!send_level_command(jcr)) {
338 * send Storage daemon address to the File daemon
/* Fall back to SDport when no separate SDDport is configured (value 0) */
341 if (store->SDDport == 0) {
342 store->SDDport = store->SDport;
345 /* TLS Requirement */
/* With TLS enabled: REQUIRED when tls_require is set, OK in the other assignment */
346 if (store->tls_enable) {
347 if (store->tls_require) {
348 tls_need = BNET_TLS_REQUIRED;
350 tls_need = BNET_TLS_OK;
354 fd->fsend(storaddr, store->address, store->SDDport, tls_need);
/* The FD must answer with the OKstore string */
355 if (!response(jcr, fd, OKstore, "Storage", DISPLAY_ERROR)) {
359 if (!send_runscripts_commands(jcr)) {
364 * We re-update the job start record so that the start
365 * time is set after the run before job. This avoids
366 * that any files created by the run before job will
367 * be saved twice. They will be backed up in the current
368 * job, but not in the next one unless they are changed.
369 * Without this, they will be backed up in this job and
370 * in the next job run because in that case, their date
371 * is after the start of this run.
373 jcr->start_time = time(NULL);
374 jcr->jr.StartTime = jcr->start_time;
375 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
376 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
380 * If backup is in accurate mode, we send the list of
383 if (!send_accurate_current_files(jcr)) {
387 /* Send backup command */
388 fd->fsend(backupcmd);
389 if (!response(jcr, fd, OKbackup, "backup", DISPLAY_ERROR)) {
393 /* Pickup Job termination data */
/* Called with one argument here; the timeout presumably has a default declared elsewhere */
394 stat = wait_for_job_termination(jcr);
395 db_write_batch_file_records(jcr); /* used by bulk batch file insert */
/* Tail of a condition whose first part is not visible in this listing */
398 !db_commit_base_file_attributes_record(jcr, jcr->db))
400 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
/* Cleanup and job report are produced from here only for a normal termination */
403 if (stat == JS_Terminated) {
404 backup_cleanup(jcr, stat);
409 /* Come here only after starting SD thread */
/* Error path: mark the job failed, then wait for termination with a bounded timeout */
411 set_jcr_job_status(jcr, JS_ErrorTerminated);
412 Dmsg1(400, "wait for sd. use=%d\n", jcr->use_count());
414 wait_for_job_termination(jcr, FDConnectTimeout);
415 Dmsg1(400, "after wait for sd. use=%d\n", jcr->use_count());
421 * Here we wait for the File daemon to signal termination,
422 * then we wait for the Storage daemon. When both
423 * are done, we return the job status.
424 * Also used by restore.c
/*
 * wait_for_job_termination: read File daemon messages until the
 * connection ends, then wait for the Storage daemon and return a job
 * status code.
 *
 *   jcr     - job control record; FDJobStatus, JobFiles, JobErrors,
 *             ReadBytes, JobBytes, JobWarnings and Encrypt are updated
 *   timeout - passed to start_bsock_timer() for the FD socket
 *
 * Return value (see the end of the function): jcr->JobStatus if it is not
 * JS_Terminated, else jcr->FDJobStatus if that is not JS_Terminated,
 * else jcr->SDJobStatus.
 *
 * NOTE(review): several guards (e.g. the ones selecting the branches
 * around the timer, the EndJob parse and the copy-back of the FD values)
 * are not visible in this listing.
 */
426 int wait_for_job_termination(JCR *jcr, int timeout)
429 BSOCK *fd = jcr->file_bsock;
/* NOTE(review): JobFiles and JobErrors have no initialiser; they are only written by the sscanf() calls below */
431 uint32_t JobFiles, JobErrors;
432 uint32_t JobWarnings = 0;
433 uint64_t ReadBytes = 0;
434 uint64_t JobBytes = 0;
439 set_jcr_job_status(jcr, JS_Running);
443 tid = start_bsock_timer(fd, timeout); /* TODO: New timeout directive??? */
445 /* Wait for Client to terminate */
446 while ((n = bget_dirmsg(fd)) >= 0) {
/* Accept either the seven-field EndJob report or the five-field OldEndJob report */
448 (sscanf(fd->msg, EndJob, &jcr->FDJobStatus, &JobFiles,
449 &ReadBytes, &JobBytes, &JobErrors, &VSS, &Encrypt) == 7 ||
450 sscanf(fd->msg, OldEndJob, &jcr->FDJobStatus, &JobFiles,
451 &ReadBytes, &JobBytes, &JobErrors) == 5)) {
/* The FD's termination code becomes the job status */
453 set_jcr_job_status(jcr, jcr->FDJobStatus);
454 Dmsg1(100, "FDStatus=%c\n", (char)jcr->JobStatus);
456 Jmsg(jcr, M_WARNING, 0, _("Unexpected Client Job message: %s\n"),
459 if (job_canceled(jcr)) {
464 stop_bsock_timer(tid);
467 if (is_bnet_error(fd)) {
468 Jmsg(jcr, M_FATAL, 0, _("Network error with FD during %s: ERR=%s\n"),
469 job_type_to_str(jcr->get_JobType()), fd->bstrerror());
471 fd->signal(BNET_TERMINATE); /* tell Client we are terminating */
474 /* Force cancel in SD if failing */
475 if (job_canceled(jcr) || !fd_ok) {
476 cancel_storage_daemon_job(jcr);
479 /* Note, the SD stores in jcr->JobFiles/ReadBytes/JobBytes/JobErrors */
480 wait_for_storage_daemon_termination(jcr);
482 /* Return values from FD */
/* The FD figures overwrite the jcr counters, except JobErrors which is accumulated */
484 jcr->JobFiles = JobFiles;
485 jcr->JobErrors += JobErrors; /* Keep total errors */
486 jcr->ReadBytes = ReadBytes;
487 jcr->JobBytes = JobBytes;
488 jcr->JobWarnings = JobWarnings;
490 jcr->Encrypt = Encrypt;
492 Jmsg(jcr, M_FATAL, 0, _("No Job status returned from FD.\n"));
495 // Dmsg4(100, "fd_ok=%d FDJS=%d JS=%d SDJS=%d\n", fd_ok, jcr->FDJobStatus,
496 // jcr->JobStatus, jcr->SDJobStatus);
498 /* Return the first error status we find Dir, FD, or SD */
499 if (!fd_ok || is_bnet_error(fd)) { /* if fd not set, that use !fd_ok */
500 jcr->FDJobStatus = JS_ErrorTerminated;
/* Precedence of the returned status: Director, then File daemon, then Storage daemon */
502 if (jcr->JobStatus != JS_Terminated) {
503 return jcr->JobStatus;
505 if (jcr->FDJobStatus != JS_Terminated) {
506 return jcr->FDJobStatus;
508 return jcr->SDJobStatus;
512 * Release resources allocated during backup.
/*
 * backup_cleanup: finish a backup job and emit the end-of-job report.
 *
 *   jcr      - job control record
 *   TermCode - termination code handed to update_job_end()
 *
 * Virtual Full jobs are delegated to vbackup_cleanup(). Otherwise the
 * function records the job end, re-reads the Job, Client and Media
 * records from the catalog, updates the bootstrap file, picks the
 * termination text from jcr->JobStatus, computes elapsed time, transfer
 * rate and compression ratio, and prints the report with Jmsg().
 *
 * NOTE(review): several case labels of the switch and a number of the
 * report's format lines and arguments are not visible in this listing.
 */
514 void backup_cleanup(JCR *jcr, int TermCode)
/* Scratch buffers for formatted times, numbers and status texts used by the report */
516 char sdt[50], edt[50], schedt[50];
517 char ec1[30], ec2[30], ec3[30], ec4[30], ec5[30], compress[50];
518 char ec6[30], ec7[30], ec8[30], elapsed[50];
519 char term_code[100], fd_term_msg[100], sd_term_msg[100];
520 const char *term_msg;
/* Report severity; raised to M_ERROR in the JS_ErrorTerminated case */
521 int msg_type = M_INFO;
524 double kbps, compression;
527 if (jcr->get_JobLevel() == L_VIRTUAL_FULL) {
528 vbackup_cleanup(jcr, TermCode);
532 Dmsg2(100, "Enter backup_cleanup %d %c\n", TermCode, TermCode);
/* mr and cr are the Media and Client lookup records */
533 memset(&mr, 0, sizeof(mr));
534 memset(&cr, 0, sizeof(cr));
536 update_job_end(jcr, TermCode);
/* Failing to re-read the Job record is reported as a warning but also marks the job as failed */
538 if (!db_get_job_record(jcr, jcr->db, &jcr->jr)) {
539 Jmsg(jcr, M_WARNING, 0, _("Error getting Job record for Job report: ERR=%s"),
540 db_strerror(jcr->db));
541 set_jcr_job_status(jcr, JS_ErrorTerminated);
/* Client lookup by name; a failure only produces a warning */
544 bstrncpy(cr.Name, jcr->client->name(), sizeof(cr.Name));
545 if (!db_get_client_record(jcr, jcr->db, &cr)) {
546 Jmsg(jcr, M_WARNING, 0, _("Error getting Client record for Job report: ERR=%s"),
547 db_strerror(jcr->db));
/* Media lookup by the job's volume name; a failure marks the job as failed */
550 bstrncpy(mr.VolumeName, jcr->VolumeName, sizeof(mr.VolumeName));
551 if (!db_get_media_record(jcr, jcr->db, &mr)) {
552 Jmsg(jcr, M_WARNING, 0, _("Error getting Media record for Volume \"%s\": ERR=%s"),
553 mr.VolumeName, db_strerror(jcr->db));
554 set_jcr_job_status(jcr, JS_ErrorTerminated);
557 update_bootstrap_file(jcr);
/* Choose the termination text (and severity) from the final job status */
559 switch (jcr->JobStatus) {
561 if (jcr->JobErrors || jcr->SDErrors) {
562 term_msg = _("Backup OK -- with warnings");
564 term_msg = _("Backup OK");
568 term_msg = _("Backup OK -- with warnings");
571 case JS_ErrorTerminated:
572 term_msg = _("*** Backup Error ***");
573 msg_type = M_ERROR; /* Generate error message */
/* Tell the SD to terminate and cancel the SD message thread if one is recorded */
574 if (jcr->store_bsock) {
575 jcr->store_bsock->signal(BNET_TERMINATE);
576 if (jcr->SD_msg_chan) {
577 pthread_cancel(jcr->SD_msg_chan);
582 term_msg = _("Backup Canceled");
583 if (jcr->store_bsock) {
584 jcr->store_bsock->signal(BNET_TERMINATE);
585 if (jcr->SD_msg_chan) {
586 pthread_cancel(jcr->SD_msg_chan);
/*
 * NOTE(review): sprintf() into term_code[100] is unbounded and the format
 * is a translatable string; bsnprintf() is used for compress[] below.
 */
591 term_msg = term_code;
592 sprintf(term_code, _("Inappropriate term code: %c\n"), jcr->JobStatus);
595 bstrftimes(schedt, sizeof(schedt), jcr->jr.SchedTime);
596 bstrftimes(sdt, sizeof(sdt), jcr->jr.StartTime);
597 bstrftimes(edt, sizeof(edt), jcr->jr.EndTime);
598 RunTime = jcr->jr.EndTime - jcr->jr.StartTime;
/* Rate in units of 1000 bytes per second; the guard for RunTime == 0 is not visible here */
602 kbps = ((double)jcr->jr.JobBytes) / (1000.0 * (double)RunTime);
604 if (!db_get_job_volume_names(jcr, jcr->db, jcr->jr.JobId, &jcr->VolumeName)) {
606 * Note, if the job has erred, most likely it did not write any
607 * tape, so suppress this "error" message since in that case
608 * it is normal. Or look at it the other way, only for a
609 * normal exit should we complain about this error.
611 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes) {
612 Jmsg(jcr, M_ERROR, 0, "%s", db_strerror(jcr->db));
614 jcr->VolumeName[0] = 0; /* none */
/* Compression is reported as "None" when nothing was read or the saving is below 0.5 percent */
617 if (jcr->ReadBytes == 0) {
618 bstrncpy(compress, "None", sizeof(compress));
620 compression = (double)100 - 100.0 * ((double)jcr->JobBytes / (double)jcr->ReadBytes);
621 if (compression < 0.5) {
622 bstrncpy(compress, "None", sizeof(compress));
624 bsnprintf(compress, sizeof(compress), "%.1f %%", compression);
627 jobstatus_to_ascii(jcr->FDJobStatus, fd_term_msg, sizeof(fd_term_msg));
628 jobstatus_to_ascii(jcr->SDJobStatus, sd_term_msg, sizeof(sd_term_msg));
/* NOTE(review): divides by jcr->nb_base_files; the condition guarding this call is not visible here */
631 Dmsg3(0, "Base files/Used files %lld/%lld=%.2f%%\n", jcr->nb_base_files,
632 jcr->nb_base_files_used,
633 jcr->nb_base_files_used*100.0/jcr->nb_base_files);
635 // bmicrosleep(15, 0); /* for debugging SIGHUP */
/* End-of-job report: one translatable format string followed by its arguments in the same order */
637 Jmsg(jcr, msg_type, 0, _("%s %s %s (%s): %s\n"
638 " Build OS: %s %s %s\n"
641 " Backup Level: %s%s\n"
642 " Client: \"%s\" %s\n"
643 " FileSet: \"%s\" %s\n"
644 " Pool: \"%s\" (From %s)\n"
645 " Catalog: \"%s\" (From %s)\n"
646 " Storage: \"%s\" (From %s)\n"
647 " Scheduled time: %s\n"
650 " Elapsed time: %s\n"
652 " FD Files Written: %s\n"
653 " SD Files Written: %s\n"
654 " FD Bytes Written: %s (%sB)\n"
655 " SD Bytes Written: %s (%sB)\n"
657 " Software Compression: %s\n"
661 " Volume name(s): %s\n"
662 " Volume Session Id: %d\n"
663 " Volume Session Time: %d\n"
664 " Last Volume Bytes: %s (%sB)\n"
665 " Non-fatal FD errors: %d\n"
667 " FD termination status: %s\n"
668 " SD termination status: %s\n"
669 " Termination: %s\n\n"),
670 BACULA, my_name, VERSION, LSMDATE, edt,
671 HOST_OS, DISTNAME, DISTVER,
674 level_to_str(jcr->get_JobLevel()), jcr->since,
675 jcr->client->name(), cr.Uname,
676 jcr->fileset->name(), jcr->FSCreateTime,
677 jcr->pool->name(), jcr->pool_source,
678 jcr->catalog->name(), jcr->catalog_source,
679 jcr->wstore->name(), jcr->wstore_source,
683 edit_utime(RunTime, elapsed, sizeof(elapsed)),
685 edit_uint64_with_commas(jcr->jr.JobFiles, ec1),
686 edit_uint64_with_commas(jcr->SDJobFiles, ec2),
687 edit_uint64_with_commas(jcr->jr.JobBytes, ec3),
688 edit_uint64_with_suffix(jcr->jr.JobBytes, ec4),
689 edit_uint64_with_commas(jcr->SDJobBytes, ec5),
690 edit_uint64_with_suffix(jcr->SDJobBytes, ec6),
693 jcr->VSS?_("yes"):_("no"),
694 jcr->Encrypt?_("yes"):_("no"),
695 jcr->accurate?_("yes"):_("no"),
699 edit_uint64_with_commas(mr.VolBytes, ec7),
700 edit_uint64_with_suffix(mr.VolBytes, ec8),
707 Dmsg0(100, "Leave backup_cleanup()\n");
/*
 * update_bootstrap_file: write the bootstrap records for a finished job.
 *
 * Work is done only when the job ended with JS_Terminated, wrote a
 * non-zero number of bytes and the Job resource has a WriteBootstrap
 * setting. The target name is produced by edit_job_codes(); output goes
 * either to a pipe opened with open_bpipe() or to a regular file, and
 * consists of a header line followed by one group of records per entry
 * returned by db_get_job_volume_parameters().
 *
 * NOTE(review): the condition choosing between pipe and file, the checks
 * on fd and VolCount, and the code closing the output are not visible in
 * this listing.
 */
710 void update_bootstrap_file(JCR *jcr)
712 /* Now update the bootstrap file if any */
713 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes &&
714 jcr->job->WriteBootstrap) {
/* fname holds the WriteBootstrap setting after edit_job_codes() processing */
718 POOLMEM *fname = get_pool_memory(PM_FNAME);
719 fname = edit_job_codes(jcr, fname, jcr->job->WriteBootstrap, "");
721 VOL_PARAMS *VolParams = NULL;
723 char edt[50], ed1[50], ed2[50];
/* Pipe target: the command is fname without its first character; fd is NULL if the pipe could not be opened */
727 bpipe = open_bpipe(fname+1, 0, "w"); /* skip first char "|" */
728 fd = bpipe ? bpipe->wfd : NULL;
730 /* ***FIXME*** handle BASE */
/* File target: opened with "w+b" for Full level, with "a+b" for every other level */
731 fd = fopen(fname, jcr->get_JobLevel()==L_FULL?"w+b":"a+b");
734 VolCount = db_get_job_volume_parameters(jcr, jcr->db, jcr->JobId,
737 Jmsg(jcr, M_ERROR, 0, _("Could not get Job Volume Parameters to "
738 "update Bootstrap file. ERR=%s\n"), db_strerror(jcr->db));
/* The job is failed only if the SD reported files written */
739 if (jcr->SDJobFiles != 0) {
740 set_jcr_job_status(jcr, JS_ErrorTerminated);
744 /* Start output with when and who wrote it */
745 bstrftimes(edt, sizeof(edt), time(NULL));
746 fprintf(fd, "# %s - %s - %s%s\n", edt, jcr->jr.Job,
747 level_to_str(jcr->get_JobLevel()), jcr->since);
/* One group of records per volume-parameter entry */
748 for (int i=0; i < VolCount; i++) {
749 /* Write the record */
750 fprintf(fd, "Volume=\"%s\"\n", VolParams[i].VolumeName);
751 fprintf(fd, "MediaType=\"%s\"\n", VolParams[i].MediaType);
/* Slot is written only when it is positive */
752 if (VolParams[i].Slot > 0) {
753 fprintf(fd, "Slot=%d\n", VolParams[i].Slot);
/* Session id and time come from the job, not from the volume entry */
755 fprintf(fd, "VolSessionId=%u\n", jcr->VolSessionId);
756 fprintf(fd, "VolSessionTime=%u\n", jcr->VolSessionTime);
757 fprintf(fd, "VolAddr=%s-%s\n",
758 edit_uint64(VolParams[i].StartAddr, ed1),
759 edit_uint64(VolParams[i].EndAddr, ed2));
760 fprintf(fd, "FileIndex=%d-%d\n", VolParams[i].FirstIndex,
761 VolParams[i].LastIndex);
/* Failure to open the bootstrap target is an error and marks the job as failed */
773 Jmsg(jcr, M_ERROR, 0, _("Could not open WriteBootstrap file:\n"
774 "%s: ERR=%s\n"), fname, be.bstrerror());
775 set_jcr_job_status(jcr, JS_ErrorTerminated);
777 free_pool_memory(fname);