2 Bacula® - The Network Backup Solution
4 Copyright (C) 2000-2009 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from
7 many others, a complete list can be found in the file AUTHORS.
8 This program is Free Software; you can redistribute it and/or
9 modify it under the terms of version two of the GNU General Public
10 License as published by the Free Software Foundation and included
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 Bacula® is a registered trademark of Kern Sibbald.
24 The licensor of Bacula is the Free Software Foundation Europe
25 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
26 Switzerland, email:ftf@fsfeurope.org.
30 * Bacula Director -- backup.c -- responsible for doing backup jobs
32 * Kern Sibbald, March MM
34 * Basic tasks done here:
35 * Open DB and create records for this job.
36 * Open Message Channel with Storage daemon to tell him a job will be starting.
37 * Open connection with File daemon and pass him commands
39 * When the File daemon finishes the job, update the DB.
/*
 * Protocol strings exchanged with the File daemon during a backup.
 * Commands are sent with fsend(); responses are either handed to
 * response() as the expected reply or used as sscanf() formats.
 */
48 /* Commands sent to File daemon */
/* Sent by do_backup() once all setup commands have been acknowledged */
49 static char backupcmd[] = "backup\n";
/* Format: Storage daemon address, port and TLS requirement (tls_need) */
50 static char storaddr[] = "storage address=%s port=%d ssl=%d\n";
52 /* Responses received from File daemon */
/* Expected reply to backupcmd */
53 static char OKbackup[] = "2000 OK backup\n";
/* Expected reply to storaddr */
54 static char OKstore[] = "2000 OK storage\n";
/*
 * End-of-job report, used as a sscanf() format by
 * wait_for_job_termination(); carries seven fields.
 */
55 static char EndJob[] = "2800 End Job TermCode=%d JobFiles=%u "
56 "ReadBytes=%llu JobBytes=%llu Errors=%u "
57 "VSS=%d Encrypt=%d\n";
58 /* Pre 1.39.29 (04Dec06) EndJob */
/* Older five-field form, without the VSS and Encrypt fields */
59 static char OldEndJob[] = "2800 End Job TermCode=%d JobFiles=%u "
60 "ReadBytes=%llu JobBytes=%llu Errors=%u\n";
62 * Called here before the job is run to do the job-specific initialization.
/*
 * Prepare a backup job before it is run.
 *
 * A Virtual Full job (L_VIRTUAL_FULL) is handed off to do_vbackup_init().
 * For every other level the steps are, in order: release the read
 * storage, get or create the FileSet record, determine the job level and
 * "since" time, apply pool overrides, check for duplicate jobs, get or
 * create the Pool record (its id is stored in jcr->jr.PoolId), take the
 * write storage from the Pool resource, and start any clone jobs.
 *
 *   jcr - job control record of the job being initialized
 *
 * NOTE(review): each failed check presumably returns false and the normal
 * path returns true -- confirm.
 */
65 bool do_backup_init(JCR *jcr)
68 if (jcr->get_JobLevel() == L_VIRTUAL_FULL) {
69 return do_vbackup_init(jcr);
71 free_rstorage(jcr); /* we don't read so release */
73 if (!get_or_create_fileset_record(jcr)) {
78 * Get definitive Job level and since time
/* The "since" text is written into jcr->since (bounded by its size) */
80 get_level_since_time(jcr, jcr->since, sizeof(jcr->since));
82 apply_pool_overrides(jcr);
84 if (!allow_duplicate_job(jcr)) {
88 jcr->jr.PoolId = get_or_create_pool_record(jcr, jcr->pool->name());
/* A PoolId of 0 means the Pool record could not be obtained */
89 if (jcr->jr.PoolId == 0) {
93 /* If pool storage specified, use it instead of job storage */
94 copy_wstorage(jcr, jcr->pool->storage, _("Pool resource"));
97 Jmsg(jcr, M_FATAL, 0, _("No Storage specification found in Job or Pool.\n"));
101 create_clones(jcr); /* run any clone jobs */
106 /* Take all base jobs from job resource and find the matching base JobId of each one. */
/*
 * Collect the JobIds of the base jobs configured for this job into jobids
 * as a comma-separated list.
 *
 *   jcr    - job control record; jcr->job->base lists the base jobs
 *   jobids - receives the list; ids are appended with pm_strcat()
 *
 * Each base job is looked up by name with db_get_base_jobid(), using this
 * job's StartTime in the search record.
 *
 * Returns false when no base job is configured, otherwise true only if
 * jobids ended up non-empty.
 *
 * NOTE(review): jobids is a by-value pointer here; if pm_strcat() can
 * reallocate the buffer, the caller's pointer would go stale -- confirm.
 */
109 static bool get_base_jobids(JCR *jcr, POOLMEM *jobids)
116 if (!jcr->job->base) {
117 return false; /* no base job, stop accurate */
/* Zeroed search record; only StartTime and Name are filled in */
120 memset(&jr, 0, sizeof(JOB_DBR));
121 jr.StartTime = jcr->jr.StartTime;
123 foreach_alist(job, jcr->job->base) {
124 bstrncpy(jr.Name, job->name(), sizeof(jr.Name));
125 db_get_base_jobid(jcr, jcr->db, &jr, &id);
/* Separator goes between ids, then the id itself is appended as text */
129 pm_strcat(jobids, ",");
131 pm_strcat(jobids, edit_uint64(id, str_jobid));
135 return *jobids != '\0';
139 * For each file in the current list, send "/path/fname\0LStat\0MD5" to FD
141 static int accurate_list_handler(void *ctx, int num_fields, char **row)
143 JCR *jcr = (JCR *)ctx;
145 if (job_canceled(jcr)) {
149 if (row[2] == 0) { /* discard when file_index == 0 */
153 /* sending with checksum */
154 if (num_fields == 6 && row[5][0] && row[5][1]) { /* skip checksum = '0' */
155 jcr->file_bsock->fsend("%s%s%c%s%c%s",
156 row[0], row[1], 0, row[4], 0, row[5]);
158 jcr->file_bsock->fsend("%s%s%c%s",
159 row[0], row[1], 0, row[4]);
165 * Send current file list to FD
166 * DIR -> FD : accurate files=xxxx
167 * DIR -> FD : /path/to/file\0Lstat\0MD5
168 * DIR -> FD : /path/to/dir/\0Lstat\0MD5
/*
 * Send the list of files already known to the catalog to the File daemon
 * so that the backup can run in accurate mode.
 *
 *   jcr - job control record; jcr->file_bsock is the File daemon socket
 *
 * The list is built from base jobs for a Full job (get_base_jobids()) and
 * from earlier jobs for the other levels (db_accurate_get_jobids()).  The
 * total file count is sent first as "accurate files=<n>", each file is
 * then sent by accurate_list_handler(), and BNET_EOD closes the list.
 *
 * NOTE(review): the early-exit return values for "accurate disabled",
 * "job canceled" and "base level" are not shown here -- confirm.
 */
172 bool send_accurate_current_files(JCR *jcr)
179 if (!jcr->accurate || job_canceled(jcr)) {
182 /* In base level, no previous job is used */
183 if (jcr->get_JobLevel() == L_BASE) {
187 if (jcr->get_JobLevel() == L_FULL) {
188 /* On Full mode, if no previous base job, no accurate things */
189 if (!get_base_jobids(jcr, jobids)) {
193 Jmsg(jcr, M_INFO, 0, _("Using BaseJobId(s): %s\n"), jobids);
196 /* For Incr/Diff level, we search for older jobs */
197 db_accurate_get_jobids(jcr, jcr->db, &jcr->jr, jobids);
199 /* We are in Incr/Diff, but no Full to build the accurate list... */
202 Jmsg(jcr, M_FATAL, 0, _("Cannot find previous jobids.\n"));
207 if (jcr->JobId) { /* display the message only for real jobs */
208 Jmsg(jcr, M_INFO, 0, _("Sending Accurate information.\n"));
211 /* to be able to allocate the right size for htable */
/* The summed JobFiles of the selected jobs is collected into nb */
212 Mmsg(buf, "SELECT sum(JobFiles) FROM Job WHERE JobId IN (%s)", jobids.list);
213 db_sql_query(jcr->db, buf.c_str(), db_list_handler, &nb);
214 Dmsg2(200, "jobids=%s nb=%s\n", jobids.list, nb.list);
215 jcr->file_bsock->fsend("accurate files=%s\n", nb.list);
217 if (!db_open_batch_connexion(jcr, jcr->db)) {
/* NOTE(review): unlike the other messages here, no _() and no trailing newline */
218 Jmsg0(jcr, M_FATAL, 0, "Can't get batch sql connexion");
223 jcr->nb_base_files = str_to_int64(nb);
/* Base-file path: the list is created on jcr->db, then streamed row by row */
224 db_create_base_file_list(jcr, jcr->db, jobids);
225 db_get_base_file_list(jcr, jcr->db,
226 accurate_list_handler, (void *)jcr);
/* Regular path: the file list is read through the batch connection */
229 db_get_file_list(jcr, jcr->db_batch, jobids,
230 accurate_list_handler, (void *)jcr);
233 /* TODO: close the batch connexion ? (can be used very soon) */
235 jcr->file_bsock->signal(BNET_EOD);
242 * Do a backup of the specified FileSet
244 * Returns: false on failure
/*
 * Run a backup job.
 *
 * Virtual Full jobs are delegated to do_vbackup().  For every other level
 * the steps are, in order:
 *   - log the job start and write the job start record to the catalog;
 *   - connect to the Storage daemon, start the job there, send "run" and
 *     start the Storage daemon message thread;
 *   - connect to the File daemon and send the include list, the exclude
 *     list and the level command;
 *   - send the Storage daemon address, port and TLS requirement (storaddr)
 *     and wait for OKstore;
 *   - send the runscript commands, then reset the start time and update
 *     the job start record a second time;
 *   - send the accurate file list (send_accurate_current_files());
 *   - send backupcmd, wait for OKbackup, wait for job termination and
 *     write the batched file records.
 * backup_cleanup() is called when the termination status is JS_Terminated.
 *
 *   jcr - job control record of the job to run
 *
 * Returns: false on failure
 */
247 bool do_backup(JCR *jcr)
250 int tls_need = BNET_TLS_NONE;
255 if (jcr->get_JobLevel() == L_VIRTUAL_FULL) {
256 return do_vbackup(jcr);
259 /* Print Job Start message */
260 Jmsg(jcr, M_INFO, 0, _("Start Backup JobId %s, Job=%s\n"),
261 edit_uint64(jcr->JobId, ed1), jcr->Job);
263 set_jcr_job_status(jcr, JS_Running);
264 Dmsg2(100, "JobId=%d JobLevel=%c\n", jcr->jr.JobId, jcr->jr.JobLevel);
265 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
266 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
271 * Open a message channel connection with the Storage
272 * daemon. This is to let him know that our client
273 * will be contacting him for a backup session.
276 Dmsg0(110, "Open connection with storage daemon\n");
277 set_jcr_job_status(jcr, JS_WaitSD);
279 * Start conversation with Storage daemon
281 if (!connect_to_storage_daemon(jcr, 10, SDConnectTimeout, 1)) {
285 * Now start a job with the Storage daemon
287 if (!start_storage_daemon_job(jcr, NULL, jcr->wstorage)) {
292 * Start the job prior to starting the message thread below
293 * to avoid two threads from using the BSOCK structure at
296 if (!bnet_fsend(jcr->store_bsock, "run")) {
301 * Now start a Storage daemon message thread. Note,
302 * this thread is used to provide the catalog services
303 * for the backup job, including inserting the attributes
304 * into the catalog. See catalog_update() in catreq.c
306 if (!start_storage_daemon_message_thread(jcr)) {
309 Dmsg0(150, "Storage daemon connection OK\n");
311 set_jcr_job_status(jcr, JS_WaitFD);
312 if (!connect_to_file_daemon(jcr, 10, FDConnectTimeout, 1)) {
316 set_jcr_job_status(jcr, JS_Running);
317 fd = jcr->file_bsock;
319 if (!send_include_list(jcr)) {
323 if (!send_exclude_list(jcr)) {
327 if (!send_level_command(jcr)) {
332 * send Storage daemon address to the File daemon
/* An SDDport of 0 is replaced by the regular SDport */
335 if (store->SDDport == 0) {
336 store->SDDport = store->SDport;
339 /* TLS Requirement */
/*
 * tls_need stays BNET_TLS_NONE unless TLS is enabled for the storage;
 * presumably REQUIRED when tls_require is set and OK otherwise -- confirm.
 */
340 if (store->tls_enable) {
341 if (store->tls_require) {
342 tls_need = BNET_TLS_REQUIRED;
344 tls_need = BNET_TLS_OK;
348 fd->fsend(storaddr, store->address, store->SDDport, tls_need);
349 if (!response(jcr, fd, OKstore, "Storage", DISPLAY_ERROR)) {
353 if (!send_runscripts_commands(jcr)) {
358 * We re-update the job start record so that the start
359 * time is set after the run before job. This avoids
360 * that any files created by the run before job will
361 * be saved twice. They will be backed up in the current
362 * job, but not in the next one unless they are changed.
363 * Without this, they will be backed up in this job and
364 * in the next job run because in that case, their date
365 * is after the start of this run.
367 jcr->start_time = time(NULL);
368 jcr->jr.StartTime = jcr->start_time;
369 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
370 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
374 * If backup is in accurate mode, we send the list of
377 if (!send_accurate_current_files(jcr)) {
381 /* Send backup command */
382 fd->fsend(backupcmd);
383 if (!response(jcr, fd, OKbackup, "backup", DISPLAY_ERROR)) {
387 /* Pickup Job termination data */
388 stat = wait_for_job_termination(jcr);
389 db_write_batch_file_records(jcr); /* used by bulk batch file insert */
/* A failed commit of the base-file attributes is reported as fatal */
392 !db_commit_base_file_attributes_record(jcr, jcr->db))
394 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
397 if (stat == JS_Terminated) {
398 backup_cleanup(jcr, stat);
403 /* Come here only after starting SD thread */
/* Error path: mark the job failed, then wait for the daemons with a timeout */
405 set_jcr_job_status(jcr, JS_ErrorTerminated);
406 Dmsg1(400, "wait for sd. use=%d\n", jcr->use_count());
408 wait_for_job_termination(jcr, FDConnectTimeout);
409 Dmsg1(400, "after wait for sd. use=%d\n", jcr->use_count());
415 * Here we wait for the File daemon to signal termination,
416 * then we wait for the Storage daemon. When both
417 * are done, we return the job status.
418 * Also used by restore.c
/*
 * Wait for the File daemon to report the end of the job, then wait for
 * the Storage daemon, and return a job status.
 *
 *   jcr     - job control record; jcr->file_bsock is the File daemon socket
 *   timeout - handed to start_bsock_timer() for the File daemon socket
 *             NOTE(review): do_backup() also calls this with one argument,
 *             so a default presumably exists in the prototype -- confirm.
 *
 * Messages are read with bget_dirmsg() until it returns a negative value.
 * A message matching EndJob (7 fields) or OldEndJob (5 fields) supplies
 * the File daemon status and counters; anything else is logged as an
 * unexpected message.  The Storage daemon job is canceled when the job
 * was canceled or no valid end-of-job message arrived.
 *
 * Returns jcr->JobStatus if it is not JS_Terminated, else
 * jcr->FDJobStatus if it is not JS_Terminated, else jcr->SDJobStatus.
 */
420 int wait_for_job_termination(JCR *jcr, int timeout)
423 BSOCK *fd = jcr->file_bsock;
/*
 * NOTE(review): JobFiles and JobErrors have no initializer; they are only
 * set by a successful sscanf() below -- confirm they are read only then.
 */
425 uint32_t JobFiles, JobErrors;
426 uint32_t JobWarnings = 0;
427 uint64_t ReadBytes = 0;
428 uint64_t JobBytes = 0;
433 set_jcr_job_status(jcr, JS_Running);
437 tid = start_bsock_timer(fd, timeout); /* TODO: New timeout directive??? */
439 /* Wait for Client to terminate */
440 while ((n = bget_dirmsg(fd)) >= 0) {
442 (sscanf(fd->msg, EndJob, &jcr->FDJobStatus, &JobFiles,
443 &ReadBytes, &JobBytes, &JobErrors, &VSS, &Encrypt) == 7 ||
444 sscanf(fd->msg, OldEndJob, &jcr->FDJobStatus, &JobFiles,
445 &ReadBytes, &JobBytes, &JobErrors) == 5)) {
/* The status reported by the File daemon becomes the job status */
447 set_jcr_job_status(jcr, jcr->FDJobStatus);
448 Dmsg1(100, "FDStatus=%c\n", (char)jcr->JobStatus);
450 Jmsg(jcr, M_WARNING, 0, _("Unexpected Client Job message: %s\n"),
453 if (job_canceled(jcr)) {
458 stop_bsock_timer(tid);
461 if (is_bnet_error(fd)) {
462 Jmsg(jcr, M_FATAL, 0, _("Network error with FD during %s: ERR=%s\n"),
463 job_type_to_str(jcr->get_JobType()), fd->bstrerror());
465 fd->signal(BNET_TERMINATE); /* tell Client we are terminating */
468 /* Force cancel in SD if failing */
469 if (job_canceled(jcr) || !fd_ok) {
470 cancel_storage_daemon_job(jcr);
473 /* Note, the SD stores in jcr->JobFiles/ReadBytes/JobBytes/JobErrors */
474 wait_for_storage_daemon_termination(jcr);
476 /* Return values from FD */
/* The File daemon counters overwrite the jcr fields; errors are accumulated */
478 jcr->JobFiles = JobFiles;
479 jcr->JobErrors += JobErrors; /* Keep total errors */
480 jcr->ReadBytes = ReadBytes;
481 jcr->JobBytes = JobBytes;
482 jcr->JobWarnings = JobWarnings;
484 jcr->Encrypt = Encrypt;
486 Jmsg(jcr, M_FATAL, 0, _("No Job status returned from FD.\n"));
489 // Dmsg4(100, "fd_ok=%d FDJS=%d JS=%d SDJS=%d\n", fd_ok, jcr->FDJobStatus,
490 // jcr->JobStatus, jcr->SDJobStatus);
492 /* Return the first error status we find Dir, FD, or SD */
493 if (!fd_ok || is_bnet_error(fd)) { /* if fd not set, that use !fd_ok */
494 jcr->FDJobStatus = JS_ErrorTerminated;
496 if (jcr->JobStatus != JS_Terminated) {
497 return jcr->JobStatus;
499 if (jcr->FDJobStatus != JS_Terminated) {
500 return jcr->FDJobStatus;
502 return jcr->SDJobStatus;
506 * Release resources allocated during backup.
/*
 * Finish a backup job: record the job end in the catalog, fetch the Job,
 * Client and Media records, update the bootstrap file and emit the final
 * job report through Jmsg().
 *
 *   jcr      - job control record of the finished job
 *   TermCode - termination code handed to update_job_end(); Virtual Full
 *              jobs pass it on to vbackup_cleanup() instead
 *
 * A failure to read the Job or Media record sets the job status to
 * JS_ErrorTerminated.  For the error and canceled statuses the Storage
 * daemon socket is sent BNET_TERMINATE and the Storage daemon message
 * thread is canceled.
 *
 * NOTE(review): the three "Error getting ... record" warnings have no
 * trailing newline, unlike the other messages here -- confirm intended.
 */
508 void backup_cleanup(JCR *jcr, int TermCode)
510 char sdt[50], edt[50], schedt[50];
511 char ec1[30], ec2[30], ec3[30], ec4[30], ec5[30], compress[50];
512 char ec6[30], ec7[30], ec8[30], elapsed[50];
513 char term_code[100], fd_term_msg[100], sd_term_msg[100];
514 const char *term_msg;
515 int msg_type = M_INFO;
518 double kbps, compression;
521 if (jcr->get_JobLevel() == L_VIRTUAL_FULL) {
522 vbackup_cleanup(jcr, TermCode);
526 Dmsg2(100, "Enter backup_cleanup %d %c\n", TermCode, TermCode);
/* Start from zeroed Media (mr) and Client (cr) search records */
527 memset(&mr, 0, sizeof(mr));
528 memset(&cr, 0, sizeof(cr));
530 update_job_end(jcr, TermCode);
532 if (!db_get_job_record(jcr, jcr->db, &jcr->jr)) {
533 Jmsg(jcr, M_WARNING, 0, _("Error getting Job record for Job report: ERR=%s"),
534 db_strerror(jcr->db));
535 set_jcr_job_status(jcr, JS_ErrorTerminated);
538 bstrncpy(cr.Name, jcr->client->name(), sizeof(cr.Name));
539 if (!db_get_client_record(jcr, jcr->db, &cr)) {
540 Jmsg(jcr, M_WARNING, 0, _("Error getting Client record for Job report: ERR=%s"),
541 db_strerror(jcr->db));
544 bstrncpy(mr.VolumeName, jcr->VolumeName, sizeof(mr.VolumeName));
545 if (!db_get_media_record(jcr, jcr->db, &mr)) {
546 Jmsg(jcr, M_WARNING, 0, _("Error getting Media record for Volume \"%s\": ERR=%s"),
547 mr.VolumeName, db_strerror(jcr->db));
548 set_jcr_job_status(jcr, JS_ErrorTerminated);
551 update_bootstrap_file(jcr);
/* Pick the report headline (term_msg) and message type from the final status */
553 switch (jcr->JobStatus) {
555 if (jcr->JobErrors || jcr->SDErrors) {
556 term_msg = _("Backup OK -- with warnings");
558 term_msg = _("Backup OK");
562 term_msg = _("Backup OK -- with warnings");
565 case JS_ErrorTerminated:
566 term_msg = _("*** Backup Error ***");
567 msg_type = M_ERROR; /* Generate error message */
568 if (jcr->store_bsock) {
569 jcr->store_bsock->signal(BNET_TERMINATE);
570 if (jcr->SD_msg_chan) {
571 pthread_cancel(jcr->SD_msg_chan);
576 term_msg = _("Backup Canceled");
577 if (jcr->store_bsock) {
578 jcr->store_bsock->signal(BNET_TERMINATE);
579 if (jcr->SD_msg_chan) {
580 pthread_cancel(jcr->SD_msg_chan);
585 term_msg = term_code;
586 sprintf(term_code, _("Inappropriate term code: %c\n"), jcr->JobStatus);
/* Format the scheduled, start and end times for the report */
589 bstrftimes(schedt, sizeof(schedt), jcr->jr.SchedTime);
590 bstrftimes(sdt, sizeof(sdt), jcr->jr.StartTime);
591 bstrftimes(edt, sizeof(edt), jcr->jr.EndTime);
592 RunTime = jcr->jr.EndTime - jcr->jr.StartTime;
/* Rate: JobBytes divided by 1000 and by the run time (RunTime) */
596 kbps = ((double)jcr->jr.JobBytes) / (1000.0 * (double)RunTime);
598 if (!db_get_job_volume_names(jcr, jcr->db, jcr->jr.JobId, &jcr->VolumeName)) {
600 * Note, if the job has erred, most likely it did not write any
601 * tape, so suppress this "error" message since in that case
602 * it is normal. Or look at it the other way, only for a
603 * normal exit should we complain about this error.
605 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes) {
606 Jmsg(jcr, M_ERROR, 0, "%s", db_strerror(jcr->db));
608 jcr->VolumeName[0] = 0; /* none */
/*
 * Software compression is reported as "None" when nothing was read or
 * the computed ratio is below 0.5 percent.
 */
611 if (jcr->ReadBytes == 0) {
612 bstrncpy(compress, "None", sizeof(compress));
614 compression = (double)100 - 100.0 * ((double)jcr->JobBytes / (double)jcr->ReadBytes);
615 if (compression < 0.5) {
616 bstrncpy(compress, "None", sizeof(compress));
618 bsnprintf(compress, sizeof(compress), "%.1f %%", compression);
621 jobstatus_to_ascii(jcr->FDJobStatus, fd_term_msg, sizeof(fd_term_msg));
622 jobstatus_to_ascii(jcr->SDJobStatus, sd_term_msg, sizeof(sd_term_msg));
625 Dmsg3(0, "Base files/Used files %lld/%lld=%.2f%%\n", jcr->nb_base_files,
626 jcr->nb_base_files_used,
627 jcr->nb_base_files_used*100.0/jcr->nb_base_files);
629 // bmicrosleep(15, 0); /* for debugging SIGHUP */
631 Jmsg(jcr, msg_type, 0, _("%s %s %s (%s): %s\n"
632 " Build OS: %s %s %s\n"
635 " Backup Level: %s%s\n"
636 " Client: \"%s\" %s\n"
637 " FileSet: \"%s\" %s\n"
638 " Pool: \"%s\" (From %s)\n"
639 " Catalog: \"%s\" (From %s)\n"
640 " Storage: \"%s\" (From %s)\n"
641 " Scheduled time: %s\n"
644 " Elapsed time: %s\n"
646 " FD Files Written: %s\n"
647 " SD Files Written: %s\n"
648 " FD Bytes Written: %s (%sB)\n"
649 " SD Bytes Written: %s (%sB)\n"
651 " Software Compression: %s\n"
655 " Volume name(s): %s\n"
656 " Volume Session Id: %d\n"
657 " Volume Session Time: %d\n"
658 " Last Volume Bytes: %s (%sB)\n"
659 " Non-fatal FD errors: %d\n"
661 " FD termination status: %s\n"
662 " SD termination status: %s\n"
663 " Termination: %s\n\n"),
664 BACULA, my_name, VERSION, LSMDATE, edt,
665 HOST_OS, DISTNAME, DISTVER,
668 level_to_str(jcr->get_JobLevel()), jcr->since,
669 jcr->client->name(), cr.Uname,
670 jcr->fileset->name(), jcr->FSCreateTime,
671 jcr->pool->name(), jcr->pool_source,
672 jcr->catalog->name(), jcr->catalog_source,
673 jcr->wstore->name(), jcr->wstore_source,
677 edit_utime(RunTime, elapsed, sizeof(elapsed)),
679 edit_uint64_with_commas(jcr->jr.JobFiles, ec1),
680 edit_uint64_with_commas(jcr->SDJobFiles, ec2),
681 edit_uint64_with_commas(jcr->jr.JobBytes, ec3),
682 edit_uint64_with_suffix(jcr->jr.JobBytes, ec4),
683 edit_uint64_with_commas(jcr->SDJobBytes, ec5),
684 edit_uint64_with_suffix(jcr->SDJobBytes, ec6),
687 jcr->VSS?_("yes"):_("no"),
688 jcr->Encrypt?_("yes"):_("no"),
689 jcr->accurate?_("yes"):_("no"),
693 edit_uint64_with_commas(mr.VolBytes, ec7),
694 edit_uint64_with_suffix(mr.VolBytes, ec8),
701 Dmsg0(100, "Leave backup_cleanup()\n");
/*
 * Write this job's volume information to the Job's WriteBootstrap target.
 *
 *   jcr - job control record of the finished job
 *
 * Nothing is done unless the job status is JS_Terminated, the job wrote
 * data (jcr->jr.JobBytes is non-zero) and the Job resource defines
 * WriteBootstrap.  The target name is expanded with edit_job_codes().
 * It is either opened as a pipe to a program (presumably when the name
 * starts with '|' -- confirm) or as a file, truncated for a Full job
 * ("w+b") and appended to for the other levels ("a+b").
 *
 * One record is written per volume: Volume, MediaType, Slot (only when
 * greater than 0), VolSessionId, VolSessionTime, VolAddr and FileIndex.
 * Both error messages below are followed by setting JS_ErrorTerminated;
 * the first one only when the Storage daemon reported files written.
 */
704 void update_bootstrap_file(JCR *jcr)
706 /* Now update the bootstrap file if any */
707 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes &&
708 jcr->job->WriteBootstrap) {
/* fname is pool memory; it is released at the end of this block */
712 POOLMEM *fname = get_pool_memory(PM_FNAME);
713 fname = edit_job_codes(jcr, fname, jcr->job->WriteBootstrap, "");
715 VOL_PARAMS *VolParams = NULL;
717 char edt[50], ed1[50], ed2[50];
721 bpipe = open_bpipe(fname+1, 0, "w"); /* skip first char "|" */
/* fd stays NULL when the pipe could not be opened */
722 fd = bpipe ? bpipe->wfd : NULL;
724 /* ***FIXME*** handle BASE */
725 fd = fopen(fname, jcr->get_JobLevel()==L_FULL?"w+b":"a+b");
728 VolCount = db_get_job_volume_parameters(jcr, jcr->db, jcr->JobId,
731 Jmsg(jcr, M_ERROR, 0, _("Could not get Job Volume Parameters to "
732 "update Bootstrap file. ERR=%s\n"), db_strerror(jcr->db));
733 if (jcr->SDJobFiles != 0) {
734 set_jcr_job_status(jcr, JS_ErrorTerminated);
738 /* Start output with when and who wrote it */
739 bstrftimes(edt, sizeof(edt), time(NULL));
740 fprintf(fd, "# %s - %s - %s%s\n", edt, jcr->jr.Job,
741 level_to_str(jcr->get_JobLevel()), jcr->since);
742 for (int i=0; i < VolCount; i++) {
743 /* Write the record */
744 fprintf(fd, "Volume=\"%s\"\n", VolParams[i].VolumeName);
745 fprintf(fd, "MediaType=\"%s\"\n", VolParams[i].MediaType);
746 if (VolParams[i].Slot > 0) {
747 fprintf(fd, "Slot=%d\n", VolParams[i].Slot);
/* The session id and time come from the job, not from the volume entry */
749 fprintf(fd, "VolSessionId=%u\n", jcr->VolSessionId);
750 fprintf(fd, "VolSessionTime=%u\n", jcr->VolSessionTime);
751 fprintf(fd, "VolAddr=%s-%s\n",
752 edit_uint64(VolParams[i].StartAddr, ed1),
753 edit_uint64(VolParams[i].EndAddr, ed2));
754 fprintf(fd, "FileIndex=%d-%d\n", VolParams[i].FirstIndex,
755 VolParams[i].LastIndex);
767 Jmsg(jcr, M_ERROR, 0, _("Could not open WriteBootstrap file:\n"
768 "%s: ERR=%s\n"), fname, be.bstrerror());
769 set_jcr_job_status(jcr, JS_ErrorTerminated);
771 free_pool_memory(fname);