2 Bacula® - The Network Backup Solution
4 Copyright (C) 2000-2009 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from
7 many others, a complete list can be found in the file AUTHORS.
8 This program is Free Software; you can redistribute it and/or
9 modify it under the terms of version two of the GNU General Public
10 License as published by the Free Software Foundation and included
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 Bacula® is a registered trademark of Kern Sibbald.
24 The licensor of Bacula is the Free Software Foundation Europe
25 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
26 Switzerland, email:ftf@fsfeurope.org.
30 * Bacula Director -- backup.c -- responsible for doing backup jobs
32 * Kern Sibbald, March MM
34 * Basic tasks done here:
35 * Open DB and create records for this job.
36 * Open Message Channel with Storage daemon to tell him a job will be starting.
37 * Open connection with File daemon and pass him commands
39 * When the File daemon finishes the job, update the DB.
/*
 * Protocol strings for the dialogue with the File daemon.
 * The command strings are used as fsend() formats, the response
 * strings are handed to response() or used as sscanf() formats
 * further down in this file.
 */
48 /* Commands sent to File daemon */
49 static char backupcmd[] = "backup\n";
/* Filled in do_backup() with the storage address, port and TLS need */
50 static char storaddr[] = "storage address=%s port=%d ssl=%d\n";
52 /* Responses received from File daemon */
53 static char OKbackup[] = "2000 OK backup\n";
54 static char OKstore[] = "2000 OK storage\n";
/* Current end-of-job report: seven fields, see wait_for_job_termination() */
55 static char EndJob[] = "2800 End Job TermCode=%d JobFiles=%u "
56 "ReadBytes=%llu JobBytes=%llu Errors=%u "
57 "VSS=%d Encrypt=%d\n";
58 /* Pre 1.39.29 (04Dec06) EndJob */
/* Older report without the VSS and Encrypt fields: five fields */
59 static char OldEndJob[] = "2800 End Job TermCode=%d JobFiles=%u "
60 "ReadBytes=%llu JobBytes=%llu Errors=%u\n";
62 * Called here before the job is run to do the job initialization.
/*
 * Prepare a backup job before it is run.
 *
 * A Virtual Full job is handed over to do_vbackup_init().  For any
 * other level the read storage is released, the FileSet and Pool
 * catalog records are looked up or created, the definitive level and
 * since time are computed, Pool overrides are applied, duplicate jobs
 * are checked, the Pool storage is copied to the write storage and
 * clone jobs are started.
 *
 * NOTE(review): the statements that follow each failure test (fileset
 * record, duplicate job, PoolId == 0, missing storage) are not present
 * in this listing; presumably each returns false -- confirm against the
 * complete file.
 */
65 bool do_backup_init(JCR *jcr)
/* Virtual Full has its own initialisation routine */
68 if (jcr->get_JobLevel() == L_VIRTUAL_FULL) {
69 return do_vbackup_init(jcr);
71 free_rstorage(jcr); /* we don't read so release */
73 if (!get_or_create_fileset_record(jcr)) {
78 * Get definitive Job level and since time
/* The since text is stored in jcr->since and reused in the job report */
80 get_level_since_time(jcr, jcr->since, sizeof(jcr->since));
82 apply_pool_overrides(jcr);
84 if (!allow_duplicate_job(jcr)) {
/* A PoolId of 0 is treated as failure to find or create the Pool record */
88 jcr->jr.PoolId = get_or_create_pool_record(jcr, jcr->pool->name());
89 if (jcr->jr.PoolId == 0) {
93 /* If pool storage specified, use it instead of job storage */
94 copy_wstorage(jcr, jcr->pool->storage, _("Pool resource"));
/* Fatal: neither the Job nor the Pool supplied a storage */
97 Jmsg(jcr, M_FATAL, 0, _("No Storage specification found in Job or Pool.\n"));
101 create_clones(jcr); /* run any clone jobs */
106 /* Take all base jobs from job resource and find the
109 static bool get_base_jobids(JCR *jcr, POOLMEM *jobids)
116 if (!jcr->job->base) {
117 return false; /* no base job, stop accurate */
120 memset(&jr, 0, sizeof(JOB_DBR));
121 jr.StartTime = jcr->jr.StartTime;
123 foreach_alist(job, jcr->job->base) {
124 bstrncpy(jr.Name, job->name(), sizeof(jr.Name));
125 db_get_base_jobid(jcr, jcr->db, &jr, &id);
129 pm_strcat(jobids, ",");
131 pm_strcat(jobids, edit_uint64(id, str_jobid));
135 return *jobids != '\0';
139 * Foreach files in currrent list, send "/path/fname\0LStat" to FD
141 static int accurate_list_handler(void *ctx, int num_fields, char **row)
143 JCR *jcr = (JCR *)ctx;
145 if (job_canceled(jcr)) {
149 if (row[2] > 0) { /* discard when file_index == 0 */
150 jcr->file_bsock->fsend("%s%s%c%s", row[0], row[1], 0, row[4]);
156 * Send current file list to FD
157 * DIR -> FD : accurate files=xxxx
158 * DIR -> FD : /path/to/file\0Lstat
159 * DIR -> FD : /path/to/dir/\0Lstat
/*
 * Send the list of files already recorded in the catalog to the File
 * daemon for an accurate backup.
 *
 * The function leaves early when the job is not accurate, has been
 * canceled, or runs at level Base.  For a Full job the JobId list comes
 * from get_base_jobids(); for the other levels it comes from
 * db_accurate_get_jobids().  The summed JobFiles count is announced
 * first, every catalog row is then passed to accurate_list_handler()
 * and the stream is ended with BNET_EOD.
 *
 * NOTE(review): the early-exit statements, the condition that selects
 * between the base file list and the plain file list, and the final
 * return are not present in this listing -- confirm against the
 * complete file.
 */
163 bool send_accurate_current_files(JCR *jcr)
168 if (!jcr->accurate || job_canceled(jcr)) {
171 /* In base level, no previous job is used */
172 if (jcr->get_JobLevel() == L_BASE) {
/* Scratch strings: nb receives the file count, jobids the JobId list */
176 POOLMEM *nb = get_pool_memory(PM_FNAME);
177 POOLMEM *jobids = get_pool_memory(PM_FNAME);
178 nb[0] = jobids[0] = '\0';
180 if (jcr->get_JobLevel() == L_FULL) {
181 /* On Full mode, if no previous base job, no accurate things */
182 if (!get_base_jobids(jcr, jobids)) {
186 Jmsg(jcr, M_INFO, 0, _("Using BaseJobId(s): %s\n"), jobids);
189 /* For Incr/Diff level, we search for older jobs */
190 db_accurate_get_jobids(jcr, jcr->db, &jcr->jr, jobids);
192 /* We are in Incr/Diff, but no Full to build the accurate list... */
195 Jmsg(jcr, M_FATAL, 0, _("Cannot find previous jobids.\n"));
200 if (jcr->JobId) { /* display the message only for real jobs */
201 Jmsg(jcr, M_INFO, 0, _("Sending Accurate information.\n"));
204 /* to be able to allocate the right size for htable */
205 Mmsg(buf, "SELECT sum(JobFiles) FROM Job WHERE JobId IN (%s)",jobids);
206 db_sql_query(jcr->db, buf.c_str(), db_get_int_handler, nb);
/* NOTE(review): debug level 0 here -- presumably always printed; confirm this is intended */
207 Dmsg2(0, "jobids=%s nb=%s\n", jobids, nb);
/* Announce the number of entries before the entries themselves */
208 jcr->file_bsock->fsend("accurate files=%s\n", nb);
/* The plain file list below is read through jcr->db_batch */
210 if (!db_open_batch_connexion(jcr, jcr->db)) {
212 Jmsg0(jcr, M_FATAL, 0, "Can't get dedicate sql connexion");
/* Base job case: remember the file count, build and send the base file list */
217 jcr->nb_base_files = str_to_int64(nb);
218 db_create_base_file_list(jcr, jcr->db, jobids);
219 db_get_base_file_list(jcr, jcr->db,
220 accurate_list_handler, (void *)jcr);
/* Otherwise send the file list of the selected JobIds */
223 db_get_file_list(jcr, jcr->db_batch, jobids,
224 accurate_list_handler, (void *)jcr);
227 /* TODO: close the batch connexion ? (can be used very soon) */
/* End of the accurate list */
229 jcr->file_bsock->signal(BNET_EOD);
/* Release the two scratch strings */
232 free_pool_memory(jobids);
233 free_pool_memory(nb);
239 * Do a backup of the specified FileSet
241 * Returns: false on failure
/*
 * Run a backup job.  A Virtual Full job is delegated to do_vbackup().
 *
 * Sequence visible in this function:
 *   1. mark the job running and write the job start record
 *   2. connect to the Storage daemon, start the SD job, send the run
 *      command and start the SD message thread
 *   3. connect to the File daemon and send the include list, exclude
 *      list, level, storage address and runscript commands
 *   4. refresh the job start time, send the accurate file list and the
 *      backup command
 *   5. wait for termination, write the batch file records and call
 *      backup_cleanup() when the status is JS_Terminated
 *
 * NOTE(review): the statements following each failure test are not
 * present in this listing -- confirm the exact exits against the
 * complete file.
 */
244 bool do_backup(JCR *jcr)
247 int tls_need = BNET_TLS_NONE;
252 if (jcr->get_JobLevel() == L_VIRTUAL_FULL) {
253 return do_vbackup(jcr);
256 /* Print Job Start message */
257 Jmsg(jcr, M_INFO, 0, _("Start Backup JobId %s, Job=%s\n"),
258 edit_uint64(jcr->JobId, ed1), jcr->Job);
260 set_jcr_job_status(jcr, JS_Running);
261 Dmsg2(100, "JobId=%d JobLevel=%c\n", jcr->jr.JobId, jcr->jr.JobLevel);
262 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
263 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
268 * Open a message channel connection with the Storage
269 * daemon. This is to let him know that our client
270 * will be contacting him for a backup session.
273 Dmsg0(110, "Open connection with storage daemon\n");
274 set_jcr_job_status(jcr, JS_WaitSD);
276 * Start conversation with Storage daemon
278 if (!connect_to_storage_daemon(jcr, 10, SDConnectTimeout, 1)) {
282 * Now start a job with the Storage daemon
284 if (!start_storage_daemon_job(jcr, NULL, jcr->wstorage)) {
289 * Start the job prior to starting the message thread below
290 * to avoid two threads from using the BSOCK structure at
293 if (!bnet_fsend(jcr->store_bsock, "run")) {
298 * Now start a Storage daemon message thread. Note,
299 * this thread is used to provide the catalog services
300 * for the backup job, including inserting the attributes
301 * into the catalog. See catalog_update() in catreq.c
303 if (!start_storage_daemon_message_thread(jcr)) {
306 Dmsg0(150, "Storage daemon connection OK\n");
/* From here on the dialogue is with the File daemon */
308 set_jcr_job_status(jcr, JS_WaitFD);
309 if (!connect_to_file_daemon(jcr, 10, FDConnectTimeout, 1)) {
313 set_jcr_job_status(jcr, JS_Running);
314 fd = jcr->file_bsock;
316 if (!send_include_list(jcr)) {
320 if (!send_exclude_list(jcr)) {
324 if (!send_level_command(jcr)) {
329 * send Storage daemon address to the File daemon
/* Use the SD port when no separate SDDport is set */
332 if (store->SDDport == 0) {
333 store->SDDport = store->SDport;
336 /* TLS Requirement */
/* tls_need stays BNET_TLS_NONE unless TLS is enabled for this storage */
337 if (store->tls_enable) {
338 if (store->tls_require) {
339 tls_need = BNET_TLS_REQUIRED;
341 tls_need = BNET_TLS_OK;
345 fd->fsend(storaddr, store->address, store->SDDport, tls_need);
346 if (!response(jcr, fd, OKstore, "Storage", DISPLAY_ERROR)) {
350 if (!send_runscripts_commands(jcr)) {
355 * We re-update the job start record so that the start
356 * time is set after the run before job. This avoids
357 * that any files created by the run before job will
358 * be saved twice. They will be backed up in the current
359 * job, but not in the next one unless they are changed.
360 * Without this, they will be backed up in this job and
361 * in the next job run because in that case, their date
362 * is after the start of this run.
364 jcr->start_time = time(NULL);
365 jcr->jr.StartTime = jcr->start_time;
366 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
367 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
371 * If backup is in accurate mode, we send the list of
374 if (!send_accurate_current_files(jcr)) {
378 /* Send backup command */
379 fd->fsend(backupcmd);
380 if (!response(jcr, fd, OKbackup, "backup", DISPLAY_ERROR)) {
384 /* Pickup Job termination data */
385 stat = wait_for_job_termination(jcr);
386 db_write_batch_file_records(jcr); /* used by bulk batch file insert */
/* A failed commit of the base file attributes is reported as fatal */
389 !db_commit_base_file_attributes_record(jcr, jcr->db))
391 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
/* The job report is produced only for a normal termination */
394 if (stat == JS_Terminated) {
395 backup_cleanup(jcr, stat);
400 /* Come here only after starting SD thread */
/* Error exit: flag the job as failed, then wait with a timeout for the daemons */
402 set_jcr_job_status(jcr, JS_ErrorTerminated);
403 Dmsg1(400, "wait for sd. use=%d\n", jcr->use_count());
405 wait_for_job_termination(jcr, FDConnectTimeout);
406 Dmsg1(400, "after wait for sd. use=%d\n", jcr->use_count());
412 * Here we wait for the File daemon to signal termination,
413 * then we wait for the Storage daemon. When both
414 * are done, we return the job status.
415 * Also used by restore.c
/*
 * Wait for the File daemon to report the end of the job, then wait for
 * the Storage daemon.
 *
 *  jcr      job control record; jcr->file_bsock is the FD connection
 *  timeout  value handed to start_bsock_timer() for the FD socket
 *
 * Returns: jcr->JobStatus if it is not JS_Terminated, else
 *          jcr->FDJobStatus if it is not JS_Terminated, else
 *          jcr->SDJobStatus
 */
417 int wait_for_job_termination(JCR *jcr, int timeout)
420 BSOCK *fd = jcr->file_bsock;
422 uint32_t JobFiles, JobErrors;
/* NOTE(review): JobWarnings is not among the sscanf targets below, so it stays 0 */
423 uint32_t JobWarnings = 0;
424 uint64_t ReadBytes = 0;
425 uint64_t JobBytes = 0;
430 set_jcr_job_status(jcr, JS_Running);
434 tid = start_bsock_timer(fd, timeout); /* TODO: New timeout directive??? */
436 /* Wait for Client to terminate */
437 while ((n = bget_dirmsg(fd)) >= 0) {
/* Accept the current EndJob report (7 fields) or the old one (5 fields) */
439 (sscanf(fd->msg, EndJob, &jcr->FDJobStatus, &JobFiles,
440 &ReadBytes, &JobBytes, &JobErrors, &VSS, &Encrypt) == 7 ||
441 sscanf(fd->msg, OldEndJob, &jcr->FDJobStatus, &JobFiles,
442 &ReadBytes, &JobBytes, &JobErrors) == 5)) {
/* The status reported by the FD is applied to the job status */
444 set_jcr_job_status(jcr, jcr->FDJobStatus);
/* NOTE(review): the label says FDStatus but the value printed is jcr->JobStatus */
445 Dmsg1(100, "FDStatus=%c\n", (char)jcr->JobStatus);
447 Jmsg(jcr, M_WARNING, 0, _("Unexpected Client Job message: %s\n"),
450 if (job_canceled(jcr)) {
455 stop_bsock_timer(tid);
458 if (is_bnet_error(fd)) {
459 Jmsg(jcr, M_FATAL, 0, _("Network error with FD during %s: ERR=%s\n"),
460 job_type_to_str(jcr->get_JobType()), fd->bstrerror());
462 fd->signal(BNET_TERMINATE); /* tell Client we are terminating */
465 /* Force cancel in SD if failing */
466 if (job_canceled(jcr) || !fd_ok) {
467 cancel_storage_daemon_job(jcr);
470 /* Note, the SD stores in jcr->JobFiles/ReadBytes/JobBytes/JobErrors */
471 wait_for_storage_daemon_termination(jcr);
473 /* Return values from FD */
/* FD counters replace the job counters, except errors which are added */
475 jcr->JobFiles = JobFiles;
476 jcr->JobErrors += JobErrors; /* Keep total errors */
477 jcr->ReadBytes = ReadBytes;
478 jcr->JobBytes = JobBytes;
479 jcr->JobWarnings = JobWarnings;
481 jcr->Encrypt = Encrypt;
483 Jmsg(jcr, M_FATAL, 0, _("No Job status returned from FD.\n"));
486 // Dmsg4(100, "fd_ok=%d FDJS=%d JS=%d SDJS=%d\n", fd_ok, jcr->FDJobStatus,
487 // jcr->JobStatus, jcr->SDJobStatus);
489 /* Return the first error status we find Dir, FD, or SD */
/* fd_ok is tested first, so is_bnet_error() is not reached when no report was received */
490 if (!fd_ok || is_bnet_error(fd)) { /* if fd not set, that use !fd_ok */
491 jcr->FDJobStatus = JS_ErrorTerminated;
493 if (jcr->JobStatus != JS_Terminated) {
494 return jcr->JobStatus;
496 if (jcr->FDJobStatus != JS_Terminated) {
497 return jcr->FDJobStatus;
499 return jcr->SDJobStatus;
503 * Release resources allocated during backup.
/*
 * Finish a backup job: record the end of the job, refresh the catalog
 * records used by the report, update the bootstrap file and emit the
 * job report.  A Virtual Full job is handed to vbackup_cleanup().
 *
 *  jcr       job control record
 *  TermCode  termination code, passed on to update_job_end()
 */
505 void backup_cleanup(JCR *jcr, int TermCode)
/* Scratch buffers for the formatted times, numbers and status texts of the report */
507 char sdt[50], edt[50], schedt[50];
508 char ec1[30], ec2[30], ec3[30], ec4[30], ec5[30], compress[50];
509 char ec6[30], ec7[30], ec8[30], elapsed[50];
510 char term_code[100], fd_term_msg[100], sd_term_msg[100];
511 const char *term_msg;
512 int msg_type = M_INFO;
515 double kbps, compression;
518 if (jcr->get_JobLevel() == L_VIRTUAL_FULL) {
519 vbackup_cleanup(jcr, TermCode);
523 Dmsg2(100, "Enter backup_cleanup %d %c\n", TermCode, TermCode);
/* Start from zeroed Media and Client lookup records */
524 memset(&mr, 0, sizeof(mr));
525 memset(&cr, 0, sizeof(cr));
527 update_job_end(jcr, TermCode);
/* Re-read the Job record; a failure here marks the job as error terminated */
529 if (!db_get_job_record(jcr, jcr->db, &jcr->jr)) {
530 Jmsg(jcr, M_WARNING, 0, _("Error getting Job record for Job report: ERR=%s"),
531 db_strerror(jcr->db));
532 set_jcr_job_status(jcr, JS_ErrorTerminated);
/* Client record is looked up by name; a failure only produces a warning */
535 bstrncpy(cr.Name, jcr->client->name(), sizeof(cr.Name));
536 if (!db_get_client_record(jcr, jcr->db, &cr)) {
537 Jmsg(jcr, M_WARNING, 0, _("Error getting Client record for Job report: ERR=%s"),
538 db_strerror(jcr->db));
/* Media record is looked up by volume name; a failure marks the job as error terminated */
541 bstrncpy(mr.VolumeName, jcr->VolumeName, sizeof(mr.VolumeName));
542 if (!db_get_media_record(jcr, jcr->db, &mr)) {
543 Jmsg(jcr, M_WARNING, 0, _("Error getting Media record for Volume \"%s\": ERR=%s"),
544 mr.VolumeName, db_strerror(jcr->db));
545 set_jcr_job_status(jcr, JS_ErrorTerminated);
548 update_bootstrap_file(jcr);
/* Choose the report headline and message type from the final job status */
550 switch (jcr->JobStatus) {
552 if (jcr->JobErrors || jcr->SDErrors) {
553 term_msg = _("Backup OK -- with warnings");
555 term_msg = _("Backup OK");
559 term_msg = _("Backup OK -- with warnings");
562 case JS_ErrorTerminated:
563 term_msg = _("*** Backup Error ***");
564 msg_type = M_ERROR; /* Generate error message */
/* Tell the SD to terminate and cancel the SD message thread if one exists */
565 if (jcr->store_bsock) {
566 jcr->store_bsock->signal(BNET_TERMINATE);
567 if (jcr->SD_msg_chan) {
568 pthread_cancel(jcr->SD_msg_chan);
573 term_msg = _("Backup Canceled");
574 if (jcr->store_bsock) {
575 jcr->store_bsock->signal(BNET_TERMINATE);
576 if (jcr->SD_msg_chan) {
577 pthread_cancel(jcr->SD_msg_chan);
/* NOTE(review): sprintf into term_code[100] is unbounded; a long translation of the format could overflow -- consider bsnprintf */
582 term_msg = term_code;
583 sprintf(term_code, _("Inappropriate term code: %c\n"), jcr->JobStatus);
586 bstrftimes(schedt, sizeof(schedt), jcr->jr.SchedTime);
587 bstrftimes(sdt, sizeof(sdt), jcr->jr.StartTime);
588 bstrftimes(edt, sizeof(edt), jcr->jr.EndTime);
589 RunTime = jcr->jr.EndTime - jcr->jr.StartTime;
/* Rate is JobBytes per 1000 per second of run time; NOTE(review): the guard for a zero RunTime is not visible in this listing -- confirm */
593 kbps = ((double)jcr->jr.JobBytes) / (1000.0 * (double)RunTime);
595 if (!db_get_job_volume_names(jcr, jcr->db, jcr->jr.JobId, &jcr->VolumeName)) {
597 * Note, if the job has erred, most likely it did not write any
598 * tape, so suppress this "error" message since in that case
599 * it is normal. Or look at it the other way, only for a
600 * normal exit should we complain about this error.
602 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes) {
603 Jmsg(jcr, M_ERROR, 0, "%s", db_strerror(jcr->db));
605 jcr->VolumeName[0] = 0; /* none */
/* Compression is 100 minus the percentage of JobBytes over ReadBytes; below 0.5 it is reported as None */
608 if (jcr->ReadBytes == 0) {
609 bstrncpy(compress, "None", sizeof(compress));
611 compression = (double)100 - 100.0 * ((double)jcr->JobBytes / (double)jcr->ReadBytes);
612 if (compression < 0.5) {
613 bstrncpy(compress, "None", sizeof(compress));
615 bsnprintf(compress, sizeof(compress), "%.1f %%", compression);
618 jobstatus_to_ascii(jcr->FDJobStatus, fd_term_msg, sizeof(fd_term_msg));
619 jobstatus_to_ascii(jcr->SDJobStatus, sd_term_msg, sizeof(sd_term_msg));
/* NOTE(review): divides by jcr->nb_base_files; the enclosing condition is not visible here -- confirm it cannot be zero */
622 Dmsg3(0, "Base files/Used files %lld/%lld=%.2f%%\n", jcr->nb_base_files,
623 jcr->nb_base_files_used,
624 jcr->nb_base_files_used*100.0/jcr->nb_base_files);
626 // bmicrosleep(15, 0); /* for debugging SIGHUP */
/* The job report: one Jmsg whose arguments follow the order of the format lines */
628 Jmsg(jcr, msg_type, 0, _("%s %s %s (%s): %s\n"
629 " Build OS: %s %s %s\n"
632 " Backup Level: %s%s\n"
633 " Client: \"%s\" %s\n"
634 " FileSet: \"%s\" %s\n"
635 " Pool: \"%s\" (From %s)\n"
636 " Catalog: \"%s\" (From %s)\n"
637 " Storage: \"%s\" (From %s)\n"
638 " Scheduled time: %s\n"
641 " Elapsed time: %s\n"
643 " FD Files Written: %s\n"
644 " SD Files Written: %s\n"
645 " FD Bytes Written: %s (%sB)\n"
646 " SD Bytes Written: %s (%sB)\n"
648 " Software Compression: %s\n"
652 " Volume name(s): %s\n"
653 " Volume Session Id: %d\n"
654 " Volume Session Time: %d\n"
655 " Last Volume Bytes: %s (%sB)\n"
656 " Non-fatal FD errors: %d\n"
658 " FD termination status: %s\n"
659 " SD termination status: %s\n"
660 " Termination: %s\n\n"),
661 BACULA, my_name, VERSION, LSMDATE, edt,
662 HOST_OS, DISTNAME, DISTVER,
665 level_to_str(jcr->get_JobLevel()), jcr->since,
666 jcr->client->name(), cr.Uname,
667 jcr->fileset->name(), jcr->FSCreateTime,
668 jcr->pool->name(), jcr->pool_source,
669 jcr->catalog->name(), jcr->catalog_source,
670 jcr->wstore->name(), jcr->wstore_source,
674 edit_utime(RunTime, elapsed, sizeof(elapsed)),
676 edit_uint64_with_commas(jcr->jr.JobFiles, ec1),
677 edit_uint64_with_commas(jcr->SDJobFiles, ec2),
678 edit_uint64_with_commas(jcr->jr.JobBytes, ec3),
679 edit_uint64_with_suffix(jcr->jr.JobBytes, ec4),
680 edit_uint64_with_commas(jcr->SDJobBytes, ec5),
681 edit_uint64_with_suffix(jcr->SDJobBytes, ec6),
684 jcr->VSS?_("yes"):_("no"),
685 jcr->Encrypt?_("yes"):_("no"),
686 jcr->accurate?_("yes"):_("no"),
690 edit_uint64_with_commas(mr.VolBytes, ec7),
691 edit_uint64_with_suffix(mr.VolBytes, ec8),
698 Dmsg0(100, "Leave backup_cleanup()\n");
/*
 * Write the bootstrap records of this job to the WriteBootstrap target
 * of the job resource.
 *
 * Nothing is done unless the job status is JS_Terminated, the job wrote
 * bytes and the job resource has a WriteBootstrap setting.  The target
 * name is expanded with edit_job_codes(); it is opened either through
 * open_bpipe() with the leading | skipped, or with fopen(), truncating
 * for a Full job and appending for any other level.
 */
701 void update_bootstrap_file(JCR *jcr)
703 /* Now update the bootstrap file if any */
704 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes &&
705 jcr->job->WriteBootstrap) {
/* fname holds the expanded WriteBootstrap target and is freed at the end */
709 POOLMEM *fname = get_pool_memory(PM_FNAME);
710 fname = edit_job_codes(jcr, fname, jcr->job->WriteBootstrap, "");
712 VOL_PARAMS *VolParams = NULL;
714 char edt[50], ed1[50], ed2[50];
/* Pipe target: write to the write end of the pipe, or NULL when the pipe could not be opened */
718 bpipe = open_bpipe(fname+1, 0, "w"); /* skip first char "|" */
719 fd = bpipe ? bpipe->wfd : NULL;
721 /* ***FIXME*** handle BASE */
722 fd = fopen(fname, jcr->get_JobLevel()==L_FULL?"w+b":"a+b");
/* Fetch the volume parameters recorded for this JobId */
725 VolCount = db_get_job_volume_parameters(jcr, jcr->db, jcr->JobId,
728 Jmsg(jcr, M_ERROR, 0, _("Could not get Job Volume Parameters to "
729 "update Bootstrap file. ERR=%s\n"), db_strerror(jcr->db));
/* Missing parameters fail the job only when the SD reported written files */
730 if (jcr->SDJobFiles != 0) {
731 set_jcr_job_status(jcr, JS_ErrorTerminated);
735 /* Start output with when and who wrote it */
736 bstrftimes(edt, sizeof(edt), time(NULL));
737 fprintf(fd, "# %s - %s - %s%s\n", edt, jcr->jr.Job,
738 level_to_str(jcr->get_JobLevel()), jcr->since);
/* One record per volume used by the job */
739 for (int i=0; i < VolCount; i++) {
740 /* Write the record */
741 fprintf(fd, "Volume=\"%s\"\n", VolParams[i].VolumeName);
742 fprintf(fd, "MediaType=\"%s\"\n", VolParams[i].MediaType);
/* The Slot line is written only for a positive slot number */
743 if (VolParams[i].Slot > 0) {
744 fprintf(fd, "Slot=%d\n", VolParams[i].Slot);
746 fprintf(fd, "VolSessionId=%u\n", jcr->VolSessionId);
747 fprintf(fd, "VolSessionTime=%u\n", jcr->VolSessionTime);
748 fprintf(fd, "VolAddr=%s-%s\n",
749 edit_uint64(VolParams[i].StartAddr, ed1),
750 edit_uint64(VolParams[i].EndAddr, ed2));
751 fprintf(fd, "FileIndex=%d-%d\n", VolParams[i].FirstIndex,
752 VolParams[i].LastIndex);
/* Failure to open the target is reported and fails the job */
764 Jmsg(jcr, M_ERROR, 0, _("Could not open WriteBootstrap file:\n"
765 "%s: ERR=%s\n"), fname, be.bstrerror());
766 set_jcr_job_status(jcr, JS_ErrorTerminated);
768 free_pool_memory(fname);