2 Bacula® - The Network Backup Solution
4 Copyright (C) 2000-2009 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from
7 many others, a complete list can be found in the file AUTHORS.
8 This program is Free Software; you can redistribute it and/or
9 modify it under the terms of version two of the GNU General Public
10 License as published by the Free Software Foundation and included in the file LICENSE.
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
23 Bacula® is a registered trademark of Kern Sibbald.
24 The licensor of Bacula is the Free Software Foundation Europe
25 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
26 Switzerland, email:ftf@fsfeurope.org.
30 * Bacula Director -- backup.c -- responsible for doing backup jobs
32 * Kern Sibbald, March MM
34 * Basic tasks done here:
35 * Open DB and create records for this job.
36 * Open Message Channel with Storage daemon to tell him a job will be starting.
37 * Open connection with File daemon and pass him commands
39 * When the File daemon finishes the job, update the DB.
/*
 * Protocol strings exchanged between the Director and the File daemon.
 */

/* Director -> File daemon commands */
static char backupcmd[] = "backup\n";
static char storaddr[] = "storage address=%s port=%d ssl=%d\n";

/* File daemon -> Director replies */
static char OKbackup[] = "2000 OK backup\n";
static char OKstore[] = "2000 OK storage\n";

/* Job termination report, current layout (carries VSS and Encrypt) */
static char EndJob[] =
   "2800 End Job TermCode=%d JobFiles=%u ReadBytes=%llu "
   "JobBytes=%llu Errors=%u VSS=%d Encrypt=%d\n";

/* Job termination report, layout used before 1.39.29 (04Dec06) */
static char OldEndJob[] =
   "2800 End Job TermCode=%d JobFiles=%u ReadBytes=%llu "
   "JobBytes=%llu Errors=%u\n";
62 * Called here before the job is run to do the job
/*
 * do_backup_init -- prepare a backup job before it is run.
 *
 * NOTE(review): this listing has lost lines (braces, the bodies of the
 * failure tests and presumably a condition between the copy_wstorage() call
 * and the "No Storage" message), so only the visible statements are
 * summarized and no return value is documented.
 *
 * Visible steps, in order:
 *  - Virtual Full jobs are handed to do_vbackup_init().
 *  - Read storage is released because a backup does not read.
 *  - The FileSet record is fetched or created.
 *  - The job level and "since" text are resolved into jcr->since.
 *  - Pool overrides are applied and allow_duplicate_job() is consulted.
 *  - The Pool record is fetched or created and a PoolId of 0 is tested for.
 *  - The pool's storage is copied in as write storage.
 *  - A fatal message is issued when no Storage specification was found.
 *  - Clone jobs are started.
 */
65 bool do_backup_init(JCR *jcr)
68 if (jcr->get_JobLevel() == L_VIRTUAL_FULL) {
69 return do_vbackup_init(jcr);
71 free_rstorage(jcr); /* we don't read so release */
73 if (!get_or_create_fileset_record(jcr)) {
78 * Get definitive Job level and since time
80 get_level_since_time(jcr, jcr->since, sizeof(jcr->since));
82 apply_pool_overrides(jcr);
84 if (!allow_duplicate_job(jcr)) {
88 jcr->jr.PoolId = get_or_create_pool_record(jcr, jcr->pool->name());
89 if (jcr->jr.PoolId == 0) {
93 /* If pool storage specified, use it instead of job storage */
94 copy_wstorage(jcr, jcr->pool->storage, _("Pool resource"));
97 Jmsg(jcr, M_FATAL, 0, _("No Storage specification found in Job or Pool.\n"));
101 create_clones(jcr); /* run any clone jobs */
106 /* Take all base jobs from job resource and find the
/*
 * get_base_jobids -- append the job ids of the job's base jobs to jobids.
 *
 *   jcr    - job control record; jcr->job->base is the list that is walked
 *   jobids - pool memory string that receives a comma separated id list
 *
 * For every entry of jcr->job->base the entry's name is copied into a
 * JOB_DBR whose StartTime is this job's StartTime, db_get_base_jobid() is
 * asked for an id, and the id is appended with edit_uint64().
 *
 * NOTE(review): the declarations of jr, job, id and str_jobid, and the
 * conditions guarding the two pm_strcat() calls, are missing from this
 * listing.
 * NOTE(review): jobids is received by value; if pm_strcat() can move the
 * buffer while growing it, the caller's pointer would not follow -- confirm
 * against the pm_strcat() prototype.
 */
109 static void get_base_jobids(JCR *jcr, POOLMEM *jobids)
116 if (!jcr->job->base) {
120 memset(&jr, 0, sizeof(JOB_DBR));
121 jr.StartTime = jcr->jr.StartTime;
123 foreach_alist(job, jcr->job->base) {
124 bstrncpy(jr.Name, job->name(), sizeof(jr.Name));
125 db_get_base_jobid(jcr, jcr->db, &jr, &id);
129 pm_strcat(jobids, ",");
131 pm_strcat(jobids, edit_uint64(id, str_jobid));
137 * Foreach files in currrent list, send "/path/fname\0LStat" to FD
139 static int accurate_list_handler(void *ctx, int num_fields, char **row)
141 JCR *jcr = (JCR *)ctx;
143 if (job_canceled(jcr)) {
147 if (row[2] > 0) { /* discard when file_index == 0 */
148 jcr->file_bsock->fsend("%s%s%c%s", row[0], row[1], 0, row[4]);
154 * Send current file list to FD
155 * DIR -> FD : accurate files=xxxx
156 * DIR -> FD : /path/to/file\0Lstat
157 * DIR -> FD : /path/to/dir/\0Lstat
/*
 * send_accurate_current_files -- send the current file list to the File
 * daemon for an accurate backup.
 *
 * NOTE(review): braces, the bodies of the early-exit tests, the return
 * statements and the declaration of buf are missing from this listing.
 *
 * Visible sequence:
 *  - Tests for: accurate mode off or job canceled; level L_BASE; level
 *    L_FULL with an empty base job id list. Their bodies are not visible.
 *  - Two pool memory buffers (nb, jobids) are obtained and emptied, and
 *    get_base_jobids() fills jobids.
 *  - For levels other than L_FULL, db_accurate_get_jobids() is called; a
 *    fatal "Cannot find previous jobids" message exists for that path.
 *  - sum(JobFiles) over the selected jobs is queried into nb and sent as
 *    "accurate files=<nb>".
 *  - A batch database connection is opened and db_get_file_list() feeds
 *    every row to accurate_list_handler().
 *  - BNET_EOD is signalled and both pool memory buffers are freed.
 *
 * NOTE(review): confirm that every exit taken after the two
 * get_pool_memory() calls frees both nb and jobids.
 */
161 bool send_accurate_current_files(JCR *jcr)
166 if (!jcr->accurate || job_canceled(jcr)) {
169 /* In base level, no previous job is used */
170 if (jcr->get_JobLevel() == L_BASE) {
174 POOLMEM *nb = get_pool_memory(PM_FNAME);
175 POOLMEM *jobids = get_pool_memory(PM_FNAME);
176 nb[0] = jobids[0] = '\0';
178 get_base_jobids(jcr, jobids);
180 /* On Full mode, if no previous base job, no accurate things */
181 if (jcr->get_JobLevel() == L_FULL && *jobids == 0) {
185 /* For Incr/Diff level, we search for older jobs */
186 if (jcr->get_JobLevel() != L_FULL) {
187 db_accurate_get_jobids(jcr, jcr->db, &jcr->jr, jobids);
191 Jmsg(jcr, M_FATAL, 0, _("Cannot find previous jobids.\n"));
196 if (jcr->JobId) { /* display the message only for real jobs */
197 Jmsg(jcr, M_INFO, 0, _("Sending Accurate information.\n"));
200 /* to be able to allocate the right size for htable */
201 Mmsg(buf, "SELECT sum(JobFiles) FROM Job WHERE JobId IN (%s)",jobids);
202 db_sql_query(jcr->db, buf.c_str(), db_get_int_handler, nb);
203 Dmsg2(200, "jobids=%s nb=%s\n", jobids, nb);
204 jcr->file_bsock->fsend("accurate files=%s\n", nb);
206 if (!db_open_batch_connexion(jcr, jcr->db)) {
208 Jmsg0(jcr, M_FATAL, 0, "Can't get dedicate sql connexion");
212 db_get_file_list(jcr, jcr->db_batch, jobids, accurate_list_handler, (void *)jcr);
214 /* TODO: close the batch connexion ? (can be used very soon) */
216 jcr->file_bsock->signal(BNET_EOD);
219 free_pool_memory(jobids);
220 free_pool_memory(nb);
226 * Do a backup of the specified FileSet
228 * Returns: false on failure
/*
 * do_backup -- run a backup job from the Director's side.
 *
 * NOTE(review): braces, error exits, labels and the declarations of stat,
 * fd, store and ed1 are missing from this listing, so only the visible
 * statements are summarized.
 *
 * Visible sequence:
 *  1. Virtual Full jobs are handed to do_vbackup().
 *  2. The start message is issued, the status becomes JS_Running and the
 *     job start record is updated in the catalog.
 *  3. Storage daemon: connect, start the job, send "run", then start the
 *     message thread.
 *  4. File daemon: connect, then send the include list, the exclude list
 *     and the level command.
 *  5. The Storage daemon address, port and TLS requirement are sent to the
 *     File daemon and an OKstore response is expected.
 *  6. Runscript commands are sent, the start time is reset to now and the
 *     job start record is updated a second time.
 *  7. The accurate file list is sent, then the backup command, for which an
 *     OKbackup response is expected.
 *  8. wait_for_job_termination() is called, batch file records are written
 *     and backup_cleanup() runs when the status is JS_Terminated.
 *  9. The trailing statements set JS_ErrorTerminated and wait again with
 *     FDConnectTimeout; per the existing comment they are reached only
 *     after the SD thread was started.
 */
231 bool do_backup(JCR *jcr)
234 int tls_need = BNET_TLS_NONE;
239 if (jcr->get_JobLevel() == L_VIRTUAL_FULL) {
240 return do_vbackup(jcr);
243 /* Print Job Start message */
244 Jmsg(jcr, M_INFO, 0, _("Start Backup JobId %s, Job=%s\n"),
245 edit_uint64(jcr->JobId, ed1), jcr->Job);
247 set_jcr_job_status(jcr, JS_Running);
248 Dmsg2(100, "JobId=%d JobLevel=%c\n", jcr->jr.JobId, jcr->jr.JobLevel);
249 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
250 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
255 * Open a message channel connection with the Storage
256 * daemon. This is to let him know that our client
257 * will be contacting him for a backup session.
260 Dmsg0(110, "Open connection with storage daemon\n");
261 set_jcr_job_status(jcr, JS_WaitSD);
263 * Start conversation with Storage daemon
265 if (!connect_to_storage_daemon(jcr, 10, SDConnectTimeout, 1)) {
269 * Now start a job with the Storage daemon
271 if (!start_storage_daemon_job(jcr, NULL, jcr->wstorage)) {
276 * Start the job prior to starting the message thread below
277 * to avoid two threads from using the BSOCK structure at
280 if (!bnet_fsend(jcr->store_bsock, "run")) {
285 * Now start a Storage daemon message thread. Note,
286 * this thread is used to provide the catalog services
287 * for the backup job, including inserting the attributes
288 * into the catalog. See catalog_update() in catreq.c
290 if (!start_storage_daemon_message_thread(jcr)) {
293 Dmsg0(150, "Storage daemon connection OK\n");
295 set_jcr_job_status(jcr, JS_WaitFD);
296 if (!connect_to_file_daemon(jcr, 10, FDConnectTimeout, 1)) {
300 set_jcr_job_status(jcr, JS_Running);
301 fd = jcr->file_bsock;
303 if (!send_include_list(jcr)) {
307 if (!send_exclude_list(jcr)) {
311 if (!send_level_command(jcr)) {
316 * send Storage daemon address to the File daemon
/* An SDDport of 0 is replaced by SDport; note this writes through store. */
319 if (store->SDDport == 0) {
320 store->SDDport = store->SDport;
323 /* TLS Requirement */
/* tls_need stays BNET_TLS_NONE unless store->tls_enable is set. */
324 if (store->tls_enable) {
325 if (store->tls_require) {
326 tls_need = BNET_TLS_REQUIRED;
328 tls_need = BNET_TLS_OK;
332 fd->fsend(storaddr, store->address, store->SDDport, tls_need);
333 if (!response(jcr, fd, OKstore, "Storage", DISPLAY_ERROR)) {
337 if (!send_runscripts_commands(jcr)) {
342 * We re-update the job start record so that the start
343 * time is set after the run before job. This avoids
344 * that any files created by the run before job will
345 * be saved twice. They will be backed up in the current
346 * job, but not in the next one unless they are changed.
347 * Without this, they will be backed up in this job and
348 * in the next job run because in that case, their date
349 * is after the start of this run.
351 jcr->start_time = time(NULL);
352 jcr->jr.StartTime = jcr->start_time;
353 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
354 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
358 * If backup is in accurate mode, we send the list of
361 if (!send_accurate_current_files(jcr)) {
365 /* Send backup command */
366 fd->fsend(backupcmd);
367 if (!response(jcr, fd, OKbackup, "backup", DISPLAY_ERROR)) {
371 /* Pickup Job termination data */
372 stat = wait_for_job_termination(jcr);
373 db_write_batch_file_records(jcr); /* used by bulk batch file insert */
374 if (stat == JS_Terminated) {
375 backup_cleanup(jcr, stat);
380 /* Come here only after starting SD thread */
382 set_jcr_job_status(jcr, JS_ErrorTerminated);
383 Dmsg1(400, "wait for sd. use=%d\n", jcr->use_count());
385 wait_for_job_termination(jcr, FDConnectTimeout);
386 Dmsg1(400, "after wait for sd. use=%d\n", jcr->use_count());
392 * Here we wait for the File daemon to signal termination,
393 * then we wait for the Storage daemon. When both
394 * are done, we return the job status.
395 * Also used by restore.c
/*
 * wait_for_job_termination -- wait for the File daemon, then for the
 * Storage daemon, and report the job status.
 *
 *   jcr     - job control record; jcr->file_bsock is the File daemon socket
 *   timeout - handed to start_bsock_timer() for the File daemon socket
 *
 * Returns the first status that is not JS_Terminated, looking at
 * jcr->JobStatus and then jcr->FDJobStatus; otherwise jcr->SDJobStatus.
 *
 * NOTE(review): braces, several conditions and the declarations of n, tid,
 * fd_ok, VSS and Encrypt are missing from this listing.
 * NOTE(review): JobFiles and JobErrors have no initializer. They are filled
 * by sscanf() and later copied into jcr; presumably that copy is guarded by
 * fd_ok -- confirm.
 */
397 int wait_for_job_termination(JCR *jcr, int timeout)
400 BSOCK *fd = jcr->file_bsock;
402 uint32_t JobFiles, JobErrors;
403 uint32_t JobWarnings = 0;
404 uint64_t ReadBytes = 0;
405 uint64_t JobBytes = 0;
410 set_jcr_job_status(jcr, JS_Running);
414 tid = start_bsock_timer(fd, timeout); /* TODO: New timeout directive??? */
416 /* Wait for Client to terminate */
417 while ((n = bget_dirmsg(fd)) >= 0) {
/* Accept either the current EndJob layout (7 fields) or the old one (5). */
419 (sscanf(fd->msg, EndJob, &jcr->FDJobStatus, &JobFiles,
420 &ReadBytes, &JobBytes, &JobErrors, &VSS, &Encrypt) == 7 ||
421 sscanf(fd->msg, OldEndJob, &jcr->FDJobStatus, &JobFiles,
422 &ReadBytes, &JobBytes, &JobErrors) == 5)) {
424 set_jcr_job_status(jcr, jcr->FDJobStatus);
425 Dmsg1(100, "FDStatus=%c\n", (char)jcr->JobStatus);
427 Jmsg(jcr, M_WARNING, 0, _("Unexpected Client Job message: %s\n"),
430 if (job_canceled(jcr)) {
435 stop_bsock_timer(tid);
438 if (is_bnet_error(fd)) {
439 Jmsg(jcr, M_FATAL, 0, _("Network error with FD during %s: ERR=%s\n"),
440 job_type_to_str(jcr->get_JobType()), fd->bstrerror());
442 fd->signal(BNET_TERMINATE); /* tell Client we are terminating */
445 /* Force cancel in SD if failing */
446 if (job_canceled(jcr) || !fd_ok) {
447 cancel_storage_daemon_job(jcr);
450 /* Note, the SD stores in jcr->JobFiles/ReadBytes/JobBytes/JobErrors */
451 wait_for_storage_daemon_termination(jcr);
453 /* Return values from FD */
455 jcr->JobFiles = JobFiles;
456 jcr->JobErrors += JobErrors; /* Keep total errors */
457 jcr->ReadBytes = ReadBytes;
458 jcr->JobBytes = JobBytes;
459 jcr->JobWarnings = JobWarnings;
461 jcr->Encrypt = Encrypt;
463 Jmsg(jcr, M_FATAL, 0, _("No Job status returned from FD.\n"));
466 // Dmsg4(100, "fd_ok=%d FDJS=%d JS=%d SDJS=%d\n", fd_ok, jcr->FDJobStatus,
467 // jcr->JobStatus, jcr->SDJobStatus);
469 /* Return the first error status we find Dir, FD, or SD */
470 if (!fd_ok || is_bnet_error(fd)) { /* if fd not set, that use !fd_ok */
471 jcr->FDJobStatus = JS_ErrorTerminated;
473 if (jcr->JobStatus != JS_Terminated) {
474 return jcr->JobStatus;
476 if (jcr->FDJobStatus != JS_Terminated) {
477 return jcr->FDJobStatus;
479 return jcr->SDJobStatus;
483 * Release resources allocated during backup.
/*
 * backup_cleanup -- finish a backup job and issue the job report.
 *
 *   jcr      - job control record
 *   TermCode - termination code handed to update_job_end()
 *
 * NOTE(review): many lines are missing from this listing (braces, case
 * labels, the declarations of mr, cr and RunTime, and several report format
 * lines and arguments); only visible statements are described.
 *
 * Visible sequence:
 *  - Virtual Full jobs are handed to vbackup_cleanup().
 *  - The job end is recorded, then the Job, Client and Media records are
 *    read back. A failure on the Job or Media record is followed by a
 *    switch to JS_ErrorTerminated; for the Client record only a warning is
 *    visible.
 *  - The bootstrap file is updated.
 *  - term_msg (and msg_type for errors) is chosen from jcr->JobStatus. In
 *    the error and canceled branches the Storage daemon socket is sent
 *    BNET_TERMINATE and the SD message thread is cancelled.
 *  - Times, transfer rate, volume names and the compression figure are
 *    formatted, then the report is issued through Jmsg().
 */
485 void backup_cleanup(JCR *jcr, int TermCode)
487 char sdt[50], edt[50], schedt[50];
488 char ec1[30], ec2[30], ec3[30], ec4[30], ec5[30], compress[50];
489 char ec6[30], ec7[30], ec8[30], elapsed[50];
490 char term_code[100], fd_term_msg[100], sd_term_msg[100];
491 const char *term_msg;
492 int msg_type = M_INFO;
495 double kbps, compression;
498 if (jcr->get_JobLevel() == L_VIRTUAL_FULL) {
499 vbackup_cleanup(jcr, TermCode);
503 Dmsg2(100, "Enter backup_cleanup %d %c\n", TermCode, TermCode);
504 memset(&mr, 0, sizeof(mr));
505 memset(&cr, 0, sizeof(cr));
507 update_job_end(jcr, TermCode);
509 if (!db_get_job_record(jcr, jcr->db, &jcr->jr)) {
510 Jmsg(jcr, M_WARNING, 0, _("Error getting Job record for Job report: ERR=%s"),
511 db_strerror(jcr->db));
512 set_jcr_job_status(jcr, JS_ErrorTerminated);
515 bstrncpy(cr.Name, jcr->client->name(), sizeof(cr.Name));
516 if (!db_get_client_record(jcr, jcr->db, &cr)) {
517 Jmsg(jcr, M_WARNING, 0, _("Error getting Client record for Job report: ERR=%s"),
518 db_strerror(jcr->db));
521 bstrncpy(mr.VolumeName, jcr->VolumeName, sizeof(mr.VolumeName));
522 if (!db_get_media_record(jcr, jcr->db, &mr)) {
523 Jmsg(jcr, M_WARNING, 0, _("Error getting Media record for Volume \"%s\": ERR=%s"),
524 mr.VolumeName, db_strerror(jcr->db));
525 set_jcr_job_status(jcr, JS_ErrorTerminated);
528 update_bootstrap_file(jcr);
530 switch (jcr->JobStatus) {
532 if (jcr->JobErrors || jcr->SDErrors) {
533 term_msg = _("Backup OK -- with warnings");
535 term_msg = _("Backup OK");
539 term_msg = _("Backup OK -- with warnings");
542 case JS_ErrorTerminated:
543 term_msg = _("*** Backup Error ***");
544 msg_type = M_ERROR; /* Generate error message */
545 if (jcr->store_bsock) {
546 jcr->store_bsock->signal(BNET_TERMINATE);
547 if (jcr->SD_msg_chan) {
548 pthread_cancel(jcr->SD_msg_chan);
553 term_msg = _("Backup Canceled");
554 if (jcr->store_bsock) {
555 jcr->store_bsock->signal(BNET_TERMINATE);
556 if (jcr->SD_msg_chan) {
557 pthread_cancel(jcr->SD_msg_chan);
/* Any other status is reported through the term_code buffer. */
562 term_msg = term_code;
563 sprintf(term_code, _("Inappropriate term code: %c\n"), jcr->JobStatus);
566 bstrftimes(schedt, sizeof(schedt), jcr->jr.SchedTime);
567 bstrftimes(sdt, sizeof(sdt), jcr->jr.StartTime);
568 bstrftimes(edt, sizeof(edt), jcr->jr.EndTime);
569 RunTime = jcr->jr.EndTime - jcr->jr.StartTime;
/*
 * NOTE(review): the lines just before this division are missing; presumably
 * they handle a RunTime of 0, which would otherwise yield a non-finite
 * rate -- confirm.
 */
573 kbps = ((double)jcr->jr.JobBytes) / (1000.0 * (double)RunTime);
575 if (!db_get_job_volume_names(jcr, jcr->db, jcr->jr.JobId, &jcr->VolumeName)) {
577 * Note, if the job has erred, most likely it did not write any
578 * tape, so suppress this "error" message since in that case
579 * it is normal. Or look at it the other way, only for a
580 * normal exit should we complain about this error.
582 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes) {
583 Jmsg(jcr, M_ERROR, 0, "%s", db_strerror(jcr->db));
585 jcr->VolumeName[0] = 0; /* none */
/*
 * Compression is 100 - 100 * JobBytes / ReadBytes; "None" is reported when
 * nothing was read or the figure is below 0.5.
 */
588 if (jcr->ReadBytes == 0) {
589 bstrncpy(compress, "None", sizeof(compress));
591 compression = (double)100 - 100.0 * ((double)jcr->JobBytes / (double)jcr->ReadBytes);
592 if (compression < 0.5) {
593 bstrncpy(compress, "None", sizeof(compress));
595 bsnprintf(compress, sizeof(compress), "%.1f %%", compression);
598 jobstatus_to_ascii(jcr->FDJobStatus, fd_term_msg, sizeof(fd_term_msg));
599 jobstatus_to_ascii(jcr->SDJobStatus, sd_term_msg, sizeof(sd_term_msg));
601 // bmicrosleep(15, 0); /* for debugging SIGHUP */
603 Jmsg(jcr, msg_type, 0, _("%s %s %s (%s): %s\n"
604 " Build OS: %s %s %s\n"
607 " Backup Level: %s%s\n"
608 " Client: \"%s\" %s\n"
609 " FileSet: \"%s\" %s\n"
610 " Pool: \"%s\" (From %s)\n"
611 " Catalog: \"%s\" (From %s)\n"
612 " Storage: \"%s\" (From %s)\n"
613 " Scheduled time: %s\n"
616 " Elapsed time: %s\n"
618 " FD Files Written: %s\n"
619 " SD Files Written: %s\n"
620 " FD Bytes Written: %s (%sB)\n"
621 " SD Bytes Written: %s (%sB)\n"
623 " Software Compression: %s\n"
627 " Volume name(s): %s\n"
628 " Volume Session Id: %d\n"
629 " Volume Session Time: %d\n"
630 " Last Volume Bytes: %s (%sB)\n"
631 " Non-fatal FD errors: %d\n"
633 " FD termination status: %s\n"
634 " SD termination status: %s\n"
635 " Termination: %s\n\n"),
636 BACULA, my_name, VERSION, LSMDATE, edt,
637 HOST_OS, DISTNAME, DISTVER,
640 level_to_str(jcr->get_JobLevel()), jcr->since,
641 jcr->client->name(), cr.Uname,
642 jcr->fileset->name(), jcr->FSCreateTime,
643 jcr->pool->name(), jcr->pool_source,
644 jcr->catalog->name(), jcr->catalog_source,
645 jcr->wstore->name(), jcr->wstore_source,
649 edit_utime(RunTime, elapsed, sizeof(elapsed)),
651 edit_uint64_with_commas(jcr->jr.JobFiles, ec1),
652 edit_uint64_with_commas(jcr->SDJobFiles, ec2),
653 edit_uint64_with_commas(jcr->jr.JobBytes, ec3),
654 edit_uint64_with_suffix(jcr->jr.JobBytes, ec4),
655 edit_uint64_with_commas(jcr->SDJobBytes, ec5),
656 edit_uint64_with_suffix(jcr->SDJobBytes, ec6),
659 jcr->VSS?_("yes"):_("no"),
660 jcr->Encrypt?_("yes"):_("no"),
661 jcr->accurate?_("yes"):_("no"),
665 edit_uint64_with_commas(mr.VolBytes, ec7),
666 edit_uint64_with_suffix(mr.VolBytes, ec8),
673 Dmsg0(100, "Leave backup_cleanup()\n");
/*
 * update_bootstrap_file -- write this job's volume records to the bootstrap
 * target named by the job's WriteBootstrap setting.
 *
 * Nothing is attempted unless the job status is JS_Terminated, bytes were
 * written (jcr->jr.JobBytes) and WriteBootstrap is set.
 *
 * The target name comes from edit_job_codes(). Two ways of opening it are
 * visible: open_bpipe() on the name without its first character (a leading
 * "|" per the existing comment), and fopen() using "w+b" for level L_FULL
 * and "a+b" for any other level.
 *
 * One header line (time, job name, level and since text) is written,
 * followed by one record per entry returned by
 * db_get_job_volume_parameters().
 *
 * NOTE(review): the lines choosing between pipe and file, the declarations
 * of fd, bpipe, VolCount and be, and everything between the record loop and
 * the "Could not open" message are missing from this listing.
 */
676 void update_bootstrap_file(JCR *jcr)
678 /* Now update the bootstrap file if any */
679 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes &&
680 jcr->job->WriteBootstrap) {
684 POOLMEM *fname = get_pool_memory(PM_FNAME);
685 fname = edit_job_codes(jcr, fname, jcr->job->WriteBootstrap, "");
687 VOL_PARAMS *VolParams = NULL;
689 char edt[50], ed1[50], ed2[50];
693 bpipe = open_bpipe(fname+1, 0, "w"); /* skip first char "|" */
694 fd = bpipe ? bpipe->wfd : NULL;
696 /* ***FIXME*** handle BASE */
697 fd = fopen(fname, jcr->get_JobLevel()==L_FULL?"w+b":"a+b");
700 VolCount = db_get_job_volume_parameters(jcr, jcr->db, jcr->JobId,
703 Jmsg(jcr, M_ERROR, 0, _("Could not get Job Volume Parameters to "
704 "update Bootstrap file. ERR=%s\n"), db_strerror(jcr->db));
/* The job is only marked failed here when the SD reported written files. */
705 if (jcr->SDJobFiles != 0) {
706 set_jcr_job_status(jcr, JS_ErrorTerminated);
710 /* Start output with when and who wrote it */
711 bstrftimes(edt, sizeof(edt), time(NULL));
712 fprintf(fd, "# %s - %s - %s%s\n", edt, jcr->jr.Job,
713 level_to_str(jcr->get_JobLevel()), jcr->since);
714 for (int i=0; i < VolCount; i++) {
715 /* Write the record */
716 fprintf(fd, "Volume=\"%s\"\n", VolParams[i].VolumeName);
717 fprintf(fd, "MediaType=\"%s\"\n", VolParams[i].MediaType);
/* Slot is written only when it is positive. */
718 if (VolParams[i].Slot > 0) {
719 fprintf(fd, "Slot=%d\n", VolParams[i].Slot);
721 fprintf(fd, "VolSessionId=%u\n", jcr->VolSessionId);
722 fprintf(fd, "VolSessionTime=%u\n", jcr->VolSessionTime);
723 fprintf(fd, "VolAddr=%s-%s\n",
724 edit_uint64(VolParams[i].StartAddr, ed1),
725 edit_uint64(VolParams[i].EndAddr, ed2));
726 fprintf(fd, "FileIndex=%d-%d\n", VolParams[i].FirstIndex,
727 VolParams[i].LastIndex);
739 Jmsg(jcr, M_ERROR, 0, _("Could not open WriteBootstrap file:\n"
740 "%s: ERR=%s\n"), fname, be.bstrerror());
741 set_jcr_job_status(jcr, JS_ErrorTerminated);
743 free_pool_memory(fname);