2 Bacula® - The Network Backup Solution
4 Copyright (C) 2000-2009 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from
7 many others, a complete list can be found in the file AUTHORS.
8 This program is Free Software; you can redistribute it and/or
9 modify it under the terms of version two of the GNU General Public
10 License as published by the Free Software Foundation and included
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 Bacula® is a registered trademark of Kern Sibbald.
24 The licensor of Bacula is the Free Software Foundation Europe
25 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
26 Switzerland, email:ftf@fsfeurope.org.
30 * Bacula Director -- backup.c -- responsible for doing backup jobs
32 * Kern Sibbald, March MM
34 * Basic tasks done here:
35 * Open DB and create records for this job.
36 * Open Message Channel with Storage daemon to tell him a job will be starting.
37 * Open connection with File daemon and pass him commands
39 * When the File daemon finishes the job, update the DB.
/*
 * Wire-protocol strings exchanged with the File daemon (FD).
 * The "command" strings are sent by the Director through fsend(); the
 * "response" strings are the replies the Director matches against.
 */
48 /* Commands sent to File daemon */
/* Sent as-is by do_backup() to ask the FD to start the backup. */
49 static char backupcmd[] = "backup\n";
/* printf-style; do_backup() fills in the SD address, SD data port and TLS need. */
50 static char storaddr[] = "storage address=%s port=%d ssl=%d\n";
52 /* Responses received from File daemon */
/* Expected reply to backupcmd. */
53 static char OKbackup[] = "2000 OK backup\n";
/* Expected reply to storaddr. */
54 static char OKstore[] = "2000 OK storage\n";
/* sscanf-style end-of-job report; wait_for_job_termination() expects 7 fields. */
55 static char EndJob[] = "2800 End Job TermCode=%d JobFiles=%u "
56 "ReadBytes=%llu JobBytes=%llu Errors=%u "
57 "VSS=%d Encrypt=%d\n";
58 /* Pre 1.39.29 (04Dec06) EndJob */
/* Older 5-field form (no VSS/Encrypt), tried when EndJob does not match. */
59 static char OldEndJob[] = "2800 End Job TermCode=%d JobFiles=%u "
60 "ReadBytes=%llu JobBytes=%llu Errors=%u\n";
62 * Called here before the job is run to do the job
/*
 * Prepare a backup job before it is run.
 *
 * Virtual Full jobs are delegated to do_vbackup_init(). For every other
 * level this releases the read storage, looks up or creates the FileSet
 * record, resolves the job level and "since" time into jcr->since, applies
 * pool overrides, checks for duplicate jobs, stores the Pool record id in
 * jcr->jr.PoolId, copies the Pool's storage as write storage and finally
 * runs any clone jobs.
 *
 * NOTE(review): the bodies of the failure branches and the final return
 * statement are not visible in this listing; presumably each failed check
 * returns false and success returns true -- confirm against the full file.
 */
65 bool do_backup_init(JCR *jcr)
/* Virtual Full has its own initialisation path. */
68 if (jcr->get_JobLevel() == L_VIRTUAL_FULL) {
69 return do_vbackup_init(jcr);
71 free_rstorage(jcr); /* we don't read so release */
73 if (!get_or_create_fileset_record(jcr)) {
78 * Get definitive Job level and since time
80 get_level_since_time(jcr, jcr->since, sizeof(jcr->since));
82 apply_pool_overrides(jcr);
84 if (!allow_duplicate_job(jcr)) {
/* A PoolId of 0 is treated as "no pool record could be obtained". */
88 jcr->jr.PoolId = get_or_create_pool_record(jcr, jcr->pool->name());
89 if (jcr->jr.PoolId == 0) {
93 /* If pool storage specified, use it instead of job storage */
94 copy_wstorage(jcr, jcr->pool->storage, _("Pool resource"));
/* NOTE(review): the condition guarding this fatal message is not visible here. */
97 Jmsg(jcr, M_FATAL, 0, _("No Storage specification found in Job or Pool.\n"));
101 create_clones(jcr); /* run any clone jobs */
106 /* Take all base jobs from job resource and find the
/*
 * Build in jobids a comma-separated list of the JobIds found for the base
 * jobs listed in the job resource (jcr->job->base).
 *
 * For each base job, its name is copied into a JOB_DBR whose StartTime is
 * the current job's start time, and db_get_base_jobid() is asked for the
 * matching JobId, which is then appended to jobids in decimal form.
 *
 * NOTE(review): local declarations, the body of the "no base list" check
 * and the conditions guarding the two pm_strcat() calls are not visible in
 * this listing. The return value of db_get_base_jobid() is not used on the
 * visible line.
 */
109 static void get_base_jobids(JCR *jcr, POOLMEM *jobids)
/* Nothing to look up when the job resource has no base job list. */
116 if (!jcr->job->base) {
120 memset(&jr, 0, sizeof(JOB_DBR));
121 jr.StartTime = jcr->jr.StartTime;
123 foreach_alist(job, jcr->job->base) {
124 bstrncpy(jr.Name, job->name(), sizeof(jr.Name));
125 db_get_base_jobid(jcr, jcr->db, &jr, &id);
/* Separator between entries. */
129 pm_strcat(jobids, ",");
131 pm_strcat(jobids, edit_uint64(id, str_jobid));
137 * For each file in the current list, send "/path/fname\0LStat" to FD
139 static int accurate_list_handler(void *ctx, int num_fields, char **row)
141 JCR *jcr = (JCR *)ctx;
143 if (job_canceled(jcr)) {
147 if (row[2] > 0) { /* discard when file_index == 0 */
148 jcr->file_bsock->fsend("%s%s%c%s", row[0], row[1], 0, row[4]);
154 * Send current file list to FD
155 * DIR -> FD : accurate files=xxxx
156 * DIR -> FD : /path/to/file\0Lstat
157 * DIR -> FD : /path/to/dir/\0Lstat
/*
 * Send the list of files already known to the catalog to the File daemon
 * so that it can run the backup in accurate mode.
 *
 * Nothing is sent when accurate mode is off, the job is canceled, or the
 * level is Base. For a Full job the list is built from the base jobs only;
 * for other levels db_accurate_get_jobids() supplies the previous jobs.
 * The FD first receives "accurate files=<count>", then one message per
 * file from accurate_list_handler(), then a BNET_EOD signal.
 *
 * NOTE(review): the bodies of the early-exit branches, the return
 * statements and any cleanup label are not visible in this listing; check
 * in the full file that both pool buffers are released on every exit path.
 */
161 bool send_accurate_current_files(JCR *jcr)
166 if (!jcr->accurate || job_canceled(jcr)) {
169 /* In base level, no previous job is used */
170 if (jcr->get_JobLevel() == L_BASE) {
/* nb receives the file count as text, jobids the comma-separated JobId list. */
174 POOLMEM *nb = get_pool_memory(PM_FNAME);
175 POOLMEM *jobids = get_pool_memory(PM_FNAME);
176 nb[0] = jobids[0] = '\0';
178 get_base_jobids(jcr, jobids);
180 /* On Full mode, if no previous base job, no accurate things */
181 if (jcr->get_JobLevel() == L_FULL && *jobids == 0) {
185 if (jcr->get_JobLevel() == L_FULL && *jobids != 0) {
186 db_init_base_file(jcr, jcr->db);
189 /* For Incr/Diff level, we search for older jobs */
190 if (jcr->get_JobLevel() != L_FULL) {
191 db_accurate_get_jobids(jcr, jcr->db, &jcr->jr, jobids);
195 Jmsg(jcr, M_FATAL, 0, _("Cannot find previous jobids.\n"));
200 if (jcr->JobId) { /* display the message only for real jobs */
201 Jmsg(jcr, M_INFO, 0, _("Sending Accurate information.\n"));
204 /* to be able to allocate the right size for htable */
205 Mmsg(buf, "SELECT sum(JobFiles) FROM Job WHERE JobId IN (%s)",jobids);
206 db_sql_query(jcr->db, buf.c_str(), db_get_int_handler, nb);
207 Dmsg2(200, "jobids=%s nb=%s\n", jobids, nb);
208 jcr->file_bsock->fsend("accurate files=%s\n", nb);
/* The file list is read over the separate batch connection (jcr->db_batch). */
210 if (!db_open_batch_connexion(jcr, jcr->db)) {
212 Jmsg0(jcr, M_FATAL, 0, "Can't get dedicate sql connexion");
216 db_get_file_list(jcr, jcr->db_batch, jobids, accurate_list_handler, (void *)jcr);
218 /* TODO: close the batch connexion ? (can be used very soon) */
/* Tell the FD that the list is complete. */
220 jcr->file_bsock->signal(BNET_EOD);
223 free_pool_memory(jobids);
224 free_pool_memory(nb);
230 * Do a backup of the specified FileSet
232 * Returns: false on failure
/*
 * Run a backup job.
 *
 * Virtual Full jobs are delegated to do_vbackup(). Otherwise, in order:
 *   - the job start record is written to the catalog,
 *   - the Storage daemon (SD) is contacted, its job is started, "run" is
 *     sent and the SD message thread is started,
 *   - the File daemon (FD) is contacted and receives the include list,
 *     exclude list, level command, SD address, runscript commands and,
 *     when applicable, the accurate file list,
 *   - the backup command is sent and the function waits for termination.
 *
 * NOTE(review): several local declarations, the failure branch bodies, the
 * cleanup label and the return statements are not visible in this listing.
 */
235 bool do_backup(JCR *jcr)
238 int tls_need = BNET_TLS_NONE;
243 if (jcr->get_JobLevel() == L_VIRTUAL_FULL) {
244 return do_vbackup(jcr);
247 /* Print Job Start message */
248 Jmsg(jcr, M_INFO, 0, _("Start Backup JobId %s, Job=%s\n"),
249 edit_uint64(jcr->JobId, ed1), jcr->Job);
251 set_jcr_job_status(jcr, JS_Running);
252 Dmsg2(100, "JobId=%d JobLevel=%c\n", jcr->jr.JobId, jcr->jr.JobLevel);
253 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
254 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
259 * Open a message channel connection with the Storage
260 * daemon. This is to let him know that our client
261 * will be contacting him for a backup session.
264 Dmsg0(110, "Open connection with storage daemon\n");
265 set_jcr_job_status(jcr, JS_WaitSD);
267 * Start conversation with Storage daemon
269 if (!connect_to_storage_daemon(jcr, 10, SDConnectTimeout, 1)) {
273 * Now start a job with the Storage daemon
275 if (!start_storage_daemon_job(jcr, NULL, jcr->wstorage)) {
280 * Start the job prior to starting the message thread below
281 * to avoid two threads from using the BSOCK structure at
284 if (!bnet_fsend(jcr->store_bsock, "run")) {
289 * Now start a Storage daemon message thread. Note,
290 * this thread is used to provide the catalog services
291 * for the backup job, including inserting the attributes
292 * into the catalog. See catalog_update() in catreq.c
294 if (!start_storage_daemon_message_thread(jcr)) {
297 Dmsg0(150, "Storage daemon connection OK\n");
/* From here on the conversation is with the File daemon. */
299 set_jcr_job_status(jcr, JS_WaitFD);
300 if (!connect_to_file_daemon(jcr, 10, FDConnectTimeout, 1)) {
304 set_jcr_job_status(jcr, JS_Running);
305 fd = jcr->file_bsock;
307 if (!send_include_list(jcr)) {
311 if (!send_exclude_list(jcr)) {
315 if (!send_level_command(jcr)) {
320 * send Storage daemon address to the File daemon
/* When no separate SD data port is configured, fall back to the SD port. */
323 if (store->SDDport == 0) {
324 store->SDDport = store->SDport;
327 /* TLS Requirement */
/* tls_need stays BNET_TLS_NONE unless TLS is enabled for this storage. */
328 if (store->tls_enable) {
329 if (store->tls_require) {
330 tls_need = BNET_TLS_REQUIRED;
332 tls_need = BNET_TLS_OK;
336 fd->fsend(storaddr, store->address, store->SDDport, tls_need);
337 if (!response(jcr, fd, OKstore, "Storage", DISPLAY_ERROR)) {
341 if (!send_runscripts_commands(jcr)) {
346 * We re-update the job start record so that the start
347 * time is set after the run before job. This avoids
348 * that any files created by the run before job will
349 * be saved twice. They will be backed up in the current
350 * job, but not in the next one unless they are changed.
351 * Without this, they will be backed up in this job and
352 * in the next job run because in that case, their date
353 * is after the start of this run.
355 jcr->start_time = time(NULL);
356 jcr->jr.StartTime = jcr->start_time;
357 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
358 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
362 * If backup is in accurate mode, we send the list of
365 if (!send_accurate_current_files(jcr)) {
369 /* Send backup command */
370 fd->fsend(backupcmd);
371 if (!response(jcr, fd, OKbackup, "backup", DISPLAY_ERROR)) {
375 /* Pickup Job termination data */
376 stat = wait_for_job_termination(jcr);
377 db_write_batch_file_records(jcr); /* used by bulk batch file insert */
/* Only a normally terminated job goes through backup_cleanup() here. */
378 if (stat == JS_Terminated) {
379 backup_cleanup(jcr, stat);
384 /* Come here only after starting SD thread */
/* Error path: mark the job failed, then wait for the daemons with the FD connect timeout. */
386 set_jcr_job_status(jcr, JS_ErrorTerminated);
387 Dmsg1(400, "wait for sd. use=%d\n", jcr->use_count());
389 wait_for_job_termination(jcr, FDConnectTimeout);
390 Dmsg1(400, "after wait for sd. use=%d\n", jcr->use_count());
396 * Here we wait for the File daemon to signal termination,
397 * then we wait for the Storage daemon. When both
398 * are done, we return the job status.
399 * Also used by restore.c
/*
 * Wait for the File daemon to report the end of the job, then wait for the
 * Storage daemon, and return a job status.
 *
 *   jcr      job being waited on; its FD socket is jcr->file_bsock
 *   timeout  passed to start_bsock_timer() for the FD socket
 *
 * Messages are read from the FD until one matches EndJob (7 fields) or
 * OldEndJob (5 fields); other messages are reported as warnings. The
 * counters parsed from the FD are then stored in the JCR.
 *
 * Returns the first status that is not JS_Terminated among jcr->JobStatus
 * and jcr->FDJobStatus, otherwise jcr->SDJobStatus.
 *
 * NOTE(review): the declarations of n, tid, fd_ok, VSS and Encrypt and
 * several conditions/branch bodies are not visible in this listing.
 * JobFiles and JobErrors are declared without an initialiser on the visible
 * line; verify in the full file that they are only read after a successful
 * sscanf().
 */
401 int wait_for_job_termination(JCR *jcr, int timeout)
404 BSOCK *fd = jcr->file_bsock;
406 uint32_t JobFiles, JobErrors;
407 uint32_t JobWarnings = 0;
408 uint64_t ReadBytes = 0;
409 uint64_t JobBytes = 0;
414 set_jcr_job_status(jcr, JS_Running);
418 tid = start_bsock_timer(fd, timeout); /* TODO: New timeout directive??? */
420 /* Wait for Client to terminate */
421 while ((n = bget_dirmsg(fd)) >= 0) {
423 (sscanf(fd->msg, EndJob, &jcr->FDJobStatus, &JobFiles,
424 &ReadBytes, &JobBytes, &JobErrors, &VSS, &Encrypt) == 7 ||
425 sscanf(fd->msg, OldEndJob, &jcr->FDJobStatus, &JobFiles,
426 &ReadBytes, &JobBytes, &JobErrors) == 5)) {
/* The FD's termination code becomes the job status. */
428 set_jcr_job_status(jcr, jcr->FDJobStatus);
429 Dmsg1(100, "FDStatus=%c\n", (char)jcr->JobStatus);
431 Jmsg(jcr, M_WARNING, 0, _("Unexpected Client Job message: %s\n"),
434 if (job_canceled(jcr)) {
439 stop_bsock_timer(tid);
442 if (is_bnet_error(fd)) {
443 Jmsg(jcr, M_FATAL, 0, _("Network error with FD during %s: ERR=%s\n"),
444 job_type_to_str(jcr->get_JobType()), fd->bstrerror());
446 fd->signal(BNET_TERMINATE); /* tell Client we are terminating */
449 /* Force cancel in SD if failing */
450 if (job_canceled(jcr) || !fd_ok) {
451 cancel_storage_daemon_job(jcr);
454 /* Note, the SD stores in jcr->JobFiles/ReadBytes/JobBytes/JobErrors */
455 wait_for_storage_daemon_termination(jcr);
457 /* Return values from FD */
/* Errors are accumulated; the other counters are overwritten with the FD's values. */
459 jcr->JobFiles = JobFiles;
460 jcr->JobErrors += JobErrors; /* Keep total errors */
461 jcr->ReadBytes = ReadBytes;
462 jcr->JobBytes = JobBytes;
463 jcr->JobWarnings = JobWarnings;
465 jcr->Encrypt = Encrypt;
467 Jmsg(jcr, M_FATAL, 0, _("No Job status returned from FD.\n"));
470 // Dmsg4(100, "fd_ok=%d FDJS=%d JS=%d SDJS=%d\n", fd_ok, jcr->FDJobStatus,
471 // jcr->JobStatus, jcr->SDJobStatus);
473 /* Return the first error status we find Dir, FD, or SD */
474 if (!fd_ok || is_bnet_error(fd)) { /* if fd not set, that use !fd_ok */
475 jcr->FDJobStatus = JS_ErrorTerminated;
477 if (jcr->JobStatus != JS_Terminated) {
478 return jcr->JobStatus;
480 if (jcr->FDJobStatus != JS_Terminated) {
481 return jcr->FDJobStatus;
483 return jcr->SDJobStatus;
487 * Release resources allocated during backup.
/*
 * Finish a backup job: record the end of the job, update the bootstrap
 * file and emit the job report.
 *
 *   jcr       job being finished
 *   TermCode  termination code passed to update_job_end()
 *
 * Virtual Full jobs are delegated to vbackup_cleanup(). Otherwise the Job,
 * Client and Media records are read back from the catalog for the report,
 * a termination message is chosen from jcr->JobStatus, rate and compression
 * figures are computed, and a single report message is issued via Jmsg().
 *
 * NOTE(review): the declarations of mr, cr and RunTime, the case labels of
 * the switch, several report arguments and parts of the report format are
 * not visible in this listing.
 */
489 void backup_cleanup(JCR *jcr, int TermCode)
491 char sdt[50], edt[50], schedt[50];
492 char ec1[30], ec2[30], ec3[30], ec4[30], ec5[30], compress[50];
493 char ec6[30], ec7[30], ec8[30], elapsed[50];
494 char term_code[100], fd_term_msg[100], sd_term_msg[100];
495 const char *term_msg;
496 int msg_type = M_INFO;
499 double kbps, compression;
502 if (jcr->get_JobLevel() == L_VIRTUAL_FULL) {
503 vbackup_cleanup(jcr, TermCode);
507 Dmsg2(100, "Enter backup_cleanup %d %c\n", TermCode, TermCode);
508 memset(&mr, 0, sizeof(mr));
509 memset(&cr, 0, sizeof(cr));
511 update_job_end(jcr, TermCode);
/* A missing Job record downgrades the job to an error termination. */
513 if (!db_get_job_record(jcr, jcr->db, &jcr->jr)) {
514 Jmsg(jcr, M_WARNING, 0, _("Error getting Job record for Job report: ERR=%s"),
515 db_strerror(jcr->db));
516 set_jcr_job_status(jcr, JS_ErrorTerminated);
/* A missing Client record only produces a warning on the visible lines. */
519 bstrncpy(cr.Name, jcr->client->name(), sizeof(cr.Name));
520 if (!db_get_client_record(jcr, jcr->db, &cr)) {
521 Jmsg(jcr, M_WARNING, 0, _("Error getting Client record for Job report: ERR=%s"),
522 db_strerror(jcr->db));
525 bstrncpy(mr.VolumeName, jcr->VolumeName, sizeof(mr.VolumeName));
526 if (!db_get_media_record(jcr, jcr->db, &mr)) {
527 Jmsg(jcr, M_WARNING, 0, _("Error getting Media record for Volume \"%s\": ERR=%s"),
528 mr.VolumeName, db_strerror(jcr->db));
529 set_jcr_job_status(jcr, JS_ErrorTerminated);
532 update_bootstrap_file(jcr);
/* Choose the termination text (and message type) from the final job status. */
534 switch (jcr->JobStatus) {
536 if (jcr->JobErrors || jcr->SDErrors) {
537 term_msg = _("Backup OK -- with warnings");
539 term_msg = _("Backup OK");
543 term_msg = _("Backup OK -- with warnings");
546 case JS_ErrorTerminated:
547 term_msg = _("*** Backup Error ***");
548 msg_type = M_ERROR; /* Generate error message */
/* Tell the SD to terminate and cancel its message thread if one is recorded. */
549 if (jcr->store_bsock) {
550 jcr->store_bsock->signal(BNET_TERMINATE);
551 if (jcr->SD_msg_chan) {
552 pthread_cancel(jcr->SD_msg_chan);
557 term_msg = _("Backup Canceled");
558 if (jcr->store_bsock) {
559 jcr->store_bsock->signal(BNET_TERMINATE);
560 if (jcr->SD_msg_chan) {
561 pthread_cancel(jcr->SD_msg_chan);
/* Fallback: format the unexpected status character into term_code. */
566 term_msg = term_code;
567 sprintf(term_code, _("Inappropriate term code: %c\n"), jcr->JobStatus);
570 bstrftimes(schedt, sizeof(schedt), jcr->jr.SchedTime);
571 bstrftimes(sdt, sizeof(sdt), jcr->jr.StartTime);
572 bstrftimes(edt, sizeof(edt), jcr->jr.EndTime);
573 RunTime = jcr->jr.EndTime - jcr->jr.StartTime;
/* NOTE(review): the guard against RunTime being zero is not visible here. */
577 kbps = ((double)jcr->jr.JobBytes) / (1000.0 * (double)RunTime);
579 if (!db_get_job_volume_names(jcr, jcr->db, jcr->jr.JobId, &jcr->VolumeName)) {
581 * Note, if the job has erred, most likely it did not write any
582 * tape, so suppress this "error" message since in that case
583 * it is normal. Or look at it the other way, only for a
584 * normal exit should we complain about this error.
586 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes) {
587 Jmsg(jcr, M_ERROR, 0, "%s", db_strerror(jcr->db));
589 jcr->VolumeName[0] = 0; /* none */
/* Compression is reported as a percentage saved; below 0.5 % it is shown as "None". */
592 if (jcr->ReadBytes == 0) {
593 bstrncpy(compress, "None", sizeof(compress));
595 compression = (double)100 - 100.0 * ((double)jcr->JobBytes / (double)jcr->ReadBytes);
596 if (compression < 0.5) {
597 bstrncpy(compress, "None", sizeof(compress));
599 bsnprintf(compress, sizeof(compress), "%.1f %%", compression);
602 jobstatus_to_ascii(jcr->FDJobStatus, fd_term_msg, sizeof(fd_term_msg));
603 jobstatus_to_ascii(jcr->SDJobStatus, sd_term_msg, sizeof(sd_term_msg));
605 // bmicrosleep(15, 0); /* for debugging SIGHUP */
/* The job report: one Jmsg() whose arguments must stay in the order of the format. */
607 Jmsg(jcr, msg_type, 0, _("%s %s %s (%s): %s\n"
608 " Build OS: %s %s %s\n"
611 " Backup Level: %s%s\n"
612 " Client: \"%s\" %s\n"
613 " FileSet: \"%s\" %s\n"
614 " Pool: \"%s\" (From %s)\n"
615 " Catalog: \"%s\" (From %s)\n"
616 " Storage: \"%s\" (From %s)\n"
617 " Scheduled time: %s\n"
620 " Elapsed time: %s\n"
622 " FD Files Written: %s\n"
623 " SD Files Written: %s\n"
624 " FD Bytes Written: %s (%sB)\n"
625 " SD Bytes Written: %s (%sB)\n"
627 " Software Compression: %s\n"
631 " Volume name(s): %s\n"
632 " Volume Session Id: %d\n"
633 " Volume Session Time: %d\n"
634 " Last Volume Bytes: %s (%sB)\n"
635 " Non-fatal FD errors: %d\n"
637 " FD termination status: %s\n"
638 " SD termination status: %s\n"
639 " Termination: %s\n\n"),
640 BACULA, my_name, VERSION, LSMDATE, edt,
641 HOST_OS, DISTNAME, DISTVER,
644 level_to_str(jcr->get_JobLevel()), jcr->since,
645 jcr->client->name(), cr.Uname,
646 jcr->fileset->name(), jcr->FSCreateTime,
647 jcr->pool->name(), jcr->pool_source,
648 jcr->catalog->name(), jcr->catalog_source,
649 jcr->wstore->name(), jcr->wstore_source,
653 edit_utime(RunTime, elapsed, sizeof(elapsed)),
655 edit_uint64_with_commas(jcr->jr.JobFiles, ec1),
656 edit_uint64_with_commas(jcr->SDJobFiles, ec2),
657 edit_uint64_with_commas(jcr->jr.JobBytes, ec3),
658 edit_uint64_with_suffix(jcr->jr.JobBytes, ec4),
659 edit_uint64_with_commas(jcr->SDJobBytes, ec5),
660 edit_uint64_with_suffix(jcr->SDJobBytes, ec6),
663 jcr->VSS?_("yes"):_("no"),
664 jcr->Encrypt?_("yes"):_("no"),
665 jcr->accurate?_("yes"):_("no"),
669 edit_uint64_with_commas(mr.VolBytes, ec7),
670 edit_uint64_with_suffix(mr.VolBytes, ec8),
677 Dmsg0(100, "Leave backup_cleanup()\n");
/*
 * Write the bootstrap information for the job that just finished.
 *
 * Only done when the job terminated normally, wrote some bytes, and the
 * job resource has a WriteBootstrap setting. The target name is produced
 * by expanding job codes in WriteBootstrap. The output starts with a "#"
 * header line (time, job name, level, since), followed by one record per
 * volume returned by db_get_job_volume_parameters().
 *
 * On failure to obtain the volume parameters or to open the target an
 * error is logged; the job status is set to JS_ErrorTerminated where shown.
 *
 * NOTE(review): the test that selects between the pipe and the plain-file
 * target, the closing of the output and the release of VolParams are not
 * visible in this listing.
 */
680 void update_bootstrap_file(JCR *jcr)
682 /* Now update the bootstrap file if any */
683 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes &&
684 jcr->job->WriteBootstrap) {
688 POOLMEM *fname = get_pool_memory(PM_FNAME);
689 fname = edit_job_codes(jcr, fname, jcr->job->WriteBootstrap, "");
691 VOL_PARAMS *VolParams = NULL;
693 char edt[50], ed1[50], ed2[50];
/* Pipe target: the command after the leading "|" is opened for writing. */
697 bpipe = open_bpipe(fname+1, 0, "w"); /* skip first char "|" */
698 fd = bpipe ? bpipe->wfd : NULL;
700 /* ***FIXME*** handle BASE */
/* File target: a Full job truncates the file, any other level appends to it. */
701 fd = fopen(fname, jcr->get_JobLevel()==L_FULL?"w+b":"a+b");
704 VolCount = db_get_job_volume_parameters(jcr, jcr->db, jcr->JobId,
707 Jmsg(jcr, M_ERROR, 0, _("Could not get Job Volume Parameters to "
708 "update Bootstrap file. ERR=%s\n"), db_strerror(jcr->db));
/* Missing volume parameters only fail the job when the SD reported files. */
709 if (jcr->SDJobFiles != 0) {
710 set_jcr_job_status(jcr, JS_ErrorTerminated);
714 /* Start output with when and who wrote it */
715 bstrftimes(edt, sizeof(edt), time(NULL));
716 fprintf(fd, "# %s - %s - %s%s\n", edt, jcr->jr.Job,
717 level_to_str(jcr->get_JobLevel()), jcr->since);
718 for (int i=0; i < VolCount; i++) {
719 /* Write the record */
720 fprintf(fd, "Volume=\"%s\"\n", VolParams[i].VolumeName);
721 fprintf(fd, "MediaType=\"%s\"\n", VolParams[i].MediaType);
/* The Slot line is only written for a positive slot number. */
722 if (VolParams[i].Slot > 0) {
723 fprintf(fd, "Slot=%d\n", VolParams[i].Slot);
725 fprintf(fd, "VolSessionId=%u\n", jcr->VolSessionId);
726 fprintf(fd, "VolSessionTime=%u\n", jcr->VolSessionTime);
727 fprintf(fd, "VolAddr=%s-%s\n",
728 edit_uint64(VolParams[i].StartAddr, ed1),
729 edit_uint64(VolParams[i].EndAddr, ed2));
730 fprintf(fd, "FileIndex=%d-%d\n", VolParams[i].FirstIndex,
731 VolParams[i].LastIndex);
743 Jmsg(jcr, M_ERROR, 0, _("Could not open WriteBootstrap file:\n"
744 "%s: ERR=%s\n"), fname, be.bstrerror());
745 set_jcr_job_status(jcr, JS_ErrorTerminated);
747 free_pool_memory(fname);