2 Bacula® - The Network Backup Solution
4 Copyright (C) 2000-2009 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from
7 many others, a complete list can be found in the file AUTHORS.
8 This program is Free Software; you can redistribute it and/or
9 modify it under the terms of version two of the GNU General Public
10 License as published by the Free Software Foundation and included
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 Bacula® is a registered trademark of Kern Sibbald.
24 The licensor of Bacula is the Free Software Foundation Europe
25 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
26 Switzerland, email:ftf@fsfeurope.org.
30 * Bacula Director -- backup.c -- responsible for doing backup jobs
32 * Kern Sibbald, March MM
34 * Basic tasks done here:
35 * Open DB and create records for this job.
36 * Open Message Channel with Storage daemon to tell him a job will be starting.
37 * Open connection with File daemon and pass him commands
39 * When the File daemon finishes the job, update the DB.
48 /* Commands sent to File daemon */
/* backupcmd is sent by do_backup() through fd->fsend() to start the backup. */
49 static char backupcmd[] = "backup\n";
/* storaddr is an fsend() format; do_backup() supplies the storage address, the port and the TLS need. */
50 static char storaddr[] = "storage address=%s port=%d ssl=%d\n";
52 /* Responses received from File daemon */
/* OKbackup and OKstore are the replies response() is asked to match after backupcmd and storaddr. */
53 static char OKbackup[] = "2000 OK backup\n";
54 static char OKstore[] = "2000 OK storage\n";
/* EndJob is the sscanf() pattern for the final FD status message (seven fields). */
55 static char EndJob[] = "2800 End Job TermCode=%d JobFiles=%u "
56 "ReadBytes=%llu JobBytes=%llu Errors=%u "
57 "VSS=%d Encrypt=%d\n";
58 /* Pre 1.39.29 (04Dec06) EndJob */
/* OldEndJob is the five-field variant that wait_for_job_termination() tries when EndJob does not match. */
59 static char OldEndJob[] = "2800 End Job TermCode=%d JobFiles=%u "
60 "ReadBytes=%llu JobBytes=%llu Errors=%u\n";
62 * Called here before the job is run to do the job
/*
 * do_backup_init -- prepare a backup job before it is run.
 *
 * Visible steps: a Virtual Full job is delegated to do_vbackup_init();
 * otherwise the read storage is released, the FileSet record is looked
 * up or created, the job level and since time are resolved, pool
 * overrides are applied, duplicate jobs are checked, the Pool record is
 * looked up or created, the pool storage is copied as write storage and
 * clone jobs are started.
 *
 * Returns bool.
 * NOTE(review): this listing omits some original lines (braces and short
 * statements), so what happens when a check fails is not visible here --
 * presumably the function returns false; confirm against the full source.
 */
65 bool do_backup_init(JCR *jcr)
68 if (jcr->get_JobLevel() == L_VIRTUAL_FULL) {
69 return do_vbackup_init(jcr);
71 free_rstorage(jcr); /* we don't read so release */
73 if (!get_or_create_fileset_record(jcr)) {
78 * Get definitive Job level and since time
/* The since text is written into jcr->since, bounded by sizeof(jcr->since). */
80 get_level_since_time(jcr, jcr->since, sizeof(jcr->since));
82 apply_pool_overrides(jcr);
84 if (!allow_duplicate_job(jcr)) {
/* A PoolId of zero is tested as a special case right after the lookup. */
88 jcr->jr.PoolId = get_or_create_pool_record(jcr, jcr->pool->name());
89 if (jcr->jr.PoolId == 0) {
93 /* If pool storage specified, use it instead of job storage */
94 copy_wstorage(jcr, jcr->pool->storage, _("Pool resource"));
/* Fatal job message; the condition guarding it is not visible in this listing. */
97 Jmsg(jcr, M_FATAL, 0, _("No Storage specification found in Job or Pool.\n"));
101 create_clones(jcr); /* run any clone jobs */
106 /* Take all base jobs from job resource and find the
/*
 * get_base_jobids -- collect the job ids of the base jobs configured for
 * this job into jobids.
 *
 *   jcr    - job control record; jcr->job->base is the list of base jobs
 *   jobids - pool memory string that receives the ids, built with
 *            pm_strcat() using a comma between entries
 *
 * Returns false when jcr->job->base is not set; otherwise returns true
 * only when jobids ends up non-empty.
 * NOTE(review): the local declarations (jr, job, id, str_jobid) and the
 * conditions around the two pm_strcat() calls are not visible in this
 * listing.
 */
109 static bool get_base_jobids(JCR *jcr, POOLMEM *jobids)
116 if (!jcr->job->base) {
117 return false; /* no base job, stop accurate */
/* Start from a zeroed search record that carries this job start time. */
120 memset(&jr, 0, sizeof(JOB_DBR));
121 jr.StartTime = jcr->jr.StartTime;
/* One catalog lookup per configured base job, keyed by the job name. */
123 foreach_alist(job, jcr->job->base) {
124 bstrncpy(jr.Name, job->name(), sizeof(jr.Name));
125 db_get_base_jobid(jcr, jcr->db, &jr, &id);
129 pm_strcat(jobids, ",");
131 pm_strcat(jobids, edit_uint64(id, str_jobid));
/* True only if at least one id was appended. */
135 return *jobids != '\0';
139 * For each file in the current list, send "/path/fname\0LStat" to the FD
141 static int accurate_list_handler(void *ctx, int num_fields, char **row)
143 JCR *jcr = (JCR *)ctx;
145 if (job_canceled(jcr)) {
149 if (row[2] > 0) { /* discard when file_index == 0 */
150 jcr->file_bsock->fsend("%s%s%c%s", row[0], row[1], 0, row[4]);
156 * Send current file list to FD
157 * DIR -> FD : accurate files=xxxx
158 * DIR -> FD : /path/to/file\0Lstat
159 * DIR -> FD : /path/to/dir/\0Lstat
/*
 * send_accurate_current_files -- send the File daemon the list of files
 * already recorded in the catalog, for Accurate mode.
 *
 * The first check covers jobs without jcr->accurate and canceled jobs;
 * the second covers Base level jobs, for which no previous job is used.
 * For a Full job the job ids come from get_base_jobids(); for the other
 * levels they come from db_accurate_get_jobids().  The visible code then
 *   - queries sum(JobFiles) over those job ids into nb and sends it in
 *     the accurate files= command, so the receiver can size its table,
 *   - opens the batch catalog connection,
 *   - streams every row through accurate_list_handler(),
 *   - signals BNET_EOD and frees both pool buffers.
 *
 * Returns bool.
 * NOTE(review): the declaration of buf and the return and goto
 * statements are not visible in this listing, so the value returned on
 * each path must be confirmed against the full source.
 */
163 bool send_accurate_current_files(JCR *jcr)
168 if (!jcr->accurate || job_canceled(jcr)) {
171 /* In base level, no previous job is used */
172 if (jcr->get_JobLevel() == L_BASE) {
/* Two pool buffers, both started as empty strings: nb holds the file count text, jobids the id list. */
176 POOLMEM *nb = get_pool_memory(PM_FNAME);
177 POOLMEM *jobids = get_pool_memory(PM_FNAME);
178 nb[0] = jobids[0] = '\0';
180 if (jcr->get_JobLevel() == L_FULL) {
181 /* On Full mode, if no previous base job, no accurate things */
182 if (!get_base_jobids(jcr, jobids)) {
188 /* For Incr/Diff level, we search for older jobs */
189 db_accurate_get_jobids(jcr, jcr->db, &jcr->jr, jobids);
191 /* We are in Incr/Diff, but no Full to build the accurate list... */
194 Jmsg(jcr, M_FATAL, 0, _("Cannot find previous jobids.\n"));
199 if (jcr->JobId) { /* display the message only for real jobs */
200 Jmsg(jcr, M_INFO, 0, _("Sending Accurate information.\n"));
203 /* to be able to allocate the right size for htable */
204 Mmsg(buf, "SELECT sum(JobFiles) FROM Job WHERE JobId IN (%s)",jobids);
205 db_sql_query(jcr->db, buf.c_str(), db_get_int_handler, nb);
206 Dmsg2(200, "jobids=%s nb=%s\n", jobids, nb);
207 jcr->file_bsock->fsend("accurate files=%s\n", nb);
/* The file list is read over the separate batch connection jcr->db_batch. */
209 if (!db_open_batch_connexion(jcr, jcr->db)) {
211 Jmsg0(jcr, M_FATAL, 0, "Can't get dedicate sql connexion");
/* Full level: build and read the base file list; the other call reads the list for jobids. */
215 if (jcr->get_JobLevel() == L_FULL) {
216 db_create_base_file_list(jcr, jcr->db_batch, jobids);
217 db_get_base_file_list(jcr, jcr->db_batch,
218 accurate_list_handler, (void *)jcr);
221 db_get_file_list(jcr, jcr->db_batch, jobids,
222 accurate_list_handler, (void *)jcr);
225 /* TODO: close the batch connexion ? (can be used very soon) */
/* Mark the end of the file list for the FD. */
227 jcr->file_bsock->signal(BNET_EOD);
230 free_pool_memory(jobids);
231 free_pool_memory(nb);
237 * Do a backup of the specified FileSet
239 * Returns: false on failure
/*
 * do_backup -- run one backup job from the Director side.
 *
 * Visible sequence:
 *   1. A Virtual Full job is handed to do_vbackup().
 *   2. The job is marked JS_Running and its start record is updated.
 *   3. The Storage daemon is contacted, the SD job is started, the run
 *      command is sent and the SD message thread is started.
 *   4. The File daemon is contacted and sent the include list, the
 *      exclude list, the level command, the storage address and the
 *      runscript commands.
 *   5. The start time is reset and the start record is updated again.
 *   6. Accurate file information is sent, then the backup command.
 *   7. wait_for_job_termination() collects the final status and
 *      backup_cleanup() is called when that status is JS_Terminated.
 *
 * NOTE(review): this listing omits some original lines (braces, returns,
 * labels and the declarations of stat, fd, store and ed1); the comments
 * below describe only the visible statements.
 */
242 bool do_backup(JCR *jcr)
245 int tls_need = BNET_TLS_NONE;
250 if (jcr->get_JobLevel() == L_VIRTUAL_FULL) {
251 return do_vbackup(jcr);
254 /* Print Job Start message */
255 Jmsg(jcr, M_INFO, 0, _("Start Backup JobId %s, Job=%s\n"),
256 edit_uint64(jcr->JobId, ed1), jcr->Job);
258 set_jcr_job_status(jcr, JS_Running);
259 Dmsg2(100, "JobId=%d JobLevel=%c\n", jcr->jr.JobId, jcr->jr.JobLevel);
/* A failed catalog update is reported as fatal with the catalog error text. */
260 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
261 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
266 * Open a message channel connection with the Storage
267 * daemon. This is to let him know that our client
268 * will be contacting him for a backup session.
271 Dmsg0(110, "Open connection with storage daemon\n");
272 set_jcr_job_status(jcr, JS_WaitSD);
274 * Start conversation with Storage daemon
276 if (!connect_to_storage_daemon(jcr, 10, SDConnectTimeout, 1)) {
280 * Now start a job with the Storage daemon
282 if (!start_storage_daemon_job(jcr, NULL, jcr->wstorage)) {
287 * Start the job prior to starting the message thread below
288 * to avoid two threads from using the BSOCK structure at
291 if (!bnet_fsend(jcr->store_bsock, "run")) {
296 * Now start a Storage daemon message thread. Note,
297 * this thread is used to provide the catalog services
298 * for the backup job, including inserting the attributes
299 * into the catalog. See catalog_update() in catreq.c
301 if (!start_storage_daemon_message_thread(jcr)) {
304 Dmsg0(150, "Storage daemon connection OK\n");
306 set_jcr_job_status(jcr, JS_WaitFD);
307 if (!connect_to_file_daemon(jcr, 10, FDConnectTimeout, 1)) {
311 set_jcr_job_status(jcr, JS_Running);
/* fd is the File daemon socket used for the rest of the conversation. */
312 fd = jcr->file_bsock;
314 if (!send_include_list(jcr)) {
318 if (!send_exclude_list(jcr)) {
322 if (!send_level_command(jcr)) {
327 * send Storage daemon address to the File daemon
/* When SDDport is zero it is replaced by SDport. */
330 if (store->SDDport == 0) {
331 store->SDDport = store->SDport;
334 /* TLS Requirement */
/* tls_need stays BNET_TLS_NONE unless TLS is enabled on the storage;   */
/* it becomes BNET_TLS_REQUIRED when tls_require is set and presumably  */
/* BNET_TLS_OK otherwise (the else line is not visible in this listing). */
335 if (store->tls_enable) {
336 if (store->tls_require) {
337 tls_need = BNET_TLS_REQUIRED;
339 tls_need = BNET_TLS_OK;
343 fd->fsend(storaddr, store->address, store->SDDport, tls_need);
344 if (!response(jcr, fd, OKstore, "Storage", DISPLAY_ERROR)) {
348 if (!send_runscripts_commands(jcr)) {
353 * We re-update the job start record so that the start
354 * time is set after the run before job. This avoids
355 * that any files created by the run before job will
356 * be saved twice. They will be backed up in the current
357 * job, but not in the next one unless they are changed.
358 * Without this, they will be backed up in this job and
359 * in the next job run because in that case, their date
360 * is after the start of this run.
362 jcr->start_time = time(NULL);
363 jcr->jr.StartTime = jcr->start_time;
364 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
365 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
369 * If backup is in accurate mode, we send the list of
372 if (!send_accurate_current_files(jcr)) {
376 /* Send backup command */
377 fd->fsend(backupcmd);
378 if (!response(jcr, fd, OKbackup, "backup", DISPLAY_ERROR)) {
382 /* Pickup Job termination data */
383 stat = wait_for_job_termination(jcr);
384 db_write_batch_file_records(jcr); /* used by bulk batch file insert */
/* Base file bookkeeping on the batch connection; the condition guarding these two calls is not visible here. */
387 db_commit_base_file_attributes_record(jcr, jcr->db_batch);
388 db_cleanup_base_file(jcr, jcr->db_batch);
391 if (stat == JS_Terminated) {
392 backup_cleanup(jcr, stat);
397 /* Come here only after starting SD thread */
/* Error path: mark the job JS_ErrorTerminated, then wait for termination using FDConnectTimeout. */
399 set_jcr_job_status(jcr, JS_ErrorTerminated);
400 Dmsg1(400, "wait for sd. use=%d\n", jcr->use_count());
402 wait_for_job_termination(jcr, FDConnectTimeout);
403 Dmsg1(400, "after wait for sd. use=%d\n", jcr->use_count());
409 * Here we wait for the File daemon to signal termination,
410 * then we wait for the Storage daemon. When both
411 * are done, we return the job status.
412 * Also used by restore.c
/*
 * wait_for_job_termination -- wait for the File daemon and then the
 * Storage daemon to finish, and return the resulting job status.
 *
 *   jcr     - job control record; jcr->file_bsock is the FD connection
 *   timeout - handed to start_bsock_timer() for the FD socket
 *
 * FD messages are read with bget_dirmsg() until it returns a negative
 * value.  A message that matches EndJob (seven fields) or OldEndJob
 * (five fields) supplies the FD status and counters.  The SD job is
 * canceled when the job was canceled or fd_ok is false, and the SD
 * termination is then awaited.
 *
 * Returns jcr->JobStatus if it is not JS_Terminated, else
 * jcr->FDJobStatus if it is not JS_Terminated, else jcr->SDJobStatus.
 *
 * NOTE(review): this listing omits some original lines (braces, the
 * declarations of n, tid, fd_ok, VSS and Encrypt, and several
 * conditions); the comments describe only the visible statements.
 */
414 int wait_for_job_termination(JCR *jcr, int timeout)
417 BSOCK *fd = jcr->file_bsock;
419 uint32_t JobFiles, JobErrors;
420 uint32_t JobWarnings = 0;
421 uint64_t ReadBytes = 0;
422 uint64_t JobBytes = 0;
427 set_jcr_job_status(jcr, JS_Running);
431 tid = start_bsock_timer(fd, timeout); /* TODO: New timeout directive??? */
433 /* Wait for Client to terminate */
434 while ((n = bget_dirmsg(fd)) >= 0) {
/* Try the current seven-field EndJob pattern first, then the older five-field one. */
436 (sscanf(fd->msg, EndJob, &jcr->FDJobStatus, &JobFiles,
437 &ReadBytes, &JobBytes, &JobErrors, &VSS, &Encrypt) == 7 ||
438 sscanf(fd->msg, OldEndJob, &jcr->FDJobStatus, &JobFiles,
439 &ReadBytes, &JobBytes, &JobErrors) == 5)) {
/* The status reported by the FD becomes the job status. */
441 set_jcr_job_status(jcr, jcr->FDJobStatus);
442 Dmsg1(100, "FDStatus=%c\n", (char)jcr->JobStatus);
/* Presumably the else branch for messages that match neither pattern; the else line is not visible here. */
444 Jmsg(jcr, M_WARNING, 0, _("Unexpected Client Job message: %s\n"),
447 if (job_canceled(jcr)) {
452 stop_bsock_timer(tid);
455 if (is_bnet_error(fd)) {
456 Jmsg(jcr, M_FATAL, 0, _("Network error with FD during %s: ERR=%s\n"),
457 job_type_to_str(jcr->get_JobType()), fd->bstrerror());
459 fd->signal(BNET_TERMINATE); /* tell Client we are terminating */
462 /* Force cancel in SD if failing */
463 if (job_canceled(jcr) || !fd_ok) {
464 cancel_storage_daemon_job(jcr);
467 /* Note, the SD stores in jcr->JobFiles/ReadBytes/JobBytes/JobErrors */
468 wait_for_storage_daemon_termination(jcr);
470 /* Return values from FD */
/* The FD counters are copied into the JCR; JobErrors is added to the running total, the others are assigned. */
472 jcr->JobFiles = JobFiles;
473 jcr->JobErrors += JobErrors; /* Keep total errors */
474 jcr->ReadBytes = ReadBytes;
475 jcr->JobBytes = JobBytes;
476 jcr->JobWarnings = JobWarnings;
478 jcr->Encrypt = Encrypt;
480 Jmsg(jcr, M_FATAL, 0, _("No Job status returned from FD.\n"));
483 // Dmsg4(100, "fd_ok=%d FDJS=%d JS=%d SDJS=%d\n", fd_ok, jcr->FDJobStatus,
484 // jcr->JobStatus, jcr->SDJobStatus);
486 /* Return the first error status we find Dir, FD, or SD */
487 if (!fd_ok || is_bnet_error(fd)) { /* if fd not set, that use !fd_ok */
488 jcr->FDJobStatus = JS_ErrorTerminated;
490 if (jcr->JobStatus != JS_Terminated) {
491 return jcr->JobStatus;
493 if (jcr->FDJobStatus != JS_Terminated) {
494 return jcr->FDJobStatus;
496 return jcr->SDJobStatus;
500 * Release resources allocated during backup.
/*
 * backup_cleanup -- finish a backup job: record the job end, refresh the
 * bootstrap file and emit the job report.
 *
 *   jcr      - job control record
 *   TermCode - termination status handed to update_job_end()
 *
 * A Virtual Full job is delegated to vbackup_cleanup().  Otherwise the
 * Job, Client and Media records are read back from the catalog, the
 * report headline is chosen from jcr->JobStatus, and one Jmsg() call
 * prints the summary.
 *
 * NOTE(review): this listing omits some original lines -- braces, the
 * case labels of the switch, the declarations of mr, cr and RunTime, and
 * several lines of the report format and of its argument list.  The
 * comments describe only the visible statements.
 */
502 void backup_cleanup(JCR *jcr, int TermCode)
/* Scratch buffers for formatted times, edited numbers and status texts. */
504 char sdt[50], edt[50], schedt[50];
505 char ec1[30], ec2[30], ec3[30], ec4[30], ec5[30], compress[50];
506 char ec6[30], ec7[30], ec8[30], elapsed[50];
507 char term_code[100], fd_term_msg[100], sd_term_msg[100];
508 const char *term_msg;
509 int msg_type = M_INFO;
512 double kbps, compression;
515 if (jcr->get_JobLevel() == L_VIRTUAL_FULL) {
516 vbackup_cleanup(jcr, TermCode);
520 Dmsg2(100, "Enter backup_cleanup %d %c\n", TermCode, TermCode);
521 memset(&mr, 0, sizeof(mr));
522 memset(&cr, 0, sizeof(cr));
524 update_job_end(jcr, TermCode);
/* Failing to read the Job record back is a warning and marks the job JS_ErrorTerminated. */
526 if (!db_get_job_record(jcr, jcr->db, &jcr->jr)) {
527 Jmsg(jcr, M_WARNING, 0, _("Error getting Job record for Job report: ERR=%s"),
528 db_strerror(jcr->db));
529 set_jcr_job_status(jcr, JS_ErrorTerminated);
/* The Client record is looked up by name; a failure is only a warning. */
532 bstrncpy(cr.Name, jcr->client->name(), sizeof(cr.Name));
533 if (!db_get_client_record(jcr, jcr->db, &cr)) {
534 Jmsg(jcr, M_WARNING, 0, _("Error getting Client record for Job report: ERR=%s"),
535 db_strerror(jcr->db));
/* The Media record is looked up by volume name; a failure also marks the job JS_ErrorTerminated. */
538 bstrncpy(mr.VolumeName, jcr->VolumeName, sizeof(mr.VolumeName));
539 if (!db_get_media_record(jcr, jcr->db, &mr)) {
540 Jmsg(jcr, M_WARNING, 0, _("Error getting Media record for Volume \"%s\": ERR=%s"),
541 mr.VolumeName, db_strerror(jcr->db));
542 set_jcr_job_status(jcr, JS_ErrorTerminated);
545 update_bootstrap_file(jcr);
/* Pick the report headline (term_msg) and the message type from the job status. */
547 switch (jcr->JobStatus) {
549 if (jcr->JobErrors || jcr->SDErrors) {
550 term_msg = _("Backup OK -- with warnings");
552 term_msg = _("Backup OK");
556 term_msg = _("Backup OK -- with warnings");
559 case JS_ErrorTerminated:
560 term_msg = _("*** Backup Error ***");
561 msg_type = M_ERROR; /* Generate error message */
/* If an SD socket exists, tell the SD to terminate; if the SD message thread handle is set, cancel that thread. */
562 if (jcr->store_bsock) {
563 jcr->store_bsock->signal(BNET_TERMINATE);
564 if (jcr->SD_msg_chan) {
565 pthread_cancel(jcr->SD_msg_chan);
570 term_msg = _("Backup Canceled");
571 if (jcr->store_bsock) {
572 jcr->store_bsock->signal(BNET_TERMINATE);
573 if (jcr->SD_msg_chan) {
574 pthread_cancel(jcr->SD_msg_chan);
/* Any other status: the headline is built in term_code with the raw status character. */
579 term_msg = term_code;
580 sprintf(term_code, _("Inappropriate term code: %c\n"), jcr->JobStatus);
583 bstrftimes(schedt, sizeof(schedt), jcr->jr.SchedTime);
584 bstrftimes(sdt, sizeof(sdt), jcr->jr.StartTime);
585 bstrftimes(edt, sizeof(edt), jcr->jr.EndTime);
/* RunTime is the difference between the catalog end and start times. */
586 RunTime = jcr->jr.EndTime - jcr->jr.StartTime;
/* Rate is JobBytes divided by 1000 times RunTime.                        */
/* NOTE(review): a guard for a zero RunTime is not visible in this listing. */
590 kbps = ((double)jcr->jr.JobBytes) / (1000.0 * (double)RunTime);
592 if (!db_get_job_volume_names(jcr, jcr->db, jcr->jr.JobId, &jcr->VolumeName)) {
594 * Note, if the job has erred, most likely it did not write any
595 * tape, so suppress this "error" message since in that case
596 * it is normal. Or look at it the other way, only for a
597 * normal exit should we complain about this error.
599 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes) {
600 Jmsg(jcr, M_ERROR, 0, "%s", db_strerror(jcr->db));
602 jcr->VolumeName[0] = 0; /* none */
/* Compression text: None when nothing was read or when the computed value is below 0.5, */
/* otherwise 100 minus 100 times JobBytes over ReadBytes, printed with one decimal.      */
605 if (jcr->ReadBytes == 0) {
606 bstrncpy(compress, "None", sizeof(compress));
608 compression = (double)100 - 100.0 * ((double)jcr->JobBytes / (double)jcr->ReadBytes);
609 if (compression < 0.5) {
610 bstrncpy(compress, "None", sizeof(compress));
612 bsnprintf(compress, sizeof(compress), "%.1f %%", compression);
615 jobstatus_to_ascii(jcr->FDJobStatus, fd_term_msg, sizeof(fd_term_msg));
616 jobstatus_to_ascii(jcr->SDJobStatus, sd_term_msg, sizeof(sd_term_msg));
618 // bmicrosleep(15, 0); /* for debugging SIGHUP */
/* The job report: one Jmsg() with a multi-line format followed by its arguments. */
620 Jmsg(jcr, msg_type, 0, _("%s %s %s (%s): %s\n"
621 " Build OS: %s %s %s\n"
624 " Backup Level: %s%s\n"
625 " Client: \"%s\" %s\n"
626 " FileSet: \"%s\" %s\n"
627 " Pool: \"%s\" (From %s)\n"
628 " Catalog: \"%s\" (From %s)\n"
629 " Storage: \"%s\" (From %s)\n"
630 " Scheduled time: %s\n"
633 " Elapsed time: %s\n"
635 " FD Files Written: %s\n"
636 " SD Files Written: %s\n"
637 " FD Bytes Written: %s (%sB)\n"
638 " SD Bytes Written: %s (%sB)\n"
640 " Software Compression: %s\n"
644 " Volume name(s): %s\n"
645 " Volume Session Id: %d\n"
646 " Volume Session Time: %d\n"
647 " Last Volume Bytes: %s (%sB)\n"
648 " Non-fatal FD errors: %d\n"
650 " FD termination status: %s\n"
651 " SD termination status: %s\n"
652 " Termination: %s\n\n"),
653 BACULA, my_name, VERSION, LSMDATE, edt,
654 HOST_OS, DISTNAME, DISTVER,
657 level_to_str(jcr->get_JobLevel()), jcr->since,
658 jcr->client->name(), cr.Uname,
659 jcr->fileset->name(), jcr->FSCreateTime,
660 jcr->pool->name(), jcr->pool_source,
661 jcr->catalog->name(), jcr->catalog_source,
662 jcr->wstore->name(), jcr->wstore_source,
666 edit_utime(RunTime, elapsed, sizeof(elapsed)),
668 edit_uint64_with_commas(jcr->jr.JobFiles, ec1),
669 edit_uint64_with_commas(jcr->SDJobFiles, ec2),
670 edit_uint64_with_commas(jcr->jr.JobBytes, ec3),
671 edit_uint64_with_suffix(jcr->jr.JobBytes, ec4),
672 edit_uint64_with_commas(jcr->SDJobBytes, ec5),
673 edit_uint64_with_suffix(jcr->SDJobBytes, ec6),
676 jcr->VSS?_("yes"):_("no"),
677 jcr->Encrypt?_("yes"):_("no"),
678 jcr->accurate?_("yes"):_("no"),
682 edit_uint64_with_commas(mr.VolBytes, ec7),
683 edit_uint64_with_suffix(mr.VolBytes, ec8),
690 Dmsg0(100, "Leave backup_cleanup()\n");
/*
 * update_bootstrap_file -- write the volume records of this job to the
 * target named by jcr->job->WriteBootstrap.
 *
 * Work is done only when jcr->JobStatus is JS_Terminated, the catalog
 * JobBytes is non-zero and WriteBootstrap is set.  The target name is
 * expanded with edit_job_codes().  One branch opens a pipe with
 * open_bpipe() on the name minus its first character (the existing
 * comment says that character is a vertical bar); the other opens a
 * file with fopen().  One record is then written per volume returned
 * by db_get_job_volume_parameters().
 *
 * NOTE(review): this listing omits some original lines (braces, the
 * declarations of fd, bpipe, VolCount and be, the test that selects the
 * pipe branch, and the closing of the stream); the comments describe
 * only the visible statements.
 */
693 void update_bootstrap_file(JCR *jcr)
695 /* Now update the bootstrap file if any */
696 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes &&
697 jcr->job->WriteBootstrap) {
/* fname receives the WriteBootstrap value after job code substitution. */
701 POOLMEM *fname = get_pool_memory(PM_FNAME);
702 fname = edit_job_codes(jcr, fname, jcr->job->WriteBootstrap, "");
704 VOL_PARAMS *VolParams = NULL;
706 char edt[50], ed1[50], ed2[50];
710 bpipe = open_bpipe(fname+1, 0, "w"); /* skip first char "|" */
/* fd is the write side of the pipe, or NULL when the pipe could not be opened. */
711 fd = bpipe ? bpipe->wfd : NULL;
713 /* ***FIXME*** handle BASE */
/* A Full level job opens the file with mode w+b; every other level uses a+b. */
714 fd = fopen(fname, jcr->get_JobLevel()==L_FULL?"w+b":"a+b");
717 VolCount = db_get_job_volume_parameters(jcr, jcr->db, jcr->JobId,
/* Missing volume parameters are reported; the job is marked JS_ErrorTerminated only if SDJobFiles is non-zero. */
720 Jmsg(jcr, M_ERROR, 0, _("Could not get Job Volume Parameters to "
721 "update Bootstrap file. ERR=%s\n"), db_strerror(jcr->db));
722 if (jcr->SDJobFiles != 0) {
723 set_jcr_job_status(jcr, JS_ErrorTerminated);
727 /* Start output with when and who wrote it */
728 bstrftimes(edt, sizeof(edt), time(NULL));
729 fprintf(fd, "# %s - %s - %s%s\n", edt, jcr->jr.Job,
730 level_to_str(jcr->get_JobLevel()), jcr->since);
731 for (int i=0; i < VolCount; i++) {
732 /* Write the record */
733 fprintf(fd, "Volume=\"%s\"\n", VolParams[i].VolumeName);
734 fprintf(fd, "MediaType=\"%s\"\n", VolParams[i].MediaType);
/* The Slot line is written only for a slot number above zero. */
735 if (VolParams[i].Slot > 0) {
736 fprintf(fd, "Slot=%d\n", VolParams[i].Slot);
738 fprintf(fd, "VolSessionId=%u\n", jcr->VolSessionId);
739 fprintf(fd, "VolSessionTime=%u\n", jcr->VolSessionTime);
740 fprintf(fd, "VolAddr=%s-%s\n",
741 edit_uint64(VolParams[i].StartAddr, ed1),
742 edit_uint64(VolParams[i].EndAddr, ed2));
743 fprintf(fd, "FileIndex=%d-%d\n", VolParams[i].FirstIndex,
744 VolParams[i].LastIndex);
/* Reported when the target could not be opened; the job is then marked JS_ErrorTerminated. */
756 Jmsg(jcr, M_ERROR, 0, _("Could not open WriteBootstrap file:\n"
757 "%s: ERR=%s\n"), fname, be.bstrerror());
758 set_jcr_job_status(jcr, JS_ErrorTerminated);
760 free_pool_memory(fname);