2 Bacula® - The Network Backup Solution
4 Copyright (C) 2000-2009 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from
7 many others, a complete list can be found in the file AUTHORS.
8 This program is Free Software; you can redistribute it and/or
9 modify it under the terms of version two of the GNU General Public
10 License as published by the Free Software Foundation and included
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 Bacula® is a registered trademark of Kern Sibbald.
24 The licensor of Bacula is the Free Software Foundation Europe
25 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
26 Switzerland, email:ftf@fsfeurope.org.
30 * Bacula Director -- backup.c -- responsible for doing backup jobs
32 * Kern Sibbald, March MM
34 * Basic tasks done here:
35 * Open DB and create records for this job.
36 * Open Message Channel with Storage daemon to tell him a job will be starting.
37 * Open connection with File daemon and pass him commands
39 * When the File daemon finishes the job, update the DB.
/*
 * Protocol strings exchanged with the File daemon.  In this file the
 * command strings are passed to fsend(), OKstore/OKbackup are passed to
 * response() as the expected reply, and EndJob/OldEndJob are used as
 * sscanf() formats in wait_for_job_termination().
 */
48 /* Commands sent to File daemon */
49 static char backupcmd[] = "backup\n";
/* Filled in do_backup() with the storage address, port and TLS need value */
50 static char storaddr[] = "storage address=%s port=%d ssl=%d\n";
52 /* Responses received from File daemon */
53 static char OKbackup[] = "2000 OK backup\n";
54 static char OKstore[] = "2000 OK storage\n";
/* Current End Job message: seven fields are scanned from it */
55 static char EndJob[] = "2800 End Job TermCode=%d JobFiles=%u "
56 "ReadBytes=%llu JobBytes=%llu Errors=%u "
57 "VSS=%d Encrypt=%d\n";
58 /* Pre 1.39.29 (04Dec06) EndJob */
/* Older End Job message: same first five fields, no VSS/Encrypt */
59 static char OldEndJob[] = "2800 End Job TermCode=%d JobFiles=%u "
60 "ReadBytes=%llu JobBytes=%llu Errors=%u\n";
62 * Called here before the job is run to do the job
/*
 * Prepare a backup job before it is run.
 *
 * A Virtual Full job is handed straight to do_vbackup_init().  For every
 * other level the visible steps are, in order: release the read storage,
 * get or create the FileSet record, determine the job level and "since"
 * time (stored in jcr->since), apply pool overrides, call
 * allow_duplicate_job(), resolve the Pool record id into jcr->jr.PoolId,
 * copy the Pool's storage list as the write storage, and start any clone
 * jobs.
 *
 * NOTE(review): this extract omits lines (the embedded line numbers skip),
 * so the bodies of the failure branches, the condition guarding the
 * "No Storage specification" message, and the return statements are not
 * visible here -- confirm them against the full file.
 */
65 bool do_backup_init(JCR *jcr)
68 if (jcr->get_JobLevel() == L_VIRTUAL_FULL) {
69 return do_vbackup_init(jcr);
71 free_rstorage(jcr); /* we don't read so release */
73 if (!get_or_create_fileset_record(jcr)) {
78 * Get definitive Job level and since time
80 get_level_since_time(jcr, jcr->since, sizeof(jcr->since));
82 apply_pool_overrides(jcr);
84 if (!allow_duplicate_job(jcr)) {
/* A PoolId of 0 is treated as "no pool record" by the check below */
88 jcr->jr.PoolId = get_or_create_pool_record(jcr, jcr->pool->name());
89 if (jcr->jr.PoolId == 0) {
93 /* If pool storage specified, use it instead of job storage */
94 copy_wstorage(jcr, jcr->pool->storage, _("Pool resource"));
97 Jmsg(jcr, M_FATAL, 0, _("No Storage specification found in Job or Pool.\n"));
101 create_clones(jcr); /* run any clone jobs */
106 /* Take all base jobs from job resource and find the
/*
 * Build a comma-separated list of base JobIds in "jobids".
 *
 *  jcr     job control record; jcr->job->base is the list of base jobs
 *  jobids  pool memory buffer that receives the list
 *
 * Returns false immediately when the job resource has no base list.
 * Otherwise each entry of jcr->job->base is looked up by name with
 * db_get_base_jobid(), using this job's StartTime in the search record,
 * and the resulting id is appended to "jobids".  The final result is true
 * only when "jobids" is non-empty.
 *
 * NOTE(review): the lines between the lookup and the two pm_strcat()
 * calls are not visible in this extract, so the conditions under which an
 * id or the "," separator is appended cannot be confirmed from here.
 * NOTE(review): "jobids" is received by value; if pm_strcat() can
 * relocate the buffer, the caller's pointer would not follow -- confirm
 * against pm_strcat()'s contract.
 */
109 static bool get_base_jobids(JCR *jcr, POOLMEM *jobids)
116 if (!jcr->job->base) {
117 return false; /* no base job, stop accurate */
120 memset(&jr, 0, sizeof(JOB_DBR));
121 jr.StartTime = jcr->jr.StartTime;
123 foreach_alist(job, jcr->job->base) {
124 bstrncpy(jr.Name, job->name(), sizeof(jr.Name));
125 db_get_base_jobid(jcr, jcr->db, &jr, &id);
129 pm_strcat(jobids, ",");
131 pm_strcat(jobids, edit_uint64(id, str_jobid));
135 return *jobids != '\0';
139 * For each file in the current list, send "/path/fname\0LStat" to FD
/*
 * Database row callback used while sending the accurate file list.
 *
 *  ctx         the JCR of the running job (passed as void *)
 *  num_fields  number of columns in the row (not used here)
 *  row         column values as strings; this handler reads row[0] and
 *              row[1] (sent concatenated as the file name), row[2]
 *              (the file index) and row[4] (sent after a NUL byte)
 *
 * Nothing is sent once the job has been canceled.  Rows whose file index
 * is 0 are discarded.  The index arrives as text, so the test must look
 * at the string content: the previous test "row[2] > 0" compared the
 * pointer itself with zero, which is true for every non-NULL string and
 * therefore never discarded anything.
 *
 * NOTE(review): the return statements are on lines not visible in this
 * extract and are left untouched.
 */
141 static int accurate_list_handler(void *ctx, int num_fields, char **row)
143 JCR *jcr = (JCR *)ctx;
145 if (job_canceled(jcr)) {
149 if (row[2] && row[2][0] != '0') { /* discard when file_index == 0 */
150 jcr->file_bsock->fsend("%s%s%c%s", row[0], row[1], 0, row[4]);
156 * Send current file list to FD
157 * DIR -> FD : accurate files=xxxx
158 * DIR -> FD : /path/to/file\0Lstat
159 * DIR -> FD : /path/to/dir/\0Lstat
/*
 * Send the list of files already known to the catalog to the File daemon
 * so that it can run the backup in accurate mode.
 *
 * Nothing is done when the job is not accurate, has been canceled, or is
 * a Base level job.  For a Full job the list is built from the base jobs
 * found by get_base_jobids(); for other levels the previous JobIds come
 * from db_accurate_get_jobids().  The total number of files in those jobs
 * is queried and announced with "accurate files=<n>", then every row is
 * streamed through accurate_list_handler() and the list is closed with
 * BNET_EOD.  Both pool memory buffers are released at the end.
 *
 * NOTE(review): the return statements and the jump/cleanup structure of
 * the early-exit branches are on lines not visible in this extract;
 * confirm that every exit taken after the two get_pool_memory() calls
 * reaches the free_pool_memory() calls.
 */
163 bool send_accurate_current_files(JCR *jcr)
168 if (!jcr->accurate || job_canceled(jcr)) {
171 /* In base level, no previous job is used */
172 if (jcr->get_JobLevel() == L_BASE) {
/* nb receives the file count as text, jobids the comma-separated id list */
176 POOLMEM *nb = get_pool_memory(PM_FNAME);
177 POOLMEM *jobids = get_pool_memory(PM_FNAME);
178 nb[0] = jobids[0] = '\0';
180 if (jcr->get_JobLevel() == L_FULL) {
181 /* On Full mode, if no previous base job, no accurate things */
182 if (!get_base_jobids(jcr, jobids)) {
185 db_create_base_file_list(jcr, jcr->db, jobids);
189 /* For Incr/Diff level, we search for older jobs */
190 db_accurate_get_jobids(jcr, jcr->db, &jcr->jr, jobids);
192 /* We are in Incr/Diff, but no Full to build the accurate list... */
195 Jmsg(jcr, M_FATAL, 0, _("Cannot find previous jobids.\n"));
200 if (jcr->JobId) { /* display the message only for real jobs */
201 Jmsg(jcr, M_INFO, 0, _("Sending Accurate information.\n"));
204 /* to be able to allocate the right size for htable */
205 Mmsg(buf, "SELECT sum(JobFiles) FROM Job WHERE JobId IN (%s)",jobids);
206 db_sql_query(jcr->db, buf.c_str(), db_get_int_handler, nb);
207 Dmsg2(200, "jobids=%s nb=%s\n", jobids, nb);
208 jcr->file_bsock->fsend("accurate files=%s\n", nb);
/* Full jobs read the base file list; other levels use the batch connection */
210 if (jcr->get_JobLevel() == L_FULL) {
211 db_get_base_file_list(jcr, jcr->db, accurate_list_handler, (void *)jcr);
214 if (!db_open_batch_connexion(jcr, jcr->db)) {
216 Jmsg0(jcr, M_FATAL, 0, "Can't get dedicate sql connexion");
219 db_get_file_list(jcr, jcr->db_batch, jobids, accurate_list_handler, (void *)jcr);
222 /* TODO: close the batch connexion ? (can be used very soon) */
224 jcr->file_bsock->signal(BNET_EOD);
227 free_pool_memory(jobids);
228 free_pool_memory(nb);
234 * Do a backup of the specified FileSet
236 * Returns: false on failure
/*
 * Run a backup job from the Director's side.
 *
 * A Virtual Full job is delegated to do_vbackup().  Otherwise the visible
 * sequence is:
 *   1. report the job start and store the job start record;
 *   2. connect to the Storage daemon, start its job, send "run", and
 *      start the Storage daemon message thread;
 *   3. connect to the File daemon and send the include list, exclude
 *      list and level command;
 *   4. send the Storage daemon address, port and TLS need to the File
 *      daemon and wait for its OK;
 *   5. send the runscript commands, then refresh the job start time;
 *   6. send the accurate file list, then the backup command;
 *   7. wait for termination, write the batch file records, and for a
 *      Full job with base jobs commit and clean up the base file data;
 *   8. call backup_cleanup() when the final status is JS_Terminated.
 *
 * The trailing statements (after "Come here only after starting SD
 * thread") set JS_ErrorTerminated and wait again for termination with
 * FDConnectTimeout as the timeout.
 *
 * NOTE(review): the bodies of the failure branches, the labels they jump
 * to, several local declarations and the return statements are on lines
 * not visible in this extract.
 */
239 bool do_backup(JCR *jcr)
242 int tls_need = BNET_TLS_NONE;
247 if (jcr->get_JobLevel() == L_VIRTUAL_FULL) {
248 return do_vbackup(jcr);
251 /* Print Job Start message */
252 Jmsg(jcr, M_INFO, 0, _("Start Backup JobId %s, Job=%s\n"),
253 edit_uint64(jcr->JobId, ed1), jcr->Job);
255 set_jcr_job_status(jcr, JS_Running);
256 Dmsg2(100, "JobId=%d JobLevel=%c\n", jcr->jr.JobId, jcr->jr.JobLevel);
257 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
258 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
263 * Open a message channel connection with the Storage
264 * daemon. This is to let him know that our client
265 * will be contacting him for a backup session.
268 Dmsg0(110, "Open connection with storage daemon\n");
269 set_jcr_job_status(jcr, JS_WaitSD);
271 * Start conversation with Storage daemon
273 if (!connect_to_storage_daemon(jcr, 10, SDConnectTimeout, 1)) {
277 * Now start a job with the Storage daemon
279 if (!start_storage_daemon_job(jcr, NULL, jcr->wstorage)) {
284 * Start the job prior to starting the message thread below
285 * to avoid two threads from using the BSOCK structure at
288 if (!bnet_fsend(jcr->store_bsock, "run")) {
293 * Now start a Storage daemon message thread. Note,
294 * this thread is used to provide the catalog services
295 * for the backup job, including inserting the attributes
296 * into the catalog. See catalog_update() in catreq.c
298 if (!start_storage_daemon_message_thread(jcr)) {
301 Dmsg0(150, "Storage daemon connection OK\n");
303 set_jcr_job_status(jcr, JS_WaitFD);
304 if (!connect_to_file_daemon(jcr, 10, FDConnectTimeout, 1)) {
308 set_jcr_job_status(jcr, JS_Running);
309 fd = jcr->file_bsock;
311 if (!send_include_list(jcr)) {
315 if (!send_exclude_list(jcr)) {
319 if (!send_level_command(jcr)) {
324 * send Storage daemon address to the File daemon
/* When no separate SDDport is configured, the regular SDport is used */
327 if (store->SDDport == 0) {
328 store->SDDport = store->SDport;
331 /* TLS Requirement */
/*
 * tls_need stays BNET_TLS_NONE unless TLS is enabled on the storage
 * resource.  NOTE(review): the line separating the REQUIRED and OK
 * assignments is not visible here; presumably it is the else branch.
 */
332 if (store->tls_enable) {
333 if (store->tls_require) {
334 tls_need = BNET_TLS_REQUIRED;
336 tls_need = BNET_TLS_OK;
340 fd->fsend(storaddr, store->address, store->SDDport, tls_need);
341 if (!response(jcr, fd, OKstore, "Storage", DISPLAY_ERROR)) {
345 if (!send_runscripts_commands(jcr)) {
350 * We re-update the job start record so that the start
351 * time is set after the run before job. This avoids
352 * that any files created by the run before job will
353 * be saved twice. They will be backed up in the current
354 * job, but not in the next one unless they are changed.
355 * Without this, they will be backed up in this job and
356 * in the next job run because in that case, their date
357 * is after the start of this run.
359 jcr->start_time = time(NULL);
360 jcr->jr.StartTime = jcr->start_time;
361 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
362 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
366 * If backup is in accurate mode, we send the list of
369 if (!send_accurate_current_files(jcr)) {
373 /* Send backup command */
374 fd->fsend(backupcmd);
375 if (!response(jcr, fd, OKbackup, "backup", DISPLAY_ERROR)) {
379 /* Pickup Job termination data */
380 stat = wait_for_job_termination(jcr);
381 db_write_batch_file_records(jcr); /* used by bulk batch file insert */
383 if (jcr->get_JobLevel() == L_FULL && jcr->job->base) {
384 db_commit_base_file_attributes_record(jcr, jcr->db);
385 db_cleanup_base_file(jcr, jcr->db);
388 if (stat == JS_Terminated) {
389 backup_cleanup(jcr, stat);
394 /* Come here only after starting SD thread */
396 set_jcr_job_status(jcr, JS_ErrorTerminated);
397 Dmsg1(400, "wait for sd. use=%d\n", jcr->use_count());
399 wait_for_job_termination(jcr, FDConnectTimeout);
400 Dmsg1(400, "after wait for sd. use=%d\n", jcr->use_count());
406 * Here we wait for the File daemon to signal termination,
407 * then we wait for the Storage daemon. When both
408 * are done, we return the job status.
409 * Also used by restore.c
/*
 * Wait for the File daemon, then the Storage daemon, to finish the job.
 *
 *  jcr      job control record; jcr->file_bsock is the File daemon socket
 *  timeout  value handed to start_bsock_timer() for the File daemon wait
 *           (do_backup() also calls this function with one argument, so a
 *           default is presumably declared elsewhere -- TODO confirm)
 *
 * Messages are read from the File daemon until bget_dirmsg() returns a
 * negative value.  A message matching EndJob (7 fields) or OldEndJob
 * (5 fields) supplies the File daemon's status and counters; any other
 * message is reported as unexpected.  Afterwards the Storage daemon job
 * is canceled if the job was canceled or no valid End Job was seen, and
 * the function waits for the Storage daemon to terminate.  The File
 * daemon's counters are then copied into the JCR; JobErrors is added to
 * the existing total rather than overwriting it.
 *
 * Returns the first status that is not JS_Terminated among
 * jcr->JobStatus and jcr->FDJobStatus, otherwise jcr->SDJobStatus.
 *
 * NOTE(review): the declarations of n, tid, fd_ok, VSS and Encrypt, the
 * guards around several statements, and the loop exit on cancel are on
 * lines not visible in this extract.
 */
411 int wait_for_job_termination(JCR *jcr, int timeout)
414 BSOCK *fd = jcr->file_bsock;
416 uint32_t JobFiles, JobErrors;
417 uint32_t JobWarnings = 0;
418 uint64_t ReadBytes = 0;
419 uint64_t JobBytes = 0;
424 set_jcr_job_status(jcr, JS_Running);
428 tid = start_bsock_timer(fd, timeout); /* TODO: New timeout directive??? */
430 /* Wait for Client to terminate */
431 while ((n = bget_dirmsg(fd)) >= 0) {
/* Try the current End Job format first, then the pre-1.39.29 one */
433 (sscanf(fd->msg, EndJob, &jcr->FDJobStatus, &JobFiles,
434 &ReadBytes, &JobBytes, &JobErrors, &VSS, &Encrypt) == 7 ||
435 sscanf(fd->msg, OldEndJob, &jcr->FDJobStatus, &JobFiles,
436 &ReadBytes, &JobBytes, &JobErrors) == 5)) {
438 set_jcr_job_status(jcr, jcr->FDJobStatus);
439 Dmsg1(100, "FDStatus=%c\n", (char)jcr->JobStatus);
441 Jmsg(jcr, M_WARNING, 0, _("Unexpected Client Job message: %s\n"),
444 if (job_canceled(jcr)) {
449 stop_bsock_timer(tid);
452 if (is_bnet_error(fd)) {
453 Jmsg(jcr, M_FATAL, 0, _("Network error with FD during %s: ERR=%s\n"),
454 job_type_to_str(jcr->get_JobType()), fd->bstrerror());
456 fd->signal(BNET_TERMINATE); /* tell Client we are terminating */
459 /* Force cancel in SD if failing */
460 if (job_canceled(jcr) || !fd_ok) {
461 cancel_storage_daemon_job(jcr);
464 /* Note, the SD stores in jcr->JobFiles/ReadBytes/JobBytes/JobErrors */
465 wait_for_storage_daemon_termination(jcr);
467 /* Return values from FD */
/* JobWarnings is never parsed from the messages above, so it is 0 here */
469 jcr->JobFiles = JobFiles;
470 jcr->JobErrors += JobErrors; /* Keep total errors */
471 jcr->ReadBytes = ReadBytes;
472 jcr->JobBytes = JobBytes;
473 jcr->JobWarnings = JobWarnings;
475 jcr->Encrypt = Encrypt;
477 Jmsg(jcr, M_FATAL, 0, _("No Job status returned from FD.\n"));
480 // Dmsg4(100, "fd_ok=%d FDJS=%d JS=%d SDJS=%d\n", fd_ok, jcr->FDJobStatus,
481 // jcr->JobStatus, jcr->SDJobStatus);
483 /* Return the first error status we find Dir, FD, or SD */
484 if (!fd_ok || is_bnet_error(fd)) { /* if fd not set, that use !fd_ok */
485 jcr->FDJobStatus = JS_ErrorTerminated;
487 if (jcr->JobStatus != JS_Terminated) {
488 return jcr->JobStatus;
490 if (jcr->FDJobStatus != JS_Terminated) {
491 return jcr->FDJobStatus;
493 return jcr->SDJobStatus;
497 * Release resources allocated during backup.
/*
 * Finish a backup job: record its end, gather catalog data and emit the
 * job report.
 *
 *  jcr       job control record
 *  TermCode  termination status passed on to update_job_end()
 *
 * A Virtual Full job is handed to vbackup_cleanup().  Otherwise the job
 * end is recorded, the Job, Client and Media records are read back from
 * the catalog (a failure is reported as a warning, and for the Job and
 * Media records the job status is set to JS_ErrorTerminated), the
 * bootstrap file is updated, and a termination message is chosen from
 * jcr->JobStatus.  For the error and canceled cases the Storage daemon
 * socket is sent BNET_TERMINATE and the SD message thread is canceled.
 * Finally the transfer rate and compression ratio are computed and the
 * full report is issued through Jmsg().
 *
 * NOTE(review): the case labels of the switch, the declarations of mr,
 * cr and RunTime, several report arguments, and whatever guards the
 * division by RunTime are on lines not visible in this extract.
 * NOTE(review): term_code is filled with sprintf() while "compress" uses
 * the bounded bsnprintf(); consider the bounded form for consistency.
 */
499 void backup_cleanup(JCR *jcr, int TermCode)
501 char sdt[50], edt[50], schedt[50];
502 char ec1[30], ec2[30], ec3[30], ec4[30], ec5[30], compress[50];
503 char ec6[30], ec7[30], ec8[30], elapsed[50];
504 char term_code[100], fd_term_msg[100], sd_term_msg[100];
505 const char *term_msg;
506 int msg_type = M_INFO;
509 double kbps, compression;
512 if (jcr->get_JobLevel() == L_VIRTUAL_FULL) {
513 vbackup_cleanup(jcr, TermCode);
517 Dmsg2(100, "Enter backup_cleanup %d %c\n", TermCode, TermCode);
518 memset(&mr, 0, sizeof(mr));
519 memset(&cr, 0, sizeof(cr));
521 update_job_end(jcr, TermCode);
523 if (!db_get_job_record(jcr, jcr->db, &jcr->jr)) {
524 Jmsg(jcr, M_WARNING, 0, _("Error getting Job record for Job report: ERR=%s"),
525 db_strerror(jcr->db));
526 set_jcr_job_status(jcr, JS_ErrorTerminated);
529 bstrncpy(cr.Name, jcr->client->name(), sizeof(cr.Name));
530 if (!db_get_client_record(jcr, jcr->db, &cr)) {
531 Jmsg(jcr, M_WARNING, 0, _("Error getting Client record for Job report: ERR=%s"),
532 db_strerror(jcr->db));
535 bstrncpy(mr.VolumeName, jcr->VolumeName, sizeof(mr.VolumeName));
536 if (!db_get_media_record(jcr, jcr->db, &mr)) {
537 Jmsg(jcr, M_WARNING, 0, _("Error getting Media record for Volume \"%s\": ERR=%s"),
538 mr.VolumeName, db_strerror(jcr->db));
539 set_jcr_job_status(jcr, JS_ErrorTerminated);
542 update_bootstrap_file(jcr);
/* Pick the report headline (and message type) from the final job status */
544 switch (jcr->JobStatus) {
546 if (jcr->JobErrors || jcr->SDErrors) {
547 term_msg = _("Backup OK -- with warnings");
549 term_msg = _("Backup OK");
553 term_msg = _("Backup OK -- with warnings");
556 case JS_ErrorTerminated:
557 term_msg = _("*** Backup Error ***");
558 msg_type = M_ERROR; /* Generate error message */
559 if (jcr->store_bsock) {
560 jcr->store_bsock->signal(BNET_TERMINATE);
561 if (jcr->SD_msg_chan) {
562 pthread_cancel(jcr->SD_msg_chan);
567 term_msg = _("Backup Canceled");
568 if (jcr->store_bsock) {
569 jcr->store_bsock->signal(BNET_TERMINATE);
570 if (jcr->SD_msg_chan) {
571 pthread_cancel(jcr->SD_msg_chan);
576 term_msg = term_code;
577 sprintf(term_code, _("Inappropriate term code: %c\n"), jcr->JobStatus);
580 bstrftimes(schedt, sizeof(schedt), jcr->jr.SchedTime);
581 bstrftimes(sdt, sizeof(sdt), jcr->jr.StartTime);
582 bstrftimes(edt, sizeof(edt), jcr->jr.EndTime);
583 RunTime = jcr->jr.EndTime - jcr->jr.StartTime;
/* Rate = bytes / (1000 * seconds of run time) */
587 kbps = ((double)jcr->jr.JobBytes) / (1000.0 * (double)RunTime);
589 if (!db_get_job_volume_names(jcr, jcr->db, jcr->jr.JobId, &jcr->VolumeName)) {
591 * Note, if the job has erred, most likely it did not write any
592 * tape, so suppress this "error" message since in that case
593 * it is normal. Or look at it the other way, only for a
594 * normal exit should we complain about this error.
596 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes) {
597 Jmsg(jcr, M_ERROR, 0, "%s", db_strerror(jcr->db));
599 jcr->VolumeName[0] = 0; /* none */
/* Compression is reported as "None" when nothing was read or it is below 0.5% */
602 if (jcr->ReadBytes == 0) {
603 bstrncpy(compress, "None", sizeof(compress));
605 compression = (double)100 - 100.0 * ((double)jcr->JobBytes / (double)jcr->ReadBytes);
606 if (compression < 0.5) {
607 bstrncpy(compress, "None", sizeof(compress));
609 bsnprintf(compress, sizeof(compress), "%.1f %%", compression);
612 jobstatus_to_ascii(jcr->FDJobStatus, fd_term_msg, sizeof(fd_term_msg));
613 jobstatus_to_ascii(jcr->SDJobStatus, sd_term_msg, sizeof(sd_term_msg));
615 // bmicrosleep(15, 0); /* for debugging SIGHUP */
617 Jmsg(jcr, msg_type, 0, _("%s %s %s (%s): %s\n"
618 " Build OS: %s %s %s\n"
621 " Backup Level: %s%s\n"
622 " Client: \"%s\" %s\n"
623 " FileSet: \"%s\" %s\n"
624 " Pool: \"%s\" (From %s)\n"
625 " Catalog: \"%s\" (From %s)\n"
626 " Storage: \"%s\" (From %s)\n"
627 " Scheduled time: %s\n"
630 " Elapsed time: %s\n"
632 " FD Files Written: %s\n"
633 " SD Files Written: %s\n"
634 " FD Bytes Written: %s (%sB)\n"
635 " SD Bytes Written: %s (%sB)\n"
637 " Software Compression: %s\n"
641 " Volume name(s): %s\n"
642 " Volume Session Id: %d\n"
643 " Volume Session Time: %d\n"
644 " Last Volume Bytes: %s (%sB)\n"
645 " Non-fatal FD errors: %d\n"
647 " FD termination status: %s\n"
648 " SD termination status: %s\n"
649 " Termination: %s\n\n"),
650 BACULA, my_name, VERSION, LSMDATE, edt,
651 HOST_OS, DISTNAME, DISTVER,
654 level_to_str(jcr->get_JobLevel()), jcr->since,
655 jcr->client->name(), cr.Uname,
656 jcr->fileset->name(), jcr->FSCreateTime,
657 jcr->pool->name(), jcr->pool_source,
658 jcr->catalog->name(), jcr->catalog_source,
659 jcr->wstore->name(), jcr->wstore_source,
663 edit_utime(RunTime, elapsed, sizeof(elapsed)),
665 edit_uint64_with_commas(jcr->jr.JobFiles, ec1),
666 edit_uint64_with_commas(jcr->SDJobFiles, ec2),
667 edit_uint64_with_commas(jcr->jr.JobBytes, ec3),
668 edit_uint64_with_suffix(jcr->jr.JobBytes, ec4),
669 edit_uint64_with_commas(jcr->SDJobBytes, ec5),
670 edit_uint64_with_suffix(jcr->SDJobBytes, ec6),
673 jcr->VSS?_("yes"):_("no"),
674 jcr->Encrypt?_("yes"):_("no"),
675 jcr->accurate?_("yes"):_("no"),
679 edit_uint64_with_commas(mr.VolBytes, ec7),
680 edit_uint64_with_suffix(mr.VolBytes, ec8),
687 Dmsg0(100, "Leave backup_cleanup()\n");
/*
 * Write this job's volume information to the job's bootstrap target.
 *
 * Runs only when the job status is JS_Terminated, the job record shows a
 * non-zero byte count, and the job resource has WriteBootstrap set.  The
 * target name is produced by edit_job_codes() from WriteBootstrap.  The
 * output goes either to a pipe opened with open_bpipe() (the leading "|"
 * of the name is skipped) or to a file opened with fopen(); the file is
 * truncated ("w+b") for a Full job and appended to ("a+b") for any other
 * level.
 *
 * The output starts with a "#" line giving the time, job name, level and
 * since text, followed by one record per volume returned by
 * db_get_job_volume_parameters().  If the volume parameters cannot be
 * read, an error is reported and, when the Storage daemon wrote files,
 * the job status is set to JS_ErrorTerminated.  Failure to open the
 * target is also reported and sets JS_ErrorTerminated.
 *
 * NOTE(review): the test that selects pipe versus file, the checks on fd
 * and VolCount, the closing of the pipe/file and the release of VolParams
 * are on lines not visible in this extract.
 */
690 void update_bootstrap_file(JCR *jcr)
692 /* Now update the bootstrap file if any */
693 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes &&
694 jcr->job->WriteBootstrap) {
698 POOLMEM *fname = get_pool_memory(PM_FNAME);
699 fname = edit_job_codes(jcr, fname, jcr->job->WriteBootstrap, "");
701 VOL_PARAMS *VolParams = NULL;
703 char edt[50], ed1[50], ed2[50];
707 bpipe = open_bpipe(fname+1, 0, "w"); /* skip first char "|" */
708 fd = bpipe ? bpipe->wfd : NULL;
710 /* ***FIXME*** handle BASE */
711 fd = fopen(fname, jcr->get_JobLevel()==L_FULL?"w+b":"a+b");
714 VolCount = db_get_job_volume_parameters(jcr, jcr->db, jcr->JobId,
717 Jmsg(jcr, M_ERROR, 0, _("Could not get Job Volume Parameters to "
718 "update Bootstrap file. ERR=%s\n"), db_strerror(jcr->db));
719 if (jcr->SDJobFiles != 0) {
720 set_jcr_job_status(jcr, JS_ErrorTerminated);
724 /* Start output with when and who wrote it */
725 bstrftimes(edt, sizeof(edt), time(NULL));
726 fprintf(fd, "# %s - %s - %s%s\n", edt, jcr->jr.Job,
727 level_to_str(jcr->get_JobLevel()), jcr->since);
728 for (int i=0; i < VolCount; i++) {
729 /* Write the record */
730 fprintf(fd, "Volume=\"%s\"\n", VolParams[i].VolumeName);
731 fprintf(fd, "MediaType=\"%s\"\n", VolParams[i].MediaType);
/* The Slot line is written only for a positive slot number */
732 if (VolParams[i].Slot > 0) {
733 fprintf(fd, "Slot=%d\n", VolParams[i].Slot);
735 fprintf(fd, "VolSessionId=%u\n", jcr->VolSessionId);
736 fprintf(fd, "VolSessionTime=%u\n", jcr->VolSessionTime);
737 fprintf(fd, "VolAddr=%s-%s\n",
738 edit_uint64(VolParams[i].StartAddr, ed1),
739 edit_uint64(VolParams[i].EndAddr, ed2));
740 fprintf(fd, "FileIndex=%d-%d\n", VolParams[i].FirstIndex,
741 VolParams[i].LastIndex);
753 Jmsg(jcr, M_ERROR, 0, _("Could not open WriteBootstrap file:\n"
754 "%s: ERR=%s\n"), fname, be.bstrerror());
755 set_jcr_job_status(jcr, JS_ErrorTerminated);
757 free_pool_memory(fname);