2 Bacula® - The Network Backup Solution
4 Copyright (C) 2000-2009 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from
7 many others, a complete list can be found in the file AUTHORS.
8 This program is Free Software; you can redistribute it and/or
9 modify it under the terms of version two of the GNU General Public
10 License as published by the Free Software Foundation and included
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 Bacula® is a registered trademark of Kern Sibbald.
24 The licensor of Bacula is the Free Software Foundation Europe
25 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
26 Switzerland, email:ftf@fsfeurope.org.
30 * Bacula Director -- backup.c -- responsible for doing backup jobs
32 * Kern Sibbald, March MM
34 * Basic tasks done here:
35 * Open DB and create records for this job.
36 * Open Message Channel with Storage daemon to tell him a job will be starting.
37 * Open connection with File daemon and pass him commands
39 * When the File daemon finishes the job, update the DB.
/*
 * Wire-protocol strings exchanged with the File daemon (FD).
 * The first group are printf-style commands sent by the Director; the
 * second group are the replies expected back from the FD.
 */
48 /* Commands sent to File daemon */
49 static char backupcmd[] = "backup\n";
/* storaddr is filled in do_backup() with store->address, store->SDDport and the TLS requirement */
50 static char storaddr[] = "storage address=%s port=%d ssl=%d\n";
52 /* Responses received from File daemon */
53 static char OKbackup[] = "2000 OK backup\n";
54 static char OKstore[] = "2000 OK storage\n";
/* EndJob is used as an sscanf format in wait_for_job_termination(); 7 fields are expected */
55 static char EndJob[] = "2800 End Job TermCode=%d JobFiles=%u "
56 "ReadBytes=%llu JobBytes=%llu Errors=%u "
57 "VSS=%d Encrypt=%d\n";
58 /* Pre 1.39.29 (04Dec06) EndJob */
/* OldEndJob is the fallback sscanf format without VSS/Encrypt; 5 fields are expected */
59 static char OldEndJob[] = "2800 End Job TermCode=%d JobFiles=%u "
60 "ReadBytes=%llu JobBytes=%llu Errors=%u\n";
62 * Called here before the job is run to do the job
/*
 * Prepare a backup job before it is run.
 *
 * Virtual Full jobs are handed to do_vbackup_init(). For every other
 * level this releases the read storage, gets or creates the FileSet
 * record, settles the job level and "since" time, applies pool
 * overrides, checks for duplicate jobs, stores the Pool record id in
 * jcr->jr.PoolId, takes the write storage from the pool and finally
 * starts any clone jobs.
 *
 * NOTE(review): the bodies of the failure branches (and the final
 * return) are not visible in this listing; presumably they return
 * false/true respectively -- confirm.
 */
65 bool do_backup_init(JCR *jcr)
68 if (jcr->get_JobLevel() == L_VIRTUAL_FULL) {
69 return do_vbackup_init(jcr);
71 free_rstorage(jcr); /* we don't read so release */
73 if (!get_or_create_fileset_record(jcr)) {
78 * Get definitive Job level and since time
/* the since-time text is written into jcr->since, bounded by its size */
80 get_level_since_time(jcr, jcr->since, sizeof(jcr->since));
82 apply_pool_overrides(jcr);
84 if (!allow_duplicate_job(jcr)) {
88 jcr->jr.PoolId = get_or_create_pool_record(jcr, jcr->pool->name());
/* a PoolId of 0 is treated as "no pool record could be obtained" */
89 if (jcr->jr.PoolId == 0) {
93 /* If pool storage specified, use it instead of job storage */
94 copy_wstorage(jcr, jcr->pool->storage, _("Pool resource"));
/* NOTE(review): the condition guarding this fatal message is not visible here */
97 Jmsg(jcr, M_FATAL, 0, _("No Storage specification found in Job or Pool.\n"));
101 create_clones(jcr); /* run any clone jobs */
106 /* Take all base jobs from job resource and find the
/*
 * Collect the JobIds of the base jobs configured for this job.
 *
 *  jcr     job control record; jcr->job->base is the list of base jobs
 *  jobids  pool memory string that receives the ids, separated by ","
 *
 * Returns false when the job has no base list at all, otherwise true
 * only if at least one id was appended (jobids is non-empty).
 *
 * NOTE(review): the local declarations and the conditions around the
 * two pm_strcat() calls are not visible in this listing.
 */
109 static bool get_base_jobids(JCR *jcr, POOLMEM *jobids)
116 if (!jcr->job->base) {
117 return false; /* no base job, stop accurate */
/* start from an empty search record that only carries this job's StartTime */
120 memset(&jr, 0, sizeof(JOB_DBR));
121 jr.StartTime = jcr->jr.StartTime;
123 foreach_alist(job, jcr->job->base) {
/* look the base job up by name; the id found is returned through id */
124 bstrncpy(jr.Name, job->name(), sizeof(jr.Name));
125 db_get_base_jobid(jcr, jcr->db, &jr, &id);
129 pm_strcat(jobids, ",");
131 pm_strcat(jobids, edit_uint64(id, str_jobid));
/* true only when something was written into jobids */
135 return *jobids != '\0';
139 * For each file in the current list, send "/path/fname\0LStat" to FD
141 static int accurate_list_handler(void *ctx, int num_fields, char **row)
143 JCR *jcr = (JCR *)ctx;
145 if (job_canceled(jcr)) {
149 if (row[2] > 0) { /* discard when file_index == 0 */
150 jcr->file_bsock->fsend("%s%s%c%s", row[0], row[1], 0, row[4]);
156 * Send current file list to FD
157 * DIR -> FD : accurate files=xxxx
158 * DIR -> FD : /path/to/file\0Lstat
159 * DIR -> FD : /path/to/dir/\0Lstat
/*
 * Send the list of files already known to the catalog to the File
 * daemon so it can run the backup in accurate mode.
 *
 * Nothing is sent when accurate mode is off, the job is canceled, or
 * the level is Base. For a Full job the list comes from the configured
 * base jobs; for other levels it comes from the previous jobs found by
 * db_accurate_get_jobids(). The FD first receives "accurate files=<n>",
 * then one record per file through accurate_list_handler(), then an
 * end-of-data signal.
 *
 * NOTE(review): the return statements and several branch bodies are
 * not visible in this listing, so the exact return values per path
 * cannot be documented here.
 */
163 bool send_accurate_current_files(JCR *jcr)
168 if (!jcr->accurate || job_canceled(jcr)) {
171 /* In base level, no previous job is used */
172 if (jcr->get_JobLevel() == L_BASE) {
/* two pool-memory strings: nb holds the file count, jobids the id list; both start empty */
176 POOLMEM *nb = get_pool_memory(PM_FNAME);
177 POOLMEM *jobids = get_pool_memory(PM_FNAME);
178 nb[0] = jobids[0] = '\0';
180 if (jcr->get_JobLevel() == L_FULL) {
181 /* On Full mode, if no previous base job, no accurate things */
182 if (!get_base_jobids(jcr, jobids)) {
186 Jmsg(jcr, M_INFO, 0, _("Using BaseJobId(s): %s\n"), jobids);
189 /* For Incr/Diff level, we search for older jobs */
190 db_accurate_get_jobids(jcr, jcr->db, &jcr->jr, jobids);
192 /* We are in Incr/Diff, but no Full to build the accurate list... */
195 Jmsg(jcr, M_FATAL, 0, _("Cannot find previous jobids.\n"));
200 if (jcr->JobId) { /* display the message only for real jobs */
201 Jmsg(jcr, M_INFO, 0, _("Sending Accurate information.\n"));
204 /* to be able to allocate the right size for htable */
/* the summed JobFiles of all selected jobs is sent to the FD as the expected file count */
205 Mmsg(buf, "SELECT sum(JobFiles) FROM Job WHERE JobId IN (%s)",jobids);
206 db_sql_query(jcr->db, buf.c_str(), db_get_int_handler, nb);
/* NOTE(review): debug level 0 is used here, unlike the 100+ levels elsewhere in this file -- confirm it is intended */
207 Dmsg2(0, "jobids=%s nb=%s\n", jobids, nb);
208 jcr->file_bsock->fsend("accurate files=%s\n", nb);
/* the file list itself is read through the separate batch connection jcr->db_batch */
210 if (!db_open_batch_connexion(jcr, jcr->db)) {
212 Jmsg0(jcr, M_FATAL, 0, "Can't get dedicate sql connexion");
/* NOTE(review): the condition choosing between the base-file list and the plain file list is not visible */
217 db_create_base_file_list(jcr, jcr->db_batch, jobids);
218 db_get_base_file_list(jcr, jcr->db_batch,
219 accurate_list_handler, (void *)jcr);
222 db_get_file_list(jcr, jcr->db_batch, jobids,
223 accurate_list_handler, (void *)jcr);
226 /* TODO: close the batch connexion ? (can be used very soon) */
/* tell the FD that the accurate list is complete */
228 jcr->file_bsock->signal(BNET_EOD);
/* release both pool-memory strings obtained above */
231 free_pool_memory(jobids);
232 free_pool_memory(nb);
238 * Do a backup of the specified FileSet
240 * Returns: false on failure
/*
 * Run a backup job.
 *
 * Virtual Full jobs are handed to do_vbackup(). Otherwise the visible
 * sequence is: record the job start in the catalog, connect to the
 * Storage daemon and start its job and message thread, connect to the
 * File daemon, send it the include/exclude lists, the level, the
 * storage address and the run scripts, refresh the start time, send
 * the accurate file list, issue the backup command and wait for
 * termination. On JS_Terminated, backup_cleanup() is called.
 *
 * NOTE(review): the return statements, goto targets and labels are not
 * visible in this listing; the error path at the end sets
 * JS_ErrorTerminated and waits for termination with FDConnectTimeout.
 */
243 bool do_backup(JCR *jcr)
/* TLS level reported to the FD; raised below when the storage resource enables TLS */
246 int tls_need = BNET_TLS_NONE;
251 if (jcr->get_JobLevel() == L_VIRTUAL_FULL) {
252 return do_vbackup(jcr);
255 /* Print Job Start message */
256 Jmsg(jcr, M_INFO, 0, _("Start Backup JobId %s, Job=%s\n"),
257 edit_uint64(jcr->JobId, ed1), jcr->Job);
259 set_jcr_job_status(jcr, JS_Running);
260 Dmsg2(100, "JobId=%d JobLevel=%c\n", jcr->jr.JobId, jcr->jr.JobLevel);
261 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
262 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
267 * Open a message channel connection with the Storage
268 * daemon. This is to let him know that our client
269 * will be contacting him for a backup session.
272 Dmsg0(110, "Open connection with storage daemon\n");
273 set_jcr_job_status(jcr, JS_WaitSD);
275 * Start conversation with Storage daemon
277 if (!connect_to_storage_daemon(jcr, 10, SDConnectTimeout, 1)) {
281 * Now start a job with the Storage daemon
283 if (!start_storage_daemon_job(jcr, NULL, jcr->wstorage)) {
288 * Start the job prior to starting the message thread below
289 * to avoid two threads from using the BSOCK structure at
292 if (!bnet_fsend(jcr->store_bsock, "run")) {
297 * Now start a Storage daemon message thread. Note,
298 * this thread is used to provide the catalog services
299 * for the backup job, including inserting the attributes
300 * into the catalog. See catalog_update() in catreq.c
302 if (!start_storage_daemon_message_thread(jcr)) {
305 Dmsg0(150, "Storage daemon connection OK\n");
307 set_jcr_job_status(jcr, JS_WaitFD);
308 if (!connect_to_file_daemon(jcr, 10, FDConnectTimeout, 1)) {
312 set_jcr_job_status(jcr, JS_Running);
/* fd is the File daemon socket used for the rest of the conversation */
313 fd = jcr->file_bsock;
315 if (!send_include_list(jcr)) {
319 if (!send_exclude_list(jcr)) {
323 if (!send_level_command(jcr)) {
328 * send Storage daemon address to the File daemon
/* when no separate SDDport is configured, fall back to SDport */
331 if (store->SDDport == 0) {
332 store->SDDport = store->SDport;
335 /* TLS Requirement */
336 if (store->tls_enable) {
337 if (store->tls_require) {
338 tls_need = BNET_TLS_REQUIRED;
340 tls_need = BNET_TLS_OK;
344 fd->fsend(storaddr, store->address, store->SDDport, tls_need);
345 if (!response(jcr, fd, OKstore, "Storage", DISPLAY_ERROR)) {
349 if (!send_runscripts_commands(jcr)) {
354 * We re-update the job start record so that the start
355 * time is set after the run before job. This avoids
356 * that any files created by the run before job will
357 * be saved twice. They will be backed up in the current
358 * job, but not in the next one unless they are changed.
359 * Without this, they will be backed up in this job and
360 * in the next job run because in that case, their date
361 * is after the start of this run.
363 jcr->start_time = time(NULL);
364 jcr->jr.StartTime = jcr->start_time;
365 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
366 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
370 * If backup is in accurate mode, we send the list of
373 if (!send_accurate_current_files(jcr)) {
377 /* Send backup command */
378 fd->fsend(backupcmd);
379 if (!response(jcr, fd, OKbackup, "backup", DISPLAY_ERROR)) {
383 /* Pickup Job termination data */
/* called with one argument here, so a default for the timeout parameter is presumably declared elsewhere */
384 stat = wait_for_job_termination(jcr);
385 db_write_batch_file_records(jcr); /* used by bulk batch file insert */
/* NOTE(review): the first half of this condition is not visible in this listing */
388 !db_commit_base_file_attributes_record(jcr, jcr->db_batch))
390 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db_batch));
393 if (stat == JS_Terminated) {
394 backup_cleanup(jcr, stat);
399 /* Come here only after starting SD thread */
401 set_jcr_job_status(jcr, JS_ErrorTerminated);
402 Dmsg1(400, "wait for sd. use=%d\n", jcr->use_count());
404 wait_for_job_termination(jcr, FDConnectTimeout);
405 Dmsg1(400, "after wait for sd. use=%d\n", jcr->use_count());
411 * Here we wait for the File daemon to signal termination,
412 * then we wait for the Storage daemon. When both
413 * are done, we return the job status.
414 * Also used by restore.c
/*
 * Wait for the File daemon to report the end of the job, then wait for
 * the Storage daemon, and return the resulting job status.
 *
 *  jcr      job control record; jcr->file_bsock is the FD socket
 *  timeout  passed to start_bsock_timer() for the FD socket
 *
 * The FD's "2800 End Job" message is parsed with the EndJob format and,
 * failing that, the OldEndJob format. When it is received, the
 * file/byte/error counters are copied into the JCR. The return value is
 * the first status that is not JS_Terminated among the job status and
 * the FD status, otherwise the SD status.
 *
 * NOTE(review): some declarations and branch bodies (including where
 * fd_ok is set) are not visible in this listing.
 */
416 int wait_for_job_termination(JCR *jcr, int timeout)
419 BSOCK *fd = jcr->file_bsock;
/* JobFiles and JobErrors are only filled by a successful sscanf below */
421 uint32_t JobFiles, JobErrors;
422 uint32_t JobWarnings = 0;
423 uint64_t ReadBytes = 0;
424 uint64_t JobBytes = 0;
429 set_jcr_job_status(jcr, JS_Running);
433 tid = start_bsock_timer(fd, timeout); /* TODO: New timeout directive??? */
435 /* Wait for Client to terminate */
436 while ((n = bget_dirmsg(fd)) >= 0) {
/* accept the current 7-field EndJob message or the older 5-field one */
438 (sscanf(fd->msg, EndJob, &jcr->FDJobStatus, &JobFiles,
439 &ReadBytes, &JobBytes, &JobErrors, &VSS, &Encrypt) == 7 ||
440 sscanf(fd->msg, OldEndJob, &jcr->FDJobStatus, &JobFiles,
441 &ReadBytes, &JobBytes, &JobErrors) == 5)) {
443 set_jcr_job_status(jcr, jcr->FDJobStatus);
444 Dmsg1(100, "FDStatus=%c\n", (char)jcr->JobStatus);
446 Jmsg(jcr, M_WARNING, 0, _("Unexpected Client Job message: %s\n"),
449 if (job_canceled(jcr)) {
454 stop_bsock_timer(tid);
457 if (is_bnet_error(fd)) {
458 Jmsg(jcr, M_FATAL, 0, _("Network error with FD during %s: ERR=%s\n"),
459 job_type_to_str(jcr->get_JobType()), fd->bstrerror());
461 fd->signal(BNET_TERMINATE); /* tell Client we are terminating */
464 /* Force cancel in SD if failing */
465 if (job_canceled(jcr) || !fd_ok) {
466 cancel_storage_daemon_job(jcr);
469 /* Note, the SD stores in jcr->JobFiles/ReadBytes/JobBytes/JobErrors */
470 wait_for_storage_daemon_termination(jcr);
472 /* Return values from FD */
/* the FD counters replace the JCR values, except JobErrors which is accumulated */
474 jcr->JobFiles = JobFiles;
475 jcr->JobErrors += JobErrors; /* Keep total errors */
476 jcr->ReadBytes = ReadBytes;
477 jcr->JobBytes = JobBytes;
478 jcr->JobWarnings = JobWarnings;
480 jcr->Encrypt = Encrypt;
482 Jmsg(jcr, M_FATAL, 0, _("No Job status returned from FD.\n"));
485 // Dmsg4(100, "fd_ok=%d FDJS=%d JS=%d SDJS=%d\n", fd_ok, jcr->FDJobStatus,
486 // jcr->JobStatus, jcr->SDJobStatus);
488 /* Return the first error status we find Dir, FD, or SD */
489 if (!fd_ok || is_bnet_error(fd)) { /* if fd not set, that use !fd_ok */
490 jcr->FDJobStatus = JS_ErrorTerminated;
/* precedence of the returned status: job status, then FD status, then SD status */
492 if (jcr->JobStatus != JS_Terminated) {
493 return jcr->JobStatus;
495 if (jcr->FDJobStatus != JS_Terminated) {
496 return jcr->FDJobStatus;
498 return jcr->SDJobStatus;
502 * Release resources allocated during backup.
/*
 * Finish a backup job and emit the job report.
 *
 *  jcr       job control record of the finished job
 *  TermCode  termination code passed on to update_job_end()
 *
 * Virtual Full jobs are handed to vbackup_cleanup(). Otherwise this
 * updates the job end, reads the Job, Client and Media records, updates
 * the bootstrap file, chooses the termination text from jcr->JobStatus,
 * computes elapsed time, rate and compression, and writes the report
 * through Jmsg().
 *
 * NOTE(review): several case labels, report lines and report arguments
 * are not visible in this listing.
 */
504 void backup_cleanup(JCR *jcr, int TermCode)
/* fixed-size scratch buffers for the formatted report fields */
506 char sdt[50], edt[50], schedt[50];
507 char ec1[30], ec2[30], ec3[30], ec4[30], ec5[30], compress[50];
508 char ec6[30], ec7[30], ec8[30], elapsed[50];
509 char term_code[100], fd_term_msg[100], sd_term_msg[100];
510 const char *term_msg;
/* report severity; raised to M_ERROR in the JS_ErrorTerminated case below */
511 int msg_type = M_INFO;
514 double kbps, compression;
517 if (jcr->get_JobLevel() == L_VIRTUAL_FULL) {
518 vbackup_cleanup(jcr, TermCode);
522 Dmsg2(100, "Enter backup_cleanup %d %c\n", TermCode, TermCode);
523 memset(&mr, 0, sizeof(mr));
524 memset(&cr, 0, sizeof(cr));
526 update_job_end(jcr, TermCode);
/* failing to read the Job record downgrades the job to JS_ErrorTerminated */
528 if (!db_get_job_record(jcr, jcr->db, &jcr->jr)) {
529 Jmsg(jcr, M_WARNING, 0, _("Error getting Job record for Job report: ERR=%s"),
530 db_strerror(jcr->db));
531 set_jcr_job_status(jcr, JS_ErrorTerminated);
/* a missing Client record only produces a warning */
534 bstrncpy(cr.Name, jcr->client->name(), sizeof(cr.Name));
535 if (!db_get_client_record(jcr, jcr->db, &cr)) {
536 Jmsg(jcr, M_WARNING, 0, _("Error getting Client record for Job report: ERR=%s"),
537 db_strerror(jcr->db));
540 bstrncpy(mr.VolumeName, jcr->VolumeName, sizeof(mr.VolumeName));
541 if (!db_get_media_record(jcr, jcr->db, &mr)) {
542 Jmsg(jcr, M_WARNING, 0, _("Error getting Media record for Volume \"%s\": ERR=%s"),
543 mr.VolumeName, db_strerror(jcr->db));
544 set_jcr_job_status(jcr, JS_ErrorTerminated);
547 update_bootstrap_file(jcr);
/* pick the termination text; only JS_ErrorTerminated is a visible case label in this listing */
549 switch (jcr->JobStatus) {
551 if (jcr->JobErrors || jcr->SDErrors) {
552 term_msg = _("Backup OK -- with warnings");
554 term_msg = _("Backup OK");
558 term_msg = _("Backup OK -- with warnings");
561 case JS_ErrorTerminated:
562 term_msg = _("*** Backup Error ***");
563 msg_type = M_ERROR; /* Generate error message */
/* on error, tell the SD to terminate and cancel its message thread if one exists */
564 if (jcr->store_bsock) {
565 jcr->store_bsock->signal(BNET_TERMINATE);
566 if (jcr->SD_msg_chan) {
567 pthread_cancel(jcr->SD_msg_chan);
572 term_msg = _("Backup Canceled");
573 if (jcr->store_bsock) {
574 jcr->store_bsock->signal(BNET_TERMINATE);
575 if (jcr->SD_msg_chan) {
576 pthread_cancel(jcr->SD_msg_chan);
/* unknown status: the message is formatted into the local term_code buffer */
581 term_msg = term_code;
582 sprintf(term_code, _("Inappropriate term code: %c\n"), jcr->JobStatus);
585 bstrftimes(schedt, sizeof(schedt), jcr->jr.SchedTime);
586 bstrftimes(sdt, sizeof(sdt), jcr->jr.StartTime);
587 bstrftimes(edt, sizeof(edt), jcr->jr.EndTime);
588 RunTime = jcr->jr.EndTime - jcr->jr.StartTime;
/* rate = bytes / (1000 * seconds); NOTE(review): any guard for RunTime == 0 is not visible here */
592 kbps = ((double)jcr->jr.JobBytes) / (1000.0 * (double)RunTime);
594 if (!db_get_job_volume_names(jcr, jcr->db, jcr->jr.JobId, &jcr->VolumeName)) {
596 * Note, if the job has erred, most likely it did not write any
597 * tape, so suppress this "error" message since in that case
598 * it is normal. Or look at it the other way, only for a
599 * normal exit should we complain about this error.
601 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes) {
602 Jmsg(jcr, M_ERROR, 0, "%s", db_strerror(jcr->db));
604 jcr->VolumeName[0] = 0; /* none */
/* compression is 100 - 100 * JobBytes/ReadBytes; no bytes read or a value below 0.5 is reported as "None" */
607 if (jcr->ReadBytes == 0) {
608 bstrncpy(compress, "None", sizeof(compress));
610 compression = (double)100 - 100.0 * ((double)jcr->JobBytes / (double)jcr->ReadBytes);
611 if (compression < 0.5) {
612 bstrncpy(compress, "None", sizeof(compress));
614 bsnprintf(compress, sizeof(compress), "%.1f %%", compression);
617 jobstatus_to_ascii(jcr->FDJobStatus, fd_term_msg, sizeof(fd_term_msg));
618 jobstatus_to_ascii(jcr->SDJobStatus, sd_term_msg, sizeof(sd_term_msg));
620 // bmicrosleep(15, 0); /* for debugging SIGHUP */
/* the job report; format lines and arguments must stay in matching order */
622 Jmsg(jcr, msg_type, 0, _("%s %s %s (%s): %s\n"
623 " Build OS: %s %s %s\n"
626 " Backup Level: %s%s\n"
627 " Client: \"%s\" %s\n"
628 " FileSet: \"%s\" %s\n"
629 " Pool: \"%s\" (From %s)\n"
630 " Catalog: \"%s\" (From %s)\n"
631 " Storage: \"%s\" (From %s)\n"
632 " Scheduled time: %s\n"
635 " Elapsed time: %s\n"
637 " FD Files Written: %s\n"
638 " SD Files Written: %s\n"
639 " FD Bytes Written: %s (%sB)\n"
640 " SD Bytes Written: %s (%sB)\n"
642 " Software Compression: %s\n"
646 " Volume name(s): %s\n"
647 " Volume Session Id: %d\n"
648 " Volume Session Time: %d\n"
649 " Last Volume Bytes: %s (%sB)\n"
650 " Non-fatal FD errors: %d\n"
652 " FD termination status: %s\n"
653 " SD termination status: %s\n"
654 " Termination: %s\n\n"),
655 BACULA, my_name, VERSION, LSMDATE, edt,
656 HOST_OS, DISTNAME, DISTVER,
659 level_to_str(jcr->get_JobLevel()), jcr->since,
660 jcr->client->name(), cr.Uname,
661 jcr->fileset->name(), jcr->FSCreateTime,
662 jcr->pool->name(), jcr->pool_source,
663 jcr->catalog->name(), jcr->catalog_source,
664 jcr->wstore->name(), jcr->wstore_source,
668 edit_utime(RunTime, elapsed, sizeof(elapsed)),
670 edit_uint64_with_commas(jcr->jr.JobFiles, ec1),
671 edit_uint64_with_commas(jcr->SDJobFiles, ec2),
672 edit_uint64_with_commas(jcr->jr.JobBytes, ec3),
673 edit_uint64_with_suffix(jcr->jr.JobBytes, ec4),
674 edit_uint64_with_commas(jcr->SDJobBytes, ec5),
675 edit_uint64_with_suffix(jcr->SDJobBytes, ec6),
678 jcr->VSS?_("yes"):_("no"),
679 jcr->Encrypt?_("yes"):_("no"),
680 jcr->accurate?_("yes"):_("no"),
684 edit_uint64_with_commas(mr.VolBytes, ec7),
685 edit_uint64_with_suffix(mr.VolBytes, ec8),
692 Dmsg0(100, "Leave backup_cleanup()\n");
/*
 * Write the bootstrap records for this job to the job's WriteBootstrap
 * target.
 *
 * Nothing is done unless the job ended with JS_Terminated, wrote some
 * bytes (jcr->jr.JobBytes) and has WriteBootstrap configured. The
 * target name is expanded with edit_job_codes(). Output goes either to
 * a pipe opened with open_bpipe() or to a file opened with fopen().
 * One record is written per volume returned by
 * db_get_job_volume_parameters(). Failure to get the volume parameters
 * or to open the target is reported with M_ERROR.
 *
 * NOTE(review): the conditions selecting pipe versus file, the closing
 * of the stream and the release of VolParams are not visible in this
 * listing.
 */
695 void update_bootstrap_file(JCR *jcr)
697 /* Now update the bootstrap file if any */
698 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes &&
699 jcr->job->WriteBootstrap) {
/* fname is pool memory holding the expanded WriteBootstrap value; it is freed at the end */
703 POOLMEM *fname = get_pool_memory(PM_FNAME);
704 fname = edit_job_codes(jcr, fname, jcr->job->WriteBootstrap, "");
706 VOL_PARAMS *VolParams = NULL;
708 char edt[50], ed1[50], ed2[50];
712 bpipe = open_bpipe(fname+1, 0, "w"); /* skip first char "|" */
/* fd stays NULL when the pipe could not be opened */
713 fd = bpipe ? bpipe->wfd : NULL;
715 /* ***FIXME*** handle BASE */
/* a Full job opens the file with "w+b"; every other level uses "a+b" */
716 fd = fopen(fname, jcr->get_JobLevel()==L_FULL?"w+b":"a+b");
719 VolCount = db_get_job_volume_parameters(jcr, jcr->db, jcr->JobId,
722 Jmsg(jcr, M_ERROR, 0, _("Could not get Job Volume Parameters to "
723 "update Bootstrap file. ERR=%s\n"), db_strerror(jcr->db));
/* only mark the job failed when the SD reports that files were written */
724 if (jcr->SDJobFiles != 0) {
725 set_jcr_job_status(jcr, JS_ErrorTerminated);
729 /* Start output with when and who wrote it */
730 bstrftimes(edt, sizeof(edt), time(NULL));
731 fprintf(fd, "# %s - %s - %s%s\n", edt, jcr->jr.Job,
732 level_to_str(jcr->get_JobLevel()), jcr->since);
733 for (int i=0; i < VolCount; i++) {
734 /* Write the record */
735 fprintf(fd, "Volume=\"%s\"\n", VolParams[i].VolumeName);
736 fprintf(fd, "MediaType=\"%s\"\n", VolParams[i].MediaType);
/* the Slot line is only written for a positive slot number */
737 if (VolParams[i].Slot > 0) {
738 fprintf(fd, "Slot=%d\n", VolParams[i].Slot);
740 fprintf(fd, "VolSessionId=%u\n", jcr->VolSessionId);
741 fprintf(fd, "VolSessionTime=%u\n", jcr->VolSessionTime);
742 fprintf(fd, "VolAddr=%s-%s\n",
743 edit_uint64(VolParams[i].StartAddr, ed1),
744 edit_uint64(VolParams[i].EndAddr, ed2));
745 fprintf(fd, "FileIndex=%d-%d\n", VolParams[i].FirstIndex,
746 VolParams[i].LastIndex);
/* reached when the bootstrap target could not be opened */
758 Jmsg(jcr, M_ERROR, 0, _("Could not open WriteBootstrap file:\n"
759 "%s: ERR=%s\n"), fname, be.bstrerror());
760 set_jcr_job_status(jcr, JS_ErrorTerminated);
762 free_pool_memory(fname);