2 Bacula® - The Network Backup Solution
4 Copyright (C) 2000-2009 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from
7 many others, a complete list can be found in the file AUTHORS.
8 This program is Free Software; you can redistribute it and/or
9 modify it under the terms of version two of the GNU General Public
10 License as published by the Free Software Foundation and included
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 Bacula® is a registered trademark of Kern Sibbald.
24 The licensor of Bacula is the Free Software Foundation Europe
25 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
26 Switzerland, email:ftf@fsfeurope.org.
30 * Bacula Director -- backup.c -- responsible for doing backup jobs
32 * Kern Sibbald, March MM
34 * Basic tasks done here:
35 * Open DB and create records for this job.
36 * Open Message Channel with Storage daemon to tell him a job will be starting.
37 * Open connection with File daemon and pass him commands
39 * When the File daemon finishes the job, update the DB.
// Protocol strings exchanged with the File daemon (FD).
// backupcmd and storaddr are passed to fd->fsend() in do_backup();
// storaddr takes the storage address, port and TLS requirement.
48 /* Commands sent to File daemon */
49 static char backupcmd[] = "backup\n";
50 static char storaddr[] = "storage address=%s port=%d ssl=%d\n";
// OKbackup and OKstore are the replies checked with response() in
// do_backup().
52 /* Responses received from File daemon */
53 static char OKbackup[] = "2000 OK backup\n";
54 static char OKstore[] = "2000 OK storage\n";
// End of job report, used as an sscanf() format in
// wait_for_job_termination(); this form carries seven fields.
55 static char EndJob[] = "2800 End Job TermCode=%d JobFiles=%u "
56 "ReadBytes=%llu JobBytes=%llu Errors=%u "
57 "VSS=%d Encrypt=%d\n";
// Older five field form of the same report, tried when the seven field
// form does not match.
58 /* Pre 1.39.29 (04Dec06) EndJob */
59 static char OldEndJob[] = "2800 End Job TermCode=%d JobFiles=%u "
60 "ReadBytes=%llu JobBytes=%llu Errors=%u\n";
62 * Called here before the job is run to do the job
// Prepare a backup job before it is run.
// Virtual Full jobs are handed straight to do_vbackup_init().  For all
// other levels the read storage is released, the FileSet record is looked
// up or created, the level and since time are settled, pool overrides are
// applied, duplicate jobs are checked, the Pool record is looked up or
// created, the pool storage is copied in as write storage and clone jobs
// are started.
//   jcr - job control record of the job being prepared
// Returns a bool.  NOTE(review): presumably false when any of the checked
// steps fails; confirm against the failure branches.
65 bool do_backup_init(JCR *jcr)
68 if (jcr->get_JobLevel() == L_VIRTUAL_FULL) {
69 return do_vbackup_init(jcr);
71 free_rstorage(jcr); /* we don't read so release */
73 if (!get_or_create_fileset_record(jcr)) {
78 * Get definitive Job level and since time
// The since text is written into jcr->since, bounded by its size.
80 get_level_since_time(jcr, jcr->since, sizeof(jcr->since));
82 apply_pool_overrides(jcr);
84 if (!allow_duplicate_job(jcr)) {
// The pool is looked up by the name of the pool resource; a PoolId of 0
// is tested separately.  NOTE(review): presumably 0 means the lookup
// failed; confirm.
88 jcr->jr.PoolId = get_or_create_pool_record(jcr, jcr->pool->name());
89 if (jcr->jr.PoolId == 0) {
93 /* If pool storage specified, use it instead of job storage */
94 copy_wstorage(jcr, jcr->pool->storage, _("Pool resource"));
// Reported as fatal: no write storage came from either the Job or the Pool.
97 Jmsg(jcr, M_FATAL, 0, _("No Storage specification found in Job or Pool.\n"));
101 create_clones(jcr); /* run any clone jobs */
106 /* Take all base jobs from job resource and find the
// Build a comma separated list of JobIds for the base jobs configured on
// this job resource.
//   jcr    - job control record; jcr->job->base is the list of base jobs
//   jobids - pool memory string that the ids are appended to
// Returns false at once when the job has no base list, otherwise true
// only if jobids is non-empty on return.
109 static bool get_base_jobids(JCR *jcr, POOLMEM *jobids)
116 if (!jcr->job->base) {
117 return false; /* no base job, stop accurate */
// The search record starts zeroed and carries this job's start time.
120 memset(&jr, 0, sizeof(JOB_DBR));
121 jr.StartTime = jcr->jr.StartTime;
// One catalog lookup per base job, keyed on the base job's name.
123 foreach_alist(job, jcr->job->base) {
124 bstrncpy(jr.Name, job->name(), sizeof(jr.Name));
// NOTE(review): the result of db_get_base_jobid is not checked here;
// confirm that id is well defined when the lookup fails.
125 db_get_base_jobid(jcr, jcr->db, &jr, &id);
// Ids are joined with a comma.
129 pm_strcat(jobids, ",");
131 pm_strcat(jobids, edit_uint64(id, str_jobid));
135 return *jobids != '\0';
139 * For each file in the current list, send "/path/fname\0LStat" to FD
// Row callback used while the accurate file list is streamed to the File
// daemon; it is passed to db_get_base_file_list() and db_get_file_list().
//   ctx        - the JCR of the running job, passed through as a void pointer
//   num_fields - number of columns in row; not used by the visible code
//   row        - column values as strings: row[0] and row[1] are sent back
//                to back, row[2] is the file index, and row[4] is sent
//                after a NUL byte
// Returns an int to the database layer.
141 static int accurate_list_handler(void *ctx, int num_fields, char **row)
143 JCR *jcr = (JCR *)ctx;
// A cancelled job takes a separate exit.
145 if (job_canceled(jcr)) {
// row[2] is text, so it must be inspected as text.  Comparing the pointer
// itself against 0 is true for every non-NULL column, which meant entries
// with a zero file index were never discarded.  A zero index is the text
// 0, so its first character is tested; a NULL column is still skipped.
// NOTE(review): assumes the index is plain decimal without leading
// zeros; confirm against the query that feeds this handler.
149 if (row[2] && row[2][0] != '0') { /* discard when file_index == 0 */
150 jcr->file_bsock->fsend("%s%s%c%s", row[0], row[1], 0, row[4]);
156 * Send current file list to FD
157 * DIR -> FD : accurate files=xxxx
158 * DIR -> FD : /path/to/file\0Lstat
159 * DIR -> FD : /path/to/dir/\0Lstat
// Send the list of files already known to the catalog to the File daemon
// so that it can run the job in accurate mode.  The exchange is one
// accurate files line carrying a count, then one entry per file sent by
// accurate_list_handler(), then a BNET_EOD signal.
//   jcr - job control record; jcr->file_bsock is the FD connection
// Returns a bool.  NOTE(review): do_backup() treats a false result as
// failure; confirm which paths here return false.
163 bool send_accurate_current_files(JCR *jcr)
// Jobs that are not accurate, or that are cancelled, are tested first.
168 if (!jcr->accurate || job_canceled(jcr)) {
171 /* In base level, no previous job is used */
172 if (jcr->get_JobLevel() == L_BASE) {
// Two pool buffers, both starting as empty strings: nb receives the file
// count as text and jobids the list of JobIds.  Both are released at the
// end of the function.
176 POOLMEM *nb = get_pool_memory(PM_FNAME);
177 POOLMEM *jobids = get_pool_memory(PM_FNAME);
178 nb[0] = jobids[0] = '\0';
// Full level: the list is built from the configured base jobs.
180 if (jcr->get_JobLevel() == L_FULL) {
181 /* On Full mode, if no previous base job, no accurate things */
182 if (!get_base_jobids(jcr, jobids)) {
185 db_create_base_file_list(jcr, jcr->db, jobids);
188 /* For Incr/Diff level, we search for older jobs */
189 db_accurate_get_jobids(jcr, jcr->db, &jcr->jr, jobids);
191 /* We are in Incr/Diff, but no Full to build the accurate list... */
194 Jmsg(jcr, M_FATAL, 0, _("Cannot find previous jobids.\n"));
199 if (jcr->JobId) { /* display the message only for real jobs */
200 Jmsg(jcr, M_INFO, 0, _("Sending Accurate information.\n"));
// The summed JobFiles of the selected jobs is sent first as the count.
// NOTE(review): the result of db_sql_query is not checked; if the handler
// stores nothing, nb is still the empty string when it is sent.
203 /* to be able to allocate the right size for htable */
204 Mmsg(buf, "SELECT sum(JobFiles) FROM Job WHERE JobId IN (%s)",jobids);
205 db_sql_query(jcr->db, buf.c_str(), db_get_int_handler, nb);
206 Dmsg2(200, "jobids=%s nb=%s\n", jobids, nb);
207 jcr->file_bsock->fsend("accurate files=%s\n", nb);
// Full level streams the base file list; the other call streams the file
// list for jobids over the batch connection jcr->db_batch.
209 if (jcr->get_JobLevel() == L_FULL) {
210 db_get_base_file_list(jcr, jcr->db, accurate_list_handler, (void *)jcr);
213 if (!db_open_batch_connexion(jcr, jcr->db)) {
215 Jmsg0(jcr, M_FATAL, 0, "Can't get dedicate sql connexion");
218 db_get_file_list(jcr, jcr->db_batch, jobids, accurate_list_handler, (void *)jcr);
221 /* TODO: close the batch connexion ? (can be used very soon) */
// End of data marker for the list.
223 jcr->file_bsock->signal(BNET_EOD);
226 free_pool_memory(jobids);
227 free_pool_memory(nb);
233 * Do a backup of the specified FileSet
235 * Returns: false on failure
// Run a backup job from the Director side.
// Visible steps, in order: Virtual Full jobs are delegated to
// do_vbackup(); the job start record is written; the Storage daemon (SD)
// is contacted, a job is started there and the run command is sent; the
// SD message thread is started; the File daemon (FD) is contacted and is
// sent the include list, exclude list, level, storage address and run
// scripts; the start record is rewritten with a fresh start time; the
// accurate file list and the backup command are sent; finally the job
// termination is awaited and, on normal termination, backup_cleanup()
// is called.
//   jcr - job control record of the job to run
// Returns a bool; per the comment preceding this function in the file,
// false means failure.
238 bool do_backup(JCR *jcr)
// Stays at BNET_TLS_NONE unless the storage resource enables TLS below.
241 int tls_need = BNET_TLS_NONE;
246 if (jcr->get_JobLevel() == L_VIRTUAL_FULL) {
247 return do_vbackup(jcr);
250 /* Print Job Start message */
251 Jmsg(jcr, M_INFO, 0, _("Start Backup JobId %s, Job=%s\n"),
252 edit_uint64(jcr->JobId, ed1), jcr->Job);
254 set_jcr_job_status(jcr, JS_Running);
255 Dmsg2(100, "JobId=%d JobLevel=%c\n", jcr->jr.JobId, jcr->jr.JobLevel);
// A failed catalog update is reported as fatal with the database error.
256 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
257 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
262 * Open a message channel connection with the Storage
263 * daemon. This is to let him know that our client
264 * will be contacting him for a backup session.
267 Dmsg0(110, "Open connection with storage daemon\n");
268 set_jcr_job_status(jcr, JS_WaitSD);
270 * Start conversation with Storage daemon
272 if (!connect_to_storage_daemon(jcr, 10, SDConnectTimeout, 1)) {
276 * Now start a job with the Storage daemon
278 if (!start_storage_daemon_job(jcr, NULL, jcr->wstorage)) {
283 * Start the job prior to starting the message thread below
284 * to avoid two threads from using the BSOCK structure at
287 if (!bnet_fsend(jcr->store_bsock, "run")) {
292 * Now start a Storage daemon message thread. Note,
293 * this thread is used to provide the catalog services
294 * for the backup job, including inserting the attributes
295 * into the catalog. See catalog_update() in catreq.c
297 if (!start_storage_daemon_message_thread(jcr)) {
300 Dmsg0(150, "Storage daemon connection OK\n");
// From here on the conversation is with the File daemon.
302 set_jcr_job_status(jcr, JS_WaitFD);
303 if (!connect_to_file_daemon(jcr, 10, FDConnectTimeout, 1)) {
307 set_jcr_job_status(jcr, JS_Running);
308 fd = jcr->file_bsock;
310 if (!send_include_list(jcr)) {
314 if (!send_exclude_list(jcr)) {
318 if (!send_level_command(jcr)) {
323 * send Storage daemon address to the File daemon
// A data port of 0 falls back to the regular SD port.
326 if (store->SDDport == 0) {
327 store->SDDport = store->SDport;
// With TLS enabled on the storage resource, tls_need is set to
// BNET_TLS_REQUIRED or BNET_TLS_OK depending on tls_require.
330 /* TLS Requirement */
331 if (store->tls_enable) {
332 if (store->tls_require) {
333 tls_need = BNET_TLS_REQUIRED;
335 tls_need = BNET_TLS_OK;
// The FD is told where the SD is and must answer with OKstore.
339 fd->fsend(storaddr, store->address, store->SDDport, tls_need);
340 if (!response(jcr, fd, OKstore, "Storage", DISPLAY_ERROR)) {
344 if (!send_runscripts_commands(jcr)) {
349 * We re-update the job start record so that the start
350 * time is set after the run before job. This avoids
351 * that any files created by the run before job will
352 * be saved twice. They will be backed up in the current
353 * job, but not in the next one unless they are changed.
354 * Without this, they will be backed up in this job and
355 * in the next job run because in that case, their date
356 * is after the start of this run.
358 jcr->start_time = time(NULL);
359 jcr->jr.StartTime = jcr->start_time;
360 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
361 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
365 * If backup is in accurate mode, we send the list of
368 if (!send_accurate_current_files(jcr)) {
372 /* Send backup command */
373 fd->fsend(backupcmd);
374 if (!response(jcr, fd, OKbackup, "backup", DISPLAY_ERROR)) {
// NOTE(review): called with one argument here although the definition in
// this file takes a timeout as well; presumably a default is declared in
// a header, confirm.
378 /* Pickup Job termination data */
379 stat = wait_for_job_termination(jcr);
380 db_write_batch_file_records(jcr); /* used by bulk batch file insert */
// Full jobs that have base jobs configured get two extra catalog calls.
382 if (jcr->get_JobLevel() == L_FULL && jcr->job->base) {
383 db_commit_base_file_attributes_record(jcr, jcr->db);
384 db_cleanup_base_file(jcr, jcr->db);
// backup_cleanup() is called here only for a normally terminated job.
387 if (stat == JS_Terminated) {
388 backup_cleanup(jcr, stat);
// Error exit used once the SD thread exists: the job is marked as
// terminated in error and termination is awaited with FDConnectTimeout.
393 /* Come here only after starting SD thread */
395 set_jcr_job_status(jcr, JS_ErrorTerminated);
396 Dmsg1(400, "wait for sd. use=%d\n", jcr->use_count());
398 wait_for_job_termination(jcr, FDConnectTimeout);
399 Dmsg1(400, "after wait for sd. use=%d\n", jcr->use_count());
405 * Here we wait for the File daemon to signal termination,
406 * then we wait for the Storage daemon. When both
407 * are done, we return the job status.
408 * Also used by restore.c
// Wait for the File daemon to report the end of the job, then wait for
// the Storage daemon, and return a job status.  Also used by restore.c.
//   jcr     - job control record; the values reported by the FD are
//             stored back into it
//   timeout - passed to start_bsock_timer() for the FD socket
// Returns the Director status if it is not JS_Terminated, else the FD
// status if that is not JS_Terminated, else the SD status.
410 int wait_for_job_termination(JCR *jcr, int timeout)
413 BSOCK *fd = jcr->file_bsock;
// Values parsed from the End Job message of the FD.
415 uint32_t JobFiles, JobErrors;
416 uint32_t JobWarnings = 0;
417 uint64_t ReadBytes = 0;
418 uint64_t JobBytes = 0;
423 set_jcr_job_status(jcr, JS_Running);
427 tid = start_bsock_timer(fd, timeout); /* TODO: New timeout directive??? */
// Each message from the FD is matched against the seven field End Job
// format first and the older five field format second.  On a match the
// FD status becomes the job status.
429 /* Wait for Client to terminate */
430 while ((n = bget_dirmsg(fd)) >= 0) {
432 (sscanf(fd->msg, EndJob, &jcr->FDJobStatus, &JobFiles,
433 &ReadBytes, &JobBytes, &JobErrors, &VSS, &Encrypt) == 7 ||
434 sscanf(fd->msg, OldEndJob, &jcr->FDJobStatus, &JobFiles,
435 &ReadBytes, &JobBytes, &JobErrors) == 5)) {
437 set_jcr_job_status(jcr, jcr->FDJobStatus);
438 Dmsg1(100, "FDStatus=%c\n", (char)jcr->JobStatus);
440 Jmsg(jcr, M_WARNING, 0, _("Unexpected Client Job message: %s\n"),
443 if (job_canceled(jcr)) {
448 stop_bsock_timer(tid);
// A socket error is reported as fatal, naming the job type.
451 if (is_bnet_error(fd)) {
452 Jmsg(jcr, M_FATAL, 0, _("Network error with FD during %s: ERR=%s\n"),
453 job_type_to_str(jcr->get_JobType()), fd->bstrerror());
455 fd->signal(BNET_TERMINATE); /* tell Client we are terminating */
458 /* Force cancel in SD if failing */
459 if (job_canceled(jcr) || !fd_ok) {
460 cancel_storage_daemon_job(jcr);
463 /* Note, the SD stores in jcr->JobFiles/ReadBytes/JobBytes/JobErrors */
464 wait_for_storage_daemon_termination(jcr);
// The FD figures overwrite the job counters, except JobErrors which is
// accumulated.
466 /* Return values from FD */
468 jcr->JobFiles = JobFiles;
469 jcr->JobErrors += JobErrors; /* Keep total errors */
470 jcr->ReadBytes = ReadBytes;
471 jcr->JobBytes = JobBytes;
472 jcr->JobWarnings = JobWarnings;
474 jcr->Encrypt = Encrypt;
476 Jmsg(jcr, M_FATAL, 0, _("No Job status returned from FD.\n"));
479 // Dmsg4(100, "fd_ok=%d FDJS=%d JS=%d SDJS=%d\n", fd_ok, jcr->FDJobStatus,
480 // jcr->JobStatus, jcr->SDJobStatus);
// A missing End Job message or a socket error forces the FD status to
// JS_ErrorTerminated before the precedence check.
482 /* Return the first error status we find Dir, FD, or SD */
483 if (!fd_ok || is_bnet_error(fd)) { /* if fd not set, that use !fd_ok */
484 jcr->FDJobStatus = JS_ErrorTerminated;
486 if (jcr->JobStatus != JS_Terminated) {
487 return jcr->JobStatus;
489 if (jcr->FDJobStatus != JS_Terminated) {
490 return jcr->FDJobStatus;
492 return jcr->SDJobStatus;
496 * Release resources allocated during backup.
// Finish a backup job and emit the job report.
// Virtual Full jobs are handed to vbackup_cleanup().  Otherwise the end
// of the job is recorded, the Job, Client and Media records are read
// back, the bootstrap file is updated, a termination message is chosen
// from the job status, the rate and compression figures are computed and
// the report is sent with Jmsg().
//   jcr      - job control record of the finished job
//   TermCode - termination code passed on to update_job_end()
498 void backup_cleanup(JCR *jcr, int TermCode)
// Scratch buffers for the formatted times, counters and status texts
// that appear in the report.
500 char sdt[50], edt[50], schedt[50];
501 char ec1[30], ec2[30], ec3[30], ec4[30], ec5[30], compress[50];
502 char ec6[30], ec7[30], ec8[30], elapsed[50];
503 char term_code[100], fd_term_msg[100], sd_term_msg[100];
504 const char *term_msg;
505 int msg_type = M_INFO;
508 double kbps, compression;
511 if (jcr->get_JobLevel() == L_VIRTUAL_FULL) {
512 vbackup_cleanup(jcr, TermCode);
516 Dmsg2(100, "Enter backup_cleanup %d %c\n", TermCode, TermCode);
517 memset(&mr, 0, sizeof(mr));
518 memset(&cr, 0, sizeof(cr));
520 update_job_end(jcr, TermCode);
// A Job or Media record that cannot be read back gives a warning and
// marks the job as terminated in error; a missing Client record gives a
// warning only, as far as the visible lines show.
522 if (!db_get_job_record(jcr, jcr->db, &jcr->jr)) {
523 Jmsg(jcr, M_WARNING, 0, _("Error getting Job record for Job report: ERR=%s"),
524 db_strerror(jcr->db));
525 set_jcr_job_status(jcr, JS_ErrorTerminated);
528 bstrncpy(cr.Name, jcr->client->name(), sizeof(cr.Name));
529 if (!db_get_client_record(jcr, jcr->db, &cr)) {
530 Jmsg(jcr, M_WARNING, 0, _("Error getting Client record for Job report: ERR=%s"),
531 db_strerror(jcr->db));
534 bstrncpy(mr.VolumeName, jcr->VolumeName, sizeof(mr.VolumeName));
535 if (!db_get_media_record(jcr, jcr->db, &mr)) {
536 Jmsg(jcr, M_WARNING, 0, _("Error getting Media record for Volume \"%s\": ERR=%s"),
537 mr.VolumeName, db_strerror(jcr->db));
538 set_jcr_job_status(jcr, JS_ErrorTerminated);
541 update_bootstrap_file(jcr);
// The headline of the report depends on the final job status.
543 switch (jcr->JobStatus) {
545 if (jcr->JobErrors || jcr->SDErrors) {
546 term_msg = _("Backup OK -- with warnings");
548 term_msg = _("Backup OK");
552 term_msg = _("Backup OK -- with warnings");
// On error the report is raised to M_ERROR, the SD connection is told to
// terminate and the SD message thread, if any, is cancelled.
555 case JS_ErrorTerminated:
556 term_msg = _("*** Backup Error ***");
557 msg_type = M_ERROR; /* Generate error message */
558 if (jcr->store_bsock) {
559 jcr->store_bsock->signal(BNET_TERMINATE);
560 if (jcr->SD_msg_chan) {
561 pthread_cancel(jcr->SD_msg_chan);
// The cancelled case tears the SD side down the same way.
566 term_msg = _("Backup Canceled");
567 if (jcr->store_bsock) {
568 jcr->store_bsock->signal(BNET_TERMINATE);
569 if (jcr->SD_msg_chan) {
570 pthread_cancel(jcr->SD_msg_chan);
// Any other status is reported verbatim.  The text is formatted with the
// bounded bsnprintf() because the translated format can be of any length
// and term_code is a fixed size buffer.
575 term_msg = term_code;
576 bsnprintf(term_code, sizeof(term_code), _("Inappropriate term code: %c\n"), jcr->JobStatus);
579 bstrftimes(schedt, sizeof(schedt), jcr->jr.SchedTime);
580 bstrftimes(sdt, sizeof(sdt), jcr->jr.StartTime);
581 bstrftimes(edt, sizeof(edt), jcr->jr.EndTime);
582 RunTime = jcr->jr.EndTime - jcr->jr.StartTime;
// Rate: job bytes divided by 1000 times the run time.
586 kbps = ((double)jcr->jr.JobBytes) / (1000.0 * (double)RunTime);
588 if (!db_get_job_volume_names(jcr, jcr->db, jcr->jr.JobId, &jcr->VolumeName)) {
590 * Note, if the job has erred, most likely it did not write any
591 * tape, so suppress this "error" message since in that case
592 * it is normal. Or look at it the other way, only for a
593 * normal exit should we complain about this error.
595 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes) {
596 Jmsg(jcr, M_ERROR, 0, "%s", db_strerror(jcr->db));
598 jcr->VolumeName[0] = 0; /* none */
// Compression is reported as None when nothing was read or when the
// saving is below half a percent, otherwise as a percentage with one
// decimal.
601 if (jcr->ReadBytes == 0) {
602 bstrncpy(compress, "None", sizeof(compress));
604 compression = (double)100 - 100.0 * ((double)jcr->JobBytes / (double)jcr->ReadBytes);
605 if (compression < 0.5) {
606 bstrncpy(compress, "None", sizeof(compress));
608 bsnprintf(compress, sizeof(compress), "%.1f %%", compression);
611 jobstatus_to_ascii(jcr->FDJobStatus, fd_term_msg, sizeof(fd_term_msg));
612 jobstatus_to_ascii(jcr->SDJobStatus, sd_term_msg, sizeof(sd_term_msg));
614 // bmicrosleep(15, 0); /* for debugging SIGHUP */
// The job report itself, sent at the message type chosen above.
616 Jmsg(jcr, msg_type, 0, _("%s %s %s (%s): %s\n"
617 " Build OS: %s %s %s\n"
620 " Backup Level: %s%s\n"
621 " Client: \"%s\" %s\n"
622 " FileSet: \"%s\" %s\n"
623 " Pool: \"%s\" (From %s)\n"
624 " Catalog: \"%s\" (From %s)\n"
625 " Storage: \"%s\" (From %s)\n"
626 " Scheduled time: %s\n"
629 " Elapsed time: %s\n"
631 " FD Files Written: %s\n"
632 " SD Files Written: %s\n"
633 " FD Bytes Written: %s (%sB)\n"
634 " SD Bytes Written: %s (%sB)\n"
636 " Software Compression: %s\n"
640 " Volume name(s): %s\n"
641 " Volume Session Id: %d\n"
642 " Volume Session Time: %d\n"
643 " Last Volume Bytes: %s (%sB)\n"
644 " Non-fatal FD errors: %d\n"
646 " FD termination status: %s\n"
647 " SD termination status: %s\n"
648 " Termination: %s\n\n"),
649 BACULA, my_name, VERSION, LSMDATE, edt,
650 HOST_OS, DISTNAME, DISTVER,
653 level_to_str(jcr->get_JobLevel()), jcr->since,
654 jcr->client->name(), cr.Uname,
655 jcr->fileset->name(), jcr->FSCreateTime,
656 jcr->pool->name(), jcr->pool_source,
657 jcr->catalog->name(), jcr->catalog_source,
658 jcr->wstore->name(), jcr->wstore_source,
662 edit_utime(RunTime, elapsed, sizeof(elapsed)),
664 edit_uint64_with_commas(jcr->jr.JobFiles, ec1),
665 edit_uint64_with_commas(jcr->SDJobFiles, ec2),
666 edit_uint64_with_commas(jcr->jr.JobBytes, ec3),
667 edit_uint64_with_suffix(jcr->jr.JobBytes, ec4),
668 edit_uint64_with_commas(jcr->SDJobBytes, ec5),
669 edit_uint64_with_suffix(jcr->SDJobBytes, ec6),
672 jcr->VSS?_("yes"):_("no"),
673 jcr->Encrypt?_("yes"):_("no"),
674 jcr->accurate?_("yes"):_("no"),
678 edit_uint64_with_commas(mr.VolBytes, ec7),
679 edit_uint64_with_suffix(mr.VolBytes, ec8),
686 Dmsg0(100, "Leave backup_cleanup()\n");
// Write the bootstrap records of this job to the target named by the
// WriteBootstrap setting of the job resource.
// Nothing is written unless the job ended with JS_Terminated, stored some
// bytes and has WriteBootstrap set.
//   jcr - job control record of the finished job
689 void update_bootstrap_file(JCR *jcr)
691 /* Now update the bootstrap file if any */
692 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes &&
693 jcr->job->WriteBootstrap) {
// The target name is the WriteBootstrap setting after job code expansion
// by edit_job_codes().
697 POOLMEM *fname = get_pool_memory(PM_FNAME);
698 fname = edit_job_codes(jcr, fname, jcr->job->WriteBootstrap, "");
700 VOL_PARAMS *VolParams = NULL;
702 char edt[50], ed1[50], ed2[50];
// Pipe target: the name is passed to open_bpipe() without its first
// character and the write side of the pipe is used as the output stream.
706 bpipe = open_bpipe(fname+1, 0, "w"); /* skip first char "|" */
707 fd = bpipe ? bpipe->wfd : NULL;
// File target: a Full job opens the file with w+b, any other level with
// a+b.
709 /* ***FIXME*** handle BASE */
710 fd = fopen(fname, jcr->get_JobLevel()==L_FULL?"w+b":"a+b");
713 VolCount = db_get_job_volume_parameters(jcr, jcr->db, jcr->JobId,
// Failure to get the volume parameters is reported as an error; the job
// is marked as terminated in error only when the SD wrote files.
716 Jmsg(jcr, M_ERROR, 0, _("Could not get Job Volume Parameters to "
717 "update Bootstrap file. ERR=%s\n"), db_strerror(jcr->db));
718 if (jcr->SDJobFiles != 0) {
719 set_jcr_job_status(jcr, JS_ErrorTerminated);
// Header line: current time, job name, level and since text.
723 /* Start output with when and who wrote it */
724 bstrftimes(edt, sizeof(edt), time(NULL));
725 fprintf(fd, "# %s - %s - %s%s\n", edt, jcr->jr.Job,
726 level_to_str(jcr->get_JobLevel()), jcr->since);
// One record per volume used by the job; the Slot line is written only
// for a positive slot number.
727 for (int i=0; i < VolCount; i++) {
728 /* Write the record */
729 fprintf(fd, "Volume=\"%s\"\n", VolParams[i].VolumeName);
730 fprintf(fd, "MediaType=\"%s\"\n", VolParams[i].MediaType);
731 if (VolParams[i].Slot > 0) {
732 fprintf(fd, "Slot=%d\n", VolParams[i].Slot);
734 fprintf(fd, "VolSessionId=%u\n", jcr->VolSessionId);
735 fprintf(fd, "VolSessionTime=%u\n", jcr->VolSessionTime);
736 fprintf(fd, "VolAddr=%s-%s\n",
737 edit_uint64(VolParams[i].StartAddr, ed1),
738 edit_uint64(VolParams[i].EndAddr, ed2));
739 fprintf(fd, "FileIndex=%d-%d\n", VolParams[i].FirstIndex,
740 VolParams[i].LastIndex);
// A target that could not be opened is reported with the system error and
// marks the job as terminated in error.
752 Jmsg(jcr, M_ERROR, 0, _("Could not open WriteBootstrap file:\n"
753 "%s: ERR=%s\n"), fname, be.bstrerror());
754 set_jcr_job_status(jcr, JS_ErrorTerminated);
756 free_pool_memory(fname);