2 Bacula® - The Network Backup Solution
4 Copyright (C) 2000-2009 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from
7 many others, a complete list can be found in the file AUTHORS.
8 This program is Free Software; you can redistribute it and/or
9 modify it under the terms of version two of the GNU General Public
10 License as published by the Free Software Foundation and included
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 Bacula® is a registered trademark of Kern Sibbald.
24 The licensor of Bacula is the Free Software Foundation Europe
25 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
26 Switzerland, email:ftf@fsfeurope.org.
30 * Bacula Director -- backup.c -- responsible for doing backup jobs
32 * Kern Sibbald, March MM
34 * Basic tasks done here:
35 * Open DB and create records for this job.
36 * Open Message Channel with Storage daemon to tell him a job will be starting.
37 * Open connection with File daemon and pass him commands
39 * When the File daemon finishes the job, update the DB.
/*
 * Protocol strings exchanged with the File daemon (FD).
 * storaddr is a printf-style template: it is sent with the Storage
 * daemon address, its port and the TLS requirement level (see the
 * fsend() call in do_backup()).
 */
48 /* Commands sent to File daemon */
49 static char backupcmd[] = "backup\n";
50 static char storaddr[] = "storage address=%s port=%d ssl=%d\n";
/*
 * OKstore and OKbackup are the replies passed to response() after the
 * storage and backup commands.  EndJob and OldEndJob are sscanf()
 * templates for the FD termination message: wait_for_job_termination()
 * tries the 7-field form first and falls back to the 5-field form.
 */
52 /* Responses received from File daemon */
53 static char OKbackup[] = "2000 OK backup\n";
54 static char OKstore[] = "2000 OK storage\n";
55 static char EndJob[] = "2800 End Job TermCode=%d JobFiles=%u "
56 "ReadBytes=%llu JobBytes=%llu Errors=%u "
57 "VSS=%d Encrypt=%d\n";
58 /* Pre 1.39.29 (04Dec06) EndJob */
59 static char OldEndJob[] = "2800 End Job TermCode=%d JobFiles=%u "
60 "ReadBytes=%llu JobBytes=%llu Errors=%u\n";
/*
 * do_backup_init -- per-job setup performed before the backup runs.
 *
 * Virtual Full jobs are handed to do_vbackup_init().  For every other
 * level the function releases the read storage, obtains the FileSet
 * record, resolves the job level and "since" time, applies pool
 * overrides, runs the duplicate-job check, obtains the Pool record,
 * takes the write storage from the pool and finally starts clone jobs.
 *
 * NOTE(review): the embedded line numbers skip in this listing, so the
 * bodies of the failure branches and the return statements are not
 * shown here; confirm them against the full file.
 */
62 * Called here before the job is run to do the job
65 bool do_backup_init(JCR *jcr)
/* Virtual Full has its own init routine; nothing below runs for it. */
68 if (jcr->get_JobLevel() == L_VIRTUAL_FULL) {
69 return do_vbackup_init(jcr);
71 free_rstorage(jcr); /* we don't read so release */
73 if (!get_or_create_fileset_record(jcr)) {
78 * Get definitive Job level and since time
/* The since text is written into jcr->since, bounded by its size. */
80 get_level_since_time(jcr, jcr->since, sizeof(jcr->since));
82 apply_pool_overrides(jcr);
84 if (!allow_duplicate_job(jcr)) {
/* A PoolId of 0 is treated as failure to get or create the pool. */
88 jcr->jr.PoolId = get_or_create_pool_record(jcr, jcr->pool->name());
89 if (jcr->jr.PoolId == 0) {
93 /* If pool storage specified, use it instead of job storage */
94 copy_wstorage(jcr, jcr->pool->storage, _("Pool resource"));
/* Reported as fatal; the condition guarding it is not visible here. */
97 Jmsg(jcr, M_FATAL, 0, _("No Storage specification found in Job or Pool.\n"));
101 create_clones(jcr); /* run any clone jobs */
107 * For each file in the current list, send "/path/fname\0LStat" to the FD
/*
 * accurate_list_handler -- row callback passed to db_get_file_list()
 * by send_accurate_current_files().
 *
 *   ctx         the JCR of the running job (cast back from void *)
 *   num_fields  number of columns in row (not used in the visible code)
 *   row         column values as strings; row[0], row[1] and row[4]
 *               are sent to the FD, row[2] is tested first
 *
 * NOTE(review): the return statements are not visible in this listing.
 */
109 static int accurate_list_handler(void *ctx, int num_fields, char **row)
111 JCR *jcr = (JCR *)ctx;
/* Checked on every row so a canceled job stops being fed to the FD. */
113 if (job_canceled(jcr)) {
/*
 * NOTE(review): row is char **, so this compares the pointer row[2]
 * with 0 rather than the numeric value of the string.  The trailing
 * comment says the intent is to discard rows whose file index is 0;
 * confirm whether the text should be converted or inspected instead.
 */
117 if (row[2] > 0) { /* discard when file_index == 0 */
/* Sends row[0] and row[1] concatenated, a NUL byte, then row[4]. */
118 jcr->file_bsock->fsend("%s%s%c%s", row[0], row[1], 0, row[4]);
/*
 * send_accurate_current_files -- send the catalog view of the current
 * files to the File daemon for an accurate-mode backup.
 *
 * The function looks up the previous job ids, sends an
 * "accurate files=<count>" line built from the sum of JobFiles of
 * those jobs, streams one record per file through
 * accurate_list_handler() over the batch database connection, and
 * ends the list with a BNET_EOD signal.
 *
 * NOTE(review): return statements and some conditions are missing from
 * this listing (the embedded numbering skips); confirm them against
 * the full file.
 */
124 * Send current file list to FD
125 * DIR -> FD : accurate files=xxxx
126 * DIR -> FD : /path/to/file\0Lstat
127 * DIR -> FD : /path/to/dir/\0Lstat
131 bool send_accurate_current_files(JCR *jcr)
/* Nothing to send when accurate mode is off, the job is canceled, or the level is Full. */
135 if (!jcr->accurate || job_canceled(jcr) || jcr->get_JobLevel()==L_FULL) {
138 POOLMEM *jobids = get_pool_memory(PM_FNAME);
140 db_accurate_get_jobids(jcr, jcr->db, &jcr->jr, jobids);
/* Failure path: jobids is released before the fatal message is issued. */
143 free_pool_memory(jobids);
144 Jmsg(jcr, M_FATAL, 0, _("Cannot find previous jobids.\n"));
147 Jmsg(jcr, M_INFO, 0, _("Sending Accurate information.\n"));
149 /* to be able to allocate the right size for htable */
150 POOLMEM *nb = get_pool_memory(PM_FNAME);
151 *nb = 0; /* clear buffer */
/* nb receives the query result through db_get_int_handler and is sent as text below. */
152 Mmsg(buf, "SELECT sum(JobFiles) FROM Job WHERE JobId IN (%s)",jobids);
153 db_sql_query(jcr->db, buf.c_str(), db_get_int_handler, nb);
154 Dmsg2(200, "jobids=%s nb=%s\n", jobids, nb);
155 jcr->file_bsock->fsend("accurate files=%s\n", nb);
/*
 * NOTE(review): jobids and nb are both still allocated here and are
 * only released further down; confirm that this failure path frees
 * them before leaving the function.
 */
157 if (!db_open_batch_connexion(jcr, jcr->db)) {
158 Jmsg0(jcr, M_FATAL, 0, "Can't get dedicate sql connexion");
/* The file list is read on jcr->db_batch, not on jcr->db. */
162 db_get_file_list(jcr, jcr->db_batch, jobids, accurate_list_handler, (void *)jcr);
164 /* TODO: close the batch connexion ? (can be used very soon) */
166 free_pool_memory(jobids);
167 free_pool_memory(nb);
/* Marks the end of the file list for the FD. */
169 jcr->file_bsock->signal(BNET_EOD);
/*
 * do_backup -- run a backup job from the Director side.
 *
 * Visible sequence: hand Virtual Full jobs to do_vbackup(); record the
 * job start in the catalog; connect to the Storage daemon, start its
 * job, send "run" and start the SD message thread; connect to the File
 * daemon; send include/exclude lists, level, storage address and
 * run-script commands; refresh the start time; send the accurate file
 * list; send the backup command; wait for termination and call
 * backup_cleanup() on normal termination.
 *
 * NOTE(review): this listing omits lines (the embedded numbering
 * skips).  The declarations of fd, store, stat and ed1, the bodies of
 * the failure branches, the labels of the error path and the return
 * statements are not shown; confirm them against the full file.
 */
175 * Do a backup of the specified FileSet
177 * Returns: false on failure
180 bool do_backup(JCR *jcr)
/* Default: no TLS requested towards the Storage daemon. */
183 int tls_need = BNET_TLS_NONE;
188 if (jcr->get_JobLevel() == L_VIRTUAL_FULL) {
189 return do_vbackup(jcr);
192 /* Print Job Start message */
193 Jmsg(jcr, M_INFO, 0, _("Start Backup JobId %s, Job=%s\n"),
194 edit_uint64(jcr->JobId, ed1), jcr->Job);
196 set_jcr_job_status(jcr, JS_Running);
197 Dmsg2(100, "JobId=%d JobLevel=%c\n", jcr->jr.JobId, jcr->jr.JobLevel);
/* First catalog update of the start record; repeated later with a fresh start time. */
198 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
199 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
204 * Open a message channel connection with the Storage
205 * daemon. This is to let him know that our client
206 * will be contacting him for a backup session.
209 Dmsg0(110, "Open connection with storage daemon\n");
210 set_jcr_job_status(jcr, JS_WaitSD);
212 * Start conversation with Storage daemon
214 if (!connect_to_storage_daemon(jcr, 10, SDConnectTimeout, 1)) {
218 * Now start a job with the Storage daemon
/* Only write storage is passed; the read-storage argument is NULL. */
220 if (!start_storage_daemon_job(jcr, NULL, jcr->wstorage)) {
225 * Start the job prior to starting the message thread below
226 * to avoid two threads from using the BSOCK structure at
229 if (!bnet_fsend(jcr->store_bsock, "run")) {
234 * Now start a Storage daemon message thread. Note,
235 * this thread is used to provide the catalog services
236 * for the backup job, including inserting the attributes
237 * into the catalog. See catalog_update() in catreq.c
239 if (!start_storage_daemon_message_thread(jcr)) {
242 Dmsg0(150, "Storage daemon connection OK\n");
244 set_jcr_job_status(jcr, JS_WaitFD);
245 if (!connect_to_file_daemon(jcr, 10, FDConnectTimeout, 1)) {
249 set_jcr_job_status(jcr, JS_Running);
/* fd is the File daemon socket used for the commands below. */
250 fd = jcr->file_bsock;
252 if (!send_include_list(jcr)) {
256 if (!send_exclude_list(jcr)) {
260 if (!send_level_command(jcr)) {
265 * send Storage daemon address to the File daemon
/* When SDDport is 0, SDport is used in its place. */
268 if (store->SDDport == 0) {
269 store->SDDport = store->SDport;
272 /* TLS Requirement */
/* tls_need becomes REQUIRED or OK depending on tls_require; it stays NONE when TLS is disabled. */
273 if (store->tls_enable) {
274 if (store->tls_require) {
275 tls_need = BNET_TLS_REQUIRED;
277 tls_need = BNET_TLS_OK;
/* The result of fsend() is not tested here; the reply is checked by response(). */
281 fd->fsend(storaddr, store->address, store->SDDport, tls_need);
282 if (!response(jcr, fd, OKstore, "Storage", DISPLAY_ERROR)) {
286 if (!send_runscripts_commands(jcr)) {
291 * We re-update the job start record so that the start
292 * time is set after the run before job. This avoids
293 * that any files created by the run before job will
294 * be saved twice. They will be backed up in the current
295 * job, but not in the next one unless they are changed.
296 * Without this, they will be backed up in this job and
297 * in the next job run because in that case, their date
298 * is after the start of this run.
300 jcr->start_time = time(NULL);
301 jcr->jr.StartTime = jcr->start_time;
302 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
303 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
307 * If backup is in accurate mode, we send the list of
310 if (!send_accurate_current_files(jcr)) {
314 /* Send backup command */
315 fd->fsend(backupcmd);
316 if (!response(jcr, fd, OKbackup, "backup", DISPLAY_ERROR)) {
320 /* Pickup Job termination data */
/*
 * Called with one argument here although the definition in this file
 * takes (jcr, timeout); presumably the prototype supplies a default
 * for timeout -- TODO confirm.
 */
321 stat = wait_for_job_termination(jcr);
/* Runs before the status test, so it is executed whatever stat is. */
322 db_write_batch_file_records(jcr); /* used by bulk batch file insert */
323 if (stat == JS_Terminated) {
324 backup_cleanup(jcr, stat);
329 /* Come here only after starting SD thread */
/* Error path: mark the job failed, then wait with FDConnectTimeout as the timeout. */
331 set_jcr_job_status(jcr, JS_ErrorTerminated);
332 Dmsg1(400, "wait for sd. use=%d\n", jcr->use_count());
334 wait_for_job_termination(jcr, FDConnectTimeout);
335 Dmsg1(400, "after wait for sd. use=%d\n", jcr->use_count());
/*
 * wait_for_job_termination -- collect the end-of-job status from the
 * File daemon, then from the Storage daemon.
 *
 *   jcr      job control record; its file_bsock is read for FD messages
 *   timeout  passed to start_bsock_timer() for the FD socket
 *
 * Returns the first non-JS_Terminated status among jcr->JobStatus and
 * jcr->FDJobStatus, otherwise jcr->SDJobStatus.
 *
 * NOTE(review): this listing omits lines (the embedded numbering
 * skips).  The declarations of n, tid, fd_ok, VSS and Encrypt and
 * several conditions are not shown; confirm them against the full file.
 */
341 * Here we wait for the File daemon to signal termination,
342 * then we wait for the Storage daemon. When both
343 * are done, we return the job status.
344 * Also used by restore.c
346 int wait_for_job_termination(JCR *jcr, int timeout)
349 BSOCK *fd = jcr->file_bsock;
/*
 * NOTE(review): JobFiles and JobErrors have no initializer, unlike the
 * three counters below.  They are filled by sscanf() and copied into
 * the jcr later; the guard around that copy is not visible here, so
 * confirm they are never read when no End Job message was parsed.
 */
351 uint32_t JobFiles, JobErrors;
352 uint32_t JobWarnings = 0;
353 uint64_t ReadBytes = 0;
354 uint64_t JobBytes = 0;
359 set_jcr_job_status(jcr, JS_Running);
363 tid = start_bsock_timer(fd, timeout); /* TODO: New timeout directive??? */
365 /* Wait for Client to terminate */
/* The loop ends when bget_dirmsg() returns a negative value. */
366 while ((n = bget_dirmsg(fd)) >= 0) {
/* Current End Job format must yield 7 fields, the pre-1.39.29 format 5. */
368 (sscanf(fd->msg, EndJob, &jcr->FDJobStatus, &JobFiles,
369 &ReadBytes, &JobBytes, &JobErrors, &VSS, &Encrypt) == 7 ||
370 sscanf(fd->msg, OldEndJob, &jcr->FDJobStatus, &JobFiles,
371 &ReadBytes, &JobBytes, &JobErrors) == 5)) {
/* The status reported by the FD becomes the job status. */
373 set_jcr_job_status(jcr, jcr->FDJobStatus);
374 Dmsg1(100, "FDStatus=%c\n", (char)jcr->JobStatus);
376 Jmsg(jcr, M_WARNING, 0, _("Unexpected Client Job message: %s\n"),
379 if (job_canceled(jcr)) {
384 stop_bsock_timer(tid);
387 if (is_bnet_error(fd)) {
388 Jmsg(jcr, M_FATAL, 0, _("Network error with FD during %s: ERR=%s\n"),
389 job_type_to_str(jcr->get_JobType()), fd->bstrerror());
391 fd->signal(BNET_TERMINATE); /* tell Client we are terminating */
394 /* Force cancel in SD if failing */
395 if (job_canceled(jcr) || !fd_ok) {
396 cancel_storage_daemon_job(jcr);
399 /* Note, the SD stores in jcr->JobFiles/ReadBytes/JobBytes/JobErrors */
400 wait_for_storage_daemon_termination(jcr);
402 /* Return values from FD */
/* FD values replace the jcr counters, except JobErrors which is accumulated. */
404 jcr->JobFiles = JobFiles;
405 jcr->JobErrors += JobErrors; /* Keep total errors */
406 jcr->ReadBytes = ReadBytes;
407 jcr->JobBytes = JobBytes;
/* JobWarnings is never parsed from the FD message in the visible code, so this stores its initial 0. */
408 jcr->JobWarnings = JobWarnings;
410 jcr->Encrypt = Encrypt;
412 Jmsg(jcr, M_FATAL, 0, _("No Job status returned from FD.\n"));
415 // Dmsg4(100, "fd_ok=%d FDJS=%d JS=%d SDJS=%d\n", fd_ok, jcr->FDJobStatus,
416 // jcr->JobStatus, jcr->SDJobStatus);
418 /* Return the first error status we find Dir, FD, or SD */
419 if (!fd_ok || is_bnet_error(fd)) { /* if fd not set, that use !fd_ok */
420 jcr->FDJobStatus = JS_ErrorTerminated;
/* Precedence of the returned status: Director, then FD, then SD. */
422 if (jcr->JobStatus != JS_Terminated) {
423 return jcr->JobStatus;
425 if (jcr->FDJobStatus != JS_Terminated) {
426 return jcr->FDJobStatus;
428 return jcr->SDJobStatus;
/*
 * backup_cleanup -- finish a backup job and emit the job report.
 *
 *   jcr       job control record of the finished job
 *   TermCode  termination code handed to update_job_end()
 *
 * Virtual Full jobs are handed to vbackup_cleanup().  Otherwise the
 * function records the job end, reloads the Job, Client and Media
 * records, updates the bootstrap file, chooses the termination text
 * from jcr->JobStatus and prints the summary through Jmsg().
 *
 * NOTE(review): this listing omits lines (the embedded numbering
 * skips).  The declarations of mr, cr and RunTime, the case labels of
 * the switch and several report arguments are not shown; confirm them
 * against the full file.
 */
432 * Release resources allocated during backup.
434 void backup_cleanup(JCR *jcr, int TermCode)
/* Scratch buffers for formatted times, edited numbers and status texts. */
436 char sdt[50], edt[50], schedt[50];
437 char ec1[30], ec2[30], ec3[30], ec4[30], ec5[30], compress[50];
438 char ec6[30], ec7[30], ec8[30], elapsed[50];
439 char term_code[100], fd_term_msg[100], sd_term_msg[100];
440 const char *term_msg;
/* Report severity; raised to M_ERROR for JS_ErrorTerminated below. */
441 int msg_type = M_INFO;
444 double kbps, compression;
447 if (jcr->get_JobLevel() == L_VIRTUAL_FULL) {
448 vbackup_cleanup(jcr, TermCode);
452 Dmsg2(100, "Enter backup_cleanup %d %c\n", TermCode, TermCode);
/* Both lookup records start zeroed; only the key fields are filled in before each query. */
453 memset(&mr, 0, sizeof(mr));
454 memset(&cr, 0, sizeof(cr));
456 update_job_end(jcr, TermCode);
/* A missing Job record is reported as a warning and marks the job as failed. */
458 if (!db_get_job_record(jcr, jcr->db, &jcr->jr)) {
459 Jmsg(jcr, M_WARNING, 0, _("Error getting Job record for Job report: ERR=%s"),
460 db_strerror(jcr->db));
461 set_jcr_job_status(jcr, JS_ErrorTerminated);
/* Client lookup by name; in the visible lines a failure only produces a warning. */
464 bstrncpy(cr.Name, jcr->client->name(), sizeof(cr.Name));
465 if (!db_get_client_record(jcr, jcr->db, &cr)) {
466 Jmsg(jcr, M_WARNING, 0, _("Error getting Client record for Job report: ERR=%s"),
467 db_strerror(jcr->db));
/* Media lookup by volume name; a failure also marks the job as failed. */
470 bstrncpy(mr.VolumeName, jcr->VolumeName, sizeof(mr.VolumeName));
471 if (!db_get_media_record(jcr, jcr->db, &mr)) {
472 Jmsg(jcr, M_WARNING, 0, _("Error getting Media record for Volume \"%s\": ERR=%s"),
473 mr.VolumeName, db_strerror(jcr->db));
474 set_jcr_job_status(jcr, JS_ErrorTerminated);
477 update_bootstrap_file(jcr);
/* Pick the termination text; most case labels are missing from this listing. */
479 switch (jcr->JobStatus) {
481 if (jcr->JobErrors || jcr->SDErrors) {
482 term_msg = _("Backup OK -- with warnings");
484 term_msg = _("Backup OK");
488 term_msg = _("Backup OK -- with warnings");
491 case JS_ErrorTerminated:
492 term_msg = _("*** Backup Error ***");
493 msg_type = M_ERROR; /* Generate error message */
/* Tell the Storage daemon to stop and cancel its message thread if one exists. */
494 if (jcr->store_bsock) {
495 jcr->store_bsock->signal(BNET_TERMINATE);
496 if (jcr->SD_msg_chan) {
497 pthread_cancel(jcr->SD_msg_chan);
502 term_msg = _("Backup Canceled");
/* Same Storage daemon shutdown as in the error case above. */
503 if (jcr->store_bsock) {
504 jcr->store_bsock->signal(BNET_TERMINATE);
505 if (jcr->SD_msg_chan) {
506 pthread_cancel(jcr->SD_msg_chan);
511 term_msg = term_code;
/*
 * NOTE(review): sprintf() is unbounded and the format string is a
 * translated text; bsnprintf() with sizeof(term_code), as used for
 * compress below, would bound the write -- consider switching.
 */
512 sprintf(term_code, _("Inappropriate term code: %c\n"), jcr->JobStatus);
515 bstrftimes(schedt, sizeof(schedt), jcr->jr.SchedTime);
516 bstrftimes(sdt, sizeof(sdt), jcr->jr.StartTime);
517 bstrftimes(edt, sizeof(edt), jcr->jr.EndTime);
518 RunTime = jcr->jr.EndTime - jcr->jr.StartTime;
/*
 * Rate is JobBytes divided by 1000 times the run time.
 * NOTE(review): a guard for RunTime == 0 is not visible in this
 * listing; confirm one exists before this division.
 */
522 kbps = ((double)jcr->jr.JobBytes) / (1000.0 * (double)RunTime);
524 if (!db_get_job_volume_names(jcr, jcr->db, jcr->jr.JobId, &jcr->VolumeName)) {
526 * Note, if the job has erred, most likely it did not write any
527 * tape, so suppress this "error" message since in that case
528 * it is normal. Or look at it the other way, only for a
529 * normal exit should we complain about this error.
531 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes) {
532 Jmsg(jcr, M_ERROR, 0, "%s", db_strerror(jcr->db));
534 jcr->VolumeName[0] = 0; /* none */
/* With nothing read there is no ratio to compute. */
537 if (jcr->ReadBytes == 0) {
538 bstrncpy(compress, "None", sizeof(compress));
/* Percentage saved: 100 minus 100 times JobBytes over ReadBytes. */
540 compression = (double)100 - 100.0 * ((double)jcr->JobBytes / (double)jcr->ReadBytes);
/* Anything under 0.5 percent is reported as no compression. */
541 if (compression < 0.5) {
542 bstrncpy(compress, "None", sizeof(compress));
544 bsnprintf(compress, sizeof(compress), "%.1f %%", compression);
547 jobstatus_to_ascii(jcr->FDJobStatus, fd_term_msg, sizeof(fd_term_msg));
548 jobstatus_to_ascii(jcr->SDJobStatus, sd_term_msg, sizeof(sd_term_msg));
550 // bmicrosleep(15, 0); /* for debugging SIGHUP */
/*
 * Job report.  The format string and the argument list must stay in
 * step; several lines of both are missing from this listing.
 */
552 Jmsg(jcr, msg_type, 0, _("%s %s %s (%s): %s\n"
553 " Build OS: %s %s %s\n"
556 " Backup Level: %s%s\n"
557 " Client: \"%s\" %s\n"
558 " FileSet: \"%s\" %s\n"
559 " Pool: \"%s\" (From %s)\n"
560 " Catalog: \"%s\" (From %s)\n"
561 " Storage: \"%s\" (From %s)\n"
562 " Scheduled time: %s\n"
565 " Elapsed time: %s\n"
567 " FD Files Written: %s\n"
568 " SD Files Written: %s\n"
569 " FD Bytes Written: %s (%sB)\n"
570 " SD Bytes Written: %s (%sB)\n"
572 " Software Compression: %s\n"
576 " Volume name(s): %s\n"
577 " Volume Session Id: %d\n"
578 " Volume Session Time: %d\n"
579 " Last Volume Bytes: %s (%sB)\n"
580 " Non-fatal FD errors: %d\n"
582 " FD termination status: %s\n"
583 " SD termination status: %s\n"
584 " Termination: %s\n\n"),
585 BACULA, my_name, VERSION, LSMDATE, edt,
586 HOST_OS, DISTNAME, DISTVER,
589 level_to_str(jcr->get_JobLevel()), jcr->since,
590 jcr->client->name(), cr.Uname,
591 jcr->fileset->name(), jcr->FSCreateTime,
592 jcr->pool->name(), jcr->pool_source,
593 jcr->catalog->name(), jcr->catalog_source,
594 jcr->wstore->name(), jcr->wstore_source,
598 edit_utime(RunTime, elapsed, sizeof(elapsed)),
/* FD figures come from the catalog Job record, SD figures from the jcr. */
600 edit_uint64_with_commas(jcr->jr.JobFiles, ec1),
601 edit_uint64_with_commas(jcr->SDJobFiles, ec2),
602 edit_uint64_with_commas(jcr->jr.JobBytes, ec3),
603 edit_uint64_with_suffix(jcr->jr.JobBytes, ec4),
604 edit_uint64_with_commas(jcr->SDJobBytes, ec5),
605 edit_uint64_with_suffix(jcr->SDJobBytes, ec6),
608 jcr->VSS?_("yes"):_("no"),
609 jcr->Encrypt?_("yes"):_("no"),
610 jcr->accurate?_("yes"):_("no"),
614 edit_uint64_with_commas(mr.VolBytes, ec7),
615 edit_uint64_with_suffix(mr.VolBytes, ec8),
622 Dmsg0(100, "Leave backup_cleanup()\n");
/*
 * update_bootstrap_file -- write the bootstrap records of a finished
 * job to the target named by the job WriteBootstrap setting.
 *
 * Work is done only when the job status is JS_Terminated, the Job
 * record shows bytes written and WriteBootstrap is set.  The target is
 * either a pipe or a file; one group of records is written per volume
 * returned by db_get_job_volume_parameters().
 *
 * NOTE(review): this listing omits lines (the embedded numbering
 * skips).  The declarations of fd, bpipe, VolCount and be, the test
 * that selects pipe or file, the closing of the stream or pipe and
 * the release of VolParams are not shown; confirm them against the
 * full file.
 */
625 void update_bootstrap_file(JCR *jcr)
627 /* Now update the bootstrap file if any */
628 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes &&
629 jcr->job->WriteBootstrap) {
/* Job codes in the configured name are expanded; the result is reassigned to fname. */
633 POOLMEM *fname = get_pool_memory(PM_FNAME);
634 fname = edit_job_codes(jcr, fname, jcr->job->WriteBootstrap, "");
636 VOL_PARAMS *VolParams = NULL;
638 char edt[50], ed1[50], ed2[50];
/* Pipe target: fd stays NULL when open_bpipe() fails. */
642 bpipe = open_bpipe(fname+1, 0, "w"); /* skip first char "|" */
643 fd = bpipe ? bpipe->wfd : NULL;
645 /* ***FIXME*** handle BASE */
/* File target: a Full level opens with "w+b", every other level with "a+b". */
646 fd = fopen(fname, jcr->get_JobLevel()==L_FULL?"w+b":"a+b");
649 VolCount = db_get_job_volume_parameters(jcr, jcr->db, jcr->JobId,
652 Jmsg(jcr, M_ERROR, 0, _("Could not get Job Volume Parameters to "
653 "update Bootstrap file. ERR=%s\n"), db_strerror(jcr->db));
/* The job is only marked as failed when the SD reported files written. */
654 if (jcr->SDJobFiles != 0) {
655 set_jcr_job_status(jcr, JS_ErrorTerminated);
659 /* Start output with when and who wrote it */
660 bstrftimes(edt, sizeof(edt), time(NULL));
661 fprintf(fd, "# %s - %s - %s%s\n", edt, jcr->jr.Job,
662 level_to_str(jcr->get_JobLevel()), jcr->since);
/* One group of records per volume. */
663 for (int i=0; i < VolCount; i++) {
664 /* Write the record */
665 fprintf(fd, "Volume=\"%s\"\n", VolParams[i].VolumeName);
666 fprintf(fd, "MediaType=\"%s\"\n", VolParams[i].MediaType);
/* The Slot line is written only for positive slot numbers. */
667 if (VolParams[i].Slot > 0) {
668 fprintf(fd, "Slot=%d\n", VolParams[i].Slot);
/* Session id and time come from the jcr, not from the per-volume entry. */
670 fprintf(fd, "VolSessionId=%u\n", jcr->VolSessionId);
671 fprintf(fd, "VolSessionTime=%u\n", jcr->VolSessionTime);
672 fprintf(fd, "VolAddr=%s-%s\n",
673 edit_uint64(VolParams[i].StartAddr, ed1),
674 edit_uint64(VolParams[i].EndAddr, ed2));
675 fprintf(fd, "FileIndex=%d-%d\n", VolParams[i].FirstIndex,
676 VolParams[i].LastIndex);
/* Open failure: report the error and mark the job as failed. */
688 Jmsg(jcr, M_ERROR, 0, _("Could not open WriteBootstrap file:\n"
689 "%s: ERR=%s\n"), fname, be.bstrerror());
690 set_jcr_job_status(jcr, JS_ErrorTerminated);
692 free_pool_memory(fname);