2 Bacula® - The Network Backup Solution
4 Copyright (C) 2000-2008 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from
7 many others, a complete list can be found in the file AUTHORS.
8 This program is Free Software; you can redistribute it and/or
9 modify it under the terms of version two of the GNU General Public
10 License as published by the Free Software Foundation and included
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 Bacula® is a registered trademark of Kern Sibbald.
24 The licensor of Bacula is the Free Software Foundation Europe
25 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
26 Switzerland, email:ftf@fsfeurope.org.
30 * Bacula Director -- backup.c -- responsible for doing backup jobs
32 * Kern Sibbald, March MM
34 * Basic tasks done here:
35 * Open DB and create records for this job.
36 * Open Message Channel with Storage daemon to tell him a job will be starting.
37 * Open connection with File daemon and pass him commands
39 * When the File daemon finishes the job, update the DB.
48 /* Commands sent to File daemon */
/* Sent by do_backup() to start the backup proper. */
49 static char backupcmd[] = "backup\n";
/* Filled in by do_backup() with store->address, store->SDDport and tls_need. */
50 static char storaddr[] = "storage address=%s port=%d ssl=%d\n";
52 /* Responses received from File daemon */
/* Reply that do_backup() expects after sending backupcmd. */
53 static char OKbackup[] = "2000 OK backup\n";
/* Reply that do_backup() expects after sending storaddr. */
54 static char OKstore[] = "2000 OK storage\n";
/*
 * Scan template used by wait_for_job_termination(); it yields seven values:
 * FDJobStatus, JobFiles, ReadBytes, JobBytes, Errors, VSS and Encrypt.
 */
55 static char EndJob[] = "2800 End Job TermCode=%d JobFiles=%u "
56 "ReadBytes=%llu JobBytes=%llu Errors=%u "
57 "VSS=%d Encrypt=%d\n";
58 /* Pre 1.39.29 (04Dec06) EndJob */
/* Five-value fallback tried by wait_for_job_termination() when EndJob does not match. */
59 static char OldEndJob[] = "2800 End Job TermCode=%d JobFiles=%u "
60 "ReadBytes=%llu JobBytes=%llu Errors=%u\n";
62 * Called here before the job is run to do the job
/*
 * Set up a backup job before it is run.
 *
 * A job whose level is L_VIRTUAL_FULL is handed straight to
 * do_vbackup_init().  For every other level the steps below run in order:
 * free_rstorage(), get_or_create_fileset_record(), get_level_since_time(),
 * apply_pool_overrides(), allow_duplicate_job(), get_or_create_pool_record(),
 * copy_wstorage() with the pool's storage, and finally create_clones().
 *
 * Returns bool.
 * NOTE(review): the return statements are not visible in this listing;
 * presumably false is returned when one of the checked steps fails -- confirm.
 */
65 bool do_backup_init(JCR *jcr)
68 if (jcr->get_JobLevel() == L_VIRTUAL_FULL) {
69 return do_vbackup_init(jcr);
71 free_rstorage(jcr); /* we don't read so release */
73 if (!get_or_create_fileset_record(jcr)) {
78 * Get definitive Job level and since time
/* The since text is written into jcr->since. */
80 get_level_since_time(jcr, jcr->since, sizeof(jcr->since));
82 apply_pool_overrides(jcr);
84 if (!allow_duplicate_job(jcr)) {
/* A PoolId of 0 is treated as failure by the test that follows. */
88 jcr->jr.PoolId = get_or_create_pool_record(jcr, jcr->pool->name());
89 if (jcr->jr.PoolId == 0) {
93 /* If pool storage specified, use it instead of job storage */
94 copy_wstorage(jcr, jcr->pool->storage, _("Pool resource"));
/* NOTE(review): the condition guarding this fatal message is not visible here. */
97 Jmsg(jcr, M_FATAL, 0, _("No Storage specification found in Job or Pool.\n"));
101 create_clones(jcr); /* run any clone jobs */
107 * For each file in the current list, send "/path/fname\0LStat" to FD
/*
 * Row callback passed to db_get_file_list() by send_accurate_current_files().
 *
 *   ctx        - the JCR of the running job, passed through as void *
 *   num_fields - number of columns in row (not used here)
 *   row        - result columns as text: row[0] and row[1] are sent
 *                back to back, followed by a NUL byte and row[4];
 *                row[2] is the file index
 *
 * Rows whose file index is 0 are not sent to the File daemon.
 *
 * The file-index test used to be "row[2] > 0", which compared the pointer
 * itself and was therefore true for every non-NULL column, including the
 * text "0".  The test now looks at the text: a positive index starts with
 * a digit 1-9.  (Assumes the catalog returns plain decimal with no sign,
 * padding or leading zeros -- TODO confirm for every catalog backend.)
 * A NULL column is skipped as well instead of being treated as positive.
 */
109 static int accurate_list_handler(void *ctx, int num_fields, char **row)
111 JCR *jcr = (JCR *)ctx;
113 if (job_canceled(jcr)) {
117 if (row[2] != NULL && row[2][0] >= '1' && row[2][0] <= '9') { /* discard when file_index == 0 */
118 jcr->file_bsock->fsend("%s%s%c%s", row[0], row[1], 0, row[4]);
124 * Send current file list to FD
125 * DIR -> FD : accurate files=xxxx
126 * DIR -> FD : /path/to/file\0Lstat
127 * DIR -> FD : /path/to/dir/\0Lstat
/*
 * Send the list of files already known to the catalog to the File daemon.
 *
 * Nothing is sent when jcr->accurate is not set, the job is canceled, or
 * the job level is L_FULL.  Otherwise the job ids are fetched with
 * db_accurate_get_jobids(), the summed JobFiles count for those ids is
 * sent as "accurate files=<count>", every row produced by
 * db_get_file_list() is forwarded through accurate_list_handler(), and
 * the list is closed with a BNET_EOD signal.
 *
 * Returns bool.
 * NOTE(review): the return statements are not visible in this listing.
 */
131 bool send_accurate_current_files(JCR *jcr)
135 if (!jcr->accurate || job_canceled(jcr) || jcr->get_JobLevel()==L_FULL) {
138 POOLMEM *jobids = get_pool_memory(PM_FNAME);
140 db_accurate_get_jobids(jcr, jcr->db, &jcr->jr, jobids);
/* Failure path: jobids is released before the fatal message is issued. */
143 free_pool_memory(jobids);
144 Jmsg(jcr, M_FATAL, 0, _("Cannot find previous jobids.\n"));
147 Jmsg(jcr, M_INFO, 0, _("Sending Accurate information.\n"));
149 /* to be able to allocate the right size for htable */
150 POOLMEM *nb = get_pool_memory(PM_FNAME);
151 *nb = 0; /* clear buffer */
152 Mmsg(buf, "SELECT sum(JobFiles) FROM Job WHERE JobId IN (%s)",jobids);
/*
 * NOTE(review): the result of db_sql_query() is not checked; if the query
 * fails, nb stays empty and "accurate files=" is sent with no count.
 */
153 db_sql_query(jcr->db, buf.c_str(), db_get_int_handler, nb);
154 Dmsg2(200, "jobids=%s nb=%s\n", jobids, nb);
155 jcr->file_bsock->fsend("accurate files=%s\n", nb);
/*
 * NOTE(review): the lines after this failure message are not visible here;
 * confirm that jobids and nb are released before returning.  Unlike the
 * other messages in this function, this one has no trailing newline and
 * is not wrapped in _().
 */
157 if (!db_open_batch_connexion(jcr, jcr->db)) {
158 Jmsg0(jcr, M_FATAL, 0, "Can't get dedicate sql connexion");
/* The list query runs on jcr->db_batch; each row goes to accurate_list_handler(). */
162 db_get_file_list(jcr, jcr->db_batch, jobids, accurate_list_handler, (void *)jcr);
164 /* TODO: close the batch connexion ? (can be used very soon) */
166 free_pool_memory(jobids);
167 free_pool_memory(nb);
/* Tell the File daemon that the list is complete. */
169 jcr->file_bsock->signal(BNET_EOD);
175 * Do a backup of the specified FileSet
177 * Returns: false on failure
/*
 * Run a backup job.
 *
 * A job whose level is L_VIRTUAL_FULL is handed to do_vbackup().
 * Otherwise the sequence visible below is:
 *   1. print the start message, set JS_Running, update the job start record;
 *   2. connect to the Storage daemon, start the job there, send "run",
 *      then start the Storage daemon message thread;
 *   3. connect to the File daemon and send the include list, exclude
 *      list and level command;
 *   4. send the Storage daemon address, port and TLS requirement to the
 *      File daemon, then the runscript commands;
 *   5. reset the start time and update the job start record again;
 *   6. send the accurate file list and then the backup command;
 *   7. wait for termination, write the batch file records and, when the
 *      result is JS_Terminated, call backup_cleanup().
 *
 * Returns bool.
 * NOTE(review): the return and bail-out statements are not visible in this
 * listing, so the exact value on each path cannot be confirmed from here.
 */
180 bool do_backup(JCR *jcr)
183 int tls_need = BNET_TLS_NONE;
188 if (jcr->get_JobLevel() == L_VIRTUAL_FULL) {
189 return do_vbackup(jcr);
192 /* Print Job Start message */
193 Jmsg(jcr, M_INFO, 0, _("Start Backup JobId %s, Job=%s\n"),
194 edit_uint64(jcr->JobId, ed1), jcr->Job);
196 set_jcr_job_status(jcr, JS_Running);
197 Dmsg2(100, "JobId=%d JobLevel=%c\n", jcr->jr.JobId, jcr->jr.JobLevel);
198 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
199 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
204 * Open a message channel connection with the Storage
205 * daemon. This is to let him know that our client
206 * will be contacting him for a backup session.
209 Dmsg0(110, "Open connection with storage daemon\n");
210 set_jcr_job_status(jcr, JS_WaitSD);
212 * Start conversation with Storage daemon
214 if (!connect_to_storage_daemon(jcr, 10, SDConnectTimeout, 1)) {
218 * Now start a job with the Storage daemon
220 if (!start_storage_daemon_job(jcr, NULL, jcr->wstorage)) {
225 * Start the job prior to starting the message thread below
226 * to avoid two threads from using the BSOCK structure at
/* NOTE(review): this is the only send in the function that uses bnet_fsend(); the others call fsend() on the socket. */
229 if (!bnet_fsend(jcr->store_bsock, "run")) {
234 * Now start a Storage daemon message thread. Note,
235 * this thread is used to provide the catalog services
236 * for the backup job, including inserting the attributes
237 * into the catalog. See catalog_update() in catreq.c
239 if (!start_storage_daemon_message_thread(jcr)) {
242 Dmsg0(150, "Storage daemon connection OK\n");
244 set_jcr_job_status(jcr, JS_WaitFD);
245 if (!connect_to_file_daemon(jcr, 10, FDConnectTimeout, 1)) {
249 set_jcr_job_status(jcr, JS_Running);
250 fd = jcr->file_bsock;
252 if (!send_include_list(jcr)) {
256 if (!send_exclude_list(jcr)) {
260 if (!send_level_command(jcr)) {
265 * send Storage daemon address to the File daemon
/* When SDDport is 0, SDport is copied into it and used as the port sent below. */
268 if (store->SDDport == 0) {
269 store->SDDport = store->SDport;
272 /* TLS Requirement */
/* tls_need keeps its initial BNET_TLS_NONE unless store->tls_enable is set. */
273 if (store->tls_enable) {
274 if (store->tls_require) {
275 tls_need = BNET_TLS_REQUIRED;
277 tls_need = BNET_TLS_OK;
281 fd->fsend(storaddr, store->address, store->SDDport, tls_need);
282 if (!response(jcr, fd, OKstore, "Storage", DISPLAY_ERROR)) {
286 if (!send_runscripts_commands(jcr)) {
291 * We re-update the job start record so that the start
292 * time is set after the run before job. This avoids
293 * that any files created by the run before job will
294 * be saved twice. They will be backed up in the current
295 * job, but not in the next one unless they are changed.
296 * Without this, they will be backed up in this job and
297 * in the next job run because in that case, their date
298 * is after the start of this run.
300 jcr->start_time = time(NULL);
301 jcr->jr.StartTime = jcr->start_time;
302 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
303 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
307 * If backup is in accurate mode, we send the list of
310 if (!send_accurate_current_files(jcr)) {
314 /* Send backup command */
315 fd->fsend(backupcmd);
316 if (!response(jcr, fd, OKbackup, "backup", DISPLAY_ERROR)) {
320 /* Pickup Job termination data */
/* Called with one argument here; the timeout default is presumably declared with the prototype -- confirm. */
321 stat = wait_for_job_termination(jcr);
322 db_write_batch_file_records(jcr); /* used by bulk batch file insert */
323 if (stat == JS_Terminated) {
324 backup_cleanup(jcr, stat);
329 /* Come here only after starting SD thread */
/* Failure path: mark the job JS_ErrorTerminated, then wait for termination using FDConnectTimeout. */
331 set_jcr_job_status(jcr, JS_ErrorTerminated);
332 Dmsg1(400, "wait for sd. use=%d\n", jcr->use_count());
334 wait_for_job_termination(jcr, FDConnectTimeout);
335 Dmsg1(400, "after wait for sd. use=%d\n", jcr->use_count());
341 * Here we wait for the File daemon to signal termination,
342 * then we wait for the Storage daemon. When both
343 * are done, we return the job status.
344 * Also used by restore.c
/*
 * Wait for the File daemon to report the end of the job, then wait for
 * the Storage daemon.
 *
 *   jcr     - job control record; file_bsock is the File daemon socket
 *   timeout - passed to start_bsock_timer() for the File daemon socket
 *
 * Messages are read with bget_dirmsg() until it returns a negative value.
 * A message matching EndJob (seven fields) or OldEndJob (five fields)
 * supplies the File daemon status and counters; any other message is
 * reported as a warning.  The Storage daemon job is canceled when the job
 * is canceled or the File daemon did not finish properly.
 *
 * Returns the first status that is not JS_Terminated, checking
 * jcr->JobStatus and then jcr->FDJobStatus; if both are JS_Terminated,
 * jcr->SDJobStatus is returned.
 */
346 int wait_for_job_termination(JCR *jcr, int timeout)
349 BSOCK *fd = jcr->file_bsock;
/*
 * NOTE(review): JobFiles and Errors have no initializer, unlike ReadBytes
 * and JobBytes below.  They are copied into jcr further down; the guard
 * around that copy is not visible here -- confirm that they are only read
 * after a successful sscanf().
 */
351 uint32_t JobFiles, Errors;
352 uint64_t ReadBytes = 0;
353 uint64_t JobBytes = 0;
358 set_jcr_job_status(jcr, JS_Running);
362 tid = start_bsock_timer(fd, timeout); /* TODO: New timeout directive??? */
364 /* Wait for Client to terminate */
365 while ((n = bget_dirmsg(fd)) >= 0) {
/* Try the current seven-field format first, then the older five-field one. */
367 (sscanf(fd->msg, EndJob, &jcr->FDJobStatus, &JobFiles,
368 &ReadBytes, &JobBytes, &Errors, &VSS, &Encrypt) == 7 ||
369 sscanf(fd->msg, OldEndJob, &jcr->FDJobStatus, &JobFiles,
370 &ReadBytes, &JobBytes, &Errors) == 5)) {
372 set_jcr_job_status(jcr, jcr->FDJobStatus);
/* NOTE(review): the label says FDStatus but the value printed is jcr->JobStatus. */
373 Dmsg1(100, "FDStatus=%c\n", (char)jcr->JobStatus);
375 Jmsg(jcr, M_WARNING, 0, _("Unexpected Client Job message: %s\n"),
378 if (job_canceled(jcr)) {
383 stop_bsock_timer(tid);
386 if (is_bnet_error(fd)) {
387 Jmsg(jcr, M_FATAL, 0, _("Network error with FD during %s: ERR=%s\n"),
388 job_type_to_str(jcr->get_JobType()), fd->bstrerror());
390 fd->signal(BNET_TERMINATE); /* tell Client we are terminating */
393 /* Force cancel in SD if failing */
394 if (job_canceled(jcr) || !fd_ok) {
395 cancel_storage_daemon_job(jcr);
398 /* Note, the SD stores in jcr->JobFiles/ReadBytes/JobBytes/Errors */
399 wait_for_storage_daemon_termination(jcr);
401 /* Return values from FD */
/* The counters parsed from the File daemon message are stored in jcr here. */
403 jcr->JobFiles = JobFiles;
404 jcr->Errors = Errors;
405 jcr->ReadBytes = ReadBytes;
406 jcr->JobBytes = JobBytes;
408 jcr->Encrypt = Encrypt;
410 Jmsg(jcr, M_FATAL, 0, _("No Job status returned from FD.\n"));
413 // Dmsg4(100, "fd_ok=%d FDJS=%d JS=%d SDJS=%d\n", fd_ok, jcr->FDJobStatus,
414 // jcr->JobStatus, jcr->SDJobStatus);
416 /* Return the first error status we find Dir, FD, or SD */
417 if (!fd_ok || is_bnet_error(fd)) { /* if fd not set, that use !fd_ok */
418 jcr->FDJobStatus = JS_ErrorTerminated;
420 if (jcr->JobStatus != JS_Terminated) {
421 return jcr->JobStatus;
423 if (jcr->FDJobStatus != JS_Terminated) {
424 return jcr->FDJobStatus;
426 return jcr->SDJobStatus;
430 * Release resources allocated during backup.
/*
 * Finish a backup job and emit the job report.
 *
 *   jcr      - job control record
 *   TermCode - termination code passed on to update_job_end()
 *
 * A job whose level is L_VIRTUAL_FULL is handed to vbackup_cleanup().
 * Otherwise the job end is recorded, the Job, Client and Media records
 * are read back for the report, the bootstrap file is updated, a
 * termination message is chosen from jcr->JobStatus, and the full report
 * is issued with Jmsg().
 */
432 void backup_cleanup(JCR *jcr, int TermCode)
434 char sdt[50], edt[50], schedt[50];
435 char ec1[30], ec2[30], ec3[30], ec4[30], ec5[30], compress[50];
436 char ec6[30], ec7[30], ec8[30], elapsed[50];
437 char term_code[100], fd_term_msg[100], sd_term_msg[100];
438 const char *term_msg;
439 int msg_type = M_INFO;
442 double kbps, compression;
445 if (jcr->get_JobLevel() == L_VIRTUAL_FULL) {
446 vbackup_cleanup(jcr, TermCode);
449 Dmsg2(100, "Enter backup_cleanup %d %c\n", TermCode, TermCode);
/* The media and client records start zeroed; only the lookup keys are filled in below. */
450 memset(&mr, 0, sizeof(mr));
451 memset(&cr, 0, sizeof(cr));
453 update_job_end(jcr, TermCode);
/* A failed Job record lookup is reported as a warning and the job is marked JS_ErrorTerminated. */
455 if (!db_get_job_record(jcr, jcr->db, &jcr->jr)) {
456 Jmsg(jcr, M_WARNING, 0, _("Error getting Job record for Job report: ERR=%s"),
457 db_strerror(jcr->db));
458 set_jcr_job_status(jcr, JS_ErrorTerminated);
/* The client record is looked up by name; in the visible lines a failure only produces a warning. */
461 bstrncpy(cr.Name, jcr->client->name(), sizeof(cr.Name));
462 if (!db_get_client_record(jcr, jcr->db, &cr)) {
463 Jmsg(jcr, M_WARNING, 0, _("Error getting Client record for Job report: ERR=%s"),
464 db_strerror(jcr->db));
/* The media record is looked up by jcr->VolumeName. */
467 bstrncpy(mr.VolumeName, jcr->VolumeName, sizeof(mr.VolumeName));
468 if (!db_get_media_record(jcr, jcr->db, &mr)) {
469 Jmsg(jcr, M_WARNING, 0, _("Error getting Media record for Volume \"%s\": ERR=%s"),
470 mr.VolumeName, db_strerror(jcr->db));
471 set_jcr_job_status(jcr, JS_ErrorTerminated);
474 update_bootstrap_file(jcr);
/* Choose the termination text (and message type) from the final job status. */
476 switch (jcr->JobStatus) {
478 if (jcr->Errors || jcr->SDErrors) {
479 term_msg = _("Backup OK -- with warnings");
481 term_msg = _("Backup OK");
485 case JS_ErrorTerminated:
486 term_msg = _("*** Backup Error ***");
487 msg_type = M_ERROR; /* Generate error message */
/* If a Storage daemon socket exists, signal BNET_TERMINATE and cancel the SD message thread. */
488 if (jcr->store_bsock) {
489 jcr->store_bsock->signal(BNET_TERMINATE);
490 if (jcr->SD_msg_chan) {
491 pthread_cancel(jcr->SD_msg_chan);
496 term_msg = _("Backup Canceled");
497 if (jcr->store_bsock) {
498 jcr->store_bsock->signal(BNET_TERMINATE);
499 if (jcr->SD_msg_chan) {
500 pthread_cancel(jcr->SD_msg_chan);
/* Any other status: the message is formatted into the local term_code buffer. */
505 term_msg = term_code;
506 sprintf(term_code, _("Inappropriate term code: %c\n"), jcr->JobStatus);
509 bstrftimes(schedt, sizeof(schedt), jcr->jr.SchedTime);
510 bstrftimes(sdt, sizeof(sdt), jcr->jr.StartTime);
511 bstrftimes(edt, sizeof(edt), jcr->jr.EndTime);
512 RunTime = jcr->jr.EndTime - jcr->jr.StartTime;
/* NOTE(review): this divides by RunTime; the guard against RunTime == 0 is not visible here -- confirm. */
516 kbps = ((double)jcr->jr.JobBytes) / (1000.0 * (double)RunTime);
518 if (!db_get_job_volume_names(jcr, jcr->db, jcr->jr.JobId, &jcr->VolumeName)) {
520 * Note, if the job has erred, most likely it did not write any
521 * tape, so suppress this "error" message since in that case
522 * it is normal. Or look at it the other way, only for a
523 * normal exit should we complain about this error.
525 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes) {
526 Jmsg(jcr, M_ERROR, 0, "%s", db_strerror(jcr->db));
528 jcr->VolumeName[0] = 0; /* none */
/* Compression is reported as "None" when nothing was read or the ratio is below 0.5 percent. */
531 if (jcr->ReadBytes == 0) {
532 bstrncpy(compress, "None", sizeof(compress));
534 compression = (double)100 - 100.0 * ((double)jcr->JobBytes / (double)jcr->ReadBytes);
535 if (compression < 0.5) {
536 bstrncpy(compress, "None", sizeof(compress));
538 bsnprintf(compress, sizeof(compress), "%.1f %%", compression);
541 jobstatus_to_ascii(jcr->FDJobStatus, fd_term_msg, sizeof(fd_term_msg));
542 jobstatus_to_ascii(jcr->SDJobStatus, sd_term_msg, sizeof(sd_term_msg));
544 // bmicrosleep(15, 0); /* for debugging SIGHUP */
/*
 * The job report.  NOTE(review): only part of the format string and of
 * the argument list is visible in this listing, so the pairing of
 * conversions and arguments cannot be checked from here.
 */
546 Jmsg(jcr, msg_type, 0, _("%s %s %s (%s): %s\n"
547 " Build OS: %s %s %s\n"
550 " Backup Level: %s%s\n"
551 " Client: \"%s\" %s\n"
552 " FileSet: \"%s\" %s\n"
553 " Pool: \"%s\" (From %s)\n"
554 " Catalog: \"%s\" (From %s)\n"
555 " Storage: \"%s\" (From %s)\n"
556 " Scheduled time: %s\n"
559 " Elapsed time: %s\n"
561 " FD Files Written: %s\n"
562 " SD Files Written: %s\n"
563 " FD Bytes Written: %s (%sB)\n"
564 " SD Bytes Written: %s (%sB)\n"
566 " Software Compression: %s\n"
570 " Volume name(s): %s\n"
571 " Volume Session Id: %d\n"
572 " Volume Session Time: %d\n"
573 " Last Volume Bytes: %s (%sB)\n"
574 " Non-fatal FD errors: %d\n"
576 " FD termination status: %s\n"
577 " SD termination status: %s\n"
578 " Termination: %s\n\n"),
579 BACULA, my_name, VERSION, LSMDATE, edt,
580 HOST_OS, DISTNAME, DISTVER,
583 level_to_str(jcr->get_JobLevel()), jcr->since,
584 jcr->client->name(), cr.Uname,
585 jcr->fileset->name(), jcr->FSCreateTime,
586 jcr->pool->name(), jcr->pool_source,
587 jcr->catalog->name(), jcr->catalog_source,
588 jcr->wstore->name(), jcr->wstore_source,
592 edit_utime(RunTime, elapsed, sizeof(elapsed)),
594 edit_uint64_with_commas(jcr->jr.JobFiles, ec1),
595 edit_uint64_with_commas(jcr->SDJobFiles, ec2),
596 edit_uint64_with_commas(jcr->jr.JobBytes, ec3),
597 edit_uint64_with_suffix(jcr->jr.JobBytes, ec4),
598 edit_uint64_with_commas(jcr->SDJobBytes, ec5),
599 edit_uint64_with_suffix(jcr->SDJobBytes, ec6),
602 jcr->VSS?_("yes"):_("no"),
603 jcr->Encrypt?_("yes"):_("no"),
604 jcr->accurate?_("yes"):_("no"),
608 edit_uint64_with_commas(mr.VolBytes, ec7),
609 edit_uint64_with_suffix(mr.VolBytes, ec8),
616 Dmsg0(100, "Leave backup_cleanup()\n");
/*
 * Write the bootstrap records for this job, if the job asks for them.
 *
 * Work is done only when jcr->JobStatus is JS_Terminated, the job wrote
 * bytes (jcr->jr.JobBytes is non-zero) and jcr->job->WriteBootstrap is
 * set.  The target name is produced by edit_job_codes(); the output goes
 * either to a pipe opened with open_bpipe() or to a file opened with
 * fopen().  A header line and one record per job volume are written.
 * Failure to get the volume parameters or to open the target is reported
 * with M_ERROR.
 */
619 void update_bootstrap_file(JCR *jcr)
621 /* Now update the bootstrap file if any */
622 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes &&
623 jcr->job->WriteBootstrap) {
627 POOLMEM *fname = get_pool_memory(PM_FNAME);
628 fname = edit_job_codes(jcr, fname, jcr->job->WriteBootstrap, "");
630 VOL_PARAMS *VolParams = NULL;
/* NOTE(review): the test that selects pipe versus file is not visible here; presumably fname starts with "|" -- confirm. */
636 bpipe = open_bpipe(fname+1, 0, "w"); /* skip first char "|" */
637 fd = bpipe ? bpipe->wfd : NULL;
639 /* ***FIXME*** handle BASE */
/* A job at level L_FULL opens the file with "w+b"; every other level uses "a+b". */
640 fd = fopen(fname, jcr->get_JobLevel()==L_FULL?"w+b":"a+b");
643 VolCount = db_get_job_volume_parameters(jcr, jcr->db, jcr->JobId,
646 Jmsg(jcr, M_ERROR, 0, _("Could not get Job Volume Parameters to "
647 "update Bootstrap file. ERR=%s\n"), db_strerror(jcr->db));
/* The job is marked failed only if the Storage daemon reported files written. */
648 if (jcr->SDJobFiles != 0) {
649 set_jcr_job_status(jcr, JS_ErrorTerminated);
653 /* Start output with when and who wrote it */
654 bstrftimes(edt, sizeof(edt), time(NULL));
655 fprintf(fd, "# %s - %s - %s%s\n", edt, jcr->jr.Job,
656 level_to_str(jcr->get_JobLevel()), jcr->since);
657 for (int i=0; i < VolCount; i++) {
658 /* Write the record */
659 fprintf(fd, "Volume=\"%s\"\n", VolParams[i].VolumeName);
660 fprintf(fd, "MediaType=\"%s\"\n", VolParams[i].MediaType);
/* The Slot line is written only for a positive slot number. */
661 if (VolParams[i].Slot > 0) {
662 fprintf(fd, "Slot=%d\n", VolParams[i].Slot);
664 fprintf(fd, "VolSessionId=%u\n", jcr->VolSessionId);
665 fprintf(fd, "VolSessionTime=%u\n", jcr->VolSessionTime);
666 fprintf(fd, "VolFile=%u-%u\n", VolParams[i].StartFile,
667 VolParams[i].EndFile);
668 fprintf(fd, "VolBlock=%u-%u\n", VolParams[i].StartBlock,
669 VolParams[i].EndBlock);
670 fprintf(fd, "FileIndex=%d-%d\n", VolParams[i].FirstIndex,
671 VolParams[i].LastIndex);
/* NOTE(review): the lines that close fd/bpipe and release VolParams are not visible in this listing -- confirm they exist. */
683 Jmsg(jcr, M_ERROR, 0, _("Could not open WriteBootstrap file:\n"
684 "%s: ERR=%s\n"), fname, be.bstrerror());
685 set_jcr_job_status(jcr, JS_ErrorTerminated);
687 free_pool_memory(fname);