2 Bacula® - The Network Backup Solution
4 Copyright (C) 2000-2009 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from
7 many others, a complete list can be found in the file AUTHORS.
8 This program is Free Software; you can redistribute it and/or
9 modify it under the terms of version two of the GNU General Public
10 License as published by the Free Software Foundation and included
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 Bacula® is a registered trademark of Kern Sibbald.
24 The licensor of Bacula is the Free Software Foundation Europe
25 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
26 Switzerland, email:ftf@fsfeurope.org.
30 * Bacula Director -- backup.c -- responsible for doing backup jobs
32 * Kern Sibbald, March MM
34 * Basic tasks done here:
35 * Open DB and create records for this job.
36 * Open Message Channel with Storage daemon to tell him a job will be starting.
37 * Open connection with File daemon and pass him commands
39 * When the File daemon finishes the job, update the DB.
48 /* Commands sent to File daemon */
/*
 * backupcmd is sent verbatim to the File daemon.  storaddr is a
 * printf-style template: Storage daemon address (%s), port (%d)
 * and TLS requirement code (%d).
 */
49 static char backupcmd[] = "backup\n";
50 static char storaddr[] = "storage address=%s port=%d ssl=%d\n";
52 /* Responses received from File daemon */
/* OKbackup and OKstore are the replies checked after the backup and storage commands. */
53 static char OKbackup[] = "2000 OK backup\n";
54 static char OKstore[] = "2000 OK storage\n";
/*
 * EndJob is used as an sscanf() pattern on the File daemon's
 * termination message; a full match yields seven fields.
 */
55 static char EndJob[] = "2800 End Job TermCode=%d JobFiles=%u "
56 "ReadBytes=%llu JobBytes=%llu Errors=%u "
57 "VSS=%d Encrypt=%d\n";
58 /* Pre 1.39.29 (04Dec06) EndJob */
/* Five-field form, tried when the current EndJob pattern does not match. */
59 static char OldEndJob[] = "2800 End Job TermCode=%d JobFiles=%u "
60 "ReadBytes=%llu JobBytes=%llu Errors=%u\n";
62 * Called here before the job is run to do the job
/*
 * Prepare a backup job before it is run.
 *
 * Virtual Full jobs are delegated to do_vbackup_init().  For all
 * other levels the read storage is released, the FileSet and Pool
 * catalog records are fetched or created, the definitive level and
 * since time are computed, pool overrides are applied, duplicate
 * jobs are checked, the write storage is taken from the Pool and
 * clone jobs are started.
 *
 * NOTE(review): the statements executed when a step fails are not
 * visible in this listing; presumably each failure returns false --
 * confirm against the full source.
 */
65 bool do_backup_init(JCR *jcr)
68 if (jcr->get_JobLevel() == L_VIRTUAL_FULL) {
69 return do_vbackup_init(jcr);
71 free_rstorage(jcr); /* we don't read so release */
73 if (!get_or_create_fileset_record(jcr)) {
78 * Get definitive Job level and since time
/* The since text is written into jcr->since, bounded by its size. */
80 get_level_since_time(jcr, jcr->since, sizeof(jcr->since));
82 apply_pool_overrides(jcr);
84 if (!allow_duplicate_job(jcr)) {
/* A PoolId of 0 is treated as failure to get or create the Pool record. */
88 jcr->jr.PoolId = get_or_create_pool_record(jcr, jcr->pool->name());
89 if (jcr->jr.PoolId == 0) {
93 /* If pool storage specified, use it instead of job storage */
94 copy_wstorage(jcr, jcr->pool->storage, _("Pool resource"));
/* NOTE(review): the condition guarding this fatal message is not visible in this listing. */
97 Jmsg(jcr, M_FATAL, 0, _("No Storage specification found in Job or Pool.\n"));
101 create_clones(jcr); /* run any clone jobs */
107 * Foreach files in currrent list, send "/path/fname\0LStat" to FD
109 static int accurate_list_handler(void *ctx, int num_fields, char **row)
111 JCR *jcr = (JCR *)ctx;
113 if (job_canceled(jcr)) {
117 if (row[2] > 0) { /* discard when file_index == 0 */
118 jcr->file_bsock->fsend("%s%s%c%s", row[0], row[1], 0, row[4]);
124 * Send current file list to FD
125 * DIR -> FD : accurate files=xxxx
126 * DIR -> FD : /path/to/file\0Lstat
127 * DIR -> FD : /path/to/dir/\0Lstat
/*
 * Send the list of files already known to the catalog to the File
 * daemon, for use by an accurate backup.
 *
 * NOTE(review): the declarations of buf, jobids and nb and the
 * return statements are not visible in this listing.
 */
131 bool send_accurate_current_files(JCR *jcr)
/* Nothing is sent for non-accurate jobs, canceled jobs or Full level. */
137 if (!jcr->accurate || job_canceled(jcr) || jcr->get_JobLevel()==L_FULL) {
/* Collect the job ids to take the file list from; the result is stored in jobids. */
140 db_accurate_get_jobids(jcr, jcr->db, &jcr->jr, &jobids);
142 if (jobids.count == 0) {
143 Jmsg(jcr, M_FATAL, 0, _("Cannot find previous jobids.\n"));
146 if (jcr->JobId) { /* display the message only for real jobs */
147 Jmsg(jcr, M_INFO, 0, _("Sending Accurate information.\n"));
149 /* to be able to allocate the right size for htable */
/* The summed JobFiles of those jobs is announced to the FD before the list itself. */
150 Mmsg(buf, "SELECT sum(JobFiles) FROM Job WHERE JobId IN (%s)", jobids.list);
151 db_sql_query(jcr->db, buf.c_str(), db_list_handler, &nb);
152 Dmsg2(200, "jobids=%s nb=%s\n", jobids.list, nb.list);
153 jcr->file_bsock->fsend("accurate files=%s\n", nb.list);
/* The file list is read through the batch connection (jcr->db_batch). */
155 if (!db_open_batch_connexion(jcr, jcr->db)) {
156 Jmsg0(jcr, M_FATAL, 0, "Can't get batch sql connexion");
/* accurate_list_handler() is invoked per row with jcr as its context. */
160 db_get_file_list(jcr, jcr->db_batch, jobids.list, accurate_list_handler, (void *)jcr);
162 /* TODO: close the batch connexion ? (can be used very soon) */
/* End-of-data signal closes the list on the FD side of the protocol. */
164 jcr->file_bsock->signal(BNET_EOD);
170 * Do a backup of the specified FileSet
172 * Returns: false on failure
/*
 * Run one backup job from the Director side.
 *
 * Virtual Full jobs are handed to do_vbackup().  Otherwise the job
 * start is recorded in the catalog, a session is opened with the
 * Storage daemon and then with the File daemon; the File daemon is
 * sent the include and exclude lists, the level, the Storage daemon
 * address, the runscripts, the accurate file list (when applicable)
 * and finally the backup command.  The function then waits for
 * termination and calls backup_cleanup() when the job ended with
 * JS_Terminated.
 *
 * NOTE(review): the statements taken when a step fails, and the
 * declarations of stat, fd, store and ed1, are not visible in this
 * listing; verify them against the full source.
 */
175 bool do_backup(JCR *jcr)
178 int tls_need = BNET_TLS_NONE;
183 if (jcr->get_JobLevel() == L_VIRTUAL_FULL) {
184 return do_vbackup(jcr);
187 /* Print Job Start message */
188 Jmsg(jcr, M_INFO, 0, _("Start Backup JobId %s, Job=%s\n"),
189 edit_uint64(jcr->JobId, ed1), jcr->Job);
191 set_jcr_job_status(jcr, JS_Running);
192 Dmsg2(100, "JobId=%d JobLevel=%c\n", jcr->jr.JobId, jcr->jr.JobLevel);
193 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
194 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
199 * Open a message channel connection with the Storage
200 * daemon. This is to let him know that our client
201 * will be contacting him for a backup session.
204 Dmsg0(110, "Open connection with storage daemon\n");
205 set_jcr_job_status(jcr, JS_WaitSD);
207 * Start conversation with Storage daemon
209 if (!connect_to_storage_daemon(jcr, 10, SDConnectTimeout, 1)) {
213 * Now start a job with the Storage daemon
215 if (!start_storage_daemon_job(jcr, NULL, jcr->wstorage)) {
220 * Start the job prior to starting the message thread below
221 * to avoid two threads from using the BSOCK structure at
224 if (!bnet_fsend(jcr->store_bsock, "run")) {
229 * Now start a Storage daemon message thread. Note,
230 * this thread is used to provide the catalog services
231 * for the backup job, including inserting the attributes
232 * into the catalog. See catalog_update() in catreq.c
234 if (!start_storage_daemon_message_thread(jcr)) {
237 Dmsg0(150, "Storage daemon connection OK\n");
/* Status is JS_WaitFD while connecting to the File daemon, then back to JS_Running. */
239 set_jcr_job_status(jcr, JS_WaitFD);
240 if (!connect_to_file_daemon(jcr, 10, FDConnectTimeout, 1)) {
244 set_jcr_job_status(jcr, JS_Running);
245 fd = jcr->file_bsock;
247 if (!send_include_list(jcr)) {
251 if (!send_exclude_list(jcr)) {
255 if (!send_level_command(jcr)) {
260 * send Storage daemon address to the File daemon
/* When no SDDport is configured, the regular SDport is used in its place. */
263 if (store->SDDport == 0) {
264 store->SDDport = store->SDport;
267 /* TLS Requirement */
/* tls_need stays BNET_TLS_NONE unless the storage resource enables TLS. */
268 if (store->tls_enable) {
269 if (store->tls_require) {
270 tls_need = BNET_TLS_REQUIRED;
272 tls_need = BNET_TLS_OK;
276 fd->fsend(storaddr, store->address, store->SDDport, tls_need);
277 if (!response(jcr, fd, OKstore, "Storage", DISPLAY_ERROR)) {
281 if (!send_runscripts_commands(jcr)) {
286 * We re-update the job start record so that the start
287 * time is set after the run before job. This avoids
288 * that any files created by the run before job will
289 * be saved twice. They will be backed up in the current
290 * job, but not in the next one unless they are changed.
291 * Without this, they will be backed up in this job and
292 * in the next job run because in that case, their date
293 * is after the start of this run.
295 jcr->start_time = time(NULL);
296 jcr->jr.StartTime = jcr->start_time;
297 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
298 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
302 * If backup is in accurate mode, we send the list of
305 if (!send_accurate_current_files(jcr)) {
309 /* Send backup command */
310 fd->fsend(backupcmd);
311 if (!response(jcr, fd, OKbackup, "backup", DISPLAY_ERROR)) {
315 /* Pickup Job termination data */
/*
 * NOTE(review): this call passes no timeout, so the prototype
 * presumably declares a default for that parameter -- confirm.
 */
316 stat = wait_for_job_termination(jcr);
317 db_write_batch_file_records(jcr); /* used by bulk batch file insert */
318 if (stat == JS_Terminated) {
319 backup_cleanup(jcr, stat);
324 /* Come here only after starting SD thread */
/* Failure path: mark the job as failed, then wait again using FDConnectTimeout. */
326 set_jcr_job_status(jcr, JS_ErrorTerminated);
327 Dmsg1(400, "wait for sd. use=%d\n", jcr->use_count());
329 wait_for_job_termination(jcr, FDConnectTimeout);
330 Dmsg1(400, "after wait for sd. use=%d\n", jcr->use_count());
336 * Here we wait for the File daemon to signal termination,
337 * then we wait for the Storage daemon. When both
338 * are done, we return the job status.
339 * Also used by restore.c
/*
 * Wait for the File daemon, then the Storage daemon, to finish.
 *
 *  jcr     - job control record; jcr->file_bsock is the FD socket
 *  timeout - handed to start_bsock_timer() for the FD socket
 *
 * Returns the first status that is not JS_Terminated among
 * jcr->JobStatus and jcr->FDJobStatus, otherwise jcr->SDJobStatus.
 *
 * NOTE(review): the declarations of n, tid, fd_ok, VSS and Encrypt,
 * the start of the sscanf() condition and several guards are not
 * visible in this listing.
 */
341 int wait_for_job_termination(JCR *jcr, int timeout)
344 BSOCK *fd = jcr->file_bsock;
346 uint32_t JobFiles, JobErrors;
/* JobWarnings is never filled from the FD message in the visible code, so it stays 0. */
347 uint32_t JobWarnings = 0;
348 uint64_t ReadBytes = 0;
349 uint64_t JobBytes = 0;
354 set_jcr_job_status(jcr, JS_Running);
358 tid = start_bsock_timer(fd, timeout); /* TODO: New timeout directive??? */
360 /* Wait for Client to terminate */
/* Messages are read until bget_dirmsg() returns a negative value. */
361 while ((n = bget_dirmsg(fd)) >= 0) {
363 (sscanf(fd->msg, EndJob, &jcr->FDJobStatus, &JobFiles,
364 &ReadBytes, &JobBytes, &JobErrors, &VSS, &Encrypt) == 7 ||
365 sscanf(fd->msg, OldEndJob, &jcr->FDJobStatus, &JobFiles,
366 &ReadBytes, &JobBytes, &JobErrors) == 5)) {
/* A parsed end-of-job message: the FD status becomes the job status. */
368 set_jcr_job_status(jcr, jcr->FDJobStatus);
369 Dmsg1(100, "FDStatus=%c\n", (char)jcr->JobStatus);
371 Jmsg(jcr, M_WARNING, 0, _("Unexpected Client Job message: %s\n"),
374 if (job_canceled(jcr)) {
379 stop_bsock_timer(tid);
382 if (is_bnet_error(fd)) {
383 Jmsg(jcr, M_FATAL, 0, _("Network error with FD during %s: ERR=%s\n"),
384 job_type_to_str(jcr->get_JobType()), fd->bstrerror());
386 fd->signal(BNET_TERMINATE); /* tell Client we are terminating */
389 /* Force cancel in SD if failing */
390 if (job_canceled(jcr) || !fd_ok) {
391 cancel_storage_daemon_job(jcr);
394 /* Note, the SD stores in jcr->JobFiles/ReadBytes/JobBytes/JobErrors */
395 wait_for_storage_daemon_termination(jcr);
397 /* Return values from FD */
/*
 * The FD counters replace the values in jcr, except JobErrors which
 * is accumulated.  NOTE(review): the guard selecting between this
 * copy and the "No Job status" message is not visible here.
 */
399 jcr->JobFiles = JobFiles;
400 jcr->JobErrors += JobErrors; /* Keep total errors */
401 jcr->ReadBytes = ReadBytes;
402 jcr->JobBytes = JobBytes;
403 jcr->JobWarnings = JobWarnings;
405 jcr->Encrypt = Encrypt;
407 Jmsg(jcr, M_FATAL, 0, _("No Job status returned from FD.\n"));
410 // Dmsg4(100, "fd_ok=%d FDJS=%d JS=%d SDJS=%d\n", fd_ok, jcr->FDJobStatus,
411 // jcr->JobStatus, jcr->SDJobStatus);
413 /* Return the first error status we find Dir, FD, or SD */
414 if (!fd_ok || is_bnet_error(fd)) { /* if fd not set, that use !fd_ok */
415 jcr->FDJobStatus = JS_ErrorTerminated;
417 if (jcr->JobStatus != JS_Terminated) {
418 return jcr->JobStatus;
420 if (jcr->FDJobStatus != JS_Terminated) {
421 return jcr->FDJobStatus;
423 return jcr->SDJobStatus;
427 * Release resources allocated during backup.
/*
 * Finish a backup job: record its end in the catalog, update the
 * bootstrap file and emit the job report.
 *
 *  jcr      - job control record of the finished job
 *  TermCode - termination code handed to update_job_end()
 *
 * Virtual Full jobs are delegated to vbackup_cleanup().
 *
 * NOTE(review): the declarations of mr, cr and RunTime, the case
 * labels of the switch and parts of the report format and argument
 * list are not visible in this listing.
 */
429 void backup_cleanup(JCR *jcr, int TermCode)
431 char sdt[50], edt[50], schedt[50];
432 char ec1[30], ec2[30], ec3[30], ec4[30], ec5[30], compress[50];
433 char ec6[30], ec7[30], ec8[30], elapsed[50];
434 char term_code[100], fd_term_msg[100], sd_term_msg[100];
435 const char *term_msg;
436 int msg_type = M_INFO;
439 double kbps, compression;
442 if (jcr->get_JobLevel() == L_VIRTUAL_FULL) {
443 vbackup_cleanup(jcr, TermCode);
447 Dmsg2(100, "Enter backup_cleanup %d %c\n", TermCode, TermCode);
448 memset(&mr, 0, sizeof(mr));
449 memset(&cr, 0, sizeof(cr));
451 update_job_end(jcr, TermCode);
/* Re-read the Job record for the report; a failure marks the job as failed. */
453 if (!db_get_job_record(jcr, jcr->db, &jcr->jr)) {
454 Jmsg(jcr, M_WARNING, 0, _("Error getting Job record for Job report: ERR=%s"),
455 db_strerror(jcr->db));
456 set_jcr_job_status(jcr, JS_ErrorTerminated);
/* The Client record is looked up by name; a failure only produces a warning. */
459 bstrncpy(cr.Name, jcr->client->name(), sizeof(cr.Name));
460 if (!db_get_client_record(jcr, jcr->db, &cr)) {
461 Jmsg(jcr, M_WARNING, 0, _("Error getting Client record for Job report: ERR=%s"),
462 db_strerror(jcr->db));
/* The Media record is looked up by jcr->VolumeName; a failure marks the job as failed. */
465 bstrncpy(mr.VolumeName, jcr->VolumeName, sizeof(mr.VolumeName));
466 if (!db_get_media_record(jcr, jcr->db, &mr)) {
467 Jmsg(jcr, M_WARNING, 0, _("Error getting Media record for Volume \"%s\": ERR=%s"),
468 mr.VolumeName, db_strerror(jcr->db));
469 set_jcr_job_status(jcr, JS_ErrorTerminated);
472 update_bootstrap_file(jcr);
/* Choose the termination text (and message type) from the final job status. */
474 switch (jcr->JobStatus) {
476 if (jcr->JobErrors || jcr->SDErrors) {
477 term_msg = _("Backup OK -- with warnings");
479 term_msg = _("Backup OK");
483 term_msg = _("Backup OK -- with warnings");
486 case JS_ErrorTerminated:
487 term_msg = _("*** Backup Error ***");
488 msg_type = M_ERROR; /* Generate error message */
/* If the SD socket is still open, tell the SD to terminate and cancel the SD message thread. */
489 if (jcr->store_bsock) {
490 jcr->store_bsock->signal(BNET_TERMINATE);
491 if (jcr->SD_msg_chan) {
492 pthread_cancel(jcr->SD_msg_chan);
497 term_msg = _("Backup Canceled");
498 if (jcr->store_bsock) {
499 jcr->store_bsock->signal(BNET_TERMINATE);
500 if (jcr->SD_msg_chan) {
501 pthread_cancel(jcr->SD_msg_chan);
/*
 * NOTE(review): sprintf() below is not bounded by sizeof(term_code);
 * bsnprintf(), already used further down in this function, would be.
 */
506 term_msg = term_code;
507 sprintf(term_code, _("Inappropriate term code: %c\n"), jcr->JobStatus);
510 bstrftimes(schedt, sizeof(schedt), jcr->jr.SchedTime);
511 bstrftimes(sdt, sizeof(sdt), jcr->jr.StartTime);
512 bstrftimes(edt, sizeof(edt), jcr->jr.EndTime);
513 RunTime = jcr->jr.EndTime - jcr->jr.StartTime;
/*
 * Rate is JobBytes divided by (1000 * RunTime).
 * NOTE(review): any guard against RunTime == 0 is not visible in this listing.
 */
517 kbps = ((double)jcr->jr.JobBytes) / (1000.0 * (double)RunTime);
519 if (!db_get_job_volume_names(jcr, jcr->db, jcr->jr.JobId, &jcr->VolumeName)) {
521 * Note, if the job has erred, most likely it did not write any
522 * tape, so suppress this "error" message since in that case
523 * it is normal. Or look at it the other way, only for a
524 * normal exit should we complain about this error.
526 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes) {
527 Jmsg(jcr, M_ERROR, 0, "%s", db_strerror(jcr->db));
529 jcr->VolumeName[0] = 0; /* none */
/*
 * Compression is 100 - 100 * JobBytes / ReadBytes, reported as
 * "None" when nothing was read or the result is below 0.5.
 */
532 if (jcr->ReadBytes == 0) {
533 bstrncpy(compress, "None", sizeof(compress));
535 compression = (double)100 - 100.0 * ((double)jcr->JobBytes / (double)jcr->ReadBytes);
536 if (compression < 0.5) {
537 bstrncpy(compress, "None", sizeof(compress));
539 bsnprintf(compress, sizeof(compress), "%.1f %%", compression);
542 jobstatus_to_ascii(jcr->FDJobStatus, fd_term_msg, sizeof(fd_term_msg));
543 jobstatus_to_ascii(jcr->SDJobStatus, sd_term_msg, sizeof(sd_term_msg));
545 // bmicrosleep(15, 0); /* for debugging SIGHUP */
/* Emit the job report with the message type chosen above. */
547 Jmsg(jcr, msg_type, 0, _("%s %s %s (%s): %s\n"
548 " Build OS: %s %s %s\n"
551 " Backup Level: %s%s\n"
552 " Client: \"%s\" %s\n"
553 " FileSet: \"%s\" %s\n"
554 " Pool: \"%s\" (From %s)\n"
555 " Catalog: \"%s\" (From %s)\n"
556 " Storage: \"%s\" (From %s)\n"
557 " Scheduled time: %s\n"
560 " Elapsed time: %s\n"
562 " FD Files Written: %s\n"
563 " SD Files Written: %s\n"
564 " FD Bytes Written: %s (%sB)\n"
565 " SD Bytes Written: %s (%sB)\n"
567 " Software Compression: %s\n"
571 " Volume name(s): %s\n"
572 " Volume Session Id: %d\n"
573 " Volume Session Time: %d\n"
574 " Last Volume Bytes: %s (%sB)\n"
575 " Non-fatal FD errors: %d\n"
577 " FD termination status: %s\n"
578 " SD termination status: %s\n"
579 " Termination: %s\n\n"),
580 BACULA, my_name, VERSION, LSMDATE, edt,
581 HOST_OS, DISTNAME, DISTVER,
584 level_to_str(jcr->get_JobLevel()), jcr->since,
585 jcr->client->name(), cr.Uname,
586 jcr->fileset->name(), jcr->FSCreateTime,
587 jcr->pool->name(), jcr->pool_source,
588 jcr->catalog->name(), jcr->catalog_source,
589 jcr->wstore->name(), jcr->wstore_source,
593 edit_utime(RunTime, elapsed, sizeof(elapsed)),
595 edit_uint64_with_commas(jcr->jr.JobFiles, ec1),
596 edit_uint64_with_commas(jcr->SDJobFiles, ec2),
597 edit_uint64_with_commas(jcr->jr.JobBytes, ec3),
598 edit_uint64_with_suffix(jcr->jr.JobBytes, ec4),
599 edit_uint64_with_commas(jcr->SDJobBytes, ec5),
600 edit_uint64_with_suffix(jcr->SDJobBytes, ec6),
603 jcr->VSS?_("yes"):_("no"),
604 jcr->Encrypt?_("yes"):_("no"),
605 jcr->accurate?_("yes"):_("no"),
609 edit_uint64_with_commas(mr.VolBytes, ec7),
610 edit_uint64_with_suffix(mr.VolBytes, ec8),
617 Dmsg0(100, "Leave backup_cleanup()\n");
/*
 * Write the bootstrap records of a finished job.
 *
 * Nothing is written unless the job status is JS_Terminated, the
 * job stored some bytes and the Job resource has a WriteBootstrap
 * setting.  The output goes either to a pipe or to a file.
 *
 * NOTE(review): the test that selects pipe or file, the
 * declarations of fd, bpipe, VolCount and be, and the statements
 * that close the output and release VolParams are not visible in
 * this listing.
 */
620 void update_bootstrap_file(JCR *jcr)
622 /* Now update the bootstrap file if any */
623 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes &&
624 jcr->job->WriteBootstrap) {
/* The target name is the WriteBootstrap setting after job code editing. */
628 POOLMEM *fname = get_pool_memory(PM_FNAME);
629 fname = edit_job_codes(jcr, fname, jcr->job->WriteBootstrap, "");
631 VOL_PARAMS *VolParams = NULL;
633 char edt[50], ed1[50], ed2[50];
637 bpipe = open_bpipe(fname+1, 0, "w"); /* skip first char "|" */
638 fd = bpipe ? bpipe->wfd : NULL;
640 /* ***FIXME*** handle BASE */
/* A Full job opens the file with "w+b"; every other level uses "a+b". */
641 fd = fopen(fname, jcr->get_JobLevel()==L_FULL?"w+b":"a+b");
644 VolCount = db_get_job_volume_parameters(jcr, jcr->db, jcr->JobId,
647 Jmsg(jcr, M_ERROR, 0, _("Could not get Job Volume Parameters to "
648 "update Bootstrap file. ERR=%s\n"), db_strerror(jcr->db));
/* The job is marked failed only when the SD reported files for it. */
649 if (jcr->SDJobFiles != 0) {
650 set_jcr_job_status(jcr, JS_ErrorTerminated);
654 /* Start output with when and who wrote it */
655 bstrftimes(edt, sizeof(edt), time(NULL));
656 fprintf(fd, "# %s - %s - %s%s\n", edt, jcr->jr.Job,
657 level_to_str(jcr->get_JobLevel()), jcr->since);
/* One record per volume returned by db_get_job_volume_parameters(). */
658 for (int i=0; i < VolCount; i++) {
659 /* Write the record */
660 fprintf(fd, "Volume=\"%s\"\n", VolParams[i].VolumeName);
661 fprintf(fd, "MediaType=\"%s\"\n", VolParams[i].MediaType);
/* The Slot line is written only for a positive slot number. */
662 if (VolParams[i].Slot > 0) {
663 fprintf(fd, "Slot=%d\n", VolParams[i].Slot);
665 fprintf(fd, "VolSessionId=%u\n", jcr->VolSessionId);
666 fprintf(fd, "VolSessionTime=%u\n", jcr->VolSessionTime);
667 fprintf(fd, "VolAddr=%s-%s\n",
668 edit_uint64(VolParams[i].StartAddr, ed1),
669 edit_uint64(VolParams[i].EndAddr, ed2));
670 fprintf(fd, "FileIndex=%d-%d\n", VolParams[i].FirstIndex,
671 VolParams[i].LastIndex);
/* Failure to open the output is reported and marks the job as failed. */
683 Jmsg(jcr, M_ERROR, 0, _("Could not open WriteBootstrap file:\n"
684 "%s: ERR=%s\n"), fname, be.bstrerror());
685 set_jcr_job_status(jcr, JS_ErrorTerminated);
687 free_pool_memory(fname);