2 Bacula® - The Network Backup Solution
4 Copyright (C) 2000-2008 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from
7 many others, a complete list can be found in the file AUTHORS.
8 This program is Free Software; you can redistribute it and/or
9 modify it under the terms of version two of the GNU General Public
10 License as published by the Free Software Foundation and included
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 Bacula® is a registered trademark of Kern Sibbald.
24 The licensor of Bacula is the Free Software Foundation Europe
25 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
26 Switzerland, email:ftf@fsfeurope.org.
30 * Bacula Director -- backup.c -- responsible for doing backup jobs
32 * Kern Sibbald, March MM
34 * Basic tasks done here:
35 * Open DB and create records for this job.
36 * Open Message Channel with Storage daemon to tell him a job will be starting.
37 * Open connection with File daemon and pass him commands
39 * When the File daemon finishes the job, update the DB.
/* Wire-protocol strings exchanged between the Director and the File daemon (FD). */
48 /* Commands sent to File daemon */
49 static char backupcmd[] = "backup\n";
/* Formatted in do_backup() with the Storage daemon address, port and TLS requirement. */
50 static char storaddr[] = "storage address=%s port=%d ssl=%d\n";
52 /* Responses received from File daemon */
53 static char OKbackup[] = "2000 OK backup\n";
54 static char OKstore[] = "2000 OK storage\n";
/* sscanf() template for the FD end-of-job report; wait_for_job_termination() expects 7 converted fields. */
55 static char EndJob[] = "2800 End Job TermCode=%d JobFiles=%u "
56 "ReadBytes=%llu JobBytes=%llu Errors=%u "
57 "VSS=%d Encrypt=%d\n";
58 /* Pre 1.39.29 (04Dec06) EndJob */
/* 5-field form without VSS/Encrypt, accepted as an alternative by wait_for_job_termination(). */
59 static char OldEndJob[] = "2800 End Job TermCode=%d JobFiles=%u "
60 "ReadBytes=%llu JobBytes=%llu Errors=%u\n";
62 * Called here before the job is run to do the job
/*
 * Prepare a backup job before it runs: release read storage, make sure the
 * FileSet and Pool catalog records exist, settle the job level and "since"
 * time, apply pool overrides, check the duplicate-job policy, select the
 * write storage and start any clone jobs.
 * Virtual Full jobs are handed to do_vbackup_init() instead.
 * NOTE(review): the return statements of the failure branches are not
 * visible in this listing -- confirm the exact return values in the full file.
 */
65 bool do_backup_init(JCR *jcr)
/* Virtual Full has its own initialisation routine. */
68 if (jcr->get_JobLevel() == L_VIRTUAL_FULL) {
69 return do_vbackup_init(jcr);
71 free_rstorage(jcr); /* we don't read so release */
73 if (!get_or_create_fileset_record(jcr)) {
78 * Get definitive Job level and since time
80 get_level_since_time(jcr, jcr->since, sizeof(jcr->since));
82 apply_pool_overrides(jcr);
84 if (!allow_duplicate_job(jcr)) {
/* A PoolId of 0 is treated as failure to get or create the Pool record. */
88 jcr->jr.PoolId = get_or_create_pool_record(jcr, jcr->pool->name());
89 if (jcr->jr.PoolId == 0) {
93 /* If pool storage specified, use it instead of job storage */
94 copy_wstorage(jcr, jcr->pool->storage, _("Pool resource"));
97 Jmsg(jcr, M_FATAL, 0, _("No Storage specification found in Job or Pool.\n"));
101 create_clones(jcr); /* run any clone jobs */
107 * For each file in the current list, send "/path/fname\0LStat" to FD
109 static int accurate_list_handler(void *ctx, int num_fields, char **row)
111 JCR *jcr = (JCR *)ctx;
113 if (job_canceled(jcr)) {
117 if (row[2] > 0) { /* discard when file_index == 0 */
118 jcr->file_bsock->fsend("%s%s%c%s", row[0], row[1], 0, row[4]);
124 * Send current file list to FD
125 * DIR -> FD : accurate files=xxxx
126 * DIR -> FD : /path/to/file\0Lstat
127 * DIR -> FD : /path/to/dir/\0Lstat
/*
 * Send the File daemon the list of files recorded in the catalog for the
 * JobIds returned by db_accurate_get_jobids() (accurate mode).
 * The early-exit test covers: accurate mode off, job canceled, or level Full.
 * Otherwise the count is announced with "accurate files=<n>", one entry per
 * catalog row is streamed through accurate_list_handler(), and the list is
 * closed with a BNET_EOD signal.
 * NOTE(review): the return statements are not visible in this listing.
 */
131 bool send_accurate_current_files(JCR *jcr)
135 if (!jcr->accurate || job_canceled(jcr) || jcr->get_JobLevel()==L_FULL) {
138 POOLMEM *jobids = get_pool_memory(PM_FNAME);
/* Fill jobids with the list later used in "JobId IN (...)". */
140 db_accurate_get_jobids(jcr, jcr->db, &jcr->jr, jobids);
143 free_pool_memory(jobids);
144 Jmsg(jcr, M_FATAL, 0, _("Cannot find previous jobids.\n"));
147 Jmsg(jcr, M_INFO, 0, _("Sending Accurate information.\n"));
149 /* to be able to allocate the right size for htable */
150 POOLMEM *nb = get_pool_memory(PM_FNAME);
151 *nb = 0; /* clear buffer */
152 Mmsg(buf, "SELECT sum(JobFiles) FROM Job WHERE JobId IN (%s)",jobids);
153 db_sql_query(jcr->db, buf.c_str(), db_get_int_handler, nb);
154 Dmsg2(200, "jobids=%s nb=%s\n", jobids, nb);
155 jcr->file_bsock->fsend("accurate files=%s\n", nb);
/* NOTE(review): no free_pool_memory() for jobids or nb is visible on this failure path -- possible leak; confirm against the complete file. */
157 if (!db_open_batch_connexion(jcr, jcr->db)) {
158 Jmsg0(jcr, M_FATAL, 0, "Can't get dedicate sql connexion");
/* The file list is read over the batch connection (jcr->db_batch), one callback per row. */
162 db_get_file_list(jcr, jcr->db_batch, jobids, accurate_list_handler, (void *)jcr);
164 /* TODO: close the batch connexion ? (can be used very soon) */
166 free_pool_memory(jobids);
167 free_pool_memory(nb);
169 jcr->file_bsock->signal(BNET_EOD);
175 * Do a backup of the specified FileSet
177 * Returns: false on failure
/*
 * Run a backup job: record the job start in the catalog, start the job
 * with the Storage daemon (SD) and its message thread, connect to the
 * File daemon (FD), send it the include/exclude lists, level, SD address
 * and run scripts, optionally the accurate file list, then the backup
 * command, and finally wait for termination.
 * Virtual Full jobs are handed to do_vbackup() instead.
 * NOTE(review): the failure branches (return/goto lines) are not visible
 * in this listing -- confirm them in the complete file.
 */
180 bool do_backup(JCR *jcr)
183 int tls_need = BNET_TLS_NONE;
188 if (jcr->get_JobLevel() == L_VIRTUAL_FULL) {
189 return do_vbackup(jcr);
192 /* Print Job Start message */
193 Jmsg(jcr, M_INFO, 0, _("Start Backup JobId %s, Job=%s\n"),
194 edit_uint64(jcr->JobId, ed1), jcr->Job);
196 set_jcr_job_status(jcr, JS_Running);
197 Dmsg2(100, "JobId=%d JobLevel=%c\n", jcr->jr.JobId, jcr->jr.JobLevel);
198 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
199 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
204 * Open a message channel connection with the Storage
205 * daemon. This is to let him know that our client
206 * will be contacting him for a backup session.
209 Dmsg0(110, "Open connection with storage daemon\n");
210 set_jcr_job_status(jcr, JS_WaitSD);
212 * Start conversation with Storage daemon
214 if (!connect_to_storage_daemon(jcr, 10, SDConnectTimeout, 1)) {
218 * Now start a job with the Storage daemon
220 if (!start_storage_daemon_job(jcr, NULL, jcr->wstorage)) {
225 * Start the job prior to starting the message thread below
226 * to avoid two threads from using the BSOCK structure at
229 if (!bnet_fsend(jcr->store_bsock, "run")) {
234 * Now start a Storage daemon message thread. Note,
235 * this thread is used to provide the catalog services
236 * for the backup job, including inserting the attributes
237 * into the catalog. See catalog_update() in catreq.c
239 if (!start_storage_daemon_message_thread(jcr)) {
242 Dmsg0(150, "Storage daemon connection OK\n");
244 set_jcr_job_status(jcr, JS_WaitFD);
245 if (!connect_to_file_daemon(jcr, 10, FDConnectTimeout, 1)) {
249 set_jcr_job_status(jcr, JS_Running);
250 fd = jcr->file_bsock;
252 if (!send_include_list(jcr)) {
256 if (!send_exclude_list(jcr)) {
260 if (!send_level_command(jcr)) {
265 * send Storage daemon address to the File daemon
/* When no separate SDDport is configured (0), reuse the regular SD port. */
268 if (store->SDDport == 0) {
269 store->SDDport = store->SDport;
272 /* TLS Requirement */
273 if (store->tls_enable) {
274 if (store->tls_require) {
275 tls_need = BNET_TLS_REQUIRED;
277 tls_need = BNET_TLS_OK;
281 fd->fsend(storaddr, store->address, store->SDDport, tls_need);
282 if (!response(jcr, fd, OKstore, "Storage", DISPLAY_ERROR)) {
286 if (!send_runscripts_commands(jcr)) {
291 * We re-update the job start record so that the start
292 * time is set after the run before job. This avoids
293 * that any files created by the run before job will
294 * be saved twice. They will be backed up in the current
295 * job, but not in the next one unless they are changed.
296 * Without this, they will be backed up in this job and
297 * in the next job run because in that case, their date
298 * is after the start of this run.
300 jcr->start_time = time(NULL);
301 jcr->jr.StartTime = jcr->start_time;
302 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
303 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
307 * If backup is in accurate mode, we send the list of
310 if (!send_accurate_current_files(jcr)) {
314 /* Send backup command */
315 fd->fsend(backupcmd);
316 if (!response(jcr, fd, OKbackup, "backup", DISPLAY_ERROR)) {
320 /* Pickup Job termination data */
321 stat = wait_for_job_termination(jcr);
322 db_write_batch_file_records(jcr); /* used by bulk batch file insert */
/* backup_cleanup() is called from here only for a JS_Terminated result. */
323 if (stat == JS_Terminated) {
324 backup_cleanup(jcr, stat);
329 /* Come here only after starting SD thread */
331 set_jcr_job_status(jcr, JS_ErrorTerminated);
332 Dmsg1(400, "wait for sd. use=%d\n", jcr->use_count());
/* On the error path the wait is bounded by FDConnectTimeout. */
334 wait_for_job_termination(jcr, FDConnectTimeout);
335 Dmsg1(400, "after wait for sd. use=%d\n", jcr->use_count());
341 * Here we wait for the File daemon to signal termination,
342 * then we wait for the Storage daemon. When both
343 * are done, we return the job status.
344 * Also used by restore.c
/*
 * Wait for the File daemon to report end of job, then for the Storage
 * daemon, and return the first status that is not JS_Terminated among
 * jcr->JobStatus, jcr->FDJobStatus and jcr->SDJobStatus (in that order).
 *
 *  jcr      job control record; jcr->file_bsock is the FD connection
 *  timeout  passed to start_bsock_timer() for the FD socket
 *
 * do_backup() also calls this with a single argument, so a default for
 * timeout is presumably supplied by a declaration elsewhere -- confirm.
 */
346 int wait_for_job_termination(JCR *jcr, int timeout)
349 BSOCK *fd = jcr->file_bsock;
351 uint32_t JobFiles, Errors;
352 uint64_t ReadBytes = 0;
353 uint64_t JobBytes = 0;
358 set_jcr_job_status(jcr, JS_Running);
362 tid = start_bsock_timer(fd, timeout); /* TODO: New timeout directive??? */
364 /* Wait for Client to terminate */
/* Each message is matched against the current 7-field EndJob form or the older 5-field form. */
365 while ((n = bget_dirmsg(fd)) >= 0) {
367 (sscanf(fd->msg, EndJob, &jcr->FDJobStatus, &JobFiles,
368 &ReadBytes, &JobBytes, &Errors, &VSS, &Encrypt) == 7 ||
369 sscanf(fd->msg, OldEndJob, &jcr->FDJobStatus, &JobFiles,
370 &ReadBytes, &JobBytes, &Errors) == 5)) {
372 set_jcr_job_status(jcr, jcr->FDJobStatus);
373 Dmsg1(100, "FDStatus=%c\n", (char)jcr->JobStatus);
375 Jmsg(jcr, M_WARNING, 0, _("Unexpected Client Job message: %s\n"),
378 if (job_canceled(jcr)) {
383 stop_bsock_timer(tid);
386 if (is_bnet_error(fd)) {
387 Jmsg(jcr, M_FATAL, 0, _("Network error with FD during %s: ERR=%s\n"),
388 job_type_to_str(jcr->get_JobType()), fd->bstrerror());
390 fd->signal(BNET_TERMINATE); /* tell Client we are terminating */
393 /* Force cancel in SD if failing */
394 if (job_canceled(jcr) || !fd_ok) {
395 cancel_storage_daemon_job(jcr);
398 /* Note, the SD stores in jcr->JobFiles/ReadBytes/JobBytes/Errors */
399 wait_for_storage_daemon_termination(jcr);
401 /* Return values from FD */
403 jcr->JobFiles = JobFiles;
404 jcr->Errors = Errors;
405 jcr->ReadBytes = ReadBytes;
406 jcr->JobBytes = JobBytes;
/* NOTE(review): when only the 5-field form matched, sscanf() did not write VSS/Encrypt; their initialisation is not visible in this listing -- confirm they start at a defined value. */
408 jcr->Encrypt = Encrypt;
410 Jmsg(jcr, M_FATAL, 0, _("No Job status returned from FD.\n"));
413 // Dmsg4(100, "fd_ok=%d FDJS=%d JS=%d SDJS=%d\n", fd_ok, jcr->FDJobStatus,
414 // jcr->JobStatus, jcr->SDJobStatus);
416 /* Return the first error status we find Dir, FD, or SD */
417 if (!fd_ok || is_bnet_error(fd)) { /* if fd not set, that use !fd_ok */
418 jcr->FDJobStatus = JS_ErrorTerminated;
420 if (jcr->JobStatus != JS_Terminated) {
421 return jcr->JobStatus;
423 if (jcr->FDJobStatus != JS_Terminated) {
424 return jcr->FDJobStatus;
426 return jcr->SDJobStatus;
430 * Release resources allocated during backup.
/*
 * Finish a backup job: record the job end, reload the Job, Client and
 * Media catalog records, update the bootstrap file, and emit the final
 * job report through Jmsg().
 *
 *  jcr       job control record of the finished job
 *  TermCode  termination code handed to update_job_end()
 *
 * Virtual Full jobs are handed to vbackup_cleanup() instead.
 */
432 void backup_cleanup(JCR *jcr, int TermCode)
434 char sdt[50], edt[50], schedt[50];
435 char ec1[30], ec2[30], ec3[30], ec4[30], ec5[30], compress[50];
436 char ec6[30], ec7[30], ec8[30], elapsed[50];
437 char term_code[100], fd_term_msg[100], sd_term_msg[100];
438 const char *term_msg;
439 int msg_type = M_INFO;
442 double kbps, compression;
445 if (jcr->get_JobLevel() == L_VIRTUAL_FULL) {
446 vbackup_cleanup(jcr, TermCode);
450 Dmsg2(100, "Enter backup_cleanup %d %c\n", TermCode, TermCode);
451 memset(&mr, 0, sizeof(mr));
452 memset(&cr, 0, sizeof(cr));
454 update_job_end(jcr, TermCode);
/* A failure to reload the Job record downgrades the job to JS_ErrorTerminated. */
456 if (!db_get_job_record(jcr, jcr->db, &jcr->jr)) {
457 Jmsg(jcr, M_WARNING, 0, _("Error getting Job record for Job report: ERR=%s"),
458 db_strerror(jcr->db));
459 set_jcr_job_status(jcr, JS_ErrorTerminated);
462 bstrncpy(cr.Name, jcr->client->name(), sizeof(cr.Name));
463 if (!db_get_client_record(jcr, jcr->db, &cr)) {
464 Jmsg(jcr, M_WARNING, 0, _("Error getting Client record for Job report: ERR=%s"),
465 db_strerror(jcr->db));
468 bstrncpy(mr.VolumeName, jcr->VolumeName, sizeof(mr.VolumeName));
469 if (!db_get_media_record(jcr, jcr->db, &mr)) {
470 Jmsg(jcr, M_WARNING, 0, _("Error getting Media record for Volume \"%s\": ERR=%s"),
471 mr.VolumeName, db_strerror(jcr->db));
472 set_jcr_job_status(jcr, JS_ErrorTerminated);
475 update_bootstrap_file(jcr);
/* Pick the report headline (term_msg) and message type from the final job status. */
477 switch (jcr->JobStatus) {
479 if (jcr->Errors || jcr->SDErrors) {
480 term_msg = _("Backup OK -- with warnings");
482 term_msg = _("Backup OK");
486 case JS_ErrorTerminated:
487 term_msg = _("*** Backup Error ***");
488 msg_type = M_ERROR; /* Generate error message */
/* If the SD connection is still open, tell the SD to terminate and cancel its message thread. */
489 if (jcr->store_bsock) {
490 jcr->store_bsock->signal(BNET_TERMINATE);
491 if (jcr->SD_msg_chan) {
492 pthread_cancel(jcr->SD_msg_chan);
497 term_msg = _("Backup Canceled");
498 if (jcr->store_bsock) {
499 jcr->store_bsock->signal(BNET_TERMINATE);
500 if (jcr->SD_msg_chan) {
501 pthread_cancel(jcr->SD_msg_chan);
506 term_msg = term_code;
507 sprintf(term_code, _("Inappropriate term code: %c\n"), jcr->JobStatus);
510 bstrftimes(schedt, sizeof(schedt), jcr->jr.SchedTime);
511 bstrftimes(sdt, sizeof(sdt), jcr->jr.StartTime);
512 bstrftimes(edt, sizeof(edt), jcr->jr.EndTime);
513 RunTime = jcr->jr.EndTime - jcr->jr.StartTime;
/* Average rate: job bytes divided by (1000 * run time in seconds). */
517 kbps = ((double)jcr->jr.JobBytes) / (1000.0 * (double)RunTime);
519 if (!db_get_job_volume_names(jcr, jcr->db, jcr->jr.JobId, &jcr->VolumeName)) {
521 * Note, if the job has erred, most likely it did not write any
522 * tape, so suppress this "error" message since in that case
523 * it is normal. Or look at it the other way, only for a
524 * normal exit should we complain about this error.
526 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes) {
527 Jmsg(jcr, M_ERROR, 0, "%s", db_strerror(jcr->db));
529 jcr->VolumeName[0] = 0; /* none */
/* Compression is reported as the percentage saved relative to bytes read; under 0.5 % is shown as "None". */
532 if (jcr->ReadBytes == 0) {
533 bstrncpy(compress, "None", sizeof(compress));
535 compression = (double)100 - 100.0 * ((double)jcr->JobBytes / (double)jcr->ReadBytes);
536 if (compression < 0.5) {
537 bstrncpy(compress, "None", sizeof(compress));
539 bsnprintf(compress, sizeof(compress), "%.1f %%", compression);
542 jobstatus_to_ascii(jcr->FDJobStatus, fd_term_msg, sizeof(fd_term_msg));
543 jobstatus_to_ascii(jcr->SDJobStatus, sd_term_msg, sizeof(sd_term_msg));
545 // bmicrosleep(15, 0); /* for debugging SIGHUP */
/* Final job report; the arguments below must stay in the same order as the conversions in the format string. */
547 Jmsg(jcr, msg_type, 0, _("%s %s %s (%s): %s\n"
548 " Build OS: %s %s %s\n"
551 " Backup Level: %s%s\n"
552 " Client: \"%s\" %s\n"
553 " FileSet: \"%s\" %s\n"
554 " Pool: \"%s\" (From %s)\n"
555 " Catalog: \"%s\" (From %s)\n"
556 " Storage: \"%s\" (From %s)\n"
557 " Scheduled time: %s\n"
560 " Elapsed time: %s\n"
562 " FD Files Written: %s\n"
563 " SD Files Written: %s\n"
564 " FD Bytes Written: %s (%sB)\n"
565 " SD Bytes Written: %s (%sB)\n"
567 " Software Compression: %s\n"
571 " Volume name(s): %s\n"
572 " Volume Session Id: %d\n"
573 " Volume Session Time: %d\n"
574 " Last Volume Bytes: %s (%sB)\n"
575 " Non-fatal FD errors: %d\n"
577 " FD termination status: %s\n"
578 " SD termination status: %s\n"
579 " Termination: %s\n\n"),
580 BACULA, my_name, VERSION, LSMDATE, edt,
581 HOST_OS, DISTNAME, DISTVER,
584 level_to_str(jcr->get_JobLevel()), jcr->since,
585 jcr->client->name(), cr.Uname,
586 jcr->fileset->name(), jcr->FSCreateTime,
587 jcr->pool->name(), jcr->pool_source,
588 jcr->catalog->name(), jcr->catalog_source,
589 jcr->wstore->name(), jcr->wstore_source,
593 edit_utime(RunTime, elapsed, sizeof(elapsed)),
595 edit_uint64_with_commas(jcr->jr.JobFiles, ec1),
596 edit_uint64_with_commas(jcr->SDJobFiles, ec2),
597 edit_uint64_with_commas(jcr->jr.JobBytes, ec3),
598 edit_uint64_with_suffix(jcr->jr.JobBytes, ec4),
599 edit_uint64_with_commas(jcr->SDJobBytes, ec5),
600 edit_uint64_with_suffix(jcr->SDJobBytes, ec6),
603 jcr->VSS?_("yes"):_("no"),
604 jcr->Encrypt?_("yes"):_("no"),
605 jcr->accurate?_("yes"):_("no"),
609 edit_uint64_with_commas(mr.VolBytes, ec7),
610 edit_uint64_with_suffix(mr.VolBytes, ec8),
617 Dmsg0(100, "Leave backup_cleanup()\n");
/*
 * Write bootstrap records for the volumes used by this job to the target
 * named by the Job's WriteBootstrap directive.  Only done when the job
 * ended with JS_Terminated, wrote some bytes, and WriteBootstrap is set.
 * One record per volume is written: Volume, MediaType, optional Slot,
 * session id/time, file and block ranges, and FileIndex range.
 */
620 void update_bootstrap_file(JCR *jcr)
622 /* Now update the bootstrap file if any */
623 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes &&
624 jcr->job->WriteBootstrap) {
/* Expand job codes in the configured WriteBootstrap value to get the target name. */
628 POOLMEM *fname = get_pool_memory(PM_FNAME);
629 fname = edit_job_codes(jcr, fname, jcr->job->WriteBootstrap, "");
631 VOL_PARAMS *VolParams = NULL;
/* Pipe target: the text after the leading "|" is opened for writing with open_bpipe(). */
637 bpipe = open_bpipe(fname+1, 0, "w"); /* skip first char "|" */
638 fd = bpipe ? bpipe->wfd : NULL;
640 /* ***FIXME*** handle BASE */
/* File target: a Full level opens with "w+b" (truncate), any other level with "a+b" (append). */
641 fd = fopen(fname, jcr->get_JobLevel()==L_FULL?"w+b":"a+b");
644 VolCount = db_get_job_volume_parameters(jcr, jcr->db, jcr->JobId,
647 Jmsg(jcr, M_ERROR, 0, _("Could not get Job Volume Parameters to "
648 "update Bootstrap file. ERR=%s\n"), db_strerror(jcr->db));
/* The job is only marked failed here when the SD reported written files. */
649 if (jcr->SDJobFiles != 0) {
650 set_jcr_job_status(jcr, JS_ErrorTerminated);
654 /* Start output with when and who wrote it */
655 bstrftimes(edt, sizeof(edt), time(NULL));
656 fprintf(fd, "# %s - %s - %s%s\n", edt, jcr->jr.Job,
657 level_to_str(jcr->get_JobLevel()), jcr->since);
658 for (int i=0; i < VolCount; i++) {
659 /* Write the record */
660 fprintf(fd, "Volume=\"%s\"\n", VolParams[i].VolumeName);
661 fprintf(fd, "MediaType=\"%s\"\n", VolParams[i].MediaType);
662 if (VolParams[i].Slot > 0) {
663 fprintf(fd, "Slot=%d\n", VolParams[i].Slot);
665 fprintf(fd, "VolSessionId=%u\n", jcr->VolSessionId);
666 fprintf(fd, "VolSessionTime=%u\n", jcr->VolSessionTime);
667 fprintf(fd, "VolFile=%u-%u\n", VolParams[i].StartFile,
668 VolParams[i].EndFile);
669 fprintf(fd, "VolBlock=%u-%u\n", VolParams[i].StartBlock,
670 VolParams[i].EndBlock);
671 fprintf(fd, "FileIndex=%d-%d\n", VolParams[i].FirstIndex,
672 VolParams[i].LastIndex);
/* Failure to open the bootstrap target is reported and marks the job JS_ErrorTerminated. */
684 Jmsg(jcr, M_ERROR, 0, _("Could not open WriteBootstrap file:\n"
685 "%s: ERR=%s\n"), fname, be.bstrerror());
686 set_jcr_job_status(jcr, JS_ErrorTerminated);
688 free_pool_memory(fname);