2 Bacula® - The Network Backup Solution
4 Copyright (C) 2000-2008 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from
7 many others, a complete list can be found in the file AUTHORS.
8 This program is Free Software; you can redistribute it and/or
9 modify it under the terms of version two of the GNU General Public
10 License as published by the Free Software Foundation and included
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 Bacula® is a registered trademark of John Walker.
24 The licensor of Bacula is the Free Software Foundation Europe
25 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
26 Switzerland, email:ftf@fsfeurope.org.
30 * Bacula Director -- backup.c -- responsible for doing backup jobs
32 * Kern Sibbald, March MM
34 * Basic tasks done here:
35 * Open DB and create records for this job.
36 * Open Message Channel with Storage daemon to tell him a job will be starting.
37 * Open connection with File daemon and pass him commands
39 * When the File daemon finishes the job, update the DB.
/*
 * Wire-protocol strings for the File daemon (FD) conversation.  Commands are
 * sent with fsend(); replies are matched with response() or, for the
 * end-of-job report, parsed with sscanf() in wait_for_job_termination().
 */
48 /* Commands sent to File daemon */
/* Starts the backup proper; the reply is checked against OKbackup. */
49 static char backupcmd[] = "backup\n";
/* Storage daemon address, port and TLS need code; reply checked against OKstore. */
50 static char storaddr[] = "storage address=%s port=%d ssl=%d\n";
52 /* Responses received from File daemon */
53 static char OKbackup[] = "2000 OK backup\n";
54 static char OKstore[] = "2000 OK storage\n";
/* Current end-of-job report: seven fields, the last two being VSS and Encrypt. */
55 static char EndJob[] = "2800 End Job TermCode=%d JobFiles=%u "
56 "ReadBytes=%llu JobBytes=%llu Errors=%u "
57 "VSS=%d Encrypt=%d\n";
58 /* Pre 1.39.29 (04Dec06) EndJob */
/* Older five-field form, tried when the current form does not match. */
59 static char OldEndJob[] = "2800 End Job TermCode=%d JobFiles=%u "
60 "ReadBytes=%llu JobBytes=%llu Errors=%u\n";
62 * Called here before the job is run to do the job-specific initialization.
/*
 * Pre-run setup for a backup job.  In order: release the read storage,
 * obtain the FileSet record, fix the job level and since time, apply the
 * pool overrides, store the Pool record id in jcr->jr.PoolId, choose the
 * write storage and start any clone jobs.
 *
 *  jcr  job control record of the backup job being prepared
 */
65 bool do_backup_init(JCR *jcr)
68 free_rstorage(jcr); /* we don't read so release */
70 if (!get_or_create_fileset_record(jcr)) {
75 * Get definitive Job level and since time
/* The since text is written into jcr->since, bounded by its size. */
77 get_level_since_time(jcr, jcr->since, sizeof(jcr->since));
/* Overrides are applied before the Pool record is requested by name. */
79 apply_pool_overrides(jcr);
81 jcr->jr.PoolId = get_or_create_pool_record(jcr, jcr->pool->name());
/* NOTE(review): a PoolId of 0 is presumably the "no record" result -- confirm. */
82 if (jcr->jr.PoolId == 0) {
86 /* If pool storage specified, use it instead of job storage */
87 copy_wstorage(jcr, jcr->pool->storage, _("Pool resource"));
/* Fatal job message for the case where neither Job nor Pool names a storage. */
90 Jmsg(jcr, M_FATAL, 0, _("No Storage specification found in Job or Pool.\n"));
94 create_clones(jcr); /* run any clone jobs */
100 * For each file in the current list, send "/path/fname\0LStat" to FD
/*
 * Row handler handed to db_get_file_list() by send_accurate_current_files().
 *
 *  ctx         the JCR of the running job, passed through as void *
 *  num_fields  number of columns in row (not used here)
 *  row         column values as strings; row[0] and row[1] are sent
 *              back-to-back, followed by a NUL byte and row[4]
 *
 * row[2] carries the file index as text.  The test looks at the text
 * itself: the earlier form, row[2] > 0, compared the pointer with zero and
 * was therefore true for every non-NULL column, so entries whose file index
 * is "0" were never discarded.  A NULL column is still skipped, as before.
 */
102 static int accurate_list_handler(void *ctx, int num_fields, char **row)
104 JCR *jcr = (JCR *)ctx;
/* A canceled job takes the early-exit branch and sends nothing more. */
106 if (job_canceled(jcr)) {
110 if (row[2] && row[2][0] != '0') { /* discard when file_index == 0 */
111 jcr->file_bsock->fsend("%s%s%c%s", row[0], row[1], 0, row[4]);
117 * Send current file list to FD
118 * DIR -> FD : accurate files=xxxx
119 * DIR -> FD : /path/to/file\0Lstat
120 * DIR -> FD : /path/to/dir/\0Lstat
/*
 * Send the FD the list of files currently recorded for this job so that it
 * can run an accurate backup.  The opening guard covers the cases that need
 * no list: accurate mode off, job canceled, or a Full level.
 *
 * Exchange, in order: an "accurate files=" line carrying the summed JobFiles
 * of the earlier jobs, one entry per file emitted by
 * accurate_list_handler(), then a BNET_EOD signal.
 *
 *  jcr  job control record; jcr->file_bsock must be the open FD connection
 */
124 bool send_accurate_current_files(JCR *jcr)
128 if (jcr->accurate==false || job_canceled(jcr) || jcr->JobLevel==L_FULL) {
/* jobids receives the earlier JobIds; it feeds both queries below. */
131 POOLMEM *jobids = get_pool_memory(PM_FNAME);
132 db_accurate_get_jobids(jcr, jcr->db, &jcr->jr, jobids);
/* Failure path: jobids is released before the fatal message is raised. */
135 free_pool_memory(jobids);
136 Jmsg(jcr, M_FATAL, 0, _("Cannot find previous jobids.\n"));
139 Jmsg(jcr, M_INFO, 0, _("Sending Accurate information.\n"));
141 /* to be able to allocate the right size for htable */
/*
 * NOTE(review): nb is a pool-memory string buffer and is sent with %s, yet
 * it is filled by db_get_int_handler -- confirm that handler stores text
 * (not a binary integer) into its context, and that nb is terminated when
 * the query returns no row.
 */
142 POOLMEM *nb = get_pool_memory(PM_FNAME);
143 Mmsg(buf, "SELECT sum(JobFiles) FROM Job WHERE JobId IN (%s)",jobids);
144 db_sql_query(jcr->db, buf.c_str(), db_get_int_handler, nb);
145 jcr->file_bsock->fsend("accurate files=%s\n", nb);
/* Stream the file entries; the JCR is the handler's context argument. */
147 db_get_file_list(jcr, jcr->db, jobids, accurate_list_handler, (void *)jcr);
149 free_pool_memory(jobids);
150 free_pool_memory(nb);
/* End-of-data marker closing the list. */
152 jcr->file_bsock->signal(BNET_EOD);
153 /* TODO: use response() ? */
159 * Do a backup of the specified FileSet
161 * Returns: false on failure
/*
 * Run a backup job from start to finish.
 *
 * Sequence: log the start and update the job start record; connect to the
 * Storage daemon (SD), start its job, send "run" and start the SD message
 * thread; connect to the File daemon (FD) and send it the include list,
 * exclude list, level, SD address, run scripts and (in accurate mode) the
 * current file list; send the backup command; wait for termination and, on
 * JS_Terminated, produce the job report through backup_cleanup().
 *
 *  jcr  job control record of the job to run
 */
164 bool do_backup(JCR *jcr)
/* TLS need code sent to the FD; raised below when the storage enables TLS. */
167 int tls_need = BNET_TLS_NONE;
173 /* Print Job Start message */
174 Jmsg(jcr, M_INFO, 0, _("Start Backup JobId %s, Job=%s\n"),
175 edit_uint64(jcr->JobId, ed1), jcr->Job);
177 set_jcr_job_status(jcr, JS_Running);
178 Dmsg2(100, "JobId=%d JobLevel=%c\n", jcr->jr.JobId, jcr->jr.JobLevel);
179 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
180 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
185 * Open a message channel connection with the Storage
186 * daemon. This is to let him know that our client
187 * will be contacting him for a backup session.
190 Dmsg0(110, "Open connection with storage daemon\n");
191 set_jcr_job_status(jcr, JS_WaitSD);
193 * Start conversation with Storage daemon
195 if (!connect_to_storage_daemon(jcr, 10, SDConnectTimeout, 1)) {
199 * Now start a job with the Storage daemon
/* Only write storage is passed; the read-storage argument is NULL. */
201 if (!start_storage_daemon_job(jcr, NULL, jcr->wstorage)) {
206 * Start the job prior to starting the message thread below
207 * to avoid two threads from using the BSOCK structure at
210 if (!bnet_fsend(jcr->store_bsock, "run")) {
215 * Now start a Storage daemon message thread. Note,
216 * this thread is used to provide the catalog services
217 * for the backup job, including inserting the attributes
218 * into the catalog. See catalog_update() in catreq.c
220 if (!start_storage_daemon_message_thread(jcr)) {
223 Dmsg0(150, "Storage daemon connection OK\n");
225 set_jcr_job_status(jcr, JS_WaitFD);
226 if (!connect_to_file_daemon(jcr, 10, FDConnectTimeout, 1)) {
230 set_jcr_job_status(jcr, JS_Running);
/* Shorthand for the FD connection used by the commands below. */
231 fd = jcr->file_bsock;
233 if (!send_include_list(jcr)) {
237 if (!send_exclude_list(jcr)) {
241 if (!send_level_command(jcr)) {
246 * send Storage daemon address to the File daemon
/* An unset SDDport falls back to the storage's SDport. */
249 if (store->SDDport == 0) {
250 store->SDDport = store->SDport;
253 /* TLS Requirement */
254 if (store->tls_enable) {
255 if (store->tls_require) {
256 tls_need = BNET_TLS_REQUIRED;
258 tls_need = BNET_TLS_OK;
262 fd->fsend(storaddr, store->address, store->SDDport, tls_need);
263 if (!response(jcr, fd, OKstore, "Storage", DISPLAY_ERROR)) {
267 if (!send_runscripts_commands(jcr)) {
272 * We re-update the job start record so that the start
273 * time is set after the run before job. This avoids
274 * that any files created by the run before job will
275 * be saved twice. They will be backed up in the current
276 * job, but not in the next one unless they are changed.
277 * Without this, they will be backed up in this job and
278 * in the next job run because in that case, their date
279 * is after the start of this run.
281 jcr->start_time = time(NULL);
282 jcr->jr.StartTime = jcr->start_time;
283 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
284 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
288 * If backup is in accurate mode, we send the list of
291 if (!send_accurate_current_files(jcr)) {
295 /* Send backup command */
296 fd->fsend(backupcmd);
297 if (!response(jcr, fd, OKbackup, "backup", DISPLAY_ERROR)) {
301 /* Pickup Job termination data */
/*
 * NOTE(review): called with one argument although the definition takes
 * (jcr, timeout) -- presumably the prototype supplies a default; confirm.
 */
302 stat = wait_for_job_termination(jcr);
303 db_write_batch_file_records(jcr); /* used by bulk batch file insert */
/* The job report is produced here only for a normal termination. */
304 if (stat == JS_Terminated) {
305 backup_cleanup(jcr, stat);
310 /* Come here only after starting SD thread */
/* Error path: mark the job failed, then wait with FDConnectTimeout as limit. */
312 set_jcr_job_status(jcr, JS_ErrorTerminated);
313 Dmsg1(400, "wait for sd. use=%d\n", jcr->use_count());
315 wait_for_job_termination(jcr, FDConnectTimeout);
316 Dmsg1(400, "after wait for sd. use=%d\n", jcr->use_count());
322 * Here we wait for the File daemon to signal termination,
323 * then we wait for the Storage daemon. When both
324 * are done, we return the job status.
325 * Also used by restore.c
/*
 * Wait for the File daemon to report the end of the job, then for the
 * Storage daemon, and return the resulting job status.
 *
 *  jcr      job being waited on; jcr->file_bsock is the FD connection
 *  timeout  handed to start_bsock_timer() for the time FD messages are read
 *
 * Returns jcr->JobStatus when it is not JS_Terminated, otherwise
 * jcr->FDJobStatus when that is not JS_Terminated, otherwise
 * jcr->SDJobStatus.
 */
327 int wait_for_job_termination(JCR *jcr, int timeout)
330 BSOCK *fd = jcr->file_bsock;
/* Values parsed from the FD's end-of-job report. */
332 uint32_t JobFiles, Errors;
333 uint64_t ReadBytes = 0;
334 uint64_t JobBytes = 0;
339 set_jcr_job_status(jcr, JS_Running);
343 tid = start_bsock_timer(fd, timeout); /* TODO: use user timeout */
345 /* Wait for Client to terminate */
346 while ((n = bget_dirmsg(fd)) >= 0) {
/* Accept the seven-field EndJob report or the older five-field form. */
348 (sscanf(fd->msg, EndJob, &jcr->FDJobStatus, &JobFiles,
349 &ReadBytes, &JobBytes, &Errors, &VSS, &Encrypt) == 7 ||
350 sscanf(fd->msg, OldEndJob, &jcr->FDJobStatus, &JobFiles,
351 &ReadBytes, &JobBytes, &Errors) == 5)) {
/* The FD's termination code becomes the job status. */
353 set_jcr_job_status(jcr, jcr->FDJobStatus);
354 Dmsg1(100, "FDStatus=%c\n", (char)jcr->JobStatus);
356 Jmsg(jcr, M_WARNING, 0, _("Unexpected Client Job message: %s\n"),
359 if (job_canceled(jcr)) {
364 stop_bsock_timer(tid);
367 if (is_bnet_error(fd)) {
368 Jmsg(jcr, M_FATAL, 0, _("Network error with FD during %s: ERR=%s\n"),
369 job_type_to_str(jcr->JobType), fd->bstrerror());
371 fd->signal(BNET_TERMINATE); /* tell Client we are terminating */
374 /* Force cancel in SD if failing */
375 if (job_canceled(jcr) || !fd_ok) {
376 cancel_storage_daemon_job(jcr);
379 /* Note, the SD stores in jcr->JobFiles/ReadBytes/JobBytes/Errors */
380 wait_for_storage_daemon_termination(jcr);
382 /* Return values from FD */
/* These assignments replace whatever the SD stored in the same fields. */
384 jcr->JobFiles = JobFiles;
385 jcr->Errors = Errors;
386 jcr->ReadBytes = ReadBytes;
387 jcr->JobBytes = JobBytes;
389 jcr->Encrypt = Encrypt;
391 Jmsg(jcr, M_FATAL, 0, _("No Job status returned from FD.\n"));
394 // Dmsg4(100, "fd_ok=%d FDJS=%d JS=%d SDJS=%d\n", fd_ok, jcr->FDJobStatus,
395 // jcr->JobStatus, jcr->SDJobStatus);
397 /* Return the first error status we find Dir, FD, or SD */
398 if (!fd_ok || is_bnet_error(fd)) { /* if fd not set, that use !fd_ok */
399 jcr->FDJobStatus = JS_ErrorTerminated;
401 if (jcr->JobStatus != JS_Terminated) {
402 return jcr->JobStatus;
404 if (jcr->FDJobStatus != JS_Terminated) {
405 return jcr->FDJobStatus;
407 return jcr->SDJobStatus;
411 * Release resources allocated during backup.
/*
 * Finish a backup job and emit its report.
 *
 * Records the job end, reloads the Job, Client and Media records, updates
 * the bootstrap file, picks the termination text for jcr->JobStatus (also
 * signalling BNET_TERMINATE to the SD and cancelling its message thread on
 * the error and cancel paths), computes rate and compression, and prints the
 * summary through Jmsg().
 *
 *  jcr       job control record of the finished job
 *  TermCode  termination status handed to update_job_end()
 */
413 void backup_cleanup(JCR *jcr, int TermCode)
415 char sdt[50], edt[50], schedt[50];
416 char ec1[30], ec2[30], ec3[30], ec4[30], ec5[30], compress[50];
417 char ec6[30], ec7[30], ec8[30], elapsed[50];
418 char term_code[100], fd_term_msg[100], sd_term_msg[100];
419 const char *term_msg;
/* Report severity; raised to M_ERROR for the error termination case. */
420 int msg_type = M_INFO;
423 double kbps, compression;
426 Dmsg2(100, "Enter backup_cleanup %d %c\n", TermCode, TermCode);
/* Start from zeroed Media and Client records before the lookups. */
427 memset(&mr, 0, sizeof(mr));
428 memset(&cr, 0, sizeof(cr));
430 update_job_end(jcr, TermCode);
432 if (!db_get_job_record(jcr, jcr->db, &jcr->jr)) {
433 Jmsg(jcr, M_WARNING, 0, _("Error getting Job record for Job report: ERR=%s"),
434 db_strerror(jcr->db));
435 set_jcr_job_status(jcr, JS_ErrorTerminated);
/* The Client record is looked up by the configured client name. */
438 bstrncpy(cr.Name, jcr->client->name(), sizeof(cr.Name));
439 if (!db_get_client_record(jcr, jcr->db, &cr)) {
440 Jmsg(jcr, M_WARNING, 0, _("Error getting Client record for Job report: ERR=%s"),
441 db_strerror(jcr->db));
/* The Media record is looked up by the job's current volume name. */
444 bstrncpy(mr.VolumeName, jcr->VolumeName, sizeof(mr.VolumeName));
445 if (!db_get_media_record(jcr, jcr->db, &mr)) {
446 Jmsg(jcr, M_WARNING, 0, _("Error getting Media record for Volume \"%s\": ERR=%s"),
447 mr.VolumeName, db_strerror(jcr->db));
448 set_jcr_job_status(jcr, JS_ErrorTerminated);
451 update_bootstrap_file(jcr);
453 switch (jcr->JobStatus) {
455 if (jcr->Errors || jcr->SDErrors) {
456 term_msg = _("Backup OK -- with warnings");
458 term_msg = _("Backup OK");
462 case JS_ErrorTerminated:
463 term_msg = _("*** Backup Error ***");
464 msg_type = M_ERROR; /* Generate error message */
465 if (jcr->store_bsock) {
466 jcr->store_bsock->signal(BNET_TERMINATE);
467 if (jcr->SD_msg_chan) {
468 pthread_cancel(jcr->SD_msg_chan);
473 term_msg = _("Backup Canceled");
474 if (jcr->store_bsock) {
475 jcr->store_bsock->signal(BNET_TERMINATE);
476 if (jcr->SD_msg_chan) {
477 pthread_cancel(jcr->SD_msg_chan);
482 term_msg = term_code;
/*
 * Bounded formatting, as already done for compress below: the format is
 * translatable, so its length is not fixed at compile time and must not be
 * allowed to run past term_code.
 */
483 bsnprintf(term_code, sizeof(term_code), _("Inappropriate term code: %c\n"), jcr->JobStatus);
486 bstrftimes(schedt, sizeof(schedt), jcr->jr.SchedTime);
487 bstrftimes(sdt, sizeof(sdt), jcr->jr.StartTime);
488 bstrftimes(edt, sizeof(edt), jcr->jr.EndTime);
489 RunTime = jcr->jr.EndTime - jcr->jr.StartTime;
/* Rate in units of 1000 bytes per second of run time. */
493 kbps = ((double)jcr->jr.JobBytes) / (1000.0 * (double)RunTime);
495 if (!db_get_job_volume_names(jcr, jcr->db, jcr->jr.JobId, &jcr->VolumeName)) {
497 * Note, if the job has erred, most likely it did not write any
498 * tape, so suppress this "error" message since in that case
499 * it is normal. Or look at it the other way, only for a
500 * normal exit should we complain about this error.
502 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes) {
503 Jmsg(jcr, M_ERROR, 0, "%s", db_strerror(jcr->db));
505 jcr->VolumeName[0] = 0; /* none */
/* No bytes read: report "None" and avoid dividing by zero. */
508 if (jcr->ReadBytes == 0) {
509 bstrncpy(compress, "None", sizeof(compress));
511 compression = (double)100 - 100.0 * ((double)jcr->JobBytes / (double)jcr->ReadBytes);
/* Savings under half a percent are also reported as "None". */
512 if (compression < 0.5) {
513 bstrncpy(compress, "None", sizeof(compress));
515 bsnprintf(compress, sizeof(compress), "%.1f %%", compression);
518 jobstatus_to_ascii(jcr->FDJobStatus, fd_term_msg, sizeof(fd_term_msg));
519 jobstatus_to_ascii(jcr->SDJobStatus, sd_term_msg, sizeof(sd_term_msg));
521 // bmicrosleep(15, 0); /* for debugging SIGHUP */
523 Jmsg(jcr, msg_type, 0, _("Bacula %s %s (%s): %s\n"
524 " Build OS: %s %s %s\n"
527 " Backup Level: %s%s\n"
528 " Client: \"%s\" %s\n"
529 " FileSet: \"%s\" %s\n"
530 " Pool: \"%s\" (From %s)\n"
531 " Catalog: \"%s\" (From %s)\n"
532 " Storage: \"%s\" (From %s)\n"
533 " Scheduled time: %s\n"
536 " Elapsed time: %s\n"
538 " FD Files Written: %s\n"
539 " SD Files Written: %s\n"
540 " FD Bytes Written: %s (%sB)\n"
541 " SD Bytes Written: %s (%sB)\n"
543 " Software Compression: %s\n"
547 " Volume name(s): %s\n"
548 " Volume Session Id: %d\n"
549 " Volume Session Time: %d\n"
550 " Last Volume Bytes: %s (%sB)\n"
551 " Non-fatal FD errors: %d\n"
553 " FD termination status: %s\n"
554 " SD termination status: %s\n"
555 " Termination: %s\n\n"),
556 my_name, VERSION, LSMDATE, edt,
557 HOST_OS, DISTNAME, DISTVER,
560 level_to_str(jcr->JobLevel), jcr->since,
561 jcr->client->name(), cr.Uname,
562 jcr->fileset->name(), jcr->FSCreateTime,
563 jcr->pool->name(), jcr->pool_source,
564 jcr->catalog->name(), jcr->catalog_source,
565 jcr->wstore->name(), jcr->wstore_source,
569 edit_utime(RunTime, elapsed, sizeof(elapsed)),
571 edit_uint64_with_commas(jcr->jr.JobFiles, ec1),
572 edit_uint64_with_commas(jcr->SDJobFiles, ec2),
573 edit_uint64_with_commas(jcr->jr.JobBytes, ec3),
574 edit_uint64_with_suffix(jcr->jr.JobBytes, ec4),
575 edit_uint64_with_commas(jcr->SDJobBytes, ec5),
576 edit_uint64_with_suffix(jcr->SDJobBytes, ec6),
579 jcr->VSS?_("yes"):_("no"),
580 jcr->Encrypt?_("yes"):_("no"),
581 jcr->accurate?_("yes"):_("no"),
585 edit_uint64_with_commas(mr.VolBytes, ec7),
586 edit_uint64_with_suffix(mr.VolBytes, ec8),
593 Dmsg0(100, "Leave backup_cleanup()\n");
/*
 * Write this job's bootstrap records to the target named by
 * jcr->job->WriteBootstrap.
 *
 * Only done when the job status is JS_Terminated, bytes were written and a
 * WriteBootstrap target is configured.  Job codes in the target name are
 * expanded first.  The output goes either to a pipe opened with
 * open_bpipe() (target starting with "|") or to a file opened with fopen(),
 * truncated for a Full level and appended to otherwise.
 *
 *  jcr  job control record of the finished job
 */
596 void update_bootstrap_file(JCR *jcr)
598 /* Now update the bootstrap file if any */
599 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes &&
600 jcr->job->WriteBootstrap) {
604 POOLMEM *fname = get_pool_memory(PM_FNAME);
/* edit_job_codes() may hand back a different buffer, so fname is reassigned. */
605 fname = edit_job_codes(jcr, fname, jcr->job->WriteBootstrap, "");
607 VOL_PARAMS *VolParams = NULL;
613 bpipe = open_bpipe(fname+1, 0, "w"); /* skip first char "|" */
/* A failed pipe open leaves fd NULL. */
614 fd = bpipe ? bpipe->wfd : NULL;
616 /* ***FIXME*** handle BASE */
617 fd = fopen(fname, jcr->JobLevel==L_FULL?"w+b":"a+b");
620 VolCount = db_get_job_volume_parameters(jcr, jcr->db, jcr->JobId,
623 Jmsg(jcr, M_ERROR, 0, _("Could not get Job Volume Parameters to "
624 "update Bootstrap file. ERR=%s\n"), db_strerror(jcr->db));
/* Missing parameters only fail the job when the SD reported written files. */
625 if (jcr->SDJobFiles != 0) {
626 set_jcr_job_status(jcr, JS_ErrorTerminated);
630 /* Start output with when and who wrote it */
631 bstrftimes(edt, sizeof(edt), time(NULL));
632 fprintf(fd, "# %s - %s - %s%s\n", edt, jcr->jr.Job,
633 level_to_str(jcr->JobLevel), jcr->since);
/* One record per volume; session id and time are the same for all of them. */
634 for (int i=0; i < VolCount; i++) {
635 /* Write the record */
636 fprintf(fd, "Volume=\"%s\"\n", VolParams[i].VolumeName);
637 fprintf(fd, "MediaType=\"%s\"\n", VolParams[i].MediaType);
638 fprintf(fd, "VolSessionId=%u\n", jcr->VolSessionId);
639 fprintf(fd, "VolSessionTime=%u\n", jcr->VolSessionTime);
640 fprintf(fd, "VolFile=%u-%u\n", VolParams[i].StartFile,
641 VolParams[i].EndFile);
642 fprintf(fd, "VolBlock=%u-%u\n", VolParams[i].StartBlock,
643 VolParams[i].EndBlock);
644 fprintf(fd, "FileIndex=%d-%d\n", VolParams[i].FirstIndex,
645 VolParams[i].LastIndex);
/* Open failure: report the target name and error, and fail the job. */
657 Jmsg(jcr, M_ERROR, 0, _("Could not open WriteBootstrap file:\n"
658 "%s: ERR=%s\n"), fname, be.bstrerror());
659 set_jcr_job_status(jcr, JS_ErrorTerminated);
661 free_pool_memory(fname);