2 Bacula® - The Network Backup Solution
4 Copyright (C) 2000-2009 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from
7 many others, a complete list can be found in the file AUTHORS.
8 This program is Free Software; you can redistribute it and/or
9 modify it under the terms of version two of the GNU General Public
10 License as published by the Free Software Foundation and included
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 Bacula® is a registered trademark of Kern Sibbald.
24 The licensor of Bacula is the Free Software Foundation Europe
25 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
26 Switzerland, email:ftf@fsfeurope.org.
30 * Bacula Director -- backup.c -- responsible for doing backup jobs
32 * Kern Sibbald, March MM
34 * Basic tasks done here:
35 * Open DB and create records for this job.
36 * Open Message Channel with Storage daemon to tell him a job will be starting.
37 * Open connection with File daemon and pass him commands
39 * When the File daemon finishes the job, update the DB.
/*
 * Director -> File daemon command strings.
 *   backupcmd  starts the backup itself
 *   storaddr   printf-style: Storage daemon address, port and TLS need
 */
static char backupcmd[] = "backup\n";
static char storaddr[] = "storage address=%s port=%d ssl=%d\n";

/*
 * File daemon -> Director replies that are compared with response().
 */
static char OKbackup[] = "2000 OK backup\n";
static char OKstore[] = "2000 OK storage\n";

/*
 * End-of-job report from the File daemon, used as an sscanf() pattern.
 * One fragment per field so the field list can be read at a glance.
 */
static char EndJob[] =
   "2800 End Job"
   " TermCode=%d"
   " JobFiles=%u"
   " ReadBytes=%llu"
   " JobBytes=%llu"
   " Errors=%u"
   " VSS=%d"
   " Encrypt=%d\n";

/*
 * Same report as sent by File daemons older than 1.39.29 (04Dec06):
 * no VSS and Encrypt fields.
 */
static char OldEndJob[] =
   "2800 End Job"
   " TermCode=%d"
   " JobFiles=%u"
   " ReadBytes=%llu"
   " JobBytes=%llu"
   " Errors=%u\n";
/*
 * do_backup_init() -- per-job setup performed before a backup job runs.
 *
 * In the order visible here:
 *   - a Virtual Full job is handed to do_vbackup_init() and that result
 *     is returned unchanged;
 *   - the read storage is released, since a backup does not read;
 *   - the FileSet record is fetched or created;
 *   - the definitive level and "since" text are stored in jcr->since;
 *   - pool overrides are applied and the duplicate-job check is made;
 *   - the Pool record id is stored in jcr->jr.PoolId;
 *   - the storage of the Pool is copied in as write storage;
 *   - clone jobs are started.
 *
 * NOTE(review): the statements executed when one of the checks fails,
 * and the condition guarding the "No Storage specification" message,
 * are not part of this excerpt; presumably each failure returns
 * false -- confirm against the complete function.
 */
62 * Called here before the job is run to do the job
65 bool do_backup_init(JCR *jcr)
/* Virtual Full jobs have a dedicated init routine. */
68 if (jcr->get_JobLevel() == L_VIRTUAL_FULL) {
69 return do_vbackup_init(jcr);
71 free_rstorage(jcr); /* we don't read so release */
73 if (!get_or_create_fileset_record(jcr)) {
78 * Get definitive Job level and since time
80 get_level_since_time(jcr, jcr->since, sizeof(jcr->since));
82 apply_pool_overrides(jcr);
84 if (!allow_duplicate_job(jcr)) {
/* A PoolId of 0 is what the test right below treats as failure. */
88 jcr->jr.PoolId = get_or_create_pool_record(jcr, jcr->pool->name());
89 if (jcr->jr.PoolId == 0) {
93 /* If pool storage specified, use it instead of job storage */
94 copy_wstorage(jcr, jcr->pool->storage, _("Pool resource"));
97 Jmsg(jcr, M_FATAL, 0, _("No Storage specification found in Job or Pool.\n"));
101 create_clones(jcr); /* run any clone jobs */
107 * Foreach files in currrent list, send "/path/fname\0LStat" to FD
109 static int accurate_list_handler(void *ctx, int num_fields, char **row)
111 JCR *jcr = (JCR *)ctx;
113 if (job_canceled(jcr)) {
117 if (row[2] > 0) { /* discard when file_index == 0 */
118 jcr->file_bsock->fsend("%s%s%c%s", row[0], row[1], 0, row[4]);
124 * Send current file list to FD
125 * DIR -> FD : accurate files=xxxx
126 * DIR -> FD : /path/to/file\0Lstat
127 * DIR -> FD : /path/to/dir/\0Lstat
131 bool send_accurate_current_files(JCR *jcr)
135 if (!jcr->accurate || job_canceled(jcr) || jcr->get_JobLevel()==L_FULL) {
138 POOLMEM *jobids = get_pool_memory(PM_FNAME);
140 db_accurate_get_jobids(jcr, jcr->db, &jcr->jr, jobids);
143 free_pool_memory(jobids);
144 Jmsg(jcr, M_FATAL, 0, _("Cannot find previous jobids.\n"));
147 if (jcr->JobId) { /* display the message only for real jobs */
148 Jmsg(jcr, M_INFO, 0, _("Sending Accurate information.\n"));
150 /* to be able to allocate the right size for htable */
151 POOLMEM *nb = get_pool_memory(PM_FNAME);
152 *nb = 0; /* clear buffer */
153 Mmsg(buf, "SELECT sum(JobFiles) FROM Job WHERE JobId IN (%s)",jobids);
154 db_sql_query(jcr->db, buf.c_str(), db_get_int_handler, nb);
155 Dmsg2(200, "jobids=%s nb=%s\n", jobids, nb);
156 jcr->file_bsock->fsend("accurate files=%s\n", nb);
158 if (!db_open_batch_connexion(jcr, jcr->db)) {
159 Jmsg0(jcr, M_FATAL, 0, "Can't get dedicate sql connexion");
163 db_get_file_list(jcr, jcr->db_batch, jobids, accurate_list_handler, (void *)jcr);
165 /* TODO: close the batch connexion ? (can be used very soon) */
167 free_pool_memory(jobids);
168 free_pool_memory(nb);
170 jcr->file_bsock->signal(BNET_EOD);
/*
 * do_backup() -- run one backup job from the Director side.
 *
 * Sequence visible here:
 *   1. Virtual Full jobs are handed to do_vbackup().
 *   2. The start message is issued, the job is marked JS_Running and the
 *      job start record is written to the catalog.
 *   3. Storage daemon: connect, start the SD job, send "run", then start
 *      the SD message thread.
 *   4. File daemon: connect, send include list, exclude list and level,
 *      send the SD address/port/TLS need, then the runscript commands.
 *   5. The start time is refreshed and the start record rewritten.
 *   6. The accurate file list is sent, then the backup command.
 *   7. wait_for_job_termination() supplies the final status, batch file
 *      records are written, and backup_cleanup() runs when the status
 *      is JS_Terminated.
 *
 * NOTE(review): local declarations (fd, store, stat, ed1), the bail-out
 * label and the statements taken on each failed check are not part of
 * this excerpt -- confirm them against the complete function.
 */
176 * Do a backup of the specified FileSet
178 * Returns: false on failure
181 bool do_backup(JCR *jcr)
184 int tls_need = BNET_TLS_NONE;
189 if (jcr->get_JobLevel() == L_VIRTUAL_FULL) {
190 return do_vbackup(jcr);
193 /* Print Job Start message */
194 Jmsg(jcr, M_INFO, 0, _("Start Backup JobId %s, Job=%s\n"),
195 edit_uint64(jcr->JobId, ed1), jcr->Job);
197 set_jcr_job_status(jcr, JS_Running);
198 Dmsg2(100, "JobId=%d JobLevel=%c\n", jcr->jr.JobId, jcr->jr.JobLevel);
199 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
200 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
205 * Open a message channel connection with the Storage
206 * daemon. This is to let him know that our client
207 * will be contacting him for a backup session.
210 Dmsg0(110, "Open connection with storage daemon\n");
211 set_jcr_job_status(jcr, JS_WaitSD);
213 * Start conversation with Storage daemon
215 if (!connect_to_storage_daemon(jcr, 10, SDConnectTimeout, 1)) {
219 * Now start a job with the Storage daemon
221 if (!start_storage_daemon_job(jcr, NULL, jcr->wstorage)) {
226 * Start the job prior to starting the message thread below
227 * to avoid two threads from using the BSOCK structure at
230 if (!bnet_fsend(jcr->store_bsock, "run")) {
235 * Now start a Storage daemon message thread. Note,
236 * this thread is used to provide the catalog services
237 * for the backup job, including inserting the attributes
238 * into the catalog. See catalog_update() in catreq.c
240 if (!start_storage_daemon_message_thread(jcr)) {
243 Dmsg0(150, "Storage daemon connection OK\n");
245 set_jcr_job_status(jcr, JS_WaitFD);
246 if (!connect_to_file_daemon(jcr, 10, FDConnectTimeout, 1)) {
250 set_jcr_job_status(jcr, JS_Running);
251 fd = jcr->file_bsock;
253 if (!send_include_list(jcr)) {
257 if (!send_exclude_list(jcr)) {
261 if (!send_level_command(jcr)) {
266 * send Storage daemon address to the File daemon
/* An SDDport of 0 is replaced by the regular SDport before it is sent. */
269 if (store->SDDport == 0) {
270 store->SDDport = store->SDport;
273 /* TLS Requirement */
/* tls_need stays BNET_TLS_NONE unless the storage resource enables TLS. */
274 if (store->tls_enable) {
275 if (store->tls_require) {
276 tls_need = BNET_TLS_REQUIRED;
278 tls_need = BNET_TLS_OK;
282 fd->fsend(storaddr, store->address, store->SDDport, tls_need);
283 if (!response(jcr, fd, OKstore, "Storage", DISPLAY_ERROR)) {
287 if (!send_runscripts_commands(jcr)) {
292 * We re-update the job start record so that the start
293 * time is set after the run before job. This avoids
294 * that any files created by the run before job will
295 * be saved twice. They will be backed up in the current
296 * job, but not in the next one unless they are changed.
297 * Without this, they will be backed up in this job and
298 * in the next job run because in that case, their date
299 * is after the start of this run.
301 jcr->start_time = time(NULL);
302 jcr->jr.StartTime = jcr->start_time;
303 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
304 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
308 * If backup is in accurate mode, we send the list of
311 if (!send_accurate_current_files(jcr)) {
315 /* Send backup command */
316 fd->fsend(backupcmd);
317 if (!response(jcr, fd, OKbackup, "backup", DISPLAY_ERROR)) {
321 /* Pickup Job termination data */
322 stat = wait_for_job_termination(jcr);
323 db_write_batch_file_records(jcr); /* used by bulk batch file insert */
324 if (stat == JS_Terminated) {
325 backup_cleanup(jcr, stat);
330 /* Come here only after starting SD thread */
/* Error exit: mark the job failed, then wait with FDConnectTimeout as the timeout. */
332 set_jcr_job_status(jcr, JS_ErrorTerminated);
333 Dmsg1(400, "wait for sd. use=%d\n", jcr->use_count());
335 wait_for_job_termination(jcr, FDConnectTimeout);
336 Dmsg1(400, "after wait for sd. use=%d\n", jcr->use_count());
/*
 * wait_for_job_termination() -- collect the end-of-job report of the
 * File daemon, then wait for the Storage daemon.
 *
 *  jcr     - job control record; jcr->file_bsock is the FD connection
 *  timeout - handed to start_bsock_timer() for the FD socket
 *
 * Each message read with bget_dirmsg() is matched against EndJob (seven
 * fields) and then OldEndJob (five fields). A match sets the job status
 * from jcr->FDJobStatus; anything else is reported as an unexpected
 * Client Job message. After the loop a network error on the FD socket
 * is reported as fatal, BNET_TERMINATE is signalled to the Client, the
 * Storage daemon job is cancelled when the job was canceled or fd_ok is
 * false, and the Storage daemon is waited for. The FD counters are then
 * copied into jcr.
 *
 * Returns the first status that is not JS_Terminated, looking at
 * jcr->JobStatus and then jcr->FDJobStatus; otherwise jcr->SDJobStatus.
 *
 * NOTE(review): do_backup() calls this with jcr only, so a default for
 * timeout is presumably declared with the prototype -- confirm.
 * NOTE(review): EndJob and OldEndJob use %llu for the uint64_t counters
 * ReadBytes and JobBytes; verify uint64_t is unsigned long long on every
 * supported platform.
 * NOTE(review): JobFiles and JobErrors have no initializer; confirm they
 * are only copied into jcr after a successful parse (the guard is not
 * part of this excerpt).
 */
342 * Here we wait for the File daemon to signal termination,
343 * then we wait for the Storage daemon. When both
344 * are done, we return the job status.
345 * Also used by restore.c
347 int wait_for_job_termination(JCR *jcr, int timeout)
350 BSOCK *fd = jcr->file_bsock;
352 uint32_t JobFiles, JobErrors;
353 uint32_t JobWarnings = 0;
354 uint64_t ReadBytes = 0;
355 uint64_t JobBytes = 0;
360 set_jcr_job_status(jcr, JS_Running);
364 tid = start_bsock_timer(fd, timeout); /* TODO: New timeout directive??? */
366 /* Wait for Client to terminate */
367 while ((n = bget_dirmsg(fd)) >= 0) {
/* Try the current seven-field report first, then the older five-field one. */
369 (sscanf(fd->msg, EndJob, &jcr->FDJobStatus, &JobFiles,
370 &ReadBytes, &JobBytes, &JobErrors, &VSS, &Encrypt) == 7 ||
371 sscanf(fd->msg, OldEndJob, &jcr->FDJobStatus, &JobFiles,
372 &ReadBytes, &JobBytes, &JobErrors) == 5)) {
374 set_jcr_job_status(jcr, jcr->FDJobStatus);
375 Dmsg1(100, "FDStatus=%c\n", (char)jcr->JobStatus);
377 Jmsg(jcr, M_WARNING, 0, _("Unexpected Client Job message: %s\n"),
380 if (job_canceled(jcr)) {
385 stop_bsock_timer(tid);
388 if (is_bnet_error(fd)) {
389 Jmsg(jcr, M_FATAL, 0, _("Network error with FD during %s: ERR=%s\n"),
390 job_type_to_str(jcr->get_JobType()), fd->bstrerror());
392 fd->signal(BNET_TERMINATE); /* tell Client we are terminating */
395 /* Force cancel in SD if failing */
396 if (job_canceled(jcr) || !fd_ok) {
397 cancel_storage_daemon_job(jcr);
400 /* Note, the SD stores in jcr->JobFiles/ReadBytes/JobBytes/JobErrors */
401 wait_for_storage_daemon_termination(jcr);
403 /* Return values from FD */
/* JobErrors is added to the running total; the other counters are overwritten. */
405 jcr->JobFiles = JobFiles;
406 jcr->JobErrors += JobErrors; /* Keep total errors */
407 jcr->ReadBytes = ReadBytes;
408 jcr->JobBytes = JobBytes;
409 jcr->JobWarnings = JobWarnings;
411 jcr->Encrypt = Encrypt;
413 Jmsg(jcr, M_FATAL, 0, _("No Job status returned from FD.\n"));
416 // Dmsg4(100, "fd_ok=%d FDJS=%d JS=%d SDJS=%d\n", fd_ok, jcr->FDJobStatus,
417 // jcr->JobStatus, jcr->SDJobStatus);
419 /* Return the first error status we find Dir, FD, or SD */
/* A missing FD report or a socket error counts as an FD error termination. */
420 if (!fd_ok || is_bnet_error(fd)) { /* if fd not set, that use !fd_ok */
421 jcr->FDJobStatus = JS_ErrorTerminated;
423 if (jcr->JobStatus != JS_Terminated) {
424 return jcr->JobStatus;
426 if (jcr->FDJobStatus != JS_Terminated) {
427 return jcr->FDJobStatus;
429 return jcr->SDJobStatus;
/*
 * backup_cleanup() -- finish a backup job and emit the job report.
 *
 *  jcr      - job control record of the finished job
 *  TermCode - termination code, passed on to update_job_end()
 *
 * Virtual Full jobs are handed to vbackup_cleanup(). Otherwise the job
 * end is recorded, the Job, Client and Media records are read back for
 * the report (a failed Job or Media lookup marks the job as
 * JS_ErrorTerminated), the bootstrap file is updated, and a termination
 * text is chosen from jcr->JobStatus. For the error and canceled cases
 * the Storage daemon is sent BNET_TERMINATE and its message thread is
 * cancelled when present. Finally the times, transfer rate, volume
 * names and compression figure are computed and the report is issued
 * with Jmsg().
 *
 * NOTE(review): parts of the report format string and of its argument
 * list, several case labels, and the declarations of mr, cr and RunTime
 * are not part of this excerpt.
 */
433 * Release resources allocated during backup.
435 void backup_cleanup(JCR *jcr, int TermCode)
437 char sdt[50], edt[50], schedt[50];
438 char ec1[30], ec2[30], ec3[30], ec4[30], ec5[30], compress[50];
439 char ec6[30], ec7[30], ec8[30], elapsed[50];
440 char term_code[100], fd_term_msg[100], sd_term_msg[100];
441 const char *term_msg;
442 int msg_type = M_INFO;
445 double kbps, compression;
448 if (jcr->get_JobLevel() == L_VIRTUAL_FULL) {
449 vbackup_cleanup(jcr, TermCode);
453 Dmsg2(100, "Enter backup_cleanup %d %c\n", TermCode, TermCode);
454 memset(&mr, 0, sizeof(mr));
455 memset(&cr, 0, sizeof(cr));
457 update_job_end(jcr, TermCode);
/*
 * NOTE(review): the three warning texts below end in "ERR=%s" without a
 * newline, unlike the other job messages in this file -- confirm that
 * is intended.
 */
459 if (!db_get_job_record(jcr, jcr->db, &jcr->jr)) {
460 Jmsg(jcr, M_WARNING, 0, _("Error getting Job record for Job report: ERR=%s"),
461 db_strerror(jcr->db));
462 set_jcr_job_status(jcr, JS_ErrorTerminated);
465 bstrncpy(cr.Name, jcr->client->name(), sizeof(cr.Name));
466 if (!db_get_client_record(jcr, jcr->db, &cr)) {
467 Jmsg(jcr, M_WARNING, 0, _("Error getting Client record for Job report: ERR=%s"),
468 db_strerror(jcr->db));
471 bstrncpy(mr.VolumeName, jcr->VolumeName, sizeof(mr.VolumeName));
472 if (!db_get_media_record(jcr, jcr->db, &mr)) {
473 Jmsg(jcr, M_WARNING, 0, _("Error getting Media record for Volume \"%s\": ERR=%s"),
474 mr.VolumeName, db_strerror(jcr->db));
475 set_jcr_job_status(jcr, JS_ErrorTerminated);
478 update_bootstrap_file(jcr);
/* Pick the termination text, and for failures the message type, from the job status. */
480 switch (jcr->JobStatus) {
482 if (jcr->JobErrors || jcr->SDErrors) {
483 term_msg = _("Backup OK -- with warnings");
485 term_msg = _("Backup OK");
489 term_msg = _("Backup OK -- with warnings");
492 case JS_ErrorTerminated:
493 term_msg = _("*** Backup Error ***");
494 msg_type = M_ERROR; /* Generate error message */
/* Tell the SD we are terminating and cancel its message thread if one exists. */
495 if (jcr->store_bsock) {
496 jcr->store_bsock->signal(BNET_TERMINATE);
497 if (jcr->SD_msg_chan) {
498 pthread_cancel(jcr->SD_msg_chan);
503 term_msg = _("Backup Canceled");
504 if (jcr->store_bsock) {
505 jcr->store_bsock->signal(BNET_TERMINATE);
506 if (jcr->SD_msg_chan) {
507 pthread_cancel(jcr->SD_msg_chan);
/*
 * NOTE(review): sprintf() below is unbounded while term_code holds 100
 * bytes and the format is translatable; bsnprintf(), already used in
 * this function, would bound the write.
 */
512 term_msg = term_code;
513 sprintf(term_code, _("Inappropriate term code: %c\n"), jcr->JobStatus);
516 bstrftimes(schedt, sizeof(schedt), jcr->jr.SchedTime);
517 bstrftimes(sdt, sizeof(sdt), jcr->jr.StartTime);
518 bstrftimes(edt, sizeof(edt), jcr->jr.EndTime);
519 RunTime = jcr->jr.EndTime - jcr->jr.StartTime;
/*
 * Rate is JobBytes / (1000 * RunTime).
 * NOTE(review): a RunTime of zero would divide by zero; the guard, if
 * any, is not part of this excerpt -- confirm.
 */
523 kbps = ((double)jcr->jr.JobBytes) / (1000.0 * (double)RunTime);
525 if (!db_get_job_volume_names(jcr, jcr->db, jcr->jr.JobId, &jcr->VolumeName)) {
527 * Note, if the job has erred, most likely it did not write any
528 * tape, so suppress this "error" message since in that case
529 * it is normal. Or look at it the other way, only for a
530 * normal exit should we complain about this error.
532 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes) {
533 Jmsg(jcr, M_ERROR, 0, "%s", db_strerror(jcr->db));
535 jcr->VolumeName[0] = 0; /* none */
/*
 * Compression is 100 - 100 * JobBytes / ReadBytes. It is shown as "None"
 * when ReadBytes is zero or the result is below 0.5.
 */
538 if (jcr->ReadBytes == 0) {
539 bstrncpy(compress, "None", sizeof(compress));
541 compression = (double)100 - 100.0 * ((double)jcr->JobBytes / (double)jcr->ReadBytes);
542 if (compression < 0.5) {
543 bstrncpy(compress, "None", sizeof(compress));
545 bsnprintf(compress, sizeof(compress), "%.1f %%", compression);
548 jobstatus_to_ascii(jcr->FDJobStatus, fd_term_msg, sizeof(fd_term_msg));
549 jobstatus_to_ascii(jcr->SDJobStatus, sd_term_msg, sizeof(sd_term_msg));
551 // bmicrosleep(15, 0); /* for debugging SIGHUP */
/* The job report; msg_type is M_INFO unless the error case above changed it. */
553 Jmsg(jcr, msg_type, 0, _("%s %s %s (%s): %s\n"
554 " Build OS: %s %s %s\n"
557 " Backup Level: %s%s\n"
558 " Client: \"%s\" %s\n"
559 " FileSet: \"%s\" %s\n"
560 " Pool: \"%s\" (From %s)\n"
561 " Catalog: \"%s\" (From %s)\n"
562 " Storage: \"%s\" (From %s)\n"
563 " Scheduled time: %s\n"
566 " Elapsed time: %s\n"
568 " FD Files Written: %s\n"
569 " SD Files Written: %s\n"
570 " FD Bytes Written: %s (%sB)\n"
571 " SD Bytes Written: %s (%sB)\n"
573 " Software Compression: %s\n"
577 " Volume name(s): %s\n"
578 " Volume Session Id: %d\n"
579 " Volume Session Time: %d\n"
580 " Last Volume Bytes: %s (%sB)\n"
581 " Non-fatal FD errors: %d\n"
583 " FD termination status: %s\n"
584 " SD termination status: %s\n"
585 " Termination: %s\n\n"),
586 BACULA, my_name, VERSION, LSMDATE, edt,
587 HOST_OS, DISTNAME, DISTVER,
590 level_to_str(jcr->get_JobLevel()), jcr->since,
591 jcr->client->name(), cr.Uname,
592 jcr->fileset->name(), jcr->FSCreateTime,
593 jcr->pool->name(), jcr->pool_source,
594 jcr->catalog->name(), jcr->catalog_source,
595 jcr->wstore->name(), jcr->wstore_source,
599 edit_utime(RunTime, elapsed, sizeof(elapsed)),
601 edit_uint64_with_commas(jcr->jr.JobFiles, ec1),
602 edit_uint64_with_commas(jcr->SDJobFiles, ec2),
603 edit_uint64_with_commas(jcr->jr.JobBytes, ec3),
604 edit_uint64_with_suffix(jcr->jr.JobBytes, ec4),
605 edit_uint64_with_commas(jcr->SDJobBytes, ec5),
606 edit_uint64_with_suffix(jcr->SDJobBytes, ec6),
609 jcr->VSS?_("yes"):_("no"),
610 jcr->Encrypt?_("yes"):_("no"),
611 jcr->accurate?_("yes"):_("no"),
615 edit_uint64_with_commas(mr.VolBytes, ec7),
616 edit_uint64_with_suffix(mr.VolBytes, ec8),
623 Dmsg0(100, "Leave backup_cleanup()\n");
/*
 * update_bootstrap_file() -- write the bootstrap records of a finished
 * job to the WriteBootstrap target of the Job resource.
 *
 *  jcr - job control record of the finished job
 *
 * Work is only done when the job status is JS_Terminated, the job wrote
 * bytes and WriteBootstrap is set. The target name is expanded with
 * edit_job_codes(); it is opened either as a pipe (the leading "|" is
 * skipped) or as a file. One header line is written, followed by one
 * record per volume returned by db_get_job_volume_parameters(). Failure
 * to open the target marks the job as JS_ErrorTerminated; failure to
 * read the volume parameters does so only when the Storage daemon
 * reported files.
 *
 * NOTE(review): the test that selects pipe or file, the checks around
 * fd and VolCount, and the code that closes the stream and releases
 * VolParams are not part of this excerpt -- confirm them against the
 * complete function.
 */
626 void update_bootstrap_file(JCR *jcr)
628 /* Now update the bootstrap file if any */
629 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes &&
630 jcr->job->WriteBootstrap) {
634 POOLMEM *fname = get_pool_memory(PM_FNAME);
635 fname = edit_job_codes(jcr, fname, jcr->job->WriteBootstrap, "");
637 VOL_PARAMS *VolParams = NULL;
639 char edt[50], ed1[50], ed2[50];
643 bpipe = open_bpipe(fname+1, 0, "w"); /* skip first char "|" */
644 fd = bpipe ? bpipe->wfd : NULL;
646 /* ***FIXME*** handle BASE */
/* Mode is "w+b" for a Full level job and "a+b" for every other level. */
647 fd = fopen(fname, jcr->get_JobLevel()==L_FULL?"w+b":"a+b");
650 VolCount = db_get_job_volume_parameters(jcr, jcr->db, jcr->JobId,
653 Jmsg(jcr, M_ERROR, 0, _("Could not get Job Volume Parameters to "
654 "update Bootstrap file. ERR=%s\n"), db_strerror(jcr->db));
/* Only flag the job as failed when the SD reported files. */
655 if (jcr->SDJobFiles != 0) {
656 set_jcr_job_status(jcr, JS_ErrorTerminated);
660 /* Start output with when and who wrote it */
661 bstrftimes(edt, sizeof(edt), time(NULL));
662 fprintf(fd, "# %s - %s - %s%s\n", edt, jcr->jr.Job,
663 level_to_str(jcr->get_JobLevel()), jcr->since);
664 for (int i=0; i < VolCount; i++) {
665 /* Write the record */
666 fprintf(fd, "Volume=\"%s\"\n", VolParams[i].VolumeName);
667 fprintf(fd, "MediaType=\"%s\"\n", VolParams[i].MediaType);
/* The Slot line is written only for a positive slot number. */
668 if (VolParams[i].Slot > 0) {
669 fprintf(fd, "Slot=%d\n", VolParams[i].Slot);
671 fprintf(fd, "VolSessionId=%u\n", jcr->VolSessionId);
672 fprintf(fd, "VolSessionTime=%u\n", jcr->VolSessionTime);
673 fprintf(fd, "VolAddr=%s-%s\n",
674 edit_uint64(VolParams[i].StartAddr, ed1),
675 edit_uint64(VolParams[i].EndAddr, ed2));
676 fprintf(fd, "FileIndex=%d-%d\n", VolParams[i].FirstIndex,
677 VolParams[i].LastIndex);
689 Jmsg(jcr, M_ERROR, 0, _("Could not open WriteBootstrap file:\n"
690 "%s: ERR=%s\n"), fname, be.bstrerror());
691 set_jcr_job_status(jcr, JS_ErrorTerminated);
693 free_pool_memory(fname);