2 Bacula® - The Network Backup Solution
4 Copyright (C) 2000-2008 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from
7 many others, a complete list can be found in the file AUTHORS.
8 This program is Free Software; you can redistribute it and/or
9 modify it under the terms of version two of the GNU General Public
10 License as published by the Free Software Foundation and included
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 Bacula® is a registered trademark of John Walker.
24 The licensor of Bacula is the Free Software Foundation Europe
25 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
26 Switzerland, email:ftf@fsfeurope.org.
30 * Bacula Director -- backup.c -- responsible for doing backup jobs
32 * Kern Sibbald, March MM
34 * Basic tasks done here:
35 * Open DB and create records for this job.
36 * Open Message Channel with Storage daemon to tell him a job will be starting.
37 * Open connection with File daemon and pass him commands
39 * When the File daemon finishes the job, update the DB.
/*
 * Director <-> File daemon protocol strings used in this file.
 * Every command and every expected reply ends with a newline, which is
 * part of the text that is sent or compared.
 */
48 /* Commands sent to File daemon */
/* Sent unchanged by do_backup(); the reply is checked against OKbackup. */
49 static char backupcmd[] = "backup\n";
/*
 * Format filled in by do_backup() with store->address, store->SDDport and
 * tls_need; the reply is checked against OKstore.
 */
50 static char storaddr[] = "storage address=%s port=%d ssl=%d\n";
52 /* Responses received from File daemon */
53 static char OKbackup[] = "2000 OK backup\n";
54 static char OKstore[] = "2000 OK storage\n";
/*
 * Job termination report, used as a sscanf() format in
 * wait_for_job_termination(); it is accepted only when all 7 fields convert.
 * NOTE(review): the %llu fields are stored through uint64_t pointers there;
 * confirm uint64_t is unsigned long long on every supported platform.
 */
55 static char EndJob[] = "2800 End Job TermCode=%d JobFiles=%u "
56 "ReadBytes=%llu JobBytes=%llu Errors=%u "
57 "VSS=%d Encrypt=%d\n";
58 /* Pre 1.39.29 (04Dec06) EndJob */
/*
 * Same report without the VSS and Encrypt fields; it is accepted only when
 * all 5 fields convert.
 */
59 static char OldEndJob[] = "2800 End Job TermCode=%d JobFiles=%u "
60 "ReadBytes=%llu JobBytes=%llu Errors=%u\n";
62 * Called here before the job is run to do the job
/*
 * Prepare a backup job before it is run.
 *
 * Visible steps, in order: release the read storage, get or create the
 * FileSet record, determine the job level and "since" time, apply pool
 * overrides, check for a duplicate job, get or create the Pool record,
 * copy the Pool's storage into the job's write storage, and start any
 * clone jobs.
 *
 *  jcr   job control record of the backup job
 *
 * Returns a bool.
 * NOTE(review): the bodies of the failure branches are not part of this
 * listing; presumably each of them returns false -- confirm.
 */
65 bool do_backup_init(JCR *jcr)
68 free_rstorage(jcr); /* we don't read so release */
70 if (!get_or_create_fileset_record(jcr)) {
75 * Get definitive Job level and since time
/* The "since" text is written into jcr->since, bounded by its size. */
77 get_level_since_time(jcr, jcr->since, sizeof(jcr->since));
79 apply_pool_overrides(jcr);
81 if (!allow_duplicate_job(jcr)) {
/* The Pool is looked up by name; a PoolId of 0 is tested for right below. */
85 jcr->jr.PoolId = get_or_create_pool_record(jcr, jcr->pool->name());
86 if (jcr->jr.PoolId == 0) {
90 /* If pool storage specified, use it instead of job storage */
91 copy_wstorage(jcr, jcr->pool->storage, _("Pool resource"));
/*
 * Fatal job message for a missing storage definition; the condition that
 * guards it is not part of this listing.
 */
94 Jmsg(jcr, M_FATAL, 0, _("No Storage specification found in Job or Pool.\n"));
98 create_clones(jcr); /* run any clone jobs */
104 * For each file in the current list, send "/path/fname\0LStat" to FD
/*
 * Row callback handed to db_get_file_list() by
 * send_accurate_current_files().
 *
 *  ctx         the JCR of the running job, passed as void *
 *  num_fields  number of columns in row (not used in the visible lines)
 *  row         column values as strings; presumably row[0] is the path,
 *              row[1] the file name, row[2] the file index and row[4] the
 *              LStat -- TODO confirm against the query behind
 *              db_get_file_list()
 *
 * For a row that passes the test below it sends row[0] and row[1]
 * concatenated, a NUL byte, then row[4] over the File daemon socket.
 * The return statements are not part of this listing.
 */
106 static int accurate_list_handler(void *ctx, int num_fields, char **row)
108 JCR *jcr = (JCR *)ctx;
/* Cancellation is checked before each row is sent. */
110 if (job_canceled(jcr)) {
/*
 * NOTE(review): row[2] is a char *, so "row[2] > 0" tests the pointer and
 * not the numeric value of the column. In practice a row whose file index
 * is the string "0" still passes. To do what the comment on the line says,
 * the column text has to be examined instead.
 */
114 if (row[2] > 0) { /* discard when file_index == 0 */
/* The literal 0 passed for %c puts the NUL separator into the message. */
115 jcr->file_bsock->fsend("%s%s%c%s", row[0], row[1], 0, row[4]);
121 * Send current file list to FD
122 * DIR -> FD : accurate files=xxxx
123 * DIR -> FD : /path/to/file\0Lstat
124 * DIR -> FD : /path/to/dir/\0Lstat
/*
 * Send the catalog's current file list to the File daemon (accurate mode).
 *
 * Visible exchange: "accurate files=<count>", then one message per row
 * produced through accurate_list_handler(), then a BNET_EOD signal.
 *
 *  jcr   job control record; jcr->db and jcr->file_bsock are used
 *
 * Returns a bool; the return statements are not part of this listing.
 */
128 bool send_accurate_current_files(JCR *jcr)
/*
 * Three cases are singled out here: accurate mode is off, the job is
 * canceled, or the job level is Full.
 */
132 if (jcr->accurate==false || job_canceled(jcr) || jcr->JobLevel==L_FULL) {
/* jobids is the buffer handed to db_accurate_get_jobids() for this job. */
135 POOLMEM *jobids = get_pool_memory(PM_FNAME);
136 db_accurate_get_jobids(jcr, jcr->db, &jcr->jr, jobids);
/* Failure path: the buffer is released before the fatal message is issued. */
139 free_pool_memory(jobids);
140 Jmsg(jcr, M_FATAL, 0, _("Cannot find previous jobids.\n"));
143 Jmsg(jcr, M_INFO, 0, _("Sending Accurate information.\n"));
145 /* to be able to allocate the right size for htable */
/*
 * nb receives the result of the sum query as text and is sent with %s.
 * It is cleared first, presumably so that it is an empty string if the
 * handler never runs -- confirm.
 */
146 POOLMEM *nb = get_pool_memory(PM_FNAME);
147 *nb = 0; /* clear buffer */
148 Mmsg(buf, "SELECT sum(JobFiles) FROM Job WHERE JobId IN (%s)",jobids);
/*
 * NOTE(review): the results of db_sql_query() and, further down, of
 * db_get_file_list() are not checked in the visible lines.
 */
149 db_sql_query(jcr->db, buf.c_str(), db_get_int_handler, nb);
150 Dmsg2(200, "jobids=%s nb=%s\n", jobids, nb);
151 jcr->file_bsock->fsend("accurate files=%s\n", nb);
/* Each row is passed to accurate_list_handler() with jcr as its context. */
153 db_get_file_list(jcr, jcr->db, jobids, accurate_list_handler, (void *)jcr);
/* Both pool buffers obtained above are released here. */
155 free_pool_memory(jobids);
156 free_pool_memory(nb);
/* BNET_EOD is signalled after the list has been sent. */
158 jcr->file_bsock->signal(BNET_EOD);
159 /* TODO: use response() ? */
165 * Do a backup of the specified FileSet
167 * Returns: false on failure
/*
 * Run a backup job: start the Storage daemon side, drive the File daemon
 * through the backup and wait for the job to terminate.
 *
 * Visible sequence:
 *   1. mark the job running and update the job start record
 *   2. connect to the Storage daemon, start its job, send "run", then
 *      start the Storage daemon message thread
 *   3. connect to the File daemon and send the include list, the exclude
 *      list and the level command
 *   4. send the Storage daemon address, port and TLS need to the File
 *      daemon
 *   5. send the runscript commands, then refresh the job start time
 *   6. send the accurate file list (see send_accurate_current_files())
 *   7. send the backup command, wait for termination, write the batch
 *      file records and call backup_cleanup() when the status is
 *      JS_Terminated
 *
 *  jcr   job control record of the backup job
 *
 * Returns a bool. The return statements, several declarations and the
 * branch bodies of the failure checks are not part of this listing.
 */
170 bool do_backup(JCR *jcr)
/* Starts at "no TLS"; raised further down from the store's TLS settings. */
173 int tls_need = BNET_TLS_NONE;
179 /* Print Job Start message */
180 Jmsg(jcr, M_INFO, 0, _("Start Backup JobId %s, Job=%s\n"),
181 edit_uint64(jcr->JobId, ed1), jcr->Job);
183 set_jcr_job_status(jcr, JS_Running);
184 Dmsg2(100, "JobId=%d JobLevel=%c\n", jcr->jr.JobId, jcr->jr.JobLevel);
185 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
186 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
191 * Open a message channel connection with the Storage
192 * daemon. This is to let him know that our client
193 * will be contacting him for a backup session.
196 Dmsg0(110, "Open connection with storage daemon\n");
197 set_jcr_job_status(jcr, JS_WaitSD);
199 * Start conversation with Storage daemon
201 if (!connect_to_storage_daemon(jcr, 10, SDConnectTimeout, 1)) {
205 * Now start a job with the Storage daemon
207 if (!start_storage_daemon_job(jcr, NULL, jcr->wstorage)) {
212 * Start the job prior to starting the message thread below
213 * to avoid two threads from using the BSOCK structure at
216 if (!bnet_fsend(jcr->store_bsock, "run")) {
221 * Now start a Storage daemon message thread. Note,
222 * this thread is used to provide the catalog services
223 * for the backup job, including inserting the attributes
224 * into the catalog. See catalog_update() in catreq.c
226 if (!start_storage_daemon_message_thread(jcr)) {
229 Dmsg0(150, "Storage daemon connection OK\n");
/* File daemon phase: status is JS_WaitFD while connecting, then JS_Running. */
231 set_jcr_job_status(jcr, JS_WaitFD);
232 if (!connect_to_file_daemon(jcr, 10, FDConnectTimeout, 1)) {
236 set_jcr_job_status(jcr, JS_Running);
237 fd = jcr->file_bsock;
239 if (!send_include_list(jcr)) {
243 if (!send_exclude_list(jcr)) {
247 if (!send_level_command(jcr)) {
252 * send Storage daemon address to the File daemon
/*
 * An SDDport of 0 falls back to SDport.
 * NOTE(review): this writes through the store pointer itself; confirm that
 * changing the shared object (rather than a per-job copy) is intended.
 */
255 if (store->SDDport == 0) {
256 store->SDDport = store->SDport;
259 /* TLS Requirement */
/*
 * TLS enabled and required gives BNET_TLS_REQUIRED; the BNET_TLS_OK
 * assignment is presumably the else branch of the tls_require test.
 */
260 if (store->tls_enable) {
261 if (store->tls_require) {
262 tls_need = BNET_TLS_REQUIRED;
264 tls_need = BNET_TLS_OK;
268 fd->fsend(storaddr, store->address, store->SDDport, tls_need);
269 if (!response(jcr, fd, OKstore, "Storage", DISPLAY_ERROR)) {
273 if (!send_runscripts_commands(jcr)) {
278 * We re-update the job start record so that the start
279 * time is set after the run before job. This avoids
280 * that any files created by the run before job will
281 * be saved twice. They will be backed up in the current
282 * job, but not in the next one unless they are changed.
283 * Without this, they will be backed up in this job and
284 * in the next job run because in that case, their date
285 * is after the start of this run.
287 jcr->start_time = time(NULL);
288 jcr->jr.StartTime = jcr->start_time;
289 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
290 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
294 * If backup is in accurate mode, we send the list of
297 if (!send_accurate_current_files(jcr)) {
301 /* Send backup command */
302 fd->fsend(backupcmd);
303 if (!response(jcr, fd, OKbackup, "backup", DISPLAY_ERROR)) {
307 /* Pickup Job termination data */
/*
 * Called with one argument here and with (jcr, FDConnectTimeout) in the
 * error path below; the declaration presumably gives timeout a default
 * value -- confirm.
 */
308 stat = wait_for_job_termination(jcr);
/* The batch file records are written before the status is examined. */
309 db_write_batch_file_records(jcr); /* used by bulk batch file insert */
310 if (stat == JS_Terminated) {
311 backup_cleanup(jcr, stat);
316 /* Come here only after starting SD thread */
/*
 * Error path: the job is marked JS_ErrorTerminated, then the daemons are
 * waited for with the File daemon connect timeout as the timer value.
 */
318 set_jcr_job_status(jcr, JS_ErrorTerminated);
319 Dmsg1(400, "wait for sd. use=%d\n", jcr->use_count());
321 wait_for_job_termination(jcr, FDConnectTimeout);
322 Dmsg1(400, "after wait for sd. use=%d\n", jcr->use_count());
328 * Here we wait for the File daemon to signal termination,
329 * then we wait for the Storage daemon. When both
330 * are done, we return the job status.
331 * Also used by restore.c
/*
 * Wait for the File daemon to report the end of the job, then wait for the
 * Storage daemon, and return a job status.
 *
 *  jcr      job control record; jcr->file_bsock is the File daemon socket
 *  timeout  value handed to start_bsock_timer() for the File daemon socket
 *
 * Returns the first status that is not JS_Terminated, looking at
 * jcr->JobStatus, then jcr->FDJobStatus; otherwise jcr->SDJobStatus.
 */
333 int wait_for_job_termination(JCR *jcr, int timeout)
336 BSOCK *fd = jcr->file_bsock;
/*
 * NOTE(review): unlike ReadBytes and JobBytes, JobFiles and Errors have no
 * initializer; they are only set by a successful sscanf() below. The copy
 * into jcr further down is presumably guarded by fd_ok -- confirm.
 */
338 uint32_t JobFiles, Errors;
339 uint64_t ReadBytes = 0;
340 uint64_t JobBytes = 0;
345 set_jcr_job_status(jcr, JS_Running);
349 tid = start_bsock_timer(fd, timeout); /* TODO: use user timeout */
351 /* Wait for Client to terminate */
/*
 * Each message is tried against the current EndJob format (7 fields) and
 * then against the older one (5 fields); anything else is reported as an
 * unexpected message.
 */
352 while ((n = bget_dirmsg(fd)) >= 0) {
354 (sscanf(fd->msg, EndJob, &jcr->FDJobStatus, &JobFiles,
355 &ReadBytes, &JobBytes, &Errors, &VSS, &Encrypt) == 7 ||
356 sscanf(fd->msg, OldEndJob, &jcr->FDJobStatus, &JobFiles,
357 &ReadBytes, &JobBytes, &Errors) == 5)) {
359 set_jcr_job_status(jcr, jcr->FDJobStatus);
/* NOTE(review): labelled "FDStatus" but prints jcr->JobStatus. */
360 Dmsg1(100, "FDStatus=%c\n", (char)jcr->JobStatus);
362 Jmsg(jcr, M_WARNING, 0, _("Unexpected Client Job message: %s\n"),
365 if (job_canceled(jcr)) {
/* The timer started above is stopped once the receive loop is left. */
370 stop_bsock_timer(tid);
373 if (is_bnet_error(fd)) {
374 Jmsg(jcr, M_FATAL, 0, _("Network error with FD during %s: ERR=%s\n"),
375 job_type_to_str(jcr->JobType), fd->bstrerror());
377 fd->signal(BNET_TERMINATE); /* tell Client we are terminating */
380 /* Force cancel in SD if failing */
381 if (job_canceled(jcr) || !fd_ok) {
382 cancel_storage_daemon_job(jcr);
385 /* Note, the SD stores in jcr->JobFiles/ReadBytes/JobBytes/Errors */
386 wait_for_storage_daemon_termination(jcr);
388 /* Return values from FD */
/*
 * The File daemon's figures are copied into jcr after the Storage daemon
 * wait, so they replace what the Storage daemon stored there.
 */
390 jcr->JobFiles = JobFiles;
391 jcr->Errors = Errors;
392 jcr->ReadBytes = ReadBytes;
393 jcr->JobBytes = JobBytes;
395 jcr->Encrypt = Encrypt;
397 Jmsg(jcr, M_FATAL, 0, _("No Job status returned from FD.\n"));
400 // Dmsg4(100, "fd_ok=%d FDJS=%d JS=%d SDJS=%d\n", fd_ok, jcr->FDJobStatus,
401 // jcr->JobStatus, jcr->SDJobStatus);
403 /* Return the first error status we find Dir, FD, or SD */
/* A missing End Job report or a socket error counts as a File daemon error. */
404 if (!fd_ok || is_bnet_error(fd)) { /* if fd not set, that use !fd_ok */
405 jcr->FDJobStatus = JS_ErrorTerminated;
407 if (jcr->JobStatus != JS_Terminated) {
408 return jcr->JobStatus;
410 if (jcr->FDJobStatus != JS_Terminated) {
411 return jcr->FDJobStatus;
413 return jcr->SDJobStatus;
417 * Release resources allocated during backup.
/*
 * Finish a backup job: record the end of the job, collect catalog data and
 * emit the job report.
 *
 *  jcr       job control record of the backup job
 *  TermCode  termination status handed to update_job_end()
 *
 * The termination text of the report is chosen from jcr->JobStatus, which
 * the catalog lookups below may change to JS_ErrorTerminated. No value is
 * returned.
 */
419 void backup_cleanup(JCR *jcr, int TermCode)
/* Scratch buffers for the formatted times and numbers of the report. */
421 char sdt[50], edt[50], schedt[50];
422 char ec1[30], ec2[30], ec3[30], ec4[30], ec5[30], compress[50];
423 char ec6[30], ec7[30], ec8[30], elapsed[50];
424 char term_code[100], fd_term_msg[100], sd_term_msg[100];
425 const char *term_msg;
426 int msg_type = M_INFO;
429 double kbps, compression;
432 Dmsg2(100, "Enter backup_cleanup %d %c\n", TermCode, TermCode);
/* Media and client records are zeroed before being used as lookup keys. */
433 memset(&mr, 0, sizeof(mr));
434 memset(&cr, 0, sizeof(cr));
436 update_job_end(jcr, TermCode);
/*
 * In the visible lines a failed Job or Media lookup also sets the job
 * status to JS_ErrorTerminated, while a failed Client lookup only warns.
 * NOTE(review): the three warning formats below have no trailing "\n",
 * unlike the other Jmsg() calls in this file.
 */
438 if (!db_get_job_record(jcr, jcr->db, &jcr->jr)) {
439 Jmsg(jcr, M_WARNING, 0, _("Error getting Job record for Job report: ERR=%s"),
440 db_strerror(jcr->db));
441 set_jcr_job_status(jcr, JS_ErrorTerminated);
444 bstrncpy(cr.Name, jcr->client->name(), sizeof(cr.Name));
445 if (!db_get_client_record(jcr, jcr->db, &cr)) {
446 Jmsg(jcr, M_WARNING, 0, _("Error getting Client record for Job report: ERR=%s"),
447 db_strerror(jcr->db));
450 bstrncpy(mr.VolumeName, jcr->VolumeName, sizeof(mr.VolumeName));
451 if (!db_get_media_record(jcr, jcr->db, &mr)) {
452 Jmsg(jcr, M_WARNING, 0, _("Error getting Media record for Volume \"%s\": ERR=%s"),
453 mr.VolumeName, db_strerror(jcr->db));
454 set_jcr_job_status(jcr, JS_ErrorTerminated);
457 update_bootstrap_file(jcr);
/*
 * Pick the termination text and message type. The case labels for the
 * first and third groups of lines are not part of this listing.
 */
459 switch (jcr->JobStatus) {
461 if (jcr->Errors || jcr->SDErrors) {
462 term_msg = _("Backup OK -- with warnings");
464 term_msg = _("Backup OK");
468 case JS_ErrorTerminated:
469 term_msg = _("*** Backup Error ***");
470 msg_type = M_ERROR; /* Generate error message */
/* On error the Storage daemon is told to stop and its message thread is cancelled. */
471 if (jcr->store_bsock) {
472 jcr->store_bsock->signal(BNET_TERMINATE);
473 if (jcr->SD_msg_chan) {
474 pthread_cancel(jcr->SD_msg_chan);
479 term_msg = _("Backup Canceled");
480 if (jcr->store_bsock) {
481 jcr->store_bsock->signal(BNET_TERMINATE);
482 if (jcr->SD_msg_chan) {
483 pthread_cancel(jcr->SD_msg_chan);
/*
 * NOTE(review): sprintf() writes into term_code[100] without a bound and
 * the format is a translated string; bsnprintf(), already used in this
 * function, would limit the write to sizeof(term_code).
 */
488 term_msg = term_code;
489 sprintf(term_code, _("Inappropriate term code: %c\n"), jcr->JobStatus);
492 bstrftimes(schedt, sizeof(schedt), jcr->jr.SchedTime);
493 bstrftimes(sdt, sizeof(sdt), jcr->jr.StartTime);
494 bstrftimes(edt, sizeof(edt), jcr->jr.EndTime);
495 RunTime = jcr->jr.EndTime - jcr->jr.StartTime;
/*
 * Rate in units of 1000 bytes per second.
 * NOTE(review): divides by RunTime; a guard against RunTime <= 0 is
 * presumably in the lines missing from this listing -- confirm.
 */
499 kbps = ((double)jcr->jr.JobBytes) / (1000.0 * (double)RunTime);
501 if (!db_get_job_volume_names(jcr, jcr->db, jcr->jr.JobId, &jcr->VolumeName)) {
503 * Note, if the job has erred, most likely it did not write any
504 * tape, so suppress this "error" message since in that case
505 * it is normal. Or look at it the other way, only for a
506 * normal exit should we complain about this error.
508 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes) {
509 Jmsg(jcr, M_ERROR, 0, "%s", db_strerror(jcr->db));
511 jcr->VolumeName[0] = 0; /* none */
/*
 * Compression is 100 - 100 * JobBytes / ReadBytes percent; it is reported
 * as "None" when nothing was read or when the figure is below 0.5.
 */
514 if (jcr->ReadBytes == 0) {
515 bstrncpy(compress, "None", sizeof(compress));
517 compression = (double)100 - 100.0 * ((double)jcr->JobBytes / (double)jcr->ReadBytes);
518 if (compression < 0.5) {
519 bstrncpy(compress, "None", sizeof(compress));
521 bsnprintf(compress, sizeof(compress), "%.1f %%", compression);
524 jobstatus_to_ascii(jcr->FDJobStatus, fd_term_msg, sizeof(fd_term_msg));
525 jobstatus_to_ascii(jcr->SDJobStatus, sd_term_msg, sizeof(sd_term_msg));
527 // bmicrosleep(15, 0); /* for debugging SIGHUP */
/*
 * The job report. Parts of the format string and of the argument list are
 * not part of this listing, so the pairing of conversions and arguments
 * cannot be checked from here.
 */
529 Jmsg(jcr, msg_type, 0, _("Bacula %s %s (%s): %s\n"
530 " Build OS: %s %s %s\n"
533 " Backup Level: %s%s\n"
534 " Client: \"%s\" %s\n"
535 " FileSet: \"%s\" %s\n"
536 " Pool: \"%s\" (From %s)\n"
537 " Catalog: \"%s\" (From %s)\n"
538 " Storage: \"%s\" (From %s)\n"
539 " Scheduled time: %s\n"
542 " Elapsed time: %s\n"
544 " FD Files Written: %s\n"
545 " SD Files Written: %s\n"
546 " FD Bytes Written: %s (%sB)\n"
547 " SD Bytes Written: %s (%sB)\n"
549 " Software Compression: %s\n"
553 " Volume name(s): %s\n"
554 " Volume Session Id: %d\n"
555 " Volume Session Time: %d\n"
556 " Last Volume Bytes: %s (%sB)\n"
557 " Non-fatal FD errors: %d\n"
559 " FD termination status: %s\n"
560 " SD termination status: %s\n"
561 " Termination: %s\n\n"),
562 my_name, VERSION, LSMDATE, edt,
563 HOST_OS, DISTNAME, DISTVER,
566 level_to_str(jcr->JobLevel), jcr->since,
567 jcr->client->name(), cr.Uname,
568 jcr->fileset->name(), jcr->FSCreateTime,
569 jcr->pool->name(), jcr->pool_source,
570 jcr->catalog->name(), jcr->catalog_source,
571 jcr->wstore->name(), jcr->wstore_source,
575 edit_utime(RunTime, elapsed, sizeof(elapsed)),
577 edit_uint64_with_commas(jcr->jr.JobFiles, ec1),
578 edit_uint64_with_commas(jcr->SDJobFiles, ec2),
579 edit_uint64_with_commas(jcr->jr.JobBytes, ec3),
580 edit_uint64_with_suffix(jcr->jr.JobBytes, ec4),
581 edit_uint64_with_commas(jcr->SDJobBytes, ec5),
582 edit_uint64_with_suffix(jcr->SDJobBytes, ec6),
585 jcr->VSS?_("yes"):_("no"),
586 jcr->Encrypt?_("yes"):_("no"),
587 jcr->accurate?_("yes"):_("no"),
591 edit_uint64_with_commas(mr.VolBytes, ec7),
592 edit_uint64_with_suffix(mr.VolBytes, ec8),
599 Dmsg0(100, "Leave backup_cleanup()\n");
/*
 * Write the bootstrap information of a finished job.
 *
 * Work is done only when the job status is JS_Terminated, the job record
 * shows bytes written (jcr->jr.JobBytes != 0) and the job has a
 * WriteBootstrap setting. The output goes either to a pipe opened with
 * open_bpipe() or to a file opened with fopen(); the test that chooses
 * between the two is not part of this listing.
 *
 *  jcr   job control record of the finished job
 *
 * No value is returned; failures are reported through Jmsg() and by
 * setting the job status to JS_ErrorTerminated.
 */
602 void update_bootstrap_file(JCR *jcr)
604 /* Now update the bootstrap file if any */
605 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes &&
606 jcr->job->WriteBootstrap) {
/*
 * The WriteBootstrap setting is passed through edit_job_codes(); its
 * result is assigned back to fname, which is what gets freed at the end.
 */
610 POOLMEM *fname = get_pool_memory(PM_FNAME);
611 fname = edit_job_codes(jcr, fname, jcr->job->WriteBootstrap, "");
613 VOL_PARAMS *VolParams = NULL;
/* Pipe case: the program name is fname without its leading "|". */
619 bpipe = open_bpipe(fname+1, 0, "w"); /* skip first char "|" */
620 fd = bpipe ? bpipe->wfd : NULL;
622 /* ***FIXME*** handle BASE */
/* File case: a Full job truncates the file ("w+b"), any other level appends ("a+b"). */
623 fd = fopen(fname, jcr->JobLevel==L_FULL?"w+b":"a+b");
/*
 * When the volume parameters cannot be read, an error is issued, and the
 * job is marked failed only if the Storage daemon reported files
 * (SDJobFiles != 0).
 */
626 VolCount = db_get_job_volume_parameters(jcr, jcr->db, jcr->JobId,
629 Jmsg(jcr, M_ERROR, 0, _("Could not get Job Volume Parameters to "
630 "update Bootstrap file. ERR=%s\n"), db_strerror(jcr->db));
631 if (jcr->SDJobFiles != 0) {
632 set_jcr_job_status(jcr, JS_ErrorTerminated);
636 /* Start output with when and who wrote it */
637 bstrftimes(edt, sizeof(edt), time(NULL));
638 fprintf(fd, "# %s - %s - %s%s\n", edt, jcr->jr.Job,
639 level_to_str(jcr->JobLevel), jcr->since);
/*
 * One record per volume. VolSessionId and VolSessionTime come from jcr and
 * are therefore the same in every record.
 * NOTE(review): the fprintf() results are not checked in the visible lines.
 */
640 for (int i=0; i < VolCount; i++) {
641 /* Write the record */
642 fprintf(fd, "Volume=\"%s\"\n", VolParams[i].VolumeName);
643 fprintf(fd, "MediaType=\"%s\"\n", VolParams[i].MediaType);
644 fprintf(fd, "VolSessionId=%u\n", jcr->VolSessionId);
645 fprintf(fd, "VolSessionTime=%u\n", jcr->VolSessionTime);
646 fprintf(fd, "VolFile=%u-%u\n", VolParams[i].StartFile,
647 VolParams[i].EndFile);
648 fprintf(fd, "VolBlock=%u-%u\n", VolParams[i].StartBlock,
649 VolParams[i].EndBlock);
650 fprintf(fd, "FileIndex=%d-%d\n", VolParams[i].FirstIndex,
651 VolParams[i].LastIndex);
/* Reached when the pipe or file could not be opened: report it and fail the job. */
663 Jmsg(jcr, M_ERROR, 0, _("Could not open WriteBootstrap file:\n"
664 "%s: ERR=%s\n"), fname, be.bstrerror());
665 set_jcr_job_status(jcr, JS_ErrorTerminated);
667 free_pool_memory(fname);