2 Bacula® - The Network Backup Solution
4 Copyright (C) 2000-2008 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from
7 many others, a complete list can be found in the file AUTHORS.
8 This program is Free Software; you can redistribute it and/or
9 modify it under the terms of version two of the GNU General Public
10 License as published by the Free Software Foundation and included
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 Bacula® is a registered trademark of John Walker.
24 The licensor of Bacula is the Free Software Foundation Europe
25 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
26 Switzerland, email:ftf@fsfeurope.org.
30 * Bacula Director -- backup.c -- responsible for doing backup jobs
32 * Kern Sibbald, March MM
34 * Basic tasks done here:
35 * Open DB and create records for this job.
36 * Open Message Channel with Storage daemon to tell him a job will be starting.
37 * Open connection with File daemon and pass him commands
39 * When the File daemon finishes the job, update the DB.
/*
 * Protocol strings exchanged with the File daemon (FD) during a backup.
 * The command strings are sent with fd->fsend() in do_backup(), the OK
 * strings are the replies do_backup() passes to response() as the expected
 * answer, and EndJob / OldEndJob are the sscanf() formats that
 * wait_for_job_termination() uses to parse the end-of-job report.
 */
48 /* Commands sent to File daemon */
49 static char backupcmd[] = "backup\n";
/* Arguments in order: storage address, storage port, TLS requirement code */
50 static char storaddr[] = "storage address=%s port=%d ssl=%d\n";
52 /* Responses received from File daemon */
53 static char OKbackup[] = "2000 OK backup\n";
54 static char OKstore[] = "2000 OK storage\n";
/* Current end-of-job report: seven fields, the last two being VSS and Encrypt */
55 static char EndJob[] = "2800 End Job TermCode=%d JobFiles=%u "
56 "ReadBytes=%llu JobBytes=%llu Errors=%u "
57 "VSS=%d Encrypt=%d\n";
58 /* Pre 1.39.29 (04Dec06) EndJob */
/* Older five-field report, without the VSS and Encrypt fields */
59 static char OldEndJob[] = "2800 End Job TermCode=%d JobFiles=%u "
60 "ReadBytes=%llu JobBytes=%llu Errors=%u\n";
62 * Called here before the job is run to do the job-specific setup.
/*
 * Prepare a backup job before it runs.
 *
 * Steps visible here, in order: release the read storage, get or create
 * the FileSet record, determine the job level and since time, apply pool
 * overrides, get or create the Pool record (its id is stored in
 * jcr->jr.PoolId), select the write storage from the Pool resource, and
 * finally start any clone jobs.
 *
 *  jcr  job control record of the backup job being initialised.
 *
 * NOTE(review): the statements inside each failure branch are not visible
 * in this view; presumably each one returns false and the function returns
 * true at the end -- confirm against the full source.
 */
65 bool do_backup_init(JCR *jcr)
68 free_rstorage(jcr); /* we don't read so release */
70 if (!get_or_create_fileset_record(jcr)) {
75 * Get definitive Job level and since time
/* The since text is written into jcr->since, bounded by its size */
77 get_level_since_time(jcr, jcr->since, sizeof(jcr->since));
79 apply_pool_overrides(jcr);
/* A PoolId of zero is treated as failure to obtain the Pool record */
81 jcr->jr.PoolId = get_or_create_pool_record(jcr, jcr->pool->name());
82 if (jcr->jr.PoolId == 0) {
86 /* If pool storage specified, use it instead of job storage */
87 copy_wstorage(jcr, jcr->pool->storage, _("Pool resource"));
/* NOTE(review): the condition guarding this fatal message is not visible here */
90 Jmsg(jcr, M_FATAL, 0, _("No Storage specification found in Job or Pool.\n"));
94 create_clones(jcr); /* run any clone jobs */
100 * Do a backup of the specified FileSet
102 * Returns: false on failure
/*
 * Run a backup job end to end.
 *
 * Sequence visible here:
 *   1. Log the job start, mark the job JS_Running and update the job
 *      start record in the catalog.
 *   2. Connect to the Storage daemon (status JS_WaitSD), start the SD job,
 *      send it the run command, then start the SD message thread.
 *   3. Connect to the File daemon (status JS_WaitFD), then send the
 *      include list, exclude list, level command, storage address and
 *      runscripts commands.
 *   4. Reset the job start time and update the start record again.
 *   5. Send the backup command, wait for job termination, write the batch
 *      file records and call backup_cleanup() when the returned status is
 *      JS_Terminated.
 *
 *  jcr  job control record of the backup job.
 *
 * Returns a bool; the comment preceding this function states false on
 * failure. NOTE(review): the return statements, several local declarations
 * (ed1, fd, store, stat) and the label of the error path are not visible
 * in this view -- confirm against the full source.
 */
105 bool do_backup(JCR *jcr)
/* Default is no TLS; raised below from the storage resource settings */
108 int tls_need = BNET_TLS_NONE;
114 /* Print Job Start message */
115 Jmsg(jcr, M_INFO, 0, _("Start Backup JobId %s, Job=%s\n"),
116 edit_uint64(jcr->JobId, ed1), jcr->Job);
118 set_jcr_job_status(jcr, JS_Running);
119 Dmsg2(100, "JobId=%d JobLevel=%c\n", jcr->jr.JobId, jcr->jr.JobLevel);
120 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
121 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
126 * Open a message channel connection with the Storage
127 * daemon. This is to let him know that our client
128 * will be contacting him for a backup session.
131 Dmsg0(110, "Open connection with storage daemon\n");
132 set_jcr_job_status(jcr, JS_WaitSD);
134 * Start conversation with Storage daemon
136 if (!connect_to_storage_daemon(jcr, 10, SDConnectTimeout, 1)) {
140 * Now start a job with the Storage daemon
142 if (!start_storage_daemon_job(jcr, NULL, jcr->wstorage)) {
147 * Start the job prior to starting the message thread below
148 * to avoid two threads from using the BSOCK structure at
151 if (!bnet_fsend(jcr->store_bsock, "run")) {
156 * Now start a Storage daemon message thread. Note,
157 * this thread is used to provide the catalog services
158 * for the backup job, including inserting the attributes
159 * into the catalog. See catalog_update() in catreq.c
161 if (!start_storage_daemon_message_thread(jcr)) {
164 Dmsg0(150, "Storage daemon connection OK\n");
166 set_jcr_job_status(jcr, JS_WaitFD);
167 if (!connect_to_file_daemon(jcr, 10, FDConnectTimeout, 1)) {
171 set_jcr_job_status(jcr, JS_Running);
/* All File daemon traffic below goes through this socket */
172 fd = jcr->file_bsock;
174 if (!send_include_list(jcr)) {
178 if (!send_exclude_list(jcr)) {
182 if (!send_level_command(jcr)) {
187 * send Storage daemon address to the File daemon
/* When SDDport is zero, fall back to SDport for the port sent to the FD */
190 if (store->SDDport == 0) {
191 store->SDDport = store->SDport;
194 /* TLS Requirement */
/* tls_enable with tls_require gives BNET_TLS_REQUIRED, tls_enable alone gives BNET_TLS_OK */
195 if (store->tls_enable) {
196 if (store->tls_require) {
197 tls_need = BNET_TLS_REQUIRED;
199 tls_need = BNET_TLS_OK;
/* tls_need fills the ssl= field of the storaddr command */
203 fd->fsend(storaddr, store->address, store->SDDport, tls_need);
204 if (!response(jcr, fd, OKstore, "Storage", DISPLAY_ERROR)) {
208 if (!send_runscripts_commands(jcr)) {
213 * We re-update the job start record so that the start
214 * time is set after the run before job. This avoids
215 * that any files created by the run before job will
216 * be saved twice. They will be backed up in the current
217 * job, but not in the next one unless they are changed.
218 * Without this, they will be backed up in this job and
219 * in the next job run because in that case, their date
220 * is after the start of this run.
222 jcr->start_time = time(NULL);
223 jcr->jr.StartTime = jcr->start_time;
224 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
225 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
228 /* Send backup command */
229 fd->fsend(backupcmd);
230 if (!response(jcr, fd, OKbackup, "backup", DISPLAY_ERROR)) {
234 /* Pickup Job termination data */
/* Called without a timeout argument here; the error path below passes FDConnectTimeout */
235 stat = wait_for_job_termination(jcr);
236 db_write_batch_file_records(jcr); /* used by bulk batch file insert */
/* backup_cleanup() is only invoked from here when the job terminated normally */
237 if (stat == JS_Terminated) {
238 backup_cleanup(jcr, stat);
243 /* Come here only after starting SD thread */
/* Error path: mark the job failed, then wait for termination with a bounded timeout */
245 set_jcr_job_status(jcr, JS_ErrorTerminated);
246 Dmsg1(400, "wait for sd. use=%d\n", jcr->use_count());
248 wait_for_job_termination(jcr, FDConnectTimeout);
249 Dmsg1(400, "after wait for sd. use=%d\n", jcr->use_count());
255 * Here we wait for the File daemon to signal termination,
256 * then we wait for the Storage daemon. When both
257 * are done, we return the job status.
258 * Also used by restore.c
/*
 * Wait for the File daemon to report the end of the job, then wait for
 * the Storage daemon, and return a combined job status.
 *
 *  jcr      job control record; the FD socket is taken from
 *           jcr->file_bsock.
 *  timeout  value handed to start_bsock_timer() for the FD socket.
 *
 * Messages read from the FD are matched against the EndJob format (seven
 * fields) and then the OldEndJob format (five fields). On a match the FD
 * status is stored in jcr->FDJobStatus and applied to the job status.
 * After the Storage daemon has terminated, the counters reported by the
 * FD are copied into jcr.
 *
 * Returns the first status that is not JS_Terminated, checked in the
 * order jcr->JobStatus, jcr->FDJobStatus; otherwise jcr->SDJobStatus.
 *
 * NOTE(review): the declarations of n, tid, fd_ok, VSS and Encrypt, and
 * the conditions guarding several statements below, are not visible in
 * this view -- confirm against the full source.
 */
260 int wait_for_job_termination(JCR *jcr, int timeout)
263 BSOCK *fd = jcr->file_bsock;
265 uint32_t JobFiles, Errors;
266 uint64_t ReadBytes = 0;
267 uint64_t JobBytes = 0;
272 set_jcr_job_status(jcr, JS_Running);
276 tid = start_bsock_timer(fd, timeout); /* TODO: use user timeout */
278 /* Wait for Client to terminate */
279 while ((n = bget_dirmsg(fd)) >= 0) {
/* Try the current seven-field report first, then the pre 1.39.29 five-field one */
281 (sscanf(fd->msg, EndJob, &jcr->FDJobStatus, &JobFiles,
282 &ReadBytes, &JobBytes, &Errors, &VSS, &Encrypt) == 7 ||
283 sscanf(fd->msg, OldEndJob, &jcr->FDJobStatus, &JobFiles,
284 &ReadBytes, &JobBytes, &Errors) == 5)) {
286 set_jcr_job_status(jcr, jcr->FDJobStatus);
287 Dmsg1(100, "FDStatus=%c\n", (char)jcr->JobStatus);
289 Jmsg(jcr, M_WARNING, 0, _("Unexpected Client Job message: %s\n"),
292 if (job_canceled(jcr)) {
297 stop_bsock_timer(tid);
300 if (is_bnet_error(fd)) {
301 Jmsg(jcr, M_FATAL, 0, _("Network error with FD during %s: ERR=%s\n"),
302 job_type_to_str(jcr->JobType), fd->bstrerror());
304 fd->signal(BNET_TERMINATE); /* tell Client we are terminating */
307 /* Force cancel in SD if failing */
308 if (job_canceled(jcr) || !fd_ok) {
309 cancel_storage_daemon_job(jcr);
312 /* Note, the SD stores in jcr->JobFiles/ReadBytes/JobBytes/Errors */
313 wait_for_storage_daemon_termination(jcr);
315 /* Return values from FD */
/* These assignments come after the SD wait, so the FD figures are the ones left in jcr */
317 jcr->JobFiles = JobFiles;
318 jcr->Errors = Errors;
319 jcr->ReadBytes = ReadBytes;
320 jcr->JobBytes = JobBytes;
322 jcr->Encrypt = Encrypt;
324 Jmsg(jcr, M_FATAL, 0, _("No Job status returned from FD.\n"));
327 // Dmsg4(100, "fd_ok=%d FDJS=%d JS=%d SDJS=%d\n", fd_ok, jcr->FDJobStatus,
328 // jcr->JobStatus, jcr->SDJobStatus);
330 /* Return the first error status we find Dir, FD, or SD */
331 if (!fd_ok || is_bnet_error(fd)) { /* if fd not set, then use !fd_ok */
332 jcr->FDJobStatus = JS_ErrorTerminated;
334 if (jcr->JobStatus != JS_Terminated) {
335 return jcr->JobStatus;
337 if (jcr->FDJobStatus != JS_Terminated) {
338 return jcr->FDJobStatus;
340 return jcr->SDJobStatus;
344 * Release resources allocated during backup.
/*
 * Finish a backup job and emit the end-of-job report.
 *
 *  jcr       job control record of the finished job.
 *  TermCode  termination code passed on to update_job_end().
 *
 * Steps visible here: record the job end, reload the Job, Client and
 * Media records from the catalog, update the bootstrap file, choose the
 * termination message from jcr->JobStatus, compute the transfer rate and
 * the compression figure, look up the volume names, and send the
 * formatted summary with Jmsg() at the chosen message type.
 *
 * A failure to read the Job or Media record sets the job status to
 * JS_ErrorTerminated; a failure to read the Client record only produces a
 * warning.
 *
 * NOTE(review): the declarations of mr, cr and RunTime, the case labels of
 * the switch, and several arguments of the final Jmsg() call are not
 * visible in this view -- confirm against the full source.
 */
346 void backup_cleanup(JCR *jcr, int TermCode)
348 char sdt[50], edt[50], schedt[50];
349 char ec1[30], ec2[30], ec3[30], ec4[30], ec5[30], compress[50];
350 char ec6[30], ec7[30], ec8[30], elapsed[50];
351 char term_code[100], fd_term_msg[100], sd_term_msg[100];
352 const char *term_msg;
/* The report goes out as M_INFO unless the error case below raises it to M_ERROR */
353 int msg_type = M_INFO;
356 double kbps, compression;
359 Dmsg2(100, "Enter backup_cleanup %d %c\n", TermCode, TermCode);
360 memset(&mr, 0, sizeof(mr));
361 memset(&cr, 0, sizeof(cr));
363 update_job_end(jcr, TermCode);
365 if (!db_get_job_record(jcr, jcr->db, &jcr->jr)) {
366 Jmsg(jcr, M_WARNING, 0, _("Error getting Job record for Job report: ERR=%s"),
367 db_strerror(jcr->db));
368 set_jcr_job_status(jcr, JS_ErrorTerminated);
/* The Client record is looked up by the client resource name */
371 bstrncpy(cr.Name, jcr->client->name(), sizeof(cr.Name));
372 if (!db_get_client_record(jcr, jcr->db, &cr)) {
373 Jmsg(jcr, M_WARNING, 0, _("Error getting Client record for Job report: ERR=%s"),
374 db_strerror(jcr->db));
/* The Media record is looked up by the volume name held in jcr->VolumeName */
377 bstrncpy(mr.VolumeName, jcr->VolumeName, sizeof(mr.VolumeName));
378 if (!db_get_media_record(jcr, jcr->db, &mr)) {
379 Jmsg(jcr, M_WARNING, 0, _("Error getting Media record for Volume \"%s\": ERR=%s"),
380 mr.VolumeName, db_strerror(jcr->db));
381 set_jcr_job_status(jcr, JS_ErrorTerminated);
384 update_bootstrap_file(jcr);
/* Select the termination text, and for failed or canceled jobs shut down the SD side */
386 switch (jcr->JobStatus) {
/* Any FD or SD error count turns a clean result into one with warnings */
388 if (jcr->Errors || jcr->SDErrors) {
389 term_msg = _("Backup OK -- with warnings");
391 term_msg = _("Backup OK");
395 case JS_ErrorTerminated:
396 term_msg = _("*** Backup Error ***");
397 msg_type = M_ERROR; /* Generate error message */
/* Signal termination on the SD socket and cancel the SD message thread if present */
398 if (jcr->store_bsock) {
399 jcr->store_bsock->signal(BNET_TERMINATE);
400 if (jcr->SD_msg_chan) {
401 pthread_cancel(jcr->SD_msg_chan);
406 term_msg = _("Backup Canceled");
407 if (jcr->store_bsock) {
408 jcr->store_bsock->signal(BNET_TERMINATE);
409 if (jcr->SD_msg_chan) {
410 pthread_cancel(jcr->SD_msg_chan);
/*
 * NOTE(review): sprintf() below is unbounded while term_code holds 100
 * bytes and the format comes from a translation; bsnprintf() is already
 * used elsewhere in this function and would bound the write.
 */
415 term_msg = term_code;
416 sprintf(term_code, _("Inappropriate term code: %c\n"), jcr->JobStatus);
419 bstrftimes(schedt, sizeof(schedt), jcr->jr.SchedTime);
420 bstrftimes(sdt, sizeof(sdt), jcr->jr.StartTime);
421 bstrftimes(edt, sizeof(edt), jcr->jr.EndTime);
422 RunTime = jcr->jr.EndTime - jcr->jr.StartTime;
/*
 * Rate is JobBytes divided by 1000 times the run time.
 * NOTE(review): confirm that a zero or negative RunTime is guarded before
 * this division; no guard is visible in this view.
 */
426 kbps = ((double)jcr->jr.JobBytes) / (1000.0 * (double)RunTime);
428 if (!db_get_job_volume_names(jcr, jcr->db, jcr->jr.JobId, &jcr->VolumeName)) {
430 * Note, if the job has erred, most likely it did not write any
431 * tape, so suppress this "error" message since in that case
432 * it is normal. Or look at it the other way, only for a
433 * normal exit should we complain about this error.
435 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes) {
436 Jmsg(jcr, M_ERROR, 0, "%s", db_strerror(jcr->db));
438 jcr->VolumeName[0] = 0; /* none */
/* Compression is reported as None when nothing was read or the saving is under 0.5 percent */
441 if (jcr->ReadBytes == 0) {
442 bstrncpy(compress, "None", sizeof(compress));
444 compression = (double)100 - 100.0 * ((double)jcr->JobBytes / (double)jcr->ReadBytes);
445 if (compression < 0.5) {
446 bstrncpy(compress, "None", sizeof(compress));
448 bsnprintf(compress, sizeof(compress), "%.1f %%", compression);
451 jobstatus_to_ascii(jcr->FDJobStatus, fd_term_msg, sizeof(fd_term_msg));
452 jobstatus_to_ascii(jcr->SDJobStatus, sd_term_msg, sizeof(sd_term_msg));
454 // bmicrosleep(15, 0); /* for debugging SIGHUP */
/* Emit the job report; msg_type is M_INFO or M_ERROR as chosen in the switch above */
456 Jmsg(jcr, msg_type, 0, _("Bacula %s %s (%s): %s\n"
457 " Build OS: %s %s %s\n"
460 " Backup Level: %s%s\n"
461 " Client: \"%s\" %s\n"
462 " FileSet: \"%s\" %s\n"
463 " Pool: \"%s\" (From %s)\n"
464 " Storage: \"%s\" (From %s)\n"
465 " Scheduled time: %s\n"
468 " Elapsed time: %s\n"
470 " FD Files Written: %s\n"
471 " SD Files Written: %s\n"
472 " FD Bytes Written: %s (%sB)\n"
473 " SD Bytes Written: %s (%sB)\n"
475 " Software Compression: %s\n"
478 " Volume name(s): %s\n"
479 " Volume Session Id: %d\n"
480 " Volume Session Time: %d\n"
481 " Last Volume Bytes: %s (%sB)\n"
482 " Non-fatal FD errors: %d\n"
484 " FD termination status: %s\n"
485 " SD termination status: %s\n"
486 " Termination: %s\n\n"),
487 my_name, VERSION, LSMDATE, edt,
488 HOST_OS, DISTNAME, DISTVER,
491 level_to_str(jcr->JobLevel), jcr->since,
492 jcr->client->name(), cr.Uname,
493 jcr->fileset->name(), jcr->FSCreateTime,
494 jcr->pool->name(), jcr->pool_source,
495 jcr->wstore->name(), jcr->wstore_source,
499 edit_utime(RunTime, elapsed, sizeof(elapsed)),
501 edit_uint64_with_commas(jcr->jr.JobFiles, ec1),
502 edit_uint64_with_commas(jcr->SDJobFiles, ec2),
503 edit_uint64_with_commas(jcr->jr.JobBytes, ec3),
504 edit_uint64_with_suffix(jcr->jr.JobBytes, ec4),
505 edit_uint64_with_commas(jcr->SDJobBytes, ec5),
506 edit_uint64_with_suffix(jcr->SDJobBytes, ec6),
510 jcr->Encrypt?"yes":"no",
514 edit_uint64_with_commas(mr.VolBytes, ec7),
515 edit_uint64_with_suffix(mr.VolBytes, ec8),
522 Dmsg0(100, "Leave backup_cleanup()\n");
/*
 * Write the bootstrap file for a finished job.
 *
 *  jcr  job control record of the finished job.
 *
 * Nothing is written unless the job status is JS_Terminated, the job
 * record shows bytes written, and the Job resource has WriteBootstrap
 * set. The target name is produced by edit_job_codes() from the
 * WriteBootstrap value. The output goes either to a pipe opened with
 * open_bpipe() or to a file opened with fopen(). It consists of a header
 * line followed by one group of records per volume returned by
 * db_get_job_volume_parameters().
 *
 * Failure to open the target sets the job status to JS_ErrorTerminated.
 *
 * NOTE(review): the declarations of fd, bpipe, VolCount and edt, the test
 * that chooses between pipe and file, and the code that closes the output
 * and frees VolParams are not visible in this view -- confirm against the
 * full source.
 */
525 void update_bootstrap_file(JCR *jcr)
527 /* Now update the bootstrap file if any */
528 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes &&
529 jcr->job->WriteBootstrap) {
/* fname is pool memory and is released with free_pool_memory() at the end */
533 POOLMEM *fname = get_pool_memory(PM_FNAME);
534 fname = edit_job_codes(jcr, fname, jcr->job->WriteBootstrap, "");
536 VOL_PARAMS *VolParams = NULL;
/* Pipe target: the command starts after the leading pipe character */
542 bpipe = open_bpipe(fname+1, 0, "w"); /* skip first char "|" */
543 fd = bpipe ? bpipe->wfd : NULL;
545 /* ***FIXME*** handle BASE */
/* File target: an L_FULL job opens with w+b, any other level opens with a+b */
546 fd = fopen(fname, jcr->JobLevel==L_FULL?"w+b":"a+b");
549 VolCount = db_get_job_volume_parameters(jcr, jcr->db, jcr->JobId,
552 Jmsg(jcr, M_ERROR, 0, _("Could not get Job Volume Parameters to "
553 "update Bootstrap file. ERR=%s\n"), db_strerror(jcr->db));
/* Missing volume parameters only fail the job when the SD reported files */
554 if (jcr->SDJobFiles != 0) {
555 set_jcr_job_status(jcr, JS_ErrorTerminated);
559 /* Start output with when and who wrote it */
560 bstrftimes(edt, sizeof(edt), time(NULL));
561 fprintf(fd, "# %s - %s - %s%s\n", edt, jcr->jr.Job,
562 level_to_str(jcr->JobLevel), jcr->since);
/* One group of records per volume; the session id and time come from jcr, not the volume */
563 for (int i=0; i < VolCount; i++) {
564 /* Write the record */
565 fprintf(fd, "Volume=\"%s\"\n", VolParams[i].VolumeName);
566 fprintf(fd, "MediaType=\"%s\"\n", VolParams[i].MediaType);
567 fprintf(fd, "VolSessionId=%u\n", jcr->VolSessionId);
568 fprintf(fd, "VolSessionTime=%u\n", jcr->VolSessionTime);
569 fprintf(fd, "VolFile=%u-%u\n", VolParams[i].StartFile,
570 VolParams[i].EndFile);
571 fprintf(fd, "VolBlock=%u-%u\n", VolParams[i].StartBlock,
572 VolParams[i].EndBlock);
573 fprintf(fd, "FileIndex=%d-%d\n", VolParams[i].FirstIndex,
574 VolParams[i].LastIndex);
/* Open failure: report the target name and error text, then fail the job */
586 Jmsg(jcr, M_ERROR, 0, _("Could not open WriteBootstrap file:\n"
587 "%s: ERR=%s\n"), fname, be.bstrerror());
588 set_jcr_job_status(jcr, JS_ErrorTerminated);
590 free_pool_memory(fname);