3 * Bacula Director -- backup.c -- responsible for doing backup jobs
5 * Kern Sibbald, March MM
7 * Basic tasks done here:
8 * Open DB and create records for this job.
9 * Open Message Channel with Storage daemon to tell him a job will be starting.
10 * Open connection with File daemon and pass him commands
12 * When the File daemon finishes the job, update the DB.
17 Bacula® - The Network Backup Solution
19 Copyright (C) 2000-2006 Free Software Foundation Europe e.V.
21 The main author of Bacula is Kern Sibbald, with contributions from
22 many others, a complete list can be found in the file AUTHORS.
23 This program is Free Software; you can redistribute it and/or
24 modify it under the terms of version two of the GNU General Public
25 License as published by the Free Software Foundation plus additions
26 that are listed in the file LICENSE.
28 This program is distributed in the hope that it will be useful, but
29 WITHOUT ANY WARRANTY; without even the implied warranty of
30 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
31 General Public License for more details.
33 You should have received a copy of the GNU General Public License
34 along with this program; if not, write to the Free Software
35 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
38 Bacula® is a registered trademark of John Walker.
39 The licensor of Bacula is the Free Software Foundation Europe
40 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
41 Switzerland, email:ftf@fsfeurope.org.
/*
 * Protocol strings exchanged with the File daemon.
 * storaddr is a printf-style template; do_backup fills it with the storage
 * address, the data port and the TLS requirement code.
 */
48 /* Commands sent to File daemon */
49 static char backupcmd[] = "backup\n";
50 static char storaddr[] = "storage address=%s port=%d ssl=%d\n";
/* OKstore and OKbackup are the replies do_backup passes to response(). */
52 /* Responses received from File daemon */
53 static char OKbackup[] = "2000 OK backup\n";
54 static char OKstore[] = "2000 OK storage\n";
/*
 * EndJob and OldEndJob are used as sscanf templates in
 * wait_for_job_termination. The current form has seven fields; the older
 * form lacks the trailing VSS and Encrypt fields and has five.
 */
55 static char EndJob[] = "2800 End Job TermCode=%d JobFiles=%u "
56 "ReadBytes=%llu JobBytes=%llu Errors=%u "
57 "VSS=%d Encrypt=%d\n";
58 /* Pre 1.39.29 (04Dec06) EndJob */
59 static char OldEndJob[] = "2800 End Job TermCode=%d JobFiles=%u "
60 "ReadBytes=%llu JobBytes=%llu Errors=%u\n";
62 * Called here before the job is run to do the job-specific initialization
/*
 * Prepare the JCR for a backup before the job runs: release the read
 * storage, obtain the FileSet and Pool records through the get_or_create
 * helpers, settle the job level and since-time, select the write storage,
 * and start any clone jobs.
 *
 * NOTE(review): this listing drops some source lines (braces, return
 * statements, the condition guarding the M_FATAL message below), so the
 * exact return values and branch structure cannot be confirmed from here.
 */
65 bool do_backup_init(JCR *jcr)
68 free_rstorage(jcr); /* we don't read so release */
/* Failure to get the FileSet record is tested here; the branch body is not visible. */
70 if (!get_or_create_fileset_record(jcr)) {
75 * Get definitive Job level and since time
/* The since-time text is written into jcr->since, bounded by its size. */
77 get_level_since_time(jcr, jcr->since, sizeof(jcr->since));
79 apply_pool_overrides(jcr);
/* The pool is looked up by name; a PoolId of 0 is tested as the failure case. */
81 jcr->jr.PoolId = get_or_create_pool_record(jcr, jcr->pool->name());
82 if (jcr->jr.PoolId == 0) {
86 /* If pool storage specified, use it instead of job storage */
87 copy_wstorage(jcr, jcr->pool->storage, _("Pool resource"));
/* NOTE(review): presumably emitted only when no write storage was found; the guarding condition is not visible -- confirm. */
90 Jmsg(jcr, M_FATAL, 0, _("No Storage specification found in Job or Pool.\n"));
94 create_clones(jcr); /* run any clone jobs */
100 * Do a backup of the specified FileSet
102 * Returns: false on failure
/*
 * Run a backup job. In order, the visible code:
 *   - logs the start message and writes the job start record,
 *   - connects to the Storage daemon, starts the job there, sends the run
 *     command and starts the Storage daemon message thread,
 *   - connects to the File daemon and calls the send_* helpers for level,
 *     runscripts, include list and exclude list,
 *   - sends the storage address, port and TLS requirement,
 *   - refreshes the job start time, sends the backup command,
 *   - waits for termination and calls backup_cleanup on JS_Terminated.
 *
 * NOTE(review): this listing omits some lines (braces, returns, the error
 * label and several declarations such as fd, store, stat and ed1), so the
 * failure paths below are only partly visible.
 */
105 bool do_backup(JCR *jcr)
/* Default is no TLS; raised further down when the storage resource enables it. */
108 int tls_need = BNET_TLS_NONE;
114 /* Print Job Start message */
115 Jmsg(jcr, M_INFO, 0, _("Start Backup JobId %s, Job=%s\n"),
116 edit_uint64(jcr->JobId, ed1), jcr->Job);
118 set_jcr_job_status(jcr, JS_Running);
119 Dmsg2(100, "JobId=%d JobLevel=%c\n", jcr->jr.JobId, jcr->jr.JobLevel);
/* First write of the start record; it is written again later with a fresh start time. */
120 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
121 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
126 * Open a message channel connection with the Storage
127 * daemon. This is to let him know that our client
128 * will be contacting him for a backup session.
131 Dmsg0(110, "Open connection with storage daemon\n");
/* Status is JS_WaitSD while the Storage daemon session is being set up. */
132 set_jcr_job_status(jcr, JS_WaitSD);
134 * Start conversation with Storage daemon
136 if (!connect_to_storage_daemon(jcr, 10, SDConnectTimeout, 1)) {
140 * Now start a job with the Storage daemon
142 if (!start_storage_daemon_job(jcr, NULL, jcr->wstorage)) {
147 * Start the job prior to starting the message thread below
148 * to avoid two threads from using the BSOCK structure at
/* The run command goes out on store_bsock before the message thread exists. */
151 if (!bnet_fsend(jcr->store_bsock, "run")) {
156 * Now start a Storage daemon message thread. Note,
157 * this thread is used to provide the catalog services
158 * for the backup job, including inserting the attributes
159 * into the catalog. See catalog_update() in catreq.c
161 if (!start_storage_daemon_message_thread(jcr)) {
164 Dmsg0(150, "Storage daemon connection OK\n");
/* Status is JS_WaitFD during the File daemon connect, then back to JS_Running. */
166 set_jcr_job_status(jcr, JS_WaitFD);
167 if (!connect_to_file_daemon(jcr, 10, FDConnectTimeout, 1)) {
171 set_jcr_job_status(jcr, JS_Running);
172 fd = jcr->file_bsock;
/* Each helper below is tested for failure; the branch bodies are not visible in this listing. */
174 if (!send_level_command(jcr)) {
178 if (!send_runscripts_commands(jcr)) {
182 if (!send_include_list(jcr)) {
186 if (!send_exclude_list(jcr)) {
191 * send Storage daemon address to the File daemon
/* An SDDport of 0 is replaced by SDport, and the change is stored back into the resource. */
194 if (store->SDDport == 0) {
195 store->SDDport = store->SDport;
198 /* TLS Requirement */
/*
 * tls_need becomes BNET_TLS_REQUIRED when tls_require is set.
 * NOTE(review): the BNET_TLS_OK assignment presumably sits in an else
 * branch whose line is missing from this listing -- confirm.
 */
199 if (store->tls_enable) {
200 if (store->tls_require) {
201 tls_need = BNET_TLS_REQUIRED;
203 tls_need = BNET_TLS_OK;
/* NOTE(review): the bnet_fsend result is ignored here, unlike the run command above; only the reply is checked. */
207 bnet_fsend(fd, storaddr, store->address, store->SDDport, tls_need);
208 if (!response(jcr, fd, OKstore, "Storage", DISPLAY_ERROR)) {
213 * We re-update the job start record so that the start
214 * time is set after the run before job. This avoids
215 * that any files created by the run before job will
216 * be saved twice. They will be backed up in the current
217 * job, but not in the next one unless they are changed.
218 * Without this, they will be backed up in this job and
219 * in the next job run because in that case, their date
220 * is after the start of this run.
/* The same timestamp is stored in the JCR and in the job record before the second write. */
222 jcr->start_time = time(NULL);
223 jcr->jr.StartTime = jcr->start_time;
224 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
225 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
228 /* Send backup command */
/* NOTE(review): as with the storage address, the send result is not checked; only the reply is. */
229 bnet_fsend(fd, backupcmd);
230 if (!response(jcr, fd, OKbackup, "backup", DISPLAY_ERROR)) {
234 /* Pickup Job termination data */
/* backup_cleanup is called here only when the combined status is JS_Terminated. */
235 stat = wait_for_job_termination(jcr);
236 if (stat == JS_Terminated) {
237 backup_cleanup(jcr, stat);
242 /* Come here only after starting SD thread */
/* Error path: mark the job as failed, then wait for the Storage daemon to finish. */
244 set_jcr_job_status(jcr, JS_ErrorTerminated);
245 Dmsg1(400, "wait for sd. use=%d\n", jcr->use_count());
246 wait_for_storage_daemon_termination(jcr);
247 Dmsg1(400, "after wait for sd. use=%d\n", jcr->use_count());
253 * Here we wait for the File daemon to signal termination,
254 * then we wait for the Storage daemon. When both
255 * are done, we return the job status.
256 * Also used by restore.c
/*
 * Read messages from the File daemon socket until bget_dirmsg returns a
 * negative value or the job is canceled, parsing the End Job message into
 * the FD status and counters. Then signal the client, wait for the Storage
 * daemon, copy the FD counters into the JCR and return a status.
 *
 * Returns the first status that is not JS_Terminated, checked in the order
 * jcr->JobStatus, jcr->FDJobStatus; otherwise jcr->SDJobStatus.
 *
 * NOTE(review): some lines are missing from this listing (declarations of
 * n, fd_ok, VSS and Encrypt, plus several braces and branch heads).
 */
258 int wait_for_job_termination(JCR *jcr)
261 BSOCK *fd = jcr->file_bsock;
/*
 * NOTE(review): JobFiles and Errors have no initializer and are written
 * only by the sscanf calls below. Confirm the later copy into the JCR is
 * guarded by a successful parse (the guard is not visible here).
 */
263 uint32_t JobFiles, Errors;
264 uint64_t ReadBytes = 0;
265 uint64_t JobBytes = 0;
269 set_jcr_job_status(jcr, JS_Running);
270 /* Wait for Client to terminate */
271 while ((n = bget_dirmsg(fd)) >= 0) {
/*
 * Try the current seven-field EndJob layout first, then the older
 * five-field layout.
 * NOTE(review): the llu conversions are paired with uint64_t pointers,
 * which assumes uint64_t is unsigned long long on the target -- confirm.
 */
273 (sscanf(fd->msg, EndJob, &jcr->FDJobStatus, &JobFiles,
274 &ReadBytes, &JobBytes, &Errors, &VSS, &Encrypt) == 7 ||
275 sscanf(fd->msg, OldEndJob, &jcr->FDJobStatus, &JobFiles,
276 &ReadBytes, &JobBytes, &Errors) == 5)) {
/* The status reported by the FD becomes the job status. */
278 set_jcr_job_status(jcr, jcr->FDJobStatus);
279 Dmsg1(100, "FDStatus=%c\n", (char)jcr->JobStatus);
/* Presumably the else branch: a message that did not parse as End Job is logged as a warning. */
281 Jmsg(jcr, M_WARNING, 0, _("Unexpected Client Job message: %s\n"),
284 if (job_canceled(jcr)) {
/* After the loop, a socket error is reported as fatal with the job type in the text. */
289 if (is_bnet_error(fd)) {
290 Jmsg(jcr, M_FATAL, 0, _("Network error with FD during %s: ERR=%s\n"),
291 job_type_to_str(jcr->JobType), bnet_strerror(fd));
293 bnet_sig(fd, BNET_TERMINATE); /* tell Client we are terminating */
295 /* Note, the SD stores in jcr->JobFiles/ReadBytes/JobBytes/Errors */
296 wait_for_storage_daemon_termination(jcr);
299 /* Return values from FD */
/* These assignments run after the SD wait, so the FD figures are what remain in the JCR. */
301 jcr->JobFiles = JobFiles;
302 jcr->Errors = Errors;
303 jcr->ReadBytes = ReadBytes;
304 jcr->JobBytes = JobBytes;
306 jcr->Encrypt = Encrypt;
/* NOTE(review): presumably reached only when no End Job message was parsed; the branch head is not visible. */
308 Jmsg(jcr, M_FATAL, 0, _("No Job status returned from FD.\n"));
311 // Dmsg4(100, "fd_ok=%d FDJS=%d JS=%d SDJS=%d\n", fd_ok, jcr->FDJobStatus,
312 // jcr->JobStatus, jcr->SDJobStatus);
314 /* Return the first error status we find Dir, FD, or SD */
/* A missing End Job message or a socket error forces the FD status to error. */
315 if (!fd_ok || is_bnet_error(fd)) {
316 jcr->FDJobStatus = JS_ErrorTerminated;
318 if (jcr->JobStatus != JS_Terminated) {
319 return jcr->JobStatus;
321 if (jcr->FDJobStatus != JS_Terminated) {
322 return jcr->FDJobStatus;
324 return jcr->SDJobStatus;
328 * Release resources allocated during backup.
/*
 * Finish a backup job and emit the summary report: record the job end,
 * reload the job, client and media records, update the bootstrap file,
 * choose the termination text from jcr->JobStatus, compute rate and
 * compression figures, and send the report through Jmsg.
 *
 * TermCode is passed to update_job_end and traced; the termination text
 * itself is chosen from jcr->JobStatus.
 *
 * NOTE(review): this listing omits several lines (declarations of mr, cr
 * and RunTime, case labels, breaks, braces and some Jmsg arguments).
 */
330 void backup_cleanup(JCR *jcr, int TermCode)
/* Fixed-size scratch buffers for the formatted times and numbers in the report. */
332 char sdt[50], edt[50], schedt[50];
333 char ec1[30], ec2[30], ec3[30], ec4[30], ec5[30], compress[50];
334 char ec6[30], ec7[30], ec8[30], elapsed[50];
335 char term_code[100], fd_term_msg[100], sd_term_msg[100];
336 const char *term_msg;
/* The report goes out as M_INFO unless the error case below raises it to M_ERROR. */
337 int msg_type = M_INFO;
340 double kbps, compression;
343 Dmsg2(100, "Enter backup_cleanup %d %c\n", TermCode, TermCode);
/* Both records are zeroed before being used as lookup keys. */
344 memset(&mr, 0, sizeof(mr));
345 memset(&cr, 0, sizeof(cr));
347 update_job_end(jcr, TermCode);
/* A failed job record read is logged as a warning and also marks the job as failed. */
349 if (!db_get_job_record(jcr, jcr->db, &jcr->jr)) {
350 Jmsg(jcr, M_WARNING, 0, _("Error getting job record for stats: %s"),
351 db_strerror(jcr->db));
352 set_jcr_job_status(jcr, JS_ErrorTerminated);
/* The client record is looked up by name; in the visible lines a failure only warns. */
355 bstrncpy(cr.Name, jcr->client->hdr.name, sizeof(cr.Name));
356 if (!db_get_client_record(jcr, jcr->db, &cr)) {
357 Jmsg(jcr, M_WARNING, 0, _("Error getting client record for stats: %s"),
358 db_strerror(jcr->db));
/* The media record is looked up by volume name; a failure warns and marks the job as failed. */
361 bstrncpy(mr.VolumeName, jcr->VolumeName, sizeof(mr.VolumeName));
362 if (!db_get_media_record(jcr, jcr->db, &mr)) {
363 Jmsg(jcr, M_WARNING, 0, _("Error getting Media record for Volume \"%s\": ERR=%s"),
364 mr.VolumeName, db_strerror(jcr->db));
365 set_jcr_job_status(jcr, JS_ErrorTerminated);
368 update_bootstrap_file(jcr);
/* Choose the termination text; only JS_ErrorTerminated is visible as a case label here. */
370 switch (jcr->JobStatus) {
/* Any FD or SD error count changes the success text to the with-warnings form. */
372 if (jcr->Errors || jcr->SDErrors) {
373 term_msg = _("Backup OK -- with warnings");
375 term_msg = _("Backup OK");
379 case JS_ErrorTerminated:
380 term_msg = _("*** Backup Error ***");
381 msg_type = M_ERROR; /* Generate error message */
/* If the SD socket is still open, signal termination and cancel the SD message thread. */
382 if (jcr->store_bsock) {
383 bnet_sig(jcr->store_bsock, BNET_TERMINATE);
384 if (jcr->SD_msg_chan) {
385 pthread_cancel(jcr->SD_msg_chan);
/* The canceled case does the same socket and thread teardown as the error case. */
390 term_msg = _("Backup Canceled");
391 if (jcr->store_bsock) {
392 bnet_sig(jcr->store_bsock, BNET_TERMINATE);
393 if (jcr->SD_msg_chan) {
394 pthread_cancel(jcr->SD_msg_chan);
/*
 * Presumably the default case: the status character is formatted into
 * term_code.
 * NOTE(review): sprintf is unbounded here while bsnprintf is used for
 * compress further down; the translated format must stay under 100 bytes.
 */
399 term_msg = term_code;
400 sprintf(term_code, _("Inappropriate term code: %c\n"), jcr->JobStatus);
403 bstrftimes(schedt, sizeof(schedt), jcr->jr.SchedTime);
404 bstrftimes(sdt, sizeof(sdt), jcr->jr.StartTime);
405 bstrftimes(edt, sizeof(edt), jcr->jr.EndTime);
/* RunTime is the difference of the two job record time fields. */
406 RunTime = jcr->jr.EndTime - jcr->jr.StartTime;
/* NOTE(review): this divides by RunTime; the guard against a zero or negative RunTime is not visible in this listing -- confirm. */
410 kbps = (double)jcr->jr.JobBytes / (1000 * RunTime);
412 if (!db_get_job_volume_names(jcr, jcr->db, jcr->jr.JobId, &jcr->VolumeName)) {
414 * Note, if the job has erred, most likely it did not write any
415 * tape, so suppress this "error" message since in that case
416 * it is normal. Or look at it the other way, only for a
417 * normal exit should we complain about this error.
419 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes) {
420 Jmsg(jcr, M_ERROR, 0, "%s", db_strerror(jcr->db));
422 jcr->VolumeName[0] = 0; /* none */
/* With no bytes read the ratio is undefined, so the report says None. */
425 if (jcr->ReadBytes == 0) {
426 bstrncpy(compress, "None", sizeof(compress));
/* Percent saved is 100 minus 100 times JobBytes over ReadBytes; below 0.5 it is shown as None. */
428 compression = (double)100 - 100.0 * ((double)jcr->JobBytes / (double)jcr->ReadBytes);
429 if (compression < 0.5) {
430 bstrncpy(compress, "None", sizeof(compress));
432 bsnprintf(compress, sizeof(compress), "%.1f %%", (float)compression);
435 jobstatus_to_ascii(jcr->FDJobStatus, fd_term_msg, sizeof(fd_term_msg));
436 jobstatus_to_ascii(jcr->SDJobStatus, sd_term_msg, sizeof(sd_term_msg));
438 // bmicrosleep(15, 0); /* for debugging SIGHUP */
/*
 * One Jmsg call carries the whole report at the message type chosen above.
 * NOTE(review): several format lines and arguments are missing from this
 * listing, so format and argument pairing cannot be checked from here.
 */
440 Jmsg(jcr, msg_type, 0, _("Bacula %s (%s): %s\n"
443 " Backup Level: %s%s\n"
444 " Client: \"%s\" %s\n"
445 " FileSet: \"%s\" %s\n"
446 " Pool: \"%s\" (From %s)\n"
447 " Storage: \"%s\" (From %s)\n"
448 " Scheduled time: %s\n"
451 " Elapsed time: %s\n"
453 " FD Files Written: %s\n"
454 " SD Files Written: %s\n"
455 " FD Bytes Written: %s (%sB)\n"
456 " SD Bytes Written: %s (%sB)\n"
458 " Software Compression: %s\n"
461 " Volume name(s): %s\n"
462 " Volume Session Id: %d\n"
463 " Volume Session Time: %d\n"
464 " Last Volume Bytes: %s (%sB)\n"
465 " Non-fatal FD errors: %d\n"
467 " FD termination status: %s\n"
468 " SD termination status: %s\n"
469 " Termination: %s\n\n"),
475 level_to_str(jcr->JobLevel), jcr->since,
476 jcr->client->name(), cr.Uname,
477 jcr->fileset->name(), jcr->FSCreateTime,
478 jcr->pool->name(), jcr->pool_source,
479 jcr->wstore->name(), jcr->wstore_source,
483 edit_utime(RunTime, elapsed, sizeof(elapsed)),
485 edit_uint64_with_commas(jcr->jr.JobFiles, ec1),
486 edit_uint64_with_commas(jcr->SDJobFiles, ec2),
487 edit_uint64_with_commas(jcr->jr.JobBytes, ec3),
488 edit_uint64_with_suffix(jcr->jr.JobBytes, ec4),
489 edit_uint64_with_commas(jcr->SDJobBytes, ec5),
490 edit_uint64_with_suffix(jcr->SDJobBytes, ec6),
494 jcr->Encrypt?"yes":"no",
498 edit_uint64_with_commas(mr.VolBytes, ec7),
499 edit_uint64_with_suffix(mr.VolBytes, ec8),
506 Dmsg0(100, "Leave backup_cleanup()\n");
/*
 * Write the bootstrap records for this job. Work is done only when the job
 * status is JS_Terminated, the job record shows a nonzero byte count, and
 * the job resource has a WriteBootstrap setting.
 *
 * The target name comes from expanding WriteBootstrap with edit_job_codes.
 * One group of records is written per volume returned by
 * db_get_job_volume_parameters.
 *
 * NOTE(review): this listing omits several lines (declarations of fd,
 * bpipe, VolCount, edt and be, the pipe-or-file test, the check that fd is
 * non-null, the close and free calls, and braces).
 */
509 void update_bootstrap_file(JCR *jcr)
511 /* Now update the bootstrap file if any */
512 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes &&
513 jcr->job->WriteBootstrap) {
/* fname is pool memory and is released at the bottom of this block. */
517 POOLMEM *fname = get_pool_memory(PM_FNAME);
518 fname = edit_job_codes(jcr, fname, jcr->job->WriteBootstrap, "");
520 VOL_PARAMS *VolParams = NULL;
/* Pipe case: the name is passed without its first character; fd is NULL if the pipe did not open. */
526 bpipe = open_bpipe(fname+1, 0, "w"); /* skip first char "|" */
527 fd = bpipe ? bpipe->wfd : NULL;
529 /* ***FIXME*** handle BASE */
/* File case: a Full level opens with mode w+b, every other level with a+b. */
530 fd = fopen(fname, jcr->JobLevel==L_FULL?"w+b":"a+b");
/* NOTE(review): the remaining arguments and the test on VolCount are not visible in this listing. */
533 VolCount = db_get_job_volume_parameters(jcr, jcr->db, jcr->JobId,
536 Jmsg(jcr, M_ERROR, 0, _("Could not get Job Volume Parameters to "
537 "update Bootstrap file. ERR=%s\n"), db_strerror(jcr->db));
/* The job is marked as failed only if the Storage daemon reported files written. */
538 if (jcr->SDJobFiles != 0) {
539 set_jcr_job_status(jcr, JS_ErrorTerminated);
543 /* Start output with when and who wrote it */
544 bstrftimes(edt, sizeof(edt), time(NULL));
545 fprintf(fd, "# %s - %s - %s%s\n", edt, jcr->jr.Job,
546 level_to_str(jcr->JobLevel), jcr->since);
/* Session id and time come from the JCR and repeat per volume; the rest is per volume. */
547 for (int i=0; i < VolCount; i++) {
548 /* Write the record */
549 fprintf(fd, "Volume=\"%s\"\n", VolParams[i].VolumeName);
550 fprintf(fd, "MediaType=\"%s\"\n", VolParams[i].MediaType);
551 fprintf(fd, "VolSessionId=%u\n", jcr->VolSessionId);
552 fprintf(fd, "VolSessionTime=%u\n", jcr->VolSessionTime);
553 fprintf(fd, "VolFile=%u-%u\n", VolParams[i].StartFile,
554 VolParams[i].EndFile);
555 fprintf(fd, "VolBlock=%u-%u\n", VolParams[i].StartBlock,
556 VolParams[i].EndBlock);
557 fprintf(fd, "FileIndex=%d-%d\n", VolParams[i].FirstIndex,
558 VolParams[i].LastIndex);
/* Presumably the branch taken when fd is NULL: report the open failure and mark the job as failed. */
570 Jmsg(jcr, M_ERROR, 0, _("Could not open WriteBootstrap file:\n"
571 "%s: ERR=%s\n"), fname, be.strerror());
572 set_jcr_job_status(jcr, JS_ErrorTerminated);
574 free_pool_memory(fname);