3 * Bacula Director -- backup.c -- responsible for doing backup jobs
5 * Kern Sibbald, March MM
7 * Basic tasks done here:
8 * Open DB and create records for this job.
9 * Open Message Channel with Storage daemon to tell him a job will be starting.
10 * Open connection with File daemon and pass him commands
12 * When the File daemon finishes the job, update the DB.
17 Copyright (C) 2000-2006 Kern Sibbald
19 This program is free software; you can redistribute it and/or
20 modify it under the terms of the GNU General Public License
21 version 2 as amended with additional clauses defined in the
22 file LICENSE in the main source directory.
24 This program is distributed in the hope that it will be useful,
25 but WITHOUT ANY WARRANTY; without even the implied warranty of
26 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27 file LICENSE for additional details.
/*
 * Protocol strings exchanged with the File daemon.
 * backupcmd and storaddr are sent with bnet_fsend() in do_backup();
 * storaddr is filled in with the storage address, the storage port and
 * the TLS requirement value.
 */
35 /* Commands sent to File daemon */
36 static char backupcmd[] = "backup\n";
37 static char storaddr[] = "storage address=%s port=%d ssl=%d\n";
/*
 * OKbackup and OKstore are the replies handed to response() in do_backup().
 * EndJob is used as the sscanf() format in wait_for_job_termination(),
 * which only accepts a message when all five fields are converted.
 * NOTE(review): the two %lld conversions receive addresses of uint64_t
 * variables in wait_for_job_termination() -- confirm the signedness
 * mismatch is acceptable on all supported platforms.
 */
39 /* Responses received from File daemon */
40 static char OKbackup[] = "2000 OK backup\n";
41 static char OKstore[] = "2000 OK storage\n";
42 static char EndJob[] = "2800 End Job TermCode=%d JobFiles=%u "
43 "ReadBytes=%lld JobBytes=%lld Errors=%u\n";
46 * Called here before the job is run to do the job
/*
 * Prepare a backup job before it is run.
 *
 * Steps visible in this listing:
 *   - obtain or create the FileSet record for the job;
 *   - call get_level_since_time() with jcr->since as the output buffer;
 *   - select jcr->pool from full_pool, inc_pool or dif_pool inside a
 *     switch on jcr->JobLevel;
 *   - look the Pool up in the catalog by name, creating it and looking it
 *     up again when the first lookup fails;
 *   - store the Pool id in jcr->jr.PoolId;
 *   - copy the Pool storage into the job with copy_storage();
 *   - start clone jobs with create_clones().
 *
 * jcr - job control record of the job being initialized.
 *
 * Declared to return bool.
 * NOTE(review): this listing omits source lines (braces, return
 * statements, case labels and the declaration of pr), so the exact return
 * paths cannot be read from here -- confirm against the complete source.
 */
49 bool do_backup_init(JCR *jcr)
53 if (!get_or_create_fileset_record(jcr)) {
58 * Get definitive Job level and since time
60 get_level_since_time(jcr, jcr->since, sizeof(jcr->since));
63 * Apply any level related Pool selections
/*
 * The case labels are missing from this listing; presumably each
 * assignment below belongs to one job level (full, incremental,
 * differential) and is guarded by a check that the level-specific pool is
 * set -- TODO confirm.
 */
65 switch (jcr->JobLevel) {
68 jcr->pool = jcr->full_pool;
73 jcr->pool = jcr->inc_pool;
78 jcr->pool = jcr->dif_pool;
/* Zero the Pool record so that only Name is filled in for the lookup. */
82 memset(&pr, 0, sizeof(pr));
83 bstrncpy(pr.Name, jcr->pool->hdr.name, sizeof(pr.Name));
85 if (!db_get_pool_record(jcr, jcr->db, &pr)) { /* get by Name */
86 /* Try to create the pool */
/* A negative result from create_pool() is reported as a fatal job message. */
87 if (create_pool(jcr, jcr->db, jcr->pool, POOL_OP_CREATE) < 0) {
88 Jmsg(jcr, M_FATAL, 0, _("Pool %s not in database. %s"), pr.Name,
89 db_strerror(jcr->db));
92 Jmsg(jcr, M_INFO, 0, _("Pool %s created in database.\n"), pr.Name);
/* Second lookup after creation; a failure here is also reported as fatal. */
93 if (!db_get_pool_record(jcr, jcr->db, &pr)) { /* get by Name */
94 Jmsg(jcr, M_FATAL, 0, _("Pool %s not in database. %s"), pr.Name,
95 db_strerror(jcr->db));
/* Remember which Pool this job uses in its job record. */
100 jcr->jr.PoolId = pr.PoolId;
102 /* If pool storage specified, use it instead of job storage */
103 copy_storage(jcr, jcr->pool->storage);
/*
 * NOTE(review): the condition guarding the fatal message below is not
 * visible in this listing; presumably it tests that a storage was found.
 */
106 Jmsg(jcr, M_FATAL, 0, _("No Storage specification found in Job or Pool.\n"));
110 create_clones(jcr); /* run any clone jobs */
116 * Do a backup of the specified FileSet
118 * Returns: false on failure
/*
 * Run a backup job for the FileSet attached to jcr.
 *
 * Sequence visible in this listing:
 *   1. Emit the job start message, mark the job JS_Running and update the
 *      job start record in the catalog.
 *   2. Mark the job JS_WaitSD, connect to the Storage daemon, start the
 *      job there, start the Storage daemon message thread and send the
 *      run command on jcr->store_bsock.
 *   3. Mark the job JS_WaitFD, connect to the File daemon, mark the job
 *      JS_Running again and send the include list, the exclude list and
 *      the level command.
 *   4. Send the storage address, port and TLS requirement to the File
 *      daemon and expect the OKstore reply, then send the run before and
 *      after commands.
 *   5. Reset the start time, update the job start record again, send the
 *      backup command and expect the OKbackup reply.
 *   6. Wait for termination; backup_cleanup() is called when the returned
 *      status is JS_Terminated.
 *
 * jcr - job control record of the job to run.
 *
 * Declared to return bool; the comment preceding this function states that
 * false is returned on failure.
 * NOTE(review): the bodies of the failure branches, the return statements
 * and the declarations of fd, store, stat and ed1 are on lines missing
 * from this listing -- confirm against the complete source.
 */
121 bool do_backup(JCR *jcr)
/* Default is no TLS; raised below when the storage resource enables it. */
124 int tls_need = BNET_TLS_NONE;
130 /* Print Job Start message */
131 Jmsg(jcr, M_INFO, 0, _("Start Backup JobId %s, Job=%s\n"),
132 edit_uint64(jcr->JobId, ed1), jcr->Job);
134 set_jcr_job_status(jcr, JS_Running);
135 Dmsg2(100, "JobId=%d JobLevel=%c\n", jcr->jr.JobId, jcr->jr.JobLevel);
/* A failed catalog update is reported as a fatal job message. */
136 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
137 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
142 * Open a message channel connection with the Storage
143 * daemon. This is to let him know that our client
144 * will be contacting him for a backup session.
147 Dmsg0(110, "Open connection with storage daemon\n");
148 set_jcr_job_status(jcr, JS_WaitSD);
150 * Start conversation with Storage daemon
152 if (!connect_to_storage_daemon(jcr, 10, SDConnectTimeout, 1)) {
156 * Now start a job with the Storage daemon
158 if (!start_storage_daemon_job(jcr, NULL, jcr->storage)) {
162 * Now start a Storage daemon message thread. Note,
163 * this thread is used to provide the catalog services
164 * for the backup job, including inserting the attributes
165 * into the catalog. See catalog_update() in catreq.c
167 if (!start_storage_daemon_message_thread(jcr)) {
170 Dmsg0(150, "Storage daemon connection OK\n");
/* Send the run command to the Storage daemon. */
172 if (!bnet_fsend(jcr->store_bsock, "run")) {
176 set_jcr_job_status(jcr, JS_WaitFD);
177 if (!connect_to_file_daemon(jcr, 10, FDConnectTimeout, 1)) {
181 set_jcr_job_status(jcr, JS_Running);
/* fd is the File daemon socket used for the remaining commands. */
182 fd = jcr->file_bsock;
184 if (!send_include_list(jcr)) {
188 if (!send_exclude_list(jcr)) {
192 if (!send_level_command(jcr)) {
197 * send Storage daemon address to the File daemon
/*
 * When SDDport is zero it is set to SDport, so the port sent to the File
 * daemon is never zero unless SDport is zero as well.
 * NOTE(review): the assignment of store is not visible in this listing.
 */
200 if (store->SDDport == 0) {
201 store->SDDport = store->SDport;
204 /* TLS Requirement */
/*
 * With TLS enabled, tls_require selects BNET_TLS_REQUIRED; the
 * BNET_TLS_OK assignment is presumably the else branch -- TODO confirm.
 */
205 if (store->tls_enable) {
206 if (store->tls_require) {
207 tls_need = BNET_TLS_REQUIRED;
209 tls_need = BNET_TLS_OK;
/* The result of this send is not checked; only the reply is. */
213 bnet_fsend(fd, storaddr, store->address, store->SDDport, tls_need);
214 if (!response(jcr, fd, OKstore, "Storage", DISPLAY_ERROR)) {
219 if (!send_run_before_and_after_commands(jcr)) {
224 * We re-update the job start record so that the start
225 * time is set after the run before job. This avoids
226 * that any files created by the run before job will
227 * be saved twice. They will be backed up in the current
228 * job, but not in the next one unless they are changed.
229 * Without this, they will be backed up in this job and
230 * in the next job run because in that case, their date
231 * is after the start of this run.
233 jcr->start_time = time(NULL);
234 jcr->jr.StartTime = jcr->start_time;
235 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
236 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
239 /* Send backup command */
/* As above, only the reply to the command is checked. */
240 bnet_fsend(fd, backupcmd);
241 if (!response(jcr, fd, OKbackup, "backup", DISPLAY_ERROR)) {
245 /* Pickup Job termination data */
246 stat = wait_for_job_termination(jcr);
/* Cleanup is run from here only for a normally terminated job. */
247 if (stat == JS_Terminated) {
248 backup_cleanup(jcr, stat);
253 /* Come here only after starting SD thread */
/*
 * Error path: mark the job as failed and wait for the Storage daemon to
 * finish. NOTE(review): the label that leads here and the final return
 * are not visible in this listing.
 */
255 set_jcr_job_status(jcr, JS_ErrorTerminated);
256 Dmsg1(400, "wait for sd. use=%d\n", jcr->use_count());
257 wait_for_storage_daemon_termination(jcr);
258 Dmsg1(400, "after wait for sd. use=%d\n", jcr->use_count());
264 * Here we wait for the File daemon to signal termination,
265 * then we wait for the Storage daemon. When both
266 * are done, we return the job status.
267 * Also used by restore.c
/*
 * Wait for the File daemon to finish, then for the Storage daemon, and
 * return a job status.
 *
 * Messages are read from jcr->file_bsock with bget_dirmsg() until it
 * returns a negative value. A message that matches the EndJob format with
 * all five fields converted supplies the File daemon status and counters;
 * that status is also applied to the job with set_jcr_job_status().
 *
 * jcr - job control record of the running job.
 *
 * Return value: jcr->JobStatus when it is not JS_Terminated, otherwise
 * jcr->FDJobStatus when it is not JS_Terminated, otherwise
 * jcr->SDJobStatus.
 *
 * NOTE(review): the declarations of n and fd_ok, the statement that sets
 * fd_ok and several braces are on lines missing from this listing.
 */
269 int wait_for_job_termination(JCR *jcr)
272 BSOCK *fd = jcr->file_bsock;
/*
 * These locals are only assigned by the sscanf() call below.
 * NOTE(review): ReadBytes and JobBytes are uint64_t but are scanned with
 * %lld -- confirm this is intended.
 */
274 uint32_t JobFiles, Errors;
275 uint64_t ReadBytes, JobBytes;
277 set_jcr_job_status(jcr, JS_Running);
278 /* Wait for Client to terminate */
279 while ((n = bget_dirmsg(fd)) >= 0) {
/* Only the first matching End Job message is parsed (guarded by !fd_ok). */
280 if (!fd_ok && sscanf(fd->msg, EndJob, &jcr->FDJobStatus, &JobFiles,
281 &ReadBytes, &JobBytes, &Errors) == 5) {
283 set_jcr_job_status(jcr, jcr->FDJobStatus);
284 Dmsg1(100, "FDStatus=%c\n", (char)jcr->JobStatus);
/* Presumably the else branch: any other message is logged as a warning. */
286 Jmsg(jcr, M_WARNING, 0, _("Unexpected Client Job message: %s\n"),
/* NOTE(review): the action taken for a canceled job is not visible here. */
289 if (job_canceled(jcr)) {
/* A socket error after the read loop is reported as a fatal job message. */
293 if (is_bnet_error(fd)) {
294 Jmsg(jcr, M_FATAL, 0, _("Network error with FD during %s: ERR=%s\n"),
295 job_type_to_str(jcr->JobType), bnet_strerror(fd));
297 bnet_sig(fd, BNET_TERMINATE); /* tell Client we are terminating */
299 /* Note, the SD stores in jcr->JobFiles/ReadBytes/JobBytes/Errors */
300 wait_for_storage_daemon_termination(jcr);
303 /* Return values from FD */
/*
 * The counters parsed from the End Job message are copied into jcr after
 * the Storage daemon wait. Presumably this is guarded by fd_ok, with the
 * fatal message below as the alternative branch -- TODO confirm, since the
 * guard is not visible and the locals are otherwise uninitialized.
 */
305 jcr->JobFiles = JobFiles;
306 jcr->Errors = Errors;
307 jcr->ReadBytes = ReadBytes;
308 jcr->JobBytes = JobBytes;
310 Jmsg(jcr, M_FATAL, 0, _("No Job status returned from FD.\n"));
313 // Dmsg4(100, "fd_ok=%d FDJS=%d JS=%d SDJS=%d\n", fd_ok, jcr->FDJobStatus,
314 // jcr->JobStatus, jcr->SDJobStatus);
316 /* Return the first error status we find Dir, FD, or SD */
/* A missing End Job message or a socket error forces an FD error status. */
317 if (!fd_ok || is_bnet_error(fd)) {
318 jcr->FDJobStatus = JS_ErrorTerminated;
320 if (jcr->JobStatus != JS_Terminated) {
321 return jcr->JobStatus;
323 if (jcr->FDJobStatus != JS_Terminated) {
324 return jcr->FDJobStatus;
326 return jcr->SDJobStatus;
330 * Release resources allocated during backup.
/*
 * Finish a backup job: record its final status and emit the job summary.
 *
 * jcr      - job control record of the finished job.
 * TermCode - termination status applied to the job with
 *            set_jcr_job_status().
 *
 * Steps visible in this listing:
 *   - display queued messages and apply TermCode as the job status;
 *   - update the job end record, then re-read the job, client and media
 *     records used for the statistics;
 *   - call update_bootstrap_file();
 *   - choose the termination text and message type from jcr->JobStatus;
 *     on the error and canceled paths the Storage daemon is signalled
 *     with BNET_TERMINATE and the SD message thread is cancelled;
 *   - compute run time, rate and compression and emit the summary with
 *     Jmsg().
 *
 * NOTE(review): this listing omits source lines (case labels, braces, the
 * declarations of mr, cr, msg_type and RunTime, and part of the Jmsg
 * format and argument list) -- confirm details against the complete
 * source.
 */
332 void backup_cleanup(JCR *jcr, int TermCode)
334 char sdt[50], edt[50], schedt[50];
335 char ec1[30], ec2[30], ec3[30], ec4[30], ec5[30], compress[50];
336 char ec6[30], ec7[30], ec8[30], elapsed[50];
337 char term_code[100], fd_term_msg[100], sd_term_msg[100];
338 const char *term_msg;
342 double kbps, compression;
345 Dmsg2(100, "Enter backup_cleanup %d %c\n", TermCode, TermCode);
346 dequeue_messages(jcr); /* display any queued messages */
/* Zero the media and client records before filling in their lookup keys. */
347 memset(&mr, 0, sizeof(mr));
348 memset(&cr, 0, sizeof(cr));
349 set_jcr_job_status(jcr, TermCode);
351 update_job_end_record(jcr); /* update database */
/* Failing to re-read the job record downgrades the job to an error status. */
353 if (!db_get_job_record(jcr, jcr->db, &jcr->jr)) {
354 Jmsg(jcr, M_WARNING, 0, _("Error getting job record for stats: %s"),
355 db_strerror(jcr->db));
356 set_jcr_job_status(jcr, JS_ErrorTerminated);
/* A missing client record only produces a warning in the visible lines. */
359 bstrncpy(cr.Name, jcr->client->hdr.name, sizeof(cr.Name));
360 if (!db_get_client_record(jcr, jcr->db, &cr)) {
361 Jmsg(jcr, M_WARNING, 0, _("Error getting client record for stats: %s"),
362 db_strerror(jcr->db));
/* A missing media record is a warning and also sets an error status. */
365 bstrncpy(mr.VolumeName, jcr->VolumeName, sizeof(mr.VolumeName));
366 if (!db_get_media_record(jcr, jcr->db, &mr)) {
367 Jmsg(jcr, M_WARNING, 0, _("Error getting Media record for Volume \"%s\": ERR=%s"),
368 mr.VolumeName, db_strerror(jcr->db));
369 set_jcr_job_status(jcr, JS_ErrorTerminated);
372 update_bootstrap_file(jcr);
374 msg_type = M_INFO; /* by default INFO message */
/*
 * Only the JS_ErrorTerminated label is visible below; the first branch
 * presumably handles JS_Terminated and the third a canceled status, with
 * the last one as the default -- TODO confirm.
 */
375 switch (jcr->JobStatus) {
377 if (jcr->Errors || jcr->SDErrors) {
378 term_msg = _("Backup OK -- with warnings");
380 term_msg = _("Backup OK");
384 case JS_ErrorTerminated:
385 term_msg = _("*** Backup Error ***");
386 msg_type = M_ERROR; /* Generate error message */
387 if (jcr->store_bsock) {
388 bnet_sig(jcr->store_bsock, BNET_TERMINATE);
389 if (jcr->SD_msg_chan) {
390 pthread_cancel(jcr->SD_msg_chan);
395 term_msg = _("Backup Canceled");
396 if (jcr->store_bsock) {
397 bnet_sig(jcr->store_bsock, BNET_TERMINATE);
398 if (jcr->SD_msg_chan) {
399 pthread_cancel(jcr->SD_msg_chan);
/*
 * NOTE(review): sprintf() below is unbounded; the untranslated format fits
 * in term_code[100], but a long translation might not -- consider the
 * bounded bsnprintf() used elsewhere in this function.
 */
404 term_msg = term_code;
405 sprintf(term_code, _("Inappropriate term code: %c\n"), jcr->JobStatus);
408 bstrftimes(schedt, sizeof(schedt), jcr->jr.SchedTime);
409 bstrftimes(sdt, sizeof(sdt), jcr->jr.StartTime);
410 bstrftimes(edt, sizeof(edt), jcr->jr.EndTime);
411 RunTime = jcr->jr.EndTime - jcr->jr.StartTime;
/*
 * NOTE(review): the division below needs RunTime to be non-zero; a guard
 * is not visible in this listing and is presumably on the missing lines
 * -- TODO confirm.
 */
415 kbps = (double)jcr->jr.JobBytes / (1000 * RunTime);
417 if (!db_get_job_volume_names(jcr, jcr->db, jcr->jr.JobId, &jcr->VolumeName)) {
419 * Note, if the job has erred, most likely it did not write any
420 * tape, so suppress this "error" message since in that case
421 * it is normal. Or look at it the other way, only for a
422 * normal exit should we complain about this error.
424 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes) {
425 Jmsg(jcr, M_ERROR, 0, "%s", db_strerror(jcr->db));
427 jcr->VolumeName[0] = 0; /* none */
/*
 * Compression is reported as None when nothing was read or when the
 * computed percentage is below 0.5; otherwise it is formatted with one
 * decimal place.
 */
430 if (jcr->ReadBytes == 0) {
431 bstrncpy(compress, "None", sizeof(compress));
433 compression = (double)100 - 100.0 * ((double)jcr->JobBytes / (double)jcr->ReadBytes);
434 if (compression < 0.5) {
435 bstrncpy(compress, "None", sizeof(compress));
437 bsnprintf(compress, sizeof(compress), "%.1f %%", (float)compression);
440 jobstatus_to_ascii(jcr->FDJobStatus, fd_term_msg, sizeof(fd_term_msg));
441 jobstatus_to_ascii(jcr->SDJobStatus, sd_term_msg, sizeof(sd_term_msg));
443 // bmicrosleep(15, 0); /* for debugging SIGHUP */
/*
 * Job summary report. NOTE(review): some format lines and arguments are
 * missing from this listing, so the pairing of conversions and arguments
 * cannot be verified from here.
 */
445 Jmsg(jcr, msg_type, 0, _("Bacula %s (%s): %s\n"
448 " Backup Level: %s%s\n"
449 " Client: \"%s\" %s\n"
450 " FileSet: \"%s\" %s\n"
453 " Scheduled time: %s\n"
456 " Elapsed time: %s\n"
458 " FD Files Written: %s\n"
459 " SD Files Written: %s\n"
460 " FD Bytes Written: %s (%sB)\n"
461 " SD Bytes Written: %s (%sB)\n"
463 " Software Compression: %s\n"
464 " Volume name(s): %s\n"
465 " Volume Session Id: %d\n"
466 " Volume Session Time: %d\n"
467 " Last Volume Bytes: %s (%sB)\n"
468 " Non-fatal FD errors: %d\n"
470 " FD termination status: %s\n"
471 " SD termination status: %s\n"
472 " Termination: %s\n\n"),
478 level_to_str(jcr->JobLevel), jcr->since,
479 jcr->client->hdr.name, cr.Uname,
480 jcr->fileset->hdr.name, jcr->FSCreateTime,
482 jcr->store->hdr.name,
486 edit_utime(RunTime, elapsed, sizeof(elapsed)),
488 edit_uint64_with_commas(jcr->jr.JobFiles, ec1),
489 edit_uint64_with_commas(jcr->SDJobFiles, ec2),
490 edit_uint64_with_commas(jcr->jr.JobBytes, ec3),
491 edit_uint64_with_suffix(jcr->jr.JobBytes, ec4),
492 edit_uint64_with_commas(jcr->SDJobBytes, ec5),
493 edit_uint64_with_suffix(jcr->SDJobBytes, ec6),
499 edit_uint64_with_commas(mr.VolBytes, ec7),
500 edit_uint64_with_suffix(mr.VolBytes, ec8),
507 Dmsg0(100, "Leave backup_cleanup()\n");
/*
 * Write the bootstrap records for a finished job.
 *
 * Nothing is written unless the job status is JS_Terminated, the job
 * record shows a non-zero JobBytes and the job resource has a
 * WriteBootstrap target. The target is opened either through open_bpipe()
 * in write mode or with fopen(). The output starts with a comment line
 * giving the time, job name, level and since text, followed by one group
 * of records per volume returned by db_get_job_volume_parameters().
 *
 * jcr - job control record of the finished job.
 *
 * NOTE(review): this listing omits source lines (the test choosing between
 * pipe and file, the declarations of fd, bpipe, edt, VolCount and be, the
 * check of VolCount, and the code that closes the output and releases
 * VolParams) -- confirm against the complete source.
 */
510 void update_bootstrap_file(JCR *jcr)
512 /* Now update the bootstrap file if any */
513 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes &&
514 jcr->job->WriteBootstrap) {
518 char *fname = jcr->job->WriteBootstrap;
519 VOL_PARAMS *VolParams = NULL;
/* Pipe variant: fd is the write side of the pipe, or NULL if it failed. */
526 bpipe = open_bpipe(fname, 0, "w");
527 fd = bpipe ? bpipe->wfd : NULL;
529 /* ***FIXME*** handle BASE */
/* File variant: a full-level job opens with w+, any other level with a+. */
530 fd = fopen(fname, jcr->JobLevel==L_FULL?"w+":"a+");
533 VolCount = db_get_job_volume_parameters(jcr, jcr->db, jcr->JobId,
/*
 * Failure to obtain volume parameters is reported as an error; the job is
 * only marked failed when the Storage daemon reported written files.
 */
536 Jmsg(jcr, M_ERROR, 0, _("Could not get Job Volume Parameters to "
537 "update Bootstrap file. ERR=%s\n"), db_strerror(jcr->db));
538 if (jcr->SDJobFiles != 0) {
539 set_jcr_job_status(jcr, JS_ErrorTerminated);
543 /* Start output with when and who wrote it */
544 bstrftimes(edt, sizeof(edt), time(NULL));
545 fprintf(fd, "# %s - %s - %s%s\n", edt, jcr->jr.Job,
546 level_to_str(jcr->JobLevel), jcr->since);
/*
 * One group of records per volume. The session id and session time come
 * from jcr and are therefore the same in every group; the other values
 * come from the per-volume parameters.
 */
547 for (int i=0; i < VolCount; i++) {
548 /* Write the record */
549 fprintf(fd, "Volume=\"%s\"\n", VolParams[i].VolumeName);
550 fprintf(fd, "MediaType=\"%s\"\n", VolParams[i].MediaType);
551 fprintf(fd, "VolSessionId=%u\n", jcr->VolSessionId);
552 fprintf(fd, "VolSessionTime=%u\n", jcr->VolSessionTime);
553 fprintf(fd, "VolFile=%u-%u\n", VolParams[i].StartFile,
554 VolParams[i].EndFile);
555 fprintf(fd, "VolBlock=%u-%u\n", VolParams[i].StartBlock,
556 VolParams[i].EndBlock);
557 fprintf(fd, "FileIndex=%d-%d\n", VolParams[i].FirstIndex,
558 VolParams[i].LastIndex);
/*
 * Presumably the branch taken when the output could not be opened: the
 * failure is reported and the job is marked failed -- TODO confirm the
 * guarding condition, which is not visible here.
 */
570 Jmsg(jcr, M_ERROR, 0, _("Could not open WriteBootstrap file:\n"
571 "%s: ERR=%s\n"), fname, be.strerror());
572 set_jcr_job_status(jcr, JS_ErrorTerminated);