3 * Bacula Director -- backup.c -- responsible for doing backup jobs
5 * Kern Sibbald, March MM
7 * Basic tasks done here:
8 * Open DB and create records for this job.
9 * Open Message Channel with Storage daemon to tell him a job will be starting.
10 * Open connection with File daemon and pass him commands
12 * When the File daemon finishes the job, update the DB.
17 Copyright (C) 2000-2006 Kern Sibbald
19 This program is free software; you can redistribute it and/or
20 modify it under the terms of the GNU General Public License
21 version 2 as amended with additional clauses defined in the
22 file LICENSE in the main source directory.
24 This program is distributed in the hope that it will be useful,
25 but WITHOUT ANY WARRANTY; without even the implied warranty of
26 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27 file LICENSE for additional details.
35 /* Commands sent to File daemon */
/* backupcmd: sent with bnet_fsend() in do_backup() to start the backup;
 * the reply is then checked against OKbackup. */
36 static char backupcmd[] = "backup\n";
/* storaddr: printf-style template filled in by do_backup() with the
 * storage address, its SDDport and the TLS requirement (tls_need). */
37 static char storaddr[] = "storage address=%s port=%d ssl=%d\n";
39 /* Responses received from File daemon */
/* OKbackup / OKstore: expected replies handed to response() after the
 * backup command and the storage-address command respectively. */
40 static char OKbackup[] = "2000 OK backup\n";
41 static char OKstore[] = "2000 OK storage\n";
/* EndJob: sscanf() template used by wait_for_job_termination(); a match
 * requires all five fields (TermCode, JobFiles, ReadBytes, JobBytes,
 * Errors) to be converted. */
42 static char EndJob[] = "2800 End Job TermCode=%d JobFiles=%u "
43 "ReadBytes=%lld JobBytes=%lld Errors=%u\n";
46 * Called here before the job is run to do the job initialization
/*
 * do_backup_init -- per-job setup performed before the backup is run.
 *
 * Visible steps, in order:
 *   - get or create the FileSet record for the job;
 *   - determine the job level and the "since" time (stored in jcr->since);
 *   - possibly replace jcr->pool according to jcr->JobLevel;
 *   - look the pool up in the catalog by name, creating it if the lookup
 *     fails, and store its PoolId in jcr->jr.PoolId;
 *   - call copy_storage() with the pool's storage;
 *   - start any clone jobs.
 *
 * Returns bool. The return statements are on lines not shown in this
 * listing -- presumably false after each fatal message; confirm.
 */
49 bool do_backup_init(JCR *jcr)
53 if (!get_or_create_fileset_record(jcr)) {
58 * Get definitive Job level and since time
60 get_level_since_time(jcr, jcr->since, sizeof(jcr->since));
63 * Apply any level related Pool selections
/* Only the assignments are visible in this listing (case labels and any
 * guards are omitted): jcr->pool can become full_pool, inc_pool or
 * dif_pool. */
65 switch (jcr->JobLevel) {
68 jcr->pool = jcr->full_pool;
73 jcr->pool = jcr->inc_pool;
78 jcr->pool = jcr->dif_pool;
/* pr is zeroed so that only Name is set for the by-name lookup below. */
82 memset(&pr, 0, sizeof(pr));
83 bstrncpy(pr.Name, jcr->pool->hdr.name, sizeof(pr.Name));
/* Lookup failed: try to create the pool, then read the record back so
 * that pr.PoolId is populated. Both failure paths report M_FATAL. */
85 if (!db_get_pool_record(jcr, jcr->db, &pr)) { /* get by Name */
86 /* Try to create the pool */
87 if (create_pool(jcr, jcr->db, jcr->pool, POOL_OP_CREATE) < 0) {
88 Jmsg(jcr, M_FATAL, 0, _("Pool %s not in database. %s"), pr.Name,
89 db_strerror(jcr->db));
92 Jmsg(jcr, M_INFO, 0, _("Pool %s created in database.\n"), pr.Name);
93 if (!db_get_pool_record(jcr, jcr->db, &pr)) { /* get by Name */
94 Jmsg(jcr, M_FATAL, 0, _("Pool %s not in database. %s"), pr.Name,
95 db_strerror(jcr->db));
100 jcr->jr.PoolId = pr.PoolId;
102 /* If pool storage specified, use it instead of job storage */
103 copy_storage(jcr, jcr->pool->storage);
/* NOTE(review): the condition guarding this fatal message is not visible
 * here -- presumably it fires when no storage is set after copy_storage(). */
106 Jmsg(jcr, M_FATAL, 0, _("No Storage specification found in Job or Pool.\n"));
110 create_clones(jcr); /* run any clone jobs */
116 * Do a backup of the specified FileSet
118 * Returns: false on failure
/*
 * do_backup -- run a backup job: contact the Storage daemon, then drive
 * the File daemon through the backup and wait for termination.
 *
 * Visible sequence:
 *   1. log the start message, set JS_Running, update the job start record;
 *   2. connect to the Storage daemon, start its job, send "run", and
 *      start the Storage daemon message thread;
 *   3. connect to the File daemon and send the include list, exclude
 *      list, level command, storage address (with TLS need) and the
 *      runscripts commands;
 *   4. refresh start_time / StartTime and update the start record again;
 *   5. send the backup command and wait for job termination;
 *      backup_cleanup() is called when the status is JS_Terminated.
 * The tail of the function sets JS_ErrorTerminated and waits for the
 * Storage daemon to terminate.
 *
 * Returns bool; the comment preceding the definition states false on
 * failure. The return statements themselves are on lines not shown here.
 */
121 bool do_backup(JCR *jcr)
124 int tls_need = BNET_TLS_NONE;
130 /* Print Job Start message */
131 Jmsg(jcr, M_INFO, 0, _("Start Backup JobId %s, Job=%s\n"),
132 edit_uint64(jcr->JobId, ed1), jcr->Job);
134 set_jcr_job_status(jcr, JS_Running);
135 Dmsg2(100, "JobId=%d JobLevel=%c\n", jcr->jr.JobId, jcr->jr.JobLevel);
136 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
137 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
142 * Open a message channel connection with the Storage
143 * daemon. This is to let him know that our client
144 * will be contacting him for a backup session.
147 Dmsg0(110, "Open connection with storage daemon\n");
148 set_jcr_job_status(jcr, JS_WaitSD);
150 * Start conversation with Storage daemon
152 if (!connect_to_storage_daemon(jcr, 10, SDConnectTimeout, 1)) {
156 * Now start a job with the Storage daemon
158 if (!start_storage_daemon_job(jcr, NULL, jcr->storage)) {
163 * Start the job prior to starting the message thread below
164 * to avoid two threads from using the BSOCK structure at
167 if (!bnet_fsend(jcr->store_bsock, "run")) {
172 * Now start a Storage daemon message thread. Note,
173 * this thread is used to provide the catalog services
174 * for the backup job, including inserting the attributes
175 * into the catalog. See catalog_update() in catreq.c
177 if (!start_storage_daemon_message_thread(jcr)) {
180 Dmsg0(150, "Storage daemon connection OK\n");
182 set_jcr_job_status(jcr, JS_WaitFD);
183 if (!connect_to_file_daemon(jcr, 10, FDConnectTimeout, 1)) {
187 set_jcr_job_status(jcr, JS_Running);
188 fd = jcr->file_bsock;
190 if (!send_include_list(jcr)) {
194 if (!send_exclude_list(jcr)) {
198 if (!send_level_command(jcr)) {
203 * send Storage daemon address to the File daemon
/* When SDDport is 0 the SDport value is used (and stored back into the
 * store resource). */
206 if (store->SDDport == 0) {
207 store->SDDport = store->SDport;
210 /* TLS Requirement */
/* tls_need stays BNET_TLS_NONE unless store->tls_enable is set. The
 * BNET_TLS_OK assignment is presumably the else branch of tls_require
 * (the "else" line is not visible in this listing). */
211 if (store->tls_enable) {
212 if (store->tls_require) {
213 tls_need = BNET_TLS_REQUIRED;
215 tls_need = BNET_TLS_OK;
/* NOTE(review): the result of this bnet_fsend() is not checked, unlike
 * the "run" send above; presumably a failed send is caught by the
 * response() check that follows -- confirm. */
219 bnet_fsend(fd, storaddr, store->address, store->SDDport, tls_need);
220 if (!response(jcr, fd, OKstore, "Storage", DISPLAY_ERROR)) {
225 if (!send_runscripts_commands(jcr)) {
230 * We re-update the job start record so that the start
231 * time is set after the run before job. This avoids
232 * that any files created by the run before job will
233 * be saved twice. They will be backed up in the current
234 * job, but not in the next one unless they are changed.
235 * Without this, they will be backed up in this job and
236 * in the next job run because in that case, their date
237 * is after the start of this run.
239 jcr->start_time = time(NULL);
240 jcr->jr.StartTime = jcr->start_time;
241 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
242 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
245 /* Send backup command */
/* NOTE(review): as with the storage address, the send result is not
 * checked; only the reply is validated against OKbackup. */
246 bnet_fsend(fd, backupcmd);
247 if (!response(jcr, fd, OKbackup, "backup", DISPLAY_ERROR)) {
251 /* Pickup Job termination data */
252 stat = wait_for_job_termination(jcr);
/* backup_cleanup() is invoked from here only for JS_Terminated; how other
 * statuses are handled is on lines not shown in this listing. */
253 if (stat == JS_Terminated) {
254 backup_cleanup(jcr, stat);
259 /* Come here only after starting SD thread */
261 set_jcr_job_status(jcr, JS_ErrorTerminated);
262 Dmsg1(400, "wait for sd. use=%d\n", jcr->use_count());
263 wait_for_storage_daemon_termination(jcr);
264 Dmsg1(400, "after wait for sd. use=%d\n", jcr->use_count());
270 * Here we wait for the File daemon to signal termination,
271 * then we wait for the Storage daemon. When both
272 * are done, we return the job status.
273 * Also used by restore.c
/*
 * wait_for_job_termination -- read messages from the File daemon until
 * bget_dirmsg() returns a negative value, then wait for the Storage
 * daemon to terminate.
 *
 * A message that matches EndJob (all five fields converted) supplies
 * jcr->FDJobStatus plus the FD's file, byte and error counters; those
 * counters are copied into the jcr after the Storage daemon wait.
 *
 * Returns the first status that is not JS_Terminated among
 * jcr->JobStatus and jcr->FDJobStatus, otherwise jcr->SDJobStatus.
 */
275 int wait_for_job_termination(JCR *jcr)
278 BSOCK *fd = jcr->file_bsock;
280 uint32_t JobFiles, Errors;
281 uint64_t ReadBytes, JobBytes;
283 set_jcr_job_status(jcr, JS_Running);
284 /* Wait for Client to terminate */
/* NOTE(review): EndJob converts ReadBytes/JobBytes with %lld although
 * they are declared uint64_t above -- confirm the signed specifier is
 * acceptable on all targets. The line that sets fd_ok after a successful
 * match is not visible in this listing. */
285 while ((n = bget_dirmsg(fd)) >= 0) {
286 if (!fd_ok && sscanf(fd->msg, EndJob, &jcr->FDJobStatus, &JobFiles,
287 &ReadBytes, &JobBytes, &Errors) == 5) {
289 set_jcr_job_status(jcr, jcr->FDJobStatus);
290 Dmsg1(100, "FDStatus=%c\n", (char)jcr->JobStatus);
292 Jmsg(jcr, M_WARNING, 0, _("Unexpected Client Job message: %s\n"),
295 if (job_canceled(jcr)) {
299 if (is_bnet_error(fd)) {
300 Jmsg(jcr, M_FATAL, 0, _("Network error with FD during %s: ERR=%s\n"),
301 job_type_to_str(jcr->JobType), bnet_strerror(fd));
303 bnet_sig(fd, BNET_TERMINATE); /* tell Client we are terminating */
305 /* Note, the SD stores in jcr->JobFiles/ReadBytes/JobBytes/Errors */
306 wait_for_storage_daemon_termination(jcr);
309 /* Return values from FD */
/* NOTE(review): JobFiles, Errors, ReadBytes and JobBytes are only written
 * by the sscanf() above. The guard around these assignments is not shown;
 * presumably they run only when fd_ok is set, with the fatal message
 * below as the alternative -- confirm. */
311 jcr->JobFiles = JobFiles;
312 jcr->Errors = Errors;
313 jcr->ReadBytes = ReadBytes;
314 jcr->JobBytes = JobBytes;
316 Jmsg(jcr, M_FATAL, 0, _("No Job status returned from FD.\n"));
319 // Dmsg4(100, "fd_ok=%d FDJS=%d JS=%d SDJS=%d\n", fd_ok, jcr->FDJobStatus,
320 // jcr->JobStatus, jcr->SDJobStatus);
322 /* Return the first error status we find Dir, FD, or SD */
/* A missing EndJob message or a socket error forces the FD status to
 * JS_ErrorTerminated before the statuses are compared. */
323 if (!fd_ok || is_bnet_error(fd)) {
324 jcr->FDJobStatus = JS_ErrorTerminated;
326 if (jcr->JobStatus != JS_Terminated) {
327 return jcr->JobStatus;
329 if (jcr->FDJobStatus != JS_Terminated) {
330 return jcr->FDJobStatus;
332 return jcr->SDJobStatus;
336 * Release resources allocated during backup.
/*
 * backup_cleanup -- finish a backup job: record the termination code,
 * update the job end record and the bootstrap file, and emit the job
 * summary report through Jmsg().
 *
 *   jcr       job control record of the backup
 *   TermCode  status passed to set_jcr_job_status() before the catalog
 *             is updated
 *
 * NOTE(review): this listing omits a number of original lines (the
 * embedded line numbers skip), including parts of the report format and
 * its argument list, so the format/argument pairing cannot be checked here.
 */
338 void backup_cleanup(JCR *jcr, int TermCode)
340 char sdt[50], edt[50], schedt[50];
341 char ec1[30], ec2[30], ec3[30], ec4[30], ec5[30], compress[50];
342 char ec6[30], ec7[30], ec8[30], elapsed[50];
343 char term_code[100], fd_term_msg[100], sd_term_msg[100];
344 const char *term_msg;
348 double kbps, compression;
351 Dmsg2(100, "Enter backup_cleanup %d %c\n", TermCode, TermCode);
352 dequeue_messages(jcr); /* display any queued messages */
353 memset(&mr, 0, sizeof(mr));
354 memset(&cr, 0, sizeof(cr));
355 set_jcr_job_status(jcr, TermCode);
357 update_job_end_record(jcr); /* update database */
/* Re-read the job record for the statistics; a failure is reported as a
 * warning and marks the job JS_ErrorTerminated. */
359 if (!db_get_job_record(jcr, jcr->db, &jcr->jr)) {
360 Jmsg(jcr, M_WARNING, 0, _("Error getting job record for stats: %s"),
361 db_strerror(jcr->db));
362 set_jcr_job_status(jcr, JS_ErrorTerminated);
/* Client record is looked up by the client resource name; a failure here
 * only produces a warning in the visible lines. */
365 bstrncpy(cr.Name, jcr->client->hdr.name, sizeof(cr.Name));
366 if (!db_get_client_record(jcr, jcr->db, &cr)) {
367 Jmsg(jcr, M_WARNING, 0, _("Error getting client record for stats: %s"),
368 db_strerror(jcr->db));
/* Media record is looked up by jcr->VolumeName; a failure is a warning
 * and marks the job JS_ErrorTerminated. */
371 bstrncpy(mr.VolumeName, jcr->VolumeName, sizeof(mr.VolumeName));
372 if (!db_get_media_record(jcr, jcr->db, &mr)) {
373 Jmsg(jcr, M_WARNING, 0, _("Error getting Media record for Volume \"%s\": ERR=%s"),
374 mr.VolumeName, db_strerror(jcr->db));
375 set_jcr_job_status(jcr, JS_ErrorTerminated);
378 update_bootstrap_file(jcr);
380 msg_type = M_INFO; /* by default INFO message */
/* Choose the termination text. Only one case label is visible in this
 * listing. The error and canceled paths also send BNET_TERMINATE on the
 * Storage daemon socket and cancel the SD message thread when present. */
381 switch (jcr->JobStatus) {
383 if (jcr->Errors || jcr->SDErrors) {
384 term_msg = _("Backup OK -- with warnings");
386 term_msg = _("Backup OK");
390 case JS_ErrorTerminated:
391 term_msg = _("*** Backup Error ***");
392 msg_type = M_ERROR; /* Generate error message */
393 if (jcr->store_bsock) {
394 bnet_sig(jcr->store_bsock, BNET_TERMINATE);
395 if (jcr->SD_msg_chan) {
396 pthread_cancel(jcr->SD_msg_chan);
401 term_msg = _("Backup Canceled");
402 if (jcr->store_bsock) {
403 bnet_sig(jcr->store_bsock, BNET_TERMINATE);
404 if (jcr->SD_msg_chan) {
405 pthread_cancel(jcr->SD_msg_chan);
/* NOTE(review): sprintf() into term_code[100] is not length-bounded and
 * the format string is translated via _(); bsnprintf(), already used
 * below for compress, would be the consistent choice. */
410 term_msg = term_code;
411 sprintf(term_code, _("Inappropriate term code: %c\n"), jcr->JobStatus);
414 bstrftimes(schedt, sizeof(schedt), jcr->jr.SchedTime);
415 bstrftimes(sdt, sizeof(sdt), jcr->jr.StartTime);
416 bstrftimes(edt, sizeof(edt), jcr->jr.EndTime);
417 RunTime = jcr->jr.EndTime - jcr->jr.StartTime;
/* NOTE(review): this divides by 1000 * RunTime; a guard for a zero or
 * negative RunTime is not visible in this listing (embedded lines
 * 418-420 are omitted) -- confirm one exists. */
421 kbps = (double)jcr->jr.JobBytes / (1000 * RunTime);
423 if (!db_get_job_volume_names(jcr, jcr->db, jcr->jr.JobId, &jcr->VolumeName)) {
425 * Note, if the job has erred, most likely it did not write any
426 * tape, so suppress this "error" message since in that case
427 * it is normal. Or look at it the other way, only for a
428 * normal exit should we complain about this error.
430 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes) {
431 Jmsg(jcr, M_ERROR, 0, "%s", db_strerror(jcr->db));
433 jcr->VolumeName[0] = 0; /* none */
/* Compression is 100 - 100 * JobBytes / ReadBytes percent; it is shown as
 * "None" when ReadBytes is 0 or the computed value is below 0.5. */
436 if (jcr->ReadBytes == 0) {
437 bstrncpy(compress, "None", sizeof(compress));
439 compression = (double)100 - 100.0 * ((double)jcr->JobBytes / (double)jcr->ReadBytes);
440 if (compression < 0.5) {
441 bstrncpy(compress, "None", sizeof(compress));
443 bsnprintf(compress, sizeof(compress), "%.1f %%", (float)compression);
446 jobstatus_to_ascii(jcr->FDJobStatus, fd_term_msg, sizeof(fd_term_msg));
447 jobstatus_to_ascii(jcr->SDJobStatus, sd_term_msg, sizeof(sd_term_msg));
449 // bmicrosleep(15, 0); /* for debugging SIGHUP */
/* Job summary report; msg_type is M_INFO unless the error case above
 * switched it to M_ERROR. */
451 Jmsg(jcr, msg_type, 0, _("Bacula %s (%s): %s\n"
454 " Backup Level: %s%s\n"
455 " Client: \"%s\" %s\n"
456 " FileSet: \"%s\" %s\n"
459 " Scheduled time: %s\n"
462 " Elapsed time: %s\n"
464 " FD Files Written: %s\n"
465 " SD Files Written: %s\n"
466 " FD Bytes Written: %s (%sB)\n"
467 " SD Bytes Written: %s (%sB)\n"
469 " Software Compression: %s\n"
470 " Volume name(s): %s\n"
471 " Volume Session Id: %d\n"
472 " Volume Session Time: %d\n"
473 " Last Volume Bytes: %s (%sB)\n"
474 " Non-fatal FD errors: %d\n"
476 " FD termination status: %s\n"
477 " SD termination status: %s\n"
478 " Termination: %s\n\n"),
484 level_to_str(jcr->JobLevel), jcr->since,
485 jcr->client->hdr.name, cr.Uname,
486 jcr->fileset->hdr.name, jcr->FSCreateTime,
488 jcr->store->hdr.name,
492 edit_utime(RunTime, elapsed, sizeof(elapsed)),
494 edit_uint64_with_commas(jcr->jr.JobFiles, ec1),
495 edit_uint64_with_commas(jcr->SDJobFiles, ec2),
496 edit_uint64_with_commas(jcr->jr.JobBytes, ec3),
497 edit_uint64_with_suffix(jcr->jr.JobBytes, ec4),
498 edit_uint64_with_commas(jcr->SDJobBytes, ec5),
499 edit_uint64_with_suffix(jcr->SDJobBytes, ec6),
505 edit_uint64_with_commas(mr.VolBytes, ec7),
506 edit_uint64_with_suffix(mr.VolBytes, ec8),
513 Dmsg0(100, "Leave backup_cleanup()\n");
/*
 * update_bootstrap_file -- write this job's volume records to the target
 * named by jcr->job->WriteBootstrap.
 *
 * Nothing is done unless the job status is JS_Terminated, jr.JobBytes is
 * non-zero and WriteBootstrap is set. One record is written per volume
 * returned by db_get_job_volume_parameters(). Failure to open the target
 * is reported as M_ERROR and marks the job JS_ErrorTerminated.
 *
 * NOTE(review): the lines that close the stream / pipe and release
 * VolParams (embedded lines 565-575) and the end of the function are not
 * visible in this listing.
 */
516 void update_bootstrap_file(JCR *jcr)
518 /* Now update the bootstrap file if any */
519 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes &&
520 jcr->job->WriteBootstrap) {
524 char *fname = jcr->job->WriteBootstrap;
525 VOL_PARAMS *VolParams = NULL;
/* Two ways of opening the target are visible: a pipe opened for writing
 * with open_bpipe(), or a regular file with fopen(). The condition that
 * selects between them is not shown. For a regular file, an L_FULL job
 * opens with "w+" and any other level with "a+". */
532 bpipe = open_bpipe(fname, 0, "w");
533 fd = bpipe ? bpipe->wfd : NULL;
535 /* ***FIXME*** handle BASE */
536 fd = fopen(fname, jcr->JobLevel==L_FULL?"w+":"a+");
539 VolCount = db_get_job_volume_parameters(jcr, jcr->db, jcr->JobId,
/* Missing volume parameters are always reported, but the job is marked
 * JS_ErrorTerminated only when SDJobFiles is non-zero. The condition that
 * triggers this message is not visible here. */
542 Jmsg(jcr, M_ERROR, 0, _("Could not get Job Volume Parameters to "
543 "update Bootstrap file. ERR=%s\n"), db_strerror(jcr->db));
544 if (jcr->SDJobFiles != 0) {
545 set_jcr_job_status(jcr, JS_ErrorTerminated);
549 /* Start output with when and who wrote it */
550 bstrftimes(edt, sizeof(edt), time(NULL));
551 fprintf(fd, "# %s - %s - %s%s\n", edt, jcr->jr.Job,
552 level_to_str(jcr->JobLevel), jcr->since);
/* VolSessionId and VolSessionTime come from the jcr, so they are the same
 * for every volume record; the remaining fields are per volume. */
553 for (int i=0; i < VolCount; i++) {
554 /* Write the record */
555 fprintf(fd, "Volume=\"%s\"\n", VolParams[i].VolumeName);
556 fprintf(fd, "MediaType=\"%s\"\n", VolParams[i].MediaType);
557 fprintf(fd, "VolSessionId=%u\n", jcr->VolSessionId);
558 fprintf(fd, "VolSessionTime=%u\n", jcr->VolSessionTime);
559 fprintf(fd, "VolFile=%u-%u\n", VolParams[i].StartFile,
560 VolParams[i].EndFile);
561 fprintf(fd, "VolBlock=%u-%u\n", VolParams[i].StartBlock,
562 VolParams[i].EndBlock);
563 fprintf(fd, "FileIndex=%d-%d\n", VolParams[i].FirstIndex,
564 VolParams[i].LastIndex);
/* Reached when the target could not be opened (presumably the else
 * branch of a check on fd; the check itself is not shown). */
576 Jmsg(jcr, M_ERROR, 0, _("Could not open WriteBootstrap file:\n"
577 "%s: ERR=%s\n"), fname, be.strerror());
578 set_jcr_job_status(jcr, JS_ErrorTerminated);