3 * Bacula Director -- backup.c -- responsible for doing backup jobs
5 * Kern Sibbald, March MM
7 * Basic tasks done here:
8 * Open DB and create records for this job.
9 * Open Message Channel with Storage daemon to tell him a job will be starting.
10 * Open connection with File daemon and pass him commands
12 * When the File daemon finishes the job, update the DB.
17 Copyright (C) 2000-2006 Kern Sibbald
19 This program is free software; you can redistribute it and/or
20 modify it under the terms of the GNU General Public License
21 version 2 as amended with additional clauses defined in the
22 file LICENSE in the main source directory.
24 This program is distributed in the hope that it will be useful,
25 but WITHOUT ANY WARRANTY; without even the implied warranty of
26 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27 file LICENSE for additional details.
35 /* Commands sent to File daemon */
/* backupcmd is sent by do_backup(); the reply is checked against OKbackup. */
36 static char backupcmd[] = "backup\n";
/*
 * storaddr is a format string filled in by do_backup() with the store's
 * address (%s), SDDport (%d) and the tls_need value (ssl=%d); the reply
 * is checked against OKstore.
 */
37 static char storaddr[] = "storage address=%s port=%d ssl=%d\n";
39 /* Responses received from File daemon */
40 static char OKbackup[] = "2000 OK backup\n";
41 static char OKstore[] = "2000 OK storage\n";
/*
 * EndJob is used as a sscanf() pattern in wait_for_job_termination();
 * all five fields must convert for the message to count as the File
 * daemon's end-of-job report.
 * NOTE(review): ReadBytes/JobBytes are scanned with %lld into uint64_t
 * variables there -- confirm the signed conversion is intended.
 */
42 static char EndJob[] = "2800 End Job TermCode=%d JobFiles=%u "
43 "ReadBytes=%lld JobBytes=%lld Errors=%u\n";
46 * Called here before the job is run to do the job-specific setup.
/*
 * Pre-run setup for a backup job. In order it calls
 * get_or_create_fileset_record(), get_level_since_time() (filling
 * jcr->since), apply_pool_overrides(), get_or_create_pool_record()
 * (storing the result in jcr->jr.PoolId), copy_storage() with the pool's
 * storage, and finally create_clones().
 *
 * Returns bool. NOTE(review): the return statements and the bodies of the
 * failure branches are not visible in this listing -- presumably false is
 * returned on each failure and true at the end; confirm against the full
 * file.
 */
49 bool do_backup_init(JCR *jcr)
52 if (!get_or_create_fileset_record(jcr)) {
57 * Get definitive Job level and since time
59 get_level_since_time(jcr, jcr->since, sizeof(jcr->since));
61 apply_pool_overrides(jcr);
63 jcr->jr.PoolId = get_or_create_pool_record(jcr, jcr->pool->hdr.name);
/* PoolId is tested against 0 immediately after the lookup; presumably 0 means no Pool record could be obtained. */
64 if (jcr->jr.PoolId == 0) {
68 /* If pool storage specified, use it instead of job storage */
69 copy_storage(jcr, jcr->pool->storage, _("Pool resource"));
/* Fatal job message for the case where no Storage is available (the guarding condition is not visible here). */
72 Jmsg(jcr, M_FATAL, 0, _("No Storage specification found in Job or Pool.\n"));
76 create_clones(jcr); /* run any clone jobs */
82 * Do a backup of the specified FileSet
84 * Returns: false on failure
/*
 * Drives one backup job from the Director side. Visible sequence:
 *   1. announce the job and write the Job start record to the catalog;
 *   2. connect to the Storage daemon, start the SD job, send "run", then
 *      start the SD message thread;
 *   3. connect to the File daemon and send it the include list, exclude
 *      list, level command, Storage daemon address and runscript commands;
 *   4. refresh the start time and rewrite the Job start record;
 *   5. send the backup command, wait for termination, and call
 *      backup_cleanup() when the result is JS_Terminated.
 * The trailing statements mark the job JS_ErrorTerminated and wait for the
 * Storage daemon.
 *
 * NOTE(review): the bodies of the failure branches (returns/gotos) and some
 * declarations (fd, store, stat, ed1) are not visible in this listing.
 */
87 bool do_backup(JCR *jcr)
90 int tls_need = BNET_TLS_NONE;
96 /* Print Job Start message */
97 Jmsg(jcr, M_INFO, 0, _("Start Backup JobId %s, Job=%s\n"),
98 edit_uint64(jcr->JobId, ed1), jcr->Job);
100 set_jcr_job_status(jcr, JS_Running);
101 Dmsg2(100, "JobId=%d JobLevel=%c\n", jcr->jr.JobId, jcr->jr.JobLevel);
102 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
103 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
108 * Open a message channel connection with the Storage
109 * daemon. This is to let him know that our client
110 * will be contacting him for a backup session.
113 Dmsg0(110, "Open connection with storage daemon\n");
114 set_jcr_job_status(jcr, JS_WaitSD);
116 * Start conversation with Storage daemon
118 if (!connect_to_storage_daemon(jcr, 10, SDConnectTimeout, 1)) {
122 * Now start a job with the Storage daemon
124 if (!start_storage_daemon_job(jcr, NULL, jcr->storage)) {
129 * Start the job prior to starting the message thread below
130 * to avoid two threads from using the BSOCK structure at
/* That is: "run" goes out on store_bsock before start_storage_daemon_message_thread() is called further down. */
133 if (!bnet_fsend(jcr->store_bsock, "run")) {
138 * Now start a Storage daemon message thread. Note,
139 * this thread is used to provide the catalog services
140 * for the backup job, including inserting the attributes
141 * into the catalog. See catalog_update() in catreq.c
143 if (!start_storage_daemon_message_thread(jcr)) {
146 Dmsg0(150, "Storage daemon connection OK\n");
148 set_jcr_job_status(jcr, JS_WaitFD);
149 if (!connect_to_file_daemon(jcr, 10, FDConnectTimeout, 1)) {
153 set_jcr_job_status(jcr, JS_Running);
/* From here on fd is the File daemon socket used for every command below. */
154 fd = jcr->file_bsock;
156 if (!send_include_list(jcr)) {
160 if (!send_exclude_list(jcr)) {
164 if (!send_level_command(jcr)) {
169 * send Storage daemon address to the File daemon
/* When SDDport is 0 it is set to SDport, so the port sent to the File daemon is never the unset value. */
172 if (store->SDDport == 0) {
173 store->SDDport = store->SDport;
176 /* TLS Requirement */
/*
 * tls_need keeps its initial BNET_TLS_NONE unless store->tls_enable is set;
 * with TLS enabled it becomes BNET_TLS_REQUIRED when tls_require is set and
 * BNET_TLS_OK otherwise (the else line itself is not visible here).
 */
177 if (store->tls_enable) {
178 if (store->tls_require) {
179 tls_need = BNET_TLS_REQUIRED;
181 tls_need = BNET_TLS_OK;
/* NOTE(review): unlike the "run" send above, the bnet_fsend() result is not checked here; only the response() check that follows detects a failure. */
185 bnet_fsend(fd, storaddr, store->address, store->SDDport, tls_need);
186 if (!response(jcr, fd, OKstore, "Storage", DISPLAY_ERROR)) {
191 if (!send_runscripts_commands(jcr)) {
196 * We re-update the job start record so that the start
197 * time is set after the run before job. This avoids
198 * that any files created by the run before job will
199 * be saved twice. They will be backed up in the current
200 * job, but not in the next one unless they are changed.
201 * Without this, they will be backed up in this job and
202 * in the next job run because in that case, their date
203 * is after the start of this run.
205 jcr->start_time = time(NULL);
206 jcr->jr.StartTime = jcr->start_time;
207 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
208 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
211 /* Send backup command */
212 bnet_fsend(fd, backupcmd);
213 if (!response(jcr, fd, OKbackup, "backup", DISPLAY_ERROR)) {
217 /* Pickup Job termination data */
218 stat = wait_for_job_termination(jcr);
/* backup_cleanup() is called from here only when the combined status is JS_Terminated. */
219 if (stat == JS_Terminated) {
220 backup_cleanup(jcr, stat);
225 /* Come here only after starting SD thread */
/* Error exit: flag the job as failed, then wait for the Storage daemon; use_count() is traced before and after the wait. */
227 set_jcr_job_status(jcr, JS_ErrorTerminated);
228 Dmsg1(400, "wait for sd. use=%d\n", jcr->use_count());
229 wait_for_storage_daemon_termination(jcr);
230 Dmsg1(400, "after wait for sd. use=%d\n", jcr->use_count());
236 * Here we wait for the File daemon to signal termination,
237 * then we wait for the Storage daemon. When both
238 * are done, we return the job status.
239 * Also used by restore.c
/*
 * Reads messages from the File daemon socket (jcr->file_bsock) until
 * bget_dirmsg() returns a negative value, looking for the EndJob report,
 * then signals BNET_TERMINATE to the client and waits for the Storage
 * daemon.
 *
 * Returns the first status that is not JS_Terminated, checked in the order
 * jcr->JobStatus, jcr->FDJobStatus; if both are JS_Terminated the result is
 * jcr->SDJobStatus.
 *
 * NOTE(review): the declarations of n and fd_ok, and the statement that
 * sets fd_ok, are not visible in this listing.
 */
241 int wait_for_job_termination(JCR *jcr)
244 BSOCK *fd = jcr->file_bsock;
246 uint32_t JobFiles, Errors;
247 uint64_t ReadBytes, JobBytes;
249 set_jcr_job_status(jcr, JS_Running);
250 /* Wait for Client to terminate */
251 while ((n = bget_dirmsg(fd)) >= 0) {
/*
 * A message counts as the end-of-job report only while fd_ok is still
 * false and all 5 EndJob fields convert.
 * NOTE(review): EndJob scans ReadBytes/JobBytes with %lld into uint64_t
 * and the term code with %d into jcr->FDJobStatus -- confirm the types
 * match the conversion specifiers.
 */
252 if (!fd_ok && sscanf(fd->msg, EndJob, &jcr->FDJobStatus, &JobFiles,
253 &ReadBytes, &JobBytes, &Errors) == 5) {
255 set_jcr_job_status(jcr, jcr->FDJobStatus);
256 Dmsg1(100, "FDStatus=%c\n", (char)jcr->JobStatus);
258 Jmsg(jcr, M_WARNING, 0, _("Unexpected Client Job message: %s\n"),
261 if (job_canceled(jcr)) {
/* After the read loop: report a socket error, if any, before telling the client we are done. */
265 if (is_bnet_error(fd)) {
266 Jmsg(jcr, M_FATAL, 0, _("Network error with FD during %s: ERR=%s\n"),
267 job_type_to_str(jcr->JobType), bnet_strerror(fd));
269 bnet_sig(fd, BNET_TERMINATE); /* tell Client we are terminating */
271 /* Note, the SD stores in jcr->JobFiles/ReadBytes/JobBytes/Errors */
272 wait_for_storage_daemon_termination(jcr);
275 /* Return values from FD */
/* The counters parsed from the FD report overwrite the jcr fields; the guarding condition is not visible here (presumably fd_ok). */
277 jcr->JobFiles = JobFiles;
278 jcr->Errors = Errors;
279 jcr->ReadBytes = ReadBytes;
280 jcr->JobBytes = JobBytes;
282 Jmsg(jcr, M_FATAL, 0, _("No Job status returned from FD.\n"));
285 // Dmsg4(100, "fd_ok=%d FDJS=%d JS=%d SDJS=%d\n", fd_ok, jcr->FDJobStatus,
286 // jcr->JobStatus, jcr->SDJobStatus);
288 /* Return the first error status we find Dir, FD, or SD */
/* A missing FD report or a socket error forces the FD status to JS_ErrorTerminated before the statuses are compared. */
289 if (!fd_ok || is_bnet_error(fd)) {
290 jcr->FDJobStatus = JS_ErrorTerminated;
292 if (jcr->JobStatus != JS_Terminated) {
293 return jcr->JobStatus;
295 if (jcr->FDJobStatus != JS_Terminated) {
296 return jcr->FDJobStatus;
298 return jcr->SDJobStatus;
302 * Release resources allocated during backup.
/*
 * End-of-job processing for a backup. Sets the job status to TermCode,
 * updates the Job end record, re-reads the Job, Client and Media records
 * from the catalog, updates the bootstrap file, picks a termination message
 * from jcr->JobStatus, computes elapsed time, rate and compression, and
 * emits the multi-line job summary through Jmsg().
 *
 * jcr      - job control record of the finished job
 * TermCode - status the job is to be recorded with
 *
 * NOTE(review): several declarations (mr, cr, msg_type, RunTime), the case
 * labels of the switch, and a number of Jmsg arguments are not visible in
 * this listing.
 */
304 void backup_cleanup(JCR *jcr, int TermCode)
306 char sdt[50], edt[50], schedt[50];
307 char ec1[30], ec2[30], ec3[30], ec4[30], ec5[30], compress[50];
308 char ec6[30], ec7[30], ec8[30], elapsed[50];
309 char term_code[100], fd_term_msg[100], sd_term_msg[100];
310 const char *term_msg;
314 double kbps, compression;
317 Dmsg2(100, "Enter backup_cleanup %d %c\n", TermCode, TermCode);
318 dequeue_messages(jcr); /* display any queued messages */
/* mr and cr are zeroed before being used as lookup keys further down. */
319 memset(&mr, 0, sizeof(mr));
320 memset(&cr, 0, sizeof(cr));
321 set_jcr_job_status(jcr, TermCode);
323 update_job_end_record(jcr); /* update database */
/* Re-read the Job record so the summary uses the values stored in the catalog; a failure downgrades the job to JS_ErrorTerminated. */
325 if (!db_get_job_record(jcr, jcr->db, &jcr->jr)) {
326 Jmsg(jcr, M_WARNING, 0, _("Error getting job record for stats: %s"),
327 db_strerror(jcr->db));
328 set_jcr_job_status(jcr, JS_ErrorTerminated);
/* Client lookup by name; a failure only produces a warning, the job status is left alone. */
331 bstrncpy(cr.Name, jcr->client->hdr.name, sizeof(cr.Name));
332 if (!db_get_client_record(jcr, jcr->db, &cr)) {
333 Jmsg(jcr, M_WARNING, 0, _("Error getting client record for stats: %s"),
334 db_strerror(jcr->db));
/* Media lookup by jcr->VolumeName; a failure downgrades the job to JS_ErrorTerminated. */
337 bstrncpy(mr.VolumeName, jcr->VolumeName, sizeof(mr.VolumeName));
338 if (!db_get_media_record(jcr, jcr->db, &mr)) {
339 Jmsg(jcr, M_WARNING, 0, _("Error getting Media record for Volume \"%s\": ERR=%s"),
340 mr.VolumeName, db_strerror(jcr->db));
341 set_jcr_job_status(jcr, JS_ErrorTerminated);
344 update_bootstrap_file(jcr);
346 msg_type = M_INFO; /* by default INFO message */
/* Choose the termination text (and message type) from the final job status. */
347 switch (jcr->JobStatus) {
349 if (jcr->Errors || jcr->SDErrors) {
350 term_msg = _("Backup OK -- with warnings");
352 term_msg = _("Backup OK");
356 case JS_ErrorTerminated:
357 term_msg = _("*** Backup Error ***");
358 msg_type = M_ERROR; /* Generate error message */
/* If the SD socket is still open, signal BNET_TERMINATE on it and cancel the SD message thread when one is recorded. */
359 if (jcr->store_bsock) {
360 bnet_sig(jcr->store_bsock, BNET_TERMINATE);
361 if (jcr->SD_msg_chan) {
362 pthread_cancel(jcr->SD_msg_chan);
367 term_msg = _("Backup Canceled");
368 if (jcr->store_bsock) {
369 bnet_sig(jcr->store_bsock, BNET_TERMINATE);
370 if (jcr->SD_msg_chan) {
371 pthread_cancel(jcr->SD_msg_chan);
/* Fallback text for any other status, formatted into the local term_code buffer. */
376 term_msg = term_code;
377 sprintf(term_code, _("Inappropriate term code: %c\n"), jcr->JobStatus);
380 bstrftimes(schedt, sizeof(schedt), jcr->jr.SchedTime);
381 bstrftimes(sdt, sizeof(sdt), jcr->jr.StartTime);
382 bstrftimes(edt, sizeof(edt), jcr->jr.EndTime);
383 RunTime = jcr->jr.EndTime - jcr->jr.StartTime;
/* NOTE(review): this divides by RunTime; a guard against RunTime being 0 is not visible in this listing -- confirm one exists. */
387 kbps = (double)jcr->jr.JobBytes / (1000 * RunTime);
389 if (!db_get_job_volume_names(jcr, jcr->db, jcr->jr.JobId, &jcr->VolumeName)) {
391 * Note, if the job has erred, most likely it did not write any
392 * tape, so suppress this "error" message since in that case
393 * it is normal. Or look at it the other way, only for a
394 * normal exit should we complain about this error.
396 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes) {
397 Jmsg(jcr, M_ERROR, 0, "%s", db_strerror(jcr->db));
399 jcr->VolumeName[0] = 0; /* none */
/* Compression is reported as "None" when nothing was read or when the computed saving is below 0.5 percent. */
402 if (jcr->ReadBytes == 0) {
403 bstrncpy(compress, "None", sizeof(compress));
405 compression = (double)100 - 100.0 * ((double)jcr->JobBytes / (double)jcr->ReadBytes);
406 if (compression < 0.5) {
407 bstrncpy(compress, "None", sizeof(compress));
409 bsnprintf(compress, sizeof(compress), "%.1f %%", (float)compression);
412 jobstatus_to_ascii(jcr->FDJobStatus, fd_term_msg, sizeof(fd_term_msg));
413 jobstatus_to_ascii(jcr->SDJobStatus, sd_term_msg, sizeof(sd_term_msg));
415 // bmicrosleep(15, 0); /* for debugging SIGHUP */
/* Job summary report; each edit_* call formats into its own ecN buffer so all strings stay valid for the single Jmsg() call. */
417 Jmsg(jcr, msg_type, 0, _("Bacula %s (%s): %s\n"
420 " Backup Level: %s%s\n"
421 " Client: \"%s\" %s\n"
422 " FileSet: \"%s\" %s\n"
423 " Pool: \"%s\" (From %s)\n"
424 " Storage: \"%s\" (From %s)\n"
425 " Scheduled time: %s\n"
428 " Elapsed time: %s\n"
430 " FD Files Written: %s\n"
431 " SD Files Written: %s\n"
432 " FD Bytes Written: %s (%sB)\n"
433 " SD Bytes Written: %s (%sB)\n"
435 " Software Compression: %s\n"
436 " Volume name(s): %s\n"
437 " Volume Session Id: %d\n"
438 " Volume Session Time: %d\n"
439 " Last Volume Bytes: %s (%sB)\n"
440 " Non-fatal FD errors: %d\n"
442 " FD termination status: %s\n"
443 " SD termination status: %s\n"
444 " Termination: %s\n\n"),
450 level_to_str(jcr->JobLevel), jcr->since,
451 jcr->client->hdr.name, cr.Uname,
452 jcr->fileset->hdr.name, jcr->FSCreateTime,
453 jcr->pool->hdr.name, jcr->pool_source,
454 jcr->store->hdr.name, jcr->storage_source,
458 edit_utime(RunTime, elapsed, sizeof(elapsed)),
460 edit_uint64_with_commas(jcr->jr.JobFiles, ec1),
461 edit_uint64_with_commas(jcr->SDJobFiles, ec2),
462 edit_uint64_with_commas(jcr->jr.JobBytes, ec3),
463 edit_uint64_with_suffix(jcr->jr.JobBytes, ec4),
464 edit_uint64_with_commas(jcr->SDJobBytes, ec5),
465 edit_uint64_with_suffix(jcr->SDJobBytes, ec6),
471 edit_uint64_with_commas(mr.VolBytes, ec7),
472 edit_uint64_with_suffix(mr.VolBytes, ec8),
479 Dmsg0(100, "Leave backup_cleanup()\n");
/*
 * Writes the bootstrap entries for the job to the target named by
 * jcr->job->WriteBootstrap. Nothing is done unless the job status is
 * JS_Terminated, the Job record shows bytes written, and WriteBootstrap is
 * set. Each volume returned by db_get_job_volume_parameters() yields one
 * stanza (Volume, MediaType, session id/time, file, block and file-index
 * ranges).
 *
 * NOTE(review): the condition that selects between the pipe and the plain
 * file, the close/free calls, and several declarations (bpipe, fd, edt,
 * VolCount, be) are not visible in this listing.
 */
482 void update_bootstrap_file(JCR *jcr)
484 /* Now update the bootstrap file if any */
485 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes &&
486 jcr->job->WriteBootstrap) {
490 char *fname = jcr->job->WriteBootstrap;
491 VOL_PARAMS *VolParams = NULL;
/* Pipe variant: the output stream is the pipe's write side, or NULL when the pipe could not be opened. */
498 bpipe = open_bpipe(fname, 0, "w");
499 fd = bpipe ? bpipe->wfd : NULL;
501 /* ***FIXME*** handle BASE */
/* File variant: a Full job opens with "w+b", any other level opens with "a+b". */
502 fd = fopen(fname, jcr->JobLevel==L_FULL?"w+b":"a+b");
505 VolCount = db_get_job_volume_parameters(jcr, jcr->db, jcr->JobId,
508 Jmsg(jcr, M_ERROR, 0, _("Could not get Job Volume Parameters to "
509 "update Bootstrap file. ERR=%s\n"), db_strerror(jcr->db));
/* Missing volume parameters fail the job only when the SD reports having written files. */
510 if (jcr->SDJobFiles != 0) {
511 set_jcr_job_status(jcr, JS_ErrorTerminated);
515 /* Start output with when and who wrote it */
516 bstrftimes(edt, sizeof(edt), time(NULL));
517 fprintf(fd, "# %s - %s - %s%s\n", edt, jcr->jr.Job,
518 level_to_str(jcr->JobLevel), jcr->since);
/* One stanza per volume; the session id and time come from the jcr and are the same in every stanza. */
519 for (int i=0; i < VolCount; i++) {
520 /* Write the record */
521 fprintf(fd, "Volume=\"%s\"\n", VolParams[i].VolumeName);
522 fprintf(fd, "MediaType=\"%s\"\n", VolParams[i].MediaType);
523 fprintf(fd, "VolSessionId=%u\n", jcr->VolSessionId);
524 fprintf(fd, "VolSessionTime=%u\n", jcr->VolSessionTime);
525 fprintf(fd, "VolFile=%u-%u\n", VolParams[i].StartFile,
526 VolParams[i].EndFile);
527 fprintf(fd, "VolBlock=%u-%u\n", VolParams[i].StartBlock,
528 VolParams[i].EndBlock);
529 fprintf(fd, "FileIndex=%d-%d\n", VolParams[i].FirstIndex,
530 VolParams[i].LastIndex);
/* Failure to open the bootstrap target is reported and fails the job. */
542 Jmsg(jcr, M_ERROR, 0, _("Could not open WriteBootstrap file:\n"
543 "%s: ERR=%s\n"), fname, be.strerror());
544 set_jcr_job_status(jcr, JS_ErrorTerminated);