3 * Bacula Director -- backup.c -- responsible for doing backup jobs
5 * Kern Sibbald, March MM
7 * Basic tasks done here:
8 * Open DB and create records for this job.
9 * Open Message Channel with Storage daemon to tell him a job will be starting.
10 * Open connection with File daemon and pass him commands
12 * When the File daemon finishes the job, update the DB.
17 Copyright (C) 2000-2006 Kern Sibbald
19 This program is free software; you can redistribute it and/or
20 modify it under the terms of the GNU General Public License
21 version 2 as amended with additional clauses defined in the
22 file LICENSE in the main source directory.
24 This program is distributed in the hope that it will be useful,
25 but WITHOUT ANY WARRANTY; without even the implied warranty of
26 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27 file LICENSE for additional details.
35 /* Commands sent to File daemon */
/* Sent unchanged with bnet_fsend() to start the backup; the reply is matched against OKbackup. */
36 static char backupcmd[] = "backup\n";
/*
 * printf-style template announcing the Storage daemon to the File daemon:
 * address (%s), port (%d) and an integer "ssl" field (%d). The visible
 * bnet_fsend() call passes store->address and store->SDDport; the third
 * argument is on a line not shown in this listing -- presumably tls_need,
 * TODO confirm.
 */
37 static char storaddr[] = "storage address=%s port=%d ssl=%d\n";
39 /* Responses received from File daemon */
/* Reply expected after backupcmd has been sent. */
40 static char OKbackup[] = "2000 OK backup\n";
/* Reply expected after the storaddr message has been sent. */
41 static char OKstore[] = "2000 OK storage\n";
/*
 * sscanf() template applied to File daemon messages in
 * wait_for_job_termination(); a message counts as the end-of-job report
 * only when all five fields convert (termination code, file count, bytes
 * read, bytes written, error count).
 * NOTE(review): ReadBytes/JobBytes are scanned with %lld into uint64_t
 * variables, so the conversion specifier is signed while the targets are
 * unsigned -- confirm this is intended.
 */
42 static char EndJob[] = "2800 End Job TermCode=%d JobFiles=%u "
43 "ReadBytes=%lld JobBytes=%lld Errors=%u\n";
46 * Called here before the job is run to do the job
/*
 * do_backup_init -- per-job setup performed before the backup is run.
 *
 * Visible steps, in order:
 *   1. get or create the FileSet record;
 *   2. determine the job level and the "since" time (stored in jcr->since);
 *   3. select jcr->pool from full_pool / inc_pool / dif_pool by job level;
 *   4. look the Pool up in the catalog by name, creating it if the lookup
 *      fails, and copy its PoolId into jcr->PoolId and jcr->jr.PoolId;
 *   5. start one clone job per entry in the job's run directives, unless
 *      this job is itself a clone.
 *
 * jcr: job control record of the job being prepared.
 * Returns: bool.
 *
 * NOTE(review): this listing omits some original lines (braces, local
 * declarations, case labels, return statements). Presumably false is
 * returned after a failed step and true otherwise -- confirm against the
 * complete source.
 */
49 bool do_backup_init(JCR *jcr)
/* Failure branch; its body is not visible in this listing. */
53 if (!get_or_create_fileset_record(jcr)) {
58 * Get definitive Job level and since time
/* The since-time text is written into jcr->since, bounded by its size. */
60 get_level_since_time(jcr, jcr->since, sizeof(jcr->since));
63 * Apply any level related Pool selections
/*
 * Each visible assignment overrides jcr->pool with a level-specific pool.
 * NOTE(review): the case labels and any NULL checks on full_pool/inc_pool/
 * dif_pool are not visible here; presumably full, incremental and
 * differential levels respectively -- confirm.
 */
65 switch (jcr->JobLevel) {
68 jcr->pool = jcr->full_pool;
73 jcr->pool = jcr->inc_pool;
78 jcr->pool = jcr->dif_pool;
/* pr (declared on a line not shown) is zeroed, then keyed by the pool's resource name. */
82 memset(&pr, 0, sizeof(pr));
83 bstrncpy(pr.Name, jcr->pool->hdr.name, sizeof(pr.Name));
/* Lookup failed: attempt to create the Pool, then look it up a second time. */
85 if (!db_get_pool_record(jcr, jcr->db, &pr)) { /* get by Name */
86 /* Try to create the pool */
87 if (create_pool(jcr, jcr->db, jcr->pool, POOL_OP_CREATE) < 0) {
88 Jmsg(jcr, M_FATAL, 0, _("Pool %s not in database. %s"), pr.Name,
89 db_strerror(jcr->db));
92 Jmsg(jcr, M_INFO, 0, _("Pool %s created in database.\n"), pr.Name);
/* Second lookup after creation; a failure here is reported as fatal. */
93 if (!db_get_pool_record(jcr, jcr->db, &pr)) { /* get by Name */
94 Jmsg(jcr, M_FATAL, 0, _("Pool %s not in database. %s"), pr.Name,
95 db_strerror(jcr->db));
/* Record the catalog PoolId both on the JCR and on its job record. */
100 jcr->PoolId = pr.PoolId;
101 jcr->jr.PoolId = pr.PoolId;
104 * Fire off any clone jobs (run directives)
106 Dmsg2(900, "cloned=%d run_cmds=%p\n", jcr->cloned, jcr->job->run_cmds);
/* Only a non-cloned job with run directives spawns clones. */
107 if (!jcr->cloned && jcr->job->run_cmds) {
/* Scratch buffer for the expanded directive and a UA context used to issue "run". */
110 POOLMEM *cmd = get_pool_memory(PM_FNAME);
111 UAContext *ua = new_ua_context(jcr);
/*
 * NOTE(review): "job" and "runcmd" are declared on lines not shown;
 * presumably job is jcr->job -- confirm.
 */
113 foreach_alist(runcmd, job->run_cmds) {
/* Expand job codes in the directive, then build and run "run ... cloned=yes". */
114 cmd = edit_job_codes(jcr, cmd, runcmd, "");
115 Mmsg(ua->cmd, "run %s cloned=yes", cmd);
116 Dmsg1(900, "=============== Clone cmd=%s\n", ua->cmd);
117 parse_ua_args(ua); /* parse command */
118 int stat = run_cmd(ua, ua->cmd);
/*
 * One of the two messages below is emitted per directive; the second
 * prints stat as the new JobId. NOTE(review): the condition choosing
 * between them is not visible -- presumably stat == 0 means failure.
 */
120 Jmsg(jcr, M_ERROR, 0, _("Could not start clone job.\n"));
122 Jmsg(jcr, M_INFO, 0, _("Clone JobId %d started.\n"), stat);
/*
 * Release the scratch buffer. NOTE(review): the release of "ua" is not
 * visible in this listing -- confirm it is freed in the full source.
 */
126 free_pool_memory(cmd);
133 * Do a backup of the specified FileSet
135 * Returns: false on failure
/*
 * do_backup -- run a backup job by coordinating the Storage daemon (SD)
 * and the File daemon (FD).
 *
 * Visible sequence: announce the job start, mark the job running and
 * update its catalog start record, connect to the SD and start the SD job
 * and its message thread, connect to the FD, send the include list,
 * exclude list and level command, tell the FD where the SD is, send the
 * run-before/after commands, send the backup command, then wait for
 * termination and call backup_cleanup() on normal termination.
 *
 * jcr: job control record of the job to run.
 * Returns: bool (false on failure, per the comment preceding this function).
 *
 * NOTE(review): this listing omits some original lines, including the
 * declarations of fd, store, stat and ed1 and the body of every failure
 * branch; what happens after each failed step is not visible here.
 */
138 bool do_backup(JCR *jcr)
/* Default TLS requirement; raised below when the storage resource enables TLS. */
141 int tls_need = BNET_TLS_NONE;
147 /* Print Job Start message */
148 Jmsg(jcr, M_INFO, 0, _("Start Backup JobId %s, Job=%s\n"),
149 edit_uint64(jcr->JobId, ed1), jcr->Job);
151 set_jcr_job_status(jcr, JS_Running);
152 Dmsg2(100, "JobId=%d JobLevel=%c\n", jcr->jr.JobId, jcr->jr.JobLevel);
/* A failed catalog update is reported as fatal with the database error text. */
153 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
154 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
159 * Open a message channel connection with the Storage
160 * daemon. This is to let him know that our client
161 * will be contacting him for a backup session.
164 Dmsg0(110, "Open connection with storage daemon\n");
/* Status reflects that the job is now waiting on the Storage daemon. */
165 set_jcr_job_status(jcr, JS_WaitSD);
167 * Start conversation with Storage daemon
/* NOTE(review): the meaning of the arguments 10 and 1 is not shown here -- see connect_to_storage_daemon(). */
169 if (!connect_to_storage_daemon(jcr, 10, SDConnectTimeout, 1)) {
173 * Now start a job with the Storage daemon
175 if (!start_storage_daemon_job(jcr, NULL, jcr->storage)) {
179 * Now start a Storage daemon message thread. Note,
180 * this thread is used to provide the catalog services
181 * for the backup job, including inserting the attributes
182 * into the catalog. See catalog_update() in catreq.c
184 if (!start_storage_daemon_message_thread(jcr)) {
187 Dmsg0(150, "Storage daemon connection OK\n");
/* Now waiting on the File daemon connection. */
189 set_jcr_job_status(jcr, JS_WaitFD);
190 if (!connect_to_file_daemon(jcr, 10, FDConnectTimeout, 1)) {
/* Both daemons are connected: back to Running, and keep the FD socket handy. */
194 set_jcr_job_status(jcr, JS_Running);
195 fd = jcr->file_bsock;
/* Send the job description to the FD: include list, exclude list, then level. */
197 if (!send_include_list(jcr)) {
201 if (!send_exclude_list(jcr)) {
205 if (!send_level_command(jcr)) {
210 * send Storage daemon address to the File daemon
/* When no separate SDDport is configured (0), fall back to SDport. */
213 if (store->SDDport == 0) {
214 store->SDDport = store->SDport;
217 /* TLS Requirement */
/*
 * With TLS enabled on the storage resource, tls_need becomes
 * BNET_TLS_REQUIRED when tls_require is set and BNET_TLS_OK otherwise
 * (the "else" line itself is not visible in this listing).
 */
218 if (store->tls_enable) {
219 if (store->tls_require) {
220 tls_need = BNET_TLS_REQUIRED;
222 tls_need = BNET_TLS_OK;
/*
 * Send the SD address and port to the FD and expect OKstore back.
 * NOTE(review): the final argument of this call is on a line not shown;
 * presumably tls_need -- confirm.
 */
226 bnet_fsend(fd, storaddr, store->address, store->SDDport,
228 if (!response(jcr, fd, OKstore, "Storage", DISPLAY_ERROR)) {
233 if (!send_run_before_and_after_commands(jcr)) {
237 /* Send backup command */
238 bnet_fsend(fd, backupcmd);
239 if (!response(jcr, fd, OKbackup, "backup", DISPLAY_ERROR)) {
243 /* Pickup Job termination data */
/*
 * Wait for the FD and SD to finish. On JS_Terminated the job is finalized
 * through backup_cleanup(); the handling of any other status is not
 * visible in this listing.
 */
244 stat = wait_for_job_termination(jcr);
245 if (stat == JS_Terminated) {
246 backup_cleanup(jcr, stat);
254 * Here we wait for the File daemon to signal termination,
255 * then we wait for the Storage daemon. When both
256 * are done, we return the job status.
257 * Also used by restore.c
/*
 * wait_for_job_termination -- read File daemon (FD) messages until the
 * connection ends, then wait for the Storage daemon (SD), and report a
 * single job status.
 *
 * jcr: job control record; jcr->file_bsock is the FD connection read here.
 * Returns: jcr->JobStatus if it is not JS_Terminated, otherwise
 * jcr->FDJobStatus if that is not JS_Terminated, otherwise
 * jcr->SDJobStatus.
 *
 * NOTE(review): this listing omits some original lines, including the
 * declarations of n and fd_ok, the place where fd_ok is set, and the
 * conditions guarding several statements below.
 */
259 int wait_for_job_termination(JCR *jcr)
262 BSOCK *fd = jcr->file_bsock;
/* Statistics parsed from the FD's end-of-job message. */
264 uint32_t JobFiles, Errors;
265 uint64_t ReadBytes, JobBytes;
267 set_jcr_job_status(jcr, JS_Running);
268 /* Wait for Client to terminate */
/* Loop while bget_dirmsg() returns a non-negative value. */
269 while ((n = bget_dirmsg(fd)) >= 0) {
/*
 * Until an end-of-job report has been accepted, try to parse each message
 * with the EndJob template; all five fields must convert.
 * NOTE(review): %lld is used to scan into the uint64_t byte counters --
 * confirm the signed/unsigned mismatch is intended.
 */
270 if (!fd_ok && sscanf(fd->msg, EndJob, &jcr->FDJobStatus, &JobFiles,
271 &ReadBytes, &JobBytes, &Errors) == 5) {
/* The FD-reported status becomes the job status (fd_ok is presumably set on a line not shown). */
273 set_jcr_job_status(jcr, jcr->FDJobStatus);
274 Dmsg1(100, "FDStatus=%c\n", (char)jcr->JobStatus);
/* Any other message is reported as a warning; the message argument is on a line not shown. */
276 Jmsg(jcr, M_WARNING, 0, _("Unexpected Client Job message: %s\n"),
/* Cancellation check; the branch body is not visible (presumably leaves the loop). */
279 if (job_canceled(jcr)) {
/* A network error on the FD socket is fatal for the job. */
283 if (is_bnet_error(fd)) {
284 Jmsg(jcr, M_FATAL, 0, _("Network error with FD during %s: ERR=%s\n"),
285 job_type_to_str(jcr->JobType), bnet_strerror(fd));
287 bnet_sig(fd, BNET_TERMINATE); /* tell Client we are terminating */
289 /* Note, the SD stores in jcr->JobFiles/ReadBytes/JobBytes/Errors */
290 wait_for_storage_daemon_termination(jcr);
293 /* Return values from FD */
/*
 * Overwrite the JCR counters with the FD-reported values.
 * NOTE(review): these locals are only assigned by the sscanf() above; the
 * guard that restricts this copy to the fd_ok case is not visible here --
 * confirm, since otherwise uninitialized values would be copied.
 */
295 jcr->JobFiles = JobFiles;
296 jcr->Errors = Errors;
297 jcr->ReadBytes = ReadBytes;
298 jcr->JobBytes = JobBytes;
/* Presumably the alternative branch when no end-of-job report arrived -- the "else" is not visible. */
300 Jmsg(jcr, M_FATAL, 0, _("No Job status returned from FD.\n"));
303 // Dmsg4(100, "fd_ok=%d FDJS=%d JS=%d SDJS=%d\n", fd_ok, jcr->FDJobStatus,
304 // jcr->JobStatus, jcr->SDJobStatus);
306 /* Return the first error status we find Dir, FD, or SD */
/* A missing FD report or a socket error forces the FD status to error-terminated. */
307 if (!fd_ok || is_bnet_error(fd)) {
308 jcr->FDJobStatus = JS_ErrorTerminated;
/* Precedence of the returned status: Director, then FD, then SD. */
310 if (jcr->JobStatus != JS_Terminated) {
311 return jcr->JobStatus;
313 if (jcr->FDJobStatus != JS_Terminated) {
314 return jcr->FDJobStatus;
316 return jcr->SDJobStatus;
320 * Release resources allocated during backup.
/*
 * backup_cleanup -- finalize a backup job: set the final status, update
 * the catalog, optionally write the bootstrap file, and emit the job
 * summary report.
 *
 * jcr:      job control record of the finished job.
 * TermCode: status to set on the job before the end record is written.
 *
 * NOTE(review): this listing omits many original lines (declarations of
 * mr, cr, msg_type, RunTime, VolCount, fd, bpipe and be; most case labels;
 * several if/else lines; and a number of arguments of the final Jmsg()
 * call). Comments below describe only what is visible.
 */
322 void backup_cleanup(JCR *jcr, int TermCode)
/* Fixed-size text buffers for formatted times, numbers and status strings. */
324 char sdt[50], edt[50], schedt[50];
325 char ec1[30], ec2[30], ec3[30], ec4[30], ec5[30], compress[50];
326 char term_code[100], fd_term_msg[100], sd_term_msg[100];
327 const char *term_msg;
331 double kbps, compression;
334 Dmsg2(100, "Enter backup_cleanup %d %c\n", TermCode, TermCode);
335 dequeue_messages(jcr); /* display any queued messages */
/* Zero the media and client records before they are used as lookup keys. */
336 memset(&mr, 0, sizeof(mr));
337 memset(&cr, 0, sizeof(cr));
338 set_jcr_job_status(jcr, TermCode);
340 update_job_end_record(jcr); /* update database */
/* Re-read the job record for the statistics; a failure downgrades the job to error. */
342 if (!db_get_job_record(jcr, jcr->db, &jcr->jr)) {
343 Jmsg(jcr, M_WARNING, 0, _("Error getting job record for stats: %s"),
344 db_strerror(jcr->db));
345 set_jcr_job_status(jcr, JS_ErrorTerminated);
/* Client record lookup by name; a failure only produces a warning in the visible lines. */
348 bstrncpy(cr.Name, jcr->client->hdr.name, sizeof(cr.Name));
349 if (!db_get_client_record(jcr, jcr->db, &cr)) {
350 Jmsg(jcr, M_WARNING, 0, _("Error getting client record for stats: %s"),
351 db_strerror(jcr->db));
/* Media record lookup by volume name; a failure downgrades the job to error. */
354 bstrncpy(mr.VolumeName, jcr->VolumeName, sizeof(mr.VolumeName));
355 if (!db_get_media_record(jcr, jcr->db, &mr)) {
356 Jmsg(jcr, M_WARNING, 0, _("Error getting Media record for Volume \"%s\": ERR=%s"),
357 mr.VolumeName, db_strerror(jcr->db));
358 set_jcr_job_status(jcr, JS_ErrorTerminated);
361 /* Now update the bootstrap file if any */
/* Only for a cleanly terminated job that wrote data and has WriteBootstrap configured. */
362 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes &&
363 jcr->job->WriteBootstrap) {
367 char *fname = jcr->job->WriteBootstrap;
368 VOL_PARAMS *VolParams = NULL;
/*
 * Two ways of opening the bootstrap destination are visible: a pipe to a
 * program (open_bpipe) and a plain file (fopen).
 * NOTE(review): the condition selecting between them is not visible --
 * presumably a leading '|' in fname; confirm.
 */
374 bpipe = open_bpipe(fname, 0, "w");
375 fd = bpipe ? bpipe->wfd : NULL;
377 /* ***FIXME*** handle BASE */
/* A Full job truncates the file ("w+"); any other level appends ("a+"). */
378 fd = fopen(fname, jcr->JobLevel==L_FULL?"w+":"a+");
/* Fetch the volume parameters for this JobId; the remaining arguments are on a line not shown. */
381 VolCount = db_get_job_volume_parameters(jcr, jcr->db, jcr->JobId,
/* Reported when the volume parameters could not be obtained (the test itself is not visible). */
384 Jmsg(jcr, M_ERROR, 0, _("Could not get Job Volume Parameters to "
385 "update Bootstrap file. ERR=%s\n"), db_strerror(jcr->db));
/* Treated as a job error only if the SD reported having written files. */
386 if (jcr->SDJobFiles != 0) {
387 set_jcr_job_status(jcr, JS_ErrorTerminated);
391 /* Start output with when and who wrote it */
392 bstrftimes(edt, sizeof(edt), time(NULL));
393 fprintf(fd, "# %s - %s - %s%s\n", edt, jcr->jr.Job,
394 level_to_str(jcr->JobLevel), jcr->since);
/* One bootstrap stanza per volume used by the job. */
395 for (int i=0; i < VolCount; i++) {
396 /* Write the record */
397 fprintf(fd, "Volume=\"%s\"\n", VolParams[i].VolumeName);
398 fprintf(fd, "MediaType=\"%s\"\n", VolParams[i].MediaType);
399 fprintf(fd, "VolSessionId=%u\n", jcr->VolSessionId);
400 fprintf(fd, "VolSessionTime=%u\n", jcr->VolSessionTime);
401 fprintf(fd, "VolFile=%u-%u\n", VolParams[i].StartFile,
402 VolParams[i].EndFile);
403 fprintf(fd, "VolBlock=%u-%u\n", VolParams[i].StartBlock,
404 VolParams[i].EndBlock);
405 fprintf(fd, "FileIndex=%d-%d\n", VolParams[i].FirstIndex,
406 VolParams[i].LastIndex);
/*
 * Open-failure path: report the error and mark the job as failed.
 * NOTE(review): closing of fd/bpipe and freeing of VolParams are on lines
 * not shown in this listing -- confirm they exist in the full source.
 */
418 Jmsg(jcr, M_ERROR, 0, _("Could not open WriteBootstrap file:\n"
419 "%s: ERR=%s\n"), fname, be.strerror());
420 set_jcr_job_status(jcr, JS_ErrorTerminated);
/* Choose the summary's termination text and message type from the final status. */
424 msg_type = M_INFO; /* by default INFO message */
425 switch (jcr->JobStatus) {
/* Presumably the JS_Terminated case (label not visible): warn if FD or SD reported errors. */
427 if (jcr->Errors || jcr->SDErrors) {
428 term_msg = _("Backup OK -- with warnings");
430 term_msg = _("Backup OK");
434 case JS_ErrorTerminated:
435 term_msg = _("*** Backup Error ***");
436 msg_type = M_ERROR; /* Generate error message */
/* Tell the SD to terminate and cancel its message thread if one is recorded. */
437 if (jcr->store_bsock) {
438 bnet_sig(jcr->store_bsock, BNET_TERMINATE);
439 if (jcr->SD_msg_chan) {
440 pthread_cancel(jcr->SD_msg_chan);
/* Presumably the canceled case (label not visible): same SD shutdown as above. */
445 term_msg = _("Backup Canceled");
446 if (jcr->store_bsock) {
447 bnet_sig(jcr->store_bsock, BNET_TERMINATE);
448 if (jcr->SD_msg_chan) {
449 pthread_cancel(jcr->SD_msg_chan);
/*
 * Presumably the default case (label not visible): report the raw code.
 * NOTE(review): sprintf() writes into the 100-byte term_code buffer
 * without a bound; the translated format string determines the length --
 * consider a bounded formatter.
 */
454 term_msg = term_code;
455 sprintf(term_code, _("Inappropriate term code: %c\n"), jcr->JobStatus);
/* Format scheduled, start and end times and derive the run time from the job record. */
458 bstrftimes(schedt, sizeof(schedt), jcr->jr.SchedTime);
459 bstrftimes(sdt, sizeof(sdt), jcr->jr.StartTime);
460 bstrftimes(edt, sizeof(edt), jcr->jr.EndTime);
461 RunTime = jcr->jr.EndTime - jcr->jr.StartTime;
/*
 * Transfer rate: bytes divided by (1000 * seconds).
 * NOTE(review): this divides by RunTime; any guard against RunTime <= 0
 * is on lines not shown in this listing -- confirm.
 */
465 kbps = (double)jcr->jr.JobBytes / (1000 * RunTime);
467 if (!db_get_job_volume_names(jcr, jcr->db, jcr->jr.JobId, &jcr->VolumeName)) {
469 * Note, if the job has erred, most likely it did not write any
470 * tape, so suppress this "error" message since in that case
471 * it is normal. Or look at it the other way, only for a
472 * normal exit should we complain about this error.
474 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes) {
475 Jmsg(jcr, M_ERROR, 0, "%s", db_strerror(jcr->db));
/* With no volume names available the report shows an empty string. */
477 jcr->VolumeName[0] = 0; /* none */
/*
 * Compression figure: "None" when nothing was read or when the saving is
 * below 0.5 percent, otherwise the percentage with one decimal place.
 */
480 if (jcr->ReadBytes == 0) {
481 bstrncpy(compress, "None", sizeof(compress));
483 compression = (double)100 - 100.0 * ((double)jcr->JobBytes / (double)jcr->ReadBytes);
484 if (compression < 0.5) {
485 bstrncpy(compress, "None", sizeof(compress));
487 bsnprintf(compress, sizeof(compress), "%.1f %%", (float)compression);
/* Human-readable FD and SD termination statuses for the report. */
490 jobstatus_to_ascii(jcr->FDJobStatus, fd_term_msg, sizeof(fd_term_msg));
491 jobstatus_to_ascii(jcr->SDJobStatus, sd_term_msg, sizeof(sd_term_msg));
493 // bmicrosleep(15, 0); /* for debugging SIGHUP */
/*
 * Emit the job summary as one message of type msg_type. Parts of the
 * format string and of the argument list are on lines not shown in this
 * listing, so format fields and arguments cannot be paired up from here.
 */
495 Jmsg(jcr, msg_type, 0, _("Bacula %s (%s): %s\n"
498 " Backup Level: %s%s\n"
499 " Client: \"%s\" %s\n"
500 " FileSet: \"%s\" %s\n"
503 " Scheduled time: %s\n"
507 " FD Files Written: %s\n"
508 " SD Files Written: %s\n"
509 " FD Bytes Written: %s\n"
510 " SD Bytes Written: %s\n"
512 " Software Compression: %s\n"
513 " Volume name(s): %s\n"
514 " Volume Session Id: %d\n"
515 " Volume Session Time: %d\n"
516 " Last Volume Bytes: %s\n"
517 " Non-fatal FD errors: %d\n"
519 " FD termination status: %s\n"
520 " SD termination status: %s\n"
521 " Termination: %s\n\n"),
527 level_to_str(jcr->JobLevel), jcr->since,
528 jcr->client->hdr.name, cr.Uname,
529 jcr->fileset->hdr.name, jcr->FSCreateTime,
531 jcr->store->hdr.name,
536 edit_uint64_with_commas(jcr->jr.JobFiles, ec1),
537 edit_uint64_with_commas(jcr->SDJobFiles, ec4),
538 edit_uint64_with_commas(jcr->jr.JobBytes, ec2),
539 edit_uint64_with_commas(jcr->SDJobBytes, ec5),
545 edit_uint64_with_commas(mr.VolBytes, ec3),
552 Dmsg0(100, "Leave backup_cleanup()\n");