3 * Bacula Director -- backup.c -- responsible for doing backup jobs
5 * Kern Sibbald, March MM
7 * Basic tasks done here:
8 * Open DB and create records for this job.
9 * Open Message Channel with Storage daemon to tell him a job will be starting.
10 * Open connection with File daemon and pass him commands
12 * When the File daemon finishes the job, update the DB.
17 Copyright (C) 2000-2006 Kern Sibbald
19 This program is free software; you can redistribute it and/or
20 modify it under the terms of the GNU General Public License
21 version 2 as amended with additional clauses defined in the
22 file LICENSE in the main source directory.
24 This program is distributed in the hope that it will be useful,
25 but WITHOUT ANY WARRANTY; without even the implied warranty of
26 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27 file LICENSE for additional details.
/*
 * Text exchanged with the File daemon (FD).  The command strings are handed
 * to bnet_fsend(); the response strings are handed to response() or used as
 * an sscanf() format further down in this file.
 */
35 /* Commands sent to File daemon */
/* Sent by do_backup() once all setup commands have been acknowledged. */
36 static char backupcmd[] = "backup\n";
/* printf-style template: Storage daemon address, port and an ssl flag. */
37 static char storaddr[] = "storage address=%s port=%d ssl=%d\n";
39 /* Responses received from File daemon */
/* Passed to response() right after backupcmd is sent. */
40 static char OKbackup[] = "2000 OK backup\n";
/* Passed to response() right after storaddr is sent. */
41 static char OKstore[] = "2000 OK storage\n";
/*
 * Final status line from the FD.  wait_for_job_termination() uses it as an
 * sscanf() format and requires all five fields to convert.
 * NOTE(review): ReadBytes and JobBytes are scanned with %lld into uint64_t
 * variables -- confirm the conversion specifier matches on all platforms.
 */
42 static char EndJob[] = "2800 End Job TermCode=%d JobFiles=%u "
43 "ReadBytes=%lld JobBytes=%lld Errors=%u\n";
46 * Called here before the job is run to do the job
/*
 * do_backup_init -- catalog-side preparation for a backup job.
 *
 * Steps visible in this listing:
 *   - get_or_create_fileset_record() is called for the job.
 *   - get_level_since_time() fills jcr->since.
 *   - jcr->pool may be replaced by full_pool, inc_pool or dif_pool
 *     depending on jcr->JobLevel.
 *   - The pool is looked up in the catalog by name and created when the
 *     lookup fails; its PoolId is copied into the JCR and the job record.
 *   - For a non-cloned job with run directives, each directive is turned
 *     into a console run command with cloned=yes and executed.
 *
 * jcr: job control record of the job being initialized.
 *
 * Returns bool.  NOTE(review): the return statements are not part of this
 * listing; presumably false after each M_FATAL message -- confirm.
 */
49 bool do_backup_init(JCR *jcr)
/* Failure branch of this test is not visible in this listing. */
53 if (!get_or_create_fileset_record(jcr)) {
58 * Get definitive Job level and since time
60 get_level_since_time(jcr, jcr->since, sizeof(jcr->since));
63 * Apply any level related Pool selections
/*
 * NOTE(review): the case labels and any guards around the three
 * assignments below are not visible here -- confirm which level maps to
 * which pool and whether a NULL override is skipped.
 */
65 switch (jcr->JobLevel) {
68 jcr->pool = jcr->full_pool;
73 jcr->pool = jcr->inc_pool;
78 jcr->pool = jcr->dif_pool;
/* Build a lookup key from the name of the pool resource selected above. */
82 memset(&pr, 0, sizeof(pr));
83 bstrncpy(pr.Name, jcr->pool->hdr.name, sizeof(pr.Name));
85 if (!db_get_pool_record(jcr, jcr->db, &pr)) { /* get by Name */
86 /* Try to create the pool */
/* A negative result from create_pool() is reported as fatal. */
87 if (create_pool(jcr, jcr->db, jcr->pool, POOL_OP_CREATE) < 0) {
88 Jmsg(jcr, M_FATAL, 0, _("Pool %s not in database. %s"), pr.Name,
89 db_strerror(jcr->db));
92 Jmsg(jcr, M_INFO, 0, _("Pool %s created in database.\n"), pr.Name);
/* Second lookup by name after the create; pr.PoolId is used below. */
93 if (!db_get_pool_record(jcr, jcr->db, &pr)) { /* get by Name */
94 Jmsg(jcr, M_FATAL, 0, _("Pool %s not in database. %s"), pr.Name,
95 db_strerror(jcr->db));
/* Keep the pool id in both the JCR and the job record. */
100 jcr->PoolId = pr.PoolId;
101 jcr->jr.PoolId = pr.PoolId;
104 * Fire off any clone jobs (run directives)
106 Dmsg2(900, "cloned=%d run_cmds=%p\n", jcr->cloned, jcr->job->run_cmds);
/* Clones are started only by a job that is not itself marked as cloned. */
107 if (!jcr->cloned && jcr->job->run_cmds) {
110 POOLMEM *cmd = get_pool_memory(PM_FNAME);
111 UAContext *ua = new_ua_context(jcr);
113 foreach_alist(runcmd, job->run_cmds) {
/*
 * edit_job_codes() rewrites the directive into cmd (it may return a
 * different buffer, hence the reassignment); the result becomes the
 * argument of a console run command carrying cloned=yes.
 */
114 cmd = edit_job_codes(jcr, cmd, runcmd, "");
115 Mmsg(ua->cmd, "run %s cloned=yes", cmd);
116 Dmsg1(900, "=============== Clone cmd=%s\n", ua->cmd);
117 parse_ua_args(ua); /* parse command */
/*
 * The run_cmd() result is printed as the clone JobId in the success
 * message.  NOTE(review): the test choosing between the two messages is
 * not visible in this listing.
 */
118 int stat = run_cmd(ua, ua->cmd);
120 Jmsg(jcr, M_ERROR, 0, _("Could not start clone job.\n"));
122 Jmsg(jcr, M_INFO, 0, _("Clone JobId %d started.\n"), stat);
/* Release the pool memory obtained for cmd above. */
126 free_pool_memory(cmd);
133 * Do a backup of the specified FileSet
135 * Returns: false on failure
/*
 * do_backup -- run one backup job from start to termination.
 *
 * Sequence visible in this listing:
 *   1. Mark the job running and write the job start record.
 *   2. Connect to the Storage daemon (SD), start its job and the SD
 *      message thread, then send it the run command.
 *   3. Connect to the File daemon (FD) and send the include list, exclude
 *      list, level, Storage daemon address and run before/after commands.
 *   4. Refresh the job start time, send the backup command and wait for
 *      termination; on JS_Terminated call backup_cleanup().
 *   5. On the error path set JS_ErrorTerminated and wait for the SD.
 *
 * jcr: job control record of the job to run.
 *
 * NOTE(review): the bodies of the failure branches and all return
 * statements are not part of this listing.
 */
138 bool do_backup(JCR *jcr)
/*
 * Default TLS requirement; raised below from the storage resource flags.
 * NOTE(review): presumably supplied as the ssl field of storaddr -- the
 * argument line of that call is not visible here.
 */
141 int tls_need = BNET_TLS_NONE;
147 /* Print Job Start message */
148 Jmsg(jcr, M_INFO, 0, _("Start Backup JobId %s, Job=%s\n"),
149 edit_uint64(jcr->JobId, ed1), jcr->Job);
151 set_jcr_job_status(jcr, JS_Running);
152 Dmsg2(100, "JobId=%d JobLevel=%c\n", jcr->jr.JobId, jcr->jr.JobLevel);
/* A failed catalog update of the start record is reported as fatal. */
153 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
154 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
159 * Open a message channel connection with the Storage
160 * daemon. This is to let him know that our client
161 * will be contacting him for a backup session.
164 Dmsg0(110, "Open connection with storage daemon\n");
/* Status shown while the SD connection is being set up. */
165 set_jcr_job_status(jcr, JS_WaitSD);
167 * Start conversation with Storage daemon
169 if (!connect_to_storage_daemon(jcr, 10, SDConnectTimeout, 1)) {
173 * Now start a job with the Storage daemon
175 if (!start_storage_daemon_job(jcr, NULL, jcr->storage)) {
179 * Now start a Storage daemon message thread. Note,
180 * this thread is used to provide the catalog services
181 * for the backup job, including inserting the attributes
182 * into the catalog. See catalog_update() in catreq.c
184 if (!start_storage_daemon_message_thread(jcr)) {
187 Dmsg0(150, "Storage daemon connection OK\n");
/* Send the run command to the SD; this send is checked for failure. */
189 if (!bnet_fsend(jcr->store_bsock, "run")) {
/* Status shown while the FD connection is being set up. */
193 set_jcr_job_status(jcr, JS_WaitFD);
194 if (!connect_to_file_daemon(jcr, 10, FDConnectTimeout, 1)) {
198 set_jcr_job_status(jcr, JS_Running);
/* fd is the FD socket used for every command sent below. */
199 fd = jcr->file_bsock;
201 if (!send_include_list(jcr)) {
205 if (!send_exclude_list(jcr)) {
209 if (!send_level_command(jcr)) {
214 * send Storage daemon address to the File daemon
/*
 * An SDDport of 0 is replaced by SDport.  Note that this writes the value
 * back into *store rather than into a local copy.
 */
217 if (store->SDDport == 0) {
218 store->SDDport = store->SDport;
221 /* TLS Requirement */
/* tls_enable with tls_require gives REQUIRED; tls_enable alone gives OK. */
222 if (store->tls_enable) {
223 if (store->tls_require) {
224 tls_need = BNET_TLS_REQUIRED;
226 tls_need = BNET_TLS_OK;
/*
 * NOTE(review): the result of this bnet_fsend() is not checked, unlike the
 * run command sent to the SD above; a failed send is only noticed through
 * the response() call that follows.
 */
230 bnet_fsend(fd, storaddr, store->address, store->SDDport,
232 if (!response(jcr, fd, OKstore, "Storage", DISPLAY_ERROR)) {
237 if (!send_run_before_and_after_commands(jcr)) {
242 * We re-update the job start record so that the start
243 * time is set after the run before job. This avoids
244 * that any files created by the run before job will
245 * be saved twice. They will be backed up in the current
246 * job, but not in the next one unless they are changed.
247 * Without this, they will be backed up in this job and
248 * in the next job run because in that case, their date
249 * is after the start of this run.
/* Reset the start time to now and write it to the catalog again. */
251 jcr->start_time = time(NULL);
252 jcr->jr.StartTime = jcr->start_time;
253 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
254 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
257 /* Send backup command */
/* NOTE(review): send result unchecked here as well; response() follows. */
258 bnet_fsend(fd, backupcmd);
259 if (!response(jcr, fd, OKbackup, "backup", DISPLAY_ERROR)) {
263 /* Pickup Job termination data */
264 stat = wait_for_job_termination(jcr);
/* Normal termination: finish the job through backup_cleanup(). */
265 if (stat == JS_Terminated) {
266 backup_cleanup(jcr, stat);
271 /* Come here only after starting SD thread */
/* Error path: mark the job failed, then wait for the SD to finish. */
273 set_jcr_job_status(jcr, JS_ErrorTerminated);
274 Dmsg1(400, "wait for sd. use=%d\n", jcr->use_count());
275 wait_for_storage_daemon_termination(jcr);
276 Dmsg1(400, "after wait for sd. use=%d\n", jcr->use_count());
282 * Here we wait for the File daemon to signal termination,
283 * then we wait for the Storage daemon. When both
284 * are done, we return the job status.
285 * Also used by restore.c
/*
 * wait_for_job_termination -- collect the final status of a running job.
 *
 * Reads File daemon messages until bget_dirmsg() returns a negative value,
 * parsing the EndJob line when it arrives.  Afterwards it reports a network
 * error if one is pending, sends BNET_TERMINATE to the client, waits for
 * the Storage daemon, and copies the FD counters into the JCR.
 *
 * jcr: job control record; jcr->file_bsock is the FD connection.
 *
 * Returns the first status that is not JS_Terminated, checked in the order
 * jcr->JobStatus, jcr->FDJobStatus; otherwise jcr->SDJobStatus.
 */
287 int wait_for_job_termination(JCR *jcr)
290 BSOCK *fd = jcr->file_bsock;
/* Receive the values parsed from the EndJob line. */
292 uint32_t JobFiles, Errors;
293 uint64_t ReadBytes, JobBytes;
295 set_jcr_job_status(jcr, JS_Running);
296 /* Wait for Client to terminate */
297 while ((n = bget_dirmsg(fd)) >= 0) {
/*
 * The EndJob line is parsed only while fd_ok is false, and only counts
 * when all five fields convert.
 * NOTE(review): the declaration and assignment of fd_ok are not visible in
 * this listing; presumably it is set once this scan succeeds.
 * NOTE(review): EndJob uses %lld for the two uint64_t targets -- confirm.
 */
298 if (!fd_ok && sscanf(fd->msg, EndJob, &jcr->FDJobStatus, &JobFiles,
299 &ReadBytes, &JobBytes, &Errors) == 5) {
/* The status reported by the FD becomes the job status. */
301 set_jcr_job_status(jcr, jcr->FDJobStatus);
302 Dmsg1(100, "FDStatus=%c\n", (char)jcr->JobStatus);
/* Message text for a line that did not match (else branch not visible). */
304 Jmsg(jcr, M_WARNING, 0, _("Unexpected Client Job message: %s\n"),
/* Body of the cancel check is not visible in this listing. */
307 if (job_canceled(jcr)) {
/* A network error on the FD socket after the loop is fatal. */
311 if (is_bnet_error(fd)) {
312 Jmsg(jcr, M_FATAL, 0, _("Network error with FD during %s: ERR=%s\n"),
313 job_type_to_str(jcr->JobType), bnet_strerror(fd));
315 bnet_sig(fd, BNET_TERMINATE); /* tell Client we are terminating */
317 /* Note, the SD stores in jcr->JobFiles/ReadBytes/JobBytes/Errors */
318 wait_for_storage_daemon_termination(jcr);
321 /* Return values from FD */
/*
 * The four locals are assigned only by the sscanf() above.
 * NOTE(review): the guard around these copies (presumably fd_ok) is not
 * visible here -- confirm they are never read uninitialized.
 */
323 jcr->JobFiles = JobFiles;
324 jcr->Errors = Errors;
325 jcr->ReadBytes = ReadBytes;
326 jcr->JobBytes = JobBytes;
328 Jmsg(jcr, M_FATAL, 0, _("No Job status returned from FD.\n"));
331 // Dmsg4(100, "fd_ok=%d FDJS=%d JS=%d SDJS=%d\n", fd_ok, jcr->FDJobStatus,
332 // jcr->JobStatus, jcr->SDJobStatus);
334 /* Return the first error status we find Dir, FD, or SD */
/* No EndJob line or a socket error counts as an FD error termination. */
335 if (!fd_ok || is_bnet_error(fd)) {
336 jcr->FDJobStatus = JS_ErrorTerminated;
338 if (jcr->JobStatus != JS_Terminated) {
339 return jcr->JobStatus;
341 if (jcr->FDJobStatus != JS_Terminated) {
342 return jcr->FDJobStatus;
344 return jcr->SDJobStatus;
348 * Release resources allocated during backup.
/*
 * backup_cleanup -- finish a backup job and print its summary report.
 *
 * Stores TermCode as the job status, updates the job end record, reloads
 * the job, client and media records used for statistics, updates the
 * bootstrap file, picks a termination message, and emits the job report
 * through Jmsg().
 *
 * jcr:      job control record of the finished job.
 * TermCode: termination status to record; it is passed to
 *           set_jcr_job_status() and printed as a character.
 */
350 void backup_cleanup(JCR *jcr, int TermCode)
/* Scratch buffers for the formatted times, numbers and status texts. */
352 char sdt[50], edt[50], schedt[50];
353 char ec1[30], ec2[30], ec3[30], ec4[30], ec5[30], compress[50];
354 char ec6[30], ec7[30], ec8[30], elapsed[50];
355 char term_code[100], fd_term_msg[100], sd_term_msg[100];
356 const char *term_msg;
360 double kbps, compression;
363 Dmsg2(100, "Enter backup_cleanup %d %c\n", TermCode, TermCode);
364 dequeue_messages(jcr); /* display any queued messages */
/* Zero the media (mr) and client (cr) records before filling lookup keys. */
365 memset(&mr, 0, sizeof(mr));
366 memset(&cr, 0, sizeof(cr));
367 set_jcr_job_status(jcr, TermCode);
369 update_job_end_record(jcr); /* update database */
/* Re-read the job record; a failure here downgrades the job to an error. */
371 if (!db_get_job_record(jcr, jcr->db, &jcr->jr)) {
372 Jmsg(jcr, M_WARNING, 0, _("Error getting job record for stats: %s"),
373 db_strerror(jcr->db));
374 set_jcr_job_status(jcr, JS_ErrorTerminated);
/* Client record is looked up by resource name; failure is only a warning. */
377 bstrncpy(cr.Name, jcr->client->hdr.name, sizeof(cr.Name));
378 if (!db_get_client_record(jcr, jcr->db, &cr)) {
379 Jmsg(jcr, M_WARNING, 0, _("Error getting client record for stats: %s"),
380 db_strerror(jcr->db));
/* Media record is looked up by jcr->VolumeName; failure fails the job. */
383 bstrncpy(mr.VolumeName, jcr->VolumeName, sizeof(mr.VolumeName));
384 if (!db_get_media_record(jcr, jcr->db, &mr)) {
385 Jmsg(jcr, M_WARNING, 0, _("Error getting Media record for Volume \"%s\": ERR=%s"),
386 mr.VolumeName, db_strerror(jcr->db));
387 set_jcr_job_status(jcr, JS_ErrorTerminated);
390 update_bootstrap_file(jcr);
/*
 * Select the termination text and message type from the final status.
 * NOTE(review): several case labels are not visible in this listing.
 */
392 msg_type = M_INFO; /* by default INFO message */
393 switch (jcr->JobStatus) {
/* Any FD or SD error count changes the wording to a warning. */
395 if (jcr->Errors || jcr->SDErrors) {
396 term_msg = _("Backup OK -- with warnings");
398 term_msg = _("Backup OK");
402 case JS_ErrorTerminated:
403 term_msg = _("*** Backup Error ***");
404 msg_type = M_ERROR; /* Generate error message */
/* Signal the SD to terminate and cancel its message thread if present. */
405 if (jcr->store_bsock) {
406 bnet_sig(jcr->store_bsock, BNET_TERMINATE);
407 if (jcr->SD_msg_chan) {
408 pthread_cancel(jcr->SD_msg_chan);
413 term_msg = _("Backup Canceled");
/* Same SD shutdown sequence as in the error case above. */
414 if (jcr->store_bsock) {
415 bnet_sig(jcr->store_bsock, BNET_TERMINATE);
416 if (jcr->SD_msg_chan) {
417 pthread_cancel(jcr->SD_msg_chan);
/*
 * Fallback text for an unrecognized status, built in term_code.
 * NOTE(review): sprintf() is unbounded and the format is a translated
 * string written into a 100-byte buffer; consider bsnprintf(), which this
 * function already uses for compress.
 */
422 term_msg = term_code;
423 sprintf(term_code, _("Inappropriate term code: %c\n"), jcr->JobStatus);
426 bstrftimes(schedt, sizeof(schedt), jcr->jr.SchedTime);
427 bstrftimes(sdt, sizeof(sdt), jcr->jr.StartTime);
428 bstrftimes(edt, sizeof(edt), jcr->jr.EndTime);
429 RunTime = jcr->jr.EndTime - jcr->jr.StartTime;
/*
 * Rate is job bytes divided by (1000 * elapsed seconds).
 * NOTE(review): the guard for RunTime == 0 is not visible in this listing
 * -- confirm the division cannot be by zero.
 */
433 kbps = (double)jcr->jr.JobBytes / (1000 * RunTime);
435 if (!db_get_job_volume_names(jcr, jcr->db, jcr->jr.JobId, &jcr->VolumeName)) {
437 * Note, if the job has erred, most likely it did not write any
438 * tape, so suppress this "error" message since in that case
439 * it is normal. Or look at it the other way, only for a
440 * normal exit should we complain about this error.
442 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes) {
443 Jmsg(jcr, M_ERROR, 0, "%s", db_strerror(jcr->db));
445 jcr->VolumeName[0] = 0; /* none */
/*
 * Compression ratio: None when nothing was read; otherwise
 * 100 - 100 * JobBytes / ReadBytes, with values below 0.5 also shown as
 * None.
 */
448 if (jcr->ReadBytes == 0) {
449 bstrncpy(compress, "None", sizeof(compress));
451 compression = (double)100 - 100.0 * ((double)jcr->JobBytes / (double)jcr->ReadBytes);
452 if (compression < 0.5) {
453 bstrncpy(compress, "None", sizeof(compress));
455 bsnprintf(compress, sizeof(compress), "%.1f %%", (float)compression);
/* Text forms of the FD and SD statuses for the report. */
458 jobstatus_to_ascii(jcr->FDJobStatus, fd_term_msg, sizeof(fd_term_msg));
459 jobstatus_to_ascii(jcr->SDJobStatus, sd_term_msg, sizeof(sd_term_msg));
461 // bmicrosleep(15, 0); /* for debugging SIGHUP */
/*
 * Job report.  NOTE(review): several format lines and arguments are not
 * visible in this listing, so the pairing of conversions and arguments
 * cannot be checked from here.
 */
463 Jmsg(jcr, msg_type, 0, _("Bacula %s (%s): %s\n"
466 " Backup Level: %s%s\n"
467 " Client: \"%s\" %s\n"
468 " FileSet: \"%s\" %s\n"
471 " Scheduled time: %s\n"
474 " Elapsed time: %s\n"
476 " FD Files Written: %s\n"
477 " SD Files Written: %s\n"
478 " FD Bytes Written: %s (%sB)\n"
479 " SD Bytes Written: %s (%sB)\n"
481 " Software Compression: %s\n"
482 " Volume name(s): %s\n"
483 " Volume Session Id: %d\n"
484 " Volume Session Time: %d\n"
485 " Last Volume Bytes: %s (%sB)\n"
486 " Non-fatal FD errors: %d\n"
488 " FD termination status: %s\n"
489 " SD termination status: %s\n"
490 " Termination: %s\n\n"),
496 level_to_str(jcr->JobLevel), jcr->since,
497 jcr->client->hdr.name, cr.Uname,
498 jcr->fileset->hdr.name, jcr->FSCreateTime,
500 jcr->store->hdr.name,
504 edit_utime(RunTime, elapsed, sizeof(elapsed)),
506 edit_uint64_with_commas(jcr->jr.JobFiles, ec1),
507 edit_uint64_with_commas(jcr->SDJobFiles, ec2),
508 edit_uint64_with_commas(jcr->jr.JobBytes, ec3),
509 edit_uint64_with_suffix(jcr->jr.JobBytes, ec4),
510 edit_uint64_with_commas(jcr->SDJobBytes, ec5),
511 edit_uint64_with_suffix(jcr->SDJobBytes, ec6),
517 edit_uint64_with_commas(mr.VolBytes, ec7),
518 edit_uint64_with_suffix(mr.VolBytes, ec8),
525 Dmsg0(100, "Leave backup_cleanup()\n");
/*
 * update_bootstrap_file -- write this job's volume records to the
 * bootstrap target named by the job resource.
 *
 * Nothing is written unless the job status is JS_Terminated, the job record
 * shows a non-zero JobBytes, and the job has a WriteBootstrap setting.
 * Output is one header line followed by one record per volume returned by
 * db_get_job_volume_parameters().
 *
 * jcr: job control record of the finished job.
 *
 * NOTE(review): the declarations, the test that chooses between the pipe
 * and the plain file, and the closing of the stream are not visible in this
 * listing.
 */
528 void update_bootstrap_file(JCR *jcr)
530 /* Now update the bootstrap file if any */
531 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes &&
532 jcr->job->WriteBootstrap) {
536 char *fname = jcr->job->WriteBootstrap;
537 VolParams is filled by db_get_job_volume_parameters() below. */
537 VOL_PARAMS *VolParams = NULL;
/* Pipe target: fd is the write stream of the pipe, or NULL on failure. */
544 bpipe = open_bpipe(fname, 0, "w");
545 fd = bpipe ? bpipe->wfd : NULL;
547 /* ***FIXME*** handle BASE */
/* File target: a Full level opens with w+, every other level with a+. */
548 fd = fopen(fname, jcr->JobLevel==L_FULL?"w+":"a+");
551 VolCount = db_get_job_volume_parameters(jcr, jcr->db, jcr->JobId,
/*
 * Missing volume parameters are always reported, but the job is failed
 * only when the SD reported files written (SDJobFiles != 0).
 */
554 Jmsg(jcr, M_ERROR, 0, _("Could not get Job Volume Parameters to "
555 "update Bootstrap file. ERR=%s\n"), db_strerror(jcr->db));
556 if (jcr->SDJobFiles != 0) {
557 set_jcr_job_status(jcr, JS_ErrorTerminated);
561 /* Start output with when and who wrote it */
562 bstrftimes(edt, sizeof(edt), time(NULL));
563 fprintf(fd, "# %s - %s - %s%s\n", edt, jcr->jr.Job,
564 level_to_str(jcr->JobLevel), jcr->since);
565 for (int i=0; i < VolCount; i++) {
566 /* Write the record */
567 fprintf(fd, "Volume=\"%s\"\n", VolParams[i].VolumeName);
568 fprintf(fd, "MediaType=\"%s\"\n", VolParams[i].MediaType);
/* Session id and time come from the JCR, so they repeat for each volume. */
569 fprintf(fd, "VolSessionId=%u\n", jcr->VolSessionId);
570 fprintf(fd, "VolSessionTime=%u\n", jcr->VolSessionTime);
571 fprintf(fd, "VolFile=%u-%u\n", VolParams[i].StartFile,
572 VolParams[i].EndFile);
573 fprintf(fd, "VolBlock=%u-%u\n", VolParams[i].StartBlock,
574 VolParams[i].EndBlock);
575 fprintf(fd, "FileIndex=%d-%d\n", VolParams[i].FirstIndex,
576 VolParams[i].LastIndex);
/* Failure to open the bootstrap target is an error and fails the job. */
588 Jmsg(jcr, M_ERROR, 0, _("Could not open WriteBootstrap file:\n"
589 "%s: ERR=%s\n"), fname, be.strerror());
590 set_jcr_job_status(jcr, JS_ErrorTerminated);