3 * Bacula Director -- backup.c -- responsible for doing backup jobs
5 * Kern Sibbald, March MM
7 * Basic tasks done here:
8 * Open DB and create records for this job.
9 * Open Message Channel with Storage daemon to tell him a job will be starting.
10 * Open connection with File daemon and pass him commands
12 * When the File daemon finishes the job, update the DB.
17 Copyright (C) 2000-2006 Kern Sibbald
19 This program is free software; you can redistribute it and/or
20 modify it under the terms of the GNU General Public License
21 version 2 as amended with additional clauses defined in the
22 file LICENSE in the main source directory.
24 This program is distributed in the hope that it will be useful,
25 but WITHOUT ANY WARRANTY; without even the implied warranty of
26 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27 file LICENSE for additional details.
35 /* Commands sent to File daemon */
/* backupcmd is sent as-is by do_backup(); storaddr is a printf-style
 * template that do_backup() fills with the Storage daemon address, port and
 * TLS requirement code. */
36 static char backupcmd[] = "backup\n";
37 static char storaddr[] = "storage address=%s port=%d ssl=%d\n";
39 /* Responses received from File daemon */
/* OKstore and OKbackup are the replies checked after the storage and backup
 * commands. EndJob is a scanf-style template with five fields; it is parsed
 * in wait_for_job_termination(). */
40 static char OKbackup[] = "2000 OK backup\n";
41 static char OKstore[] = "2000 OK storage\n";
42 static char EndJob[] = "2800 End Job TermCode=%d JobFiles=%u "
43 "ReadBytes=%lld JobBytes=%lld Errors=%u\n";
46 * Called here before the job is run to do the job
/*
 * do_backup_init -- catalog preparation performed before a backup job runs.
 *
 * Visible steps:
 *   1. Obtain (or create) the FileSet record for the job.
 *   2. Resolve the job level and "since" text into jcr->since.
 *   3. Inside a switch on jcr->JobLevel, replace jcr->pool with
 *      jcr->full_pool, jcr->inc_pool or jcr->dif_pool (the case labels and
 *      any guards are not visible in this listing).
 *   4. Look the pool up by name, creating it in the catalog when the lookup
 *      fails, and store its PoolId in both jcr->PoolId and jcr->jr.PoolId.
 *   5. When the job is not itself a clone and has run_cmds, submit one
 *      "run ... cloned=yes" command per entry in that list.
 *
 * jcr: job control record; read and updated in place.
 *
 * Returns bool. NOTE(review): the return statements are not visible in this
 * listing; presumably false after the failure paths and true otherwise --
 * confirm against the full source.
 */
49 bool do_backup_init(JCR *jcr)
53 if (!get_or_create_fileset_record(jcr)) {
58 * Get definitive Job level and since time
60 get_level_since_time(jcr, jcr->since, sizeof(jcr->since));
63 * Apply any level related Pool selections
65 switch (jcr->JobLevel) {
68 jcr->pool = jcr->full_pool;
73 jcr->pool = jcr->inc_pool;
78 jcr->pool = jcr->dif_pool;
/* Start from a zeroed pool record so the lookup is keyed only by Name. */
82 memset(&pr, 0, sizeof(pr));
83 bstrncpy(pr.Name, jcr->pool->hdr.name, sizeof(pr.Name));
85 if (!db_get_pool_record(jcr, jcr->db, &pr)) { /* get by Name */
86 /* Try to create the pool */
87 if (create_pool(jcr, jcr->db, jcr->pool, POOL_OP_CREATE) < 0) {
88 Jmsg(jcr, M_FATAL, 0, _("Pool %s not in database. %s"), pr.Name,
89 db_strerror(jcr->db));
92 Jmsg(jcr, M_INFO, 0, _("Pool %s created in database.\n"), pr.Name);
/* Re-read the record after creation so that pr.PoolId is filled in. */
93 if (!db_get_pool_record(jcr, jcr->db, &pr)) { /* get by Name */
94 Jmsg(jcr, M_FATAL, 0, _("Pool %s not in database. %s"), pr.Name,
95 db_strerror(jcr->db));
100 jcr->PoolId = pr.PoolId;
101 jcr->jr.PoolId = pr.PoolId;
104 * Fire off any clone jobs (run directives)
106 Dmsg2(900, "cloned=%d run_cmds=%p\n", jcr->cloned, jcr->job->run_cmds);
/* The !jcr->cloned test keeps a job that was itself started as a clone
 * (the "cloned=yes" built below) from starting further clones. */
107 if (!jcr->cloned && jcr->job->run_cmds) {
110 POOLMEM *cmd = get_pool_memory(PM_FNAME);
/* NOTE(review): the release of this UAContext is not visible in this
 * listing -- confirm it is freed after the loop. */
111 UAContext *ua = new_ua_context(jcr);
113 foreach_alist(runcmd, job->run_cmds) {
114 cmd = edit_job_codes(jcr, cmd, runcmd, "");
115 Mmsg(ua->cmd, "run %s cloned=yes", cmd);
116 Dmsg1(900, "=============== Clone cmd=%s\n", ua->cmd);
117 parse_ua_args(ua); /* parse command */
/* run_cmd()'s result is printed as the clone's JobId in the success message;
 * the value tested for failure is not visible here. */
118 int stat = run_cmd(ua, ua->cmd);
120 Jmsg(jcr, M_ERROR, 0, _("Could not start clone job.\n"));
122 Jmsg(jcr, M_INFO, 0, _("Clone JobId %d started.\n"), stat);
126 free_pool_memory(cmd);
133 * Do a backup of the specified FileSet
135 * Returns: false on failure
/*
 * do_backup -- drive one backup job from the Director side.
 *
 * Visible sequence:
 *   - announce the job, mark it JS_Running and write the job start record
 *     to the catalog;
 *   - connect to the Storage daemon, start the job there, start the message
 *     thread for it, and send "run";
 *   - connect to the File daemon and send the include list, exclude list
 *     and level command;
 *   - send the Storage daemon address, port and TLS requirement to the File
 *     daemon, then the run-before/after commands and the backup command;
 *   - wait for termination and, when the result is JS_Terminated, call
 *     backup_cleanup().
 *
 * jcr: job control record for the job being run.
 *
 * Returns bool. NOTE(review): the failure branches and return statements
 * are not visible in this listing -- confirm the exact error handling
 * against the full source.
 */
138 bool do_backup(JCR *jcr)
/* Default: no TLS requested; adjusted below from the storage settings. */
141 int tls_need = BNET_TLS_NONE;
147 /* Print Job Start message */
148 Jmsg(jcr, M_INFO, 0, _("Start Backup JobId %s, Job=%s\n"),
149 edit_uint64(jcr->JobId, ed1), jcr->Job);
151 set_jcr_job_status(jcr, JS_Running);
152 Dmsg2(100, "JobId=%d JobLevel=%c\n", jcr->jr.JobId, jcr->jr.JobLevel);
153 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
154 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
159 * Open a message channel connection with the Storage
160 * daemon. This is to let him know that our client
161 * will be contacting him for a backup session.
164 Dmsg0(110, "Open connection with storage daemon\n");
165 set_jcr_job_status(jcr, JS_WaitSD);
167 * Start conversation with Storage daemon
169 if (!connect_to_storage_daemon(jcr, 10, SDConnectTimeout, 1)) {
173 * Now start a job with the Storage daemon
175 if (!start_storage_daemon_job(jcr, NULL, jcr->storage)) {
179 * Now start a Storage daemon message thread. Note,
180 * this thread is used to provide the catalog services
181 * for the backup job, including inserting the attributes
182 * into the catalog. See catalog_update() in catreq.c
184 if (!start_storage_daemon_message_thread(jcr)) {
187 Dmsg0(150, "Storage daemon connection OK\n");
189 if (!bnet_fsend(jcr->store_bsock, "run")) {
193 set_jcr_job_status(jcr, JS_WaitFD);
194 if (!connect_to_file_daemon(jcr, 10, FDConnectTimeout, 1)) {
198 set_jcr_job_status(jcr, JS_Running);
199 fd = jcr->file_bsock;
201 if (!send_include_list(jcr)) {
205 if (!send_exclude_list(jcr)) {
209 if (!send_level_command(jcr)) {
214 * send Storage daemon address to the File daemon
/* When SDDport is unset (0), reuse SDport. Note that this writes the value
 * back into the store resource itself. */
217 if (store->SDDport == 0) {
218 store->SDDport = store->SDport;
221 /* TLS Requirement */
222 if (store->tls_enable) {
223 if (store->tls_require) {
224 tls_need = BNET_TLS_REQUIRED;
226 tls_need = BNET_TLS_OK;
/* The result of bnet_fsend() is not tested here; the response() check that
 * follows is the visible test of this exchange. */
230 bnet_fsend(fd, storaddr, store->address, store->SDDport, tls_need);
231 if (!response(jcr, fd, OKstore, "Storage", DISPLAY_ERROR)) {
236 if (!send_run_before_and_after_commands(jcr)) {
240 /* Send backup command */
241 bnet_fsend(fd, backupcmd);
242 if (!response(jcr, fd, OKbackup, "backup", DISPLAY_ERROR)) {
246 /* Pickup Job termination data */
247 stat = wait_for_job_termination(jcr);
/* backup_cleanup() is called here only for a JS_Terminated result; what
 * happens for other results is not visible in this listing. */
248 if (stat == JS_Terminated) {
249 backup_cleanup(jcr, stat);
257 * Here we wait for the File daemon to signal termination,
258 * then we wait for the Storage daemon. When both
259 * are done, we return the job status.
260 * Also used by restore.c
/*
 * wait_for_job_termination -- collect the File daemon's end-of-job report,
 * then wait for the Storage daemon.
 *
 * Messages are read from jcr->file_bsock until bget_dirmsg() returns a
 * negative value. A message matching the EndJob template (all five fields)
 * while fd_ok is still false supplies the FD status and counters. The
 * "Unexpected Client Job message" warning is presumably issued for messages
 * that do not match (the else is not visible in this listing).
 *
 * jcr: job control record; JobStatus, FDJobStatus, JobFiles, Errors,
 *      ReadBytes and JobBytes may be updated.
 *
 * Returns the first status that is not JS_Terminated, checked in the order
 * jcr->JobStatus, jcr->FDJobStatus; otherwise jcr->SDJobStatus.
 */
262 int wait_for_job_termination(JCR *jcr)
265 BSOCK *fd = jcr->file_bsock;
/* These four locals are written only by the sscanf() in the loop below. */
267 uint32_t JobFiles, Errors;
268 uint64_t ReadBytes, JobBytes;
270 set_jcr_job_status(jcr, JS_Running);
271 /* Wait for Client to terminate */
272 while ((n = bget_dirmsg(fd)) >= 0) {
/* NOTE(review): EndJob scans ReadBytes/JobBytes with %lld while the targets
 * are uint64_t -- confirm that is acceptable on all supported platforms.
 * fd_ok is presumably set inside this branch (line not visible). */
273 if (!fd_ok && sscanf(fd->msg, EndJob, &jcr->FDJobStatus, &JobFiles,
274 &ReadBytes, &JobBytes, &Errors) == 5) {
276 set_jcr_job_status(jcr, jcr->FDJobStatus);
277 Dmsg1(100, "FDStatus=%c\n", (char)jcr->JobStatus);
279 Jmsg(jcr, M_WARNING, 0, _("Unexpected Client Job message: %s\n"),
282 if (job_canceled(jcr)) {
286 if (is_bnet_error(fd)) {
287 Jmsg(jcr, M_FATAL, 0, _("Network error with FD during %s: ERR=%s\n"),
288 job_type_to_str(jcr->JobType), bnet_strerror(fd));
290 bnet_sig(fd, BNET_TERMINATE); /* tell Client we are terminating */
292 /* Note, the SD stores in jcr->JobFiles/ReadBytes/JobBytes/Errors */
293 wait_for_storage_daemon_termination(jcr);
296 /* Return values from FD */
/* NOTE(review): the guard around these assignments is not visible here;
 * confirm they run only when the EndJob message was parsed, since the
 * locals are uninitialized otherwise. */
298 jcr->JobFiles = JobFiles;
299 jcr->Errors = Errors;
300 jcr->ReadBytes = ReadBytes;
301 jcr->JobBytes = JobBytes;
303 Jmsg(jcr, M_FATAL, 0, _("No Job status returned from FD.\n"));
306 // Dmsg4(100, "fd_ok=%d FDJS=%d JS=%d SDJS=%d\n", fd_ok, jcr->FDJobStatus,
307 // jcr->JobStatus, jcr->SDJobStatus);
309 /* Return the first error status we find Dir, FD, or SD */
/* A missing EndJob report or a socket error is treated as an FD error. */
310 if (!fd_ok || is_bnet_error(fd)) {
311 jcr->FDJobStatus = JS_ErrorTerminated;
313 if (jcr->JobStatus != JS_Terminated) {
314 return jcr->JobStatus;
316 if (jcr->FDJobStatus != JS_Terminated) {
317 return jcr->FDJobStatus;
319 return jcr->SDJobStatus;
323 * Release resources allocated during backup.
/*
 * backup_cleanup -- record the final status of a backup job and emit the
 * job report.
 *
 * Sets the job status to TermCode, updates the job end record, re-reads the
 * job, client and media records used for statistics, updates the bootstrap
 * file, picks a termination message from jcr->JobStatus, computes elapsed
 * time, transfer rate and software compression, and issues a single Jmsg
 * with the formatted report.
 *
 * jcr:      job control record of the finished job.
 * TermCode: termination status to record; it is also printed with %c in the
 *           debug message below.
 *
 * A failed job-record or media-record lookup switches the job status to
 * JS_ErrorTerminated. For JS_ErrorTerminated, and for the branch that
 * reports "Backup Canceled", the Storage daemon socket is sent
 * BNET_TERMINATE and the SD message thread is cancelled.
 *
 * NOTE(review): many lines of this function (case labels, parts of the
 * report format and its argument list) are not visible in this listing.
 */
325 void backup_cleanup(JCR *jcr, int TermCode)
327 char sdt[50], edt[50], schedt[50];
328 char ec1[30], ec2[30], ec3[30], ec4[30], ec5[30], compress[50];
329 char ec6[30], ec7[30], elapsed[50];
330 char term_code[100], fd_term_msg[100], sd_term_msg[100];
331 const char *term_msg;
335 double kbps, compression;
338 Dmsg2(100, "Enter backup_cleanup %d %c\n", TermCode, TermCode);
339 dequeue_messages(jcr); /* display any queued messages */
340 memset(&mr, 0, sizeof(mr));
341 memset(&cr, 0, sizeof(cr));
342 set_jcr_job_status(jcr, TermCode);
344 update_job_end_record(jcr); /* update database */
346 if (!db_get_job_record(jcr, jcr->db, &jcr->jr)) {
347 Jmsg(jcr, M_WARNING, 0, _("Error getting job record for stats: %s"),
348 db_strerror(jcr->db));
349 set_jcr_job_status(jcr, JS_ErrorTerminated);
352 bstrncpy(cr.Name, jcr->client->hdr.name, sizeof(cr.Name));
/* Unlike the job and media lookups, a failed client lookup only warns; no
 * status change is visible for it. */
353 if (!db_get_client_record(jcr, jcr->db, &cr)) {
354 Jmsg(jcr, M_WARNING, 0, _("Error getting client record for stats: %s"),
355 db_strerror(jcr->db));
358 bstrncpy(mr.VolumeName, jcr->VolumeName, sizeof(mr.VolumeName));
359 if (!db_get_media_record(jcr, jcr->db, &mr)) {
360 Jmsg(jcr, M_WARNING, 0, _("Error getting Media record for Volume \"%s\": ERR=%s"),
361 mr.VolumeName, db_strerror(jcr->db));
362 set_jcr_job_status(jcr, JS_ErrorTerminated);
365 update_bootstrap_file(jcr);
368 msg_type = M_INFO; /* by default INFO message */
369 switch (jcr->JobStatus) {
371 if (jcr->Errors || jcr->SDErrors) {
372 term_msg = _("Backup OK -- with warnings");
374 term_msg = _("Backup OK");
378 case JS_ErrorTerminated:
379 term_msg = _("*** Backup Error ***");
380 msg_type = M_ERROR; /* Generate error message */
381 if (jcr->store_bsock) {
382 bnet_sig(jcr->store_bsock, BNET_TERMINATE);
383 if (jcr->SD_msg_chan) {
384 pthread_cancel(jcr->SD_msg_chan);
389 term_msg = _("Backup Canceled");
390 if (jcr->store_bsock) {
391 bnet_sig(jcr->store_bsock, BNET_TERMINATE);
392 if (jcr->SD_msg_chan) {
393 pthread_cancel(jcr->SD_msg_chan);
398 term_msg = term_code;
/* NOTE(review): unbounded sprintf into term_code[100]; the length depends
 * on the translated format string -- a bounded formatter such as the
 * bsnprintf() used below would be safer. */
399 sprintf(term_code, _("Inappropriate term code: %c\n"), jcr->JobStatus);
402 bstrftimes(schedt, sizeof(schedt), jcr->jr.SchedTime);
403 bstrftimes(sdt, sizeof(sdt), jcr->jr.StartTime);
404 bstrftimes(edt, sizeof(edt), jcr->jr.EndTime);
405 RunTime = jcr->jr.EndTime - jcr->jr.StartTime;
/* NOTE(review): RunTime is zero when EndTime equals StartTime; no guard for
 * that case is visible in this listing -- confirm one exists before this
 * division. */
409 kbps = (double)jcr->jr.JobBytes / (1000 * RunTime);
411 if (!db_get_job_volume_names(jcr, jcr->db, jcr->jr.JobId, &jcr->VolumeName)) {
413 * Note, if the job has erred, most likely it did not write any
414 * tape, so suppress this "error" message since in that case
415 * it is normal. Or look at it the other way, only for a
416 * normal exit should we complain about this error.
418 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes) {
419 Jmsg(jcr, M_ERROR, 0, "%s", db_strerror(jcr->db));
421 jcr->VolumeName[0] = 0; /* none */
/* Compression is reported as "None" when nothing was read or when the
 * computed saving is below 0.5 percent. */
424 if (jcr->ReadBytes == 0) {
425 bstrncpy(compress, "None", sizeof(compress));
427 compression = (double)100 - 100.0 * ((double)jcr->JobBytes / (double)jcr->ReadBytes);
428 if (compression < 0.5) {
429 bstrncpy(compress, "None", sizeof(compress));
431 bsnprintf(compress, sizeof(compress), "%.1f %%", (float)compression);
434 jobstatus_to_ascii(jcr->FDJobStatus, fd_term_msg, sizeof(fd_term_msg));
435 jobstatus_to_ascii(jcr->SDJobStatus, sd_term_msg, sizeof(sd_term_msg));
437 // bmicrosleep(15, 0); /* for debugging SIGHUP */
439 Jmsg(jcr, msg_type, 0, _("Bacula %s (%s): %s\n"
442 " Backup Level: %s%s\n"
443 " Client: \"%s\" %s\n"
444 " FileSet: \"%s\" %s\n"
447 " Scheduled time: %s\n"
450 " Elapsed time: %s\n"
452 " FD Files Written: %s\n"
453 " SD Files Written: %s\n"
454 " FD Bytes Written: %s (%sB)\n"
455 " SD Bytes Written: %s (%sB)\n"
457 " Software Compression: %s\n"
458 " Volume name(s): %s\n"
459 " Volume Session Id: %d\n"
460 " Volume Session Time: %d\n"
461 " Last Volume Bytes: %s\n"
462 " Non-fatal FD errors: %d\n"
464 " FD termination status: %s\n"
465 " SD termination status: %s\n"
466 " Termination: %s\n\n"),
472 level_to_str(jcr->JobLevel), jcr->since,
473 jcr->client->hdr.name, cr.Uname,
474 jcr->fileset->hdr.name, jcr->FSCreateTime,
476 jcr->store->hdr.name,
480 edit_utime(RunTime, elapsed, sizeof(elapsed)),
482 edit_uint64_with_commas(jcr->jr.JobFiles, ec1),
483 edit_uint64_with_commas(jcr->SDJobFiles, ec2),
484 edit_uint64_with_commas(jcr->jr.JobBytes, ec3),
485 edit_uint64_with_suffix(jcr->jr.JobBytes, ec4),
486 edit_uint64_with_commas(jcr->SDJobBytes, ec5),
487 edit_uint64_with_suffix(jcr->SDJobBytes, ec6),
493 edit_uint64_with_commas(mr.VolBytes, ec7),
500 Dmsg0(100, "Leave backup_cleanup()\n");
/*
 * update_bootstrap_file -- write the job's volume records to the bootstrap
 * target named by jcr->job->WriteBootstrap.
 *
 * Nothing is written unless the job status is JS_Terminated, the job record
 * shows a non-zero JobBytes, and WriteBootstrap is set.
 *
 * The output is a "# time - job - level/since" header line followed, for
 * each volume returned by db_get_job_volume_parameters(), by Volume,
 * MediaType, VolSessionId, VolSessionTime, VolFile, VolBlock and FileIndex
 * lines.
 *
 * jcr: job control record; its status is set to JS_ErrorTerminated when the
 *      target cannot be opened, or when the volume parameters cannot be
 *      fetched and jcr->SDJobFiles is non-zero.
 *
 * NOTE(review): the test that chooses between the pipe and the plain file,
 * and the code that closes the stream and releases VolParams, are not
 * visible in this listing.
 */
503 void update_bootstrap_file(JCR *jcr)
505 /* Now update the bootstrap file if any */
506 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes &&
507 jcr->job->WriteBootstrap) {
511 char *fname = jcr->job->WriteBootstrap;
512 VOL_PARAMS *VolParams = NULL;
/* Pipe variant: the stream is the write end of the bpipe, or NULL when the
 * pipe could not be opened. */
519 bpipe = open_bpipe(fname, 0, "w");
520 fd = bpipe ? bpipe->wfd : NULL;
522 /* ***FIXME*** handle BASE */
/* File variant: an L_FULL job opens with "w+", any other level with "a+". */
523 fd = fopen(fname, jcr->JobLevel==L_FULL?"w+":"a+");
526 VolCount = db_get_job_volume_parameters(jcr, jcr->db, jcr->JobId,
529 Jmsg(jcr, M_ERROR, 0, _("Could not get Job Volume Parameters to "
530 "update Bootstrap file. ERR=%s\n"), db_strerror(jcr->db));
/* The missing parameters change the job status only when the SD reported
 * written files. */
531 if (jcr->SDJobFiles != 0) {
532 set_jcr_job_status(jcr, JS_ErrorTerminated);
536 /* Start output with when and who wrote it */
537 bstrftimes(edt, sizeof(edt), time(NULL));
538 fprintf(fd, "# %s - %s - %s%s\n", edt, jcr->jr.Job,
539 level_to_str(jcr->JobLevel), jcr->since);
540 for (int i=0; i < VolCount; i++) {
541 /* Write the record */
542 fprintf(fd, "Volume=\"%s\"\n", VolParams[i].VolumeName);
543 fprintf(fd, "MediaType=\"%s\"\n", VolParams[i].MediaType);
544 fprintf(fd, "VolSessionId=%u\n", jcr->VolSessionId);
545 fprintf(fd, "VolSessionTime=%u\n", jcr->VolSessionTime);
546 fprintf(fd, "VolFile=%u-%u\n", VolParams[i].StartFile,
547 VolParams[i].EndFile);
548 fprintf(fd, "VolBlock=%u-%u\n", VolParams[i].StartBlock,
549 VolParams[i].EndBlock);
550 fprintf(fd, "FileIndex=%d-%d\n", VolParams[i].FirstIndex,
551 VolParams[i].LastIndex);
563 Jmsg(jcr, M_ERROR, 0, _("Could not open WriteBootstrap file:\n"
564 "%s: ERR=%s\n"), fname, be.strerror());
565 set_jcr_job_status(jcr, JS_ErrorTerminated);