3 * Bacula Director -- backup.c -- responsible for doing backup jobs
5 * Kern Sibbald, March MM
7 * Basic tasks done here:
8 * Open DB and create records for this job.
9 * Open Message Channel with Storage daemon to tell him a job will be starting.
10 * Open connection with File daemon and pass him commands
12 * When the File daemon finishes the job, update the DB.
17 Copyright (C) 2000-2005 Kern Sibbald
19 This program is free software; you can redistribute it and/or
20 modify it under the terms of the GNU General Public License
21 version 2 as amended with additional clauses defined in the
22 file LICENSE in the main source directory.
24 This program is distributed in the hope that it will be useful,
25 but WITHOUT ANY WARRANTY; without even the implied warranty of
26 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27 file LICENSE for additional details.
/* Protocol strings exchanged with the File daemon. The command strings are
 * sent with bnet_fsend() and the response strings are compared against the
 * daemon's reply by the callers in this file. */
35 /* Commands sent to File daemon */
36 static char backupcmd[] = "backup\n";
/* Format fields: storage address (string), port (int), ssl flag (int). */
37 static char storaddr[] = "storage address=%s port=%d ssl=%d\n";
39 /* Responses received from File daemon */
40 static char OKbackup[] = "2000 OK backup\n";
41 static char OKstore[] = "2000 OK storage\n";
/* Used as the sscanf() pattern for the Client's end-of-job report:
 * termination code, file count, bytes read, bytes written, error count. */
42 static char EndJob[] = "2800 End Job TermCode=%d JobFiles=%u "
43 "ReadBytes=%lld JobBytes=%lld Errors=%u\n";
46 * Called here before the job is run to do the job-specific initialization.
/*
 * do_backup_init -- prepare the JCR before a backup job is run.
 *
 * Visible steps: resolve the job level and "since" time, select the Pool
 * for that level, make sure the Pool has a catalog record (creating it if
 * the lookup fails), store the PoolId and the FileSet creation time in the
 * JCR, and submit any "run" commands configured on the job as cloned jobs.
 *
 * NOTE(review): the embedded line numbers in this listing skip, so some
 * original lines (braces, case labels, local declarations, return
 * statements) are not shown; the exact return values cannot be confirmed
 * from what is visible here.
 */
49 bool do_backup_init(JCR *jcr)
55 * Get definitive Job level and since time
57 get_level_since_time(jcr, jcr->since, sizeof(jcr->since));
60 * Apply any level related Pool selections
62 switch (jcr->JobLevel) {
/* Each assignment below swaps jcr->pool for a level-specific pool
 * (full / incremental / differential, going by the field names); the
 * case labels and any guards around them are not visible here. */
65 jcr->pool = jcr->full_pool;
70 jcr->pool = jcr->inc_pool;
75 jcr->pool = jcr->dif_pool;
/* Look the selected pool up in the catalog by its resource name. */
79 memset(&pr, 0, sizeof(pr));
80 bstrncpy(pr.Name, jcr->pool->hdr.name, sizeof(pr.Name));
82 if (!db_get_pool_record(jcr, jcr->db, &pr)) { /* get by Name */
83 /* Try to create the pool */
84 if (create_pool(jcr, jcr->db, jcr->pool, POOL_OP_CREATE) < 0) {
85 Jmsg(jcr, M_FATAL, 0, _("Pool %s not in database. %s"), pr.Name,
86 db_strerror(jcr->db));
/* Pool was created: read it back so that pr (and pr.PoolId) is filled in. */
89 Jmsg(jcr, M_INFO, 0, _("Pool %s created in database.\n"), pr.Name);
90 if (!db_get_pool_record(jcr, jcr->db, &pr)) { /* get by Name */
91 Jmsg(jcr, M_FATAL, 0, _("Pool %s not in database. %s"), pr.Name,
92 db_strerror(jcr->db));
/* The PoolId is kept both in the JCR itself and in its job record. */
97 jcr->PoolId = pr.PoolId; /****FIXME**** this can go away */
98 jcr->jr.PoolId = pr.PoolId;
/* Fetch (or create) the FileSet record and remember its creation time. */
100 if (!get_or_create_fileset_record(jcr, &fsr)) {
103 bstrncpy(jcr->FSCreateTime, fsr.cCreateTime, sizeof(jcr->FSCreateTime));
/* Clone handling: only when this job is not itself marked as cloned and
 * the job resource has a list of run commands. */
106 Dmsg2(900, "cloned=%d run_cmds=%p\n", jcr->cloned, jcr->job->run_cmds);
107 if (!jcr->cloned && jcr->job->run_cmds) {
110 POOLMEM *cmd = get_pool_memory(PM_FNAME);
111 UAContext *ua = new_ua_context(jcr);
/* For every configured run command: expand job codes into cmd, build a
 * "run ... cloned=yes" console command in ua->cmd, parse it and run it. */
113 foreach_alist(runcmd, job->run_cmds) {
114 cmd = edit_job_codes(jcr, cmd, runcmd, "");
115 Mmsg(ua->cmd, "run %s cloned=yes", cmd);
116 Dmsg1(900, "=============== Clone cmd=%s\n", ua->cmd);
117 parse_ua_args(ua); /* parse command */
/* The value returned by run_cmd() is reported below as the new JobId;
 * the test that selects the error branch is not visible in this listing. */
118 int stat = run_cmd(ua, ua->cmd);
120 Jmsg(jcr, M_ERROR, 0, _("Could not start clone job.\n"));
122 Jmsg(jcr, M_INFO, 0, _("Clone JobId %d started.\n"), stat);
/* Release the scratch buffer obtained with get_pool_memory() above. */
126 free_pool_memory(cmd);
133 * Do a backup of the specified FileSet
135 * Returns: false on failure
/*
 * do_backup -- drive one backup job from the Director side.
 *
 * Visible sequence: log the start message, mark the job running and write
 * the job start record, connect to the Storage daemon and start its job
 * and message thread, connect to the File daemon, send it the include and
 * exclude lists, the level, the Storage daemon address and the run
 * before/after commands, issue the backup command, then wait for
 * termination and call backup_cleanup() when the job ended normally.
 *
 * NOTE(review): the embedded line numbers skip, so the failure branches
 * (bodies of the if (!...) tests) and the return statements are not
 * visible in this listing.
 */
138 bool do_backup(JCR *jcr)
/* TLS requirement advertised to the File daemon; upgraded further down
 * when the storage resource enables or requires TLS. */
141 int tls_need = BNET_TLS_NONE;
146 /* Print Job Start message */
147 Jmsg(jcr, M_INFO, 0, _("Start Backup JobId %u, Job=%s\n"),
148 jcr->JobId, jcr->Job);
150 set_jcr_job_status(jcr, JS_Running);
151 Dmsg2(100, "JobId=%d JobLevel=%c\n", jcr->jr.JobId, jcr->jr.JobLevel);
152 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
153 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
158 * Open a message channel connection with the Storage
159 * daemon. This is to let him know that our client
160 * will be contacting him for a backup session.
163 Dmsg0(110, "Open connection with storage daemon\n");
164 set_jcr_job_status(jcr, JS_WaitSD);
166 * Start conversation with Storage daemon
168 if (!connect_to_storage_daemon(jcr, 10, SDConnectTimeout, 1)) {
172 * Now start a job with the Storage daemon
174 if (!start_storage_daemon_job(jcr, jcr->storage, SD_APPEND)) {
178 * Now start a Storage daemon message thread. Note,
179 * this thread is used to provide the catalog services
180 * for the backup job, including inserting the attributes
181 * into the catalog. See catalog_update() in catreq.c
183 if (!start_storage_daemon_message_thread(jcr)) {
186 Dmsg0(150, "Storage daemon connection OK\n");
/* Storage side is ready; now bring up the File daemon connection. */
188 set_jcr_job_status(jcr, JS_WaitFD);
189 if (!connect_to_file_daemon(jcr, 10, FDConnectTimeout, 1)) {
193 set_jcr_job_status(jcr, JS_Running);
194 fd = jcr->file_bsock;
/* Tell the File daemon what to back up and at which level. */
196 if (!send_include_list(jcr)) {
200 if (!send_exclude_list(jcr)) {
204 if (!send_level_command(jcr)) {
209 * send Storage daemon address to the File daemon
/* Fall back to the SDport value when no SDDport has been set. */
212 if (store->SDDport == 0) {
213 store->SDDport = store->SDport;
216 /* TLS Requirement */
217 if (store->tls_enable) {
218 if (store->tls_require) {
219 tls_need = BNET_TLS_REQUIRED;
221 tls_need = BNET_TLS_OK;
/* The final argument of this call sits on a line that is not visible
 * here; presumably it is tls_need, matching the ssl=%d field -- confirm. */
225 bnet_fsend(fd, storaddr, store->address, store->SDDport,
227 if (!response(jcr, fd, OKstore, "Storage", DISPLAY_ERROR)) {
232 if (!send_run_before_and_after_commands(jcr)) {
236 /* Send backup command */
237 bnet_fsend(fd, backupcmd);
238 if (!response(jcr, fd, OKbackup, "backup", DISPLAY_ERROR)) {
242 /* Pickup Job termination data */
/* backup_cleanup() is called from here only for a normal termination. */
243 stat = wait_for_job_termination(jcr);
244 if (stat == JS_Terminated) {
245 backup_cleanup(jcr, stat);
253 * Here we wait for the File daemon to signal termination,
254 * then we wait for the Storage daemon. When both
255 * are done, we return the job status.
256 * Also used by restore.c
/*
 * wait_for_job_termination -- wait for the File daemon's end-of-job report,
 * then for the Storage daemon, and return a job status.
 *
 * The status returned is the first of jcr->JobStatus, jcr->FDJobStatus and
 * jcr->SDJobStatus (in that order) that is not JS_Terminated; if the first
 * two are JS_Terminated, jcr->SDJobStatus is returned.
 *
 * NOTE(review): the embedded line numbers skip, so the declaration and
 * assignment of fd_ok and n, and the bodies of some branches, are not
 * visible in this listing.
 */
258 int wait_for_job_termination(JCR *jcr)
261 BSOCK *fd = jcr->file_bsock;
/* Receivers for the fields parsed from the Client's EndJob message. */
263 uint32_t JobFiles, Errors;
264 uint64_t ReadBytes, JobBytes;
266 set_jcr_job_status(jcr, JS_Running);
267 /* Wait for Client to terminate */
268 while ((n = bget_dirmsg(fd)) >= 0) {
/* Accept the EndJob report only while fd_ok is still false and only if all
 * five fields convert.
 * NOTE(review): EndJob scans ReadBytes/JobBytes with %lld although they are
 * declared uint64_t -- confirm the signedness mismatch is acceptable. */
269 if (!fd_ok && sscanf(fd->msg, EndJob, &jcr->FDJobStatus, &JobFiles,
270 &ReadBytes, &JobBytes, &Errors) == 5) {
272 set_jcr_job_status(jcr, jcr->FDJobStatus);
273 Dmsg1(100, "FDStatus=%c\n", (char)jcr->JobStatus);
275 Jmsg(jcr, M_WARNING, 0, _("Unexpected Client Job message: %s\n"),
278 if (job_canceled(jcr)) {
/* After the read loop ends, report a network error if the socket has one. */
282 if (is_bnet_error(fd)) {
283 Jmsg(jcr, M_FATAL, 0, _("Network error with FD during %s: ERR=%s\n"),
284 job_type_to_str(jcr->JobType), bnet_strerror(fd));
286 bnet_sig(fd, BNET_TERMINATE); /* tell Client we are terminating */
288 /* Note, the SD stores in jcr->JobFiles/ReadBytes/JobBytes/Errors */
289 wait_for_storage_daemon_termination(jcr);
292 /* Return values from FD */
/* The copies below overwrite the SD-supplied counters with the Client's
 * values; the condition guarding them (presumably fd_ok) is on a line
 * that is not visible here -- confirm. */
294 jcr->JobFiles = JobFiles;
295 jcr->Errors = Errors;
296 jcr->ReadBytes = ReadBytes;
297 jcr->JobBytes = JobBytes;
299 Jmsg(jcr, M_FATAL, 0, _("No Job status returned from FD.\n"));
302 // Dmsg4(100, "fd_ok=%d FDJS=%d JS=%d SDJS=%d\n", fd_ok, jcr->FDJobStatus,
303 // jcr->JobStatus, jcr->SDJobStatus);
305 /* Return the first error status we find Dir, FD, or SD */
/* A missing EndJob report or a socket error is treated as an FD failure. */
306 if (!fd_ok || is_bnet_error(fd)) {
307 jcr->FDJobStatus = JS_ErrorTerminated;
309 if (jcr->JobStatus != JS_Terminated) {
310 return jcr->JobStatus;
312 if (jcr->FDJobStatus != JS_Terminated) {
313 return jcr->FDJobStatus;
315 return jcr->SDJobStatus;
319 * Release resources allocated during backup.
/*
 * backup_cleanup -- finish a backup job and emit its summary report.
 *
 * jcr      job control record of the job being finished
 * TermCode job status to record; it is passed to set_jcr_job_status()
 *
 * Visible steps: flush queued messages, set the job status and update the
 * job end record, re-read the job, client and media records for the
 * statistics, append to the WriteBootstrap file or pipe when one is
 * configured, choose the termination text, compute elapsed time, rate and
 * compression, and print the multi-line job report with Jmsg().
 *
 * NOTE(review): the embedded line numbers skip, so several declarations
 * (mr, cr, msg_type, RunTime, VolCount, fd, bpipe, be), case labels,
 * else branches and closing braces are not visible in this listing.
 */
321 void backup_cleanup(JCR *jcr, int TermCode)
/* Scratch buffers for formatted times, edited numbers and status text. */
323 char sdt[50], edt[50], schedt[50];
324 char ec1[30], ec2[30], ec3[30], ec4[30], ec5[30], compress[50];
325 char term_code[100], fd_term_msg[100], sd_term_msg[100];
326 const char *term_msg;
330 double kbps, compression;
333 Dmsg2(100, "Enter backup_cleanup %d %c\n", TermCode, TermCode);
334 dequeue_messages(jcr); /* display any queued messages */
335 memset(&mr, 0, sizeof(mr));
336 memset(&cr, 0, sizeof(cr));
337 set_jcr_job_status(jcr, TermCode);
339 update_job_end_record(jcr); /* update database */
/* Re-read the job record so that the report uses the catalog's values. */
341 if (!db_get_job_record(jcr, jcr->db, &jcr->jr)) {
342 Jmsg(jcr, M_WARNING, 0, _("Error getting job record for stats: %s"),
343 db_strerror(jcr->db));
344 set_jcr_job_status(jcr, JS_ErrorTerminated);
/* A missing client record only produces a warning in the visible code. */
347 bstrncpy(cr.Name, jcr->client->hdr.name, sizeof(cr.Name));
348 if (!db_get_client_record(jcr, jcr->db, &cr)) {
349 Jmsg(jcr, M_WARNING, 0, _("Error getting client record for stats: %s"),
350 db_strerror(jcr->db));
/* Media record of the volume named in the JCR; supplies mr.VolBytes. */
353 bstrncpy(mr.VolumeName, jcr->VolumeName, sizeof(mr.VolumeName));
354 if (!db_get_media_record(jcr, jcr->db, &mr)) {
355 Jmsg(jcr, M_WARNING, 0, _("Error getting Media record for Volume \"%s\": ERR=%s"),
356 mr.VolumeName, db_strerror(jcr->db));
357 set_jcr_job_status(jcr, JS_ErrorTerminated);
360 /* Now update the bootstrap file if any */
/* Only for a normally terminated job that wrote data and has a
 * WriteBootstrap target configured. */
361 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes &&
362 jcr->job->WriteBootstrap) {
366 char *fname = jcr->job->WriteBootstrap;
367 VOL_PARAMS *VolParams = NULL;
/* Two ways of obtaining the output stream: a pipe opened for writing, or
 * a plain file that is truncated for a Full job and appended to otherwise.
 * The test choosing between them is not visible in this listing. */
373 bpipe = open_bpipe(fname, 0, "w");
374 fd = bpipe ? bpipe->wfd : NULL;
376 /* ***FIXME*** handle BASE */
377 fd = fopen(fname, jcr->JobLevel==L_FULL?"w+":"a+");
380 VolCount = db_get_job_volume_parameters(jcr, jcr->db, jcr->JobId,
383 Jmsg(jcr, M_ERROR, 0, _("Could not get Job Volume Parameters to "
384 "update Bootstrap file. ERR=%s\n"), db_strerror(jcr->db));
/* The failure marks the job as errored only if the SD file count
 * (jcr->SDJobFiles) is non-zero. */
385 if (jcr->SDJobFiles != 0) {
386 set_jcr_job_status(jcr, JS_ErrorTerminated);
390 /* Start output with when and who wrote it */
391 bstrftimes(edt, sizeof(edt), time(NULL));
392 fprintf(fd, "# %s - %s - %s%s\n", edt, jcr->jr.Job,
393 level_to_str(jcr->JobLevel), jcr->since);
/* One bootstrap stanza per entry of VolParams. */
394 for (int i=0; i < VolCount; i++) {
395 /* Write the record */
396 fprintf(fd, "Volume=\"%s\"\n", VolParams[i].VolumeName);
397 fprintf(fd, "MediaType=\"%s\"\n", VolParams[i].MediaType);
398 fprintf(fd, "VolSessionId=%u\n", jcr->VolSessionId);
399 fprintf(fd, "VolSessionTime=%u\n", jcr->VolSessionTime);
400 fprintf(fd, "VolFile=%u-%u\n", VolParams[i].StartFile,
401 VolParams[i].EndFile);
402 fprintf(fd, "VolBlock=%u-%u\n", VolParams[i].StartBlock,
403 VolParams[i].EndBlock);
404 fprintf(fd, "FileIndex=%d-%d\n", VolParams[i].FirstIndex,
405 VolParams[i].LastIndex);
/* Reached when the bootstrap output could not be opened. */
417 Jmsg(jcr, M_ERROR, 0, _("Could not open WriteBootstrap file:\n"
418 "%s: ERR=%s\n"), fname, be.strerror());
419 set_jcr_job_status(jcr, JS_ErrorTerminated);
/* Choose the termination text and message severity from the final status. */
423 msg_type = M_INFO; /* by default INFO message */
424 switch (jcr->JobStatus) {
426 if (jcr->Errors || jcr->SDErrors) {
427 term_msg = _("Backup OK -- with warnings");
429 term_msg = _("Backup OK");
433 case JS_ErrorTerminated:
434 term_msg = _("*** Backup Error ***");
435 msg_type = M_ERROR; /* Generate error message */
/* On error, signal termination on the Storage daemon socket and cancel the
 * SD message thread if there is one. */
436 if (jcr->store_bsock) {
437 bnet_sig(jcr->store_bsock, BNET_TERMINATE);
438 if (jcr->SD_msg_chan) {
439 pthread_cancel(jcr->SD_msg_chan);
/* Cancelled job: same Storage daemon teardown as the error case. */
444 term_msg = _("Backup Canceled");
445 if (jcr->store_bsock) {
446 bnet_sig(jcr->store_bsock, BNET_TERMINATE);
447 if (jcr->SD_msg_chan) {
448 pthread_cancel(jcr->SD_msg_chan);
/* NOTE(review): unbounded sprintf() into term_code[100]; the output is
 * short, but bsnprintf() is already used further down in this function
 * and would keep the write bounded. */
453 term_msg = term_code;
454 sprintf(term_code, _("Inappropriate term code: %c\n"), jcr->JobStatus);
457 bstrftimes(schedt, sizeof(schedt), jcr->jr.SchedTime);
458 bstrftimes(sdt, sizeof(sdt), jcr->jr.StartTime);
459 bstrftimes(edt, sizeof(edt), jcr->jr.EndTime);
460 RunTime = jcr->jr.EndTime - jcr->jr.StartTime;
/* NOTE(review): divides by 1000 * RunTime; the embedded numbering skips
 * just before this line, so a guard for a zero RunTime presumably sits
 * on a line that is not visible here -- confirm. */
464 kbps = (double)jcr->jr.JobBytes / (1000 * RunTime);
466 if (!db_get_job_volume_names(jcr, jcr->db, jcr->jr.JobId, &jcr->VolumeName)) {
468 * Note, if the job has erred, most likely it did not write any
469 * tape, so suppress this "error" message since in that case
470 * it is normal. Or look at it the other way, only for a
471 * normal exit should we complain about this error.
473 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes) {
474 Jmsg(jcr, M_ERROR, 0, "%s", db_strerror(jcr->db));
476 jcr->VolumeName[0] = 0; /* none */
/* Compression is reported as the percentage saved, computed from JobBytes
 * relative to ReadBytes; values under 0.5 are reported as "None". */
479 if (jcr->ReadBytes == 0) {
480 bstrncpy(compress, "None", sizeof(compress));
482 compression = (double)100 - 100.0 * ((double)jcr->JobBytes / (double)jcr->ReadBytes);
483 if (compression < 0.5) {
484 bstrncpy(compress, "None", sizeof(compress));
486 bsnprintf(compress, sizeof(compress), "%.1f %%", (float)compression);
489 jobstatus_to_ascii(jcr->FDJobStatus, fd_term_msg, sizeof(fd_term_msg));
490 jobstatus_to_ascii(jcr->SDJobStatus, sd_term_msg, sizeof(sd_term_msg));
492 // bmicrosleep(15, 0); /* for debugging SIGHUP */
/* Final job report. Several format lines and their matching arguments are
 * not visible in this listing, so the two lists below do not line up
 * one-to-one as shown. */
494 Jmsg(jcr, msg_type, 0, _("Bacula %s (%s): %s\n"
497 " Backup Level: %s%s\n"
498 " Client: \"%s\" %s\n"
499 " FileSet: \"%s\" %s\n"
502 " Scheduled time: %s\n"
506 " FD Files Written: %s\n"
507 " SD Files Written: %s\n"
508 " FD Bytes Written: %s\n"
509 " SD Bytes Written: %s\n"
511 " Software Compression: %s\n"
512 " Volume name(s): %s\n"
513 " Volume Session Id: %d\n"
514 " Volume Session Time: %d\n"
515 " Last Volume Bytes: %s\n"
516 " Non-fatal FD errors: %d\n"
518 " FD termination status: %s\n"
519 " SD termination status: %s\n"
520 " Termination: %s\n\n"),
526 level_to_str(jcr->JobLevel), jcr->since,
527 jcr->client->hdr.name, cr.Uname,
528 jcr->fileset->hdr.name, jcr->FSCreateTime,
530 jcr->store->hdr.name,
535 edit_uint64_with_commas(jcr->jr.JobFiles, ec1),
536 edit_uint64_with_commas(jcr->SDJobFiles, ec4),
537 edit_uint64_with_commas(jcr->jr.JobBytes, ec2),
538 edit_uint64_with_commas(jcr->SDJobBytes, ec5),
544 edit_uint64_with_commas(mr.VolBytes, ec3),
551 Dmsg0(100, "Leave backup_cleanup()\n");