3 * Bacula Director -- backup.c -- responsible for doing backup jobs
5 * Kern Sibbald, March MM
7 * Basic tasks done here:
8 * Open DB and create records for this job.
9 * Open Message Channel with Storage daemon to tell him a job will be starting.
10 * Open connection with File daemon and pass him commands
12 * When the File daemon finishes the job, update the DB.
17 Copyright (C) 2000-2006 Kern Sibbald
19 This program is free software; you can redistribute it and/or
20 modify it under the terms of the GNU General Public License
21 version 2 as amended with additional clauses defined in the
22 file LICENSE in the main source directory.
24 This program is distributed in the hope that it will be useful,
25 but WITHOUT ANY WARRANTY; without even the implied warranty of
26 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27 file LICENSE for additional details.
35 /* Commands sent to File daemon */
/* Sent as-is by do_backup() via bnet_fsend() to ask the File daemon to start the backup. */
36 static char backupcmd[] = "backup\n";
/* Format arguments, in order: storage address (%s), storage port (%d), TLS requirement (%d). */
37 static char storaddr[] = "storage address=%s port=%d ssl=%d\n";
39 /* Responses received from File daemon */
/* Reply that do_backup() requires after sending backupcmd. */
40 static char OKbackup[] = "2000 OK backup\n";
/* Reply that do_backup() requires after sending storaddr. */
41 static char OKstore[] = "2000 OK storage\n";
/*
 * sscanf() pattern used by wait_for_job_termination() to parse the File
 * daemon's end-of-job report; all five conversions must match.
 * NOTE(review): ReadBytes/JobBytes are scanned with %lld into uint64_t
 * variables -- confirm the signed conversion is intended.
 */
42 static char EndJob[] = "2800 End Job TermCode=%d JobFiles=%u "
43 "ReadBytes=%lld JobBytes=%lld Errors=%u\n";
46 * Called here before the job is run to do the job
/*
 * Prepare catalog and storage state for a backup job before it runs.
 *
 * Steps visible in this function, in order: release the read storage,
 * get or create the FileSet record, settle the job level and "since"
 * string, apply pool overrides, get or create the Pool record id, copy
 * the pool's storage as the write storage, and start any clone jobs.
 *
 * jcr: job control record of the backup being prepared.
 * Returns: bool. The return statements are outside the lines shown here,
 *          so the exact failure paths cannot be confirmed from this view.
 */
49 bool do_backup_init(JCR *jcr)
52 free_rstorage(jcr); /* we don't read so release */
54 if (!get_or_create_fileset_record(jcr)) {
59 * Get definitive Job level and since time
61 get_level_since_time(jcr, jcr->since, sizeof(jcr->since));
63 apply_pool_overrides(jcr);
/* PoolId == 0 is tested as a special case below (presumably "lookup/creation failed" -- TODO confirm) */
65 jcr->jr.PoolId = get_or_create_pool_record(jcr, jcr->pool->hdr.name);
66 if (jcr->jr.PoolId == 0) {
70 /* If pool storage specified, use it instead of job storage */
71 copy_wstorage(jcr, jcr->pool->storage, _("Pool resource"));
74 Jmsg(jcr, M_FATAL, 0, _("No Storage specification found in Job or Pool.\n"));
78 create_clones(jcr); /* run any clone jobs */
/* NOTE(review): debug level 000 -- presumably printed unconditionally; confirm this is not leftover debugging */
80 Dmsg2(000, "rstore=%p wstore=%p\n", jcr->rstore, jcr->wstore);
86 * Do a backup of the specified FileSet
88 * Returns: false on failure
/*
 * Drive one backup job through the Storage daemon (SD) and File daemon (FD).
 *
 * Visible sequence: log the job start and write the job start record;
 * connect to the SD, start the SD job, send "run", start the SD message
 * thread; connect to the FD and send the include list, exclude list and
 * level; send the SD address/port/TLS requirement and the runscript
 * commands; refresh the start time in the catalog; send the backup
 * command; wait for termination and, on JS_Terminated, call
 * backup_cleanup().
 *
 * jcr: job control record of the job to run.
 * Returns: false on failure (per the comment preceding this function);
 *          the return statements themselves are not in the lines shown.
 */
91 bool do_backup(JCR *jcr)
94 int tls_need = BNET_TLS_NONE;
100 /* Print Job Start message */
101 Jmsg(jcr, M_INFO, 0, _("Start Backup JobId %s, Job=%s\n"),
102 edit_uint64(jcr->JobId, ed1), jcr->Job);
104 set_jcr_job_status(jcr, JS_Running);
105 Dmsg2(100, "JobId=%d JobLevel=%c\n", jcr->jr.JobId, jcr->jr.JobLevel);
106 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
107 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
112 * Open a message channel connection with the Storage
113 * daemon. This is to let him know that our client
114 * will be contacting him for a backup session.
117 Dmsg0(110, "Open connection with storage daemon\n");
118 set_jcr_job_status(jcr, JS_WaitSD);
120 * Start conversation with Storage daemon
122 if (!connect_to_storage_daemon(jcr, 10, SDConnectTimeout, 1)) {
126 * Now start a job with the Storage daemon
128 if (!start_storage_daemon_job(jcr, NULL, jcr->wstorage)) {
133 * Start the job prior to starting the message thread below
134 * to avoid two threads from using the BSOCK structure at
137 if (!bnet_fsend(jcr->store_bsock, "run")) {
142 * Now start a Storage daemon message thread. Note,
143 * this thread is used to provide the catalog services
144 * for the backup job, including inserting the attributes
145 * into the catalog. See catalog_update() in catreq.c
147 if (!start_storage_daemon_message_thread(jcr)) {
150 Dmsg0(150, "Storage daemon connection OK\n");
152 set_jcr_job_status(jcr, JS_WaitFD);
153 if (!connect_to_file_daemon(jcr, 10, FDConnectTimeout, 1)) {
157 set_jcr_job_status(jcr, JS_Running);
/* fd is the File daemon socket used for every FD command sent below */
158 fd = jcr->file_bsock;
160 if (!send_include_list(jcr)) {
164 if (!send_exclude_list(jcr)) {
168 if (!send_level_command(jcr)) {
173 * send Storage daemon address to the File daemon
176 if (store->SDDport == 0) {
/* no separate SDDport configured: fall back to SDport (note: this writes into the store resource itself) */
177 store->SDDport = store->SDport;
180 /* TLS Requirement */
181 if (store->tls_enable) {
/* tls_need keeps its BNET_TLS_NONE default unless TLS is enabled for this storage */
182 if (store->tls_require) {
183 tls_need = BNET_TLS_REQUIRED;
185 tls_need = BNET_TLS_OK;
189 bnet_fsend(fd, storaddr, store->address, store->SDDport, tls_need);
/* OKstore is the only reply accepted for the storage address command */
190 if (!response(jcr, fd, OKstore, "Storage", DISPLAY_ERROR)) {
195 if (!send_runscripts_commands(jcr)) {
200 * We re-update the job start record so that the start
201 * time is set after the run before job. This avoids
202 * that any files created by the run before job will
203 * be saved twice. They will be backed up in the current
204 * job, but not in the next one unless they are changed.
205 * Without this, they will be backed up in this job and
206 * in the next job run because in that case, their date
207 * is after the start of this run.
209 jcr->start_time = time(NULL);
/* mirror the new start time into the job record before it is written to the catalog */
210 jcr->jr.StartTime = jcr->start_time;
211 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
212 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
215 /* Send backup command */
216 bnet_fsend(fd, backupcmd);
217 if (!response(jcr, fd, OKbackup, "backup", DISPLAY_ERROR)) {
221 /* Pickup Job termination data */
222 stat = wait_for_job_termination(jcr);
223 if (stat == JS_Terminated) {
/* only a JS_Terminated result reaches backup_cleanup() from this point */
224 backup_cleanup(jcr, stat);
229 /* Come here only after starting SD thread */
231 set_jcr_job_status(jcr, JS_ErrorTerminated);
232 Dmsg1(400, "wait for sd. use=%d\n", jcr->use_count());
/* the error path still waits for the Storage daemon side before continuing */
233 wait_for_storage_daemon_termination(jcr);
234 Dmsg1(400, "after wait for sd. use=%d\n", jcr->use_count());
240 * Here we wait for the File daemon to signal termination,
241 * then we wait for the Storage daemon. When both
242 * are done, we return the job status.
243 * Also used by restore.c
/*
 * Wait for the File daemon (FD) to finish, then for the Storage daemon
 * (SD), and report the resulting job status.
 *
 * Reads FD messages until bget_dirmsg() returns a negative value, looking
 * for the EndJob report. Afterwards it signals BNET_TERMINATE to the FD,
 * waits for the SD, and stores the FD's JobFiles/Errors/ReadBytes/JobBytes
 * counters in the JCR.
 *
 * jcr: job control record; jcr->file_bsock is the FD connection read here.
 * Returns: jcr->JobStatus if it is not JS_Terminated, otherwise
 *          jcr->FDJobStatus if that is not JS_Terminated, otherwise
 *          jcr->SDJobStatus.
 */
245 int wait_for_job_termination(JCR *jcr)
248 BSOCK *fd = jcr->file_bsock;
250 uint32_t JobFiles, Errors;
/* NOTE(review): these two are filled by sscanf() through the %lld conversions in EndJob -- confirm the format/type pairing */
251 uint64_t ReadBytes, JobBytes;
253 set_jcr_job_status(jcr, JS_Running);
254 /* Wait for Client to terminate */
255 while ((n = bget_dirmsg(fd)) >= 0) {
/* the EndJob report is parsed only while fd_ok is still false, and only if all five fields convert */
256 if (!fd_ok && sscanf(fd->msg, EndJob, &jcr->FDJobStatus, &JobFiles,
257 &ReadBytes, &JobBytes, &Errors) == 5) {
259 set_jcr_job_status(jcr, jcr->FDJobStatus);
260 Dmsg1(100, "FDStatus=%c\n", (char)jcr->JobStatus);
262 Jmsg(jcr, M_WARNING, 0, _("Unexpected Client Job message: %s\n"),
265 if (job_canceled(jcr)) {
269 if (is_bnet_error(fd)) {
270 Jmsg(jcr, M_FATAL, 0, _("Network error with FD during %s: ERR=%s\n"),
271 job_type_to_str(jcr->JobType), bnet_strerror(fd));
273 bnet_sig(fd, BNET_TERMINATE); /* tell Client we are terminating */
275 /* Note, the SD stores in jcr->JobFiles/ReadBytes/JobBytes/Errors */
276 wait_for_storage_daemon_termination(jcr);
279 /* Return values from FD */
/* NOTE(review): the four locals are set only by a successful EndJob parse; presumably this copy is guarded by fd_ok in a line not shown -- verify */
281 jcr->JobFiles = JobFiles;
282 jcr->Errors = Errors;
283 jcr->ReadBytes = ReadBytes;
284 jcr->JobBytes = JobBytes;
286 Jmsg(jcr, M_FATAL, 0, _("No Job status returned from FD.\n"));
289 // Dmsg4(100, "fd_ok=%d FDJS=%d JS=%d SDJS=%d\n", fd_ok, jcr->FDJobStatus,
290 // jcr->JobStatus, jcr->SDJobStatus);
292 /* Return the first error status we find Dir, FD, or SD */
293 if (!fd_ok || is_bnet_error(fd)) {
/* no EndJob report was accepted, or the FD socket is in error: record the FD as failed */
294 jcr->FDJobStatus = JS_ErrorTerminated;
296 if (jcr->JobStatus != JS_Terminated) {
297 return jcr->JobStatus;
299 if (jcr->FDJobStatus != JS_Terminated) {
300 return jcr->FDJobStatus;
302 return jcr->SDJobStatus;
306 * Release resources allocated during backup.
/*
 * Finish a backup job: record its final status and print the job summary.
 *
 * Sets the job status to TermCode, updates the job end record, re-reads
 * the job, client and media records for statistics, updates the
 * bootstrap file, chooses a termination message from the final
 * jcr->JobStatus, computes the transfer rate and the software
 * compression ratio, and emits the summary report through Jmsg().
 *
 * jcr:      job control record of the finished job.
 * TermCode: status passed to set_jcr_job_status() as the job's final state.
 */
308 void backup_cleanup(JCR *jcr, int TermCode)
310 char sdt[50], edt[50], schedt[50];
311 char ec1[30], ec2[30], ec3[30], ec4[30], ec5[30], compress[50];
312 char ec6[30], ec7[30], ec8[30], elapsed[50];
313 char term_code[100], fd_term_msg[100], sd_term_msg[100];
314 const char *term_msg;
318 double kbps, compression;
321 Dmsg2(100, "Enter backup_cleanup %d %c\n", TermCode, TermCode);
322 dequeue_messages(jcr); /* display any queued messages */
323 memset(&mr, 0, sizeof(mr));
324 memset(&cr, 0, sizeof(cr));
325 set_jcr_job_status(jcr, TermCode);
327 update_job_end_record(jcr); /* update database */
329 if (!db_get_job_record(jcr, jcr->db, &jcr->jr)) {
330 Jmsg(jcr, M_WARNING, 0, _("Error getting job record for stats: %s"),
331 db_strerror(jcr->db));
/* failing to re-read the job record changes the job status to JS_ErrorTerminated */
332 set_jcr_job_status(jcr, JS_ErrorTerminated);
335 bstrncpy(cr.Name, jcr->client->hdr.name, sizeof(cr.Name));
336 if (!db_get_client_record(jcr, jcr->db, &cr)) {
337 Jmsg(jcr, M_WARNING, 0, _("Error getting client record for stats: %s"),
338 db_strerror(jcr->db));
341 bstrncpy(mr.VolumeName, jcr->VolumeName, sizeof(mr.VolumeName));
342 if (!db_get_media_record(jcr, jcr->db, &mr)) {
343 Jmsg(jcr, M_WARNING, 0, _("Error getting Media record for Volume \"%s\": ERR=%s"),
344 mr.VolumeName, db_strerror(jcr->db));
/* as with the job record, a missing media record changes the status to JS_ErrorTerminated */
345 set_jcr_job_status(jcr, JS_ErrorTerminated);
348 update_bootstrap_file(jcr);
350 msg_type = M_INFO; /* by default INFO message */
351 switch (jcr->JobStatus) {
353 if (jcr->Errors || jcr->SDErrors) {
/* a non-zero FD or SD error count selects the "with warnings" wording */
354 term_msg = _("Backup OK -- with warnings");
356 term_msg = _("Backup OK");
360 case JS_ErrorTerminated:
361 term_msg = _("*** Backup Error ***");
362 msg_type = M_ERROR; /* Generate error message */
363 if (jcr->store_bsock) {
/* signal the Storage daemon to terminate, then cancel the SD message thread if one is recorded */
364 bnet_sig(jcr->store_bsock, BNET_TERMINATE);
365 if (jcr->SD_msg_chan) {
366 pthread_cancel(jcr->SD_msg_chan);
371 term_msg = _("Backup Canceled");
372 if (jcr->store_bsock) {
373 bnet_sig(jcr->store_bsock, BNET_TERMINATE);
374 if (jcr->SD_msg_chan) {
375 pthread_cancel(jcr->SD_msg_chan);
380 term_msg = term_code;
/* NOTE(review): unbounded sprintf into term_code[100] with a translated format; the compression text below uses bsnprintf -- consider the bounded form here too */
381 sprintf(term_code, _("Inappropriate term code: %c\n"), jcr->JobStatus);
384 bstrftimes(schedt, sizeof(schedt), jcr->jr.SchedTime);
385 bstrftimes(sdt, sizeof(sdt), jcr->jr.StartTime);
386 bstrftimes(edt, sizeof(edt), jcr->jr.EndTime);
387 RunTime = jcr->jr.EndTime - jcr->jr.StartTime;
/* NOTE(review): the rate divides by RunTime; presumably a RunTime <= 0 guard sits in lines not shown -- verify */
391 kbps = (double)jcr->jr.JobBytes / (1000 * RunTime);
393 if (!db_get_job_volume_names(jcr, jcr->db, jcr->jr.JobId, &jcr->VolumeName)) {
395 * Note, if the job has erred, most likely it did not write any
396 * tape, so suppress this "error" message since in that case
397 * it is normal. Or look at it the other way, only for a
398 * normal exit should we complain about this error.
400 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes) {
401 Jmsg(jcr, M_ERROR, 0, "%s", db_strerror(jcr->db));
403 jcr->VolumeName[0] = 0; /* none */
406 if (jcr->ReadBytes == 0) {
/* nothing was read, so no ratio can be computed */
407 bstrncpy(compress, "None", sizeof(compress));
409 compression = (double)100 - 100.0 * ((double)jcr->JobBytes / (double)jcr->ReadBytes);
/* a saving below half a percent is reported as "None" */
410 if (compression < 0.5) {
411 bstrncpy(compress, "None", sizeof(compress));
413 bsnprintf(compress, sizeof(compress), "%.1f %%", (float)compression);
416 jobstatus_to_ascii(jcr->FDJobStatus, fd_term_msg, sizeof(fd_term_msg));
417 jobstatus_to_ascii(jcr->SDJobStatus, sd_term_msg, sizeof(sd_term_msg));
419 // bmicrosleep(15, 0); /* for debugging SIGHUP */
/* Emit the end-of-job summary; the argument list must stay in the same order as the format lines */
421 Jmsg(jcr, msg_type, 0, _("Bacula %s (%s): %s\n"
424 " Backup Level: %s%s\n"
425 " Client: \"%s\" %s\n"
426 " FileSet: \"%s\" %s\n"
427 " Pool: \"%s\" (From %s)\n"
428 " Storage: \"%s\" (From %s)\n"
429 " Scheduled time: %s\n"
432 " Elapsed time: %s\n"
434 " FD Files Written: %s\n"
435 " SD Files Written: %s\n"
436 " FD Bytes Written: %s (%sB)\n"
437 " SD Bytes Written: %s (%sB)\n"
439 " Software Compression: %s\n"
440 " Volume name(s): %s\n"
441 " Volume Session Id: %d\n"
442 " Volume Session Time: %d\n"
443 " Last Volume Bytes: %s (%sB)\n"
444 " Non-fatal FD errors: %d\n"
446 " FD termination status: %s\n"
447 " SD termination status: %s\n"
448 " Termination: %s\n\n"),
454 level_to_str(jcr->JobLevel), jcr->since,
455 jcr->client->name(), cr.Uname,
456 jcr->fileset->name(), jcr->FSCreateTime,
457 jcr->pool->name(), jcr->pool_source,
458 jcr->wstore->name(), jcr->storage_source,
462 edit_utime(RunTime, elapsed, sizeof(elapsed)),
464 edit_uint64_with_commas(jcr->jr.JobFiles, ec1),
465 edit_uint64_with_commas(jcr->SDJobFiles, ec2),
466 edit_uint64_with_commas(jcr->jr.JobBytes, ec3),
467 edit_uint64_with_suffix(jcr->jr.JobBytes, ec4),
468 edit_uint64_with_commas(jcr->SDJobBytes, ec5),
469 edit_uint64_with_suffix(jcr->SDJobBytes, ec6),
475 edit_uint64_with_commas(mr.VolBytes, ec7),
476 edit_uint64_with_suffix(mr.VolBytes, ec8),
483 Dmsg0(100, "Leave backup_cleanup()\n");
/*
 * Write bootstrap records for the volumes used by this job.
 *
 * Runs only when the job status is JS_Terminated, the job record shows
 * bytes written, and the Job resource has a WriteBootstrap target. The
 * output goes either to a pipe opened with open_bpipe() or to a file
 * opened with fopen(); the test that chooses between them is not in the
 * lines shown here. One record is written per volume returned by
 * db_get_job_volume_parameters().
 *
 * jcr: job control record of the finished backup.
 */
486 void update_bootstrap_file(JCR *jcr)
488 /* Now update the bootstrap file if any */
489 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes &&
490 jcr->job->WriteBootstrap) {
494 char *fname = jcr->job->WriteBootstrap;
495 VOL_PARAMS *VolParams = NULL;
502 bpipe = open_bpipe(fname, 0, "w");
/* write through the pipe's wfd stream, or leave fd NULL if the pipe could not be opened */
503 fd = bpipe ? bpipe->wfd : NULL;
505 /* ***FIXME*** handle BASE */
/* a Full level opens with "w+b" (truncate); every other level opens with "a+b" (append) */
506 fd = fopen(fname, jcr->JobLevel==L_FULL?"w+b":"a+b");
509 VolCount = db_get_job_volume_parameters(jcr, jcr->db, jcr->JobId,
512 Jmsg(jcr, M_ERROR, 0, _("Could not get Job Volume Parameters to "
513 "update Bootstrap file. ERR=%s\n"), db_strerror(jcr->db));
/* the job is marked failed only if the SD reports that files were written */
514 if (jcr->SDJobFiles != 0) {
515 set_jcr_job_status(jcr, JS_ErrorTerminated);
519 /* Start output with when and who wrote it */
520 bstrftimes(edt, sizeof(edt), time(NULL));
521 fprintf(fd, "# %s - %s - %s%s\n", edt, jcr->jr.Job,
522 level_to_str(jcr->JobLevel), jcr->since);
523 for (int i=0; i < VolCount; i++) {
524 /* Write the record */
525 fprintf(fd, "Volume=\"%s\"\n", VolParams[i].VolumeName);
526 fprintf(fd, "MediaType=\"%s\"\n", VolParams[i].MediaType);
/* session id and time come from the JCR, so they repeat unchanged in every volume record */
527 fprintf(fd, "VolSessionId=%u\n", jcr->VolSessionId);
528 fprintf(fd, "VolSessionTime=%u\n", jcr->VolSessionTime);
529 fprintf(fd, "VolFile=%u-%u\n", VolParams[i].StartFile,
530 VolParams[i].EndFile);
531 fprintf(fd, "VolBlock=%u-%u\n", VolParams[i].StartBlock,
532 VolParams[i].EndBlock);
533 fprintf(fd, "FileIndex=%d-%d\n", VolParams[i].FirstIndex,
534 VolParams[i].LastIndex);
546 Jmsg(jcr, M_ERROR, 0, _("Could not open WriteBootstrap file:\n"
547 "%s: ERR=%s\n"), fname, be.strerror());
/* failing to open the bootstrap target marks the job JS_ErrorTerminated */
548 set_jcr_job_status(jcr, JS_ErrorTerminated);