3 * Bacula Director -- backup.c -- responsible for doing backup jobs
5 * Kern Sibbald, March MM
7 * Basic tasks done here:
8 * Open DB and create records for this job.
9 * Open Message Channel with Storage daemon to tell him a job will be starting.
10 * Open connection with File daemon and pass him commands
12 * When the File daemon finishes the job, update the DB.
17 Copyright (C) 2000-2006 Kern Sibbald
19 This program is free software; you can redistribute it and/or
20 modify it under the terms of the GNU General Public License
21 version 2 as amended with additional clauses defined in the
22 file LICENSE in the main source directory.
24 This program is distributed in the hope that it will be useful,
25 but WITHOUT ANY WARRANTY; without even the implied warranty of
26 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27 file LICENSE for additional details.
35 /* Commands sent to File daemon */
36 static char backupcmd[] = "backup\n";
37 static char storaddr[] = "storage address=%s port=%d ssl=%d\n";
39 /* Responses received from File daemon */
40 static char OKbackup[] = "2000 OK backup\n";
41 static char OKstore[] = "2000 OK storage\n";
42 static char EndJob[] = "2800 End Job TermCode=%d JobFiles=%u "
43 "ReadBytes=%lld JobBytes=%lld Errors=%u\n";
46 * Called here before the job is run to do the job
49 bool do_backup_init(JCR *jcr)
53 if (!get_or_create_fileset_record(jcr)) {
58 * Get definitive Job level and since time
60 get_level_since_time(jcr, jcr->since, sizeof(jcr->since));
63 * Apply any level related Pool selections
65 switch (jcr->JobLevel) {
68 jcr->pool = jcr->full_pool;
73 jcr->pool = jcr->inc_pool;
78 jcr->pool = jcr->dif_pool;
82 memset(&pr, 0, sizeof(pr));
83 bstrncpy(pr.Name, jcr->pool->hdr.name, sizeof(pr.Name));
85 if (!db_get_pool_record(jcr, jcr->db, &pr)) { /* get by Name */
86 /* Try to create the pool */
87 if (create_pool(jcr, jcr->db, jcr->pool, POOL_OP_CREATE) < 0) {
88 Jmsg(jcr, M_FATAL, 0, _("Pool %s not in database. %s"), pr.Name,
89 db_strerror(jcr->db));
92 Jmsg(jcr, M_INFO, 0, _("Pool %s created in database.\n"), pr.Name);
93 if (!db_get_pool_record(jcr, jcr->db, &pr)) { /* get by Name */
94 Jmsg(jcr, M_FATAL, 0, _("Pool %s not in database. %s"), pr.Name,
95 db_strerror(jcr->db));
100 jcr->jr.PoolId = pr.PoolId;
102 /* If pool storage specified, use it instead of job storage */
103 copy_storage(jcr, jcr->pool->storage);
106 Jmsg(jcr, M_FATAL, 0, _("No Storage specification found in Job or Pool.\n"));
110 create_clones(jcr); /* run any clone jobs */
116 * Do a backup of the specified FileSet
118 * Returns: false on failure
121 bool do_backup(JCR *jcr)
124 int tls_need = BNET_TLS_NONE;
130 /* Print Job Start message */
131 Jmsg(jcr, M_INFO, 0, _("Start Backup JobId %s, Job=%s\n"),
132 edit_uint64(jcr->JobId, ed1), jcr->Job);
134 set_jcr_job_status(jcr, JS_Running);
135 Dmsg2(100, "JobId=%d JobLevel=%c\n", jcr->jr.JobId, jcr->jr.JobLevel);
136 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
137 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
142 * Open a message channel connection with the Storage
143 * daemon. This is to let him know that our client
144 * will be contacting him for a backup session.
147 Dmsg0(110, "Open connection with storage daemon\n");
148 set_jcr_job_status(jcr, JS_WaitSD);
150 * Start conversation with Storage daemon
152 if (!connect_to_storage_daemon(jcr, 10, SDConnectTimeout, 1)) {
156 * Now start a job with the Storage daemon
158 if (!start_storage_daemon_job(jcr, NULL, jcr->storage)) {
162 * Now start a Storage daemon message thread. Note,
163 * this thread is used to provide the catalog services
164 * for the backup job, including inserting the attributes
165 * into the catalog. See catalog_update() in catreq.c
167 if (!start_storage_daemon_message_thread(jcr)) {
170 Dmsg0(150, "Storage daemon connection OK\n");
172 if (!bnet_fsend(jcr->store_bsock, "run")) {
176 set_jcr_job_status(jcr, JS_WaitFD);
177 if (!connect_to_file_daemon(jcr, 10, FDConnectTimeout, 1)) {
181 set_jcr_job_status(jcr, JS_Running);
182 fd = jcr->file_bsock;
184 if (!send_include_list(jcr)) {
188 if (!send_exclude_list(jcr)) {
192 if (!send_level_command(jcr)) {
197 * send Storage daemon address to the File daemon
200 if (store->SDDport == 0) {
201 store->SDDport = store->SDport;
204 /* TLS Requirement */
205 if (store->tls_enable) {
206 if (store->tls_require) {
207 tls_need = BNET_TLS_REQUIRED;
209 tls_need = BNET_TLS_OK;
213 bnet_fsend(fd, storaddr, store->address, store->SDDport, tls_need);
214 if (!response(jcr, fd, OKstore, "Storage", DISPLAY_ERROR)) {
219 if (!send_run_before_and_after_commands(jcr)) {
223 /* Send backup command */
224 bnet_fsend(fd, backupcmd);
225 if (!response(jcr, fd, OKbackup, "backup", DISPLAY_ERROR)) {
229 /* Pickup Job termination data */
230 stat = wait_for_job_termination(jcr);
231 if (stat == JS_Terminated) {
232 backup_cleanup(jcr, stat);
237 /* Come here only after starting SD thread */
239 set_jcr_job_status(jcr, JS_ErrorTerminated);
240 Dmsg1(400, "wait for sd. use=%d\n", jcr->use_count());
241 wait_for_storage_daemon_termination(jcr);
242 Dmsg1(400, "after wait for sd. use=%d\n", jcr->use_count());
248 * Here we wait for the File daemon to signal termination,
249 * then we wait for the Storage daemon. When both
250 * are done, we return the job status.
251 * Also used by restore.c
253 int wait_for_job_termination(JCR *jcr)
256 BSOCK *fd = jcr->file_bsock;
258 uint32_t JobFiles, Errors;
259 uint64_t ReadBytes, JobBytes;
261 set_jcr_job_status(jcr, JS_Running);
262 /* Wait for Client to terminate */
263 while ((n = bget_dirmsg(fd)) >= 0) {
264 if (!fd_ok && sscanf(fd->msg, EndJob, &jcr->FDJobStatus, &JobFiles,
265 &ReadBytes, &JobBytes, &Errors) == 5) {
267 set_jcr_job_status(jcr, jcr->FDJobStatus);
268 Dmsg1(100, "FDStatus=%c\n", (char)jcr->JobStatus);
270 Jmsg(jcr, M_WARNING, 0, _("Unexpected Client Job message: %s\n"),
273 if (job_canceled(jcr)) {
277 if (is_bnet_error(fd)) {
278 Jmsg(jcr, M_FATAL, 0, _("Network error with FD during %s: ERR=%s\n"),
279 job_type_to_str(jcr->JobType), bnet_strerror(fd));
281 bnet_sig(fd, BNET_TERMINATE); /* tell Client we are terminating */
283 /* Note, the SD stores in jcr->JobFiles/ReadBytes/JobBytes/Errors */
284 wait_for_storage_daemon_termination(jcr);
287 /* Return values from FD */
289 jcr->JobFiles = JobFiles;
290 jcr->Errors = Errors;
291 jcr->ReadBytes = ReadBytes;
292 jcr->JobBytes = JobBytes;
294 Jmsg(jcr, M_FATAL, 0, _("No Job status returned from FD.\n"));
297 // Dmsg4(100, "fd_ok=%d FDJS=%d JS=%d SDJS=%d\n", fd_ok, jcr->FDJobStatus,
298 // jcr->JobStatus, jcr->SDJobStatus);
300 /* Return the first error status we find: Dir, then FD, then SD */
301 if (!fd_ok || is_bnet_error(fd)) {
302 jcr->FDJobStatus = JS_ErrorTerminated;
304 if (jcr->JobStatus != JS_Terminated) {
305 return jcr->JobStatus;
307 if (jcr->FDJobStatus != JS_Terminated) {
308 return jcr->FDJobStatus;
310 return jcr->SDJobStatus;
314 * Release resources allocated during backup.
316 void backup_cleanup(JCR *jcr, int TermCode)
318 char sdt[50], edt[50], schedt[50];
319 char ec1[30], ec2[30], ec3[30], ec4[30], ec5[30], compress[50];
320 char ec6[30], ec7[30], ec8[30], elapsed[50];
321 char term_code[100], fd_term_msg[100], sd_term_msg[100];
322 const char *term_msg;
326 double kbps, compression;
329 Dmsg2(100, "Enter backup_cleanup %d %c\n", TermCode, TermCode);
330 dequeue_messages(jcr); /* display any queued messages */
331 memset(&mr, 0, sizeof(mr));
332 memset(&cr, 0, sizeof(cr));
333 set_jcr_job_status(jcr, TermCode);
335 update_job_end_record(jcr); /* update database */
337 if (!db_get_job_record(jcr, jcr->db, &jcr->jr)) {
338 Jmsg(jcr, M_WARNING, 0, _("Error getting job record for stats: %s"),
339 db_strerror(jcr->db));
340 set_jcr_job_status(jcr, JS_ErrorTerminated);
343 bstrncpy(cr.Name, jcr->client->hdr.name, sizeof(cr.Name));
344 if (!db_get_client_record(jcr, jcr->db, &cr)) {
345 Jmsg(jcr, M_WARNING, 0, _("Error getting client record for stats: %s"),
346 db_strerror(jcr->db));
349 bstrncpy(mr.VolumeName, jcr->VolumeName, sizeof(mr.VolumeName));
350 if (!db_get_media_record(jcr, jcr->db, &mr)) {
351 Jmsg(jcr, M_WARNING, 0, _("Error getting Media record for Volume \"%s\": ERR=%s"),
352 mr.VolumeName, db_strerror(jcr->db));
353 set_jcr_job_status(jcr, JS_ErrorTerminated);
356 update_bootstrap_file(jcr);
358 msg_type = M_INFO; /* by default INFO message */
359 switch (jcr->JobStatus) {
361 if (jcr->Errors || jcr->SDErrors) {
362 term_msg = _("Backup OK -- with warnings");
364 term_msg = _("Backup OK");
368 case JS_ErrorTerminated:
369 term_msg = _("*** Backup Error ***");
370 msg_type = M_ERROR; /* Generate error message */
371 if (jcr->store_bsock) {
372 bnet_sig(jcr->store_bsock, BNET_TERMINATE);
373 if (jcr->SD_msg_chan) {
374 pthread_cancel(jcr->SD_msg_chan);
379 term_msg = _("Backup Canceled");
380 if (jcr->store_bsock) {
381 bnet_sig(jcr->store_bsock, BNET_TERMINATE);
382 if (jcr->SD_msg_chan) {
383 pthread_cancel(jcr->SD_msg_chan);
388 term_msg = term_code;
389 sprintf(term_code, _("Inappropriate term code: %c\n"), jcr->JobStatus);
392 bstrftimes(schedt, sizeof(schedt), jcr->jr.SchedTime);
393 bstrftimes(sdt, sizeof(sdt), jcr->jr.StartTime);
394 bstrftimes(edt, sizeof(edt), jcr->jr.EndTime);
395 RunTime = jcr->jr.EndTime - jcr->jr.StartTime;
399 kbps = (double)jcr->jr.JobBytes / (1000 * RunTime);
401 if (!db_get_job_volume_names(jcr, jcr->db, jcr->jr.JobId, &jcr->VolumeName)) {
403 * Note, if the job has erred, most likely it did not write any
404 * tape, so suppress this "error" message since in that case
405 * it is normal. Or look at it the other way, only for a
406 * normal exit should we complain about this error.
408 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes) {
409 Jmsg(jcr, M_ERROR, 0, "%s", db_strerror(jcr->db));
411 jcr->VolumeName[0] = 0; /* none */
414 if (jcr->ReadBytes == 0) {
415 bstrncpy(compress, "None", sizeof(compress));
417 compression = (double)100 - 100.0 * ((double)jcr->JobBytes / (double)jcr->ReadBytes);
418 if (compression < 0.5) {
419 bstrncpy(compress, "None", sizeof(compress));
421 bsnprintf(compress, sizeof(compress), "%.1f %%", (float)compression);
424 jobstatus_to_ascii(jcr->FDJobStatus, fd_term_msg, sizeof(fd_term_msg));
425 jobstatus_to_ascii(jcr->SDJobStatus, sd_term_msg, sizeof(sd_term_msg));
427 // bmicrosleep(15, 0); /* for debugging SIGHUP */
429 Jmsg(jcr, msg_type, 0, _("Bacula %s (%s): %s\n"
432 " Backup Level: %s%s\n"
433 " Client: \"%s\" %s\n"
434 " FileSet: \"%s\" %s\n"
437 " Scheduled time: %s\n"
440 " Elapsed time: %s\n"
442 " FD Files Written: %s\n"
443 " SD Files Written: %s\n"
444 " FD Bytes Written: %s (%sB)\n"
445 " SD Bytes Written: %s (%sB)\n"
447 " Software Compression: %s\n"
448 " Volume name(s): %s\n"
449 " Volume Session Id: %d\n"
450 " Volume Session Time: %d\n"
451 " Last Volume Bytes: %s (%sB)\n"
452 " Non-fatal FD errors: %d\n"
454 " FD termination status: %s\n"
455 " SD termination status: %s\n"
456 " Termination: %s\n\n"),
462 level_to_str(jcr->JobLevel), jcr->since,
463 jcr->client->hdr.name, cr.Uname,
464 jcr->fileset->hdr.name, jcr->FSCreateTime,
466 jcr->store->hdr.name,
470 edit_utime(RunTime, elapsed, sizeof(elapsed)),
472 edit_uint64_with_commas(jcr->jr.JobFiles, ec1),
473 edit_uint64_with_commas(jcr->SDJobFiles, ec2),
474 edit_uint64_with_commas(jcr->jr.JobBytes, ec3),
475 edit_uint64_with_suffix(jcr->jr.JobBytes, ec4),
476 edit_uint64_with_commas(jcr->SDJobBytes, ec5),
477 edit_uint64_with_suffix(jcr->SDJobBytes, ec6),
483 edit_uint64_with_commas(mr.VolBytes, ec7),
484 edit_uint64_with_suffix(mr.VolBytes, ec8),
491 Dmsg0(100, "Leave backup_cleanup()\n");
494 void update_bootstrap_file(JCR *jcr)
496 /* Now update the bootstrap file if any */
497 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes &&
498 jcr->job->WriteBootstrap) {
502 char *fname = jcr->job->WriteBootstrap;
503 VOL_PARAMS *VolParams = NULL;
510 bpipe = open_bpipe(fname, 0, "w");
511 fd = bpipe ? bpipe->wfd : NULL;
513 /* ***FIXME*** handle BASE */
514 fd = fopen(fname, jcr->JobLevel==L_FULL?"w+":"a+");
517 VolCount = db_get_job_volume_parameters(jcr, jcr->db, jcr->JobId,
520 Jmsg(jcr, M_ERROR, 0, _("Could not get Job Volume Parameters to "
521 "update Bootstrap file. ERR=%s\n"), db_strerror(jcr->db));
522 if (jcr->SDJobFiles != 0) {
523 set_jcr_job_status(jcr, JS_ErrorTerminated);
527 /* Start output with when and who wrote it */
528 bstrftimes(edt, sizeof(edt), time(NULL));
529 fprintf(fd, "# %s - %s - %s%s\n", edt, jcr->jr.Job,
530 level_to_str(jcr->JobLevel), jcr->since);
531 for (int i=0; i < VolCount; i++) {
532 /* Write the record */
533 fprintf(fd, "Volume=\"%s\"\n", VolParams[i].VolumeName);
534 fprintf(fd, "MediaType=\"%s\"\n", VolParams[i].MediaType);
535 fprintf(fd, "VolSessionId=%u\n", jcr->VolSessionId);
536 fprintf(fd, "VolSessionTime=%u\n", jcr->VolSessionTime);
537 fprintf(fd, "VolFile=%u-%u\n", VolParams[i].StartFile,
538 VolParams[i].EndFile);
539 fprintf(fd, "VolBlock=%u-%u\n", VolParams[i].StartBlock,
540 VolParams[i].EndBlock);
541 fprintf(fd, "FileIndex=%d-%d\n", VolParams[i].FirstIndex,
542 VolParams[i].LastIndex);
554 Jmsg(jcr, M_ERROR, 0, _("Could not open WriteBootstrap file:\n"
555 "%s: ERR=%s\n"), fname, be.strerror());
556 set_jcr_job_status(jcr, JS_ErrorTerminated);