3 * Bacula Director -- backup.c -- responsible for doing backup jobs
5 * Kern Sibbald, March MM
7 * Basic tasks done here:
8 * Open DB and create records for this job.
9 * Open Message Channel with Storage daemon to tell him a job will be starting.
10 * Open connection with File daemon and pass him commands
12 * When the File daemon finishes the job, update the DB.
17 Copyright (C) 2000-2006 Kern Sibbald
19 This program is free software; you can redistribute it and/or
20 modify it under the terms of the GNU General Public License
21 version 2 as amended with additional clauses defined in the
22 file LICENSE in the main source directory.
24 This program is distributed in the hope that it will be useful,
25 but WITHOUT ANY WARRANTY; without even the implied warranty of
26 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27 file LICENSE for additional details.
/*
 * Protocol strings exchanged with the File daemon during a backup.
 * The command strings are handed to bnet_fsend() as format strings;
 * the response strings are handed to response() or used as a scan
 * pattern in wait_for_job_termination().
 */
35 /* Commands sent to File daemon */
36 static char backupcmd[] = "backup\n";
/* Filled in by do_backup() with the Storage daemon address, its port and the TLS need level */
37 static char storaddr[] = "storage address=%s port=%d ssl=%d\n";
39 /* Responses received from File daemon */
40 static char OKbackup[] = "2000 OK backup\n";
41 static char OKstore[] = "2000 OK storage\n";
/*
 * Scan pattern for the File daemon's end-of-job message; it yields the
 * termination code, file count, bytes read, bytes written and error count.
 */
42 static char EndJob[] = "2800 End Job TermCode=%d JobFiles=%u "
43 "ReadBytes=%lld JobBytes=%lld Errors=%u\n";
46 * Called here before the job is run to do the job
/*
 * Prepare a backup job before it is run.
 *
 * In order, this routine: obtains (or creates) the FileSet record,
 * settles the job level and the "since" time, assigns jcr->pool from
 * the full/incremental/differential pool according to jcr->JobLevel,
 * makes sure that pool has a catalog record (creating it when the
 * lookup by name fails), stores the pool's id in the job record,
 * copies the pool's storage definition into the job and finally
 * starts any clone jobs.
 *
 * Fatal conditions are reported with Jmsg(..., M_FATAL, ...).
 * NOTE(review): the return statements are not shown here; presumably
 * false is returned after each fatal message and true at the end --
 * confirm against the full function.
 */
49 bool do_backup_init(JCR *jcr)
53 if (!get_or_create_fileset_record(jcr)) {
58 * Get definitive Job level and since time
60 get_level_since_time(jcr, jcr->since, sizeof(jcr->since));
63 * Apply any level related Pool selections
65 switch (jcr->JobLevel) {
68 jcr->pool = jcr->full_pool;
73 jcr->pool = jcr->inc_pool;
78 jcr->pool = jcr->dif_pool;
/* Look the selected pool up in the catalog by its resource name */
82 memset(&pr, 0, sizeof(pr));
83 bstrncpy(pr.Name, jcr->pool->hdr.name, sizeof(pr.Name));
85 if (!db_get_pool_record(jcr, jcr->db, &pr)) { /* get by Name */
86 /* Try to create the pool */
87 if (create_pool(jcr, jcr->db, jcr->pool, POOL_OP_CREATE) < 0) {
88 Jmsg(jcr, M_FATAL, 0, _("Pool %s not in database. %s"), pr.Name,
89 db_strerror(jcr->db));
92 Jmsg(jcr, M_INFO, 0, _("Pool %s created in database.\n"), pr.Name);
/* Read the record back after creating it so that pr (and pr.PoolId) is populated */
93 if (!db_get_pool_record(jcr, jcr->db, &pr)) { /* get by Name */
94 Jmsg(jcr, M_FATAL, 0, _("Pool %s not in database. %s"), pr.Name,
95 db_strerror(jcr->db));
/* Remember which pool this job writes to */
100 jcr->jr.PoolId = pr.PoolId;
102 /* If pool storage specified, use it instead of job storage */
103 copy_storage(jcr, jcr->pool->storage);
106 Jmsg(jcr, M_FATAL, 0, _("No Storage specification found in Job or Pool.\n"));
110 create_clones(jcr); /* run any clone jobs */
116 * Do a backup of the specified FileSet
118 * Returns: false on failure
/*
 * Run a backup job.
 *
 * Sequence visible in this routine:
 *   1. announce the job and write the job start record to the catalog;
 *   2. connect to the Storage daemon, start the job there, start the
 *      Storage daemon message thread and send it "run";
 *   3. connect to the File daemon and send the include list, exclude
 *      list and level command;
 *   4. send the Storage daemon address, port and TLS need level to the
 *      File daemon and wait for its OK;
 *   5. send the run before/after commands and the backup command;
 *   6. wait for termination and, on JS_Terminated, call backup_cleanup().
 *
 * The job status is moved through JS_Running, JS_WaitSD, JS_WaitFD and
 * back to JS_Running as the steps progress.
 */
121 bool do_backup(JCR *jcr)
/* Default: no TLS towards the Storage daemon unless the store resource enables it */
124 int tls_need = BNET_TLS_NONE;
130 /* Print Job Start message */
131 Jmsg(jcr, M_INFO, 0, _("Start Backup JobId %s, Job=%s\n"),
132 edit_uint64(jcr->JobId, ed1), jcr->Job);
134 set_jcr_job_status(jcr, JS_Running);
135 Dmsg2(100, "JobId=%d JobLevel=%c\n", jcr->jr.JobId, jcr->jr.JobLevel);
136 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
137 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
142 * Open a message channel connection with the Storage
143 * daemon. This is to let him know that our client
144 * will be contacting him for a backup session.
147 Dmsg0(110, "Open connection with storage daemon\n");
148 set_jcr_job_status(jcr, JS_WaitSD);
150 * Start conversation with Storage daemon
152 if (!connect_to_storage_daemon(jcr, 10, SDConnectTimeout, 1)) {
156 * Now start a job with the Storage daemon
158 if (!start_storage_daemon_job(jcr, NULL, jcr->storage)) {
162 * Now start a Storage daemon message thread. Note,
163 * this thread is used to provide the catalog services
164 * for the backup job, including inserting the attributes
165 * into the catalog. See catalog_update() in catreq.c
167 if (!start_storage_daemon_message_thread(jcr)) {
170 Dmsg0(150, "Storage daemon connection OK\n");
/* Send the "run" command over the Storage daemon socket */
172 if (!bnet_fsend(jcr->store_bsock, "run")) {
176 set_jcr_job_status(jcr, JS_WaitFD);
177 if (!connect_to_file_daemon(jcr, 10, FDConnectTimeout, 1)) {
181 set_jcr_job_status(jcr, JS_Running);
182 fd = jcr->file_bsock;
184 if (!send_include_list(jcr)) {
188 if (!send_exclude_list(jcr)) {
192 if (!send_level_command(jcr)) {
197 * send Storage daemon address to the File daemon
/* A zero SDDport is replaced by SDport; note that this writes into the store resource itself */
200 if (store->SDDport == 0) {
201 store->SDDport = store->SDport;
204 /* TLS Requirement */
205 if (store->tls_enable) {
206 if (store->tls_require) {
207 tls_need = BNET_TLS_REQUIRED;
209 tls_need = BNET_TLS_OK;
/*
 * NOTE(review): the result of this bnet_fsend() is not checked, unlike the
 * "run" send above; a failed send is presumably caught by the response()
 * call that follows -- confirm.
 */
213 bnet_fsend(fd, storaddr, store->address, store->SDDport, tls_need);
214 if (!response(jcr, fd, OKstore, "Storage", DISPLAY_ERROR)) {
219 if (!send_run_before_and_after_commands(jcr)) {
223 /* Send backup command */
/* NOTE(review): send result is unchecked here as well; response() below is the only check */
224 bnet_fsend(fd, backupcmd);
225 if (!response(jcr, fd, OKbackup, "backup", DISPLAY_ERROR)) {
229 /* Pickup Job termination data */
230 stat = wait_for_job_termination(jcr);
/* A normal termination is finished off through backup_cleanup() */
231 if (stat == JS_Terminated) {
232 backup_cleanup(jcr, stat);
240 * Here we wait for the File daemon to signal termination,
241 * then we wait for the Storage daemon. When both
242 * are done, we return the job status.
243 * Also used by restore.c
/*
 * Wait for the File daemon and then the Storage daemon to finish.
 *
 * Messages from the File daemon are read until bget_dirmsg() returns a
 * negative value. A message matching the EndJob pattern supplies the
 * File daemon's termination code and its file/byte/error counters;
 * other messages produce an "Unexpected Client Job message" warning.
 * Afterwards the File daemon is sent BNET_TERMINATE, the Storage daemon
 * is waited for, and the File daemon's counters are stored in the jcr.
 *
 * Returns the first status that is not JS_Terminated, checked in the
 * order jcr->JobStatus, jcr->FDJobStatus; otherwise jcr->SDJobStatus.
 */
245 int wait_for_job_termination(JCR *jcr)
248 BSOCK *fd = jcr->file_bsock;
/*
 * These counters have no initialiser; they are only written by the
 * sscanf() below. NOTE(review): they are presumably read only when
 * fd_ok is set -- confirm the guard around the assignments further down.
 */
250 uint32_t JobFiles, Errors;
251 uint64_t ReadBytes, JobBytes;
253 set_jcr_job_status(jcr, JS_Running);
254 /* Wait for Client to terminate */
255 while ((n = bget_dirmsg(fd)) >= 0) {
/*
 * Only the first EndJob message is parsed (the !fd_ok test); all five
 * fields must convert for it to count.
 * NOTE(review): EndJob uses %lld for ReadBytes/JobBytes, which are
 * uint64_t here -- a signedness mismatch for a standard sscanf();
 * confirm whether sscanf is the libc one or a project replacement.
 */
256 if (!fd_ok && sscanf(fd->msg, EndJob, &jcr->FDJobStatus, &JobFiles,
257 &ReadBytes, &JobBytes, &Errors) == 5) {
259 set_jcr_job_status(jcr, jcr->FDJobStatus);
260 Dmsg1(100, "FDStatus=%c\n", (char)jcr->JobStatus);
262 Jmsg(jcr, M_WARNING, 0, _("Unexpected Client Job message: %s\n"),
265 if (job_canceled(jcr)) {
269 if (is_bnet_error(fd)) {
270 Jmsg(jcr, M_FATAL, 0, _("Network error with FD during %s: ERR=%s\n"),
271 job_type_to_str(jcr->JobType), bnet_strerror(fd));
273 bnet_sig(fd, BNET_TERMINATE); /* tell Client we are terminating */
275 /* Note, the SD stores in jcr->JobFiles/ReadBytes/JobBytes/Errors */
276 wait_for_storage_daemon_termination(jcr);
279 /* Return values from FD */
/* These assignments overwrite whatever the Storage daemon stored in the same jcr fields */
281 jcr->JobFiles = JobFiles;
282 jcr->Errors = Errors;
283 jcr->ReadBytes = ReadBytes;
284 jcr->JobBytes = JobBytes;
286 Jmsg(jcr, M_FATAL, 0, _("No Job status returned from FD.\n"));
289 // Dmsg4(100, "fd_ok=%d FDJS=%d JS=%d SDJS=%d\n", fd_ok, jcr->FDJobStatus,
290 // jcr->JobStatus, jcr->SDJobStatus);
292 /* Return the first error status we find Dir, FD, or SD */
/* No EndJob message, or a socket error, is treated as an error termination of the File daemon */
293 if (!fd_ok || is_bnet_error(fd)) {
294 jcr->FDJobStatus = JS_ErrorTerminated;
296 if (jcr->JobStatus != JS_Terminated) {
297 return jcr->JobStatus;
299 if (jcr->FDJobStatus != JS_Terminated) {
300 return jcr->FDJobStatus;
302 return jcr->SDJobStatus;
306 * Release resources allocated during backup.
/*
 * Finish a backup job and emit the job report.
 *
 * Sets the job status to TermCode, updates the job end record, re-reads
 * the job, client and media records for statistics, updates the
 * bootstrap file, chooses a termination message from the final job
 * status, computes the elapsed time, transfer rate and compression
 * ratio, and sends the summary through Jmsg().
 *
 * jcr      - job control record of the backup being finished
 * TermCode - job status to record for the job (a JS_xxx value)
 *
 * A failure to read the job or media record downgrades the job status
 * to JS_ErrorTerminated; a failure to read the client record only
 * produces a warning.
 */
308 void backup_cleanup(JCR *jcr, int TermCode)
/* Scratch buffers for formatted times, edited numbers and status strings used in the report */
310 char sdt[50], edt[50], schedt[50];
311 char ec1[30], ec2[30], ec3[30], ec4[30], ec5[30], compress[50];
312 char ec6[30], ec7[30], ec8[30], elapsed[50];
313 char term_code[100], fd_term_msg[100], sd_term_msg[100];
314 const char *term_msg;
318 double kbps, compression;
321 Dmsg2(100, "Enter backup_cleanup %d %c\n", TermCode, TermCode);
322 dequeue_messages(jcr); /* display any queued messages */
323 memset(&mr, 0, sizeof(mr));
324 memset(&cr, 0, sizeof(cr));
325 set_jcr_job_status(jcr, TermCode);
327 update_job_end_record(jcr); /* update database */
/* Re-read the job record so the report uses the values stored in the catalog */
329 if (!db_get_job_record(jcr, jcr->db, &jcr->jr)) {
330 Jmsg(jcr, M_WARNING, 0, _("Error getting job record for stats: %s"),
331 db_strerror(jcr->db));
332 set_jcr_job_status(jcr, JS_ErrorTerminated);
335 bstrncpy(cr.Name, jcr->client->hdr.name, sizeof(cr.Name));
336 if (!db_get_client_record(jcr, jcr->db, &cr)) {
337 Jmsg(jcr, M_WARNING, 0, _("Error getting client record for stats: %s"),
338 db_strerror(jcr->db));
341 bstrncpy(mr.VolumeName, jcr->VolumeName, sizeof(mr.VolumeName));
342 if (!db_get_media_record(jcr, jcr->db, &mr)) {
343 Jmsg(jcr, M_WARNING, 0, _("Error getting Media record for Volume \"%s\": ERR=%s"),
344 mr.VolumeName, db_strerror(jcr->db));
345 set_jcr_job_status(jcr, JS_ErrorTerminated);
348 update_bootstrap_file(jcr);
350 msg_type = M_INFO; /* by default INFO message */
/* The switch uses jcr->JobStatus, which the failures above may have changed from TermCode */
351 switch (jcr->JobStatus) {
353 if (jcr->Errors || jcr->SDErrors) {
354 term_msg = _("Backup OK -- with warnings");
356 term_msg = _("Backup OK");
360 case JS_ErrorTerminated:
361 term_msg = _("*** Backup Error ***");
362 msg_type = M_ERROR; /* Generate error message */
/* Tell the Storage daemon to stop and cancel its message thread if one exists */
363 if (jcr->store_bsock) {
364 bnet_sig(jcr->store_bsock, BNET_TERMINATE);
365 if (jcr->SD_msg_chan) {
366 pthread_cancel(jcr->SD_msg_chan);
371 term_msg = _("Backup Canceled");
372 if (jcr->store_bsock) {
373 bnet_sig(jcr->store_bsock, BNET_TERMINATE);
374 if (jcr->SD_msg_chan) {
375 pthread_cancel(jcr->SD_msg_chan);
380 term_msg = term_code;
/*
 * NOTE(review): unbounded sprintf() into term_code[100]; the rest of this
 * function uses bsnprintf() with an explicit size -- consider the same
 * here, since the translated format string length is not fixed.
 */
381 sprintf(term_code, _("Inappropriate term code: %c\n"), jcr->JobStatus);
384 bstrftimes(schedt, sizeof(schedt), jcr->jr.SchedTime);
385 bstrftimes(sdt, sizeof(sdt), jcr->jr.StartTime);
386 bstrftimes(edt, sizeof(edt), jcr->jr.EndTime);
387 RunTime = jcr->jr.EndTime - jcr->jr.StartTime;
/*
 * Rate in units of 1000 bytes per second of run time.
 * NOTE(review): RunTime can be zero when start and end times are equal;
 * the guard is presumably on lines not shown -- confirm.
 */
391 kbps = (double)jcr->jr.JobBytes / (1000 * RunTime);
393 if (!db_get_job_volume_names(jcr, jcr->db, jcr->jr.JobId, &jcr->VolumeName)) {
395 * Note, if the job has erred, most likely it did not write any
396 * tape, so suppress this "error" message since in that case
397 * it is normal. Or look at it the other way, only for a
398 * normal exit should we complain about this error.
400 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes) {
401 Jmsg(jcr, M_ERROR, 0, "%s", db_strerror(jcr->db));
403 jcr->VolumeName[0] = 0; /* none */
/* Compression is reported as "None" when nothing was read or the saving is below 0.5 percent */
406 if (jcr->ReadBytes == 0) {
407 bstrncpy(compress, "None", sizeof(compress));
409 compression = (double)100 - 100.0 * ((double)jcr->JobBytes / (double)jcr->ReadBytes);
410 if (compression < 0.5) {
411 bstrncpy(compress, "None", sizeof(compress));
413 bsnprintf(compress, sizeof(compress), "%.1f %%", (float)compression);
416 jobstatus_to_ascii(jcr->FDJobStatus, fd_term_msg, sizeof(fd_term_msg));
417 jobstatus_to_ascii(jcr->SDJobStatus, sd_term_msg, sizeof(sd_term_msg));
419 // bmicrosleep(15, 0); /* for debugging SIGHUP */
/* Job report: sent as M_INFO, or M_ERROR when the job ended in error */
421 Jmsg(jcr, msg_type, 0, _("Bacula %s (%s): %s\n"
424 " Backup Level: %s%s\n"
425 " Client: \"%s\" %s\n"
426 " FileSet: \"%s\" %s\n"
429 " Scheduled time: %s\n"
432 " Elapsed time: %s\n"
434 " FD Files Written: %s\n"
435 " SD Files Written: %s\n"
436 " FD Bytes Written: %s (%sB)\n"
437 " SD Bytes Written: %s (%sB)\n"
439 " Software Compression: %s\n"
440 " Volume name(s): %s\n"
441 " Volume Session Id: %d\n"
442 " Volume Session Time: %d\n"
443 " Last Volume Bytes: %s (%sB)\n"
444 " Non-fatal FD errors: %d\n"
446 " FD termination status: %s\n"
447 " SD termination status: %s\n"
448 " Termination: %s\n\n"),
454 level_to_str(jcr->JobLevel), jcr->since,
455 jcr->client->hdr.name, cr.Uname,
456 jcr->fileset->hdr.name, jcr->FSCreateTime,
458 jcr->store->hdr.name,
462 edit_utime(RunTime, elapsed, sizeof(elapsed)),
464 edit_uint64_with_commas(jcr->jr.JobFiles, ec1),
465 edit_uint64_with_commas(jcr->SDJobFiles, ec2),
466 edit_uint64_with_commas(jcr->jr.JobBytes, ec3),
467 edit_uint64_with_suffix(jcr->jr.JobBytes, ec4),
468 edit_uint64_with_commas(jcr->SDJobBytes, ec5),
469 edit_uint64_with_suffix(jcr->SDJobBytes, ec6),
475 edit_uint64_with_commas(mr.VolBytes, ec7),
476 edit_uint64_with_suffix(mr.VolBytes, ec8),
483 Dmsg0(100, "Leave backup_cleanup()\n");
/*
 * Write the bootstrap information for a finished job.
 *
 * Nothing is done unless the job status is JS_Terminated, the job
 * wrote a non-zero number of bytes and the Job resource names a
 * WriteBootstrap target. The target is opened either through
 * open_bpipe() (writing to the pipe's wfd) or through fopen(); a plain
 * file is truncated ("w+") for a Full level job and appended to ("a+")
 * otherwise. One record is written for every volume returned by
 * db_get_job_volume_parameters(), preceded by a comment line carrying
 * the time, job name, level and since time.
 *
 * NOTE(review): the test that selects the pipe path, the release of
 * VolParams and the closing of the stream are not shown here --
 * confirm they exist in the full function.
 */
486 void update_bootstrap_file(JCR *jcr)
488 /* Now update the bootstrap file if any */
489 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes &&
490 jcr->job->WriteBootstrap) {
494 char *fname = jcr->job->WriteBootstrap;
495 VOL_PARAMS *VolParams = NULL;
/* Pipe target: fd stays NULL when the pipe could not be opened */
502 bpipe = open_bpipe(fname, 0, "w");
503 fd = bpipe ? bpipe->wfd : NULL;
505 /* ***FIXME*** handle BASE */
506 fd = fopen(fname, jcr->JobLevel==L_FULL?"w+":"a+");
509 VolCount = db_get_job_volume_parameters(jcr, jcr->db, jcr->JobId,
512 Jmsg(jcr, M_ERROR, 0, _("Could not get Job Volume Parameters to "
513 "update Bootstrap file. ERR=%s\n"), db_strerror(jcr->db));
/* Missing volume parameters only fail the job when the Storage daemon reported files written */
514 if (jcr->SDJobFiles != 0) {
515 set_jcr_job_status(jcr, JS_ErrorTerminated);
519 /* Start output with when and who wrote it */
520 bstrftimes(edt, sizeof(edt), time(NULL));
521 fprintf(fd, "# %s - %s - %s%s\n", edt, jcr->jr.Job,
522 level_to_str(jcr->JobLevel), jcr->since);
523 for (int i=0; i < VolCount; i++) {
524 /* Write the record */
525 fprintf(fd, "Volume=\"%s\"\n", VolParams[i].VolumeName);
526 fprintf(fd, "MediaType=\"%s\"\n", VolParams[i].MediaType);
/* Session id and time come from the jcr, so they are the same for every volume of the job */
527 fprintf(fd, "VolSessionId=%u\n", jcr->VolSessionId);
528 fprintf(fd, "VolSessionTime=%u\n", jcr->VolSessionTime);
529 fprintf(fd, "VolFile=%u-%u\n", VolParams[i].StartFile,
530 VolParams[i].EndFile);
531 fprintf(fd, "VolBlock=%u-%u\n", VolParams[i].StartBlock,
532 VolParams[i].EndBlock);
533 fprintf(fd, "FileIndex=%d-%d\n", VolParams[i].FirstIndex,
534 VolParams[i].LastIndex);
/* Failure to open the bootstrap target is reported and marks the job as terminated in error */
546 Jmsg(jcr, M_ERROR, 0, _("Could not open WriteBootstrap file:\n"
547 "%s: ERR=%s\n"), fname, be.strerror());
548 set_jcr_job_status(jcr, JS_ErrorTerminated);