3 * Bacula Director -- backup.c -- responsible for doing backup jobs
5 * Kern Sibbald, March MM
7 * Basic tasks done here:
8 * Open DB and create records for this job.
9 * Open Message Channel with Storage daemon to tell him a job will be starting.
10 * Open connection with File daemon and pass him commands
12 * When the File daemon finishes the job, update the DB.
17 Copyright (C) 2000-2006 Kern Sibbald
19 This program is free software; you can redistribute it and/or
20 modify it under the terms of the GNU General Public License
21 version 2 as amended with additional clauses defined in the
22 file LICENSE in the main source directory.
24 This program is distributed in the hope that it will be useful,
25 but WITHOUT ANY WARRANTY; without even the implied warranty of
26 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27 file LICENSE for additional details.
35 /* Commands sent to File daemon */
/* Sent with bnet_fsend() in do_backup() to ask the File daemon to start the backup. */
36 static char backupcmd[] = "backup\n";
/*
 * printf-style format used in do_backup(): Storage daemon address string,
 * port number (SDDport), and the TLS requirement value (tls_need).
 */
37 static char storaddr[] = "storage address=%s port=%d ssl=%d\n";
39 /* Responses received from File daemon */
/* Expected reply to backupcmd; checked with response() in do_backup(). */
40 static char OKbackup[] = "2000 OK backup\n";
/* Expected reply to the storaddr command; checked with response() in do_backup(). */
41 static char OKstore[] = "2000 OK storage\n";
/*
 * sscanf() format used by wait_for_job_termination() to parse the end of job
 * message from the File daemon. It carries five fields: termination code,
 * file count, bytes read, bytes written and error count.
 */
42 static char EndJob[] = "2800 End Job TermCode=%d JobFiles=%u "
43 "ReadBytes=%lld JobBytes=%lld Errors=%u\n";
46 * Called here before the job is run to do the job
/*
 * Prepare a backup job before it runs.
 *
 * Steps visible here, in order: release the read storage, get or create the
 * FileSet catalog record, determine the job level and since time, apply pool
 * overrides, get or create the Pool catalog record, select the write storage,
 * and start any clone jobs.
 *
 *   jcr  job control record for the backup being initialised
 *
 * Returns a bool. NOTE(review): the return statements are not visible in this
 * view; presumably false is returned on each failed step - confirm.
 */
49 bool do_backup_init(JCR *jcr)
52 free_rstorage(jcr); /* we don't read so release */
/* The FileSet record must exist (or be created) in the catalog. */
54 if (!get_or_create_fileset_record(jcr)) {
59 * Get definitive Job level and since time
/* The since text is written into jcr->since, bounded by its size. */
61 get_level_since_time(jcr, jcr->since, sizeof(jcr->since));
63 apply_pool_overrides(jcr);
/* Look up (or create) the Pool record by the pool resource name. */
65 jcr->jr.PoolId = get_or_create_pool_record(jcr, jcr->pool->hdr.name);
/* A PoolId of 0 is tested specially - presumably it signals failure; confirm. */
66 if (jcr->jr.PoolId == 0) {
70 /* If pool storage specified, use it instead of job storage */
71 copy_wstorage(jcr, jcr->pool->storage, _("Pool resource"));
/* NOTE(review): the condition guarding this fatal message is not visible here. */
74 Jmsg(jcr, M_FATAL, 0, _("No Storage specification found in Job or Pool.\n"));
78 create_clones(jcr); /* run any clone jobs */
84 * Do a backup of the specified FileSet
86 * Returns: false on failure
/*
 * Run a backup job: coordinate the Storage daemon (SD) and the File daemon
 * (FD), then wait for the job to terminate.
 *
 * Order of operations visible here:
 *   1. Record the job start in the catalog.
 *   2. Connect to the SD, start the SD job, send the run command, and start
 *      the SD message thread.
 *   3. Connect to the FD and send the include list, exclude list and level.
 *   4. Send the SD address, port and TLS requirement to the FD.
 *   5. Send the runscript commands, then refresh the job start time.
 *   6. Send the backup command and wait for termination.
 *
 *   jcr  job control record for the backup
 *
 * Returns a bool. NOTE(review): the return statements and the jump into the
 * error path near the end are not visible in this view - confirm the exact
 * control flow against the full file.
 */
89 bool do_backup(JCR *jcr)
/* Default: no TLS requested between FD and SD unless the storage resource enables it. */
92 int tls_need = BNET_TLS_NONE;
98 /* Print Job Start message */
99 Jmsg(jcr, M_INFO, 0, _("Start Backup JobId %s, Job=%s\n"),
100 edit_uint64(jcr->JobId, ed1), jcr->Job);
102 set_jcr_job_status(jcr, JS_Running);
103 Dmsg2(100, "JobId=%d JobLevel=%c\n", jcr->jr.JobId, jcr->jr.JobLevel);
/* Write the job start record; a catalog error is reported as fatal. */
104 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
105 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
110 * Open a message channel connection with the Storage
111 * daemon. This is to let him know that our client
112 * will be contacting him for a backup session.
115 Dmsg0(110, "Open connection with storage daemon\n");
116 set_jcr_job_status(jcr, JS_WaitSD);
118 * Start conversation with Storage daemon
/* NOTE(review): meaning of the numeric arguments (10, 1) is not visible here - confirm. */
120 if (!connect_to_storage_daemon(jcr, 10, SDConnectTimeout, 1)) {
124 * Now start a job with the Storage daemon
/* Only the write storage is passed; the second argument is NULL. */
126 if (!start_storage_daemon_job(jcr, NULL, jcr->wstorage)) {
131 * Start the job prior to starting the message thread below
132 * to avoid two threads from using the BSOCK structure at
135 if (!bnet_fsend(jcr->store_bsock, "run")) {
140 * Now start a Storage daemon message thread. Note,
141 * this thread is used to provide the catalog services
142 * for the backup job, including inserting the attributes
143 * into the catalog. See catalog_update() in catreq.c
145 if (!start_storage_daemon_message_thread(jcr)) {
148 Dmsg0(150, "Storage daemon connection OK\n");
/* From here on the conversation is with the File daemon. */
150 set_jcr_job_status(jcr, JS_WaitFD);
151 if (!connect_to_file_daemon(jcr, 10, FDConnectTimeout, 1)) {
155 set_jcr_job_status(jcr, JS_Running);
156 fd = jcr->file_bsock;
/* Tell the FD what to back up and at which level. */
158 if (!send_include_list(jcr)) {
162 if (!send_exclude_list(jcr)) {
166 if (!send_level_command(jcr)) {
171 * send Storage daemon address to the File daemon
/* When no separate SDDport is configured (0), fall back to SDport. */
174 if (store->SDDport == 0) {
175 store->SDDport = store->SDport;
178 /* TLS Requirement */
/*
 * With TLS enabled on the storage resource, tls_need becomes REQUIRED when
 * tls_require is set; the BNET_TLS_OK assignment is presumably the else
 * branch (the else line is not visible here - confirm).
 */
179 if (store->tls_enable) {
180 if (store->tls_require) {
181 tls_need = BNET_TLS_REQUIRED;
183 tls_need = BNET_TLS_OK;
/* Send the SD address to the FD and require the OKstore reply. */
187 bnet_fsend(fd, storaddr, store->address, store->SDDport, tls_need);
188 if (!response(jcr, fd, OKstore, "Storage", DISPLAY_ERROR)) {
193 if (!send_runscripts_commands(jcr)) {
198 * We re-update the job start record so that the start
199 * time is set after the run before job. This avoids
200 * that any files created by the run before job will
201 * be saved twice. They will be backed up in the current
202 * job, but not in the next one unless they are changed.
203 * Without this, they will be backed up in this job and
204 * in the next job run because in that case, their date
205 * is after the start of this run.
207 jcr->start_time = time(NULL);
208 jcr->jr.StartTime = jcr->start_time;
209 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
210 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
213 /* Send backup command */
214 bnet_fsend(fd, backupcmd);
215 if (!response(jcr, fd, OKbackup, "backup", DISPLAY_ERROR)) {
219 /* Pickup Job termination data */
/* backup_cleanup() is called here only when the combined status is JS_Terminated. */
220 stat = wait_for_job_termination(jcr);
221 if (stat == JS_Terminated) {
222 backup_cleanup(jcr, stat);
227 /* Come here only after starting SD thread */
/* Error path: mark the job as failed and wait for the SD to finish. */
229 set_jcr_job_status(jcr, JS_ErrorTerminated);
230 Dmsg1(400, "wait for sd. use=%d\n", jcr->use_count());
231 wait_for_storage_daemon_termination(jcr);
232 Dmsg1(400, "after wait for sd. use=%d\n", jcr->use_count());
238 * Here we wait for the File daemon to signal termination,
239 * then we wait for the Storage daemon. When both
240 * are done, we return the job status.
241 * Also used by restore.c
/*
 * Wait for the File daemon (FD) and then the Storage daemon (SD) to finish,
 * and return a single job status.
 *
 * Messages are read from the FD socket until bget_dirmsg() returns a negative
 * value. The first message matching the EndJob format supplies the FD
 * termination code and counters; other messages produce a warning.
 *
 *   jcr  job control record; jcr->file_bsock is the FD connection
 *
 * Returns the first status that is not JS_Terminated, checked in the order
 * jcr->JobStatus, jcr->FDJobStatus; otherwise jcr->SDJobStatus.
 */
243 int wait_for_job_termination(JCR *jcr)
246 BSOCK *fd = jcr->file_bsock;
/* Counters parsed from the FD end of job message. */
248 uint32_t JobFiles, Errors;
249 uint64_t ReadBytes, JobBytes;
251 set_jcr_job_status(jcr, JS_Running);
252 /* Wait for Client to terminate */
253 while ((n = bget_dirmsg(fd)) >= 0) {
/*
 * Only parse while no end of job message has been accepted yet; all five
 * fields must convert.
 * NOTE(review): EndJob uses %lld for ReadBytes and JobBytes, which are
 * declared uint64_t above - confirm the conversion specifiers match.
 */
254 if (!fd_ok && sscanf(fd->msg, EndJob, &jcr->FDJobStatus, &JobFiles,
255 &ReadBytes, &JobBytes, &Errors) == 5) {
/* The job status follows the status reported by the FD. */
257 set_jcr_job_status(jcr, jcr->FDJobStatus);
258 Dmsg1(100, "FDStatus=%c\n", (char)jcr->JobStatus);
260 Jmsg(jcr, M_WARNING, 0, _("Unexpected Client Job message: %s\n"),
/* NOTE(review): the action taken on cancel is not visible here - presumably leaves the loop. */
263 if (job_canceled(jcr)) {
/* A socket error on the FD connection is fatal for the job. */
267 if (is_bnet_error(fd)) {
268 Jmsg(jcr, M_FATAL, 0, _("Network error with FD during %s: ERR=%s\n"),
269 job_type_to_str(jcr->JobType), bnet_strerror(fd));
271 bnet_sig(fd, BNET_TERMINATE); /* tell Client we are terminating */
273 /* Note, the SD stores in jcr->JobFiles/ReadBytes/JobBytes/Errors */
274 wait_for_storage_daemon_termination(jcr);
277 /* Return values from FD */
/*
 * The FD counters overwrite the values stored by the SD.
 * NOTE(review): the guard around these assignments is not visible here -
 * presumably they apply only when an end of job message was parsed.
 */
279 jcr->JobFiles = JobFiles;
280 jcr->Errors = Errors;
281 jcr->ReadBytes = ReadBytes;
282 jcr->JobBytes = JobBytes;
284 Jmsg(jcr, M_FATAL, 0, _("No Job status returned from FD.\n"));
287 // Dmsg4(100, "fd_ok=%d FDJS=%d JS=%d SDJS=%d\n", fd_ok, jcr->FDJobStatus,
288 // jcr->JobStatus, jcr->SDJobStatus);
290 /* Return the first error status we find Dir, FD, or SD */
/* No end of job message, or a socket error, counts as an FD error. */
291 if (!fd_ok || is_bnet_error(fd)) {
292 jcr->FDJobStatus = JS_ErrorTerminated;
294 if (jcr->JobStatus != JS_Terminated) {
295 return jcr->JobStatus;
297 if (jcr->FDJobStatus != JS_Terminated) {
298 return jcr->FDJobStatus;
300 return jcr->SDJobStatus;
304 * Release resources allocated during backup.
/*
 * Finish a backup job: record the final status in the catalog, update the
 * bootstrap file, and emit the job summary report.
 *
 *   jcr       job control record of the finished job
 *   TermCode  termination status to store as the job status
 *
 * The job, client and media records are read back from the catalog to fill
 * in the report. A failure to read the job or media record downgrades the
 * job status to JS_ErrorTerminated.
 */
306 void backup_cleanup(JCR *jcr, int TermCode)
/* Scratch buffers for formatted times, numbers and status strings in the report. */
308 char sdt[50], edt[50], schedt[50];
309 char ec1[30], ec2[30], ec3[30], ec4[30], ec5[30], compress[50];
310 char ec6[30], ec7[30], ec8[30], elapsed[50];
311 char term_code[100], fd_term_msg[100], sd_term_msg[100];
312 const char *term_msg;
316 double kbps, compression;
319 Dmsg2(100, "Enter backup_cleanup %d %c\n", TermCode, TermCode);
320 dequeue_messages(jcr); /* display any queued messages */
321 memset(&mr, 0, sizeof(mr));
322 memset(&cr, 0, sizeof(cr));
323 set_jcr_job_status(jcr, TermCode);
325 update_job_end_record(jcr); /* update database */
/* Re-read the job record so the report uses the values stored in the catalog. */
327 if (!db_get_job_record(jcr, jcr->db, &jcr->jr)) {
328 Jmsg(jcr, M_WARNING, 0, _("Error getting job record for stats: %s"),
329 db_strerror(jcr->db));
330 set_jcr_job_status(jcr, JS_ErrorTerminated);
/* The client record is looked up by name; a failure only produces a warning. */
333 bstrncpy(cr.Name, jcr->client->hdr.name, sizeof(cr.Name));
334 if (!db_get_client_record(jcr, jcr->db, &cr)) {
335 Jmsg(jcr, M_WARNING, 0, _("Error getting client record for stats: %s"),
336 db_strerror(jcr->db));
/* The media record is looked up by the volume name held in the JCR. */
339 bstrncpy(mr.VolumeName, jcr->VolumeName, sizeof(mr.VolumeName));
340 if (!db_get_media_record(jcr, jcr->db, &mr)) {
341 Jmsg(jcr, M_WARNING, 0, _("Error getting Media record for Volume \"%s\": ERR=%s"),
342 mr.VolumeName, db_strerror(jcr->db));
343 set_jcr_job_status(jcr, JS_ErrorTerminated);
346 update_bootstrap_file(jcr);
/*
 * Choose the termination text and message type from the final job status.
 * NOTE(review): only the JS_ErrorTerminated case label is visible here; the
 * other labels are inferred from the messages they set - confirm.
 */
348 msg_type = M_INFO; /* by default INFO message */
349 switch (jcr->JobStatus) {
351 if (jcr->Errors || jcr->SDErrors) {
352 term_msg = _("Backup OK -- with warnings");
354 term_msg = _("Backup OK");
358 case JS_ErrorTerminated:
359 term_msg = _("*** Backup Error ***");
360 msg_type = M_ERROR; /* Generate error message */
/* Tell the SD to terminate and cancel the SD message thread if one exists. */
361 if (jcr->store_bsock) {
362 bnet_sig(jcr->store_bsock, BNET_TERMINATE);
363 if (jcr->SD_msg_chan) {
364 pthread_cancel(jcr->SD_msg_chan);
369 term_msg = _("Backup Canceled");
/* Same SD shutdown as in the error case. */
370 if (jcr->store_bsock) {
371 bnet_sig(jcr->store_bsock, BNET_TERMINATE);
372 if (jcr->SD_msg_chan) {
373 pthread_cancel(jcr->SD_msg_chan);
/* Any other status is reported verbatim as a character code. */
378 term_msg = term_code;
379 sprintf(term_code, _("Inappropriate term code: %c\n"), jcr->JobStatus);
382 bstrftimes(schedt, sizeof(schedt), jcr->jr.SchedTime);
383 bstrftimes(sdt, sizeof(sdt), jcr->jr.StartTime);
384 bstrftimes(edt, sizeof(edt), jcr->jr.EndTime);
385 RunTime = jcr->jr.EndTime - jcr->jr.StartTime;
/*
 * Transfer rate in thousands of bytes per second.
 * NOTE(review): no guard for a zero RunTime is visible in this view -
 * confirm one exists before this division.
 */
389 kbps = (double)jcr->jr.JobBytes / (1000 * RunTime);
391 if (!db_get_job_volume_names(jcr, jcr->db, jcr->jr.JobId, &jcr->VolumeName)) {
393 * Note, if the job has erred, most likely it did not write any
394 * tape, so suppress this "error" message since in that case
395 * it is normal. Or look at it the other way, only for a
396 * normal exit should we complain about this error.
398 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes) {
399 Jmsg(jcr, M_ERROR, 0, "%s", db_strerror(jcr->db));
401 jcr->VolumeName[0] = 0; /* none */
/*
 * Compression is reported as the percentage saved relative to the bytes
 * read; nothing read, or a saving below 0.5 percent, is shown as None.
 */
404 if (jcr->ReadBytes == 0) {
405 bstrncpy(compress, "None", sizeof(compress));
407 compression = (double)100 - 100.0 * ((double)jcr->JobBytes / (double)jcr->ReadBytes);
408 if (compression < 0.5) {
409 bstrncpy(compress, "None", sizeof(compress));
411 bsnprintf(compress, sizeof(compress), "%.1f %%", (float)compression);
414 jobstatus_to_ascii(jcr->FDJobStatus, fd_term_msg, sizeof(fd_term_msg));
415 jobstatus_to_ascii(jcr->SDJobStatus, sd_term_msg, sizeof(sd_term_msg));
417 // bmicrosleep(15, 0); /* for debugging SIGHUP */
/*
 * Emit the job report. The argument list must stay in step with the format
 * string; several format lines and arguments are not visible in this view.
 */
419 Jmsg(jcr, msg_type, 0, _("Bacula %s (%s): %s\n"
422 " Backup Level: %s%s\n"
423 " Client: \"%s\" %s\n"
424 " FileSet: \"%s\" %s\n"
425 " Pool: \"%s\" (From %s)\n"
426 " Storage: \"%s\" (From %s)\n"
427 " Scheduled time: %s\n"
430 " Elapsed time: %s\n"
432 " FD Files Written: %s\n"
433 " SD Files Written: %s\n"
434 " FD Bytes Written: %s (%sB)\n"
435 " SD Bytes Written: %s (%sB)\n"
437 " Software Compression: %s\n"
438 " Volume name(s): %s\n"
439 " Volume Session Id: %d\n"
440 " Volume Session Time: %d\n"
441 " Last Volume Bytes: %s (%sB)\n"
442 " Non-fatal FD errors: %d\n"
444 " FD termination status: %s\n"
445 " SD termination status: %s\n"
446 " Termination: %s\n\n"),
452 level_to_str(jcr->JobLevel), jcr->since,
453 jcr->client->name(), cr.Uname,
454 jcr->fileset->name(), jcr->FSCreateTime,
455 jcr->pool->name(), jcr->pool_source,
456 jcr->wstore->name(), jcr->storage_source,
460 edit_utime(RunTime, elapsed, sizeof(elapsed)),
462 edit_uint64_with_commas(jcr->jr.JobFiles, ec1),
463 edit_uint64_with_commas(jcr->SDJobFiles, ec2),
464 edit_uint64_with_commas(jcr->jr.JobBytes, ec3),
465 edit_uint64_with_suffix(jcr->jr.JobBytes, ec4),
466 edit_uint64_with_commas(jcr->SDJobBytes, ec5),
467 edit_uint64_with_suffix(jcr->SDJobBytes, ec6),
473 edit_uint64_with_commas(mr.VolBytes, ec7),
474 edit_uint64_with_suffix(mr.VolBytes, ec8),
481 Dmsg0(100, "Leave backup_cleanup()\n");
/*
 * Write the bootstrap records for a finished job to the target configured in
 * the job resource (WriteBootstrap).
 *
 * Nothing is done unless the job status is JS_Terminated, the job wrote
 * bytes, and a WriteBootstrap target is configured. The output is a header
 * line followed by one record per volume used by the job.
 *
 *   jcr  job control record of the finished job
 *
 * On failure to open the target, an error is logged and the job status is
 * set to JS_ErrorTerminated.
 * NOTE(review): release of VolParams and closing of the stream or pipe are
 * not visible in this view - confirm they happen on every path.
 */
484 void update_bootstrap_file(JCR *jcr)
486 /* Now update the bootstrap file if any */
487 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes &&
488 jcr->job->WriteBootstrap) {
/* Expand the job codes in the configured target into a pool memory buffer. */
492 POOLMEM *fname = get_pool_memory(PM_FNAME);
493 fname = edit_job_codes(jcr, fname, jcr->job->WriteBootstrap, "");
495 VOL_PARAMS *VolParams = NULL;
/*
 * Pipe target: the first character is skipped and the rest is run as a
 * command whose write end receives the records. NOTE(review): the test
 * selecting this branch is not visible here.
 */
501 bpipe = open_bpipe(fname+1, 0, "w"); /* skip first char "|" */
502 fd = bpipe ? bpipe->wfd : NULL;
504 /* ***FIXME*** handle BASE */
/* File target: a Full level job truncates the file, other levels append. */
505 fd = fopen(fname, jcr->JobLevel==L_FULL?"w+b":"a+b");
508 VolCount = db_get_job_volume_parameters(jcr, jcr->db, jcr->JobId,
/*
 * Failing to get the volume parameters is always logged, but only marks
 * the job as failed when the SD reported that files were written.
 */
511 Jmsg(jcr, M_ERROR, 0, _("Could not get Job Volume Parameters to "
512 "update Bootstrap file. ERR=%s\n"), db_strerror(jcr->db));
513 if (jcr->SDJobFiles != 0) {
514 set_jcr_job_status(jcr, JS_ErrorTerminated);
518 /* Start output with when and who wrote it */
519 bstrftimes(edt, sizeof(edt), time(NULL));
520 fprintf(fd, "# %s - %s - %s%s\n", edt, jcr->jr.Job,
521 level_to_str(jcr->JobLevel), jcr->since);
/* One record per volume; the session id and time are the same for all of them. */
522 for (int i=0; i < VolCount; i++) {
523 /* Write the record */
524 fprintf(fd, "Volume=\"%s\"\n", VolParams[i].VolumeName);
525 fprintf(fd, "MediaType=\"%s\"\n", VolParams[i].MediaType);
526 fprintf(fd, "VolSessionId=%u\n", jcr->VolSessionId);
527 fprintf(fd, "VolSessionTime=%u\n", jcr->VolSessionTime);
528 fprintf(fd, "VolFile=%u-%u\n", VolParams[i].StartFile,
529 VolParams[i].EndFile);
530 fprintf(fd, "VolBlock=%u-%u\n", VolParams[i].StartBlock,
531 VolParams[i].EndBlock);
532 fprintf(fd, "FileIndex=%d-%d\n", VolParams[i].FirstIndex,
533 VolParams[i].LastIndex);
/* The target could not be opened: report it and fail the job. */
545 Jmsg(jcr, M_ERROR, 0, _("Could not open WriteBootstrap file:\n"
546 "%s: ERR=%s\n"), fname, be.strerror());
547 set_jcr_job_status(jcr, JS_ErrorTerminated);
549 free_pool_memory(fname);