3 * Bacula Director -- backup.c -- responsible for doing backup jobs
5 * Kern Sibbald, March MM
7 * Basic tasks done here:
8 * Open DB and create records for this job.
9 * Open Message Channel with Storage daemon to tell him a job will be starting.
10 * Open connection with File daemon and pass him commands
12 * When the File daemon finishes the job, update the DB.
17 Copyright (C) 2000-2005 Kern Sibbald
19 This program is free software; you can redistribute it and/or
20 modify it under the terms of the GNU General Public License
21 version 2 as amended with additional clauses defined in the
22 file LICENSE in the main source directory.
24 This program is distributed in the hope that it will be useful,
25 but WITHOUT ANY WARRANTY; without even the implied warranty of
26 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27 file LICENSE for additional details.
35 /* Commands sent to File daemon */
/* backupcmd: sent verbatim with bnet_fsend() in do_backup() to start the backup. */
36 static char backupcmd[] = "backup\n";
/*
 * storaddr: format used in do_backup() to tell the File daemon which Storage
 * daemon to contact; filled with store->address and store->SDDport.
 * NOTE(review): the argument for ssl=%d is on a line not visible in this
 * listing (presumably tls_need) -- confirm against the full source.
 */
37 static char storaddr[] = "storage address=%s port=%d ssl=%d\n";
39 /* Responses received from File daemon */
/* OKbackup: reply expected by response() after backupcmd has been sent. */
40 static char OKbackup[] = "2000 OK backup\n";
/* OKstore: reply expected by response() after the storaddr command. */
41 static char OKstore[] = "2000 OK storage\n";
/*
 * EndJob: sscanf() format used by wait_for_job_termination() to parse the
 * end-of-job message; it yields five fields (TermCode, JobFiles, ReadBytes,
 * JobBytes, Errors).
 */
42 static char EndJob[] = "2800 End Job TermCode=%d JobFiles=%u "
43 "ReadBytes=%lld JobBytes=%lld Errors=%u\n";
46 * Called here before the job is run to do the job
/*
 * do_backup_init -- setup performed before a backup job is run.
 *
 * Steps visible in this listing:
 *   - get or create the FileSet record for the job,
 *   - determine the job level and the "since" time (stored in jcr->since),
 *   - switch on jcr->JobLevel and replace jcr->pool with one of
 *     jcr->full_pool, jcr->inc_pool or jcr->dif_pool,
 *   - look up the Pool record by name, creating the pool in the catalog
 *     when the lookup fails, then look it up again,
 *   - copy the resulting PoolId into jcr->PoolId and jcr->jr.PoolId,
 *   - for a job that is not itself a clone and has run commands, issue one
 *     "run ... cloned=yes" console command per entry.
 *
 * jcr: job control record of the backup job being initialised.
 * Returns: bool.
 * NOTE(review): local declarations, the case labels, the return statements
 * and the closing braces are not visible in this listing; the exact
 * failure paths must be confirmed against the full source.
 */
49 bool do_backup_init(JCR *jcr)
53 if (!get_or_create_fileset_record(jcr)) {
58 * Get definitive Job level and since time
60 get_level_since_time(jcr, jcr->since, sizeof(jcr->since));
63 * Apply any level related Pool selections
65 switch (jcr->JobLevel) {
68 jcr->pool = jcr->full_pool;
73 jcr->pool = jcr->inc_pool;
78 jcr->pool = jcr->dif_pool;
/* Zero the pool record so that only the Name field drives the lookup. */
82 memset(&pr, 0, sizeof(pr));
83 bstrncpy(pr.Name, jcr->pool->hdr.name, sizeof(pr.Name));
85 if (!db_get_pool_record(jcr, jcr->db, &pr)) { /* get by Name */
86 /* Try to create the pool */
87 if (create_pool(jcr, jcr->db, jcr->pool, POOL_OP_CREATE) < 0) {
88 Jmsg(jcr, M_FATAL, 0, _("Pool %s not in database. %s"), pr.Name,
89 db_strerror(jcr->db));
92 Jmsg(jcr, M_INFO, 0, _("Pool %s created in database.\n"), pr.Name);
/* Re-read the record after creation so that pr.PoolId is filled in. */
93 if (!db_get_pool_record(jcr, jcr->db, &pr)) { /* get by Name */
94 Jmsg(jcr, M_FATAL, 0, _("Pool %s not in database. %s"), pr.Name,
95 db_strerror(jcr->db));
/* Record the catalog PoolId both in the JCR and in its job record. */
100 jcr->PoolId = pr.PoolId; /****FIXME**** this can go away */
101 jcr->jr.PoolId = pr.PoolId;
103 Dmsg2(900, "cloned=%d run_cmds=%p\n", jcr->cloned, jcr->job->run_cmds);
/*
 * Clone handling: skipped when this job is already a clone, since every
 * command issued below carries "cloned=yes".
 */
104 if (!jcr->cloned && jcr->job->run_cmds) {
107 POOLMEM *cmd = get_pool_memory(PM_FNAME);
108 UAContext *ua = new_ua_context(jcr);
110 foreach_alist(runcmd, job->run_cmds) {
/* Expand job codes in the configured command, then run it through the console. */
111 cmd = edit_job_codes(jcr, cmd, runcmd, "");
112 Mmsg(ua->cmd, "run %s cloned=yes", cmd);
113 Dmsg1(900, "=============== Clone cmd=%s\n", ua->cmd);
114 parse_ua_args(ua); /* parse command */
/*
 * The value returned by run_cmd() is printed as the clone JobId in the
 * success message; the test choosing between the two messages below is
 * not visible in this listing.
 */
115 int stat = run_cmd(ua, ua->cmd);
117 Jmsg(jcr, M_ERROR, 0, _("Could not start clone job.\n"));
119 Jmsg(jcr, M_INFO, 0, _("Clone JobId %d started.\n"), stat);
123 free_pool_memory(cmd);
130 * Do a backup of the specified FileSet
132 * Returns: false on failure
/*
 * do_backup -- drive one backup job from start message to cleanup.
 *
 * Sequence visible in this listing:
 *   1. print the job start message, set JS_Running and update the job
 *      start record in the catalog,
 *   2. connect to the Storage daemon, start the job there and start the
 *      Storage daemon message thread,
 *   3. connect to the File daemon and send the include list, exclude list
 *      and level command,
 *   4. send the Storage daemon address/port (storaddr) and wait for OKstore,
 *   5. send the run before/after commands,
 *   6. send backupcmd and wait for OKbackup,
 *   7. wait for job termination and call backup_cleanup() when the
 *      resulting status is JS_Terminated.
 *
 * jcr: job control record of the job to run.
 * Returns: bool; the comment preceding this function states
 * "false on failure".
 * NOTE(review): the bodies of the failure branches and the final return
 * are not visible in this listing -- confirm against the full source.
 */
135 bool do_backup(JCR *jcr)
138 int tls_need = BNET_TLS_NONE;
144 /* Print Job Start message */
145 Jmsg(jcr, M_INFO, 0, _("Start Backup JobId %s, Job=%s\n"),
146 edit_uint64(jcr->JobId, ed1), jcr->Job);
148 set_jcr_job_status(jcr, JS_Running);
149 Dmsg2(100, "JobId=%d JobLevel=%c\n", jcr->jr.JobId, jcr->jr.JobLevel);
150 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
151 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
156 * Open a message channel connection with the Storage
157 * daemon. This is to let him know that our client
158 * will be contacting him for a backup session.
161 Dmsg0(110, "Open connection with storage daemon\n");
/* Status is JS_WaitSD while the Storage daemon connection is being set up. */
162 set_jcr_job_status(jcr, JS_WaitSD);
164 * Start conversation with Storage daemon
166 if (!connect_to_storage_daemon(jcr, 10, SDConnectTimeout, 1)) {
170 * Now start a job with the Storage daemon
172 if (!start_storage_daemon_job(jcr, NULL, jcr->storage)) {
176 * Now start a Storage daemon message thread. Note,
177 * this thread is used to provide the catalog services
178 * for the backup job, including inserting the attributes
179 * into the catalog. See catalog_update() in catreq.c
181 if (!start_storage_daemon_message_thread(jcr)) {
184 Dmsg0(150, "Storage daemon connection OK\n");
/* Status is JS_WaitFD while connecting to the File daemon, then JS_Running. */
186 set_jcr_job_status(jcr, JS_WaitFD);
187 if (!connect_to_file_daemon(jcr, 10, FDConnectTimeout, 1)) {
191 set_jcr_job_status(jcr, JS_Running);
192 fd = jcr->file_bsock;
194 if (!send_include_list(jcr)) {
198 if (!send_exclude_list(jcr)) {
202 if (!send_level_command(jcr)) {
207 * send Storage daemon address to the File daemon
/* An SDDport of zero is replaced by SDport before it is sent. */
210 if (store->SDDport == 0) {
211 store->SDDport = store->SDport;
214 /* TLS Requirement */
/*
 * tls_need starts as BNET_TLS_NONE; with TLS enabled it becomes
 * BNET_TLS_REQUIRED when tls_require is set. BNET_TLS_OK is presumably the
 * else branch (the else line is not visible in this listing).
 */
215 if (store->tls_enable) {
216 if (store->tls_require) {
217 tls_need = BNET_TLS_REQUIRED;
219 tls_need = BNET_TLS_OK;
223 bnet_fsend(fd, storaddr, store->address, store->SDDport,
225 if (!response(jcr, fd, OKstore, "Storage", DISPLAY_ERROR)) {
230 if (!send_run_before_and_after_commands(jcr)) {
234 /* Send backup command */
235 bnet_fsend(fd, backupcmd);
236 if (!response(jcr, fd, OKbackup, "backup", DISPLAY_ERROR)) {
240 /* Pickup Job termination data */
241 stat = wait_for_job_termination(jcr);
/* Cleanup and the job report are triggered here only for JS_Terminated. */
242 if (stat == JS_Terminated) {
243 backup_cleanup(jcr, stat);
251 * Here we wait for the File daemon to signal termination,
252 * then we wait for the Storage daemon. When both
253 * are done, we return the job status.
254 * Also used by restore.c
/*
 * wait_for_job_termination -- wait for the File daemon, then the Storage
 * daemon, to finish, and return a job status.
 *
 * Messages are read from jcr->file_bsock until bget_dirmsg() returns a
 * negative value. While fd_ok is still false, a message that matches the
 * EndJob format (all five fields) supplies the File daemon status and
 * counters. After the loop a network error is reported as fatal,
 * BNET_TERMINATE is signalled to the client and the function waits for the
 * Storage daemon to terminate.
 *
 * jcr: job control record; FDJobStatus, JobFiles, Errors, ReadBytes and
 *      JobBytes are written here.
 * Returns: jcr->JobStatus if it is not JS_Terminated, otherwise
 *          jcr->FDJobStatus if that is not JS_Terminated, otherwise
 *          jcr->SDJobStatus.
 * NOTE(review): the declarations of n and fd_ok, the assignment to fd_ok
 * and several else/closing lines are not visible in this listing.
 */
256 int wait_for_job_termination(JCR *jcr)
259 BSOCK *fd = jcr->file_bsock;
261 uint32_t JobFiles, Errors;
262 uint64_t ReadBytes, JobBytes;
264 set_jcr_job_status(jcr, JS_Running);
265 /* Wait for Client to terminate */
266 while ((n = bget_dirmsg(fd)) >= 0) {
/*
 * NOTE(review): EndJob converts ReadBytes/JobBytes with %lld while the
 * destination variables are uint64_t; the conversion specifier and the
 * pointer type do not match exactly -- confirm this is intended.
 */
267 if (!fd_ok && sscanf(fd->msg, EndJob, &jcr->FDJobStatus, &JobFiles,
268 &ReadBytes, &JobBytes, &Errors) == 5) {
/* The File daemon status becomes the job status. */
270 set_jcr_job_status(jcr, jcr->FDJobStatus);
271 Dmsg1(100, "FDStatus=%c\n", (char)jcr->JobStatus);
273 Jmsg(jcr, M_WARNING, 0, _("Unexpected Client Job message: %s\n"),
276 if (job_canceled(jcr)) {
280 if (is_bnet_error(fd)) {
281 Jmsg(jcr, M_FATAL, 0, _("Network error with FD during %s: ERR=%s\n"),
282 job_type_to_str(jcr->JobType), bnet_strerror(fd));
284 bnet_sig(fd, BNET_TERMINATE); /* tell Client we are terminating */
286 /* Note, the SD stores in jcr->JobFiles/ReadBytes/JobBytes/Errors */
287 wait_for_storage_daemon_termination(jcr);
290 /* Return values from FD */
/*
 * These assignments overwrite the counters mentioned in the note above with
 * the values parsed from the File daemon message. The "No Job status"
 * message below is presumably the branch taken when fd_ok is false (the
 * selecting test is not visible in this listing).
 */
292 jcr->JobFiles = JobFiles;
293 jcr->Errors = Errors;
294 jcr->ReadBytes = ReadBytes;
295 jcr->JobBytes = JobBytes;
297 Jmsg(jcr, M_FATAL, 0, _("No Job status returned from FD.\n"));
300 // Dmsg4(100, "fd_ok=%d FDJS=%d JS=%d SDJS=%d\n", fd_ok, jcr->FDJobStatus,
301 // jcr->JobStatus, jcr->SDJobStatus);
303 /* Return the first error status we find Dir, FD, or SD */
/* A missing end-of-job message or a socket error counts as an FD error. */
304 if (!fd_ok || is_bnet_error(fd)) {
305 jcr->FDJobStatus = JS_ErrorTerminated;
307 if (jcr->JobStatus != JS_Terminated) {
308 return jcr->JobStatus;
310 if (jcr->FDJobStatus != JS_Terminated) {
311 return jcr->FDJobStatus;
313 return jcr->SDJobStatus;
317 * Release resources allocated during backup.
/*
 * backup_cleanup -- finish a backup job and emit the job report.
 *
 * Steps visible in this listing:
 *   - flush queued messages, set the job status to TermCode and update the
 *     job end record,
 *   - re-read the job, client and media records used for the report,
 *   - when the job terminated normally, wrote bytes and has a
 *     WriteBootstrap setting, write one bootstrap entry per volume,
 *   - choose the termination text and message type from jcr->JobStatus,
 *     signalling the Storage daemon and cancelling its message thread in
 *     the error and cancel branches,
 *   - compute the transfer rate and the software compression figure,
 *   - send the summary report through Jmsg().
 *
 * jcr:      job control record of the finished job.
 * TermCode: termination status to record for the job.
 * NOTE(review): several declarations (mr, cr, RunTime, msg_type, bpipe, fd,
 * VolCount, be), case labels, else branches and some report lines are not
 * visible in this listing.
 */
319 void backup_cleanup(JCR *jcr, int TermCode)
321 char sdt[50], edt[50], schedt[50];
322 char ec1[30], ec2[30], ec3[30], ec4[30], ec5[30], compress[50];
323 char term_code[100], fd_term_msg[100], sd_term_msg[100];
324 const char *term_msg;
328 double kbps, compression;
331 Dmsg2(100, "Enter backup_cleanup %d %c\n", TermCode, TermCode);
332 dequeue_messages(jcr); /* display any queued messages */
333 memset(&mr, 0, sizeof(mr));
334 memset(&cr, 0, sizeof(cr));
335 set_jcr_job_status(jcr, TermCode);
337 update_job_end_record(jcr); /* update database */
/* Failure to re-read the job record downgrades the job to JS_ErrorTerminated. */
339 if (!db_get_job_record(jcr, jcr->db, &jcr->jr)) {
340 Jmsg(jcr, M_WARNING, 0, _("Error getting job record for stats: %s"),
341 db_strerror(jcr->db));
342 set_jcr_job_status(jcr, JS_ErrorTerminated);
/* A missing client record only produces a warning; the status is unchanged. */
345 bstrncpy(cr.Name, jcr->client->hdr.name, sizeof(cr.Name));
346 if (!db_get_client_record(jcr, jcr->db, &cr)) {
347 Jmsg(jcr, M_WARNING, 0, _("Error getting client record for stats: %s"),
348 db_strerror(jcr->db));
/* Failure to read the media record downgrades the job to JS_ErrorTerminated. */
351 bstrncpy(mr.VolumeName, jcr->VolumeName, sizeof(mr.VolumeName));
352 if (!db_get_media_record(jcr, jcr->db, &mr)) {
353 Jmsg(jcr, M_WARNING, 0, _("Error getting Media record for Volume \"%s\": ERR=%s"),
354 mr.VolumeName, db_strerror(jcr->db));
355 set_jcr_job_status(jcr, JS_ErrorTerminated);
358 /* Now update the bootstrap file if any */
359 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes &&
360 jcr->job->WriteBootstrap) {
364 char *fname = jcr->job->WriteBootstrap;
365 VOL_PARAMS *VolParams = NULL;
/*
 * Two ways of opening the output are visible: a pipe opened for writing via
 * open_bpipe(), or a regular file opened with "w+" for a Full level job and
 * "a+" otherwise. The test selecting between them is not visible in this
 * listing.
 */
371 bpipe = open_bpipe(fname, 0, "w");
372 fd = bpipe ? bpipe->wfd : NULL;
374 /* ***FIXME*** handle BASE */
375 fd = fopen(fname, jcr->JobLevel==L_FULL?"w+":"a+");
378 VolCount = db_get_job_volume_parameters(jcr, jcr->db, jcr->JobId,
381 Jmsg(jcr, M_ERROR, 0, _("Could not get Job Volume Parameters to "
382 "update Bootstrap file. ERR=%s\n"), db_strerror(jcr->db));
/* Missing volume parameters fail the job only when the SD reported files. */
383 if (jcr->SDJobFiles != 0) {
384 set_jcr_job_status(jcr, JS_ErrorTerminated);
388 /* Start output with when and who wrote it */
389 bstrftimes(edt, sizeof(edt), time(NULL));
390 fprintf(fd, "# %s - %s - %s%s\n", edt, jcr->jr.Job,
391 level_to_str(jcr->JobLevel), jcr->since);
392 for (int i=0; i < VolCount; i++) {
393 /* Write the record */
394 fprintf(fd, "Volume=\"%s\"\n", VolParams[i].VolumeName);
395 fprintf(fd, "MediaType=\"%s\"\n", VolParams[i].MediaType);
396 fprintf(fd, "VolSessionId=%u\n", jcr->VolSessionId);
397 fprintf(fd, "VolSessionTime=%u\n", jcr->VolSessionTime);
398 fprintf(fd, "VolFile=%u-%u\n", VolParams[i].StartFile,
399 VolParams[i].EndFile);
400 fprintf(fd, "VolBlock=%u-%u\n", VolParams[i].StartBlock,
401 VolParams[i].EndBlock);
402 fprintf(fd, "FileIndex=%d-%d\n", VolParams[i].FirstIndex,
403 VolParams[i].LastIndex);
415 Jmsg(jcr, M_ERROR, 0, _("Could not open WriteBootstrap file:\n"
416 "%s: ERR=%s\n"), fname, be.strerror());
417 set_jcr_job_status(jcr, JS_ErrorTerminated);
421 msg_type = M_INFO; /* by default INFO message */
/* Pick the termination text (and message type) from the final job status. */
422 switch (jcr->JobStatus) {
424 if (jcr->Errors || jcr->SDErrors) {
425 term_msg = _("Backup OK -- with warnings");
427 term_msg = _("Backup OK");
431 case JS_ErrorTerminated:
432 term_msg = _("*** Backup Error ***");
433 msg_type = M_ERROR; /* Generate error message */
/* Tell the Storage daemon to stop and cancel its message thread if present. */
434 if (jcr->store_bsock) {
435 bnet_sig(jcr->store_bsock, BNET_TERMINATE);
436 if (jcr->SD_msg_chan) {
437 pthread_cancel(jcr->SD_msg_chan);
442 term_msg = _("Backup Canceled");
443 if (jcr->store_bsock) {
444 bnet_sig(jcr->store_bsock, BNET_TERMINATE);
445 if (jcr->SD_msg_chan) {
446 pthread_cancel(jcr->SD_msg_chan);
451 term_msg = term_code;
/*
 * NOTE(review): sprintf() writes into term_code[100] without a bound and
 * the format string is translated via _(); bsnprintf() with sizeof() is
 * used for the compress buffer further down -- consider the same here.
 */
452 sprintf(term_code, _("Inappropriate term code: %c\n"), jcr->JobStatus);
455 bstrftimes(schedt, sizeof(schedt), jcr->jr.SchedTime);
456 bstrftimes(sdt, sizeof(sdt), jcr->jr.StartTime);
457 bstrftimes(edt, sizeof(edt), jcr->jr.EndTime);
458 RunTime = jcr->jr.EndTime - jcr->jr.StartTime;
/*
 * Rate is JobBytes divided by (1000 * RunTime).
 * NOTE(review): a guard for RunTime == 0 is not visible in this listing --
 * confirm one exists in the full source before this division.
 */
462 kbps = (double)jcr->jr.JobBytes / (1000 * RunTime);
464 if (!db_get_job_volume_names(jcr, jcr->db, jcr->jr.JobId, &jcr->VolumeName)) {
466 * Note, if the job has erred, most likely it did not write any
467 * tape, so suppress this "error" message since in that case
468 * it is normal. Or look at it the other way, only for a
469 * normal exit should we complain about this error.
471 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes) {
472 Jmsg(jcr, M_ERROR, 0, "%s", db_strerror(jcr->db));
474 jcr->VolumeName[0] = 0; /* none */
/*
 * Software compression is reported as 100 - 100 * JobBytes / ReadBytes,
 * printed with one decimal; "None" is shown when nothing was read or the
 * figure is below 0.5.
 */
477 if (jcr->ReadBytes == 0) {
478 bstrncpy(compress, "None", sizeof(compress));
480 compression = (double)100 - 100.0 * ((double)jcr->JobBytes / (double)jcr->ReadBytes);
481 if (compression < 0.5) {
482 bstrncpy(compress, "None", sizeof(compress));
484 bsnprintf(compress, sizeof(compress), "%.1f %%", (float)compression);
/* Convert the FD and SD status codes to text for the report. */
487 jobstatus_to_ascii(jcr->FDJobStatus, fd_term_msg, sizeof(fd_term_msg));
488 jobstatus_to_ascii(jcr->SDJobStatus, sd_term_msg, sizeof(sd_term_msg));
490 // bmicrosleep(15, 0); /* for debugging SIGHUP */
/* Job report; msg_type is M_INFO unless the error branch above set M_ERROR. */
492 Jmsg(jcr, msg_type, 0, _("Bacula %s (%s): %s\n"
495 " Backup Level: %s%s\n"
496 " Client: \"%s\" %s\n"
497 " FileSet: \"%s\" %s\n"
500 " Scheduled time: %s\n"
504 " FD Files Written: %s\n"
505 " SD Files Written: %s\n"
506 " FD Bytes Written: %s\n"
507 " SD Bytes Written: %s\n"
509 " Software Compression: %s\n"
510 " Volume name(s): %s\n"
511 " Volume Session Id: %d\n"
512 " Volume Session Time: %d\n"
513 " Last Volume Bytes: %s\n"
514 " Non-fatal FD errors: %d\n"
516 " FD termination status: %s\n"
517 " SD termination status: %s\n"
518 " Termination: %s\n\n"),
524 level_to_str(jcr->JobLevel), jcr->since,
525 jcr->client->hdr.name, cr.Uname,
526 jcr->fileset->hdr.name, jcr->FSCreateTime,
528 jcr->store->hdr.name,
533 edit_uint64_with_commas(jcr->jr.JobFiles, ec1),
534 edit_uint64_with_commas(jcr->SDJobFiles, ec4),
535 edit_uint64_with_commas(jcr->jr.JobBytes, ec2),
536 edit_uint64_with_commas(jcr->SDJobBytes, ec5),
542 edit_uint64_with_commas(mr.VolBytes, ec3),
549 Dmsg0(100, "Leave backup_cleanup()\n");