3 * Bacula Director -- backup.c -- responsible for doing backup jobs
5 * Kern Sibbald, March MM
7 * Basic tasks done here:
8 * Open DB and create records for this job.
9 * Open Message Channel with Storage daemon to tell him a job will be starting.
10 * Open connection with File daemon and pass him commands
12 * When the File daemon finishes the job, update the DB.
18 Copyright (C) 2000-2005 Kern Sibbald
20 This program is free software; you can redistribute it and/or
21 modify it under the terms of the GNU General Public License as
22 published by the Free Software Foundation; either version 2 of
23 the License, or (at your option) any later version.
25 This program is distributed in the hope that it will be useful,
26 but WITHOUT ANY WARRANTY; without even the implied warranty of
27 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
28 General Public License for more details.
30 You should have received a copy of the GNU General Public
31 License along with this program; if not, write to the Free
32 Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
/*
 * Protocol strings exchanged with the File daemon.  In this file the
 * command strings are handed to bnet_fsend() and the OK strings to
 * response() by do_backup(); EndJob is the pattern that
 * wait_for_job_termination() passes to sscanf().
 */
41 /* Commands sent to File daemon */
42 static char backupcmd[] = "backup\n";
/*
 * Formatted by do_backup() with store->address and store->SDDport.
 * NOTE(review): the value for the ssl field is presumably tls_need --
 * confirm against the full bnet_fsend() call.
 */
43 static char storaddr[] = "storage address=%s port=%d ssl=%d\n";
45 /* Responses received from File daemon */
46 static char OKbackup[] = "2000 OK backup\n";
47 static char OKstore[] = "2000 OK storage\n";
/*
 * End of job report from the File daemon.  wait_for_job_termination()
 * scans its five fields into jcr->FDJobStatus, JobFiles, ReadBytes,
 * JobBytes and Errors, in that order.
 * NOTE(review): ReadBytes and JobBytes are declared uint64_t there but are
 * scanned with %lld (a signed conversion) -- confirm this is intended and
 * matches what the File daemon emits.
 */
48 static char EndJob[] = "2800 End Job TermCode=%d JobFiles=%u "
49 "ReadBytes=%lld JobBytes=%lld Errors=%u\n";
52 * Called here before the job is run to do the job-specific setup.
/*
 * do_backup_init -- per-job setup done before the backup itself starts.
 *
 * As far as the statements below show, in order:
 *   - selects jcr->pool from the level specific pools (full_pool,
 *     inc_pool, dif_pool) based on jcr->JobLevel;
 *   - looks the pool up in the catalog by name, tries create_pool() when
 *     the first lookup fails, and stores the PoolId in the JCR;
 *   - gets or creates the FileSet record and keeps its creation time;
 *   - fills jcr->since through get_level_since_time();
 *   - when the job is not itself a clone and has run_cmds, issues one
 *     run command per entry.
 *
 * jcr: job control record of the job being set up.
 * Returns a bool.  NOTE(review): presumably false after each M_FATAL
 * message and true otherwise -- confirm against the return statements.
 */
55 bool do_backup_init(JCR *jcr)
60 * Get the Pool record -- first apply any level defined pools
62 switch (jcr->JobLevel) {
/*
 * NOTE(review): presumably one assignment per backup level case
 * (full, incremental, differential) -- confirm against the case labels.
 */
65 jcr->pool = jcr->full_pool;
70 jcr->pool = jcr->inc_pool;
75 jcr->pool = jcr->dif_pool;
/* Build a lookup key: a zeroed pool record carrying only the pool name */
79 memset(&pr, 0, sizeof(pr));
80 bstrncpy(pr.Name, jcr->pool->hdr.name, sizeof(pr.Name));
82 if (!db_get_pool_record(jcr, jcr->db, &pr)) { /* get by Name */
83 /* Try to create the pool */
84 if (create_pool(jcr, jcr->db, jcr->pool, POOL_OP_CREATE) < 0) {
/*
 * NOTE(review): unlike the other messages in this function, this format
 * has no trailing newline; confirm whether db_strerror() supplies one.
 */
85 Jmsg(jcr, M_FATAL, 0, _("Pool %s not in database. %s"), pr.Name,
86 db_strerror(jcr->db));
89 Jmsg(jcr, M_INFO, 0, _("Pool %s created in database.\n"), pr.Name);
/* Read the record back after creation so that pr.PoolId is filled in */
90 if (!db_get_pool_record(jcr, jcr->db, &pr)) { /* get by Name */
91 Jmsg(jcr, M_FATAL, 0, _("Pool %s not in database. %s"), pr.Name,
92 db_strerror(jcr->db));
/* The PoolId is kept both in the JCR itself and in its job record */
97 jcr->PoolId = pr.PoolId; /****FIXME**** this can go away */
98 jcr->jr.PoolId = pr.PoolId;
100 if (!get_or_create_fileset_record(jcr, &fsr)) {
/* Keep the FileSet creation time; backup_cleanup() prints it in the job report */
103 bstrncpy(jcr->FSCreateTime, fsr.cCreateTime, sizeof(jcr->FSCreateTime));
105 get_level_since_time(jcr, jcr->since, sizeof(jcr->since));
107 Dmsg2(900, "cloned=%d run_cmds=%p\n", jcr->cloned, jcr->job->run_cmds);
/*
 * Clone jobs: skipped when this job is itself a clone.  Each started job
 * is given cloned=yes on its command line.
 */
108 if (!jcr->cloned && jcr->job->run_cmds) {
111 POOLMEM *cmd = get_pool_memory(PM_FNAME);
/*
 * NOTE(review): only cmd is visibly released at the end of this block;
 * confirm that the UA context created here is freed as well.
 */
112 UAContext *ua = new_ua_context(jcr);
114 foreach_alist(runcmd, job->run_cmds) {
/* Build the command text from the configured run command, then run it */
115 cmd = edit_job_codes(jcr, cmd, runcmd, "");
116 Mmsg(ua->cmd, "run %s cloned=yes", cmd);
117 Dmsg1(900, "=============== Clone cmd=%s\n", ua->cmd);
118 parse_ua_args(ua); /* parse command */
/*
 * The value returned by run_cmd() is reported below as the JobId of the
 * clone.  NOTE(review): presumably 0 selects the error message -- confirm.
 */
119 int stat = run_cmd(ua, ua->cmd);
121 Jmsg(jcr, M_ERROR, 0, _("Could not start clone job.\n"));
123 Jmsg(jcr, M_INFO, 0, _("Clone JobId %d started.\n"), stat);
127 free_pool_memory(cmd);
134 * Do a backup of the specified FileSet
136 * Returns: false on failure
/*
 * do_backup -- run one backup job from the Director side.
 *
 * Sequence shown by the statements below:
 *   1. log the start message and update the job start record;
 *   2. connect to the Storage daemon, start the job there with SD_APPEND
 *      and start the Storage daemon message thread;
 *   3. connect to the File daemon and send it the include list, the
 *      exclude list and the level command;
 *   4. send the Storage daemon address, port and TLS requirement to the
 *      File daemon and wait for OKstore;
 *   5. send the run before and after commands, then the backup command,
 *      and wait for OKbackup;
 *   6. wait for job termination and call backup_cleanup() when the
 *      status is JS_Terminated.
 *
 * jcr: job control record of the running job.
 * Returns a bool; the comment above this function documents false on
 * failure.  NOTE(review): each failed step presumably returns early --
 * confirm against the return statements.
 */
139 bool do_backup(JCR *jcr)
/* Default: TLS is not used towards the Storage daemon unless enabled below */
142 int tls_need = BNET_TLS_NONE;
147 /* Print Job Start message */
148 Jmsg(jcr, M_INFO, 0, _("Start Backup JobId %u, Job=%s\n"),
149 jcr->JobId, jcr->Job);
151 set_jcr_job_status(jcr, JS_Running);
152 Dmsg2(100, "JobId=%d JobLevel=%c\n", jcr->jr.JobId, jcr->jr.JobLevel);
153 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
154 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
159 * Open a message channel connection with the Storage
160 * daemon. This is to let him know that our client
161 * will be contacting him for a backup session.
164 Dmsg0(110, "Open connection with storage daemon\n");
/* Job status records which daemon is currently being waited for */
165 set_jcr_job_status(jcr, JS_WaitSD);
167 * Start conversation with Storage daemon
169 if (!connect_to_storage_daemon(jcr, 10, SDConnectTimeout, 1)) {
173 * Now start a job with the Storage daemon
175 if (!start_storage_daemon_job(jcr, jcr->storage, SD_APPEND)) {
179 * Now start a Storage daemon message thread
181 if (!start_storage_daemon_message_thread(jcr)) {
184 Dmsg0(150, "Storage daemon connection OK\n");
186 set_jcr_job_status(jcr, JS_WaitFD);
187 if (!connect_to_file_daemon(jcr, 10, FDConnectTimeout, 1)) {
/* Both daemons are connected; fd is the File daemon socket used below */
191 set_jcr_job_status(jcr, JS_Running);
192 fd = jcr->file_bsock;
194 if (!send_include_list(jcr)) {
198 if (!send_exclude_list(jcr)) {
202 if (!send_level_command(jcr)) {
207 * send Storage daemon address to the File daemon
/*
 * When no separate SDDport is set (0), fall back to SDport.
 * NOTE(review): store is presumably the storage resource of this job;
 * its declaration is to be confirmed.
 */
210 if (store->SDDport == 0) {
211 store->SDDport = store->SDport;
215 /* TLS Requirement */
/*
 * With TLS enabled on the storage resource, tls_need becomes
 * BNET_TLS_REQUIRED when TLS is required and BNET_TLS_OK otherwise.
 */
216 if (store->tls_enable) {
217 if (store->tls_require) {
218 tls_need = BNET_TLS_REQUIRED;
220 tls_need = BNET_TLS_OK;
/* NOTE(review): the argument for the ssl field is presumably tls_need -- confirm */
225 bnet_fsend(fd, storaddr, store->address, store->SDDport,
227 if (!response(jcr, fd, OKstore, "Storage", DISPLAY_ERROR)) {
232 if (!send_run_before_and_after_commands(jcr)) {
236 /* Send backup command */
237 bnet_fsend(fd, backupcmd);
238 if (!response(jcr, fd, OKbackup, "backup", DISPLAY_ERROR)) {
242 /* Pickup Job termination data */
243 stat = wait_for_job_termination(jcr);
/*
 * Cleanup is called here for normal termination.
 * NOTE(review): handling of any other status is to be confirmed.
 */
244 if (stat == JS_Terminated) {
245 backup_cleanup(jcr, stat);
253 * Here we wait for the File daemon to signal termination,
254 * then we wait for the Storage daemon. When both
255 * are done, we return the job status.
256 * Also used by restore.c
/*
 * wait_for_job_termination -- collect the end of job report from the
 * File daemon, then wait for the Storage daemon.
 *
 * Messages are read from the File daemon socket until bget_dirmsg()
 * returns a negative value.  A message matching EndJob supplies the File
 * daemon status and counters.  Afterwards BNET_TERMINATE is sent to the
 * File daemon and wait_for_storage_daemon_termination() is called.
 *
 * jcr: job control record; FDJobStatus, JobFiles, Errors, ReadBytes and
 *      JobBytes are written here.
 * Returns the first status that is not JS_Terminated, checked in the order
 * jcr->JobStatus, jcr->FDJobStatus; if both are JS_Terminated the Storage
 * daemon status jcr->SDJobStatus is returned.
 */
258 int wait_for_job_termination(JCR *jcr)
261 BSOCK *fd = jcr->file_bsock;
/* Counters parsed from the EndJob message of the File daemon */
263 uint32_t JobFiles, Errors;
264 uint64_t ReadBytes, JobBytes;
266 set_jcr_job_status(jcr, JS_Running);
267 /* Wait for Client to terminate */
268 while ((n = bget_dirmsg(fd)) >= 0) {
/*
 * Only tried while fd_ok is still false; all five fields must convert.
 * NOTE(review): fd_ok is presumably a local flag that starts false and is
 * set once this match succeeds -- confirm.
 * NOTE(review): ReadBytes and JobBytes are uint64_t but EndJob scans them
 * with %lld -- confirm this is intended.
 */
269 if (!fd_ok && sscanf(fd->msg, EndJob, &jcr->FDJobStatus, &JobFiles,
270 &ReadBytes, &JobBytes, &Errors) == 5) {
272 set_jcr_job_status(jcr, jcr->FDJobStatus);
/* NOTE(review): the label says FDStatus but the value printed is jcr->JobStatus */
273 Dmsg1(100, "FDStatus=%c\n", (char)jcr->JobStatus);
/* NOTE(review): presumably the else branch, for messages that do not match EndJob -- confirm */
275 Jmsg(jcr, M_WARNING, 0, _("Unexpected Client Job message: %s\n"),
278 if (job_canceled(jcr)) {
/* After the read loop: report a socket error, if any, as fatal */
282 if (is_bnet_error(fd)) {
283 Jmsg(jcr, M_FATAL, 0, _("Network error with FD during %s: ERR=%s\n"),
284 job_type_to_str(jcr->JobType), bnet_strerror(fd));
286 bnet_sig(fd, BNET_TERMINATE); /* tell Client we are terminating */
288 /* Note, the SD stores in jcr->JobFiles/ReadBytes/JobBytes/Errors */
289 wait_for_storage_daemon_termination(jcr);
292 /* Return values from FD */
/*
 * These assignments replace the values mentioned in the note above with
 * the File daemon counters.
 * NOTE(review): presumably done only when fd_ok is set, with the fatal
 * message below as the alternative -- confirm.
 */
294 jcr->JobFiles = JobFiles;
295 jcr->Errors = Errors;
296 jcr->ReadBytes = ReadBytes;
297 jcr->JobBytes = JobBytes;
299 Jmsg(jcr, M_FATAL, 0, _("No Job status returned from FD.\n"));
302 // Dmsg4(100, "fd_ok=%d FDJS=%d JS=%d SDJS=%d\n", fd_ok, jcr->FDJobStatus,
303 // jcr->JobStatus, jcr->SDJobStatus);
305 /* Return the first error status we find Dir, FD, or SD */
/* A missing EndJob report or a socket error counts as a File daemon error */
306 if (!fd_ok || is_bnet_error(fd)) {
307 jcr->FDJobStatus = JS_ErrorTerminated;
309 if (jcr->JobStatus != JS_Terminated) {
310 return jcr->JobStatus;
312 if (jcr->FDJobStatus != JS_Terminated) {
313 return jcr->FDJobStatus;
315 return jcr->SDJobStatus;
319 * Release resources allocated during backup.
/*
 * backup_cleanup -- finish a backup job and emit the job report.
 *
 * As far as the statements below show:
 *   - sets the job status to TermCode and updates the job end record;
 *   - re-reads the job record and the media record of jcr->VolumeName,
 *     downgrading the status to JS_ErrorTerminated if either read fails;
 *   - for a JS_Terminated job that wrote bytes and has WriteBootstrap set,
 *     writes one bootstrap record per volume used by the job;
 *   - chooses the termination text and message type from the job status
 *     and, on error or cancel, signals the Storage daemon and cancels its
 *     message thread;
 *   - computes run time, rate and compression and prints the summary.
 *
 * jcr:      job control record of the finished job.
 * TermCode: termination status to record for the job.
 */
321 void backup_cleanup(JCR *jcr, int TermCode)
/* Scratch buffers for formatted times, edited numbers and status texts */
323 char sdt[50], edt[50], schedt[50];
324 char ec1[30], ec2[30], ec3[30], ec4[30], ec5[30], compress[50];
325 char term_code[100], fd_term_msg[100], sd_term_msg[100];
326 const char *term_msg;
329 double kbps, compression;
332 Dmsg2(100, "Enter backup_cleanup %d %c\n", TermCode, TermCode);
333 dequeue_messages(jcr); /* display any queued messages */
334 memset(&mr, 0, sizeof(mr));
335 set_jcr_job_status(jcr, TermCode);
337 update_job_end_record(jcr); /* update database */
/* Re-read the job record; its values feed the statistics printed below */
339 if (!db_get_job_record(jcr, jcr->db, &jcr->jr)) {
340 Jmsg(jcr, M_WARNING, 0, _("Error getting job record for stats: %s"),
341 db_strerror(jcr->db));
342 set_jcr_job_status(jcr, JS_ErrorTerminated);
/* Look up the media record by volume name; mr.VolBytes is printed in the summary */
345 bstrncpy(mr.VolumeName, jcr->VolumeName, sizeof(mr.VolumeName));
346 if (!db_get_media_record(jcr, jcr->db, &mr)) {
347 Jmsg(jcr, M_WARNING, 0, _("Error getting Media record for Volume \"%s\": ERR=%s"),
348 mr.VolumeName, db_strerror(jcr->db));
349 set_jcr_job_status(jcr, JS_ErrorTerminated);
352 /* Now update the bootstrap file if any */
/* Only for a normally terminated job that wrote data and has a bootstrap target */
353 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes &&
354 jcr->job->WriteBootstrap) {
358 char *fname = jcr->job->WriteBootstrap;
359 VOL_PARAMS *VolParams = NULL;
/*
 * Two ways of opening the target: a pipe to a program, or a plain file.
 * NOTE(review): the condition choosing between them is to be confirmed.
 */
365 bpipe = open_bpipe(fname, 0, "w");
366 fd = bpipe ? bpipe->wfd : NULL;
368 /* ***FIXME*** handle BASE */
/* A Full backup truncates the bootstrap file (w+); other levels append (a+) */
369 fd = fopen(fname, jcr->JobLevel==L_FULL?"w+":"a+");
372 VolCount = db_get_job_volume_parameters(jcr, jcr->db, jcr->JobId,
375 Jmsg(jcr, M_ERROR, 0, _("Could not get Job Volume Parameters to "
376 "update Bootstrap file. ERR=%s\n"), db_strerror(jcr->db));
/* Missing volume parameters only fail the job when the SD reported files */
377 if (jcr->SDJobFiles != 0) {
378 set_jcr_job_status(jcr, JS_ErrorTerminated);
382 for (int i=0; i < VolCount; i++) {
383 /* Write the record */
384 fprintf(fd, "Volume=\"%s\"\n", VolParams[i].VolumeName);
385 fprintf(fd, "MediaType=\"%s\"\n", VolParams[i].MediaType);
386 fprintf(fd, "VolSessionId=%u\n", jcr->VolSessionId);
387 fprintf(fd, "VolSessionTime=%u\n", jcr->VolSessionTime);
388 fprintf(fd, "VolFile=%u-%u\n", VolParams[i].StartFile,
389 VolParams[i].EndFile);
390 fprintf(fd, "VolBlock=%u-%u\n", VolParams[i].StartBlock,
391 VolParams[i].EndBlock);
392 fprintf(fd, "FileIndex=%d-%d\n", VolParams[i].FirstIndex,
393 VolParams[i].LastIndex);
/* NOTE(review): presumably reached when the bootstrap target could not be opened -- confirm */
405 Jmsg(jcr, M_ERROR, 0, _("Could not open WriteBootstrap file:\n"
406 "%s: ERR=%s\n"), fname, be.strerror());
407 set_jcr_job_status(jcr, JS_ErrorTerminated);
411 msg_type = M_INFO; /* by default INFO message */
/* Map the final job status to the termination text of the report */
412 switch (jcr->JobStatus) {
414 if (jcr->Errors || jcr->SDErrors) {
415 term_msg = _("Backup OK -- with warnings");
417 term_msg = _("Backup OK");
421 case JS_ErrorTerminated:
422 term_msg = _("*** Backup Error ***");
423 msg_type = M_ERROR; /* Generate error message */
/* Tell the Storage daemon to stop and cancel its message thread, if present */
424 if (jcr->store_bsock) {
425 bnet_sig(jcr->store_bsock, BNET_TERMINATE);
426 if (jcr->SD_msg_chan) {
427 pthread_cancel(jcr->SD_msg_chan);
432 term_msg = _("Backup Canceled");
433 if (jcr->store_bsock) {
434 bnet_sig(jcr->store_bsock, BNET_TERMINATE);
435 if (jcr->SD_msg_chan) {
436 pthread_cancel(jcr->SD_msg_chan);
/*
 * Any other status: term_msg points at term_code, which is filled in by
 * the next statement.
 * NOTE(review): sprintf() is unbounded and the translated format could be
 * longer than term_code[100]; bsnprintf() is already used further down in
 * this function and would bound the write.
 */
441 term_msg = term_code;
442 sprintf(term_code, _("Inappropriate term code: %c\n"), jcr->JobStatus);
445 bstrftimes(schedt, sizeof(schedt), jcr->jr.SchedTime);
446 bstrftimes(sdt, sizeof(sdt), jcr->jr.StartTime);
447 bstrftimes(edt, sizeof(edt), jcr->jr.EndTime);
448 RunTime = jcr->jr.EndTime - jcr->jr.StartTime;
/*
 * Rate is JobBytes divided by 1000 times the run time.
 * NOTE(review): confirm RunTime is guarded against zero before this division.
 */
452 kbps = (double)jcr->jr.JobBytes / (1000 * RunTime);
454 if (!db_get_job_volume_names(jcr, jcr->db, jcr->jr.JobId, &jcr->VolumeName)) {
456 * Note, if the job has erred, most likely it did not write any
457 * tape, so suppress this "error" message since in that case
458 * it is normal. Or look at it the other way, only for a
459 * normal exit should we complain about this error.
461 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes) {
462 Jmsg(jcr, M_ERROR, 0, "%s", db_strerror(jcr->db));
464 jcr->VolumeName[0] = 0; /* none */
/*
 * Compression is 100 minus the percentage of JobBytes relative to
 * ReadBytes; zero ReadBytes or a result below 0.5 is reported as None.
 */
467 if (jcr->ReadBytes == 0) {
468 bstrncpy(compress, "None", sizeof(compress));
470 compression = (double)100 - 100.0 * ((double)jcr->JobBytes / (double)jcr->ReadBytes);
471 if (compression < 0.5) {
472 bstrncpy(compress, "None", sizeof(compress));
474 bsnprintf(compress, sizeof(compress), "%.1f %%", (float)compression);
477 jobstatus_to_ascii(jcr->FDJobStatus, fd_term_msg, sizeof(fd_term_msg));
478 jobstatus_to_ascii(jcr->SDJobStatus, sd_term_msg, sizeof(sd_term_msg));
480 // bmicrosleep(15, 0); /* for debugging SIGHUP */
/*
 * Job report, sent with the message type chosen above.
 * NOTE(review): the session id and time are printed with %d here but with
 * %u in the bootstrap records above -- confirm which matches their type.
 */
482 Jmsg(jcr, msg_type, 0, _("Bacula " VERSION " (" LSMDATE "): %s\n"
485 " Backup Level: %s%s\n"
487 " FileSet: \"%s\" %s\n"
490 " Scheduled time: %s\n"
494 " FD Files Written: %s\n"
495 " SD Files Written: %s\n"
496 " FD Bytes Written: %s\n"
497 " SD Bytes Written: %s\n"
499 " Software Compression: %s\n"
500 " Volume name(s): %s\n"
501 " Volume Session Id: %d\n"
502 " Volume Session Time: %d\n"
503 " Last Volume Bytes: %s\n"
504 " Non-fatal FD errors: %d\n"
506 " FD termination status: %s\n"
507 " SD termination status: %s\n"
508 " Termination: %s\n\n"),
512 level_to_str(jcr->JobLevel), jcr->since,
513 jcr->client->hdr.name,
514 jcr->fileset->hdr.name, jcr->FSCreateTime,
516 jcr->store->hdr.name,
521 edit_uint64_with_commas(jcr->jr.JobFiles, ec1),
522 edit_uint64_with_commas(jcr->SDJobFiles, ec4),
523 edit_uint64_with_commas(jcr->jr.JobBytes, ec2),
524 edit_uint64_with_commas(jcr->SDJobBytes, ec5),
530 edit_uint64_with_commas(mr.VolBytes, ec3),
537 Dmsg0(100, "Leave backup_cleanup()\n");