3 * Bacula Director -- backup.c -- responsible for doing backup jobs
5 * Kern Sibbald, March MM
7 * Basic tasks done here:
8 * Open DB and create records for this job.
9 * Open Message Channel with Storage daemon to tell him a job will be starting.
10 * Open connection with File daemon and pass him commands
12 * When the File daemon finishes the job, update the DB.
17 Copyright (C) 2000-2005 Kern Sibbald
19 This program is free software; you can redistribute it and/or
20 modify it under the terms of the GNU General Public License
21 version 2 as amended with additional clauses defined in the
22 file LICENSE in the main source directory.
24 This program is distributed in the hope that it will be useful,
25 but WITHOUT ANY WARRANTY; without even the implied warranty of
26 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27 file LICENSE for additional details.
/* Protocol strings exchanged between the Director and the File daemon (FD). */
35 /* Commands sent to File daemon */
36 static char backupcmd[] = "backup\n";
/* printf-style template filled in by do_backup() with the Storage daemon
 * address (%s), its data port (%d) and a third integer flag (%d). */
37 static char storaddr[] = "storage address=%s port=%d ssl=%d\n";
39 /* Responses received from File daemon */
40 static char OKbackup[] = "2000 OK backup\n";
41 static char OKstore[] = "2000 OK storage\n";
/* sscanf template used by wait_for_job_termination() to parse the end-of-job
 * report sent by the FD; it carries five fields: TermCode, JobFiles,
 * ReadBytes, JobBytes and Errors. */
42 static char EndJob[] = "2800 End Job TermCode=%d JobFiles=%u "
43 "ReadBytes=%lld JobBytes=%lld Errors=%u\n";
46 * Called here before the job is run to do the job-specific setup.
/*
 * Prepare catalog state for a backup job before it runs:
 *   - select the Pool (a level-specific pool may replace jcr->pool) and make
 *     sure a matching Pool record exists in the catalog, creating it if needed;
 *   - get or create the FileSet record and remember its creation time;
 *   - fill in jcr->since via get_level_since_time();
 *   - start any configured clone jobs, unless this job is itself a clone.
 *
 *  jcr  job control record of the job being initialised.
 *
 * NOTE(review): the return statements are not visible in this listing;
 * presumably false is returned after each M_FATAL message -- confirm.
 */
49 bool do_backup_init(JCR *jcr)
54 * Get the Pool record -- first apply any level defined pools
/* NOTE(review): the case labels (and any guards on the level pools) are on
 * lines not visible here; each visible assignment overrides jcr->pool with
 * the full, incremental or differential pool respectively. */
56 switch (jcr->JobLevel) {
59 jcr->pool = jcr->full_pool;
64 jcr->pool = jcr->inc_pool;
69 jcr->pool = jcr->dif_pool;
/* Look the pool up in the catalog by the resource name. */
73 memset(&pr, 0, sizeof(pr));
74 bstrncpy(pr.Name, jcr->pool->hdr.name, sizeof(pr.Name));
76 if (!db_get_pool_record(jcr, jcr->db, &pr)) { /* get by Name */
77 /* Try to create the pool */
/* A negative result from create_pool() is reported as fatal. */
78 if (create_pool(jcr, jcr->db, jcr->pool, POOL_OP_CREATE) < 0) {
79 Jmsg(jcr, M_FATAL, 0, _("Pool %s not in database. %s"), pr.Name,
80 db_strerror(jcr->db));
83 Jmsg(jcr, M_INFO, 0, _("Pool %s created in database.\n"), pr.Name);
/* Read the record again so that pr.PoolId (used below) is populated. */
84 if (!db_get_pool_record(jcr, jcr->db, &pr)) { /* get by Name */
85 Jmsg(jcr, M_FATAL, 0, _("Pool %s not in database. %s"), pr.Name,
86 db_strerror(jcr->db));
91 jcr->PoolId = pr.PoolId; /****FIXME**** this can go away */
92 jcr->jr.PoolId = pr.PoolId;
94 if (!get_or_create_fileset_record(jcr, &fsr)) {
/* Keep the FileSet creation time; backup_cleanup() prints it in the report. */
97 bstrncpy(jcr->FSCreateTime, fsr.cCreateTime, sizeof(jcr->FSCreateTime));
99 get_level_since_time(jcr, jcr->since, sizeof(jcr->since));
101 Dmsg2(900, "cloned=%d run_cmds=%p\n", jcr->cloned, jcr->job->run_cmds);
/* Clone jobs are only launched by a job that is not already a clone and that
 * has run commands configured. */
102 if (!jcr->cloned && jcr->job->run_cmds) {
105 POOLMEM *cmd = get_pool_memory(PM_FNAME);
106 UAContext *ua = new_ua_context(jcr);
/* job and runcmd are declared on lines not visible in this listing. */
108 foreach_alist(runcmd, job->run_cmds) {
109 cmd = edit_job_codes(jcr, cmd, runcmd, "");
/* Each clone is started as a console run command tagged with cloned=yes;
 * presumably that sets jcr->cloned in the new job so it does not clone
 * again -- confirm. */
110 Mmsg(ua->cmd, "run %s cloned=yes", cmd);
111 Dmsg1(900, "=============== Clone cmd=%s\n", ua->cmd);
112 parse_ua_args(ua); /* parse command */
113 int stat = run_cmd(ua, ua->cmd);
/* NOTE(review): the test on stat is not visible; on success stat is printed
 * as the JobId of the clone. */
115 Jmsg(jcr, M_ERROR, 0, _("Could not start clone job.\n"));
117 Jmsg(jcr, M_INFO, 0, _("Clone JobId %d started.\n"), stat);
/* NOTE(review): the release of ua is not visible in this listing -- confirm
 * it is freed alongside cmd. */
121 free_pool_memory(cmd);
128 * Do a backup of the specified FileSet
130 * Returns: false on failure
/*
 * Run a backup job end to end from the Director side:
 *   1. record the job start in the catalog;
 *   2. connect to the Storage daemon (SD), start the SD job and the SD
 *      message thread;
 *   3. connect to the File daemon (FD) and send it the include/exclude lists,
 *      the level command, the SD address, the run before/after commands and
 *      finally the backup command;
 *   4. wait for termination and, when the result is JS_Terminated, call
 *      backup_cleanup().
 *
 *  jcr  job control record of the job to run.
 *
 * NOTE(review): the error-path bodies and return statements are on lines not
 * visible in this listing.
 */
133 bool do_backup(JCR *jcr)
/* Default: no TLS requested for the FD to SD connection. */
136 int tls_need = BNET_TLS_NONE;
141 /* Print Job Start message */
142 Jmsg(jcr, M_INFO, 0, _("Start Backup JobId %u, Job=%s\n"),
143 jcr->JobId, jcr->Job);
145 set_jcr_job_status(jcr, JS_Running);
146 Dmsg2(100, "JobId=%d JobLevel=%c\n", jcr->jr.JobId, jcr->jr.JobLevel);
147 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
148 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
153 * Open a message channel connection with the Storage
154 * daemon. This is to let him know that our client
155 * will be contacting him for a backup session.
158 Dmsg0(110, "Open connection with storage daemon\n");
159 set_jcr_job_status(jcr, JS_WaitSD);
161 * Start conversation with Storage daemon
163 if (!connect_to_storage_daemon(jcr, 10, SDConnectTimeout, 1)) {
167 * Now start a job with the Storage daemon
169 if (!start_storage_daemon_job(jcr, jcr->storage, SD_APPEND)) {
173 * Now start a Storage daemon message thread. Note,
174 * this thread is used to provide the catalog services
175 * for the backup job, including inserting the attributes
176 * into the catalog. See catalog_update() in catreq.c
178 if (!start_storage_daemon_message_thread(jcr)) {
181 Dmsg0(150, "Storage daemon connection OK\n");
/* Status is JS_WaitFD while connecting to the FD, then back to JS_Running. */
183 set_jcr_job_status(jcr, JS_WaitFD);
184 if (!connect_to_file_daemon(jcr, 10, FDConnectTimeout, 1)) {
188 set_jcr_job_status(jcr, JS_Running);
189 fd = jcr->file_bsock;
191 if (!send_include_list(jcr)) {
195 if (!send_exclude_list(jcr)) {
199 if (!send_level_command(jcr)) {
204 * send Storage daemon address to the File daemon
/* store is declared on a line not visible here. When SDDport is 0 it is set
 * to SDport, so the FD is always sent a non-default port value. */
207 if (store->SDDport == 0) {
208 store->SDDport = store->SDport;
211 /* TLS Requirement */
/* NOTE(review): the else between the two assignments is not visible;
 * presumably REQUIRED when tls_require is set and OK otherwise -- confirm. */
212 if (store->tls_enable) {
213 if (store->tls_require) {
214 tls_need = BNET_TLS_REQUIRED;
216 tls_need = BNET_TLS_OK;
/* The final argument of this call is on a line not visible in this listing
 * (presumably tls_need, matching the ssl=%d field of storaddr). */
220 bnet_fsend(fd, storaddr, store->address, store->SDDport,
222 if (!response(jcr, fd, OKstore, "Storage", DISPLAY_ERROR)) {
227 if (!send_run_before_and_after_commands(jcr)) {
231 /* Send backup command */
232 bnet_fsend(fd, backupcmd);
233 if (!response(jcr, fd, OKbackup, "backup", DISPLAY_ERROR)) {
237 /* Pickup Job termination data */
238 stat = wait_for_job_termination(jcr);
/* Only the JS_Terminated case is visible here; handling of other statuses
 * is on lines not shown. */
239 if (stat == JS_Terminated) {
240 backup_cleanup(jcr, stat);
248 * Here we wait for the File daemon to signal termination,
249 * then we wait for the Storage daemon. When both
250 * are done, we return the job status.
251 * Also used by restore.c
/*
 * Read messages from the File daemon until it stops sending, pick up its
 * end-of-job report, then wait for the Storage daemon to finish.
 *
 *  jcr  job control record; FDJobStatus, JobStatus, JobFiles, Errors,
 *       ReadBytes and JobBytes are written here.
 *
 * Returns the first status that is not JS_Terminated, checked in the order
 * Director (jcr->JobStatus), FD (jcr->FDJobStatus); otherwise the SD status.
 */
253 int wait_for_job_termination(JCR *jcr)
256 BSOCK *fd = jcr->file_bsock;
258 uint32_t JobFiles, Errors;
259 uint64_t ReadBytes, JobBytes;
261 set_jcr_job_status(jcr, JS_Running);
262 /* Wait for Client to terminate */
/* The loop ends when bget_dirmsg() returns a negative value. */
263 while ((n = bget_dirmsg(fd)) >= 0) {
/* An EndJob line is only parsed while fd_ok is still false; all five fields
 * must convert. NOTE(review): fd_ok is presumably set on a line not visible
 * here. NOTE(review): EndJob uses %lld for the uint64_t targets -- confirm
 * the specifiers match the argument types on all supported platforms. */
264 if (!fd_ok && sscanf(fd->msg, EndJob, &jcr->FDJobStatus, &JobFiles,
265 &ReadBytes, &JobBytes, &Errors) == 5) {
/* The FD status becomes the job status. */
267 set_jcr_job_status(jcr, jcr->FDJobStatus);
268 Dmsg1(100, "FDStatus=%c\n", (char)jcr->JobStatus);
/* Presumably the else branch: anything that is not an EndJob report. */
270 Jmsg(jcr, M_WARNING, 0, _("Unexpected Client Job message: %s\n"),
/* The action taken on cancel is on a line not visible in this listing. */
273 if (job_canceled(jcr)) {
277 if (is_bnet_error(fd)) {
278 Jmsg(jcr, M_FATAL, 0, _("Network error with FD during %s: ERR=%s\n"),
279 job_type_to_str(jcr->JobType), bnet_strerror(fd));
281 bnet_sig(fd, BNET_TERMINATE); /* tell Client we are terminating */
283 /* Note, the SD stores in jcr->JobFiles/ReadBytes/JobBytes/Errors */
284 wait_for_storage_daemon_termination(jcr);
287 /* Return values from FD */
/* NOTE(review): the condition guarding these assignments is not visible;
 * presumably they run only when fd_ok is true, since the locals are
 * otherwise unset, and the fatal message below is the other branch. */
289 jcr->JobFiles = JobFiles;
290 jcr->Errors = Errors;
291 jcr->ReadBytes = ReadBytes;
292 jcr->JobBytes = JobBytes;
294 Jmsg(jcr, M_FATAL, 0, _("No Job status returned from FD.\n"));
297 // Dmsg4(100, "fd_ok=%d FDJS=%d JS=%d SDJS=%d\n", fd_ok, jcr->FDJobStatus,
298 // jcr->JobStatus, jcr->SDJobStatus);
300 /* Return the first error status we find Dir, FD, or SD */
/* A missing FD report or a socket error counts as an FD error termination. */
301 if (!fd_ok || is_bnet_error(fd)) {
302 jcr->FDJobStatus = JS_ErrorTerminated;
304 if (jcr->JobStatus != JS_Terminated) {
305 return jcr->JobStatus;
307 if (jcr->FDJobStatus != JS_Terminated) {
308 return jcr->FDJobStatus;
310 return jcr->SDJobStatus;
314 * Release resources allocated during backup.
/*
 * Finish a backup job: set the final status, update the job end record,
 * reload the job, client and media records for statistics, write the
 * bootstrap file when one is configured, and emit the job report.
 *
 *  jcr       job control record of the finished job.
 *  TermCode  termination status; stored with set_jcr_job_status() and may be
 *            downgraded to JS_ErrorTerminated by failures detected here.
 *
 * NOTE(review): several declarations (mr, cr, fd, bpipe, VolCount, RunTime,
 * msg_type, be) and a number of branch lines are not visible in this listing.
 */
316 void backup_cleanup(JCR *jcr, int TermCode)
318 char sdt[50], edt[50], schedt[50];
319 char ec1[30], ec2[30], ec3[30], ec4[30], ec5[30], compress[50];
320 char term_code[100], fd_term_msg[100], sd_term_msg[100];
321 const char *term_msg;
325 double kbps, compression;
/* TermCode is traced both as a number and as a character. */
328 Dmsg2(100, "Enter backup_cleanup %d %c\n", TermCode, TermCode);
329 dequeue_messages(jcr); /* display any queued messages */
330 memset(&mr, 0, sizeof(mr));
331 memset(&cr, 0, sizeof(cr));
332 set_jcr_job_status(jcr, TermCode);
334 update_job_end_record(jcr); /* update database */
/* Re-read the job record for the report; failure marks the job as errored. */
336 if (!db_get_job_record(jcr, jcr->db, &jcr->jr)) {
337 Jmsg(jcr, M_WARNING, 0, _("Error getting job record for stats: %s"),
338 db_strerror(jcr->db));
339 set_jcr_job_status(jcr, JS_ErrorTerminated);
/* A missing client record only produces a warning in the visible code. */
342 bstrncpy(cr.Name, jcr->client->hdr.name, sizeof(cr.Name));
343 if (!db_get_client_record(jcr, jcr->db, &cr)) {
344 Jmsg(jcr, M_WARNING, 0, _("Error getting client record for stats: %s"),
345 db_strerror(jcr->db));
/* The media record is looked up by jcr->VolumeName; failure marks the job
 * as errored. */
348 bstrncpy(mr.VolumeName, jcr->VolumeName, sizeof(mr.VolumeName));
349 if (!db_get_media_record(jcr, jcr->db, &mr)) {
350 Jmsg(jcr, M_WARNING, 0, _("Error getting Media record for Volume \"%s\": ERR=%s"),
351 mr.VolumeName, db_strerror(jcr->db));
352 set_jcr_job_status(jcr, JS_ErrorTerminated);
355 /* Now update the bootstrap file if any */
/* Written only for a normally terminated job that stored bytes and has a
 * WriteBootstrap target configured. */
356 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes &&
357 jcr->job->WriteBootstrap) {
361 char *fname = jcr->job->WriteBootstrap;
362 VOL_PARAMS *VolParams = NULL;
/* Two ways of opening the target are visible: a pipe through open_bpipe()
 * or a regular file through fopen(). NOTE(review): the condition selecting
 * between them is on lines not visible here. */
368 bpipe = open_bpipe(fname, 0, "w");
369 fd = bpipe ? bpipe->wfd : NULL;
371 /* ***FIXME*** handle BASE */
/* A Full backup opens the file with w+, every other level with a+. */
372 fd = fopen(fname, jcr->JobLevel==L_FULL?"w+":"a+");
/* The remaining arguments of this call are on a line not visible here. */
375 VolCount = db_get_job_volume_parameters(jcr, jcr->db, jcr->JobId,
/* NOTE(review): the test guarding this error is not visible (presumably
 * VolCount == 0). The job is only marked as errored when the SD reported
 * having written files. */
378 Jmsg(jcr, M_ERROR, 0, _("Could not get Job Volume Parameters to "
379 "update Bootstrap file. ERR=%s\n"), db_strerror(jcr->db));
380 if (jcr->SDJobFiles != 0) {
381 set_jcr_job_status(jcr, JS_ErrorTerminated);
385 /* Start output with when and who wrote it */
386 bstrftimes(edt, sizeof(edt), time(NULL));
387 fprintf(fd, "# %s - %s - %s%s\n", edt, jcr->jr.Job,
388 level_to_str(jcr->JobLevel), jcr->since);
/* One bootstrap record is written per volume returned above. */
389 for (int i=0; i < VolCount; i++) {
390 /* Write the record */
391 fprintf(fd, "Volume=\"%s\"\n", VolParams[i].VolumeName);
392 fprintf(fd, "MediaType=\"%s\"\n", VolParams[i].MediaType);
393 fprintf(fd, "VolSessionId=%u\n", jcr->VolSessionId);
394 fprintf(fd, "VolSessionTime=%u\n", jcr->VolSessionTime);
395 fprintf(fd, "VolFile=%u-%u\n", VolParams[i].StartFile,
396 VolParams[i].EndFile);
397 fprintf(fd, "VolBlock=%u-%u\n", VolParams[i].StartBlock,
398 VolParams[i].EndBlock);
399 fprintf(fd, "FileIndex=%d-%d\n", VolParams[i].FirstIndex,
400 VolParams[i].LastIndex);
/* NOTE(review): closing fd or bpipe and freeing VolParams are not visible in
 * this listing -- confirm they happen on the lines omitted here. */
/* Failure to open the bootstrap target marks the job as errored. */
412 Jmsg(jcr, M_ERROR, 0, _("Could not open WriteBootstrap file:\n"
413 "%s: ERR=%s\n"), fname, be.strerror());
414 set_jcr_job_status(jcr, JS_ErrorTerminated);
/* Choose the termination text and the message type for the final report. */
418 msg_type = M_INFO; /* by default INFO message */
419 switch (jcr->JobStatus) {
/* Most case labels are not visible; this first branch picks between the two
 * Backup OK texts depending on FD or SD error counts. */
421 if (jcr->Errors || jcr->SDErrors) {
422 term_msg = _("Backup OK -- with warnings");
424 term_msg = _("Backup OK");
428 case JS_ErrorTerminated:
429 term_msg = _("*** Backup Error ***");
430 msg_type = M_ERROR; /* Generate error message */
/* On error, signal the SD to terminate and cancel the SD message thread if
 * one exists. */
431 if (jcr->store_bsock) {
432 bnet_sig(jcr->store_bsock, BNET_TERMINATE);
433 if (jcr->SD_msg_chan) {
434 pthread_cancel(jcr->SD_msg_chan);
/* The canceled case performs the same SD shutdown steps as the error case. */
439 term_msg = _("Backup Canceled");
440 if (jcr->store_bsock) {
441 bnet_sig(jcr->store_bsock, BNET_TERMINATE);
442 if (jcr->SD_msg_chan) {
443 pthread_cancel(jcr->SD_msg_chan);
/* Fallback for an unrecognised status. NOTE(review): sprintf() into the
 * 100-byte term_code buffer is unbounded and the format is a translated
 * string; consider bsnprintf(), which this function already uses below. */
448 term_msg = term_code;
449 sprintf(term_code, _("Inappropriate term code: %c\n"), jcr->JobStatus);
452 bstrftimes(schedt, sizeof(schedt), jcr->jr.SchedTime);
453 bstrftimes(sdt, sizeof(sdt), jcr->jr.StartTime);
454 bstrftimes(edt, sizeof(edt), jcr->jr.EndTime);
455 RunTime = jcr->jr.EndTime - jcr->jr.StartTime;
/* Rate is JobBytes divided by 1000 times the run time. NOTE(review): the
 * guard against RunTime being 0 is not visible in this listing -- confirm
 * it exists on the omitted lines. */
459 kbps = (double)jcr->jr.JobBytes / (1000 * RunTime);
461 if (!db_get_job_volume_names(jcr, jcr->db, jcr->jr.JobId, &jcr->VolumeName)) {
463 * Note, if the job has erred, most likely it did not write any
464 * tape, so suppress this "error" message since in that case
465 * it is normal. Or look at it the other way, only for a
466 * normal exit should we complain about this error.
468 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes) {
469 Jmsg(jcr, M_ERROR, 0, "%s", db_strerror(jcr->db));
471 jcr->VolumeName[0] = 0; /* none */
/* Compression is reported as None when nothing was read or when the computed
 * saving is below 0.5 percent; otherwise as a percentage with one decimal. */
474 if (jcr->ReadBytes == 0) {
475 bstrncpy(compress, "None", sizeof(compress));
477 compression = (double)100 - 100.0 * ((double)jcr->JobBytes / (double)jcr->ReadBytes);
478 if (compression < 0.5) {
479 bstrncpy(compress, "None", sizeof(compress));
481 bsnprintf(compress, sizeof(compress), "%.1f %%", (float)compression);
484 jobstatus_to_ascii(jcr->FDJobStatus, fd_term_msg, sizeof(fd_term_msg));
485 jobstatus_to_ascii(jcr->SDJobStatus, sd_term_msg, sizeof(sd_term_msg));
487 // bmicrosleep(15, 0); /* for debugging SIGHUP */
/* Final job report; several format lines and arguments are on lines not
 * visible in this listing. */
489 Jmsg(jcr, msg_type, 0, _("Bacula %s (%s): %s\n"
492 " Backup Level: %s%s\n"
493 " Client: \"%s\" %s\n"
494 " FileSet: \"%s\" %s\n"
497 " Scheduled time: %s\n"
501 " FD Files Written: %s\n"
502 " SD Files Written: %s\n"
503 " FD Bytes Written: %s\n"
504 " SD Bytes Written: %s\n"
506 " Software Compression: %s\n"
507 " Volume name(s): %s\n"
508 " Volume Session Id: %d\n"
509 " Volume Session Time: %d\n"
510 " Last Volume Bytes: %s\n"
511 " Non-fatal FD errors: %d\n"
513 " FD termination status: %s\n"
514 " SD termination status: %s\n"
515 " Termination: %s\n\n"),
521 level_to_str(jcr->JobLevel), jcr->since,
522 jcr->client->hdr.name, cr.Uname,
523 jcr->fileset->hdr.name, jcr->FSCreateTime,
525 jcr->store->hdr.name,
530 edit_uint64_with_commas(jcr->jr.JobFiles, ec1),
531 edit_uint64_with_commas(jcr->SDJobFiles, ec4),
532 edit_uint64_with_commas(jcr->jr.JobBytes, ec2),
533 edit_uint64_with_commas(jcr->SDJobBytes, ec5),
539 edit_uint64_with_commas(mr.VolBytes, ec3),
546 Dmsg0(100, "Leave backup_cleanup()\n");