3 * Bacula Director -- backup.c -- responsible for doing backup jobs
5 * Kern Sibbald, March MM
7 * Basic tasks done here:
8 * Open DB and create records for this job.
9 * Open Message Channel with Storage daemon to tell him a job will be starting.
10 * Open connection with File daemon and pass him commands
12 * When the File daemon finishes the job, update the DB.
17 Bacula® - The Network Backup Solution
19 Copyright (C) 2000-2006 Free Software Foundation Europe e.V.
21 The main author of Bacula is Kern Sibbald, with contributions from
22 many others, a complete list can be found in the file AUTHORS.
23 This program is Free Software; you can redistribute it and/or
24 modify it under the terms of version two of the GNU General Public
25 License as published by the Free Software Foundation plus additions
26 that are listed in the file LICENSE.
28 This program is distributed in the hope that it will be useful, but
29 WITHOUT ANY WARRANTY; without even the implied warranty of
30 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
31 General Public License for more details.
33 You should have received a copy of the GNU General Public License
34 along with this program; if not, write to the Free Software
35 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
38 Bacula® is a registered trademark of John Walker.
39 The licensor of Bacula is the Free Software Foundation Europe
40 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
41 Switzerland, email:ftf@fsfeurope.org.
// Protocol strings exchanged with the File daemon (FD) by the functions in this file.
48 /* Commands sent to File daemon */
// backupcmd: sent by do_backup() to start the backup once all setup commands were accepted.
49 static char backupcmd[] = "backup\n";
// storaddr: format string; do_backup() fills in the Storage daemon address, port and TLS need.
50 static char storaddr[] = "storage address=%s port=%d ssl=%d\n";
52 /* Responses received from File daemon */
// OKbackup / OKstore: replies that do_backup() passes to response() as the expected answer
// to the backup command and to the storage address command respectively.
53 static char OKbackup[] = "2000 OK backup\n";
54 static char OKstore[] = "2000 OK storage\n";
// EndJob: used as the sscanf() pattern in wait_for_job_termination(); the message is
// accepted only when all five fields (TermCode, JobFiles, ReadBytes, JobBytes, Errors) parse.
55 static char EndJob[] = "2800 End Job TermCode=%d JobFiles=%u "
56 "ReadBytes=%lld JobBytes=%lld Errors=%u\n";
59 * Called here before the job is run to do the job
// do_backup_init: prepares the catalog records and the write storage for a backup job.
//   jcr - job control record of the backup job being initialised.
//   Returns bool. NOTE(review): the return statements are not visible in this listing;
//   presumably false is returned when one of the checks below fails - confirm.
62 bool do_backup_init(JCR *jcr)
65 free_rstorage(jcr); /* we don't read so release */
// The FileSet catalog record must exist (it is created if needed) before continuing.
67 if (!get_or_create_fileset_record(jcr)) {
72 * Get definitive Job level and since time
// The since text is written into jcr->since, bounded by the size of that buffer.
74 get_level_since_time(jcr, jcr->since, sizeof(jcr->since));
// Pool overrides are applied before the Pool record is looked up by the pool resource name.
76 apply_pool_overrides(jcr);
78 jcr->jr.PoolId = get_or_create_pool_record(jcr, jcr->pool->hdr.name);
// A PoolId of zero is tested as a special case (the body of this branch is not visible here).
79 if (jcr->jr.PoolId == 0) {
83 /* If pool storage specified, use it instead of job storage */
84 copy_wstorage(jcr, jcr->pool->storage, _("Pool resource"));
// NOTE(review): the condition guarding this fatal message is not visible in this listing.
87 Jmsg(jcr, M_FATAL, 0, _("No Storage specification found in Job or Pool.\n"));
91 create_clones(jcr); /* run any clone jobs */
97 * Do a backup of the specified FileSet
99 * Returns: false on failure
// do_backup: drives one backup job. Visible sequence: update the job start record, open
// the Storage daemon (SD) session and its message thread, connect to the File daemon (FD),
// send it the include/exclude lists, level, storage address and run scripts, issue the
// backup command, then wait for termination and run backup_cleanup().
//   jcr - job control record of the job to run.
// NOTE(review): the bodies of the failure branches (if (!...) {) are not visible in this
// listing; how each of them exits the function must be confirmed in the full file.
102 bool do_backup(JCR *jcr)
// tls_need starts as "no TLS" and is raised further down from the storage resource settings.
105 int tls_need = BNET_TLS_NONE;
111 /* Print Job Start message */
112 Jmsg(jcr, M_INFO, 0, _("Start Backup JobId %s, Job=%s\n"),
113 edit_uint64(jcr->JobId, ed1), jcr->Job);
115 set_jcr_job_status(jcr, JS_Running);
116 Dmsg2(100, "JobId=%d JobLevel=%c\n", jcr->jr.JobId, jcr->jr.JobLevel);
// First update of the job start record; a failure is reported as fatal with the DB error text.
117 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
118 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
123 * Open a message channel connection with the Storage
124 * daemon. This is to let him know that our client
125 * will be contacting him for a backup session.
128 Dmsg0(110, "Open connection with storage daemon\n");
// Status is JS_WaitSD while the SD connection is being established.
129 set_jcr_job_status(jcr, JS_WaitSD);
131 * Start conversation with Storage daemon
133 if (!connect_to_storage_daemon(jcr, 10, SDConnectTimeout, 1)) {
137 * Now start a job with the Storage daemon
// The job's write storage list (jcr->wstorage) is passed; NULL is given for the other list argument.
139 if (!start_storage_daemon_job(jcr, NULL, jcr->wstorage)) {
144 * Start the job prior to starting the message thread below
145 * to avoid two threads from using the BSOCK structure at
148 if (!bnet_fsend(jcr->store_bsock, "run")) {
153 * Now start a Storage daemon message thread. Note,
154 * this thread is used to provide the catalog services
155 * for the backup job, including inserting the attributes
156 * into the catalog. See catalog_update() in catreq.c
158 if (!start_storage_daemon_message_thread(jcr)) {
161 Dmsg0(150, "Storage daemon connection OK\n");
// Status is JS_WaitFD while connecting to the File daemon, then back to JS_Running.
163 set_jcr_job_status(jcr, JS_WaitFD);
164 if (!connect_to_file_daemon(jcr, 10, FDConnectTimeout, 1)) {
168 set_jcr_job_status(jcr, JS_Running);
// fd is the FD socket used for every command sent below.
169 fd = jcr->file_bsock;
171 if (!send_include_list(jcr)) {
175 if (!send_exclude_list(jcr)) {
179 if (!send_level_command(jcr)) {
184 * send Storage daemon address to the File daemon
// When SDDport is zero it is set to SDport (the assignment of store is not visible here).
187 if (store->SDDport == 0) {
188 store->SDDport = store->SDport;
191 /* TLS Requirement */
// With tls_enable set, tls_need becomes BNET_TLS_REQUIRED when tls_require is also set;
// BNET_TLS_OK is the other value assigned (the else line itself is not visible here).
192 if (store->tls_enable) {
193 if (store->tls_require) {
194 tls_need = BNET_TLS_REQUIRED;
196 tls_need = BNET_TLS_OK;
// Send the storage command and require the OKstore reply from the FD.
200 bnet_fsend(fd, storaddr, store->address, store->SDDport, tls_need);
201 if (!response(jcr, fd, OKstore, "Storage", DISPLAY_ERROR)) {
205 if (!send_runscripts_commands(jcr)) {
210 * We re-update the job start record so that the start
211 * time is set after the run before job. This avoids
212 * that any files created by the run before job will
213 * be saved twice. They will be backed up in the current
214 * job, but not in the next one unless they are changed.
215 * Without this, they will be backed up in this job and
216 * in the next job run because in that case, their date
217 * is after the start of this run.
// Start time is reset to "now" in both the jcr and the job record before the second update.
219 jcr->start_time = time(NULL);
220 jcr->jr.StartTime = jcr->start_time;
221 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
222 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
225 /* Send backup command */
226 bnet_fsend(fd, backupcmd);
227 if (!response(jcr, fd, OKbackup, "backup", DISPLAY_ERROR)) {
231 /* Pickup Job termination data */
// backup_cleanup() is called here only when the combined status is JS_Terminated.
232 stat = wait_for_job_termination(jcr);
233 if (stat == JS_Terminated) {
234 backup_cleanup(jcr, stat);
239 /* Come here only after starting SD thread */
// Error path: mark the job as JS_ErrorTerminated and wait for the SD side to finish;
// the debug messages log jcr->use_count() before and after the wait.
241 set_jcr_job_status(jcr, JS_ErrorTerminated);
242 Dmsg1(400, "wait for sd. use=%d\n", jcr->use_count());
243 wait_for_storage_daemon_termination(jcr);
244 Dmsg1(400, "after wait for sd. use=%d\n", jcr->use_count());
250 * Here we wait for the File daemon to signal termination,
251 * then we wait for the Storage daemon. When both
252 * are done, we return the job status.
253 * Also used by restore.c
// wait_for_job_termination: reads FD messages until the socket read returns a negative
// value, then waits for the SD and returns a combined job status.
//   jcr - job control record; its file_bsock is the FD socket that is read.
//   Returns the first status that is not JS_Terminated among jcr->JobStatus and
//   jcr->FDJobStatus (in that order); otherwise jcr->SDJobStatus.
// NOTE(review): the declaration and assignment of fd_ok and n are not visible in this listing.
255 int wait_for_job_termination(JCR *jcr)
258 BSOCK *fd = jcr->file_bsock;
// Receive the counters parsed from the FD EndJob message.
260 uint32_t JobFiles, Errors;
261 uint64_t ReadBytes, JobBytes;
263 set_jcr_job_status(jcr, JS_Running);
264 /* Wait for Client to terminate */
265 while ((n = bget_dirmsg(fd)) >= 0) {
// Only considered while fd_ok is still false, and only accepted when all five EndJob
// fields parse.
// NOTE(review): EndJob uses %lld while ReadBytes/JobBytes are uint64_t - confirm intended.
266 if (!fd_ok && sscanf(fd->msg, EndJob, &jcr->FDJobStatus, &JobFiles,
267 &ReadBytes, &JobBytes, &Errors) == 5) {
// The status reported by the FD becomes the job status.
269 set_jcr_job_status(jcr, jcr->FDJobStatus);
270 Dmsg1(100, "FDStatus=%c\n", (char)jcr->JobStatus);
// Warning issued for an unexpected FD message (the else line itself is not visible here).
272 Jmsg(jcr, M_WARNING, 0, _("Unexpected Client Job message: %s\n"),
// Cancellation is checked on every loop iteration (the branch body is not visible here).
275 if (job_canceled(jcr)) {
// After the loop a socket error is reported as fatal, naming the job type.
279 if (is_bnet_error(fd)) {
280 Jmsg(jcr, M_FATAL, 0, _("Network error with FD during %s: ERR=%s\n"),
281 job_type_to_str(jcr->JobType), bnet_strerror(fd));
283 bnet_sig(fd, BNET_TERMINATE); /* tell Client we are terminating */
285 /* Note, the SD stores in jcr->JobFiles/ReadBytes/JobBytes/Errors */
286 wait_for_storage_daemon_termination(jcr);
289 /* Return values from FD */
// The FD counters overwrite the jcr fields after the SD wait.
// NOTE(review): the guard around these assignments is not visible here; presumably fd_ok.
291 jcr->JobFiles = JobFiles;
292 jcr->Errors = Errors;
293 jcr->ReadBytes = ReadBytes;
294 jcr->JobBytes = JobBytes;
// Fatal message for the case where no EndJob status was received (guard not visible here).
296 Jmsg(jcr, M_FATAL, 0, _("No Job status returned from FD.\n"));
299 // Dmsg4(100, "fd_ok=%d FDJS=%d JS=%d SDJS=%d\n", fd_ok, jcr->FDJobStatus,
300 // jcr->JobStatus, jcr->SDJobStatus);
302 /* Return the first error status we find Dir, FD, or SD */
// A missing EndJob or a socket error forces the FD status to JS_ErrorTerminated.
303 if (!fd_ok || is_bnet_error(fd)) {
304 jcr->FDJobStatus = JS_ErrorTerminated;
306 if (jcr->JobStatus != JS_Terminated) {
307 return jcr->JobStatus;
309 if (jcr->FDJobStatus != JS_Terminated) {
310 return jcr->FDJobStatus;
312 return jcr->SDJobStatus;
316 * Release resources allocated during backup.
// backup_cleanup: finalises a backup job. Sets the final status, updates the job end
// record, reloads the job/client/media records for statistics, updates the bootstrap
// file and emits the job report through Jmsg().
//   jcr      - job control record of the finished job.
//   TermCode - termination status that is stored as the job status.
// NOTE(review): several declarations, case labels and report arguments are not visible
// in this listing; the comments below describe only the visible statements.
318 void backup_cleanup(JCR *jcr, int TermCode)
// Scratch buffers for formatted times, edited numbers and status texts used in the report.
320 char sdt[50], edt[50], schedt[50];
321 char ec1[30], ec2[30], ec3[30], ec4[30], ec5[30], compress[50];
322 char ec6[30], ec7[30], ec8[30], elapsed[50];
323 char term_code[100], fd_term_msg[100], sd_term_msg[100];
324 const char *term_msg;
328 double kbps, compression;
331 Dmsg2(100, "Enter backup_cleanup %d %c\n", TermCode, TermCode);
332 dequeue_messages(jcr); /* display any queued messages */
// The media (mr) and client (cr) records are zeroed before being used as lookup keys.
333 memset(&mr, 0, sizeof(mr));
334 memset(&cr, 0, sizeof(cr));
335 set_jcr_job_status(jcr, TermCode);
337 update_job_end_record(jcr); /* update database */
// Re-read the job record; a failure is only a warning but marks the job as errored.
339 if (!db_get_job_record(jcr, jcr->db, &jcr->jr)) {
340 Jmsg(jcr, M_WARNING, 0, _("Error getting job record for stats: %s"),
341 db_strerror(jcr->db));
342 set_jcr_job_status(jcr, JS_ErrorTerminated);
// The client record is looked up by the client resource name; failure yields a warning.
345 bstrncpy(cr.Name, jcr->client->hdr.name, sizeof(cr.Name));
346 if (!db_get_client_record(jcr, jcr->db, &cr)) {
347 Jmsg(jcr, M_WARNING, 0, _("Error getting client record for stats: %s"),
348 db_strerror(jcr->db));
// The media record is looked up by jcr->VolumeName; failure warns and marks the job errored.
351 bstrncpy(mr.VolumeName, jcr->VolumeName, sizeof(mr.VolumeName));
352 if (!db_get_media_record(jcr, jcr->db, &mr)) {
353 Jmsg(jcr, M_WARNING, 0, _("Error getting Media record for Volume \"%s\": ERR=%s"),
354 mr.VolumeName, db_strerror(jcr->db));
355 set_jcr_job_status(jcr, JS_ErrorTerminated);
358 update_bootstrap_file(jcr);
// Choose the termination text and message type from the final job status.
360 msg_type = M_INFO; /* by default INFO message */
361 switch (jcr->JobStatus) {
// Normal termination: FD or SD errors downgrade the text to "with warnings"
// (the case label for this branch is not visible here).
363 if (jcr->Errors || jcr->SDErrors) {
364 term_msg = _("Backup OK -- with warnings");
366 term_msg = _("Backup OK");
// Error termination: report as M_ERROR, signal the SD socket with BNET_TERMINATE and
// cancel the SD message thread when one is recorded in jcr->SD_msg_chan.
370 case JS_ErrorTerminated:
371 term_msg = _("*** Backup Error ***");
372 msg_type = M_ERROR; /* Generate error message */
373 if (jcr->store_bsock) {
374 bnet_sig(jcr->store_bsock, BNET_TERMINATE);
375 if (jcr->SD_msg_chan) {
376 pthread_cancel(jcr->SD_msg_chan);
// Canceled job: same SD shutdown steps (the case label is not visible here).
381 term_msg = _("Backup Canceled");
382 if (jcr->store_bsock) {
383 bnet_sig(jcr->store_bsock, BNET_TERMINATE);
384 if (jcr->SD_msg_chan) {
385 pthread_cancel(jcr->SD_msg_chan);
// Any other status: the text is built in term_code (presumably the default case - confirm).
// NOTE(review): sprintf() is not bounded by sizeof(term_code) and the format is translated
// text; a bounded call such as the bsnprintf() used below for compress would be safer.
390 term_msg = term_code;
391 sprintf(term_code, _("Inappropriate term code: %c\n"), jcr->JobStatus);
// Format scheduled/start/end times and derive the run time from the job record.
394 bstrftimes(schedt, sizeof(schedt), jcr->jr.SchedTime);
395 bstrftimes(sdt, sizeof(sdt), jcr->jr.StartTime);
396 bstrftimes(edt, sizeof(edt), jcr->jr.EndTime);
397 RunTime = jcr->jr.EndTime - jcr->jr.StartTime;
// Rate is JobBytes divided by (1000 * RunTime).
// NOTE(review): no guard for RunTime == 0 is visible in this listing - confirm one exists.
401 kbps = (double)jcr->jr.JobBytes / (1000 * RunTime);
403 if (!db_get_job_volume_names(jcr, jcr->db, jcr->jr.JobId, &jcr->VolumeName)) {
405 * Note, if the job has erred, most likely it did not write any
406 * tape, so suppress this "error" message since in that case
407 * it is normal. Or look at it the other way, only for a
408 * normal exit should we complain about this error.
410 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes) {
411 Jmsg(jcr, M_ERROR, 0, "%s", db_strerror(jcr->db));
// The volume name list is emptied when the lookup failed.
413 jcr->VolumeName[0] = 0; /* none */
// Compression text: "None" when nothing was read or the saving is below 0.5 percent,
// otherwise the percentage saved (JobBytes relative to ReadBytes) with one decimal.
416 if (jcr->ReadBytes == 0) {
417 bstrncpy(compress, "None", sizeof(compress));
419 compression = (double)100 - 100.0 * ((double)jcr->JobBytes / (double)jcr->ReadBytes);
420 if (compression < 0.5) {
421 bstrncpy(compress, "None", sizeof(compress));
423 bsnprintf(compress, sizeof(compress), "%.1f %%", (float)compression);
// Text forms of the FD and SD termination statuses for the report.
426 jobstatus_to_ascii(jcr->FDJobStatus, fd_term_msg, sizeof(fd_term_msg));
427 jobstatus_to_ascii(jcr->SDJobStatus, sd_term_msg, sizeof(sd_term_msg));
429 // bmicrosleep(15, 0); /* for debugging SIGHUP */
// Emit the job report with the message type chosen above. Parts of the format string
// and of the argument list are not visible in this listing.
431 Jmsg(jcr, msg_type, 0, _("Bacula %s (%s): %s\n"
434 " Backup Level: %s%s\n"
435 " Client: \"%s\" %s\n"
436 " FileSet: \"%s\" %s\n"
437 " Pool: \"%s\" (From %s)\n"
438 " Storage: \"%s\" (From %s)\n"
439 " Scheduled time: %s\n"
442 " Elapsed time: %s\n"
444 " FD Files Written: %s\n"
445 " SD Files Written: %s\n"
446 " FD Bytes Written: %s (%sB)\n"
447 " SD Bytes Written: %s (%sB)\n"
449 " Software Compression: %s\n"
450 " Volume name(s): %s\n"
451 " Volume Session Id: %d\n"
452 " Volume Session Time: %d\n"
453 " Last Volume Bytes: %s (%sB)\n"
454 " Non-fatal FD errors: %d\n"
456 " FD termination status: %s\n"
457 " SD termination status: %s\n"
458 " Termination: %s\n\n"),
464 level_to_str(jcr->JobLevel), jcr->since,
465 jcr->client->name(), cr.Uname,
466 jcr->fileset->name(), jcr->FSCreateTime,
467 jcr->pool->name(), jcr->pool_source,
468 jcr->wstore->name(), jcr->wstore_source,
472 edit_utime(RunTime, elapsed, sizeof(elapsed)),
474 edit_uint64_with_commas(jcr->jr.JobFiles, ec1),
475 edit_uint64_with_commas(jcr->SDJobFiles, ec2),
476 edit_uint64_with_commas(jcr->jr.JobBytes, ec3),
477 edit_uint64_with_suffix(jcr->jr.JobBytes, ec4),
478 edit_uint64_with_commas(jcr->SDJobBytes, ec5),
479 edit_uint64_with_suffix(jcr->SDJobBytes, ec6),
485 edit_uint64_with_commas(mr.VolBytes, ec7),
486 edit_uint64_with_suffix(mr.VolBytes, ec8),
493 Dmsg0(100, "Leave backup_cleanup()\n");
// update_bootstrap_file: writes the volume/session information of a finished job to the
// job's WriteBootstrap target.
//   jcr - job control record of the finished job.
// NOTE(review): the branch structure (pipe versus file, open failure) and the close calls
// are not visible in this listing; the comments describe only the visible statements.
496 void update_bootstrap_file(JCR *jcr)
498 /* Now update the bootstrap file if any */
// Only done for a normally terminated job that wrote bytes and has WriteBootstrap set.
499 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes &&
500 jcr->job->WriteBootstrap) {
// The target name is built in a pool buffer by expanding job codes in the
// WriteBootstrap string; the buffer is released at the end of this block.
504 POOLMEM *fname = get_pool_memory(PM_FNAME);
505 fname = edit_job_codes(jcr, fname, jcr->job->WriteBootstrap, "");
507 VOL_PARAMS *VolParams = NULL;
// Pipe case: the command after the first character is opened for writing and the
// write side of the pipe is used as the output stream (NULL when the pipe failed).
513 bpipe = open_bpipe(fname+1, 0, "w"); /* skip first char "|" */
514 fd = bpipe ? bpipe->wfd : NULL;
516 /* ***FIXME*** handle BASE */
// File case: a Full level job opens with "w+b", any other level opens with "a+b".
517 fd = fopen(fname, jcr->JobLevel==L_FULL?"w+b":"a+b");
// Volume parameters for this JobId are fetched from the catalog.
520 VolCount = db_get_job_volume_parameters(jcr, jcr->db, jcr->JobId,
// Reported when no volume parameters could be obtained (the test on VolCount is not
// visible here); the job is marked errored only when the SD did write files.
523 Jmsg(jcr, M_ERROR, 0, _("Could not get Job Volume Parameters to "
524 "update Bootstrap file. ERR=%s\n"), db_strerror(jcr->db));
525 if (jcr->SDJobFiles != 0) {
526 set_jcr_job_status(jcr, JS_ErrorTerminated);
530 /* Start output with when and who wrote it */
// Header comment line: current time, job name, level text and since text.
531 bstrftimes(edt, sizeof(edt), time(NULL));
532 fprintf(fd, "# %s - %s - %s%s\n", edt, jcr->jr.Job,
533 level_to_str(jcr->JobLevel), jcr->since);
// One group of records per volume; the session id and time come from the jcr and are
// therefore the same for every volume of this job.
534 for (int i=0; i < VolCount; i++) {
535 /* Write the record */
536 fprintf(fd, "Volume=\"%s\"\n", VolParams[i].VolumeName);
537 fprintf(fd, "MediaType=\"%s\"\n", VolParams[i].MediaType);
538 fprintf(fd, "VolSessionId=%u\n", jcr->VolSessionId);
539 fprintf(fd, "VolSessionTime=%u\n", jcr->VolSessionTime);
540 fprintf(fd, "VolFile=%u-%u\n", VolParams[i].StartFile,
541 VolParams[i].EndFile);
542 fprintf(fd, "VolBlock=%u-%u\n", VolParams[i].StartBlock,
543 VolParams[i].EndBlock);
544 fprintf(fd, "FileIndex=%d-%d\n", VolParams[i].FirstIndex,
545 VolParams[i].LastIndex);
// Failure to open the bootstrap target is an error and marks the job as errored.
557 Jmsg(jcr, M_ERROR, 0, _("Could not open WriteBootstrap file:\n"
558 "%s: ERR=%s\n"), fname, be.strerror());
559 set_jcr_job_status(jcr, JS_ErrorTerminated);
561 free_pool_memory(fname);