3 * Bacula Director -- backup.c -- responsible for doing backup jobs
5 * Kern Sibbald, March MM
7 * Basic tasks done here:
8 * Open DB and create records for this job.
9 * Open Message Channel with Storage daemon to tell him a job will be starting.
10 * Open connection with File daemon and pass him commands
12 * When the File daemon finishes the job, update the DB.
18 Copyright (C) 2000-2003 Kern Sibbald and John Walker
20 This program is free software; you can redistribute it and/or
21 modify it under the terms of the GNU General Public License as
22 published by the Free Software Foundation; either version 2 of
23 the License, or (at your option) any later version.
25 This program is distributed in the hope that it will be useful,
26 but WITHOUT ANY WARRANTY; without even the implied warranty of
27 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
28 General Public License for more details.
30 You should have received a copy of the GNU General Public
31 License along with this program; if not, write to the Free
32 Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
41 /* Commands sent to File daemon */
42 static char backupcmd[] = "backup\n";
/* Filled in by do_backup() with the store address, SDDport and enable_ssl. */
43 static char storaddr[] = "storage address=%s port=%d ssl=%d\n";
/* Takes two strings: a level keyword followed by either a blank or jcr->stime. */
44 static char levelcmd[] = "level = %s%s\n";
46 /* Responses received from File daemon */
47 static char OKbackup[] = "2000 OK backup\n";
48 static char OKstore[] = "2000 OK storage\n";
49 static char OKlevel[] = "2000 OK level\n";
/* sscanf pattern used by wait_for_job_termination(); a complete match yields five fields. */
50 static char EndBackup[] = "2801 End Backup Job TermCode=%d JobFiles=%u "
51 "ReadBytes=%" lld " JobBytes=%" lld " Errors=%u\n";
54 /* Forward referenced functions */
55 static void backup_cleanup(JCR *jcr, int TermCode, char *since);
56 static int wait_for_job_termination(JCR *jcr);
58 /* External functions */
61 * Do a backup of the specified FileSet
63 * Returns: 0 on failure
/*
 * do_backup -- run one backup job described by jcr.
 *
 * Statements visible in this listing, in order: get/create the Client
 * and FileSet catalog records, look up the start time of a prior job,
 * update the job start record, get (or create) the Pool record, connect
 * to the Storage daemon and start its job and message thread, connect to
 * the File daemon, send the include list, exclude list, storage address,
 * level and backup commands, wait for termination, then call
 * backup_cleanup().
 *
 * NOTE(review): the embedded line numbers skip values and several
 * variables (fsr, pr, fd, stat) have no visible declaration, so short
 * lines (braces, case labels, return/goto statements) are missing from
 * this listing. The exact control flow between the statements below
 * cannot be confirmed from here.
 */
66 int do_backup(JCR *jcr)
68 char since[MAXSTRING];
/* A failure to get or create the Client record is reported with the DB error text. */
76 if (!get_or_create_client_record(jcr)) {
77 Jmsg(jcr, M_ERROR, 0, _("Could not get/create Client record. ERR=%s\n"),
78 db_strerror(jcr->db));
83 * Get or Create FileSet record
/* Start from a zeroed record and copy in the FileSet resource name. */
/* NOTE(review): unbounded strcpy; assumes hdr.name fits in fsr.FileSet -- confirm. */
85 memset(&fsr, 0, sizeof(fsr));
86 strcpy(fsr.FileSet, jcr->fileset->hdr.name);
87 if (jcr->fileset->have_MD5) {
/*
 * MD5Final() is run on a local copy of the FileSet MD5 context, not on
 * the context stored in the resource. The base64 text of the 16-byte
 * signature is stored in fsr.MD5 and copied back into the resource.
 */
88 struct MD5Context md5c;
89 unsigned char signature[16];
90 memcpy(&md5c, &jcr->fileset->md5c, sizeof(md5c));
91 MD5Final(signature, &md5c);
92 bin_to_base64(fsr.MD5, (char *)signature, 16); /* encode 16 bytes */
93 strcpy(jcr->fileset->MD5, fsr.MD5);
95 Jmsg(jcr, M_WARNING, 0, _("FileSet MD5 signature not found.\n"));
97 if (!db_create_fileset_record(jcr, jcr->db, &fsr)) {
98 Jmsg(jcr, M_ERROR, 0, _("Could not create FileSet record. ERR=%s\n"),
99 db_strerror(jcr->db));
/* The job record carries the FileSetId obtained above. */
102 jcr->jr.FileSetId = fsr.FileSetId;
103 Dmsg2(119, "Created FileSet %s record %d\n", jcr->fileset->hdr.name,
107 * FULL backup job to get the time/date for a
108 * differential or incremental save.
/* Pool memory buffer handed to db_find_job_start_time() below. */
110 jcr->stime = get_pool_memory(PM_MESSAGE);
113 switch (jcr->JobLevel) {
116 /* Look up start time of last job */
118 if (!db_find_job_start_time(jcr, jcr->db, &jcr->jr, &jcr->stime)) {
/* Lookup failed: both JobLevel and jr.Level are switched to L_FULL. */
119 Jmsg(jcr, M_INFO, 0, _("No prior or suitable FULL backup found. Doing FULL backup.\n"));
120 jcr->JobLevel = jcr->jr.Level = L_FULL;
/* Build the text that backup_cleanup() prints directly after the level name. */
122 strcpy(since, ", since=");
123 bstrncat(since, jcr->stime, sizeof(since));
125 Dmsg1(115, "Last start time = %s\n", jcr->stime);
/* Record the job id and start time in the catalog job record. */
129 jcr->jr.JobId = jcr->JobId;
130 jcr->jr.StartTime = jcr->start_time;
131 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
132 Jmsg(jcr, M_ERROR, 0, "%s", db_strerror(jcr->db));
136 jcr->fname = (char *) get_pool_memory(PM_FNAME);
138 /* Print Job Start message */
139 Jmsg(jcr, M_INFO, 0, _("Start Backup JobId %d, Job=%s\n"),
140 jcr->JobId, jcr->Job);
143 * Get the Pool record
/* NOTE(review): unbounded strcpy; assumes the pool name fits in pr.Name -- confirm. */
145 memset(&pr, 0, sizeof(pr));
146 strcpy(pr.Name, jcr->pool->hdr.name);
/* Loop while the lookup by name fails, attempting to create the pool each time. */
147 while (!db_get_pool_record(jcr, jcr->db, &pr)) { /* get by Name */
148 /* Try to create the pool */
149 if (create_pool(jcr, jcr->db, jcr->pool, 1) < 0) {
150 Jmsg(jcr, M_FATAL, 0, _("Pool %s not in database. %s"), pr.Name,
151 db_strerror(jcr->db));
154 Jmsg(jcr, M_INFO, 0, _("Pool %s created in database.\n"), pr.Name);
/* The PoolId is stored both on the JCR and in the job record. */
157 jcr->PoolId = pr.PoolId; /****FIXME**** this can go away */
158 jcr->jr.PoolId = pr.PoolId;
161 * Open a message channel connection with the Storage
162 * daemon. This is to let him know that our client
163 * will be contacting him for a backup session.
166 Dmsg0(110, "Open connection with storage daemon\n");
167 set_jcr_job_status(jcr, JS_WaitSD);
169 * Start conversation with Storage daemon
171 if (!connect_to_storage_daemon(jcr, 10, SDConnectTimeout, 1)) {
175 * Now start a job with the Storage daemon
177 if (!start_storage_daemon_job(jcr)) {
181 * Now start a Storage daemon message thread
183 if (!start_storage_daemon_message_thread(jcr)) {
186 Dmsg0(150, "Storage daemon connection OK\n");
/* Status is JS_WaitFD while connecting to the File daemon, then JS_Running. */
188 set_jcr_job_status(jcr, JS_WaitFD);
189 if (!connect_to_file_daemon(jcr, 10, FDConnectTimeout, 1)) {
193 set_jcr_job_status(jcr, JS_Running);
194 fd = jcr->file_bsock;
196 if (!send_include_list(jcr)) {
200 if (!send_exclude_list(jcr)) {
205 * send Storage daemon address to the File daemon
/* An SDDport of 0 is replaced by SDport before the address is sent. */
207 if (jcr->store->SDDport == 0) {
208 jcr->store->SDDport = jcr->store->SDport;
210 bnet_fsend(fd, storaddr, jcr->store->address, jcr->store->SDDport,
211 jcr->store->enable_ssl);
212 if (!response(fd, OKstore, "Storage", 1)) {
217 * Send Level command to File daemon
/* The case labels for this switch are not visible in this listing. */
219 switch (jcr->JobLevel) {
221 bnet_fsend(fd, levelcmd, "base", " ");
224 bnet_fsend(fd, levelcmd, "full", " ");
/* The looked-up start time is sent as the second field, then its buffer is released. */
228 bnet_fsend(fd, levelcmd, "since ", jcr->stime);
229 free_pool_memory(jcr->stime);
234 Jmsg2(jcr, M_FATAL, 0, _("Unimplemented backup level %d %c\n"),
235 jcr->JobLevel, jcr->JobLevel);
238 Dmsg1(120, ">filed: %s", fd->msg);
239 if (!response(fd, OKlevel, "Level", 1)) {
243 /* Send backup command */
244 bnet_fsend(fd, backupcmd);
245 if (!response(fd, OKbackup, "backup", 1)) {
249 /* Pickup Job termination data */
/* The status returned by the wait is passed to backup_cleanup() as its TermCode. */
250 stat = wait_for_job_termination(jcr);
251 backup_cleanup(jcr, stat, since);
/*
 * NOTE(review): jcr->stime is also released right after the since level
 * command above. Any guard against freeing it twice would be on lines
 * not visible in this listing -- confirm.
 */
256 free_pool_memory(jcr->stime);
/* Cleanup with the termination code forced to JS_ErrorTerminated. */
259 backup_cleanup(jcr, JS_ErrorTerminated, since);
265 * Here we wait for the File daemon to signal termination,
266 * then we wait for the Storage daemon. When both
267 * are done, we return the job status.
/*
 * wait_for_job_termination -- read File daemon messages, then wait for
 * the Storage daemon and return a job status.
 *
 * The visible return statements yield one of: jcr->JobStatus,
 * JS_ErrorTerminated, or jcr->SDJobStatus.
 *
 * NOTE(review): n and fd_ok are used below but their declarations and
 * the assignment(s) to fd_ok are on lines missing from this listing, as
 * are the closing braces and any else/break statements.
 */
269 static int wait_for_job_termination(JCR *jcr)
272 BSOCK *fd = jcr->file_bsock;
275 set_jcr_job_status(jcr, JS_Running);
276 /* Wait for Client to terminate */
/* The loop continues while bget_dirmsg() returns a non-negative value. */
277 while ((n = bget_dirmsg(fd)) >= 0) {
/*
 * A message that matches EndBackup with all five fields supplies the FD
 * termination code, file count, byte counts and error count.
 */
278 if (sscanf(fd->msg, EndBackup, &jcr->FDJobStatus, &jcr->JobFiles,
279 &jcr->ReadBytes, &jcr->JobBytes, &jcr->Errors) == 5) {
/* The job status is set from the status the File daemon reported. */
281 set_jcr_job_status(jcr, jcr->FDJobStatus);
282 Dmsg1(100, "FDStatus=%c\n", (char)jcr->JobStatus);
/* presumably the else branch for messages that do not match EndBackup -- confirm */
284 Jmsg(jcr, M_WARNING, 0, _("Unexpected Client Job message: %s\n"),
287 if (job_canceled(jcr)) {
/* A socket error on the FD connection is reported as fatal. */
291 if (is_bnet_error(fd)) {
292 Jmsg(jcr, M_FATAL, 0, _("Network error with FD during BACKUP: ERR=%s\n"),
295 bnet_sig(fd, BNET_TERMINATE); /* tell Client we are terminating */
297 wait_for_storage_daemon_termination(jcr);
299 /* Return the first error status we find FD or SD */
/* fd_ok set and status other than JS_Terminated: return that status. */
300 if (fd_ok && jcr->JobStatus != JS_Terminated) {
301 return jcr->JobStatus;
/* fd_ok not set, or a socket error on the FD connection. */
303 if (!fd_ok || is_bnet_error(fd)) {
304 return JS_ErrorTerminated;
/* Otherwise the Storage daemon status is the result. */
306 return jcr->SDJobStatus;
310 * Release resources allocated during backup.
/*
 * backup_cleanup -- finish a backup job: set the final job status,
 * update the catalog end record, write bootstrap records when
 * requested, and emit the job summary through Jmsg().
 *
 *   jcr       job control record for the job being finished
 *   TermCode  status passed to set_jcr_job_status() on entry
 *   since     text printed directly after the backup level in the summary
 *
 * NOTE(review): several locals used below (mr, msg_type, term_msg,
 * RunTime, bpipe, fd, VolCount) are declared on lines missing from this
 * listing, as are braces, case labels and else branches.
 */
312 static void backup_cleanup(JCR *jcr, int TermCode, char *since)
314 char sdt[50], edt[50];
315 char ec1[30], ec2[30], ec3[30], compress[50];
316 char term_code[100], fd_term_msg[100], sd_term_msg[100];
320 double kbps, compression;
323 Dmsg0(100, "Enter backup_cleanup()\n");
324 memset(&mr, 0, sizeof(mr));
325 set_jcr_job_status(jcr, TermCode);
327 update_job_end_record(jcr); /* update database */
/* Re-read the job record for the statistics; a failure marks the job JS_ErrorTerminated. */
329 if (!db_get_job_record(jcr, jcr->db, &jcr->jr)) {
330 Jmsg(jcr, M_WARNING, 0, _("Error getting job record for stats: %s"),
331 db_strerror(jcr->db));
332 set_jcr_job_status(jcr, JS_ErrorTerminated);
/* NOTE(review): unbounded strcpy; assumes jcr->VolumeName fits in mr.VolumeName -- confirm. */
335 strcpy(mr.VolumeName, jcr->VolumeName);
/* mr.VolBytes from this record is printed in the summary below. */
336 if (!db_get_media_record(jcr, jcr->db, &mr)) {
337 Jmsg(jcr, M_WARNING, 0, _("Error getting Media record for Volume \"%s\": ERR=%s"),
338 mr.VolumeName, db_strerror(jcr->db));
339 set_jcr_job_status(jcr, JS_ErrorTerminated);
342 /* Now update the bootstrap file if any */
/* Requires status JS_Terminated, a nonzero JobBytes, and WriteBootstrap set. */
343 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes &&
344 jcr->job->WriteBootstrap) {
348 char *fname = jcr->job->WriteBootstrap;
349 VOL_PARAMS *VolParams = NULL;
/*
 * Output goes either to a pipe opened with open_bpipe() or to a file
 * opened with fopen(). The test that selects between the two is not
 * visible in this listing.
 */
355 bpipe = open_bpipe(fname, 0, "w");
356 fd = bpipe ? bpipe->wfd : NULL;
358 /* ***FIXME*** handle BASE */
/* Mode is w+ for an L_FULL job and a+ for any other level. */
359 fd = fopen(fname, jcr->JobLevel==L_FULL?"w+":"a+");
362 VolCount = db_get_job_volume_parameters(jcr, jcr->db, jcr->JobId,
365 Jmsg(jcr, M_ERROR, 0, _("Could not get Job Volume Parameters. ERR=%s\n"),
366 db_strerror(jcr->db));
/* One record per volume; the session id and time come from the JCR for every volume. */
368 for (int i=0; i < VolCount; i++) {
369 /* Write the record */
370 fprintf(fd, "Volume=\"%s\"\n", VolParams[i].VolumeName);
371 fprintf(fd, "VolSessionId=%u\n", jcr->VolSessionId);
372 fprintf(fd, "VolSessionTime=%u\n", jcr->VolSessionTime);
373 fprintf(fd, "VolFile=%u-%u\n", VolParams[i].StartFile,
374 VolParams[i].EndFile);
375 fprintf(fd, "VolBlock=%u-%u\n", VolParams[i].StartBlock,
376 VolParams[i].EndBlock);
377 fprintf(fd, "FileIndex=%d-%d\n", VolParams[i].FirstIndex,
378 VolParams[i].LastIndex);
/* Failure to open the bootstrap output is reported and marks the job JS_ErrorTerminated. */
389 Jmsg(jcr, M_ERROR, 0, _("Could not open WriteBootstrap file:\n"
390 "%s: ERR=%s\n"), fname, strerror(errno));
391 set_jcr_job_status(jcr, JS_ErrorTerminated);
395 msg_type = M_INFO; /* by default INFO message */
/* Choose the termination text (and message type) from the final job status. */
396 switch (jcr->JobStatus) {
398 term_msg = _("Backup OK");
401 case JS_ErrorTerminated:
402 term_msg = _("*** Backup Error ***");
403 msg_type = M_ERROR; /* Generate error message */
/* With an SD socket present, signal the SD to terminate and cancel the SD message thread. */
404 if (jcr->store_bsock) {
405 bnet_sig(jcr->store_bsock, BNET_TERMINATE);
406 pthread_cancel(jcr->SD_msg_chan);
/* Same SD shutdown steps as above; the case label is not visible in this listing. */
410 term_msg = _("Backup Canceled");
411 if (jcr->store_bsock) {
412 bnet_sig(jcr->store_bsock, BNET_TERMINATE);
413 pthread_cancel(jcr->SD_msg_chan);
/* term_msg points at term_code, which the sprintf on the next line fills in. */
417 term_msg = term_code;
418 sprintf(term_code, _("Inappropriate term code: %c\n"), jcr->JobStatus);
421 bstrftime(sdt, sizeof(sdt), jcr->jr.StartTime);
422 bstrftime(edt, sizeof(edt), jcr->jr.EndTime);
423 RunTime = jcr->jr.EndTime - jcr->jr.StartTime;
/*
 * Rate is JobBytes divided by (1000 * RunTime).
 * NOTE(review): a RunTime of 0 makes the divisor zero unless it is
 * guarded on lines not visible in this listing -- confirm.
 */
427 kbps = (double)jcr->jr.JobBytes / (1000 * RunTime);
429 if (!db_get_job_volume_names(jcr, jcr->db, jcr->jr.JobId, &jcr->VolumeName)) {
431 * Note, if the job has erred, most likely it did not write any
432 * tape, so suppress this "error" message since in that case
433 * it is normal. Or look at it the other way, only for a
434 * normal exit should we complain about this error.
436 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes) {
437 Jmsg(jcr, M_ERROR, 0, "%s", db_strerror(jcr->db));
439 jcr->VolumeName[0] = 0; /* none */
/*
 * Compression is 100 - 100 * JobBytes / ReadBytes, shown with one
 * decimal. The text is None when ReadBytes is 0 or the result is
 * below 0.5.
 */
442 if (jcr->ReadBytes == 0) {
443 strcpy(compress, "None");
445 compression = (double)100 - 100.0 * ((double)jcr->JobBytes / (double)jcr->ReadBytes);
446 if (compression < 0.5) {
447 strcpy(compress, "None");
449 sprintf(compress, "%.1f %%", (float)compression);
/* Text forms of the FD and SD statuses for the summary. */
452 jobstatus_to_ascii(jcr->FDJobStatus, fd_term_msg, sizeof(fd_term_msg));
453 jobstatus_to_ascii(jcr->SDJobStatus, sd_term_msg, sizeof(sd_term_msg));
/* Job summary; its message type is the msg_type chosen in the switch above. */
455 Jmsg(jcr, msg_type, 0, _("Bacula " VERSION " (" LSMDATE "): %s\n\
459 Backup Level: %s%s\n\
466 Software Compression: %s\n\
467 Volume names(s): %s\n\
468 Volume Session Id: %d\n\
469 Volume Session Time: %d\n\
470 Last Volume Bytes: %s\n\
471 Non-fatal FD errors: %d\n\
472 FD termination status: %s\n\
473 SD termination status: %s\n\
474 Termination: %s\n\n"),
478 jcr->fileset->hdr.name,
479 level_to_str(jcr->JobLevel), since,
480 jcr->client->hdr.name,
483 edit_uint64_with_commas(jcr->jr.JobFiles, ec1),
484 edit_uint64_with_commas(jcr->jr.JobBytes, ec2),
490 edit_uint64_with_commas(mr.VolBytes, ec3),
496 Dmsg0(100, "Leave backup_cleanup()\n");