3 * Bacula Director -- backup.c -- responsible for doing backup jobs
5 * Kern Sibbald, March MM
7 * Basic tasks done here:
8 * Open DB and create records for this job.
9 * Open Message Channel with Storage daemon to tell him a job will be starting.
10 * Open connection with File daemon and pass him commands
12 * When the File daemon finishes the job, update the DB.
18 Copyright (C) 2000-2003 Kern Sibbald and John Walker
20 This program is free software; you can redistribute it and/or
21 modify it under the terms of the GNU General Public License as
22 published by the Free Software Foundation; either version 2 of
23 the License, or (at your option) any later version.
25 This program is distributed in the hope that it will be useful,
26 but WITHOUT ANY WARRANTY; without even the implied warranty of
27 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
28 General Public License for more details.
30 You should have received a copy of the GNU General Public
31 License along with this program; if not, write to the Free
32 Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
41 /* Commands sent to File daemon */
/* Sent as-is with bnet_fsend() in do_backup() to ask the File daemon to start the backup. */
42 static char backupcmd[] = "backup\n";
/*
 * printf-style template filled in do_backup() with the Storage daemon
 * address (%s), its data port (%d) and the enable_ssl setting (%d).
 */
43 static char storaddr[] = "storage address=%s port=%d ssl=%d\n";
45 /* Responses received from File daemon */
/* Expected replies; each is handed to response() right after the matching command is sent. */
46 static char OKbackup[] = "2000 OK backup\n";
47 static char OKstore[] = "2000 OK storage\n";
/*
 * sscanf() template for the File daemon's end-of-job message.  It is parsed
 * in wait_for_job_termination() into five fields: FDJobStatus, JobFiles,
 * ReadBytes, JobBytes and Errors.
 * NOTE(review): ReadBytes/JobBytes are scanned through the `lld` macro into
 * uint64_t variables -- confirm the macro expands to a conversion that is
 * valid for an unsigned 64-bit target.
 */
48 static char EndJob[] = "2800 End Job TermCode=%d JobFiles=%u "
49 "ReadBytes=%" lld " JobBytes=%" lld " Errors=%u\n";
52 /* Forward referenced functions */
/*
 * Defined later in this file.  do_backup() calls it twice: once with the
 * status returned by wait_for_job_termination() and once with
 * JS_ErrorTerminated.
 */
53 static void backup_cleanup(JCR *jcr, int TermCode, char *since, FILESET_DBR *fsr);
55 /* External functions */
58 * Do a backup of the specified FileSet
60 * Returns: 0 on failure
/*
 * do_backup() -- drive one backup job described by jcr.
 *
 * Steps, in the order the calls appear below:
 *   1. get/create the Client record and create the FileSet record;
 *   2. compute the "since" text for the job level and update the Job start
 *      record in the catalog;
 *   3. fetch the Pool record by name, creating the pool if it is missing;
 *   4. connect to the Storage daemon, start its job and its message thread;
 *   5. connect to the File daemon and send it the include/exclude lists,
 *      the Storage daemon address, the level, the run before/after
 *      commands and finally the backup command;
 *   6. wait for termination and hand the resulting status to
 *      backup_cleanup().
 *
 * jcr -- job control record; its db, fileset, pool, store and file_bsock
 *        members are all read here, and jr/PoolId/fname are written.
 *
 * NOTE(review): the statements that leave the function after each failed
 * step are not shown next to the checks below; the existing header says 0 is
 * returned on failure -- confirm every failing check reaches the
 * backup_cleanup(jcr, JS_ErrorTerminated, ...) call at the end.
 */
63 int do_backup(JCR *jcr)
/* Receives the text produced by get_level_since_time(); printed after the level in the job report. */
65 char since[MAXSTRING];
73 if (!get_or_create_client_record(jcr)) {
74 Jmsg(jcr, M_ERROR, 0, _("Could not get/create Client record. ERR=%s\n"),
75 db_strerror(jcr->db));
80 * Get or Create FileSet record
/* Start from a zeroed record and identify the FileSet by its resource name. */
82 memset(&fsr, 0, sizeof(fsr));
83 bstrncpy(fsr.FileSet, jcr->fileset->hdr.name, sizeof(fsr.FileSet));
84 if (jcr->fileset->have_MD5) {
85 struct MD5Context md5c;
86 unsigned char signature[16];
/* Finalise a copy of the context so the FileSet's own md5c is not passed to MD5Final(). */
87 memcpy(&md5c, &jcr->fileset->md5c, sizeof(md5c));
88 MD5Final(signature, &md5c);
89 bin_to_base64(fsr.MD5, (char *)signature, 16); /* encode 16 bytes */
/* Keep the encoded signature on the FileSet resource as well as in the DB record. */
90 bstrncpy(jcr->fileset->MD5, fsr.MD5, sizeof(jcr->fileset->MD5));
92 Jmsg(jcr, M_WARNING, 0, _("FileSet MD5 signature not found.\n"));
94 if (!db_create_fileset_record(jcr, jcr->db, &fsr)) {
95 Jmsg(jcr, M_ERROR, 0, _("Could not create FileSet \"%s\" record. ERR=%s\n"),
96 fsr.FileSet, db_strerror(jcr->db));
/* Link the job record to the FileSet row that was just obtained. */
99 jcr->jr.FileSetId = fsr.FileSetId;
/* NOTE(review): presumably reported only when the record was newly created -- confirm the guarding condition. */
101 Jmsg(jcr, M_INFO, 0, _("Created new FileSet record \"%s\" %s\n"),
102 fsr.FileSet, fsr.cCreateTime);
104 Dmsg2(119, "Created FileSet %s record %u\n", jcr->fileset->hdr.name,
/* Fills `since`, which is later passed to backup_cleanup() for the report. */
107 get_level_since_time(jcr, since, sizeof(since));
/* Stamp the job record with this job's id and start time before writing it to the catalog. */
109 jcr->jr.JobId = jcr->JobId;
110 jcr->jr.StartTime = jcr->start_time;
111 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
112 Jmsg(jcr, M_ERROR, 0, "%s", db_strerror(jcr->db));
/* Pool-memory buffer of class PM_FNAME attached to the job. */
116 jcr->fname = (char *) get_pool_memory(PM_FNAME);
118 /* Print Job Start message */
119 Jmsg(jcr, M_INFO, 0, _("Start Backup JobId %u, Job=%s\n"),
120 jcr->JobId, jcr->Job);
123 * Get the Pool record
/*
 * Look the pool up by name; while the lookup fails, try to create the pool
 * from the job's Pool resource and look it up again.
 */
125 memset(&pr, 0, sizeof(pr));
126 bstrncpy(pr.Name, jcr->pool->hdr.name, sizeof(pr.Name));
127 while (!db_get_pool_record(jcr, jcr->db, &pr)) { /* get by Name */
128 /* Try to create the pool */
129 if (create_pool(jcr, jcr->db, jcr->pool, POOL_OP_CREATE) < 0) {
130 Jmsg(jcr, M_FATAL, 0, _("Pool %s not in database. %s"), pr.Name,
131 db_strerror(jcr->db));
134 Jmsg(jcr, M_INFO, 0, _("Pool %s created in database.\n"), pr.Name);
/* The pool id is stored both on the JCR and in the job record. */
137 jcr->PoolId = pr.PoolId; /****FIXME**** this can go away */
138 jcr->jr.PoolId = pr.PoolId;
141 * Open a message channel connection with the Storage
142 * daemon. This is to let him know that our client
143 * will be contacting him for a backup session.
146 Dmsg0(110, "Open connection with storage daemon\n");
/* Job status reflects that we are now waiting on the Storage daemon. */
147 set_jcr_job_status(jcr, JS_WaitSD);
149 * Start conversation with Storage daemon
151 if (!connect_to_storage_daemon(jcr, 10, SDConnectTimeout, 1)) {
155 * Now start a job with the Storage daemon
157 if (!start_storage_daemon_job(jcr)) {
161 * Now start a Storage daemon message thread
163 if (!start_storage_daemon_message_thread(jcr)) {
166 Dmsg0(150, "Storage daemon connection OK\n");
/* Same pattern for the File daemon: mark as waiting, connect, then mark as running. */
168 set_jcr_job_status(jcr, JS_WaitFD);
169 if (!connect_to_file_daemon(jcr, 10, FDConnectTimeout, 1)) {
173 set_jcr_job_status(jcr, JS_Running);
/* Shorthand for the File daemon socket used by the sends below. */
174 fd = jcr->file_bsock;
176 if (!send_include_list(jcr)) {
180 if (!send_exclude_list(jcr)) {
185 * send Storage daemon address to the File daemon
/*
 * An SDDport of 0 falls back to SDport.
 * NOTE(review): the fallback is written back into jcr->store itself, so it
 * persists on that resource after this job -- confirm that is intended.
 */
187 if (jcr->store->SDDport == 0) {
188 jcr->store->SDDport = jcr->store->SDport;
/* NOTE(review): the bnet_fsend() result is not checked here; a failed send is only seen through response() below. */
190 bnet_fsend(fd, storaddr, jcr->store->address, jcr->store->SDDport,
191 jcr->store->enable_ssl);
192 if (!response(jcr, fd, OKstore, "Storage", DISPLAY_ERROR)) {
197 if (!send_level_command(jcr)) {
201 if (!send_run_before_and_after_commands(jcr)) {
205 /* Send backup command */
206 bnet_fsend(fd, backupcmd);
207 if (!response(jcr, fd, OKbackup, "backup", DISPLAY_ERROR)) {
211 /* Pickup Job termination data */
/* Normal path: the status returned by wait_for_job_termination() becomes the termination code. */
212 stat = wait_for_job_termination(jcr);
213 backup_cleanup(jcr, stat, since, &fsr);
/* NOTE(review): presumably the shared failure exit for the checks above -- confirm. */
217 backup_cleanup(jcr, JS_ErrorTerminated, since, &fsr);
222 * Here we wait for the File daemon to signal termination,
223 * then we wait for the Storage daemon. When both
224 * are done, we return the job status.
225 * Also used by restore.c
/*
 * wait_for_job_termination() -- read File daemon messages until the socket
 * read returns a negative value, then wait for the Storage daemon.
 *
 * jcr -- job control record; file_bsock is read, and FDJobStatus, JobStatus,
 *        JobFiles, Errors, ReadBytes and JobBytes are written.
 *
 * Returns the first status that is not JS_Terminated, checked in the order
 * jcr->JobStatus, jcr->FDJobStatus; if both are JS_Terminated the Storage
 * daemon status jcr->SDJobStatus is returned.
 */
227 int wait_for_job_termination(JCR *jcr)
230 BSOCK *fd = jcr->file_bsock;
/* Filled only by a successful sscanf() of the EndJob message below. */
232 uint32_t JobFiles, Errors;
233 uint64_t ReadBytes, JobBytes;
235 set_jcr_job_status(jcr, JS_Running);
236 /* Wait for Client to terminate */
237 while ((n = bget_dirmsg(fd)) >= 0) {
/*
 * Only attempted while fd_ok is false, and accepted only when all five
 * fields of the EndJob template convert.
 * NOTE(review): fd_ok is presumably set right after a successful match --
 * confirm.
 */
238 if (!fd_ok && sscanf(fd->msg, EndJob, &jcr->FDJobStatus, &JobFiles,
239 &ReadBytes, &JobBytes, &Errors) == 5) {
/* The File daemon's termination code becomes the job status. */
241 set_jcr_job_status(jcr, jcr->FDJobStatus);
/* NOTE(review): the label says FDStatus but the value printed is jcr->JobStatus. */
242 Dmsg1(100, "FDStatus=%c\n", (char)jcr->JobStatus);
/* NOTE(review): presumably the branch for messages that do not match EndJob -- confirm. */
244 Jmsg(jcr, M_WARNING, 0, _("Unexpected Client Job message: %s\n"),
247 if (job_canceled(jcr)) {
/* Report a socket-level failure on the File daemon connection as fatal. */
251 if (is_bnet_error(fd)) {
252 Jmsg(jcr, M_FATAL, 0, _("Network error with FD during %s: ERR=%s\n"),
253 job_type_to_str(jcr->JobType), bnet_strerror(fd));
255 bnet_sig(fd, BNET_TERMINATE); /* tell Client we are terminating */
257 /* Note, the SD stores in jcr->JobFiles/ReadBytes/JobBytes/Errors */
258 wait_for_storage_daemon_termination(jcr);
260 /* Return values from FD */
/*
 * These assignments replace whatever the Storage daemon stored in the same
 * jcr fields.
 * NOTE(review): the four locals are uninitialised unless the sscanf() above
 * matched -- confirm these copies are guarded by fd_ok.
 */
262 jcr->JobFiles = JobFiles;
263 jcr->Errors = Errors;
264 jcr->ReadBytes = ReadBytes;
265 jcr->JobBytes = JobBytes;
268 // Dmsg4(000, "fd_ok=%d FDJS=%d JS=%d SDJS=%d\n", fd_ok, jcr->FDJobStatus,
269 // jcr->JobStatus, jcr->SDJobStatus);
271 /* Return the first error status we find Dir, FD, or SD */
/* No EndJob message, or a socket error, counts as an error termination of the File daemon. */
272 if (!fd_ok || is_bnet_error(fd)) {
273 jcr->FDJobStatus = JS_ErrorTerminated;
275 if (jcr->JobStatus != JS_Terminated) {
276 return jcr->JobStatus;
278 if (jcr->FDJobStatus != JS_Terminated) {
279 return jcr->FDJobStatus;
281 return jcr->SDJobStatus;
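285 * Release resources allocated during backup, after recording the final job status in the catalog, writing the bootstrap file when one is configured, and reporting the job summary.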
/*
 * backup_cleanup() -- finish a backup job and report on it.
 *
 * jcr      -- job control record of the job being finished.
 * TermCode -- status to record for the job; it is set on the jcr first and
 *             may be replaced by JS_ErrorTerminated by the checks below.
 * since    -- text printed directly after the backup level in the report.
 * fsr      -- FileSet record; only cCreateTime is read here.
 *
 * In order: store the final status, update and re-read the Job record, read
 * the Media record for jcr->VolumeName, write the bootstrap file when the
 * job ended normally with data and WriteBootstrap is set, choose the
 * termination text, compute rate and compression figures, and emit the
 * summary through Jmsg().
 */
287 static void backup_cleanup(JCR *jcr, int TermCode, char *since, FILESET_DBR *fsr)
/* Scratch buffers for formatted times, comma-edited numbers and status texts used in the report. */
289 char sdt[50], edt[50];
290 char ec1[30], ec2[30], ec3[30], ec4[30], ec5[30], compress[50];
291 char term_code[100], fd_term_msg[100], sd_term_msg[100];
295 double kbps, compression;
298 Dmsg0(100, "Enter backup_cleanup()\n");
299 memset(&mr, 0, sizeof(mr));
300 set_jcr_job_status(jcr, TermCode);
302 update_job_end_record(jcr); /* update database */
/* Re-read the Job record so the report uses the values stored in the catalog. */
304 if (!db_get_job_record(jcr, jcr->db, &jcr->jr)) {
305 Jmsg(jcr, M_WARNING, 0, _("Error getting job record for stats: %s"),
306 db_strerror(jcr->db));
307 set_jcr_job_status(jcr, JS_ErrorTerminated);
/* Media record is looked up by the volume name currently held on the jcr; mr.VolBytes feeds the report. */
310 bstrncpy(mr.VolumeName, jcr->VolumeName, sizeof(mr.VolumeName));
311 if (!db_get_media_record(jcr, jcr->db, &mr)) {
312 Jmsg(jcr, M_WARNING, 0, _("Error getting Media record for Volume \"%s\": ERR=%s"),
313 mr.VolumeName, db_strerror(jcr->db));
314 set_jcr_job_status(jcr, JS_ErrorTerminated);
317 /* Now update the bootstrap file if any */
/* Requires a normal termination, a non-zero byte count and a configured WriteBootstrap target. */
318 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes &&
319 jcr->job->WriteBootstrap) {
323 char *fname = jcr->job->WriteBootstrap;
324 VOL_PARAMS *VolParams = NULL;
/*
 * Two ways of obtaining the output stream appear below: the write end of a
 * pipe opened with open_bpipe(), or a regular file opened with fopen().
 * NOTE(review): both assign fd; they are presumably alternative branches
 * selected on fname -- confirm.
 */
330 bpipe = open_bpipe(fname, 0, "w");
331 fd = bpipe ? bpipe->wfd : NULL;
333 /* ***FIXME*** handle BASE */
/* A Full level job opens with "w+"; every other level opens with "a+". */
334 fd = fopen(fname, jcr->JobLevel==L_FULL?"w+":"a+");
337 VolCount = db_get_job_volume_parameters(jcr, jcr->db, jcr->JobId,
340 Jmsg(jcr, M_ERROR, 0, _("Could not get Job Volume Parameters. ERR=%s\n"),
341 db_strerror(jcr->db));
/* One bootstrap stanza per volume returned for this job. */
343 for (int i=0; i < VolCount; i++) {
344 /* Write the record */
345 fprintf(fd, "Volume=\"%s\"\n", VolParams[i].VolumeName);
346 fprintf(fd, "VolSessionId=%u\n", jcr->VolSessionId);
347 fprintf(fd, "VolSessionTime=%u\n", jcr->VolSessionTime);
348 fprintf(fd, "VolFile=%u-%u\n", VolParams[i].StartFile,
349 VolParams[i].EndFile);
350 fprintf(fd, "VolBlock=%u-%u\n", VolParams[i].StartBlock,
351 VolParams[i].EndBlock);
352 fprintf(fd, "FileIndex=%d-%d\n", VolParams[i].FirstIndex,
353 VolParams[i].LastIndex);
/* Failure to open the bootstrap target turns the job into an error termination. */
364 Jmsg(jcr, M_ERROR, 0, _("Could not open WriteBootstrap file:\n"
365 "%s: ERR=%s\n"), fname, strerror(errno));
366 set_jcr_job_status(jcr, JS_ErrorTerminated);
/* Choose the termination text and message type from the final job status. */
370 msg_type = M_INFO; /* by default INFO message */
371 switch (jcr->JobStatus) {
373 term_msg = _("Backup OK");
376 case JS_ErrorTerminated:
377 term_msg = _("*** Backup Error ***");
378 msg_type = M_ERROR; /* Generate error message */
/* If the Storage daemon socket still exists, signal termination on it and cancel its message thread. */
379 if (jcr->store_bsock) {
380 bnet_sig(jcr->store_bsock, BNET_TERMINATE);
381 pthread_cancel(jcr->SD_msg_chan);
385 term_msg = _("Backup Canceled");
386 if (jcr->store_bsock) {
387 bnet_sig(jcr->store_bsock, BNET_TERMINATE);
388 pthread_cancel(jcr->SD_msg_chan);
/* term_msg points at term_code, which is filled on the next line. */
392 term_msg = term_code;
/*
 * NOTE(review): unbounded sprintf() into the 100-byte term_code with a
 * translatable format; bsnprintf(), already used for `compress` below,
 * would bound the write.
 */
393 sprintf(term_code, _("Inappropriate term code: %c\n"), jcr->JobStatus);
396 bstrftime(sdt, sizeof(sdt), jcr->jr.StartTime);
397 bstrftime(edt, sizeof(edt), jcr->jr.EndTime);
398 RunTime = jcr->jr.EndTime - jcr->jr.StartTime;
/*
 * Rate in units of 1000 bytes per second of run time.
 * NOTE(review): this divides by RunTime -- confirm a zero or negative
 * RunTime is excluded before this statement.
 */
402 kbps = (double)jcr->jr.JobBytes / (1000 * RunTime);
/* Refresh jcr->VolumeName with the volume names recorded for this job. */
404 if (!db_get_job_volume_names(jcr, jcr->db, jcr->jr.JobId, &jcr->VolumeName)) {
406 * Note, if the job has erred, most likely it did not write any
407 * tape, so suppress this "error" message since in that case
408 * it is normal. Or look at it the other way, only for a
409 * normal exit should we complain about this error.
411 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes) {
412 Jmsg(jcr, M_ERROR, 0, "%s", db_strerror(jcr->db));
414 jcr->VolumeName[0] = 0; /* none */
/* Compression text: "None" when nothing was read or the saving is below 0.5 percent, otherwise a one-decimal percentage. */
417 if (jcr->ReadBytes == 0) {
418 bstrncpy(compress, "None", sizeof(compress));
420 compression = (double)100 - 100.0 * ((double)jcr->JobBytes / (double)jcr->ReadBytes);
421 if (compression < 0.5) {
422 bstrncpy(compress, "None", sizeof(compress));
424 bsnprintf(compress, sizeof(compress), "%.1f %%", (float)compression);
/* Text forms of the File daemon and Storage daemon statuses for the report. */
427 jobstatus_to_ascii(jcr->FDJobStatus, fd_term_msg, sizeof(fd_term_msg));
428 jobstatus_to_ascii(jcr->SDJobStatus, sd_term_msg, sizeof(sd_term_msg));
/* Job summary; msg_type is M_INFO unless the switch above selected M_ERROR. */
430 Jmsg(jcr, msg_type, 0, _("Bacula " VERSION " (" LSMDATE "): %s\n\
433 Backup Level: %s%s\n\
435 FileSet: \"%s\" %s\n\
438 FD Files Written: %s\n\
439 SD Files Written: %s\n\
440 FD Bytes Written: %s\n\
441 SD Bytes Written: %s\n\
443 Software Compression: %s\n\
444 Volume name(s): %s\n\
445 Volume Session Id: %d\n\
446 Volume Session Time: %d\n\
447 Last Volume Bytes: %s\n\
448 Non-fatal FD errors: %d\n\
450 FD termination status: %s\n\
451 SD termination status: %s\n\
452 Termination: %s\n\n"),
456 level_to_str(jcr->JobLevel), since,
457 jcr->client->hdr.name,
458 jcr->fileset->hdr.name, fsr->cCreateTime,
461 edit_uint64_with_commas(jcr->jr.JobFiles, ec1),
462 edit_uint64_with_commas(jcr->SDJobFiles, ec4),
463 edit_uint64_with_commas(jcr->jr.JobBytes, ec2),
464 edit_uint64_with_commas(jcr->SDJobBytes, ec5),
470 edit_uint64_with_commas(mr.VolBytes, ec3),
477 Dmsg0(100, "Leave backup_cleanup()\n");