3 * Bacula Director -- backup.c -- responsible for doing backup jobs
5 * Kern Sibbald, March MM
7 * This routine is called as a thread. It may not yet be totally
10 * Basic tasks done here:
11 * Open DB and create records for this job.
12 * Open Message Channel with Storage daemon to tell him a job will be starting.
13 * Open connection with File daemon and pass him commands
15 * When the File daemon finishes the job, update the DB.
21 Copyright (C) 2000, 2001, 2002 Kern Sibbald and John Walker
23 This program is free software; you can redistribute it and/or
24 modify it under the terms of the GNU General Public License as
25 published by the Free Software Foundation; either version 2 of
26 the License, or (at your option) any later version.
28 This program is distributed in the hope that it will be useful,
29 but WITHOUT ANY WARRANTY; without even the implied warranty of
30 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
31 General Public License for more details.
33 You should have received a copy of the GNU General Public
34 License along with this program; if not, write to the Free
35 Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
44 /* Commands sent to File daemon */
/* backupcmd: sent as-is by do_backup() through bnet_fsend(); the reply is matched against OKbackup. */
45 static char backupcmd[] = "backup\n";
/* storaddr: printf-style template; do_backup() fills it with jcr->store->address and jcr->store->SDDport. */
46 static char storaddr[] = "storage address=%s port=%d\n";
/* levelcmd: two %s slots; do_backup() passes either ("full", " ") or ("since ", jcr->stime). */
47 static char levelcmd[] = "level = %s%s\n";
49 /* Responses received from File daemon */
/* Each OK* string is the expected reply handed to response() right after the matching command is sent. */
50 static char OKbackup[] = "2000 OK backup\n";
51 static char OKstore[] = "2000 OK storage\n";
52 static char OKlevel[] = "2000 OK level\n";
/*
 * EndBackup: sscanf() template used by wait_for_job_termination(); a message
 * counts as the end-of-job record only when all four fields convert.
 * NOTE(review): `lld` is not defined in this excerpt - presumably a project
 * macro supplying a 64-bit length modifier; confirm against the headers.
 */
53 static char EndBackup[] = "2801 End Backup Job TermCode=%d JobFiles=%u ReadBytes=%" lld " JobBytes=%" lld "\n";
56 /* Forward referenced functions */
57 static void backup_cleanup(JCR *jcr, int TermCode, char *since);
58 static int wait_for_job_termination(JCR *jcr);
60 /* External functions */
63 * Do a backup of the specified FileSet
65 * Returns: 0 on failure
/*
 * do_backup() drives one backup job from the Director side.  In the order
 * visible here it: obtains the Client record, creates the FileSet record,
 * looks up the previous job start time for levels that need one, updates
 * the Job start record, resolves (or creates) the Pool record, connects to
 * the Storage daemon and then the File daemon, sends the include/exclude
 * lists, the storage address, the level and the backup command, and finally
 * passes the termination status to backup_cleanup().
 *
 * NOTE(review): this listing omits a number of original lines (local
 * declarations, case labels, braces, error exits), so the exact control
 * flow between the visible statements cannot be confirmed from here.
 */
68 int do_backup(JCR *jcr)
/* since: text appended after the level name in the final job report (see backup_cleanup()). */
70 char since[MAXSTRING];
78 if (!get_or_create_client_record(jcr)) {
79 Jmsg(jcr, M_ERROR, 0, _("Could not get/create Client record. ERR=%s\n"),
80 db_strerror(jcr->db));
85 * Get or Create FileSet record
/* Start from a zeroed FileSet record carrying only the resource name. */
87 memset(&fsr, 0, sizeof(fsr));
88 strcpy(fsr.FileSet, jcr->fileset->hdr.name);
89 if (jcr->fileset->have_MD5) {
90 struct MD5Context md5c;
91 unsigned char signature[16];
/* Finalize a local copy of the context; jcr->fileset->md5c itself is not passed to MD5Final(). */
92 memcpy(&md5c, &jcr->fileset->md5c, sizeof(md5c));
93 MD5Final(signature, &md5c);
94 bin_to_base64(fsr.MD5, (char *)signature, 16); /* encode 16 bytes */
/* Keep the encoded signature on the FileSet resource as well as in the record. */
95 strcpy(jcr->fileset->MD5, fsr.MD5);
97 Jmsg(jcr, M_WARNING, 0, _("FileSet MD5 signature not found.\n"));
99 if (!db_create_fileset_record(jcr->db, &fsr)) {
100 Jmsg(jcr, M_ERROR, 0, _("Could not create FileSet record. ERR=%s\n"),
101 db_strerror(jcr->db));
104 jcr->jr.FileSetId = fsr.FileSetId;
105 Dmsg2(119, "Created FileSet %s record %d\n", jcr->fileset->hdr.name,
109 * FULL backup job to get the time/date for a
110 * differential or incremental save.
/* stime receives the previous job start time; it is released further down in this function. */
112 jcr->stime = get_pool_memory(PM_MESSAGE);
/* NOTE(review): the case labels of this switch are not visible in this excerpt. */
115 switch (jcr->JobLevel) {
118 /* Look up start time of last job */
/* When no earlier start time is found the job is promoted to a FULL backup. */
120 if (!db_find_job_start_time(jcr->db, &jcr->jr, &jcr->stime)) {
121 Jmsg(jcr, M_INFO, 0, _("Last FULL backup time not found. Doing FULL backup.\n"));
122 jcr->JobLevel = jcr->jr.Level = L_FULL;
/* Build ", since=<stime>" for the job report - presumably on the branch where a start time was found. */
124 strcpy(since, ", since=");
125 bstrncat(since, jcr->stime, sizeof(since));
127 Dmsg1(115, "Last start time = %s\n", jcr->stime);
/* Record the job id and start time in the catalog job record. */
131 jcr->jr.JobId = jcr->JobId;
132 jcr->jr.StartTime = jcr->start_time;
133 if (!db_update_job_start_record(jcr->db, &jcr->jr)) {
134 Jmsg(jcr, M_ERROR, 0, "%s", db_strerror(jcr->db));
138 jcr->fname = (char *) get_pool_memory(PM_FNAME);
140 /* Print Job Start message */
141 Jmsg(jcr, M_INFO, 0, _("Start Backup JobId %d, Job=%s\n"),
142 jcr->JobId, jcr->Job);
145 * Get the Pool record
147 memset(&pr, 0, sizeof(pr));
148 strcpy(pr.Name, jcr->pool->hdr.name);
/* Retry the lookup after each create_pool() attempt; a negative create_pool() result is reported as fatal. */
149 while (!db_get_pool_record(jcr->db, &pr)) { /* get by Name */
150 /* Try to create the pool */
151 if (create_pool(jcr->db, jcr->pool) < 0) {
152 Jmsg(jcr, M_FATAL, 0, _("Pool %s not in database. %s"), pr.Name,
153 db_strerror(jcr->db));
156 Jmsg(jcr, M_INFO, 0, _("Pool %s created in database.\n"), pr.Name);
159 jcr->PoolId = pr.PoolId; /****FIXME**** this can go away */
160 jcr->jr.PoolId = pr.PoolId;
163 * Open a message channel connection with the Storage
164 * daemon. This is to let him know that our client
165 * will be contacting him for a backup session.
168 Dmsg0(110, "Open connection with storage daemon\n");
/* Job status is JS_Blocked from here until the File daemon connection succeeds and it becomes JS_Running. */
169 jcr->JobStatus = JS_Blocked;
171 * Start conversation with Storage daemon
173 if (!connect_to_storage_daemon(jcr, 10, SDConnectTimeout, 1)) {
177 * Now start a job with the Storage daemon
179 if (!start_storage_daemon_job(jcr)) {
183 * Now start a Storage daemon message thread
185 if (!start_storage_daemon_message_thread(jcr)) {
188 Dmsg0(150, "Storage daemon connection OK\n");
190 jcr->JobStatus = JS_Blocked;
191 if (!connect_to_file_daemon(jcr, 10, FDConnectTimeout, 1)) {
195 jcr->JobStatus = JS_Running;
/* fd is the File daemon socket used for every command below. */
196 fd = jcr->file_bsock;
198 if (!send_include_list(jcr)) {
202 if (!send_exclude_list(jcr)) {
207 * send Storage daemon address to the File daemon
/* An SDDport of 0 is replaced by SDport before the address is sent. */
209 if (jcr->store->SDDport == 0) {
210 jcr->store->SDDport = jcr->store->SDport;
212 bnet_fsend(fd, storaddr, jcr->store->address, jcr->store->SDDport);
213 if (!response(fd, OKstore, "Storage")) {
218 * Send Level command to File daemon
/* NOTE(review): the case labels of this switch are not visible in this excerpt. */
220 switch (jcr->JobLevel) {
222 bnet_fsend(fd, levelcmd, "full", " ");
226 bnet_fsend(fd, levelcmd, "since ", jcr->stime);
/*
 * NOTE(review): jcr->stime is released here and again by the later
 * free_pool_memory(jcr->stime) call near the end of this function.  The
 * branching between the two calls is not visible in this excerpt - confirm
 * that no path (e.g. a failed response() after this point) reaches both.
 */
227 free_pool_memory(jcr->stime);
/* Any other level is reported as unimplemented, printing JobLevel both as a number and as a character. */
232 Jmsg2(jcr, M_FATAL, 0, _("Unimplemented backup level %d %c\n"),
233 jcr->JobLevel, jcr->JobLevel);
236 Dmsg1(120, ">filed: %s", fd->msg);
237 if (!response(fd, OKlevel, "Level")) {
241 /* Send backup command */
242 bnet_fsend(fd, backupcmd);
243 if (!response(fd, OKbackup, "backup")) {
247 /* Pickup Job termination data */
/* The status returned by wait_for_job_termination() becomes the TermCode given to backup_cleanup(). */
248 stat = wait_for_job_termination(jcr);
249 backup_cleanup(jcr, stat, since);
/* NOTE(review): the two lines below look like the error exit (stime released, JS_ErrorTerminated reported); the label and jumps leading here are not visible. */
254 free_pool_memory(jcr->stime);
257 backup_cleanup(jcr, JS_ErrorTerminated, since);
263 * Here we wait for the File daemon to signal termination,
264 * then we wait for the Storage daemon. When both
265 * are done, we return the job status.
/*
 * wait_for_job_termination() reads messages from the File daemon until the
 * read fails or the job is cancelled, signals termination to the client,
 * waits for the Storage daemon, and returns a job status chosen from the
 * File daemon result, a network error, or jcr->SDJobStatus.
 *
 * NOTE(review): the declaration and assignment of fd_ok and n are not
 * visible in this excerpt; fd_ok is presumably set when the EndBackup
 * record parses - confirm.
 */
267 static int wait_for_job_termination(JCR *jcr)
270 BSOCK *fd = jcr->file_bsock;
273 jcr->JobStatus = JS_WaitFD;
274 /* Wait for Client to terminate */
/* Loop ends on a negative bget_msg() result or when job_cancelled() reports true. */
275 while ((n = bget_msg(fd, 0)) >= 0 && !job_cancelled(jcr)) {
/* A message is the end-of-job record only if all four EndBackup fields convert; they are stored directly in the JCR. */
276 if (sscanf(fd->msg, EndBackup, &jcr->JobStatus, &jcr->JobFiles,
277 &jcr->ReadBytes, &jcr->JobBytes) == 4) {
279 Dmsg1(100, "FDStatus=%c\n", (char)jcr->JobStatus);
282 if (is_bnet_error(fd)) {
283 Jmsg(jcr, M_FATAL, 0, _("<filed: network error during BACKUP command. ERR=%s\n"),
286 bnet_sig(fd, BNET_TERMINATE); /* tell Client we are terminating */
288 wait_for_storage_daemon_termination(jcr);
290 /* Return the first error status we find FD or SD */
/* 1) File daemon reported a status other than JS_Terminated: return it unchanged. */
291 if (fd_ok && jcr->JobStatus != JS_Terminated) {
292 return jcr->JobStatus;
/* 2) No valid end record, or the socket is in error: report JS_ErrorTerminated. */
294 if (!fd_ok || is_bnet_error(fd)) {
295 return JS_ErrorTerminated;
/* 3) Otherwise the Storage daemon status decides the result. */
297 return jcr->SDJobStatus;
301 * Release resources allocated during backup.
/*
 * backup_cleanup() finishes a backup job: it stores TermCode as the job
 * status, updates the job end record, re-reads the Job and Media records
 * for statistics, picks a termination message, emits the job summary
 * through Jmsg(), and writes the bootstrap information when the job
 * resource names a WriteBootstrap target.
 *
 *   jcr      - job control record of the finished job
 *   TermCode - termination status; copied into jcr->JobStatus
 *   since    - text printed directly after the level name in the summary
 *
 * NOTE(review): several original lines (local declarations, case labels,
 * braces, some report arguments) are missing from this listing.
 */
303 static void backup_cleanup(JCR *jcr, int TermCode, char *since)
305 char sdt[50], edt[50];
306 char ec1[30], ec2[30], ec3[30], compress[50];
311 double kbps, compression;
314 Dmsg0(100, "Enter backup_cleanup()\n");
315 memset(&mr, 0, sizeof(mr));
316 jcr->JobStatus = TermCode;
318 update_job_end_record(jcr); /* update database */
/* A failure to re-read the Job or Media record is only a warning. */
320 if (!db_get_job_record(jcr->db, &jcr->jr)) {
321 Jmsg(jcr, M_WARNING, 0, _("Error getting job record for stats: %s"),
322 db_strerror(jcr->db));
/* The Media record is looked up using the volume name currently held in the JCR. */
325 strcpy(mr.VolumeName, jcr->VolumeName);
326 if (!db_get_media_record(jcr->db, &mr)) {
327 Jmsg(jcr, M_WARNING, 0, _("Error getting Media record for stats: %s"),
328 db_strerror(jcr->db));
331 msg_type = M_INFO; /* by default INFO message */
/* NOTE(review): the statements below appear to be the arms of a switch on the termination code; only one case label is visible. */
334 term_msg = _("Backup OK");
337 case JS_ErrorTerminated:
338 term_msg = _("*** Backup Error ***");
339 msg_type = M_ERROR; /* Generate error message */
/* If a Storage daemon socket exists, signal termination on it and cancel the SD message thread. */
340 if (jcr->store_bsock) {
341 bnet_sig(jcr->store_bsock, BNET_TERMINATE);
342 pthread_cancel(jcr->SD_msg_chan);
346 term_msg = _("Backup Cancelled");
347 if (jcr->store_bsock) {
348 bnet_sig(jcr->store_bsock, BNET_TERMINATE);
349 pthread_cancel(jcr->SD_msg_chan);
/* term_msg is pointed at the term_code buffer first; the buffer is filled by the sprintf() that follows. */
353 term_msg = term_code;
354 sprintf(term_code, _("Inappropriate term code: %c\n"), TermCode);
/* Format start and end times and derive the run time from the Job record. */
357 bstrftime(sdt, sizeof(sdt), jcr->jr.StartTime);
358 bstrftime(edt, sizeof(edt), jcr->jr.EndTime);
359 RunTime = jcr->jr.EndTime - jcr->jr.StartTime;
/* NOTE(review): this divides by RunTime; any guard against a zero RunTime is not visible in this excerpt - confirm. */
363 kbps = (double)jcr->jr.JobBytes / (1000 * RunTime);
365 if (!db_get_job_volume_names(jcr->db, jcr->jr.JobId, &jcr->VolumeName)) {
367 * Note, if the job has erred, most likely it did not write any
368 * tape, so suppress this "error" message since in that case
369 * it is normal. Or look at it the other way, only for a
370 * normal exit should we complain about this error.
372 if (TermCode == JS_Terminated) {
373 Jmsg(jcr, M_ERROR, 0, "%s", db_strerror(jcr->db));
375 jcr->VolumeName[0] = 0; /* none */
/* Compression text: "None" when nothing was read or when the computed saving is below 0.5, otherwise a percentage. */
378 if (jcr->ReadBytes == 0) {
379 strcpy(compress, "None");
381 compression = (double)100 - 100.0 * ((double)jcr->JobBytes / (double)jcr->ReadBytes);
382 if (compression < 0.5) {
383 strcpy(compress, "None");
385 sprintf(compress, "%.1f %%", (float)compression);
/* Job summary report, emitted with the message type chosen above. */
389 Jmsg(jcr, msg_type, 0, _("Bacula " VERSION " (" LSMDATE "): %s\n\
393 Backup Level: %s%s\n\
400 Software Compression: %s\n\
401 Volume names(s): %s\n\
402 Volume Session Id: %d\n\
403 Volume Session Time: %d\n\
404 Last Volume Bytes: %s\n\
405 Termination: %s\n\n"),
409 jcr->fileset->hdr.name,
410 level_to_str(jcr->JobLevel), since,
411 jcr->client->hdr.name,
414 edit_uint64_with_commas(jcr->jr.JobFiles, ec1),
415 edit_uint64_with_commas(jcr->jr.JobBytes, ec2),
421 edit_uint64_with_commas(mr.VolBytes, ec3),
425 /* Now update the bootstrap file if any */
426 if (jcr->job->WriteBootstrap) {
429 char *fname = jcr->job->WriteBootstrap;
/* NOTE(review): the test that selects popen() over fopen() is on lines not shown in this excerpt. */
434 fd = popen(fname, "w");
/* A plain file is opened "w+" for a FULL job and "a+" for any other level. */
436 fd = fopen(fname, jcr->JobLevel==L_FULL?"w+":"a+");
439 Jmsg(jcr, M_ERROR, 0, _("Could not open WriteBootstrap file:\n"
440 "%s: ERR=%s\n"), fname, strerror(errno));
/* Bootstrap content: volume name plus the session id and session time. */
443 fprintf(fd, "Volume=\"%s\"\n", jcr->VolumeName);
444 fprintf(fd, "VolSessionId=%u\n", jcr->VolSessionId);
445 fprintf(fd, "VolSessionTime=%u\n", jcr->VolSessionTime);
452 Dmsg0(100, "Leave backup_cleanup()\n");