X-Git-Url: https://git.sur5r.net/?a=blobdiff_plain;f=bacula%2Fsrc%2Fdird%2Fbackup.c;h=9f7d07c7a90dc9bd8842c92638546229742dc19c;hb=1c31d780ff8befc9ef13c681d991bf235cb5f735;hp=b101b46d116e2eb6a534e38f536ecd015094bae3;hpb=3a5e17c3fb18038b3590abb48e4d47c3c1031e34;p=bacula%2Fbacula diff --git a/bacula/src/dird/backup.c b/bacula/src/dird/backup.c index b101b46d11..9f7d07c7a9 100644 --- a/bacula/src/dird/backup.c +++ b/bacula/src/dird/backup.c @@ -4,9 +4,6 @@ * * Kern Sibbald, March MM * - * This routine is called as a thread. It may not yet be totally - * thread reentrant!!! - * * Basic tasks done here: * Open DB and create records for this job. * Open Message Channel with Storage daemon to tell him a job will be starting. @@ -18,7 +15,7 @@ */ /* - Copyright (C) 2000, 2001, 2002 Kern Sibbald and John Walker + Copyright (C) 2000-2004 Kern Sibbald and John Walker This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as @@ -43,17 +40,17 @@ /* Commands sent to File daemon */ static char backupcmd[] = "backup\n"; -static char storaddr[] = "storage address=%s port=%d\n"; -static char levelcmd[] = "level = %s%s\n"; +static char storaddr[] = "storage address=%s port=%d ssl=%d\n"; /* Responses received from File daemon */ -static char OKbackup[] = "2000 OK backup\n"; -static char OKstore[] = "2000 OK storage\n"; -static char OKlevel[] = "2000 OK level\n"; +static char OKbackup[] = "2000 OK backup\n"; +static char OKstore[] = "2000 OK storage\n"; +static char EndJob[] = "2800 End Job TermCode=%d JobFiles=%u " + "ReadBytes=%" lld " JobBytes=%" lld " Errors=%u\n"; + /* Forward referenced functions */ -static void backup_cleanup(JCR *jcr, int TermCode, char *since); -static int wait_for_job_termination(JCR *jcr); +static void backup_cleanup(JCR *jcr, int TermCode, char *since, FILESET_DBR *fsr); /* External functions */ @@ -70,82 +67,48 @@ int do_backup(JCR *jcr) BSOCK *fd; POOL_DBR pr; FILESET_DBR fsr; + STORE *store; since[0] = 0; if (!get_or_create_client_record(jcr)) { - Jmsg(jcr, M_ERROR, 0, _("Could not get/create Client record. ERR=%s\n"), - db_strerror(jcr->db)); goto bail_out; } - /* - * Get or Create FileSet record - */ - memset(&fsr, 0, sizeof(fsr)); - strcpy(fsr.FileSet, jcr->fileset->hdr.name); - if (jcr->fileset->have_MD5) { - struct MD5Context md5c; - unsigned char signature[16]; - memcpy(&md5c, &jcr->fileset->md5c, sizeof(md5c)); - MD5Final(signature, &md5c); - bin_to_base64(fsr.MD5, (char *)signature, 16); /* encode 16 bytes */ - } else { - Jmsg(jcr, M_WARNING, 0, _("FileSet MD5 signature not found.\n")); - } - if (!db_create_fileset_record(jcr->db, &fsr)) { - Jmsg(jcr, M_ERROR, 0, _("Could not create FileSet record. ERR=%s\n"), - db_strerror(jcr->db)); - goto bail_out; - } - jcr->jr.FileSetId = fsr.FileSetId; - Dmsg2(119, "Created FileSet %s record %d\n", jcr->fileset->hdr.name, - jcr->jr.FileSetId); - - /* Look up the last - * FULL backup job to get the time/date for a - * differential or incremental save. - */ - jcr->stime = get_pool_memory(PM_MESSAGE); - jcr->stime[0] = 0; - since[0] = 0; - switch (jcr->JobLevel) { - case L_DIFFERENTIAL: - case L_INCREMENTAL: - /* Look up start time of last job */ - jcr->jr.JobId = 0; - if (!db_find_job_start_time(jcr->db, &jcr->jr, jcr->stime)) { - Jmsg(jcr, M_INFO, 0, _("Last FULL backup time not found. Doing FULL backup.\n")); - jcr->JobLevel = jcr->jr.Level = L_FULL; - } else { - strcpy(since, ", since="); - strcat(since, jcr->stime); - } - Dmsg1(115, "Last start time = %s\n", jcr->stime); - break; - } - - jcr->jr.JobId = jcr->JobId; - jcr->jr.StartTime = jcr->start_time; - if (!db_update_job_start_record(jcr->db, &jcr->jr)) { - Jmsg(jcr, M_ERROR, 0, "%s", db_strerror(jcr->db)); + if (!get_or_create_fileset_record(jcr, &fsr)) { goto bail_out; } - jcr->fname = (char *) get_pool_memory(PM_FNAME); + get_level_since_time(jcr, since, sizeof(since)); - /* Print Job Start message */ - Jmsg(jcr, M_INFO, 0, _("Start Backup JobId %d, Job=%s\n"), - jcr->JobId, jcr->Job); + jcr->fname = get_pool_memory(PM_FNAME); /* - * Get the Pool record + * Get the Pool record -- first apply any level defined pools */ + switch (jcr->JobLevel) { + case L_FULL: + if (jcr->full_pool) { + jcr->pool = jcr->full_pool; + } + break; + case L_INCREMENTAL: + if (jcr->inc_pool) { + jcr->pool = jcr->inc_pool; + } + break; + case L_DIFFERENTIAL: + if (jcr->dif_pool) { + jcr->pool = jcr->dif_pool; + } + break; + } memset(&pr, 0, sizeof(pr)); - strcpy(pr.Name, jcr->pool->hdr.name); - while (!db_get_pool_record(jcr->db, &pr)) { /* get by Name */ + bstrncpy(pr.Name, jcr->pool->hdr.name, sizeof(pr.Name)); + + while (!db_get_pool_record(jcr, jcr->db, &pr)) { /* get by Name */ /* Try to create the pool */ - if (create_pool(jcr->db, jcr->pool) < 0) { + if (create_pool(jcr, jcr->db, jcr->pool, POOL_OP_CREATE) < 0) { Jmsg(jcr, M_FATAL, 0, _("Pool %s not in database. %s"), pr.Name, db_strerror(jcr->db)); goto bail_out; @@ -156,6 +119,19 @@ int do_backup(JCR *jcr) jcr->PoolId = pr.PoolId; /****FIXME**** this can go away */ jcr->jr.PoolId = pr.PoolId; + + /* Print Job Start message */ + Jmsg(jcr, M_INFO, 0, _("Start Backup JobId %u, Job=%s\n"), + jcr->JobId, jcr->Job); + + set_jcr_job_status(jcr, JS_Running); + Dmsg2(100, "JobId=%d JobLevel=%c\n", jcr->jr.JobId, jcr->jr.JobLevel); + if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) { + Jmsg(jcr, M_ERROR, 0, "%s", db_strerror(jcr->db)); + goto bail_out; + } + + /* * Open a message channel connection with the Storage * daemon. This is to let him know that our client @@ -163,7 +139,7 @@ int do_backup(JCR *jcr) * */ Dmsg0(110, "Open connection with storage daemon\n"); - jcr->JobStatus = JS_Blocked; + set_jcr_job_status(jcr, JS_WaitSD); /* * Start conversation with Storage daemon */ @@ -184,12 +160,12 @@ int do_backup(JCR *jcr) } Dmsg0(150, "Storage daemon connection OK\n"); - jcr->JobStatus = JS_Blocked; + set_jcr_job_status(jcr, JS_WaitFD); if (!connect_to_file_daemon(jcr, 10, FDConnectTimeout, 1)) { goto bail_out; } - jcr->JobStatus = JS_Running; + set_jcr_job_status(jcr, JS_Running); fd = jcr->file_bsock; if (!send_include_list(jcr)) { @@ -200,87 +176,106 @@ int do_backup(JCR *jcr) goto bail_out; } + if (!send_level_command(jcr)) { + goto bail_out; + } + /* * send Storage daemon address to the File daemon */ - if (jcr->store->SDDport == 0) { - jcr->store->SDDport = jcr->store->SDport; + store = (STORE *)jcr->storage[0]->first(); + if (store->SDDport == 0) { + store->SDDport = store->SDport; } - bnet_fsend(fd, storaddr, jcr->store->address, jcr->store->SDDport); - if (!response(fd, OKstore, "Storage")) { + bnet_fsend(fd, storaddr, store->address, store->SDDport, + store->enable_ssl); + if (!response(jcr, fd, OKstore, "Storage", DISPLAY_ERROR)) { goto bail_out; } - /* - * Send Level command to File daemon - */ - switch (jcr->JobLevel) { - case L_FULL: - bnet_fsend(fd, levelcmd, "full", " "); - break; - case L_DIFFERENTIAL: - case L_INCREMENTAL: - bnet_fsend(fd, levelcmd, "since ", jcr->stime); - free_pool_memory(jcr->stime); - jcr->stime = NULL; - break; - case L_SINCE: - default: - Jmsg2(jcr, M_FATAL, 0, _("Unimplemented backup level %d %c\n"), - jcr->JobLevel, jcr->JobLevel); - goto bail_out; - } - Dmsg1(120, ">filed: %s", fd->msg); - if (!response(fd, OKlevel, "Level")) { + + if (!send_run_before_and_after_commands(jcr)) { goto bail_out; } /* Send backup command */ bnet_fsend(fd, backupcmd); - if (!response(fd, OKbackup, "backup")) { + if (!response(jcr, fd, OKbackup, "backup", DISPLAY_ERROR)) { goto bail_out; } /* Pickup Job termination data */ stat = wait_for_job_termination(jcr); - backup_cleanup(jcr, stat, since); + backup_cleanup(jcr, stat, since, &fsr); return 1; bail_out: - if (jcr->stime) { - free_pool_memory(jcr->stime); - jcr->stime = NULL; - } - backup_cleanup(jcr, JS_ErrorTerminated, since); + backup_cleanup(jcr, JS_ErrorTerminated, since, &fsr); return 0; - } /* - * NOTE! This is no longer really needed as the Storage - * daemon now passes this information directly - * back to us. + * Here we wait for the File daemon to signal termination, + * then we wait for the Storage daemon. When both + * are done, we return the job status. + * Also used by restore.c */ -static int wait_for_job_termination(JCR *jcr) +int wait_for_job_termination(JCR *jcr) { int32_t n = 0; BSOCK *fd = jcr->file_bsock; + bool fd_ok = false; + uint32_t JobFiles, Errors; + uint64_t ReadBytes, JobBytes; - jcr->JobStatus = JS_WaitFD; + set_jcr_job_status(jcr, JS_Running); /* Wait for Client to terminate */ - while ((n = bget_msg(fd, 0)) > 0 && !job_cancelled(jcr)) { - /* get and discard Client output */ + while ((n = bget_dirmsg(fd)) >= 0) { + if (!fd_ok && sscanf(fd->msg, EndJob, &jcr->FDJobStatus, &JobFiles, + &ReadBytes, &JobBytes, &Errors) == 5) { + fd_ok = true; + set_jcr_job_status(jcr, jcr->FDJobStatus); + Dmsg1(100, "FDStatus=%c\n", (char)jcr->JobStatus); + } else { + Jmsg(jcr, M_WARNING, 0, _("Unexpected Client Job message: %s\n"), + fd->msg); + } + if (job_canceled(jcr)) { + break; + } } - bnet_sig(fd, BNET_TERMINATE); /* tell Client we are terminating */ - if (n < 0) { - Jmsg(jcr, M_FATAL, 0, _("JobType), bnet_strerror(fd)); } + bnet_sig(fd, BNET_TERMINATE); /* tell Client we are terminating */ + /* Note, the SD stores in jcr->JobFiles/ReadBytes/JobBytes/Errors */ wait_for_storage_daemon_termination(jcr); - if (n < 0) { - return JS_ErrorTerminated; + + /* Return values from FD */ + if (fd_ok) { + jcr->JobFiles = JobFiles; + jcr->Errors = Errors; + jcr->ReadBytes = ReadBytes; + jcr->JobBytes = JobBytes; + } else { + Jmsg(jcr, M_FATAL, 0, _("No Job status returned from FD.\n")); + } + +// Dmsg4(100, "fd_ok=%d FDJS=%d JS=%d SDJS=%d\n", fd_ok, jcr->FDJobStatus, +// jcr->JobStatus, jcr->SDJobStatus); + + /* Return the first error status we find Dir, FD, or SD */ + if (!fd_ok || is_bnet_error(fd)) { + jcr->FDJobStatus = JS_ErrorTerminated; + } + if (jcr->JobStatus != JS_Terminated) { + return jcr->JobStatus; + } + if (jcr->FDJobStatus != JS_Terminated) { + return jcr->FDJobStatus; } return jcr->SDJobStatus; } @@ -288,37 +283,103 @@ static int wait_for_job_termination(JCR *jcr) /* * Release resources allocated during backup. */ -static void backup_cleanup(JCR *jcr, int TermCode, char *since) +static void backup_cleanup(JCR *jcr, int TermCode, char *since, FILESET_DBR *fsr) { char sdt[50], edt[50]; - char ec1[30], ec2[30], ec3[30]; - char term_code[100]; - char *term_msg; + char ec1[30], ec2[30], ec3[30], ec4[30], ec5[30], compress[50]; + char term_code[100], fd_term_msg[100], sd_term_msg[100]; + const char *term_msg; int msg_type; MEDIA_DBR mr; - double kbps; + double kbps, compression; + utime_t RunTime; - Dmsg0(100, "Enter backup_cleanup()\n"); + Dmsg2(100, "Enter backup_cleanup %d %c\n", TermCode, TermCode); + dequeue_messages(jcr); /* display any queued messages */ memset(&mr, 0, sizeof(mr)); - jcr->JobStatus = TermCode; + set_jcr_job_status(jcr, TermCode); update_job_end_record(jcr); /* update database */ - if (!db_get_job_record(jcr->db, &jcr->jr)) { + if (!db_get_job_record(jcr, jcr->db, &jcr->jr)) { Jmsg(jcr, M_WARNING, 0, _("Error getting job record for stats: %s"), db_strerror(jcr->db)); + set_jcr_job_status(jcr, JS_ErrorTerminated); } - strcpy(mr.VolumeName, jcr->VolumeName); - if (!db_get_media_record(jcr->db, &mr)) { - Jmsg(jcr, M_WARNING, 0, _("Error getting Media record for stats: %s"), - db_strerror(jcr->db)); + bstrncpy(mr.VolumeName, jcr->VolumeName, sizeof(mr.VolumeName)); + if (!db_get_media_record(jcr, jcr->db, &mr)) { + Jmsg(jcr, M_WARNING, 0, _("Error getting Media record for Volume \"%s\": ERR=%s"), + mr.VolumeName, db_strerror(jcr->db)); + set_jcr_job_status(jcr, JS_ErrorTerminated); + } + + /* Now update the bootstrap file if any */ + if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes && + jcr->job->WriteBootstrap) { + FILE *fd; + BPIPE *bpipe = NULL; + int got_pipe = 0; + char *fname = jcr->job->WriteBootstrap; + VOL_PARAMS *VolParams = NULL; + int VolCount; + + if (*fname == '|') { + fname++; + got_pipe = 1; + bpipe = open_bpipe(fname, 0, "w"); + fd = bpipe ? bpipe->wfd : NULL; + } else { + /* ***FIXME*** handle BASE */ + fd = fopen(fname, jcr->JobLevel==L_FULL?"w+":"a+"); + } + if (fd) { + VolCount = db_get_job_volume_parameters(jcr, jcr->db, jcr->JobId, + &VolParams); + if (VolCount == 0) { + Jmsg(jcr, M_ERROR, 0, _("Could not get Job Volume Parameters to " + "update Bootstrap file. ERR=%s\n"), db_strerror(jcr->db)); + if (jcr->SDJobFiles != 0) { + set_jcr_job_status(jcr, JS_ErrorTerminated); + } + + } + for (int i=0; i < VolCount; i++) { + /* Write the record */ + fprintf(fd, "Volume=\"%s\"\n", VolParams[i].VolumeName); + fprintf(fd, "VolSessionId=%u\n", jcr->VolSessionId); + fprintf(fd, "VolSessionTime=%u\n", jcr->VolSessionTime); + fprintf(fd, "VolFile=%u-%u\n", VolParams[i].StartFile, + VolParams[i].EndFile); + fprintf(fd, "VolBlock=%u-%u\n", VolParams[i].StartBlock, + VolParams[i].EndBlock); + fprintf(fd, "FileIndex=%d-%d\n", VolParams[i].FirstIndex, + VolParams[i].LastIndex); + } + if (VolParams) { + free(VolParams); + } + if (got_pipe) { + close_bpipe(bpipe); + } else { + fclose(fd); + } + } else { + berrno be; + Jmsg(jcr, M_ERROR, 0, _("Could not open WriteBootstrap file:\n" + "%s: ERR=%s\n"), fname, be.strerror()); + set_jcr_job_status(jcr, JS_ErrorTerminated); + } } msg_type = M_INFO; /* by default INFO message */ - switch (TermCode) { + switch (jcr->JobStatus) { case JS_Terminated: - term_msg = _("Backup OK"); + if (jcr->Errors || jcr->SDErrors) { + term_msg = _("Backup OK -- with warnings"); + } else { + term_msg = _("Backup OK"); + } break; case JS_FatalError: case JS_ErrorTerminated: @@ -326,59 +387,108 @@ static void backup_cleanup(JCR *jcr, int TermCode, char *since) msg_type = M_ERROR; /* Generate error message */ if (jcr->store_bsock) { bnet_sig(jcr->store_bsock, BNET_TERMINATE); - pthread_cancel(jcr->SD_msg_chan); + if (jcr->SD_msg_chan) { + pthread_cancel(jcr->SD_msg_chan); + } } break; - case JS_Cancelled: - term_msg = _("Backup Cancelled"); + case JS_Canceled: + term_msg = _("Backup Canceled"); if (jcr->store_bsock) { bnet_sig(jcr->store_bsock, BNET_TERMINATE); - pthread_cancel(jcr->SD_msg_chan); + if (jcr->SD_msg_chan) { + pthread_cancel(jcr->SD_msg_chan); + } } break; default: term_msg = term_code; - sprintf(term_code, _("Inappropriate term code: %c\n"), TermCode); + sprintf(term_code, _("Inappropriate term code: %c\n"), jcr->JobStatus); break; } bstrftime(sdt, sizeof(sdt), jcr->jr.StartTime); bstrftime(edt, sizeof(edt), jcr->jr.EndTime); - kbps = (double)jcr->jr.JobBytes / (1000 * (jcr->jr.EndTime - jcr->jr.StartTime)); - if (!db_get_job_volume_names(jcr->db, jcr->jr.JobId, &jcr->VolumeName)) { + RunTime = jcr->jr.EndTime - jcr->jr.StartTime; + if (RunTime <= 0) { + kbps = 0; + } else { + kbps = (double)jcr->jr.JobBytes / (1000 * RunTime); + } + if (!db_get_job_volume_names(jcr, jcr->db, jcr->jr.JobId, &jcr->VolumeName)) { + /* + * Note, if the job has erred, most likely it did not write any + * tape, so suppress this "error" message since in that case + * it is normal. Or look at it the other way, only for a + * normal exit should we complain about this error. + */ + if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes) { + Jmsg(jcr, M_ERROR, 0, "%s", db_strerror(jcr->db)); + } jcr->VolumeName[0] = 0; /* none */ } - Jmsg(jcr, msg_type, 0, _("%s\n\ + if (jcr->ReadBytes == 0) { + bstrncpy(compress, "None", sizeof(compress)); + } else { + compression = (double)100 - 100.0 * ((double)jcr->JobBytes / (double)jcr->ReadBytes); + if (compression < 0.5) { + bstrncpy(compress, "None", sizeof(compress)); + } else { + bsnprintf(compress, sizeof(compress), "%.1f %%", (float)compression); + } + } + jobstatus_to_ascii(jcr->FDJobStatus, fd_term_msg, sizeof(fd_term_msg)); + jobstatus_to_ascii(jcr->SDJobStatus, sd_term_msg, sizeof(sd_term_msg)); + +// bmicrosleep(15, 0); /* for debugging SIGHUP */ + + Jmsg(jcr, msg_type, 0, _("Bacula " VERSION " (" LSMDATE "): %s\n\ JobId: %d\n\ Job: %s\n\ -FileSet: %s\n\ Backup Level: %s%s\n\ Client: %s\n\ +FileSet: \"%s\" %s\n\ +Pool: \"%s\"\n\ Start time: %s\n\ End time: %s\n\ -Files Written: %s\n\ -Bytes Written: %s\n\ +FD Files Written: %s\n\ +SD Files Written: %s\n\ +FD Bytes Written: %s\n\ +SD Bytes Written: %s\n\ Rate: %.1f KB/s\n\ -Volume names(s): %s\n\ +Software Compression: %s\n\ +Volume name(s): %s\n\ Volume Session Id: %d\n\ Volume Session Time: %d\n\ -Volume Bytes: %s\n\ +Last Volume Bytes: %s\n\ +Non-fatal FD errors: %d\n\ +SD Errors: %d\n\ +FD termination status: %s\n\ +SD termination status: %s\n\ Termination: %s\n\n"), edt, jcr->jr.JobId, jcr->jr.Job, - jcr->fileset->hdr.name, level_to_str(jcr->JobLevel), since, jcr->client->hdr.name, + jcr->fileset->hdr.name, fsr->cCreateTime, + jcr->pool->hdr.name, sdt, edt, edit_uint64_with_commas(jcr->jr.JobFiles, ec1), + edit_uint64_with_commas(jcr->SDJobFiles, ec4), edit_uint64_with_commas(jcr->jr.JobBytes, ec2), + edit_uint64_with_commas(jcr->SDJobBytes, ec5), (float)kbps, + compress, jcr->VolumeName, jcr->VolSessionId, jcr->VolSessionTime, edit_uint64_with_commas(mr.VolBytes, ec3), + jcr->Errors, + jcr->SDErrors, + fd_term_msg, + sd_term_msg, term_msg); Dmsg0(100, "Leave backup_cleanup()\n");