X-Git-Url: https://git.sur5r.net/?a=blobdiff_plain;f=bacula%2Fsrc%2Fdird%2Fbackup.c;h=10f301f011a1a49b6f54ab8a1e0baecb93a39746;hb=1e2a682ea94040438bdcbb98f121976ae73c7937;hp=eb8e9f121751002b5fcf7bb4414b431a5993c143;hpb=d1cfa762a333e159b88a3eb49d1bf228770cf9d8;p=bacula%2Fbacula diff --git a/bacula/src/dird/backup.c b/bacula/src/dird/backup.c index eb8e9f1217..10f301f011 100644 --- a/bacula/src/dird/backup.c +++ b/bacula/src/dird/backup.c @@ -1,32 +1,22 @@ /* - Bacula® - The Network Backup Solution - - Copyright (C) 2000-2009 Free Software Foundation Europe e.V. - - The main author of Bacula is Kern Sibbald, with contributions from - many others, a complete list can be found in the file AUTHORS. - This program is Free Software; you can redistribute it and/or - modify it under the terms of version two of the GNU General Public - License as published by the Free Software Foundation and included - in the file LICENSE. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - - Bacula® is a registered trademark of Kern Sibbald. - The licensor of Bacula is the Free Software Foundation Europe - (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich, - Switzerland, email:ftf@fsfeurope.org. + Bacula(R) - The Network Backup Solution + + Copyright (C) 2000-2017 Kern Sibbald + + The original author of Bacula is Kern Sibbald, with contributions + from many others, a complete list can be found in the file AUTHORS. + + You may use this file and others of this release according to the + license defined in the LICENSE file, which includes the Affero General + Public License, v3.0 ("AGPLv3") and some additional permissions and + terms pursuant to its AGPLv3 Section 7. + + This notice must be preserved when any source code is + conveyed and/or propagated. + + Bacula(R) is a registered trademark of Kern Sibbald. */ /* - * * Bacula Director -- backup.c -- responsible for doing backup jobs * * Kern Sibbald, March MM @@ -37,8 +27,6 @@ * Open connection with File daemon and pass him commands * to do the backup. * When the File daemon finishes the job, update the DB. - * - * Version $Id$ */ #include "bacula.h" @@ -46,26 +34,41 @@ #include "ua.h" /* Commands sent to File daemon */ -static char backupcmd[] = "backup\n"; +static char backupcmd[] = "backup FileIndex=%ld\n"; static char storaddr[] = "storage address=%s port=%d ssl=%d\n"; /* Responses received from File daemon */ static char OKbackup[] = "2000 OK backup\n"; static char OKstore[] = "2000 OK storage\n"; +/* After 17 Aug 2013 */ +static char newEndJob[] = "2800 End Job TermCode=%d JobFiles=%u " + "ReadBytes=%llu JobBytes=%llu Errors=%u " + "VSS=%d Encrypt=%d " + "CommBytes=%lld CompressCommBytes=%lld\n"; +/* Pre 17 Aug 2013 */ static char EndJob[] = "2800 End Job TermCode=%d JobFiles=%u " - "ReadBytes=%llu JobBytes=%llu Errors=%u " + "ReadBytes=%llu JobBytes=%llu Errors=%u " "VSS=%d Encrypt=%d\n"; /* Pre 1.39.29 (04Dec06) EndJob */ static char OldEndJob[] = "2800 End Job TermCode=%d JobFiles=%u " "ReadBytes=%llu JobBytes=%llu Errors=%u\n"; -/* + +/* Commands sent to Storage daemon */ +static char clientaddr[] = "client address=%s port=%d ssl=%d\n"; + +/* Commands received from Storage daemon */ +static char OKclient[] = "3000 OK client command\n"; + +/* * Called here before the job is run to do the job * specific setup. */ bool do_backup_init(JCR *jcr) { + /* Make local copy */ + jcr->RescheduleIncompleteJobs = jcr->job->RescheduleIncompleteJobs; - if (jcr->get_JobLevel() == L_VIRTUAL_FULL) { + if (jcr->is_JobLevel(L_VIRTUAL_FULL)) { return do_vbackup_init(jcr); } free_rstorage(jcr); /* we don't read so release */ @@ -74,7 +77,7 @@ bool do_backup_init(JCR *jcr) return false; } - /* + /* * Get definitive Job level and since time */ get_level_since_time(jcr, jcr->since, sizeof(jcr->since)); @@ -137,7 +140,9 @@ static bool get_base_jobids(JCR *jcr, db_list_ctx *jobids) } /* - * Foreach files in currrent list, send "/path/fname\0LStat\0MD5" to FD + * Foreach files in currrent list, send "/path/fname\0LStat\0MD5\0Delta" to FD + * row[0]=Path, row[1]=Filename, row[2]=FileIndex + * row[3]=JobId row[4]=LStat row[5]=DeltaSeq row[6]=MD5 */ static int accurate_list_handler(void *ctx, int num_fields, char **row) { @@ -146,22 +151,22 @@ static int accurate_list_handler(void *ctx, int num_fields, char **row) if (job_canceled(jcr)) { return 1; } - - if (row[2] == 0) { /* discard when file_index == 0 */ + + if (row[2][0] == '0') { /* discard when file_index == 0 */ return 0; } /* sending with checksum */ - if (jcr->use_accurate_chksum - && num_fields == 6 - && row[5][0] /* skip checksum = '0' */ - && row[5][1]) - { - jcr->file_bsock->fsend("%s%s%c%s%c%s", - row[0], row[1], 0, row[4], 0, row[5]); + if (jcr->use_accurate_chksum + && num_fields == 7 + && row[6][0] /* skip checksum = '0' */ + && row[6][1]) + { + jcr->file_bsock->fsend("%s%s%c%s%c%s%c%s", + row[0], row[1], 0, row[4], 0, row[6], 0, row[5]); } else { - jcr->file_bsock->fsend("%s%s%c%s", - row[0], row[1], 0, row[4]); + jcr->file_bsock->fsend("%s%s%c%s%c%c%s", + row[0], row[1], 0, row[4], 0, 0, row[5]); } return 0; } @@ -182,23 +187,23 @@ static bool is_checksum_needed_by_fileset(JCR *jcr) } f = jcr->job->fileset; - + for (int i=0; i < f->num_includes; i++) { /* Parse all Include {} */ inc = f->include_items[i]; - + for (int j=0; j < inc->num_opts; j++) { /* Parse all Options {} */ fopts = inc->opts_list[j]; - + for (char *k=fopts->opts; *k ; k++) { /* Try to find one request */ switch (*k) { case 'V': /* verify */ - in_block = (jcr->get_JobType() == JT_VERIFY); /* not used now */ + in_block = (jcr->getJobType() == JT_VERIFY); /* not used now */ break; case 'J': /* Basejob keyword */ have_basejob_option = in_block = jcr->HasBase; break; case 'C': /* Accurate keyword */ - in_block = (jcr->get_JobLevel() != L_FULL); + in_block = !jcr->is_JobLevel(L_FULL); break; case ':': /* End of keyword */ in_block = false; @@ -221,7 +226,7 @@ static bool is_checksum_needed_by_fileset(JCR *jcr) if (!have_basejob_option && jcr->HasBase) { return true; } - + Dmsg0(50, "Checksum will be sent to FD\n"); return false; } @@ -229,81 +234,193 @@ static bool is_checksum_needed_by_fileset(JCR *jcr) /* * Send current file list to FD * DIR -> FD : accurate files=xxxx - * DIR -> FD : /path/to/file\0Lstat\0MD5 - * DIR -> FD : /path/to/dir/\0Lstat\0MD5 + * DIR -> FD : /path/to/file\0Lstat\0MD5\0Delta + * DIR -> FD : /path/to/dir/\0Lstat\0MD5\0Delta * ... * DIR -> FD : EOD */ bool send_accurate_current_files(JCR *jcr) { POOL_MEM buf; - bool ret=true; db_list_ctx jobids; db_list_ctx nb; + char ed1[50]; - if (!jcr->accurate || job_canceled(jcr)) { + /* In base level, no previous job is used and no restart incomplete jobs */ + if (jcr->is_canceled() || jcr->is_JobLevel(L_BASE)) { return true; } - /* In base level, no previous job is used */ - if (jcr->get_JobLevel() == L_BASE) { + if (!jcr->accurate && !jcr->rerunning) { return true; } - - if (jcr->get_JobLevel() == L_FULL) { + + if (jcr->is_JobLevel(L_FULL)) { /* On Full mode, if no previous base job, no accurate things */ - if (!get_base_jobids(jcr, &jobids)) { - goto bail_out; + if (get_base_jobids(jcr, &jobids)) { + jcr->HasBase = true; + Jmsg(jcr, M_INFO, 0, _("Using BaseJobId(s): %s\n"), jobids.list); + } else if (!jcr->rerunning) { + return true; } - jcr->HasBase = true; - Jmsg(jcr, M_INFO, 0, _("Using BaseJobId(s): %s\n"), jobids.list); + + } else if (jcr->is_JobLevel(L_VERIFY_DATA)) { + char ed1[50]; + jobids.add(edit_uint64(jcr->previous_jr.JobId, ed1)); } else { /* For Incr/Diff level, we search for older jobs */ - db_accurate_get_jobids(jcr, jcr->db, &jcr->jr, &jobids); + db_get_accurate_jobids(jcr, jcr->db, &jcr->jr, &jobids); /* We are in Incr/Diff, but no Full to build the accurate list... */ if (jobids.count == 0) { - ret=false; Jmsg(jcr, M_FATAL, 0, _("Cannot find previous jobids.\n")); - goto bail_out; + return false; /* fail */ } } + /* For incomplete Jobs, we add our own id */ + if (jcr->rerunning) { + edit_int64(jcr->JobId, ed1); + jobids.add(ed1); + } + /* Don't send and store the checksum if fileset doesn't require it */ jcr->use_accurate_chksum = is_checksum_needed_by_fileset(jcr); if (jcr->JobId) { /* display the message only for real jobs */ - Jmsg(jcr, M_INFO, 0, _("Sending Accurate information.\n")); + Jmsg(jcr, M_INFO, 0, _("Sending Accurate information to the FD.\n")); } /* to be able to allocate the right size for htable */ Mmsg(buf, "SELECT sum(JobFiles) FROM Job WHERE JobId IN (%s)", jobids.list); db_sql_query(jcr->db, buf.c_str(), db_list_handler, &nb); Dmsg2(200, "jobids=%s nb=%s\n", jobids.list, nb.list); - jcr->file_bsock->fsend("accurate files=%s\n", nb.list); + jcr->file_bsock->fsend("accurate files=%s\n", nb.list); if (!db_open_batch_connexion(jcr, jcr->db)) { Jmsg0(jcr, M_FATAL, 0, "Can't get batch sql connexion"); - return false; + return false; /* Fail */ } - + if (jcr->HasBase) { jcr->nb_base_files = str_to_int64(nb.list); - db_create_base_file_list(jcr, jcr->db, jobids.list); - db_get_base_file_list(jcr, jcr->db, - accurate_list_handler, (void *)jcr); + if (!db_create_base_file_list(jcr, jcr->db, jobids.list)) { + Jmsg1(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db)); + return false; + } + if (!db_get_base_file_list(jcr, jcr->db, jcr->use_accurate_chksum, + accurate_list_handler, (void *)jcr)) { + Jmsg1(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db)); + return false; + } } else { - db_get_file_list(jcr, jcr->db_batch, jobids.list, - accurate_list_handler, (void *)jcr); - } - - /* TODO: close the batch connexion ? (can be used very soon) */ + if (!db_get_file_list(jcr, jcr->db_batch, + jobids.list, jcr->use_accurate_chksum, false /* no delta */, + accurate_list_handler, (void *)jcr)) { + Jmsg1(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db_batch)); + return false; + } + } + /* TODO: close the batch connection ? (can be used very soon) */ jcr->file_bsock->signal(BNET_EOD); + return true; +} -bail_out: - return ret; +bool send_store_addr_to_fd(JCR *jcr, STORE *store, + char *store_address, uint32_t store_port) +{ + int tls_need = BNET_TLS_NONE; + + /* TLS Requirement */ + if (store->tls_enable) { + if (store->tls_require) { + tls_need = BNET_TLS_REQUIRED; + } else { + tls_need = BNET_TLS_OK; + } + } + + /* + * Send Storage address to the FD + */ + jcr->file_bsock->fsend(storaddr, store_address, store_port, tls_need); + if (!response(jcr, jcr->file_bsock, OKstore, "Storage", DISPLAY_ERROR)) { + return false; + } + return true; +} + +bool send_client_addr_to_sd(JCR *jcr) +{ + int tls_need = BNET_TLS_NONE; + BSOCK *sd = jcr->store_bsock; + + /* TLS Requirement for the client */ + if (jcr->client->tls_enable) { + if (jcr->client->tls_require) { + tls_need = BNET_TLS_REQUIRED; + } else { + tls_need = BNET_TLS_OK; + } + } + /* + * Send Client address to the SD + */ + sd->fsend(clientaddr, jcr->client->address(), jcr->client->FDport, tls_need); + if (!response(jcr, sd, OKclient, "Client", DISPLAY_ERROR)) { + return false; + } + return true; +} + +/* + * Allow to specify the address used by the Client to + * connect to the storage daemon in the Client resource + * or in the Storage resource. + */ +char *get_storage_address(CLIENT *client, STORE *store) +{ + char *store_address; + + if (client && client->fd_storage_address) { + Dmsg0(10, "Using Client resource FD Storage Address to contact the Storage\n"); + store_address = client->fd_storage_address; + + } else if (store->fd_storage_address) { + Dmsg0(10, "Using Storage resource FD Storage Address to contact the Storage\n"); + store_address = store->fd_storage_address; + + } else { + Dmsg0(10, "Using default Storage address\n"); + store_address = store->address; + } + return store_address; +} + +bool run_storage_and_start_message_thread(JCR *jcr, BSOCK *sd) +{ + /* + * Start the job prior to starting the message thread below + * to avoid two threads from using the BSOCK structure at + * the same time. + */ + if (!sd->fsend("run")) { + return false; + } + + /* + * Now start a Storage daemon message thread. Note, + * this thread is used to provide the catalog services + * for the backup job, including inserting the attributes + * into the catalog. See catalog_update() in catreq.c + */ + if (!start_storage_daemon_message_thread(jcr)) { + return false; + } + Dmsg0(150, "Storage daemon connection OK\n"); + return true; } /* @@ -315,26 +432,63 @@ bail_out: bool do_backup(JCR *jcr) { int stat; - int tls_need = BNET_TLS_NONE; - BSOCK *fd; + BSOCK *fd, *sd; STORE *store; + char *store_address; + uint32_t store_port; char ed1[100]; + db_int64_ctx job; + POOL_MEM buf; - if (jcr->get_JobLevel() == L_VIRTUAL_FULL) { + if (jcr->is_JobLevel(L_VIRTUAL_FULL)) { return do_vbackup(jcr); } /* Print Job Start message */ - Jmsg(jcr, M_INFO, 0, _("Start Backup JobId %s, Job=%s\n"), - edit_uint64(jcr->JobId, ed1), jcr->Job); + if (jcr->rerunning) { + Jmsg(jcr, M_INFO, 0, _("Restart Incomplete Backup JobId %s, Job=%s\n"), + edit_uint64(jcr->JobId, ed1), jcr->Job); + } else { + Jmsg(jcr, M_INFO, 0, _("Start Backup JobId %s, Job=%s\n"), + edit_uint64(jcr->JobId, ed1), jcr->Job); + } - set_jcr_job_status(jcr, JS_Running); + jcr->setJobStatus(JS_Running); Dmsg2(100, "JobId=%d JobLevel=%c\n", jcr->jr.JobId, jcr->jr.JobLevel); if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) { Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db)); return false; } + /* For incomplete Jobs, we add our own id */ + if (jcr->rerunning) { + edit_int64(jcr->JobId, ed1); + Mmsg(buf, "SELECT max(FileIndex) FROM File WHERE JobId=%s", ed1); + if (db_sql_query(jcr->db, buf.c_str(), db_int64_handler, &job)) { + Jmsg(jcr, M_INFO, 0, _("Found %ld files from prior incomplete Job.\n"), + (int32_t)job.value); + } else { + Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db)); + return false; + } + jcr->JobFiles = job.value; + Dmsg1(100, "==== FI=%ld\n", jcr->JobFiles); + Mmsg(buf, "SELECT VolSessionId FROM Job WHERE JobId=%s", ed1); + if (!db_sql_query(jcr->db, buf.c_str(), db_int64_handler, &job)) { + Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db)); + return false; + } + jcr->VolSessionId = job.value; + Mmsg(buf, "SELECT VolSessionTime FROM Job WHERE JobId=%s", ed1); + if (!db_sql_query(jcr->db, buf.c_str(), db_int64_handler, &job)) { + Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db)); + return false; + } + jcr->VolSessionTime = job.value; + Dmsg4(100, "JobId=%s JobFiles=%ld VolSessionId=%ld VolSessionTime=%ld\n", ed1, + jcr->JobFiles, jcr->VolSessionId, jcr->VolSessionTime); + } + /* * Open a message channel connection with the Storage * daemon. This is to let him know that our client @@ -342,7 +496,7 @@ bool do_backup(JCR *jcr) * */ Dmsg0(110, "Open connection with storage daemon\n"); - set_jcr_job_status(jcr, JS_WaitSD); + jcr->setJobStatus(JS_WaitSD); /* * Start conversation with Storage daemon */ @@ -355,35 +509,31 @@ bool do_backup(JCR *jcr) if (!start_storage_daemon_job(jcr, NULL, jcr->wstorage)) { return false; } - - /* - * Start the job prior to starting the message thread below - * to avoid two threads from using the BSOCK structure at - * the same time. - */ - if (!bnet_fsend(jcr->store_bsock, "run")) { - return false; + sd = jcr->store_bsock; + if (jcr->client) { + jcr->sd_calls_client = jcr->client->sd_calls_client; } - /* - * Now start a Storage daemon message thread. Note, - * this thread is used to provide the catalog services - * for the backup job, including inserting the attributes - * into the catalog. See catalog_update() in catreq.c + * Note startup sequence of SD/FD is different depending on + * whether the SD listens (normal) or the SD calls the FD. */ - if (!start_storage_daemon_message_thread(jcr)) { - return false; + if (!jcr->sd_calls_client) { + if (!run_storage_and_start_message_thread(jcr, sd)) { + goto bail_out; + } } - Dmsg0(150, "Storage daemon connection OK\n"); - - set_jcr_job_status(jcr, JS_WaitFD); + jcr->setJobStatus(JS_WaitFD); if (!connect_to_file_daemon(jcr, 10, FDConnectTimeout, 1)) { goto bail_out; } - set_jcr_job_status(jcr, JS_Running); + jcr->setJobStatus(JS_Running); fd = jcr->file_bsock; + if (!send_level_command(jcr)) { + goto bail_out; + } + if (!send_include_list(jcr)) { goto bail_out; } @@ -392,44 +542,68 @@ bool do_backup(JCR *jcr) goto bail_out; } - if (!send_level_command(jcr)) { - goto bail_out; + /* TODO: See priority with bandwidth parameter */ + if (jcr->job->max_bandwidth > 0) { + jcr->max_bandwidth = jcr->job->max_bandwidth; + } else if (jcr->client->max_bandwidth > 0) { + jcr->max_bandwidth = jcr->client->max_bandwidth; } - /* - * send Storage daemon address to the File daemon - */ - store = jcr->wstore; - if (store->SDDport == 0) { - store->SDDport = store->SDport; + if (jcr->max_bandwidth > 0) { + send_bwlimit(jcr, jcr->Job); /* Old clients don't have this command */ } - /* TLS Requirement */ - if (store->tls_enable) { - if (store->tls_require) { - tls_need = BNET_TLS_REQUIRED; - } else { - tls_need = BNET_TLS_OK; + send_snapshot_retention(jcr, jcr->snapshot_retention); + + store = jcr->wstore; + + if (jcr->sd_calls_client) { + if (jcr->FDVersion < 10) { + Jmsg(jcr, M_FATAL, 0, _("The File daemon does not support SDCallsClient.\n")); + goto bail_out; + } + if (!send_client_addr_to_sd(jcr)) { + goto bail_out; } + + if (!run_storage_and_start_message_thread(jcr, sd)) { + goto bail_out; + } + + store_address = jcr->wstore->address; /* dummy */ + store_port = 0; /* flag that SD calls FD */ + } else { + /* + * send Storage daemon address to the File daemon + */ + if (store->SDDport == 0) { + store->SDDport = store->SDport; + } + + store_address = get_storage_address(jcr->client, store); + store_port = store->SDDport; } - fd->fsend(storaddr, store->address, store->SDDport, tls_need); - if (!response(jcr, fd, OKstore, "Storage", DISPLAY_ERROR)) { + if (!send_store_addr_to_fd(jcr, store, store_address, store_port)) { goto bail_out; } + /* Declare the job started to start the MaxRunTime check */ + jcr->setJobStarted(); + + /* Send and run the RunBefore */ if (!send_runscripts_commands(jcr)) { goto bail_out; } - /* + /* * We re-update the job start record so that the start - * time is set after the run before job. This avoids + * time is set after the run before job. This avoids * that any files created by the run before job will * be saved twice. They will be backed up in the current * job, but not in the next one unless they are changed. * Without this, they will be backed up in this job and - * in the next job run because in that case, their date + * in the next job run because in that case, their date * is after the start of this run. */ jcr->start_time = time(NULL); @@ -443,11 +617,12 @@ bool do_backup(JCR *jcr) * all files to FD. */ if (!send_accurate_current_files(jcr)) { - goto bail_out; + goto bail_out; /* error */ } /* Send backup command */ - fd->fsend(backupcmd); + fd->fsend(backupcmd, jcr->JobFiles); + Dmsg1(100, ">filed: %s", fd->msg); if (!response(jcr, fd, OKbackup, "backup", DISPLAY_ERROR)) { goto bail_out; } @@ -456,21 +631,20 @@ bool do_backup(JCR *jcr) stat = wait_for_job_termination(jcr); db_write_batch_file_records(jcr); /* used by bulk batch file insert */ - if (jcr->HasBase && - !db_commit_base_file_attributes_record(jcr, jcr->db)) - { - Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db)); + if (jcr->HasBase) { + db_commit_base_file_attributes_record(jcr, jcr->db); + /* Any error already printed */ } - if (stat == JS_Terminated) { + if (!jcr->is_canceled() && stat == JS_Terminated) { backup_cleanup(jcr, stat); return true; - } + } return false; /* Come here only after starting SD thread */ bail_out: - set_jcr_job_status(jcr, JS_ErrorTerminated); + jcr->setJobStatus(JS_ErrorTerminated); Dmsg1(400, "wait for sd. use=%d\n", jcr->use_count()); /* Cancel SD */ wait_for_job_termination(jcr, FDConnectTimeout); @@ -494,25 +668,28 @@ int wait_for_job_termination(JCR *jcr, int timeout) uint32_t JobWarnings = 0; uint64_t ReadBytes = 0; uint64_t JobBytes = 0; - int VSS = 0; + uint64_t CommBytes = 0; + uint64_t CommCompressedBytes = 0; + int VSS = 0; /* or Snapshot on Unix */ int Encrypt = 0; btimer_t *tid=NULL; - set_jcr_job_status(jcr, JS_Running); - if (fd) { if (timeout) { tid = start_bsock_timer(fd, timeout); /* TODO: New timeout directive??? */ } /* Wait for Client to terminate */ while ((n = bget_dirmsg(fd)) >= 0) { - if (!fd_ok && - (sscanf(fd->msg, EndJob, &jcr->FDJobStatus, &JobFiles, + if (!fd_ok && + (sscanf(fd->msg, newEndJob, &jcr->FDJobStatus, &JobFiles, + &ReadBytes, &JobBytes, &JobErrors, &VSS, &Encrypt, + &CommBytes, &CommCompressedBytes) == 9 || + sscanf(fd->msg, EndJob, &jcr->FDJobStatus, &JobFiles, &ReadBytes, &JobBytes, &JobErrors, &VSS, &Encrypt) == 7 || sscanf(fd->msg, OldEndJob, &jcr->FDJobStatus, &JobFiles, &ReadBytes, &JobBytes, &JobErrors) == 5)) { fd_ok = true; - set_jcr_job_status(jcr, jcr->FDJobStatus); + jcr->setJobStatus(jcr->FDJobStatus); Dmsg1(100, "FDStatus=%c\n", (char)jcr->JobStatus); } else { Jmsg(jcr, M_WARNING, 0, _("Unexpected Client Job message: %s\n"), @@ -526,15 +703,26 @@ int wait_for_job_termination(JCR *jcr, int timeout) stop_bsock_timer(tid); } - if (is_bnet_error(fd)) { + if (fd->is_error() && jcr->getJobStatus() != JS_Canceled) { + int i = 0; Jmsg(jcr, M_FATAL, 0, _("Network error with FD during %s: ERR=%s\n"), - job_type_to_str(jcr->get_JobType()), fd->bstrerror()); + job_type_to_str(jcr->getJobType()), fd->bstrerror()); + while (i++ < 20 && jcr->job->RescheduleIncompleteJobs && jcr->is_canceled()) { + bmicrosleep(3, 0); + } } fd->signal(BNET_TERMINATE); /* tell Client we are terminating */ } - /* Force cancel in SD if failing */ - if (job_canceled(jcr) || !fd_ok) { + /* + * Force cancel in SD if failing, but not for Incomplete jobs + * so that we let the SD despool. + */ + Dmsg5(100, "cancel=%d fd_ok=%d FDJS=%d JS=%d SDJS=%d\n", jcr->is_canceled(), fd_ok, jcr->FDJobStatus, + jcr->JobStatus, jcr->SDJobStatus); + if (jcr->is_canceled() || (!jcr->job->RescheduleIncompleteJobs && !fd_ok)) { + Dmsg4(100, "fd_ok=%d FDJS=%d JS=%d SDJS=%d\n", fd_ok, jcr->FDJobStatus, + jcr->JobStatus, jcr->SDJobStatus); cancel_storage_daemon_job(jcr); } @@ -548,18 +736,21 @@ int wait_for_job_termination(JCR *jcr, int timeout) jcr->ReadBytes = ReadBytes; jcr->JobBytes = JobBytes; jcr->JobWarnings = JobWarnings; - jcr->VSS = VSS; + jcr->CommBytes = CommBytes; + jcr->CommCompressedBytes = CommCompressedBytes; + jcr->Snapshot = VSS; jcr->Encrypt = Encrypt; - } else { + } else if (jcr->getJobStatus() != JS_Canceled) { Jmsg(jcr, M_FATAL, 0, _("No Job status returned from FD.\n")); } -// Dmsg4(100, "fd_ok=%d FDJS=%d JS=%d SDJS=%d\n", fd_ok, jcr->FDJobStatus, -// jcr->JobStatus, jcr->SDJobStatus); - /* Return the first error status we find Dir, FD, or SD */ - if (!fd_ok || is_bnet_error(fd)) { /* if fd not set, that use !fd_ok */ - jcr->FDJobStatus = JS_ErrorTerminated; + if (!fd_ok || fd->is_error()) { /* if fd not set, that use !fd_ok */ + if (jcr->getJobStatus() == JS_Canceled) { + jcr->FDJobStatus = JS_Canceled; + } else { + jcr->FDJobStatus = JS_ErrorTerminated; + } } if (jcr->JobStatus != JS_Terminated) { return jcr->JobStatus; @@ -576,32 +767,47 @@ int wait_for_job_termination(JCR *jcr, int timeout) void backup_cleanup(JCR *jcr, int TermCode) { char sdt[50], edt[50], schedt[50]; - char ec1[30], ec2[30], ec3[30], ec4[30], ec5[30], compress[50]; - char ec6[30], ec7[30], ec8[30], elapsed[50]; - char term_code[100], fd_term_msg[100], sd_term_msg[100]; - const char *term_msg; + char ec1[30], ec2[30], ec3[30], ec4[30], ec5[30]; + char ec6[30], ec7[30], ec8[30], ec9[30], ec10[30], elapsed[50]; + char data_compress[200], comm_compress[200]; + char fd_term_msg[100], sd_term_msg[100]; + POOL_MEM term_msg; int msg_type = M_INFO; MEDIA_DBR mr; CLIENT_DBR cr; - double kbps, compression; + double kbps, compression, ratio; utime_t RunTime; POOL_MEM base_info; + POOL_MEM vol_info; + + remove_dummy_jobmedia_records(jcr); - if (jcr->get_JobLevel() == L_VIRTUAL_FULL) { + if (jcr->is_JobLevel(L_VIRTUAL_FULL)) { vbackup_cleanup(jcr, TermCode); return; } Dmsg2(100, "Enter backup_cleanup %d %c\n", TermCode, TermCode); - memset(&mr, 0, sizeof(mr)); memset(&cr, 0, sizeof(cr)); +#ifdef xxxx + /* The current implementation of the JS_Warning status is not + * completed. SQL part looks to be ok, but the code is using + * JS_Terminated almost everywhere instead of (JS_Terminated || JS_Warning) + * as we do with is_canceled() + */ + if (jcr->getJobStatus() == JS_Terminated && + (jcr->JobErrors || jcr->SDErrors || jcr->JobWarnings)) { + TermCode = JS_Warnings; + } +#endif + update_job_end(jcr, TermCode); if (!db_get_job_record(jcr, jcr->db, &jcr->jr)) { Jmsg(jcr, M_WARNING, 0, _("Error getting Job record for Job report: ERR=%s"), db_strerror(jcr->db)); - set_jcr_job_status(jcr, JS_ErrorTerminated); + jcr->setJobStatus(JS_ErrorTerminated); } bstrncpy(cr.Name, jcr->client->name(), sizeof(cr.Name)); @@ -614,7 +820,7 @@ void backup_cleanup(JCR *jcr, int TermCode) if (!db_get_media_record(jcr, jcr->db, &mr)) { Jmsg(jcr, M_WARNING, 0, _("Error getting Media record for Volume \"%s\": ERR=%s"), mr.VolumeName, db_strerror(jcr->db)); - set_jcr_job_status(jcr, JS_ErrorTerminated); + jcr->setJobStatus(JS_ErrorTerminated); } update_bootstrap_file(jcr); @@ -622,37 +828,40 @@ void backup_cleanup(JCR *jcr, int TermCode) switch (jcr->JobStatus) { case JS_Terminated: if (jcr->JobErrors || jcr->SDErrors) { - term_msg = _("Backup OK -- with warnings"); + Mmsg(term_msg, _("Backup OK -- %s"), jcr->StatusErrMsg[0] ? jcr->StatusErrMsg : _("with warnings")); + } else { - term_msg = _("Backup OK"); + Mmsg(term_msg, _("Backup OK")); } break; + case JS_Incomplete: + Mmsg(term_msg, _("Backup failed -- incomplete")); + break; case JS_Warnings: - term_msg = _("Backup OK -- with warnings"); + Mmsg(term_msg, _("Backup OK -- %s"), jcr->StatusErrMsg[0] ? jcr->StatusErrMsg : _("with warnings")); break; case JS_FatalError: case JS_ErrorTerminated: - term_msg = _("*** Backup Error ***"); + Mmsg(term_msg, _("*** Backup Error ***")); msg_type = M_ERROR; /* Generate error message */ if (jcr->store_bsock) { jcr->store_bsock->signal(BNET_TERMINATE); - if (jcr->SD_msg_chan) { + if (jcr->SD_msg_chan_started) { pthread_cancel(jcr->SD_msg_chan); } } break; case JS_Canceled: - term_msg = _("Backup Canceled"); + Mmsg(term_msg, _("Backup Canceled")); if (jcr->store_bsock) { jcr->store_bsock->signal(BNET_TERMINATE); - if (jcr->SD_msg_chan) { + if (jcr->SD_msg_chan_started) { pthread_cancel(jcr->SD_msg_chan); } } break; default: - term_msg = term_code; - sprintf(term_code, _("Inappropriate term code: %c\n"), jcr->JobStatus); + Mmsg(term_msg, _("Inappropriate term code: %c\n"), jcr->JobStatus); break; } bstrftimes(schedt, sizeof(schedt), jcr->jr.SchedTime); @@ -660,10 +869,9 @@ void backup_cleanup(JCR *jcr, int TermCode) bstrftimes(edt, sizeof(edt), jcr->jr.EndTime); RunTime = jcr->jr.EndTime - jcr->jr.StartTime; if (RunTime <= 0) { - kbps = 0; - } else { - kbps = ((double)jcr->jr.JobBytes) / (1000.0 * (double)RunTime); + RunTime = 1; } + kbps = ((double)jcr->jr.JobBytes) / (1000.0 * (double)RunTime); if (!db_get_job_volume_names(jcr, jcr->db, jcr->jr.JobId, &jcr->VolumeName)) { /* * Note, if the job has erred, most likely it did not write any @@ -678,27 +886,60 @@ void backup_cleanup(JCR *jcr, int TermCode) } if (jcr->ReadBytes == 0) { - bstrncpy(compress, "None", sizeof(compress)); + bstrncpy(data_compress, "None", sizeof(data_compress)); + } else { + compression = (double)100 - 100.0 * ((double)jcr->SDJobBytes / (double)jcr->ReadBytes); + if (compression < 0.5) { + bstrncpy(data_compress, "None", sizeof(data_compress)); + } else { + if (jcr->SDJobBytes > 0) { + ratio = (double)jcr->ReadBytes / (double)jcr->SDJobBytes; + } else { + ratio = 1.0; + } + bsnprintf(data_compress, sizeof(data_compress), "%.1f%% %.1f:1", + compression, ratio); + } + } + if (jcr->CommBytes == 0 || jcr->CommCompressedBytes == 0) { + bstrncpy(comm_compress, "None", sizeof(comm_compress)); } else { - compression = (double)100 - 100.0 * ((double)jcr->JobBytes / (double)jcr->ReadBytes); + compression = (double)100 - 100.0 * ((double)jcr->CommCompressedBytes / (double)jcr->CommBytes); if (compression < 0.5) { - bstrncpy(compress, "None", sizeof(compress)); + bstrncpy(comm_compress, "None", sizeof(comm_compress)); } else { - bsnprintf(compress, sizeof(compress), "%.1f %%", compression); + ratio = (double)jcr->CommBytes / (double)jcr->CommCompressedBytes; + bsnprintf(comm_compress, sizeof(comm_compress), "%.1f%% %.1f:1", + compression, ratio); } + Dmsg2(200, "=== CommCompressed=%lld CommBytes=%lld\n", + jcr->CommCompressedBytes, jcr->CommBytes); } jobstatus_to_ascii(jcr->FDJobStatus, fd_term_msg, sizeof(fd_term_msg)); jobstatus_to_ascii(jcr->SDJobStatus, sd_term_msg, sizeof(sd_term_msg)); if (jcr->HasBase) { - Mmsg(base_info, " Base files/Used files: %lld/%lld (%.2f%%)\n", - jcr->nb_base_files, - jcr->nb_base_files_used, + Mmsg(base_info, _(" Base files/Used files: %lld/%lld (%.2f%%)\n"), + jcr->nb_base_files, + jcr->nb_base_files_used, jcr->nb_base_files_used*100.0/jcr->nb_base_files); } + /* Edit string for last volume size */ + if (mr.VolABytes != 0) { + Mmsg(vol_info, _("meta: %s (%sB) aligned: %s (%sB)"), + edit_uint64_with_commas(mr.VolBytes, ec7), + edit_uint64_with_suffix(mr.VolBytes, ec8), + edit_uint64_with_commas(mr.VolABytes, ec9), + edit_uint64_with_suffix(mr.VolABytes, ec10)); + } else { + Mmsg(vol_info, _("%s (%sB)"), + edit_uint64_with_commas(mr.VolBytes, ec7), + edit_uint64_with_suffix(mr.VolBytes, ec8)); + } + // bmicrosleep(15, 0); /* for debugging SIGHUP */ - Jmsg(jcr, msg_type, 0, _("%s %s %s (%s): %s\n" + Jmsg(jcr, msg_type, 0, _("%s %s %s (%s):\n" " Build OS: %s %s %s\n" " JobId: %d\n" " Job: %s\n" @@ -719,24 +960,25 @@ void backup_cleanup(JCR *jcr, int TermCode) " SD Bytes Written: %s (%sB)\n" " Rate: %.1f KB/s\n" " Software Compression: %s\n" +" Comm Line Compression: %s\n" "%s" /* Basefile info */ -" VSS: %s\n" +" Snapshot/VSS: %s\n" " Encryption: %s\n" " Accurate: %s\n" " Volume name(s): %s\n" " Volume Session Id: %d\n" " Volume Session Time: %d\n" -" Last Volume Bytes: %s (%sB)\n" +" Last Volume Bytes: %s\n" " Non-fatal FD errors: %d\n" " SD Errors: %d\n" " FD termination status: %s\n" " SD termination status: %s\n" " Termination: %s\n\n"), - BACULA, my_name, VERSION, LSMDATE, edt, + BACULA, my_name, VERSION, LSMDATE, HOST_OS, DISTNAME, DISTVER, jcr->jr.JobId, jcr->jr.Job, - level_to_str(jcr->get_JobLevel()), jcr->since, + level_to_str(jcr->getJobLevel()), jcr->since, jcr->client->name(), cr.Uname, jcr->fileset->name(), jcr->FSCreateTime, jcr->pool->name(), jcr->pool_source, @@ -754,21 +996,21 @@ void backup_cleanup(JCR *jcr, int TermCode) edit_uint64_with_commas(jcr->SDJobBytes, ec5), edit_uint64_with_suffix(jcr->SDJobBytes, ec6), kbps, - compress, + data_compress, + comm_compress, base_info.c_str(), - jcr->VSS?_("yes"):_("no"), + jcr->Snapshot?_("yes"):_("no"), jcr->Encrypt?_("yes"):_("no"), jcr->accurate?_("yes"):_("no"), jcr->VolumeName, jcr->VolSessionId, jcr->VolSessionTime, - edit_uint64_with_commas(mr.VolBytes, ec7), - edit_uint64_with_suffix(mr.VolBytes, ec8), + vol_info.c_str(), jcr->JobErrors, jcr->SDErrors, fd_term_msg, sd_term_msg, - term_msg); + term_msg.c_str()); Dmsg0(100, "Leave backup_cleanup()\n"); } @@ -782,7 +1024,8 @@ void update_bootstrap_file(JCR *jcr) BPIPE *bpipe = NULL; int got_pipe = 0; POOLMEM *fname = get_pool_memory(PM_FNAME); - fname = edit_job_codes(jcr, fname, jcr->job->WriteBootstrap, ""); + fname = edit_job_codes(jcr, fname, jcr->job->WriteBootstrap, "", + job_code_callback_director); VOL_PARAMS *VolParams = NULL; int VolCount; @@ -794,7 +1037,7 @@ void update_bootstrap_file(JCR *jcr) fd = bpipe ? bpipe->wfd : NULL; } else { /* ***FIXME*** handle BASE */ - fd = fopen(fname, jcr->get_JobLevel()==L_FULL?"w+b":"a+b"); + fd = bfopen(fname, jcr->is_JobLevel(L_FULL)?"w+b":"a+b"); } if (fd) { VolCount = db_get_job_volume_parameters(jcr, jcr->db, jcr->JobId, @@ -803,14 +1046,14 @@ void update_bootstrap_file(JCR *jcr) Jmsg(jcr, M_ERROR, 0, _("Could not get Job Volume Parameters to " "update Bootstrap file. ERR=%s\n"), db_strerror(jcr->db)); if (jcr->SDJobFiles != 0) { - set_jcr_job_status(jcr, JS_ErrorTerminated); + jcr->setJobStatus(JS_ErrorTerminated); } } /* Start output with when and who wrote it */ bstrftimes(edt, sizeof(edt), time(NULL)); fprintf(fd, "# %s - %s - %s%s\n", edt, jcr->jr.Job, - level_to_str(jcr->get_JobLevel()), jcr->since); + level_to_str(jcr->getJobLevel()), jcr->since); for (int i=0; i < VolCount; i++) { /* Write the record */ fprintf(fd, "Volume=\"%s\"\n", VolParams[i].VolumeName); @@ -820,7 +1063,7 @@ void update_bootstrap_file(JCR *jcr) } fprintf(fd, "VolSessionId=%u\n", jcr->VolSessionId); fprintf(fd, "VolSessionTime=%u\n", jcr->VolSessionTime); - fprintf(fd, "VolAddr=%s-%s\n", + fprintf(fd, "VolAddr=%s-%s\n", edit_uint64(VolParams[i].StartAddr, ed1), edit_uint64(VolParams[i].EndAddr, ed2)); fprintf(fd, "FileIndex=%d-%d\n", VolParams[i].FirstIndex, @@ -838,7 +1081,7 @@ void update_bootstrap_file(JCR *jcr) berrno be; Jmsg(jcr, M_ERROR, 0, _("Could not open WriteBootstrap file:\n" "%s: ERR=%s\n"), fname, be.bstrerror()); - set_jcr_job_status(jcr, JS_ErrorTerminated); + jcr->setJobStatus(JS_ErrorTerminated); } free_pool_memory(fname); }