};
/* Call back context for getting a 32/64 bit value from the database */
+/*
+ * Both fields are public and are zeroed by the constructor.
+ * The copy constructor and assignment operator are declared private
+ * so the context cannot be passed by value or assigned.
+ */
-struct db_int64_ctx {
+class db_int64_ctx {
+public:
int64_t value; /* value returned */
int count; /* number of values seen */
+
+ db_int64_ctx() : value(0), count(0) {};
+ ~db_int64_ctx() {};
+private:
+ db_int64_ctx(const db_int64_ctx&); /* prohibit pass by value */
+ db_int64_ctx &operator=(const db_int64_ctx&); /* prohibit class assignment */
};
/* Call back context for getting a list of comma separated strings from the
};
/* sql_query Query Flags */
-#define QF_STORE_RESULT 0x01
+#define QF_STORE_RESULT 0x01
/* Use for better error location printing */
#define UPDATE_DB(jcr, db, cmd) UpdateDB(__FILE__, __LINE__, jcr, db, cmd)
bail_out:
db_sql_query(jcr->db_batch, "DROP TABLE batch", NULL,NULL);
+ jcr->batch_started = false;
return retval;
}
} else if (jcr->HasBase) {
ret = db_create_base_file_attributes_record(jcr, mdb, ar);
} else {
- Jmsg0(jcr, M_FATAL, 0, _("Can't Copy/Migrate job using BaseJob"));
+ Jmsg0(jcr, M_FATAL, 0, _("Cannot Copy/Migrate job using BaseJob"));
ret = true; /* in copy/migration what do we do ? */
}
/*
Bacula® - The Network Backup Solution
- Copyright (C) 2000-2009 Free Software Foundation Europe e.V.
+ Copyright (C) 2000-2011 Free Software Foundation Europe e.V.
The main author of Bacula is Kern Sibbald, with contributions from
many others, a complete list can be found in the file AUTHORS.
* to do the backup.
* When the File daemon finishes the job, update the DB.
*
- * Version $Id$
*/
#include "bacula.h"
#include "ua.h"
/* Commands sent to File daemon */
-static char backupcmd[] = "backup\n";
+static char backupcmd[] = "backup FileIndex=%ld\n";
static char storaddr[] = "storage address=%s port=%d ssl=%d\n";
/* Responses received from File daemon */
bool do_backup_init(JCR *jcr)
{
- if (jcr->getJobLevel() == L_VIRTUAL_FULL) {
+ if (jcr->is_JobLevel(L_VIRTUAL_FULL)) {
return do_vbackup_init(jcr);
}
free_rstorage(jcr); /* we don't read so release */
have_basejob_option = in_block = jcr->HasBase;
break;
case 'C': /* Accurate keyword */
- in_block = (jcr->getJobLevel() != L_FULL);
+ in_block = !jcr->is_JobLevel(L_FULL);
break;
case ':': /* End of keyword */
in_block = false;
bool send_accurate_current_files(JCR *jcr)
{
POOL_MEM buf;
- bool ret=true;
db_list_ctx jobids;
db_list_ctx nb;
+ char ed1[50];
- if (!jcr->accurate || job_canceled(jcr)) {
- return true;
- }
- /* In base level, no previous job is used */
- if (jcr->getJobLevel() == L_BASE) {
- return true;
- }
-
- if (jcr->getJobLevel() == L_FULL) {
- /* On Full mode, if no previous base job, no accurate things */
- if (!get_base_jobids(jcr, &jobids)) {
- goto bail_out;
+ /* For incomplete Jobs, we add our own id */
+ if (jcr->incomplete) {
+ edit_int64(jcr->JobId, ed1);
+ jobids.add(ed1);
+ } else {
+ if (!jcr->accurate || job_canceled(jcr)) {
+ return true;
}
- jcr->HasBase = true;
- Jmsg(jcr, M_INFO, 0, _("Using BaseJobId(s): %s\n"), jobids.list);
+ /* In base level, no previous job is used */
+ if (jcr->is_JobLevel(L_BASE)) {
+ return true;
+ }
+
+ if (jcr->is_JobLevel(L_FULL)) {
+ /* On Full mode, if no previous base job, no accurate things */
+ if (!get_base_jobids(jcr, &jobids)) {
+ return true;
+ }
+ jcr->HasBase = true;
+ Jmsg(jcr, M_INFO, 0, _("Using BaseJobId(s): %s\n"), jobids.list);
- } else {
- /* For Incr/Diff level, we search for older jobs */
- db_accurate_get_jobids(jcr, jcr->db, &jcr->jr, &jobids);
-
- /* We are in Incr/Diff, but no Full to build the accurate list... */
- if (jobids.count == 0) {
- ret=false;
- Jmsg(jcr, M_FATAL, 0, _("Cannot find previous jobids.\n"));
- goto bail_out;
+ } else {
+ /* For Incr/Diff level, we search for older jobs */
+ db_accurate_get_jobids(jcr, jcr->db, &jcr->jr, &jobids);
+
+ /* We are in Incr/Diff, but no Full to build the accurate list... */
+ if (jobids.count == 0) {
+ Jmsg(jcr, M_FATAL, 0, _("Cannot find previous jobids.\n"));
+ return false; /* fail */
+ }
}
}
if (!db_open_batch_connexion(jcr, jcr->db)) {
Jmsg0(jcr, M_FATAL, 0, "Can't get batch sql connexion");
- return false;
+ return false; /* Fail */
}
if (jcr->HasBase) {
accurate_list_handler, (void *)jcr);
}
- /* TODO: close the batch connexion ? (can be used very soon) */
+ /* TODO: close the batch connection ? (can be used very soon) */
jcr->file_bsock->signal(BNET_EOD);
-
-bail_out:
- return ret;
+ return true;
}
/*
BSOCK *fd;
STORE *store;
char ed1[100];
+ db_int64_ctx job;
+ POOL_MEM buf;
- if (jcr->getJobLevel() == L_VIRTUAL_FULL) {
+ if (jcr->is_JobLevel(L_VIRTUAL_FULL)) {
return do_vbackup(jcr);
}
return false;
}
+ /* For incomplete Jobs, we add our own id */
+ if (jcr->incomplete) {
+ edit_int64(jcr->JobId, ed1);
+ Mmsg(buf, "SELECT count(*) FROM File WHERE JobId=%s", ed1);
+ if (!db_sql_query(jcr->db, buf.c_str(), db_int64_handler, &job)) {
+ Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
+ return false;
+ }
+ jcr->JobFiles = job.value;
+ Mmsg(buf, "SELECT VolSessionId FROM Job WHERE JobId=%s", ed1);
+ if (!db_sql_query(jcr->db, buf.c_str(), db_int64_handler, &job)) {
+ Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
+ return false;
+ }
+ jcr->VolSessionId = job.value;
+ Mmsg(buf, "SELECT VolSessionTime FROM Job WHERE JobId=%s", ed1);
+ if (!db_sql_query(jcr->db, buf.c_str(), db_int64_handler, &job)) {
+ Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
+ return false;
+ }
+ jcr->VolSessionTime = job.value;
+ Dmsg4(100, "JobId=%s JobFiles=%ld VolSessionId=%ld VolSessionTime=%ld\n", ed1,
+ jcr->JobFiles, jcr->VolSessionId, jcr->VolSessionTime);
+ }
+
/*
* Open a message channel connection with the Storage
* daemon. This is to let him know that our client
* to avoid two threads from using the BSOCK structure at
* the same time.
*/
- if (!bnet_fsend(jcr->store_bsock, "run")) {
+ if (!jcr->store_bsock->fsend("run")) {
return false;
}
* all files to FD.
*/
if (!send_accurate_current_files(jcr)) {
- goto bail_out;
+ goto bail_out; /* error */
}
/* Send backup command */
- fd->fsend(backupcmd);
+ fd->fsend(backupcmd, jcr->JobFiles);
+ Dmsg1(100, ">filed: %s", fd->msg);
if (!response(jcr, fd, OKbackup, "backup", DISPLAY_ERROR)) {
goto bail_out;
}
stat = wait_for_job_termination(jcr);
db_write_batch_file_records(jcr); /* used by bulk batch file insert */
- if (jcr->HasBase &&
- !db_commit_base_file_attributes_record(jcr, jcr->db))
- {
- Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
+ if (jcr->HasBase && !db_commit_base_file_attributes_record(jcr, jcr->db)) {
+ Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
}
if (stat == JS_Terminated) {
fd->signal(BNET_TERMINATE); /* tell Client we are terminating */
}
- /* Force cancel in SD if failing */
- if (job_canceled(jcr) || !fd_ok) {
+ /*
+ * Force cancel in SD if failing, but not for Incomplete jobs
+ * so that we let the SD despool.
+ */
+ if (jcr->is_canceled() || !fd_ok) {
cancel_storage_daemon_job(jcr);
}
utime_t RunTime;
POOL_MEM base_info;
- if (jcr->getJobLevel() == L_VIRTUAL_FULL) {
+ if (jcr->is_JobLevel(L_VIRTUAL_FULL)) {
vbackup_cleanup(jcr, TermCode);
return;
}
term_msg = _("Backup OK");
}
break;
+ case JS_Incomplete:
+ term_msg = _("Backup failed -- incomplete");
+ break;
case JS_Warnings:
term_msg = _("Backup OK -- with warnings");
break;
fd = bpipe ? bpipe->wfd : NULL;
} else {
/* ***FIXME*** handle BASE */
- fd = fopen(fname, jcr->getJobLevel()==L_FULL?"w+b":"a+b");
+ fd = fopen(fname, jcr->is_JobLevel(L_FULL)?"w+b":"a+b");
}
if (fd) {
VolCount = db_get_job_volume_parameters(jcr, jcr->db, jcr->JobId,
BSOCK *fd = jcr->file_bsock;
const char *accurate = jcr->accurate?"accurate_":"";
const char *not_accurate = "";
+ const char *incomplete = jcr->incomplete?" incomplete ":" ";
/*
* Send Level command to File daemon
*/
switch (jcr->getJobLevel()) {
case L_BASE:
- fd->fsend(levelcmd, not_accurate, "base", " ", 0);
+ fd->fsend(levelcmd, not_accurate, "base", incomplete, 0);
break;
/* L_NONE is the console, sending something off to the FD */
case L_NONE:
case L_FULL:
- fd->fsend(levelcmd, not_accurate, "full", " ", 0);
+ fd->fsend(levelcmd, not_accurate, "full", incomplete, 0);
break;
case L_DIFFERENTIAL:
- fd->fsend(levelcmd, accurate, "differential", " ", 0);
+ fd->fsend(levelcmd, accurate, "differential", incomplete, 0);
send_since_time(jcr);
break;
case L_INCREMENTAL:
- fd->fsend(levelcmd, accurate, "incremental", " ", 0);
+ fd->fsend(levelcmd, accurate, "incremental", incomplete, 0);
send_since_time(jcr);
break;
case L_SINCE:
/*
Bacula® - The Network Backup Solution
- Copyright (C) 2003-2010 Free Software Foundation Europe e.V.
+ Copyright (C) 2003-2011 Free Software Foundation Europe e.V.
The main author of Bacula is Kern Sibbald, with contributions from
many others, a complete list can be found in the file AUTHORS.
* Reschedule this job by cleaning it up, but
* reuse the same JobId if possible.
*/
+ jcr->incomplete = jcr->is_incomplete(); /* save incomplete status */
time_t now = time(NULL);
jcr->reschedule_count++;
jcr->sched_time = now + jcr->job->RescheduleInterval;
jcr->JobStatus = -1;
set_jcr_job_status(jcr, JS_WaitStartTime);
jcr->SDJobStatus = 0;
+ jcr->JobErrors = 0;
if (!allow_duplicate_job(jcr)) {
return false;
}
- if (jcr->JobBytes == 0) {
+ if (jcr->JobBytes == 0 || jcr->incomplete) {
Dmsg2(2300, "Requeue job=%d use=%d\n", jcr->JobId, jcr->use_count());
V(jq->mutex);
jobq_add(jq, jcr); /* queue the job to run again */
static char jobcmd[] = "JobId=%s job=%s job_name=%s client_name=%s "
"type=%d level=%d FileSet=%s NoAttr=%d SpoolAttr=%d FileSetMD5=%s "
"SpoolData=%d WritePartAfterJob=%d PreferMountedVols=%d SpoolSize=%s "
- "Resched=%d\n";
+ "incomplete=%d VolSessionId=%d VolSessionTime=%d\n";
static char use_storage[] = "use storage=%s media_type=%s pool_name=%s "
"pool_type=%s append=%d copy=%d stripe=%d\n";
static char use_device[] = "use device=%s\n";
fileset_name.c_str(), !jcr->pool->catalog_files,
jcr->job->SpoolAttributes, jcr->fileset->MD5, jcr->spool_data,
jcr->write_part_after_job, jcr->job->PreferMountedVolumes,
- edit_int64(jcr->spool_size, ed2));
+ edit_int64(jcr->spool_size, ed2), jcr->incomplete,
+ jcr->VolSessionId, jcr->VolSessionTime);
Dmsg1(100, ">stored: %s", sd->msg);
if (bget_dirmsg(sd) > 0) {
Dmsg1(100, "<stored: %s", sd->msg);
/* Do read side of storage daemon */
if (ok && rstore) {
/* For the moment, only migrate, copy and vbackup have rpool */
- if (jcr->getJobType() == JT_MIGRATE || jcr->getJobType() == JT_COPY ||
- (jcr->getJobType() == JT_BACKUP && jcr->getJobLevel() == L_VIRTUAL_FULL)) {
+ if (jcr->is_JobType(JT_MIGRATE) || jcr->is_JobType(JT_COPY) ||
+ (jcr->is_JobType(JT_BACKUP) && jcr->is_JobLevel(L_VIRTUAL_FULL))) {
pm_strcpy(pool_type, jcr->rpool->pool_type);
pm_strcpy(pool_name, jcr->rpool->name());
} else {
P(mutex);
pthread_cond_timedwait(&jcr->term_wait, &mutex, &timeout);
V(mutex);
- if (job_canceled(jcr)) {
+ if (jcr->is_canceled()) {
if (jcr->SD_msg_chan) {
jcr->store_bsock->set_timed_out();
jcr->store_bsock->set_terminated();
return 1;
}
-
static void do_storage_setdebug(UAContext *ua, STORE *store, int level, int trace_flag)
{
BSOCK *sd;
return;
}
-static void do_client_setdebug(UAContext *ua, CLIENT *client, int level, int trace_flag)
+/*
+ * For the client, we have the following values that can be set
+ * level = debug level
+ * trace = send debug output to a file
+ * hangup = how many records to send to SD before hanging up
+ * obviously this is most useful for testing restarting
+ * failed jobs.
+ */
+static void do_client_setdebug(UAContext *ua, CLIENT *client,
+ int level, int trace, int hangup)
{
BSOCK *fd;
}
Dmsg0(120, "Connected to file daemon\n");
fd = ua->jcr->file_bsock;
- fd->fsend("setdebug=%d trace=%d\n", level, trace_flag);
+ fd->fsend("setdebug=%d trace=%d hangup=%d\n", level, trace, hangup);
if (fd->recv() >= 0) {
ua->send_msg("%s", fd->msg);
}
}
-static void do_all_setdebug(UAContext *ua, int level, int trace_flag)
+static void do_all_setdebug(UAContext *ua, int level, int trace_flag, int hangup)
{
STORE *store, **unique_store;
CLIENT *client, **unique_client;
/* Call each unique File daemon */
for (j=0; j<i; j++) {
- do_client_setdebug(ua, unique_client[j], level, trace_flag);
+ do_client_setdebug(ua, unique_client[j], level, trace_flag, hangup);
}
free(unique_client);
}
CLIENT *client;
int level;
int trace_flag = -1;
+ int hangup = -1;
int i;
Dmsg1(120, "setdebug:%s:\n", cmd);
}
}
+ /* Look for the hangup flag (debug only). -1 => no change */
+ i = find_arg_with_value(ua, "hangup");
+ if (i >= 0) {
+ hangup = atoi(ua->argv[i]);
+ }
+
+
/* General debug? */
for (i=1; i<ua->argc; i++) {
if (strcasecmp(ua->argk[i], "all") == 0) {
- do_all_setdebug(ua, level, trace_flag);
+ do_all_setdebug(ua, level, trace_flag, hangup);
return 1;
}
if (strcasecmp(ua->argk[i], "dir") == 0 ||
if (ua->argv[i]) {
client = GetClientResWithName(ua->argv[i]);
if (client) {
- do_client_setdebug(ua, client, level, trace_flag);
+ do_client_setdebug(ua, client, level, trace_flag, hangup);
return 1;
}
}
client = select_client_resource(ua);
if (client) {
- do_client_setdebug(ua, client, level, trace_flag);
+ do_client_setdebug(ua, client, level, trace_flag, hangup);
return 1;
}
}
case 2:
client = select_client_resource(ua);
if (client) {
- do_client_setdebug(ua, client, level, trace_flag);
+ do_client_setdebug(ua, client, level, trace_flag, hangup);
}
break;
case 3:
- do_all_setdebug(ua, level, trace_flag);
+ do_all_setdebug(ua, level, trace_flag, hangup);
break;
default:
break;
goto bail_out;
}
- bnet_fsend(jcr->file_bsock, "estimate listing=%d\n", listing);
- while (bnet_recv(jcr->file_bsock) >= 0) {
+ jcr->file_bsock->fsend("estimate listing=%d\n", listing);
+ while (jcr->file_bsock->recv() >= 0) {
ua->send_msg("%s", jcr->file_bsock->msg);
}
bstrncpy(dev_name, store.store->dev_name(), sizeof(dev_name));
bash_spaces(dev_name);
if (slot > 0) {
- bnet_fsend(sd, "%s %s drive=%d slot=%d", command, dev_name, drive, slot);
+ sd->fsend("%s %s drive=%d slot=%d", command, dev_name, drive, slot);
} else {
- bnet_fsend(sd, "%s %s drive=%d", command, dev_name, drive);
+ sd->fsend("%s %s drive=%d", command, dev_name, drive);
}
- while (bnet_recv(sd) >= 0) {
+ while (sd->recv() >= 0) {
ua->send_msg("%s", sd->msg);
}
- bnet_sig(sd, BNET_TERMINATE);
- bnet_close(sd);
+ sd->signal(BNET_TERMINATE);
+ sd->close();
jcr->store_bsock = NULL;
}
{
bool ret=true;
if (jcr->accurate) {
- if (jcr->getJobLevel() == L_FULL) {
- ret = accurate_send_base_file_list(jcr);
- } else {
- ret = accurate_send_deleted_list(jcr);
+      /*
+       * Nothing is sent for an incomplete job. Otherwise a Full job
+       * sends the base file list and every other level sends the
+       * deleted list. The incomplete test is made once here; the
+       * inner branch does not need to repeat it.
+       */
+      if (!jcr->incomplete) {
+         if (jcr->is_JobLevel(L_FULL)) {
+            ret = accurate_send_base_file_list(jcr);
+         } else {
+            ret = accurate_send_deleted_list(jcr);
+         }
+      }
-
accurate_free(jcr);
- if (jcr->getJobLevel() == L_FULL) {
+ if (!jcr->incomplete && jcr->is_JobLevel(L_FULL)) {
Jmsg(jcr, M_INFO, 0, _("Space saved with Base jobs: %lld MB\n"),
jcr->base_size/(1024*1024));
}
ff_pkt->delta_seq = 0;
- if (!jcr->accurate) {
+ if (!jcr->accurate && !jcr->incomplete) {
return true;
}
decode_stat(elt.lstat, &statc, &LinkFIc); /* decode catalog stat */
- if (jcr->getJobLevel() == L_FULL) {
+ if (!jcr->incomplete && (jcr->getJobLevel() == L_FULL)) {
opts = ff_pkt->BaseJobOpts;
} else {
opts = ff_pkt->AccurateOpts;
/* TODO: cleanup and factorise this function with verify.c */
case '5': /* compare MD5 */
- case '1': /* compare SHA1 */
+ case '1': /* compare SHA1 */
/*
* The remainder of the function is all about getting the checksum.
* First we initialise, then we read files, other streams and Finder Info.
ff_pkt->flags & (FO_MD5|FO_SHA1|FO_SHA256|FO_SHA512)))
{
- if (!*elt.chksum) {
- Jmsg(jcr, M_WARNING, 0, _("Can't verify checksum for %s\n"),
+ if (!*elt.chksum && !jcr->incomplete) {
+ Jmsg(jcr, M_WARNING, 0, _("Cannot verify checksum for %s\n"),
ff_pkt->fname);
stat = true;
break;
/*
Bacula® - The Network Backup Solution
- Copyright (C) 2000-2010 Free Software Foundation Europe e.V.
+ Copyright (C) 2000-2011 Free Software Foundation Europe e.V.
The main author of Bacula is Kern Sibbald, with contributions from
many others, a complete list can be found in the file AUTHORS.
set_find_options((FF_PKT *)jcr->ff, jcr->incremental, jcr->mtime);
- /** in accurate mode, we overwrite the find_one check function */
+ /** in accurate mode, we overload the find_one check function */
if (jcr->accurate) {
set_find_changed_function((FF_PKT *)jcr->ff, accurate_check_file);
}
#endif
BSOCK *sd = jcr->store_bsock;
- if (jcr->is_job_canceled()) {
+ if (jcr->is_canceled() || jcr->is_incomplete()) {
return 0;
}
}
good_rtn:
- rtnstat = jcr->is_job_canceled() ? 0 : 1; /* good return if not canceled */
+ rtnstat = jcr->is_canceled() ? 0 : 1; /* good return if not canceled */
bail_out:
+ if (jcr->is_incomplete()) {
+ rtnstat = 0;
+ }
if (ff_pkt->cmd_plugin && plugin_started) {
send_plugin_name(jcr, sd, false); /* signal end of plugin data */
}
int attr_stream;
int comp_len;
bool stat;
+ int hangup = get_hangup();
#ifdef FD_NO_SEND_TEST
return true;
#endif
pm_strcpy(jcr->last_fname, ff_pkt->fname);
jcr->unlock();
+ /* Debug code: check if we must hangup */
+ if (hangup && (jcr->JobFiles > (uint32_t)hangup)) {
+ jcr->setJobStatus(JS_Incomplete);
+ Jmsg1(jcr, M_FATAL, 0, "Debug hangup requested after %d files.\n", hangup);
+ return false;
+ }
+
/**
* Send Attributes header to Storage daemon
* <file-index> <stream> <info>
*/
if (!sd->fsend("%ld %d 0", jcr->JobFiles, attr_stream)) {
- if (!jcr->is_job_canceled()) {
+ if (!jcr->is_canceled()) {
Jmsg1(jcr, M_FATAL, 0, _("Network send error to SD. ERR=%s\n"),
sd->bstrerror());
}
/*
Bacula® - The Network Backup Solution
- Copyright (C) 2000-2010 Free Software Foundation Europe e.V.
+ Copyright (C) 2000-2011 Free Software Foundation Europe e.V.
The main author of Bacula is Kern Sibbald, with contributions from
many others, a complete list can be found in the file AUTHORS.
static char OKstore[] = "2000 OK storage\n";
static char OKstoreend[] = "2000 OK storage end\n";
static char OKjob[] = "2000 OK Job %s (%s) %s,%s,%s";
-static char OKsetdebug[] = "2000 OK setdebug=%d\n";
+static char OKsetdebug[] = "2000 OK setdebug=%d trace=%d hangup=%d\n";
static char BADjob[] = "2901 Bad Job\n";
static char EndJob[] = "2800 End Job TermCode=%d JobFiles=%u ReadBytes=%s"
" JobBytes=%s Errors=%u VSS=%d Encrypt=%d\n";
+/*
+ * Handle the setdebug command read from dir->msg.
+ *
+ * Accepts "setdebug=%d trace=%d hangup=%d", and falls back to the
+ * two-field form "setdebug=%d trace=%d", in which case hangup is
+ * set to -1 before being handed to set_hangup().
+ * A negative level leaves debug_level unchanged.
+ *
+ * Returns 0 after sending a 2991 error if neither form parses,
+ * otherwise the result of sending the OKsetdebug reply.
+ */
static int setdebug_cmd(JCR *jcr)
{
BSOCK *dir = jcr->dir_bsock;
- int level, trace_flag;
-
- Dmsg1(110, "setdebug_cmd: %s", dir->msg);
- if (sscanf(dir->msg, "setdebug=%d trace=%d", &level, &trace_flag) != 2 || level < 0) {
- pm_strcpy(jcr->errmsg, dir->msg);
- dir->fsend(_("2991 Bad setdebug command: %s\n"), jcr->errmsg);
- return 0;
+ int32_t level, trace, hangup;
+ int scan;
+
+ Dmsg1(50, "setdebug_cmd: %s", dir->msg);
+ scan = sscanf(dir->msg, "setdebug=%d trace=%d hangup=%d",
+ &level, &trace, &hangup);
+ if (scan != 3) {
+ Dmsg2(20, "sscanf failed: msg=%s scan=%d\n", dir->msg, scan);
+ /* Retry with the two-field form that carries no hangup value */
+ if (sscanf(dir->msg, "setdebug=%d trace=%d", &level, &trace) != 2) {
+ pm_strcpy(jcr->errmsg, dir->msg);
+ dir->fsend(_("2991 Bad setdebug command: %s\n"), jcr->errmsg);
+ return 0;
+ } else {
+ hangup = -1;
+ }
+ }
+ if (level >= 0) {
+ debug_level = level;
}
- debug_level = level;
- set_trace(trace_flag);
- return dir->fsend(OKsetdebug, level);
+ set_trace(trace);
+ set_hangup(hangup);
+ Dmsg3(50, "level=%d trace=%d hangup=%d\n", level, get_trace(), get_hangup());
+ return dir->fsend(OKsetdebug, level, get_trace(), get_hangup());
}
int mtime_only;
level = get_memory(dir->msglen+1);
- Dmsg1(100, "level_cmd: %s", dir->msg);
+ Dmsg1(10, "level_cmd: %s", dir->msg);
/* keep compatibility with older directors */
if (strstr(dir->msg, "accurate")) {
jcr->accurate = true;
}
+ if (strstr(dir->msg, "incomplete")) {
+ jcr->incomplete = true;
+ }
if (sscanf(dir->msg, "level = %s ", level) != 1) {
goto bail_out;
}
BSOCK *sd = jcr->store_bsock;
int ok = 0;
int SDJobStatus;
+ int32_t FileIndex;
#if defined(WIN32_VSS)
// capture state here, if client is backed up by multiple directors
P(vss_mutex);
}
#endif
+
+ if (sscanf(dir->msg, "backup FileIndex=%ld\n", &FileIndex) == 1) {
+ jcr->JobFiles = FileIndex;
+ Dmsg1(100, "JobFiles=%ld\n", jcr->JobFiles);
+ }
/**
* Validate some options given to the backup make sense for the compiled in
/*
Bacula® - The Network Backup Solution
- Copyright (C) 2000-2010 Free Software Foundation Europe e.V.
+ Copyright (C) 2000-2011 Free Software Foundation Europe e.V.
The main author of Bacula is Kern Sibbald, with contributions from
many others, a complete list can be found in the file AUTHORS.
+/* True for JS_Canceled, JS_ErrorTerminated and JS_FatalError; JS_Incomplete is not included */
#define job_canceled(jcr) \
(jcr->JobStatus == JS_Canceled || \
jcr->JobStatus == JS_ErrorTerminated || \
- jcr->JobStatus == JS_FatalError || \
- jcr->JobStatus == JS_Incomplete \
+ jcr->JobStatus == JS_FatalError \
)
#define job_waiting(jcr) \
void init_mutex(void) {pthread_mutex_init(&mutex, NULL); };
void destroy_mutex(void) {pthread_mutex_destroy(&mutex); };
bool is_job_canceled() {return job_canceled(this); };
+ bool is_canceled() {return job_canceled(this); };
+ bool is_incomplete() { return JobStatus == JS_Incomplete; };
+ bool is_JobLevel(int32_t JobLevel) { return JobLevel == m_JobLevel; };
+ bool is_JobType(int32_t JobType) { return JobType == m_JobType; };
void set_JobLevel(int32_t JobLevel) { m_JobLevel = JobLevel; };
void setJobLevel(int32_t JobLevel) { m_JobLevel = JobLevel; };
void set_JobType(int32_t JobType) { m_JobType = JobType; };
bool prefix_links; /* Prefix links with Where path */
bool gui; /* set if gui using console */
bool authenticated; /* set when client authenticated */
+ bool cached_attribute; /* set if attribute is cached */
+ bool batch_started; /* is batch mode already started ? */
+ bool cmd_plugin; /* Set when processing a command Plugin = */
+ bool keep_path_list; /* Keep newly created path in a hash */
+ bool accurate; /* true if job is accurate */
+ bool HasBase; /* True if job use base jobs */
+ bool incomplete; /* finishing an incomplete job */
+
void *Python_job; /* Python Job Object */
void *Python_events; /* Python Events Object */
-
- bool cached_attribute; /* set if attribute is cached */
POOLMEM *attr; /* Attribute string from SD */
B_DB *db; /* database pointer */
B_DB *db_batch; /* database pointer for batch and accurate */
- bool batch_started; /* is batch mode already started ? */
- bool HasBase; /* True if job use base jobs */
uint64_t nb_base_files; /* Number of base files */
uint64_t nb_base_files_used; /* Number of useful files in base */
ATTR_DBR *ar; /* DB attribute record */
guid_list *id_list; /* User/group id to name list */
- bool accurate; /* true if job is accurate */
bpContext *plugin_ctx_list; /* list of contexts for plugins */
bpContext *plugin_ctx; /* current plugin context */
Plugin *plugin; /* plugin instance */
save_pkt *plugin_sp; /* plugin save packet */
char *plugin_options; /* user set options for plugin */
- bool cmd_plugin; /* Set when processing a command Plugin = */
POOLMEM *comment; /* Comment for this Job */
+ int64_t max_bandwidth; /* Bandwidth limit for this Job */
+ htable *path_list; /* Directory list (used by findlib) */
/* Daemon specific part of JCR */
/* This should be empty in the library */
POOL *full_pool; /* Full backup pool resource */
POOL *inc_pool; /* Incremental backup pool resource */
POOL *diff_pool; /* Differential backup pool resource */
- bool run_pool_override;
- bool run_full_pool_override;
- bool run_inc_pool_override;
- bool run_diff_pool_override;
- bool sd_canceled; /* set if SD canceled */
FILESET *fileset; /* FileSet resource */
CAT *catalog; /* Catalog resource */
MSGS *messages; /* Default message handler */
bool no_maxtime; /* Don't check Max*Time for this JCR */
bool keep_sd_auth_key; /* Clear or not the SD auth key after connection*/
bool use_accurate_chksum; /* Use or not checksum option in accurate code */
+ bool run_pool_override;
+ bool run_full_pool_override;
+ bool run_inc_pool_override;
+ bool run_diff_pool_override;
+ bool sd_canceled; /* set if SD canceled */
#endif /* DIRECTOR_DAEMON */
{
int priority = 0;
switch (JobStatus) {
+ case JS_Incomplete:
+ priority = 10;
+ break;
case JS_ErrorTerminated:
case JS_FatalError:
case JS_Canceled:
- case JS_Incomplete:
- priority = 10;
+ priority = 9;
break;
case JS_Error:
priority = 8;
#else
static bool trace = false;
#endif
+static int hangup = 0;
/* Constants */
const char *host_os = HOST_OS;
}
}
+/*
+ * Store a new hangup value.
+ * A negative hangup_value leaves the current setting untouched.
+ */
+void set_hangup(int hangup_value)
+{
+   if (hangup_value >= 0) {
+      hangup = hangup_value;
+   }
+}
+
+/* Return the current value of the file-level hangup variable */
+int get_hangup(void)
+{
+ return hangup;
+}
+
bool get_trace(void)
{
return trace;
bool get_trace(void);
const char *get_basename(const char *pathname);
-
class B_DB;
typedef void (*sql_query_func)(JCR *jcr, const char *cmd);
typedef void (*sql_escape_func)(JCR *jcr, B_DB* db, char *snew, char *old, int len);
void free_msgs_res (MSGS *msgs);
void dequeue_messages (JCR *jcr);
void set_trace (int trace_flag);
+bool get_trace (void);
+void set_hangup (int hangup_value);
+int get_hangup (void);
void set_db_type (const char *name);
void register_message_callback(void msg_callback(int type, char *msg));
case JS_Terminated:
jobstat = _("OK");
break;
+ case JS_Incomplete:
+ jobstat = _("Error: incomplete job");
+ break;
case JS_FatalError:
case JS_ErrorTerminated:
jobstat = _("Error");
/*
Bacula® - The Network Backup Solution
- Copyright (C) 2000-2010 Free Software Foundation Europe e.V.
+ Copyright (C) 2000-2011 Free Software Foundation Europe e.V.
The main author of Bacula is Kern Sibbald, with contributions from
many others, a complete list can be found in the file AUTHORS.
/* Forward referenced functions */
+
+/*
+ * Mark the job JS_Incomplete when enough files have been seen,
+ * otherwise leave its status alone.
+ *
+ *   jcr              job whose status may be changed
+ *   last_file_index  last file index seen for this job
+ */
+void possible_incomplete_job(JCR *jcr, int32_t last_file_index)
+{
+   /*
+    * This is where we decide whether restarting the Job from
+    * this point is worthwhile. The current threshold of more
+    * than 10 files is convenient for testing; a production
+    * system probably wants something like 100-1000 files plus
+    * some number of bytes of data.
+    *
+    * ****FIXME**** update this
+    */
+   if (last_file_index <= 10) {
+      return;
+   }
+   jcr->setJobStatus(JS_Incomplete);
+}
/*
* Append Data sent from File daemon
*
}
Jmsg1(jcr, M_FATAL, 0, _("Error reading data header from FD. ERR=%s\n"),
fd->bstrerror());
+ possible_incomplete_job(jcr, last_file_index);
ok = false;
break;
}
Dmsg2(890, "<filed: Header FilInx=%d stream=%d\n", file_index, stream);
- if (!(file_index > 0 && (file_index == last_file_index ||
- file_index == last_file_index + 1))) {
- Jmsg0(jcr, M_FATAL, 0, _("File index from FD not positive or sequential\n"));
- ok = false;
- break;
+ /*
+ * We make sure the file_index is advancing sequentially.
+ * An incomplete job can start the file_index at any number.
+ * otherwise, it must start at 1.
+ */
+ if (jcr->incomplete && file_index > 0 && last_file_index == 0) {
+ goto fi_checked;
}
+ if (file_index > 0 && (file_index == last_file_index ||
+ file_index == last_file_index + 1)) {
+ goto fi_checked;
+ }
+ Jmsg2(jcr, M_FATAL, 0, _("FI=%d from FD not positive or sequential=%d\n"),
+ file_index, last_file_index);
+ possible_incomplete_job(jcr, last_file_index);
+ ok = false;
+ break;
+
+fi_checked:
if (file_index != last_file_index) {
jcr->JobFiles = file_index;
last_file_index = file_index;
Dmsg1(350, "Network read error from FD. ERR=%s\n", fd->bstrerror());
Jmsg1(jcr, M_FATAL, 0, _("Network error reading from FD. ERR=%s\n"),
fd->bstrerror());
+ possible_incomplete_job(jcr, last_file_index);
}
ok = false;
break;
}
- if (!ok) {
+ if (!ok && (jcr->getJobStatus() != JS_Incomplete)) {
discard_data_spool(dcr);
} else {
/* Note: if commit is OK, the device will remain blocked */
*/
release_device(dcr);
- if (!ok || jcr->is_job_canceled()) {
+ if ((!ok || jcr->is_job_canceled()) && (jcr->getJobStatus() != JS_Incomplete)) {
discard_attribute_spool(jcr);
} else {
commit_attribute_spool(jcr);
/*
Bacula® - The Network Backup Solution
- Copyright (C) 2000-2010 Free Software Foundation Europe e.V.
+ Copyright (C) 2000-2011 Free Software Foundation Europe e.V.
The main author of Bacula is Kern Sibbald, with contributions from
many others, a complete list can be found in the file AUTHORS.
static char jobcmd[] = "JobId=%d job=%127s job_name=%127s client_name=%127s "
"type=%d level=%d FileSet=%127s NoAttr=%d SpoolAttr=%d FileSetMD5=%127s "
"SpoolData=%d WritePartAfterJob=%d PreferMountedVols=%d SpoolSize=%s "
- "Resched=%d\n";
-static char oldjobcmd[] = "JobId=%d job=%127s job_name=%127s client_name=%127s "
- "type=%d level=%d FileSet=%127s NoAttr=%d SpoolAttr=%d FileSetMD5=%127s "
- "SpoolData=%d WritePartAfterJob=%d PreferMountedVols=%d SpoolSize=%s\n";
-static char oldoldjobcmd[] = "JobId=%d job=%127s job_name=%127s client_name=%127s "
- "type=%d level=%d FileSet=%127s NoAttr=%d SpoolAttr=%d FileSetMD5=%127s "
- "SpoolData=%d WritePartAfterJob=%d PreferMountedVols=%d\n";
-
-
+ "incomplete=%d VolSessionId=%d VolSessionTime=%d\n";
/* Responses sent to Director daemon */
static char OKjob[] = "3000 OK Job SDid=%u SDtime=%u Authorization=%s\n";
static char BAD_job[] = "3915 Bad Job command. stat=%d CMD: %s\n";
-//static char OK_query[] = "3001 OK query\n";
-//static char NO_query[] = "3918 Query failed\n";
-//static char BAD_query[] = "3917 Bad query command: %s\n";
/*
* Director requests us to start a job
POOL_MEM job_name, client_name, job, fileset_name, fileset_md5;
int JobType, level, spool_attributes, no_attributes, spool_data;
int write_part_after_job, PreferMountedVols;
- int Resched = 0;
int stat;
JCR *ojcr;
&JobType, &level, fileset_name.c_str(), &no_attributes,
&spool_attributes, fileset_md5.c_str(), &spool_data,
&write_part_after_job, &PreferMountedVols, spool_size,
- &Resched);
- if (stat != 15) {
- /* Try old version */
- stat = sscanf(dir->msg, oldjobcmd, &JobId, job.c_str(), job_name.c_str(),
- client_name.c_str(),
- &JobType, &level, fileset_name.c_str(), &no_attributes,
- &spool_attributes, fileset_md5.c_str(), &spool_data,
- &write_part_after_job, &PreferMountedVols, spool_size);
- if (stat != 14) {
- /* Try oldold version */
- stat = sscanf(dir->msg, oldoldjobcmd, &JobId, job.c_str(), job_name.c_str(),
- client_name.c_str(),
- &JobType, &level, fileset_name.c_str(), &no_attributes,
- &spool_attributes, fileset_md5.c_str(), &spool_data,
- &write_part_after_job, &PreferMountedVols);
- if (stat != 13) {
- pm_strcpy(jcr->errmsg, dir->msg);
- dir->fsend(BAD_job, stat, jcr->errmsg);
- Dmsg1(100, ">dird: %s", dir->msg);
- set_jcr_job_status(jcr, JS_ErrorTerminated);
- return false;
- }
- }
+ &jcr->incomplete, &jcr->VolSessionId, &jcr->VolSessionTime);
+ if (stat != 17) {
+ pm_strcpy(jcr->errmsg, dir->msg);
+ dir->fsend(BAD_job, stat, jcr->errmsg);
+ Dmsg1(100, ">dird: %s", dir->msg);
+ set_jcr_job_status(jcr, JS_ErrorTerminated);
+ return false;
}
+ Dmsg3(100, "==== incomplete=%d VolSesId=%d VolSesTime=%d\n", jcr->incomplete,
+ jcr->VolSessionId, jcr->VolSessionTime);
/*
* Since this job could be rescheduled, we
* check to see if we have it already. If so
}
jcr->JobId = JobId;
Dmsg2(800, "Start JobId=%d %p\n", JobId, jcr);
- jcr->VolSessionId = newVolSessionId();
- jcr->VolSessionTime = VolSessionTime;
+ /*
+ * If the job was rescheduled because the previous run was incomplete,
+ * the incomplete flag is set and VolSessionId and VolSessionTime
+ * are given to us (same as the restarted job), so keep them.
+ */
+ if (!jcr->incomplete) {
+ jcr->VolSessionId = newVolSessionId();
+ jcr->VolSessionTime = VolSessionTime;
+ }
bstrncpy(jcr->Job, job, sizeof(jcr->Job));
unbash_spaces(job_name);
jcr->job_name = get_pool_memory(PM_NAME);
Maximum Concurrent Jobs = 10
SpoolData=yes
Max Run Time = 30min
+ Reschedule On Error = yes
+ Reschedule Interval = 10
+ Reschedule Times = 1
}
Job {
--- /dev/null
+#!/bin/sh
+#
+# Run a backup of the build directory but force it to have
+# a comm error, and check that it restarts correctly.
+#
+TestName="restart-job-test"
+JobName=RestartJob
+. scripts/functions
+
+scripts/cleanup
+scripts/copy-test-confs
+# The file list names a single directory: the build tree
+echo "${cwd}/build" >${cwd}/tmp/file-list
+
+change_jobname NightlySave $JobName
+start_test
+
+# Console commands for the backup: hangup=100 is set on the client
+# before the job is run, and reset to hangup=0 after the sleep.
+cat <<END_OF_DATA >${cwd}/tmp/bconcmds
+@$out /dev/null
+messages
+@$out ${cwd}/tmp/log1.out
+setdebug level=0 trace=0 hangup=100 client
+@#setdebug level=20 dir
+label storage=File volume=TestVolume001
+run job=$JobName yes
+@# We sleep 10 seconds to let the first job fail
+@sleep 10
+@# Now the second job should run
+setdebug level=0 trace=0 hangup=0 client
+wait
+messages
+quit
+END_OF_DATA
+
+run_bacula
+
+scripts/check_for_zombie_jobs storage=File
+
+# Console commands for the restore; output goes to log2.out
+cat <<END_OF_DATA >${cwd}/tmp/bconcmds
+@$out /dev/null
+messages
+@#
+@# now do a restore
+@#
+@$out ${cwd}/tmp/log2.out
+restore where=$tmp/bacula-restores storage=File select all done
+yes
+wait
+messages
+quit
+END_OF_DATA
+
+run_bconsole
+scripts/check_for_zombie_jobs storage=File
+stop_bacula
+
+# Check both logs and compare the restored tree
+check_two_logs
+check_restore_diff
+end_test