max run time or max start wait exceeded. This fixes bug #621.
kes Update maxruntime-test in regression script to properly test
max run time -- from info given in bug #621.
kes Simplify automatic cancel code.
kes Add check for job_canceled() in FD when contacting SD so that a
job canceled for exceeding max run time terminates faster.
git-svn-id: https://bacula.svn.sourceforge.net/svnroot/bacula/trunk@3809 91ce42f0-d328-0410-95d8-f526ca767f89
Makefile
stamp-po
remove-potcdate.sed
Makefile
stamp-po
remove-potcdate.sed
/*
* Now, do pre-run stuff, like setting job level (Inc/diff, ...)
* this allows us to setup a proper job start record for restarting
/*
* Now, do pre-run stuff, like setting job level (Inc/diff, ...)
* this allows us to setup a proper job start record for restarting
if (jcr->job->MaxStartDelay != 0 && jcr->job->MaxStartDelay <
(utime_t)(jcr->start_time - jcr->sched_time)) {
if (jcr->job->MaxStartDelay != 0 && jcr->job->MaxStartDelay <
(utime_t)(jcr->start_time - jcr->sched_time)) {
- Jmsg(jcr, M_FATAL, 0, _("Job canceled because max start delay time exceeded.\n"));
set_jcr_job_status(jcr, JS_Canceled);
set_jcr_job_status(jcr, JS_Canceled);
+ Jmsg(jcr, M_FATAL, 0, _("Job canceled because max start delay time exceeded.\n"));
}
/* TODO : check if it is used somewhere */
}
/* TODO : check if it is used somewhere */
bool cancel_job(UAContext *ua, JCR *jcr)
{
BSOCK *sd, *fd;
bool cancel_job(UAContext *ua, JCR *jcr)
{
BSOCK *sd, *fd;
set_jcr_job_status(jcr, JS_Canceled);
set_jcr_job_status(jcr, JS_Canceled);
case JS_WaitPriority:
case JS_WaitMaxJobs:
case JS_WaitStartTime:
case JS_WaitPriority:
case JS_WaitMaxJobs:
case JS_WaitStartTime:
- bsendmsg(ua, _("JobId %d, Job %s marked to be canceled.\n"),
- jcr->JobId, jcr->Job);
+ bsendmsg(ua, _("JobId %s, Job %s marked to be canceled.\n"),
+ edit_uint64(jcr->JobId, ed1), jcr->Job);
jobq_remove(&job_queue, jcr); /* attempt to remove it from queue */
return true;
jobq_remove(&job_queue, jcr); /* attempt to remove it from queue */
return true;
foreach_jcr(jcr) {
bool cancel;
foreach_jcr(jcr) {
bool cancel;
- if (jcr->JobId == 0) {
- Dmsg2(800, "Skipping JCR %p (%s) with JobId 0\n",
- jcr, jcr->Job);
+ if (jcr->JobId == 0 || job_canceled(jcr)) {
+ Dmsg2(800, "Skipping JCR=%p Job=%s\n", jcr, jcr->Job);
continue;
}
/* check MaxWaitTime */
continue;
}
/* check MaxWaitTime */
- cancel = job_check_maxwaittime(control_jcr, jcr);
-
+ if (job_check_maxwaittime(control_jcr, jcr)) {
+ set_jcr_job_status(jcr, JS_Canceled);
+ Jmsg(jcr, M_FATAL, 0, _("Max wait time exceeded. Job canceled.\n"));
+ cancel = true;
- cancel |= job_check_maxruntime(control_jcr, jcr);
+ } else if (job_check_maxruntime(control_jcr, jcr)) {
+ set_jcr_job_status(jcr, JS_Canceled);
+ Jmsg(jcr, M_FATAL, 0, _("Max run time exceeded. Job canceled.\n"));
+ cancel = true;
+ }
- Dmsg3(800, "Cancelling JCR %p jobid %d (%s)\n",
- jcr, jcr->JobId, jcr->Job);
-
+ Dmsg3(800, "Cancelling JCR %p jobid %d (%s)\n", jcr, jcr->JobId, jcr->Job);
UAContext *ua = new_ua_context(jcr);
ua->jcr = control_jcr;
cancel_job(ua, jcr);
free_ua_context(ua);
UAContext *ua = new_ua_context(jcr);
ua->jcr = control_jcr;
cancel_job(ua, jcr);
free_ua_context(ua);
Dmsg2(800, "Have cancelled JCR %p Job=%d\n", jcr, jcr->JobId);
}
Dmsg2(800, "Have cancelled JCR %p Job=%d\n", jcr, jcr->JobId);
}
- /* Keep reference counts correct */
+ /* Keep reference counts correct */
bool ok_to_cancel = false;
JOB *job = jcr->job;
bool ok_to_cancel = false;
JOB *job = jcr->job;
+ if (job_canceled(jcr)) {
+ return false; /* already canceled */
+ }
if (job->MaxWaitTime == 0 && job->FullMaxWaitTime == 0 &&
job->IncMaxWaitTime == 0 && job->DiffMaxWaitTime == 0) {
return false;
if (job->MaxWaitTime == 0 && job->FullMaxWaitTime == 0 &&
job->IncMaxWaitTime == 0 && job->DiffMaxWaitTime == 0) {
return false;
if (!ok_to_cancel) {
return false;
}
if (!ok_to_cancel) {
return false;
}
+
+/*
+ * I don't see the need for all this -- kes 17Dec06
+ */
+#ifdef xxx
Dmsg3(800, "Job %d (%s): MaxWaitTime of %d seconds exceeded, "
"checking status\n",
jcr->JobId, jcr->Job, job->MaxWaitTime);
Dmsg3(800, "Job %d (%s): MaxWaitTime of %d seconds exceeded, "
"checking status\n",
jcr->JobId, jcr->Job, job->MaxWaitTime);
}
Dmsg3(800, "MaxWaitTime result: %scancel JCR %p (%s)\n",
cancel ? "" : "do not ", jcr, jcr->job);
}
Dmsg3(800, "MaxWaitTime result: %scancel JCR %p (%s)\n",
cancel ? "" : "do not ", jcr, jcr->job);
- if (jcr->job->MaxRunTime == 0) {
+ if (jcr->job->MaxRunTime == 0 || job_canceled(jcr)) {
return false;
}
if ((watchdog_time - jcr->start_time) < jcr->job->MaxRunTime) {
return false;
}
if ((watchdog_time - jcr->start_time) < jcr->job->MaxRunTime) {
switch (jcr->JobStatus) {
case JS_Created:
case JS_Running:
switch (jcr->JobStatus) {
case JS_Created:
case JS_Running:
Dmsg3(200, "MaxRunTime result: %scancel JCR %p (%s)\n",
cancel ? "" : "do not ", jcr, jcr->job);
Dmsg3(200, "MaxRunTime result: %scancel JCR %p (%s)\n",
cancel ? "" : "do not ", jcr, jcr->job);
POOL_MEM query(PM_MESSAGE);
char ed1[50];
POOL_MEM query(PM_MESSAGE);
char ed1[50];
- /* Records associated with the job */
+ /* Delete (or purge) records associated with the job */
purge_job_records_from_catalog(ua, JobId);
/* Now remove the Job record itself */
purge_job_records_from_catalog(ua, JobId);
/* Now remove the Job record itself */
tid = start_bsock_timer(bs, AUTH_TIMEOUT);
/* Challenge the director */
auth_success = cram_md5_challenge(bs, director->password, tls_local_need, compatible);
tid = start_bsock_timer(bs, AUTH_TIMEOUT);
/* Challenge the director */
auth_success = cram_md5_challenge(bs, director->password, tls_local_need, compatible);
+ if (job_canceled(jcr)) {
+ auth_success = false;
+ goto auth_fatal; /* quick exit */
+ }
if (auth_success) {
auth_success = cram_md5_respond(bs, director->password, &tls_remote_need, &compatible);
if (!auth_success) {
if (auth_success) {
auth_success = cram_md5_respond(bs, director->password, &tls_remote_need, &compatible);
if (!auth_success) {
+ if (job_canceled(jcr)) {
+ auth_success = false; /* force quick exit */
+ goto auth_fatal;
+ }
+
/* Respond to SD challenge */
auth_success = cram_md5_respond(sd, jcr->sd_auth_key, &tls_remote_need, &compatible);
/* Respond to SD challenge */
auth_success = cram_md5_respond(sd, jcr->sd_auth_key, &tls_remote_need, &compatible);
+ if (job_canceled(jcr)) {
+ auth_success = false; /* force quick exit */
+ goto auth_fatal;
+ }
if (!auth_success) {
Dmsg1(50, "cram_respond failed for %s\n", sd->who);
} else {
if (!auth_success) {
Dmsg1(50, "cram_respond failed for %s\n", sd->who);
} else {
- /* Destroy session key */
- memset(jcr->sd_auth_key, 0, strlen(jcr->sd_auth_key));
-
if (!auth_success) {
Jmsg(jcr, M_FATAL, 0, _("Authorization key rejected by Storage daemon.\n"
"Please see http://www.bacula.org/rel-manual/faq.html#AuthorizationErrors for help.\n"));
if (!auth_success) {
Jmsg(jcr, M_FATAL, 0, _("Authorization key rejected by Storage daemon.\n"
"Please see http://www.bacula.org/rel-manual/faq.html#AuthorizationErrors for help.\n"));
+ /* Destroy session key */
+ memset(jcr->sd_auth_key, 0, strlen(jcr->sd_auth_key));
stop_bsock_timer(tid);
/* Single thread all failures to avoid DOS */
if (!auth_success) {
stop_bsock_timer(tid);
/* Single thread all failures to avoid DOS */
if (!auth_success) {