git.sur5r.net Git - bacula/bacula/commitdiff
kes Apply duplicate job tests to restarted jobs.
author Kern Sibbald <kern@sibbald.com>
Sat, 21 Jun 2008 16:18:35 +0000 (16:18 +0000)
committer Kern Sibbald <kern@sibbald.com>
Sat, 21 Jun 2008 16:18:35 +0000 (16:18 +0000)
kes  Copy more data when restarting a job so that run
     overrides are kept. This should fix bug #1094.

git-svn-id: https://bacula.svn.sourceforge.net/svnroot/bacula/trunk@7207 91ce42f0-d328-0410-95d8-f526ca767f89

bacula/src/dird/job.c
bacula/src/dird/jobq.c
bacula/technotes-2.5

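diff --git a/bacula/src/dird/job.c b/bacula/src/dird/job.c
index cf3c1da50436fc1d4b586a5f2bcefff8eb8aefae..0c735578dd62b5ec7bfcddee1c378e437d4e3b7f 100644
--- a/bacula/src/dird/job.c
+++ b/bacula/src/dird/job.c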
@@ -645,6 +645,7 @@ bool allow_duplicate_job(JCR *jcr)
    }
    if (!job->AllowHigherDuplicates) {
       foreach_jcr(djcr) {
+         char ec1[50];
          if (strcmp(job->name(), djcr->job->name()) == 0) {
             bool cancel_queued = false;
             if (job->DuplicateJobProximity > 0) {
@@ -656,7 +657,8 @@ bool allow_duplicate_job(JCR *jcr)
             /* Cancel */
             if (!(job->CancelQueuedDuplicates || job->CancelRunningDuplicates)) {
                /* Zap current job */
-               Jmsg(jcr, M_FATAL, 0, _("Duplicate job not allowed.\n"));
+               Jmsg(jcr, M_FATAL, 0, _("Duplicate job not allowed. JobId=%s\n"),
+                  edit_uint64(djcr->JobId, ec1));
                return false;
             }
             /* If CancelQueuedDuplicates is set do so only if job is queued */
@@ -677,7 +679,6 @@ bool allow_duplicate_job(JCR *jcr)
             }
             if (cancel_queued || job->CancelRunningDuplicates) {
                UAContext *ua = new_ua_context(djcr);
-               char ec1[50];
                Jmsg(jcr, M_INFO, 0, _("Cancelling duplicate JobId=%s.\n"), 
                   edit_uint64(djcr->JobId, ec1));
                ua->jcr = djcr;
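diff --git a/bacula/src/dird/jobq.c b/bacula/src/dird/jobq.c
index a46561aa62e7d59539774a72de92c6b01dfbf644..af8605268d300db1aa10e27c3d8ae6d333921def 100644
--- a/bacula/src/dird/jobq.c
+++ b/bacula/src/dird/jobq.c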
@@ -56,6 +56,7 @@ extern "C" void *sched_wait(void *arg);
 
 static int  start_server(jobq_t *jq);
 static bool acquire_resources(JCR *jcr);
+static bool reschedule_job(JCR *jcr, jobq_t *jq, jobq_item_t *je);
 static void dec_read_store(JCR *jcr);
 static void dec_write_store(JCR *jcr);
 
@@ -484,74 +485,10 @@ void *jobq_server(void *arg)
             jcr->acquired_resource_locks = false;
          }
 
-         /*
-          * Reschedule the job if necessary and requested
-          */
-         if (jcr->job->RescheduleOnError &&
-             jcr->JobStatus != JS_Terminated &&
-             jcr->JobStatus != JS_Canceled &&
-             jcr->JobType == JT_BACKUP &&
-             (jcr->job->RescheduleTimes == 0 ||
-              jcr->reschedule_count < jcr->job->RescheduleTimes)) {
-             char dt[50], dt2[50];
-
-             /*
-              * Reschedule this job by cleaning it up, but
-              *  reuse the same JobId if possible.
-              */
-            time_t now = time(NULL);
-            jcr->reschedule_count++;
-            jcr->sched_time = now + jcr->job->RescheduleInterval;
-            bstrftime(dt, sizeof(dt), now);
-            bstrftime(dt2, sizeof(dt2), jcr->sched_time);
-            Dmsg4(2300, "Rescheduled Job %s to re-run in %d seconds.(now=%u,then=%u)\n", jcr->Job,
-                  (int)jcr->job->RescheduleInterval, now, jcr->sched_time);
-            Jmsg(jcr, M_INFO, 0, _("Rescheduled Job %s at %s to re-run in %d seconds (%s).\n"),
-                 jcr->Job, dt, (int)jcr->job->RescheduleInterval, dt2);
-            dird_free_jcr_pointers(jcr);     /* partial cleanup old stuff */
-            jcr->JobStatus = -1;
-            set_jcr_job_status(jcr, JS_WaitStartTime);
-            jcr->SDJobStatus = 0;
-            if (jcr->JobBytes == 0) {
-               Dmsg2(2300, "Requeue job=%d use=%d\n", jcr->JobId, jcr->use_count());
-               V(jq->mutex);
-               jobq_add(jq, jcr);     /* queue the job to run again */
-               P(jq->mutex);
-               free_jcr(jcr);         /* release jcr */
-               free(je);              /* free the job entry */
-               continue;              /* look for another job to run */
-            }
-            /*
-             * Something was actually backed up, so we cannot reuse
-             *   the old JobId or there will be database record
-             *   conflicts.  We now create a new job, copying the
-             *   appropriate fields.
-             */           
-            JCR *njcr = new_jcr(sizeof(JCR), dird_free_jcr);
-            set_jcr_defaults(njcr, jcr->job);
-            njcr->reschedule_count = jcr->reschedule_count;
-            njcr->sched_time = jcr->sched_time;
-            njcr->JobLevel = jcr->JobLevel;
-            njcr->JobStatus = -1;
-            set_jcr_job_status(njcr, jcr->JobStatus);
-            if (jcr->rstore) {
-               copy_rstorage(njcr, jcr->rstorage, _("previous Job"));
-            } else {
-               free_rstorage(njcr);
-            }
-            if (jcr->wstore) {
-               copy_wstorage(njcr, jcr->wstorage, _("previous Job"));
-            } else {
-               free_wstorage(njcr);
-            }
-            njcr->messages = jcr->messages;
-            Dmsg0(2300, "Call to run new job\n");
-            V(jq->mutex);
-            run_job(njcr);            /* This creates a "new" job */
-            free_jcr(njcr);           /* release "new" jcr */
-            P(jq->mutex);
-            Dmsg0(2300, "Back from running new job.\n");
+         if (reschedule_job(jcr, jq, je)) {
+            continue;              /* go look for more work */
          }
+
          /* Clean up and release old jcr */
          Dmsg2(2300, "====== Termination job=%d use_cnt=%d\n", jcr->JobId, jcr->use_count());
          jcr->SDJobStatus = 0;
@@ -664,6 +601,94 @@ void *jobq_server(void *arg)
    return NULL;
 }
 
+/*
+ * Returns true if cleanup done and we should look for more work
+ */
+static bool reschedule_job(JCR *jcr, jobq_t *jq, jobq_item_t *je)
+{
+   /*
+    * Reschedule the job if necessary and requested
+    */
+   if (jcr->job->RescheduleOnError &&
+       jcr->JobStatus != JS_Terminated &&
+       jcr->JobStatus != JS_Canceled &&
+       jcr->JobType == JT_BACKUP &&
+       (jcr->job->RescheduleTimes == 0 ||
+        jcr->reschedule_count < jcr->job->RescheduleTimes)) {
+       char dt[50], dt2[50];
+
+       /*
+        * Reschedule this job by cleaning it up, but
+        *  reuse the same JobId if possible.
+        */
+      time_t now = time(NULL);
+      jcr->reschedule_count++;
+      jcr->sched_time = now + jcr->job->RescheduleInterval;
+      bstrftime(dt, sizeof(dt), now);
+      bstrftime(dt2, sizeof(dt2), jcr->sched_time);
+      Dmsg4(2300, "Rescheduled Job %s to re-run in %d seconds.(now=%u,then=%u)\n", jcr->Job,
+            (int)jcr->job->RescheduleInterval, now, jcr->sched_time);
+      Jmsg(jcr, M_INFO, 0, _("Rescheduled Job %s at %s to re-run in %d seconds (%s).\n"),
+           jcr->Job, dt, (int)jcr->job->RescheduleInterval, dt2);
+      dird_free_jcr_pointers(jcr);     /* partial cleanup old stuff */
+      jcr->JobStatus = -1;
+      set_jcr_job_status(jcr, JS_WaitStartTime);
+      jcr->SDJobStatus = 0;
+      if (!allow_duplicate_job(jcr)) {
+         return false;
+      }
+      if (jcr->JobBytes == 0) {
+         Dmsg2(2300, "Requeue job=%d use=%d\n", jcr->JobId, jcr->use_count());
+         V(jq->mutex);
+         jobq_add(jq, jcr);     /* queue the job to run again */
+         P(jq->mutex);
+         free_jcr(jcr);         /* release jcr */
+         free(je);              /* free the job entry */
+         return true;           /* we already cleaned up */
+      }
+      /*
+       * Something was actually backed up, so we cannot reuse
+       *   the old JobId or there will be database record
+       *   conflicts.  We now create a new job, copying the
+       *   appropriate fields.
+       */           
+      JCR *njcr = new_jcr(sizeof(JCR), dird_free_jcr);
+      set_jcr_defaults(njcr, jcr->job);
+      njcr->reschedule_count = jcr->reschedule_count;
+      njcr->sched_time = jcr->sched_time;
+      njcr->JobLevel = jcr->JobLevel;
+      njcr->pool = jcr->pool;
+      njcr->run_pool_override = jcr->run_pool_override;
+      njcr->full_pool = jcr->full_pool;
+      njcr->run_full_pool_override = jcr->run_full_pool_override;
+      njcr->inc_pool = jcr->inc_pool;
+      njcr->run_inc_pool_override = jcr->run_inc_pool_override;
+      njcr->diff_pool = jcr->diff_pool;
+      njcr->JobStatus = -1;
+      set_jcr_job_status(njcr, jcr->JobStatus);
+      if (jcr->rstore) {
+         copy_rstorage(njcr, jcr->rstorage, _("previous Job"));
+      } else {
+         free_rstorage(njcr);
+      }
+      if (jcr->wstore) {
+         copy_wstorage(njcr, jcr->wstorage, _("previous Job"));
+      } else {
+         free_wstorage(njcr);
+      }
+      njcr->messages = jcr->messages;
+      njcr->spool_data = jcr->spool_data;
+      njcr->write_part_after_job = jcr->write_part_after_job;
+      Dmsg0(2300, "Call to run new job\n");
+      V(jq->mutex);
+      run_job(njcr);            /* This creates a "new" job */
+      free_jcr(njcr);           /* release "new" jcr */
+      P(jq->mutex);
+      Dmsg0(2300, "Back from running new job.\n");
+   }
+   return false;
+}
+
 /*
  * See if we can acquire all the necessary resources for the job (JCR)
  *
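diff --git a/bacula/technotes-2.5 b/bacula/technotes-2.5
index 85c76504454ce6c83161c6c691f08e486770e1d3..8eadc84422a466720cbefd0e633baa19fce9a3d3 100644
--- a/bacula/technotes-2.5
+++ b/bacula/technotes-2.5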
@@ -30,6 +30,10 @@ vtape driver
 
 
 General:
+21Jun08
+kes  Apply duplicate job tests to restarted jobs.
+kes  Copy more data when restarting a job so that run
+     overrides are kept. This should fix bug #1094.
 20Jun08
 kes  More word alignment cleanup.
 kes  Fix bug where SD did not ask operator if the device could not