/*
Bacula® - The Network Backup Solution
- Copyright (C) 2003-2009 Free Software Foundation Europe e.V.
+ Copyright (C) 2003-2014 Free Software Foundation Europe e.V.
- The main author of Bacula is Kern Sibbald, with contributions from
- many others, a complete list can be found in the file AUTHORS.
- This program is Free Software; you can redistribute it and/or
- modify it under the terms of version two of the GNU General Public
- License as published by the Free Software Foundation and included
- in the file LICENSE.
+ The main author of Bacula is Kern Sibbald, with contributions from many
+ others, a complete list can be found in the file AUTHORS.
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- 02110-1301, USA.
+ You may use this file and others of this release according to the
+ license defined in the LICENSE file, which includes the Affero General
+ Public License, v3.0 ("AGPLv3") and some additional permissions and
+ terms pursuant to its AGPLv3 Section 7.
Bacula® is a registered trademark of Kern Sibbald.
- The licensor of Bacula is the Free Software Foundation Europe
- (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
- Switzerland, email:ftf@fsfeurope.org.
*/
/*
* Bacula job queue routines.
static int start_server(jobq_t *jq);
static bool acquire_resources(JCR *jcr);
static bool reschedule_job(JCR *jcr, jobq_t *jq, jobq_item_t *je);
-static void dec_read_store(JCR *jcr);
static void dec_write_store(JCR *jcr);
/*
P(jq->mutex);
jq->valid = 0; /* prevent any more operations */
- /*
- * If any threads are active, wake them
+ /*
+ * If any threads are active, wake them
*/
if (jq->num_workers > 0) {
jq->quit = true;
JCR *jcr = ((wait_pkt *)arg)->jcr;
jobq_t *jq = ((wait_pkt *)arg)->jq;
- set_jcr_in_tsd(jcr);
+ set_jcr_in_tsd(INVALID_JCR);
Dmsg0(2300, "Enter sched_wait.\n");
free(arg);
time_t wtime = jcr->sched_time - time(NULL);
- set_jcr_job_status(jcr, JS_WaitStartTime);
+ jcr->setJobStatus(JS_WaitStartTime);
/* Wait until scheduled time arrives */
if (wtime > 0) {
Jmsg(jcr, M_INFO, 0, _("Job %s waiting %d seconds for scheduled start time.\n"),
}
/* Check every 30 seconds if canceled */
while (wtime > 0) {
- Dmsg3(2300, "Waiting on sched time, jobid=%d secs=%d use=%d\n",
+ Dmsg3(2300, "Waiting on sched time, jobid=%d secs=%d use=%d\n",
jcr->JobId, wtime, jcr->use_count());
if (wtime > 30) {
wtime = 30;
pthread_t id;
wait_pkt *sched_pkt;
- if (!jcr->term_wait_inited) {
+ if (!jcr->term_wait_inited) {
/* Initialize termination condition variable */
if ((stat = pthread_cond_init(&jcr->term_wait, NULL)) != 0) {
berrno be;
return stat;
}
jcr->term_wait_inited = true;
- }
-
+ }
+
Dmsg3(2300, "jobq_add jobid=%d jcr=0x%x use_count=%d\n", jcr->JobId, jcr, jcr->use_count());
if (jq->valid != JOBQ_VALID) {
Jmsg0(jcr, M_ERROR, 0, "Jobq_add queue not initialized.\n");
sched_pkt = (wait_pkt *)malloc(sizeof(wait_pkt));
sched_pkt->jcr = jcr;
sched_pkt->jq = jq;
- stat = pthread_create(&id, &jq->attr, sched_wait, (void *)sched_pkt);
+ stat = pthread_create(&id, &jq->attr, sched_wait, (void *)sched_pkt);
if (stat != 0) { /* thread not created */
berrno be;
Jmsg1(jcr, M_ERROR, 0, _("pthread_thread_create: ERR=%s\n"), be.bstrerror(stat));
/*
* if any threads are idle, wake one.
- * Actually we do a broadcast because on /lib/tls
+ * Actually we do a broadcast because on /lib/tls
* these signals seem to get lost from time to time.
*/
if (jq->idle_workers > 0) {
jq->running_jobs->append(je);
/* Attach jcr to this thread while we run the job */
+ jcr->my_thread_id = pthread_self();
+ jcr->set_killable(true);
set_jcr_in_tsd(jcr);
Dmsg1(2300, "Took jobid=%d from ready and appended to run\n", jcr->JobId);
/* Job finished detach from thread */
remove_jcr_from_tsd(je->jcr);
+ je->jcr->set_killable(false);
+
+ /* Clear the threadid, probably not necessary */
+ memset(&jcr->my_thread_id, 0, sizeof(jcr->my_thread_id));
Dmsg2(2300, "Back from user engine jobid=%d use=%d.\n", jcr->JobId,
jcr->use_count());
running_allow_mix = true;
for ( ; re; ) {
Dmsg2(2300, "JobId %d is also running with %s\n",
- re->jcr->JobId,
+ re->jcr->JobId,
re->jcr->job->allow_mixed_priority ? "mix" : "no mix");
if (!re->jcr->job->allow_mixed_priority) {
running_allow_mix = false;
if (!(jcr->JobPriority == Priority
|| (jcr->JobPriority < Priority &&
jcr->job->allow_mixed_priority && running_allow_mix))) {
- set_jcr_job_status(jcr, JS_WaitPriority);
+ jcr->setJobStatus(JS_WaitPriority);
break;
}
*/
static bool reschedule_job(JCR *jcr, jobq_t *jq, jobq_item_t *je)
{
+ bool resched = false;
/*
- * Reschedule the job if necessary and requested
+ * Reschedule the job if requested and possible
*/
- if (jcr->job->RescheduleOnError &&
- jcr->JobStatus != JS_Terminated &&
- jcr->JobStatus != JS_Canceled &&
- jcr->getJobType() == JT_BACKUP &&
- (jcr->job->RescheduleTimes == 0 ||
- jcr->reschedule_count < jcr->job->RescheduleTimes)) {
+ /* Basic condition is that more reschedule times remain */
+ if (jcr->job->RescheduleTimes == 0 ||
+ jcr->reschedule_count < jcr->job->RescheduleTimes) {
+ resched =
+ /* Check for failed jobs */
+ (jcr->job->RescheduleOnError &&
+ !jcr->is_JobStatus(JS_Terminated) &&
+ !jcr->is_JobStatus(JS_Canceled) &&
+ jcr->is_JobType(JT_BACKUP));
+ }
+ if (resched) {
char dt[50], dt2[50];
/*
jcr->Job, dt, (int)jcr->job->RescheduleInterval, dt2);
dird_free_jcr_pointers(jcr); /* partial cleanup old stuff */
jcr->JobStatus = -1;
- set_jcr_job_status(jcr, JS_WaitStartTime);
+ jcr->setJobStatus(JS_WaitStartTime);
jcr->SDJobStatus = 0;
+ jcr->JobErrors = 0;
if (!allow_duplicate_job(jcr)) {
return false;
}
+ /* Only jobs with no output can run on same JCR */
if (jcr->JobBytes == 0) {
Dmsg2(2300, "Requeue job=%d use=%d\n", jcr->JobId, jcr->use_count());
V(jq->mutex);
+ /*
+ * Special test here since a Virtual Full gets marked
+ * as a Full, so we look at the resource record
+ */
+ if (jcr->wasVirtualFull) {
+ jcr->setJobLevel(L_VIRTUAL_FULL);
+ }
jobq_add(jq, jcr); /* queue the job to run again */
P(jq->mutex);
free_jcr(jcr); /* release jcr */
* the old JobId or there will be database record
* conflicts. We now create a new job, copying the
* appropriate fields.
- */
+ */
JCR *njcr = new_jcr(sizeof(JCR), dird_free_jcr);
set_jcr_defaults(njcr, jcr->job);
njcr->reschedule_count = jcr->reschedule_count;
njcr->sched_time = jcr->sched_time;
- njcr->set_JobLevel(jcr->getJobLevel());
+ njcr->initial_sched_time = jcr->initial_sched_time;
+ /*
+ * Special test here since a Virtual Full gets marked
+ * as a Full, so we look at the resource record
+ */
+ if (jcr->wasVirtualFull) {
+ njcr->setJobLevel(L_VIRTUAL_FULL);
+ } else {
+ njcr->setJobLevel(jcr->getJobLevel());
+ }
njcr->pool = jcr->pool;
njcr->run_pool_override = jcr->run_pool_override;
+ njcr->next_pool = jcr->next_pool;
+ njcr->run_next_pool_override = jcr->run_next_pool_override;
njcr->full_pool = jcr->full_pool;
njcr->run_full_pool_override = jcr->run_full_pool_override;
njcr->inc_pool = jcr->inc_pool;
njcr->run_inc_pool_override = jcr->run_inc_pool_override;
njcr->diff_pool = jcr->diff_pool;
njcr->JobStatus = -1;
- set_jcr_job_status(njcr, jcr->JobStatus);
+ njcr->setJobStatus(jcr->JobStatus);
if (jcr->rstore) {
copy_rstorage(njcr, jcr->rstorage, _("previous Job"));
} else {
* but we do not really have enough information here to
* know if this is really a deadlock (it may be a dual drive
* autochanger), and in principle, the SD reservation system
- * should detect these deadlocks, so push the work off on is.
+ * should detect these deadlocks, so push the work off on it.
*/
#ifdef xxx
if (jcr->rstore && jcr->rstore == jcr->wstore) { /* possible deadlock */
Jmsg(jcr, M_FATAL, 0, _("Job canceled. Attempt to read and write same device.\n"
- " Read storage \"%s\" (From %s) -- Write storage \"%s\" (From %s)\n"),
+ " Read storage \"%s\" (From %s) -- Write storage \"%s\" (From %s)\n"),
jcr->rstore->name(), jcr->rstore_source, jcr->wstore->name(), jcr->wstore_source);
- set_jcr_job_status(jcr, JS_Canceled);
+ jcr->setJobStatus(JS_Canceled);
return false;
}
#endif
if (jcr->rstore) {
Dmsg1(200, "Rstore=%s\n", jcr->rstore->name());
- if (jcr->rstore->NumConcurrentJobs < jcr->rstore->MaxConcurrentJobs) {
- jcr->rstore->NumConcurrentReadJobs++;
- jcr->rstore->NumConcurrentJobs++;
- Dmsg1(200, "Inc rncj=%d\n", jcr->rstore->NumConcurrentJobs);
- } else {
+ if (!inc_read_store(jcr)) {
Dmsg1(200, "Fail rncj=%d\n", jcr->rstore->NumConcurrentJobs);
- set_jcr_job_status(jcr, JS_WaitStoreRes);
+ jcr->setJobStatus(JS_WaitStoreRes);
return false;
}
}
-
+
if (jcr->wstore) {
Dmsg1(200, "Wstore=%s\n", jcr->wstore->name());
if (jcr->wstore->NumConcurrentJobs < jcr->wstore->MaxConcurrentJobs) {
}
}
if (skip_this_jcr) {
- set_jcr_job_status(jcr, JS_WaitStoreRes);
+ jcr->setJobStatus(JS_WaitStoreRes);
return false;
}
/* Back out previous locks */
dec_write_store(jcr);
dec_read_store(jcr);
- set_jcr_job_status(jcr, JS_WaitClientRes);
+ jcr->setJobStatus(JS_WaitClientRes);
return false;
}
if (jcr->job->NumConcurrentJobs < jcr->job->MaxConcurrentJobs) {
dec_write_store(jcr);
dec_read_store(jcr);
jcr->client->NumConcurrentJobs--;
- set_jcr_job_status(jcr, JS_WaitJobRes);
+ jcr->setJobStatus(JS_WaitJobRes);
return false;
}
return true;
}
-static void dec_read_store(JCR *jcr)
+static pthread_mutex_t rstore_mutex = PTHREAD_MUTEX_INITIALIZER; /* held while the read storage job counters are updated */
+
+/*
+ * Note: inc_read_store() and dec_read_store() are
+ *   called from select_rstore() in src/dird/restore.c
+ *
+ * Try to reserve a slot on the job's read storage (jcr->rstore).
+ *   A slot is granted only while the storage is below
+ *   MaxConcurrentJobs and, unless the job is a restore or
+ *   MaxConcurrentReadJobs is zero, also below MaxConcurrentReadJobs.
+ *   The counters are tested and bumped while holding rstore_mutex.
+ *
+ *   jcr->rstore is dereferenced without a NULL check, so the
+ *   caller must make sure it is set.
+ *
+ *  Returns: true  if both job counters were incremented
+ *           false if no slot is free (counters left untouched)
+ */
+bool inc_read_store(JCR *jcr)
+{
+   bool granted = false;
+
+   P(rstore_mutex);
+   /* Overall job limit of the storage comes first */
+   if (jcr->rstore->NumConcurrentJobs < jcr->rstore->MaxConcurrentJobs) {
+      /* Restores and an unset (zero) read limit bypass the read job cap */
+      bool read_slot_free =
+         jcr->getJobType() == JT_RESTORE ||
+         jcr->rstore->MaxConcurrentReadJobs == 0 ||
+         jcr->rstore->NumConcurrentReadJobs < jcr->rstore->MaxConcurrentReadJobs;
+
+      if (read_slot_free) {
+         jcr->rstore->NumConcurrentReadJobs++;
+         jcr->rstore->NumConcurrentJobs++;
+         Dmsg1(200, "Inc rncj=%d\n", jcr->rstore->NumConcurrentJobs);
+         granted = true;
+      }
+   }
+   V(rstore_mutex);
+
+   return granted;
+}
+
+void dec_read_store(JCR *jcr)
{
if (jcr->rstore) {
+ P(rstore_mutex);
jcr->rstore->NumConcurrentReadJobs--; /* back out rstore */
jcr->rstore->NumConcurrentJobs--; /* back out rstore */
Dmsg1(200, "Dec rncj=%d\n", jcr->rstore->NumConcurrentJobs);
+ V(rstore_mutex);
ASSERT(jcr->rstore->NumConcurrentReadJobs >= 0);
ASSERT(jcr->rstore->NumConcurrentJobs >= 0);
}