X-Git-Url: https://git.sur5r.net/?a=blobdiff_plain;f=bacula%2Fsrc%2Fdird%2Fjobq.c;h=4ba28a5f7921028c29c730c5330107f20424a681;hb=10cfd798ced2d27f61ead2de6fe9b1bcc8e3468d;hp=a94235a234e73ed816dc8e2980987882e6bafb5c;hpb=9bc238012a8bd805745630ebd7fda3bcad068457;p=bacula%2Fbacula diff --git a/bacula/src/dird/jobq.c b/bacula/src/dird/jobq.c index a94235a234..4ba28a5f79 100644 --- a/bacula/src/dird/jobq.c +++ b/bacula/src/dird/jobq.c @@ -1,29 +1,20 @@ /* - Bacula® - The Network Backup Solution - - Copyright (C) 2003-2011 Free Software Foundation Europe e.V. - - The main author of Bacula is Kern Sibbald, with contributions from - many others, a complete list can be found in the file AUTHORS. - This program is Free Software; you can redistribute it and/or - modify it under the terms of version three of the GNU Affero General Public - License as published by the Free Software Foundation and included - in the file LICENSE. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - - Bacula® is a registered trademark of Kern Sibbald. - The licensor of Bacula is the Free Software Foundation Europe - (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich, - Switzerland, email:ftf@fsfeurope.org. + Bacula(R) - The Network Backup Solution + + Copyright (C) 2000-2017 Kern Sibbald + + The original author of Bacula is Kern Sibbald, with contributions + from many others, a complete list can be found in the file AUTHORS. + + You may use this file and others of this release according to the + license defined in the LICENSE file, which includes the Affero General + Public License, v3.0 ("AGPLv3") and some additional permissions and + terms pursuant to its AGPLv3 Section 7. + + This notice must be preserved when any source code is + conveyed and/or propagated. + + Bacula(R) is a registered trademark of Kern Sibbald. */ /* * Bacula job queue routines. @@ -120,8 +111,8 @@ int jobq_destroy(jobq_t *jq) P(jq->mutex); jq->valid = 0; /* prevent any more operations */ - /* - * If any threads are active, wake them + /* + * If any threads are active, wake them */ if (jq->num_workers > 0) { jq->quit = true; @@ -162,7 +153,7 @@ struct wait_pkt { * this routine is only used for jobs started from the console * for which the user explicitly specified a start time. Otherwise * most jobs are put into the job queue only when their - * scheduled time arives. + * scheduled time arrives. */ extern "C" void *sched_wait(void *arg) @@ -174,7 +165,7 @@ void *sched_wait(void *arg) Dmsg0(2300, "Enter sched_wait.\n"); free(arg); time_t wtime = jcr->sched_time - time(NULL); - set_jcr_job_status(jcr, JS_WaitStartTime); + jcr->setJobStatus(JS_WaitStartTime); /* Wait until scheduled time arrives */ if (wtime > 0) { Jmsg(jcr, M_INFO, 0, _("Job %s waiting %d seconds for scheduled start time.\n"), @@ -182,7 +173,7 @@ void *sched_wait(void *arg) } /* Check every 30 seconds if canceled */ while (wtime > 0) { - Dmsg3(2300, "Waiting on sched time, jobid=%d secs=%d use=%d\n", + Dmsg3(2300, "Waiting on sched time, jobid=%d secs=%d use=%d\n", jcr->JobId, wtime, jcr->use_count()); if (wtime > 30) { wtime = 30; @@ -200,6 +191,32 @@ void *sched_wait(void *arg) return NULL; } +/* Procedure to update the client->NumConcurrentJobs */ +static void update_client_numconcurrentjobs(JCR *jcr, int val) +{ + int num; + if (!jcr->client) { + return; + } + + switch (jcr->getJobType()) + { + case JT_MIGRATE: + case JT_COPY: + case JT_ADMIN: + break; + case JT_BACKUP: + /* Fall through wanted */ + default: + if (jcr->no_client_used() || jcr->wasVirtualFull) { + break; + } + num = jcr->client->getNumConcurrentJobs(); + jcr->client->setNumConcurrentJobs(num + val); + break; + } +} + /* * Add a job to the queue * jq is a queue that was created with jobq_init @@ -213,7 +230,7 @@ int jobq_add(jobq_t *jq, JCR *jcr) pthread_t id; wait_pkt *sched_pkt; - if (!jcr->term_wait_inited) { + if (!jcr->term_wait_inited) { /* Initialize termination condition variable */ if ((stat = pthread_cond_init(&jcr->term_wait, NULL)) != 0) { berrno be; @@ -221,8 +238,8 @@ int jobq_add(jobq_t *jq, JCR *jcr) return stat; } jcr->term_wait_inited = true; - } - + } + Dmsg3(2300, "jobq_add jobid=%d jcr=0x%x use_count=%d\n", jcr->JobId, jcr, jcr->use_count()); if (jq->valid != JOBQ_VALID) { Jmsg0(jcr, M_ERROR, 0, "Jobq_add queue not initialized.\n"); @@ -236,7 +253,7 @@ int jobq_add(jobq_t *jq, JCR *jcr) sched_pkt = (wait_pkt *)malloc(sizeof(wait_pkt)); sched_pkt->jcr = jcr; sched_pkt->jq = jq; - stat = pthread_create(&id, &jq->attr, sched_wait, (void *)sched_pkt); + stat = pthread_create(&id, &jq->attr, sched_wait, (void *)sched_pkt); if (stat != 0) { /* thread not created */ berrno be; Jmsg1(jcr, M_ERROR, 0, _("pthread_thread_create: ERR=%s\n"), be.bstrerror(stat)); @@ -342,7 +359,7 @@ static int start_server(jobq_t *jq) /* * if any threads are idle, wake one. - * Actually we do a broadcast because on /lib/tls + * Actually we do a broadcast because on /lib/tls * these signals seem to get lost from time to time. */ if (jq->idle_workers > 0) { @@ -437,6 +454,7 @@ void *jobq_server(void *arg) jq->running_jobs->append(je); /* Attach jcr to this thread while we run the job */ + jcr->my_thread_id = pthread_self(); jcr->set_killable(true); set_jcr_in_tsd(jcr); Dmsg1(2300, "Took jobid=%d from ready and appended to run\n", jcr->JobId); @@ -466,10 +484,12 @@ void *jobq_server(void *arg) * put into the ready queue. */ if (jcr->acquired_resource_locks) { + int num; dec_read_store(jcr); dec_write_store(jcr); - jcr->client->NumConcurrentJobs--; - jcr->job->NumConcurrentJobs--; + update_client_numconcurrentjobs(jcr, -1); + num = jcr->job->getNumConcurrentJobs() - 1; + jcr->job->setNumConcurrentJobs(num); jcr->acquired_resource_locks = false; } @@ -502,7 +522,7 @@ void *jobq_server(void *arg) running_allow_mix = true; for ( ; re; ) { Dmsg2(2300, "JobId %d is also running with %s\n", - re->jcr->JobId, + re->jcr->JobId, re->jcr->job->allow_mixed_priority ? "mix" : "no mix"); if (!re->jcr->job->allow_mixed_priority) { running_allow_mix = false; @@ -533,7 +553,7 @@ void *jobq_server(void *arg) if (!(jcr->JobPriority == Priority || (jcr->JobPriority < Priority && jcr->job->allow_mixed_priority && running_allow_mix))) { - set_jcr_job_status(jcr, JS_WaitPriority); + jcr->setJobStatus(JS_WaitPriority); break; } @@ -619,16 +639,18 @@ static bool reschedule_job(JCR *jcr, jobq_t *jq, jobq_item_t *je) /* Basic condition is that more reschedule times remain */ if (jcr->job->RescheduleTimes == 0 || jcr->reschedule_count < jcr->job->RescheduleTimes) { - resched = - /* Check for incomplete jobs */ - (jcr->job->RescheduleIncompleteJobs && - jcr->is_incomplete() && jcr->is_JobType(JT_BACKUP) && - !jcr->is_JobLevel(L_BASE)) || + + /* Check for incomplete jobs */ + if (jcr->is_incomplete()) { + resched = (jcr->RescheduleIncompleteJobs && jcr->is_JobType(JT_BACKUP) && + !(jcr->HasBase||jcr->is_JobLevel(L_BASE))); + } else { /* Check for failed jobs */ - (jcr->job->RescheduleOnError && - !jcr->is_JobStatus(JS_Terminated) && - !jcr->is_JobStatus(JS_Canceled) && - jcr->is_JobType(JT_BACKUP)); + resched = (jcr->job->RescheduleOnError && + !jcr->is_JobStatus(JS_Terminated) && + !jcr->is_JobStatus(JS_Canceled) && + jcr->is_JobType(JT_BACKUP)); + } } if (resched) { char dt[50], dt2[50]; @@ -637,7 +659,7 @@ static bool reschedule_job(JCR *jcr, jobq_t *jq, jobq_item_t *je) * Reschedule this job by cleaning it up, but * reuse the same JobId if possible. */ - jcr->incomplete = jcr->is_incomplete(); /* save incomplete status */ + jcr->rerunning = jcr->is_incomplete(); /* save incomplete status */ time_t now = time(NULL); jcr->reschedule_count++; jcr->sched_time = now + jcr->job->RescheduleInterval; @@ -649,16 +671,28 @@ static bool reschedule_job(JCR *jcr, jobq_t *jq, jobq_item_t *je) jcr->Job, dt, (int)jcr->job->RescheduleInterval, dt2); dird_free_jcr_pointers(jcr); /* partial cleanup old stuff */ jcr->JobStatus = -1; - set_jcr_job_status(jcr, JS_WaitStartTime); + jcr->setJobStatus(JS_WaitStartTime); jcr->SDJobStatus = 0; jcr->JobErrors = 0; if (!allow_duplicate_job(jcr)) { return false; } /* Only jobs with no output or Incomplete jobs can run on same JCR */ - if (jcr->JobBytes == 0 || jcr->incomplete) { + if (jcr->JobBytes == 0 || jcr->rerunning) { Dmsg2(2300, "Requeue job=%d use=%d\n", jcr->JobId, jcr->use_count()); V(jq->mutex); + /* + * Special test here since a Virtual Full gets marked + * as a Full, so we look at the resource record + */ + if (jcr->wasVirtualFull) { + jcr->setJobLevel(L_VIRTUAL_FULL); + } + /* + * When we are using the same jcr then make sure to reset + * RealEndTime back to zero. + */ + jcr->jr.RealEndTime = 0; jobq_add(jq, jcr); /* queue the job to run again */ P(jq->mutex); free_jcr(jcr); /* release jcr */ @@ -670,21 +704,49 @@ static bool reschedule_job(JCR *jcr, jobq_t *jq, jobq_item_t *je) * the old JobId or there will be database record * conflicts. We now create a new job, copying the * appropriate fields. - */ + */ JCR *njcr = new_jcr(sizeof(JCR), dird_free_jcr); set_jcr_defaults(njcr, jcr->job); + /* + * Eliminate the new job_end_push, then copy the one from + * the old job, and set the old one to be empty. + */ + void *v; + lock_jobs(); /* protect ourself from reload_config() */ + LockRes(); + foreach_alist(v, (&jcr->job_end_push)) { + njcr->job_end_push.append(v); + } + jcr->job_end_push.destroy(); + jcr->job_end_push.init(1, false); + UnlockRes(); + unlock_jobs(); + njcr->reschedule_count = jcr->reschedule_count; njcr->sched_time = jcr->sched_time; - njcr->set_JobLevel(jcr->getJobLevel()); + njcr->initial_sched_time = jcr->initial_sched_time; + /* + * Special test here since a Virtual Full gets marked + * as a Full, so we look at the resource record + */ + if (jcr->wasVirtualFull) { + njcr->setJobLevel(L_VIRTUAL_FULL); + } else { + njcr->setJobLevel(jcr->getJobLevel()); + } njcr->pool = jcr->pool; njcr->run_pool_override = jcr->run_pool_override; + njcr->next_pool = jcr->next_pool; + njcr->run_next_pool_override = jcr->run_next_pool_override; njcr->full_pool = jcr->full_pool; + njcr->vfull_pool = jcr->vfull_pool; njcr->run_full_pool_override = jcr->run_full_pool_override; + njcr->run_vfull_pool_override = jcr->run_vfull_pool_override; njcr->inc_pool = jcr->inc_pool; njcr->run_inc_pool_override = jcr->run_inc_pool_override; njcr->diff_pool = jcr->diff_pool; njcr->JobStatus = -1; - set_jcr_job_status(njcr, jcr->JobStatus); + njcr->setJobStatus(jcr->JobStatus); if (jcr->rstore) { copy_rstorage(njcr, jcr->rstorage, _("previous Job")); } else { @@ -729,56 +791,61 @@ static bool acquire_resources(JCR *jcr) #ifdef xxx if (jcr->rstore && jcr->rstore == jcr->wstore) { /* possible deadlock */ Jmsg(jcr, M_FATAL, 0, _("Job canceled. Attempt to read and write same device.\n" - " Read storage \"%s\" (From %s) -- Write storage \"%s\" (From %s)\n"), + " Read storage \"%s\" (From %s) -- Write storage \"%s\" (From %s)\n"), jcr->rstore->name(), jcr->rstore_source, jcr->wstore->name(), jcr->wstore_source); - set_jcr_job_status(jcr, JS_Canceled); + jcr->setJobStatus(JS_Canceled); return false; } #endif if (jcr->rstore) { Dmsg1(200, "Rstore=%s\n", jcr->rstore->name()); if (!inc_read_store(jcr)) { - Dmsg1(200, "Fail rncj=%d\n", jcr->rstore->NumConcurrentJobs); - set_jcr_job_status(jcr, JS_WaitStoreRes); + Dmsg1(200, "Fail rncj=%d\n", jcr->rstore->getNumConcurrentJobs()); + jcr->setJobStatus(JS_WaitStoreRes); return false; } } - + if (jcr->wstore) { Dmsg1(200, "Wstore=%s\n", jcr->wstore->name()); - if (jcr->wstore->NumConcurrentJobs < jcr->wstore->MaxConcurrentJobs) { - jcr->wstore->NumConcurrentJobs++; - Dmsg1(200, "Inc wncj=%d\n", jcr->wstore->NumConcurrentJobs); + int num = jcr->wstore->getNumConcurrentJobs(); + if (num < jcr->wstore->MaxConcurrentJobs) { + Dmsg1(200, "Inc wncj=%d\n", num + 1); + jcr->wstore->setNumConcurrentJobs(num + 1); } else if (jcr->rstore) { dec_read_store(jcr); skip_this_jcr = true; } else { - Dmsg1(200, "Fail wncj=%d\n", jcr->wstore->NumConcurrentJobs); + Dmsg1(200, "Fail wncj=%d\n", num); skip_this_jcr = true; } } if (skip_this_jcr) { - set_jcr_job_status(jcr, JS_WaitStoreRes); + jcr->setJobStatus(JS_WaitStoreRes); return false; } - if (jcr->client->NumConcurrentJobs < jcr->client->MaxConcurrentJobs) { - jcr->client->NumConcurrentJobs++; - } else { - /* Back out previous locks */ - dec_write_store(jcr); - dec_read_store(jcr); - set_jcr_job_status(jcr, JS_WaitClientRes); - return false; + if (jcr->client) { + if (jcr->client->getNumConcurrentJobs() < jcr->client->MaxConcurrentJobs) { + update_client_numconcurrentjobs(jcr, 1); + } else { + /* Back out previous locks */ + dec_write_store(jcr); + dec_read_store(jcr); + jcr->setJobStatus(JS_WaitClientRes); + return false; + } } - if (jcr->job->NumConcurrentJobs < jcr->job->MaxConcurrentJobs) { - jcr->job->NumConcurrentJobs++; + if (jcr->job->getNumConcurrentJobs() < jcr->job->MaxConcurrentJobs) { + int num; + num = jcr->job->getNumConcurrentJobs() + 1; + jcr->job->setNumConcurrentJobs(num); } else { /* Back out previous locks */ dec_write_store(jcr); dec_read_store(jcr); - jcr->client->NumConcurrentJobs--; - set_jcr_job_status(jcr, JS_WaitJobRes); + update_client_numconcurrentjobs(jcr, -1); + jcr->setJobStatus(JS_WaitJobRes); return false; } @@ -788,17 +855,27 @@ static bool acquire_resources(JCR *jcr) static pthread_mutex_t rstore_mutex = PTHREAD_MUTEX_INITIALIZER; -/* +/* * Note: inc_read_store() and dec_read_store() are * called from select_rstore() in src/dird/restore.c */ bool inc_read_store(JCR *jcr) { P(rstore_mutex); - if (jcr->rstore->NumConcurrentJobs < jcr->rstore->MaxConcurrentJobs) { - jcr->rstore->NumConcurrentReadJobs++; - jcr->rstore->NumConcurrentJobs++; - Dmsg1(200, "Inc rncj=%d\n", jcr->rstore->NumConcurrentJobs); + int num = jcr->rstore->getNumConcurrentJobs(); + int numread = jcr->rstore->getNumConcurrentReadJobs(); + int maxread = jcr->rstore->MaxConcurrentReadJobs; + if (num < jcr->rstore->MaxConcurrentJobs && + (jcr->getJobType() == JT_RESTORE || + numread == 0 || + maxread == 0 || /* No limit set */ + numread < maxread)) /* Below the limit */ + { + num++; + numread++; + jcr->rstore->setNumConcurrentReadJobs(numread); + jcr->rstore->setNumConcurrentJobs(num); + Dmsg1(200, "Inc rncj=%d\n", num); V(rstore_mutex); return true; } @@ -810,20 +887,20 @@ void dec_read_store(JCR *jcr) { if (jcr->rstore) { P(rstore_mutex); - jcr->rstore->NumConcurrentReadJobs--; /* back out rstore */ - jcr->rstore->NumConcurrentJobs--; /* back out rstore */ - Dmsg1(200, "Dec rncj=%d\n", jcr->rstore->NumConcurrentJobs); + int numread = jcr->rstore->getNumConcurrentReadJobs() - 1; + int num = jcr->rstore->getNumConcurrentJobs() - 1; + jcr->rstore->setNumConcurrentReadJobs(numread); + jcr->rstore->setNumConcurrentJobs(num); + Dmsg1(200, "Dec rncj=%d\n", num); V(rstore_mutex); - ASSERT(jcr->rstore->NumConcurrentReadJobs >= 0); - ASSERT(jcr->rstore->NumConcurrentJobs >= 0); } } static void dec_write_store(JCR *jcr) { if (jcr->wstore) { - jcr->wstore->NumConcurrentJobs--; - Dmsg1(200, "Dec wncj=%d\n", jcr->wstore->NumConcurrentJobs); - ASSERT(jcr->wstore->NumConcurrentJobs >= 0); + int num = jcr->wstore->getNumConcurrentJobs() - 1; + Dmsg1(200, "Dec wncj=%d\n", num); + jcr->wstore->setNumConcurrentJobs(num); } }