X-Git-Url: https://git.sur5r.net/?a=blobdiff_plain;f=bacula%2Fsrc%2Fdird%2Fjobq.c;h=c4a4c8cb8b53b922c6a3f87ea20759d5422a373c;hb=0796a980f144632ab1faa901883e619ebd291d04;hp=aa28b7cf34e417b1f837a2511ac607f881b323f8;hpb=105ddc5b0f41e269c1f3d7e3a65cac37bc4207ca;p=bacula%2Fbacula diff --git a/bacula/src/dird/jobq.c b/bacula/src/dird/jobq.c old mode 100755 new mode 100644 index aa28b7cf34..c4a4c8cb8b --- a/bacula/src/dird/jobq.c +++ b/bacula/src/dird/jobq.c @@ -1,3 +1,30 @@ +/* + Bacula® - The Network Backup Solution + + Copyright (C) 2003-2007 Free Software Foundation Europe e.V. + + The main author of Bacula is Kern Sibbald, with contributions from + many others, a complete list can be found in the file AUTHORS. + This program is Free Software; you can redistribute it and/or + modify it under the terms of version two of the GNU General Public + License as published by the Free Software Foundation and included + in the file LICENSE. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + + Bacula® is a registered trademark of John Walker. + The licensor of Bacula is the Free Software Foundation Europe + (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich, + Switzerland, email:ftf@fsfeurope.org. +*/ /* * Bacula job queue routines. * @@ -17,20 +44,6 @@ * David R. Butenhof * */ -/* - Copyright (C) 2003-2006 Kern Sibbald - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - version 2 as amended with additional clauses defined in the - file LICENSE in the main source directory. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - the file LICENSE for additional details. - - */ #include "bacula.h" #include "dird.h" @@ -59,7 +72,7 @@ int jobq_init(jobq_t *jq, int threads, void *(*engine)(void *arg)) if ((stat = pthread_attr_init(&jq->attr)) != 0) { berrno be; - Jmsg1(NULL, M_ERROR, 0, _("pthread_attr_init: ERR=%s\n"), be.strerror(stat)); + Jmsg1(NULL, M_ERROR, 0, _("pthread_attr_init: ERR=%s\n"), be.bstrerror(stat)); return stat; } if ((stat = pthread_attr_setdetachstate(&jq->attr, PTHREAD_CREATE_DETACHED)) != 0) { @@ -68,13 +81,13 @@ int jobq_init(jobq_t *jq, int threads, void *(*engine)(void *arg)) } if ((stat = pthread_mutex_init(&jq->mutex, NULL)) != 0) { berrno be; - Jmsg1(NULL, M_ERROR, 0, _("pthread_mutex_init: ERR=%s\n"), be.strerror(stat)); + Jmsg1(NULL, M_ERROR, 0, _("pthread_mutex_init: ERR=%s\n"), be.bstrerror(stat)); pthread_attr_destroy(&jq->attr); return stat; } if ((stat = pthread_cond_init(&jq->work, NULL)) != 0) { berrno be; - Jmsg1(NULL, M_ERROR, 0, _("pthread_cond_init: ERR=%s\n"), be.strerror(stat)); + Jmsg1(NULL, M_ERROR, 0, _("pthread_cond_init: ERR=%s\n"), be.bstrerror(stat)); pthread_mutex_destroy(&jq->mutex); pthread_attr_destroy(&jq->attr); return stat; @@ -107,7 +120,7 @@ int jobq_destroy(jobq_t *jq) } if ((stat = pthread_mutex_lock(&jq->mutex)) != 0) { berrno be; - Jmsg1(NULL, M_ERROR, 0, _("pthread_mutex_lock: ERR=%s\n"), be.strerror(stat)); + Jmsg1(NULL, M_ERROR, 0, _("pthread_mutex_lock: ERR=%s\n"), be.bstrerror(stat)); return stat; } jq->valid = 0; /* prevent any more operations */ @@ -120,7 +133,7 @@ int jobq_destroy(jobq_t *jq) if (jq->idle_workers) { if ((stat = pthread_cond_broadcast(&jq->work)) != 0) { berrno be; - Jmsg1(NULL, M_ERROR, 0, _("pthread_cond_broadcast: ERR=%s\n"), be.strerror(stat)); + Jmsg1(NULL, M_ERROR, 0, _("pthread_cond_broadcast: ERR=%s\n"), 
be.bstrerror(stat)); pthread_mutex_unlock(&jq->mutex); return stat; } @@ -128,7 +141,7 @@ int jobq_destroy(jobq_t *jq) while (jq->num_workers > 0) { if ((stat = pthread_cond_wait(&jq->work, &jq->mutex)) != 0) { berrno be; - Jmsg1(NULL, M_ERROR, 0, _("pthread_cond_wait: ERR=%s\n"), be.strerror(stat)); + Jmsg1(NULL, M_ERROR, 0, _("pthread_cond_wait: ERR=%s\n"), be.bstrerror(stat)); pthread_mutex_unlock(&jq->mutex); return stat; } @@ -136,7 +149,7 @@ int jobq_destroy(jobq_t *jq) } if ((stat = pthread_mutex_unlock(&jq->mutex)) != 0) { berrno be; - Jmsg1(NULL, M_ERROR, 0, _("pthread_mutex_unlock: ERR=%s\n"), be.strerror(stat)); + Jmsg1(NULL, M_ERROR, 0, _("pthread_mutex_unlock: ERR=%s\n"), be.bstrerror(stat)); return stat; } stat = pthread_mutex_destroy(&jq->mutex); @@ -212,7 +225,7 @@ int jobq_add(jobq_t *jq, JCR *jcr) /* Initialize termination condition variable */ if ((stat = pthread_cond_init(&jcr->term_wait, NULL)) != 0) { berrno be; - Jmsg1(jcr, M_FATAL, 0, _("Unable to init job cond variable: ERR=%s\n"), be.strerror(stat)); + Jmsg1(jcr, M_FATAL, 0, _("Unable to init job cond variable: ERR=%s\n"), be.bstrerror(stat)); return stat; } jcr->term_wait_inited = true; @@ -234,14 +247,14 @@ int jobq_add(jobq_t *jq, JCR *jcr) stat = pthread_create(&id, &jq->attr, sched_wait, (void *)sched_pkt); if (stat != 0) { /* thread not created */ berrno be; - Jmsg1(jcr, M_ERROR, 0, _("pthread_thread_create: ERR=%s\n"), be.strerror(stat)); + Jmsg1(jcr, M_ERROR, 0, _("pthread_thread_create: ERR=%s\n"), be.bstrerror(stat)); } return stat; } if ((stat = pthread_mutex_lock(&jq->mutex)) != 0) { berrno be; - Jmsg1(jcr, M_ERROR, 0, _("pthread_mutex_lock: ERR=%s\n"), be.strerror(stat)); + Jmsg1(jcr, M_ERROR, 0, _("pthread_mutex_lock: ERR=%s\n"), be.bstrerror(stat)); free_jcr(jcr); /* release jcr */ return stat; } @@ -306,7 +319,7 @@ int jobq_remove(jobq_t *jq, JCR *jcr) if ((stat = pthread_mutex_lock(&jq->mutex)) != 0) { berrno be; - Jmsg1(NULL, M_ERROR, 0, _("pthread_mutex_lock: ERR=%s\n"), 
be.strerror(stat)); + Jmsg1(NULL, M_ERROR, 0, _("pthread_mutex_lock: ERR=%s\n"), be.bstrerror(stat)); return stat; } @@ -352,7 +365,7 @@ static int start_server(jobq_t *jq) Dmsg0(2300, "Signal worker to wake up\n"); if ((stat = pthread_cond_broadcast(&jq->work)) != 0) { berrno be; - Jmsg1(NULL, M_ERROR, 0, _("pthread_cond_signal: ERR=%s\n"), be.strerror(stat)); + Jmsg1(NULL, M_ERROR, 0, _("pthread_cond_signal: ERR=%s\n"), be.bstrerror(stat)); return stat; } } else if (jq->num_workers < jq->max_workers) { @@ -361,7 +374,7 @@ static int start_server(jobq_t *jq) set_thread_concurrency(jq->max_workers + 1); if ((stat = pthread_create(&id, &jq->attr, jobq_server, (void *)jq)) != 0) { berrno be; - Jmsg1(NULL, M_ERROR, 0, _("pthread_create: ERR=%s\n"), be.strerror(stat)); + Jmsg1(NULL, M_ERROR, 0, _("pthread_create: ERR=%s\n"), be.bstrerror(stat)); return stat; } } @@ -387,7 +400,7 @@ void *jobq_server(void *arg) Dmsg0(2300, "Start jobq_server\n"); if ((stat = pthread_mutex_lock(&jq->mutex)) != 0) { berrno be; - Jmsg1(NULL, M_ERROR, 0, _("pthread_mutex_lock: ERR=%s\n"), be.strerror(stat)); + Jmsg1(NULL, M_ERROR, 0, _("pthread_mutex_lock: ERR=%s\n"), be.bstrerror(stat)); return NULL; } jq->num_workers++; @@ -440,6 +453,7 @@ void *jobq_server(void *arg) } } jq->running_jobs->append(je); + set_jcr_in_tsd(jcr); Dmsg1(2300, "Took jobid=%d from ready and appended to run\n", jcr->JobId); /* Release job queue lock */ @@ -463,9 +477,17 @@ void *jobq_server(void *arg) * put into the ready queue. 
*/ if (jcr->acquired_resource_locks) { - jcr->store->NumConcurrentJobs--; + if (jcr->rstore) { + jcr->rstore->NumConcurrentJobs = 0; + Dmsg1(200, "Dec rncj=%d\n", jcr->rstore->NumConcurrentJobs); + } + if (jcr->wstore) { + jcr->wstore->NumConcurrentJobs--; + Dmsg1(200, "Dec wncj=%d\n", jcr->wstore->NumConcurrentJobs); + } jcr->client->NumConcurrentJobs--; jcr->job->NumConcurrentJobs--; + jcr->acquired_resource_locks = false; } /* @@ -474,7 +496,6 @@ void *jobq_server(void *arg) if (jcr->job->RescheduleOnError && jcr->JobStatus != JS_Terminated && jcr->JobStatus != JS_Canceled && - jcr->job->RescheduleTimes > 0 && jcr->JobType == JT_BACKUP && (jcr->job->RescheduleTimes == 0 || jcr->reschedule_count < jcr->job->RescheduleTimes)) { @@ -519,7 +540,16 @@ void *jobq_server(void *arg) njcr->JobLevel = jcr->JobLevel; njcr->JobStatus = -1; set_jcr_job_status(njcr, jcr->JobStatus); - copy_storage(njcr, jcr->storage); + if (jcr->rstore) { + copy_rstorage(njcr, jcr->rstorage, _("previous Job")); + } else { + free_rstorage(njcr); + } + if (jcr->wstore) { + copy_wstorage(njcr, jcr->wstorage, _("previous Job")); + } else { + free_wstorage(njcr); + } njcr->messages = jcr->messages; Dmsg0(2300, "Call to run new job\n"); V(jq->mutex); @@ -529,10 +559,6 @@ void *jobq_server(void *arg) Dmsg0(2300, "Back from running new job.\n"); } /* Clean up and release old jcr */ - if (jcr->db) { - db_close_database(jcr, jcr->db); - jcr->db = NULL; - } Dmsg2(2300, "====== Termination job=%d use_cnt=%d\n", jcr->JobId, jcr->use_count()); jcr->SDJobStatus = 0; V(jq->mutex); /* release internal lock */ @@ -575,14 +601,19 @@ void *jobq_server(void *arg) } if (!acquire_resources(jcr)) { - je = jn; /* point to next waiting job */ - continue; + /* If resource conflict, job is canceled */ + if (!job_canceled(jcr)) { + je = jn; /* point to next waiting job */ + continue; + } } - /* Got all locks, now remove it from wait queue and append it - * to the ready queue + /* + * Got all locks, now remove it from 
wait queue and append it + * to the ready queue. Note, we may also get here if the + * job was canceled. Once it is "run", it will quickly + * terminate. */ - jcr->acquired_resource_locks = true; jq->waiting_jobs->remove(je); jq->ready_jobs->append(je); Dmsg1(2300, "moved JobId=%d from wait to ready queue\n", je->jcr->JobId); @@ -649,26 +680,49 @@ static bool acquire_resources(JCR *jcr) { bool skip_this_jcr = false; - if (jcr->JobType == JT_RESTORE || jcr->JobType == JT_VERIFY) { - /* - * Let only one Restore/verify job run at a time regardless - * of MaxConcurrentJobs. - */ - if (jcr->store->NumConcurrentJobs == 0) { - jcr->store->NumConcurrentJobs = 1; + jcr->acquired_resource_locks = false; + if (jcr->rstore) { + Dmsg1(200, "Rstore=%s\n", jcr->rstore->name()); + /* + * Let only one Restore/Verify job run at a time regardless + * of MaxConcurrentJobs. + */ + if (jcr->rstore->NumConcurrentJobs == 0) { + jcr->rstore->NumConcurrentJobs = 1; + Dmsg0(200, "Set rncj=1\n"); } else { + Dmsg1(200, "Fail rncj=%d\n", jcr->rstore->NumConcurrentJobs); set_jcr_job_status(jcr, JS_WaitStoreRes); return false; } - /* We are not doing a Restore or Verify */ - } else if (jcr->store->NumConcurrentJobs == 0 && - jcr->store->NumConcurrentJobs < jcr->store->MaxConcurrentJobs) { - /* Simple case, first job */ - jcr->store->NumConcurrentJobs = 1; - } else if (jcr->store->NumConcurrentJobs < jcr->store->MaxConcurrentJobs) { - jcr->store->NumConcurrentJobs++; - } else { - skip_this_jcr = true; + } + + if (jcr->wstore) { + Dmsg1(200, "Wstore=%s\n", jcr->wstore->name()); + if (jcr->rstore == jcr->wstore) { /* deadlock */ + jcr->rstore->NumConcurrentJobs = 0; /* back out rstore */ + Jmsg(jcr, M_FATAL, 0, _("Job canceled. 
Attempt to read and write same device.\n" + " Read storage \"%s\" (From %s) -- Write storage \"%s\" (From %s)\n"), + jcr->rstore->name(), jcr->rstore_source, jcr->wstore->name(), jcr->wstore_source); + set_jcr_job_status(jcr, JS_Canceled); + return false; + } + if (jcr->wstore->NumConcurrentJobs == 0 && + jcr->wstore->NumConcurrentJobs < jcr->wstore->MaxConcurrentJobs) { + /* Simple case, first job */ + jcr->wstore->NumConcurrentJobs = 1; + Dmsg0(200, "Set wncj=1\n"); + } else if (jcr->wstore->NumConcurrentJobs < jcr->wstore->MaxConcurrentJobs) { + jcr->wstore->NumConcurrentJobs++; + Dmsg1(200, "Inc wncj=%d\n", jcr->wstore->NumConcurrentJobs); + } else if (jcr->rstore) { + jcr->rstore->NumConcurrentJobs = 0; /* back out rstore */ + Dmsg1(200, "Fail wncj=%d\n", jcr->wstore->NumConcurrentJobs); + skip_this_jcr = true; + } else { + Dmsg1(200, "Fail wncj=%d\n", jcr->wstore->NumConcurrentJobs); + skip_this_jcr = true; + } } if (skip_this_jcr) { set_jcr_job_status(jcr, JS_WaitStoreRes); @@ -679,7 +733,14 @@ static bool acquire_resources(JCR *jcr) jcr->client->NumConcurrentJobs++; } else { /* Back out previous locks */ - jcr->store->NumConcurrentJobs--; + if (jcr->wstore) { + jcr->wstore->NumConcurrentJobs--; + Dmsg1(200, "Dec wncj=%d\n", jcr->wstore->NumConcurrentJobs); + } + if (jcr->rstore) { + jcr->rstore->NumConcurrentJobs = 0; + Dmsg1(200, "Dec rncj=%d\n", jcr->rstore->NumConcurrentJobs); + } set_jcr_job_status(jcr, JS_WaitClientRes); return false; } @@ -687,12 +748,19 @@ static bool acquire_resources(JCR *jcr) jcr->job->NumConcurrentJobs++; } else { /* Back out previous locks */ - jcr->store->NumConcurrentJobs--; + if (jcr->wstore) { + jcr->wstore->NumConcurrentJobs--; + Dmsg1(200, "Dec wncj=%d\n", jcr->wstore->NumConcurrentJobs); + } + if (jcr->rstore) { + jcr->rstore->NumConcurrentJobs = 0; + Dmsg1(200, "Dec rncj=%d\n", jcr->rstore->NumConcurrentJobs); + } jcr->client->NumConcurrentJobs--; set_jcr_job_status(jcr, JS_WaitJobRes); return false; } - /* Check actual 
device availability */ - /* ***FIXME****/ + + jcr->acquired_resource_locks = true; return true; }