X-Git-Url: https://git.sur5r.net/?a=blobdiff_plain;f=bacula%2Fsrc%2Fdird%2Fjob.c;h=48556976e74f34e007532d6dbb409f4838ac3274;hb=e1e7c2aaef15193daad550d12c2ca4c65e76778c;hp=7efd1a3de99ac440ca30cb74adc4e8a1e8519530;hpb=53f3f6e4ca4b44166668ca9a7647f67fd934162f;p=bacula%2Fbacula diff --git a/bacula/src/dird/job.c b/bacula/src/dird/job.c index 7efd1a3de9..48556976e7 100644 --- a/bacula/src/dird/job.c +++ b/bacula/src/dird/job.c @@ -1,7 +1,7 @@ /* Bacula® - The Network Backup Solution - Copyright (C) 2000-2009 Free Software Foundation Europe e.V. + Copyright (C) 2000-2010 Free Software Foundation Europe e.V. The main author of Bacula is Kern Sibbald, with contributions from many others, a complete list can be found in the file AUTHORS. @@ -31,7 +31,6 @@ * * Kern Sibbald, October MM * - * Version $Id$ */ #include "bacula.h" @@ -140,7 +139,6 @@ bool setup_job(JCR *jcr) goto bail_out; } Dmsg0(150, "DB opened\n"); - if (!jcr->fname) { jcr->fname = get_pool_memory(PM_FNAME); } @@ -196,7 +194,7 @@ bool setup_job(JCR *jcr) * this allows us to setup a proper job start record for restarting * in case of later errors. */ - switch (jcr->get_JobType()) { + switch (jcr->getJobType()) { case JT_BACKUP: if (!do_backup_init(jcr)) { backup_cleanup(jcr, JS_ErrorTerminated); @@ -229,8 +227,8 @@ bool setup_job(JCR *jcr) } break; default: - Pmsg1(0, _("Unimplemented job type: %d\n"), jcr->get_JobType()); - set_jcr_job_status(jcr, JS_ErrorTerminated); + Pmsg1(0, _("Unimplemented job type: %d\n"), jcr->getJobType()); + jcr->setJobStatus(JS_ErrorTerminated); goto bail_out; } @@ -246,7 +244,7 @@ bail_out: void update_job_end(JCR *jcr, int TermCode) { dequeue_messages(jcr); /* display any queued messages */ - set_jcr_job_status(jcr, TermCode); + jcr->setJobStatus(TermCode); update_job_end_record(jcr); } @@ -310,7 +308,7 @@ static void *job_thread(void *arg) generate_job_event(jcr, "JobRun"); generate_plugin_event(jcr, bEventJobRun); - switch (jcr->get_JobType()) { + switch (jcr->getJobType()) { case JT_BACKUP: if (!job_canceled(jcr) && do_backup(jcr)) { do_autoprune(jcr); @@ -348,7 +346,7 @@ static void *job_thread(void *arg) } break; default: - Pmsg1(0, _("Unimplemented job type: %d\n"), jcr->get_JobType()); + Pmsg1(0, _("Unimplemented job type: %d\n"), jcr->getJobType()); break; } @@ -380,7 +378,7 @@ bool cancel_job(UAContext *ua, JCR *jcr) char ed1[50]; int32_t old_status = jcr->JobStatus; - set_jcr_job_status(jcr, JS_Canceled); + jcr->setJobStatus(JS_Canceled); switch (old_status) { case JS_Created: @@ -412,6 +410,11 @@ bool cancel_job(UAContext *ua, JCR *jcr) fd->signal(BNET_TERMINATE); fd->close(); ua->jcr->file_bsock = NULL; + jcr->file_bsock->set_terminated(); + if (jcr->my_thread_id) { + pthread_kill(jcr->my_thread_id, TIMEOUT_SIGNAL); + Dmsg1(800, "Send kill to jid=%d\n", jcr->JobId); + } } /* Cancel Storage daemon */ @@ -445,6 +448,15 @@ bool cancel_job(UAContext *ua, JCR *jcr) sd->signal(BNET_TERMINATE); sd->close(); ua->jcr->store_bsock = NULL; + jcr->store_bsock->set_timed_out(); + jcr->store_bsock->set_terminated(); + if (jcr->SD_msg_chan) { + Dmsg2(400, "kill jobid=%d use=%d\n", (int)jcr->JobId, jcr->use_count()); + pthread_kill(jcr->SD_msg_chan, TIMEOUT_SIGNAL); + } + if (jcr->my_thread_id) { + pthread_kill(jcr->my_thread_id, TIMEOUT_SIGNAL); + } } break; } @@ -492,6 +504,15 @@ void cancel_storage_daemon_job(JCR *jcr) sd->close(); ua->jcr->store_bsock = NULL; jcr->sd_canceled = true; + jcr->store_bsock->set_timed_out(); + jcr->store_bsock->set_terminated(); + if (jcr->SD_msg_chan) { + Dmsg2(400, "kill jobid=%d use=%d\n", (int)jcr->JobId, jcr->use_count()); + pthread_kill(jcr->SD_msg_chan, TIMEOUT_SIGNAL); + } + if (jcr->my_thread_id) { + pthread_kill(jcr->my_thread_id, TIMEOUT_SIGNAL); + } } bail_out: free_jcr(control_jcr); @@ -524,17 +545,17 @@ static void job_monitor_watchdog(watchdog_t *self) /* check MaxWaitTime */ if (job_check_maxwaittime(jcr)) { - set_jcr_job_status(jcr, JS_Canceled); + jcr->setJobStatus(JS_Canceled); Qmsg(jcr, M_FATAL, 0, _("Max wait time exceeded. Job canceled.\n")); cancel = true; /* check MaxRunTime */ } else if (job_check_maxruntime(jcr)) { - set_jcr_job_status(jcr, JS_Canceled); + jcr->setJobStatus(JS_Canceled); Qmsg(jcr, M_FATAL, 0, _("Max run time exceeded. Job canceled.\n")); cancel = true; /* check MaxRunSchedTime */ } else if (job_check_maxschedruntime(jcr)) { - set_jcr_job_status(jcr, JS_Canceled); + jcr->setJobStatus(JS_Canceled); Qmsg(jcr, M_FATAL, 0, _("Max sched run time exceeded. Job canceled.\n")); cancel = true; } @@ -603,15 +624,15 @@ static bool job_check_maxruntime(JCR *jcr) watchdog_time, jcr->start_time, run_time, job->MaxRunTime, job->FullMaxRunTime, job->IncMaxRunTime, job->DiffMaxRunTime); - if (jcr->get_JobLevel() == L_FULL && job->FullMaxRunTime != 0 && + if (jcr->getJobLevel() == L_FULL && job->FullMaxRunTime != 0 && run_time >= job->FullMaxRunTime) { Dmsg0(200, "check_maxwaittime: FullMaxcancel\n"); cancel = true; - } else if (jcr->get_JobLevel() == L_DIFFERENTIAL && job->DiffMaxRunTime != 0 && + } else if (jcr->getJobLevel() == L_DIFFERENTIAL && job->DiffMaxRunTime != 0 && run_time >= job->DiffMaxRunTime) { Dmsg0(200, "check_maxwaittime: DiffMaxcancel\n"); cancel = true; - } else if (jcr->get_JobLevel() == L_INCREMENTAL && job->IncMaxRunTime != 0 && + } else if (jcr->getJobLevel() == L_INCREMENTAL && job->IncMaxRunTime != 0 && run_time >= job->IncMaxRunTime) { Dmsg0(200, "check_maxwaittime: IncMaxcancel\n"); cancel = true; @@ -675,58 +696,102 @@ DBId_t get_or_create_pool_record(JCR *jcr, char *pool_name) bool allow_duplicate_job(JCR *jcr) { JOB *job = jcr->job; - JCR *djcr; /* possible duplicate */ + JCR *djcr; /* possible duplicate job */ if (job->AllowDuplicateJobs) { return true; } - if (!job->AllowHigherDuplicates) { - foreach_jcr(djcr) { - if (jcr == djcr || djcr->JobId == 0) { - continue; /* do not cancel this job or consoles */ + Dmsg0(800, "Enter allow_duplicate_job\n"); + /* + * After this point, we do not want to allow any duplicate + * job to run. + */ + + foreach_jcr(djcr) { + if (jcr == djcr || djcr->JobId == 0) { + continue; /* do not cancel this job or consoles */ + } + if (strcmp(job->name(), djcr->job->name()) == 0) { + bool cancel_dup = false; + bool cancel_me = false; + if (job->DuplicateJobProximity > 0) { + utime_t now = (utime_t)time(NULL); + if ((now - djcr->start_time) > job->DuplicateJobProximity) { + continue; /* not really a duplicate */ + } } - if (strcmp(job->name(), djcr->job->name()) == 0) { - bool cancel_queued = false; - if (job->DuplicateJobProximity > 0) { - utime_t now = (utime_t)time(NULL); - if ((now - djcr->start_time) > job->DuplicateJobProximity) { - continue; /* not really a duplicate */ + if (job->CancelLowerLevelDuplicates && + djcr->getJobType() == 'B' && jcr->getJobType() == 'B') { + switch (jcr->getJobLevel()) { + case L_FULL: + if (djcr->getJobLevel() == L_DIFFERENTIAL || + djcr->getJobLevel() == L_INCREMENTAL) { + cancel_dup = true; + } + break; + case L_DIFFERENTIAL: + if (djcr->getJobLevel() == L_INCREMENTAL) { + cancel_dup = true; + } + if (djcr->getJobLevel() == L_FULL) { + cancel_me = true; + } + break; + case L_INCREMENTAL: + if (djcr->getJobLevel() == L_FULL || + djcr->getJobLevel() == L_DIFFERENTIAL) { + cancel_me = true; } } - /* Cancel */ - /* If CancelQueuedDuplicates is set do so only if job is queued */ - if (job->CancelQueuedDuplicates) { - switch (djcr->JobStatus) { - case JS_Created: - case JS_WaitJobRes: - case JS_WaitClientRes: - case JS_WaitStoreRes: - case JS_WaitPriority: - case JS_WaitMaxJobs: - case JS_WaitStartTime: - cancel_queued = true; - break; - default: - break; - } - } - if (cancel_queued || job->CancelRunningDuplicates) { - UAContext *ua = new_ua_context(djcr); - Jmsg(jcr, M_INFO, 0, _("Cancelling duplicate JobId=%d.\n"), djcr->JobId); - ua->jcr = djcr; - cancel_job(ua, djcr); - free_ua_context(ua); - Dmsg2(800, "Have cancelled JCR %p JobId=%d\n", djcr, djcr->JobId); - } else { - /* Zap current job */ - Jmsg(jcr, M_FATAL, 0, _("JobId %d already running. Duplicate job not allowed.\n"), - djcr->JobId); + /* + * cancel_dup will be done below + */ + if (cancel_me) { + /* Zap current job */ + Jmsg(jcr, M_FATAL, 0, _("JobId %d already running. Duplicate job not allowed.\n"), + djcr->JobId); + break; /* get out of foreach_jcr */ } - break; /* did our work, get out */ + } + /* Cancel one of the two jobs (me or dup) */ + /* If CancelQueuedDuplicates is set do so only if job is queued */ + if (job->CancelQueuedDuplicates) { + switch (djcr->JobStatus) { + case JS_Created: + case JS_WaitJobRes: + case JS_WaitClientRes: + case JS_WaitStoreRes: + case JS_WaitPriority: + case JS_WaitMaxJobs: + case JS_WaitStartTime: + cancel_dup = true; /* cancel queued duplicate */ + break; + default: + break; + } + } + if (cancel_dup || job->CancelRunningDuplicates) { + /* Zap the duplicated job djcr */ + UAContext *ua = new_ua_context(jcr); + Jmsg(jcr, M_INFO, 0, _("Cancelling duplicate JobId=%d.\n"), djcr->JobId); + cancel_job(ua, djcr); + bmicrosleep(0, 500000); + cancel_job(ua, djcr); + free_ua_context(ua); + Dmsg2(800, "Cancel dup %p JobId=%d\n", djcr, djcr->JobId); + } else { + /* Zap current job */ + Jmsg(jcr, M_FATAL, 0, _("JobId %d already running. Duplicate job not allowed.\n"), + djcr->JobId); + Dmsg2(800, "Cancel me %p JobId=%d\n", jcr, jcr->JobId); } + Dmsg4(800, "curJobId=%d use_cnt=%d dupJobId=%d use_cnt=%d\n", + jcr->JobId, jcr->use_count(), djcr->JobId, djcr->use_count()); + break; /* did our work, get out of foreach loop */ } - endeach_jcr(djcr); } + endeach_jcr(djcr); + return true; } @@ -740,7 +805,7 @@ void apply_pool_overrides(JCR *jcr) /* * Apply any level related Pool selections */ - switch (jcr->get_JobLevel()) { + switch (jcr->getJobLevel()) { case L_FULL: if (jcr->full_pool) { jcr->pool = jcr->full_pool; @@ -858,8 +923,8 @@ void init_jcr_job_record(JCR *jcr) jcr->jr.SchedTime = jcr->sched_time; jcr->jr.StartTime = jcr->start_time; jcr->jr.EndTime = 0; /* perhaps rescheduled, clear it */ - jcr->jr.JobType = jcr->get_JobType(); - jcr->jr.JobLevel = jcr->get_JobLevel(); + jcr->jr.JobType = jcr->getJobType(); + jcr->jr.JobLevel = jcr->getJobLevel(); jcr->jr.JobStatus = jcr->JobStatus; jcr->jr.JobId = jcr->JobId; bstrncpy(jcr->jr.Name, jcr->job->name(), sizeof(jcr->jr.Name)); @@ -947,14 +1012,6 @@ void create_unique_job_name(JCR *jcr, const char *base_name) /* Called directly from job rescheduling */ void dird_free_jcr_pointers(JCR *jcr) { - if (jcr->sd_auth_key) { - free(jcr->sd_auth_key); - jcr->sd_auth_key = NULL; - } - if (jcr->where) { - free(jcr->where); - jcr->where = NULL; - } if (jcr->file_bsock) { Dmsg0(200, "Close File bsock\n"); bnet_close(jcr->file_bsock); @@ -965,27 +1022,16 @@ void dird_free_jcr_pointers(JCR *jcr) bnet_close(jcr->store_bsock); jcr->store_bsock = NULL; } - if (jcr->fname) { - Dmsg0(200, "Free JCR fname\n"); - free_pool_memory(jcr->fname); - jcr->fname = NULL; - } - if (jcr->RestoreBootstrap) { - free(jcr->RestoreBootstrap); - jcr->RestoreBootstrap = NULL; - } - if (jcr->client_uname) { - free_pool_memory(jcr->client_uname); - jcr->client_uname = NULL; - } - if (jcr->attr) { - free_pool_memory(jcr->attr); - jcr->attr = NULL; - } - if (jcr->ar) { - free(jcr->ar); - jcr->ar = NULL; - } + + bfree_and_null(jcr->sd_auth_key); + bfree_and_null(jcr->where); + bfree_and_null(jcr->RestoreBootstrap); + bfree_and_null(jcr->ar); + + free_and_null_pool_memory(jcr->JobIds); + free_and_null_pool_memory(jcr->client_uname); + free_and_null_pool_memory(jcr->attr); + free_and_null_pool_memory(jcr->fname); } /* @@ -1011,36 +1057,14 @@ void dird_free_jcr(JCR *jcr) db_close_database(jcr, jcr->db); jcr->db = NULL; } - if (jcr->stime) { - Dmsg0(200, "Free JCR stime\n"); - free_pool_memory(jcr->stime); - jcr->stime = NULL; - } - if (jcr->fname) { - Dmsg0(200, "Free JCR fname\n"); - free_pool_memory(jcr->fname); - jcr->fname = NULL; - } - if (jcr->pool_source) { - free_pool_memory(jcr->pool_source); - jcr->pool_source = NULL; - } - if (jcr->catalog_source) { - free_pool_memory(jcr->catalog_source); - jcr->catalog_source = NULL; - } - if (jcr->rpool_source) { - free_pool_memory(jcr->rpool_source); - jcr->rpool_source = NULL; - } - if (jcr->wstore_source) { - free_pool_memory(jcr->wstore_source); - jcr->wstore_source = NULL; - } - if (jcr->rstore_source) { - free_pool_memory(jcr->rstore_source); - jcr->rstore_source = NULL; - } + + free_and_null_pool_memory(jcr->stime); + free_and_null_pool_memory(jcr->fname); + free_and_null_pool_memory(jcr->pool_source); + free_and_null_pool_memory(jcr->catalog_source); + free_and_null_pool_memory(jcr->rpool_source); + free_and_null_pool_memory(jcr->wstore_source); + free_and_null_pool_memory(jcr->rstore_source); /* Delete lists setup to hold storage pointers */ free_rwstorage(jcr); @@ -1094,7 +1118,7 @@ void set_jcr_defaults(JCR *jcr, JOB *job) jcr->set_JobType(job->JobType); jcr->JobStatus = JS_Created; - switch (jcr->get_JobType()) { + switch (jcr->getJobType()) { case JT_ADMIN: jcr->set_JobLevel(L_NONE); break; @@ -1156,8 +1180,8 @@ void set_jcr_defaults(JCR *jcr, JOB *job) /* This can be overridden by Console program */ jcr->verify_job = job->verify_job; /* If no default level given, set one */ - if (jcr->get_JobLevel() == 0) { - switch (jcr->get_JobType()) { + if (jcr->getJobLevel() == 0) { + switch (jcr->getJobType()) { case JT_VERIFY: jcr->set_JobLevel(L_VERIFY_CATALOG); break;