From f3a0ff8116933b7d469f7b95355083aaed07eb9d Mon Sep 17 00:00:00 2001 From: Kern Sibbald Date: Fri, 18 Jun 2010 23:43:38 +0200 Subject: [PATCH] Attempt to fix duplicate job kill seg fault --- bacula/src/dird/jobq.c | 4 +++- bacula/src/dird/msgchan.c | 2 +- bacula/src/jcr.h | 4 +++- bacula/src/lib/jcr.c | 43 ++++++++++++++++++++------------------- bacula/src/lib/protos.h | 2 +- 5 files changed, 30 insertions(+), 25 deletions(-) diff --git a/bacula/src/dird/jobq.c b/bacula/src/dird/jobq.c index 68febb4075..0e4d68898d 100644 --- a/bacula/src/dird/jobq.c +++ b/bacula/src/dird/jobq.c @@ -170,7 +170,7 @@ void *sched_wait(void *arg) JCR *jcr = ((wait_pkt *)arg)->jcr; jobq_t *jq = ((wait_pkt *)arg)->jq; - set_jcr_in_tsd(jcr); + set_jcr_in_tsd(INVALID_JCR); Dmsg0(2300, "Enter sched_wait.\n"); free(arg); time_t wtime = jcr->sched_time - time(NULL); @@ -437,6 +437,7 @@ void *jobq_server(void *arg) jq->running_jobs->append(je); /* Attach jcr to this thread while we run the job */ + jcr->set_killable(true); set_jcr_in_tsd(jcr); Dmsg1(2300, "Took jobid=%d from ready and appended to run\n", jcr->JobId); @@ -450,6 +451,7 @@ void *jobq_server(void *arg) /* Job finished detach from thread */ remove_jcr_from_tsd(je->jcr); + je->jcr->set_killable(false); Dmsg2(2300, "Back from user engine jobid=%d use=%d.\n", jcr->JobId, jcr->use_count()); diff --git a/bacula/src/dird/msgchan.c b/bacula/src/dird/msgchan.c index f354fb3b45..b8dc297244 100644 --- a/bacula/src/dird/msgchan.c +++ b/bacula/src/dird/msgchan.c @@ -379,7 +379,7 @@ extern "C" void *msg_thread(void *arg) uint64_t JobBytes; pthread_detach(pthread_self()); - set_jcr_in_tsd(jcr, false /* no thread update in jcr */); + set_jcr_in_tsd(jcr); jcr->SD_msg_chan = pthread_self(); pthread_cleanup_push(msg_thread_cleanup, arg); sd = jcr->store_bsock; diff --git a/bacula/src/jcr.h b/bacula/src/jcr.h index 2d38956798..71a3362f58 100644 --- a/bacula/src/jcr.h +++ b/bacula/src/jcr.h @@ -178,6 +178,7 @@ private: volatile int32_t _use_count; /* use count */ int32_t m_JobType; /* backup, restore, verify ... */ int32_t m_JobLevel; /* Job level */ + bool my_thread_killable; /* can we kill the thread? */ public: void lock() {P(mutex); }; void unlock() {V(mutex); }; @@ -203,10 +204,11 @@ public: void setJobStatus(int JobStatus); /* in lib/jcr.c */ bool JobReads(); /* in lib/jcr.c */ void my_thread_send_signal(int sig); /* in lib/jcr.c */ + void set_killable(bool killable); /* in lib/jcr.c */ + bool is_killable() const { return my_thread_killable; }; /* Global part of JCR common to all daemons */ dlink link; /* JCR chain link */ - bool my_thread_running; /* is the thread controlling jcr running*/ pthread_t my_thread_id; /* id of thread controlling jcr */ BSOCK *dir_bsock; /* Director bsock or NULL if we are him */ BSOCK *store_bsock; /* Storage connection socket */ diff --git a/bacula/src/lib/jcr.c b/bacula/src/lib/jcr.c index 445340f64f..9118bf7878 100644 --- a/bacula/src/lib/jcr.c +++ b/bacula/src/lib/jcr.c @@ -365,7 +365,6 @@ JCR *new_jcr(int size, JCR_free_HANDLER *daemon_free_jcr) jcr->setJobType(JT_SYSTEM); /* internal job until defined */ jcr->setJobLevel(L_NONE); jcr->setJobStatus(JS_Created); /* ready to run */ - set_jcr_in_tsd(jcr); sigtimer.sa_flags = 0; sigtimer.sa_handler = timeout_handler; sigfillset(&sigtimer.sa_mask); @@ -413,6 +412,7 @@ static void free_common_jcr(JCR *jcr) { /* Uses jcr lock/unlock */ remove_jcr_from_tsd(jcr); + jcr->set_killable(false); jcr->destroy_mutex(); @@ -588,20 +588,29 @@ void remove_jcr_from_tsd(JCR *jcr) { JCR *tjcr = get_jcr_from_tsd(); if (tjcr == jcr) { - jcr->lock(); - jcr->my_thread_running = false; - memset(&jcr->my_thread_id, 0, sizeof(jcr->my_thread_id)); - jcr->unlock(); set_jcr_in_tsd(INVALID_JCR); } } +void JCR::set_killable(bool killable) +{ + JCR *jcr = this; + jcr->lock(); + jcr->my_thread_killable = killable; + if (killable) { + jcr->my_thread_id = pthread_self(); + } else { + memset(&jcr->my_thread_id, 0, sizeof(jcr->my_thread_id)); + } + jcr->unlock(); +} + /* * Put this jcr in the thread specifc data * if update_thread_info is true and the jcr is valide, * we update the my_thread_id in the JCR */ -void set_jcr_in_tsd(JCR *jcr, bool update_thread_info) +void set_jcr_in_tsd(JCR *jcr) { int status = pthread_setspecific(jcr_key, (void *)jcr); if (status != 0) { @@ -609,30 +618,18 @@ void set_jcr_in_tsd(JCR *jcr, bool update_thread_info) Jmsg1(jcr, M_ABORT, 0, _("pthread_setspecific failed: ERR=%s\n"), be.bstrerror(status)); } - - /* We explicitly ask to set a jcr in tsd, we can update jcr->my_thread - */ - if (update_thread_info && jcr && jcr != INVALID_JCR) { - Dmsg2(100, "setting my_thread_stuffs 0x%p => 0x%p\n", - jcr->my_thread_id, pthread_self()); - jcr->lock(); - //ASSERT(jcr->my_thread_running == false); - jcr->my_thread_id = pthread_self(); - jcr->my_thread_running = true; - jcr->unlock(); - } } void JCR::my_thread_send_signal(int sig) { this->lock(); - if ( this->my_thread_running - && !pthread_equal(this->my_thread_id, pthread_self())) + if (this->is_killable() && + !pthread_equal(this->my_thread_id, pthread_self())) { Dmsg1(800, "Send kill to jid=%d\n", this->JobId); pthread_kill(this->my_thread_id, sig); - } else if (!this->my_thread_running) { + } else if (!this->is_killable()) { Dmsg1(10, "Warning, can't send kill to jid=%d\n", this->JobId); } this->unlock(); @@ -1181,6 +1178,10 @@ void dbg_print_jcr(FILE *fp) for (JCR *jcr = (JCR *)jcrs->first(); jcr ; jcr = (JCR *)jcrs->next(jcr)) { fprintf(fp, "threadid=%p JobId=%d JobStatus=%c jcr=%p name=%s\n", (void *)jcr->my_thread_id, (int)jcr->JobId, jcr->JobStatus, jcr, jcr->Job); + fprintf(fp, "threadid=%p killable=%d JobId=%d JobStatus=%c " + "jcr=%p name=%s\n", + (void *)jcr->my_thread_id, jcr->is_killable(), + (int)jcr->JobId, jcr->JobStatus, jcr, jcr->Job); fprintf(fp, "\tuse_count=%i\n", jcr->use_count()); fprintf(fp, "\tJobType=%c JobLevel=%c\n", jcr->getJobType(), jcr->getJobLevel()); diff --git a/bacula/src/lib/protos.h b/bacula/src/lib/protos.h index 450a70ad8b..b71ca942e8 100644 --- a/bacula/src/lib/protos.h +++ b/bacula/src/lib/protos.h @@ -207,7 +207,7 @@ JCR *jcr_walk_next(JCR *prev_jcr); void jcr_walk_end(JCR *jcr); int job_count(); JCR *get_jcr_from_tsd(); -void set_jcr_in_tsd(JCR *jcr, bool update_thread_info=true); +void set_jcr_in_tsd(JCR *jcr); void remove_jcr_from_tsd(JCR *jcr); uint32_t get_jobid_from_tsd(); uint32_t get_jobid_from_tid(pthread_t tid); -- 2.39.5