]> git.sur5r.net Git - bacula/bacula/commitdiff
Attempt to fix duplicate job kill seg fault
authorKern Sibbald <kern@sibbald.com>
Fri, 18 Jun 2010 21:43:38 +0000 (23:43 +0200)
committerEric Bollengier <eric@eb.homelinux.org>
Mon, 2 Aug 2010 14:53:54 +0000 (16:53 +0200)
bacula/src/dird/jobq.c
bacula/src/dird/msgchan.c
bacula/src/jcr.h
bacula/src/lib/jcr.c
bacula/src/lib/protos.h

index 68febb40759ebb0100363584de5a2475bf198a76..0e4d68898d08edaca27f3e3f3d9cc3f284379447 100644 (file)
@@ -170,7 +170,7 @@ void *sched_wait(void *arg)
    JCR *jcr = ((wait_pkt *)arg)->jcr;
    jobq_t *jq = ((wait_pkt *)arg)->jq;
 
-   set_jcr_in_tsd(jcr);
+   set_jcr_in_tsd(INVALID_JCR);
    Dmsg0(2300, "Enter sched_wait.\n");
    free(arg);
    time_t wtime = jcr->sched_time - time(NULL);
@@ -437,6 +437,7 @@ void *jobq_server(void *arg)
          jq->running_jobs->append(je);
 
          /* Attach jcr to this thread while we run the job */
+         jcr->set_killable(true);
          set_jcr_in_tsd(jcr);
          Dmsg1(2300, "Took jobid=%d from ready and appended to run\n", jcr->JobId);
 
@@ -450,6 +451,7 @@ void *jobq_server(void *arg)
 
          /* Job finished detach from thread */
          remove_jcr_from_tsd(je->jcr);
+         je->jcr->set_killable(false);
 
          Dmsg2(2300, "Back from user engine jobid=%d use=%d.\n", jcr->JobId,
             jcr->use_count());
index f354fb3b457eeed83dd53ff03a3027e12677392c..b8dc2972448093c755068b7b08edf686b0eff0ec 100644 (file)
@@ -379,7 +379,7 @@ extern "C" void *msg_thread(void *arg)
    uint64_t JobBytes;
 
    pthread_detach(pthread_self());
-   set_jcr_in_tsd(jcr, false /* no thread update in jcr */);
+   set_jcr_in_tsd(jcr);
    jcr->SD_msg_chan = pthread_self();
    pthread_cleanup_push(msg_thread_cleanup, arg);
    sd = jcr->store_bsock;
index 2d38956798584304b67e5a975f526a9603f31006..71a3362f5883f5f9b9ef47749c8cb051286e873c 100644 (file)
@@ -178,6 +178,7 @@ private:
    volatile int32_t _use_count;       /* use count */
    int32_t m_JobType;                 /* backup, restore, verify ... */
    int32_t m_JobLevel;                /* Job level */
+   bool my_thread_killable;           /* can we kill the thread? */
 public:
    void lock() {P(mutex); };
    void unlock() {V(mutex); };
@@ -203,10 +204,11 @@ public:
    void setJobStatus(int JobStatus);      /* in lib/jcr.c */
    bool JobReads();                       /* in lib/jcr.c */
    void my_thread_send_signal(int sig);   /* in lib/jcr.c */
+   void set_killable(bool killable);      /* in lib/jcr.c */
+   bool is_killable() const { return my_thread_killable; };
 
    /* Global part of JCR common to all daemons */
    dlink link;                        /* JCR chain link */
-   bool my_thread_running;            /* is the thread controlling jcr running*/
    pthread_t my_thread_id;            /* id of thread controlling jcr */
    BSOCK *dir_bsock;                  /* Director bsock or NULL if we are him */
    BSOCK *store_bsock;                /* Storage connection socket */
index 445340f64f238f44e01caefd62d801373f8402fd..9118bf787861446d6c2c39c5868a944393cd2f61 100644 (file)
@@ -365,7 +365,6 @@ JCR *new_jcr(int size, JCR_free_HANDLER *daemon_free_jcr)
    jcr->setJobType(JT_SYSTEM);           /* internal job until defined */
    jcr->setJobLevel(L_NONE);
    jcr->setJobStatus(JS_Created);        /* ready to run */
-   set_jcr_in_tsd(jcr);
    sigtimer.sa_flags = 0;
    sigtimer.sa_handler = timeout_handler;
    sigfillset(&sigtimer.sa_mask);
@@ -413,6 +412,7 @@ static void free_common_jcr(JCR *jcr)
 {
    /* Uses jcr lock/unlock */
    remove_jcr_from_tsd(jcr);
+   jcr->set_killable(false);
 
    jcr->destroy_mutex();
 
@@ -588,20 +588,29 @@ void remove_jcr_from_tsd(JCR *jcr)
 {
    JCR *tjcr = get_jcr_from_tsd();
    if (tjcr == jcr) { 
-      jcr->lock();
-      jcr->my_thread_running = false;
-      memset(&jcr->my_thread_id, 0, sizeof(jcr->my_thread_id));
-      jcr->unlock();
       set_jcr_in_tsd(INVALID_JCR);
    }
 }
 
+void JCR::set_killable(bool killable)
+{
+   JCR *jcr = this;
+   jcr->lock();
+   jcr->my_thread_killable = killable;
+   if (killable) {
+      jcr->my_thread_id = pthread_self();
+   } else {
+      memset(&jcr->my_thread_id, 0, sizeof(jcr->my_thread_id));
+   }
+   jcr->unlock();
+}
+
 /*
  * Put this jcr in the thread specifc data
  *  if update_thread_info is true and the jcr is valide,
  *  we update the my_thread_id in the JCR
  */
-void set_jcr_in_tsd(JCR *jcr, bool update_thread_info)
+void set_jcr_in_tsd(JCR *jcr)
 {
    int status = pthread_setspecific(jcr_key, (void *)jcr);
    if (status != 0) {
@@ -609,30 +618,18 @@ void set_jcr_in_tsd(JCR *jcr, bool update_thread_info)
       Jmsg1(jcr, M_ABORT, 0, _("pthread_setspecific failed: ERR=%s\n"), 
             be.bstrerror(status));
    }
-
-   /* We explicitly ask to set a jcr in tsd, we can update jcr->my_thread
-    */
-   if (update_thread_info && jcr && jcr != INVALID_JCR) {
-      Dmsg2(100, "setting my_thread_stuffs 0x%p => 0x%p\n", 
-            jcr->my_thread_id, pthread_self());
-      jcr->lock();
-      //ASSERT(jcr->my_thread_running == false);
-      jcr->my_thread_id = pthread_self();
-      jcr->my_thread_running = true;
-      jcr->unlock();
-   }
 }
 
 void JCR::my_thread_send_signal(int sig)
 {
    this->lock();
-   if (   this->my_thread_running 
-       && !pthread_equal(this->my_thread_id, pthread_self()))
+   if (this->is_killable() &&
+       !pthread_equal(this->my_thread_id, pthread_self()))
    {
       Dmsg1(800, "Send kill to jid=%d\n", this->JobId);
       pthread_kill(this->my_thread_id, sig);
 
-   } else if (!this->my_thread_running) {
+   } else if (!this->is_killable()) {
       Dmsg1(10, "Warning, can't send kill to jid=%d\n", this->JobId);
    }
    this->unlock();
@@ -1181,6 +1178,10 @@ void dbg_print_jcr(FILE *fp)
    for (JCR *jcr = (JCR *)jcrs->first(); jcr ; jcr = (JCR *)jcrs->next(jcr)) {
       fprintf(fp, "threadid=%p JobId=%d JobStatus=%c jcr=%p name=%s\n", 
               (void *)jcr->my_thread_id, (int)jcr->JobId, jcr->JobStatus, jcr, jcr->Job);
+      fprintf(fp, "threadid=%p killable=%d JobId=%d JobStatus=%c "
+                  "jcr=%p name=%s\n",
+              (void *)jcr->my_thread_id, jcr->is_killable(),
+              (int)jcr->JobId, jcr->JobStatus, jcr, jcr->Job);
       fprintf(fp, "\tuse_count=%i\n", jcr->use_count());
       fprintf(fp, "\tJobType=%c JobLevel=%c\n",
               jcr->getJobType(), jcr->getJobLevel());
index 450a70ad8ba7791109f3adb3c38f6bd13c3853c0..b71ca942e894864b95d680f91b25e6ca6a82cfa3 100644 (file)
@@ -207,7 +207,7 @@ JCR     *jcr_walk_next(JCR *prev_jcr);
 void     jcr_walk_end(JCR *jcr);
 int      job_count();
 JCR     *get_jcr_from_tsd();
-void     set_jcr_in_tsd(JCR *jcr, bool update_thread_info=true);
+void     set_jcr_in_tsd(JCR *jcr);
 void     remove_jcr_from_tsd(JCR *jcr);
 uint32_t get_jobid_from_tsd();             
 uint32_t get_jobid_from_tid(pthread_t tid);