From: Eric Bollengier Date: Wed, 8 Oct 2008 14:18:09 +0000 (+0000) Subject: ebl Try to cancel read() with pthread_kill(TIMEOUT) to fix #1166 X-Git-Tag: Release-3.0.0~850 X-Git-Url: https://git.sur5r.net/?a=commitdiff_plain;h=1d004c676c8f9884dc2c7fe8266b3660019f8fc4;p=bacula%2Fbacula ebl Try to cancel read() with pthread_kill(TIMEOUT) to fix #1166 git-svn-id: https://bacula.svn.sourceforge.net/svnroot/bacula/trunk@7734 91ce42f0-d328-0410-95d8-f526ca767f89 --- diff --git a/bacula/patches/testing/bug_1166_cancel_read.patch b/bacula/patches/testing/bug_1166_cancel_read.patch new file mode 100644 index 0000000000..c25e1d017b --- /dev/null +++ b/bacula/patches/testing/bug_1166_cancel_read.patch @@ -0,0 +1,118 @@ +Index: src/dird/backup.c +=================================================================== +--- src/dird/backup.c (revision 7725) ++++ src/dird/backup.c (working copy) +@@ -193,6 +193,7 @@ + Jmsg(jcr, M_INFO, 0, _("Start Backup JobId %s, Job=%s\n"), + edit_uint64(jcr->JobId, ed1), jcr->Job); + ++ jcr->set_owner(); /* we are responsible for this job */ + set_jcr_job_status(jcr, JS_Running); + Dmsg2(100, "JobId=%d JobLevel=%c\n", jcr->jr.JobId, jcr->jr.JobLevel); + if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) { +@@ -361,8 +362,10 @@ + if (timeout) { + tid = start_bsock_timer(fd, timeout); /* TODO: New timeout directive??? 
*/ + } ++ Dmsg0(1, "======== Wait for a message\n"); + /* Wait for Client to terminate */ + while ((n = bget_dirmsg(fd)) >= 0) { ++ Dmsg1(1, "======= Get client message=%s\n", fd->msg); + if (!fd_ok && + (sscanf(fd->msg, EndJob, &jcr->FDJobStatus, &JobFiles, + &ReadBytes, &JobBytes, &Errors, &VSS, &Encrypt) == 7 || +Index: src/dird/job.c +=================================================================== +--- src/dird/job.c (revision 7731) ++++ src/dird/job.c (working copy) +@@ -390,6 +390,14 @@ + jobq_remove(&job_queue, jcr); /* attempt to remove it from queue */ + return true; + ++ case JS_Running: ++ if (jcr->get_JobType() == JT_BACKUP && jcr->file_bsock) { ++ /* When in JS_Running state, the main thread can wait for ++ * EndJob message from the Client. We send a signal to the job ++ * thread to cancel the read() ++ */ ++ jcr->send_signal_to_owner(TIMEOUT_SIGNAL); ++ } + default: + /* Cancel File daemon */ + if (jcr->file_bsock) { +Index: src/jcr.h +=================================================================== +--- src/jcr.h (revision 7725) ++++ src/jcr.h (working copy) +@@ -180,6 +180,8 @@ + bool is_job_canceled() {return job_canceled(this); }; + int32_t get_JobType() { return m_JobType; }; + int32_t get_JobLevel() { return m_JobLevel; }; ++ void set_owner(); ++ int send_signal_to_owner(int signal); + + const char *get_OperationName(); /* in lib/jcr.c */ + const char *get_ActionName(bool past); /* in lib/jcr.c */ +Index: src/lib/jcr.c +=================================================================== +--- src/lib/jcr.c (revision 7725) ++++ src/lib/jcr.c (working copy) +@@ -303,6 +303,31 @@ + } + + /* ++ * Update the my_thread_id variable with the current thread id ++ */ ++void JCR::set_owner() ++{ ++ Dmsg1(0, "JCR::set_owner(%p)\n", pthread_self()); ++ this->my_thread_id = pthread_self(); ++} ++ ++/* ++ * Send a signal to the JCR thread owner. 
(to break a system call) ++ * (set_owner() must have been called before) ++ */ ++int JCR::send_signal_to_owner(int sig) ++{ ++ int ret=0; ++ Dmsg2(0, "Sending TIMEOUT ?? to %p by %p\n", this->my_thread_id, pthread_self()); ++ ++ if (!pthread_equal(this->my_thread_id, pthread_self())) { ++ Dmsg2(0, "Sending TIMEOUT to %p by %p\n", this->my_thread_id, pthread_self()); ++ ret = pthread_kill(this->my_thread_id, sig); ++ } ++ return ret; ++} ++ ++/* + * Push a subroutine address into the job end callback stack + */ + void job_end_push(JCR *jcr, void job_end_cb(JCR *jcr,void *), void *ctx) +@@ -719,7 +744,7 @@ + bool set_waittime = false; + int oldJobStatus = jcr->JobStatus; + +- Dmsg2(800, "set_jcr_job_status(%s, %c)\n", jcr->Job, JobStatus); ++ Dmsg2(2, "set_jcr_job_status(%s, %c)\n", jcr->Job, JobStatus); + /* if wait state is new, we keep current time for watchdog MaxWaitTime */ + switch (JobStatus) { + case JS_WaitFD: +@@ -740,7 +765,7 @@ + * For a set of errors, ... keep the current status + * so it isn't lost. For all others, set it. + */ +- Dmsg3(300, "jid=%u OnEntry JobStatus=%c set=%c\n", (uint32_t)jcr->JobId, ++ Dmsg3(2, "jid=%u OnEntry JobStatus=%c set=%c\n", (uint32_t)jcr->JobId, + jcr->JobStatus, JobStatus); + switch (jcr->JobStatus) { + case JS_ErrorTerminated: +@@ -781,7 +806,7 @@ + } + } + if (oldJobStatus != jcr->JobStatus) { +- Dmsg3(200, "jid=%u leave set_old_job_status=%c new_set=%c\n", (uint32_t)jcr->JobId, ++ Dmsg3(2, "jid=%u leave set_old_job_status=%c new_set=%c\n", (uint32_t)jcr->JobId, + oldJobStatus, JobStatus); + // generate_plugin_event(jcr, bEventStatusChange, NULL); + }