From: Kern Sibbald Date: Fri, 23 Oct 2009 14:34:01 +0000 (+0200) Subject: Fix Qmsg race condition reported by Eric X-Git-Tag: Release-7.0.0~2598^2~60 X-Git-Url: https://git.sur5r.net/?a=commitdiff_plain;h=77534941c464a9d13df5376e6006bb152909b4df;p=bacula%2Fbacula Fix Qmsg race condition reported by Eric --- diff --git a/bacula/src/jcr.h b/bacula/src/jcr.h index 2700e2d447..16a4d7d2d8 100644 --- a/bacula/src/jcr.h +++ b/bacula/src/jcr.h @@ -213,8 +213,8 @@ public: BSOCK *file_bsock; /* File daemon connection socket */ JCR_free_HANDLER *daemon_free_jcr; /* Local free routine */ dlist *msg_queue; /* Queued messages */ + pthread_mutex_t msg_queue_mutex; /* message queue mutex */ alist job_end_push; /* Job end pushed calls */ - bool dequeuing; /* dequeuing messages */ POOLMEM *VolumeName; /* Volume name desired -- pool_memory */ POOLMEM *errmsg; /* edited error message */ char Job[MAX_NAME_LENGTH]; /* Unique name of this Job */ diff --git a/bacula/src/lib/jcr.c b/bacula/src/lib/jcr.c index fb039843c9..887aa439f1 100644 --- a/bacula/src/lib/jcr.c +++ b/bacula/src/lib/jcr.c @@ -346,6 +346,11 @@ JCR *new_jcr(int size, JCR_free_HANDLER *daemon_free_jcr) memset(jcr, 0, size); jcr->my_thread_id = pthread_self(); jcr->msg_queue = New(dlist(item, &item->link)); + if ((status = pthread_mutex_init(&jcr->msg_queue_mutex, NULL)) != 0) { + berrno be; + Jmsg(NULL, M_ABORT, 0, _("Could not init msg_queue mutex. ERR=%s\n"), + be.bstrerror(status)); + } jcr->job_end_push.init(1, false); jcr->sched_time = time(NULL); jcr->daemon_free_jcr = daemon_free_jcr; /* plug daemon free routine */ @@ -412,6 +417,7 @@ static void free_common_jcr(JCR *jcr) if (jcr->msg_queue) { delete jcr->msg_queue; jcr->msg_queue = NULL; + pthread_mutex_destroy(&jcr->msg_queue_mutex); } close_msg(jcr); /* close messages for this job */ @@ -1104,7 +1110,7 @@ void dbg_jcr_add_hook(dbg_jcr_hook_t *fct) * !!! WARNING !!! * * This function should be used ONLY after a fatal signal. We walk through the - * JCR chain without doing any lock, bacula should not be running. + * JCR chain without doing any lock, Bacula should not be running. */ void _dbg_print_jcr(FILE *fp) { @@ -1116,19 +1122,10 @@ void _dbg_print_jcr(FILE *fp) fprintf(fp, "Attempt to dump current JCRs\n"); for (JCR *jcr = (JCR *)jcrs->first(); jcr ; jcr = (JCR *)jcrs->next(jcr)) { - if (!jcr) { /* protect us against something ? */ - continue; - } - + fprintf(fp, "JCR=%p JobId=%i name=%s JobStatus=%c\n", jcr, jcr->JobId, jcr->Job, jcr->JobStatus); -#ifdef HAVE_WIN32 - fprintf(fp, "\tuse_count=%i\n", - jcr->use_count()); -#else - /* KES -- removed non-portable code referencing pthread_t */ - fprintf(fp, "\tuse_count=%d\n", jcr->use_count()); -#endif + fprintf(fp, "\tuse_count=%i\n", jcr->use_count()); fprintf(fp, "\tJobType=%c JobLevel=%c\n", jcr->get_JobType(), jcr->get_JobLevel()); bstrftime(buf1, sizeof(buf1), jcr->sched_time); @@ -1137,10 +1134,12 @@ void _dbg_print_jcr(FILE *fp) bstrftime(buf4, sizeof(buf4), jcr->wait_time); fprintf(fp, "\tsched_time=%s start_time=%s\n\tend_time=%s wait_time=%s\n", buf1, buf2, buf3, buf4); - fprintf(fp, "\tdequeing=%i\n", jcr->dequeuing); fprintf(fp, "\tdb=%p db_batch=%p batch_started=%i\n", jcr->db, jcr->db_batch, jcr->batch_started); + /* + * Call all the jcr debug hooks + */ for(int i=0; i < dbg_jcr_handler_count; i++) { dbg_jcr_hook_t *fct = dbg_jcr_hooks[i]; fct(jcr, fp); diff --git a/bacula/src/lib/lockmgr.c b/bacula/src/lib/lockmgr.c index 60d83cff75..7ded06978e 100644 --- a/bacula/src/lib/lockmgr.c +++ b/bacula/src/lib/lockmgr.c @@ -466,8 +466,8 @@ bool lmgr_detect_deadlock() /* * !!! WARNING !!! - * Use this function only after a fatal signal - * We don't use any lock to display information + * Use this function is used only after a fatal signal + * We don't use locking to display the information */ void dbg_print_lock(FILE *fp) { @@ -731,8 +731,8 @@ int lmgr_thread_create(pthread_t *thread, /* * !!! WARNING !!! - * Use this function only after a fatal signal - * We don't use any lock to display information + * Use this function is used only after a fatal signal + * We don't use locking to display information */ void dbg_print_lock(FILE *fp) { diff --git a/bacula/src/lib/message.c b/bacula/src/lib/message.c index bc527a7b79..d9abff3bcc 100644 --- a/bacula/src/lib/message.c +++ b/bacula/src/lib/message.c @@ -30,11 +30,8 @@ * * Kern Sibbald, April 2000 * - * Version $Id$ - * */ - #include "bacula.h" #include "jcr.h" @@ -1325,8 +1322,6 @@ int Mmsg(POOL_MEM &pool_buf, const char *fmt, ...) } -static pthread_mutex_t msg_queue_mutex = PTHREAD_MUTEX_INITIALIZER; - /* * We queue messages rather than print them directly. This * is generally used in low level routines (msg handler, bnet) @@ -1361,16 +1356,16 @@ void Qmsg(JCR *jcr, int type, utime_t mtime, const char *fmt,...) if (!jcr) { jcr = get_jcr_from_tsd(); } - /* If no jcr or dequeuing send to daemon to avoid recursion */ - if ((jcr && !jcr->msg_queue) || !jcr || jcr->dequeuing) { + /* If no jcr or no queue send to daemon */ + if ((jcr && !jcr->msg_queue) || !jcr) { /* jcr==NULL => daemon message, safe to send now */ Jmsg(jcr, item->type, item->mtime, "%s", item->msg); free(item); } else { /* Queue message for later sending */ - P(msg_queue_mutex); + P(jcr->msg_queue_mutex); jcr->msg_queue->append(item); - V(msg_queue_mutex); + V(jcr->msg_queue_mutex); } free_memory(pool_buf); } @@ -1381,19 +1376,16 @@ void Qmsg(JCR *jcr, int type, utime_t mtime, const char *fmt,...) void dequeue_messages(JCR *jcr) { MQUEUE_ITEM *item; - P(msg_queue_mutex); if (!jcr->msg_queue) { - goto bail_out; + return; } - jcr->dequeuing = true; + P(jcr->msg_queue_mutex); foreach_dlist(item, jcr->msg_queue) { Jmsg(jcr, item->type, item->mtime, "%s", item->msg); } + /* Remove messages just sent */ jcr->msg_queue->destroy(); - jcr->dequeuing = false; - -bail_out: - V(msg_queue_mutex); + V(jcr->msg_queue_mutex); } diff --git a/bacula/src/lib/signal.c b/bacula/src/lib/signal.c index 0e5acea298..e51010b08d 100644 --- a/bacula/src/lib/signal.c +++ b/bacula/src/lib/signal.c @@ -83,7 +83,7 @@ extern void dbg_print_lock(FILE *fp); * !!! WARNING !!! * * This function should be used ONLY after a violent signal. We walk through the - * JCR chain without doing any lock, bacula should not be running. + * JCR chain without locking, Bacula should not be running. */ static void dbg_print_bacula() { @@ -130,7 +130,7 @@ extern "C" void signal_handler(int sig) if (sig == SIGTERM) { // Emsg1(M_TERM, -1, "Shutting down Bacula service: %s ...\n", my_name); } else { -/* ***FIXME*** Display a message without taking any lock in the system +/* ***FIXME*** Display a message without locking * Emsg2(M_FATAL, -1, _("Bacula interrupted by signal %d: %s\n"), sig, get_signal_name(sig)); */ fprintf(stderr, _("Bacula interrupted by signal %d: %s\n"), sig, get_signal_name(sig));