From 0ba68235f44d545a0b6edf42c03a1e3a85fdde4d Mon Sep 17 00:00:00 2001 From: Eric Bollengier Date: Tue, 9 Dec 2008 19:21:38 +0000 Subject: [PATCH] ebl Add a new lock manager that can detect deadlock situation This new option is activated with a --enable-lockmgr configure option. git-svn-id: https://bacula.svn.sourceforge.net/svnroot/bacula/trunk@8132 91ce42f0-d328-0410-95d8-f526ca767f89 --- bacula/autoconf/config.h.in | 3 + bacula/autoconf/configure.in | 19 + bacula/configure | 63 +- bacula/src/baconfig.h | 16 - bacula/src/console/console.c | 2 + bacula/src/dird/dird.c | 1 + bacula/src/dird/jobq.c | 46 +- bacula/src/filed/filed.c | 1 + bacula/src/lib/Makefile.in | 11 +- bacula/src/lib/bsys.c | 63 -- bacula/src/lib/lib.h | 1 + bacula/src/lib/lockmgr.c | 1000 ++++++++++++++++++++++++++++++ bacula/src/lib/lockmgr.h | 142 +++++ bacula/src/lib/rwlock.c | 11 +- bacula/src/lib/signal.c | 4 + bacula/src/lib/smartall.c | 2 + bacula/src/lib/workq.c | 55 +- bacula/src/stored/stored.c | 1 + bacula/src/win32/dll/Makefile | 3 +- bacula/src/win32/dll/bacula.def | 6 +- bacula/src/win32/libbac/Makefile | 3 +- bacula/technotes-2.5 | 3 + 22 files changed, 1280 insertions(+), 176 deletions(-) create mode 100644 bacula/src/lib/lockmgr.c create mode 100644 bacula/src/lib/lockmgr.h diff --git a/bacula/autoconf/config.h.in b/bacula/autoconf/config.h.in index e0f50d210b..c20174f918 100644 --- a/bacula/autoconf/config.h.in +++ b/bacula/autoconf/config.h.in @@ -2,6 +2,9 @@ /* ------------------------------------------------------------------------- */ /* -- CONFIGURE SPECIFIED FEATURES -- */ /* ------------------------------------------------------------------------- */ + +/* Define if you want to use the lock manager */ +#undef _USE_LOCKMGR /* Define if you want to use MySQL as Catalog database */ #undef USE_MYSQL_DB diff --git a/bacula/autoconf/configure.in b/bacula/autoconf/configure.in index dd52286b86..417337674f 100644 --- a/bacula/autoconf/configure.in +++ 
b/bacula/autoconf/configure.in @@ -304,6 +304,7 @@ build_dird=yes build_stored=yes cats= db_type=Internal +support_lockmgr=no DB_TYPE=bdb dnl -------------------------------------------------------------------------- @@ -516,6 +517,23 @@ if test x$support_smartalloc = xyes; then AC_DEFINE(SMARTALLOC, 1, [Set if you want Smartalloc enabled]) fi +dnl ------------------------------------------- +dnl Lock Manager (default off) +dnl ------------------------------------------- +AC_ARG_ENABLE(lockmgr, + AC_HELP_STRING([--enable-lockmgr], [enable lock manager support @<:@default=no@:>@]), + [ + if test x$enableval = xyes; then + support_lockmgr=yes + fi + ] +) + +if test x$support_lockmgr = xyes; then + AC_DEFINE(_USE_LOCKMGR, 1, [Set if you want Lock Manager enabled]) +fi + + dnl ------------------------------------------- dnl static-tools (default off) dnl ------------------------------------------- @@ -2965,6 +2983,7 @@ Configuration on `date`: Encryption support: ${support_crypto} ZLIB support: ${have_zlib} enable-smartalloc: ${support_smartalloc} + enable-lockmgr: ${support_lockmgr} bat support: ${support_bat} ${QWT_LDFLAGS} enable-gnome: ${support_gnome} ${gnome_version} enable-bwx-console: ${support_wx_console} ${wx_version} diff --git a/bacula/configure b/bacula/configure index 18bf8e10c3..b52c27f633 100755 --- a/bacula/configure +++ b/bacula/configure @@ -1650,6 +1650,7 @@ Optional Features: --enable-tray-monitor enable build of Gnome tray monitor (compatible with KDE [default=no] --enable-smartalloc enable smartalloc debugging support [default=no] + --enable-lockmgr enable lock manager support [default=no] --enable-static-tools enable static tape tools [default=no] --enable-static-fd enable static File daemon [default=no] --enable-static-sd enable static Storage daemon [default=no] @@ -5935,13 +5936,13 @@ if test "${lt_cv_nm_interface+set}" = set; then else lt_cv_nm_interface="BSD nm" echo "int some_variable = 0;" > conftest.$ac_ext - (eval echo "\"\$as_me:5938: 
$ac_compile\"" >&5) + (eval echo "\"\$as_me:5939: $ac_compile\"" >&5) (eval "$ac_compile" 2>conftest.err) cat conftest.err >&5 - (eval echo "\"\$as_me:5941: $NM \\\"conftest.$ac_objext\\\"\"" >&5) + (eval echo "\"\$as_me:5942: $NM \\\"conftest.$ac_objext\\\"\"" >&5) (eval "$NM \"conftest.$ac_objext\"" 2>conftest.err > conftest.out) cat conftest.err >&5 - (eval echo "\"\$as_me:5944: output\"" >&5) + (eval echo "\"\$as_me:5945: output\"" >&5) cat conftest.out >&5 if $GREP 'External.*some_variable' conftest.out > /dev/null; then lt_cv_nm_interface="MS dumpbin" @@ -7162,7 +7163,7 @@ ia64-*-hpux*) ;; *-*-irix6*) # Find out which ABI we are using. - echo '#line 7165 "configure"' > conftest.$ac_ext + echo '#line 7166 "configure"' > conftest.$ac_ext if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 (eval $ac_compile) 2>&5 ac_status=$? @@ -9377,11 +9378,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:9380: $lt_compile\"" >&5) + (eval echo "\"\$as_me:9381: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 - echo "$as_me:9384: \$? = $ac_status" >&5 + echo "$as_me:9385: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. @@ -9716,11 +9717,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:9719: $lt_compile\"" >&5) + (eval echo "\"\$as_me:9720: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 - echo "$as_me:9723: \$? = $ac_status" >&5 + echo "$as_me:9724: \$? 
= $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. @@ -9821,11 +9822,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:9824: $lt_compile\"" >&5) + (eval echo "\"\$as_me:9825: $lt_compile\"" >&5) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&5 - echo "$as_me:9828: \$? = $ac_status" >&5 + echo "$as_me:9829: \$? = $ac_status" >&5 if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized @@ -9876,11 +9877,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:9879: $lt_compile\"" >&5) + (eval echo "\"\$as_me:9880: $lt_compile\"" >&5) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&5 - echo "$as_me:9883: \$? = $ac_status" >&5 + echo "$as_me:9884: \$? 
= $ac_status" >&5 if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized @@ -12632,7 +12633,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 12635 "configure" +#line 12636 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -12728,7 +12729,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 12731 "configure" +#line 12732 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -14740,11 +14741,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:14743: $lt_compile\"" >&5) + (eval echo "\"\$as_me:14744: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 - echo "$as_me:14747: \$? = $ac_status" >&5 + echo "$as_me:14748: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. @@ -14839,11 +14840,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:14842: $lt_compile\"" >&5) + (eval echo "\"\$as_me:14843: $lt_compile\"" >&5) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&5 - echo "$as_me:14846: \$? = $ac_status" >&5 + echo "$as_me:14847: \$? 
= $ac_status" >&5 if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized @@ -14891,11 +14892,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:14894: $lt_compile\"" >&5) + (eval echo "\"\$as_me:14895: $lt_compile\"" >&5) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&5 - echo "$as_me:14898: \$? = $ac_status" >&5 + echo "$as_me:14899: \$? = $ac_status" >&5 if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized @@ -23956,6 +23957,7 @@ build_dird=yes build_stored=yes cats= db_type=Internal +support_lockmgr=no DB_TYPE=bdb @@ -24180,6 +24182,26 @@ _ACEOF fi +# Check whether --enable-lockmgr was given. +if test "${enable_lockmgr+set}" = set; then + enableval=$enable_lockmgr; + if test x$enableval = xyes; then + support_lockmgr=yes + fi + + +fi + + +if test x$support_lockmgr = xyes; then + +cat >>confdefs.h <<\_ACEOF +#define _USE_LOCKMGR 1 +_ACEOF + +fi + + # Check whether --enable-static-tools was given. 
if test "${enable_static_tools+set}" = set; then enableval=$enable_static_tools; @@ -44779,6 +44801,7 @@ Configuration on `date`: Encryption support: ${support_crypto} ZLIB support: ${have_zlib} enable-smartalloc: ${support_smartalloc} + enable-lockmgr: ${support_lockmgr} bat support: ${support_bat} ${QWT_LDFLAGS} enable-gnome: ${support_gnome} ${gnome_version} enable-bwx-console: ${support_wx_console} ${wx_version} diff --git a/bacula/src/baconfig.h b/bacula/src/baconfig.h index 16d6832e10..d7b04ac988 100644 --- a/bacula/src/baconfig.h +++ b/bacula/src/baconfig.h @@ -418,22 +418,6 @@ typedef int64_t boffset_t; typedef off_t boffset_t; #endif -#if defined(DEBUG_MUTEX) -extern void _p(char *file, int line, pthread_mutex_t *m); -extern void _v(char *file, int line, pthread_mutex_t *m); - -#define P(x) _p(__FILE__, __LINE__, &(x)) -#define V(x) _v(__FILE__, __LINE__, &(x)) - -#else -extern void _p(pthread_mutex_t *m); -extern void _v(pthread_mutex_t *m); - -#define P(x) _p(&(x)) -#define V(x) _v(&(x)) - -#endif /* DEBUG_MUTEX */ - /* These probably should be subroutines */ #define Pw(x) \ do { int errstat; if ((errstat=rwl_writelock(&(x)))) \ diff --git a/bacula/src/console/console.c b/bacula/src/console/console.c index 56d7a4ca76..4150407e7d 100644 --- a/bacula/src/console/console.c +++ b/bacula/src/console/console.c @@ -870,6 +870,8 @@ static void terminate_console(int sig) con_term(); } (void)WSACleanup(); /* Cleanup Windows sockets */ + lmgr_cleanup_main(); + if (sig != 0) { exit(1); } diff --git a/bacula/src/dird/dird.c b/bacula/src/dird/dird.c index 19efc1b761..b8b065f621 100644 --- a/bacula/src/dird/dird.c +++ b/bacula/src/dird/dird.c @@ -385,6 +385,7 @@ void terminate_dird(int sig) term_msg(); /* terminate message handler */ cleanup_crypto(); close_memory_pool(); /* release free memory in pool */ + lmgr_cleanup_main(); sm_dump(false); exit(sig); } diff --git a/bacula/src/dird/jobq.c b/bacula/src/dird/jobq.c index 6428dba146..d08d6bcde1 100644 --- 
a/bacula/src/dird/jobq.c +++ b/bacula/src/dird/jobq.c @@ -119,11 +119,7 @@ int jobq_destroy(jobq_t *jq) if (jq->valid != JOBQ_VALID) { return EINVAL; } - if ((stat = pthread_mutex_lock(&jq->mutex)) != 0) { - berrno be; - Jmsg1(NULL, M_ERROR, 0, _("pthread_mutex_lock: ERR=%s\n"), be.bstrerror(stat)); - return stat; - } + P(jq->mutex); jq->valid = 0; /* prevent any more operations */ /* @@ -135,7 +131,7 @@ int jobq_destroy(jobq_t *jq) if ((stat = pthread_cond_broadcast(&jq->work)) != 0) { berrno be; Jmsg1(NULL, M_ERROR, 0, _("pthread_cond_broadcast: ERR=%s\n"), be.bstrerror(stat)); - pthread_mutex_unlock(&jq->mutex); + V(jq->mutex); return stat; } } @@ -143,16 +139,12 @@ int jobq_destroy(jobq_t *jq) if ((stat = pthread_cond_wait(&jq->work, &jq->mutex)) != 0) { berrno be; Jmsg1(NULL, M_ERROR, 0, _("pthread_cond_wait: ERR=%s\n"), be.bstrerror(stat)); - pthread_mutex_unlock(&jq->mutex); + V(jq->mutex); return stat; } } } - if ((stat = pthread_mutex_unlock(&jq->mutex)) != 0) { - berrno be; - Jmsg1(NULL, M_ERROR, 0, _("pthread_mutex_unlock: ERR=%s\n"), be.bstrerror(stat)); - return stat; - } + V(jq->mutex); stat = pthread_mutex_destroy(&jq->mutex); stat1 = pthread_cond_destroy(&jq->work); stat2 = pthread_attr_destroy(&jq->attr); @@ -254,12 +246,7 @@ int jobq_add(jobq_t *jq, JCR *jcr) return stat; } - if ((stat = pthread_mutex_lock(&jq->mutex)) != 0) { - berrno be; - Jmsg1(jcr, M_ERROR, 0, _("pthread_mutex_lock: ERR=%s\n"), be.bstrerror(stat)); - free_jcr(jcr); /* release jcr */ - return stat; - } + P(jq->mutex); if ((item = (jobq_item_t *)malloc(sizeof(jobq_item_t))) == NULL) { free_jcr(jcr); /* release jcr */ @@ -296,7 +283,7 @@ int jobq_add(jobq_t *jq, JCR *jcr) /* Ensure that at least one server looks at the queue. 
*/ stat = start_server(jq); - pthread_mutex_unlock(&jq->mutex); + V(jq->mutex); Dmsg0(2300, "Return jobq_add\n"); return stat; } @@ -321,12 +308,7 @@ int jobq_remove(jobq_t *jq, JCR *jcr) return EINVAL; } - if ((stat = pthread_mutex_lock(&jq->mutex)) != 0) { - berrno be; - Jmsg1(NULL, M_ERROR, 0, _("pthread_mutex_lock: ERR=%s\n"), be.bstrerror(stat)); - return stat; - } - + P(jq->mutex); foreach_dlist(item, jq->waiting_jobs) { if (jcr == item->jcr) { found = true; @@ -334,7 +316,7 @@ int jobq_remove(jobq_t *jq, JCR *jcr) } } if (!found) { - pthread_mutex_unlock(&jq->mutex); + V(jq->mutex); Dmsg2(2300, "jobq_remove jobid=%d jcr=0x%x not in wait queue\n", jcr->JobId, jcr); return EINVAL; } @@ -346,7 +328,7 @@ int jobq_remove(jobq_t *jq, JCR *jcr) stat = start_server(jq); - pthread_mutex_unlock(&jq->mutex); + V(jq->mutex); Dmsg0(2300, "Return jobq_remove\n"); return stat; } @@ -403,11 +385,7 @@ void *jobq_server(void *arg) set_jcr_in_tsd(INVALID_JCR); Dmsg0(2300, "Start jobq_server\n"); - if ((stat = pthread_mutex_lock(&jq->mutex)) != 0) { - berrno be; - Jmsg1(NULL, M_ERROR, 0, _("pthread_mutex_lock: ERR=%s\n"), be.bstrerror(stat)); - return NULL; - } + P(jq->mutex); jq->num_workers++; for (;;) { @@ -434,7 +412,7 @@ void *jobq_server(void *arg) /* This shouldn't happen */ Dmsg0(2300, "This shouldn't happen\n"); jq->num_workers--; - pthread_mutex_unlock(&jq->mutex); + V(jq->mutex); return NULL; } break; @@ -453,7 +431,7 @@ void *jobq_server(void *arg) Dmsg0(2300, "ready queue not empty start server\n"); if (start_server(jq) != 0) { jq->num_workers--; - pthread_mutex_unlock(&jq->mutex); + V(jq->mutex); return NULL; } } diff --git a/bacula/src/filed/filed.c b/bacula/src/filed/filed.c index 797cb83835..21b7c68fc3 100644 --- a/bacula/src/filed/filed.c +++ b/bacula/src/filed/filed.c @@ -297,6 +297,7 @@ void terminate_filed(int sig) term_msg(); cleanup_crypto(); close_memory_pool(); /* release free memory in pool */ + lmgr_cleanup_main(); sm_dump(false); /* dump orphaned 
buffers */ exit(sig); } diff --git a/bacula/src/lib/Makefile.in b/bacula/src/lib/Makefile.in index dfbd9030a7..9c3be8dea5 100644 --- a/bacula/src/lib/Makefile.in +++ b/bacula/src/lib/Makefile.in @@ -34,7 +34,7 @@ INCLUDE_FILES = ../baconfig.h ../bacula.h ../bc_types.h \ smartall.h status.h tls.h tree.h var.h \ waitq.h watchdog.h workq.h \ parse_conf.h \ - pythonlib.h + pythonlib.h lockmgr.h # # libbac @@ -49,7 +49,7 @@ LIBBAC_SRCS = attr.c base64.c berrno.c bsys.c bget_msg.c \ rwlock.c scan.c serial.c sha1.c \ signal.c smartall.c rblist.c tls.c tree.c \ util.c var.c watchdog.c workq.c btimers.c \ - address_conf.c breg.c htable.c + address_conf.c breg.c htable.c lockmgr.c LIBBAC_OBJS = $(LIBBAC_SRCS:.c=$(DEFAULT_OBJECT_TYPE)) LIBBAC_LT_CURRENT = 1 LIBBAC_LT_REVISION = 0 @@ -140,6 +140,13 @@ Makefile: $(srcdir)/Makefile.in $(topdir)/config.status cd $(topdir) \ && CONFIG_FILES=$(thisdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status +lockmgr_test: Makefile + rm -f lockmgr.o + $(CXX) -D _TEST_IT $(DEFS) $(DEBUG) -c $(CPPFLAGS) -I$(srcdir) -I$(basedir) $(DINCLUDE) $(CFLAGS) lockmgr.c + $(LIBTOOL_LINK) $(CXX) $(LDFLAGS) -L. -o $@ lockmgr.o $(LIBS) $(DLIB) -lbac $(OPENSSL_LIBS) -lm + rm -f lockmgr.o + $(CXX) $(DEFS) $(DEBUG) -c $(CPPFLAGS) -I$(srcdir) -I$(basedir) $(DINCLUDE) $(CFLAGS) lockmgr.c + rwlock_test: Makefile rm -f rwlock.o $(CXX) -DTEST_RWLOCK $(DEFS) $(DEBUG) -c $(CPPFLAGS) -I$(srcdir) -I$(basedir) $(DINCLUDE) $(CFLAGS) rwlock.c diff --git a/bacula/src/lib/bsys.c b/bacula/src/lib/bsys.c index 91e7873667..3d370c16c9 100644 --- a/bacula/src/lib/bsys.c +++ b/bacula/src/lib/bsys.c @@ -375,69 +375,6 @@ int b_strerror(int errnum, char *buf, size_t bufsiz) return stat; } -/* - * These are mutex routines that do error checking - * for deadlock and such. Normally not turned on. 
- */ -#ifdef DEBUG_MUTEX -void _p(char *file, int line, pthread_mutex_t *m) -{ - int errstat; - if ((errstat = pthread_mutex_trylock(m))) { - e_msg(file, line, M_ERROR, 0, _("Possible mutex deadlock.\n")); - /* We didn't get the lock, so do it definitely now */ - if ((errstat=pthread_mutex_lock(m))) { - berrno be; - e_msg(file, line, M_ABORT, 0, _("Mutex lock failure. ERR=%s\n"), - be.bstrerror(errstat)); - } else { - e_msg(file, line, M_ERROR, 0, _("Possible mutex deadlock resolved.\n")); - } - - } -} - -void _v(char *file, int line, pthread_mutex_t *m) -{ - int errstat; - - /* Note, this trylock *should* fail if the mutex is locked */ - if ((errstat=pthread_mutex_trylock(m)) == 0) { - berrno be; - e_msg(file, line, M_ERROR, 0, _("Mutex unlock not locked. ERR=%s\n"), - be.bstrerror(errstat)); - } - if ((errstat=pthread_mutex_unlock(m))) { - berrno be; - e_msg(file, line, M_ABORT, 0, _("Mutex unlock failure. ERR=%s\n"), - be.bstrerror(errstat)); - } -} - -#else - -void _p(pthread_mutex_t *m) -{ - int errstat; - if ((errstat=pthread_mutex_lock(m))) { - berrno be; - e_msg(__FILE__, __LINE__, M_ABORT, 0, _("Mutex lock failure. ERR=%s\n"), - be.bstrerror(errstat)); - } -} - -void _v(pthread_mutex_t *m) -{ - int errstat; - if ((errstat=pthread_mutex_unlock(m))) { - berrno be; - e_msg(__FILE__, __LINE__, M_ABORT, 0, _("Mutex unlock failure. 
ERR=%s\n"), - be.bstrerror(errstat)); - } -} - -#endif /* DEBUG_MUTEX */ - #ifdef DEBUG_MEMSET /* These routines are not normally turned on */ #undef memset diff --git a/bacula/src/lib/lib.h b/bacula/src/lib/lib.h index 2b166f423b..07bc7e518e 100644 --- a/bacula/src/lib/lib.h +++ b/bacula/src/lib/lib.h @@ -36,6 +36,7 @@ */ #include "smartall.h" +#include "lockmgr.h" #include "alist.h" #include "dlist.h" #include "rblist.h" diff --git a/bacula/src/lib/lockmgr.c b/bacula/src/lib/lockmgr.c new file mode 100644 index 0000000000..2756cf825f --- /dev/null +++ b/bacula/src/lib/lockmgr.c @@ -0,0 +1,1000 @@ +/* + Bacula® - The Network Backup Solution + + Copyright (C) 2008-2008 Free Software Foundation Europe e.V. + + The main author of Bacula is Kern Sibbald, with contributions from + many others, a complete list can be found in the file AUTHORS. + This program is Free Software; you can redistribute it and/or + modify it under the terms of version two of the GNU General Public + License as published by the Free Software Foundation, which is + listed in the file LICENSE. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + + Bacula® is a registered trademark of Kern Sibbald. + The licensor of Bacula is the Free Software Foundation Europe + (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich, + Switzerland, email:ftf@fsfeurope.org. 
+*/ + +#define _LOCKMGR_COMPLIANT +#include "lockmgr.h" + +#undef ASSERT +#define ASSERT(x) if (!(x)) { \ + char *jcr = NULL; \ + Pmsg3(000, _("%s:%i Failed ASSERT: %s\n"), __FILE__, __LINE__, #x); \ + jcr[0] = 0; } + +/* + Inspired from + http://www.cs.berkeley.edu/~kamil/teaching/sp03/041403.pdf + + This lock manager will replace some pthread calls. It can be + enabled with _USE_LOCKMGR + + Some part of the code can't use this manager, for example the + rwlock object or the smartalloc lib. To disable LMGR, just add + _LOCKMGR_COMPLIANT before the inclusion of "bacula.h" + + cd build/src/tools + g++ -g -c lockmgr.c -I.. -I../lib -D_USE_LOCKMGR -D_TEST_IT + g++ -o lockmgr lockmgr.o -lbac -L../lib/.libs -lssl -lpthread + +*/ + + +/* + * pthread_mutex_lock for memory allocator and other + * parts that are _LOCKMGR_COMPLIANT + */ +void lmgr_p(pthread_mutex_t *m) +{ + int errstat; + if ((errstat=pthread_mutex_lock(m))) { + berrno be; + e_msg(__FILE__, __LINE__, M_ABORT, 0, _("Mutex lock failure. ERR=%s\n"), + be.bstrerror(errstat)); + } +} + +void lmgr_v(pthread_mutex_t *m) +{ + int errstat; + if ((errstat=pthread_mutex_unlock(m))) { + berrno be; + e_msg(__FILE__, __LINE__, M_ABORT, 0, _("Mutex unlock failure. ERR=%s\n"), + be.bstrerror(errstat)); + } +} + +#ifdef _USE_LOCKMGR + +typedef enum +{ + LMGR_WHITE, /* never seen */ + LMGR_BLACK, /* no loop */ + LMGR_GREY, /* seen before */ +} lmgr_color_t; + +/* + * Node used by the Lock Manager + * If the lock is GRANTED, we have mutex -> proc, else it's a proc -> mutex + * relation. + * + * Note, each mutex can be GRANTED once, and each proc can have only one WANTED + * mutex. 
+ */ +class lmgr_node_t: public SMARTALLOC +{ +public: + dlink link; + void *node; + void *child; + lmgr_color_t seen; + + lmgr_node_t() { + child = node = NULL; + seen = LMGR_WHITE; + } + + lmgr_node_t(void *n, void *c) { + init(n,c); + } + + void init(void *n, void *c) { + node = n; + child = c; + seen = LMGR_WHITE; + } + + void mark_as_seen(lmgr_color_t c) { + seen = c; + } + + ~lmgr_node_t() {printf("delete node\n");} +}; + +typedef enum { + LMGR_LOCK_EMPTY = 'E', /* unused */ + LMGR_LOCK_WANTED = 'W', /* before mutex_lock */ + LMGR_LOCK_GRANTED = 'G' /* after mutex_lock */ +} lmgr_state_t; + +/* + * Object associated with each mutex per thread + */ +class lmgr_lock_t: public SMARTALLOC +{ +public: + dlink link; + void *lock; + lmgr_state_t state; + + const char *file; + int line; + + lmgr_lock_t() { + lock = NULL; + state = LMGR_LOCK_EMPTY; + } + + lmgr_lock_t(void *l) { + lock = l; + state = LMGR_LOCK_WANTED; + } + + void set_granted() { + state = LMGR_LOCK_GRANTED; + } + + ~lmgr_lock_t() {} + +}; + +/* + * Get the child list, ret must be already allocated + */ +static void search_all_node(dlist *g, lmgr_node_t *v, alist *ret) +{ + lmgr_node_t *n; + foreach_dlist(n, g) { + if (v->child == n->node) { + ret->append(n); + } + } +} + +static bool visite(dlist *g, lmgr_node_t *v) +{ + bool ret=false; + lmgr_node_t *n; + v->mark_as_seen(LMGR_GREY); + + alist *d = New(alist(5, false)); /* use alist because own=false */ + search_all_node(g, v, d); + + //foreach_alist(n, d) { + // printf("node n=%p c=%p s=%c\n", n->node, n->child, n->seen); + //} + + foreach_alist(n, d) { + if (n->seen == LMGR_GREY) { /* already seen this node */ + ret = true; + goto bail_out; + } else if (n->seen == LMGR_WHITE) { + if (visite(g, n)) { + ret = true; + goto bail_out; + } + } + } + v->mark_as_seen(LMGR_BLACK); /* no loop detected, node is clean */ +bail_out: + delete d; + return ret; +} + +static bool contains_cycle(dlist *g) +{ + lmgr_node_t *n; + foreach_dlist(n, g) { + if (n->seen == 
LMGR_WHITE) { + if (visite(g, n)) { + return true; + } + } + } + return false; +} + +/****************************************************************/ + +class lmgr_thread_t: public SMARTALLOC +{ +public: + dlink link; + pthread_mutex_t mutex; + pthread_t thread_id; + lmgr_lock_t lock_list[LMGR_MAX_LOCK]; + int current; /* index of the last used lock_list slot, -1 when empty */ + int max; /* highest value current has reached */ + + lmgr_thread_t() { + int status; + if ((status = pthread_mutex_init(&mutex, NULL)) != 0) { + berrno be; + Pmsg1(000, _("pthread_mutex_init failed: ERR=%s\n"), + be.bstrerror(status)); + ASSERT(0); + } + thread_id = pthread_self(); + current = -1; + max = 0; + } + + void _dump(FILE *fp) { + fprintf(fp, "threadid=0x%x max=%i current=%i\n", (int)thread_id, max, current); + for(int i=0; i<=current; i++) { + fprintf(fp, " lock=%p state=%c %s:%i\n", + lock_list[i].lock, lock_list[i].state, + lock_list[i].file, lock_list[i].line); + } + } + + void dump(FILE *fp) { + pthread_mutex_lock(&mutex); + { + _dump(fp); + } + pthread_mutex_unlock(&mutex); + } + + /* + * Call before a lock operation (mark mutex as WANTED) + */ + virtual void pre_P(void *m, const char *f="*unknown*", int l=0) { + ASSERT(current < LMGR_MAX_LOCK - 1); /* current is incremented below and must stay a valid index */ + ASSERT(current >= -1); + pthread_mutex_lock(&mutex); + { + current++; + lock_list[current].lock = m; + lock_list[current].state = LMGR_LOCK_WANTED; + lock_list[current].file = f; + lock_list[current].line = l; + max = MAX(current, max); + } + pthread_mutex_unlock(&mutex); + } + + /* + * Call after the lock operation (mark mutex as GRANTED) + */ + virtual void post_P() { + ASSERT(current >= 0); + ASSERT(lock_list[current].state == LMGR_LOCK_WANTED); + lock_list[current].state = LMGR_LOCK_GRANTED; + } + + void shift_list(int i) { /* drop slot i: move each later entry down one, then clear the last slot */ + for(int j=i+1; j<=current; j++) { + lock_list[j-1] = lock_list[j]; + } + if (current >= 0) { + lock_list[current].lock = NULL; + lock_list[current].state = LMGR_LOCK_EMPTY; + } + } + + /* + * Remove the mutex from the list + */ + virtual void do_V(void *m, const char *f="*unknown*", int l=0) { + 
ASSERT(current >= 0); + pthread_mutex_lock(&mutex); + { + if (lock_list[current].lock == m) { + lock_list[current].lock = NULL; + lock_list[current].state = LMGR_LOCK_EMPTY; + current--; + } else { + ASSERT(current > 0); + Pmsg3(0, "ERROR: wrong P/V order search lock=%p %s:%i\n", m, f, l); + Pmsg4(000, "ERROR: wrong P/V order pos=%i lock=%p %s:%i\n", + current, lock_list[current].lock, lock_list[current].file, + lock_list[current].line); + for (int i=current-1; i >= 0; i--) { /* already seen current */ + Pmsg4(000, "ERROR: wrong P/V order pos=%i lock=%p %s:%i\n", + i, lock_list[i].lock, lock_list[i].file, lock_list[i].line); + if (lock_list[i].lock == m) { + Pmsg3(000, "ERROR: FOUND P pos=%i %s:%i\n", i, f, l); + shift_list(i); + current--; + break; + } + } + } + } + pthread_mutex_unlock(&mutex); + } + + virtual ~lmgr_thread_t() {destroy();} + + void destroy() { + pthread_mutex_destroy(&mutex); + } +} ; + +class lmgr_dummy_thread_t: public lmgr_thread_t +{ + void do_V(void *m, const char *file, int l) {} + void post_P() {} + void pre_P(void *m, const char *file, int l) {} +}; + +/* + * LMGR - Lock Manager + * + * + * + */ + +pthread_once_t key_lmgr_once = PTHREAD_ONCE_INIT; +static pthread_key_t lmgr_key; /* used to get lgmr_thread_t object */ + +static dlist *global_mgr=NULL; /* used to store all lgmr_thread_t objects */ +static pthread_mutex_t lmgr_global_mutex = PTHREAD_MUTEX_INITIALIZER; +static pthread_t undertaker; + +#define lmgr_is_active() (global_mgr != NULL) + +/* + * Add a new lmgr_thread_t object to the global list + */ +void lmgr_register_thread(lmgr_thread_t *item) +{ + pthread_mutex_lock(&lmgr_global_mutex); + { + global_mgr->prepend(item); + } + pthread_mutex_unlock(&lmgr_global_mutex); +} + +/* + * Call this function to cleanup specific lock thread data + */ +void lmgr_unregister_thread(lmgr_thread_t *item) +{ + if (!lmgr_is_active()) { + return; + } + pthread_mutex_lock(&lmgr_global_mutex); + { + global_mgr->remove(item); + } + 
pthread_mutex_unlock(&lmgr_global_mutex); +} + +/* + * Search for a deadlock when it's secure to walk across + * locks list. (after lmgr_detect_deadlock or a fatal signal) + */ +bool lmgr_detect_deadlock_unlocked() +{ + bool ret=false; + lmgr_node_t *node=NULL; + lmgr_lock_t *lock; + lmgr_thread_t *item; + dlist *g = New(dlist(node, &node->link)); + + /* First, get a list of all node */ + foreach_dlist(item, global_mgr) { + for(int i=0; i<=item->current; i++) { + node = NULL; + lock = &item->lock_list[i]; + /* Depending if the lock is granted or not, it's a child or a root + * Granted: Mutex -> Thread + * Wanted: Thread -> Mutex + * + * Note: a Mutex can be locked only once, a thread can request only + * one mutex. + * + */ + if (lock->state == LMGR_LOCK_GRANTED) { + node = New(lmgr_node_t((void*)lock->lock, (void*)item->thread_id)); + } else if (lock->state == LMGR_LOCK_WANTED) { + node = New(lmgr_node_t((void*)item->thread_id, (void*)lock->lock)); + } + if (node) { + g->append(node); + } + } + } + + //foreach_dlist(node, g) { + // printf("g n=%p c=%p\n", node->node, node->child); + //} + + ret = contains_cycle(g); + if (ret) { + printf("Found a deadlock !!!!\n"); + } + + delete g; + return ret; +} + +/* + * Search for a deadlock in during the runtime + * It will lock all thread specific lock manager, nothing + * can be locked during this check. + */ +bool lmgr_detect_deadlock() +{ + bool ret=false; + if (!lmgr_is_active()) { + return ret; + } + + pthread_mutex_lock(&lmgr_global_mutex); + { + lmgr_thread_t *item; + foreach_dlist(item, global_mgr) { + pthread_mutex_lock(&item->mutex); + } + + ret = lmgr_detect_deadlock_unlocked(); + + foreach_dlist(item, global_mgr) { + pthread_mutex_unlock(&item->mutex); + } + } + pthread_mutex_unlock(&lmgr_global_mutex); + + return ret; +} + +/* + * !!! WARNING !!! 
+ * Use this function only after a fatal signal + * We don't use any lock to display information + */ +void dbg_print_lock(FILE *fp) +{ + fprintf(fp, "Attempt to dump locks\n"); + if (!lmgr_is_active()) { + return ; + } + lmgr_thread_t *item; + foreach_dlist(item, global_mgr) { + item->_dump(fp); + } +} + +/* + * Dump each lmgr_thread_t object + */ +void lmgr_dump() +{ + pthread_mutex_lock(&lmgr_global_mutex); + { + lmgr_thread_t *item; + foreach_dlist(item, global_mgr) { + item->dump(stderr); + } + } + pthread_mutex_unlock(&lmgr_global_mutex); +} + +void cln_hdl(void *a) +{ + lmgr_cleanup_thread(); +} + +void *check_deadlock(void *) +{ + int old; + lmgr_init_thread(); + pthread_cleanup_push(cln_hdl, NULL); + + while (!bmicrosleep(30, 0)) { + pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &old); + if (lmgr_detect_deadlock()) { + lmgr_dump(); + ASSERT(0); + } + pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &old); + pthread_testcancel(); + } + Pmsg0(000, "Undertaker is leaving...\n"); + pthread_cleanup_pop(1); + return NULL; +} + +/* This object is used when LMGR is not initialized */ +lmgr_dummy_thread_t dummy_lmgr; + +/* + * Retrieve the lmgr_thread_t object from the stack + */ +inline lmgr_thread_t *lmgr_get_thread_info() +{ + if (lmgr_is_active()) { + return (lmgr_thread_t *)pthread_getspecific(lmgr_key); + } else { + return &dummy_lmgr; + } +} + +/* + * launch once for all threads + */ +void create_lmgr_key() +{ + int status = pthread_key_create(&lmgr_key, NULL); + if (status != 0) { + berrno be; + Pmsg1(000, _("pthread key create failed: ERR=%s\n"), + be.bstrerror(status)); + ASSERT(0); + } + + lmgr_thread_t *n=NULL; + global_mgr = New(dlist(n, &n->link)); + + if ((status = pthread_create(&undertaker, NULL, check_deadlock, NULL)) != 0) { /* keep the error code for the message below */ + berrno be; + Pmsg1(000, _("pthread_create failed: ERR=%s\n"), + be.bstrerror(status)); + ASSERT(0); + } +} + +/* + * Each thread have to call this function to put a lmgr_thread_t object + * in the stack and be able to call 
mutex_lock/unlock + */ +void lmgr_init_thread() +{ + int status = pthread_once(&key_lmgr_once, create_lmgr_key); + if (status != 0) { + berrno be; + Pmsg1(000, _("pthread key create failed: ERR=%s\n"), + be.bstrerror(status)); + ASSERT(0); + } + lmgr_thread_t *l = New(lmgr_thread_t()); + pthread_setspecific(lmgr_key, l); + lmgr_register_thread(l); +} + +/* + * Call this function at the end of the thread + */ +void lmgr_cleanup_thread() +{ + if (!lmgr_is_active()) { + return ; + } + lmgr_thread_t *self = lmgr_get_thread_info(); + lmgr_unregister_thread(self); + delete(self); +} + +/* + * This function should be call at the end of the main thread + * Some thread like the watchdog are already present, so the global_mgr + * list is never empty. Should carefully clear the memory. + */ +void lmgr_cleanup_main() +{ + dlist *temp; + + pthread_cancel(undertaker); + lmgr_cleanup_thread(); + pthread_mutex_lock(&lmgr_global_mutex); + { + temp = global_mgr; + global_mgr=NULL; + delete temp; + } + pthread_mutex_unlock(&lmgr_global_mutex); +} + +/* + * Replacement for pthread_mutex_lock() + */ +int lmgr_mutex_lock(pthread_mutex_t *m, const char *file, int line) +{ + int ret; + lmgr_thread_t *self = lmgr_get_thread_info(); + self->pre_P(m, file, line); + ret = pthread_mutex_lock(m); + self->post_P(); + return ret; +} + +/* + * Replacement for pthread_mutex_unlock() + */ +int lmgr_mutex_unlock(pthread_mutex_t *m, const char *file, int line) +{ + lmgr_thread_t *self = lmgr_get_thread_info(); + self->do_V(m, file, line); + return pthread_mutex_unlock(m); +} + +/* TODO: check this + */ +int lmgr_cond_wait(pthread_cond_t *cond, + pthread_mutex_t *mutex) +{ + int ret; + lmgr_thread_t *self = lmgr_get_thread_info(); + self->do_V(mutex); + ret = pthread_cond_wait(cond, mutex); + self->pre_P(mutex); + self->post_P(); + return ret; +} + +/* + * Use this function when the caller handle the mutex directly + * + * lmgr_pre_lock(m); + * pthread_mutex_lock(m); + * lmgr_post_lock(m); + */ +void 
lmgr_pre_lock(void *m) +{ + lmgr_thread_t *self = lmgr_get_thread_info(); + self->pre_P(m); +} + +/* + * Use this function when the caller handle the mutex directly + */ +void lmgr_post_lock() +{ + lmgr_thread_t *self = lmgr_get_thread_info(); + self->post_P(); +} + +/* + * Do directly pre_P and post_P (used by trylock) + */ +void lmgr_do_lock(void *m) +{ + lmgr_thread_t *self = lmgr_get_thread_info(); + self->pre_P(m); + self->post_P(); +} + +/* + * Use this function when the caller handle the mutex directly + */ +void lmgr_do_unlock(void *m) +{ + lmgr_thread_t *self = lmgr_get_thread_info(); + self->do_V(m); +} + +typedef struct { + void *(*start_routine)(void*); + void *arg; +} lmgr_thread_arg_t; + +extern "C" +void *lmgr_thread_launcher(void *x) +{ + void *ret=NULL; + lmgr_init_thread(); + pthread_cleanup_push(cln_hdl, NULL); + + lmgr_thread_arg_t arg; + lmgr_thread_arg_t *a = (lmgr_thread_arg_t *)x; + arg.start_routine = a->start_routine; + arg.arg = a->arg; + free(a); + + ret = arg.start_routine(arg.arg); + pthread_cleanup_pop(1); + return ret; +} + +int lmgr_thread_create(pthread_t *thread, + const pthread_attr_t *attr, + void *(*start_routine)(void*), void *arg) +{ + /* Will be freed by the child */ + lmgr_thread_arg_t *a = (lmgr_thread_arg_t*) malloc(sizeof(lmgr_thread_arg_t)); + a->start_routine = start_routine; + a->arg = arg; + return pthread_create(thread, attr, lmgr_thread_launcher, a); +} + +#else /* _USE_LOCKMGR */ + +/* + * !!! WARNING !!! 
+ * Use this function only after a fatal signal + * We don't use any lock to display information + */ +void dbg_print_lock(FILE *fp) +{ + Pmsg0(000, "lockmgr disabled\n"); +} + +#endif /* _USE_LOCKMGR */ + +#ifdef _TEST_IT + +#include "lockmgr.h" +#define pthread_mutex_lock(x) lmgr_mutex_lock(x) +#define pthread_mutex_unlock(x) lmgr_mutex_unlock(x) +#define pthread_cond_wait(x,y) lmgr_cond_wait(x,y) +#define pthread_create(a, b, c, d) lmgr_thread_create(a,b,c,d) +#undef P +#undef V +#define P(x) lmgr_mutex_lock(&(x), __FILE__, __LINE__) +#define V(x) lmgr_mutex_unlock(&(x), __FILE__, __LINE__) + +pthread_mutex_t mutex1 = PTHREAD_MUTEX_INITIALIZER; +pthread_mutex_t mutex2 = PTHREAD_MUTEX_INITIALIZER; +pthread_mutex_t mutex3 = PTHREAD_MUTEX_INITIALIZER; +pthread_mutex_t mutex4 = PTHREAD_MUTEX_INITIALIZER; +pthread_mutex_t mutex5 = PTHREAD_MUTEX_INITIALIZER; +pthread_mutex_t mutex6 = PTHREAD_MUTEX_INITIALIZER; + +void *self_lock(void *temp) +{ + P(mutex1); + P(mutex1); + V(mutex1); + + return NULL; +} + +void *nolock(void *temp) +{ + P(mutex2); + sleep(5); + V(mutex2); + return NULL; +} + +void *locker(void *temp) +{ + pthread_mutex_t *m = (pthread_mutex_t*) temp; + pthread_mutex_lock(m); + pthread_mutex_unlock(m); + return NULL; +} + +void *rwlocker(void *temp) +{ + brwlock_t *m = (brwlock_t*) temp; + rwl_writelock(m); + rwl_writelock(m); + + rwl_writeunlock(m); + rwl_writeunlock(m); + return NULL; +} + +void *mix_rwl_mutex(void *temp) +{ + brwlock_t *m = (brwlock_t*) temp; + P(mutex1); + rwl_writelock(m); + rwl_writeunlock(m); + V(mutex1); + return NULL; +} + + +void *th2(void *temp) +{ + P(mutex2); + P(mutex1); + + lmgr_dump(); + + sleep(10); + + V(mutex1); + V(mutex2); + + lmgr_dump(); + return NULL; +} +void *th1(void *temp) +{ + P(mutex1); + sleep(2); + P(mutex2); + + lmgr_dump(); + + sleep(10); + + V(mutex2); + V(mutex1); + + lmgr_dump(); + return NULL; +} + +void *thx(void *temp) +{ + int s= 1 + (int) (500.0 * (rand() / (RAND_MAX + 1.0))) + 200; + P(mutex1); + 
bmicrosleep(0,s); + P(mutex2); + bmicrosleep(0,s); + + V(mutex2); + V(mutex1); + return NULL; +} + +void *th3(void *a) { + while (1) { + fprintf(stderr, "undertaker sleep()\n"); + sleep(10); + lmgr_dump(); + if (lmgr_detect_deadlock()) { + lmgr_dump(); + exit(1); + } + } + return NULL; +} + +int err=0; +int nb=0; +void _ok(const char *file, int l, const char *op, int value, const char *label) +{ + nb++; + if (!value) { + err++; + printf("ERR %.30s %s:%i on %s\n", label, file, l, op); + } else { + printf("OK %.30s\n", label); + } +} + +#define ok(x, label) _ok(__FILE__, __LINE__, #x, (x), label) + +void _nok(const char *file, int l, const char *op, int value, const char *label) +{ + nb++; + if (value) { + err++; + printf("ERR %.30s %s:%i on !%s\n", label, file, l, op); + } else { + printf("OK %.30s\n", label); + } +} + +#define nok(x, label) _nok(__FILE__, __LINE__, #x, (x), label) + +int report() +{ + printf("Result %i/%i OK\n", nb - err, nb); + return err>0; +} + +/* + * TODO: + * - Must detect multiple lock + * - lock/unlock in wrong order + * - deadlock with 2 or 3 threads + */ +int main() +{ + pthread_t id1, id2, id3, tab[200]; + lmgr_init_thread(); + + pthread_create(&id1, NULL, self_lock, NULL); + sleep(2); + ok(lmgr_detect_deadlock(), "Check self deadlock"); + lmgr_v(&mutex1); /* a bit dirty */ + pthread_join(id1, NULL); + + + pthread_create(&id1, NULL, nolock, NULL); + sleep(2); + nok(lmgr_detect_deadlock(), "Check for nolock"); + pthread_join(id1, NULL); + + P(mutex1); + pthread_create(&id1, NULL, locker, &mutex1); + pthread_create(&id2, NULL, locker, &mutex1); + pthread_create(&id3, NULL, locker, &mutex1); + sleep(2); + nok(lmgr_detect_deadlock(), "Check for multiple lock"); + V(mutex1); + pthread_join(id1, NULL); + pthread_join(id2, NULL); + pthread_join(id3, NULL); + + + brwlock_t wr; + rwl_init(&wr); + rwl_writelock(&wr); + rwl_writelock(&wr); + pthread_create(&id1, NULL, rwlocker, &wr); + pthread_create(&id2, NULL, rwlocker, &wr); + 
pthread_create(&id3, NULL, rwlocker, &wr); + nok(lmgr_detect_deadlock(), "Check for multiple rwlock"); + rwl_writeunlock(&wr); + nok(lmgr_detect_deadlock(), "Check for simple rwlock"); + rwl_writeunlock(&wr); + nok(lmgr_detect_deadlock(), "Check for multiple rwlock"); + + pthread_join(id1, NULL); + pthread_join(id2, NULL); + pthread_join(id3, NULL); + + rwl_writelock(&wr); + P(mutex1); + pthread_create(&id1, NULL, mix_rwl_mutex, &wr); + nok(lmgr_detect_deadlock(), "Check for mix rwlock/mutex"); + V(mutex1); + nok(lmgr_detect_deadlock(), "Check for mix rwlock/mutex"); + rwl_writeunlock(&wr); + nok(lmgr_detect_deadlock(), "Check for mix rwlock/mutex"); + pthread_join(id1, NULL); + + P(mutex5); + P(mutex6); + V(mutex5); + V(mutex6); + + nok(lmgr_detect_deadlock(), "Check for wrong order"); + + for(int j=0; j<200; j++) { + pthread_create(&tab[j], NULL, thx, NULL); + } + for(int j=0; j<200; j++) { + pthread_join(tab[j], NULL); + if (j%3) { lmgr_detect_deadlock();} + } + nok(lmgr_detect_deadlock(), "Check 200 lockers"); + + P(mutex4); + P(mutex5); + P(mutex6); + V(mutex6); + V(mutex5); + V(mutex4); + + pthread_create(&id1, NULL, th1, NULL); + sleep(1); + pthread_create(&id2, NULL, th2, NULL); + sleep(1); + ok(lmgr_detect_deadlock(), "Check for deadlock"); + +// lmgr_dump(); +// +// pthread_create(&id3, NULL, th3, NULL); +// +// pthread_join(id1, NULL); +// pthread_join(id2, NULL); + lmgr_cleanup_main(); + sm_check(__FILE__, __LINE__, false); + return report(); +} + +#endif diff --git a/bacula/src/lib/lockmgr.h b/bacula/src/lib/lockmgr.h new file mode 100644 index 0000000000..83d9b3c879 --- /dev/null +++ b/bacula/src/lib/lockmgr.h @@ -0,0 +1,142 @@ +/* + Bacula® - The Network Backup Solution + + Copyright (C) 2008-2008 Free Software Foundation Europe e.V. + + The main author of Bacula is Kern Sibbald, with contributions from + many others, a complete list can be found in the file AUTHORS. 
+ This program is Free Software; you can redistribute it and/or + modify it under the terms of version two of the GNU General Public + License as published by the Free Software Foundation, which is + listed in the file LICENSE. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + + Bacula® is a registered trademark of Kern Sibbald. + The licensor of Bacula is the Free Software Foundation Europe + (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich, + Switzerland, email:ftf@fsfeurope.org. +*/ + +#ifndef _LOCKMGR_H +#define _LOCKMGR_H 1 + +#include "bacula.h" + +/* + * P and V op that don't use the lock manager (for memory allocation or on + * win32) + */ +void lmgr_p(pthread_mutex_t *m); +void lmgr_v(pthread_mutex_t *m); + +#ifdef _USE_LOCKMGR + +/* + * We decide that a thread won't lock more than LMGR_MAX_LOCK at the same time + */ +#define LMGR_MAX_LOCK 32 + +/* Not yet working */ +int lmgr_cond_wait(pthread_cond_t *cond, + pthread_mutex_t *mutex); + +/* Replacement of pthread_mutex_lock() */ +int lmgr_mutex_lock(pthread_mutex_t *m, + const char *file="*unknown*", int line=0); + +/* Replacement of pthread_mutex_unlock() */ +int lmgr_mutex_unlock(pthread_mutex_t *m, + const char *file="*unknown*", int line=0); + +/* + * Use them when you want use your lock yourself (ie rwlock) + */ +void lmgr_pre_lock(void *m); /* Call before requesting the lock */ +void lmgr_post_lock(); /* Call after getting it */ +void lmgr_do_lock(void *m); /* Same as pre+post lock */ +void lmgr_do_unlock(void *m); /* Call just before releasing the lock */ + +/* + * Each thread have to 
call this function to put a lmgr_thread_t object + * in the stack and be able to call mutex_lock/unlock + */ +void lmgr_init_thread(); + +/* + * Call this function at the end of the thread + */ +void lmgr_cleanup_thread(); + +/* + * Call this at the end of the program, it will release the + * global lock manager + */ +void lmgr_cleanup_main(); + +/* + * Dump each lmgr_thread_t object to stderr + */ +void lmgr_dump(); + +/* + * Search a deadlock + */ +bool lmgr_detect_deadlock(); + +/* + * Search a deadlock after a fatal signal + * no locks are granted, so the program must be + * stopped. + */ +bool lmgr_detect_deadlock_unlocked(); + +/* + * This function will run your thread with lmgr_init_thread() and + * lmgr_cleanup_thread(). + */ +int lmgr_thread_create(pthread_t *thread, + const pthread_attr_t *attr, + void *(*start_routine)(void*), void *arg); + +/* + * Define _LOCKMGR_COMPLIANT to use real pthread functions + */ + +#ifdef _LOCKMGR_COMPLIANT +# define P(x) lmgr_p(&(x)) +# define V(x) lmgr_v(&(x)) +#else +# define P(x) lmgr_mutex_lock(&(x), __FILE__, __LINE__) +# define V(x) lmgr_mutex_unlock(&(x), __FILE__, __LINE__) +# define pthread_mutex_lock(x) lmgr_mutex_lock(x, __FILE__, __LINE__) +# define pthread_mutex_unlock(x) lmgr_mutex_unlock(x, __FILE__, __LINE__) +# define pthread_cond_wait(x,y) lmgr_cond_wait(x,y) +# define pthread_create(a, b, c, d) lmgr_thread_create(a,b,c,d) +#endif + +#else /* _USE_LOCKMGR */ + +# define lmgr_detect_deadlock() +# define lmgr_dump() +# define lmgr_init_thread() +# define lmgr_cleanup_thread() +# define lmgr_pre_lock(m) +# define lmgr_post_lock() +# define lmgr_do_lock(m) +# define lmgr_do_unlock(m) +# define lmgr_cleanup_main() +# define P(x) lmgr_p(&(x)) +# define V(x) lmgr_v(&(x)) + +#endif /* _USE_LOCKMGR */ + +#endif /* _LOCKMGR_H */ diff --git a/bacula/src/lib/rwlock.c b/bacula/src/lib/rwlock.c index c4d5262e36..908d5c3764 100644 --- a/bacula/src/lib/rwlock.c +++ b/bacula/src/lib/rwlock.c @@ -40,6 +40,7 @@ * */ +#define 
_LOCKMGR_COMPLIANT #include "bacula.h" /* @@ -232,11 +233,13 @@ int rwl_writelock(brwlock_t *rwl) pthread_mutex_unlock(&rwl->mutex); return 0; } + lmgr_pre_lock(rwl); if (rwl->w_active || rwl->r_active > 0) { rwl->w_wait++; /* indicate that we are waiting */ pthread_cleanup_push(rwl_write_release, (void *)rwl); while (rwl->w_active || rwl->r_active > 0) { if ((stat = pthread_cond_wait(&rwl->write, &rwl->mutex)) != 0) { + lmgr_do_unlock(rwl); break; /* error, bail out */ } } @@ -246,7 +249,8 @@ int rwl_writelock(brwlock_t *rwl) if (stat == 0) { rwl->w_active++; /* we are running */ rwl->writer_id = pthread_self(); /* save writer thread's id */ - } + lmgr_post_lock(); + } pthread_mutex_unlock(&rwl->mutex); return stat; } @@ -274,6 +278,7 @@ int rwl_writetrylock(brwlock_t *rwl) } else { rwl->w_active = 1; /* we are running */ rwl->writer_id = pthread_self(); /* save writer thread's id */ + lmgr_do_lock(rwl); } stat2 = pthread_mutex_unlock(&rwl->mutex); return (stat == 0 ? stat2 : stat); @@ -305,6 +310,7 @@ int rwl_writeunlock(brwlock_t *rwl) if (rwl->w_active > 0) { stat = 0; /* writers still active */ } else { + lmgr_do_unlock(rwl); /* No more writers, awaken someone */ if (rwl->r_wait > 0) { /* if readers waiting */ stat = pthread_cond_broadcast(&rwl->read); @@ -567,7 +573,7 @@ void *thread_routine (void *arg) int iteration; int element; int status; - + lmgr_init_thread(); element = 0; /* Current data element */ for (iteration = 0; iteration < ITERATIONS; iteration++) { @@ -601,6 +607,7 @@ void *thread_routine (void *arg) if (element >= DATASIZE) element = 0; } + lmgr_cleanup_thread(); return NULL; } diff --git a/bacula/src/lib/signal.c b/bacula/src/lib/signal.c index 1705942275..16be3856e5 100644 --- a/bacula/src/lib/signal.c +++ b/bacula/src/lib/signal.c @@ -76,6 +76,8 @@ const char *get_signal_name(int sig) extern void _dbg_print_jcr(FILE *fp); /* defined in plugin.c */ extern void _dbg_print_plugin(FILE *fp); +/* defined in lockmgr.c */ +extern void 
dbg_print_lock(FILE *fp); /* * !!! WARNING !!! @@ -102,6 +104,7 @@ static void dbg_print_bacula() _dbg_print_jcr(fp); _dbg_print_plugin(fp); + dbg_print_lock(fp); if (fp != stderr) { fclose(fp); @@ -230,6 +233,7 @@ extern "C" void signal_handler(int sig) void init_stack_dump(void) { main_pid = getpid(); /* save main thread's pid */ + lmgr_init_thread(); /* initialize the lockmanager stack */ } /* diff --git a/bacula/src/lib/smartall.c b/bacula/src/lib/smartall.c index 675a218f3f..a0a57d278b 100644 --- a/bacula/src/lib/smartall.c +++ b/bacula/src/lib/smartall.c @@ -45,6 +45,8 @@ */ +#define _LOCKMGR_COMPLIANT + #include "bacula.h" /* Use the real routines here */ #undef realloc diff --git a/bacula/src/lib/workq.c b/bacula/src/lib/workq.c index bd108f4875..518af821b7 100644 --- a/bacula/src/lib/workq.c +++ b/bacula/src/lib/workq.c @@ -112,9 +112,7 @@ int workq_destroy(workq_t *wq) if (wq->valid != WORKQ_VALID) { return EINVAL; } - if ((stat = pthread_mutex_lock(&wq->mutex)) != 0) { - return stat; - } + P(wq->mutex); wq->valid = 0; /* prevent any more operations */ /* @@ -124,20 +122,18 @@ int workq_destroy(workq_t *wq) wq->quit = 1; if (wq->idle_workers) { if ((stat = pthread_cond_broadcast(&wq->work)) != 0) { - pthread_mutex_unlock(&wq->mutex); + V(wq->mutex); return stat; } } while (wq->num_workers > 0) { if ((stat = pthread_cond_wait(&wq->work, &wq->mutex)) != 0) { - pthread_mutex_unlock(&wq->mutex); + V(wq->mutex); return stat; } } } - if ((stat = pthread_mutex_unlock(&wq->mutex)) != 0) { - return stat; - } + V(wq->mutex); stat = pthread_mutex_destroy(&wq->mutex); stat1 = pthread_cond_destroy(&wq->work); stat2 = pthread_attr_destroy(&wq->attr); @@ -156,7 +152,7 @@ int workq_destroy(workq_t *wq) */ int workq_add(workq_t *wq, void *element, workq_ele_t **work_item, int priority) { - int stat; + int stat=0; workq_ele_t *item; pthread_t id; @@ -170,10 +166,7 @@ int workq_add(workq_t *wq, void *element, workq_ele_t **work_item, int priority) } item->data = element; 
item->next = NULL; - if ((stat = pthread_mutex_lock(&wq->mutex)) != 0) { - free(item); - return stat; - } + P(wq->mutex); Dmsg0(1400, "add item to queue\n"); if (priority) { @@ -199,7 +192,7 @@ int workq_add(workq_t *wq, void *element, workq_ele_t **work_item, int priority) if (wq->idle_workers > 0) { Dmsg0(1400, "Signal worker\n"); if ((stat = pthread_cond_broadcast(&wq->work)) != 0) { - pthread_mutex_unlock(&wq->mutex); + V(wq->mutex); return stat; } } else if (wq->num_workers < wq->max_workers) { @@ -207,12 +200,12 @@ int workq_add(workq_t *wq, void *element, workq_ele_t **work_item, int priority) /* No idle threads so create a new one */ set_thread_concurrency(wq->max_workers + 1); if ((stat = pthread_create(&id, &wq->attr, workq_server, (void *)wq)) != 0) { - pthread_mutex_unlock(&wq->mutex); + V(wq->mutex); return stat; } wq->num_workers++; } - pthread_mutex_unlock(&wq->mutex); + V(wq->mutex); Dmsg0(1400, "Return workq_add\n"); /* Return work_item if requested */ if (work_item) { @@ -241,9 +234,7 @@ int workq_remove(workq_t *wq, workq_ele_t *work_item) return EINVAL; } - if ((stat = pthread_mutex_lock(&wq->mutex)) != 0) { - return stat; - } + P(wq->mutex); for (prev=item=wq->first; item; item=item->next) { if (item == work_item) { @@ -270,7 +261,7 @@ int workq_remove(workq_t *wq, workq_ele_t *work_item) if (wq->idle_workers > 0) { Dmsg0(1400, "Signal worker\n"); if ((stat = pthread_cond_broadcast(&wq->work)) != 0) { - pthread_mutex_unlock(&wq->mutex); + V(wq->mutex); return stat; } } else { @@ -278,12 +269,12 @@ int workq_remove(workq_t *wq, workq_ele_t *work_item) /* No idle threads so create a new one */ set_thread_concurrency(wq->max_workers + 1); if ((stat = pthread_create(&id, &wq->attr, workq_server, (void *)wq)) != 0) { - pthread_mutex_unlock(&wq->mutex); + V(wq->mutex); return stat; } wq->num_workers++; } - pthread_mutex_unlock(&wq->mutex); + V(wq->mutex); Dmsg0(1400, "Return workq_remove\n"); return stat; } @@ -302,9 +293,7 @@ void *workq_server(void 
*arg) int stat, timedout; Dmsg0(1400, "Start workq_server\n"); - if ((stat = pthread_mutex_lock(&wq->mutex)) != 0) { - return NULL; - } + P(wq->mutex); set_jcr_in_tsd(INVALID_JCR); for (;;) { @@ -328,7 +317,7 @@ void *workq_server(void *arg) * time that pthread_cond_timedwait() is called * so fake it out. */ - pthread_mutex_lock(&wq->mutex); + P(wq->mutex); stat = ETIMEDOUT; #else stat = pthread_cond_timedwait(&wq->work, &wq->mutex, &timeout); @@ -341,7 +330,7 @@ void *workq_server(void *arg) /* This shouldn't happen */ Dmsg0(1400, "This shouldn't happen\n"); wq->num_workers--; - pthread_mutex_unlock(&wq->mutex); + V(wq->mutex); return NULL; } } @@ -351,18 +340,14 @@ void *workq_server(void *arg) if (wq->last == we) { wq->last = NULL; } - if ((stat = pthread_mutex_unlock(&wq->mutex)) != 0) { - return NULL; - } + V(wq->mutex); /* Call user's routine here */ Dmsg0(1400, "Calling user engine.\n"); wq->engine(we->data); Dmsg0(1400, "Back from user engine.\n"); free(we); /* release work entry */ Dmsg0(1400, "relock mutex\n"); - if ((stat = pthread_mutex_lock(&wq->mutex)) != 0) { - return NULL; - } + P(wq->mutex); Dmsg0(1400, "Done lock mutex\n"); } /* @@ -376,7 +361,7 @@ void *workq_server(void *arg) pthread_cond_broadcast(&wq->work); } Dmsg0(1400, "Unlock mutex\n"); - pthread_mutex_unlock(&wq->mutex); + V(wq->mutex); Dmsg0(1400, "Return from workq_server\n"); return NULL; } @@ -395,7 +380,7 @@ void *workq_server(void *arg) } /* end of big for loop */ Dmsg0(1400, "unlock mutex\n"); - pthread_mutex_unlock(&wq->mutex); + V(wq->mutex); Dmsg0(1400, "End workq_server\n"); return NULL; } diff --git a/bacula/src/stored/stored.c b/bacula/src/stored/stored.c index 0d1c388b22..76f7b2e2cd 100644 --- a/bacula/src/stored/stored.c +++ b/bacula/src/stored/stored.c @@ -664,6 +664,7 @@ void terminate_stored(int sig) cleanup_crypto(); term_reservations_lock(); close_memory_pool(); + lmgr_cleanup_main(); sm_dump(false); /* dump orphaned buffers */ exit(sig); diff --git 
a/bacula/src/win32/dll/Makefile b/bacula/src/win32/dll/Makefile index 71ba6ce448..f3baea021b 100644 --- a/bacula/src/win32/dll/Makefile +++ b/bacula/src/win32/dll/Makefile @@ -91,7 +91,8 @@ LIB_OBJS = \ $(OBJDIR)/util.o \ $(OBJDIR)/var.o \ $(OBJDIR)/watchdog.o \ - $(OBJDIR)/workq.o + $(OBJDIR)/workq.o \ + $(OBJDIR)/lockmgr.o DLL_OBJS = \ $(COMPAT_OBJS) $(FIND_OBJS) $(LIB_OBJS) diff --git a/bacula/src/win32/dll/bacula.def b/bacula/src/win32/dll/bacula.def index e227389029..3ad0e9afc1 100644 --- a/bacula/src/win32/dll/bacula.def +++ b/bacula/src/win32/dll/bacula.def @@ -319,8 +319,6 @@ _Z15escape_filenamePKc _Z15read_state_filePcPKci _Z16write_state_filePcPKci _Z20make_unique_filenamePPciS_ -_Z2_pPP16pthread_mutex_t_ -_Z2_vPP16pthread_mutex_t_ _Z4dropPcS_ _Z5bfreePv _Z6bfgetsPciP6_iobuf @@ -755,6 +753,10 @@ _Z17register_watchdogP12s_watchdog_t _Z19unregister_watchdogP12s_watchdog_t watchdog_thread +; lockmgr.o +_Z6lmgr_pPP16pthread_mutex_t_ +_Z6lmgr_vPP16pthread_mutex_t_ + console_command DATA plugin_list DATA plugin_bopen DATA diff --git a/bacula/src/win32/libbac/Makefile b/bacula/src/win32/libbac/Makefile index 55c260be9f..d19adc35f4 100644 --- a/bacula/src/win32/libbac/Makefile +++ b/bacula/src/win32/libbac/Makefile @@ -78,7 +78,8 @@ LIB_OBJS = \ $(OBJDIR)/var.o \ $(OBJDIR)/watchdog.o \ $(OBJDIR)/winapi.o \ - $(OBJDIR)/workq.o + $(OBJDIR)/workq.o \ + $(OBJDIR)/lockmgr.o # $(LIBDIR)/events.o diff --git a/bacula/technotes-2.5 b/bacula/technotes-2.5 index e335f96545..47510f6a71 100644 --- a/bacula/technotes-2.5 +++ b/bacula/technotes-2.5 @@ -11,6 +11,9 @@ mixed priorities General: 09Dec08 +ebl Add a new lock manager that can detect deadlock situation + This new option is activated with a --enable-lockmgr configure + option. ebl Add new Director->MaxConsoleConnections directive 03Dec08 ebl Fix bacula-sd hanging after tape gets full + unload -- 2.39.5