From 306bc282befefe661648ed776d87fe6b5eb58916 Mon Sep 17 00:00:00 2001 From: Eric Bollengier Date: Sun, 9 Nov 2008 21:24:59 +0000 Subject: [PATCH] ebl remove lock debug git-svn-id: https://bacula.svn.sourceforge.net/svnroot/bacula/trunk@8022 91ce42f0-d328-0410-95d8-f526ca767f89 --- bacula/patches/testing/debug_lock.patch | 213 ++++++++---------------- 1 file changed, 71 insertions(+), 142 deletions(-) diff --git a/bacula/patches/testing/debug_lock.patch b/bacula/patches/testing/debug_lock.patch index 477154d3a4..fae93331aa 100644 --- a/bacula/patches/testing/debug_lock.patch +++ b/bacula/patches/testing/debug_lock.patch @@ -1,170 +1,99 @@ Index: src/cats/sql.c =================================================================== ---- src/cats/sql.c (révision 7981) +--- src/cats/sql.c (révision 8008) +++ src/cats/sql.c (copie de travail) -@@ -311,6 +311,36 @@ +@@ -311,33 +311,6 @@ return mdb->errmsg; } -+static void update_lock_dbg(B_DB *mdb) { -+ if (mdb->allow_transactions) { /* batch connection */ -+ return; -+ } -+ if (_db_lock_recurse_count && !pthread_equal(_db_lock_threadid, pthread_self())) { -+ Dmsg2(1, "ERROR: not the same threadif %p != %p\n", _db_lock_threadid, pthread_self()); -+ } -+// if (_db_lock_jcr && _db_lock_jcr != mdb->jcr) { -+// Dmsg2(1, "ERROR: not the same jcr %p != %p", _db_lock_jcr, mdb->jcr); -+// } -+ _db_lock_recurse_count++; -+// _db_lock_jobid = mdb->jcr->JobId; -+ _db_lock_time = (utime_t) time(NULL); -+ _db_lock_threadid = pthread_self(); -+} -+ -+static void update_unlock_dbg(B_DB *mdb) { -+ if (mdb->allow_transactions) { /* batch connection */ -+ return; -+ } -+ if (!pthread_equal(_db_lock_threadid, pthread_self())) { -+ Dmsg2(1, "ERROR: not the same threadid %p != %p", _db_lock_threadid, pthread_self()); -+ } -+ _db_lock_recurse_count--; -+ if (!_db_lock_recurse_count) { -+ _db_lock_jobid = 0; -+ memset(&_db_lock_threadid, 0, sizeof(_db_lock_threadid)); -+ } -+} -+ +-static void update_lock_dbg(B_DB *mdb) +-{ +- if (mdb->allow_transactions) { /* batch connection */ +- return; +- } +- if (_db_lock_recurse_count && !pthread_equal(_db_lock_threadid, pthread_self())) { +- Dmsg2(1, "ERROR: not the same threadif %p != %p\n", _db_lock_threadid, pthread_self()); +- } +- _db_lock_recurse_count++; +- _db_lock_time = (utime_t) time(NULL); +- _db_lock_threadid = pthread_self(); +-} +- +-static void update_unlock_dbg(B_DB *mdb) +-{ +- if (mdb->allow_transactions) { /* batch connection */ +- return; +- } +- if (!pthread_equal(_db_lock_threadid, pthread_self())) { +- Dmsg2(1, "ERROR: not the same threadid %p != %p", _db_lock_threadid, pthread_self()); +- } +- _db_lock_recurse_count--; +- if (!_db_lock_recurse_count) { +- memset(&_db_lock_threadid, 0, sizeof(_db_lock_threadid)); +- } +-} +- /* * Lock database, this can be called multiple times by the same * thread without blocking, but must be unlocked the number of -@@ -324,6 +354,7 @@ +@@ -351,7 +324,6 @@ e_msg(file, line, M_FATAL, 0, "rwl_writelock failure. stat=%d: ERR=%s\n", errstat, be.bstrerror(errstat)); } -+ update_lock_dbg(mdb); +- update_lock_dbg(mdb); } /* -@@ -334,6 +365,7 @@ +@@ -362,7 +334,6 @@ void _db_unlock(const char *file, int line, B_DB *mdb) { int errstat; -+ update_unlock_dbg(mdb); +- update_unlock_dbg(mdb); if ((errstat=rwl_writeunlock(&mdb->lock)) != 0) { berrno be; e_msg(file, line, M_FATAL, 0, "rwl_writeunlock failure. stat=%d: ERR=%s\n", -Index: src/lib/message.c +Index: src/lib/protos.h =================================================================== ---- src/lib/message.c (révision 7981) -+++ src/lib/message.c (copie de travail) -@@ -62,6 +62,25 @@ - FILE *con_fd = NULL; /* Console file descriptor */ - brwlock_t con_lock; /* Console lock structure */ +--- src/lib/protos.h (révision 8005) ++++ src/lib/protos.h (copie de travail) +@@ -252,13 +252,6 @@ + void init_signals (void terminate(int sig)); + void init_stack_dump (void); -+ -+/* -+ * Global variables to get information about lock/unlock db access -+ */ -+JCR *_db_lock_jcr = NULL; -+uint32_t _db_lock_jobid = 0; -+utime_t _db_lock_time = 0; -+int _db_lock_recurse_count = 0; -+pthread_t _db_lock_threadid; -+ -+void print_lock_dbg() -+{ -+ char buf[128]; -+ bstrutime(buf, sizeof(buf), _db_lock_time); -+ -+ fprintf(stderr, "lock info: recurse_count=%i jobid=%i threadid=%i time=%s\n", -+ _db_lock_recurse_count, _db_lock_jobid, (int)_db_lock_threadid, buf); -+} -+ - /* Forward referenced functions */ - - /* Imported functions */ -Index: src/lib/message.h -=================================================================== ---- src/lib/message.h (révision 7981) -+++ src/lib/message.h (copie de travail) -@@ -163,3 +163,14 @@ - extern DLL_IMP_EXP int console_msg_pending; - extern DLL_IMP_EXP FILE * con_fd; /* Console file descriptor */ - extern DLL_IMP_EXP brwlock_t con_lock; /* Console lock structure */ -+ -+/* Used to debug database lock -+ * which job takes the main DB access -+ */ -+void print_lock_dbg(); -+extern DLL_IMP_EXP JCR * _db_lock_jcr; -+extern DLL_IMP_EXP uint32_t _db_lock_jobid; -+extern DLL_IMP_EXP utime_t _db_lock_time; -+extern DLL_IMP_EXP int _db_lock_recurse_count; -+extern DLL_IMP_EXP pthread_t _db_lock_threadid; -+ -Index: src/lib/jcr.c -=================================================================== ---- src/lib/jcr.c (révision 7981) -+++ src/lib/jcr.c (copie de travail) -@@ -836,7 +836,37 @@ - V(jcr_lock); - } - -+/* -+ * This function should be used only after receiving a violent signal -+ * We walk through the JCR chain without doing any lock, bacula should -+ * not be running. -+ */ -+void print_jcr_dbg() -+{ -+ char buf1[128], buf2[128], buf3[128], buf4[128]; -+ FILE *fp = stderr; - -+ if (!jcrs) -+ return; -+ -+ for (JCR *jcr = (JCR *)jcrs->first(); jcr ; jcr = (JCR *)jcrs->next(jcr)) { -+ if (!jcr) { -+ continue; -+ } -+ -+ fprintf(fp, "JCR=%p JobId=%i name=%s JobStatus=%c\n", jcr, jcr->JobId, jcr->Job, jcr->JobStatus); -+ fprintf(fp, "\tuse_count=%i threadid=%i\n", jcr->use_count(), (int)jcr->my_thread_id); -+ fprintf(fp, "\tJobType=%i JobLevel=%i\n", jcr->get_JobType(), jcr->get_JobLevel()); -+ bstrftime(buf1, sizeof(buf1), jcr->sched_time); -+ bstrftime(buf2, sizeof(buf2), jcr->start_time); -+ bstrftime(buf3, sizeof(buf3), jcr->end_time); -+ bstrftime(buf4, sizeof(buf4), jcr->wait_time); -+ fprintf(fp, "\tsched_time=%s start_time=%s end_time=%s wait_time=%s\n", buf1, buf2, buf3, buf4); -+ fprintf(fp, "\tdequeing=%i\n", jcr->dequeuing); -+ fprintf(fp, "\tdb=%p db_batch=%p batch_started=%i\n", jcr->db, jcr->db_batch, jcr->batch_started); -+ } -+} -+ - /* - * Start walk of jcr chain - * The proper way to walk the jcr chain is: +-/* Used to debug database lock +- * which job takes the main DB access +- */ +-extern DLL_IMP_EXP utime_t _db_lock_time; +-extern DLL_IMP_EXP int _db_lock_recurse_count; +-extern DLL_IMP_EXP pthread_t _db_lock_threadid; +- + /* scan.c */ + void strip_leading_space (char *str); + void strip_trailing_junk (char *str); Index: src/lib/signal.c =================================================================== ---- src/lib/signal.c (révision 7981) +--- src/lib/signal.c (révision 8005) +++ src/lib/signal.c (copie de travail) -@@ -53,6 +53,7 @@ - extern char my_name[]; - extern char *exepath; - extern char *exename; -+extern void print_jcr_dbg(); - - static const char *sig_names[BA_NSIG+1]; +@@ -72,22 +72,6 @@ + } + } -@@ -104,6 +105,9 @@ - pid_t pid; - int exelen = strlen(exepath); +-/* +- * Global variables to get information about lock/unlock db access +- */ +-utime_t _db_lock_time = 0; +-int _db_lock_recurse_count = 0; +-pthread_t _db_lock_threadid; +- +-static void print_lock_dbg(FILE *fp) +-{ +- char buf[128]; +- bstrutime(buf, sizeof(buf), _db_lock_time); +- +- fprintf(fp, "lock info: recurse_count=%i threadid=0x%x time=%s\n", +- _db_lock_recurse_count, (int)_db_lock_threadid, buf); +-} +- + /* defined in jcr.c */ + extern void _print_jcr_dbg(FILE *fp); -+ print_lock_dbg(); -+ print_jcr_dbg(); -+ - fprintf(stderr, _("Kaboom! %s, %s got signal %d - %s. Attempting traceback.\n"), - exename, my_name, sig, get_signal_name(sig)); - fprintf(stderr, _("Kaboom! exepath=%s\n"), exepath); -- 2.39.5