From: Eric Bollengier Date: Thu, 6 Nov 2008 13:31:14 +0000 (+0000) Subject: ebl Add code to get more information after a violent signal. X-Git-Tag: Release-3.0.0~633 X-Git-Url: https://git.sur5r.net/?a=commitdiff_plain;h=e4718a90e81561baa0cb1fe628dee870979435bf;p=bacula%2Fbacula ebl Add code to get more information after a violent signal. git-svn-id: https://bacula.svn.sourceforge.net/svnroot/bacula/trunk@7990 91ce42f0-d328-0410-95d8-f526ca767f89 --- diff --git a/bacula/src/cats/sql.c b/bacula/src/cats/sql.c index 052d37af49..41b4b7fece 100644 --- a/bacula/src/cats/sql.c +++ b/bacula/src/cats/sql.c @@ -311,6 +311,31 @@ char *db_strerror(B_DB *mdb) return mdb->errmsg; } +static void update_lock_dbg(B_DB *mdb) { + if (mdb->allow_transactions) { /* batch connection */ + return; + } + if (_db_lock_recurse_count && !pthread_equal(_db_lock_threadid, pthread_self())) { + Dmsg2(1, "ERROR: not the same threadif %p != %p\n", _db_lock_threadid, pthread_self()); + } + _db_lock_recurse_count++; + _db_lock_time = (utime_t) time(NULL); + _db_lock_threadid = pthread_self(); +} + +static void update_unlock_dbg(B_DB *mdb) { + if (mdb->allow_transactions) { /* batch connection */ + return; + } + if (!pthread_equal(_db_lock_threadid, pthread_self())) { + Dmsg2(1, "ERROR: not the same threadid %p != %p", _db_lock_threadid, pthread_self()); + } + _db_lock_recurse_count--; + if (!_db_lock_recurse_count) { + memset(&_db_lock_threadid, 0, sizeof(_db_lock_threadid)); + } +} + /* * Lock database, this can be called multiple times by the same * thread without blocking, but must be unlocked the number of @@ -324,6 +349,7 @@ void _db_lock(const char *file, int line, B_DB *mdb) e_msg(file, line, M_FATAL, 0, "rwl_writelock failure. stat=%d: ERR=%s\n", errstat, be.bstrerror(errstat)); } + update_lock_dbg(mdb); } /* @@ -334,6 +360,7 @@ void _db_lock(const char *file, int line, B_DB *mdb) void _db_unlock(const char *file, int line, B_DB *mdb) { int errstat; + update_unlock_dbg(mdb); if ((errstat=rwl_writeunlock(&mdb->lock)) != 0) { berrno be; e_msg(file, line, M_FATAL, 0, "rwl_writeunlock failure. stat=%d: ERR=%s\n", diff --git a/bacula/src/lib/jcr.c b/bacula/src/lib/jcr.c index 4b8b9d110a..6228e36690 100644 --- a/bacula/src/lib/jcr.c +++ b/bacula/src/lib/jcr.c @@ -836,6 +836,43 @@ static void unlock_jcr_chain() V(jcr_lock); } +/* + * This function should be used only after receiving a violent signal + * We walk through the JCR chain without doing any lock, bacula should + * not be running. + */ +void print_jcr_dbg() +{ + char buf1[128], buf2[128], buf3[128], buf4[128]; + + if (!jcrs) { + return; + } + + fprintf(stderr, "Attempt to dump current JCRs\n"); + + for (JCR *jcr = (JCR *)jcrs->first(); jcr ; jcr = (JCR *)jcrs->next(jcr)) { + if (!jcr) { /* protect us against something ? */ + continue; + } + + fprintf(stderr, "JCR=%p JobId=%i name=%s JobStatus=%c\n", + jcr, jcr->JobId, jcr->Job, jcr->JobStatus); + fprintf(stderr, "\tuse_count=%i threadid=0x%x\n", + jcr->use_count(), (int)jcr->my_thread_id); + fprintf(stderr, "\tJobType=%c JobLevel=%c\n", + jcr->get_JobType(), jcr->get_JobLevel()); + bstrftime(buf1, sizeof(buf1), jcr->sched_time); + bstrftime(buf2, sizeof(buf2), jcr->start_time); + bstrftime(buf3, sizeof(buf3), jcr->end_time); + bstrftime(buf4, sizeof(buf4), jcr->wait_time); + fprintf(stderr, "\tsched_time=%s start_time=%s\n\tend_time=%s wait_time=%s\n", + buf1, buf2, buf3, buf4); + fprintf(stderr, "\tdequeing=%i\n", jcr->dequeuing); + fprintf(stderr, "\tdb=%p db_batch=%p batch_started=%i\n", + jcr->db, jcr->db_batch, jcr->batch_started); + } +} /* * Start walk of jcr chain diff --git a/bacula/src/lib/message.c b/bacula/src/lib/message.c index 85ba0b6d05..a7141d3387 100644 --- a/bacula/src/lib/message.c +++ b/bacula/src/lib/message.c @@ -62,6 +62,23 @@ char con_fname[500]; /* Console filename */ FILE *con_fd = NULL; /* Console file descriptor */ brwlock_t con_lock; /* Console lock structure */ + +/* + * Global variables to get information about lock/unlock db access + */ +utime_t _db_lock_time = 0; +int _db_lock_recurse_count = 0; +pthread_t _db_lock_threadid; + +void print_lock_dbg() +{ + char buf[128]; + bstrutime(buf, sizeof(buf), _db_lock_time); + + fprintf(stderr, "lock info: recurse_count=%i threadid=0x%x time=%s\n", + _db_lock_recurse_count, (int)_db_lock_threadid, buf); +} + /* Forward referenced functions */ /* Imported functions */ diff --git a/bacula/src/lib/message.h b/bacula/src/lib/message.h index 3ac586a5e8..5917de5af7 100644 --- a/bacula/src/lib/message.h +++ b/bacula/src/lib/message.h @@ -163,3 +163,12 @@ extern DLL_IMP_EXP time_t daemon_start_time; extern DLL_IMP_EXP int console_msg_pending; extern DLL_IMP_EXP FILE * con_fd; /* Console file descriptor */ extern DLL_IMP_EXP brwlock_t con_lock; /* Console lock structure */ + +/* Used to debug database lock + * which job takes the main DB access + */ +void print_lock_dbg(); +extern DLL_IMP_EXP utime_t _db_lock_time; +extern DLL_IMP_EXP int _db_lock_recurse_count; +extern DLL_IMP_EXP pthread_t _db_lock_threadid; + diff --git a/bacula/src/lib/signal.c b/bacula/src/lib/signal.c index 51fe13a83d..7840af6901 100644 --- a/bacula/src/lib/signal.c +++ b/bacula/src/lib/signal.c @@ -53,6 +53,7 @@ extern char my_name[]; extern char *exepath; extern char *exename; +extern void print_jcr_dbg(); static const char *sig_names[BA_NSIG+1]; @@ -156,6 +157,7 @@ extern "C" void signal_handler(int sig) default: /* parent */ break; } + /* Parent continue here, waiting for child */ sigdefault.sa_flags = 0; sigdefault.sa_handler = SIG_DFL; @@ -165,8 +167,11 @@ extern "C" void signal_handler(int sig) if (pid > 0) { Dmsg0(500, "Doing waitpid\n"); waitpid(pid, NULL, 0); /* wait for child to produce dump */ - fprintf(stderr, _("Traceback complete, attempting cleanup ...\n")); Dmsg0(500, "Done waitpid\n"); + fprintf(stderr, _("Traceback complete, attempting cleanup ...\n")); + /* print information about the current state into stderr */ + print_lock_dbg(); + print_jcr_dbg(); exit_handler(sig); /* clean up if possible */ Dmsg0(500, "Done exit_handler\n"); } else { diff --git a/bacula/technotes-2.5 b/bacula/technotes-2.5 index ccf0b64ec3..6746fb782d 100644 --- a/bacula/technotes-2.5 +++ b/bacula/technotes-2.5 @@ -10,6 +10,8 @@ filepattern (restore with regex in bsr) mixed priorities General: +06Nov08 +ebl Add code to get more information after a violent signal. 05Nov08 ebl Apply Bastian's patch that add spooldata=yes|no option to run command.