2 This patch fixes two problems: 1. A deadlock between the job
3 queue scheduler and the watchdog thread. 2. A bad initialization
4 of the watchdog queue, which could cause memory corruption. It also
5 increases the watchdog sleep interval from 1 second to 10 seconds.
7 Apply the patch to Bacula 1.34.6 (probably any 1.34.x version) with:
10 patch -p0 <1.34.6-deadlock.patch
15 Index: src/dird/jobq.c
16 ===================================================================
17 RCS file: /cvsroot/bacula/bacula/src/dird/jobq.c,v
18 retrieving revision 1.19
20 --- src/dird/jobq.c 1 Jun 2004 20:10:04 -0000 1.19
21 +++ src/dird/jobq.c 9 Aug 2004 06:08:08 -0000
25 Dmsg1(300, "====== Termination job=%d\n", jcr->JobId);
26 + V(jq->mutex); /* release internal job queue lock */
28 free(je); /* release job entry */
29 + P(jq->mutex); /* acquire internal job queue lock */
32 * If any job in the wait queue can be run,
33 Index: src/lib/watchdog.c
34 ===================================================================
35 RCS file: /cvsroot/bacula/bacula/src/lib/watchdog.c,v
36 retrieving revision 1.27
37 diff -u -r1.27 watchdog.c
38 --- src/lib/watchdog.c 1 Apr 2004 16:37:01 -0000 1.27
39 +++ src/lib/watchdog.c 9 Aug 2004 06:08:08 -0000
42 /* Exported globals */
43 time_t watchdog_time = 0; /* this has granularity of SLEEP_TIME */
44 +time_t watchdog_sleep_time = 10; /* examine things every 10 seconds */
46 -#define SLEEP_TIME 1 /* examine things every second */
48 /* Forward referenced functions */
49 -static void *watchdog_thread(void *arg);
50 +void *watchdog_thread(void *arg);
52 static void wd_lock();
53 static void wd_unlock();
56 Emsg1(M_ABORT, 0, _("Unable to initialize watchdog lock. ERR=%s\n"),
59 - wd_queue = new dlist(wd_queue, &dummy->link);
60 - wd_inactive = new dlist(wd_inactive, &dummy->link);
61 + wd_queue = new dlist(dummy, &dummy->link);
62 + wd_inactive = new dlist(dummy, &dummy->link);
64 if ((stat = pthread_create(&wd_tid, NULL, watchdog_thread, NULL)) != 0) {
70 -static void *watchdog_thread(void *arg)
71 +void *watchdog_thread(void *arg)
73 Dmsg0(400, "NicB-reworked watchdog thread entered\n");
79 - bmicrosleep(SLEEP_TIME, 0);
80 + bmicrosleep(watchdog_sleep_time, 0);
83 Dmsg0(400, "NicB-reworked watchdog thread exited\n");