*
*/
/*
- Copyright (C) 2000-2003 Kern Sibbald and John Walker
+ Copyright (C) 2000-2004 Kern Sibbald and John Walker
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
#include "bacula.h"
#include "jcr.h"
-/* This breaks Kern's #include rules, but I don't want to put it into bacula.h
- * until it has been discussed with him */
-#include "bsd_queue.h"
-
/* Exported globals */
-time_t watchdog_time; /* this has granularity of SLEEP_TIME */
+time_t watchdog_time = 0; /* this has granularity of SLEEP_TIME */
#define SLEEP_TIME 1 /* examine things every second */
/* Forward referenced functions */
static void *watchdog_thread(void *arg);
+static void wd_lock();
+static void wd_unlock();
/* Static globals */
-static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
-static pthread_cond_t timer = PTHREAD_COND_INITIALIZER;
-static bool quit;
+static bool quit = false;;
static bool wd_is_init = false;
+static brwlock_t lock; /* watchdog lock */
-/* Forward referenced callback functions */
static pthread_t wd_tid;
-
-/* Static globals */
-static TAILQ_HEAD(/* no struct */, s_watchdog_t) wd_queue =
- TAILQ_HEAD_INITIALIZER(wd_queue);
-static TAILQ_HEAD(/* no struct */, s_watchdog_t) wd_inactive =
- TAILQ_HEAD_INITIALIZER(wd_inactive);
+static dlist *wd_queue;
+static dlist *wd_inactive;
/*
* Start watchdog thread
int start_watchdog(void)
{
int stat;
+ watchdog_t *dummy = NULL;
+ int errstat;
+ if (wd_is_init) {
+ return 0;
+ }
Dmsg0(200, "Initialising NicB-hacked watchdog thread\n");
watchdog_time = time(NULL);
- quit = false;
+
+ if ((errstat=rwl_init(&lock)) != 0) {
+ Emsg1(M_ABORT, 0, _("Unable to initialize watchdog lock. ERR=%s\n"),
+ strerror(errstat));
+ }
+ wd_queue = new dlist(wd_queue, &dummy->link);
+ wd_inactive = new dlist(wd_inactive, &dummy->link);
+
if ((stat = pthread_create(&wd_tid, NULL, watchdog_thread, NULL)) != 0) {
return stat;
}
int stop_watchdog(void)
{
int stat;
- watchdog_t *p, *n;
+ watchdog_t *p;
if (!wd_is_init) {
return 0;
}
- Dmsg0(200, "Sending stop signal to NicB-hacked watchdog thread\n");
- P(mutex);
- quit = true;
- stat = pthread_cond_signal(&timer);
- V(mutex);
-
+ quit = true; /* notify watchdog thread to stop */
wd_is_init = false;
+ /* Wait for the watchdog thread to exit before the lists are torn down */
stat = pthread_join(wd_tid, NULL);
- TAILQ_FOREACH_SAFE(p, &wd_queue, qe, n) {
- TAILQ_REMOVE(&wd_queue, p, qe);
+ /*
+ * Run each active watchdog's destructor (if any) and free it.
+ * NOTE(review): p is freed inside the loop body; confirm that
+ * foreach_dlist does not read p to find the next item -- TODO verify.
+ */
+ foreach_dlist(p, wd_queue) {
if (p->destructor != NULL) {
p->destructor(p);
}
free(p);
}
+ delete wd_queue;
+ wd_queue = NULL;
- TAILQ_FOREACH_SAFE(p, &wd_inactive, qe, n) {
- TAILQ_REMOVE(&wd_inactive, p, qe);
+ /* Same teardown for the inactive list (same review note applies) */
+ foreach_dlist(p, wd_inactive) {
if (p->destructor != NULL) {
p->destructor(p);
}
free(p);
}
+ delete wd_inactive;
+ wd_inactive = NULL;
+ rwl_destroy(&lock);
+
return stat;
}
-watchdog_t *watchdog_new(void)
+watchdog_t *new_watchdog(void)
{
- watchdog_t *wd = (watchdog_t *) malloc(sizeof(watchdog_t));
+ watchdog_t *wd = (watchdog_t *)malloc(sizeof(watchdog_t));
if (!wd_is_init) {
- Emsg0(M_ABORT, 0, "BUG! watchdog_new called before start_watchdog\n");
+ start_watchdog();
}
if (wd == NULL) {
Emsg1(M_ABORT, 0, "BUG! Watchdog %p has zero interval\n", wd);
}
- P(mutex);
+ wd_lock();
wd->next_fire = watchdog_time + wd->interval;
- TAILQ_INSERT_TAIL(&wd_queue, wd, qe);
+ wd_queue->append(wd);
Dmsg3(200, "Registered watchdog %p, interval %d%s\n",
wd, wd->interval, wd->one_shot ? " one shot" : "");
- V(mutex);
+ wd_unlock();
return false;
}
bool unregister_watchdog_unlocked(watchdog_t *wd)
{
- watchdog_t *p, *n;
+ watchdog_t *p;
if (!wd_is_init) {
Emsg0(M_ABORT, 0, "BUG! unregister_watchdog_unlocked called before start_watchdog\n");
}
+ /*
+ * This routine takes no lock itself; unregister_watchdog() wraps
+ * the call in wd_lock()/wd_unlock().
+ */
+ /* Look in the active queue first; return as soon as wd is removed */
- TAILQ_FOREACH_SAFE(p, &wd_queue, qe, n) {
+ foreach_dlist(p, wd_queue) {
if (wd == p) {
- TAILQ_REMOVE(&wd_queue, wd, qe);
+ wd_queue->remove(wd);
Dmsg1(200, "Unregistered watchdog %p\n", wd);
return true;
}
}
+ /* Not active: it may be a one-shot that was moved to the inactive list */
- TAILQ_FOREACH_SAFE(p, &wd_inactive, qe, n) {
+ foreach_dlist(p, wd_inactive) {
if (wd == p) {
- TAILQ_REMOVE(&wd_inactive, wd, qe);
+ wd_inactive->remove(wd);
Dmsg1(200, "Unregistered inactive watchdog %p\n", wd);
return true;
}
}
Dmsg1(200, "Failed to unregister watchdog %p\n", wd);
-
return false;
}
Emsg0(M_ABORT, 0, "BUG! unregister_watchdog called before start_watchdog\n");
}
- P(mutex);
+ wd_lock();
ret = unregister_watchdog_unlocked(wd);
- V(mutex);
+ wd_unlock();
return ret;
}
{
Dmsg0(200, "NicB-reworked watchdog thread entered\n");
- while (true) {
- watchdog_t *p, *n;
-
- P(mutex);
- if (quit) {
- V(mutex);
- break;
- }
-
+ while (!quit) {
+ watchdog_t *p;
+
+ /*
+ * We lock the jcr chain here because a good number of the
+ * callback routines lock the jcr chain. We need to lock
+ * it here *before* the watchdog lock because the SD message
+ * thread first locks the jcr chain, then when closing the
+ * job locks the watchdog chain. If the two threads do not
+ * lock in the same order, we get a deadlock -- each holds
+ * the other's needed lock.
+ */
+ lock_jcr_chain();
+ wd_lock();
watchdog_time = time(NULL);
- TAILQ_FOREACH_SAFE(p, &wd_queue, qe, n) {
+ foreach_dlist(p, wd_queue) {
if (p->next_fire < watchdog_time) {
/* Run the callback */
p->callback(p);
/* Reschedule (or move to inactive list if it's a one-shot timer) */
if (p->one_shot) {
- TAILQ_REMOVE(&wd_queue, p, qe);
- TAILQ_INSERT_TAIL(&wd_inactive, p, qe);
+ wd_queue->remove(p);
+ wd_inactive->append(p);
} else {
p->next_fire = watchdog_time + p->interval;
}
}
}
- V(mutex);
+ wd_unlock();
+ unlock_jcr_chain();
bmicrosleep(SLEEP_TIME, 0);
}
Dmsg0(200, "NicB-reworked watchdog thread exited\n");
-
return NULL;
}
+
+/*
+ * Watchdog lock, this can be called multiple times by the same
+ * thread without blocking, but must be unlocked the number of
+ * times it was locked.
+ *
+ * Takes the write side of the watchdog rwlock. A non-zero return
+ * from rwl_writelock() is reported through Emsg1 with M_ABORT.
+ */
+static void wd_lock()
+{
+ int errstat;
+ if ((errstat=rwl_writelock(&lock)) != 0) {
+ Emsg1(M_ABORT, 0, "rwl_writelock failure. ERR=%s\n",
+ strerror(errstat));
+ }
+}
+
+/*
+ * Unlock the watchdog. This can be called multiple times by the
+ * same thread up to the number of times that thread called
+ * wd_lock().
+ *
+ * Releases the write side of the watchdog rwlock. A non-zero return
+ * from rwl_writeunlock() is reported through Emsg1 with M_ABORT.
+ */
+static void wd_unlock()
+{
+ int errstat;
+ if ((errstat=rwl_writeunlock(&lock)) != 0) {
+ Emsg1(M_ABORT, 0, "rwl_writeunlock failure. ERR=%s\n",
+ strerror(errstat));
+ }
+}