2 Bacula® - The Network Backup Solution
4 Copyright (C) 2002-2007 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from
7 many others, a complete list can be found in the file AUTHORS.
8 This program is Free Software; you can redistribute it and/or
9 modify it under the terms of version two of the GNU General Public
10 License as published by the Free Software Foundation plus additions
11 that are listed in the file LICENSE.
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
23 Bacula® is a registered trademark of John Walker.
24 The licensor of Bacula is the Free Software Foundation Europe
25 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
26 Switzerland, email:ftf@fsfeurope.org.
29 * Bacula thread watchdog routine. General routine that
30 * allows setting a watchdog timer with a callback that is
31 * called when the timer goes off.
33 * Kern Sibbald, January MMII
40 /* Exported globals */
/* Wall-clock time last sampled with time(NULL) by start_watchdog() and by the watchdog thread. */
41 time_t watchdog_time = 0; /* this has granularity of SLEEP_TIME */
/* Added to watchdog_time by the watchdog thread to get its default next wake-up time. */
42 time_t watchdog_sleep_time = 60; /* examine things every 60 seconds */
/*
 * Mutex/condition-variable pair used by the watchdog thread for its timed
 * wait; ping_watchdog() signals `timer`.
 */
45 static pthread_mutex_t timer_mutex = PTHREAD_MUTEX_INITIALIZER;
46 static pthread_cond_t timer = PTHREAD_COND_INITIALIZER;
48 /* Forward referenced functions */
49 extern "C" void *watchdog_thread(void *arg);
51 static void wd_lock();
52 static void wd_unlock();
/*
 * Set to true by stop_watchdog() to tell the watchdog thread to stop.
 * NOTE(review): the doubled ';' on the next line is a stray empty declaration.
 */
55 static bool quit = false;;
/* NOTE(review): presumably set once start_watchdog() has run; the assignment is not visible in this listing. */
56 static bool wd_is_init = false;
/* Initialised with rwl_init() in start_watchdog(); taken and released by wd_lock()/wd_unlock(). */
57 static brwlock_t lock; /* watchdog lock */
/* Thread id filled in by pthread_create() and joined in stop_watchdog(). */
59 static pthread_t wd_tid;
/* Queue that register_watchdog() appends to and that the watchdog thread walks. */
60 static dlist *wd_queue;
/* Queue the watchdog thread appends to when handling one-shot timers. */
61 static dlist *wd_inactive;
64 * Start watchdog thread
66 * Returns: 0 on success
/*
 * Set up the watchdog subsystem. The visible statements record the current
 * time in watchdog_time, initialise the watchdog lock, create the two
 * queues and start watchdog_thread in a new thread.
 *
 * NOTE(review): this listing omits several lines of the function (opening
 * brace, local declarations such as stat/errstat, the return statements),
 * so the comments below describe only the statements that are shown.
 */
69 int start_watchdog(void)
/* Never dereferenced for data: only used to form &dummy->link for the dlist constructor
 * (presumably so dlist can locate the link member inside watchdog_t -- confirm against dlist). */
72 watchdog_t *dummy = NULL;
78 Dmsg0(800, "Initialising NicB-hacked watchdog thread\n");
79 watchdog_time = time(NULL);
/* A non-zero result from rwl_init() is reported through Emsg1 with M_ABORT. */
81 if ((errstat=rwl_init(&lock)) != 0) {
82 Emsg1(M_ABORT, 0, _("Unable to initialize watchdog lock. ERR=%s\n"),
/* Both queues are created the same way and start out empty of registered watchdogs. */
85 wd_queue = New(dlist(dummy, &dummy->link));
86 wd_inactive = New(dlist(dummy, &dummy->link));
/* Start the watchdog thread with default attributes and no argument; its id is kept in wd_tid. */
89 if ((stat = pthread_create(&wd_tid, NULL, watchdog_thread, NULL)) != 0) {
96 * Wake watchdog timer thread so that it walks the
97 * queue and adjusts its wait time (or exits).
/*
 * Signal the `timer` condition variable, which is the one the watchdog
 * thread waits on in pthread_cond_timedwait().
 * NOTE(review): any locking of timer_mutex around the signal is not
 * visible in this listing.
 */
99 static void ping_watchdog()
102 pthread_cond_signal(&timer);
108 * Terminate the watchdog thread
110 * Returns: 0 on success
/*
 * Shut the watchdog down: raise the quit flag, join the watchdog thread,
 * then drain both the active and the inactive queue.
 *
 * NOTE(review): lines are missing from this listing (declarations, the
 * wake-up of the thread between setting quit and joining, the bodies of
 * the destructor checks, any freeing of items and queues, the return).
 */
113 int stop_watchdog(void)
122 quit = true; /* notify watchdog thread to stop */
/* Wait for the watchdog thread to finish; its return value is discarded (NULL). */
125 stat = pthread_join(wd_tid, NULL);
/* Drain the active queue: detach each item, then check whether it has a destructor. */
127 while (!wd_queue->empty()) {
128 void *item = wd_queue->first();
129 wd_queue->remove(item);
130 p = (watchdog_t *)item;
/* NOTE(review): presumably the destructor is invoked here; the call itself is not shown. */
131 if (p->destructor != NULL) {
/* Same draining procedure for the inactive queue. */
139 while (!wd_inactive->empty()) {
140 void *item = wd_inactive->first();
141 wd_inactive->remove(item);
142 p = (watchdog_t *)item;
143 if (p->destructor != NULL) {
/*
 * Allocate a watchdog_t with malloc() and give it no destructor.
 *
 * NOTE(review): the lines between the allocation and the destructor
 * assignment are not visible in this listing, so it cannot be told from
 * here whether the malloc() result is checked or which other fields are
 * initialised -- confirm against the full source.
 */
156 watchdog_t *new_watchdog(void)
158 watchdog_t *wd = (watchdog_t *)malloc(sizeof(watchdog_t));
170 wd->destructor = NULL;
/*
 * Put a caller-prepared watchdog on the active queue.
 *
 * wd must have a non-NULL callback and a non-zero interval; either
 * violation is reported through Emsg with M_ABORT. Its first fire time is
 * set to watchdog_time + interval before it is appended to wd_queue.
 *
 * NOTE(review): the condition guarding the first Emsg0, any locking and
 * the return statement are not visible in this listing.
 */
176 bool register_watchdog(watchdog_t *wd)
179 Emsg0(M_ABORT, 0, _("BUG! register_watchdog called before start_watchdog\n"));
181 if (wd->callback == NULL) {
182 Emsg1(M_ABORT, 0, _("BUG! Watchdog %p has NULL callback\n"), wd);
184 if (wd->interval == 0) {
185 Emsg1(M_ABORT, 0, _("BUG! Watchdog %p has zero interval\n"), wd);
/* Schedule relative to the cached watchdog_time, not a fresh time() call. */
189 wd->next_fire = watchdog_time + wd->interval;
190 wd_queue->append(wd);
191 Dmsg3(800, "Registered watchdog %p, interval %d%s\n",
192 wd, wd->interval, wd->one_shot ? " one shot" : "");
/*
 * Remove wd from whichever queue holds it: the active queue is searched
 * first, then the inactive queue. A debug message is logged for each
 * outcome, including the case where wd is found on neither queue.
 *
 * NOTE(review): the comparisons inside the two loops, any locking and the
 * return statements are not visible in this listing.
 */
199 bool unregister_watchdog(watchdog_t *wd)
/* NOTE(review): this message names unregister_watchdog_unlocked, but the enclosing function is unregister_watchdog. */
205 Emsg0(M_ABORT, 0, _("BUG! unregister_watchdog_unlocked called before start_watchdog\n"));
/* Search the active queue. */
209 foreach_dlist(p, wd_queue) {
211 wd_queue->remove(wd);
212 Dmsg1(800, "Unregistered watchdog %p\n", wd);
/* Search the inactive queue. */
218 foreach_dlist(p, wd_inactive) {
220 wd_inactive->remove(wd);
221 Dmsg1(800, "Unregistered inactive watchdog %p\n", wd);
/* Reached when wd was found on neither queue. */
227 Dmsg1(800, "Failed to unregister watchdog %p\n", wd);
236 * This is the thread that walks the watchdog queue
237 * and when a queue item fires, the callback is
238 * invoked. If it is a one shot, the queue item
239 * is moved to the inactive queue.
/*
 * Entry point of the watchdog thread started by start_watchdog().
 *
 * The visible statements refresh watchdog_time, walk wd_queue looking for
 * entries whose next_fire has been reached, reschedule them or append them
 * to wd_inactive, work out the earliest upcoming fire time, and then wait
 * on the `timer` condition variable until that time or until signalled.
 *
 * NOTE(review): this listing omits lines of the function (local
 * declarations, the enclosing loop and its quit test, the callback
 * invocation, the one-shot test, lock/unlock calls, the return), so the
 * exact control flow must be confirmed against the full source.
 */
241 extern "C" void *watchdog_thread(void *arg)
/* Absolute deadline handed to pthread_cond_timedwait(). */
243 struct timespec timeout;
248 Dmsg0(800, "NicB-reworked watchdog thread entered\n");
255 * NOTE. lock_jcr_chain removed, but the message below
256 * was left until we are sure there are no deadlocks.
258 * We lock the jcr chain here because a good number of the
259 * callback routines lock the jcr chain. We need to lock
260 * it here *before* the watchdog lock because the SD message
261 * thread first locks the jcr chain, then when closing the
262 * job locks the watchdog chain. If the two threads do not
263 * lock in the same order, we get a deadlock -- each holds
264 * the other's needed lock.
/* Refresh the shared clock; by default the next wake-up is one sleep period from now. */
269 watchdog_time = time(NULL);
270 next_time = watchdog_time + watchdog_sleep_time;
271 foreach_dlist(p, wd_queue) {
/* An entry is due once its fire time is at or before the sampled clock. */
272 if (p->next_fire <= watchdog_time) {
273 /* Run the callback */
274 Dmsg2(3400, "Watchdog callback p=0x%p fire=%d\n", p, p->next_fire);
277 /* Reschedule (or move to inactive list if it's a one-shot timer) */
280 wd_inactive->append(p);
283 p->next_fire = watchdog_time + p->interval;
/* Keep the earliest upcoming fire time so the wait ends no later than that. */
286 if (p->next_fire <= next_time) {
287 next_time = p->next_fire;
293 * Wait sleep time or until someone wakes us
/* Build the deadline: current time of day plus the seconds remaining until next_time. */
295 gettimeofday(&tv, &tz);
296 timeout.tv_nsec = tv.tv_usec * 1000;
297 timeout.tv_sec = tv.tv_sec + next_time - time(NULL);
/* Bring tv_nsec back below one second (any matching tv_sec adjustment is not visible here). */
298 while (timeout.tv_nsec >= 1000000000) {
299 timeout.tv_nsec -= 1000000000;
/* NOTE(review): "%d" is given a difference of tv_sec values; where time_t is wider than int this is a format/argument mismatch. */
303 Dmsg1(1900, "pthread_cond_timedwait %d\n", timeout.tv_sec - tv.tv_sec);
304 /* Note, this unlocks mutex during the sleep */
/* The return value (timeout vs. signalled) is not examined on this line. */
306 pthread_cond_timedwait(&timer, &timer_mutex, &timeout);
310 Dmsg0(800, "NicB-reworked watchdog thread exited\n");
315 * Watchdog lock, this can be called multiple times by the same
316 * thread without blocking, but must be unlocked the number of
317 * times it was locked.
/*
 * Take the watchdog lock by write-locking `lock` with rwl_writelock().
 * A non-zero result is reported through Emsg1 with M_ABORT.
 * NOTE(review): the declaration of errstat and the remaining Emsg1
 * argument are not visible in this listing.
 */
319 static void wd_lock()
322 if ((errstat=rwl_writelock(&lock)) != 0) {
323 Emsg1(M_ABORT, 0, _("rwl_writelock failure. ERR=%s\n"),
329 * Unlock the watchdog. This can be called multiple times by the
330 * same thread up to the number of times that thread called wd_lock().
/*
 * Release the watchdog lock with rwl_writeunlock().
 * A non-zero result is reported through Emsg1 with M_ABORT.
 * NOTE(review): the declaration of errstat and the remaining Emsg1
 * argument are not visible in this listing.
 */
333 static void wd_unlock()
336 if ((errstat=rwl_writeunlock(&lock)) != 0) {
337 Emsg1(M_ABORT, 0, _("rwl_writeunlock failure. ERR=%s\n"),