2 Bacula(R) - The Network Backup Solution
4 Copyright (C) 2000-2017 Kern Sibbald
6 The original author of Bacula is Kern Sibbald, with contributions
7 from many others, a complete list can be found in the file AUTHORS.
9 You may use this file and others of this release according to the
10 license defined in the LICENSE file, which includes the Affero General
11 Public License, v3.0 ("AGPLv3") and some additional permissions and
12 terms pursuant to its AGPLv3 Section 7.
14 This notice must be preserved when any source code is
15 conveyed and/or propagated.
17 Bacula(R) is a registered trademark of Kern Sibbald.
20 * Bacula thread watchdog routine. General routine that
21 * allows setting a watchdog timer with a callback that is
22 * called when the timer goes off.
24 * Kern Sibbald, January MMII
31 /* Exported globals */
/* Cached wall-clock time: assigned from time(NULL) in start_watchdog() and again inside watchdog_thread(). */
32 utime_t watchdog_time = 0; /* this has granularity of SLEEP_TIME */
/* Starting value for the thread's next wake-up: watchdog_thread() computes next_time = watchdog_time + watchdog_sleep_time. */
33 utime_t watchdog_sleep_time = 60; /* examine things every 60 seconds */
/* Mutex handed to pthread_cond_timedwait() together with `timer` in watchdog_thread(). */
36 static pthread_mutex_t timer_mutex = PTHREAD_MUTEX_INITIALIZER;
/* Condition variable the watchdog thread waits on; ping_watchdog() signals it. */
37 static pthread_cond_t timer = PTHREAD_COND_INITIALIZER;
39 /* Forward referenced functions */
/* Thread entry point passed to pthread_create() by start_watchdog(). */
40 extern "C" void *watchdog_thread(void *arg);
/* Wrappers around rwl_writelock()/rwl_writeunlock() on `lock`. */
42 static void wd_lock();
43 static void wd_unlock();
/* Set to true by stop_watchdog() to notify the watchdog thread to stop. */
46 static bool quit = false;
/* Tested before comparing the caller's thread id with wd_tid; where it is set is not visible in this excerpt. */
47 static bool wd_is_init = false;
/* Initialised with rwl_init() in start_watchdog(). */
48 static brwlock_t lock; /* watchdog lock */
/* Id of the watchdog thread: filled in by pthread_create(), joined by stop_watchdog(). */
50 static pthread_t wd_tid;
/* Active watchdogs: register_watchdog() appends here and watchdog_thread() walks this list. */
51 static dlist *wd_queue;
/* Watchdogs that watchdog_thread() moved aside after firing (one-shot timers, per its comment). */
52 static dlist *wd_inactive;
55 * Returns: 0 if the current thread is NOT the watchdog
56 * 1 if the current thread is the watchdog
/* NOTE(review): the function header and both return statements are missing from this excerpt; only the test is visible. */
/* The caller counts as the watchdog only when wd_is_init is set AND its thread id equals wd_tid; */
/* checking wd_is_init first avoids comparing against a wd_tid that pthread_create() has not filled in yet. */
60 if (wd_is_init && pthread_equal(pthread_self(), wd_tid)) {
68 * Start watchdog thread
70 * Returns: 0 on success
73 int start_watchdog(void)
/* Visible steps: seed watchdog_time, initialise the watchdog lock, create the two dlist queues, spawn the thread. */
/* NOTE(review): declarations of stat/errstat/be, any already-initialised guard, and the return paths are not in this excerpt. */
/* dummy stays NULL and is only used in the expression &dummy->link handed to dlist; */
/* presumably that conveys the offset of the link member inside watchdog_t - confirm against dlist. */
76 watchdog_t *dummy = NULL;
82 Dmsg0(800, "Initialising NicB-hacked watchdog thread\n");
/* Seed the cached clock so watchdogs registered before the thread's first pass get a sane next_fire. */
83 watchdog_time = time(NULL);
/* A failing rwl_init() is reported through Jmsg1 with M_ABORT, including the error text for errstat. */
85 if ((errstat=rwl_init(&lock)) != 0) {
87 Jmsg1(NULL, M_ABORT, 0, _("Unable to initialize watchdog lock. ERR=%s\n"),
88 be.bstrerror(errstat));
/* Both queues are created before the thread exists, so watchdog_thread() never sees a NULL wd_queue. */
90 wd_queue = New(dlist(dummy, &dummy->link));
91 wd_inactive = New(dlist(dummy, &dummy->link));
/* Default thread attributes, no argument; the handling of a non-zero stat is not visible here. */
94 if ((stat = pthread_create(&wd_tid, NULL, watchdog_thread, NULL)) != 0) {
101 * Wake watchdog timer thread so that it walks the
102 * queue and adjusts its wait time (or exits).
104 static void ping_watchdog()
/* Signals `timer`, the condition variable watchdog_thread() passes to pthread_cond_timedwait(). */
/* NOTE(review): whether timer_mutex is held around the signal is not visible in this excerpt - confirm. */
107 pthread_cond_signal(&timer);
113 * Terminate the watchdog thread
115 * Returns: 0 on success
118 int stop_watchdog(void)
/* Visible steps: raise the quit flag, join the watchdog thread, then empty both queues. */
/* NOTE(review): no wake-up of the thread is visible between setting quit and the join; presumably ping_watchdog() */
/* is called on a line missing from this excerpt - confirm, otherwise the join waits out the current timed wait. */
/* NOTE(review): quit is a plain bool written here; where the thread reads it and under which lock is not visible. */
127 quit = true; /* notify watchdog thread to stop */
/* stat receives the pthread_join() result; how it is returned is not visible here. */
130 stat = pthread_join(wd_tid, NULL);
/* Drain the active queue: each entry is unlinked first, then its destructor pointer is tested. */
132 while (!wd_queue->empty()) {
133 void *item = wd_queue->first();
134 wd_queue->remove(item);
135 p = (watchdog_t *)item;
/* NOTE(review): the body of this branch is not in the excerpt; presumably it calls p->destructor - confirm. */
136 if (p->destructor != NULL) {
/* Same drain, applied to the inactive queue. */
144 while (!wd_inactive->empty()) {
145 void *item = wd_inactive->first();
146 wd_inactive->remove(item);
147 p = (watchdog_t *)item;
148 if (p->destructor != NULL) {
161 watchdog_t *new_watchdog(void)
/* Allocates a single watchdog_t with malloc() and gives its fields initial values. */
/* NOTE(review): only the destructor assignment is visible; the other field initialisers and the return are not in this excerpt. */
163 watchdog_t *wd = (watchdog_t *)malloc(sizeof(watchdog_t));
/* NOTE(review): no NULL check of the malloc() result is visible before wd is written - confirm one exists. */
/* A NULL destructor is the "nothing to call" value that stop_watchdog() tests for while draining the queues. */
175 wd->destructor = NULL;
181 bool register_watchdog(watchdog_t *wd)
/* Validates wd, schedules its first firing and appends it to the active queue wd_queue. */
/* NOTE(review): the condition guarding the first message, any wd_lock()/wd_unlock() calls, the wake-up of the */
/* thread and the return value are not visible in this excerpt. */
184 Jmsg0(NULL, M_ABORT, 0, _("BUG! register_watchdog called before start_watchdog\n"));
/* A watchdog without a callback or with a zero interval is treated as a programming error (M_ABORT message). */
186 if (wd->callback == NULL) {
187 Jmsg1(NULL, M_ABORT, 0, _("BUG! Watchdog %p has NULL callback\n"), wd);
189 if (wd->interval == 0) {
190 Jmsg1(NULL, M_ABORT, 0, _("BUG! Watchdog %p has zero interval\n"), wd);
/* The first deadline is relative to the cached watchdog_time, not to a fresh time(NULL), so it can be */
/* early by however stale that cached value is. */
194 wd->next_fire = watchdog_time + wd->interval;
195 wd_queue->append(wd);
/* NOTE(review): wd->interval is printed with %d; its declared type is not visible here - confirm it is int-sized. */
196 Dmsg3(800, "Registered watchdog %p, interval %d%s\n",
197 wd, wd->interval, wd->one_shot ? " one shot" : "");
204 bool unregister_watchdog(watchdog_t *wd)
/* Looks for wd in the active queue and then in the inactive queue, removing it from whichever list holds it. */
/* NOTE(review): the guard for the first message, the per-element comparison inside both loops, any locking */
/* and the return statements are not visible in this excerpt. */
/* NOTE(review): the message below names unregister_watchdog_unlocked, but the function is unregister_watchdog. */
210 Jmsg0(NULL, M_ABORT, 0, _("BUG! unregister_watchdog_unlocked called before start_watchdog\n"));
/* First pass: active watchdogs. */
214 foreach_dlist(p, wd_queue) {
216 wd_queue->remove(wd);
217 Dmsg1(800, "Unregistered watchdog %p\n", wd);
/* Second pass: watchdogs the thread already moved to the inactive list. */
223 foreach_dlist(p, wd_inactive) {
225 wd_inactive->remove(wd);
226 Dmsg1(800, "Unregistered inactive watchdog %p\n", wd);
/* Logged when the visible removal paths were not taken, i.e. wd was in neither list. */
232 Dmsg1(800, "Failed to unregister watchdog %p\n", wd);
241 * This is the thread that walks the watchdog queue
242 * and when a queue item fires, the callback is
243 * invoked. If it is a one shot, the queue item
244 * is moved to the inactive queue.
246 extern "C" void *watchdog_thread(void *arg)
/* NOTE(review): the outer loop, the quit test, lock/unlock calls, the callback invocation and the one-shot */
/* branch are missing from this excerpt; the comments below describe only the visible statements. */
/* Absolute deadline handed to pthread_cond_timedwait(). */
248 struct timespec timeout;
253 set_jcr_in_tsd(INVALID_JCR);
254 Dmsg0(800, "NicB-reworked watchdog thread entered\n");
/* Refresh the cached clock, then assume the longest sleep; due entries below can only pull next_time earlier. */
262 watchdog_time = time(NULL);
263 next_time = watchdog_time + watchdog_sleep_time;
264 foreach_dlist(p, wd_queue) {
/* An entry is due once its deadline is at or before the cached clock. */
265 if (p->next_fire <= watchdog_time) {
266 /* Run the callback */
/* NOTE(review): "0x%p" may print a doubled 0x prefix, and %d is used for p->next_fire, which is compared */
/* against utime_t values; its declared type is not visible here - confirm the format matches. */
267 Dmsg2(3400, "Watchdog callback p=0x%p fire=%d\n", p, p->next_fire);
270 /* Reschedule (or move to inactive list if it's a one-shot timer) */
/* NOTE(review): the removal from wd_queue that should accompany this append is not visible in this excerpt. */
273 wd_inactive->append(p);
/* Periodic entries are rescheduled relative to the cached clock, not to their previous deadline. */
276 p->next_fire = watchdog_time + p->interval;
/* Track the earliest upcoming deadline so the sleep ends in time for it. */
279 if (p->next_fire <= next_time) {
280 next_time = p->next_fire;
286 * Wait sleep time or until someone wakes us
/* Build the deadline: current time of day plus the seconds left until next_time. */
288 gettimeofday(&tv, &tz);
289 timeout.tv_nsec = tv.tv_usec * 1000;
/* time(NULL) is read again here, so time spent in callbacks is subtracted from the wait. */
290 timeout.tv_sec = tv.tv_sec + next_time - time(NULL);
/* Keeps tv_nsec below one second; the matching tv_sec carry is not visible in this excerpt. */
291 while (timeout.tv_nsec >= 1000000000) {
292 timeout.tv_nsec -= 1000000000;
/* NOTE(review): a difference of tv_sec values is printed with %d; confirm it is int-sized on all targets. */
296 Dmsg1(1900, "pthread_cond_timedwait %d\n", timeout.tv_sec - tv.tv_sec);
297 /* Note, this unlocks mutex during the sleep */
/* The result is not captured on this line, so a timeout and a signal from ping_watchdog() are not distinguished here. */
299 pthread_cond_timedwait(&timer, &timer_mutex, &timeout);
303 Dmsg0(800, "NicB-reworked watchdog thread exited\n");
308 * Watchdog lock, this can be called multiple times by the same
309 * thread without blocking, but must be unlocked the number of
310 * times it was locked.
312 static void wd_lock()
/* Takes the write side of the file-scope `lock` through rwl_writelock(). */
/* A non-zero result is reported through Jmsg1 with M_ABORT, including the error text for errstat. */
/* NOTE(review): the declarations of errstat and be are not visible in this excerpt. */
315 if ((errstat=rwl_writelock(&lock)) != 0) {
317 Jmsg1(NULL, M_ABORT, 0, _("rwl_writelock failure. ERR=%s\n"),
318 be.bstrerror(errstat));
323 * Unlock the watchdog. This can be called multiple times by the
324 * same thread up to the number of times that thread called
/* NOTE(review): the sentence above stops mid-phrase in this excerpt; presumably it ends with "wd_lock()". */
327 static void wd_unlock()
/* Releases the write side of the file-scope `lock` through rwl_writeunlock(). */
/* A non-zero result is reported through Jmsg1 with M_ABORT, including the error text for errstat. */
330 if ((errstat=rwl_writeunlock(&lock)) != 0) {
332 Jmsg1(NULL, M_ABORT, 0, _("rwl_writeunlock failure. ERR=%s\n"),
333 be.bstrerror(errstat));