/*
- * Bacula thread watchdog routine. General routine that
- * allows setting a watchdog timer with a callback that is
- * called when the timer goes off.
- *
- * Kern Sibbald, January MMII
- *
- */
-/*
- Copyright (C) 2000-2005 Kern Sibbald
+ Bacula® - The Network Backup Solution
+
+ Copyright (C) 2002-2011 Free Software Foundation Europe e.V.
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of
- the License, or (at your option) any later version.
+ The main author of Bacula is Kern Sibbald, with contributions from
+ many others, a complete list can be found in the file AUTHORS.
+ This program is Free Software; you can redistribute it and/or
+ modify it under the terms of version three of the GNU Affero General Public
+ License as published by the Free Software Foundation and included
+ in the file LICENSE.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
- You should have received a copy of the GNU General Public
- License along with this program; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
- MA 02111-1307, USA.
+ You should have received a copy of the GNU Affero General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+ Bacula® is a registered trademark of Kern Sibbald.
+ The licensor of Bacula is the Free Software Foundation Europe
+ (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
+ Switzerland, email:ftf@fsfeurope.org.
+*/
+/*
+ * Bacula thread watchdog routine. General routine that
+ * allows setting a watchdog timer with a callback that is
+ * called when the timer goes off.
+ *
+ * Kern Sibbald, January MMII
+ *
*/
#include "bacula.h"
#include "jcr.h"
/* Exported globals */
-time_t watchdog_time = 0; /* this has granularity of SLEEP_TIME */
-time_t watchdog_sleep_time = 60; /* examine things every 60 seconds */
+utime_t watchdog_time = 0; /* this has granularity of SLEEP_TIME */
+utime_t watchdog_sleep_time = 60; /* examine things every 60 seconds */
/* Locals */
static pthread_mutex_t timer_mutex = PTHREAD_MUTEX_INITIALIZER; /* held while signalling the timer condition */
/* Static globals */
static bool quit = false; /* set to true to make the watchdog thread leave its loop */
static bool wd_is_init = false; /* true between start_watchdog() and stop_watchdog() */
-static brwlock_t lock; /* watchdog lock */
+static brwlock_t lock; /* watchdog lock */
static pthread_t wd_tid; /* id of the watchdog thread, filled in by pthread_create() */
static dlist *wd_queue; /* watchdogs examined on every pass of the watchdog thread */
static dlist *wd_inactive; /* watchdogs not scheduled, e.g. one-shots that already fired */
+/*
+ * Tell whether the calling thread is the watchdog thread.
+ *
+ * Returns: false if the watchdog is not initialized (wd_is_init
+ *          is not set) or the current thread is NOT the watchdog
+ *          true  if the current thread is the watchdog
+ */
+bool is_watchdog()
+{
+ if (wd_is_init && pthread_equal(pthread_self(), wd_tid)) {
+ return true;
+ } else {
+ return false;
+ }
+}
+
/*
* Start watchdog thread
*
* Returns: 0 on success
- * errno on failure
+ * errno on failure
*/
int start_watchdog(void)
{
watchdog_time = time(NULL);
if ((errstat=rwl_init(&lock)) != 0) {
- Emsg1(M_ABORT, 0, _("Unable to initialize watchdog lock. ERR=%s\n"),
- strerror(errstat));
+ berrno be;
+ Jmsg1(NULL, M_ABORT, 0, _("Unable to initialize watchdog lock. ERR=%s\n"),
+ be.bstrerror(errstat));
}
wd_queue = New(dlist(dummy, &dummy->link));
wd_inactive = New(dlist(dummy, &dummy->link));
+ /*
+ * Mark the watchdog as initialized *before* creating the thread so
+ * that the flag is already set when the new thread starts running.
+ */
+ wd_is_init = true;
if ((stat = pthread_create(&wd_tid, NULL, watchdog_thread, NULL)) != 0) {
+ /*
+ * No thread was created and wd_tid is not valid: undo the flag so
+ * that is_watchdog() and register_watchdog() do not treat the
+ * watchdog as running.
+ */
+ wd_is_init = false;
return stat;
}
- wd_is_init = true;
return 0;
}
P(timer_mutex);
pthread_cond_signal(&timer);
V(timer_mutex);
+ bmicrosleep(0, 100);
}
/*
* Terminate the watchdog thread
*
* Returns: 0 on success
- * errno on failure
+ * errno on failure
*/
int stop_watchdog(void)
{
return 0;
}
- quit = true; /* notify watchdog thread to stop */
- wd_is_init = false;
-
+ quit = true; /* notify watchdog thread to stop */
ping_watchdog();
+
stat = pthread_join(wd_tid, NULL);
while (!wd_queue->empty()) {
wd_queue->remove(item);
p = (watchdog_t *)item;
if (p->destructor != NULL) {
- p->destructor(p);
+ p->destructor(p);
}
free(p);
}
wd_inactive->remove(item);
p = (watchdog_t *)item;
if (p->destructor != NULL) {
- p->destructor(p);
+ p->destructor(p);
}
free(p);
}
delete wd_inactive;
wd_inactive = NULL;
rwl_destroy(&lock);
+ wd_is_init = false;
return stat;
}
bool register_watchdog(watchdog_t *wd)
{
if (!wd_is_init) {
- Emsg0(M_ABORT, 0, "BUG! register_watchdog called before start_watchdog\n");
+ Jmsg0(NULL, M_ABORT, 0, _("BUG! register_watchdog called before start_watchdog\n"));
}
if (wd->callback == NULL) {
- Emsg1(M_ABORT, 0, "BUG! Watchdog %p has NULL callback\n", wd);
+ Jmsg1(NULL, M_ABORT, 0, _("BUG! Watchdog %p has NULL callback\n"), wd);
}
if (wd->interval == 0) {
- Emsg1(M_ABORT, 0, "BUG! Watchdog %p has zero interval\n", wd);
+ Jmsg1(NULL, M_ABORT, 0, _("BUG! Watchdog %p has zero interval\n"), wd);
}
wd_lock();
bool ok = false;
if (!wd_is_init) {
- Emsg0(M_ABORT, 0, "BUG! unregister_watchdog_unlocked called before start_watchdog\n");
+ Jmsg0(NULL, M_ABORT, 0, _("BUG! unregister_watchdog_unlocked called before start_watchdog\n"));
}
wd_lock();
foreach_dlist(p, wd_queue) {
if (wd == p) {
- wd_queue->remove(wd);
+ wd_queue->remove(wd);
Dmsg1(800, "Unregistered watchdog %p\n", wd);
- ok = true;
- goto get_out;
+ ok = true;
+ goto get_out;
}
}
foreach_dlist(p, wd_inactive) {
if (wd == p) {
- wd_inactive->remove(wd);
+ wd_inactive->remove(wd);
Dmsg1(800, "Unregistered inactive watchdog %p\n", wd);
- ok = true;
- goto get_out;
+ ok = true;
+ goto get_out;
}
}
struct timespec timeout;
struct timeval tv;
struct timezone tz;
- time_t next_time;
+ utime_t next_time;
+ set_jcr_in_tsd(INVALID_JCR);
Dmsg0(800, "NicB-reworked watchdog thread entered\n");
while (!quit) {
watchdog_t *p;
/*
+ *
+ * NOTE. lock_jcr_chain removed, but the message below
+ * was left until we are sure there are no deadlocks.
+ *
* We lock the jcr chain here because a good number of the
* callback routines lock the jcr chain. We need to lock
* it here *before* the watchdog lock because the SD message
* lock in the same order, we get a deadlock -- each holds
* the other's needed lock.
*/
- lock_jcr_chain();
wd_lock();
walk_list:
watchdog_time = time(NULL);
next_time = watchdog_time + watchdog_sleep_time;
foreach_dlist(p, wd_queue) {
- if (p->next_fire <= watchdog_time) {
- /* Run the callback */
- p->callback(p);
+ if (p->next_fire <= watchdog_time) {
+ /* Run the callback */
+ Dmsg2(3400, "Watchdog callback p=0x%p fire=%d\n", p, p->next_fire);
+ p->callback(p);
/* Reschedule (or move to inactive list if it's a one-shot timer) */
- if (p->one_shot) {
- wd_queue->remove(p);
- wd_inactive->append(p);
- goto walk_list;
- } else {
- p->next_fire = watchdog_time + p->interval;
- }
- }
- if (p->next_fire < next_time) {
- next_time = p->next_fire;
- }
+ if (p->one_shot) {
+ wd_queue->remove(p);
+ wd_inactive->append(p);
+ goto walk_list;
+ } else {
+ p->next_fire = watchdog_time + p->interval;
+ }
+ }
+ if (p->next_fire <= next_time) {
+ next_time = p->next_fire;
+ }
}
wd_unlock();
- unlock_jcr_chain();
/*
* Wait sleep time or until someone wakes us
timeout.tv_nsec = tv.tv_usec * 1000;
timeout.tv_sec = tv.tv_sec + next_time - time(NULL);
while (timeout.tv_nsec >= 1000000000) {
- timeout.tv_nsec -= 1000000000;
- timeout.tv_sec++;
+ timeout.tv_nsec -= 1000000000;
+ timeout.tv_sec++;
}
Dmsg1(1900, "pthread_cond_timedwait %d\n", timeout.tv_sec - tv.tv_sec);
{
int errstat;
if ((errstat=rwl_writelock(&lock)) != 0) {
- Emsg1(M_ABORT, 0, "rwl_writelock failure. ERR=%s\n",
- strerror(errstat));
+ berrno be;
+ Jmsg1(NULL, M_ABORT, 0, _("rwl_writelock failure. ERR=%s\n"),
+ be.bstrerror(errstat));
}
}
{
int errstat;
if ((errstat=rwl_writeunlock(&lock)) != 0) {
- Emsg1(M_ABORT, 0, "rwl_writeunlock failure. ERR=%s\n",
- strerror(errstat));
+ berrno be;
+ Jmsg1(NULL, M_ABORT, 0, _("rwl_writeunlock failure. ERR=%s\n"),
+ be.bstrerror(errstat));
}
}