X-Git-Url: https://git.sur5r.net/?a=blobdiff_plain;f=bacula%2Fsrc%2Flib%2Fwatchdog.c;h=2bb599daeca5c2bd81f48147606a255e62f58733;hb=82151d934f8b46ff7133b084808f288c81edd372;hp=b53b60fae6e0537f824054f5adcb0eb5d1a7405d;hpb=5e580f90285c76c8b00c173fdbc111f10659da94;p=bacula%2Fbacula diff --git a/bacula/src/lib/watchdog.c b/bacula/src/lib/watchdog.c index b53b60fae6..2bb599daec 100755 --- a/bacula/src/lib/watchdog.c +++ b/bacula/src/lib/watchdog.c @@ -8,7 +8,7 @@ * */ /* - Copyright (C) 2000, 2001, 2002 Kern Sibbald and John Walker + Copyright (C) 2000-2003 Kern Sibbald and John Walker This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as @@ -38,12 +38,15 @@ time_t watchdog_time; /* this has granularity of SLEEP_TIME */ #define SLEEP_TIME 30 /* examine things every 30 seconds */ /* Forward referenced functions */ -static void *watchdog_thread(void *arg); +static void *btimer_thread(void *arg); +static void stop_btimer(btimer_id wid); +static btimer_id btimer_start_common(uint32_t wait); /* Static globals */ static pthread_mutex_t mutex; static pthread_cond_t timer; static int quit; +static btimer_t *timer_chain = NULL; /* @@ -80,7 +83,7 @@ int start_watchdog(void) return stat; } quit = FALSE; - if ((stat = pthread_create(&wdid, NULL, watchdog_thread, (void *)NULL)) != 0) { + if ((stat = pthread_create(&wdid, NULL, btimer_thread, (void *)NULL)) != 0) { pthread_mutex_destroy(&mutex); pthread_cond_destroy(&timer); return stat; @@ -98,18 +101,14 @@ int stop_watchdog(void) { int stat; - if ((stat = pthread_mutex_lock(&mutex)) != 0) { - return stat; - } + P(mutex); quit = TRUE; if ((stat = pthread_cond_signal(&timer)) != 0) { - pthread_mutex_unlock(&mutex); - return stat; - } - if ((stat = pthread_mutex_unlock(&mutex)) != 0) { + V(mutex); return stat; } + V(mutex); return 0; } @@ -117,24 +116,22 @@ int stop_watchdog(void) /* * This is the actual watchdog thread. 
*/
-static void *watchdog_thread(void *arg)
+static void *btimer_thread(void *arg)
 {
    struct timespec timeout;
    int stat;
    JCR *jcr;
    BSOCK *fd;
+   btimer_t *wid;
    Dmsg0(200, "Start watchdog thread\n");
    pthread_detach(pthread_self());
-   if ((stat = pthread_mutex_lock(&mutex)) != 0) {
-      return NULL;
-   }
-
+   P(mutex);
    for ( ;!quit; ) {
       struct timeval tv;
       struct timezone tz;
-      time_t timer_start;
+      time_t timer_start, now;
       Dmsg0(200, "Top of for loop\n");
@@ -191,12 +188,142 @@ static void *watchdog_thread(void *arg)
       timeout.tv_sec = tv.tv_sec + SLEEP_TIME;
       Dmsg1(200, "pthread_cond_timedwait sec=%d\n", timeout.tv_sec);
+      /* Note, this unlocks mutex during the sleep */
       stat = pthread_cond_timedwait(&timer, &mutex, &timeout);
       Dmsg1(200, "pthread_cond_timedwait stat=%d\n", stat);
+
+      now = time(NULL);
+
+      /*
+       * Walk the timer chain (mutex is held again here) and act on
+       *  every entry whose start_time + wait has passed.
+       */
+      for (wid = timer_chain; wid; wid=wid->next) {
+         int killed = FALSE;          /* set when a child got SIGTERM this pass */
+         /* First ask him politely to go away */
+         if (!wid->killed && now > (wid->start_time + wid->wait)) {
+//          Dmsg1(000, "Watchdog sigterm pid=%d\n", wid->pid);
+            if (wid->type == TYPE_CHILD) {
+               kill(wid->pid, SIGTERM);
+               killed = TRUE;
+            } else {
+               Dmsg1(200, "watchdog kill thread %d\n", wid->tid);
+               pthread_kill(wid->tid, TIMEOUT_SIGNAL);
+               wid->killed = TRUE;
+            }
+         }
+         /*
+          * If we asked a child to die, wait 3 seconds and slam him.
+          *  NOTE(review): the sleep happens with mutex held, so the
+          *  start/stop timer routines block for those 3 seconds.
+          */
+         if (killed) {
+            btimer_t *wid1;
+            sleep(3);
+            for (wid1 = timer_chain; wid1; wid1=wid1->next) {
+               /*
+                * Test the type of the entry being examined (wid1), not
+                *  of the outer entry: only child timers have pid set,
+                *  so a thread timer must never reach kill().
+                */
+               if (wid1->type == TYPE_CHILD &&
+                   !wid1->killed && now > (wid1->start_time + wid1->wait)) {
+                  kill(wid1->pid, SIGKILL);
+//                Dmsg1(000, "Watchdog killed pid=%d\n", wid1->pid);
+                  wid1->killed = TRUE;
+               }
+            }
+         }
+      }
    } /* end of big for loop */
-   pthread_mutex_unlock(&mutex);      /* for good form */
+   V(mutex);
    Dmsg0(200, "End watchdog\n");
    return NULL;
 }
+
+/*
+ * Start a timer on a child process of pid, kill it after wait seconds.
+ *  NOTE!  Granularity is SLEEP_TIME (i.e.
30 seconds)
+ *
+ *  Returns: btimer_id (pointer to btimer_t struct) on success
+ *           NULL on failure
+ */
+btimer_id start_child_timer(pid_t pid, uint32_t wait)
+{
+   btimer_t *wid;
+
+   wid = btimer_start_common(wait);
+   if (wid == NULL) {                 /* allocation failed */
+      return NULL;
+   }
+   wid->pid = pid;
+   wid->type = TYPE_CHILD;
+   Dmsg2(200, "Start child timer 0x%x for %d secs.\n", wid, wait);
+   return wid;
+}
+
+/*
+ * Start a timer on a thread. kill it after wait seconds.
+ *   NOTE!  Granularity is SLEEP_TIME (i.e. 30 seconds)
+ *
+ *  Returns: btimer_id (pointer to btimer_t struct) on success
+ *           NULL on failure
+ */
+btimer_id start_thread_timer(pthread_t tid, uint32_t wait)
+{
+   btimer_t *wid;
+
+   wid = btimer_start_common(wait);
+   if (wid == NULL) {                 /* allocation failed */
+      return NULL;
+   }
+   wid->tid = tid;
+   wid->type = TYPE_PTHREAD;
+   Dmsg2(200, "Start thread timer 0x%x for %d secs.\n", wid, wait);
+   return wid;
+}
+
+/*
+ * Allocate a btimer_t, record the start time and the wait interval,
+ *  and link it in as the first item of timer_chain.  The caller
+ *  fills in type and pid/tid afterwards.
+ *
+ *  Returns: btimer_id on success
+ *           NULL if malloc() fails
+ */
+static btimer_id btimer_start_common(uint32_t wait)
+{
+   btimer_id wid = (btimer_id)malloc(sizeof(btimer_t));
+
+   if (wid == NULL) {
+      return NULL;
+   }
+   /* Fill in the entry before it becomes reachable from the chain */
+   wid->start_time = time(NULL);
+   wid->wait = wait;
+   wid->killed = FALSE;
+   wid->prev = NULL;
+
+   P(mutex);
+   /* Chain it into timer_chain as the first item */
+   wid->next = timer_chain;
+   if (timer_chain) {
+      timer_chain->prev = wid;
+   }
+   timer_chain = wid;
+   V(mutex);
+   /*
+    * NOTE(review): type and pid/tid are set by the caller after the
+    *  mutex is released, so the entry is briefly on the chain without
+    *  them -- confirm this window is acceptable.
+    */
+   return wid;
+}
+
+/*
+ * Stop child timer
+ *
+ *  A NULL wid (e.g. from a failed start_child_timer) is ignored.
+ */
+void stop_child_timer(btimer_id wid)
+{
+   if (!wid) {
+      return;
+   }
+   Dmsg2(200, "Stop child timer 0x%x for %d secs.\n", wid, wid->wait);
+   stop_btimer(wid);
+}
+
+/*
+ * Stop thread timer
+ *
+ *  A NULL wid is ignored.
+ */
+void stop_thread_timer(btimer_id wid)
+{
+   if (!wid) {
+      return;
+   }
+   Dmsg2(200, "Stop thread timer 0x%x for %d secs.\n", wid, wid->wait);
+   stop_btimer(wid);
+}
+
+
+/*
+ * Stop btimer
+ *
+ *  Unlinks wid from timer_chain while holding mutex, then frees it.
+ *  wid must not be used by the caller afterwards.
+ */
+static void stop_btimer(btimer_id wid)
+{
+   if (wid == NULL) {
+      Emsg0(M_ABORT, 0, _("NULL btimer_id.\n"));
+   }
+   P(mutex);
+   /* Remove wid from timer_chain */
+   if (!wid->prev) {                  /* if no prev */
+      timer_chain = wid->next;        /* set new head */
+   } else {
+      wid->prev->next = wid->next;    /* update prev */
+   }
+   if (wid->next) {
+      wid->next->prev = wid->prev;    /* unlink it */
+   }
+   V(mutex);
+   free(wid);
+}