#include "bacula.h"
#include "filed.h"
+#if defined(HAVE_CYGWIN) || defined(HAVE_WIN32)
+/* pthread_kill() dies on Cygwin, so disable it */
+#define pthread_kill(x, y)
+/* Use shorter wait interval on Cygwin because no kill */
+#define WAIT_INTERVAL 10
+
+#else /* Unix systems */
+#define WAIT_INTERVAL 60
+#endif
+
/*
* Listen on the SD socket for heartbeat signals.
* Send heartbeats to the Director every HB_TIME
* seconds.
*/
-static void *heartbeat_thread(void *arg)
+static void *sd_heartbeat_thread(void *arg)
{
int32_t n;
JCR *jcr = (JCR *)arg;
sd = dup_bsock(jcr->store_bsock);
dir = dup_bsock(jcr->dir_bsock);
- jcr->duped_sd = sd;
+ jcr->hb_bsock = sd;
/* Hang reading the socket to the SD, and every time we get
- * a heartbeat, we simply send it on to the Director to
- * keep him alive.
+ * a heartbeat or we get a wait timeout (WAIT_INTERVAL
+ * seconds), we check to see if we need to send a
+ * heartbeat to the Director.
*/
for ( ; !is_bnet_stop(sd); ) {
- n = bnet_wait_data_intr(sd, 60);
+ n = bnet_wait_data_intr(sd, WAIT_INTERVAL);
if (me->heartbeat_interval) {
now = time(NULL);
if (now-last_heartbeat >= me->heartbeat_interval) {
}
if (n == 1) { /* input waiting */
bnet_recv(sd); /* read it -- probably heartbeat from sd */
-/* Dmsg1(000, "Got %d from SD\n", sd->msglen); */
+ Dmsg1(100, "Got %d from SD\n", sd->msglen);
}
}
bnet_close(sd);
bnet_close(dir);
- jcr->duped_sd = NULL;
+ jcr->hb_bsock = NULL;
return NULL;
}
/* Startup the heartbeat thread -- see above */
void start_heartbeat_monitor(JCR *jcr)
{
- jcr->duped_sd = NULL;
- pthread_create(&jcr->heartbeat_id, NULL, heartbeat_thread, (void *)jcr);
+ jcr->hb_bsock = NULL;
+ pthread_create(&jcr->heartbeat_id, NULL, sd_heartbeat_thread, (void *)jcr);
}
-/* Terminate the heartbeat thread */
+/*
+ * Terminate the heartbeat thread. Used for both SD and DIR.
+ *
+ * Waits a bounded time for the thread to publish its socket in
+ * jcr->hb_bsock, marks that socket timed_out/terminated, then
+ * keeps signalling the thread until it clears jcr->hb_bsock or
+ * the retry budget runs out.
+ *
+ * bmicrosleep() is called as (seconds, microseconds), so the
+ * poll periods below are 50 ms and 500 ms respectively.
+ */
void stop_heartbeat_monitor(JCR *jcr)
{
- /* Wait for heartbeat thread to start */
- while (jcr->duped_sd == NULL) {
+ int cnt = 0;
+ /* Wait max 10 secs (200 * 50 ms) for heartbeat thread to start */
+ while (jcr->hb_bsock == NULL && cnt++ < 200) {
- bmicrosleep(0, 50); /* avoid race */
+ bmicrosleep(0, 50000); /* avoid race */
}
- jcr->duped_sd->timed_out = 1; /* set timed_out to terminate read */
- jcr->duped_sd->terminated = 1; /* set to terminate read */
- /* Wait for heartbeat thread to stop */
- while (jcr->duped_sd) {
+ if (jcr->hb_bsock) {
+ jcr->hb_bsock->timed_out = 1; /* set timed_out to terminate read */
+ jcr->hb_bsock->terminated = 1; /* set to terminate read */
+ }
+ cnt = 0;
+ /* Wait max 100 secs (200 * 500 ms) for heartbeat thread to stop */
+ while (jcr->hb_bsock && cnt++ < 200) {
+ /* Naturally, Cygwin 1.3.20 craps out on the following */
pthread_kill(jcr->heartbeat_id, TIMEOUT_SIGNAL); /* make heartbeat thread go away */
- bmicrosleep(0, 20);
+ bmicrosleep(0, 500000);
}
}
+/*
+ * Thread for sending heartbeats to the Director when there
+ * is no SD monitoring needed -- e.g. restore and verify Vol
+ * both do their own read() on the SD socket.
+ *
+ * arg is the JCR of the running job. The thread detaches
+ * itself, publishes its private copy of the Director socket
+ * in jcr->hb_bsock, and sends BNET_HEARTBEAT whenever at
+ * least me->heartbeat_interval seconds have passed since the
+ * previous one. It exits once is_bnet_stop() reports the
+ * socket as stopped, closing the copy and clearing
+ * jcr->hb_bsock. Always returns NULL.
+ */
+static void *dir_heartbeat_thread(void *arg)
+{
+   JCR *jcr = (JCR *)arg;
+   BSOCK *dir;
+   time_t last_heartbeat = time(NULL);
+
+   pthread_detach(pthread_self());
+
+   /* Get our own local copy */
+   dir = dup_bsock(jcr->dir_bsock);
+
+   jcr->hb_bsock = dir;
+
+   for ( ; !is_bnet_stop(dir); ) {
+      time_t now, elapsed, remaining;
+
+      now = time(NULL);
+      elapsed = now - last_heartbeat;
+      if (elapsed >= me->heartbeat_interval) {
+         bnet_sig(dir, BNET_HEARTBEAT);
+         last_heartbeat = now;
+         elapsed = 0;
+      }
+      /*
+       * Sleep for the time left until the next heartbeat is
+       * due, not for the time already elapsed: the latter is
+       * zero on the first pass (busy loop) and stale right
+       * after a send (heartbeats drift to twice the interval).
+       */
+      remaining = me->heartbeat_interval - elapsed;
+      /*
+       * Never sleep longer than WAIT_INTERVAL so a stop request
+       * is noticed in bounded time even where pthread_kill()
+       * is compiled out and cannot interrupt the sleep.
+       */
+      if (remaining > WAIT_INTERVAL) {
+         remaining = WAIT_INTERVAL;
+      }
+      bmicrosleep(remaining, 0);
+   }
+   bnet_close(dir);
+   jcr->hb_bsock = NULL;
+   return NULL;
+}
/*
* Same as above but we don't listen to the SD
*/
void start_dir_heartbeat(JCR *jcr)
{
- /* ***FIXME*** implement */
+ if (me->heartbeat_interval) {
+ pthread_create(&jcr->heartbeat_id, NULL, dir_heartbeat_thread, (void *)jcr);
+ }
}
void stop_dir_heartbeat(JCR *jcr)
{
- /* ***FIXME*** implement */
+ if (me->heartbeat_interval) {
+ stop_heartbeat_monitor(jcr);
+ }
}