2 * Bacula File Daemon heartbeat routines
3 * Listens for heartbeats coming from the SD
4 * If configured, sends heartbeats to Dir
6 * Kern Sibbald, May MMIII
12 Copyright (C) 2003-2004 Kern Sibbald and John Walker
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of
17 the License, or (at your option) any later version.
19 This program is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
24 You should have received a copy of the GNU General Public
25 License along with this program; if not, write to the Free
26 Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
/*
 * Platform-dependent settings used by the heartbeat threads in this file.
 * WAIT_INTERVAL is the timeout value handed to bnet_wait_data_intr() in
 * sd_heartbeat_thread().
 */
34 #if defined(HAVE_CYGWIN) || defined(HAVE_WIN32)
35 /* pthread_kill() dies on Cygwin, so disable it */
/*
 * The macro below expands to nothing, so on these platforms every
 * pthread_kill(...) call in this file becomes an empty statement.
 */
36 #define pthread_kill(x, y)
37 /* Use shorter wait interval on Cygwin because no kill */
38 #define WAIT_INTERVAL 10
40 #else /* Unix systems */
41 #define WAIT_INTERVAL 60
/*
 * Thread entry points passed to pthread_create() by start_heartbeat_monitor()
 * and start_dir_heartbeat(); both are declared with C linkage.
 */
44 extern "C" void *sd_heartbeat_thread(void *arg);
45 extern "C" void *dir_heartbeat_thread(void *arg);
/*
 * Defined outside this file.
 * NOTE(review): presumably true when signal handling is switched off for
 * debugging -- confirm at its definition.
 */
46 extern bool no_signals;
49 * Listen on the SD socket for heartbeat signals.
50 * Send heartbeats to the Director every HB_TIME
/*
 * Thread body: watch the Storage daemon socket and relay heartbeats to the
 * Director.
 *
 *   arg  -- the JCR of the job, passed as void * by pthread_create().
 *
 * The thread detaches itself, takes private copies of jcr->store_bsock and
 * jcr->dir_bsock with dup_bsock(), and then loops until is_bnet_stop(sd)
 * reports true.  Each pass:
 *   - waits on the SD socket via bnet_wait_data_intr(sd, WAIT_INTERVAL);
 *   - if me->heartbeat_interval is non-zero and at least that much time has
 *     passed since last_heartbeat, sends BNET_HEARTBEAT to the Director;
 *   - if the wait returned 1, reads the pending message with bnet_recv()
 *     and traces it at debug level 100.
 *
 * NOTE(review): the statements that assign `now`, refresh `last_heartbeat`
 * after a send, publish the socket in jcr->hb_bsock and clean up at exit
 * are not visible in this view of the function -- confirm they exist.
 */
53 extern "C" void *sd_heartbeat_thread(void *arg)
56 JCR *jcr = (JCR *)arg;
58 time_t last_heartbeat = time(NULL);
61 pthread_detach(pthread_self());
63 /* Get our own local copy */
64 sd = dup_bsock(jcr->store_bsock);
65 dir = dup_bsock(jcr->dir_bsock);
69 /* Hang reading the socket to the SD, and every time we get
70 * a heartbeat or we get a wait timeout (1 minute), we
71 * check to see if we need to send a heartbeat to the
74 for ( ; !is_bnet_stop(sd); ) {
75 n = bnet_wait_data_intr(sd, WAIT_INTERVAL);
76 if (me->heartbeat_interval) {
78 if (now-last_heartbeat >= me->heartbeat_interval) {
79 bnet_sig(dir, BNET_HEARTBEAT);
83 if (n == 1) { /* input waiting */
84 bnet_recv(sd); /* read it -- probably heartbeat from sd */
/* A msglen <= 0 is traced as a signal; anything else is traced as payload */
85 if (sd->msglen <= 0) {
86 Dmsg1(100, "Got BNET_SIG %d from SD\n", sd->msglen);
88 Dmsg2(100, "Got %d bytes from SD. MSG=%s\n", sd->msglen, sd->msg);
98 /* Startup the heartbeat thread -- see above */
/*
 * Clears jcr->hb_bsock and launches sd_heartbeat_thread() with the JCR as
 * its argument; the thread id is stored in jcr->heartbeat_id.
 *
 * NOTE(review): the return value of pthread_create() is discarded, so a
 * failed thread start goes unnoticed here.
 * NOTE(review): the remark below about "no signals" implies a guard on
 * no_signals, but the test itself is not visible in this view -- confirm.
 */
99 void start_heartbeat_monitor(JCR *jcr)
102 * If no signals are set, do not start the heartbeat because
103 * it gives a constant stream of TIMEOUT_SIGNAL signals that
104 * make debugging impossible.
/* Reset first: stop_heartbeat_monitor() polls this field for non-NULL */
107 jcr->hb_bsock = NULL;
108 pthread_create(&jcr->heartbeat_id, NULL, sd_heartbeat_thread, (void *)jcr);
112 /* Terminate the heartbeat thread. Used for both SD and DIR */
/*
 * Shutdown runs in three steps:
 *   1. poll until the heartbeat thread has published its socket in
 *      jcr->hb_bsock (bounded by 200 iterations);
 *   2. set timed_out and terminated on that socket;
 *   3. poll until jcr->hb_bsock is NULL again (bounded by 200 iterations),
 *      signalling the thread with TIMEOUT_SIGNAL on every pass.
 *
 * NOTE(review): no NULL check on jcr->hb_bsock is visible between step 1
 * and step 2 in this view; confirm one exists, otherwise a thread that never
 * started leads to a NULL dereference.
 * NOTE(review): the reset of cnt between the two loops is not visible in
 * this view -- confirm.
 */
113 void stop_heartbeat_monitor(JCR *jcr)
119 /* Wait for the heartbeat thread to start: at most 200 polls of bmicrosleep(0, 50). NOTE(review): this is 10 seconds only if the second argument is in units of 1/1000 s; if it is microseconds the total is about 10 ms -- confirm */
120 while (jcr->hb_bsock == NULL && cnt++ < 200) {
121 bmicrosleep(0, 50); /* avoid race */
125 jcr->hb_bsock->timed_out = 1; /* set timed_out to terminate read */
126 jcr->hb_bsock->terminated = 1; /* set to terminate read */
129 /* Wait max 100 secs for heartbeat thread to stop */
130 while (jcr->hb_bsock && cnt++ < 200) {
131 /* Naturally, Cygwin 1.3.20 craps out on the following */
132 pthread_kill(jcr->heartbeat_id, TIMEOUT_SIGNAL); /* make heartbeat thread go away */
138 * Thread for sending heartbeats to the Director when there
139 * is no SD monitoring needed -- e.g. restore and verify Vol
140 * both do their own read() on the SD socket.
/*
 * Thread body: send periodic heartbeats to the Director only.
 *
 *   arg  -- the JCR of the job, passed as void * by pthread_create().
 *
 * The thread detaches itself, takes a private copy of jcr->dir_bsock with
 * dup_bsock(), and loops until is_bnet_stop(dir) reports true.  On each
 * pass it sends BNET_HEARTBEAT once me->heartbeat_interval has elapsed since
 * last_heartbeat, then sleeps.  jcr->hb_bsock is set to NULL on the way out,
 * which is the condition stop_heartbeat_monitor() waits for.
 *
 * NOTE(review): the assignment of `now` and the publication of the socket in
 * jcr->hb_bsock are not visible in this view of the function -- confirm.
 */
142 extern "C" void *dir_heartbeat_thread(void *arg)
144 JCR *jcr = (JCR *)arg;
146 time_t last_heartbeat = time(NULL);
148 pthread_detach(pthread_self());
150 /* Get our own local copy */
151 dir = dup_bsock(jcr->dir_bsock);
155 for ( ; !is_bnet_stop(dir); ) {
/* next holds the time elapsed since the last heartbeat was sent */
159 next = now - last_heartbeat;
160 if (next >= me->heartbeat_interval) {
161 bnet_sig(dir, BNET_HEARTBEAT);
162 last_heartbeat = now;
/*
 * NOTE(review): the sleep length is the elapsed time, not the time remaining
 * until the next heartbeat is due.  When next is 0 the call is
 * bmicrosleep(0, 0), so the loop looks like it can spin without pausing;
 * sleeping for (me->heartbeat_interval - next) may have been intended --
 * confirm.
 */
164 bmicrosleep(next, 0);
167 jcr->hb_bsock = NULL;
172 * Same as above but we don't listen to the SD
/*
 * Launches dir_heartbeat_thread() for this job, but only when
 * me->heartbeat_interval is non-zero.  The thread id is stored in
 * jcr->heartbeat_id.
 *
 * NOTE(review): the return value of pthread_create() is discarded.
 */
174 void start_dir_heartbeat(JCR *jcr)
176 if (me->heartbeat_interval) {
177 pthread_create(&jcr->heartbeat_id, NULL, dir_heartbeat_thread, (void *)jcr);
/*
 * Counterpart of start_dir_heartbeat(): delegates to
 * stop_heartbeat_monitor() under the same me->heartbeat_interval test, so
 * nothing is stopped when no Director heartbeat thread was started.
 */
181 void stop_dir_heartbeat(JCR *jcr)
183 if (me->heartbeat_interval) {
184 stop_heartbeat_monitor(jcr);