/*
 *  Bacula File Daemon heartbeat routines
 *    Listens for heartbeats coming from the SD
 *    If configured, sends heartbeats to Dir
 *
 *    Kern Sibbald, May MMIII
 */
/*
   Copyright (C) 2003-2005 Kern Sibbald

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License
   version 2 as amended with additional clauses defined in the
   file LICENSE in the main source directory.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   file LICENSE for additional details.
 */
/*
 * Timeout handed to bnet_wait_data_intr() while polling the SD socket
 *  (presumably seconds -- confirm against bnet_wait_data_intr()).
 */
#define WAIT_INTERVAL 10

/* Thread entry points passed to pthread_create(); declared with C linkage */
extern "C" void *sd_heartbeat_thread(void *arg);
extern "C" void *dir_heartbeat_thread(void *arg);

/* Defined outside this file */
extern bool no_signals;
36 * Listen on the SD socket for heartbeat signals.
37 * Send heartbeats to the Director every HB_TIME
40 extern "C" void *sd_heartbeat_thread(void *arg)
43 JCR *jcr = (JCR *)arg;
45 time_t last_heartbeat = time(NULL);
48 pthread_detach(pthread_self());
50 /* Get our own local copy */
51 sd = dup_bsock(jcr->store_bsock);
54 if ((oflags = fcntl(sd->fd, F_GETFL, 0)) != -1) {
55 fcntl(sd->fd, F_SETFL, oflags|O_NONBLOCK);
58 dir = dup_bsock(jcr->dir_bsock);
61 jcr->hb_dir_bsock = dir;
63 /* Hang reading the socket to the SD, and every time we get
64 * a heartbeat or we get a wait timeout (1 minute), we
65 * check to see if we need to send a heartbeat to the
68 for ( ; !is_bnet_stop(sd); ) {
69 n = bnet_wait_data_intr(sd, WAIT_INTERVAL);
70 if (me->heartbeat_interval) {
72 if (now-last_heartbeat >= me->heartbeat_interval) {
73 bnet_sig(dir, BNET_HEARTBEAT);
77 if (n < 0 || is_bnet_stop(sd)) {
80 if (n == 1) { /* input waiting */
81 bnet_recv(sd); /* read it -- probably heartbeat from sd */
82 if (sd->msglen <= 0) {
83 Dmsg1(100, "Got BNET_SIG %d from SD\n", sd->msglen);
85 Dmsg2(100, "Got %d bytes from SD. MSG=%s\n", sd->msglen, sd->msg);
88 Dmsg2(100, "wait_intr=%d stop=%d\n", n, is_bnet_stop(sd));
93 jcr->hb_dir_bsock = NULL;
97 /* Startup the heartbeat thread -- see above */
98 void start_heartbeat_monitor(JCR *jcr)
101 * If no signals are set, do not start the heartbeat because
102 * it gives a constant stream of TIMEOUT_SIGNAL signals that
103 * make debugging impossible.
106 jcr->hb_bsock = NULL;
107 jcr->hb_dir_bsock = NULL;
108 pthread_create(&jcr->heartbeat_id, NULL, sd_heartbeat_thread, (void *)jcr);
112 /* Terminate the heartbeat thread. Used for both SD and DIR */
113 void stop_heartbeat_monitor(JCR *jcr)
119 /* Wait max 10 secs for heartbeat thread to start */
120 while (jcr->hb_bsock == NULL && cnt++ < 200) {
121 bmicrosleep(0, 50000); /* wait for start */
123 if (!jcr->hb_bsock) {
127 jcr->hb_bsock->timed_out = 1; /* set timed_out to terminate read */
128 jcr->hb_bsock->terminated = 1; /* set to terminate read */
130 if (jcr->hb_dir_bsock) {
131 jcr->hb_dir_bsock->timed_out = 1; /* set timed_out to terminate read */
132 jcr->hb_dir_bsock->terminated = 1; /* set to terminate read */
134 pthread_kill(jcr->heartbeat_id, TIMEOUT_SIGNAL); /* make heartbeat thread go away */
135 bmicrosleep(0, 50000);
137 /* Wait max 100 secs for heartbeat thread to stop */
138 while (jcr->hb_bsock && cnt++ < 200) {
139 pthread_kill(jcr->heartbeat_id, TIMEOUT_SIGNAL); /* make heartbeat thread go away */
140 bmicrosleep(0, 500000);
147 * Thread for sending heartbeats to the Director when there
148 * is no SD monitoring needed -- e.g. restore and verify Vol
149 * both do their own read() on the SD socket.
151 extern "C" void *dir_heartbeat_thread(void *arg)
153 JCR *jcr = (JCR *)arg;
155 time_t last_heartbeat = time(NULL);
157 pthread_detach(pthread_self());
159 /* Get our own local copy */
160 dir = dup_bsock(jcr->dir_bsock);
164 for ( ; !is_bnet_stop(dir); ) {
168 next = now - last_heartbeat;
169 if (next >= me->heartbeat_interval) {
170 bnet_sig(dir, BNET_HEARTBEAT);
171 last_heartbeat = now;
173 bmicrosleep(next, 0);
176 jcr->hb_bsock = NULL;
181 * Same as above but we don't listen to the SD
183 void start_dir_heartbeat(JCR *jcr)
185 if (me->heartbeat_interval) {
186 pthread_create(&jcr->heartbeat_id, NULL, dir_heartbeat_thread, (void *)jcr);
190 void stop_dir_heartbeat(JCR *jcr)
192 if (me->heartbeat_interval) {
193 stop_heartbeat_monitor(jcr);