2 * Bacula File Daemon heartbeat routines
3 * Listens for heartbeats coming from the SD
4 * If configured, sends heartbeats to Dir
6 * Kern Sibbald, May MMIII
12 Bacula® - The Network Backup Solution
14 Copyright (C) 2003-2006 Free Software Foundation Europe e.V.
16 The main author of Bacula is Kern Sibbald, with contributions from
17 many others, a complete list can be found in the file AUTHORS.
18 This program is Free Software; you can redistribute it and/or
19 modify it under the terms of version two of the GNU General Public
20 License as published by the Free Software Foundation plus additions
21 that are listed in the file LICENSE.
23 This program is distributed in the hope that it will be useful, but
24 WITHOUT ANY WARRANTY; without even the implied warranty of
25 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26 General Public License for more details.
28 You should have received a copy of the GNU General Public License
29 along with this program; if not, write to the Free Software
30 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
33 Bacula® is a registered trademark of John Walker.
34 The licensor of Bacula is the Free Software Foundation Europe
35 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
36 Switzerland, email:ftf@fsfeurope.org.
/* Timeout handed to bnet_wait_data_intr() in sd_heartbeat_thread(); presumably seconds -- TODO confirm */
42 #define WAIT_INTERVAL 5
/* Thread entry points, declared with C linkage and passed to pthread_create() further down */
44 extern "C" void *sd_heartbeat_thread(void *arg);
45 extern "C" void *dir_heartbeat_thread(void *arg);
/*
 * Defined elsewhere. The text inside start_heartbeat_monitor() refers to it,
 * but no statement that tests it is visible in this copy of the file.
 */
46 extern bool no_signals;
49 * Listen on the SD socket for heartbeat signals.
50 * Send heartbeats to the Director every me->heartbeat_interval seconds (only when that interval is non-zero)
/*
 * Thread entry point created by start_heartbeat_monitor().
 *
 *  arg   the JCR of the job, passed as void *
 *
 * Works on duplicates of the job's Storage daemon and Director
 * sockets. It waits on the SD socket with a WAIT_INTERVAL timeout,
 * reads and logs whatever the SD sends, and, when
 * me->heartbeat_interval is non-zero and at least that much time has
 * passed since last_heartbeat, signals BNET_HEARTBEAT to the
 * Director. The loop runs until is_bnet_stop(sd) is true.
 *
 * NOTE(review): this copy of the file has lost lines. The
 * declarations of sd, dir, n and now, the assignments to now and to
 * jcr->hb_bsock, the statement taken when n < 0, and the end of the
 * function are not visible here -- check them against a complete
 * copy before changing this code.
 */
53 extern "C" void *sd_heartbeat_thread(void *arg)
56 JCR *jcr = (JCR *)arg;
/* Start the heartbeat clock at thread start */
58 time_t last_heartbeat = time(NULL);
/* The thread detaches itself, so it is never joined */
61 pthread_detach(pthread_self());
63 /* Get our own local copy */
64 sd = dup_bsock(jcr->store_bsock);
65 dir = dup_bsock(jcr->dir_bsock);
/* Publish the Director copy so stop_heartbeat_monitor() can flag it */
68 jcr->hb_dir_bsock = dir;
/*
 * NOTE(review): the comment below says the wait timeout is 1 minute,
 * but the wait is made with WAIT_INTERVAL, which is defined as 5.
 */
70 /* Hang reading the socket to the SD, and every time we get
71 * a heartbeat or we get a wait timeout (1 minute), we
72 * check to see if we need to send a heartbeat to the
75 for ( ; !is_bnet_stop(sd); ) {
76 n = bnet_wait_data_intr(sd, WAIT_INTERVAL);
77 if (me->heartbeat_interval) {
79 if (now-last_heartbeat >= me->heartbeat_interval) {
80 bnet_sig(dir, BNET_HEARTBEAT);
84 if (n < 0 || is_bnet_stop(sd)) {
87 if (n == 1) { /* input waiting */
88 bnet_recv(sd); /* read it -- probably heartbeat from sd */
/* A msglen of zero or less is logged as a signal, anything else as data */
89 if (sd->msglen <= 0) {
90 Dmsg1(100, "Got BNET_SIG %d from SD\n", sd->msglen);
92 Dmsg2(100, "Got %d bytes from SD. MSG=%s\n", sd->msglen, sd->msg);
95 Dmsg2(100, "wait_intr=%d stop=%d\n", n, is_bnet_stop(sd));
/* Withdraw the published Director copy once the loop is over */
100 jcr->hb_dir_bsock = NULL;
104 /* Startup the heartbeat thread -- see above */
/*
 * Clear the two heartbeat socket pointers in the JCR and create the
 * thread that runs sd_heartbeat_thread() with the JCR as its argument.
 *
 *  jcr   job control record; its hb_bsock, hb_dir_bsock and
 *        heartbeat_id fields are written here
 *
 * NOTE(review): the text below describes skipping the start when no
 * signals are set, but the statement that performs that test is not
 * visible in this copy of the file -- confirm against a complete copy.
 */
105 void start_heartbeat_monitor(JCR *jcr)
108 * If no signals are set, do not start the heartbeat because
109 * it gives a constant stream of TIMEOUT_SIGNAL signals that
110 * make debugging impossible.
/* Reset both pointers first: stop_heartbeat_monitor() polls hb_bsock to see whether the thread has started */
113 jcr->hb_bsock = NULL;
114 jcr->hb_dir_bsock = NULL;
/* NOTE(review): the return value of pthread_create() is not checked */
115 pthread_create(&jcr->heartbeat_id, NULL, sd_heartbeat_thread, (void *)jcr);
119 /* Terminate the heartbeat thread. Used for both SD and DIR */
/*
 * Ask the heartbeat thread of this job to stop and wait for it.
 *
 *  jcr   job control record whose hb_bsock, hb_dir_bsock and
 *        heartbeat_id fields identify the thread and its sockets
 *
 * Steps: poll until the thread has published hb_bsock, set the
 * timed_out and terminated flags on the published sockets, signal
 * the thread with TIMEOUT_SIGNAL, then keep signalling and polling
 * until hb_bsock is NULL again or the retry count runs out.
 *
 * NOTE(review): this copy of the file has lost lines; the declaration
 * of cnt and any guards around the statements below are not visible.
 */
120 void stop_heartbeat_monitor(JCR *jcr)
126 /* Wait max 10 secs for heartbeat thread to start */
127 while (jcr->hb_bsock == NULL && cnt++ < 200) {
128 bmicrosleep(0, 50000); /* wait for start */
/*
 * NOTE(review): the wait above can give up with hb_bsock still NULL.
 * No NULL test is visible before the two dereferences below --
 * confirm one exists in the complete file.
 */
132 jcr->hb_bsock->timed_out = 1; /* set timed_out to terminate read */
133 jcr->hb_bsock->terminated = 1; /* set to terminate read */
/* hb_dir_bsock is only published by sd_heartbeat_thread(), hence the test */
135 if (jcr->hb_dir_bsock) {
136 jcr->hb_dir_bsock->timed_out = 1; /* set timed_out to terminate read */
137 jcr->hb_dir_bsock->terminated = 1; /* set to terminate read */
139 Dmsg0(100, "Send kill to heartbeat id\n");
/*
 * NOTE(review): both heartbeat threads call pthread_detach() on
 * themselves. If the thread has already exited, heartbeat_id may no
 * longer be a valid thread ID for pthread_kill() -- confirm this is safe.
 */
140 pthread_kill(jcr->heartbeat_id, TIMEOUT_SIGNAL); /* make heartbeat thread go away */
141 bmicrosleep(0, 50000);
/*
 * NOTE(review): 200 iterations are only available here if cnt is reset
 * after the first loop; no such reset is visible in this copy.
 */
143 /* Wait max 100 secs for heartbeat thread to stop */
144 while (jcr->hb_bsock && cnt++ < 200) {
145 pthread_kill(jcr->heartbeat_id, TIMEOUT_SIGNAL); /* make heartbeat thread go away */
146 bmicrosleep(0, 500000);
151 * Thread for sending heartbeats to the Director when there
152 * is no SD monitoring needed -- e.g. restore and verify Vol
153 * both do their own read() on the SD socket.
/*
 * Thread entry point created by start_dir_heartbeat().
 *
 *  arg   the JCR of the job, passed as void *
 *
 * Works on a duplicate of the job's Director socket. Until
 * is_bnet_stop(dir) is true it signals BNET_HEARTBEAT to the Director
 * whenever at least me->heartbeat_interval has elapsed since
 * last_heartbeat, sleeping between checks.
 *
 * NOTE(review): this copy of the file has lost lines. The
 * declarations of dir, now and next, the assignment to now, the
 * assignment that publishes jcr->hb_bsock, and the end of the
 * function are not visible here.
 */
155 extern "C" void *dir_heartbeat_thread(void *arg)
157 JCR *jcr = (JCR *)arg;
/* Start the heartbeat clock at thread start */
159 time_t last_heartbeat = time(NULL);
/* The thread detaches itself, so it is never joined */
161 pthread_detach(pthread_self());
163 /* Get our own local copy */
164 dir = dup_bsock(jcr->dir_bsock);
168 for ( ; !is_bnet_stop(dir); ) {
/* next is the time elapsed since the last heartbeat, not the time remaining */
172 next = now - last_heartbeat;
173 if (next >= me->heartbeat_interval) {
174 bnet_sig(dir, BNET_HEARTBEAT);
175 last_heartbeat = now;
/*
 * NOTE(review): this sleeps for the elapsed time. Right after thread
 * start or when now equals last_heartbeat, next is 0, so the call is
 * bmicrosleep(0, 0) and the loop presumably spins until the clock
 * advances. The remaining time (heartbeat_interval - next, with a
 * minimum of 1) looks like the intended argument -- confirm and fix.
 */
177 bmicrosleep(next, 0);
/* stop_heartbeat_monitor() polls hb_bsock and treats NULL as "thread has stopped" */
180 jcr->hb_bsock = NULL;
185 * Start the Director-only heartbeat thread: same as start_heartbeat_monitor(), but we don't listen to the SD
/*
 * Create the thread that runs dir_heartbeat_thread() for this job,
 * but only when me->heartbeat_interval is non-zero.
 *
 *  jcr   job control record; its heartbeat_id field receives the
 *        thread ID and the JCR itself is passed to the thread
 *
 * NOTE(review): unlike start_heartbeat_monitor(), this does not reset
 * jcr->hb_bsock or jcr->hb_dir_bsock before creating the thread, and
 * the return value of pthread_create() is not checked.
 */
187 void start_dir_heartbeat(JCR *jcr)
189 if (me->heartbeat_interval) {
190 pthread_create(&jcr->heartbeat_id, NULL, dir_heartbeat_thread, (void *)jcr);
/*
 * Stop the thread started by start_dir_heartbeat().
 *
 *  jcr   job control record of the job whose heartbeat thread is stopped
 *
 * Uses the same me->heartbeat_interval test as start_dir_heartbeat(),
 * so a stop is only attempted when a thread would have been created,
 * and delegates the work to stop_heartbeat_monitor().
 */
194 void stop_dir_heartbeat(JCR *jcr)
196 if (me->heartbeat_interval) {
197 stop_heartbeat_monitor(jcr);