2 Bacula(R) - The Network Backup Solution
4 Copyright (C) 2000-2016 Kern Sibbald
6 The original author of Bacula is Kern Sibbald, with contributions
7 from many others, a complete list can be found in the file AUTHORS.
9 You may use this file and others of this release according to the
10 license defined in the LICENSE file, which includes the Affero General
11 Public License, v3.0 ("AGPLv3") and some additional permissions and
12 terms pursuant to its AGPLv3 Section 7.
14 This notice must be preserved when any source code is
15 conveyed and/or propagated.
17 Bacula(R) is a registered trademark of Kern Sibbald.
20 * Bacula File Daemon heartbeat routines
21 * Listens for heartbeats coming from the SD
22 * If configured, sends heartbeats to Dir
24 * Kern Sibbald, May MMIII
/* Timeout, in seconds, for each wait on the SD socket in sd_heartbeat_thread() */
#define WAIT_INTERVAL 5

/* Thread entry points; C linkage so they can be handed to pthread_create() */
extern "C" void *sd_heartbeat_thread(void *arg);
extern "C" void *dir_heartbeat_thread(void *arg);

/* Defined elsewhere; when true the heartbeat threads are not started */
extern bool no_signals;
38 * Listen on the SD socket for heartbeat signals.
39 * Send heartbeats to the Director every HB_TIME
42 extern "C" void *sd_heartbeat_thread(void *arg)
45 JCR *jcr = (JCR *)arg;
47 time_t last_heartbeat = time(NULL);
51 pthread_detach(pthread_self());
53 /* Get our own local copy */
54 sd = dup_bsock(jcr->store_bsock);
55 dir = dup_bsock(jcr->dir_bsock);
58 jcr->hb_started = true;
59 jcr->hb_dir_bsock = dir;
60 dir->suppress_error_messages(true);
61 sd->suppress_error_messages(true);
63 /* Hang reading the socket to the SD, and every time we get
64 * a heartbeat or we get a wait timeout (5 seconds), we
65 * check to see if we need to send a heartbeat to the
68 while (!sd->is_stop()) {
69 n = sd->wait_data_intr(WAIT_INTERVAL);
70 if (n < 0 || sd->is_stop()) {
73 if (me->heartbeat_interval) {
75 if (now-last_heartbeat >= me->heartbeat_interval) {
76 dir->signal(BNET_HEARTBEAT);
83 if (n == 1) { /* input waiting */
84 sd->recv(); /* read it -- probably heartbeat from sd */
88 if (sd->msglen <= 0) {
89 Dmsg1(100, "Got BNET_SIG %d from SD\n", sd->msglen);
91 Dmsg2(100, "Got %d bytes from SD. MSG=%s\n", sd->msglen, sd->msg);
94 Dmsg2(200, "wait_intr=%d stop=%d\n", n, sd->is_stop());
99 jcr->hb_started = false;
100 jcr->hb_dir_bsock = NULL;
104 /* Startup the heartbeat thread -- see above */
105 void start_heartbeat_monitor(JCR *jcr)
108 * If no signals are set, do not start the heartbeat because
109 * it gives a constant stream of TIMEOUT_SIGNAL signals that
110 * make debugging impossible.
112 if (!no_signals && (me->heartbeat_interval > 0)) {
113 jcr->hb_bsock = NULL;
114 jcr->hb_started = false;
115 jcr->hb_dir_bsock = NULL;
116 pthread_create(&jcr->heartbeat_id, NULL, sd_heartbeat_thread, (void *)jcr);
120 /* Terminate the heartbeat thread. Used for both SD and DIR */
121 void stop_heartbeat_monitor(JCR *jcr)
127 /* Wait max 10 secs for heartbeat thread to start */
128 while (!jcr->hb_started && cnt++ < 200) {
129 bmicrosleep(0, 50000); /* wait for start */
132 if (jcr->hb_started) {
133 jcr->hb_bsock->set_timed_out(); /* set timed_out to terminate read */
134 jcr->hb_bsock->set_terminated(); /* set to terminate read */
136 if (jcr->hb_dir_bsock) {
137 jcr->hb_dir_bsock->set_timed_out(); /* set timed_out to terminate read */
138 jcr->hb_dir_bsock->set_terminated(); /* set to terminate read */
140 if (jcr->hb_started) {
141 Dmsg0(100, "Send kill to heartbeat id\n");
142 pthread_kill(jcr->heartbeat_id, TIMEOUT_SIGNAL); /* make heartbeat thread go away */
143 bmicrosleep(0, 50000);
146 /* Wait max 100 secs for heartbeat thread to stop */
147 while (jcr->hb_started && cnt++ < 200) {
148 pthread_kill(jcr->heartbeat_id, TIMEOUT_SIGNAL); /* make heartbeat thread go away */
149 bmicrosleep(0, 500000);
154 * Thread for sending heartbeats to the Director when there
155 * is no SD monitoring needed -- e.g. restore and verify Vol
156 * both do their own read() on the SD socket.
158 extern "C" void *dir_heartbeat_thread(void *arg)
160 JCR *jcr = (JCR *)arg;
162 time_t last_heartbeat = time(NULL);
164 pthread_detach(pthread_self());
166 /* Get our own local copy */
167 dir = dup_bsock(jcr->dir_bsock);
170 jcr->hb_started = true;
171 dir->suppress_error_messages(true);
173 while (!dir->is_stop()) {
177 next = now - last_heartbeat;
178 if (next >= me->heartbeat_interval) {
179 dir->signal(BNET_HEARTBEAT);
180 if (dir->is_stop()) {
183 last_heartbeat = now;
185 /* This should never happen, but it might ... */
189 bmicrosleep(next, 0);
192 jcr->hb_bsock = NULL;
193 jcr->hb_started = false;
198 * Same as above but we don't listen to the SD
200 void start_dir_heartbeat(JCR *jcr)
202 if (!no_signals && (me->heartbeat_interval > 0)) {
203 jcr->dir_bsock->set_locking();
204 pthread_create(&jcr->heartbeat_id, NULL, dir_heartbeat_thread, (void *)jcr);
208 void stop_dir_heartbeat(JCR *jcr)
210 if (me->heartbeat_interval > 0) {
211 stop_heartbeat_monitor(jcr);