2 Bacula(R) - The Network Backup Solution
4 Copyright (C) 2000-2016 Kern Sibbald
6 The original author of Bacula is Kern Sibbald, with contributions
7 from many others, a complete list can be found in the file AUTHORS.
9 You may use this file and others of this release according to the
10 license defined in the LICENSE file, which includes the Affero General
11 Public License, v3.0 ("AGPLv3") and some additional permissions and
12 terms pursuant to its AGPLv3 Section 7.
14 This notice must be preserved when any source code is
15 conveyed and/or propagated.
17 Bacula(R) is a registered trademark of Kern Sibbald.
/*
 *  Bacula File Daemon heartbeat routines
 *    Listens for heartbeats coming from the SD
 *    If configured, sends heartbeats to Dir
 *
 *    Kern Sibbald, May MMIII
 */
/* Seconds sd_heartbeat_thread() blocks waiting for SD data per loop pass */
#define WAIT_INTERVAL 5

/* Thread entry points handed to pthread_create() by the start_* functions */
extern "C" void *sd_heartbeat_thread(void *arg);
extern "C" void *dir_heartbeat_thread(void *arg);

/* Defined elsewhere; when true no heartbeat thread is started */
extern bool no_signals;
38 * Listen on the SD socket for heartbeat signals.
39 * Send heartbeats to the Director every HB_TIME
42 extern "C" void *sd_heartbeat_thread(void *arg)
45 JCR *jcr = (JCR *)arg;
47 time_t last_heartbeat = time(NULL);
51 pthread_detach(pthread_self());
53 /* Get our own local copy */
54 sd = dup_bsock(jcr->store_bsock);
55 dir = dup_bsock(jcr->dir_bsock);
58 jcr->hb_started = true;
59 jcr->hb_dir_bsock = dir;
60 dir->suppress_error_messages(true);
61 sd->suppress_error_messages(true);
63 /* Hang reading the socket to the SD, and every time we get
64 * a heartbeat or we get a wait timeout (5 seconds), we
65 * check to see if we need to send a heartbeat to the
68 while (!sd->is_stop()) {
69 n = sd->wait_data_intr(WAIT_INTERVAL);
70 if (n < 0 || sd->is_stop()) {
73 if (me->heartbeat_interval) {
75 if (now-last_heartbeat >= me->heartbeat_interval) {
76 dir->signal(BNET_HEARTBEAT);
83 if (n == 1) { /* input waiting */
84 sd->recv(); /* read it -- probably heartbeat from sd */
88 if (sd->msglen <= 0) {
89 Dmsg1(100, "Got BNET_SIG %d from SD\n", sd->msglen);
91 Dmsg2(100, "Got %d bytes from SD. MSG=%s\n", sd->msglen, sd->msg);
94 Dmsg2(200, "wait_intr=%d stop=%d\n", n, sd->is_stop());
97 * Note, since sd and dir are local dupped sockets, this
98 * is one place where we can call destroy().
102 jcr->hb_bsock = NULL;
103 jcr->hb_started = false;
104 jcr->hb_dir_bsock = NULL;
108 /* Startup the heartbeat thread -- see above */
109 void start_heartbeat_monitor(JCR *jcr)
112 * If no signals are set, do not start the heartbeat because
113 * it gives a constant stream of TIMEOUT_SIGNAL signals that
114 * make debugging impossible.
116 if (!no_signals && (me->heartbeat_interval > 0)) {
117 jcr->hb_bsock = NULL;
118 jcr->hb_started = false;
119 jcr->hb_dir_bsock = NULL;
120 pthread_create(&jcr->heartbeat_id, NULL, sd_heartbeat_thread, (void *)jcr);
124 /* Terminate the heartbeat thread. Used for both SD and DIR */
125 void stop_heartbeat_monitor(JCR *jcr)
131 /* Wait max 10 secs for heartbeat thread to start */
132 while (!jcr->hb_started && cnt++ < 200) {
133 bmicrosleep(0, 50000); /* wait for start */
136 if (jcr->hb_started) {
137 jcr->hb_bsock->set_timed_out(); /* set timed_out to terminate read */
138 jcr->hb_bsock->set_terminated(); /* set to terminate read */
140 if (jcr->hb_dir_bsock) {
141 jcr->hb_dir_bsock->set_timed_out(); /* set timed_out to terminate read */
142 jcr->hb_dir_bsock->set_terminated(); /* set to terminate read */
144 if (jcr->hb_started) {
145 Dmsg0(100, "Send kill to heartbeat id\n");
146 pthread_kill(jcr->heartbeat_id, TIMEOUT_SIGNAL); /* make heartbeat thread go away */
147 bmicrosleep(0, 50000);
150 /* Wait max 100 secs for heartbeat thread to stop */
151 while (jcr->hb_started && cnt++ < 200) {
152 pthread_kill(jcr->heartbeat_id, TIMEOUT_SIGNAL); /* make heartbeat thread go away */
153 bmicrosleep(0, 500000);
158 * Thread for sending heartbeats to the Director when there
159 * is no SD monitoring needed -- e.g. restore and verify Vol
160 * both do their own read() on the SD socket.
162 extern "C" void *dir_heartbeat_thread(void *arg)
164 JCR *jcr = (JCR *)arg;
166 time_t last_heartbeat = time(NULL);
168 pthread_detach(pthread_self());
170 /* Get our own local copy */
171 dir = dup_bsock(jcr->dir_bsock);
174 jcr->hb_started = true;
175 dir->suppress_error_messages(true);
177 while (!dir->is_stop()) {
181 next = now - last_heartbeat;
182 if (next >= me->heartbeat_interval) {
183 dir->signal(BNET_HEARTBEAT);
184 if (dir->is_stop()) {
187 last_heartbeat = now;
189 /* This should never happen, but it might ... */
193 bmicrosleep(next, 0);
196 jcr->hb_bsock = NULL;
197 jcr->hb_started = false;
202 * Same as above but we don't listen to the SD
204 void start_dir_heartbeat(JCR *jcr)
206 if (!no_signals && (me->heartbeat_interval > 0)) {
207 jcr->dir_bsock->set_locking();
208 pthread_create(&jcr->heartbeat_id, NULL, dir_heartbeat_thread, (void *)jcr);
212 void stop_dir_heartbeat(JCR *jcr)
214 if (me->heartbeat_interval > 0) {
215 stop_heartbeat_monitor(jcr);