/*
   Bacula® - The Network Backup Solution

   Copyright (C) 2003-2012 Free Software Foundation Europe e.V.

   The main author of Bacula is Kern Sibbald, with contributions from
   many others, a complete list can be found in the file AUTHORS.
   This program is Free Software; you can redistribute it and/or
   modify it under the terms of version three of the GNU Affero General Public
   License as published by the Free Software Foundation and included
   in the file LICENSE.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU Affero General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   Bacula® is a registered trademark of Kern Sibbald.
   The licensor of Bacula is the Free Software Foundation Europe
   (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
   Switzerland, email:ftf@fsfeurope.org.
*/
/*
 *  Bacula File Daemon heartbeat routines
 *    Listens for heartbeats coming from the SD
 *    If configured, sends heartbeats to Dir
 *
 *    Kern Sibbald, May MMIII
 */
/* Timeout, in seconds, passed to bnet_wait_data_intr() while waiting on the SD socket */
#define WAIT_INTERVAL 5

/* Thread entry points handed to pthread_create() further down in this file */
extern "C" void *sd_heartbeat_thread(void *arg);
extern "C" void *dir_heartbeat_thread(void *arg);

/*
 * Defined outside this file.
 * NOTE(review): presumably true when signal handling is disabled (e.g. for
 *   debugging); confirm against the defining translation unit.
 */
extern bool no_signals;
47 * Listen on the SD socket for heartbeat signals.
48 * Send heartbeats to the Director every HB_TIME
51 extern "C" void *sd_heartbeat_thread(void *arg)
54 JCR *jcr = (JCR *)arg;
56 time_t last_heartbeat = time(NULL);
59 pthread_detach(pthread_self());
61 /* Get our own local copy */
62 sd = dup_bsock(jcr->store_bsock);
63 dir = dup_bsock(jcr->dir_bsock);
66 jcr->hb_started = true;
67 jcr->hb_dir_bsock = dir;
68 dir->m_suppress_error_msgs = true;
69 sd->m_suppress_error_msgs = true;
71 /* Hang reading the socket to the SD, and every time we get
72 * a heartbeat or we get a wait timeout (5 seconds), we
73 * check to see if we need to send a heartbeat to the
76 while (!sd->is_stop()) {
77 n = bnet_wait_data_intr(sd, WAIT_INTERVAL);
78 if (n < 0 || sd->is_stop()) {
81 if (me->heartbeat_interval) {
83 if (now-last_heartbeat >= me->heartbeat_interval) {
84 dir->signal(BNET_HEARTBEAT);
91 if (n == 1) { /* input waiting */
92 sd->recv(); /* read it -- probably heartbeat from sd */
96 if (sd->msglen <= 0) {
97 Dmsg1(100, "Got BNET_SIG %d from SD\n", sd->msglen);
99 Dmsg2(100, "Got %d bytes from SD. MSG=%s\n", sd->msglen, sd->msg);
102 Dmsg2(200, "wait_intr=%d stop=%d\n", n, is_bnet_stop(sd));
106 jcr->hb_bsock = NULL;
107 jcr->hb_started = false;
108 jcr->hb_dir_bsock = NULL;
112 /* Startup the heartbeat thread -- see above */
113 void start_heartbeat_monitor(JCR *jcr)
116 * If no signals are set, do not start the heartbeat because
117 * it gives a constant stream of TIMEOUT_SIGNAL signals that
118 * make debugging impossible.
121 jcr->hb_bsock = NULL;
122 jcr->hb_started = false;
123 jcr->hb_dir_bsock = NULL;
124 pthread_create(&jcr->heartbeat_id, NULL, sd_heartbeat_thread, (void *)jcr);
128 /* Terminate the heartbeat thread. Used for both SD and DIR */
129 void stop_heartbeat_monitor(JCR *jcr)
135 /* Wait max 10 secs for heartbeat thread to start */
136 while (!jcr->hb_started && cnt++ < 200) {
137 bmicrosleep(0, 50000); /* wait for start */
140 if (jcr->hb_started) {
141 jcr->hb_bsock->set_timed_out(); /* set timed_out to terminate read */
142 jcr->hb_bsock->set_terminated(); /* set to terminate read */
144 if (jcr->hb_dir_bsock) {
145 jcr->hb_dir_bsock->set_timed_out(); /* set timed_out to terminate read */
146 jcr->hb_dir_bsock->set_terminated(); /* set to terminate read */
148 if (jcr->hb_started) {
149 Dmsg0(100, "Send kill to heartbeat id\n");
150 pthread_kill(jcr->heartbeat_id, TIMEOUT_SIGNAL); /* make heartbeat thread go away */
151 bmicrosleep(0, 50000);
154 /* Wait max 100 secs for heartbeat thread to stop */
155 while (jcr->hb_started && cnt++ < 200) {
156 pthread_kill(jcr->heartbeat_id, TIMEOUT_SIGNAL); /* make heartbeat thread go away */
157 bmicrosleep(0, 500000);
162 * Thread for sending heartbeats to the Director when there
163 * is no SD monitoring needed -- e.g. restore and verify Vol
164 * both do their own read() on the SD socket.
166 extern "C" void *dir_heartbeat_thread(void *arg)
168 JCR *jcr = (JCR *)arg;
170 time_t last_heartbeat = time(NULL);
172 pthread_detach(pthread_self());
174 /* Get our own local copy */
175 dir = dup_bsock(jcr->dir_bsock);
178 jcr->hb_started = true;
179 dir->m_suppress_error_msgs = true;
181 while (!dir->is_stop()) {
185 next = now - last_heartbeat;
186 if (next >= me->heartbeat_interval) {
187 dir->signal(BNET_HEARTBEAT);
188 if (dir->is_stop()) {
191 last_heartbeat = now;
193 bmicrosleep(next, 0);
196 jcr->hb_bsock = NULL;
197 jcr->hb_started = false;
202 * Same as above but we don't listen to the SD
204 void start_dir_heartbeat(JCR *jcr)
206 if (me->heartbeat_interval) {
207 jcr->dir_bsock->set_locking();
208 pthread_create(&jcr->heartbeat_id, NULL, dir_heartbeat_thread, (void *)jcr);
212 void stop_dir_heartbeat(JCR *jcr)
214 if (me->heartbeat_interval) {
215 stop_heartbeat_monitor(jcr);