+/*
+ Bacula(R) - The Network Backup Solution
+
+ Copyright (C) 2000-2015 Kern Sibbald
+ Copyright (C) 2003-2014 Free Software Foundation Europe e.V.
+
+ The original author of Bacula is Kern Sibbald, with contributions
+ from many others, a complete list can be found in the file AUTHORS.
+
+ You may use this file and others of this release according to the
+ license defined in the LICENSE file, which includes the Affero General
+ Public License, v3.0 ("AGPLv3") and some additional permissions and
+ terms pursuant to its AGPLv3 Section 7.
+
+ This notice must be preserved when any source code is
+ conveyed and/or propagated.
+
+ Bacula(R) is a registered trademark of Kern Sibbald.
+*/
/*
* Bacula File Daemon heartbeat routines
* Listens for heartbeats coming from the SD
*
* Kern Sibbald, May MMIII
*
- * Version $Id$
- *
- */
-/*
- Copyright (C) 2003-2005 Kern Sibbald
-
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License
- version 2 as ammended with additional clauses defined in the
- file LICENSE in the main source directory.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- the file LICENSE for additional details.
-
*/
#include "bacula.h"
#include "filed.h"
-#define WAIT_INTERVAL 10
+#define WAIT_INTERVAL 5
extern "C" void *sd_heartbeat_thread(void *arg);
extern "C" void *dir_heartbeat_thread(void *arg);
/* Get our own local copy */
sd = dup_bsock(jcr->store_bsock);
-#ifndef WIN32
- int oflags;
- if ((oflags = fcntl(sd->fd, F_GETFL, 0)) != -1) {
- fcntl(sd->fd, F_SETFL, oflags|O_NONBLOCK);
- }
-#endif
dir = dup_bsock(jcr->dir_bsock);
jcr->hb_bsock = sd;
+ jcr->hb_started = true;
jcr->hb_dir_bsock = dir;
+ dir->suppress_error_messages(true);
+ sd->suppress_error_messages(true);
/* Hang reading the socket to the SD, and every time we get
- * a heartbeat or we get a wait timeout (1 minute), we
+ * a heartbeat or we get a wait timeout (5 seconds), we
* check to see if we need to send a heartbeat to the
* Director.
*/
- for ( ; !is_bnet_stop(sd); ) {
- n = bnet_wait_data_intr(sd, WAIT_INTERVAL);
+ while (!sd->is_stop()) {
+ n = sd->wait_data_intr(WAIT_INTERVAL);
+ if (n < 0 || sd->is_stop()) {
+ break;
+ }
if (me->heartbeat_interval) {
now = time(NULL);
if (now-last_heartbeat >= me->heartbeat_interval) {
- bnet_sig(dir, BNET_HEARTBEAT);
+ dir->signal(BNET_HEARTBEAT);
+ if (dir->is_stop()) {
+ break;
+ }
last_heartbeat = now;
}
}
- if (is_bnet_stop(sd)) {
- break;
- }
- if (n == 1) { /* input waiting */
- bnet_recv(sd); /* read it -- probably heartbeat from sd */
+ if (n == 1) { /* input waiting */
+ sd->recv(); /* read it -- probably heartbeat from sd */
+ if (sd->is_stop()) {
+ break;
+ }
if (sd->msglen <= 0) {
Dmsg1(100, "Got BNET_SIG %d from SD\n", sd->msglen);
} else {
Dmsg2(100, "Got %d bytes from SD. MSG=%s\n", sd->msglen, sd->msg);
}
}
- Dmsg2(000, "wait_intr=%d stop=%d\n", n, is_bnet_stop(sd));
+ Dmsg2(200, "wait_intr=%d stop=%d\n", n, sd->is_stop());
}
- bnet_close(sd);
- bnet_close(dir);
+ /*
+ * Note, since sd and dir are local dupped sockets, this
+ * is one place where we can call destroy().
+ */
+ sd->destroy();
+ dir->destroy();
jcr->hb_bsock = NULL;
+ jcr->hb_started = false;
jcr->hb_dir_bsock = NULL;
return NULL;
}
* it gives a constant stream of TIMEOUT_SIGNAL signals that
* make debugging impossible.
*/
- if (!no_signals) {
+ if (!no_signals && (me->heartbeat_interval > 0)) {
jcr->hb_bsock = NULL;
+ jcr->hb_started = false;
jcr->hb_dir_bsock = NULL;
pthread_create(&jcr->heartbeat_id, NULL, sd_heartbeat_thread, (void *)jcr);
}
return;
}
/* Wait max 10 secs for heartbeat thread to start */
- while (jcr->hb_bsock == NULL && cnt++ < 200) {
+ while (!jcr->hb_started && cnt++ < 200) {
bmicrosleep(0, 50000); /* wait for start */
}
- if (!jcr->hb_bsock) {
- }
- if (jcr->hb_bsock) {
- jcr->hb_bsock->timed_out = 1; /* set timed_out to terminate read */
- jcr->hb_bsock->terminated = 1; /* set to terminate read */
+ if (jcr->hb_started) {
+ jcr->hb_bsock->set_timed_out(); /* set timed_out to terminate read */
+ jcr->hb_bsock->set_terminated(); /* set to terminate read */
}
if (jcr->hb_dir_bsock) {
- jcr->hb_dir_bsock->timed_out = 1; /* set timed_out to terminate read */
- jcr->hb_dir_bsock->terminated = 1; /* set to terminate read */
+ jcr->hb_dir_bsock->set_timed_out(); /* set timed_out to terminate read */
+ jcr->hb_dir_bsock->set_terminated(); /* set to terminate read */
+ }
+ if (jcr->hb_started) {
+ Dmsg0(100, "Send kill to heartbeat id\n");
+ pthread_kill(jcr->heartbeat_id, TIMEOUT_SIGNAL); /* make heartbeat thread go away */
+ bmicrosleep(0, 50000);
}
- pthread_kill(jcr->heartbeat_id, TIMEOUT_SIGNAL); /* make heartbeat thread go away */
- bmicrosleep(0, 50000);
cnt = 0;
/* Wait max 100 secs for heartbeat thread to stop */
- while (jcr->hb_bsock && cnt++ < 200) {
+ while (jcr->hb_started && cnt++ < 200) {
pthread_kill(jcr->heartbeat_id, TIMEOUT_SIGNAL); /* make heartbeat thread go away */
bmicrosleep(0, 500000);
}
- if (jcr->hb_bsock) {
- }
}
/*
dir = dup_bsock(jcr->dir_bsock);
jcr->hb_bsock = dir;
+ jcr->hb_started = true;
+ dir->suppress_error_messages(true);
- for ( ; !is_bnet_stop(dir); ) {
+ while (!dir->is_stop()) {
time_t now, next;
now = time(NULL);
next = now - last_heartbeat;
if (next >= me->heartbeat_interval) {
- bnet_sig(dir, BNET_HEARTBEAT);
+ dir->signal(BNET_HEARTBEAT);
+ if (dir->is_stop()) {
+ break;
+ }
last_heartbeat = now;
}
+ /* This should never happen, but it might ... */
+ if (next <= 0) {
+ next = 1;
+ }
bmicrosleep(next, 0);
}
- bnet_close(dir);
+ dir->destroy();
jcr->hb_bsock = NULL;
+ jcr->hb_started = false;
return NULL;
}
*/
void start_dir_heartbeat(JCR *jcr)
{
/*
 * Start dir_heartbeat_thread for this job. The thread is created only
 * when no_signals is not set and the configured heartbeat interval
 * (me->heartbeat_interval) is positive; otherwise nothing is done.
 * The new thread's id is stored in jcr->heartbeat_id.
 */
- if (me->heartbeat_interval) {
+ if (!no_signals && (me->heartbeat_interval > 0)) {
/* NOTE(review): presumably locking is enabled on the Director socket
 * because the heartbeat thread will work with it alongside the job
 * thread -- confirm against the BSOCK implementation. */
+ jcr->dir_bsock->set_locking();
/* NOTE(review): the return value of pthread_create() is not checked
 * here; confirm that a failed thread start is acceptable. */
pthread_create(&jcr->heartbeat_id, NULL, dir_heartbeat_thread, (void *)jcr);
}
}
void stop_dir_heartbeat(JCR *jcr)
{
/*
 * Stop the Director heartbeat for this job by delegating to
 * stop_heartbeat_monitor(). Nothing is done unless the configured
 * heartbeat interval (me->heartbeat_interval) is positive.
 *
 * NOTE(review): start_dir_heartbeat() also tests no_signals before
 * creating the thread, while this guard does not -- confirm that
 * stop_heartbeat_monitor() copes with the case where no thread was
 * started.
 */
- if (me->heartbeat_interval) {
+ if (me->heartbeat_interval > 0) {
stop_heartbeat_monitor(jcr);
}
}