X-Git-Url: https://git.sur5r.net/?a=blobdiff_plain;f=bacula%2Fsrc%2Ffiled%2Fheartbeat.c;h=26ca962bc63ae58833e961cf52cc1fd1821d3e9e;hb=25f607f9bd00610e4cf7b72dd1808cbb23fc3632;hp=81a0e39f2ea182c963f0fdcfe386aef8b4f1ac67;hpb=9ce656ccfd75bfa6ec326d2ea6a3a68b3143e2b2;p=bacula%2Fbacula diff --git a/bacula/src/filed/heartbeat.c b/bacula/src/filed/heartbeat.c index 81a0e39f2e..26ca962bc6 100644 --- a/bacula/src/filed/heartbeat.c +++ b/bacula/src/filed/heartbeat.c @@ -1,3 +1,30 @@ +/* + Bacula® - The Network Backup Solution + + Copyright (C) 2003-2008 Free Software Foundation Europe e.V. + + The main author of Bacula is Kern Sibbald, with contributions from + many others, a complete list can be found in the file AUTHORS. + This program is Free Software; you can redistribute it and/or + modify it under the terms of version three of the GNU Affero General Public + License as published by the Free Software Foundation and included + in the file LICENSE. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + + Bacula® is a registered trademark of Kern Sibbald. + The licensor of Bacula is the Free Software Foundation Europe + (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich, + Switzerland, email:ftf@fsfeurope.org. +*/ /* * Bacula File Daemon heartbeat routines * Listens for heartbeats coming from the SD @@ -8,44 +35,17 @@ * Version $Id$ * */ -/* - Copyright (C) 2000-2004 Kern Sibbald and John Walker - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of - the License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public - License along with this program; if not, write to the Free - Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, - MA 02111-1307, USA. - - */ #include "bacula.h" #include "filed.h" -#if defined(HAVE_CYGWIN) || defined(HAVE_WIN32) -/* pthread_kill() dies on Cygwin, so disable it */ -#define pthread_kill(x, y) -/* Use shorter wait interval on Cygwin because no kill */ -#define WAIT_INTERVAL 10 - -#else /* Unix systems */ -#define WAIT_INTERVAL 60 -#endif +#define WAIT_INTERVAL 5 extern "C" void *sd_heartbeat_thread(void *arg); extern "C" void *dir_heartbeat_thread(void *arg); extern bool no_signals; -/* +/* * Listen on the SD socket for heartbeat signals. * Send heartbeats to the Director every HB_TIME * seconds. @@ -65,68 +65,90 @@ extern "C" void *sd_heartbeat_thread(void *arg) dir = dup_bsock(jcr->dir_bsock); jcr->hb_bsock = sd; + jcr->hb_started = true; + jcr->hb_dir_bsock = dir; /* Hang reading the socket to the SD, and every time we get - * a heartbeat or we get a wait timeout (1 minute), we - * check to see if we need to send a heartbeat to the - * Directory. + * a heartbeat or we get a wait timeout (1 minute), we + * check to see if we need to send a heartbeat to the + * Director. */ for ( ; !is_bnet_stop(sd); ) { n = bnet_wait_data_intr(sd, WAIT_INTERVAL); if (me->heartbeat_interval) { - now = time(NULL); - if (now-last_heartbeat >= me->heartbeat_interval) { - bnet_sig(dir, BNET_HEARTBEAT); - last_heartbeat = now; - } + now = time(NULL); + if (now-last_heartbeat >= me->heartbeat_interval) { + bnet_sig(dir, BNET_HEARTBEAT); + last_heartbeat = now; + } + } + if (n < 0 || is_bnet_stop(sd)) { + break; } - if (n == 1) { /* input waiting */ - bnet_recv(sd); /* read it -- probably heartbeat from sd */ - Dmsg1(100, "Got %d from SD\n", sd->msglen); + if (n == 1) { /* input waiting */ + sd->recv(); /* read it -- probably heartbeat from sd */ + if (sd->msglen <= 0) { + Dmsg1(100, "Got BNET_SIG %d from SD\n", sd->msglen); + } else { + Dmsg2(100, "Got %d bytes from SD. MSG=%s\n", sd->msglen, sd->msg); + } } + Dmsg2(100, "wait_intr=%d stop=%d\n", n, is_bnet_stop(sd)); } - bnet_close(sd); - bnet_close(dir); + sd->close(); + dir->close(); jcr->hb_bsock = NULL; + jcr->hb_started = false; + jcr->hb_dir_bsock = NULL; return NULL; } /* Startup the heartbeat thread -- see above */ void start_heartbeat_monitor(JCR *jcr) { - /* + /* * If no signals are set, do not start the heartbeat because * it gives a constant stream of TIMEOUT_SIGNAL signals that * make debugging impossible. */ if (!no_signals) { jcr->hb_bsock = NULL; + jcr->hb_started = false; + jcr->hb_dir_bsock = NULL; pthread_create(&jcr->heartbeat_id, NULL, sd_heartbeat_thread, (void *)jcr); } } /* Terminate the heartbeat thread. Used for both SD and DIR */ -void stop_heartbeat_monitor(JCR *jcr) +void stop_heartbeat_monitor(JCR *jcr) { int cnt = 0; if (no_signals) { return; } /* Wait max 10 secs for heartbeat thread to start */ - while (jcr->hb_bsock == NULL && cnt++ < 200) { - bmicrosleep(0, 50); /* avoid race */ + while (!jcr->hb_started && cnt++ < 200) { + bmicrosleep(0, 50000); /* wait for start */ } - if (jcr->hb_bsock) { - jcr->hb_bsock->timed_out = 1; /* set timed_out to terminate read */ - jcr->hb_bsock->terminated = 1; /* set to terminate read */ + if (jcr->hb_started) { + jcr->hb_bsock->set_timed_out(); /* set timed_out to terminate read */ + jcr->hb_bsock->set_terminated(); /* set to terminate read */ + } + if (jcr->hb_dir_bsock) { + jcr->hb_dir_bsock->set_timed_out(); /* set timed_out to terminate read */ + jcr->hb_dir_bsock->set_terminated(); /* set to terminate read */ + } + if (jcr->hb_started) { + Dmsg0(100, "Send kill to heartbeat id\n"); + pthread_kill(jcr->heartbeat_id, TIMEOUT_SIGNAL); /* make heartbeat thread go away */ + bmicrosleep(0, 50000); } cnt = 0; /* Wait max 100 secs for heartbeat thread to stop */ - while (jcr->hb_bsock && cnt++ < 200) { - /* Naturally, Cygwin 1.3.20 craps out on the following */ - pthread_kill(jcr->heartbeat_id, TIMEOUT_SIGNAL); /* make heartbeat thread go away */ - bmicrosleep(0, 500); + while (jcr->hb_started && cnt++ < 200) { + pthread_kill(jcr->heartbeat_id, TIMEOUT_SIGNAL); /* make heartbeat thread go away */ + bmicrosleep(0, 500000); } } @@ -147,6 +169,7 @@ extern "C" void *dir_heartbeat_thread(void *arg) dir = dup_bsock(jcr->dir_bsock); jcr->hb_bsock = dir; + jcr->hb_started = true; for ( ; !is_bnet_stop(dir); ) { time_t now, next; @@ -154,13 +177,14 @@ extern "C" void *dir_heartbeat_thread(void *arg) now = time(NULL); next = now - last_heartbeat; if (next >= me->heartbeat_interval) { - bnet_sig(dir, BNET_HEARTBEAT); - last_heartbeat = now; + dir->signal(BNET_HEARTBEAT); + last_heartbeat = now; } bmicrosleep(next, 0); } - bnet_close(dir); + dir->close(); jcr->hb_bsock = NULL; + jcr->hb_started = false; return NULL; } @@ -170,6 +194,7 @@ extern "C" void *dir_heartbeat_thread(void *arg) void start_dir_heartbeat(JCR *jcr) { if (me->heartbeat_interval) { + jcr->dir_bsock->set_locking(); pthread_create(&jcr->heartbeat_id, NULL, dir_heartbeat_thread, (void *)jcr); } }