From: Kern Sibbald Date: Wed, 26 Mar 2008 20:51:14 +0000 (+0000) Subject: kes Implement bsock code to permit locking. Use the new code in Verify X-Git-Tag: Release-7.0.0~4787 X-Git-Url: https://git.sur5r.net/?a=commitdiff_plain;h=38e5a72046f9faca149f4f1b0e3d3dd120420c49;p=bacula%2Fbacula kes Implement bsock code to permit locking. Use the new code in Verify in the FD to prevent the heartbeat thread from disrupting the Verify data sent to the Dir. kes Modify SD locking to eliminate locking the reservations system from outside the system. Use the volume lock when getting media data and reserving a new volume at end of media. kes Add Eric's volume_unused() fix to stored.c so that Volumes found when SD starts are not locked into place. git-svn-id: https://bacula.svn.sourceforge.net/svnroot/bacula/trunk@6694 91ce42f0-d328-0410-95d8-f526ca767f89 --- diff --git a/bacula/kernstodo b/bacula/kernstodo index 24842f4f5a..e15506656f 100644 --- a/bacula/kernstodo +++ b/bacula/kernstodo @@ -70,6 +70,21 @@ Professional Needs: and http://www.openeyet.nl/scc/ for managing customer changes Priority: +> There's definitely something fishy in the recording of start and +> end blocks in the JOBMEDIA table. This is a snip from last night's +> incremental run (still using 2.2.8 plus the four published patches, plus +> my posted fix for the jobmedia patch): +> +> JmId | JID | MId | FI | LI | Sfile | Efile | Sblock | Eblock +>- 119 | 26 | 3 | 1 | 53 | 31 | 31 | 0 | 32 + 120 | 27 | 3 | 1 | 83 | 31 | 31 | 0 | 242 + 121 | 28 | 3 | 1 | 239 | 31 | 31 | 0 | 5683 +> +> Since I'm using spooling, those jobs should not be interspersed on +> tape. Still, at least it seems the error is in including too many +> blocks in the set that a job's files occupies, so if I understand +> correctly, it shouldn't cause any restore problems. :) + - Re-check new dcr->reserved_volume - Softlinks that point to non-existent file are not restored in restore all, but are restored if the file is individually selected. BUG! diff --git a/bacula/src/filed/heartbeat.c b/bacula/src/filed/heartbeat.c index 0d45d1dae2..e2ec35f343 100644 --- a/bacula/src/filed/heartbeat.c +++ b/bacula/src/filed/heartbeat.c @@ -1,17 +1,7 @@ -/* - * Bacula File Daemon heartbeat routines - * Listens for heartbeats coming from the SD - * If configured, sends heartbeats to Dir - * - * Kern Sibbald, May MMIII - * - * Version $Id$ - * - */ /* Bacula® - The Network Backup Solution - Copyright (C) 2003-2006 Free Software Foundation Europe e.V. + Copyright (C) 2003-2008 Free Software Foundation Europe e.V. The main author of Bacula is Kern Sibbald, with contributions from many others, a complete list can be found in the file AUTHORS. @@ -35,6 +25,16 @@ (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich, Switzerland, email:ftf@fsfeurope.org. */ +/* + * Bacula File Daemon heartbeat routines + * Listens for heartbeats coming from the SD + * If configured, sends heartbeats to Dir + * + * Kern Sibbald, May MMIII + * + * Version $Id$ + * + */ #include "bacula.h" #include "filed.h" @@ -171,12 +171,12 @@ extern "C" void *dir_heartbeat_thread(void *arg) now = time(NULL); next = now - last_heartbeat; if (next >= me->heartbeat_interval) { - bnet_sig(dir, BNET_HEARTBEAT); + dir->signal(BNET_HEARTBEAT); last_heartbeat = now; } bmicrosleep(next, 0); } - bnet_close(dir); + dir->close(); jcr->hb_bsock = NULL; return NULL; } @@ -187,6 +187,7 @@ extern "C" void *dir_heartbeat_thread(void *arg) void start_dir_heartbeat(JCR *jcr) { if (me->heartbeat_interval) { + jcr->dir_bsock->set_locking(); pthread_create(&jcr->heartbeat_id, NULL, dir_heartbeat_thread, (void *)jcr); } } diff --git a/bacula/src/lib/bnet.c b/bacula/src/lib/bnet.c index 163e172a00..e1d1a5f1f0 100644 --- a/bacula/src/lib/bnet.c +++ b/bacula/src/lib/bnet.c @@ -689,7 +689,7 @@ BSOCK *init_bsock(JCR * jcr, int sockfd, const char *who, const char *host, int return bsock; } -BSOCK *dup_bsock(BSOCK * osock) +BSOCK *dup_bsock(BSOCK *osock) { BSOCK *bsock = (BSOCK *)malloc(sizeof(BSOCK)); memcpy(bsock, osock, sizeof(BSOCK)); diff --git a/bacula/src/lib/breg.c b/bacula/src/lib/breg.c index 6a3d33c7ae..c77ab200c5 100644 --- a/bacula/src/lib/breg.c +++ b/bacula/src/lib/breg.c @@ -1,15 +1,7 @@ -/* - * Manipulation routines for BREGEXP list - * - * Eric Bollengier, March 2007 - * - * Version $Id$ - * - */ /* Bacula® - The Network Backup Solution - Copyright (C) 2006-2006 Free Software Foundation Europe e.V. + Copyright (C) 2006-2008 Free Software Foundation Europe e.V. The main author of Bacula is Kern Sibbald, with contributions from many others, a complete list can be found in the file AUTHORS. @@ -28,11 +20,19 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - BaculaÂ® is a registered trademark of John Walker. + Bacula® is a registered trademark of John Walker. The licensor of Bacula is the Free Software Foundation Europe - (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 ZÃ¼rich, + (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zuerich, Switzerland, email:ftf@fsfeurope.org. */ +/* + * Manipulation routines for BREGEXP list + * + * Eric Bollengier, March 2007 + * + * Version $Id$ + * + */ #include "bacula.h" diff --git a/bacula/src/lib/bsock.c b/bacula/src/lib/bsock.c index 992d099113..63f90f5385 100644 --- a/bacula/src/lib/bsock.c +++ b/bacula/src/lib/bsock.c @@ -257,7 +257,34 @@ bool BSOCK::open(JCR *jcr, const char *name, char *host, char *service, return true; } +/* + * Force read/write to use locking + */ +bool BSOCK::set_locking() +{ + int stat; + if (m_use_locking) { + return true; /* already set */ + } + if ((stat = pthread_mutex_init(&m_mutex, NULL)) != 0) { + berrno be; + Jmsg(m_jcr, M_FATAL, 0, _("Could not init bsock mutex. ERR=%s\n"), + be.bstrerror(stat)); + return false; + } + m_use_locking = true; + return true; +} +void BSOCK::clear_locking() +{ + if (!m_use_locking) { + return; + } + m_use_locking = false; + pthread_mutex_destroy(&m_mutex); + return; +} /* * Send a message over the network. The send consists of @@ -272,10 +299,12 @@ bool BSOCK::send() int32_t rc; int32_t pktsiz; int32_t *hdr; + bool ok = true; if (errors || is_terminated() || msglen > 1000000) { return false; } + if (m_use_locking) P(m_mutex); /* Compute total packet length */ if (msglen <= 0) { pktsiz = sizeof(pktsiz); /* signal, no data */ @@ -316,9 +345,10 @@ bool BSOCK::send() _("Wrote %d bytes to %s:%s:%d, but only %d accepted.\n"), msglen, m_who, m_host, m_port, rc); } - return false; + ok = false; } - return true; + if (m_use_locking) V(m_mutex); + return ok; } /* @@ -380,6 +410,7 @@ int32_t BSOCK::recv() return BNET_HARDEOF; } + if (m_use_locking) P(m_mutex); read_seqno++; /* bump sequence number */ timer_start = watchdog_time; /* set start wait time */ clear_timed_out(); @@ -393,7 +424,8 @@ int32_t BSOCK::recv() b_errno = errno; } errors++; - return BNET_HARDEOF; /* assume hard EOF received */ + nbytes = BNET_HARDEOF; /* assume hard EOF received */ + goto get_out; } timer_start = 0; /* clear timer */ if (nbytes != sizeof(int32_t)) { @@ -401,16 +433,18 @@ int32_t BSOCK::recv() b_errno = EIO; Qmsg5(m_jcr, M_ERROR, 0, _("Read expected %d got %d from %s:%s:%d\n"), sizeof(int32_t), nbytes, m_who, m_host, m_port); - return BNET_ERROR; + nbytes = BNET_ERROR; + goto get_out; } pktsiz = ntohl(pktsiz); /* decode no. of bytes that follow */ if (pktsiz == 0) { /* No data transferred */ - timer_start = 0; /* clear timer */ + timer_start = 0; /* clear timer */ in_msg_no++; msglen = 0; - return 0; /* zero bytes read */ + nbytes = 0; /* zero bytes read */ + goto get_out; } /* If signal or packet size too big */ @@ -424,10 +458,11 @@ int32_t BSOCK::recv() if (pktsiz == BNET_TERMINATE) { set_terminated(); } - timer_start = 0; /* clear timer */ + timer_start = 0; /* clear timer */ b_errno = ENODATA; - msglen = pktsiz; /* signal code */ - return BNET_SIGNAL; /* signal */ + msglen = pktsiz; /* signal code */ + nbytes = BNET_SIGNAL; /* signal */ + goto get_out; } /* Make sure the buffer is big enough + one byte for EOS */ @@ -448,7 +483,8 @@ int32_t BSOCK::recv() errors++; Qmsg4(m_jcr, M_ERROR, 0, _("Read error from %s:%s:%d: ERR=%s\n"), m_who, m_host, m_port, this->bstrerror()); - return BNET_ERROR; + nbytes = BNET_ERROR; + goto get_out; } timer_start = 0; /* clear timer */ in_msg_no++; @@ -458,7 +494,8 @@ int32_t BSOCK::recv() errors++; Qmsg5(m_jcr, M_ERROR, 0, _("Read expected %d got %d from %s:%s:%d\n"), pktsiz, nbytes, m_who, m_host, m_port); - return BNET_ERROR; + nbytes = BNET_ERROR; + goto get_out; } /* always add a zero by to properly terminate any * string that was send to us. Note, we ensured above that the @@ -466,6 +503,9 @@ int32_t BSOCK::recv() */ msg[nbytes] = 0; /* terminate in case it is a string */ sm_check(__FILE__, __LINE__, false); + +get_out: + if (m_use_locking) V(m_mutex); return nbytes; /* return actual length of message */ } @@ -799,6 +839,9 @@ void BSOCK::close() BSOCK *bsock = this; BSOCK *next; + if (!m_duped) { + clear_locking(); + } for (; bsock; bsock = next) { next = bsock->m_next; /* get possible pointer to next before destoryed */ if (!bsock->m_duped) { diff --git a/bacula/src/lib/bsock.h b/bacula/src/lib/bsock.h index 37a1075b71..0205accd3b 100644 --- a/bacula/src/lib/bsock.h +++ b/bacula/src/lib/bsock.h @@ -54,6 +54,7 @@ class BSOCK { private: BSOCK *m_next; /* next BSOCK if duped */ JCR *m_jcr; /* jcr or NULL for error msgs */ + pthread_mutex_t m_mutex; /* for locking if use_locking set */ char *m_who; /* Name of daemon to which we are talking */ char *m_host; /* Host name/IP */ int m_port; /* desired port */ @@ -62,6 +63,7 @@ private: volatile bool m_terminated: 1; /* set when BNET_TERMINATE arrives */ bool m_duped: 1; /* set if duped BSOCK */ bool m_spool: 1; /* set for spooling */ + bool m_use_locking: 1; /* set to use locking */ void fin_init(JCR * jcr, int sockfd, const char *who, const char *host, int port, struct sockaddr *lclient_addr); @@ -112,6 +114,8 @@ public: int wait_data_intr(int sec); bool authenticate_director(const char *name, const char *password, TLS_CONTEXT *tls_ctx, char *msg, int msglen); + bool set_locking(); /* in bsock.c */ + void clear_locking(); /* in bsock.c */ /* Inline functions */ void set_jcr(JCR *jcr) { m_jcr = jcr; }; diff --git a/bacula/src/lib/workq.c b/bacula/src/lib/workq.c index 3c6d574766..3a13907ef3 100644 --- a/bacula/src/lib/workq.c +++ b/bacula/src/lib/workq.c @@ -1,7 +1,7 @@ /* Bacula® - The Network Backup Solution - Copyright (C) 2001-2007 Free Software Foundation Europe e.V. + Copyright (C) 2001-2008 Free Software Foundation Europe e.V. The main author of Bacula is Kern Sibbald, with contributions from many others, a complete list can be found in the file AUTHORS. diff --git a/bacula/src/stored/askdir.c b/bacula/src/stored/askdir.c index 03a5f84c29..0a576dab73 100644 --- a/bacula/src/stored/askdir.c +++ b/bacula/src/stored/askdir.c @@ -252,6 +252,7 @@ bool dir_find_next_appendable_volume(DCR *dcr) { JCR *jcr = dcr->jcr; BSOCK *dir = jcr->dir_bsock; + bool rtn; Dmsg2(200, "dir_find_next_appendable_volume: reserved=%d Vol=%s\n", dcr->reserved_device, dcr->VolumeName); @@ -261,7 +262,7 @@ bool dir_find_next_appendable_volume(DCR *dcr) * the most available could already be mounted on another * drive, so we continue looking for a not in use Volume. */ - lock_reservations(); + lock_volumes(); P(vol_info_mutex); dcr->volume_in_use = false; for (int vol_index=1; vol_index < 40; vol_index++) { @@ -271,8 +272,7 @@ bool dir_find_next_appendable_volume(DCR *dcr) unbash_spaces(dcr->media_type); unbash_spaces(dcr->pool_name); Dmsg1(100, ">dird %s", dir->msg); - bool ok = do_get_volume_info(dcr); - if (ok) { + if (do_get_volume_info(dcr)) { if (!is_volume_in_use(dcr)) { Dmsg1(100, "Call reserve_volume. Vol=%s\n", dcr->VolumeName); if (reserve_volume(dcr, dcr->VolumeName) == 0) { @@ -280,11 +280,10 @@ bool dir_find_next_appendable_volume(DCR *dcr) dcr->dev->print_name()); continue; } - V(vol_info_mutex); - unlock_reservations(); Dmsg1(100, "dir_find_next_appendable_volume return true. vol=%s\n", dcr->VolumeName); - return true; + rtn = true; + goto get_out; } else { Dmsg1(100, "Volume %s is in use.\n", dcr->VolumeName); dcr->volume_in_use = true; @@ -295,11 +294,13 @@ bool dir_find_next_appendable_volume(DCR *dcr) dcr->dev->print_name()); break; } - + rtn = false; dcr->VolumeName[0] = 0; + +get_out: V(vol_info_mutex); - unlock_reservations(); - return false; + unlock_volumes(); + return rtn; } diff --git a/bacula/src/stored/mount.c b/bacula/src/stored/mount.c index 0feccef489..08381715bd 100644 --- a/bacula/src/stored/mount.c +++ b/bacula/src/stored/mount.c @@ -557,6 +557,7 @@ void mark_volume_in_error(DCR *dcr) bstrncpy(dev->VolCatInfo.VolCatStatus, "Error", sizeof(dev->VolCatInfo.VolCatStatus)); Dmsg0(150, "dir_update_vol_info. Set Error.\n"); dir_update_volume_info(dcr, false, false); + volume_unused(dcr); } /* diff --git a/bacula/src/stored/reserve.c b/bacula/src/stored/reserve.c index 9afe9bd7d0..1624bc36e1 100644 --- a/bacula/src/stored/reserve.c +++ b/bacula/src/stored/reserve.c @@ -426,7 +426,6 @@ get_out: */ void switch_device(DCR *dcr, DEVICE *dev) { - // lock_reservations(); DCR save_dcr; dev->dlock(); @@ -608,28 +607,37 @@ void free_volume_list() bool is_volume_in_use(DCR *dcr) { - VOLRES *vol = find_volume(dcr); + bool rtn = false; + VOLRES *vol; + + lock_volumes(); + vol = find_volume(dcr); if (!vol) { Dmsg1(dbglvl, "Vol=%s not in use.\n", dcr->VolumeName); - return false; /* vol not in list */ + goto get_out; /* vol not in list */ } ASSERT(vol->dev != NULL); if (dcr->dev == vol->dev) { /* same device OK */ Dmsg1(dbglvl, "Vol=%s on same dev.\n", dcr->VolumeName); - return false; + goto get_out; } else { Dmsg3(dbglvl, "Vol=%s on %s we have %s\n", dcr->VolumeName, vol->dev->print_name(), dcr->dev->print_name()); } if (!vol->dev->is_busy()) { Dmsg2(dbglvl, "Vol=%s dev=%s not busy.\n", dcr->VolumeName, vol->dev->print_name()); - return false; + goto get_out; } else { Dmsg2(dbglvl, "Vol=%s dev=%s busy.\n", dcr->VolumeName, vol->dev->print_name()); } Dmsg2(dbglvl, "Vol=%s in use by %s.\n", dcr->VolumeName, vol->dev->print_name()); - return true; + rtn = true; + +get_out: + unlock_volumes(); + return rtn; + } diff --git a/bacula/src/stored/stored.c b/bacula/src/stored/stored.c index d878330cfb..94d466168d 100644 --- a/bacula/src/stored/stored.c +++ b/bacula/src/stored/stored.c @@ -515,6 +515,7 @@ void *device_initialization(void *arg) switch (read_dev_volume_label(dcr)) { case VOL_OK: memcpy(&dev->VolCatInfo, &dcr->VolCatInfo, sizeof(dev->VolCatInfo)); + volume_unused(dcr); /* mark volume "released" */ break; default: Jmsg1(NULL, M_WARNING, 0, _("Could not mount device %s\n"), dev->print_name()); diff --git a/bacula/src/version.h b/bacula/src/version.h index 9a6abf24fd..895bbb094e 100644 --- a/bacula/src/version.h +++ b/bacula/src/version.h @@ -4,8 +4,8 @@ #undef VERSION #define VERSION "2.3.14" -#define BDATE "24 March 2008" -#define LSMDATE "24Mar08" +#define BDATE "26 March 2008" +#define LSMDATE "26Mar08" #define PROG_COPYRIGHT "Copyright (C) %d-2008 Free Software Foundation Europe e.V.\n" #define BYEAR "2008" /* year for copyright messages in progs */ diff --git a/bacula/technotes-2.3 b/bacula/technotes-2.3 index f73f520e3c..441e011ea6 100644 --- a/bacula/technotes-2.3 +++ b/bacula/technotes-2.3 @@ -22,6 +22,15 @@ Add long term statistic job table General: +26Mar08 +kes Implement bsock code to permit locking. Use the new code in Verify + in the FD to prevent the heartbeat thread from disrupting the Verify + data sent to the Dir. +kes Modify SD locking to eliminate locking the reservations system from + outside the system. Use the volume lock when getting media data + and reserving a new volume at end of media. +kes Add Eric's volume_unused() fix to stored.c so that Volumes found + when SD starts are not locked into place. 24Mar08 kes Fix error compiling runscript code on 64 bit machines. kes Tweak Win32 mount point code.