From 54e4a6ac95a0aedd947722faec1269a3e2059e64 Mon Sep 17 00:00:00 2001 From: Kern Sibbald Date: Fri, 28 Sep 2007 08:16:16 +0000 Subject: [PATCH] More tweaks to the mount volume routines to get everything right. Changed variable name from find to have_vol, which is much easier to understand. kes Enhance btimer debug code. git-svn-id: https://bacula.svn.sourceforge.net/svnroot/bacula/trunk@5677 91ce42f0-d328-0410-95d8-f526ca767f89 --- bacula/src/lib/btimers.c | 30 +++++++++++++++++------------- bacula/src/stored/acquire.c | 14 +++++++------- bacula/src/stored/device.c | 4 ++-- bacula/src/stored/mount.c | 6 +++--- bacula/src/stored/protos.h | 2 +- bacula/src/version.h | 4 ++-- bacula/technotes-2.3 | 5 +++++ 7 files changed, 37 insertions(+), 28 deletions(-) diff --git a/bacula/src/lib/btimers.c b/bacula/src/lib/btimers.c index 420ed02030..7fb00bc3ef 100644 --- a/bacula/src/lib/btimers.c +++ b/bacula/src/lib/btimers.c @@ -35,6 +35,8 @@ #include "bacula.h" #include "jcr.h" +const int dbglvl = 900; + /* Forward referenced functions */ static void stop_btimer(btimer_t *wid); static btimer_t *btimer_start_common(uint32_t wait); @@ -71,7 +73,7 @@ btimer_t *start_child_timer(JCR *jcr, pid_t pid, uint32_t wait) wid->wd->interval = wait; register_watchdog(wid->wd); - Dmsg3(900, "Start child timer %p, pid %d for %d secs.\n", wid, pid, wait); + Dmsg3(dbglvl, "Start child timer %p, pid %d for %d secs.\n", wid, pid, wait); return wid; } @@ -81,10 +83,10 @@ btimer_t *start_child_timer(JCR *jcr, pid_t pid, uint32_t wait) void stop_child_timer(btimer_t *wid) { if (wid == NULL) { - Dmsg0(900, "stop_child_timer called with NULL btimer_id\n"); + Dmsg0(dbglvl, "stop_child_timer called with NULL btimer_id\n"); return; } - Dmsg2(900, "Stop child timer %p pid %d\n", wid, wid->pid); + Dmsg2(dbglvl, "Stop child timer %p pid %d\n", wid, wid->pid); stop_btimer(wid); } @@ -105,7 +107,7 @@ static void callback_child_timer(watchdog_t *self) /* First kill attempt; try killing it softly (kill -SONG) first */ wid->killed = true; - Dmsg2(050, "watchdog %p term PID %d\n", self, wid->pid); + Dmsg2(dbglvl, "watchdog %p term PID %d\n", self, wid->pid); /* Kill -TERM the specified PID, and reschedule a -KILL for 5 seconds * later. (Warning: this should let dvd-writepart enough time to term @@ -116,7 +118,7 @@ static void callback_child_timer(watchdog_t *self) self->interval = 5; } else { /* This is the second call - terminate with prejudice. */ - Dmsg2(050, "watchdog %p kill PID %d\n", self, wid->pid); + Dmsg2(dbglvl, "watchdog %p kill PID %d\n", self, wid->pid); kill(wid->pid, SIGKILL); @@ -125,7 +127,6 @@ static void callback_child_timer(watchdog_t *self) */ self->one_shot = true; } - Jmsg(wid->jcr, M_INFO, 0, _("Child timer expired. Child process killed.\n")); } /* @@ -139,7 +140,7 @@ btimer_t *start_thread_timer(JCR *jcr, pthread_t tid, uint32_t wait) btimer_t *wid; wid = btimer_start_common(wait); if (wid == NULL) { - Dmsg1(900, "start_thread_timer return NULL from common. wait=%d.\n", wait); + Dmsg1(dbglvl, "start_thread_timer return NULL from common. wait=%d.\n", wait); return NULL; } wid->type = TYPE_PTHREAD; @@ -151,7 +152,7 @@ btimer_t *start_thread_timer(JCR *jcr, pthread_t tid, uint32_t wait) wid->wd->interval = wait; register_watchdog(wid->wd); - Dmsg3(900, "Start thread timer %p tid %p for %d secs.\n", wid, tid, wait); + Dmsg3(dbglvl, "Start thread timer %p tid %p for %d secs.\n", wid, tid, wait); return wid; } @@ -179,7 +180,7 @@ btimer_t *start_bsock_timer(BSOCK *bsock, uint32_t wait) wid->wd->interval = wait; register_watchdog(wid->wd); - Dmsg4(950, "Start bsock timer %p tid=%p for %d secs at %d\n", wid, + Dmsg4(dbglvl, "Start bsock timer %p tid=%p for %d secs at %d\n", wid, wid->tid, wait, time(NULL)); return wid; @@ -194,7 +195,7 @@ void stop_bsock_timer(btimer_t *wid) Dmsg0(900, "stop_bsock_timer called with NULL btimer_id\n"); return; } - Dmsg3(950, "Stop bsock timer %p tid=%p at %d.\n", wid, wid->tid, time(NULL)); + Dmsg3(dbglvl, "Stop bsock timer %p tid=%p at %d.\n", wid, wid->tid, time(NULL)); stop_btimer(wid); } @@ -205,10 +206,10 @@ void stop_bsock_timer(btimer_t *wid) void stop_thread_timer(btimer_t *wid) { if (wid == NULL) { - Dmsg0(900, "stop_thread_timer called with NULL btimer_id\n"); + Dmsg0(dbglvl, "stop_thread_timer called with NULL btimer_id\n"); return; } - Dmsg2(900, "Stop thread timer %p tid=%p.\n", wid, wid->tid); + Dmsg2(dbglvl, "Stop thread timer %p tid=%p.\n", wid, wid->tid); stop_btimer(wid); } @@ -225,8 +226,11 @@ static void callback_thread_timer(watchdog_t *self) { btimer_t *wid = (btimer_t *)self->data; - Dmsg4(50, "thread timer %p kill %s tid=%p at %d.\n", self, + Dmsg4(dbglvl, "thread timer %p kill %s tid=%p at %d.\n", self, wid->type == TYPE_BSOCK ? "bsock" : "thread", wid->tid, time(NULL)); + if (wid->jcr) { + Dmsg2(dbglvl, "killed jid=%u Job=%s\n", wid->jcr->JobId, wid->jcr->Job); + } if (wid->type == TYPE_BSOCK && wid->bsock) { wid->bsock->set_timed_out(); diff --git a/bacula/src/stored/acquire.c b/bacula/src/stored/acquire.c index a3213b256d..20cac300a1 100644 --- a/bacula/src/stored/acquire.c +++ b/bacula/src/stored/acquire.c @@ -319,7 +319,7 @@ DCR *acquire_device_for_append(DCR *dcr) { bool do_mount = false; bool release = false; - bool find; + bool have_vol; DEVICE *dev = dcr->dev; JCR *jcr = dcr->jcr; @@ -342,7 +342,7 @@ DCR *acquire_device_for_append(DCR *dcr) * find defines whether or not mount_next_write_volume should * as the Director again about what Volume to use. */ - find = !is_suitable_volume_mounted(dcr); + have_vol = is_suitable_volume_mounted(dcr); if (dev->can_append()) { Dmsg0(190, "device already in append.\n"); /* @@ -357,7 +357,7 @@ DCR *acquire_device_for_append(DCR *dcr) * dcr->VolumeName is what we pass into the routines, or * get back from the subroutines. */ - if (!find && + if (!have_vol && !(dir_find_next_appendable_volume(dcr) && strcmp(dev->VolHdr.VolumeName, dcr->VolumeName) == 0)) { /* wrong tape mounted */ Dmsg2(190, "Wrong tape mounted: %s. wants:%s\n", dev->VolHdr.VolumeName, @@ -422,16 +422,16 @@ DCR *acquire_device_for_append(DCR *dcr) } } else { /* Not already in append mode, so mount the device */ - Dmsg2(190, "jid=%u Not in append mode, try mount find=%d\n", - (uint32_t)jcr->JobId, find); + Dmsg2(190, "jid=%u Not in append mode, try mount have_vol=%d\n", + (uint32_t)jcr->JobId, have_vol); ASSERT(dev->num_writers == 0); do_mount = true; } - if (do_mount) { + if (do_mount || !have_vol) { Dmsg1(190, "jid=%u Do mount_next_write_vol\n", (uint32_t)jcr->JobId); - bool mounted = mount_next_write_volume(dcr, find, release); + bool mounted = mount_next_write_volume(dcr, have_vol, release); if (!mounted) { if (!job_canceled(jcr)) { /* Reduce "noise" -- don't print if job canceled */ diff --git a/bacula/src/stored/device.c b/bacula/src/stored/device.c index eb79ab9318..473b3e7950 100644 --- a/bacula/src/stored/device.c +++ b/bacula/src/stored/device.c @@ -122,8 +122,8 @@ bool fixup_device_block_write_error(DCR *dcr) edit_uint64_with_commas(dev->VolCatInfo.VolCatBlocks, b2), bstrftime(dt, sizeof(dt), time(NULL))); - /* Called with find=true, release=true */ - if (!mount_next_write_volume(dcr, true, true)) { + /* Called with have_vol=false, release=true */ + if (!mount_next_write_volume(dcr, false, true)) { free_block(label_blk); dcr->block = block; dev->dlock(); diff --git a/bacula/src/stored/mount.c b/bacula/src/stored/mount.c index 9d0137b8b8..6733f9a6e8 100644 --- a/bacula/src/stored/mount.c +++ b/bacula/src/stored/mount.c @@ -60,7 +60,7 @@ enum { * impossible to get the requested Volume. * */ -bool mount_next_write_volume(DCR *dcr, bool find, bool release) +bool mount_next_write_volume(DCR *dcr, bool have_vol, bool release) { int retry = 0; bool ask = false, recycle, autochanger; @@ -108,7 +108,7 @@ mount_next_vol: * in dcr->VolCatInfo */ Dmsg0(200, "Before dir_find_next_appendable_volume.\n"); - if (find) { + if (!have_vol) { while (!dir_find_next_appendable_volume(dcr)) { Dmsg0(200, "not dir_find_next\n"); if (!dir_ask_sysop_to_create_appendable_volume(dcr)) { @@ -117,7 +117,7 @@ mount_next_vol: Dmsg0(200, "Again dir_find_next_append...\n"); } } else { - find = true; /* set true for next pass if any */ + have_vol = false; /* set false for next pass if any */ } if (job_canceled(jcr)) { return false; diff --git a/bacula/src/stored/protos.h b/bacula/src/stored/protos.h index 842a1493d6..77225f932d 100644 --- a/bacula/src/stored/protos.h +++ b/bacula/src/stored/protos.h @@ -182,7 +182,7 @@ BSR *find_next_bsr(BSR *root_bsr, DEVICE *dev); bool is_this_bsr_done(BSR *bsr, DEV_RECORD *rec); /* From mount.c */ -bool mount_next_write_volume(DCR *dcr, bool find, bool release); +bool mount_next_write_volume(DCR *dcr, bool have_vol, bool release); bool mount_next_read_volume(DCR *dcr); void mark_volume_in_error(DCR *dcr); diff --git a/bacula/src/version.h b/bacula/src/version.h index 04823a8c98..b9dd12b845 100644 --- a/bacula/src/version.h +++ b/bacula/src/version.h @@ -4,8 +4,8 @@ #undef VERSION #define VERSION "2.3.6" -#define BDATE "27 September 2007" -#define LSMDATE "27Sep07" +#define BDATE "28 September 2007" +#define LSMDATE "28Sep07" #define PROG_COPYRIGHT "Copyright (C) %d-2007 Free Software Foundation Europe e.V.\n" #define BYEAR "2007" /* year for copyright messages in progs */ diff --git a/bacula/technotes-2.3 b/bacula/technotes-2.3 index 17f7c2628e..b3d4a44b88 100644 --- a/bacula/technotes-2.3 +++ b/bacula/technotes-2.3 @@ -1,6 +1,11 @@ Technical notes on version 2.3 General: +28Sep07 +kes More tweaks to the mount volume routines to get everything + right. Changed variable name from find to have_vol, which is + much easier to understand. +kes Enhance btimer debug code. 27Sep07 kes Fix FD->SD authorization failure, which was due to spurious wakeups from a pthread_cond_timedwait(). Simply check the -- 2.39.5