From b8224aab234012c2d127b84eceb160e99dd4a14d Mon Sep 17 00:00:00 2001 From: Kern Sibbald Date: Sat, 14 Apr 2007 15:06:02 +0000 Subject: [PATCH] kes Simplify locking in the reservations system. kes Add more debug code in reservations. kes Make sure error condition on reserving a volume is handled correctly. kes Correct handling of volume_in_use. kes Correct handling of initializing a device. kes Move handling of broadcasting releasing a device into release_device(). kes Correct attaching dcr to dev so that it is only done if device is properly initiated. git-svn-id: https://bacula.svn.sourceforge.net/svnroot/bacula/trunk@4547 91ce42f0-d328-0410-95d8-f526ca767f89 --- bacula/projects | 28 ++++++++++++++++++++++++++++ bacula/src/stored/acquire.c | 13 ++++++++++--- bacula/src/stored/askdir.c | 10 ++++++++-- bacula/src/stored/dev.c | 1 + bacula/src/stored/dev.h | 1 + bacula/src/stored/dircmd.c | 4 ++++ bacula/src/stored/label.c | 14 ++++++++++++-- bacula/src/stored/reserve.c | 23 ++++++++--------------- bacula/src/stored/spool.c | 2 +- bacula/src/stored/stored.c | 1 + bacula/src/stored/wait.c | 15 +++++++++------ bacula/src/version.h | 4 ++-- bacula/technotes-2.1 | 9 +++++++++ 13 files changed, 94 insertions(+), 31 deletions(-) diff --git a/bacula/projects b/bacula/projects index 53bc0086a8..fdd6664865 100644 --- a/bacula/projects +++ b/bacula/projects @@ -1225,6 +1225,34 @@ Item n: Implement NDMP protocol support Notes (Kern): I am not at all in favor of this until NDMP becomes an Open Standard or until there are Open Source libraries that interface to it. + +Item n: make changing "spooldata=yes|no" possible for + manual/interactive jobs + +Origin: Marc Schiffbauer + +Date: 12 April 2007) + +Status: NEW + +What: Make it possible to modify the spooldata option + for a job when being run from within the console. + Currently it is possible to modify the backup level + and the spooldata setting in a Schedule resource. + It is also possible to modify the backup level when using + the "run" command in the console. + But it is currently not possible to to the same + with "spooldata=yes|no" like: + + run job=MyJob level=incremental spooldata=yes + +Why: In some situations it would be handy to be able to switch + spooldata on or off for interactive/manual jobs based on + which data the admin expects or how fast the LAN/WAN + connection currently is. + +Notes: ./. + ============= Empty Feature Request form =========== Item n: One line summary ... Date: Date submitted diff --git a/bacula/src/stored/acquire.c b/bacula/src/stored/acquire.c index 8eac9dbc87..8a312307a3 100644 --- a/bacula/src/stored/acquire.c +++ b/bacula/src/stored/acquire.c @@ -554,6 +554,9 @@ bool release_device(DCR *dcr) Dmsg1(400, "alert status=%d\n", status); free_pool_memory(alert); } + pthread_cond_broadcast(&dev->wait_next_vol); + Dmsg1(100, "JobId=%u broadcast wait_device_release\n", (uint32_t)jcr->JobId); + pthread_cond_broadcast(&wait_device_release); dcr->dev_locked = false; /* set no longer locked */ dev->dunlock(); if (jcr->read_dcr == dcr) { @@ -563,6 +566,8 @@ bool release_device(DCR *dcr) jcr->dcr = NULL; } free_dcr(dcr); + Dmsg2(100, "===== Device %s released by JobId=%u\n", dev->print_name(), + (uint32_t)jcr->JobId); return ok; } @@ -572,6 +577,7 @@ bool release_device(DCR *dcr) */ DCR *new_dcr(JCR *jcr, DEVICE *dev) { + if (jcr) Dmsg2(100, "enter new_dcr JobId=%u dev=%p\n", (uint32_t)jcr->JobId, dev); DCR *dcr = (DCR *)malloc(sizeof(DCR)); memset(dcr, 0, sizeof(DCR)); dcr->jcr = jcr; @@ -619,14 +625,17 @@ static void attach_dcr_to_dev(DCR *dcr) DEVICE *dev = dcr->dev; JCR *jcr = dcr->jcr; - if (!dcr->attached_to_dev && dev->is_open() && jcr && jcr->JobType != JT_SYSTEM) { + if (jcr) Dmsg1(500, "JobId=%u enter attach_dcr_to_dev\n", (uint32_t)jcr->JobId); + if (!dcr->attached_to_dev && dev->initiated && jcr && jcr->JobType != JT_SYSTEM) { dev->attached_dcrs->append(dcr); /* attach dcr to device */ dcr->attached_to_dev = true; + Dmsg1(500, "JobId=%u attach_dcr_to_dev\n", (uint32_t)jcr->JobId); } } void detach_dcr_from_dev(DCR *dcr) { + Dmsg1(500, "JobId=%u enter detach_dcr_from_dev\n", (uint32_t)dcr->jcr->JobId); unreserve_device(dcr); /* Detach this dcr only if attached */ @@ -635,8 +644,6 @@ void detach_dcr_from_dev(DCR *dcr) dcr->attached_to_dev = false; // remove_dcr_from_dcrs(dcr); /* remove dcr from jcr list */ } - pthread_cond_broadcast(&dcr->dev->wait_next_vol); - pthread_cond_broadcast(&wait_device_release); } /* diff --git a/bacula/src/stored/askdir.c b/bacula/src/stored/askdir.c index 3f728742b4..1e0d8c9346 100644 --- a/bacula/src/stored/askdir.c +++ b/bacula/src/stored/askdir.c @@ -268,7 +268,7 @@ bool dir_find_next_appendable_volume(DCR *dcr) Dmsg1(100, ">dird: %s", dir->msg); bool ok = do_get_volume_info(dcr); if (ok) { - if (dcr->any_volume || !is_volume_in_use(dcr)) { + if (!is_volume_in_use(dcr)) { found = true; break; } else { @@ -284,11 +284,17 @@ bool dir_find_next_appendable_volume(DCR *dcr) } if (found) { Dmsg0(400, "dir_find_next_appendable_volume return true\n"); - reserve_volume(dcr, dcr->VolumeName); /* reserve volume */ + if (reserve_volume(dcr, dcr->VolumeName) == 0) { + Dmsg2(100, "Could not reserve volume %s on %s\n", dcr->VolumeName, + dcr->dev->print_name()); + goto bail_out; + } V(vol_info_mutex); unlock_reservations(); return true; } + +bail_out: dcr->VolumeName[0] = 0; V(vol_info_mutex); unlock_reservations(); diff --git a/bacula/src/stored/dev.c b/bacula/src/stored/dev.c index 802b0647a9..b7e286da2e 100644 --- a/bacula/src/stored/dev.c +++ b/bacula/src/stored/dev.c @@ -254,6 +254,7 @@ init_dev(JCR *jcr, DEVRES *device) dev->clear_opened(); dev->attached_dcrs = New(dlist(dcr, &dcr->dev_link)); Dmsg2(29, "init_dev: tape=%d dev_name=%s\n", dev->is_tape(), dev->dev_name); + dev->initiated = true; return dev; } diff --git a/bacula/src/stored/dev.h b/bacula/src/stored/dev.h index 0fa2847e7a..a77b8e57ad 100644 --- a/bacula/src/stored/dev.h +++ b/bacula/src/stored/dev.h @@ -238,6 +238,7 @@ public: int openmode; /* parameter passed to open_dev (useful to reopen the device) */ int dev_type; /* device type */ bool autoselect; /* Autoselect in autochanger */ + bool initiated; /* set when init_dev() called */ int label_type; /* Bacula/ANSI/IBM label types */ uint32_t drive_index; /* Autochanger drive index (base 0) */ int32_t Slot; /* Slot currently in drive (base 1) */ diff --git a/bacula/src/stored/dircmd.c b/bacula/src/stored/dircmd.c index 77ff63e058..d1ef1d4d30 100644 --- a/bacula/src/stored/dircmd.c +++ b/bacula/src/stored/dircmd.c @@ -307,10 +307,12 @@ static bool cancel_cmd(JCR *cjcr) /* If thread waiting on mount, wake him */ if (jcr->dcr && jcr->dcr->dev && jcr->dcr->dev->waiting_for_mount()) { pthread_cond_broadcast(&jcr->dcr->dev->wait_next_vol); + Dmsg1(100, "JobId=%u broadcast wait_device_release\n", (uint32_t)jcr->JobId); pthread_cond_broadcast(&wait_device_release); } if (jcr->read_dcr && jcr->read_dcr->dev && jcr->read_dcr->dev->waiting_for_mount()) { pthread_cond_broadcast(&jcr->read_dcr->dev->wait_next_vol); + Dmsg1(100, "JobId=%u broadcast wait_device_release\n", (uint32_t)jcr->JobId); pthread_cond_broadcast(&wait_device_release); } Jmsg(jcr, M_INFO, 0, _("Job %s marked to be canceled.\n"), jcr->Job); @@ -650,6 +652,7 @@ static bool mount_cmd(JCR *jcr) bnet_fsend(dir, "3001 OK mount. Device=%s\n", dev->print_name()); pthread_cond_broadcast(&dev->wait_next_vol); + Dmsg1(100, "JobId=%u broadcast wait_device_release\n", (uint32_t)dcr->jcr->JobId); pthread_cond_broadcast(&wait_device_release); break; @@ -689,6 +692,7 @@ static bool mount_cmd(JCR *jcr) dev->print_name()); } pthread_cond_broadcast(&dev->wait_next_vol); + Dmsg1(100, "JobId=%u broadcast wait_device_release\n", (uint32_t)dcr->jcr->JobId); pthread_cond_broadcast(&wait_device_release); break; diff --git a/bacula/src/stored/label.c b/bacula/src/stored/label.c index d99ab2c744..746516cce5 100644 --- a/bacula/src/stored/label.c +++ b/bacula/src/stored/label.c @@ -213,7 +213,12 @@ int read_dev_volume_label(DCR *dcr) } dev->set_labeled(); /* set has Bacula label */ - reserve_volume(dcr, dev->VolHdr.VolumeName); + if (reserve_volume(dcr, dev->VolHdr.VolumeName) == NULL) { + Mmsg2(jcr->errmsg, _("Could not reserve volume %s on %s\n"), + dev->VolHdr.VolumeName, dev->print_name()); + stat = VOL_NAME_ERROR; + goto bail_out; + } /* Compare Volume Names */ Dmsg2(30, "Compare Vol names: VolName=%s hdr=%s\n", VolName?VolName:"*", dev->VolHdr.VolumeName); @@ -393,7 +398,12 @@ bool write_new_volume_label_to_dev(DCR *dcr, const char *VolName, if (debug_level >= 20) { dump_volume_label(dev); } - reserve_volume(dcr, VolName); + if (reserve_volume(dcr, VolName) == NULL) { + Mmsg2(dcr->jcr->errmsg, _("Could not reserve volume %s on %s\n"), + dev->VolHdr.VolumeName, dev->print_name()); + goto bail_out; + } + dev->clear_append(); /* remove append since this is PRE_LABEL */ return true; diff --git a/bacula/src/stored/reserve.c b/bacula/src/stored/reserve.c index 793f2a0b2b..2b7797b801 100644 --- a/bacula/src/stored/reserve.c +++ b/bacula/src/stored/reserve.c @@ -151,7 +151,7 @@ static void debug_list_volumes(const char *imsg, bool do_lock) for (vol=(VOLRES *)vol_list->first(); vol; vol=(VOLRES *)vol_list->next(vol)) { if (vol->dev == dev) { Dmsg0(000, "Two Volumes on same device.\n"); - ASSERT(1); + ASSERT(0); dev = vol->dev; } } @@ -264,7 +264,7 @@ static void free_vol_item(VOLRES *vol) * * Return: VOLRES entry on success - * NULL error + * NULL volume busy on another drive */ VOLRES *reserve_volume(DCR *dcr, const char *VolumeName) { @@ -279,7 +279,7 @@ VOLRES *reserve_volume(DCR *dcr, const char *VolumeName) * when adding a new volume that no newly scheduled * job can reserve it. */ - lock_reservations(); +// lock_reservations(); P(vol_list_lock); debug_list_volumes("begin reserve_volume", debug_nolock); /* @@ -331,18 +331,17 @@ VOLRES *reserve_volume(DCR *dcr, const char *VolumeName) /* Caller wants to switch Volume to another device */ if (!vol->dev->is_busy()) { /* OK to move it -- I'm not sure this will work */ - Dmsg3(100, "Swap vol=%s from dev=%s to %s\n", VolumeName, - dev->print_name(), dev->print_name()); + Dmsg3(100, "==== Swap vol=%s from dev=%s to %s\n", VolumeName, + vol->dev->print_name(), dev->print_name()); vol->dev->vol = NULL; /* take vol from old drive */ vol->dev->VolHdr.VolumeName[0] = 0; vol->dev = dev; /* point vol at new drive */ dev->vol = vol; /* point dev at vol */ dev->VolHdr.VolumeName[0] = 0; } else { + Dmsg3(100, "Volume busy could not swap vol=%s from dev=%s to %s\n", VolumeName, + vol->dev->print_name(), dev->print_name()); vol = NULL; /* device busy */ - Dmsg3(100, "Logic ERROR!!!! could not swap vol=%s from dev=%s to %s\n", VolumeName, - dev->print_name(), dcr->dev->print_name()); - ASSERT(1); /* blow up!!! */ } } } @@ -351,7 +350,7 @@ VOLRES *reserve_volume(DCR *dcr, const char *VolumeName) get_out: debug_list_volumes("end new volume", debug_nolock); V(vol_list_lock); - unlock_reservations(); +// unlock_reservations(); return vol; } @@ -942,10 +941,7 @@ static bool reserve_device_for_read(DCR *dcr) ASSERT(dcr); - /* Get locks in correct order */ - unlock_reservations(); dev->dlock(); - lock_reservations(); if (is_device_unmounted(dev)) { Dmsg1(200, "Device %s is BLOCKED due to user unmount.\n", dev->print_name()); @@ -1001,10 +997,7 @@ static bool reserve_device_for_append(DCR *dcr, RCTX &rctx) ASSERT(dcr); - /* Get locks in correct order */ - unlock_reservations(); dev->dlock(); - lock_reservations(); /* If device is being read, we cannot write it */ if (dev->can_read()) { diff --git a/bacula/src/stored/spool.c b/bacula/src/stored/spool.c index cb2b16ea4d..bf57c08ad6 100644 --- a/bacula/src/stored/spool.c +++ b/bacula/src/stored/spool.c @@ -253,7 +253,7 @@ static bool despool_data(DCR *dcr, bool commit) rdev->max_block_size = dcr->dev->max_block_size; rdev->min_block_size = dcr->dev->min_block_size; rdev->device = dcr->dev->device; - rdcr = new_dcr(NULL, rdev); + rdcr = new_dcr(jcr, rdev); rdcr->spool_fd = dcr->spool_fd; rdcr->jcr = jcr; /* set a valid jcr */ block = dcr->block; /* save block */ diff --git a/bacula/src/stored/stored.c b/bacula/src/stored/stored.c index efe7c5a416..dbce1a6b66 100644 --- a/bacula/src/stored/stored.c +++ b/bacula/src/stored/stored.c @@ -558,6 +558,7 @@ void terminate_stored(int sig) /* ***FIXME*** wiffle through all dcrs */ if (jcr->dcr && jcr->dcr->dev && jcr->dcr->dev->blocked()) { pthread_cond_broadcast(&jcr->dcr->dev->wait_next_vol); + Dmsg1(100, "JobId=%u broadcast wait_device_release\n", (uint32_t)jcr->JobId); pthread_cond_broadcast(&wait_device_release); } if (jcr->read_dcr && jcr->read_dcr->dev && jcr->read_dcr->dev->blocked()) { diff --git a/bacula/src/stored/wait.c b/bacula/src/stored/wait.c index cd034ef1f5..65535d5a92 100644 --- a/bacula/src/stored/wait.c +++ b/bacula/src/stored/wait.c @@ -191,7 +191,10 @@ int wait_for_sysop(DCR *dcr) /* * Wait for any device to be released, then we return, so - * higher level code can rescan possible devices. + * higher level code can rescan possible devices. Since there + * could be a job waiting for a drive to free up, we wait a maximum + * of 1 minute then retry just in case a broadcast was lost, and + * we return to rescan the devices. * * Returns: true if a device has changed state * false if the total wait time has expired. @@ -203,7 +206,7 @@ bool wait_for_device(JCR *jcr, bool first) struct timespec timeout; int stat = 0; bool ok = true; - const int wait_time = 5 * 60; /* wait 5 minutes */ + const int max_wait_time = 1 * 60; /* wait 1 minute */ Dmsg0(100, "Enter wait_for_device\n"); P(device_release_mutex); @@ -214,17 +217,17 @@ bool wait_for_device(JCR *jcr, bool first) gettimeofday(&tv, &tz); timeout.tv_nsec = tv.tv_usec * 1000; - timeout.tv_sec = tv.tv_sec + wait_time; + timeout.tv_sec = tv.tv_sec + max_wait_time; - Dmsg0(100, "I'm going to wait for a device.\n"); + Dmsg1(100, "JobId=%u going to wait for a device.\n", (uint32_t)jcr->JobId); /* Wait required time */ stat = pthread_cond_timedwait(&wait_device_release, &device_release_mutex, &timeout); - Dmsg1(100, "Wokeup from sleep on device stat=%d\n", stat); + Dmsg2(100, "JobId=%u wokeup from sleep on device stat=%d\n", (uint32_t)jcr->JobId, stat); V(device_release_mutex); - Dmsg1(100, "Return from wait_device ok=%d\n", ok); + Dmsg2(100, "JobId=%u return from wait_device ok=%d\n", (uint32_t)jcr->JobId, ok); return ok; } diff --git a/bacula/src/version.h b/bacula/src/version.h index f0c5028c58..474be67b4f 100644 --- a/bacula/src/version.h +++ b/bacula/src/version.h @@ -4,8 +4,8 @@ #undef VERSION #define VERSION "2.1.8" -#define BDATE "12 April 2007" -#define LSMDATE "12Apr07" +#define BDATE "14 April 2007" +#define LSMDATE "14Apr07" #define PROG_COPYRIGHT "Copyright (C) %d-2007 Free Software Foundation Europe e.V.\n" #define BYEAR "2007" /* year for copyright messages in progs */ diff --git a/bacula/technotes-2.1 b/bacula/technotes-2.1 index 2172a3a94b..fc74edd7ac 100644 --- a/bacula/technotes-2.1 +++ b/bacula/technotes-2.1 @@ -1,6 +1,15 @@ Technical notes on version 2.1 General: +14Apr07 +kes Simplify locking in the reservations system. +kes Add more debug code in reservations. +kes Make sure error condition on reserving a volume is handled correctly. +kes Correct handling of volume_in_use. +kes Correct handling of initializing a device. +kes Move handling of broadcasting releasing a device into release_device(). +kes Correct attaching dcr to dev so that it is only done if device + is properly initiated. 12Apr07 kes Locking debug level tweaks in SD. kes Tweak new Volume code. -- 2.39.5