From: Kern Sibbald Date: Mon, 18 Feb 2013 18:20:46 +0000 (+0100) Subject: Refactor lock_volumes so most lock a vol rather than globally X-Git-Url: https://git.sur5r.net/?a=commitdiff_plain;h=7f70d7958a08002d58f13f1b27aa1777c2d197c6;p=bacula%2Fbacula Refactor lock_volumes so most lock a vol rather than globally --- diff --git a/bacula/src/lib/mutex_list.h b/bacula/src/lib/mutex_list.h index b3f51a2ef3..2f07dd589b 100644 --- a/bacula/src/lib/mutex_list.h +++ b/bacula/src/lib/mutex_list.h @@ -36,7 +36,7 @@ #define PRIO_SD_DEV_ACQUIRE 4 /* dev.acquire_mutex */ #define PRIO_SD_DEV_ACCESS 5 /* dev.m_mutex */ -#define PRIO_SD_VOL_LIST 10 /* vol_list_lock */ +#define PRIO_SD_VOL_LIST 0 /* vol_list_lock */ #define PRIO_SD_READ_VOL_LIST 12 /* read_vol_list */ #define PRIO_SD_DEV_SPOOL 14 /* dev.spool_mutex */ #define PRIO_SD_ACH_ACCESS 16 /* autochanger lock mutex */ diff --git a/bacula/src/stored/acquire.c b/bacula/src/stored/acquire.c index c8db2956d8..ef31f74f62 100644 --- a/bacula/src/stored/acquire.c +++ b/bacula/src/stored/acquire.c @@ -535,6 +535,7 @@ bool release_device(DCR *dcr) dev->close(); free_volume(dev); } + unlock_volumes(); /* Fire off Alert command and include any output */ if (!job_canceled(jcr) && dcr->device->alert_command) { @@ -567,7 +568,6 @@ bool release_device(DCR *dcr) Dmsg2(100, "JobId=%u broadcast wait_device_release at %s\n", (uint32_t)jcr->JobId, bstrftimes(tbuf, sizeof(tbuf), (utime_t)time(NULL))); pthread_cond_broadcast(&wait_device_release); - unlock_volumes(); /* * If we are the thread that blocked the device, then unblock it diff --git a/bacula/src/stored/dev.h b/bacula/src/stored/dev.h index 96837b7e4c..c7a01cfbf9 100644 --- a/bacula/src/stored/dev.h +++ b/bacula/src/stored/dev.h @@ -631,33 +631,6 @@ public: }; -/* - * Volume reservation class -- see reserve.c - */ -class VOLRES { - bool m_swapping; /* set when swapping to another drive */ - bool m_in_use; /* set when volume reserved or in use */ - int32_t m_slot; /* slot of swapping volume */ - uint32_t m_JobId; /* JobId for read volumes */ -public: - dlink link; - char *vol_name; /* Volume name */ - DEVICE *dev; /* Pointer to device to which we are attached */ - - bool is_swapping() const { return m_swapping; }; - void set_swapping() { m_swapping = true; }; - void clear_swapping() { m_swapping = false; }; - bool is_in_use() const { return m_in_use; }; - void set_in_use() { m_in_use = true; }; - void clear_in_use() { m_in_use = false; }; - void set_slot(int32_t slot) { m_slot = slot; }; - void clear_slot() { m_slot = -1; }; - int32_t get_slot() const { return m_slot; }; - uint32_t get_jobid() const { return m_JobId; }; - void set_jobid(uint32_t JobId) { m_JobId = JobId; }; -}; - - /* Get some definition of function to position * to the end of the medium in MTEOM. System * dependent. Arrgggg! diff --git a/bacula/src/stored/mount.c b/bacula/src/stored/mount.c index dc1f38bc19..a825123ba7 100644 --- a/bacula/src/stored/mount.c +++ b/bacula/src/stored/mount.c @@ -37,6 +37,8 @@ #include "bacula.h" /* pull in global headers */ #include "stored.h" /* pull in Storage Deamon headers */ +static pthread_mutex_t mount_mutex = PTHREAD_MUTEX_INITIALIZER; + enum { try_next_vol = 1, try_read_vol, @@ -77,27 +79,27 @@ bool DCR::mount_next_write_volume() dev->print_name()); init_device_wait_timers(dcr); + + P(mount_mutex); /* * Attempt to mount the next volume. If something non-fatal goes * wrong, we come back here to re-try (new op messages, re-read * Volume, ...) */ - lock_volumes(); - mount_next_vol: Dmsg1(150, "mount_next_vol retry=%d\n", retry); /* Ignore retry if this is poll request */ if (retry++ > 4) { /* Last ditch effort before giving up, force operator to respond */ VolCatInfo.Slot = 0; - unlock_volumes(); + V(mount_mutex); if (!dir_ask_sysop_to_mount_volume(dcr, ST_APPEND)) { Jmsg(jcr, M_FATAL, 0, _("Too many errors trying to mount device %s.\n"), dev->print_name()); goto no_lock_bail_out; } - lock_volumes(); + P(mount_mutex); Dmsg1(150, "Continue after dir_ask_sysop_to_mount. must_load=%d\n", dev->must_load()); } if (job_canceled(jcr)) { @@ -162,13 +164,13 @@ mount_next_vol: Dmsg2(250, "Ask=%d autochanger=%d\n", ask, autochanger); if (ask) { - unlock_volumes(); + V(mount_mutex); dcr->setVolCatInfo(false); /* out of date when Vols unlocked */ if (!dir_ask_sysop_to_mount_volume(dcr, ST_APPEND)) { Dmsg0(150, "Error return ask_sysop ...\n"); goto no_lock_bail_out; } - lock_volumes(); + P(mount_mutex); } if (job_canceled(jcr)) { goto bail_out; @@ -306,11 +308,11 @@ read_volume: Dmsg1(150, "set APPEND, normal return from mount_next_write_volume. dev=%s\n", dev->print_name()); - unlock_volumes(); + V(mount_mutex); return true; bail_out: - unlock_volumes(); + V(mount_mutex); no_lock_bail_out: return false; @@ -320,7 +322,8 @@ no_lock_bail_out: * This routine is meant to be called once the first pass * to ensure that we have a candidate volume to mount. * Otherwise, we ask the sysop to created one. - * Note, the the Volumes are locked on entry and exit. + * Note, mount_mutex is already locked on entry and thus + * must remain locked on exit from this function. */ bool DCR::find_a_volume() { @@ -344,12 +347,12 @@ bool DCR::find_a_volume() if (job_canceled(jcr)) { return false; } - unlock_volumes(); + V(mount_mutex); if (!dir_ask_sysop_to_create_appendable_volume(dcr)) { - lock_volumes(); + P(mount_mutex); return false; } - lock_volumes(); + P(mount_mutex); if (job_canceled(jcr)) { return false; } diff --git a/bacula/src/stored/reserve.c b/bacula/src/stored/reserve.c index 4e9754ad3a..10cc4f8da2 100644 --- a/bacula/src/stored/reserve.c +++ b/bacula/src/stored/reserve.c @@ -145,7 +145,6 @@ void DCR::clear_reserved() void DCR::unreserve_device() { dev->Lock(); - lock_volumes(); if (is_reserved()) { clear_reserved(); reserved_volume = false; @@ -162,7 +161,6 @@ void DCR::unreserve_device() volume_unused(this); } } - unlock_volumes(); dev->Unlock(); } diff --git a/bacula/src/stored/stored.h b/bacula/src/stored/stored.h index 7d0fe50fb5..2af78c4dca 100644 --- a/bacula/src/stored/stored.h +++ b/bacula/src/stored/stored.h @@ -1,7 +1,7 @@ /* Bacula® - The Network Backup Solution - Copyright (C) 2000-2011 Free Software Foundation Europe e.V. + Copyright (C) 2000-2013 Free Software Foundation Europe e.V. The main author of Bacula is Kern Sibbald, with contributions from many others, a complete list can be found in the file AUTHORS. @@ -28,7 +28,6 @@ /* * Storage daemon specific defines and includes * - * Version $Id$ */ #ifndef __STORED_H_ @@ -65,6 +64,7 @@ const int sd_dbglvl = 300; #include "stored_conf.h" #include "bsr.h" #include "jcr.h" +#include "vol_mgr.h" #include "reserve.h" #include "protos.h" #ifdef HAVE_LIBZ diff --git a/bacula/src/stored/vol_mgr.c b/bacula/src/stored/vol_mgr.c index 7d12be97cc..f3e28e7b6b 100644 --- a/bacula/src/stored/vol_mgr.c +++ b/bacula/src/stored/vol_mgr.c @@ -1,7 +1,7 @@ /* Bacula® - The Network Backup Solution - Copyright (C) 2000-2012 Free Software Foundation Europe e.V. + Copyright (C) 2000-2013 Free Software Foundation Europe e.V. The main author of Bacula is Kern Sibbald, with contributions from many others, a complete list can be found in the file AUTHORS. @@ -100,7 +100,7 @@ void term_vol_list_lock() rwl_destroy(&vol_list_lock); } -/* +/* * This allows a given thread to recursively call to lock_volumes() */ void _lock_volumes(const char *file, int line) @@ -149,17 +149,17 @@ void add_read_volume(JCR *jcr, const char *VolumeName) { VOLRES *nvol, *vol; - lock_read_volumes(); nvol = new_vol_item(NULL, VolumeName); nvol->set_jobid(jcr->JobId); + lock_read_volumes(); vol = (VOLRES *)read_vol_list->binary_insert(nvol, read_compare); + unlock_read_volumes(); if (vol != nvol) { free_vol_item(nvol); Dmsg2(dbglvl, "read_vol=%s JobId=%d already in list.\n", VolumeName, jcr->JobId); } else { Dmsg2(dbglvl, "add read_vol=%s JobId=%d\n", VolumeName, jcr->JobId); } - unlock_read_volumes(); } /* @@ -197,8 +197,7 @@ static void debug_list_volumes(const char *imsg) VOLRES *vol; POOL_MEM msg(PM_MESSAGE); - lock_volumes(); - foreach_dlist(vol, vol_list) { + foreach_vol(vol) { if (vol->dev) { Mmsg(msg, "List %s: %s in_use=%d swap=%d on device %s\n", imsg, vol->vol_name, vol->is_in_use(), vol->is_swapping(), vol->dev->print_name()); @@ -208,8 +207,7 @@ static void debug_list_volumes(const char *imsg) } Dmsg1(dbglvl, "%s", msg.c_str()); } - - unlock_volumes(); + endeach_vol(vol); } @@ -222,32 +220,40 @@ void list_volumes(void sendit(const char *msg, int len, void *sarg), void *arg) POOL_MEM msg(PM_MESSAGE); int len; - lock_volumes(); - foreach_dlist(vol, vol_list) { + foreach_vol(vol) { DEVICE *dev = vol->dev; if (dev) { len = Mmsg(msg, "%s on device %s\n", vol->vol_name, dev->print_name()); sendit(msg.c_str(), len, arg); - len = Mmsg(msg, " Reader=%d writers=%d devres=%d volinuse=%d\n", - dev->can_read()?1:0, dev->num_writers, dev->num_reserved(), + len = Mmsg(msg, " Reader=%d writers=%d reserves=%d volinuse=%d\n", + dev->can_read()?1:0, dev->num_writers, dev->num_reserved(), vol->is_in_use()); sendit(msg.c_str(), len, arg); } else { - len = Mmsg(msg, "%s no device. volinuse= %d\n", vol->vol_name, + len = Mmsg(msg, "Volume %s no device. volinuse= %d\n", vol->vol_name, vol->is_in_use()); sendit(msg.c_str(), len, arg); } } - unlock_volumes(); + endeach_vol(vol); lock_read_volumes(); foreach_dlist(vol, read_vol_list) { - len = Mmsg(msg, "%s read volume JobId=%d\n", vol->vol_name, - vol->get_jobid()); - sendit(msg.c_str(), len, arg); + DEVICE *dev = vol->dev; + if (dev) { + len = Mmsg(msg, "Read volume: %s on device %s\n", vol->vol_name, dev->print_name()); + sendit(msg.c_str(), len, arg); + len = Mmsg(msg, " Reader=%d writers=%d reserves=%d volinuse=%d JobId=%d\n", + dev->can_read()?1:0, dev->num_writers, dev->num_reserved(), + vol->is_in_use(), vol->get_jobid()); + sendit(msg.c_str(), len, arg); + } else { + len = Mmsg(msg, "Volume: %s no device. volinuse= %d\n", vol->vol_name, + vol->is_in_use()); + sendit(msg.c_str(), len, arg); + } } unlock_read_volumes(); - } /* @@ -265,6 +271,8 @@ static VOLRES *new_vol_item(DCR *dcr, const char *VolumeName) Dmsg3(dbglvl, "new Vol=%s at %p dev=%s\n", VolumeName, vol->vol_name, vol->dev->print_name()); } + vol->init_mutex(); + vol->inc_use_count(); return vol; } @@ -272,10 +280,18 @@ static void free_vol_item(VOLRES *vol) { DEVICE *dev = NULL; + vol->dec_use_count(); + vol->Lock(); + if (vol->use_count() > 0) { + vol->Unlock(); + return; + } + vol->Unlock(); free(vol->vol_name); if (vol->dev) { dev = vol->dev; } + vol->destroy_mutex(); free(vol); if (dev) { dev->vol = NULL; @@ -296,42 +312,38 @@ static void free_vol_item(VOLRES *vol) * * Some details of the Volume list handling: * - * 1. The Volume list entry must be attached to the drive (rather than - * attached to a job as it currently is. I.e. the drive that "owns" + * 1. The Volume list entry is attached to the drive (rather than + * attached to a job as it was previously. I.e. the drive that "owns" * the volume (in use, mounted) * must point to the volume (still to be maintained in a list). * - * 2. The Volume is entered in the list when a drive is reserved. + * 2. The Volume is entered in the list when a drive is reserved. * * 3. When a drive is in use, the device code must appropriately update the - * volume name as it changes (currently the list is static -- an entry is - * removed when the Volume is no longer reserved, in use or mounted). - * The new code must keep the same list entry as long as the drive + * volume name as it changes. + * This code keeps the same list entry as long as the drive * has any volume associated with it but the volume name in the list * must be updated when the drive has a different volume mounted. * - * 4. A job that has reserved a volume, can un-reserve the volume, and if the + * 4. A job that has reserved a volume, can un-reserve the volume, and if the * volume is not mounted, and not reserved, and not in use, it will be * removed from the list. * * 5. If a job wants to reserve a drive with a different Volume from the one on * the drive, it can re-use the drive for the new Volume. - * + * 6. If a job wants a Volume that is in a different drive, it can either use the * other drive or take the volume, only if the other drive is not in use or * not reserved. * - * One nice aspect of this is that the reserve use count and the writer use count - * already exist and are correctly programmed and will need no changes -- use + * One nice aspect of this is that the reserve use count and the writer use count + * already exist and are correctly programmed and will need no changes -- use * counts are always very tricky. * - * The old code had a concept of "reserving" a Volume, but was changed - * to reserving and using a drive. A volume is must be attached to (owned by) a - * drive and can move from drive to drive or be unused given certain specific - * conditions of the drive. The key is that the drive must "own" the Volume. - * The old code had the job (dcr) owning the volume (more or less). The job was - * to change the insertion and removal of the volumes from the list to be based - * on the drive rather than the job. + * The old code had a concept of "reserving" a Volume, but was changed + * to reserving and using a drive. A volume is must be attached to (owned by) a + * drive and can move from drive to drive or be unused given certain specific + * conditions of the drive. The key is that the drive must "own" the Volume. * * Return: VOLRES entry on success * NULL volume busy on another drive @@ -346,16 +358,17 @@ VOLRES *reserve_volume(DCR *dcr, const char *VolumeName) } ASSERT(dev != NULL); - Dmsg2(dbglvl, "enter reserve_volume=%s drive=%s\n", VolumeName, + Dmsg2(dbglvl, "enter reserve_volume=%s drive=%s\n", VolumeName, dcr->dev->print_name()); - /* + + /* * We lock the reservations system here to ensure * when adding a new volume that no newly scheduled * job can reserve it. */ lock_volumes(); debug_list_volumes("begin reserve_volume"); - /* + /* * First, remove any old volume attached to this device as it * is no longer used. */ @@ -416,7 +429,7 @@ VOLRES *reserve_volume(DCR *dcr, const char *VolumeName) if (vol->dev) { Dmsg2(dbglvl, "dev=%s vol->dev=%s\n", dev->print_name(), vol->dev->print_name()); } - + /* * Check if we are trying to use the Volume on a different drive * dev is our device @@ -480,6 +493,69 @@ get_out: return vol; } +/* + * Start walk of vol chain + * The proper way to walk the vol chain is: + * VOLRES *vol; + * foreach_vol(vol) { + * ... + * } + * endeach_vol(vol); + * + * It is possible to leave out the endeach_vol(vol), but + * in that case, the last vol referenced must be explicitly + * released with: + * + * free_vol_item(vol); + * + */ +VOLRES *vol_walk_start() +{ + VOLRES *vol; + lock_volumes(); + vol = (VOLRES *)vol_list->first(); + if (vol) { + vol->inc_use_count(); + Dmsg2(dbglvl, "Inc walk_start use_count=%d volname=%s\n", + vol->use_count(), vol->vol_name); + } + unlock_volumes(); + return vol; +} + +/* + * Get next vol from chain, and release current one + */ +VOLRES *vol_walk_next(VOLRES *prev_vol) +{ + VOLRES *vol; + + lock_volumes(); + vol = (VOLRES *)vol_list->next(prev_vol); + if (vol) { + vol->inc_use_count(); + Dmsg2(dbglvl, "Inc walk_next use_count=%d volname=%s\n", + vol->use_count(), vol->vol_name); + } + unlock_volumes(); + if (prev_vol) { + free_vol_item(prev_vol); + } + return vol; +} + +/* + * Release last vol referenced + */ +void vol_walk_end(VOLRES *vol) +{ + if (vol) { + Dmsg2(dbglvl, "Free walk_end use_count=%d volname=%s\n", + vol->use_count(), vol->vol_name); + free_vol_item(vol); + } +} + /* * Search for a Volume name in the Volume list. * @@ -530,7 +606,7 @@ static VOLRES *find_read_volume(const char *VolumeName) } -/* +/* * Free a Volume from the Volume list if it is no longer used * Note, for tape drives we want to remember where the Volume * was when last used, so rather than free the volume entry, @@ -559,7 +635,7 @@ bool volume_unused(DCR *dcr) return false; } - /* + /* * If this is a tape, we do not free the volume, rather we wait * until the autoloader unloads it, or until another tape is * explicitly read in this drive. This allows the SD to remember @@ -571,7 +647,7 @@ bool volume_unused(DCR *dcr) return true; } else { /* - * Note, this frees the volume reservation entry, but the + * Note, this frees the volume reservation entry, but the * file descriptor remains open with the OS. */ return free_volume(dev); @@ -637,6 +713,7 @@ static void free_volume_list() } free(vol->vol_name); vol->vol_name = NULL; + vol->destroy_mutex(); } delete vol_list; vol_list = NULL; @@ -661,6 +738,7 @@ void free_volume_lists() } free(vol->vol_name); vol->vol_name = NULL; + vol->destroy_mutex(); } delete read_vol_list; read_vol_list = NULL; @@ -725,13 +803,13 @@ get_out: } -/* +/* * Create a temporary copy of the volume list. We do this, * to avoid having the volume list locked during the * call to reserve_device(), which would cause a deadlock. * Note, we may want to add an update counter on the vol_list * so that if it is modified while we are traversing the copy - * we can take note and act accordingly (probably redo the + * we can take note and act accordingly (probably redo the * search at least a few times). */ dlist *dup_vol_list(JCR *jcr) @@ -739,12 +817,11 @@ dlist *dup_vol_list(JCR *jcr) dlist *temp_vol_list; VOLRES *vol = NULL; - lock_volumes(); - Dmsg0(dbglvl, "lock volumes\n"); + Dmsg0(dbglvl, "lock volumes\n"); Dmsg0(dbglvl, "duplicate vol list\n"); temp_vol_list = New(dlist(vol, &vol->link)); - foreach_dlist(vol, vol_list) { + foreach_vol(vol) { VOLRES *nvol; VOLRES *tvol = (VOLRES *)malloc(sizeof(VOLRES)); memset(tvol, 0, sizeof(VOLRES)); @@ -758,8 +835,8 @@ dlist *dup_vol_list(JCR *jcr) Jmsg(jcr, M_WARNING, 0, "Logic error. Duplicating vol list hit duplicate.\n"); } } + endeach_vol(vol); Dmsg0(dbglvl, "unlock volumes\n"); - unlock_volumes(); return temp_vol_list; } @@ -769,7 +846,7 @@ dlist *dup_vol_list(JCR *jcr) void free_temp_vol_list(dlist *temp_vol_list) { dlist *save_vol_list; - + lock_volumes(); save_vol_list = vol_list; vol_list = temp_vol_list;