X-Git-Url: https://git.sur5r.net/?a=blobdiff_plain;f=bacula%2Fsrc%2Fstored%2Freserve.c;h=4e44a524f0ce337f9683af234982af9034d3b4cc;hb=c641313a8a089b0eb390843ab0ac6ea6c7352c0a;hp=bf1bf3dfe73003eed5d161afff03aefe3ea54f57;hpb=23392837f61fc878b87f5acb07d1a8cfb128b25d;p=bacula%2Fbacula diff --git a/bacula/src/stored/reserve.c b/bacula/src/stored/reserve.c index bf1bf3dfe7..4e44a524f0 100644 --- a/bacula/src/stored/reserve.c +++ b/bacula/src/stored/reserve.c @@ -1,24 +1,14 @@ -/* - * Drive reservation functions for Storage Daemon - * - * Kern Sibbald, MM - * - * Split from job.c and acquire.c June 2005 - * - * Version $Id$ - * - */ /* Bacula® - The Network Backup Solution - Copyright (C) 2000-2006 Free Software Foundation Europe e.V. + Copyright (C) 2000-2007 Free Software Foundation Europe e.V. The main author of Bacula is Kern Sibbald, with contributions from many others, a complete list can be found in the file AUTHORS. This program is Free Software; you can redistribute it and/or modify it under the terms of version two of the GNU General Public - License as published by the Free Software Foundation plus additions - that are listed in the file LICENSE. + License as published by the Free Software Foundation and included + in the file LICENSE. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -35,12 +25,25 @@ (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich, Switzerland, email:ftf@fsfeurope.org. 
*/ +/* + * Drive reservation functions for Storage Daemon + * + * Kern Sibbald, MM + * + * Split from job.c and acquire.c June 2005 + * + * Version $Id$ + * + */ #include "bacula.h" #include "stored.h" +const int dbglvl = 50; + static dlist *vol_list = NULL; -static pthread_mutex_t vol_list_lock = PTHREAD_MUTEX_INITIALIZER; +static brwlock_t reservation_lock; +static brwlock_t vol_list_lock; /* Forward referenced functions */ static int can_reserve_drive(DCR *dcr, RCTX &rctx); @@ -49,6 +52,7 @@ static bool reserve_device_for_read(DCR *dcr); static bool reserve_device_for_append(DCR *dcr, RCTX &rctx); static bool use_storage_cmd(JCR *jcr); static void queue_reserve_message(JCR *jcr); +static void pop_reserve_messages(JCR *jcr); /* Requests from the Director daemon */ static char use_storage[] = "use storage=%127s media_type=%127s " @@ -78,7 +82,6 @@ static int my_compare(void *item1, void *item2) return strcmp(((VOLRES *)item1)->vol_name, ((VOLRES *)item2)->vol_name); } -static brwlock_t reservation_lock; void init_reservations_lock() { @@ -86,97 +89,305 @@ void init_reservations_lock() if ((errstat=rwl_init(&reservation_lock)) != 0) { berrno be; Emsg1(M_ABORT, 0, _("Unable to initialize reservation lock. ERR=%s\n"), - be.strerror(errstat)); + be.bstrerror(errstat)); } + if ((errstat=rwl_init(&vol_list_lock)) != 0) { + berrno be; + Emsg1(M_ABORT, 0, _("Unable to initialize volume list lock. ERR=%s\n"), + be.bstrerror(errstat)); + } } void term_reservations_lock() { rwl_destroy(&reservation_lock); + rwl_destroy(&vol_list_lock); } +int reservations_lock_count = 0; + /* This applies to a drive and to Volumes */ -void lock_reservations() +void _lock_reservations() { int errstat; + reservations_lock_count++; if ((errstat=rwl_writelock(&reservation_lock)) != 0) { berrno be; Emsg2(M_ABORT, 0, "rwl_writelock failure. 
stat=%d: ERR=%s\n", - errstat, be.strerror(errstat)); + errstat, be.bstrerror(errstat)); } } -void unlock_reservations() +void _unlock_reservations() { int errstat; + reservations_lock_count--; if ((errstat=rwl_writeunlock(&reservation_lock)) != 0) { berrno be; Emsg2(M_ABORT, 0, "rwl_writeunlock failure. stat=%d: ERR=%s\n", - errstat, be.strerror(errstat)); + errstat, be.bstrerror(errstat)); + } +} + +int vol_list_lock_count = 0; + +void _lock_volumes() +{ + int errstat; + vol_list_lock_count++; + if ((errstat=rwl_writelock(&vol_list_lock)) != 0) { + berrno be; + Emsg2(M_ABORT, 0, "rwl_writelock failure. stat=%d: ERR=%s\n", + errstat, be.bstrerror(errstat)); + } +} + +void _unlock_volumes() +{ + int errstat; + vol_list_lock_count--; + if ((errstat=rwl_writeunlock(&vol_list_lock)) != 0) { + berrno be; + Emsg2(M_ABORT, 0, "rwl_writeunlock failure. stat=%d: ERR=%s\n", + errstat, be.bstrerror(errstat)); } } +/* + * List Volumes -- this should be moved to status.c + */ +enum { + debug_lock = true, + debug_nolock = false +}; + +static void debug_list_volumes(const char *imsg) +{ + VOLRES *vol; + POOL_MEM msg(PM_MESSAGE); + + lock_volumes(); + foreach_dlist(vol, vol_list) { + if (vol->dev) { + Mmsg(msg, "List from %s: %s at %p on device %s\n", imsg, + vol->vol_name, vol->vol_name, vol->dev->print_name()); + } else { + Mmsg(msg, "List from %s: %s at %p no dev\n", imsg, vol->vol_name, vol->vol_name); + } + Dmsg1(dbglvl, "%s", msg.c_str()); + } + +#ifdef xxx + DEVICE *dev = NULL; + foreach_dlist(vol, vol_list) { + if (vol->dev == dev) { + Dmsg0(dbglvl, "Two Volumes on same device.\n"); + ASSERT(0); + dev = vol->dev; + } + } +#endif + +// Dmsg2(dbglvl, "List from %s: %d volumes\n", imsg, count); + unlock_volumes(); +} + + +/* + * List Volumes -- this should be moved to status.c + */ +void list_volumes(void sendit(const char *msg, int len, void *sarg), void *arg) +{ + VOLRES *vol; + POOL_MEM msg(PM_MESSAGE); + int len; + + lock_volumes(); + foreach_dlist(vol, vol_list) { + 
DEVICE *dev = vol->dev; + if (dev) { + len = Mmsg(msg, "%s on device %s\n", vol->vol_name, dev->print_name()); + sendit(msg.c_str(), len, arg); + len = Mmsg(msg, " Reader=%d writers=%d reserved=%d released=%d\n", + dev->can_read()?1:0, dev->num_writers, dev->reserved_device, vol->released); + sendit(msg.c_str(), len, arg); + } else { + len = Mmsg(msg, "%s no device. released=%d\n", vol->vol_name, vol->released); + sendit(msg.c_str(), len, arg); + } + } + unlock_volumes(); +} + +/* + * Create a Volume item to put in the Volume list + * Ensure that the device points to it. + */ +static VOLRES *new_vol_item(DCR *dcr, const char *VolumeName) +{ + VOLRES *vol; + vol = (VOLRES *)malloc(sizeof(VOLRES)); + memset(vol, 0, sizeof(VOLRES)); + vol->vol_name = bstrdup(VolumeName); + vol->dev = dcr->dev; + Dmsg3(dbglvl, "new Vol=%s at %p dev=%s\n", + VolumeName, vol->vol_name, vol->dev->print_name()); + return vol; +} + +static void free_vol_item(VOLRES *vol) +{ + free(vol->vol_name); + if (vol->dev) { + vol->dev->vol = NULL; + } + free(vol); +} + + /* * Put a new Volume entry in the Volume list. This * effectively reserves the volume so that it will * not be mounted again. * + * If the device has any current volume associated with it, + * and it is a different Volume, and the device is not busy, + * we release the old Volume item and insert the new one. + * + * It is assumed that the device is free and locked so that + * we can change the device structure. + * + * Some details of the Volume list handling: + * + * 1. The Volume list entry must be attached to the drive (rather than + * attached to a job as it currently is. I.e. the drive that "owns" + * the volume (reserved, in use, mounted) + * must point to the volume (still to be maintained in a list). + * + * 2. The Volume is entered in the list when a drive is reserved. + * + * 3. 
When a drive is in use, the device code must appropriately update the + * volume name as it changes (currently the list is static -- an entry is + * removed when the Volume is no longer reserved, in use or mounted). + * The new code must keep the same list entry as long as the drive + * has any volume associated with it but the volume name in the list + * must be updated when the drive has a different volume mounted. + * + * 4. A job that has reserved a volume, can un-reserve the volume, and if the + * volume is not mounted, and not reserved, and not in use, it will be + * removed from the list. + * + * 5. If a job wants to reserve a drive with a different Volume from the one on + * the drive, it can re-use the drive for the new Volume. + * + * 6. If a job wants a Volume that is in a different drive, it can either use the + * other drive or take the volume, only if the other drive is not in use or + * not reserved. + * + * One nice aspect of this is that the reserve use count and the writer use count + * already exist and are correctly programmed and will need no changes -- use + * counts are always very tricky. + * + * The old code had a concept of "reserving" a Volume, but was changed + * to reserving and using a drive. A volume must be attached to (owned by) a + * drive and can move from drive to drive or be unused given certain specific + * conditions of the drive. The key is that the drive must "own" the Volume. + * The old code had the job (dcr) owning the volume (more or less). The job was + * to change the insertion and removal of the volumes from the list to be based + * on the drive rather than the job. 
+ * * Return: VOLRES entry on success - * NULL if the Volume is already in the list + * NULL volume busy on another drive */ -VOLRES *new_volume(DCR *dcr, const char *VolumeName) +VOLRES *reserve_volume(DCR *dcr, const char *VolumeName) { VOLRES *vol, *nvol; + DEVICE *dev = dcr->dev; + + ASSERT(dev != NULL); - Dmsg1(400, "new_volume %s\n", VolumeName); + Dmsg1(dbglvl, "reserve_volume %s\n", VolumeName); /* * We lock the reservations system here to ensure * when adding a new volume that no newly scheduled * job can reserve it. */ - lock_reservations(); - P(vol_list_lock); - if (dcr->dev) { -again: - foreach_dlist(vol, vol_list) { - if (vol && vol->dev == dcr->dev) { - vol_list->remove(vol); - if (vol->vol_name) { - free(vol->vol_name); - } - free(vol); - goto again; - } + lock_volumes(); + debug_list_volumes("begin reserve_volume"); + /* + * First, remove any old volume attached to this device as it + * is no longer used. + */ + if (dev->vol) { + vol = dev->vol; + /* + * Make sure we don't remove the current volume we are inserting + * because it was probably inserted by another job. 
+ */ + if (strcmp(vol->vol_name, VolumeName) == 0) { + Dmsg1(dbglvl, "OK, vol=%s on device.\n", VolumeName); + goto get_out; /* Volume already on this device */ + } else { + Dmsg2(dbglvl, "reserve_vol free vol=%s at %p\n", vol->vol_name, vol->vol_name); + vol_list->remove(vol); + free_vol_item(vol); + debug_list_volumes("reserve_vol free"); } } - vol = (VOLRES *)malloc(sizeof(VOLRES)); - memset(vol, 0, sizeof(VOLRES)); - vol->vol_name = bstrdup(VolumeName); - vol->dev = dcr->dev; - vol->dcr = dcr; - Dmsg2(100, "New Vol=%s dev=%s\n", VolumeName, dcr->dev->print_name()); - nvol = (VOLRES *)vol_list->binary_insert(vol, my_compare); - if (nvol != vol) { - free(vol->vol_name); - free(vol); - vol = NULL; - if (dcr->dev) { - DEVICE *dev = nvol->dev; - if (!dev->is_busy()) { - Dmsg3(100, "Swap vol=%s from dev=%s to %s\n", VolumeName, - dev->print_name(), dcr->dev->print_name()); - nvol->dev = dcr->dev; + + /* Create a new Volume entry */ + nvol = new_vol_item(dcr, VolumeName); + + /* + * Now try to insert the new Volume + */ + vol = (VOLRES *)vol_list->binary_insert(nvol, my_compare); + if (vol != nvol) { + Dmsg2(dbglvl, "Found vol=%s dev-same=%d\n", vol->vol_name, dev==vol->dev); + /* + * At this point, a Volume with this name already is in the list, + * so we simply release our new Volume entry. Note, this should + * only happen if we are moving the volume from one drive to another. + */ + Dmsg2(dbglvl, "reserve_vol free-tmp vol=%s at %p\n", + vol->vol_name, vol->vol_name); + /* + * Clear dev pointer so that free_vol_item() doesn't + * take away our volume. 
+ */ + nvol->dev = NULL; /* don't zap dev entry */ + free_vol_item(nvol); + + /* Check if we are trying to use the Volume on a different drive */ + if (dev != vol->dev) { + /* Caller wants to switch Volume to another device */ + if (!vol->dev->is_busy()) { + /* OK to move it -- I'm not sure this will work */ + Dmsg3(dbglvl, "==== Swap vol=%s from dev=%s to %s\n", VolumeName, + vol->dev->print_name(), dev->print_name()); + vol->dev->vol = NULL; /* take vol from old drive */ + vol->dev->VolHdr.VolumeName[0] = 0; + vol->dev = dev; /* point vol at new drive */ + dev->vol = vol; /* point dev at vol */ dev->VolHdr.VolumeName[0] = 0; } else { - Dmsg3(100, "!!!! could not swap vol=%s from dev=%s to %s\n", VolumeName, - dev->print_name(), dcr->dev->print_name()); + Dmsg3(dbglvl, "Volume busy could not swap vol=%s from dev=%s to %s\n", + VolumeName, vol->dev->print_name(), dev->print_name()); + vol = NULL; /* device busy */ + goto get_out; } } } - V(vol_list_lock); - unlock_reservations(); + dev->vol = vol; + +get_out: + if (vol) { + vol->released = false; + } + debug_list_volumes("end new volume"); + unlock_volumes(); return vol; } @@ -186,116 +397,110 @@ again: * Returns: VOLRES entry on success * NULL if the Volume is not in the list */ -VOLRES *find_volume(const char *VolumeName) +VOLRES *find_volume(DCR *dcr) { VOLRES vol, *fvol; /* Do not lock reservations here */ - P(vol_list_lock); - vol.vol_name = bstrdup(VolumeName); + lock_volumes(); + vol.vol_name = bstrdup(dcr->VolumeName); fvol = (VOLRES *)vol_list->binary_search(&vol, my_compare); free(vol.vol_name); - V(vol_list_lock); + Dmsg2(dbglvl, "find_vol=%s found=%d\n", dcr->VolumeName, fvol!=NULL); + debug_list_volumes("find_volume"); + unlock_volumes(); return fvol; } +/* + * Remove any reservation from a drive and tell the system + * that the volume is unused at least by us. 
+ */ +void unreserve_device(DCR *dcr) +{ + DEVICE *dev = dcr->dev; + if (dcr->reserved_device) { + dcr->reserved_device = false; + dev->reserved_device--; + Dmsg2(dbglvl, "Dec reserve=%d dev=%s\n", dev->reserved_device, dev->print_name()); + dcr->reserved_device = false; + /* If we set read mode in reserving, remove it */ + if (dev->can_read()) { + dev->clear_read(); + } + if (dev->num_writers < 0) { + Jmsg1(dcr->jcr, M_ERROR, 0, _("Hey! num_writers=%d!!!!\n"), dev->num_writers); + dev->num_writers = 0; + } + } + + volume_unused(dcr); +} + /* - * Free a Volume from the Volume list + * Free a Volume from the Volume list if it is no longer used * * Returns: true if the Volume found and removed from the list - * false if the Volume is not in the list + * false if the Volume is not in the list or is in use */ -bool free_volume(DEVICE *dev) +bool volume_unused(DCR *dcr) { - VOLRES vol, *fvol; + DEVICE *dev = dcr->dev; - P(vol_list_lock); - if (dev->VolHdr.VolumeName[0] == 0) { - Dmsg1(100, "free_volume: no vol on dev %s\n", dev->print_name()); - /* - * Our device has no VolumeName listed, but - * search the list for any Volume attached to - * this device and remove it. 
- */ - foreach_dlist(fvol, vol_list) { - if (fvol && fvol->dev == dev) { - vol_list->remove(fvol); - if (fvol->vol_name) { - Dmsg2(100, "free_volume %s dev=%s\n", fvol->vol_name, dev->print_name()); - free(fvol->vol_name); - } - free(fvol); - break; - } - } - goto bail_out; - } - Dmsg1(400, "free_volume %s\n", dev->VolHdr.VolumeName); - vol.vol_name = bstrdup(dev->VolHdr.VolumeName); - fvol = (VOLRES *)vol_list->binary_search(&vol, my_compare); - if (fvol) { - vol_list->remove(fvol); - Dmsg2(100, "free_volume %s dev=%s\n", fvol->vol_name, dev->print_name()); - free(fvol->vol_name); - free(fvol); + if (dev->vol == NULL) { + Dmsg1(dbglvl, "vol_unused: no vol on %s\n", dev->print_name()); + debug_list_volumes("null vol cannot unreserve_volume"); + return false; } - free(vol.vol_name); - dev->VolHdr.VolumeName[0] = 0; -bail_out: - V(vol_list_lock); - return fvol != NULL; -} -/* Free volume reserved by this dcr but not attached to a dev */ -void free_unused_volume(DCR *dcr) -{ - VOLRES *vol; + if (dev->is_busy()) { + Dmsg1(dbglvl, "vol_unused: busy on %s\n", dev->print_name()); + debug_list_volumes("dev busy cannot unreserve_volume"); + return false; + } - P(vol_list_lock); - for (vol=(VOLRES *)vol_list->first(); vol; vol=(VOLRES *)vol_list->next(vol)) { - if (vol->dcr == dcr && (vol->dev == NULL || - strcmp(vol->vol_name, vol->dev->VolHdr.VolumeName) != 0)) { - vol_list->remove(vol); - Dmsg1(100, "free_unused_volume %s\n", vol->vol_name); - free(vol->vol_name); - free(vol); - break; - } + /* + * If this is a tape, we do not free the volume, rather we wait + * until the autoloader unloads it, or until another tape is + * explicitly read in this drive. This allows the SD to remember + * where the tapes are or last were. 
+ */ + dev->vol->released = true; + if (dev->is_tape() || dev->is_autochanger()) { + return true; + } else { + return free_volume(dev); } - V(vol_list_lock); } /* - * List Volumes -- this should be moved to status.c + * Unconditionally release the volume */ -void list_volumes(void sendit(const char *msg, int len, void *sarg), void *arg) +bool free_volume(DEVICE *dev) { VOLRES *vol; - char *msg; - int len; - - msg = (char *)get_pool_memory(PM_MESSAGE); - P(vol_list_lock); - for (vol=(VOLRES *)vol_list->first(); vol; vol=(VOLRES *)vol_list->next(vol)) { - if (vol->dev) { - len = Mmsg(msg, "%s on device %s\n", vol->vol_name, vol->dev->print_name()); - sendit(msg, len, arg); - } else { - len = Mmsg(msg, "%s\n", vol->vol_name); - sendit(msg, len, arg); - } + if (dev->vol == NULL) { + Dmsg1(dbglvl, "No vol on dev %s\n", dev->print_name()); + return false; } - V(vol_list_lock); - - free_pool_memory(msg); + lock_volumes(); + vol = dev->vol; + dev->vol = NULL; + vol_list->remove(vol); + Dmsg2(dbglvl, "free_volume %s dev=%s\n", vol->vol_name, dev->print_name()); + free_vol_item(vol); + debug_list_volumes("free_volume"); + unlock_volumes(); + return vol != NULL; } + /* Create the Volume list */ void create_volume_list() { - VOLRES *dummy = NULL; + VOLRES *vol = NULL; if (vol_list == NULL) { - vol_list = New(dlist(dummy, &dummy->link)); + vol_list = New(dlist(vol, &vol->link)); } } @@ -306,36 +511,44 @@ void free_volume_list() if (!vol_list) { return; } - P(vol_list_lock); - for (vol=(VOLRES *)vol_list->first(); vol; vol=(VOLRES *)vol_list->next(vol)) { - Dmsg3(100, "Unreleased Volume=%s dcr=0x%x dev=0x%x\n", vol->vol_name, - vol->dcr, vol->dev); + lock_volumes(); + foreach_dlist(vol, vol_list) { + if (vol->dev) { + Dmsg2(dbglvl, "free vol_list Volume=%s dev=%s\n", vol->vol_name, vol->dev->print_name()); + } else { + Dmsg1(dbglvl, "free vol_list Volume=%s No dev\n", vol->vol_name); + } + free(vol->vol_name); + vol->vol_name = NULL; } delete vol_list; vol_list = NULL; - 
V(vol_list_lock); + unlock_volumes(); } bool is_volume_in_use(DCR *dcr) { - VOLRES *vol = find_volume(dcr->VolumeName); + VOLRES *vol = find_volume(dcr); if (!vol) { - Dmsg1(100, "Vol=%s not in use.\n", dcr->VolumeName); + Dmsg1(dbglvl, "Vol=%s not in use.\n", dcr->VolumeName); return false; /* vol not in list */ } - if (!vol->dev) { /* vol not attached to device */ - Dmsg1(100, "Vol=%s has no dev.\n", dcr->VolumeName); - return false; - } + ASSERT(vol->dev != NULL); + if (dcr->dev == vol->dev) { /* same device OK */ - Dmsg1(100, "Vol=%s on same dev.\n", dcr->VolumeName); + Dmsg1(dbglvl, "Vol=%s on same dev.\n", dcr->VolumeName); return false; + } else { + Dmsg3(dbglvl, "Vol=%s on %s we have %s\n", dcr->VolumeName, + vol->dev->print_name(), dcr->dev->print_name()); } if (!vol->dev->is_busy()) { - Dmsg2(100, "Vol=%s dev=%s not busy.\n", dcr->VolumeName, vol->dev->print_name()); + Dmsg2(dbglvl, "Vol=%s dev=%s not busy.\n", dcr->VolumeName, vol->dev->print_name()); return false; + } else { + Dmsg2(dbglvl, "Vol=%s dev=%s busy.\n", dcr->VolumeName, vol->dev->print_name()); } - Dmsg2(100, "Vol=%s used by %s.\n", dcr->VolumeName, vol->dev->print_name()); + Dmsg2(dbglvl, "Vol=%s in use by %s.\n", dcr->VolumeName, vol->dev->print_name()); return true; } @@ -360,8 +573,6 @@ static bool use_storage_cmd(JCR *jcr) int Copy, Stripe; DIRSTORE *store; RCTX rctx; - char *msg; - alist *msgs; alist *dirstore; memset(&rctx, 0, sizeof(RCTX)); @@ -371,10 +582,9 @@ static bool use_storage_cmd(JCR *jcr) * use_device for each device that it wants to use. 
*/ dirstore = New(alist(10, not_owned_by_alist)); -// Dmsg2(000, "dirstore=%p JobId=%u\n", dirstore, jcr->JobId); - msgs = jcr->reserve_msgs = New(alist(10, not_owned_by_alist)); + jcr->reserve_msgs = New(alist(10, not_owned_by_alist)); do { - Dmsg1(100, "msg); + Dmsg1(dbglvl, "msg); ok = sscanf(dir->msg, use_storage, store_name.c_str(), media_type.c_str(), pool_name.c_str(), pool_type.c_str(), &append, &Copy, &Stripe) == 7; @@ -402,8 +612,8 @@ static bool use_storage_cmd(JCR *jcr) store->append = append; /* Now get all devices */ - while (bnet_recv(dir) >= 0) { - Dmsg1(100, "msg); + while (dir->recv() >= 0) { + Dmsg1(dbglvl, "msg); ok = sscanf(dir->msg, use_device, dev_name.c_str()) == 1; if (!ok) { break; @@ -411,23 +621,29 @@ static bool use_storage_cmd(JCR *jcr) unbash_spaces(dev_name); store->device->append(bstrdup(dev_name.c_str())); } - } while (ok && bnet_recv(dir) >= 0); + } while (ok && dir->recv() >= 0); -#ifdef DEVELOPER - /* This loop is debug code and can be removed */ - /* ***FIXME**** remove after 1.38 release */ + /* Developer debug code */ char *device_name; - foreach_alist(store, dirstore) { - Dmsg5(110, "Storage=%s media_type=%s pool=%s pool_type=%s append=%d\n", - store->name, store->media_type, store->pool_name, - store->pool_type, store->append); - foreach_alist(device_name, store->device) { - Dmsg1(110, " Device=%s\n", device_name); + if (debug_level >= dbglvl) { + foreach_alist(store, dirstore) { + Dmsg5(dbglvl, "Storage=%s media_type=%s pool=%s pool_type=%s append=%d\n", + store->name, store->media_type, store->pool_name, + store->pool_type, store->append); + foreach_alist(device_name, store->device) { + Dmsg1(dbglvl, " Device=%s\n", device_name); + } } } -#endif init_jcr_device_wait_timers(jcr); + jcr->dcr = new_dcr(jcr, NULL, NULL); /* get a dcr */ + if (!jcr->dcr) { + BSOCK *dir = jcr->dir_bsock; + dir->fsend(_("3939 Could not get dcr\n")); + Dmsg1(dbglvl, ">dird: %s", dir->msg); + ok = false; + } /* * At this point, we have a list of 
all the Director's Storage * resources indicated for this Job, which include Pool, PoolType, @@ -438,25 +654,30 @@ static bool use_storage_cmd(JCR *jcr) * Wiffle through them and find one that can do the backup. */ if (ok) { - bool first = true; /* print wait message once */ + int wait_for_device_retries = 0; + int repeat = 0; bool fail = false; rctx.notify_dir = true; + lock_reservations(); for ( ; !fail && !job_canceled(jcr); ) { - while ((msg = (char *)msgs->pop())) { - free(msg); - } + pop_reserve_messages(jcr); rctx.suitable_device = false; rctx.have_volume = false; + rctx.VolumeName[0] = 0; rctx.any_drive = false; if (!jcr->PreferMountedVols) { - /* Look for unused drives in autochangers */ + /* + * Here we try to find a drive that is not used. + * This will maximize the use of available drives. + * + */ rctx.num_writers = 20000000; /* start with impossible number */ rctx.low_use_drive = NULL; rctx.PreferMountedVols = false; rctx.exact_match = false; rctx.autochanger_only = true; - Dmsg5(110, "PrefMnt=%d exact=%d suitable=%d chgronly=%d any=%d\n", + Dmsg5(dbglvl, "PrefMnt=%d exact=%d suitable=%d chgronly=%d any=%d\n", rctx.PreferMountedVols, rctx.exact_match, rctx.suitable_device, rctx.autochanger_only, rctx.any_drive); if ((ok = find_suitable_device_for_job(jcr, rctx))) { @@ -471,18 +692,22 @@ static bool use_storage_cmd(JCR *jcr) rctx.try_low_use_drive = false; } rctx.autochanger_only = false; - Dmsg5(110, "PrefMnt=%d exact=%d suitable=%d chgronly=%d any=%d\n", + Dmsg5(dbglvl, "PrefMnt=%d exact=%d suitable=%d chgronly=%d any=%d\n", rctx.PreferMountedVols, rctx.exact_match, rctx.suitable_device, rctx.autochanger_only, rctx.any_drive); if ((ok = find_suitable_device_for_job(jcr, rctx))) { break; } } - /* Look for an exact match all drives */ + /* + * Now we look for a drive that may or may not be in + * use. 
+ */ + /* Look for an exact Volume match all drives */ rctx.PreferMountedVols = true; rctx.exact_match = true; rctx.autochanger_only = false; - Dmsg5(110, "PrefMnt=%d exact=%d suitable=%d chgronly=%d any=%d\n", + Dmsg5(dbglvl, "PrefMnt=%d exact=%d suitable=%d chgronly=%d any=%d\n", rctx.PreferMountedVols, rctx.exact_match, rctx.suitable_device, rctx.autochanger_only, rctx.any_drive); if ((ok = find_suitable_device_for_job(jcr, rctx))) { @@ -490,7 +715,7 @@ static bool use_storage_cmd(JCR *jcr) } /* Look for any mounted drive */ rctx.exact_match = false; - Dmsg5(110, "PrefMnt=%d exact=%d suitable=%d chgronly=%d any=%d\n", + Dmsg5(dbglvl, "PrefMnt=%d exact=%d suitable=%d chgronly=%d any=%d\n", rctx.PreferMountedVols, rctx.exact_match, rctx.suitable_device, rctx.autochanger_only, rctx.any_drive); if ((ok = find_suitable_device_for_job(jcr, rctx))) { @@ -498,7 +723,7 @@ static bool use_storage_cmd(JCR *jcr) } /* Try any drive */ rctx.any_drive = true; - Dmsg5(110, "PrefMnt=%d exact=%d suitable=%d chgronly=%d any=%d\n", + Dmsg5(dbglvl, "PrefMnt=%d exact=%d suitable=%d chgronly=%d any=%d\n", rctx.PreferMountedVols, rctx.exact_match, rctx.suitable_device, rctx.autochanger_only, rctx.any_drive); if ((ok = find_suitable_device_for_job(jcr, rctx))) { @@ -506,13 +731,22 @@ static bool use_storage_cmd(JCR *jcr) } /* Keep reservations locked *except* during wait_for_device() */ unlock_reservations(); - if (!rctx.suitable_device || !wait_for_device(jcr, first)) { - Dmsg0(100, "Fail. !suitable_device || !wait_for_device\n"); + /* + * The idea of looping on repeat a few times it to ensure + * that if there is some subtle timing problem between two + * jobs, we will simply try again, and most likely succeed. + * This can happen if one job reserves a drive or finishes using + * a drive at the same time a second job wants it. 
+ */ + if (repeat++ > 1) { /* try algorithm 3 times */ + bmicrosleep(30, 0); /* wait a bit */ + Dmsg0(dbglvl, "repeat reserve algorithm\n"); + } else if (!rctx.suitable_device || !wait_for_device(jcr, wait_for_device_retries)) { + Dmsg0(dbglvl, "Fail. !suitable_device || !wait_for_device\n"); fail = true; } lock_reservations(); - first = false; - bnet_sig(dir, BNET_HEARTBEAT); /* Inform Dir that we are alive */ + dir->signal(BNET_HEARTBEAT); /* Inform Dir that we are alive */ } unlock_reservations(); if (!ok) { @@ -527,37 +761,41 @@ static bool use_storage_cmd(JCR *jcr) Jmsg(jcr, M_FATAL, 0, _("\n" " Device \"%s\" with MediaType \"%s\" requested by DIR not found in SD Device resources.\n"), dev_name.c_str(), media_type.c_str()); - bnet_fsend(dir, NO_device, dev_name.c_str()); + dir->fsend(NO_device, dev_name.c_str()); - Dmsg1(100, ">dird: %s", dir->msg); + Dmsg1(dbglvl, ">dird: %s", dir->msg); } } else { unbash_spaces(dir->msg); pm_strcpy(jcr->errmsg, dir->msg); Jmsg(jcr, M_FATAL, 0, _("Failed command: %s\n"), jcr->errmsg); - bnet_fsend(dir, BAD_use, jcr->errmsg); - Dmsg1(100, ">dird: %s", dir->msg); + dir->fsend(BAD_use, jcr->errmsg); + Dmsg1(dbglvl, ">dird: %s", dir->msg); } - release_msgs(jcr); + release_reserve_messages(jcr); return ok; } -void release_msgs(JCR *jcr) + +/* + * Walk through the autochanger resources and check if + * the volume is in one of them. 
+ * + * Returns: true if volume is in device + * false otherwise + */ +static bool is_vol_in_autochanger(RCTX &rctx, VOLRES *vol) { - alist *msgs = jcr->reserve_msgs; - char *msg; + AUTOCHANGER *changer = vol->dev->device->changer_res; - if (!msgs) { - return; - } - lock_reservations(); - while ((msg = (char *)msgs->pop())) { - free(msg); - } - delete msgs; - jcr->reserve_msgs = NULL; - unlock_reservations(); + /* Find resource, and make sure we were able to open it */ + if (strcmp(rctx.device_name, changer->hdr.name) == 0) { + Dmsg1(dbglvl, "Found changer device %s\n", vol->dev->device->hdr.name); + return true; + } + Dmsg1(dbglvl, "Incorrect changer device %s\n", changer->hdr.name); + return false; } /* @@ -565,24 +803,141 @@ void release_msgs(JCR *jcr) */ bool find_suitable_device_for_job(JCR *jcr, RCTX &rctx) { - bool ok; + bool ok = false; DIRSTORE *store; char *device_name; alist *dirstore; + DCR *dcr = jcr->dcr; if (rctx.append) { dirstore = jcr->write_store; } else { dirstore = jcr->read_store; } + Dmsg4(dbglvl, "PrefMnt=%d exact=%d suitable=%d chgronly=%d\n", + rctx.PreferMountedVols, rctx.exact_match, rctx.suitable_device, + rctx.autochanger_only); + /* + * If the appropriate conditions of this if are met, namely that + * we are appending and the user wants mounted drive (or we + * force try a mounted drive because they are all busy), we + * start by looking at all the Volumes in the volume list. + */ + if (!vol_list->empty() && rctx.append && rctx.PreferMountedVols) { + dlist *temp_vol_list, *save_vol_list; + VOLRES *vol = NULL; + lock_volumes(); + Dmsg0(dbglvl, "lock volumes\n"); + + /* + * Create a temporary copy of the volume list. We do this, + * to avoid having the volume list locked during the + * call to reserve_device(), which would cause a deadlock. 
+ * Note, we may want to add an update counter on the vol_list + * so that if it is modified while we are traversing the copy + * we can take note and act accordingly (probably redo the + * search at least a few times). + */ + Dmsg0(dbglvl, "duplicate vol list\n"); + temp_vol_list = New(dlist(vol, &vol->link)); + foreach_dlist(vol, vol_list) { + VOLRES *nvol; + VOLRES *tvol = (VOLRES *)malloc(sizeof(VOLRES)); + memset(tvol, 0, sizeof(VOLRES)); + tvol->vol_name = bstrdup(vol->vol_name); + tvol->dev = vol->dev; + nvol = (VOLRES *)temp_vol_list->binary_insert(tvol, my_compare); + if (tvol != nvol) { + tvol->dev = NULL; /* don't zap dev entry */ + free_vol_item(tvol); + Pmsg0(000, "Logic error. Duplicating vol list hit duplicate.\n"); + Jmsg(jcr, M_WARNING, 0, "Logic error. Duplicating vol list hit duplicate.\n"); + } + } + Dmsg0(dbglvl, "unlock volumes\n"); + unlock_volumes(); + + /* Look through reserved volumes for one we can use */ + Dmsg0(dbglvl, "look for vol in vol list\n"); + foreach_dlist(vol, temp_vol_list) { + if (!vol->dev) { + Dmsg1(dbglvl, "vol=%s no dev\n", vol->vol_name); + continue; + } + /* Check with Director if this Volume is OK */ + bstrncpy(dcr->VolumeName, vol->vol_name, sizeof(dcr->VolumeName)); + if (!dir_get_volume_info(dcr, GET_VOL_INFO_FOR_WRITE)) { + continue; + } + + Dmsg1(dbglvl, "vol=%s OK for this job\n", vol->vol_name); + foreach_alist(store, dirstore) { + int stat; + rctx.store = store; + foreach_alist(device_name, store->device) { + /* Found a device, try to use it */ + rctx.device_name = device_name; + rctx.device = vol->dev->device; + + if (vol->dev->is_autochanger()) { + Dmsg1(dbglvl, "vol=%s is in changer\n", vol->vol_name); + if (!is_vol_in_autochanger(rctx, vol)) { + continue; + } + } else if (strcmp(device_name, vol->dev->device->hdr.name) != 0) { + Dmsg2(dbglvl, "device=%s not suitable want %s\n", + vol->dev->device->hdr.name, device_name); + continue; + } + + bstrncpy(rctx.VolumeName, vol->vol_name, sizeof(rctx.VolumeName)); 
+ rctx.have_volume = true; + /* Try reserving this device and volume */ + Dmsg2(dbglvl, "try vol=%s on device=%s\n", rctx.VolumeName, device_name); + stat = reserve_device(rctx); + if (stat == 1) { /* found available device */ + Dmsg1(dbglvl, "Suitable device found=%s\n", device_name); + ok = true; + break; + } else if (stat == 0) { /* device busy */ + Dmsg1(dbglvl, "Suitable device=%s, busy: not use\n", device_name); + } else { + /* otherwise error */ + Dmsg0(dbglvl, "No suitable device found.\n"); + } + rctx.have_volume = false; + } + if (ok) { + break; + } + } + if (ok) { + break; + } + } /* end for loop over reserved volumes */ + + Dmsg0(dbglvl, "lock volumes\n"); + lock_volumes(); + save_vol_list = vol_list; + vol_list = temp_vol_list; + free_volume_list(); /* release temp_vol_list */ + vol_list = save_vol_list; + Dmsg0(dbglvl, "deleted temp vol list\n"); + Dmsg0(dbglvl, "lock volumes\n"); + unlock_volumes(); + } + if (ok) { + Dmsg1(dbglvl, "got vol %s from in-use vols list\n", rctx.VolumeName); + return true; + } + + /* + * No reserved volume we can use, so now search for an available device. + * * For each storage device that the user specified, we * search and see if there is a resource for that device. 
*/ - Dmsg4(110, "PrefMnt=%d exact=%d suitable=%d chgronly=%d\n", - rctx.PreferMountedVols, rctx.exact_match, rctx.suitable_device, - rctx.autochanger_only); - ok = false; foreach_alist(store, dirstore) { rctx.store = store; foreach_alist(device_name, store->device) { @@ -590,14 +945,14 @@ bool find_suitable_device_for_job(JCR *jcr, RCTX &rctx) rctx.device_name = device_name; stat = search_res_for_device(rctx); if (stat == 1) { /* found available device */ - Dmsg1(100, "Suitable device found=%s\n", device_name); + Dmsg1(dbglvl, "available device found=%s\n", device_name); ok = true; break; } else if (stat == 0) { /* device busy */ - Dmsg1(110, "Suitable device found=%s, not used: busy\n", device_name); + Dmsg1(dbglvl, "Suitable device=%s, busy: not use\n", device_name); } else { /* otherwise error */ - Dmsg0(110, "No suitable device found.\n"); + Dmsg0(dbglvl, "No suitable device found.\n"); } } if (ok) { @@ -614,40 +969,30 @@ bool find_suitable_device_for_job(JCR *jcr, RCTX &rctx) int search_res_for_device(RCTX &rctx) { AUTOCHANGER *changer; - BSOCK *dir = rctx.jcr->dir_bsock; - bool ok; int stat; - Dmsg1(110, "Search res for %s\n", rctx.device_name); + Dmsg1(dbglvl, "search res for %s\n", rctx.device_name); /* Look through Autochangers first */ foreach_res(changer, R_AUTOCHANGER) { - Dmsg1(150, "Try match changer res=%s\n", changer->hdr.name); + Dmsg1(dbglvl, "Try match changer res=%s\n", changer->hdr.name); /* Find resource, and make sure we were able to open it */ - if (fnmatch(rctx.device_name, changer->hdr.name, 0) == 0) { + if (strcmp(rctx.device_name, changer->hdr.name) == 0) { /* Try each device in this AutoChanger */ foreach_alist(rctx.device, changer->device) { - Dmsg1(110, "Try changer device %s\n", rctx.device->hdr.name); + Dmsg1(dbglvl, "Try changer device %s\n", rctx.device->hdr.name); stat = reserve_device(rctx); if (stat != 1) { /* try another device */ continue; } - POOL_MEM dev_name; + /* Debug code */ if (rctx.store->append == SD_APPEND) { - 
Dmsg2(100, "Device %s reserved=%d for append.\n", rctx.device->hdr.name, - rctx.jcr->dcr->dev->reserved_device); - } else { - Dmsg2(100, "Device %s reserved=%d for read.\n", rctx.device->hdr.name, - rctx.jcr->read_dcr->dev->reserved_device); - } - if (rctx.notify_dir) { - pm_strcpy(dev_name, rctx.device->hdr.name); - bash_spaces(dev_name); - ok = bnet_fsend(dir, OK_device, dev_name.c_str()); /* Return real device name */ - Dmsg1(100, ">dird changer: %s", dir->msg); + Dmsg2(dbglvl, "Device %s reserved=%d for append.\n", + rctx.device->hdr.name, rctx.jcr->dcr->dev->reserved_device); } else { - ok = true; + Dmsg2(dbglvl, "Device %s reserved=%d for read.\n", + rctx.device->hdr.name, rctx.jcr->read_dcr->dev->reserved_device); } - return ok ? 1 : -1; + return stat; } } } @@ -655,21 +1000,22 @@ int search_res_for_device(RCTX &rctx) /* Now if requested look through regular devices */ if (!rctx.autochanger_only) { foreach_res(rctx.device, R_DEVICE) { - Dmsg1(150, "Try match res=%s\n", rctx.device->hdr.name); + Dmsg1(dbglvl, "Try match res=%s\n", rctx.device->hdr.name); /* Find resource, and make sure we were able to open it */ - if (fnmatch(rctx.device_name, rctx.device->hdr.name, 0) == 0) { + if (strcmp(rctx.device_name, rctx.device->hdr.name) == 0) { stat = reserve_device(rctx); - if (stat != 1) { - return stat; + if (stat != 1) { /* try another device */ + continue; } - if (rctx.notify_dir) { - bash_spaces(rctx.device_name); - ok = bnet_fsend(dir, OK_device, rctx.device_name); - Dmsg1(100, ">dird dev: %s", dir->msg); + /* Debug code */ + if (rctx.store->append == SD_APPEND) { + Dmsg2(dbglvl, "Device %s reserved=%d for append.\n", + rctx.device->hdr.name, rctx.jcr->dcr->dev->reserved_device); } else { - ok = true; + Dmsg2(dbglvl, "Device %s reserved=%d for read.\n", + rctx.device->hdr.name, rctx.jcr->read_dcr->dev->reserved_device); } - return ok ? 
1 : -1; + return stat; } } } @@ -690,7 +1036,7 @@ static int reserve_device(RCTX &rctx) const int name_len = MAX_NAME_LENGTH; /* Make sure MediaType is OK */ - Dmsg2(110, "MediaType device=%s request=%s\n", + Dmsg2(dbglvl, "chk MediaType device=%s request=%s\n", rctx.device->media_type, rctx.store->media_type); if (strcmp(rctx.device->media_type, rctx.store->media_type) != 0) { return -1; @@ -714,13 +1060,12 @@ static int reserve_device(RCTX &rctx) } rctx.suitable_device = true; - Dmsg2(110, "Try reserve %s JobId=%u\n", rctx.device->hdr.name, - rctx.jcr->JobId); - dcr = new_dcr(rctx.jcr, rctx.device->dev); + Dmsg1(dbglvl, "try reserve %s\n", rctx.device->hdr.name); + rctx.jcr->dcr = dcr = new_dcr(rctx.jcr, rctx.jcr->dcr, rctx.device->dev); if (!dcr) { BSOCK *dir = rctx.jcr->dir_bsock; - bnet_fsend(dir, _("3926 Could not get dcr for device: %s\n"), rctx.device_name); - Dmsg1(100, ">dird: %s", dir->msg); + dir->fsend(_("3926 Could not get dcr for device: %s\n"), rctx.device_name); + Dmsg1(dbglvl, ">dird: %s", dir->msg); return -1; } bstrncpy(dcr->pool_name, rctx.store->pool_name, name_len); @@ -728,39 +1073,88 @@ static int reserve_device(RCTX &rctx) bstrncpy(dcr->media_type, rctx.store->media_type, name_len); bstrncpy(dcr->dev_name, rctx.device_name, name_len); if (rctx.store->append == SD_APPEND) { - if (rctx.exact_match && !rctx.have_volume) { + Dmsg2(dbglvl, "have_vol=%d vol=%s\n", rctx.have_volume, rctx.VolumeName); + ok = reserve_device_for_append(dcr, rctx); + if (!ok) { + goto bail_out; + } + + rctx.jcr->dcr = dcr; + Dmsg5(dbglvl, "Reserved=%d dev_name=%s mediatype=%s pool=%s ok=%d\n", + dcr->dev->reserved_device, + dcr->dev_name, dcr->media_type, dcr->pool_name, ok); + if (!rctx.have_volume) { dcr->any_volume = true; if (dir_find_next_appendable_volume(dcr)) { bstrncpy(rctx.VolumeName, dcr->VolumeName, sizeof(rctx.VolumeName)); - Dmsg2(100, "JobId=%u looking for Volume=%s\n", rctx.jcr->JobId, rctx.VolumeName); + Dmsg1(dbglvl, "looking for Volume=%s\n", 
rctx.VolumeName); rctx.have_volume = true; } else { - Dmsg0(100, "No next volume found\n"); + Dmsg0(dbglvl, "No next volume found\n"); + rctx.have_volume = false; rctx.VolumeName[0] = 0; - } - } - ok = reserve_device_for_append(dcr, rctx); - if (ok) { - rctx.jcr->dcr = dcr; - Dmsg5(100, "Reserved=%d dev_name=%s mediatype=%s pool=%s ok=%d\n", - dcr->dev->reserved_device, - dcr->dev_name, dcr->media_type, dcr->pool_name, ok); + /* + * If there is at least one volume that is valid and in use, + * but we get here, check if we are running with prefers + * non-mounted drives. In that case, we have selected a + * non-used drive and our one and only volume is mounted + * elsewhere, so we bail out and retry using that drive. + */ + if (dcr->volume_in_use && !rctx.PreferMountedVols) { + rctx.PreferMountedVols = true; + if (dcr->VolumeName[0]) { + volume_unused(dcr); + } + goto bail_out; + } + /* + * Note. Under some circumstances, the Director can hand us + * a Volume name that is not the same as the one on the current + * drive, and in that case, the call above to find the next + * volume will fail because in attempting to reserve the Volume + * the code will realize that we already have a tape mounted, + * and it will fail. This *should* only happen if there are + * writers, thus the following test. In that case, we simply + * bail out, and continue waiting, rather than plunging on + * and hoping that the operator can resolve the problem. 
+ */ + if (dcr->dev->num_writers != 0) { + if (dcr->VolumeName[0]) { + volume_unused(dcr); + } + goto bail_out; + } + } } } else { ok = reserve_device_for_read(dcr); if (ok) { rctx.jcr->read_dcr = dcr; - Dmsg5(100, "Read reserved=%d dev_name=%s mediatype=%s pool=%s ok=%d\n", + Dmsg5(dbglvl, "Read reserved=%d dev_name=%s mediatype=%s pool=%s ok=%d\n", dcr->dev->reserved_device, dcr->dev_name, dcr->media_type, dcr->pool_name, ok); } } if (!ok) { - free_dcr(dcr); - Dmsg0(110, "Not OK.\n"); - return 0; + goto bail_out; } - return 1; + if (rctx.notify_dir) { + POOL_MEM dev_name; + BSOCK *dir = rctx.jcr->dir_bsock; + pm_strcpy(dev_name, rctx.device->hdr.name); + bash_spaces(dev_name); + ok = dir->fsend(OK_device, dev_name.c_str()); /* Return real device name */ + Dmsg1(dbglvl, ">dird: %s", dir->msg); + } else { + ok = true; + } + return ok ? 1 : -1; + +bail_out: + rctx.have_volume = false; +// free_dcr(dcr); + Dmsg0(dbglvl, "Not OK.\n"); + return 0; } /* @@ -777,13 +1171,10 @@ static bool reserve_device_for_read(DCR *dcr) ASSERT(dcr); - /* Get locks in correct order */ - unlock_reservations(); - P(dev->mutex); - lock_reservations(); + dev->dlock(); if (is_device_unmounted(dev)) { - Dmsg1(200, "Device %s is BLOCKED due to user unmount.\n", dev->print_name()); + Dmsg1(dbglvl, "Device %s is BLOCKED due to user unmount.\n", dev->print_name()); Mmsg(jcr->errmsg, _("3601 JobId=%u device %s is BLOCKED due to user unmount.\n"), jcr->JobId, dev->print_name()); queue_reserve_message(jcr); @@ -791,7 +1182,8 @@ static bool reserve_device_for_read(DCR *dcr) } if (dev->is_busy()) { - Dmsg4(200, "Device %s is busy ST_READ=%d num_writers=%d reserved=%d.\n", dev->print_name(), + Dmsg4(dbglvl, "Device %s is busy ST_READ=%d num_writers=%d reserved=%d.\n", + dev->print_name(), dev->state & ST_READ?1:0, dev->num_writers, dev->reserved_device); Mmsg(jcr->errmsg, _("3602 JobId=%u device %s is busy (already reading/writing).\n"), jcr->JobId, dev->print_name()); @@ -803,12 +1195,11 @@ static 
bool reserve_device_for_read(DCR *dcr) dev->set_read(); ok = true; dev->reserved_device++; - Dmsg3(100, "Inc reserve=%d dev=%s %p\n", dev->reserved_device, - dev->print_name(), dev); + Dmsg3(dbglvl, "Inc reserve=%d dev=%s %p\n", dev->reserved_device, dev->print_name(), dev); dcr->reserved_device = true; bail_out: - V(dev->mutex); + dev->dunlock(); return ok; } @@ -836,16 +1227,13 @@ static bool reserve_device_for_append(DCR *dcr, RCTX &rctx) ASSERT(dcr); - /* Get locks in correct order */ - unlock_reservations(); - P(dev->mutex); - lock_reservations(); + dev->dlock(); /* If device is being read, we cannot write it */ if (dev->can_read()) { Mmsg(jcr->errmsg, _("3603 JobId=%u device %s is busy reading.\n"), jcr->JobId, dev->print_name()); - Dmsg1(110, "%s", jcr->errmsg); + Dmsg1(dbglvl, "%s", jcr->errmsg); queue_reserve_message(jcr); goto bail_out; } @@ -854,30 +1242,75 @@ static bool reserve_device_for_append(DCR *dcr, RCTX &rctx) if (is_device_unmounted(dev)) { Mmsg(jcr->errmsg, _("3604 JobId=%u device %s is BLOCKED due to user unmount.\n"), jcr->JobId, dev->print_name()); - Dmsg1(110, "%s", jcr->errmsg); + Dmsg1(dbglvl, "%s", jcr->errmsg); queue_reserve_message(jcr); goto bail_out; } - Dmsg1(110, "reserve_append device is %s\n", dev->is_tape()?"tape":"disk"); + Dmsg1(dbglvl, "reserve_append device is %s\n", dev->print_name()); /* Now do detailed tests ... 
*/ if (can_reserve_drive(dcr, rctx) != 1) { - Dmsg0(110, "can_reserve_drive!=1\n"); + Dmsg0(dbglvl, "can_reserve_drive!=1\n"); goto bail_out; } dev->reserved_device++; - Dmsg3(100, "Inc reserve=%d dev=%s %p\n", dev->reserved_device, + Dmsg3(dbglvl, "Inc reserve=%d dev=%s %p\n", dev->reserved_device, dev->print_name(), dev); dcr->reserved_device = true; ok = true; bail_out: - V(dev->mutex); + dev->dunlock(); return ok; } +static int is_pool_ok(DCR *dcr) +{ + DEVICE *dev = dcr->dev; + JCR *jcr = dcr->jcr; + + /* Now check if we want the same Pool and pool type */ + if (strcmp(dev->pool_name, dcr->pool_name) == 0 && + strcmp(dev->pool_type, dcr->pool_type) == 0) { + /* OK, compatible device */ + Dmsg1(dbglvl, "OK dev: %s num_writers=0, reserved, pool matches\n", dev->print_name()); + return 1; + } else { + /* Drive Pool not suitable for us */ + Mmsg(jcr->errmsg, _( +"3608 JobId=%u wants Pool=\"%s\" but have Pool=\"%s\" nreserve=%d on drive %s.\n"), + (uint32_t)jcr->JobId, dcr->pool_name, dev->pool_name, + dev->reserved_device, dev->print_name()); + queue_reserve_message(jcr); + Dmsg2(dbglvl, "failed: busy num_writers=0, reserved, pool=%s wanted=%s\n", + dev->pool_name, dcr->pool_name); + } + return 0; +} + +static bool is_max_jobs_ok(DCR *dcr) +{ + DEVICE *dev = dcr->dev; + JCR *jcr = dcr->jcr; + + Dmsg4(dbglvl, "MaxJobs=%d Jobs=%d reserves=%d Vol=%s\n", + dcr->VolCatInfo.VolCatMaxJobs, + dcr->VolCatInfo.VolCatJobs, dev->reserved_device, + dcr->VolumeName); + if (dcr->VolCatInfo.VolCatMaxJobs > 0 && dcr->VolCatInfo.VolCatMaxJobs <= + (dcr->VolCatInfo.VolCatJobs + dev->reserved_device)) { + /* Max Job Vols depassed or already reserved */ + Mmsg(jcr->errmsg, _("3610 JobId=%u Volume max jobs exceeded on drive %s.\n"), + (uint32_t)jcr->JobId, dev->print_name()); + queue_reserve_message(jcr); + Dmsg1(dbglvl, "reserve dev failed: %s", jcr->errmsg); + return false; /* wait */ + } + return true; +} + /* * Returns: 1 if drive can be reserved * 0 if we should wait @@ -888,10 
+1321,15 @@ static int can_reserve_drive(DCR *dcr, RCTX &rctx) DEVICE *dev = dcr->dev; JCR *jcr = dcr->jcr; - Dmsg5(110, "PrefMnt=%d exact=%d suitable=%d chgronly=%d any=%d\n", + Dmsg5(dbglvl, "PrefMnt=%d exact=%d suitable=%d chgronly=%d any=%d\n", rctx.PreferMountedVols, rctx.exact_match, rctx.suitable_device, rctx.autochanger_only, rctx.any_drive); + /* Check for max jobs on this Volume */ + if (!is_max_jobs_ok(dcr)) { + return 0; + } + /* setting any_drive overrides PreferMountedVols flag */ if (!rctx.any_drive) { /* @@ -901,8 +1339,8 @@ static int can_reserve_drive(DCR *dcr, RCTX &rctx) * helps spread the load to the least used drives. */ if (rctx.try_low_use_drive && dev == rctx.low_use_drive) { - Dmsg3(110, "OK dev=%s == low_drive=%s. JobId=%u\n", - dev->print_name(), rctx.low_use_drive->print_name(), jcr->JobId); + Dmsg2(dbglvl, "OK dev=%s == low_drive=%s.\n", + dev->print_name(), rctx.low_use_drive->print_name()); return 1; } /* If he wants a free drive, but this one is busy, no go */ @@ -911,13 +1349,12 @@ static int can_reserve_drive(DCR *dcr, RCTX &rctx) if ((dev->num_writers + dev->reserved_device) < rctx.num_writers) { rctx.num_writers = dev->num_writers + dev->reserved_device; rctx.low_use_drive = dev; - Dmsg2(110, "set low use drive=%s num_writers=%d\n", dev->print_name(), - rctx.num_writers); + Dmsg2(dbglvl, "set low use drive=%s num_writers=%d\n", + dev->print_name(), rctx.num_writers); } else { - Dmsg1(110, "not low use num_writers=%d\n", dev->num_writers+ - dev->reserved_device); + Dmsg1(dbglvl, "not low use num_writers=%d\n", dev->num_writers+dev->reserved_device); } - Dmsg1(110, "failed: !prefMnt && busy. 
JobId=%u\n", jcr->JobId); + Dmsg0(dbglvl, "failed: !prefMnt && busy.\n"); Mmsg(jcr->errmsg, _("3605 JobId=%u wants free drive but device %s is busy.\n"), jcr->JobId, dev->print_name()); queue_reserve_message(jcr); @@ -925,33 +1362,48 @@ static int can_reserve_drive(DCR *dcr, RCTX &rctx) } /* Check for prefer mounted volumes */ - if (rctx.PreferMountedVols && !dev->VolHdr.VolumeName[0] && dev->is_tape()) { + if (rctx.PreferMountedVols && !dev->vol && dev->is_tape()) { Mmsg(jcr->errmsg, _("3606 JobId=%u prefers mounted drives, but drive %s has no Volume.\n"), jcr->JobId, dev->print_name()); queue_reserve_message(jcr); - Dmsg1(110, "failed: want mounted -- no vol JobId=%u\n", jcr->JobId); + Dmsg0(dbglvl, "failed: want mounted -- no vol\n"); return 0; /* No volume mounted */ } /* Check for exact Volume name match */ - if (rctx.exact_match && rctx.have_volume && - strcmp(dev->VolHdr.VolumeName, rctx.VolumeName) != 0) { - Mmsg(jcr->errmsg, _("3607 JobId=%u wants Vol=\"%s\" drive has Vol=\"%s\" on drive %s.\n"), - jcr->JobId, rctx.VolumeName, dev->VolHdr.VolumeName, - dev->print_name()); - queue_reserve_message(jcr); - Dmsg2(110, "failed: Not exact match have=%s want=%s\n", - dev->VolHdr.VolumeName, rctx.VolumeName); - return 0; + /* ***FIXME*** for Disk, we can accept any volume that goes with this + * drive. 
+ */ + if (rctx.exact_match && rctx.have_volume) { + bool ok; + Dmsg5(dbglvl, "PrefMnt=%d exact=%d suitable=%d chgronly=%d any=%d\n", + rctx.PreferMountedVols, rctx.exact_match, rctx.suitable_device, + rctx.autochanger_only, rctx.any_drive); + Dmsg4(dbglvl, "have_vol=%d have=%s resvol=%s want=%s\n", + rctx.have_volume, dev->VolHdr.VolumeName, + dev->vol?dev->vol->vol_name:"*none*", rctx.VolumeName); + ok = strcmp(dev->VolHdr.VolumeName, rctx.VolumeName) == 0 || + (dev->vol && strcmp(dev->vol->vol_name, rctx.VolumeName) == 0); + if (!ok) { + Mmsg(jcr->errmsg, _("3607 JobId=%u wants Vol=\"%s\" drive has Vol=\"%s\" on drive %s.\n"), + jcr->JobId, rctx.VolumeName, dev->VolHdr.VolumeName, + dev->print_name()); + queue_reserve_message(jcr); + Dmsg3(dbglvl, "not OK: dev have=%s resvol=%s want=%s\n", + dev->VolHdr.VolumeName, dev->vol?dev->vol->vol_name:"*none*", rctx.VolumeName); + return 0; + } + if (is_volume_in_use(dcr)) { + return 0; /* fail if volume on another drive */ + } } } /* Check for unused autochanger drive */ - if (rctx.autochanger_only && dev->num_writers == 0 && + if (rctx.autochanger_only && !dev->is_busy() && dev->VolHdr.VolumeName[0] == 0) { /* Device is available but not yet reserved, reserve it for us */ - Dmsg2(100, "OK Res Unused autochanger %s JobId=%u.\n", - dev->print_name(), jcr->JobId); + Dmsg1(dbglvl, "OK Res Unused autochanger %s.\n", dev->print_name()); bstrncpy(dev->pool_name, dcr->pool_name, sizeof(dev->pool_name)); bstrncpy(dev->pool_type, dcr->pool_type, sizeof(dev->pool_type)); return 1; /* reserve drive */ @@ -963,39 +1415,18 @@ static int can_reserve_drive(DCR *dcr, RCTX &rctx) if (dev->num_writers == 0) { /* Now check if there are any reservations on the drive */ if (dev->reserved_device) { - /* Now check if we want the same Pool and pool type */ - if (strcmp(dev->pool_name, dcr->pool_name) == 0 && - strcmp(dev->pool_type, dcr->pool_type) == 0) { - /* OK, compatible device */ - Dmsg2(100, "OK dev: %s num_writers=0, reserved, pool 
matches JobId=%u\n", - dev->print_name(), jcr->JobId); - return 1; - } else { - /* Drive Pool not suitable for us */ - Mmsg(jcr->errmsg, _("3608 JobId=%u wants Pool=\"%s\" but have Pool=\"%s\" on drive %s.\n"), - jcr->JobId, dcr->pool_name, dev->pool_name, dev->print_name()); - queue_reserve_message(jcr); - Dmsg2(110, "failed: busy num_writers=0, reserved, pool=%s wanted=%s\n", - dev->pool_name, dcr->pool_name); - return 0; /* wait */ - } + return is_pool_ok(dcr); } else if (dev->can_append()) { - /* Device in append mode, check if changing pool */ - if (strcmp(dev->pool_name, dcr->pool_name) == 0 && - strcmp(dev->pool_type, dcr->pool_type) == 0) { - Dmsg2(100, "OK dev: %s num_writers=0, can_append, pool matches. JobId=%u\n", - dev->print_name(), jcr->JobId); - /* OK, compatible device */ - return 1; + if (is_pool_ok(dcr)) { + return 1; } else { /* Changing pool, unload old tape if any in drive */ - Dmsg0(100, "OK dev: num_writers=0, not reserved, pool change, unload changer\n"); + Dmsg0(dbglvl, "OK dev: num_writers=0, not reserved, pool change, unload changer\n"); unload_autochanger(dcr, 0); } } /* Device is available but not yet reserved, reserve it for us */ - Dmsg2(100, "OK Dev avail reserved %s JobId=%u\n", dev->print_name(), - jcr->JobId); + Dmsg1(dbglvl, "OK Dev avail reserved %s\n", dev->print_name()); bstrncpy(dev->pool_name, dcr->pool_name, sizeof(dev->pool_name)); bstrncpy(dev->pool_type, dcr->pool_type, sizeof(dev->pool_type)); return 1; /* reserve drive */ @@ -1006,24 +1437,9 @@ static int can_reserve_drive(DCR *dcr, RCTX &rctx) * available if pool is the same). */ if (dev->can_append() || dev->num_writers > 0) { - /* Yes, now check if we want the same Pool and pool type */ - if (strcmp(dev->pool_name, dcr->pool_name) == 0 && - strcmp(dev->pool_type, dcr->pool_type) == 0) { - Dmsg2(100, "OK dev: %s num_writers>=0, can_append, pool matches. 
JobId=%u\n", - dev->print_name(), jcr->JobId); - /* OK, compatible device */ - return 1; - } else { - /* Drive Pool not suitable for us */ - Mmsg(jcr->errmsg, _("3609 JobId=%u wants Pool=\"%s\" but has Pool=\"%s\" on drive %s.\n"), - jcr->JobId, dcr->pool_name, dev->pool_name, dev->print_name()); - queue_reserve_message(jcr); - Dmsg2(110, "failed: busy num_writers>0, can_append, pool=%s wanted=%s\n", - dev->pool_name, dcr->pool_name); - return 0; /* wait */ - } + return is_pool_ok(dcr); } else { - Pmsg0(000, _("Logic error!!!! Should not get here.\n")); + Pmsg1(000, _("Logic error!!!! JobId=%u Should not get here.\n"), (int)jcr->JobId); Mmsg(jcr->errmsg, _("3910 JobId=%u Logic error!!!! drive %s Should not get here.\n"), jcr->JobId, dev->print_name()); queue_reserve_message(jcr); @@ -1033,21 +1449,27 @@ static int can_reserve_drive(DCR *dcr, RCTX &rctx) Mmsg(jcr->errmsg, _("3911 JobId=%u failed reserve drive %s.\n"), jcr->JobId, dev->print_name()); queue_reserve_message(jcr); - Dmsg2(110, "failed: No reserve %s JobId=%u\n", dev->print_name(), jcr->JobId); + Dmsg1(dbglvl, "failed: No reserve %s\n", dev->print_name()); return 0; } + + + /* - * search_lock is already set on entering this routine + * Queue a reservation error or failure message for this jcr */ static void queue_reserve_message(JCR *jcr) { int i; - alist *msgs = jcr->reserve_msgs; + alist *msgs; char *msg; + jcr->lock(); + + msgs = jcr->reserve_msgs; if (!msgs) { - return; + goto bail_out; } /* * Look for duplicate message. 
If found, do @@ -1056,15 +1478,18 @@ static void queue_reserve_message(JCR *jcr) for (i=msgs->size()-1; i >= 0; i--) { msg = (char *)msgs->get(i); if (!msg) { - return; + goto bail_out; } /* Comparison based on 4 digit message number */ if (strncmp(msg, jcr->errmsg, 4) == 0) { - return; + goto bail_out; } } /* Message unique, so insert it */ jcr->reserve_msgs->push(bstrdup(jcr->errmsg)); + +bail_out: + jcr->unlock(); } /* @@ -1076,11 +1501,10 @@ void send_drive_reserve_messages(JCR *jcr, void sendit(const char *msg, int len, alist *msgs; char *msg; - lock_reservations(); + jcr->lock(); msgs = jcr->reserve_msgs; if (!msgs || msgs->size() == 0) { - unlock_reservations(); - return; + goto bail_out; } for (i=msgs->size()-1; i >= 0; i--) { msg = (char *)msgs->get(i); @@ -1091,5 +1515,44 @@ void send_drive_reserve_messages(JCR *jcr, void sendit(const char *msg, int len, break; } } - unlock_reservations(); + +bail_out: + jcr->unlock(); +} + +/* + * Pop and release any reservations messages + */ +static void pop_reserve_messages(JCR *jcr) +{ + alist *msgs; + char *msg; + + jcr->lock(); + msgs = jcr->reserve_msgs; + if (!msgs) { + goto bail_out; + } + while ((msg = (char *)msgs->pop())) { + free(msg); + } +bail_out: + jcr->unlock(); +} + +/* + * Also called from acquire.c + */ +void release_reserve_messages(JCR *jcr) +{ + pop_reserve_messages(jcr); + jcr->lock(); + if (!jcr->reserve_msgs) { + goto bail_out; + } + delete jcr->reserve_msgs; + jcr->reserve_msgs = NULL; + +bail_out: + jcr->unlock(); }