From 92881f5d8acce0a570e12da241f11e63d16f557e Mon Sep 17 00:00:00 2001 From: Kern Sibbald Date: Wed, 9 Dec 2009 08:45:38 +0100 Subject: [PATCH] Fix SD Vol+dev lock race bug --- bacula/src/stored/acquire.c | 19 ++++++++++--------- bacula/src/stored/append.c | 2 +- bacula/src/stored/lock.c | 4 +++- bacula/src/stored/lock.h | 3 ++- bacula/src/stored/mac.c | 2 +- bacula/src/stored/mount.c | 3 +++ bacula/src/stored/spool.c | 13 ++++--------- 7 files changed, 24 insertions(+), 22 deletions(-) diff --git a/bacula/src/stored/acquire.c b/bacula/src/stored/acquire.c index c1962e1aba..d1b41e20ad 100644 --- a/bacula/src/stored/acquire.c +++ b/bacula/src/stored/acquire.c @@ -436,8 +436,8 @@ get_out: * This job is done, so release the device. From a Unix standpoint, * the device remains open. * - * Note, if we are spooling, we may enter with the device locked. - * However, in all cases, unlock the device when leaving. + * Note, if we are spooling, we may enter with the device blocked. + * However, in all cases, unblock the device when leaving. * */ bool release_device(DCR *dcr) @@ -447,11 +447,13 @@ bool release_device(DCR *dcr) bool ok = true; char tbuf[100]; - /* lock only if not already locked by this thread */ - if (!dcr->is_dev_locked()) { - dev->r_dlock(); - } lock_volumes(); + dev->dlock(); + if (!dev->is_blocked()) { + block_device(dev, BST_RELEASING); + } else if (dev->blocked() == BST_DESPOOLING) { + dev->set_blocked(BST_RELEASING); + } Dmsg2(100, "release_device device %s is %s\n", dev->print_name(), dev->is_tape()?"tape":"disk"); /* if device is reserved, job never started, so release the reserve here */ @@ -508,11 +510,9 @@ bool release_device(DCR *dcr) */ volume_unused(dcr); } - unlock_volumes(); Dmsg3(100, "%d writers, %d reserve, dev=%s\n", dev->num_writers, dev->num_reserved(), dev->print_name()); - /* If no writers, close if file or !CAP_ALWAYS_OPEN */ if (dev->num_writers == 0 && (!dev->is_tape() || !dev->has_cap(CAP_ALWAYSOPEN))) { dvd_remove_empty_part(dcr); /* get rid of any empty spool part */ @@ -550,7 +550,8 @@ bool release_device(DCR *dcr) Dmsg2(100, "JobId=%u broadcast wait_device_release at %s\n", (uint32_t)jcr->JobId, bstrftimes(tbuf, sizeof(tbuf), (utime_t)time(NULL))); pthread_cond_broadcast(&wait_device_release); - dev->dunlock(); + dev->dunblock(true); + unlock_volumes(); if (dcr->keep_dcr) { detach_dcr_from_dev(dcr); } else { diff --git a/bacula/src/stored/append.c b/bacula/src/stored/append.c index e97bbd7474..0623426503 100644 --- a/bacula/src/stored/append.c +++ b/bacula/src/stored/append.c @@ -314,7 +314,7 @@ bool do_append_data(JCR *jcr) if (!ok) { discard_data_spool(dcr); } else { - /* Note: if commit is OK, the device will remain locked */ + /* Note: if commit is OK, the device will remain blocked */ commit_data_spool(dcr); } diff --git a/bacula/src/stored/lock.c b/bacula/src/stored/lock.c index 7270773593..110c3278d8 100644 --- a/bacula/src/stored/lock.c +++ b/bacula/src/stored/lock.c @@ -1,7 +1,7 @@ /* Bacula® - The Network Backup Solution - Copyright (C) 2000-2008 Free Software Foundation Europe e.V. + Copyright (C) 2000-2009 Free Software Foundation Europe e.V. The main author of Bacula is Kern Sibbald, with contributions from many others, a complete list can be found in the file AUTHORS. @@ -328,6 +328,8 @@ const char *DEVICE::print_blocked() const return "BST_MOUNT"; case BST_DESPOOLING: return "BST_DESPOOLING"; + case BST_RELEASING: + return "BST_RELEASING"; default: return _("unknown blocked code"); } diff --git a/bacula/src/stored/lock.h b/bacula/src/stored/lock.h index 98ab6d2f01..10b6ac1a0d 100644 --- a/bacula/src/stored/lock.h +++ b/bacula/src/stored/lock.h @@ -59,7 +59,8 @@ enum { BST_WRITING_LABEL, /* Labeling a tape */ BST_UNMOUNTED_WAITING_FOR_SYSOP, /* User unmounted during wait for op */ BST_MOUNT, /* Mount request */ - BST_DESPOOLING /* Despooling -- i.e. multiple writes */ + BST_DESPOOLING, /* Despooling -- i.e. multiple writes */ + BST_RELEASING /* Releasing the device */ }; typedef struct s_steal_lock { diff --git a/bacula/src/stored/mac.c b/bacula/src/stored/mac.c index 1c02637945..6599466477 100644 --- a/bacula/src/stored/mac.c +++ b/bacula/src/stored/mac.c @@ -136,7 +136,7 @@ ok_out: if (!ok) { discard_data_spool(jcr->dcr); } else { - /* Note: if commit is OK, the device will remain locked */ + /* Note: if commit is OK, the device will remain blocked */ commit_data_spool(jcr->dcr); } diff --git a/bacula/src/stored/mount.c b/bacula/src/stored/mount.c index 96fb652320..462b9b5b4f 100644 --- a/bacula/src/stored/mount.c +++ b/bacula/src/stored/mount.c @@ -310,6 +310,9 @@ no_lock_bail_out: * This routine is meant to be called once the first pass * to ensure that we have a candidate volume to mount. * Otherwise, we ask the sysop to created one. + * Note, the the Volumes are locked on entry. + * They are unlocked on failure and remain locked on + * success. The caller must know this!!! */ bool DCR::find_a_volume() { diff --git a/bacula/src/stored/spool.c b/bacula/src/stored/spool.c index d40f7a85ae..f2dde69b16 100644 --- a/bacula/src/stored/spool.c +++ b/bacula/src/stored/spool.c @@ -358,17 +358,12 @@ static bool despool_data(DCR *dcr, bool commit) free(rdev); dcr->spooling = true; /* turn on spooling again */ dcr->despooling = false; - - /* - * We are done, so unblock the device, but if we have done a - * commit, leave it locked so that the job cleanup does not - * need to wait to release the device (no re-acquire of the lock). + /* + * Note, if committing we leave the device blocked. It will be removed in + * release_device(); */ - dcr->dlock(); - unblock_device(dcr->dev); - /* If doing a commit, leave the device locked -- unlocked in release_device() */ if (!commit) { - dcr->dunlock(); + dcr->dev->dunblock(); } set_jcr_job_status(jcr, JS_Running); dir_send_job_status(jcr); -- 2.39.5