From 1b0d80ab92df7aa1e1a7918b48d053a3e8020587 Mon Sep 17 00:00:00 2001 From: Kern Sibbald Date: Sun, 21 May 2017 17:22:58 +0200 Subject: [PATCH] Fix race in steal_device_lock shown in truncate-concurrent-test --- bacula/src/stored/dev.h | 7 ++++--- bacula/src/stored/dircmd.c | 41 +++++++++++++++++++++++--------------- bacula/src/stored/lock.c | 27 ++++++++++++++++++------- bacula/src/stored/lock.h | 4 ++-- bacula/src/stored/protos.h | 4 ++-- 5 files changed, 53 insertions(+), 30 deletions(-) diff --git a/bacula/src/stored/dev.h b/bacula/src/stored/dev.h index 127abd6b67..5ead2a508c 100644 --- a/bacula/src/stored/dev.h +++ b/bacula/src/stored/dev.h @@ -389,8 +389,9 @@ public: int can_write() const { return is_open() && can_append() && is_labeled() && !at_weot(); } bool can_read() const { return (state & ST_READ) != 0; } - bool can_steal_lock() const { return m_blocked && - (m_blocked == BST_UNMOUNTED || + bool can_obtain_block() const { return + (m_blocked == BST_NOT_BLOCKED || + m_blocked == BST_UNMOUNTED || m_blocked == BST_WAITING_FOR_SYSOP || m_blocked == BST_UNMOUNTED_WAITING_FOR_SYSOP); }; bool waiting_for_mount() const { return @@ -580,7 +581,7 @@ public: virtual void write_adata(DCR *dcr, DEV_RECORD *rec) { return; }; virtual void write_cont_adata(DCR *dcr, DEV_RECORD *rec) { return; }; virtual int write_adata_rechdr(DCR *dcr, DEV_RECORD *rec) { return -1; }; - virtual bool have_adata_header(DCR *dcr, DEV_RECORD *rec, + virtual bool have_adata_header(DCR *dcr, DEV_RECORD *rec, int32_t FileIndex, int32_t Stream, uint32_t VolSessionId) { return false; }; virtual bool read_adata_record_header(DCR *dcr, DEV_BLOCK *block, diff --git a/bacula/src/stored/dircmd.c b/bacula/src/stored/dircmd.c index 5f2d2a7191..fd03959cdc 100644 --- a/bacula/src/stored/dircmd.c +++ b/bacula/src/stored/dircmd.c @@ -607,7 +607,7 @@ static bool do_label(JCR *jcr, int relabel) if (reserve_volume(dcr, newname) == NULL) { ok = false; } - Dmsg1(400, "Reserved volume \"%s\"\n", newname); + Dmsg1(400, "Reserved Volume=%s for relabel/truncate.\n", newname); } else { ok = false; } @@ -619,7 +619,7 @@ static bool do_label(JCR *jcr, int relabel) } /* some command use recv and don't accept catalog update. - * it's not the case here, so we force dir_update_volume_info catalog update */ + * it's not the case here, so we force dir_update_volume_info catalog update */ dcr->force_update_volume_info = true; if (!dev->is_open() && !dev->is_busy()) { @@ -627,8 +627,8 @@ static bool do_label(JCR *jcr, int relabel) label_volume_if_ok(dcr, oldname, newname, poolname, slot, relabel); dev->close(dcr); /* Under certain "safe" conditions, we can steal the lock */ - } else if (dev->can_steal_lock()) { - Dmsg0(400, "Can relabel. can_steal_lock\n"); + } else if (dev->can_obtain_block()) { + Dmsg0(400, "Can relabel. can_obtain_block\n"); label_volume_if_ok(dcr, oldname, newname, poolname, slot, relabel); } else if (dev->is_busy() || dev->is_blocked()) { send_dir_busy_message(dir, dev); @@ -718,7 +718,7 @@ static bool truncate_cache_cmd(JCR *jcr) dev->Unlock(); goto bail_out; } - if ((!dev->is_open() && !dev->is_busy()) || dev->can_steal_lock()) { + if ((!dev->is_open() && !dev->is_busy()) || dev->can_obtain_block()) { Dmsg0(400, "Call truncate_cache\n"); nbpart = dev->truncate_cache(dcr, volname, &size); if (nbpart >= 0) { @@ -834,7 +834,7 @@ static bool upload_cmd(JCR *jcr) dev->Unlock(); goto bail_out; } - if ((!dev->is_open() && !dev->is_busy()) || dev->can_steal_lock()) { + if ((!dev->is_open() && !dev->is_busy()) || dev->can_obtain_block()) { Dmsg0(400, "Can upload, because device is not open.\n"); dev->setVolCatName(volname); dev->part = 0; @@ -911,7 +911,10 @@ static void label_volume_if_ok(DCR *dcr, char *oldname, const char *volname = (relabel == 1) ? oldname : newname; uint64_t volCatBytes; - steal_device_lock(dev, &hold, BST_WRITING_LABEL); + if (!obtain_device_block(dev, &hold, BST_WRITING_LABEL)) { + send_dir_busy_message(dir, dev); + return; + } Dmsg1(100, "Stole device %s lock, writing label.\n", dev->print_name()); Dmsg0(90, "try_autoload_device - looking for volume_info\n"); @@ -1014,14 +1017,14 @@ bail_out: if (dev->is_open() && !dev->has_cap(CAP_ALWAYSOPEN)) { dev->close(dcr); } - + dev->end_of_job(dcr); - + if (!dev->is_open()) { dev->clear_volhdr(); } volume_unused(dcr); /* no longer using volume */ - give_back_device_lock(dev, &hold); + give_back_device_block(dev, &hold); return; } @@ -1039,7 +1042,10 @@ static bool read_label(DCR *dcr) bsteal_lock_t hold; DEVICE *dev = dcr->dev; - steal_device_lock(dev, &hold, BST_DOING_ACQUIRE); + if (!obtain_device_block(dev, &hold, BST_DOING_ACQUIRE)) { + send_dir_busy_message(dir, dev); + return false; + } dcr->VolumeName[0] = 0; dev->clear_labeled(); /* force read of label */ @@ -1055,7 +1061,7 @@ static bool read_label(DCR *dcr) break; } volume_unused(dcr); - give_back_device_lock(dev, &hold); + give_back_device_block(dev, &hold); return ok; } @@ -1749,7 +1755,7 @@ static bool changer_cmd(JCR *jcr) dir->fsend(_("3998 Device \"%s\" is not an autochanger.\n"), dev->print_name()); /* Under certain "safe" conditions, we can steal the lock */ - } else if (safe_cmd || !dev->is_open() || dev->can_steal_lock()) { + } else if (safe_cmd || !dev->is_open() || dev->can_obtain_block()) { autochanger_cmd(dcr, dir, cmd); } else if (dev->is_busy() || dev->is_blocked()) { send_dir_busy_message(dir, dev); @@ -1791,7 +1797,7 @@ static bool readlabel_cmd(JCR *jcr) read_volume_label(jcr, dcr, dev, Slot); dev->close(dcr); /* Under certain "safe" conditions, we can steal the lock */ - } else if (dev->can_steal_lock()) { + } else if (dev->can_obtain_block()) { read_volume_label(jcr, dcr, dev, Slot); } else if (dev->is_busy() || dev->is_blocked()) { send_dir_busy_message(dir, dev); @@ -1823,7 +1829,10 @@ static void read_volume_label(JCR *jcr, DCR *dcr, DEVICE *dev, int Slot) bsteal_lock_t hold; dcr->set_dev(dev); - steal_device_lock(dev, &hold, BST_WRITING_LABEL); + if (!obtain_device_block(dev, &hold, BST_WRITING_LABEL)) { + send_dir_busy_message(dir, dev); + return; + } if (!try_autoload_device(jcr, dcr, Slot, "")) { goto bail_out; /* error */ @@ -1843,7 +1852,7 @@ static void read_volume_label(JCR *jcr, DCR *dcr, DEVICE *dev, int Slot) } bail_out: - give_back_device_lock(dev, &hold); + give_back_device_block(dev, &hold); return; } diff --git a/bacula/src/stored/lock.c b/bacula/src/stored/lock.c index aebc31a787..0c485a7d35 100644 --- a/bacula/src/stored/lock.c +++ b/bacula/src/stored/lock.c @@ -113,13 +113,13 @@ const int dbglvl = 500; * if waiting threads * pthread_cond_broadcast * - * steal_device_lock() does (must be locked and blocked at entry) + * obtain_device_block() does (must be locked and blocked at entry) * save status * set new blocked status * set new pid * Unlock() * - * give_back_device_lock() does (must be blocked but not locked) + * give_back_device_block() does (must be blocked but not locked) * Lock() * reset blocked status * save previous blocked @@ -430,34 +430,46 @@ void _unblock_device(const char *file, int line, DEVICE *dev) } } +static pthread_mutex_t block_mutex = PTHREAD_MUTEX_INITIALIZER; /* - * Enter with device locked and blocked - * Exit with device unlocked and blocked by us. + * Enter with device locked + * + * Note: actually this routine: + * returns true if it can either set or steal the device block + * returns false if it cannot block the device */ -void _steal_device_lock(const char *file, int line, DEVICE *dev, bsteal_lock_t *hold, int state) +bool _obtain_device_block(const char *file, int line, DEVICE *dev, bsteal_lock_t *hold, int state) { + P(block_mutex); Dmsg4(sd_dbglvl, "Steal lock %s old=%s from %s:%d\n", dev->device->hdr.name, dev->print_blocked(), file, line); + if (!dev->can_obtain_block()) { + V(block_mutex); + return false; + } hold->dev_blocked = dev->blocked(); hold->dev_prev_blocked = dev->dev_prev_blocked; hold->no_wait_id = dev->no_wait_id; hold->blocked_by = dev->blocked_by; dev->set_blocked(state); - Dmsg1(sd_dbglvl, "steal lock. new=%s\n", dev->print_blocked()); + Dmsg1(sd_dbglvl, "steal block. new=%s\n", dev->print_blocked()); dev->no_wait_id = pthread_self(); dev->blocked_by = get_jobid_from_tsd(); + V(block_mutex); dev->Unlock(); + return true; } /* * Enter with device blocked by us but not locked * Exit with device locked, and blocked by previous owner */ -void _give_back_device_lock(const char *file, int line, DEVICE *dev, bsteal_lock_t *hold) +void _give_back_device_block(const char *file, int line, DEVICE *dev, bsteal_lock_t *hold) { Dmsg4(sd_dbglvl, "Return lock %s old=%s from %s:%d\n", dev->device->hdr.name, dev->print_blocked(), file, line); dev->Lock(); + P(block_mutex); dev->set_blocked(hold->dev_blocked); dev->dev_prev_blocked = hold->dev_prev_blocked; dev->no_wait_id = hold->no_wait_id; @@ -466,6 +478,7 @@ void _give_back_device_lock(const char *file, int line, DEVICE *dev, bsteal_lock if (dev->num_waiting > 0) { pthread_cond_broadcast(&dev->wait); /* wake them up */ } + V(block_mutex); } const char *DEVICE::print_blocked() const diff --git a/bacula/src/stored/lock.h b/bacula/src/stored/lock.h index bc0a3a286b..5742f3db73 100644 --- a/bacula/src/stored/lock.h +++ b/bacula/src/stored/lock.h @@ -88,8 +88,8 @@ void _unlock_volumes(); #define block_device(d, s) _block_device(__FILE__, __LINE__, (d), s) #define unblock_device(d) _unblock_device(__FILE__, __LINE__, (d)) -#define steal_device_lock(d, p, s) _steal_device_lock(__FILE__, __LINE__, (d), (p), s) -#define give_back_device_lock(d, p) _give_back_device_lock(__FILE__, __LINE__, (d), (p)) +#define obtain_device_block(d, p, s) _obtain_device_block(__FILE__, __LINE__, (d), (p), s) +#define give_back_device_block(d, p) _give_back_device_block(__FILE__, __LINE__, (d), (p)) /* m_blocked states (mutually exclusive) */ enum { diff --git a/bacula/src/stored/protos.h b/bacula/src/stored/protos.h index cb9ab55d3a..7c7cddbc66 100644 --- a/bacula/src/stored/protos.h +++ b/bacula/src/stored/protos.h @@ -189,8 +189,8 @@ void _lock_device(const char *file, int line, DEVICE *dev); void _unlock_device(const char *file, int line, DEVICE *dev); void _block_device(const char *file, int line, DEVICE *dev, int state); void _unblock_device(const char *file, int line, DEVICE *dev); -void _steal_device_lock(const char *file, int line, DEVICE *dev, bsteal_lock_t *hold, int state); -void _give_back_device_lock(const char *file, int line, DEVICE *dev, bsteal_lock_t *hold); +bool _obtain_device_block(const char *file, int line, DEVICE *dev, bsteal_lock_t *hold, int state); +void _give_back_device_block(const char *file, int line, DEVICE *dev, bsteal_lock_t *hold); /* From match_bsr.c */ -- 2.39.5