#ifdef MDB_USE_POSIX_SEM
# define MDB_USE_HASH 1
#include <semaphore.h>
+#else
+#define MDB_USE_POSIX_MUTEX 1
+#endif
#endif
+
+#if defined(_WIN32) + defined(MDB_USE_POSIX_SEM) \
+ + defined(MDB_USE_POSIX_MUTEX) != 1
+# error "Ambiguous shared-lock implementation"
#endif
#ifdef USE_VALGRIND
#define ESECT
#endif
+#ifdef _MSC_VER
+#define CALL_CONV WINAPI
+#else
+#define CALL_CONV
+#endif
+
/** @defgroup internal LMDB Internals
* @{
*/
# define mdb_func_ "<mdb_unknown>"
#endif
+/* Internal error codes, not exposed outside liblmdb */
+#define MDB_NO_ROOT (MDB_LAST_ERRCODE + 10)
+#ifdef _WIN32
+#define MDB_OWNERDEAD ((int) WAIT_ABANDONED)
+#elif defined(MDB_USE_POSIX_MUTEX) && defined(EOWNERDEAD)
+#define MDB_OWNERDEAD EOWNERDEAD /**< #LOCK_MUTEX0() result if dead owner */
+#endif
+
+#ifdef MDB_OWNERDEAD
+#define MDB_ROBUST_SUPPORTED 1
+#endif
+
#ifdef _WIN32
#define MDB_USE_HASH 1
#define MDB_PIDLOCK 0
#define pthread_t HANDLE
#define pthread_mutex_t HANDLE
#define pthread_cond_t HANDLE
+typedef HANDLE mdb_mutex_t, mdb_mutexref_t;
#define pthread_key_t DWORD
#define pthread_self() GetCurrentThreadId()
#define pthread_key_create(x,y) \
#define pthread_cond_wait(cond,mutex) do{SignalObjectAndWait(*mutex, *cond, INFINITE, FALSE); WaitForSingleObject(*mutex, INFINITE);}while(0)
#define THREAD_CREATE(thr,start,arg) thr=CreateThread(NULL,0,start,arg,0,NULL)
#define THREAD_FINISH(thr) WaitForSingleObject(thr, INFINITE)
-#define LOCK_MUTEX_R(env) pthread_mutex_lock(&(env)->me_rmutex)
-#define UNLOCK_MUTEX_R(env) pthread_mutex_unlock(&(env)->me_rmutex)
-#define LOCK_MUTEX_W(env) pthread_mutex_lock(&(env)->me_wmutex)
-#define UNLOCK_MUTEX_W(env) pthread_mutex_unlock(&(env)->me_wmutex)
+#define LOCK_MUTEX0(mutex) WaitForSingleObject(mutex, INFINITE)
+#define UNLOCK_MUTEX(mutex) ReleaseMutex(mutex)
+#define mdb_mutex_consistent(mutex) 0
#define getpid() GetCurrentProcessId()
#define MDB_FDATASYNC(fd) (!FlushFileBuffers(fd))
#define MDB_MSYNC(addr,len,flags) (!FlushViewOfFile(addr,len))
#ifdef MDB_USE_POSIX_SEM
-#define LOCK_MUTEX_R(env) mdb_sem_wait((env)->me_rmutex)
-#define UNLOCK_MUTEX_R(env) sem_post((env)->me_rmutex)
-#define LOCK_MUTEX_W(env) mdb_sem_wait((env)->me_wmutex)
-#define UNLOCK_MUTEX_W(env) sem_post((env)->me_wmutex)
+typedef sem_t *mdb_mutex_t, *mdb_mutexref_t;
+#define LOCK_MUTEX0(mutex) mdb_sem_wait(mutex)
+#define UNLOCK_MUTEX(mutex) sem_post(mutex)
static int
mdb_sem_wait(sem_t *sem)
return rc;
}
-#else
- /** Lock the reader mutex.
+#else /* MDB_USE_POSIX_MUTEX: */
+ /** Shared mutex/semaphore as it is stored (mdb_mutex_t), and as
+ * local variables keep it (mdb_mutexref_t).
+ *
+ * When #mdb_mutexref_t is a pointer declaration and #mdb_mutex_t is
+ * not, then it is array[size 1] so it can be assigned to a pointer.
+ * @{
*/
-#define LOCK_MUTEX_R(env) pthread_mutex_lock(&(env)->me_txns->mti_mutex)
- /** Unlock the reader mutex.
+typedef pthread_mutex_t mdb_mutex_t[1], *mdb_mutexref_t;
+ /* @} */
+ /** Lock the reader or writer mutex.
+ * Returns 0 or a code to give #mdb_mutex_failed(), as in #LOCK_MUTEX().
*/
-#define UNLOCK_MUTEX_R(env) pthread_mutex_unlock(&(env)->me_txns->mti_mutex)
-
- /** Lock the writer mutex.
- * Only a single write transaction is allowed at a time. Other writers
- * will block waiting for this mutex.
+#define LOCK_MUTEX0(mutex) pthread_mutex_lock(mutex)
+ /** Unlock the reader or writer mutex.
*/
-#define LOCK_MUTEX_W(env) pthread_mutex_lock(&(env)->me_txns->mti_wmutex)
- /** Unlock the writer mutex.
+#define UNLOCK_MUTEX(mutex) pthread_mutex_unlock(mutex)
+ /** Mark mutex-protected data as repaired, after death of previous owner.
*/
-#define UNLOCK_MUTEX_W(env) pthread_mutex_unlock(&(env)->me_txns->mti_wmutex)
+#define mdb_mutex_consistent(mutex) pthread_mutex_consistent(mutex)
#endif /* MDB_USE_POSIX_SEM */
/** Get the error code for the last failed system function.
/** @} */
+#ifdef MDB_ROBUST_SUPPORTED
+ /** Lock mutex, handle any error, set rc = result.
+ * Return 0 on success, nonzero (not rc) on error.
+ */
+#define LOCK_MUTEX(rc, env, mutex) \
+ (((rc) = LOCK_MUTEX0(mutex)) && \
+ ((rc) = mdb_mutex_failed(env, mutex, rc)))
+static int mdb_mutex_failed(MDB_env *env, mdb_mutexref_t mutex, int rc);
+#else
+#define LOCK_MUTEX(rc, env, mutex) ((rc) = LOCK_MUTEX0(mutex))
+#define mdb_mutex_failed(env, mutex, rc) (rc)
+#endif
+
#ifndef _WIN32
/** A flag for opening a file and requesting synchronous data writes.
* This is only used when writing a meta page. It's not strictly needed;
char mtb_rmname[MNAME_LEN];
#else
/** Mutex protecting access to this table.
- * This is the reader lock that #LOCK_MUTEX_R acquires.
+ * This is the reader table lock used with LOCK_MUTEX().
*/
- pthread_mutex_t mtb_mutex;
+ mdb_mutex_t mtb_rmutex;
#endif
/** The ID of the last transaction committed to the database.
* This is recorded here only for convenience; the value can always
MDB_txbody mtb;
#define mti_magic mt1.mtb.mtb_magic
#define mti_format mt1.mtb.mtb_format
-#define mti_mutex mt1.mtb.mtb_mutex
+#define mti_rmutex mt1.mtb.mtb_rmutex
#define mti_rmname mt1.mtb.mtb_rmname
#define mti_txnid mt1.mtb.mtb_txnid
#define mti_numreaders mt1.mtb.mtb_numreaders
char mt2_wmname[MNAME_LEN];
#define mti_wmname mt2.mt2_wmname
#else
- pthread_mutex_t mt2_wmutex;
+ mdb_mutex_t mt2_wmutex;
#define mti_wmutex mt2.mt2_wmutex
#endif
char pad[(MNAME_LEN+CACHELINE-1) & ~(CACHELINE-1)];
* @ingroup internal
* @{
*/
-#define MDB_TXN_RDONLY 0x01 /**< read-only transaction */
+ /** #mdb_txn_begin() flags */
+#define MDB_TXN_BEGIN_FLAGS MDB_RDONLY
+#define MDB_TXN_RDONLY MDB_RDONLY /**< read-only transaction */
+ /* internal txn flags */
+#define MDB_TXN_WRITEMAP MDB_WRITEMAP /**< copy of #MDB_env flag in writers */
#define MDB_TXN_ERROR 0x02 /**< txn is unusable after an error */
#define MDB_TXN_DIRTY 0x04 /**< must write, even if dirty list is empty */
#define MDB_TXN_SPILLS 0x08 /**< txn or a parent has spilled pages */
unsigned int me_psize; /**< DB page size, inited from me_os_psize */
unsigned int me_os_psize; /**< OS page size, from #GET_PAGESIZE */
unsigned int me_maxreaders; /**< size of the reader table */
- unsigned int me_numreaders; /**< max numreaders set by this env */
+ /** Max #MDB_txninfo.%mti_numreaders of interest to #mdb_env_close() */
+ volatile int me_close_readers;
MDB_dbi me_numdbs; /**< number of DBs opened */
MDB_dbi me_maxdbs; /**< size of the DB table */
MDB_PID_T me_pid; /**< process ID of this env */
int me_live_reader; /**< have liveness lock in reader table */
#ifdef _WIN32
int me_pidquery; /**< Used in OpenProcess */
- HANDLE me_rmutex; /* Windows mutexes don't reside in shared mem */
- HANDLE me_wmutex;
-#elif defined(MDB_USE_POSIX_SEM)
- sem_t *me_rmutex; /* Shared mutexes are not supported */
- sem_t *me_wmutex;
+#endif
+#ifdef MDB_USE_POSIX_MUTEX /* Posix mutexes reside in shared mem */
+# define me_rmutex me_txns->mti_rmutex /**< Shared reader lock */
+# define me_wmutex me_txns->mti_wmutex /**< Shared writer lock */
+#else
+ mdb_mutex_t me_rmutex;
+ mdb_mutex_t me_wmutex;
#endif
void *me_userctx; /**< User-settable context */
MDB_assert_func *me_assert_func; /**< Callback for assertion failures */
static int mdb_env_read_header(MDB_env *env, MDB_meta *meta);
static int mdb_env_pick_meta(const MDB_env *env);
static int mdb_env_write_meta(MDB_txn *txn);
-#if !(defined(_WIN32) || defined(MDB_USE_POSIX_SEM)) /* Drop unused excl arg */
+#ifdef MDB_USE_POSIX_MUTEX /* Drop unused excl arg */
# define mdb_env_close0(env, excl) mdb_env_close1(env)
#endif
static void mdb_env_close0(MDB_env *env, int excl);
static void mdb_cursor_init(MDB_cursor *mc, MDB_txn *txn, MDB_dbi dbi, MDB_xcursor *mx);
static void mdb_xcursor_init0(MDB_cursor *mc);
static void mdb_xcursor_init1(MDB_cursor *mc, MDB_node *node);
+static void mdb_xcursor_init2(MDB_cursor *mc, MDB_xcursor *src_mx, int force);
static int mdb_drop0(MDB_cursor *mc, int subs);
static void mdb_default_cmp(MDB_txn *txn, MDB_dbi dbi);
+static int mdb_reader_check0(MDB_env *env, int rlocked, int *dead);
/** @cond */
static MDB_cmp_func mdb_cmp_memn, mdb_cmp_memnr, mdb_cmp_int, mdb_cmp_cint, mdb_cmp_long;
"MDB_NOTFOUND: No matching key/data pair found",
"MDB_PAGE_NOTFOUND: Requested page not found",
"MDB_CORRUPTED: Located page was wrong type",
- "MDB_PANIC: Update of meta page failed",
+ "MDB_PANIC: Update of meta page failed or environment had fatal error",
"MDB_VERSION_MISMATCH: Database environment version mismatch",
"MDB_INVALID: File is not an LMDB file",
"MDB_MAP_FULL: Environment mapsize limit reached",
MDB_ID2 mid;
int rc, (*insert)(MDB_ID2L, MDB_ID2 *);
- if (txn->mt_env->me_flags & MDB_WRITEMAP) {
+ if (txn->mt_flags & MDB_TXN_WRITEMAP) {
insert = mdb_mid2l_append;
} else {
insert = mdb_mid2l_insert;
} else {
MDB_PID_T pid = env->me_pid;
MDB_THR_T tid = pthread_self();
+ mdb_mutexref_t rmutex = env->me_rmutex;
if (!env->me_live_reader) {
rc = mdb_reader_pid(env, Pidset, pid);
env->me_live_reader = 1;
}
- LOCK_MUTEX_R(env);
+ if (LOCK_MUTEX(rc, env, rmutex))
+ return rc;
nr = ti->mti_numreaders;
for (i=0; i<nr; i++)
if (ti->mti_readers[i].mr_pid == 0)
break;
if (i == env->me_maxreaders) {
- UNLOCK_MUTEX_R(env);
+ UNLOCK_MUTEX(rmutex);
return MDB_READERS_FULL;
}
- ti->mti_readers[i].mr_pid = pid;
- ti->mti_readers[i].mr_tid = tid;
+ r = &ti->mti_readers[i];
+ /* Claim the reader slot, carefully since other code
+ * uses the reader table un-mutexed: First reset the
+ * slot, next publish it in mti_numreaders. After
+ * that, it is safe for mdb_env_close() to touch it.
+ * When it will be closed, we can finally claim it.
+ */
+ r->mr_pid = 0;
+ r->mr_txnid = (txnid_t)-1;
+ r->mr_tid = tid;
if (i == nr)
ti->mti_numreaders = ++nr;
- /* Save numreaders for un-mutexed mdb_env_close() */
- env->me_numreaders = nr;
- UNLOCK_MUTEX_R(env);
+ env->me_close_readers = nr;
+ r->mr_pid = pid;
+ UNLOCK_MUTEX(rmutex);
- r = &ti->mti_readers[i];
new_notls = (env->me_flags & MDB_NOTLS);
if (!new_notls && (rc=pthread_setspecific(env->me_txkey, r))) {
r->mr_pid = 0;
}
txn->mt_dbxs = env->me_dbxs; /* mostly static anyway */
} else {
+ /* Not yet touching txn == env->me_txn0, it may be active */
if (ti) {
- LOCK_MUTEX_W(env);
-
+ if (LOCK_MUTEX(rc, env, env->me_wmutex))
+ return rc;
txn->mt_txnid = ti->mti_txnid;
meta = env->me_metas[txn->mt_txnid & 1];
} else {
{
MDB_txn *txn;
MDB_ntxn *ntxn;
- int rc, size, tsize = sizeof(MDB_txn);
+ int rc, size, tsize;
+
+ flags &= MDB_TXN_BEGIN_FLAGS;
+ flags |= env->me_flags & MDB_WRITEMAP;
if (env->me_flags & MDB_FATAL_ERROR) {
DPUTS("environment had fatal error, must shutdown!");
return MDB_PANIC;
}
- if ((env->me_flags & MDB_RDONLY) && !(flags & MDB_RDONLY))
+ if (env->me_flags & MDB_RDONLY & ~flags) /* write txn in RDONLY env */
return EACCES;
+
if (parent) {
/* Nested transactions: Max 1 child, write txns only, no writemap */
+ flags |= parent->mt_flags;
if (parent->mt_child ||
- (flags & MDB_RDONLY) ||
- (parent->mt_flags & (MDB_TXN_RDONLY|MDB_TXN_ERROR)) ||
- (env->me_flags & MDB_WRITEMAP))
+ (flags & (MDB_RDONLY|MDB_WRITEMAP|MDB_TXN_ERROR)))
{
return (parent->mt_flags & MDB_TXN_RDONLY) ? EINVAL : MDB_BAD_TXN;
}
- tsize = sizeof(MDB_ntxn);
- }
- size = tsize;
- if (!(flags & MDB_RDONLY)) {
- if (!parent) {
- txn = env->me_txn0; /* just reuse preallocated write txn */
- goto ok;
- }
- /* child txns use own copy of cursors */
- size += env->me_maxdbs * sizeof(MDB_cursor *);
+ /* Child txns save MDB_pgstate and use own copy of cursors */
+ size = env->me_maxdbs * (sizeof(MDB_db)+sizeof(MDB_cursor *)+1);
+ size += tsize = sizeof(MDB_ntxn);
+ } else if (flags & MDB_RDONLY) {
+ size = env->me_maxdbs * (sizeof(MDB_db)+1);
+ size += tsize = sizeof(MDB_txn);
+ } else {
+ /* Reuse preallocated write txn. However, do not touch it until
+ * mdb_txn_renew0() succeeds, since it currently may be active.
+ */
+ txn = env->me_txn0;
+ goto renew;
}
- size += env->me_maxdbs * (sizeof(MDB_db)+1);
-
if ((txn = calloc(1, size)) == NULL) {
DPRINTF(("calloc: %s", strerror(errno)));
return ENOMEM;
}
txn->mt_dbs = (MDB_db *) ((char *)txn + tsize);
- if (flags & MDB_RDONLY) {
- txn->mt_flags |= MDB_TXN_RDONLY;
- txn->mt_dbflags = (unsigned char *)(txn->mt_dbs + env->me_maxdbs);
- txn->mt_dbiseqs = env->me_dbiseqs;
- } else {
- txn->mt_cursors = (MDB_cursor **)(txn->mt_dbs + env->me_maxdbs);
- if (parent) {
- txn->mt_dbiseqs = parent->mt_dbiseqs;
- txn->mt_dbflags = (unsigned char *)(txn->mt_cursors + env->me_maxdbs);
- } else {
- txn->mt_dbiseqs = (unsigned int *)(txn->mt_cursors + env->me_maxdbs);
- txn->mt_dbflags = (unsigned char *)(txn->mt_dbiseqs + env->me_maxdbs);
- }
- }
+ txn->mt_dbflags = (unsigned char *)txn + size - env->me_maxdbs;
+ txn->mt_flags = flags;
txn->mt_env = env;
-ok:
if (parent) {
unsigned int i;
+ txn->mt_cursors = (MDB_cursor **)(txn->mt_dbs + env->me_maxdbs);
+ txn->mt_dbiseqs = parent->mt_dbiseqs;
txn->mt_u.dirty_list = malloc(sizeof(MDB_ID2)*MDB_IDL_UM_SIZE);
if (!txn->mt_u.dirty_list ||
!(txn->mt_free_pgs = mdb_midl_alloc(MDB_IDL_UM_MAX)))
parent->mt_child = txn;
txn->mt_parent = parent;
txn->mt_numdbs = parent->mt_numdbs;
- txn->mt_flags = parent->mt_flags;
txn->mt_dbxs = parent->mt_dbxs;
memcpy(txn->mt_dbs, parent->mt_dbs, txn->mt_numdbs * sizeof(MDB_db));
/* Copy parent's mt_dbflags, but clear DB_NEW */
rc = mdb_cursor_shadow(parent, txn);
if (rc)
mdb_txn_reset0(txn, "beginchild-fail");
- } else {
+ } else { /* MDB_RDONLY */
+ txn->mt_dbiseqs = env->me_dbiseqs;
+renew:
rc = mdb_txn_renew0(txn);
}
if (rc) {
if (txn != env->me_txn0)
free(txn);
} else {
+ txn->mt_flags |= flags; /* could not change txn=me_txn0 earlier */
*ret = txn;
DPRINTF(("begin txn %"Z"u%c %p on mdbenv %p, root page %"Z"u",
- txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w',
+ txn->mt_txnid, (flags & MDB_RDONLY) ? 'r' : 'w',
(void *) txn, (void *) env, txn->mt_dbs[MAIN_DBI].md_root));
}
return txn->mt_env;
}
+size_t
+mdb_txn_id(MDB_txn *txn)
+{
+ if(!txn) return 0;
+ return txn->mt_txnid;
+}
+
/** Export or close DBI handles opened in this txn. */
static void
mdb_dbis_update(MDB_txn *txn, int keep)
env->me_txn = NULL;
/* The writer mutex was locked in mdb_txn_begin. */
if (env->me_txns)
- UNLOCK_MUTEX_W(env);
+ UNLOCK_MUTEX(env->me_wmutex);
} else {
txn->mt_parent->mt_child = NULL;
env->me_pgstate = ((MDB_ntxn *)txn)->mnt_pgstate;
unsigned int i;
MDB_env *env;
- if (txn == NULL || txn->mt_env == NULL)
+ if (txn == NULL)
return EINVAL;
if (txn->mt_child) {
rc = mdb_txn_commit(txn->mt_child);
- txn->mt_child = NULL;
if (rc)
goto fail;
}
mdb_dbis_update(txn, 1);
if (env->me_txns)
- UNLOCK_MUTEX_W(env);
+ UNLOCK_MUTEX(env->me_wmutex);
if (txn != env->me_txn0)
free(txn);
return 0;
}
+/** Fill in most of the zeroed #MDB_meta for an empty database environment */
static void ESECT
mdb_env_init_meta0(MDB_env *env, MDB_meta *meta)
{
/** Write the environment parameters of a freshly created DB environment.
* @param[in] env the environment handle
- * @param[out] meta address of where to store the meta information
+ * @param[in] meta the #MDB_meta to write
* @return 0 on success, non-zero on failure.
*/
static int ESECT
psize = env->me_psize;
- mdb_env_init_meta0(env, meta);
-
p = calloc(2, psize);
if (!p)
return ENOMEM;
{
MDB_env *env;
MDB_meta meta, metab, *mp;
+ unsigned flags;
size_t mapsize;
off_t off;
int rc, len, toggle;
toggle, txn->mt_dbs[MAIN_DBI].md_root));
env = txn->mt_env;
+ flags = env->me_flags;
mp = env->me_metas[toggle];
mapsize = env->me_metas[toggle ^ 1]->mm_mapsize;
/* Persist any increases of mapsize config */
if (mapsize < env->me_mapsize)
mapsize = env->me_mapsize;
- if (env->me_flags & MDB_WRITEMAP) {
+ if (flags & MDB_WRITEMAP) {
mp->mm_mapsize = mapsize;
mp->mm_dbs[0] = txn->mt_dbs[0];
mp->mm_dbs[1] = txn->mt_dbs[1];
mp->mm_last_pg = txn->mt_next_pgno - 1;
+#if (__GNUC__ * 100 + __GNUC_MINOR__ >= 404) && /* TODO: portability */ \
+ !(defined(__i386__) || defined(__x86_64__))
+ /* LY: issue a memory barrier, if not x86. ITS#7969 */
+ __sync_synchronize();
+#endif
mp->mm_txnid = txn->mt_txnid;
- if (!(env->me_flags & (MDB_NOMETASYNC|MDB_NOSYNC))) {
+ if (!(flags & (MDB_NOMETASYNC|MDB_NOSYNC))) {
unsigned meta_size = env->me_psize;
rc = (env->me_flags & MDB_MAPASYNC) ? MS_ASYNC : MS_SYNC;
ptr = env->me_map;
off += PAGEHDRSZ;
/* Write to the SYNC fd */
- mfd = env->me_flags & (MDB_NOSYNC|MDB_NOMETASYNC) ?
- env->me_fd : env->me_mfd;
+ mfd = (flags & (MDB_NOSYNC|MDB_NOMETASYNC)) ? env->me_fd : env->me_mfd;
#ifdef _WIN32
{
memset(&ov, 0, sizeof(ov));
*/
if (env->me_map) {
int rc;
+ MDB_meta *meta;
void *old;
if (env->me_txn)
return EINVAL;
+ meta = env->me_metas[mdb_env_pick_meta(env)];
if (!size)
- size = env->me_metas[mdb_env_pick_meta(env)]->mm_mapsize;
- else if (size < env->me_mapsize) {
- /* If the configured size is smaller, make sure it's
- * still big enough. Silently round up to minimum if not.
- */
- size_t minsize = (env->me_metas[mdb_env_pick_meta(env)]->mm_last_pg + 1) * env->me_psize;
+ size = meta->mm_mapsize;
+ {
+ /* Silently round up to minimum if the size is too small */
+ size_t minsize = (meta->mm_last_pg + 1) * env->me_psize;
if (size < minsize)
size = minsize;
}
else
env->me_pidquery = PROCESS_QUERY_INFORMATION;
#endif /* _WIN32 */
+
#ifdef BROKEN_FDATASYNC
/* ext3/ext4 fdatasync is broken on some older Linux kernels.
* https://lkml.org/lkml/2012/9/3/83
}
#endif
- memset(&meta, 0, sizeof(meta));
-
if ((i = mdb_env_read_header(env, &meta)) != 0) {
if (i != ENOENT)
return i;
env->me_psize = env->me_os_psize;
if (env->me_psize > MAX_PAGESIZE)
env->me_psize = MAX_PAGESIZE;
+ memset(&meta, 0, sizeof(meta));
+ mdb_env_init_meta0(env, &meta);
+ meta.mm_mapsize = DEFAULT_MAPSIZE;
} else {
env->me_psize = meta.mm_psize;
}
/* Was a mapsize configured? */
if (!env->me_mapsize) {
- /* If this is a new environment, take the default,
- * else use the size recorded in the existing env.
- */
- env->me_mapsize = newenv ? DEFAULT_MAPSIZE : meta.mm_mapsize;
- } else if (env->me_mapsize < meta.mm_mapsize) {
- /* If the configured size is smaller, make sure it's
- * still big enough. Silently round up to minimum if not.
+ env->me_mapsize = meta.mm_mapsize;
+ }
+ {
+ /* Make sure mapsize >= committed data size. Even when using
+ * mm_mapsize, which could be broken in old files (ITS#7789).
*/
size_t minsize = (meta.mm_last_pg + 1) * meta.mm_psize;
if (env->me_mapsize < minsize)
env->me_mapsize = minsize;
}
+ meta.mm_mapsize = env->me_mapsize;
+
+ if (newenv && !(flags & MDB_FIXEDMAP)) {
+ /* mdb_env_map() may grow the datafile. Write the metapages
+ * first, so the file will be valid if initialization fails.
+ * Except with FIXEDMAP, since we do not yet know mm_address.
+ * We could fill in mm_address later, but then a different
+ * program might end up doing that - one with a memory layout
+ * and map address which does not suit the main program.
+ */
+ rc = mdb_env_init_meta(env, &meta);
+ if (rc)
+ return rc;
+ newenv = 0;
+ }
rc = mdb_env_map(env, (flags & MDB_FIXEDMAP) ? meta.mm_address : NULL);
if (rc)
if (!rc) {
*excl = 1;
} else
-# ifdef MDB_USE_POSIX_SEM
- if (*excl < 0) /* always true when !MDB_USE_POSIX_SEM */
+# ifndef MDB_USE_POSIX_MUTEX
+ if (*excl < 0) /* always true when MDB_USE_POSIX_MUTEX */
# endif
{
lock_info.l_type = F_RDLCK;
env->me_wmutex = sem_open(env->me_txns->mti_wmname,
O_CREAT|O_EXCL, mode, 1);
if (env->me_wmutex == SEM_FAILED) goto fail_errno;
-#else /* MDB_USE_POSIX_SEM */
+#else /* MDB_USE_POSIX_MUTEX: */
pthread_mutexattr_t mattr;
if ((rc = pthread_mutexattr_init(&mattr))
|| (rc = pthread_mutexattr_setpshared(&mattr, PTHREAD_PROCESS_SHARED))
- || (rc = pthread_mutex_init(&env->me_txns->mti_mutex, &mattr))
- || (rc = pthread_mutex_init(&env->me_txns->mti_wmutex, &mattr)))
+#ifdef MDB_ROBUST_SUPPORTED
+ || (rc = pthread_mutexattr_setrobust(&mattr, PTHREAD_MUTEX_ROBUST))
+#endif
+ || (rc = pthread_mutex_init(env->me_txns->mti_rmutex, &mattr))
+ || (rc = pthread_mutex_init(env->me_txns->mti_wmutex, &mattr)))
goto fail;
pthread_mutexattr_destroy(&mattr);
#endif /* _WIN32 || MDB_USE_POSIX_SEM */
* environment and re-opening it with the new flags.
*/
#define CHANGEABLE (MDB_NOSYNC|MDB_NOMETASYNC|MDB_MAPASYNC|MDB_NOMEMINIT)
-#define CHANGELESS (MDB_FIXEDMAP|MDB_NOSUBDIR|MDB_RDONLY|MDB_WRITEMAP| \
- MDB_NOTLS|MDB_NOLOCK|MDB_NORDAHEAD)
+#define CHANGELESS (MDB_FIXEDMAP|MDB_NOSUBDIR|MDB_RDONLY| \
+ MDB_WRITEMAP|MDB_NOTLS|MDB_NOLOCK|MDB_NORDAHEAD)
#if VALID_FLAGS & PERSISTENT_FLAGS & (CHANGEABLE|CHANGELESS)
# error "Persistent DB flags & env flags overlap, but both go in mm_flags"
rc = ENOMEM;
goto leave;
}
+ env->me_dbxs[FREE_DBI].md_cmp = mdb_cmp_long; /* aligned MDB_INTEGERKEY */
/* For RDONLY, get lockfile after we know datafile exists */
if (!(flags & (MDB_RDONLY|MDB_NOLOCK))) {
if (rc)
goto leave;
}
- if (!((flags & MDB_RDONLY) ||
- (env->me_pbuf = calloc(1, env->me_psize))))
- rc = ENOMEM;
if (!(flags & MDB_RDONLY)) {
MDB_txn *txn;
int tsize = sizeof(MDB_txn), size = tsize + env->me_maxdbs *
(sizeof(MDB_db)+sizeof(MDB_cursor *)+sizeof(unsigned int)+1);
- txn = calloc(1, size);
- if (txn) {
+ if ((env->me_pbuf = calloc(1, env->me_psize)) &&
+ (txn = calloc(1, size)))
+ {
txn->mt_dbs = (MDB_db *)((char *)txn + tsize);
txn->mt_cursors = (MDB_cursor **)(txn->mt_dbs + env->me_maxdbs);
txn->mt_dbiseqs = (unsigned int *)(txn->mt_cursors + env->me_maxdbs);
MDB_PID_T pid = env->me_pid;
/* Clearing readers is done in this function because
* me_txkey with its destructor must be disabled first.
+ *
+ * We skip the the reader mutex, so we touch only
+ * data owned by this process (me_close_readers and
+ * our readers), and clear each reader atomically.
*/
- for (i = env->me_numreaders; --i >= 0; )
+ for (i = env->me_close_readers; --i >= 0; )
if (env->me_txns->mti_readers[i].mr_pid == pid)
env->me_txns->mti_readers[i].mr_pid = 0;
#ifdef _WIN32
env->me_flags &= ~(MDB_ENV_ACTIVE|MDB_ENV_TXKEY);
}
-
void ESECT
mdb_env_close(MDB_env *env)
{
MDB_page *p = NULL;
int level;
- if (!((txn->mt_flags & MDB_TXN_RDONLY) | (env->me_flags & MDB_WRITEMAP))) {
+ if (! (txn->mt_flags & (MDB_TXN_RDONLY|MDB_TXN_WRITEMAP))) {
MDB_txn *tx2 = txn;
level = 1;
do {
mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data,
unsigned int flags)
{
- enum { MDB_NO_ROOT = MDB_LAST_ERRCODE+10 }; /* internal code */
MDB_env *env;
MDB_node *leaf = NULL;
- MDB_page *fp, *mp;
+ MDB_page *fp, *mp, *sub_root = NULL;
uint16_t fp_flags;
MDB_val xdata, *rdata, dkey, olddata;
MDB_db dummy;
offset = env->me_psize - olddata.mv_size;
flags |= F_DUPDATA|F_SUBDATA;
dummy.md_root = mp->mp_pgno;
+ sub_root = mp;
}
if (mp != fp) {
mp->mp_flags = fp_flags | P_DIRTY;
* DB are all zero size.
*/
if (do_sub) {
- int xflags;
+ int xflags, new_dupdata;
size_t ecount;
put_sub:
xdata.mv_size = 0;
xflags = (flags & MDB_NODUPDATA) ?
MDB_NOOVERWRITE|MDB_NOSPILL : MDB_NOSPILL;
}
+ if (sub_root)
+ mc->mc_xcursor->mx_cursor.mc_pg[0] = sub_root;
+ new_dupdata = (int)dkey.mv_size;
/* converted, write the original data first */
if (dkey.mv_size) {
rc = mdb_cursor_put(&mc->mc_xcursor->mx_cursor, &dkey, &xdata, xflags);
if (rc)
goto bad_sub;
- {
- /* Adjust other cursors pointing to mp */
- MDB_cursor *m2;
- unsigned i = mc->mc_top;
- MDB_page *mp = mc->mc_pg[i];
-
- for (m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; m2=m2->mc_next) {
- if (m2 == mc || m2->mc_snum < mc->mc_snum) continue;
- if (!(m2->mc_flags & C_INITIALIZED)) continue;
- if (m2->mc_pg[i] == mp && m2->mc_ki[i] == mc->mc_ki[i]) {
- mdb_xcursor_init1(m2, leaf);
+ /* we've done our job */
+ dkey.mv_size = 0;
+ }
+ if (!(leaf->mn_flags & F_SUBDATA) || sub_root) {
+ /* Adjust other cursors pointing to mp */
+ MDB_cursor *m2;
+ MDB_xcursor *mx = mc->mc_xcursor;
+ unsigned i = mc->mc_top;
+ MDB_page *mp = mc->mc_pg[i];
+
+ for (m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; m2=m2->mc_next) {
+ if (m2 == mc || m2->mc_snum < mc->mc_snum) continue;
+ if (!(m2->mc_flags & C_INITIALIZED)) continue;
+ if (m2->mc_pg[i] == mp) {
+ if (m2->mc_ki[i] == mc->mc_ki[i]) {
+ mdb_xcursor_init2(m2, mx, new_dupdata);
+ } else if (!insert_key) {
+ MDB_node *n2 = NODEPTR(mp, m2->mc_ki[i]);
+ if (!(n2->mn_flags & F_SUBDATA))
+ m2->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(n2);
}
}
}
- /* we've done our job */
- dkey.mv_size = 0;
}
ecount = mc->mc_xcursor->mx_db.md_entries;
if (flags & MDB_APPENDDUP)
mdb_node_shrink(mp, mc->mc_ki[mc->mc_top]);
leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]);
mc->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(leaf);
- /* fix other sub-DB cursors pointed at this fake page */
+ /* fix other sub-DB cursors pointed at fake pages on this page */
for (m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; m2=m2->mc_next) {
if (m2 == mc || m2->mc_snum < mc->mc_snum) continue;
- if (m2->mc_pg[mc->mc_top] == mp &&
- m2->mc_ki[mc->mc_top] == mc->mc_ki[mc->mc_top])
- m2->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(leaf);
+ if (!(m2->mc_flags & C_INITIALIZED)) continue;
+ if (m2->mc_pg[mc->mc_top] == mp) {
+ if (m2->mc_ki[mc->mc_top] == mc->mc_ki[mc->mc_top]) {
+ m2->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(leaf);
+ } else {
+ MDB_node *n2 = NODEPTR(mp, m2->mc_ki[mc->mc_top]);
+ if (!(n2->mn_flags & F_SUBDATA))
+ m2->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(n2);
+ }
+ }
}
}
mc->mc_db->md_entries--;
mx->mx_cursor.mc_flags = C_SUB;
} else {
MDB_page *fp = NODEDATA(node);
- mx->mx_db.md_pad = mc->mc_pg[mc->mc_top]->mp_pad;
+ mx->mx_db.md_pad = 0;
mx->mx_db.md_flags = 0;
mx->mx_db.md_depth = 1;
mx->mx_db.md_branch_pages = 0;
#endif
}
+
+/** Fixup a sorted-dups cursor due to underlying update.
+ * Sets up some fields that depend on the data from the main cursor.
+ * Almost the same as init1, but skips initialization steps if the
+ * xcursor had already been used.
+ * @param[in] mc The main cursor whose sorted-dups cursor is to be fixed up.
+ * @param[in] src_mx The xcursor of an up-to-date cursor.
+ * @param[in] new_dupdata True if converting from a non-#F_DUPDATA item.
+ */
+static void
+mdb_xcursor_init2(MDB_cursor *mc, MDB_xcursor *src_mx, int new_dupdata)
+{
+ MDB_xcursor *mx = mc->mc_xcursor;
+
+ if (new_dupdata) {
+ mx->mx_cursor.mc_snum = 1;
+ mx->mx_cursor.mc_top = 0;
+ mx->mx_cursor.mc_flags |= C_INITIALIZED;
+ mx->mx_cursor.mc_ki[0] = 0;
+ mx->mx_dbflag = DB_VALID|DB_USRVALID|DB_DIRTY; /* DB_DIRTY guides mdb_cursor_touch */
+#if UINT_MAX < SIZE_MAX
+ mx->mx_dbx.md_cmp = src_mx->mx_dbx.md_cmp;
+#endif
+ } else if (!(mx->mx_cursor.mc_flags & C_INITIALIZED)) {
+ return;
+ }
+ mx->mx_db = src_mx->mx_db;
+ mx->mx_cursor.mc_pg[0] = src_mx->mx_cursor.mc_pg[0];
+ DPRINTF(("Sub-db -%u root page %"Z"u", mx->mx_cursor.mc_dbi,
+ mx->mx_db.md_root));
+}
+
/** Initialize a cursor for a given transaction and database. */
static void
mdb_cursor_init(MDB_cursor *mc, MDB_txn *txn, MDB_dbi dbi, MDB_xcursor *mx)
/* Adjust other cursors pointing to mp */
MDB_cursor *m2, *m3;
MDB_dbi dbi = csrc->mc_dbi;
- MDB_page *mp = csrc->mc_pg[csrc->mc_top];
+ MDB_page *mp;
+
+ mp = cdst->mc_pg[csrc->mc_top];
+ for (m2 = csrc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) {
+ if (csrc->mc_flags & C_SUB)
+ m3 = &m2->mc_xcursor->mx_cursor;
+ else
+ m3 = m2;
+ if (m3 == cdst) continue;
+ if (m3->mc_pg[csrc->mc_top] == mp && m3->mc_ki[csrc->mc_top] >=
+ cdst->mc_ki[csrc->mc_top]) {
+ m3->mc_ki[csrc->mc_top]++;
+ }
+ }
+ mp = csrc->mc_pg[csrc->mc_top];
for (m2 = csrc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) {
if (csrc->mc_flags & C_SUB)
m3 = &m2->mc_xcursor->mx_cursor;
uint16_t depth = cdst->mc_db->md_depth;
mdb_cursor_pop(cdst);
rc = mdb_rebalance(cdst);
- /* Did the tree shrink? */
- if (depth > cdst->mc_db->md_depth)
- snum--;
+ /* Did the tree height change? */
+ if (depth != cdst->mc_db->md_depth)
+ snum += cdst->mc_db->md_depth - depth;
cdst->mc_snum = snum;
cdst->mc_top = snum-1;
}
*/
if (PAGEFILL(mc->mc_txn->mt_env, mn.mc_pg[mn.mc_top]) >= thresh && NUMKEYS(mn.mc_pg[mn.mc_top]) > minkeys) {
rc = mdb_node_move(&mn, mc);
- if (mc->mc_ki[ptop]) {
+ if (mc->mc_ki[mc->mc_top-1]) {
oldki++;
}
} else {
MDB_page *mp;
indx_t ki;
unsigned int nkeys;
+ MDB_cursor *m2, *m3;
+ MDB_dbi dbi = mc->mc_dbi;
ki = mc->mc_ki[mc->mc_top];
+ mp = mc->mc_pg[mc->mc_top];
mdb_node_del(mc, mc->mc_db->md_pad);
mc->mc_db->md_entries--;
+ {
+ /* Adjust other cursors pointing to mp */
+ for (m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) {
+ m3 = (mc->mc_flags & C_SUB) ? &m2->mc_xcursor->mx_cursor : m2;
+ if (! (m2->mc_flags & m3->mc_flags & C_INITIALIZED))
+ continue;
+ if (m3 == mc || m3->mc_snum < mc->mc_snum)
+ continue;
+ if (m3->mc_pg[mc->mc_top] == mp) {
+ if (m3->mc_ki[mc->mc_top] >= ki) {
+ m3->mc_flags |= C_DEL;
+ if (m3->mc_ki[mc->mc_top] > ki)
+ m3->mc_ki[mc->mc_top]--;
+ else if (mc->mc_db->md_flags & MDB_DUPSORT)
+ m3->mc_xcursor->mx_cursor.mc_flags |= C_EOF;
+ }
+ }
+ }
+ }
rc = mdb_rebalance(mc);
if (rc == MDB_SUCCESS) {
- MDB_cursor *m2, *m3;
- MDB_dbi dbi = mc->mc_dbi;
-
/* DB is totally empty now, just bail out.
* Other cursors adjustments were already done
* by mdb_rebalance and aren't needed here.
mp = mc->mc_pg[mc->mc_top];
nkeys = NUMKEYS(mp);
- /* if mc points past last node in page, find next sibling */
- if (mc->mc_ki[mc->mc_top] >= nkeys) {
- rc = mdb_cursor_sibling(mc, 1);
- if (rc == MDB_NOTFOUND) {
- mc->mc_flags |= C_EOF;
- rc = MDB_SUCCESS;
- }
- }
-
/* Adjust other cursors pointing to mp */
for (m2 = mc->mc_txn->mt_cursors[dbi]; !rc && m2; m2=m2->mc_next) {
m3 = (mc->mc_flags & C_SUB) ? &m2->mc_xcursor->mx_cursor : m2;
if (! (m2->mc_flags & m3->mc_flags & C_INITIALIZED))
continue;
- if (m3 == mc || m3->mc_snum < mc->mc_snum)
+ if (m3->mc_snum < mc->mc_snum)
continue;
if (m3->mc_pg[mc->mc_top] == mp) {
- if (m3->mc_ki[mc->mc_top] >= ki) {
- m3->mc_flags |= C_DEL;
- if (m3->mc_ki[mc->mc_top] > ki)
- m3->mc_ki[mc->mc_top]--;
- else if (mc->mc_db->md_flags & MDB_DUPSORT)
- m3->mc_xcursor->mx_cursor.mc_flags |= C_EOF;
- }
+ /* if m3 points past last node in page, find next sibling */
if (m3->mc_ki[mc->mc_top] >= nkeys) {
rc = mdb_cursor_sibling(m3, 1);
if (rc == MDB_NOTFOUND) {
/* Create a right sibling. */
if ((rc = mdb_page_new(mc, mp->mp_flags, 1, &rp)))
return rc;
+ rp->mp_pad = mp->mp_pad;
DPRINTF(("new right sibling: page %"Z"u", rp->mp_pgno));
if (mc->mc_snum < 2) {
mc->mc_ki[0] = 0;
mc->mc_db->md_root = pp->mp_pgno;
DPRINTF(("root split! new root = %"Z"u", pp->mp_pgno));
- mc->mc_db->md_depth++;
- new_root = 1;
+ new_root = mc->mc_db->md_depth++;
/* Add left (implicit) pointer. */
if ((rc = mdb_node_add(mc, 0, NULL, NULL, mp->mp_pgno, 0)) != MDB_SUCCESS) {
if (new_root) {
int k;
/* root split */
- for (k=m3->mc_top; k>=0; k--) {
+ for (k=new_root; k>=0; k--) {
m3->mc_ki[k+1] = m3->mc_ki[k];
m3->mc_pg[k+1] = m3->mc_pg[k];
}
} mdb_copy;
/** Dedicated writer thread for compacting copy. */
-static THREAD_RET ESECT
+static THREAD_RET ESECT CALL_CONV
mdb_env_copythr(void *arg)
{
mdb_copy *my = arg;
mdb_env_copyfd0(MDB_env *env, HANDLE fd)
{
MDB_txn *txn = NULL;
+ mdb_mutexref_t wmutex = NULL;
int rc;
size_t wsize;
char *ptr;
mdb_txn_reset0(txn, "reset-stage1");
/* Temporarily block writers until we snapshot the meta pages */
- LOCK_MUTEX_W(env);
+ wmutex = env->me_wmutex;
+ if (LOCK_MUTEX(rc, env, wmutex))
+ goto leave;
rc = mdb_txn_renew0(txn);
if (rc) {
- UNLOCK_MUTEX_W(env);
+ UNLOCK_MUTEX(wmutex);
goto leave;
}
}
break;
}
}
- if (env->me_txns)
- UNLOCK_MUTEX_W(env);
+ if (wmutex)
+ UNLOCK_MUTEX(wmutex);
if (rc)
goto leave;
if (!env || !arg)
return EINVAL;
- *arg = env->me_flags;
+ *arg = env->me_flags & (CHANGEABLE|CHANGELESS);
return MDB_SUCCESS;
}
arg->me_mapaddr = env->me_metas[toggle]->mm_address;
arg->me_mapsize = env->me_mapsize;
arg->me_maxreaders = env->me_maxreaders;
-
- /* me_numreaders may be zero if this process never used any readers. Use
- * the shared numreader count if it exists.
- */
- arg->me_numreaders = env->me_txns ? env->me_txns->mti_numreaders : env->me_numreaders;
+ arg->me_numreaders = env->me_txns ? env->me_txns->mti_numreaders : 0;
arg->me_last_pgno = env->me_metas[toggle]->mm_last_pg;
arg->me_last_txnid = env->me_metas[toggle]->mm_txnid;
unsigned int unused = 0, seq;
size_t len;
- if (txn->mt_dbxs[FREE_DBI].md_cmp == NULL) {
- mdb_default_cmp(txn, FREE_DBI);
- }
-
if ((flags & VALID_FLAGS) != flags)
return EINVAL;
if (txn->mt_flags & MDB_TXN_ERROR)
MDB_cursor mx;
unsigned int i;
- /* LEAF2 pages have no nodes, cannot have sub-DBs */
- if (IS_LEAF2(mc->mc_pg[mc->mc_top]))
+ /* DUPSORT sub-DBs have no ovpages/DBs. Omit scanning leaves.
+ * This also avoids any P_LEAF2 pages, which have no nodes.
+ */
+ if (mc->mc_flags & C_SUB)
mdb_cursor_pop(mc);
mdb_cursor_copy(mc, &mx);
int ESECT
mdb_reader_check(MDB_env *env, int *dead)
{
- unsigned int i, j, rdrs;
- MDB_reader *mr;
- MDB_PID_T *pids, pid;
- int count = 0;
-
if (!env)
return EINVAL;
if (dead)
*dead = 0;
- if (!env->me_txns)
- return MDB_SUCCESS;
+ return env->me_txns ? mdb_reader_check0(env, 0, dead) : MDB_SUCCESS;
+}
+
+/** As #mdb_reader_check(). rlocked = <caller locked the reader mutex>. */
+static int mdb_reader_check0(MDB_env *env, int rlocked, int *dead)
+{
+ mdb_mutexref_t rmutex = rlocked ? NULL : env->me_rmutex;
+ unsigned int i, j, rdrs;
+ MDB_reader *mr;
+ MDB_PID_T *pids, pid;
+ int rc = MDB_SUCCESS, count = 0;
+
rdrs = env->me_txns->mti_numreaders;
pids = malloc((rdrs+1) * sizeof(MDB_PID_T));
if (!pids)
pids[0] = 0;
mr = env->me_txns->mti_readers;
for (i=0; i<rdrs; i++) {
- if (mr[i].mr_pid && mr[i].mr_pid != env->me_pid) {
- pid = mr[i].mr_pid;
+ pid = mr[i].mr_pid;
+ if (pid && pid != env->me_pid) {
if (mdb_pid_insert(pids, pid) == 0) {
if (!mdb_reader_pid(env, Pidcheck, pid)) {
- LOCK_MUTEX_R(env);
- /* Recheck, a new process may have reused pid */
- if (!mdb_reader_pid(env, Pidcheck, pid)) {
- for (j=i; j<rdrs; j++)
+ /* Stale reader found */
+ j = i;
+ if (rmutex) {
+ if ((rc = LOCK_MUTEX0(rmutex)) != 0) {
+ if ((rc = mdb_mutex_failed(env, rmutex, rc)))
+ break;
+ rdrs = 0; /* the above checked all readers */
+ } else {
+ /* Recheck, a new process may have reused pid */
+ if (mdb_reader_pid(env, Pidcheck, pid))
+ j = rdrs;
+ }
+ }
+ for (; j<rdrs; j++)
if (mr[j].mr_pid == pid) {
DPRINTF(("clear stale reader pid %u txn %"Z"d",
(unsigned) pid, mr[j].mr_txnid));
mr[j].mr_pid = 0;
count++;
}
- }
- UNLOCK_MUTEX_R(env);
+ if (rmutex)
+ UNLOCK_MUTEX(rmutex);
}
}
}
free(pids);
if (dead)
*dead = count;
- return MDB_SUCCESS;
+ return rc;
+}
+
+#ifdef MDB_ROBUST_SUPPORTED
+/** Handle #LOCK_MUTEX0() failure.
+ * Try to repair the lock file if the mutex owner died.
+ * @param[in] env the environment handle
+ * @param[in] mutex LOCK_MUTEX0() mutex
+ * @param[in] rc LOCK_MUTEX0() error (nonzero)
+ * @return 0 on success with the mutex locked, or an error code on failure.
+ */
+static int mdb_mutex_failed(MDB_env *env, mdb_mutexref_t mutex, int rc)
+{
+ int toggle, rlocked, rc2;
+
+ if (rc == MDB_OWNERDEAD) {
+ /* We own the mutex. Clean up after dead previous owner. */
+ rc = MDB_SUCCESS;
+ rlocked = (mutex == env->me_rmutex);
+ if (!rlocked) {
+ /* Keep mti_txnid updated, otherwise next writer can
+ * overwrite data which latest meta page refers to.
+ */
+ toggle = mdb_env_pick_meta(env);
+ env->me_txns->mti_txnid = env->me_metas[toggle]->mm_txnid;
+ /* env is hosed if the dead thread was ours */
+ if (env->me_txn) {
+ env->me_flags |= MDB_FATAL_ERROR;
+ env->me_txn = NULL;
+ rc = MDB_PANIC;
+ }
+ }
+ DPRINTF(("%cmutex owner died, %s", (rlocked ? 'r' : 'w'),
+ (rc ? "this process' env is hosed" : "recovering")));
+ rc2 = mdb_reader_check0(env, rlocked, NULL);
+ if (rc2 == 0)
+ rc2 = mdb_mutex_consistent(mutex);
+ if (rc || (rc = rc2)) {
+ DPRINTF(("LOCK_MUTEX recovery failed, %s", mdb_strerror(rc)));
+ UNLOCK_MUTEX(mutex);
+ }
+ } else {
+#ifdef _WIN32
+ rc = ErrCode();
+#endif
+ DPRINTF(("LOCK_MUTEX failed, %s", mdb_strerror(rc)));
+ }
+
+ return rc;
}
+#endif /* MDB_ROBUST_SUPPORTED */
/** @} */