* cause further writes to grow the database quickly, and
* stale locks can block further operation.
*
- * Fix: Terminate all programs using the database, or make
- * them close it. Next database user will reset the lockfile.
+ * Fix: Check for stale readers periodically, using the
+ * #mdb_reader_check function or the mdb_stat tool. Or just
+ * make all programs using the database close it; the lockfile
+ * is always reset on first open of the environment.
*
* - On BSD systems or others configured with MDB_USE_POSIX_SEM,
* startup can fail due to semaphores owned by another userid.
* BSD systems or when otherwise configured with MDB_USE_POSIX_SEM.
* Multiple users can cause startup to fail later, as noted above.
*
+ * - There is normally no pure read-only mode, since readers need write
+ * access to locks and lock file. Exceptions: On read-only filesystems
+ * or with the #MDB_NOLOCK flag described under #mdb_env_open().
+ *
+ * - By default, in versions before 0.9.10, unused portions of the data
+ * file might receive garbage data from memory freed by other code.
+ * (This does not happen when using the #MDB_WRITEMAP flag.) As of
+ * 0.9.10 the default behavior is to initialize such memory before
+ * writing to the data file. Since there may be a slight performance
+ * cost due to this initialization, applications may disable it using
+ * the #MDB_NOMEMINIT flag. Applications handling sensitive data
+ * which must not be written should not use this flag. This flag is
+ * irrelevant when using #MDB_WRITEMAP.
+ *
* - A thread can only use one transaction at a time, plus any child
* transactions. Each transaction belongs to one thread. See below.
+ * The #MDB_NOTLS flag changes this for read-only transactions.
*
* - Use an MDB_env* in the process which opened it, without fork()ing.
*
* database can grow quickly. Write transactions prevent
* other write transactions, since writes are serialized.
*
- * ...when several processes can use a database concurrently:
- *
* - Avoid suspending a process with active transactions. These
- * would then be "long-lived" as above.
+ * would then be "long-lived" as above. Also read transactions
+ * suspended when writers commit could sometimes see wrong data.
+ *
+ * ...when several processes can use a database concurrently:
*
* - Avoid aborting a process with an active transaction.
- * The transaction becomes "long-lived" as above until the lockfile
- * is reset, since the process may not remove it from the lockfile.
+ * The transaction becomes "long-lived" as above until a check
+ * for stale readers is performed or the lockfile is reset,
+ * since the process may not remove it from the lockfile.
*
- * - If you do that anyway, close the environment once in a while,
- * so the lockfile can get reset.
+ * - If you do that anyway, do a periodic check for stale readers. Or
+ * close the environment once in a while, so the lockfile can get reset.
*
* - Do not use MDB databases on remote filesystems, even between
* processes on the same host. This breaks flock() on some OSes,
extern "C" {
#endif
+/** Unix permissions for creating files, or dummy definition for Windows */
+#ifdef _MSC_VER
+typedef int mdb_mode_t;
+#else
+typedef mode_t mdb_mode_t;
+#endif
+
+/** An abstraction for a file handle.
+ * On POSIX systems file handles are small integers. On Windows
+ * they're opaque pointers.
+ */
+#ifdef _WIN32
+typedef void *mdb_filehandle_t;
+#else
+typedef int mdb_filehandle_t;
+#endif
+
/** @defgroup mdb MDB API
* @{
* @brief OpenLDAP Lightning Memory-Mapped Database Manager
/** Library minor version */
#define MDB_VERSION_MINOR 9
/** Library patch version */
-#define MDB_VERSION_PATCH 6
+#define MDB_VERSION_PATCH 10
/** Combine args a,b,c into a single integer for easy version comparisons */
#define MDB_VERINT(a,b,c) (((a) << 24) | ((b) << 16) | (c))
MDB_VERINT(MDB_VERSION_MAJOR,MDB_VERSION_MINOR,MDB_VERSION_PATCH)
/** The release date of this library version */
-#define MDB_VERSION_DATE "January 10, 2013"
+#define MDB_VERSION_DATE "November 11, 2013"
/** A stringifier for the version info */
#define MDB_VERSTR(a,b,c,d) "MDB " #a "." #b "." #c ": (" d ")"
/** @brief Opaque structure for navigating through a database */
typedef struct MDB_cursor MDB_cursor;
-/** @brief Generic structure used for passing keys and data in and out of the database. */
+/** @brief Generic structure used for passing keys and data in and out
+ * of the database.
+ *
+ * Values returned from the database are valid only until a subsequent
+ * update operation, or the end of the transaction. Do not modify or
+ * free them; they commonly point into the database itself.
+ *
+ * Key sizes must be between 1 and #mdb_env_get_maxkeysize() inclusive.
+ * The same applies to data sizes in databases with the #MDB_DUPSORT flag.
+ * Other data items can in theory be from 0 to 0xffffffff bytes long.
+ */
typedef struct MDB_val {
size_t mv_size; /**< size of the data item */
void *mv_data; /**< address of the data item */
* Values do not overlap Database Flags.
* @{
*/
- /** mmap at a fixed address */
+ /** mmap at a fixed address (experimental) */
#define MDB_FIXEDMAP 0x01
/** no environment directory */
#define MDB_NOSUBDIR 0x4000
#define MDB_NOMETASYNC 0x40000
/** use writable mmap */
#define MDB_WRITEMAP 0x80000
- /** use asynchronous msync */
+ /** use asynchronous msync when #MDB_WRITEMAP is used */
#define MDB_MAPASYNC 0x100000
+ /** tie reader locktable slots to #MDB_txn objects instead of to threads */
+#define MDB_NOTLS 0x200000
+ /** don't do any locking, caller must manage their own locks */
+#define MDB_NOLOCK 0x400000
+ /** don't do readahead (no effect on Windows) */
+#define MDB_NORDAHEAD 0x800000
+ /** don't initialize malloc'd memory before writing to datafile */
+#define MDB_NOMEMINIT 0x1000000
/** @} */
/** @defgroup mdb_dbi_open Database Flags
#define MDB_APPEND 0x20000
/** Duplicate data is being appended, don't split full pages. */
#define MDB_APPENDDUP 0x40000
-/** Store multiple data items in one call. */
+/** Store multiple data items in one call. Only for #MDB_DUPFIXED. */
#define MDB_MULTIPLE 0x80000
/* @} */
Only for #MDB_DUPSORT */
MDB_NEXT_MULTIPLE, /**< Return all duplicate data items at the next
cursor position. Only for #MDB_DUPFIXED */
- MDB_NEXT_NODUP, /**< Position at first data item of next key.
- Only for #MDB_DUPSORT */
+ MDB_NEXT_NODUP, /**< Position at first data item of next key */
MDB_PREV, /**< Position at previous data item */
MDB_PREV_DUP, /**< Position at previous data item of current key.
Only for #MDB_DUPSORT */
- MDB_PREV_NODUP, /**< Position at last data item of previous key.
- Only for #MDB_DUPSORT */
+ MDB_PREV_NODUP, /**< Position at last data item of previous key */
MDB_SET, /**< Position at specified key */
MDB_SET_KEY, /**< Position at specified key, return key + data */
MDB_SET_RANGE /**< Position at first key greater than or equal to specified key. */
#define MDB_READERS_FULL (-30790)
/** Too many TLS keys in use - Windows only */
#define MDB_TLS_FULL (-30789)
- /** Nested txn has too many dirty pages */
+ /** Txn has too many dirty pages */
#define MDB_TXN_FULL (-30788)
/** Cursor stack too deep - internal error */
#define MDB_CURSOR_FULL (-30787)
/** Page has not enough space - internal error */
#define MDB_PAGE_FULL (-30786)
-#define MDB_LAST_ERRCODE MDB_PAGE_FULL
+ /** Database contents grew beyond environment mapsize */
+#define MDB_MAP_RESIZED (-30785)
+ /** MDB_INCOMPATIBLE: Operation and DB incompatible, or DB flags changed */
+#define MDB_INCOMPATIBLE (-30784)
+ /** Invalid reuse of reader locktable slot */
+#define MDB_BAD_RSLOT (-30783)
+ /** Transaction cannot recover - it must be aborted */
+#define MDB_BAD_TXN (-30782)
+ /** Too big key/data, key is empty, or wrong DUPFIXED size */
+#define MDB_BAD_VALSIZE (-30781)
+#define MDB_LAST_ERRCODE MDB_BAD_VALSIZE
/** @} */
/** @brief Statistics for a database in the environment */
size_t me_mapsize; /**< Size of the data memory map */
size_t me_last_pgno; /**< ID of the last used page */
size_t me_last_txnid; /**< ID of the last committed transaction */
- unsigned int me_maxreaders; /**< maximum number of threads for the environment */
- unsigned int me_numreaders; /**< maximum number of threads used in the environment */
+ unsigned int me_maxreaders; /**< max reader slots in the environment */
+ unsigned int me_numreaders; /**< max reader slots used in the environment */
} MDB_envinfo;
/** @brief Return the mdb library version information.
* under that directory. With this option, \b path is used as-is for
* the database main data file. The database lock file is the \b path
* with "-lock" appended.
- * <li>#MDB_NOSYNC
- * Don't perform a synchronous flush after committing a transaction. This means
- * transactions will exhibit the ACI (atomicity, consistency, and isolation)
- * properties, but not D (durability); that is database integrity will be
- * maintained but it is possible some number of the most recently committed
- * transactions may be undone after a system crash. The number of transactions
- * at risk is governed by how often the system flushes dirty buffers to disk
- * and how often #mdb_env_sync() is called. This flag may be changed
- * at any time using #mdb_env_set_flags().
- * <li>#MDB_NOMETASYNC
- * Don't perform a synchronous flush of the meta page after committing
- * a transaction. This is similar to the #MDB_NOSYNC case, but safer
- * because the transaction data is still flushed. The meta page for any
- * transaction N will be flushed by the data flush of transaction N+1.
- * In case of a system crash, the last committed transaction may be
- * lost. This flag may be changed at any time using #mdb_env_set_flags().
* <li>#MDB_RDONLY
- * Open the environment in read-only mode. No write operations will be allowed.
+ * Open the environment in read-only mode. No write operations will be
+ * allowed. MDB will still modify the lock file - except on read-only
+ * filesystems, where MDB does not use locks.
+ * <li>#MDB_WRITEMAP
+ * Use a writeable memory map unless MDB_RDONLY is set. This is faster
+ * and uses fewer mallocs, but loses protection from application bugs
+ * like wild pointer writes and other bad updates into the database.
+ * Incompatible with nested transactions.
+ * Processes with and without MDB_WRITEMAP on the same environment do
+ * not cooperate well.
+ * <li>#MDB_NOMETASYNC
+ * Flush system buffers to disk only once per transaction, omit the
+ * metadata flush. Defer that until the system flushes files to disk,
+ * or next non-MDB_RDONLY commit or #mdb_env_sync(). This optimization
+ * maintains database integrity, but a system crash may undo the last
+ * committed transaction. I.e. it preserves the ACI (atomicity,
+ * consistency, isolation) but not D (durability) database property.
+ * This flag may be changed at any time using #mdb_env_set_flags().
+ * <li>#MDB_NOSYNC
+ * Don't flush system buffers to disk when committing a transaction.
+ * This optimization means a system crash can corrupt the database or
+ * lose the last transactions if buffers are not yet flushed to disk.
+ * The risk is governed by how often the system flushes dirty buffers
+ * to disk and how often #mdb_env_sync() is called. However, if the
+ * filesystem preserves write order and the #MDB_WRITEMAP flag is not
+ * used, transactions exhibit ACI (atomicity, consistency, isolation)
+ * properties and only lose D (durability). I.e. database integrity
+ * is maintained, but a system crash may undo the final transactions.
+ * Note that (#MDB_NOSYNC | #MDB_WRITEMAP) leaves the system with no
+ * hint for when to write transactions to disk, unless #mdb_env_sync()
+ * is called. (#MDB_MAPASYNC | #MDB_WRITEMAP) may be preferable.
+ * This flag may be changed at any time using #mdb_env_set_flags().
+ * <li>#MDB_MAPASYNC
+ * When using #MDB_WRITEMAP, use asynchronous flushes to disk.
+ * As with #MDB_NOSYNC, a system crash can then corrupt the
+ * database or lose the last transactions. Calling #mdb_env_sync()
+ * ensures on-disk database integrity until next commit.
+ * This flag may be changed at any time using #mdb_env_set_flags().
+ * <li>#MDB_NOTLS
+ * Don't use Thread-Local Storage. Tie reader locktable slots to
+ * #MDB_txn objects instead of to threads. I.e. #mdb_txn_reset() keeps
+ * the slot reserved for the #MDB_txn object. A thread may use parallel
+ * read-only transactions. A read-only transaction may span threads if
+ * the user synchronizes its use. Applications that multiplex many
+ * user threads over individual OS threads need this option. Such an
+ * application must also serialize the write transactions in an OS
+ * thread, since MDB's write locking is unaware of the user threads.
+ * <li>#MDB_NOLOCK
+ * Don't do any locking. If concurrent access is anticipated, the
+ * caller must manage all concurrency itself. For proper operation
+ * the caller must enforce single-writer semantics, and must ensure
+ * that no readers are using old transactions while a writer is
+ * active. The simplest approach is to use an exclusive lock so that
+ * no readers may be active at all when a writer begins.
+ * <li>#MDB_NORDAHEAD
+ * Turn off readahead. Most operating systems perform readahead on
+ * read requests by default. This option turns it off if the OS
+ * supports it. Turning it off may help random read performance
+ * when the DB is larger than RAM and system RAM is full.
+ * The option is not implemented on Windows.
+ * <li>#MDB_NOMEMINIT
+ * Don't initialize malloc'd memory before writing to unused spaces
+ * in the data file. By default, memory for pages written to the data
+ * file is obtained using malloc. While these pages may be reused in
+ * subsequent transactions, freshly malloc'd pages will be initialized
+ * to zeroes before use. This avoids persisting leftover data from other
+ * code (that used the heap and subsequently freed the memory) into the
+ * data file. Note that many other system libraries may allocate
+ * and free memory from the heap for arbitrary uses. E.g., stdio may
+ * use the heap for file I/O buffers. This initialization step comes
+ * at some performance cost so some applications may want to disable
+ * it using this flag. This option can be a problem for applications
+ * which handle sensitive data like passwords, and it makes memory
+ * checkers like Valgrind noisy. This flag is not needed with #MDB_WRITEMAP,
+ * which writes directly to the mmap instead of using malloc for pages. The
+ * initialization is also skipped if #MDB_RESERVE is used; the
+ * caller is expected to overwrite all of the memory that was
+ * reserved in that case.
+ * This flag may be changed at any time using #mdb_env_set_flags().
* </ul>
* @param[in] mode The UNIX permissions to set on created files. This parameter
* is ignored on Windows.
* <ul>
* <li>#MDB_VERSION_MISMATCH - the version of the MDB library doesn't match the
* version that created the database environment.
- * <li>EINVAL - the environment file headers are corrupted.
+ * <li>#MDB_INVALID - the environment file headers are corrupted.
* <li>ENOENT - the directory specified by the path parameter doesn't exist.
* <li>EACCES - the user didn't have permission to access the environment files.
* <li>EAGAIN - the environment was locked by another process.
* </ul>
*/
-int mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mode_t mode);
+int mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode);
/** @brief Copy an MDB environment to the specified path.
*
* This function may be used to make a backup of an existing environment.
+ * No lockfile is created, since it gets recreated at need.
+ * @note This call can trigger significant file size growth if run in
+ * parallel with write transactions, because it employs a read-only
+ * transaction. See long-lived transactions under @ref caveats_sec.
* @param[in] env An environment handle returned by #mdb_env_create(). It
* must have already been opened successfully.
* @param[in] path The directory in which the copy will reside. This
*/
int mdb_env_copy(MDB_env *env, const char *path);
+ /** @brief Copy an MDB environment to the specified file descriptor.
+ *
+ * This function may be used to make a backup of an existing environment.
+ * No lockfile is created, since it gets recreated at need.
+ * @note This call can trigger significant file size growth if run in
+ * parallel with write transactions, because it employs a read-only
+ * transaction. See long-lived transactions under @ref caveats_sec.
+ * @param[in] env An environment handle returned by #mdb_env_create(). It
+ * must have already been opened successfully.
+ * @param[in] fd The file descriptor to write the copy to. It must
+ * have already been opened for write access.
+ * @return A non-zero error value on failure and 0 on success.
+ */
+int mdb_env_copyfd(MDB_env *env, mdb_filehandle_t fd);
+
/** @brief Return statistics about the MDB environment.
*
* @param[in] env An environment handle returned by #mdb_env_create()
* Data is always written to disk when #mdb_txn_commit() is called,
* but the operating system may keep it buffered. MDB always flushes
* the OS buffers upon commit as well, unless the environment was
- * opened with #MDB_NOSYNC.
+ * opened with #MDB_NOSYNC or in part #MDB_NOMETASYNC.
* @param[in] env An environment handle returned by #mdb_env_create()
* @param[in] force If non-zero, force a synchronous flush. Otherwise
* if the environment has the #MDB_NOSYNC flag set the flushes
*/
int mdb_env_get_path(MDB_env *env, const char **path);
+ /** @brief Return the file descriptor for the given environment.
+ *
+ * @param[in] env An environment handle returned by #mdb_env_create()
+ * @param[out] fd Address of an mdb_filehandle_t to contain the descriptor.
+ * @return A non-zero error value on failure and 0 on success. Some possible
+ * errors are:
+ * <ul>
+ * <li>EINVAL - an invalid parameter was specified.
+ * </ul>
+ */
+int mdb_env_get_fd(MDB_env *env, mdb_filehandle_t *fd);
+
/** @brief Set the size of the memory map to use for this environment.
*
* The size should be a multiple of the OS page size. The default is
* 10485760 bytes. The size of the memory map is also the maximum size
* of the database. The value should be chosen as large as possible,
* to accommodate future growth of the database.
- * This function may only be called after #mdb_env_create() and before #mdb_env_open().
- * The size may be changed by closing and reopening the environment.
+ * This function should be called after #mdb_env_create() and before #mdb_env_open().
+ * It may be called at later times if no transactions are active in
+ * this process. Note that the library does not check for this condition;
+ * the caller must ensure it explicitly.
+ *
+ * If the mapsize is changed by another process, #mdb_txn_begin() will
+ * return #MDB_MAP_RESIZED. This function may be called with a size
+ * of zero to adopt the new size.
+ *
* Any attempt to set a size smaller than the space already consumed
* by the environment will be silently changed to the current size of the used space.
* @param[in] env An environment handle returned by #mdb_env_create()
* @return A non-zero error value on failure and 0 on success. Some possible
* errors are:
* <ul>
- * <li>EINVAL - an invalid parameter was specified, or the environment is already open.
+ * <li>EINVAL - an invalid parameter was specified, or the environment has
+ * an active write transaction.
* </ul>
*/
int mdb_env_set_mapsize(MDB_env *env, size_t size);
- /** @brief Set the maximum number of threads for the environment.
+ /** @brief Set the maximum number of threads/reader slots for the environment.
*
* This defines the number of slots in the lock table that is used to track readers in
* the environment. The default is 126.
+ * Starting a read-only transaction normally ties a lock table slot to the
+ * current thread until the environment closes or the thread exits. If
+ * MDB_NOTLS is in use, #mdb_txn_begin() instead ties the slot to the
+ * MDB_txn object until it or the #MDB_env object is destroyed.
* This function may only be called after #mdb_env_create() and before #mdb_env_open().
* @param[in] env An environment handle returned by #mdb_env_create()
- * @param[in] readers The maximum number of threads
+ * @param[in] readers The maximum number of reader lock table slots
* @return A non-zero error value on failure and 0 on success. Some possible
* errors are:
* <ul>
*/
int mdb_env_set_maxreaders(MDB_env *env, unsigned int readers);
- /** @brief Get the maximum number of threads for the environment.
+ /** @brief Get the maximum number of threads/reader slots for the environment.
*
* @param[in] env An environment handle returned by #mdb_env_create()
* @param[out] readers Address of an integer to store the number of readers
*/
int mdb_env_set_maxdbs(MDB_env *env, MDB_dbi dbs);
+ /** @brief Get the maximum size of a key for the environment.
+ *
+ * This is the compile-time constant #MDB_MAXKEYSIZE, default 511.
+ * See @ref MDB_val.
+ * @param[in] env An environment handle returned by #mdb_env_create()
+ * @return The maximum size of a key
+ */
+int mdb_env_get_maxkeysize(MDB_env *env);
+
/** @brief Create a transaction for use with the environment.
*
* The transaction handle may be discarded using #mdb_txn_abort() or #mdb_txn_commit().
- * @note Transactions may not span threads; a transaction must only be used by a
- * single thread. Also, a thread may only have a single transaction.
- * @note Cursors may not span transactions; each cursor must be opened and closed
- * within a single transaction.
+ * @note A transaction and its cursors must only be used by a single
+ * thread, and a thread may only have a single transaction at a time.
+ * If #MDB_NOTLS is in use, this does not apply to read-only transactions.
+ * @note Cursors may not span transactions.
* @param[in] env An environment handle returned by #mdb_env_create()
* @param[in] parent If this parameter is non-NULL, the new transaction
* will be a nested transaction, with the transaction indicated by \b parent
* as its parent. Transactions may be nested to any level. A parent
- * transaction may not issue any other operations besides mdb_txn_begin,
- * mdb_txn_abort, or mdb_txn_commit while it has active child transactions.
+ * transaction and its cursors may not issue any operations other than
+ * mdb_txn_commit and mdb_txn_abort while it has active child transactions.
* @param[in] flags Special options for this transaction. This parameter
* must be set to 0 or by bitwise OR'ing together one or more of the
* values described here.
* <ul>
* <li>#MDB_PANIC - a fatal error occurred earlier and the environment
* must be shut down.
- * <li>ENOMEM - out of memory, or a read-only transaction was requested and
+ * <li>#MDB_MAP_RESIZED - another process wrote data beyond this MDB_env's
+ * mapsize and this environment's map must be resized as well.
+ * See #mdb_env_set_mapsize().
+ * <li>#MDB_READERS_FULL - a read-only transaction was requested and
* the reader lock table is full. See #mdb_env_set_maxreaders().
+ * <li>ENOMEM - out of memory.
* </ul>
*/
int mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **txn);
+ /** @brief Returns the transaction's #MDB_env
+ *
+ * @param[in] txn A transaction handle returned by #mdb_txn_begin()
+ */
+MDB_env *mdb_txn_env(MDB_txn *txn);
+
/** @brief Commit all the operations of a transaction into the database.
*
- * All cursors opened within the transaction will be closed by this call. The cursors
- * and transaction handle will be freed and must not be used again after this call.
+ * The transaction handle is freed. It and its cursors must not be used
+ * again after this call, except with #mdb_cursor_renew().
+ * @note Earlier documentation incorrectly said all cursors would be freed.
+ * Only write-transactions free cursors.
* @param[in] txn A transaction handle returned by #mdb_txn_begin()
* @return A non-zero error value on failure and 0 on success. Some possible
* errors are:
* <li>EINVAL - an invalid parameter was specified.
* <li>ENOSPC - no more disk space.
* <li>EIO - a low-level I/O error occurred while writing.
- * <li>ENOMEM - the transaction is nested and could not be merged into its parent.
+ * <li>ENOMEM - out of memory.
* </ul>
*/
int mdb_txn_commit(MDB_txn *txn);
/** @brief Abandon all the operations of the transaction instead of saving them.
*
- * All cursors opened within the transaction will be closed by this call. The cursors
- * and transaction handle will be freed and must not be used again after this call.
+ * The transaction handle is freed. It and its cursors must not be used
+ * again after this call, except with #mdb_cursor_renew().
+ * @note Earlier documentation incorrectly said all cursors would be freed.
+ * Only write-transactions free cursors.
* @param[in] txn A transaction handle returned by #mdb_txn_begin()
*/
void mdb_txn_abort(MDB_txn *txn);
/** @brief Reset a read-only transaction.
*
- * This releases the current reader lock but doesn't free the
- * transaction handle, allowing it to be used again later by #mdb_txn_renew().
- * It otherwise has the same effect as #mdb_txn_abort() but saves some memory
- * allocation/deallocation overhead if a thread is going to start a new
- * read-only transaction again soon.
- * All cursors opened within the transaction must be closed before the transaction
- * is reset.
+ * Abort the transaction like #mdb_txn_abort(), but keep the transaction
+ * handle. #mdb_txn_renew() may reuse the handle. This saves allocation
+ * overhead if the process will start a new read-only transaction soon,
+ * and also locking overhead if #MDB_NOTLS is in use. The reader table
+ * lock is released, but the table slot stays tied to its thread or
+ * #MDB_txn. Use mdb_txn_abort() to discard a reset handle, and to free
+ * its lock table slot if MDB_NOTLS is in use.
+ * Cursors opened within the transaction must not be used
+ * again after this call, except with #mdb_cursor_renew().
* Reader locks generally don't interfere with writers, but they keep old
* versions of database pages allocated. Thus they prevent the old pages
* from being reused when writers commit new data, and so under heavy load
/** @brief Open a database in the environment.
*
- * The database handle may be discarded by calling #mdb_dbi_close(). The
- * database handle resides in the shared environment, it is not owned
- * by the given transaction. Only one thread should call this function;
- * it is not mutex-protected in a read-only transaction.
+ * A database handle denotes the name and parameters of a database,
+ * independently of whether such a database exists.
+ * The database handle may be discarded by calling #mdb_dbi_close().
+ * The old database handle is returned if the database was already open.
+ * The handle must only be closed once.
+ * The database handle will be private to the current transaction until
+ * the transaction is successfully committed. If the transaction is
+ * aborted the handle will be closed automatically.
+ * After a successful commit the
+ * handle will reside in the shared environment, and may be used
+ * by other transactions. This function must not be called from
+ * multiple concurrent transactions. A transaction that uses this function
+ * must finish (either commit or abort) before any other transaction may
+ * use this function.
+ *
* To use named databases (with name != NULL), #mdb_env_set_maxdbs()
- * must be called before opening the enviorment.
+ * must be called before opening the environment.
* @param[in] txn A transaction handle returned by #mdb_txn_begin()
* @param[in] name The name of the database to open. If only a single
* database is needed in the environment, this value may be NULL.
* <ul>
* <li>#MDB_NOTFOUND - the specified database doesn't exist in the environment
* and #MDB_CREATE was not specified.
- * <li>ENFILE - too many databases have been opened. See #mdb_env_set_maxdbs().
+ * <li>#MDB_DBS_FULL - too many databases have been opened. See #mdb_env_set_maxdbs().
* </ul>
*/
int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *dbi);
*/
int mdb_stat(MDB_txn *txn, MDB_dbi dbi, MDB_stat *stat);
+ /** @brief Retrieve the DB flags for a database handle.
+ *
+ * @param[in] txn A transaction handle returned by #mdb_txn_begin()
+ * @param[in] dbi A database handle returned by #mdb_dbi_open()
+ * @param[out] flags Address where the flags will be returned.
+ * @return A non-zero error value on failure and 0 on success.
+ */
+int mdb_dbi_flags(MDB_txn *txn, MDB_dbi dbi, unsigned int *flags);
+
/** @brief Close a database handle.
*
* This call is not mutex protected. Handles should only be closed by
* a single thread, and only if no other threads are going to reference
- * the database handle any further.
+ * the database handle or one of its cursors any further. Do not close
+ * a handle if an existing transaction has modified its database.
* @param[in] env An environment handle returned by #mdb_env_create()
* @param[in] dbi A database handle returned by #mdb_dbi_open()
*/
void mdb_dbi_close(MDB_env *env, MDB_dbi dbi);
- /** @brief Delete a database and/or free all its pages.
+ /** @brief Empty or delete+close a database.
*
- * If the \b del parameter is 1, the DB handle will be closed
- * and the DB will be deleted.
* @param[in] txn A transaction handle returned by #mdb_txn_begin()
* @param[in] dbi A database handle returned by #mdb_dbi_open()
- * @param[in] del 1 to delete the DB from the environment,
- * 0 to just free its pages.
+ * @param[in] del 0 to empty the DB, 1 to delete it from the
+ * environment and close the DB handle.
* @return A non-zero error value on failure and 0 on success.
*/
int mdb_drop(MDB_txn *txn, MDB_dbi dbi, int del);
* database. The caller need not dispose of the memory, and may not
* modify it in any way. For values returned in a read-only transaction
* any modification attempts will cause a SIGSEGV.
+ * @note Values returned from the database are valid only until a
+ * subsequent update operation, or the end of the transaction.
* @param[in] txn A transaction handle returned by #mdb_txn_begin()
* @param[in] dbi A database handle returned by #mdb_dbi_open()
* @param[in] key The key to search for in the database
* parameter will be set to point to the existing item.
* <li>#MDB_RESERVE - reserve space for data of the given size, but
* don't copy the given data. Instead, return a pointer to the
- * reserved space, which the caller can fill in later. This saves
+ * reserved space, which the caller can fill in later - before
+ * the next update operation or the transaction ends. This saves
* an extra memcpy if the data is being generated later.
+ * MDB does nothing else with this memory; the caller is expected
+ * to modify all of the space requested.
* <li>#MDB_APPEND - append the given key/data pair to the end of the
* database. No key comparisons are performed. This option allows
* fast bulk loading when keys are already known to be in the
* @return A non-zero error value on failure and 0 on success. Some possible
* errors are:
* <ul>
+ * <li>#MDB_MAP_FULL - the database is full, see #mdb_env_set_mapsize().
+ * <li>#MDB_TXN_FULL - the transaction has too many dirty pages.
* <li>EACCES - an attempt was made to write in a read-only transaction.
* <li>EINVAL - an invalid parameter was specified.
- * <li>ENOMEM - the database is full, see #mdb_env_set_mapsize().
* </ul>
*/
int mdb_put(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data,
/** @brief Create a cursor handle.
*
- * Cursors are associated with a specific transaction and database and
- * may not span threads.
+ * A cursor is associated with a specific transaction and database.
+ * A cursor cannot be used when its database handle is closed, nor
+ * when its transaction has ended, except with #mdb_cursor_renew().
+ * It can be discarded with #mdb_cursor_close().
+ * A cursor in a write-transaction can be closed before its transaction
+ * ends, and will otherwise be closed when its transaction ends.
+ * A cursor in a read-only transaction must be closed explicitly, before
+ * or after its transaction ends. It can be reused with
+ * #mdb_cursor_renew() before finally closing it.
+ * @note Earlier documentation said that cursors in every transaction
+ * were closed when the transaction committed or aborted.
* @param[in] txn A transaction handle returned by #mdb_txn_begin()
* @param[in] dbi A database handle returned by #mdb_dbi_open()
* @param[out] cursor Address where the new #MDB_cursor handle will be stored
/** @brief Close a cursor handle.
*
* The cursor handle will be freed and must not be used again after this call.
+ * Its transaction must still be live if it is a write-transaction.
* @param[in] cursor A cursor handle returned by #mdb_cursor_open()
*/
void mdb_cursor_close(MDB_cursor *cursor);
/** @brief Renew a cursor handle.
*
- * Cursors are associated with a specific transaction and database and
- * may not span threads. Cursors that are only used in read-only
+ * A cursor is associated with a specific transaction and database.
+ * Cursors that are only used in read-only
* transactions may be re-used, to avoid unnecessary malloc/free overhead.
* The cursor may be associated with a new read-only transaction, and
* referencing the same database handle as it was created with.
+ * This may be done whether the previous transaction is live or dead.
* @param[in] txn A transaction handle returned by #mdb_txn_begin()
* @param[in] cursor A cursor handle returned by #mdb_cursor_open()
* @return A non-zero error value on failure and 0 on success. Some possible
* case of the #MDB_SET option, in which the \b key object is unchanged), and
* the address and length of the data are returned in the object to which \b data
* refers.
+ * See #mdb_get() for restrictions on using the output values.
* @param[in] cursor A cursor handle returned by #mdb_cursor_open()
* @param[in,out] key The key for a retrieved item
* @param[in,out] data The data of a retrieved item
* correct order. Loading unsorted keys with this flag will cause
* data corruption.
* <li>#MDB_APPENDDUP - as above, but for sorted dup data.
+ * <li>#MDB_MULTIPLE - store multiple contiguous data elements in a
+ * single request. This flag may only be specified if the database
+ * was opened with #MDB_DUPFIXED. The \b data argument must be an
+ * array of two MDB_vals. The mv_size of the first MDB_val must be
+ * the size of a single data element. The mv_data of the first MDB_val
+ * must point to the beginning of the array of contiguous data elements.
+ * The mv_size of the second MDB_val must be the number of data
+ * elements to store. On return this field will be set to the
+ * number of elements actually written. The mv_data of the second
+ * MDB_val is unused.
* </ul>
* @return A non-zero error value on failure and 0 on success. Some possible
* errors are:
* <ul>
+ * <li>#MDB_MAP_FULL - the database is full, see #mdb_env_set_mapsize().
+ * <li>#MDB_TXN_FULL - the transaction has too many dirty pages.
* <li>EACCES - an attempt was made to modify a read-only database.
* <li>EINVAL - an invalid parameter was specified.
* </ul>
/** @brief Compare two data items according to a particular database.
*
* This returns a comparison as if the two items were data items of
- * a sorted duplicates #MDB_DUPSORT database.
+ * the specified database. The database must have the #MDB_DUPSORT flag.
* @param[in] txn A transaction handle returned by #mdb_txn_begin()
* @param[in] dbi A database handle returned by #mdb_dbi_open()
* @param[in] a The first item to compare
* @return < 0 if a < b, 0 if a == b, > 0 if a > b
*/
int mdb_dcmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b);
+
+ /** @brief A callback function used to print a message from the library.
+ *
+ * @param[in] msg The string to be printed.
+ * @param[in] ctx An arbitrary context pointer for the callback.
+ * @return < 0 on failure, 0 on success.
+ */
+typedef int (MDB_msg_func)(const char *msg, void *ctx);
+
+ /** @brief Dump the entries in the reader lock table.
+ *
+ * @param[in] env An environment handle returned by #mdb_env_create()
+ * @param[in] func A #MDB_msg_func function
+ * @param[in] ctx Anything the message function needs
+ * @return < 0 on failure, 0 on success.
+ */
+int mdb_reader_list(MDB_env *env, MDB_msg_func *func, void *ctx);
+
+ /** @brief Check for stale entries in the reader lock table.
+ *
+ * @param[in] env An environment handle returned by #mdb_env_create()
+ * @param[out] dead Number of stale slots that were cleared
+ * @return 0 on success, non-zero on failure.
+ */
+int mdb_reader_check(MDB_env *env, int *dead);
/** @} */
#ifdef __cplusplus