* cause further writes to grow the database quickly, and
* stale locks can block further operation.
*
- * Fix: Terminate all programs using the database, or make
- * them close it. Next database user will reset the lockfile.
+ * Fix: Check for stale readers periodically, using the
+ * #mdb_reader_check function or the mdb_stat tool. Or just
+ * make all programs using the database close it; the lockfile
+ * is always reset on first open of the environment.
*
* - On BSD systems or others configured with MDB_USE_POSIX_SEM,
* startup can fail due to semaphores owned by another userid.
* BSD systems or when otherwise configured with MDB_USE_POSIX_SEM.
* Multiple users can cause startup to fail later, as noted above.
*
+ * - There is normally no pure read-only mode, since readers need write
+ * access to locks and lock file. Exceptions: On read-only filesystems
+ * or with the #MDB_NOLOCK flag described under #mdb_env_open().
+ *
+ * - By default, in versions before 0.9.10, unused portions of the data
+ * file might receive garbage data from memory freed by other code.
+ * (This does not happen when using the #MDB_WRITEMAP flag.) As of
+ * 0.9.10 the default behavior is to initialize such memory before
+ * writing to the data file. Since there may be a slight performance
+ * cost due to this initialization, applications may disable it using
+ * the #MDB_NOMEMINIT flag. Applications handling sensitive data
+ * which must not be written should not use this flag. This flag is
+ * irrelevant when using #MDB_WRITEMAP.
+ *
* - A thread can only use one transaction at a time, plus any child
* transactions. Each transaction belongs to one thread. See below.
* The #MDB_NOTLS flag changes this for read-only transactions.
* ...when several processes can use a database concurrently:
*
* - Avoid aborting a process with an active transaction.
- * The transaction becomes "long-lived" as above until the lockfile
- * is reset, since the process may not remove it from the lockfile.
+ * The transaction becomes "long-lived" as above until a check
+ * for stale readers is performed or the lockfile is reset,
+ * since the process may not remove it from the lockfile.
*
- * - If you do that anyway, close the environment once in a while,
- * so the lockfile can get reset.
+ * - If you do that anyway, do a periodic check for stale readers. Or
+ * close the environment once in a while, so the lockfile can get reset.
*
* - Do not use MDB databases on remote filesystems, even between
* processes on the same host. This breaks flock() on some OSes,
/** Library minor version */
#define MDB_VERSION_MINOR 9
/** Library patch version */
-#define MDB_VERSION_PATCH 7
+#define MDB_VERSION_PATCH 10
/** Combine args a,b,c into a single integer for easy version comparisons */
#define MDB_VERINT(a,b,c) (((a) << 24) | ((b) << 16) | (c))
MDB_VERINT(MDB_VERSION_MAJOR,MDB_VERSION_MINOR,MDB_VERSION_PATCH)
/** The release date of this library version */
-#define MDB_VERSION_DATE "January 10, 2013"
+#define MDB_VERSION_DATE "November 11, 2013"
/** A stringifier for the version info */
#define MDB_VERSTR(a,b,c,d) "MDB " #a "." #b "." #c ": (" d ")"
/** @brief Generic structure used for passing keys and data in and out
* of the database.
*
- * Key sizes must be between 1 and the liblmdb build-time constant
- * #MDB_MAXKEYSIZE inclusive. This currently defaults to 511. The
- * same applies to data sizes in databases with the #MDB_DUPSORT flag.
- * Other data items can in theory be from 0 to 0xffffffff bytes long.
- *
* Values returned from the database are valid only until a subsequent
- * update operation, or the end of the transaction.
+ * update operation, or the end of the transaction. Do not modify or
+ * free them, they commonly point into the database itself.
+ *
+ * Key sizes must be between 1 and #mdb_env_get_maxkeysize() inclusive.
+ * The same applies to data sizes in databases with the #MDB_DUPSORT flag.
+ * Other data items can in theory be from 0 to 0xffffffff bytes long.
*/
typedef struct MDB_val {
size_t mv_size; /**< size of the data item */
#define MDB_NOMETASYNC 0x40000
/** use writable mmap */
#define MDB_WRITEMAP 0x80000
- /** use asynchronous msync when MDB_WRITEMAP is used */
+ /** use asynchronous msync when #MDB_WRITEMAP is used */
#define MDB_MAPASYNC 0x100000
/** tie reader locktable slots to #MDB_txn objects instead of to threads */
#define MDB_NOTLS 0x200000
+ /** don't do any locking, caller must manage their own locks */
+#define MDB_NOLOCK 0x400000
+ /** don't do readahead (no effect on Windows) */
+#define MDB_NORDAHEAD 0x800000
+ /** don't initialize malloc'd memory before writing to datafile */
+#define MDB_NOMEMINIT 0x1000000
/** @} */
/** @defgroup mdb_dbi_open Database Flags
#define MDB_PAGE_FULL (-30786)
/** Database contents grew beyond environment mapsize */
#define MDB_MAP_RESIZED (-30785)
- /** Database flags changed or would change */
+ /** MDB_INCOMPATIBLE: Operation and DB incompatible, or DB flags changed */
#define MDB_INCOMPATIBLE (-30784)
/** Invalid reuse of reader locktable slot */
#define MDB_BAD_RSLOT (-30783)
* and uses fewer mallocs, but loses protection from application bugs
* like wild pointer writes and other bad updates into the database.
* Incompatible with nested transactions.
+ * Processes with and without MDB_WRITEMAP on the same environment do
+ * not cooperate well.
* <li>#MDB_NOMETASYNC
* Flush system buffers to disk only once per transaction, omit the
* metadata flush. Defer that until the system flushes files to disk,
* user threads over individual OS threads need this option. Such an
* application must also serialize the write transactions in an OS
* thread, since MDB's write locking is unaware of the user threads.
+ * <li>#MDB_NOLOCK
+ * Don't do any locking. If concurrent access is anticipated, the
+ * caller must manage all concurrency itself. For proper operation
+ * the caller must enforce single-writer semantics, and must ensure
+ * that no readers are using old transactions while a writer is
+ * active. The simplest approach is to use an exclusive lock so that
+ * no readers may be active at all when a writer begins.
+ * <li>#MDB_NORDAHEAD
+ * Turn off readahead. Most operating systems perform readahead on
+ * read requests by default. This option turns it off if the OS
+ * supports it. Turning it off may help random read performance
+ * when the DB is larger than RAM and system RAM is full.
+ * The option is not implemented on Windows.
+ * <li>#MDB_NOMEMINIT
+ * Don't initialize malloc'd memory before writing to unused spaces
+ * in the data file. By default, memory for pages written to the data
+ * file is obtained using malloc. While these pages may be reused in
+ * subsequent transactions, freshly malloc'd pages will be initialized
+ * to zeroes before use. This avoids persisting leftover data from other
+ * code (that used the heap and subsequently freed the memory) into the
+ * data file. Note that many other system libraries may allocate
+ * and free memory from the heap for arbitrary uses. E.g., stdio may
+ * use the heap for file I/O buffers. This initialization step comes
+ * at some performance cost so some applications may want to disable
+ * it using this flag. This option can be a problem for applications
+ * which handle sensitive data like passwords, and it makes memory
+ * checkers like Valgrind noisy. This flag is not needed with #MDB_WRITEMAP,
+ * which writes directly to the mmap instead of using malloc for pages. The
+ * initialization is also skipped if #MDB_RESERVE is used; the
+ * caller is expected to overwrite all of the memory that was
+ * reserved in that case.
+ * This flag may be changed at any time using #mdb_env_set_flags().
* </ul>
* @param[in] mode The UNIX permissions to set on created files. This parameter
* is ignored on Windows.
*/
int mdb_env_get_path(MDB_env *env, const char **path);
+ /** @brief Return the filedescriptor for the given environment.
+ *
+ * @param[in] env An environment handle returned by #mdb_env_create()
+ * @param[out] fd Address of a mdb_filehandle_t to contain the descriptor.
+ * @return A non-zero error value on failure and 0 on success. Some possible
+ * errors are:
+ * <ul>
+ * <li>EINVAL - an invalid parameter was specified.
+ * </ul>
+ */
+int mdb_env_get_fd(MDB_env *env, mdb_filehandle_t *fd);
+
/** @brief Set the size of the memory map to use for this environment.
*
* The size should be a multiple of the OS page size. The default is
* 10485760 bytes. The size of the memory map is also the maximum size
* of the database. The value should be chosen as large as possible,
* to accommodate future growth of the database.
- * This function may only be called after #mdb_env_create() and before #mdb_env_open().
- * The size may be changed by closing and reopening the environment.
+ * This function should be called after #mdb_env_create() and before #mdb_env_open().
+ * It may be called at later times if no transactions are active in
+ * this process. Note that the library does not check for this condition,
+ * the caller must ensure it explicitly.
+ *
+ * If the mapsize is changed by another process, #mdb_txn_begin() will
+ * return #MDB_MAP_RESIZED. This function may be called with a size
+ * of zero to adopt the new size.
+ *
* Any attempt to set a size smaller than the space already consumed
* by the environment will be silently changed to the current size of the used space.
* @param[in] env An environment handle returned by #mdb_env_create()
* @return A non-zero error value on failure and 0 on success. Some possible
* errors are:
* <ul>
- * <li>EINVAL - an invalid parameter was specified, or the environment is already open.
+ * <li>EINVAL - an invalid parameter was specified, or the environment has
+ * an active write transaction.
* </ul>
*/
int mdb_env_set_mapsize(MDB_env *env, size_t size);
/** @brief Get the maximum size of a key for the environment.
*
+ * This is the compile-time constant #MDB_MAXKEYSIZE, default 511.
+ * See @ref MDB_val.
* @param[in] env An environment handle returned by #mdb_env_create()
- * @return The maximum size of a key. (#MDB_MAXKEYSIZE)
+ * @return The maximum size of a key
*/
int mdb_env_get_maxkeysize(MDB_env *env);
* <li>#MDB_PANIC - a fatal error occurred earlier and the environment
* must be shut down.
* <li>#MDB_MAP_RESIZED - another process wrote data beyond this MDB_env's
- * mapsize and the environment must be shut down.
+ * mapsize and this environment's map must be resized as well.
+ * See #mdb_env_set_mapsize().
* <li>#MDB_READERS_FULL - a read-only transaction was requested and
* the reader lock table is full. See #mdb_env_set_maxreaders().
* <li>ENOMEM - out of memory.
*/
int mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **txn);
+ /** @brief Returns the transaction's #MDB_env
+ *
+ * @param[in] txn A transaction handle returned by #mdb_txn_begin()
+ */
+MDB_env *mdb_txn_env(MDB_txn *txn);
+
/** @brief Commit all the operations of a transaction into the database.
*
* The transaction handle is freed. It and its cursors must not be used
/** @brief Retrieve the DB flags for a database handle.
*
- * @param[in] env An environment handle returned by #mdb_env_create()
+ * @param[in] txn A transaction handle returned by #mdb_txn_begin()
* @param[in] dbi A database handle returned by #mdb_dbi_open()
* @param[out] flags Address where the flags will be returned.
* @return A non-zero error value on failure and 0 on success.
*/
-int mdb_dbi_flags(MDB_env *env, MDB_dbi dbi, unsigned int *flags);
+int mdb_dbi_flags(MDB_txn *txn, MDB_dbi dbi, unsigned int *flags);
/** @brief Close a database handle.
*
* reserved space, which the caller can fill in later - before
* the next update operation or the transaction ends. This saves
* an extra memcpy if the data is being generated later.
+ * MDB does nothing else with this memory, the caller is expected
+ * to modify all of the space requested.
* <li>#MDB_APPEND - append the given key/data pair to the end of the
* database. No key comparisons are performed. This option allows
* fast bulk loading when keys are already known to be in the