* @mainpage MDB Memory-Mapped Database Manager
* MDB is a Btree-based database management library modeled loosely on the
* BerkeleyDB API, but much simplified. The entire database is exposed
- * in a read-only memory map, and all data fetches return data directly
+ * in a memory map, and all data fetches return data directly
* from the mapped memory, so no malloc's or memcpy's occur during
* data fetches. As such, the library is extremely simple because it
* requires no page caching layer of its own, and it is extremely high
* performance and memory-efficient. It is also fully transactional with
- * full ACID semantics, and because the memory map is read-only, the
+ * full ACID semantics, and when the memory map is read-only, the
* database integrity cannot be corrupted by stray pointer writes from
* application code.
*
* the database and re-uses them for new write operations, so the database
* size does not grow without bound in normal use.
*
+ * The memory map can be used as a read-only or read-write map. It is
+ * read-only by default as this provides total immunity to corruption.
+ * Using read-write mode offers much higher write performance, but adds
+ * the possibility for stray application writes through pointers to silently
+ * corrupt the database. Of course if your application code is known to
+ * be bug-free (...) then this is not an issue.
+ *
+ * Troubleshooting the lock file, plus semaphores on BSD systems:
+ *
+ * - A broken lockfile can cause sync issues.
+ * Stale reader transactions left behind by an aborted program
+ * cause further writes to grow the database quickly, and
+ * stale locks can block further operation.
+ *
+ * Fix: Terminate all programs using the database, or make
+ * them close it. Next database user will reset the lockfile.
+ *
+ * - On BSD systems or others configured with MDB_USE_POSIX_SEM,
+ * startup can fail due to semaphores owned by another userid.
+ *
+ * Fix: Open and close the database as the user which owns the
+ * semaphores (likely last user) or as root, while no other
+ * process is using the database.
+ *
+ * Restrictions/caveats (in addition to those listed for some functions):
+ *
+ * - Only the database owner should normally use the database on
+ * BSD systems or when otherwise configured with MDB_USE_POSIX_SEM.
+ * Multiple users can cause startup to fail later, as noted above.
+ *
+ * - A thread can only use one transaction at a time, plus any child
+ * transactions. Each transaction belongs to one thread. See below.
+ *
+ * - Use an MDB_env* in the process which opened it, without fork()ing.
+ *
+ * - Do not have an MDB database open twice in the same process at
+ * the same time. Not even from a plain open() call - close()ing it
+ * breaks flock() advisory locking.
+ *
+ * - Avoid long-lived transactions. Read transactions prevent
+ * reuse of pages freed by newer write transactions, thus the
+ * database can grow quickly. Write transactions prevent
+ * other write transactions, since writes are serialized.
+ *
+ * ...when several processes can use a database concurrently:
+ *
+ * - Avoid suspending a process with active transactions. These
+ * would then be "long-lived" as above.
+ *
+ * - Avoid aborting a process with an active transaction.
+ * The transaction becomes "long-lived" as above until the lockfile
+ * is reset, since the process may not remove it from the lockfile.
+ *
+ * - If you do that anyway, close the environment once in a while,
+ * so the lockfile can get reset.
+ *
+ * - Do not use MDB databases on remote filesystems, even between
+ * processes on the same host. This breaks flock() on some OSes,
+ * possibly memory map sync, and certainly sync between programs
+ * on different hosts.
+ *
+ * - Opening a database can fail if another process is opening or
+ * closing it at exactly the same time.
+ *
* @author Howard Chu, Symas Corporation.
*
* @copyright Copyright 2011-2012 Howard Chu, Symas Corp. All rights reserved.
/** Library minor version */
#define MDB_VERSION_MINOR 9
/** Library patch version */
-#define MDB_VERSION_PATCH 0
+#define MDB_VERSION_PATCH 4
/** Combine args a,b,c into a single integer for easy version comparisons */
#define MDB_VERINT(a,b,c) (((a) << 24) | ((b) << 16) | (c))
MDB_VERINT(MDB_VERSION_MAJOR,MDB_VERSION_MINOR,MDB_VERSION_PATCH)
/** The release date of this library version */
-#define MDB_VERSION_DATE "September 1, 2011"
+#define MDB_VERSION_DATE "September 14, 2012"
/** A stringifier for the version info */
-#define MDB_VERSTR(a,b,c,d) "MDB " #a "." #b "." #c ": (" #d ")"
+#define MDB_VERSTR(a,b,c,d) "MDB " #a "." #b "." #c ": (" d ")"
/** A helper for the stringifier macro */
#define MDB_VERFOO(a,b,c,d) MDB_VERSTR(a,b,c,d)
#define MDB_NOSYNC 0x10000
/** read only */
#define MDB_RDONLY 0x20000
+ /** don't fsync metapage after commit */
+#define MDB_NOMETASYNC 0x40000
+ /** use writable mmap */
+#define MDB_WRITEMAP 0x80000
+ /** use asynchronous msync */
+#define MDB_MAPASYNC 0x100000
/** @} */
/** @defgroup mdb_open Database Flags
#define MDB_RESERVE 0x10000
/** Data is being appended, don't split full pages. */
#define MDB_APPEND 0x20000
+/** Duplicate data is being appended, don't split full pages. */
+#define MDB_APPENDDUP 0x40000
/** Store multiple data items in one call. */
-#define MDB_MULTIPLE 0x40000
+#define MDB_MULTIPLE 0x80000
/* @} */
/** @brief Cursor Get operations.
Only for #MDB_DUPSORT */
MDB_GET_BOTH, /**< Position at key/data pair. Only for #MDB_DUPSORT */
MDB_GET_BOTH_RANGE, /**< position at key, nearest data. Only for #MDB_DUPSORT */
+ MDB_GET_CURRENT, /**< Return key/data at current cursor position */
MDB_GET_MULTIPLE, /**< Return all the duplicate data items at the current
cursor position. Only for #MDB_DUPFIXED */
MDB_LAST, /**< Position at last key/data item */
MDB_PREV_NODUP, /**< Position at last data item of previous key.
Only for #MDB_DUPSORT */
MDB_SET, /**< Position at specified key */
+ MDB_SET_KEY, /**< Position at specified key, return key + data */
MDB_SET_RANGE /**< Position at first key greater than or equal to specified key. */
} MDB_cursor_op;
#define MDB_PANIC (-30795)
/** Environment version mismatch */
#define MDB_VERSION_MISMATCH (-30794)
+ /** File is not a valid MDB file */
+#define MDB_INVALID (-30793)
+ /** Environment mapsize reached */
+#define MDB_MAP_FULL (-30792)
+ /** Environment maxdbs reached */
+#define MDB_DBS_FULL (-30791)
+ /** Environment maxreaders reached */
+#define MDB_READERS_FULL (-30790)
+ /** Too many TLS keys in use - Windows only */
+#define MDB_TLS_FULL (-30789)
+ /** Nested txn has too many dirty pages */
+#define MDB_TXN_FULL (-30788)
+ /** Cursor stack too deep - internal error */
+#define MDB_CURSOR_FULL (-30787)
+ /** Page does not have enough space - internal error */
+#define MDB_PAGE_FULL (-30786)
+#define MDB_LAST_ERRCODE MDB_PAGE_FULL
/** @} */
/** @brief Statistics for a database in the environment */
size_t ms_entries; /**< Number of data items */
} MDB_stat;
+/** @brief Information about the environment, as filled in by #mdb_env_info() */
+typedef struct MDB_envinfo {
+ void *me_mapaddr; /**< Address of map, if fixed */
+ size_t me_mapsize; /**< Size of the data memory map */
+ size_t me_last_pgno; /**< ID of the last used page */
+ size_t me_last_txnid; /**< ID of the last committed transaction */
+ unsigned int me_maxreaders; /**< Maximum number of threads for the environment */
+ unsigned int me_numreaders; /**< Maximum number of threads used in the environment */
+} MDB_envinfo;
+
/** @brief Return the mdb library version information.
*
* @param[out] major if non-NULL, the library major version number is copied here
* @param[in] flags Special options for this environment. This parameter
* must be set to 0 or formed by bitwise OR'ing together one or more of the
* values described here.
+ * Flags set by mdb_env_set_flags() are also used.
* <ul>
* <li>#MDB_FIXEDMAP
* use a fixed address for the mmap region. This flag must be specified
* at risk is governed by how often the system flushes dirty buffers to disk
* and how often #mdb_env_sync() is called. This flag may be changed
* at any time using #mdb_env_set_flags().
+ * <li>#MDB_NOMETASYNC
+ * Don't perform a synchronous flush of the meta page after committing
+ * a transaction. This is similar to the #MDB_NOSYNC case, but safer
+ * because the transaction data is still flushed. The meta page for any
+ * transaction N will be flushed by the data flush of transaction N+1.
+ * In case of a system crash, the last committed transaction may be
+ * lost. This flag may be changed at any time using #mdb_env_set_flags().
* <li>#MDB_RDONLY
* Open the environment in read-only mode. No write operations will be allowed.
* </ul>
*/
int mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mode_t mode);
+ /** @brief Copy an MDB environment to the specified path.
+ *
+ * This function may be used to make a backup of an existing environment.
+ * @param[in] env An environment handle returned by #mdb_env_create(). It
+ * must have already been opened successfully.
+ * @param[in] path The directory in which the copy will reside. This
+ * directory must already exist and be writable but must otherwise be
+ * empty.
+ * @return A non-zero error value on failure and 0 on success.
+ */
+int mdb_env_copy(MDB_env *env, const char *path);
+
/** @brief Return statistics about the MDB environment.
*
* @param[in] env An environment handle returned by #mdb_env_create()
*/
int mdb_env_stat(MDB_env *env, MDB_stat *stat);
+ /** @brief Return information about the MDB environment.
+ *
+ * @param[in] env An environment handle returned by #mdb_env_create()
+ * @param[out] stat The address of an #MDB_envinfo structure
+ * where the information will be copied
+ */
+int mdb_env_info(MDB_env *env, MDB_envinfo *stat);
+
/** @brief Flush the data buffers to disk.
*
* Data is always written to disk when #mdb_txn_commit() is called,
/** @brief Set environment flags.
*
- * This may be used to set some flags that weren't already set during
- * #mdb_env_open(), or to unset these flags. Currently only the
- * #MDB_NOSYNC flag setting may be changed with this function.
+ * This may be used to set some flags in addition to those from
+ * #mdb_env_open(), or to unset these flags.
* @param[in] env An environment handle returned by #mdb_env_create()
* @param[in] flags The flags to change, bitwise OR'ed together
* @param[in] onoff A non-zero value sets the flags, zero clears them.
* #MDB_KEYEXIST if the key already appears in the database, even if
* the database supports duplicates (#MDB_DUPSORT). The \b data
* parameter will be set to point to the existing item.
+ * <li>#MDB_RESERVE - reserve space for data of the given size, but
+ * don't copy the given data. Instead, return a pointer to the
+ * reserved space, which the caller can fill in later. This saves
+ * an extra memcpy if the data is being generated later.
+ * <li>#MDB_APPEND - append the given key/data pair to the end of the
+ * database. No key comparisons are performed. This option allows
+ * fast bulk loading when keys are already known to be in the
+ * correct order. Loading unsorted keys with this flag will cause
+ * data corruption.
+ * <li>#MDB_APPENDDUP - as above, but for sorted dup data.
* </ul>
* @return A non-zero error value on failure and 0 on success. Some possible
* errors are:
*/
void mdb_cursor_close(MDB_cursor *cursor);
+ /** @brief Renew a cursor handle.
+ *
+ * Cursors are associated with a specific transaction and database and
+ * may not span threads. Cursors that are only used in read-only
+ * transactions may be re-used, to avoid unnecessary malloc/free overhead.
+ * The cursor may be associated with a new read-only transaction, as long
+ * as it references the same database handle it was created with.
+ * @param[in] txn A transaction handle returned by #mdb_txn_begin()
+ * @param[in] cursor A cursor handle returned by #mdb_cursor_open()
+ * @return A non-zero error value on failure and 0 on success. Some possible
+ * errors are:
+ * <ul>
+ * <li>EINVAL - an invalid parameter was specified.
+ * </ul>
+ */
+int mdb_cursor_renew(MDB_txn *txn, MDB_cursor *cursor);
+
/** @brief Return the cursor's transaction handle.
*
* @param[in] cursor A cursor handle returned by #mdb_cursor_open()
* does not already appear in the database. The function will return
* #MDB_KEYEXIST if the key already appears in the database, even if
* the database supports duplicates (#MDB_DUPSORT).
+ * <li>#MDB_RESERVE - reserve space for data of the given size, but
+ * don't copy the given data. Instead, return a pointer to the
+ * reserved space, which the caller can fill in later. This saves
+ * an extra memcpy if the data is being generated later.
+ * <li>#MDB_APPEND - append the given key/data pair to the end of the
+ * database. No key comparisons are performed. This option allows
+ * fast bulk loading when keys are already known to be in the
+ * correct order. Loading unsorted keys with this flag will cause
+ * data corruption.
+ * <li>#MDB_APPENDDUP - as above, but for sorted dup data.
* </ul>
* @return A non-zero error value on failure and 0 on success. Some possible
* errors are: