X-Git-Url: https://git.sur5r.net/?a=blobdiff_plain;f=libraries%2Fliblmdb%2Flmdb.h;h=19fb1fe07ae74dd495cd3f9da1d3d30e4f711c2d;hb=87927222fecf4d1bd7aac1eb5ee0093702861470;hp=f637229283b7b4c11db76c63ed1bd1a04ab70963;hpb=5b21307f29ccda563e5f0d82294083c841b0e927;p=openldap diff --git a/libraries/liblmdb/lmdb.h b/libraries/liblmdb/lmdb.h index f637229283..19fb1fe07a 100644 --- a/libraries/liblmdb/lmdb.h +++ b/libraries/liblmdb/lmdb.h @@ -1,10 +1,10 @@ /** @file lmdb.h * @brief Lightning memory-mapped database library * - * @mainpage Lightning Memory-Mapped Database Manager (MDB) + * @mainpage Lightning Memory-Mapped Database Manager (LMDB) * * @section intro_sec Introduction - * MDB is a Btree-based database management library modeled loosely on the + * LMDB is a Btree-based database management library modeled loosely on the * BerkeleyDB API, but much simplified. The entire database is exposed * in a memory map, and all data fetches return data directly * from the mapped memory, so no malloc's or memcpy's occur during @@ -26,10 +26,10 @@ * readers, and readers don't block writers. * * Unlike other well-known database mechanisms which use either write-ahead - * transaction logs or append-only data writes, MDB requires no maintenance + * transaction logs or append-only data writes, LMDB requires no maintenance * during operation. Both write-ahead loggers and append-only databases * require periodic checkpointing and/or compaction of their log or database - * files otherwise they grow without bound. MDB tracks free pages within + * files otherwise they grow without bound. LMDB tracks free pages within * the database and re-uses them for new write operations, so the database * size does not grow without bound in normal use. * @@ -49,9 +49,13 @@ * stale locks can block further operation. * * Fix: Check for stale readers periodically, using the - * #mdb_reader_check function or the mdb_stat tool. Or just - * make all programs using the database close it; the lockfile - * is always reset on first open of the environment. + * #mdb_reader_check function or the \ref mdb_stat_1 "mdb_stat" tool. + * Stale writers will be cleared automatically on some systems: + * - Windows - automatic + * - Linux, systems using POSIX mutexes with Robust option - automatic + * - not on BSD, systems using POSIX semaphores. + * Otherwise just make all programs using the database close it; + * the lockfile is always reset on first open of the environment. * * - On BSD systems or others configured with MDB_USE_POSIX_SEM, * startup can fail due to semaphores owned by another userid. @@ -66,13 +70,27 @@ * BSD systems or when otherwise configured with MDB_USE_POSIX_SEM. * Multiple users can cause startup to fail later, as noted above. * + * - There is normally no pure read-only mode, since readers need write + * access to locks and lock file. Exceptions: On read-only filesystems + * or with the #MDB_NOLOCK flag described under #mdb_env_open(). + * + * - By default, in versions before 0.9.10, unused portions of the data + * file might receive garbage data from memory freed by other code. + * (This does not happen when using the #MDB_WRITEMAP flag.) As of + * 0.9.10 the default behavior is to initialize such memory before + * writing to the data file. Since there may be a slight performance + * cost due to this initialization, applications may disable it using + * the #MDB_NOMEMINIT flag. Applications handling sensitive data + * which must not be written should not use this flag. This flag is + * irrelevant when using #MDB_WRITEMAP. + * * - A thread can only use one transaction at a time, plus any child * transactions. Each transaction belongs to one thread. See below. * The #MDB_NOTLS flag changes this for read-only transactions. * * - Use an MDB_env* in the process which opened it, without fork()ing. * - * - Do not have open an MDB database twice in the same process at + * - Do not have open an LMDB database twice in the same process at * the same time. Not even from a plain open() call - close()ing it * breaks flock() advisory locking. * @@ -92,10 +110,13 @@ * for stale readers is performed or the lockfile is reset, * since the process may not remove it from the lockfile. * + * This does not apply to write transactions if the system clears + * stale writers, see above. + * * - If you do that anyway, do a periodic check for stale readers. Or * close the environment once in a while, so the lockfile can get reset. * - * - Do not use MDB databases on remote filesystems, even between + * - Do not use LMDB databases on remote filesystems, even between * processes on the same host. This breaks flock() on some OSes, * possibly memory map sync, and certainly sync between programs * on different hosts. @@ -105,7 +126,7 @@ * * @author Howard Chu, Symas Corporation. * - * @copyright Copyright 2011-2013 Howard Chu, Symas Corp. All rights reserved. + * @copyright Copyright 2011-2015 Howard Chu, Symas Corp. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted only as authorized by the OpenLDAP @@ -158,7 +179,7 @@ typedef void *mdb_filehandle_t; typedef int mdb_filehandle_t; #endif -/** @defgroup mdb MDB API +/** @defgroup mdb LMDB API * @{ * @brief OpenLDAP Lightning Memory-Mapped Database Manager */ @@ -170,7 +191,7 @@ typedef int mdb_filehandle_t; /** Library minor version */ #define MDB_VERSION_MINOR 9 /** Library patch version */ -#define MDB_VERSION_PATCH 8 +#define MDB_VERSION_PATCH 16 /** Combine args a,b,c into a single integer for easy version comparisons */ #define MDB_VERINT(a,b,c) (((a) << 24) | ((b) << 16) | (c)) @@ -180,10 +201,10 @@ typedef int mdb_filehandle_t; MDB_VERINT(MDB_VERSION_MAJOR,MDB_VERSION_MINOR,MDB_VERSION_PATCH) /** The release date of this library version */ -#define MDB_VERSION_DATE "September 9, 2013" +#define MDB_VERSION_DATE "August 14, 2015" /** A stringifier for the version info */ -#define MDB_VERSTR(a,b,c,d) "MDB " #a "." #b "." #c ": (" d ")" +#define MDB_VERSTR(a,b,c,d) "LMDB " #a "." #b "." #c ": (" d ")" /** A helper for the stringifier macro */ #define MDB_VERFOO(a,b,c,d) MDB_VERSTR(a,b,c,d) @@ -249,8 +270,6 @@ typedef int (MDB_cmp_func)(const MDB_val *a, const MDB_val *b); typedef void (MDB_rel_func)(MDB_val *item, void *oldptr, void *newptr, void *relctx); /** @defgroup mdb_env Environment Flags - * - * Values do not overlap Database Flags. * @{ */ /** mmap at a fixed address (experimental) */ @@ -265,27 +284,31 @@ typedef void (MDB_rel_func)(MDB_val *item, void *oldptr, void *newptr, void *rel #define MDB_NOMETASYNC 0x40000 /** use writable mmap */ #define MDB_WRITEMAP 0x80000 - /** use asynchronous msync when MDB_WRITEMAP is used */ + /** use asynchronous msync when #MDB_WRITEMAP is used */ #define MDB_MAPASYNC 0x100000 /** tie reader locktable slots to #MDB_txn objects instead of to threads */ #define MDB_NOTLS 0x200000 + /** don't do any locking, caller must manage their own locks */ +#define MDB_NOLOCK 0x400000 + /** don't do readahead (no effect on Windows) */ +#define MDB_NORDAHEAD 0x800000 + /** don't initialize malloc'd memory before writing to datafile */ +#define MDB_NOMEMINIT 0x1000000 /** @} */ /** @defgroup mdb_dbi_open Database Flags - * - * Values do not overlap Environment Flags. * @{ */ /** use reverse string keys */ #define MDB_REVERSEKEY 0x02 /** use sorted duplicates */ #define MDB_DUPSORT 0x04 - /** numeric keys in native byte order. + /** numeric keys in native byte order: either unsigned int or size_t. * The keys must all be of the same size. */ #define MDB_INTEGERKEY 0x08 /** with #MDB_DUPSORT, sorted dup items have fixed size */ #define MDB_DUPFIXED 0x10 - /** with #MDB_DUPSORT, dups are numeric in native byte order */ + /** with #MDB_DUPSORT, dups are #MDB_INTEGERKEY-style integers */ #define MDB_INTEGERDUP 0x20 /** with #MDB_DUPSORT, use reverse string dups */ #define MDB_REVERSEDUP 0x40 @@ -317,6 +340,15 @@ typedef void (MDB_rel_func)(MDB_val *item, void *oldptr, void *newptr, void *rel #define MDB_MULTIPLE 0x80000 /* @} */ +/** @defgroup mdb_copy Copy Flags + * @{ + */ +/** Compacting copy: Omit free space from copy, and renumber all + * pages sequentially. + */ +#define MDB_CP_COMPACT 0x01 +/* @} */ + /** @brief Cursor Get operations. * * This is the set of all operations for retrieving data @@ -329,16 +361,18 @@ typedef enum MDB_cursor_op { MDB_GET_BOTH, /**< Position at key/data pair. Only for #MDB_DUPSORT */ MDB_GET_BOTH_RANGE, /**< position at key, nearest data. Only for #MDB_DUPSORT */ MDB_GET_CURRENT, /**< Return key/data at current cursor position */ - MDB_GET_MULTIPLE, /**< Return all the duplicate data items at the current - cursor position. Only for #MDB_DUPFIXED */ + MDB_GET_MULTIPLE, /**< Return key and up to a page of duplicate data items + from current cursor position. Move cursor to prepare + for #MDB_NEXT_MULTIPLE. Only for #MDB_DUPFIXED */ MDB_LAST, /**< Position at last key/data item */ MDB_LAST_DUP, /**< Position at last data item of current key. Only for #MDB_DUPSORT */ MDB_NEXT, /**< Position at next data item */ MDB_NEXT_DUP, /**< Position at next data item of current key. Only for #MDB_DUPSORT */ - MDB_NEXT_MULTIPLE, /**< Return all duplicate data items at the next - cursor position. Only for #MDB_DUPFIXED */ + MDB_NEXT_MULTIPLE, /**< Return key and up to a page of duplicate data items + from next cursor position. Move cursor to prepare + for #MDB_NEXT_MULTIPLE. Only for #MDB_DUPFIXED */ MDB_NEXT_NODUP, /**< Position at first data item of next key */ MDB_PREV, /**< Position at previous data item */ MDB_PREV_DUP, /**< Position at previous data item of current key. @@ -364,11 +398,11 @@ typedef enum MDB_cursor_op { #define MDB_PAGE_NOTFOUND (-30797) /** Located page was wrong type */ #define MDB_CORRUPTED (-30796) - /** Update of meta page failed, probably I/O error */ + /** Update of meta page failed or environment had fatal error */ #define MDB_PANIC (-30795) /** Environment version mismatch */ #define MDB_VERSION_MISMATCH (-30794) - /** File is not a valid MDB file */ + /** File is not a valid LMDB file */ #define MDB_INVALID (-30793) /** Environment mapsize reached */ #define MDB_MAP_FULL (-30792) @@ -386,15 +420,25 @@ typedef enum MDB_cursor_op { #define MDB_PAGE_FULL (-30786) /** Database contents grew beyond environment mapsize */ #define MDB_MAP_RESIZED (-30785) - /** MDB_INCOMPATIBLE: Operation and DB incompatible, or DB flags changed */ + /** Operation and DB incompatible, or DB type changed. This can mean: + * + */ #define MDB_INCOMPATIBLE (-30784) /** Invalid reuse of reader locktable slot */ #define MDB_BAD_RSLOT (-30783) - /** Transaction cannot recover - it must be aborted */ + /** Transaction must abort, has a child, or is invalid */ #define MDB_BAD_TXN (-30782) - /** Too big key/data, key is empty, or wrong DUPFIXED size */ + /** Unsupported size of key/DB name/data, or wrong DUPFIXED size */ #define MDB_BAD_VALSIZE (-30781) -#define MDB_LAST_ERRCODE MDB_BAD_VALSIZE + /** The specified DBI was changed unexpectedly */ +#define MDB_BAD_DBI (-30780) + /** The last defined error code */ +#define MDB_LAST_ERRCODE MDB_BAD_DBI /** @} */ /** @brief Statistics for a database in the environment */ @@ -418,7 +462,7 @@ typedef struct MDB_envinfo { unsigned int me_numreaders; /**< max reader slots used in the environment */ } MDB_envinfo; - /** @brief Return the mdb library version information. + /** @brief Return the LMDB library version information. * * @param[out] major if non-NULL, the library major version number is copied here * @param[out] minor if non-NULL, the library minor version number is copied here @@ -432,14 +476,14 @@ char *mdb_version(int *major, int *minor, int *patch); * This function is a superset of the ANSI C X3.159-1989 (ANSI C) strerror(3) * function. If the error code is greater than or equal to 0, then the string * returned by the system function strerror(3) is returned. If the error code - * is less than 0, an error string corresponding to the MDB library error is - * returned. See @ref errors for a list of MDB-specific error codes. + * is less than 0, an error string corresponding to the LMDB library error is + * returned. See @ref errors for a list of LMDB-specific error codes. * @param[in] err The error code * @retval "error message" The description of the error */ char *mdb_strerror(int err); - /** @brief Create an MDB environment handle. + /** @brief Create an LMDB environment handle. * * This function allocates memory for a #MDB_env structure. To release * the allocated memory and discard the handle, call #mdb_env_close(). @@ -472,22 +516,22 @@ int mdb_env_create(MDB_env **env); * how the operating system has allocated memory to shared libraries and other uses. * The feature is highly experimental. *
  • #MDB_NOSUBDIR - * By default, MDB creates its environment in a directory whose + * By default, LMDB creates its environment in a directory whose * pathname is given in \b path, and creates its data and lock files * under that directory. With this option, \b path is used as-is for * the database main data file. The database lock file is the \b path * with "-lock" appended. *
  • #MDB_RDONLY * Open the environment in read-only mode. No write operations will be - * allowed. MDB will still modify the lock file - except on read-only - * filesystems, where MDB does not use locks. + * allowed. LMDB will still modify the lock file - except on read-only + * filesystems, where LMDB does not use locks. *
  • #MDB_WRITEMAP * Use a writeable memory map unless MDB_RDONLY is set. This is faster * and uses fewer mallocs, but loses protection from application bugs * like wild pointer writes and other bad updates into the database. * Incompatible with nested transactions. - * Processes with and without MDB_WRITEMAP on the same environment do - * not cooperate well. + * Do not mix processes with and without MDB_WRITEMAP on the same + * environment. This can defeat durability (#mdb_env_sync etc). *
  • #MDB_NOMETASYNC * Flush system buffers to disk only once per transaction, omit the * metadata flush. Defer that until the system flushes files to disk, @@ -524,14 +568,46 @@ int mdb_env_create(MDB_env **env); * the user synchronizes its use. Applications that multiplex many * user threads over individual OS threads need this option. Such an * application must also serialize the write transactions in an OS - * thread, since MDB's write locking is unaware of the user threads. + * thread, since LMDB's write locking is unaware of the user threads. + *
  • #MDB_NOLOCK + * Don't do any locking. If concurrent access is anticipated, the + * caller must manage all concurrency itself. For proper operation + * the caller must enforce single-writer semantics, and must ensure + * that no readers are using old transactions while a writer is + * active. The simplest approach is to use an exclusive lock so that + * no readers may be active at all when a writer begins. + *
  • #MDB_NORDAHEAD + * Turn off readahead. Most operating systems perform readahead on + * read requests by default. This option turns it off if the OS + * supports it. Turning it off may help random read performance + * when the DB is larger than RAM and system RAM is full. + * The option is not implemented on Windows. + *
  • #MDB_NOMEMINIT + * Don't initialize malloc'd memory before writing to unused spaces + * in the data file. By default, memory for pages written to the data + * file is obtained using malloc. While these pages may be reused in + * subsequent transactions, freshly malloc'd pages will be initialized + * to zeroes before use. This avoids persisting leftover data from other + * code (that used the heap and subsequently freed the memory) into the + * data file. Note that many other system libraries may allocate + * and free memory from the heap for arbitrary uses. E.g., stdio may + * use the heap for file I/O buffers. This initialization step has a + * modest performance cost so some applications may want to disable + * it using this flag. This option can be a problem for applications + * which handle sensitive data like passwords, and it makes memory + * checkers like Valgrind noisy. This flag is not needed with #MDB_WRITEMAP, + * which writes directly to the mmap instead of using malloc for pages. The + * initialization is also skipped if #MDB_RESERVE is used; the + * caller is expected to overwrite all of the memory that was + * reserved in that case. + * This flag may be changed at any time using #mdb_env_set_flags(). * - * @param[in] mode The UNIX permissions to set on created files. This parameter - * is ignored on Windows. + * @param[in] mode The UNIX permissions to set on created files and semaphores. + * This parameter is ignored on Windows. * @return A non-zero error value on failure and 0 on success. Some possible * errors are: * * @return A non-zero error value on failure and 0 on success. Some possible @@ -1228,18 +1429,21 @@ int mdb_cursor_get(MDB_cursor *cursor, MDB_val *key, MDB_val *data, /** @brief Store by cursor. * * This function stores key/data pairs into the database. - * If the function fails for any reason, the state of the cursor will be - * unchanged. If the function succeeds and an item is inserted into the - * database, the cursor is always positioned to refer to the newly inserted item. + * The cursor is positioned at the new item, or on failure usually near it. + * @note Earlier documentation incorrectly said errors would leave the + * state of the cursor unchanged. * @param[in] cursor A cursor handle returned by #mdb_cursor_open() * @param[in] key The key operated on. * @param[in] data The data operated on. * @param[in] flags Options for this operation. This parameter * must be set to 0 or one of the values described here. *