X-Git-Url: https://git.sur5r.net/?a=blobdiff_plain;f=libraries%2Fliblmdb%2Flmdb.h;h=2ebd43f3daedf7817786deaddfb7f97821e168d9;hb=5e88fcffc20370c3f388f505f896aafdf4a52e7f;hp=b94bd2d3b81ea5dd584e5137a29093909ce1c6cd;hpb=3394bac2c0fcb69f29da5bd182fdb025b6ac42f6;p=openldap diff --git a/libraries/liblmdb/lmdb.h b/libraries/liblmdb/lmdb.h index b94bd2d3b8..2ebd43f3da 100644 --- a/libraries/liblmdb/lmdb.h +++ b/libraries/liblmdb/lmdb.h @@ -48,8 +48,10 @@ * cause further writes to grow the database quickly, and * stale locks can block further operation. * - * Fix: Terminate all programs using the database, or make - * them close it. Next database user will reset the lockfile. + * Fix: Check for stale readers periodically, using the + * #mdb_reader_check function or the mdb_stat tool. Or just + * make all programs using the database close it; the lockfile + * is always reset on first open of the environment. * * - On BSD systems or others configured with MDB_USE_POSIX_SEM, * startup can fail due to semaphores owned by another userid. @@ -64,8 +66,23 @@ * BSD systems or when otherwise configured with MDB_USE_POSIX_SEM. * Multiple users can cause startup to fail later, as noted above. * + * - There is normally no pure read-only mode, since readers need write + * access to locks and lock file. Exceptions: On read-only filesystems + * or with the #MDB_NOLOCK flag described under #mdb_env_open(). + * + * - By default, in versions before 0.9.10, unused portions of the data + * file might receive garbage data from memory freed by other code. + * (This does not happen when using the #MDB_WRITEMAP flag.) As of + * 0.9.10 the default behavior is to initialize such memory before + * writing to the data file. Since there may be a slight performance + * cost due to this initialization, applications may disable it using + * the #MDB_NOMEMINIT flag. Applications handling sensitive data + * which must not be written should not use this flag. 
This flag is + * irrelevant when using #MDB_WRITEMAP. + * * - A thread can only use one transaction at a time, plus any child * transactions. Each transaction belongs to one thread. See below. + * The #MDB_NOTLS flag changes this for read-only transactions. * * - Use an MDB_env* in the process which opened it, without fork()ing. * @@ -85,11 +102,12 @@ * ...when several processes can use a database concurrently: * * - Avoid aborting a process with an active transaction. - * The transaction becomes "long-lived" as above until the lockfile - * is reset, since the process may not remove it from the lockfile. + * The transaction becomes "long-lived" as above until a check + * for stale readers is performed or the lockfile is reset, + * since the process may not remove it from the lockfile. * - * - If you do that anyway, close the environment once in a while, - * so the lockfile can get reset. + * - If you do that anyway, do a periodic check for stale readers. Or + * close the environment once in a while, so the lockfile can get reset. * * - Do not use MDB databases on remote filesystems, even between * processes on the same host. This breaks flock() on some OSes, @@ -137,6 +155,23 @@ extern "C" { #endif +/** Unix permissions for creating files, or dummy definition for Windows */ +#ifdef _MSC_VER +typedef int mdb_mode_t; +#else +typedef mode_t mdb_mode_t; +#endif + +/** An abstraction for a file handle. + * On POSIX systems file handles are small integers. On Windows + * they're opaque pointers. 
+ */ +#ifdef _WIN32 +typedef void *mdb_filehandle_t; +#else +typedef int mdb_filehandle_t; +#endif + /** @defgroup mdb MDB API * @{ * @brief OpenLDAP Lightning Memory-Mapped Database Manager @@ -149,7 +184,7 @@ extern "C" { /** Library minor version */ #define MDB_VERSION_MINOR 9 /** Library patch version */ -#define MDB_VERSION_PATCH 6 +#define MDB_VERSION_PATCH 11 /** Combine args a,b,c into a single integer for easy version comparisons */ #define MDB_VERINT(a,b,c) (((a) << 24) | ((b) << 16) | (c)) @@ -159,7 +194,7 @@ extern "C" { MDB_VERINT(MDB_VERSION_MAJOR,MDB_VERSION_MINOR,MDB_VERSION_PATCH) /** The release date of this library version */ -#define MDB_VERSION_DATE "January 10, 2013" +#define MDB_VERSION_DATE "January 15, 2014" /** A stringifier for the version info */ #define MDB_VERSTR(a,b,c,d) "MDB " #a "." #b "." #c ": (" d ")" @@ -195,9 +230,12 @@ typedef struct MDB_cursor MDB_cursor; /** @brief Generic structure used for passing keys and data in and out * of the database. * - * Key sizes must be between 1 and the liblmdb build-time constant - * #MDB_MAXKEYSIZE inclusive. This currently defaults to 511. The - * same applies to data sizes in databases with the #MDB_DUPSORT flag. + * Values returned from the database are valid only until a subsequent + * update operation, or the end of the transaction. Do not modify or + * free them, they commonly point into the database itself. + * + * Key sizes must be between 1 and #mdb_env_get_maxkeysize() inclusive. + * The same applies to data sizes in databases with the #MDB_DUPSORT flag. * Other data items can in theory be from 0 to 0xffffffff bytes long. */ typedef struct MDB_val { @@ -225,8 +263,6 @@ typedef int (MDB_cmp_func)(const MDB_val *a, const MDB_val *b); typedef void (MDB_rel_func)(MDB_val *item, void *oldptr, void *newptr, void *relctx); /** @defgroup mdb_env Environment Flags - * - * Values do not overlap Database Flags. 
* @{ */ /** mmap at a fixed address (experimental) */ @@ -241,13 +277,19 @@ typedef void (MDB_rel_func)(MDB_val *item, void *oldptr, void *newptr, void *rel #define MDB_NOMETASYNC 0x40000 /** use writable mmap */ #define MDB_WRITEMAP 0x80000 - /** use asynchronous msync when MDB_WRITEMAP is used */ + /** use asynchronous msync when #MDB_WRITEMAP is used */ #define MDB_MAPASYNC 0x100000 + /** tie reader locktable slots to #MDB_txn objects instead of to threads */ +#define MDB_NOTLS 0x200000 + /** don't do any locking, caller must manage their own locks */ +#define MDB_NOLOCK 0x400000 + /** don't do readahead (no effect on Windows) */ +#define MDB_NORDAHEAD 0x800000 + /** don't initialize malloc'd memory before writing to datafile */ +#define MDB_NOMEMINIT 0x1000000 /** @} */ /** @defgroup mdb_dbi_open Database Flags - * - * Values do not overlap Environment Flags. * @{ */ /** use reverse string keys */ @@ -287,7 +329,7 @@ typedef void (MDB_rel_func)(MDB_val *item, void *oldptr, void *newptr, void *rel #define MDB_APPEND 0x20000 /** Duplicate data is being appended, don't split full pages. */ #define MDB_APPENDDUP 0x40000 -/** Store multiple data items in one call. */ +/** Store multiple data items in one call. Only for #MDB_DUPFIXED. */ #define MDB_MULTIPLE 0x80000 /* @} */ @@ -313,13 +355,11 @@ typedef enum MDB_cursor_op { Only for #MDB_DUPSORT */ MDB_NEXT_MULTIPLE, /**< Return all duplicate data items at the next cursor position. Only for #MDB_DUPFIXED */ - MDB_NEXT_NODUP, /**< Position at first data item of next key. - Only for #MDB_DUPSORT */ + MDB_NEXT_NODUP, /**< Position at first data item of next key */ MDB_PREV, /**< Position at previous data item */ MDB_PREV_DUP, /**< Position at previous data item of current key. Only for #MDB_DUPSORT */ - MDB_PREV_NODUP, /**< Position at last data item of previous key. 
- Only for #MDB_DUPSORT */ + MDB_PREV_NODUP, /**< Position at last data item of previous key */ MDB_SET, /**< Position at specified key */ MDB_SET_KEY, /**< Position at specified key, return key + data */ MDB_SET_RANGE /**< Position at first key greater than or equal to specified key. */ @@ -362,9 +402,15 @@ typedef enum MDB_cursor_op { #define MDB_PAGE_FULL (-30786) /** Database contents grew beyond environment mapsize */ #define MDB_MAP_RESIZED (-30785) - /** Database flags changed or would change */ + /** MDB_INCOMPATIBLE: Operation and DB incompatible, or DB flags changed */ #define MDB_INCOMPATIBLE (-30784) -#define MDB_LAST_ERRCODE MDB_INCOMPATIBLE + /** Invalid reuse of reader locktable slot */ +#define MDB_BAD_RSLOT (-30783) + /** Transaction cannot recover - it must be aborted */ +#define MDB_BAD_TXN (-30782) + /** Too big key/data, key is empty, or wrong DUPFIXED size */ +#define MDB_BAD_VALSIZE (-30781) +#define MDB_LAST_ERRCODE MDB_BAD_VALSIZE /** @} */ /** @brief Statistics for a database in the environment */ @@ -384,8 +430,8 @@ typedef struct MDB_envinfo { size_t me_mapsize; /**< Size of the data memory map */ size_t me_last_pgno; /**< ID of the last used page */ size_t me_last_txnid; /**< ID of the last committed transaction */ - unsigned int me_maxreaders; /**< maximum number of threads for the environment */ - unsigned int me_numreaders; /**< maximum number of threads used in the environment */ + unsigned int me_maxreaders; /**< max reader slots in the environment */ + unsigned int me_numreaders; /**< max reader slots used in the environment */ } MDB_envinfo; /** @brief Return the mdb library version information. @@ -456,6 +502,8 @@ int mdb_env_create(MDB_env **env); * and uses fewer mallocs, but loses protection from application bugs * like wild pointer writes and other bad updates into the database. * Incompatible with nested transactions. + * Processes with and without MDB_WRITEMAP on the same environment do + * not cooperate well. *
  • #MDB_NOMETASYNC * Flush system buffers to disk only once per transaction, omit the * metadata flush. Defer that until the system flushes files to disk, @@ -484,6 +532,47 @@ int mdb_env_create(MDB_env **env); * database or lose the last transactions. Calling #mdb_env_sync() * ensures on-disk database integrity until next commit. * This flag may be changed at any time using #mdb_env_set_flags(). + *
  • #MDB_NOTLS + * Don't use Thread-Local Storage. Tie reader locktable slots to + * #MDB_txn objects instead of to threads. I.e. #mdb_txn_reset() keeps + * the slot reserved for the #MDB_txn object. A thread may use parallel + * read-only transactions. A read-only transaction may span threads if + * the user synchronizes its use. Applications that multiplex many + * user threads over individual OS threads need this option. Such an + * application must also serialize the write transactions in an OS + * thread, since MDB's write locking is unaware of the user threads. + *
  • #MDB_NOLOCK + * Don't do any locking. If concurrent access is anticipated, the + * caller must manage all concurrency itself. For proper operation + * the caller must enforce single-writer semantics, and must ensure + * that no readers are using old transactions while a writer is + * active. The simplest approach is to use an exclusive lock so that + * no readers may be active at all when a writer begins. + *
  • #MDB_NORDAHEAD + * Turn off readahead. Most operating systems perform readahead on + * read requests by default. This option turns it off if the OS + * supports it. Turning it off may help random read performance + * when the DB is larger than RAM and system RAM is full. + * The option is not implemented on Windows. + *
  • #MDB_NOMEMINIT + * Don't initialize malloc'd memory before writing to unused spaces + * in the data file. By default, memory for pages written to the data + * file is obtained using malloc. While these pages may be reused in + * subsequent transactions, freshly malloc'd pages will be initialized + * to zeroes before use. This avoids persisting leftover data from other + * code (that used the heap and subsequently freed the memory) into the + * data file. Note that many other system libraries may allocate + * and free memory from the heap for arbitrary uses. E.g., stdio may + * use the heap for file I/O buffers. This initialization step has a + * modest performance cost so some applications may want to disable + * it using this flag. This option can be a problem for applications + * which handle sensitive data like passwords, and it makes memory + * checkers like Valgrind noisy. This flag is not needed with #MDB_WRITEMAP, + * which writes directly to the mmap instead of using malloc for pages. The + * initialization is also skipped if #MDB_RESERVE is used; the + * caller is expected to overwrite all of the memory that was + * reserved in that case. + * This flag may be changed at any time using #mdb_env_set_flags(). * * @param[in] mode The UNIX permissions to set on created files. This parameter * is ignored on Windows. @@ -498,11 +587,15 @@ int mdb_env_create(MDB_env **env); *
  • EAGAIN - the environment was locked by another process. * */ -int mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mode_t mode); +int mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode); /** @brief Copy an MDB environment to the specified path. * * This function may be used to make a backup of an existing environment. + * No lockfile is created, since it gets recreated at need. + * @note This call can trigger significant file size growth if run in + * parallel with write transactions, because it employs a read-only + * transaction. See long-lived transactions under @ref caveats_sec. * @param[in] env An environment handle returned by #mdb_env_create(). It * must have already been opened successfully. * @param[in] path The directory in which the copy will reside. This @@ -512,6 +605,21 @@ int mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mode_t mod */ int mdb_env_copy(MDB_env *env, const char *path); + /** @brief Copy an MDB environment to the specified file descriptor. + * + * This function may be used to make a backup of an existing environment. + * No lockfile is created, since it gets recreated at need. + * @note This call can trigger significant file size growth if run in + * parallel with write transactions, because it employs a read-only + * transaction. See long-lived transactions under @ref caveats_sec. + * @param[in] env An environment handle returned by #mdb_env_create(). It + * must have already been opened successfully. + * @param[in] fd The filedescriptor to write the copy to. It must + * have already been opened for Write access. + * @return A non-zero error value on failure and 0 on success. + */ +int mdb_env_copyfd(MDB_env *env, mdb_filehandle_t fd); + /** @brief Return statistics about the MDB environment. 
* * @param[in] env An environment handle returned by #mdb_env_create() @@ -598,14 +706,33 @@ int mdb_env_get_flags(MDB_env *env, unsigned int *flags); */ int mdb_env_get_path(MDB_env *env, const char **path); + /** @brief Return the filedescriptor for the given environment. + * + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[out] fd Address of a mdb_filehandle_t to contain the descriptor. + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * + */ +int mdb_env_get_fd(MDB_env *env, mdb_filehandle_t *fd); + /** @brief Set the size of the memory map to use for this environment. * * The size should be a multiple of the OS page size. The default is * 10485760 bytes. The size of the memory map is also the maximum size * of the database. The value should be chosen as large as possible, * to accommodate future growth of the database. - * This function may only be called after #mdb_env_create() and before #mdb_env_open(). - * The size may be changed by closing and reopening the environment. + * This function should be called after #mdb_env_create() and before #mdb_env_open(). + * It may be called at later times if no transactions are active in + * this process. Note that the library does not check for this condition, + * the caller must ensure it explicitly. + * + * If the mapsize is changed by another process, #mdb_txn_begin() will + * return #MDB_MAP_RESIZED. This function may be called with a size + * of zero to adopt the new size. + * * Any attempt to set a size smaller than the space already consumed * by the environment will be silently changed to the current size of the used space. * @param[in] env An environment handle returned by #mdb_env_create() @@ -613,18 +740,23 @@ int mdb_env_get_path(MDB_env *env, const char **path); * @return A non-zero error value on failure and 0 on success. 
Some possible * errors are: * */ int mdb_env_set_mapsize(MDB_env *env, size_t size); - /** @brief Set the maximum number of threads for the environment. + /** @brief Set the maximum number of threads/reader slots for the environment. * * This defines the number of slots in the lock table that is used to track readers in the * environment. The default is 126. + * Starting a read-only transaction normally ties a lock table slot to the + * current thread until the environment closes or the thread exits. If + * MDB_NOTLS is in use, #mdb_txn_begin() instead ties the slot to the + * MDB_txn object until it or the #MDB_env object is destroyed. * This function may only be called after #mdb_env_create() and before #mdb_env_open(). * @param[in] env An environment handle returned by #mdb_env_create() - * @param[in] readers The maximum number of threads + * @param[in] readers The maximum number of reader lock table slots * @return A non-zero error value on failure and 0 on success. Some possible * errors are: *