X-Git-Url: https://git.sur5r.net/?a=blobdiff_plain;f=libraries%2Fliblmdb%2Flmdb.h;h=5877e957d9a32df5d1285f4165aad48da7dd3b75;hb=509a1977284a8d8c3affea9fc1afb9fa70ddb533;hp=53dab204614631953f47954398d70f44ea8a8642;hpb=a1b16ce5f0f661b0dd5a30706a1d6178cf0fa561;p=openldap diff --git a/libraries/liblmdb/lmdb.h b/libraries/liblmdb/lmdb.h index 53dab20461..5877e957d9 100644 --- a/libraries/liblmdb/lmdb.h +++ b/libraries/liblmdb/lmdb.h @@ -48,8 +48,10 @@ * cause further writes to grow the database quickly, and * stale locks can block further operation. * - * Fix: Terminate all programs using the database, or make - * them close it. Next database user will reset the lockfile. + * Fix: Check for stale readers periodically, using the + * #mdb_reader_check function or the mdb_stat tool. Or just + * make all programs using the database close it; the lockfile + * is always reset on first open of the environment. * * - On BSD systems or others configured with MDB_USE_POSIX_SEM, * startup can fail due to semaphores owned by another userid. @@ -64,6 +66,20 @@ * BSD systems or when otherwise configured with MDB_USE_POSIX_SEM. * Multiple users can cause startup to fail later, as noted above. * + * - There is normally no pure read-only mode, since readers need write + * access to locks and lock file. Exceptions: On read-only filesystems + * or with the #MDB_NOLOCK flag described under #mdb_env_open(). + * + * - By default, in versions before 0.9.10, unused portions of the data + * file might receive garbage data from memory freed by other code. + * (This did not happen when using the #MDB_WRITEMAP flag.) As of + * 0.9.10 the default behavior is to initialize such memory before + * writing to the data file. Since there may be a slight performance + * cost due to this initialization, applications may disable it using + * the #MDB_NOMEMINIT flag. Applications handling sensitive data + * which must not be written, and which don't use #MDB_WRITEMAP, + * should not use this flag. + * * - A thread can only use one transaction at a time, plus any child * transactions. Each transaction belongs to one thread. See below. * The #MDB_NOTLS flag changes this for read-only transactions. @@ -86,11 +102,12 @@ * ...when several processes can use a database concurrently: * * - Avoid aborting a process with an active transaction. - * The transaction becomes "long-lived" as above until the lockfile - * is reset, since the process may not remove it from the lockfile. + * The transaction becomes "long-lived" as above until a check + * for stale readers is performed or the lockfile is reset, + * since the process may not remove it from the lockfile. * - * - If you do that anyway, close the environment once in a while, - * so the lockfile can get reset. + * - If you do that anyway, do a periodic check for stale readers. Or + * close the environment once in a while, so the lockfile can get reset. * * - Do not use MDB databases on remote filesystems, even between * processes on the same host. This breaks flock() on some OSes, @@ -138,12 +155,23 @@ extern "C" { #endif +/** Unix permissions for creating files, or dummy definition for Windows */ #ifdef _MSC_VER typedef int mdb_mode_t; #else typedef mode_t mdb_mode_t; #endif +/** An abstraction for a file handle. + * On POSIX systems file handles are small integers. On Windows + * they're opaque pointers. + */ +#ifdef _WIN32 +typedef void *mdb_filehandle_t; +#else +typedef int mdb_filehandle_t; +#endif + /** @defgroup mdb MDB API * @{ * @brief OpenLDAP Lightning Memory-Mapped Database Manager @@ -156,7 +184,7 @@ typedef mode_t mdb_mode_t; /** Library minor version */ #define MDB_VERSION_MINOR 9 /** Library patch version */ -#define MDB_VERSION_PATCH 6 +#define MDB_VERSION_PATCH 10 /** Combine args a,b,c into a single integer for easy version comparisons */ #define MDB_VERINT(a,b,c) (((a) << 24) | ((b) << 16) | (c)) @@ -166,7 +194,7 @@ typedef mode_t mdb_mode_t; MDB_VERINT(MDB_VERSION_MAJOR,MDB_VERSION_MINOR,MDB_VERSION_PATCH) /** The release date of this library version */ -#define MDB_VERSION_DATE "January 10, 2013" +#define MDB_VERSION_DATE "November 11, 2013" /** A stringifier for the version info */ #define MDB_VERSTR(a,b,c,d) "MDB " #a "." #b "." #c ": (" d ")" @@ -202,13 +230,13 @@ typedef struct MDB_cursor MDB_cursor; /** @brief Generic structure used for passing keys and data in and out * of the database. * - * Key sizes must be between 1 and the liblmdb build-time constant - * #MDB_MAXKEYSIZE inclusive. This currently defaults to 511. The - * same applies to data sizes in databases with the #MDB_DUPSORT flag. - * Other data items can in theory be from 0 to 0xffffffff bytes long. - * * Values returned from the database are valid only until a subsequent - * update operation, or the end of the transaction. + * update operation, or the end of the transaction. Do not modify or + * free them, they commonly point into the database itself. + * + * Key sizes must be between 1 and #mdb_env_get_maxkeysize() inclusive. + * The same applies to data sizes in databases with the #MDB_DUPSORT flag. + * Other data items can in theory be from 0 to 0xffffffff bytes long. */ typedef struct MDB_val { size_t mv_size; /**< size of the data item */ @@ -251,10 +279,16 @@ typedef void (MDB_rel_func)(MDB_val *item, void *oldptr, void *newptr, void *rel #define MDB_NOMETASYNC 0x40000 /** use writable mmap */ #define MDB_WRITEMAP 0x80000 - /** use asynchronous msync when MDB_WRITEMAP is used */ + /** use asynchronous msync when #MDB_WRITEMAP is used */ #define MDB_MAPASYNC 0x100000 /** tie reader locktable slots to #MDB_txn objects instead of to threads */ #define MDB_NOTLS 0x200000 + /** don't do any locking, caller must manage their own locks */ +#define MDB_NOLOCK 0x400000 + /** don't do readahead (no effect on Windows) */ +#define MDB_NORDAHEAD 0x800000 + /** don't initialize malloc'd memory before writing to datafile */ +#define MDB_NOMEMINIT 0x1000000 /** @} */ /** @defgroup mdb_dbi_open Database Flags @@ -299,7 +333,7 @@ typedef void (MDB_rel_func)(MDB_val *item, void *oldptr, void *newptr, void *rel #define MDB_APPEND 0x20000 /** Duplicate data is being appended, don't split full pages. */ #define MDB_APPENDDUP 0x40000 -/** Store multiple data items in one call. */ +/** Store multiple data items in one call. Only for #MDB_DUPFIXED. */ #define MDB_MULTIPLE 0x80000 /* @} */ @@ -372,11 +406,15 @@ typedef enum MDB_cursor_op { #define MDB_PAGE_FULL (-30786) /** Database contents grew beyond environment mapsize */ #define MDB_MAP_RESIZED (-30785) - /** Database flags changed or would change */ + /** MDB_INCOMPATIBLE: Operation and DB incompatible, or DB flags changed */ #define MDB_INCOMPATIBLE (-30784) /** Invalid reuse of reader locktable slot */ #define MDB_BAD_RSLOT (-30783) -#define MDB_LAST_ERRCODE MDB_BAD_RSLOT + /** Transaction cannot recover - it must be aborted */ +#define MDB_BAD_TXN (-30782) + /** Too big key/data, key is empty, or wrong DUPFIXED size */ +#define MDB_BAD_VALSIZE (-30781) +#define MDB_LAST_ERRCODE MDB_BAD_VALSIZE /** @} */ /** @brief Statistics for a database in the environment */ @@ -468,6 +506,8 @@ int mdb_env_create(MDB_env **env); * and uses fewer mallocs, but loses protection from application bugs * like wild pointer writes and other bad updates into the database. * Incompatible with nested transactions. + * Processes with and without MDB_WRITEMAP on the same environment do + * not cooperate well. *
  • #MDB_NOMETASYNC * Flush system buffers to disk only once per transaction, omit the * metadata flush. Defer that until the system flushes files to disk, @@ -505,6 +545,38 @@ int mdb_env_create(MDB_env **env); * user threads over individual OS threads need this option. Such an * application must also serialize the write transactions in an OS * thread, since MDB's write locking is unaware of the user threads. + *
  • #MDB_NOLOCK + * Don't do any locking. If concurrent access is anticipated, the + * caller must manage all concurrency itself. For proper operation + * the caller must enforce single-writer semantics, and must ensure + * that no readers are using old transactions while a writer is + * active. The simplest approach is to use an exclusive lock so that + * no readers may be active at all when a writer begins. + *
  • #MDB_NORDAHEAD + * Turn off readahead. Most operating systems perform readahead on + * read requests by default. This option turns it off if the OS + * supports it. Turning it off may help random read performance + * when the DB is larger than RAM and system RAM is full. + * The option is not implemented on Windows. + *
  • #MDB_NOMEMINIT + * Don't initialize malloc'd memory before writing to unused spaces + * in the data file. By default, memory for pages written to the data + * file is obtained using malloc. While these pages may be reused in + * subsequent transactions, freshly malloc'd pages will be initialized + * to zeroes before use. This avoids persisting leftover data from other + * code (that used the heap and subsequently freed the memory) into the + * data file. Note that many other system libraries may allocate + * and free memory from the heap for arbitrary uses. E.g., stdio may + * use the heap for file I/O buffers. This initialization step comes + * at some performance cost so some applications may want to disable + * it using this flag. This option can be a problem for applications + * which handle sensitive data like passwords, and it makes memory + * checkers like Valgrind noisy. This flag is not needed with #MDB_WRITEMAP, + * which writes directly to the mmap instead of using malloc for pages. The + * initialization is also skipped if #MDB_RESERVE is used; the + * caller is expected to overwrite all of the memory that was + * reserved in that case. + * This flag may be changed at any time using #mdb_env_set_flags(). * * @param[in] mode The UNIX permissions to set on created files. This parameter * is ignored on Windows. @@ -524,6 +596,10 @@ int mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t /** @brief Copy an MDB environment to the specified path. * * This function may be used to make a backup of an existing environment. + * No lockfile is created, since it gets recreated at need. + * @note This call can trigger significant file size growth if run in + * parallel with write transactions, because it employs a read-only + * transaction. See long-lived transactions under @ref caveats_sec. * @param[in] env An environment handle returned by #mdb_env_create(). It * must have already been opened successfully. * @param[in] path The directory in which the copy will reside. This @@ -533,6 +609,21 @@ int mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t */ int mdb_env_copy(MDB_env *env, const char *path); + /** @brief Copy an MDB environment to the specified file descriptor. + * + * This function may be used to make a backup of an existing environment. + * No lockfile is created, since it gets recreated at need. + * @note This call can trigger significant file size growth if run in + * parallel with write transactions, because it employs a read-only + * transaction. See long-lived transactions under @ref caveats_sec. + * @param[in] env An environment handle returned by #mdb_env_create(). It + * must have already been opened successfully. + * @param[in] fd The filedescriptor to write the copy to. It must + * have already been opened for Write access. + * @return A non-zero error value on failure and 0 on success. + */ +int mdb_env_copyfd(MDB_env *env, mdb_filehandle_t fd); + /** @brief Return statistics about the MDB environment. * * @param[in] env An environment handle returned by #mdb_env_create() @@ -619,14 +710,33 @@ int mdb_env_get_flags(MDB_env *env, unsigned int *flags); */ int mdb_env_get_path(MDB_env *env, const char **path); + /** @brief Return the filedescriptor for the given environment. + * + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[out] fd Address of a mdb_filehandle_t to contain the descriptor. + * @return A non-zero error value on failure and 0 on success. Some possible + * errors are: + * + */ +int mdb_env_get_fd(MDB_env *env, mdb_filehandle_t *fd); + /** @brief Set the size of the memory map to use for this environment. * * The size should be a multiple of the OS page size. The default is * 10485760 bytes. The size of the memory map is also the maximum size * of the database. The value should be chosen as large as possible, * to accommodate future growth of the database. - * This function may only be called after #mdb_env_create() and before #mdb_env_open(). - * The size may be changed by closing and reopening the environment. + * This function should be called after #mdb_env_create() and before #mdb_env_open(). + * It may be called at later times if no transactions are active in + * this process. Note that the library does not check for this condition, + * the caller must ensure it explicitly. + * + * If the mapsize is changed by another process, #mdb_txn_begin() will + * return #MDB_MAP_RESIZED. This function may be called with a size + * of zero to adopt the new size. + * * Any attempt to set a size smaller than the space already consumed * by the environment will be silently changed to the current size of the used space. * @param[in] env An environment handle returned by #mdb_env_create() @@ -634,7 +744,8 @@ int mdb_env_get_path(MDB_env *env, const char **path); * @return A non-zero error value on failure and 0 on success. Some possible * errors are: * */ int mdb_env_set_mapsize(MDB_env *env, size_t size); @@ -686,6 +797,15 @@ int mdb_env_get_maxreaders(MDB_env *env, unsigned int *readers); */ int mdb_env_set_maxdbs(MDB_env *env, MDB_dbi dbs); + /** @brief Get the maximum size of a key for the environment. + * + * This is the compile-time constant #MDB_MAXKEYSIZE, default 511. + * See @ref MDB_val. + * @param[in] env An environment handle returned by #mdb_env_create() + * @return The maximum size of a key + */ +int mdb_env_get_maxkeysize(MDB_env *env); + /** @brief Create a transaction for use with the environment. * * The transaction handle may be discarded using #mdb_txn_abort() or #mdb_txn_commit(). @@ -697,8 +817,8 @@ int mdb_env_set_maxdbs(MDB_env *env, MDB_dbi dbs); * @param[in] parent If this parameter is non-NULL, the new transaction * will be a nested transaction, with the transaction indicated by \b parent * as its parent. Transactions may be nested to any level. A parent - * transaction may not issue any other operations besides mdb_txn_begin, - * mdb_txn_abort, or mdb_txn_commit while it has active child transactions. + * transaction and its cursors may not issue any other operations than + * mdb_txn_commit and mdb_txn_abort while it has active child transactions. * @param[in] flags Special options for this transaction. This parameter * must be set to 0 or by bitwise OR'ing together one or more of the * values described here. @@ -713,7 +833,8 @@ int mdb_env_set_maxdbs(MDB_env *env, MDB_dbi dbs); *
  • #MDB_PANIC - a fatal error occurred earlier and the environment * must be shut down. *
  • #MDB_MAP_RESIZED - another process wrote data beyond this MDB_env's - * mapsize and the environment must be shut down. + * mapsize and this environment's map must be resized as well. + * See #mdb_env_set_mapsize(). *
  • #MDB_READERS_FULL - a read-only transaction was requested and * the reader lock table is full. See #mdb_env_set_maxreaders(). *
  • ENOMEM - out of memory. @@ -721,6 +842,12 @@ int mdb_env_set_maxdbs(MDB_env *env, MDB_dbi dbs); */ int mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **txn); + /** @brief Returns the transaction's #MDB_env + * + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + */ +MDB_env *mdb_txn_env(MDB_txn *txn); + /** @brief Commit all the operations of a transaction into the database. * * The transaction handle is freed. It and its cursors must not be used @@ -868,6 +995,15 @@ int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *d */ int mdb_stat(MDB_txn *txn, MDB_dbi dbi, MDB_stat *stat); + /** @brief Retrieve the DB flags for a database handle. + * + * @param[in] txn A transaction handle returned by #mdb_txn_begin() + * @param[in] dbi A database handle returned by #mdb_dbi_open() + * @param[out] flags Address where the flags will be returned. + * @return A non-zero error value on failure and 0 on success. + */ +int mdb_dbi_flags(MDB_txn *txn, MDB_dbi dbi, unsigned int *flags); + /** @brief Close a database handle. * * This call is not mutex protected. Handles should only be closed by @@ -879,14 +1015,12 @@ int mdb_stat(MDB_txn *txn, MDB_dbi dbi, MDB_stat *stat); */ void mdb_dbi_close(MDB_env *env, MDB_dbi dbi); - /** @brief Delete a database and/or free all its pages. + /** @brief Empty or delete+close a database. * - * If the \b del parameter is 1, the DB handle will be closed - * and the DB will be deleted. * @param[in] txn A transaction handle returned by #mdb_txn_begin() * @param[in] dbi A database handle returned by #mdb_dbi_open() - * @param[in] del 1 to delete the DB from the environment, - * 0 to just free its pages. + * @param[in] del 0 to empty the DB, 1 to delete it from the + * environment and close the DB handle. * @return A non-zero error value on failure and 0 on success. */ int mdb_drop(MDB_txn *txn, MDB_dbi dbi, int del); @@ -1028,6 +1162,8 @@ int mdb_get(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data); * reserved space, which the caller can fill in later - before * the next update operation or the transaction ends. This saves * an extra memcpy if the data is being generated later. + * MDB does nothing else with this memory, the caller is expected + * to modify all of the space requested. *
  • #MDB_APPEND - append the given key/data pair to the end of the * database. No key comparisons are performed. This option allows * fast bulk loading when keys are already known to be in the @@ -1189,6 +1325,16 @@ int mdb_cursor_get(MDB_cursor *cursor, MDB_val *key, MDB_val *data, * correct order. Loading unsorted keys with this flag will cause * data corruption. *
  • #MDB_APPENDDUP - as above, but for sorted dup data. + *
  • #MDB_MULTIPLE - store multiple contiguous data elements in a + * single request. This flag may only be specified if the database + * was opened with #MDB_DUPFIXED. The \b data argument must be an + * array of two MDB_vals. The mv_size of the first MDB_val must be + * the size of a single data element. The mv_data of the first MDB_val + * must point to the beginning of the array of contiguous data elements. + * The mv_size of the second MDB_val must be the count of the number + * of data elements to store. On return this field will be set to + * the count of the number of elements actually written. The mv_data + * of the second MDB_val is unused. * * @return A non-zero error value on failure and 0 on success. Some possible * errors are: @@ -1258,6 +1404,31 @@ int mdb_cmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b); * @return < 0 if a < b, 0 if a == b, > 0 if a > b */ int mdb_dcmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b); + + /** @brief A callback function used to print a message from the library. + * + * @param[in] msg The string to be printed. + * @param[in] ctx An arbitrary context pointer for the callback. + * @return < 0 on failure, 0 on success. + */ +typedef int (MDB_msg_func)(const char *msg, void *ctx); + + /** @brief Dump the entries in the reader lock table. + * + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[in] func A #MDB_msg_func function + * @param[in] ctx Anything the message function needs + * @return < 0 on failure, 0 on success. + */ +int mdb_reader_list(MDB_env *env, MDB_msg_func *func, void *ctx); + + /** @brief Check for stale entries in the reader lock table. + * + * @param[in] env An environment handle returned by #mdb_env_create() + * @param[out] dead Number of stale slots that were cleared + * @return 0 on success, non-zero on failure. + */ +int mdb_reader_check(MDB_env *env, int *dead); /** @} */ #ifdef __cplusplus