#include <assert.h>
#include <errno.h>
+#include <limits.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include "mdb.h"
#include "midl.h"
+#if (__BYTE_ORDER == __LITTLE_ENDIAN) == (__BYTE_ORDER == __BIG_ENDIAN)
+# error "Unknown or unsupported endianness (__BYTE_ORDER)"
+#elif (-6 & 5) || CHAR_BIT != 8 || UINT_MAX < 0xffffffff || ULONG_MAX % 0xFFFF
+# error "Two's complement, reasonably sized integer types, please"
+#endif
+
/** @defgroup internal MDB Internals
* @{
*/
#define pthread_mutex_t HANDLE
#define pthread_key_t DWORD
#define pthread_self() GetCurrentThreadId()
-#define pthread_key_create(x,y) *(x) = TlsAlloc()
+#define pthread_key_create(x,y) (*(x) = TlsAlloc())
#define pthread_key_delete(x) TlsFree(x)
#define pthread_getspecific(x) TlsGetValue(x)
#define pthread_setspecific(x,y) TlsSetValue(x,y)
#define pthread_mutex_unlock(x) ReleaseMutex(x)
#define pthread_mutex_lock(x) WaitForSingleObject(x, INFINITE)
-#define LOCK_MUTEX_R(env) pthread_mutex_lock(env->me_rmutex)
-#define UNLOCK_MUTEX_R(env) pthread_mutex_unlock(env->me_rmutex)
-#define LOCK_MUTEX_W(env) pthread_mutex_lock(env->me_wmutex)
-#define UNLOCK_MUTEX_W(env) pthread_mutex_unlock(env->me_wmutex)
+#define LOCK_MUTEX_R(env) pthread_mutex_lock((env)->me_rmutex)
+#define UNLOCK_MUTEX_R(env) pthread_mutex_unlock((env)->me_rmutex)
+#define LOCK_MUTEX_W(env) pthread_mutex_lock((env)->me_wmutex)
+#define UNLOCK_MUTEX_W(env) pthread_mutex_unlock((env)->me_wmutex)
#define getpid() GetCurrentProcessId()
-#define fdatasync(fd) !FlushFileBuffers(fd)
+#define fdatasync(fd) (!FlushFileBuffers(fd))
#define ErrCode() GetLastError()
-#define GetPageSize(x) {SYSTEM_INFO si; GetSystemInfo(&si); (x) = si.dwPageSize;}
+#define GET_PAGESIZE(x) {SYSTEM_INFO si; GetSystemInfo(&si); (x) = si.dwPageSize;}
#define close(fd) CloseHandle(fd)
#define munmap(ptr,len) UnmapViewOfFile(ptr)
#else
/** Lock the reader mutex.
*/
-#define LOCK_MUTEX_R(env) pthread_mutex_lock(&env->me_txns->mti_mutex)
+#define LOCK_MUTEX_R(env) pthread_mutex_lock(&(env)->me_txns->mti_mutex)
/** Unlock the reader mutex.
*/
-#define UNLOCK_MUTEX_R(env) pthread_mutex_unlock(&env->me_txns->mti_mutex)
+#define UNLOCK_MUTEX_R(env) pthread_mutex_unlock(&(env)->me_txns->mti_mutex)
/** Lock the writer mutex.
* Only a single write transaction is allowed at a time. Other writers
* will block waiting for this mutex.
*/
-#define LOCK_MUTEX_W(env) pthread_mutex_lock(&env->me_txns->mti_wmutex)
+#define LOCK_MUTEX_W(env) pthread_mutex_lock(&(env)->me_txns->mti_wmutex)
/** Unlock the writer mutex.
*/
-#define UNLOCK_MUTEX_W(env) pthread_mutex_unlock(&env->me_txns->mti_wmutex)
+#define UNLOCK_MUTEX_W(env) pthread_mutex_unlock(&(env)->me_txns->mti_wmutex)
/** Get the error code for the last failed system function.
*/
* Mainly used to initialize file variables and signify that they are
* unused.
*/
-#define INVALID_HANDLE_VALUE -1
+#define INVALID_HANDLE_VALUE (-1)
/** Get the size of a memory page for the system.
* This is the basic size that the platform's memory manager uses, and is
* fundamental to the use of memory-mapped files.
*/
-#define GetPageSize(x) (x) = sysconf(_SC_PAGE_SIZE)
+#define GET_PAGESIZE(x) ((x) = sysconf(_SC_PAGE_SIZE))
#endif
/** @} */
/** A default memory page size.
* The actual size is platform-dependent, but we use this for
* boot-strapping. We probably should not be using this any more.
- * The #GetPageSize() macro is used to get the actual size.
+ * The #GET_PAGESIZE() macro is used to get the actual size.
*
* Note that we don't currently support Huge pages. On Linux,
* regular data files cannot use Huge pages, and in general
*/
#define DKEY(x) mdb_dkey(x, kbuf)
#else
-#define DKBUF
+#define DKBUF typedef int dummy_kbuf /* so we can put ';' after */
#define DKEY(x)
#endif
#define LAZY_RWLOCK_WRLOCK(x)
/** Grab the DB table read lock */
#define LAZY_RWLOCK_RDLOCK(x)
- /** Declare the DB table rwlock */
+ /** Declare the DB table rwlock. Should not be followed by ';'. */
#define LAZY_RWLOCK_DEF(x)
/** Initialize the DB table rwlock */
#define LAZY_RWLOCK_INIT(x,y)
* Since the database uses multi-version concurrency control, readers don't
* actually need any locking. This table is used to keep track of which
* readers are using data from which old transactions, so that we'll know
- * when a particular old transaction is no longer in use, Old transactions
- * that have freed any data pages can then have their freed pages reclaimed
+ * when a particular old transaction is no longer in use. Old transactions
+ * that have discarded any data pages can then have those pages reclaimed
* for use by a later write transaction.
*
* The lock table is constructed such that reader slots are aligned with the
typedef struct MDB_node {
/** lo and hi are used for data size on leaf nodes and for
* child pgno on branch nodes. On 64 bit platforms, flags
- * is also used for pgno. (branch nodes ignore flags)
+ * is also used for pgno. (Branch nodes have no flags).
+ * They are in in host byte order in case that lets some
+ * accesses be optimized into a 32-bit word access.
*/
- unsigned short mn_lo;
- unsigned short mn_hi; /**< part of dsize or pgno */
+#define mn_lo mn_offset[__BYTE_ORDER!=__LITTLE_ENDIAN]
+#define mn_hi mn_offset[__BYTE_ORDER==__LITTLE_ENDIAN] /**< part of dsize or pgno */
+ unsigned short mn_offset[2];
unsigned short mn_flags; /**< flags for special node types */
#define F_BIGDATA 0x01 /**< data put on overflow page */
#define F_SUBDATA 0x02 /**< data is a sub-database */
#define LEAF2KEY(p, i, ks) ((char *)(p) + PAGEHDRSZ + ((i)*(ks)))
/** Set the \b node's key into \b key, if requested. */
-#define MDB_SET_KEY(node, key) if (key!=NULL) {(key)->mv_size = NODEKSZ(node); (key)->mv_data = NODEKEY(node);}
+#define MDB_SET_KEY(node, key) { if ((key) != NULL) { \
+ (key)->mv_size = NODEKSZ(node); (key)->mv_data = NODEKEY(node); } }
/** Information about a single database in the environment. */
typedef struct MDB_db {
size_t me_mapsize; /**< size of the data memory map */
off_t me_size; /**< current file size */
pgno_t me_maxpg; /**< me_mapsize / me_psize */
- unsigned int me_psize; /**< size of a page, from #GetPageSize */
+ unsigned int me_psize; /**< size of a page, from #GET_PAGESIZE */
unsigned int me_db_toggle; /**< which DB table is current */
MDB_dbx *me_dbxs; /**< array of static DB info */
MDB_db *me_dbs[2]; /**< two arrays of MDB_db info */
/** ID2L of pages that were written during a write txn */
ID2 me_dirty_list[MDB_IDL_UM_SIZE];
/** rwlock for the DB tables, if #LAZY_LOCKS is false */
- LAZY_RWLOCK_DEF(me_dblock);
+ LAZY_RWLOCK_DEF(me_dblock)
#ifdef _WIN32
HANDLE me_rmutex; /* Windows mutexes don't reside in shared mem */
HANDLE me_wmutex;
* @param[in] buf the buffer to write into. Should always be #DKBUF.
* @return The key in hexadecimal form.
*/
-static char *
+char *
mdb_dkey(MDB_val *key, char *buf)
{
char *ptr = buf;
DPUTS("writing new meta page");
- GetPageSize(psize);
+ GET_PAGESIZE(psize);
meta->mm_magic = MDB_MAGIC;
meta->mm_version = MDB_VERSION;
unsigned short *u, *c;
int x;
- u = a->mv_data + a->mv_size;
- c = b->mv_data + a->mv_size;
+ u = (unsigned short *) ((char *) a->mv_data + a->mv_size);
+ c = (unsigned short *) ((char *) b->mv_data + a->mv_size);
do {
x = *--u - *--c;
} while(!x && u > (unsigned short *)a->mv_data);
static int
memncmp(const MDB_val *a, const MDB_val *b)
{
- int diff, len_diff;
+ int diff;
+ ssize_t len_diff;
unsigned int len;
len = a->mv_size;
- len_diff = a->mv_size - b->mv_size;
- if (len_diff > 0)
+ len_diff = (ssize_t) a->mv_size - (ssize_t) b->mv_size;
+ if (len_diff > 0) {
len = b->mv_size;
+ len_diff = 1;
+ }
+
diff = memcmp(a->mv_data, b->mv_data, len);
- return diff ? diff : len_diff;
+ return diff ? diff : len_diff<0 ? -1 : len_diff;
}
static int
memnrcmp(const MDB_val *a, const MDB_val *b)
{
const unsigned char *p1, *p2, *p1_lim;
- int diff, len_diff;
-
- if (b->mv_size == 0)
- return a->mv_size != 0;
- if (a->mv_size == 0)
- return -1;
+ ssize_t len_diff;
+ int diff;
- p1 = (const unsigned char *)a->mv_data + a->mv_size - 1;
- p2 = (const unsigned char *)b->mv_data + b->mv_size - 1;
+ p1_lim = (const unsigned char *)a->mv_data;
+ p1 = (const unsigned char *)a->mv_data + a->mv_size;
+ p2 = (const unsigned char *)b->mv_data + b->mv_size;
- len_diff = a->mv_size - b->mv_size;
- if (len_diff < 0)
- p1_lim = p1 - a->mv_size;
- else
- p1_lim = p1 - b->mv_size;
+ len_diff = (ssize_t) a->mv_size - (ssize_t) b->mv_size;
+ if (len_diff > 0) {
+ p1_lim += len_diff;
+ len_diff = 1;
+ }
while (p1 > p1_lim) {
- diff = *p1 - *p2;
+ diff = *--p1 - *--p2;
if (diff)
return diff;
- p1--;
- p2--;
}
- return len_diff;
+ return len_diff<0 ? -1 : len_diff;
}
/* Search for key within a leaf page, using binary search.
goto top;
} else {
int exact = 0;
- rc = mdb_cursor_set(mc, key, NULL, MDB_SET, &exact);
+ MDB_val d2;
+ rc = mdb_cursor_set(mc, key, &d2, MDB_SET, &exact);
if (flags == MDB_NOOVERWRITE && rc == 0) {
DPRINTF("duplicate key [%s]", DKEY(key));
+ *data = d2;
return MDB_KEYEXIST;
}
if (rc && rc != MDB_NOTFOUND)