#endif
#include <sys/types.h>
#include <sys/stat.h>
-#include <sys/param.h>
#ifdef _WIN32
#include <windows.h>
+/** getpid() returns int; MinGW defines pid_t but MinGW64 typedefs it
+ * as int64 which is wrong. MSVC doesn't define it at all, so just
+ * don't use it.
+ */
+#define MDB_PID_T int
+#ifdef __GNUC__
+# include <sys/param.h>
#else
+# define LITTLE_ENDIAN 1234
+# define BIG_ENDIAN 4321
+# define BYTE_ORDER LITTLE_ENDIAN
+# ifndef SSIZE_MAX
+# define SSIZE_MAX INT_MAX
+# endif
+#endif
+#else
+#define MDB_PID_T pid_t
+#include <sys/param.h>
#include <sys/uio.h>
#include <sys/mman.h>
#ifdef HAVE_SYS_FILE_H
/** Test if the flags \b f are set in a flag word \b w. */
#define F_ISSET(w, f) (((w) & (f)) == (f))
+ /** Round \b n up to an even number: odd values gain one, even values are unchanged. \b n is evaluated once. */
+#define EVEN(n) (((n) + 1U) & -2) /* sign-extending -2 to match n+1U */
+
/** Used for offsets within a single page.
* Since memory pages are typically 4 or 8KB in size, 12-13 bits,
* this is plenty.
*/
txnid_t mrb_txnid;
/** The process ID of the process owning this reader txn. */
- pid_t mrb_pid;
+ MDB_PID_T mrb_pid;
/** The thread ID of the thread owning this txn. */
pthread_t mrb_tid;
} MDB_rxbody;
unsigned int me_numreaders; /**< max numreaders set by this env */
MDB_dbi me_numdbs; /**< number of DBs opened */
MDB_dbi me_maxdbs; /**< size of the DB table */
- pid_t me_pid; /**< process ID of this env */
+ MDB_PID_T me_pid; /**< process ID of this env */
char *me_path; /**< path to the DB files */
char *me_map; /**< the memory map of the data file */
MDB_txninfo *me_txns; /**< the memory map of the lock file or NULL */
mdb_page_list(MDB_page *mp)
{
MDB_node *node;
- unsigned int i, nkeys, nsize;
+ unsigned int i, nkeys, nsize, total = 0;
MDB_val key;
DKBUF;
node = NODEPTR(mp, i);
key.mv_size = node->mn_ksize;
key.mv_data = node->mn_data;
- nsize = NODESIZE + NODEKSZ(node) + sizeof(indx_t);
+ nsize = NODESIZE + key.mv_size;
if (IS_BRANCH(mp)) {
fprintf(stderr, "key %d: page %"Z"u, %s\n", i, NODEPGNO(node),
DKEY(&key));
+ total += nsize;
} else {
if (F_ISSET(node->mn_flags, F_BIGDATA))
nsize += sizeof(pgno_t);
else
nsize += NODEDSZ(node);
+ total += nsize;
+ nsize += sizeof(indx_t);
fprintf(stderr, "key %d: nsize %d, %s\n", i, nsize, DKEY(&key));
}
+ total = EVEN(total);
}
+ fprintf(stderr, "Total: %d\n", total);
}
void
MDB_env *env = txn->mt_env;
MDB_page *ret = env->me_dpages;
size_t psize = env->me_psize, sz = psize, off;
- /* For #MDB_CLEANMEM, psize counts how much to init.
+ /* Unless #MDB_NOMEMINIT is set, psize counts how much to init.
* For a single page alloc, we init everything after the page header.
* For multi-page, we init the final page; if the caller needed that
* many pages they will be filling in at least up to the last page.
off = sz - psize;
}
if ((ret = malloc(sz)) != NULL) {
- if (env->me_flags & MDB_CLEANMEM) {
+ if (!(env->me_flags & MDB_NOMEMINIT)) {
memset((char *)ret + off, 0, psize);
ret->mp_pad = 0;
}
#else
enum { Paranoid = 0, Max_retries = INT_MAX /*infinite*/ };
#endif
- int rc, n2 = num-1, retry = Max_retries;
+ int rc, retry = Max_retries;
MDB_txn *txn = mc->mc_txn;
MDB_env *env = txn->mt_env;
pgno_t pgno, *mop = env->me_pghead;
- unsigned i, j, k, mop_len = mop ? mop[0] : 0;
+ unsigned i, j, k, mop_len = mop ? mop[0] : 0, n2 = num-1;
MDB_page *np;
txnid_t oldest = 0, last;
MDB_cursor_op op;
/* Seek a big enough contiguous page range. Prefer
* pages at the tail, just truncating the list.
*/
- if (mop_len >= (unsigned)num) {
+ if (mop_len > n2) {
i = mop_len;
do {
pgno = mop[i];
if (mop[i-n2] == pgno+n2)
goto search_done;
- } while (--i >= (unsigned)num);
+ } while (--i > n2);
if (Max_retries < INT_MAX && --retry < 0)
break;
}
* lock on the lockfile, set at an offset equal to the pid.
*/
static int
-mdb_reader_pid(MDB_env *env, enum Pidlock_op op, pid_t pid)
+mdb_reader_pid(MDB_env *env, enum Pidlock_op op, MDB_PID_T pid)
{
#if !(MDB_PIDLOCK) /* Currently the same as defined(_WIN32) */
int ret = 0;
if (r->mr_pid != env->me_pid || r->mr_txnid != (txnid_t)-1)
return MDB_BAD_RSLOT;
} else {
- pid_t pid = env->me_pid;
+ MDB_PID_T pid = env->me_pid;
pthread_t tid = pthread_self();
if (!(env->me_flags & MDB_LIVE_READER)) {
return rc;
}
- /* MDB_RESERVE cancels CLEANMEM in ovpage malloc (when no WRITEMAP) */
- clean_limit = (env->me_flags & (MDB_CLEANMEM|MDB_WRITEMAP)) == MDB_CLEANMEM
- ? maxfree_1pg : SSIZE_MAX;
+ /* MDB_RESERVE cancels meminit in ovpage malloc (when no WRITEMAP) */
+ clean_limit = (env->me_flags & (MDB_NOMEMINIT|MDB_WRITEMAP))
+ ? SSIZE_MAX : maxfree_1pg;
for (;;) {
/* Come back here after each Put() in case freelist changed */
#pragma comment(linker, "/INCLUDE:_tls_used")
#pragma comment(linker, "/INCLUDE:mdb_tls_cbp")
#pragma const_seg(".CRT$XLB")
-extern const PIMAGE_TLS_CALLBACK mdb_tls_callback;
+extern const PIMAGE_TLS_CALLBACK mdb_tls_cbp;
const PIMAGE_TLS_CALLBACK mdb_tls_cbp = mdb_tls_callback;
#pragma const_seg()
#else /* WIN32 */
rsize = (env->me_maxreaders-1) * sizeof(MDB_reader) + sizeof(MDB_txninfo);
if (size < rsize && *excl > 0) {
#ifdef _WIN32
- if (SetFilePointer(env->me_lfd, rsize, NULL, FILE_BEGIN) != rsize
+ if (SetFilePointer(env->me_lfd, rsize, NULL, FILE_BEGIN) != (DWORD)rsize
|| !SetEndOfFile(env->me_lfd))
goto fail_errno;
#else
* at runtime. Changing other flags requires closing the
* environment and re-opening it with the new flags.
*/
-#define CHANGEABLE (MDB_NOSYNC|MDB_NOMETASYNC|MDB_MAPASYNC|MDB_CLEANMEM)
+#define CHANGEABLE (MDB_NOSYNC|MDB_NOMETASYNC|MDB_MAPASYNC|MDB_NOMEMINIT)
#define CHANGELESS (MDB_FIXEDMAP|MDB_NOSUBDIR|MDB_RDONLY|MDB_WRITEMAP| \
MDB_NOTLS|MDB_NOLOCK|MDB_NORDAHEAD)
if (env->me_fd != INVALID_HANDLE_VALUE)
(void) close(env->me_fd);
if (env->me_txns) {
- pid_t pid = env->me_pid;
+ MDB_PID_T pid = env->me_pid;
/* Clearing readers is done in this function because
* me_txkey with its destructor must be disabled first.
*/
if (op == MDB_GET_BOTH || rc > 0)
return MDB_NOTFOUND;
rc = 0;
+ *data = d2;
}
} else {
if (flags == MDB_CURRENT)
goto current;
- dkey = olddata;
#if UINT_MAX < SIZE_MAX
- if (mc->mc_dbx->md_dcmp == mdb_cmp_int && dkey.mv_size == sizeof(size_t))
+ if (mc->mc_dbx->md_dcmp == mdb_cmp_int && olddata.mv_size == sizeof(size_t))
#ifdef MISALIGNED_OK
mc->mc_dbx->md_dcmp = mdb_cmp_long;
#else
#endif
#endif
/* if data matches, skip it */
- if (!mc->mc_dbx->md_dcmp(data, &dkey)) {
+ if (!mc->mc_dbx->md_dcmp(data, &olddata)) {
if (flags & MDB_NODUPDATA)
rc = MDB_KEYEXIST;
else if (flags & MDB_MULTIPLE)
return rc;
}
+ /* Back up original data item */
+ dkey.mv_size = olddata.mv_size;
+ dkey.mv_data = memcpy(dbuf, olddata.mv_data, olddata.mv_size);
+
/* create a fake page for the dup items */
- memcpy(dbuf, dkey.mv_data, dkey.mv_size);
- dkey.mv_data = dbuf;
fp->mp_flags = P_LEAF|P_DIRTY|P_SUBP;
fp->mp_lower = PAGEHDRSZ;
xdata.mv_size = PAGEHDRSZ + dkey.mv_size + data->mv_size;
switch (flags) {
default:
if (!(mc->mc_db->md_flags & MDB_DUPFIXED)) {
- offset = NODESIZE + sizeof(indx_t) + data->mv_size;
- offset += offset & 1;
+ offset = EVEN(NODESIZE + sizeof(indx_t) +
+ data->mv_size);
break;
}
offset = fp->mp_pad;
/* put on overflow page */
sz -= data->mv_size - sizeof(pgno_t);
}
- sz += sz & 1;
- return sz + sizeof(indx_t);
+ return EVEN(sz + sizeof(indx_t));
}
/** Calculate the size of a branch node.
/* Put data on overflow page. */
DPRINTF(("data size is %"Z"u, node would be %"Z"u, put data on overflow page",
data->mv_size, node_size+data->mv_size));
- node_size += sizeof(pgno_t) + (node_size & 1);
+ node_size = EVEN(node_size + sizeof(pgno_t));
if ((ssize_t)node_size > room)
goto full;
if ((rc = mdb_page_new(mc, P_OVERFLOW, ovpages, &ofp)))
node_size += data->mv_size;
}
}
- node_size += node_size & 1;
+ node_size = EVEN(node_size);
if ((ssize_t)node_size > room)
goto full;
else
sz += NODEDSZ(node);
}
- sz += sz & 1;
+ sz = EVEN(sz);
ptr = mp->mp_ptrs[indx];
numkeys = NUMKEYS(mp);
MDB_node *node;
MDB_page *sp, *xp;
char *base;
- int osize, nsize;
- int delta;
+ int nsize, delta;
indx_t i, numkeys, ptr;
node = NODEPTR(mp, indx);
sp = (MDB_page *)NODEDATA(node);
- osize = NODEDSZ(node);
-
- delta = sp->mp_upper - sp->mp_lower;
- SETDSZ(node, osize - delta);
+ delta = SIZELEFT(sp);
xp = (MDB_page *)((char *)sp + delta);
/* shift subpage upward */
if (IS_LEAF2(sp)) {
nsize = NUMKEYS(sp) * sp->mp_pad;
+ if (nsize & 1)
+ return; /* do not make the node uneven-sized */
memmove(METADATA(xp), METADATA(sp), nsize);
} else {
int i;
- nsize = osize - sp->mp_upper;
numkeys = NUMKEYS(sp);
for (i=numkeys-1; i>=0; i--)
xp->mp_ptrs[i] = sp->mp_ptrs[i] - delta;
xp->mp_pad = sp->mp_pad;
COPY_PGNO(xp->mp_pgno, mp->mp_pgno);
+ nsize = NODEDSZ(node) - delta;
+ SETDSZ(node, nsize);
+
/* shift lower nodes upward */
ptr = mp->mp_ptrs[indx];
numkeys = NUMKEYS(mp);
MDB_node *node;
char *base;
size_t len;
- int delta, delta0;
+ int delta, ksize, oksize;
indx_t ptr, i, numkeys, indx;
DKBUF;
}
#endif
- delta0 = delta = key->mv_size - node->mn_ksize;
+ /* Sizes must be 2-byte aligned. */
+ ksize = EVEN(key->mv_size);
+ oksize = EVEN(node->mn_ksize);
+ delta = ksize - oksize;
- /* Must be 2-byte aligned. If new key is
- * shorter by 1, the shift will be skipped.
- */
- delta += (delta & 1);
+ /* Shift node contents if EVEN(key length) changed. */
if (delta) {
if (delta > 0 && SIZELEFT(mp) < delta) {
pgno_t pgno;
}
/* But even if no shift was needed, update ksize */
- if (delta0)
+ if (node->mn_ksize != key->mv_size)
node->mn_ksize = key->mv_size;
if (key->mv_size)
return rc;
if (IS_LEAF2(csrc->mc_pg[csrc->mc_top])) {
- srcnode = NODEPTR(csrc->mc_pg[csrc->mc_top], 0); /* fake */
key.mv_size = csrc->mc_db->md_pad;
key.mv_data = LEAF2KEY(csrc->mc_pg[csrc->mc_top], csrc->mc_ki[csrc->mc_top], key.mv_size);
data.mv_size = 0;
flags = 0;
} else {
srcnode = NODEPTR(csrc->mc_pg[csrc->mc_top], csrc->mc_ki[csrc->mc_top]);
- assert(!((long)srcnode&1));
+ assert(!((size_t)srcnode&1));
srcpg = NODEPGNO(srcnode);
flags = srcnode->mn_flags;
if (csrc->mc_ki[csrc->mc_top] == 0 && IS_BRANCH(csrc->mc_pg[csrc->mc_top])) {
nsize = mdb_leaf_size(env, newkey, newdata);
else
nsize = mdb_branch_size(env, newkey);
- nsize += nsize & 1;
+ nsize = EVEN(nsize);
/* grab a page to hold a temporary copy */
copy = mdb_page_malloc(mc->mc_txn, 1);
else
psize += NODEDSZ(node);
}
- psize += psize & 1;
+ psize = EVEN(psize);
}
if (psize > pmax || i == k-j) {
split_indx = i + (j<0);
/** Insert pid into list if not already present.
* return -1 if already present.
*/
-static int mdb_pid_insert(pid_t *ids, pid_t pid)
+static int mdb_pid_insert(MDB_PID_T *ids, MDB_PID_T pid)
{
/* binary search of pid in list */
unsigned base = 0;
{
unsigned int i, j, rdrs;
MDB_reader *mr;
- pid_t *pids, pid;
+ MDB_PID_T *pids, pid;
int count = 0;
if (!env)
if (!env->me_txns)
return MDB_SUCCESS;
rdrs = env->me_txns->mti_numreaders;
- pids = malloc((rdrs+1) * sizeof(pid_t));
+ pids = malloc((rdrs+1) * sizeof(MDB_PID_T));
if (!pids)
return ENOMEM;
pids[0] = 0;
mr = env->me_txns->mti_readers;
- j = 0;
for (i=0; i<rdrs; i++) {
if (mr[i].mr_pid && mr[i].mr_pid != env->me_pid) {
pid = mr[i].mr_pid;