#define METADATA(p) ((void *)((char *)(p) + PAGEHDRSZ))
+/* We guarantee 2-byte alignment for nodes */
typedef struct MDB_node {
-#define mn_pgno mn_p.np_pgno
-#define mn_dsize mn_p.np_dsize
- union {
- pgno_t np_pgno; /* child page number */
- uint32_t np_dsize; /* leaf data size */
- } mn_p;
- unsigned int mn_flags:4;
- unsigned int mn_ksize:12; /* key size */
+ /* lo and hi are used for data size on leaf nodes and for
+ * child pgno on branch nodes. On 64 bit platforms, flags
+ * is also used for pgno. (branch nodes ignore flags)
+ */
+ unsigned short mn_lo;
+ unsigned short mn_hi;
+ unsigned short mn_flags;
#define F_BIGDATA 0x01 /* data put on overflow page */
#define F_SUBDATA 0x02 /* data is a sub-database */
#define F_DUPDATA 0x04 /* data has duplicates */
+ unsigned short mn_ksize; /* key size */
char mn_data[1];
} MDB_node;
pthread_key_t me_txkey; /* thread-key for readers */
MDB_dpage *me_dpages;
pgno_t me_free_pgs[MDB_IDL_UM_SIZE];
- ID2 me_dirty_list[MDB_IDL_DB_SIZE];
+ ID2 me_dirty_list[MDB_IDL_UM_SIZE];
LAZY_RWLOCK_DEF(me_dblock);
#ifdef _WIN32
HANDLE me_rmutex; /* Windows mutexes don't reside in shared mem */
#define NODEPTR(p, i) ((MDB_node *)((char *)(p) + (p)->mp_ptrs[i]))
#define NODEKEY(node) (void *)((node)->mn_data)
#define NODEDATA(node) (void *)((char *)(node)->mn_data + (node)->mn_ksize)
-#define NODEPGNO(node) ((node)->mn_pgno)
-#define NODEDSZ(node) ((node)->mn_dsize)
+#if LONG_MAX == 0x7fffffff
+#define NODEPGNO(node) ((node)->mn_lo | ((node)->mn_hi << 16))
+#define SETPGNO(node,pgno) do { \
+ (node)->mn_lo = (pgno) & 0xffff; (node)->mn_hi = (pgno) >> 16;} while(0)
+#else
+#define NODEPGNO(node) ((node)->mn_lo | ((node)->mn_hi << 16) | ((unsigned long)(node)->mn_flags << 32))
+#define SETPGNO(node,pgno) do { \
+ (node)->mn_lo = (pgno) & 0xffff; (node)->mn_hi = (pgno) >> 16; \
+ (node)->mn_flags = (pgno) >> 32; } while(0)
+#endif
+#define NODEDSZ(node) ((node)->mn_lo | ((unsigned)(node)->mn_hi << 16))
+#define SETDSZ(node,size) do { \
+ (node)->mn_lo = (size) & 0xffff; (node)->mn_hi = (size) >> 16;} while(0)
#define NODEKSZ(node) ((node)->mn_ksize)
#define LEAF2KEY(p, i, ks) ((char *)(p) + PAGEHDRSZ + ((i)*(ks)))
static void mdb_del_node(MDB_page *mp, indx_t indx, int ksize);
static int mdb_del0(MDB_cursor *mc, unsigned int ki,
MDB_pageparent *mpp, MDB_node *leaf);
-#if 0
-static int mdb_put0(MDB_txn *txn, MDB_dbi dbi,
- MDB_val *key, MDB_val *data, unsigned int flags);
-#endif
static int mdb_read_data(MDB_txn *txn, MDB_node *leaf, MDB_val *data);
static int mdb_rebalance(MDB_txn *txn, MDB_dbi dbi, MDB_pageparent *mp);
MDB_val *data);
static size_t mdb_branch_size(MDB_env *env, MDB_val *key);
-static int memncmp(const void *s1, size_t n1,
- const void *s2, size_t n2);
-static int memnrcmp(const void *s1, size_t n1,
- const void *s2, size_t n2);
+static void mdb_default_cmp(MDB_txn *txn, MDB_dbi dbi);
+
+static MDB_cmp_func memncmp, memnrcmp, intcmp, cintcmp;
#ifdef _WIN32
static SECURITY_DESCRIPTOR mdb_null_sd;
static int mdb_sec_inited;
#endif
-static int
-memncmp(const void *s1, size_t n1, const void *s2, size_t n2)
-{
- int diff, len_diff = -1;
-
- if (n1 >= n2) {
- len_diff = (n1 > n2);
- n1 = n2;
- }
- diff = memcmp(s1, s2, n1);
- return diff ? diff : len_diff;
-}
-
-static int
-memnrcmp(const void *s1, size_t n1, const void *s2, size_t n2)
-{
- const unsigned char *p1, *p2, *p1_lim;
-
- if (n2 == 0)
- return n1 != 0;
- if (n1 == 0)
- return -1;
-
- p1 = (const unsigned char *)s1 + n1 - 1;
- p2 = (const unsigned char *)s2 + n2 - 1;
-
- for (p1_lim = (n1 <= n2 ? s1 : s2); *p1 == *p2; p1--, p2--) {
- if (p1 == p1_lim)
- return (p1 != s1) ? (p1 != p2) : (p2 != s2) ? -1 : 0;
- }
- return *p1 - *p2;
-}
-
char *
mdb_version(int *maj, int *min, int *pat)
{
unsigned int i;
if (key->mv_size > MAXKEYSIZE)
return "MAXKEYSIZE";
-#if 0
+#if 1
for (i=0; i<key->mv_size; i++)
ptr += sprintf(ptr, "%02x", *c++);
#else
int
mdb_cmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b)
{
- if (txn->mt_dbxs[dbi].md_cmp)
- return txn->mt_dbxs[dbi].md_cmp(a, b);
-
- if (txn->mt_dbs[dbi].md_flags & (MDB_REVERSEKEY
-#if __BYTE_ORDER == __LITTLE_ENDIAN
- |MDB_INTEGERKEY
-#endif
- ))
- return memnrcmp(a->mv_data, a->mv_size, b->mv_data, b->mv_size);
- else
- return memncmp((char *)a->mv_data, a->mv_size, b->mv_data, b->mv_size);
+ return txn->mt_dbxs[dbi].md_cmp(a, b);
}
int
{
if (txn->mt_dbxs[dbi].md_dcmp)
return txn->mt_dbxs[dbi].md_dcmp(a, b);
-
- if (txn->mt_dbs[dbi].md_flags & (0
-#if __BYTE_ORDER == __LITTLE_ENDIAN
- |MDB_INTEGERDUP
-#endif
- ))
- return memnrcmp(a->mv_data, a->mv_size, b->mv_data, b->mv_size);
else
- return memncmp((char *)a->mv_data, a->mv_size, b->mv_data, b->mv_size);
+ return EINVAL; /* too bad you can't distinguish this from a valid result */
}
/* Allocate new page(s) for writing */
return ENOMEM;
DPRINTF("touched db %u page %lu -> %lu", dbi, mp->mp_pgno, dp->p.mp_pgno);
assert(mp->mp_pgno != dp->p.mp_pgno);
- mdb_midl_insert(txn->mt_free_pgs, mp->mp_pgno);
+ mdb_midl_append(txn->mt_free_pgs, mp->mp_pgno);
pgno = dp->p.mp_pgno;
memcpy(&dp->p, mp, txn->mt_env->me_psize);
mp = &dp->p;
/* Update the page number to new touched page. */
if (pp->mp_parent != NULL)
- NODEPGNO(NODEPTR(pp->mp_parent, pp->mp_pi)) = mp->mp_pgno;
+ SETPGNO(NODEPTR(pp->mp_parent, pp->mp_pi), mp->mp_pgno);
pp->mp_page = mp;
}
return 0;
mc.mc_snum = 0;
mdb_search_page(txn, FREE_DBI, &key, &mc, 1, &mpp);
+ mdb_midl_sort(txn->mt_free_pgs);
#if DEBUG > 1
{
unsigned int i;
free(env);
}
+/* only for aligned ints */
+static int
+intcmp(const MDB_val *a, const MDB_val *b)
+{
+ if (a->mv_size == sizeof(long))
+ {
+ unsigned long *la, *lb;
+ la = a->mv_data;
+ lb = b->mv_data;
+ return *la - *lb;
+ } else {
+ unsigned int *ia, *ib;
+ ia = a->mv_data;
+ ib = b->mv_data;
+ return *ia - *ib;
+ }
+}
+
+/* ints must always be the same size */
+static int
+cintcmp(const MDB_val *a, const MDB_val *b)
+{
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ unsigned char *u, *c;
+ int x;
+
+ u = a->mv_data + a->mv_size;
+ c = b->mv_data + a->mv_size;
+ while(u > (unsigned char *)a->mv_data) {
+ x = *--u - *--c;
+ if (x) break;
+ }
+ return x;
+#else
+ return memcmp(a->mv_data, b->mv_data, a->mv_size);
+#endif
+}
+
+static int
+memncmp(const MDB_val *a, const MDB_val *b)
+{
+ int diff, len_diff;
+ unsigned int len;
+
+ len = a->mv_size;
+ len_diff = a->mv_size - b->mv_size;
+ if (len_diff > 0)
+ len = b->mv_size;
+ diff = memcmp(a->mv_data, b->mv_data, len);
+ return diff ? diff : len_diff;
+}
+
+static int
+memnrcmp(const MDB_val *a, const MDB_val *b)
+{
+ const unsigned char *p1, *p2, *p1_lim;
+ int diff, len_diff;
+
+ if (b->mv_size == 0)
+ return a->mv_size != 0;
+ if (a->mv_size == 0)
+ return -1;
+
+ p1 = (const unsigned char *)a->mv_data + a->mv_size - 1;
+ p2 = (const unsigned char *)b->mv_data + b->mv_size - 1;
+
+ len_diff = a->mv_size - b->mv_size;
+ if (len_diff < 0)
+ p1_lim = p1 - a->mv_size;
+ else
+ p1_lim = p1 - b->mv_size;
+
+ while (p1 > p1_lim) {
+ diff = *p1 - *p2;
+ if (diff)
+ return diff;
+ p1--;
+ p2--;
+ }
+ return len_diff;
+}
+
/* Search for key within a leaf page, using binary search.
* Returns the smallest entry larger or equal to the key.
* If exactp is non-null, stores whether the found entry was an exact match
mdb_search_node(MDB_txn *txn, MDB_dbi dbi, MDB_page *mp, MDB_val *key,
int *exactp, unsigned int *kip)
{
- unsigned int i = 0;
+ unsigned int i = 0, nkeys;
int low, high;
int rc = 0;
MDB_node *node = NULL;
MDB_val nodekey;
+ MDB_cmp_func *cmp;
DKBUF;
+ nkeys = NUMKEYS(mp);
+
DPRINTF("searching %u keys in %s page %lu",
- NUMKEYS(mp),
- IS_LEAF(mp) ? "leaf" : "branch",
+ nkeys, IS_LEAF(mp) ? "leaf" : "branch",
mp->mp_pgno);
- assert(NUMKEYS(mp) > 0);
-
- memset(&nodekey, 0, sizeof(nodekey));
+ assert(nkeys > 0);
low = IS_LEAF(mp) ? 0 : 1;
- high = NUMKEYS(mp) - 1;
+ high = nkeys - 1;
+ cmp = txn->mt_dbxs[dbi].md_cmp;
+ if (IS_LEAF2(mp)) {
+ nodekey.mv_size = txn->mt_dbs[dbi].md_pad;
+ node = NODEPTR(mp, 0); /* fake */
+ }
while (low <= high) {
i = (low + high) >> 1;
if (IS_LEAF2(mp)) {
- nodekey.mv_size = txn->mt_dbs[dbi].md_pad;
nodekey.mv_data = LEAF2KEY(mp, i, nodekey.mv_size);
} else {
node = NODEPTR(mp, i);
nodekey.mv_data = NODEKEY(node);
}
- rc = mdb_cmp(txn, dbi, key, &nodekey);
+ rc = cmp(key, &nodekey);
+#if DEBUG
if (IS_LEAF(mp))
DPRINTF("found leaf index %u [%s], rc = %i",
i, DKEY(&nodekey), rc);
else
DPRINTF("found branch index %u [%s -> %lu], rc = %i",
i, DKEY(&nodekey), NODEPGNO(node), rc);
+#endif
if (rc == 0)
break;
if (rc > 0) { /* Found entry is less than the key. */
i++; /* Skip to get the smallest entry larger than key. */
+ if (!IS_LEAF2(mp))
+ node = NODEPTR(mp, i);
}
if (exactp)
*exactp = (rc == 0);
if (kip) /* Store the key index if requested. */
*kip = i;
- if (i >= NUMKEYS(mp))
+ if (i >= nkeys)
/* There is no entry larger or equal to the key. */
return NULL;
/* nodeptr is fake for LEAF2 */
- return IS_LEAF2(mp) ? NODEPTR(mp, 0) : NODEPTR(mp, i);
+ return node;
}
static void
return ENOMEM;
if (modify) {
- MDB_dhead *dh = ((MDB_dhead *)mp)-1;
+ MDB_dhead *dh;
if ((rc = mdb_touch(txn, dbi, mpp)) != 0)
return rc;
dh = ((MDB_dhead *)mpp->mp_page)-1;
int rc;
if (!F_ISSET(leaf->mn_flags, F_BIGDATA)) {
- data->mv_size = leaf->mn_dsize;
+ data->mv_size = NODEDSZ(leaf);
data->mv_data = NODEDATA(leaf);
return MDB_SUCCESS;
}
/* Read overflow data.
*/
- data->mv_size = leaf->mn_dsize;
+ data->mv_size = NODEDSZ(leaf);
memcpy(&pgno, NODEDATA(leaf), sizeof(pgno));
if ((rc = mdb_get_page(txn, pgno, &omp))) {
DPRINTF("read overflow page %lu failed", pgno);
{
MDB_cursor mc;
MDB_xcursor mx;
- int exact;
+ int exact = 0;
DKBUF;
assert(key);
leaf = NODEPTR(top->mp_page, 0);
MDB_SET_KEY(leaf, &nodekey);
}
- rc = mdb_cmp(cursor->mc_txn, cursor->mc_dbi, key, &nodekey);
+ rc = cursor->mc_txn->mt_dbxs[cursor->mc_dbi].md_cmp(key, &nodekey);
if (rc == 0) {
+ /* Probably happens rarely, but first node on the page
+ * was the one we wanted.
+ */
+ top->mp_ki = 0;
set1:
- /* we're already on the right page */
mpp.mp_page = top->mp_page;
+ if (exactp)
+ *exactp = 1;
rc = 0;
- goto set2;
+ leaf = NODEPTR(top->mp_page, top->mp_ki);
+ goto set3;
}
if (rc > 0) {
unsigned int i;
leaf = NODEPTR(top->mp_page, NUMKEYS(top->mp_page)-1);
MDB_SET_KEY(leaf, &nodekey);
}
- rc = mdb_cmp(cursor->mc_txn, cursor->mc_dbi, key, &nodekey);
- if (rc <= 0) goto set1;
+ rc = cursor->mc_txn->mt_dbxs[cursor->mc_dbi].md_cmp(key, &nodekey);
+ if (rc == 0) {
+ /* last node was the one we wanted */
+ top->mp_ki = NUMKEYS(top->mp_page)-1;
+ goto set1;
+ }
+ if (rc < 0) {
+ /* This is definitely the right page, skip search_page */
+ mpp.mp_page = top->mp_page;
+ rc = 0;
+ goto set2;
+ }
}
/* If any parents have right-sibs, search.
* Otherwise, there's nothing further.
break;
if (i == cursor->mc_snum - 1) {
/* There are no other pages */
- goto set1;
+ top->mp_ki = NUMKEYS(top->mp_page);
+ return MDB_NOTFOUND;
}
}
}
leaf = NODEPTR(mpp.mp_page, 0);
}
+set3:
cursor->mc_flags |= C_INITIALIZED;
cursor->mc_flags &= ~C_EOF;
MDB_val d2;
if ((rc = mdb_read_data(cursor->mc_txn, leaf, &d2)) != MDB_SUCCESS)
return rc;
- rc = mdb_dcmp(cursor->mc_txn, cursor->mc_dbi, data, &d2);
+ rc = cursor->mc_txn->mt_dbxs[cursor->mc_dbi].md_dcmp(data, &d2);
if (rc) {
if (op == MDB_GET_BOTH || rc > 0)
return MDB_NOTFOUND;
if (rc == MDB_SUCCESS) {
/* there's only a key anyway, so this is a no-op */
if (IS_LEAF2(top->mp_page)) {
- int ksize = mc->mc_txn->mt_dbs[mc->mc_dbi].md_pad;
+ unsigned int ksize = mc->mc_txn->mt_dbs[mc->mc_dbi].md_pad;
if (key->mv_size != ksize)
return EINVAL;
if (flags == MDB_CURRENT) {
MDB_pageparent mp2;
if (flags != MDB_NODUPDATA) {
-/* mdb_xcursor_init2(mc); */
+ mdb_xcursor_init2(mc);
rc = mdb_cursor_del(&mc->mc_xcursor->mx_cursor, 0);
mdb_xcursor_fini(mc);
/* If sub-DB still has entries, we're done */
NULL, &mc->mc_xcursor->mx_cursor, 0, &mp2);
if (rc == MDB_SUCCESS) {
MDB_ppage *top, *parent;
- MDB_dpage *dp;
- ID2 mid;
MDB_node *ni;
unsigned int i;
+#if 0
+ MDB_dpage *dp;
+ ID2 mid;
int dirty_root = 0;
+#endif
mc->mc_txn->mt_dbs[mc->mc_dbi].md_entries -=
mc->mc_xcursor->mx_txn.mt_dbs[mc->mc_xcursor->mx_cursor.mc_dbi].md_entries;
#endif
{
/* free it */
- mdb_midl_insert(mc->mc_txn->mt_free_pgs, pg);
+ mdb_midl_append(mc->mc_txn->mt_free_pgs, pg);
}
}
rc = mdb_sibling(&mc->mc_xcursor->mx_cursor, 1);
#endif
{
/* free it */
- mdb_midl_insert(mc->mc_txn->mt_free_pgs,
+ mdb_midl_append(mc->mc_txn->mt_free_pgs,
mc->mc_xcursor->mx_txn.mt_dbs[mc->mc_xcursor->mx_cursor.mc_dbi].md_root);
}
}
/* put on overflow page */
sz -= data->mv_size - sizeof(pgno_t);
}
+ sz += sz & 1;
return sz + sizeof(indx_t);
}
node_size += data->mv_size;
}
}
+ node_size += node_size & 1;
if (node_size + sizeof(indx_t) > SIZELEFT(mp)) {
DPRINTF("not enough room in page %lu, got %u ptrs",
node->mn_ksize = (key == NULL) ? 0 : key->mv_size;
node->mn_flags = flags;
if (IS_LEAF(mp))
- node->mn_dsize = data->mv_size;
+ SETDSZ(node,data->mv_size);
else
- NODEPGNO(node) = pgno;
+ SETPGNO(node,pgno);
if (key)
memcpy(NODEKEY(node), key->mv_data, key->mv_size);
else
sz += NODEDSZ(node);
}
+ sz += sz & 1;
ptr = mp->mp_ptrs[indx];
numkeys = NUMKEYS(mp);
mx->mx_dbxs[dbn+1].md_rel = mx->mx_dbxs[dbn].md_rel;
mx->mx_dbxs[dbn+1].md_dirty = 0;
mx->mx_txn.mt_numdbs = dbn+2;
+ mx->mx_txn.mt_u = mc->mc_txn->mt_u;
mx->mx_cursor.mc_xcursor = NULL;
mx->mx_cursor.mc_txn = &mx->mx_txn;
mx->mx_dbs[1] = mc->mc_txn->mt_dbs[1];
if (mc->mc_dbi > 1) {
mx->mx_dbs[2] = mc->mc_txn->mt_dbs[mc->mc_dbi];
+ mx->mx_dbxs[2].md_dirty = mc->mc_txn->mt_dbxs[mc->mc_dbi].md_dirty;
dbn = 3;
} else {
dbn = 2;
mx->mx_dbxs[dbn].md_name.mv_data = NODEKEY(node);
mx->mx_dbxs[dbn].md_name.mv_size = node->mn_ksize;
mx->mx_txn.mt_next_pgno = mc->mc_txn->mt_next_pgno;
- mx->mx_txn.mt_u = mc->mc_txn->mt_u;
mx->mx_cursor.mc_snum = 0;
mx->mx_cursor.mc_flags = 0;
}
mx->mx_dbs[1] = mc->mc_txn->mt_dbs[1];
if (mc->mc_dbi > 1) {
mx->mx_dbs[2] = mc->mc_txn->mt_dbs[mc->mc_dbi];
+ mx->mx_dbxs[2].md_dirty = mc->mc_txn->mt_dbxs[mc->mc_dbi].md_dirty;
dbn = 3;
} else {
dbn = 2;
mc->mc_txn->mt_next_pgno = mx->mx_txn.mt_next_pgno;
mc->mc_txn->mt_dbs[0] = mx->mx_dbs[0];
mc->mc_txn->mt_dbs[1] = mx->mx_dbs[1];
- mc->mc_txn->mt_dbxs[0].md_dirty = mx->mx_dbxs[0].md_dirty;
- mc->mc_txn->mt_dbxs[1].md_dirty = mx->mx_dbxs[1].md_dirty;
if (mc->mc_dbi > 1) {
mc->mc_txn->mt_dbs[mc->mc_dbi] = mx->mx_dbs[2];
mc->mc_txn->mt_dbxs[mc->mc_dbi].md_dirty = mx->mx_dbxs[2].md_dirty;
data.mv_size = 0;
data.mv_data = NULL;
} else {
- srcnode = NODEPTR(src->mp_page, srcindx);
+ if (srcindx == 0 && IS_BRANCH(src->mp_page)) {
+ /* must find the lowest key below src */
+ MDB_pageparent mpp;
+ mpp.mp_page = src->mp_page;
+ mpp.mp_pi = 0;
+ mdb_search_page_root(txn, dbi, NULL, NULL, 0, &mpp);
+ srcnode = NODEPTR(mpp.mp_page, 0);
+ } else {
+ srcnode = NODEPTR(src->mp_page, srcindx);
+ }
key.mv_size = NODEKSZ(srcnode);
key.mv_data = NODEKEY(srcnode);
data.mv_size = NODEDSZ(srcnode);
*/
mdb_del_node(src->mp_page, srcindx, key.mv_size);
- /* The key value just changed due to del_node, find it again.
- */
- if (!IS_LEAF2(src->mp_page)) {
- srcnode = NODEPTR(src->mp_page, srcindx);
- key.mv_data = NODEKEY(srcnode);
- }
-
/* Update the parent separators.
*/
if (srcindx == 0) {
if (src->mp_pi != 0) {
+ if (IS_LEAF2(src->mp_page)) {
+ key.mv_data = LEAF2KEY(src->mp_page, srcindx, key.mv_size);
+ } else {
+ srcnode = NODEPTR(src->mp_page, srcindx);
+ key.mv_size = NODEKSZ(srcnode);
+ key.mv_data = NODEKEY(srcnode);
+ }
DPRINTF("update separator for source page %lu to [%s]",
src->mp_page->mp_pgno, DKEY(&key));
if ((rc = mdb_update_key(src->mp_parent, src->mp_pi,
if (dstindx == 0) {
if (dst->mp_pi != 0) {
+ if (IS_LEAF2(src->mp_page)) {
+ key.mv_data = LEAF2KEY(dst->mp_page, 0, key.mv_size);
+ } else {
+ srcnode = NODEPTR(dst->mp_page, 0);
+ key.mv_size = NODEKSZ(srcnode);
+ key.mv_data = NODEKEY(srcnode);
+ }
DPRINTF("update separator for destination page %lu to [%s]",
dst->mp_page->mp_pgno, DKEY(&key));
if ((rc = mdb_update_key(dst->mp_parent, dst->mp_pi,
txn->mt_dbs[dbi].md_root = P_INVALID;
txn->mt_dbs[dbi].md_depth = 0;
txn->mt_dbs[dbi].md_leaf_pages = 0;
- mdb_midl_insert(txn->mt_free_pgs, mpp->mp_page->mp_pgno);
+ mdb_midl_append(txn->mt_free_pgs, mpp->mp_page->mp_pgno);
} else if (IS_BRANCH(mpp->mp_page) && NUMKEYS(mpp->mp_page) == 1) {
DPUTS("collapsing root page!");
- mdb_midl_insert(txn->mt_free_pgs, mpp->mp_page->mp_pgno);
+ mdb_midl_append(txn->mt_free_pgs, mpp->mp_page->mp_pgno);
txn->mt_dbs[dbi].md_root = NODEPGNO(NODEPTR(mpp->mp_page, 0));
if ((rc = mdb_get_page(txn, txn->mt_dbs[dbi].md_root, &root)))
return rc;
ovpages = OVPAGES(NODEDSZ(leaf), mc->mc_txn->mt_env->me_psize);
for (i=0; i<ovpages; i++) {
DPRINTF("freed ov page %lu", pg);
- mdb_midl_insert(mc->mc_txn->mt_free_pgs, pg);
+ mdb_midl_append(mc->mc_txn->mt_free_pgs, pg);
pg++;
}
}
psize += sizeof(pgno_t);
else
psize += NODEDSZ(node);
+ psize += psize & 1;
if (psize > pmax) {
split_indx = i;
break;
psize += sizeof(pgno_t);
else
psize += NODEDSZ(node);
+ psize += psize & 1;
if (psize > pmax) {
split_indx = i+1;
break;
rkey.mv_size = node->mn_ksize;
if (IS_LEAF(&mdp->p)) {
rdata.mv_data = NODEDATA(node);
- rdata.mv_size = node->mn_dsize;
+ rdata.mv_size = NODEDSZ(node);
} else
pgno = NODEPGNO(node);
flags = node->mn_flags;
return rc;
}
-#if 0
-static int
-mdb_put0(MDB_txn *txn, MDB_dbi dbi,
- MDB_val *key, MDB_val *data, unsigned int flags)
-{
- int rc = MDB_SUCCESS, exact;
- unsigned int ki;
- MDB_node *leaf;
- MDB_pageparent mpp;
- MDB_val xdata, *rdata, dkey;
- MDB_db dummy;
- char dbuf[PAGESIZE];
- int do_sub = 0;
- size_t nsize;
- DKBUF;
-
- DPRINTF("==> put db %u key [%s], size %zu, data size %zu",
- dbi, DKEY(key), key->mv_size, data->mv_size);
-
- dkey.mv_size = 0;
- mpp.mp_parent = NULL;
- mpp.mp_pi = 0;
- rc = mdb_search_page(txn, dbi, key, NULL, 1, &mpp);
- if (rc == MDB_SUCCESS) {
- leaf = mdb_search_node(txn, dbi, mpp.mp_page, key, &exact, &ki);
- if (leaf && exact) {
- if (flags == MDB_NOOVERWRITE) {
- DPRINTF("duplicate key [%s]", DKEY(key));
- return MDB_KEYEXIST;
- }
- /* there's only a key anyway, so this is a no-op */
- if (IS_LEAF2(mpp.mp_page))
- return MDB_SUCCESS;
-
- if (F_ISSET(txn->mt_dbs[dbi].md_flags, MDB_DUPSORT)) {
- /* Was a single item before, must convert now */
- if (!F_ISSET(leaf->mn_flags, F_DUPDATA)) {
- dkey.mv_size = NODEDSZ(leaf);
- dkey.mv_data = dbuf;
- memcpy(dbuf, NODEDATA(leaf), dkey.mv_size);
- /* data matches, ignore it */
- if (!mdb_dcmp(txn, dbi, data, &dkey))
- return (flags == MDB_NODUPDATA) ? MDB_KEYEXIST : MDB_SUCCESS;
- memset(&dummy, 0, sizeof(dummy));
- if (txn->mt_dbs[dbi].md_flags & MDB_DUPFIXED) {
- dummy.md_pad = data->mv_size;
- dummy.md_flags = MDB_DUPFIXED;
- if (txn->mt_dbs[dbi].md_flags & MDB_INTEGERDUP)
- dummy.md_flags |= MDB_INTEGERKEY;
- }
- dummy.md_root = P_INVALID;
- if (dkey.mv_size == sizeof(MDB_db)) {
- memcpy(NODEDATA(leaf), &dummy, sizeof(dummy));
- goto put_sub;
- }
- mdb_del_node(mpp.mp_page, ki, 0);
- do_sub = 1;
- rdata = &xdata;
- xdata.mv_size = sizeof(MDB_db);
- xdata.mv_data = &dummy;
- goto new_sub;
- }
- goto put_sub;
- }
- /* same size, just replace it */
- if (!F_ISSET(leaf->mn_flags, F_BIGDATA) &&
- NODEDSZ(leaf) == data->mv_size) {
- memcpy(NODEDATA(leaf), data->mv_data, data->mv_size);
- goto done;
- }
- mdb_del_node(mpp.mp_page, ki, 0);
- }
- if (leaf == NULL) { /* append if not found */
- ki = NUMKEYS(mpp.mp_page);
- DPRINTF("appending key at index %i", ki);
- }
- } else if (rc == MDB_NOTFOUND) {
- MDB_dpage *dp;
- /* new file, just write a root leaf page */
- DPUTS("allocating new root leaf page");
- if ((dp = mdb_new_page(txn, dbi, P_LEAF, 1)) == NULL) {
- return ENOMEM;
- }
- mpp.mp_page = &dp->p;
- txn->mt_dbs[dbi].md_root = mpp.mp_page->mp_pgno;
- txn->mt_dbs[dbi].md_depth++;
- txn->mt_dbxs[dbi].md_dirty = 1;
- if ((txn->mt_dbs[dbi].md_flags & (MDB_DUPSORT|MDB_DUPFIXED)) == MDB_DUPFIXED)
- mpp.mp_page->mp_flags |= P_LEAF2;
- ki = 0;
- }
- else
- goto done;
-
- assert(IS_LEAF(mpp.mp_page));
- DPRINTF("there are %u keys, should insert new key at index %i",
- NUMKEYS(mpp.mp_page), ki);
-
- rdata = data;
-
-new_sub:
- nsize = IS_LEAF2(mpp.mp_page) ? key->mv_size : mdb_leaf_size(txn->mt_env, key, rdata);
- if (SIZELEFT(mpp.mp_page) < nsize) {
- rc = mdb_split(txn, dbi, &mpp.mp_page, &ki, key, rdata, P_INVALID);
- } else {
- /* There is room already in this leaf page. */
- rc = mdb_add_node(txn, dbi, mpp.mp_page, ki, key, rdata, 0, 0);
- }
-
- if (rc != MDB_SUCCESS)
- txn->mt_flags |= MDB_TXN_ERROR;
- else {
- /* Remember if we just added a subdatabase */
- if (flags & F_SUBDATA) {
- leaf = NODEPTR(mpp.mp_page, ki);
- leaf->mn_flags |= F_SUBDATA;
- }
-
- /* Now store the actual data in the child DB. Note that we're
- * storing the user data in the keys field, so there are strict
- * size limits on dupdata. The actual data fields of the child
- * DB are all zero size.
- */
- if (do_sub) {
- MDB_cursor mc;
- MDB_xcursor mx;
-
- leaf = NODEPTR(mpp.mp_page, ki);
-put_sub:
- mc.mc_txn = txn;
- mc.mc_dbi = dbi;
- mc.mc_flags = 0;
- mc.mc_xcursor = &mx;
- mdb_xcursor_init0(&mc);
- mdb_xcursor_init1(txn, dbi, &mx, mpp.mp_page, leaf);
- xdata.mv_size = 0;
- xdata.mv_data = "";
- if (flags == MDB_NODUPDATA)
- flags = MDB_NOOVERWRITE;
- /* converted, write the original data first */
- if (dkey.mv_size) {
- rc = mdb_put0(&mx.mx_txn, mx.mx_cursor.mc_dbi, &dkey, &xdata, flags);
- if (rc) return rc;
- leaf->mn_flags |= F_DUPDATA;
- }
- rc = mdb_put0(&mx.mx_txn, mx.mx_cursor.mc_dbi, data, &xdata, flags);
- mdb_xcursor_fini(&mc);
- memcpy(NODEDATA(leaf), &mx.mx_txn.mt_dbs[mx.mx_cursor.mc_dbi],
- sizeof(MDB_db));
- }
- txn->mt_dbs[dbi].md_entries++;
- }
-
-done:
- return rc;
-}
-#endif
-
int
mdb_put(MDB_txn *txn, MDB_dbi dbi,
MDB_val *key, MDB_val *data, unsigned int flags)
{
MDB_cursor mc;
MDB_xcursor mx;
- int rc;
assert(key != NULL);
assert(data != NULL);
return mdb_stat0(env, &env->me_metas[toggle]->mm_dbs[MAIN_DBI], arg);
}
+static void
+mdb_default_cmp(MDB_txn *txn, MDB_dbi dbi)
+{
+ if (txn->mt_dbs[dbi].md_flags & MDB_REVERSEKEY)
+ txn->mt_dbxs[dbi].md_cmp = memnrcmp;
+ else if (txn->mt_dbs[dbi].md_flags & MDB_INTEGERKEY)
+ txn->mt_dbxs[dbi].md_cmp = cintcmp;
+ else
+ txn->mt_dbxs[dbi].md_cmp = memncmp;
+
+ if (txn->mt_dbs[dbi].md_flags & MDB_DUPSORT) {
+ if (txn->mt_dbs[dbi].md_flags & MDB_INTEGERDUP) {
+ if (txn->mt_dbs[dbi].md_flags & MDB_DUPFIXED)
+ txn->mt_dbxs[dbi].md_dcmp = intcmp;
+ else
+ txn->mt_dbxs[dbi].md_dcmp = cintcmp;
+ } else if (txn->mt_dbs[dbi].md_flags & MDB_REVERSEDUP) {
+ txn->mt_dbxs[dbi].md_dcmp = memnrcmp;
+ } else {
+ txn->mt_dbxs[dbi].md_dcmp = memncmp;
+ }
+ } else {
+ txn->mt_dbxs[dbi].md_dcmp = NULL;
+ }
+}
+
int mdb_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *dbi)
{
MDB_val key, data;
int rc, dirty = 0;
size_t len;
+ if (txn->mt_dbxs[FREE_DBI].md_cmp == NULL) {
+ mdb_default_cmp(txn, FREE_DBI);
+ }
+
/* main DB? */
if (!name) {
*dbi = MAIN_DBI;
if (flags & (MDB_DUPSORT|MDB_REVERSEKEY|MDB_INTEGERKEY))
txn->mt_dbs[MAIN_DBI].md_flags |= (flags & (MDB_DUPSORT|MDB_REVERSEKEY|MDB_INTEGERKEY));
+ mdb_default_cmp(txn, MAIN_DBI);
return MDB_SUCCESS;
}
+ if (txn->mt_dbxs[MAIN_DBI].md_cmp == NULL) {
+ mdb_default_cmp(txn, MAIN_DBI);
+ }
+
/* Is the DB already open? */
len = strlen(name);
for (i=2; i<txn->mt_numdbs; i++) {
if (rc == MDB_SUCCESS) {
txn->mt_dbxs[txn->mt_numdbs].md_name.mv_data = strdup(name);
txn->mt_dbxs[txn->mt_numdbs].md_name.mv_size = len;
- txn->mt_dbxs[txn->mt_numdbs].md_cmp = NULL;
- txn->mt_dbxs[txn->mt_numdbs].md_dcmp = NULL;
txn->mt_dbxs[txn->mt_numdbs].md_rel = NULL;
txn->mt_dbxs[txn->mt_numdbs].md_parent = MAIN_DBI;
txn->mt_dbxs[txn->mt_numdbs].md_dirty = dirty;
*dbi = txn->mt_numdbs;
txn->mt_env->me_dbs[0][txn->mt_numdbs] = txn->mt_dbs[txn->mt_numdbs];
txn->mt_env->me_dbs[1][txn->mt_numdbs] = txn->mt_dbs[txn->mt_numdbs];
+ mdb_default_cmp(txn, txn->mt_numdbs);
txn->mt_numdbs++;
}