* BerkeleyDB API, but much simplified.
*/
/*
- * Copyright 2011-2016 Howard Chu, Symas Corp.
+ * Copyright 2011-2018 Howard Chu, Symas Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
/* Most platforms have posix_memalign, older may only have memalign */
#define HAVE_MEMALIGN 1
#include <malloc.h>
+/* On Solaris, we need the POSIX sigwait function */
+#if defined (__sun)
+# define _POSIX_PTHREAD_SEMANTICS 1
+#endif
#endif
#if !(defined(BYTE_ORDER) || defined(__BYTE_ORDER))
#include <resolv.h> /* defines BYTE_ORDER on HPUX and Solaris */
#endif
-#if defined(__APPLE__) || defined (BSD)
+#if defined(__APPLE__) || defined (BSD) || defined(__FreeBSD_kernel__)
# define MDB_USE_POSIX_SEM 1
# define MDB_FDATASYNC fsync
#elif defined(ANDROID)
* pgno on branch nodes. On 64 bit platforms, #mn_flags is also used
* for pgno. (Branch nodes have no flags). Lo and hi are in host byte
* order in case some accesses can be optimized to 32-bit word access.
+ *
+ * Leaf node flags describe node contents. #F_BIGDATA says the node's
+ * data part is the page number of an overflow page with actual data.
+ * #F_DUPDATA and #F_SUBDATA can be combined giving duplicate data in
+ * a sub-page/sub-database, and named databases (just #F_SUBDATA).
*/
typedef struct MDB_node {
/** part of data size or pgno
unsigned char mx_dbflag;
} MDB_xcursor;
- /** Check if there is an inited xcursor, so #XCURSOR_REFRESH() is proper */
+ /** Check if there is an inited xcursor */
#define XCURSOR_INITED(mc) \
((mc)->mc_xcursor && ((mc)->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED))
- /** Update sub-page pointer, if any, in \b mc->mc_xcursor. Needed
+ /** Update the xcursor's sub-page pointer, if any, in \b mc. Needed
* when the node which contains the sub-page may have moved. Called
- * with \b mp = mc->mc_pg[mc->mc_top], \b ki = mc->mc_ki[mc->mc_top].
+ * with leaf page \b mp = mc->mc_pg[\b top].
*/
-#define XCURSOR_REFRESH(mc, mp, ki) do { \
+#define XCURSOR_REFRESH(mc, top, mp) do { \
MDB_page *xr_pg = (mp); \
- MDB_node *xr_node = NODEPTR(xr_pg, ki); \
+ MDB_node *xr_node; \
+ if (!XCURSOR_INITED(mc) || (mc)->mc_ki[top] >= NUMKEYS(xr_pg)) break; \
+ xr_node = NODEPTR(xr_pg, (mc)->mc_ki[top]); \
if ((xr_node->mn_flags & (F_DUPDATA|F_SUBDATA)) == F_DUPDATA) \
(mc)->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(xr_node); \
} while (0)
if (m2 == mc) continue;
if (m2->mc_pg[mc->mc_top] == mp) {
m2->mc_pg[mc->mc_top] = np;
- if (XCURSOR_INITED(m2) && IS_LEAF(np))
- XCURSOR_REFRESH(m2, np, m2->mc_ki[mc->mc_top]);
+ if (IS_LEAF(np))
+ XCURSOR_REFRESH(m2, mc->mc_top, np);
}
}
}
if (flags & (MDB_PS_FIRST|MDB_PS_LAST)) {
i = 0;
- if (flags & MDB_PS_LAST)
+ if (flags & MDB_PS_LAST) {
i = NUMKEYS(mp) - 1;
+ /* if already init'd, see if we're already in right place */
+ if (mc->mc_flags & C_INITIALIZED) {
+ if (mc->mc_ki[mc->mc_top] == i) {
+ mc->mc_top = mc->mc_snum++;
+ mp = mc->mc_pg[mc->mc_top];
+ goto ready;
+ }
+ }
+ }
} else {
int exact;
node = mdb_node_search(mc, key, &exact);
if ((rc = mdb_cursor_push(mc, mp)))
return rc;
+ready:
if (flags & MDB_PS_MODIFY) {
if ((rc = mdb_page_touch(mc)) != 0)
return rc;
MDB_node *leaf;
int rc;
- if ((mc->mc_flags & C_EOF) ||
- ((mc->mc_flags & C_DEL) && op == MDB_NEXT_DUP)) {
+ if ((mc->mc_flags & C_DEL && op == MDB_NEXT_DUP))
return MDB_NOTFOUND;
- }
+
if (!(mc->mc_flags & C_INITIALIZED))
return mdb_cursor_first(mc, key, data);
mp = mc->mc_pg[mc->mc_top];
+ if (mc->mc_flags & C_EOF) {
+ if (mc->mc_ki[mc->mc_top] >= NUMKEYS(mp)-1)
+ return MDB_NOTFOUND;
+ mc->mc_flags ^= C_EOF;
+ }
+
if (mc->mc_db->md_flags & MDB_DUPSORT) {
leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]);
if (F_ISSET(leaf->mn_flags, F_DUPDATA)) {
if (mc->mc_xcursor)
mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF);
- if (!(mc->mc_flags & C_EOF)) {
-
- if (!(mc->mc_flags & C_INITIALIZED) || mc->mc_top) {
- rc = mdb_page_search(mc, NULL, MDB_PS_LAST);
- if (rc != MDB_SUCCESS)
- return rc;
- }
- mdb_cassert(mc, IS_LEAF(mc->mc_pg[mc->mc_top]));
-
+ if (!(mc->mc_flags & C_INITIALIZED) || mc->mc_top) {
+ rc = mdb_page_search(mc, NULL, MDB_PS_LAST);
+ if (rc != MDB_SUCCESS)
+ return rc;
}
+ mdb_cassert(mc, IS_LEAF(mc->mc_pg[mc->mc_top]));
+
mc->mc_ki[mc->mc_top] = NUMKEYS(mc->mc_pg[mc->mc_top]) - 1;
mc->mc_flags |= C_INITIALIZED|C_EOF;
leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]);
rc = MDB_INCOMPATIBLE;
break;
}
+ if (mc->mc_ki[mc->mc_top] >= NUMKEYS(mc->mc_pg[mc->mc_top])) {
+ mc->mc_ki[mc->mc_top] = NUMKEYS(mc->mc_pg[mc->mc_top]);
+ rc = MDB_NOTFOUND;
+ break;
+ }
{
MDB_node *leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]);
if (!F_ISSET(leaf->mn_flags, F_DUPDATA)) {
} else {
memcpy((char *)mp + mp->mp_upper + PAGEBASE, (char *)fp + fp->mp_upper + PAGEBASE,
olddata.mv_size - fp->mp_upper - PAGEBASE);
+ memcpy((char *)(&mp->mp_ptrs), (char *)(&fp->mp_ptrs), NUMKEYS(fp) * sizeof(mp->mp_ptrs[0]));
for (i=0; i<NUMKEYS(fp); i++)
- mp->mp_ptrs[i] = fp->mp_ptrs[i] + offset;
+ mp->mp_ptrs[i] += offset;
}
}
if (m3->mc_ki[i] >= mc->mc_ki[i] && insert_key) {
m3->mc_ki[i]++;
}
- if (XCURSOR_INITED(m3))
- XCURSOR_REFRESH(m3, mp, m3->mc_ki[i]);
+ XCURSOR_REFRESH(m3, i, mp);
}
}
}
MDB_xcursor *mx = mc->mc_xcursor;
unsigned i = mc->mc_top;
MDB_page *mp = mc->mc_pg[i];
- int nkeys = NUMKEYS(mp);
for (m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; m2=m2->mc_next) {
if (m2 == mc || m2->mc_snum < mc->mc_snum) continue;
if (m2->mc_pg[i] == mp) {
if (m2->mc_ki[i] == mc->mc_ki[i]) {
mdb_xcursor_init2(m2, mx, new_dupdata);
- } else if (!insert_key && m2->mc_ki[i] < nkeys) {
- XCURSOR_REFRESH(m2, mp, m2->mc_ki[i]);
+ } else if (!insert_key) {
+ XCURSOR_REFRESH(m2, i, mp);
}
}
}
if (m2 == mc || m2->mc_snum < mc->mc_snum) continue;
if (!(m2->mc_flags & C_INITIALIZED)) continue;
if (m2->mc_pg[mc->mc_top] == mp) {
- MDB_node *n2 = leaf;
- if (m2->mc_ki[mc->mc_top] != mc->mc_ki[mc->mc_top]) {
- n2 = NODEPTR(mp, m2->mc_ki[mc->mc_top]);
- if (n2->mn_flags & F_SUBDATA) continue;
- }
- m2->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(n2);
+ XCURSOR_REFRESH(m2, mc->mc_top, mp);
}
}
}
if (!(mc->mc_flags & C_INITIALIZED))
return EINVAL;
- if (!mc->mc_snum || (mc->mc_flags & C_EOF))
+ if (!mc->mc_snum)
return MDB_NOTFOUND;
+ if (mc->mc_flags & C_EOF) {
+ if (mc->mc_ki[mc->mc_top] >= NUMKEYS(mc->mc_pg[mc->mc_top]))
+ return MDB_NOTFOUND;
+ mc->mc_flags ^= C_EOF;
+ }
+
leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]);
if (!F_ISSET(leaf->mn_flags, F_DUPDATA)) {
*countp = 1;
m3->mc_ki[csrc->mc_top] = cdst->mc_ki[cdst->mc_top];
m3->mc_ki[csrc->mc_top-1]++;
}
- if (XCURSOR_INITED(m3) && IS_LEAF(mps))
- XCURSOR_REFRESH(m3, m3->mc_pg[csrc->mc_top], m3->mc_ki[csrc->mc_top]);
+ if (IS_LEAF(mps))
+ XCURSOR_REFRESH(m3, csrc->mc_top, m3->mc_pg[csrc->mc_top]);
}
} else
/* Adding on the right, bump others down */
} else {
m3->mc_ki[csrc->mc_top]--;
}
- if (XCURSOR_INITED(m3) && IS_LEAF(mps))
- XCURSOR_REFRESH(m3, m3->mc_pg[csrc->mc_top], m3->mc_ki[csrc->mc_top]);
+ if (IS_LEAF(mps))
+ XCURSOR_REFRESH(m3, csrc->mc_top, m3->mc_pg[csrc->mc_top]);
}
}
}
m3->mc_ki[top-1] > csrc->mc_ki[top-1]) {
m3->mc_ki[top-1]--;
}
- if (XCURSOR_INITED(m3) && IS_LEAF(psrc))
- XCURSOR_REFRESH(m3, m3->mc_pg[top], m3->mc_ki[top]);
+ if (IS_LEAF(psrc))
+ XCURSOR_REFRESH(m3, top, m3->mc_pg[top]);
}
}
{
} else if (m3->mc_ki[mc->mc_top] > ki) {
m3->mc_ki[mc->mc_top]--;
}
- if (XCURSOR_INITED(m3))
- XCURSOR_REFRESH(m3, m3->mc_pg[mc->mc_top], m3->mc_ki[mc->mc_top]);
+ XCURSOR_REFRESH(m3, mc->mc_top, mp);
}
}
}
}
if (mc->mc_db->md_flags & MDB_DUPSORT) {
MDB_node *node = NODEPTR(m3->mc_pg[m3->mc_top], m3->mc_ki[m3->mc_top]);
- /* If this node is a fake page, it needs to be reinited
- * because its data has moved. But just reset mc_pg[0]
- * if the xcursor is already live.
+ /* If this node has dupdata, it may need to be reinited
+ * because its data has moved.
+ * If the xcursor was not initd it must be reinited.
+ * Else if node points to a subDB, nothing is needed.
+ * Else (xcursor was initd, not a subDB) needs mc_pg[0] reset.
*/
- if ((node->mn_flags & (F_DUPDATA|F_SUBDATA)) == F_DUPDATA) {
- if (m3->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED)
- m3->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(node);
- else
+ if (node->mn_flags & F_DUPDATA) {
+ if (m3->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) {
+ if (!(node->mn_flags & F_SUBDATA))
+ m3->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(node);
+ } else {
mdb_xcursor_init1(m3, node);
+ m3->mc_xcursor->mx_cursor.mc_flags |= C_DEL;
+ }
}
}
}
m3->mc_ki[ptop] >= mc->mc_ki[ptop]) {
m3->mc_ki[ptop]++;
}
- if (XCURSOR_INITED(m3) && IS_LEAF(mp))
- XCURSOR_REFRESH(m3, m3->mc_pg[mc->mc_top], m3->mc_ki[mc->mc_top]);
+ if (IS_LEAF(mp))
+ XCURSOR_REFRESH(m3, mc->mc_top, m3->mc_pg[mc->mc_top]);
}
}
DPRINTF(("mp left: %d, rp left: %d", SIZELEFT(mp), SIZELEFT(rp)));
MDB_node *node = NODEPTR(mc.mc_pg[mc.mc_top], mc.mc_ki[mc.mc_top]);
if ((node->mn_flags & (F_DUPDATA|F_SUBDATA)) != F_SUBDATA)
return MDB_INCOMPATIBLE;
- } else if (! (rc == MDB_NOTFOUND && (flags & MDB_CREATE))) {
- return rc;
+ } else {
+ if (rc != MDB_NOTFOUND || !(flags & MDB_CREATE))
+ return rc;
+ if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY))
+ return EACCES;
}
/* Done here so we cannot fail after creating a new DB */