/** Nested txn under this txn, set together with flag #MDB_TXN_HAS_CHILD */
MDB_txn *mt_child;
pgno_t mt_next_pgno; /**< next unallocated page */
+#ifdef MDB_VL32
+ pgno_t mt_last_pgno; /**< last written page */
+#endif
/** The ID of this transaction. IDs are integers incrementing from 1.
* Only committed write transactions increment the ID. If a transaction
* aborts, the ID may be re-used by the next writer.
MDB_cursor **mt_cursors;
/** Array of flags for each DB */
unsigned char *mt_dbflags;
+#ifdef MDB_VL32
+ /** List of read-only pages (actually chunks) */
+ MDB_ID3L mt_rpages;
+ /** We map chunks of 16 pages. Even though Windows uses 4KB pages, all
+ * mappings must begin on 64KB boundaries. So we round off all pgnos to
+ * a chunk boundary. We do the same on Linux for symmetry, and also to
+ * reduce the frequency of mmap/munmap calls.
+ */
+#define MDB_RPAGE_CHUNK 16
+#define MDB_TRPAGE_SIZE 4096 /**< size of #mt_rpages array of chunks */
+#define MDB_TRPAGE_MAX (MDB_TRPAGE_SIZE-1) /**< maximum chunk index */
+ unsigned int mt_rpcheck; /**< threshold for reclaiming unref'd chunks */
+#endif
/** Number of DB records in use, or 0 when the txn is finished.
* This number only ever increments until the txn finishes; we
* don't decrement it when individual DB handles are closed.
HANDLE me_fd; /**< The main data file */
HANDLE me_lfd; /**< The lock file */
HANDLE me_mfd; /**< just for writing the meta pages */
+#if defined(MDB_VL32) && defined(_WIN32)
+ HANDLE me_fmh; /**< File Mapping handle */
+#endif
/** Failed to update the meta page. Probably an I/O error. */
#define MDB_FATAL_ERROR 0x80000000U
/** Some fields are initialized. */
#else
mdb_mutex_t me_rmutex;
mdb_mutex_t me_wmutex;
+#endif
+#ifdef MDB_VL32
+ MDB_ID3L me_rpages; /**< like #mt_rpages, but global to env */
+ mdb_mutex_t me_rpmutex; /**< control access to #me_rpages */
+#define MDB_ERPAGE_SIZE 16384
+#define MDB_ERPAGE_MAX (MDB_ERPAGE_SIZE-1)
+ unsigned int me_rpcheck;
#endif
void *me_userctx; /**< User-settable context */
MDB_assert_func *me_assert_func; /**< Callback for assertion failures */
dl[0].mid = 0;
}
+#ifdef MDB_VL32
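+/** Drop a per-txn reference on the chunk containing a read-only page.
+ * Sub-pages and dirty pages are not tracked in #mt_rpages, so they are
+ * ignored here.
+ */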
+static void
+mdb_page_unref(MDB_txn *txn, MDB_page *mp)
+{
+ pgno_t pgno;
+ MDB_ID3L tl = txn->mt_rpages;
+ unsigned x, rem;
+ if (mp->mp_flags & (P_SUBP|P_DIRTY))
+ return;
+ rem = mp->mp_pgno & (MDB_RPAGE_CHUNK-1);
+ pgno = mp->mp_pgno ^ rem;
+ x = mdb_mid3l_search(tl, pgno);
+ if (x != tl[0].mid && tl[x+1].mid == mp->mp_pgno)
+ x++;
+ if (tl[x].mref)
+ tl[x].mref--;
+}
+#define MDB_PAGE_UNREF(txn, mp) mdb_page_unref(txn, mp)
+
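+/** Unref all chunks referenced by a cursor's page stack and reset the
+ * cursor to an uninitialized state.
+ */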
+static void
+mdb_cursor_unref(MDB_cursor *mc)
+{
+ int i;
+ if (!mc->mc_pg[0] || IS_SUBP(mc->mc_pg[0]))
+ return;
+ for (i=0; i<mc->mc_snum; i++)
+ mdb_page_unref(mc->mc_txn, mc->mc_pg[i]);
+ mc->mc_snum = mc->mc_top = 0;
+ mc->mc_pg[0] = NULL;
+ mc->mc_flags &= ~C_INITIALIZED;
+}
+#else
+#define MDB_PAGE_UNREF(txn, mp)
+#endif /* MDB_VL32 */
+
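/* Illustrative sketch (not part of the patch): the chunk arithmetic used by
 * mdb_page_unref() above and mdb_rpage_get() further below. MDB_RPAGE_CHUNK
 * is a power of two, so masking a pgno with (MDB_RPAGE_CHUNK-1) gives its
 * offset inside the chunk, and XOR-ing that offset back out gives the pgno
 * of the chunk's first page. The helper name is hypothetical.
 */
static void
example_chunk_split(pgno_t pgno, pgno_t *base, unsigned *rem)
{
	*rem  = pgno & (MDB_RPAGE_CHUNK-1);	/* page's offset inside its chunk */
	*base = pgno ^ *rem;			/* pgno of the chunk's first page */
}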
/** Loosen or free a single page.
* Saves single pages to a list for future reuse
* in this same txn. It has been pulled from the freeDB
}
}
}
+ MDB_PAGE_UNREF(mc->mc_txn, mp);
return 0;
fail:
/* Moved to here to avoid a data race in read TXNs */
txn->mt_next_pgno = meta->mm_last_pg+1;
+#ifdef MDB_VL32
+ txn->mt_last_pgno = txn->mt_next_pgno - 1;
+#endif
txn->mt_flags = flags;
DPRINTF(("calloc: %s", strerror(errno)));
return ENOMEM;
}
+#ifdef MDB_VL32
+ if (!parent) {
+ txn->mt_rpages = malloc(MDB_TRPAGE_SIZE * sizeof(MDB_ID3));
+ if (!txn->mt_rpages) {
+ free(txn);
+ return ENOMEM;
+ }
+ txn->mt_rpages[0].mid = 0;
+ txn->mt_rpcheck = MDB_TRPAGE_SIZE/2;
+ }
+#endif
txn->mt_dbxs = env->me_dbxs; /* static */
txn->mt_dbs = (MDB_db *) ((char *)txn + tsize);
txn->mt_dbflags = (unsigned char *)txn + size - env->me_maxdbs;
parent->mt_child = txn;
txn->mt_parent = parent;
txn->mt_numdbs = parent->mt_numdbs;
+#ifdef MDB_VL32
+ txn->mt_rpages = parent->mt_rpages;
+#endif
memcpy(txn->mt_dbs, parent->mt_dbs, txn->mt_numdbs * sizeof(MDB_db));
/* Copy parent's mt_dbflags, but clear DB_NEW */
for (i=0; i<txn->mt_numdbs; i++)
rc = mdb_txn_renew0(txn);
}
if (rc) {
- if (txn != env->me_txn0)
+ if (txn != env->me_txn0) {
+#ifdef MDB_VL32
+ free(txn->mt_rpages);
+#endif
free(txn);
+ }
} else {
txn->mt_flags |= flags; /* could not change txn=me_txn0 earlier */
*ret = txn;
mdb_midl_free(pghead);
}
-
+#ifdef MDB_VL32
+ if (!txn->mt_parent) {
+ MDB_ID3L el = env->me_rpages, tl = txn->mt_rpages;
+ unsigned i, x, n = tl[0].mid;
+ LOCK_MUTEX0(env->me_rpmutex);
+ for (i = 1; i <= n; i++) {
+ if (tl[i].mid & (MDB_RPAGE_CHUNK-1)) {
+ /* tmp overflow pages that we didn't share in env */
+ munmap(tl[i].mptr, tl[i].mcnt * env->me_psize);
+ } else {
+ x = mdb_mid3l_search(el, tl[i].mid);
+ if (tl[i].mptr == el[x].mptr) {
+ el[x].mref--;
+ } else {
+ /* another tmp overflow page */
+ munmap(tl[i].mptr, tl[i].mcnt * env->me_psize);
+ }
+ }
+ }
+ UNLOCK_MUTEX(env->me_rpmutex);
+ tl[0].mid = 0;
+ if (mode & MDB_END_FREE)
+ free(tl);
+ }
+#endif
if (mode & MDB_END_FREE)
free(txn);
}
n++;
#endif /* _WIN32 */
}
+#ifdef MDB_VL32
+ if (pgno > txn->mt_last_pgno)
+ txn->mt_last_pgno = pgno;
+#endif
/* MIPS has cache coherency issues, this is a no-op everywhere else
* Note: for any size >= on-chip cache size, entire on-chip cache is
if (rc)
return rc;
map = addr;
+#ifdef MDB_VL32
+ msize = NUM_METAS * env->me_psize;
+#endif
rc = NtMapViewOfSection(mh, GetCurrentProcess(), &map, 0, 0, NULL, &msize, ViewUnmap, alloctype, pageprot);
+#ifdef MDB_VL32
+ env->me_fmh = mh;
+#else
NtClose(mh);
+#endif
if (rc)
return rc;
env->me_map = map;
+#else
+#ifdef MDB_VL32
+ (void) flags;
+ env->me_map = mmap(addr, NUM_METAS * env->me_psize, PROT_READ, MAP_SHARED,
+ env->me_fd, 0);
+ if (env->me_map == MAP_FAILED) {
+ env->me_map = NULL;
+ return ErrCode();
+ }
#else
int prot = PROT_READ;
if (flags & MDB_WRITEMAP) {
*/
if (addr && env->me_map != addr)
return EBUSY; /* TODO: Make a new MDB_* error code? */
+#endif
p = (MDB_page *)env->me_map;
env->me_metas[0] = METADATA(p);
* sure there are no active txns.
*/
if (env->me_map) {
- int rc;
MDB_meta *meta;
+#ifndef MDB_VL32
void *old;
+ int rc;
+#endif
if (env->me_txn)
return EINVAL;
meta = mdb_env_pick_meta(env);
if (size < minsize)
size = minsize;
}
+#ifndef MDB_VL32
+ /* Under MDB_VL32 this step is a no-op, since we dynamically remap
+ * chunks of the DB anyway.
+ */
munmap(env->me_map, env->me_mapsize);
env->me_mapsize = size;
old = (env->me_flags & MDB_FIXEDMAP) ? env->me_map : NULL;
rc = mdb_env_map(env, old);
if (rc)
return rc;
+#endif /* !MDB_VL32 */
}
env->me_mapsize = size;
if (env->me_psize)
env->me_wmutex->semnum = 1;
env->me_rmutex->locked = &env->me_txns->mti_rlocked;
env->me_wmutex->locked = &env->me_txns->mti_wlocked;
+#endif
+#ifdef MDB_VL32
+#ifdef _WIN32
+ env->me_rpmutex = CreateMutex(NULL, FALSE, NULL);
+#else
+ pthread_mutex_init(env->me_rpmutex, NULL);
+#endif
#endif
return MDB_SUCCESS;
if (env->me_fd!=INVALID_HANDLE_VALUE || (flags & ~(CHANGEABLE|CHANGELESS)))
return EINVAL;
+#ifdef MDB_VL32
+ if (flags & MDB_WRITEMAP) {
+ /* silently ignore WRITEMAP in 32-bit (VL32) mode */
+ flags ^= MDB_WRITEMAP;
+ }
+ if (flags & MDB_FIXEDMAP) {
+ /* cannot support FIXEDMAP */
+ return EINVAL;
+ }
+#endif
+
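/* Illustrative usage sketch (not part of the patch), assuming an MDB_VL32
 * build: MDB_WRITEMAP is silently dropped by the check above so the open
 * still succeeds, while MDB_FIXEDMAP makes mdb_env_open() fail with EINVAL.
 * The path and mode below are placeholders.
 */
static int
example_vl32_open(MDB_env *env)
{
	/* WRITEMAP is quietly ignored under MDB_VL32 */
	int rc = mdb_env_open(env, "./testdb", MDB_WRITEMAP|MDB_NOSUBDIR, 0664);
	/* passing MDB_FIXEDMAP instead would make this call return EINVAL */
	return rc;
}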
len = strlen(path);
if (flags & MDB_NOSUBDIR) {
rc = len + sizeof(LOCKSUFF) + len + 1;
(env->me_dirty_list = calloc(MDB_IDL_UM_SIZE, sizeof(MDB_ID2)))))
rc = ENOMEM;
}
+#ifdef MDB_VL32
+ if (!rc) {
+ env->me_rpages = malloc(MDB_ERPAGE_SIZE * sizeof(MDB_ID3));
+ if (!env->me_rpages) {
+ rc = ENOMEM;
+ goto leave;
+ }
+ env->me_rpages[0].mid = 0;
+ env->me_rpcheck = MDB_ERPAGE_SIZE/2;
+ }
+#endif
env->me_flags = flags |= MDB_ENV_ACTIVE;
if (rc)
goto leave;
txn->mt_dbiseqs = (unsigned int *)(txn->mt_cursors + env->me_maxdbs);
txn->mt_dbflags = (unsigned char *)(txn->mt_dbiseqs + env->me_maxdbs);
txn->mt_env = env;
+#ifdef MDB_VL32
+ txn->mt_rpages = malloc(MDB_TRPAGE_SIZE * sizeof(MDB_ID3));
+ if (!txn->mt_rpages) {
+ free(txn);
+ rc = ENOMEM;
+ goto leave;
+ }
+ txn->mt_rpages[0].mid = 0;
+ txn->mt_rpcheck = MDB_TRPAGE_SIZE/2;
+#endif
txn->mt_dbxs = env->me_dbxs;
txn->mt_flags = MDB_TXN_FINISHED;
env->me_txn0 = txn;
free(env->me_path);
free(env->me_dirty_list);
free(env->me_txn0);
+#ifdef MDB_VL32
+ { unsigned int x;
+ for (x=1; x<=env->me_rpages[0].mid; x++)
+ munmap(env->me_rpages[x].mptr, env->me_rpages[x].mcnt * env->me_psize);
+ }
+ free(env->me_rpages);
+#endif
mdb_midl_free(env->me_free_pgs);
if (env->me_flags & MDB_ENV_TXKEY) {
}
if (env->me_map) {
+#ifdef MDB_VL32
+ munmap(env->me_map, NUM_METAS*env->me_psize);
+#else
munmap(env->me_map, env->me_mapsize);
+#endif
}
if (env->me_mfd != env->me_fd && env->me_mfd != INVALID_HANDLE_VALUE)
(void) close(env->me_mfd);
#endif
(void) close(env->me_lfd);
}
+#ifdef MDB_VL32
+#ifdef _WIN32
+ if (env->me_rpmutex) CloseHandle(env->me_rpmutex);
+#else
+ pthread_mutex_destroy(env->me_rpmutex);
+#endif
+#endif
env->me_flags &= ~(MDB_ENV_ACTIVE|MDB_ENV_TXKEY);
}
return MDB_SUCCESS;
}
+#ifdef MDB_VL32
+/** Map a read-only page.
+ * There are two levels of tracking in use: a per-txn list and a per-env list.
+ * Ref'ing and unref'ing the per-txn list is faster since it requires no
+ * locking. Pages are cached in the per-env list for global reuse, and a lock
+ * is required. Pages are not immediately unmapped when their refcnt goes to
+ * zero; they hang around in case they will be reused again soon.
+ *
+ * When the per-txn list gets full, all pages with refcnt=0 are purged from the
+ * list and their refcnts in the per-env list are decremented.
+ *
+ * When the per-env list gets full, all pages with refcnt=0 are purged from the
+ * list and their pages are unmapped.
+ *
+ * @note "full" means the list has reached its respective rpcheck threshold.
+ * This threshold slowly rises if no pages could be purged on a given check,
+ * and returns to its original value once enough pages have been purged.
+ *
+ * If purging doesn't free any slots, filling the per-txn list will return
+ * MDB_TXN_FULL, and filling the per-env list returns MDB_MAP_FULL.
+ *
+ * Reference tracking in a txn is imperfect; pages can linger with a non-zero
+ * refcnt even when no active references remain. It was deemed too invasive
+ * to add unrefs in every required location. However, all pages are unref'd
+ * at the end of the transaction. This guarantees that no stale references
+ * linger in the per-env list.
+ *
+ * Usually we map chunks of 16 pages at a time, but if an overflow page begins
+ * at the tail of the chunk we extend the chunk to include the entire overflow
+ * page. Unfortunately, pages can be turned into overflow pages after their
+ * chunk was already mapped. In that case we must remap the chunk if the
+ * overflow page is referenced. If the chunk's refcnt is 0 we can just remap
+ * it, otherwise we temporarily map a new chunk just for the overflow page.
+ *
+ * @note this chunk handling means we cannot guarantee that a data item
+ * returned from the DB will stay alive for the duration of the transaction:
+ *   We unref pages as soon as a cursor moves away from the page.
+ *   A subsequent op may cause a purge, which may unmap any unref'd chunks.
+ *   The caller must copy the data if it must be used later in the same txn.
+ *
+ * Also, our reference counting revolves around cursors, but overflow pages
+ * are never pointed to by a cursor. As such, they always remain referenced
+ * in a txn until it ends.
+ *
+ * @param[in] txn the transaction for this access.
+ * @param[in] pgno the page number for the page to retrieve.
+ * @param[out] ret address of a pointer where the page's address will be stored.
+ * @return 0 on success, non-zero on failure.
+ */
+static int
+mdb_rpage_get(MDB_txn *txn, pgno_t pg0, MDB_page **ret)
+{
+ MDB_env *env = txn->mt_env;
+ MDB_page *p;
+ MDB_ID3L tl = txn->mt_rpages;
+ MDB_ID3L el = env->me_rpages;
+ MDB_ID3 id3;
+ unsigned x, rem;
+ pgno_t pgno;
+ int rc, retries = 1;
+#ifdef _WIN32
+ LARGE_INTEGER off;
+ SIZE_T len;
+#define SET_OFF(off,val) off.QuadPart = val
+#define MAP(rc,env,addr,len,off) \
+ addr = NULL; \
+ rc = NtMapViewOfSection(env->me_fmh, GetCurrentProcess(), &addr, 0, \
+ len, &off, &len, ViewUnmap, MEM_RESERVE, PAGE_READONLY)
+#else
+ off_t off;
+ size_t len;
+#define SET_OFF(off,val) off = val
+#define MAP(rc,env,addr,len,off) \
+ addr = mmap(NULL, len, PROT_READ, MAP_SHARED, env->me_fd, off); \
+ rc = (addr == MAP_FAILED) ? errno : 0
+#endif
+
+ /* remember the offset of the actual page number, so we can
+ * return the correct pointer at the end.
+ */
+ rem = pg0 & (MDB_RPAGE_CHUNK-1);
+ pgno = pg0 ^ rem;
+
+ id3.mid = 0;
+ x = mdb_mid3l_search(tl, pgno);
+ if (x <= tl[0].mid && tl[x].mid == pgno) {
+ if (x != tl[0].mid && tl[x+1].mid == pg0)
+ x++;
+ /* check for overflow size */
+ p = (MDB_page *)((char *)tl[x].mptr + rem * env->me_psize);
+ if (IS_OVERFLOW(p) && p->mp_pages + rem > tl[x].mcnt) {
+ id3.mcnt = p->mp_pages + rem;
+ len = id3.mcnt * env->me_psize;
+ SET_OFF(off, pgno * env->me_psize);
+ MAP(rc, env, id3.mptr, len, off);
+ if (rc)
+ return rc;
+ /* check for local-only page */
+ if (rem) {
+ mdb_tassert(txn, tl[x].mid != pg0);
+ /* hope there's room to insert this locally.
+ * setting mid here tells later code to just insert
+ * this id3 instead of searching for a match.
+ */
+ id3.mid = pg0;
+ goto notlocal;
+ } else {
+ /* ignore the mapping we got from env, use new one */
+ tl[x].mptr = id3.mptr;
+ tl[x].mcnt = id3.mcnt;
+ /* if no active ref, see if we can replace in env */
+ if (!tl[x].mref) {
+ unsigned i;
+ LOCK_MUTEX0(env->me_rpmutex);
+ i = mdb_mid3l_search(el, tl[x].mid);
+ if (el[i].mref == 1) {
+ /* just us, replace it */
+ munmap(el[i].mptr, el[i].mcnt * env->me_psize);
+ el[i].mptr = tl[x].mptr;
+ el[i].mcnt = tl[x].mcnt;
+ } else {
+ /* there are others, remove ourself */
+ el[i].mref--;
+ }
+ UNLOCK_MUTEX(env->me_rpmutex);
+ }
+ }
+ }
+ id3.mptr = tl[x].mptr;
+ id3.mcnt = tl[x].mcnt;
+ tl[x].mref++;
+ goto ok;
+ }
+
+notlocal:
+ if (tl[0].mid >= MDB_TRPAGE_MAX - txn->mt_rpcheck) {
+ unsigned i, y;
+ /* purge unref'd pages from our list and unref in env */
+ LOCK_MUTEX0(env->me_rpmutex);
+retry:
+ y = 0;
+ for (i=1; i<=tl[0].mid; i++) {
+ if (!tl[i].mref) {
+ if (!y) y = i;
+ /* tmp overflow pages don't go to env */
+ if (tl[i].mid & (MDB_RPAGE_CHUNK-1)) {
+ munmap(tl[i].mptr, tl[i].mcnt * env->me_psize);
+ continue;
+ }
+ x = mdb_mid3l_search(el, tl[i].mid);
+ el[x].mref--;
+ }
+ }
+ UNLOCK_MUTEX(env->me_rpmutex);
+ if (!y) {
+ /* we didn't find any unref'd chunks.
+ * if we're out of room, fail.
+ */
+ if (tl[0].mid >= MDB_TRPAGE_MAX)
+ return MDB_TXN_FULL;
+ /* otherwise, raise threshold for next time around
+ * and let this go.
+ */
+ txn->mt_rpcheck /= 2;
+ } else {
+ /* we found some unused; consolidate the list */
+ for (i=y+1; i<= tl[0].mid; i++)
+ if (tl[i].mref)
+ tl[y++] = tl[i];
+ tl[0].mid = y-1;
+ /* decrease the check threshold toward its original value */
+ if (!txn->mt_rpcheck)
+ txn->mt_rpcheck = 1;
+ while (txn->mt_rpcheck < tl[0].mid && txn->mt_rpcheck < MDB_TRPAGE_SIZE/2)
+ txn->mt_rpcheck *= 2;
+ }
+ }
+ if (tl[0].mid < MDB_TRPAGE_SIZE) {
+ id3.mref = 1;
+ if (id3.mid)
+ goto found;
+ len = env->me_psize * MDB_RPAGE_CHUNK;
+ id3.mid = pgno;
+ id3.mcnt = MDB_RPAGE_CHUNK;
+
+ /* search for page in env */
+ LOCK_MUTEX0(env->me_rpmutex);
+ x = mdb_mid3l_search(el, pgno);
+ if (x <= el[0].mid && el[x].mid == pgno) {
+ id3.mptr = el[x].mptr;
+ id3.mcnt = el[x].mcnt;
+ /* check for overflow size */
+ p = (MDB_page *)((char *)id3.mptr + rem * env->me_psize);
+ if (IS_OVERFLOW(p) && p->mp_pages + rem > id3.mcnt) {
+ id3.mcnt = p->mp_pages + rem;
+ len = id3.mcnt * env->me_psize;
+ SET_OFF(off, pgno * env->me_psize);
+ MAP(rc, env, id3.mptr, len, off);
+ if (rc)
+ goto fail;
+ if (!el[x].mref) {
+ munmap(el[x].mptr, el[x].mcnt * env->me_psize);
+ el[x].mptr = id3.mptr;
+ el[x].mcnt = id3.mcnt;
+ } else {
+ id3.mid = pg0;
+ UNLOCK_MUTEX(env->me_rpmutex);
+ goto found;
+ }
+ }
+ el[x].mref++;
+ UNLOCK_MUTEX(env->me_rpmutex);
+ goto found;
+ }
+ if (el[0].mid >= MDB_ERPAGE_MAX - env->me_rpcheck) {
+ /* purge unref'd pages */
+ unsigned i, y = 0;
+ for (i=1; i<=el[0].mid; i++) {
+ if (!el[i].mref) {
+ if (!y) y = i;
+ munmap(el[i].mptr, env->me_psize * el[i].mcnt);
+ }
+ }
+ if (!y) {
+ if (retries) {
+ /* see if we can unref some local pages */
+ retries--;
+ id3.mid = 0;
+ goto retry;
+ }
+ if (el[0].mid >= MDB_ERPAGE_MAX) {
+ UNLOCK_MUTEX(env->me_rpmutex);
+ return MDB_MAP_FULL;
+ }
+ env->me_rpcheck /= 2;
+ } else {
+ for (i=y+1; i<= el[0].mid; i++)
+ if (el[i].mref)
+ el[y++] = el[i];
+ el[0].mid = y-1;
+ if (!env->me_rpcheck)
+ env->me_rpcheck = 1;
+ while (env->me_rpcheck < el[0].mid && env->me_rpcheck < MDB_ERPAGE_SIZE/2)
+ env->me_rpcheck *= 2;
+ }
+ }
+ SET_OFF(off, pgno * env->me_psize);
+ MAP(rc, env, id3.mptr, len, off);
+ if (rc) {
+fail:
+ UNLOCK_MUTEX(env->me_rpmutex);
+ return rc;
+ }
+ /* If this page is far enough from the end of the env, scan for
+ * any overflow pages that would spill onto another block.
+ * Note we must compare against mt_last_pgno, the last written
+ * page in the environment, not mt_next_pgno, which increases
+ * for every newly allocated (but not yet written) page. If
+ * we scanned beyond the last written page we'd get a bus error.
+ */
+ if (pgno + MDB_RPAGE_CHUNK <= txn->mt_last_pgno) {
+ int i;
+ char *cp = (char *)id3.mptr + rem * env->me_psize;
+ for (i=rem; i<MDB_RPAGE_CHUNK;) {
+ p = (MDB_page *)cp;
+ if (IS_OVERFLOW(p)) {
+ int nop = p->mp_pages;
+ if (nop + i > MDB_RPAGE_CHUNK) {
+ munmap(id3.mptr, len);
+ id3.mcnt = nop + i;
+ len = id3.mcnt * env->me_psize;
+ MAP(rc, env, id3.mptr, len, off);
+ if (rc)
+ goto fail;
+ break;
+ }
+ i += nop;
+ cp += nop * env->me_psize;
+ } else {
+ i++;
+ cp += env->me_psize;
+ }
+ }
+ }
+ mdb_mid3l_insert(el, &id3);
+ UNLOCK_MUTEX(env->me_rpmutex);
+found:
+ mdb_mid3l_insert(tl, &id3);
+ } else {
+ return MDB_TXN_FULL;
+ }
+ok:
+ p = (MDB_page *)((char *)id3.mptr + rem * env->me_psize);
+#if MDB_DEBUG /* we don't need this check any more */
+ if (IS_OVERFLOW(p)) {
+ mdb_tassert(txn, p->mp_pages + rem <= id3.mcnt);
+ }
+#endif
+ *ret = p;
+ return MDB_SUCCESS;
+}
+#endif
+
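/* Illustrative sketch (not part of the patch): the "full" test implied by the
 * mt_rpcheck/me_rpcheck thresholds documented above. A list counts as full
 * once its entry count reaches (max - rpcheck); halving rpcheck after a purge
 * that freed nothing moves that trigger point closer to the hard limit, while
 * doubling it after a successful purge moves the trigger back toward the
 * original midpoint. The helper name and parameters are hypothetical.
 */
static int
example_rpage_list_full(MDB_ID3L list, unsigned maxentries, unsigned rpcheck)
{
	/* e.g. maxentries = MDB_TRPAGE_MAX for a txn list, MDB_ERPAGE_MAX for the env list */
	return list[0].mid >= maxentries - rpcheck;
}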
/** Find the address of the page corresponding to a given page number.
* @param[in] txn the transaction for this access.
* @param[in] pgno the page number for the page to retrieve.
static int
mdb_page_get(MDB_txn *txn, pgno_t pgno, MDB_page **ret, int *lvl)
{
+#ifndef MDB_VL32
MDB_env *env = txn->mt_env;
+#endif
MDB_page *p = NULL;
int level;
MDB_ID pn = pgno << 1;
x = mdb_midl_search(tx2->mt_spill_pgs, pn);
if (x <= tx2->mt_spill_pgs[0] && tx2->mt_spill_pgs[x] == pn) {
+#ifdef MDB_VL32
+ int rc = mdb_rpage_get(txn, pgno, &p);
+ if (rc)
+ return rc;
+#else
p = (MDB_page *)(env->me_map + env->me_psize * pgno);
+#endif
goto done;
}
}
if (pgno < txn->mt_next_pgno) {
level = 0;
+#ifdef MDB_VL32
+ {
+ int rc = mdb_rpage_get(txn, pgno, &p);
+ if (rc)
+ return rc;
+ }
+#else
p = (MDB_page *)(env->me_map + env->me_psize * pgno);
+#endif
} else {
DPRINTF(("page %"Y"u not found", pgno));
txn->mt_flags |= MDB_TXN_ERROR;
}
mdb_cassert(mc, root > 1);
- if (!mc->mc_pg[0] || mc->mc_pg[0]->mp_pgno != root)
+ if (!mc->mc_pg[0] || mc->mc_pg[0]->mp_pgno != root) {
+#ifdef MDB_VL32
+ if (mc->mc_pg[0])
+ MDB_PAGE_UNREF(mc->mc_txn, mc->mc_pg[0]);
+#endif
if ((rc = mdb_page_get(mc->mc_txn, root, &mc->mc_pg[0], NULL)) != 0)
return rc;
+ }
+#ifdef MDB_VL32
+ {
+ int i;
+ for (i=1; i<mc->mc_snum; i++)
+ MDB_PAGE_UNREF(mc->mc_txn, mc->mc_pg[i]);
+ }
+#endif
mc->mc_snum = 1;
mc->mc_top = 0;
{
MDB_cursor mc;
MDB_xcursor mx;
- int exact = 0;
+ int exact = 0, rc;
DKBUF;
DPRINTF(("===> get db %u key [%s]", dbi, DKEY(key)));
return MDB_BAD_TXN;
mdb_cursor_init(&mc, txn, dbi, &mx);
- return mdb_cursor_set(&mc, key, data, MDB_SET, &exact);
+ rc = mdb_cursor_set(&mc, key, data, MDB_SET, &exact);
+#ifdef MDB_VL32
+ {
+ /* unref all the pages - caller must copy the data
+ * before doing anything else
+ */
+ mdb_cursor_unref(&mc);
+ }
+#endif
+ return rc;
}
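/* Illustrative usage sketch (not part of the patch): under MDB_VL32 the
 * cursor is unref'd before mdb_get() returns, so a later operation in the
 * same txn may purge and unmap the chunk holding the returned data. Callers
 * should copy the value out before touching the DB again. The function name,
 * dbi and buffer handling are placeholders.
 */
static int
example_vl32_get_copy(MDB_txn *txn, MDB_dbi dbi, MDB_val *key,
	char *buf, size_t buflen)
{
	MDB_val data;
	int rc = mdb_get(txn, dbi, key, &data);
	if (rc == MDB_SUCCESS) {
		size_t n = data.mv_size < buflen ? data.mv_size : buflen;
		memcpy(buf, data.mv_data, n);	/* copy before any further DB calls */
	}
	return rc;
}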
/** Find a sibling for a page.
int rc;
MDB_node *indx;
MDB_page *mp;
+#ifdef MDB_VL32
+ MDB_page *op;
+#endif
if (mc->mc_snum < 2) {
return MDB_NOTFOUND; /* root has no siblings */
}
+#ifdef MDB_VL32
+ op = mc->mc_pg[mc->mc_top];
+#endif
mdb_cursor_pop(mc);
DPRINTF(("parent page is page %"Y"u, index %u",
mc->mc_pg[mc->mc_top]->mp_pgno, mc->mc_ki[mc->mc_top]));
}
mdb_cassert(mc, IS_BRANCH(mc->mc_pg[mc->mc_top]));
+ MDB_PAGE_UNREF(mc->mc_txn, op);
+
indx = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]);
if ((rc = mdb_page_get(mc->mc_txn, NODEPGNO(indx), &mp, NULL)) != 0) {
/* mc will be inconsistent if caller does mc_snum++ as above */
return rc;
}
}
+#ifdef MDB_VL32
+ else {
+ if (mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) {
+ mdb_cursor_unref(&mc->mc_xcursor->mx_cursor);
+ }
+ }
+#endif
} else {
mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF);
if (op == MDB_NEXT_DUP)
return rc;
}
}
+#ifdef MDB_VL32
+ else {
+ if (mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) {
+ mdb_cursor_unref(&mc->mc_xcursor->mx_cursor);
+ }
+ }
+#endif
} else {
mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF);
if (op == MDB_PREV_DUP)
return MDB_SUCCESS;
}
+#ifdef MDB_VL32
+ if (mc->mc_xcursor && mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) {
+ mdb_cursor_unref(&mc->mc_xcursor->mx_cursor);
+ }
+#endif
if (F_ISSET(leaf->mn_flags, F_DUPDATA)) {
mdb_xcursor_init1(mc, leaf);
}
int rc;
MDB_node *leaf;
- if (mc->mc_xcursor)
+ if (mc->mc_xcursor) {
+#ifdef MDB_VL32
+ if (mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) {
+ mdb_cursor_unref(&mc->mc_xcursor->mx_cursor);
+ }
+#endif
mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF);
+ }
if (!(mc->mc_flags & C_INITIALIZED) || mc->mc_top) {
rc = mdb_page_search(mc, NULL, MDB_PS_FIRST);
int rc;
MDB_node *leaf;
- if (mc->mc_xcursor)
+ if (mc->mc_xcursor) {
+#ifdef MDB_VL32
+ if (mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) {
+ mdb_cursor_unref(&mc->mc_xcursor->mx_cursor);
+ }
+#endif
mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF);
+ }
if (!(mc->mc_flags & C_EOF)) {
mdb_cursor_pop(mc);
mdb_cursor_copy(mc, &mx);
+#ifdef MDB_VL32
+ /* bump refcount for mx's pages */
+ for (i=0; i<mc->mc_snum; i++)
+ mdb_page_get(txn, mc->mc_pg[i]->mp_pgno, &mx.mc_pg[i], NULL);
+#endif
while (mc->mc_snum > 0) {
MDB_page *mp = mc->mc_pg[mc->mc_top];
unsigned n = NUMKEYS(mp);
done:
if (rc)
txn->mt_flags |= MDB_TXN_ERROR;
+#ifdef MDB_VL32
+ /* drop refcount for mx's pages */
+ mdb_cursor_unref(&mx);
+#endif
} else if (rc == MDB_NOTFOUND) {
rc = MDB_SUCCESS;
}