+ mdb_dpage_free(env, dl[i].mptr);
+ }
+ dl[0].mid = 0;
+}
+
+/* Set or clear P_KEEP in non-overflow, non-sub pages in known cursors.
+ * When clearing, only consider backup cursors (from parent txns) since
+ * other P_KEEP flags have already been cleared.
+ * @param[in] mc A cursor handle for the current operation.
+ * @param[in] pflags Flags of the pages to update:
+ *	P_DIRTY to set P_KEEP, P_DIRTY|P_KEEP to clear it.
+ */
+static void
+mdb_cursorpages_mark(MDB_cursor *mc, unsigned pflags)
+{
+	MDB_txn *txn = mc->mc_txn;
+	MDB_cursor *m2, *m3;
+	MDB_xcursor *mx;
+	unsigned i, j;
+
+	/* A tracked cursor already appears in txn->mt_cursors[]; drop the
+	 * explicit reference so it isn't visited twice by the loop below.
+	 */
+	if (mc->mc_flags & C_UNTRACK)
+		mc = NULL; /* will find mc in mt_cursors */
+	/* First pass handles the explicit mc (if any); each later pass walks
+	 * the tracked-cursor list of one database, counting i down to 0.
+	 */
+	for (i = txn->mt_numdbs;; mc = txn->mt_cursors[--i]) {
+		for (; mc; mc=mc->mc_next) {
+			/* Setting (pflags == P_DIRTY): start at the live cursor and
+			 * continue through its backups. Clearing: backups only, per
+			 * the header comment above.
+			 */
+			m2 = pflags == P_DIRTY ? mc : mc->mc_backup;
+			for (; m2; m2 = m2->mc_backup) {
+				/* Follow the sub-cursor chain of DUPSORT databases. */
+				for (m3=m2; m3->mc_flags & C_INITIALIZED; m3=&mx->mx_cursor) {
+					for (j=0; j<m3->mc_snum; j++)
+						/* Toggle P_KEEP only on pages whose flags match
+						 * pflags exactly: the XOR sets P_KEEP when pflags
+						 * is P_DIRTY, clears it when pflags is
+						 * P_DIRTY|P_KEEP. Sub-pages (P_SUBP) never match.
+						 */
+						if ((m3->mc_pg[j]->mp_flags & (P_SUBP|P_DIRTY|P_KEEP))
+							== pflags)
+							m3->mc_pg[j]->mp_flags ^= P_KEEP;
+					if (!(m3->mc_db->md_flags & MDB_DUPSORT))
+						break;
+					/* Cursor backups have mx malloced at the end of m2 */
+					mx = (m3 == mc ? m3->mc_xcursor : (MDB_xcursor *)(m3+1));
+				}
+			}
+		}
+		if (i == 0)
+			break;
+	}
+}
+
+static int mdb_page_flush(MDB_txn *txn);
+
+/** Spill pages from the dirty list back to disk.
+ * This is intended to prevent running into #MDB_TXN_FULL situations,
+ * but note that they may still occur in a few cases:
+ * 1) pages in #MDB_DUPSORT sub-DBs are never spilled, so if there
+ * are too many of these dirtied in one txn, the txn may still get
+ * too full.
+ * 2) child txns may run out of space if their parents dirtied a
+ * lot of pages and never spilled them. TODO: we probably should do
+ * a preemptive spill during #mdb_txn_begin() of a child txn, if
+ * the parent's dirty_room is below a given threshold.
+ * 3) our estimate of the txn size could be too small. At the
+ * moment this seems unlikely.
+ *
+ * Otherwise, if not using nested txns, it is expected that apps will
+ * not run into #MDB_TXN_FULL any more. The pages are flushed to disk
+ * the same way as for a txn commit, i.e. their P_DIRTY flag is cleared.
+ * If the txn never references them again, they can be left alone.
+ * If the txn only reads them, they can be used without any fuss.
+ * If the txn writes them again, they can be dirtied immediately without
+ * going thru all of the work of #mdb_page_touch(). Such references are
+ * handled by #mdb_page_unspill().
+ *
+ * Also note, we never spill DB root pages, nor pages of active cursors,
+ * because we'll need these back again soon anyway. And in nested txns,
+ * we can't spill a page in a child txn if it was already spilled in a
+ * parent txn. That would alter the parent txns' data even though
+ * the child hasn't committed yet, and we'd have no way to undo it if
+ * the child aborted.
+ *
+ * @param[in] m0 A cursor handle identifying the transaction and
+ *	database for which we are checking space.
+ * @param[in] key For a put operation, the key being stored.
+ * @param[in] data For a put operation, the data being stored.
+ * @return 0 on success, non-zero on failure.
+ */
+static int
+mdb_page_spill(MDB_cursor *m0, MDB_val *key, MDB_val *data)
+{
+ MDB_txn *txn = m0->mc_txn;
+ MDB_page *dp;
+ MDB_ID2L dl = txn->mt_u.dirty_list;
+ unsigned int i, j;
+ int rc;
+
+ if (m0->mc_flags & C_SUB)
+ return MDB_SUCCESS;
+
+ /* Estimate how much space this op will take */
+ i = m0->mc_db->md_depth;
+ /* Named DBs also dirty the main DB */
+ if (m0->mc_dbi > MAIN_DBI)
+ i += txn->mt_dbs[MAIN_DBI].md_depth;
+ /* For puts, roughly factor in the key+data size */
+ if (key)
+ i += (LEAFSIZE(key, data) + txn->mt_env->me_psize) / txn->mt_env->me_psize;
+ i += i; /* double it for good measure */
+
+ if (txn->mt_dirty_room > i)
+ return MDB_SUCCESS;
+
+ if (!txn->mt_spill_pgs) {
+ txn->mt_spill_pgs = mdb_midl_alloc(MDB_IDL_UM_MAX);
+ if (!txn->mt_spill_pgs)
+ return ENOMEM;
+ }
+
+ /* Mark all the dirty root pages we want to preserve */
+ for (i=0; i<txn->mt_numdbs; i++) {
+ if (txn->mt_dbflags[i] & DB_DIRTY) {
+ j = mdb_mid2l_search(dl, txn->mt_dbs[i].md_root);
+ if (j <= dl[0].mid) {
+ dp = dl[j].mptr;
+ dp->mp_flags |= P_KEEP;
+ }
+ }
+ }
+
+ /* Preserve pages used by cursors */
+ mdb_cursorpages_mark(m0, P_DIRTY);
+
+ /* Save the page IDs of all the pages we're flushing */
+ for (i=1; i<=dl[0].mid; i++) {
+ dp = dl[i].mptr;
+ if (dp->mp_flags & P_KEEP)
+ continue;
+ /* Can't spill twice, make sure it's not already in a parent's
+ * spill list.
+ */
+ if (txn->mt_parent) {
+ MDB_txn *tx2;
+ for (tx2 = txn->mt_parent; tx2; tx2 = tx2->mt_parent) {
+ if (tx2->mt_spill_pgs) {
+ j = mdb_midl_search(tx2->mt_spill_pgs, dl[i].mid);
+ if (j <= tx2->mt_spill_pgs[0] && tx2->mt_spill_pgs[j] == dl[i].mid) {
+ dp->mp_flags |= P_KEEP;
+ break;
+ }
+ }
+ }
+ if (tx2)
+ continue;
+ }
+ if ((rc = mdb_midl_append(&txn->mt_spill_pgs, dl[i].mid)))
+ return rc;
+ }
+ mdb_midl_sort(txn->mt_spill_pgs);
+
+ rc = mdb_page_flush(txn);
+
+ mdb_cursorpages_mark(m0, P_DIRTY|P_KEEP);
+
+ if (rc == 0) {
+ if (txn->mt_parent) {
+ MDB_txn *tx2;
+ pgno_t pgno = dl[i].mid;
+ txn->mt_dirty_room = txn->mt_parent->mt_dirty_room - dl[0].mid;
+ /* dirty pages that are dirty in an ancestor don't
+ * count against this txn's dirty_room.
+ */
+ for (i=1; i<=dl[0].mid; i++) {
+ for (tx2 = txn->mt_parent; tx2; tx2 = tx2->mt_parent) {
+ j = mdb_mid2l_search(tx2->mt_u.dirty_list, pgno);
+ if (j <= tx2->mt_u.dirty_list[0].mid &&
+ tx2->mt_u.dirty_list[j].mid == pgno) {
+ txn->mt_dirty_room++;
+ break;
+ }
+ }
+ }