X-Git-Url: https://git.sur5r.net/?a=blobdiff_plain;f=libraries%2Fliblmdb%2Fmdb.c;h=dfd7d0053e2231fe8706f0955d28e826f998cfaa;hb=e77918a903d980ff789b7e32f71725481e870510;hp=e2f1bbca1d23c19cf81b2db36a36f86402f2c6ab;hpb=6e81d4071b441840fc09855643f35ea992784002;p=openldap diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index e2f1bbca1d..dfd7d0053e 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -5,7 +5,7 @@ * BerkeleyDB API, but much simplified. */ /* - * Copyright 2011-2016 Howard Chu, Symas Corp. + * Copyright 2011-2017 Howard Chu, Symas Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -113,6 +113,10 @@ typedef SSIZE_T ssize_t; /* Most platforms have posix_memalign, older may only have memalign */ #define HAVE_MEMALIGN 1 #include +/* On Solaris, we need the POSIX sigwait function */ +#if defined (__sun) +# define _POSIX_PTHREAD_SEMANTICS 1 +#endif #endif #if !(defined(BYTE_ORDER) || defined(__BYTE_ORDER)) @@ -120,7 +124,7 @@ typedef SSIZE_T ssize_t; #include /* defines BYTE_ORDER on HPUX and Solaris */ #endif -#if defined(__APPLE__) || defined (BSD) +#if defined(__APPLE__) || defined (BSD) || defined(__FreeBSD_kernel__) # define MDB_USE_POSIX_SEM 1 # define MDB_FDATASYNC fsync #elif defined(ANDROID) @@ -129,6 +133,7 @@ typedef SSIZE_T ssize_t; #ifndef _WIN32 #include +#include #ifdef MDB_USE_POSIX_SEM # define MDB_USE_HASH 1 #include @@ -340,15 +345,15 @@ mdb_sem_wait(sem_t *sem) } #else /* MDB_USE_POSIX_MUTEX: */ - /** Shared mutex/semaphore as it is stored (mdb_mutex_t), and as - * local variables keep it (mdb_mutexref_t). + /** Shared mutex/semaphore as the original is stored. * - * When #mdb_mutexref_t is a pointer declaration and #mdb_mutex_t is - * not, then it is array[size 1] so it can be assigned to a pointer. - * @{ + * Not for copies. Instead it can be assigned to an #mdb_mutexref_t. + * When mdb_mutexref_t is a pointer and mdb_mutex_t is not, then it + * is array[size 1] so it can be assigned to the pointer. */ -typedef pthread_mutex_t mdb_mutex_t[1], *mdb_mutexref_t; - /* @} */ +typedef pthread_mutex_t mdb_mutex_t[1]; + /** Reference to an #mdb_mutex_t */ +typedef pthread_mutex_t *mdb_mutexref_t; /** Lock the reader or writer mutex. * Returns 0 or a code to give #mdb_mutex_failed(), as in #LOCK_MUTEX(). */ @@ -873,19 +878,26 @@ typedef struct MDB_page { /** Header for a single key/data pair within a page. * Used in pages of type #P_BRANCH and #P_LEAF without #P_LEAF2. * We guarantee 2-byte alignment for 'MDB_node's. + * + * #mn_lo and #mn_hi are used for data size on leaf nodes, and for child + * pgno on branch nodes. On 64 bit platforms, #mn_flags is also used + * for pgno. (Branch nodes have no flags). Lo and hi are in host byte + * order in case some accesses can be optimized to 32-bit word access. + * + * Leaf node flags describe node contents. #F_BIGDATA says the node's + * data part is the page number of an overflow page with actual data. + * #F_DUPDATA and #F_SUBDATA can be combined giving duplicate data in + * a sub-page/sub-database, and named databases (just #F_SUBDATA). */ typedef struct MDB_node { - /** lo and hi are used for data size on leaf nodes and for - * child pgno on branch nodes. On 64 bit platforms, flags - * is also used for pgno. (Branch nodes have no flags). - * They are in host byte order in case that lets some - * accesses be optimized into a 32-bit word access. - */ + /** part of data size or pgno + * @{ */ #if BYTE_ORDER == LITTLE_ENDIAN - unsigned short mn_lo, mn_hi; /**< part of data size or pgno */ + unsigned short mn_lo, mn_hi; #else unsigned short mn_hi, mn_lo; #endif + /** @} */ /** @defgroup mdb_node Node Flags * @ingroup internal * Flags for node headers. @@ -1214,17 +1226,19 @@ typedef struct MDB_xcursor { unsigned char mx_dbflag; } MDB_xcursor; - /** Check if there is an inited xcursor, so #XCURSOR_REFRESH() is proper */ + /** Check if there is an inited xcursor */ #define XCURSOR_INITED(mc) \ ((mc)->mc_xcursor && ((mc)->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED)) - /** Update sub-page pointer, if any, in \b mc->mc_xcursor. Needed + /** Update the xcursor's sub-page pointer, if any, in \b mc. Needed * when the node which contains the sub-page may have moved. Called - * with \b mp = mc->mc_pg[mc->mc_top], \b ki = mc->mc_ki[mc->mc_top]. + * with leaf page \b mp = mc->mc_pg[\b top]. */ -#define XCURSOR_REFRESH(mc, mp, ki) do { \ +#define XCURSOR_REFRESH(mc, top, mp) do { \ MDB_page *xr_pg = (mp); \ - MDB_node *xr_node = NODEPTR(xr_pg, ki); \ + MDB_node *xr_node; \ + if (!XCURSOR_INITED(mc) || (mc)->mc_ki[top] >= NUMKEYS(xr_pg)) break; \ + xr_node = NODEPTR(xr_pg, (mc)->mc_ki[top]); \ if ((xr_node->mn_flags & (F_DUPDATA|F_SUBDATA)) == F_DUPDATA) \ (mc)->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(xr_node); \ } while (0) @@ -1239,7 +1253,7 @@ typedef struct MDB_pgstate { struct MDB_env { HANDLE me_fd; /**< The main data file */ HANDLE me_lfd; /**< The lock file */ - HANDLE me_mfd; /**< just for writing the meta pages */ + HANDLE me_mfd; /**< For writing and syncing the meta pages */ /** Failed to update the meta page. Probably an I/O error. */ #define MDB_FATAL_ERROR 0x80000000U /** Some fields are initialized. */ @@ -1747,6 +1761,7 @@ mdb_dcmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b) /** Allocate memory for a page. * Re-use old malloc'd pages first for singletons, otherwise just malloc. + * Set #MDB_TXN_ERROR on failure. */ static MDB_page * mdb_page_malloc(MDB_txn *txn, unsigned num) @@ -2114,7 +2129,7 @@ mdb_page_dirty(MDB_txn *txn, MDB_page *mp) } /** Allocate page numbers and memory for writing. Maintain me_pglast, - * me_pghead and mt_next_pgno. + * me_pghead and mt_next_pgno. Set #MDB_TXN_ERROR on failure. * * If there are free pages available from older transactions, they * are re-used first. Otherwise allocate a new page at mt_next_pgno. @@ -2237,7 +2252,7 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp) np = m2.mc_pg[m2.mc_top]; leaf = NODEPTR(np, m2.mc_ki[m2.mc_top]); if ((rc = mdb_node_read(&m2, leaf, &data)) != MDB_SUCCESS) - return rc; + goto fail; idl = (MDB_ID *) data.mv_data; i = idl[0]; @@ -2387,6 +2402,7 @@ mdb_page_unspill(MDB_txn *txn, MDB_page *mp, MDB_page **ret) } /** Touch a page: make it dirty and re-insert into tree with updated pgno. + * Set #MDB_TXN_ERROR on failure. * @param[in] mc cursor pointing to the page to be touched * @return 0 on success, non-zero on failure. */ @@ -2475,8 +2491,8 @@ done: if (m2 == mc) continue; if (m2->mc_pg[mc->mc_top] == mp) { m2->mc_pg[mc->mc_top] = np; - if (XCURSOR_INITED(m2) && IS_LEAF(np)) - XCURSOR_REFRESH(m2, np, m2->mc_ki[mc->mc_top]); + if (IS_LEAF(np)) + XCURSOR_REFRESH(m2, mc->mc_top, np); } } } @@ -3827,7 +3843,10 @@ mdb_env_write_meta(MDB_txn *txn) len = sizeof(MDB_meta) - off; off += (char *)mp - env->me_map; - /* Write to the SYNC fd */ + /* Write to the SYNC fd unless MDB_NOSYNC/MDB_NOMETASYNC. + * (me_mfd goes to the same file as me_fd, but writing to it + * also syncs to disk. Avoids a separate fdatasync() call.) + */ mfd = (flags & (MDB_NOSYNC|MDB_NOMETASYNC)) ? env->me_fd : env->me_mfd; #ifdef _WIN32 { @@ -4156,7 +4175,7 @@ enum mdb_fopen_type { /* A comment in mdb_fopen() explains some O_* flag choices. */ MDB_O_RDONLY= O_RDONLY, /**< for RDONLY me_fd */ MDB_O_RDWR = O_RDWR |O_CREAT, /**< for me_fd */ - MDB_O_META = O_RDWR |MDB_DSYNC |MDB_CLOEXEC, /**< for me_mfd */ + MDB_O_META = O_WRONLY|MDB_DSYNC |MDB_CLOEXEC, /**< for me_mfd */ MDB_O_COPY = O_WRONLY|O_CREAT|O_EXCL|MDB_CLOEXEC, /**< for #mdb_env_copy() */ /** Bitmask for open() flags in enum #mdb_fopen_type. The other bits * distinguish otherwise-equal MDB_O_* constants from each other. @@ -4217,6 +4236,7 @@ mdb_fopen(const MDB_env *env, MDB_name *fname, disp = OPEN_EXISTING; break; case MDB_O_META: /* for writing metapages */ + acc = GENERIC_WRITE; disp = OPEN_EXISTING; attrs = FILE_ATTRIBUTE_NORMAL|FILE_FLAG_WRITE_THROUGH; break; @@ -4955,9 +4975,7 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode } if ((rc = mdb_env_open2(env)) == MDB_SUCCESS) { - if (flags & (MDB_RDONLY|MDB_WRITEMAP)) { - env->me_mfd = env->me_fd; - } else { + if (!(flags & (MDB_RDONLY|MDB_WRITEMAP))) { /* Synchronous fd for meta writes. Needed even with * MDB_NOSYNC/MDB_NOMETASYNC, in case these get reset. */ @@ -5040,7 +5058,7 @@ mdb_env_close0(MDB_env *env, int excl) if (env->me_map) { munmap(env->me_map, env->me_mapsize); } - if (env->me_mfd != env->me_fd && env->me_mfd != INVALID_HANDLE_VALUE) + if (env->me_mfd != INVALID_HANDLE_VALUE) (void) close(env->me_mfd); if (env->me_fd != INVALID_HANDLE_VALUE) (void) close(env->me_fd); @@ -5340,7 +5358,9 @@ mdb_cursor_pop(MDB_cursor *mc) } } -/** Push a page onto the top of the cursor's stack. */ +/** Push a page onto the top of the cursor's stack. + * Set #MDB_TXN_ERROR on failure. + */ static int mdb_cursor_push(MDB_cursor *mc, MDB_page *mp) { @@ -5360,6 +5380,7 @@ mdb_cursor_push(MDB_cursor *mc, MDB_page *mp) } /** Find the address of the page corresponding to a given page number. + * Set #MDB_TXN_ERROR on failure. * @param[in] mc the cursor accessing the page. * @param[in] pgno the page number for the page to retrieve. * @param[out] ret address of a pointer where the page's address will be stored. @@ -5444,8 +5465,17 @@ mdb_page_search_root(MDB_cursor *mc, MDB_val *key, int flags) if (flags & (MDB_PS_FIRST|MDB_PS_LAST)) { i = 0; - if (flags & MDB_PS_LAST) + if (flags & MDB_PS_LAST) { i = NUMKEYS(mp) - 1; + /* if already init'd, see if we're already in right place */ + if (mc->mc_flags & C_INITIALIZED) { + if (mc->mc_ki[mc->mc_top] == i) { + mc->mc_top = mc->mc_snum++; + mp = mc->mc_pg[mc->mc_top]; + goto ready; + } + } + } } else { int exact; node = mdb_node_search(mc, key, &exact); @@ -5471,6 +5501,7 @@ mdb_page_search_root(MDB_cursor *mc, MDB_val *key, int flags) if ((rc = mdb_cursor_push(mc, mp))) return rc; +ready: if (flags & MDB_PS_MODIFY) { if ((rc = mdb_page_touch(mc)) != 0) return rc; @@ -5796,15 +5827,20 @@ mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) MDB_node *leaf; int rc; - if ((mc->mc_flags & C_EOF) || - ((mc->mc_flags & C_DEL) && op == MDB_NEXT_DUP)) { + if ((mc->mc_flags & C_DEL && op == MDB_NEXT_DUP)) return MDB_NOTFOUND; - } + if (!(mc->mc_flags & C_INITIALIZED)) return mdb_cursor_first(mc, key, data); mp = mc->mc_pg[mc->mc_top]; + if (mc->mc_flags & C_EOF) { + if (mc->mc_ki[mc->mc_top] >= NUMKEYS(mp)-1) + return MDB_NOTFOUND; + mc->mc_flags ^= C_EOF; + } + if (mc->mc_db->md_flags & MDB_DUPSORT) { leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]); if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { @@ -6209,16 +6245,13 @@ mdb_cursor_last(MDB_cursor *mc, MDB_val *key, MDB_val *data) if (mc->mc_xcursor) mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF); - if (!(mc->mc_flags & C_EOF)) { - - if (!(mc->mc_flags & C_INITIALIZED) || mc->mc_top) { - rc = mdb_page_search(mc, NULL, MDB_PS_LAST); - if (rc != MDB_SUCCESS) - return rc; - } - mdb_cassert(mc, IS_LEAF(mc->mc_pg[mc->mc_top])); - + if (!(mc->mc_flags & C_INITIALIZED) || mc->mc_top) { + rc = mdb_page_search(mc, NULL, MDB_PS_LAST); + if (rc != MDB_SUCCESS) + return rc; } + mdb_cassert(mc, IS_LEAF(mc->mc_pg[mc->mc_top])); + mc->mc_ki[mc->mc_top] = NUMKEYS(mc->mc_pg[mc->mc_top]) - 1; mc->mc_flags |= C_INITIALIZED|C_EOF; leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); @@ -6395,6 +6428,11 @@ fetchm: rc = MDB_INCOMPATIBLE; break; } + if (mc->mc_ki[mc->mc_top] >= NUMKEYS(mc->mc_pg[mc->mc_top])) { + mc->mc_ki[mc->mc_top] = NUMKEYS(mc->mc_pg[mc->mc_top]); + rc = MDB_NOTFOUND; + break; + } { MDB_node *leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); if (!F_ISSET(leaf->mn_flags, F_DUPDATA)) { @@ -6761,8 +6799,9 @@ prep_subDB: } else { memcpy((char *)mp + mp->mp_upper + PAGEBASE, (char *)fp + fp->mp_upper + PAGEBASE, olddata.mv_size - fp->mp_upper - PAGEBASE); + memcpy((char *)(&mp->mp_ptrs), (char *)(&fp->mp_ptrs), NUMKEYS(fp) * sizeof(mp->mp_ptrs[0])); for (i=0; imp_ptrs[i] = fp->mp_ptrs[i] + offset; + mp->mp_ptrs[i] += offset; } } @@ -6891,8 +6930,7 @@ new_sub: if (m3->mc_ki[i] >= mc->mc_ki[i] && insert_key) { m3->mc_ki[i]++; } - if (XCURSOR_INITED(m3)) - XCURSOR_REFRESH(m3, mp, m3->mc_ki[i]); + XCURSOR_REFRESH(m3, i, mp); } } } @@ -6934,7 +6972,6 @@ put_sub: MDB_xcursor *mx = mc->mc_xcursor; unsigned i = mc->mc_top; MDB_page *mp = mc->mc_pg[i]; - int nkeys = NUMKEYS(mp); for (m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; m2=m2->mc_next) { if (m2 == mc || m2->mc_snum < mc->mc_snum) continue; @@ -6942,8 +6979,8 @@ put_sub: if (m2->mc_pg[i] == mp) { if (m2->mc_ki[i] == mc->mc_ki[i]) { mdb_xcursor_init2(m2, mx, new_dupdata); - } else if (!insert_key && m2->mc_ki[i] < nkeys) { - XCURSOR_REFRESH(m2, mp, m2->mc_ki[i]); + } else if (!insert_key) { + XCURSOR_REFRESH(m2, i, mp); } } } @@ -7048,12 +7085,7 @@ mdb_cursor_del(MDB_cursor *mc, unsigned int flags) if (m2 == mc || m2->mc_snum < mc->mc_snum) continue; if (!(m2->mc_flags & C_INITIALIZED)) continue; if (m2->mc_pg[mc->mc_top] == mp) { - MDB_node *n2 = leaf; - if (m2->mc_ki[mc->mc_top] != mc->mc_ki[mc->mc_top]) { - n2 = NODEPTR(mp, m2->mc_ki[mc->mc_top]); - if (n2->mn_flags & F_SUBDATA) continue; - } - m2->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(n2); + XCURSOR_REFRESH(m2, mc->mc_top, mp); } } } @@ -7098,6 +7130,7 @@ fail: } /** Allocate and initialize new pages for a database. + * Set #MDB_TXN_ERROR on failure. * @param[in] mc a cursor on the database being added to. * @param[in] flags flags defining what type of page is being allocated. * @param[in] num the number of pages to allocate. This is usually 1, @@ -7183,6 +7216,7 @@ mdb_branch_size(MDB_env *env, MDB_val *key) } /** Add a node to the page pointed to by the cursor. + * Set #MDB_TXN_ERROR on failure. * @param[in] mc The cursor for this operation. * @param[in] indx The index on the page where the new node should be added. * @param[in] key The key for the new node. @@ -7627,9 +7661,15 @@ mdb_cursor_count(MDB_cursor *mc, size_t *countp) if (!(mc->mc_flags & C_INITIALIZED)) return EINVAL; - if (!mc->mc_snum || (mc->mc_flags & C_EOF)) + if (!mc->mc_snum) return MDB_NOTFOUND; + if (mc->mc_flags & C_EOF) { + if (mc->mc_ki[mc->mc_top] >= NUMKEYS(mc->mc_pg[mc->mc_top])) + return MDB_NOTFOUND; + mc->mc_flags ^= C_EOF; + } + leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); if (!F_ISSET(leaf->mn_flags, F_DUPDATA)) { *countp = 1; @@ -7671,6 +7711,7 @@ mdb_cursor_dbi(MDB_cursor *mc) } /** Replace the key for a branch node with a new key. + * Set #MDB_TXN_ERROR on failure. * @param[in] mc Cursor pointing to the node to operate on. * @param[in] key The new key to use. * @return 0 on success, non-zero on failure. @@ -7890,8 +7931,8 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst, int fromleft) m3->mc_ki[csrc->mc_top] = cdst->mc_ki[cdst->mc_top]; m3->mc_ki[csrc->mc_top-1]++; } - if (XCURSOR_INITED(m3) && IS_LEAF(mps)) - XCURSOR_REFRESH(m3, m3->mc_pg[csrc->mc_top], m3->mc_ki[csrc->mc_top]); + if (IS_LEAF(mps)) + XCURSOR_REFRESH(m3, csrc->mc_top, m3->mc_pg[csrc->mc_top]); } } else /* Adding on the right, bump others down */ @@ -7912,8 +7953,8 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst, int fromleft) } else { m3->mc_ki[csrc->mc_top]--; } - if (XCURSOR_INITED(m3) && IS_LEAF(mps)) - XCURSOR_REFRESH(m3, m3->mc_pg[csrc->mc_top], m3->mc_ki[csrc->mc_top]); + if (IS_LEAF(mps)) + XCURSOR_REFRESH(m3, csrc->mc_top, m3->mc_pg[csrc->mc_top]); } } } @@ -8114,8 +8155,8 @@ mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst) m3->mc_ki[top-1] > csrc->mc_ki[top-1]) { m3->mc_ki[top-1]--; } - if (XCURSOR_INITED(m3) && IS_LEAF(psrc)) - XCURSOR_REFRESH(m3, m3->mc_pg[top], m3->mc_ki[top]); + if (IS_LEAF(psrc)) + XCURSOR_REFRESH(m3, top, m3->mc_pg[top]); } } { @@ -8377,8 +8418,7 @@ mdb_cursor_del0(MDB_cursor *mc) } else if (m3->mc_ki[mc->mc_top] > ki) { m3->mc_ki[mc->mc_top]--; } - if (XCURSOR_INITED(m3)) - XCURSOR_REFRESH(m3, m3->mc_pg[mc->mc_top], m3->mc_ki[mc->mc_top]); + XCURSOR_REFRESH(m3, mc->mc_top, mp); } } } @@ -8415,15 +8455,20 @@ mdb_cursor_del0(MDB_cursor *mc) } if (mc->mc_db->md_flags & MDB_DUPSORT) { MDB_node *node = NODEPTR(m3->mc_pg[m3->mc_top], m3->mc_ki[m3->mc_top]); - /* If this node is a fake page, it needs to be reinited - * because its data has moved. But just reset mc_pg[0] - * if the xcursor is already live. + /* If this node has dupdata, it may need to be reinited + * because its data has moved. + * If the xcursor was not initd it must be reinited. + * Else if node points to a subDB, nothing is needed. + * Else (xcursor was initd, not a subDB) needs mc_pg[0] reset. */ - if ((node->mn_flags & (F_DUPDATA|F_SUBDATA)) == F_DUPDATA) { - if (m3->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) - m3->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(node); - else + if (node->mn_flags & F_DUPDATA) { + if (m3->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) { + if (!(node->mn_flags & F_SUBDATA)) + m3->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(node); + } else { mdb_xcursor_init1(m3, node); + m3->mc_xcursor->mx_cursor.mc_flags |= C_DEL; + } } } } @@ -8499,6 +8544,7 @@ mdb_del0(MDB_txn *txn, MDB_dbi dbi, } /** Split a page and insert a new node. + * Set #MDB_TXN_ERROR on failure. * @param[in,out] mc Cursor pointing to the page and desired insertion index. * The cursor will be updated to point to the actual page and index where * the node got inserted after the split. @@ -8908,8 +8954,8 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno m3->mc_ki[ptop] >= mc->mc_ki[ptop]) { m3->mc_ki[ptop]++; } - if (XCURSOR_INITED(m3) && IS_LEAF(mp)) - XCURSOR_REFRESH(m3, m3->mc_pg[mc->mc_top], m3->mc_ki[mc->mc_top]); + if (IS_LEAF(mp)) + XCURSOR_REFRESH(m3, mc->mc_top, m3->mc_pg[mc->mc_top]); } } DPRINTF(("mp left: %d, rp left: %d", SIZELEFT(mp), SIZELEFT(rp))); @@ -8985,6 +9031,13 @@ mdb_env_copythr(void *arg) #else int len; #define DO_WRITE(rc, fd, ptr, w2, len) len = write(fd, ptr, w2); rc = (len >= 0) +#ifdef SIGPIPE + sigset_t set; + sigemptyset(&set); + sigaddset(&set, SIGPIPE); + if ((rc = pthread_sigmask(SIG_BLOCK, &set, NULL)) != 0) + my->mc_error = rc; +#endif #endif pthread_mutex_lock(&my->mc_mutex); @@ -9001,6 +9054,15 @@ again: DO_WRITE(rc, my->mc_fd, ptr, wsize, len); if (!rc) { rc = ErrCode(); +#if defined(SIGPIPE) && !defined(_WIN32) + if (rc == EPIPE) { + /* Collect the pending SIGPIPE, otherwise at least OS X + * gives it to the process on thread-exit (ITS#8504). + */ + int tmp; + sigwait(&set, &tmp); + } +#endif break; } else if (len > 0) { rc = MDB_SUCCESS; @@ -9698,8 +9760,11 @@ int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *db MDB_node *node = NODEPTR(mc.mc_pg[mc.mc_top], mc.mc_ki[mc.mc_top]); if ((node->mn_flags & (F_DUPDATA|F_SUBDATA)) != F_SUBDATA) return MDB_INCOMPATIBLE; - } else if (! (rc == MDB_NOTFOUND && (flags & MDB_CREATE))) { - return rc; + } else { + if (rc != MDB_NOTFOUND || !(flags & MDB_CREATE)) + return rc; + if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY)) + return EACCES; } /* Done here so we cannot fail after creating a new DB */ @@ -9713,7 +9778,8 @@ int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *db memset(&dummy, 0, sizeof(dummy)); dummy.md_root = P_INVALID; dummy.md_flags = flags & PERSISTENT_FLAGS; - rc = mdb_cursor_put(&mc, &key, &data, F_SUBDATA); + WITH_CURSOR_TRACKING(mc, + rc = mdb_cursor_put(&mc, &key, &data, F_SUBDATA)); dbflag |= DB_DIRTY; } @@ -10066,7 +10132,7 @@ mdb_reader_check(MDB_env *env, int *dead) return env->me_txns ? mdb_reader_check0(env, 0, dead) : MDB_SUCCESS; } -/** As #mdb_reader_check(). rlocked = . */ +/** As #mdb_reader_check(). \b rlocked is set if caller locked #me_rmutex. */ static int ESECT mdb_reader_check0(MDB_env *env, int rlocked, int *dead) {