#ifndef _WIN32
#include <pthread.h>
+#include <signal.h>
#ifdef MDB_USE_POSIX_SEM
# define MDB_USE_HASH 1
#include <semaphore.h>
struct MDB_env {
HANDLE me_fd; /**< The main data file */
HANDLE me_lfd; /**< The lock file */
- HANDLE me_mfd; /**< just for writing the meta pages */
+ HANDLE me_mfd; /**< For writing and syncing the meta pages */
/** Failed to update the meta page. Probably an I/O error. */
#define MDB_FATAL_ERROR 0x80000000U
/** Some fields are initialized. */
/** Allocate memory for a page.
* Re-use old malloc'd pages first for singletons, otherwise just malloc.
+ * Set #MDB_TXN_ERROR on failure.
*/
static MDB_page *
mdb_page_malloc(MDB_txn *txn, unsigned num)
}
/** Allocate page numbers and memory for writing. Maintain me_pglast,
- * me_pghead and mt_next_pgno.
+ * me_pghead and mt_next_pgno. Set #MDB_TXN_ERROR on failure.
*
* If there are free pages available from older transactions, they
* are re-used first. Otherwise allocate a new page at mt_next_pgno.
np = m2.mc_pg[m2.mc_top];
leaf = NODEPTR(np, m2.mc_ki[m2.mc_top]);
if ((rc = mdb_node_read(&m2, leaf, &data)) != MDB_SUCCESS)
- return rc;
+ goto fail;
idl = (MDB_ID *) data.mv_data;
i = idl[0];
}
/** Touch a page: make it dirty and re-insert into tree with updated pgno.
+ * Set #MDB_TXN_ERROR on failure.
* @param[in] mc cursor pointing to the page to be touched
* @return 0 on success, non-zero on failure.
*/
len = sizeof(MDB_meta) - off;
off += (char *)mp - env->me_map;
- /* Write to the SYNC fd */
+ /* Write to the SYNC fd unless MDB_NOSYNC/MDB_NOMETASYNC.
+ * (me_mfd goes to the same file as me_fd, but writing to it
+ * also syncs to disk. Avoids a separate fdatasync() call.)
+ */
mfd = (flags & (MDB_NOSYNC|MDB_NOMETASYNC)) ? env->me_fd : env->me_mfd;
#ifdef _WIN32
{
/* A comment in mdb_fopen() explains some O_* flag choices. */
MDB_O_RDONLY= O_RDONLY, /**< for RDONLY me_fd */
MDB_O_RDWR = O_RDWR |O_CREAT, /**< for me_fd */
- MDB_O_META = O_RDWR |MDB_DSYNC, /**< for me_mfd */
- MDB_O_COPY = O_WRONLY|O_CREAT|O_EXCL, /**< for #mdb_env_copy() */
+ MDB_O_META = O_WRONLY|MDB_DSYNC |MDB_CLOEXEC, /**< for me_mfd */
+ MDB_O_COPY = O_WRONLY|O_CREAT|O_EXCL|MDB_CLOEXEC, /**< for #mdb_env_copy() */
/** Bitmask for open() flags in enum #mdb_fopen_type. The other bits
* distinguish otherwise-equal MDB_O_* constants from each other.
*/
*
* The lockfile needs FD_CLOEXEC (close file descriptor on exec*())
* to avoid the flock() issues noted under Caveats in lmdb.h.
+ * Also set it for other filehandles which the user cannot get at
+ * and close himself, which he may need after fork(). I.e. all but
+ * me_fd, which programs do use via mdb_env_get_fd().
*/
#ifdef _WIN32
disp = OPEN_EXISTING;
break;
case MDB_O_META: /* for writing metapages */
+ acc = GENERIC_WRITE;
disp = OPEN_EXISTING;
attrs = FILE_ATTRIBUTE_NORMAL|FILE_FLAG_WRITE_THROUGH;
break;
rc = ErrCode();
#ifndef _WIN32
else {
- if (which == MDB_O_LOCKS) {
+ if (which != MDB_O_RDONLY && which != MDB_O_RDWR) {
/* Set CLOEXEC if we could not pass it to open() */
if (!MDB_CLOEXEC && (flags = fcntl(fd, F_GETFD)) != -1)
(void) fcntl(fd, F_SETFD, flags | FD_CLOEXEC);
{
MDB_reader *reader = ptr;
- reader->mr_pid = 0;
+#ifndef _WIN32
+ if (reader->mr_pid == getpid()) /* catch pthread_exit() in child process */
+#endif
+ /* We omit the mutex, so do this atomically (i.e. skip mr_txnid) */
+ reader->mr_pid = 0;
}
#ifdef _WIN32
}
if ((rc = mdb_env_open2(env)) == MDB_SUCCESS) {
- if (flags & (MDB_RDONLY|MDB_WRITEMAP)) {
- env->me_mfd = env->me_fd;
- } else {
+ if (!(flags & (MDB_RDONLY|MDB_WRITEMAP))) {
/* Synchronous fd for meta writes. Needed even with
* MDB_NOSYNC/MDB_NOMETASYNC, in case these get reset.
*/
if (env->me_map) {
munmap(env->me_map, env->me_mapsize);
}
- if (env->me_mfd != env->me_fd && env->me_mfd != INVALID_HANDLE_VALUE)
+ if (env->me_mfd != INVALID_HANDLE_VALUE)
(void) close(env->me_mfd);
if (env->me_fd != INVALID_HANDLE_VALUE)
(void) close(env->me_fd);
}
}
-/** Push a page onto the top of the cursor's stack. */
+/** Push a page onto the top of the cursor's stack.
+ * Set #MDB_TXN_ERROR on failure.
+ */
static int
mdb_cursor_push(MDB_cursor *mc, MDB_page *mp)
{
}
/** Find the address of the page corresponding to a given page number.
+ * Set #MDB_TXN_ERROR on failure.
* @param[in] mc the cursor accessing the page.
* @param[in] pgno the page number for the page to retrieve.
* @param[out] ret address of a pointer where the page's address will be stored.
}
/** Allocate and initialize new pages for a database.
+ * Set #MDB_TXN_ERROR on failure.
* @param[in] mc a cursor on the database being added to.
* @param[in] flags flags defining what type of page is being allocated.
* @param[in] num the number of pages to allocate. This is usually 1,
}
/** Add a node to the page pointed to by the cursor.
+ * Set #MDB_TXN_ERROR on failure.
* @param[in] mc The cursor for this operation.
* @param[in] indx The index on the page where the new node should be added.
* @param[in] key The key for the new node.
}
/** Replace the key for a branch node with a new key.
+ * Set #MDB_TXN_ERROR on failure.
* @param[in] mc Cursor pointing to the node to operate on.
* @param[in] key The new key to use.
* @return 0 on success, non-zero on failure.
}
/** Split a page and insert a new node.
+ * Set #MDB_TXN_ERROR on failure.
* @param[in,out] mc Cursor pointing to the page and desired insertion index.
* The cursor will be updated to point to the actual page and index where
* the node got inserted after the split.
#else
int len;
#define DO_WRITE(rc, fd, ptr, w2, len) len = write(fd, ptr, w2); rc = (len >= 0)
+#ifdef SIGPIPE
+ sigset_t set;
+ sigemptyset(&set);
+ sigaddset(&set, SIGPIPE);
+ if ((rc = pthread_sigmask(SIG_BLOCK, &set, NULL)) != 0)
+ my->mc_error = rc;
+#endif
#endif
pthread_mutex_lock(&my->mc_mutex);
DO_WRITE(rc, my->mc_fd, ptr, wsize, len);
if (!rc) {
rc = ErrCode();
+#if defined(SIGPIPE) && !defined(_WIN32)
+ if (rc == EPIPE) {
+ /* Collect the pending SIGPIPE, otherwise at least OS X
+ * gives it to the process on thread-exit (ITS#8504).
+ */
+ int tmp;
+ sigwait(&set, &tmp);
+ }
+#endif
break;
} else if (len > 0) {
rc = MDB_SUCCESS;
memset(&dummy, 0, sizeof(dummy));
dummy.md_root = P_INVALID;
dummy.md_flags = flags & PERSISTENT_FLAGS;
- rc = mdb_cursor_put(&mc, &key, &data, F_SUBDATA);
+ WITH_CURSOR_TRACKING(mc,
+ rc = mdb_cursor_put(&mc, &key, &data, F_SUBDATA));
dbflag |= DB_DIRTY;
}
return env->me_txns ? mdb_reader_check0(env, 0, dead) : MDB_SUCCESS;
}
-/** As #mdb_reader_check(). rlocked = <caller locked the reader mutex>. */
+/** As #mdb_reader_check(). \b rlocked is set if caller locked #me_rmutex. */
static int ESECT
mdb_reader_check0(MDB_env *env, int rlocked, int *dead)
{