]> git.sur5r.net Git - openldap/blobdiff - libraries/liblmdb/mdb.c
Refix root split check from 5da67968afb599697d7557c13b65fb961ec408dd
[openldap] / libraries / liblmdb / mdb.c
index 84af6293961a8d8f23e7920ec42683f81dad7905..5e2b69069430e340500038de28b3e8a359342002 100644 (file)
@@ -238,7 +238,9 @@ typedef SSIZE_T     ssize_t;
 #define MDB_OWNERDEAD  EOWNERDEAD      /**< #LOCK_MUTEX0() result if dead owner */
 #endif
 
-
+#ifdef __GLIBC__
+#define        GLIBC_VER       ((__GLIBC__ << 16 )| __GLIBC_MINOR__)
+#endif
 /** Some platforms define the EOWNERDEAD error code
  * even though they don't support Robust Mutexes.
  * Compile with -DMDB_USE_ROBUST=0, or use some other
@@ -248,12 +250,19 @@ typedef SSIZE_T   ssize_t;
  * either.)
  */
 #ifndef MDB_USE_ROBUST
-/* Android currently lacks Robust Mutex support */
-#if defined(ANDROID) && defined(MDB_USE_POSIX_MUTEX) && !defined(MDB_USE_ROBUST)
-#define MDB_USE_ROBUST 0
-#else
-#define MDB_USE_ROBUST 1
-#endif
+/* Android currently lacks Robust Mutex support. So does glibc < 2.4. */
+# if defined(MDB_USE_POSIX_MUTEX) && (defined(ANDROID) || \
+       (defined(__GLIBC__) && GLIBC_VER < 0x020004))
+#  define MDB_USE_ROBUST       0
+# else
+#  define MDB_USE_ROBUST       1
+/* glibc < 2.10 only provided _np API */
+#  if defined(__GLIBC__) && GLIBC_VER < 0x02000a
+#   define PTHREAD_MUTEX_ROBUST        PTHREAD_MUTEX_ROBUST_NP
+#   define pthread_mutexattr_setrobust(attr, flag)     pthread_mutexattr_setrobust_np(attr, flag)
+#   define pthread_mutex_consistent(mutex)     pthread_mutex_consistent_np(mutex)
+#  endif
+# endif
 #endif /* MDB_USE_ROBUST */
 
 #if defined(MDB_OWNERDEAD) && MDB_USE_ROBUST
@@ -394,10 +403,13 @@ static int mdb_mutex_failed(MDB_env *env, mdb_mutexref_t mutex, int rc);
  *
  *     @note If O_DSYNC is undefined but exists in /usr/include,
  * preferably set some compiler flag to get the definition.
- * Otherwise compile with the less efficient -DMDB_DSYNC=O_SYNC.
  */
 #ifndef MDB_DSYNC
+# ifdef O_DSYNC
 # define MDB_DSYNC     O_DSYNC
+# else
+# define MDB_DSYNC     O_SYNC
+# endif
 #endif
 #endif
 
@@ -1360,6 +1372,8 @@ static MDB_cmp_func       mdb_cmp_memn, mdb_cmp_memnr, mdb_cmp_int, mdb_cmp_cint, mdb_
 static SECURITY_DESCRIPTOR mdb_null_sd;
 static SECURITY_ATTRIBUTES mdb_all_sa;
 static int mdb_sec_inited;
+
+static int utf8_to_utf16(const char *src, int srcsize, wchar_t **dst, int *dstsize);
 #endif
 
 /** Return the library version info. */
@@ -3462,7 +3476,7 @@ mdb_txn_commit(MDB_txn *txn)
                }
 
                /* Append our loose page list to parent's */
-               for (lp = &parent->mt_loose_pgs; *lp; lp = &NEXT_LOOSE_PAGE(lp))
+               for (lp = &parent->mt_loose_pgs; *lp; lp = &NEXT_LOOSE_PAGE(*lp))
                        ;
                *lp = txn->mt_loose_pgs;
                parent->mt_loose_count += txn->mt_loose_count;
@@ -4428,9 +4442,12 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl)
        off_t size, rsize;
 
 #ifdef _WIN32
-       env->me_lfd = CreateFileA(lpath, GENERIC_READ|GENERIC_WRITE,
+       wchar_t *wlpath;
+       utf8_to_utf16(lpath, -1, &wlpath, NULL);
+       env->me_lfd = CreateFileW(wlpath, GENERIC_READ|GENERIC_WRITE,
                FILE_SHARE_READ|FILE_SHARE_WRITE, NULL, OPEN_ALWAYS,
                FILE_ATTRIBUTE_NORMAL, NULL);
+       free(wlpath);
 #else
        env->me_lfd = open(lpath, O_RDWR|O_CREAT|MDB_CLOEXEC, mode);
 #endif
@@ -4648,6 +4665,9 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode
 {
        int             oflags, rc, len, excl = -1;
        char *lpath, *dpath;
+#ifdef _WIN32
+       wchar_t *wpath;
+#endif
 
        if (env->me_fd!=INVALID_HANDLE_VALUE || (flags & ~(CHANGEABLE|CHANGELESS)))
                return EINVAL;
@@ -4711,8 +4731,10 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode
                len = OPEN_ALWAYS;
        }
        mode = FILE_ATTRIBUTE_NORMAL;
-       env->me_fd = CreateFileA(dpath, oflags, FILE_SHARE_READ|FILE_SHARE_WRITE,
+       utf8_to_utf16(dpath, -1, &wpath, NULL);
+       env->me_fd = CreateFileW(wpath, oflags, FILE_SHARE_READ|FILE_SHARE_WRITE,
                NULL, len, mode, NULL);
+       free(wpath);
 #else
        if (F_ISSET(flags, MDB_RDONLY))
                oflags = O_RDONLY;
@@ -4741,9 +4763,11 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode
                         */
 #ifdef _WIN32
                        len = OPEN_EXISTING;
-                       env->me_mfd = CreateFileA(dpath, oflags,
+                       utf8_to_utf16(dpath, -1, &wpath, NULL);
+                       env->me_mfd = CreateFileW(wpath, oflags,
                                FILE_SHARE_READ|FILE_SHARE_WRITE, NULL, len,
                                mode | FILE_FLAG_WRITE_THROUGH, NULL);
+                       free(wpath);
 #else
                        oflags &= ~O_CREAT;
                        env->me_mfd = open(dpath, oflags | MDB_DSYNC, mode);
@@ -5437,6 +5461,7 @@ mdb_ovpage_free(MDB_cursor *mc, MDB_page *mp)
                                return MDB_CORRUPTED;
                        }
                }
+               txn->mt_dirty_room++;
                if (!(env->me_flags & MDB_WRITEMAP))
                        mdb_dpage_free(env, mp);
 release:
@@ -6379,16 +6404,18 @@ fix_parent:
                         * update branch key if there is a parent page
                         */
                        if (mc->mc_top && !mc->mc_ki[mc->mc_top]) {
-                               unsigned short top = mc->mc_top;
+                               unsigned short dtop = 1;
                                mc->mc_top--;
                                /* slot 0 is always an empty key, find real slot */
-                               while (mc->mc_top && !mc->mc_ki[mc->mc_top])
+                               while (mc->mc_top && !mc->mc_ki[mc->mc_top]) {
                                        mc->mc_top--;
+                                       dtop++;
+                               }
                                if (mc->mc_ki[mc->mc_top])
                                        rc2 = mdb_update_key(mc, key);
                                else
                                        rc2 = MDB_SUCCESS;
-                               mc->mc_top = top;
+                               mc->mc_top += dtop;
                                if (rc2)
                                        return rc2;
                        }
@@ -6569,6 +6596,7 @@ current:
                                                return ENOMEM;
                                        id2.mid = pg;
                                        id2.mptr = np;
+                                       /* Note - this page is already counted in parent's dirty_room */
                                        rc2 = mdb_mid2l_insert(mc->mc_txn->mt_u.dirty_list, &id2);
                                        mdb_cassert(mc, rc2 == 0);
                                        if (!(flags & MDB_RESERVE)) {
@@ -7617,6 +7645,7 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst)
                                        m3->mc_ki[csrc->mc_top] == csrc->mc_ki[csrc->mc_top]) {
                                        m3->mc_pg[csrc->mc_top] = cdst->mc_pg[cdst->mc_top];
                                        m3->mc_ki[csrc->mc_top] = cdst->mc_ki[cdst->mc_top];
+                                       m3->mc_ki[csrc->mc_top-1]++;
                                }
                        }
                } else
@@ -7632,6 +7661,7 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst)
                                        if (!m3->mc_ki[csrc->mc_top]) {
                                                m3->mc_pg[csrc->mc_top] = cdst->mc_pg[cdst->mc_top];
                                                m3->mc_ki[csrc->mc_top] = cdst->mc_ki[cdst->mc_top];
+                                               m3->mc_ki[csrc->mc_top-1]--;
                                        } else {
                                                m3->mc_ki[csrc->mc_top]--;
                                        }
@@ -7731,6 +7761,9 @@ mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst)
        if ((rc = mdb_page_touch(cdst)))
                return rc;
 
+       /* get dst page again now that we've touched it. */
+       pdst = cdst->mc_pg[cdst->mc_top];
+
        /* Move all nodes from src to dst.
         */
        j = nkeys = NUMKEYS(pdst);
@@ -7821,6 +7854,9 @@ mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst)
                                m3->mc_pg[top] = pdst;
                                m3->mc_ki[top] += nkeys;
                                m3->mc_ki[top-1] = cdst->mc_ki[top-1];
+                       } else if (m3->mc_pg[top-1] == csrc->mc_pg[top-1] &&
+                               m3->mc_ki[top-1] > csrc->mc_ki[top-1]) {
+                               m3->mc_ki[top-1]--;
                        }
                }
        }
@@ -7956,9 +7992,9 @@ mdb_rebalance(MDB_cursor *mc)
                                                m3 = &m2->mc_xcursor->mx_cursor;
                                        else
                                                m3 = m2;
-                                       if (m3 == mc || m3->mc_snum < mc->mc_snum) continue;
+                                       if (m3 == mc) continue;
                                        if (m3->mc_pg[0] == mp) {
-                                               for (i=0; i<m3->mc_snum; i++) {
+                                               for (i=0; i<mc->mc_db->md_depth; i++) {
                                                        m3->mc_pg[i] = m3->mc_pg[i+1];
                                                        m3->mc_ki[i] = m3->mc_ki[i+1];
                                                }
@@ -8036,6 +8072,7 @@ mdb_rebalance(MDB_cursor *mc)
                        mn.mc_ki[mn.mc_top] += mc->mc_ki[mn.mc_top] + 1;
                        /* We want mdb_rebalance to find mn when doing fixups */
                        if (mc->mc_flags & C_SUB) {
+                               dummy.mc_flags = C_INITIALIZED;
                                dummy.mc_next = mc->mc_txn->mt_cursors[mc->mc_dbi];
                                mc->mc_txn->mt_cursors[mc->mc_dbi] = &dummy;
                                dummy.mc_xcursor = (MDB_xcursor *)&mn;
@@ -8232,12 +8269,19 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
        rp->mp_pad = mp->mp_pad;
        DPRINTF(("new right sibling: page %"Z"u", rp->mp_pgno));
 
-       if (mc->mc_snum < 2) {
+       /* Usually when splitting the root page, the cursor
+        * height is 1. But when called from mdb_update_key,
+        * the cursor height may be greater because it walks
+        * up the stack while finding the branch slot to update.
+        */
+       if (mc->mc_top < 1) {
                if ((rc = mdb_page_new(mc, P_BRANCH, 1, &pp)))
                        goto done;
                /* shift current top to make room for new parent */
-               mc->mc_pg[1] = mc->mc_pg[0];
-               mc->mc_ki[1] = mc->mc_ki[0];
+               for (i=mc->mc_snum; i>0; i--) {
+                       mc->mc_pg[i] = mc->mc_pg[i-1];
+                       mc->mc_ki[i] = mc->mc_ki[i-1];
+               }
                mc->mc_pg[0] = pp;
                mc->mc_ki[0] = 0;
                mc->mc_db->md_root = pp->mp_pgno;
@@ -8253,8 +8297,8 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
                        mc->mc_db->md_depth--;
                        goto done;
                }
-               mc->mc_snum = 2;
-               mc->mc_top = 1;
+               mc->mc_snum++;
+               mc->mc_top++;
                ptop = 0;
        } else {
                ptop = mc->mc_top-1;
@@ -8406,6 +8450,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
        /* Copy separator key to the parent.
         */
        if (SIZELEFT(mn.mc_pg[ptop]) < mdb_branch_size(env, &sepkey)) {
+               int snum = mc->mc_snum;
                mn.mc_snum--;
                mn.mc_top--;
                did_split = 1;
@@ -8414,13 +8459,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
                        goto done;
 
                /* root split? */
-               if (mn.mc_snum == mc->mc_snum) {
-                       mc->mc_pg[mc->mc_snum] = mc->mc_pg[mc->mc_top];
-                       mc->mc_ki[mc->mc_snum] = mc->mc_ki[mc->mc_top];
-                       mc->mc_pg[mc->mc_top] = mc->mc_pg[ptop];
-                       mc->mc_ki[mc->mc_top] = mc->mc_ki[ptop];
-                       mc->mc_snum++;
-                       mc->mc_top++;
+               if (mc->mc_snum > snum) {
                        ptop++;
                }
                /* Right page might now have changed parent.
@@ -8551,8 +8590,6 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
                                continue;
                        if (!(m2->mc_flags & m3->mc_flags & C_INITIALIZED))
                                continue;
-                       if (m3->mc_flags & C_SPLITTING)
-                               continue;
                        if (new_root) {
                                int k;
                                /* root split */
@@ -8569,6 +8606,8 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
                                m3->mc_snum++;
                                m3->mc_top++;
                        }
+                       if (m3->mc_flags & C_SPLITTING)
+                               continue;
                        if (m3->mc_top >= mc->mc_top && m3->mc_pg[mc->mc_top] == mp) {
                                if (m3->mc_ki[mc->mc_top] >= newindx && !(nflags & MDB_SPLIT_REPLACE))
                                        m3->mc_ki[mc->mc_top]++;
@@ -9113,6 +9152,9 @@ mdb_env_copy2(MDB_env *env, const char *path, unsigned int flags)
        int rc, len;
        char *lpath;
        HANDLE newfd = INVALID_HANDLE_VALUE;
+#ifdef _WIN32
+       wchar_t *wpath;
+#endif
 
        if (env->me_flags & MDB_NOSUBDIR) {
                lpath = (char *)path;
@@ -9130,8 +9172,10 @@ mdb_env_copy2(MDB_env *env, const char *path, unsigned int flags)
         * already in the OS cache.
         */
 #ifdef _WIN32
-       newfd = CreateFileA(lpath, GENERIC_WRITE, 0, NULL, CREATE_NEW,
+       utf8_to_utf16(lpath, -1, &wpath, NULL);
+       newfd = CreateFileW(wpath, GENERIC_WRITE, 0, NULL, CREATE_NEW,
                                FILE_FLAG_NO_BUFFERING|FILE_FLAG_WRITE_THROUGH, NULL);
+       free(wpath);
 #else
        newfd = open(lpath, O_WRONLY|O_CREAT|O_EXCL, 0666);
 #endif
@@ -9836,3 +9880,40 @@ mdb_mutex_failed(MDB_env *env, mdb_mutexref_t mutex, int rc)
 }
 #endif /* MDB_ROBUST_SUPPORTED */
 /** @} */
+
+#if defined(_WIN32)
+/** Convert a UTF-8 string to a newly malloc'd UTF-16 string.
+ * @param[in] src The UTF-8 input.
+ * @param[in] srcsize Size of src in bytes, or -1 if src is NUL-terminated.
+ * @param[out] dst Receives the malloc'd result, which the caller must free().
+ *     Set to NULL on failure, so a caller that frees it without checking
+ *     the return code stays safe.
+ * @param[out] dstsize If non-NULL, receives the result length in wchar_t units.
+ * @return 0 on success, ENOMEM if allocation fails, EILSEQ if the input
+ *     has no Unicode translation, EINVAL on any other conversion failure.
+ */
+static int utf8_to_utf16(const char *src, int srcsize, wchar_t **dst, int *dstsize)
+{
+       int need, rc;
+       wchar_t *result = NULL;
+
+       *dst = NULL;
+       /* First call only sizes the output; a return of 0 means failure */
+       need = MultiByteToWideChar(CP_UTF8, 0, src, srcsize, NULL, 0);
+       if (need > 0) {
+               result = malloc(sizeof(wchar_t) * need);
+               if (result == NULL)
+                       return ENOMEM;
+               if (MultiByteToWideChar(CP_UTF8, 0, src, srcsize, result, need) == need) {
+                       if (dstsize)
+                               *dstsize = need;
+                       *dst = result;
+                       return 0;
+               }
+       }
+       /* Read the error code before free() has a chance to disturb it */
+       rc = (GetLastError() == ERROR_NO_UNICODE_TRANSLATION) ? EILSEQ : EINVAL;
+       free(result);
+       return rc;
+}
+#endif /* defined(_WIN32) */