git.sur5r.net Git - openldap/blobdiff - libraries/liblmdb/mdb.c
ITS#8311 add comment
[openldap] / libraries / liblmdb / mdb.c
index 86ceb0bf8e3258de6c3fee9bb9e9be79c15de874..fc0340c51e2e33fe22eb62adc3b5b779e13e9588 100644 (file)
@@ -238,7 +238,9 @@ typedef SSIZE_T     ssize_t;
 #define MDB_OWNERDEAD  EOWNERDEAD      /**< #LOCK_MUTEX0() result if dead owner */
 #endif
 
-
+#ifdef __GLIBC__
+#define        GLIBC_VER       ((__GLIBC__ << 16 )| __GLIBC_MINOR__)
+#endif
 /** Some platforms define the EOWNERDEAD error code
  * even though they don't support Robust Mutexes.
  * Compile with -DMDB_USE_ROBUST=0, or use some other
@@ -248,12 +250,19 @@ typedef SSIZE_T   ssize_t;
  * either.)
  */
 #ifndef MDB_USE_ROBUST
-/* Android currently lacks Robust Mutex support */
-#if defined(ANDROID) && defined(MDB_USE_POSIX_MUTEX) && !defined(MDB_USE_ROBUST)
-#define MDB_USE_ROBUST 0
-#else
-#define MDB_USE_ROBUST 1
-#endif
+/* Android currently lacks Robust Mutex support. So does glibc < 2.4. */
+# if defined(MDB_USE_POSIX_MUTEX) && (defined(ANDROID) || \
+       (defined(__GLIBC__) && GLIBC_VER < 0x020004))
+#  define MDB_USE_ROBUST       0
+# else
+#  define MDB_USE_ROBUST       1
+/* glibc < 2.10 only provided _np API */
+#  if defined(__GLIBC__) && GLIBC_VER < 0x02000a
+#   define PTHREAD_MUTEX_ROBUST        PTHREAD_MUTEX_ROBUST_NP
+#   define pthread_mutexattr_setrobust(attr, flag)     pthread_mutexattr_setrobust_np(attr, flag)
+#   define pthread_mutex_consistent(mutex)     pthread_mutex_consistent_np(mutex)
+#  endif
+# endif
 #endif /* MDB_USE_ROBUST */
 
 #if defined(MDB_OWNERDEAD) && MDB_USE_ROBUST
@@ -394,10 +403,13 @@ static int mdb_mutex_failed(MDB_env *env, mdb_mutexref_t mutex, int rc);
  *
  *     @note If O_DSYNC is undefined but exists in /usr/include,
  * preferably set some compiler flag to get the definition.
- * Otherwise compile with the less efficient -DMDB_DSYNC=O_SYNC.
  */
 #ifndef MDB_DSYNC
+# ifdef O_DSYNC
 # define MDB_DSYNC     O_DSYNC
+# else
+# define MDB_DSYNC     O_SYNC
+# endif
 #endif
 #endif
 
@@ -6379,16 +6391,18 @@ fix_parent:
                         * update branch key if there is a parent page
                         */
                        if (mc->mc_top && !mc->mc_ki[mc->mc_top]) {
-                               unsigned short top = mc->mc_top;
+                               unsigned short dtop = 1;
                                mc->mc_top--;
                                /* slot 0 is always an empty key, find real slot */
-                               while (mc->mc_top && !mc->mc_ki[mc->mc_top])
+                               while (mc->mc_top && !mc->mc_ki[mc->mc_top]) {
                                        mc->mc_top--;
+                                       dtop++;
+                               }
                                if (mc->mc_ki[mc->mc_top])
                                        rc2 = mdb_update_key(mc, key);
                                else
                                        rc2 = MDB_SUCCESS;
-                               mc->mc_top = top;
+                               mc->mc_top += dtop;
                                if (rc2)
                                        return rc2;
                        }
@@ -7596,32 +7610,48 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst)
                /* Adjust other cursors pointing to mp */
                MDB_cursor *m2, *m3;
                MDB_dbi dbi = csrc->mc_dbi;
-               MDB_page *mp;
-
-               mp = cdst->mc_pg[csrc->mc_top];
-               for (m2 = csrc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) {
-                       if (csrc->mc_flags & C_SUB)
-                               m3 = &m2->mc_xcursor->mx_cursor;
-                       else
-                               m3 = m2;
-                       if (m3 == cdst) continue;
-                       if (m3->mc_pg[csrc->mc_top] == mp && m3->mc_ki[csrc->mc_top] >=
-                               cdst->mc_ki[csrc->mc_top]) {
-                               m3->mc_ki[csrc->mc_top]++;
+               MDB_page *mpd, *mps;
+
+               mps = csrc->mc_pg[csrc->mc_top];
+               /* If we're adding on the left, bump others up */
+               if (!cdst->mc_ki[csrc->mc_top]) {
+                       mpd = cdst->mc_pg[csrc->mc_top];
+                       for (m2 = csrc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) {
+                               if (csrc->mc_flags & C_SUB)
+                                       m3 = &m2->mc_xcursor->mx_cursor;
+                               else
+                                       m3 = m2;
+                               if (m3 != cdst &&
+                                       m3->mc_pg[csrc->mc_top] == mpd &&
+                                       m3->mc_ki[csrc->mc_top] >= cdst->mc_ki[csrc->mc_top]) {
+                                       m3->mc_ki[csrc->mc_top]++;
+                               }
+                               if (m3 !=csrc &&
+                                       m3->mc_pg[csrc->mc_top] == mps &&
+                                       m3->mc_ki[csrc->mc_top] == csrc->mc_ki[csrc->mc_top]) {
+                                       m3->mc_pg[csrc->mc_top] = cdst->mc_pg[cdst->mc_top];
+                                       m3->mc_ki[csrc->mc_top] = cdst->mc_ki[cdst->mc_top];
+                                       m3->mc_ki[csrc->mc_top-1]++;
+                               }
                        }
-               }
-
-               mp = csrc->mc_pg[csrc->mc_top];
-               for (m2 = csrc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) {
-                       if (csrc->mc_flags & C_SUB)
-                               m3 = &m2->mc_xcursor->mx_cursor;
-                       else
-                               m3 = m2;
-                       if (m3 == csrc) continue;
-                       if (m3->mc_pg[csrc->mc_top] == mp && m3->mc_ki[csrc->mc_top] ==
-                               csrc->mc_ki[csrc->mc_top]) {
-                               m3->mc_pg[csrc->mc_top] = cdst->mc_pg[cdst->mc_top];
-                               m3->mc_ki[csrc->mc_top] = cdst->mc_ki[cdst->mc_top];
+               } else
+               /* Adding on the right, bump others down */
+               {
+                       for (m2 = csrc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) {
+                               if (csrc->mc_flags & C_SUB)
+                                       m3 = &m2->mc_xcursor->mx_cursor;
+                               else
+                                       m3 = m2;
+                               if (m3 == csrc) continue;
+                               if (m3->mc_pg[csrc->mc_top] == mps) {
+                                       if (!m3->mc_ki[csrc->mc_top]) {
+                                               m3->mc_pg[csrc->mc_top] = cdst->mc_pg[cdst->mc_top];
+                                               m3->mc_ki[csrc->mc_top] = cdst->mc_ki[cdst->mc_top];
+                                               m3->mc_ki[csrc->mc_top-1]--;
+                                       } else {
+                                               m3->mc_ki[csrc->mc_top]--;
+                                       }
+                               }
                        }
                }
        }
@@ -7717,6 +7747,9 @@ mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst)
        if ((rc = mdb_page_touch(cdst)))
                return rc;
 
+       /* get dst page again now that we've touched it. */
+       pdst = cdst->mc_pg[cdst->mc_top];
+
        /* Move all nodes from src to dst.
         */
        j = nkeys = NUMKEYS(pdst);
@@ -7794,6 +7827,7 @@ mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst)
                /* Adjust other cursors pointing to mp */
                MDB_cursor *m2, *m3;
                MDB_dbi dbi = csrc->mc_dbi;
+               unsigned int top = csrc->mc_top;
 
                for (m2 = csrc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) {
                        if (csrc->mc_flags & C_SUB)
@@ -7802,9 +7836,10 @@ mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst)
                                m3 = m2;
                        if (m3 == csrc) continue;
                        if (m3->mc_snum < csrc->mc_snum) continue;
-                       if (m3->mc_pg[csrc->mc_top] == psrc) {
-                               m3->mc_pg[csrc->mc_top] = pdst;
-                               m3->mc_ki[csrc->mc_top] += nkeys;
+                       if (m3->mc_pg[top] == psrc) {
+                               m3->mc_pg[top] = pdst;
+                               m3->mc_ki[top] += nkeys;
+                               m3->mc_ki[top-1] = cdst->mc_ki[top-1];
                        }
                }
        }
@@ -7940,9 +7975,9 @@ mdb_rebalance(MDB_cursor *mc)
                                                m3 = &m2->mc_xcursor->mx_cursor;
                                        else
                                                m3 = m2;
-                                       if (m3 == mc || m3->mc_snum < mc->mc_snum) continue;
+                                       if (m3 == mc) continue;
                                        if (m3->mc_pg[0] == mp) {
-                                               for (i=0; i<m3->mc_snum; i++) {
+                                               for (i=0; i<mc->mc_db->md_depth; i++) {
                                                        m3->mc_pg[i] = m3->mc_pg[i+1];
                                                        m3->mc_ki[i] = m3->mc_ki[i+1];
                                                }
@@ -8007,7 +8042,8 @@ mdb_rebalance(MDB_cursor *mc)
         */
        if (PAGEFILL(mc->mc_txn->mt_env, mn.mc_pg[mn.mc_top]) >= thresh && NUMKEYS(mn.mc_pg[mn.mc_top]) > minkeys) {
                rc = mdb_node_move(&mn, mc);
-               if (mc->mc_ki[mc->mc_top-1]) {
+               if (!mc->mc_ki[mc->mc_top]) {
+                       /* if we inserted on left, bump position up */
                        oldki++;
                }
        } else {
@@ -8215,12 +8251,19 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
        rp->mp_pad = mp->mp_pad;
        DPRINTF(("new right sibling: page %"Z"u", rp->mp_pgno));
 
-       if (mc->mc_snum < 2) {
+       /* Usually when splitting the root page, the cursor
+        * height is 1. But when called from mdb_update_key,
+        * the cursor height may be greater because it walks
+        * up the stack while finding the branch slot to update.
+        */
+       if (mc->mc_top < 1) {
                if ((rc = mdb_page_new(mc, P_BRANCH, 1, &pp)))
                        goto done;
                /* shift current top to make room for new parent */
-               mc->mc_pg[1] = mc->mc_pg[0];
-               mc->mc_ki[1] = mc->mc_ki[0];
+               for (i=mc->mc_snum; i>0; i--) {
+                       mc->mc_pg[i] = mc->mc_pg[i-1];
+                       mc->mc_ki[i] = mc->mc_ki[i-1];
+               }
                mc->mc_pg[0] = pp;
                mc->mc_ki[0] = 0;
                mc->mc_db->md_root = pp->mp_pgno;
@@ -8236,8 +8279,8 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
                        mc->mc_db->md_depth--;
                        goto done;
                }
-               mc->mc_snum = 2;
-               mc->mc_top = 1;
+               mc->mc_snum++;
+               mc->mc_top++;
                ptop = 0;
        } else {
                ptop = mc->mc_top-1;
@@ -8296,6 +8339,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
                                rp->mp_upper -= ksize - sizeof(indx_t);
                                mc->mc_ki[mc->mc_top] = x;
                                mc->mc_pg[mc->mc_top] = rp;
+                               mc->mc_ki[ptop]++;
                        }
                } else {
                        int psize, nsize, k;