git.sur5r.net Git - openldap/blobdiff - servers/slapd/back-bdb/cache.c
ITS#4088 force cursors to use same locker
[openldap] / servers / slapd / back-bdb / cache.c
index 1d9f4899bb93ea6c799694d53cf8040bdfa30598..eb894b2757826c30f62aca6b2800e53dbf7e4b38 100644 (file)
@@ -2,7 +2,7 @@
 /* $OpenLDAP$ */
 /* This work is part of OpenLDAP Software <http://www.openldap.org/>.
  *
- * Copyright 2000-2004 The OpenLDAP Foundation.
+ * Copyright 2000-2005 The OpenLDAP Foundation.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -37,6 +37,14 @@ static void  bdb_lru_print(Cache *cache);
 
 static int bdb_txn_get( Operation *op, DB_ENV *env, DB_TXN **txn, int reset );
 
+/* 4.2.52 */
+#if DB_VERSION_FULL == 0x04020034
+#define        READ_TXN_FLAG   ReadFlag
+static int ReadFlag = DB_TXN_NOT_DURABLE;
+#else
+#define READ_TXN_FLAG  0
+#endif
+
 static EntryInfo *
 bdb_cache_entryinfo_new( Cache *cache )
 {
@@ -249,6 +257,13 @@ bdb_entryinfo_add_internal(
        } else {
                bdb->bi_cache.c_eiused++;
                ber_dupbv( &ei2->bei_nrdn, &ei->bei_nrdn );
+
+               /* This is a new leaf node. But if parent had no kids, then it was
+                * a leaf and we would be decrementing that. So, only increment if
+                * the parent already has kids.
+                */
+               if ( ei->bei_parent->bei_kids || !ei->bei_parent->bei_id )
+                       bdb->bi_cache.c_leaves++;
                avl_insert( &ei->bei_parent->bei_kids, ei2, bdb_rdn_cmp,
                        avl_dup_error );
 #ifdef BDB_HIER
@@ -308,6 +323,11 @@ bdb_cache_find_ndn(
                if ( !ei2 ) {
                        int len = ei.bei_nrdn.bv_len;
                                
+                       if ( BER_BVISEMPTY( ndn )) {
+                               *res = eip;
+                               return LDAP_SUCCESS;
+                       }
+
                        ei.bei_nrdn.bv_len = ndn->bv_len -
                                (ei.bei_nrdn.bv_val - ndn->bv_val);
                        bdb_cache_entryinfo_unlock( eip );
@@ -364,26 +384,25 @@ bdb_cache_find_ndn(
 /* Walk up the tree from a child node, looking for an ID that's already
  * been linked into the cache.
  */
-static int
+int
 hdb_cache_find_parent(
        Operation *op,
        DB_TXN *txn,
+       u_int32_t       locker,
        ID id,
        EntryInfo **res )
 {
        struct bdb_info *bdb = (struct bdb_info *) op->o_bd->be_private;
        EntryInfo ei, eip, *ei2 = NULL, *ein = NULL, *eir = NULL;
-       char ndn[SLAP_LDAPDN_MAXLEN];
-       ID parent;
        int rc;
-       int addlru = 1;
+       int addlru = 0;
 
        ei.bei_id = id;
        ei.bei_kids = NULL;
        ei.bei_ckids = 0;
 
        for (;;) {
-               rc = hdb_dn2id_parent( op, txn, &ei, &eip.bei_id );
+               rc = hdb_dn2id_parent( op, txn, locker, &ei, &eip.bei_id );
                if ( rc ) break;
 
                /* Save the previous node, if any */
@@ -424,11 +443,15 @@ hdb_cache_find_parent(
                                ein->bei_ckids++;
                                bdb_cache_entryinfo_unlock( ein );
                        }
+                       addlru = 0;
 
-                       if ( !eir ) {
-                               addlru = 0;
-                       }
                }
+               if ( addlru ) {
+                       ldap_pvt_thread_mutex_lock( &bdb->bi_cache.lru_mutex );
+                       LRU_ADD( &bdb->bi_cache, ein );
+                       ldap_pvt_thread_mutex_unlock( &bdb->bi_cache.lru_mutex );
+               }
+               addlru = 1;
 
                /* If this is the first time, save this node
                 * to be returned later.
@@ -446,6 +469,8 @@ hdb_cache_find_parent(
                        ei2 = &bdb->bi_cache.c_dntree;
                }
                bdb->bi_cache.c_eiused++;
+               if ( ei2 && ( ei2->bei_kids || !ei2->bei_id ))
+                               bdb->bi_cache.c_leaves++;
                ldap_pvt_thread_rdwr_wunlock( &bdb->bi_cache.c_rwlock );
 
                /* Got the parent, link in and we're done. */
@@ -544,31 +569,53 @@ bdb_cache_lru_add(
                         */
                        if ( bdb_cache_entry_db_lock( bdb->bi_dbenv,
                                        bdb->bi_cache.c_locker, elru, 1, 1, lockp ) == 0 ) {
-                               int stop = 0;
 
-                               /* If there's no entry, or this node is in
-                                * the process of linking into the cache,
+                               int stop = 0, decr = 0;
+
+                               /* If this node is in the process of linking into the cache,
                                 * or this node is being deleted, skip it.
                                 */
-                               if ( !elru->bei_e || (elru->bei_state &
-                                       ( CACHE_ENTRY_NOT_LINKED | CACHE_ENTRY_DELETED ))) {
+                               if ( elru->bei_state &
+                                       ( CACHE_ENTRY_NOT_LINKED | CACHE_ENTRY_DELETED )) {
                                        bdb_cache_entry_db_unlock( bdb->bi_dbenv, lockp );
                                        continue;
                                }
-                               LRU_DELETE( &bdb->bi_cache, elru );
-                               elru->bei_e->e_private = NULL;
+                               /* Free entry for this node if it's present */
+                               if ( elru->bei_e ) {
+                                       elru->bei_e->e_private = NULL;
 #ifdef SLAP_ZONE_ALLOC
-                               bdb_entry_return( bdb, elru->bei_e, elru->bei_zseq );
+                                       bdb_entry_return( bdb, elru->bei_e, elru->bei_zseq );
 #else
-                               bdb_entry_return( elru->bei_e );
+                                       bdb_entry_return( elru->bei_e );
 #endif
-                               elru->bei_e = NULL;
+                                       elru->bei_e = NULL;
+                                       decr = 1;
+                               }
+                               /* ITS#4010 if we're in slapcat, and this node is a leaf
+                                * node, free it.
+                                *
+                                * FIXME: we need to do this for slapd as well, (which is
+                                * why we compute bi_cache.c_leaves now) but at the moment
+                                * we can't because it causes unresolvable deadlocks. 
+                                */
+                               if ( slapMode & SLAP_TOOL_READONLY ) {
+                                       if ( !elru->bei_kids ) {
+                                               /* This does LRU_DELETE for us */
+                                               bdb_cache_delete_internal( &bdb->bi_cache, elru, 0 );
+                                               bdb_cache_delete_cleanup( &bdb->bi_cache, elru );
+                                       }
+                                       /* Leave node on LRU list for a future pass */
+                               } else {
+                                       LRU_DELETE( &bdb->bi_cache, elru );
+                               }
+                               bdb_cache_entry_db_unlock( bdb->bi_dbenv, lockp );
+
                                ldap_pvt_thread_rdwr_wlock( &bdb->bi_cache.c_rwlock );
-                               --bdb->bi_cache.c_cursize;
+                               if ( decr )
+                                       --bdb->bi_cache.c_cursize;
                                if (bdb->bi_cache.c_cursize <= bdb->bi_cache.c_maxsize)
                                        stop = 1;
                                ldap_pvt_thread_rdwr_wunlock( &bdb->bi_cache.c_rwlock );
-                               bdb_cache_entry_db_unlock( bdb->bi_dbenv, lockp );
                                if (stop) break;
                        }
                }
@@ -672,7 +719,7 @@ again:      ldap_pvt_thread_rdwr_rlock( &bdb->bi_cache.c_rwlock );
                        }
                }
 #else
-               rc = hdb_cache_find_parent(op, tid, id, eip );
+               rc = hdb_cache_find_parent(op, tid, locker, id, eip );
                if ( rc == 0 && *eip ) islocked = 1;
 #endif
        }
@@ -873,9 +920,11 @@ bdb_cache_add(
        u_int32_t locker )
 {
        EntryInfo *new, ei;
-       struct berval rdn = e->e_name;
        DB_LOCK lock;
        int rc;
+#ifdef BDB_HIER
+       struct berval rdn = e->e_name;
+#endif
 
        ei.bei_id = e->e_id;
        ei.bei_parent = eip;
@@ -919,11 +968,12 @@ bdb_cache_add(
        }
        bdb_cache_entryinfo_unlock( eip );
 
-       /* set lru mutex */
-       ldap_pvt_thread_mutex_lock( &bdb->bi_cache.lru_mutex );
        ++bdb->bi_cache.c_cursize;
        ldap_pvt_thread_rdwr_wunlock( &bdb->bi_cache.c_rwlock );
 
+       /* set lru mutex */
+       ldap_pvt_thread_mutex_lock( &bdb->bi_cache.lru_mutex );
+
        /* lru_mutex is unlocked for us */
        bdb_cache_lru_add( bdb, locker, new );
 
@@ -960,20 +1010,22 @@ bdb_cache_modify(
  */
 int
 bdb_cache_modrdn(
+       struct bdb_info *bdb,
        Entry *e,
        struct berval *nrdn,
        Entry *new,
        EntryInfo *ein,
-       DB_ENV *env,
        u_int32_t locker,
        DB_LOCK *lock )
 {
        EntryInfo *ei = BEI(e), *pei;
-       struct berval rdn;
        int rc;
+#ifdef BDB_HIER
+       struct berval rdn;
+#endif
 
        /* Get write lock on data */
-       rc =  bdb_cache_entry_db_relock( env, locker, ei, 1, 0, lock );
+       rc =  bdb_cache_entry_db_relock( bdb->bi_dbenv, locker, ei, 1, 0, lock );
        if ( rc ) return rc;
 
        /* If we've done repeated mods on a cached entry, then e_attrs
@@ -1018,12 +1070,11 @@ bdb_cache_modrdn(
        }
 #ifdef BDB_HIER
        {
-               int max = ei->bei_modrdns;
                /* Record the generation number of this change */
-               for ( pei = ein; pei->bei_parent; pei = pei->bei_parent ) {
-                       if ( pei->bei_modrdns > max ) max = pei->bei_modrdns;
-               }
-               ei->bei_modrdns = max + 1;
+               ldap_pvt_thread_mutex_lock( &bdb->bi_modrdns_mutex );
+               bdb->bi_modrdns++;
+               ei->bei_modrdns = bdb->bi_modrdns;
+               ldap_pvt_thread_mutex_unlock( &bdb->bi_modrdns_mutex );
        }
 #endif
        avl_insert( &ein->bei_kids, ei, bdb_rdn_cmp, avl_dup_error );
@@ -1048,7 +1099,7 @@ bdb_cache_delete(
        EntryInfo *ei = BEI(e);
        int     rc;
 
-       assert( e->e_private );
+       assert( e->e_private != NULL );
 
        /* Set this early, warn off any queriers */
        ei->bei_state |= CACHE_ENTRY_DELETED;
@@ -1070,7 +1121,15 @@ bdb_cache_delete(
 
        /* set lru mutex */
        ldap_pvt_thread_mutex_lock( &cache->lru_mutex );
+
+       /* set cache write lock */
+       ldap_pvt_thread_rdwr_wlock( &cache->c_rwlock );
+
        rc = bdb_cache_delete_internal( cache, e->e_private, 1 );
+
+       /* free cache write lock */
+       ldap_pvt_thread_rdwr_wunlock( &cache->c_rwlock );
+
        /* free lru mutex */
        ldap_pvt_thread_mutex_unlock( &cache->lru_mutex );
 
@@ -1122,9 +1181,6 @@ bdb_cache_delete_internal(
 {
        int rc = 0;     /* return code */
 
-       /* set cache write lock */
-       ldap_pvt_thread_rdwr_wlock( &cache->c_rwlock );
-
        /* Lock the parent's kids tree */
        bdb_cache_entryinfo_lock( e->bei_parent );
 
@@ -1138,27 +1194,25 @@ bdb_cache_delete_internal(
        {
                rc = -1;
        }
+       if ( e->bei_parent->bei_kids )
+               cache->c_leaves--;
 
        /* id tree */
        if ( avl_delete( &cache->c_idtree, (caddr_t) e, bdb_id_cmp ) == NULL ) {
                rc = -1;
        }
 
-       if (rc != 0) {
-               return rc;
-       }
-
-       cache->c_eiused--;
+       if ( rc == 0 ){
+               cache->c_eiused--;
 
-       /* lru */
-       LRU_DELETE( cache, e );
-       if ( e->bei_e ) cache->c_cursize--;
+               /* lru */
+               LRU_DELETE( cache, e );
+               if ( e->bei_e ) cache->c_cursize--;
+       }
 
-       /* free cache write lock */
-       ldap_pvt_thread_rdwr_wunlock( &cache->c_rwlock );
        bdb_cache_entryinfo_unlock( e->bei_parent );
 
-       return( 0 );
+       return( rc );
 }
 
 static void
@@ -1195,8 +1249,13 @@ bdb_cache_release_all( Cache *cache )
                cache->c_lruhead = cache->c_eifree->bei_lrunext;
                bdb_cache_entryinfo_destroy(cache->c_eifree);
        }
+       cache->c_cursize = 0;
+       cache->c_eiused = 0;
+       cache->c_leaves = 0;
+       cache->c_idtree = NULL;
        cache->c_lruhead = NULL;
        cache->c_lrutail = NULL;
+       cache->c_dntree.bei_kids = NULL;
 
        /* free lru mutex */
        ldap_pvt_thread_mutex_unlock( &cache->lru_mutex );
@@ -1235,9 +1294,14 @@ bdb_txn_free( void *key, void *data )
 static int
 bdb_txn_get( Operation *op, DB_ENV *env, DB_TXN **txn, int reset )
 {
-       int i, rc, lockid;
+       int i, rc;
        void *ctx, *data = NULL;
 
+       if ( slapMode & SLAP_TOOL_MODE ) {
+               *txn = NULL;
+               return 0;
+       }
+
        /* If no op was provided, try to find the ctx anyway... */
        if ( op ) {
                ctx = op->o_threadctx;
@@ -1259,7 +1323,17 @@ bdb_txn_get( Operation *op, DB_ENV *env, DB_TXN **txn, int reset )
        if ( ldap_pvt_thread_pool_getkey( ctx, ((char *)env)+1, &data, NULL ) ||
                data == NULL ) {
                for ( i=0, rc=1; rc != 0 && i<4; i++ ) {
-                       rc = TXN_BEGIN( env, NULL, txn, 0 );
+                       rc = TXN_BEGIN( env, NULL, txn, READ_TXN_FLAG );
+#if DB_VERSION_FULL == 0x04020034
+                       if ( rc == EINVAL && READ_TXN_FLAG ) {
+                               READ_TXN_FLAG = 0;
+                               Debug( LDAP_DEBUG_ANY,
+                                       "bdb_txn_get: BerkeleyDB 4.2.52 library needs TXN patch!\n",
+                                       0, 0, 0 );
+                               i--;
+                               continue;
+                       }
+#endif
                        if (rc) ldap_pvt_thread_yield();
                }
                if ( rc != 0) {
@@ -1284,15 +1358,15 @@ static void
 bdb_locker_id_free( void *key, void *data )
 {
        DB_ENV *env = key;
-       int lockid = (int) data;
+       u_int32_t lockid = (long)data;
        int rc;
 
        rc = XLOCK_ID_FREE( env, lockid );
        if ( rc == EINVAL ) {
                DB_LOCKREQ lr;
                Debug( LDAP_DEBUG_ANY,
-                       "bdb_locker_id_free: %d err %s(%d)\n",
-                       lockid, db_strerror(rc), rc );
+                       "bdb_locker_id_free: %lu err %s(%d)\n",
+                       (unsigned long) lockid, db_strerror(rc), rc );
                /* release all locks held by this locker. */
                lr.op = DB_LOCK_PUT_ALL;
                lr.obj = NULL;
@@ -1302,9 +1376,10 @@ bdb_locker_id_free( void *key, void *data )
 }
 
 int
-bdb_locker_id( Operation *op, DB_ENV *env, int *locker )
+bdb_locker_id( Operation *op, DB_ENV *env, u_int32_t *locker )
 {
-       int i, rc, lockid;
+       int i, rc;
+       u_int32_t lockid;
        void *data;
        void *ctx;
 
@@ -1331,7 +1406,7 @@ bdb_locker_id( Operation *op, DB_ENV *env, int *locker )
                if ( rc != 0) {
                        return rc;
                }
-               data = (void *)lockid;
+               data = (void *)((long)lockid);
                if ( ( rc = ldap_pvt_thread_pool_setkey( ctx, env,
                        data, bdb_locker_id_free ) ) ) {
                        XLOCK_ID_FREE( env, lockid );
@@ -1341,12 +1416,12 @@ bdb_locker_id( Operation *op, DB_ENV *env, int *locker )
                        return rc;
                }
        } else {
-               lockid = (int)data;
+               lockid = (long)data;
        }
        *locker = lockid;
        return 0;
 }
-#endif
+#endif /* BDB_REUSE_LOCKERS */
 
 void
 bdb_cache_delete_entry(