X-Git-Url: https://git.sur5r.net/?a=blobdiff_plain;f=servers%2Fslapd%2Fback-bdb%2Fcache.c;h=5c9dc31490405d4a4e79bcadc1cc97e1e9213e57;hb=822888eafcbeb0814cacf9155b8f6e0ae3822fda;hp=8e39d952dffd3a597e9f0dbd6b248a5ef8f14090;hpb=af23be36751dcb34555f7ac7092f6edf4c3a4171;p=openldap diff --git a/servers/slapd/back-bdb/cache.c b/servers/slapd/back-bdb/cache.c index 8e39d952df..5c9dc31490 100644 --- a/servers/slapd/back-bdb/cache.c +++ b/servers/slapd/back-bdb/cache.c @@ -2,7 +2,7 @@ /* $OpenLDAP$ */ /* This work is part of OpenLDAP Software . * - * Copyright 2000-2005 The OpenLDAP Foundation. + * Copyright 2000-2006 The OpenLDAP Foundation. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -26,9 +26,12 @@ #include "back-bdb.h" +#include "ldap_rq.h" + #ifdef BDB_HIER -#define bdb_cache_lru_add hdb_cache_lru_add +#define bdb_cache_lru_purge hdb_cache_lru_purge #endif +static void bdb_cache_lru_purge( struct bdb_info *bdb ); static int bdb_cache_delete_internal(Cache *cache, EntryInfo *e, int decr); #ifdef LDAP_DEBUG @@ -37,41 +40,94 @@ static void bdb_lru_print(Cache *cache); #endif #endif +/* For concurrency experiments only! */ +#if 0 +#define ldap_pvt_thread_rdwr_wlock(a) 0 +#define ldap_pvt_thread_rdwr_wunlock(a) 0 +#define ldap_pvt_thread_rdwr_rlock(a) 0 +#define ldap_pvt_thread_rdwr_runlock(a) 0 +#endif + +#if 0 +#define ldap_pvt_thread_mutex_trylock(a) 0 +#endif + static EntryInfo * bdb_cache_entryinfo_new( Cache *cache ) { EntryInfo *ei = NULL; if ( cache->c_eifree ) { - ldap_pvt_thread_rdwr_wlock( &cache->c_rwlock ); + ldap_pvt_thread_mutex_lock( &cache->c_eifree_mutex ); if ( cache->c_eifree ) { ei = cache->c_eifree; cache->c_eifree = ei->bei_lrunext; } - ldap_pvt_thread_rdwr_wunlock( &cache->c_rwlock ); + ldap_pvt_thread_mutex_unlock( &cache->c_eifree_mutex ); } - if ( ei ) { - ei->bei_lrunext = NULL; - ei->bei_state = 0; - } else { - ei = ch_calloc(1, sizeof(struct bdb_entry_info)); + if ( !ei ) { + ei = ch_calloc(1, sizeof(EntryInfo)); ldap_pvt_thread_mutex_init( &ei->bei_kids_mutex ); } + ei->bei_state = CACHE_ENTRY_REFERENCED; + return ei; } +/* Note - we now use a Second-Chance / Clock algorithm instead of + * Least-Recently-Used. This tremendously improves concurrency + * because we no longer need to manipulate the lists every time an + * entry is touched. We only need to lock the lists when adding + * or deleting an entry. It's now a circular doubly-linked list. + * We always append to the tail, but the head traverses the circle + * during a purge operation. + */ +static void +bdb_cache_lru_link( Cache *cache, EntryInfo *ei ) +{ + /* Insert into circular LRU list */ + ldap_pvt_thread_mutex_lock( &cache->lru_tail_mutex ); + ei->bei_lruprev = cache->c_lrutail; + if ( cache->c_lrutail ) { + ei->bei_lrunext = cache->c_lrutail->bei_lrunext; + cache->c_lrutail->bei_lrunext = ei; + if ( ei->bei_lrunext ) + ei->bei_lrunext->bei_lruprev = ei; + } else { + ei->bei_lrunext = ei->bei_lruprev = ei; + cache->c_lruhead = ei; + } + cache->c_lrutail = ei; + ldap_pvt_thread_mutex_unlock( &cache->lru_tail_mutex ); +} + +#ifdef NO_THREADS +#define NO_DB_LOCK +#endif + +/* #define NO_DB_LOCK 1 */ +/* Note: The BerkeleyDB locks are much slower than regular + * mutexes or rdwr locks. But the BDB implementation has the + * advantage of using a fixed size lock table, instead of + * allocating a lock object per entry in the DB. That's a + * key benefit for scaling. It also frees us from worrying + * about undetectable deadlocks between BDB activity and our + * own cache activity. It's still worth exploring faster + * alternatives though. + */ + /* Atomically release and reacquire a lock */ int bdb_cache_entry_db_relock( - DB_ENV *env, + struct bdb_info *bdb, u_int32_t locker, EntryInfo *ei, int rw, int tryOnly, DB_LOCK *lock ) { -#ifdef NO_THREADS +#ifdef NO_DB_LOCK return 0; #else int rc; @@ -89,7 +145,7 @@ bdb_cache_entry_db_relock( list[1].lock = *lock; list[1].mode = rw ? DB_LOCK_WRITE : DB_LOCK_READ; list[1].obj = &lockobj; - rc = env->lock_vec(env, locker, tryOnly ? DB_LOCK_NOWAIT : 0, + rc = bdb->bi_dbenv->lock_vec(bdb->bi_dbenv, locker, tryOnly ? DB_LOCK_NOWAIT : 0, list, 2, NULL ); if (rc && !tryOnly) { @@ -104,10 +160,10 @@ bdb_cache_entry_db_relock( } static int -bdb_cache_entry_db_lock( DB_ENV *env, u_int32_t locker, EntryInfo *ei, +bdb_cache_entry_db_lock( struct bdb_info *bdb, u_int32_t locker, EntryInfo *ei, int rw, int tryOnly, DB_LOCK *lock ) { -#ifdef NO_THREADS +#ifdef NO_DB_LOCK return 0; #else int rc; @@ -124,7 +180,7 @@ bdb_cache_entry_db_lock( DB_ENV *env, u_int32_t locker, EntryInfo *ei, lockobj.data = &ei->bei_id; lockobj.size = sizeof(ei->bei_id) + 1; - rc = LOCK_GET(env, locker, tryOnly ? DB_LOCK_NOWAIT : 0, + rc = LOCK_GET(bdb->bi_dbenv, locker, tryOnly ? DB_LOCK_NOWAIT : 0, &lockobj, db_rw, lock); if (rc && !tryOnly) { Debug( LDAP_DEBUG_TRACE, @@ -132,20 +188,20 @@ bdb_cache_entry_db_lock( DB_ENV *env, u_int32_t locker, EntryInfo *ei, ei->bei_id, rw, rc ); } return rc; -#endif /* NO_THREADS */ +#endif /* NO_DB_LOCK */ } int -bdb_cache_entry_db_unlock ( DB_ENV *env, DB_LOCK *lock ) +bdb_cache_entry_db_unlock ( struct bdb_info *bdb, DB_LOCK *lock ) { -#ifdef NO_THREADS +#ifdef NO_DB_LOCK return 0; #else int rc; - if ( !lock ) return 0; + if ( !lock || lock->mode == DB_LOCK_NG ) return 0; - rc = LOCK_PUT ( env, lock ); + rc = LOCK_PUT ( bdb->bi_dbenv, lock ); return rc; #endif } @@ -162,32 +218,6 @@ bdb_cache_entryinfo_destroy( EntryInfo *e ) return 0; } -#define LRU_DELETE( cache, ei ) do { \ - if ( (ei)->bei_lruprev != NULL ) { \ - (ei)->bei_lruprev->bei_lrunext = (ei)->bei_lrunext; \ - } else { \ - (cache)->c_lruhead = (ei)->bei_lrunext; \ - } \ - if ( (ei)->bei_lrunext != NULL ) { \ - (ei)->bei_lrunext->bei_lruprev = (ei)->bei_lruprev; \ - } else { \ - (cache)->c_lrutail = (ei)->bei_lruprev; \ - } \ - (ei)->bei_lrunext = (ei)->bei_lruprev = NULL; \ -} while(0) - -#define LRU_ADD( cache, ei ) do { \ - (ei)->bei_lrunext = (cache)->c_lruhead; \ - if ( (ei)->bei_lrunext != NULL ) { \ - (ei)->bei_lrunext->bei_lruprev = (ei); \ - } \ - (cache)->c_lruhead = (ei); \ - (ei)->bei_lruprev = NULL; \ - if ( (cache)->c_lrutail == NULL ) { \ - (cache)->c_lrutail = (ei); \ - } \ -} while(0) - /* Do a length-ordered sort on normalized RDNs */ static int bdb_rdn_cmp( const void *v_e1, const void *v_e2 ) @@ -262,6 +292,7 @@ bdb_entryinfo_add_internal( ei->bei_parent->bei_ckids++; #endif } + bdb_cache_lru_link( &bdb->bi_cache, ei2 ); *res = ei2; return 0; @@ -310,6 +341,7 @@ bdb_cache_find_ndn( } for ( bdb_cache_entryinfo_lock( eip ); eip; ) { + eip->bei_state |= CACHE_ENTRY_REFERENCED; ei.bei_parent = eip; ei2 = (EntryInfo *)avl_find( eip->bei_kids, &ei, bdb_rdn_cmp ); if ( !ei2 ) { @@ -387,7 +419,6 @@ hdb_cache_find_parent( struct bdb_info *bdb = (struct bdb_info *) op->o_bd->be_private; EntryInfo ei, eip, *ei2 = NULL, *ein = NULL, *eir = NULL; int rc; - int addlru = 0; ei.bei_id = id; ei.bei_kids = NULL; @@ -413,7 +444,7 @@ hdb_cache_find_parent( ei.bei_ckids = 0; /* This node is not fully connected yet */ - ein->bei_state = CACHE_ENTRY_NOT_LINKED; + ein->bei_state |= CACHE_ENTRY_NOT_LINKED; /* Insert this node into the ID tree */ ldap_pvt_thread_rdwr_wlock( &bdb->bi_cache.c_rwlock ); @@ -435,15 +466,9 @@ hdb_cache_find_parent( ein->bei_ckids++; bdb_cache_entryinfo_unlock( ein ); } - addlru = 0; - - } - if ( addlru ) { - ldap_pvt_thread_mutex_lock( &bdb->bi_cache.lru_mutex ); - LRU_ADD( &bdb->bi_cache, ein ); - ldap_pvt_thread_mutex_unlock( &bdb->bi_cache.lru_mutex ); + } else { + bdb_cache_lru_link( &bdb->bi_cache, ein ); } - addlru = 1; /* If this is the first time, save this node * to be returned later. @@ -469,15 +494,18 @@ hdb_cache_find_parent( if ( ei2 ) { bdb_cache_entryinfo_lock( ei2 ); ein->bei_parent = ei2; + avl_insert( &ei2->bei_kids, (caddr_t)ein, bdb_rdn_cmp, avl_dup_error); ei2->bei_ckids++; - bdb_cache_entryinfo_unlock( ei2 ); - bdb_cache_entryinfo_lock( eir ); /* Reset all the state info */ for (ein = eir; ein != ei2; ein=ein->bei_parent) ein->bei_state &= ~CACHE_ENTRY_NOT_LINKED; + + bdb_cache_entryinfo_unlock( ei2 ); + bdb_cache_entryinfo_lock( eir ); + *res = eir; break; } @@ -526,94 +554,108 @@ int hdb_cache_load( } #endif -/* caller must have lru_mutex locked. mutex - * will be unlocked on return. - */ static void -bdb_cache_lru_add( - struct bdb_info *bdb, - u_int32_t locker, - EntryInfo *ei ) +bdb_cache_lru_purge( struct bdb_info *bdb ) { DB_LOCK lock, *lockp; + EntryInfo *elru, *elnext; + int count, islocked; + + /* Don't bother if we can't get the lock */ + if ( ldap_pvt_thread_mutex_trylock( &bdb->bi_cache.lru_head_mutex ) ) + return; - if ( locker ) { + if ( bdb->bi_cache.c_cursize <= bdb->bi_cache.c_maxsize ) { + ldap_pvt_thread_mutex_unlock( &bdb->bi_cache.lru_head_mutex ); + return; + } + + if ( bdb->bi_cache.c_locker ) { lockp = &lock; } else { lockp = NULL; } - /* See if we're above the cache size limit */ - if ( bdb->bi_cache.c_cursize > bdb->bi_cache.c_maxsize ) { - EntryInfo *elru, *elprev; - int i = 0; + count = 0; + /* Look for an unused entry to remove */ + for (elru = bdb->bi_cache.c_lruhead; elru; elru = elnext ) { + elnext = elru->bei_lrunext; - /* Look for an unused entry to remove */ - for (elru = bdb->bi_cache.c_lrutail; elru; elru = elprev, i++ ) { - elprev = elru->bei_lruprev; + if ( ldap_pvt_thread_mutex_trylock( &elru->bei_kids_mutex )) + continue; - /* Too many probes, not enough idle, give up */ - if (i > 10) - break; + /* This flag implements the clock replacement behavior */ + if ( elru->bei_state & ( CACHE_ENTRY_REFERENCED )) { + elru->bei_state &= ~CACHE_ENTRY_REFERENCED; + bdb_cache_entryinfo_unlock( elru ); + continue; + } - /* If we can successfully writelock it, then - * the object is idle. - */ - if ( bdb_cache_entry_db_lock( bdb->bi_dbenv, - bdb->bi_cache.c_locker, elru, 1, 1, lockp ) == 0 ) { + /* If this node is in the process of linking into the cache, + * or this node is being deleted, skip it. + * + * Also, if this node has no entry attached, skip it, there's + * nothing to purge anyway. + */ + if (( elru->bei_state & ( CACHE_ENTRY_NOT_LINKED | + CACHE_ENTRY_DELETED | CACHE_ENTRY_LOADING )) || + !elru->bei_e ) { + bdb_cache_entryinfo_unlock( elru ); + continue; + } - int stop = 0, decr = 0; + /* entryinfo is locked */ + islocked = 1; - /* If this node is in the process of linking into the cache, - * or this node is being deleted, skip it. - */ - if ( elru->bei_state & - ( CACHE_ENTRY_NOT_LINKED | CACHE_ENTRY_DELETED )) { - bdb_cache_entry_db_unlock( bdb->bi_dbenv, lockp ); - continue; - } - /* Free entry for this node if it's present */ - if ( elru->bei_e ) { - elru->bei_e->e_private = NULL; + /* If we can successfully writelock it, then + * the object is idle. + */ + if ( bdb_cache_entry_db_lock( bdb, + bdb->bi_cache.c_locker, elru, 1, 1, lockp ) == 0 ) { + + /* Free entry for this node if it's present */ + if ( elru->bei_e ) { + elru->bei_e->e_private = NULL; #ifdef SLAP_ZONE_ALLOC - bdb_entry_return( bdb, elru->bei_e, elru->bei_zseq ); + bdb_entry_return( bdb, elru->bei_e, elru->bei_zseq ); #else - bdb_entry_return( elru->bei_e ); + bdb_entry_return( elru->bei_e ); #endif - elru->bei_e = NULL; - decr = 1; - } - /* ITS#4010 if we're in slapcat, and this node is a leaf - * node, free it. - * - * FIXME: we need to do this for slapd as well, (which is - * why we compute bi_cache.c_leaves now) but at the moment - * we can't because it causes unresolvable deadlocks. - */ - if ( slapMode & SLAP_TOOL_READONLY ) { - if ( !elru->bei_kids ) { - /* This does LRU_DELETE for us */ - bdb_cache_delete_internal( &bdb->bi_cache, elru, 0 ); - bdb_cache_delete_cleanup( &bdb->bi_cache, elru ); - } - /* Leave node on LRU list for a future pass */ - } else { - LRU_DELETE( &bdb->bi_cache, elru ); + elru->bei_e = NULL; + count++; + } + bdb_cache_entry_db_unlock( bdb, lockp ); + + /* ITS#4010 if we're in slapcat, and this node is a leaf + * node, free it. + * + * FIXME: we need to do this for slapd as well, (which is + * why we compute bi_cache.c_leaves now) but at the moment + * we can't because it causes unresolvable deadlocks. + */ + if ( slapMode & SLAP_TOOL_READONLY ) { + if ( !elru->bei_kids ) { + bdb_cache_delete_internal( &bdb->bi_cache, elru, 0 ); + bdb_cache_delete_cleanup( &bdb->bi_cache, elru ); + islocked = 0; } - bdb_cache_entry_db_unlock( bdb->bi_dbenv, lockp ); - - ldap_pvt_thread_rdwr_wlock( &bdb->bi_cache.c_rwlock ); - if ( decr ) - --bdb->bi_cache.c_cursize; - if (bdb->bi_cache.c_cursize <= bdb->bi_cache.c_maxsize) - stop = 1; - ldap_pvt_thread_rdwr_wunlock( &bdb->bi_cache.c_rwlock ); - if (stop) break; + /* Leave node on LRU list for a future pass */ } } + + if ( islocked ) + bdb_cache_entryinfo_unlock( elru ); + + if ( count >= bdb->bi_cache.c_minfree ) { + ldap_pvt_thread_mutex_lock( &bdb->bi_cache.c_count_mutex ); + bdb->bi_cache.c_cursize -= count; + ldap_pvt_thread_mutex_unlock( &bdb->bi_cache.c_count_mutex ); + break; + } } - LRU_ADD( &bdb->bi_cache, ei ); - ldap_pvt_thread_mutex_unlock( &bdb->bi_cache.lru_mutex ); + + bdb->bi_cache.c_lruhead = elnext; + ldap_pvt_thread_mutex_unlock( &bdb->bi_cache.lru_head_mutex ); } EntryInfo * @@ -702,6 +744,7 @@ again: ldap_pvt_thread_rdwr_rlock( &bdb->bi_cache.c_rwlock ); &ep->e_nname, eip ); if ( *eip ) islocked = 1; if ( rc ) { + ep->e_private = NULL; #ifdef SLAP_ZONE_ALLOC bdb_entry_return( bdb, ep, (*eip)->bei_zseq ); #else @@ -712,12 +755,12 @@ again: ldap_pvt_thread_rdwr_rlock( &bdb->bi_cache.c_rwlock ); } #else rc = hdb_cache_find_parent(op, tid, locker, id, eip ); - if ( rc == 0 && *eip ) islocked = 1; + if ( rc == 0 ) islocked = 1; #endif } /* Ok, we found the info, do we have the entry? */ - if ( *eip && rc == 0 ) { + if ( rc == 0 ) { if ( (*eip)->bei_state & CACHE_ENTRY_DELETED ) { rc = DB_NOTFOUND; } else { @@ -738,19 +781,13 @@ load1: bdb_cache_entryinfo_unlock( *eip ); islocked = 0; } - rc = bdb_cache_entry_db_lock( bdb->bi_dbenv, locker, *eip, 0, 0, lock ); + rc = bdb_cache_entry_db_lock( bdb, locker, *eip, load, 0, lock ); if ( (*eip)->bei_state & CACHE_ENTRY_DELETED ) { rc = DB_NOTFOUND; - bdb_cache_entry_db_unlock( bdb->bi_dbenv, lock ); + bdb_cache_entry_db_unlock( bdb, lock ); } else if ( rc == 0 ) { if ( load ) { - /* Give up original read lock, obtain write lock - */ - if ( rc == 0 ) { - rc = bdb_cache_entry_db_relock( bdb->bi_dbenv, locker, - *eip, 1, 0, lock ); - } - if ( rc == 0 && !ep) { + if ( !ep) { rc = bdb_id2entry( op->o_bd, tid, locker, id, &ep ); } if ( rc == 0 ) { @@ -764,21 +801,22 @@ load1: #endif ep = NULL; } + bdb_cache_entryinfo_lock( *eip ); (*eip)->bei_state ^= CACHE_ENTRY_LOADING; + bdb_cache_entryinfo_unlock( *eip ); if ( rc == 0 ) { /* If we succeeded, downgrade back to a readlock. */ - rc = bdb_cache_entry_db_relock( bdb->bi_dbenv, locker, + rc = bdb_cache_entry_db_relock( bdb, locker, *eip, 0, 0, lock ); } else { /* Otherwise, release the lock. */ - bdb_cache_entry_db_unlock( bdb->bi_dbenv, lock ); + bdb_cache_entry_db_unlock( bdb, lock ); } } else if ( !(*eip)->bei_e ) { /* Some other thread is trying to load the entry, - * give it a chance to finish. + * wait for it to finish. */ - bdb_cache_entry_db_unlock( bdb->bi_dbenv, lock ); - ldap_pvt_thread_yield(); + bdb_cache_entry_db_unlock( bdb, lock ); bdb_cache_entryinfo_lock( *eip ); islocked = 1; goto load1; @@ -788,12 +826,12 @@ load1: */ rc = bdb_fix_dn( (*eip)->bei_e, 1 ); if ( rc ) { - bdb_cache_entry_db_relock( bdb->bi_dbenv, + bdb_cache_entry_db_relock( bdb, locker, *eip, 1, 0, lock ); /* check again in case other modifier did it already */ if ( bdb_fix_dn( (*eip)->bei_e, 1 ) ) rc = bdb_fix_dn( (*eip)->bei_e, 2 ); - bdb_cache_entry_db_relock( bdb->bi_dbenv, + bdb_cache_entry_db_relock( bdb, locker, *eip, 0, 0, lock ); } #endif @@ -806,6 +844,7 @@ load1: bdb_cache_entryinfo_unlock( *eip ); } if ( ep ) { + ep->e_private = NULL; #ifdef SLAP_ZONE_ALLOC bdb_entry_return( bdb, ep, (*eip)->bei_zseq ); #else @@ -813,30 +852,17 @@ load1: #endif } if ( rc == 0 ) { + int purge = 0; if ( load ) { - ldap_pvt_thread_rdwr_wlock( &bdb->bi_cache.c_rwlock ); + ldap_pvt_thread_mutex_lock( &bdb->bi_cache.c_count_mutex ); bdb->bi_cache.c_cursize++; - ldap_pvt_thread_rdwr_wunlock( &bdb->bi_cache.c_rwlock ); - } - - ldap_pvt_thread_mutex_lock( &bdb->bi_cache.lru_mutex ); - - /* If the LRU list has only one entry and this is it, it - * doesn't need to be added again. - */ - if ( bdb->bi_cache.c_lruhead == bdb->bi_cache.c_lrutail && - bdb->bi_cache.c_lruhead == *eip ) { - ldap_pvt_thread_mutex_unlock( &bdb->bi_cache.lru_mutex ); - } else { - - /* if entry is on LRU list, remove from old spot */ - if ( (*eip)->bei_lrunext || (*eip)->bei_lruprev ) { - LRU_DELETE( &bdb->bi_cache, *eip ); - } - /* lru_mutex is unlocked for us */ - bdb_cache_lru_add( bdb, locker, *eip ); + if ( bdb->bi_cache.c_cursize > bdb->bi_cache.c_maxsize ) + purge = 1; + ldap_pvt_thread_mutex_unlock( &bdb->bi_cache.c_count_mutex ); } + if ( purge ) + bdb_cache_lru_purge( bdb ); } #ifdef SLAP_ZONE_ALLOC @@ -876,11 +902,11 @@ bdb_cache_add( EntryInfo *eip, Entry *e, struct berval *nrdn, - u_int32_t locker ) + u_int32_t locker, + DB_LOCK *lock ) { EntryInfo *new, ei; - DB_LOCK lock; - int rc; + int rc, purge = 0; #ifdef BDB_HIER struct berval rdn = e->e_name; #endif @@ -893,7 +919,7 @@ bdb_cache_add( /* Lock this entry so that bdb_add can run to completion. * It can only fail if BDB has run out of lock resources. */ - rc = bdb_cache_entry_db_lock( bdb->bi_dbenv, locker, &ei, 1, 0, &lock ); + rc = bdb_cache_entry_db_lock( bdb, locker, &ei, 0, 0, lock ); if ( rc ) { bdb_cache_entryinfo_unlock( eip ); return rc; @@ -921,37 +947,38 @@ bdb_cache_add( } new->bei_e = e; e->e_private = new; - new->bei_state = CACHE_ENTRY_NO_KIDS | CACHE_ENTRY_NO_GRANDKIDS; + new->bei_state |= CACHE_ENTRY_NO_KIDS | CACHE_ENTRY_NO_GRANDKIDS; eip->bei_state &= ~CACHE_ENTRY_NO_KIDS; if (eip->bei_parent) { eip->bei_parent->bei_state &= ~CACHE_ENTRY_NO_GRANDKIDS; } bdb_cache_entryinfo_unlock( eip ); - ++bdb->bi_cache.c_cursize; ldap_pvt_thread_rdwr_wunlock( &bdb->bi_cache.c_rwlock ); + ldap_pvt_thread_mutex_lock( &bdb->bi_cache.c_count_mutex ); + ++bdb->bi_cache.c_cursize; + if ( bdb->bi_cache.c_cursize > bdb->bi_cache.c_maxsize ) + purge = 1; + ldap_pvt_thread_mutex_unlock( &bdb->bi_cache.c_count_mutex ); - /* set lru mutex */ - ldap_pvt_thread_mutex_lock( &bdb->bi_cache.lru_mutex ); - - /* lru_mutex is unlocked for us */ - bdb_cache_lru_add( bdb, locker, new ); + if ( purge ) + bdb_cache_lru_purge( bdb ); return rc; } int bdb_cache_modify( + struct bdb_info *bdb, Entry *e, Attribute *newAttrs, - DB_ENV *env, u_int32_t locker, DB_LOCK *lock ) { EntryInfo *ei = BEI(e); int rc; /* Get write lock on data */ - rc = bdb_cache_entry_db_relock( env, locker, ei, 1, 0, lock ); + rc = bdb_cache_entry_db_relock( bdb, locker, ei, 1, 0, lock ); /* If we've done repeated mods on a cached entry, then e_attrs * is no longer contiguous with the entry, and must be freed. @@ -985,7 +1012,7 @@ bdb_cache_modrdn( #endif /* Get write lock on data */ - rc = bdb_cache_entry_db_relock( bdb->bi_dbenv, locker, ei, 1, 0, lock ); + rc = bdb_cache_entry_db_relock( bdb, locker, ei, 1, 0, lock ); if ( rc ) return rc; /* If we've done repeated mods on a cached entry, then e_attrs @@ -1010,6 +1037,10 @@ bdb_cache_modrdn( avl_delete( &pei->bei_kids, (caddr_t) ei, bdb_rdn_cmp ); free( ei->bei_nrdn.bv_val ); ber_dupbv( &ei->bei_nrdn, nrdn ); + + if ( !pei->bei_kids ) + pei->bei_state |= CACHE_ENTRY_NO_KIDS | CACHE_ENTRY_NO_GRANDKIDS; + #ifdef BDB_HIER free( ei->bei_rdn.bv_val ); @@ -1020,6 +1051,8 @@ bdb_cache_modrdn( rdn.bv_len = ptr - rdn.bv_val; } ber_dupbv( &ei->bei_rdn, &rdn ); + pei->bei_ckids--; + if ( pei->bei_dkids ) pei->bei_dkids--; #endif if (!ein) { @@ -1029,7 +1062,16 @@ bdb_cache_modrdn( bdb_cache_entryinfo_unlock( pei ); bdb_cache_entryinfo_lock( ein ); } + /* parent now has kids */ + if ( ein->bei_state & CACHE_ENTRY_NO_KIDS ) + ein->bei_state ^= CACHE_ENTRY_NO_KIDS; + #ifdef BDB_HIER + /* parent might now have grandkids */ + if ( ein->bei_state & CACHE_ENTRY_NO_GRANDKIDS && + !(ei->bei_state & (CACHE_ENTRY_NO_KIDS))) + ein->bei_state ^= CACHE_ENTRY_NO_GRANDKIDS; + { /* Record the generation number of this change */ ldap_pvt_thread_mutex_lock( &bdb->bi_modrdns_mutex ); @@ -1037,6 +1079,8 @@ bdb_cache_modrdn( ei->bei_modrdns = bdb->bi_modrdns; ldap_pvt_thread_mutex_unlock( &bdb->bi_modrdns_mutex ); } + ein->bei_ckids++; + if ( ein->bei_dkids ) ein->bei_dkids++; #endif avl_insert( &ein->bei_kids, ei, bdb_rdn_cmp, avl_dup_error ); bdb_cache_entryinfo_unlock( ein ); @@ -1051,9 +1095,8 @@ bdb_cache_modrdn( */ int bdb_cache_delete( - Cache *cache, + struct bdb_info *bdb, Entry *e, - DB_ENV *env, u_int32_t locker, DB_LOCK *lock ) { @@ -1069,7 +1112,7 @@ bdb_cache_delete( bdb_cache_entryinfo_lock( ei ); /* Get write lock on the data */ - rc = bdb_cache_entry_db_relock( env, locker, ei, 1, 0, lock ); + rc = bdb_cache_entry_db_relock( bdb, locker, ei, 1, 0, lock ); if ( rc ) { /* couldn't lock, undo and give up */ ei->bei_state ^= CACHE_ENTRY_DELETED; @@ -1081,18 +1124,12 @@ bdb_cache_delete( e->e_id, 0, 0 ); /* set lru mutex */ - ldap_pvt_thread_mutex_lock( &cache->lru_mutex ); - - /* set cache write lock */ - ldap_pvt_thread_rdwr_wlock( &cache->c_rwlock ); + ldap_pvt_thread_mutex_lock( &bdb->bi_cache.lru_head_mutex ); - rc = bdb_cache_delete_internal( cache, e->e_private, 1 ); - - /* free cache write lock */ - ldap_pvt_thread_rdwr_wunlock( &cache->c_rwlock ); + rc = bdb_cache_delete_internal( &bdb->bi_cache, e->e_private, 1 ); /* free lru mutex */ - ldap_pvt_thread_mutex_unlock( &cache->lru_mutex ); + ldap_pvt_thread_mutex_unlock( &bdb->bi_cache.lru_head_mutex ); /* Leave entry info locked */ @@ -1127,10 +1164,10 @@ bdb_cache_delete_cleanup( ei->bei_kids = NULL; ei->bei_lruprev = NULL; - ldap_pvt_thread_rdwr_wlock( &cache->c_rwlock ); + ldap_pvt_thread_mutex_lock( &cache->c_eifree_mutex ); ei->bei_lrunext = cache->c_eifree; cache->c_eifree = ei; - ldap_pvt_thread_rdwr_wunlock( &cache->c_rwlock ); + ldap_pvt_thread_mutex_unlock( &cache->c_eifree_mutex ); bdb_cache_entryinfo_unlock( ei ); } @@ -1141,6 +1178,7 @@ bdb_cache_delete_internal( int decr ) { int rc = 0; /* return code */ + int decr_leaf = 0; /* Lock the parent's kids tree */ bdb_cache_entryinfo_lock( e->bei_parent ); @@ -1156,22 +1194,39 @@ bdb_cache_delete_internal( rc = -1; } if ( e->bei_parent->bei_kids ) - cache->c_leaves--; + decr_leaf = 1; + bdb_cache_entryinfo_unlock( e->bei_parent ); + + ldap_pvt_thread_rdwr_wlock( &cache->c_rwlock ); /* id tree */ - if ( avl_delete( &cache->c_idtree, (caddr_t) e, bdb_id_cmp ) == NULL ) { + if ( avl_delete( &cache->c_idtree, (caddr_t) e, bdb_id_cmp )) { + cache->c_eiused--; + if ( decr_leaf ) + cache->c_leaves--; + } else { rc = -1; } + ldap_pvt_thread_rdwr_wunlock( &cache->c_rwlock ); if ( rc == 0 ){ - cache->c_eiused--; - /* lru */ - LRU_DELETE( cache, e ); - if ( e->bei_e ) cache->c_cursize--; - } + if ( e == cache->c_lruhead ) cache->c_lruhead = e->bei_lrunext; + if ( e == cache->c_lrutail ) { + ldap_pvt_thread_mutex_lock( &cache->lru_tail_mutex ); + if ( e == cache->c_lrutail ) cache->c_lrutail = e->bei_lruprev; + ldap_pvt_thread_mutex_unlock( &cache->lru_tail_mutex ); + } - bdb_cache_entryinfo_unlock( e->bei_parent ); + if ( e->bei_lrunext ) e->bei_lrunext->bei_lruprev = e->bei_lruprev; + if ( e->bei_lruprev ) e->bei_lruprev->bei_lrunext = e->bei_lrunext; + + if ( e->bei_e ) { + ldap_pvt_thread_mutex_lock( &cache->c_count_mutex ); + cache->c_cursize--; + ldap_pvt_thread_mutex_unlock( &cache->c_count_mutex ); + } + } return( rc ); } @@ -1200,7 +1255,7 @@ bdb_cache_release_all( Cache *cache ) /* set cache write lock */ ldap_pvt_thread_rdwr_wlock( &cache->c_rwlock ); /* set lru mutex */ - ldap_pvt_thread_mutex_lock( &cache->lru_mutex ); + ldap_pvt_thread_mutex_lock( &cache->lru_tail_mutex ); Debug( LDAP_DEBUG_TRACE, "====> bdb_cache_release_all\n", 0, 0, 0 ); @@ -1219,7 +1274,7 @@ bdb_cache_release_all( Cache *cache ) cache->c_dntree.bei_kids = NULL; /* free lru mutex */ - ldap_pvt_thread_mutex_unlock( &cache->lru_mutex ); + ldap_pvt_thread_mutex_unlock( &cache->lru_tail_mutex ); /* free cache write lock */ ldap_pvt_thread_rdwr_wunlock( &cache->c_rwlock ); } @@ -1231,15 +1286,22 @@ bdb_lru_print( Cache *cache ) { EntryInfo *e; - fprintf( stderr, "LRU queue (head to tail):\n" ); - for ( e = cache->c_lruhead; e != NULL; e = e->bei_lrunext ) { - fprintf( stderr, "\trdn \"%20s\" id %ld\n", - e->bei_nrdn.bv_val, e->bei_id ); + fprintf( stderr, "LRU circle head: %p\n", cache->c_lruhead ); + fprintf( stderr, "LRU circle (tail forward):\n" ); + for ( e = cache->c_lrutail; ; ) { + fprintf( stderr, "\t%p, %p id %ld rdn \"%s\"\n", + e, e->bei_e, e->bei_id, e->bei_nrdn.bv_val ); + e = e->bei_lrunext; + if ( e == cache->c_lrutail ) + break; } - fprintf( stderr, "LRU queue (tail to head):\n" ); - for ( e = cache->c_lrutail; e != NULL; e = e->bei_lruprev ) { - fprintf( stderr, "\trdn \"%20s\" id %ld\n", - e->bei_nrdn.bv_val, e->bei_id ); + fprintf( stderr, "LRU circle (tail backward):\n" ); + for ( e = cache->c_lrutail; ; ) { + fprintf( stderr, "\t%p, %p id %ld rdn \"%s\"\n", + e, e->bei_e, e->bei_id, e->bei_nrdn.bv_val ); + e = e->bei_lruprev; + if ( e == cache->c_lrutail ) + break; } } #endif @@ -1314,29 +1376,3 @@ bdb_locker_id( Operation *op, DB_ENV *env, u_int32_t *locker ) return 0; } #endif /* BDB_REUSE_LOCKERS */ - -void -bdb_cache_delete_entry( - struct bdb_info *bdb, - EntryInfo *ei, - u_int32_t locker, - DB_LOCK *lock ) -{ - ldap_pvt_thread_rdwr_wlock( &bdb->bi_cache.c_rwlock ); - if ( bdb_cache_entry_db_lock( bdb->bi_dbenv, bdb->bi_cache.c_locker, ei, 1, 1, lock ) == 0 ) - { - if ( ei->bei_e && !(ei->bei_state & CACHE_ENTRY_NOT_LINKED )) { - LRU_DELETE( &bdb->bi_cache, ei ); - ei->bei_e->e_private = NULL; -#ifdef SLAP_ZONE_ALLOC - bdb_entry_return( bdb, ei->bei_e, ei->bei_zseq ); -#else - bdb_entry_return( ei->bei_e ); -#endif - ei->bei_e = NULL; - --bdb->bi_cache.c_cursize; - } - bdb_cache_entry_db_unlock( bdb->bi_dbenv, lock ); - } - ldap_pvt_thread_rdwr_wunlock( &bdb->bi_cache.c_rwlock ); -}