X-Git-Url: https://git.sur5r.net/?a=blobdiff_plain;f=servers%2Fslapd%2Fback-bdb%2Fidl.c;h=dad12bf56186af7dcb28f0460e38648fdbe6d3bb;hb=24db207196a453a4f9acdce08593c7e0ed53ce4c;hp=5d534a853385f3b6072cb9a38af167248a4fbde6;hpb=13af7fb0730906aa6a1323c14c07c9a8434170dc;p=openldap diff --git a/servers/slapd/back-bdb/idl.c b/servers/slapd/back-bdb/idl.c index 5d534a8533..dad12bf561 100644 --- a/servers/slapd/back-bdb/idl.c +++ b/servers/slapd/back-bdb/idl.c @@ -1,8 +1,17 @@ /* idl.c - ldap id list handling routines */ /* $OpenLDAP$ */ -/* - * Copyright 1998-2002 The OpenLDAP Foundation, All Rights Reserved. - * COPYING RESTRICTIONS APPLY, see COPYRIGHT file +/* This work is part of OpenLDAP Software . + * + * Copyright 2000-2007 The OpenLDAP Foundation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * . */ #include "portable.h" @@ -18,10 +27,36 @@ #define IDL_CMP(x,y) ( x < y ? -1 : ( x > y ? 1 : 0 ) ) -#ifndef IDL_DEBUG - /* enable basic checks for now */ -#define IDL_DEBUG 1 -#endif +#define IDL_LRU_DELETE( bdb, e ) do { \ + if ( (e) == (bdb)->bi_idl_lru_head ) { \ + if ( (e)->idl_lru_next == (bdb)->bi_idl_lru_head ) { \ + (bdb)->bi_idl_lru_head = NULL; \ + } else { \ + (bdb)->bi_idl_lru_head = (e)->idl_lru_next; \ + } \ + } \ + if ( (e) == (bdb)->bi_idl_lru_tail ) { \ + if ( (e)->idl_lru_prev == (bdb)->bi_idl_lru_tail ) { \ + assert( (bdb)->bi_idl_lru_head == NULL ); \ + (bdb)->bi_idl_lru_tail = NULL; \ + } else { \ + (bdb)->bi_idl_lru_tail = (e)->idl_lru_prev; \ + } \ + } \ + (e)->idl_lru_next->idl_lru_prev = (e)->idl_lru_prev; \ + (e)->idl_lru_prev->idl_lru_next = (e)->idl_lru_next; \ +} while ( 0 ) + +static int +bdb_idl_entry_cmp( const void *v_idl1, const void *v_idl2 ) +{ + const bdb_idl_cache_entry_t *idl1 = v_idl1, *idl2 = v_idl2; + int rc; + + if ((rc = SLAP_PTRCMP( idl1->db, idl2->db ))) return rc; + if ((rc = idl1->kstr.bv_len - idl2->kstr.bv_len )) return rc; + return ( memcmp ( idl1->kstr.bv_val, idl2->kstr.bv_val , idl1->kstr.bv_len ) ); +} #if IDL_DEBUG > 0 static void idl_check( ID *ids ) @@ -125,7 +160,7 @@ unsigned bdb_idl_search( ID *ids, ID id ) int bdb_idl_insert( ID *ids, ID id ) { - unsigned x = bdb_idl_search( ids, id ); + unsigned x; #if IDL_DEBUG > 1 Debug( LDAP_DEBUG_ANY, "insert: %04lx at %d\n", (long) id, x, 0 ); @@ -134,6 +169,18 @@ int bdb_idl_insert( ID *ids, ID id ) idl_check( ids ); #endif + if (BDB_IDL_IS_RANGE( ids )) { + /* if already in range, treat as a dup */ + if (id >= BDB_IDL_FIRST(ids) && id <= BDB_IDL_LAST(ids)) + return -1; + if (id < BDB_IDL_FIRST(ids)) + ids[1] = id; + else if (id > BDB_IDL_LAST(ids)) + ids[2] = id; + return 0; + } + + x = bdb_idl_search( ids, id ); assert( x > 0 ); if( x < 1 ) { @@ -172,9 +219,9 @@ int bdb_idl_insert( ID *ids, ID id ) return 0; } -static int idl_delete( ID *ids, ID id ) +static int bdb_idl_delete( ID *ids, ID id ) { - unsigned x = bdb_idl_search( ids, id ); + unsigned x; #if IDL_DEBUG > 1 Debug( LDAP_DEBUG_ANY, "delete: %04lx at %d\n", (long) id, x, 0 ); @@ -183,6 +230,23 @@ static int idl_delete( ID *ids, ID id ) idl_check( ids ); #endif + if (BDB_IDL_IS_RANGE( ids )) { + /* If deleting a range boundary, adjust */ + if ( ids[1] == id ) + ids[1]++; + else if ( ids[2] == id ) + ids[2]--; + /* deleting from inside a range is a no-op */ + + /* If the range has collapsed, re-adjust */ + if ( ids[1] > ids[2] ) + ids[0] = 0; + else if ( ids[1] == ids[2] ) + ids[1] = 1; + return 0; + } + + x = bdb_idl_search( ids, id ); assert( x > 0 ); if( x <= 0 ) { @@ -212,58 +276,388 @@ static int idl_delete( ID *ids, ID id ) return 0; } +static char * +bdb_show_key( + DBT *key, + char *buf ) +{ + if ( key->size == 4 /* LUTIL_HASH_BYTES */ ) { + unsigned char *c = key->data; + sprintf( buf, "[%02x%02x%02x%02x]", c[0], c[1], c[2], c[3] ); + return buf; + } else { + return key->data; + } +} + +/* Find a db/key pair in the IDL cache. If ids is non-NULL, + * copy the cached IDL into it, otherwise just return the status. + */ +int +bdb_idl_cache_get( + struct bdb_info *bdb, + DB *db, + DBT *key, + ID *ids ) +{ + bdb_idl_cache_entry_t idl_tmp; + bdb_idl_cache_entry_t *matched_idl_entry; + int rc = LDAP_NO_SUCH_OBJECT; + + DBT2bv( key, &idl_tmp.kstr ); + idl_tmp.db = db; + ldap_pvt_thread_rdwr_rlock( &bdb->bi_idl_tree_rwlock ); + matched_idl_entry = avl_find( bdb->bi_idl_tree, &idl_tmp, + bdb_idl_entry_cmp ); + if ( matched_idl_entry != NULL ) { + if ( matched_idl_entry->idl && ids ) + BDB_IDL_CPY( ids, matched_idl_entry->idl ); + matched_idl_entry->idl_flags |= CACHE_ENTRY_REFERENCED; + if ( matched_idl_entry->idl ) + rc = LDAP_SUCCESS; + else + rc = DB_NOTFOUND; + } + ldap_pvt_thread_rdwr_runlock( &bdb->bi_idl_tree_rwlock ); + + return rc; +} + +void +bdb_idl_cache_put( + struct bdb_info *bdb, + DB *db, + DBT *key, + ID *ids, + int rc ) +{ + bdb_idl_cache_entry_t idl_tmp; + bdb_idl_cache_entry_t *ee, *eprev; + + if ( rc == DB_NOTFOUND || BDB_IDL_IS_ZERO( ids )) + return; + + DBT2bv( key, &idl_tmp.kstr ); + + ee = (bdb_idl_cache_entry_t *) ch_malloc( + sizeof( bdb_idl_cache_entry_t ) ); + ee->db = db; + ee->idl = (ID*) ch_malloc( BDB_IDL_SIZEOF ( ids ) ); + BDB_IDL_CPY( ee->idl, ids ); + + ee->idl_lru_prev = NULL; + ee->idl_lru_next = NULL; + ee->idl_flags = 0; + ber_dupbv( &ee->kstr, &idl_tmp.kstr ); + ldap_pvt_thread_rdwr_wlock( &bdb->bi_idl_tree_rwlock ); + if ( avl_insert( &bdb->bi_idl_tree, (caddr_t) ee, + bdb_idl_entry_cmp, avl_dup_error )) + { + ch_free( ee->kstr.bv_val ); + ch_free( ee->idl ); + ch_free( ee ); + ldap_pvt_thread_rdwr_wunlock( &bdb->bi_idl_tree_rwlock ); + return; + } + ldap_pvt_thread_mutex_lock( &bdb->bi_idl_tree_lrulock ); + /* LRU_ADD */ + if ( bdb->bi_idl_lru_head ) { + assert( bdb->bi_idl_lru_tail != NULL ); + assert( bdb->bi_idl_lru_head->idl_lru_prev != NULL ); + assert( bdb->bi_idl_lru_head->idl_lru_next != NULL ); + + ee->idl_lru_next = bdb->bi_idl_lru_head; + ee->idl_lru_prev = bdb->bi_idl_lru_head->idl_lru_prev; + bdb->bi_idl_lru_head->idl_lru_prev->idl_lru_next = ee; + bdb->bi_idl_lru_head->idl_lru_prev = ee; + } else { + ee->idl_lru_next = ee->idl_lru_prev = ee; + bdb->bi_idl_lru_tail = ee; + } + bdb->bi_idl_lru_head = ee; + + if ( ++bdb->bi_idl_cache_size > bdb->bi_idl_cache_max_size ) { + int i; + ee = bdb->bi_idl_lru_tail; + for ( i = 0; ee != NULL && i < 10; i++, ee = eprev ) { + eprev = ee->idl_lru_prev; + if ( eprev == ee ) { + eprev = NULL; + } + if ( ee->idl_flags & CACHE_ENTRY_REFERENCED ) { + ee->idl_flags ^= CACHE_ENTRY_REFERENCED; + continue; + } + if ( avl_delete( &bdb->bi_idl_tree, (caddr_t) ee, + bdb_idl_entry_cmp ) == NULL ) { + Debug( LDAP_DEBUG_ANY, "=> bdb_idl_cache_put: " + "AVL delete failed\n", + 0, 0, 0 ); + } + IDL_LRU_DELETE( bdb, ee ); + i++; + --bdb->bi_idl_cache_size; + ch_free( ee->kstr.bv_val ); + ch_free( ee->idl ); + ch_free( ee ); + } + bdb->bi_idl_lru_tail = eprev; + assert( bdb->bi_idl_lru_tail != NULL + || bdb->bi_idl_lru_head == NULL ); + } + ldap_pvt_thread_mutex_unlock( &bdb->bi_idl_tree_lrulock ); + ldap_pvt_thread_rdwr_wunlock( &bdb->bi_idl_tree_rwlock ); +} + +void +bdb_idl_cache_del( + struct bdb_info *bdb, + DB *db, + DBT *key ) +{ + bdb_idl_cache_entry_t *matched_idl_entry, idl_tmp; + DBT2bv( key, &idl_tmp.kstr ); + idl_tmp.db = db; + ldap_pvt_thread_rdwr_wlock( &bdb->bi_idl_tree_rwlock ); + matched_idl_entry = avl_find( bdb->bi_idl_tree, &idl_tmp, + bdb_idl_entry_cmp ); + if ( matched_idl_entry != NULL ) { + if ( avl_delete( &bdb->bi_idl_tree, (caddr_t) matched_idl_entry, + bdb_idl_entry_cmp ) == NULL ) { + Debug( LDAP_DEBUG_ANY, "=> bdb_idl_cache_del: " + "AVL delete failed\n", + 0, 0, 0 ); + } + --bdb->bi_idl_cache_size; + ldap_pvt_thread_mutex_lock( &bdb->bi_idl_tree_lrulock ); + IDL_LRU_DELETE( bdb, matched_idl_entry ); + ldap_pvt_thread_mutex_unlock( &bdb->bi_idl_tree_lrulock ); + free( matched_idl_entry->kstr.bv_val ); + if ( matched_idl_entry->idl ) + free( matched_idl_entry->idl ); + free( matched_idl_entry ); + } + ldap_pvt_thread_rdwr_wunlock( &bdb->bi_idl_tree_rwlock ); +} + +void +bdb_idl_cache_add_id( + struct bdb_info *bdb, + DB *db, + DBT *key, + ID id ) +{ + bdb_idl_cache_entry_t *cache_entry, idl_tmp; + DBT2bv( key, &idl_tmp.kstr ); + idl_tmp.db = db; + ldap_pvt_thread_rdwr_wlock( &bdb->bi_idl_tree_rwlock ); + cache_entry = avl_find( bdb->bi_idl_tree, &idl_tmp, + bdb_idl_entry_cmp ); + if ( cache_entry != NULL ) { + if ( !BDB_IDL_IS_RANGE( cache_entry->idl ) && + cache_entry->idl[0] < BDB_IDL_DB_MAX ) { + size_t s = BDB_IDL_SIZEOF( cache_entry->idl ) + sizeof(ID); + cache_entry->idl = ch_realloc( cache_entry->idl, s ); + } + bdb_idl_insert( cache_entry->idl, id ); + } + ldap_pvt_thread_rdwr_wunlock( &bdb->bi_idl_tree_rwlock ); +} + +void +bdb_idl_cache_del_id( + struct bdb_info *bdb, + DB *db, + DBT *key, + ID id ) +{ + bdb_idl_cache_entry_t *cache_entry, idl_tmp; + DBT2bv( key, &idl_tmp.kstr ); + idl_tmp.db = db; + ldap_pvt_thread_rdwr_wlock( &bdb->bi_idl_tree_rwlock ); + cache_entry = avl_find( bdb->bi_idl_tree, &idl_tmp, + bdb_idl_entry_cmp ); + if ( cache_entry != NULL ) { + bdb_idl_delete( cache_entry->idl, id ); + if ( cache_entry->idl[0] == 0 ) { + if ( avl_delete( &bdb->bi_idl_tree, (caddr_t) cache_entry, + bdb_idl_entry_cmp ) == NULL ) { + Debug( LDAP_DEBUG_ANY, "=> bdb_idl_cache_del: " + "AVL delete failed\n", + 0, 0, 0 ); + } + --bdb->bi_idl_cache_size; + ldap_pvt_thread_mutex_lock( &bdb->bi_idl_tree_lrulock ); + IDL_LRU_DELETE( bdb, cache_entry ); + ldap_pvt_thread_mutex_unlock( &bdb->bi_idl_tree_lrulock ); + free( cache_entry->kstr.bv_val ); + free( cache_entry->idl ); + free( cache_entry ); + } + } + ldap_pvt_thread_rdwr_wunlock( &bdb->bi_idl_tree_rwlock ); +} + int bdb_idl_fetch_key( BackendDB *be, DB *db, - DB_TXN *tid, + BDB_LOCKER locker, DBT *key, - ID *ids ) + ID *ids, + DBC **saved_cursor, + int get_flag ) { struct bdb_info *bdb = (struct bdb_info *) be->be_private; int rc; - DBT data; + DBT data, key2, *kptr; + DBC *cursor; + ID *i; + void *ptr; + size_t len; + int rc2; + int flags = bdb->bi_db_opflags | DB_MULTIPLE; + int opflag; + + /* If using BerkeleyDB 4.0, the buf must be large enough to + * grab the entire IDL in one get(), otherwise BDB will leak + * resources on subsequent get's. We can safely call get() + * twice - once for the data, and once to get the DB_NOTFOUND + * result meaning there's no more data. See ITS#2040 for details. + * This bug is fixed in BDB 4.1 so a smaller buffer will work if + * stack space is too limited. + * + * configure now requires Berkeley DB 4.1. + */ +#if DB_VERSION_FULL < 0x04010000 +# define BDB_ENOUGH 5 +#else + /* We sometimes test with tiny IDLs, and BDB always wants buffers + * that are at least one page in size. + */ +# if BDB_IDL_DB_SIZE < 4096 +# define BDB_ENOUGH 2048 +# else +# define BDB_ENOUGH 1 +# endif +#endif + ID buf[BDB_IDL_DB_SIZE*BDB_ENOUGH]; + + char keybuf[16]; + + Debug( LDAP_DEBUG_ARGS, + "bdb_idl_fetch_key: %s\n", + bdb_show_key( key, keybuf ), 0, 0 ); assert( ids != NULL ); + if ( saved_cursor && *saved_cursor ) { + opflag = DB_NEXT; + } else if ( get_flag == LDAP_FILTER_GE ) { + opflag = DB_SET_RANGE; + } else if ( get_flag == LDAP_FILTER_LE ) { + opflag = DB_FIRST; + } else { + opflag = DB_SET; + } + + /* only non-range lookups can use the IDL cache */ + if ( bdb->bi_idl_cache_size && opflag == DB_SET ) { + rc = bdb_idl_cache_get( bdb, db, key, ids ); + if ( rc != LDAP_NO_SUCH_OBJECT ) return rc; + } + DBTzero( &data ); -#ifdef BDB_IDL_MULTI - { - ID buf[BDB_IDL_UM_SIZE]; - ID *i, *j; - void *ptr; - size_t len; - data.data = buf; - data.ulen = BDB_IDL_UM_SIZEOF; - data.flags = DB_DBT_USERMEM; - rc = db->get( db, tid, key, &data, bdb->bi_db_opflags | - DB_MULTIPLE ); - if (rc == 0) { + data.data = buf; + data.ulen = sizeof(buf); + data.flags = DB_DBT_USERMEM; + + /* If we're not reusing an existing cursor, get a new one */ + if( opflag != DB_NEXT ) { + rc = db->cursor( db, NULL, &cursor, bdb->bi_db_opflags ); + if( rc != 0 ) { + Debug( LDAP_DEBUG_ANY, "=> bdb_idl_fetch_key: " + "cursor failed: %s (%d)\n", db_strerror(rc), rc, 0 ); + return rc; + } + CURSOR_SETLOCKER( cursor, locker ); + } else { + cursor = *saved_cursor; + } + + /* If this is a LE lookup, save original key so we can determine + * when to stop. If this is a GE lookup, save the key since it + * will be overwritten. + */ + if ( get_flag == LDAP_FILTER_LE || get_flag == LDAP_FILTER_GE ) { + DBTzero( &key2 ); + key2.flags = DB_DBT_USERMEM; + key2.ulen = sizeof(keybuf); + key2.data = keybuf; + key2.size = key->size; + AC_MEMCPY( keybuf, key->data, key->size ); + kptr = &key2; + } else { + kptr = key; + } + len = key->size; + rc = cursor->c_get( cursor, kptr, &data, flags | opflag ); + + /* skip presence key on range inequality lookups */ + while (rc == 0 && kptr->size != len) { + rc = cursor->c_get( cursor, kptr, &data, flags | DB_NEXT_NODUP ); + } + /* If we're doing a LE compare and the new key is greater than + * our search key, we're done + */ + if (rc == 0 && get_flag == LDAP_FILTER_LE && memcmp( kptr->data, + key->data, key->size ) > 0 ) { + rc = DB_NOTFOUND; + } + if (rc == 0) { + i = ids; + while (rc == 0) { + u_int8_t *j; + DB_MULTIPLE_INIT( ptr, &data ); - i = ids; while (ptr) { DB_MULTIPLE_NEXT(ptr, &data, j, len); if (j) { ++i; - AC_MEMCPY( i, j, sizeof(ID) ); + BDB_DISK2ID( j, i ); } } - if (ids[1] == 0) { - BDB_IDL_RANGE( ids, ids[2], ids[3] ); - } else { - ids[0] = (i - ids); + rc = cursor->c_get( cursor, key, &data, flags | DB_NEXT_DUP ); + } + if ( rc == DB_NOTFOUND ) rc = 0; + ids[0] = i - ids; + /* On disk, a range is denoted by 0 in the first element */ + if (ids[1] == 0) { + if (ids[0] != BDB_IDL_RANGE_SIZE) { + Debug( LDAP_DEBUG_ANY, "=> bdb_idl_fetch_key: " + "range size mismatch: expected %d, got %ld\n", + BDB_IDL_RANGE_SIZE, ids[0], 0 ); + cursor->c_close( cursor ); + return -1; } - data.size = BDB_IDL_SIZEOF(ids); + BDB_IDL_RANGE( ids, ids[2], ids[3] ); } + data.size = BDB_IDL_SIZEOF(ids); + } + + if ( saved_cursor && rc == 0 ) { + if ( !*saved_cursor ) + *saved_cursor = cursor; + rc2 = 0; + } + else + rc2 = cursor->c_close( cursor ); + if (rc2) { + Debug( LDAP_DEBUG_ANY, "=> bdb_idl_fetch_key: " + "close failed: %s (%d)\n", db_strerror(rc2), rc2, 0 ); + return rc2; } -#else - data.data = ids; - data.ulen = BDB_IDL_UM_SIZEOF; - data.flags = DB_DBT_USERMEM; - /* fetch it */ - rc = db->get( db, tid, key, &data, bdb->bi_db_opflags ); -#endif if( rc == DB_NOTFOUND ) { return rc; @@ -289,9 +683,14 @@ bdb_idl_fetch_key( return -1; } + if ( bdb->bi_idl_cache_max_size ) { + bdb_idl_cache_put( bdb, db, key, ids, rc ); + } + return rc; } + int bdb_idl_insert_key( BackendDB *be, @@ -303,104 +702,199 @@ bdb_idl_insert_key( struct bdb_info *bdb = (struct bdb_info *) be->be_private; int rc; DBT data; -#ifndef BDB_IDL_MULTI - ID ids[BDB_IDL_DB_SIZE]; -#endif + DBC *cursor; + ID lo, hi, nlo, nhi, nid; + char *err; - /* for printable keys only */ - Debug( LDAP_DEBUG_ARGS, - "=> bdb_idl_insert_key: %s %ld\n", - (char *)key->data, (long) id, 0 ); + { + char buf[16]; + Debug( LDAP_DEBUG_ARGS, + "bdb_idl_insert_key: %lx %s\n", + (long) id, bdb_show_key( key, buf ), 0 ); + } assert( id != NOID ); - DBTzero( &data ); -#ifdef BDB_IDL_MULTI - /* FIXME: We really ought to count how many data items currently exist - * for this key, and cap off with a range when we hit the max. We need - * to use a 0 in the first slot to denote a range - since the data are - * sorted in ascending order, the only way to get a flag into the - * first slot is to use the smallest possible ID value. The fetch - * function above can turn it into a "memory-format" range. We also - * have to delete all of the existing data items when converting from - * a list to a range. And of course, if it's already a range, we need - * to read it in and process it instead of just doing the blind put - * that we do right now... - */ - data.data = &id; - data.size = sizeof(id); - data.flags = DB_DBT_USERMEM; + if ( bdb->bi_idl_cache_size ) { + bdb_idl_cache_del( bdb, db, key ); + } - rc = db->put( db, tid, key, &data, DB_NODUPDATA ); -#else - data.data = ids; - data.ulen = sizeof ids; + DBTzero( &data ); + data.size = sizeof( ID ); + data.ulen = data.size; data.flags = DB_DBT_USERMEM; - /* fetch the key for read/modify/write */ - rc = db->get( db, tid, key, &data, DB_RMW | bdb->bi_db_opflags ); - - if( rc == DB_NOTFOUND ) { - ids[0] = 1; - ids[1] = id; - data.size = 2 * sizeof( ID ); + BDB_ID2DISK( id, &nid ); - } else if ( rc != 0 ) { + rc = db->cursor( db, tid, &cursor, bdb->bi_db_opflags ); + if ( rc != 0 ) { Debug( LDAP_DEBUG_ANY, "=> bdb_idl_insert_key: " - "get failed: %s (%d)\n", - db_strerror(rc), rc, 0 ); + "cursor failed: %s (%d)\n", db_strerror(rc), rc, 0 ); return rc; + } + data.data = &nlo; + /* Fetch the first data item for this key, to see if it + * exists and if it's a range. + */ + rc = cursor->c_get( cursor, key, &data, DB_SET ); + err = "c_get"; + if ( rc == 0 ) { + if ( nlo != 0 ) { + /* not a range, count the number of items */ + db_recno_t count; + rc = cursor->c_count( cursor, &count, 0 ); + if ( rc != 0 ) { + err = "c_count"; + goto fail; + } + if ( count >= BDB_IDL_DB_MAX ) { + /* No room, convert to a range */ + DBT key2 = *key; + db_recno_t i; - } else if ( data.size == 0 || data.size % sizeof( ID ) ) { - /* size not multiple of ID size */ - Debug( LDAP_DEBUG_ANY, "=> bdb_idl_insert_key: " - "odd size: expected %ld multiple, got %ld\n", - (long) sizeof( ID ), (long) data.size, 0 ); - return -1; - - } else if ( data.size != BDB_IDL_SIZEOF(ids) ) { - /* size mismatch */ - Debug( LDAP_DEBUG_ANY, "=> bdb_idl_insert_key: " - "get size mismatch: expected %ld, got %ld\n", - (long) ((1 + ids[0]) * sizeof( ID )), (long) data.size, 0 ); - return -1; + key2.dlen = key2.ulen; + key2.flags |= DB_DBT_PARTIAL; - } else if ( BDB_IDL_IS_RANGE(ids) ) { - if( id < ids[1] ) { - ids[1] = id; - } else if ( ids[2] > id ) { - ids[2] = id; - } else { - return 0; - } - - } else { - rc = bdb_idl_insert( ids, id ); + BDB_DISK2ID( &nlo, &lo ); + data.data = &nhi; - if( rc == -1 ) { - Debug( LDAP_DEBUG_TRACE, "=> bdb_idl_insert_key: dup\n", - 0, 0, 0 ); - return 0; + rc = cursor->c_get( cursor, &key2, &data, DB_NEXT_NODUP ); + if ( rc != 0 && rc != DB_NOTFOUND ) { + err = "c_get next_nodup"; + goto fail; + } + if ( rc == DB_NOTFOUND ) { + rc = cursor->c_get( cursor, key, &data, DB_LAST ); + if ( rc != 0 ) { + err = "c_get last"; + goto fail; + } + } else { + rc = cursor->c_get( cursor, key, &data, DB_PREV ); + if ( rc != 0 ) { + err = "c_get prev"; + goto fail; + } + } + BDB_DISK2ID( &nhi, &hi ); + /* Update hi/lo if needed, then delete all the items + * between lo and hi + */ + if ( id < lo ) { + lo = id; + nlo = nid; + } else if ( id > hi ) { + hi = id; + nhi = nid; + } + data.data = &nid; + /* Don't fetch anything, just position cursor */ + data.flags = DB_DBT_USERMEM | DB_DBT_PARTIAL; + data.dlen = data.ulen = 0; + rc = cursor->c_get( cursor, key, &data, DB_SET ); + if ( rc != 0 ) { + err = "c_get 2"; + goto fail; + } + rc = cursor->c_del( cursor, 0 ); + if ( rc != 0 ) { + err = "c_del range1"; + goto fail; + } + /* Delete all the records */ + for ( i=1; ic_get( cursor, &key2, &data, DB_NEXT_DUP ); + if ( rc != 0 ) { + err = "c_get next_dup"; + goto fail; + } + rc = cursor->c_del( cursor, 0 ); + if ( rc != 0 ) { + err = "c_del range"; + goto fail; + } + } + /* Store the range marker */ + data.size = data.ulen = sizeof(ID); + data.flags = DB_DBT_USERMEM; + nid = 0; + rc = cursor->c_put( cursor, key, &data, DB_KEYFIRST ); + if ( rc != 0 ) { + err = "c_put range"; + goto fail; + } + nid = nlo; + rc = cursor->c_put( cursor, key, &data, DB_KEYLAST ); + if ( rc != 0 ) { + err = "c_put lo"; + goto fail; + } + nid = nhi; + rc = cursor->c_put( cursor, key, &data, DB_KEYLAST ); + if ( rc != 0 ) { + err = "c_put hi"; + goto fail; + } + } else { + /* There's room, just store it */ + goto put1; + } + } else { + /* It's a range, see if we need to rewrite + * the boundaries + */ + hi = id; + data.data = &nlo; + rc = cursor->c_get( cursor, key, &data, DB_NEXT_DUP ); + if ( rc != 0 ) { + err = "c_get lo"; + goto fail; + } + BDB_DISK2ID( &nlo, &lo ); + if ( id > lo ) { + data.data = &nhi; + rc = cursor->c_get( cursor, key, &data, DB_NEXT_DUP ); + if ( rc != 0 ) { + err = "c_get hi"; + goto fail; + } + BDB_DISK2ID( &nhi, &hi ); + } + if ( id < lo || id > hi ) { + /* Delete the current lo/hi */ + rc = cursor->c_del( cursor, 0 ); + if ( rc != 0 ) { + err = "c_del"; + goto fail; + } + data.data = &nid; + rc = cursor->c_put( cursor, key, &data, DB_KEYFIRST ); + if ( rc != 0 ) { + err = "c_put lo/hi"; + goto fail; + } + } } - if( rc != 0 ) { - Debug( LDAP_DEBUG_ANY, "=> bdb_idl_insert_key: " - "bdb_idl_insert failed (%d)\n", - rc, 0, 0 ); - - return rc; + } else if ( rc == DB_NOTFOUND ) { +put1: data.data = &nid; + rc = cursor->c_put( cursor, key, &data, DB_NODUPDATA ); + /* Don't worry if it's already there */ + if ( rc != 0 && rc != DB_KEYEXIST ) { + err = "c_put id"; + goto fail; } - - data.size = BDB_IDL_SIZEOF( ids ); + } else { + /* initial c_get failed, nothing was done */ +fail: + Debug( LDAP_DEBUG_ANY, "=> bdb_idl_insert_key: " + "%s failed: %s (%d)\n", err, db_strerror(rc), rc ); + cursor->c_close( cursor ); + return rc; } - - /* store the key */ - rc = db->put( db, tid, key, &data, 0 ); -#endif - if( rc == DB_KEYEXIST ) rc = 0; - + rc = cursor->c_close( cursor ); if( rc != 0 ) { Debug( LDAP_DEBUG_ANY, "=> bdb_idl_insert_key: " - "put failed: %s (%d)\n", + "c_close failed: %s (%d)\n", db_strerror(rc), rc, 0 ); } return rc; @@ -417,97 +911,128 @@ bdb_idl_delete_key( struct bdb_info *bdb = (struct bdb_info *) be->be_private; int rc; DBT data; -#ifndef BDB_IDL_MULTI - ID ids[BDB_IDL_DB_SIZE]; -#endif - - /* for printable keys only */ - Debug( LDAP_DEBUG_ARGS, - "=> bdb_idl_delete_key: %s %ld\n", - (char *)key->data, (long) id, 0 ); + DBC *cursor; + ID lo, hi, tmp, nid, nlo, nhi; + char *err; + { + char buf[16]; + Debug( LDAP_DEBUG_ARGS, + "bdb_idl_delete_key: %lx %s\n", + (long) id, bdb_show_key( key, buf ), 0 ); + } assert( id != NOID ); - DBTzero( &data ); -#ifdef BDB_IDL_MULTI - { - DBC *cursor; + if ( bdb->bi_idl_cache_max_size ) { + bdb_idl_cache_del( bdb, db, key ); + } - data.data = &id; - data.size = sizeof( id ); - data.ulen = data.size; - data.flags = DB_DBT_USERMEM; + BDB_ID2DISK( id, &nid ); - rc = db->cursor( db, tid, &cursor, bdb->bi_db_opflags ); - rc = cursor->c_get( cursor, key, &data, bdb->bi_db_opflags | - DB_GET_BOTH | DB_RMW ); - if (rc == 0) - rc = cursor->c_del( cursor, 0 ); - rc = cursor->c_close( cursor ); - } -#else - data.data = ids; - data.ulen = sizeof( ids ); + DBTzero( &data ); + data.data = &tmp; + data.size = sizeof( id ); + data.ulen = data.size; data.flags = DB_DBT_USERMEM; - /* fetch the key for read/modify/write */ - rc = db->get( db, tid, key, &data, DB_RMW | bdb->bi_db_opflags ); - + rc = db->cursor( db, tid, &cursor, bdb->bi_db_opflags ); if ( rc != 0 ) { Debug( LDAP_DEBUG_ANY, "=> bdb_idl_delete_key: " - "get failed: %s (%d)\n", - db_strerror(rc), rc, 0 ); + "cursor failed: %s (%d)\n", db_strerror(rc), rc, 0 ); return rc; - - } else if ( data.size == 0 || data.size % sizeof( ID ) ) { - /* size not multiple of ID size */ - Debug( LDAP_DEBUG_ANY, "=> bdb_idl_delete_key: " - "odd size: expected %ld multiple, got %ld\n", - (long) sizeof( ID ), (long) data.size, 0 ); - return -1; - - } else if ( BDB_IDL_IS_RANGE(ids) ) { - return 0; - - } else if ( data.size != (1 + ids[0]) * sizeof( ID ) ) { - /* size mismatch */ - Debug( LDAP_DEBUG_ANY, "=> bdb_idl_delete_key: " - "get size mismatch: expected %ld, got %ld\n", - (long) ((1 + ids[0]) * sizeof( ID )), (long) data.size, 0 ); - return -1; - - } else { - rc = idl_delete( ids, id ); - - if( rc != 0 ) { - Debug( LDAP_DEBUG_ANY, "=> bdb_idl_delete_key: " - "idl_delete failed (%d)\n", - rc, 0, 0 ); - return rc; - } - - if( ids[0] == 0 ) { - /* delete the key */ - rc = db->del( db, tid, key, 0 ); - if( rc != 0 ) { - Debug( LDAP_DEBUG_ANY, "=> bdb_idl_delete_key: " - "delete failed: %s (%d)\n", - db_strerror(rc), rc, 0 ); + } + /* Fetch the first data item for this key, to see if it + * exists and if it's a range. + */ + rc = cursor->c_get( cursor, key, &data, DB_SET ); + err = "c_get"; + if ( rc == 0 ) { + if ( tmp != 0 ) { + /* Not a range, just delete it */ + if (tmp != nid) { + /* position to correct item */ + tmp = nid; + rc = cursor->c_get( cursor, key, &data, DB_GET_BOTH ); + if ( rc != 0 ) { + err = "c_get id"; + goto fail; + } + } + rc = cursor->c_del( cursor, 0 ); + if ( rc != 0 ) { + err = "c_del id"; + goto fail; + } + } else { + /* It's a range, see if we need to rewrite + * the boundaries + */ + data.data = &nlo; + rc = cursor->c_get( cursor, key, &data, DB_NEXT_DUP ); + if ( rc != 0 ) { + err = "c_get lo"; + goto fail; + } + BDB_DISK2ID( &nlo, &lo ); + data.data = &nhi; + rc = cursor->c_get( cursor, key, &data, DB_NEXT_DUP ); + if ( rc != 0 ) { + err = "c_get hi"; + goto fail; + } + BDB_DISK2ID( &nhi, &hi ); + if ( id == lo || id == hi ) { + if ( id == lo ) { + id++; + lo = id; + } else if ( id == hi ) { + id--; + hi = id; + } + if ( lo >= hi ) { + /* The range has collapsed... */ + rc = db->del( db, tid, key, 0 ); + if ( rc != 0 ) { + err = "del"; + goto fail; + } + } else { + if ( id == lo ) { + /* reposition on lo slot */ + data.data = &nlo; + cursor->c_get( cursor, key, &data, DB_PREV ); + } + rc = cursor->c_del( cursor, 0 ); + if ( rc != 0 ) { + err = "c_del"; + goto fail; + } + } + if ( lo <= hi ) { + BDB_ID2DISK( id, &nid ); + data.data = &nid; + rc = cursor->c_put( cursor, key, &data, DB_KEYFIRST ); + if ( rc != 0 ) { + err = "c_put lo/hi"; + goto fail; + } + } } - return rc; } - - data.size = (ids[0]+1) * sizeof( ID ); + } else { + /* initial c_get failed, nothing was done */ +fail: + if ( rc != DB_NOTFOUND ) { + Debug( LDAP_DEBUG_ANY, "=> bdb_idl_delete_key: " + "%s failed: %s (%d)\n", err, db_strerror(rc), rc ); + } + cursor->c_close( cursor ); + return rc; } - - /* store the key */ - rc = db->put( db, tid, key, &data, 0 ); - -#endif /* BDB_IDL_MULTI */ - + rc = cursor->c_close( cursor ); if( rc != 0 ) { Debug( LDAP_DEBUG_ANY, - "=> bdb_idl_delete_key: put failed: %s (%d)\n", + "=> bdb_idl_delete_key: c_close failed: %s (%d)\n", db_strerror(rc), rc, 0 ); } @@ -544,21 +1069,28 @@ bdb_idl_intersection( return 0; } - if ( BDB_IDL_IS_RANGE( a ) && BDB_IDL_IS_RANGE(b) ) { - a[1] = idmin; - a[2] = idmax; - return 0; - } - if ( BDB_IDL_IS_RANGE( a ) ) { - ID *tmp = a; - a = b; - b = tmp; - swap = 1; + if ( BDB_IDL_IS_RANGE(b) ) { + /* If both are ranges, just shrink the boundaries */ + a[1] = idmin; + a[2] = idmax; + return 0; + } else { + /* Else swap so that b is the range, a is a list */ + ID *tmp = a; + a = b; + b = tmp; + swap = 1; + } } - if ( BDB_IDL_IS_RANGE( b ) && BDB_IDL_FIRST( b ) <= idmin && - BDB_IDL_LAST( b ) >= idmax) { + /* If a range completely covers the list, the result is + * just the list. If idmin to idmax is contiguous, just + * turn it into a range. + */ + if ( BDB_IDL_IS_RANGE( b ) + && BDB_IDL_FIRST( b ) <= BDB_IDL_FIRST( a ) + && BDB_IDL_LAST( b ) >= BDB_IDL_LAST( a ) ) { if (idmax - idmin + 1 == a[0]) { a[0] = NOID; @@ -568,15 +1100,14 @@ bdb_idl_intersection( goto done; } + /* Fine, do the intersection one element at a time. + * First advance to idmin in both IDLs. + */ + cursora = cursorb = idmin; ida = bdb_idl_first( a, &cursora ); idb = bdb_idl_first( b, &cursorb ); cursorc = 0; - while( ida < idmin ) - ida = bdb_idl_next( a, &cursora ); - while( idb < idmin ) - idb = bdb_idl_next( b, &cursorb ); - while( ida <= idmax || idb <= idmax ) { if( ida == idb ) { a[++cursorc] = ida; @@ -618,8 +1149,11 @@ bdb_idl_union( } if ( BDB_IDL_IS_RANGE( a ) || BDB_IDL_IS_RANGE(b) ) { -over: a[1] = IDL_MIN( BDB_IDL_FIRST(a), BDB_IDL_FIRST(b) ); - a[2] = IDL_MAX( BDB_IDL_LAST(a), BDB_IDL_LAST(b) ); +over: ida = IDL_MIN( BDB_IDL_FIRST(a), BDB_IDL_FIRST(b) ); + idb = IDL_MAX( BDB_IDL_LAST(a), BDB_IDL_LAST(b) ); + a[0] = NOID; + a[1] = ida; + a[2] = idb; return 0; } @@ -632,7 +1166,6 @@ over: a[1] = IDL_MIN( BDB_IDL_FIRST(a), BDB_IDL_FIRST(b) ); while( ida != NOID || idb != NOID ) { if ( ida < idb ) { if( ++cursorc > BDB_IDL_UM_MAX ) { - a[0] = NOID; goto over; } b[cursorc] = ida; @@ -655,7 +1188,7 @@ over: a[1] = IDL_MIN( BDB_IDL_FIRST(a), BDB_IDL_FIRST(b) ); idb = NOID; else idb = b[cursorc]; - if (b[cursorb] < idb) + if (cursorb <= b[0] && b[cursorb] < idb) a[cursora++] = b[cursorb++]; else { a[cursora++] = idb; @@ -765,3 +1298,276 @@ ID bdb_idl_next( ID *ids, ID *cursor ) return NOID; } + +#ifdef BDB_HIER + +/* Add one ID to an unsorted list. We ensure that the first element is the + * minimum and the last element is the maximum, for fast range compaction. + * this means IDLs up to length 3 are always sorted... + */ +int bdb_idl_append_one( ID *ids, ID id ) +{ + if (BDB_IDL_IS_RANGE( ids )) { + /* if already in range, treat as a dup */ + if (id >= BDB_IDL_FIRST(ids) && id <= BDB_IDL_LAST(ids)) + return -1; + if (id < BDB_IDL_FIRST(ids)) + ids[1] = id; + else if (id > BDB_IDL_LAST(ids)) + ids[2] = id; + return 0; + } + if ( ids[0] ) { + ID tmp; + + if (id < ids[1]) { + tmp = ids[1]; + ids[1] = id; + id = tmp; + } + if ( ids[0] > 1 && id < ids[ids[0]] ) { + tmp = ids[ids[0]]; + ids[ids[0]] = id; + id = tmp; + } + } + ids[0]++; + if ( ids[0] >= BDB_IDL_UM_MAX ) { + ids[0] = NOID; + ids[2] = id; + } else { + ids[ids[0]] = id; + } + return 0; +} + +/* Append sorted list b to sorted list a. The result is unsorted but + * a[1] is the min of the result and a[a[0]] is the max. + */ +int bdb_idl_append( ID *a, ID *b ) +{ + ID ida, idb, tmp, swap = 0; + + if ( BDB_IDL_IS_ZERO( b ) ) { + return 0; + } + + if ( BDB_IDL_IS_ZERO( a ) ) { + BDB_IDL_CPY( a, b ); + return 0; + } + + ida = BDB_IDL_LAST( a ); + idb = BDB_IDL_LAST( b ); + if ( BDB_IDL_IS_RANGE( a ) || BDB_IDL_IS_RANGE(b) || + a[0] + b[0] >= BDB_IDL_UM_MAX ) { + a[2] = IDL_MAX( ida, idb ); + a[1] = IDL_MIN( a[1], b[1] ); + a[0] = NOID; + return 0; + } + + if ( b[0] > 1 && ida > idb ) { + swap = idb; + a[a[0]] = idb; + b[b[0]] = ida; + } + + if ( b[1] < a[1] ) { + tmp = a[1]; + a[1] = b[1]; + } else { + tmp = b[1]; + } + a[0]++; + a[a[0]] = tmp; + + if ( b[0] > 1 ) { + int i = b[0] - 1; + AC_MEMCPY(a+a[0]+1, b+2, i * sizeof(ID)); + a[0] += i; + } + if ( swap ) { + b[b[0]] = swap; + } + return 0; +} + +#if 1 + +/* Quicksort + Insertion sort for small arrays */ + +#define SMALL 8 +#define SWAP(a,b) itmp=(a);(a)=(b);(b)=itmp + +void +bdb_idl_sort( ID *ids, ID *tmp ) +{ + int *istack = (int *)tmp; + int i,j,k,l,ir,jstack; + ID a, itmp; + + if ( BDB_IDL_IS_RANGE( ids )) + return; + + ir = ids[0]; + l = 1; + jstack = 0; + for(;;) { + if (ir - l < SMALL) { /* Insertion sort */ + for (j=l+1;j<=ir;j++) { + a = ids[j]; + for (i=j-1;i>=1;i--) { + if (ids[i] <= a) break; + ids[i+1] = ids[i]; + } + ids[i+1] = a; + } + if (jstack == 0) break; + ir = istack[jstack--]; + l = istack[jstack--]; + } else { + k = (l + ir) >> 1; /* Choose median of left, center, right */ + SWAP(ids[k], ids[l+1]); + if (ids[l] > ids[ir]) { + SWAP(ids[l], ids[ir]); + } + if (ids[l+1] > ids[ir]) { + SWAP(ids[l+1], ids[ir]); + } + if (ids[l] > ids[l+1]) { + SWAP(ids[l], ids[l+1]); + } + i = l+1; + j = ir; + a = ids[l+1]; + for(;;) { + do i++; while(ids[i] < a); + do j--; while(ids[j] > a); + if (j < i) break; + SWAP(ids[i],ids[j]); + } + ids[l+1] = ids[j]; + ids[j] = a; + jstack += 2; + if (ir-i+1 >= j-1) { + istack[jstack] = ir; + istack[jstack-1] = i; + ir = j-1; + } else { + istack[jstack] = j-1; + istack[jstack-1] = l; + l = i; + } + } + } +} + +#else + +/* 8 bit Radix sort + insertion sort + * + * based on code from http://www.cubic.org/docs/radix.htm + * with improvements by mbackes@symas.com and hyc@symas.com + * + * This code is O(n) but has a relatively high constant factor. For lists + * up to ~50 Quicksort is slightly faster; up to ~100 they are even. + * Much faster than quicksort for lists longer than ~100. Insertion + * sort is actually superior for lists <50. + */ + +#define BUCKETS (1<<8) +#define SMALL 50 + +void +bdb_idl_sort( ID *ids, ID *tmp ) +{ + int count, soft_limit, phase = 0, size = ids[0]; + ID *idls[2]; + unsigned char *maxv = (unsigned char *)&ids[size]; + + if ( BDB_IDL_IS_RANGE( ids )) + return; + + /* Use insertion sort for small lists */ + if ( size <= SMALL ) { + int i,j; + ID a; + + for (j=1;j<=size;j++) { + a = ids[j]; + for (i=j-1;i>=1;i--) { + if (ids[i] <= a) break; + ids[i+1] = ids[i]; + } + ids[i+1] = a; + } + return; + } + + tmp[0] = size; + idls[0] = ids; + idls[1] = tmp; + +#if BYTE_ORDER == BIG_ENDIAN + for (soft_limit = 0; !maxv[soft_limit]; soft_limit++); +#else + for (soft_limit = sizeof(ID)-1; !maxv[soft_limit]; soft_limit--); +#endif + + for ( +#if BYTE_ORDER == BIG_ENDIAN + count = sizeof(ID)-1; count >= soft_limit; --count +#else + count = 0; count <= soft_limit; ++count +#endif + ) { + unsigned int num[BUCKETS], * np, n, sum; + int i; + ID *sp, *source, *dest; + unsigned char *bp, *source_start; + + source = idls[phase]+1; + dest = idls[phase^1]+1; + source_start = ((unsigned char *) source) + count; + + np = num; + for ( i = BUCKETS; i > 0; --i ) *np++ = 0; + + /* count occurences of every byte value */ + bp = source_start; + for ( i = size; i > 0; --i, bp += sizeof(ID) ) + num[*bp]++; + + /* transform count into index by summing elements and storing + * into same array + */ + sum = 0; + np = num; + for ( i = BUCKETS; i > 0; --i ) { + n = *np; + *np++ = sum; + sum += n; + } + + /* fill dest with the right values in the right place */ + bp = source_start; + sp = source; + for ( i = size; i > 0; --i, bp += sizeof(ID) ) { + np = num + *bp; + dest[*np] = *sp++; + ++(*np); + } + phase ^= 1; + } + + /* copy back from temp if needed */ + if ( phase ) { + ids++; tmp++; + for ( count = 0; count < size; ++count ) + *ids++ = *tmp++; + } +} +#endif /* Quick vs Radix */ + +#endif /* BDB_HIER */