X-Git-Url: https://git.sur5r.net/?a=blobdiff_plain;f=servers%2Fslapd%2Fback-bdb%2Fdn2id.c;h=eb7fbdb3fa52b8843392884c0f944102c3739b39;hb=19deaedb59bfda97526836af89d56d5d8b1d8c14;hp=17b0650dfcf29a288bfc6dd0349dfbe8646b2ccb;hpb=26e327f32e0dc8a2ddc2e4df08cdb6335b33de92;p=openldap diff --git a/servers/slapd/back-bdb/dn2id.c b/servers/slapd/back-bdb/dn2id.c index 17b0650dfc..eb7fbdb3fa 100644 --- a/servers/slapd/back-bdb/dn2id.c +++ b/servers/slapd/back-bdb/dn2id.c @@ -1,8 +1,17 @@ /* dn2id.c - routines to deal with the dn2id index */ /* $OpenLDAP$ */ -/* - * Copyright 1998-2002 The OpenLDAP Foundation, All Rights Reserved. - * COPYING RESTRICTIONS APPLY, see COPYRIGHT file +/* This work is part of OpenLDAP Software . + * + * Copyright 2000-2007 The OpenLDAP Foundation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * . */ #include "portable.h" @@ -12,326 +21,339 @@ #include "back-bdb.h" #include "idl.h" +#include "lutil.h" + +#define bdb_dn2id_lock BDB_SYMBOL(dn2id_lock) + +static int +bdb_dn2id_lock( struct bdb_info *bdb, struct berval *dn, + int rw, u_int32_t locker, DB_LOCK *lock ) +{ + int rc; + DBT lockobj; + int db_rw; + + if (rw) + db_rw = DB_LOCK_WRITE; + else + db_rw = DB_LOCK_READ; + + lockobj.data = dn->bv_val; + lockobj.size = dn->bv_len; + + rc = LOCK_GET(bdb->bi_dbenv, locker, DB_LOCK_NOWAIT, + &lockobj, db_rw, lock); + return rc; +} #ifndef BDB_HIER int bdb_dn2id_add( - BackendDB *be, + Operation *op, DB_TXN *txn, - struct berval *pbv, + EntryInfo *eip, Entry *e ) { - struct bdb_info *bdb = (struct bdb_info *) be->be_private; + struct bdb_info *bdb = (struct bdb_info *) op->o_bd->be_private; DB *db = bdb->bi_dn2id->bdi_db; int rc; DBT key, data; - char *buf, *ptr, *pdn; + ID nid; + char *buf; + struct berval ptr, pdn; - Debug( LDAP_DEBUG_TRACE, "=> bdb_dn2id_add( \"%s\", 0x%08lx )\n", - e->e_ndn, (long) e->e_id, 0 ); + Debug( LDAP_DEBUG_TRACE, "=> bdb_dn2id_add 0x%lx: \"%s\"\n", + e->e_id, e->e_ndn, 0 ); assert( e->e_id != NOID ); DBTzero( &key ); key.size = e->e_nname.bv_len + 2; - buf = ch_malloc( key.size ); + key.ulen = key.size; + key.flags = DB_DBT_USERMEM; + buf = op->o_tmpalloc( key.size, op->o_tmpmemctx ); key.data = buf; buf[0] = DN_BASE_PREFIX; - ptr = buf + 1; - AC_MEMCPY( ptr, e->e_ndn, key.size - 1 ); + ptr.bv_val = buf + 1; + ptr.bv_len = e->e_nname.bv_len; + AC_MEMCPY( ptr.bv_val, e->e_nname.bv_val, e->e_nname.bv_len ); + ptr.bv_val[ptr.bv_len] = '\0'; DBTzero( &data ); - data.data = (char *) &e->e_id; - data.size = sizeof( e->e_id ); + data.data = &nid; + data.size = sizeof( nid ); + BDB_ID2DISK( e->e_id, &nid ); /* store it -- don't override */ rc = db->put( db, txn, &key, &data, DB_NOOVERWRITE ); if( rc != 0 ) { - Debug( LDAP_DEBUG_ANY, "=> bdb_dn2id_add: put failed: %s %d\n", - db_strerror(rc), rc, 0 ); + Debug( LDAP_DEBUG_ANY, "=> bdb_dn2id_add 0x%lx: put failed: %s %d\n", + e->e_id, db_strerror(rc), rc ); goto done; } - if( !be_issuffix( be, ptr )) { +#ifndef BDB_MULTIPLE_SUFFIXES + if( !be_issuffix( op->o_bd, &ptr )) +#endif + { buf[0] = DN_SUBTREE_PREFIX; - rc = bdb_idl_insert_key( be, db, txn, &key, e->e_id ); + rc = db->put( db, txn, &key, &data, DB_NOOVERWRITE ); if( rc != 0 ) { Debug( LDAP_DEBUG_ANY, - "=> bdb_dn2id_add: subtree (%s) insert failed: %d\n", - ptr, rc, 0 ); + "=> bdb_dn2id_add 0x%lx: subtree (%s) put failed: %d\n", + e->e_id, ptr.bv_val, rc ); goto done; } - } - - pdn = dn_parent( be, ptr ); - - if( pdn != NULL ) { - key.size -= pdn - ptr; - pdn[-1] = DN_ONE_PREFIX; - key.data = pdn - 1; + +#ifdef BDB_MULTIPLE_SUFFIXES + if( !be_issuffix( op->o_bd, &ptr )) +#endif + { + dnParent( &ptr, &pdn ); + + key.size = pdn.bv_len + 2; + key.ulen = key.size; + pdn.bv_val[-1] = DN_ONE_PREFIX; + key.data = pdn.bv_val-1; + ptr = pdn; - rc = bdb_idl_insert_key( be, db, txn, &key, e->e_id ); + rc = bdb_idl_insert_key( op->o_bd, db, txn, &key, e->e_id ); if( rc != 0 ) { Debug( LDAP_DEBUG_ANY, - "=> bdb_dn2id_add: parent (%s) insert failed: %d\n", - pdn, rc, 0 ); + "=> bdb_dn2id_add 0x%lx: parent (%s) insert failed: %d\n", + e->e_id, ptr.bv_val, rc ); goto done; } } - while( pdn != NULL ) { - if ( be_issuffix( be, pdn )) - break; - pdn[-1] = DN_SUBTREE_PREFIX; +#ifndef BDB_MULTIPLE_SUFFIXES + while( !be_issuffix( op->o_bd, &ptr )) +#else + for (;;) +#endif + { + ptr.bv_val[-1] = DN_SUBTREE_PREFIX; - rc = bdb_idl_insert_key( be, db, txn, &key, e->e_id ); + rc = bdb_idl_insert_key( op->o_bd, db, txn, &key, e->e_id ); if( rc != 0 ) { Debug( LDAP_DEBUG_ANY, - "=> bdb_dn2id_add: subtree (%s) insert failed: %d\n", - pdn, rc, 0 ); + "=> bdb_dn2id_add 0x%lx: subtree (%s) insert failed: %d\n", + e->e_id, ptr.bv_val, rc ); break; } +#ifdef BDB_MULTIPLE_SUFFIXES + if( be_issuffix( op->o_bd, &ptr )) break; +#endif + dnParent( &ptr, &pdn ); + + key.size = pdn.bv_len + 2; + key.ulen = key.size; + key.data = pdn.bv_val - 1; ptr = pdn; - pdn = dn_parent( be, pdn ); - key.size -= pdn - ptr; - key.data = pdn - 1; + } } done: - ch_free( buf ); - Debug( LDAP_DEBUG_TRACE, "<= bdb_dn2id_add: %d\n", rc, 0, 0 ); + op->o_tmpfree( buf, op->o_tmpmemctx ); + Debug( LDAP_DEBUG_TRACE, "<= bdb_dn2id_add 0x%lx: %d\n", e->e_id, rc, 0 ); return rc; } int bdb_dn2id_delete( - BackendDB *be, + Operation *op, DB_TXN *txn, - char *pdn, + EntryInfo *eip, Entry *e ) { - struct bdb_info *bdb = (struct bdb_info *) be->be_private; + struct bdb_info *bdb = (struct bdb_info *) op->o_bd->be_private; DB *db = bdb->bi_dn2id->bdi_db; - int rc; + char *buf; DBT key; - char *buf, *ptr; + DB_LOCK lock; + struct berval pdn, ptr; + int rc; - Debug( LDAP_DEBUG_TRACE, "=> bdb_dn2id_delete( \"%s\", 0x%08lx )\n", - e->e_ndn, e->e_id, 0 ); + Debug( LDAP_DEBUG_TRACE, "=> bdb_dn2id_delete 0x%lx: \"%s\"\n", + e->e_id, e->e_ndn, 0 ); DBTzero( &key ); key.size = e->e_nname.bv_len + 2; - buf = ch_malloc( key.size ); + buf = op->o_tmpalloc( key.size, op->o_tmpmemctx ); key.data = buf; key.flags = DB_DBT_USERMEM; buf[0] = DN_BASE_PREFIX; - ptr = buf+1; - AC_MEMCPY( ptr, e->e_ndn, key.size - 1 ); + ptr.bv_val = buf+1; + ptr.bv_len = e->e_nname.bv_len; + AC_MEMCPY( ptr.bv_val, e->e_nname.bv_val, e->e_nname.bv_len ); + ptr.bv_val[ptr.bv_len] = '\0'; + + /* We hold this lock until the TXN completes */ + rc = bdb_dn2id_lock( bdb, &e->e_nname, 1, TXN_ID( txn ), &lock ); + if ( rc ) goto done; /* delete it */ rc = db->del( db, txn, &key, 0 ); if( rc != 0 ) { - Debug( LDAP_DEBUG_ANY, "=> bdb_dn2id_delete: delete failed: %s %d\n", - db_strerror(rc), rc, 0 ); + Debug( LDAP_DEBUG_ANY, "=> bdb_dn2id_delete 0x%lx: delete failed: %s %d\n", + e->e_id, db_strerror(rc), rc ); goto done; } - if( !be_issuffix( be, ptr )) { +#ifndef BDB_MULTIPLE_SUFFIXES + if( !be_issuffix( op->o_bd, &ptr )) +#endif + { buf[0] = DN_SUBTREE_PREFIX; - rc = bdb_idl_delete_key( be, db, txn, &key, e->e_id ); + rc = bdb_idl_delete_key( op->o_bd, db, txn, &key, e->e_id ); if( rc != 0 ) { Debug( LDAP_DEBUG_ANY, - "=> bdb_dn2id_delete: subtree (%s) delete failed: %d\n", - ptr, rc, 0 ); + "=> bdb_dn2id_delete 0x%lx: subtree (%s) delete failed: %d\n", + e->e_id, ptr.bv_val, rc ); goto done; } - } - pdn = dn_parent( be, ptr ); +#ifdef BDB_MULTIPLE_SUFFIXES + if( !be_issuffix( op->o_bd, &ptr )) +#endif + { + dnParent( &ptr, &pdn ); - if( pdn != NULL ) { - key.size -= pdn - ptr; - pdn[-1] = DN_ONE_PREFIX; - key.data = pdn - 1; + key.size = pdn.bv_len + 2; + key.ulen = key.size; + pdn.bv_val[-1] = DN_ONE_PREFIX; + key.data = pdn.bv_val - 1; + ptr = pdn; - rc = bdb_idl_delete_key( be, db, txn, &key, e->e_id ); + rc = bdb_idl_delete_key( op->o_bd, db, txn, &key, e->e_id ); if( rc != 0 ) { Debug( LDAP_DEBUG_ANY, - "=> bdb_dn2id_delete: parent (%s) delete failed: %d\n", - pdn, rc, 0 ); + "=> bdb_dn2id_delete 0x%lx: parent (%s) delete failed: %d\n", + e->e_id, ptr.bv_val, rc ); goto done; } } - while( pdn != NULL ) { - if ( be_issuffix( be, pdn )) - break; - - pdn[-1] = DN_SUBTREE_PREFIX; +#ifndef BDB_MULTIPLE_SUFFIXES + while( !be_issuffix( op->o_bd, &ptr )) +#else + for (;;) +#endif + { + ptr.bv_val[-1] = DN_SUBTREE_PREFIX; - rc = bdb_idl_delete_key( be, db, txn, &key, e->e_id ); + rc = bdb_idl_delete_key( op->o_bd, db, txn, &key, e->e_id ); if( rc != 0 ) { Debug( LDAP_DEBUG_ANY, - "=> bdb_dn2id_delete: subtree (%s) delete failed: %d\n", - pdn, rc, 0 ); + "=> bdb_dn2id_delete 0x%lx: subtree (%s) delete failed: %d\n", + e->e_id, ptr.bv_val, rc ); goto done; } +#ifdef BDB_MULTIPLE_SUFFIXES + if( be_issuffix( op->o_bd, &ptr )) break; +#endif + dnParent( &ptr, &pdn ); + + key.size = pdn.bv_len + 2; + key.ulen = key.size; + key.data = pdn.bv_val - 1; ptr = pdn; - pdn = dn_parent( be, pdn ); - key.size -= pdn - ptr; - key.data = pdn - 1; + } } done: - ch_free( buf ); - Debug( LDAP_DEBUG_TRACE, "<= bdb_dn2id_delete %d\n", rc, 0, 0 ); + op->o_tmpfree( buf, op->o_tmpmemctx ); + Debug( LDAP_DEBUG_TRACE, "<= bdb_dn2id_delete 0x%lx: %d\n", e->e_id, rc, 0 ); return rc; } int bdb_dn2id( - BackendDB *be, - DB_TXN *txn, + Operation *op, struct berval *dn, - ID *id ) + EntryInfo *ei, + u_int32_t locker, + DB_LOCK *lock ) { + struct bdb_info *bdb = (struct bdb_info *) op->o_bd->be_private; + DB *db = bdb->bi_dn2id->bdi_db; + DBC *cursor; int rc; DBT key, data; - struct bdb_info *bdb = (struct bdb_info *) be->be_private; - DB *db = bdb->bi_dn2id->bdi_db; + ID nid; - Debug( LDAP_DEBUG_TRACE, "=> bdb_dn2id( \"%s\" )\n", dn->bv_val, 0, 0 ); + Debug( LDAP_DEBUG_TRACE, "=> bdb_dn2id(\"%s\")\n", dn->bv_val, 0, 0 ); DBTzero( &key ); key.size = dn->bv_len + 2; - key.data = ch_malloc( key.size ); + key.data = op->o_tmpalloc( key.size, op->o_tmpmemctx ); ((char *)key.data)[0] = DN_BASE_PREFIX; AC_MEMCPY( &((char *)key.data)[1], dn->bv_val, key.size - 1 ); /* store the ID */ DBTzero( &data ); - data.data = id; + data.data = &nid; data.ulen = sizeof(ID); data.flags = DB_DBT_USERMEM; + rc = db->cursor( db, NULL, &cursor, bdb->bi_db_opflags ); + if ( rc ) goto leave; + + rc = bdb_dn2id_lock( bdb, dn, 0, locker, lock ); + if ( rc ) goto nolock; + + if ( locker ) { + cursor->locker = locker; + } + /* fetch it */ - rc = db->get( db, txn, &key, &data, bdb->bi_db_opflags ); + rc = cursor->c_get( cursor, &key, &data, DB_SET ); + +nolock: + cursor->c_close( cursor ); +leave: if( rc != 0 ) { Debug( LDAP_DEBUG_TRACE, "<= bdb_dn2id: get failed: %s (%d)\n", db_strerror( rc ), rc, 0 ); } else { - Debug( LDAP_DEBUG_TRACE, "<= bdb_dn2id: got id=0x%08lx\n", - *id, 0, 0 ); - } - - ch_free( key.data ); - return rc; -} - -int -bdb_dn2id_matched( - BackendDB *be, - DB_TXN *txn, - struct berval *in, - ID *id, - ID *id2 ) -{ - int rc; - DBT key, data; - struct bdb_info *bdb = (struct bdb_info *) be->be_private; - DB *db = bdb->bi_dn2id->bdi_db; - char *buf, *dn; - - Debug( LDAP_DEBUG_TRACE, "=> bdb_dn2id_matched( \"%s\" )\n", in->bv_val, 0, 0 ); - - DBTzero( &key ); - key.size = in->bv_len + 2; - buf = ch_malloc( key.size ); - key.data = buf; - dn = buf+1; - AC_MEMCPY( dn, in->bv_val, key.size - 1 ); - - /* store the ID */ - DBTzero( &data ); - data.data = id; - data.ulen = sizeof(ID); - data.flags = DB_DBT_USERMEM; - - while(1) { - dn[-1] = DN_BASE_PREFIX; - - *id = NOID; - - /* fetch it */ - rc = db->get( db, txn, &key, &data, bdb->bi_db_opflags ); - - if( rc == DB_NOTFOUND ) { - char *pdn = dn_parent( be, dn ); - - if( pdn == NULL || *pdn == '\0' ) { - Debug( LDAP_DEBUG_TRACE, - "<= bdb_dn2id_matched: no match\n", - 0, 0, 0 ); - break; - } - - key.size -= pdn - dn; - dn = pdn; - key.data = pdn - 1; - - } else if ( rc == 0 ) { - if( data.size != sizeof( ID ) ) { - Debug( LDAP_DEBUG_ANY, - "<= bdb_dn2id_matched: get size mismatch: " - "expected %ld, got %ld\n", - (long) sizeof(ID), (long) data.size, 0 ); - } - - if( dn != buf+1 ) { - *id2 = *id; - } - - Debug( LDAP_DEBUG_TRACE, - "<= bdb_dn2id_matched: id=0x%08lx: %s %s\n", - (long) *id, *id2 == 0 ? "entry" : "matched", dn ); - break; - - } else { - Debug( LDAP_DEBUG_ANY, - "<= bdb_dn2id_matched: get failed: %s (%d)\n", - db_strerror(rc), rc, 0 ); - break; - } + BDB_DISK2ID( &nid, &ei->bei_id ); + Debug( LDAP_DEBUG_TRACE, "<= bdb_dn2id: got id=0x%lx\n", + ei->bei_id, 0, 0 ); } - - ch_free( buf ); + op->o_tmpfree( key.data, op->o_tmpmemctx ); return rc; } int bdb_dn2id_children( - BackendDB *be, + Operation *op, DB_TXN *txn, - struct berval *dn ) + Entry *e ) { - int rc; DBT key, data; - struct bdb_info *bdb = (struct bdb_info *) be->be_private; + struct bdb_info *bdb = (struct bdb_info *) op->o_bd->be_private; DB *db = bdb->bi_dn2id->bdi_db; ID id; + int rc; - Debug( LDAP_DEBUG_TRACE, "=> bdb_dn2id_children( %s )\n", - dn->bv_val, 0, 0 ); - + Debug( LDAP_DEBUG_TRACE, "=> bdb_dn2id_children(\"%s\")\n", + e->e_nname.bv_val, 0, 0 ); DBTzero( &key ); - key.size = dn->bv_len + 2; - key.data = ch_malloc( key.size ); + key.size = e->e_nname.bv_len + 2; + key.data = op->o_tmpalloc( key.size, op->o_tmpmemctx ); ((char *)key.data)[0] = DN_ONE_PREFIX; - AC_MEMCPY( &((char *)key.data)[1], dn->bv_val, key.size - 1 ); + AC_MEMCPY( &((char *)key.data)[1], e->e_nname.bv_val, key.size - 1 ); + if ( bdb->bi_idl_cache_size ) { + rc = bdb_idl_cache_get( bdb, db, &key, NULL ); + if ( rc != LDAP_NO_SUCH_OBJECT ) { + op->o_tmpfree( key.data, op->o_tmpmemctx ); + return rc; + } + } /* we actually could do a empty get... */ DBTzero( &data ); data.data = &id; @@ -341,10 +363,10 @@ bdb_dn2id_children( data.dlen = sizeof(id); rc = db->get( db, txn, &key, &data, bdb->bi_db_opflags ); - free( key.data ); + op->o_tmpfree( key.data, op->o_tmpmemctx ); - Debug( LDAP_DEBUG_TRACE, "<= bdb_dn2id_children( %s ): %schildren (%d)\n", - dn->bv_val, + Debug( LDAP_DEBUG_TRACE, "<= bdb_dn2id_children(\"%s\"): %s (%d)\n", + e->e_nname.bv_val, rc == 0 ? "" : ( rc == DB_NOTFOUND ? "no " : db_strerror(rc) ), rc ); @@ -353,31 +375,38 @@ bdb_dn2id_children( int bdb_dn2idl( - BackendDB *be, - struct berval *dn, - int prefix, - ID *ids ) + Operation *op, + Entry *e, + ID *ids, + ID *stack ) { int rc; DBT key; - struct bdb_info *bdb = (struct bdb_info *) be->be_private; + struct bdb_info *bdb = (struct bdb_info *) op->o_bd->be_private; DB *db = bdb->bi_dn2id->bdi_db; + int prefix = ( op->ors_scope == LDAP_SCOPE_ONELEVEL ) + ? DN_ONE_PREFIX : DN_SUBTREE_PREFIX; - Debug( LDAP_DEBUG_TRACE, "=> bdb_dn2idl( \"%s\" )\n", dn->bv_val, 0, 0 ); + Debug( LDAP_DEBUG_TRACE, "=> bdb_dn2idl(\"%s\")\n", + e->e_nname.bv_val, 0, 0 ); - if (prefix == DN_SUBTREE_PREFIX && be_issuffix(be, dn->bv_val)) - { +#ifndef BDB_MULTIPLE_SUFFIXES + if ( prefix == DN_SUBTREE_PREFIX && BEI(e)->bei_parent->bei_id == 0 ) { BDB_IDL_ALL(bdb, ids); return 0; } +#endif DBTzero( &key ); - key.size = dn->bv_len + 2; - key.data = ch_malloc( key.size ); + key.size = e->e_nname.bv_len + 2; + key.ulen = key.size; + key.flags = DB_DBT_USERMEM; + key.data = op->o_tmpalloc( key.size, op->o_tmpmemctx ); ((char *)key.data)[0] = prefix; - AC_MEMCPY( &((char *)key.data)[1], dn->bv_val, key.size - 1 ); + AC_MEMCPY( &((char *)key.data)[1], e->e_nname.bv_val, key.size - 1 ); - rc = bdb_idl_fetch_key( be, db, NULL, &key, ids ); + BDB_IDL_ZERO( ids ); + rc = bdb_idl_fetch_key( op->o_bd, db, NULL, &key, ids, NULL, 0 ); if( rc != 0 ) { Debug( LDAP_DEBUG_TRACE, @@ -391,498 +420,815 @@ bdb_dn2idl( (long) BDB_IDL_FIRST( ids ), (long) BDB_IDL_LAST( ids ) ); } - ch_free( key.data ); + op->o_tmpfree( key.data, op->o_tmpmemctx ); return rc; } -#else /* BDB_HIER */ -/* Experimental management routines for a hierarchically structured backend. +#else /* BDB_HIER */ +/* Management routines for a hierarchically structured database. * - * Unsupported! Use at your own risk! + * Instead of a ldbm-style dn2id database, we use a hierarchical one. Each + * entry in this database is a struct diskNode, keyed by entryID and with + * the data containing the RDN and entryID of the node's children. We use + * a B-Tree with sorted duplicates to store all the children of a node under + * the same key. Also, the first item under the key contains the entry's own + * rdn and the ID of the node's parent, to allow bottom-up tree traversal as + * well as top-down. To keep this info first in the list, the high bit of all + * subsequent nrdnlen's is always set. This means we can only accomodate + * RDNs up to length 32767, but that's fine since full DNs are already + * restricted to 8192. * - * Instead of a dn2id database, we use an id2parent database. Each entry in - * this database is a struct diskNode, containing the ID of the node's parent - * and the RDN of the node. + * The diskNode is a variable length structure. This definition is not + * directly usable for in-memory manipulation. */ typedef struct diskNode { - ID parent; - struct berval rdn; - struct berval nrdn; + unsigned char nrdnlen[2]; + char nrdn[1]; + char rdn[1]; /* variable placement */ + unsigned char entryID[sizeof(ID)]; /* variable placement */ } diskNode; -/* In bdb_db_open() we call bdb_build_tree() which reads the entire id2parent - * database into memory (into an AVL tree). Next we iterate through each node - * of this tree, connecting each child to its parent. The nodes in this AVL - * tree are a struct idNode. The immediate (Onelevel) children of a node are - * referenced in the i_kids AVL tree. With this arrangement, there is no need - * to maintain the DN_ONE_PREFIX or DN_SUBTREE_PREFIX database keys. Note that - * the DN of an entry is constructed by walking up the list of i_parent - * pointers, so no full DN is stored on disk anywhere. This makes modrdn - * extremely efficient, even when operating on a populated subtree. - * - * The idNode tree is searched directly from the root when performing id to - * entry lookups. The tree is traversed using the i_kids subtrees when - * performing dn to id lookups. - */ -typedef struct idNode { - ID i_id; - struct idNode *i_parent; - diskNode *i_rdn; - Avlnode *i_kids; - ldap_pvt_thread_rdwr_t i_kids_rdwr; -} idNode; - - -/* The main AVL tree is sorted in ID order. The i_kids AVL trees are - * sorted in lexical order. These are the various helper routines used - * for the searches and sorts. +/* Sort function for the sorted duplicate data items of a dn2id key. + * Sorts based on normalized RDN, in length order. */ -static int -node_find_cmp( - ID id, - idNode *n -) -{ - return id - n->i_id; -} - -static int -node_frdn_cmp( - char *nrdn, - idNode *n -) -{ - return strcmp(nrdn, n->i_rdn->nrdn.bv_val); -} - -static int -node_add_cmp( - idNode *a, - idNode *b +int +hdb_dup_compare( + DB *db, + const DBT *usrkey, + const DBT *curkey ) { - return a->i_id - b->i_id; -} + diskNode *un, *cn; + int rc, ul, cl; -static int -node_rdn_cmp( - idNode *a, - idNode *b -) -{ - return strcmp(a->i_rdn->nrdn.bv_val, b->i_rdn->nrdn.bv_val); -} + un = (diskNode *)usrkey->data; + cn = (diskNode *)curkey->data; -idNode * bdb_find_id_node( - ID id, - Avlnode *tree -) -{ - return avl_find(tree, (const void *)id, (AVL_CMP)node_find_cmp); -} + /* data is not aligned, cannot compare directly */ + rc = un->nrdnlen[0] - cn->nrdnlen[0]; + if ( rc ) return rc; + rc = un->nrdnlen[1] - cn->nrdnlen[1]; + if ( rc ) return rc; -idNode * bdb_find_rdn_node( - char *nrdn, - Avlnode *tree -) -{ - return avl_find(tree, (const void *)nrdn, (AVL_CMP)node_frdn_cmp); + return strcmp( un->nrdn, cn->nrdn ); } -/* This function links a node into its parent's i_kids tree. */ -int bdb_insert_kid( - idNode *a, - Avlnode *tree -) +/* This function constructs a full DN for a given entry. + */ +int hdb_fix_dn( + Entry *e, + int checkit ) { - int rc; + EntryInfo *ei; + int rlen = 0, nrlen = 0; + char *ptr, *nptr; + int max = 0; - if (a->i_rdn->parent == 0) + if ( !e->e_id ) return 0; - a->i_parent = bdb_find_id_node(a->i_rdn->parent, tree); - if (!a->i_parent) - return -1; - ldap_pvt_thread_rdwr_wlock(&a->i_parent->i_kids_rdwr); - rc = avl_insert( &a->i_parent->i_kids, (caddr_t) a, - (AVL_CMP)node_rdn_cmp, (AVL_DUP) avl_dup_error ); - ldap_pvt_thread_rdwr_wunlock(&a->i_parent->i_kids_rdwr); - return rc; -} - -/* This function adds a node into the main AVL tree */ -idNode *bdb_add_node( - ID id, - char *d, - struct bdb_info *bdb -) -{ - idNode *node; - - node = (idNode *)ch_malloc(sizeof(idNode)); - node->i_id = id; - node->i_parent = NULL; - node->i_kids = NULL; - node->i_rdn = (diskNode *)d; - node->i_rdn->rdn.bv_val += (long)d; - node->i_rdn->nrdn.bv_val += (long)d; - ldap_pvt_thread_rdwr_init(&node->i_kids_rdwr); - avl_insert( &bdb->bi_tree, (caddr_t) node, - (AVL_CMP)node_add_cmp, (AVL_DUP) avl_dup_error ); - if (id == 1) - bdb->bi_troot = node; - return node; -} - -/* This function initializes the trees at startup time. */ -int bdb_build_tree( - Backend *be -) -{ - struct bdb_info *bdb = (struct bdb_info *) be->be_private; - int i, rc; - DBC *cursor; - DBT key, data; - ID id; - idNode *node; - char **rdns; - - bdb->bi_tree = NULL; - rc = bdb->bi_id2parent->bdi_db->cursor( - bdb->bi_id2parent->bdi_db, NULL, &cursor, - bdb->bi_db_opflags ); - if( rc != 0 ) { - return NOID; + /* count length of all DN components */ + for ( ei = BEI(e); ei && ei->bei_id; ei=ei->bei_parent ) { + rlen += ei->bei_rdn.bv_len + 1; + nrlen += ei->bei_nrdn.bv_len + 1; + if (ei->bei_modrdns > max) max = ei->bei_modrdns; } - /* When be_suffix is turned into struct berval or LDAPDN - * life will get a lot easier... Since no DNs live on disk, we - * need to operate on the be_suffix to fully qualify our DNs. - * We need to know how many components are in the suffix DN, - * so we can tell where the suffix ends and our nodes begin. - * - * Note that this code always uses be_suffix[0], so defining - * multiple suffixes for a single backend won't work! - */ - rdns = ldap_explode_dn(be->be_nsuffix[0]->bv_val, 0); - for (i=0; rdns[i]; i++); - bdb->bi_nrdns = i; - charray_free(rdns); - - DBTzero( &key ); - DBTzero( &data ); - key.data = (char *)&id; - key.ulen = sizeof( id ); - key.flags = DB_DBT_USERMEM; - data.flags = DB_DBT_MALLOC; - - while (cursor->c_get( cursor, &key, &data, DB_NEXT ) == 0) { - bdb_add_node( id, data.data, bdb ); + /* See if the entry DN was invalidated by a subtree rename */ + if ( checkit ) { + if ( BEI(e)->bei_modrdns >= max ) { + return 0; + } + /* We found a mismatch, tell the caller to lock it */ + if ( checkit == 1 ) { + return 1; + } + /* checkit == 2. do the fix. */ + free( e->e_name.bv_val ); + free( e->e_nname.bv_val ); } - cursor->c_close( cursor ); - rc = avl_apply(bdb->bi_tree, (AVL_APPLY)bdb_insert_kid, bdb->bi_tree, - -1, AVL_INORDER ); - - return rc; -} - -/* This function constructs a full DN for a given id. We really should - * be passing idNodes directly, to save some effort... - */ -int bdb_fix_dn( - BackendDB *be, - ID id, - Entry *e -) -{ - struct bdb_info *bdb = (struct bdb_info *) be->be_private; - idNode *n, *o; - int rlen, nrlen; - char *ptr, *nptr; - - ldap_pvt_thread_rdwr_rlock(&bdb->bi_tree_rdwr); - o = bdb_find_id_node(id, bdb->bi_tree); - rlen = be->be_suffix[0]->bv_len + 1; - nrlen = be->be_nsuffix[0]->bv_len + 1; - for (n = o; n && n->i_parent; n=n->i_parent) { - rlen += n->i_rdn->rdn.bv_len + 1; - nrlen += n->i_rdn->nrdn.bv_len + 1; - } e->e_name.bv_len = rlen - 1; e->e_nname.bv_len = nrlen - 1; - e->e_name.bv_val = ch_malloc(rlen + nrlen); - e->e_nname.bv_val = e->e_name.bv_val + rlen; + e->e_name.bv_val = ch_malloc(rlen); + e->e_nname.bv_val = ch_malloc(nrlen); ptr = e->e_name.bv_val; nptr = e->e_nname.bv_val; - for (n = o; n && n->i_parent; n=n->i_parent) { - ptr = slap_strcopy(ptr, n->i_rdn->rdn.bv_val); - *ptr++ = ','; - nptr = slap_strcopy(nptr, n->i_rdn->nrdn.bv_val); - *nptr++ = ','; + for ( ei = BEI(e); ei && ei->bei_id; ei=ei->bei_parent ) { + ptr = lutil_strcopy(ptr, ei->bei_rdn.bv_val); + nptr = lutil_strcopy(nptr, ei->bei_nrdn.bv_val); + if ( ei->bei_parent ) { + *ptr++ = ','; + *nptr++ = ','; + } } - ldap_pvt_thread_rdwr_runlock(&bdb->bi_tree_rdwr); - - strcpy(ptr, be->be_suffix[0]->bv_val); - strcpy(nptr, be->be_nsuffix[0]->bv_val); + BEI(e)->bei_modrdns = max; + ptr[-1] = '\0'; + nptr[-1] = '\0'; return 0; } +/* We add two elements to the DN2ID database - a data item under the parent's + * entryID containing the child's RDN and entryID, and an item under the + * child's entryID containing the parent's entryID. + */ int -bdb_dn2id_add( - BackendDB *be, +hdb_dn2id_add( + Operation *op, DB_TXN *txn, - struct berval *pdn, + EntryInfo *eip, Entry *e ) { - struct bdb_info *bdb = (struct bdb_info *) be->be_private; - int rc, rlen, nrlen; + struct bdb_info *bdb = (struct bdb_info *) op->o_bd->be_private; + DB *db = bdb->bi_dn2id->bdi_db; DBT key, data; - DB *db = bdb->bi_id2parent->bdi_db; + ID nid; + int rc, rlen, nrlen; diskNode *d; - idNode *n; + char *ptr; + + Debug( LDAP_DEBUG_TRACE, "=> hdb_dn2id_add 0x%lx: \"%s\"\n", + e->e_id, e->e_ndn, 0 ); - nrlen = dn_rdnlen( be, &e->e_nname ); + nrlen = dn_rdnlen( op->o_bd, &e->e_nname ); if (nrlen) { - rlen = dn_rdnlen( be, &e->e_name ); + rlen = dn_rdnlen( op->o_bd, &e->e_name ); } else { - rlen = 0; - } - - d = ch_malloc(sizeof(diskNode) + rlen + nrlen + 2); - d->rdn.bv_len = rlen; - d->nrdn.bv_len = nrlen; - d->rdn.bv_val = (char *)(d+1); - d->nrdn.bv_val = d->rdn.bv_val + rlen + 1; - strncpy(d->rdn.bv_val, e->e_dn, rlen); - d->rdn.bv_val[rlen] = '\0'; - strncpy(d->nrdn.bv_val, e->e_ndn, nrlen); - d->nrdn.bv_val[nrlen] = '\0'; - d->rdn.bv_val -= (long)d; - d->nrdn.bv_val -= (long)d; - - if (pdn->bv_len) { - bdb_dn2id(be, txn, pdn, &d->parent); - } else { - d->parent = 0; + nrlen = e->e_nname.bv_len; + rlen = e->e_name.bv_len; } + d = op->o_tmpalloc(sizeof(diskNode) + rlen + nrlen, op->o_tmpmemctx); + d->nrdnlen[1] = nrlen & 0xff; + d->nrdnlen[0] = (nrlen >> 8) | 0x80; + ptr = lutil_strncopy( d->nrdn, e->e_nname.bv_val, nrlen ); + *ptr++ = '\0'; + ptr = lutil_strncopy( ptr, e->e_name.bv_val, rlen ); + *ptr++ = '\0'; + BDB_ID2DISK( e->e_id, ptr ); + DBTzero(&key); DBTzero(&data); - key.data = &e->e_id; key.size = sizeof(ID); key.flags = DB_DBT_USERMEM; + BDB_ID2DISK( eip->bei_id, &nid ); + + key.data = &nid; + + /* Need to make dummy root node once. Subsequent attempts + * will fail harmlessly. + */ + if ( eip->bei_id == 0 ) { + diskNode dummy = {{0, 0}, "", "", ""}; + data.data = &dummy; + data.size = sizeof(diskNode); + data.flags = DB_DBT_USERMEM; + + db->put( db, txn, &key, &data, DB_NODUPDATA ); + } data.data = d; - data.size = sizeof(diskNode) + rlen + nrlen + 2; + data.size = sizeof(diskNode) + rlen + nrlen; data.flags = DB_DBT_USERMEM; - rc = db->put( db, txn, &key, &data, DB_NOOVERWRITE ); + rc = db->put( db, txn, &key, &data, DB_NODUPDATA ); if (rc == 0) { - ldap_pvt_thread_rdwr_wlock(&bdb->bi_tree_rdwr); - n = bdb_add_node( e->e_id, data.data, bdb); - ldap_pvt_thread_rdwr_wunlock(&bdb->bi_tree_rdwr); - - if (d->parent) { - ldap_pvt_thread_rdwr_rlock(&bdb->bi_tree_rdwr); - bdb_insert_kid(n, bdb->bi_tree); - ldap_pvt_thread_rdwr_runlock(&bdb->bi_tree_rdwr); + BDB_ID2DISK( e->e_id, &nid ); + BDB_ID2DISK( eip->bei_id, ptr ); + d->nrdnlen[0] ^= 0x80; + + rc = db->put( db, txn, &key, &data, DB_NODUPDATA ); + } + + /* Update all parents' IDL cache entries */ + if ( rc == 0 && bdb->bi_idl_cache_size ) { + ID tmp[2]; + char *ptr = ((char *)&tmp[1])-1; + key.data = ptr; + key.size = sizeof(ID)+1; + tmp[1] = eip->bei_id; + *ptr = DN_ONE_PREFIX; + bdb_idl_cache_add_id( bdb, db, &key, e->e_id ); + *ptr = DN_SUBTREE_PREFIX; + for (; eip && eip->bei_parent->bei_id; eip = eip->bei_parent) { + tmp[1] = eip->bei_id; + bdb_idl_cache_add_id( bdb, db, &key, e->e_id ); } - } else { - free(d); } + +leave: + op->o_tmpfree( d, op->o_tmpmemctx ); + Debug( LDAP_DEBUG_TRACE, "<= hdb_dn2id_add 0x%lx: %d\n", e->e_id, rc, 0 ); + return rc; } int -bdb_dn2id_delete( - BackendDB *be, +hdb_dn2id_delete( + Operation *op, DB_TXN *txn, - char *pdn, + EntryInfo *eip, Entry *e ) { - struct bdb_info *bdb = (struct bdb_info *) be->be_private; + struct bdb_info *bdb = (struct bdb_info *) op->o_bd->be_private; + DB *db = bdb->bi_dn2id->bdi_db; + DBT key, data; + DBC *cursor; + diskNode *d; int rc; - DBT key; - DB *db = bdb->bi_id2parent->bdi_db; - idNode *n; + ID nid; + unsigned char dlen[2]; + DB_LOCK lock; + + Debug( LDAP_DEBUG_TRACE, "=> hdb_dn2id_delete 0x%lx: \"%s\"\n", + e->e_id, e->e_ndn, 0 ); DBTzero(&key); - key.size = sizeof(e->e_id); - key.data = &e->e_id; + key.size = sizeof(ID); + key.ulen = key.size; + key.flags = DB_DBT_USERMEM; + BDB_ID2DISK( eip->bei_id, &nid ); - rc = db->del( db, txn, &key, 0); + DBTzero(&data); + data.size = sizeof(diskNode) + BEI(e)->bei_nrdn.bv_len - sizeof(ID) - 1; + data.ulen = data.size; + data.dlen = data.size; + data.flags = DB_DBT_USERMEM | DB_DBT_PARTIAL; + + key.data = &nid; + + d = op->o_tmpalloc( data.size, op->o_tmpmemctx ); + d->nrdnlen[1] = BEI(e)->bei_nrdn.bv_len & 0xff; + d->nrdnlen[0] = (BEI(e)->bei_nrdn.bv_len >> 8) | 0x80; + dlen[0] = d->nrdnlen[0]; + dlen[1] = d->nrdnlen[1]; + strcpy( d->nrdn, BEI(e)->bei_nrdn.bv_val ); + data.data = d; - ldap_pvt_thread_rdwr_wlock(&bdb->bi_tree_rdwr); - n = avl_delete(&bdb->bi_tree, (void *)e->e_id, (AVL_CMP)node_find_cmp); - if (n) { - if (n->i_parent) { - ldap_pvt_thread_rdwr_wlock(&n->i_parent->i_kids_rdwr); - avl_delete(&n->i_parent->i_kids, n->i_rdn->nrdn.bv_val, - (AVL_CMP)node_frdn_cmp); - ldap_pvt_thread_rdwr_wunlock(&n->i_parent->i_kids_rdwr); - } - free(n->i_rdn); - ldap_pvt_thread_rdwr_destroy(&n->i_kids_rdwr); - free(n); + rc = db->cursor( db, txn, &cursor, bdb->bi_db_opflags ); + if ( rc ) goto leave; + + /* We hold this lock until the TXN completes */ + rc = bdb_dn2id_lock( bdb, &e->e_nname, 1, TXN_ID( txn ), &lock ); + if ( rc ) goto nolock; + + /* Delete our ID from the parent's list */ + rc = cursor->c_get( cursor, &key, &data, DB_GET_BOTH_RANGE ); + if ( rc == 0 ) { + if ( dlen[1] == d->nrdnlen[1] && dlen[0] == d->nrdnlen[0] && + !strcmp( d->nrdn, BEI(e)->bei_nrdn.bv_val )) + rc = cursor->c_del( cursor, 0 ); + else + rc = DB_NOTFOUND; } - if (e->e_id == 1) - bdb->bi_troot = NULL; - ldap_pvt_thread_rdwr_wunlock(&bdb->bi_tree_rdwr); + /* Delete our ID from the tree. With sorted duplicates, this + * will leave any child nodes still hanging around. This is OK + * for modrdn, which will add our info back in later. + */ + if ( rc == 0 ) { + BDB_ID2DISK( e->e_id, &nid ); + rc = cursor->c_get( cursor, &key, &data, DB_SET ); + if ( rc == 0 ) + rc = cursor->c_del( cursor, 0 ); + } + +nolock: + cursor->c_close( cursor ); +leave: + op->o_tmpfree( d, op->o_tmpmemctx ); + + /* Delete IDL cache entries */ + if ( rc == 0 && bdb->bi_idl_cache_size ) { + ID tmp[2]; + char *ptr = ((char *)&tmp[1])-1; + key.data = ptr; + key.size = sizeof(ID)+1; + tmp[1] = eip->bei_id; + *ptr = DN_ONE_PREFIX; + bdb_idl_cache_del_id( bdb, db, &key, e->e_id ); + *ptr = DN_SUBTREE_PREFIX; + for (; eip && eip->bei_parent->bei_id; eip = eip->bei_parent) { + tmp[1] = eip->bei_id; + bdb_idl_cache_del_id( bdb, db, &key, e->e_id ); + } + } + Debug( LDAP_DEBUG_TRACE, "<= hdb_dn2id_delete 0x%lx: %d\n", e->e_id, rc, 0 ); return rc; } + int -bdb_dn2id_matched( - BackendDB *be, - DB_TXN *txn, +hdb_dn2id( + Operation *op, struct berval *in, - ID *id, - ID *id2 ) + EntryInfo *ei, + u_int32_t locker, + DB_LOCK *lock ) { - struct bdb_info *bdb = (struct bdb_info *) be->be_private; - int i; - char **rdns; - idNode *n, *p; + struct bdb_info *bdb = (struct bdb_info *) op->o_bd->be_private; + DB *db = bdb->bi_dn2id->bdi_db; + DBT key, data; + DBC *cursor; + int rc = 0, nrlen; + diskNode *d; + char *ptr; + unsigned char dlen[2]; + ID idp, parentID; - if (!bdb->bi_troot) - return DB_NOTFOUND; + Debug( LDAP_DEBUG_TRACE, "=> hdb_dn2id(\"%s\")\n", in->bv_val, 0, 0 ); - p = bdb->bi_troot; - if (be_issuffix(be, in->bv_val)) { - *id = p->i_id; - return 0; + nrlen = dn_rdnlen( op->o_bd, in ); + if (!nrlen) nrlen = in->bv_len; + + DBTzero(&key); + key.size = sizeof(ID); + key.data = &idp; + key.ulen = sizeof(ID); + key.flags = DB_DBT_USERMEM; + parentID = ( ei->bei_parent != NULL ) ? ei->bei_parent->bei_id : 0; + BDB_ID2DISK( parentID, &idp ); + + DBTzero(&data); + data.size = sizeof(diskNode) + nrlen - sizeof(ID) - 1; + data.ulen = data.size * 3; + data.dlen = data.ulen; + data.flags = DB_DBT_USERMEM | DB_DBT_PARTIAL; + + rc = db->cursor( db, NULL, &cursor, bdb->bi_db_opflags ); + if ( rc ) return rc; + if ( locker ) { + cursor->locker = locker; } - rdns = ldap_explode_dn(in->bv_val, 0); - for (i=0; rdns[i]; i++); - i -= bdb->bi_nrdns; - if (i < 0) { - charray_free(rdns); - return -1; + d = op->o_tmpalloc( data.size * 3, op->o_tmpmemctx ); + d->nrdnlen[1] = nrlen & 0xff; + d->nrdnlen[0] = (nrlen >> 8) | 0x80; + dlen[0] = d->nrdnlen[0]; + dlen[1] = d->nrdnlen[1]; + ptr = lutil_strncopy( d->nrdn, in->bv_val, nrlen ); + *ptr = '\0'; + data.data = d; + + rc = bdb_dn2id_lock( bdb, in, 0, locker, lock ); + if ( rc ) goto leave; + + rc = cursor->c_get( cursor, &key, &data, DB_GET_BOTH_RANGE ); + if ( rc == 0 && (dlen[1] != d->nrdnlen[1] || dlen[0] != d->nrdnlen[0] || + strncmp( d->nrdn, in->bv_val, nrlen ))) { + rc = DB_NOTFOUND; } - n = p; - ldap_pvt_thread_rdwr_rlock(&bdb->bi_tree_rdwr); - for (--i; i>=0; i--) { - ldap_pvt_thread_rdwr_rlock(&p->i_kids_rdwr); - n = bdb_find_rdn_node(rdns[i], p->i_kids); - ldap_pvt_thread_rdwr_runlock(&p->i_kids_rdwr); - if (!n) break; - p = n; + if ( rc == 0 ) { + ptr = (char *) data.data + data.size - sizeof(ID); + BDB_DISK2ID( ptr, &ei->bei_id ); + ei->bei_rdn.bv_len = data.size - sizeof(diskNode) - nrlen; + ptr = d->nrdn + nrlen + 1; + ber_str2bv( ptr, ei->bei_rdn.bv_len, 1, &ei->bei_rdn ); + if ( ei->bei_parent != NULL && !ei->bei_parent->bei_dkids ) { + db_recno_t dkids; + /* How many children does the parent have? */ + /* FIXME: do we need to lock the parent + * entryinfo? Seems safe... + */ + cursor->c_count( cursor, &dkids, 0 ); + ei->bei_parent->bei_dkids = dkids; + } } - ldap_pvt_thread_rdwr_runlock(&bdb->bi_tree_rdwr); - charray_free(rdns); - if (n) { - *id = n->i_id; - } else if (id2) { - *id2 = p->i_id; +leave: + cursor->c_close( cursor ); + op->o_tmpfree( d, op->o_tmpmemctx ); + if( rc != 0 ) { + Debug( LDAP_DEBUG_TRACE, "<= hdb_dn2id: get failed: %s (%d)\n", + db_strerror( rc ), rc, 0 ); + } else { + Debug( LDAP_DEBUG_TRACE, "<= hdb_dn2id: got id=0x%lx\n", + ei->bei_id, 0, 0 ); } - return n ? 0 : DB_NOTFOUND; + + return rc; } int -bdb_dn2id( - BackendDB *be, - DB_TXN *txn, - struct berval *dn, - ID *id ) +hdb_dn2id_parent( + Operation *op, + u_int32_t locker, + EntryInfo *ei, + ID *idp ) { - return bdb_dn2id_matched(be, txn, dn, id, NULL); + struct bdb_info *bdb = (struct bdb_info *) op->o_bd->be_private; + DB *db = bdb->bi_dn2id->bdi_db; + DBT key, data; + DBC *cursor; + int rc = 0; + diskNode *d; + char *ptr; + ID nid; + + DBTzero(&key); + key.size = sizeof(ID); + key.data = &nid; + key.ulen = sizeof(ID); + key.flags = DB_DBT_USERMEM; + BDB_ID2DISK( ei->bei_id, &nid ); + + DBTzero(&data); + data.flags = DB_DBT_USERMEM; + + rc = db->cursor( db, NULL, &cursor, bdb->bi_db_opflags ); + if ( rc ) return rc; + if ( locker ) { + cursor->locker = locker; + } + + data.ulen = sizeof(diskNode) + (SLAP_LDAPDN_MAXLEN * 2); + d = op->o_tmpalloc( data.ulen, op->o_tmpmemctx ); + data.data = d; + + rc = cursor->c_get( cursor, &key, &data, DB_SET ); + if ( rc == 0 ) { + if (d->nrdnlen[0] & 0x80) { + rc = LDAP_OTHER; + } else { + db_recno_t dkids; + ptr = (char *) data.data + data.size - sizeof(ID); + BDB_DISK2ID( ptr, idp ); + ei->bei_nrdn.bv_len = (d->nrdnlen[0] << 8) | d->nrdnlen[1]; + ber_str2bv( d->nrdn, ei->bei_nrdn.bv_len, 1, &ei->bei_nrdn ); + ei->bei_rdn.bv_len = data.size - sizeof(diskNode) - + ei->bei_nrdn.bv_len; + ptr = d->nrdn + ei->bei_nrdn.bv_len + 1; + ber_str2bv( ptr, ei->bei_rdn.bv_len, 1, &ei->bei_rdn ); + /* How many children does this node have? */ + cursor->c_count( cursor, &dkids, 0 ); + ei->bei_dkids = dkids; + } + } + cursor->c_close( cursor ); + op->o_tmpfree( d, op->o_tmpmemctx ); + return rc; } int -bdb_dn2id_children( - BackendDB *be, +hdb_dn2id_children( + Operation *op, DB_TXN *txn, - struct berval *dn ) + Entry *e ) { + struct bdb_info *bdb = (struct bdb_info *) op->o_bd->be_private; + DB *db = bdb->bi_dn2id->bdi_db; + DBT key, data; + DBC *cursor; int rc; - struct bdb_info *bdb = (struct bdb_info *) be->be_private; ID id; - idNode *n; + diskNode d; - rc = bdb_dn2id(be, txn, dn, &id); - if (rc != 0) - return rc; + DBTzero(&key); + key.size = sizeof(ID); + key.data = &e->e_id; + key.flags = DB_DBT_USERMEM; + BDB_ID2DISK( e->e_id, &id ); - ldap_pvt_thread_rdwr_rlock(&bdb->bi_tree_rdwr); - n = bdb_find_id_node(id, bdb->bi_tree); - ldap_pvt_thread_rdwr_runlock(&bdb->bi_tree_rdwr); + /* IDL cache is in host byte order */ + if ( bdb->bi_idl_cache_size ) { + rc = bdb_idl_cache_get( bdb, db, &key, NULL ); + if ( rc != LDAP_NO_SUCH_OBJECT ) { + return rc; + } + } - if (!n->i_kids) - return DB_NOTFOUND; - else - return 0; + key.data = &id; + DBTzero(&data); + data.data = &d; + data.ulen = sizeof(d); + data.flags = DB_DBT_USERMEM | DB_DBT_PARTIAL; + data.dlen = sizeof(d); + + rc = db->cursor( db, txn, &cursor, bdb->bi_db_opflags ); + if ( rc ) return rc; + + rc = cursor->c_get( cursor, &key, &data, DB_SET ); + if ( rc == 0 ) { + db_recno_t dkids; + rc = cursor->c_count( cursor, &dkids, 0 ); + if ( rc == 0 ) { + BEI(e)->bei_dkids = dkids; + if ( dkids < 2 ) rc = DB_NOTFOUND; + } + } + cursor->c_close( cursor ); + return rc; } -/* Since we don't store IDLs for onelevel or subtree, we have to construct - * them on the fly... Perhaps the i_kids tree ought to just be an IDL? +/* bdb_dn2idl: + * We can't just use bdb_idl_fetch_key because + * 1 - our data items are longer than just an entry ID + * 2 - our data items are sorted alphabetically by nrdn, not by ID. + * + * We descend the tree recursively, so we define this cookie + * to hold our necessary state information. The bdb_dn2idl_internal + * function uses this cookie when calling itself. */ + +struct dn2id_cookie { + struct bdb_info *bdb; + Operation *op; + EntryInfo *ei; + ID *ids; + ID *tmp; + ID *buf; + DB *db; + DBC *dbc; + DBT key; + DBT data; + ID dbuf; + ID id; + ID nid; + int rc; + int depth; + char need_sort; + char prefix; +}; + static int -insert_one( - idNode *n, - ID *ids -) +apply_func( + void *data, + void *arg ) { - return bdb_idl_insert(ids, n->i_id); + EntryInfo *ei = data; + ID *idl = arg; + + bdb_idl_append_one( idl, ei->bei_id ); + return 0; } static int -insert_sub( - idNode *n, - ID *ids +hdb_dn2idl_internal( + struct dn2id_cookie *cx ) { - int rc; + BDB_IDL_ZERO( cx->tmp ); + + if ( cx->bdb->bi_idl_cache_size ) { + char *ptr = ((char *)&cx->id)-1; + + cx->key.data = ptr; + cx->key.size = sizeof(ID)+1; + if ( cx->prefix == DN_SUBTREE_PREFIX ) { + ID *ids = cx->depth ? cx->tmp : cx->ids; + *ptr = cx->prefix; + cx->rc = bdb_idl_cache_get(cx->bdb, cx->db, &cx->key, ids); + if ( cx->rc == LDAP_SUCCESS ) { + if ( cx->depth ) { + bdb_idl_append( cx->ids, cx->tmp ); + cx->need_sort = 1; + } + return cx->rc; + } + } + *ptr = DN_ONE_PREFIX; + cx->rc = bdb_idl_cache_get(cx->bdb, cx->db, &cx->key, cx->tmp); + if ( cx->rc == LDAP_SUCCESS ) { + goto gotit; + } + if ( cx->rc == DB_NOTFOUND ) { + return cx->rc; + } + } - rc = bdb_idl_insert(ids, n->i_id); - if (rc == 0) { - ldap_pvt_thread_rdwr_rlock(&n->i_kids_rdwr); - rc = avl_apply(n->i_kids, (AVL_APPLY)insert_sub, ids, -1, - AVL_INORDER); - ldap_pvt_thread_rdwr_runlock(&n->i_kids_rdwr); + bdb_cache_entryinfo_lock( cx->ei ); + + /* If number of kids in the cache differs from on-disk, load + * up all the kids from the database + */ + if ( cx->ei->bei_ckids+1 != cx->ei->bei_dkids ) { + EntryInfo ei; + db_recno_t dkids = cx->ei->bei_dkids; + ei.bei_parent = cx->ei; + + /* Only one thread should load the cache */ + while ( cx->ei->bei_state & CACHE_ENTRY_ONELEVEL ) { + bdb_cache_entryinfo_unlock( cx->ei ); + ldap_pvt_thread_yield(); + bdb_cache_entryinfo_lock( cx->ei ); + if ( cx->ei->bei_ckids+1 == cx->ei->bei_dkids ) { + goto synced; + } + } + + cx->ei->bei_state |= CACHE_ENTRY_ONELEVEL; + + bdb_cache_entryinfo_unlock( cx->ei ); + + cx->rc = cx->db->cursor( cx->db, NULL, &cx->dbc, + cx->bdb->bi_db_opflags ); + if ( cx->rc ) + goto done_one; + + cx->data.data = &cx->dbuf; + cx->data.ulen = sizeof(ID); + cx->data.dlen = sizeof(ID); + cx->data.flags = DB_DBT_USERMEM | DB_DBT_PARTIAL; + + /* The first item holds the parent ID. Ignore it. */ + cx->key.data = &cx->nid; + cx->key.size = sizeof(ID); + cx->rc = cx->dbc->c_get( cx->dbc, &cx->key, &cx->data, DB_SET ); + if ( cx->rc ) { + cx->dbc->c_close( cx->dbc ); + goto done_one; + } + + /* If the on-disk count is zero we've never checked it. + * Count it now. + */ + if ( !dkids ) { + cx->dbc->c_count( cx->dbc, &dkids, 0 ); + cx->ei->bei_dkids = dkids; + } + + cx->data.data = cx->buf; + cx->data.ulen = BDB_IDL_UM_SIZE * sizeof(ID); + cx->data.flags = DB_DBT_USERMEM; + + if ( dkids > 1 ) { + /* Fetch the rest of the IDs in a loop... */ + while ( (cx->rc = cx->dbc->c_get( cx->dbc, &cx->key, &cx->data, + DB_MULTIPLE | DB_NEXT_DUP )) == 0 ) { + u_int8_t *j; + size_t len; + void *ptr; + DB_MULTIPLE_INIT( ptr, &cx->data ); + while (ptr) { + DB_MULTIPLE_NEXT( ptr, &cx->data, j, len ); + if (j) { + EntryInfo *ei2; + diskNode *d = (diskNode *)j; + short nrlen; + + BDB_DISK2ID( j + len - sizeof(ID), &ei.bei_id ); + nrlen = ((d->nrdnlen[0] ^ 0x80) << 8) | d->nrdnlen[1]; + ei.bei_nrdn.bv_len = nrlen; + /* nrdn/rdn are set in-place. + * hdb_cache_load will copy them as needed + */ + ei.bei_nrdn.bv_val = d->nrdn; + ei.bei_rdn.bv_len = len - sizeof(diskNode) + - ei.bei_nrdn.bv_len; + ei.bei_rdn.bv_val = d->nrdn + ei.bei_nrdn.bv_len + 1; + bdb_idl_append_one( cx->tmp, ei.bei_id ); + hdb_cache_load( cx->bdb, &ei, &ei2 ); + } + } + } + } + + cx->rc = cx->dbc->c_close( cx->dbc ); +done_one: + bdb_cache_entryinfo_lock( cx->ei ); + cx->ei->bei_state ^= CACHE_ENTRY_ONELEVEL; + bdb_cache_entryinfo_unlock( cx->ei ); + if ( cx->rc ) + return cx->rc; + + } else { + /* The in-memory cache is in sync with the on-disk data. + * do we have any kids? + */ +synced: + cx->rc = 0; + if ( cx->ei->bei_ckids > 0 ) { + /* Walk the kids tree; order is irrelevant since bdb_idl_sort + * will sort it later. + */ + avl_apply( cx->ei->bei_kids, apply_func, + cx->tmp, -1, AVL_POSTORDER ); + } + bdb_cache_entryinfo_unlock( cx->ei ); } - return rc; + + if ( !BDB_IDL_IS_RANGE( cx->tmp ) && cx->tmp[0] > 3 ) + bdb_idl_sort( cx->tmp, cx->buf ); + if ( cx->bdb->bi_idl_cache_max_size && !BDB_IDL_IS_ZERO( cx->tmp )) { + char *ptr = ((char *)&cx->id)-1; + cx->key.data = ptr; + cx->key.size = sizeof(ID)+1; + *ptr = DN_ONE_PREFIX; + bdb_idl_cache_put( cx->bdb, cx->db, &cx->key, cx->tmp, cx->rc ); + } + +gotit: + if ( !BDB_IDL_IS_ZERO( cx->tmp )) { + if ( cx->prefix == DN_SUBTREE_PREFIX ) { + bdb_idl_append( cx->ids, cx->tmp ); + cx->need_sort = 1; + if ( !(cx->ei->bei_state & CACHE_ENTRY_NO_GRANDKIDS)) { + ID *save, idcurs; + EntryInfo *ei = cx->ei; + int nokids = 1; + save = cx->op->o_tmpalloc( BDB_IDL_SIZEOF( cx->tmp ), + cx->op->o_tmpmemctx ); + BDB_IDL_CPY( save, cx->tmp ); + + idcurs = 0; + cx->depth++; + for ( cx->id = bdb_idl_first( save, &idcurs ); + cx->id != NOID; + cx->id = bdb_idl_next( save, &idcurs )) { + cx->ei = bdb_cache_find_info( cx->bdb, cx->id ); + if ( !cx->ei || + ( cx->ei->bei_state & CACHE_ENTRY_NO_KIDS )) + continue; + + BDB_ID2DISK( cx->id, &cx->nid ); + hdb_dn2idl_internal( cx ); + if ( !BDB_IDL_IS_ZERO( cx->tmp )) + nokids = 0; + } + cx->depth--; + cx->op->o_tmpfree( save, cx->op->o_tmpmemctx ); + if ( nokids ) ei->bei_state |= CACHE_ENTRY_NO_GRANDKIDS; + } + /* Make sure caller knows it had kids! */ + cx->tmp[0]=1; + + cx->rc = 0; + } else { + BDB_IDL_CPY( cx->ids, cx->tmp ); + } + } + return cx->rc; } int -bdb_dn2idl( - BackendDB *be, - struct berval *dn, - int prefix, - ID *ids ) +hdb_dn2idl( + Operation *op, + Entry *e, + ID *ids, + ID *stack ) { - struct bdb_info *bdb = (struct bdb_info *) be->be_private; - int rc; - ID id; - idNode *n; + struct bdb_info *bdb = (struct bdb_info *)op->o_bd->be_private; + struct dn2id_cookie cx; - if (prefix == DN_SUBTREE_PREFIX && be_issuffix(be, dn->bv_val)) { - BDB_IDL_ALL(bdb, ids); + Debug( LDAP_DEBUG_TRACE, "=> hdb_dn2idl(\"%s\")\n", + e->e_nname.bv_val, 0, 0 ); + +#ifndef BDB_MULTIPLE_SUFFIXES + if ( op->ors_scope != LDAP_SCOPE_ONELEVEL && + BEI(e)->bei_parent->bei_id == 0 ) + { + BDB_IDL_ALL( bdb, ids ); return 0; } - - rc = bdb_dn2id(be, NULL, dn, &id); - if (rc) return rc; - - ldap_pvt_thread_rdwr_rlock(&bdb->bi_tree_rdwr); - n = bdb_find_id_node(id, bdb->bi_tree); - ldap_pvt_thread_rdwr_runlock(&bdb->bi_tree_rdwr); - - ids[0] = 0; - ldap_pvt_thread_rdwr_rlock(&n->i_kids_rdwr); - if (prefix == DN_ONE_PREFIX) { - rc = avl_apply(n->i_kids, (AVL_APPLY)insert_one, ids, -1, - AVL_INORDER); +#endif + + cx.id = e->e_id; + BDB_ID2DISK( cx.id, &cx.nid ); + cx.ei = e->e_id ? BEI(e) : &bdb->bi_cache.c_dntree; + cx.bdb = bdb; + cx.db = cx.bdb->bi_dn2id->bdi_db; + cx.prefix = (op->ors_scope == LDAP_SCOPE_ONELEVEL) ? + DN_ONE_PREFIX : DN_SUBTREE_PREFIX; + cx.ids = ids; + cx.tmp = stack; + cx.buf = stack + BDB_IDL_UM_SIZE; + cx.op = op; + cx.need_sort = 0; + cx.depth = 0; + + if ( cx.prefix == DN_SUBTREE_PREFIX ) { + ids[0] = 1; + ids[1] = cx.id; } else { - rc = avl_apply(n->i_kids, (AVL_APPLY)insert_sub, ids, -1, - AVL_INORDER); + BDB_IDL_ZERO( ids ); } - ldap_pvt_thread_rdwr_runlock(&n->i_kids_rdwr); - return rc; + if ( cx.ei->bei_state & CACHE_ENTRY_NO_KIDS ) + return LDAP_SUCCESS; + + DBTzero(&cx.key); + cx.key.ulen = sizeof(ID); + cx.key.size = sizeof(ID); + cx.key.flags = DB_DBT_USERMEM; + + DBTzero(&cx.data); + + hdb_dn2idl_internal(&cx); + if ( cx.need_sort ) { + char *ptr = ((char *)&cx.id)-1; + if ( !BDB_IDL_IS_RANGE( cx.ids ) && cx.ids[0] > 3 ) + bdb_idl_sort( cx.ids, cx.tmp ); + cx.key.data = ptr; + cx.key.size = sizeof(ID)+1; + *ptr = cx.prefix; + cx.id = e->e_id; + bdb_idl_cache_put( cx.bdb, cx.db, &cx.key, cx.ids, cx.rc ); + } + + if ( cx.rc == DB_NOTFOUND ) + cx.rc = LDAP_SUCCESS; + + return cx.rc; } #endif /* BDB_HIER */