From 42a8de3bf336cc03bb29cf36ddee6a5b9379f21c Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Fri, 12 Aug 2011 05:16:53 -0700 Subject: [PATCH] Copied from back-bdb to start back-mdb --- servers/slapd/back-mdb/Makefile.in | 53 + servers/slapd/back-mdb/add.c | 479 ++++ servers/slapd/back-mdb/attr.c | 441 ++++ servers/slapd/back-mdb/back-mdb.h | 163 ++ servers/slapd/back-mdb/bind.c | 166 ++ servers/slapd/back-mdb/compare.c | 143 ++ servers/slapd/back-mdb/config.c | 942 ++++++++ servers/slapd/back-mdb/dbcache.c | 119 + servers/slapd/back-mdb/delete.c | 601 +++++ servers/slapd/back-mdb/dn2entry.c | 84 + servers/slapd/back-mdb/dn2id.c | 844 +++++++ servers/slapd/back-mdb/error.c | 62 + servers/slapd/back-mdb/extended.c | 54 + servers/slapd/back-mdb/filterindex.c | 1179 ++++++++++ servers/slapd/back-mdb/id2entry.c | 440 ++++ servers/slapd/back-mdb/idl.c | 1575 +++++++++++++ servers/slapd/back-mdb/idl.h | 74 + servers/slapd/back-mdb/index.c | 574 +++++ servers/slapd/back-mdb/init.c | 448 ++++ servers/slapd/back-mdb/key.c | 98 + servers/slapd/back-mdb/mdb.c | 3246 ++++++++++++++++++++++++++ servers/slapd/back-mdb/mdb.h | 153 ++ servers/slapd/back-mdb/midl.c | 109 + servers/slapd/back-mdb/midl.h | 78 + servers/slapd/back-mdb/modify.c | 782 +++++++ servers/slapd/back-mdb/modrdn.c | 838 +++++++ servers/slapd/back-mdb/monitor.c | 725 ++++++ servers/slapd/back-mdb/nextid.c | 80 + servers/slapd/back-mdb/operational.c | 151 ++ servers/slapd/back-mdb/proto-mdb.h | 350 +++ servers/slapd/back-mdb/referral.c | 152 ++ servers/slapd/back-mdb/search.c | 1383 +++++++++++ servers/slapd/back-mdb/tools.c | 999 ++++++++ 33 files changed, 17585 insertions(+) create mode 100644 servers/slapd/back-mdb/Makefile.in create mode 100644 servers/slapd/back-mdb/add.c create mode 100644 servers/slapd/back-mdb/attr.c create mode 100644 servers/slapd/back-mdb/back-mdb.h create mode 100644 servers/slapd/back-mdb/bind.c create mode 100644 servers/slapd/back-mdb/compare.c create mode 100644 
servers/slapd/back-mdb/config.c create mode 100644 servers/slapd/back-mdb/dbcache.c create mode 100644 servers/slapd/back-mdb/delete.c create mode 100644 servers/slapd/back-mdb/dn2entry.c create mode 100644 servers/slapd/back-mdb/dn2id.c create mode 100644 servers/slapd/back-mdb/error.c create mode 100644 servers/slapd/back-mdb/extended.c create mode 100644 servers/slapd/back-mdb/filterindex.c create mode 100644 servers/slapd/back-mdb/id2entry.c create mode 100644 servers/slapd/back-mdb/idl.c create mode 100644 servers/slapd/back-mdb/idl.h create mode 100644 servers/slapd/back-mdb/index.c create mode 100644 servers/slapd/back-mdb/init.c create mode 100644 servers/slapd/back-mdb/key.c create mode 100644 servers/slapd/back-mdb/mdb.c create mode 100644 servers/slapd/back-mdb/mdb.h create mode 100644 servers/slapd/back-mdb/midl.c create mode 100644 servers/slapd/back-mdb/midl.h create mode 100644 servers/slapd/back-mdb/modify.c create mode 100644 servers/slapd/back-mdb/modrdn.c create mode 100644 servers/slapd/back-mdb/monitor.c create mode 100644 servers/slapd/back-mdb/nextid.c create mode 100644 servers/slapd/back-mdb/operational.c create mode 100644 servers/slapd/back-mdb/proto-mdb.h create mode 100644 servers/slapd/back-mdb/referral.c create mode 100644 servers/slapd/back-mdb/search.c create mode 100644 servers/slapd/back-mdb/tools.c diff --git a/servers/slapd/back-mdb/Makefile.in b/servers/slapd/back-mdb/Makefile.in new file mode 100644 index 0000000000..a0c77a0250 --- /dev/null +++ b/servers/slapd/back-mdb/Makefile.in @@ -0,0 +1,53 @@ +# Makefile.in for back-mdb +# $OpenLDAP$ +## This work is part of OpenLDAP Software . +## +## Copyright 2011 The OpenLDAP Foundation. +## All rights reserved. +## +## Redistribution and use in source and binary forms, with or without +## modification, are permitted only as authorized by the OpenLDAP +## Public License. 
+## +## A copy of this license is available in the file LICENSE in the +## top-level directory of the distribution or, alternatively, at +## . + +SRCS = init.c tools.c config.c \ + add.c bind.c compare.c delete.c modify.c modrdn.c search.c \ + extended.c referral.c operational.c \ + attr.c index.c key.c dbcache.c filterindex.c \ + dn2entry.c dn2id.c error.c id2entry.c idl.c \ + nextid.c monitor.c mdb.c midl.c + +OBJS = init.lo tools.lo config.lo \ + add.lo bind.lo compare.lo delete.lo modify.lo modrdn.lo search.lo \ + extended.lo referral.lo operational.lo \ + attr.lo index.lo key.lo dbcache.lo filterindex.lo \ + dn2entry.lo dn2id.lo error.lo id2entry.lo idl.lo \ + nextid.lo monitor.lo mdb.lo midl.lo + +LDAP_INCDIR= ../../../include +LDAP_LIBDIR= ../../../libraries + +BUILD_OPT = "--enable-mdb" +BUILD_MOD = @BUILD_MDB@ + +mod_DEFS = -DSLAPD_IMPORT +MOD_DEFS = $(@BUILD_MDB@_DEFS) +MOD_LIBS = $(MDB_LIBS) + +shared_LDAP_LIBS = $(LDAP_LIBLDAP_R_LA) $(LDAP_LIBLBER_LA) +NT_LINK_LIBS = -L.. -lslapd $(@BUILD_LIBS_DYNAMIC@_LDAP_LIBS) +UNIX_LINK_LIBS = $(@BUILD_LIBS_DYNAMIC@_LDAP_LIBS) + +LIBBASE = back_mdb + +XINCPATH = -I.. -I$(srcdir)/.. +XDEFS = $(MODULES_CPPFLAGS) + +all-local-lib: ../.backend + +../.backend: lib$(LIBBASE).a + @touch $@ + diff --git a/servers/slapd/back-mdb/add.c b/servers/slapd/back-mdb/add.c new file mode 100644 index 0000000000..58f3b18f70 --- /dev/null +++ b/servers/slapd/back-mdb/add.c @@ -0,0 +1,479 @@ +/* add.c - ldap mdb back-end add routine */ +/* $OpenLDAP$ */ +/* This work is part of OpenLDAP Software . + * + * Copyright 2000-2011 The OpenLDAP Foundation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * . 
+ */ + +#include "portable.h" + +#include +#include + +#include "back-mdb.h" + +int +mdb_add(Operation *op, SlapReply *rs ) +{ + struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private; + struct berval pdn; + Entry *p = NULL, *oe = op->ora_e; + EntryInfo *ei; + char textbuf[SLAP_TEXT_BUFLEN]; + size_t textlen = sizeof textbuf; + AttributeDescription *children = slap_schema.si_ad_children; + AttributeDescription *entry = slap_schema.si_ad_entry; + DB_TXN *ltid = NULL, *lt2; + ID eid = NOID; + struct mdb_op_info opinfo = {{{ 0 }}}; + int subentry; + DB_LOCK lock; + + int num_retries = 0; + int success; + + LDAPControl **postread_ctrl = NULL; + LDAPControl *ctrls[SLAP_MAX_RESPONSE_CONTROLS]; + int num_ctrls = 0; + +#ifdef LDAP_X_TXN + int settle = 0; +#endif + + Debug(LDAP_DEBUG_ARGS, "==> " LDAP_XSTRING(mdb_add) ": %s\n", + op->ora_e->e_name.bv_val, 0, 0); + +#ifdef LDAP_X_TXN + if( op->o_txnSpec ) { + /* acquire connection lock */ + ldap_pvt_thread_mutex_lock( &op->o_conn->c_mutex ); + if( op->o_conn->c_txn == CONN_TXN_INACTIVE ) { + rs->sr_text = "invalid transaction identifier"; + rs->sr_err = LDAP_X_TXN_ID_INVALID; + goto txnReturn; + } else if( op->o_conn->c_txn == CONN_TXN_SETTLE ) { + settle=1; + goto txnReturn; + } + + if( op->o_conn->c_txn_backend == NULL ) { + op->o_conn->c_txn_backend = op->o_bd; + + } else if( op->o_conn->c_txn_backend != op->o_bd ) { + rs->sr_text = "transaction cannot span multiple database contexts"; + rs->sr_err = LDAP_AFFECTS_MULTIPLE_DSAS; + goto txnReturn; + } + + /* insert operation into transaction */ + + rs->sr_text = "transaction specified"; + rs->sr_err = LDAP_X_TXN_SPECIFY_OKAY; + +txnReturn: + /* release connection lock */ + ldap_pvt_thread_mutex_unlock( &op->o_conn->c_mutex ); + + if( !settle ) { + send_ldap_result( op, rs ); + return rs->sr_err; + } + } +#endif + + ctrls[num_ctrls] = 0; + + /* check entry's schema */ + rs->sr_err = entry_schema_check( op, op->ora_e, NULL, + get_relax(op), 1, NULL, &rs->sr_text, 
textbuf, textlen ); + if ( rs->sr_err != LDAP_SUCCESS ) { + Debug( LDAP_DEBUG_TRACE, + LDAP_XSTRING(mdb_add) ": entry failed schema check: " + "%s (%d)\n", rs->sr_text, rs->sr_err, 0 ); + goto return_results; + } + + /* add opattrs to shadow as well, only missing attrs will actually + * be added; helps compatibility with older OL versions */ + rs->sr_err = slap_add_opattrs( op, &rs->sr_text, textbuf, textlen, 1 ); + if ( rs->sr_err != LDAP_SUCCESS ) { + Debug( LDAP_DEBUG_TRACE, + LDAP_XSTRING(mdb_add) ": entry failed op attrs add: " + "%s (%d)\n", rs->sr_text, rs->sr_err, 0 ); + goto return_results; + } + + if ( get_assert( op ) && + ( test_filter( op, op->ora_e, get_assertion( op )) != LDAP_COMPARE_TRUE )) + { + rs->sr_err = LDAP_ASSERTION_FAILED; + goto return_results; + } + + subentry = is_entry_subentry( op->ora_e ); + + /* begin transaction */ + rs->sr_err = TXN_BEGIN( mdb->bi_dbenv, NULL, <id, + mdb->bi_db_opflags ); + rs->sr_text = NULL; + if( rs->sr_err != 0 ) { + Debug( LDAP_DEBUG_TRACE, + LDAP_XSTRING(mdb_add) ": txn_begin failed: %s (%d)\n", + db_strerror(rs->sr_err), rs->sr_err, 0 ); + rs->sr_err = LDAP_OTHER; + rs->sr_text = "internal error"; + goto return_results; + } + + opinfo.boi_oe.oe_key = mdb; + opinfo.boi_txn = ltid; + opinfo.boi_err = 0; + opinfo.boi_acl_cache = op->o_do_not_cache; + LDAP_SLIST_INSERT_HEAD( &op->o_extra, &opinfo.boi_oe, oe_next ); + + /* + * Get the parent dn and see if the corresponding entry exists. 
+ */ + if ( be_issuffix( op->o_bd, &op->ora_e->e_nname ) ) { + pdn = slap_empty_bv; + } else { + dnParent( &op->ora_e->e_nname, &pdn ); + } + + /* get entry or parent */ + rs->sr_err = mdb_dn2entry( op, ltid, &op->ora_e->e_nname, &ei, + 1, &lock ); + switch( rs->sr_err ) { + case 0: + rs->sr_err = LDAP_ALREADY_EXISTS; + goto return_results; + case DB_NOTFOUND: + break; + case LDAP_BUSY: + rs->sr_text = "ldap server busy"; + goto return_results; + default: + rs->sr_err = LDAP_OTHER; + rs->sr_text = "internal error"; + goto return_results; + } + + p = ei->bei_e; + if ( !p ) + p = (Entry *)&slap_entry_root; + + if ( !bvmatch( &pdn, &p->e_nname ) ) { + rs->sr_matched = ber_strdup_x( p->e_name.bv_val, + op->o_tmpmemctx ); + rs->sr_ref = is_entry_referral( p ) + ? get_entry_referrals( op, p ) + : NULL; + if ( p != (Entry *)&slap_entry_root ) + mdb_unlocked_cache_return_entry_r( mdb, p ); + p = NULL; + Debug( LDAP_DEBUG_TRACE, + LDAP_XSTRING(mdb_add) ": parent " + "does not exist\n", 0, 0, 0 ); + + rs->sr_err = LDAP_REFERRAL; + rs->sr_flags = REP_MATCHED_MUSTBEFREED | REP_REF_MUSTBEFREED; + goto return_results; + } + + rs->sr_err = access_allowed( op, p, + children, NULL, ACL_WADD, NULL ); + + if ( ! 
rs->sr_err ) { + if ( p != (Entry *)&slap_entry_root ) + mdb_unlocked_cache_return_entry_r( mdb, p ); + p = NULL; + + Debug( LDAP_DEBUG_TRACE, + LDAP_XSTRING(mdb_add) ": no write access to parent\n", + 0, 0, 0 ); + rs->sr_err = LDAP_INSUFFICIENT_ACCESS; + rs->sr_text = "no write access to parent"; + goto return_results;; + } + + if ( p != (Entry *)&slap_entry_root ) { + if ( is_entry_subentry( p ) ) { + mdb_unlocked_cache_return_entry_r( mdb, p ); + p = NULL; + /* parent is a subentry, don't allow add */ + Debug( LDAP_DEBUG_TRACE, + LDAP_XSTRING(mdb_add) ": parent is subentry\n", + 0, 0, 0 ); + rs->sr_err = LDAP_OBJECT_CLASS_VIOLATION; + rs->sr_text = "parent is a subentry"; + goto return_results;; + } + + if ( is_entry_alias( p ) ) { + mdb_unlocked_cache_return_entry_r( mdb, p ); + p = NULL; + /* parent is an alias, don't allow add */ + Debug( LDAP_DEBUG_TRACE, + LDAP_XSTRING(mdb_add) ": parent is alias\n", + 0, 0, 0 ); + rs->sr_err = LDAP_ALIAS_PROBLEM; + rs->sr_text = "parent is an alias"; + goto return_results;; + } + + if ( is_entry_referral( p ) ) { + /* parent is a referral, don't allow add */ + rs->sr_matched = ber_strdup_x( p->e_name.bv_val, + op->o_tmpmemctx ); + rs->sr_ref = get_entry_referrals( op, p ); + mdb_unlocked_cache_return_entry_r( mdb, p ); + p = NULL; + Debug( LDAP_DEBUG_TRACE, + LDAP_XSTRING(mdb_add) ": parent is referral\n", + 0, 0, 0 ); + + rs->sr_err = LDAP_REFERRAL; + rs->sr_flags = REP_MATCHED_MUSTBEFREED | REP_REF_MUSTBEFREED; + goto return_results; + } + + } + + if ( subentry ) { + /* FIXME: */ + /* parent must be an administrative point of the required kind */ + } + + /* free parent and reader lock */ + if ( p != (Entry *)&slap_entry_root ) { + if ( p->e_nname.bv_len ) { + struct berval ppdn; + + /* ITS#5326: use parent's DN if differs from provided one */ + dnParent( &op->ora_e->e_name, &ppdn ); + if ( !dn_match( &p->e_name, &ppdn ) ) { + struct berval rdn; + struct berval newdn; + + dnRdn( &op->ora_e->e_name, &rdn ); + + 
build_new_dn( &newdn, &p->e_name, &rdn, NULL ); + if ( op->ora_e->e_name.bv_val != op->o_req_dn.bv_val ) + ber_memfree( op->ora_e->e_name.bv_val ); + op->ora_e->e_name = newdn; + + /* FIXME: should check whether + * dnNormalize(newdn) == e->e_nname ... */ + } + } + + mdb_unlocked_cache_return_entry_r( mdb, p ); + } + p = NULL; + + rs->sr_err = access_allowed( op, op->ora_e, + entry, NULL, ACL_WADD, NULL ); + + if ( ! rs->sr_err ) { + Debug( LDAP_DEBUG_TRACE, + LDAP_XSTRING(mdb_add) ": no write access to entry\n", + 0, 0, 0 ); + rs->sr_err = LDAP_INSUFFICIENT_ACCESS; + rs->sr_text = "no write access to entry"; + goto return_results;; + } + + /* + * Check ACL for attribute write access + */ + if (!acl_check_modlist(op, oe, op->ora_modlist)) { + Debug( LDAP_DEBUG_TRACE, + LDAP_XSTRING(mdb_add) ": no write access to attribute\n", + 0, 0, 0 ); + rs->sr_err = LDAP_INSUFFICIENT_ACCESS; + rs->sr_text = "no write access to attribute"; + goto return_results;; + } + + if ( eid == NOID ) { + rs->sr_err = mdb_next_id( op->o_bd, &eid ); + if( rs->sr_err != 0 ) { + Debug( LDAP_DEBUG_TRACE, + LDAP_XSTRING(mdb_add) ": next_id failed (%d)\n", + rs->sr_err, 0, 0 ); + rs->sr_err = LDAP_OTHER; + rs->sr_text = "internal error"; + goto return_results; + } + op->ora_e->e_id = eid; + } + + /* nested transaction */ + rs->sr_err = TXN_BEGIN( mdb->bi_dbenv, ltid, <2, + mdb->bi_db_opflags ); + rs->sr_text = NULL; + if( rs->sr_err != 0 ) { + Debug( LDAP_DEBUG_TRACE, + LDAP_XSTRING(mdb_add) ": txn_begin(2) failed: " + "%s (%d)\n", db_strerror(rs->sr_err), rs->sr_err, 0 ); + rs->sr_err = LDAP_OTHER; + rs->sr_text = "internal error"; + goto return_results; + } + + /* dn2id index */ + rs->sr_err = mdb_dn2id_add( op, lt2, ei, op->ora_e ); + if ( rs->sr_err != 0 ) { + Debug( LDAP_DEBUG_TRACE, + LDAP_XSTRING(mdb_add) ": dn2id_add failed: %s (%d)\n", + db_strerror(rs->sr_err), rs->sr_err, 0 ); + + switch( rs->sr_err ) { + case DB_KEYEXIST: + rs->sr_err = LDAP_ALREADY_EXISTS; + break; + default: + 
rs->sr_err = LDAP_OTHER; + } + goto return_results; + } + + /* attribute indexes */ + rs->sr_err = mdb_index_entry_add( op, lt2, op->ora_e ); + if ( rs->sr_err != LDAP_SUCCESS ) { + Debug( LDAP_DEBUG_TRACE, + LDAP_XSTRING(mdb_add) ": index_entry_add failed\n", + 0, 0, 0 ); + rs->sr_err = LDAP_OTHER; + rs->sr_text = "index generation failed"; + goto return_results; + } + + /* id2entry index */ + rs->sr_err = mdb_id2entry_add( op->o_bd, lt2, op->ora_e ); + if ( rs->sr_err != 0 ) { + Debug( LDAP_DEBUG_TRACE, + LDAP_XSTRING(mdb_add) ": id2entry_add failed\n", + 0, 0, 0 ); + rs->sr_err = LDAP_OTHER; + rs->sr_text = "entry store failed"; + goto return_results; + } + + if ( TXN_COMMIT( lt2, 0 ) != 0 ) { + rs->sr_err = LDAP_OTHER; + rs->sr_text = "txn_commit(2) failed"; + goto return_results; + } + + /* post-read */ + if( op->o_postread ) { + if( postread_ctrl == NULL ) { + postread_ctrl = &ctrls[num_ctrls++]; + ctrls[num_ctrls] = NULL; + } + if ( slap_read_controls( op, rs, op->ora_e, + &slap_post_read_bv, postread_ctrl ) ) + { + Debug( LDAP_DEBUG_TRACE, + "<=- " LDAP_XSTRING(mdb_add) ": post-read " + "failed!\n", 0, 0, 0 ); + if ( op->o_postread & SLAP_CONTROL_CRITICAL ) { + /* FIXME: is it correct to abort + * operation if control fails? 
*/ + goto return_results; + } + } + } + + if ( op->o_noop ) { + if (( rs->sr_err=TXN_ABORT( ltid )) != 0 ) { + rs->sr_text = "txn_abort (no-op) failed"; + } else { + rs->sr_err = LDAP_X_NO_OPERATION; + ltid = NULL; + goto return_results; + } + + } else { + struct berval nrdn; + + /* pick the RDN if not suffix; otherwise pick the entire DN */ + if (pdn.bv_len) { + nrdn.bv_val = op->ora_e->e_nname.bv_val; + nrdn.bv_len = pdn.bv_val - op->ora_e->e_nname.bv_val - 1; + } else { + nrdn = op->ora_e->e_nname; + } + + if(( rs->sr_err=TXN_COMMIT( ltid, 0 )) != 0 ) { + rs->sr_text = "txn_commit failed"; + } else { + rs->sr_err = LDAP_SUCCESS; + } + } + + ltid = NULL; + LDAP_SLIST_REMOVE( &op->o_extra, &opinfo.boi_oe, OpExtra, oe_next ); + opinfo.boi_oe.oe_key = NULL; + + if ( rs->sr_err != LDAP_SUCCESS ) { + Debug( LDAP_DEBUG_TRACE, + LDAP_XSTRING(mdb_add) ": %s : %s (%d)\n", + rs->sr_text, db_strerror(rs->sr_err), rs->sr_err ); + rs->sr_err = LDAP_OTHER; + goto return_results; + } + + Debug(LDAP_DEBUG_TRACE, + LDAP_XSTRING(mdb_add) ": added%s id=%08lx dn=\"%s\"\n", + op->o_noop ? " (no-op)" : "", + op->ora_e->e_id, op->ora_e->e_dn ); + + rs->sr_text = NULL; + if( num_ctrls ) rs->sr_ctrls = ctrls; + +return_results: + success = rs->sr_err; + send_ldap_result( op, rs ); + + if( ltid != NULL ) { + TXN_ABORT( ltid ); + } + if ( opinfo.boi_oe.oe_key ) { + LDAP_SLIST_REMOVE( &op->o_extra, &opinfo.boi_oe, OpExtra, oe_next ); + } + + if( success == LDAP_SUCCESS ) { + /* We own the entry now, and it can be purged at will + * Check to make sure it's the same entry we entered with. + * Possibly a callback may have mucked with it, although + * in general callbacks should treat the entry as read-only. 
+ */ + mdb_cache_deref( oe->e_private ); + if ( op->ora_e == oe ) + op->ora_e = NULL; + + if ( mdb->bi_txn_cp_kbyte ) { + TXN_CHECKPOINT( mdb->bi_dbenv, + mdb->bi_txn_cp_kbyte, mdb->bi_txn_cp_min, 0 ); + } + } + + slap_graduate_commit_csn( op ); + + if( postread_ctrl != NULL && (*postread_ctrl) != NULL ) { + slap_sl_free( (*postread_ctrl)->ldctl_value.bv_val, op->o_tmpmemctx ); + slap_sl_free( *postread_ctrl, op->o_tmpmemctx ); + } + return rs->sr_err; +} diff --git a/servers/slapd/back-mdb/attr.c b/servers/slapd/back-mdb/attr.c new file mode 100644 index 0000000000..86cdd598eb --- /dev/null +++ b/servers/slapd/back-mdb/attr.c @@ -0,0 +1,441 @@ +/* attr.c - backend routines for dealing with attributes */ +/* $OpenLDAP$ */ +/* This work is part of OpenLDAP Software . + * + * Copyright 2000-2011 The OpenLDAP Foundation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * . 
+ */
+
+#include "portable.h"
+
+#include
+
+#include
+#include
+
+#include "slap.h"
+#include "back-mdb.h"
+#include "config.h"
+#include "lutil.h"
+
+/* Find the ad, return -1 if not found,
+ * set point for insertion if ins is non-NULL
+ *
+ * Binary search over mdb->mi_attrs[0 .. mi_nattrs), comparing ad against
+ * each element's ai_desc with SLAP_PTRCMP.  On a hit the array index is
+ * returned and *ins is not written.  On a miss, *ins (when supplied)
+ * receives the index at which ad would have to be inserted to keep the
+ * array in that order.
+ */
+int
+mdb_attr_slot( struct mdb_info *mdb, AttributeDescription *ad, int *ins )
+{
+	unsigned base = 0, cursor = 0;
+	/* struct mdb_info names the element count mi_nattrs (see back-mdb.h) */
+	unsigned n = mdb->mi_nattrs;
+	int val = 0;
+
+	while ( 0 < n ) {
+		unsigned pivot = n >> 1;
+		cursor = base + pivot;
+
+		val = SLAP_PTRCMP( ad, mdb->mi_attrs[cursor]->ai_desc );
+		if ( val < 0 ) {
+			/* keep the lower half [base, cursor) */
+			n = pivot;
+		} else if ( val > 0 ) {
+			/* keep the upper half (cursor, base+n) */
+			base = cursor + 1;
+			n -= pivot + 1;
+		} else {
+			return cursor;
+		}
+	}
+	if ( ins ) {
+		/* last probe sorted before ad: insert just after it */
+		if ( val > 0 )
+			++cursor;
+		*ins = cursor;
+	}
+	return -1;
+}
+
+/* Insert a into mdb->mi_attrs at the slot reported by mdb_attr_slot().
+ * Returns -1, changing nothing, if an element with the same ai_desc is
+ * already present; returns 0 after a successful insert.
+ */
+static int
+ainfo_insert( struct mdb_info *mdb, AttrInfo *a )
+{
+	int x;
+	int i = mdb_attr_slot( mdb, a->ai_desc, &x );
+
+	/* Is it a dup? */
+	if ( i >= 0 )
+		return -1;
+
+	mdb->mi_attrs = ch_realloc( mdb->mi_attrs, ( mdb->mi_nattrs+1 ) *
+		sizeof( AttrInfo * ));
+	/* Open a gap at x by moving the tail up one element.  Source and
+	 * destination overlap here.
+	 * NOTE(review): relies on AC_MEMCPY being overlap-safe -- confirm. */
+	if ( x < mdb->mi_nattrs )
+		AC_MEMCPY( &mdb->mi_attrs[x+1], &mdb->mi_attrs[x],
+			( mdb->mi_nattrs - x ) * sizeof( AttrInfo *));
+	mdb->mi_attrs[x] = a;
+	mdb->mi_nattrs++;
+	return 0;
+}
+
+AttrInfo *
+mdb_attr_mask(
+	struct mdb_info *mdb,
+	AttributeDescription *desc )
+{
+	int i = mdb_attr_slot( mdb, desc, NULL );
+	return i < 0 ?
NULL : mdb->bi_attrs[i]; +} + +int +mdb_attr_index_config( + struct mdb_info *mdb, + const char *fname, + int lineno, + int argc, + char **argv, + struct config_reply_s *c_reply) +{ + int rc = 0; + int i; + slap_mask_t mask; + char **attrs; + char **indexes = NULL; + + attrs = ldap_str2charray( argv[0], "," ); + + if( attrs == NULL ) { + fprintf( stderr, "%s: line %d: " + "no attributes specified: %s\n", + fname, lineno, argv[0] ); + return LDAP_PARAM_ERROR; + } + + if ( argc > 1 ) { + indexes = ldap_str2charray( argv[1], "," ); + + if( indexes == NULL ) { + fprintf( stderr, "%s: line %d: " + "no indexes specified: %s\n", + fname, lineno, argv[1] ); + rc = LDAP_PARAM_ERROR; + goto done; + } + } + + if( indexes == NULL ) { + mask = mdb->bi_defaultmask; + + } else { + mask = 0; + + for ( i = 0; indexes[i] != NULL; i++ ) { + slap_mask_t index; + rc = slap_str2index( indexes[i], &index ); + + if( rc != LDAP_SUCCESS ) { + if ( c_reply ) + { + snprintf(c_reply->msg, sizeof(c_reply->msg), + "index type \"%s\" undefined", indexes[i] ); + + fprintf( stderr, "%s: line %d: %s\n", + fname, lineno, c_reply->msg ); + } + rc = LDAP_PARAM_ERROR; + goto done; + } + + mask |= index; + } + } + + if( !mask ) { + if ( c_reply ) + { + snprintf(c_reply->msg, sizeof(c_reply->msg), + "no indexes selected" ); + fprintf( stderr, "%s: line %d: %s\n", + fname, lineno, c_reply->msg ); + } + rc = LDAP_PARAM_ERROR; + goto done; + } + + for ( i = 0; attrs[i] != NULL; i++ ) { + AttrInfo *a; + AttributeDescription *ad; + const char *text; +#ifdef LDAP_COMP_MATCH + ComponentReference* cr = NULL; + AttrInfo *a_cr = NULL; +#endif + + if( strcasecmp( attrs[i], "default" ) == 0 ) { + mdb->bi_defaultmask |= mask; + continue; + } + +#ifdef LDAP_COMP_MATCH + if ( is_component_reference( attrs[i] ) ) { + rc = extract_component_reference( attrs[i], &cr ); + if ( rc != LDAP_SUCCESS ) { + if ( c_reply ) + { + snprintf(c_reply->msg, sizeof(c_reply->msg), + "index component reference\"%s\" undefined", + attrs[i] 
); + fprintf( stderr, "%s: line %d: %s\n", + fname, lineno, c_reply->msg ); + } + goto done; + } + cr->cr_indexmask = mask; + /* + * After extracting a component reference + * only the name of a attribute will be remaining + */ + } else { + cr = NULL; + } +#endif + ad = NULL; + rc = slap_str2ad( attrs[i], &ad, &text ); + + if( rc != LDAP_SUCCESS ) { + if ( c_reply ) + { + snprintf(c_reply->msg, sizeof(c_reply->msg), + "index attribute \"%s\" undefined", + attrs[i] ); + + fprintf( stderr, "%s: line %d: %s\n", + fname, lineno, c_reply->msg ); + } + goto done; + } + + if( ad == slap_schema.si_ad_entryDN || slap_ad_is_binary( ad ) ) { + if (c_reply) { + snprintf(c_reply->msg, sizeof(c_reply->msg), + "index of attribute \"%s\" disallowed", attrs[i] ); + fprintf( stderr, "%s: line %d: %s\n", + fname, lineno, c_reply->msg ); + } + rc = LDAP_UNWILLING_TO_PERFORM; + goto done; + } + + if( IS_SLAP_INDEX( mask, SLAP_INDEX_APPROX ) && !( + ad->ad_type->sat_approx + && ad->ad_type->sat_approx->smr_indexer + && ad->ad_type->sat_approx->smr_filter ) ) + { + if (c_reply) { + snprintf(c_reply->msg, sizeof(c_reply->msg), + "approx index of attribute \"%s\" disallowed", attrs[i] ); + fprintf( stderr, "%s: line %d: %s\n", + fname, lineno, c_reply->msg ); + } + rc = LDAP_INAPPROPRIATE_MATCHING; + goto done; + } + + if( IS_SLAP_INDEX( mask, SLAP_INDEX_EQUALITY ) && !( + ad->ad_type->sat_equality + && ad->ad_type->sat_equality->smr_indexer + && ad->ad_type->sat_equality->smr_filter ) ) + { + if (c_reply) { + snprintf(c_reply->msg, sizeof(c_reply->msg), + "equality index of attribute \"%s\" disallowed", attrs[i] ); + fprintf( stderr, "%s: line %d: %s\n", + fname, lineno, c_reply->msg ); + } + rc = LDAP_INAPPROPRIATE_MATCHING; + goto done; + } + + if( IS_SLAP_INDEX( mask, SLAP_INDEX_SUBSTR ) && !( + ad->ad_type->sat_substr + && ad->ad_type->sat_substr->smr_indexer + && ad->ad_type->sat_substr->smr_filter ) ) + { + if (c_reply) { + snprintf(c_reply->msg, sizeof(c_reply->msg), + "substr 
index of attribute \"%s\" disallowed", attrs[i] ); + fprintf( stderr, "%s: line %d: %s\n", + fname, lineno, c_reply->msg ); + } + rc = LDAP_INAPPROPRIATE_MATCHING; + goto done; + } + + Debug( LDAP_DEBUG_CONFIG, "index %s 0x%04lx\n", + ad->ad_cname.bv_val, mask, 0 ); + + a = (AttrInfo *) ch_malloc( sizeof(AttrInfo) ); + +#ifdef LDAP_COMP_MATCH + a->ai_cr = NULL; +#endif + a->ai_desc = ad; + + if ( mdb->bi_flags & MDB_IS_OPEN ) { + a->ai_indexmask = 0; + a->ai_newmask = mask; + } else { + a->ai_indexmask = mask; + a->ai_newmask = 0; + } + +#ifdef LDAP_COMP_MATCH + if ( cr ) { + a_cr = mdb_attr_mask( mdb, ad ); + if ( a_cr ) { + /* + * AttrInfo is already in AVL + * just add the extracted component reference + * in the AttrInfo + */ + rc = insert_component_reference( cr, &a_cr->ai_cr ); + if ( rc != LDAP_SUCCESS) { + fprintf( stderr, " error during inserting component reference in %s ", attrs[i]); + rc = LDAP_PARAM_ERROR; + goto done; + } + continue; + } else { + rc = insert_component_reference( cr, &a->ai_cr ); + if ( rc != LDAP_SUCCESS) { + fprintf( stderr, " error during inserting component reference in %s ", attrs[i]); + rc = LDAP_PARAM_ERROR; + goto done; + } + } + } +#endif + rc = ainfo_insert( mdb, a ); + if( rc ) { + if ( mdb->bi_flags & MDB_IS_OPEN ) { + AttrInfo *b = mdb_attr_mask( mdb, ad ); + /* If there is already an index defined for this attribute + * it must be replaced. 
Otherwise we end up with multiple + * olcIndex values for the same attribute */ + if ( b->ai_indexmask & MDB_INDEX_DELETING ) { + /* If we were editing this attr, reset it */ + b->ai_indexmask &= ~MDB_INDEX_DELETING; + /* If this is leftover from a previous add, commit it */ + if ( b->ai_newmask ) + b->ai_indexmask = b->ai_newmask; + b->ai_newmask = a->ai_newmask; + ch_free( a ); + rc = 0; + continue; + } + } + if (c_reply) { + snprintf(c_reply->msg, sizeof(c_reply->msg), + "duplicate index definition for attr \"%s\"", + attrs[i] ); + fprintf( stderr, "%s: line %d: %s\n", + fname, lineno, c_reply->msg ); + } + + rc = LDAP_PARAM_ERROR; + goto done; + } + } + +done: + ldap_charray_free( attrs ); + if ( indexes != NULL ) ldap_charray_free( indexes ); + + return rc; +} + +static int +mdb_attr_index_unparser( void *v1, void *v2 ) +{ + AttrInfo *ai = v1; + BerVarray *bva = v2; + struct berval bv; + char *ptr; + + slap_index2bvlen( ai->ai_indexmask, &bv ); + if ( bv.bv_len ) { + bv.bv_len += ai->ai_desc->ad_cname.bv_len + 1; + ptr = ch_malloc( bv.bv_len+1 ); + bv.bv_val = lutil_strcopy( ptr, ai->ai_desc->ad_cname.bv_val ); + *bv.bv_val++ = ' '; + slap_index2bv( ai->ai_indexmask, &bv ); + bv.bv_val = ptr; + ber_bvarray_add( bva, &bv ); + } + return 0; +} + +static AttributeDescription addef = { NULL, NULL, BER_BVC("default") }; +static AttrInfo aidef = { &addef }; + +void +mdb_attr_index_unparse( struct mdb_info *mdb, BerVarray *bva ) +{ + int i; + + if ( mdb->bi_defaultmask ) { + aidef.ai_indexmask = mdb->bi_defaultmask; + mdb_attr_index_unparser( &aidef, bva ); + } + for ( i=0; ibi_nattrs; i++ ) + mdb_attr_index_unparser( mdb->bi_attrs[i], bva ); +} + +void +mdb_attr_info_free( AttrInfo *ai ) +{ +#ifdef LDAP_COMP_MATCH + free( ai->ai_cr ); +#endif + free( ai ); +} + +void +mdb_attr_index_destroy( struct mdb_info *mdb ) +{ + int i; + + for ( i=0; ibi_nattrs; i++ ) + mdb_attr_info_free( mdb->bi_attrs[i] ); + + free( mdb->bi_attrs ); +} + +void mdb_attr_index_free( struct 
mdb_info *mdb, AttributeDescription *ad ) +{ + int i; + + i = mdb_attr_slot( mdb, ad, NULL ); + if ( i >= 0 ) { + mdb_attr_info_free( mdb->bi_attrs[i] ); + mdb->bi_nattrs--; + for (; ibi_nattrs; i++) + mdb->bi_attrs[i] = mdb->bi_attrs[i+1]; + } +} + +void mdb_attr_flush( struct mdb_info *mdb ) +{ + int i; + + for ( i=0; ibi_nattrs; i++ ) { + if ( mdb->bi_attrs[i]->ai_indexmask & MDB_INDEX_DELETING ) { + int j; + mdb_attr_info_free( mdb->bi_attrs[i] ); + mdb->bi_nattrs--; + for (j=i; jbi_nattrs; j++) + mdb->bi_attrs[j] = mdb->bi_attrs[j+1]; + i--; + } + } +} diff --git a/servers/slapd/back-mdb/back-mdb.h b/servers/slapd/back-mdb/back-mdb.h new file mode 100644 index 0000000000..9e728da922 --- /dev/null +++ b/servers/slapd/back-mdb/back-mdb.h @@ -0,0 +1,163 @@ +/* back-mdb.h - mdb back-end header file */ +/* $OpenLDAP$ */ +/* This work is part of OpenLDAP Software . + * + * Copyright 2000-2011 The OpenLDAP Foundation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * . 
+ */
+
+#ifndef _BACK_MDB_H_
+#define _BACK_MDB_H_
+
+#include
+#include "slap.h"
+#include "mdb.h"
+
+LDAP_BEGIN_DECL
+
+#define DN_BASE_PREFIX		SLAP_INDEX_EQUALITY_PREFIX
+#define DN_ONE_PREFIX	 	'%'
+#define DN_SUBTREE_PREFIX 	'@'
+
+/* Indexes into mdb_info.mi_databases[] (see the mi_ad2id / mi_dn2id /
+ * mi_id2entry accessors below).  MDB_NDB is presumably the number of these
+ * fixed databases -- TODO confirm against the code that sizes the array. */
+#define MDB_AD2ID		0
+#define MDB_DN2ID		1
+#define MDB_ID2ENTRY	2
+#define MDB_NDB			3
+
+/* The default search IDL stack cache depth */
+#define DEFAULT_SEARCH_STACK_DEPTH	16
+
+/* The minimum we can function with */
+#define MINIMUM_SEARCH_STACK_DEPTH	8
+
+#define MDB_INDICES		128
+
+/* One database: its name paired with its MDB_dbi handle */
+struct mdb_db_info {
+	struct berval	mdi_name;
+	MDB_dbi	mdi_dbi;
+};
+
+#ifdef LDAP_DEVEL
+#define MDB_MONITOR_IDX
+#endif /* LDAP_DEVEL */
+
+typedef struct mdb_monitor_t {
+	void		*mdm_cb;
+	struct berval	mdm_ndn;
+} mdb_monitor_t;
+
+/* From ldap_rq.h */
+struct re_s;
+
+/* Per-database private state of the backend; the operation handlers in this
+ * patch obtain it by casting op->o_bd->be_private. */
+struct mdb_info {
+	MDB_env		*mi_dbenv;
+
+	/* DB_ENV parameters */
+	/* The DB_ENV can be tuned via DB_CONFIG */
+	/* NOTE(review): the two comments above look inherited from back-bdb;
+	 * confirm they still apply to an MDB_env. */
+	char		*mi_dbenv_home;
+	u_int32_t	mi_dbenv_flags;
+	int			mi_dbenv_mode;
+
+	size_t		mi_mapsize;
+
+	int			mi_ndatabases;
+	int		mi_db_opflags;	/* db-specific flags */
+	struct mdb_db_info **mi_databases;
+	ldap_pvt_thread_mutex_t	mi_database_mutex;
+
+	/* Index configuration: mask for the "default" pseudo-attribute, and
+	 * the mi_nattrs-element array that attr.c searches with
+	 * mdb_attr_slot() */
+	slap_mask_t	mi_defaultmask;
+	struct mdb_attrinfo		**mi_attrs;
+	int			mi_nattrs;
+	void		*mi_search_stack;
+	int			mi_search_stack_depth;
+
+	int			mi_txn_cp;
+	u_int32_t	mi_txn_cp_min;
+	u_int32_t	mi_txn_cp_kbyte;
+	struct re_s		*mi_txn_cp_task;
+	struct re_s		*mi_index_task;
+
+	mdb_monitor_t	mi_monitor;
+
+#ifdef MDB_MONITOR_IDX
+	ldap_pvt_thread_mutex_t	mi_idx_mutex;
+	Avlnode		*mi_idx;
+#endif /* MDB_MONITOR_IDX */
+
+	/* Bit flags; values are the MDB_IS_OPEN / MDB_DEL_INDEX bits below */
+	int		mi_flags;
+#define	MDB_IS_OPEN		0x01
+#define	MDB_DEL_INDEX	0x08
+};
+
+/* Shorthand for the three fixed slots of mi_databases[] */
+#define mi_id2entry	mi_databases[MDB_ID2ENTRY]
+#define mi_dn2id	mi_databases[MDB_DN2ID]
+#define mi_ad2id	mi_databases[MDB_AD2ID]
+
+/* Per-operation state; moi_oe is an OpExtra, presumably so the struct can be
+ * linked into op->o_extra the way add.c links its opinfo -- TODO confirm. */
+struct mdb_op_info {
+	OpExtra		moi_oe;
+	MDB_txn*	moi_txn;
+	u_int32_t	moi_err;
+	char		moi_acl_cache;
+	char		moi_flag;
+};
+#define MOI_DONTFREE	1
+
+/* Copy an ID "src" to pointer "dst" in big-endian byte order */
+#define MDB_ID2DISK( src, dst ) \
+	do { int i0; ID tmp; unsigned char *_p; \
+		tmp = (src); _p = (unsigned char *)(dst); \
+		for ( i0=sizeof(ID)-1; i0>=0; i0-- ) { \
+			_p[i0] = tmp & 0xff; tmp >>= 8; \
+		} \
+	} while(0)
+
+/* Copy a pointer "src" to a pointer "dst" from big-endian to native order.
+ * NOTE(review): this macro is incomplete in this copy of the patch. The
+ * text breaks off inside the for-loop condition, and the remainder of
+ * back-mdb.h, the diff header of bind.c and the opening of its licence
+ * comment are missing with it. Restore them from the original commit.
+ */
+#define MDB_DISK2ID( src, dst ) \
+	do { unsigned i0; ID tmp = 0; unsigned char *_p; \
+		_p = (unsigned char *)(src); \
+		for ( i0=0; i0. /* NOTE(review): text lost from here up to the bind.c licence lines below */
+ *
+ * Copyright 2000-2011 The OpenLDAP Foundation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in the file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * .
+ */
+
+#include "portable.h"
+
+#include /* NOTE(review): header names of these three includes were lost in extraction */
+#include
+#include
+
+#include "back-mdb.h"
+/*
+ * mdb_bind - bind operation for this backend.
+ *
+ * Flow, as written below:
+ *  1. be_rootdn_bind() is tried first; on LDAP_SUCCESS the function
+ *     returns at once and leaves sending the result to the frontend.
+ *  2. Otherwise a transaction handle is obtained with mdb_reader_get()
+ *     and the entry for op->o_req_ndn is looked up with mdb_dn2entry().
+ *     The lookup is repeated for DB_LOCK_DEADLOCK / DB_LOCK_NOTGRANTED.
+ *  3. A missing entry, a subentry, an alias or a referral all yield
+ *     LDAP_INVALID_CREDENTIALS.
+ *  4. Only LDAP_AUTH_SIMPLE is handled: the userPassword attribute is
+ *     checked with slap_passwd_check().
+ *
+ * Returns rs->sr_err. Any non-zero result has already been sent to the
+ * client from here; on 0 nothing is sent and the frontend does it.
+ */
+int
+mdb_bind( Operation *op, SlapReply *rs )
+{
+	struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private;
+	Entry *e;
+	Attribute *a;
+	EntryInfo *ei;
+
+	AttributeDescription *password = slap_schema.si_ad_userPassword;
+
+	DB_TXN *rtxn;
+	DB_LOCK lock;
+
+	Debug( LDAP_DEBUG_ARGS,
+		"==> " LDAP_XSTRING(mdb_bind) ": dn: %s\n",
+		op->o_req_dn.bv_val, 0, 0);
+
+	/* allow noauth binds */
+	switch ( be_rootdn_bind( op, NULL ) ) {
+	case LDAP_SUCCESS:
+		/* frontend will send result */
+		return rs->sr_err = LDAP_SUCCESS;
+
+	default:
+		/* give the database a chance */
+		/* NOTE: this behavior departs from that of other backends,
+		 * since the others, in case of password checking failure
+		 * do not give the database a chance. If an entry with
+		 * rootdn's name does not exist in the database the result
+		 * will be the same. See ITS#4962 for discussion. */
+		break;
+	}
+	/* get the transaction handle (rtxn) that mdb_dn2entry() is given below.
+	 * NOTE(review): struct mdb_info in back-mdb.h declares mi_dbenv;
+	 * bi_dbenv is the back-bdb spelling and is not a member of it. */
+	rs->sr_err = mdb_reader_get(op, mdb->bi_dbenv, &rtxn);
+	switch(rs->sr_err) {
+	case 0:
+		break;
+	default:
+		rs->sr_text = "internal error";
+		send_ldap_result( op, rs );
+		return rs->sr_err;
+	}
+
+dn2entry_retry:
+	/* get entry with reader lock */
+	rs->sr_err = mdb_dn2entry( op, rtxn, &op->o_req_ndn, &ei, 1,
+		&lock );
+
+	switch(rs->sr_err) {
+	case DB_NOTFOUND:
+	case 0:
+		break;
+	case LDAP_BUSY:
+		send_ldap_error( op, rs, LDAP_BUSY, "ldap_server_busy" );
+		return LDAP_BUSY;
+	case DB_LOCK_DEADLOCK:
+	case DB_LOCK_NOTGRANTED:
+		goto dn2entry_retry; /* unbounded: repeats for as long as the lookup reports a lock conflict */
+	default:
+		send_ldap_error( op, rs, LDAP_OTHER, "internal error" );
+		return rs->sr_err;
+	}
+
+	e = ei->bei_e; /* NOTE(review): ei is dereferenced on the DB_NOTFOUND path too, so mdb_dn2entry() must always set it - confirm */
+	if ( rs->sr_err == DB_NOTFOUND ) {
+		if( e != NULL ) {
+			mdb_cache_return_entry_r( mdb, e, &lock );
+			e = NULL;
+		}
+
+		rs->sr_err = LDAP_INVALID_CREDENTIALS;
+		send_ldap_result( op, rs );
+
+		return rs->sr_err;
+	}
+
+	ber_dupbv( &op->oq_bind.rb_edn, &e->e_name );
+
+	/* refuse to bind as a subentry, an alias or a referral */
+	if ( is_entry_subentry( e ) ) {
+		/* entry is a subentry, don't allow bind */
+		Debug( LDAP_DEBUG_TRACE, "entry is subentry\n", 0,
+			0, 0 );
+		rs->sr_err = LDAP_INVALID_CREDENTIALS;
+		goto done;
+	}
+
+	if ( is_entry_alias( e ) ) {
+		/* entry is an alias, don't allow bind */
+		Debug( LDAP_DEBUG_TRACE, "entry is alias\n", 0, 0, 0 );
+		rs->sr_err = LDAP_INVALID_CREDENTIALS;
+		goto done;
+	}
+
+	if ( is_entry_referral( e ) ) {
+		Debug( LDAP_DEBUG_TRACE, "entry is referral\n", 0,
+			0, 0 );
+		rs->sr_err = LDAP_INVALID_CREDENTIALS;
+		goto done;
+	}
+
+	switch ( op->oq_bind.rb_method ) {
+	case LDAP_AUTH_SIMPLE:
+		a = attr_find( e->e_attrs, password );
+		if ( a == NULL ) {
+			rs->sr_err = LDAP_INVALID_CREDENTIALS;
+			goto done;
+		}
+
+		if ( slap_passwd_check( op, e, a, &op->oq_bind.rb_cred,
+					&rs->sr_text ) != 0 )
+		{
+			/* failure; stop front end from sending result */
+			rs->sr_err = LDAP_INVALID_CREDENTIALS;
+			goto done;
+		}
+
+		rs->sr_err = 0;
+		break;
+
+	default:
+		assert( 0 ); /* should not be reachable */
+		rs->sr_err = LDAP_STRONG_AUTH_NOT_SUPPORTED;
+		rs->sr_text = "authentication method not supported";
+	}
+
+done:
+	/* free entry and reader lock */
+	if( e != NULL ) {
+		mdb_cache_return_entry_r( mdb, e, &lock );
+	}
+
+	if ( rs->sr_err ) {
+		send_ldap_result( op, rs );
+		if ( rs->sr_ref ) {
+			ber_bvarray_free( rs->sr_ref );
+			rs->sr_ref = NULL;
+		}
+	}
+	/* front end will send result on success (rs->sr_err==0) */
+	return rs->sr_err;
+}
diff --git a/servers/slapd/back-mdb/compare.c b/servers/slapd/back-mdb/compare.c
new file mode 100644
index 0000000000..55c1041720
--- /dev/null
+++ b/servers/slapd/back-mdb/compare.c
@@ -0,0 +1,143 @@
+/* compare.c - mdb backend compare routine */
+/* $OpenLDAP$ */
+/* This work is part of OpenLDAP Software .
+ *
+ * Copyright 2000-2011 The OpenLDAP Foundation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in the file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * .
+ */
+
+#include "portable.h"
+
+#include /* NOTE(review): header names of these two includes were lost in extraction */
+#include
+
+#include "back-mdb.h"
+/*
+ * mdb_compare - compare operation for this backend.
+ *
+ * Looks up op->o_req_ndn with mdb_dn2entry() and then:
+ *  - entry not found: answers LDAP_REFERRAL or LDAP_NO_SUCH_OBJECT.
+ *    When the lookup handed back another entry (ei->bei_e), a referral
+ *    is returned only if "disclose" access to that entry is granted;
+ *    with no such entry the default referral is tried;
+ *  - entry is a referral and the manageDSAit control is not set: same
+ *    disclose check, then LDAP_REFERRAL or LDAP_NO_SUCH_OBJECT;
+ *  - otherwise: the result of slap_compare_entry() on op->orc_ava.
+ *
+ * The result is always sent from here. The return value is rs->sr_err,
+ * except that LDAP_COMPARE_TRUE / LDAP_COMPARE_FALSE are mapped to
+ * LDAP_SUCCESS after the result has been sent.
+ */
+int
+mdb_compare( Operation *op, SlapReply *rs )
+{
+	struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private;
+	Entry *e = NULL;
+	EntryInfo *ei;
+	int manageDSAit = get_manageDSAit( op );
+
+	DB_TXN *rtxn;
+	DB_LOCK lock;
+	/* get the transaction handle (rtxn) that mdb_dn2entry() is given below.
+	 * NOTE(review): struct mdb_info in back-mdb.h declares mi_dbenv;
+	 * bi_dbenv is the back-bdb spelling and is not a member of it. */
+	rs->sr_err = mdb_reader_get(op, mdb->bi_dbenv, &rtxn);
+	switch(rs->sr_err) {
+	case 0:
+		break;
+	default:
+		send_ldap_error( op, rs, LDAP_OTHER, "internal error" );
+		return rs->sr_err;
+	}
+
+dn2entry_retry:
+	/* get entry */
+	rs->sr_err = mdb_dn2entry( op, rtxn, &op->o_req_ndn, &ei, 1,
+		&lock );
+
+	switch( rs->sr_err ) {
+	case DB_NOTFOUND:
+	case 0:
+		break;
+	case LDAP_BUSY:
+		rs->sr_text = "ldap server busy";
+		goto return_results; /* e is still NULL here, so nothing is released at done: */
+	case DB_LOCK_DEADLOCK:
+	case DB_LOCK_NOTGRANTED:
+		goto dn2entry_retry; /* unbounded: repeats for as long as the lookup reports a lock conflict */
+	default:
+		rs->sr_err = LDAP_OTHER;
+		rs->sr_text = "internal error";
+		goto return_results;
+	}
+
+	e = ei->bei_e; /* NOTE(review): ei is dereferenced on the DB_NOTFOUND path too, so mdb_dn2entry() must always set it - confirm */
+	if ( rs->sr_err == DB_NOTFOUND ) {
+		if ( e != NULL ) {
+			/* return referral only if "disclose" is granted on the object */
+			if ( ! access_allowed( op, e, slap_schema.si_ad_entry,
+						NULL, ACL_DISCLOSE, NULL ) )
+			{
+				rs->sr_err = LDAP_NO_SUCH_OBJECT;
+
+			} else {
+				rs->sr_matched = ch_strdup( e->e_dn ); /* private copy: e is handed back to the cache before the result is sent */
+				rs->sr_ref = is_entry_referral( e )
+					? get_entry_referrals( op, e )
+					: NULL;
+				rs->sr_err = LDAP_REFERRAL;
+			}
+
+			mdb_cache_return_entry_r( mdb, e, &lock );
+			e = NULL;
+
+		} else {
+			rs->sr_ref = referral_rewrite( default_referral,
+				NULL, &op->o_req_dn, LDAP_SCOPE_DEFAULT );
+			rs->sr_err = rs->sr_ref ? LDAP_REFERRAL : LDAP_NO_SUCH_OBJECT;
+		}
+
+		send_ldap_result( op, rs );
+
+		ber_bvarray_free( rs->sr_ref );
+		free( (char *)rs->sr_matched ); /* releases the ch_strdup() copy made above, if any */
+		rs->sr_ref = NULL;
+		rs->sr_matched = NULL;
+
+		goto done;
+	}
+
+	if (!manageDSAit && is_entry_referral( e ) ) {
+		/* return referral only if "disclose" is granted on the object */
+		if ( !access_allowed( op, e, slap_schema.si_ad_entry,
+					NULL, ACL_DISCLOSE, NULL ) )
+		{
+			rs->sr_err = LDAP_NO_SUCH_OBJECT;
+		} else {
+			/* entry is a referral, don't allow compare */
+			rs->sr_ref = get_entry_referrals( op, e );
+			rs->sr_err = LDAP_REFERRAL;
+			rs->sr_matched = e->e_name.bv_val; /* borrowed from e, which stays held until done:, so it is only reset below and not freed */
+		}
+
+		Debug( LDAP_DEBUG_TRACE, "entry is referral\n", 0, 0, 0 );
+
+		send_ldap_result( op, rs );
+
+		ber_bvarray_free( rs->sr_ref );
+		rs->sr_ref = NULL;
+		rs->sr_matched = NULL;
+		goto done;
+	}
+
+	rs->sr_err = slap_compare_entry( op, e, op->orc_ava );
+
+return_results:
+	send_ldap_result( op, rs );
+
+	switch ( rs->sr_err ) { /* the compare verdict has been sent; report plain success to the caller */
+	case LDAP_COMPARE_FALSE:
+	case LDAP_COMPARE_TRUE:
+		rs->sr_err = LDAP_SUCCESS;
+		break;
+	}
+
+done:
+	/* free entry */
+	if ( e != NULL ) {
+		mdb_cache_return_entry_r( mdb, e, &lock );
+	}
+
+	return rs->sr_err;
+}
diff --git a/servers/slapd/back-mdb/config.c b/servers/slapd/back-mdb/config.c
new file mode 100644
index 0000000000..5b26bece1f
--- /dev/null
+++ b/servers/slapd/back-mdb/config.c
@@ -0,0 +1,942 @@
+/* config.c - mdb backend configuration file routine */
+/* $OpenLDAP$ */
+/* This work is part of OpenLDAP Software .
+ *
+ * Copyright 2000-2011 The OpenLDAP Foundation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in the file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * .
+ */ + +#include "portable.h" + +#include +#include +#include +#include + +#include "back-mdb.h" + +#include "config.h" + +#include "lutil.h" +#include "ldap_rq.h" + +#ifdef DB_DIRTY_READ +# define SLAP_MDB_ALLOW_DIRTY_READ +#endif + +#define mdb_cf_gen MDB_SYMBOL(cf_gen) +#define mdb_cf_cleanup MDB_SYMBOL(cf_cleanup) +#define mdb_checkpoint MDB_SYMBOL(checkpoint) +#define mdb_online_index MDB_SYMBOL(online_index) + +static ConfigDriver mdb_cf_gen; + +enum { + MDB_CHKPT = 1, + MDB_CONFIG, + MDB_CRYPTFILE, + MDB_CRYPTKEY, + MDB_DIRECTORY, + MDB_NOSYNC, + MDB_DIRTYR, + MDB_INDEX, + MDB_LOCKD, + MDB_SSTACK, + MDB_MODE, + MDB_PGSIZE, + MDB_CHECKSUM +}; + +static ConfigTable mdbcfg[] = { + { "directory", "dir", 2, 2, 0, ARG_STRING|ARG_MAGIC|MDB_DIRECTORY, + mdb_cf_gen, "( OLcfgDbAt:0.1 NAME 'olcDbDirectory' " + "DESC 'Directory for database content' " + "EQUALITY caseIgnoreMatch " + "SYNTAX OMsDirectoryString SINGLE-VALUE )", NULL, NULL }, + { "cachefree", "size", 2, 2, 0, ARG_ULONG|ARG_OFFSET, + (void *)offsetof(struct mdb_info, bi_cache.c_minfree), + "( OLcfgDbAt:1.11 NAME 'olcDbCacheFree' " + "DESC 'Number of extra entries to free when max is reached' " + "SYNTAX OMsInteger SINGLE-VALUE )", NULL, NULL }, + { "cachesize", "size", 2, 2, 0, ARG_ULONG|ARG_OFFSET, + (void *)offsetof(struct mdb_info, bi_cache.c_maxsize), + "( OLcfgDbAt:1.1 NAME 'olcDbCacheSize' " + "DESC 'Entry cache size in entries' " + "SYNTAX OMsInteger SINGLE-VALUE )", NULL, NULL }, + { "checkpoint", "kbyte> <[pres,eq,approx,sub]", 2, 3, 0, ARG_MAGIC|MDB_INDEX, + mdb_cf_gen, "( OLcfgDbAt:0.2 NAME 'olcDbIndex' " + "DESC 'Attribute index parameters' " + "EQUALITY caseIgnoreMatch " + "SYNTAX OMsDirectoryString )", NULL, NULL }, + { "linearindex", NULL, 1, 2, 0, ARG_ON_OFF|ARG_OFFSET, + (void *)offsetof(struct mdb_info, bi_linear_index), + "( OLcfgDbAt:1.7 NAME 'olcDbLinearIndex' " + "DESC 'Index attributes one at a time' " + "SYNTAX OMsBoolean SINGLE-VALUE )", NULL, NULL }, + { "lockdetect", "policy", 2, 
2, 0, ARG_MAGIC|MDB_LOCKD, + mdb_cf_gen, "( OLcfgDbAt:1.8 NAME 'olcDbLockDetect' " + "DESC 'Deadlock detection algorithm' " + "SYNTAX OMsDirectoryString SINGLE-VALUE )", NULL, NULL }, + { "mode", "mode", 2, 2, 0, ARG_MAGIC|MDB_MODE, + mdb_cf_gen, "( OLcfgDbAt:0.3 NAME 'olcDbMode' " + "DESC 'Unix permissions of database files' " + "SYNTAX OMsDirectoryString SINGLE-VALUE )", NULL, NULL }, + { "searchstack", "depth", 2, 2, 0, ARG_INT|ARG_MAGIC|MDB_SSTACK, + mdb_cf_gen, "( OLcfgDbAt:1.9 NAME 'olcDbSearchStack' " + "DESC 'Depth of search stack in IDLs' " + "SYNTAX OMsInteger SINGLE-VALUE )", NULL, NULL }, + { "shm_key", "key", 2, 2, 0, ARG_LONG|ARG_OFFSET, + (void *)offsetof(struct mdb_info, bi_shm_key), + "( OLcfgDbAt:1.10 NAME 'olcDbShmKey' " + "DESC 'Key for shared memory region' " + "SYNTAX OMsInteger SINGLE-VALUE )", NULL, NULL }, + { NULL, NULL, 0, 0, 0, ARG_IGNORED, + NULL, NULL, NULL, NULL } +}; + +static ConfigOCs mdbocs[] = { + { +#ifdef MDB_HIER + "( OLcfgDbOc:1.2 " + "NAME 'olcHdbConfig' " + "DESC 'HDB backend configuration' " +#else + "( OLcfgDbOc:1.1 " + "NAME 'olcBdbConfig' " + "DESC 'MDB backend configuration' " +#endif + "SUP olcDatabaseConfig " + "MUST olcDbDirectory " + "MAY ( olcDbCacheSize $ olcDbCheckpoint $ olcDbConfig $ " + "olcDbCryptFile $ olcDbCryptKey $ " + "olcDbNoSync $ olcDbDirtyRead $ olcDbIDLcacheSize $ " + "olcDbIndex $ olcDbLinearIndex $ olcDbLockDetect $ " + "olcDbMode $ olcDbSearchStack $ olcDbShmKey $ " + "olcDbCacheFree $ olcDbDNcacheSize $ olcDbPageSize ) )", + Cft_Database, mdbcfg }, + { NULL, 0, NULL } +}; + +static slap_verbmasks mdb_lockd[] = { + { BER_BVC("default"), DB_LOCK_DEFAULT }, + { BER_BVC("oldest"), DB_LOCK_OLDEST }, + { BER_BVC("random"), DB_LOCK_RANDOM }, + { BER_BVC("youngest"), DB_LOCK_YOUNGEST }, + { BER_BVC("fewest"), DB_LOCK_MINLOCKS }, + { BER_BVNULL, 0 } +}; + +/* perform periodic checkpoints */ +static void * +mdb_checkpoint( void *ctx, void *arg ) +{ + struct re_s *rtask = arg; + struct mdb_info *mdb = 
rtask->arg; + + TXN_CHECKPOINT( mdb->bi_dbenv, mdb->bi_txn_cp_kbyte, + mdb->bi_txn_cp_min, 0 ); + ldap_pvt_thread_mutex_lock( &slapd_rq.rq_mutex ); + ldap_pvt_runqueue_stoptask( &slapd_rq, rtask ); + ldap_pvt_thread_mutex_unlock( &slapd_rq.rq_mutex ); + return NULL; +} + +/* reindex entries on the fly */ +static void * +mdb_online_index( void *ctx, void *arg ) +{ + struct re_s *rtask = arg; + BackendDB *be = rtask->arg; + struct mdb_info *mdb = be->be_private; + + Connection conn = {0}; + OperationBuffer opbuf; + Operation *op; + + DBC *curs; + DBT key, data; + DB_TXN *txn; + DB_LOCK lock; + ID id, nid; + EntryInfo *ei; + int rc, getnext = 1; + int i; + + connection_fake_init( &conn, &opbuf, ctx ); + op = &opbuf.ob_op; + + op->o_bd = be; + + DBTzero( &key ); + DBTzero( &data ); + + id = 1; + key.data = &nid; + key.size = key.ulen = sizeof(ID); + key.flags = DB_DBT_USERMEM; + + data.flags = DB_DBT_USERMEM | DB_DBT_PARTIAL; + data.dlen = data.ulen = 0; + + while ( 1 ) { + if ( slapd_shutdown ) + break; + + rc = TXN_BEGIN( mdb->bi_dbenv, NULL, &txn, mdb->bi_db_opflags ); + if ( rc ) + break; + if ( getnext ) { + getnext = 0; + MDB_ID2DISK( id, &nid ); + rc = mdb->bi_id2entry->bdi_db->cursor( + mdb->bi_id2entry->bdi_db, txn, &curs, mdb->bi_db_opflags ); + if ( rc ) { + TXN_ABORT( txn ); + break; + } + rc = curs->c_get( curs, &key, &data, DB_SET_RANGE ); + curs->c_close( curs ); + if ( rc ) { + TXN_ABORT( txn ); + if ( rc == DB_NOTFOUND ) + rc = 0; + if ( rc == DB_LOCK_DEADLOCK ) { + ldap_pvt_thread_yield(); + continue; + } + break; + } + MDB_DISK2ID( &nid, &id ); + } + + ei = NULL; + rc = mdb_cache_find_id( op, txn, id, &ei, 0, &lock ); + if ( rc ) { + TXN_ABORT( txn ); + if ( rc == DB_LOCK_DEADLOCK ) { + ldap_pvt_thread_yield(); + continue; + } + if ( rc == DB_NOTFOUND ) { + id++; + getnext = 1; + continue; + } + break; + } + if ( ei->bei_e ) { + rc = mdb_index_entry( op, txn, MDB_INDEX_UPDATE_OP, ei->bei_e ); + if ( rc == DB_LOCK_DEADLOCK ) { + TXN_ABORT( txn ); + 
ldap_pvt_thread_yield(); + continue; + } + if ( rc == 0 ) { + rc = TXN_COMMIT( txn, 0 ); + txn = NULL; + } + if ( rc ) + break; + } + id++; + getnext = 1; + } + + for ( i = 0; i < mdb->bi_nattrs; i++ ) { + if ( mdb->bi_attrs[ i ]->ai_indexmask & MDB_INDEX_DELETING + || mdb->bi_attrs[ i ]->ai_newmask == 0 ) + { + continue; + } + mdb->bi_attrs[ i ]->ai_indexmask = mdb->bi_attrs[ i ]->ai_newmask; + mdb->bi_attrs[ i ]->ai_newmask = 0; + } + + ldap_pvt_thread_mutex_lock( &slapd_rq.rq_mutex ); + ldap_pvt_runqueue_stoptask( &slapd_rq, rtask ); + mdb->bi_index_task = NULL; + ldap_pvt_runqueue_remove( &slapd_rq, rtask ); + ldap_pvt_thread_mutex_unlock( &slapd_rq.rq_mutex ); + + return NULL; +} + +/* Cleanup loose ends after Modify completes */ +static int +mdb_cf_cleanup( ConfigArgs *c ) +{ + struct mdb_info *mdb = c->be->be_private; + int rc = 0; + + if ( mdb->bi_flags & MDB_UPD_CONFIG ) { + if ( mdb->bi_db_config ) { + int i; + FILE *f = fopen( mdb->bi_db_config_path, "w" ); + if ( f ) { + for (i=0; mdb->bi_db_config[i].bv_val; i++) + fprintf( f, "%s\n", mdb->bi_db_config[i].bv_val ); + fclose( f ); + } + } else { + unlink( mdb->bi_db_config_path ); + } + mdb->bi_flags ^= MDB_UPD_CONFIG; + } + + if ( mdb->bi_flags & MDB_DEL_INDEX ) { + mdb_attr_flush( mdb ); + mdb->bi_flags ^= MDB_DEL_INDEX; + } + + if ( mdb->bi_flags & MDB_RE_OPEN ) { + mdb->bi_flags ^= MDB_RE_OPEN; + rc = c->be->bd_info->bi_db_close( c->be, &c->reply ); + if ( rc == 0 ) + rc = c->be->bd_info->bi_db_open( c->be, &c->reply ); + /* If this fails, we need to restart */ + if ( rc ) { + slapd_shutdown = 2; + snprintf( c->cr_msg, sizeof( c->cr_msg ), + "failed to reopen database, rc=%d", rc ); + Debug( LDAP_DEBUG_ANY, LDAP_XSTRING(mdb_cf_cleanup) + ": %s\n", c->cr_msg, 0, 0 ); + rc = LDAP_OTHER; + } + } + return rc; +} + +static int +mdb_cf_gen( ConfigArgs *c ) +{ + struct mdb_info *mdb = c->be->be_private; + int rc; + + if ( c->op == SLAP_CONFIG_EMIT ) { + rc = 0; + switch( c->type ) { + case MDB_MODE: { + 
char buf[64]; + struct berval bv; + bv.bv_len = snprintf( buf, sizeof(buf), "0%o", mdb->bi_dbenv_mode ); + if ( bv.bv_len > 0 && bv.bv_len < sizeof(buf) ) { + bv.bv_val = buf; + value_add_one( &c->rvalue_vals, &bv ); + } else { + rc = 1; + } + } break; + + case MDB_CHKPT: + if ( mdb->bi_txn_cp ) { + char buf[64]; + struct berval bv; + bv.bv_len = snprintf( buf, sizeof(buf), "%ld %ld", + (long) mdb->bi_txn_cp_kbyte, (long) mdb->bi_txn_cp_min ); + if ( bv.bv_len > 0 && bv.bv_len < sizeof(buf) ) { + bv.bv_val = buf; + value_add_one( &c->rvalue_vals, &bv ); + } else { + rc = 1; + } + } else { + rc = 1; + } + break; + + case MDB_CRYPTFILE: + if ( mdb->bi_db_crypt_file ) { + c->value_string = ch_strdup( mdb->bi_db_crypt_file ); + } else { + rc = 1; + } + break; + + /* If a crypt file has been set, its contents are copied here. + * But we don't want the key to be incorporated here. + */ + case MDB_CRYPTKEY: + if ( !mdb->bi_db_crypt_file && !BER_BVISNULL( &mdb->bi_db_crypt_key )) { + value_add_one( &c->rvalue_vals, &mdb->bi_db_crypt_key ); + } else { + rc = 1; + } + break; + + case MDB_DIRECTORY: + if ( mdb->bi_dbenv_home ) { + c->value_string = ch_strdup( mdb->bi_dbenv_home ); + } else { + rc = 1; + } + break; + + case MDB_CONFIG: + if ( !( mdb->bi_flags & MDB_IS_OPEN ) + && !mdb->bi_db_config ) + { + char buf[SLAP_TEXT_BUFLEN]; + FILE *f = fopen( mdb->bi_db_config_path, "r" ); + struct berval bv; + + if ( f ) { + mdb->bi_flags |= MDB_HAS_CONFIG; + while ( fgets( buf, sizeof(buf), f )) { + ber_str2bv( buf, 0, 1, &bv ); + if ( bv.bv_len > 0 && bv.bv_val[bv.bv_len-1] == '\n' ) { + bv.bv_len--; + bv.bv_val[bv.bv_len] = '\0'; + } + /* shouldn't need this, but ... 
*/ + if ( bv.bv_len > 0 && bv.bv_val[bv.bv_len-1] == '\r' ) { + bv.bv_len--; + bv.bv_val[bv.bv_len] = '\0'; + } + ber_bvarray_add( &mdb->bi_db_config, &bv ); + } + fclose( f ); + } + } + if ( mdb->bi_db_config ) { + int i; + struct berval bv; + + bv.bv_val = c->log; + for (i=0; !BER_BVISNULL(&mdb->bi_db_config[i]); i++) { + bv.bv_len = sprintf( bv.bv_val, "{%d}%s", i, + mdb->bi_db_config[i].bv_val ); + value_add_one( &c->rvalue_vals, &bv ); + } + } + if ( !c->rvalue_vals ) rc = 1; + break; + + case MDB_NOSYNC: + if ( mdb->bi_dbenv_xflags & DB_TXN_NOSYNC ) + c->value_int = 1; + break; + + case MDB_CHECKSUM: + if ( mdb->bi_flags & MDB_CHKSUM ) + c->value_int = 1; + break; + + case MDB_INDEX: + mdb_attr_index_unparse( mdb, &c->rvalue_vals ); + if ( !c->rvalue_vals ) rc = 1; + break; + + case MDB_LOCKD: + rc = 1; + if ( mdb->bi_lock_detect != DB_LOCK_DEFAULT ) { + int i; + for (i=0; !BER_BVISNULL(&mdb_lockd[i].word); i++) { + if ( mdb->bi_lock_detect == (u_int32_t)mdb_lockd[i].mask ) { + value_add_one( &c->rvalue_vals, &mdb_lockd[i].word ); + rc = 0; + break; + } + } + } + break; + + case MDB_SSTACK: + c->value_int = mdb->bi_search_stack_depth; + break; + + case MDB_PGSIZE: { + struct mdb_db_pgsize *ps; + char buf[SLAP_TEXT_BUFLEN]; + struct berval bv; + int rc = 1; + + bv.bv_val = buf; + for ( ps = mdb->bi_pagesizes; ps; ps = ps->bdp_next ) { + bv.bv_len = sprintf( buf, "%s %d", ps->bdp_name.bv_val, + ps->bdp_size / 1024 ); + value_add_one( &c->rvalue_vals, &bv ); + rc = 0; + + } + break; + } + } + return rc; + } else if ( c->op == LDAP_MOD_DELETE ) { + rc = 0; + switch( c->type ) { + case MDB_MODE: +#if 0 + /* FIXME: does it make any sense to change the mode, + * if we don't exec a chmod()? 
*/ + mdb->bi_dbenv_mode = SLAPD_DEFAULT_DB_MODE; + break; +#endif + + /* single-valued no-ops */ + case MDB_LOCKD: + case MDB_SSTACK: + break; + + case MDB_CHKPT: + if ( mdb->bi_txn_cp_task ) { + struct re_s *re = mdb->bi_txn_cp_task; + mdb->bi_txn_cp_task = NULL; + ldap_pvt_thread_mutex_lock( &slapd_rq.rq_mutex ); + if ( ldap_pvt_runqueue_isrunning( &slapd_rq, re ) ) + ldap_pvt_runqueue_stoptask( &slapd_rq, re ); + ldap_pvt_runqueue_remove( &slapd_rq, re ); + ldap_pvt_thread_mutex_unlock( &slapd_rq.rq_mutex ); + } + mdb->bi_txn_cp = 0; + break; + case MDB_CONFIG: + if ( c->valx < 0 ) { + ber_bvarray_free( mdb->bi_db_config ); + mdb->bi_db_config = NULL; + } else { + int i = c->valx; + ch_free( mdb->bi_db_config[i].bv_val ); + for (; mdb->bi_db_config[i].bv_val; i++) + mdb->bi_db_config[i] = mdb->bi_db_config[i+1]; + } + mdb->bi_flags |= MDB_UPD_CONFIG; + c->cleanup = mdb_cf_cleanup; + break; + /* Doesn't really make sense to change these on the fly; + * the entire DB must be dumped and reloaded + */ + case MDB_CRYPTFILE: + if ( mdb->bi_db_crypt_file ) { + ch_free( mdb->bi_db_crypt_file ); + mdb->bi_db_crypt_file = NULL; + } + /* FALLTHRU */ + case MDB_CRYPTKEY: + if ( !BER_BVISNULL( &mdb->bi_db_crypt_key )) { + ch_free( mdb->bi_db_crypt_key.bv_val ); + BER_BVZERO( &mdb->bi_db_crypt_key ); + } + break; + case MDB_DIRECTORY: + mdb->bi_flags |= MDB_RE_OPEN; + mdb->bi_flags ^= MDB_HAS_CONFIG; + ch_free( mdb->bi_dbenv_home ); + mdb->bi_dbenv_home = NULL; + ch_free( mdb->bi_db_config_path ); + mdb->bi_db_config_path = NULL; + c->cleanup = mdb_cf_cleanup; + ldap_pvt_thread_pool_purgekey( mdb->bi_dbenv ); + break; + case MDB_NOSYNC: + mdb->bi_dbenv->set_flags( mdb->bi_dbenv, DB_TXN_NOSYNC, 0 ); + break; + case MDB_CHECKSUM: + mdb->bi_flags &= ~MDB_CHKSUM; + break; + case MDB_INDEX: + if ( c->valx == -1 ) { + int i; + + /* delete all (FIXME) */ + for ( i = 0; i < mdb->bi_nattrs; i++ ) { + mdb->bi_attrs[i]->ai_indexmask |= MDB_INDEX_DELETING; + } + mdb->bi_flags |= 
MDB_DEL_INDEX; + c->cleanup = mdb_cf_cleanup; + + } else { + struct berval bv, def = BER_BVC("default"); + char *ptr; + + for (ptr = c->line; !isspace( (unsigned char) *ptr ); ptr++); + + bv.bv_val = c->line; + bv.bv_len = ptr - bv.bv_val; + if ( bvmatch( &bv, &def )) { + mdb->bi_defaultmask = 0; + + } else { + int i; + char **attrs; + char sep; + + sep = bv.bv_val[ bv.bv_len ]; + bv.bv_val[ bv.bv_len ] = '\0'; + attrs = ldap_str2charray( bv.bv_val, "," ); + + for ( i = 0; attrs[ i ]; i++ ) { + AttributeDescription *ad = NULL; + const char *text; + AttrInfo *ai; + + slap_str2ad( attrs[ i ], &ad, &text ); + /* if we got here... */ + assert( ad != NULL ); + + ai = mdb_attr_mask( mdb, ad ); + /* if we got here... */ + assert( ai != NULL ); + + ai->ai_indexmask |= MDB_INDEX_DELETING; + mdb->bi_flags |= MDB_DEL_INDEX; + c->cleanup = mdb_cf_cleanup; + } + + bv.bv_val[ bv.bv_len ] = sep; + ldap_charray_free( attrs ); + } + } + break; + /* doesn't make sense on the fly; the DB file must be + * recreated + */ + case MDB_PGSIZE: { + struct mdb_db_pgsize *ps, **prev; + int i; + + for ( i = 0, prev = &mdb->bi_pagesizes, ps = *prev; ps; + prev = &ps->bdp_next, ps = ps->bdp_next, i++ ) { + if ( c->valx == -1 || i == c->valx ) { + *prev = ps->bdp_next; + ch_free( ps ); + ps = *prev; + if ( i == c->valx ) break; + } + } + } + break; + } + return rc; + } + + switch( c->type ) { + case MDB_MODE: + if ( ASCII_DIGIT( c->argv[1][0] ) ) { + long mode; + char *next; + errno = 0; + mode = strtol( c->argv[1], &next, 0 ); + if ( errno != 0 || next == c->argv[1] || next[0] != '\0' ) { + fprintf( stderr, "%s: " + "unable to parse mode=\"%s\".\n", + c->log, c->argv[1] ); + return 1; + } + mdb->bi_dbenv_mode = mode; + + } else { + char *m = c->argv[1]; + int who, what, mode = 0; + + if ( strlen( m ) != STRLENOF("-rwxrwxrwx") ) { + return 1; + } + + if ( m[0] != '-' ) { + return 1; + } + + m++; + for ( who = 0; who < 3; who++ ) { + for ( what = 0; what < 3; what++, m++ ) { + if ( m[0] == '-' ) { 
+ continue; + } else if ( m[0] != "rwx"[what] ) { + return 1; + } + mode += ((1 << (2 - what)) << 3*(2 - who)); + } + } + mdb->bi_dbenv_mode = mode; + } + break; + case MDB_CHKPT: { + long l; + mdb->bi_txn_cp = 1; + if ( lutil_atolx( &l, c->argv[1], 0 ) != 0 ) { + fprintf( stderr, "%s: " + "invalid kbyte \"%s\" in \"checkpoint\".\n", + c->log, c->argv[1] ); + return 1; + } + mdb->bi_txn_cp_kbyte = l; + if ( lutil_atolx( &l, c->argv[2], 0 ) != 0 ) { + fprintf( stderr, "%s: " + "invalid minutes \"%s\" in \"checkpoint\".\n", + c->log, c->argv[2] ); + return 1; + } + mdb->bi_txn_cp_min = l; + /* If we're in server mode and time-based checkpointing is enabled, + * submit a task to perform periodic checkpoints. + */ + if ((slapMode & SLAP_SERVER_MODE) && mdb->bi_txn_cp_min ) { + struct re_s *re = mdb->bi_txn_cp_task; + if ( re ) { + re->interval.tv_sec = mdb->bi_txn_cp_min * 60; + } else { + if ( c->be->be_suffix == NULL || BER_BVISNULL( &c->be->be_suffix[0] ) ) { + fprintf( stderr, "%s: " + "\"checkpoint\" must occur after \"suffix\".\n", + c->log ); + return 1; + } + ldap_pvt_thread_mutex_lock( &slapd_rq.rq_mutex ); + mdb->bi_txn_cp_task = ldap_pvt_runqueue_insert( &slapd_rq, + mdb->bi_txn_cp_min * 60, mdb_checkpoint, mdb, + LDAP_XSTRING(mdb_checkpoint), c->be->be_suffix[0].bv_val ); + ldap_pvt_thread_mutex_unlock( &slapd_rq.rq_mutex ); + } + } + } break; + + case MDB_CONFIG: { + char *ptr = c->line; + struct berval bv; + + if ( c->op == SLAP_CONFIG_ADD ) { + ptr += STRLENOF("dbconfig"); + while (!isspace((unsigned char)*ptr)) ptr++; + while (isspace((unsigned char)*ptr)) ptr++; + } + + if ( mdb->bi_flags & MDB_IS_OPEN ) { + mdb->bi_flags |= MDB_UPD_CONFIG; + c->cleanup = mdb_cf_cleanup; + } else { + /* If we're just starting up... 
+ */ + FILE *f; + /* If a DB_CONFIG file exists, or we don't know the path + * to the DB_CONFIG file, ignore these directives + */ + if (( mdb->bi_flags & MDB_HAS_CONFIG ) || !mdb->bi_db_config_path ) + break; + f = fopen( mdb->bi_db_config_path, "a" ); + if ( f ) { + /* FIXME: EBCDIC probably needs special handling */ + fprintf( f, "%s\n", ptr ); + fclose( f ); + } + } + ber_str2bv( ptr, 0, 1, &bv ); + ber_bvarray_add( &mdb->bi_db_config, &bv ); + } + break; + + case MDB_CRYPTFILE: + rc = lutil_get_filed_password( c->value_string, &mdb->bi_db_crypt_key ); + if ( rc == 0 ) { + mdb->bi_db_crypt_file = c->value_string; + } + break; + + /* Cannot set key if file was already set */ + case MDB_CRYPTKEY: + if ( mdb->bi_db_crypt_file ) { + rc = 1; + } else { + mdb->bi_db_crypt_key = c->value_bv; + } + break; + + case MDB_DIRECTORY: { + FILE *f; + char *ptr, *testpath; + int len; + + len = strlen( c->value_string ); + testpath = ch_malloc( len + STRLENOF(LDAP_DIRSEP) + STRLENOF("DUMMY") + 1 ); + ptr = lutil_strcopy( testpath, c->value_string ); + *ptr++ = LDAP_DIRSEP[0]; + strcpy( ptr, "DUMMY" ); + f = fopen( testpath, "w" ); + if ( f ) { + fclose( f ); + unlink( testpath ); + } + ch_free( testpath ); + if ( !f ) { + snprintf( c->cr_msg, sizeof( c->cr_msg ), "%s: invalid path: %s", + c->log, strerror( errno )); + Debug( LDAP_DEBUG_ANY, "%s\n", c->cr_msg, 0, 0 ); + return -1; + } + + if ( mdb->bi_dbenv_home ) + ch_free( mdb->bi_dbenv_home ); + mdb->bi_dbenv_home = c->value_string; + + /* See if a DB_CONFIG file already exists here */ + if ( mdb->bi_db_config_path ) + ch_free( mdb->bi_db_config_path ); + mdb->bi_db_config_path = ch_malloc( len + + STRLENOF(LDAP_DIRSEP) + STRLENOF("DB_CONFIG") + 1 ); + ptr = lutil_strcopy( mdb->bi_db_config_path, mdb->bi_dbenv_home ); + *ptr++ = LDAP_DIRSEP[0]; + strcpy( ptr, "DB_CONFIG" ); + + f = fopen( mdb->bi_db_config_path, "r" ); + if ( f ) { + mdb->bi_flags |= MDB_HAS_CONFIG; + fclose(f); + } + } + break; + + case MDB_NOSYNC: + if ( 
c->value_int ) + mdb->bi_dbenv_xflags |= DB_TXN_NOSYNC; + else + mdb->bi_dbenv_xflags &= ~DB_TXN_NOSYNC; + if ( mdb->bi_flags & MDB_IS_OPEN ) { + mdb->bi_dbenv->set_flags( mdb->bi_dbenv, DB_TXN_NOSYNC, + c->value_int ); + } + break; + + case MDB_CHECKSUM: + if ( c->value_int ) + mdb->bi_flags |= MDB_CHKSUM; + else + mdb->bi_flags &= ~MDB_CHKSUM; + break; + + case MDB_INDEX: + rc = mdb_attr_index_config( mdb, c->fname, c->lineno, + c->argc - 1, &c->argv[1], &c->reply); + + if( rc != LDAP_SUCCESS ) return 1; + if (( mdb->bi_flags & MDB_IS_OPEN ) && !mdb->bi_index_task ) { + /* Start the task as soon as we finish here. Set a long + * interval (10 hours) so that it only gets scheduled once. + */ + if ( c->be->be_suffix == NULL || BER_BVISNULL( &c->be->be_suffix[0] ) ) { + fprintf( stderr, "%s: " + "\"index\" must occur after \"suffix\".\n", + c->log ); + return 1; + } + ldap_pvt_thread_mutex_lock( &slapd_rq.rq_mutex ); + mdb->bi_index_task = ldap_pvt_runqueue_insert( &slapd_rq, 36000, + mdb_online_index, c->be, + LDAP_XSTRING(mdb_online_index), c->be->be_suffix[0].bv_val ); + ldap_pvt_thread_mutex_unlock( &slapd_rq.rq_mutex ); + } + break; + + case MDB_LOCKD: + rc = verb_to_mask( c->argv[1], mdb_lockd ); + if ( BER_BVISNULL(&mdb_lockd[rc].word) ) { + fprintf( stderr, "%s: " + "bad policy (%s) in \"lockDetect \" line\n", + c->log, c->argv[1] ); + return 1; + } + mdb->bi_lock_detect = (u_int32_t)rc; + break; + + case MDB_SSTACK: + if ( c->value_int < MINIMUM_SEARCH_STACK_DEPTH ) { + fprintf( stderr, + "%s: depth %d too small, using %d\n", + c->log, c->value_int, MINIMUM_SEARCH_STACK_DEPTH ); + c->value_int = MINIMUM_SEARCH_STACK_DEPTH; + } + mdb->bi_search_stack_depth = c->value_int; + break; + + case MDB_PGSIZE: { + struct mdb_db_pgsize *ps, **prev; + int i, s; + + s = atoi(c->argv[2]); + if ( s < 1 || s > 64 ) { + snprintf( c->cr_msg, sizeof( c->cr_msg ), + "%s: size must be > 0 and <= 64: %d", + c->log, s ); + Debug( LDAP_DEBUG_ANY, "%s\n", c->cr_msg, 0, 0 ); + return 
-1; + } + i = strlen(c->argv[1]); + ps = ch_malloc( sizeof(struct mdb_db_pgsize) + i + 1 ); + ps->bdp_next = NULL; + ps->bdp_name.bv_len = i; + ps->bdp_name.bv_val = (char *)(ps+1); + strcpy( ps->bdp_name.bv_val, c->argv[1] ); + ps->bdp_size = s * 1024; + for ( prev = &mdb->bi_pagesizes; *prev; prev = &(*prev)->bdp_next ) + ; + *prev = ps; + } + break; + } + return 0; +} + +int mdb_back_init_cf( BackendInfo *bi ) +{ + int rc; + bi->bi_cf_ocs = mdbocs; + + rc = config_register_schema( mdbcfg, mdbocs ); + if ( rc ) return rc; + return 0; +} diff --git a/servers/slapd/back-mdb/dbcache.c b/servers/slapd/back-mdb/dbcache.c new file mode 100644 index 0000000000..fb7a0e174b --- /dev/null +++ b/servers/slapd/back-mdb/dbcache.c @@ -0,0 +1,119 @@ +/* dbcache.c - manage cache of open databases */ +/* $OpenLDAP$ */ +/* This work is part of OpenLDAP Software . + * + * Copyright 2000-2011 The OpenLDAP Foundation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * . + */ + +#include "portable.h" + +#include + +#include +#include +#include +#include +#include + +#include "slap.h" +#include "back-mdb.h" + +int +mdb_db_cache( + Backend *be, + struct berval *name, + DB **dbout ) +{ + int i, flags; + int rc; + struct mdb_info *mdb = (struct mdb_info *) be->be_private; + struct mdb_db_info *db; + char *file; + + *dbout = NULL; + + for( i=MDB_NDB; i < mdb->bi_ndatabases; i++ ) { + if( !ber_bvcmp( &mdb->bi_databases[i]->bdi_name, name) ) { + *dbout = mdb->bi_databases[i]->bdi_db; + return 0; + } + } + + ldap_pvt_thread_mutex_lock( &mdb->bi_database_mutex ); + + /* check again! 
may have been added by another thread */ + for( i=MDB_NDB; i < mdb->bi_ndatabases; i++ ) { + if( !ber_bvcmp( &mdb->bi_databases[i]->bdi_name, name) ) { + *dbout = mdb->bi_databases[i]->bdi_db; + ldap_pvt_thread_mutex_unlock( &mdb->bi_database_mutex ); + return 0; + } + } + + if( i >= MDB_INDICES ) { + ldap_pvt_thread_mutex_unlock( &mdb->bi_database_mutex ); + return -1; + } + + db = (struct mdb_db_info *) ch_calloc(1, sizeof(struct mdb_db_info)); + + ber_dupbv( &db->bdi_name, name ); + + rc = db_create( &db->bdi_db, mdb->bi_dbenv, 0 ); + if( rc != 0 ) { + Debug( LDAP_DEBUG_ANY, + "mdb_db_cache: db_create(%s) failed: %s (%d)\n", + mdb->bi_dbenv_home, db_strerror(rc), rc ); + ldap_pvt_thread_mutex_unlock( &mdb->bi_database_mutex ); + ch_free( db ); + return rc; + } + + file = ch_malloc( db->bdi_name.bv_len + sizeof(MDB_SUFFIX) ); + strcpy( file, db->bdi_name.bv_val ); + strcpy( file+db->bdi_name.bv_len, MDB_SUFFIX ); + +#ifdef HAVE_EBCDIC + __atoe( file ); +#endif + flags = DB_CREATE | DB_THREAD; +#ifdef DB_AUTO_COMMIT + if ( !( slapMode & SLAP_TOOL_QUICK )) + flags |= DB_AUTO_COMMIT; +#endif + /* Cannot Truncate when Transactions are in use */ + if ( (slapMode & (SLAP_TOOL_QUICK|SLAP_TRUNCATE_MODE)) == + (SLAP_TOOL_QUICK|SLAP_TRUNCATE_MODE)) + flags |= DB_TRUNCATE; + + rc = DB_OPEN( db->bdi_db, + file, NULL /* name */, + MDB_INDEXTYPE, mdb->bi_db_opflags | flags, mdb->bi_dbenv_mode ); + + ch_free( file ); + + if( rc != 0 ) { + Debug( LDAP_DEBUG_ANY, + "mdb_db_cache: db_open(%s) failed: %s (%d)\n", + name->bv_val, db_strerror(rc), rc ); + ldap_pvt_thread_mutex_unlock( &mdb->bi_database_mutex ); + return rc; + } + + mdb->bi_databases[i] = db; + mdb->bi_ndatabases = i+1; + + *dbout = db->bdi_db; + + ldap_pvt_thread_mutex_unlock( &mdb->bi_database_mutex ); + return 0; +} diff --git a/servers/slapd/back-mdb/delete.c b/servers/slapd/back-mdb/delete.c new file mode 100644 index 0000000000..99a9fd4a13 --- /dev/null +++ b/servers/slapd/back-mdb/delete.c @@ -0,0 +1,601 @@ +/* 
delete.c - mdb backend delete routine */
+/* $OpenLDAP$ */
+/* This work is part of OpenLDAP Software <http://www.OpenLDAP.org/>.
+ *
+ * Copyright 2000-2011 The OpenLDAP Foundation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in the file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * <http://www.OpenLDAP.org/license.html>.
+ */
+
+#include "portable.h"
+
+#include <stdio.h>
+#include <ac/string.h>
+
+#include "lutil.h"
+#include "back-mdb.h"
+
+/*
+ * mdb_delete - LDAP delete operation for this backend.
+ *
+ * Looks up op->o_req_ndn inside an outer transaction (ltid), checks the
+ * "children" ACL on the parent and the "entry" ACL on the target, then
+ * removes the entry's dn2id record, its index keys and its id2entry
+ * record inside a nested transaction (lt2). Lock deadlocks at any step
+ * jump back to the retry label, which aborts the outer transaction and
+ * starts over after mdb_trans_backoff().
+ *
+ * The result is always sent to the client from here (send_ldap_result);
+ * the return value is the final rs->sr_err.
+ */
+int
+mdb_delete( Operation *op, SlapReply *rs )
+{
+	struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private;
+	Entry	*matched = NULL;
+	struct berval	pdn = {0, NULL};
+	Entry	*e = NULL;
+	Entry	*p = NULL;
+	EntryInfo	*ei = NULL, *eip = NULL;
+	int		manageDSAit = get_manageDSAit( op );
+	AttributeDescription *children = slap_schema.si_ad_children;
+	AttributeDescription *entry = slap_schema.si_ad_entry;
+	DB_TXN		*ltid = NULL, *lt2;
+	struct mdb_op_info opinfo = {{{ 0 }}};
+	ID	eid;
+
+	DB_LOCK		lock, plock;
+
+	int		num_retries = 0;
+
+	int     rc;
+
+	LDAPControl **preread_ctrl = NULL;
+	LDAPControl *ctrls[SLAP_MAX_RESPONSE_CONTROLS];
+	int num_ctrls = 0;
+
+	int	parent_is_glue = 0;
+	int parent_is_leaf = 0;
+
+#ifdef LDAP_X_TXN
+	int settle = 0;
+#endif
+
+	Debug( LDAP_DEBUG_ARGS, "==> " LDAP_XSTRING(mdb_delete) ": %s\n",
+		op->o_req_dn.bv_val, 0, 0 );
+
+#ifdef LDAP_X_TXN
+	if( op->o_txnSpec ) {
+		/* acquire connection lock */
+		ldap_pvt_thread_mutex_lock( &op->o_conn->c_mutex );
+		if( op->o_conn->c_txn == CONN_TXN_INACTIVE ) {
+			rs->sr_text = "invalid transaction identifier";
+			rs->sr_err = LDAP_X_TXN_ID_INVALID;
+			goto txnReturn;
+		} else if( op->o_conn->c_txn == CONN_TXN_SETTLE ) {
+			settle=1;
+			goto txnReturn;
+		}
+
+		if( op->o_conn->c_txn_backend == NULL ) {
+			op->o_conn->c_txn_backend = op->o_bd;
+
+		} else if( op->o_conn->c_txn_backend != op->o_bd ) {
+			rs->sr_text = "transaction cannot span multiple database contexts";
+			rs->sr_err = LDAP_AFFECTS_MULTIPLE_DSAS;
+			goto txnReturn;
+		}
+
+		/* insert operation into transaction */
+
+		rs->sr_text = "transaction specified";
+		rs->sr_err = LDAP_X_TXN_SPECIFY_OKAY;
+
+txnReturn:
+		/* release connection lock */
+		ldap_pvt_thread_mutex_unlock( &op->o_conn->c_mutex );
+
+		if( !settle ) {
+			send_ldap_result( op, rs );
+			return rs->sr_err;
+		}
+	}
+#endif
+
+	ctrls[num_ctrls] = 0;
+
+	/* allocate CSN */
+	if ( BER_BVISNULL( &op->o_csn ) ) {
+		struct berval csn;
+		char csnbuf[LDAP_PVT_CSNSTR_BUFSIZE];
+
+		csn.bv_val = csnbuf;
+		csn.bv_len = sizeof(csnbuf);
+		slap_get_csn( op, &csn, 1 );
+	}
+
+	/* The block below is only ever entered through "goto retry". */
+	if( 0 ) {
+retry:	/* transaction retry */
+		if( e != NULL ) {
+			mdb_unlocked_cache_return_entry_w(&mdb->bi_cache, e);
+			e = NULL;
+		}
+		if( p != NULL ) {
+			mdb_unlocked_cache_return_entry_r(&mdb->bi_cache, p);
+			p = NULL;
+		}
+		Debug( LDAP_DEBUG_TRACE,
+			"==> " LDAP_XSTRING(mdb_delete) ": retrying...\n",
+			0, 0, 0 );
+		rs->sr_err = TXN_ABORT( ltid );
+		ltid = NULL;
+		LDAP_SLIST_REMOVE( &op->o_extra, &opinfo.boi_oe, OpExtra, oe_next );
+		opinfo.boi_oe.oe_key = NULL;
+		op->o_do_not_cache = opinfo.boi_acl_cache;
+		if( rs->sr_err != 0 ) {
+			rs->sr_err = LDAP_OTHER;
+			rs->sr_text = "internal error";
+			goto return_results;
+		}
+		if ( op->o_abandon ) {
+			rs->sr_err = SLAPD_ABANDON;
+			goto return_results;
+		}
+		parent_is_glue = 0;
+		parent_is_leaf = 0;
+		mdb_trans_backoff( ++num_retries );
+	}
+
+	/* begin transaction */
+	rs->sr_err = TXN_BEGIN( mdb->bi_dbenv, NULL, &ltid,
+		mdb->bi_db_opflags );
+	rs->sr_text = NULL;
+	if( rs->sr_err != 0 ) {
+		Debug( LDAP_DEBUG_TRACE,
+			LDAP_XSTRING(mdb_delete) ": txn_begin failed: "
+			"%s (%d)\n", db_strerror(rs->sr_err), rs->sr_err, 0 );
+		rs->sr_err = LDAP_OTHER;
+		rs->sr_text = "internal error";
+		goto return_results;
+	}
+
+	/* Publish the transaction on the operation so nested calls can find it */
+	opinfo.boi_oe.oe_key = mdb;
+	opinfo.boi_txn = ltid;
+	opinfo.boi_err = 0;
+	opinfo.boi_acl_cache = op->o_do_not_cache;
+	LDAP_SLIST_INSERT_HEAD( &op->o_extra, &opinfo.boi_oe, oe_next );
+
+	if ( !be_issuffix( op->o_bd, &op->o_req_ndn ) ) {
+		dnParent( &op->o_req_ndn, &pdn );
+	}
+
+	/* get entry */
+	rs->sr_err = mdb_dn2entry( op, ltid, &op->o_req_ndn, &ei, 1,
+		&lock );
+
+	switch( rs->sr_err ) {
+	case 0:
+	case DB_NOTFOUND:
+		break;
+	case DB_LOCK_DEADLOCK:
+	case DB_LOCK_NOTGRANTED:
+		goto retry;
+	case LDAP_BUSY:
+		rs->sr_text = "ldap server busy";
+		goto return_results;
+	default:
+		rs->sr_err = LDAP_OTHER;
+		rs->sr_text = "internal error";
+		goto return_results;
+	}
+
+	if ( rs->sr_err == 0 ) {
+		e = ei->bei_e;
+		eip = ei->bei_parent;
+	} else {
+		/* mdb_dn2entry tolerates a NULL EntryInfo on the not-found
+		 * path, so do not dereference it blindly here.
+		 */
+		matched = ei ? ei->bei_e : NULL;
+	}
+
+	/* FIXME : dn2entry() should return non-glue entry */
+	if ( e == NULL || ( !manageDSAit && is_entry_glue( e ))) {
+		Debug( LDAP_DEBUG_ARGS,
+			"<=- " LDAP_XSTRING(mdb_delete) ": no such object %s\n",
+			op->o_req_dn.bv_val, 0, 0);
+
+		if ( matched != NULL ) {
+			rs->sr_matched = ch_strdup( matched->e_dn );
+			rs->sr_ref = is_entry_referral( matched )
+				? get_entry_referrals( op, matched )
+				: NULL;
+			mdb_unlocked_cache_return_entry_r(&mdb->bi_cache, matched);
+			matched = NULL;
+
+		} else {
+			rs->sr_ref = referral_rewrite( default_referral, NULL,
+					&op->o_req_dn, LDAP_SCOPE_DEFAULT );
+		}
+
+		rs->sr_err = LDAP_REFERRAL;
+		rs->sr_flags = REP_MATCHED_MUSTBEFREED | REP_REF_MUSTBEFREED;
+		goto return_results;
+	}
+
+	rc = mdb_cache_find_id( op, ltid, eip->bei_id, &eip, 0, &plock );
+	switch( rc ) {
+	case DB_LOCK_DEADLOCK:
+	case DB_LOCK_NOTGRANTED:
+		goto retry;
+	case 0:
+	case DB_NOTFOUND:
+		break;
+	default:
+		rs->sr_err = LDAP_OTHER;
+		rs->sr_text = "internal error";
+		goto return_results;
+	}
+	if ( eip ) p = eip->bei_e;
+
+	if ( pdn.bv_len != 0 ) {
+		if( p == NULL || !bvmatch( &pdn, &p->e_nname )) {
+			Debug( LDAP_DEBUG_TRACE,
+				"<=- " LDAP_XSTRING(mdb_delete) ": parent "
+				"does not exist\n", 0, 0, 0 );
+			rs->sr_err = LDAP_OTHER;
+			rs->sr_text = "could not locate parent of entry";
+			goto return_results;
+		}
+
+		/* check parent for "children" acl */
+		rs->sr_err = access_allowed( op, p,
+			children, NULL, ACL_WDEL, NULL );
+
+		if ( !rs->sr_err  ) {
+			switch( opinfo.boi_err ) {
+			case DB_LOCK_DEADLOCK:
+			case DB_LOCK_NOTGRANTED:
+				goto retry;
+			}
+
+			Debug( LDAP_DEBUG_TRACE,
+				"<=- " LDAP_XSTRING(mdb_delete) ": no write "
+				"access to parent\n", 0, 0, 0 );
+			rs->sr_err = LDAP_INSUFFICIENT_ACCESS;
+			rs->sr_text = "no write access to parent";
+			goto return_results;
+		}
+
+	} else {
+		/* no parent, must be root to delete */
+		if( ! be_isroot( op ) ) {
+			if ( be_issuffix( op->o_bd, (struct berval *)&slap_empty_bv )
+				|| be_shadow_update( op ) ) {
+				p = (Entry *)&slap_entry_root;
+
+				/* check parent for "children" acl */
+				rs->sr_err = access_allowed( op, p,
+					children, NULL, ACL_WDEL, NULL );
+
+				p = NULL;
+
+				if ( !rs->sr_err  ) {
+					switch( opinfo.boi_err ) {
+					case DB_LOCK_DEADLOCK:
+					case DB_LOCK_NOTGRANTED:
+						goto retry;
+					}
+
+					Debug( LDAP_DEBUG_TRACE,
+						"<=- " LDAP_XSTRING(mdb_delete)
+						": no access to parent\n",
+						0, 0, 0 );
+					rs->sr_err = LDAP_INSUFFICIENT_ACCESS;
+					rs->sr_text = "no write access to parent";
+					goto return_results;
+				}
+
+			} else {
+				Debug( LDAP_DEBUG_TRACE,
+					"<=- " LDAP_XSTRING(mdb_delete)
+					": no parent and not root\n", 0, 0, 0 );
+				rs->sr_err = LDAP_INSUFFICIENT_ACCESS;
+				goto return_results;
+			}
+		}
+	}
+
+	if ( get_assert( op ) &&
+		( test_filter( op, e, get_assertion( op )) != LDAP_COMPARE_TRUE ))
+	{
+		rs->sr_err = LDAP_ASSERTION_FAILED;
+		goto return_results;
+	}
+
+	rs->sr_err = access_allowed( op, e,
+		entry, NULL, ACL_WDEL, NULL );
+
+	if ( !rs->sr_err  ) {
+		switch( opinfo.boi_err ) {
+		case DB_LOCK_DEADLOCK:
+		case DB_LOCK_NOTGRANTED:
+			goto retry;
+		}
+
+		Debug( LDAP_DEBUG_TRACE,
+			"<=- " LDAP_XSTRING(mdb_delete) ": no write access "
+			"to entry\n", 0, 0, 0 );
+		rs->sr_err = LDAP_INSUFFICIENT_ACCESS;
+		rs->sr_text = "no write access to entry";
+		goto return_results;
+	}
+
+	if ( !manageDSAit && is_entry_referral( e ) ) {
+		/* entry is a referral, don't allow delete */
+		rs->sr_ref = get_entry_referrals( op, e );
+
+		Debug( LDAP_DEBUG_TRACE,
+			LDAP_XSTRING(mdb_delete) ": entry is referral\n",
+			0, 0, 0 );
+
+		rs->sr_err = LDAP_REFERRAL;
+		rs->sr_matched = ch_strdup( e->e_name.bv_val );
+		rs->sr_flags = REP_MATCHED_MUSTBEFREED | REP_REF_MUSTBEFREED;
+		goto return_results;
+	}
+
+	/* pre-read */
+	if( op->o_preread ) {
+		if( preread_ctrl == NULL ) {
+			preread_ctrl = &ctrls[num_ctrls++];
+			ctrls[num_ctrls] = NULL;
+		}
+		if( slap_read_controls( op, rs, e,
+			&slap_pre_read_bv, preread_ctrl ) )
+		{
+			Debug( LDAP_DEBUG_TRACE,
+				"<=- " LDAP_XSTRING(mdb_delete) ": pre-read "
+				"failed!\n", 0, 0, 0 );
+			if ( op->o_preread & SLAP_CONTROL_CRITICAL ) {
+				/* FIXME: is it correct to abort
+				 * operation if control fails? */
+				goto return_results;
+			}
+		}
+	}
+
+	/* nested transaction */
+	rs->sr_err = TXN_BEGIN( mdb->bi_dbenv, ltid, &lt2,
+		mdb->bi_db_opflags );
+	rs->sr_text = NULL;
+	if( rs->sr_err != 0 ) {
+		Debug( LDAP_DEBUG_TRACE,
+			LDAP_XSTRING(mdb_delete) ": txn_begin(2) failed: "
+			"%s (%d)\n", db_strerror(rs->sr_err), rs->sr_err, 0 );
+		rs->sr_err = LDAP_OTHER;
+		rs->sr_text = "internal error";
+		goto return_results;
+	}
+
+	MDB_LOG_PRINTF( mdb->bi_dbenv, lt2, "slapd Starting delete %s(%d)",
+		e->e_nname.bv_val, e->e_id );
+
+	/* Can't do it if we have kids */
+	rs->sr_err = mdb_cache_children( op, lt2, e );
+	if( rs->sr_err != DB_NOTFOUND ) {
+		switch( rs->sr_err ) {
+		case DB_LOCK_DEADLOCK:
+		case DB_LOCK_NOTGRANTED:
+			goto retry;
+		case 0:
+			Debug(LDAP_DEBUG_ARGS,
+				"<=- " LDAP_XSTRING(mdb_delete)
+				": non-leaf %s\n",
+				op->o_req_dn.bv_val, 0, 0);
+			rs->sr_err = LDAP_NOT_ALLOWED_ON_NONLEAF;
+			rs->sr_text = "subordinate objects must be deleted first";
+			break;
+		default:
+			Debug(LDAP_DEBUG_ARGS,
+				"<=- " LDAP_XSTRING(mdb_delete)
+				": has_children failed: %s (%d)\n",
+				db_strerror(rs->sr_err), rs->sr_err, 0 );
+			rs->sr_err = LDAP_OTHER;
+			rs->sr_text = "internal error";
+		}
+		goto return_results;
+	}
+
+	/* delete from dn2id */
+	rs->sr_err = mdb_dn2id_delete( op, lt2, eip, e );
+	if ( rs->sr_err != 0 ) {
+		Debug(LDAP_DEBUG_TRACE,
+			"<=- " LDAP_XSTRING(mdb_delete) ": dn2id failed: "
+			"%s (%d)\n", db_strerror(rs->sr_err), rs->sr_err, 0 );
+		switch( rs->sr_err ) {
+		case DB_LOCK_DEADLOCK:
+		case DB_LOCK_NOTGRANTED:
+			goto retry;
+		}
+		rs->sr_text = "DN index delete failed";
+		rs->sr_err = LDAP_OTHER;
+		goto return_results;
+	}
+
+	/* delete indices for old attributes */
+	rs->sr_err = mdb_index_entry_del( op, lt2, e );
+	if ( rs->sr_err != LDAP_SUCCESS ) {
+		Debug(LDAP_DEBUG_TRACE,
+			"<=- " LDAP_XSTRING(mdb_delete) ": index failed: "
+			"%s (%d)\n", db_strerror(rs->sr_err), rs->sr_err, 0 );
+		switch( rs->sr_err ) {
+		case DB_LOCK_DEADLOCK:
+		case DB_LOCK_NOTGRANTED:
+			goto retry;
+		}
+		rs->sr_text = "entry index delete failed";
+		rs->sr_err = LDAP_OTHER;
+		goto return_results;
+	}
+
+	/* fixup delete CSN */
+	if ( !SLAP_SHADOW( op->o_bd )) {
+		struct berval vals[2];
+
+		assert( !BER_BVISNULL( &op->o_csn ) );
+		vals[0] = op->o_csn;
+		BER_BVZERO( &vals[1] );
+		rs->sr_err = mdb_index_values( op, lt2, slap_schema.si_ad_entryCSN,
+			vals, 0, SLAP_INDEX_ADD_OP );
+		if ( rs->sr_err != LDAP_SUCCESS ) {
+			switch( rs->sr_err ) {
+			case DB_LOCK_DEADLOCK:
+			case DB_LOCK_NOTGRANTED:
+				goto retry;
+			}
+			rs->sr_text = "entryCSN index update failed";
+			rs->sr_err = LDAP_OTHER;
+			goto return_results;
+		}
+	}
+
+	/* delete from id2entry */
+	rs->sr_err = mdb_id2entry_delete( op->o_bd, lt2, e );
+	if ( rs->sr_err != 0 ) {
+		Debug( LDAP_DEBUG_TRACE,
+			"<=- " LDAP_XSTRING(mdb_delete) ": id2entry failed: "
+			"%s (%d)\n", db_strerror(rs->sr_err), rs->sr_err, 0 );
+		switch( rs->sr_err ) {
+		case DB_LOCK_DEADLOCK:
+		case DB_LOCK_NOTGRANTED:
+			goto retry;
+		}
+		rs->sr_text = "entry delete failed";
+		rs->sr_err = LDAP_OTHER;
+		goto return_results;
+	}
+
+	if ( pdn.bv_len != 0 ) {
+		parent_is_glue = is_entry_glue(p);
+		rs->sr_err = mdb_cache_children( op, lt2, p );
+		if ( rs->sr_err != DB_NOTFOUND ) {
+			switch( rs->sr_err ) {
+			case DB_LOCK_DEADLOCK:
+			case DB_LOCK_NOTGRANTED:
+				goto retry;
+			case 0:
+				break;
+			default:
+				Debug(LDAP_DEBUG_ARGS,
+					"<=- " LDAP_XSTRING(mdb_delete)
+					": has_children failed: %s (%d)\n",
+					db_strerror(rs->sr_err), rs->sr_err, 0 );
+				rs->sr_err = LDAP_OTHER;
+				rs->sr_text = "internal error";
+				goto return_results;
+			}
+			parent_is_leaf = 1;
+		}
+		mdb_unlocked_cache_return_entry_r(&mdb->bi_cache, p);
+		p = NULL;
+	}
+
+	MDB_LOG_PRINTF( mdb->bi_dbenv, lt2, "slapd Commit1 delete %s(%d)",
+		e->e_nname.bv_val, e->e_id );
+
+	if ( TXN_COMMIT( lt2, 0 ) != 0 ) {
+		rs->sr_err = LDAP_OTHER;
+		rs->sr_text = "txn_commit(2) failed";
+		goto return_results;
+	}
+
+	eid = e->e_id;
+
+#if 0	/* Do we want to reclaim deleted IDs? */
+	ldap_pvt_thread_mutex_lock( &mdb->bi_lastid_mutex );
+	if ( e->e_id == mdb->bi_lastid ) {
+		mdb_last_id( op->o_bd, ltid );
+	}
+	ldap_pvt_thread_mutex_unlock( &mdb->bi_lastid_mutex );
+#endif
+
+	if( op->o_noop ) {
+		if ( ( rs->sr_err = TXN_ABORT( ltid ) ) != 0 ) {
+			rs->sr_text = "txn_abort (no-op) failed";
+		} else {
+			rs->sr_err = LDAP_X_NO_OPERATION;
+			ltid = NULL;
+			goto return_results;
+		}
+	} else {
+
+		MDB_LOG_PRINTF( mdb->bi_dbenv, ltid, "slapd Cache delete %s(%d)",
+			e->e_nname.bv_val, e->e_id );
+
+		rc = mdb_cache_delete( mdb, e, ltid, &lock );
+		switch( rc ) {
+		case DB_LOCK_DEADLOCK:
+		case DB_LOCK_NOTGRANTED:
+			goto retry;
+		}
+
+		rs->sr_err = TXN_COMMIT( ltid, 0 );
+	}
+	ltid = NULL;
+	LDAP_SLIST_REMOVE( &op->o_extra, &opinfo.boi_oe, OpExtra, oe_next );
+	opinfo.boi_oe.oe_key = NULL;
+
+	MDB_LOG_PRINTF( mdb->bi_dbenv, NULL, "slapd Committed delete %s(%d)",
+		e->e_nname.bv_val, e->e_id );
+
+	if( rs->sr_err != 0 ) {
+		Debug( LDAP_DEBUG_TRACE,
+			LDAP_XSTRING(mdb_delete) ": txn_%s failed: %s (%d)\n",
+			op->o_noop ? "abort (no-op)" : "commit",
+			db_strerror(rs->sr_err), rs->sr_err );
+		rs->sr_err = LDAP_OTHER;
+		rs->sr_text = "commit failed";
+
+		goto return_results;
+	}
+
+	Debug( LDAP_DEBUG_TRACE,
+		LDAP_XSTRING(mdb_delete) ": deleted%s id=%08lx dn=\"%s\"\n",
+		op->o_noop ? " (no-op)" : "",
+		eid, op->o_req_dn.bv_val );
+	rs->sr_err = LDAP_SUCCESS;
+	rs->sr_text = NULL;
+	if( num_ctrls ) rs->sr_ctrls = ctrls;
+
+return_results:
+	if ( rs->sr_err == LDAP_SUCCESS && parent_is_glue && parent_is_leaf ) {
+		op->o_delete_glue_parent = 1;
+	}
+
+	if ( p )
+		mdb_unlocked_cache_return_entry_r(&mdb->bi_cache, p);
+
+	/* free entry */
+	if( e != NULL ) {
+		if ( rs->sr_err == LDAP_SUCCESS ) {
+			/* Free the EntryInfo and the Entry */
+			mdb_cache_entryinfo_lock( BEI(e) );
+			mdb_cache_delete_cleanup( &mdb->bi_cache, BEI(e) );
+		} else {
+			mdb_unlocked_cache_return_entry_w(&mdb->bi_cache, e);
+		}
+	}
+
+	/* A still-open outer transaction here means we bailed out early */
+	if( ltid != NULL ) {
+		TXN_ABORT( ltid );
+	}
+	if ( opinfo.boi_oe.oe_key ) {
+		LDAP_SLIST_REMOVE( &op->o_extra, &opinfo.boi_oe, OpExtra, oe_next );
+	}
+
+	send_ldap_result( op, rs );
+	slap_graduate_commit_csn( op );
+
+	if( preread_ctrl != NULL && (*preread_ctrl) != NULL ) {
+		slap_sl_free( (*preread_ctrl)->ldctl_value.bv_val, op->o_tmpmemctx );
+		slap_sl_free( *preread_ctrl, op->o_tmpmemctx );
+	}
+
+	if( rs->sr_err == LDAP_SUCCESS && mdb->bi_txn_cp_kbyte ) {
+		TXN_CHECKPOINT( mdb->bi_dbenv,
+			mdb->bi_txn_cp_kbyte, mdb->bi_txn_cp_min, 0 );
+	}
+	return rs->sr_err;
+}
diff --git a/servers/slapd/back-mdb/dn2entry.c b/servers/slapd/back-mdb/dn2entry.c
new file mode 100644
index 0000000000..fb9e18c2d8
--- /dev/null
+++ b/servers/slapd/back-mdb/dn2entry.c
@@ -0,0 +1,84 @@
+/* dn2entry.c - routines to deal with the dn2id / id2entry glue */
+/* $OpenLDAP$ */
+/* This work is part of OpenLDAP Software <http://www.OpenLDAP.org/>.
+ *
+ * Copyright 2000-2011 The OpenLDAP Foundation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in the file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * <http://www.OpenLDAP.org/license.html>.
+ */
+
+#include "portable.h"
+
+#include <stdio.h>
+#include <ac/string.h>
+
+#include "back-mdb.h"
+
+/*
+ * dn2entry - look up dn in the cache/indexes and return the corresponding
+ * entry. If the requested DN is not found and matched is TRUE, return info
+ * for the closest ancestor of the DN. Otherwise e is NULL.
+ *
+ * op      - operation on whose behalf the lookup runs
+ * tid     - transaction passed through to the cache lookups
+ * dn      - normalized DN to resolve
+ * e       - out: resulting EntryInfo; reset to NULL on entry
+ * matched - non-zero to get the closest ancestor on DB_NOTFOUND
+ * lock    - lock handle passed to mdb_cache_find_id
+ *
+ * Returns 0 on success, otherwise the error from mdb_cache_find_ndn or
+ * mdb_cache_find_id (DB_NOTFOUND when only an ancestor was found).
+ */
+
+int
+mdb_dn2entry(
+	Operation *op,
+	DB_TXN *tid,
+	struct berval *dn,
+	EntryInfo **e,
+	int matched,
+	DB_LOCK *lock )
+{
+	EntryInfo *ei = NULL;
+	int rc, rc2;
+
+	Debug(LDAP_DEBUG_TRACE, "mdb_dn2entry(\"%s\")\n",
+		dn->bv_val, 0, 0 );
+
+	*e = NULL;
+
+	rc = mdb_cache_find_ndn( op, tid, dn, &ei );
+	if ( rc ) {
+		if ( matched && rc == DB_NOTFOUND ) {
+			/* Set the return value, whether we have its entry
+			 * or not.
+			 */
+			*e = ei;
+			if ( ei && ei->bei_id ) {
+				rc2 = mdb_cache_find_id( op, tid, ei->bei_id,
+					&ei, ID_LOCKED, lock );
+				if ( rc2 ) rc = rc2;
+			} else if ( ei ) {
+				/* Nothing to load for this node: release it and
+				 * hand back a lock that is marked as not granted.
+				 */
+				mdb_cache_entryinfo_unlock( ei );
+				memset( lock, 0, sizeof( *lock ));
+				lock->mode = DB_LOCK_NG;
+			}
+		} else if ( ei ) {
+			mdb_cache_entryinfo_unlock( ei );
+		}
+	} else {
+		rc = mdb_cache_find_id( op, tid, ei->bei_id, &ei, ID_LOCKED,
+			lock );
+		if ( rc == 0 ) {
+			*e = ei;
+		} else if ( matched && rc == DB_NOTFOUND ) {
+			/* always return EntryInfo */
+			if ( ei->bei_parent ) {
+				ei = ei->bei_parent;
+				rc2 = mdb_cache_find_id( op, tid, ei->bei_id, &ei, 0,
+					lock );
+				if ( rc2 ) rc = rc2;
+			}
+			*e = ei;
+		}
+	}
+
+	return rc;
+}
diff --git a/servers/slapd/back-mdb/dn2id.c b/servers/slapd/back-mdb/dn2id.c
new file mode 100644
index 0000000000..1e9ca02165
--- /dev/null
+++ b/servers/slapd/back-mdb/dn2id.c
@@ -0,0 +1,844 @@
+/* dn2id.c - routines to deal with the dn2id index */
+/* $OpenLDAP$ */
+/* This work is part of OpenLDAP Software <http://www.OpenLDAP.org/>.
+ *
+ * Copyright 2000-2011 The OpenLDAP Foundation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in the file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * <http://www.OpenLDAP.org/license.html>.
+ */
+
+#include "portable.h"
+
+#include <stdio.h>
+#include <ac/string.h>
+
+#include "back-mdb.h"
+#include "idl.h"
+#include "lutil.h"
+
+/* Management routines for a hierarchically structured database.
+ *
+ * Instead of a ldbm-style dn2id database, we use a hierarchical one. Each
+ * entry in this database is a struct diskNode, keyed by entryID and with
+ * the data containing the RDN and entryID of the node's children. We use
+ * a B-Tree with sorted duplicates to store all the children of a node under
+ * the same key. Also, the first item under the key contains the entry's own
+ * rdn and the ID of the node's parent, to allow bottom-up tree traversal as
+ * well as top-down. To keep this info first in the list, the high bit of all
+ * subsequent nrdnlen's is always set. This means we can only accommodate
+ * RDNs up to length 32767, but that's fine since full DNs are already
+ * restricted to 8192.
+ *
+ * The diskNode is a variable length structure. This definition is not
+ * directly usable for in-memory manipulation.
+ */
+typedef struct diskNode {
+	unsigned char nrdnlen[2];	/* normalized RDN length, high byte first */
+	char nrdn[1];			/* normalized RDN, NUL terminated */
+	char rdn[1];                        /* variable placement */
+	unsigned char entryID[sizeof(ID)];  /* variable placement */
+} diskNode;
+
+/* Sort function for the sorted duplicate data items of a dn2id key.
+ * Sorts based on normalized RDN, in length order.
+ *
+ * The two nrdnlen bytes are compared first (high byte, then low byte);
+ * only records with equal length bytes fall through to strcmp() on the
+ * normalized RDN text. Returns <0, 0 or >0 in the usual comparator sense.
+ */
+int
+mdb_dup_compare(
+	DB *db,
+	const DBT *usrkey,
+	const DBT *curkey
+)
+{
+	diskNode *un, *cn;
+	int rc;
+
+	un = (diskNode *)usrkey->data;
+	cn = (diskNode *)curkey->data;
+
+	/* data is not aligned, cannot compare directly */
+	rc = un->nrdnlen[0] - cn->nrdnlen[0];
+	if ( rc ) return rc;
+	rc = un->nrdnlen[1] - cn->nrdnlen[1];
+	if ( rc ) return rc;
+
+	return strcmp( un->nrdn, cn->nrdn );
+}
+
+/* This function constructs a full DN for a given entry.
+ *
+ * The DN and normalized DN are assembled by walking the EntryInfo
+ * parent chain from the entry up to (but excluding) the node with ID 0.
+ *
+ * checkit == 0: always (re)build e_name / e_nname.
+ * checkit == 1: only report whether the stored DN is stale (returns 1).
+ * checkit == 2: free the stale DN strings and rebuild them.
+ *
+ * Returns 1 only for the "stale, caller must lock" case, otherwise 0.
+ */
+int mdb_fix_dn(
+	Entry *e,
+	int checkit )
+{
+	EntryInfo *node;
+	int dn_size = 0, ndn_size = 0;
+	int newest = 0;
+	char *dn_out, *ndn_out;
+
+	if ( e->e_id == 0 ) {
+		return 0;
+	}
+
+	/* First pass: total the sizes (each RDN plus one separator) and
+	 * find the highest modrdn counter along the chain.
+	 */
+	node = BEI(e);
+	while ( node && node->bei_id ) {
+		dn_size += node->bei_rdn.bv_len + 1;
+		ndn_size += node->bei_nrdn.bv_len + 1;
+		if ( node->bei_modrdns > newest ) {
+			newest = node->bei_modrdns;
+		}
+		node = node->bei_parent;
+	}
+
+	/* A subtree rename above us leaves our own counter behind */
+	if ( checkit ) {
+		if ( BEI(e)->bei_modrdns >= newest ) {
+			return 0;
+		}
+		if ( checkit == 1 ) {
+			/* stale: the caller has to lock before we may fix it */
+			return 1;
+		}
+		/* checkit == 2: drop the stale strings before rebuilding */
+		free( e->e_name.bv_val );
+		free( e->e_nname.bv_val );
+	}
+
+	e->e_name.bv_len = dn_size - 1;
+	e->e_nname.bv_len = ndn_size - 1;
+	e->e_name.bv_val = ch_malloc(dn_size);
+	e->e_nname.bv_val = ch_malloc(ndn_size);
+
+	/* Second pass: append each RDN, comma separated, leaf first */
+	dn_out = e->e_name.bv_val;
+	ndn_out = e->e_nname.bv_val;
+	node = BEI(e);
+	while ( node && node->bei_id ) {
+		dn_out = lutil_strcopy(dn_out, node->bei_rdn.bv_val);
+		ndn_out = lutil_strcopy(ndn_out, node->bei_nrdn.bv_val);
+		if ( node->bei_parent ) {
+			*dn_out++ = ',';
+			*ndn_out++ = ',';
+		}
+		node = node->bei_parent;
+	}
+	BEI(e)->bei_modrdns = newest;
+
+	/* Overwrite the trailing separator with the terminator */
+	if ( dn_out > e->e_name.bv_val ) {
+		dn_out[-1] = '\0';
+	}
+	if ( ndn_out > e->e_nname.bv_val ) {
+		ndn_out[-1] = '\0';
+	}
+
+	return 0;
+}
+
+/* We add two elements to the DN2ID database - a data item under the parent's
+ * entryID containing the child's RDN and entryID, and an item under the
+ * child's entryID containing the parent's entryID.
+ */
+int
+mdb_dn2id_add(
+	Operation	*op,
+	DB_TXN *txn,
+	EntryInfo	*eip,
+	Entry		*e )
+{
+	struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private;
+	DB *db = mdb->bi_dn2id->bdi_db;
+	DBT		key, data;
+	ID		nid;
+	int		rc, rlen, nrlen;
+	diskNode *d;
+	char *ptr;
+
+	Debug( LDAP_DEBUG_TRACE, "=> mdb_dn2id_add 0x%lx: \"%s\"\n",
+		e->e_id, e->e_ndn, 0 );
+
+	/* A zero RDN length means the whole DN is used as the RDN */
+	nrlen = dn_rdnlen( op->o_bd, &e->e_nname );
+	if (nrlen) {
+		rlen = dn_rdnlen( op->o_bd, &e->e_name );
+	} else {
+		nrlen = e->e_nname.bv_len;
+		rlen = e->e_name.bv_len;
+	}
+
+	/* Build the child record: length (high bit set), nrdn, rdn, entry ID */
+	d = op->o_tmpalloc(sizeof(diskNode) + rlen + nrlen, op->o_tmpmemctx);
+	d->nrdnlen[1] = nrlen & 0xff;
+	d->nrdnlen[0] = (nrlen >> 8) | 0x80;
+	ptr = lutil_strncopy( d->nrdn, e->e_nname.bv_val, nrlen );
+	*ptr++ = '\0';
+	ptr = lutil_strncopy( ptr, e->e_name.bv_val, rlen );
+	*ptr++ = '\0';
+	MDB_ID2DISK( e->e_id, ptr );
+
+	DBTzero(&key);
+	DBTzero(&data);
+	key.size = sizeof(ID);
+	key.flags = DB_DBT_USERMEM;
+	MDB_ID2DISK( eip->bei_id, &nid );
+
+	key.data = &nid;
+
+	/* Need to make dummy root node once. Subsequent attempts
+	 * will fail harmlessly.
+	 */
+	if ( eip->bei_id == 0 ) {
+		diskNode dummy = {{0, 0}, "", "", ""};
+		data.data = &dummy;
+		data.size = sizeof(diskNode);
+		data.flags = DB_DBT_USERMEM;
+
+		db->put( db, txn, &key, &data, DB_NODUPDATA );
+	}
+
+	data.data = d;
+	data.size = sizeof(diskNode) + rlen + nrlen;
+	data.flags = DB_DBT_USERMEM;
+
+	/* First record: stored under the parent's ID */
+	rc = db->put( db, txn, &key, &data, DB_NODUPDATA );
+
+	if (rc == 0) {
+		/* Second record: same buffer re-keyed by our own ID, carrying
+		 * the parent's ID and with the high length bit cleared.
+		 */
+		MDB_ID2DISK( e->e_id, &nid );
+		MDB_ID2DISK( eip->bei_id, ptr );
+		d->nrdnlen[0] ^= 0x80;
+
+		rc = db->put( db, txn, &key, &data, DB_NODUPDATA );
+	}
+
+	/* Update all parents' IDL cache entries */
+	if ( rc == 0 && mdb->bi_idl_cache_size ) {
+		ID tmp[2];
+		/* cache key = one prefix byte immediately followed by the ID */
+		char *ptr = ((char *)&tmp[1])-1;
+		key.data = ptr;
+		key.size = sizeof(ID)+1;
+		tmp[1] = eip->bei_id;
+		*ptr = DN_ONE_PREFIX;
+		mdb_idl_cache_add_id( mdb, db, &key, e->e_id );
+		if ( eip->bei_parent ) {
+			*ptr = DN_SUBTREE_PREFIX;
+			for (; eip && eip->bei_parent->bei_id; eip = eip->bei_parent) {
+				tmp[1] = eip->bei_id;
+				mdb_idl_cache_add_id( mdb, db, &key, e->e_id );
+			}
+			/* Handle DB with empty suffix */
+			if ( !op->o_bd->be_suffix[0].bv_len && eip ) {
+				tmp[1] = eip->bei_id;
+				mdb_idl_cache_add_id( mdb, db, &key, e->e_id );
+			}
+		}
+	}
+
+	op->o_tmpfree( d, op->o_tmpmemctx );
+	Debug( LDAP_DEBUG_TRACE, "<= mdb_dn2id_add 0x%lx: %d\n", e->e_id, rc, 0 );
+
+	return rc;
+}
+
+/* Remove entry e from the dn2id database: first its record in the list
+ * kept under the parent (eip), then the record keyed by its own ID.
+ * On success the matching IDL cache entries are dropped as well.
+ * Returns 0 or the Berkeley DB error from the cursor operations.
+ */
+int
+mdb_dn2id_delete(
+	Operation	*op,
+	DB_TXN *txn,
+	EntryInfo	*eip,
+	Entry	*e )
+{
+	struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private;
+	DB *db = mdb->bi_dn2id->bdi_db;
+	DBT		key, data;
+	DBC	*cursor;
+	diskNode *d;
+	int rc;
+	ID	nid;
+	unsigned char dlen[2];
+
+	Debug( LDAP_DEBUG_TRACE, "=> mdb_dn2id_delete 0x%lx: \"%s\"\n",
+		e->e_id, e->e_ndn, 0 );
+
+	DBTzero(&key);
+	key.size = sizeof(ID);
+	key.ulen = key.size;
+	key.flags = DB_DBT_USERMEM;
+	MDB_ID2DISK( eip->bei_id, &nid );
+
+	/* Partial get: only the length bytes and the nrdn are exchanged */
+	DBTzero(&data);
+	data.size = sizeof(diskNode) + BEI(e)->bei_nrdn.bv_len - sizeof(ID) - 1;
+	data.ulen = data.size;
+	data.dlen = data.size;
+	data.flags = DB_DBT_USERMEM | DB_DBT_PARTIAL;
+
+	key.data = &nid;
+
+	d = op->o_tmpalloc( data.size, op->o_tmpmemctx );
+	d->nrdnlen[1] = BEI(e)->bei_nrdn.bv_len & 0xff;
+	d->nrdnlen[0] = (BEI(e)->bei_nrdn.bv_len >> 8) | 0x80;
+	/* keep a copy: the get below overwrites d with what it found */
+	dlen[0] = d->nrdnlen[0];
+	dlen[1] = d->nrdnlen[1];
+	memcpy( d->nrdn, BEI(e)->bei_nrdn.bv_val, BEI(e)->bei_nrdn.bv_len+1 );
+	data.data = d;
+
+	rc = db->cursor( db, txn, &cursor, mdb->bi_db_opflags );
+	if ( rc ) goto func_leave;
+
+	/* Delete our ID from the parent's list */
+	rc = cursor->c_get( cursor, &key, &data, DB_GET_BOTH_RANGE );
+	if ( rc == 0 ) {
+		/* a range match may land on a different record; verify it */
+		if ( dlen[1] == d->nrdnlen[1] && dlen[0] == d->nrdnlen[0] &&
+			!strcmp( d->nrdn, BEI(e)->bei_nrdn.bv_val ))
+			rc = cursor->c_del( cursor, 0 );
+		else
+			rc = DB_NOTFOUND;
+	}
+
+	/* Delete our ID from the tree. With sorted duplicates, this
+	 * will leave any child nodes still hanging around. This is OK
+	 * for modrdn, which will add our info back in later.
+	 */
+	if ( rc == 0 ) {
+		MDB_ID2DISK( e->e_id, &nid );
+		rc = cursor->c_get( cursor, &key, &data, DB_SET );
+		if ( rc == 0 )
+			rc = cursor->c_del( cursor, 0 );
+	}
+
+	cursor->c_close( cursor );
+func_leave:
+	op->o_tmpfree( d, op->o_tmpmemctx );
+
+	/* Delete IDL cache entries */
+	if ( rc == 0 && mdb->bi_idl_cache_size ) {
+		ID tmp[2];
+		/* cache key = one prefix byte immediately followed by the ID */
+		char *ptr = ((char *)&tmp[1])-1;
+		key.data = ptr;
+		key.size = sizeof(ID)+1;
+		tmp[1] = eip->bei_id;
+		*ptr = DN_ONE_PREFIX;
+		mdb_idl_cache_del_id( mdb, db, &key, e->e_id );
+		if ( eip ->bei_parent ) {
+			*ptr = DN_SUBTREE_PREFIX;
+			for (; eip && eip->bei_parent->bei_id; eip = eip->bei_parent) {
+				tmp[1] = eip->bei_id;
+				mdb_idl_cache_del_id( mdb, db, &key, e->e_id );
+			}
+			/* Handle DB with empty suffix */
+			if ( !op->o_bd->be_suffix[0].bv_len && eip ) {
+				tmp[1] = eip->bei_id;
+				mdb_idl_cache_del_id( mdb, db, &key, e->e_id );
+			}
+		}
+	}
+	Debug( LDAP_DEBUG_TRACE, "<= mdb_dn2id_delete 0x%lx: %d\n", e->e_id, rc, 0 );
+	return rc;
+}
+
+
+/* Look up the RDN at the front of "in" among the children of
+ * ei->bei_parent (ID 0 when there is no parent). On success fills in
+ * ei->bei_id and ei->bei_rdn. The cursor opened here is handed back
+ * through *cursor and is not closed by this function.
+ * Returns 0, DB_NOTFOUND, or another Berkeley DB error.
+ */
+int
+mdb_dn2id(
+	Operation	*op,
+	struct berval	*in,
+	EntryInfo	*ei,
+	DB_TXN *txn,
+	DBC **cursor )
+{
+	struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private;
+	DB *db = mdb->bi_dn2id->bdi_db;
+	DBT		key, data;
+	int		rc = 0, nrlen;
+	diskNode *d;
+	char	*ptr;
+	unsigned char dlen[2];
+	ID idp, parentID;
+
+	Debug( LDAP_DEBUG_TRACE, "=> mdb_dn2id(\"%s\")\n", in->bv_val, 0, 0 );
+
+	nrlen = dn_rdnlen( op->o_bd, in );
+	if (!nrlen) nrlen = in->bv_len;
+
+	DBTzero(&key);
+	key.size = sizeof(ID);
+	key.data = &idp;
+	key.ulen = sizeof(ID);
+	key.flags = DB_DBT_USERMEM;
+	parentID = ( ei->bei_parent != NULL ) ? ei->bei_parent->bei_id : 0;
+	MDB_ID2DISK( parentID, &idp );
+
+	/* Search key is length bytes + nrdn; the buffer is sized at three
+	 * times that to receive the returned record.
+	 */
+	DBTzero(&data);
+	data.size = sizeof(diskNode) + nrlen - sizeof(ID) - 1;
+	data.ulen = data.size * 3;
+	data.dlen = data.ulen;
+	data.flags = DB_DBT_USERMEM | DB_DBT_PARTIAL;
+
+	rc = db->cursor( db, txn, cursor, mdb->bi_db_opflags );
+	if ( rc ) return rc;
+
+	d = op->o_tmpalloc( data.size * 3, op->o_tmpmemctx );
+	d->nrdnlen[1] = nrlen & 0xff;
+	d->nrdnlen[0] = (nrlen >> 8) | 0x80;
+	/* keep a copy: the get below overwrites d with what it found */
+	dlen[0] = d->nrdnlen[0];
+	dlen[1] = d->nrdnlen[1];
+	ptr = lutil_strncopy( d->nrdn, in->bv_val, nrlen );
+	*ptr = '\0';
+	data.data = d;
+
+	rc = (*cursor)->c_get( *cursor, &key, &data, DB_GET_BOTH_RANGE );
+	/* a range match may land on a different record; verify it */
+	if ( rc == 0 && (dlen[1] != d->nrdnlen[1] || dlen[0] != d->nrdnlen[0] ||
+		strncmp( d->nrdn, in->bv_val, nrlen ))) {
+		rc = DB_NOTFOUND;
+	}
+	if ( rc == 0 ) {
+		/* the entry ID is stored in the last sizeof(ID) bytes */
+		ptr = (char *) data.data + data.size - sizeof(ID);
+		MDB_DISK2ID( ptr, &ei->bei_id );
+		ei->bei_rdn.bv_len = data.size - sizeof(diskNode) - nrlen;
+		ptr = d->nrdn + nrlen + 1;
+		ber_str2bv( ptr, ei->bei_rdn.bv_len, 1, &ei->bei_rdn );
+		if ( ei->bei_parent != NULL && !ei->bei_parent->bei_dkids ) {
+			db_recno_t dkids;
+			/* How many children does the parent have? */
+			/* FIXME: do we need to lock the parent
+			 * entryinfo? Seems safe...
+			 */
+			(*cursor)->c_count( *cursor, &dkids, 0 );
+			ei->bei_parent->bei_dkids = dkids;
+		}
+	}
+
+	op->o_tmpfree( d, op->o_tmpmemctx );
+	if( rc != 0 ) {
+		Debug( LDAP_DEBUG_TRACE, "<= mdb_dn2id: get failed: %s (%d)\n",
+			db_strerror( rc ), rc, 0 );
+	} else {
+		Debug( LDAP_DEBUG_TRACE, "<= mdb_dn2id: got id=0x%lx\n",
+			ei->bei_id, 0, 0 );
+	}
+
+	return rc;
+}
+
+/* Read the first record stored under ei->bei_id. From it, store the
+ * parent's ID in *idp and fill in ei->bei_nrdn, ei->bei_rdn and
+ * ei->bei_dkids. Returns LDAP_OTHER if that first record has the high
+ * length bit set, otherwise 0 or the Berkeley DB error.
+ */
+int
+mdb_dn2id_parent(
+	Operation *op,
+	DB_TXN *txn,
+	EntryInfo *ei,
+	ID *idp )
+{
+	struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private;
+	DB *db = mdb->bi_dn2id->bdi_db;
+	DBT		key, data;
+	DBC	*cursor;
+	int		rc = 0;
+	diskNode *d;
+	char	*ptr;
+	ID	nid;
+
+	DBTzero(&key);
+	key.size = sizeof(ID);
+	key.data = &nid;
+	key.ulen = sizeof(ID);
+	key.flags = DB_DBT_USERMEM;
+	MDB_ID2DISK( ei->bei_id, &nid );
+
+	DBTzero(&data);
+	data.flags = DB_DBT_USERMEM;
+
+	rc = db->cursor( db, txn, &cursor, mdb->bi_db_opflags );
+	if ( rc ) return rc;
+
+	data.ulen = sizeof(diskNode) + (SLAP_LDAPDN_MAXLEN * 2);
+	d = op->o_tmpalloc( data.ulen, op->o_tmpmemctx );
+	data.data = d;
+
+	rc = cursor->c_get( cursor, &key, &data, DB_SET );
+	if ( rc == 0 ) {
+		/* high bit set marks a child record, not the node's own one */
+		if (d->nrdnlen[0] & 0x80) {
+			rc = LDAP_OTHER;
+		} else {
+			db_recno_t dkids;
+			ptr = (char *) data.data + data.size - sizeof(ID);
+			MDB_DISK2ID( ptr, idp );
+			ei->bei_nrdn.bv_len = (d->nrdnlen[0] << 8) | d->nrdnlen[1];
+			ber_str2bv( d->nrdn, ei->bei_nrdn.bv_len, 1, &ei->bei_nrdn );
+			ei->bei_rdn.bv_len = data.size - sizeof(diskNode) -
+				ei->bei_nrdn.bv_len;
+			ptr = d->nrdn + ei->bei_nrdn.bv_len + 1;
+			ber_str2bv( ptr, ei->bei_rdn.bv_len, 1, &ei->bei_rdn );
+			/* How many children does this node have? */
+			cursor->c_count( cursor, &dkids, 0 );
+			ei->bei_dkids = dkids;
+		}
+	}
+	cursor->c_close( cursor );
+	op->o_tmpfree( d, op->o_tmpmemctx );
+	return rc;
+}
+
+/* Check whether entry e has children. Consults the IDL cache first when
+ * it is enabled; otherwise counts the duplicates stored under e's ID and
+ * records the count in BEI(e)->bei_dkids. A count below 2 is reported
+ * as DB_NOTFOUND (no children); 0 means children exist.
+ */
+int
+mdb_dn2id_children(
+	Operation *op,
+	DB_TXN *txn,
+	Entry *e )
+{
+	struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private;
+	DB *db = mdb->bi_dn2id->bdi_db;
+	DBT		key, data;
+	DBC		*cursor;
+	int		rc;
+	ID		id;
+	diskNode d;
+
+	DBTzero(&key);
+	key.size = sizeof(ID);
+	key.data = &e->e_id;
+	key.flags = DB_DBT_USERMEM;
+	MDB_ID2DISK( e->e_id, &id );
+
+	/* IDL cache is in host byte order */
+	if ( mdb->bi_idl_cache_size ) {
+		rc = mdb_idl_cache_get( mdb, db, &key, NULL );
+		if ( rc != LDAP_NO_SUCH_OBJECT ) {
+			return rc;
+		}
+	}
+
+	/* switch the key to the disk-format copy for the database lookup */
+	key.data = &id;
+	DBTzero(&data);
+	data.data = &d;
+	data.ulen = sizeof(d);
+	data.flags = DB_DBT_USERMEM | DB_DBT_PARTIAL;
+	data.dlen = sizeof(d);
+
+	rc = db->cursor( db, txn, &cursor, mdb->bi_db_opflags );
+	if ( rc ) return rc;
+
+	rc = cursor->c_get( cursor, &key, &data, DB_SET );
+	if ( rc == 0 ) {
+		db_recno_t dkids;
+		rc = cursor->c_count( cursor, &dkids, 0 );
+		if ( rc == 0 ) {
+			BEI(e)->bei_dkids = dkids;
+			/* the node's own record is one of the duplicates */
+			if ( dkids < 2 ) rc = DB_NOTFOUND;
+		}
+	}
+	cursor->c_close( cursor );
+	return rc;
+}
+
+/* mdb_dn2idl:
+ * We can't just use mdb_idl_fetch_key because
+ * 1 - our data items are longer than just an entry ID
+ * 2 - our data items are sorted alphabetically by nrdn, not by ID.
+ *
+ * We descend the tree recursively, so we define this cookie
+ * to hold our necessary state information. The mdb_dn2idl_internal
+ * function uses this cookie when calling itself.
+ */
+
+struct dn2id_cookie {
+	struct mdb_info *mdb;
+	Operation *op;
+	DB_TXN *txn;
+	EntryInfo *ei;		/* node currently being expanded */
+	ID *ids;		/* result list supplied by the caller */
+	ID *tmp;		/* scratch list holding one node's children */
+	ID *buf;		/* bulk-read buffer, also used as sort scratch */
+	DB *db;
+	DBC *dbc;
+	DBT key;
+	DBT data;
+	ID dbuf;
+	ID id;			/* the byte just before this field takes the IDL cache prefix */
+	ID nid;			/* cx->id in the form produced by MDB_ID2DISK */
+	int rc;
+	int depth;		/* recursion depth; 0 for the search base */
+	char need_sort;
+	char prefix;		/* DN_ONE_PREFIX or DN_SUBTREE_PREFIX */
+};
+
+/* avl_apply callback: append one cached child's ID to the list in arg */
+static int
+apply_func(
+	void *data,
+	void *arg )
+{
+	EntryInfo *ei = data;
+	ID *idl = arg;
+
+	mdb_idl_append_one( idl, ei->bei_id );
+	return 0;
+}
+
+/* Collect the children of cx->ei into cx->tmp, taking them from the IDL
+ * cache, the database, or the in-memory EntryInfo tree. For a subtree
+ * search the children are appended to cx->ids and each child is then
+ * expanded recursively. Returns cx->rc.
+ */
+static int
+mdb_dn2idl_internal(
+	struct dn2id_cookie *cx
+)
+{
+	MDB_IDL_ZERO( cx->tmp );
+
+	if ( cx->mdb->bi_idl_cache_size ) {
+		/* cache key = one prefix byte immediately followed by cx->id */
+		char *ptr = ((char *)&cx->id)-1;
+
+		cx->key.data = ptr;
+		cx->key.size = sizeof(ID)+1;
+		if ( cx->prefix == DN_SUBTREE_PREFIX ) {
+			ID *ids = cx->depth ? cx->tmp : cx->ids;
+			*ptr = cx->prefix;
+			cx->rc = mdb_idl_cache_get(cx->mdb, cx->db, &cx->key, ids);
+			if ( cx->rc == LDAP_SUCCESS ) {
+				if ( cx->depth ) {
+					mdb_idl_append( cx->ids, cx->tmp );
+					cx->need_sort = 1;
+				}
+				return cx->rc;
+			}
+		}
+		*ptr = DN_ONE_PREFIX;
+		cx->rc = mdb_idl_cache_get(cx->mdb, cx->db, &cx->key, cx->tmp);
+		if ( cx->rc == LDAP_SUCCESS ) {
+			goto gotit;
+		}
+		if ( cx->rc == DB_NOTFOUND ) {
+			return cx->rc;
+		}
+	}
+
+	mdb_cache_entryinfo_lock( cx->ei );
+
+	/* If number of kids in the cache differs from on-disk, load
+	 * up all the kids from the database
+	 */
+	if ( cx->ei->bei_ckids+1 != cx->ei->bei_dkids ) {
+		EntryInfo ei;
+		db_recno_t dkids = cx->ei->bei_dkids;
+		ei.bei_parent = cx->ei;
+
+		/* Only one thread should load the cache */
+		while ( cx->ei->bei_state & CACHE_ENTRY_ONELEVEL ) {
+			mdb_cache_entryinfo_unlock( cx->ei );
+			ldap_pvt_thread_yield();
+			mdb_cache_entryinfo_lock( cx->ei );
+			if ( cx->ei->bei_ckids+1 == cx->ei->bei_dkids ) {
+				goto synced;
+			}
+		}
+
+		cx->ei->bei_state |= CACHE_ENTRY_ONELEVEL;
+
+		mdb_cache_entryinfo_unlock( cx->ei );
+
+		cx->rc = cx->db->cursor( cx->db, NULL, &cx->dbc,
+			cx->mdb->bi_db_opflags );
+		if ( cx->rc )
+			goto done_one;
+
+		cx->data.data = &cx->dbuf;
+		cx->data.ulen = sizeof(ID);
+		cx->data.dlen = sizeof(ID);
+		cx->data.flags = DB_DBT_USERMEM | DB_DBT_PARTIAL;
+
+		/* The first item holds the parent ID. Ignore it. */
+		cx->key.data = &cx->nid;
+		cx->key.size = sizeof(ID);
+		cx->rc = cx->dbc->c_get( cx->dbc, &cx->key, &cx->data, DB_SET );
+		if ( cx->rc ) {
+			cx->dbc->c_close( cx->dbc );
+			goto done_one;
+		}
+
+		/* If the on-disk count is zero we've never checked it.
+		 * Count it now.
+		 */
+		if ( !dkids ) {
+			cx->dbc->c_count( cx->dbc, &dkids, 0 );
+			cx->ei->bei_dkids = dkids;
+		}
+
+		cx->data.data = cx->buf;
+		cx->data.ulen = MDB_IDL_UM_SIZE * sizeof(ID);
+		cx->data.flags = DB_DBT_USERMEM;
+
+		if ( dkids > 1 ) {
+			/* Fetch the rest of the IDs in a loop... */
+			while ( (cx->rc = cx->dbc->c_get( cx->dbc, &cx->key, &cx->data,
+				DB_MULTIPLE | DB_NEXT_DUP )) == 0 ) {
+				u_int8_t *j;
+				size_t len;
+				void *ptr;
+				DB_MULTIPLE_INIT( ptr, &cx->data );
+				while (ptr) {
+					DB_MULTIPLE_NEXT( ptr, &cx->data, j, len );
+					if (j) {
+						EntryInfo *ei2;
+						diskNode *d = (diskNode *)j;
+						short nrlen;
+
+						MDB_DISK2ID( j + len - sizeof(ID), &ei.bei_id );
+						/* strip the high marker bit from the length */
+						nrlen = ((d->nrdnlen[0] ^ 0x80) << 8) | d->nrdnlen[1];
+						ei.bei_nrdn.bv_len = nrlen;
+						/* nrdn/rdn are set in-place.
+						 * mdb_cache_load will copy them as needed
+						 */
+						ei.bei_nrdn.bv_val = d->nrdn;
+						ei.bei_rdn.bv_len = len - sizeof(diskNode)
+							- ei.bei_nrdn.bv_len;
+						ei.bei_rdn.bv_val = d->nrdn + ei.bei_nrdn.bv_len + 1;
+						mdb_idl_append_one( cx->tmp, ei.bei_id );
+						mdb_cache_load( cx->mdb, &ei, &ei2 );
+					}
+				}
+			}
+		}
+
+		cx->rc = cx->dbc->c_close( cx->dbc );
+done_one:
+		/* loading finished or failed: clear the loader flag again */
+		mdb_cache_entryinfo_lock( cx->ei );
+		cx->ei->bei_state &= ~CACHE_ENTRY_ONELEVEL;
+		mdb_cache_entryinfo_unlock( cx->ei );
+		if ( cx->rc )
+			return cx->rc;
+
+	} else {
+		/* The in-memory cache is in sync with the on-disk data.
+		 * do we have any kids?
+		 */
+synced:
+		cx->rc = 0;
+		if ( cx->ei->bei_ckids > 0 ) {
+			/* Walk the kids tree; order is irrelevant since mdb_idl_sort
+			 * will sort it later.
+			 */
+			avl_apply( cx->ei->bei_kids, apply_func,
+				cx->tmp, -1, AVL_POSTORDER );
+		}
+		mdb_cache_entryinfo_unlock( cx->ei );
+	}
+
+	if ( !MDB_IDL_IS_RANGE( cx->tmp ) && cx->tmp[0] > 3 )
+		mdb_idl_sort( cx->tmp, cx->buf );
+	if ( cx->mdb->bi_idl_cache_max_size && !MDB_IDL_IS_ZERO( cx->tmp )) {
+		char *ptr = ((char *)&cx->id)-1;
+		cx->key.data = ptr;
+		cx->key.size = sizeof(ID)+1;
+		*ptr = DN_ONE_PREFIX;
+		mdb_idl_cache_put( cx->mdb, cx->db, &cx->key, cx->tmp, cx->rc );
+	}
+
+gotit:
+	if ( !MDB_IDL_IS_ZERO( cx->tmp )) {
+		if ( cx->prefix == DN_SUBTREE_PREFIX ) {
+			mdb_idl_append( cx->ids, cx->tmp );
+			cx->need_sort = 1;
+			if ( !(cx->ei->bei_state & CACHE_ENTRY_NO_GRANDKIDS)) {
+				ID *save, idcurs;
+				EntryInfo *ei = cx->ei;
+				int nokids = 1;
+				/* cx->tmp is reused by the recursive calls, so walk
+				 * a private copy of this level's children.
+				 */
+				save = cx->op->o_tmpalloc( MDB_IDL_SIZEOF( cx->tmp ),
+					cx->op->o_tmpmemctx );
+				MDB_IDL_CPY( save, cx->tmp );
+
+				idcurs = 0;
+				cx->depth++;
+				for ( cx->id = mdb_idl_first( save, &idcurs );
+					cx->id != NOID;
+					cx->id = mdb_idl_next( save, &idcurs )) {
+					EntryInfo *ei2;
+					cx->ei = NULL;
+					if ( mdb_cache_find_id( cx->op, cx->txn, cx->id, &cx->ei,
+						ID_NOENTRY, NULL ))
+						continue;
+					if ( cx->ei ) {
+						ei2 = cx->ei;
+						if ( !( ei2->bei_state & CACHE_ENTRY_NO_KIDS )) {
+							MDB_ID2DISK( cx->id, &cx->nid );
+							mdb_dn2idl_internal( cx );
+							if ( !MDB_IDL_IS_ZERO( cx->tmp ))
+								nokids = 0;
+						}
+						mdb_cache_entryinfo_lock( ei2 );
+						ei2->bei_finders--;
+						mdb_cache_entryinfo_unlock( ei2 );
+					}
+				}
+				cx->depth--;
+				cx->op->o_tmpfree( save, cx->op->o_tmpmemctx );
+				if ( nokids ) {
+					mdb_cache_entryinfo_lock( ei );
+					ei->bei_state |= CACHE_ENTRY_NO_GRANDKIDS;
+					mdb_cache_entryinfo_unlock( ei );
+				}
+			}
+			/* Make sure caller knows it had kids! */
+			cx->tmp[0]=1;
+
+			cx->rc = 0;
+		} else {
+			MDB_IDL_CPY( cx->ids, cx->tmp );
+		}
+	}
+	return cx->rc;
+}
+
+/* Build in ids the ID list for a search based at ei: its immediate
+ * children for a one-level scope, otherwise the base itself plus all of
+ * its descendants. stack supplies the scratch space: cx.tmp is its start
+ * and cx.buf begins MDB_IDL_UM_SIZE IDs in. DB_NOTFOUND from the walk is
+ * reported as LDAP_SUCCESS.
+ */
+int
+mdb_dn2idl(
+	Operation	*op,
+	DB_TXN *txn,
+	struct berval *ndn,
+	EntryInfo	*ei,
+	ID *ids,
+	ID *stack )
+{
+	struct mdb_info *mdb = (struct mdb_info *)op->o_bd->be_private;
+	struct dn2id_cookie cx;
+
+	Debug( LDAP_DEBUG_TRACE, "=> mdb_dn2idl(\"%s\")\n",
+		ndn->bv_val, 0, 0 );
+
+#ifndef MDB_MULTIPLE_SUFFIXES
+	/* Shortcut: a non-onelevel search from the tree root or from the
+	 * suffix entry just gets the all-IDs list.
+	 */
+	if ( op->ors_scope != LDAP_SCOPE_ONELEVEL &&
+		( ei->bei_id == 0 ||
+		( ei->bei_parent->bei_id == 0 && op->o_bd->be_suffix[0].bv_len )))
+	{
+		MDB_IDL_ALL( mdb, ids );
+		return 0;
+	}
+#endif
+
+	cx.id = ei->bei_id;
+	MDB_ID2DISK( cx.id, &cx.nid );
+	cx.ei = ei;
+	cx.mdb = mdb;
+	cx.db = cx.mdb->bi_dn2id->bdi_db;
+	cx.prefix = (op->ors_scope == LDAP_SCOPE_ONELEVEL) ?
+		DN_ONE_PREFIX : DN_SUBTREE_PREFIX;
+	cx.ids = ids;
+	cx.tmp = stack;
+	cx.buf = stack + MDB_IDL_UM_SIZE;
+	cx.op = op;
+	cx.txn = txn;
+	cx.need_sort = 0;
+	cx.depth = 0;
+
+	/* a subtree result starts out containing the base entry itself */
+	if ( cx.prefix == DN_SUBTREE_PREFIX ) {
+		ids[0] = 1;
+		ids[1] = cx.id;
+	} else {
+		MDB_IDL_ZERO( ids );
+	}
+	if ( cx.ei->bei_state & CACHE_ENTRY_NO_KIDS )
+		return LDAP_SUCCESS;
+
+	DBTzero(&cx.key);
+	cx.key.ulen = sizeof(ID);
+	cx.key.size = sizeof(ID);
+	cx.key.flags = DB_DBT_USERMEM;
+
+	DBTzero(&cx.data);
+
+	mdb_dn2idl_internal(&cx);
+	if ( cx.need_sort ) {
+		char *ptr = ((char *)&cx.id)-1;
+		if ( !MDB_IDL_IS_RANGE( cx.ids ) && cx.ids[0] > 3 )
+			mdb_idl_sort( cx.ids, cx.tmp );
+		cx.key.data = ptr;
+		cx.key.size = sizeof(ID)+1;
+		*ptr = cx.prefix;
+		/* the recursion changed cx.id; restore the base ID for the key */
+		cx.id = ei->bei_id;
+		if ( cx.mdb->bi_idl_cache_max_size )
+			mdb_idl_cache_put( cx.mdb, cx.db, &cx.key, cx.ids, cx.rc );
+	}
+
+	if ( cx.rc == DB_NOTFOUND )
+		cx.rc = LDAP_SUCCESS;
+
+	return cx.rc;
+}
diff --git a/servers/slapd/back-mdb/error.c b/servers/slapd/back-mdb/error.c
new file mode 100644
index 0000000000..14a06726de
--- /dev/null
+++ b/servers/slapd/back-mdb/error.c
@@ -0,0 +1,62 @@
+/* error.c - MDB errcall routine */
+/* $OpenLDAP$ */
+/* This work is part of OpenLDAP Software . + * + * Copyright 2000-2011 The OpenLDAP Foundation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * . + */ + +#include "portable.h" + +#include +#include + +#include "slap.h" +#include "back-mdb.h" + +/* mdb_errcall - error callback that logs one message at LDAP_DEBUG_ANY. + * + * pfx is printed inside "mdb(...)" and msg after it. One of two prototypes + * is selected by DB_VERSION_FULL; the newer one (>= 0x04030000) also takes + * a DB_ENV handle, which the body never reads. + * + * NOTE(review): under HAVE_EBCDIC, __etoa() is applied to msg when its + * first byte is above 0x7f - presumably an in-place EBCDIC-to-ASCII + * conversion; confirm, since msg is const-qualified in the newer prototype. + */ +#if DB_VERSION_FULL < 0x04030000 +void mdb_errcall( const char *pfx, char * msg ) +#else +void mdb_errcall( const DB_ENV *env, const char *pfx, const char * msg ) +#endif +{ +#ifdef HAVE_EBCDIC + if ( msg[0] > 0x7f ) + __etoa( msg ); +#endif + Debug( LDAP_DEBUG_ANY, "mdb(%s): %s\n", pfx, msg, 0 ); +} + +/* mdb_msgcall - message callback, compiled only when DB_VERSION_FULL is at + * least 0x04030000: logs msg at LDAP_DEBUG_TRACE behind a fixed "mdb: " + * prefix. env is unused. The HAVE_EBCDIC handling mirrors mdb_errcall. + */ +#if DB_VERSION_FULL >= 0x04030000 +void mdb_msgcall( const DB_ENV *env, const char *msg ) +{ +#ifdef HAVE_EBCDIC + if ( msg[0] > 0x7f ) + __etoa( msg ); +#endif + Debug( LDAP_DEBUG_TRACE, "mdb: %s\n", msg, 0, 0 ); +} +#endif + +#ifdef HAVE_EBCDIC + +#undef db_strerror + +/* ebcdic_dberror - return the db_strerror( rc ) text after copying it into + * a function-local static buffer and passing it through __etoa(). + * The #undef above removes any macro named db_strerror, so the call below + * reaches the function of that name. + * + * Not re-entrant! Every call overwrites the same 1024-byte buffer, and the + * strcpy() is unbounded, so this relies on db_strerror() text being shorter + * than the buffer. + */ +char *ebcdic_dberror( int rc ) +{ + static char msg[1024]; + + strcpy( msg, db_strerror( rc ) ); + __etoa( msg ); + return msg; +} +#endif diff --git a/servers/slapd/back-mdb/extended.c b/servers/slapd/back-mdb/extended.c new file mode 100644 index 0000000000..57a057139c --- /dev/null +++ b/servers/slapd/back-mdb/extended.c @@ -0,0 +1,54 @@ +/* extended.c - mdb backend extended routines */ +/* $OpenLDAP$ */ +/* This work is part of OpenLDAP Software . + * + * Copyright 2000-2011 The OpenLDAP Foundation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * . 
+ */ + +#include "portable.h" + +#include +#include + +#include "back-mdb.h" +#include "lber_pvt.h" + +/* Dispatch table pairing an extended-operation OID with its handler. + * The scan in mdb_extended stops at the first entry whose handler is NULL; + * the table currently holds only that terminator, so no OID can match. + */ +static struct exop { + struct berval *oid; + BI_op_extended *extended; +} exop_table[] = { + { NULL, NULL } +}; + +/* mdb_extended - dispatch an extended operation through exop_table. + * + * Each table OID is compared with op->oq_extended.rs_reqoid using + * ber_bvcmp(); the first match has its handler called as handler( op, rs ) + * and that result is returned. When nothing matches, rs->sr_text is set to + * an explanatory string and LDAP_UNWILLING_TO_PERFORM is stored in + * rs->sr_err and returned. + */ +int +mdb_extended( Operation *op, SlapReply *rs ) +/* struct berval *reqoid, + struct berval *reqdata, + char **rspoid, + struct berval **rspdata, + LDAPControl *** rspctrls, + const char** text, + BerVarray *refs +) */ +{ + int i; + + for( i=0; exop_table[i].extended != NULL; i++ ) { + if( ber_bvcmp( exop_table[i].oid, &op->oq_extended.rs_reqoid ) == 0 ) { + return (exop_table[i].extended)( op, rs ); + } + } + + /* no handler registered for the requested OID */ + rs->sr_text = "not supported within naming context"; + return rs->sr_err = LDAP_UNWILLING_TO_PERFORM; +} + diff --git a/servers/slapd/back-mdb/filterindex.c b/servers/slapd/back-mdb/filterindex.c new file mode 100644 index 0000000000..747a6d0c4f --- /dev/null +++ b/servers/slapd/back-mdb/filterindex.c @@ -0,0 +1,1179 @@ +/* filterindex.c - generate the list of candidate entries from a filter */ +/* $OpenLDAP$ */ +/* This work is part of OpenLDAP Software . + * + * Copyright 2000-2011 The OpenLDAP Foundation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * . 
+ */ + +#include "portable.h" + +#include +#include + +#include "back-mdb.h" +#include "idl.h" +#ifdef LDAP_COMP_MATCH +#include +#endif + +static int presence_candidates( + Operation *op, + DB_TXN *rtxn, + AttributeDescription *desc, + ID *ids ); + +static int equality_candidates( + Operation *op, + DB_TXN *rtxn, + AttributeAssertion *ava, + ID *ids, + ID *tmp ); +static int inequality_candidates( + Operation *op, + DB_TXN *rtxn, + AttributeAssertion *ava, + ID *ids, + ID *tmp, + int gtorlt ); +static int approx_candidates( + Operation *op, + DB_TXN *rtxn, + AttributeAssertion *ava, + ID *ids, + ID *tmp ); +static int substring_candidates( + Operation *op, + DB_TXN *rtxn, + SubstringsAssertion *sub, + ID *ids, + ID *tmp ); + +static int list_candidates( + Operation *op, + DB_TXN *rtxn, + Filter *flist, + int ftype, + ID *ids, + ID *tmp, + ID *stack ); + +static int +ext_candidates( + Operation *op, + DB_TXN *rtxn, + MatchingRuleAssertion *mra, + ID *ids, + ID *tmp, + ID *stack); + +#ifdef LDAP_COMP_MATCH +static int +comp_candidates ( + Operation *op, + DB_TXN *rtxn, + MatchingRuleAssertion *mra, + ComponentFilter *f, + ID *ids, + ID *tmp, + ID *stack); + +static int +ava_comp_candidates ( + Operation *op, + DB_TXN *rtxn, + AttributeAssertion *ava, + AttributeAliasing *aa, + ID *ids, + ID *tmp, + ID *stack); +#endif + +int +mdb_filter_candidates( + Operation *op, + DB_TXN *rtxn, + Filter *f, + ID *ids, + ID *tmp, + ID *stack ) +{ + int rc = 0; +#ifdef LDAP_COMP_MATCH + AttributeAliasing *aa; +#endif + Debug( LDAP_DEBUG_FILTER, "=> mdb_filter_candidates\n", 0, 0, 0 ); + + if ( f->f_choice & SLAPD_FILTER_UNDEFINED ) { + MDB_IDL_ZERO( ids ); + goto out; + } + + switch ( f->f_choice ) { + case SLAPD_FILTER_COMPUTED: + switch( f->f_result ) { + case SLAPD_COMPARE_UNDEFINED: + /* This technically is not the same as FALSE, but it + * certainly will produce no matches. 
+ */ + /* FALL THRU */ + case LDAP_COMPARE_FALSE: + MDB_IDL_ZERO( ids ); + break; + case LDAP_COMPARE_TRUE: { + struct mdb_info *mdb = (struct mdb_info *)op->o_bd->be_private; + MDB_IDL_ALL( mdb, ids ); + } break; + case LDAP_SUCCESS: + /* this is a pre-computed scope, leave it alone */ + break; + } + break; + case LDAP_FILTER_PRESENT: + Debug( LDAP_DEBUG_FILTER, "\tPRESENT\n", 0, 0, 0 ); + rc = presence_candidates( op, rtxn, f->f_desc, ids ); + break; + + case LDAP_FILTER_EQUALITY: + Debug( LDAP_DEBUG_FILTER, "\tEQUALITY\n", 0, 0, 0 ); +#ifdef LDAP_COMP_MATCH + if ( is_aliased_attribute && ( aa = is_aliased_attribute ( f->f_ava->aa_desc ) ) ) { + rc = ava_comp_candidates ( op, rtxn, f->f_ava, aa, ids, tmp, stack ); + } + else +#endif + { + rc = equality_candidates( op, rtxn, f->f_ava, ids, tmp ); + } + break; + + case LDAP_FILTER_APPROX: + Debug( LDAP_DEBUG_FILTER, "\tAPPROX\n", 0, 0, 0 ); + rc = approx_candidates( op, rtxn, f->f_ava, ids, tmp ); + break; + + case LDAP_FILTER_SUBSTRINGS: + Debug( LDAP_DEBUG_FILTER, "\tSUBSTRINGS\n", 0, 0, 0 ); + rc = substring_candidates( op, rtxn, f->f_sub, ids, tmp ); + break; + + case LDAP_FILTER_GE: + /* if no GE index, use pres */ + Debug( LDAP_DEBUG_FILTER, "\tGE\n", 0, 0, 0 ); + if( f->f_ava->aa_desc->ad_type->sat_ordering && + ( f->f_ava->aa_desc->ad_type->sat_ordering->smr_usage & SLAP_MR_ORDERED_INDEX ) ) + rc = inequality_candidates( op, rtxn, f->f_ava, ids, tmp, LDAP_FILTER_GE ); + else + rc = presence_candidates( op, rtxn, f->f_ava->aa_desc, ids ); + break; + + case LDAP_FILTER_LE: + /* if no LE index, use pres */ + Debug( LDAP_DEBUG_FILTER, "\tLE\n", 0, 0, 0 ); + if( f->f_ava->aa_desc->ad_type->sat_ordering && + ( f->f_ava->aa_desc->ad_type->sat_ordering->smr_usage & SLAP_MR_ORDERED_INDEX ) ) + rc = inequality_candidates( op, rtxn, f->f_ava, ids, tmp, LDAP_FILTER_LE ); + else + rc = presence_candidates( op, rtxn, f->f_ava->aa_desc, ids ); + break; + + case LDAP_FILTER_NOT: + /* no indexing to support NOT filters */ + 
Debug( LDAP_DEBUG_FILTER, "\tNOT\n", 0, 0, 0 ); + { struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private; + MDB_IDL_ALL( mdb, ids ); + } + break; + + case LDAP_FILTER_AND: + Debug( LDAP_DEBUG_FILTER, "\tAND\n", 0, 0, 0 ); + rc = list_candidates( op, rtxn, + f->f_and, LDAP_FILTER_AND, ids, tmp, stack ); + break; + + case LDAP_FILTER_OR: + Debug( LDAP_DEBUG_FILTER, "\tOR\n", 0, 0, 0 ); + rc = list_candidates( op, rtxn, + f->f_or, LDAP_FILTER_OR, ids, tmp, stack ); + break; + case LDAP_FILTER_EXT: + Debug( LDAP_DEBUG_FILTER, "\tEXT\n", 0, 0, 0 ); + rc = ext_candidates( op, rtxn, f->f_mra, ids, tmp, stack ); + break; + default: + Debug( LDAP_DEBUG_FILTER, "\tUNKNOWN %lu\n", + (unsigned long) f->f_choice, 0, 0 ); + /* Must not return NULL, otherwise extended filters break */ + { struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private; + MDB_IDL_ALL( mdb, ids ); + } + } + +out: + Debug( LDAP_DEBUG_FILTER, + "<= mdb_filter_candidates: id=%ld first=%ld last=%ld\n", + (long) ids[0], + (long) MDB_IDL_FIRST( ids ), + (long) MDB_IDL_LAST( ids ) ); + + return rc; +} + +#ifdef LDAP_COMP_MATCH +static int +comp_list_candidates( + Operation *op, + DB_TXN *rtxn, + MatchingRuleAssertion* mra, + ComponentFilter *flist, + int ftype, + ID *ids, + ID *tmp, + ID *save ) +{ + int rc = 0; + ComponentFilter *f; + + Debug( LDAP_DEBUG_FILTER, "=> comp_list_candidates 0x%x\n", ftype, 0, 0 ); + for ( f = flist; f != NULL; f = f->cf_next ) { + /* ignore precomputed scopes */ + if ( f->cf_choice == SLAPD_FILTER_COMPUTED && + f->cf_result == LDAP_SUCCESS ) { + continue; + } + MDB_IDL_ZERO( save ); + rc = comp_candidates( op, rtxn, mra, f, save, tmp, save+MDB_IDL_UM_SIZE ); + + if ( rc != 0 ) { + if ( ftype == LDAP_COMP_FILTER_AND ) { + rc = 0; + continue; + } + break; + } + + if ( ftype == LDAP_COMP_FILTER_AND ) { + if ( f == flist ) { + MDB_IDL_CPY( ids, save ); + } else { + mdb_idl_intersection( ids, save ); + } + if( MDB_IDL_IS_ZERO( ids ) ) + break; + } else { + if ( f == 
flist ) { + MDB_IDL_CPY( ids, save ); + } else { + mdb_idl_union( ids, save ); + } + } + } + + if( rc == LDAP_SUCCESS ) { + Debug( LDAP_DEBUG_FILTER, + "<= comp_list_candidates: id=%ld first=%ld last=%ld\n", + (long) ids[0], + (long) MDB_IDL_FIRST(ids), + (long) MDB_IDL_LAST(ids) ); + + } else { + Debug( LDAP_DEBUG_FILTER, + "<= comp_list_candidates: undefined rc=%d\n", + rc, 0, 0 ); + } + + return rc; +} + +static int +comp_equality_candidates ( + Operation *op, + DB_TXN *rtxn, + MatchingRuleAssertion *mra, + ComponentAssertion *ca, + ID *ids, + ID *tmp, + ID *stack) +{ + struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private; + DB *db; + int i; + int rc; + slap_mask_t mask; + struct berval prefix = {0, NULL}; + struct berval *keys = NULL; + MatchingRule *mr = mra->ma_rule; + Syntax *sat_syntax; + ComponentReference* cr_list, *cr; + AttrInfo *ai; + + MDB_IDL_ALL( mdb, ids ); + + if ( !ca->ca_comp_ref ) + return 0; + + ai = mdb_attr_mask( op->o_bd->be_private, mra->ma_desc ); + if( ai ) { + cr_list = ai->ai_cr; + } + else { + return 0; + } + /* find a component reference to be indexed */ + sat_syntax = ca->ca_ma_rule->smr_syntax; + for ( cr = cr_list ; cr ; cr = cr->cr_next ) { + if ( cr->cr_string.bv_len == ca->ca_comp_ref->cr_string.bv_len && + strncmp( cr->cr_string.bv_val, ca->ca_comp_ref->cr_string.bv_val,cr->cr_string.bv_len ) == 0 ) + break; + } + + if ( !cr ) + return 0; + + rc = mdb_index_param( op->o_bd, mra->ma_desc, LDAP_FILTER_EQUALITY, + &db, &mask, &prefix ); + + if( rc != LDAP_SUCCESS ) { + return 0; + } + + if( !mr ) { + return 0; + } + + if( !mr->smr_filter ) { + return 0; + } + + rc = (ca->ca_ma_rule->smr_filter)( + LDAP_FILTER_EQUALITY, + cr->cr_indexmask, + sat_syntax, + ca->ca_ma_rule, + &prefix, + &ca->ca_ma_value, + &keys, op->o_tmpmemctx ); + + if( rc != LDAP_SUCCESS ) { + return 0; + } + + if( keys == NULL ) { + return 0; + } + for ( i= 0; keys[i].bv_val != NULL; i++ ) { + rc = mdb_key_read( op->o_bd, db, rtxn, &keys[i], tmp, 
NULL, 0 ); + + if( rc == DB_NOTFOUND ) { + MDB_IDL_ZERO( ids ); + rc = 0; + break; + } else if( rc != LDAP_SUCCESS ) { + break; + } + + if( MDB_IDL_IS_ZERO( tmp ) ) { + MDB_IDL_ZERO( ids ); + break; + } + + if ( i == 0 ) { + MDB_IDL_CPY( ids, tmp ); + } else { + mdb_idl_intersection( ids, tmp ); + } + + if( MDB_IDL_IS_ZERO( ids ) ) + break; + } + ber_bvarray_free_x( keys, op->o_tmpmemctx ); + + Debug( LDAP_DEBUG_TRACE, + "<= comp_equality_candidates: id=%ld, first=%ld, last=%ld\n", + (long) ids[0], + (long) MDB_IDL_FIRST(ids), + (long) MDB_IDL_LAST(ids) ); + return( rc ); +} + +static int +ava_comp_candidates ( + Operation *op, + DB_TXN *rtxn, + AttributeAssertion *ava, + AttributeAliasing *aa, + ID *ids, + ID *tmp, + ID *stack ) +{ + MatchingRuleAssertion mra; + + mra.ma_rule = ava->aa_desc->ad_type->sat_equality; + if ( !mra.ma_rule ) { + struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private; + MDB_IDL_ALL( mdb, ids ); + return 0; + } + mra.ma_desc = aa->aa_aliased_ad; + mra.ma_rule = ava->aa_desc->ad_type->sat_equality; + + return comp_candidates ( op, rtxn, &mra, ava->aa_cf, ids, tmp, stack ); +} + +static int +comp_candidates ( + Operation *op, + DB_TXN *rtxn, + MatchingRuleAssertion *mra, + ComponentFilter *f, + ID *ids, + ID *tmp, + ID *stack) +{ + int rc; + + if ( !f ) return LDAP_PROTOCOL_ERROR; + + Debug( LDAP_DEBUG_FILTER, "comp_candidates\n", 0, 0, 0 ); + switch ( f->cf_choice ) { + case SLAPD_FILTER_COMPUTED: + rc = f->cf_result; + break; + case LDAP_COMP_FILTER_AND: + rc = comp_list_candidates( op, rtxn, mra, f->cf_and, LDAP_COMP_FILTER_AND, ids, tmp, stack ); + break; + case LDAP_COMP_FILTER_OR: + rc = comp_list_candidates( op, rtxn, mra, f->cf_or, LDAP_COMP_FILTER_OR, ids, tmp, stack ); + break; + case LDAP_COMP_FILTER_NOT: + /* No component indexing supported for NOT filter */ + Debug( LDAP_DEBUG_FILTER, "\tComponent NOT\n", 0, 0, 0 ); + { + struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private; + MDB_IDL_ALL( mdb, ids ); + } + 
rc = LDAP_PROTOCOL_ERROR; + break; + case LDAP_COMP_FILTER_ITEM: + rc = comp_equality_candidates( op, rtxn, mra, f->cf_ca, ids, tmp, stack ); + break; + default: + { + struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private; + MDB_IDL_ALL( mdb, ids ); + } + rc = LDAP_PROTOCOL_ERROR; + } + + return( rc ); +} +#endif + +static int +ext_candidates( + Operation *op, + DB_TXN *rtxn, + MatchingRuleAssertion *mra, + ID *ids, + ID *tmp, + ID *stack) +{ + struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private; + +#ifdef LDAP_COMP_MATCH + /* + * Currently Only Component Indexing for componentFilterMatch is supported + * Indexing for an extensible filter is not supported yet + */ + if ( mra->ma_cf ) { + return comp_candidates ( op, rtxn, mra, mra->ma_cf, ids, tmp, stack); + } +#endif + if ( mra->ma_desc == slap_schema.si_ad_entryDN ) { + int rc; + EntryInfo *ei; + + MDB_IDL_ZERO( ids ); + if ( mra->ma_rule == slap_schema.si_mr_distinguishedNameMatch ) { + ei = NULL; + rc = mdb_cache_find_ndn( op, rtxn, &mra->ma_value, &ei ); + if ( rc == LDAP_SUCCESS ) + mdb_idl_insert( ids, ei->bei_id ); + if ( ei ) + mdb_cache_entryinfo_unlock( ei ); + return 0; + } else if ( mra->ma_rule && mra->ma_rule->smr_match == + dnRelativeMatch && dnIsSuffix( &mra->ma_value, + op->o_bd->be_nsuffix )) { + int scope; + if ( mra->ma_rule == slap_schema.si_mr_dnSuperiorMatch ) { + struct berval pdn; + ei = NULL; + dnParent( &mra->ma_value, &pdn ); + mdb_cache_find_ndn( op, rtxn, &pdn, &ei ); + if ( ei ) { + mdb_cache_entryinfo_unlock( ei ); + while ( ei && ei->bei_id ) { + mdb_idl_insert( ids, ei->bei_id ); + ei = ei->bei_parent; + } + } + return 0; + } + if ( mra->ma_rule == slap_schema.si_mr_dnSubtreeMatch ) + scope = LDAP_SCOPE_SUBTREE; + else if ( mra->ma_rule == slap_schema.si_mr_dnOneLevelMatch ) + scope = LDAP_SCOPE_ONELEVEL; + else if ( mra->ma_rule == slap_schema.si_mr_dnSubordinateMatch ) + scope = LDAP_SCOPE_SUBORDINATE; + else + scope = LDAP_SCOPE_BASE; + if ( scope > 
LDAP_SCOPE_BASE ) { + ei = NULL; + rc = mdb_cache_find_ndn( op, rtxn, &mra->ma_value, &ei ); + if ( ei ) + mdb_cache_entryinfo_unlock( ei ); + if ( rc == LDAP_SUCCESS ) { + int sc = op->ors_scope; + op->ors_scope = scope; + rc = mdb_dn2idl( op, rtxn, &mra->ma_value, ei, ids, + stack ); + op->ors_scope = sc; + } + return 0; + } + } + } + + MDB_IDL_ALL( mdb, ids ); + return 0; +} + +static int +list_candidates( + Operation *op, + DB_TXN *rtxn, + Filter *flist, + int ftype, + ID *ids, + ID *tmp, + ID *save ) +{ + int rc = 0; + Filter *f; + + Debug( LDAP_DEBUG_FILTER, "=> mdb_list_candidates 0x%x\n", ftype, 0, 0 ); + for ( f = flist; f != NULL; f = f->f_next ) { + /* ignore precomputed scopes */ + if ( f->f_choice == SLAPD_FILTER_COMPUTED && + f->f_result == LDAP_SUCCESS ) { + continue; + } + MDB_IDL_ZERO( save ); + rc = mdb_filter_candidates( op, rtxn, f, save, tmp, + save+MDB_IDL_UM_SIZE ); + + if ( rc != 0 ) { + if ( rc == DB_LOCK_DEADLOCK ) + return rc; + + if ( ftype == LDAP_FILTER_AND ) { + rc = 0; + continue; + } + break; + } + + + if ( ftype == LDAP_FILTER_AND ) { + if ( f == flist ) { + MDB_IDL_CPY( ids, save ); + } else { + mdb_idl_intersection( ids, save ); + } + if( MDB_IDL_IS_ZERO( ids ) ) + break; + } else { + if ( f == flist ) { + MDB_IDL_CPY( ids, save ); + } else { + mdb_idl_union( ids, save ); + } + } + } + + if( rc == LDAP_SUCCESS ) { + Debug( LDAP_DEBUG_FILTER, + "<= mdb_list_candidates: id=%ld first=%ld last=%ld\n", + (long) ids[0], + (long) MDB_IDL_FIRST(ids), + (long) MDB_IDL_LAST(ids) ); + + } else { + Debug( LDAP_DEBUG_FILTER, + "<= mdb_list_candidates: undefined rc=%d\n", + rc, 0, 0 ); + } + + return rc; +} + +static int +presence_candidates( + Operation *op, + DB_TXN *rtxn, + AttributeDescription *desc, + ID *ids ) +{ + struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private; + DB *db; + int rc; + slap_mask_t mask; + struct berval prefix = {0, NULL}; + + Debug( LDAP_DEBUG_TRACE, "=> mdb_presence_candidates (%s)\n", + 
desc->ad_cname.bv_val, 0, 0 ); + + MDB_IDL_ALL( mdb, ids ); + + if( desc == slap_schema.si_ad_objectClass ) { + return 0; + } + + rc = mdb_index_param( op->o_bd, desc, LDAP_FILTER_PRESENT, + &db, &mask, &prefix ); + + if( rc == LDAP_INAPPROPRIATE_MATCHING ) { + /* not indexed */ + Debug( LDAP_DEBUG_TRACE, + "<= mdb_presence_candidates: (%s) not indexed\n", + desc->ad_cname.bv_val, 0, 0 ); + return 0; + } + + if( rc != LDAP_SUCCESS ) { + Debug( LDAP_DEBUG_TRACE, + "<= mdb_presence_candidates: (%s) index_param " + "returned=%d\n", + desc->ad_cname.bv_val, rc, 0 ); + return 0; + } + + if( prefix.bv_val == NULL ) { + Debug( LDAP_DEBUG_TRACE, + "<= mdb_presence_candidates: (%s) no prefix\n", + desc->ad_cname.bv_val, 0, 0 ); + return -1; + } + + rc = mdb_key_read( op->o_bd, db, rtxn, &prefix, ids, NULL, 0 ); + + if( rc == DB_NOTFOUND ) { + MDB_IDL_ZERO( ids ); + rc = 0; + } else if( rc != LDAP_SUCCESS ) { + Debug( LDAP_DEBUG_TRACE, + "<= mdb_presense_candidates: (%s) " + "key read failed (%d)\n", + desc->ad_cname.bv_val, rc, 0 ); + goto done; + } + + Debug(LDAP_DEBUG_TRACE, + "<= mdb_presence_candidates: id=%ld first=%ld last=%ld\n", + (long) ids[0], + (long) MDB_IDL_FIRST(ids), + (long) MDB_IDL_LAST(ids) ); + +done: + return rc; +} + +static int +equality_candidates( + Operation *op, + DB_TXN *rtxn, + AttributeAssertion *ava, + ID *ids, + ID *tmp ) +{ + struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private; + DB *db; + int i; + int rc; + slap_mask_t mask; + struct berval prefix = {0, NULL}; + struct berval *keys = NULL; + MatchingRule *mr; + + Debug( LDAP_DEBUG_TRACE, "=> mdb_equality_candidates (%s)\n", + ava->aa_desc->ad_cname.bv_val, 0, 0 ); + + if ( ava->aa_desc == slap_schema.si_ad_entryDN ) { + EntryInfo *ei = NULL; + rc = mdb_cache_find_ndn( op, rtxn, &ava->aa_value, &ei ); + if ( rc == LDAP_SUCCESS ) { + /* exactly one ID can match */ + ids[0] = 1; + ids[1] = ei->bei_id; + } + if ( ei ) { + mdb_cache_entryinfo_unlock( ei ); + } + return rc; + } + + 
MDB_IDL_ALL( mdb, ids ); + + rc = mdb_index_param( op->o_bd, ava->aa_desc, LDAP_FILTER_EQUALITY, + &db, &mask, &prefix ); + + if ( rc == LDAP_INAPPROPRIATE_MATCHING ) { + Debug( LDAP_DEBUG_ANY, + "<= mdb_equality_candidates: (%s) not indexed\n", + ava->aa_desc->ad_cname.bv_val, 0, 0 ); + return 0; + } + + if( rc != LDAP_SUCCESS ) { + Debug( LDAP_DEBUG_ANY, + "<= mdb_equality_candidates: (%s) " + "index_param failed (%d)\n", + ava->aa_desc->ad_cname.bv_val, rc, 0 ); + return 0; + } + + mr = ava->aa_desc->ad_type->sat_equality; + if( !mr ) { + return 0; + } + + if( !mr->smr_filter ) { + return 0; + } + + rc = (mr->smr_filter)( + LDAP_FILTER_EQUALITY, + mask, + ava->aa_desc->ad_type->sat_syntax, + mr, + &prefix, + &ava->aa_value, + &keys, op->o_tmpmemctx ); + + if( rc != LDAP_SUCCESS ) { + Debug( LDAP_DEBUG_TRACE, + "<= mdb_equality_candidates: (%s, %s) " + "MR filter failed (%d)\n", + prefix.bv_val, ava->aa_desc->ad_cname.bv_val, rc ); + return 0; + } + + if( keys == NULL ) { + Debug( LDAP_DEBUG_TRACE, + "<= mdb_equality_candidates: (%s) no keys\n", + ava->aa_desc->ad_cname.bv_val, 0, 0 ); + return 0; + } + + for ( i= 0; keys[i].bv_val != NULL; i++ ) { + rc = mdb_key_read( op->o_bd, db, rtxn, &keys[i], tmp, NULL, 0 ); + + if( rc == DB_NOTFOUND ) { + MDB_IDL_ZERO( ids ); + rc = 0; + break; + } else if( rc != LDAP_SUCCESS ) { + Debug( LDAP_DEBUG_TRACE, + "<= mdb_equality_candidates: (%s) " + "key read failed (%d)\n", + ava->aa_desc->ad_cname.bv_val, rc, 0 ); + break; + } + + if( MDB_IDL_IS_ZERO( tmp ) ) { + Debug( LDAP_DEBUG_TRACE, + "<= mdb_equality_candidates: (%s) NULL\n", + ava->aa_desc->ad_cname.bv_val, 0, 0 ); + MDB_IDL_ZERO( ids ); + break; + } + + if ( i == 0 ) { + MDB_IDL_CPY( ids, tmp ); + } else { + mdb_idl_intersection( ids, tmp ); + } + + if( MDB_IDL_IS_ZERO( ids ) ) + break; + } + + ber_bvarray_free_x( keys, op->o_tmpmemctx ); + + Debug( LDAP_DEBUG_TRACE, + "<= mdb_equality_candidates: id=%ld, first=%ld, last=%ld\n", + (long) ids[0], + (long) 
MDB_IDL_FIRST(ids), + (long) MDB_IDL_LAST(ids) ); + return( rc ); +} + + +static int +approx_candidates( + Operation *op, + DB_TXN *rtxn, + AttributeAssertion *ava, + ID *ids, + ID *tmp ) +{ + struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private; + DB *db; + int i; + int rc; + slap_mask_t mask; + struct berval prefix = {0, NULL}; + struct berval *keys = NULL; + MatchingRule *mr; + + Debug( LDAP_DEBUG_TRACE, "=> mdb_approx_candidates (%s)\n", + ava->aa_desc->ad_cname.bv_val, 0, 0 ); + + MDB_IDL_ALL( mdb, ids ); + + rc = mdb_index_param( op->o_bd, ava->aa_desc, LDAP_FILTER_APPROX, + &db, &mask, &prefix ); + + if ( rc == LDAP_INAPPROPRIATE_MATCHING ) { + Debug( LDAP_DEBUG_ANY, + "<= mdb_approx_candidates: (%s) not indexed\n", + ava->aa_desc->ad_cname.bv_val, 0, 0 ); + return 0; + } + + if( rc != LDAP_SUCCESS ) { + Debug( LDAP_DEBUG_ANY, + "<= mdb_approx_candidates: (%s) " + "index_param failed (%d)\n", + ava->aa_desc->ad_cname.bv_val, rc, 0 ); + return 0; + } + + mr = ava->aa_desc->ad_type->sat_approx; + if( !mr ) { + /* no approx matching rule, try equality matching rule */ + mr = ava->aa_desc->ad_type->sat_equality; + } + + if( !mr ) { + return 0; + } + + if( !mr->smr_filter ) { + return 0; + } + + rc = (mr->smr_filter)( + LDAP_FILTER_APPROX, + mask, + ava->aa_desc->ad_type->sat_syntax, + mr, + &prefix, + &ava->aa_value, + &keys, op->o_tmpmemctx ); + + if( rc != LDAP_SUCCESS ) { + Debug( LDAP_DEBUG_TRACE, + "<= mdb_approx_candidates: (%s, %s) " + "MR filter failed (%d)\n", + prefix.bv_val, ava->aa_desc->ad_cname.bv_val, rc ); + return 0; + } + + if( keys == NULL ) { + Debug( LDAP_DEBUG_TRACE, + "<= mdb_approx_candidates: (%s) no keys (%s)\n", + prefix.bv_val, ava->aa_desc->ad_cname.bv_val, 0 ); + return 0; + } + + for ( i= 0; keys[i].bv_val != NULL; i++ ) { + rc = mdb_key_read( op->o_bd, db, rtxn, &keys[i], tmp, NULL, 0 ); + + if( rc == DB_NOTFOUND ) { + MDB_IDL_ZERO( ids ); + rc = 0; + break; + } else if( rc != LDAP_SUCCESS ) { + Debug( LDAP_DEBUG_TRACE, 
+ "<= mdb_approx_candidates: (%s) " + "key read failed (%d)\n", + ava->aa_desc->ad_cname.bv_val, rc, 0 ); + break; + } + + if( MDB_IDL_IS_ZERO( tmp ) ) { + Debug( LDAP_DEBUG_TRACE, + "<= mdb_approx_candidates: (%s) NULL\n", + ava->aa_desc->ad_cname.bv_val, 0, 0 ); + MDB_IDL_ZERO( ids ); + break; + } + + if ( i == 0 ) { + MDB_IDL_CPY( ids, tmp ); + } else { + mdb_idl_intersection( ids, tmp ); + } + + if( MDB_IDL_IS_ZERO( ids ) ) + break; + } + + ber_bvarray_free_x( keys, op->o_tmpmemctx ); + + Debug( LDAP_DEBUG_TRACE, "<= mdb_approx_candidates %ld, first=%ld, last=%ld\n", + (long) ids[0], + (long) MDB_IDL_FIRST(ids), + (long) MDB_IDL_LAST(ids) ); + return( rc ); +} + +static int +substring_candidates( + Operation *op, + DB_TXN *rtxn, + SubstringsAssertion *sub, + ID *ids, + ID *tmp ) +{ + struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private; + DB *db; + int i; + int rc; + slap_mask_t mask; + struct berval prefix = {0, NULL}; + struct berval *keys = NULL; + MatchingRule *mr; + + Debug( LDAP_DEBUG_TRACE, "=> mdb_substring_candidates (%s)\n", + sub->sa_desc->ad_cname.bv_val, 0, 0 ); + + MDB_IDL_ALL( mdb, ids ); + + rc = mdb_index_param( op->o_bd, sub->sa_desc, LDAP_FILTER_SUBSTRINGS, + &db, &mask, &prefix ); + + if ( rc == LDAP_INAPPROPRIATE_MATCHING ) { + Debug( LDAP_DEBUG_ANY, + "<= mdb_substring_candidates: (%s) not indexed\n", + sub->sa_desc->ad_cname.bv_val, 0, 0 ); + return 0; + } + + if( rc != LDAP_SUCCESS ) { + Debug( LDAP_DEBUG_ANY, + "<= mdb_substring_candidates: (%s) " + "index_param failed (%d)\n", + sub->sa_desc->ad_cname.bv_val, rc, 0 ); + return 0; + } + + mr = sub->sa_desc->ad_type->sat_substr; + + if( !mr ) { + return 0; + } + + if( !mr->smr_filter ) { + return 0; + } + + rc = (mr->smr_filter)( + LDAP_FILTER_SUBSTRINGS, + mask, + sub->sa_desc->ad_type->sat_syntax, + mr, + &prefix, + sub, + &keys, op->o_tmpmemctx ); + + if( rc != LDAP_SUCCESS ) { + Debug( LDAP_DEBUG_TRACE, + "<= mdb_substring_candidates: (%s) " + "MR filter failed (%d)\n", + 
sub->sa_desc->ad_cname.bv_val, rc, 0 ); + return 0; + } + + if( keys == NULL ) { + Debug( LDAP_DEBUG_TRACE, + "<= mdb_substring_candidates: (0x%04lx) no keys (%s)\n", + mask, sub->sa_desc->ad_cname.bv_val, 0 ); + return 0; + } + + for ( i= 0; keys[i].bv_val != NULL; i++ ) { + rc = mdb_key_read( op->o_bd, db, rtxn, &keys[i], tmp, NULL, 0 ); + + if( rc == DB_NOTFOUND ) { + MDB_IDL_ZERO( ids ); + rc = 0; + break; + } else if( rc != LDAP_SUCCESS ) { + Debug( LDAP_DEBUG_TRACE, + "<= mdb_substring_candidates: (%s) " + "key read failed (%d)\n", + sub->sa_desc->ad_cname.bv_val, rc, 0 ); + break; + } + + if( MDB_IDL_IS_ZERO( tmp ) ) { + Debug( LDAP_DEBUG_TRACE, + "<= mdb_substring_candidates: (%s) NULL\n", + sub->sa_desc->ad_cname.bv_val, 0, 0 ); + MDB_IDL_ZERO( ids ); + break; + } + + if ( i == 0 ) { + MDB_IDL_CPY( ids, tmp ); + } else { + mdb_idl_intersection( ids, tmp ); + } + + if( MDB_IDL_IS_ZERO( ids ) ) + break; + } + + ber_bvarray_free_x( keys, op->o_tmpmemctx ); + + Debug( LDAP_DEBUG_TRACE, "<= mdb_substring_candidates: %ld, first=%ld, last=%ld\n", + (long) ids[0], + (long) MDB_IDL_FIRST(ids), + (long) MDB_IDL_LAST(ids) ); + return( rc ); +} + +static int +inequality_candidates( + Operation *op, + DB_TXN *rtxn, + AttributeAssertion *ava, + ID *ids, + ID *tmp, + int gtorlt ) +{ + struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private; + DB *db; + int rc; + slap_mask_t mask; + struct berval prefix = {0, NULL}; + struct berval *keys = NULL; + MatchingRule *mr; + DBC * cursor = NULL; + + Debug( LDAP_DEBUG_TRACE, "=> mdb_inequality_candidates (%s)\n", + ava->aa_desc->ad_cname.bv_val, 0, 0 ); + + MDB_IDL_ALL( mdb, ids ); + + rc = mdb_index_param( op->o_bd, ava->aa_desc, LDAP_FILTER_EQUALITY, + &db, &mask, &prefix ); + + if ( rc == LDAP_INAPPROPRIATE_MATCHING ) { + Debug( LDAP_DEBUG_ANY, + "<= mdb_inequality_candidates: (%s) not indexed\n", + ava->aa_desc->ad_cname.bv_val, 0, 0 ); + return 0; + } + + if( rc != LDAP_SUCCESS ) { + Debug( LDAP_DEBUG_ANY, + "<= 
mdb_inequality_candidates: (%s) " + "index_param failed (%d)\n", + ava->aa_desc->ad_cname.bv_val, rc, 0 ); + return 0; + } + + mr = ava->aa_desc->ad_type->sat_equality; + if( !mr ) { + return 0; + } + + if( !mr->smr_filter ) { + return 0; + } + + rc = (mr->smr_filter)( + LDAP_FILTER_EQUALITY, + mask, + ava->aa_desc->ad_type->sat_syntax, + mr, + &prefix, + &ava->aa_value, + &keys, op->o_tmpmemctx ); + + if( rc != LDAP_SUCCESS ) { + Debug( LDAP_DEBUG_TRACE, + "<= mdb_inequality_candidates: (%s, %s) " + "MR filter failed (%d)\n", + prefix.bv_val, ava->aa_desc->ad_cname.bv_val, rc ); + return 0; + } + + if( keys == NULL ) { + Debug( LDAP_DEBUG_TRACE, + "<= mdb_inequality_candidates: (%s) no keys\n", + ava->aa_desc->ad_cname.bv_val, 0, 0 ); + return 0; + } + + MDB_IDL_ZERO( ids ); + while(1) { + rc = mdb_key_read( op->o_bd, db, rtxn, &keys[0], tmp, &cursor, gtorlt ); + + if( rc == DB_NOTFOUND ) { + rc = 0; + break; + } else if( rc != LDAP_SUCCESS ) { + Debug( LDAP_DEBUG_TRACE, + "<= mdb_inequality_candidates: (%s) " + "key read failed (%d)\n", + ava->aa_desc->ad_cname.bv_val, rc, 0 ); + break; + } + + if( MDB_IDL_IS_ZERO( tmp ) ) { + Debug( LDAP_DEBUG_TRACE, + "<= mdb_inequality_candidates: (%s) NULL\n", + ava->aa_desc->ad_cname.bv_val, 0, 0 ); + break; + } + + mdb_idl_union( ids, tmp ); + + if( op->ors_limit && op->ors_limit->lms_s_unchecked != -1 && + MDB_IDL_N( ids ) >= (unsigned) op->ors_limit->lms_s_unchecked ) { + cursor->c_close( cursor ); + break; + } + } + ber_bvarray_free_x( keys, op->o_tmpmemctx ); + + Debug( LDAP_DEBUG_TRACE, + "<= mdb_inequality_candidates: id=%ld, first=%ld, last=%ld\n", + (long) ids[0], + (long) MDB_IDL_FIRST(ids), + (long) MDB_IDL_LAST(ids) ); + return( rc ); +} diff --git a/servers/slapd/back-mdb/id2entry.c b/servers/slapd/back-mdb/id2entry.c new file mode 100644 index 0000000000..951cb0b639 --- /dev/null +++ b/servers/slapd/back-mdb/id2entry.c @@ -0,0 +1,440 @@ +/* id2entry.c - routines to deal with the id2entry database */ +/* 
$OpenLDAP$ */ +/* This work is part of OpenLDAP Software . + * + * Copyright 2000-2011 The OpenLDAP Foundation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * . + */ + +#include "portable.h" + +#include +#include +#include + +#include "back-mdb.h" + +static int mdb_id2entry_put( + BackendDB *be, + DB_TXN *tid, + Entry *e, + int flag ) +{ + struct mdb_info *mdb = (struct mdb_info *) be->be_private; + DB *db = mdb->bi_id2entry->bdi_db; + DBT key, data; + struct berval bv; + int rc; + ID nid; +#ifdef MDB_HIER + struct berval odn, ondn; + + /* We only store rdns, and they go in the dn2id database. */ + + odn = e->e_name; ondn = e->e_nname; + + e->e_name = slap_empty_bv; + e->e_nname = slap_empty_bv; +#endif + DBTzero( &key ); + + /* Store ID in BigEndian format */ + key.data = &nid; + key.size = sizeof(ID); + MDB_ID2DISK( e->e_id, &nid ); + + rc = entry_encode( e, &bv ); +#ifdef MDB_HIER + e->e_name = odn; e->e_nname = ondn; +#endif + if( rc != LDAP_SUCCESS ) { + return -1; + } + + DBTzero( &data ); + bv2DBT( &bv, &data ); + + rc = db->put( db, tid, &key, &data, flag ); + + free( bv.bv_val ); + return rc; +} + +/* + * This routine adds (or updates) an entry on disk. + * The cache should be already be updated. 
+ */ + + +int mdb_id2entry_add( + BackendDB *be, + DB_TXN *tid, + Entry *e ) +{ + return mdb_id2entry_put(be, tid, e, DB_NOOVERWRITE); +} + +int mdb_id2entry_update( + BackendDB *be, + DB_TXN *tid, + Entry *e ) +{ + return mdb_id2entry_put(be, tid, e, 0); +} + +int mdb_id2entry( + BackendDB *be, + DB_TXN *tid, + ID id, + Entry **e ) +{ + struct mdb_info *mdb = (struct mdb_info *) be->be_private; + DB *db = mdb->bi_id2entry->bdi_db; + DBT key, data; + DBC *cursor; + EntryHeader eh; + char buf[16]; + int rc = 0, off; + ID nid; + + *e = NULL; + + DBTzero( &key ); + key.data = &nid; + key.size = sizeof(ID); + MDB_ID2DISK( id, &nid ); + + DBTzero( &data ); + data.flags = DB_DBT_USERMEM | DB_DBT_PARTIAL; + + /* fetch it */ + rc = db->cursor( db, tid, &cursor, mdb->bi_db_opflags ); + if ( rc ) return rc; + + /* Get the nattrs / nvals counts first */ + data.ulen = data.dlen = sizeof(buf); + data.data = buf; + rc = cursor->c_get( cursor, &key, &data, DB_SET ); + if ( rc ) goto finish; + + + eh.bv.bv_val = buf; + eh.bv.bv_len = data.size; + rc = entry_header( &eh ); + if ( rc ) goto finish; + + /* Get the size */ + data.flags ^= DB_DBT_PARTIAL; + data.ulen = 0; + rc = cursor->c_get( cursor, &key, &data, DB_CURRENT ); + if ( rc != DB_BUFFER_SMALL ) goto finish; + + /* Allocate a block and retrieve the data */ + off = eh.data - eh.bv.bv_val; + eh.bv.bv_len = eh.nvals * sizeof( struct berval ) + data.size; + eh.bv.bv_val = ch_malloc( eh.bv.bv_len ); + eh.data = eh.bv.bv_val + eh.nvals * sizeof( struct berval ); + data.data = eh.data; + data.ulen = data.size; + + /* skip past already parsed nattr/nvals */ + eh.data += off; + + rc = cursor->c_get( cursor, &key, &data, DB_CURRENT ); + +finish: + cursor->c_close( cursor ); + + if( rc != 0 ) { + return rc; + } + +#ifdef SLAP_ZONE_ALLOC + rc = entry_decode(&eh, e, mdb->bi_cache.c_zctx); +#else + rc = entry_decode(&eh, e); +#endif + + if( rc == 0 ) { + (*e)->e_id = id; + } else { + /* only free on error. 
On success, the entry was + * decoded in place. + */ +#ifndef SLAP_ZONE_ALLOC + ch_free(eh.bv.bv_val); +#endif + } +#ifdef SLAP_ZONE_ALLOC + ch_free(eh.bv.bv_val); +#endif + + return rc; +} + +int mdb_id2entry_delete( + BackendDB *be, + DB_TXN *tid, + Entry *e ) +{ + struct mdb_info *mdb = (struct mdb_info *) be->be_private; + DB *db = mdb->bi_id2entry->bdi_db; + DBT key; + int rc; + ID nid; + + DBTzero( &key ); + key.data = &nid; + key.size = sizeof(ID); + MDB_ID2DISK( e->e_id, &nid ); + + /* delete from database */ + rc = db->del( db, tid, &key, 0 ); + + return rc; +} + +int mdb_entry_return( + Entry *e +) +{ + /* Our entries are allocated in two blocks; the data comes from + * the db itself and the Entry structure and associated pointers + * are allocated in entry_decode. The db data pointer is saved + * in e_bv. + */ + if ( e->e_bv.bv_val ) { + /* See if the DNs were changed by modrdn */ + if( e->e_nname.bv_val < e->e_bv.bv_val || e->e_nname.bv_val > + e->e_bv.bv_val + e->e_bv.bv_len ) { + ch_free(e->e_name.bv_val); + ch_free(e->e_nname.bv_val); + } + e->e_name.bv_val = NULL; + e->e_nname.bv_val = NULL; + /* In tool mode the e_bv buffer is realloc'd, leave it alone */ + if( !(slapMode & SLAP_TOOL_MODE) ) { + free( e->e_bv.bv_val ); + } + BER_BVZERO( &e->e_bv ); + } + entry_free( e ); + return 0; +} + +int mdb_entry_release( + Operation *op, + Entry *e, + int rw ) +{ + struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private; + struct mdb_op_info *boi; + OpExtra *oex; + + /* slapMode : SLAP_SERVER_MODE, SLAP_TOOL_MODE, + SLAP_TRUNCATE_MODE, SLAP_UNDEFINED_MODE */ + + if ( slapMode == SLAP_SERVER_MODE ) { + /* If not in our cache, just free it */ + if ( !e->e_private ) { +#ifdef SLAP_ZONE_ALLOC + return mdb_entry_return( mdb, e, -1 ); +#else + return mdb_entry_return( e ); +#endif + } + /* free entry and reader or writer lock */ + LDAP_SLIST_FOREACH( oex, &op->o_extra, oe_next ) { + if ( oex->oe_key == mdb ) break; + } + boi = (struct mdb_op_info *)oex; + 
+ /* lock is freed with txn */ + if ( !boi || boi->boi_txn ) { + mdb_unlocked_cache_return_entry_rw( mdb, e, rw ); + } else { + struct mdb_lock_info *bli, *prev; + for ( prev=(struct mdb_lock_info *)&boi->boi_locks, + bli = boi->boi_locks; bli; prev=bli, bli=bli->bli_next ) { + if ( bli->bli_id == e->e_id ) { + mdb_cache_return_entry_rw( mdb, e, rw, &bli->bli_lock ); + prev->bli_next = bli->bli_next; + /* Cleanup, or let caller know we unlocked */ + if ( bli->bli_flag & BLI_DONTFREE ) + bli->bli_flag = 0; + else + op->o_tmpfree( bli, op->o_tmpmemctx ); + break; + } + } + if ( !boi->boi_locks ) { + LDAP_SLIST_REMOVE( &op->o_extra, &boi->boi_oe, OpExtra, oe_next ); + if ( !(boi->boi_flag & BOI_DONTFREE)) + op->o_tmpfree( boi, op->o_tmpmemctx ); + } + } + } else { +#ifdef SLAP_ZONE_ALLOC + int zseq = -1; + if (e->e_private != NULL) { + BEI(e)->bei_e = NULL; + zseq = BEI(e)->bei_zseq; + } +#else + if (e->e_private != NULL) + BEI(e)->bei_e = NULL; +#endif + e->e_private = NULL; +#ifdef SLAP_ZONE_ALLOC + mdb_entry_return ( mdb, e, zseq ); +#else + mdb_entry_return ( e ); +#endif + } + + return 0; +} + +/* return LDAP_SUCCESS IFF we can retrieve the specified entry. + */ +int mdb_entry_get( + Operation *op, + struct berval *ndn, + ObjectClass *oc, + AttributeDescription *at, + int rw, + Entry **ent ) +{ + struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private; + struct mdb_op_info *boi = NULL; + DB_TXN *txn = NULL; + Entry *e = NULL; + EntryInfo *ei; + int rc; + const char *at_name = at ? at->ad_cname.bv_val : "(null)"; + + DB_LOCK lock; + + Debug( LDAP_DEBUG_ARGS, + "=> mdb_entry_get: ndn: \"%s\"\n", ndn->bv_val, 0, 0 ); + Debug( LDAP_DEBUG_ARGS, + "=> mdb_entry_get: oc: \"%s\", at: \"%s\"\n", + oc ? 
oc->soc_cname.bv_val : "(null)", at_name, 0); + + if( op ) { + OpExtra *oex; + LDAP_SLIST_FOREACH( oex, &op->o_extra, oe_next ) { + if ( oex->oe_key == mdb ) break; + } + boi = (struct mdb_op_info *)oex; + if ( boi ) + txn = boi->boi_txn; + } + + if ( !txn ) { + rc = mdb_reader_get( op, mdb->bi_dbenv, &txn ); + switch(rc) { + case 0: + break; + default: + return LDAP_OTHER; + } + } + +dn2entry_retry: + /* can we find entry */ + rc = mdb_dn2entry( op, txn, ndn, &ei, 0, &lock ); + switch( rc ) { + case DB_NOTFOUND: + case 0: + break; + case DB_LOCK_DEADLOCK: + case DB_LOCK_NOTGRANTED: + /* the txn must abort and retry */ + if ( txn ) { + if ( boi ) boi->boi_err = rc; + return LDAP_BUSY; + } + ldap_pvt_thread_yield(); + goto dn2entry_retry; + default: + if ( boi ) boi->boi_err = rc; + return (rc != LDAP_BUSY) ? LDAP_OTHER : LDAP_BUSY; + } + if (ei) e = ei->bei_e; + if (e == NULL) { + Debug( LDAP_DEBUG_ACL, + "=> mdb_entry_get: cannot find entry: \"%s\"\n", + ndn->bv_val, 0, 0 ); + return LDAP_NO_SUCH_OBJECT; + } + + Debug( LDAP_DEBUG_ACL, + "=> mdb_entry_get: found entry: \"%s\"\n", + ndn->bv_val, 0, 0 ); + + if ( oc && !is_entry_objectclass( e, oc, 0 )) { + Debug( LDAP_DEBUG_ACL, + "<= mdb_entry_get: failed to find objectClass %s\n", + oc->soc_cname.bv_val, 0, 0 ); + rc = LDAP_NO_SUCH_ATTRIBUTE; + goto return_results; + } + + /* NOTE: attr_find() or attrs_find()? */ + if ( at && attr_find( e->e_attrs, at ) == NULL ) { + Debug( LDAP_DEBUG_ACL, + "<= mdb_entry_get: failed to find attribute %s\n", + at->ad_cname.bv_val, 0, 0 ); + rc = LDAP_NO_SUCH_ATTRIBUTE; + goto return_results; + } + +return_results: + if( rc != LDAP_SUCCESS ) { + /* free entry */ + mdb_cache_return_entry_rw(mdb, e, rw, &lock); + + } else { + if ( slapMode == SLAP_SERVER_MODE ) { + *ent = e; + /* big drag. we need a place to store a read lock so we can + * release it later?? If we're in a txn, nothing is needed + * here because the locks will go away with the txn. 
+ */ + if ( op ) { + if ( !boi ) { + boi = op->o_tmpcalloc(1,sizeof(struct mdb_op_info),op->o_tmpmemctx); + boi->boi_oe.oe_key = mdb; + LDAP_SLIST_INSERT_HEAD( &op->o_extra, &boi->boi_oe, oe_next ); + } + if ( !boi->boi_txn ) { + struct mdb_lock_info *bli; + bli = op->o_tmpalloc( sizeof(struct mdb_lock_info), + op->o_tmpmemctx ); + bli->bli_next = boi->boi_locks; + bli->bli_id = e->e_id; + bli->bli_flag = 0; + bli->bli_lock = lock; + boi->boi_locks = bli; + } + } + } else { + *ent = entry_dup( e ); + mdb_cache_return_entry_rw(mdb, e, rw, &lock); + } + } + + Debug( LDAP_DEBUG_TRACE, + "mdb_entry_get: rc=%d\n", + rc, 0, 0 ); + return(rc); +} diff --git a/servers/slapd/back-mdb/idl.c b/servers/slapd/back-mdb/idl.c new file mode 100644 index 0000000000..c111630a34 --- /dev/null +++ b/servers/slapd/back-mdb/idl.c @@ -0,0 +1,1575 @@ +/* idl.c - ldap id list handling routines */ +/* $OpenLDAP$ */ +/* This work is part of OpenLDAP Software . + * + * Copyright 2000-2011 The OpenLDAP Foundation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * . + */ + +#include "portable.h" + +#include +#include + +#include "back-mdb.h" +#include "idl.h" + +#define IDL_MAX(x,y) ( x > y ? x : y ) +#define IDL_MIN(x,y) ( x < y ? x : y ) + +#define IDL_CMP(x,y) ( x < y ? -1 : ( x > y ? 
1 : 0 ) ) + +#define IDL_LRU_DELETE( mdb, e ) do { \ + if ( (e) == (mdb)->bi_idl_lru_head ) { \ + if ( (e)->idl_lru_next == (mdb)->bi_idl_lru_head ) { \ + (mdb)->bi_idl_lru_head = NULL; \ + } else { \ + (mdb)->bi_idl_lru_head = (e)->idl_lru_next; \ + } \ + } \ + if ( (e) == (mdb)->bi_idl_lru_tail ) { \ + if ( (e)->idl_lru_prev == (mdb)->bi_idl_lru_tail ) { \ + assert( (mdb)->bi_idl_lru_head == NULL ); \ + (mdb)->bi_idl_lru_tail = NULL; \ + } else { \ + (mdb)->bi_idl_lru_tail = (e)->idl_lru_prev; \ + } \ + } \ + (e)->idl_lru_next->idl_lru_prev = (e)->idl_lru_prev; \ + (e)->idl_lru_prev->idl_lru_next = (e)->idl_lru_next; \ +} while ( 0 ) + +static int +mdb_idl_entry_cmp( const void *v_idl1, const void *v_idl2 ) +{ + const mdb_idl_cache_entry_t *idl1 = v_idl1, *idl2 = v_idl2; + int rc; + + if ((rc = SLAP_PTRCMP( idl1->db, idl2->db ))) return rc; + if ((rc = idl1->kstr.bv_len - idl2->kstr.bv_len )) return rc; + return ( memcmp ( idl1->kstr.bv_val, idl2->kstr.bv_val , idl1->kstr.bv_len ) ); +} + +#if IDL_DEBUG > 0 +static void idl_check( ID *ids ) +{ + if( MDB_IDL_IS_RANGE( ids ) ) { + assert( MDB_IDL_RANGE_FIRST(ids) <= MDB_IDL_RANGE_LAST(ids) ); + } else { + ID i; + for( i=1; i < ids[0]; i++ ) { + assert( ids[i+1] > ids[i] ); + } + } +} + +#if IDL_DEBUG > 1 +static void idl_dump( ID *ids ) +{ + if( MDB_IDL_IS_RANGE( ids ) ) { + Debug( LDAP_DEBUG_ANY, + "IDL: range ( %ld - %ld )\n", + (long) MDB_IDL_RANGE_FIRST( ids ), + (long) MDB_IDL_RANGE_LAST( ids ) ); + + } else { + ID i; + Debug( LDAP_DEBUG_ANY, "IDL: size %ld", (long) ids[0], 0, 0 ); + + for( i=1; i<=ids[0]; i++ ) { + if( i % 16 == 1 ) { + Debug( LDAP_DEBUG_ANY, "\n", 0, 0, 0 ); + } + Debug( LDAP_DEBUG_ANY, " %02lx", (long) ids[i], 0, 0 ); + } + + Debug( LDAP_DEBUG_ANY, "\n", 0, 0, 0 ); + } + + idl_check( ids ); +} +#endif /* IDL_DEBUG > 1 */ +#endif /* IDL_DEBUG > 0 */ + +unsigned mdb_idl_search( ID *ids, ID id ) +{ +#define IDL_BINARY_SEARCH 1 +#ifdef IDL_BINARY_SEARCH + /* + * binary search of id in ids + * 
if found, returns position of id + * if not found, returns first postion greater than id + */ + unsigned base = 0; + unsigned cursor = 0; + int val = 0; + unsigned n = ids[0]; + +#if IDL_DEBUG > 0 + idl_check( ids ); +#endif + + while( 0 < n ) { + int pivot = n >> 1; + cursor = base + pivot; + val = IDL_CMP( id, ids[cursor + 1] ); + + if( val < 0 ) { + n = pivot; + + } else if ( val > 0 ) { + base = cursor + 1; + n -= pivot + 1; + + } else { + return cursor + 1; + } + } + + if( val > 0 ) { + return cursor + 2; + } else { + return cursor + 1; + } + +#else + /* (reverse) linear search */ + int i; + +#if IDL_DEBUG > 0 + idl_check( ids ); +#endif + + for( i=ids[0]; i; i-- ) { + if( id > ids[i] ) { + break; + } + } + + return i+1; +#endif +} + +int mdb_idl_insert( ID *ids, ID id ) +{ + unsigned x; + +#if IDL_DEBUG > 1 + Debug( LDAP_DEBUG_ANY, "insert: %04lx at %d\n", (long) id, x, 0 ); + idl_dump( ids ); +#elif IDL_DEBUG > 0 + idl_check( ids ); +#endif + + if (MDB_IDL_IS_RANGE( ids )) { + /* if already in range, treat as a dup */ + if (id >= MDB_IDL_FIRST(ids) && id <= MDB_IDL_LAST(ids)) + return -1; + if (id < MDB_IDL_FIRST(ids)) + ids[1] = id; + else if (id > MDB_IDL_LAST(ids)) + ids[2] = id; + return 0; + } + + x = mdb_idl_search( ids, id ); + assert( x > 0 ); + + if( x < 1 ) { + /* internal error */ + return -2; + } + + if ( x <= ids[0] && ids[x] == id ) { + /* duplicate */ + return -1; + } + + if ( ++ids[0] >= MDB_IDL_DB_MAX ) { + if( id < ids[1] ) { + ids[1] = id; + ids[2] = ids[ids[0]-1]; + } else if ( ids[ids[0]-1] < id ) { + ids[2] = id; + } else { + ids[2] = ids[ids[0]-1]; + } + ids[0] = NOID; + + } else { + /* insert id */ + AC_MEMCPY( &ids[x+1], &ids[x], (ids[0]-x) * sizeof(ID) ); + ids[x] = id; + } + +#if IDL_DEBUG > 1 + idl_dump( ids ); +#elif IDL_DEBUG > 0 + idl_check( ids ); +#endif + + return 0; +} + +static int mdb_idl_delete( ID *ids, ID id ) +{ + unsigned x; + +#if IDL_DEBUG > 1 + Debug( LDAP_DEBUG_ANY, "delete: %04lx at %d\n", (long) id, x, 0 ); + 
idl_dump( ids ); +#elif IDL_DEBUG > 0 + idl_check( ids ); +#endif + + if (MDB_IDL_IS_RANGE( ids )) { + /* If deleting a range boundary, adjust */ + if ( ids[1] == id ) + ids[1]++; + else if ( ids[2] == id ) + ids[2]--; + /* deleting from inside a range is a no-op */ + + /* If the range has collapsed, re-adjust */ + if ( ids[1] > ids[2] ) + ids[0] = 0; + else if ( ids[1] == ids[2] ) + ids[1] = 1; + return 0; + } + + x = mdb_idl_search( ids, id ); + assert( x > 0 ); + + if( x <= 0 ) { + /* internal error */ + return -2; + } + + if( x > ids[0] || ids[x] != id ) { + /* not found */ + return -1; + + } else if ( --ids[0] == 0 ) { + if( x != 1 ) { + return -3; + } + + } else { + AC_MEMCPY( &ids[x], &ids[x+1], (1+ids[0]-x) * sizeof(ID) ); + } + +#if IDL_DEBUG > 1 + idl_dump( ids ); +#elif IDL_DEBUG > 0 + idl_check( ids ); +#endif + + return 0; +} + +static char * +mdb_show_key( + DBT *key, + char *buf ) +{ + if ( key->size == 4 /* LUTIL_HASH_BYTES */ ) { + unsigned char *c = key->data; + sprintf( buf, "[%02x%02x%02x%02x]", c[0], c[1], c[2], c[3] ); + return buf; + } else { + return key->data; + } +} + +/* Find a db/key pair in the IDL cache. If ids is non-NULL, + * copy the cached IDL into it, otherwise just return the status. 
+ */ +int +mdb_idl_cache_get( + struct mdb_info *mdb, + DB *db, + DBT *key, + ID *ids ) +{ + mdb_idl_cache_entry_t idl_tmp; + mdb_idl_cache_entry_t *matched_idl_entry; + int rc = LDAP_NO_SUCH_OBJECT; + + DBT2bv( key, &idl_tmp.kstr ); + idl_tmp.db = db; + ldap_pvt_thread_rdwr_rlock( &mdb->bi_idl_tree_rwlock ); + matched_idl_entry = avl_find( mdb->bi_idl_tree, &idl_tmp, + mdb_idl_entry_cmp ); + if ( matched_idl_entry != NULL ) { + if ( matched_idl_entry->idl && ids ) + MDB_IDL_CPY( ids, matched_idl_entry->idl ); + matched_idl_entry->idl_flags |= CACHE_ENTRY_REFERENCED; + if ( matched_idl_entry->idl ) + rc = LDAP_SUCCESS; + else + rc = DB_NOTFOUND; + } + ldap_pvt_thread_rdwr_runlock( &mdb->bi_idl_tree_rwlock ); + + return rc; +} + +void +mdb_idl_cache_put( + struct mdb_info *mdb, + DB *db, + DBT *key, + ID *ids, + int rc ) +{ + mdb_idl_cache_entry_t idl_tmp; + mdb_idl_cache_entry_t *ee, *eprev; + + if ( rc == DB_NOTFOUND || MDB_IDL_IS_ZERO( ids )) + return; + + DBT2bv( key, &idl_tmp.kstr ); + + ee = (mdb_idl_cache_entry_t *) ch_malloc( + sizeof( mdb_idl_cache_entry_t ) ); + ee->db = db; + ee->idl = (ID*) ch_malloc( MDB_IDL_SIZEOF ( ids ) ); + MDB_IDL_CPY( ee->idl, ids ); + + ee->idl_lru_prev = NULL; + ee->idl_lru_next = NULL; + ee->idl_flags = 0; + ber_dupbv( &ee->kstr, &idl_tmp.kstr ); + ldap_pvt_thread_rdwr_wlock( &mdb->bi_idl_tree_rwlock ); + if ( avl_insert( &mdb->bi_idl_tree, (caddr_t) ee, + mdb_idl_entry_cmp, avl_dup_error )) + { + ch_free( ee->kstr.bv_val ); + ch_free( ee->idl ); + ch_free( ee ); + ldap_pvt_thread_rdwr_wunlock( &mdb->bi_idl_tree_rwlock ); + return; + } + ldap_pvt_thread_mutex_lock( &mdb->bi_idl_tree_lrulock ); + /* LRU_ADD */ + if ( mdb->bi_idl_lru_head ) { + assert( mdb->bi_idl_lru_tail != NULL ); + assert( mdb->bi_idl_lru_head->idl_lru_prev != NULL ); + assert( mdb->bi_idl_lru_head->idl_lru_next != NULL ); + + ee->idl_lru_next = mdb->bi_idl_lru_head; + ee->idl_lru_prev = mdb->bi_idl_lru_head->idl_lru_prev; + 
mdb->bi_idl_lru_head->idl_lru_prev->idl_lru_next = ee; + mdb->bi_idl_lru_head->idl_lru_prev = ee; + } else { + ee->idl_lru_next = ee->idl_lru_prev = ee; + mdb->bi_idl_lru_tail = ee; + } + mdb->bi_idl_lru_head = ee; + + if ( mdb->bi_idl_cache_size >= mdb->bi_idl_cache_max_size ) { + int i; + eprev = mdb->bi_idl_lru_tail; + for ( i = 0; (ee = eprev) != NULL && i < 10; i++ ) { + eprev = ee->idl_lru_prev; + if ( eprev == ee ) { + eprev = NULL; + } + if ( ee->idl_flags & CACHE_ENTRY_REFERENCED ) { + ee->idl_flags ^= CACHE_ENTRY_REFERENCED; + continue; + } + if ( avl_delete( &mdb->bi_idl_tree, (caddr_t) ee, + mdb_idl_entry_cmp ) == NULL ) { + Debug( LDAP_DEBUG_ANY, "=> mdb_idl_cache_put: " + "AVL delete failed\n", + 0, 0, 0 ); + } + IDL_LRU_DELETE( mdb, ee ); + i++; + --mdb->bi_idl_cache_size; + ch_free( ee->kstr.bv_val ); + ch_free( ee->idl ); + ch_free( ee ); + } + mdb->bi_idl_lru_tail = eprev; + assert( mdb->bi_idl_lru_tail != NULL + || mdb->bi_idl_lru_head == NULL ); + } + mdb->bi_idl_cache_size++; + ldap_pvt_thread_mutex_unlock( &mdb->bi_idl_tree_lrulock ); + ldap_pvt_thread_rdwr_wunlock( &mdb->bi_idl_tree_rwlock ); +} + +void +mdb_idl_cache_del( + struct mdb_info *mdb, + DB *db, + DBT *key ) +{ + mdb_idl_cache_entry_t *matched_idl_entry, idl_tmp; + DBT2bv( key, &idl_tmp.kstr ); + idl_tmp.db = db; + ldap_pvt_thread_rdwr_wlock( &mdb->bi_idl_tree_rwlock ); + matched_idl_entry = avl_find( mdb->bi_idl_tree, &idl_tmp, + mdb_idl_entry_cmp ); + if ( matched_idl_entry != NULL ) { + if ( avl_delete( &mdb->bi_idl_tree, (caddr_t) matched_idl_entry, + mdb_idl_entry_cmp ) == NULL ) { + Debug( LDAP_DEBUG_ANY, "=> mdb_idl_cache_del: " + "AVL delete failed\n", + 0, 0, 0 ); + } + --mdb->bi_idl_cache_size; + ldap_pvt_thread_mutex_lock( &mdb->bi_idl_tree_lrulock ); + IDL_LRU_DELETE( mdb, matched_idl_entry ); + ldap_pvt_thread_mutex_unlock( &mdb->bi_idl_tree_lrulock ); + free( matched_idl_entry->kstr.bv_val ); + if ( matched_idl_entry->idl ) + free( matched_idl_entry->idl ); + free( 
matched_idl_entry ); + } + ldap_pvt_thread_rdwr_wunlock( &mdb->bi_idl_tree_rwlock ); +} + +void +mdb_idl_cache_add_id( + struct mdb_info *mdb, + DB *db, + DBT *key, + ID id ) +{ + mdb_idl_cache_entry_t *cache_entry, idl_tmp; + DBT2bv( key, &idl_tmp.kstr ); + idl_tmp.db = db; + ldap_pvt_thread_rdwr_wlock( &mdb->bi_idl_tree_rwlock ); + cache_entry = avl_find( mdb->bi_idl_tree, &idl_tmp, + mdb_idl_entry_cmp ); + if ( cache_entry != NULL ) { + if ( !MDB_IDL_IS_RANGE( cache_entry->idl ) && + cache_entry->idl[0] < MDB_IDL_DB_MAX ) { + size_t s = MDB_IDL_SIZEOF( cache_entry->idl ) + sizeof(ID); + cache_entry->idl = ch_realloc( cache_entry->idl, s ); + } + mdb_idl_insert( cache_entry->idl, id ); + } + ldap_pvt_thread_rdwr_wunlock( &mdb->bi_idl_tree_rwlock ); +} + +void +mdb_idl_cache_del_id( + struct mdb_info *mdb, + DB *db, + DBT *key, + ID id ) +{ + mdb_idl_cache_entry_t *cache_entry, idl_tmp; + DBT2bv( key, &idl_tmp.kstr ); + idl_tmp.db = db; + ldap_pvt_thread_rdwr_wlock( &mdb->bi_idl_tree_rwlock ); + cache_entry = avl_find( mdb->bi_idl_tree, &idl_tmp, + mdb_idl_entry_cmp ); + if ( cache_entry != NULL ) { + mdb_idl_delete( cache_entry->idl, id ); + if ( cache_entry->idl[0] == 0 ) { + if ( avl_delete( &mdb->bi_idl_tree, (caddr_t) cache_entry, + mdb_idl_entry_cmp ) == NULL ) { + Debug( LDAP_DEBUG_ANY, "=> mdb_idl_cache_del: " + "AVL delete failed\n", + 0, 0, 0 ); + } + --mdb->bi_idl_cache_size; + ldap_pvt_thread_mutex_lock( &mdb->bi_idl_tree_lrulock ); + IDL_LRU_DELETE( mdb, cache_entry ); + ldap_pvt_thread_mutex_unlock( &mdb->bi_idl_tree_lrulock ); + free( cache_entry->kstr.bv_val ); + free( cache_entry->idl ); + free( cache_entry ); + } + } + ldap_pvt_thread_rdwr_wunlock( &mdb->bi_idl_tree_rwlock ); +} + +int +mdb_idl_fetch_key( + BackendDB *be, + DB *db, + DB_TXN *txn, + DBT *key, + ID *ids, + DBC **saved_cursor, + int get_flag ) +{ + struct mdb_info *mdb = (struct mdb_info *) be->be_private; + int rc; + DBT data, key2, *kptr; + DBC *cursor; + ID *i; + void *ptr; + 
size_t len; + int rc2; + int flags = mdb->bi_db_opflags | DB_MULTIPLE; + int opflag; + + /* If using BerkeleyDB 4.0, the buf must be large enough to + * grab the entire IDL in one get(), otherwise MDB will leak + * resources on subsequent get's. We can safely call get() + * twice - once for the data, and once to get the DB_NOTFOUND + * result meaning there's no more data. See ITS#2040 for details. + * This bug is fixed in MDB 4.1 so a smaller buffer will work if + * stack space is too limited. + * + * configure now requires Berkeley DB 4.1. + */ +#if DB_VERSION_FULL < 0x04010000 +# define MDB_ENOUGH 5 +#else + /* We sometimes test with tiny IDLs, and MDB always wants buffers + * that are at least one page in size. + */ +# if MDB_IDL_DB_SIZE < 4096 +# define MDB_ENOUGH 2048 +# else +# define MDB_ENOUGH 1 +# endif +#endif + ID buf[MDB_IDL_DB_SIZE*MDB_ENOUGH]; + + char keybuf[16]; + + Debug( LDAP_DEBUG_ARGS, + "mdb_idl_fetch_key: %s\n", + mdb_show_key( key, keybuf ), 0, 0 ); + + assert( ids != NULL ); + + if ( saved_cursor && *saved_cursor ) { + opflag = DB_NEXT; + } else if ( get_flag == LDAP_FILTER_GE ) { + opflag = DB_SET_RANGE; + } else if ( get_flag == LDAP_FILTER_LE ) { + opflag = DB_FIRST; + } else { + opflag = DB_SET; + } + + /* only non-range lookups can use the IDL cache */ + if ( mdb->bi_idl_cache_size && opflag == DB_SET ) { + rc = mdb_idl_cache_get( mdb, db, key, ids ); + if ( rc != LDAP_NO_SUCH_OBJECT ) return rc; + } + + DBTzero( &data ); + + data.data = buf; + data.ulen = sizeof(buf); + data.flags = DB_DBT_USERMEM; + + /* If we're not reusing an existing cursor, get a new one */ + if( opflag != DB_NEXT ) { + rc = db->cursor( db, txn, &cursor, mdb->bi_db_opflags ); + if( rc != 0 ) { + Debug( LDAP_DEBUG_ANY, "=> mdb_idl_fetch_key: " + "cursor failed: %s (%d)\n", db_strerror(rc), rc, 0 ); + return rc; + } + } else { + cursor = *saved_cursor; + } + + /* If this is a LE lookup, save original key so we can determine + * when to stop. 
If this is a GE lookup, save the key since it + * will be overwritten. + */ + if ( get_flag == LDAP_FILTER_LE || get_flag == LDAP_FILTER_GE ) { + DBTzero( &key2 ); + key2.flags = DB_DBT_USERMEM; + key2.ulen = sizeof(keybuf); + key2.data = keybuf; + key2.size = key->size; + AC_MEMCPY( keybuf, key->data, key->size ); + kptr = &key2; + } else { + kptr = key; + } + len = key->size; + rc = cursor->c_get( cursor, kptr, &data, flags | opflag ); + + /* skip presence key on range inequality lookups */ + while (rc == 0 && kptr->size != len) { + rc = cursor->c_get( cursor, kptr, &data, flags | DB_NEXT_NODUP ); + } + /* If we're doing a LE compare and the new key is greater than + * our search key, we're done + */ + if (rc == 0 && get_flag == LDAP_FILTER_LE && memcmp( kptr->data, + key->data, key->size ) > 0 ) { + rc = DB_NOTFOUND; + } + if (rc == 0) { + i = ids; + while (rc == 0) { + u_int8_t *j; + + DB_MULTIPLE_INIT( ptr, &data ); + while (ptr) { + DB_MULTIPLE_NEXT(ptr, &data, j, len); + if (j) { + ++i; + MDB_DISK2ID( j, i ); + } + } + rc = cursor->c_get( cursor, key, &data, flags | DB_NEXT_DUP ); + } + if ( rc == DB_NOTFOUND ) rc = 0; + ids[0] = i - ids; + /* On disk, a range is denoted by 0 in the first element */ + if (ids[1] == 0) { + if (ids[0] != MDB_IDL_RANGE_SIZE) { + Debug( LDAP_DEBUG_ANY, "=> mdb_idl_fetch_key: " + "range size mismatch: expected %d, got %ld\n", + MDB_IDL_RANGE_SIZE, ids[0], 0 ); + cursor->c_close( cursor ); + return -1; + } + MDB_IDL_RANGE( ids, ids[2], ids[3] ); + } + data.size = MDB_IDL_SIZEOF(ids); + } + + if ( saved_cursor && rc == 0 ) { + if ( !*saved_cursor ) + *saved_cursor = cursor; + rc2 = 0; + } + else + rc2 = cursor->c_close( cursor ); + if (rc2) { + Debug( LDAP_DEBUG_ANY, "=> mdb_idl_fetch_key: " + "close failed: %s (%d)\n", db_strerror(rc2), rc2, 0 ); + return rc2; + } + + if( rc == DB_NOTFOUND ) { + return rc; + + } else if( rc != 0 ) { + Debug( LDAP_DEBUG_ANY, "=> mdb_idl_fetch_key: " + "get failed: %s (%d)\n", + db_strerror(rc), rc, 
0 ); + return rc; + + } else if ( data.size == 0 || data.size % sizeof( ID ) ) { + /* size not multiple of ID size */ + Debug( LDAP_DEBUG_ANY, "=> mdb_idl_fetch_key: " + "odd size: expected %ld multiple, got %ld\n", + (long) sizeof( ID ), (long) data.size, 0 ); + return -1; + + } else if ( data.size != MDB_IDL_SIZEOF(ids) ) { + /* size mismatch */ + Debug( LDAP_DEBUG_ANY, "=> mdb_idl_fetch_key: " + "get size mismatch: expected %ld, got %ld\n", + (long) ((1 + ids[0]) * sizeof( ID )), (long) data.size, 0 ); + return -1; + } + + if ( mdb->bi_idl_cache_max_size ) { + mdb_idl_cache_put( mdb, db, key, ids, rc ); + } + + return rc; +} + + +int +mdb_idl_insert_key( + BackendDB *be, + DB *db, + DB_TXN *tid, + DBT *key, + ID id ) +{ + struct mdb_info *mdb = (struct mdb_info *) be->be_private; + int rc; + DBT data; + DBC *cursor; + ID lo, hi, nlo, nhi, nid; + char *err; + + { + char buf[16]; + Debug( LDAP_DEBUG_ARGS, + "mdb_idl_insert_key: %lx %s\n", + (long) id, mdb_show_key( key, buf ), 0 ); + } + + assert( id != NOID ); + + DBTzero( &data ); + data.size = sizeof( ID ); + data.ulen = data.size; + data.flags = DB_DBT_USERMEM; + + MDB_ID2DISK( id, &nid ); + + rc = db->cursor( db, tid, &cursor, mdb->bi_db_opflags ); + if ( rc != 0 ) { + Debug( LDAP_DEBUG_ANY, "=> mdb_idl_insert_key: " + "cursor failed: %s (%d)\n", db_strerror(rc), rc, 0 ); + return rc; + } + data.data = &nlo; + /* Fetch the first data item for this key, to see if it + * exists and if it's a range. 
+ */ + rc = cursor->c_get( cursor, key, &data, DB_SET ); + err = "c_get"; + if ( rc == 0 ) { + if ( nlo != 0 ) { + /* not a range, count the number of items */ + db_recno_t count; + rc = cursor->c_count( cursor, &count, 0 ); + if ( rc != 0 ) { + err = "c_count"; + goto fail; + } + if ( count >= MDB_IDL_DB_MAX ) { + /* No room, convert to a range */ + DBT key2 = *key; + db_recno_t i; + + key2.dlen = key2.ulen; + key2.flags |= DB_DBT_PARTIAL; + + MDB_DISK2ID( &nlo, &lo ); + data.data = &nhi; + + rc = cursor->c_get( cursor, &key2, &data, DB_NEXT_NODUP ); + if ( rc != 0 && rc != DB_NOTFOUND ) { + err = "c_get next_nodup"; + goto fail; + } + if ( rc == DB_NOTFOUND ) { + rc = cursor->c_get( cursor, key, &data, DB_LAST ); + if ( rc != 0 ) { + err = "c_get last"; + goto fail; + } + } else { + rc = cursor->c_get( cursor, key, &data, DB_PREV ); + if ( rc != 0 ) { + err = "c_get prev"; + goto fail; + } + } + MDB_DISK2ID( &nhi, &hi ); + /* Update hi/lo if needed, then delete all the items + * between lo and hi + */ + if ( id < lo ) { + lo = id; + nlo = nid; + } else if ( id > hi ) { + hi = id; + nhi = nid; + } + data.data = &nid; + /* Don't fetch anything, just position cursor */ + data.flags = DB_DBT_USERMEM | DB_DBT_PARTIAL; + data.dlen = data.ulen = 0; + rc = cursor->c_get( cursor, key, &data, DB_SET ); + if ( rc != 0 ) { + err = "c_get 2"; + goto fail; + } + rc = cursor->c_del( cursor, 0 ); + if ( rc != 0 ) { + err = "c_del range1"; + goto fail; + } + /* Delete all the records */ + for ( i=1; ic_get( cursor, &key2, &data, DB_NEXT_DUP ); + if ( rc != 0 ) { + err = "c_get next_dup"; + goto fail; + } + rc = cursor->c_del( cursor, 0 ); + if ( rc != 0 ) { + err = "c_del range"; + goto fail; + } + } + /* Store the range marker */ + data.size = data.ulen = sizeof(ID); + data.flags = DB_DBT_USERMEM; + nid = 0; + rc = cursor->c_put( cursor, key, &data, DB_KEYFIRST ); + if ( rc != 0 ) { + err = "c_put range"; + goto fail; + } + nid = nlo; + rc = cursor->c_put( cursor, key, &data, 
DB_KEYLAST ); + if ( rc != 0 ) { + err = "c_put lo"; + goto fail; + } + nid = nhi; + rc = cursor->c_put( cursor, key, &data, DB_KEYLAST ); + if ( rc != 0 ) { + err = "c_put hi"; + goto fail; + } + } else { + /* There's room, just store it */ + goto put1; + } + } else { + /* It's a range, see if we need to rewrite + * the boundaries + */ + hi = id; + data.data = &nlo; + rc = cursor->c_get( cursor, key, &data, DB_NEXT_DUP ); + if ( rc != 0 ) { + err = "c_get lo"; + goto fail; + } + MDB_DISK2ID( &nlo, &lo ); + if ( id > lo ) { + data.data = &nhi; + rc = cursor->c_get( cursor, key, &data, DB_NEXT_DUP ); + if ( rc != 0 ) { + err = "c_get hi"; + goto fail; + } + MDB_DISK2ID( &nhi, &hi ); + } + if ( id < lo || id > hi ) { + /* Delete the current lo/hi */ + rc = cursor->c_del( cursor, 0 ); + if ( rc != 0 ) { + err = "c_del"; + goto fail; + } + data.data = &nid; + rc = cursor->c_put( cursor, key, &data, DB_KEYFIRST ); + if ( rc != 0 ) { + err = "c_put lo/hi"; + goto fail; + } + } + } + } else if ( rc == DB_NOTFOUND ) { +put1: data.data = &nid; + rc = cursor->c_put( cursor, key, &data, DB_NODUPDATA ); + /* Don't worry if it's already there */ + if ( rc != 0 && rc != DB_KEYEXIST ) { + err = "c_put id"; + goto fail; + } + } else { + /* initial c_get failed, nothing was done */ +fail: + Debug( LDAP_DEBUG_ANY, "=> mdb_idl_insert_key: " + "%s failed: %s (%d)\n", err, db_strerror(rc), rc ); + cursor->c_close( cursor ); + return rc; + } + /* If key was added (didn't already exist) and using IDL cache, + * update key in IDL cache. 
+	 */
+	if ( !rc && mdb->bi_idl_cache_max_size ) {
+		mdb_idl_cache_add_id( mdb, db, key, id );
+	}
+	rc = cursor->c_close( cursor );
+	if( rc != 0 ) {
+		Debug( LDAP_DEBUG_ANY, "=> mdb_idl_insert_key: "
+			"c_close failed: %s (%d)\n",
+			db_strerror(rc), rc, 0 );
+	}
+	return rc;
+}
+/*
+ * mdb_idl_delete_key - remove id from the ID list stored under key in db.
+ *
+ * Any cached IDL for the key is dropped first (when bi_idl_cache_size is
+ * set).  The first data item for the key is then fetched through a cursor:
+ *  - a non-zero first item means a plain list: the cursor is positioned
+ *    on the item equal to id (DB_GET_BOTH) and that item is deleted.
+ *  - a zero first item marks a range whose next two items are lo and hi.
+ *    Only id == lo or id == hi changes anything: that boundary is moved
+ *    one step inward and rewritten.  An id strictly inside the range is
+ *    left alone.  If lo >= hi after the adjustment the whole key is
+ *    deleted with db->del(); if lo == hi the single remaining id is then
+ *    stored again.
+ *
+ * Returns 0 on success (the result of closing the cursor), otherwise the
+ * DB error code of the failing call.  DB_NOTFOUND is returned without
+ * being logged.
+ */
+int
+mdb_idl_delete_key(
+	BackendDB *be,
+	DB *db,
+	DB_TXN *tid,
+	DBT *key,
+	ID id )
+{
+	struct mdb_info *mdb = (struct mdb_info *) be->be_private;
+	int rc;
+	DBT data;
+	DBC *cursor;
+	ID lo, hi, tmp, nid, nlo, nhi;
+	char *err;
+
+	{
+		char buf[16];
+		Debug( LDAP_DEBUG_ARGS,
+			"mdb_idl_delete_key: %lx %s\n",
+			(long) id, mdb_show_key( key, buf ), 0 );
+	}
+	assert( id != NOID );
+
+	if ( mdb->bi_idl_cache_size ) {
+		mdb_idl_cache_del( mdb, db, key );
+	}
+
+	MDB_ID2DISK( id, &nid );	/* nid = id in its on-disk form */
+
+	DBTzero( &data );
+	data.data = &tmp;	/* cursor results are written into tmp */
+	data.size = sizeof( id );
+	data.ulen = data.size;
+	data.flags = DB_DBT_USERMEM;
+
+	rc = db->cursor( db, tid, &cursor, mdb->bi_db_opflags );
+	if ( rc != 0 ) {
+		Debug( LDAP_DEBUG_ANY, "=> mdb_idl_delete_key: "
+			"cursor failed: %s (%d)\n", db_strerror(rc), rc, 0 );
+		return rc;
+	}
+	/* Fetch the first data item for this key, to see if it
+	 * exists and if it's a range.
+	 */
+	rc = cursor->c_get( cursor, key, &data, DB_SET );
+	err = "c_get";
+	if ( rc == 0 ) {
+		if ( tmp != 0 ) {
+			/* Not a range, just delete it */
+			if (tmp != nid) {
+				/* position to correct item */
+				tmp = nid;
+				rc = cursor->c_get( cursor, key, &data, DB_GET_BOTH );
+				if ( rc != 0 ) {
+					err = "c_get id";
+					goto fail;
+				}
+			}
+			rc = cursor->c_del( cursor, 0 );
+			if ( rc != 0 ) {
+				err = "c_del id";
+				goto fail;
+			}
+		} else {
+			/* It's a range, see if we need to rewrite
+			 * the boundaries
+			 */
+			data.data = &nlo;
+			rc = cursor->c_get( cursor, key, &data, DB_NEXT_DUP );
+			if ( rc != 0 ) {
+				err = "c_get lo";
+				goto fail;
+			}
+			MDB_DISK2ID( &nlo, &lo );
+			data.data = &nhi;
+			rc = cursor->c_get( cursor, key, &data, DB_NEXT_DUP );
+			if ( rc != 0 ) {
+				err = "c_get hi";
+				goto fail;
+			}
+			MDB_DISK2ID( &nhi, &hi );
+			if ( id == lo || id == hi ) {
+				if ( id == lo ) {
+					id++;	/* id now names the new lower bound */
+					lo = id;
+				} else if ( id == hi ) {
+					id--;	/* id now names the new upper bound */
+					hi = id;
+				}
+				if ( lo >= hi ) {
+				/* The range has collapsed... */
+					rc = db->del( db, tid, key, 0 );
+					if ( rc != 0 ) {
+						err = "del";
+						goto fail;
+					}
+				} else {
+					if ( id == lo ) {
+						/* reposition on lo slot */
+						data.data = &nlo;
+						cursor->c_get( cursor, key, &data, DB_PREV );	/* result is not checked */
+					}
+					rc = cursor->c_del( cursor, 0 );
+					if ( rc != 0 ) {
+						err = "c_del";
+						goto fail;
+					}
+				}
+				if ( lo <= hi ) {
+					/* store the adjusted boundary (or the lone surviving id) */
+					MDB_ID2DISK( id, &nid );
+					data.data = &nid;
+					rc = cursor->c_put( cursor, key, &data, DB_KEYFIRST );
+					if ( rc != 0 ) {
+						err = "c_put lo/hi";
+						goto fail;
+					}
+				}
+			}
+		}
+	} else {
+		/* initial c_get failed, nothing was done */
+fail:
+		if ( rc != DB_NOTFOUND ) {
+			Debug( LDAP_DEBUG_ANY, "=> mdb_idl_delete_key: "
+				"%s failed: %s (%d)\n", err, db_strerror(rc), rc );
+		}
+		cursor->c_close( cursor );
+		return rc;
+	}
+	rc = cursor->c_close( cursor );
+	if( rc != 0 ) {
+		Debug( LDAP_DEBUG_ANY,
+			"=> mdb_idl_delete_key: c_close failed: %s (%d)\n",
+			db_strerror(rc), rc, 0 );
+	}
+
+	return rc;
+}
+
+
+/*
+ * idl_intersection - return a = a intersection b
+
*/
+int
+mdb_idl_intersection(
+	ID *a,
+	ID *b )
+{
+	ID ida, idb;
+	ID idmax, idmin;
+	ID cursora = 0, cursorb = 0, cursorc;
+	int swap = 0;
+	/* The result replaces a; the return value is always 0.  When a is a
+	 * range and b is a list the two pointers are swapped below so the
+	 * list can be filtered in place: in that case the caller's b ends up
+	 * holding the result too, and it is copied into the caller's a at
+	 * the done: label.
+	 */
+	if ( MDB_IDL_IS_ZERO( a ) || MDB_IDL_IS_ZERO( b ) ) {
+		a[0] = 0;
+		return 0;
+	}
+
+	idmin = IDL_MAX( MDB_IDL_FIRST(a), MDB_IDL_FIRST(b) );
+	idmax = IDL_MIN( MDB_IDL_LAST(a), MDB_IDL_LAST(b) );
+	if ( idmin > idmax ) {
+		/* no overlap at all */
+		a[0] = 0;
+		return 0;
+	} else if ( idmin == idmax ) {
+		/* NOTE(review): idmin is stored without checking that both lists contain it */
+		a[0] = 1;
+		a[1] = idmin;
+		return 0;
+	}
+
+	if ( MDB_IDL_IS_RANGE( a ) ) {
+		if ( MDB_IDL_IS_RANGE(b) ) {
+		/* If both are ranges, just shrink the boundaries */
+			a[1] = idmin;
+			a[2] = idmax;
+			return 0;
+		} else {
+		/* Else swap so that b is the range, a is a list */
+			ID *tmp = a;
+			a = b;
+			b = tmp;
+			swap = 1;
+		}
+	}
+
+	/* If a range completely covers the list, the result is
+	 * just the list. If idmin to idmax is contiguous, just
+	 * turn it into a range.
+	 */
+	if ( MDB_IDL_IS_RANGE( b )
+		&& MDB_IDL_FIRST( b ) <= MDB_IDL_FIRST( a )
+		&& MDB_IDL_LAST( b ) >= MDB_IDL_LAST( a ) ) {
+		if (idmax - idmin + 1 == a[0])
+		{
+			a[0] = NOID;
+			a[1] = idmin;
+			a[2] = idmax;
+		}
+		goto done;
+	}
+
+	/* Fine, do the intersection one element at a time.
+	 * First advance to idmin in both IDLs.
+	 */
+	cursora = cursorb = idmin;
+	ida = mdb_idl_first( a, &cursora );
+	idb = mdb_idl_first( b, &cursorb );
+	cursorc = 0;	/* number of matches written back into a so far */
+
+	while( ida <= idmax || idb <= idmax ) {
+		if( ida == idb ) {
+			a[++cursorc] = ida;
+			ida = mdb_idl_next( a, &cursora );
+			idb = mdb_idl_next( b, &cursorb );
+		} else if ( ida < idb ) {
+			ida = mdb_idl_next( a, &cursora );
+		} else {
+			idb = mdb_idl_next( b, &cursorb );
+		}
+	}
+	a[0] = cursorc;
+done:
+	if (swap)
+		MDB_IDL_CPY( b, a );	/* b is the caller's a after the swap */
+
+	return 0;
+}
+
+
+/*
+ * idl_union - return a = a union b
+ *
+ * Always returns 0.  An empty b leaves a untouched; an empty a becomes a
+ * copy of b.  If either list is a range, or the merged list would need
+ * more than MDB_IDL_UM_MAX slots, a becomes the range from the smaller
+ * first ID to the larger last ID.  Otherwise b is used as scratch space:
+ * the elements of a that b lacks are appended after b's own entries
+ * (b[0] itself is not changed) and the two runs are merged back into a.
+ */
+int
+mdb_idl_union(
+	ID *a,
+	ID *b )
+{
+	ID ida, idb;
+	ID cursora = 0, cursorb = 0, cursorc;
+
+	if ( MDB_IDL_IS_ZERO( b ) ) {
+		return 0;
+	}
+
+	if ( MDB_IDL_IS_ZERO( a ) ) {
+		MDB_IDL_CPY( a, b );
+		return 0;
+	}
+
+	if ( MDB_IDL_IS_RANGE( a ) || MDB_IDL_IS_RANGE(b) ) {
+over:		ida = IDL_MIN( MDB_IDL_FIRST(a), MDB_IDL_FIRST(b) );
+		idb = IDL_MAX( MDB_IDL_LAST(a), MDB_IDL_LAST(b) );
+		a[0] = NOID;
+		a[1] = ida;
+		a[2] = idb;
+		return 0;
+	}
+
+	ida = mdb_idl_first( a, &cursora );
+	idb = mdb_idl_first( b, &cursorb );
+
+	cursorc = b[0];	/* append position: just past b's own entries */
+
+	/* The distinct elements of a are cat'd to b */
+	while( ida != NOID || idb != NOID ) {
+		if ( ida < idb ) {
+			if( ++cursorc > MDB_IDL_UM_MAX ) {
+				goto over;
+			}
+			b[cursorc] = ida;
+			ida = mdb_idl_next( a, &cursora );
+
+		} else {
+			if ( ida == idb )
+				ida = mdb_idl_next( a, &cursora );
+			idb = mdb_idl_next( b, &cursorb );
+		}
+	}
+
+	/* b is copied back to a in sorted order */
+	a[0] = cursorc;	/* total size of the union */
+	cursora = 1;
+	cursorb = 1;
+	cursorc = b[0]+1;	/* first of the entries appended above */
+	while (cursorb <= b[0] || cursorc <= a[0]) {
+		if (cursorc > a[0])
+			idb = NOID;
+		else
+			idb = b[cursorc];
+		if (cursorb <= b[0] && b[cursorb] < idb)
+			a[cursora++] = b[cursorb++];
+		else {
+			a[cursora++] = idb;
+			cursorc++;
+		}
+	}
+
+	return 0;
+}
+
+
+#if 0
+/*
+ * mdb_idl_notin - return a intersection ~b (or a minus b)
+ *
+ * Compiled out by the surrounding #if 0.  The result is written to ids;
+ * if a or b is empty, or either is a range, ids is simply a copy of a.
+ */
+int
+mdb_idl_notin(
+	ID *a,
+	ID *b,
+	ID *ids )
+{
+	ID ida, idb;
+	ID cursora = 0, cursorb = 0;
+
+	if( MDB_IDL_IS_ZERO( a ) ||
+		MDB_IDL_IS_ZERO( b ) ||
+		MDB_IDL_IS_RANGE( b ) )
+	{
+		MDB_IDL_CPY( ids, a );
+		return 0;
+	}
+
+	if( MDB_IDL_IS_RANGE( a ) ) {
+		MDB_IDL_CPY( ids, a );
+		return 0;
+	}
+
+	ida = mdb_idl_first( a, &cursora ),
+	idb = mdb_idl_first( b, &cursorb );
+
+	ids[0] = 0;
+
+	while( ida != NOID ) {
+		if ( idb == NOID ) {
+			/* we could shortcut this */
+			ids[++ids[0]] = ida;
+			ida = mdb_idl_next( a, &cursora );
+
+		} else if ( ida < idb ) {
+			ids[++ids[0]] = ida;
+			ida = mdb_idl_next( a, &cursora );
+
+		} else if ( ida > idb ) {
+			idb = mdb_idl_next( b, &cursorb );
+
+		} else {
+			ida = mdb_idl_next( a, &cursora );
+			idb = mdb_idl_next( b, &cursorb );
+		}
+	}
+
+	return 0;
+}
+#endif
+/*
+ * mdb_idl_first - start iterating ids at the ID given in *cursor.
+ *
+ * Range: *cursor is raised to the lower bound ids[1] if it is below it and
+ * that value is returned; it is not checked against the upper bound.
+ * List: *cursor == 0 selects slot 1, otherwise mdb_idl_search() picks the
+ * slot (presumably that of the first element >= *cursor -- confirm in
+ * mdb_idl_search).  On return *cursor holds the slot index, not an ID.
+ * Returns NOID for an empty list (also setting *cursor to NOID) or when
+ * the chosen slot is past the end (leaving *cursor unchanged).
+ */
+ID mdb_idl_first( ID *ids, ID *cursor )
+{
+	ID pos;
+
+	if ( ids[0] == 0 ) {
+		*cursor = NOID;
+		return NOID;
+	}
+
+	if ( MDB_IDL_IS_RANGE( ids ) ) {
+		if( *cursor < ids[1] ) {
+			*cursor = ids[1];
+		}
+		return *cursor;
+	}
+
+	if ( *cursor == 0 )
+		pos = 1;
+	else
+		pos = mdb_idl_search( ids, *cursor );
+
+	if( pos > ids[0] ) {
+		return NOID;
+	}
+
+	*cursor = pos;
+	return ids[pos];
+}
+/*
+ * mdb_idl_next - advance *cursor by one and return the next ID, or NOID
+ * once the cursor is past ids[2] (range) or past slot ids[0] (list).
+ * *cursor is incremented even when NOID is returned.
+ */
+ID mdb_idl_next( ID *ids, ID *cursor )
+{
+	if ( MDB_IDL_IS_RANGE( ids ) ) {
+		if( ids[2] < ++(*cursor) ) {
+			return NOID;
+		}
+		return *cursor;
+	}
+
+	if ( ++(*cursor) <= ids[0] ) {
+		return ids[*cursor];
+	}
+
+	return NOID;
+}
+
+#ifdef MDB_HIER
+
+/* Add one ID to an unsorted list. We ensure that the first element is the
+ * minimum and the last element is the maximum, for fast range compaction.
+ * this means IDLs up to length 3 are always sorted...
+ */
+int mdb_idl_append_one( ID *ids, ID id )
+{
+	if (MDB_IDL_IS_RANGE( ids )) {
+		/* if already in range, treat as a dup */
+		if (id >= MDB_IDL_FIRST(ids) && id <= MDB_IDL_LAST(ids))
+			return -1;
+		if (id < MDB_IDL_FIRST(ids))
+			ids[1] = id;	/* widen the range downwards */
+		else if (id > MDB_IDL_LAST(ids))
+			ids[2] = id;	/* widen the range upwards */
+		return 0;
+	}
+	if ( ids[0] ) {
+		ID tmp;
+
+		if (id < ids[1]) {
+			/* new minimum: it takes slot 1, the old minimum is appended instead */
+			tmp = ids[1];
+			ids[1] = id;
+			id = tmp;
+		}
+		if ( ids[0] > 1 && id < ids[ids[0]] ) {
+			/* keep the maximum last: swap so the larger value is appended */
+			tmp = ids[ids[0]];
+			ids[ids[0]] = id;
+			id = tmp;
+		}
+	}
+	ids[0]++;
+	if ( ids[0] >= MDB_IDL_UM_MAX ) {
+		/* list is full: collapse to the range ids[1]..id */
+		ids[0] = NOID;
+		ids[2] = id;
+	} else {
+		ids[ids[0]] = id;
+	}
+	return 0;
+}
+
+/* Append sorted list b to sorted list a. The result is unsorted but
+ * a[1] is the min of the result and a[a[0]] is the max.
+ *
+ * Always returns 0.  An empty b is a no-op and an empty a becomes a copy
+ * of b.  If either argument is a range, or a[0] + b[0] reaches
+ * MDB_IDL_UM_MAX, a becomes the range from the smaller first ID to the
+ * larger last ID.  b's last slot is modified temporarily and restored
+ * before returning.
+ */
+int mdb_idl_append( ID *a, ID *b )
+{
+	ID ida, idb, tmp, swap = 0;
+
+	if ( MDB_IDL_IS_ZERO( b ) ) {
+		return 0;
+	}
+
+	if ( MDB_IDL_IS_ZERO( a ) ) {
+		MDB_IDL_CPY( a, b );
+		return 0;
+	}
+
+	ida = MDB_IDL_LAST( a );
+	idb = MDB_IDL_LAST( b );
+	if ( MDB_IDL_IS_RANGE( a ) || MDB_IDL_IS_RANGE(b) ||
+		a[0] + b[0] >= MDB_IDL_UM_MAX ) {
+		a[2] = IDL_MAX( ida, idb );
+		a[1] = IDL_MIN( a[1], b[1] );
+		a[0] = NOID;
+		return 0;
+	}
+
+	if ( b[0] > 1 && ida > idb ) {
+		/* a's maximum is the overall maximum: exchange the two last
+		 * elements so it is copied last; swap remembers b's original
+		 * last value so it can be restored below
+		 */
+		swap = idb;
+		a[a[0]] = idb;
+		b[b[0]] = ida;
+	}
+
+	if ( b[1] < a[1] ) {
+		tmp = a[1];
+		a[1] = b[1];
+	} else {
+		tmp = b[1];
+	}
+	a[0]++;
+	a[a[0]] = tmp;	/* the larger of the two first elements is appended */
+
+	if ( b[0] > 1 ) {
+		int i = b[0] - 1;
+		AC_MEMCPY(a+a[0]+1, b+2, i * sizeof(ID));	/* rest of b: b[2..b[0]] */
+		a[0] += i;
+	}
+	if ( swap ) {
+		b[b[0]] = swap;
+	}
+	return 0;
+}
+
+#if 1
+
+/* Quicksort + Insertion sort for small arrays
+ *
+ * Sorts ids[1..ids[0]] in place in ascending order; a range is left
+ * untouched.  tmp is not used for IDs here: its storage is reused as an
+ * int stack holding the bounds of partitions still to be sorted.
+ */
+
+#define SMALL	8
+#define	SWAP(a,b)	itmp=(a);(a)=(b);(b)=itmp
+
+void
+mdb_idl_sort( ID *ids, ID *tmp )
+{
+	int *istack = (int *)tmp;
+	int i,j,k,l,ir,jstack;
+	ID a, itmp;
+
+	if ( MDB_IDL_IS_RANGE( ids ))
+		return;
+
+	ir = ids[0];	/* right edge of the current partition */
+	l = 1;	/* left edge of the current partition */
+	jstack = 0;
+	for(;;) {
+		if (ir - l < SMALL) {	/* Insertion sort */
+			for (j=l+1;j<=ir;j++) {
+				a = ids[j];
+				for (i=j-1;i>=1;i--) {
+					if (ids[i] <= a) break;
+					ids[i+1] = ids[i];
+				}
+				ids[i+1] = a;
+			}
+			if (jstack == 0) break;
+			ir = istack[jstack--];
+			l = istack[jstack--];
+		} else {
+			k = (l + ir) >> 1;	/* Choose median of left, center, right */
+			SWAP(ids[k], ids[l+1]);
+			if (ids[l] > ids[ir]) {
+				SWAP(ids[l], ids[ir]);
+			}
+			if (ids[l+1] > ids[ir]) {
+				SWAP(ids[l+1], ids[ir]);
+			}
+			if (ids[l] > ids[l+1]) {
+				SWAP(ids[l], ids[l+1]);
+			}
+			i = l+1;
+			j = ir;
+			a = ids[l+1];	/* pivot */
+			for(;;) {
+				do i++; while(ids[i] < a);
+				do j--; while(ids[j] > a);
+				if (j < i) break;
+				SWAP(ids[i],ids[j]);
+			}
+			ids[l+1] = ids[j];
+			ids[j] = a;
+			jstack += 2;
+			/* push one partition, continue with the other.
+			 * NOTE(review): the left partition holds j-l elements but
+			 * the test below compares against j-1 -- confirm whether
+			 * "j-l" was intended; it only affects which side is
+			 * deferred (stack depth), not the sorted result.
+			 */
+			if (ir-i+1 >= j-1) {
+				istack[jstack] = ir;
+				istack[jstack-1] = i;
+				ir = j-1;
+			} else {
+				istack[jstack] = j-1;
+				istack[jstack-1] = l;
+				l = i;
+			}
+		}
+	}
+}
+
+#else
+
+/* 8 bit Radix sort + insertion sort
+ *
+ * based on code from http://www.cubic.org/docs/radix.htm
+ * with improvements by mbackes@symas.com and hyc@symas.com
+ *
+ * This code is O(n) but has a relatively high constant factor. For lists
+ * up to ~50 Quicksort is slightly faster; up to ~100 they are even.
+ * Much faster than quicksort for lists longer than ~100. Insertion
+ * sort is actually superior for lists <50.
+ */
+
+#define BUCKETS	(1<<8)
+#define SMALL	50
+/*
+ * Radix variant of mdb_idl_sort (only built when the "#if 1" above is
+ * turned off).  Sorts ids[1..ids[0]] ascending; a range is left alone.
+ * Lists of up to SMALL entries use insertion sort.  Longer lists are
+ * distributed one byte of the ID per pass, alternating between ids and
+ * tmp as source and destination, so tmp must have room for ids[0]+1 IDs.
+ */
+void
+mdb_idl_sort( ID *ids, ID *tmp )
+{
+	int count, soft_limit, phase = 0, size = ids[0];
+	ID *idls[2];
+	unsigned char *maxv = (unsigned char *)&ids[size];	/* bytes of the last element */
+
+	if ( MDB_IDL_IS_RANGE( ids ))
+		return;
+
+	/* Use insertion sort for small lists */
+	if ( size <= SMALL ) {
+		int i,j;
+		ID a;
+
+		for (j=1;j<=size;j++) {
+			a = ids[j];
+			for (i=j-1;i>=1;i--) {
+				if (ids[i] <= a) break;
+				ids[i+1] = ids[i];
+			}
+			ids[i+1] = a;
+		}
+		return;
+	}
+
+	tmp[0] = size;
+	idls[0] = ids;
+	idls[1] = tmp;
+
+	/* soft_limit = most significant non-zero byte of the last element;
+	 * bytes above it are skipped.  NOTE(review): this presumably relies
+	 * on the last element being the maximum -- confirm with callers.
+	 */
+#if BYTE_ORDER == BIG_ENDIAN
+	for (soft_limit = 0; !maxv[soft_limit]; soft_limit++);
+#else
+	for (soft_limit = sizeof(ID)-1; !maxv[soft_limit]; soft_limit--);
+#endif
+
+	for (
+#if BYTE_ORDER == BIG_ENDIAN
+	count = sizeof(ID)-1; count >= soft_limit; --count
+#else
+	count = 0; count <= soft_limit; ++count
+#endif
+	) {
+		unsigned int num[BUCKETS], * np, n, sum;
+		int i;
+		ID *sp, *source, *dest;
+		unsigned char *bp, *source_start;
+
+		source = idls[phase]+1;
+		dest = idls[phase^1]+1;
+		source_start = ((unsigned char *) source) + count;	/* byte examined in this pass */
+
+		np = num;
+		for ( i = BUCKETS; i > 0; --i ) *np++ = 0;
+
+		/* count occurences of every byte value */
+		bp = source_start;
+		for ( i = size; i > 0; --i, bp += sizeof(ID) )
+			num[*bp]++;
+
+		/* transform count into index by summing elements and storing
+		 * into same array
+		 */
+		sum = 0;
+		np = num;
+		for ( i = BUCKETS; i > 0; --i ) {
+			n = *np;
+			*np++ = sum;
+			sum += n;
+		}
+
+		/* fill dest with the right values in the right place */
+		bp = source_start;
+		sp = source;
+		for ( i = size; i > 0; --i, bp += sizeof(ID) ) {
+			np = num + *bp;
+			dest[*np] = *sp++;
+			++(*np);
+		}
+		phase ^= 1;
+	}
+
+	/* copy back from temp if needed */
+	if ( phase ) {
+		ids++; tmp++;
+		for ( count = 0; count < size; ++count )
+			*ids++ = *tmp++;
+	}
+}
+#endif	/* Quick vs Radix */
+
+#endif	/* MDB_HIER */
diff --git a/servers/slapd/back-mdb/idl.h b/servers/slapd/back-mdb/idl.h
new file mode 100644
index
0000000000..be70dbb96c
--- /dev/null
+++ b/servers/slapd/back-mdb/idl.h
@@ -0,0 +1,74 @@
+/* idl.h - ldap mdb back-end ID list header file */
+/* $OpenLDAP$ */
+/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
+ *
+ * Copyright 2000-2011 The OpenLDAP Foundation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in the file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * <http://www.OpenLDAP.org/license.html>.
+ */
+
+#ifndef _MDB_IDL_H_
+#define _MDB_IDL_H_
+
+/* IDL sizes - likely should be even bigger
+ *   limiting factors: sizeof(ID), thread stack size
+ */
+#define	MDB_IDL_LOGN	16	/* DB_SIZE is 2^16, UM_SIZE is 2^17 */
+#define MDB_IDL_DB_SIZE		(1<bi_lastid) )	/* NOTE(review): this line looks truncated -- the text after "(1<" and the macro definitions that should follow it appear to be missing; restore from the original header */
+#define MDB_IDL_ALL( mdb, ids ) MDB_IDL_RANGE( ids, 1, ((mdb)->bi_lastid) )
+/* Accessors for an IDL held in an ID array: ids[1] is the first ID; the
+ * last ID is ids[2] for a range and ids[ids[0]] for a list; N is the
+ * number of IDs covered (range width, or ids[0] for a list).
+ */
+#define MDB_IDL_FIRST( ids )	( ids[1] )
+#define MDB_IDL_LAST( ids )		( MDB_IDL_IS_RANGE(ids) \
+	? ids[2] : ids[ids[0]] )
+
+#define MDB_IDL_N( ids )		( MDB_IDL_IS_RANGE(ids) \
+	? (ids[2]-ids[1])+1 : ids[0] )
+
+LDAP_BEGIN_DECL
+LDAP_END_DECL
+
+#endif
diff --git a/servers/slapd/back-mdb/index.c b/servers/slapd/back-mdb/index.c
new file mode 100644
index 0000000000..7a9453bc71
--- /dev/null
+++ b/servers/slapd/back-mdb/index.c
@@ -0,0 +1,574 @@
+/* index.c - routines for dealing with attribute indexes */
+/* $OpenLDAP$ */
+/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
+ *
+ * Copyright 2000-2011 The OpenLDAP Foundation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in the file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * <http://www.OpenLDAP.org/license.html>.
+ */
+
+#include "portable.h"
+
+#include
+
+#include
+#include
+
+#include "slap.h"
+#include "back-mdb.h"
+#include "lutil_hash.h"
+
+static char presence_keyval[] = {0,0};
+static struct berval presence_key = BER_BVC(presence_keyval);
+
+/*
+ * mdb_index_mask - find the index configuration that applies to desc.
+ *
+ * Lookup order: the description itself, then (for a tagged description)
+ * its untagged base type unless that index is marked SLAP_INDEX_NOTAGS,
+ * then the type and each of its supertypes unless the index found is
+ * marked SLAP_INDEX_NOSUBTYPES.  On a hit *atname is set to the name the
+ * index was found under and the AttrInfo is returned; otherwise the
+ * result is NULL and *atname is left untouched.
+ */
+AttrInfo *mdb_index_mask(
+	Backend *be,
+	AttributeDescription *desc,
+	struct berval *atname )
+{
+	AttributeType *type;
+	AttrInfo *info;
+
+	/* 1. an index configured for exactly this description */
+	info = mdb_attr_mask( be->be_private, desc );
+	if ( info != NULL ) {
+		*atname = desc->ad_cname;
+		return info;
+	}
+
+	/* 2. a tagged description may be served by the index of its
+	 * base type, provided that index accepts tagged values
+	 */
+	if ( slap_ad_is_tagged( desc ) && desc != desc->ad_type->sat_ad ) {
+		info = mdb_attr_mask( be->be_private, desc->ad_type->sat_ad );
+		if ( info != NULL && ( info->ai_indexmask & SLAP_INDEX_NOTAGS ) == 0 ) {
+			*atname = desc->ad_type->sat_cname;
+			return info;
+		}
+	}
+
+	/* 3. walk up the supertype chain, starting with the type itself */
+	type = desc->ad_type;
+	while ( type != NULL ) {
+		/* a type without an AD has never been indexed */
+		if ( type->sat_ad != NULL ) {
+			info = mdb_attr_mask( be->be_private, type->sat_ad );
+			if ( info != NULL
+				&& ( info->ai_indexmask & SLAP_INDEX_NOSUBTYPES ) == 0 ) {
+				*atname = type->sat_cname;
+				return info;
+			}
+		}
+		type = type->sat_sup;
+	}
+
+	return NULL;
+}
+
+/* This function is only called when evaluating search filters.
+ */
+int mdb_index_param(
+	Backend *be,
+	AttributeDescription *desc,
+	int ftype,
+	DB **dbp,
+	slap_mask_t *maskp,
+	struct berval *prefixp )
+{
+	AttrInfo *ai;
+	int rc;
+	slap_mask_t mask, type = 0;
+	DB *db;
+	/* Selects the index to use for filter type ftype on desc.
+	 * On LDAP_SUCCESS *dbp is the index database, *maskp the index mask
+	 * and *prefixp the key prefix (the name found by mdb_index_mask(),
+	 * replaced by presence_key for presence filters).
+	 * LDAP_INAPPROPRIATE_MATCHING means no suitable index exists,
+	 * LDAP_OTHER an unsupported ftype; a failure of mdb_db_cache() is
+	 * returned as is.
+	 */
+	ai = mdb_index_mask( be, desc, prefixp );
+
+	if ( !ai ) {
+#ifdef MDB_MONITOR_IDX
+		/* record the missing index so the monitor can report it */
+		switch ( ftype ) {
+		case LDAP_FILTER_PRESENT:
+			type = SLAP_INDEX_PRESENT;
+			break;
+		case LDAP_FILTER_APPROX:
+			type = SLAP_INDEX_APPROX;
+			break;
+		case LDAP_FILTER_EQUALITY:
+			type = SLAP_INDEX_EQUALITY;
+			break;
+		case LDAP_FILTER_SUBSTRINGS:
+			type = SLAP_INDEX_SUBSTR;
+			break;
+		default:
+			return LDAP_INAPPROPRIATE_MATCHING;
+		}
+		mdb_monitor_idx_add( be->be_private, desc, type );
+#endif /* MDB_MONITOR_IDX */
+
+		return LDAP_INAPPROPRIATE_MATCHING;
+	}
+	mask = ai->ai_indexmask;
+
+	rc = mdb_db_cache( be, prefixp, &db );
+
+	if( rc != LDAP_SUCCESS ) {
+		return rc;
+	}
+
+	switch( ftype ) {
+	case LDAP_FILTER_PRESENT:
+		type = SLAP_INDEX_PRESENT;
+		if( IS_SLAP_INDEX( mask, SLAP_INDEX_PRESENT ) ) {
+			*prefixp = presence_key;
+			goto done;
+		}
+		break;
+
+	case LDAP_FILTER_APPROX:
+		type = SLAP_INDEX_APPROX;
+		if ( desc->ad_type->sat_approx ) {
+			if( IS_SLAP_INDEX( mask, SLAP_INDEX_APPROX ) ) {
+				goto done;
+			}
+			break;
+		}
+
+		/* Use EQUALITY rule and index for approximate match */
+		/* fall thru */
+
+	case LDAP_FILTER_EQUALITY:
+		type = SLAP_INDEX_EQUALITY;
+		if( IS_SLAP_INDEX( mask, SLAP_INDEX_EQUALITY ) ) {
+			goto done;
+		}
+		break;
+
+	case LDAP_FILTER_SUBSTRINGS:
+		type = SLAP_INDEX_SUBSTR;
+		if( IS_SLAP_INDEX( mask, SLAP_INDEX_SUBSTR ) ) {
+			goto done;
+		}
+		break;
+
+	default:
+		return LDAP_OTHER;
+	}
+
+#ifdef MDB_MONITOR_IDX
+	mdb_monitor_idx_add( be->be_private, desc, type );
+#endif /* MDB_MONITOR_IDX */
+
+	return LDAP_INAPPROPRIATE_MATCHING;
+
+done:
+	*dbp = db;
+	*maskp = mask;
+	return LDAP_SUCCESS;
+}
+/*
+ * indexer - apply the index types selected by mask to vals of ad.
+ *
+ * Opens (via mdb_db_cache) the index database named atname, then for the
+ * presence, equality, approximate and substring bits set in mask has the
+ * matching rule generate keys and hands each key to mdb_key_change()
+ * together with id and opid.  A matching rule that fails to generate
+ * keys is not treated as an error (rc is reset to LDAP_SUCCESS); the
+ * first mdb_key_change() failure stops processing.
+ * Returns 0, DB_LOCK_DEADLOCK or DB_LOCK_NOTGRANTED unchanged; every
+ * other failure is reported as LDAP_OTHER.
+ */
+static int indexer(
+	Operation *op,
+	DB_TXN *txn,
+	AttributeDescription *ad,
+	struct berval *atname,
+	BerVarray vals,
+	ID id,
+	int opid,
+	slap_mask_t mask )
+{
+	int rc, i;
+	DB *db;
+	struct berval *keys;
+
+	assert( mask != 0 );
+
+	rc = mdb_db_cache( op->o_bd, atname, &db );
+
+	if ( rc != LDAP_SUCCESS ) {
+		Debug( LDAP_DEBUG_ANY,
+			"mdb_index_read: Could not open DB %s\n",
+			atname->bv_val, 0, 0 );
+		return LDAP_OTHER;
+	}
+
+	if( IS_SLAP_INDEX( mask, SLAP_INDEX_PRESENT ) ) {
+		rc = mdb_key_change( op->o_bd, db, txn, &presence_key, id, opid );
+		if( rc ) {
+			goto done;
+		}
+	}
+
+	if( IS_SLAP_INDEX( mask, SLAP_INDEX_EQUALITY ) ) {
+		rc = ad->ad_type->sat_equality->smr_indexer(
+			LDAP_FILTER_EQUALITY,
+			mask,
+			ad->ad_type->sat_syntax,
+			ad->ad_type->sat_equality,
+			atname, vals, &keys, op->o_tmpmemctx );
+
+		if( rc == LDAP_SUCCESS && keys != NULL ) {
+			for( i=0; keys[i].bv_val != NULL; i++ ) {
+				rc = mdb_key_change( op->o_bd, db, txn, &keys[i], id, opid );
+				if( rc ) {
+					ber_bvarray_free_x( keys, op->o_tmpmemctx );
+					goto done;
+				}
+			}
+			ber_bvarray_free_x( keys, op->o_tmpmemctx );
+		}
+		rc = LDAP_SUCCESS;	/* a key-generation failure is deliberately dropped */
+	}
+
+	if( IS_SLAP_INDEX( mask, SLAP_INDEX_APPROX ) ) {
+		rc = ad->ad_type->sat_approx->smr_indexer(
+			LDAP_FILTER_APPROX,
+			mask,
+			ad->ad_type->sat_syntax,
+			ad->ad_type->sat_approx,
+			atname, vals, &keys, op->o_tmpmemctx );
+
+		if( rc == LDAP_SUCCESS && keys != NULL ) {
+			for( i=0; keys[i].bv_val != NULL; i++ ) {
+				rc = mdb_key_change( op->o_bd, db, txn, &keys[i], id, opid );
+				if( rc ) {
+					ber_bvarray_free_x( keys, op->o_tmpmemctx );
+					goto done;
+				}
+			}
+			ber_bvarray_free_x( keys, op->o_tmpmemctx );
+		}
+
+		rc = LDAP_SUCCESS;
+	}
+
+	if( IS_SLAP_INDEX( mask, SLAP_INDEX_SUBSTR ) ) {
+		rc = ad->ad_type->sat_substr->smr_indexer(
+			LDAP_FILTER_SUBSTRINGS,
+			mask,
+			ad->ad_type->sat_syntax,
+			ad->ad_type->sat_substr,
+			atname, vals, &keys, op->o_tmpmemctx );
+
+		if( rc == LDAP_SUCCESS && keys != NULL ) {
+			for( i=0; keys[i].bv_val != NULL; i++ ) {
+				rc = mdb_key_change( op->o_bd, db, txn, &keys[i], id, opid );
+				if( rc ) {
+					ber_bvarray_free_x( keys, op->o_tmpmemctx );
+					goto done;
+				}
+			}
+			ber_bvarray_free_x( keys, op->o_tmpmemctx );
+		}
+
+		rc = LDAP_SUCCESS;
+	}
+
+done:
+	switch( rc ) {
+	/* The callers all know how to deal with these results */
+	case 0:
+	case DB_LOCK_DEADLOCK:
+	case DB_LOCK_NOTGRANTED:
+		break;
+	/* Anything else is bad news */
+	default:
+		rc = LDAP_OTHER;
+	}
+	return rc;
+}
+/*
+ * index_at_values - index vals for attribute type `type` and its
+ * supertypes.
+ *
+ * Recurses to the supertype first, then handles the type's own
+ * description and finally, when tags is non-empty, the tagged
+ * description found by ad_find_tags().  For MDB_INDEX_UPDATE_OP only the
+ * index bits present in ai_newmask but not in ai_indexmask are applied,
+ * using SLAP_INDEX_ADD_OP; otherwise ai_newmask is used when set, else
+ * ai_indexmask.  Returns LDAP_SUCCESS or the first indexer() failure.
+ */
+static int index_at_values(
+	Operation *op,
+	DB_TXN *txn,
+	AttributeDescription *ad,
+	AttributeType *type,
+	struct berval *tags,
+	BerVarray vals,
+	ID id,
+	int opid )
+{
+	int rc;
+	slap_mask_t mask = 0;
+	int ixop = opid;
+	AttrInfo *ai = NULL;
+
+	if ( opid == MDB_INDEX_UPDATE_OP )
+		ixop = SLAP_INDEX_ADD_OP;
+
+	if( type->sat_sup ) {
+		/* recurse */
+		rc = index_at_values( op, txn, NULL,
+			type->sat_sup, tags,
+			vals, id, opid );
+
+		if( rc ) return rc;
+	}
+
+	/* If this type has no AD, we've never used it before */
+	if( type->sat_ad ) {
+		ai = mdb_attr_mask( op->o_bd->be_private, type->sat_ad );
+		if ( ai ) {
+#ifdef LDAP_COMP_MATCH
+			/* component indexing */
+			if ( ai->ai_cr ) {
+				ComponentReference *cr;
+				for( cr = ai->ai_cr ; cr ; cr = cr->cr_next ) {
+					rc = indexer( op, txn, cr->cr_ad, &type->sat_cname,
+						cr->cr_nvals, id, ixop,
+						cr->cr_indexmask );	/* NOTE(review): this result is never checked */
+				}
+			}
+#endif
+			ad = type->sat_ad;
+			/* If we're updating the index, just set the new bits that aren't
+			 * already in the old mask.
+			 */
+			if ( opid == MDB_INDEX_UPDATE_OP )
+				mask = ai->ai_newmask & ~ai->ai_indexmask;
+			else
+			/* For regular updates, if there is a newmask use it. Otherwise
+			 * just use the old mask.
+			 */
+				mask = ai->ai_newmask ? ai->ai_newmask : ai->ai_indexmask;
+			if( mask ) {
+				rc = indexer( op, txn, ad, &type->sat_cname,
+					vals, id, ixop, mask );
+
+				if( rc ) return rc;
+			}
+		}
+	}
+
+	if( tags->bv_len ) {
+		AttributeDescription *desc;
+
+		desc = ad_find_tags( type, tags );
+		if( desc ) {
+			ai = mdb_attr_mask( op->o_bd->be_private, desc );
+
+			if( ai ) {
+				if ( opid == MDB_INDEX_UPDATE_OP )
+					mask = ai->ai_newmask & ~ai->ai_indexmask;
+				else
+					mask = ai->ai_newmask ? ai->ai_newmask : ai->ai_indexmask;
+				if ( mask ) {
+					rc = indexer( op, txn, desc, &desc->ad_cname,
+						vals, id, ixop, mask );
+
+					if( rc ) {
+						return rc;
+					}
+				}
+			}
+		}
+	}
+
+	return LDAP_SUCCESS;
+}
+/*
+ * mdb_index_values - index vals of attribute desc for entry id.
+ * A zero id is skipped (returns 0); otherwise the result of
+ * index_at_values() for the description's type and tags is returned.
+ */
+int mdb_index_values(
+	Operation *op,
+	DB_TXN *txn,
+	AttributeDescription *desc,
+	BerVarray vals,
+	ID id,
+	int opid )
+{
+	int rc;
+
+	/* Never index ID 0 */
+	if ( id == 0 )
+		return 0;
+
+	rc = index_at_values( op, txn, desc,
+		desc->ad_type, &desc->ad_tags,
+		vals, id, opid );
+
+	return rc;
+}
+
+/* Get the list of which indices apply to this attr
+ *
+ * For the type, each of its supertypes and (when tags is non-empty) the
+ * tagged description, the slot returned by mdb_attr_slot() is looked up
+ * and, if there is one, ir[slot].ai is set and a newly allocated AttrList
+ * node pointing at a is pushed on the front of ir[slot].attrs.
+ */
+int
+mdb_index_recset(
+	struct mdb_info *mdb,
+	Attribute *a,
+	AttributeType *type,
+	struct berval *tags,
+	IndexRec *ir )
+{
+	int rc, slot;
+	AttrList *al;
+
+	if( type->sat_sup ) {
+		/* recurse */
+		rc = mdb_index_recset( mdb, a, type->sat_sup, tags, ir );
+		if( rc ) return rc;
+	}
+	/* If this type has no AD, we've never used it before */
+	if( type->sat_ad ) {
+		slot = mdb_attr_slot( mdb, type->sat_ad, NULL );
+		if ( slot >= 0 ) {
+			ir[slot].ai = mdb->bi_attrs[slot];
+			al = ch_malloc( sizeof( AttrList ));
+			al->attr = a;
+			al->next = ir[slot].attrs;
+			ir[slot].attrs = al;
+		}
+	}
+	if( tags->bv_len ) {
+		AttributeDescription *desc;
+
+		desc = ad_find_tags( type, tags );
+		if( desc ) {
+			slot = mdb_attr_slot( mdb, desc, NULL );
+			if ( slot >= 0 ) {
+				ir[slot].ai = mdb->bi_attrs[slot];
+				al = ch_malloc( sizeof( AttrList ));
+				al->attr = a;
+				al->next = ir[slot].attrs;
+				ir[slot].attrs = al;
+			}
+		}
+	}
+	return LDAP_SUCCESS;
+}
+
+/* Apply the indices for the
recset */
+/*
+ * mdb_index_recrun - run the indexer over the attributes queued by
+ * mdb_index_recset().
+ *
+ * Handles slots base, base + slap_tool_thread_max, ... of ir0, so several
+ * tool threads can share one record set.  Every queued AttrList node of
+ * a slot is indexed with SLAP_INDEX_ADD_OP and then freed.  When
+ * indexer() fails, the rest of that slot is abandoned and the remaining
+ * slots are still processed.  ID 0 is never indexed.
+ *
+ * Returns 0, or the first non-zero indexer() result.  (Previously a
+ * later slot that succeeded overwrote an earlier failure.)
+ */
+int mdb_index_recrun(
+	Operation *op,
+	struct mdb_info *mdb,
+	IndexRec *ir0,
+	ID id,
+	int base )
+{
+	IndexRec *ir;
+	AttrList *al;
+	int i, rc = 0;
+
+	/* Never index ID 0 */
+	if ( id == 0 )
+		return 0;
+
+	for (i=base; i<mdb->bi_nattrs; i+=slap_tool_thread_max) {
+		ir = ir0 + i;
+		if ( !ir->ai ) continue;
+		while (( al = ir->attrs )) {
+			int rc2;
+
+			ir->attrs = al->next;
+			rc2 = indexer( op, NULL, ir->ai->ai_desc,
+				&ir->ai->ai_desc->ad_type->sat_cname,
+				al->attr->a_nvals, id, SLAP_INDEX_ADD_OP,
+				ir->ai->ai_indexmask );
+			free( al );
+			if ( rc2 ) {
+				/* remember only the first failure */
+				if ( !rc ) rc = rc2;
+				break;
+			}
+		}
+	}
+	return rc;
+}
+
+/*
+ * mdb_index_entry - add or remove all attribute values of entry e
+ * to/from the indexes, according to opid.
+ *
+ * Calls mdb_index_values() for every attribute of e with its normalized
+ * values.  An entry with ID 0 is never indexed.  Returns LDAP_SUCCESS,
+ * or the first failure from mdb_index_values().  The trace messages
+ * label the operation "del" for SLAP_INDEX_DELETE_OP and "add" for
+ * anything else, consistently for entry, failure and success.
+ */
+int
+mdb_index_entry(
+	Operation *op,
+	DB_TXN *txn,
+	int opid,
+	Entry *e )
+{
+	int rc;
+	Attribute *ap = e->e_attrs;
+	const char *opname = opid == SLAP_INDEX_DELETE_OP ? "del" : "add";
+#if 0 /* ifdef LDAP_COMP_MATCH */
+	ComponentReference *cr_list = NULL;
+	ComponentReference *cr = NULL, *dupped_cr = NULL;
+	void* decoded_comp;
+	ComponentSyntaxInfo* csi_attr;
+	Syntax* syn;
+	AttributeType* at;
+	int i, num_attr;
+	void* mem_op;
+	struct berval value = {0};
+#endif
+
+	/* Never index ID 0 */
+	if ( e->e_id == 0 )
+		return 0;
+
+	Debug( LDAP_DEBUG_TRACE, "=> index_entry_%s( %ld, \"%s\" )\n",
+		opname,
+		(long) e->e_id, e->e_dn );
+
+	/* add each attribute to the indexes */
+	for ( ; ap != NULL; ap = ap->a_next ) {
+#if 0 /* ifdef LDAP_COMP_MATCH */
+		AttrInfo *ai;
+		/* see if attribute has components to be indexed */
+		ai = mdb_attr_mask( op->o_bd->be_private, ap->a_desc->ad_type->sat_ad );
+		if ( !ai ) continue;
+		cr_list = ai->ai_cr;
+		if ( attr_converter && cr_list ) {
+			syn = ap->a_desc->ad_type->sat_syntax;
+			ap->a_comp_data = op->o_tmpalloc( sizeof( ComponentData ), op->o_tmpmemctx );
+			/* Memory chunk(nibble) pre-allocation for decoders */
+			mem_op = nibble_mem_allocator ( 1024*16, 1024*4 );
+			ap->a_comp_data->cd_mem_op = mem_op;
+			for( cr = cr_list ; cr ; cr = cr->cr_next ) {
+				/* count how many values in an attribute */
+				for( num_attr=0; ap->a_vals[num_attr].bv_val != NULL; num_attr++ );
+				num_attr++;
+				cr->cr_nvals = (BerVarray)op->o_tmpalloc( sizeof( struct berval )*num_attr, op->o_tmpmemctx );
+				for( i=0; ap->a_vals[i].bv_val != NULL; i++ ) {
+					/* decoding attribute value */
+					decoded_comp = attr_converter ( ap, syn, &ap->a_vals[i] );
+					if ( !decoded_comp )
+						return LDAP_DECODING_ERROR;
+					/* extracting the referenced component */
+					dupped_cr = dup_comp_ref( op, cr );
+					csi_attr = ((ComponentSyntaxInfo*)decoded_comp)->csi_comp_desc->cd_extract_i( mem_op, dupped_cr, decoded_comp );
+					if ( !csi_attr )
+						return LDAP_DECODING_ERROR;
+					cr->cr_asn_type_id = csi_attr->csi_comp_desc->cd_type_id;
+					cr->cr_ad = (AttributeDescription*)get_component_description ( cr->cr_asn_type_id );
+					if ( !cr->cr_ad )
+						return LDAP_INVALID_SYNTAX;
+					at = cr->cr_ad->ad_type;
+					/* encoding the value of component in GSER */
+					rc = component_encoder( mem_op, csi_attr, &value );
+					if ( rc != LDAP_SUCCESS )
+						return LDAP_ENCODING_ERROR;
+					/* Normalize the encoded component values */
+					if ( at->sat_equality && at->sat_equality->smr_normalize ) {
+						rc = at->sat_equality->smr_normalize (
+							SLAP_MR_VALUE_OF_ATTRIBUTE_SYNTAX,
+							at->sat_syntax, at->sat_equality,
+							&value, &cr->cr_nvals[i], op->o_tmpmemctx );
+					} else {
+						cr->cr_nvals[i] = value;
+					}
+				}
+				/* The end of BerVarray */
+				cr->cr_nvals[num_attr-1].bv_val = NULL;
+				cr->cr_nvals[num_attr-1].bv_len = 0;
+			}
+			op->o_tmpfree( ap->a_comp_data, op->o_tmpmemctx );
+			nibble_mem_free ( mem_op );
+			ap->a_comp_data = NULL;
+		}
+#endif
+		rc = mdb_index_values( op, txn, ap->a_desc,
+			ap->a_nvals, e->e_id, opid );
+
+		if( rc != LDAP_SUCCESS ) {
+			Debug( LDAP_DEBUG_TRACE,
+				"<= index_entry_%s( %ld, \"%s\" ) failure\n",
+				opname,
+				(long) e->e_id, e->e_dn );
+			return rc;
+		}
+	}
+
+	Debug( LDAP_DEBUG_TRACE, "<= index_entry_%s( %ld, \"%s\" ) success\n",
+		opname,
+		(long) e->e_id, e->e_dn );
+
+	return LDAP_SUCCESS;
+}
diff --git a/servers/slapd/back-mdb/init.c b/servers/slapd/back-mdb/init.c
new file mode 100644
index 0000000000..dcda605025
--- /dev/null
+++ b/servers/slapd/back-mdb/init.c
@@ -0,0 +1,448 @@
+/* init.c - initialize mdb backend */
+/* $OpenLDAP$ */
+/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
+ *
+ * Copyright 2000-2011 The OpenLDAP Foundation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in the file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * <http://www.OpenLDAP.org/license.html>.
+ */
+
+#include "portable.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include "back-mdb.h"
+#include
+#include
+#include "config.h"
+
+/* Names of the main databases, opened in this order by mdb_db_open() */
+static const struct berval mdmi_databases[] = {
+	BER_BVC("ad2i"),
+	BER_BVC("dn2i"),
+	BER_BVC("id2e"),
+	BER_BVNULL
+};
+
+/*
+ * mdb_db_init - allocate the per-database mdb_info, fill in the default
+ * environment settings and attach it to be->be_private.
+ * Returns the result of mdb_monitor_db_init().
+ */
+static int
+mdb_db_init( BackendDB *be, ConfigReply *cr )
+{
+	struct mdb_info	*mdb;
+	int rc;
+
+	Debug( LDAP_DEBUG_TRACE,
+		LDAP_XSTRING(mdb_db_init) ": Initializing mdb database\n",
+		0, 0, 0 );
+
+	/* allocate backend-database-specific stuff */
+	mdb = (struct mdb_info *) ch_calloc( 1, sizeof(struct mdb_info) );
+
+	/* DBEnv parameters */
+	mdb->mi_dbenv_home = ch_strdup( SLAPD_DEFAULT_DB_DIR );
+	mdb->mi_dbenv_flags = 0;
+	mdb->mi_dbenv_mode = SLAPD_DEFAULT_DB_MODE;
+
+	mdb->mi_search_stack_depth = DEFAULT_SEARCH_STACK_DEPTH;
+	mdb->mi_search_stack = NULL;
+
+	ldap_pvt_thread_mutex_init( &mdb->mi_database_mutex );
+
+	be->be_private = mdb;
+	be->be_cf_ocs = be->bd_info->bi_cf_ocs;
+
+#ifndef MDB_MULTIPLE_SUFFIXES
+	SLAP_DBFLAGS( be ) |= SLAP_DBFLAG_ONE_SUFFIX;
+#endif
+
+	rc = mdb_monitor_db_init( be );
+
+	return rc;
+}
+
+static int
+mdb_db_close( BackendDB *be, ConfigReply *cr );
+
+/*
+ * mdb_db_open - create and open the environment and the main databases.
+ *
+ * Returns 0 on success.  A missing suffix or an inaccessible database
+ * directory returns -1 before anything is allocated.  Any later failure
+ * returns the failing call's result after undoing the partial open: the
+ * write transaction is aborted if still open, database descriptors that
+ * were allocated but not yet published in mi_ndatabases are freed, and
+ * mdb_db_close() releases the rest.
+ */
+static int
+mdb_db_open( BackendDB *be, ConfigReply *cr )
+{
+	int rc, i;
+	int nopen = 0;	/* descriptors stored in mi_databases but not yet counted */
+	struct mdb_info *mdb = (struct mdb_info *) be->be_private;
+	struct stat stat1;
+	u_int32_t flags;
+	char path[MAXPATHLEN];
+	char *dbhome;
+	int quick = 0;
+	MDB_txn *txn = NULL;
+
+	if ( be->be_suffix == NULL ) {
+		Debug( LDAP_DEBUG_ANY,
+			LDAP_XSTRING(mdb_db_open) ": need suffix.\n",
+			1, 0, 0 );
+		return -1;
+	}
+
+	Debug( LDAP_DEBUG_ARGS,
+		LDAP_XSTRING(mdb_db_open) ": \"%s\"\n",
+		be->be_suffix[0].bv_val, 0, 0 );
+
+	/* Check existence of dbenv_home. Any error means trouble */
+	rc = stat( mdb->mi_dbenv_home, &stat1 );
+	if( rc != 0 ) {
+		Debug( LDAP_DEBUG_ANY,
+			LDAP_XSTRING(mdb_db_open) ": database \"%s\": "
+			"cannot access database directory \"%s\" (%d).\n",
+			be->be_suffix[0].bv_val, mdb->mi_dbenv_home, errno );
+		return -1;
+	}
+
+	/* mdb is always clean */
+	be->be_flags |= SLAP_DBFLAG_CLEAN;
+
+	rc = mdb_env_create( &mdb->mi_dbenv );
+	if( rc != 0 ) {
+		Debug( LDAP_DEBUG_ANY,
+			LDAP_XSTRING(mdb_db_open) ": database \"%s\": "
+			"mdb_env_create failed: %s (%d).\n",
+			be->be_suffix[0].bv_val, mdb_strerror(rc), rc );
+		goto fail;
+	}
+
+	if ( mdb->mi_mapsize ) {
+		rc = mdb_env_set_mapsize( mdb->mi_dbenv, mdb->mi_mapsize );
+		if( rc != 0 ) {
+			Debug( LDAP_DEBUG_ANY,
+				LDAP_XSTRING(mdb_db_open) ": database \"%s\": "
+				"mdb_env_set_mapsize failed: %s (%d).\n",
+				be->be_suffix[0].bv_val, mdb_strerror(rc), rc );
+			goto fail;
+		}
+	}
+
+	rc = mdb_env_set_maxdbs( mdb->mi_dbenv, MDB_INDICES );
+	if( rc != 0 ) {
+		Debug( LDAP_DEBUG_ANY,
+			LDAP_XSTRING(mdb_db_open) ": database \"%s\": "
+			"mdb_env_set_maxdbs failed: %s (%d).\n",
+			be->be_suffix[0].bv_val, mdb_strerror(rc), rc );
+		goto fail;
+	}
+
+#ifdef HAVE_EBCDIC
+	strcpy( path, mdb->mi_dbenv_home );
+	__atoe( path );
+	dbhome = path;
+#else
+	dbhome = mdb->mi_dbenv_home;
+#endif
+
+	Debug( LDAP_DEBUG_TRACE,
+		LDAP_XSTRING(mdb_db_open) ": database \"%s\": "
+		"dbenv_open(%s).\n",
+		be->be_suffix[0].bv_val, mdb->mi_dbenv_home, 0);
+
+	flags = mdb->mi_dbenv_flags;
+
+	if ( quick )
+		flags |= MDB_NOSYNC;
+
+	if ( slapMode & SLAP_TOOL_READONLY)
+		flags |= MDB_RDONLY;
+
+	rc = mdb_env_open( mdb->mi_dbenv, dbhome,
+			flags, mdb->mi_dbenv_mode );
+
+	if ( rc ) {
+		Debug( LDAP_DEBUG_ANY,
+			LDAP_XSTRING(mdb_db_open) ": database \"%s\" cannot be opened, err %d. "
+			"Restore from backup!\n",
+			be->be_suffix[0].bv_val, rc, 0 );
+		goto fail;
+	}
+
+	mdb->mi_databases = (struct mdb_db_info **) ch_malloc(
+		MDB_INDICES * sizeof(struct mdb_db_info *) );
+
+	rc = mdb_txn_begin( mdb->mi_dbenv, 0, &txn );
+	if ( rc ) {
+		Debug( LDAP_DEBUG_ANY,
+			LDAP_XSTRING(mdb_db_open) ": database \"%s\" cannot be opened, err %d. "
+			"Restore from backup!\n",
+			be->be_suffix[0].bv_val, rc, 0 );
+		txn = NULL;	/* nothing to abort */
+		goto fail;
+	}
+
+	/* open (and create) main databases */
+	for( i = 0; mdmi_databases[i].bv_val; i++ ) {
+		struct mdb_db_info *db;
+
+		db = (struct mdb_db_info *) ch_calloc(1, sizeof(struct mdb_db_info));
+
+		flags = MDB_INTEGERKEY;
+		if( i == MDB_ID2ENTRY ) {
+			if ( !(slapMode & (SLAP_TOOL_READMAIN|SLAP_TOOL_READONLY) ))
+				flags |= MDB_CREATE;
+		} else {
+			if ( i == MDB_DN2ID )
+				flags |= MDB_DUPSORT;
+			if ( !(slapMode & SLAP_TOOL_READONLY) )
+				flags |= MDB_CREATE;
+		}
+
+		rc = mdb_open( txn,
+			mdmi_databases[i].bv_val,
+			flags,
+			&db->mdi_dbi );
+
+		if ( rc != 0 ) {
+			snprintf( cr->msg, sizeof(cr->msg), "database \"%s\": "
+				"mdb_open(%s/%s) failed: %s (%d).",
+				be->be_suffix[0].bv_val,
+				mdb->mi_dbenv_home, mdmi_databases[i].bv_val,
+				mdb_strerror(rc), rc );
+			Debug( LDAP_DEBUG_ANY,
+				LDAP_XSTRING(mdb_db_open) ": %s\n",
+				cr->msg, 0, 0 );
+			/* this descriptor was never stored anywhere */
+			ch_free( db );
+			goto fail;
+		}
+
+		db->mdi_name = mdmi_databases[i];
+		mdb->mi_databases[i] = db;
+		nopen = i + 1;
+	}
+
+	rc = mdb_txn_commit(txn);
+	/* the handle must not be used again after commit, whatever the result */
+	txn = NULL;
+	if ( rc != 0 ) {
+		goto fail;
+	}
+
+	mdb->mi_databases[i] = NULL;
+	mdb->mi_ndatabases = i;
+	/* from here on mdb_db_close() owns the descriptors */
+	nopen = 0;
+
+	/* monitor setup */
+	rc = mdb_monitor_db_open( be );
+	if ( rc != 0 ) {
+		goto fail;
+	}
+
+	mdb->mi_flags |= MDB_IS_OPEN;
+
+	return 0;
+
+fail:
+	/* a transaction still open here was never committed */
+	if ( txn ) {
+		mdb_txn_abort( txn );
+	}
+	/* descriptors not yet counted in mi_ndatabases would be missed
+	 * by mdb_db_close(), so release them here
+	 */
+	while ( nopen > 0 ) {
+		ch_free( mdb->mi_databases[--nopen] );
+	}
+	mdb_db_close( be, NULL );
+	return rc;
+}
+
+/*
+ * mdb_db_close - release everything mdb_db_open() set up.
+ *
+ * Safe to call after a partially failed open: the environment may be
+ * missing, and the database handles are only closed when a transaction
+ * could be started.  The descriptors themselves are always freed and
+ * mi_ndatabases is left at 0.  Always returns 0.
+ */
+static int
+mdb_db_close( BackendDB *be, ConfigReply *cr )
+{
+	int rc;
+	struct mdb_info *mdb = (struct mdb_info *) be->be_private;
+	struct mdb_db_info *db;
+	MDB_txn *txn = NULL;
+
+	/* monitor handling */
+	(void)mdb_monitor_db_close( be );
+
+	mdb->mi_flags &= ~MDB_IS_OPEN;
+
+	if( mdb->mi_dbenv ) {
+		mdb_reader_flush( mdb->mi_dbenv );
+
+		rc = mdb_txn_begin( mdb->mi_dbenv, 1, &txn );
+		if( rc != 0 ) {
+			Debug( LDAP_DEBUG_ANY,
+				"mdb_db_close: database \"%s\": "
+				"mdb_txn_begin failed: %s (%d).\n",
+				be->be_suffix[0].bv_val, mdb_strerror(rc), rc );
+			txn = NULL;
+		}
+	}
+
+	if( mdb->mi_databases ) {
+		while( mdb->mi_ndatabases > 0 ) {
+			mdb->mi_ndatabases--;
+			db = mdb->mi_databases[mdb->mi_ndatabases];
+			if( txn )
+				mdb_close( txn, db->mdi_dbi );
+			/* Lower numbered names are not strdup'd */
+			if( mdb->mi_ndatabases >= MDB_NDB )
+				free( db->mdi_name.bv_val );
+			free( db );
+		}
+	}
+	if( txn ) {
+		mdb_txn_abort( txn );
+	}
+
+	free( mdb->mi_databases );
+	mdb->mi_databases = NULL;
+
+	/* close db environment */
+	if( mdb->mi_dbenv ) {
+		/* force a sync */
+		rc = mdb_env_sync( mdb->mi_dbenv, 1 );
+		if( rc != 0 ) {
+			Debug( LDAP_DEBUG_ANY,
+				"mdb_db_close: database \"%s\": "
+				"mdb_env_sync failed: %s (%d).\n",
+				be->be_suffix[0].bv_val, mdb_strerror(rc), rc );
+		}
+
+		mdb_env_close( mdb->mi_dbenv );
+		mdb->mi_dbenv = NULL;
+	}
+
+	return 0;
+}
+
+/*
+ * mdb_db_destroy - free the per-database state created by mdb_db_init():
+ * removes a pending checkpoint task from the run queue, then releases
+ * the monitor data, the environment path, the attribute index
+ * configuration, the database mutex and the mdb_info itself.
+ * Always returns 0.
+ */
+static int
+mdb_db_destroy( BackendDB *be, ConfigReply *cr )
+{
+	struct mdb_info *mdb = (struct mdb_info *) be->be_private;
+
+	/* stop and remove checkpoint task */
+	if ( mdb->mi_txn_cp_task ) {
+		struct re_s *re = mdb->mi_txn_cp_task;
+		mdb->mi_txn_cp_task = NULL;
+		ldap_pvt_thread_mutex_lock( &slapd_rq.rq_mutex );
+		if ( ldap_pvt_runqueue_isrunning( &slapd_rq, re ) )
+			ldap_pvt_runqueue_stoptask( &slapd_rq, re );
+		ldap_pvt_runqueue_remove( &slapd_rq, re );
+		ldap_pvt_thread_mutex_unlock( &slapd_rq.rq_mutex );
+	}
+
+	/* monitor handling */
+	(void)mdb_monitor_db_destroy( be );
+
+	if( mdb->mi_dbenv_home ) ch_free( mdb->mi_dbenv_home );
+
+	mdb_attr_index_destroy( mdb );
+
+	ldap_pvt_thread_mutex_destroy( &mdb->mi_database_mutex );
+
+	ch_free( mdb );
+	be->be_private = NULL;
+
+	return 0;
+}
+
+int
+mdb_back_initialize(
+	BackendInfo	*bi )
+{
+	int rc;
+
+	static char *controls[] = {
+		LDAP_CONTROL_ASSERT,
+		LDAP_CONTROL_MANAGEDSAIT,
+		LDAP_CONTROL_NOOP,
+		LDAP_CONTROL_PAGEDRESULTS,
+		LDAP_CONTROL_PRE_READ,
+		LDAP_CONTROL_POST_READ,
+		LDAP_CONTROL_SUBENTRIES,
+		LDAP_CONTROL_X_PERMISSIVE_MODIFY,
+#ifdef LDAP_X_TXN
+		LDAP_CONTROL_X_TXN_SPEC,
+#endif
+		NULL
+	};
+
+ /* initialize the underlying database system */ + Debug( LDAP_DEBUG_TRACE, + LDAP_XSTRING(mdb_back_initialize) ": initialize " + MDB_UCTYPE " backend\n", 0, 0, 0 ); + + bi->bi_flags |= + SLAP_BFLAG_INCREMENT | + SLAP_BFLAG_SUBENTRIES | + SLAP_BFLAG_ALIASES | + SLAP_BFLAG_REFERRALS; + + bi->bi_controls = controls; + + { /* version check */ + int major, minor, patch, ver; + char *version = mdb_version( &major, &minor, &patch ); +#ifdef HAVE_EBCDIC + char v2[1024]; + + /* All our stdio does an ASCII to EBCDIC conversion on + * the output. Strings from the MDB library are already + * in EBCDIC; we have to go back and forth... + */ + strcpy( v2, version ); + __etoa( v2 ); + version = v2; +#endif + ver = (major << 24) | (minor << 16) | patch; + if( ver != MDB_VERSION_FULL ) { + /* fail if a versions don't match */ + Debug( LDAP_DEBUG_ANY, + LDAP_XSTRING(mdb_back_initialize) ": " + "MDB library version mismatch:" + " expected " MDB_VERSION_STRING "," + " got %s\n", version, 0, 0 ); + return -1; + } + + Debug( LDAP_DEBUG_TRACE, LDAP_XSTRING(mdb_back_initialize) + ": %s\n", version, 0, 0 ); + } + + bi->bi_open = 0; + bi->bi_close = 0; + bi->bi_config = 0; + bi->bi_destroy = 0; + + bi->bi_db_init = mdb_db_init; + bi->bi_db_config = config_generic_wrapper; + bi->bi_db_open = mdb_db_open; + bi->bi_db_close = mdb_db_close; + bi->bi_db_destroy = mdb_db_destroy; + + bi->bi_op_add = mdb_add; + bi->bi_op_bind = mdb_bind; + bi->bi_op_compare = mdb_compare; + bi->bi_op_delete = mdb_delete; + bi->bi_op_modify = mdb_modify; + bi->bi_op_modrdn = mdb_modrdn; + bi->bi_op_search = mdb_search; + + bi->bi_op_unbind = 0; + + bi->bi_extended = mdb_extended; + + bi->bi_chk_referrals = mdb_referrals; + bi->bi_operational = mdb_operational; + bi->bi_has_subordinates = mdb_hasSubordinates; + bi->bi_entry_release_rw = mdb_entry_release; + bi->bi_entry_get_rw = mdb_entry_get; + + /* + * hooks for slap tools + */ + bi->bi_tool_entry_open = mdb_tool_entry_open; + bi->bi_tool_entry_close = 
mdb_tool_entry_close; + bi->bi_tool_entry_first = backend_tool_entry_first; + bi->bi_tool_entry_first_x = mdb_tool_entry_first_x; + bi->bi_tool_entry_next = mdb_tool_entry_next; + bi->bi_tool_entry_get = mdb_tool_entry_get; + bi->bi_tool_entry_put = mdb_tool_entry_put; + bi->bi_tool_entry_reindex = mdb_tool_entry_reindex; + bi->bi_tool_sync = 0; + bi->bi_tool_dn2id_get = mdb_tool_dn2id_get; + bi->bi_tool_entry_modify = mdb_tool_entry_modify; + + bi->bi_connection_init = 0; + bi->bi_connection_destroy = 0; + + rc = mdb_back_init_cf( bi ); + + return rc; +} + +#if (SLAPD_MDB == SLAPD_MOD_DYNAMIC) + +SLAP_BACKEND_INIT_MODULE( mdb ) + +#endif /* SLAPD_MDB == SLAPD_MOD_DYNAMIC */ + diff --git a/servers/slapd/back-mdb/key.c b/servers/slapd/back-mdb/key.c new file mode 100644 index 0000000000..62377b5c91 --- /dev/null +++ b/servers/slapd/back-mdb/key.c @@ -0,0 +1,98 @@ +/* index.c - routines for dealing with attribute indexes */ +/* $OpenLDAP$ */ +/* This work is part of OpenLDAP Software . + * + * Copyright 2000-2011 The OpenLDAP Foundation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * . 
+ */ + +#include "portable.h" + +#include + +#include +#include + +#include "slap.h" +#include "back-mdb.h" +#include "idl.h" + +/* read a key */ +int +mdb_key_read( + Backend *be, + DB *db, + DB_TXN *txn, + struct berval *k, + ID *ids, + DBC **saved_cursor, + int get_flag +) +{ + int rc; + DBT key; + + Debug( LDAP_DEBUG_TRACE, "=> key_read\n", 0, 0, 0 ); + + DBTzero( &key ); + bv2DBT(k,&key); + key.ulen = key.size; + key.flags = DB_DBT_USERMEM; + + rc = mdb_idl_fetch_key( be, db, txn, &key, ids, saved_cursor, get_flag ); + + if( rc != LDAP_SUCCESS ) { + Debug( LDAP_DEBUG_TRACE, "<= mdb_index_read: failed (%d)\n", + rc, 0, 0 ); + } else { + Debug( LDAP_DEBUG_TRACE, "<= mdb_index_read %ld candidates\n", + (long) MDB_IDL_N(ids), 0, 0 ); + } + + return rc; +} + +/* Add or remove stuff from index files */ +int +mdb_key_change( + Backend *be, + DB *db, + DB_TXN *txn, + struct berval *k, + ID id, + int op +) +{ + int rc; + DBT key; + + Debug( LDAP_DEBUG_TRACE, "=> key_change(%s,%lx)\n", + op == SLAP_INDEX_ADD_OP ? "ADD":"DELETE", (long) id, 0 ); + + DBTzero( &key ); + bv2DBT(k,&key); + key.ulen = key.size; + key.flags = DB_DBT_USERMEM; + + if (op == SLAP_INDEX_ADD_OP) { + /* Add values */ + rc = mdb_idl_insert_key( be, db, txn, &key, id ); + if ( rc == DB_KEYEXIST ) rc = 0; + } else { + /* Delete values */ + rc = mdb_idl_delete_key( be, db, txn, &key, id ); + if ( rc == DB_NOTFOUND ) rc = 0; + } + + Debug( LDAP_DEBUG_TRACE, "<= key_change %d\n", rc, 0, 0 ); + + return rc; +} diff --git a/servers/slapd/back-mdb/mdb.c b/servers/slapd/back-mdb/mdb.c new file mode 100644 index 0000000000..d7246555ad --- /dev/null +++ b/servers/slapd/back-mdb/mdb.c @@ -0,0 +1,3246 @@ +/* mdb.c - memory-mapped database library */ +/* + * Copyright 2011 Howard Chu, Symas Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. 
+ * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * . + * + * This code is derived from btree.c written by Martin Hedenfalk. + * + * Copyright (c) 2009, 2010 Martin Hedenfalk + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include +#include +#include +#include +#include +#include +#ifdef HAVE_SYS_FILE_H +#include +#endif +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mdb.h" + +#define ULONG unsigned long +typedef ULONG pgno_t; + +#include "midl.h" + +#ifndef DEBUG +#define DEBUG 1 +#endif + +#if DEBUG && defined(__GNUC__) +# define DPRINTF(fmt, ...) \ + fprintf(stderr, "%s:%d: " fmt "\n", __func__, __LINE__, ##__VA_ARGS__) +#else +# define DPRINTF(...) 
((void) 0) +#endif + +#define PAGESIZE 4096 +#define MDB_MINKEYS 4 +#define MDB_MAGIC 0xBEEFC0DE +#define MDB_VERSION 1 +#define MAXKEYSIZE 511 + +#define P_INVALID (~0UL) + +#define F_ISSET(w, f) (((w) & (f)) == (f)) + +typedef uint16_t indx_t; + +#define DEFAULT_READERS 126 +#define DEFAULT_MAPSIZE 1048576 + +/* Lock descriptor stuff */ +#define RXBODY \ + ULONG mr_txnid; \ + pid_t mr_pid; \ + pthread_t mr_tid +typedef struct MDB_rxbody { + RXBODY; +} MDB_rxbody; + +#ifndef CACHELINE +# ifdef __APPLE__ +# define CACHELINE 128 /* 64 is too small to contain a mutex */ +# else +# define CACHELINE 64 /* most CPUs. Itanium uses 128 */ +# endif +#endif + +typedef struct MDB_reader { + RXBODY; + /* cache line alignment */ + char pad[CACHELINE-sizeof(MDB_rxbody)]; +} MDB_reader; + +#define TXBODY \ + uint32_t mt_magic; \ + uint32_t mt_version; \ + pthread_mutex_t mt_mutex; \ + ULONG mt_txnid; \ + uint32_t mt_numreaders +typedef struct MDB_txbody { + TXBODY; +} MDB_txbody; + +typedef struct MDB_txninfo { + TXBODY; + char pad[CACHELINE-sizeof(MDB_txbody)]; + pthread_mutex_t mt_wmutex; + char pad2[CACHELINE-sizeof(pthread_mutex_t)]; + MDB_reader mt_readers[1]; +} MDB_txninfo; + +/* Common header for all page types. Overflow pages + * occupy a number of contiguous pages with no + * headers on any page after the first. 
+ */ +typedef struct MDB_page { /* represents a page of storage */ +#define mp_pgno mp_p.p_pgno + union padded { + pgno_t p_pgno; /* page number */ + void * p_pad; + } mp_p; +#define P_BRANCH 0x01 /* branch page */ +#define P_LEAF 0x02 /* leaf page */ +#define P_OVERFLOW 0x04 /* overflow page */ +#define P_META 0x08 /* meta page */ +#define P_DIRTY 0x10 /* dirty page */ + uint32_t mp_flags; +#define mp_lower mp_pb.pb.pb_lower +#define mp_upper mp_pb.pb.pb_upper +#define mp_pages mp_pb.pb_pages + union page_bounds { + struct { + indx_t pb_lower; /* lower bound of free space */ + indx_t pb_upper; /* upper bound of free space */ + } pb; + uint32_t pb_pages; /* number of overflow pages */ + } mp_pb; + indx_t mp_ptrs[1]; /* dynamic size */ +} MDB_page; + +#define PAGEHDRSZ ((unsigned) offsetof(MDB_page, mp_ptrs)) + +#define NUMKEYS(p) (((p)->mp_lower - PAGEHDRSZ) >> 1) +#define SIZELEFT(p) (indx_t)((p)->mp_upper - (p)->mp_lower) +#define PAGEFILL(env, p) (1000L * ((env)->me_psize - PAGEHDRSZ - SIZELEFT(p)) / \ + ((env)->me_psize - PAGEHDRSZ)) +#define IS_LEAF(p) F_ISSET((p)->mp_flags, P_LEAF) +#define IS_BRANCH(p) F_ISSET((p)->mp_flags, P_BRANCH) +#define IS_OVERFLOW(p) F_ISSET((p)->mp_flags, P_OVERFLOW) + +#define OVPAGES(size, psize) (PAGEHDRSZ + size + psize - 1) / psize; + +typedef struct MDB_db { + uint32_t md_pad; + uint16_t md_flags; + uint16_t md_depth; + ULONG md_branch_pages; + ULONG md_leaf_pages; + ULONG md_overflow_pages; + ULONG md_entries; + pgno_t md_root; +} MDB_db; + +#define FREE_DBI 0 +#define MAIN_DBI 1 + +typedef struct MDB_meta { /* meta (footer) page content */ + uint32_t mm_magic; + uint32_t mm_version; + void *mm_address; /* address for fixed mapping */ + size_t mm_mapsize; /* size of mmap region */ + MDB_db mm_dbs[2]; /* first is free space, 2nd is main db */ +#define mm_psize mm_dbs[0].md_pad +#define mm_flags mm_dbs[0].md_flags + pgno_t mm_last_pg; /* last used page in file */ + ULONG mm_txnid; /* txnid that committed this page */ +} 
MDB_meta; + +typedef struct MDB_dhead { /* a dirty page */ + STAILQ_ENTRY(MDB_dpage) md_next; /* queue of dirty pages */ + MDB_page *md_parent; + unsigned md_pi; /* parent index */ + int md_num; +} MDB_dhead; + +typedef struct MDB_dpage { + MDB_dhead h; + MDB_page p; +} MDB_dpage; + +STAILQ_HEAD(dirty_queue, MDB_dpage); /* FIXME: use a sorted data structure */ + +typedef struct MDB_oldpages { + struct MDB_oldpages *mo_next; + ULONG mo_txnid; + pgno_t mo_pages[1]; /* dynamic */ +} MDB_oldpages; + +typedef struct MDB_pageparent { + MDB_page *mp_page; + MDB_page *mp_parent; + unsigned mp_pi; +} MDB_pageparent; + +static MDB_dpage *mdb_alloc_page(MDB_txn *txn, MDB_page *parent, unsigned int parent_idx, int num); +static int mdb_touch(MDB_txn *txn, MDB_pageparent *mp); + +typedef struct MDB_ppage { /* ordered list of pages */ + SLIST_ENTRY(MDB_ppage) mp_entry; + MDB_page *mp_page; + unsigned int mp_ki; /* cursor index on page */ +} MDB_ppage; +SLIST_HEAD(page_stack, MDB_ppage); + +/* FIXME: tree depth is mostly bounded, we should just + * use a fixed array and avoid malloc/pointer chasing + */ +#define CURSOR_EMPTY(c) SLIST_EMPTY(&(c)->mc_stack) +#define CURSOR_TOP(c) SLIST_FIRST(&(c)->mc_stack) +#define CURSOR_POP(c) SLIST_REMOVE_HEAD(&(c)->mc_stack, mp_entry) +#define CURSOR_PUSH(c,p) SLIST_INSERT_HEAD(&(c)->mc_stack, p, mp_entry) + +struct MDB_xcursor; + +struct MDB_cursor { + MDB_txn *mc_txn; + struct page_stack mc_stack; /* stack of parent pages */ + MDB_dbi mc_dbi; + short mc_initialized; /* 1 if initialized */ + short mc_eof; /* 1 if end is reached */ + struct MDB_xcursor *mc_xcursor; +}; + +#define METAHASHLEN offsetof(MDB_meta, mm_hash) +#define METADATA(p) ((void *)((char *)p + PAGEHDRSZ)) + +typedef struct MDB_node { +#define mn_pgno mn_p.np_pgno +#define mn_dsize mn_p.np_dsize + union { + pgno_t np_pgno; /* child page number */ + uint32_t np_dsize; /* leaf data size */ + } mn_p; + unsigned int mn_flags:4; + unsigned int mn_ksize:12; /* key size */ +#define 
F_BIGDATA 0x01 /* data put on overflow page */ +#define F_SUBDATA 0x02 /* data is a sub-database */ + char mn_data[1]; +} MDB_node; + +typedef struct MDB_dbx { + MDB_val md_name; + MDB_cmp_func *md_cmp; /* user compare function */ + MDB_cmp_func *md_dcmp; /* user dupsort function */ + MDB_rel_func *md_rel; /* user relocate function */ + MDB_dbi md_parent; + unsigned int md_dirty; +} MDB_dbx; + +struct MDB_txn { + pgno_t mt_next_pgno; /* next unallocated page */ + ULONG mt_txnid; + ULONG mt_oldest; + MDB_env *mt_env; + pgno_t *mt_free_pgs; /* this is an IDL */ + union { + struct dirty_queue *dirty_queue; /* modified pages */ + MDB_reader *reader; + } mt_u; + MDB_dbx *mt_dbxs; /* array */ + MDB_db *mt_dbs; + unsigned int mt_numdbs; + +#define MDB_TXN_RDONLY 0x01 /* read-only transaction */ +#define MDB_TXN_ERROR 0x02 /* an error has occurred */ +#define MDB_TXN_METOGGLE 0x04 /* used meta page 1 */ + unsigned int mt_flags; +}; + +/* Context for sorted-dup records */ +typedef struct MDB_xcursor { + MDB_cursor mx_cursor; + MDB_txn mx_txn; + MDB_dbx mx_dbxs[4]; + MDB_db mx_dbs[4]; +} MDB_xcursor; + +struct MDB_env { + int me_fd; + int me_lfd; + uint32_t me_flags; + unsigned int me_maxreaders; + unsigned int me_numdbs; + unsigned int me_maxdbs; + char *me_path; + char *me_map; + MDB_txninfo *me_txns; + MDB_meta *me_metas[2]; + MDB_meta *me_meta; + MDB_txn *me_txn; /* current write transaction */ + size_t me_mapsize; + off_t me_size; /* current file size */ + unsigned int me_psize; + int me_db_toggle; + MDB_dbx *me_dbxs; /* array */ + MDB_db *me_dbs[2]; + MDB_oldpages *me_pghead; + pthread_key_t me_txkey; /* thread-key for readers */ + pgno_t me_free_pgs[MDB_IDL_UM_SIZE]; +}; + +#define NODESIZE offsetof(MDB_node, mn_data) + +#define INDXSIZE(k) (NODESIZE + ((k) == NULL ? 
0 : (k)->mv_size)) +#define LEAFSIZE(k, d) (NODESIZE + (k)->mv_size + (d)->mv_size) +#define NODEPTR(p, i) ((MDB_node *)((char *)(p) + (p)->mp_ptrs[i])) +#define NODEKEY(node) (void *)((node)->mn_data) +#define NODEDATA(node) (void *)((char *)(node)->mn_data + (node)->mn_ksize) +#define NODEPGNO(node) ((node)->mn_pgno) +#define NODEDSZ(node) ((node)->mn_dsize) + +#define MDB_COMMIT_PAGES 64 /* max number of pages to write in one commit */ +#define MDB_MAXCACHE_DEF 1024 /* max number of pages to keep in cache */ + +static int mdb_search_page_root(MDB_txn *txn, + MDB_dbi dbi, MDB_val *key, + MDB_cursor *cursor, int modify, + MDB_pageparent *mpp); +static int mdb_search_page(MDB_txn *txn, + MDB_dbi dbi, MDB_val *key, + MDB_cursor *cursor, int modify, + MDB_pageparent *mpp); + +static int mdb_env_read_header(MDB_env *env, MDB_meta *meta); +static int mdb_env_read_meta(MDB_env *env, int *which); +static int mdb_env_write_meta(MDB_txn *txn); +static MDB_page *mdb_get_page(MDB_txn *txn, pgno_t pgno); + +static MDB_node *mdb_search_node(MDB_txn *txn, MDB_dbi dbi, MDB_page *mp, + MDB_val *key, int *exactp, unsigned int *kip); +static int mdb_add_node(MDB_txn *txn, MDB_dbi dbi, MDB_page *mp, + indx_t indx, MDB_val *key, MDB_val *data, + pgno_t pgno, uint8_t flags); +static void mdb_del_node(MDB_page *mp, indx_t indx); +static int mdb_del0(MDB_txn *txn, MDB_dbi dbi, unsigned int ki, + MDB_pageparent *mpp, MDB_node *leaf); +static int mdb_put0(MDB_txn *txn, MDB_dbi dbi, + MDB_val *key, MDB_val *data, unsigned int flags); +static int mdb_read_data(MDB_txn *txn, MDB_node *leaf, MDB_val *data); + +static int mdb_rebalance(MDB_txn *txn, MDB_dbi dbi, MDB_pageparent *mp); +static int mdb_update_key(MDB_page *mp, indx_t indx, MDB_val *key); +static int mdb_move_node(MDB_txn *txn, MDB_dbi dbi, + MDB_pageparent *src, indx_t srcindx, + MDB_pageparent *dst, indx_t dstindx); +static int mdb_merge(MDB_txn *txn, MDB_dbi dbi, MDB_pageparent *src, + MDB_pageparent *dst); +static int 
mdb_split(MDB_txn *txn, MDB_dbi dbi, MDB_page **mpp, + unsigned int *newindxp, MDB_val *newkey, + MDB_val *newdata, pgno_t newpgno); +static MDB_dpage *mdb_new_page(MDB_txn *txn, MDB_dbi dbi, uint32_t flags, int num); + +static void cursor_pop_page(MDB_cursor *cursor); +static MDB_ppage *cursor_push_page(MDB_cursor *cursor, + MDB_page *mp); + +static int mdb_set_key(MDB_node *node, MDB_val *key); +static int mdb_sibling(MDB_cursor *cursor, int move_right); +static int mdb_cursor_next(MDB_cursor *cursor, + MDB_val *key, MDB_val *data, MDB_cursor_op op); +static int mdb_cursor_prev(MDB_cursor *cursor, + MDB_val *key, MDB_val *data, MDB_cursor_op op); +static int mdb_cursor_set(MDB_cursor *cursor, + MDB_val *key, MDB_val *data, MDB_cursor_op op, int *exactp); +static int mdb_cursor_first(MDB_cursor *cursor, + MDB_val *key, MDB_val *data); +static int mdb_cursor_last(MDB_cursor *cursor, + MDB_val *key, MDB_val *data); + +static void mdb_xcursor_init0(MDB_txn *txn, MDB_dbi dbi, MDB_xcursor *mx); +static void mdb_xcursor_init1(MDB_txn *txn, MDB_dbi dbi, MDB_xcursor *mx, MDB_node *node); +static void mdb_xcursor_fini(MDB_txn *txn, MDB_dbi dbi, MDB_xcursor *mx); + +static size_t mdb_leaf_size(MDB_env *env, MDB_val *key, + MDB_val *data); +static size_t mdb_branch_size(MDB_env *env, MDB_val *key); + +static int memncmp(const void *s1, size_t n1, + const void *s2, size_t n2); +static int memnrcmp(const void *s1, size_t n1, + const void *s2, size_t n2); + +static int +memncmp(const void *s1, size_t n1, const void *s2, size_t n2) +{ + int diff, len_diff = -1; + + if (n1 >= n2) { + len_diff = (n1 > n2); + n1 = n2; + } + diff = memcmp(s1, s2, n1); + return diff ? 
diff : len_diff; +} + +static int +memnrcmp(const void *s1, size_t n1, const void *s2, size_t n2) +{ + const unsigned char *p1, *p2, *p1_lim; + + if (n2 == 0) + return n1 != 0; + if (n1 == 0) + return -1; + + p1 = (const unsigned char *)s1 + n1 - 1; + p2 = (const unsigned char *)s2 + n2 - 1; + + for (p1_lim = (n1 <= n2 ? s1 : s2); *p1 == *p2; p1--, p2--) { + if (p1 == p1_lim) + return (p1 != s1) ? (p1 != p2) : (p2 != s2) ? -1 : 0; + } + return *p1 - *p2; +} + +char * +mdb_version(int *maj, int *min, int *pat) +{ + *maj = MDB_VERSION_MAJOR; + *min = MDB_VERSION_MINOR; + *pat = MDB_VERSION_PATCH; + return MDB_VERSION_STRING; +} + +int +mdb_cmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b) +{ + return txn->mt_dbxs[dbi].md_cmp(a, b); +} + +static int +_mdb_cmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *key1, const MDB_val *key2) +{ + if (txn->mt_dbs[dbi].md_flags & (MDB_REVERSEKEY +#if __BYTE_ORDER == __LITTLE_ENDIAN + |MDB_INTEGERKEY +#endif + )) + return memnrcmp(key1->mv_data, key1->mv_size, key2->mv_data, key2->mv_size); + else + return memncmp((char *)key1->mv_data, key1->mv_size, key2->mv_data, key2->mv_size); +} + +/* Allocate new page(s) for writing */ +static MDB_dpage * +mdb_alloc_page(MDB_txn *txn, MDB_page *parent, unsigned int parent_idx, int num) +{ + MDB_dpage *dp; + pgno_t pgno = P_INVALID; + ULONG oldest; + + if (txn->mt_txnid > 2) { + + oldest = txn->mt_txnid - 2; + if (!txn->mt_env->me_pghead && txn->mt_dbs[FREE_DBI].md_root != P_INVALID) { + /* See if there's anything in the free DB */ + MDB_pageparent mpp; + MDB_node *leaf; + ULONG *kptr; + + mpp.mp_parent = NULL; + mpp.mp_pi = 0; + mdb_search_page(txn, FREE_DBI, NULL, NULL, 0, &mpp); + leaf = NODEPTR(mpp.mp_page, 0); + kptr = (ULONG *)NODEKEY(leaf); + + /* It's potentially usable, unless there are still + * older readers outstanding. Grab it. 
+ */ + if (oldest > *kptr) { + MDB_oldpages *mop; + MDB_val data; + pgno_t *idl; + + mdb_read_data(txn, leaf, &data); + idl = (ULONG *)data.mv_data; + mop = malloc(sizeof(MDB_oldpages) + MDB_IDL_SIZEOF(idl) - sizeof(pgno_t)); + mop->mo_next = txn->mt_env->me_pghead; + mop->mo_txnid = *kptr; + txn->mt_env->me_pghead = mop; + memcpy(mop->mo_pages, idl, MDB_IDL_SIZEOF(idl)); + +#if DEBUG > 1 + { + unsigned int i; + DPRINTF("IDL read txn %lu root %lu num %lu", + mop->mo_txnid, txn->mt_dbs[FREE_DBI].md_root, idl[0]); + for (i=0; imt_env->me_pghead) { + unsigned int i; + for (i=0; imt_env->me_txns->mt_numreaders; i++) { + ULONG mr = txn->mt_env->me_txns->mt_readers[i].mr_txnid; + if (!mr) continue; + if (mr < oldest) + oldest = txn->mt_env->me_txns->mt_readers[i].mr_txnid; + } + if (oldest > txn->mt_env->me_pghead->mo_txnid) { + MDB_oldpages *mop = txn->mt_env->me_pghead; + txn->mt_oldest = oldest; + if (num > 1) { + /* FIXME: For now, always use fresh pages. We + * really ought to search the free list for a + * contiguous range. + */ + ; + } else { + /* peel pages off tail, so we only have to truncate the list */ + pgno = MDB_IDL_LAST(mop->mo_pages); + if (MDB_IDL_IS_RANGE(mop->mo_pages)) { + mop->mo_pages[2]++; + if (mop->mo_pages[2] > mop->mo_pages[1]) + mop->mo_pages[0] = 0; + } else { + mop->mo_pages[0]--; + } + if (MDB_IDL_IS_ZERO(mop->mo_pages)) { + txn->mt_env->me_pghead = mop->mo_next; + free(mop); + } + } + } + } + } + + if ((dp = malloc(txn->mt_env->me_psize * num + sizeof(MDB_dhead))) == NULL) + return NULL; + dp->h.md_num = num; + dp->h.md_parent = parent; + dp->h.md_pi = parent_idx; + STAILQ_INSERT_TAIL(txn->mt_u.dirty_queue, dp, h.md_next); + if (pgno == P_INVALID) { + dp->p.mp_pgno = txn->mt_next_pgno; + txn->mt_next_pgno += num; + } else { + dp->p.mp_pgno = pgno; + } + + return dp; +} + +/* Touch a page: make it dirty and re-insert into tree with updated pgno. 
+ */ +static int +mdb_touch(MDB_txn *txn, MDB_pageparent *pp) +{ + MDB_page *mp = pp->mp_page; + pgno_t pgno; + assert(txn != NULL); + assert(pp != NULL); + + if (!F_ISSET(mp->mp_flags, P_DIRTY)) { + MDB_dpage *dp; + if ((dp = mdb_alloc_page(txn, pp->mp_parent, pp->mp_pi, 1)) == NULL) + return ENOMEM; + DPRINTF("touched page %lu -> %lu", mp->mp_pgno, dp->p.mp_pgno); + mdb_midl_insert(txn->mt_free_pgs, mp->mp_pgno); + pgno = dp->p.mp_pgno; + memcpy(&dp->p, mp, txn->mt_env->me_psize); + mp = &dp->p; + mp->mp_pgno = pgno; + mp->mp_flags |= P_DIRTY; + + /* Update the page number to new touched page. */ + if (pp->mp_parent != NULL) + NODEPGNO(NODEPTR(pp->mp_parent, pp->mp_pi)) = mp->mp_pgno; + pp->mp_page = mp; + } + return 0; +} + +int +mdb_env_sync(MDB_env *env, int force) +{ + int rc = 0; + if (force || !F_ISSET(env->me_flags, MDB_NOSYNC)) { + if (fsync(env->me_fd)) + rc = errno; + } + return rc; +} + +int +mdb_txn_begin(MDB_env *env, int rdonly, MDB_txn **ret) +{ + MDB_txn *txn; + int rc, toggle; + + if ((txn = calloc(1, sizeof(MDB_txn))) == NULL) { + DPRINTF("calloc: %s", strerror(errno)); + return ENOMEM; + } + + if (rdonly) { + txn->mt_flags |= MDB_TXN_RDONLY; + } else { + txn->mt_u.dirty_queue = calloc(1, sizeof(*txn->mt_u.dirty_queue)); + if (txn->mt_u.dirty_queue == NULL) { + free(txn); + return ENOMEM; + } + STAILQ_INIT(txn->mt_u.dirty_queue); + + pthread_mutex_lock(&env->me_txns->mt_wmutex); + env->me_txns->mt_txnid++; + txn->mt_free_pgs = env->me_free_pgs; + txn->mt_free_pgs[0] = 0; + } + + txn->mt_txnid = env->me_txns->mt_txnid; + if (rdonly) { + MDB_reader *r = pthread_getspecific(env->me_txkey); + if (!r) { + unsigned int i; + pthread_mutex_lock(&env->me_txns->mt_mutex); + for (i=0; ime_txns->mt_numreaders; i++) + if (env->me_txns->mt_readers[i].mr_pid == 0) + break; + if (i == env->me_maxreaders) { + return ENOSPC; + } + env->me_txns->mt_readers[i].mr_pid = getpid(); + env->me_txns->mt_readers[i].mr_tid = pthread_self(); + r = 
&env->me_txns->mt_readers[i]; + pthread_setspecific(env->me_txkey, r); + if (i >= env->me_txns->mt_numreaders) + env->me_txns->mt_numreaders = i+1; + pthread_mutex_unlock(&env->me_txns->mt_mutex); + } + r->mr_txnid = txn->mt_txnid; + txn->mt_u.reader = r; + } else { + env->me_txn = txn; + } + + txn->mt_env = env; + + if ((rc = mdb_env_read_meta(env, &toggle)) != MDB_SUCCESS) { + mdb_txn_abort(txn); + return rc; + } + + /* Copy the DB arrays */ + txn->mt_numdbs = env->me_numdbs; + txn->mt_dbxs = env->me_dbxs; /* mostly static anyway */ + txn->mt_dbs = malloc(env->me_maxdbs * sizeof(MDB_db)); + memcpy(txn->mt_dbs, env->me_meta->mm_dbs, 2 * sizeof(MDB_db)); + if (txn->mt_numdbs > 2) + memcpy(txn->mt_dbs+2, env->me_dbs[env->me_db_toggle]+2, + (txn->mt_numdbs - 2) * sizeof(MDB_db)); + + if (!rdonly) { + if (toggle) + txn->mt_flags |= MDB_TXN_METOGGLE; + txn->mt_next_pgno = env->me_meta->mm_last_pg+1; + } + + DPRINTF("begin transaction %lu on mdbenv %p, root page %lu", + txn->mt_txnid, (void *) env, txn->mt_dbs[MAIN_DBI].md_root); + + *ret = txn; + return MDB_SUCCESS; +} + +void +mdb_txn_abort(MDB_txn *txn) +{ + MDB_dpage *dp; + MDB_env *env; + + if (txn == NULL) + return; + + env = txn->mt_env; + DPRINTF("abort transaction %lu on mdbenv %p, root page %lu", + txn->mt_txnid, (void *) env, txn->mt_dbs[MAIN_DBI].md_root); + + free(txn->mt_dbs); + + if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY)) { + txn->mt_u.reader->mr_txnid = 0; + } else { + MDB_oldpages *mop; + unsigned int i; + + /* Discard all dirty pages. 
*/ + while (!STAILQ_EMPTY(txn->mt_u.dirty_queue)) { + dp = STAILQ_FIRST(txn->mt_u.dirty_queue); + STAILQ_REMOVE_HEAD(txn->mt_u.dirty_queue, h.md_next); + free(dp); + } + free(txn->mt_u.dirty_queue); + + while ((mop = txn->mt_env->me_pghead)) { + txn->mt_env->me_pghead = mop->mo_next; + free(mop); + } + + env->me_txn = NULL; + env->me_txns->mt_txnid--; + for (i=2; ime_numdbs; i++) + env->me_dbxs[i].md_dirty = 0; + pthread_mutex_unlock(&env->me_txns->mt_wmutex); + } + + free(txn); +} + +int +mdb_txn_commit(MDB_txn *txn) +{ + int n, done; + unsigned int i; + ssize_t rc; + off_t size; + MDB_dpage *dp; + MDB_env *env; + pgno_t next; + struct iovec iov[MDB_COMMIT_PAGES]; + + assert(txn != NULL); + assert(txn->mt_env != NULL); + + env = txn->mt_env; + + if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY)) { + DPRINTF("attempt to commit read-only transaction"); + mdb_txn_abort(txn); + return EPERM; + } + + if (txn != env->me_txn) { + DPRINTF("attempt to commit unknown transaction"); + mdb_txn_abort(txn); + return EINVAL; + } + + if (F_ISSET(txn->mt_flags, MDB_TXN_ERROR)) { + DPRINTF("error flag is set, can't commit"); + mdb_txn_abort(txn); + return EINVAL; + } + + if (STAILQ_EMPTY(txn->mt_u.dirty_queue)) + goto done; + + DPRINTF("committing transaction %lu on mdbenv %p, root page %lu", + txn->mt_txnid, (void *) env, txn->mt_dbs[MAIN_DBI].md_root); + + /* should only be one record now */ + if (env->me_pghead) { + MDB_val key, data; + MDB_oldpages *mop; + + mop = env->me_pghead; + key.mv_size = sizeof(pgno_t); + key.mv_data = (char *)&mop->mo_txnid; + data.mv_size = MDB_IDL_SIZEOF(mop->mo_pages); + data.mv_data = mop->mo_pages; + mdb_put0(txn, FREE_DBI, &key, &data, 0); + free(env->me_pghead); + env->me_pghead = NULL; + } + /* save to free list */ + if (!MDB_IDL_IS_ZERO(txn->mt_free_pgs)) { + MDB_val key, data; + MDB_pageparent mpp; + + /* make sure last page of freeDB is touched and on freelist */ + key.mv_size = MAXKEYSIZE+1; + key.mv_data = NULL; + mpp.mp_parent = NULL; + 
mpp.mp_pi = 0; + mdb_search_page(txn, FREE_DBI, &key, NULL, 1, &mpp); + +#if DEBUG > 1 + { + unsigned int i; + ULONG *idl = txn->mt_free_pgs; + DPRINTF("IDL write txn %lu root %lu num %lu", + txn->mt_txnid, txn->mt_dbs[FREE_DBI].md_root, idl[0]); + for (i=0; imt_txnid; + data.mv_size = MDB_IDL_SIZEOF(txn->mt_free_pgs); + data.mv_data = txn->mt_free_pgs; + mdb_put0(txn, FREE_DBI, &key, &data, 0); + } + + /* Update DB root pointers. Their pages have already been + * touched so this is all in-place and cannot fail. + */ + { + MDB_val data; + data.mv_size = sizeof(MDB_db); + + for (i = 2; i < txn->mt_numdbs; i++) { + if (txn->mt_dbxs[i].md_dirty) { + data.mv_data = &txn->mt_dbs[i]; + mdb_put0(txn, MAIN_DBI, &txn->mt_dbxs[i].md_name, &data, 0); + } + } + } + + /* Commit up to MDB_COMMIT_PAGES dirty pages to disk until done. + */ + next = 0; + do { + n = 0; + done = 1; + size = 0; + STAILQ_FOREACH(dp, txn->mt_u.dirty_queue, h.md_next) { + if (dp->p.mp_pgno != next) { + if (n) { + DPRINTF("committing %u dirty pages", n); + rc = writev(env->me_fd, iov, n); + if (rc != size) { + n = errno; + if (rc > 0) + DPRINTF("short write, filesystem full?"); + else + DPRINTF("writev: %s", strerror(errno)); + mdb_txn_abort(txn); + return n; + } + n = 0; + size = 0; + } + lseek(env->me_fd, dp->p.mp_pgno * env->me_psize, SEEK_SET); + next = dp->p.mp_pgno; + } + DPRINTF("committing page %lu", dp->p.mp_pgno); + iov[n].iov_len = env->me_psize * dp->h.md_num; + iov[n].iov_base = &dp->p; + size += iov[n].iov_len; + next = dp->p.mp_pgno + dp->h.md_num; + /* clear dirty flag */ + dp->p.mp_flags &= ~P_DIRTY; + if (++n >= MDB_COMMIT_PAGES) { + done = 0; + break; + } + } + + if (n == 0) + break; + + DPRINTF("committing %u dirty pages", n); + rc = writev(env->me_fd, iov, n); + if (rc != size) { + n = errno; + if (rc > 0) + DPRINTF("short write, filesystem full?"); + else + DPRINTF("writev: %s", strerror(errno)); + mdb_txn_abort(txn); + return n; + } + + } while (!done); + + /* Drop the dirty pages. 
+ */ + while (!STAILQ_EMPTY(txn->mt_u.dirty_queue)) { + dp = STAILQ_FIRST(txn->mt_u.dirty_queue); + STAILQ_REMOVE_HEAD(txn->mt_u.dirty_queue, h.md_next); + free(dp); + } + + if ((n = mdb_env_sync(env, 0)) != 0 || + (n = mdb_env_write_meta(txn)) != MDB_SUCCESS || + (n = mdb_env_sync(env, 0)) != 0) { + mdb_txn_abort(txn); + return n; + } + env->me_txn = NULL; + + /* update the DB tables */ + { + int toggle = !env->me_db_toggle; + + for (i = 2; i < env->me_numdbs; i++) { + if (txn->mt_dbxs[i].md_dirty) { + env->me_dbs[toggle][i] = txn->mt_dbs[i]; + txn->mt_dbxs[i].md_dirty = 0; + } + } + for (i = env->me_numdbs; i < txn->mt_numdbs; i++) { + txn->mt_dbxs[i].md_dirty = 0; + env->me_dbxs[i] = txn->mt_dbxs[i]; + env->me_dbs[toggle][i] = txn->mt_dbs[i]; + } + env->me_db_toggle = toggle; + env->me_numdbs = txn->mt_numdbs; + + free(txn->mt_dbs); + } + + pthread_mutex_unlock(&env->me_txns->mt_wmutex); + free(txn->mt_u.dirty_queue); + free(txn); + txn = NULL; + +done: + mdb_txn_abort(txn); + + return MDB_SUCCESS; +} + +static int +mdb_env_read_header(MDB_env *env, MDB_meta *meta) +{ + char page[PAGESIZE]; + MDB_page *p; + MDB_meta *m; + int rc; + + assert(env != NULL); + + /* We don't know the page size yet, so use a minimum value. 
+	 */
+
+	if ((rc = pread(env->me_fd, page, PAGESIZE, 0)) == 0) {
+		return ENOENT;
+	} else if (rc != PAGESIZE) {
+		if (rc > 0)
+			errno = EINVAL;
+		DPRINTF("read: %s", strerror(errno));
+		return errno;
+	}
+
+	p = (MDB_page *)page;
+
+	if (!F_ISSET(p->mp_flags, P_META)) {
+		DPRINTF("page %lu not a meta page", p->mp_pgno);
+		return EINVAL;
+	}
+
+	m = METADATA(p);
+	if (m->mm_magic != MDB_MAGIC) {
+		DPRINTF("meta has invalid magic");
+		return EINVAL;
+	}
+
+	if (m->mm_version != MDB_VERSION) {
+		DPRINTF("database is version %u, expected version %u",
+		    m->mm_version, MDB_VERSION);
+		return MDB_VERSION_MISMATCH;
+	}
+
+	memcpy(meta, m, sizeof(*m));
+	return 0;
+}
+
+/* Write the two initial meta pages of a new environment.
+ * Fills in *meta (magic, version, system page size, last page 1, flags,
+ * both root pointers invalid), copies it into meta pages 0 and 1 and
+ * writes both pages to env->me_fd with a single write().
+ * Returns MDB_SUCCESS, ENOMEM if the page buffer cannot be allocated,
+ * EIO on a short write, or the errno value of a failed write().
+ */
+static int
+mdb_env_init_meta(MDB_env *env, MDB_meta *meta)
+{
+	MDB_page *p, *q;
+	MDB_meta *m;
+	int rc;
+	int err = 0;
+	unsigned int psize;
+
+	DPRINTF("writing new meta page");
+	psize = sysconf(_SC_PAGE_SIZE);
+
+	meta->mm_magic = MDB_MAGIC;
+	meta->mm_version = MDB_VERSION;
+	meta->mm_psize = psize;
+	meta->mm_last_pg = 1;
+	meta->mm_flags = env->me_flags & 0xffff;
+	meta->mm_flags |= MDB_INTEGERKEY;
+	meta->mm_dbs[0].md_root = P_INVALID;
+	meta->mm_dbs[1].md_root = P_INVALID;
+
+	/* One zeroed buffer holding both meta pages back to back. */
+	p = calloc(2, psize);
+	if (p == NULL)
+		return ENOMEM;
+	p->mp_pgno = 0;
+	p->mp_flags = P_META;
+
+	m = METADATA(p);
+	memcpy(m, meta, sizeof(*meta));
+
+	q = (MDB_page *)((char *)p + psize);
+
+	q->mp_pgno = 1;
+	q->mp_flags = P_META;
+
+	m = METADATA(q);
+	memcpy(m, meta, sizeof(*meta));
+
+	rc = write(env->me_fd, p, psize * 2);
+	if (rc < 0)
+		err = errno;	/* save before free() can disturb errno */
+	else if (rc != (int)psize * 2)
+		err = EIO;	/* short write: write() does not set errno */
+	free(p);
+	if (err)
+		return err;
+	/* Only a complete write reaches this point. */
+	return (rc == (int)psize * 2) ?
MDB_SUCCESS : errno; +} + +static int +mdb_env_write_meta(MDB_txn *txn) +{ + MDB_env *env; + MDB_meta meta; + off_t off; + int rc, len; + char *ptr; + + assert(txn != NULL); + assert(txn->mt_env != NULL); + + DPRINTF("writing meta page %d for root page %lu", + !F_ISSET(txn->mt_flags, MDB_TXN_METOGGLE), txn->mt_dbs[MAIN_DBI].md_root); + + env = txn->mt_env; + + ptr = (char *)&meta; + off = offsetof(MDB_meta, mm_dbs[0].md_depth); + len = sizeof(MDB_meta) - off; + + ptr += off; + meta.mm_dbs[0] = txn->mt_dbs[0]; + meta.mm_dbs[1] = txn->mt_dbs[1]; + meta.mm_last_pg = txn->mt_next_pgno - 1; + meta.mm_txnid = txn->mt_txnid; + + if (!F_ISSET(txn->mt_flags, MDB_TXN_METOGGLE)) + off += env->me_psize; + off += PAGEHDRSZ; + + lseek(env->me_fd, off, SEEK_SET); + rc = write(env->me_fd, ptr, len); + if (rc != len) { + DPRINTF("write failed, disk error?"); + return errno; + } + + return MDB_SUCCESS; +} + +static int +mdb_env_read_meta(MDB_env *env, int *which) +{ + int toggle = 0; + + assert(env != NULL); + + if (env->me_metas[0]->mm_txnid < env->me_metas[1]->mm_txnid) + toggle = 1; + + if (env->me_meta != env->me_metas[toggle]) + env->me_meta = env->me_metas[toggle]; + if (which) + *which = toggle; + + DPRINTF("Using meta page %d", toggle); + + return MDB_SUCCESS; +} + +int +mdb_env_create(MDB_env **env) +{ + MDB_env *e; + + e = calloc(1, sizeof(MDB_env)); + if (!e) return ENOMEM; + + e->me_maxreaders = DEFAULT_READERS; + e->me_maxdbs = 2; + e->me_fd = -1; + e->me_lfd = -1; + *env = e; + return MDB_SUCCESS; +} + +int +mdb_env_set_mapsize(MDB_env *env, size_t size) +{ + if (env->me_map) + return EINVAL; + env->me_mapsize = size; + return MDB_SUCCESS; +} + +int +mdb_env_set_maxdbs(MDB_env *env, int dbs) +{ + env->me_maxdbs = dbs; + return MDB_SUCCESS; +} + +int +mdb_env_set_maxreaders(MDB_env *env, int readers) +{ + env->me_maxreaders = readers; + return MDB_SUCCESS; +} + +int +mdb_env_get_maxreaders(MDB_env *env, int *readers) +{ + if (!env || !readers) + return EINVAL; + 
*readers = env->me_maxreaders;
+	return MDB_SUCCESS;
+}
+
+/* Second stage of opening an environment: read the meta header from
+ * env->me_fd (treating ENOENT from mdb_env_read_header as "new
+ * environment"), map the data file read-only, write initial meta pages
+ * for a new environment, and select the current meta page.
+ * Returns MDB_SUCCESS or an error code. On failure env->me_map is left
+ * NULL so that mdb_env_close() does not try to unmap it.
+ */
+static int
+mdb_env_open2(MDB_env *env, unsigned int flags)
+{
+	int i, newenv = 0;
+	MDB_meta meta;
+	MDB_page *p;
+
+	env->me_flags = flags;
+
+	memset(&meta, 0, sizeof(meta));
+
+	if ((i = mdb_env_read_header(env, &meta)) != 0) {
+		if (i != ENOENT)
+			return i;
+		DPRINTF("new mdbenv");
+		newenv = 1;
+	}
+
+	if (!env->me_mapsize) {
+		env->me_mapsize = newenv ? DEFAULT_MAPSIZE : meta.mm_mapsize;
+	}
+
+	i = MAP_SHARED;
+	if (meta.mm_address && (flags & MDB_FIXEDMAP))
+		i |= MAP_FIXED;
+	env->me_map = mmap(meta.mm_address, env->me_mapsize, PROT_READ, i,
+		env->me_fd, 0);
+	if (env->me_map == MAP_FAILED) {
+		i = errno;
+		/* MAP_FAILED is non-NULL; clear it so it is never unmapped. */
+		env->me_map = NULL;
+		return i;
+	}
+
+	if (newenv) {
+		meta.mm_mapsize = env->me_mapsize;
+		if (flags & MDB_FIXEDMAP)
+			meta.mm_address = env->me_map;
+		i = mdb_env_init_meta(env, &meta);
+		if (i != MDB_SUCCESS) {
+			munmap(env->me_map, env->me_mapsize);
+			env->me_map = NULL;	/* avoid a second munmap later */
+			return i;
+		}
+	}
+	env->me_psize = meta.mm_psize;
+
+	/* The two meta pages sit at the start of the map, one page apart. */
+	p = (MDB_page *)env->me_map;
+	env->me_metas[0] = METADATA(p);
+	env->me_metas[1] = (MDB_meta *)((char *)env->me_metas[0] + meta.mm_psize);
+
+	if ((i = mdb_env_read_meta(env, NULL)) != 0)
+		return i;
+
+	DPRINTF("opened database version %u, pagesize %u",
+	    env->me_meta->mm_version, env->me_psize);
+	DPRINTF("depth: %u", env->me_meta->mm_dbs[MAIN_DBI].md_depth);
+	DPRINTF("entries: %lu", env->me_meta->mm_dbs[MAIN_DBI].md_entries);
+	DPRINTF("branch pages: %lu", env->me_meta->mm_dbs[MAIN_DBI].md_branch_pages);
+	DPRINTF("leaf pages: %lu", env->me_meta->mm_dbs[MAIN_DBI].md_leaf_pages);
+	DPRINTF("overflow pages: %lu", env->me_meta->mm_dbs[MAIN_DBI].md_overflow_pages);
+	DPRINTF("root: %lu", env->me_meta->mm_dbs[MAIN_DBI].md_root);
+
+	return MDB_SUCCESS;
+}
+
+static void
+mdb_env_reader_dest(void *ptr)
+{
+	MDB_reader *reader = ptr;
+
+	reader->mr_txnid = 0;
+	reader->mr_pid = 0;
+	reader->mr_tid = 0;
+}
+
+/* downgrade the
exclusive lock on the region back to shared */ +static void +mdb_env_share_locks(MDB_env *env) +{ + struct flock lock_info; + + env->me_txns->mt_txnid = env->me_meta->mm_txnid; + + memset((void *)&lock_info, 0, sizeof(lock_info)); + lock_info.l_type = F_RDLCK; + lock_info.l_whence = SEEK_SET; + lock_info.l_start = 0; + lock_info.l_len = 1; + fcntl(env->me_lfd, F_SETLK, &lock_info); +} + +static int +mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl) +{ + int rc; + off_t size, rsize; + struct flock lock_info; + + *excl = 0; + + if ((env->me_lfd = open(lpath, O_RDWR|O_CREAT, mode)) == -1) { + rc = errno; + return rc; + } + /* Try to get exclusive lock. If we succeed, then + * nobody is using the lock region and we should initialize it. + */ + memset((void *)&lock_info, 0, sizeof(lock_info)); + lock_info.l_type = F_WRLCK; + lock_info.l_whence = SEEK_SET; + lock_info.l_start = 0; + lock_info.l_len = 1; + rc = fcntl(env->me_lfd, F_SETLK, &lock_info); + if (rc == 0) { + *excl = 1; + } else { + lock_info.l_type = F_RDLCK; + rc = fcntl(env->me_lfd, F_SETLK, &lock_info); + if (rc) { + rc = errno; + goto fail; + } + } + size = lseek(env->me_lfd, 0, SEEK_END); + rsize = (env->me_maxreaders-1) * sizeof(MDB_reader) + sizeof(MDB_txninfo); + if (size < rsize && *excl) { + if (ftruncate(env->me_lfd, rsize) != 0) { + rc = errno; + goto fail; + } + } else { + rsize = size; + size = rsize - sizeof(MDB_txninfo); + env->me_maxreaders = size/sizeof(MDB_reader) + 1; + } + env->me_txns = mmap(0, rsize, PROT_READ|PROT_WRITE, MAP_SHARED, + env->me_lfd, 0); + if (env->me_txns == MAP_FAILED) { + rc = errno; + goto fail; + } + if (*excl) { + pthread_mutexattr_t mattr; + + pthread_mutexattr_init(&mattr); + pthread_mutexattr_setpshared(&mattr, PTHREAD_PROCESS_SHARED); + pthread_mutex_init(&env->me_txns->mt_mutex, &mattr); + pthread_mutex_init(&env->me_txns->mt_wmutex, &mattr); + env->me_txns->mt_version = MDB_VERSION; + env->me_txns->mt_magic = MDB_MAGIC; + 
env->me_txns->mt_txnid = 0;
+		env->me_txns->mt_numreaders = 0;
+
+	} else {
+		if (env->me_txns->mt_magic != MDB_MAGIC) {
+			DPRINTF("lock region has invalid magic");
+			rc = EINVAL;
+			goto fail;
+		}
+		if (env->me_txns->mt_version != MDB_VERSION) {
+			DPRINTF("lock region is version %u, expected version %u",
+				env->me_txns->mt_version, MDB_VERSION);
+			rc = MDB_VERSION_MISMATCH;
+			goto fail;
+		}
+		if (errno != EACCES && errno != EAGAIN) {
+			rc = errno;
+			goto fail;
+		}
+	}
+	return MDB_SUCCESS;
+
+fail:
+	close(env->me_lfd);
+	return rc;
+
+}
+
+#define LOCKNAME	"/lock.mdb"
+#define DATANAME	"/data.mdb"
+/* Open the environment stored under directory `path`.
+ * Builds "<path>/lock.mdb" and "<path>/data.mdb" in one allocation,
+ * sets up the lock file, opens the data file (read-only when MDB_RDONLY
+ * is set, otherwise read-write with O_CREAT) and finishes through
+ * mdb_env_open2().
+ * Returns MDB_SUCCESS or an error code (ENOMEM, an errno value, or
+ * whatever mdb_env_setup_locks()/mdb_env_open2() report). The path
+ * buffer is released on every exit path.
+ */
+int
+mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mode_t mode)
+{
+	int		oflags, rc, len, excl;
+	char *lpath, *dpath;
+
+	len = strlen(path);
+	/* sizeof() of each name includes its NUL, so both strings fit. */
+	lpath = malloc(len + sizeof(LOCKNAME) + len + sizeof(DATANAME));
+	if (!lpath)
+		return ENOMEM;
+	dpath = lpath + len + sizeof(LOCKNAME);
+	sprintf(lpath, "%s" LOCKNAME, path);
+	sprintf(dpath, "%s" DATANAME, path);
+
+	rc = mdb_env_setup_locks(env, lpath, mode, &excl);
+	if (rc)
+		goto leave;
+
+	if (F_ISSET(flags, MDB_RDONLY))
+		oflags = O_RDONLY;
+	else
+		oflags = O_RDWR | O_CREAT;
+
+	if ((env->me_fd = open(dpath, oflags, mode)) == -1) {
+		/* Leave through the common exit so lpath is freed. */
+		rc = errno;
+		goto leave;
+	}
+
+	if ((rc = mdb_env_open2(env, flags)) != MDB_SUCCESS) {
+		close(env->me_fd);
+		env->me_fd = -1;
+	} else {
+		env->me_path = strdup(path);
+		DPRINTF("opened dbenv %p", (void *) env);
+		pthread_key_create(&env->me_txkey, mdb_env_reader_dest);
+		if (excl)
+			mdb_env_share_locks(env);
+		env->me_dbxs = calloc(env->me_maxdbs, sizeof(MDB_dbx));
+		env->me_dbs[0] = calloc(env->me_maxdbs, sizeof(MDB_db));
+		env->me_dbs[1] = calloc(env->me_maxdbs, sizeof(MDB_db));
+		env->me_numdbs = 2;
+		/* Report allocation failures; mdb_env_close() frees
+		 * whichever of these were obtained.
+		 */
+		if (!env->me_path || !env->me_dbxs ||
+		    !env->me_dbs[0] || !env->me_dbs[1])
+			rc = ENOMEM;
+	}
+
+leave:
+	free(lpath);
+	return rc;
+}
+
+void
+mdb_env_close(MDB_env *env)
+{
+	if (env == NULL)
+		return;
+
+	free(env->me_dbs[1]);
+	free(env->me_dbs[0]);
+	free(env->me_dbxs);
+	free(env->me_path);
+
+	if (env->me_map) {
+		munmap(env->me_map, env->me_mapsize);
+	}
+
close(env->me_fd); + if (env->me_txns) { + size_t size = (env->me_maxreaders-1) * sizeof(MDB_reader) + sizeof(MDB_txninfo); + munmap(env->me_txns, size); + } + close(env->me_lfd); + free(env); +} + +/* Search for key within a leaf page, using binary search. + * Returns the smallest entry larger or equal to the key. + * If exactp is non-null, stores whether the found entry was an exact match + * in *exactp (1 or 0). + * If kip is non-null, stores the index of the found entry in *kip. + * If no entry larger or equal to the key is found, returns NULL. + */ +static MDB_node * +mdb_search_node(MDB_txn *txn, MDB_dbi dbi, MDB_page *mp, MDB_val *key, + int *exactp, unsigned int *kip) +{ + unsigned int i = 0; + int low, high; + int rc = 0; + MDB_node *node; + MDB_val nodekey; + + DPRINTF("searching %u keys in %s page %lu", + NUMKEYS(mp), + IS_LEAF(mp) ? "leaf" : "branch", + mp->mp_pgno); + + assert(NUMKEYS(mp) > 0); + + memset(&nodekey, 0, sizeof(nodekey)); + + low = IS_LEAF(mp) ? 0 : 1; + high = NUMKEYS(mp) - 1; + while (low <= high) { + i = (low + high) >> 1; + node = NODEPTR(mp, i); + + nodekey.mv_size = node->mn_ksize; + nodekey.mv_data = NODEKEY(node); + + if (txn->mt_dbxs[dbi].md_cmp) + rc = txn->mt_dbxs[dbi].md_cmp(key, &nodekey); + else + rc = _mdb_cmp(txn, dbi, key, &nodekey); + + if (IS_LEAF(mp)) + DPRINTF("found leaf index %u [%.*s], rc = %i", + i, (int)nodekey.mv_size, (char *)nodekey.mv_data, rc); + else + DPRINTF("found branch index %u [%.*s -> %lu], rc = %i", + i, (int)node->mn_ksize, (char *)NODEKEY(node), + node->mn_pgno, rc); + + if (rc == 0) + break; + if (rc > 0) + low = i + 1; + else + high = i - 1; + } + + if (rc > 0) { /* Found entry is less than the key. */ + i++; /* Skip to get the smallest entry larger than key. */ + if (i >= NUMKEYS(mp)) + /* There is no entry larger or equal to the key. */ + return NULL; + } + if (exactp) + *exactp = (rc == 0); + if (kip) /* Store the key index if requested. 
*/ + *kip = i; + + return NODEPTR(mp, i); +} + +static void +cursor_pop_page(MDB_cursor *cursor) +{ + MDB_ppage *top; + + top = CURSOR_TOP(cursor); + CURSOR_POP(cursor); + + DPRINTF("popped page %lu off cursor %p", top->mp_page->mp_pgno, (void *) cursor); + + free(top); +} + +static MDB_ppage * +cursor_push_page(MDB_cursor *cursor, MDB_page *mp) +{ + MDB_ppage *ppage; + + DPRINTF("pushing page %lu on cursor %p", mp->mp_pgno, (void *) cursor); + + if ((ppage = calloc(1, sizeof(MDB_ppage))) == NULL) + return NULL; + ppage->mp_page = mp; + CURSOR_PUSH(cursor, ppage); + return ppage; +} + +static MDB_page * +mdb_get_page(MDB_txn *txn, pgno_t pgno) +{ + MDB_page *p = NULL; + int found = 0; + + if (!F_ISSET(txn->mt_flags, MDB_TXN_RDONLY) && !STAILQ_EMPTY(txn->mt_u.dirty_queue)) { + MDB_dpage *dp; + STAILQ_FOREACH(dp, txn->mt_u.dirty_queue, h.md_next) { + if (dp->p.mp_pgno == pgno) { + p = &dp->p; + found = 1; + break; + } + } + } + if (!found) { + if (pgno > txn->mt_env->me_meta->mm_last_pg) + return NULL; + p = (MDB_page *)(txn->mt_env->me_map + txn->mt_env->me_psize * pgno); + } + return p; +} + +static int +mdb_search_page_root(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, + MDB_cursor *cursor, int modify, MDB_pageparent *mpp) +{ + MDB_page *mp = mpp->mp_page; + int rc; + + if (cursor && cursor_push_page(cursor, mp) == NULL) + return MDB_FAIL; + + while (IS_BRANCH(mp)) { + unsigned int i = 0; + MDB_node *node; + + DPRINTF("branch page %lu has %u keys", mp->mp_pgno, NUMKEYS(mp)); + assert(NUMKEYS(mp) > 1); + DPRINTF("found index 0 to page %lu", NODEPGNO(NODEPTR(mp, 0))); + + if (key == NULL) /* Initialize cursor to first page. 
*/ + i = 0; + else if (key->mv_size > MAXKEYSIZE && key->mv_data == NULL) { + /* cursor to last page */ + i = NUMKEYS(mp)-1; + } else { + int exact; + node = mdb_search_node(txn, dbi, mp, key, &exact, &i); + if (node == NULL) + i = NUMKEYS(mp) - 1; + else if (!exact) { + assert(i > 0); + i--; + } + } + + if (key) + DPRINTF("following index %u for key %.*s", + i, (int)key->mv_size, (char *)key->mv_data); + assert(i < NUMKEYS(mp)); + node = NODEPTR(mp, i); + + if (cursor) + CURSOR_TOP(cursor)->mp_ki = i; + + mpp->mp_parent = mp; + if ((mp = mdb_get_page(txn, NODEPGNO(node))) == NULL) + return MDB_FAIL; + mpp->mp_pi = i; + mpp->mp_page = mp; + + if (cursor && cursor_push_page(cursor, mp) == NULL) + return MDB_FAIL; + + if (modify) { + MDB_dhead *dh = ((MDB_dhead *)mp)-1; + if ((rc = mdb_touch(txn, mpp)) != 0) + return rc; + dh = ((MDB_dhead *)mpp->mp_page)-1; + dh->md_parent = mpp->mp_parent; + dh->md_pi = mpp->mp_pi; + } + + mp = mpp->mp_page; + } + + if (!IS_LEAF(mp)) { + DPRINTF("internal error, index points to a %02X page!?", + mp->mp_flags); + return MDB_FAIL; + } + + DPRINTF("found leaf page %lu for key %.*s", mp->mp_pgno, + key ? (int)key->mv_size : 0, key ? (char *)key->mv_data : NULL); + + return MDB_SUCCESS; +} + +/* Search for the page a given key should be in. + * Stores a pointer to the found page in *mpp. + * If key is NULL, search for the lowest page (used by mdb_cursor_first). + * If cursor is non-null, pushes parent pages on the cursor stack. + * If modify is true, visited pages are updated with new page numbers. + */ +static int +mdb_search_page(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, + MDB_cursor *cursor, int modify, MDB_pageparent *mpp) +{ + int rc; + pgno_t root; + + /* Choose which root page to start with. If a transaction is given + * use the root page from the transaction, otherwise read the last + * committed root page. 
+ */ + if (F_ISSET(txn->mt_flags, MDB_TXN_ERROR)) { + DPRINTF("transaction has failed, must abort"); + return EINVAL; + } else + root = txn->mt_dbs[dbi].md_root; + + if (root == P_INVALID) { /* Tree is empty. */ + DPRINTF("tree is empty"); + return MDB_NOTFOUND; + } + + if ((mpp->mp_page = mdb_get_page(txn, root)) == NULL) + return MDB_FAIL; + + DPRINTF("root page has flags 0x%X", mpp->mp_page->mp_flags); + + if (modify) { + /* For sub-databases, update main root first */ + if (dbi > MAIN_DBI && !txn->mt_dbxs[dbi].md_dirty) { + MDB_pageparent mp2; + rc = mdb_search_page(txn, MAIN_DBI, &txn->mt_dbxs[dbi].md_name, + NULL, 1, &mp2); + if (rc) + return rc; + txn->mt_dbxs[dbi].md_dirty = 1; + } + if (!F_ISSET(mpp->mp_page->mp_flags, P_DIRTY)) { + mpp->mp_parent = NULL; + mpp->mp_pi = 0; + if ((rc = mdb_touch(txn, mpp))) + return rc; + txn->mt_dbs[dbi].md_root = mpp->mp_page->mp_pgno; + } + } + + return mdb_search_page_root(txn, dbi, key, cursor, modify, mpp); +} + +static int +mdb_read_data(MDB_txn *txn, MDB_node *leaf, MDB_val *data) +{ + MDB_page *omp; /* overflow mpage */ + pgno_t pgno; + + if (!F_ISSET(leaf->mn_flags, F_BIGDATA)) { + data->mv_size = leaf->mn_dsize; + data->mv_data = NODEDATA(leaf); + return MDB_SUCCESS; + } + + /* Read overflow data. 
+ */ + data->mv_size = leaf->mn_dsize; + memcpy(&pgno, NODEDATA(leaf), sizeof(pgno)); + if ((omp = mdb_get_page(txn, pgno)) == NULL) { + DPRINTF("read overflow page %lu failed", pgno); + return MDB_FAIL; + } + data->mv_data = omp; + + return MDB_SUCCESS; +} + +int +mdb_get(MDB_txn *txn, MDB_dbi dbi, + MDB_val *key, MDB_val *data) +{ + int rc, exact; + MDB_node *leaf; + MDB_pageparent mpp; + + assert(key); + assert(data); + DPRINTF("===> get key [%.*s]", (int)key->mv_size, (char *)key->mv_data); + + if (txn == NULL || !dbi || dbi >= txn->mt_numdbs) + return EINVAL; + + if (key->mv_size == 0 || key->mv_size > MAXKEYSIZE) { + return EINVAL; + } + + if ((rc = mdb_search_page(txn, dbi, key, NULL, 0, &mpp)) != MDB_SUCCESS) + return rc; + + leaf = mdb_search_node(txn, dbi, mpp.mp_page, key, &exact, NULL); + if (leaf && exact) { + /* Return first duplicate data item */ + if (F_ISSET(txn->mt_dbs[dbi].md_flags, MDB_DUPSORT)) { + MDB_xcursor mx; + + mdb_xcursor_init0(txn, dbi, &mx); + mdb_xcursor_init1(txn, dbi, &mx, leaf); + rc = mdb_search_page(&mx.mx_txn, mx.mx_cursor.mc_dbi, NULL, NULL, 0, &mpp); + if (rc != MDB_SUCCESS) + return rc; + leaf = NODEPTR(mpp.mp_page, 0); + } + rc = mdb_read_data(txn, leaf, data); + } else { + rc = MDB_NOTFOUND; + } + + return rc; +} + +static int +mdb_sibling(MDB_cursor *cursor, int move_right) +{ + int rc; + MDB_node *indx; + MDB_ppage *parent, *top; + MDB_page *mp; + + top = CURSOR_TOP(cursor); + if ((parent = SLIST_NEXT(top, mp_entry)) == NULL) { + return MDB_NOTFOUND; /* root has no siblings */ + } + + DPRINTF("parent page is page %lu, index %u", + parent->mp_page->mp_pgno, parent->mp_ki); + + cursor_pop_page(cursor); + if (move_right ? (parent->mp_ki + 1 >= NUMKEYS(parent->mp_page)) + : (parent->mp_ki == 0)) { + DPRINTF("no more keys left, moving to %s sibling", + move_right ? 
"right" : "left"); + if ((rc = mdb_sibling(cursor, move_right)) != MDB_SUCCESS) + return rc; + parent = CURSOR_TOP(cursor); + } else { + if (move_right) + parent->mp_ki++; + else + parent->mp_ki--; + DPRINTF("just moving to %s index key %u", + move_right ? "right" : "left", parent->mp_ki); + } + assert(IS_BRANCH(parent->mp_page)); + + indx = NODEPTR(parent->mp_page, parent->mp_ki); + if ((mp = mdb_get_page(cursor->mc_txn, indx->mn_pgno)) == NULL) + return MDB_FAIL; +#if 0 + mp->parent = parent->mp_page; + mp->parent_index = parent->mp_ki; +#endif + + cursor_push_page(cursor, mp); + + return MDB_SUCCESS; +} + +static int +mdb_set_key(MDB_node *node, MDB_val *key) +{ + if (key == NULL) + return 0; + + key->mv_size = node->mn_ksize; + key->mv_data = NODEKEY(node); + + return 0; +} + +static int +mdb_cursor_next(MDB_cursor *cursor, MDB_val *key, MDB_val *data, MDB_cursor_op op) +{ + MDB_ppage *top; + MDB_page *mp; + MDB_node *leaf; + int rc; + + if (cursor->mc_eof) { + return MDB_NOTFOUND; + } + + assert(cursor->mc_initialized); + + if (cursor->mc_txn->mt_dbs[cursor->mc_dbi].md_flags & MDB_DUPSORT) { + if (op == MDB_NEXT || op == MDB_NEXT_DUP) { + rc = mdb_cursor_next(&cursor->mc_xcursor->mx_cursor, data, NULL, MDB_NEXT); + if (op != MDB_NEXT || rc == MDB_SUCCESS) + return rc; + } + } + + top = CURSOR_TOP(cursor); + mp = top->mp_page; + + DPRINTF("cursor_next: top page is %lu in cursor %p", mp->mp_pgno, (void *) cursor); + + if (top->mp_ki + 1 >= NUMKEYS(mp)) { + DPRINTF("=====> move to next sibling page"); + if (mdb_sibling(cursor, 1) != MDB_SUCCESS) { + cursor->mc_eof = 1; + return MDB_NOTFOUND; + } + top = CURSOR_TOP(cursor); + mp = top->mp_page; + DPRINTF("next page is %lu, key index %u", mp->mp_pgno, top->mp_ki); + } else + top->mp_ki++; + + DPRINTF("==> cursor points to page %lu with %u keys, key index %u", + mp->mp_pgno, NUMKEYS(mp), top->mp_ki); + + assert(IS_LEAF(mp)); + leaf = NODEPTR(mp, top->mp_ki); + + if (data) { + if ((rc = mdb_read_data(cursor->mc_txn, 
leaf, data)) != MDB_SUCCESS)
+			return rc;
+
+		if (cursor->mc_txn->mt_dbs[cursor->mc_dbi].md_flags & MDB_DUPSORT) {
+			mdb_xcursor_init1(cursor->mc_txn, cursor->mc_dbi, cursor->mc_xcursor, leaf);
+			rc = mdb_cursor_first(&cursor->mc_xcursor->mx_cursor, data, NULL);
+			if (rc != MDB_SUCCESS)
+				return rc;
+		}
+	}
+
+	return mdb_set_key(leaf, key);
+}
+
+/* Move the cursor to the previous entry.
+ * For MDB_DUPSORT databases, MDB_PREV and MDB_PREV_DUP first try to step
+ * the duplicate sub-cursor back; MDB_PREV_DUP returns that result
+ * unconditionally, MDB_PREV only when it succeeded.
+ * Otherwise the key index on the top page is decremented, moving to the
+ * last key of the previous sibling page when the index is already 0.
+ * If data is non-NULL it receives the entry's data (the last duplicate
+ * for MDB_DUPSORT); if key is non-NULL it receives the entry's key.
+ * Returns MDB_SUCCESS, MDB_NOTFOUND when there is no previous sibling,
+ * or the error code of a failed read.
+ */
+static int
+mdb_cursor_prev(MDB_cursor *cursor, MDB_val *key, MDB_val *data, MDB_cursor_op op)
+{
+	MDB_ppage	*top;
+	MDB_page	*mp;
+	MDB_node	*leaf;
+	int rc;
+
+	assert(cursor->mc_initialized);
+
+	if (cursor->mc_txn->mt_dbs[cursor->mc_dbi].md_flags & MDB_DUPSORT) {
+		if (op == MDB_PREV || op == MDB_PREV_DUP) {
+			rc = mdb_cursor_prev(&cursor->mc_xcursor->mx_cursor, data, NULL, MDB_PREV);
+			if (op != MDB_PREV || rc == MDB_SUCCESS)
+				return rc;
+		}
+	}
+
+	top = CURSOR_TOP(cursor);
+	mp = top->mp_page;
+
+	DPRINTF("cursor_prev: top page is %lu in cursor %p", mp->mp_pgno, (void *) cursor);
+
+	if (top->mp_ki == 0)  {
+		DPRINTF("=====> move to prev sibling page");
+		if (mdb_sibling(cursor, 0) != MDB_SUCCESS) {
+			return MDB_NOTFOUND;
+		}
+		top = CURSOR_TOP(cursor);
+		mp = top->mp_page;
+		top->mp_ki = NUMKEYS(mp) - 1;
+		DPRINTF("prev page is %lu, key index %u", mp->mp_pgno, top->mp_ki);
+	} else
+		top->mp_ki--;
+
+	cursor->mc_eof = 0;
+
+	DPRINTF("==> cursor points to page %lu with %u keys, key index %u",
+	    mp->mp_pgno, NUMKEYS(mp), top->mp_ki);
+
+	assert(IS_LEAF(mp));
+	leaf = NODEPTR(mp, top->mp_ki);
+
+	if (data) {
+		/* Assign first, then compare, so rc carries the real error
+		 * code rather than the result of the comparison.
+		 */
+		if ((rc = mdb_read_data(cursor->mc_txn, leaf, data)) != MDB_SUCCESS)
+			return rc;
+
+		if (cursor->mc_txn->mt_dbs[cursor->mc_dbi].md_flags & MDB_DUPSORT) {
+			mdb_xcursor_init1(cursor->mc_txn, cursor->mc_dbi, cursor->mc_xcursor, leaf);
+			rc = mdb_cursor_last(&cursor->mc_xcursor->mx_cursor, data, NULL);
+			if (rc != MDB_SUCCESS)
+				return rc;
+		}
+	}
+
+	return mdb_set_key(leaf, key);
+}
+
+static int
+mdb_cursor_set(MDB_cursor *cursor, MDB_val *key, MDB_val *data,
+    MDB_cursor_op op, int *exactp)
+{
+	int rc;
+
MDB_node *leaf; + MDB_ppage *top; + MDB_pageparent mpp; + + assert(cursor); + assert(key); + assert(key->mv_size > 0); + + while (CURSOR_TOP(cursor) != NULL) + cursor_pop_page(cursor); + + rc = mdb_search_page(cursor->mc_txn, cursor->mc_dbi, key, cursor, 0, &mpp); + if (rc != MDB_SUCCESS) + return rc; + assert(IS_LEAF(mpp.mp_page)); + + top = CURSOR_TOP(cursor); + leaf = mdb_search_node(cursor->mc_txn, cursor->mc_dbi, mpp.mp_page, key, exactp, &top->mp_ki); + if (exactp != NULL && !*exactp) { + /* MDB_SET specified and not an exact match. */ + return MDB_NOTFOUND; + } + + if (leaf == NULL) { + DPRINTF("===> inexact leaf not found, goto sibling"); + if ((rc = mdb_sibling(cursor, 1)) != MDB_SUCCESS) + return rc; /* no entries matched */ + top = CURSOR_TOP(cursor); + top->mp_ki = 0; + mpp.mp_page = top->mp_page; + assert(IS_LEAF(mpp.mp_page)); + leaf = NODEPTR(mpp.mp_page, 0); + } + + cursor->mc_initialized = 1; + cursor->mc_eof = 0; + + if (data) { + if ((rc = mdb_read_data(cursor->mc_txn, leaf, data)) != MDB_SUCCESS) + return rc; + + if (cursor->mc_txn->mt_dbs[cursor->mc_dbi].md_flags & MDB_DUPSORT) { + mdb_xcursor_init1(cursor->mc_txn, cursor->mc_dbi, cursor->mc_xcursor, leaf); + if (op == MDB_SET || op == MDB_SET_RANGE) { + rc = mdb_cursor_first(&cursor->mc_xcursor->mx_cursor, data, NULL); + } else { + int ex2, *ex2p; + MDB_cursor_op op2; + if (op == MDB_GET_BOTH) { + ex2p = &ex2; + op2 = MDB_SET; + } else { + ex2p = NULL; + op2 = MDB_SET_RANGE; + } + rc = mdb_cursor_set(&cursor->mc_xcursor->mx_cursor, data, NULL, op2, ex2p); + if (rc != MDB_SUCCESS) + return rc; + } + } + } + + rc = mdb_set_key(leaf, key); + if (rc == MDB_SUCCESS) { + DPRINTF("==> cursor placed on key %.*s", + (int)key->mv_size, (char *)key->mv_data); + ; + } + + return rc; +} + +static int +mdb_cursor_first(MDB_cursor *cursor, MDB_val *key, MDB_val *data) +{ + int rc; + MDB_pageparent mpp; + MDB_node *leaf; + + while (CURSOR_TOP(cursor) != NULL) + cursor_pop_page(cursor); + + rc = 
mdb_search_page(cursor->mc_txn, cursor->mc_dbi, NULL, cursor, 0, &mpp); + if (rc != MDB_SUCCESS) + return rc; + assert(IS_LEAF(mpp.mp_page)); + + leaf = NODEPTR(mpp.mp_page, 0); + cursor->mc_initialized = 1; + cursor->mc_eof = 0; + + if (data) { + if ((rc = mdb_read_data(cursor->mc_txn, leaf, data)) != MDB_SUCCESS) + return rc; + + if (cursor->mc_txn->mt_dbs[cursor->mc_dbi].md_flags & MDB_DUPSORT) { + mdb_xcursor_init1(cursor->mc_txn, cursor->mc_dbi, cursor->mc_xcursor, leaf); + rc = mdb_cursor_first(&cursor->mc_xcursor->mx_cursor, data, NULL); + if (rc) + return rc; + } + } + return mdb_set_key(leaf, key); +} + +static int +mdb_cursor_last(MDB_cursor *cursor, MDB_val *key, MDB_val *data) +{ + int rc; + MDB_ppage *top; + MDB_pageparent mpp; + MDB_node *leaf; + MDB_val lkey; + + while (CURSOR_TOP(cursor) != NULL) + cursor_pop_page(cursor); + + lkey.mv_size = MAXKEYSIZE+1; + lkey.mv_data = NULL; + + rc = mdb_search_page(cursor->mc_txn, cursor->mc_dbi, &lkey, cursor, 0, &mpp); + if (rc != MDB_SUCCESS) + return rc; + assert(IS_LEAF(mpp.mp_page)); + + leaf = NODEPTR(mpp.mp_page, NUMKEYS(mpp.mp_page)-1); + cursor->mc_initialized = 1; + cursor->mc_eof = 0; + + top = CURSOR_TOP(cursor); + top->mp_ki = NUMKEYS(top->mp_page) - 1; + + if (data) { + if ((rc = mdb_read_data(cursor->mc_txn, leaf, data)) != MDB_SUCCESS) + return rc; + + if (cursor->mc_txn->mt_dbs[cursor->mc_dbi].md_flags & MDB_DUPSORT) { + mdb_xcursor_init1(cursor->mc_txn, cursor->mc_dbi, cursor->mc_xcursor, leaf); + rc = mdb_cursor_last(&cursor->mc_xcursor->mx_cursor, data, NULL); + if (rc) + return rc; + } + } + + return mdb_set_key(leaf, key); +} + +int +mdb_cursor_get(MDB_cursor *cursor, MDB_val *key, MDB_val *data, + MDB_cursor_op op) +{ + int rc; + int exact = 0; + + assert(cursor); + + switch (op) { + case MDB_GET_BOTH: + case MDB_GET_BOTH_RANGE: + if (data == NULL) { + rc = EINVAL; + break; + } + /* FALLTHRU */ + case MDB_SET: + case MDB_SET_RANGE: + if (key == NULL || key->mv_size == 0 || key->mv_size > 
MAXKEYSIZE) {
+			rc = EINVAL;
+		} else if (op == MDB_SET_RANGE)
+			/* Range lookup: a NULL exactp lets mdb_cursor_set()
+			 * settle on the first key >= the requested key.
+			 */
+			rc = mdb_cursor_set(cursor, key, data, op, NULL);
+		else
+			/* MDB_SET and the MDB_GET_BOTH* ops need the key
+			 * itself: a non-NULL exactp makes mdb_cursor_set()
+			 * return MDB_NOTFOUND on an inexact match.
+			 */
+			rc = mdb_cursor_set(cursor, key, data, op, &exact);
+		break;
+	case MDB_NEXT:
+	case MDB_NEXT_DUP:
+	case MDB_NEXT_NODUP:
+		if (!cursor->mc_initialized)
+			rc = mdb_cursor_first(cursor, key, data);
+		else
+			rc = mdb_cursor_next(cursor, key, data, op);
+		break;
+	case MDB_PREV:
+	case MDB_PREV_DUP:
+	case MDB_PREV_NODUP:
+		if (!cursor->mc_initialized || cursor->mc_eof)
+			rc = mdb_cursor_last(cursor, key, data);
+		else
+			rc = mdb_cursor_prev(cursor, key, data, op);
+		break;
+	case MDB_FIRST:
+		rc = mdb_cursor_first(cursor, key, data);
+		break;
+	case MDB_LAST:
+		rc = mdb_cursor_last(cursor, key, data);
+		break;
+	default:
+		DPRINTF("unhandled/unimplemented cursor operation %u", op);
+		rc = EINVAL;
+		break;
+	}
+
+	return rc;
+}
+
+/* Allocate a page and initialize it
+ */
+static MDB_dpage *
+mdb_new_page(MDB_txn *txn, MDB_dbi dbi, uint32_t flags, int num)
+{
+	MDB_dpage *dp;
+
+	if ((dp = mdb_alloc_page(txn, NULL, 0, num)) == NULL)
+		return NULL;
+	DPRINTF("allocated new mpage %lu, page size %u",
+	    dp->p.mp_pgno, txn->mt_env->me_psize);
+	dp->p.mp_flags = flags | P_DIRTY;
+	dp->p.mp_lower = PAGEHDRSZ;
+	dp->p.mp_upper = txn->mt_env->me_psize;
+
+	if (IS_BRANCH(&dp->p))
+		txn->mt_dbs[dbi].md_branch_pages++;
+	else if (IS_LEAF(&dp->p))
+		txn->mt_dbs[dbi].md_leaf_pages++;
+	else if (IS_OVERFLOW(&dp->p)) {
+		txn->mt_dbs[dbi].md_overflow_pages += num;
+		dp->p.mp_pages = num;
+	}
+
+	return dp;
+}
+
+static size_t
+mdb_leaf_size(MDB_env *env, MDB_val *key, MDB_val *data)
+{
+	size_t		 sz;
+
+	sz = LEAFSIZE(key, data);
+	if (data->mv_size >= env->me_psize / MDB_MINKEYS) {
+		/* put on overflow page */
+		sz -= data->mv_size - sizeof(pgno_t);
+	}
+
+	return sz + sizeof(indx_t);
+}
+
+static size_t
+mdb_branch_size(MDB_env *env, MDB_val *key)
+{
+	size_t		 sz;
+
+	sz = INDXSIZE(key);
+	if (sz >= env->me_psize / MDB_MINKEYS) {
+		/* put on overflow page */
+		/*
not implemented */ + /* sz -= key->size - sizeof(pgno_t); */ + } + + return sz + sizeof(indx_t); +} + +static int +mdb_add_node(MDB_txn *txn, MDB_dbi dbi, MDB_page *mp, indx_t indx, + MDB_val *key, MDB_val *data, pgno_t pgno, uint8_t flags) +{ + unsigned int i; + size_t node_size = NODESIZE; + indx_t ofs; + MDB_node *node; + MDB_dpage *ofp = NULL; /* overflow page */ + + assert(mp->mp_upper >= mp->mp_lower); + + DPRINTF("add node [%.*s] to %s page %lu at index %i, key size %zu", + key ? (int)key->mv_size : 0, key ? (char *)key->mv_data : NULL, + IS_LEAF(mp) ? "leaf" : "branch", + mp->mp_pgno, indx, key ? key->mv_size : 0); + + if (key != NULL) + node_size += key->mv_size; + + if (IS_LEAF(mp)) { + assert(data); + if (F_ISSET(flags, F_BIGDATA)) { + /* Data already on overflow page. */ + node_size += sizeof(pgno_t); + } else if (data->mv_size >= txn->mt_env->me_psize / MDB_MINKEYS) { + int ovpages = OVPAGES(data->mv_size, txn->mt_env->me_psize); + /* Put data on overflow page. */ + DPRINTF("data size is %zu, put on overflow page", + data->mv_size); + node_size += sizeof(pgno_t); + if ((ofp = mdb_new_page(txn, dbi, P_OVERFLOW, ovpages)) == NULL) + return MDB_FAIL; + DPRINTF("allocated overflow page %lu", ofp->p.mp_pgno); + flags |= F_BIGDATA; + } else { + node_size += data->mv_size; + } + } + + if (node_size + sizeof(indx_t) > SIZELEFT(mp)) { + DPRINTF("not enough room in page %lu, got %u ptrs", + mp->mp_pgno, NUMKEYS(mp)); + DPRINTF("upper - lower = %u - %u = %u", mp->mp_upper, mp->mp_lower, + mp->mp_upper - mp->mp_lower); + DPRINTF("node size = %zu", node_size); + return ENOSPC; + } + + /* Move higher pointers up one slot. */ + for (i = NUMKEYS(mp); i > indx; i--) + mp->mp_ptrs[i] = mp->mp_ptrs[i - 1]; + + /* Adjust free space offsets. */ + ofs = mp->mp_upper - node_size; + assert(ofs >= mp->mp_lower + sizeof(indx_t)); + mp->mp_ptrs[indx] = ofs; + mp->mp_upper = ofs; + mp->mp_lower += sizeof(indx_t); + + /* Write the node data. 
*/ + node = NODEPTR(mp, indx); + node->mn_ksize = (key == NULL) ? 0 : key->mv_size; + node->mn_flags = flags; + if (IS_LEAF(mp)) + node->mn_dsize = data->mv_size; + else + node->mn_pgno = pgno; + + if (key) + memcpy(NODEKEY(node), key->mv_data, key->mv_size); + + if (IS_LEAF(mp)) { + assert(key); + if (ofp == NULL) { + if (F_ISSET(flags, F_BIGDATA)) + memcpy(node->mn_data + key->mv_size, data->mv_data, + sizeof(pgno_t)); + else + memcpy(node->mn_data + key->mv_size, data->mv_data, + data->mv_size); + } else { + memcpy(node->mn_data + key->mv_size, &ofp->p.mp_pgno, + sizeof(pgno_t)); + memcpy(METADATA(&ofp->p), data->mv_data, data->mv_size); + } + } + + return MDB_SUCCESS; +} + +static void +mdb_del_node(MDB_page *mp, indx_t indx) +{ + unsigned int sz; + indx_t i, j, numkeys, ptr; + MDB_node *node; + char *base; + + DPRINTF("delete node %u on %s page %lu", indx, + IS_LEAF(mp) ? "leaf" : "branch", mp->mp_pgno); + assert(indx < NUMKEYS(mp)); + + node = NODEPTR(mp, indx); + sz = NODESIZE + node->mn_ksize; + if (IS_LEAF(mp)) { + if (F_ISSET(node->mn_flags, F_BIGDATA)) + sz += sizeof(pgno_t); + else + sz += NODEDSZ(node); + } + + ptr = mp->mp_ptrs[indx]; + numkeys = NUMKEYS(mp); + for (i = j = 0; i < numkeys; i++) { + if (i != indx) { + mp->mp_ptrs[j] = mp->mp_ptrs[i]; + if (mp->mp_ptrs[i] < ptr) + mp->mp_ptrs[j] += sz; + j++; + } + } + + base = (char *)mp + mp->mp_upper; + memmove(base + sz, base, ptr - mp->mp_upper); + + mp->mp_lower -= sizeof(indx_t); + mp->mp_upper += sz; +} + +static void +mdb_xcursor_init0(MDB_txn *txn, MDB_dbi dbi, MDB_xcursor *mx) +{ + MDB_dbi dbn; + + mx->mx_txn = *txn; + mx->mx_txn.mt_dbxs = mx->mx_dbxs; + mx->mx_txn.mt_dbs = mx->mx_dbs; + mx->mx_dbxs[0] = txn->mt_dbxs[0]; + mx->mx_dbxs[1] = txn->mt_dbxs[1]; + if (dbi > 1) { + mx->mx_dbxs[2] = txn->mt_dbxs[dbi]; + dbn = 2; + } else { + dbn = 1; + } + mx->mx_dbxs[dbn+1].md_parent = dbn; + mx->mx_dbxs[dbn+1].md_cmp = mx->mx_dbxs[dbn].md_dcmp; + mx->mx_dbxs[dbn+1].md_rel = mx->mx_dbxs[dbn].md_rel; 
+ mx->mx_dbxs[dbn+1].md_dirty = 0; + mx->mx_txn.mt_numdbs = dbn+2; + + SLIST_INIT(&mx->mx_cursor.mc_stack); + mx->mx_cursor.mc_txn = &mx->mx_txn; + mx->mx_cursor.mc_dbi = dbn+1; +} + +static void +mdb_xcursor_init1(MDB_txn *txn, MDB_dbi dbi, MDB_xcursor *mx, MDB_node *node) +{ + MDB_db *db = NODEDATA(node); + MDB_dbi dbn; + mx->mx_dbs[0] = txn->mt_dbs[0]; + mx->mx_dbs[1] = txn->mt_dbs[1]; + if (dbi > 1) { + mx->mx_dbs[2] = txn->mt_dbs[dbi]; + dbn = 3; + } else { + dbn = 2; + } + mx->mx_dbs[dbn] = *db; + mx->mx_dbxs[dbn].md_name.mv_data = NODEKEY(node); + mx->mx_dbxs[dbn].md_name.mv_size = node->mn_ksize; + mx->mx_txn.mt_next_pgno = txn->mt_next_pgno; + mx->mx_txn.mt_oldest = txn->mt_oldest; + mx->mx_txn.mt_u = txn->mt_u; +} + +static void +mdb_xcursor_fini(MDB_txn *txn, MDB_dbi dbi, MDB_xcursor *mx) +{ + txn->mt_next_pgno = mx->mx_txn.mt_next_pgno; + txn->mt_oldest = mx->mx_txn.mt_oldest; + txn->mt_u = mx->mx_txn.mt_u; + txn->mt_dbs[0] = mx->mx_dbs[0]; + txn->mt_dbs[1] = mx->mx_dbs[1]; + txn->mt_dbxs[0].md_dirty = mx->mx_dbxs[0].md_dirty; + txn->mt_dbxs[1].md_dirty = mx->mx_dbxs[1].md_dirty; + if (dbi > 1) { + txn->mt_dbs[dbi] = mx->mx_dbs[2]; + txn->mt_dbxs[dbi].md_dirty = mx->mx_dbxs[2].md_dirty; + } +} + +int +mdb_cursor_open(MDB_txn *txn, MDB_dbi dbi, MDB_cursor **ret) +{ + MDB_cursor *cursor; + size_t size = sizeof(MDB_cursor); + + if (txn == NULL || ret == NULL || !dbi || dbi >= txn->mt_numdbs) + return EINVAL; + + if (txn->mt_dbs[dbi].md_flags & MDB_DUPSORT) + size += sizeof(MDB_xcursor); + + if ((cursor = calloc(1, size)) != NULL) { + SLIST_INIT(&cursor->mc_stack); + cursor->mc_dbi = dbi; + cursor->mc_txn = txn; + if (txn->mt_dbs[dbi].md_flags & MDB_DUPSORT) { + MDB_xcursor *mx = (MDB_xcursor *)(cursor + 1); + cursor->mc_xcursor = mx; + mdb_xcursor_init0(txn, dbi, mx); + } + } else { + return ENOMEM; + } + + *ret = cursor; + + return MDB_SUCCESS; +} + +/* Return the count of duplicate data items for the current key */ +int +mdb_cursor_count(MDB_cursor *mc, 
unsigned long *countp) +{ + if (mc == NULL || countp == NULL) + return EINVAL; + + if (!(mc->mc_txn->mt_dbs[mc->mc_dbi].md_flags & MDB_DUPSORT)) + return EINVAL; + + if (!mc->mc_xcursor->mx_cursor.mc_initialized) + return EINVAL; + + *countp = mc->mc_xcursor->mx_txn.mt_dbs[mc->mc_xcursor->mx_cursor.mc_dbi].md_entries; + return MDB_SUCCESS; +} + +void +mdb_cursor_close(MDB_cursor *cursor) +{ + if (cursor != NULL) { + while(!CURSOR_EMPTY(cursor)) + cursor_pop_page(cursor); + if (cursor->mc_txn->mt_dbs[cursor->mc_dbi].md_flags & MDB_DUPSORT) { + mdb_xcursor_fini(cursor->mc_txn, cursor->mc_dbi, cursor->mc_xcursor); + while(!CURSOR_EMPTY(&cursor->mc_xcursor->mx_cursor)) + cursor_pop_page(&cursor->mc_xcursor->mx_cursor); + } + + free(cursor); + } +} + +static int +mdb_update_key(MDB_page *mp, indx_t indx, MDB_val *key) +{ + indx_t ptr, i, numkeys; + int delta; + size_t len; + MDB_node *node; + char *base; + + node = NODEPTR(mp, indx); + ptr = mp->mp_ptrs[indx]; + DPRINTF("update key %u (ofs %u) [%.*s] to [%.*s] on page %lu", + indx, ptr, + (int)node->mn_ksize, (char *)NODEKEY(node), + (int)key->mv_size, (char *)key->mv_data, + mp->mp_pgno); + + delta = key->mv_size - node->mn_ksize; + if (delta) { + if (delta > 0 && SIZELEFT(mp) < delta) { + DPRINTF("OUCH! Not enough room, delta = %d", delta); + return ENOSPC; + } + + numkeys = NUMKEYS(mp); + for (i = 0; i < numkeys; i++) { + if (mp->mp_ptrs[i] <= ptr) + mp->mp_ptrs[i] -= delta; + } + + base = (char *)mp + mp->mp_upper; + len = ptr - mp->mp_upper + NODESIZE; + memmove(base - delta, base, len); + mp->mp_upper -= delta; + + node = NODEPTR(mp, indx); + node->mn_ksize = key->mv_size; + } + + memcpy(NODEKEY(node), key->mv_data, key->mv_size); + + return MDB_SUCCESS; +} + +/* Move a node from src to dst. 
+ */ +static int +mdb_move_node(MDB_txn *txn, MDB_dbi dbi, MDB_pageparent *src, indx_t srcindx, + MDB_pageparent *dst, indx_t dstindx) +{ + int rc; + MDB_node *srcnode; + MDB_val key, data; + + srcnode = NODEPTR(src->mp_page, srcindx); + DPRINTF("moving %s node %u [%.*s] on page %lu to node %u on page %lu", + IS_LEAF(src->mp_page) ? "leaf" : "branch", + srcindx, + (int)srcnode->mn_ksize, (char *)NODEKEY(srcnode), + src->mp_page->mp_pgno, + dstindx, dst->mp_page->mp_pgno); + + /* Mark src and dst as dirty. */ + if ((rc = mdb_touch(txn, src)) || + (rc = mdb_touch(txn, dst))) + return rc;; + + /* Add the node to the destination page. + */ + key.mv_size = srcnode->mn_ksize; + key.mv_data = NODEKEY(srcnode); + data.mv_size = NODEDSZ(srcnode); + data.mv_data = NODEDATA(srcnode); + rc = mdb_add_node(txn, dbi, dst->mp_page, dstindx, &key, &data, NODEPGNO(srcnode), + srcnode->mn_flags); + if (rc != MDB_SUCCESS) + return rc; + + /* Delete the node from the source page. + */ + mdb_del_node(src->mp_page, srcindx); + + /* Update the parent separators. 
+ */ + if (srcindx == 0 && src->mp_pi != 0) { + DPRINTF("update separator for source page %lu to [%.*s]", + src->mp_page->mp_pgno, (int)key.mv_size, (char *)key.mv_data); + if ((rc = mdb_update_key(src->mp_parent, src->mp_pi, + &key)) != MDB_SUCCESS) + return rc; + } + + if (srcindx == 0 && IS_BRANCH(src->mp_page)) { + MDB_val nullkey; + nullkey.mv_size = 0; + assert(mdb_update_key(src->mp_page, 0, &nullkey) == MDB_SUCCESS); + } + + if (dstindx == 0 && dst->mp_pi != 0) { + DPRINTF("update separator for destination page %lu to [%.*s]", + dst->mp_page->mp_pgno, (int)key.mv_size, (char *)key.mv_data); + if ((rc = mdb_update_key(dst->mp_parent, dst->mp_pi, + &key)) != MDB_SUCCESS) + return rc; + } + + if (dstindx == 0 && IS_BRANCH(dst->mp_page)) { + MDB_val nullkey; + nullkey.mv_size = 0; + assert(mdb_update_key(dst->mp_page, 0, &nullkey) == MDB_SUCCESS); + } + + return MDB_SUCCESS; +} + +static int +mdb_merge(MDB_txn *txn, MDB_dbi dbi, MDB_pageparent *src, MDB_pageparent *dst) +{ + int rc; + indx_t i; + MDB_node *srcnode; + MDB_val key, data; + MDB_pageparent mpp; + MDB_dhead *dh; + + DPRINTF("merging page %lu and %lu", src->mp_page->mp_pgno, dst->mp_page->mp_pgno); + + assert(txn != NULL); + assert(src->mp_parent); /* can't merge root page */ + assert(dst->mp_parent); + + /* Mark src and dst as dirty. */ + if ((rc = mdb_touch(txn, src)) || + (rc = mdb_touch(txn, dst))) + return rc; + + /* Move all nodes from src to dst. 
+ */ + for (i = 0; i < NUMKEYS(src->mp_page); i++) { + srcnode = NODEPTR(src->mp_page, i); + + key.mv_size = srcnode->mn_ksize; + key.mv_data = NODEKEY(srcnode); + data.mv_size = NODEDSZ(srcnode); + data.mv_data = NODEDATA(srcnode); + rc = mdb_add_node(txn, dbi, dst->mp_page, NUMKEYS(dst->mp_page), &key, + &data, NODEPGNO(srcnode), srcnode->mn_flags); + if (rc != MDB_SUCCESS) + return rc; + } + + DPRINTF("dst page %lu now has %u keys (%.1f%% filled)", + dst->mp_page->mp_pgno, NUMKEYS(dst->mp_page), (float)PAGEFILL(txn->mt_env, dst->mp_page) / 10); + + /* Unlink the src page from parent. + */ + mdb_del_node(src->mp_parent, src->mp_pi); + if (src->mp_pi == 0) { + key.mv_size = 0; + if ((rc = mdb_update_key(src->mp_parent, 0, &key)) != MDB_SUCCESS) + return rc; + } + + if (IS_LEAF(src->mp_page)) + txn->mt_dbs[dbi].md_leaf_pages--; + else + txn->mt_dbs[dbi].md_branch_pages--; + + mpp.mp_page = src->mp_parent; + dh = (MDB_dhead *)src->mp_parent; + dh--; + mpp.mp_parent = dh->md_parent; + mpp.mp_pi = dh->md_pi; + + return mdb_rebalance(txn, dbi, &mpp); +} + +#define FILL_THRESHOLD 250 + +static int +mdb_rebalance(MDB_txn *txn, MDB_dbi dbi, MDB_pageparent *mpp) +{ + MDB_node *node; + MDB_page *root; + MDB_pageparent npp; + indx_t si = 0, di = 0; + + assert(txn != NULL); + assert(mpp != NULL); + + DPRINTF("rebalancing %s page %lu (has %u keys, %.1f%% full)", + IS_LEAF(mpp->mp_page) ? 
"leaf" : "branch", + mpp->mp_page->mp_pgno, NUMKEYS(mpp->mp_page), (float)PAGEFILL(txn->mt_env, mpp->mp_page) / 10); + + if (PAGEFILL(txn->mt_env, mpp->mp_page) >= FILL_THRESHOLD) { + DPRINTF("no need to rebalance page %lu, above fill threshold", + mpp->mp_page->mp_pgno); + return MDB_SUCCESS; + } + + if (mpp->mp_parent == NULL) { + if (NUMKEYS(mpp->mp_page) == 0) { + DPRINTF("tree is completely empty"); + txn->mt_dbs[dbi].md_root = P_INVALID; + txn->mt_dbs[dbi].md_depth--; + txn->mt_dbs[dbi].md_leaf_pages--; + } else if (IS_BRANCH(mpp->mp_page) && NUMKEYS(mpp->mp_page) == 1) { + DPRINTF("collapsing root page!"); + txn->mt_dbs[dbi].md_root = NODEPGNO(NODEPTR(mpp->mp_page, 0)); + if ((root = mdb_get_page(txn, txn->mt_dbs[dbi].md_root)) == NULL) + return MDB_FAIL; + txn->mt_dbs[dbi].md_depth--; + txn->mt_dbs[dbi].md_branch_pages--; + } else + DPRINTF("root page doesn't need rebalancing"); + return MDB_SUCCESS; + } + + /* The parent (branch page) must have at least 2 pointers, + * otherwise the tree is invalid. + */ + assert(NUMKEYS(mpp->mp_parent) > 1); + + /* Leaf page fill factor is below the threshold. + * Try to move keys from left or right neighbor, or + * merge with a neighbor page. + */ + + /* Find neighbors. + */ + if (mpp->mp_pi == 0) { + /* We're the leftmost leaf in our parent. + */ + DPRINTF("reading right neighbor"); + node = NODEPTR(mpp->mp_parent, mpp->mp_pi + 1); + if ((npp.mp_page = mdb_get_page(txn, NODEPGNO(node))) == NULL) + return MDB_FAIL; + npp.mp_pi = mpp->mp_pi + 1; + si = 0; + di = NUMKEYS(mpp->mp_page); + } else { + /* There is at least one neighbor to the left. 
+ */ + DPRINTF("reading left neighbor"); + node = NODEPTR(mpp->mp_parent, mpp->mp_pi - 1); + if ((npp.mp_page = mdb_get_page(txn, NODEPGNO(node))) == NULL) + return MDB_FAIL; + npp.mp_pi = mpp->mp_pi - 1; + si = NUMKEYS(npp.mp_page) - 1; + di = 0; + } + npp.mp_parent = mpp->mp_parent; + + DPRINTF("found neighbor page %lu (%u keys, %.1f%% full)", + npp.mp_page->mp_pgno, NUMKEYS(npp.mp_page), (float)PAGEFILL(txn->mt_env, npp.mp_page) / 10); + + /* If the neighbor page is above threshold and has at least two + * keys, move one key from it. + * + * Otherwise we should try to merge them. + */ + if (PAGEFILL(txn->mt_env, npp.mp_page) >= FILL_THRESHOLD && NUMKEYS(npp.mp_page) >= 2) + return mdb_move_node(txn, dbi, &npp, si, mpp, di); + else { /* FIXME: if (has_enough_room()) */ + if (mpp->mp_pi == 0) + return mdb_merge(txn, dbi, &npp, mpp); + else + return mdb_merge(txn, dbi, mpp, &npp); + } +} + +static int +mdb_del0(MDB_txn *txn, MDB_dbi dbi, unsigned int ki, MDB_pageparent *mpp, MDB_node *leaf) +{ + int rc; + + /* add overflow pages to free list */ + if (F_ISSET(leaf->mn_flags, F_BIGDATA)) { + int i, ovpages; + pgno_t pg; + + memcpy(&pg, NODEDATA(leaf), sizeof(pg)); + ovpages = OVPAGES(NODEDSZ(leaf), txn->mt_env->me_psize); + for (i=0; imt_free_pgs, pg); + pg++; + } + } + mdb_del_node(mpp->mp_page, ki); + txn->mt_dbs[dbi].md_entries--; + rc = mdb_rebalance(txn, dbi, mpp); + if (rc != MDB_SUCCESS) + txn->mt_flags |= MDB_TXN_ERROR; + + return rc; +} + +int +mdb_del(MDB_txn *txn, MDB_dbi dbi, + MDB_val *key, MDB_val *data, + unsigned int flags) +{ + int rc, exact; + unsigned int ki; + MDB_node *leaf; + MDB_pageparent mpp; + + DPRINTF("========> delete key %.*s", (int)key->mv_size, (char *)key->mv_data); + + assert(key != NULL); + + if (txn == NULL || !dbi || dbi >= txn->mt_numdbs) + return EINVAL; + + if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY)) { + return EINVAL; + } + + if (key->mv_size == 0 || key->mv_size > MAXKEYSIZE) { + return EINVAL; + } + + mpp.mp_parent = NULL; + 
mpp.mp_pi = 0; + if ((rc = mdb_search_page(txn, dbi, key, NULL, 1, &mpp)) != MDB_SUCCESS) + return rc; + + leaf = mdb_search_node(txn, dbi, mpp.mp_page, key, &exact, &ki); + if (leaf == NULL || !exact) { + return MDB_NOTFOUND; + } + + if (F_ISSET(txn->mt_dbs[dbi].md_flags, MDB_DUPSORT)) { + MDB_xcursor mx; + MDB_pageparent mp2; + + mdb_xcursor_init0(txn, dbi, &mx); + mdb_xcursor_init1(txn, dbi, &mx, leaf); + if (flags == MDB_DEL_DUP) { + rc = mdb_del(&mx.mx_txn, mx.mx_cursor.mc_dbi, data, NULL, 0); + mdb_xcursor_fini(txn, dbi, &mx); + if (rc != MDB_SUCCESS) + return rc; + /* If sub-DB still has entries, we're done */ + if (mx.mx_txn.mt_dbs[mx.mx_cursor.mc_dbi].md_root != P_INVALID) { + memcpy(NODEDATA(leaf), &mx.mx_txn.mt_dbs[mx.mx_cursor.mc_dbi], + sizeof(MDB_db)); + return rc; + } + /* otherwise fall thru and delete the sub-DB */ + } else { + /* add all the child DB's pages to the free list */ + rc = mdb_search_page(&mx.mx_txn, mx.mx_cursor.mc_dbi, + NULL, &mx.mx_cursor, 0, &mp2); + if (rc == MDB_SUCCESS) { + MDB_ppage *top, *parent; + MDB_node *ni; + unsigned int i; + + cursor_pop_page(&mx.mx_cursor); + top = CURSOR_TOP(&mx.mx_cursor); + if (top != NULL) { + parent = SLIST_NEXT(top, mp_entry); + while (parent != NULL) { + for (i=0; imp_page); i++) { + ni = NODEPTR(top->mp_page, i); + mdb_midl_insert(txn->mt_free_pgs, ni->mn_pgno); + } + if (parent) { + parent->mp_ki++; + if (parent->mp_ki >= NUMKEYS(parent->mp_page)) { + cursor_pop_page(&mx.mx_cursor); + top = CURSOR_TOP(&mx.mx_cursor); + parent = SLIST_NEXT(top, mp_entry); + } else { + ni = NODEPTR(parent->mp_page, parent->mp_ki); + top->mp_page = mdb_get_page(&mx.mx_txn, ni->mn_pgno); + } + } + } + } + mdb_midl_insert(txn->mt_free_pgs, mx.mx_txn.mt_dbs[mx.mx_cursor.mc_dbi].md_root); + } + } + } + + if (data && (rc = mdb_read_data(txn, leaf, data)) != MDB_SUCCESS) + return rc; + + return mdb_del0(txn, dbi, ki, &mpp, leaf); +} + +/* Split page <*mpp>, and insert in either left or + * right sibling, at index 
<*newindxp> (as if unsplit). Updates *mpp and + * *newindxp with the actual values after split, ie if *mpp and *newindxp + * refer to a node in the new right sibling page. + */ +static int +mdb_split(MDB_txn *txn, MDB_dbi dbi, MDB_page **mpp, unsigned int *newindxp, + MDB_val *newkey, MDB_val *newdata, pgno_t newpgno) +{ + uint8_t flags; + int rc = MDB_SUCCESS, ins_new = 0; + indx_t newindx; + pgno_t pgno = 0; + unsigned int i, j, split_indx; + MDB_node *node; + MDB_val sepkey, rkey, rdata; + MDB_page *copy; + MDB_dpage *mdp, *rdp, *pdp; + MDB_dhead *dh; + + assert(txn != NULL); + + dh = ((MDB_dhead *)*mpp) - 1; + mdp = (MDB_dpage *)dh; + newindx = *newindxp; + + DPRINTF("-----> splitting %s page %lu and adding [%.*s] at index %i", + IS_LEAF(&mdp->p) ? "leaf" : "branch", mdp->p.mp_pgno, + (int)newkey->mv_size, (char *)newkey->mv_data, *newindxp); + + if (mdp->h.md_parent == NULL) { + if ((pdp = mdb_new_page(txn, dbi, P_BRANCH, 1)) == NULL) + return MDB_FAIL; + mdp->h.md_pi = 0; + mdp->h.md_parent = &pdp->p; + txn->mt_dbs[dbi].md_root = pdp->p.mp_pgno; + DPRINTF("root split! new root = %lu", pdp->p.mp_pgno); + txn->mt_dbs[dbi].md_depth++; + + /* Add left (implicit) pointer. */ + if (mdb_add_node(txn, dbi, &pdp->p, 0, NULL, NULL, + mdp->p.mp_pgno, 0) != MDB_SUCCESS) + return MDB_FAIL; + } else { + DPRINTF("parent branch page is %lu", mdp->h.md_parent->mp_pgno); + } + + /* Create a right sibling. */ + if ((rdp = mdb_new_page(txn, dbi, mdp->p.mp_flags, 1)) == NULL) + return MDB_FAIL; + rdp->h.md_parent = mdp->h.md_parent; + rdp->h.md_pi = mdp->h.md_pi + 1; + DPRINTF("new right sibling: page %lu", rdp->p.mp_pgno); + + /* Move half of the keys to the right sibling. 
*/ + if ((copy = malloc(txn->mt_env->me_psize)) == NULL) + return MDB_FAIL; + memcpy(copy, &mdp->p, txn->mt_env->me_psize); + memset(&mdp->p.mp_ptrs, 0, txn->mt_env->me_psize - PAGEHDRSZ); + mdp->p.mp_lower = PAGEHDRSZ; + mdp->p.mp_upper = txn->mt_env->me_psize; + + split_indx = NUMKEYS(copy) / 2 + 1; + + /* First find the separating key between the split pages. + */ + memset(&sepkey, 0, sizeof(sepkey)); + if (newindx == split_indx) { + sepkey.mv_size = newkey->mv_size; + sepkey.mv_data = newkey->mv_data; + } else { + node = NODEPTR(copy, split_indx); + sepkey.mv_size = node->mn_ksize; + sepkey.mv_data = NODEKEY(node); + } + + DPRINTF("separator is [%.*s]", (int)sepkey.mv_size, (char *)sepkey.mv_data); + + /* Copy separator key to the parent. + */ + if (SIZELEFT(rdp->h.md_parent) < mdb_branch_size(txn->mt_env, &sepkey)) { + rc = mdb_split(txn, dbi, &rdp->h.md_parent, &rdp->h.md_pi, + &sepkey, NULL, rdp->p.mp_pgno); + + /* Right page might now have changed parent. + * Check if left page also changed parent. + */ + if (rdp->h.md_parent != mdp->h.md_parent && + mdp->h.md_pi >= NUMKEYS(mdp->h.md_parent)) { + mdp->h.md_parent = rdp->h.md_parent; + mdp->h.md_pi = rdp->h.md_pi - 1; + } + } else { + rc = mdb_add_node(txn, dbi, rdp->h.md_parent, rdp->h.md_pi, + &sepkey, NULL, rdp->p.mp_pgno, 0); + } + if (rc != MDB_SUCCESS) { + free(copy); + return MDB_FAIL; + } + + for (i = j = 0; i <= NUMKEYS(copy); j++) { + if (i < split_indx) { + /* Re-insert in left sibling. */ + pdp = mdp; + } else { + /* Insert in right sibling. */ + if (i == split_indx) + /* Reset insert index for right sibling. */ + j = (i == newindx && ins_new); + pdp = rdp; + } + + if (i == newindx && !ins_new) { + /* Insert the original entry that caused the split. 
*/ + rkey.mv_data = newkey->mv_data; + rkey.mv_size = newkey->mv_size; + if (IS_LEAF(&mdp->p)) { + rdata.mv_data = newdata->mv_data; + rdata.mv_size = newdata->mv_size; + } else + pgno = newpgno; + flags = 0; + + ins_new = 1; + + /* Update page and index for the new key. */ + *newindxp = j; + *mpp = &pdp->p; + } else if (i == NUMKEYS(copy)) { + break; + } else { + node = NODEPTR(copy, i); + rkey.mv_data = NODEKEY(node); + rkey.mv_size = node->mn_ksize; + if (IS_LEAF(&mdp->p)) { + rdata.mv_data = NODEDATA(node); + rdata.mv_size = node->mn_dsize; + } else + pgno = node->mn_pgno; + flags = node->mn_flags; + + i++; + } + + if (!IS_LEAF(&mdp->p) && j == 0) { + /* First branch index doesn't need key data. */ + rkey.mv_size = 0; + } + + rc = mdb_add_node(txn, dbi, &pdp->p, j, &rkey, &rdata, pgno,flags); + } + + free(copy); + return rc; +} + +static int +mdb_put0(MDB_txn *txn, MDB_dbi dbi, + MDB_val *key, MDB_val *data, unsigned int flags) +{ + int rc = MDB_SUCCESS, exact; + unsigned int ki; + MDB_node *leaf; + MDB_pageparent mpp; + MDB_val xdata, *rdata; + MDB_db dummy; + + DPRINTF("==> put key %.*s, size %zu, data size %zu", + (int)key->mv_size, (char *)key->mv_data, key->mv_size, data->mv_size); + + mpp.mp_parent = NULL; + mpp.mp_pi = 0; + rc = mdb_search_page(txn, dbi, key, NULL, 1, &mpp); + if (rc == MDB_SUCCESS) { + leaf = mdb_search_node(txn, dbi, mpp.mp_page, key, &exact, &ki); + if (leaf && exact) { + if (F_ISSET(txn->mt_dbs[dbi].md_flags, MDB_DUPSORT)) { + goto put_sub; + } + if (flags == MDB_NOOVERWRITE) { + DPRINTF("duplicate key %.*s", + (int)key->mv_size, (char *)key->mv_data); + return MDB_KEYEXIST; + } + /* same size, just replace it */ + if (NODEDSZ(leaf) == data->mv_size) { + memcpy(NODEDATA(leaf), data->mv_data, data->mv_size); + goto done; + } + mdb_del_node(mpp.mp_page, ki); + } + if (leaf == NULL) { /* append if not found */ + ki = NUMKEYS(mpp.mp_page); + DPRINTF("appending key at index %i", ki); + } + } else if (rc == MDB_NOTFOUND) { + MDB_dpage *dp; 
+ /* new file, just write a root leaf page */ + DPRINTF("allocating new root leaf page"); + if ((dp = mdb_new_page(txn, dbi, P_LEAF, 1)) == NULL) { + return ENOMEM; + } + mpp.mp_page = &dp->p; + txn->mt_dbs[dbi].md_root = mpp.mp_page->mp_pgno; + txn->mt_dbs[dbi].md_depth++; + ki = 0; + } + else + goto done; + + assert(IS_LEAF(mpp.mp_page)); + DPRINTF("there are %u keys, should insert new key at index %i", + NUMKEYS(mpp.mp_page), ki); + + /* For sorted dups, the data item at this level is a DB record + * for a child DB; the actual data elements are stored as keys + * in the child DB. + */ + if (F_ISSET(txn->mt_dbs[dbi].md_flags, MDB_DUPSORT)) { + rdata = &xdata; + xdata.mv_size = sizeof(MDB_db); + xdata.mv_data = &dummy; + memset(&dummy, 0, sizeof(dummy)); + dummy.md_root = P_INVALID; + } else { + rdata = data; + } + + if (SIZELEFT(mpp.mp_page) < mdb_leaf_size(txn->mt_env, key, rdata)) { + rc = mdb_split(txn, dbi, &mpp.mp_page, &ki, key, rdata, P_INVALID); + } else { + /* There is room already in this leaf page. */ + rc = mdb_add_node(txn, dbi, mpp.mp_page, ki, key, rdata, 0, 0); + } + + if (rc != MDB_SUCCESS) + txn->mt_flags |= MDB_TXN_ERROR; + else { + txn->mt_dbs[dbi].md_entries++; + + /* Remember if we just added a subdatabase */ + if (flags & F_SUBDATA) { + leaf = NODEPTR(mpp.mp_page, ki); + leaf->mn_flags |= F_SUBDATA; + } + + /* Now store the actual data in the child DB. Note that we're + * storing the user data in the keys field, so there are strict + * size limits on dupdata. The actual data fields of the child + * DB are all zero size. 
+ */ + if (F_ISSET(txn->mt_dbs[dbi].md_flags, MDB_DUPSORT)) { + MDB_xcursor mx; + + leaf = NODEPTR(mpp.mp_page, ki); +put_sub: + mdb_xcursor_init0(txn, dbi, &mx); + mdb_xcursor_init1(txn, dbi, &mx, leaf); + xdata.mv_size = 0; + xdata.mv_data = ""; + if (flags == MDB_NODUPDATA) + flags = MDB_NOOVERWRITE; + rc = mdb_put0(&mx.mx_txn, mx.mx_cursor.mc_dbi, data, &xdata, flags); + mdb_xcursor_fini(txn, dbi, &mx); + memcpy(NODEDATA(leaf), &mx.mx_txn.mt_dbs[mx.mx_cursor.mc_dbi], + sizeof(MDB_db)); + } + } + +done: + return rc; +} + +int +mdb_put(MDB_txn *txn, MDB_dbi dbi, + MDB_val *key, MDB_val *data, unsigned int flags) +{ + assert(key != NULL); + assert(data != NULL); + + if (txn == NULL || !dbi || dbi >= txn->mt_numdbs) + return EINVAL; + + if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY)) { + return EINVAL; + } + + if (key->mv_size == 0 || key->mv_size > MAXKEYSIZE) { + return EINVAL; + } + + if ((flags & (MDB_NOOVERWRITE|MDB_NODUPDATA)) != flags) + return EINVAL; + + return mdb_put0(txn, dbi, key, data, flags); +} + +int +mdb_env_get_flags(MDB_env *env, unsigned int *arg) +{ + if (!env || !arg) + return EINVAL; + + *arg = env->me_flags; + return MDB_SUCCESS; +} + +int +mdb_env_get_path(MDB_env *env, const char **arg) +{ + if (!env || !arg) + return EINVAL; + + *arg = env->me_path; + return MDB_SUCCESS; +} + +static int +mdb_stat0(MDB_env *env, MDB_db *db, MDB_stat *arg) +{ + arg->ms_psize = env->me_psize; + arg->ms_depth = db->md_depth; + arg->ms_branch_pages = db->md_branch_pages; + arg->ms_leaf_pages = db->md_leaf_pages; + arg->ms_overflow_pages = db->md_overflow_pages; + arg->ms_entries = db->md_entries; + + return MDB_SUCCESS; +} +int +mdb_env_stat(MDB_env *env, MDB_stat *arg) +{ + if (env == NULL || arg == NULL) + return EINVAL; + + return mdb_stat0(env, &env->me_meta->mm_dbs[MAIN_DBI], arg); +} + +int mdb_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *dbi) +{ + MDB_val key, data; + MDB_dbi i; + int rc, dirty = 0; + size_t len; + + /* main DB? 
*/ + if (!name) { + *dbi = MAIN_DBI; + if (flags & (MDB_DUPSORT|MDB_REVERSEKEY|MDB_INTEGERKEY)) + txn->mt_dbs[MAIN_DBI].md_flags |= (flags & (MDB_DUPSORT|MDB_REVERSEKEY|MDB_INTEGERKEY)); + return MDB_SUCCESS; + } + + /* Is the DB already open? */ + len = strlen(name); + for (i=2; imt_numdbs; i++) { + if (len == txn->mt_dbxs[i].md_name.mv_size && + !strncmp(name, txn->mt_dbxs[i].md_name.mv_data, len)) { + *dbi = i; + return MDB_SUCCESS; + } + } + + if (txn->mt_numdbs >= txn->mt_env->me_maxdbs - 1) + return ENFILE; + + /* Find the DB info */ + key.mv_size = len; + key.mv_data = (void *)name; + rc = mdb_get(txn, MAIN_DBI, &key, &data); + + /* Create if requested */ + if (rc == MDB_NOTFOUND && (flags & MDB_CREATE)) { + MDB_db dummy; + data.mv_size = sizeof(MDB_db); + data.mv_data = &dummy; + memset(&dummy, 0, sizeof(dummy)); + dummy.md_root = P_INVALID; + dummy.md_flags = flags & 0xffff; + rc = mdb_put0(txn, MAIN_DBI, &key, &data, F_SUBDATA); + dirty = 1; + } + + /* OK, got info, add to table */ + if (rc == MDB_SUCCESS) { + txn->mt_dbxs[txn->mt_numdbs].md_name.mv_data = strdup(name); + txn->mt_dbxs[txn->mt_numdbs].md_name.mv_size = len; + txn->mt_dbxs[txn->mt_numdbs].md_cmp = NULL; + txn->mt_dbxs[txn->mt_numdbs].md_dcmp = NULL; + txn->mt_dbxs[txn->mt_numdbs].md_rel = NULL; + txn->mt_dbxs[txn->mt_numdbs].md_parent = MAIN_DBI; + txn->mt_dbxs[txn->mt_numdbs].md_dirty = dirty; + memcpy(&txn->mt_dbs[txn->mt_numdbs], data.mv_data, sizeof(MDB_db)); + *dbi = txn->mt_numdbs; + txn->mt_numdbs++; + } + + return rc; +} + +int mdb_stat(MDB_txn *txn, MDB_dbi dbi, MDB_stat *arg) +{ + if (txn == NULL || arg == NULL || dbi >= txn->mt_numdbs) + return EINVAL; + + return mdb_stat0(txn->mt_env, &txn->mt_dbs[dbi], arg); +} + +void mdb_close(MDB_txn *txn, MDB_dbi dbi) +{ + char *ptr; + if (dbi <= MAIN_DBI || dbi >= txn->mt_numdbs) + return; + ptr = txn->mt_dbxs[dbi].md_name.mv_data; + txn->mt_dbxs[dbi].md_name.mv_data = NULL; + txn->mt_dbxs[dbi].md_name.mv_size = 0; + free(ptr); +} + +int 
/* Install <cmp> as the md_dcmp comparison function of database <dbi>.
 * Returns MDB_SUCCESS, or EINVAL for a NULL txn or a dbi that is zero
 * or not below txn->mt_numdbs.
 */
int mdb_set_dupsort(MDB_txn *txn, MDB_dbi dbi, MDB_cmp_func *cmp)
{
	if (txn != NULL && dbi != 0 && dbi < txn->mt_numdbs) {
		txn->mt_dbxs[dbi].md_dcmp = cmp;
		return MDB_SUCCESS;
	}

	return EINVAL;
}
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifndef _MDB_H_ +#define _MDB_H_ + +#include + +#define MDB_VERSION_MAJOR 0 +#define MDB_VERSION_MINOR 8 +#define MDB_VERSION_PATCH 0 +#define MDB_VERINT(a,b,c) ((a << 24) | (b << 16) | c) +#define MDB_VERSION_FULL \ + MDB_VERINT(MDB_VERSION_MAJOR,MDB_VERSION_MINOR,MDB_VERSION_PATCH) +#define MDB_VERSION_DATE "August 11, 2011" +#define MDB_VERSTR(a,b,c,d) "MDB " #a "." #b "." #c "(" #d ")" +#define MDB_VERSION_STRING \ + MDB_VERSTR(MDB_VERSION_MAJOR,MDB_VERSION_MINOR,MDB_VERSION_PATCH,MDB_VERSION_DATE) + +struct MDB_cursor; +struct MDB_txn; +struct MDB_env; + +typedef struct MDB_cursor MDB_cursor; +typedef struct MDB_txn MDB_txn; +typedef struct MDB_env MDB_env; + +typedef unsigned int MDB_dbi; + +typedef struct MDB_val { + size_t mv_size; + void *mv_data; +} MDB_val; + +typedef int (MDB_cmp_func)(const MDB_val *a, const MDB_val *b); +typedef void (MDB_rel_func)(void *ptr, void *oldptr); + +#define MDB_NOOVERWRITE 0x10 +#define MDB_NODUPDATA 0x20 +#define MDB_DEL_DUP 0x40 + +typedef enum MDB_cursor_op { /* cursor operations */ + MDB_FIRST, + MDB_GET_BOTH, /* position at key/data */ + MDB_GET_BOTH_RANGE, /* position at key, nearest data */ + MDB_LAST, + MDB_NEXT, + MDB_NEXT_DUP, + MDB_NEXT_NODUP, + MDB_PREV, + MDB_PREV_DUP, + MDB_PREV_NODUP, + MDB_SET, /* position at key, or fail */ + MDB_SET_RANGE /* position at given key */ +} MDB_cursor_op; + +/* return codes */ +#define MDB_SUCCESS 0 +#define MDB_FAIL -1 +#define MDB_KEYEXIST -2 +#define MDB_NOTFOUND -3 +#define MDB_VERSION_MISMATCH -4 + +/* DB flags */ +#define MDB_REVERSEKEY 0x02 /* use reverse string keys */ +#define MDB_DUPSORT 0x04 /* use sorted duplicates */ +#define 
MDB_INTEGERKEY 0x08 /* numeric keys in native byte order */ + +/* environment flags */ +#define MDB_FIXEDMAP 0x01 /* mmap at a fixed address */ +#define MDB_NOSYNC 0x10000 /* don't fsync after commit */ +#define MDB_RDONLY 0x20000 /* read only */ + +/* DB or env flags */ +#define MDB_CREATE 0x40000 /* create if not present */ + +typedef struct MDB_stat { + unsigned int ms_psize; + unsigned int ms_depth; + unsigned long ms_branch_pages; + unsigned long ms_leaf_pages; + unsigned long ms_overflow_pages; + unsigned long ms_entries; +} MDB_stat; + +char *mdb_version(int *major, int *minor, int *patch); +char *mdb_strerror(int rc); + +int mdb_env_create(MDB_env **env); +int mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mode_t mode); +int mdb_env_stat(MDB_env *env, MDB_stat *stat); +int mdb_env_sync(MDB_env *env, int force); +void mdb_env_close(MDB_env *env); +int mdb_env_get_flags(MDB_env *env, unsigned int *flags); +int mdb_env_get_path(MDB_env *env, const char **path); +int mdb_env_set_mapsize(MDB_env *env, size_t size); +int mdb_env_set_maxreaders(MDB_env *env, int readers); +int mdb_env_get_maxreaders(MDB_env *env, int *readers); +int mdb_env_set_maxdbs(MDB_env *env, int dbs); + +int mdb_txn_begin(MDB_env *env, int rdonly, MDB_txn **txn); +int mdb_txn_commit(MDB_txn *txn); +void mdb_txn_abort(MDB_txn *txn); + +int mdb_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *dbi); +int mdb_stat(MDB_txn *txn, MDB_dbi dbi, MDB_stat *stat); +void mdb_close(MDB_txn *txn, MDB_dbi dbi); + +int mdb_set_compare(MDB_txn *txn, MDB_dbi dbi, MDB_cmp_func *cmp); +int mdb_set_dupsort(MDB_txn *txn, MDB_dbi dbi, MDB_cmp_func *cmp); +int mdb_set_relfunc(MDB_txn *txn, MDB_dbi dbi, MDB_rel_func *rel); + +int mdb_get(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data); +int mdb_put(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data, + unsigned int flags); +int mdb_del(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data, + unsigned int flags); + +int 
mdb_cursor_open(MDB_txn *txn, MDB_dbi dbi, MDB_cursor **cursor); +void mdb_cursor_close(MDB_cursor *cursor); +int mdb_cursor_get(MDB_cursor *cursor, MDB_val *key, MDB_val *data, + MDB_cursor_op op); +int mdb_cursor_count(MDB_cursor *cursor, unsigned long *countp); + +int mdb_cmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b); + +#endif /* _MDB_H_ */ diff --git a/servers/slapd/back-mdb/midl.c b/servers/slapd/back-mdb/midl.c new file mode 100644 index 0000000000..8b39acad65 --- /dev/null +++ b/servers/slapd/back-mdb/midl.c @@ -0,0 +1,109 @@ +/* idl.c - ldap bdb back-end ID list functions */ +/* $OpenLDAP$ */ +/* This work is part of OpenLDAP Software . + * + * Copyright 2000-2011 The OpenLDAP Foundation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * . + */ + +#include +#include +#include +#include "midl.h" + +typedef unsigned long pgno_t; + +/* Sort the IDLs from highest to lowest */ +#define IDL_CMP(x,y) ( x > y ? -1 : ( x < y ? 
1 : 0 ) ) + +unsigned mdb_midl_search( ID *ids, ID id ) +{ + /* + * binary search of id in ids + * if found, returns position of id + * if not found, returns first position greater than id + */ + unsigned base = 0; + unsigned cursor = 0; + int val = 0; + unsigned n = ids[0]; + + while( 0 < n ) { + int pivot = n >> 1; + cursor = base + pivot; + val = IDL_CMP( id, ids[cursor + 1] ); + + if( val < 0 ) { + n = pivot; + + } else if ( val > 0 ) { + base = cursor + 1; + n -= pivot + 1; + + } else { + return cursor + 1; + } + } + + if( val > 0 ) { + return cursor + 2; + } else { + return cursor + 1; + } +} + +int mdb_midl_insert( ID *ids, ID id ) +{ + unsigned x; + + if (MDB_IDL_IS_RANGE( ids )) { + /* if already in range, treat as a dup */ + if (id >= MDB_IDL_FIRST(ids) && id <= MDB_IDL_LAST(ids)) + return -1; + if (id < MDB_IDL_FIRST(ids)) + ids[1] = id; + else if (id > MDB_IDL_LAST(ids)) + ids[2] = id; + return 0; + } + + x = mdb_midl_search( ids, id ); + assert( x > 0 ); + + if( x < 1 ) { + /* internal error */ + return -2; + } + + if ( x <= ids[0] && ids[x] == id ) { + /* duplicate */ + return -1; + } + + if ( ++ids[0] >= MDB_IDL_DB_MAX ) { + if( id < ids[1] ) { + ids[1] = id; + ids[2] = ids[ids[0]-1]; + } else if ( ids[ids[0]-1] < id ) { + ids[2] = id; + } else { + ids[2] = ids[ids[0]-1]; + } + ids[0] = NOID; + + } else { + /* insert id */ + AC_MEMCPY( &ids[x+1], &ids[x], (ids[0]-x) * sizeof(ID) ); + ids[x] = id; + } + + return 0; +} diff --git a/servers/slapd/back-mdb/midl.h b/servers/slapd/back-mdb/midl.h new file mode 100644 index 0000000000..aeb0af6203 --- /dev/null +++ b/servers/slapd/back-mdb/midl.h @@ -0,0 +1,78 @@ +/* idl.h - ldap bdb back-end ID list header file */ +/* $OpenLDAP$ */ +/* This work is part of OpenLDAP Software . + * + * Copyright 2000-2011 The OpenLDAP Foundation. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * . + */ + +#ifndef _MDB_MIDL_H_ +#define _MDB_MIDL_H_ + +#define AC_MEMCPY(dst,src,size) memcpy(dst,src,size) + +#define ID unsigned long +#define NOID ((ID)~0) + +/* IDL sizes - likely should be even bigger + * limiting factors: sizeof(ID), thread stack size + */ +#define MDB_IDL_LOGN 16 /* DB_SIZE is 2^16, UM_SIZE is 2^17 */ +#define MDB_IDL_DB_SIZE (1<bi_lastid) ) +#define MDB_IDL_ALL( bdb, ids ) MDB_IDL_RANGE( ids, 1, ((bdb)->bi_lastid) ) + +#define MDB_IDL_FIRST( ids ) ( ids[1] ) +#define MDB_IDL_LAST( ids ) ( MDB_IDL_IS_RANGE(ids) \ + ? ids[2] : ids[ids[0]] ) + +#define MDB_IDL_N( ids ) ( MDB_IDL_IS_RANGE(ids) \ + ? (ids[2]-ids[1])+1 : ids[0] ) + +int mdb_midl_insert( ID *ids, ID id ); + +#endif /* _MDB_MIDL_H_ */ diff --git a/servers/slapd/back-mdb/modify.c b/servers/slapd/back-mdb/modify.c new file mode 100644 index 0000000000..128077ed2c --- /dev/null +++ b/servers/slapd/back-mdb/modify.c @@ -0,0 +1,782 @@ +/* modify.c - mdb backend modify routine */ +/* $OpenLDAP$ */ +/* This work is part of OpenLDAP Software . + * + * Copyright 2000-2011 The OpenLDAP Foundation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * . 
+ */
+
+#include "portable.h"
+
+/* NOTE(review): the operands of the next three #include lines appear to have
+ * been lost when this patch text was extracted; restore them from the
+ * original commit before building. */
+#include
+#include
+#include
+
+#include "back-mdb.h"
+
+/* Value compared against structuralObjectClass in mdb_modify_internal() */
+static struct berval scbva[] = {
+	BER_BVC("glue"),
+	BER_BVNULL
+};
+
+/* Flag the attributes whose index entries must be rewritten after desc was
+ * modified.
+ *
+ * Nothing is done unless mdb_index_mask() reports desc as indexed.
+ * got_delete != 0: the old attribute (in oldattrs) is flagged
+ *   SLAP_ATTR_IXDEL and every attribute in newattrs whose index mask
+ *   resolves to the same ix_at.bv_val is flagged SLAP_ATTR_IXADD.
+ * got_delete == 0: only the new attribute for desc is flagged
+ *   SLAP_ATTR_IXADD.
+ */
+static void
+mdb_modify_idxflags(
+	Operation *op,
+	AttributeDescription *desc,
+	int got_delete,
+	Attribute *newattrs,
+	Attribute *oldattrs )
+{
+	struct berval	ix_at;
+	AttrInfo	*ai;
+
+	/* check if modified attribute was indexed
+	 * but not in case of NOOP... */
+	ai = mdb_index_mask( op->o_bd, desc, &ix_at );
+	if ( ai ) {
+		if ( got_delete ) {
+			Attribute 	*ap;
+			struct berval	ix2;
+
+			ap = attr_find( oldattrs, desc );
+			if ( ap ) ap->a_flags |= SLAP_ATTR_IXDEL;
+
+			/* Find all other attrs that index to same slot */
+			for ( ap = newattrs; ap; ap = ap->a_next ) {
+				ai = mdb_index_mask( op->o_bd, ap->a_desc, &ix2 );
+				if ( ai && ix2.bv_val == ix_at.bv_val )
+					ap->a_flags |= SLAP_ATTR_IXADD;
+			}
+
+		} else {
+			Attribute 	*ap;
+
+			ap = attr_find( newattrs, desc );
+			if ( ap ) ap->a_flags |= SLAP_ATTR_IXADD;
+		}
+	}
+}
+
+/* Apply modlist to entry e and update the attribute indices under tid.
+ *
+ * The modifications are applied to a duplicate of e->e_attrs; the original
+ * list is kept in save_attrs.  On success e->e_attrs is the modified list and
+ * save_attrs is left for the caller's cache update to dispose of.  On any
+ * failure, and always when op->o_noop is set, the modified list is freed and
+ * e->e_attrs is reset to the original list.
+ *
+ * text/textbuf/textlen receive a diagnostic message from the modify and
+ * schema-check helpers.
+ *
+ * Returns LDAP_SUCCESS, LDAP_INSUFFICIENT_ACCESS when acl_check_modlist()
+ * refuses the change, the error of the failing modification or schema check,
+ * or the result of a failing mdb_index_values() call.
+ */
+int mdb_modify_internal(
+	Operation *op,
+	DB_TXN *tid,
+	Modifications *modlist,
+	Entry *e,
+	const char **text,
+	char *textbuf,
+	size_t textlen )
+{
+	int rc, err;
+	Modification	*mod;
+	Modifications	*ml;
+	Attribute	*save_attrs;
+	Attribute 	*ap;
+	int			glue_attr_delete = 0;
+	int			got_delete;
+
+	Debug( LDAP_DEBUG_TRACE, "mdb_modify_internal: 0x%08lx: %s\n",
+		e->e_id, e->e_dn, 0);
+
+	if ( !acl_check_modlist( op, e, modlist )) {
+		return LDAP_INSUFFICIENT_ACCESS;
+	}
+
+	/* save_attrs will be disposed of by mdb_cache_modify */
+	save_attrs = e->e_attrs;
+	e->e_attrs = attrs_dup( e->e_attrs );
+
+	/* Look for an add/replace of structuralObjectClass; glue_attr_delete
+	 * is set when value_match() against "glue" yields match == 0. */
+	for ( ml = modlist; ml != NULL; ml = ml->sml_next ) {
+		int match;
+		mod = &ml->sml_mod;
+		switch( mod->sm_op ) {
+		case LDAP_MOD_ADD:
+		case LDAP_MOD_REPLACE:
+			if ( mod->sm_desc == slap_schema.si_ad_structuralObjectClass ) {
+				value_match( &match, slap_schema.si_ad_structuralObjectClass,
+					slap_schema.si_ad_structuralObjectClass->
+						ad_type->sat_equality, SLAP_MR_VALUE_OF_ATTRIBUTE_SYNTAX,
+					&mod->sm_values[0], &scbva[0], text );
+				if ( !match ) glue_attr_delete = 1;
+			}
+		}
+		if ( glue_attr_delete )
+			break;
+	}
+
+	/* In that case drop every non-operational attribute from the working
+	 * copy before the modifications are applied. */
+	if ( glue_attr_delete ) {
+		Attribute	**app = &e->e_attrs;
+		while ( *app != NULL ) {
+			if ( !is_at_operational( (*app)->a_desc->ad_type )) {
+				Attribute *save = *app;
+				*app = (*app)->a_next;
+				attr_free( save );
+				continue;
+			}
+			app = &(*app)->a_next;
+		}
+	}
+
+	/* Apply each modification in turn; got_delete records whether old
+	 * index entries of the attribute may need to be removed. */
+	for ( ml = modlist; ml != NULL; ml = ml->sml_next ) {
+		mod = &ml->sml_mod;
+		got_delete = 0;
+
+		switch ( mod->sm_op ) {
+		case LDAP_MOD_ADD:
+			Debug(LDAP_DEBUG_ARGS,
+				"mdb_modify_internal: add %s\n",
+				mod->sm_desc->ad_cname.bv_val, 0, 0);
+			err = modify_add_values( e, mod, get_permissiveModify(op),
+				text, textbuf, textlen );
+			if( err != LDAP_SUCCESS ) {
+				Debug(LDAP_DEBUG_ARGS, "mdb_modify_internal: %d %s\n",
+					err, *text, 0);
+			}
+			break;
+
+		case LDAP_MOD_DELETE:
+			if ( glue_attr_delete ) {
+				err = LDAP_SUCCESS;
+				break;
+			}
+
+			Debug(LDAP_DEBUG_ARGS,
+				"mdb_modify_internal: delete %s\n",
+				mod->sm_desc->ad_cname.bv_val, 0, 0);
+			err = modify_delete_values( e, mod, get_permissiveModify(op),
+				text, textbuf, textlen );
+			if( err != LDAP_SUCCESS ) {
+				Debug(LDAP_DEBUG_ARGS, "mdb_modify_internal: %d %s\n",
+					err, *text, 0);
+			} else {
+				got_delete = 1;
+			}
+			break;
+
+		case LDAP_MOD_REPLACE:
+			Debug(LDAP_DEBUG_ARGS,
+				"mdb_modify_internal: replace %s\n",
+				mod->sm_desc->ad_cname.bv_val, 0, 0);
+			err = modify_replace_values( e, mod, get_permissiveModify(op),
+				text, textbuf, textlen );
+			if( err != LDAP_SUCCESS ) {
+				Debug(LDAP_DEBUG_ARGS, "mdb_modify_internal: %d %s\n",
+					err, *text, 0);
+			} else {
+				got_delete = 1;
+			}
+			break;
+
+		case LDAP_MOD_INCREMENT:
+			Debug(LDAP_DEBUG_ARGS,
+				"mdb_modify_internal: increment %s\n",
+				mod->sm_desc->ad_cname.bv_val, 0, 0);
+			err = modify_increment_values( e, mod, get_permissiveModify(op),
+				text, textbuf, textlen );
+			if( err != LDAP_SUCCESS ) {
+				Debug(LDAP_DEBUG_ARGS,
+					"mdb_modify_internal: %d %s\n",
+					err, *text, 0);
+			} else {
+				got_delete = 1;
+			}
+			break;
+
+		case SLAP_MOD_SOFTADD:
+			Debug(LDAP_DEBUG_ARGS,
+				"mdb_modify_internal: softadd %s\n",
+				mod->sm_desc->ad_cname.bv_val, 0, 0);
+			/* Avoid problems in index_add_mods()
+			 * We need to add index if necessary.
+			 */
+			mod->sm_op = LDAP_MOD_ADD;
+
+			err = modify_add_values( e, mod, get_permissiveModify(op),
+				text, textbuf, textlen );
+
+			mod->sm_op = SLAP_MOD_SOFTADD;
+
+			if ( err == LDAP_TYPE_OR_VALUE_EXISTS ) {
+				err = LDAP_SUCCESS;
+			}
+
+			if( err != LDAP_SUCCESS ) {
+				Debug(LDAP_DEBUG_ARGS, "mdb_modify_internal: %d %s\n",
+					err, *text, 0);
+			}
+			break;
+
+		case SLAP_MOD_SOFTDEL:
+			Debug(LDAP_DEBUG_ARGS,
+				"mdb_modify_internal: softdel %s\n",
+				mod->sm_desc->ad_cname.bv_val, 0, 0);
+			/* Avoid problems in index_delete_mods()
+			 * We need to add index if necessary.
+			 */
+			mod->sm_op = LDAP_MOD_DELETE;
+
+			err = modify_delete_values( e, mod, get_permissiveModify(op),
+				text, textbuf, textlen );
+
+			mod->sm_op = SLAP_MOD_SOFTDEL;
+
+			if ( err == LDAP_NO_SUCH_ATTRIBUTE ) {
+				err = LDAP_SUCCESS;
+			}
+
+			if( err != LDAP_SUCCESS ) {
+				Debug(LDAP_DEBUG_ARGS, "mdb_modify_internal: %d %s\n",
+					err, *text, 0);
+			}
+			break;
+
+		case SLAP_MOD_ADD_IF_NOT_PRESENT:
+			if ( attr_find( e->e_attrs, mod->sm_desc ) != NULL ) {
+				/* skip */
+				err = LDAP_SUCCESS;
+				break;
+			}
+
+			Debug(LDAP_DEBUG_ARGS,
+				"mdb_modify_internal: add_if_not_present %s\n",
+				mod->sm_desc->ad_cname.bv_val, 0, 0);
+			/* Avoid problems in index_add_mods()
+			 * We need to add index if necessary.
+			 */
+			mod->sm_op = LDAP_MOD_ADD;
+
+			err = modify_add_values( e, mod, get_permissiveModify(op),
+				text, textbuf, textlen );
+
+			mod->sm_op = SLAP_MOD_ADD_IF_NOT_PRESENT;
+
+			if( err != LDAP_SUCCESS ) {
+				Debug(LDAP_DEBUG_ARGS, "mdb_modify_internal: %d %s\n",
+					err, *text, 0);
+			}
+			break;
+
+		default:
+			Debug(LDAP_DEBUG_ANY, "mdb_modify_internal: invalid op %d\n",
+				mod->sm_op, 0, 0);
+			*text = "Invalid modify operation";
+			err = LDAP_OTHER;
+			Debug(LDAP_DEBUG_ARGS, "mdb_modify_internal: %d %s\n",
+				err, *text, 0);
+		}
+
+		if ( err != LDAP_SUCCESS ) {
+			attrs_free( e->e_attrs );
+			e->e_attrs = save_attrs;
+			/* unlock entry, delete from cache */
+			return err;
+		}
+
+		/* If objectClass was modified, reset the flags */
+		if ( mod->sm_desc == slap_schema.si_ad_objectClass ) {
+			e->e_ocflags = 0;
+		}
+
+		if ( glue_attr_delete ) e->e_ocflags = 0;
+
+
+		/* check if modified attribute was indexed
+		 * but not in case of NOOP... */
+		if ( !op->o_noop ) {
+			mdb_modify_idxflags( op, mod->sm_desc, got_delete, e->e_attrs, save_attrs );
+		}
+	}
+
+	/* check that the entry still obeys the schema */
+	ap = NULL;
+	rc = entry_schema_check( op, e, save_attrs, get_relax(op), 0, &ap,
+		text, textbuf, textlen );
+	if ( rc != LDAP_SUCCESS || op->o_noop ) {
+		attrs_free( e->e_attrs );
+		/* clear the indexing flags */
+		for ( ap = save_attrs; ap != NULL; ap = ap->a_next ) {
+			ap->a_flags &= ~(SLAP_ATTR_IXADD|SLAP_ATTR_IXDEL);
+		}
+		e->e_attrs = save_attrs;
+
+		if ( rc != LDAP_SUCCESS ) {
+			Debug( LDAP_DEBUG_ANY,
+				"entry failed schema check: %s\n",
+				*text, 0, 0 );
+		}
+
+		/* if NOOP then silently revert to saved attrs */
+		return rc;
+	}
+
+	/* structuralObjectClass modified! */
+	if ( ap ) {
+		assert( ap->a_desc == slap_schema.si_ad_structuralObjectClass );
+		if ( !op->o_noop ) {
+			mdb_modify_idxflags( op, slap_schema.si_ad_structuralObjectClass,
+				1, e->e_attrs, save_attrs );
+		}
+	}
+
+	/* update the indices of the modified attributes */
+
+	/* start with deleting the old index entries */
+	for ( ap = save_attrs; ap != NULL; ap = ap->a_next ) {
+		if ( ap->a_flags & SLAP_ATTR_IXDEL ) {
+			struct berval *vals;
+			Attribute *a2;
+			ap->a_flags &= ~SLAP_ATTR_IXDEL;
+			a2 = attr_find( e->e_attrs, ap->a_desc );
+			if ( a2 ) {
+				/* need to detect which values were deleted */
+				int i, j;
+				vals = op->o_tmpalloc( (ap->a_numvals + 1) *
+					sizeof(struct berval), op->o_tmpmemctx );
+				j = 0;
+				for ( i=0; i < ap->a_numvals; i++ ) {
+					rc = attr_valfind( a2, SLAP_MR_ASSERTED_VALUE_NORMALIZED_MATCH,
+						&ap->a_nvals[i], NULL, op->o_tmpmemctx );
+					/* Save deleted values */
+					if ( rc == LDAP_NO_SUCH_ATTRIBUTE )
+						vals[j++] = ap->a_nvals[i];
+				}
+				BER_BVZERO(vals+j);
+			} else {
+				/* attribute was completely deleted */
+				vals = ap->a_nvals;
+			}
+			rc = 0;
+			if ( !BER_BVISNULL( vals )) {
+				rc = mdb_index_values( op, tid, ap->a_desc,
+					vals, e->e_id, SLAP_INDEX_DELETE_OP );
+				if ( rc != LDAP_SUCCESS ) {
+					Debug( LDAP_DEBUG_ANY,
+						"%s: attribute \"%s\" index delete failure\n",
+						op->o_log_prefix, ap->a_desc->ad_cname.bv_val, 0 );
+					attrs_free( e->e_attrs );
+					e->e_attrs = save_attrs;
+				}
+			}
+			/* vals is a temporary array only when it was built above */
+			if ( vals != ap->a_nvals )
+				op->o_tmpfree( vals, op->o_tmpmemctx );
+			if ( rc ) return rc;
+		}
+	}
+
+	/* add the new index entries */
+	for ( ap = e->e_attrs; ap != NULL; ap = ap->a_next ) {
+		if (ap->a_flags & SLAP_ATTR_IXADD) {
+			ap->a_flags &= ~SLAP_ATTR_IXADD;
+			rc = mdb_index_values( op, tid, ap->a_desc,
+				ap->a_nvals,
+				e->e_id, SLAP_INDEX_ADD_OP );
+			if ( rc != LDAP_SUCCESS ) {
+				Debug( LDAP_DEBUG_ANY,
+				       "%s: attribute \"%s\" index add failure\n",
+					op->o_log_prefix, ap->a_desc->ad_cname.bv_val, 0 );
+				attrs_free( e->e_attrs );
+				e->e_attrs = save_attrs;
+				return rc;
+			}
+		}
+	}
+
+	return rc;
+}
+
+
+/* Modify handler: apply op->orm_modlist to the entry named by
+ * op->o_req_ndn.
+ *
+ * Outline of the visible flow:
+ *   1. begin transaction ltid and register opinfo on op->o_extra;
+ *   2. look the entry up with mdb_dn2entry(); answer with a referral when
+ *      it is missing, glue, or a referral entry (unless manageDSAit);
+ *   3. evaluate the assertion and pre-read controls;
+ *   4. inside nested transaction lt2, run mdb_modify_internal() on a copy
+ *      of the entry (dummy) and store it with mdb_id2entry_update();
+ *   5. commit lt2, evaluate the post-read control, then either abort ltid
+ *      (no-op request) or update the cache and commit ltid.
+ * Any step that reports DB_LOCK_DEADLOCK or DB_LOCK_NOTGRANTED jumps to
+ * the retry label, which aborts ltid, backs off and starts over.
+ *
+ * The result is sent to the client from here; the return value is
+ * rs->sr_err.
+ */
+int
+mdb_modify( Operation *op, SlapReply *rs )
+{
+	struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private;
+	Entry		*e = NULL;
+	EntryInfo	*ei = NULL;
+	int		manageDSAit = get_manageDSAit( op );
+	char textbuf[SLAP_TEXT_BUFLEN];
+	size_t textlen = sizeof textbuf;
+	DB_TXN	*ltid = NULL, *lt2;
+	struct mdb_op_info opinfo = {{{ 0 }}};
+	Entry		dummy = {0};
+
+	DB_LOCK		lock;
+
+	int		num_retries = 0;
+
+	LDAPControl **preread_ctrl = NULL;
+	LDAPControl **postread_ctrl = NULL;
+	LDAPControl *ctrls[SLAP_MAX_RESPONSE_CONTROLS];
+	int num_ctrls = 0;
+
+	int rc;
+
+#ifdef LDAP_X_TXN
+	int settle = 0;
+#endif
+
+	Debug( LDAP_DEBUG_ARGS, LDAP_XSTRING(mdb_modify) ": %s\n",
+		op->o_req_dn.bv_val, 0, 0 );
+
+#ifdef LDAP_X_TXN
+	if( op->o_txnSpec ) {
+		/* acquire connection lock */
+		ldap_pvt_thread_mutex_lock( &op->o_conn->c_mutex );
+		if( op->o_conn->c_txn == CONN_TXN_INACTIVE ) {
+			rs->sr_text = "invalid transaction identifier";
+			rs->sr_err = LDAP_X_TXN_ID_INVALID;
+			goto txnReturn;
+		} else if( op->o_conn->c_txn == CONN_TXN_SETTLE ) {
+			settle=1;
+			goto txnReturn;
+		}
+
+		if( op->o_conn->c_txn_backend == NULL ) {
+			op->o_conn->c_txn_backend = op->o_bd;
+
+		} else if( op->o_conn->c_txn_backend != op->o_bd ) {
+			rs->sr_text = "transaction cannot span multiple database contexts";
+			rs->sr_err = LDAP_AFFECTS_MULTIPLE_DSAS;
+			goto txnReturn;
+		}
+
+		/* insert operation into transaction */
+
+		rs->sr_text = "transaction specified";
+		rs->sr_err = LDAP_X_TXN_SPECIFY_OKAY;
+
+txnReturn:
+		/* release connection lock */
+		ldap_pvt_thread_mutex_unlock( &op->o_conn->c_mutex );
+
+		if( !settle ) {
+			send_ldap_result( op, rs );
+			return rs->sr_err;
+		}
+	}
+#endif
+
+	ctrls[num_ctrls] = NULL;
+
+	/* Don't touch the opattrs, if this is a contextCSN update
+	 * initiated from updatedn */
+	if ( !be_isupdate(op) || !op->orm_modlist || op->orm_modlist->sml_next ||
+		 op->orm_modlist->sml_desc != slap_schema.si_ad_contextCSN ) {
+
+		slap_mods_opattrs( op, &op->orm_modlist, 1 );
+	}
+
+	/* This block is entered only through "goto retry". */
+	if( 0 ) {
+retry:	/* transaction retry */
+		if ( dummy.e_attrs ) {
+			attrs_free( dummy.e_attrs );
+			dummy.e_attrs = NULL;
+		}
+		if( e != NULL ) {
+			mdb_unlocked_cache_return_entry_w(&mdb->bi_cache, e);
+			e = NULL;
+		}
+		Debug(LDAP_DEBUG_TRACE,
+			LDAP_XSTRING(mdb_modify) ": retrying...\n", 0, 0, 0);
+
+		rs->sr_err = TXN_ABORT( ltid );
+		ltid = NULL;
+		LDAP_SLIST_REMOVE( &op->o_extra, &opinfo.boi_oe, OpExtra, oe_next );
+		opinfo.boi_oe.oe_key = NULL;
+		op->o_do_not_cache = opinfo.boi_acl_cache;
+		if( rs->sr_err != 0 ) {
+			rs->sr_err = LDAP_OTHER;
+			rs->sr_text = "internal error";
+			goto return_results;
+		}
+		if ( op->o_abandon ) {
+			rs->sr_err = SLAPD_ABANDON;
+			goto return_results;
+		}
+		mdb_trans_backoff( ++num_retries );
+	}
+
+	/* begin transaction */
+	rs->sr_err = TXN_BEGIN( mdb->bi_dbenv, NULL, &ltid,
+		mdb->bi_db_opflags );
+	rs->sr_text = NULL;
+	if( rs->sr_err != 0 ) {
+		Debug( LDAP_DEBUG_TRACE,
+			LDAP_XSTRING(mdb_modify) ": txn_begin failed: "
+			"%s (%d)\n", db_strerror(rs->sr_err), rs->sr_err, 0 );
+		rs->sr_err = LDAP_OTHER;
+		rs->sr_text = "internal error";
+		goto return_results;
+	}
+
+	opinfo.boi_oe.oe_key = mdb;
+	opinfo.boi_txn = ltid;
+	opinfo.boi_err = 0;
+	opinfo.boi_acl_cache = op->o_do_not_cache;
+	LDAP_SLIST_INSERT_HEAD( &op->o_extra, &opinfo.boi_oe, oe_next );
+
+	/* get entry or ancestor */
+	rs->sr_err = mdb_dn2entry( op, ltid, &op->o_req_ndn, &ei, 1,
+		&lock );
+
+	if ( rs->sr_err != 0 ) {
+		Debug( LDAP_DEBUG_TRACE,
+			LDAP_XSTRING(mdb_modify) ": dn2entry failed (%d)\n",
+			rs->sr_err, 0, 0 );
+		switch( rs->sr_err ) {
+		case DB_LOCK_DEADLOCK:
+		case DB_LOCK_NOTGRANTED:
+			goto retry;
+		case DB_NOTFOUND:
+			break;
+		case LDAP_BUSY:
+			rs->sr_text = "ldap server busy";
+			goto return_results;
+		default:
+			rs->sr_err = LDAP_OTHER;
+			rs->sr_text = "internal error";
+			goto return_results;
+		}
+	}
+
+	e = ei->bei_e;
+
+	/* acquire and lock entry */
+	/* FIXME: dn2entry() should return non-glue entry */
+	if (( rs->sr_err == DB_NOTFOUND ) ||
+		( !manageDSAit && e && is_entry_glue( e )))
+	{
+		if ( e != NULL ) {
+			rs->sr_matched = ch_strdup( e->e_dn );
+			rs->sr_ref = is_entry_referral( e )
+				? get_entry_referrals( op, e )
+				: NULL;
+			mdb_unlocked_cache_return_entry_r (&mdb->bi_cache, e);
+			e = NULL;
+
+		} else {
+			rs->sr_ref = referral_rewrite( default_referral, NULL,
+				&op->o_req_dn, LDAP_SCOPE_DEFAULT );
+		}
+
+		rs->sr_err = LDAP_REFERRAL;
+		send_ldap_result( op, rs );
+
+		if ( rs->sr_ref != default_referral ) {
+			ber_bvarray_free( rs->sr_ref );
+		}
+		free( (char *)rs->sr_matched );
+		rs->sr_ref = NULL;
+		rs->sr_matched = NULL;
+
+		goto done;
+	}
+
+	if ( !manageDSAit && is_entry_referral( e ) ) {
+		/* entry is a referral, don't allow modify */
+		rs->sr_ref = get_entry_referrals( op, e );
+
+		Debug( LDAP_DEBUG_TRACE,
+			LDAP_XSTRING(mdb_modify) ": entry is referral\n",
+			0, 0, 0 );
+
+		rs->sr_err = LDAP_REFERRAL;
+		rs->sr_matched = e->e_name.bv_val;
+		send_ldap_result( op, rs );
+
+		ber_bvarray_free( rs->sr_ref );
+		rs->sr_ref = NULL;
+		rs->sr_matched = NULL;
+		goto done;
+	}
+
+	if ( get_assert( op ) &&
+		( test_filter( op, e, get_assertion( op )) != LDAP_COMPARE_TRUE ))
+	{
+		rs->sr_err = LDAP_ASSERTION_FAILED;
+		goto return_results;
+	}
+
+	if( op->o_preread ) {
+		if( preread_ctrl == NULL ) {
+			preread_ctrl = &ctrls[num_ctrls++];
+			ctrls[num_ctrls] = NULL;
+		}
+		if ( slap_read_controls( op, rs, e,
+			&slap_pre_read_bv, preread_ctrl ) )
+		{
+			Debug( LDAP_DEBUG_TRACE,
+				"<=- " LDAP_XSTRING(mdb_modify) ": pre-read "
+				"failed!\n", 0, 0, 0 );
+			if ( op->o_preread & SLAP_CONTROL_CRITICAL ) {
+				/* FIXME: is it correct to abort
+				 * operation if control fails? */
+				goto return_results;
+			}
+		}
+	}
+
+	/* nested transaction */
+	rs->sr_err = TXN_BEGIN( mdb->bi_dbenv, ltid, &lt2, mdb->bi_db_opflags );
+	rs->sr_text = NULL;
+	if( rs->sr_err != 0 ) {
+		Debug( LDAP_DEBUG_TRACE,
+			LDAP_XSTRING(mdb_modify) ": txn_begin(2) failed: " "%s (%d)\n",
+			db_strerror(rs->sr_err), rs->sr_err, 0 );
+		rs->sr_err = LDAP_OTHER;
+		rs->sr_text = "internal error";
+		goto return_results;
+	}
+	/* Modify the entry */
+	/* dummy starts as a shallow copy, so dummy.e_attrs aliases e->e_attrs
+	 * until mdb_modify_internal() replaces it with a duplicate. */
+	dummy = *e;
+	rs->sr_err = mdb_modify_internal( op, lt2, op->orm_modlist,
+		&dummy, &rs->sr_text, textbuf, textlen );
+
+	if( rs->sr_err != LDAP_SUCCESS ) {
+		Debug( LDAP_DEBUG_TRACE,
+			LDAP_XSTRING(mdb_modify) ": modify failed (%d)\n",
+			rs->sr_err, 0, 0 );
+		if ( (rs->sr_err == LDAP_INSUFFICIENT_ACCESS) && opinfo.boi_err ) {
+			rs->sr_err = opinfo.boi_err;
+		}
+		/* Only free attrs if they were dup'd. */
+		if ( dummy.e_attrs == e->e_attrs ) dummy.e_attrs = NULL;
+		switch( rs->sr_err ) {
+		case DB_LOCK_DEADLOCK:
+		case DB_LOCK_NOTGRANTED:
+			goto retry;
+		}
+		goto return_results;
+	}
+
+	/* change the entry itself */
+	rs->sr_err = mdb_id2entry_update( op->o_bd, lt2, &dummy );
+	if ( rs->sr_err != 0 ) {
+		Debug( LDAP_DEBUG_TRACE,
+			LDAP_XSTRING(mdb_modify) ": id2entry update failed " "(%d)\n",
+			rs->sr_err, 0, 0 );
+		switch( rs->sr_err ) {
+		case DB_LOCK_DEADLOCK:
+		case DB_LOCK_NOTGRANTED:
+			goto retry;
+		}
+		rs->sr_text = "entry update failed";
+		goto return_results;
+	}
+
+	if ( TXN_COMMIT( lt2, 0 ) != 0 ) {
+		rs->sr_err = LDAP_OTHER;
+		rs->sr_text = "txn_commit(2) failed";
+		goto return_results;
+	}
+
+	if( op->o_postread ) {
+		if( postread_ctrl == NULL ) {
+			postread_ctrl = &ctrls[num_ctrls++];
+			ctrls[num_ctrls] = NULL;
+		}
+		if( slap_read_controls( op, rs, &dummy,
+			&slap_post_read_bv, postread_ctrl ) )
+		{
+			Debug( LDAP_DEBUG_TRACE,
+				"<=- " LDAP_XSTRING(mdb_modify)
+				": post-read failed!\n", 0, 0, 0 );
+			if ( op->o_postread & SLAP_CONTROL_CRITICAL ) {
+				/* FIXME: is it correct to abort
+				 * operation if control fails? */
+				goto return_results;
+			}
+		}
+	}
+
+	if( op->o_noop ) {
+		if ( ( rs->sr_err = TXN_ABORT( ltid ) ) != 0 ) {
+			rs->sr_text = "txn_abort (no-op) failed";
+		} else {
+			rs->sr_err = LDAP_X_NO_OPERATION;
+			ltid = NULL;
+			/* Only free attrs if they were dup'd. */
+			if ( dummy.e_attrs == e->e_attrs ) dummy.e_attrs = NULL;
+			goto return_results;
+		}
+	} else {
+		/* may have changed in mdb_modify_internal() */
+		e->e_ocflags = dummy.e_ocflags;
+		rc = mdb_cache_modify( mdb, e, dummy.e_attrs, ltid, &lock );
+		switch( rc ) {
+		case DB_LOCK_DEADLOCK:
+		case DB_LOCK_NOTGRANTED:
+			goto retry;
+		}
+		/* cleared so that return_results does not free the new list */
+		dummy.e_attrs = NULL;
+
+		rs->sr_err = TXN_COMMIT( ltid, 0 );
+	}
+	ltid = NULL;
+	LDAP_SLIST_REMOVE( &op->o_extra, &opinfo.boi_oe, OpExtra, oe_next );
+	opinfo.boi_oe.oe_key = NULL;
+
+	if( rs->sr_err != 0 ) {
+		Debug( LDAP_DEBUG_TRACE,
+			LDAP_XSTRING(mdb_modify) ": txn_%s failed: %s (%d)\n",
+			op->o_noop ? "abort (no-op)" : "commit",
+			db_strerror(rs->sr_err), rs->sr_err );
+		rs->sr_err = LDAP_OTHER;
+		rs->sr_text = "commit failed";
+
+		goto return_results;
+	}
+
+	Debug( LDAP_DEBUG_TRACE,
+		LDAP_XSTRING(mdb_modify) ": updated%s id=%08lx dn=\"%s\"\n",
+		op->o_noop ? " (no-op)" : "",
+		dummy.e_id, op->o_req_dn.bv_val );
+
+	rs->sr_err = LDAP_SUCCESS;
+	rs->sr_text = NULL;
+	if( num_ctrls ) rs->sr_ctrls = ctrls;
+
+return_results:
+	if( dummy.e_attrs ) {
+		attrs_free( dummy.e_attrs );
+	}
+	send_ldap_result( op, rs );
+
+	if( rs->sr_err == LDAP_SUCCESS && mdb->bi_txn_cp_kbyte ) {
+		TXN_CHECKPOINT( mdb->bi_dbenv,
+			mdb->bi_txn_cp_kbyte, mdb->bi_txn_cp_min, 0 );
+	}
+
+done:
+	slap_graduate_commit_csn( op );
+
+	/* a transaction still open at this point is abandoned */
+	if( ltid != NULL ) {
+		TXN_ABORT( ltid );
+	}
+	if ( opinfo.boi_oe.oe_key ) {
+		LDAP_SLIST_REMOVE( &op->o_extra, &opinfo.boi_oe, OpExtra, oe_next );
+	}
+
+	if( e != NULL ) {
+		mdb_unlocked_cache_return_entry_w (&mdb->bi_cache, e);
+	}
+
+	if( preread_ctrl != NULL && (*preread_ctrl) != NULL ) {
+		slap_sl_free( (*preread_ctrl)->ldctl_value.bv_val, op->o_tmpmemctx );
+		slap_sl_free( *preread_ctrl, op->o_tmpmemctx );
+	}
+	if( postread_ctrl != NULL && (*postread_ctrl) != NULL ) {
+		slap_sl_free( (*postread_ctrl)->ldctl_value.bv_val, op->o_tmpmemctx );
+		slap_sl_free( *postread_ctrl, op->o_tmpmemctx );
+	}
+
+	rs->sr_text = NULL;
+
+	return rs->sr_err;
+}
diff --git a/servers/slapd/back-mdb/modrdn.c b/servers/slapd/back-mdb/modrdn.c
new file mode 100644
index 0000000000..01f236c9f9
--- /dev/null
+++ b/servers/slapd/back-mdb/modrdn.c
@@ -0,0 +1,838 @@
+/* modrdn.c - mdb backend modrdn routine */
+/* $OpenLDAP$ */
+/* This work is part of OpenLDAP Software .
+ *
+ * Copyright 2000-2011 The OpenLDAP Foundation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in the file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * .
+ */ + +#include "portable.h" + +#include +#include + +#include "back-mdb.h" + +int +mdb_modrdn( Operation *op, SlapReply *rs ) +{ + struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private; + AttributeDescription *children = slap_schema.si_ad_children; + AttributeDescription *entry = slap_schema.si_ad_entry; + struct berval p_dn, p_ndn; + struct berval new_dn = {0, NULL}, new_ndn = {0, NULL}; + Entry *e = NULL; + Entry *p = NULL; + EntryInfo *ei = NULL, *eip = NULL, *nei = NULL, *neip = NULL; + /* LDAP v2 supporting correct attribute handling. */ + char textbuf[SLAP_TEXT_BUFLEN]; + size_t textlen = sizeof textbuf; + DB_TXN *ltid = NULL, *lt2; + struct mdb_op_info opinfo = {{{ 0 }}}; + Entry dummy = {0}; + + Entry *np = NULL; /* newSuperior Entry */ + struct berval *np_dn = NULL; /* newSuperior dn */ + struct berval *np_ndn = NULL; /* newSuperior ndn */ + struct berval *new_parent_dn = NULL; /* np_dn, p_dn, or NULL */ + + int manageDSAit = get_manageDSAit( op ); + + DB_LOCK lock, plock, nplock; + + int num_retries = 0; + + LDAPControl **preread_ctrl = NULL; + LDAPControl **postread_ctrl = NULL; + LDAPControl *ctrls[SLAP_MAX_RESPONSE_CONTROLS]; + int num_ctrls = 0; + + int rc; + + int parent_is_glue = 0; + int parent_is_leaf = 0; + +#ifdef LDAP_X_TXN + int settle = 0; +#endif + + Debug( LDAP_DEBUG_TRACE, "==>" LDAP_XSTRING(mdb_modrdn) "(%s,%s,%s)\n", + op->o_req_dn.bv_val,op->oq_modrdn.rs_newrdn.bv_val, + op->oq_modrdn.rs_newSup ? 
op->oq_modrdn.rs_newSup->bv_val : "NULL" ); + +#ifdef LDAP_X_TXN + if( op->o_txnSpec ) { + /* acquire connection lock */ + ldap_pvt_thread_mutex_lock( &op->o_conn->c_mutex ); + if( op->o_conn->c_txn == CONN_TXN_INACTIVE ) { + rs->sr_text = "invalid transaction identifier"; + rs->sr_err = LDAP_X_TXN_ID_INVALID; + goto txnReturn; + } else if( op->o_conn->c_txn == CONN_TXN_SETTLE ) { + settle=1; + goto txnReturn; + } + + if( op->o_conn->c_txn_backend == NULL ) { + op->o_conn->c_txn_backend = op->o_bd; + + } else if( op->o_conn->c_txn_backend != op->o_bd ) { + rs->sr_text = "transaction cannot span multiple database contexts"; + rs->sr_err = LDAP_AFFECTS_MULTIPLE_DSAS; + goto txnReturn; + } + + /* insert operation into transaction */ + + rs->sr_text = "transaction specified"; + rs->sr_err = LDAP_X_TXN_SPECIFY_OKAY; + +txnReturn: + /* release connection lock */ + ldap_pvt_thread_mutex_unlock( &op->o_conn->c_mutex ); + + if( !settle ) { + send_ldap_result( op, rs ); + return rs->sr_err; + } + } +#endif + + ctrls[num_ctrls] = NULL; + + slap_mods_opattrs( op, &op->orr_modlist, 1 ); + + if( 0 ) { +retry: /* transaction retry */ + if ( dummy.e_attrs ) { + attrs_free( dummy.e_attrs ); + dummy.e_attrs = NULL; + } + if (e != NULL) { + mdb_unlocked_cache_return_entry_w(&mdb->bi_cache, e); + e = NULL; + } + if (p != NULL) { + mdb_unlocked_cache_return_entry_r(&mdb->bi_cache, p); + p = NULL; + } + if (np != NULL) { + mdb_unlocked_cache_return_entry_r(&mdb->bi_cache, np); + np = NULL; + } + Debug( LDAP_DEBUG_TRACE, "==>" LDAP_XSTRING(mdb_modrdn) + ": retrying...\n", 0, 0, 0 ); + + rs->sr_err = TXN_ABORT( ltid ); + ltid = NULL; + LDAP_SLIST_REMOVE( &op->o_extra, &opinfo.boi_oe, OpExtra, oe_next ); + opinfo.boi_oe.oe_key = NULL; + op->o_do_not_cache = opinfo.boi_acl_cache; + if( rs->sr_err != 0 ) { + rs->sr_err = LDAP_OTHER; + rs->sr_text = "internal error"; + goto return_results; + } + if ( op->o_abandon ) { + rs->sr_err = SLAPD_ABANDON; + goto return_results; + } + parent_is_glue = 
0; + parent_is_leaf = 0; + mdb_trans_backoff( ++num_retries ); + } + + /* begin transaction */ + rs->sr_err = TXN_BEGIN( mdb->bi_dbenv, NULL, <id, + mdb->bi_db_opflags ); + rs->sr_text = NULL; + if( rs->sr_err != 0 ) { + Debug( LDAP_DEBUG_TRACE, + LDAP_XSTRING(mdb_modrdn) ": txn_begin failed: " + "%s (%d)\n", db_strerror(rs->sr_err), rs->sr_err, 0 ); + rs->sr_err = LDAP_OTHER; + rs->sr_text = "internal error"; + goto return_results; + } + + opinfo.boi_oe.oe_key = mdb; + opinfo.boi_txn = ltid; + opinfo.boi_err = 0; + opinfo.boi_acl_cache = op->o_do_not_cache; + LDAP_SLIST_INSERT_HEAD( &op->o_extra, &opinfo.boi_oe, oe_next ); + + /* get entry */ + rs->sr_err = mdb_dn2entry( op, ltid, &op->o_req_ndn, &ei, 1, + &lock ); + + switch( rs->sr_err ) { + case 0: + case DB_NOTFOUND: + break; + case DB_LOCK_DEADLOCK: + case DB_LOCK_NOTGRANTED: + goto retry; + case LDAP_BUSY: + rs->sr_text = "ldap server busy"; + goto return_results; + default: + rs->sr_err = LDAP_OTHER; + rs->sr_text = "internal error"; + goto return_results; + } + + e = ei->bei_e; + /* FIXME: dn2entry() should return non-glue entry */ + if (( rs->sr_err == DB_NOTFOUND ) || + ( !manageDSAit && e && is_entry_glue( e ))) + { + if( e != NULL ) { + rs->sr_matched = ch_strdup( e->e_dn ); + rs->sr_ref = is_entry_referral( e ) + ? get_entry_referrals( op, e ) + : NULL; + mdb_unlocked_cache_return_entry_r( &mdb->bi_cache, e); + e = NULL; + + } else { + rs->sr_ref = referral_rewrite( default_referral, NULL, + &op->o_req_dn, LDAP_SCOPE_DEFAULT ); + } + + rs->sr_err = LDAP_REFERRAL; + send_ldap_result( op, rs ); + + ber_bvarray_free( rs->sr_ref ); + free( (char *)rs->sr_matched ); + rs->sr_ref = NULL; + rs->sr_matched = NULL; + + goto done; + } + + if ( get_assert( op ) && + ( test_filter( op, e, get_assertion( op )) != LDAP_COMPARE_TRUE )) + { + rs->sr_err = LDAP_ASSERTION_FAILED; + goto return_results; + } + + /* check write on old entry */ + rs->sr_err = access_allowed( op, e, entry, NULL, ACL_WRITE, NULL ); + if ( ! 
rs->sr_err ) { + switch( opinfo.boi_err ) { + case DB_LOCK_DEADLOCK: + case DB_LOCK_NOTGRANTED: + goto retry; + } + + Debug( LDAP_DEBUG_TRACE, "no access to entry\n", 0, + 0, 0 ); + rs->sr_text = "no write access to old entry"; + rs->sr_err = LDAP_INSUFFICIENT_ACCESS; + goto return_results; + } + +#ifndef MDB_HIER + rs->sr_err = mdb_cache_children( op, ltid, e ); + if ( rs->sr_err != DB_NOTFOUND ) { + switch( rs->sr_err ) { + case DB_LOCK_DEADLOCK: + case DB_LOCK_NOTGRANTED: + goto retry; + case 0: + Debug(LDAP_DEBUG_ARGS, + "<=- " LDAP_XSTRING(mdb_modrdn) + ": non-leaf %s\n", + op->o_req_dn.bv_val, 0, 0); + rs->sr_err = LDAP_NOT_ALLOWED_ON_NONLEAF; + rs->sr_text = "subtree rename not supported"; + break; + default: + Debug(LDAP_DEBUG_ARGS, + "<=- " LDAP_XSTRING(mdb_modrdn) + ": has_children failed: %s (%d)\n", + db_strerror(rs->sr_err), rs->sr_err, 0 ); + rs->sr_err = LDAP_OTHER; + rs->sr_text = "internal error"; + } + goto return_results; + } + ei->bei_state |= CACHE_ENTRY_NO_KIDS; +#endif + + if (!manageDSAit && is_entry_referral( e ) ) { + /* parent is a referral, don't allow add */ + rs->sr_ref = get_entry_referrals( op, e ); + + Debug( LDAP_DEBUG_TRACE, LDAP_XSTRING(mdb_modrdn) + ": entry %s is referral\n", e->e_dn, 0, 0 ); + + rs->sr_err = LDAP_REFERRAL, + rs->sr_matched = e->e_name.bv_val; + send_ldap_result( op, rs ); + + ber_bvarray_free( rs->sr_ref ); + rs->sr_ref = NULL; + rs->sr_matched = NULL; + goto done; + } + + if ( be_issuffix( op->o_bd, &e->e_nname ) ) { +#ifdef MDB_MULTIPLE_SUFFIXES + /* Allow renaming one suffix entry to another */ + p_ndn = slap_empty_bv; +#else + /* There can only be one suffix entry */ + rs->sr_err = LDAP_NAMING_VIOLATION; + rs->sr_text = "cannot rename suffix entry"; + goto return_results; +#endif + } else { + dnParent( &e->e_nname, &p_ndn ); + } + np_ndn = &p_ndn; + eip = ei->bei_parent; + if ( eip && eip->bei_id ) { + /* Make sure parent entry exist and we can write its + * children. 
+ */ + rs->sr_err = mdb_cache_find_id( op, ltid, + eip->bei_id, &eip, 0, &plock ); + + switch( rs->sr_err ) { + case 0: + case DB_NOTFOUND: + break; + case DB_LOCK_DEADLOCK: + case DB_LOCK_NOTGRANTED: + goto retry; + case LDAP_BUSY: + rs->sr_text = "ldap server busy"; + goto return_results; + default: + rs->sr_err = LDAP_OTHER; + rs->sr_text = "internal error"; + goto return_results; + } + + p = eip->bei_e; + if( p == NULL) { + Debug( LDAP_DEBUG_TRACE, LDAP_XSTRING(mdb_modrdn) + ": parent does not exist\n", 0, 0, 0); + rs->sr_err = LDAP_OTHER; + rs->sr_text = "old entry's parent does not exist"; + goto return_results; + } + } else { + p = (Entry *)&slap_entry_root; + } + + /* check parent for "children" acl */ + rs->sr_err = access_allowed( op, p, + children, NULL, + op->oq_modrdn.rs_newSup == NULL ? + ACL_WRITE : ACL_WDEL, + NULL ); + + if ( !p_ndn.bv_len ) + p = NULL; + + if ( ! rs->sr_err ) { + switch( opinfo.boi_err ) { + case DB_LOCK_DEADLOCK: + case DB_LOCK_NOTGRANTED: + goto retry; + } + + rs->sr_err = LDAP_INSUFFICIENT_ACCESS; + Debug( LDAP_DEBUG_TRACE, "no access to parent\n", 0, + 0, 0 ); + rs->sr_text = "no write access to old parent's children"; + goto return_results; + } + + Debug( LDAP_DEBUG_TRACE, + LDAP_XSTRING(mdb_modrdn) ": wr to children " + "of entry %s OK\n", p_ndn.bv_val, 0, 0 ); + + if ( p_ndn.bv_val == slap_empty_bv.bv_val ) { + p_dn = slap_empty_bv; + } else { + dnParent( &e->e_name, &p_dn ); + } + + Debug( LDAP_DEBUG_TRACE, + LDAP_XSTRING(mdb_modrdn) ": parent dn=%s\n", + p_dn.bv_val, 0, 0 ); + + new_parent_dn = &p_dn; /* New Parent unless newSuperior given */ + + if ( op->oq_modrdn.rs_newSup != NULL ) { + Debug( LDAP_DEBUG_TRACE, + LDAP_XSTRING(mdb_modrdn) + ": new parent \"%s\" requested...\n", + op->oq_modrdn.rs_newSup->bv_val, 0, 0 ); + + /* newSuperior == oldParent? 
*/ + if( dn_match( &p_ndn, op->oq_modrdn.rs_nnewSup ) ) { + Debug( LDAP_DEBUG_TRACE, "mdb_back_modrdn: " + "new parent \"%s\" same as the old parent \"%s\"\n", + op->oq_modrdn.rs_newSup->bv_val, p_dn.bv_val, 0 ); + op->oq_modrdn.rs_newSup = NULL; /* ignore newSuperior */ + } + } + + /* There's a MDB_MULTIPLE_SUFFIXES case here that this code doesn't + * support. E.g., two suffixes dc=foo,dc=com and dc=bar,dc=net. + * We do not allow modDN + * dc=foo,dc=com + * newrdn dc=bar + * newsup dc=net + * and we probably should. But since MULTIPLE_SUFFIXES is deprecated + * I'm ignoring this problem for now. + */ + if ( op->oq_modrdn.rs_newSup != NULL ) { + if ( op->oq_modrdn.rs_newSup->bv_len ) { + np_dn = op->oq_modrdn.rs_newSup; + np_ndn = op->oq_modrdn.rs_nnewSup; + + /* newSuperior == oldParent? - checked above */ + /* newSuperior == entry being moved?, if so ==> ERROR */ + if ( dnIsSuffix( np_ndn, &e->e_nname )) { + rs->sr_err = LDAP_NO_SUCH_OBJECT; + rs->sr_text = "new superior not found"; + goto return_results; + } + /* Get Entry with dn=newSuperior. Does newSuperior exist? */ + + rs->sr_err = mdb_dn2entry( op, ltid, np_ndn, + &neip, 0, &nplock ); + + switch( rs->sr_err ) { + case 0: np = neip->bei_e; + case DB_NOTFOUND: + break; + case DB_LOCK_DEADLOCK: + case DB_LOCK_NOTGRANTED: + goto retry; + case LDAP_BUSY: + rs->sr_text = "ldap server busy"; + goto return_results; + default: + rs->sr_err = LDAP_OTHER; + rs->sr_text = "internal error"; + goto return_results; + } + + if( np == NULL) { + Debug( LDAP_DEBUG_TRACE, + LDAP_XSTRING(mdb_modrdn) + ": newSup(ndn=%s) not here!\n", + np_ndn->bv_val, 0, 0); + rs->sr_text = "new superior not found"; + rs->sr_err = LDAP_NO_SUCH_OBJECT; + goto return_results; + } + + Debug( LDAP_DEBUG_TRACE, + LDAP_XSTRING(mdb_modrdn) + ": wr to new parent OK np=%p, id=%ld\n", + (void *) np, (long) np->e_id, 0 ); + + /* check newSuperior for "children" acl */ + rs->sr_err = access_allowed( op, np, children, + NULL, ACL_WADD, NULL ); + + if( ! 
rs->sr_err ) { + switch( opinfo.boi_err ) { + case DB_LOCK_DEADLOCK: + case DB_LOCK_NOTGRANTED: + goto retry; + } + + Debug( LDAP_DEBUG_TRACE, + LDAP_XSTRING(mdb_modrdn) + ": no wr to newSup children\n", + 0, 0, 0 ); + rs->sr_text = "no write access to new superior's children"; + rs->sr_err = LDAP_INSUFFICIENT_ACCESS; + goto return_results; + } + + if ( is_entry_alias( np ) ) { + /* parent is an alias, don't allow add */ + Debug( LDAP_DEBUG_TRACE, + LDAP_XSTRING(mdb_modrdn) + ": entry is alias\n", + 0, 0, 0 ); + rs->sr_text = "new superior is an alias"; + rs->sr_err = LDAP_ALIAS_PROBLEM; + goto return_results; + } + + if ( is_entry_referral( np ) ) { + /* parent is a referral, don't allow add */ + Debug( LDAP_DEBUG_TRACE, + LDAP_XSTRING(mdb_modrdn) + ": entry is referral\n", + 0, 0, 0 ); + rs->sr_text = "new superior is a referral"; + rs->sr_err = LDAP_OTHER; + goto return_results; + } + + } else { + np_dn = NULL; + + /* no parent, modrdn entry directly under root */ + if ( be_issuffix( op->o_bd, (struct berval *)&slap_empty_bv ) + || be_isupdate( op ) ) { + np = (Entry *)&slap_entry_root; + + /* check parent for "children" acl */ + rs->sr_err = access_allowed( op, np, + children, NULL, ACL_WADD, NULL ); + + np = NULL; + + if ( ! rs->sr_err ) { + switch( opinfo.boi_err ) { + case DB_LOCK_DEADLOCK: + case DB_LOCK_NOTGRANTED: + goto retry; + } + + rs->sr_err = LDAP_INSUFFICIENT_ACCESS; + Debug( LDAP_DEBUG_TRACE, + "no access to new superior\n", + 0, 0, 0 ); + rs->sr_text = + "no write access to new superior's children"; + goto return_results; + } + } + } + + Debug( LDAP_DEBUG_TRACE, + LDAP_XSTRING(mdb_modrdn) + ": wr to new parent's children OK\n", + 0, 0, 0 ); + + new_parent_dn = np_dn; + } + + /* Build target dn and make sure target entry doesn't exist already. 
*/ + if (!new_dn.bv_val) { + build_new_dn( &new_dn, new_parent_dn, &op->oq_modrdn.rs_newrdn, NULL ); + } + + if (!new_ndn.bv_val) { + struct berval bv = {0, NULL}; + dnNormalize( 0, NULL, NULL, &new_dn, &bv, op->o_tmpmemctx ); + ber_dupbv( &new_ndn, &bv ); + /* FIXME: why not call dnNormalize() w/o ctx? */ + op->o_tmpfree( bv.bv_val, op->o_tmpmemctx ); + } + + Debug( LDAP_DEBUG_TRACE, LDAP_XSTRING(mdb_modrdn) ": new ndn=%s\n", + new_ndn.bv_val, 0, 0 ); + + /* Shortcut the search */ + nei = neip ? neip : eip; + rs->sr_err = mdb_cache_find_ndn ( op, ltid, &new_ndn, &nei ); + if ( nei ) mdb_cache_entryinfo_unlock( nei ); + switch( rs->sr_err ) { + case DB_LOCK_DEADLOCK: + case DB_LOCK_NOTGRANTED: + goto retry; + case DB_NOTFOUND: + break; + case 0: + /* Allow rename to same DN */ + if ( nei == ei ) + break; + rs->sr_err = LDAP_ALREADY_EXISTS; + goto return_results; + default: + rs->sr_err = LDAP_OTHER; + rs->sr_text = "internal error"; + goto return_results; + } + + assert( op->orr_modlist != NULL ); + + if( op->o_preread ) { + if( preread_ctrl == NULL ) { + preread_ctrl = &ctrls[num_ctrls++]; + ctrls[num_ctrls] = NULL; + } + if( slap_read_controls( op, rs, e, + &slap_pre_read_bv, preread_ctrl ) ) + { + Debug( LDAP_DEBUG_TRACE, + "<=- " LDAP_XSTRING(mdb_modrdn) + ": pre-read failed!\n", 0, 0, 0 ); + if ( op->o_preread & SLAP_CONTROL_CRITICAL ) { + /* FIXME: is it correct to abort + * operation if control fails? 
*/ + goto return_results; + } + } + } + + /* nested transaction */ + rs->sr_err = TXN_BEGIN( mdb->bi_dbenv, ltid, <2, mdb->bi_db_opflags ); + rs->sr_text = NULL; + if( rs->sr_err != 0 ) { + Debug( LDAP_DEBUG_TRACE, + LDAP_XSTRING(mdb_modrdn) + ": txn_begin(2) failed: %s (%d)\n", + db_strerror(rs->sr_err), rs->sr_err, 0 ); + rs->sr_err = LDAP_OTHER; + rs->sr_text = "internal error"; + goto return_results; + } + + /* delete old DN */ + rs->sr_err = mdb_dn2id_delete( op, lt2, eip, e ); + if ( rs->sr_err != 0 ) { + Debug(LDAP_DEBUG_TRACE, + "<=- " LDAP_XSTRING(mdb_modrdn) + ": dn2id del failed: %s (%d)\n", + db_strerror(rs->sr_err), rs->sr_err, 0 ); + switch( rs->sr_err ) { + case DB_LOCK_DEADLOCK: + case DB_LOCK_NOTGRANTED: + goto retry; + } + rs->sr_err = LDAP_OTHER; + rs->sr_text = "DN index delete fail"; + goto return_results; + } + + /* copy the entry, then override some fields */ + dummy = *e; + dummy.e_name = new_dn; + dummy.e_nname = new_ndn; + dummy.e_attrs = NULL; + + /* add new DN */ + rs->sr_err = mdb_dn2id_add( op, lt2, neip ? 
neip : eip, &dummy ); + if ( rs->sr_err != 0 ) { + Debug(LDAP_DEBUG_TRACE, + "<=- " LDAP_XSTRING(mdb_modrdn) + ": dn2id add failed: %s (%d)\n", + db_strerror(rs->sr_err), rs->sr_err, 0 ); + switch( rs->sr_err ) { + case DB_LOCK_DEADLOCK: + case DB_LOCK_NOTGRANTED: + goto retry; + } + rs->sr_err = LDAP_OTHER; + rs->sr_text = "DN index add failed"; + goto return_results; + } + + dummy.e_attrs = e->e_attrs; + + /* modify entry */ + rs->sr_err = mdb_modify_internal( op, lt2, op->orr_modlist, &dummy, + &rs->sr_text, textbuf, textlen ); + if( rs->sr_err != LDAP_SUCCESS ) { + Debug(LDAP_DEBUG_TRACE, + "<=- " LDAP_XSTRING(mdb_modrdn) + ": modify failed: %s (%d)\n", + db_strerror(rs->sr_err), rs->sr_err, 0 ); + if ( ( rs->sr_err == LDAP_INSUFFICIENT_ACCESS ) && opinfo.boi_err ) { + rs->sr_err = opinfo.boi_err; + } + if ( dummy.e_attrs == e->e_attrs ) dummy.e_attrs = NULL; + switch( rs->sr_err ) { + case DB_LOCK_DEADLOCK: + case DB_LOCK_NOTGRANTED: + goto retry; + } + goto return_results; + } + + /* id2entry index */ + rs->sr_err = mdb_id2entry_update( op->o_bd, lt2, &dummy ); + if ( rs->sr_err != 0 ) { + Debug(LDAP_DEBUG_TRACE, + "<=- " LDAP_XSTRING(mdb_modrdn) + ": id2entry failed: %s (%d)\n", + db_strerror(rs->sr_err), rs->sr_err, 0 ); + switch( rs->sr_err ) { + case DB_LOCK_DEADLOCK: + case DB_LOCK_NOTGRANTED: + goto retry; + } + rs->sr_err = LDAP_OTHER; + rs->sr_text = "entry update failed"; + goto return_results; + } + + if ( p_ndn.bv_len != 0 ) { + parent_is_glue = is_entry_glue(p); + rs->sr_err = mdb_cache_children( op, lt2, p ); + if ( rs->sr_err != DB_NOTFOUND ) { + switch( rs->sr_err ) { + case DB_LOCK_DEADLOCK: + case DB_LOCK_NOTGRANTED: + goto retry; + case 0: + break; + default: + Debug(LDAP_DEBUG_ARGS, + "<=- " LDAP_XSTRING(mdb_modrdn) + ": has_children failed: %s (%d)\n", + db_strerror(rs->sr_err), rs->sr_err, 0 ); + rs->sr_err = LDAP_OTHER; + rs->sr_text = "internal error"; + goto return_results; + } + parent_is_leaf = 1; + } + 
mdb_unlocked_cache_return_entry_r(&mdb->bi_cache, p); + p = NULL; + } + + if ( TXN_COMMIT( lt2, 0 ) != 0 ) { + rs->sr_err = LDAP_OTHER; + rs->sr_text = "txn_commit(2) failed"; + goto return_results; + } + + if( op->o_postread ) { + if( postread_ctrl == NULL ) { + postread_ctrl = &ctrls[num_ctrls++]; + ctrls[num_ctrls] = NULL; + } + if( slap_read_controls( op, rs, &dummy, + &slap_post_read_bv, postread_ctrl ) ) + { + Debug( LDAP_DEBUG_TRACE, + "<=- " LDAP_XSTRING(mdb_modrdn) + ": post-read failed!\n", 0, 0, 0 ); + if ( op->o_postread & SLAP_CONTROL_CRITICAL ) { + /* FIXME: is it correct to abort + * operation if control fails? */ + goto return_results; + } + } + } + + if( op->o_noop ) { + if(( rs->sr_err=TXN_ABORT( ltid )) != 0 ) { + rs->sr_text = "txn_abort (no-op) failed"; + } else { + rs->sr_err = LDAP_X_NO_OPERATION; + ltid = NULL; + /* Only free attrs if they were dup'd. */ + if ( dummy.e_attrs == e->e_attrs ) dummy.e_attrs = NULL; + goto return_results; + } + + } else { + rc = mdb_cache_modrdn( mdb, e, &op->orr_nnewrdn, &dummy, neip, + ltid, &lock ); + switch( rc ) { + case DB_LOCK_DEADLOCK: + case DB_LOCK_NOTGRANTED: + goto retry; + } + dummy.e_attrs = NULL; + new_dn.bv_val = NULL; + new_ndn.bv_val = NULL; + + if(( rs->sr_err=TXN_COMMIT( ltid, 0 )) != 0 ) { + rs->sr_text = "txn_commit failed"; + } else { + rs->sr_err = LDAP_SUCCESS; + } + } + + ltid = NULL; + LDAP_SLIST_REMOVE( &op->o_extra, &opinfo.boi_oe, OpExtra, oe_next ); + opinfo.boi_oe.oe_key = NULL; + + if( rs->sr_err != LDAP_SUCCESS ) { + Debug( LDAP_DEBUG_TRACE, + LDAP_XSTRING(mdb_modrdn) ": %s : %s (%d)\n", + rs->sr_text, db_strerror(rs->sr_err), rs->sr_err ); + rs->sr_err = LDAP_OTHER; + + goto return_results; + } + + Debug(LDAP_DEBUG_TRACE, + LDAP_XSTRING(mdb_modrdn) + ": rdn modified%s id=%08lx dn=\"%s\"\n", + op->o_noop ? 
" (no-op)" : "", + dummy.e_id, op->o_req_dn.bv_val ); + rs->sr_text = NULL; + if( num_ctrls ) rs->sr_ctrls = ctrls; + +return_results: + if ( dummy.e_attrs ) { + attrs_free( dummy.e_attrs ); + } + send_ldap_result( op, rs ); + + if( rs->sr_err == LDAP_SUCCESS && mdb->bi_txn_cp_kbyte ) { + TXN_CHECKPOINT( mdb->bi_dbenv, + mdb->bi_txn_cp_kbyte, mdb->bi_txn_cp_min, 0 ); + } + + if ( rs->sr_err == LDAP_SUCCESS && parent_is_glue && parent_is_leaf ) { + op->o_delete_glue_parent = 1; + } + +done: + slap_graduate_commit_csn( op ); + + if( new_dn.bv_val != NULL ) free( new_dn.bv_val ); + if( new_ndn.bv_val != NULL ) free( new_ndn.bv_val ); + + /* LDAP v3 Support */ + if( np != NULL ) { + /* free new parent and reader lock */ + mdb_unlocked_cache_return_entry_r(&mdb->bi_cache, np); + } + + if( p != NULL ) { + /* free parent and reader lock */ + mdb_unlocked_cache_return_entry_r(&mdb->bi_cache, p); + } + + /* free entry */ + if( e != NULL ) { + mdb_unlocked_cache_return_entry_w( &mdb->bi_cache, e); + } + + if( ltid != NULL ) { + TXN_ABORT( ltid ); + } + if ( opinfo.boi_oe.oe_key ) { + LDAP_SLIST_REMOVE( &op->o_extra, &opinfo.boi_oe, OpExtra, oe_next ); + } + + if( preread_ctrl != NULL && (*preread_ctrl) != NULL ) { + slap_sl_free( (*preread_ctrl)->ldctl_value.bv_val, op->o_tmpmemctx ); + slap_sl_free( *preread_ctrl, op->o_tmpmemctx ); + } + if( postread_ctrl != NULL && (*postread_ctrl) != NULL ) { + slap_sl_free( (*postread_ctrl)->ldctl_value.bv_val, op->o_tmpmemctx ); + slap_sl_free( *postread_ctrl, op->o_tmpmemctx ); + } + return rs->sr_err; +} diff --git a/servers/slapd/back-mdb/monitor.c b/servers/slapd/back-mdb/monitor.c new file mode 100644 index 0000000000..f629b2134e --- /dev/null +++ b/servers/slapd/back-mdb/monitor.c @@ -0,0 +1,725 @@ +/* monitor.c - monitor mdb backend */ +/* $OpenLDAP$ */ +/* This work is part of OpenLDAP Software . + * + * Copyright 2000-2011 The OpenLDAP Foundation. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * . + */ + +#include "portable.h" + +#include +#include +#include +#include +#include +#include +#include "lutil.h" +#include "back-mdb.h" + +#include "../back-monitor/back-monitor.h" + +#include "config.h" + +static ObjectClass *oc_olmMDBDatabase; + +static AttributeDescription *ad_olmMDBEntryCache, + *ad_olmMDBDNCache, *ad_olmMDBIDLCache, + *ad_olmDbDirectory; + +#ifdef MDB_MONITOR_IDX +static int +mdb_monitor_idx_entry_add( + struct mdb_info *mdb, + Entry *e ); + +static AttributeDescription *ad_olmMDBNotIndexed; +#endif /* MDB_MONITOR_IDX */ + +/* + * NOTE: there's some confusion in monitor OID arc; + * by now, let's consider: + * + * Subsystems monitor attributes 1.3.6.1.4.1.4203.666.1.55.0 + * Databases monitor attributes 1.3.6.1.4.1.4203.666.1.55.0.1 + * MDB database monitor attributes 1.3.6.1.4.1.4203.666.1.55.0.1.1 + * + * Subsystems monitor objectclasses 1.3.6.1.4.1.4203.666.3.16.0 + * Databases monitor objectclasses 1.3.6.1.4.1.4203.666.3.16.0.1 + * MDB database monitor objectclasses 1.3.6.1.4.1.4203.666.3.16.0.1.1 + */ + +static struct { + char *name; + char *oid; +} s_oid[] = { + { "olmMDBAttributes", "olmDatabaseAttributes:1" }, + { "olmMDBObjectClasses", "olmDatabaseObjectClasses:1" }, + + { NULL } +}; + +static struct { + char *desc; + AttributeDescription **ad; +} s_at[] = { + { "( olmMDBAttributes:1 " + "NAME ( 'olmMDBEntryCache' ) " + "DESC 'Number of items in Entry Cache' " + "SUP monitorCounter " + "NO-USER-MODIFICATION " + "USAGE dSAOperation )", + &ad_olmMDBEntryCache }, + + { "( olmMDBAttributes:2 " + "NAME ( 'olmMDBDNCache' ) " + "DESC 'Number of items in DN Cache' " + "SUP monitorCounter " + "NO-USER-MODIFICATION " + "USAGE dSAOperation )", + 
&ad_olmMDBDNCache }, + + { "( olmMDBAttributes:3 " + "NAME ( 'olmMDBIDLCache' ) " + "DESC 'Number of items in IDL Cache' " + "SUP monitorCounter " + "NO-USER-MODIFICATION " + "USAGE dSAOperation )", + &ad_olmMDBIDLCache }, + + { "( olmMDBAttributes:4 " + "NAME ( 'olmDbDirectory' ) " + "DESC 'Path name of the directory " + "where the database environment resides' " + "SUP monitoredInfo " + "NO-USER-MODIFICATION " + "USAGE dSAOperation )", + &ad_olmDbDirectory }, + +#ifdef MDB_MONITOR_IDX + { "( olmMDBAttributes:5 " + "NAME ( 'olmMDBNotIndexed' ) " + "DESC 'Missing indexes resulting from candidate selection' " + "SUP monitoredInfo " + "NO-USER-MODIFICATION " + "USAGE dSAOperation )", + &ad_olmMDBNotIndexed }, +#endif /* MDB_MONITOR_IDX */ + + { NULL } +}; + +static struct { + char *desc; + ObjectClass **oc; +} s_oc[] = { + /* augments an existing object, so it must be AUXILIARY + * FIXME: derive from some ABSTRACT "monitoredEntity"? */ + { "( olmMDBObjectClasses:1 " + "NAME ( 'olmMDBDatabase' ) " + "SUP top AUXILIARY " + "MAY ( " + "olmMDBEntryCache " + "$ olmMDBDNCache " + "$ olmMDBIDLCache " + "$ olmDbDirectory " +#ifdef MDB_MONITOR_IDX + "$ olmMDBNotIndexed " +#endif /* MDB_MONITOR_IDX */ + ") )", + &oc_olmMDBDatabase }, + + { NULL } +}; + +static int +mdb_monitor_update( + Operation *op, + SlapReply *rs, + Entry *e, + void *priv ) +{ + struct mdb_info *mdb = (struct mdb_info *) priv; + Attribute *a; + + char buf[ BUFSIZ ]; + struct berval bv; + + assert( ad_olmMDBEntryCache != NULL ); + + a = attr_find( e->e_attrs, ad_olmMDBEntryCache ); + assert( a != NULL ); + bv.bv_val = buf; + bv.bv_len = snprintf( buf, sizeof( buf ), "%lu", mdb->bi_cache.c_cursize ); + ber_bvreplace( &a->a_vals[ 0 ], &bv ); + + a = attr_find( e->e_attrs, ad_olmMDBDNCache ); + assert( a != NULL ); + bv.bv_len = snprintf( buf, sizeof( buf ), "%lu", mdb->bi_cache.c_eiused ); + ber_bvreplace( &a->a_vals[ 0 ], &bv ); + + a = attr_find( e->e_attrs, ad_olmMDBIDLCache ); + assert( a != NULL ); + 
bv.bv_len = snprintf( buf, sizeof( buf ), "%lu", mdb->bi_idl_cache_size ); + ber_bvreplace( &a->a_vals[ 0 ], &bv ); + +#ifdef MDB_MONITOR_IDX + mdb_monitor_idx_entry_add( mdb, e ); +#endif /* MDB_MONITOR_IDX */ + + return SLAP_CB_CONTINUE; +} + +#if 0 /* uncomment if required */ +static int +mdb_monitor_modify( + Operation *op, + SlapReply *rs, + Entry *e, + void *priv ) +{ + return SLAP_CB_CONTINUE; +} +#endif + +static int +mdb_monitor_free( + Entry *e, + void **priv ) +{ + struct berval values[ 2 ]; + Modification mod = { 0 }; + + const char *text; + char textbuf[ SLAP_TEXT_BUFLEN ]; + + int i, rc; + + /* NOTE: if slap_shutdown != 0, priv might have already been freed */ + *priv = NULL; + + /* Remove objectClass */ + mod.sm_op = LDAP_MOD_DELETE; + mod.sm_desc = slap_schema.si_ad_objectClass; + mod.sm_values = values; + mod.sm_numvals = 1; + values[ 0 ] = oc_olmMDBDatabase->soc_cname; + BER_BVZERO( &values[ 1 ] ); + + rc = modify_delete_values( e, &mod, 1, &text, + textbuf, sizeof( textbuf ) ); + /* don't care too much about return code... */ + + /* remove attrs */ + mod.sm_values = NULL; + mod.sm_numvals = 0; + for ( i = 0; s_at[ i ].desc != NULL; i++ ) { + mod.sm_desc = *s_at[ i ].ad; + rc = modify_delete_values( e, &mod, 1, &text, + textbuf, sizeof( textbuf ) ); + /* don't care too much about return code... 
*/ + } + + return SLAP_CB_CONTINUE; +} + +#define mdb_monitor_initialize MDB_SYMBOL(monitor_initialize) + +/* + * call from within mdb_initialize() + */ +static int +mdb_monitor_initialize( void ) +{ + int i, code; + ConfigArgs c; + char *argv[ 3 ]; + + static int mdb_monitor_initialized = 0; + + /* set to 0 when successfully initialized; otherwise, remember failure */ + static int mdb_monitor_initialized_failure = 1; + + if ( mdb_monitor_initialized++ ) { + return mdb_monitor_initialized_failure; + } + + if ( backend_info( "monitor" ) == NULL ) { + return -1; + } + + /* register schema here */ + + argv[ 0 ] = "back-mdb/back-hdb monitor"; + c.argv = argv; + c.argc = 3; + c.fname = argv[0]; + + for ( i = 0; s_oid[ i ].name; i++ ) { + c.lineno = i; + argv[ 1 ] = s_oid[ i ].name; + argv[ 2 ] = s_oid[ i ].oid; + + if ( parse_oidm( &c, 0, NULL ) != 0 ) { + Debug( LDAP_DEBUG_ANY, LDAP_XSTRING(mdb_monitor_initialize) + ": unable to add " + "objectIdentifier \"%s=%s\"\n", + s_oid[ i ].name, s_oid[ i ].oid, 0 ); + return 2; + } + } + + for ( i = 0; s_at[ i ].desc != NULL; i++ ) { + code = register_at( s_at[ i ].desc, s_at[ i ].ad, 1 ); + if ( code != LDAP_SUCCESS ) { + Debug( LDAP_DEBUG_ANY, LDAP_XSTRING(mdb_monitor_initialize) + ": register_at failed for attributeType (%s)\n", + s_at[ i ].desc, 0, 0 ); + return 3; + + } else { + (*s_at[ i ].ad)->ad_type->sat_flags |= SLAP_AT_HIDE; + } + } + + for ( i = 0; s_oc[ i ].desc != NULL; i++ ) { + code = register_oc( s_oc[ i ].desc, s_oc[ i ].oc, 1 ); + if ( code != LDAP_SUCCESS ) { + Debug( LDAP_DEBUG_ANY, LDAP_XSTRING(mdb_monitor_initialize) + ": register_oc failed for objectClass (%s)\n", + s_oc[ i ].desc, 0, 0 ); + return 4; + + } else { + (*s_oc[ i ].oc)->soc_flags |= SLAP_OC_HIDE; + } + } + + return ( mdb_monitor_initialized_failure = LDAP_SUCCESS ); +} + +/* + * call from within mdb_db_init() + */ +int +mdb_monitor_db_init( BackendDB *be ) +{ + struct mdb_info *mdb = (struct mdb_info *) be->be_private; + + if ( 
mdb_monitor_initialize() == LDAP_SUCCESS ) { + /* monitoring in back-mdb is on by default */ + SLAP_DBFLAGS( be ) |= SLAP_DBFLAG_MONITORING; + } + +#ifdef MDB_MONITOR_IDX + mdb->bi_idx = NULL; + ldap_pvt_thread_mutex_init( &mdb->bi_idx_mutex ); +#endif /* MDB_MONITOR_IDX */ + + return 0; +} + +/* + * call from within mdb_db_open() + */ +int +mdb_monitor_db_open( BackendDB *be ) +{ + struct mdb_info *mdb = (struct mdb_info *) be->be_private; + Attribute *a, *next; + monitor_callback_t *cb = NULL; + int rc = 0; + BackendInfo *mi; + monitor_extra_t *mbe; + struct berval dummy = BER_BVC(""); + + if ( !SLAP_DBMONITORING( be ) ) { + return 0; + } + + mi = backend_info( "monitor" ); + if ( !mi || !mi->bi_extra ) { + SLAP_DBFLAGS( be ) ^= SLAP_DBFLAG_MONITORING; + return 0; + } + mbe = mi->bi_extra; + + /* don't bother if monitor is not configured */ + if ( !mbe->is_configured() ) { + static int warning = 0; + + if ( warning++ == 0 ) { + Debug( LDAP_DEBUG_ANY, LDAP_XSTRING(mdb_monitor_db_open) + ": monitoring disabled; " + "configure monitor database to enable\n", + 0, 0, 0 ); + } + + return 0; + } + + /* alloc as many as required (plus 1 for objectClass) */ + a = attrs_alloc( 1 + 4 ); + if ( a == NULL ) { + rc = 1; + goto cleanup; + } + + a->a_desc = slap_schema.si_ad_objectClass; + attr_valadd( a, &oc_olmMDBDatabase->soc_cname, NULL, 1 ); + next = a->a_next; + + { + struct berval bv = BER_BVC( "0" ); + + next->a_desc = ad_olmMDBEntryCache; + attr_valadd( next, &bv, NULL, 1 ); + next = next->a_next; + + next->a_desc = ad_olmMDBDNCache; + attr_valadd( next, &bv, NULL, 1 ); + next = next->a_next; + + next->a_desc = ad_olmMDBIDLCache; + attr_valadd( next, &bv, NULL, 1 ); + next = next->a_next; + } + + { + struct berval bv, nbv; + ber_len_t pathlen = 0, len = 0; + char path[ MAXPATHLEN ] = { '\0' }; + char *fname = mdb->bi_dbenv_home, + *ptr; + + len = strlen( fname ); + if ( fname[ 0 ] != '/' ) { + /* get full path name */ + getcwd( path, sizeof( path ) ); + pathlen = 
strlen( path ); + + if ( fname[ 0 ] == '.' && fname[ 1 ] == '/' ) { + fname += 2; + len -= 2; + } + } + + bv.bv_len = pathlen + STRLENOF( "/" ) + len; /* size estimate only; the real length is stored once the string is built */ + ptr = bv.bv_val = ch_malloc( bv.bv_len + STRLENOF( "/" ) + 1 ); + if ( pathlen ) { + ptr = lutil_strncopy( ptr, path, pathlen ); + ptr[ 0 ] = '/'; + ptr++; + } + ptr = lutil_strncopy( ptr, fname, len ); + if ( ptr[ -1 ] != '/' ) { + ptr[ 0 ] = '/'; + ptr++; + } + ptr[ 0 ] = '\0'; + /* the estimate above assumes exactly one added "/"; zero or two may have been written, so record the length actually produced */ + bv.bv_len = (ber_len_t)( ptr - bv.bv_val ); + + attr_normalize_one( ad_olmDbDirectory, &bv, &nbv, NULL ); + + next->a_desc = ad_olmDbDirectory; + next->a_vals = ch_calloc( sizeof( struct berval ), 2 ); + next->a_vals[ 0 ] = bv; + next->a_numvals = 1; + + if ( BER_BVISNULL( &nbv ) ) { + next->a_nvals = next->a_vals; + + } else { + next->a_nvals = ch_calloc( sizeof( struct berval ), 2 ); + next->a_nvals[ 0 ] = nbv; + } + + next = next->a_next; + } + + cb = ch_calloc( sizeof( monitor_callback_t ), 1 ); + cb->mc_update = mdb_monitor_update; +#if 0 /* uncomment if required */ + cb->mc_modify = mdb_monitor_modify; +#endif + cb->mc_free = mdb_monitor_free; + cb->mc_private = (void *)mdb; + + /* make sure the database is registered; then add monitor attributes */ + rc = mbe->register_database( be, &mdb->bi_monitor.bdm_ndn ); + if ( rc == 0 ) { + rc = mbe->register_entry_attrs( &mdb->bi_monitor.bdm_ndn, a, cb, + &dummy, 0, &dummy ); + } + +cleanup:; + if ( rc != 0 ) { + if ( cb != NULL ) { + ch_free( cb ); + cb = NULL; + } + + if ( a != NULL ) { + attrs_free( a ); + a = NULL; + } + } + + /* store for cleanup */ + mdb->bi_monitor.bdm_cb = (void *)cb; + + /* we don't need to keep track of the attributes, because + * mdb_monitor_free() takes care of everything */ + if ( a != NULL ) { + attrs_free( a ); + } + + return rc; +} + +/* + * call from within mdb_db_close() + */ +int +mdb_monitor_db_close( BackendDB *be ) +{ + struct mdb_info *mdb = (struct mdb_info *) be->be_private; + + if ( !BER_BVISNULL( &mdb->bi_monitor.bdm_ndn ) ) { + BackendInfo *mi = backend_info( "monitor" ); + monitor_extra_t 
*mbe; + + if ( mi && mi->bi_extra ) { /* test the pointer itself: the address of the member is never NULL */ + mbe = mi->bi_extra; + mbe->unregister_entry_callback( &mdb->bi_monitor.bdm_ndn, + (monitor_callback_t *)mdb->bi_monitor.bdm_cb, + NULL, 0, NULL ); + } + + memset( &mdb->bi_monitor, 0, sizeof( mdb->bi_monitor ) ); + } + + return 0; +} + +/* + * call from within mdb_db_destroy() + */ +int +mdb_monitor_db_destroy( BackendDB *be ) +{ +#ifdef MDB_MONITOR_IDX + struct mdb_info *mdb = (struct mdb_info *) be->be_private; + + /* destroy the index mutex, then free the not-indexed tree and its nodes */ + ldap_pvt_thread_mutex_destroy( &mdb->bi_idx_mutex ); + avl_free( mdb->bi_idx, ch_free ); +#endif /* MDB_MONITOR_IDX */ + + return 0; +} + +#ifdef MDB_MONITOR_IDX + +#define MDB_MONITOR_IDX_TYPES (4) + +typedef struct monitor_idx_t monitor_idx_t; + +struct monitor_idx_t { + AttributeDescription *idx_ad; + unsigned long idx_count[MDB_MONITOR_IDX_TYPES]; +}; + +static int +mdb_monitor_bitmask2key( slap_mask_t bitmask ) +{ + int key; + + for ( key = 0; key < 8 * (int)sizeof(slap_mask_t) && !( bitmask & 0x1U ); + key++ ) + bitmask >>= 1; + + return key; +} + +static struct berval idxbv[] = { + BER_BVC( "present=" ), + BER_BVC( "equality=" ), + BER_BVC( "approx=" ), + BER_BVC( "substr=" ), + BER_BVNULL +}; + +static ber_len_t +mdb_monitor_idx2len( monitor_idx_t *idx ) +{ + int i; + ber_len_t len = 0; + + for ( i = 0; i < MDB_MONITOR_IDX_TYPES; i++ ) { + if ( idx->idx_count[ i ] != 0 ) { + len += idxbv[i].bv_len; + } + } + + return len; +} + +static int +monitor_idx_cmp( const void *p1, const void *p2 ) +{ + const monitor_idx_t *idx1 = (const monitor_idx_t *)p1; + const monitor_idx_t *idx2 = (const monitor_idx_t *)p2; + + return SLAP_PTRCMP( idx1->idx_ad, idx2->idx_ad ); +} + +static int +monitor_idx_dup( void *p1, void *p2 ) +{ + monitor_idx_t *idx1 = (monitor_idx_t *)p1; + monitor_idx_t *idx2 = (monitor_idx_t *)p2; + + return SLAP_PTRCMP( idx1->idx_ad, idx2->idx_ad ) == 0 ? 
-1 : 0; +} + +int +mdb_monitor_idx_add( + struct mdb_info *mdb, + AttributeDescription *desc, + slap_mask_t type ) +{ + monitor_idx_t idx_dummy = { 0 }, + *idx; + int rc = 0, key; + + idx_dummy.idx_ad = desc; + key = mdb_monitor_bitmask2key( type ) - 1; + if ( key < 0 || key >= MDB_MONITOR_IDX_TYPES ) { + /* invalid index type: key is -1 when bit 0 of type is set, and must stay inside idx_count[] */ + return -1; + } + + ldap_pvt_thread_mutex_lock( &mdb->bi_idx_mutex ); + + idx = (monitor_idx_t *)avl_find( mdb->bi_idx, + (caddr_t)&idx_dummy, monitor_idx_cmp ); + if ( idx == NULL ) { + idx = (monitor_idx_t *)ch_calloc( sizeof( monitor_idx_t ), 1 ); + idx->idx_ad = desc; + idx->idx_count[ key ] = 1; + + switch ( avl_insert( &mdb->bi_idx, (caddr_t)idx, + monitor_idx_cmp, monitor_idx_dup ) ) + { + case 0: + break; + + default: + ch_free( idx ); + rc = -1; + } + + } else { + idx->idx_count[ key ]++; + } + + ldap_pvt_thread_mutex_unlock( &mdb->bi_idx_mutex ); + + return rc; +} + +static int +mdb_monitor_idx_apply( void *v_idx, void *v_valp ) +{ + monitor_idx_t *idx = (monitor_idx_t *)v_idx; + BerVarray *valp = (BerVarray *)v_valp; + + struct berval bv; + char *ptr; + char count_buf[ MDB_MONITOR_IDX_TYPES ][ SLAP_TEXT_BUFLEN ]; + ber_len_t count_len[ MDB_MONITOR_IDX_TYPES ], + idx_len; + int i, num = 0; + + idx_len = mdb_monitor_idx2len( idx ); + + bv.bv_len = 0; + for ( i = 0; i < MDB_MONITOR_IDX_TYPES; i++ ) { + if ( idx->idx_count[ i ] == 0 ) { + continue; + } + + count_len[ i ] = snprintf( count_buf[ i ], + sizeof( count_buf[ i ] ), "%lu", idx->idx_count[ i ] ); + bv.bv_len += count_len[ i ]; + num++; + } + + bv.bv_len += idx->idx_ad->ad_cname.bv_len + + num + + idx_len; + ptr = bv.bv_val = ch_malloc( bv.bv_len + 1 ); + ptr = lutil_strcopy( ptr, idx->idx_ad->ad_cname.bv_val ); + for ( i = 0; i < MDB_MONITOR_IDX_TYPES; i++ ) { + if ( idx->idx_count[ i ] == 0 ) { + continue; + } + + ptr[ 0 ] = '#'; + ++ptr; + ptr = lutil_strcopy( ptr, idxbv[ i ].bv_val ); + ptr = lutil_strcopy( ptr, count_buf[ i ] ); + } + + ber_bvarray_add( valp, &bv ); + + return 0; 
+} + +static int +mdb_monitor_idx_entry_add( + struct mdb_info *mdb, + Entry *e ) +{ + BerVarray vals = NULL; + Attribute *a; + + a = attr_find( e->e_attrs, ad_olmMDBNotIndexed ); + + ldap_pvt_thread_mutex_lock( &mdb->bi_idx_mutex ); + + avl_apply( mdb->bi_idx, mdb_monitor_idx_apply, + &vals, -1, AVL_INORDER ); /* one value per tree node, built while holding bi_idx_mutex */ + + ldap_pvt_thread_mutex_unlock( &mdb->bi_idx_mutex ); + + if ( vals != NULL ) { + if ( a != NULL ) { + assert( a->a_nvals == a->a_vals ); + + ber_bvarray_free( a->a_vals ); + + } else { + Attribute **ap; + + for ( ap = &e->e_attrs; *ap != NULL; ap = &(*ap)->a_next ) + ; + *ap = attr_alloc( ad_olmMDBNotIndexed ); + a = *ap; + } + a->a_vals = vals; + a->a_nvals = a->a_vals; /* NOTE(review): a_numvals is not updated to match the new array - confirm no reader relies on it */ + } + + return 0; +} + +#endif /* MDB_MONITOR_IDX */ diff --git a/servers/slapd/back-mdb/nextid.c b/servers/slapd/back-mdb/nextid.c new file mode 100644 index 0000000000..9e95819edc --- /dev/null +++ b/servers/slapd/back-mdb/nextid.c @@ -0,0 +1,80 @@ +/* nextid.c - entry ID allocation for mdb backend */ +/* $OpenLDAP$ */ +/* This work is part of OpenLDAP Software <http://www.openldap.org/>. + * + * Copyright 2000-2011 The OpenLDAP Foundation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * <http://www.OpenLDAP.org/license.html>. 
+ */ + +#include "portable.h" + +#include +#include + +#include "back-mdb.h" + +int mdb_next_id( BackendDB *be, ID *out ) +{ + struct mdb_info *mdb = (struct mdb_info *) be->be_private; + + ldap_pvt_thread_mutex_lock( &mdb->bi_lastid_mutex ); + *out = ++mdb->bi_lastid; + ldap_pvt_thread_mutex_unlock( &mdb->bi_lastid_mutex ); + + return 0; +} + +int mdb_last_id( BackendDB *be, DB_TXN *tid ) +{ + struct mdb_info *mdb = (struct mdb_info *) be->be_private; + int rc; + ID id = 0; + unsigned char idbuf[sizeof(ID)]; + DBT key, data; + DBC *cursor; + + DBTzero( &key ); + key.flags = DB_DBT_USERMEM; + key.data = (char *) idbuf; + key.ulen = sizeof( idbuf ); + + DBTzero( &data ); + data.flags = DB_DBT_USERMEM | DB_DBT_PARTIAL; + + /* Get a read cursor */ + rc = mdb->bi_id2entry->bdi_db->cursor( mdb->bi_id2entry->bdi_db, + tid, &cursor, 0 ); + + if (rc == 0) { + rc = cursor->c_get(cursor, &key, &data, DB_LAST); + cursor->c_close(cursor); + } + + switch(rc) { + case DB_NOTFOUND: + rc = 0; + break; + case 0: + MDB_DISK2ID( idbuf, &id ); + break; + + default: + Debug( LDAP_DEBUG_ANY, + "=> mdb_last_id: get failed: %s (%d)\n", + db_strerror(rc), rc, 0 ); + goto done; + } + + mdb->bi_lastid = id; + +done: + return rc; +} diff --git a/servers/slapd/back-mdb/operational.c b/servers/slapd/back-mdb/operational.c new file mode 100644 index 0000000000..8985959d2a --- /dev/null +++ b/servers/slapd/back-mdb/operational.c @@ -0,0 +1,151 @@ +/* operational.c - mdb backend operational attributes function */ +/* $OpenLDAP$ */ +/* This work is part of OpenLDAP Software . + * + * Copyright 2000-2011 The OpenLDAP Foundation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * . 
+ */ + +#include "portable.h" + +#include + +#include +#include + +#include "slap.h" +#include "back-mdb.h" + +/* + * sets *hasSubordinates to LDAP_COMPARE_TRUE/LDAP_COMPARE_FALSE + * if the entry has children or not. + */ +int +mdb_hasSubordinates( + Operation *op, + Entry *e, + int *hasSubordinates ) +{ + struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private; + struct mdb_op_info *opinfo; + OpExtra *oex; + DB_TXN *rtxn; + int rc; + int release = 0; + + assert( e != NULL ); + + /* NOTE: this should never happen, but it actually happens + * when using back-relay; until we find a better way to + * preserve entry's private information while rewriting it, + * let's disable the hasSubordinate feature for back-relay. + */ + if ( BEI( e ) == NULL ) { + Entry *ee = NULL; + rc = be_entry_get_rw( op, &e->e_nname, NULL, NULL, 0, &ee ); + if ( rc != LDAP_SUCCESS || ee == NULL ) { + rc = LDAP_OTHER; + goto done; + } + e = ee; + release = 1; + if ( BEI( ee ) == NULL ) { + rc = LDAP_OTHER; + goto done; + } + } + + /* Check for a txn in a parent op, otherwise use reader txn */ + LDAP_SLIST_FOREACH( oex, &op->o_extra, oe_next ) { + if ( oex->oe_key == mdb ) + break; + } + opinfo = (struct mdb_op_info *) oex; + if ( opinfo && opinfo->boi_txn ) { + rtxn = opinfo->boi_txn; + } else { + rc = mdb_reader_get(op, mdb->bi_dbenv, &rtxn); + if ( rc ) { + rc = LDAP_OTHER; + goto done; + } + } + +retry: + /* FIXME: we can no longer assume the entry's e_private + * field is correctly populated; so we need to reacquire + * it with reader lock */ + rc = mdb_cache_children( op, rtxn, e ); + + switch( rc ) { + case DB_LOCK_DEADLOCK: + case DB_LOCK_NOTGRANTED: + goto retry; + + case 0: + *hasSubordinates = LDAP_COMPARE_TRUE; + break; + + case DB_NOTFOUND: + *hasSubordinates = LDAP_COMPARE_FALSE; + rc = LDAP_SUCCESS; + break; + + default: + Debug(LDAP_DEBUG_ARGS, + "<=- " LDAP_XSTRING(mdb_hasSubordinates) + ": has_children failed: %s (%d)\n", + db_strerror(rc), rc, 0 ); + rc = LDAP_OTHER; 
+ } + +done:; + if ( release && e != NULL ) be_entry_release_r( op, e ); + return rc; +} + +/* + * sets the supported operational attributes (if required) + */ +int +mdb_operational( + Operation *op, + SlapReply *rs ) +{ + Attribute **ap; + + assert( rs->sr_entry != NULL ); + + for ( ap = &rs->sr_operational_attrs; *ap; ap = &(*ap)->a_next ) { + if ( (*ap)->a_desc == slap_schema.si_ad_hasSubordinates ) { + break; + } + } + + if ( *ap == NULL && + attr_find( rs->sr_entry->e_attrs, slap_schema.si_ad_hasSubordinates ) == NULL && + ( SLAP_OPATTRS( rs->sr_attr_flags ) || + ad_inlist( slap_schema.si_ad_hasSubordinates, rs->sr_attrs ) ) ) + { + int hasSubordinates, rc; + + rc = mdb_hasSubordinates( op, rs->sr_entry, &hasSubordinates ); + if ( rc == LDAP_SUCCESS ) { + *ap = slap_operational_hasSubordinate( hasSubordinates == LDAP_COMPARE_TRUE ); + assert( *ap != NULL ); + + ap = &(*ap)->a_next; + } + } + + return LDAP_SUCCESS; +} + diff --git a/servers/slapd/back-mdb/proto-mdb.h b/servers/slapd/back-mdb/proto-mdb.h new file mode 100644 index 0000000000..f117ff832d --- /dev/null +++ b/servers/slapd/back-mdb/proto-mdb.h @@ -0,0 +1,350 @@ +/* $OpenLDAP$ */ +/* This work is part of OpenLDAP Software . + * + * Copyright 2000-2011 The OpenLDAP Foundation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * . 
+ */ + +#ifndef _PROTO_MDB_H +#define _PROTO_MDB_H + +LDAP_BEGIN_DECL + +#define MDB_UCTYPE "MDB" + +/* + * attr.c + */ + +AttrInfo *mdb_attr_mask( struct mdb_info *mdb, + AttributeDescription *desc ); + +void mdb_attr_flush( struct mdb_info *mdb ); + +int mdb_attr_slot( struct mdb_info *mdb, + AttributeDescription *desc, int *insert ); + +int mdb_attr_index_config LDAP_P(( struct mdb_info *mdb, + const char *fname, int lineno, + int argc, char **argv, struct config_reply_s *cr )); + +void mdb_attr_index_unparse LDAP_P(( struct mdb_info *mdb, BerVarray *bva )); +void mdb_attr_index_destroy LDAP_P(( struct mdb_info *mdb )); +void mdb_attr_index_free LDAP_P(( struct mdb_info *mdb, + AttributeDescription *ad )); + +void mdb_attr_info_free( AttrInfo *ai ); + +/* + * config.c + */ + +int mdb_back_init_cf( BackendInfo *bi ); + +/* + * dbcache.c + */ + +int +mdb_db_cache( + Backend *be, + struct berval *name, + MDB_dbi *dbi ); + +/* + * dn2entry.c + */ + +int mdb_dn2entry LDAP_P(( Operation *op, MDB_txn *tid, + struct berval *dn, Entry *e, int matched )); + +/* + * dn2id.c + */ + +int mdb_dn2id( + Operation *op, + MDB_txn *txn, + struct berval *dn, + ID *id ); + +int mdb_dn2id_add( + Operation *op, + MDB_txn *tid, + ID pid, + Entry *e ); + +int mdb_dn2id_delete( + Operation *op, + MDB_txn *tid, + ID pid, + Entry *e ); + +int mdb_dn2id_children( + Operation *op, + MDB_txn *tid, + Entry *e ); + +int mdb_dn2idl( + Operation *op, + MDB_txn *txn, + struct berval *ndn, + ID eid, + ID *ids, + ID *stack ); + +int mdb_dn2id_parent( + Operation *op, + MDB_txn *txn, + ID eid, + ID *idp ); + +MDB_cmp_func mdb_dup_compare; + +/* + * filterindex.c + */ + +int mdb_filter_candidates( + Operation *op, + MDB_txn *txn, + Filter *f, + ID *ids, + ID *tmp, + ID *stack ); + +/* + * id2entry.c + */ + +int mdb_id2entry_add( + BackendDB *be, + MDB_txn *tid, + Entry *e ); + +int mdb_id2entry_update( + BackendDB *be, + MDB_txn *tid, + Entry *e ); + +int mdb_id2entry_delete( + BackendDB *be, + 
MDB_txn *tid, + Entry *e); + +int mdb_id2entry( + BackendDB *be, + MDB_txn *tid, + ID id, + Entry **e); + +void mdb_entry_free ( Entry *e ); +int mdb_entry_return( Entry *e ); +BI_entry_release_rw mdb_entry_release; +BI_entry_get_rw mdb_entry_get; + + +/* + * idl.c + */ + +unsigned mdb_idl_search( ID *ids, ID id ); + +int mdb_idl_fetch_key( + BackendDB *be, + MDB_txn *txn, + MDB_dbi dbi, + MDB_val *key, + ID *ids, + MDB_cursor **saved_cursor, + int get_flag ); + +int mdb_idl_insert( ID *ids, ID id ); + +int mdb_idl_insert_key( + BackendDB *be, + MDB_txn *txn, + MDB_dbi dbi, + MDB_val *key, + ID id ); + +int mdb_idl_delete_key( + BackendDB *be, + MDB_txn *txn, + MDB_dbi dbi, + MDB_val *key, + ID id ); + +int +mdb_idl_intersection( + ID *a, + ID *b ); + +int +mdb_idl_union( + ID *a, + ID *b ); + +ID mdb_idl_first( ID *ids, ID *cursor ); +ID mdb_idl_next( ID *ids, ID *cursor ); + +void mdb_idl_sort( ID *ids, ID *tmp ); +int mdb_idl_append( ID *a, ID *b ); +int mdb_idl_append_one( ID *ids, ID id ); + + +/* + * index.c + */ + +extern AttrInfo * +mdb_index_mask LDAP_P(( + Backend *be, + AttributeDescription *desc, + struct berval *name )); + +extern int +mdb_index_param LDAP_P(( + Backend *be, + AttributeDescription *desc, + int ftype, + MDB_dbi *dbi, + slap_mask_t *mask, + struct berval *prefix )); + +extern int +mdb_index_values LDAP_P(( + Operation *op, + MDB_txn *txn, + AttributeDescription *desc, + BerVarray vals, + ID id, + int opid )); + +extern int +mdb_index_recset LDAP_P(( + struct mdb_info *mdb, + Attribute *a, + AttributeType *type, + struct berval *tags, + IndexRec *ir )); + +extern int +mdb_index_recrun LDAP_P(( + Operation *op, + struct mdb_info *mdb, + IndexRec *ir, + ID id, + int base )); + +int mdb_index_entry LDAP_P(( Operation *op, MDB_txn *t, int r, Entry *e )); + +#define mdb_index_entry_add(op,t,e) \ + mdb_index_entry((op),(t),SLAP_INDEX_ADD_OP,(e)) +#define mdb_index_entry_del(op,t,e) \ + mdb_index_entry((op),(t),SLAP_INDEX_DELETE_OP,(e)) + +/* + 
* key.c + */ + +extern int +mdb_key_read( + Backend *be, + MDB_txn *txn, + MDB_dbi dbi, + struct berval *k, + ID *ids, + MDB_cursor **saved_cursor, + int get_flags ); + +extern int +mdb_key_change( + Backend *be, + MDB_txn *txn, + MDB_dbi dbi, + struct berval *k, + ID id, + int op ); + +/* + * nextid.c + */ + +int mdb_next_id( BackendDB *be, MDB_txn *tid, ID *id ); + +/* + * modify.c + */ + +int mdb_modify_internal( + Operation *op, + MDB_txn *tid, + Modifications *modlist, + Entry *e, + const char **text, + char *textbuf, + size_t textlen ); + +/* + * monitor.c + */ + +int mdb_monitor_db_init( BackendDB *be ); +int mdb_monitor_db_open( BackendDB *be ); +int mdb_monitor_db_close( BackendDB *be ); +int mdb_monitor_db_destroy( BackendDB *be ); + +#ifdef MDB_MONITOR_IDX +int +mdb_monitor_idx_add( + struct mdb_info *mdb, + AttributeDescription *desc, + slap_mask_t type ); +#endif /* MDB_MONITOR_IDX */ + +/* + * former external.h + */ + +extern BI_init mdb_back_initialize; + +extern BI_db_config mdb_db_config; + +extern BI_op_add mdb_add; +extern BI_op_bind mdb_bind; +extern BI_op_compare mdb_compare; +extern BI_op_delete mdb_delete; +extern BI_op_modify mdb_modify; +extern BI_op_modrdn mdb_modrdn; +extern BI_op_search mdb_search; +extern BI_op_extended mdb_extended; + +extern BI_chk_referrals mdb_referrals; + +extern BI_operational mdb_operational; + +extern BI_has_subordinates mdb_hasSubordinates; + +/* tools.c */ +extern BI_tool_entry_open mdb_tool_entry_open; +extern BI_tool_entry_close mdb_tool_entry_close; +extern BI_tool_entry_first_x mdb_tool_entry_first_x; +extern BI_tool_entry_next mdb_tool_entry_next; +extern BI_tool_entry_get mdb_tool_entry_get; +extern BI_tool_entry_put mdb_tool_entry_put; +extern BI_tool_entry_reindex mdb_tool_entry_reindex; +extern BI_tool_dn2id_get mdb_tool_dn2id_get; +extern BI_tool_entry_modify mdb_tool_entry_modify; + +LDAP_END_DECL + +#endif /* _PROTO_MDB_H */ diff --git a/servers/slapd/back-mdb/referral.c 
b/servers/slapd/back-mdb/referral.c new file mode 100644 index 0000000000..242fe3ab79 --- /dev/null +++ b/servers/slapd/back-mdb/referral.c @@ -0,0 +1,152 @@ +/* referral.c - MDB backend referral handler */ +/* $OpenLDAP$ */ +/* This work is part of OpenLDAP Software . + * + * Copyright 2000-2011 The OpenLDAP Foundation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * . + */ + +#include "portable.h" +#include +#include + +#include "back-mdb.h" + /* + * mdb_referrals - referral check for non-search requests. + * + * Returns LDAP_SUCCESS without any lookup for search requests and when + * get_manageDSAit( op ) is set. Otherwise op->o_req_ndn is looked up with + * the matched flag set. If the entry found (or, on DB_NOTFOUND, the entry + * that dn2entry left in ei) is a referral entry, its referrals are rewritten + * and sent with LDAP_REFERRAL; a referral entry whose rewrite yields nothing + * gives LDAP_OTHER. A plain entry gives LDAP_SUCCESS. + * + * NOTE(review): this copy still uses back-bdb names (DB_TXN, DB_LOCK, + * EntryInfo, DB_NOTFOUND, db_strerror, mdb_cache_return_entry_r) and the + * six-argument mdb_dn2entry call - TODO port to the MDB interfaces. + */ +int +mdb_referrals( Operation *op, SlapReply *rs ) +{ + struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private; + Entry *e = NULL; + EntryInfo *ei; /* NOTE(review): not initialized here, yet tested after mdb_dn2entry - confirm the callee always stores to it */ + int rc = LDAP_SUCCESS; + + DB_TXN *rtxn; + DB_LOCK lock; + + if( op->o_tag == LDAP_REQ_SEARCH ) { + /* let search take care of itself */ + return rc; + } + + if( get_manageDSAit( op ) ) { + /* let op take care of DSA management */ + return rc; + } + + rc = mdb_reader_get(op, mdb->bi_dbenv, &rtxn); + switch(rc) { + case 0: + break; + default: + return LDAP_OTHER; + } + +dn2entry_retry: + /* get entry */ + rc = mdb_dn2entry( op, rtxn, &op->o_req_ndn, &ei, 1, &lock ); + + /* mdb_dn2entry() may legally leave ei == NULL + * if rc != 0 and rc != DB_NOTFOUND + */ + if ( ei ) { + e = ei->bei_e; + } + + switch(rc) { + case DB_NOTFOUND: + case 0: + break; + case LDAP_BUSY: + rs->sr_text = "ldap server busy"; + return LDAP_BUSY; + case DB_LOCK_DEADLOCK: + case DB_LOCK_NOTGRANTED: + goto dn2entry_retry; + default: + Debug( LDAP_DEBUG_TRACE, + LDAP_XSTRING(mdb_referrals) + ": dn2entry failed: %s (%d)\n", + db_strerror(rc), rc, 0 ); + rs->sr_text = "internal error"; + return LDAP_OTHER; + } + /* target missing: e, when set, is the entry dn2entry matched instead */ + if ( rc == DB_NOTFOUND ) { + rc = LDAP_SUCCESS; + rs->sr_matched = NULL; + if ( e != NULL ) { + Debug( 
LDAP_DEBUG_TRACE, + LDAP_XSTRING(mdb_referrals) + ": tag=%lu target=\"%s\" matched=\"%s\"\n", + (unsigned long)op->o_tag, op->o_req_dn.bv_val, e->e_name.bv_val ); + + if( is_entry_referral( e ) ) { + BerVarray ref = get_entry_referrals( op, e ); + rc = LDAP_OTHER; + rs->sr_ref = referral_rewrite( ref, &e->e_name, + &op->o_req_dn, LDAP_SCOPE_DEFAULT ); + ber_bvarray_free( ref ); + if ( rs->sr_ref ) { + rs->sr_matched = ber_strdup_x( + e->e_name.bv_val, op->o_tmpmemctx ); + } + } + + mdb_cache_return_entry_r (mdb, e, &lock); + e = NULL; + } + /* NOTE(review): rs->sr_ref is assigned above only on the referral path; otherwise this tests the value the caller passed in */ + if( rs->sr_ref != NULL ) { + /* send referrals */ + rc = rs->sr_err = LDAP_REFERRAL; + send_ldap_result( op, rs ); + ber_bvarray_free( rs->sr_ref ); + rs->sr_ref = NULL; + } else if ( rc != LDAP_SUCCESS ) { + rs->sr_text = rs->sr_matched ? "bad referral object" : NULL; + } + /* release the copy made with ber_strdup_x above */ + if (rs->sr_matched) { + op->o_tmpfree( (char *)rs->sr_matched, op->o_tmpmemctx ); + rs->sr_matched = NULL; + } + return rc; + } + + if ( is_entry_referral( e ) ) { + /* entry is a referral */ + BerVarray refs = get_entry_referrals( op, e ); + rs->sr_ref = referral_rewrite( + refs, &e->e_name, &op->o_req_dn, LDAP_SCOPE_DEFAULT ); + + Debug( LDAP_DEBUG_TRACE, + LDAP_XSTRING(mdb_referrals) + ": tag=%lu target=\"%s\" matched=\"%s\"\n", + (unsigned long)op->o_tag, op->o_req_dn.bv_val, e->e_name.bv_val ); + /* borrowed pointer into the entry; cleared again before the entry is returned */ + rs->sr_matched = e->e_name.bv_val; + if( rs->sr_ref != NULL ) { + rc = rs->sr_err = LDAP_REFERRAL; + send_ldap_result( op, rs ); + ber_bvarray_free( rs->sr_ref ); + rs->sr_ref = NULL; + } else { + rc = LDAP_OTHER; + rs->sr_text = "bad referral object"; + } + + rs->sr_matched = NULL; + ber_bvarray_free( refs ); + } + + mdb_cache_return_entry_r(mdb, e, &lock); + return rc; +} diff --git a/servers/slapd/back-mdb/search.c b/servers/slapd/back-mdb/search.c new file mode 100644 index 0000000000..4cec717e22 --- /dev/null +++ b/servers/slapd/back-mdb/search.c @@ -0,0 +1,1383 @@ +/* search.c - search operation */ +/* $OpenLDAP$ */ +/* This work is part of OpenLDAP 
Software . + * + * Copyright 2000-2011 The OpenLDAP Foundation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * . + */ + +#include "portable.h" + +#include +#include + +#include "back-mdb.h" +#include "idl.h" + +static int base_candidate( + BackendDB *be, + Entry *e, + ID *ids ); + +static int search_candidates( + Operation *op, + SlapReply *rs, + Entry *e, + DB_TXN *txn, + ID *ids, + ID *scopes ); + +static int parse_paged_cookie( Operation *op, SlapReply *rs ); + +static void send_paged_response( + Operation *op, + SlapReply *rs, + ID *lastid, + int tentries ); + +/* Dereference aliases for a single alias entry. Return the final + * dereferenced entry on success, NULL on any failure. + * + * rs->sr_err and rs->sr_text report the outcome: LDAP_SUCCESS with a NULL + * text when a non-alias entry was reached, otherwise an alias error code. + * *matched is set to the entry being dereferenced at the start of each step. + * tmp receives the IDs visited in this chain and serves both the loop check + * and the be_max_deref_depth check. visited, when not NULL, is shared across + * the search, and an ID that mdb_idl_insert reports as already present ends + * the chain. After each successful fetch the previous entry is released and + * *lock is replaced by the lock of the new entry. + * + * NOTE(review): on DB_LOCK_DEADLOCK from mdb_dn2entry this returns NULL at + * once, leaving that code in rs->sr_err and *matched unreleased. + */ +static Entry * deref_base ( + Operation *op, + SlapReply *rs, + Entry *e, + Entry **matched, + DB_TXN *txn, + DB_LOCK *lock, + ID *tmp, + ID *visited ) +{ + struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private; + struct berval ndn; + EntryInfo *ei; /* NOTE(review): not initialized before mdb_dn2entry, yet tested afterwards */ + DB_LOCK lockr; + /* default outcome, kept when the loop ends on the depth or visited checks */ + rs->sr_err = LDAP_ALIAS_DEREF_PROBLEM; + rs->sr_text = "maximum deref depth exceeded"; + + for (;;) { + /* Remember the last entry we looked at, so we can + * report broken links + */ + *matched = e; + + if (MDB_IDL_N(tmp) >= op->o_bd->be_max_deref_depth) { + e = NULL; + break; + } + + /* If this is part of a subtree or onelevel search, + * have we seen this ID before? If so, quit. + */ + if ( visited && mdb_idl_insert( visited, e->e_id ) ) { + e = NULL; + break; + } + + /* If we've seen this ID during this deref iteration, + * we've hit a loop. 
+ */ + if ( mdb_idl_insert( tmp, e->e_id ) ) { + rs->sr_err = LDAP_ALIAS_PROBLEM; + rs->sr_text = "circular alias"; + e = NULL; + break; + } + + /* If there was a problem getting the aliasedObjectName, + * get_alias_dn will have set the error status. + */ + if ( get_alias_dn(e, &ndn, &rs->sr_err, &rs->sr_text) ) { + e = NULL; + break; + } + /* fetch the alias target by its normalized DN */ + rs->sr_err = mdb_dn2entry( op, txn, &ndn, &ei, + 0, &lockr ); + if ( rs->sr_err == DB_LOCK_DEADLOCK ) + return NULL; + + if ( ei ) { + e = ei->bei_e; + } else { + e = NULL; + } + + if (!e) { + rs->sr_err = LDAP_ALIAS_PROBLEM; + rs->sr_text = "aliasedObject not found"; + break; + } + + /* Free the previous entry, continue to work with the + * one we just retrieved. + */ + mdb_cache_return_entry_r( mdb, *matched, lock); + *lock = lockr; + + /* We found a regular entry. Return this to the caller. The + * entry is still locked for Read. + */ + if (!is_entry_alias(e)) { + rs->sr_err = LDAP_SUCCESS; + rs->sr_text = NULL; + break; + } + } + return e; +} + +/* Look for and dereference all aliases within the search scope. Adds + * the dereferenced entries to the "ids" list. Requires "stack" to be + * able to hold 8 levels of DB_SIZE IDLs. Of course we're hardcoded to + * require a minimum of 8 UM_SIZE IDLs so this is never a problem. 
+ */ +static int search_aliases( + Operation *op, + SlapReply *rs, + Entry *e, + DB_TXN *txn, + ID *ids, + ID *scopes, + ID *stack ) +{ + struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private; + ID *aliases, *curscop, *subscop, *visited, *newsubs, *oldsubs, *tmp; + ID cursora, ida, cursoro, ido, *subscop2; + Entry *matched, *a; + EntryInfo *ei; + struct berval bv_alias = BER_BVC( "alias" ); + AttributeAssertion aa_alias = ATTRIBUTEASSERTION_INIT; + Filter af; + DB_LOCK locka, lockr; + int first = 1; + /* carve the caller-supplied stack into consecutive MDB_IDL_DB_SIZE slots */ + aliases = stack; /* IDL of all aliases in the database */ + curscop = aliases + MDB_IDL_DB_SIZE; /* Aliases in the current scope */ + subscop = curscop + MDB_IDL_DB_SIZE; /* The current scope */ + visited = subscop + MDB_IDL_DB_SIZE; /* IDs we've seen in this search */ + newsubs = visited + MDB_IDL_DB_SIZE; /* New subtrees we've added */ + oldsubs = newsubs + MDB_IDL_DB_SIZE; /* Subtrees added previously */ + tmp = oldsubs + MDB_IDL_DB_SIZE; /* Scratch space for deref_base() */ + + /* A copy of subscop, because subscop gets clobbered by + * the mdb_idl_union/intersection routines + */ + subscop2 = tmp + MDB_IDL_DB_SIZE; + /* equality filter on objectClass with the value alias */ + af.f_choice = LDAP_FILTER_EQUALITY; + af.f_ava = &aa_alias; + af.f_av_desc = slap_schema.si_ad_objectClass; + af.f_av_value = bv_alias; + af.f_next = NULL; + + /* Find all aliases in database */ + MDB_IDL_ZERO( aliases ); + rs->sr_err = mdb_filter_candidates( op, txn, &af, aliases, + curscop, visited ); + if (rs->sr_err != LDAP_SUCCESS) { + return rs->sr_err; + } + oldsubs[0] = 1; /* presumably the IDL element count, followed by the single base ID - TODO confirm against idl.h */ + oldsubs[1] = e->e_id; + + MDB_IDL_ZERO( ids ); + MDB_IDL_ZERO( visited ); + MDB_IDL_ZERO( newsubs ); + + cursoro = 0; + ido = mdb_idl_first( oldsubs, &cursoro ); + + for (;;) { + /* Set curscop to only the aliases in the current scope. Start with + * all the aliases, obtain the IDL for the current scope, and then + * get the intersection of these two IDLs. Add the current scope + * to the cumulative list of candidates. 
+ */ + MDB_IDL_CPY( curscop, aliases ); + rs->sr_err = mdb_dn2idl( op, txn, &e->e_nname, BEI(e), subscop, + subscop2+MDB_IDL_DB_SIZE ); + /* the first e belongs to the caller; later ones were fetched under locka at sameido */ + if (first) { + first = 0; + } else { + mdb_cache_return_entry_r (mdb, e, &locka); + } + if ( rs->sr_err == DB_LOCK_DEADLOCK ) + return rs->sr_err; + + MDB_IDL_CPY(subscop2, subscop); + rs->sr_err = mdb_idl_intersection(curscop, subscop); /* NOTE(review): this result is not examined before it is overwritten or returned */ + mdb_idl_union( ids, subscop2 ); + + /* Dereference all of the aliases in the current scope. */ + cursora = 0; + for (ida = mdb_idl_first(curscop, &cursora); ida != NOID; + ida = mdb_idl_next(curscop, &cursora)) + { + ei = NULL; +retry1: + rs->sr_err = mdb_cache_find_id(op, txn, + ida, &ei, 0, &lockr ); + if (rs->sr_err != LDAP_SUCCESS) { + if ( rs->sr_err == DB_LOCK_DEADLOCK ) + return rs->sr_err; + if ( rs->sr_err == DB_LOCK_NOTGRANTED ) + goto retry1; + continue; + } + a = ei->bei_e; + + /* This should only happen if the curscop IDL has maxed out and + * turned into a range that spans IDs indiscriminately + */ + if (!is_entry_alias(a)) { + mdb_cache_return_entry_r (mdb, a, &lockr); + continue; + } + + /* Actually dereference the alias */ + MDB_IDL_ZERO(tmp); + a = deref_base( op, rs, a, &matched, txn, &lockr, + tmp, visited ); + if (a) { + /* If the target was not already in our current candidates, + * make note of it in the newsubs list. Also + * set it in the scopes list so that mdb_search + * can check it. + */ + if (mdb_idl_insert(ids, a->e_id) == 0) { + mdb_idl_insert(newsubs, a->e_id); + mdb_idl_insert(scopes, a->e_id); + } + mdb_cache_return_entry_r( mdb, a, &lockr); + + } else if ( rs->sr_err == DB_LOCK_DEADLOCK ) { + return rs->sr_err; + } else if (matched) { + /* Alias could not be dereferenced, or it deref'd to + * an ID we've already seen. Ignore it. + */ + mdb_cache_return_entry_r( mdb, matched, &lockr ); + rs->sr_text = NULL; + } + } + /* If this is a OneLevel search, we're done; oldsubs only had one + * ID in it. For a Subtree search, oldsubs may be a list of scope IDs. 
+ */ + if ( op->ors_scope == LDAP_SCOPE_ONELEVEL ) break; +nextido: + ido = mdb_idl_next( oldsubs, &cursoro ); + + /* If we're done processing the old scopes, did we add any new + * scopes in this iteration? If so, go back and do those now. + */ + if (ido == NOID) { + if (MDB_IDL_IS_ZERO(newsubs)) break; + MDB_IDL_CPY(oldsubs, newsubs); + MDB_IDL_ZERO(newsubs); + cursoro = 0; + ido = mdb_idl_first( oldsubs, &cursoro ); + } + + /* Find the entry corresponding to the next scope. If it can't + * be found, ignore it and move on. This should never happen; + * we should never see the ID of an entry that doesn't exist. + * Set the name so that the scope's IDL can be retrieved. + */ + ei = NULL; +sameido: + rs->sr_err = mdb_cache_find_id(op, txn, ido, &ei, + 0, &locka ); + if ( rs->sr_err != LDAP_SUCCESS ) { + if ( rs->sr_err == DB_LOCK_DEADLOCK ) + return rs->sr_err; + if ( rs->sr_err == DB_LOCK_NOTGRANTED ) + goto sameido; + goto nextido; + } + e = ei->bei_e; /* becomes the scope base for the next pass of the outer loop */ + } + return rs->sr_err; +} + +/* Get the next ID from the DB. 
Used if the candidate list is + * a range and simple iteration hits missing entryIDs + */ +static int +mdb_get_nextid(struct mdb_info *mdb, DB_TXN *ltid, ID *cursor) +{ + DBC *curs; + DBT key, data; + ID id, nid; + int rc; + + id = *cursor + 1; + MDB_ID2DISK( id, &nid ); + rc = mdb->bi_id2entry->bdi_db->cursor( + mdb->bi_id2entry->bdi_db, ltid, &curs, mdb->bi_db_opflags ); + if ( rc ) + return rc; + key.data = &nid; + key.size = key.ulen = sizeof(ID); + key.flags = DB_DBT_USERMEM; + data.flags = DB_DBT_USERMEM | DB_DBT_PARTIAL; + data.dlen = data.ulen = 0; + rc = curs->c_get( curs, &key, &data, DB_SET_RANGE ); + curs->c_close( curs ); + if ( rc ) + return rc; + MDB_DISK2ID( &nid, cursor ); + return 0; +} + +int +mdb_search( Operation *op, SlapReply *rs ) +{ + struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private; + ID id, cursor; + ID lastid = NOID; + ID candidates[MDB_IDL_UM_SIZE]; + ID scopes[MDB_IDL_DB_SIZE]; + Entry *e = NULL, base, *e_root; + Entry *matched = NULL; + EntryInfo *ei; + AttributeName *attrs; + struct berval realbase = BER_BVNULL; + slap_mask_t mask; + time_t stoptime; + int manageDSAit; + int tentries = 0; + unsigned nentries = 0; + int idflag = 0; + + DB_LOCK lock; + struct mdb_op_info *opinfo = NULL; + DB_TXN *ltid = NULL; + OpExtra *oex; + + Debug( LDAP_DEBUG_TRACE, "=> " LDAP_XSTRING(mdb_search) "\n", 0, 0, 0); + attrs = op->oq_search.rs_attrs; + + LDAP_SLIST_FOREACH( oex, &op->o_extra, oe_next ) { + if ( oex->oe_key == mdb ) + break; + } + opinfo = (struct mdb_op_info *) oex; + + manageDSAit = get_manageDSAit( op ); + + if ( opinfo && opinfo->boi_txn ) { + ltid = opinfo->boi_txn; + } else { + rs->sr_err = mdb_reader_get( op, mdb->bi_dbenv, <id ); + + switch(rs->sr_err) { + case 0: + break; + default: + send_ldap_error( op, rs, LDAP_OTHER, "internal error" ); + return rs->sr_err; + } + } + + e_root = mdb->bi_cache.c_dntree.bei_e; + if ( op->o_req_ndn.bv_len == 0 ) { + /* DIT root special case */ + ei = e_root->e_private; + 
rs->sr_err = LDAP_SUCCESS; + } else { + if ( op->ors_deref & LDAP_DEREF_FINDING ) { + MDB_IDL_ZERO(candidates); + } +dn2entry_retry: + /* get entry with reader lock */ + rs->sr_err = mdb_dn2entry( op, ltid, &op->o_req_ndn, &ei, + 1, &lock ); + } + + switch(rs->sr_err) { + case DB_NOTFOUND: + matched = ei->bei_e; + break; + case 0: + e = ei->bei_e; + break; + case DB_LOCK_DEADLOCK: + if ( !opinfo ) { + ltid->flags &= ~TXN_DEADLOCK; + goto dn2entry_retry; + } + opinfo->boi_err = rs->sr_err; + /* FALLTHRU */ + case LDAP_BUSY: + send_ldap_error( op, rs, LDAP_BUSY, "ldap server busy" ); + return LDAP_BUSY; + case DB_LOCK_NOTGRANTED: + goto dn2entry_retry; + default: + send_ldap_error( op, rs, LDAP_OTHER, "internal error" ); + return rs->sr_err; + } + + if ( op->ors_deref & LDAP_DEREF_FINDING ) { + if ( matched && is_entry_alias( matched )) { + struct berval stub; + + stub.bv_val = op->o_req_ndn.bv_val; + stub.bv_len = op->o_req_ndn.bv_len - matched->e_nname.bv_len - 1; + e = deref_base( op, rs, matched, &matched, ltid, &lock, + candidates, NULL ); + if ( e ) { + build_new_dn( &op->o_req_ndn, &e->e_nname, &stub, + op->o_tmpmemctx ); + mdb_cache_return_entry_r (mdb, e, &lock); + matched = NULL; + goto dn2entry_retry; + } + } else if ( e && is_entry_alias( e )) { + e = deref_base( op, rs, e, &matched, ltid, &lock, + candidates, NULL ); + } + } + + if ( e == NULL ) { + struct berval matched_dn = BER_BVNULL; + + if ( matched != NULL ) { + BerVarray erefs = NULL; + + /* return referral only if "disclose" + * is granted on the object */ + if ( ! access_allowed( op, matched, + slap_schema.si_ad_entry, + NULL, ACL_DISCLOSE, NULL ) ) + { + rs->sr_err = LDAP_NO_SUCH_OBJECT; + + } else { + ber_dupbv( &matched_dn, &matched->e_name ); + + erefs = is_entry_referral( matched ) + ? 
get_entry_referrals( op, matched ) + : NULL; + if ( rs->sr_err == DB_NOTFOUND ) + rs->sr_err = LDAP_REFERRAL; + rs->sr_matched = matched_dn.bv_val; + } + +#ifdef SLAP_ZONE_ALLOC + slap_zn_runlock(mdb->bi_cache.c_zctx, matched); +#endif + mdb_cache_return_entry_r (mdb, matched, &lock); + matched = NULL; + + if ( erefs ) { + rs->sr_ref = referral_rewrite( erefs, &matched_dn, + &op->o_req_dn, op->oq_search.rs_scope ); + ber_bvarray_free( erefs ); + } + + } else { +#ifdef SLAP_ZONE_ALLOC + slap_zn_runlock(mdb->bi_cache.c_zctx, matched); +#endif + rs->sr_ref = referral_rewrite( default_referral, + NULL, &op->o_req_dn, op->oq_search.rs_scope ); + rs->sr_err = rs->sr_ref != NULL ? LDAP_REFERRAL : LDAP_NO_SUCH_OBJECT; + } + + send_ldap_result( op, rs ); + + if ( rs->sr_ref ) { + ber_bvarray_free( rs->sr_ref ); + rs->sr_ref = NULL; + } + if ( !BER_BVISNULL( &matched_dn ) ) { + ber_memfree( matched_dn.bv_val ); + rs->sr_matched = NULL; + } + return rs->sr_err; + } + + /* NOTE: __NEW__ "search" access is required + * on searchBase object */ + if ( ! 
access_allowed_mask( op, e, slap_schema.si_ad_entry, + NULL, ACL_SEARCH, NULL, &mask ) ) + { + if ( !ACL_GRANT( mask, ACL_DISCLOSE ) ) { + rs->sr_err = LDAP_NO_SUCH_OBJECT; + } else { + rs->sr_err = LDAP_INSUFFICIENT_ACCESS; + } + +#ifdef SLAP_ZONE_ALLOC + slap_zn_runlock(mdb->bi_cache.c_zctx, e); +#endif + if ( e != e_root ) { + mdb_cache_return_entry_r(mdb, e, &lock); + } + send_ldap_result( op, rs ); + return rs->sr_err; + } + + if ( !manageDSAit && e != e_root && is_entry_referral( e ) ) { + /* entry is a referral, don't allow add */ + struct berval matched_dn = BER_BVNULL; + BerVarray erefs = NULL; + + ber_dupbv( &matched_dn, &e->e_name ); + erefs = get_entry_referrals( op, e ); + + rs->sr_err = LDAP_REFERRAL; + +#ifdef SLAP_ZONE_ALLOC + slap_zn_runlock(mdb->bi_cache.c_zctx, e); +#endif + mdb_cache_return_entry_r( mdb, e, &lock ); + e = NULL; + + if ( erefs ) { + rs->sr_ref = referral_rewrite( erefs, &matched_dn, + &op->o_req_dn, op->oq_search.rs_scope ); + ber_bvarray_free( erefs ); + + if ( !rs->sr_ref ) { + rs->sr_text = "bad_referral object"; + } + } + + Debug( LDAP_DEBUG_TRACE, + LDAP_XSTRING(mdb_search) ": entry is referral\n", + 0, 0, 0 ); + + rs->sr_matched = matched_dn.bv_val; + send_ldap_result( op, rs ); + + ber_bvarray_free( rs->sr_ref ); + rs->sr_ref = NULL; + ber_memfree( matched_dn.bv_val ); + rs->sr_matched = NULL; + return 1; + } + + if ( get_assert( op ) && + ( test_filter( op, e, get_assertion( op )) != LDAP_COMPARE_TRUE )) + { + rs->sr_err = LDAP_ASSERTION_FAILED; +#ifdef SLAP_ZONE_ALLOC + slap_zn_runlock(mdb->bi_cache.c_zctx, e); +#endif + if ( e != e_root ) { + mdb_cache_return_entry_r(mdb, e, &lock); + } + send_ldap_result( op, rs ); + return 1; + } + + /* compute it anyway; root does not use it */ + stoptime = op->o_time + op->ors_tlimit; + + /* need normalized dn below */ + ber_dupbv( &realbase, &e->e_nname ); + + /* Copy info to base, must free entry before accessing the database + * in search_candidates, to avoid deadlocks. 
+ */ + base.e_private = e->e_private; + base.e_nname = realbase; + base.e_id = e->e_id; + +#ifdef SLAP_ZONE_ALLOC + slap_zn_runlock(mdb->bi_cache.c_zctx, e); +#endif + if ( e != e_root ) { + mdb_cache_return_entry_r(mdb, e, &lock); + } + e = NULL; + + /* select candidates */ + if ( op->oq_search.rs_scope == LDAP_SCOPE_BASE ) { + rs->sr_err = base_candidate( op->o_bd, &base, candidates ); + + } else { +cand_retry: + MDB_IDL_ZERO( candidates ); + MDB_IDL_ZERO( scopes ); + rs->sr_err = search_candidates( op, rs, &base, + ltid, candidates, scopes ); + if ( rs->sr_err == DB_LOCK_DEADLOCK ) { + if ( !opinfo ) { + ltid->flags &= ~TXN_DEADLOCK; + goto cand_retry; + } + opinfo->boi_err = rs->sr_err; + send_ldap_error( op, rs, LDAP_BUSY, "ldap server busy" ); + return LDAP_BUSY; + } + } + + /* start cursor at beginning of candidates. + */ + cursor = 0; + + if ( candidates[0] == 0 ) { + Debug( LDAP_DEBUG_TRACE, + LDAP_XSTRING(mdb_search) ": no candidates\n", + 0, 0, 0 ); + + goto nochange; + } + + /* if not root and candidates exceed to-be-checked entries, abort */ + if ( op->ors_limit /* isroot == FALSE */ && + op->ors_limit->lms_s_unchecked != -1 && + MDB_IDL_N(candidates) > (unsigned) op->ors_limit->lms_s_unchecked ) + { + rs->sr_err = LDAP_ADMINLIMIT_EXCEEDED; + send_ldap_result( op, rs ); + rs->sr_err = LDAP_SUCCESS; + goto done; + } + + if ( op->ors_limit == NULL /* isroot == TRUE */ || + !op->ors_limit->lms_s_pr_hide ) + { + tentries = MDB_IDL_N(candidates); + } + + if ( get_pagedresults( op ) > SLAP_CONTROL_IGNORED ) { + PagedResultsState *ps = op->o_pagedresults_state; + /* deferred cookie parsing */ + rs->sr_err = parse_paged_cookie( op, rs ); + if ( rs->sr_err != LDAP_SUCCESS ) { + send_ldap_result( op, rs ); + goto done; + } + + cursor = (ID) ps->ps_cookie; + if ( cursor && ps->ps_size == 0 ) { + rs->sr_err = LDAP_SUCCESS; + rs->sr_text = "search abandoned by pagedResult size=0"; + send_ldap_result( op, rs ); + goto done; + } + id = mdb_idl_first( candidates, 
&cursor ); + if ( id == NOID ) { + Debug( LDAP_DEBUG_TRACE, + LDAP_XSTRING(mdb_search) + ": no paged results candidates\n", + 0, 0, 0 ); + send_paged_response( op, rs, &lastid, 0 ); + + rs->sr_err = LDAP_OTHER; + goto done; + } + nentries = ps->ps_count; + if ( id == (ID)ps->ps_cookie ) + id = mdb_idl_next( candidates, &cursor ); + goto loop_begin; + } + + for ( id = mdb_idl_first( candidates, &cursor ); + id != NOID ; id = mdb_idl_next( candidates, &cursor ) ) + { + int scopeok; + +loop_begin: + + /* check for abandon */ + if ( op->o_abandon ) { + rs->sr_err = SLAPD_ABANDON; + send_ldap_result( op, rs ); + goto done; + } + + /* mostly needed by internal searches, + * e.g. related to syncrepl, for whom + * abandon does not get set... */ + if ( slapd_shutdown ) { + rs->sr_err = LDAP_UNAVAILABLE; + send_ldap_disconnect( op, rs ); + goto done; + } + + /* check time limit */ + if ( op->ors_tlimit != SLAP_NO_LIMIT + && slap_get_time() > stoptime ) + { + rs->sr_err = LDAP_TIMELIMIT_EXCEEDED; + rs->sr_ref = rs->sr_v2ref; + send_ldap_result( op, rs ); + rs->sr_err = LDAP_SUCCESS; + goto done; + } + + /* If we inspect more entries than will + * fit into the entry cache, stop caching + * any subsequent entries + */ + nentries++; + if ( nentries > mdb->bi_cache.c_maxsize && !idflag ) { + idflag = ID_NOCACHE; + } + +fetch_entry_retry: + /* get the entry with reader lock */ + ei = NULL; + rs->sr_err = mdb_cache_find_id( op, ltid, + id, &ei, idflag, &lock ); + + if (rs->sr_err == LDAP_BUSY) { + rs->sr_text = "ldap server busy"; + send_ldap_result( op, rs ); + goto done; + + } else if ( rs->sr_err == DB_LOCK_DEADLOCK ) { + if ( !opinfo ) { + ltid->flags &= ~TXN_DEADLOCK; + goto fetch_entry_retry; + } +txnfail: + opinfo->boi_err = rs->sr_err; + send_ldap_error( op, rs, LDAP_BUSY, "ldap server busy" ); + goto done; + + } else if ( rs->sr_err == DB_LOCK_NOTGRANTED ) + { + goto fetch_entry_retry; + } else if ( rs->sr_err == LDAP_OTHER ) { + rs->sr_text = "internal error"; + 
send_ldap_result( op, rs ); + goto done; + } + + if ( ei && rs->sr_err == LDAP_SUCCESS ) { + e = ei->bei_e; + } else { + e = NULL; + } + + if ( e == NULL ) { + if( !MDB_IDL_IS_RANGE(candidates) ) { + /* only complain for non-range IDLs */ + Debug( LDAP_DEBUG_TRACE, + LDAP_XSTRING(mdb_search) + ": candidate %ld not found\n", + (long) id, 0, 0 ); + } else { + /* get the next ID from the DB */ +id_retry: + rs->sr_err = mdb_get_nextid( mdb, ltid, &cursor ); + if ( rs->sr_err == DB_NOTFOUND ) { + break; + } else if ( rs->sr_err == DB_LOCK_DEADLOCK ) { + if ( opinfo ) + goto txnfail; + ltid->flags &= ~TXN_DEADLOCK; + goto id_retry; + } else if ( rs->sr_err == DB_LOCK_NOTGRANTED ) { + goto id_retry; + } + if ( rs->sr_err ) { + rs->sr_err = LDAP_OTHER; + rs->sr_text = "internal error in get_nextid"; + send_ldap_result( op, rs ); + goto done; + } + cursor--; + } + + goto loop_continue; + } + + if ( is_entry_subentry( e ) ) { + if( op->oq_search.rs_scope != LDAP_SCOPE_BASE ) { + if(!get_subentries_visibility( op )) { + /* only subentries are visible */ + goto loop_continue; + } + + } else if ( get_subentries( op ) && + !get_subentries_visibility( op )) + { + /* only subentries are visible */ + goto loop_continue; + } + + } else if ( get_subentries_visibility( op )) { + /* only subentries are visible */ + goto loop_continue; + } + + /* Does this candidate actually satisfy the search scope? + * + * Note that we don't lock access to the bei_parent pointer. + * Since only leaf nodes can be deleted, the parent of any + * node will always be a valid node. Also since we have + * a Read lock on the data, it cannot be renamed out of the + * scope while we are looking at it, and unless we're using + * MDB_HIER, its parents cannot be moved either. + */ + scopeok = 0; + switch( op->ors_scope ) { + case LDAP_SCOPE_BASE: + /* This is always true, yes? 
*/ + if ( id == base.e_id ) scopeok = 1; + break; + + case LDAP_SCOPE_ONELEVEL: + if ( ei->bei_parent->bei_id == base.e_id ) scopeok = 1; + break; + +#ifdef LDAP_SCOPE_CHILDREN + case LDAP_SCOPE_CHILDREN: + if ( id == base.e_id ) break; + /* Fall-thru */ +#endif + case LDAP_SCOPE_SUBTREE: { + EntryInfo *tmp; + for ( tmp = BEI(e); tmp; tmp = tmp->bei_parent ) { + if ( tmp->bei_id == base.e_id ) { + scopeok = 1; + break; + } + } + } break; + } + + /* aliases were already dereferenced in candidate list */ + if ( op->ors_deref & LDAP_DEREF_SEARCHING ) { + /* but if the search base is an alias, and we didn't + * deref it when finding, return it. + */ + if ( is_entry_alias(e) && + ((op->ors_deref & LDAP_DEREF_FINDING) || + !bvmatch(&e->e_nname, &op->o_req_ndn))) + { + goto loop_continue; + } + + /* scopes is only non-empty for onelevel or subtree */ + if ( !scopeok && MDB_IDL_N(scopes) ) { + unsigned x; + if ( op->ors_scope == LDAP_SCOPE_ONELEVEL ) { + x = mdb_idl_search( scopes, e->e_id ); + if ( scopes[x] == e->e_id ) scopeok = 1; + } else { + /* subtree, walk up the tree */ + EntryInfo *tmp = BEI(e); + for (;tmp->bei_parent; tmp=tmp->bei_parent) { + x = mdb_idl_search( scopes, tmp->bei_id ); + if ( scopes[x] == tmp->bei_id ) { + scopeok = 1; + break; + } + } + } + } + } + + /* Not in scope, ignore it */ + if ( !scopeok ) + { + Debug( LDAP_DEBUG_TRACE, + LDAP_XSTRING(mdb_search) + ": %ld scope not okay\n", + (long) id, 0, 0 ); + goto loop_continue; + } + + /* + * if it's a referral, add it to the list of referrals. only do + * this for non-base searches, and don't check the filter + * explicitly here since it's only a candidate anyway. + */ + if ( !manageDSAit && op->oq_search.rs_scope != LDAP_SCOPE_BASE + && is_entry_referral( e ) ) + { + struct mdb_op_info bois; + struct mdb_lock_info blis; + BerVarray erefs = get_entry_referrals( op, e ); + rs->sr_ref = referral_rewrite( erefs, &e->e_name, NULL, + op->oq_search.rs_scope == LDAP_SCOPE_ONELEVEL + ? 
LDAP_SCOPE_BASE : LDAP_SCOPE_SUBTREE ); + + /* Must set lockinfo so that entry_release will work */ + if (!opinfo) { + bois.boi_oe.oe_key = mdb; + bois.boi_txn = NULL; + bois.boi_err = 0; + bois.boi_acl_cache = op->o_do_not_cache; + bois.boi_flag = BOI_DONTFREE; + bois.boi_locks = &blis; + blis.bli_next = NULL; + LDAP_SLIST_INSERT_HEAD( &op->o_extra, &bois.boi_oe, + oe_next ); + } else { + blis.bli_next = opinfo->boi_locks; + opinfo->boi_locks = &blis; + } + blis.bli_id = e->e_id; + blis.bli_lock = lock; + blis.bli_flag = BLI_DONTFREE; + + rs->sr_entry = e; + rs->sr_flags = REP_ENTRY_MUSTRELEASE; + + send_search_reference( op, rs ); + + if ( blis.bli_flag ) { +#ifdef SLAP_ZONE_ALLOC + slap_zn_runlock(mdb->bi_cache.c_zctx, e); +#endif + mdb_cache_return_entry_r(mdb, e, &lock); + if ( opinfo ) { + opinfo->boi_locks = blis.bli_next; + } else { + LDAP_SLIST_REMOVE( &op->o_extra, &bois.boi_oe, + OpExtra, oe_next ); + } + } + rs->sr_entry = NULL; + e = NULL; + + ber_bvarray_free( rs->sr_ref ); + ber_bvarray_free( erefs ); + rs->sr_ref = NULL; + + goto loop_continue; + } + + if ( !manageDSAit && is_entry_glue( e )) { + goto loop_continue; + } + + /* if it matches the filter and scope, send it */ + rs->sr_err = test_filter( op, e, op->oq_search.rs_filter ); + + if ( rs->sr_err == LDAP_COMPARE_TRUE ) { + /* check size limit */ + if ( get_pagedresults(op) > SLAP_CONTROL_IGNORED ) { + if ( rs->sr_nentries >= ((PagedResultsState *)op->o_pagedresults_state)->ps_size ) { +#ifdef SLAP_ZONE_ALLOC + slap_zn_runlock(mdb->bi_cache.c_zctx, e); +#endif + mdb_cache_return_entry_r( mdb, e, &lock ); + e = NULL; + send_paged_response( op, rs, &lastid, tentries ); + goto done; + } + lastid = id; + } + + if (e) { + struct mdb_op_info bois; + struct mdb_lock_info blis; + + /* Must set lockinfo so that entry_release will work */ + if (!opinfo) { + bois.boi_oe.oe_key = mdb; + bois.boi_txn = NULL; + bois.boi_err = 0; + bois.boi_acl_cache = op->o_do_not_cache; + bois.boi_flag = BOI_DONTFREE; + 
bois.boi_locks = &blis; + blis.bli_next = NULL; + LDAP_SLIST_INSERT_HEAD( &op->o_extra, &bois.boi_oe, + oe_next ); + } else { + blis.bli_next = opinfo->boi_locks; + opinfo->boi_locks = &blis; + } + blis.bli_id = e->e_id; + blis.bli_lock = lock; + blis.bli_flag = BLI_DONTFREE; + + /* safe default */ + rs->sr_attrs = op->oq_search.rs_attrs; + rs->sr_operational_attrs = NULL; + rs->sr_ctrls = NULL; + rs->sr_entry = e; + RS_ASSERT( e->e_private != NULL ); + rs->sr_flags = REP_ENTRY_MUSTRELEASE; + rs->sr_err = LDAP_SUCCESS; + rs->sr_err = send_search_entry( op, rs ); + rs->sr_attrs = NULL; + rs->sr_entry = NULL; + + /* send_search_entry will usually free it. + * an overlay might leave its own copy here; + * bli_flag will be 0 if lock was already released. + */ + if ( blis.bli_flag ) { +#ifdef SLAP_ZONE_ALLOC + slap_zn_runlock(mdb->bi_cache.c_zctx, e); +#endif + mdb_cache_return_entry_r(mdb, e, &lock); + if ( opinfo ) { + opinfo->boi_locks = blis.bli_next; + } else { + LDAP_SLIST_REMOVE( &op->o_extra, &bois.boi_oe, + OpExtra, oe_next ); + } + } + e = NULL; + + switch ( rs->sr_err ) { + case LDAP_SUCCESS: /* entry sent ok */ + break; + default: /* entry not sent */ + break; + case LDAP_UNAVAILABLE: + case LDAP_SIZELIMIT_EXCEEDED: + if ( rs->sr_err == LDAP_SIZELIMIT_EXCEEDED ) { + rs->sr_ref = rs->sr_v2ref; + send_ldap_result( op, rs ); + rs->sr_err = LDAP_SUCCESS; + + } else { + rs->sr_err = LDAP_OTHER; + } + goto done; + } + } + + } else { + Debug( LDAP_DEBUG_TRACE, + LDAP_XSTRING(mdb_search) + ": %ld does not match filter\n", + (long) id, 0, 0 ); + } + +loop_continue: + if( e != NULL ) { + /* free reader lock */ +#ifdef SLAP_ZONE_ALLOC + slap_zn_runlock(mdb->bi_cache.c_zctx, e); +#endif + mdb_cache_return_entry_r( mdb, e , &lock ); + RS_ASSERT( rs->sr_entry == NULL ); + e = NULL; + rs->sr_entry = NULL; + } + } + +nochange: + rs->sr_ctrls = NULL; + rs->sr_ref = rs->sr_v2ref; + rs->sr_err = (rs->sr_v2ref == NULL) ? 
LDAP_SUCCESS : LDAP_REFERRAL; + rs->sr_rspoid = NULL; + if ( get_pagedresults(op) > SLAP_CONTROL_IGNORED ) { + send_paged_response( op, rs, NULL, 0 ); + } else { + send_ldap_result( op, rs ); + } + + rs->sr_err = LDAP_SUCCESS; + +done: + if( rs->sr_v2ref ) { + ber_bvarray_free( rs->sr_v2ref ); + rs->sr_v2ref = NULL; + } + if( realbase.bv_val ) ch_free( realbase.bv_val ); + + return rs->sr_err; +} + + +static int base_candidate( + BackendDB *be, + Entry *e, + ID *ids ) +{ + Debug(LDAP_DEBUG_ARGS, "base_candidates: base: \"%s\" (0x%08lx)\n", + e->e_nname.bv_val, (long) e->e_id, 0); + + ids[0] = 1; + ids[1] = e->e_id; + return 0; +} + +/* Look for "objectClass Present" in this filter. + * Also count depth of filter tree while we're at it. + */ +static int oc_filter( + Filter *f, + int cur, + int *max ) +{ + int rc = 0; + + assert( f != NULL ); + + if( cur > *max ) *max = cur; + + switch( f->f_choice ) { + case LDAP_FILTER_PRESENT: + if (f->f_desc == slap_schema.si_ad_objectClass) { + rc = 1; + } + break; + + case LDAP_FILTER_AND: + case LDAP_FILTER_OR: + cur++; + for ( f=f->f_and; f; f=f->f_next ) { + (void) oc_filter(f, cur, max); + } + break; + + default: + break; + } + return rc; +} + +static void search_stack_free( void *key, void *data ) +{ + ber_memfree_x(data, NULL); +} + +static void *search_stack( Operation *op ) +{ + struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private; + void *ret = NULL; + + if ( op->o_threadctx ) { + ldap_pvt_thread_pool_getkey( op->o_threadctx, (void *)search_stack, + &ret, NULL ); + } else { + ret = mdb->bi_search_stack; + } + + if ( !ret ) { + ret = ch_malloc( mdb->bi_search_stack_depth * MDB_IDL_UM_SIZE + * sizeof( ID ) ); + if ( op->o_threadctx ) { + ldap_pvt_thread_pool_setkey( op->o_threadctx, (void *)search_stack, + ret, search_stack_free, NULL, NULL ); + } else { + mdb->bi_search_stack = ret; + } + } + return ret; +} + +static int search_candidates( + Operation *op, + SlapReply *rs, + Entry *e, + DB_TXN *txn, + ID 
*ids, + ID *scopes ) +{ + struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private; + int rc, depth = 1; + Filter f, rf, xf, nf; + ID *stack; + AttributeAssertion aa_ref = ATTRIBUTEASSERTION_INIT; + Filter sf; + AttributeAssertion aa_subentry = ATTRIBUTEASSERTION_INIT; + + /* + * This routine takes as input a filter (user-filter) + * and rewrites it as follows: + * (&(scope=DN)[(objectClass=subentry)] + * (|[(objectClass=referral)(objectClass=alias)](user-filter)) + */ + + Debug(LDAP_DEBUG_TRACE, + "search_candidates: base=\"%s\" (0x%08lx) scope=%d\n", + e->e_nname.bv_val, (long) e->e_id, op->oq_search.rs_scope ); + + xf.f_or = op->oq_search.rs_filter; + xf.f_choice = LDAP_FILTER_OR; + xf.f_next = NULL; + + /* If the user's filter uses objectClass=*, + * these clauses are redundant. + */ + if (!oc_filter(op->oq_search.rs_filter, 1, &depth) + && !get_subentries_visibility(op)) { + if( !get_manageDSAit(op) && !get_domainScope(op) ) { + /* match referral objects */ + struct berval bv_ref = BER_BVC( "referral" ); + rf.f_choice = LDAP_FILTER_EQUALITY; + rf.f_ava = &aa_ref; + rf.f_av_desc = slap_schema.si_ad_objectClass; + rf.f_av_value = bv_ref; + rf.f_next = xf.f_or; + xf.f_or = &rf; + depth++; + } + } + + f.f_next = NULL; + f.f_choice = LDAP_FILTER_AND; + f.f_and = &nf; + /* Dummy; we compute scope separately now */ + nf.f_choice = SLAPD_FILTER_COMPUTED; + nf.f_result = LDAP_SUCCESS; + nf.f_next = ( xf.f_or == op->oq_search.rs_filter ) + ? 
op->oq_search.rs_filter : &xf ; + /* Filter depth increased again, adding dummy clause */ + depth++; + + if( get_subentries_visibility( op ) ) { + struct berval bv_subentry = BER_BVC( "subentry" ); + sf.f_choice = LDAP_FILTER_EQUALITY; + sf.f_ava = &aa_subentry; + sf.f_av_desc = slap_schema.si_ad_objectClass; + sf.f_av_value = bv_subentry; + sf.f_next = nf.f_next; + nf.f_next = &sf; + } + + /* Allocate IDL stack, plus 1 more for former tmp */ + if ( depth+1 > mdb->bi_search_stack_depth ) { + stack = ch_malloc( (depth + 1) * MDB_IDL_UM_SIZE * sizeof( ID ) ); + } else { + stack = search_stack( op ); + } + + if( op->ors_deref & LDAP_DEREF_SEARCHING ) { + rc = search_aliases( op, rs, e, txn, ids, scopes, stack ); + } else { + rc = mdb_dn2idl( op, txn, &e->e_nname, BEI(e), ids, stack ); + } + + if ( rc == LDAP_SUCCESS ) { + rc = mdb_filter_candidates( op, txn, &f, ids, + stack, stack+MDB_IDL_UM_SIZE ); + } + + if ( depth+1 > mdb->bi_search_stack_depth ) { + ch_free( stack ); + } + + if( rc ) { + Debug(LDAP_DEBUG_TRACE, + "mdb_search_candidates: failed (rc=%d)\n", + rc, NULL, NULL ); + + } else { + Debug(LDAP_DEBUG_TRACE, + "mdb_search_candidates: id=%ld first=%ld last=%ld\n", + (long) ids[0], + (long) MDB_IDL_FIRST(ids), + (long) MDB_IDL_LAST(ids) ); + } + + return rc; +} + +static int +parse_paged_cookie( Operation *op, SlapReply *rs ) +{ + int rc = LDAP_SUCCESS; + PagedResultsState *ps = op->o_pagedresults_state; + + /* this function must be invoked only if the pagedResults + * control has been detected, parsed and partially checked + * by the frontend */ + assert( get_pagedresults( op ) > SLAP_CONTROL_IGNORED ); + + /* cookie decoding/checks deferred to backend... 
*/ + if ( ps->ps_cookieval.bv_len ) { + PagedResultsCookie reqcookie; + if( ps->ps_cookieval.bv_len != sizeof( reqcookie ) ) { + /* bad cookie */ + rs->sr_text = "paged results cookie is invalid"; + rc = LDAP_PROTOCOL_ERROR; + goto done; + } + + AC_MEMCPY( &reqcookie, ps->ps_cookieval.bv_val, sizeof( reqcookie )); + + if ( reqcookie > ps->ps_cookie ) { + /* bad cookie */ + rs->sr_text = "paged results cookie is invalid"; + rc = LDAP_PROTOCOL_ERROR; + goto done; + + } else if ( reqcookie < ps->ps_cookie ) { + rs->sr_text = "paged results cookie is invalid or old"; + rc = LDAP_UNWILLING_TO_PERFORM; + goto done; + } + + } else { + /* we're going to use ps_cookie */ + op->o_conn->c_pagedresults_state.ps_cookie = 0; + } + +done:; + + return rc; +} + +static void +send_paged_response( + Operation *op, + SlapReply *rs, + ID *lastid, + int tentries ) +{ + LDAPControl *ctrls[2]; + BerElementBuffer berbuf; + BerElement *ber = (BerElement *)&berbuf; + PagedResultsCookie respcookie; + struct berval cookie; + + Debug(LDAP_DEBUG_ARGS, + "send_paged_response: lastid=0x%08lx nentries=%d\n", + lastid ? 
*lastid : 0, rs->sr_nentries, NULL ); + + ctrls[1] = NULL; + + ber_init2( ber, NULL, LBER_USE_DER ); + + if ( lastid ) { + respcookie = ( PagedResultsCookie )(*lastid); + cookie.bv_len = sizeof( respcookie ); + cookie.bv_val = (char *)&respcookie; + + } else { + respcookie = ( PagedResultsCookie )0; + BER_BVSTR( &cookie, "" ); + } + + op->o_conn->c_pagedresults_state.ps_cookie = respcookie; + op->o_conn->c_pagedresults_state.ps_count = + ((PagedResultsState *)op->o_pagedresults_state)->ps_count + + rs->sr_nentries; + + /* return size of 0 -- no estimate */ + ber_printf( ber, "{iO}", 0, &cookie ); + + ctrls[0] = op->o_tmpalloc( sizeof(LDAPControl), op->o_tmpmemctx ); + if ( ber_flatten2( ber, &ctrls[0]->ldctl_value, 0 ) == -1 ) { + goto done; + } + + ctrls[0]->ldctl_oid = LDAP_CONTROL_PAGEDRESULTS; + ctrls[0]->ldctl_iscritical = 0; + + slap_add_ctrls( op, rs, ctrls ); + rs->sr_err = LDAP_SUCCESS; + send_ldap_result( op, rs ); + +done: + (void) ber_free_buf( ber ); +} diff --git a/servers/slapd/back-mdb/tools.c b/servers/slapd/back-mdb/tools.c new file mode 100644 index 0000000000..208afab1b3 --- /dev/null +++ b/servers/slapd/back-mdb/tools.c @@ -0,0 +1,999 @@ +/* tools.c - tools for slap tools */ +/* $OpenLDAP$ */ +/* This work is part of OpenLDAP Software . + * + * Copyright 2000-2011 The OpenLDAP Foundation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * . 
+ */ + +#include "portable.h" + +#include +#include +#include + +#define AVL_INTERNAL +#include "back-mdb.h" +#include "idl.h" + +static DBC *cursor = NULL; +static DBT key, data; +static EntryHeader eh; +static ID nid, previd = NOID; +static char ehbuf[16]; + +typedef struct dn_id { + ID id; + struct berval dn; +} dn_id; + +#define HOLE_SIZE 4096 +static dn_id hbuf[HOLE_SIZE], *holes = hbuf; +static unsigned nhmax = HOLE_SIZE; +static unsigned nholes; + +static int index_nattrs; + +static struct berval *tool_base; +static int tool_scope; +static Filter *tool_filter; +static Entry *tool_next_entry; + +static ID mdb_tool_ix_id; +static Operation *mdb_tool_ix_op; +static int *mdb_tool_index_threads, mdb_tool_index_tcount; +static void *mdb_tool_index_rec; +static struct mdb_info *mdb_tool_info; +static ldap_pvt_thread_mutex_t mdb_tool_index_mutex; +static ldap_pvt_thread_cond_t mdb_tool_index_cond_main; +static ldap_pvt_thread_cond_t mdb_tool_index_cond_work; + +#if DB_VERSION_FULL >= 0x04060000 +#define USE_TRICKLE 1 +#else +/* Seems to slow things down too much in MDB 4.5 */ +#undef USE_TRICKLE +#endif + +#ifdef USE_TRICKLE +static ldap_pvt_thread_mutex_t mdb_tool_trickle_mutex; +static ldap_pvt_thread_cond_t mdb_tool_trickle_cond; +static ldap_pvt_thread_cond_t mdb_tool_trickle_cond_end; + +static void * mdb_tool_trickle_task( void *ctx, void *ptr ); +static int mdb_tool_trickle_active; +#endif + +static void * mdb_tool_index_task( void *ctx, void *ptr ); + +static int +mdb_tool_entry_get_int( BackendDB *be, ID id, Entry **ep ); + +int mdb_tool_entry_open( + BackendDB *be, int mode ) +{ + struct mdb_info *mdb = (struct mdb_info *) be->be_private; + + /* initialize key and data thangs */ + DBTzero( &key ); + DBTzero( &data ); + key.flags = DB_DBT_USERMEM; + key.data = &nid; + key.size = key.ulen = sizeof( nid ); + data.flags = DB_DBT_USERMEM; + + if (cursor == NULL) { + int rc = mdb->bi_id2entry->bdi_db->cursor( + mdb->bi_id2entry->bdi_db, mdb->bi_cache.c_txn, 
&cursor, + mdb->bi_db_opflags ); + if( rc != 0 ) { + return -1; + } + } + + /* Set up for threaded slapindex */ + if (( slapMode & (SLAP_TOOL_QUICK|SLAP_TOOL_READONLY)) == SLAP_TOOL_QUICK ) { + if ( !mdb_tool_info ) { +#ifdef USE_TRICKLE + ldap_pvt_thread_mutex_init( &mdb_tool_trickle_mutex ); + ldap_pvt_thread_cond_init( &mdb_tool_trickle_cond ); + ldap_pvt_thread_cond_init( &mdb_tool_trickle_cond_end ); + ldap_pvt_thread_pool_submit( &connection_pool, mdb_tool_trickle_task, mdb->bi_dbenv ); +#endif + + ldap_pvt_thread_mutex_init( &mdb_tool_index_mutex ); + ldap_pvt_thread_cond_init( &mdb_tool_index_cond_main ); + ldap_pvt_thread_cond_init( &mdb_tool_index_cond_work ); + if ( mdb->bi_nattrs ) { + int i; + mdb_tool_index_threads = ch_malloc( slap_tool_thread_max * sizeof( int )); + mdb_tool_index_rec = ch_malloc( mdb->bi_nattrs * sizeof( IndexRec )); + mdb_tool_index_tcount = slap_tool_thread_max - 1; + for (i=1; ic_close( cursor ); + cursor = NULL; + } + + if( nholes ) { + unsigned i; + fprintf( stderr, "Error, entries missing!\n"); + for (i=0; ibe_private; + assert( mdb != NULL ); + +next:; + /* Get the header */ + data.ulen = data.dlen = sizeof( ehbuf ); + data.data = ehbuf; + data.flags |= DB_DBT_PARTIAL; + rc = cursor->c_get( cursor, &key, &data, DB_NEXT ); + + if( rc ) { + /* If we're doing linear indexing and there are more attrs to + * index, and we're at the end of the database, start over. + */ + if ( index_nattrs && rc == DB_NOTFOUND ) { + /* optional - do a checkpoint here? 
*/ + mdb_attr_info_free( mdb->bi_attrs[0] ); + mdb->bi_attrs[0] = mdb->bi_attrs[index_nattrs]; + index_nattrs--; + rc = cursor->c_get( cursor, &key, &data, DB_FIRST ); + if ( rc ) { + return NOID; + } + } else { + return NOID; + } + } + + MDB_DISK2ID( key.data, &id ); + previd = id; + + if ( tool_filter || tool_base ) { + static Operation op = {0}; + static Opheader ohdr = {0}; + + op.o_hdr = &ohdr; + op.o_bd = be; + op.o_tmpmemctx = NULL; + op.o_tmpmfuncs = &ch_mfuncs; + + if ( tool_next_entry ) { + mdb_entry_release( &op, tool_next_entry, 0 ); + tool_next_entry = NULL; + } + + rc = mdb_tool_entry_get_int( be, id, &tool_next_entry ); + if ( rc == LDAP_NO_SUCH_OBJECT ) { + goto next; + } + + assert( tool_next_entry != NULL ); + +#ifdef MDB_HIER + /* TODO: needed until MDB_HIER is handled accordingly + * in mdb_tool_entry_get_int() */ + if ( tool_base && !dnIsSuffixScope( &tool_next_entry->e_nname, tool_base, tool_scope ) ) + { + mdb_entry_release( &op, tool_next_entry, 0 ); + tool_next_entry = NULL; + goto next; + } +#endif + + if ( tool_filter && test_filter( NULL, tool_next_entry, tool_filter ) != LDAP_COMPARE_TRUE ) + { + mdb_entry_release( &op, tool_next_entry, 0 ); + tool_next_entry = NULL; + goto next; + } + } + + return id; +} + +ID mdb_tool_dn2id_get( + Backend *be, + struct berval *dn +) +{ + Operation op = {0}; + Opheader ohdr = {0}; + EntryInfo *ei = NULL; + int rc; + + if ( BER_BVISEMPTY(dn) ) + return 0; + + op.o_hdr = &ohdr; + op.o_bd = be; + op.o_tmpmemctx = NULL; + op.o_tmpmfuncs = &ch_mfuncs; + + rc = mdb_cache_find_ndn( &op, 0, dn, &ei ); + if ( ei ) mdb_cache_entryinfo_unlock( ei ); + if ( rc == DB_NOTFOUND ) + return NOID; + + return ei->bei_id; +} + +static int +mdb_tool_entry_get_int( BackendDB *be, ID id, Entry **ep ) +{ + Entry *e = NULL; + char *dptr; + int rc, eoff; + + assert( be != NULL ); + assert( slapMode & SLAP_TOOL_MODE ); + + if ( ( tool_filter || tool_base ) && id == previd && tool_next_entry != NULL ) { + *ep = tool_next_entry; + 
tool_next_entry = NULL; + return LDAP_SUCCESS; + } + + if ( id != previd ) { + data.ulen = data.dlen = sizeof( ehbuf ); + data.data = ehbuf; + data.flags |= DB_DBT_PARTIAL; + + MDB_ID2DISK( id, &nid ); + rc = cursor->c_get( cursor, &key, &data, DB_SET ); + if ( rc ) { + rc = LDAP_OTHER; + goto done; + } + } + + /* Get the header */ + dptr = eh.bv.bv_val; + eh.bv.bv_val = ehbuf; + eh.bv.bv_len = data.size; + rc = entry_header( &eh ); + eoff = eh.data - eh.bv.bv_val; + eh.bv.bv_val = dptr; + if ( rc ) { + rc = LDAP_OTHER; + goto done; + } + + /* Get the size */ + data.flags &= ~DB_DBT_PARTIAL; + data.ulen = 0; + rc = cursor->c_get( cursor, &key, &data, DB_CURRENT ); + if ( rc != DB_BUFFER_SMALL ) { + rc = LDAP_OTHER; + goto done; + } + + /* Allocate a block and retrieve the data */ + eh.bv.bv_len = eh.nvals * sizeof( struct berval ) + data.size; + eh.bv.bv_val = ch_realloc( eh.bv.bv_val, eh.bv.bv_len ); + eh.data = eh.bv.bv_val + eh.nvals * sizeof( struct berval ); + data.data = eh.data; + data.ulen = data.size; + + /* Skip past already parsed nattr/nvals */ + eh.data += eoff; + + rc = cursor->c_get( cursor, &key, &data, DB_CURRENT ); + if ( rc ) { + rc = LDAP_OTHER; + goto done; + } + +#ifndef MDB_HIER + /* TODO: handle MDB_HIER accordingly */ + if ( tool_base != NULL ) { + struct berval ndn; + entry_decode_dn( &eh, NULL, &ndn ); + + if ( !dnIsSuffixScope( &ndn, tool_base, tool_scope ) ) { + return LDAP_NO_SUCH_OBJECT; + } + } +#endif + +#ifdef SLAP_ZONE_ALLOC + /* FIXME: will add ctx later */ + rc = entry_decode( &eh, &e, NULL ); +#else + rc = entry_decode( &eh, &e ); +#endif + + if( rc == LDAP_SUCCESS ) { + e->e_id = id; +#ifdef MDB_HIER + if ( slapMode & SLAP_TOOL_READONLY ) { + struct mdb_info *mdb = (struct mdb_info *) be->be_private; + EntryInfo *ei = NULL; + Operation op = {0}; + Opheader ohdr = {0}; + + op.o_hdr = &ohdr; + op.o_bd = be; + op.o_tmpmemctx = NULL; + op.o_tmpmfuncs = &ch_mfuncs; + + rc = mdb_cache_find_parent( &op, mdb->bi_cache.c_txn, id, &ei 
); + if ( rc == LDAP_SUCCESS ) { + mdb_cache_entryinfo_unlock( ei ); + e->e_private = ei; + ei->bei_e = e; + mdb_fix_dn( e, 0 ); + ei->bei_e = NULL; + e->e_private = NULL; + } + } +#endif + } +done: + if ( e != NULL ) { + *ep = e; + } + + return rc; +} + +Entry* +mdb_tool_entry_get( BackendDB *be, ID id ) +{ + Entry *e = NULL; + + (void)mdb_tool_entry_get_int( be, id, &e ); + return e; +} + +static int mdb_tool_next_id( + Operation *op, + DB_TXN *tid, + Entry *e, + struct berval *text, + int hole ) +{ + struct berval dn = e->e_name; + struct berval ndn = e->e_nname; + struct berval pdn, npdn; + EntryInfo *ei = NULL, eidummy; + int rc; + + if (ndn.bv_len == 0) { + e->e_id = 0; + return 0; + } + + rc = mdb_cache_find_ndn( op, tid, &ndn, &ei ); + if ( ei ) mdb_cache_entryinfo_unlock( ei ); + if ( rc == DB_NOTFOUND ) { + if ( !be_issuffix( op->o_bd, &ndn ) ) { + ID eid = e->e_id; + dnParent( &dn, &pdn ); + dnParent( &ndn, &npdn ); + e->e_name = pdn; + e->e_nname = npdn; + rc = mdb_tool_next_id( op, tid, e, text, 1 ); + e->e_name = dn; + e->e_nname = ndn; + if ( rc ) { + return rc; + } + /* If parent didn't exist, it was created just now + * and its ID is now in e->e_id. Make sure the current + * entry gets added under the new parent ID. 
+ */ + if ( eid != e->e_id ) { + eidummy.bei_id = e->e_id; + ei = &eidummy; + } + } + rc = mdb_next_id( op->o_bd, &e->e_id ); + if ( rc ) { + snprintf( text->bv_val, text->bv_len, + "next_id failed: %s (%d)", + db_strerror(rc), rc ); + Debug( LDAP_DEBUG_ANY, + "=> mdb_tool_next_id: %s\n", text->bv_val, 0, 0 ); + return rc; + } + rc = mdb_dn2id_add( op, tid, ei, e ); + if ( rc ) { + snprintf( text->bv_val, text->bv_len, + "dn2id_add failed: %s (%d)", + db_strerror(rc), rc ); + Debug( LDAP_DEBUG_ANY, + "=> mdb_tool_next_id: %s\n", text->bv_val, 0, 0 ); + } else if ( hole ) { + if ( nholes == nhmax - 1 ) { + if ( holes == hbuf ) { + holes = ch_malloc( nhmax * sizeof(dn_id) * 2 ); + AC_MEMCPY( holes, hbuf, sizeof(hbuf) ); + } else { + holes = ch_realloc( holes, nhmax * sizeof(dn_id) * 2 ); + } + nhmax *= 2; + } + ber_dupbv( &holes[nholes].dn, &ndn ); + holes[nholes++].id = e->e_id; + } + } else if ( !hole ) { + unsigned i, j; + + e->e_id = ei->bei_id; + + for ( i=0; ie_id ) { + free(holes[i].dn.bv_val); + for (j=i;j e->e_id ) { + break; + } + } + } + return rc; +} + +static int +mdb_tool_index_add( + Operation *op, + DB_TXN *txn, + Entry *e ) +{ + struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private; + + if ( !mdb->bi_nattrs ) + return 0; + + if ( slapMode & SLAP_TOOL_QUICK ) { + IndexRec *ir; + int i, rc; + Attribute *a; + + ir = mdb_tool_index_rec; + memset(ir, 0, mdb->bi_nattrs * sizeof( IndexRec )); + + for ( a = e->e_attrs; a != NULL; a = a->a_next ) { + rc = mdb_index_recset( mdb, a, a->a_desc->ad_type, + &a->a_desc->ad_tags, ir ); + if ( rc ) + return rc; + } + mdb_tool_ix_id = e->e_id; + mdb_tool_ix_op = op; + ldap_pvt_thread_mutex_lock( &mdb_tool_index_mutex ); + /* Wait for all threads to be ready */ + while ( mdb_tool_index_tcount ) { + ldap_pvt_thread_cond_wait( &mdb_tool_index_cond_main, + &mdb_tool_index_mutex ); + } + for ( i=1; ie_id, 0 ); + if ( rc ) + return rc; + ldap_pvt_thread_mutex_lock( &mdb_tool_index_mutex ); + for ( i=1; ibv_val != 
NULL ); + assert( text->bv_val[0] == '\0' ); /* overconservative? */ + + Debug( LDAP_DEBUG_TRACE, "=> " LDAP_XSTRING(mdb_tool_entry_put) + "( %ld, \"%s\" )\n", (long) e->e_id, e->e_dn, 0 ); + + mdb = (struct mdb_info *) be->be_private; + + if (! (slapMode & SLAP_TOOL_QUICK)) { + rc = TXN_BEGIN( mdb->bi_dbenv, NULL, &tid, + mdb->bi_db_opflags ); + if( rc != 0 ) { + snprintf( text->bv_val, text->bv_len, + "txn_begin failed: %s (%d)", + db_strerror(rc), rc ); + Debug( LDAP_DEBUG_ANY, + "=> " LDAP_XSTRING(mdb_tool_entry_put) ": %s\n", + text->bv_val, 0, 0 ); + return NOID; + } + } + + op.o_hdr = &ohdr; + op.o_bd = be; + op.o_tmpmemctx = NULL; + op.o_tmpmfuncs = &ch_mfuncs; + + /* add dn2id indices */ + rc = mdb_tool_next_id( &op, tid, e, text, 0 ); + if( rc != 0 ) { + goto done; + } + +#ifdef USE_TRICKLE + if (( slapMode & SLAP_TOOL_QUICK ) && (( e->e_id & 0xfff ) == 0xfff )) { + ldap_pvt_thread_cond_signal( &mdb_tool_trickle_cond ); + } +#endif + + if ( !mdb->bi_linear_index ) + rc = mdb_tool_index_add( &op, tid, e ); + if( rc != 0 ) { + snprintf( text->bv_val, text->bv_len, + "index_entry_add failed: %s (%d)", + rc == LDAP_OTHER ? 
"Internal error" : + db_strerror(rc), rc ); + Debug( LDAP_DEBUG_ANY, + "=> " LDAP_XSTRING(mdb_tool_entry_put) ": %s\n", + text->bv_val, 0, 0 ); + goto done; + } + + /* id2entry index */ + rc = mdb_id2entry_add( be, tid, e ); + if( rc != 0 ) { + snprintf( text->bv_val, text->bv_len, + "id2entry_add failed: %s (%d)", + db_strerror(rc), rc ); + Debug( LDAP_DEBUG_ANY, + "=> " LDAP_XSTRING(mdb_tool_entry_put) ": %s\n", + text->bv_val, 0, 0 ); + goto done; + } + +done: + if( rc == 0 ) { + if ( !( slapMode & SLAP_TOOL_QUICK )) { + rc = TXN_COMMIT( tid, 0 ); + if( rc != 0 ) { + snprintf( text->bv_val, text->bv_len, + "txn_commit failed: %s (%d)", + db_strerror(rc), rc ); + Debug( LDAP_DEBUG_ANY, + "=> " LDAP_XSTRING(mdb_tool_entry_put) ": %s\n", + text->bv_val, 0, 0 ); + e->e_id = NOID; + } + } + + } else { + if ( !( slapMode & SLAP_TOOL_QUICK )) { + TXN_ABORT( tid ); + snprintf( text->bv_val, text->bv_len, + "txn_aborted! %s (%d)", + rc == LDAP_OTHER ? "Internal error" : + db_strerror(rc), rc ); + Debug( LDAP_DEBUG_ANY, + "=> " LDAP_XSTRING(mdb_tool_entry_put) ": %s\n", + text->bv_val, 0, 0 ); + } + e->e_id = NOID; + } + + return e->e_id; +} + +int mdb_tool_entry_reindex( + BackendDB *be, + ID id, + AttributeDescription **adv ) +{ + struct mdb_info *bi = (struct mdb_info *) be->be_private; + int rc; + Entry *e; + DB_TXN *tid = NULL; + Operation op = {0}; + Opheader ohdr = {0}; + + Debug( LDAP_DEBUG_ARGS, + "=> " LDAP_XSTRING(mdb_tool_entry_reindex) "( %ld )\n", + (long) id, 0, 0 ); + assert( tool_base == NULL ); + assert( tool_filter == NULL ); + + /* No indexes configured, nothing to do. Could return an + * error here to shortcut things. 
+ */ + if (!bi->bi_attrs) { + return 0; + } + + /* Check for explicit list of attrs to index */ + if ( adv ) { + int i, j, n; + + if ( bi->bi_attrs[0]->ai_desc != adv[0] ) { + /* count */ + for ( n = 0; adv[n]; n++ ) ; + + /* insertion sort */ + for ( i = 0; i < n; i++ ) { + AttributeDescription *ad = adv[i]; + for ( j = i-1; j>=0; j--) { + if ( SLAP_PTRCMP( adv[j], ad ) <= 0 ) break; + adv[j+1] = adv[j]; + } + adv[j+1] = ad; + } + } + + for ( i = 0; adv[i]; i++ ) { + if ( bi->bi_attrs[i]->ai_desc != adv[i] ) { + for ( j = i+1; j < bi->bi_nattrs; j++ ) { + if ( bi->bi_attrs[j]->ai_desc == adv[i] ) { + AttrInfo *ai = bi->bi_attrs[i]; + bi->bi_attrs[i] = bi->bi_attrs[j]; + bi->bi_attrs[j] = ai; + break; + } + } + if ( j == bi->bi_nattrs ) { + Debug( LDAP_DEBUG_ANY, + LDAP_XSTRING(mdb_tool_entry_reindex) + ": no index configured for %s\n", + adv[i]->ad_cname.bv_val, 0, 0 ); + return -1; + } + } + } + bi->bi_nattrs = i; + } + + /* Get the first attribute to index */ + if (bi->bi_linear_index && !index_nattrs) { + index_nattrs = bi->bi_nattrs - 1; + bi->bi_nattrs = 1; + } + + e = mdb_tool_entry_get( be, id ); + + if( e == NULL ) { + Debug( LDAP_DEBUG_ANY, + LDAP_XSTRING(mdb_tool_entry_reindex) + ": could not locate id=%ld\n", + (long) id, 0, 0 ); + return -1; + } + + if (! (slapMode & SLAP_TOOL_QUICK)) { + rc = TXN_BEGIN( bi->bi_dbenv, NULL, &tid, bi->bi_db_opflags ); + if( rc != 0 ) { + Debug( LDAP_DEBUG_ANY, + "=> " LDAP_XSTRING(mdb_tool_entry_reindex) ": " + "txn_begin failed: %s (%d)\n", + db_strerror(rc), rc, 0 ); + goto done; + } + } + + /* + * just (re)add them for now + * assume that some other routine (not yet implemented) + * will zap index databases + * + */ + + Debug( LDAP_DEBUG_TRACE, + "=> " LDAP_XSTRING(mdb_tool_entry_reindex) "( %ld, \"%s\" )\n", + (long) id, e->e_dn, 0 ); + + op.o_hdr = &ohdr; + op.o_bd = be; + op.o_tmpmemctx = NULL; + op.o_tmpmfuncs = &ch_mfuncs; + + rc = mdb_tool_index_add( &op, tid, e ); + +done: + if( rc == 0 ) { + if (! 
(slapMode & SLAP_TOOL_QUICK)) { + rc = TXN_COMMIT( tid, 0 ); + if( rc != 0 ) { + Debug( LDAP_DEBUG_ANY, + "=> " LDAP_XSTRING(mdb_tool_entry_reindex) + ": txn_commit failed: %s (%d)\n", + db_strerror(rc), rc, 0 ); + e->e_id = NOID; + } + } + + } else { + if (! (slapMode & SLAP_TOOL_QUICK)) { + TXN_ABORT( tid ); + Debug( LDAP_DEBUG_ANY, + "=> " LDAP_XSTRING(mdb_tool_entry_reindex) + ": txn_aborted! %s (%d)\n", + db_strerror(rc), rc, 0 ); + } + e->e_id = NOID; + } + mdb_entry_release( &op, e, 0 ); + + return rc; +} + +ID mdb_tool_entry_modify( + BackendDB *be, + Entry *e, + struct berval *text ) +{ + int rc; + struct mdb_info *mdb; + DB_TXN *tid = NULL; + Operation op = {0}; + Opheader ohdr = {0}; + + assert( be != NULL ); + assert( slapMode & SLAP_TOOL_MODE ); + + assert( text != NULL ); + assert( text->bv_val != NULL ); + assert( text->bv_val[0] == '\0' ); /* overconservative? */ + + assert ( e->e_id != NOID ); + + Debug( LDAP_DEBUG_TRACE, + "=> " LDAP_XSTRING(mdb_tool_entry_modify) "( %ld, \"%s\" )\n", + (long) e->e_id, e->e_dn, 0 ); + + mdb = (struct mdb_info *) be->be_private; + + if (! (slapMode & SLAP_TOOL_QUICK)) { + if( cursor ) { + cursor->c_close( cursor ); + cursor = NULL; + } + rc = TXN_BEGIN( mdb->bi_dbenv, NULL, &tid, + mdb->bi_db_opflags ); + if( rc != 0 ) { + snprintf( text->bv_val, text->bv_len, + "txn_begin failed: %s (%d)", + db_strerror(rc), rc ); + Debug( LDAP_DEBUG_ANY, + "=> " LDAP_XSTRING(mdb_tool_entry_modify) ": %s\n", + text->bv_val, 0, 0 ); + return NOID; + } + } + + op.o_hdr = &ohdr; + op.o_bd = be; + op.o_tmpmemctx = NULL; + op.o_tmpmfuncs = &ch_mfuncs; + + /* id2entry index */ + rc = mdb_id2entry_update( be, tid, e ); + if( rc != 0 ) { + snprintf( text->bv_val, text->bv_len, + "id2entry_add failed: %s (%d)", + db_strerror(rc), rc ); + Debug( LDAP_DEBUG_ANY, + "=> " LDAP_XSTRING(mdb_tool_entry_modify) ": %s\n", + text->bv_val, 0, 0 ); + goto done; + } + +done: + if( rc == 0 ) { + if (! 
(slapMode & SLAP_TOOL_QUICK)) { + rc = TXN_COMMIT( tid, 0 ); + if( rc != 0 ) { + snprintf( text->bv_val, text->bv_len, + "txn_commit failed: %s (%d)", + db_strerror(rc), rc ); + Debug( LDAP_DEBUG_ANY, + "=> " LDAP_XSTRING(mdb_tool_entry_modify) ": " + "%s\n", text->bv_val, 0, 0 ); + e->e_id = NOID; + } + } + + } else { + if (! (slapMode & SLAP_TOOL_QUICK)) { + TXN_ABORT( tid ); + snprintf( text->bv_val, text->bv_len, + "txn_aborted! %s (%d)", + db_strerror(rc), rc ); + Debug( LDAP_DEBUG_ANY, + "=> " LDAP_XSTRING(mdb_tool_entry_modify) ": %s\n", + text->bv_val, 0, 0 ); + } + e->e_id = NOID; + } + + return e->e_id; +} + +#ifdef USE_TRICKLE +static void * +mdb_tool_trickle_task( void *ctx, void *ptr ) +{ + DB_ENV *env = ptr; + int wrote; + + ldap_pvt_thread_mutex_lock( &mdb_tool_trickle_mutex ); + mdb_tool_trickle_active = 1; + ldap_pvt_thread_cond_signal( &mdb_tool_trickle_cond_end ); + while ( 1 ) { + ldap_pvt_thread_cond_wait( &mdb_tool_trickle_cond, + &mdb_tool_trickle_mutex ); + if ( slapd_shutdown ) + break; + env->memp_trickle( env, 30, &wrote ); + } + mdb_tool_trickle_active = 0; + ldap_pvt_thread_cond_signal( &mdb_tool_trickle_cond_end ); + ldap_pvt_thread_mutex_unlock( &mdb_tool_trickle_mutex ); + + return NULL; +} +#endif + +static void * +mdb_tool_index_task( void *ctx, void *ptr ) +{ + int base = *(int *)ptr; + + free( ptr ); + while ( 1 ) { + ldap_pvt_thread_mutex_lock( &mdb_tool_index_mutex ); + mdb_tool_index_tcount--; + if ( !mdb_tool_index_tcount ) + ldap_pvt_thread_cond_signal( &mdb_tool_index_cond_main ); + ldap_pvt_thread_cond_wait( &mdb_tool_index_cond_work, + &mdb_tool_index_mutex ); + if ( slapd_shutdown ) { + mdb_tool_index_tcount--; + if ( !mdb_tool_index_tcount ) + ldap_pvt_thread_cond_signal( &mdb_tool_index_cond_main ); + ldap_pvt_thread_mutex_unlock( &mdb_tool_index_mutex ); + break; + } + ldap_pvt_thread_mutex_unlock( &mdb_tool_index_mutex ); + + mdb_tool_index_threads[base] = mdb_index_recrun( mdb_tool_ix_op, + mdb_tool_info, 
mdb_tool_index_rec, mdb_tool_ix_id, base ); + } + + return NULL; +} -- 2.39.5