From: Pierangelo Masarati Date: Mon, 2 Sep 2002 19:41:03 +0000 (+0000) Subject: /* X-Git-Tag: NO_SLAP_OP_BLOCKS~1037 X-Git-Url: https://git.sur5r.net/?a=commitdiff_plain;h=d3ca441ae8c00998a5a92b6c55b9e7e3516e7501;p=openldap /* * The original code performs ( n ) normalizations * and ( n * ( n - 1 ) / 2 ) matches, which hide * the same number of normalizations. The new code * performs the same number of normalizations ( n ) * and ( n * ( n - 1 ) / 2 ) mem compares, far less * expensive than an entire match, if a match is * equivalent to a normalization and a mem compare ... * * This is far more memory expensive than the previous, * but it can heavily improve performances when big * chunks of data are added (typical example is a group * with thousands of DN-syntax members; on my system: * for members of 5-RDN DNs, members orig bvmatch (dirty) new 1000 0m38.456s 0m0.553s 0m0.608s 2000 2m33.341s 0m0.851s 0m1.003s * Moreover, 100 groups with 10000 members each were * added in 37m27.933s (an analogous LDIF file was * loaded into Active Directory in 38m28.682s, BTW). * * Maybe we could switch to the new algorithm when * the number of values overcomes a given threshold? */ --- diff --git a/servers/slapd/add.c b/servers/slapd/add.c index 6e7cb7e443..a907872273 100644 --- a/servers/slapd/add.c +++ b/servers/slapd/add.c @@ -398,10 +398,8 @@ static int slap_mods2entry( for ( i = 0; mods->sml_bvalues[i].bv_val != NULL; i++ ) { /* test asserted values against themselves */ for( j = 0; j < i; j++ ) { - int rc = ber_bvcmp( &mods->sml_bvalues[i], - &mods->sml_bvalues[j] ); - - if( rc == 0 ) { + if ( bvmatch( &mods->sml_bvalues[i], + &mods->sml_bvalues[j] ) ) { /* value exists already */ snprintf( textbuf, textlen, "%s: value #%d provided more than once", @@ -412,34 +410,16 @@ static int slap_mods2entry( } } else { - for ( i = 0; mods->sml_bvalues[i].bv_val != NULL; i++ ) { - int rc, match; - const char *text = NULL; - struct berval asserted; - - rc = value_normalize( mods->sml_desc, - SLAP_MR_EQUALITY, - &mods->sml_bvalues[i], - &asserted, - &text ); - - if( rc != LDAP_SUCCESS ) return rc; - - for ( j = 0; j < i; j++ ) { - int rc = value_match( &match, mods->sml_desc, mr, - SLAP_MR_VALUE_SYNTAX_MATCH, - &mods->sml_bvalues[j], &asserted, &text ); - - if( rc == LDAP_SUCCESS && match == 0 ) { - free( asserted.bv_val ); - snprintf( textbuf, textlen, - "%s: value #%d provided more than once", - mods->sml_desc->ad_cname.bv_val, j ); - return LDAP_TYPE_OR_VALUE_EXISTS; - } - } - - free( asserted.bv_val ); + int rc; + const char *text = NULL; + char textbuf[ SLAP_TEXT_BUFLEN ] = { '\0' }; + + rc = modify_check_duplicates( mods->sml_desc, mr, + NULL, mods->sml_bvalues, + &text, textbuf, sizeof( textbuf ) ); + + if ( rc != LDAP_SUCCESS ) { + return rc; } } } diff --git a/servers/slapd/mods.c b/servers/slapd/mods.c index f57cf41dab..47ae759468 100644 --- a/servers/slapd/mods.c +++ b/servers/slapd/mods.c @@ -18,6 +18,160 @@ #include "slap.h" +#undef QUICK_DIRTY_DUPLICATE_CHECK + +int +modify_check_duplicates( + AttributeDescription *ad, + MatchingRule *mr, + BerVarray vals, + BerVarray mods, + const char **text, + char *textbuf, size_t textlen ) +{ + int i, j, rc = LDAP_SUCCESS; + BerVarray nvals = NULL, nmods; + + /* + * FIXME: better do the following + * + * - count the existing values + * - count the new values + * + * - if the existing values are less than the new ones { + * // current code + * - normalize the existing values + * - for each new value { + * - normalize + * - check with existing + * - cross-check with already normalized new vals + * } + * } else { + * // to be implemented + * - for each new value { + * - normalize + * - cross-check with already normalized new vals + * } + * - for each existing value { + * - normalize + * - check with already normalized new values + * } + * } + * + * The first case is good when adding a lot of new values, + * and significantly at first import of values (e.g. adding + * a new group); the latter case seems to be quite important + * as well, because it is likely to be the most frequently + * used when administering the entry. The current + * implementation will always normalize all the existing + * values before checking. If there's no duplicate, the + * performances should not change; they will in case of error. + */ + + if ( vals ) { + for ( j = 0; vals[ j ].bv_val != NULL; j++ ) + /* count existing values */ ; + + nvals = ch_calloc( j + 1, sizeof( struct berval ) ); + + /* normalize the existing values first */ + for ( j = 0; vals[ j ].bv_val != NULL; j++ ) { + rc = value_normalize( ad, SLAP_MR_EQUALITY, + &vals[ j ], &nvals[ j ], text ); + + /* existing attribute values must normalize */ + assert( rc == LDAP_SUCCESS ); + + if ( rc != LDAP_SUCCESS ) { + nvals[ j ].bv_val = NULL; + goto return_results; + } + } + nvals[ j ].bv_val = NULL; + } + + for ( i = 0; mods[ i ].bv_val != NULL; i++ ) + /* count new values */ ; + + nmods = ch_calloc( i + 1, sizeof( struct berval ) ); + + for ( i = 0; mods[ i ].bv_val != NULL; i++ ) { + + rc = value_normalize( ad, SLAP_MR_EQUALITY, + &mods[ i ], &nmods[ i ], text ); + + if ( rc != LDAP_SUCCESS ) { + nmods[ i ].bv_val = NULL; + goto return_results; + } + + if ( vals ) { + for ( j = 0; nvals[ j ].bv_val; j++ ) { +#ifdef QUICK_DIRTY_DUPLICATE_CHECK + if ( bvmatch( &nmods[ i ], &nvals[ j ] ) ) { +#else /* !QUICK_DIRTY_DUPLICATE_CHECK */ + int match; + + rc = (mr->smr_match)( &match, + SLAP_MR_VALUE_SYNTAX_MATCH, + ad->ad_type->sat_syntax, + mr, &nmods[ i ], &nvals[ j ] ); + if ( rc != LDAP_SUCCESS ) { + nmods[ i + 1 ].bv_val = NULL; + goto return_results; + } + + if ( match == 0 ) { +#endif /* !QUICK_DIRTY_DUPLICATE_CHECK */ + snprintf( textbuf, textlen, + "%s: value #%d provided more than once", + ad->ad_cname.bv_val, i ); + rc = LDAP_TYPE_OR_VALUE_EXISTS; + nmods[ i + 1 ].bv_val = NULL; + goto return_results; + } + } + } + + for ( j = 0; j < i; j++ ) { +#ifdef QUICK_DIRTY_DUPLICATE_CHECK + if ( bvmatch( &nmods[ i ], &nmods[ j ] ) ) { +#else /* !QUICK_DIRTY_DUPLICATE_CHECK */ + int match; + + rc = (mr->smr_match)( &match, + SLAP_MR_VALUE_SYNTAX_MATCH, + ad->ad_type->sat_syntax, + mr, &nmods[ i ], &nmods[ j ] ); + if ( rc != LDAP_SUCCESS ) { + nmods[ i + 1 ].bv_val = NULL; + goto return_results; + } + + if ( match == 0 ) { +#endif /* !QUICK_DIRTY_DUPLICATE_CHECK */ + snprintf( textbuf, textlen, + "%s: value #%d provided more than once", + ad->ad_cname.bv_val, j ); + rc = LDAP_TYPE_OR_VALUE_EXISTS; + nmods[ i + 1 ].bv_val = NULL; + goto return_results; + } + } + } + nmods[ i ].bv_val = NULL; + +return_results:; + if ( nvals ) { + ber_bvarray_free( nvals ); + } + if ( nmods ) { + ber_bvarray_free( nmods ); + } + + return rc; +} + int modify_add_values( Entry *e, @@ -61,10 +215,9 @@ modify_add_values( /* test asserted values against existing values */ if( a ) { for( j = 0; a->a_vals[j].bv_val != NULL; j++ ) { - int rc = ber_bvcmp( &mod->sm_bvalues[i], - &a->a_vals[j] ); + if ( bvmatch( &mod->sm_bvalues[i], + &a->a_vals[j] ) ) { - if( rc == 0 ) { /* value exists already */ *text = textbuf; snprintf( textbuf, textlen, @@ -77,10 +230,9 @@ modify_add_values( /* test asserted values against themselves */ for( j = 0; j < i; j++ ) { - int rc = ber_bvcmp( &mod->sm_bvalues[i], - &mod->sm_bvalues[j] ); + if ( bvmatch( &mod->sm_bvalues[i], + &mod->sm_bvalues[j] ) ) { - if( rc == 0 ) { /* value exists already */ *text = textbuf; snprintf( textbuf, textlen, @@ -92,23 +244,56 @@ modify_add_values( } } else { - for ( i = 0; mod->sm_bvalues[i].bv_val != NULL; i++ ) { - int rc, match; - struct berval asserted; - rc = value_normalize( mod->sm_desc, - SLAP_MR_EQUALITY, - &mod->sm_bvalues[i], - &asserted, - text ); + /* + * The original code performs ( n ) normalizations + * and ( n * ( n - 1 ) / 2 ) matches, which hide + * the same number of normalizations. The new code + * performs the same number of normalizations ( n ) + * and ( n * ( n - 1 ) / 2 ) mem compares, far less + * expensive than an entire match, if a match is + * equivalent to a normalization and a mem compare ... + * + * This is far more memory expensive than the previous, + * but it can heavily improve performances when big + * chunks of data are added (typical example is a group + * with thousands of DN-syntax members; on my system: + * for members of 5-RDN DNs, + + members orig bvmatch (dirty) new + 1000 0m38.456s 0m0.553s 0m0.608s + 2000 2m33.341s 0m0.851s 0m1.003s + + * Moreover, 100 groups with 10000 members each were + * added in 37m27.933s (an analogous LDIF file was + * loaded into Active Directory in 38m28.682s, BTW). + * + * Maybe we could switch to the new algorithm when + * the number of values overcomes a given threshold? + */ + + int rc; + const char *text = NULL; + char textbuf[ SLAP_TEXT_BUFLEN ] = { '\0' }; + + if ( mod->sm_bvalues[ 1 ].bv_val == 0 ) { + if ( a != NULL ) { + struct berval asserted; + int i; + + rc = value_normalize( mod->sm_desc, SLAP_MR_EQUALITY, + &mod->sm_bvalues[ 0 ], &asserted, &text ); + + if ( rc != LDAP_SUCCESS ) { + return rc; + } - if( rc != LDAP_SUCCESS ) return rc; + for ( i = 0; a->a_vals[ i ].bv_val; i++ ) { + int match; - if( a ) { - for ( j = 0; a->a_vals[j].bv_val != NULL; j++ ) { - int rc = value_match( &match, mod->sm_desc, mr, + rc = value_match( &match, mod->sm_desc, mr, SLAP_MR_VALUE_SYNTAX_MATCH, - &a->a_vals[j], &asserted, text ); + &a->a_vals[ i ], &asserted, &text ); if( rc == LDAP_SUCCESS && match == 0 ) { free( asserted.bv_val ); @@ -117,18 +302,14 @@ modify_add_values( } } - for ( j = 0; j < i; j++ ) { - int rc = value_match( &match, mod->sm_desc, mr, - SLAP_MR_VALUE_SYNTAX_MATCH, - &mod->sm_bvalues[j], &asserted, text ); - - if( rc == LDAP_SUCCESS && match == 0 ) { - free( asserted.bv_val ); - return LDAP_TYPE_OR_VALUE_EXISTS; - } + } else { + rc = modify_check_duplicates( mod->sm_desc, mr, + a ? a->a_vals : NULL, mod->sm_bvalues, + &text, textbuf, sizeof( textbuf ) ); + + if ( rc != LDAP_SUCCESS ) { + return rc; } - - free( asserted.bv_val ); } } diff --git a/servers/slapd/proto-slap.h b/servers/slapd/proto-slap.h index db7d7ba9fa..bde85b5368 100644 --- a/servers/slapd/proto-slap.h +++ b/servers/slapd/proto-slap.h @@ -570,6 +570,10 @@ LDAP_SLAPD_F( int ) slap_mods_opattrs( /* * mods.c */ +LDAP_SLAPD_F( int ) modify_check_duplicates( + AttributeDescription *ad, MatchingRule *mr, + BerVarray vals, BerVarray mods, + const char **text, char *textbuf, size_t textlen ); LDAP_SLAPD_F( int ) modify_add_values( Entry *e, Modification *mod, const char **text, char *textbuf, size_t textlen ); diff --git a/servers/slapd/value.c b/servers/slapd/value.c index 3aa2adfa50..c53da1e9eb 100644 --- a/servers/slapd/value.c +++ b/servers/slapd/value.c @@ -330,7 +330,6 @@ int value_find_ex( int i; int rc; struct berval nval = { 0, NULL }; - struct berval nval_tmp; MatchingRule *mr = ad->ad_type->sat_equality; if( mr == NULL || !mr->smr_match ) { @@ -351,8 +350,11 @@ int value_find_ex( } if( mr->smr_syntax->ssyn_normalize ) { + struct berval nval_tmp; + rc = mr->smr_syntax->ssyn_normalize( - mr->smr_syntax, nval.bv_val == NULL ? val : &nval, &nval_tmp ); + mr->smr_syntax, + nval.bv_val == NULL ? val : &nval, &nval_tmp ); free(nval.bv_val); nval = nval_tmp;