From bf7c7a22d66c5ff4e5aece5f9afe5494d16e2c32 Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Tue, 28 Sep 2004 02:51:00 +0000 Subject: [PATCH] ITS#2661: configurable substring index lengths ITS#2588: use subany to help long subinitial/subfinal matches --- servers/slapd/config.c | 76 ++++++++++++ servers/slapd/proto-slap.h | 5 + servers/slapd/schema_init.c | 241 +++++++++++++++++------------------- servers/slapd/slap.h | 20 +-- 4 files changed, 202 insertions(+), 140 deletions(-) diff --git a/servers/slapd/config.c b/servers/slapd/config.c index c7884bca29..5a03edbf9e 100644 --- a/servers/slapd/config.c +++ b/servers/slapd/config.c @@ -240,6 +240,82 @@ read_config( const char *fname, int depth ) ldap_pvt_thread_set_concurrency( c ); + /* set substring initial/final index minimum length */ + } else if ( strcasecmp( cargv[0], "index_substr_if_minlen" ) == 0 ) { + long min; + if ( cargc < 2 ) { + Debug( LDAP_DEBUG_ANY, + "%s: line %d: missing min in \"index_substr_if_minlen \" line\n", + fname, lineno, 0 ); + return( 1 ); + } + min = atoi( cargv[1] ); + if( min < 1 || min > index_substr_if_maxlen ) { + Debug( LDAP_DEBUG_ANY, + "%s: line %d: invalid min value (%ld) in " + "\"index_substr_if_minlen \" line.\n", + fname, lineno, min ); + return( 1 ); + } + index_substr_if_minlen = min; + + /* set substring initial/final index maximum length */ + } else if ( strcasecmp( cargv[0], "index_substr_if_maxlen" ) == 0 ) { + long max; + if ( cargc < 2 ) { + Debug( LDAP_DEBUG_ANY, + "%s: line %d: missing max in \"index_substr_if_maxlen \" line\n", + fname, lineno, 0 ); + return( 1 ); + } + max = atol( cargv[1] ); + if( max < 1 || max < index_substr_if_minlen ) { + Debug( LDAP_DEBUG_ANY, + "%s: line %d: invalid max value (%ld) in " + "\"index_substr_maxlen \" line.\n", + fname, lineno, max ); + return( 1 ); + } + index_substr_if_maxlen = max; + + /* set substring any index len */ + } else if ( strcasecmp( cargv[0], "index_substr_any_len" ) == 0 ) { + long len; + if ( cargc < 2 ) { + Debug( LDAP_DEBUG_ANY, + "%s: line %d: missing len in \"index_substr_any_len \" line\n", + fname, lineno, 0 ); + return( 1 ); + } + len = atol( cargv[1] ); + if( len < 1 ) { + Debug( LDAP_DEBUG_ANY, + "%s: line %d: invalid len value (%ld) in " + "\"index_substr_any_len \" line.\n", + fname, lineno, len ); + return( 1 ); + } + index_substr_any_len = len; + + /* set substring any index step */ + } else if ( strcasecmp( cargv[0], "index_substr_any_step" ) == 0 ) { + long step; + if ( cargc < 2 ) { + Debug( LDAP_DEBUG_ANY, + "%s: line %d: missing step in \"index_substr_any_step \" line\n", + fname, lineno, 0 ); + return( 1 ); + } + step = atol( cargv[1] ); + if( step < 1 ) { + Debug( LDAP_DEBUG_ANY, + "%s: line %d: invalid step value (%ld) in " + "\"index_substr_any_step \" line.\n", + fname, lineno, step ); + return( 1 ); + } + index_substr_any_step = step; + /* set sockbuf max */ } else if ( strcasecmp( cargv[0], "sockbuf_max_incoming" ) == 0 ) { long max; diff --git a/servers/slapd/proto-slap.h b/servers/slapd/proto-slap.h index 2d7d4f2a44..244d4ccd86 100644 --- a/servers/slapd/proto-slap.h +++ b/servers/slapd/proto-slap.h @@ -1310,6 +1310,11 @@ LDAP_SLAPD_F (int) value_add_one LDAP_P(( */ LDAP_SLAPD_V(unsigned) num_subordinates; +LDAP_SLAPD_V (unsigned int) index_substr_if_minlen; +LDAP_SLAPD_V (unsigned int) index_substr_if_maxlen; +LDAP_SLAPD_V (unsigned int) index_substr_any_len; +LDAP_SLAPD_V (unsigned int) index_substr_any_step; + LDAP_SLAPD_V (ber_len_t) sockbuf_max_incoming; LDAP_SLAPD_V (ber_len_t) sockbuf_max_incoming_auth; LDAP_SLAPD_V (int) slap_conn_max_pending; diff --git a/servers/slapd/schema_init.c b/servers/slapd/schema_init.c index 4390130435..cb2e88b800 100644 --- a/servers/slapd/schema_init.c +++ b/servers/slapd/schema_init.c @@ -59,6 +59,11 @@ #define IA5StringApproxIndexer approxIndexer #define IA5StringApproxFilter approxFilter +unsigned int index_substr_if_minlen = SLAP_INDEX_SUBSTR_IF_MINLEN_DEFAULT; +unsigned int index_substr_if_maxlen = SLAP_INDEX_SUBSTR_IF_MAXLEN_DEFAULT; +unsigned int index_substr_any_len = SLAP_INDEX_SUBSTR_ANY_LEN_DEFAULT; +unsigned int index_substr_any_step = SLAP_INDEX_SUBSTR_ANY_STEP_DEFAULT; + static int inValidate( Syntax *syntax, @@ -148,6 +153,29 @@ octetStringOrderingMatch( return LDAP_SUCCESS; } +hashDigestify( + HASH_CONTEXT *HASHcontext, + unsigned char *HASHdigest, + struct berval *prefix, + char pre, + Syntax *syntax, + MatchingRule *mr, + unsigned char *value, + int value_len) +{ + HASH_Init(HASHcontext); + if(prefix && prefix->bv_len > 0) { + HASH_Update(HASHcontext, + (unsigned char *)prefix->bv_val, prefix->bv_len); + } + if(pre) HASH_Update(HASHcontext, (unsigned char*)&pre, sizeof(pre)); + HASH_Update(HASHcontext, (unsigned char*)syntax->ssyn_oid, syntax->ssyn_oidlen); + HASH_Update(HASHcontext, (unsigned char*)mr->smr_oid, mr->smr_oidlen); + HASH_Update(HASHcontext, value, value_len); + HASH_Final(HASHdigest, HASHcontext); + return; +} + /* Index generation function */ int octetStringIndexer( slap_mask_t use, @@ -180,6 +208,7 @@ int octetStringIndexer( slen = syntax->ssyn_oidlen; mlen = mr->smr_oidlen; + /* XXX this invocation does not like hashDigestify() */ for( i=0; values[i].bv_val != NULL; i++ ) { HASH_Init( &HASHcontext ); if( prefix != NULL && prefix->bv_len > 0 ) { @@ -389,7 +418,7 @@ octetStringSubstringsIndexer( BerVarray *keysp, void *ctx ) { - ber_len_t i, j, nkeys; + ber_len_t i, j, len, nkeys; size_t slen, mlen; BerVarray keys; @@ -404,26 +433,26 @@ octetStringSubstringsIndexer( for( i=0; values[i].bv_val != NULL; i++ ) { /* count number of indices to generate */ if( flags & SLAP_INDEX_SUBSTR_INITIAL ) { - if( values[i].bv_len >= SLAP_INDEX_SUBSTR_IF_MAXLEN ) { - nkeys += SLAP_INDEX_SUBSTR_IF_MAXLEN - - (SLAP_INDEX_SUBSTR_IF_MINLEN - 1); - } else if( values[i].bv_len >= SLAP_INDEX_SUBSTR_IF_MINLEN ) { - nkeys += values[i].bv_len - (SLAP_INDEX_SUBSTR_IF_MINLEN - 1); + if( values[i].bv_len >= index_substr_if_maxlen ) { + nkeys += index_substr_if_maxlen - + (index_substr_if_minlen - 1); + } else if( values[i].bv_len >= index_substr_if_minlen ) { + nkeys += values[i].bv_len - (index_substr_if_minlen - 1); } } if( flags & SLAP_INDEX_SUBSTR_ANY ) { - if( values[i].bv_len >= SLAP_INDEX_SUBSTR_ANY_LEN ) { - nkeys += values[i].bv_len - (SLAP_INDEX_SUBSTR_ANY_LEN - 1); + if( values[i].bv_len >= index_substr_any_len ) { + nkeys += values[i].bv_len - (index_substr_any_len - 1); } } if( flags & SLAP_INDEX_SUBSTR_FINAL ) { - if( values[i].bv_len >= SLAP_INDEX_SUBSTR_IF_MAXLEN ) { - nkeys += SLAP_INDEX_SUBSTR_IF_MAXLEN - - ( SLAP_INDEX_SUBSTR_IF_MINLEN - 1); - } else if( values[i].bv_len >= SLAP_INDEX_SUBSTR_IF_MINLEN ) { - nkeys += values[i].bv_len - (SLAP_INDEX_SUBSTR_IF_MINLEN - 1); + if( values[i].bv_len >= index_substr_if_maxlen ) { + nkeys += index_substr_if_maxlen - + (index_substr_if_minlen - 1); + } else if( values[i].bv_len >= index_substr_if_minlen ) { + nkeys += values[i].bv_len - (index_substr_if_minlen - 1); } } } @@ -444,79 +473,38 @@ octetStringSubstringsIndexer( ber_len_t j,max; if( ( flags & SLAP_INDEX_SUBSTR_ANY ) && - ( values[i].bv_len >= SLAP_INDEX_SUBSTR_ANY_LEN ) ) + ( values[i].bv_len >= index_substr_any_len ) ) { char pre = SLAP_INDEX_SUBSTR_PREFIX; - max = values[i].bv_len - (SLAP_INDEX_SUBSTR_ANY_LEN - 1); + max = values[i].bv_len - (index_substr_any_len - 1); for( j=0; jbv_len > 0 ) { - HASH_Update( &HASHcontext, - (unsigned char *)prefix->bv_val, prefix->bv_len ); - } - - HASH_Update( &HASHcontext, - (unsigned char *)&pre, sizeof( pre ) ); - HASH_Update( &HASHcontext, - (unsigned char *)syntax->ssyn_oid, slen ); - HASH_Update( &HASHcontext, - (unsigned char *)mr->smr_oid, mlen ); - HASH_Update( &HASHcontext, - (unsigned char *)&values[i].bv_val[j], - SLAP_INDEX_SUBSTR_ANY_LEN ); - HASH_Final( HASHdigest, &HASHcontext ); - + hashDigestify( &HASHcontext, HASHdigest, prefix, pre, + syntax, mr, (unsigned char *)&values[i].bv_val[j], index_substr_any_len); ber_dupbv_x( &keys[nkeys++], &digest, ctx ); } } /* skip if too short */ - if( values[i].bv_len < SLAP_INDEX_SUBSTR_IF_MINLEN ) continue; + if( values[i].bv_len < index_substr_if_minlen ) continue; - max = SLAP_INDEX_SUBSTR_IF_MAXLEN < values[i].bv_len - ? SLAP_INDEX_SUBSTR_IF_MAXLEN : values[i].bv_len; + max = index_substr_if_maxlen < values[i].bv_len + ? index_substr_if_maxlen : values[i].bv_len; - for( j=SLAP_INDEX_SUBSTR_IF_MINLEN; j<=max; j++ ) { + for( j=index_substr_if_minlen; j<=max; j++ ) { char pre; if( flags & SLAP_INDEX_SUBSTR_INITIAL ) { pre = SLAP_INDEX_SUBSTR_INITIAL_PREFIX; - HASH_Init( &HASHcontext ); - if( prefix != NULL && prefix->bv_len > 0 ) { - HASH_Update( &HASHcontext, - (unsigned char *)prefix->bv_val, prefix->bv_len ); - } - HASH_Update( &HASHcontext, - (unsigned char *)&pre, sizeof( pre ) ); - HASH_Update( &HASHcontext, - (unsigned char *)syntax->ssyn_oid, slen ); - HASH_Update( &HASHcontext, - (unsigned char *)mr->smr_oid, mlen ); - HASH_Update( &HASHcontext, - (unsigned char *)values[i].bv_val, j ); - HASH_Final( HASHdigest, &HASHcontext ); - + hashDigestify( &HASHcontext, HASHdigest, prefix, pre, + syntax, mr, (unsigned char *)values[i].bv_val, j ); ber_dupbv_x( &keys[nkeys++], &digest, ctx ); } if( flags & SLAP_INDEX_SUBSTR_FINAL ) { pre = SLAP_INDEX_SUBSTR_FINAL_PREFIX; - HASH_Init( &HASHcontext ); - if( prefix != NULL && prefix->bv_len > 0 ) { - HASH_Update( &HASHcontext, - (unsigned char *)prefix->bv_val, prefix->bv_len ); - } - HASH_Update( &HASHcontext, - (unsigned char *)&pre, sizeof( pre ) ); - HASH_Update( &HASHcontext, - (unsigned char *)syntax->ssyn_oid, slen ); - HASH_Update( &HASHcontext, - (unsigned char *)mr->smr_oid, mlen ); - HASH_Update( &HASHcontext, - (unsigned char *)&values[i].bv_val[values[i].bv_len-j], j ); - HASH_Final( HASHdigest, &HASHcontext ); - + hashDigestify( &HASHcontext, HASHdigest, prefix, pre, + syntax, mr, (unsigned char *)&values[i].bv_val[values[i].bv_len-j], j ); ber_dupbv_x( &keys[nkeys++], &digest, ctx ); } @@ -547,7 +535,7 @@ octetStringSubstringsFilter ( { SubstringsAssertion *sa; char pre; - ber_len_t nkeys = 0; + ber_len_t len, max, nkeys = 0; size_t slen, mlen, klen; BerVarray keys; HASH_CONTEXT HASHcontext; @@ -559,27 +547,37 @@ octetStringSubstringsFilter ( if( flags & SLAP_INDEX_SUBSTR_INITIAL && sa->sa_initial.bv_val != NULL && - sa->sa_initial.bv_len >= SLAP_INDEX_SUBSTR_IF_MINLEN ) + sa->sa_initial.bv_len >= index_substr_if_minlen ) { nkeys++; + if ( sa->sa_initial.bv_len > index_substr_if_maxlen && + ( flags & SLAP_INDEX_SUBSTR_ANY )) + { + nkeys += (sa->sa_initial.bv_len - index_substr_if_maxlen) / index_substr_any_step; + } } if( flags & SLAP_INDEX_SUBSTR_ANY && sa->sa_any != NULL ) { ber_len_t i; for( i=0; sa->sa_any[i].bv_val != NULL; i++ ) { - if( sa->sa_any[i].bv_len >= SLAP_INDEX_SUBSTR_ANY_LEN ) { + if( sa->sa_any[i].bv_len >= index_substr_any_len ) { /* don't bother accounting with stepping */ nkeys += sa->sa_any[i].bv_len - - ( SLAP_INDEX_SUBSTR_ANY_LEN - 1 ); + ( index_substr_any_len - 1 ); } } } if( flags & SLAP_INDEX_SUBSTR_FINAL && sa->sa_final.bv_val != NULL && - sa->sa_final.bv_len >= SLAP_INDEX_SUBSTR_IF_MINLEN ) + sa->sa_final.bv_len >= index_substr_if_minlen ) { nkeys++; + if ( sa->sa_final.bv_len > index_substr_if_maxlen && + ( flags & SLAP_INDEX_SUBSTR_ANY )) + { + nkeys += (sa->sa_final.bv_len - index_substr_if_maxlen) / index_substr_any_step; + } } if( nkeys == 0 ) { @@ -598,63 +596,52 @@ octetStringSubstringsFilter ( if( flags & SLAP_INDEX_SUBSTR_INITIAL && sa->sa_initial.bv_val != NULL && - sa->sa_initial.bv_len >= SLAP_INDEX_SUBSTR_IF_MINLEN ) + sa->sa_initial.bv_len >= index_substr_if_minlen ) { pre = SLAP_INDEX_SUBSTR_INITIAL_PREFIX; value = &sa->sa_initial; - klen = SLAP_INDEX_SUBSTR_IF_MAXLEN < value->bv_len - ? SLAP_INDEX_SUBSTR_IF_MAXLEN : value->bv_len; - - HASH_Init( &HASHcontext ); - if( prefix != NULL && prefix->bv_len > 0 ) { - HASH_Update( &HASHcontext, - (unsigned char *)prefix->bv_val, prefix->bv_len ); - } - HASH_Update( &HASHcontext, - (unsigned char *)&pre, sizeof( pre ) ); - HASH_Update( &HASHcontext, - (unsigned char *)syntax->ssyn_oid, slen ); - HASH_Update( &HASHcontext, - (unsigned char *)mr->smr_oid, mlen ); - HASH_Update( &HASHcontext, - (unsigned char *)value->bv_val, klen ); - HASH_Final( HASHdigest, &HASHcontext ); + klen = index_substr_if_maxlen < value->bv_len + ? index_substr_if_maxlen : value->bv_len; + hashDigestify( &HASHcontext, HASHdigest, prefix, pre, + syntax, mr, (unsigned char *)value->bv_val, klen ); ber_dupbv_x( &keys[nkeys++], &digest, ctx ); + + /* If initial is too long and we have subany indexed, use it + * to match the excess... + */ + if (value->bv_len > index_substr_if_maxlen && (flags & SLAP_INDEX_SUBSTR_ANY)) + { + ber_len_t j; + pre = SLAP_INDEX_SUBSTR_PREFIX; + for ( j=index_substr_if_maxlen-1; j <= value->bv_len - index_substr_any_len; j+=index_substr_any_step ) + { + hashDigestify( &HASHcontext, HASHdigest, prefix, pre, + syntax, mr, (unsigned char *)&value->bv_val[j], index_substr_any_len ); + ber_dupbv_x( &keys[nkeys++], &digest, ctx ); + } + } } if( flags & SLAP_INDEX_SUBSTR_ANY && sa->sa_any != NULL ) { ber_len_t i, j; pre = SLAP_INDEX_SUBSTR_PREFIX; - klen = SLAP_INDEX_SUBSTR_ANY_LEN; + klen = index_substr_any_len; for( i=0; sa->sa_any[i].bv_val != NULL; i++ ) { - if( sa->sa_any[i].bv_len < SLAP_INDEX_SUBSTR_ANY_LEN ) { + if( sa->sa_any[i].bv_len < index_substr_any_len ) { continue; } value = &sa->sa_any[i]; for(j=0; - j <= value->bv_len - SLAP_INDEX_SUBSTR_ANY_LEN; - j += SLAP_INDEX_SUBSTR_ANY_STEP ) + j <= value->bv_len - index_substr_any_len; + j += index_substr_any_step ) { - HASH_Init( &HASHcontext ); - if( prefix != NULL && prefix->bv_len > 0 ) { - HASH_Update( &HASHcontext, - (unsigned char *)prefix->bv_val, prefix->bv_len ); - } - HASH_Update( &HASHcontext, - (unsigned char *)&pre, sizeof( pre ) ); - HASH_Update( &HASHcontext, - (unsigned char *)syntax->ssyn_oid, slen ); - HASH_Update( &HASHcontext, - (unsigned char *)mr->smr_oid, mlen ); - HASH_Update( &HASHcontext, - (unsigned char *)&value->bv_val[j], klen ); - HASH_Final( HASHdigest, &HASHcontext ); - + hashDigestify( &HASHcontext, HASHdigest, prefix, pre, + syntax, mr, (unsigned char *)&value->bv_val[j], klen ); ber_dupbv_x( &keys[nkeys++], &digest, ctx ); } } @@ -662,30 +649,32 @@ octetStringSubstringsFilter ( if( flags & SLAP_INDEX_SUBSTR_FINAL && sa->sa_final.bv_val != NULL && - sa->sa_final.bv_len >= SLAP_INDEX_SUBSTR_IF_MINLEN ) + sa->sa_final.bv_len >= index_substr_if_minlen ) { pre = SLAP_INDEX_SUBSTR_FINAL_PREFIX; value = &sa->sa_final; - klen = SLAP_INDEX_SUBSTR_IF_MAXLEN < value->bv_len - ? SLAP_INDEX_SUBSTR_IF_MAXLEN : value->bv_len; - - HASH_Init( &HASHcontext ); - if( prefix != NULL && prefix->bv_len > 0 ) { - HASH_Update( &HASHcontext, - (unsigned char *)prefix->bv_val, prefix->bv_len ); - } - HASH_Update( &HASHcontext, - (unsigned char *)&pre, sizeof( pre ) ); - HASH_Update( &HASHcontext, - (unsigned char *)syntax->ssyn_oid, slen ); - HASH_Update( &HASHcontext, - (unsigned char *)mr->smr_oid, mlen ); - HASH_Update( &HASHcontext, - (unsigned char *)&value->bv_val[value->bv_len-klen], klen ); - HASH_Final( HASHdigest, &HASHcontext ); + klen = index_substr_if_maxlen < value->bv_len + ? index_substr_if_maxlen : value->bv_len; + hashDigestify( &HASHcontext, HASHdigest, prefix, pre, + syntax, mr, (unsigned char *)&value->bv_val[value->bv_len-klen], klen ); ber_dupbv_x( &keys[nkeys++], &digest, ctx ); + + /* If final is too long and we have subany indexed, use it + * to match the excess... + */ + if (value->bv_len > index_substr_if_maxlen && (flags & SLAP_INDEX_SUBSTR_ANY)) + { + ber_len_t j; + pre = SLAP_INDEX_SUBSTR_PREFIX; + for ( j=0; j <= value->bv_len - index_substr_if_maxlen; j+=index_substr_any_step ) + { + hashDigestify( &HASHcontext, HASHdigest, prefix, pre, + syntax, mr, (unsigned char *)&value->bv_val[j], index_substr_any_len ); + ber_dupbv_x( &keys[nkeys++], &digest, ctx ); + } + } } if( nkeys > 0 ) { diff --git a/servers/slapd/slap.h b/servers/slapd/slap.h index 6dd71ad651..4d4bf0ed41 100644 --- a/servers/slapd/slap.h +++ b/servers/slapd/slap.h @@ -230,21 +230,13 @@ typedef struct slap_ssf_set { | SLAP_INDEX_SUBSTR_ANY \ | SLAP_INDEX_SUBSTR_FINAL ) -/* constants for initial/final substrings indices */ -#ifndef SLAP_INDEX_SUBSTR_IF_MINLEN -# define SLAP_INDEX_SUBSTR_IF_MINLEN 2 -#endif -#ifndef SLAP_INDEX_SUBSTR_IF_MAXLEN -# define SLAP_INDEX_SUBSTR_IF_MAXLEN 4 -#endif +/* defaults for initial/final substring indices */ +#define SLAP_INDEX_SUBSTR_IF_MINLEN_DEFAULT 2 +#define SLAP_INDEX_SUBSTR_IF_MAXLEN_DEFAULT 4 -/* constants for any substrings indices */ -#ifndef SLAP_INDEX_SUBSTR_ANY_LEN -# define SLAP_INDEX_SUBSTR_ANY_LEN 4 -#endif -#ifndef SLAP_INDEX_SUBSTR_ANY_STEP -# define SLAP_INDEX_SUBSTR_ANY_STEP 2 -#endif +/* defaults for any substring indices */ +#define SLAP_INDEX_SUBSTR_ANY_LEN_DEFAULT 4 +#define SLAP_INDEX_SUBSTR_ANY_STEP_DEFAULT 2 #define SLAP_INDEX_FLAGS 0xF000UL #define SLAP_INDEX_NOSUBTYPES 0x1000UL /* don't use index w/ subtypes */ -- 2.39.5