From: Stig Venaas Date: Wed, 25 Jul 2001 21:22:55 +0000 (+0000) Subject: Made approxMatch/Indexer/Filter all do Unicode canonical normalization X-Git-Tag: LDBM_PRE_GIANT_RWLOCK~1200 X-Git-Url: https://git.sur5r.net/?a=commitdiff_plain;h=92ec77f6dca4824e2cbe44614be2ea7af2e939f7;p=openldap Made approxMatch/Indexer/Filter all do Unicode canonical normalization followed by stripping of characters with 8th bit set. The normalization is needed to make exact match imply approx match. --- diff --git a/servers/slapd/schema_init.c b/servers/slapd/schema_init.c index c8ee4b9e38..7e14971f02 100644 --- a/servers/slapd/schema_init.c +++ b/servers/slapd/schema_init.c @@ -654,6 +654,30 @@ err: return NULL; } +/* Strip characters with the 8th bit set */ +char * +strip8bitChars( + char *in ) +{ + char *p = in, *q; + + if( in == NULL ) { + return NULL; + } + while( *p ) { + if( *p & 0x80 ) { + q = p; + while( *++q & 0x80 ) { + /* empty */ + } + p = memmove(p, q, strlen(q) + 1); + } else { + p++; + } + } + return in; +} + #if defined(SLAPD_APPROX_MULTISTRING) #if defined(SLAPD_APPROX_INITIALS) @@ -675,10 +699,28 @@ approxMatch( { char *val, *assertv, **values, **words, *c; int i, count, len, nextchunk=0, nextavail=0; + size_t avlen; + /* Yes, this is necessary */ + val = UTF8normalize( value->bv_val, UTF8_NOCASEFOLD ); + if( val == NULL ) { + *matchp = 1; + return LDAP_SUCCESS; + } + strip8bitChars( val ); + + /* Yes, this is necessary */ + assertv = UTF8normalize( ((struct berval *)assertedValue)->bv_val, + UTF8_NOCASEFOLD ); + if( assertv == NULL ) { + free( val ); + *matchp = 1; + return LDAP_SUCCESS; + } + strip8bitChars( assertv ); + avlen = strlen( assertv ); /* Isolate how many words there are */ - val = ch_strdup( value->bv_val ); for( c=val,count=1; *c; c++ ) { c = strpbrk( c, SLAPD_APPROX_DELIMITER ); if ( c == NULL ) break; @@ -694,12 +736,10 @@ approxMatch( values[i] = phonetic(c); } - - /* Work through the asserted value's words, to see if at least some + /* Work 
through the asserted value's words, to see if at least some of the words are there, in the same order. */ - assertv = ch_strdup( ((struct berval *)assertedValue)->bv_val ); len = 0; - while ( nextchunk < ((struct berval *)assertedValue)->bv_len ) { + while ( nextchunk < avlen ) { len = strcspn( assertv + nextchunk, SLAPD_APPROX_DELIMITER); if( len == 0 ) { nextchunk++; @@ -748,18 +788,17 @@ approxMatch( } /* Cleanup allocs */ - ch_free( assertv ); + free( assertv ); for( i=0; ibv_val, UTF8_NOCASEFOLD ); + strip8bitChars( val ); + /* Isolate how many words there are. There will be a key for each */ - val = ch_strdup( values[j]->bv_val ); for( wordcount=0,c=val; *c; c++) { len = strcspn(c, SLAPD_APPROX_DELIMITER); if( len >= SLAPD_APPROX_WORDLEN ) wordcount++; @@ -804,7 +845,7 @@ approxIndexer( i++; } - ch_free( val ); + free( val ); } keys[keycount] = NULL; *keysp = keys; @@ -827,8 +868,18 @@ approxFilter( int i, count, len; struct berval **keys; + /* Yes, this is necessary */ + val = UTF8normalize( ((struct berval *)assertValue)->bv_val, + UTF8_NOCASEFOLD ); + if( val == NULL ) { + keys = (struct berval **)ch_malloc( sizeof(struct berval *) ); + keys[0] = NULL; + *keysp = keys; + return LDAP_SUCCESS; + } + strip8bitChars( val ); + /* Isolate how many words there are. 
There will be a key for each */ - val = ch_strdup( ((struct berval *)assertValue)->bv_val ); for( count=0,c=val; *c; c++) { len = strcspn(c, SLAPD_APPROX_DELIMITER); if( len >= SLAPD_APPROX_WORDLEN ) count++; @@ -844,13 +895,11 @@ approxFilter( for( c=val,i=0; ibv_val = phonetic( c ); - keys[i]->bv_len = strlen( keys[i]->bv_val ); + keys[i] = ber_bvstr( phonetic( c ) ); i++; } - ch_free( val ); + free( val ); keys[count] = NULL; *keysp = keys; @@ -872,9 +921,29 @@ approxMatch( void *assertedValue ) { char *vapprox, *avapprox; + char *s, *t; - vapprox = phonetic( value->bv_val ); - avapprox = phonetic( ((struct berval *)assertedValue)->bv_val); + /* Yes, this is necessary */ + s = UTF8normalize( value->bv_val, UTF8_NOCASEFOLD ); + if( s == NULL ) { + *matchp = 1; + return LDAP_SUCCESS; + } + + /* Yes, this is necessary */ + t = UTF8normalize( ((struct berval *)assertedValue)->bv_val, + UTF8_NOCASEFOLD ); + if( t == NULL ) { + free( s ); + *matchp = -1; + return LDAP_SUCCESS; + } + + vapprox = phonetic( strip8bitChars( s ) ); + avapprox = phonetic( strip8bitChars( t ) ); + + free( s ); + free( t ); *matchp = strcmp( vapprox, avapprox ); @@ -896,6 +965,7 @@ approxIndexer( { int i; struct berval **keys; + char *s; for( i=0; values[i] != NULL; i++ ) { /* just count them */ @@ -906,9 +976,12 @@ approxIndexer( /* Copy each value and run it through phonetic() */ for( i=0; values[i] != NULL; i++ ) { - keys[i] = ch_malloc( sizeof( struct berval * ) ); - keys[i]->bv_val = phonetic( values[i]->bv_val ); - keys[i]->bv_len = strlen( keys[i]->bv_val ); + /* Yes, this is necessary */ + s = UTF8normalize( values[i]->bv_val, UTF8_NOCASEFOLD ); + + /* strip 8-bit chars and run through phonetic() */ + keys[i] = ber_bvstr( phonetic( strip8bitChars( s ) ) ); + free( s ); } keys[i] = NULL; @@ -928,14 +1001,21 @@ approxFilter( struct berval ***keysp ) { struct berval **keys; + char *s; keys = (struct berval **)ch_malloc( sizeof( struct berval * ) * 2 ); - /* Copy the value and run it 
through phonetic() */ - keys[0] = ch_malloc( sizeof( struct berval * ) ); - keys[0]->bv_val = phonetic( ((struct berval *)assertValue)->bv_val ); - keys[0]->bv_len = strlen( keys[0]->bv_val ); - keys[1] = NULL; + /* Yes, this is necessary */ + s = UTF8normalize( ((struct berval *)assertValue)->bv_val, + UTF8_NOCASEFOLD ); + if( s == NULL ) { + keys[0] = NULL; + } else { + /* strip 8-bit chars and run through phonetic() */ + keys[0] = ber_bvstr( phonetic( strip8bitChars( s ) ) ); + free( s ); + keys[1] = NULL; + } *keysp = keys; return LDAP_SUCCESS;