From: Stig Venaas Date: Wed, 27 Feb 2002 16:11:24 +0000 (+0000) Subject: Using new UTF8bv* all over, getting rid of UTF8normalize() and X-Git-Tag: OPENLDAP_REL_ENG_2_MP~389 X-Git-Url: https://git.sur5r.net/?a=commitdiff_plain;h=c476878fdee7d1273727f4e4ecf7cc5dd5605045;p=openldap Using new UTF8bv* all over, getting rid of UTF8normalize() and UTF8normcmp(). --- diff --git a/include/ldap_pvt_uc.h b/include/ldap_pvt_uc.h index ba20d28fab..1cc84620eb 100644 --- a/include/ldap_pvt_uc.h +++ b/include/ldap_pvt_uc.h @@ -143,20 +143,11 @@ LDAP_LUNICODE_F(void) ucstr2upper( #define LDAP_UTF8_ARG2NFC 0x4U #define LDAP_UTF8_APPROX 0x8U -LDAP_LUNICODE_F(char *) UTF8normalize( - struct berval *, - unsigned ); - LDAP_LUNICODE_F(struct berval *) UTF8bvnormalize( struct berval *, struct berval *, unsigned ); -LDAP_LUNICODE_F(int) UTF8normcmp( - const char *, - const char *, - unsigned ); - LDAP_LUNICODE_F(int) UTF8bvnormcmp( struct berval *, struct berval *, diff --git a/libraries/liblunicode/ucstr.c b/libraries/liblunicode/ucstr.c index fa45868ebd..b5a98151d0 100644 --- a/libraries/liblunicode/ucstr.c +++ b/libraries/liblunicode/ucstr.c @@ -92,156 +92,6 @@ void ucstr2upper( } } -char * UTF8normalize( - struct berval *bv, - unsigned casefold ) -{ - int i, j, len, clen, outpos, ucsoutlen, outsize, last; - char *out, *s; - unsigned long *ucs, *p, *ucsout; - - static unsigned char mask[] = { - 0, 0x7f, 0x1f, 0x0f, 0x07, 0x03, 0x01 }; - - if ( bv == NULL ) { - return NULL; - } - - s = bv->bv_val; - len = bv->bv_len; - - /* See if the string is pure ASCII so we can shortcut */ - for ( i=0; ibv_val, len ); - } else { - for ( j=0; j 0 ) { - *p = casefold ? TOUPPER( s[i - 1] ) : s[i - 1]; - p++; - } - - /* s[i] is now first non-ascii character */ - for (;;) { - /* s[i] is non-ascii */ - /* convert everything up to next ascii to ucs-4 */ - while ( i < len ) { - clen = LDAP_UTF8_CHARLEN2( s + i, clen ); - if ( clen == 0 ) { - free( ucs ); - free( out ); - return NULL; - } - if ( clen == 1 ) { - /* ascii */ - break; - } - *p = s[i] & mask[clen]; - i++; - for( j = 1; j < clen; j++ ) { - if ( (s[i] & 0xc0) != 0x80 ) { - free( ucs ); - free( out ); - return NULL; - } - *p <<= 6; - *p |= s[i] & 0x3f; - i++; - } - if ( casefold ) { - *p = uctoupper( *p ); - } - p++; - } - /* normalize ucs of length p - ucs */ - uccanondecomp( ucs, p - ucs, &ucsout, &ucsoutlen ); - ucsoutlen = uccanoncomp( ucsout, ucsoutlen ); - /* convert ucs to utf-8 and store in out */ - for ( j = 0; j < ucsoutlen; j++ ) { - /* allocate more space if not enough room for - 6 bytes and terminator */ - if ( outsize - outpos < 7 ) { - outsize = ucsoutlen - j + outpos + 6; - out = (char *) realloc( out, outsize ); - if ( out == NULL ) { - free( ucs ); - return NULL; - } - } - outpos += ldap_x_ucs4_to_utf8( ucsout[j], &out[outpos] ); - } - - if ( i == len ) { - break; - } - - last = i; - - /* s[i] is ascii */ - /* finish off everything up to char before next non-ascii */ - for ( i++; (i < len) && LDAP_UTF8_ISASCII(s + i); i++ ) { - out[outpos++] = casefold ? TOUPPER( s[i-1] ) : s[i-1]; - } - if ( i == len ) { - out[outpos++] = casefold ? TOUPPER( s[len - 1] ) : s[len - 1]; - break; - } - - /* convert character before next non-ascii to ucs-4 */ - *ucs = casefold ? TOUPPER( s[i - 1] ) : s[i - 1]; - p = ucs + 1; - } - free( ucs ); - out[outpos] = '\0'; - return out; -} - struct berval * UTF8bvnormalize( struct berval *bv, struct berval *newbv, @@ -412,102 +262,6 @@ struct berval * UTF8bvnormalize( return ber_str2bv( out, outpos, 0, newbv ); } -/* compare UTF8-strings, optionally ignore casing, string pointers must not be NULL */ -/* slow, should be optimized */ -int UTF8normcmp( - const char *s1, - const char *s2, - unsigned casefold ) -{ - int i, l1, l2, len, ulen, res; - unsigned long *ucs, *ucsout1, *ucsout2; - - l1 = strlen( s1 ); - l2 = strlen( s2 ); - - if ( ( l1 == 0 ) || ( l2 == 0 ) ) { - if ( l1 == l2 ) { - return 0; - } - return *s1 - *s2 > 0 ? 1 : -1; - } - - /* See if we can get away with a straight ASCII compare */ - len = (l1 < l2) ? l1 : l2; - for ( i = 0; i l2 ? l1 : l2 ) * sizeof(*ucs) ); - if ( ucs == NULL ) { - return l1 > l2 ? 1 : -1; /* what to do??? */ - } - - /* - * XXYYZ: we convert to ucs4 even though -llunicode - * expects ucs2 in an unsigned long - */ - - /* convert and normalize 1st string */ - for ( i = 0, ulen = 0; i < l1; i += len, ulen++ ) { - ucs[ulen] = ldap_x_utf8_to_ucs4( s1 + i ); - if ( ucs[ulen] == LDAP_UCS4_INVALID ) { - free( ucs ); - return -1; /* what to do??? */ - } - len = LDAP_UTF8_CHARLEN( s1 + i ); - } - uccanondecomp( ucs, ulen, &ucsout1, &l1 ); - l1 = uccanoncomp( ucsout1, l1 ); - - /* convert and normalize 2nd string */ - for ( i = 0, ulen = 0; i < l2; i += len, ulen++ ) { - ucs[ulen] = ldap_x_utf8_to_ucs4( s2 + i ); - if ( ucs[ulen] == LDAP_UCS4_INVALID ) { - free( ucsout1 ); - free( ucs ); - return 1; /* what to do??? */ - } - len = LDAP_UTF8_CHARLEN( s2 + i ); - } - uccanondecomp( ucs, ulen, &ucsout2, &l2 ); - l2 = uccanoncomp( ucsout2, l2 ); - - free( ucs ); - - res = casefold - ? ucstrncasecmp( ucsout1, ucsout2, l1 < l2 ? l1 : l2 ) - : ucstrncmp( ucsout1, ucsout2, l1 < l2 ? l1 : l2 ); - free( ucsout1 ); - free( ucsout2 ); - - if ( res != 0 ) { - return res; - } - if ( l1 == l2 ) { - return 0; - } - return l1 > l2 ? 1 : -1; -} - /* compare UTF8-strings, optionally ignore casing */ /* slow, should be optimized */ int UTF8bvnormcmp( diff --git a/servers/slapd/schema_init.c b/servers/slapd/schema_init.c index 40df017554..b5ce9113f6 100644 --- a/servers/slapd/schema_init.c +++ b/servers/slapd/schema_init.c @@ -776,16 +776,17 @@ approxIndexer( { char *c; int i,j, len, wordcount, keycount=0; - struct berval *val, *newkeys; + struct berval *newkeys; BerVarray keys=NULL; for( j=0; values[j].bv_val != NULL; j++ ) { + struct berval val = { 0, NULL }; /* Yes, this is necessary */ - val = UTF8bvnormalize( &values[j], NULL, LDAP_UTF8_APPROX ); - assert( val != NULL && val->bv_val != NULL ); + UTF8bvnormalize( &values[j], &val, LDAP_UTF8_APPROX ); + assert( val.bv_val != NULL ); /* Isolate how many words there are. There will be a key for each */ - for( wordcount = 0, c = val->bv_val; *c; c++) { + for( wordcount = 0, c = val.bv_val; *c; c++) { len = strcspn(c, SLAPD_APPROX_DELIMITER); if( len >= SLAPD_APPROX_WORDLEN ) wordcount++; c+= len; @@ -801,7 +802,7 @@ approxIndexer( keys = newkeys; /* Get a phonetic copy of each word */ - for( c = val->bv_val, i = 0; i < wordcount; c += len + 1 ) { + for( c = val.bv_val, i = 0; i < wordcount; c += len + 1 ) { len = strlen( c ); if( len < SLAPD_APPROX_WORDLEN ) continue; ber_str2bv( phonetic( c ), 0, 0, &keys[keycount] ); @@ -809,7 +810,7 @@ approxIndexer( i++; } - ber_bvfree( val ); + ber_memfree( val.bv_val ); } keys[keycount].bv_val = NULL; *keysp = keys; @@ -997,9 +998,7 @@ caseExactMatch( struct berval *value, void *assertedValue ) { - *matchp = UTF8normcmp( value->bv_val, - ((struct berval *) assertedValue)->bv_val, - LDAP_UTF8_NOCASEFOLD ); + *matchp = UTF8bvnormcmp( value, (struct berval *) assertedValue, LDAP_UTF8_NOCASEFOLD ); return LDAP_SUCCESS; } @@ -1195,8 +1194,7 @@ static int caseExactIgnoreIndexer( for( i=0; values[i].bv_val != NULL; i++ ) { struct berval value; - ber_str2bv( UTF8normalize( &values[i], casefold ), 0, 0, - &value ); + UTF8bvnormalize( &values[i], &value, casefold ); HASH_Init( &HASHcontext ); if( prefix != NULL && prefix->bv_len > 0 ) { @@ -1236,8 +1234,9 @@ static int caseExactIgnoreFilter( BerVarray keys; HASH_CONTEXT HASHcontext; unsigned char HASHdigest[HASH_BYTES]; - struct berval value; + struct berval value = { 0, NULL }; struct berval digest; + digest.bv_val = HASHdigest; digest.bv_len = sizeof(HASHdigest); @@ -1247,8 +1246,7 @@ static int caseExactIgnoreFilter( casefold = strcmp( mr->smr_oid, caseExactMatchOID ) ? LDAP_UTF8_CASEFOLD : LDAP_UTF8_NOCASEFOLD; - ber_str2bv( UTF8normalize( ((struct berval *) assertValue), casefold ), - 0, 0, &value ); + UTF8bvnormalize( (struct berval *) assertValue, &value, casefold ); /* This usually happens if filter contains bad UTF8 */ if( value.bv_val == NULL ) { keys = ch_malloc( sizeof( struct berval ) ); @@ -1316,8 +1314,7 @@ static int caseExactIgnoreSubstringsIndexer( nvalues = ch_malloc( sizeof( struct berval ) * (i+1) ); for( i=0; values[i].bv_val != NULL; i++ ) { - ber_str2bv( UTF8normalize( &values[i], casefold ), - 0, 0, &nvalues[i] ); + UTF8bvnormalize( &values[i], &nvalues[i], casefold ); } nvalues[i].bv_val = NULL; values = nvalues; @@ -1647,9 +1644,7 @@ caseIgnoreMatch( struct berval *value, void *assertedValue ) { - *matchp = UTF8normcmp( value->bv_val, - ((struct berval *) assertedValue)->bv_val, - LDAP_UTF8_CASEFOLD ); + *matchp = UTF8bvnormcmp( value, (struct berval *) assertedValue, LDAP_UTF8_CASEFOLD ); return LDAP_SUCCESS; }