From 42cc5e5333c36b1bd7ab250dac3a1a54ec3c439a Mon Sep 17 00:00:00 2001 From: Kurt Zeilenga Date: Sun, 23 Jan 2000 23:07:24 +0000 Subject: [PATCH] Fix bugs in UTF-8 code. Apply to getdn and charray. --- include/ldap_pvt.h | 20 +++-- libraries/libldap/charray.c | 12 +-- libraries/libldap/getdn.c | 141 +++++++++++++++++++++++------------- libraries/libldap/string.c | 5 ++ libraries/libldap/utf-8.c | 80 ++++++++++++-------- 5 files changed, 161 insertions(+), 97 deletions(-) diff --git a/include/ldap_pvt.h b/include/ldap_pvt.h index 4509a0f396..bbfff50d4f 100644 --- a/include/ldap_pvt.h +++ b/include/ldap_pvt.h @@ -77,8 +77,8 @@ ldap_charray_dup LDAP_P(( char **a )); LIBLDAP_F( char ** ) ldap_str2charray LDAP_P(( - char *str, - char *brkstr )); + const char *str, + const char *brkstr )); /* url.c */ LIBLDAP_F (void) ldap_pvt_hex_unescape LDAP_P(( char *s )); @@ -152,7 +152,9 @@ LIBLDAP_F (int) ldap_pvt_tls_start LDAP_P(( Sockbuf *sb, void *ctx_arg )); LIBLDAP_F (ber_len_t) ldap_utf8_bytes( const char * ); /* returns the number of UTF-8 characters in the string */ LIBLDAP_F (ber_len_t) ldap_utf8_chars( const char * ); -/* returns the length (in bytes) of a UTF-8 string */ +/* returns the length (in bytes) of the UTF-8 character */ +LIBLDAP_F (int) ldap_utf8_offset( const char * ); +/* returns the length (in bytes) indicated by the UTF-8 character */ LIBLDAP_F (int) ldap_utf8_charlen( const char * ); /* copies a UTF-8 character and returning number of bytes copied */ LIBLDAP_F (int) ldap_utf8_copy( char *, const char *); @@ -174,6 +176,8 @@ LIBLDAP_F (int) ldap_utf8_isspace( const char * ); LIBLDAP_F (ber_len_t) ldap_utf8_strcspn( const char* str, const char *set); /* span characters in set, return bytes spanned */ LIBLDAP_F (ber_len_t) ldap_utf8_strspn( const char* str, const char *set); +/* return first occurance of character in string */ +LIBLDAP_F (char *) ldap_utf8_strchr( const char* str, const char *chr); /* return first character of set in string */ LIBLDAP_F 
(char *) ldap_utf8_strpbrk( const char* str, const char *set); /* reentrant tokenizer */ @@ -183,14 +187,16 @@ LIBLDAP_F (char*) ldap_utf8_strtok( char* sp, const char* sep, char **last); #define LDAP_UTF8_ISASCII(p) ( * (const unsigned char *) (p) < 0x100 ) #define LDAP_UTF8_CHARLEN(p) ( LDAP_UTF8_ISASCII(p) \ ? 1 : ldap_utf8_charlen((p)) ) +#define LDAP_UTF8_OFFSET(p) ( LDAP_UTF8_ISASCII(p) \ + ? 1 : ldap_utf8_offset((p)) ) -#define LDAP_UTF8_COPY(p) ( LDAP_UTF8_ISASCII(p) \ - ? (*(d) = *(s), 1) : ldap_utf8_cpy((d),(s)) ) +#define LDAP_UTF8_COPY(d,s) ( LDAP_UTF8_ISASCII(s) \ + ? (*(d) = *(s), 1) : ldap_utf8_copy((d),(s)) ) #define LDAP_UTF8_NEXT(p) ( LDAP_UTF8_ISASCII(p) \ - ? &(p)[1] : ldap_utf8_next((p)) ) + ? (char *)(p)+1 : ldap_utf8_next((p)) ) -#define LDAP_UTF8_INCR(p) ( (p) = LDAP_UTF8_NEXT(p) ) +#define LDAP_UTF8_INCR(p) ((p) = LDAP_UTF8_NEXT(p)) /* For symmetry */ #define LDAP_UTF8_PREV(p) (ldap_utf8_prev((p))) diff --git a/libraries/libldap/charray.c b/libraries/libldap/charray.c index 6be3152427..ff6dd8351c 100644 --- a/libraries/libldap/charray.c +++ b/libraries/libldap/charray.c @@ -165,22 +165,22 @@ ldap_charray_dup( char **a ) } char ** -ldap_str2charray( char *str, char *brkstr ) +ldap_str2charray( const char *str_in, const char *brkstr ) { char **res; - char *s; + char *str, *s; char *lasts; int i; /* protect the input string from strtok */ - str = LDAP_STRDUP( str ); + str = LDAP_STRDUP( str_in ); if( str == NULL ) { return NULL; } i = 1; for ( s = str; *s; s++ ) { - if ( strchr( brkstr, *s ) != NULL ) { + if ( ldap_utf8_strchr( brkstr, s ) != NULL ) { i++; } } @@ -194,9 +194,9 @@ ldap_str2charray( char *str, char *brkstr ) i = 0; - for ( s = ldap_pvt_strtok( str, brkstr, &lasts ); + for ( s = ldap_utf8_strtok( str, brkstr, &lasts ); s != NULL; - s = ldap_pvt_strtok( NULL, brkstr, &lasts ) ) + s = ldap_utf8_strtok( NULL, brkstr, &lasts ) ) { res[i] = LDAP_STRDUP( s ); diff --git a/libraries/libldap/getdn.c b/libraries/libldap/getdn.c index 
f03be3b455..01852c6ef8 100644 --- a/libraries/libldap/getdn.c +++ b/libraries/libldap/getdn.c @@ -23,11 +23,11 @@ #include "ldap-int.h" -#define DN_TYPE_LDAP_RDN 0 -#define DN_TYPE_LDAP_DN 1 -#define DN_TYPE_DCE_DN 2 +#define NAME_TYPE_LDAP_RDN 0 +#define NAME_TYPE_LDAP_DN 1 +#define NAME_TYPE_DCE_DN 2 -static char **explode_name( const char *name, int notypes, int is_dn ); +static char **explode_name( const char *name, int notypes, int is_type ); char * ldap_get_dn( LDAP *ld, LDAPMessage *entry ) @@ -64,27 +64,28 @@ ldap_dn2ufn( LDAP_CONST char *dn ) } if ( ldap_is_dns_dn( dn ) || - ( p = strchr( dn, '=' ) ) == NULL ) + ( p = ldap_utf8_strpbrk( dn, "=" ) ) == NULL ) { return( LDAP_STRDUP( dn ) ); } - ufn = LDAP_STRDUP( ++p ); + if( ufn == NULL ) return NULL; + #define INQUOTE 1 #define OUTQUOTE 2 state = OUTQUOTE; - for ( p = ufn, r = ufn; *p; p++ ) { + for ( p = ufn, r = ufn; *p; LDAP_UTF8_INCR(p) ) { switch ( *p ) { case '\\': - if ( *++p == '\0' ) - p--; - else { + if ( p[1] != '\0' ) { *r++ = '\\'; - *r++ = *p; + ++p; LDAP_UTF8_COPY(r,p); /* step first: LDAP_UTF8_COPY expands its source argument more than once */ + LDAP_UTF8_INCR(r); } break; + case '"': if ( state == INQUOTE ) state = OUTQUOTE; @@ -92,6 +93,7 @@ ldap_dn2ufn( LDAP_CONST char *dn ) state = INQUOTE; *r++ = *p; break; + case ';': case ',': if ( state == OUTQUOTE ) @@ -99,17 +101,22 @@ ldap_dn2ufn( LDAP_CONST char *dn ) else *r++ = *p; break; + case '=': - if ( state == INQUOTE ) + if ( state == INQUOTE ) { *r++ = *p; - else { + } else { char *rsave = r; - *r-- = '\0'; - while ( !isspace( (unsigned char) *r ) + *r = '\0'; + LDAP_UTF8_DECR( r ); + + while ( !ldap_utf8_isspace( r ) && *r != ';' && *r != ',' && r > ufn ) - r--; - r++; + { + LDAP_UTF8_DECR( r ); + } + LDAP_UTF8_INCR( r ); if ( strcasecmp( r, "c" ) && strcasecmp( r, "o" ) @@ -122,8 +129,10 @@ ldap_dn2ufn( LDAP_CONST char *dn ) } } break; + default: - *r++ = *p; + LDAP_UTF8_COPY(r, p); + LDAP_UTF8_INCR(r); break; } } @@ -184,14 +193,14 @@ ldap_explode_dn( LDAP_CONST char *dn, int notypes ) if ( ldap_is_dns_dn( dn 
) ) { return( ldap_explode_dns( dn ) ); } - return explode_name( dn, notypes, DN_TYPE_LDAP_DN ); + return explode_name( dn, notypes, NAME_TYPE_LDAP_DN ); } char ** ldap_explode_rdn( LDAP_CONST char *rdn, int notypes ) { Debug( LDAP_DEBUG_TRACE, "ldap_explode_rdn\n", 0, 0, 0 ); - return explode_name( rdn, notypes, DN_TYPE_LDAP_RDN ); + return explode_name( rdn, notypes, NAME_TYPE_LDAP_RDN ); } char * @@ -202,7 +211,7 @@ ldap_dn2dcedn( LDAP_CONST char *dn ) Debug( LDAP_DEBUG_TRACE, "ldap_dn2dcedn\n", 0, 0, 0 ); - rdns = explode_name( dn, 0, DN_TYPE_LDAP_DN ); + rdns = explode_name( dn, 0, NAME_TYPE_LDAP_DN ); if ( rdns == NULL ) { return NULL; } @@ -240,7 +249,7 @@ ldap_dcedn2dn( LDAP_CONST char *dce ) Debug( LDAP_DEBUG_TRACE, "ldap_dcedn2dn\n", 0, 0, 0 ); - rdns = explode_name( dce, 0, DN_TYPE_DCE_DN ); + rdns = explode_name( dce, 0, NAME_TYPE_DCE_DN ); if ( rdns == NULL ) { return NULL; } @@ -279,22 +288,35 @@ ldap_dcedn2dn( LDAP_CONST char *dce ) } static char ** -explode_name( const char *name, int notypes, int is_dn ) +explode_name( const char *name, int notypes, int is_type ) { - const char *p, *q; + const char *p, *q, *rdn; char **parts = NULL; - int state, count = 0, endquote, len; + int offset, state, have_equals, count = 0, endquote, len; + + /* safe guard */ + if(name == NULL) name = ""; + + /* skip leading whitespace */ + while( ldap_utf8_isspace( name )) { + LDAP_UTF8_INCR( name ); + } - p = name-1; + p = rdn = name; + offset = 0; state = OUTQUOTE; + have_equals=0; do { + /* step forward */ + p += offset; + offset = 1; - ++p; switch ( *p ) { case '\\': - if ( *++p == '\0' ) - p--; + if ( p[1] != '\0' ) { + ++p; offset = LDAP_UTF8_OFFSET(p); /* step first: the macro expands its argument more than once */ + } break; case '"': if ( state == INQUOTE ) @@ -302,23 +324,28 @@ explode_name( const char *name, int notypes, int is_dn ) else state = INQUOTE; break; + case '=': + if( state == OUTQUOTE ) have_equals++; /* compare, not assign: assignment reset the quote state on every '=' */ + break; case '+': - if (is_dn == DN_TYPE_LDAP_RDN) + if (is_type == NAME_TYPE_LDAP_RDN) goto end_part; break; case 
'/': - if (is_dn == DN_TYPE_DCE_DN) + if (is_type == NAME_TYPE_DCE_DN) goto end_part; break; case ';': case ',': - if (is_dn == DN_TYPE_LDAP_DN) + if (is_type == NAME_TYPE_LDAP_DN) goto end_part; break; case '\0': end_part: if ( state == OUTQUOTE ) { ++count; + have_equals=0; + if ( parts == NULL ) { if (( parts = (char **)LDAP_MALLOC( 8 * sizeof( char *))) == NULL ) @@ -329,31 +356,45 @@ explode_name( const char *name, int notypes, int is_dn ) == NULL ) return( NULL ); } + parts[ count ] = NULL; endquote = 0; + if ( notypes ) { - for ( q = name; - q < p && *q != '='; ++q ) { - ; + for ( q = rdn; q < p && *q != '='; ++q ) { + /* EMPTY */; } + if ( q < p ) { - name = ++q; + rdn = ++q; } - if ( *name == '"' ) { - ++name; + + if ( *rdn == '"' ) { + ++rdn; } - if ( *(p-1) == '"' ) { + if ( p[-1] == '"' ) { endquote = 1; --p; } } - len = p - name; + len = p - rdn; + if (( parts[ count-1 ] = (char *)LDAP_CALLOC( 1, - len + 1 )) != NULL ) { - SAFEMEMCPY( parts[ count-1 ], name, - len ); + len + 1 )) != NULL ) + { + SAFEMEMCPY( parts[ count-1 ], rdn, len ); + + if( !endquote ) { + /* skip trailing spaces */ + while( len > 0 && ldap_utf8_isspace( + &parts[count-1][len-1] ) ) + { + --len; + } + } + parts[ count-1 ][ len ] = '\0'; } @@ -365,11 +406,10 @@ explode_name( const char *name, int notypes, int is_dn ) if ( endquote == 1 ) p++; - name = *p ? p + 1 : p; - while ( isascii( *name ) && isspace( *name ) ) - ++name; - } - break; + rdn = *p ? 
&p[1] : p; + while ( ldap_utf8_isspace( rdn ) ) + ++rdn; + } break; } } while ( *p ); @@ -380,9 +420,6 @@ explode_name( const char *name, int notypes, int is_dn ) int ldap_is_dns_dn( LDAP_CONST char *dn ) { - return( dn[ 0 ] != '\0' - && strchr( dn, '=' ) == NULL - && strchr( dn, ',' ) == NULL - && strchr( dn, ';' ) == NULL ); + return dn[ 0 ] != '\0' && ldap_utf8_strpbrk( dn, "=,;" ) == NULL; } diff --git a/libraries/libldap/string.c b/libraries/libldap/string.c index cd9994ad06..def6fce9fa 100644 --- a/libraries/libldap/string.c +++ b/libraries/libldap/string.c @@ -4,6 +4,11 @@ * COPYING RESTRICTIONS APPLY, see COPYRIGHT file */ +/* + * Locale-specific 1-byte character versions + * See utf-8.c for UTF-8 versions + */ + #include "portable.h" #include diff --git a/libraries/libldap/utf-8.c b/libraries/libldap/utf-8.c index eadf1de118..2c0cf19082 100644 --- a/libraries/libldap/utf-8.c +++ b/libraries/libldap/utf-8.c @@ -60,13 +60,19 @@ ber_len_t ldap_utf8_chars( const char * p ) /* could be optimized and could check for invalid sequences */ ber_len_t chars=0; - for( ; *p ; p=LDAP_UTF8_NEXT(p) ) { + for( ; *p ; LDAP_UTF8_INCR(p) ) { chars++; }; return chars; } +/* return offset to next character */ +int ldap_utf8_offset( const char * p ) +{ + return LDAP_UTF8_NEXT(p) - p; +} + /* * Returns length indicated by first byte. 
* @@ -111,7 +117,7 @@ ber_int_t ldap_utf8_to_ucs4( const char * p ) ber_int_t ch; int len, i; static unsigned char mask[] = { - 0, 0x7f, 0x1F, 0x0F, 0x07, 0x03, 0x01 }; + 0, 0x7f, 0x1f, 0x0f, 0x07, 0x03, 0x01 }; len = LDAP_UTF8_CHARLEN(p); @@ -146,33 +152,33 @@ int ldap_ucs4_to_utf8( ber_int_t c, char *buf ) } else if( c < 0x800 ) { p[len++] = 0xc0 | ( c >> 6 ); - p[len++] = 0x80 | ( c & 0x3F ); + p[len++] = 0x80 | ( c & 0x3f ); } else if( c < 0x10000 ) { p[len++] = 0xe0 | ( c >> 12 ); - p[len++] = 0x80 | ( (c >> 6) & 0x3F ); - p[len++] = 0x80 | ( c & 0x3F ); + p[len++] = 0x80 | ( (c >> 6) & 0x3f ); + p[len++] = 0x80 | ( c & 0x3f ); } else if( c < 0x200000 ) { p[len++] = 0xf0 | ( c >> 18 ); - p[len++] = 0x80 | ( (c >> 12) & 0x3F ); - p[len++] = 0x80 | ( (c >> 6) & 0x3F ); - p[len++] = 0x80 | ( c & 0x3F ); + p[len++] = 0x80 | ( (c >> 12) & 0x3f ); + p[len++] = 0x80 | ( (c >> 6) & 0x3f ); + p[len++] = 0x80 | ( c & 0x3f ); } else if( c < 0x400000 ) { p[len++] = 0xf8 | ( c >> 24 ); - p[len++] = 0x80 | ( (c >> 18) & 0x3F ); - p[len++] = 0x80 | ( (c >> 12) & 0x3F ); - p[len++] = 0x80 | ( (c >> 6) & 0x3F ); - p[len++] = 0x80 | ( c & 0x3F ); + p[len++] = 0x80 | ( (c >> 18) & 0x3f ); + p[len++] = 0x80 | ( (c >> 12) & 0x3f ); + p[len++] = 0x80 | ( (c >> 6) & 0x3f ); + p[len++] = 0x80 | ( c & 0x3f ); } else /* if( c < 0x80000000 ) */ { p[len++] = 0xfc | ( c >> 30 ); - p[len++] = 0x80 | ( (c >> 24) & 0x3F ); - p[len++] = 0x80 | ( (c >> 18) & 0x3F ); - p[len++] = 0x80 | ( (c >> 12) & 0x3F ); - p[len++] = 0x80 | ( (c >> 6) & 0x3F ); - p[len++] = 0x80 | ( c & 0x3F ); + p[len++] = 0x80 | ( (c >> 24) & 0x3f ); + p[len++] = 0x80 | ( (c >> 18) & 0x3f ); + p[len++] = 0x80 | ( (c >> 12) & 0x3f ); + p[len++] = 0x80 | ( (c >> 6) & 0x3f ); + p[len++] = 0x80 | ( c & 0x3f ); } buf[len] = '\0'; @@ -198,7 +204,7 @@ char* ldap_utf8_next( const char * p ) } for( i=1; i<6; i++ ) { - if ( u[i] & 0xC0 != 0x80 ) { + if ( ( u[i] & 0xc0 ) != 0x80 ) { /* parenthesized: != binds tighter than &; stop at the first non-continuation byte */ return (char *) &p[i]; } } @@ -221,7 +227,7 @@ char* 
ldap_utf8_prev( const char * p ) const unsigned char *u = p; for( i=-1; i>-6 ; i-- ) { - if ( u[i] & 0xC0 != 0x80 ) { + if ( ( u[i] & 0xc0 ) != 0x80 ) { /* parenthesized: != binds tighter than &; stop at the first non-continuation byte */ return (char *) &p[i]; } } @@ -251,7 +257,7 @@ int ldap_utf8_copy( char* dst, const char *src ) } for( i=1; i<6; i++ ) { - if ( u[i] & 0xC0 != 0x80 ) { + if ( ( u[i] & 0xc0 ) != 0x80 ) { /* parenthesized: != binds tighter than &; copy continuation bytes only */ return i; } dst[i] = src[i]; @@ -340,7 +346,7 @@ int ldap_utf8_islower( const char * p ) { unsigned c = * (const unsigned char *) p; - if(!UTF8_ISASCII(c)) return 0; + if(!ISASCII(c)) return 0; return ( c >= 'a' && c <= 'z' ); } @@ -360,15 +366,26 @@ int ldap_utf8_isupper( const char * p ) * UTF-8 string routines */ +/* like strchr() */ +char * (ldap_utf8_strchr)( const char *str, const char *chr ) +{ + for( ; *str != '\0'; LDAP_UTF8_INCR(str) ) { + if( ldap_utf8_to_ucs4( str ) == ldap_utf8_to_ucs4( chr ) ) { + return (char *) str; + } + } + + return NULL; +} + /* like strcspn() but returns number of bytes, not characters */ ber_len_t (ldap_utf8_strcspn)( const char *str, const char *set ) { const char *cstr; + const char *cset; - for( cstr = str; *cstr != '\0'; cstr = LDAP_UTF8_NEXT(cstr) ) { - const char *cset; - - for( cset = set; ; cset = LDAP_UTF8_NEXT(cset) ) { + for( cstr = str; *cstr != '\0'; LDAP_UTF8_INCR(cstr) ) { + for( cset = set; *cset != '\0'; LDAP_UTF8_INCR(cset) ) { if( ldap_utf8_to_ucs4( cstr ) == ldap_utf8_to_ucs4( cset ) ) { return cstr - str; } @@ -382,9 +399,9 @@ ber_len_t (ldap_utf8_strcspn)( const char *str, const char *set ) ber_len_t (ldap_utf8_strspn)( const char *str, const char *set ) { const char *cstr; + const char *cset; for( cstr = str; *cstr != '\0'; LDAP_UTF8_INCR(cstr) ) { - const char *cset; for( cset = set; ; LDAP_UTF8_INCR(cset) ) { if( *cset == '\0' ) { @@ -404,14 +421,13 @@ ber_len_t (ldap_utf8_strspn)( const char *str, const char *set ) char *(ldap_utf8_strpbrk)( const char *str, const char *set ) { int len; - const char *cstr; - for( cstr = str; *cstr != '\0'; LDAP_UTF8_INCR(cstr) ) { + for( ; 
*str != '\0'; LDAP_UTF8_INCR(str) ) { const char *cset; - for( cset = set; ; LDAP_UTF8_INCR(cset) ) { - if( ldap_utf8_to_ucs4( cstr ) == ldap_utf8_to_ucs4( cset ) ) { - return (char *) cstr; + for( cset = set; *cset != '\0'; LDAP_UTF8_INCR(cset) ) { + if( ldap_utf8_to_ucs4( str ) == ldap_utf8_to_ucs4( cset ) ) { + return (char *) str; } } } @@ -436,7 +452,7 @@ char *(ldap_utf8_strtok)(char *str, const char *sep, char **last) return NULL; } - end = &begin[ ldap_utf8_strcpn( begin, sep ) ]; + end = &begin[ ldap_utf8_strcspn( begin, sep ) ]; if( *end != '\0' ) { char *next = LDAP_UTF8_NEXT( end ); -- 2.39.2