X-Git-Url: https://git.sur5r.net/?a=blobdiff_plain;f=libraries%2Flibldap%2Futf-8.c;h=6b2ac969e1046d0f8be70daa30daa521fde189eb;hb=2b0819c4a9209784f762ec154ce4721038522a8a;hp=2c0cf19082c92cfc2835a1d5b1c3306f9efeabe5;hpb=42cc5e5333c36b1bd7ab250dac3a1a54ec3c439a;p=openldap diff --git a/libraries/libldap/utf-8.c b/libraries/libldap/utf-8.c index 2c0cf19082..6b2ac969e1 100644 --- a/libraries/libldap/utf-8.c +++ b/libraries/libldap/utf-8.c @@ -25,13 +25,13 @@ #include #include +#include "ldap_utf8.h" + #include "ldap-int.h" #include "ldap_defaults.h" -#undef ISASCII -#define ISASCII(uc) ((uc) < 0x100) -#undef UCS4_INVALID -#define UCS4_INVALID 0x80000000U +#undef LDAP_IS_ASCII +#define LDAP_IS_ASCII(uc) ((uc) < 0x80) /* * Basic UTF-8 routines @@ -39,16 +39,14 @@ /* * return the number of bytes required to hold the - * NULL-terminated UTF-8 string INCLUDING the + * NULL-terminated UTF-8 string NOT INCLUDING the * termination. */ ber_len_t ldap_utf8_bytes( const char * p ) { - ber_len_t bytes = 0; - - if( p == NULL ) return bytes; + ber_len_t bytes; - while( p[bytes++] ) { + for( bytes=0; p[bytes]; bytes++ ) { /* EMPTY */ ; } @@ -111,23 +109,23 @@ int ldap_utf8_charlen( const char * p ) } /* conv UTF-8 to UCS-4, useful for comparisons */ -ber_int_t ldap_utf8_to_ucs4( const char * p ) +ldap_ucs4_t ldap_x_utf8_to_ucs4( const char * p ) { const unsigned char *c = p; - ber_int_t ch; + ldap_ucs4_t ch; int len, i; static unsigned char mask[] = { 0, 0x7f, 0x1f, 0x0f, 0x07, 0x03, 0x01 }; len = LDAP_UTF8_CHARLEN(p); - if( len == 0 ) return UCS4_INVALID; + if( len == 0 ) return LDAP_UCS4_INVALID; ch = c[0] & mask[len]; for(i=1; i < len; i++) { if ((c[i] & 0xc0) != 0x80) { - return UCS4_INVALID; + return LDAP_UCS4_INVALID; } ch <<= 6; @@ -138,7 +136,7 @@ ber_int_t ldap_utf8_to_ucs4( const char * p ) } /* conv UCS-4 to UTF-8, not used */ -int ldap_ucs4_to_utf8( ber_int_t c, char *buf ) +int ldap_x_ucs4_to_utf8( ldap_ucs4_t c, char *buf ) { int len=0; unsigned char* p = buf; @@ -165,7 +163,7 @@ int ldap_ucs4_to_utf8( ber_int_t c, char *buf ) p[len++] = 0x80 | ( (c >> 6) & 0x3f ); p[len++] = 0x80 | ( c & 0x3f ); - } else if( c < 0x400000 ) { + } else if( c < 0x4000000 ) { p[len++] = 0xf8 | ( c >> 24 ); p[len++] = 0x80 | ( (c >> 18) & 0x3f ); p[len++] = 0x80 | ( (c >> 12) & 0x3f ); @@ -204,7 +202,7 @@ char* ldap_utf8_next( const char * p ) } for( i=1; i<6; i++ ) { - if ( u[i] & 0xc0 != 0x80 ) { + if ( ( u[i] & 0xc0 ) != 0x80 ) { return (char *) &p[i]; } } @@ -227,7 +225,7 @@ char* ldap_utf8_prev( const char * p ) const unsigned char *u = p; for( i=-1; i>-6 ; i-- ) { - if ( u[i] & 0xc0 != 0x80 ) { + if ( ( u[i] & 0xc0 ) != 0x80 ) { return (char *) &p[i]; } } @@ -257,7 +255,7 @@ int ldap_utf8_copy( char* dst, const char *src ) } for( i=1; i<6; i++ ) { - if ( u[i] & 0xc0 != 0x80 ) { + if ( ( u[i] & 0xc0 ) != 0x80 ) { return i; } dst[i] = src[i]; @@ -268,20 +266,20 @@ int ldap_utf8_copy( char* dst, const char *src ) /* * UTF-8 ctype routines - * Only deals with characters < 0x100 (ie: US-ASCII) + * Only deals with characters < 0x80 (ie: US-ASCII) */ int ldap_utf8_isascii( const char * p ) { unsigned c = * (const unsigned char *) p; - return ISASCII(c); + return LDAP_IS_ASCII(c); } int ldap_utf8_isdigit( const char * p ) { unsigned c = * (const unsigned char *) p; - if(!ISASCII(c)) return 0; + if(!LDAP_IS_ASCII(c)) return 0; return c >= '0' && c <= '9'; } @@ -290,7 +288,7 @@ int ldap_utf8_isxdigit( const char * p ) { unsigned c = * (const unsigned char *) p; - if(!ISASCII(c)) return 0; + if(!LDAP_IS_ASCII(c)) return 0; return ( c >= '0' && c <= '9' ) || ( c >= 'A' && c <= 'F' ) @@ -301,7 +299,7 @@ int ldap_utf8_isspace( const char * p ) { unsigned c = * (const unsigned char *) p; - if(!ISASCII(c)) return 0; + if(!LDAP_IS_ASCII(c)) return 0; switch(c) { case ' ': @@ -325,7 +323,7 @@ int ldap_utf8_isalpha( const char * p ) { unsigned c = * (const unsigned char *) p; - if(!ISASCII(c)) return 0; + if(!LDAP_IS_ASCII(c)) return 0; return ( c >= 'A' && c <= 'Z' ) || ( c >= 'a' && c <= 'z' ); @@ -335,7 +333,7 @@ int ldap_utf8_isalnum( const char * p ) { unsigned c = * (const unsigned char *) p; - if(!ISASCII(c)) return 0; + if(!LDAP_IS_ASCII(c)) return 0; return ( c >= '0' && c <= '9' ) || ( c >= 'A' && c <= 'Z' ) @@ -346,7 +344,7 @@ int ldap_utf8_islower( const char * p ) { unsigned c = * (const unsigned char *) p; - if(!ISASCII(c)) return 0; + if(!LDAP_IS_ASCII(c)) return 0; return ( c >= 'a' && c <= 'z' ); } @@ -355,7 +353,7 @@ int ldap_utf8_isupper( const char * p ) { unsigned c = * (const unsigned char *) p; - if(!ISASCII(c)) return 0; + if(!LDAP_IS_ASCII(c)) return 0; return ( c >= 'A' && c <= 'Z' ); } @@ -370,7 +368,7 @@ int ldap_utf8_isupper( const char * p ) char * (ldap_utf8_strchr)( const char *str, const char *chr ) { for( ; *str != '\0'; LDAP_UTF8_INCR(str) ) { - if( ldap_utf8_to_ucs4( str ) == ldap_utf8_to_ucs4( chr ) ) { + if( ldap_x_utf8_to_ucs4( str ) == ldap_x_utf8_to_ucs4( chr ) ) { return (char *) str; } } @@ -386,7 +384,7 @@ ber_len_t (ldap_utf8_strcspn)( const char *str, const char *set ) for( cstr = str; *cstr != '\0'; LDAP_UTF8_INCR(cstr) ) { for( cset = set; *cset != '\0'; LDAP_UTF8_INCR(cset) ) { - if( ldap_utf8_to_ucs4( cstr ) == ldap_utf8_to_ucs4( cset ) ) { + if( ldap_x_utf8_to_ucs4( cstr ) == ldap_x_utf8_to_ucs4( cset ) ) { return cstr - str; } } @@ -402,13 +400,12 @@ ber_len_t (ldap_utf8_strspn)( const char *str, const char *set ) const char *cset; for( cstr = str; *cstr != '\0'; LDAP_UTF8_INCR(cstr) ) { - for( cset = set; ; LDAP_UTF8_INCR(cset) ) { if( *cset == '\0' ) { return cstr - str; } - if( ldap_utf8_to_ucs4( cstr ) == ldap_utf8_to_ucs4( cset ) ) { + if( ldap_x_utf8_to_ucs4( cstr ) == ldap_x_utf8_to_ucs4( cset ) ) { break; } } @@ -420,13 +417,11 @@ ber_len_t (ldap_utf8_strspn)( const char *str, const char *set ) /* like strpbrk(), replaces strchr() as well */ char *(ldap_utf8_strpbrk)( const char *str, const char *set ) { - int len; - for( ; *str != '\0'; LDAP_UTF8_INCR(str) ) { const char *cset; for( cset = set; *cset != '\0'; LDAP_UTF8_INCR(cset) ) { - if( ldap_utf8_to_ucs4( str ) == ldap_utf8_to_ucs4( cset ) ) { + if( ldap_x_utf8_to_ucs4( str ) == ldap_x_utf8_to_ucs4( cset ) ) { return (char *) str; } }