From: Kurt Zeilenga Date: Sun, 23 Jan 2000 05:35:38 +0000 (+0000) Subject: Add comments to UTF-8 declarations. X-Git-Tag: UCDATA_2_4~13 X-Git-Url: https://git.sur5r.net/?a=commitdiff_plain;h=16c903909cc11ad1f0cb67afa87b15f9a11499cb;p=openldap Add comments to UTF-8 declarations. Add US ASCII optimizations macros. #ifdef out unused routines Ready to hack getdn.c and others to support UTF-8 --- diff --git a/include/ldap_pvt.h b/include/ldap_pvt.h index f1a0ab6aef..8bb43326fe 100644 --- a/include/ldap_pvt.h +++ b/include/ldap_pvt.h @@ -147,25 +147,54 @@ LIBLDAP_F (int) ldap_pvt_tls_start LDAP_P(( Sockbuf *sb, void *ctx_arg )); * UTF-8 (in utf-8.c) */ +/* returns the number of bytes in the UTF-8 string + (counting the terminating NUL) */ LIBLDAP_F (ber_len_t) ldap_utf8_bytes( const char * ); +/* returns the number of UTF-8 characters in the string */ LIBLDAP_F (ber_len_t) ldap_utf8_chars( const char * ); +/* returns the length (in bytes) of a UTF-8 character */ LIBLDAP_F (int) ldap_utf8_charlen( const char * ); +/* copies a UTF-8 character and returns the number of bytes copied */ +LIBLDAP_F (int) ldap_utf8_copy( char *, const char *); +/* returns pointer to next UTF-8 character in string */ LIBLDAP_F (char*) ldap_utf8_next( const char * ); +/* returns pointer to previous UTF-8 character in string */ LIBLDAP_F (char*) ldap_utf8_prev( const char * ); +/* primitive ctype routines -- not aware of non-ASCII characters */ LIBLDAP_F (int) ldap_utf8_isascii( const char * ); -LIBLDAP_F (int) ldap_utf8_isalpa( const char * ); +LIBLDAP_F (int) ldap_utf8_isalpha( const char * ); LIBLDAP_F (int) ldap_utf8_isalnum( const char * ); LIBLDAP_F (int) ldap_utf8_isdigit( const char * ); LIBLDAP_F (int) ldap_utf8_isxdigit( const char * ); LIBLDAP_F (int) ldap_utf8_isspace( const char * ); +/* span characters not in set, return bytes spanned */ LIBLDAP_F (ber_len_t) ldap_utf8_strcspn( const char* str, const char *set); +/* span characters in set, return bytes spanned */ LIBLDAP_F (ber_len_t) 
ldap_utf8_strspn( const char* str, const char *set); +/* return pointer to first character in string that is in set */ LIBLDAP_F (char *) ldap_utf8_strpbrk( const char* str, const char *set); +/* reentrant tokenizer */ LIBLDAP_F (char*) ldap_utf8_strtok( char* sp, const char* sep, char **last); +/* Optimizations: inline fast path for single-byte (US-ASCII, < 0x80) characters */ +#define LDAP_UTF8_CHARLEN(p) (*(const unsigned char *)(p) < 0x80 \ + ? 1 : ldap_utf8_charlen((p))) + +#define LDAP_UTF8_COPY(d,s) (*(const unsigned char *)(s) < 0x80 \ + ? (*(d) = *(s), 1) : ldap_utf8_copy((d),(s))) + +#define LDAP_UTF8_NEXT(p) (*(const unsigned char *)(p) < 0x80 \ + ? &(p)[1] : ldap_utf8_next((p))) + +#define LDAP_UTF8_INCR(p) (*(const unsigned char *)(p) < 0x80 \ + ? ++(p) : ((p)=ldap_utf8_next((p)))) + +/* For symmetry */ +#define LDAP_UTF8_PREV(p) (ldap_utf8_prev((p))) +#define LDAP_UTF8_DECR(p) ((p)=ldap_utf8_prev((p))) LDAP_END_DECL diff --git a/libraries/libldap/utf-8.c b/libraries/libldap/utf-8.c index e14ebf84d8..29e96a18a8 100644 --- a/libraries/libldap/utf-8.c +++ b/libraries/libldap/utf-8.c @@ -308,7 +308,6 @@ int ldap_utf8_isupper( const char * p ) return ( c >= 'A' && c <= 'Z' ); } -#endif /* * get one UTF-8 character @@ -352,6 +351,7 @@ char* ldap_utf8_fgetc( FILE *s, char *buf ) return buf; } +#endif /* @@ -423,7 +423,7 @@ char *(ldap_utf8_strpbrk)( const char *str, const char *set ) for( cset = set; ; cset += len ) { if( ldap_utf8_to_ucs4( cstr ) == ldap_utf8_to_ucs4( cset ) ) { - return cstr; + return (char *) cstr; } len = ldap_utf8_charlen(cset);