From 4e0964e8b477b77ccf50c7bcf8f163253480be2d Mon Sep 17 00:00:00 2001 From: Kurt Zeilenga Date: Sun, 23 Jan 2000 02:39:55 +0000 Subject: [PATCH] Add comments. --- libraries/libldap/utf-8.c | 76 ++++++++++++++++++++++++++++----------- 1 file changed, 55 insertions(+), 21 deletions(-) diff --git a/libraries/libldap/utf-8.c b/libraries/libldap/utf-8.c index 2069b591f3..e14ebf84d8 100644 --- a/libraries/libldap/utf-8.c +++ b/libraries/libldap/utf-8.c @@ -1,12 +1,17 @@ /* $OpenLDAP$ */ /* - * Copyright 1998-1999 The OpenLDAP Foundation, All Rights Reserved. + * Copyright 1998-2000 The OpenLDAP Foundation, All Rights Reserved. * COPYING RESTRICTIONS APPLY, see COPYRIGHT file */ /* * Basic UTF-8 routines * + * These routines are "dumb". Though they understand UTF-8, + * they don't grok Unicode. That is, they can push bits, + * but don't have a clue what the bits represent. That's + * good enough for use with the LDAP Client SDK. + * * These routines are not optimized. */ @@ -23,9 +28,13 @@ #include "ldap-int.h" #include "ldap_defaults.h" -#define UTF8_ISASCII(u) ( !((u) & ~0x7f) ) +#define UTF8_ISASCII(u) ( (u) < 0x80 ) /* 7-bit US-ASCII only */ #define UCS4_INVALID 0x80000000U +/* + * Basic UTF-8 routines + */ + /* * return the number of bytes required to hold the * NULL-terminated UTF-8 string INCLUDING the @@ -92,6 +101,7 @@ int ldap_utf8_charlen( const char * p ) return 1; } +/* convert UTF-8 to UCS-4, useful for comparisons */ ber_int_t ldap_utf8_to_ucs4( const char * p ) { int len, i; @@ -138,6 +148,7 @@ ber_int_t ldap_utf8_to_ucs4( const char * p ) return c; } +/* convert UCS-4 to UTF-8, not used */ int ldap_ucs4_to_utf8( ber_int_t c, char *buf ) { int len=0; @@ -204,6 +215,11 @@ char* ldap_utf8_prev( const char * p ) return NULL; } +/* + * UTF-8 ctype routines + * Only deals with characters < 0x80 (ie: US-ASCII) + */ + int ldap_utf8_isascii( const char * p ) { unsigned c = * (const unsigned char *) p; @@ -230,6 +246,30 @@ int ldap_utf8_isxdigit( const char * p ) || ( c >= 'a' && c <= 'f' ); }
+int ldap_utf8_isspace( const char * p ) +{ + unsigned c = * (const unsigned char *) p; + + if(!UTF8_ISASCII(c)) return 0; + + switch(c) { /* space, tab, newline, CR, vertical tab, form feed */ + case ' ': + case '\t': + case '\n': + case '\r': + case '\v': + case '\f': + return 1; + } + + return 0; +} + +#ifndef UTF8_ALPHA_CTYPE +/* + * These are not needed by the C SDK and are + * not "good enough" for general use. + */ int ldap_utf8_isalpha( const char * p ) { unsigned c = * (const unsigned char *) p; @@ -268,26 +308,11 @@ int ldap_utf8_isupper( const char * p ) return ( c >= 'A' && c <= 'Z' ); } +#endif /* !UTF8_ALPHA_CTYPE */ -int ldap_utf8_isspace( const char * p ) -{ - unsigned c = * (const unsigned char *) p; - - if(!UTF8_ISASCII(c)) return 0; - - switch(c) { - case ' ': - case '\t': - case '\n': - case '\r': - case '\v': - case '\f': - return 1; - } - - return 0; -} - +/* + * get one UTF-8 character + */ char* ldap_utf8_fgetc( FILE *s, char *buf ) { int i; @@ -328,6 +353,12 @@ char* ldap_utf8_fgetc( FILE *s, char *buf ) return buf; } + +/* + * UTF-8 string routines + */ + +/* like strcspn() but returns number of bytes, not characters */ ber_len_t (ldap_utf8_strcspn)( const char *str, const char *set ) { int len; @@ -352,6 +383,7 @@ ber_len_t (ldap_utf8_strcspn)( const char *str, const char *set ) return cstr - str; } +/* like strspn() but returns number of bytes, not characters */ ber_len_t (ldap_utf8_strspn)( const char *str, const char *set ) { int len; @@ -380,6 +412,7 @@ ber_len_t (ldap_utf8_strspn)( const char *str, const char *set ) return cstr - str; } +/* like strpbrk(), replaces strchr() as well */ char *(ldap_utf8_strpbrk)( const char *str, const char *set ) { int len; @@ -404,6 +437,7 @@ char *(ldap_utf8_strpbrk)( const char *str, const char *set ) return NULL; } +/* like strtok_r() (takes a caller-supplied last pointer), not strtok() */ char *(ldap_utf8_strtok)(char *str, const char *sep, char **last) { char *begin; -- 2.39.5