]> git.sur5r.net Git - openldap/commitdiff
Fix bugs in UTF-8 code. Apply to getdn and charray.
authorKurt Zeilenga <kurt@openldap.org>
Sun, 23 Jan 2000 23:07:24 +0000 (23:07 +0000)
committerKurt Zeilenga <kurt@openldap.org>
Sun, 23 Jan 2000 23:07:24 +0000 (23:07 +0000)
include/ldap_pvt.h
libraries/libldap/charray.c
libraries/libldap/getdn.c
libraries/libldap/string.c
libraries/libldap/utf-8.c

index 4509a0f396988ff1509a3509dbc8cec0c9813993..bbfff50d4fa6f7231f69380063aa754db8c9f916 100644 (file)
@@ -77,8 +77,8 @@ ldap_charray_dup LDAP_P(( char **a ));
 
 LIBLDAP_F( char ** )
 ldap_str2charray LDAP_P((
-       char *str,
-       char *brkstr ));
+       const char *str,
+       const char *brkstr ));
 
 /* url.c */
 LIBLDAP_F (void) ldap_pvt_hex_unescape LDAP_P(( char *s ));
@@ -152,7 +152,9 @@ LIBLDAP_F (int) ldap_pvt_tls_start LDAP_P(( Sockbuf *sb, void *ctx_arg ));
 LIBLDAP_F (ber_len_t) ldap_utf8_bytes( const char * );
 /* returns the number of UTF-8 characters in the string */
 LIBLDAP_F (ber_len_t) ldap_utf8_chars( const char * );
-/* returns the length (in bytes) of a UTF-8 string */
+/* returns the length (in bytes) of the UTF-8 character */
+LIBLDAP_F (int) ldap_utf8_offset( const char * );
+/* returns the length (in bytes) indicated by the UTF-8 character */
 LIBLDAP_F (int) ldap_utf8_charlen( const char * );
 /* copies a UTF-8 character and returning number of bytes copied */
 LIBLDAP_F (int) ldap_utf8_copy( char *, const char *);
@@ -174,6 +176,8 @@ LIBLDAP_F (int) ldap_utf8_isspace( const char * );
 LIBLDAP_F (ber_len_t) ldap_utf8_strcspn( const char* str, const char *set);
 /* span characters in set, return bytes spanned */
 LIBLDAP_F (ber_len_t) ldap_utf8_strspn( const char* str, const char *set);
+/* return first occurrence of character in string */
+LIBLDAP_F (char *) ldap_utf8_strchr( const char* str, const char *chr);
 /* return first character of set in string */
 LIBLDAP_F (char *) ldap_utf8_strpbrk( const char* str, const char *set);
 /* reentrant tokenizer */
@@ -183,14 +187,16 @@ LIBLDAP_F (char*) ldap_utf8_strtok( char* sp, const char* sep, char **last);
 #define LDAP_UTF8_ISASCII(p) ( * (const unsigned char *) (p) < 0x100 )
 #define LDAP_UTF8_CHARLEN(p) ( LDAP_UTF8_ISASCII(p) \
        ? 1 : ldap_utf8_charlen((p)) )
+#define LDAP_UTF8_OFFSET(p) ( LDAP_UTF8_ISASCII(p) \
+       ? 1 : ldap_utf8_offset((p)) )
 
-#define LDAP_UTF8_COPY(p) (    LDAP_UTF8_ISASCII(p) \
-       ? (*(d) = *(s), 1) : ldap_utf8_cpy((d),(s)) )
+#define LDAP_UTF8_COPY(d,s) (  LDAP_UTF8_ISASCII(s) \
+       ? (*(d) = *(s), 1) : ldap_utf8_copy((d),(s)) )
 
 #define LDAP_UTF8_NEXT(p) (    LDAP_UTF8_ISASCII(p) \
-       ? &(p)[1] : ldap_utf8_next((p)) )
+       ? (char *)(p)+1 : ldap_utf8_next((p)) )
 
-#define LDAP_UTF8_INCR(p) ( (p) = LDAP_UTF8_NEXT(p) )
+#define LDAP_UTF8_INCR(p) ((p) = LDAP_UTF8_NEXT(p))
 
 /* For symmetry */
 #define LDAP_UTF8_PREV(p) (ldap_utf8_prev((p)))
index 6be31524278042e45666f77ed1dec6f84ca0ca09..ff6dd8351cf1e11b9d636155ce325639283d9360 100644 (file)
@@ -165,22 +165,22 @@ ldap_charray_dup( char **a )
 }
 
 char **
-ldap_str2charray( char *str, char *brkstr )
+ldap_str2charray( const char *str_in, const char *brkstr )
 {
        char    **res;
-       char    *s;
+       char    *str, *s;
        char    *lasts;
        int     i;
 
        /* protect the input string from strtok */
-       str = LDAP_STRDUP( str );
+       str = LDAP_STRDUP( str_in );
        if( str == NULL ) {
                return NULL;
        }
 
        i = 1;
        for ( s = str; *s; s++ ) {
-               if ( strchr( brkstr, *s ) != NULL ) {
+               if ( ldap_utf8_strchr( brkstr, s ) != NULL ) {
                        i++;
                }
        }
@@ -194,9 +194,9 @@ ldap_str2charray( char *str, char *brkstr )
 
        i = 0;
 
-       for ( s = ldap_pvt_strtok( str, brkstr, &lasts );
+       for ( s = ldap_utf8_strtok( str, brkstr, &lasts );
                s != NULL;
-               s = ldap_pvt_strtok( NULL, brkstr, &lasts ) )
+               s = ldap_utf8_strtok( NULL, brkstr, &lasts ) )
        {
                res[i] = LDAP_STRDUP( s );
 
index f03be3b455d15a1c7039cc41e20f0bb1882eca6f..01852c6ef89d627b1c900292c286018140290f50 100644 (file)
 
 #include "ldap-int.h"
 
-#define DN_TYPE_LDAP_RDN       0
-#define DN_TYPE_LDAP_DN                1
-#define DN_TYPE_DCE_DN         2
+#define NAME_TYPE_LDAP_RDN     0
+#define NAME_TYPE_LDAP_DN      1
+#define NAME_TYPE_DCE_DN       2
 
-static char **explode_name( const char *name, int notypes, int is_dn );
+static char **explode_name( const char *name, int notypes, int is_type );
 
 char *
 ldap_get_dn( LDAP *ld, LDAPMessage *entry )
@@ -64,27 +64,28 @@ ldap_dn2ufn( LDAP_CONST char *dn )
        }
 
        if ( ldap_is_dns_dn( dn ) ||
-               ( p = strchr( dn, '=' ) ) == NULL )
+               ( p = ldap_utf8_strpbrk( dn, "=" ) ) == NULL )
        {
                return( LDAP_STRDUP( dn ) );
        }
 
-
        ufn = LDAP_STRDUP( ++p );
 
+       if( ufn == NULL ) return NULL;
+
 #define INQUOTE                1
 #define OUTQUOTE       2
        state = OUTQUOTE;
-       for ( p = ufn, r = ufn; *p; p++ ) {
+       for ( p = ufn, r = ufn; *p; LDAP_UTF8_INCR(p) ) {
                switch ( *p ) {
                case '\\':
-                       if ( *++p == '\0' )
-                               p--;
-                       else {
+                       if ( p[1] != '\0' ) {
                                *r++ = '\\';
-                               *r++ = *p;
+                               LDAP_UTF8_COPY(r,++p);
+                               LDAP_UTF8_INCR(r);
                        }
                        break;
+
                case '"':
                        if ( state == INQUOTE )
                                state = OUTQUOTE;
@@ -92,6 +93,7 @@ ldap_dn2ufn( LDAP_CONST char *dn )
                                state = INQUOTE;
                        *r++ = *p;
                        break;
+
                case ';':
                case ',':
                        if ( state == OUTQUOTE )
@@ -99,17 +101,22 @@ ldap_dn2ufn( LDAP_CONST char *dn )
                        else
                                *r++ = *p;
                        break;
+
                case '=':
-                       if ( state == INQUOTE )
+                       if ( state == INQUOTE ) {
                                *r++ = *p;
-                       else {
+                       } else { /* '=' seen outside quotes */
                                char    *rsave = r;
 
-                               *r-- = '\0';
-                               while ( !isspace( (unsigned char) *r )
+                               *r = '\0';
+                               LDAP_UTF8_DECR( r );
+
+                               while ( !ldap_utf8_isspace( r )
                                        && *r != ';' && *r != ',' && r > ufn )
-                                       r--;
-                               r++;
+                               {
+                                       LDAP_UTF8_DECR( r );
+                               }
+                               LDAP_UTF8_INCR( r );
 
                                if ( strcasecmp( r, "c" )
                                    && strcasecmp( r, "o" )
@@ -122,8 +129,10 @@ ldap_dn2ufn( LDAP_CONST char *dn )
                                }
                        }
                        break;
+
                default:
-                       *r++ = *p;
+                       LDAP_UTF8_COPY(r, p);
+                       LDAP_UTF8_INCR(r);
                        break;
                }
        }
@@ -184,14 +193,14 @@ ldap_explode_dn( LDAP_CONST char *dn, int notypes )
        if ( ldap_is_dns_dn( dn ) ) {
                return( ldap_explode_dns( dn ) );
        }
-       return explode_name( dn, notypes, DN_TYPE_LDAP_DN );
+       return explode_name( dn, notypes, NAME_TYPE_LDAP_DN );
 }
 
 char **
 ldap_explode_rdn( LDAP_CONST char *rdn, int notypes )
 {
        Debug( LDAP_DEBUG_TRACE, "ldap_explode_rdn\n", 0, 0, 0 );
-       return explode_name( rdn, notypes, DN_TYPE_LDAP_RDN );
+       return explode_name( rdn, notypes, NAME_TYPE_LDAP_RDN );
 }
 
 char *
@@ -202,7 +211,7 @@ ldap_dn2dcedn( LDAP_CONST char *dn )
 
        Debug( LDAP_DEBUG_TRACE, "ldap_dn2dcedn\n", 0, 0, 0 );
 
-       rdns = explode_name( dn, 0, DN_TYPE_LDAP_DN );
+       rdns = explode_name( dn, 0, NAME_TYPE_LDAP_DN );
        if ( rdns == NULL ) {
                return NULL;
        }
@@ -240,7 +249,7 @@ ldap_dcedn2dn( LDAP_CONST char *dce )
 
        Debug( LDAP_DEBUG_TRACE, "ldap_dcedn2dn\n", 0, 0, 0 );
 
-       rdns = explode_name( dce, 0, DN_TYPE_DCE_DN );
+       rdns = explode_name( dce, 0, NAME_TYPE_DCE_DN );
        if ( rdns == NULL ) {
                return NULL;
        }
@@ -279,22 +288,35 @@ ldap_dcedn2dn( LDAP_CONST char *dce )
 }
 
 static char **
-explode_name( const char *name, int notypes, int is_dn )
+explode_name( const char *name, int notypes, int is_type )
 {
-       const char *p, *q;
+       const char *p, *q, *rdn;
        char **parts = NULL;
-       int     state, count = 0, endquote, len;
+       int     offset, state, have_equals, count = 0, endquote, len;
+
+       /* safe guard */
+       if(name == NULL) name = "";
+
+       /* skip leading whitespace */
+       while( ldap_utf8_isspace( name )) {
+               LDAP_UTF8_INCR( name );
+       }
 
-       p = name-1;
+       p = rdn = name;
+       offset = 0;
        state = OUTQUOTE;
+       have_equals=0;
 
        do {
+               /* step forward */
+               p += offset;
+               offset = 1;
 
-               ++p;
                switch ( *p ) {
                case '\\':
-                       if ( *++p == '\0' )
-                               p--;
+                       if ( p[1] != '\0' ) {
+                               offset = LDAP_UTF8_OFFSET(++p);
+                       }
                        break;
                case '"':
                        if ( state == INQUOTE )
@@ -302,23 +324,28 @@ explode_name( const char *name, int notypes, int is_dn )
                        else
                                state = INQUOTE;
                        break;
+               case '=':
+                       if( state == OUTQUOTE ) have_equals++; /* compare, do not assign: only '=' outside quotes is counted */
+                       break;
                case '+':
-                       if (is_dn == DN_TYPE_LDAP_RDN)
+                       if (is_type == NAME_TYPE_LDAP_RDN)
                                goto end_part;
                        break;
                case '/':
-                       if (is_dn == DN_TYPE_DCE_DN)
+                       if (is_type == NAME_TYPE_DCE_DN)
                                goto end_part;
                        break;
                case ';':
                case ',':
-                       if (is_dn == DN_TYPE_LDAP_DN)
+                       if (is_type == NAME_TYPE_LDAP_DN)
                                goto end_part;
                        break;
                case '\0':
                end_part:
                        if ( state == OUTQUOTE ) {
                                ++count;
+                               have_equals=0;
+
                                if ( parts == NULL ) {
                                        if (( parts = (char **)LDAP_MALLOC( 8
                                                 * sizeof( char *))) == NULL )
@@ -329,31 +356,45 @@ explode_name( const char *name, int notypes, int is_dn )
                                                == NULL )
                                                return( NULL );
                                }
+
                                parts[ count ] = NULL;
                                endquote = 0;
+
                                if ( notypes ) {
-                                       for ( q = name;
-                                           q < p && *q != '='; ++q ) {
-                                               ;
+                                       for ( q = rdn; q < p && *q != '='; ++q ) {
+                                               /* EMPTY */;
                                        }
+
                                        if ( q < p ) {
-                                               name = ++q;
+                                               rdn = ++q;
                                        }
-                                       if ( *name == '"' ) {
-                                               ++name;
+
+                                       if ( *rdn == '"' ) {
+                                               ++rdn;
                                        }
                                        
-                                       if ( *(p-1) == '"' ) {
+                                       if ( p[-1] == '"' ) {
                                                endquote = 1;
                                                --p;
                                        }
                                }
 
-                               len = p - name;
+                               len = p - rdn;
+
                                if (( parts[ count-1 ] = (char *)LDAP_CALLOC( 1,
-                                   len + 1 )) != NULL ) {
-                                       SAFEMEMCPY( parts[ count-1 ], name,
-                                           len );
+                                   len + 1 )) != NULL )
+                               {
+                                       SAFEMEMCPY( parts[ count-1 ], rdn, len );
+
+                                       if( !endquote ) {
+                                               /* skip trailing spaces */
+                                               while( len > 0 && ldap_utf8_isspace(
+                                                       &parts[count-1][len-1] ) )
+                                               {
+                                                       --len;
+                                               }
+                                       }
+
                                        parts[ count-1 ][ len ] = '\0';
                                }
 
@@ -365,11 +406,10 @@ explode_name( const char *name, int notypes, int is_dn )
                                if ( endquote == 1 )
                                        p++;
 
-                               name = *p ? p + 1 : p;
-                               while ( isascii( *name ) && isspace( *name ) )
-                                       ++name;
-                       }
-                       break;
+                               rdn = *p ? &p[1] : p;
+                               while ( ldap_utf8_isspace( rdn ) )
+                                       ++rdn;
+                       } break;
                }
        } while ( *p );
 
@@ -380,9 +420,6 @@ explode_name( const char *name, int notypes, int is_dn )
 int
 ldap_is_dns_dn( LDAP_CONST char *dn )
 {
-       return( dn[ 0 ] != '\0'
-               && strchr( dn, '=' ) == NULL
-               && strchr( dn, ',' ) == NULL
-               && strchr( dn, ';' ) == NULL );
+       return dn[ 0 ] != '\0' && ldap_utf8_strpbrk( dn, "=,;" ) == NULL; /* true when dn is non-empty and contains none of '=', ',' or ';' */
 }
 
index cd9994ad0601a6ae96c145bd5ad84f42521e3330..def6fce9faa0aaabd5e75e30c6f4c2f7d141860f 100644 (file)
@@ -4,6 +4,11 @@
  * COPYING RESTRICTIONS APPLY, see COPYRIGHT file
  */
 
+/*
+ * Locale-specific 1-byte character versions
+ * See utf-8.c for UTF-8 versions
+ */
+
 #include "portable.h"
 
 #include <ac/stdlib.h>
index eadf1de1183b19d32eafccb6f3cab9e9abcdc553..2c0cf19082c92cfc2835a1d5b1c3306f9efeabe5 100644 (file)
@@ -60,13 +60,19 @@ ber_len_t ldap_utf8_chars( const char * p )
        /* could be optimized and could check for invalid sequences */
        ber_len_t chars=0;
 
-       for( ; *p ; p=LDAP_UTF8_NEXT(p) ) {
+       for( ; *p ; LDAP_UTF8_INCR(p) ) {
                chars++;
        };
 
        return chars;
 }
 
+/* return the offset, in bytes, from p to the next character as located by LDAP_UTF8_NEXT */
+int ldap_utf8_offset( const char * p )
+{
+       return LDAP_UTF8_NEXT(p) - p;
+}
+
 /*
  * Returns length indicated by first byte.
  *
@@ -111,7 +117,7 @@ ber_int_t ldap_utf8_to_ucs4( const char * p )
     ber_int_t ch;
        int len, i;
        static unsigned char mask[] = {
-               0, 0x7f, 0x1F, 0x0F, 0x07, 0x03, 0x01 };
+               0, 0x7f, 0x1f, 0x0f, 0x07, 0x03, 0x01 };
 
        len = LDAP_UTF8_CHARLEN(p);
 
@@ -146,33 +152,33 @@ int ldap_ucs4_to_utf8( ber_int_t c, char *buf )
 
        } else if( c < 0x800 ) {
                p[len++] = 0xc0 | ( c >> 6 );
-               p[len++] = 0x80 | ( c & 0x3F );
+               p[len++] = 0x80 | ( c & 0x3f );
 
        } else if( c < 0x10000 ) {
                p[len++] = 0xe0 | ( c >> 12 );
-               p[len++] = 0x80 | ( (c >> 6) & 0x3F );
-               p[len++] = 0x80 | ( c & 0x3F );
+               p[len++] = 0x80 | ( (c >> 6) & 0x3f );
+               p[len++] = 0x80 | ( c & 0x3f );
 
        } else if( c < 0x200000 ) {
                p[len++] = 0xf0 | ( c >> 18 );
-               p[len++] = 0x80 | ( (c >> 12) & 0x3F );
-               p[len++] = 0x80 | ( (c >> 6) & 0x3F );
-               p[len++] = 0x80 | ( c & 0x3F );
+               p[len++] = 0x80 | ( (c >> 12) & 0x3f );
+               p[len++] = 0x80 | ( (c >> 6) & 0x3f );
+               p[len++] = 0x80 | ( c & 0x3f );
 
        } else if( c < 0x400000 ) {
                p[len++] = 0xf8 | ( c >> 24 );
-               p[len++] = 0x80 | ( (c >> 18) & 0x3F );
-               p[len++] = 0x80 | ( (c >> 12) & 0x3F );
-               p[len++] = 0x80 | ( (c >> 6) & 0x3F );
-               p[len++] = 0x80 | ( c & 0x3F );
+               p[len++] = 0x80 | ( (c >> 18) & 0x3f );
+               p[len++] = 0x80 | ( (c >> 12) & 0x3f );
+               p[len++] = 0x80 | ( (c >> 6) & 0x3f );
+               p[len++] = 0x80 | ( c & 0x3f );
 
        } else /* if( c < 0x80000000 ) */ {
                p[len++] = 0xfc | ( c >> 30 );
-               p[len++] = 0x80 | ( (c >> 24) & 0x3F );
-               p[len++] = 0x80 | ( (c >> 18) & 0x3F );
-               p[len++] = 0x80 | ( (c >> 12) & 0x3F );
-               p[len++] = 0x80 | ( (c >> 6) & 0x3F );
-               p[len++] = 0x80 | ( c & 0x3F );
+               p[len++] = 0x80 | ( (c >> 24) & 0x3f );
+               p[len++] = 0x80 | ( (c >> 18) & 0x3f );
+               p[len++] = 0x80 | ( (c >> 12) & 0x3f );
+               p[len++] = 0x80 | ( (c >> 6) & 0x3f );
+               p[len++] = 0x80 | ( c & 0x3f );
        }
 
        buf[len] = '\0';
@@ -198,7 +204,7 @@ char* ldap_utf8_next( const char * p )
        }
 
        for( i=1; i<6; i++ ) {
-               if ( u[i] & 0xC0 != 0x80 ) {
+               if ( u[i] & 0xc0 != 0x80 ) {
                        return (char *) &p[i];
                }
        }
@@ -221,7 +227,7 @@ char* ldap_utf8_prev( const char * p )
        const unsigned char *u = p;
 
        for( i=-1; i>-6 ; i-- ) {
-               if ( u[i] & 0xC0 != 0x80 ) {
+               if ( u[i] & 0xc0 != 0x80 ) {
                        return (char *) &p[i];
                }
        }
@@ -251,7 +257,7 @@ int ldap_utf8_copy( char* dst, const char *src )
        }
 
        for( i=1; i<6; i++ ) {
-               if ( u[i] & 0xC0 != 0x80 ) {
+               if ( ( u[i] & 0xc0 ) != 0x80 ) { /* parenthesized: != binds tighter than &; stop copying at the first non-continuation byte */
                        return i; 
                }
                dst[i] = src[i];
@@ -340,7 +346,7 @@ int ldap_utf8_islower( const char * p )
 {
        unsigned c = * (const unsigned char *) p;
 
-       if(!UTF8_ISASCII(c)) return 0;
+       if(!ISASCII(c)) return 0;
 
        return ( c >= 'a' && c <= 'z' );
 }
@@ -360,15 +366,26 @@ int ldap_utf8_isupper( const char * p )
  * UTF-8 string routines
  */
 
+/* like strchr(), except chr points to the character to find; returns NULL when no character of str decodes to the same value */
+char * (ldap_utf8_strchr)( const char *str, const char *chr )
+{
+       for( ; *str != '\0'; LDAP_UTF8_INCR(str) ) { /* the terminating NUL is never examined, so it cannot be matched */
+               if( ldap_utf8_to_ucs4( str ) == ldap_utf8_to_ucs4( chr ) ) {
+                       return (char *) str;
+               } 
+       }
+
+       return NULL;
+}
+
 /* like strcspn() but returns number of bytes, not characters */
 ber_len_t (ldap_utf8_strcspn)( const char *str, const char *set )
 {
        const char *cstr;
+       const char *cset;
 
-       for( cstr = str; *cstr != '\0'; cstr = LDAP_UTF8_NEXT(cstr) ) {
-               const char *cset;
-
-               for( cset = set; ; cset = LDAP_UTF8_NEXT(cset) ) {
+       for( cstr = str; *cstr != '\0'; LDAP_UTF8_INCR(cstr) ) {
+               for( cset = set; *cset != '\0'; LDAP_UTF8_INCR(cset) ) {
                        if( ldap_utf8_to_ucs4( cstr ) == ldap_utf8_to_ucs4( cset ) ) {
                                return cstr - str;
                        } 
@@ -382,9 +399,9 @@ ber_len_t (ldap_utf8_strcspn)( const char *str, const char *set )
 ber_len_t (ldap_utf8_strspn)( const char *str, const char *set )
 {
        const char *cstr;
+       const char *cset;
 
        for( cstr = str; *cstr != '\0'; LDAP_UTF8_INCR(cstr) ) {
-               const char *cset;
 
                for( cset = set; ; LDAP_UTF8_INCR(cset) ) {
                        if( *cset == '\0' ) {
@@ -404,14 +421,13 @@ ber_len_t (ldap_utf8_strspn)( const char *str, const char *set )
 char *(ldap_utf8_strpbrk)( const char *str, const char *set )
 {
        int len;
-       const char *cstr;
 
-       for( cstr = str; *cstr != '\0'; LDAP_UTF8_INCR(cstr) ) {
+       for( ; *str != '\0'; LDAP_UTF8_INCR(str) ) {
                const char *cset;
 
-               for( cset = set; ; LDAP_UTF8_INCR(cset) ) {
-                       if( ldap_utf8_to_ucs4( cstr ) == ldap_utf8_to_ucs4( cset ) ) {
-                               return (char *) cstr;
+               for( cset = set; *cset != '\0'; LDAP_UTF8_INCR(cset) ) {
+                       if( ldap_utf8_to_ucs4( str ) == ldap_utf8_to_ucs4( cset ) ) {
+                               return (char *) str;
                        } 
                }
        }
@@ -436,7 +452,7 @@ char *(ldap_utf8_strtok)(char *str, const char *sep, char **last)
                return NULL;
        }
 
-       end = &begin[ ldap_utf8_strcpn( begin, sep ) ];
+       end = &begin[ ldap_utf8_strcspn( begin, sep ) ];
 
        if( *end != '\0' ) {
                char *next = LDAP_UTF8_NEXT( end );