]> git.sur5r.net Git - openldap/commitdiff
Initial UTF-8 routines.
author Kurt Zeilenga <kurt@openldap.org>
Sat, 22 Jan 2000 01:55:34 +0000 (01:55 +0000)
committer Kurt Zeilenga <kurt@openldap.org>
Sat, 22 Jan 2000 01:55:34 +0000 (01:55 +0000)
libraries/libldap/Makefile.in
libraries/libldap/ldap-int.h
libraries/libldap/utf-8.c [new file with mode: 0644]
libraries/libldap_r/Makefile.in

index eb92ea5d18f38b530cb969af1b19d71d2dae8900..826ad0da707ff81839525669b8f674bf7d133076 100644 (file)
@@ -17,7 +17,8 @@ SRCS  = bind.c open.c result.c error.c compare.c search.c \
        getdn.c getentry.c getattr.c getvalues.c addentry.c \
        request.c getdxbyname.c os-ip.c url.c charset.c \
        init.c options.c print.c string.c util-int.c schema.c \
-       charray.c digest.c tls.c dn.c os-local.c dnssrv.c
+       charray.c digest.c tls.c dn.c os-local.c dnssrv.c \
+       utf-8.c
 OBJS   = bind.lo open.lo result.lo error.lo compare.lo search.lo \
        controls.lo messages.lo references.lo extended.lo \
        modify.lo add.lo modrdn.lo delete.lo abandon.lo ufn.lo cache.lo \
@@ -26,7 +27,8 @@ OBJS  = bind.lo open.lo result.lo error.lo compare.lo search.lo \
        getdn.lo getentry.lo getattr.lo getvalues.lo addentry.lo \
        request.lo getdxbyname.lo os-ip.lo url.lo charset.lo \
        init.lo options.lo print.lo string.lo util-int.lo schema.lo \
-       charray.lo digest.lo tls.lo dn.lo os-local.lo dnssrv.lo
+       charray.lo digest.lo tls.lo dn.lo os-local.lo dnssrv.lo \
+       utf-8.lo
 
 LDAP_INCDIR= ../../include       
 LDAP_LIBDIR= ../../libraries
index 9f292a659633860ca07802d41c7828609140551d..69be28ebf88cc3ed56b6410ae57ec6897275773d 100644 (file)
@@ -506,6 +506,24 @@ LIBLDAP_F (int) ldap_8859_to_t61( char **bufp, ber_len_t *buflenp, int free_inpu
 #endif /* LDAP_CHARSET_8859 == LDAP_DEFAULT_CHARSET */
 #endif /* STR_TRANSLATION && LDAP_DEFAULT_CHARSET */
 
+/*
+ * UTF-8 (in utf-8.c): length/navigation helpers and single-byte ASCII tests
+ */
+LIBLDAP_F (ber_len_t) ldap_utf8_bytes( const char * );
+LIBLDAP_F (ber_len_t) ldap_utf8_chars( const char * );
+LIBLDAP_F (int) ldap_utf8_charlen( const char * );
+LIBLDAP_F (char*) ldap_utf8_next(char *);
+LIBLDAP_F (char*) ldap_utf8_prev(char *);
+
+LIBLDAP_F (int) ldap_utf8_isascii( const char * );
+LIBLDAP_F (int) ldap_utf8_isalpha( const char * );
+LIBLDAP_F (int) ldap_utf8_isalnum( const char * );
+LIBLDAP_F (int) ldap_utf8_isdigit( const char * );
+LIBLDAP_F (int) ldap_utf8_isxdigit( const char * );
+LIBLDAP_F (int) ldap_utf8_islower( const char * );
+LIBLDAP_F (int) ldap_utf8_isupper( const char * );
+LIBLDAP_F (int) ldap_utf8_isspace( const char * );
+
 LDAP_END_DECL
 
 #endif /* _LDAP_INT_H */
diff --git a/libraries/libldap/utf-8.c b/libraries/libldap/utf-8.c
new file mode 100644 (file)
index 0000000..7051b6d
--- /dev/null
@@ -0,0 +1,181 @@
+/* $OpenLDAP$ */
+/*
+ * Copyright 1998-1999 The OpenLDAP Foundation, All Rights Reserved.
+ * COPYING RESTRICTIONS APPLY, see COPYRIGHT file
+ */
+
+/*
+ * Basic UTF-8 routines
+ *
+ * These routines are not optimized.
+ */
+
+#include "portable.h"
+
+#include <stdio.h>
+
+#include <ac/stdlib.h>
+
+#include <ac/socket.h>
+#include <ac/string.h>
+#include <ac/time.h>
+
+#include "ldap-int.h"
+#include "ldap_defaults.h"
+
+#define UTF8_ISASCII(u)        ( ((u) & ~0x7f) == 0 )  /* no bit above the low 7 is set */
+
+/* Count the bytes of the NUL-terminated string p, terminator excluded. */
+ber_len_t ldap_utf8_bytes( const char * p )
+{
+       const char *end = p;
+       while( *end != '\0' ) {
+               end++;
+       }
+
+       return (ber_len_t)( end - p );
+}
+
+/* Count UTF-8 characters in p: each byte that is not a 10xxxxxx
+ * continuation byte starts one (note: != binds tighter than &). */
+ber_len_t ldap_utf8_chars( const char * p )
+{
+       const unsigned char *u = (const unsigned char *) p;
+       ber_len_t chars = 0;
+
+       for( ; *u; u++ ) {
+               if ( (*u & 0xC0) != 0x80 ) chars++;
+       }
+
+       return chars;
+}
+
+/*
+ * Return the length in bytes of the UTF-8 sequence introduced by the
+ * lead byte at p, judged from that byte's high-order bits alone.
+ *
+ * Returns 1 for an ASCII byte, 2..6 for a multi-byte lead byte, and 0
+ * when the byte cannot start a character (a 10xxxxxx continuation
+ * byte, 0xFE or 0xFF).  The continuation bytes are not inspected.
+ *
+ * Only the byte at p is read, so p may point at the terminating NUL
+ * (which yields 1).
+ */
+int ldap_utf8_charlen( const char * p )
+{
+       /* read via unsigned char so bytes >= 0x80 do not sign-extend */
+       unsigned c = * (const unsigned char *) p;
+
+       if ((c & 0x80 ) == 0x00) return 1;  /* 0xxxxxxx */
+       if ((c & 0xE0 ) == 0xC0) return 2;  /* 110xxxxx */
+       if ((c & 0xF0 ) == 0xE0) return 3;  /* 1110xxxx */
+       if ((c & 0xF8 ) == 0xF0) return 4;  /* 11110xxx */
+       if ((c & 0xFC ) == 0xF8) return 5;  /* 111110xx */
+       if ((c & 0xFE ) == 0xFC) return 6;  /* 1111110x */
+
+       /* INVALID */
+       return 0;
+}
+
+/* Step past the character at p; NULL when ldap_utf8_charlen() reports 0. */
+char* ldap_utf8_next( char * p )
+{
+       int width = ldap_utf8_charlen( p );
+       return ( width == 0 ) ? NULL : p + width;
+}
+
+/* Back up to the nearest of the 6 bytes before p that is not a 10xxxxxx
+ * continuation byte (NULL if none); p must not be at its buffer's start. */
+char* ldap_utf8_prev( char * p )
+{
+       const unsigned char *u = (const unsigned char *) p;
+       int i;
+       for( i = -1; i >= -6 ; i-- ) {
+               if ( (u[i] & 0xC0) != 0x80 ) return &p[i];
+       }
+       return NULL;
+}
+
+/* Nonzero iff the byte at p is 7-bit ASCII (high bit clear). */
+int ldap_utf8_isascii( const char * p )
+{
+       return UTF8_ISASCII( * (const unsigned char *) p );
+}
+
+/* Nonzero iff the byte at p is an ASCII decimal digit ('0'..'9'). */
+int ldap_utf8_isdigit( const char * p )
+{
+       const unsigned char *bp = (const unsigned char *) p;
+       unsigned ch = *bp;
+
+       return UTF8_ISASCII(ch) && ch >= '0' && ch <= '9';
+}
+
+/* Nonzero iff the byte at p is an ASCII hex digit (0-9, A-F, a-f). */
+int ldap_utf8_isxdigit( const char * p )
+{
+       unsigned ch = * (const unsigned char *) p;
+
+       if( !UTF8_ISASCII(ch) ) return 0;
+       if( ch >= '0' && ch <= '9' ) return 1;
+       if( ch >= 'A' && ch <= 'F' ) return 1;
+       return ch >= 'a' && ch <= 'f';
+}
+
+/* Nonzero iff the byte at p is an ASCII letter (A-Z or a-z). */
+int ldap_utf8_isalpha( const char * p )
+{
+       unsigned ch = * (const unsigned char *) p;
+
+       if( !UTF8_ISASCII(ch) ) return 0;
+       if( ch >= 'A' && ch <= 'Z' ) return 1;
+       return ch >= 'a' && ch <= 'z';
+}
+
+/* Nonzero iff the byte at p is an ASCII letter or decimal digit. */
+int ldap_utf8_isalnum( const char * p )
+{
+       unsigned ch = * (const unsigned char *) p;
+       int digit = ( ch >= '0' && ch <= '9' );
+       int upper = ( ch >= 'A' && ch <= 'Z' );
+       int lower = ( ch >= 'a' && ch <= 'z' );
+
+       return UTF8_ISASCII(ch) && ( digit || upper || lower );
+}
+
+/* Nonzero iff the byte at p is an ASCII lowercase letter (a-z). */
+int ldap_utf8_islower( const char * p )
+{
+       unsigned ch = * (const unsigned char *) p;
+
+       if( UTF8_ISASCII(ch) && ch >= 'a' && ch <= 'z' ) return 1;
+       return 0;
+}
+
+/* Nonzero iff the byte at p is an ASCII uppercase letter (A-Z). */
+int ldap_utf8_isupper( const char * p )
+{
+       const unsigned char *bp = (const unsigned char *) p;
+       unsigned ch = *bp;
+
+       return UTF8_ISASCII(ch) && ch >= 'A' && ch <= 'Z';
+}
+
+/*
+ * Nonzero iff the byte at p is ASCII whitespace: space, horizontal tab,
+ * newline, carriage return, vertical tab or form feed.  Only the single
+ * byte at p is examined.
+ */
+int ldap_utf8_isspace( const char * p )
+{
+       unsigned ch = * (const unsigned char *) p;
+
+       if( !UTF8_ISASCII(ch) ) return 0;
+
+       return ch == ' '
+               || ch == '\t'
+               || ch == '\n'
+               || ch == '\r'
+               || ch == '\v'
+               || ch == '\f';
+}
index 43cf54d086c68b1549e0423313333ff22250494f..beddcf0f039000140aac874f0e1c639510d9532e 100644 (file)
@@ -16,7 +16,8 @@ XXSRCS        = apitest.c test.c tmpltest.c extended.c \
        getdn.c getentry.c getattr.c getvalues.c addentry.c \
        request.c getdxbyname.c os-ip.c url.c charset.c \
        init.c options.c print.c string.c util-int.c schema.c \
-       charray.c digest.c tls.c dn.c os-local.c dnssrv.c
+       charray.c digest.c tls.c dn.c os-local.c dnssrv.c \
+       utf-8.c
 SRCS   = thr_posix.c thr_cthreads.c thr_thr.c thr_lwp.c thr_nt.c \
        thr_pth.c thr_sleep.c thr_stub.c rdwr.c
 OBJS   = extended.lo \
@@ -29,7 +30,8 @@ OBJS  = extended.lo \
        init.lo options.lo print.lo string.lo util-int.lo schema.lo \
        thr_posix.lo thr_cthreads.lo thr_thr.lo thr_lwp.lo thr_nt.lo \
        thr_pth.lo thr_sleep.lo thr_stub.lo rdwr.lo \
-       charray.lo digest.lo tls.lo dn.lo os-local.lo dnssrv.lo
+       charray.lo digest.lo tls.lo dn.lo os-local.lo dnssrv.lo \
+       utf-8.lo
 
 LDAP_INCDIR= ../../include       
 LDAP_LIBDIR= ../../libraries