X-Git-Url: https://git.sur5r.net/?a=blobdiff_plain;f=libraries%2Flibldap%2Futf-8-conv.c;h=2d2b8c82b9d238383476b0082ca1c7a7c7724414;hb=bfbc6fe4a6e515189962aebba517138d909fbfed;hp=a82ee7b2c0b60160490fd41856abe88188847f7b;hpb=159de0f1359459371c590ac770cc30edbade58d5;p=openldap diff --git a/libraries/libldap/utf-8-conv.c b/libraries/libldap/utf-8-conv.c index a82ee7b2c0..436b43bf97 100644 --- a/libraries/libldap/utf-8-conv.c +++ b/libraries/libldap/utf-8-conv.c @@ -1,7 +1,7 @@ /* $OpenLDAP$ */ /* This work is part of OpenLDAP Software . * - * Copyright 1998-2003 The OpenLDAP Foundation. + * Copyright 1998-2014 The OpenLDAP Foundation. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -40,6 +40,9 @@ #include "portable.h" +#if SIZEOF_WCHAR_T >= 4 +/* These routines assume ( sizeof(wchar_t) >= 4 ) */ + #include #include /* For wctomb, wcstombs, mbtowc, mbstowcs */ #include @@ -76,8 +79,8 @@ ASCII chars 7 bits Unicode address space (0 - 0x10FFFF) 21 bits ISO-10646 address space (0 - 0x7FFFFFFF) 31 bits -Note: This code does not prevent UTF-8 sequences which are longer than - necessary from being decoded. +Note: This code does not prevent UTF-8 sequences which are longer than + necessary from being decoded. */ /*----------------------------------------------------------------------------- @@ -90,31 +93,25 @@ ldap_x_utf8_to_wc ( wchar_t *wchar, const char *utf8char ) int utflen, i; wchar_t ch; - /* If input ptr is NULL, treat it as empty string. */ - if (utf8char == NULL) - utf8char = ""; + if (utf8char == NULL) return -1; /* Get UTF-8 sequence length from 1st byte */ utflen = LDAP_UTF8_CHARLEN2(utf8char, utflen); - if( utflen==0 || utflen > (int)LDAP_MAX_UTF8_LEN ) - return -1; /* Invalid input */ + if( utflen==0 || utflen > (int)LDAP_MAX_UTF8_LEN ) return -1; /* First byte minus length tag */ ch = (wchar_t)(utf8char[0] & mask[utflen]); - for(i=1; i < utflen; i++) - { + for(i=1; i < utflen; i++) { /* Subsequent bytes must start with 10 */ - if ((utf8char[i] & 0xc0) != 0x80) - return -1; + if ((utf8char[i] & 0xc0) != 0x80) return -1; ch <<= 6; /* 6 bits of data in each subsequent byte */ ch |= (wchar_t)(utf8char[i] & 0x3f); } - if (wchar) - *wchar = ch; + if (wchar) *wchar = ch; return utflen; } @@ -132,42 +129,39 @@ ldap_x_utf8s_to_wcs ( wchar_t *wcstr, const char *utf8str, size_t count ) wchar_t ch; - /* If input ptr is NULL, treat it as empty string. */ - if (utf8str == NULL) - utf8str = ""; + /* If input ptr is NULL or empty... */ + if (utf8str == NULL || !*utf8str) { + if ( wcstr ) + *wcstr = 0; + return 0; + } /* Examine next UTF-8 character. If output buffer is NULL, ignore count */ - while ( *utf8str && (wcstr==NULL || wclen (int)LDAP_MAX_UTF8_LEN ) - return -1; /* Invalid input */ + if( utflen==0 || utflen > (int)LDAP_MAX_UTF8_LEN ) return -1; /* First byte minus length tag */ ch = (wchar_t)(utf8str[0] & mask[utflen]); - for(i=1; i < utflen; i++) - { + for(i=1; i < utflen; i++) { /* Subsequent bytes must start with 10 */ - if ((utf8str[i] & 0xc0) != 0x80) - return -1; + if ((utf8str[i] & 0xc0) != 0x80) return -1; ch <<= 6; /* 6 bits of data in each subsequent byte */ ch |= (wchar_t)(utf8str[i] & 0x3f); } - if (wcstr) - wcstr[wclen] = ch; + if (wcstr) wcstr[wclen] = ch; - utf8str += utflen; /* Move to next UTF-8 character */ - wclen++; /* Count number of wide chars stored/required */ + utf8str += utflen; /* Move to next UTF-8 character */ + wclen++; /* Count number of wide chars stored/required */ } /* Add null terminator if there's room in the buffer. */ - if (wcstr && wclen < count) - wcstr[wclen] = 0; + if (wcstr && wclen < count) wcstr[wclen] = 0; return wclen; } @@ -197,7 +191,10 @@ ldap_x_wc_to_utf8 ( char *utf8char, wchar_t wchar, size_t count ) return 4; if( wchar < 0x4000000 ) return 5; - if( wchar < 0x80000000 ) +#if SIZEOF_WCHAR_T > 4 + /* UL is not strictly needed by ANSI C */ + if( wchar < (wchar_t)0x80000000UL ) +#endif /* SIZEOF_WCHAR_T > 4 */ return 6; return -1; } @@ -241,7 +238,12 @@ ldap_x_wc_to_utf8 ( char *utf8char, wchar_t wchar, size_t count ) utf8char[len++] = 0x80 | ( wchar & 0x3f ); } - } else if( wchar < 0x80000000 ) { + } else +#if SIZEOF_WCHAR_T > 4 + /* UL is not strictly needed by ANSI C */ + if( wchar < (wchar_t)0x80000000UL ) +#endif /* SIZEOF_WCHAR_T > 4 */ + { if (count >= 6) { utf8char[len++] = 0xfc | ( wchar >> 30 ); utf8char[len++] = 0x80 | ( (wchar >> 24) & 0x3f ); @@ -251,8 +253,11 @@ ldap_x_wc_to_utf8 ( char *utf8char, wchar_t wchar, size_t count ) utf8char[len++] = 0x80 | ( wchar & 0x3f ); } - } else +#if SIZEOF_WCHAR_T > 4 + } else { len = -1; +#endif /* SIZEOF_WCHAR_T > 4 */ + } return len; @@ -323,6 +328,10 @@ ldap_x_wcs_to_utf8s ( char *utf8str, const wchar_t *wcstr, size_t count ) return (p - utf8str); } +#ifdef ANDROID +int wctomb(char *s, wchar_t wc) { return wcrtomb(s,wc,NULL); } +int mbtowc(wchar_t *pwc, const char *s, size_t n) { return mbrtowc(pwc, s, n, NULL); } +#endif /*----------------------------------------------------------------------------- Convert a UTF-8 character to a MultiByte character. @@ -472,3 +481,5 @@ ldap_x_mbs_to_utf8s ( char *utf8str, const char *mbstr, size_t count, return n; } + +#endif /* SIZEOF_WCHAR_T >= 4 */