if (!(*p & 0x80))
return 1;
- return ldap_utf8_lentab[*(unsigned char *)p ^ 0x80];
+ return ldap_utf8_lentab[*(const unsigned char *)p ^ 0x80];
}
/*
*/
/* mask of required bits in second octet */
-const char ldap_utf8_mintab[] = {
- 0x20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
- 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
- 0x30, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
- 0x38, 0x80, 0x80, 0x80, 0x3c, 0x80, 0x00, 0x00 };
+#undef c
+#define c const char
+c ldap_utf8_mintab[] = {
+ (c)0x20, (c)0x80, (c)0x80, (c)0x80, (c)0x80, (c)0x80, (c)0x80, (c)0x80,
+ (c)0x80, (c)0x80, (c)0x80, (c)0x80, (c)0x80, (c)0x80, (c)0x80, (c)0x80,
+ (c)0x30, (c)0x80, (c)0x80, (c)0x80, (c)0x80, (c)0x80, (c)0x80, (c)0x80,
+ (c)0x38, (c)0x80, (c)0x80, (c)0x80, (c)0x3c, (c)0x80, (c)0x00, (c)0x00 };
+#undef c
int ldap_utf8_charlen2( const char * p )
{
{
int len=0;
unsigned char* p = buf;
- if(buf == NULL) return 0;
- if ( c < 0 ) {
- /* not a valid Unicode character */
+ /* not a valid Unicode character */
+ if ( c < 0 ) return 0;
+
+ /* Just return length, don't convert */
+ if(buf == NULL) {
+ if( c < 0x80 ) return 1;
+ else if( c < 0x800 ) return 2;
+ else if( c < 0x10000 ) return 3;
+ else if( c < 0x200000 ) return 4;
+ else if( c < 0x4000000 ) return 5;
+ else return 6;
+ }
- } else if( c < 0x80 ) {
+ if( c < 0x80 ) {
p[len++] = c;
} else if( c < 0x800 ) {
p[len++] = 0x80 | ( c & 0x3f );
}
- buf[len] = '\0';
return len;
}
+#define LDAP_UCS_UTF8LEN(c) \
+ c < 0 ? 0 : (c < 0x80 ? 1 : (c < 0x800 ? 2 : (c < 0x10000 ? 3 : \
+ (c < 0x200000 ? 4 : (c < 0x4000000 ? 5 : 6)))))
+
+/* Convert a string to UTF-8 format. The input string is expected to
+ * have characters of 1, 2, or 4 octets (in network byte order)
+ * corresponding to the ASN.1 T61STRING, BMPSTRING, and UNIVERSALSTRING
+ * types respectively. (Here T61STRING just means that there is one
+ * octet per character and characters may use the high bit of the octet.
+ * The characters are assumed to use ISO mappings, no provision is made
+ * for converting from T.61 coding rules to Unicode.)
+ */
+
+int
+ldap_ucs_to_utf8s( struct berval *ucs, int csize, struct berval *utf8s )
+{
+ unsigned char *in, *end;
+ char *ptr;
+ ldap_ucs4_t u;
+ int i, l = 0;
+
+ utf8s->bv_val = NULL;
+ utf8s->bv_len = 0;
+
+ in = (unsigned char *)ucs->bv_val;
+
+ /* Make sure we stop at an even multiple of csize */
+ end = in + ( ucs->bv_len & ~(csize-1) );
+
+ for (; in < end; ) {
+ u = *in++;
+ if (csize > 1) {
+ u <<= 8;
+ u |= *in++;
+ }
+ if (csize > 2) {
+ u <<= 8;
+ u |= *in++;
+ u <<= 8;
+ u |= *in++;
+ }
+ i = LDAP_UCS_UTF8LEN(u);
+ if (i == 0)
+ return LDAP_INVALID_SYNTAX;
+ l += i;
+ }
+
+ utf8s->bv_val = LDAP_MALLOC( l+1 );
+ if (utf8s->bv_val == NULL)
+ return LDAP_NO_MEMORY;
+ utf8s->bv_len = l;
+
+ ptr = utf8s->bv_val;
+ for (in = (unsigned char *)ucs->bv_val; in < end; ) {
+ u = *in++;
+ if (csize > 1) {
+ u <<= 8;
+ u |= *in++;
+ }
+ if (csize > 2) {
+ u <<= 8;
+ u |= *in++;
+ u <<= 8;
+ u |= *in++;
+ }
+ ptr += ldap_x_ucs4_to_utf8(u, ptr);
+ }
+ *ptr = '\0';
+ return LDAP_SUCCESS;
+}
+
/*
* Advance to the next UTF-8 character
*