+/*
+ * conv UTF-8 to UCS-4, useful for comparisons
+ *
+ * Decodes the single UTF-8 sequence starting at p.  The sequence length
+ * is obtained from LDAP_UTF8_CHARLEN(p).
+ *
+ * Returns the decoded value, or UCS4_INVALID when the reported length
+ * is 0 or when any trailing byte is not a continuation byte (10xxxxxx).
+ */
+ldap_ucs4_t ldap_utf8_to_ucs4( const char * p )
+{
+    /* view the input as unsigned bytes; plain char may be signed */
+    const unsigned char *c = (const unsigned char *) p;
+    ldap_ucs4_t ch;
+    int len, i;
+    /*
+     * Payload bits of the lead byte, indexed by sequence length.
+     * NOTE(review): assumes LDAP_UTF8_CHARLEN yields 0..6 -- confirm
+     * against the macro definition.
+     */
+    static const unsigned char mask[] = {
+        0, 0x7f, 0x1f, 0x0f, 0x07, 0x03, 0x01 };
+
+    len = LDAP_UTF8_CHARLEN(p);
+
+    if( len == 0 ) return UCS4_INVALID;
+
+    /* strip the length marker bits from the lead byte */
+    ch = c[0] & mask[len];
+
+    for(i=1; i < len; i++) {
+        /* every trailing byte must look like 10xxxxxx */
+        if ((c[i] & 0xc0) != 0x80) {
+            return UCS4_INVALID;
+        }
+
+        /* append the 6 payload bits of this continuation byte */
+        ch <<= 6;
+        ch |= c[i] & 0x3f;
+    }
+
+    return ch;
+}
+
+/*
+ * conv UCS-4 to UTF-8, not used
+ *
+ * Encodes c into buf and appends a terminating '\0'.  buf must have
+ * room for at least 7 bytes (up to 6 encoded bytes plus the '\0').
+ *
+ * Returns the number of encoded bytes written, not counting the '\0'.
+ * Returns 0 if buf is NULL (nothing is written) or if c is negative
+ * (buf is set to the empty string).
+ */
+int ldap_ucs4_to_utf8( ldap_ucs4_t c, char *buf )
+{
+    int len=0;
+    /* write through unsigned bytes; plain char may be signed */
+    unsigned char *p = (unsigned char *) buf;
+    if(buf == NULL) return 0;
+
+    if ( c < 0 ) {
+        /* not a valid Unicode character */
+
+    } else if( c < 0x80 ) {
+        /* 7 bits: 0xxxxxxx */
+        p[len++] = c;
+
+    } else if( c < 0x800 ) {
+        /* 11 bits: 110xxxxx 10xxxxxx */
+        p[len++] = 0xc0 | ( c >> 6 );
+        p[len++] = 0x80 | ( c & 0x3f );
+
+    } else if( c < 0x10000 ) {
+        /* 16 bits: 1110xxxx + 2 continuation bytes */
+        p[len++] = 0xe0 | ( c >> 12 );
+        p[len++] = 0x80 | ( (c >> 6) & 0x3f );
+        p[len++] = 0x80 | ( c & 0x3f );
+
+    } else if( c < 0x200000 ) {
+        /* 21 bits: 11110xxx + 3 continuation bytes */
+        p[len++] = 0xf0 | ( c >> 18 );
+        p[len++] = 0x80 | ( (c >> 12) & 0x3f );
+        p[len++] = 0x80 | ( (c >> 6) & 0x3f );
+        p[len++] = 0x80 | ( c & 0x3f );
+
+    } else if( c < 0x4000000 ) {
+        /*
+         * 26 bits: 111110xx + 4 continuation bytes.  The bound is
+         * 0x4000000 (2^26); a smaller bound would push values that
+         * fit in 5 bytes into an overlong 6-byte encoding.
+         */
+        p[len++] = 0xf8 | ( c >> 24 );
+        p[len++] = 0x80 | ( (c >> 18) & 0x3f );
+        p[len++] = 0x80 | ( (c >> 12) & 0x3f );
+        p[len++] = 0x80 | ( (c >> 6) & 0x3f );
+        p[len++] = 0x80 | ( c & 0x3f );
+
+    } else /* if( c < 0x80000000 ) */ {
+        /* 31 bits: 1111110x + 5 continuation bytes */
+        p[len++] = 0xfc | ( c >> 30 );
+        p[len++] = 0x80 | ( (c >> 24) & 0x3f );
+        p[len++] = 0x80 | ( (c >> 18) & 0x3f );
+        p[len++] = 0x80 | ( (c >> 12) & 0x3f );
+        p[len++] = 0x80 | ( (c >> 6) & 0x3f );
+        p[len++] = 0x80 | ( c & 0x3f );
+    }
+
+    buf[len] = '\0';
+    return len;
+}
+
+/*
+ * Advance to the next UTF-8 character
+ *
+ * Ignores length of multibyte character, instead rely on
+ * continuation markers to find start of next character.
+ * This allows for "resyncing" when invalid characters
+ * are provided, provided the start of the next character
+ * appears within the 6 bytes examined.
+ */
+char* ldap_utf8_next( const char * p )