From: Kurt Zeilenga <kurt@openldap.org>
Date: Thu, 28 Dec 2000 02:20:37 +0000 (+0000)
Subject: Add UTF-8 wc/mb conversion routines contributed by Novell.
X-Git-Tag: LDBM_PRE_GIANT_RWLOCK~1620
X-Git-Url: https://git.sur5r.net/?a=commitdiff_plain;h=5082731e24ede6bbddb68364f138c3f1f019dfcb;p=openldap

Add UTF-8 wc/mb conversion routines contributed by Novell.

/******************************************************************************
 * Copyright (C) 1999, 2000 Novell, Inc. All Rights Reserved.
 *
 * THIS WORK IS SUBJECT TO U.S. AND INTERNATIONAL COPYRIGHT LAWS AND
 * TREATIES. USE, MODIFICATION, AND REDISTRIBUTION OF THIS WORK IS SUBJECT
 * TO VERSION 2.0.1 OF THE OPENLDAP PUBLIC LICENSE, A COPY OF WHICH IS
 * AVAILABLE AT HTTP://WWW.OPENLDAP.ORG/LICENSE.HTML OR IN THE FILE "LICENSE"
 * IN THE TOP-LEVEL DIRECTORY OF THE DISTRIBUTION. ANY USE OR EXPLOITATION
 * OF THIS WORK OTHER THAN AS AUTHORIZED IN VERSION 2.0.1 OF THE OPENLDAP
 * PUBLIC LICENSE, OR OTHER PRIOR WRITTEN CONSENT FROM NOVELL, COULD SUBJECT
 * THE PERPETRATOR TO CRIMINAL AND CIVIL LIABILITY.
 ******************************************************************************/
---

diff --git a/doc/devel/utfconv.txt b/doc/devel/utfconv.txt
new file mode 100644
index 0000000000..88dfb1db3e
--- /dev/null
+++ b/doc/devel/utfconv.txt
@@ -0,0 +1,291 @@
+                                                                Dec 5, 2000
+                                                                Dave Steck
+                                                                Novell, Inc.
+                    
+                    UTF-8 Conversion Functions
+
+
+1.  Strings in the LDAP C SDK should be encoded in UTF-8 format.
+    However, most platforms do not provide APIs for converting to
+    this format.  If they do, they are platform-specific.
+    
+    As a result, most applications (knowingly or not) use local strings
+    with LDAP functions.  This works fine for 7-bit ASCII characters,
+    but will fail with 8-bit European characters, Asian characters, etc.
+    
+    We propose adding the following platform-independent conversion functions 
+    to the OpenLDAP SDK.  There are 4 functions for converting between UTF-8 
+    and wide characters, and 4 functions for converting between UTF-8 and 
+    multibyte characters.
+
+    For multibyte to UTF-8 conversions, charset translation is necessary.
+    While a full charset translator is not practical or appropriate for the
+    LDAP SDK, we can pass the translator function in as an argument.
+    A NULL for this argument will use the ANSI C functions mbtowc, mbstowcs,
+    wctomb, and wcstombs.
+
+2.  UTF-8 <--> Wide Character conversions
+
+The following new conversion routines will be added, following the pattern of 
+the ANSI C conversion routines (mbtowc, mbstowcs, etc).  These routines use
+the wchar_t type.  wchar_t is 2 bytes on some systems and 4 bytes on others.  
+However the advantage of using wchar_t is that all the standard wide character 
+string functions may be used on these strings:   wcslen, wcscpy, etc.
+
+   int ldap_x_utf8_to_wc  -  Convert a single UTF-8 encoded character to a wide character.
+   int ldap_x_utf8s_to_wcs  -  Convert a UTF-8 string to a wide character string.
+   int ldap_x_wc_to_utf8  -  Convert a single wide character to a UTF-8 sequence.
+   int ldap_x_wcs_to_utf8s  -  Convert a wide character string to a UTF-8 string.
+
+
+2.1  ldap_x_utf8_to_wc  -  Convert a single UTF-8  encoded character to a wide character.
+
+int ldap_x_utf8_to_wc ( wchar_t *wchar, const char *utf8char )
+
+  wchar		(OUT)	Points to a wide character code to receive the 
+                    converted character.
+
+  utf8char	(IN)	Address of the UTF8 sequence of bytes.
+
+Return Value:
+		If successful, the function returns the length in 
+        bytes of the UTF-8 input character.
+
+        If utf8char is NULL or points to an empty string, the
+        function returns 1 and a NULL is written to wchar.
+        
+        If utf8char contains an invalid UTF-8 sequence -1 is returned.
+
+
+2.2  ldap_x_utf8s_to_wcs   -  Convert a UTF-8 string to a wide character string.
+
+int ldap_x_utf8s_to_wcs (wchar_t *wcstr, const char *utf8str, size_t count)
+
+  wcstr		(OUT)	Points to a wide char buffer to receive the 
+                    converted wide char string. The output string will be 
+                    null terminated if there is space for it in the 
+                    buffer.
+
+  utf8str   (IN)	Address of the null-terminated UTF-8 string to convert.  
+
+  count		(IN)	The number of UTF-8 characters to convert, or
+        			equivalently, the size of the output buffer in wide
+        			characters.
+
+Return Value:
+    If successful, the function returns the number of wide
+    characters written to wcstr, excluding the null termination
+    character, if any.
+
+	If wcstr is NULL, the function returns the number of wide
+    characters required to contain the converted string,
+    excluding the null termination character.
+
+    If an invalid UTF-8 sequence is encountered, the 
+    function returns -1. 
+
+    If the return value equals count, there was not enough space to fit the 
+    string and the null terminator in the buffer.  
+
+
+2.3  ldap_x_wc_to_utf8  -  Convert a single wide character to a UTF-8 sequence.
+
+int ldap_x_wc_to_utf8 ( char *utf8char, wchar_t wchar, size_t count )
+
+  utf8char	(OUT)	Points to a byte array to receive the converted UTF-8
+        			string.
+
+  wchar		(IN)	The wide character to convert.
+
+  count		(IN)	The maximum number of bytes to write to the output
+                    buffer.  Normally set this to LDAP_MAX_UTF8_LEN, which 
+                    is defined as 3 or 6 depending on the size of wchar_t.  
+                    A partial character will not be written.
+                    
+Return Value:
+		If successful, the function returns the length in bytes of
+		the converted UTF-8 output character.
+
+        If wchar is 0 (the null wide character), the function returns 1
+        and a null byte is written to utf8char.
+        
+        If wchar cannot be converted to a UTF-8 character, the 
+        function returns -1.
+
+
+2.4  ldap_x_wcs_to_utf8s  -  Convert a wide character string to a UTF-8 string.
+
+int ldap_x_wcs_to_utf8s (char *utf8str, const wchar_t *wcstr, size_t count)
+
+  utf8str	(OUT)	Points to a byte array to receive the converted 
+                    UTF-8 string. The output string will be null 
+                    terminated if there is space for it in the 
+                    buffer.
+
+
+  wcstr		(IN)	Address of the null-terminated wide char string to convert.
+
+  count		(IN)	The size of the output buffer in bytes.
+
+Return Value:
+		If successful, the function returns the number of bytes
+		written to utf8str, excluding the null termination
+        character, if any.
+
+		If utf8str is NULL, the function returns the number of
+        bytes required to contain the converted string, excluding 
+        the null termination character.  The 'count' parameter is ignored.
+        
+        If the function encounters a wide character that cannot 
+        be mapped to a UTF-8 sequence, the function returns -1.
+        
+        If the return value equals count, there was not enough space to fit 
+        the string and the null terminator in the buffer.
+
+
+
+3. Multi-byte <--> UTF-8 Conversions
+
+These functions convert the string in a two-step process, from multibyte 
+to Wide, then from Wide to UTF8, or vice versa.  This conversion requires a 
+charset translation routine, which is passed in as an argument.
+ 
+   ldap_x_mb_to_utf8  -  Convert a multi-byte character  to a UTF-8 character.
+   ldap_x_mbs_to_utf8s  -  Convert a multi-byte string to a UTF-8 string.
+   ldap_x_utf8_to_mb  -  Convert a UTF-8 character to a multi-byte character.
+   ldap_x_utf8s_to_mbs  -  Convert a UTF-8 string to a multi-byte string.
+
+3.1  ldap_x_mb_to_utf8  - Convert a multi-byte character  to a UTF-8 character.
+
+int ldap_x_mb_to_utf8 ( char *utf8char, const char *mbchar, size_t mbsize, int (*f_mbtowc)(wchar_t *wchar, const char *mbchar, size_t count)  )
+
+  utf8char	(OUT)	Points to a byte buffer to receive the converted 
+                    UTF-8 character.  May be NULL.  The output is not
+                    null-terminated.
+
+  mbchar    (IN)	Address of a sequence of bytes forming a multibyte character.
+
+  mbsize	(IN)	The maximum number of bytes of the mbchar argument to 
+                    check.  This should normally be MB_CUR_MAX.
+
+  f_mbtowc	(IN)	The function to use for converting a multibyte 
+                    character to a wide character.  If NULL, the local 
+                    ANSI C routine mbtowc is used.
+
+Return Value:
+		If successful, the function returns the length in bytes of
+        the UTF-8 output character.  
+        
+        If utf8char is NULL, nothing is written and the function 
+        returns the number of bytes that would be written to the 
+        output char.
+
+        If mbsize is zero, -1 is returned and nothing is written to
+        utf8char.
+         
+        If mbchar is NULL or points to an empty string, the 
+        function returns 1 and a null byte is written to utf8char.
+        
+        If mbchar contains an invalid multi-byte character, -1 is returned.
+
+
+3.2  ldap_x_mbs_to_utf8s  - Convert a multi-byte string  to a UTF-8 string.
+
+int ldap_x_mbs_to_utf8s (char *utf8str, const char *mbstr, size_t count, 
+        size_t (*f_mbstowcs)(wchar_t *wcstr, const char *mbstr, size_t count))
+
+utf8str	    (OUT)	Points to a buffer to receive the converted UTF-8 string.  
+                    May be NULL.
+
+  mbstr	(IN)	Address of the null-terminated multi-byte input string.
+
+  count	    (IN)	The size of the output buffer in bytes.
+
+  f_mbstowcs (IN)	The function to use for converting a multibyte string
+            		to a wide character string.  If NULL, the local ANSI
+            		C routine mbstowcs is used.
+
+Return Value:
+		If successful, the function returns the length in 
+        bytes of the UTF-8 output string, excluding the null
+        terminator, if present.
+        
+        If utf8str is NULL, count is ignored and the function 
+        returns the number of bytes required for the output string, 
+        excluding the NULL.
+        
+        If count is zero, 0 is returned and nothing is written to utf8str.
+         
+        If mbstr is NULL or points to an empty string, the 
+        function returns 0 and a null byte is written to utf8str.
+        
+        If mbstr contains an invalid multi-byte character, -1 is returned.
+        
+        If the returned value is equal to count, the entire null-terminated 
+        string would not fit in the output buffer.
+
+
+3.3  ldap_x_utf8_to_mb  -  Convert a UTF-8 character to a multi-byte character.
+
+int ldap_x_utf8_to_mb ( char *mbchar, const char *utf8char,
+                        int (*f_wctomb)(char *mbchar, wchar_t wchar) )
+
+mbchar	(OUT)	Points to a byte buffer to receive the converted multi-byte 
+                character.  May be NULL.
+
+  utf8char	(IN)	Address of the UTF-8 character sequence.
+
+  f_wctomb	(IN)	The function to use for converting a wide character 
+                    to a multibyte character.  If NULL, the local 
+                    ANSI C routine wctomb is used.
+
+
+Return Value:
+		If successful, the function returns the length in 
+        bytes of the multi-byte output character.  
+        
+        If utf8char is NULL or points to an empty string, the 
+        function returns 1 and a null byte is written to mbchar.
+        
+        If utf8char contains an invalid UTF-8 sequence, -1 is returned.
+
+
+3.4  ldap_x_utf8s_to_mbs  - Convert a UTF-8 string to a multi-byte string.
+
+
+int ldap_x_utf8s_to_mbs ( char *mbstr, const char *utf8str, size_t count, 
+        size_t (*f_wcstombs)(char *mbstr, const wchar_t *wcstr, size_t count) )
+
+  mbstr		(OUT)	Points to a byte buffer to receive the converted 
+                    multi-byte string.  May be NULL.
+
+  utf8str   (IN)	Address of the null-terminated UTF-8 string to convert.
+
+  count		(IN)	The size of the output buffer in bytes.
+
+  f_wcstombs (IN)	The function to use for converting a wide character 
+                    string to a multibyte string.  If NULL, the local 
+                    ANSI C routine wcstombs is used.
+
+Return Value:
+        If successful, the function returns the number of bytes
+		written to mbstr, excluding the null termination
+        character, if any.
+
+        If mbstr is NULL, count is ignored and the function 
+        returns the number of bytes required for the output string,
+        excluding the null terminator.
+        
+        If count is zero, 0 is returned and nothing is written to
+        mbstr.
+        
+        If utf8str is NULL or points to an empty string, the 
+        function returns 0 and a null byte is written to mbstr.
+        
+        If an invalid UTF-8 character is encountered, the 
+        function returns -1.
+
+The output string will be null terminated if there is space for it in 
+the output buffer.
+
+
diff --git a/include/ldap_utf8.h b/include/ldap_utf8.h
new file mode 100644
index 0000000000..4646d2ce7f
--- /dev/null
+++ b/include/ldap_utf8.h
@@ -0,0 +1,86 @@
+/* $OpenLDAP$ */
+/* $Novell: /ldap/src/cldap/include/ldap_utf8.h,v 1.3 2000/12/04 20:23:20 dsteck Exp $ */
+/*
+ * Copyright 2000 The OpenLDAP Foundation, Redwood City, California, USA
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms are permitted only
+ * as authorized by the OpenLDAP Public License.  A copy of this
+ * license is available at http://www.OpenLDAP.org/license.html or
+ * in file LICENSE in the top-level directory of the distribution.
+ */
+/******************************************************************************
+ * This notice applies to changes, created by or for Novell, Inc.,
+ * to preexisting works for which notices appear elsewhere in this file.
+ *
+ * Copyright (C) 2000 Novell, Inc. All Rights Reserved.
+ *
+ * THIS WORK IS SUBJECT TO U.S. AND INTERNATIONAL COPYRIGHT LAWS AND TREATIES.
+ * USE, MODIFICATION, AND REDISTRIBUTION OF THIS WORK IS SUBJECT TO VERSION
+ * 2.0.1 OF THE OPENLDAP PUBLIC LICENSE, A COPY OF WHICH IS AVAILABLE AT
+ * HTTP://WWW.OPENLDAP.ORG/LICENSE.HTML OR IN THE FILE "LICENSE" IN THE
+ * TOP-LEVEL DIRECTORY OF THE DISTRIBUTION. ANY USE OR EXPLOITATION OF THIS
+ * WORK OTHER THAN AS AUTHORIZED IN VERSION 2.0.1 OF THE OPENLDAP PUBLIC
+ * LICENSE, OR OTHER PRIOR WRITTEN CONSENT FROM NOVELL, COULD SUBJECT THE
+ * PERPETRATOR TO CRIMINAL AND CIVIL LIABILITY. 
+ ******************************************************************************/
+
+#ifndef _LDAP_UTF8_H
+#define _LDAP_UTF8_H
+
+LDAP_BEGIN_DECL
+
+/*  
+ * UTF-8 Utility Routines (in utf-8.c)
+ */
+
+#define LDAP_UCS4_INVALID (0x80000000U)
+
+/* LDAP_MAX_UTF8_LEN is 3 or 6 depending on size of wchar_t; parenthesized so it expands safely inside larger expressions */
+#define LDAP_MAX_UTF8_LEN  ( sizeof(wchar_t)*3/2 )
+
+
+/*
+ * UTF-8 Conversion Routines.   (in utf-8-conv.c)
+ */
+
+/* UTF-8 character to Wide Char; returns the UTF-8 input length in bytes, -1 if invalid */
+LDAP_F(int)
+ldap_x_utf8_to_wc ( wchar_t *wchar, const char *utf8char );
+
+/* UTF-8 string to Wide Char string; returns wide chars stored/required (excl null), -1 if invalid */
+LDAP_F(int)
+ldap_x_utf8s_to_wcs ( wchar_t *wcstr, const char *utf8str, size_t count );
+
+/* Wide Char to UTF-8 character; returns bytes written, 0 if 'count' is too small, -1 if not encodable */
+LDAP_F(int)
+ldap_x_wc_to_utf8 ( char *utf8char, wchar_t wchar, size_t count );
+
+/* Wide Char string to UTF-8 string; returns bytes written/required (excl null), -1 if not encodable */
+LDAP_F(int)
+ldap_x_wcs_to_utf8s ( char *utf8str, const wchar_t *wcstr, size_t count );
+
+
+/* UTF-8 character to MultiByte character; f_wctomb == NULL selects the ANSI C wctomb */
+LDAP_F(int)
+ldap_x_utf8_to_mb ( char *mbchar, const char *utf8char,
+		int (*f_wctomb)(char *mbchar, wchar_t wchar) );
+
+/* UTF-8 string to MultiByte string; f_wcstombs == NULL selects the ANSI C wcstombs */
+LDAP_F(int)
+ldap_x_utf8s_to_mbs ( char *mbstr, const char *utf8str, size_t count,
+		size_t (*f_wcstombs)(char *mbstr, const wchar_t *wcstr, size_t count) );
+
+/* MultiByte character to UTF-8 character; f_mbtowc == NULL selects the ANSI C mbtowc */
+LDAP_F(int)
+ldap_x_mb_to_utf8 ( char *utf8char, const char *mbchar, size_t mbsize,
+		int (*f_mbtowc)(wchar_t *wchar, const char *mbchar, size_t count) );
+
+/* MultiByte string to UTF-8 string; f_mbstowcs == NULL selects the ANSI C mbstowcs */
+LDAP_F(int)
+ldap_x_mbs_to_utf8s ( char *utf8str, const char *mbstr, size_t count,
+		size_t (*f_mbstowcs)(wchar_t *wcstr, const char *mbstr, size_t count) );
+
+LDAP_END_DECL
+
+#endif /* _LDAP_UTF8_H */
diff --git a/libraries/libldap/Makefile.in b/libraries/libldap/Makefile.in
index 2be2d6f925..12a01a71d8 100644
--- a/libraries/libldap/Makefile.in
+++ b/libraries/libldap/Makefile.in
@@ -18,7 +18,7 @@ SRCS	= bind.c open.c result.c error.c compare.c search.c \
 	request.c os-ip.c url.c sortctrl.c vlvctrl.c \
 	init.c options.c print.c string.c util-int.c schema.c \
 	charray.c tls.c dn.c os-local.c dnssrv.c \
-	utf-8.c
+	utf-8.c utf-8-conv.c
 OBJS	= bind.lo open.lo result.lo error.lo compare.lo search.lo \
 	controls.lo messages.lo references.lo extended.lo cyrus.lo \
 	modify.lo add.lo modrdn.lo delete.lo abandon.lo ufn.lo cache.lo \
@@ -28,7 +28,7 @@ OBJS	= bind.lo open.lo result.lo error.lo compare.lo search.lo \
 	request.lo os-ip.lo url.lo sortctrl.lo vlvctrl.lo \
 	init.lo options.lo print.lo string.lo util-int.lo schema.lo \
 	charray.lo tls.lo dn.lo os-local.lo dnssrv.lo \
-	utf-8.lo
+	utf-8.lo utf-8-conv.lo
 
 LDAP_INCDIR= ../../include       
 LDAP_LIBDIR= ../../libraries
diff --git a/libraries/libldap/utf-8-conv.c b/libraries/libldap/utf-8-conv.c
new file mode 100644
index 0000000000..04a464a795
--- /dev/null
+++ b/libraries/libldap/utf-8-conv.c
@@ -0,0 +1,464 @@
+/* $OpenLDAP$ */
+/*
+ * Copyright 2000 The OpenLDAP Foundation, All Rights Reserved.
+ * COPYING RESTRICTIONS APPLY, see COPYRIGHT file
+ */
+
+/* $Novell: /ldap/src/cldap/libraries/libldap/utfconv.c,v 1.3 2000/12/11 19:35:37 dsteck Exp $ */
+/******************************************************************************
+ * Copyright (C) 1999, 2000 Novell, Inc. All Rights Reserved.
+ * 
+ * THIS WORK IS SUBJECT TO U.S. AND INTERNATIONAL COPYRIGHT LAWS AND
+ * TREATIES. USE, MODIFICATION, AND REDISTRIBUTION OF THIS WORK IS SUBJECT
+ * TO VERSION 2.0.1 OF THE OPENLDAP PUBLIC LICENSE, A COPY OF WHICH IS
+ * AVAILABLE AT HTTP://WWW.OPENLDAP.ORG/LICENSE.HTML OR IN THE FILE "LICENSE"
+ * IN THE TOP-LEVEL DIRECTORY OF THE DISTRIBUTION. ANY USE OR EXPLOITATION
+ * OF THIS WORK OTHER THAN AS AUTHORIZED IN VERSION 2.0.1 OF THE OPENLDAP
+ * PUBLIC LICENSE, OR OTHER PRIOR WRITTEN CONSENT FROM NOVELL, COULD SUBJECT
+ * THE PERPETRATOR TO CRIMINAL AND CIVIL LIABILITY. 
+ ******************************************************************************/
+
+/*
+ * UTF-8 Conversion Routines
+ *
+ * These routines convert between Wide Character and UTF-8,
+ * or between MultiByte and UTF-8 encodings.
+ *
+ * Both single character and string versions of the functions are provided.
+ * All functions return -1 if the character or string cannot be converted.
+ */
+
+#include "portable.h"
+
+#include <stdio.h>
+#include <ac/stdlib.h>		/* For wctomb, wcstombs, mbtowc, mbstowcs */
+#include <ac/string.h>
+#include <ac/time.h>		/* for time_t */
+
+#include "ldap-int.h"
+
+#include <ldap_utf8.h>
+
+static unsigned char mask[] = { 0, 0x7f, 0x1f, 0x0f, 0x07, 0x03, 0x01 };	/* mask[n] keeps the data bits of the first byte of an n-byte sequence */
+
+
+/*-----------------------------------------------------------------------------
+					UTF-8 Format Summary
+
+ASCII chars 						7 bits
+    0xxxxxxx
+    
+2-character UTF-8 sequence:        11 bits
+    110xxxxx  10xxxxxx
+
+3-character UTF-8                  16 bits
+    1110xxxx  10xxxxxx  10xxxxxx   
+    
+4-char UTF-8                       21 bits 
+    11110xxx  10xxxxxx  10xxxxxx  10xxxxxx
+    
+5-char UTF-8                       26 bits
+    111110xx  10xxxxxx  10xxxxxx  10xxxxxx  10xxxxxx
+    
+6-char UTF-8                       31 bits
+    1111110x  10xxxxxx  10xxxxxx  10xxxxxx  10xxxxxx  10xxxxxx
+    
+Unicode address space   (0 - 0x10FFFF)    21 bits
+ISO-10646 address space (0 - 0x7FFFFFFF)  31 bits
+
+Note:  This code does not prevent UTF-8 sequences which are longer than
+	   necessary from being decoded.
+*/
+
+/*------------------------------------------------------------------------------ 
+   Convert one UTF-8 character ('utf8char' NULL is read as "") to a wide char; it is stored only if 'wchar' is non-NULL.
+   Return the length of the UTF-8 input character in bytes, or -1 for a bad length (0 or > LDAP_MAX_UTF8_LEN) or continuation byte.
+*/
+int
+ldap_x_utf8_to_wc ( wchar_t *wchar, const char *utf8char )
+{
+	int utflen, i;
+	wchar_t ch;
+
+	/* If input ptr is NULL, treat it as empty string. */
+	if (utf8char == NULL)
+		utf8char = "";
+
+	/* Get UTF-8 sequence length from 1st byte */
+	utflen = UTF8_CHARLEN(utf8char);
+	
+	if( utflen==0 || utflen > LDAP_MAX_UTF8_LEN )
+		return -1;								 	/* Invalid input, or longer than this build accepts */
+
+	/* First byte minus length tag: mask[] is indexed by the sequence length */
+	ch = (wchar_t)(utf8char[0] & mask[utflen]);
+	
+	for(i=1; i < utflen; i++)
+	{
+		/* Subsequent bytes must start with 10; a premature null byte fails this test too */
+		if ((utf8char[i] & 0xc0) != 0x80)
+			return -1;
+	
+		ch <<= 6;			/* 6 bits of data in each subsequent byte */
+		ch |= (wchar_t)(utf8char[i] & 0x3f);
+	}
+	
+	if (wchar)
+		*wchar = ch;
+
+	return utflen;
+}
+
+/*-----------------------------------------------------------------------------
+   Convert a UTF-8 string ('utf8str' NULL is read as "") to a wide char string.
+   No more than 'count' wide chars will be written to the output buffer; if 'wcstr' is NULL nothing is stored and 'count' is ignored.
+   Return the size of the converted string in wide chars, excl null terminator, or -1 on invalid UTF-8.
+*/
+int
+ldap_x_utf8s_to_wcs ( wchar_t *wcstr, const char *utf8str, size_t count )
+{
+	size_t wclen = 0;
+	int utflen, i;
+	wchar_t ch;
+
+
+	/* If input ptr is NULL, treat it as empty string. */
+	if (utf8str == NULL)
+		utf8str = "";
+
+	/* Examine next UTF-8 character.  If output buffer is NULL, ignore count */
+	while ( *utf8str && (wcstr==NULL || wclen<count) )
+	{
+		/* Get UTF-8 sequence length from 1st byte */
+		utflen = UTF8_CHARLEN(utf8str);
+		
+		if( utflen==0 || utflen > LDAP_MAX_UTF8_LEN )
+			return -1;								 	/* Invalid input, or longer than this build accepts */
+
+		/* First byte minus length tag: mask[] is indexed by the sequence length */
+		ch = (wchar_t)(utf8str[0] & mask[utflen]);
+		
+		for(i=1; i < utflen; i++)
+		{
+			/* Subsequent bytes must start with 10; a premature null byte fails this test too */
+			if ((utf8str[i] & 0xc0) != 0x80)
+				return -1;
+		
+			ch <<= 6;			/* 6 bits of data in each subsequent byte */
+			ch |= (wchar_t)(utf8str[i] & 0x3f);
+		}
+		
+		if (wcstr)
+			wcstr[wclen] = ch;
+		
+		utf8str += utflen;		/* Move to next UTF-8 character */
+		wclen++;				/* Count number of wide chars stored/required */
+	}
+
+	/* Add null terminator if there's room in the buffer; a return value equal to 'count' means there was none. */
+	if (wcstr && wclen < count)
+		wcstr[wclen] = 0;
+
+	return wclen;
+}
+
+
+/*------------------------------------------------------------------------------ 
+   Convert one wide char to a UTF-8 character.  If 'utf8char' is NULL, only the required length (1-6) is returned.
+   Return the length of the converted UTF-8 character in bytes, or -1 if 'wchar' is negative or >= 0x80000000.
+   No more than 'count' bytes will be written to the output buffer: if the character needs more, nothing is written and 0 is returned.
+*/
+int
+ldap_x_wc_to_utf8 ( char *utf8char, wchar_t wchar, size_t count )
+{
+	int len=0;
+
+	if (utf8char == NULL)   /* Just determine the required UTF-8 char length. */
+	{						/* Ignore count */
+		if( wchar < 0 )
+			return -1;
+		if( wchar < 0x80 )
+			return 1;
+		if( wchar < 0x800 )
+			return 2; 
+		if( wchar < 0x10000 )
+			return 3;
+		if( wchar < 0x200000 ) 
+			return 4;
+		if( wchar < 0x4000000 ) 
+			return 5;
+		if( wchar < 0x80000000 )
+			return 6;
+		return -1;
+	}
+
+	
+	if ( wchar < 0 ) {				/* Invalid wide character */
+		len = -1;
+
+	} else if( wchar < 0x80 ) {		/* 7 bits: 0xxxxxxx */
+		if (count >= 1) {			/* In every branch, len stays 0 when the sequence does not fit */
+			utf8char[len++] = (char)wchar;
+		}
+
+	} else if( wchar < 0x800 ) {	/* 11 bits: 110xxxxx 10xxxxxx */
+		if (count >=2) {
+			utf8char[len++] = 0xc0 | ( wchar >> 6 );
+			utf8char[len++] = 0x80 | ( wchar & 0x3f );
+		}
+
+	} else if( wchar < 0x10000 ) {	/* 16 bits: 3-byte sequence */
+		if (count >= 3) {	
+			utf8char[len++] = 0xe0 | ( wchar >> 12 );
+			utf8char[len++] = 0x80 | ( (wchar >> 6) & 0x3f );
+			utf8char[len++] = 0x80 | ( wchar & 0x3f );
+		}
+	
+	} else if( wchar < 0x200000 ) {	/* 21 bits: 4-byte sequence */
+		if (count >= 4) {
+			utf8char[len++] = 0xf0 | ( wchar >> 18 );
+			utf8char[len++] = 0x80 | ( (wchar >> 12) & 0x3f );
+			utf8char[len++] = 0x80 | ( (wchar >> 6) & 0x3f );
+			utf8char[len++] = 0x80 | ( wchar & 0x3f );
+		}
+
+	} else if( wchar < 0x4000000 ) {	/* 26 bits: 5-byte sequence */
+		if (count >= 5) {
+			utf8char[len++] = 0xf8 | ( wchar >> 24 );
+			utf8char[len++] = 0x80 | ( (wchar >> 18) & 0x3f );
+			utf8char[len++] = 0x80 | ( (wchar >> 12) & 0x3f );
+			utf8char[len++] = 0x80 | ( (wchar >> 6) & 0x3f );
+			utf8char[len++] = 0x80 | ( wchar & 0x3f );
+		}
+
+	} else if( wchar < 0x80000000 ) {	/* 31 bits: 6-byte sequence */
+		if (count >= 6) {
+			utf8char[len++] = 0xfc | ( wchar >> 30 );
+			utf8char[len++] = 0x80 | ( (wchar >> 24) & 0x3f );
+			utf8char[len++] = 0x80 | ( (wchar >> 18) & 0x3f );
+			utf8char[len++] = 0x80 | ( (wchar >> 12) & 0x3f );
+			utf8char[len++] = 0x80 | ( (wchar >> 6) & 0x3f );
+			utf8char[len++] = 0x80 | ( wchar & 0x3f );
+		}
+
+	} else
+		len = -1;
+	
+	return len;
+
+}
+
+
+/*-----------------------------------------------------------------------------
+   Convert a wide char string ('wcstr' NULL is read as empty) to a UTF-8 string.  If 'utf8str' is NULL, return the bytes required and ignore 'count'.
+   No more than 'count' bytes will be written to the output buffer; a return value equal to 'count' means the buffer filled up.
+   Return the # of bytes written to the output buffer, excl null terminator, or -1 if a wide char cannot be encoded.
+*/
+int
+ldap_x_wcs_to_utf8s ( char *utf8str, const wchar_t *wcstr, size_t count )
+{
+	int len = 0;
+	int n;
+	char *p = utf8str;
+	wchar_t empty = 0;		/* To avoid use of L"" construct */
+
+	if (wcstr == NULL)		/* Treat input ptr NULL as an empty string */
+		wcstr = &empty;
+
+	if (utf8str == NULL)	/* Just compute size of output, excl null */
+	{
+		while (*wcstr)
+		{
+			/* Get UTF-8 size of next wide char (the count argument is ignored for a NULL output) */
+			n = ldap_x_wc_to_utf8( NULL, *wcstr++, LDAP_MAX_UTF8_LEN);
+			if (n == -1)
+				return -1;
+			len += n;
+		}
+
+		return len;
+	}
+
+	
+	/* Do the actual conversion. */
+
+	n = 1;					/* In case of empty wcstr: any value other than 0 and -1 skips both special cases below */
+	while (*wcstr)
+	{
+		n = ldap_x_wc_to_utf8( p, *wcstr++, count);
+		
+		if (n <= 0)  		/* If encoding error (-1) or won't fit (0), quit */
+			break;
+		
+		p += n;
+		count -= n;			/* Space left in output buffer */
+	}
+
+	/* If not enough room for last character, pad remainder with null
+	   so that return value = original count, indicating buffer full. */
+	if (n == 0)
+	{
+		while (count--)
+			*p++ = 0;
+	}
+
+	/* Add a null terminator if there's room (this also terminates the partial output on error). */
+	else if (count)
+		*p = 0;
+
+	if (n == -1)			/* Conversion encountered invalid wide char. */
+		return -1;
+
+	/* Return the number of bytes written to output buffer, excl null. */ 
+	return (p - utf8str);
+}
+
+
+/*-----------------------------------------------------------------------------
+   Convert a UTF-8 character to a MultiByte character using 'f_wctomb' (wctomb if NULL); a NULL 'mbchar' discards the output.
+   Return the size of the converted character in bytes as reported by f_wctomb, or -1 if the UTF-8 input is invalid.
+*/
+int
+ldap_x_utf8_to_mb ( char *mbchar, const char *utf8char,
+		int (*f_wctomb)(char *mbchar, wchar_t wchar) )
+{
+	wchar_t wchar;
+	int n;
+	char tmp[6];				/* NOTE(review): assumes f_wctomb never writes more than 6 bytes; MB_LEN_MAX can be larger - confirm */
+
+	if (f_wctomb == NULL)		/* If no conversion function was given... */
+		f_wctomb = wctomb;		/*    use the local ANSI C function */
+ 
+	/* First convert UTF-8 char to a wide char */
+	n = ldap_x_utf8_to_wc( &wchar, utf8char);
+
+	if (n == -1)
+		return -1;		/* Invalid UTF-8 character */
+
+	if (mbchar == NULL)
+		n = f_wctomb( tmp, wchar );
+	else
+		n = f_wctomb( mbchar, wchar);
+
+	return n;
+}
+
+/*-----------------------------------------------------------------------------
+   Convert a UTF-8 string to a MultiByte string using 'f_wcstombs' (wcstombs if NULL).
+   No more than 'count' bytes will be written to the output buffer; with count == 0 nothing is written at all.
+   Return the size of the converted string in bytes, excl null terminator (0 for NULL/empty input), or -1 on invalid input or allocation failure.
+*/
+int
+ldap_x_utf8s_to_mbs ( char *mbstr, const char *utf8str, size_t count,
+		size_t (*f_wcstombs)(char *mbstr, const wchar_t *wcstr, size_t count) )
+{
+	wchar_t *wcs;
+	size_t wcsize;
+	int n;
+
+	if (f_wcstombs == NULL)		/* If no conversion function was given... */
+		f_wcstombs = wcstombs;	/*    use the local ANSI C function */
+ 
+	if (utf8str == NULL || *utf8str == 0)	/* NULL or empty input string */
+	{
+		if (mbstr && count)		/* Terminate only if the buffer has room for it */
+			*mbstr = 0;
+		return 0;
+	}
+
+	/* Allocate memory for the maximum size wchar string that we could get. */
+	wcsize = strlen(utf8str) + 1;
+	wcs = (wchar_t *)LDAP_MALLOC(wcsize * sizeof(wchar_t));
+	if (wcs == NULL)
+		return -1;				/* Memory allocation failure. */
+
+	/* First convert the UTF-8 string to a wide char string (wcsize leaves room for the terminator) */
+	n = ldap_x_utf8s_to_wcs( wcs, utf8str, wcsize);
+
+	/* Then convert wide char string to multi-byte string */
+	if (n != -1)
+	{
+		n = f_wcstombs(mbstr, wcs, count);
+	}
+
+	LDAP_FREE(wcs);
+
+	return n;
+}
+
+/*-----------------------------------------------------------------------------
+   Convert a MultiByte character to a UTF-8 character using 'f_mbtowc' (mbtowc if NULL); NULL or empty 'mbchar' yields one null byte (return 1).
+   'mbsize' indicates the number of bytes of 'mbchar' to check.  The output is written as if 'utf8char' holds LDAP_MAX_UTF8_LEN bytes.
+   Returns the number of bytes written to the output character (the required size if 'utf8char' is NULL), or -1 on error or mbsize == 0.
+*/
+int
+ldap_x_mb_to_utf8 ( char *utf8char, const char *mbchar, size_t mbsize,
+		int (*f_mbtowc)(wchar_t *wchar, const char *mbchar, size_t count) )
+{
+    wchar_t wchar;
+    int n;
+
+	if (f_mbtowc == NULL)		/* If no conversion function was given... */
+		f_mbtowc = mbtowc;		/*    use the local ANSI C function */
+ 
+    if (mbsize == 0)				/* 0 is not valid. */
+        return -1;
+
+    if (mbchar == NULL || *mbchar == 0)
+    {
+        if (utf8char)
+            *utf8char = 0;
+        return 1;
+    }
+
+	/* First convert the MB char to a Wide Char */
+	n = f_mbtowc( &wchar, mbchar, mbsize);
+
+	if (n == -1)
+		return -1;
+
+	/* Convert the Wide Char to a UTF-8 character (returns only the length when utf8char is NULL). */
+	n = ldap_x_wc_to_utf8( utf8char, wchar, LDAP_MAX_UTF8_LEN);
+
+	return n;
+}
+
+
+/*-----------------------------------------------------------------------------
+   Convert a MultiByte string ('mbstr' NULL is read as "") to a UTF-8 string using 'f_mbstowcs' (mbstowcs if NULL).
+   No more than 'count' bytes will be written to the output buffer.
+   Return the size of the converted string in bytes, excl null terminator, or -1 on conversion or allocation failure.
+*/   
+int
+ldap_x_mbs_to_utf8s ( char *utf8str, const char *mbstr, size_t count,
+		size_t (*f_mbstowcs)(wchar_t *wcstr, const char *mbstr, size_t count) )
+{
+	wchar_t *wcs;
+	int n;
+	size_t wcsize;
+
+	if (mbstr == NULL)		   /* Treat NULL input string as an empty string */
+		mbstr = "";
+
+	if (f_mbstowcs == NULL)		/* If no conversion function was given... */
+		f_mbstowcs = mbstowcs;	/*    use the local ANSI C function */
+ 
+	/* Allocate memory for the maximum size wchar string that we could get (one wide char per input byte, plus terminator). */
+	wcsize = strlen(mbstr) + 1;
+	wcs = (wchar_t *)LDAP_MALLOC( wcsize * sizeof(wchar_t) );
+	if (wcs == NULL)
+		return -1;
+
+	/* First convert multi-byte string to a wide char string */
+	n = f_mbstowcs(wcs, mbstr, wcsize);
+
+	/* Convert wide char string to UTF-8 string (the size_t error value has become -1 in the int 'n') */
+	if (n != -1)
+	{
+		n = ldap_x_wcs_to_utf8s( utf8str, wcs, count);
+	}
+
+	LDAP_FREE(wcs);
+
+	return n;	
+}