X-Git-Url: https://git.sur5r.net/?a=blobdiff_plain;f=libraries%2Fliblunicode%2Fucdata%2Fucdata.c;h=c5285a7e858352a11d858cfae763883c432e3c7e;hb=255895125148704f4219868b295d6f69dfdcd68a;hp=29f97bd70816b7760a7b29e408917db8a827d066;hpb=fe98d9fa7b313ffe51f09ea175e5126200793bcd;p=openldap
diff --git a/libraries/liblunicode/ucdata/ucdata.c b/libraries/liblunicode/ucdata/ucdata.c
index 29f97bd708..c5285a7e85 100644
--- a/libraries/liblunicode/ucdata/ucdata.c
+++ b/libraries/liblunicode/ucdata/ucdata.c
@@ -1,5 +1,18 @@
-/*
- * Copyright 1999 Computing Research Labs, New Mexico State University
+/* $OpenLDAP$ */
+/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
+ *
+ * Copyright 1998-2009 The OpenLDAP Foundation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * <http://www.OpenLDAP.org/license.html>.
+ */
+/* Copyright 2001 Computing Research Labs, New Mexico State University
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -19,23 +32,29 @@
* OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
-#ifndef lint
-#ifdef __GNUC__
-static char rcsid[] __attribute__ ((unused)) = "$Id: ucdata.c,v 1.3 1999/08/23 16:14:09 mleisher Exp $";
-#else
-static char rcsid[] = "$Id: ucdata.c,v 1.3 1999/08/23 16:14:09 mleisher Exp $";
-#endif
-#endif
+/* $Id: ucdata.c,v 1.4 2001/01/02 18:46:20 mleisher Exp $" */
+
+#include "portable.h"
+#include "ldap_config.h"
#include
-#include
-#include
-#ifndef WIN32
-#include
-#endif
+#include
+#include
+#include
+
+#include
+#include "lber_pvt.h"
#include "ucdata.h"
+#ifndef HARDCODE_DATA
+#define HARDCODE_DATA 1
+#endif
+
+#if HARDCODE_DATA
+#include "uctable.h"
+#endif
+
/**************************************************************************
*
* Miscellaneous types, data, and support functions.
@@ -43,37 +62,35 @@ static char rcsid[] = "$Id: ucdata.c,v 1.3 1999/08/23 16:14:09 mleisher Exp $";
**************************************************************************/
typedef struct {
- unsigned short bom;
- unsigned short cnt;
+ ac_uint2 bom;
+ ac_uint2 cnt;
union {
- unsigned long bytes;
- unsigned short len[2];
+ ac_uint4 bytes;
+ ac_uint2 len[2];
} size;
} _ucheader_t;
/*
* A simple array of 32-bit masks for lookup.
*/
-static unsigned long masks32[32] = {
- 0x00000001, 0x00000002, 0x00000004, 0x00000008, 0x00000010, 0x00000020,
- 0x00000040, 0x00000080, 0x00000100, 0x00000200, 0x00000400, 0x00000800,
- 0x00001000, 0x00002000, 0x00004000, 0x00008000, 0x00010000, 0x00020000,
- 0x00040000, 0x00080000, 0x00100000, 0x00200000, 0x00400000, 0x00800000,
- 0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000, 0x20000000,
- 0x40000000, 0x80000000
+static ac_uint4 masks32[32] = {
+ 0x00000001UL, 0x00000002UL, 0x00000004UL, 0x00000008UL,
+ 0x00000010UL, 0x00000020UL, 0x00000040UL, 0x00000080UL,
+ 0x00000100UL, 0x00000200UL, 0x00000400UL, 0x00000800UL,
+ 0x00001000UL, 0x00002000UL, 0x00004000UL, 0x00008000UL,
+ 0x00010000UL, 0x00020000UL, 0x00040000UL, 0x00080000UL,
+ 0x00100000UL, 0x00200000UL, 0x00400000UL, 0x00800000UL,
+ 0x01000000UL, 0x02000000UL, 0x04000000UL, 0x08000000UL,
+ 0x10000000UL, 0x20000000UL, 0x40000000UL, 0x80000000UL
};
#define endian_short(cc) (((cc) >> 8) | (((cc) & 0xff) << 8))
#define endian_long(cc) ((((cc) & 0xff) << 24)|((((cc) >> 8) & 0xff) << 16)|\
((((cc) >> 16) & 0xff) << 8)|((cc) >> 24))
+#if !HARDCODE_DATA
static FILE *
-#ifdef __STDC__
_ucopenfile(char *paths, char *filename, char *mode)
-#else
-_ucopenfile(paths, filename, mode)
-char *paths, *filename, *mode;
-#endif
{
FILE *f;
char *fp, *dp, *pp, path[BUFSIZ];
@@ -86,7 +103,7 @@ char *paths, *filename, *mode;
pp = path;
while (*dp && *dp != ':')
*pp++ = *dp++;
- *pp++ = '/';
+ *pp++ = *LDAP_DIRSEP;
fp = filename;
while (*fp)
@@ -102,6 +119,7 @@ char *paths, *filename, *mode;
return 0;
}
+#endif
/**************************************************************************
*
@@ -109,21 +127,20 @@ char *paths, *filename, *mode;
*
**************************************************************************/
-static unsigned long _ucprop_size;
-static unsigned short *_ucprop_offsets;
-static unsigned long *_ucprop_ranges;
+#if !HARDCODE_DATA
-static void
-#ifdef __STDC__
+static ac_uint4 _ucprop_size;
+static ac_uint2 *_ucprop_offsets;
+static ac_uint4 *_ucprop_ranges;
+
+/*
+ * Return -1 on error, 0 if okay
+ */
+static int
_ucprop_load(char *paths, int reload)
-#else
-_ucprop_load(paths, reload)
-char *paths;
-int reload;
-#endif
{
FILE *in;
- unsigned long size, i;
+ ac_uint4 size, i;
_ucheader_t hdr;
if (_ucprop_size > 0) {
@@ -131,7 +148,7 @@ int reload;
/*
* The character properties have already been loaded.
*/
- return;
+ return 0;
/*
* Unload the current character property data in preparation for
@@ -144,7 +161,7 @@ int reload;
}
if ((in = _ucopenfile(paths, "ctype.dat", "rb")) == 0)
- return;
+ return -1;
/*
* Load the header.
@@ -158,13 +175,13 @@ int reload;
if ((_ucprop_size = hdr.cnt) == 0) {
fclose(in);
- return;
+ return -1;
}
/*
* Allocate all the storage needed for the lookup table.
*/
- _ucprop_offsets = (unsigned short *) malloc(hdr.size.bytes);
+ _ucprop_offsets = (ac_uint2 *) malloc(hdr.size.bytes);
/*
* Calculate the offset into the storage for the ranges. The offsets
@@ -172,15 +189,15 @@ int reload;
* the header count field. This means the offset to the ranges must be
* calculated after aligning the count to a 4-byte boundary.
*/
- if ((size = ((hdr.cnt + 1) * sizeof(unsigned short))) & 3)
+ if ((size = ((hdr.cnt + 1) * sizeof(ac_uint2))) & 3)
size += 4 - (size & 3);
size >>= 1;
- _ucprop_ranges = (unsigned long *) (_ucprop_offsets + size);
+ _ucprop_ranges = (ac_uint4 *) (_ucprop_offsets + size);
/*
* Load the offset array.
*/
- fread((char *) _ucprop_offsets, sizeof(unsigned short), size, in);
+ fread((char *) _ucprop_offsets, sizeof(ac_uint2), size, in);
/*
* Do an endian swap if necessary. Don't forget there is an extra node on
@@ -195,7 +212,7 @@ int reload;
* Load the ranges. The number of elements is in the last array position
* of the offsets.
*/
- fread((char *) _ucprop_ranges, sizeof(unsigned long),
+ fread((char *) _ucprop_ranges, sizeof(ac_uint4),
_ucprop_offsets[_ucprop_size], in);
fclose(in);
@@ -207,14 +224,11 @@ int reload;
for (i = 0; i < _ucprop_offsets[_ucprop_size]; i++)
_ucprop_ranges[i] = endian_long(_ucprop_ranges[i]);
}
+ return 0;
}
static void
-#ifdef __STDC__
_ucprop_unload(void)
-#else
-_ucprop_unload()
-#endif
{
if (_ucprop_size == 0)
return;
@@ -226,17 +240,16 @@ _ucprop_unload()
free((char *) _ucprop_offsets);
_ucprop_size = 0;
}
+#endif
static int
-#ifdef __STDC__
-_ucprop_lookup(unsigned long code, unsigned long n)
-#else
-_ucprop_lookup(code, n)
-unsigned long code, n;
-#endif
+_ucprop_lookup(ac_uint4 code, ac_uint4 n)
{
long l, r, m;
+ if (_ucprop_size == 0)
+ return 0;
+
/*
* There is an extra node on the end of the offsets to allow this routine
* to work right. If the index is 0xffff, then there are no nodes for the
@@ -272,14 +285,9 @@ unsigned long code, n;
}
int
-#ifdef __STDC__
-ucisprop(unsigned long code, unsigned long mask1, unsigned long mask2)
-#else
-ucisprop(code, mask1, mask2)
-unsigned long code, mask1, mask2;
-#endif
+ucisprop(ac_uint4 code, ac_uint4 mask1, ac_uint4 mask2)
{
- unsigned long i;
+ ac_uint4 i;
if (mask1 == 0 && mask2 == 0)
return 0;
@@ -303,21 +311,23 @@ unsigned long code, mask1, mask2;
*
**************************************************************************/
-static unsigned long _uccase_size;
-static unsigned short _uccase_len[2];
-static unsigned long *_uccase_map;
+#if !HARDCODE_DATA
-static void
-#ifdef __STDC__
+/* These record the number of slots in the map.
+ * There are 3 words per slot.
+ */
+static ac_uint4 _uccase_size;
+static ac_uint2 _uccase_len[2];
+static ac_uint4 *_uccase_map;
+
+/*
+ * Return -1 on error, 0 if okay
+ */
+static int
_uccase_load(char *paths, int reload)
-#else
-_uccase_load(paths, reload)
-char *paths;
-int reload;
-#endif
{
FILE *in;
- unsigned long i;
+ ac_uint4 i;
_ucheader_t hdr;
if (_uccase_size > 0) {
@@ -325,14 +335,14 @@ int reload;
/*
* The case mappings have already been loaded.
*/
- return;
+ return 0;
free((char *) _uccase_map);
_uccase_size = 0;
}
if ((in = _ucopenfile(paths, "case.dat", "rb")) == 0)
- return;
+ return -1;
/*
* Load the header.
@@ -349,33 +359,31 @@ int reload;
* Set the node count and lengths of the upper and lower case mapping
* tables.
*/
- _uccase_size = hdr.cnt * 3;
- _uccase_len[0] = hdr.size.len[0] * 3;
- _uccase_len[1] = hdr.size.len[1] * 3;
+ _uccase_size = hdr.cnt;
+ _uccase_len[0] = hdr.size.len[0];
+ _uccase_len[1] = hdr.size.len[1];
- _uccase_map = (unsigned long *)
- malloc(_uccase_size * sizeof(unsigned long));
+ _uccase_map = (ac_uint4 *)
+ malloc(_uccase_size * 3 * sizeof(ac_uint4));
/*
* Load the case mapping table.
*/
- fread((char *) _uccase_map, sizeof(unsigned long), _uccase_size, in);
+ fread((char *) _uccase_map, sizeof(ac_uint4), _uccase_size * 3, in);
/*
* Do an endian swap if necessary.
*/
if (hdr.bom == 0xfffe) {
- for (i = 0; i < _uccase_size; i++)
+ for (i = 0; i < _uccase_size * 3; i++)
_uccase_map[i] = endian_long(_uccase_map[i]);
}
+ fclose(in);
+ return 0;
}
static void
-#ifdef __STDC__
_uccase_unload(void)
-#else
-_uccase_unload()
-#endif
{
if (_uccase_size == 0)
return;
@@ -383,18 +391,13 @@ _uccase_unload()
free((char *) _uccase_map);
_uccase_size = 0;
}
-
-static unsigned long
-#ifdef __STDC__
-_uccase_lookup(unsigned long code, long l, long r, int field)
-#else
-_uccase_lookup(code, l, r, field)
-unsigned long code;
-long l, r;
-int field;
#endif
+
+static ac_uint4
+_uccase_lookup(ac_uint4 code, long l, long r, int field)
{
long m;
+ const ac_uint4 *tmp;
/*
* Do the binary search.
@@ -405,25 +408,20 @@ int field;
* the beginning of a case mapping triple.
*/
m = (l + r) >> 1;
- m -= (m % 3);
- if (code > _uccase_map[m])
- l = m + 3;
- else if (code < _uccase_map[m])
- r = m - 3;
- else if (code == _uccase_map[m])
- return _uccase_map[m + field];
+ tmp = &_uccase_map[m*3];
+ if (code > *tmp)
+ l = m + 1;
+ else if (code < *tmp)
+ r = m - 1;
+ else if (code == *tmp)
+ return tmp[field];
}
return code;
}
-unsigned long
-#ifdef __STDC__
-uctoupper(unsigned long code)
-#else
-uctoupper(code)
-unsigned long code;
-#endif
+ac_uint4
+uctoupper(ac_uint4 code)
{
int field;
long l, r;
@@ -437,25 +435,20 @@ unsigned long code;
*/
field = 2;
l = _uccase_len[0];
- r = (l + _uccase_len[1]) - 3;
+ r = (l + _uccase_len[1]) - 1;
} else {
/*
* The character is title case.
*/
field = 1;
l = _uccase_len[0] + _uccase_len[1];
- r = _uccase_size - 3;
+ r = _uccase_size - 1;
}
return _uccase_lookup(code, l, r, field);
}
-unsigned long
-#ifdef __STDC__
-uctolower(unsigned long code)
-#else
-uctolower(code)
-unsigned long code;
-#endif
+ac_uint4
+uctolower(ac_uint4 code)
{
int field;
long l, r;
@@ -469,25 +462,20 @@ unsigned long code;
*/
field = 1;
l = 0;
- r = _uccase_len[0] - 3;
+ r = _uccase_len[0] - 1;
} else {
/*
* The character is title case.
*/
field = 2;
l = _uccase_len[0] + _uccase_len[1];
- r = _uccase_size - 3;
+ r = _uccase_size - 1;
}
return _uccase_lookup(code, l, r, field);
}
-unsigned long
-#ifdef __STDC__
-uctotitle(unsigned long code)
-#else
-uctotitle(code)
-unsigned long code;
-#endif
+ac_uint4
+uctotitle(ac_uint4 code)
{
int field;
long l, r;
@@ -505,53 +493,249 @@ unsigned long code;
* The character is upper case.
*/
l = 0;
- r = _uccase_len[0] - 3;
+ r = _uccase_len[0] - 1;
} else {
/*
* The character is lower case.
*/
l = _uccase_len[0];
- r = (l + _uccase_len[1]) - 3;
+ r = (l + _uccase_len[1]) - 1;
}
return _uccase_lookup(code, l, r, field);
}
/**************************************************************************
*
- * Support for decompositions.
+ * Support for compositions.
*
**************************************************************************/
-static unsigned long _ucdcmp_size;
-static unsigned long *_ucdcmp_nodes;
-static unsigned long *_ucdcmp_decomp;
+#if !HARDCODE_DATA
+
+static ac_uint4 _uccomp_size;
+static ac_uint4 *_uccomp_data;
+
+/*
+ * Load the composition table from "comp.dat", located through
+ * _ucopenfile() using the given search paths.
+ *
+ * paths  - search path list handed to _ucopenfile().
+ * reload - when zero and a table is already loaded, nothing is done;
+ *          otherwise the current table is released and the file is
+ *          read again.
+ *
+ * Return -1 on error (file not found, truncated file, out of memory),
+ * 0 if okay.  After a failure no table is loaded.
+ */
+static int
+_uccomp_load(char *paths, int reload)
+{
+    FILE *in;
+    ac_uint4 size, i;
+    _ucheader_t hdr;
+
+    if (_uccomp_size > 0) {
+        if (!reload)
+            /*
+             * The compositions have already been loaded.
+             */
+            return 0;
+
+        free((char *) _uccomp_data);
+        _uccomp_size = 0;
+    }
+
+    if ((in = _ucopenfile(paths, "comp.dat", "rb")) == 0)
+        return -1;
+
+    /*
+     * Load the header.  A short read means the file is truncated and the
+     * header fields cannot be trusted.
+     */
+    if (fread((char *) &hdr, sizeof(_ucheader_t), 1, in) != 1) {
+        fclose(in);
+        return -1;
+    }
+
+    if (hdr.bom == 0xfffe) {
+        hdr.cnt = endian_short(hdr.cnt);
+        hdr.size.bytes = endian_long(hdr.size.bytes);
+    }
+
+    _uccomp_data = (ac_uint4 *) malloc(hdr.size.bytes);
+    if (_uccomp_data == 0 && hdr.size.bytes != 0) {
+        fclose(in);
+        return -1;
+    }
+
+    /*
+     * Read the composition data in.  _uccomp_size is still 0 here, so a
+     * failure leaves the table marked as not loaded.
+     */
+    size = hdr.size.bytes / sizeof(ac_uint4);
+    if (fread((char *) _uccomp_data, sizeof(ac_uint4), size, in) != size) {
+        free((char *) _uccomp_data);
+        fclose(in);
+        return -1;
+    }
+
+    /*
+     * Do an endian swap if necessary.
+     */
+    if (hdr.bom == 0xfffe) {
+        for (i = 0; i < size; i++)
+            _uccomp_data[i] = endian_long(_uccomp_data[i]);
+    }
+
+    /*
+     * Assume that the data is ordered on count, so that all compositions
+     * of length 2 come first.  Only handling length 2 for now.
+     * The count is the second word of each four-word record; the table
+     * size kept is the number of words covered by length-2 records.
+     */
+    for (i = 1; i < size; i += 4)
+        if (_uccomp_data[i] != 2)
+            break;
+    _uccomp_size = i - 1;
+
+    fclose(in);
+    return 0;
+}
static void
-#ifdef __STDC__
-_ucdcmp_load(char *paths, int reload)
-#else
-_ucdcmp_load(paths, reload)
-char *paths;
-int reload;
+_uccomp_unload(void)
+{
+ if (_uccomp_size == 0)
+ return;
+
+ free((char *) _uccomp_data);
+ _uccomp_size = 0;
+}
#endif
+
+/*
+ * Look up the composition of the pair (node1, node2).
+ *
+ * The composition table is searched by bisection in steps of four words.
+ * Within a record, word 0 is the composite and words 2 and 3 are the two
+ * code points it is built from; the search assumes the records are
+ * ordered by (word 2, word 3).
+ *
+ * Returns 1 and stores the composite in *comp when the pair is found;
+ * returns 0 and leaves *comp untouched otherwise.
+ */
+int
+uccomp(ac_uint4 node1, ac_uint4 node2, ac_uint4 *comp)
+{
+    int lo, hi, mid;
+    int go_right;
+    const ac_uint4 *rec;
+
+    lo = 0;
+    hi = _uccomp_size - 1;
+
+    while (lo <= hi) {
+        /* Midpoint, rounded down to the first word of a record. */
+        mid = (hi + lo) >> 1;
+        mid -= mid & 3;
+        rec = &_uccomp_data[mid];
+
+        if (node1 != rec[2])
+            go_right = node1 > rec[2];
+        else if (node2 != rec[3])
+            go_right = node2 > rec[3];
+        else {
+            *comp = rec[0];
+            return 1;
+        }
+
+        if (go_right)
+            lo = mid + 4;
+        else
+            hi = mid - 4;
+    }
+    return 0;
+}
+
+/*
+ * Compose conjoining Hangul jamo in str[0..len-1] into precomposed
+ * syllables, in place.
+ *
+ * A leading consonant (L) followed by a vowel (V) is replaced by the LV
+ * syllable, and an LV syllable followed by a trailing consonant (T) is
+ * replaced by the LVT syllable.  Every other code point is copied
+ * through unchanged.
+ *
+ * Returns the number of code points left in str, or 0 when len is not
+ * positive.
+ */
+int
+uccomp_hangul(ac_uint4 *str, int len)
+{
+    const int SBase = 0xAC00, LBase = 0x1100,
+        VBase = 0x1161, TBase = 0x11A7,
+        LCount = 19, VCount = 21, TCount = 28,
+        NCount = VCount * TCount,   /* 588 */
+        SCount = LCount * NCount;   /* 11172 */
+
+    int i, rlen;
+    ac_uint4 ch, last, lindex, sindex;
+
+    /* Nothing to compose; also avoids reading str[0] of an empty buffer. */
+    if (len <= 0)
+        return 0;
+
+    last = str[0];
+    rlen = 1;
+    for ( i = 1; i < len; i++ ) {
+        ch = str[i];
+
+        /*
+         * check if two current characters are L and V.  The indexes are
+         * unsigned, so a code point below the base wraps to a huge value
+         * and fails the "< Count" test as well.
+         */
+        lindex = last - LBase;
+        if (lindex < (ac_uint4) LCount) {
+            ac_uint4 vindex = ch - VBase;
+            if (vindex < (ac_uint4) VCount) {
+                /* make syllable of form LV */
+                last = SBase + (lindex * VCount + vindex) * TCount;
+                str[rlen-1] = last; /* reset last */
+                continue;
+            }
+        }
+
+        /* check if two current characters are LV and T */
+        sindex = last - SBase;
+        if (sindex < (ac_uint4) SCount
+            && (sindex % TCount) == 0)
+        {
+            ac_uint4 tindex = ch - TBase;
+            /*
+             * TBase is one below the first trailing consonant, so index 0
+             * is not a T, and index TCount would spill into the next LV
+             * syllable: the valid range is 1 .. TCount-1.
+             */
+            if (tindex > 0 && tindex < (ac_uint4) TCount) {
+                /* make syllable of form LVT */
+                last += tindex;
+                str[rlen-1] = last; /* reset last */
+                continue;
+            }
+        }
+
+        /* if neither case was true, just add the character */
+        last = ch;
+        str[rlen] = ch;
+        rlen++;
+    }
+    return rlen;
+}
+
+/*
+ * Compose str[0..len-1] in place: each character that uccomp() can
+ * combine with the current starter, and that is not blocked by the
+ * combining class of the previously kept character, is folded into the
+ * starter.  The surviving characters are then handed to uccomp_hangul().
+ *
+ * Returns the length reported by uccomp_hangul().
+ */
+int
+uccanoncomp(ac_uint4 *str, int len)
+{
+    int pos, starter_pos, out_len;
+    ac_uint4 ccc, prev_ccc, starter, cur, composite;
+
+    starter = str[0];
+    starter_pos = 0;
+    out_len = 1;
+    if (uccombining_class(starter) == 0)
+        prev_ccc = 0;
+    else
+        prev_ccc = 256;
+
+    for (pos = 1; pos < len; pos++) {
+        cur = str[pos];
+        ccc = uccombining_class(cur);
+
+        if (uccomp(starter, cur, &composite) && (prev_ccc < ccc || prev_ccc == 0)) {
+            /* Fold cur into the starter; nothing is appended. */
+            str[starter_pos] = composite;
+            starter = composite;
+            continue;
+        }
+
+        /* A class-0 character that did not combine becomes the starter. */
+        if (ccc == 0) {
+            starter_pos = out_len;
+            starter = cur;
+        }
+        prev_ccc = ccc;
+        str[out_len] = cur;
+        out_len++;
+    }
+
+    return uccomp_hangul(str, out_len);
+}
+
+/**************************************************************************
+ *
+ * Support for decompositions.
+ *
+ **************************************************************************/
+
+#if !HARDCODE_DATA
+
+static ac_uint4 _ucdcmp_size;
+static ac_uint4 *_ucdcmp_nodes;
+static ac_uint4 *_ucdcmp_decomp;
+
+static ac_uint4 _uckdcmp_size;
+static ac_uint4 *_uckdcmp_nodes;
+static ac_uint4 *_uckdcmp_decomp;
+
+/*
+ * Return -1 on error, 0 if okay
+ */
+static int
+_ucdcmp_load(char *paths, int reload)
{
FILE *in;
- unsigned long size, i;
+ ac_uint4 size, i;
_ucheader_t hdr;
if (_ucdcmp_size > 0) {
if (!reload)
- /*
- * The decompositions have already been loaded.
- */
- return;
+ /*
+ * The decompositions have already been loaded.
+ */
+ return 0;
free((char *) _ucdcmp_nodes);
_ucdcmp_size = 0;
}
if ((in = _ucopenfile(paths, "decomp.dat", "rb")) == 0)
- return;
+ return -1;
/*
* Load the header.
@@ -564,30 +748,83 @@ int reload;
}
_ucdcmp_size = hdr.cnt << 1;
- _ucdcmp_nodes = (unsigned long *) malloc(hdr.size.bytes);
+ _ucdcmp_nodes = (ac_uint4 *) malloc(hdr.size.bytes);
_ucdcmp_decomp = _ucdcmp_nodes + (_ucdcmp_size + 1);
/*
* Read the decomposition data in.
*/
- size = hdr.size.bytes / sizeof(unsigned long);
- fread((char *) _ucdcmp_nodes, sizeof(unsigned long), size, in);
+ size = hdr.size.bytes / sizeof(ac_uint4);
+ fread((char *) _ucdcmp_nodes, sizeof(ac_uint4), size, in);
/*
* Do an endian swap if necessary.
*/
if (hdr.bom == 0xfffe) {
for (i = 0; i < size; i++)
- _ucdcmp_nodes[i] = endian_long(_ucdcmp_nodes[i]);
- }
+ _ucdcmp_nodes[i] = endian_long(_ucdcmp_nodes[i]);
+ }
+ fclose(in);
+ return 0;
+}
+
+/*
+ * Load the compatibility decomposition table from "kdecomp.dat", located
+ * through _ucopenfile() using the given search paths.
+ *
+ * paths  - search path list handed to _ucopenfile().
+ * reload - when zero and a table is already loaded, nothing is done;
+ *          otherwise the current table is released and the file is
+ *          read again.
+ *
+ * Return -1 on error (file not found, truncated file, out of memory),
+ * 0 if okay.  After a failure no table is loaded.
+ */
+static int
+_uckdcmp_load(char *paths, int reload)
+{
+    FILE *in;
+    ac_uint4 size, i;
+    ac_uint4 *nodes;
+    _ucheader_t hdr;
+
+    if (_uckdcmp_size > 0) {
+        if (!reload)
+            /*
+             * The decompositions have already been loaded.
+             */
+            return 0;
+
+        free((char *) _uckdcmp_nodes);
+        _uckdcmp_size = 0;
+    }
+
+    if ((in = _ucopenfile(paths, "kdecomp.dat", "rb")) == 0)
+        return -1;
+
+    /*
+     * Load the header.  A short read means the file is truncated and the
+     * header fields cannot be trusted.
+     */
+    if (fread((char *) &hdr, sizeof(_ucheader_t), 1, in) != 1) {
+        fclose(in);
+        return -1;
+    }
+
+    if (hdr.bom == 0xfffe) {
+        hdr.cnt = endian_short(hdr.cnt);
+        hdr.size.bytes = endian_long(hdr.size.bytes);
+    }
+
+    nodes = (ac_uint4 *) malloc(hdr.size.bytes);
+    if (nodes == 0 && hdr.size.bytes != 0) {
+        fclose(in);
+        return -1;
+    }
+
+    /*
+     * Read the decomposition data in.  The globals are only updated once
+     * the whole table has been read, so a failure leaves nothing loaded.
+     */
+    size = hdr.size.bytes / sizeof(ac_uint4);
+    if (fread((char *) nodes, sizeof(ac_uint4), size, in) != size) {
+        free((char *) nodes);
+        fclose(in);
+        return -1;
+    }
+
+    /*
+     * Do an endian swap if necessary.
+     */
+    if (hdr.bom == 0xfffe) {
+        for (i = 0; i < size; i++)
+            nodes[i] = endian_long(nodes[i]);
+    }
+
+    /*
+     * The node array holds two words per entry plus one extra word; the
+     * decomposition data follows it in the same allocation.
+     */
+    _uckdcmp_size = hdr.cnt << 1;
+    _uckdcmp_nodes = nodes;
+    _uckdcmp_decomp = _uckdcmp_nodes + (_uckdcmp_size + 1);
+
+    fclose(in);
+    return 0;
}
static void
-#ifdef __STDC__
_ucdcmp_unload(void)
-#else
-_ucdcmp_unload()
-#endif
{
if (_ucdcmp_size == 0)
return;
@@ -600,16 +837,30 @@ _ucdcmp_unload()
_ucdcmp_size = 0;
}
-int
-#ifdef __STDC__
-ucdecomp(unsigned long code, unsigned long *num, unsigned long **decomp)
-#else
-ucdecomp(code, num, decomp)
-unsigned long code, *num, **decomp;
+/*
+ * Release the compatibility decomposition table, if one is loaded.
+ */
+static void
+_uckdcmp_unload(void)
+{
+    if (_uckdcmp_size != 0) {
+        /*
+         * The memory is allocated as a single block starting at the
+         * node array, so one free() releases everything.
+         */
+        free((char *) _uckdcmp_nodes);
+        _uckdcmp_size = 0;
+    }
+}
#endif
+
+int
+ucdecomp(ac_uint4 code, ac_uint4 *num, ac_uint4 **decomp)
{
long l, r, m;
+ if (code < _ucdcmp_nodes[0]) {
+ return 0;
+ }
+
l = 0;
r = _ucdcmp_nodes[_ucdcmp_size] - 1;
@@ -626,7 +877,7 @@ unsigned long code, *num, **decomp;
r = m - 2;
else if (code == _ucdcmp_nodes[m]) {
*num = _ucdcmp_nodes[m + 3] - _ucdcmp_nodes[m + 1];
- *decomp = &_ucdcmp_decomp[_ucdcmp_nodes[m + 1]];
+ *decomp = (ac_uint4*)&_ucdcmp_decomp[_ucdcmp_nodes[m + 1]];
return 1;
}
}
@@ -634,60 +885,169 @@ unsigned long code, *num, **decomp;
}
int
-#ifdef __STDC__
-ucdecomp_hangul(unsigned long code, unsigned long *num, unsigned long decomp[])
-#else
-ucdecomp_hangul(code, num, decomp)
-unsigned long code, *num, decomp[];
-#endif
+uckdecomp(ac_uint4 code, ac_uint4 *num, ac_uint4 **decomp)
+{
+ long l, r, m;
+
+ if (code < _uckdcmp_nodes[0]) {
+ return 0;
+ }
+
+ l = 0;
+ r = _uckdcmp_nodes[_uckdcmp_size] - 1;
+
+ while (l <= r) {
+ /*
+ * Determine a "mid" point and adjust to make sure the mid point is at
+ * the beginning of a code+offset pair.
+ */
+ m = (l + r) >> 1;
+ m -= (m & 1);
+ if (code > _uckdcmp_nodes[m])
+ l = m + 2;
+ else if (code < _uckdcmp_nodes[m])
+ r = m - 2;
+ else if (code == _uckdcmp_nodes[m]) {
+ *num = _uckdcmp_nodes[m + 3] - _uckdcmp_nodes[m + 1];
+ *decomp = (ac_uint4*)&_uckdcmp_decomp[_uckdcmp_nodes[m + 1]];
+ return 1;
+ }
+ }
+ return 0;
+}
+
+int
+ucdecomp_hangul(ac_uint4 code, ac_uint4 *num, ac_uint4 decomp[])
{
if (!ucishangul(code))
return 0;
code -= 0xac00;
- decomp[0] = 0x1100 + (unsigned long) (code / 588);
- decomp[1] = 0x1161 + (unsigned long) ((code % 588) / 28);
- decomp[2] = 0x11a7 + (unsigned long) (code % 28);
+ decomp[0] = 0x1100 + (ac_uint4) (code / 588);
+ decomp[1] = 0x1161 + (ac_uint4) ((code % 588) / 28);
+ decomp[2] = 0x11a7 + (ac_uint4) (code % 28);
*num = (decomp[2] != 0x11a7) ? 3 : 2;
return 1;
}
+/*
+ * Resize *out to newsize entries (newsize must be positive).
+ *
+ * On failure the old buffer is released and *out is set to NULL, so the
+ * caller is never left holding a pointer it can no longer free.
+ * Returns 0 if okay, -1 on allocation failure.
+ */
+static int
+uccanoncompatdecomp_resize(ac_uint4 **out, int newsize, void *ctx)
+{
+    ac_uint4 *grown;
+
+    grown = (ac_uint4 *) ber_memrealloc_x(*out, newsize * sizeof(**out), ctx);
+    if (grown == NULL) {
+        ber_memfree_x(*out, ctx);
+        *out = NULL;
+        return -1;
+    }
+    *out = grown;
+    return 0;
+}
+
+/*
+ * Store ch into buf, which already holds `used' entries and has room for
+ * one more.  A character of combining class 0 is appended; any other
+ * character is inserted in front of the trailing entries whose class is
+ * greater than its own.
+ */
+static void
+uccanoncompatdecomp_put(ac_uint4 *buf, unsigned used, ac_uint4 ch)
+{
+    ac_uint4 ccc;
+    unsigned pos;
+
+    pos = used;
+    ccc = uccombining_class(ch);
+    if (ccc != 0) {
+        while (pos > 0 && ccc < uccombining_class(buf[pos - 1]))
+            pos--;
+        /*
+         * Source and destination overlap here.
+         * NOTE(review): relies on AC_MEMCPY tolerating overlap, as the
+         * original code did -- confirm against ac/string.h.
+         */
+        AC_MEMCPY(buf + pos + 1, buf + pos, (used - pos) * sizeof(*buf));
+    }
+    buf[pos] = ch;
+}
+
+/*
+ * Decompose in[0..inlen-1] into a newly allocated buffer.
+ *
+ * mode == 0 for canonical, mode == 1 for compatibility
+ *
+ * Each input character is replaced by its table decomposition
+ * (uckdecomp() or ucdecomp(), depending on mode), else by its Hangul
+ * decomposition, else copied as is.  Table results and copied characters
+ * go through the combining-class ordering above; Hangul jamo are appended
+ * directly.
+ *
+ * On success *out points at the buffer obtained from ber_memalloc_x()
+ * with ctx, and the number of code points written is stored in *outlen
+ * and returned.  On allocation failure *out is NULL and -1 is stored in
+ * *outlen and returned.
+ */
+static int
+uccanoncompatdecomp(const ac_uint4 *in, int inlen,
+                    ac_uint4 **out, int *outlen, short mode, void *ctx)
+{
+    int size;
+    unsigned i, j, k;
+    ac_uint4 num, *decomp, hangdecomp[3];
+
+    size = inlen * 2;
+    *out = (ac_uint4 *) ber_memalloc_x(size * sizeof(**out), ctx);
+    if (*out == NULL)
+        return *outlen = -1;
+
+    i = 0;
+    for (j = 0; j < (unsigned) inlen; j++) {
+        if (mode ? uckdecomp(in[j], &num, &decomp) : ucdecomp(in[j], &num, &decomp)) {
+            if (size - i < num) {
+                /* Room for this expansion plus one slot per remaining input. */
+                size = inlen + i - j + num - 1;
+                if (uccanoncompatdecomp_resize(out, size, ctx) < 0)
+                    return *outlen = -1;
+            }
+            for (k = 0; k < num; k++) {
+                uccanoncompatdecomp_put(*out, i, decomp[k]);
+                i++;
+            }
+        } else if (ucdecomp_hangul(in[j], &num, hangdecomp)) {
+            if (size - i < num) {
+                size = inlen + i - j + num - 1;
+                if (uccanoncompatdecomp_resize(out, size, ctx) < 0)
+                    return *outlen = -1;
+            }
+            for (k = 0; k < num; k++) {
+                (*out)[i] = hangdecomp[k];
+                i++;
+            }
+        } else {
+            if (size - i < 1) {
+                size = inlen + i - j;
+                if (uccanoncompatdecomp_resize(out, size, ctx) < 0)
+                    return *outlen = -1;
+            }
+            uccanoncompatdecomp_put(*out, i, in[j]);
+            i++;
+        }
+    }
+    return *outlen = i;
+}
+
+/*
+ * Canonical decomposition of in[0..inlen-1]: forwards to
+ * uccanoncompatdecomp() with mode 0 and returns its result unchanged.
+ */
+int
+uccanondecomp(const ac_uint4 *in, int inlen,
+              ac_uint4 **out, int *outlen, void *ctx)
+{
+    const short canonical_mode = 0;
+
+    return uccanoncompatdecomp(in, inlen, out, outlen, canonical_mode, ctx);
+}
+
+/*
+ * Compatibility decomposition of in[0..inlen-1]: forwards to
+ * uccanoncompatdecomp() with mode 1 and returns its result unchanged.
+ */
+int
+uccompatdecomp(const ac_uint4 *in, int inlen,
+               ac_uint4 **out, int *outlen, void *ctx)
+{
+    const short compat_mode = 1;
+
+    return uccanoncompatdecomp(in, inlen, out, outlen, compat_mode, ctx);
+}
+
/**************************************************************************
*
* Support for combining classes.
*
**************************************************************************/
-static unsigned long _uccmcl_size;
-static unsigned long *_uccmcl_nodes;
+#if !HARDCODE_DATA
+static ac_uint4 _uccmcl_size;
+static ac_uint4 *_uccmcl_nodes;
-static void
-#ifdef __STDC__
+/*
+ * Return -1 on error, 0 if okay
+ */
+static int
_uccmcl_load(char *paths, int reload)
-#else
-_uccmcl_load(paths, reload)
-char *paths;
-int reload;
-#endif
{
FILE *in;
- unsigned long i;
+ ac_uint4 i;
_ucheader_t hdr;
if (_uccmcl_size > 0) {
if (!reload)
- /*
- * The combining classes have already been loaded.
- */
- return;
+ /*
+ * The combining classes have already been loaded.
+ */
+ return 0;
free((char *) _uccmcl_nodes);
_uccmcl_size = 0;
}
if ((in = _ucopenfile(paths, "cmbcl.dat", "rb")) == 0)
- return;
+ return -1;
/*
* Load the header.
@@ -700,28 +1060,26 @@ int reload;
}
_uccmcl_size = hdr.cnt * 3;
- _uccmcl_nodes = (unsigned long *) malloc(hdr.size.bytes);
+ _uccmcl_nodes = (ac_uint4 *) malloc(hdr.size.bytes);
/*
* Read the combining classes in.
*/
- fread((char *) _uccmcl_nodes, sizeof(unsigned long), _uccmcl_size, in);
+ fread((char *) _uccmcl_nodes, sizeof(ac_uint4), _uccmcl_size, in);
/*
* Do an endian swap if necessary.
*/
if (hdr.bom == 0xfffe) {
for (i = 0; i < _uccmcl_size; i++)
- _uccmcl_nodes[i] = endian_long(_uccmcl_nodes[i]);
- }
+ _uccmcl_nodes[i] = endian_long(_uccmcl_nodes[i]);
+ }
+ fclose(in);
+ return 0;
}
static void
-#ifdef __STDC__
_uccmcl_unload(void)
-#else
-_uccmcl_unload()
-#endif
{
if (_uccmcl_size == 0)
return;
@@ -729,14 +1087,10 @@ _uccmcl_unload()
free((char *) _uccmcl_nodes);
_uccmcl_size = 0;
}
-
-unsigned long
-#ifdef __STDC__
-uccombining_class(unsigned long code)
-#else
-uccombining_class(code)
-unsigned long code;
#endif
+
+ac_uint4
+uccombining_class(ac_uint4 code)
{
long l, r, m;
@@ -762,21 +1116,19 @@ unsigned long code;
*
**************************************************************************/
-static unsigned long *_ucnum_nodes;
-static unsigned long _ucnum_size;
+#if !HARDCODE_DATA
+static ac_uint4 *_ucnum_nodes;
+static ac_uint4 _ucnum_size;
static short *_ucnum_vals;
-static void
-#ifdef __STDC__
+/*
+ * Return -1 on error, 0 if okay
+ */
+static int
_ucnumb_load(char *paths, int reload)
-#else
-_ucnumb_load(paths, reload)
-char *paths;
-int reload;
-#endif
{
FILE *in;
- unsigned long size, i;
+ ac_uint4 size, i;
_ucheader_t hdr;
if (_ucnum_size > 0) {
@@ -784,14 +1136,14 @@ int reload;
/*
* The numbers have already been loaded.
*/
- return;
+ return 0;
free((char *) _ucnum_nodes);
_ucnum_size = 0;
}
if ((in = _ucopenfile(paths, "num.dat", "rb")) == 0)
- return;
+ return -1;
/*
* Load the header.
@@ -804,7 +1156,7 @@ int reload;
}
_ucnum_size = hdr.cnt;
- _ucnum_nodes = (unsigned long *) malloc(hdr.size.bytes);
+ _ucnum_nodes = (ac_uint4 *) malloc(hdr.size.bytes);
_ucnum_vals = (short *) (_ucnum_nodes + _ucnum_size);
/*
@@ -823,20 +1175,18 @@ int reload;
* Determine the number of values that have to be adjusted.
*/
size = (hdr.size.bytes -
- (_ucnum_size * (sizeof(unsigned long) << 1))) /
+ (_ucnum_size * (sizeof(ac_uint4) << 1))) /
sizeof(short);
for (i = 0; i < size; i++)
_ucnum_vals[i] = endian_short(_ucnum_vals[i]);
- }
+ }
+ fclose(in);
+ return 0;
}
static void
-#ifdef __STDC__
_ucnumb_unload(void)
-#else
-_ucnumb_unload()
-#endif
{
if (_ucnum_size == 0)
return;
@@ -844,15 +1194,10 @@ _ucnumb_unload()
free((char *) _ucnum_nodes);
_ucnum_size = 0;
}
+#endif
int
-#ifdef __STDC__
-ucnumber_lookup(unsigned long code, struct ucnumber *num)
-#else
-ucnumber_lookup(code, num)
-unsigned long code;
-struct ucnumber *num;
-#endif
+ucnumber_lookup(ac_uint4 code, struct ucnumber *num)
{
long l, r, m;
short *vp;
@@ -871,7 +1216,7 @@ struct ucnumber *num;
else if (code < _ucnum_nodes[m])
r = m - 2;
else {
- vp = _ucnum_vals + _ucnum_nodes[m + 1];
+ vp = (short *)_ucnum_vals + _ucnum_nodes[m + 1];
num->numerator = (int) *vp++;
num->denominator = (int) *vp;
return 1;
@@ -881,13 +1226,7 @@ struct ucnumber *num;
}
int
-#ifdef __STDC__
-ucdigit_lookup(unsigned long code, int *digit)
-#else
-ucdigit_lookup(code, digit)
-unsigned long code;
-int *digit;
-#endif
+ucdigit_lookup(ac_uint4 code, int *digit)
{
long l, r, m;
short *vp;
@@ -906,7 +1245,7 @@ int *digit;
else if (code < _ucnum_nodes[m])
r = m - 2;
else {
- vp = _ucnum_vals + _ucnum_nodes[m + 1];
+ vp = (short *)_ucnum_vals + _ucnum_nodes[m + 1];
if (*vp == *(vp + 1)) {
*digit = *vp;
return 1;
@@ -918,12 +1257,7 @@ int *digit;
}
struct ucnumber
-#ifdef __STDC__
-ucgetnumber(unsigned long code)
-#else
-ucgetnumber(code)
-unsigned long code;
-#endif
+ucgetnumber(ac_uint4 code)
{
struct ucnumber num;
@@ -940,12 +1274,7 @@ unsigned long code;
}
int
-#ifdef __STDC__
-ucgetdigit(unsigned long code)
-#else
-ucgetdigit(code)
-unsigned long code;
-#endif
+ucgetdigit(ac_uint4 code)
{
int dig;
@@ -967,34 +1296,39 @@ unsigned long code;
*
**************************************************************************/
-void
-#ifdef __STDC__
-ucdata_load(char *paths, int masks)
+#if HARDCODE_DATA
+/*
+ * With HARDCODE_DATA set the file loaders are compiled out, so these
+ * entry points have nothing to do: they ignore their arguments, and the
+ * load and reload calls always return 0.
+ */
+int
+ucdata_load(char *paths, int masks)
+{
+    (void) paths;
+    (void) masks;
+    return 0;
+}
+
+void
+ucdata_unload(int masks)
+{
+    (void) masks;
+}
+
+int
+ucdata_reload(char *paths, int masks)
+{
+    (void) paths;
+    (void) masks;
+    return 0;
+}
#else
-ucdata_load(paths, masks)
-char *paths;
-int masks;
-#endif
+/*
+ * Return 0 if okay, negative on error
+ */
+int
+ucdata_load(char *paths, int masks)
{
+ int error = 0;
+
if (masks & UCDATA_CTYPE)
- _ucprop_load(paths, 0);
+ error |= _ucprop_load(paths, 0) < 0 ? UCDATA_CTYPE : 0;
if (masks & UCDATA_CASE)
- _uccase_load(paths, 0);
+ error |= _uccase_load(paths, 0) < 0 ? UCDATA_CASE : 0;
if (masks & UCDATA_DECOMP)
- _ucdcmp_load(paths, 0);
+ error |= _ucdcmp_load(paths, 0) < 0 ? UCDATA_DECOMP : 0;
if (masks & UCDATA_CMBCL)
- _uccmcl_load(paths, 0);
+ error |= _uccmcl_load(paths, 0) < 0 ? UCDATA_CMBCL : 0;
if (masks & UCDATA_NUM)
- _ucnumb_load(paths, 0);
+ error |= _ucnumb_load(paths, 0) < 0 ? UCDATA_NUM : 0;
+ if (masks & UCDATA_COMP)
+ error |= _uccomp_load(paths, 0) < 0 ? UCDATA_COMP : 0;
+ if (masks & UCDATA_KDECOMP)
+ error |= _uckdcmp_load(paths, 0) < 0 ? UCDATA_KDECOMP : 0;
+
+ return -error;
}
void
-#ifdef __STDC__
ucdata_unload(int masks)
-#else
-ucdata_unload(masks)
-int masks;
-#endif
{
if (masks & UCDATA_CTYPE)
_ucprop_unload();
@@ -1006,43 +1340,49 @@ int masks;
_uccmcl_unload();
if (masks & UCDATA_NUM)
_ucnumb_unload();
+ if (masks & UCDATA_COMP)
+ _uccomp_unload();
+ if (masks & UCDATA_KDECOMP)
+ _uckdcmp_unload();
}
-void
-#ifdef __STDC__
+/*
+ * Return 0 if okay, negative on error
+ */
+int
ucdata_reload(char *paths, int masks)
-#else
-ucdata_reload(paths, masks)
-char *paths;
-int masks;
-#endif
{
+ int error = 0;
+
if (masks & UCDATA_CTYPE)
- _ucprop_load(paths, 1);
+ error |= _ucprop_load(paths, 1) < 0 ? UCDATA_CTYPE : 0;
if (masks & UCDATA_CASE)
- _uccase_load(paths, 1);
+ error |= _uccase_load(paths, 1) < 0 ? UCDATA_CASE : 0;
if (masks & UCDATA_DECOMP)
- _ucdcmp_load(paths, 1);
+ error |= _ucdcmp_load(paths, 1) < 0 ? UCDATA_DECOMP : 0;
if (masks & UCDATA_CMBCL)
- _uccmcl_load(paths, 1);
+ error |= _uccmcl_load(paths, 1) < 0 ? UCDATA_CMBCL : 0;
if (masks & UCDATA_NUM)
- _ucnumb_load(paths, 1);
+ error |= _ucnumb_load(paths, 1) < 0 ? UCDATA_NUM : 0;
+ if (masks & UCDATA_COMP)
+ error |= _uccomp_load(paths, 1) < 0 ? UCDATA_COMP : 0;
+ if (masks & UCDATA_KDECOMP)
+ error |= _uckdcmp_load(paths, 1) < 0 ? UCDATA_KDECOMP : 0;
+
+ return -error;
}
+#endif
#ifdef TEST
void
-#ifdef __STDC__
main(void)
-#else
-main()
-#endif
{
int dig;
- unsigned long i, lo, *dec;
+ ac_uint4 i, lo, *dec;
struct ucnumber num;
- ucdata_setup(".");
+/* ucdata_setup("."); */
if (ucisweak(0x30))
printf("WEAK\n");
@@ -1096,7 +1436,7 @@ main()
printf("0x10000 NOT DEFINED\n");
if (ucnumber_lookup(0x30, &num)) {
- if (num.numerator != num.denominator)
+ if (num.denominator != 1)
printf("UCNUMBER: 0x30 = %d/%d\n", num.numerator, num.denominator);
else
printf("UCNUMBER: 0x30 = %d\n", num.numerator);
@@ -1104,7 +1444,7 @@ main()
printf("UCNUMBER: 0x30 NOT A NUMBER\n");
if (ucnumber_lookup(0xbc, &num)) {
- if (num.numerator != num.denominator)
+ if (num.denominator != 1)
printf("UCNUMBER: 0xbc = %d/%d\n", num.numerator, num.denominator);
else
printf("UCNUMBER: 0xbc = %d\n", num.numerator);
@@ -1113,7 +1453,7 @@ main()
if (ucnumber_lookup(0xff19, &num)) {
- if (num.numerator != num.denominator)
+ if (num.denominator != 1)
printf("UCNUMBER: 0xff19 = %d/%d\n", num.numerator, num.denominator);
else
printf("UCNUMBER: 0xff19 = %d\n", num.numerator);
@@ -1121,7 +1461,7 @@ main()
printf("UCNUMBER: 0xff19 NOT A NUMBER\n");
if (ucnumber_lookup(0x4e00, &num)) {
- if (num.numerator != num.denominator)
+ if (num.denominator != 1)
printf("UCNUMBER: 0x4e00 = %d/%d\n", num.numerator, num.denominator);
else
printf("UCNUMBER: 0x4e00 = %d\n", num.numerator);
@@ -1137,24 +1477,24 @@ main()
printf("UCGETDIGIT: 0x969 = %d\n", dig);
num = ucgetnumber(0x30);
- if (num.numerator != num.denominator)
+ if (num.denominator != 1)
printf("UCGETNUMBER: 0x30 = %d/%d\n", num.numerator, num.denominator);
else
printf("UCGETNUMBER: 0x30 = %d\n", num.numerator);
num = ucgetnumber(0xbc);
- if (num.numerator != num.denominator)
+ if (num.denominator != 1)
printf("UCGETNUMBER: 0xbc = %d/%d\n", num.numerator, num.denominator);
else
printf("UCGETNUMBER: 0xbc = %d\n", num.numerator);
num = ucgetnumber(0xff19);
- if (num.numerator != num.denominator)
+ if (num.denominator != 1)
printf("UCGETNUMBER: 0xff19 = %d/%d\n", num.numerator, num.denominator);
else
printf("UCGETNUMBER: 0xff19 = %d\n", num.numerator);
- ucdata_cleanup();
+/* ucdata_cleanup(); */
exit(0);
}