X-Git-Url: https://git.sur5r.net/?a=blobdiff_plain;f=libraries%2Fliblunicode%2Fucdata%2Fucdata.c;h=c5285a7e858352a11d858cfae763883c432e3c7e;hb=255895125148704f4219868b295d6f69dfdcd68a;hp=29f97bd70816b7760a7b29e408917db8a827d066;hpb=fe98d9fa7b313ffe51f09ea175e5126200793bcd;p=openldap diff --git a/libraries/liblunicode/ucdata/ucdata.c b/libraries/liblunicode/ucdata/ucdata.c index 29f97bd708..c5285a7e85 100644 --- a/libraries/liblunicode/ucdata/ucdata.c +++ b/libraries/liblunicode/ucdata/ucdata.c @@ -1,5 +1,18 @@ -/* - * Copyright 1999 Computing Research Labs, New Mexico State University +/* $OpenLDAP$ */ +/* This work is part of OpenLDAP Software . + * + * Copyright 1998-2009 The OpenLDAP Foundation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * . + */ +/* Copyright 2001 Computing Research Labs, New Mexico State University * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -19,23 +32,29 @@ * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR * THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef lint -#ifdef __GNUC__ -static char rcsid[] __attribute__ ((unused)) = "$Id: ucdata.c,v 1.3 1999/08/23 16:14:09 mleisher Exp $"; -#else -static char rcsid[] = "$Id: ucdata.c,v 1.3 1999/08/23 16:14:09 mleisher Exp $"; -#endif -#endif +/* $Id: ucdata.c,v 1.4 2001/01/02 18:46:20 mleisher Exp $" */ + +#include "portable.h" +#include "ldap_config.h" #include -#include -#include -#ifndef WIN32 -#include -#endif +#include +#include +#include + +#include +#include "lber_pvt.h" #include "ucdata.h" +#ifndef HARDCODE_DATA +#define HARDCODE_DATA 1 +#endif + +#if HARDCODE_DATA +#include "uctable.h" +#endif + /************************************************************************** * * Miscellaneous types, data, and support functions. @@ -43,37 +62,35 @@ static char rcsid[] = "$Id: ucdata.c,v 1.3 1999/08/23 16:14:09 mleisher Exp $"; **************************************************************************/ typedef struct { - unsigned short bom; - unsigned short cnt; + ac_uint2 bom; + ac_uint2 cnt; union { - unsigned long bytes; - unsigned short len[2]; + ac_uint4 bytes; + ac_uint2 len[2]; } size; } _ucheader_t; /* * A simple array of 32-bit masks for lookup. */ -static unsigned long masks32[32] = { - 0x00000001, 0x00000002, 0x00000004, 0x00000008, 0x00000010, 0x00000020, - 0x00000040, 0x00000080, 0x00000100, 0x00000200, 0x00000400, 0x00000800, - 0x00001000, 0x00002000, 0x00004000, 0x00008000, 0x00010000, 0x00020000, - 0x00040000, 0x00080000, 0x00100000, 0x00200000, 0x00400000, 0x00800000, - 0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000, 0x20000000, - 0x40000000, 0x80000000 +static ac_uint4 masks32[32] = { + 0x00000001UL, 0x00000002UL, 0x00000004UL, 0x00000008UL, + 0x00000010UL, 0x00000020UL, 0x00000040UL, 0x00000080UL, + 0x00000100UL, 0x00000200UL, 0x00000400UL, 0x00000800UL, + 0x00001000UL, 0x00002000UL, 0x00004000UL, 0x00008000UL, + 0x00010000UL, 0x00020000UL, 0x00040000UL, 0x00080000UL, + 0x00100000UL, 0x00200000UL, 0x00400000UL, 0x00800000UL, + 0x01000000UL, 0x02000000UL, 0x04000000UL, 0x08000000UL, + 0x10000000UL, 0x20000000UL, 0x40000000UL, 0x80000000UL }; #define endian_short(cc) (((cc) >> 8) | (((cc) & 0xff) << 8)) #define endian_long(cc) ((((cc) & 0xff) << 24)|((((cc) >> 8) & 0xff) << 16)|\ ((((cc) >> 16) & 0xff) << 8)|((cc) >> 24)) +#if !HARDCODE_DATA static FILE * -#ifdef __STDC__ _ucopenfile(char *paths, char *filename, char *mode) -#else -_ucopenfile(paths, filename, mode) -char *paths, *filename, *mode; -#endif { FILE *f; char *fp, *dp, *pp, path[BUFSIZ]; @@ -86,7 +103,7 @@ char *paths, *filename, *mode; pp = path; while (*dp && *dp != ':') *pp++ = *dp++; - *pp++ = '/'; + *pp++ = *LDAP_DIRSEP; fp = filename; while (*fp) @@ -102,6 +119,7 @@ char *paths, *filename, *mode; return 0; } +#endif /************************************************************************** * @@ -109,21 +127,20 @@ char *paths, *filename, *mode; * **************************************************************************/ -static unsigned long _ucprop_size; -static unsigned short *_ucprop_offsets; -static unsigned long *_ucprop_ranges; +#if !HARDCODE_DATA -static void -#ifdef __STDC__ +static ac_uint4 _ucprop_size; +static ac_uint2 *_ucprop_offsets; +static ac_uint4 *_ucprop_ranges; + +/* + * Return -1 on error, 0 if okay + */ +static int _ucprop_load(char *paths, int reload) -#else -_ucprop_load(paths, reload) -char *paths; -int reload; -#endif { FILE *in; - unsigned long size, i; + ac_uint4 size, i; _ucheader_t hdr; if (_ucprop_size > 0) { @@ -131,7 +148,7 @@ int reload; /* * The character properties have already been loaded. */ - return; + return 0; /* * Unload the current character property data in preparation for @@ -144,7 +161,7 @@ int reload; } if ((in = _ucopenfile(paths, "ctype.dat", "rb")) == 0) - return; + return -1; /* * Load the header. @@ -158,13 +175,13 @@ int reload; if ((_ucprop_size = hdr.cnt) == 0) { fclose(in); - return; + return -1; } /* * Allocate all the storage needed for the lookup table. */ - _ucprop_offsets = (unsigned short *) malloc(hdr.size.bytes); + _ucprop_offsets = (ac_uint2 *) malloc(hdr.size.bytes); /* * Calculate the offset into the storage for the ranges. The offsets @@ -172,15 +189,15 @@ int reload; * the header count field. This means the offset to the ranges must be * calculated after aligning the count to a 4-byte boundary. */ - if ((size = ((hdr.cnt + 1) * sizeof(unsigned short))) & 3) + if ((size = ((hdr.cnt + 1) * sizeof(ac_uint2))) & 3) size += 4 - (size & 3); size >>= 1; - _ucprop_ranges = (unsigned long *) (_ucprop_offsets + size); + _ucprop_ranges = (ac_uint4 *) (_ucprop_offsets + size); /* * Load the offset array. */ - fread((char *) _ucprop_offsets, sizeof(unsigned short), size, in); + fread((char *) _ucprop_offsets, sizeof(ac_uint2), size, in); /* * Do an endian swap if necessary. Don't forget there is an extra node on @@ -195,7 +212,7 @@ int reload; * Load the ranges. The number of elements is in the last array position * of the offsets. */ - fread((char *) _ucprop_ranges, sizeof(unsigned long), + fread((char *) _ucprop_ranges, sizeof(ac_uint4), _ucprop_offsets[_ucprop_size], in); fclose(in); @@ -207,14 +224,11 @@ int reload; for (i = 0; i < _ucprop_offsets[_ucprop_size]; i++) _ucprop_ranges[i] = endian_long(_ucprop_ranges[i]); } + return 0; } static void -#ifdef __STDC__ _ucprop_unload(void) -#else -_ucprop_unload() -#endif { if (_ucprop_size == 0) return; @@ -226,17 +240,16 @@ _ucprop_unload() free((char *) _ucprop_offsets); _ucprop_size = 0; } +#endif static int -#ifdef __STDC__ -_ucprop_lookup(unsigned long code, unsigned long n) -#else -_ucprop_lookup(code, n) -unsigned long code, n; -#endif +_ucprop_lookup(ac_uint4 code, ac_uint4 n) { long l, r, m; + if (_ucprop_size == 0) + return 0; + /* * There is an extra node on the end of the offsets to allow this routine * to work right. If the index is 0xffff, then there are no nodes for the @@ -272,14 +285,9 @@ unsigned long code, n; } int -#ifdef __STDC__ -ucisprop(unsigned long code, unsigned long mask1, unsigned long mask2) -#else -ucisprop(code, mask1, mask2) -unsigned long code, mask1, mask2; -#endif +ucisprop(ac_uint4 code, ac_uint4 mask1, ac_uint4 mask2) { - unsigned long i; + ac_uint4 i; if (mask1 == 0 && mask2 == 0) return 0; @@ -303,21 +311,23 @@ unsigned long code, mask1, mask2; * **************************************************************************/ -static unsigned long _uccase_size; -static unsigned short _uccase_len[2]; -static unsigned long *_uccase_map; +#if !HARDCODE_DATA -static void -#ifdef __STDC__ +/* These record the number of slots in the map. + * There are 3 words per slot. + */ +static ac_uint4 _uccase_size; +static ac_uint2 _uccase_len[2]; +static ac_uint4 *_uccase_map; + +/* + * Return -1 on error, 0 if okay + */ +static int _uccase_load(char *paths, int reload) -#else -_uccase_load(paths, reload) -char *paths; -int reload; -#endif { FILE *in; - unsigned long i; + ac_uint4 i; _ucheader_t hdr; if (_uccase_size > 0) { @@ -325,14 +335,14 @@ int reload; /* * The case mappings have already been loaded. */ - return; + return 0; free((char *) _uccase_map); _uccase_size = 0; } if ((in = _ucopenfile(paths, "case.dat", "rb")) == 0) - return; + return -1; /* * Load the header. @@ -349,33 +359,31 @@ int reload; * Set the node count and lengths of the upper and lower case mapping * tables. */ - _uccase_size = hdr.cnt * 3; - _uccase_len[0] = hdr.size.len[0] * 3; - _uccase_len[1] = hdr.size.len[1] * 3; + _uccase_size = hdr.cnt; + _uccase_len[0] = hdr.size.len[0]; + _uccase_len[1] = hdr.size.len[1]; - _uccase_map = (unsigned long *) - malloc(_uccase_size * sizeof(unsigned long)); + _uccase_map = (ac_uint4 *) + malloc(_uccase_size * 3 * sizeof(ac_uint4)); /* * Load the case mapping table. */ - fread((char *) _uccase_map, sizeof(unsigned long), _uccase_size, in); + fread((char *) _uccase_map, sizeof(ac_uint4), _uccase_size * 3, in); /* * Do an endian swap if necessary. */ if (hdr.bom == 0xfffe) { - for (i = 0; i < _uccase_size; i++) + for (i = 0; i < _uccase_size * 3; i++) _uccase_map[i] = endian_long(_uccase_map[i]); } + fclose(in); + return 0; } static void -#ifdef __STDC__ _uccase_unload(void) -#else -_uccase_unload() -#endif { if (_uccase_size == 0) return; @@ -383,18 +391,13 @@ _uccase_unload() free((char *) _uccase_map); _uccase_size = 0; } - -static unsigned long -#ifdef __STDC__ -_uccase_lookup(unsigned long code, long l, long r, int field) -#else -_uccase_lookup(code, l, r, field) -unsigned long code; -long l, r; -int field; #endif + +static ac_uint4 +_uccase_lookup(ac_uint4 code, long l, long r, int field) { long m; + const ac_uint4 *tmp; /* * Do the binary search. @@ -405,25 +408,20 @@ int field; * the beginning of a case mapping triple. */ m = (l + r) >> 1; - m -= (m % 3); - if (code > _uccase_map[m]) - l = m + 3; - else if (code < _uccase_map[m]) - r = m - 3; - else if (code == _uccase_map[m]) - return _uccase_map[m + field]; + tmp = &_uccase_map[m*3]; + if (code > *tmp) + l = m + 1; + else if (code < *tmp) + r = m - 1; + else if (code == *tmp) + return tmp[field]; } return code; } -unsigned long -#ifdef __STDC__ -uctoupper(unsigned long code) -#else -uctoupper(code) -unsigned long code; -#endif +ac_uint4 +uctoupper(ac_uint4 code) { int field; long l, r; @@ -437,25 +435,20 @@ unsigned long code; */ field = 2; l = _uccase_len[0]; - r = (l + _uccase_len[1]) - 3; + r = (l + _uccase_len[1]) - 1; } else { /* * The character is title case. */ field = 1; l = _uccase_len[0] + _uccase_len[1]; - r = _uccase_size - 3; + r = _uccase_size - 1; } return _uccase_lookup(code, l, r, field); } -unsigned long -#ifdef __STDC__ -uctolower(unsigned long code) -#else -uctolower(code) -unsigned long code; -#endif +ac_uint4 +uctolower(ac_uint4 code) { int field; long l, r; @@ -469,25 +462,20 @@ unsigned long code; */ field = 1; l = 0; - r = _uccase_len[0] - 3; + r = _uccase_len[0] - 1; } else { /* * The character is title case. */ field = 2; l = _uccase_len[0] + _uccase_len[1]; - r = _uccase_size - 3; + r = _uccase_size - 1; } return _uccase_lookup(code, l, r, field); } -unsigned long -#ifdef __STDC__ -uctotitle(unsigned long code) -#else -uctotitle(code) -unsigned long code; -#endif +ac_uint4 +uctotitle(ac_uint4 code) { int field; long l, r; @@ -505,53 +493,249 @@ unsigned long code; * The character is upper case. */ l = 0; - r = _uccase_len[0] - 3; + r = _uccase_len[0] - 1; } else { /* * The character is lower case. */ l = _uccase_len[0]; - r = (l + _uccase_len[1]) - 3; + r = (l + _uccase_len[1]) - 1; } return _uccase_lookup(code, l, r, field); } /************************************************************************** * - * Support for decompositions. + * Support for compositions. * **************************************************************************/ -static unsigned long _ucdcmp_size; -static unsigned long *_ucdcmp_nodes; -static unsigned long *_ucdcmp_decomp; +#if !HARDCODE_DATA + +static ac_uint4 _uccomp_size; +static ac_uint4 *_uccomp_data; + +/* + * Return -1 on error, 0 if okay + */ +static int +_uccomp_load(char *paths, int reload) +{ + FILE *in; + ac_uint4 size, i; + _ucheader_t hdr; + + if (_uccomp_size > 0) { + if (!reload) + /* + * The compositions have already been loaded. + */ + return 0; + + free((char *) _uccomp_data); + _uccomp_size = 0; + } + + if ((in = _ucopenfile(paths, "comp.dat", "rb")) == 0) + return -1; + + /* + * Load the header. + */ + fread((char *) &hdr, sizeof(_ucheader_t), 1, in); + + if (hdr.bom == 0xfffe) { + hdr.cnt = endian_short(hdr.cnt); + hdr.size.bytes = endian_long(hdr.size.bytes); + } + + _uccomp_size = hdr.cnt; + _uccomp_data = (ac_uint4 *) malloc(hdr.size.bytes); + + /* + * Read the composition data in. + */ + size = hdr.size.bytes / sizeof(ac_uint4); + fread((char *) _uccomp_data, sizeof(ac_uint4), size, in); + + /* + * Do an endian swap if necessary. + */ + if (hdr.bom == 0xfffe) { + for (i = 0; i < size; i++) + _uccomp_data[i] = endian_long(_uccomp_data[i]); + } + + /* + * Assume that the data is ordered on count, so that all compositions + * of length 2 come first. Only handling length 2 for now. + */ + for (i = 1; i < size; i += 4) + if (_uccomp_data[i] != 2) + break; + _uccomp_size = i - 1; + + fclose(in); + return 0; +} static void -#ifdef __STDC__ -_ucdcmp_load(char *paths, int reload) -#else -_ucdcmp_load(paths, reload) -char *paths; -int reload; +_uccomp_unload(void) +{ + if (_uccomp_size == 0) + return; + + free((char *) _uccomp_data); + _uccomp_size = 0; +} #endif + +int +uccomp(ac_uint4 node1, ac_uint4 node2, ac_uint4 *comp) +{ + int l, r, m; + + l = 0; + r = _uccomp_size - 1; + + while (l <= r) { + m = ((r + l) >> 1); + m -= m & 3; + if (node1 > _uccomp_data[m+2]) + l = m + 4; + else if (node1 < _uccomp_data[m+2]) + r = m - 4; + else if (node2 > _uccomp_data[m+3]) + l = m + 4; + else if (node2 < _uccomp_data[m+3]) + r = m - 4; + else { + *comp = _uccomp_data[m]; + return 1; + } + } + return 0; +} + +int +uccomp_hangul(ac_uint4 *str, int len) +{ + const int SBase = 0xAC00, LBase = 0x1100, + VBase = 0x1161, TBase = 0x11A7, + LCount = 19, VCount = 21, TCount = 28, + NCount = VCount * TCount, /* 588 */ + SCount = LCount * NCount; /* 11172 */ + + int i, rlen; + ac_uint4 ch, last, lindex, sindex; + + last = str[0]; + rlen = 1; + for ( i = 1; i < len; i++ ) { + ch = str[i]; + + /* check if two current characters are L and V */ + lindex = last - LBase; + if (lindex < (ac_uint4) LCount) { + ac_uint4 vindex = ch - VBase; + if (vindex < (ac_uint4) VCount) { + /* make syllable of form LV */ + last = SBase + (lindex * VCount + vindex) * TCount; + str[rlen-1] = last; /* reset last */ + continue; + } + } + + /* check if two current characters are LV and T */ + sindex = last - SBase; + if (sindex < (ac_uint4) SCount + && (sindex % TCount) == 0) + { + ac_uint4 tindex = ch - TBase; + if (tindex <= (ac_uint4) TCount) { + /* make syllable of form LVT */ + last += tindex; + str[rlen-1] = last; /* reset last */ + continue; + } + } + + /* if neither case was true, just add the character */ + last = ch; + str[rlen] = ch; + rlen++; + } + return rlen; +} + +int +uccanoncomp(ac_uint4 *str, int len) +{ + int i, stpos, copos; + ac_uint4 cl, prevcl, st, ch, co; + + st = str[0]; + stpos = 0; + copos = 1; + prevcl = uccombining_class(st) == 0 ? 0 : 256; + + for (i = 1; i < len; i++) { + ch = str[i]; + cl = uccombining_class(ch); + if (uccomp(st, ch, &co) && (prevcl < cl || prevcl == 0)) + st = str[stpos] = co; + else { + if (cl == 0) { + stpos = copos; + st = ch; + } + prevcl = cl; + str[copos++] = ch; + } + } + + return uccomp_hangul(str, copos); +} + +/************************************************************************** + * + * Support for decompositions. + * + **************************************************************************/ + +#if !HARDCODE_DATA + +static ac_uint4 _ucdcmp_size; +static ac_uint4 *_ucdcmp_nodes; +static ac_uint4 *_ucdcmp_decomp; + +static ac_uint4 _uckdcmp_size; +static ac_uint4 *_uckdcmp_nodes; +static ac_uint4 *_uckdcmp_decomp; + +/* + * Return -1 on error, 0 if okay + */ +static int +_ucdcmp_load(char *paths, int reload) { FILE *in; - unsigned long size, i; + ac_uint4 size, i; _ucheader_t hdr; if (_ucdcmp_size > 0) { if (!reload) - /* - * The decompositions have already been loaded. - */ - return; + /* + * The decompositions have already been loaded. + */ + return 0; free((char *) _ucdcmp_nodes); _ucdcmp_size = 0; } if ((in = _ucopenfile(paths, "decomp.dat", "rb")) == 0) - return; + return -1; /* * Load the header. @@ -564,30 +748,83 @@ int reload; } _ucdcmp_size = hdr.cnt << 1; - _ucdcmp_nodes = (unsigned long *) malloc(hdr.size.bytes); + _ucdcmp_nodes = (ac_uint4 *) malloc(hdr.size.bytes); _ucdcmp_decomp = _ucdcmp_nodes + (_ucdcmp_size + 1); /* * Read the decomposition data in. */ - size = hdr.size.bytes / sizeof(unsigned long); - fread((char *) _ucdcmp_nodes, sizeof(unsigned long), size, in); + size = hdr.size.bytes / sizeof(ac_uint4); + fread((char *) _ucdcmp_nodes, sizeof(ac_uint4), size, in); /* * Do an endian swap if necessary. */ if (hdr.bom == 0xfffe) { for (i = 0; i < size; i++) - _ucdcmp_nodes[i] = endian_long(_ucdcmp_nodes[i]); - } + _ucdcmp_nodes[i] = endian_long(_ucdcmp_nodes[i]); + } + fclose(in); + return 0; +} + +/* + * Return -1 on error, 0 if okay + */ +static int +_uckdcmp_load(char *paths, int reload) +{ + FILE *in; + ac_uint4 size, i; + _ucheader_t hdr; + + if (_uckdcmp_size > 0) { + if (!reload) + /* + * The decompositions have already been loaded. + */ + return 0; + + free((char *) _uckdcmp_nodes); + _uckdcmp_size = 0; + } + + if ((in = _ucopenfile(paths, "kdecomp.dat", "rb")) == 0) + return -1; + + /* + * Load the header. + */ + fread((char *) &hdr, sizeof(_ucheader_t), 1, in); + + if (hdr.bom == 0xfffe) { + hdr.cnt = endian_short(hdr.cnt); + hdr.size.bytes = endian_long(hdr.size.bytes); + } + + _uckdcmp_size = hdr.cnt << 1; + _uckdcmp_nodes = (ac_uint4 *) malloc(hdr.size.bytes); + _uckdcmp_decomp = _uckdcmp_nodes + (_uckdcmp_size + 1); + + /* + * Read the decomposition data in. + */ + size = hdr.size.bytes / sizeof(ac_uint4); + fread((char *) _uckdcmp_nodes, sizeof(ac_uint4), size, in); + + /* + * Do an endian swap if necessary. + */ + if (hdr.bom == 0xfffe) { + for (i = 0; i < size; i++) + _uckdcmp_nodes[i] = endian_long(_uckdcmp_nodes[i]); + } + fclose(in); + return 0; } static void -#ifdef __STDC__ _ucdcmp_unload(void) -#else -_ucdcmp_unload() -#endif { if (_ucdcmp_size == 0) return; @@ -600,16 +837,30 @@ _ucdcmp_unload() _ucdcmp_size = 0; } -int -#ifdef __STDC__ -ucdecomp(unsigned long code, unsigned long *num, unsigned long **decomp) -#else -ucdecomp(code, num, decomp) -unsigned long code, *num, **decomp; +static void +_uckdcmp_unload(void) +{ + if (_uckdcmp_size == 0) + return; + + /* + * Only need to free the offsets because the memory is allocated as a + * single block. + */ + free((char *) _uckdcmp_nodes); + _uckdcmp_size = 0; +} #endif + +int +ucdecomp(ac_uint4 code, ac_uint4 *num, ac_uint4 **decomp) { long l, r, m; + if (code < _ucdcmp_nodes[0]) { + return 0; + } + l = 0; r = _ucdcmp_nodes[_ucdcmp_size] - 1; @@ -626,7 +877,7 @@ unsigned long code, *num, **decomp; r = m - 2; else if (code == _ucdcmp_nodes[m]) { *num = _ucdcmp_nodes[m + 3] - _ucdcmp_nodes[m + 1]; - *decomp = &_ucdcmp_decomp[_ucdcmp_nodes[m + 1]]; + *decomp = (ac_uint4*)&_ucdcmp_decomp[_ucdcmp_nodes[m + 1]]; return 1; } } @@ -634,60 +885,169 @@ unsigned long code, *num, **decomp; } int -#ifdef __STDC__ -ucdecomp_hangul(unsigned long code, unsigned long *num, unsigned long decomp[]) -#else -ucdecomp_hangul(code, num, decomp) -unsigned long code, *num, decomp[]; -#endif +uckdecomp(ac_uint4 code, ac_uint4 *num, ac_uint4 **decomp) +{ + long l, r, m; + + if (code < _uckdcmp_nodes[0]) { + return 0; + } + + l = 0; + r = _uckdcmp_nodes[_uckdcmp_size] - 1; + + while (l <= r) { + /* + * Determine a "mid" point and adjust to make sure the mid point is at + * the beginning of a code+offset pair. + */ + m = (l + r) >> 1; + m -= (m & 1); + if (code > _uckdcmp_nodes[m]) + l = m + 2; + else if (code < _uckdcmp_nodes[m]) + r = m - 2; + else if (code == _uckdcmp_nodes[m]) { + *num = _uckdcmp_nodes[m + 3] - _uckdcmp_nodes[m + 1]; + *decomp = (ac_uint4*)&_uckdcmp_decomp[_uckdcmp_nodes[m + 1]]; + return 1; + } + } + return 0; +} + +int +ucdecomp_hangul(ac_uint4 code, ac_uint4 *num, ac_uint4 decomp[]) { if (!ucishangul(code)) return 0; code -= 0xac00; - decomp[0] = 0x1100 + (unsigned long) (code / 588); - decomp[1] = 0x1161 + (unsigned long) ((code % 588) / 28); - decomp[2] = 0x11a7 + (unsigned long) (code % 28); + decomp[0] = 0x1100 + (ac_uint4) (code / 588); + decomp[1] = 0x1161 + (ac_uint4) ((code % 588) / 28); + decomp[2] = 0x11a7 + (ac_uint4) (code % 28); *num = (decomp[2] != 0x11a7) ? 3 : 2; return 1; } +/* mode == 0 for canonical, mode == 1 for compatibility */ +static int +uccanoncompatdecomp(const ac_uint4 *in, int inlen, + ac_uint4 **out, int *outlen, short mode, void *ctx) +{ + int l, size; + unsigned i, j, k; + ac_uint4 num, class, *decomp, hangdecomp[3]; + + size = inlen * 2; + *out = (ac_uint4 *) ber_memalloc_x(size * sizeof(**out), ctx); + if (*out == NULL) + return *outlen = -1; + + i = 0; + for (j = 0; j < (unsigned) inlen; j++) { + if (mode ? uckdecomp(in[j], &num, &decomp) : ucdecomp(in[j], &num, &decomp)) { + if ( size - i < num) { + size = inlen + i - j + num - 1; + *out = (ac_uint4 *) ber_memrealloc_x(*out, size * sizeof(**out), ctx ); + if (*out == NULL) + return *outlen = -1; + } + for (k = 0; k < num; k++) { + class = uccombining_class(decomp[k]); + if (class == 0) { + (*out)[i] = decomp[k]; + } else { + for (l = i; l > 0; l--) + if (class >= uccombining_class((*out)[l-1])) + break; + AC_MEMCPY(*out + l + 1, *out + l, (i - l) * sizeof(**out)); + (*out)[l] = decomp[k]; + } + i++; + } + } else if (ucdecomp_hangul(in[j], &num, hangdecomp)) { + if (size - i < num) { + size = inlen + i - j + num - 1; + *out = (ac_uint4 *) ber_memrealloc_x(*out, size * sizeof(**out), ctx); + if (*out == NULL) + return *outlen = -1; + } + for (k = 0; k < num; k++) { + (*out)[i] = hangdecomp[k]; + i++; + } + } else { + if (size - i < 1) { + size = inlen + i - j; + *out = (ac_uint4 *) ber_memrealloc_x(*out, size * sizeof(**out), ctx); + if (*out == NULL) + return *outlen = -1; + } + class = uccombining_class(in[j]); + if (class == 0) { + (*out)[i] = in[j]; + } else { + for (l = i; l > 0; l--) + if (class >= uccombining_class((*out)[l-1])) + break; + AC_MEMCPY(*out + l + 1, *out + l, (i - l) * sizeof(**out)); + (*out)[l] = in[j]; + } + i++; + } + } + return *outlen = i; +} + +int +uccanondecomp(const ac_uint4 *in, int inlen, + ac_uint4 **out, int *outlen, void *ctx) +{ + return uccanoncompatdecomp(in, inlen, out, outlen, 0, ctx); +} + +int +uccompatdecomp(const ac_uint4 *in, int inlen, + ac_uint4 **out, int *outlen, void *ctx) +{ + return uccanoncompatdecomp(in, inlen, out, outlen, 1, ctx); +} + /************************************************************************** * * Support for combining classes. * **************************************************************************/ -static unsigned long _uccmcl_size; -static unsigned long *_uccmcl_nodes; +#if !HARDCODE_DATA +static ac_uint4 _uccmcl_size; +static ac_uint4 *_uccmcl_nodes; -static void -#ifdef __STDC__ +/* + * Return -1 on error, 0 if okay + */ +static int _uccmcl_load(char *paths, int reload) -#else -_uccmcl_load(paths, reload) -char *paths; -int reload; -#endif { FILE *in; - unsigned long i; + ac_uint4 i; _ucheader_t hdr; if (_uccmcl_size > 0) { if (!reload) - /* - * The combining classes have already been loaded. - */ - return; + /* + * The combining classes have already been loaded. + */ + return 0; free((char *) _uccmcl_nodes); _uccmcl_size = 0; } if ((in = _ucopenfile(paths, "cmbcl.dat", "rb")) == 0) - return; + return -1; /* * Load the header. @@ -700,28 +1060,26 @@ int reload; } _uccmcl_size = hdr.cnt * 3; - _uccmcl_nodes = (unsigned long *) malloc(hdr.size.bytes); + _uccmcl_nodes = (ac_uint4 *) malloc(hdr.size.bytes); /* * Read the combining classes in. */ - fread((char *) _uccmcl_nodes, sizeof(unsigned long), _uccmcl_size, in); + fread((char *) _uccmcl_nodes, sizeof(ac_uint4), _uccmcl_size, in); /* * Do an endian swap if necessary. */ if (hdr.bom == 0xfffe) { for (i = 0; i < _uccmcl_size; i++) - _uccmcl_nodes[i] = endian_long(_uccmcl_nodes[i]); - } + _uccmcl_nodes[i] = endian_long(_uccmcl_nodes[i]); + } + fclose(in); + return 0; } static void -#ifdef __STDC__ _uccmcl_unload(void) -#else -_uccmcl_unload() -#endif { if (_uccmcl_size == 0) return; @@ -729,14 +1087,10 @@ _uccmcl_unload() free((char *) _uccmcl_nodes); _uccmcl_size = 0; } - -unsigned long -#ifdef __STDC__ -uccombining_class(unsigned long code) -#else -uccombining_class(code) -unsigned long code; #endif + +ac_uint4 +uccombining_class(ac_uint4 code) { long l, r, m; @@ -762,21 +1116,19 @@ unsigned long code; * **************************************************************************/ -static unsigned long *_ucnum_nodes; -static unsigned long _ucnum_size; +#if !HARDCODE_DATA +static ac_uint4 *_ucnum_nodes; +static ac_uint4 _ucnum_size; static short *_ucnum_vals; -static void -#ifdef __STDC__ +/* + * Return -1 on error, 0 if okay + */ +static int _ucnumb_load(char *paths, int reload) -#else -_ucnumb_load(paths, reload) -char *paths; -int reload; -#endif { FILE *in; - unsigned long size, i; + ac_uint4 size, i; _ucheader_t hdr; if (_ucnum_size > 0) { @@ -784,14 +1136,14 @@ int reload; /* * The numbers have already been loaded. */ - return; + return 0; free((char *) _ucnum_nodes); _ucnum_size = 0; } if ((in = _ucopenfile(paths, "num.dat", "rb")) == 0) - return; + return -1; /* * Load the header. @@ -804,7 +1156,7 @@ int reload; } _ucnum_size = hdr.cnt; - _ucnum_nodes = (unsigned long *) malloc(hdr.size.bytes); + _ucnum_nodes = (ac_uint4 *) malloc(hdr.size.bytes); _ucnum_vals = (short *) (_ucnum_nodes + _ucnum_size); /* @@ -823,20 +1175,18 @@ int reload; * Determine the number of values that have to be adjusted. */ size = (hdr.size.bytes - - (_ucnum_size * (sizeof(unsigned long) << 1))) / + (_ucnum_size * (sizeof(ac_uint4) << 1))) / sizeof(short); for (i = 0; i < size; i++) _ucnum_vals[i] = endian_short(_ucnum_vals[i]); - } + } + fclose(in); + return 0; } static void -#ifdef __STDC__ _ucnumb_unload(void) -#else -_ucnumb_unload() -#endif { if (_ucnum_size == 0) return; @@ -844,15 +1194,10 @@ _ucnumb_unload() free((char *) _ucnum_nodes); _ucnum_size = 0; } +#endif int -#ifdef __STDC__ -ucnumber_lookup(unsigned long code, struct ucnumber *num) -#else -ucnumber_lookup(code, num) -unsigned long code; -struct ucnumber *num; -#endif +ucnumber_lookup(ac_uint4 code, struct ucnumber *num) { long l, r, m; short *vp; @@ -871,7 +1216,7 @@ struct ucnumber *num; else if (code < _ucnum_nodes[m]) r = m - 2; else { - vp = _ucnum_vals + _ucnum_nodes[m + 1]; + vp = (short *)_ucnum_vals + _ucnum_nodes[m + 1]; num->numerator = (int) *vp++; num->denominator = (int) *vp; return 1; @@ -881,13 +1226,7 @@ struct ucnumber *num; } int -#ifdef __STDC__ -ucdigit_lookup(unsigned long code, int *digit) -#else -ucdigit_lookup(code, digit) -unsigned long code; -int *digit; -#endif +ucdigit_lookup(ac_uint4 code, int *digit) { long l, r, m; short *vp; @@ -906,7 +1245,7 @@ int *digit; else if (code < _ucnum_nodes[m]) r = m - 2; else { - vp = _ucnum_vals + _ucnum_nodes[m + 1]; + vp = (short *)_ucnum_vals + _ucnum_nodes[m + 1]; if (*vp == *(vp + 1)) { *digit = *vp; return 1; @@ -918,12 +1257,7 @@ int *digit; } struct ucnumber -#ifdef __STDC__ -ucgetnumber(unsigned long code) -#else -ucgetnumber(code) -unsigned long code; -#endif +ucgetnumber(ac_uint4 code) { struct ucnumber num; @@ -940,12 +1274,7 @@ unsigned long code; } int -#ifdef __STDC__ -ucgetdigit(unsigned long code) -#else -ucgetdigit(code) -unsigned long code; -#endif +ucgetdigit(ac_uint4 code) { int dig; @@ -967,34 +1296,39 @@ unsigned long code; * **************************************************************************/ -void -#ifdef __STDC__ -ucdata_load(char *paths, int masks) +#if HARDCODE_DATA +int ucdata_load(char *paths, int masks) { return 0; } +void ucdata_unload(int masks) { } +int ucdata_reload(char *paths, int masks) { return 0; } #else -ucdata_load(paths, masks) -char *paths; -int masks; -#endif +/* + * Return 0 if okay, negative on error + */ +int +ucdata_load(char *paths, int masks) { + int error = 0; + if (masks & UCDATA_CTYPE) - _ucprop_load(paths, 0); + error |= _ucprop_load(paths, 0) < 0 ? UCDATA_CTYPE : 0; if (masks & UCDATA_CASE) - _uccase_load(paths, 0); + error |= _uccase_load(paths, 0) < 0 ? UCDATA_CASE : 0; if (masks & UCDATA_DECOMP) - _ucdcmp_load(paths, 0); + error |= _ucdcmp_load(paths, 0) < 0 ? UCDATA_DECOMP : 0; if (masks & UCDATA_CMBCL) - _uccmcl_load(paths, 0); + error |= _uccmcl_load(paths, 0) < 0 ? UCDATA_CMBCL : 0; if (masks & UCDATA_NUM) - _ucnumb_load(paths, 0); + error |= _ucnumb_load(paths, 0) < 0 ? UCDATA_NUM : 0; + if (masks & UCDATA_COMP) + error |= _uccomp_load(paths, 0) < 0 ? UCDATA_COMP : 0; + if (masks & UCDATA_KDECOMP) + error |= _uckdcmp_load(paths, 0) < 0 ? UCDATA_KDECOMP : 0; + + return -error; } void -#ifdef __STDC__ ucdata_unload(int masks) -#else -ucdata_unload(masks) -int masks; -#endif { if (masks & UCDATA_CTYPE) _ucprop_unload(); @@ -1006,43 +1340,49 @@ int masks; _uccmcl_unload(); if (masks & UCDATA_NUM) _ucnumb_unload(); + if (masks & UCDATA_COMP) + _uccomp_unload(); + if (masks & UCDATA_KDECOMP) + _uckdcmp_unload(); } -void -#ifdef __STDC__ +/* + * Return 0 if okay, negative on error + */ +int ucdata_reload(char *paths, int masks) -#else -ucdata_reload(paths, masks) -char *paths; -int masks; -#endif { + int error = 0; + if (masks & UCDATA_CTYPE) - _ucprop_load(paths, 1); + error |= _ucprop_load(paths, 1) < 0 ? UCDATA_CTYPE : 0; if (masks & UCDATA_CASE) - _uccase_load(paths, 1); + error |= _uccase_load(paths, 1) < 0 ? UCDATA_CASE : 0; if (masks & UCDATA_DECOMP) - _ucdcmp_load(paths, 1); + error |= _ucdcmp_load(paths, 1) < 0 ? UCDATA_DECOMP : 0; if (masks & UCDATA_CMBCL) - _uccmcl_load(paths, 1); + error |= _uccmcl_load(paths, 1) < 0 ? UCDATA_CMBCL : 0; if (masks & UCDATA_NUM) - _ucnumb_load(paths, 1); + error |= _ucnumb_load(paths, 1) < 0 ? UCDATA_NUM : 0; + if (masks & UCDATA_COMP) + error |= _uccomp_load(paths, 1) < 0 ? UCDATA_COMP : 0; + if (masks & UCDATA_KDECOMP) + error |= _uckdcmp_load(paths, 1) < 0 ? UCDATA_KDECOMP : 0; + + return -error; } +#endif #ifdef TEST void -#ifdef __STDC__ main(void) -#else -main() -#endif { int dig; - unsigned long i, lo, *dec; + ac_uint4 i, lo, *dec; struct ucnumber num; - ucdata_setup("."); +/* ucdata_setup("."); */ if (ucisweak(0x30)) printf("WEAK\n"); @@ -1096,7 +1436,7 @@ main() printf("0x10000 NOT DEFINED\n"); if (ucnumber_lookup(0x30, &num)) { - if (num.numerator != num.denominator) + if (num.denominator != 1) printf("UCNUMBER: 0x30 = %d/%d\n", num.numerator, num.denominator); else printf("UCNUMBER: 0x30 = %d\n", num.numerator); @@ -1104,7 +1444,7 @@ main() printf("UCNUMBER: 0x30 NOT A NUMBER\n"); if (ucnumber_lookup(0xbc, &num)) { - if (num.numerator != num.denominator) + if (num.denominator != 1) printf("UCNUMBER: 0xbc = %d/%d\n", num.numerator, num.denominator); else printf("UCNUMBER: 0xbc = %d\n", num.numerator); @@ -1113,7 +1453,7 @@ main() if (ucnumber_lookup(0xff19, &num)) { - if (num.numerator != num.denominator) + if (num.denominator != 1) printf("UCNUMBER: 0xff19 = %d/%d\n", num.numerator, num.denominator); else printf("UCNUMBER: 0xff19 = %d\n", num.numerator); @@ -1121,7 +1461,7 @@ main() printf("UCNUMBER: 0xff19 NOT A NUMBER\n"); if (ucnumber_lookup(0x4e00, &num)) { - if (num.numerator != num.denominator) + if (num.denominator != 1) printf("UCNUMBER: 0x4e00 = %d/%d\n", num.numerator, num.denominator); else printf("UCNUMBER: 0x4e00 = %d\n", num.numerator); @@ -1137,24 +1477,24 @@ main() printf("UCGETDIGIT: 0x969 = %d\n", dig); num = ucgetnumber(0x30); - if (num.numerator != num.denominator) + if (num.denominator != 1) printf("UCGETNUMBER: 0x30 = %d/%d\n", num.numerator, num.denominator); else printf("UCGETNUMBER: 0x30 = %d\n", num.numerator); num = ucgetnumber(0xbc); - if (num.numerator != num.denominator) + if (num.denominator != 1) printf("UCGETNUMBER: 0xbc = %d/%d\n", num.numerator, num.denominator); else printf("UCGETNUMBER: 0xbc = %d\n", num.numerator); num = ucgetnumber(0xff19); - if (num.numerator != num.denominator) + if (num.denominator != 1) printf("UCGETNUMBER: 0xff19 = %d/%d\n", num.numerator, num.denominator); else printf("UCGETNUMBER: 0xff19 = %d\n", num.numerator); - ucdata_cleanup(); +/* ucdata_cleanup(); */ exit(0); }