X-Git-Url: https://git.sur5r.net/?a=blobdiff_plain;f=libraries%2Fliblunicode%2Fucdata%2Fucdata.c;h=77e32a3a3cceaf62771029e24b15de22cbc923bf;hb=b6bbc69e2f8d3f27b87f7448828a7d4f632214c9;hp=01a94a02946fb3b5e35233e0f6833245053b2bf6;hpb=f9a76ce1ab269d7bc31943a7301968e5580c618f;p=openldap diff --git a/libraries/liblunicode/ucdata/ucdata.c b/libraries/liblunicode/ucdata/ucdata.c index 01a94a0294..77e32a3a3c 100644 --- a/libraries/liblunicode/ucdata/ucdata.c +++ b/libraries/liblunicode/ucdata/ucdata.c @@ -1,6 +1,6 @@ /* $OpenLDAP$ */ /* - * Copyright 2000 The OpenLDAP Foundation, All Rights Reserved. + * Copyright 2000-2002 The OpenLDAP Foundation, All Rights Reserved. * COPYING RESTRICTIONS APPLY, see COPYRIGHT file */ /* @@ -27,21 +27,17 @@ /* $Id: ucdata.c,v 1.4 2001/01/02 18:46:20 mleisher Exp $" */ #include "portable.h" +#include "ldap_config.h" #include -#include +#include +#include #include #include #include "ucdata.h" -#ifdef WIN32 -#define UC_DIRSEP '\\' -#else -#define UC_DIRSEP '/' -#endif - /************************************************************************** * * Miscellaneous types, data, and support functions. @@ -49,11 +45,11 @@ **************************************************************************/ typedef struct { - unsigned short bom; - unsigned short cnt; + ac_uint4 bom; + ac_uint4 cnt; union { - unsigned long bytes; - unsigned short len[2]; + ac_uint4 bytes; + ac_uint2 len[2]; } size; } _ucheader_t; @@ -89,7 +85,7 @@ _ucopenfile(char *paths, char *filename, char *mode) pp = path; while (*dp && *dp != ':') *pp++ = *dp++; - *pp++ = UC_DIRSEP; + *pp++ = *LDAP_DIRSEP; fp = filename; while (*fp) @@ -355,6 +351,7 @@ _uccase_load(char *paths, int reload) for (i = 0; i < _uccase_size; i++) _uccase_map[i] = endian_long(_uccase_map[i]); } + fclose(in); return 0; } @@ -547,6 +544,7 @@ _uccomp_load(char *paths, int reload) break; _uccomp_size = i - 1; + fclose(in); return 0; } @@ -606,9 +604,9 @@ uccomp_hangul(unsigned long *str, int len) /* check if two current characters are L and V */ lindex = last - LBase; - if (0 <= lindex && lindex < LCount) { + if (lindex < (unsigned long) LCount) { unsigned long vindex = ch - VBase; - if (0 <= vindex && vindex < VCount) { + if (vindex < (unsigned long) VCount) { /* make syllable of form LV */ last = SBase + (lindex * VCount + vindex) * TCount; str[rlen-1] = last; /* reset last */ @@ -618,9 +616,11 @@ uccomp_hangul(unsigned long *str, int len) /* check if two current characters are LV and T */ sindex = last - SBase; - if (0 <= sindex && sindex < SCount && (sindex % TCount) == 0) { + if (sindex < (unsigned long) SCount + && (sindex % TCount) == 0) + { unsigned long tindex = ch - TBase; - if (0 <= tindex && tindex <= TCount) { + if (tindex <= (unsigned long) TCount) { /* make syllable of form LVT */ last += tindex; str[rlen-1] = last; /* reset last */ @@ -675,6 +675,10 @@ static unsigned long _ucdcmp_size; static unsigned long *_ucdcmp_nodes; static unsigned long *_ucdcmp_decomp; +static unsigned long _uckdcmp_size; +static unsigned long *_uckdcmp_nodes; +static unsigned long *_uckdcmp_decomp; + /* * Return -1 on error, 0 if okay */ @@ -726,6 +730,62 @@ _ucdcmp_load(char *paths, int reload) for (i = 0; i < size; i++) _ucdcmp_nodes[i] = endian_long(_ucdcmp_nodes[i]); } + fclose(in); + return 0; +} + +/* + * Return -1 on error, 0 if okay + */ +static int +_uckdcmp_load(char *paths, int reload) +{ + FILE *in; + unsigned long size, i; + _ucheader_t hdr; + + if (_uckdcmp_size > 0) { + if (!reload) + /* + * The decompositions have already been loaded. + */ + return 0; + + free((char *) _uckdcmp_nodes); + _uckdcmp_size = 0; + } + + if ((in = _ucopenfile(paths, "kdecomp.dat", "rb")) == 0) + return -1; + + /* + * Load the header. + */ + fread((char *) &hdr, sizeof(_ucheader_t), 1, in); + + if (hdr.bom == 0xfffe) { + hdr.cnt = endian_short(hdr.cnt); + hdr.size.bytes = endian_long(hdr.size.bytes); + } + + _uckdcmp_size = hdr.cnt << 1; + _uckdcmp_nodes = (unsigned long *) malloc(hdr.size.bytes); + _uckdcmp_decomp = _uckdcmp_nodes + (_uckdcmp_size + 1); + + /* + * Read the decomposition data in. + */ + size = hdr.size.bytes / sizeof(unsigned long); + fread((char *) _uckdcmp_nodes, sizeof(unsigned long), size, in); + + /* + * Do an endian swap if necessary. + */ + if (hdr.bom == 0xfffe) { + for (i = 0; i < size; i++) + _uckdcmp_nodes[i] = endian_long(_uckdcmp_nodes[i]); + } + fclose(in); return 0; } @@ -743,11 +803,29 @@ _ucdcmp_unload(void) _ucdcmp_size = 0; } +static void +_uckdcmp_unload(void) +{ + if (_uckdcmp_size == 0) + return; + + /* + * Only need to free the offsets because the memory is allocated as a + * single block. + */ + free((char *) _uckdcmp_nodes); + _uckdcmp_size = 0; +} + int ucdecomp(unsigned long code, unsigned long *num, unsigned long **decomp) { long l, r, m; + if (code < _ucdcmp_nodes[0]) { + return 0; + } + l = 0; r = _ucdcmp_nodes[_ucdcmp_size] - 1; @@ -771,6 +849,38 @@ ucdecomp(unsigned long code, unsigned long *num, unsigned long **decomp) return 0; } +int +uckdecomp(unsigned long code, unsigned long *num, unsigned long **decomp) +{ + long l, r, m; + + if (code < _uckdcmp_nodes[0]) { + return 0; + } + + l = 0; + r = _uckdcmp_nodes[_uckdcmp_size] - 1; + + while (l <= r) { + /* + * Determine a "mid" point and adjust to make sure the mid point is at + * the beginning of a code+offset pair. + */ + m = (l + r) >> 1; + m -= (m & 1); + if (code > _uckdcmp_nodes[m]) + l = m + 2; + else if (code < _uckdcmp_nodes[m]) + r = m - 2; + else if (code == _uckdcmp_nodes[m]) { + *num = _uckdcmp_nodes[m + 3] - _uckdcmp_nodes[m + 1]; + *decomp = &_uckdcmp_decomp[_uckdcmp_nodes[m + 1]]; + return 1; + } + } + return 0; +} + int ucdecomp_hangul(unsigned long code, unsigned long *num, unsigned long decomp[]) { @@ -786,11 +896,13 @@ ucdecomp_hangul(unsigned long code, unsigned long *num, unsigned long decomp[]) return 1; } -int -uccanondecomp(const unsigned long *in, int inlen, - unsigned long **out, int *outlen) +/* mode == 0 for canonical, mode == 1 for compatibility */ +static int +uccanoncompatdecomp(const unsigned long *in, int inlen, + unsigned long **out, int *outlen, short mode) { - int i, j, k, l, size; + int l, size; + unsigned i, j, k; unsigned long num, class, *decomp, hangdecomp[3]; size = inlen; @@ -799,9 +911,9 @@ uccanondecomp(const unsigned long *in, int inlen, return *outlen = -1; i = 0; - for (j = 0; j < inlen; j++) { - if (ucdecomp(in[j], &num, &decomp)) { - if (size - i < num) { + for (j = 0; j < (unsigned) inlen; j++) { + if (mode ? uckdecomp(in[j], &num, &decomp) : ucdecomp(in[j], &num, &decomp)) { + if ( size - i < num) { size = inlen + i - j + num - 1; *out = (unsigned long *) realloc(*out, size * sizeof(**out)); if (*out == NULL) @@ -815,7 +927,7 @@ uccanondecomp(const unsigned long *in, int inlen, for (l = i; l > 0; l--) if (class >= uccombining_class((*out)[l-1])) break; - memmove(*out + l + 1, *out + l, (i - l) * sizeof(**out)); + AC_MEMCPY(*out + l + 1, *out + l, (i - l) * sizeof(**out)); (*out)[l] = decomp[k]; } i++; @@ -845,7 +957,7 @@ uccanondecomp(const unsigned long *in, int inlen, for (l = i; l > 0; l--) if (class >= uccombining_class((*out)[l-1])) break; - memmove(*out + l + 1, *out + l, (i - l) * sizeof(**out)); + AC_MEMCPY(*out + l + 1, *out + l, (i - l) * sizeof(**out)); (*out)[l] = in[j]; } i++; @@ -854,6 +966,20 @@ uccanondecomp(const unsigned long *in, int inlen, return *outlen = i; } +int +uccanondecomp(const unsigned long *in, int inlen, + unsigned long **out, int *outlen) +{ + return uccanoncompatdecomp(in, inlen, out, outlen, 0); +} + +int +uccompatdecomp(const unsigned long *in, int inlen, + unsigned long **out, int *outlen) +{ + return uccanoncompatdecomp(in, inlen, out, outlen, 1); +} + /************************************************************************** * * Support for combining classes. @@ -912,6 +1038,7 @@ _uccmcl_load(char *paths, int reload) for (i = 0; i < _uccmcl_size; i++) _uccmcl_nodes[i] = endian_long(_uccmcl_nodes[i]); } + fclose(in); return 0; } @@ -1016,6 +1143,7 @@ _ucnumb_load(char *paths, int reload) for (i = 0; i < size; i++) _ucnum_vals[i] = endian_short(_ucnum_vals[i]); } + fclose(in); return 0; } @@ -1149,6 +1277,8 @@ ucdata_load(char *paths, int masks) error |= _ucnumb_load(paths, 0) < 0 ? UCDATA_NUM : 0; if (masks & UCDATA_COMP) error |= _uccomp_load(paths, 0) < 0 ? UCDATA_COMP : 0; + if (masks & UCDATA_KDECOMP) + error |= _uckdcmp_load(paths, 0) < 0 ? UCDATA_KDECOMP : 0; return -error; } @@ -1168,6 +1298,8 @@ ucdata_unload(int masks) _ucnumb_unload(); if (masks & UCDATA_COMP) _uccomp_unload(); + if (masks & UCDATA_KDECOMP) + _uckdcmp_unload(); } /* @@ -1190,6 +1322,8 @@ ucdata_reload(char *paths, int masks) error |= _ucnumb_load(paths, 1) < 0 ? UCDATA_NUM : 0; if (masks & UCDATA_COMP) error |= _uccomp_load(paths, 1) < 0 ? UCDATA_COMP : 0; + if (masks & UCDATA_KDECOMP) + error |= _uckdcmp_load(paths, 1) < 0 ? UCDATA_KDECOMP : 0; return -error; }