3 * Copyright 2000-2003 The OpenLDAP Foundation, All Rights Reserved.
4 * COPYING RESTRICTIONS APPLY, see COPYRIGHT file
7 * Copyright 2001 Computing Research Labs, New Mexico State University
9 * Permission is hereby granted, free of charge, to any person obtaining a
10 * copy of this software and associated documentation files (the "Software"),
11 * to deal in the Software without restriction, including without limitation
12 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 * and/or sell copies of the Software, and to permit persons to whom the
14 * Software is furnished to do so, subject to the following conditions:
16 * The above copyright notice and this permission notice shall be included in
17 * all copies or substantial portions of the Software.
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
23 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
24 * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
25 * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 /* $Id: ucdata.c,v 1.4 2001/01/02 18:46:20 mleisher Exp $" */
30 #include "ldap_config.h"
33 #include <ac/stdlib.h>
34 #include <ac/string.h>
35 #include <ac/unistd.h>
42 /**************************************************************************
44 * Miscellaneous types, data, and support functions.
46 **************************************************************************/
/*
 * A simple array of 32-bit masks for lookup: masks32[i] has only bit i set.
 * The table is never written, so it is declared const.
 */
static const unsigned long masks32[32] = {
    0x00000001UL, 0x00000002UL, 0x00000004UL, 0x00000008UL,
    0x00000010UL, 0x00000020UL, 0x00000040UL, 0x00000080UL,
    0x00000100UL, 0x00000200UL, 0x00000400UL, 0x00000800UL,
    0x00001000UL, 0x00002000UL, 0x00004000UL, 0x00008000UL,
    0x00010000UL, 0x00020000UL, 0x00040000UL, 0x00080000UL,
    0x00100000UL, 0x00200000UL, 0x00400000UL, 0x00800000UL,
    0x01000000UL, 0x02000000UL, 0x04000000UL, 0x08000000UL,
    0x10000000UL, 0x20000000UL, 0x40000000UL, 0x80000000UL
};
/*
 * Byte-order swap helpers for 16-bit and 32-bit quantities.
 *
 * Every byte is masked after it is shifted into place.  Without the mask a
 * negative (sign-extended) short, or an unsigned long that is wider than
 * 32 bits and has upper bits set, leaks stray bits into the result.
 * The argument is evaluated more than once, so it must have no side effects.
 */
#define endian_short(cc) \
    ((((cc) >> 8) & 0xff) | (((cc) & 0xff) << 8))
#define endian_long(cc) \
    (((((unsigned long) (cc)) & 0xff) << 24) | \
     (((((unsigned long) (cc)) >> 8) & 0xff) << 16) | \
     (((((unsigned long) (cc)) >> 16) & 0xff) << 8) | \
     ((((unsigned long) (cc)) >> 24) & 0xff))
/*
 * Search a ':'-separated list of directories for a file and open it.
 *
 * paths    - directory list; a null or empty list yields no candidates.
 * filename - name appended to each directory after a '/'.
 * mode     - mode string passed unchanged to fopen().
 *
 * Returns the first stream that fopen() succeeds on, or 0 when filename is
 * null/empty or no candidate could be opened.  A candidate whose full name
 * does not fit in the local buffer is skipped instead of being copied past
 * the end of the buffer.
 */
static FILE *
_ucopenfile(char *paths, char *filename, char *mode)
{
    FILE *f;
    char *dir, *end, path[BUFSIZ];
    size_t dirlen, namelen;

    if (filename == 0 || *filename == 0)
        return 0;

    namelen = strlen(filename);

    dir = paths;
    while (dir && *dir) {
        /* Find the end of the current directory component. */
        for (end = dir; *end && *end != ':'; end++)
            ;
        dirlen = (size_t) (end - dir);

        /* Need room for directory, '/', filename and the terminator. */
        if (namelen < sizeof(path) && dirlen < sizeof(path) - namelen - 1) {
            memcpy(path, dir, dirlen);
            path[dirlen] = '/';
            memcpy(path + dirlen + 1, filename, namelen + 1);

            if ((f = fopen(path, mode)) != 0)
                return f;
        }

        dir = (*end == ':') ? end + 1 : end;
    }

    return 0;
}
106 /**************************************************************************
108 * Support for the character properties.
110 **************************************************************************/
/* Number of property lists in the loaded ctype data; 0 means not loaded. */
static unsigned long _ucprop_size = 0;

/*
 * One starting offset per property plus a trailing sentinel; 0xffff marks a
 * property that has no ranges.
 */
static unsigned short *_ucprop_offsets = 0;

/* Range pairs (first, last) searched by the property lookup. */
static unsigned long *_ucprop_ranges = 0;
117 * Return -1 on error, 0 if okay
120 _ucprop_load(char *paths, int reload)
123 unsigned long size, i;
126 if (_ucprop_size > 0) {
129 * The character properties have already been loaded.
134 * Unload the current character property data in preparation for
135 * loading a new copy. Only the first array has to be deallocated
136 * because all the memory for the arrays is allocated as a single
139 free((char *) _ucprop_offsets);
143 if ((in = _ucopenfile(paths, "ctype.dat", "rb")) == 0)
149 fread((char *) &hdr, sizeof(_ucheader_t), 1, in);
151 if (hdr.bom == 0xfffe) {
152 hdr.cnt = endian_short(hdr.cnt);
153 hdr.size.bytes = endian_long(hdr.size.bytes);
156 if ((_ucprop_size = hdr.cnt) == 0) {
162 * Allocate all the storage needed for the lookup table.
164 _ucprop_offsets = (unsigned short *) malloc(hdr.size.bytes);
167 * Calculate the offset into the storage for the ranges. The offsets
168 * array is on a 4-byte boundary and one larger than the value provided in
169 * the header count field. This means the offset to the ranges must be
170 * calculated after aligning the count to a 4-byte boundary.
172 if ((size = ((hdr.cnt + 1) * sizeof(unsigned short))) & 3)
173 size += 4 - (size & 3);
175 _ucprop_ranges = (unsigned long *) (_ucprop_offsets + size);
178 * Load the offset array.
180 fread((char *) _ucprop_offsets, sizeof(unsigned short), size, in);
183 * Do an endian swap if necessary. Don't forget there is an extra node on
184 * the end with the final index.
186 if (hdr.bom == 0xfffe) {
187 for (i = 0; i <= _ucprop_size; i++)
188 _ucprop_offsets[i] = endian_short(_ucprop_offsets[i]);
192 * Load the ranges. The number of elements is in the last array position
195 fread((char *) _ucprop_ranges, sizeof(unsigned long),
196 _ucprop_offsets[_ucprop_size], in);
201 * Do an endian swap if necessary.
203 if (hdr.bom == 0xfffe) {
204 for (i = 0; i < _ucprop_offsets[_ucprop_size]; i++)
205 _ucprop_ranges[i] = endian_long(_ucprop_ranges[i]);
213 if (_ucprop_size == 0)
217 * Only need to free the offsets because the memory is allocated as a
220 free((char *) _ucprop_offsets);
225 _ucprop_lookup(unsigned long code, unsigned long n)
229 if (_ucprop_size == 0)
233 * There is an extra node on the end of the offsets to allow this routine
234 * to work right. If the index is 0xffff, then there are no nodes for the
237 if ((l = _ucprop_offsets[n]) == 0xffff)
241 * Locate the next offset that is not 0xffff. The sentinel at the end of
242 * the array is the max index value.
245 n + m < _ucprop_size && _ucprop_offsets[n + m] == 0xffff; m++) ;
247 r = _ucprop_offsets[n + m] - 1;
251 * Determine a "mid" point and adjust to make sure the mid point is at
252 * the beginning of a range pair.
256 if (code > _ucprop_ranges[m + 1])
258 else if (code < _ucprop_ranges[m])
260 else if (code >= _ucprop_ranges[m] && code <= _ucprop_ranges[m + 1])
267 ucisprop(unsigned long code, unsigned long mask1, unsigned long mask2)
271 if (mask1 == 0 && mask2 == 0)
274 for (i = 0; mask1 && i < 32; i++) {
275 if ((mask1 & masks32[i]) && _ucprop_lookup(code, i))
279 for (i = 32; mask2 && i < _ucprop_size; i++) {
280 if ((mask2 & masks32[i & 31]) && _ucprop_lookup(code, i))
287 /**************************************************************************
289 * Support for case mapping.
291 **************************************************************************/
/* Total number of entries in _uccase_map (three per mapping); 0 = not loaded. */
static unsigned long _uccase_size = 0;

/* Entry counts of the first two sections of _uccase_map. */
static unsigned short _uccase_len[2] = { 0, 0 };

/* Case mapping triples; the first entry of each triple is the search key. */
static unsigned long *_uccase_map = 0;
298 * Return -1 on error, 0 if okay
301 _uccase_load(char *paths, int reload)
307 if (_uccase_size > 0) {
310 * The case mappings have already been loaded.
314 free((char *) _uccase_map);
318 if ((in = _ucopenfile(paths, "case.dat", "rb")) == 0)
324 fread((char *) &hdr, sizeof(_ucheader_t), 1, in);
326 if (hdr.bom == 0xfffe) {
327 hdr.cnt = endian_short(hdr.cnt);
328 hdr.size.len[0] = endian_short(hdr.size.len[0]);
329 hdr.size.len[1] = endian_short(hdr.size.len[1]);
333 * Set the node count and lengths of the upper and lower case mapping
336 _uccase_size = hdr.cnt * 3;
337 _uccase_len[0] = hdr.size.len[0] * 3;
338 _uccase_len[1] = hdr.size.len[1] * 3;
340 _uccase_map = (unsigned long *)
341 malloc(_uccase_size * sizeof(unsigned long));
344 * Load the case mapping table.
346 fread((char *) _uccase_map, sizeof(unsigned long), _uccase_size, in);
349 * Do an endian swap if necessary.
351 if (hdr.bom == 0xfffe) {
352 for (i = 0; i < _uccase_size; i++)
353 _uccase_map[i] = endian_long(_uccase_map[i]);
362 if (_uccase_size == 0)
365 free((char *) _uccase_map);
370 _uccase_lookup(unsigned long code, long l, long r, int field)
375 * Do the binary search.
379 * Determine a "mid" point and adjust to make sure the mid point is at
380 * the beginning of a case mapping triple.
384 if (code > _uccase_map[m])
386 else if (code < _uccase_map[m])
388 else if (code == _uccase_map[m])
389 return _uccase_map[m + field];
396 uctoupper(unsigned long code)
404 if (ucislower(code)) {
406 * The character is lower case.
410 r = (l + _uccase_len[1]) - 3;
413 * The character is title case.
416 l = _uccase_len[0] + _uccase_len[1];
417 r = _uccase_size - 3;
419 return _uccase_lookup(code, l, r, field);
423 uctolower(unsigned long code)
431 if (ucisupper(code)) {
433 * The character is upper case.
437 r = _uccase_len[0] - 3;
440 * The character is title case.
443 l = _uccase_len[0] + _uccase_len[1];
444 r = _uccase_size - 3;
446 return _uccase_lookup(code, l, r, field);
450 uctotitle(unsigned long code)
459 * The offset will always be the same for converting to title case.
463 if (ucisupper(code)) {
465 * The character is upper case.
468 r = _uccase_len[0] - 3;
471 * The character is lower case.
474 r = (l + _uccase_len[1]) - 3;
476 return _uccase_lookup(code, l, r, field);
479 /**************************************************************************
481 * Support for compositions.
483 **************************************************************************/
/* Number of entries of _uccomp_data that uccomp() searches; 0 = not loaded. */
static unsigned long _uccomp_size = 0;

/*
 * Composition records, four entries each: composite, count, first code,
 * second code.
 */
static unsigned long *_uccomp_data = 0;
489 * Return -1 on error, 0 if okay
492 _uccomp_load(char *paths, int reload)
495 unsigned long size, i;
498 if (_uccomp_size > 0) {
501 * The compositions have already been loaded.
505 free((char *) _uccomp_data);
509 if ((in = _ucopenfile(paths, "comp.dat", "rb")) == 0)
515 fread((char *) &hdr, sizeof(_ucheader_t), 1, in);
517 if (hdr.bom == 0xfffe) {
518 hdr.cnt = endian_short(hdr.cnt);
519 hdr.size.bytes = endian_long(hdr.size.bytes);
522 _uccomp_size = hdr.cnt;
523 _uccomp_data = (unsigned long *) malloc(hdr.size.bytes);
526 * Read the composition data in.
528 size = hdr.size.bytes / sizeof(unsigned long);
529 fread((char *) _uccomp_data, sizeof(unsigned long), size, in);
532 * Do an endian swap if necessary.
534 if (hdr.bom == 0xfffe) {
535 for (i = 0; i < size; i++)
536 _uccomp_data[i] = endian_long(_uccomp_data[i]);
540 * Assume that the data is ordered on count, so that all compositions
541 * of length 2 come first. Only handling length 2 for now.
543 for (i = 1; i < size; i += 4)
544 if (_uccomp_data[i] != 2)
546 _uccomp_size = i - 1;
555 if (_uccomp_size == 0)
558 free((char *) _uccomp_data);
563 uccomp(unsigned long node1, unsigned long node2, unsigned long *comp)
568 r = _uccomp_size - 1;
573 if (node1 > _uccomp_data[m+2])
575 else if (node1 < _uccomp_data[m+2])
577 else if (node2 > _uccomp_data[m+3])
579 else if (node2 < _uccomp_data[m+3])
582 *comp = _uccomp_data[m];
/*
 * Compose conjoining Hangul jamo in place.
 *
 * str - array of code points, rewritten in place.
 * len - number of code points in str.
 *
 * An L followed by a V becomes an LV syllable, and an LV syllable followed
 * by a T becomes an LVT syllable.  Returns the new length of str, which is
 * 0 for an empty input.
 *
 * A trailing consonant is accepted only for 0 < tindex < TCount.  Index 0
 * is TBase itself and index TCount lies one past the last trailing
 * consonant, so neither may be folded into a syllable.
 */
int
uccomp_hangul(unsigned long *str, int len)
{
    enum {
        SBase = 0xAC00, LBase = 0x1100,
        VBase = 0x1161, TBase = 0x11A7,
        LCount = 19, VCount = 21, TCount = 28,
        NCount = VCount * TCount,   /* 588 */
        SCount = LCount * NCount    /* 11172 */
    };

    int i, rlen;
    unsigned long ch, last, lindex, sindex;

    if (len <= 0)
        return 0;

    last = str[0];
    rlen = 1;
    for (i = 1; i < len; i++) {
        ch = str[i];

        /* check if two current characters are L and V */
        lindex = last - LBase;
        if (lindex < (unsigned long) LCount) {
            unsigned long vindex = ch - VBase;
            if (vindex < (unsigned long) VCount) {
                /* make syllable of form LV */
                last = SBase + (lindex * VCount + vindex) * TCount;
                str[rlen - 1] = last; /* reset last */
                continue;
            }
        }

        /* check if two current characters are LV and T */
        sindex = last - SBase;
        if (sindex < (unsigned long) SCount && (sindex % TCount) == 0) {
            unsigned long tindex = ch - TBase;
            if (tindex != 0 && tindex < (unsigned long) TCount) {
                /* make syllable of form LVT */
                last += tindex;
                str[rlen - 1] = last; /* reset last */
                continue;
            }
        }

        /* if neither case was true, just add the character */
        last = ch;
        str[rlen] = ch;
        rlen++;
    }
    return rlen;
}
/*
 * Compose a code point sequence in place.
 *
 * str - array of code points, rewritten in place.
 * len - number of code points in str.
 *
 * Each character is combined with the current starter when uccomp() has a
 * composition for the pair and the previous combining class is either 0 or
 * lower than the character's class.  The result is then passed through
 * uccomp_hangul().  Returns the new length; an empty input returns 0
 * without touching str.
 */
int
uccanoncomp(unsigned long *str, int len)
{
    int i, stpos, copos;
    unsigned long cl, prevcl, st, ch, co;

    if (len <= 0)
        return 0;

    st = str[0];
    stpos = 0;
    copos = 1;
    prevcl = uccombining_class(st) == 0 ? 0 : 256;

    for (i = 1; i < len; i++) {
        ch = str[i];
        cl = uccombining_class(ch);
        if (uccomp(st, ch, &co) && (prevcl < cl || prevcl == 0)) {
            /* Fold the character into the current starter. */
            st = str[stpos] = co;
        } else {
            if (cl == 0) {
                /* A class-0 character becomes the new starter. */
                stpos = copos;
                st = ch;
            }
            prevcl = cl;
            str[copos++] = ch;
        }
    }

    return uccomp_hangul(str, copos);
}
669 /**************************************************************************
671 * Support for decompositions.
673 **************************************************************************/
/*
 * Canonical decomposition data.  _ucdcmp_size is the number of node entries
 * (two per decomposable code); 0 means not loaded.  _ucdcmp_decomp points
 * into the same block as _ucdcmp_nodes.
 */
static unsigned long _ucdcmp_size = 0;
static unsigned long *_ucdcmp_nodes = 0;
static unsigned long *_ucdcmp_decomp = 0;

/*
 * Compatibility decomposition data, laid out like the canonical data.
 */
static unsigned long _uckdcmp_size = 0;
static unsigned long *_uckdcmp_nodes = 0;
static unsigned long *_uckdcmp_decomp = 0;
684 * Return -1 on error, 0 if okay
687 _ucdcmp_load(char *paths, int reload)
690 unsigned long size, i;
693 if (_ucdcmp_size > 0) {
696 * The decompositions have already been loaded.
700 free((char *) _ucdcmp_nodes);
704 if ((in = _ucopenfile(paths, "decomp.dat", "rb")) == 0)
710 fread((char *) &hdr, sizeof(_ucheader_t), 1, in);
712 if (hdr.bom == 0xfffe) {
713 hdr.cnt = endian_short(hdr.cnt);
714 hdr.size.bytes = endian_long(hdr.size.bytes);
717 _ucdcmp_size = hdr.cnt << 1;
718 _ucdcmp_nodes = (unsigned long *) malloc(hdr.size.bytes);
719 _ucdcmp_decomp = _ucdcmp_nodes + (_ucdcmp_size + 1);
722 * Read the decomposition data in.
724 size = hdr.size.bytes / sizeof(unsigned long);
725 fread((char *) _ucdcmp_nodes, sizeof(unsigned long), size, in);
728 * Do an endian swap if necessary.
730 if (hdr.bom == 0xfffe) {
731 for (i = 0; i < size; i++)
732 _ucdcmp_nodes[i] = endian_long(_ucdcmp_nodes[i]);
739 * Return -1 on error, 0 if okay
742 _uckdcmp_load(char *paths, int reload)
745 unsigned long size, i;
748 if (_uckdcmp_size > 0) {
751 * The decompositions have already been loaded.
755 free((char *) _uckdcmp_nodes);
759 if ((in = _ucopenfile(paths, "kdecomp.dat", "rb")) == 0)
765 fread((char *) &hdr, sizeof(_ucheader_t), 1, in);
767 if (hdr.bom == 0xfffe) {
768 hdr.cnt = endian_short(hdr.cnt);
769 hdr.size.bytes = endian_long(hdr.size.bytes);
772 _uckdcmp_size = hdr.cnt << 1;
773 _uckdcmp_nodes = (unsigned long *) malloc(hdr.size.bytes);
774 _uckdcmp_decomp = _uckdcmp_nodes + (_uckdcmp_size + 1);
777 * Read the decomposition data in.
779 size = hdr.size.bytes / sizeof(unsigned long);
780 fread((char *) _uckdcmp_nodes, sizeof(unsigned long), size, in);
783 * Do an endian swap if necessary.
785 if (hdr.bom == 0xfffe) {
786 for (i = 0; i < size; i++)
787 _uckdcmp_nodes[i] = endian_long(_uckdcmp_nodes[i]);
796 if (_ucdcmp_size == 0)
800 * Only need to free the offsets because the memory is allocated as a
803 free((char *) _ucdcmp_nodes);
808 _uckdcmp_unload(void)
810 if (_uckdcmp_size == 0)
814 * Only need to free the offsets because the memory is allocated as a
817 free((char *) _uckdcmp_nodes);
822 ucdecomp(unsigned long code, unsigned long *num, unsigned long **decomp)
826 if (code < _ucdcmp_nodes[0]) {
831 r = _ucdcmp_nodes[_ucdcmp_size] - 1;
835 * Determine a "mid" point and adjust to make sure the mid point is at
836 * the beginning of a code+offset pair.
840 if (code > _ucdcmp_nodes[m])
842 else if (code < _ucdcmp_nodes[m])
844 else if (code == _ucdcmp_nodes[m]) {
845 *num = _ucdcmp_nodes[m + 3] - _ucdcmp_nodes[m + 1];
846 *decomp = &_ucdcmp_decomp[_ucdcmp_nodes[m + 1]];
854 uckdecomp(unsigned long code, unsigned long *num, unsigned long **decomp)
858 if (code < _uckdcmp_nodes[0]) {
863 r = _uckdcmp_nodes[_uckdcmp_size] - 1;
867 * Determine a "mid" point and adjust to make sure the mid point is at
868 * the beginning of a code+offset pair.
872 if (code > _uckdcmp_nodes[m])
874 else if (code < _uckdcmp_nodes[m])
876 else if (code == _uckdcmp_nodes[m]) {
877 *num = _uckdcmp_nodes[m + 3] - _uckdcmp_nodes[m + 1];
878 *decomp = &_uckdcmp_decomp[_uckdcmp_nodes[m + 1]];
/*
 * Decompose a Hangul syllable arithmetically.
 *
 * code   - code point to decompose.
 * num    - receives 2 or 3, the number of values written to decomp.
 * decomp - caller-supplied array with room for three values.
 *
 * Returns 0 without writing anything when ucishangul() rejects the code,
 * 1 otherwise.  decomp[2] is always written; *num is 2 when it equals
 * 0x11a7, i.e. when the syllable has no trailing part.
 */
int
ucdecomp_hangul(unsigned long code, unsigned long *num, unsigned long decomp[])
{
    unsigned long sindex, trail;

    if (!ucishangul(code))
        return 0;

    /* Index of the syllable within the block starting at 0xac00. */
    sindex = code - 0xac00;
    trail = sindex % 28;

    decomp[0] = 0x1100 + (unsigned long) (sindex / 588);
    decomp[1] = 0x1161 + (unsigned long) ((sindex % 588) / 28);
    decomp[2] = 0x11a7 + trail;

    if (trail != 0)
        *num = 3;
    else
        *num = 2;

    return 1;
}
/*
 * Store `ch` in buf, which already holds `used` code points.
 *
 * A character of combining class 0 is appended at buf[used].  Any other
 * character is inserted before the trailing run of characters whose
 * combining class is greater than its own, shifting that run up by one.
 * The caller guarantees room for one more element.
 */
static void
_ucdecomp_put_ordered(unsigned long *buf, unsigned used, unsigned long ch)
{
    unsigned long class = uccombining_class(ch);
    int l;

    if (class == 0) {
        buf[used] = ch;
        return;
    }

    for (l = used; l > 0; l--)
        if (class >= uccombining_class(buf[l - 1]))
            break;
    AC_MEMCPY(buf + l + 1, buf + l, (used - l) * sizeof(*buf));
    buf[l] = ch;
}

/*
 * Decompose a code point sequence into a newly allocated array.
 *
 * in, inlen - input code points.
 * out       - receives the array obtained from ber_memalloc_x() /
 *             ber_memrealloc_x() using ctx.
 * outlen    - receives the number of code points in *out, or -1 on
 *             allocation failure.
 * mode      - mode == 0 for canonical, mode == 1 for compatibility.
 *
 * Table decompositions and undecomposed characters are inserted according
 * to their combining class; Hangul decompositions are appended as they are.
 * The return value equals *outlen.
 *
 * NOTE(review): when ber_memrealloc_x() fails, *out has already been
 * overwritten with its NULL result - confirm whether the old block should
 * be released.
 */
static int
uccanoncompatdecomp(const unsigned long *in, int inlen,
                    unsigned long **out, int *outlen, short mode, void *ctx)
{
    int size, ordered;
    unsigned i, j, k;
    unsigned long num, self, *decomp, hangdecomp[3];
    const unsigned long *src;

    size = inlen * 2;
    *out = (unsigned long *) ber_memalloc_x(size * sizeof(**out), ctx);
    if (*out == NULL)
        return *outlen = -1;

    i = 0;
    for (j = 0; j < (unsigned) inlen; j++) {
        /* Work out which code points in[j] contributes. */
        if (mode ? uckdecomp(in[j], &num, &decomp)
                 : ucdecomp(in[j], &num, &decomp)) {
            src = decomp;
            ordered = 1;
        } else if (ucdecomp_hangul(in[j], &num, hangdecomp)) {
            src = hangdecomp;
            ordered = 0;
        } else {
            self = in[j];
            src = &self;
            num = 1;
            ordered = 1;
        }

        /*
         * Grow the output when the free space cannot take `num` more code
         * points.
         */
        if (size - i < num) {
            size = inlen + i - j + num - 1;
            *out = (unsigned long *)
                ber_memrealloc_x(*out, size * sizeof(**out), ctx);
            if (*out == NULL)
                return *outlen = -1;
        }

        for (k = 0; k < num; k++, i++) {
            if (ordered)
                _ucdecomp_put_ordered(*out, i, src[k]);
            else
                (*out)[i] = src[k];
        }
    }
    return *outlen = i;
}
/*
 * Canonical decomposition of a code point sequence.
 *
 * Forwards to uccanoncompatdecomp() with mode 0 and returns its result;
 * see that function for the meaning of the arguments.
 */
int
uccanondecomp(const unsigned long *in, int inlen,
              unsigned long **out, int *outlen, void *ctx)
{
    const short canonical = 0;

    return uccanoncompatdecomp(in, inlen, out, outlen, canonical, ctx);
}
/*
 * Compatibility decomposition of a code point sequence.
 *
 * Forwards to uccanoncompatdecomp() with mode 1 and returns its result;
 * see that function for the meaning of the arguments.
 */
int
uccompatdecomp(const unsigned long *in, int inlen,
               unsigned long **out, int *outlen, void *ctx)
{
    const short compatibility = 1;

    return uccanoncompatdecomp(in, inlen, out, outlen, compatibility, ctx);
}
984 /**************************************************************************
986 * Support for combining classes.
988 **************************************************************************/
/* Number of entries in _uccmcl_nodes (three per range); 0 = not loaded. */
static unsigned long _uccmcl_size = 0;

/* Combining class triples: first code, last code, class. */
static unsigned long *_uccmcl_nodes = 0;
994 * Return -1 on error, 0 if okay
997 _uccmcl_load(char *paths, int reload)
1003 if (_uccmcl_size > 0) {
1006 * The combining classes have already been loaded.
1010 free((char *) _uccmcl_nodes);
1014 if ((in = _ucopenfile(paths, "cmbcl.dat", "rb")) == 0)
1020 fread((char *) &hdr, sizeof(_ucheader_t), 1, in);
1022 if (hdr.bom == 0xfffe) {
1023 hdr.cnt = endian_short(hdr.cnt);
1024 hdr.size.bytes = endian_long(hdr.size.bytes);
1027 _uccmcl_size = hdr.cnt * 3;
1028 _uccmcl_nodes = (unsigned long *) malloc(hdr.size.bytes);
1031 * Read the combining classes in.
1033 fread((char *) _uccmcl_nodes, sizeof(unsigned long), _uccmcl_size, in);
1036 * Do an endian swap if necessary.
1038 if (hdr.bom == 0xfffe) {
1039 for (i = 0; i < _uccmcl_size; i++)
1040 _uccmcl_nodes[i] = endian_long(_uccmcl_nodes[i]);
1047 _uccmcl_unload(void)
1049 if (_uccmcl_size == 0)
1052 free((char *) _uccmcl_nodes);
1057 uccombining_class(unsigned long code)
1062 r = _uccmcl_size - 1;
1067 if (code > _uccmcl_nodes[m + 1])
1069 else if (code < _uccmcl_nodes[m])
1071 else if (code >= _uccmcl_nodes[m] && code <= _uccmcl_nodes[m + 1])
1072 return _uccmcl_nodes[m + 2];
1077 /**************************************************************************
1079 * Support for numeric values.
1081 **************************************************************************/
/* Code/offset pairs searched by the number lookups. */
static unsigned long *_ucnum_nodes = 0;

/* Number of entries in _ucnum_nodes; 0 = not loaded. */
static unsigned long _ucnum_size = 0;

/*
 * Numerator/denominator values, stored in the same block right after the
 * nodes.
 */
static short *_ucnum_vals = 0;
1088 * Return -1 on error, 0 if okay
1091 _ucnumb_load(char *paths, int reload)
1094 unsigned long size, i;
1097 if (_ucnum_size > 0) {
1100 * The numbers have already been loaded.
1104 free((char *) _ucnum_nodes);
1108 if ((in = _ucopenfile(paths, "num.dat", "rb")) == 0)
1114 fread((char *) &hdr, sizeof(_ucheader_t), 1, in);
1116 if (hdr.bom == 0xfffe) {
1117 hdr.cnt = endian_short(hdr.cnt);
1118 hdr.size.bytes = endian_long(hdr.size.bytes);
1121 _ucnum_size = hdr.cnt;
1122 _ucnum_nodes = (unsigned long *) malloc(hdr.size.bytes);
1123 _ucnum_vals = (short *) (_ucnum_nodes + _ucnum_size);
1126 * Read the combining classes in.
1128 fread((char *) _ucnum_nodes, sizeof(unsigned char), hdr.size.bytes, in);
1131 * Do an endian swap if necessary.
1133 if (hdr.bom == 0xfffe) {
1134 for (i = 0; i < _ucnum_size; i++)
1135 _ucnum_nodes[i] = endian_long(_ucnum_nodes[i]);
1138 * Determine the number of values that have to be adjusted.
1140 size = (hdr.size.bytes -
1141 (_ucnum_size * (sizeof(unsigned long) << 1))) /
1144 for (i = 0; i < size; i++)
1145 _ucnum_vals[i] = endian_short(_ucnum_vals[i]);
1152 _ucnumb_unload(void)
1154 if (_ucnum_size == 0)
1157 free((char *) _ucnum_nodes);
1162 ucnumber_lookup(unsigned long code, struct ucnumber *num)
1168 r = _ucnum_size - 1;
1171 * Determine a "mid" point and adjust to make sure the mid point is at
1172 * the beginning of a code+offset pair.
1176 if (code > _ucnum_nodes[m])
1178 else if (code < _ucnum_nodes[m])
1181 vp = _ucnum_vals + _ucnum_nodes[m + 1];
1182 num->numerator = (int) *vp++;
1183 num->denominator = (int) *vp;
1191 ucdigit_lookup(unsigned long code, int *digit)
1197 r = _ucnum_size - 1;
1200 * Determine a "mid" point and adjust to make sure the mid point is at
1201 * the beginning of a code+offset pair.
1205 if (code > _ucnum_nodes[m])
1207 else if (code < _ucnum_nodes[m])
1210 vp = _ucnum_vals + _ucnum_nodes[m + 1];
1211 if (*vp == *(vp + 1)) {
1222 ucgetnumber(unsigned long code)
1224 struct ucnumber num;
1227 * Initialize with some arbitrary value, because the caller simply cannot
1228 * tell for sure if the code is a number without calling the ucisnumber()
1229 * macro before calling this function.
1231 num.numerator = num.denominator = -111;
1233 (void) ucnumber_lookup(code, &num);
/*
 * Return the digit value of a code point.
 *
 * The result is preset to -111 and stays that way when the lookup finds
 * nothing.  The caller cannot tell from the result alone whether the code
 * is a digit; that has to be checked with ucisdigit() beforehand.
 */
int
ucgetdigit(unsigned long code)
{
    int value = -111;

    (void) ucdigit_lookup(code, &value);

    return value;
}
1255 /**************************************************************************
1257 * Setup and cleanup routines.
1259 **************************************************************************/
1262 * Return 0 if okay, negative on error
1265 ucdata_load(char *paths, int masks)
1269 if (masks & UCDATA_CTYPE)
1270 error |= _ucprop_load(paths, 0) < 0 ? UCDATA_CTYPE : 0;
1271 if (masks & UCDATA_CASE)
1272 error |= _uccase_load(paths, 0) < 0 ? UCDATA_CASE : 0;
1273 if (masks & UCDATA_DECOMP)
1274 error |= _ucdcmp_load(paths, 0) < 0 ? UCDATA_DECOMP : 0;
1275 if (masks & UCDATA_CMBCL)
1276 error |= _uccmcl_load(paths, 0) < 0 ? UCDATA_CMBCL : 0;
1277 if (masks & UCDATA_NUM)
1278 error |= _ucnumb_load(paths, 0) < 0 ? UCDATA_NUM : 0;
1279 if (masks & UCDATA_COMP)
1280 error |= _uccomp_load(paths, 0) < 0 ? UCDATA_COMP : 0;
1281 if (masks & UCDATA_KDECOMP)
1282 error |= _uckdcmp_load(paths, 0) < 0 ? UCDATA_KDECOMP : 0;
1288 ucdata_unload(int masks)
1290 if (masks & UCDATA_CTYPE)
1292 if (masks & UCDATA_CASE)
1294 if (masks & UCDATA_DECOMP)
1296 if (masks & UCDATA_CMBCL)
1298 if (masks & UCDATA_NUM)
1300 if (masks & UCDATA_COMP)
1302 if (masks & UCDATA_KDECOMP)
1307 * Return 0 if okay, negative on error
1310 ucdata_reload(char *paths, int masks)
1314 if (masks & UCDATA_CTYPE)
1315 error |= _ucprop_load(paths, 1) < 0 ? UCDATA_CTYPE : 0;
1316 if (masks & UCDATA_CASE)
1317 error |= _uccase_load(paths, 1) < 0 ? UCDATA_CASE : 0;
1318 if (masks & UCDATA_DECOMP)
1319 error |= _ucdcmp_load(paths, 1) < 0 ? UCDATA_DECOMP : 0;
1320 if (masks & UCDATA_CMBCL)
1321 error |= _uccmcl_load(paths, 1) < 0 ? UCDATA_CMBCL : 0;
1322 if (masks & UCDATA_NUM)
1323 error |= _ucnumb_load(paths, 1) < 0 ? UCDATA_NUM : 0;
1324 if (masks & UCDATA_COMP)
1325 error |= _uccomp_load(paths, 1) < 0 ? UCDATA_COMP : 0;
1326 if (masks & UCDATA_KDECOMP)
1327 error |= _uckdcmp_load(paths, 1) < 0 ? UCDATA_KDECOMP : 0;
1338 unsigned long i, lo, *dec;
1339 struct ucnumber num;
1346 printf("NOT WEAK\n");
1348 printf("LOWER 0x%04lX\n", uctolower(0xff3a));
1349 printf("UPPER 0x%04lX\n", uctoupper(0xff5a));
1351 if (ucisalpha(0x1d5))
1354 printf("NOT ALPHA\n");
1356 if (ucisupper(0x1d5)) {
1358 lo = uctolower(0x1d5);
1359 printf("0x%04lx\n", lo);
1360 lo = uctotitle(0x1d5);
1361 printf("0x%04lx\n", lo);
1363 printf("NOT UPPER\n");
1365 if (ucistitle(0x1d5))
1368 printf("NOT TITLE\n");
1370 if (uciscomposite(0x1d5))
1371 printf("COMPOSITE\n");
1373 printf("NOT COMPOSITE\n");
1375 if (ucdecomp(0x1d5, &lo, &dec)) {
1376 for (i = 0; i < lo; i++)
1377 printf("0x%04lx ", dec[i]);
1381 if ((lo = uccombining_class(0x41)) != 0)
1382 printf("0x41 CCL %ld\n", lo);
1384 if (ucisxdigit(0xfeff))
1385 printf("0xFEFF HEX DIGIT\n");
1387 printf("0xFEFF NOT HEX DIGIT\n");
1389 if (ucisdefined(0x10000))
1390 printf("0x10000 DEFINED\n");
1392 printf("0x10000 NOT DEFINED\n");
1394 if (ucnumber_lookup(0x30, &num)) {
1395 if (num.numerator != num.denominator)
1396 printf("UCNUMBER: 0x30 = %d/%d\n", num.numerator, num.denominator);
1398 printf("UCNUMBER: 0x30 = %d\n", num.numerator);
1400 printf("UCNUMBER: 0x30 NOT A NUMBER\n");
1402 if (ucnumber_lookup(0xbc, &num)) {
1403 if (num.numerator != num.denominator)
1404 printf("UCNUMBER: 0xbc = %d/%d\n", num.numerator, num.denominator);
1406 printf("UCNUMBER: 0xbc = %d\n", num.numerator);
1408 printf("UCNUMBER: 0xbc NOT A NUMBER\n");
1411 if (ucnumber_lookup(0xff19, &num)) {
1412 if (num.numerator != num.denominator)
1413 printf("UCNUMBER: 0xff19 = %d/%d\n", num.numerator, num.denominator);
1415 printf("UCNUMBER: 0xff19 = %d\n", num.numerator);
1417 printf("UCNUMBER: 0xff19 NOT A NUMBER\n");
1419 if (ucnumber_lookup(0x4e00, &num)) {
1420 if (num.numerator != num.denominator)
1421 printf("UCNUMBER: 0x4e00 = %d/%d\n", num.numerator, num.denominator);
1423 printf("UCNUMBER: 0x4e00 = %d\n", num.numerator);
1425 printf("UCNUMBER: 0x4e00 NOT A NUMBER\n");
1427 if (ucdigit_lookup(0x06f9, &dig))
1428 printf("UCDIGIT: 0x6f9 = %d\n", dig);
1430 printf("UCDIGIT: 0x6f9 NOT A NUMBER\n");
1432 dig = ucgetdigit(0x0969);
1433 printf("UCGETDIGIT: 0x969 = %d\n", dig);
1435 num = ucgetnumber(0x30);
1436 if (num.numerator != num.denominator)
1437 printf("UCGETNUMBER: 0x30 = %d/%d\n", num.numerator, num.denominator);
1439 printf("UCGETNUMBER: 0x30 = %d\n", num.numerator);
1441 num = ucgetnumber(0xbc);
1442 if (num.numerator != num.denominator)
1443 printf("UCGETNUMBER: 0xbc = %d/%d\n", num.numerator, num.denominator);
1445 printf("UCGETNUMBER: 0xbc = %d\n", num.numerator);
1447 num = ucgetnumber(0xff19);
1448 if (num.numerator != num.denominator)
1449 printf("UCGETNUMBER: 0xff19 = %d/%d\n", num.numerator, num.denominator);
1451 printf("UCGETNUMBER: 0xff19 = %d\n", num.numerator);