3 * Copyright 2000 The OpenLDAP Foundation, All Rights Reserved.
4 * COPYING RESTRICTIONS APPLY, see COPYRIGHT file
7 * Copyright 2001 Computing Research Labs, New Mexico State University
9 * Permission is hereby granted, free of charge, to any person obtaining a
10 * copy of this software and associated documentation files (the "Software"),
11 * to deal in the Software without restriction, including without limitation
12 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 * and/or sell copies of the Software, and to permit persons to whom the
14 * Software is furnished to do so, subject to the following conditions:
16 * The above copyright notice and this permission notice shall be included in
17 * all copies or substantial portions of the Software.
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
23 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
24 * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
25 * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 /* $Id: ucdata.c,v 1.4 2001/01/02 18:46:20 mleisher Exp $" */
30 #include "ldap_config.h"
33 #include <ac/stdlib.h>
34 #include <ac/string.h>
35 #include <ac/unistd.h>
40 /**************************************************************************
42 * Miscellaneous types, data, and support functions.
44 **************************************************************************/
/* Pair of 16-bit lengths; the case-mapping loader reads them as hdr.size.len[0] and hdr.size.len[1]. */
51 unsigned short len[2];
56 * A simple array of 32-bit masks for lookup.
/*
 * Single-bit masks, one per bit position: masks32[i] has only bit i set
 * (i = 0..31).  The table is only ever read, so it is declared const.
 */
static const unsigned long masks32[32] = {
    0x00000001UL, 0x00000002UL, 0x00000004UL, 0x00000008UL,
    0x00000010UL, 0x00000020UL, 0x00000040UL, 0x00000080UL,
    0x00000100UL, 0x00000200UL, 0x00000400UL, 0x00000800UL,
    0x00001000UL, 0x00002000UL, 0x00004000UL, 0x00008000UL,
    0x00010000UL, 0x00020000UL, 0x00040000UL, 0x00080000UL,
    0x00100000UL, 0x00200000UL, 0x00400000UL, 0x00800000UL,
    0x01000000UL, 0x02000000UL, 0x04000000UL, 0x08000000UL,
    0x10000000UL, 0x20000000UL, 0x40000000UL, 0x80000000UL
};
/*
 * Byte-order swapping helpers for 16-bit and 32-bit quantities.
 *
 * Every byte is masked with 0xff after it is shifted into place.  Without
 * the mask on the shifted-down high byte, a negative (signed) 16-bit value
 * is sign-extended by integer promotion and the extension bits overwrite
 * the byte that is moved into the high position; likewise any bits above
 * bit 31 of a wider-than-32-bit argument would leak into the result.
 *
 * Both macros evaluate their argument more than once: do not pass an
 * expression with side effects.  endian_long() expects an unsigned long
 * argument.
 */
#define endian_short(cc) ((((cc) >> 8) & 0xff) | (((cc) & 0xff) << 8))
#define endian_long(cc) ((((cc) & 0xff) << 24) | ((((cc) >> 8) & 0xff) << 16) | \
                         ((((cc) >> 16) & 0xff) << 8) | (((cc) >> 24) & 0xff))
/*
 * Open the data file `filename` with fopen() mode `mode`.  A null or empty
 * `filename` is tested for first.  The name handed to fopen() is assembled
 * in the local `path` buffer; `paths` is scanned one ':'-delimited entry at
 * a time.
 * NOTE(review): `path` is a fixed BUFSIZ-sized array and no length check on
 * what is copied into it is visible -- confirm an over-long `paths` entry
 * or `filename` cannot overflow it.
 */
74 _ucopenfile(char *paths, char *filename, char *mode)
77 char *fp, *dp, *pp, path[BUFSIZ];
79 if (filename == 0 || *filename == 0)
/* advance through the current `paths` entry, stopping at ':' or the end of the string */
85 while (*dp && *dp != ':')
/* the first candidate that fopen() accepts ends the search */
94 if ((f = fopen(path, mode)) != 0)
104 /**************************************************************************
106 * Support for the character properties.
108 **************************************************************************/
110 static unsigned long _ucprop_size; /* count taken from the ctype.dat header; 0 is tested as "no property data" */
111 static unsigned short *_ucprop_offsets; /* start of the single malloc'd table block; an entry of 0xffff is tested as "no ranges" */
112 static unsigned long *_ucprop_ranges; /* range values; points into the block that _ucprop_offsets owns */
115 * Return -1 on error, 0 if okay
/*
 * Load the character property tables from "ctype.dat", which is opened
 * through _ucopenfile() using `paths`.  Everything is kept in one malloc'd
 * block: the unsigned short offsets table at the front (_ucprop_offsets)
 * and the unsigned long range table behind it (_ucprop_ranges).  The header
 * fields and both tables are byte-swapped when the header's `bom` field
 * reads 0xfffe.  When data is already present (_ucprop_size > 0) the old
 * block is released before a new copy is read.
 * NOTE(review): no check of the malloc() or fread() results is visible
 * here -- confirm that short reads and allocation failure are handled.
 */
118 _ucprop_load(char *paths, int reload)
121 unsigned long size, i;
124 if (_ucprop_size > 0) {
127 * The character properties have already been loaded.
132 * Unload the current character property data in preparation for
133 * loading a new copy. Only the first array has to be deallocated
134 * because all the memory for the arrays is allocated as a single
137 free((char *) _ucprop_offsets);
141 if ((in = _ucopenfile(paths, "ctype.dat", "rb")) == 0)
147 fread((char *) &hdr, sizeof(_ucheader_t), 1, in);
/* a byte-order mark that reads 0xfffe means the header fields need swapping */
149 if (hdr.bom == 0xfffe) {
150 hdr.cnt = endian_short(hdr.cnt);
151 hdr.size.bytes = endian_long(hdr.size.bytes);
/* the header count becomes the loaded-property count */
154 if ((_ucprop_size = hdr.cnt) == 0) {
160 * Allocate all the storage needed for the lookup table.
162 _ucprop_offsets = (unsigned short *) malloc(hdr.size.bytes);
165 * Calculate the offset into the storage for the ranges. The offsets
166 * array is on a 4-byte boundary and one larger than the value provided in
167 * the header count field. This means the offset to the ranges must be
168 * calculated after aligning the count to a 4-byte boundary.
/* byte size of (cnt + 1) offsets, rounded up to a multiple of 4 */
170 if ((size = ((hdr.cnt + 1) * sizeof(unsigned short))) & 3)
171 size += 4 - (size & 3);
/*
 * NOTE(review): `size` is computed in bytes above, but it is added to an
 * unsigned short pointer here and passed as an element count to the fread()
 * below -- confirm it has been converted to a count of shorts by this point.
 */
173 _ucprop_ranges = (unsigned long *) (_ucprop_offsets + size);
176 * Load the offset array.
178 fread((char *) _ucprop_offsets, sizeof(unsigned short), size, in);
181 * Do an endian swap if necessary. Don't forget there is an extra node on
182 * the end with the final index.
184 if (hdr.bom == 0xfffe) {
/* "<=" so that the extra entry at index _ucprop_size is swapped too */
185 for (i = 0; i <= _ucprop_size; i++)
186 _ucprop_offsets[i] = endian_short(_ucprop_offsets[i]);
190 * Load the ranges. The number of elements is in the last array position
193 fread((char *) _ucprop_ranges, sizeof(unsigned long),
194 _ucprop_offsets[_ucprop_size], in);
199 * Do an endian swap if necessary.
201 if (hdr.bom == 0xfffe) {
202 for (i = 0; i < _ucprop_offsets[_ucprop_size]; i++)
203 _ucprop_ranges[i] = endian_long(_ucprop_ranges[i]);
/* Release the property tables; _ucprop_size == 0 is tested first as the "nothing loaded" case. */
211 if (_ucprop_size == 0)
215 * Only need to free the offsets because the memory is allocated as a
/* freeing the offsets pointer releases the range table as well: both live in one allocation */
218 free((char *) _ucprop_offsets);
/*
 * Test whether `code` lies inside one of the [low, high] range pairs stored
 * for property number `n`.  The pairs for `n` sit in _ucprop_ranges between
 * the offset recorded for `n` and the next offset that is not 0xffff; they
 * are probed at a mid point that is kept on the first element of a pair.
 * The lookup is cut short when no property data is loaded
 * (_ucprop_size == 0) or when the offset for `n` is 0xffff.
 */
223 _ucprop_lookup(unsigned long code, unsigned long n)
227 if (_ucprop_size == 0)
231 * There is an extra node on the end of the offsets to allow this routine
232 * to work right. If the index is 0xffff, then there are no nodes for the
/* l: first range slot for property n */
235 if ((l = _ucprop_offsets[n]) == 0xffff)
239 * Locate the next offset that is not 0xffff. The sentinel at the end of
240 * the array is the max index value.
243 n + m < _ucprop_size && _ucprop_offsets[n + m] == 0xffff; m++) ;
/* r: last range slot before the next property's data begins */
245 r = _ucprop_offsets[n + m] - 1;
249 * Determine a "mid" point and adjust to make sure the mid point is at
250 * the beginning of a range pair.
/* code is above this pair's upper bound */
254 if (code > _ucprop_ranges[m + 1])
/* code is below this pair's lower bound */
256 else if (code < _ucprop_ranges[m])
/* code is inside the pair */
258 else if (code >= _ucprop_ranges[m] && code <= _ucprop_ranges[m + 1])
/*
 * Test `code` against the properties selected by two bit masks.  Bit i of
 * `mask1` selects property i for i in 0..31; bit (i & 31) of `mask2`
 * selects property i for i from 32 up to (but not including) _ucprop_size.
 * Each selected property is checked with _ucprop_lookup().  Two zero masks
 * are tested for up front.
 */
265 ucisprop(unsigned long code, unsigned long mask1, unsigned long mask2)
269 if (mask1 == 0 && mask2 == 0)
/* first 32 properties; the loop is skipped entirely when mask1 is 0 */
272 for (i = 0; mask1 && i < 32; i++) {
273 if ((mask1 & masks32[i]) && _ucprop_lookup(code, i))
/* remaining properties; (i & 31) maps property i back onto a bit of mask2 */
277 for (i = 32; mask2 && i < _ucprop_size; i++) {
278 if ((mask2 & masks32[i & 31]) && _ucprop_lookup(code, i))
285 /**************************************************************************
287 * Support for case mapping.
289 **************************************************************************/
291 static unsigned long _uccase_size; /* header count * 3: total number of unsigned longs in _uccase_map; 0 is tested as "not loaded" */
292 static unsigned short _uccase_len[2]; /* the two header lengths, each * 3; used to delimit search windows in _uccase_map */
293 static unsigned long *_uccase_map; /* malloc'd case mapping table, read from case.dat */
296 * Return -1 on error, 0 if okay
/*
 * Load the case mapping table from "case.dat" (opened through
 * _ucopenfile() using `paths`) into a newly malloc'd _uccase_map, and
 * record the table size and the two section lengths from the header.
 * Header fields and table entries are byte-swapped when hdr.bom reads
 * 0xfffe.  A table left over from an earlier load is freed first.
 * NOTE(review): no check of the malloc() or fread() results is visible
 * here -- confirm that short reads and allocation failure are handled.
 */
299 _uccase_load(char *paths, int reload)
305 if (_uccase_size > 0) {
308 * The case mappings have already been loaded.
312 free((char *) _uccase_map);
316 if ((in = _ucopenfile(paths, "case.dat", "rb")) == 0)
322 fread((char *) &hdr, sizeof(_ucheader_t), 1, in);
324 if (hdr.bom == 0xfffe) {
325 hdr.cnt = endian_short(hdr.cnt);
326 hdr.size.len[0] = endian_short(hdr.size.len[0]);
327 hdr.size.len[1] = endian_short(hdr.size.len[1]);
331 * Set the node count and lengths of the upper and lower case mapping
/* every header count is multiplied by 3 to turn it into a length in unsigned longs */
334 _uccase_size = hdr.cnt * 3;
335 _uccase_len[0] = hdr.size.len[0] * 3;
336 _uccase_len[1] = hdr.size.len[1] * 3;
338 _uccase_map = (unsigned long *)
339 malloc(_uccase_size * sizeof(unsigned long));
342 * Load the case mapping table.
344 fread((char *) _uccase_map, sizeof(unsigned long), _uccase_size, in);
347 * Do an endian swap if necessary.
349 if (hdr.bom == 0xfffe) {
350 for (i = 0; i < _uccase_size; i++)
351 _uccase_map[i] = endian_long(_uccase_map[i]);
/* Release the case mapping table; _uccase_size == 0 is tested first as the "nothing loaded" case. */
360 if (_uccase_size == 0)
363 free((char *) _uccase_map);
/*
 * Binary-search _uccase_map between indices `l` and `r` for an entry whose
 * first element equals `code`.  Entries are triples, and the probe index is
 * kept on the first element of a triple.  On a match the element `field`
 * positions into that triple is returned.
 */
368 _uccase_lookup(unsigned long code, long l, long r, int field)
373 * Do the binary search.
377 * Determine a "mid" point and adjust to make sure the mid point is at
378 * the beginning of a case mapping triple.
382 if (code > _uccase_map[m])
384 else if (code < _uccase_map[m])
386 else if (code == _uccase_map[m])
/* `field` picks which member of the matching triple is handed back */
387 return _uccase_map[m + field];
/*
 * Case-map `code` via _uccase_lookup().  A search window [l, r] inside
 * _uccase_map is chosen from the character's case class: one window for
 * lower-case input, and the window starting at
 * _uccase_len[0] + _uccase_len[1] for title-case input.
 */
394 uctoupper(unsigned long code)
402 if (ucislower(code)) {
404 * The character is lower case.
/* window of _uccase_len[1] elements starting at l; "- 3" lands r on the first element of the last triple */
408 r = (l + _uccase_len[1]) - 3;
411 * The character is title case.
/* the title-case entries follow the two sections counted by _uccase_len[0] and _uccase_len[1] */
414 l = _uccase_len[0] + _uccase_len[1];
415 r = _uccase_size - 3;
417 return _uccase_lookup(code, l, r, field);
/*
 * Case-map `code` via _uccase_lookup().  A search window [l, r] inside
 * _uccase_map is chosen from the character's case class: the window ending
 * at _uccase_len[0] - 3 for upper-case input, and the window starting at
 * _uccase_len[0] + _uccase_len[1] for title-case input.
 */
421 uctolower(unsigned long code)
429 if (ucisupper(code)) {
431 * The character is upper case.
/* "- 3" lands r on the first element of the last triple in the window */
435 r = _uccase_len[0] - 3;
438 * The character is title case.
/* the title-case entries follow the two sections counted by _uccase_len[0] and _uccase_len[1] */
441 l = _uccase_len[0] + _uccase_len[1];
442 r = _uccase_size - 3;
444 return _uccase_lookup(code, l, r, field);
/*
 * Case-map `code` via _uccase_lookup().  The search window [l, r] inside
 * _uccase_map depends on whether the input is upper case (window ending at
 * _uccase_len[0] - 3) or lower case (window of _uccase_len[1] elements
 * starting at l).
 */
448 uctotitle(unsigned long code)
457 * The offset will always be the same for converting to title case.
461 if (ucisupper(code)) {
463 * The character is upper case.
466 r = _uccase_len[0] - 3;
469 * The character is lower case.
472 r = (l + _uccase_len[1]) - 3;
474 return _uccase_lookup(code, l, r, field);
477 /**************************************************************************
479 * Support for compositions.
481 **************************************************************************/
483 static unsigned long _uccomp_size; /* searchable extent of _uccomp_data, set by the loader; 0 is tested as "not loaded" */
484 static unsigned long *_uccomp_data; /* malloc'd composition table, read from comp.dat */
487 * Return -1 on error, 0 if okay
/*
 * Load the composition table from "comp.dat" (opened through
 * _ucopenfile() using `paths`) into a newly malloc'd _uccomp_data of
 * hdr.size.bytes bytes.  Header fields and table entries are byte-swapped
 * when hdr.bom reads 0xfffe.  After loading, the table is scanned to find
 * where the length-2 compositions end, and _uccomp_size is set from that.
 * NOTE(review): no check of the malloc() or fread() results is visible
 * here -- confirm that short reads and allocation failure are handled.
 */
490 _uccomp_load(char *paths, int reload)
493 unsigned long size, i;
496 if (_uccomp_size > 0) {
499 * The compositions have already been loaded.
503 free((char *) _uccomp_data);
507 if ((in = _ucopenfile(paths, "comp.dat", "rb")) == 0)
513 fread((char *) &hdr, sizeof(_ucheader_t), 1, in);
515 if (hdr.bom == 0xfffe) {
516 hdr.cnt = endian_short(hdr.cnt);
517 hdr.size.bytes = endian_long(hdr.size.bytes);
520 _uccomp_size = hdr.cnt;
521 _uccomp_data = (unsigned long *) malloc(hdr.size.bytes);
524 * Read the composition data in.
/* size: number of unsigned longs in the table */
526 size = hdr.size.bytes / sizeof(unsigned long);
527 fread((char *) _uccomp_data, sizeof(unsigned long), size, in);
530 * Do an endian swap if necessary.
532 if (hdr.bom == 0xfffe) {
533 for (i = 0; i < size; i++)
534 _uccomp_data[i] = endian_long(_uccomp_data[i]);
538 * Assume that the data is ordered on count, so that all compositions
539 * of length 2 come first. Only handling length 2 for now.
/* step through the table 4 elements at a time, looking at element 1 of each group (compared with 2) */
541 for (i = 1; i < size; i += 4)
542 if (_uccomp_data[i] != 2)
/* i points at element 1 of a group, so i - 1 is that group's first element */
544 _uccomp_size = i - 1;
/* Release the composition table; _uccomp_size == 0 is tested first as the "nothing loaded" case. */
553 if (_uccomp_size == 0)
556 free((char *) _uccomp_data);
/*
 * Look up the composition of the pair (node1, node2) in _uccomp_data.
 * Within a table entry starting at index m, element m+2 is compared with
 * `node1` and element m+3 with `node2`; when both match, element m is
 * stored through `comp`.
 */
561 uccomp(unsigned long node1, unsigned long node2, unsigned long *comp)
566 r = _uccomp_size - 1;
/* order on node1 first ... */
571 if (node1 > _uccomp_data[m+2])
573 else if (node1 < _uccomp_data[m+2])
/* ... then on node2 */
575 else if (node2 > _uccomp_data[m+3])
577 else if (node2 < _uccomp_data[m+3])
/* both match: element m holds the composed value */
580 *comp = _uccomp_data[m];
/*
 * Compose conjoining Hangul jamo in place.
 *
 * Scans str[0..len-1] from left to right and merges
 *   - a leading consonant (L) followed by a vowel (V) into an LV syllable,
 *   - an LV syllable followed by a trailing consonant (T) into an LVT
 *     syllable,
 * overwriting the last output slot each time a merge happens.  Anything
 * else is copied through unchanged.
 *
 * str: code points to compose; modified in place.
 * len: number of code points in str.
 *
 * Returns the number of code points left in str after composition (always
 * <= len), or 0 when len <= 0.
 *
 * The trailing-consonant index must be strictly between 0 and TCount:
 * index 0 (TBase itself, U+11A7) is not a trailing consonant, and index
 * TCount (U+11C3) lies past the last one, so neither may be merged.
 */
int
uccomp_hangul(unsigned long *str, int len)
{
    const unsigned long SBase = 0xAC00, LBase = 0x1100,
        VBase = 0x1161, TBase = 0x11A7,
        LCount = 19, VCount = 21, TCount = 28,
        NCount = VCount * TCount,   /* 588 */
        SCount = LCount * NCount;   /* 11172 */

    int i, rlen;
    unsigned long ch, last, lindex, sindex;

    if (len <= 0)
        return 0;

    last = str[0];
    rlen = 1;
    for (i = 1; i < len; i++) {
        ch = str[i];

        /*
         * Check if the two current characters are L and V.  The indexes
         * are unsigned, so a value below the base wraps to a huge number
         * and fails the "< Count" test; no separate ">= 0" test is needed.
         */
        lindex = last - LBase;
        if (lindex < LCount) {
            unsigned long vindex = ch - VBase;
            if (vindex < VCount) {
                /* make syllable of form LV */
                last = SBase + (lindex * VCount + vindex) * TCount;
                str[rlen - 1] = last; /* reset last */
                continue;
            }
        }

        /* check if the two current characters are LV and T */
        sindex = last - SBase;
        if (sindex < SCount && (sindex % TCount) == 0) {
            unsigned long tindex = ch - TBase;
            if (tindex > 0 && tindex < TCount) {
                /* make syllable of form LVT */
                last += tindex;
                str[rlen - 1] = last; /* reset last */
                continue;
            }
        }

        /* if neither case was true, just add the character */
        last = ch;
        str[rlen] = ch;
        rlen++;
    }
    return rlen;
}
/*
 * Compose the code points in str[0..len-1] in place: each later character
 * is offered to uccomp() together with the current starter `st`, with the
 * combining classes from uccombining_class() deciding whether the pair may
 * be merged.  The result is finally passed through uccomp_hangul(), whose
 * return value is returned.
 */
637 uccanoncomp(unsigned long *str, int len)
640 unsigned long cl, prevcl, st, ch, co;
/* 256 stands in for "previous class" when the first character is itself a combining mark */
645 prevcl = uccombining_class(st) == 0 ? 0 : 256;
647 for (i = 1; i < len; i++) {
649 cl = uccombining_class(ch);
/* merge only if a composition exists and the previous class is lower than this one, or is 0 */
650 if (uccomp(st, ch, &co) && (prevcl < cl || prevcl == 0))
651 st = str[stpos] = co;
662 return uccomp_hangul(str, copos);
665 /**************************************************************************
667 * Support for decompositions.
669 **************************************************************************/
671 static unsigned long _ucdcmp_size; /* header count * 2; 0 is tested as "not loaded" */
672 static unsigned long *_ucdcmp_nodes; /* start of the malloc'd decomposition block, read from decomp.dat */
673 static unsigned long *_ucdcmp_decomp; /* decomposition values; points _ucdcmp_size + 1 elements into the _ucdcmp_nodes block */
676 * Return -1 on error, 0 if okay
/*
 * Load the decomposition tables from "decomp.dat" (opened through
 * _ucopenfile() using `paths`) into one malloc'd block of hdr.size.bytes
 * bytes.  _ucdcmp_nodes is the start of the block and _ucdcmp_decomp points
 * _ucdcmp_size + 1 elements into it.  Header fields and all elements are
 * byte-swapped when hdr.bom reads 0xfffe.
 * NOTE(review): no check of the malloc() or fread() results is visible
 * here -- confirm that short reads and allocation failure are handled.
 */
679 _ucdcmp_load(char *paths, int reload)
682 unsigned long size, i;
685 if (_ucdcmp_size > 0) {
688 * The decompositions have already been loaded.
692 free((char *) _ucdcmp_nodes);
696 if ((in = _ucopenfile(paths, "decomp.dat", "rb")) == 0)
702 fread((char *) &hdr, sizeof(_ucheader_t), 1, in);
704 if (hdr.bom == 0xfffe) {
705 hdr.cnt = endian_short(hdr.cnt);
706 hdr.size.bytes = endian_long(hdr.size.bytes);
/* two elements per header-counted entry */
709 _ucdcmp_size = hdr.cnt << 1;
710 _ucdcmp_nodes = (unsigned long *) malloc(hdr.size.bytes);
/* the "+ 1" skips one extra element that follows the _ucdcmp_size node elements */
711 _ucdcmp_decomp = _ucdcmp_nodes + (_ucdcmp_size + 1);
714 * Read the decomposition data in.
716 size = hdr.size.bytes / sizeof(unsigned long);
717 fread((char *) _ucdcmp_nodes, sizeof(unsigned long), size, in);
720 * Do an endian swap if necessary.
722 if (hdr.bom == 0xfffe) {
723 for (i = 0; i < size; i++)
724 _ucdcmp_nodes[i] = endian_long(_ucdcmp_nodes[i]);
/* Release the decomposition tables; _ucdcmp_size == 0 is tested first as the "nothing loaded" case. */
733 if (_ucdcmp_size == 0)
737 * Only need to free the offsets because the memory is allocated as a
/* _ucdcmp_decomp points into this same allocation, so one free() covers both */
740 free((char *) _ucdcmp_nodes);
/*
 * Look up the decomposition of `code`.  _ucdcmp_nodes is searched as
 * (code, offset) pairs, with the probe index kept on the first element of
 * a pair.  On a match, `*decomp` is pointed at the decomposition inside
 * _ucdcmp_decomp and `*num` receives its length, computed as the
 * difference between the next pair's offset and this pair's offset.
 * The storage behind `*decomp` belongs to the loaded table.
 */
745 ucdecomp(unsigned long code, unsigned long *num, unsigned long **decomp)
/* the element at index _ucdcmp_size supplies the upper search bound */
750 r = _ucdcmp_nodes[_ucdcmp_size] - 1;
754 * Determine a "mid" point and adjust to make sure the mid point is at
755 * the beginning of a code+offset pair.
759 if (code > _ucdcmp_nodes[m])
761 else if (code < _ucdcmp_nodes[m])
763 else if (code == _ucdcmp_nodes[m]) {
/* m + 1 is this pair's offset, m + 3 is the following pair's offset */
764 *num = _ucdcmp_nodes[m + 3] - _ucdcmp_nodes[m + 1];
765 *decomp = &_ucdcmp_decomp[_ucdcmp_nodes[m + 1]];
/*
 * Decompose a Hangul syllable arithmetically.  Input that fails
 * ucishangul() is rejected up front.  Otherwise up to three values are
 * written to decomp[0..2], so the caller must provide room for three, and
 * `*num` is set to 3 or 2 depending on whether the third value is anything
 * other than 0x11a7.
 */
773 ucdecomp_hangul(unsigned long code, unsigned long *num, unsigned long decomp[])
775 if (!ucishangul(code))
/* 588 = 21 * 28 and 28 are the per-leading-consonant and per-vowel block sizes used by uccomp_hangul() */
779 decomp[0] = 0x1100 + (unsigned long) (code / 588);
780 decomp[1] = 0x1161 + (unsigned long) ((code % 588) / 28);
781 decomp[2] = 0x11a7 + (unsigned long) (code % 28);
/* a third value equal to the bare base 0x11a7 is not counted */
782 *num = (decomp[2] != 0x11a7) ? 3 : 2;
/*
 * Decompose the `inlen` code points of `in` into a malloc'd buffer returned
 * through `*out`.  Each input character is expanded with ucdecomp(), or
 * failing that with ucdecomp_hangul(), or otherwise copied as-is.
 * Characters with a non-zero combining class are inserted into the output
 * in combining-class order.  The buffer is grown with realloc() whenever
 * the remaining room is too small.
 * NOTE(review): every realloc() result is assigned straight back to *out;
 * if realloc() fails, the previous buffer can no longer be reached or
 * freed -- confirm how failure is meant to be handled.
 * NOTE(review): `size - i` is an int compared against the unsigned long
 * `num` -- confirm the implicit conversion is harmless here.
 */
788 uccanondecomp(const unsigned long *in, int inlen,
789 unsigned long **out, int *outlen)
791 int i, j, k, l, size;
792 unsigned long num, class, *decomp, hangdecomp[3];
795 *out = (unsigned long *) malloc(size * sizeof(**out));
800 for (j = 0; j < inlen; j++) {
801 if (ucdecomp(in[j], &num, &decomp)) {
802 if (size - i < num) {
/* new size = already written (i) + this expansion (num) + inputs still to come (inlen - j - 1) */
803 size = inlen + i - j + num - 1;
804 *out = (unsigned long *) realloc(*out, size * sizeof(**out));
808 for (k = 0; k < num; k++) {
809 class = uccombining_class(decomp[k]);
811 (*out)[i] = decomp[k];
/* walk back from the end until an element whose class is not greater than this one, then open a slot at l */
813 for (l = i; l > 0; l--)
814 if (class >= uccombining_class((*out)[l-1]))
816 memmove(*out + l + 1, *out + l, (i - l) * sizeof(**out));
817 (*out)[l] = decomp[k];
821 } else if (ucdecomp_hangul(in[j], &num, hangdecomp)) {
822 if (size - i < num) {
823 size = inlen + i - j + num - 1;
824 *out = (unsigned long *) realloc(*out, size * sizeof(**out));
/* the Hangul pieces are appended in order, with no combining-class reordering */
828 for (k = 0; k < num; k++) {
829 (*out)[i] = hangdecomp[k];
/* no decomposition: room is needed for this character plus the inputs still to come */
834 size = inlen + i - j;
835 *out = (unsigned long *) realloc(*out, size * sizeof(**out));
839 class = uccombining_class(in[j]);
843 for (l = i; l > 0; l--)
844 if (class >= uccombining_class((*out)[l-1]))
846 memmove(*out + l + 1, *out + l, (i - l) * sizeof(**out));
855 /**************************************************************************
857 * Support for combining classes.
859 **************************************************************************/
861 static unsigned long _uccmcl_size; /* header count * 3: number of unsigned longs in _uccmcl_nodes; 0 is tested as "not loaded" */
862 static unsigned long *_uccmcl_nodes; /* malloc'd combining-class table, read from cmbcl.dat */
865 * Return -1 on error, 0 if okay
/*
 * Load the combining-class table from "cmbcl.dat" (opened through
 * _ucopenfile() using `paths`) into a newly malloc'd _uccmcl_nodes.
 * Header fields and table entries are byte-swapped when hdr.bom reads
 * 0xfffe.  A table left over from an earlier load is freed first.
 * NOTE(review): the buffer is sized from hdr.size.bytes but the read count
 * is hdr.cnt * 3 unsigned longs -- confirm the two always agree.
 * NOTE(review): no check of the malloc() or fread() results is visible
 * here -- confirm that short reads and allocation failure are handled.
 */
868 _uccmcl_load(char *paths, int reload)
874 if (_uccmcl_size > 0) {
877 * The combining classes have already been loaded.
881 free((char *) _uccmcl_nodes);
885 if ((in = _ucopenfile(paths, "cmbcl.dat", "rb")) == 0)
891 fread((char *) &hdr, sizeof(_ucheader_t), 1, in);
893 if (hdr.bom == 0xfffe) {
894 hdr.cnt = endian_short(hdr.cnt);
895 hdr.size.bytes = endian_long(hdr.size.bytes);
/* three unsigned longs per header-counted entry */
898 _uccmcl_size = hdr.cnt * 3;
899 _uccmcl_nodes = (unsigned long *) malloc(hdr.size.bytes);
902 * Read the combining classes in.
904 fread((char *) _uccmcl_nodes, sizeof(unsigned long), _uccmcl_size, in);
907 * Do an endian swap if necessary.
909 if (hdr.bom == 0xfffe) {
910 for (i = 0; i < _uccmcl_size; i++)
911 _uccmcl_nodes[i] = endian_long(_uccmcl_nodes[i]);
/* Release the combining-class table; _uccmcl_size == 0 is tested first as the "nothing loaded" case. */
920 if (_uccmcl_size == 0)
923 free((char *) _uccmcl_nodes);
/*
 * Look up the combining class of `code`.  _uccmcl_nodes is searched as
 * triples (low, high, class): when `code` lies inside [low, high] the third
 * element of that triple is returned.
 */
928 uccombining_class(unsigned long code)
933 r = _uccmcl_size - 1;
/* above this triple's upper bound */
938 if (code > _uccmcl_nodes[m + 1])
/* below this triple's lower bound */
940 else if (code < _uccmcl_nodes[m])
942 else if (code >= _uccmcl_nodes[m] && code <= _uccmcl_nodes[m + 1])
943 return _uccmcl_nodes[m + 2];
948 /**************************************************************************
950 * Support for numeric values.
952 **************************************************************************/
954 static unsigned long *_ucnum_nodes; /* start of the malloc'd number block, read from num.dat; searched as (code, value index) pairs */
955 static unsigned long _ucnum_size; /* count taken from the num.dat header; 0 is tested as "not loaded" */
956 static short *_ucnum_vals; /* 16-bit values; points _ucnum_size elements into the _ucnum_nodes block */
959 * Return -1 on error, 0 if okay
/*
 * Load the number tables from "num.dat" (opened through _ucopenfile()
 * using `paths`) into one malloc'd block of hdr.size.bytes bytes.
 * _ucnum_nodes is the start of the block and _ucnum_vals points
 * _ucnum_size unsigned longs into it.  Header fields, nodes and values are
 * byte-swapped when hdr.bom reads 0xfffe.
 * NOTE(review): no check of the malloc() or fread() results is visible
 * here -- confirm that short reads and allocation failure are handled.
 */
962 _ucnumb_load(char *paths, int reload)
965 unsigned long size, i;
968 if (_ucnum_size > 0) {
971 * The numbers have already been loaded.
975 free((char *) _ucnum_nodes);
979 if ((in = _ucopenfile(paths, "num.dat", "rb")) == 0)
985 fread((char *) &hdr, sizeof(_ucheader_t), 1, in);
987 if (hdr.bom == 0xfffe) {
988 hdr.cnt = endian_short(hdr.cnt);
989 hdr.size.bytes = endian_long(hdr.size.bytes);
992 _ucnum_size = hdr.cnt;
993 _ucnum_nodes = (unsigned long *) malloc(hdr.size.bytes);
/* the 16-bit values start right behind the _ucnum_size node elements */
994 _ucnum_vals = (short *) (_ucnum_nodes + _ucnum_size);
997 * Read the combining classes in.
/*
 * NOTE(review): the comment above looks copied from the combining-class
 * loader; what is read here is the whole num.dat payload (nodes followed by
 * values), as raw bytes.
 */
999 fread((char *) _ucnum_nodes, sizeof(unsigned char), hdr.size.bytes, in);
1002 * Do an endian swap if necessary.
1004 if (hdr.bom == 0xfffe) {
1005 for (i = 0; i < _ucnum_size; i++)
1006 _ucnum_nodes[i] = endian_long(_ucnum_nodes[i]);
1009 * Determine the number of values that have to be adjusted.
/*
 * NOTE(review): the node area is addressed as _ucnum_size unsigned longs
 * when _ucnum_vals is set above, yet twice that many bytes
 * (sizeof(unsigned long) << 1 per element) are subtracted here -- confirm
 * which of the two is the intended size of the node area.
 */
1011 size = (hdr.size.bytes -
1012 (_ucnum_size * (sizeof(unsigned long) << 1))) /
1015 for (i = 0; i < size; i++)
1016 _ucnum_vals[i] = endian_short(_ucnum_vals[i]);
/*
 * Release the number tables.  _ucnum_size == 0 is tested first as the
 * "nothing loaded" case; _ucnum_vals points into the same allocation as
 * _ucnum_nodes, so a single free() covers both.
 */
1023 _ucnumb_unload(void)
1025 if (_ucnum_size == 0)
1028 free((char *) _ucnum_nodes);
/*
 * Look up the numeric value of `code`.  _ucnum_nodes is searched as
 * (code, offset) pairs; on a match the offset selects two consecutive
 * shorts in _ucnum_vals, which are stored in num->numerator and
 * num->denominator in that order.
 */
1033 ucnumber_lookup(unsigned long code, struct ucnumber *num)
1039 r = _ucnum_size - 1;
1042 * Determine a "mid" point and adjust to make sure the mid point is at
1043 * the beginning of a code+offset pair.
1047 if (code > _ucnum_nodes[m])
1049 else if (code < _ucnum_nodes[m])
/* element m + 1 of the matching pair indexes the value table */
1052 vp = _ucnum_vals + _ucnum_nodes[m + 1];
1053 num->numerator = (int) *vp++;
1054 num->denominator = (int) *vp;
/*
 * Look up `code` in the number tables for use as a digit.  _ucnum_nodes is
 * searched as (code, offset) pairs; on a match the two shorts that the
 * offset selects in _ucnum_vals are compared with each other.
 */
1062 ucdigit_lookup(unsigned long code, int *digit)
1068 r = _ucnum_size - 1;
1071 * Determine a "mid" point and adjust to make sure the mid point is at
1072 * the beginning of a code+offset pair.
1076 if (code > _ucnum_nodes[m])
1078 else if (code < _ucnum_nodes[m])
1081 vp = _ucnum_vals + _ucnum_nodes[m + 1];
/* the entry is only taken further when both stored values are equal */
1082 if (*vp == *(vp + 1)) {
/*
 * Convenience wrapper around ucnumber_lookup() that ignores its result.
 * `num` is pre-filled with -111/-111, so those values are what remain when
 * the lookup stores nothing.
 */
1093 ucgetnumber(unsigned long code)
1095 struct ucnumber num;
1098 * Initialize with some arbitrary value, because the caller simply cannot
1099 * tell for sure if the code is a number without calling the ucisnumber()
1100 * macro before calling this function.
1102 num.numerator = num.denominator = -111;
/* the lookup's own return value is deliberately discarded */
1104 (void) ucnumber_lookup(code, &num);
/*
 * Convenience wrapper around ucdigit_lookup() that ignores its result; the
 * digit is received in the local `dig`.
 */
1110 ucgetdigit(unsigned long code)
1115 * Initialize with some arbitrary value, because the caller simply cannot
1116 * tell for sure if the code is a number without calling the ucisdigit()
1117 * macro before calling this function.
/* the lookup's own return value is deliberately discarded */
1121 (void) ucdigit_lookup(code, &dig);
1126 /**************************************************************************
1128 * Setup and cleanup routines.
1130 **************************************************************************/
1133 * Return 0 if okay, negative on error
/*
 * Load every table whose UCDATA_* bit is set in `masks`, searching `paths`
 * for the data files.  Each loader is called with reload = 0; when a loader
 * returns a negative value, that table's UCDATA_* bit is OR-ed into `error`.
 */
1136 ucdata_load(char *paths, int masks)
1140 if (masks & UCDATA_CTYPE)
1141 error |= _ucprop_load(paths, 0) < 0 ? UCDATA_CTYPE : 0;
1142 if (masks & UCDATA_CASE)
1143 error |= _uccase_load(paths, 0) < 0 ? UCDATA_CASE : 0;
1144 if (masks & UCDATA_DECOMP)
1145 error |= _ucdcmp_load(paths, 0) < 0 ? UCDATA_DECOMP : 0;
1146 if (masks & UCDATA_CMBCL)
1147 error |= _uccmcl_load(paths, 0) < 0 ? UCDATA_CMBCL : 0;
1148 if (masks & UCDATA_NUM)
1149 error |= _ucnumb_load(paths, 0) < 0 ? UCDATA_NUM : 0;
1150 if (masks & UCDATA_COMP)
1151 error |= _uccomp_load(paths, 0) < 0 ? UCDATA_COMP : 0;
/*
 * Tests each UCDATA_* bit of `masks` in turn, one test per table
 * (presumably releasing the matching table -- confirm against the
 * statements guarded by each test).
 */
1157 ucdata_unload(int masks)
1159 if (masks & UCDATA_CTYPE)
1161 if (masks & UCDATA_CASE)
1163 if (masks & UCDATA_DECOMP)
1165 if (masks & UCDATA_CMBCL)
1167 if (masks & UCDATA_NUM)
1169 if (masks & UCDATA_COMP)
1174 * Return 0 if okay, negative on error
/*
 * Reload every table whose UCDATA_* bit is set in `masks`, searching
 * `paths` for the data files.  Identical in shape to ucdata_load() except
 * that each loader is called with reload = 1; when a loader returns a
 * negative value, that table's UCDATA_* bit is OR-ed into `error`.
 */
1177 ucdata_reload(char *paths, int masks)
1181 if (masks & UCDATA_CTYPE)
1182 error |= _ucprop_load(paths, 1) < 0 ? UCDATA_CTYPE : 0;
1183 if (masks & UCDATA_CASE)
1184 error |= _uccase_load(paths, 1) < 0 ? UCDATA_CASE : 0;
1185 if (masks & UCDATA_DECOMP)
1186 error |= _ucdcmp_load(paths, 1) < 0 ? UCDATA_DECOMP : 0;
1187 if (masks & UCDATA_CMBCL)
1188 error |= _uccmcl_load(paths, 1) < 0 ? UCDATA_CMBCL : 0;
1189 if (masks & UCDATA_NUM)
1190 error |= _ucnumb_load(paths, 1) < 0 ? UCDATA_NUM : 0;
1191 if (masks & UCDATA_COMP)
1192 error |= _uccomp_load(paths, 1) < 0 ? UCDATA_COMP : 0;
1203 unsigned long i, lo, *dec;
1204 struct ucnumber num;
1211 printf("NOT WEAK\n");
1213 printf("LOWER 0x%04lX\n", uctolower(0xff3a));
1214 printf("UPPER 0x%04lX\n", uctoupper(0xff5a));
1216 if (ucisalpha(0x1d5))
1219 printf("NOT ALPHA\n");
1221 if (ucisupper(0x1d5)) {
1223 lo = uctolower(0x1d5);
1224 printf("0x%04lx\n", lo);
1225 lo = uctotitle(0x1d5);
1226 printf("0x%04lx\n", lo);
1228 printf("NOT UPPER\n");
1230 if (ucistitle(0x1d5))
1233 printf("NOT TITLE\n");
1235 if (uciscomposite(0x1d5))
1236 printf("COMPOSITE\n");
1238 printf("NOT COMPOSITE\n");
1240 if (ucdecomp(0x1d5, &lo, &dec)) {
1241 for (i = 0; i < lo; i++)
1242 printf("0x%04lx ", dec[i]);
1246 if ((lo = uccombining_class(0x41)) != 0)
1247 printf("0x41 CCL %ld\n", lo);
1249 if (ucisxdigit(0xfeff))
1250 printf("0xFEFF HEX DIGIT\n");
1252 printf("0xFEFF NOT HEX DIGIT\n");
1254 if (ucisdefined(0x10000))
1255 printf("0x10000 DEFINED\n");
1257 printf("0x10000 NOT DEFINED\n");
1259 if (ucnumber_lookup(0x30, &num)) {
1260 if (num.numerator != num.denominator)
1261 printf("UCNUMBER: 0x30 = %d/%d\n", num.numerator, num.denominator);
1263 printf("UCNUMBER: 0x30 = %d\n", num.numerator);
1265 printf("UCNUMBER: 0x30 NOT A NUMBER\n");
1267 if (ucnumber_lookup(0xbc, &num)) {
1268 if (num.numerator != num.denominator)
1269 printf("UCNUMBER: 0xbc = %d/%d\n", num.numerator, num.denominator);
1271 printf("UCNUMBER: 0xbc = %d\n", num.numerator);
1273 printf("UCNUMBER: 0xbc NOT A NUMBER\n");
1276 if (ucnumber_lookup(0xff19, &num)) {
1277 if (num.numerator != num.denominator)
1278 printf("UCNUMBER: 0xff19 = %d/%d\n", num.numerator, num.denominator);
1280 printf("UCNUMBER: 0xff19 = %d\n", num.numerator);
1282 printf("UCNUMBER: 0xff19 NOT A NUMBER\n");
1284 if (ucnumber_lookup(0x4e00, &num)) {
1285 if (num.numerator != num.denominator)
1286 printf("UCNUMBER: 0x4e00 = %d/%d\n", num.numerator, num.denominator);
1288 printf("UCNUMBER: 0x4e00 = %d\n", num.numerator);
1290 printf("UCNUMBER: 0x4e00 NOT A NUMBER\n");
1292 if (ucdigit_lookup(0x06f9, &dig))
1293 printf("UCDIGIT: 0x6f9 = %d\n", dig);
1295 printf("UCDIGIT: 0x6f9 NOT A NUMBER\n");
1297 dig = ucgetdigit(0x0969);
1298 printf("UCGETDIGIT: 0x969 = %d\n", dig);
1300 num = ucgetnumber(0x30);
1301 if (num.numerator != num.denominator)
1302 printf("UCGETNUMBER: 0x30 = %d/%d\n", num.numerator, num.denominator);
1304 printf("UCGETNUMBER: 0x30 = %d\n", num.numerator);
1306 num = ucgetnumber(0xbc);
1307 if (num.numerator != num.denominator)
1308 printf("UCGETNUMBER: 0xbc = %d/%d\n", num.numerator, num.denominator);
1310 printf("UCGETNUMBER: 0xbc = %d\n", num.numerator);
1312 num = ucgetnumber(0xff19);
1313 if (num.numerator != num.denominator)
1314 printf("UCGETNUMBER: 0xff19 = %d/%d\n", num.numerator, num.denominator);
1316 printf("UCGETNUMBER: 0xff19 = %d\n", num.numerator);