3 * Copyright 2000-2002 The OpenLDAP Foundation, All Rights Reserved.
4 * COPYING RESTRICTIONS APPLY, see COPYRIGHT file
7 * Copyright 2001 Computing Research Labs, New Mexico State University
9 * Permission is hereby granted, free of charge, to any person obtaining a
10 * copy of this software and associated documentation files (the "Software"),
11 * to deal in the Software without restriction, including without limitation
12 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 * and/or sell copies of the Software, and to permit persons to whom the
14 * Software is furnished to do so, subject to the following conditions:
16 * The above copyright notice and this permission notice shall be included in
17 * all copies or substantial portions of the Software.
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
23 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
24 * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
25 * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 /* $Id: ucdata.c,v 1.4 2001/01/02 18:46:20 mleisher Exp $" */
30 #include "ldap_config.h"
33 #include <ac/stdlib.h>
34 #include <ac/string.h>
35 #include <ac/unistd.h>
40 /**************************************************************************
42 * Miscellaneous types, data, and support functions.
44 **************************************************************************/
51 unsigned short len[2];
56 * A simple array of 32-bit masks for lookup.
/*
 * Single-bit masks indexed by bit position: masks32[i] has only bit i set,
 * for i in 0..31.  The table is only ever read, so it is const.
 */
static const unsigned long masks32[32] = {
    0x00000001UL, 0x00000002UL, 0x00000004UL, 0x00000008UL,
    0x00000010UL, 0x00000020UL, 0x00000040UL, 0x00000080UL,
    0x00000100UL, 0x00000200UL, 0x00000400UL, 0x00000800UL,
    0x00001000UL, 0x00002000UL, 0x00004000UL, 0x00008000UL,
    0x00010000UL, 0x00020000UL, 0x00040000UL, 0x00080000UL,
    0x00100000UL, 0x00200000UL, 0x00400000UL, 0x00800000UL,
    0x01000000UL, 0x02000000UL, 0x04000000UL, 0x08000000UL,
    0x10000000UL, 0x20000000UL, 0x40000000UL, 0x80000000UL
};
/*
 * Byte-order swap helpers for 16-bit and 32-bit quantities.
 *
 * Every byte is masked to 8 bits after shifting.  Without the mask on the
 * high byte, a negative (signed) 16-bit argument is sign-extended by the
 * right shift and the OR produces all ones, and a 64-bit unsigned long
 * argument would carry bits above bit 31 into the low byte of the result.
 *
 * The argument is evaluated more than once; do not pass expressions with
 * side effects.
 */
#define endian_short(cc) ((((cc) >> 8) & 0xff) | (((cc) & 0xff) << 8))
#define endian_long(cc) ((((cc) & 0xff) << 24) | ((((cc) >> 8) & 0xff) << 16) | \
                         ((((cc) >> 16) & 0xff) << 8) | (((cc) >> 24) & 0xff))
/*
 * Search a colon-separated list of directories for a file and open it.
 *
 * paths    - directories separated by ':'; may be NULL or empty, in which
 *            case nothing is searched.
 * filename - name appended to each directory as "<dir>/<filename>".
 * mode     - mode string handed to fopen() unchanged.
 *
 * Returns the stream from the first successful fopen(), or 0 when the
 * filename is NULL/empty or no directory yields an openable file.  The
 * caller owns the returned stream and must fclose() it.
 *
 * A candidate whose full name does not fit in the local path buffer is
 * skipped instead of being written past the end of the buffer.
 */
static FILE *
_ucopenfile(char *paths, char *filename, char *mode)
{
    FILE *f;
    char *dp, path[BUFSIZ];
    size_t dirlen, namelen;

    if (filename == 0 || *filename == 0)
      return 0;

    namelen = strlen(filename);

    dp = paths;
    while (dp && *dp) {
        /*
         * Measure the current directory entry (up to ':' or end of list).
         */
        for (dirlen = 0; dp[dirlen] && dp[dirlen] != ':'; dirlen++) ;

        /*
         * Build "<dir>/<filename>" only if it fits, including the
         * terminating NUL.
         */
        if (dirlen + 1 + namelen < sizeof(path)) {
            memcpy(path, dp, dirlen);
            path[dirlen] = '/';
            memcpy(path + dirlen + 1, filename, namelen + 1);

            if ((f = fopen(path, mode)) != 0)
              return f;
        }

        /*
         * Advance to the next entry, stepping over the separator.
         */
        dp += dirlen;
        if (*dp == ':')
          dp++;
    }

    return 0;
}
104 /**************************************************************************
106 * Support for the character properties.
108 **************************************************************************/
110 static unsigned long _ucprop_size;
111 static unsigned short *_ucprop_offsets;
112 static unsigned long *_ucprop_ranges;
115 * Return -1 on error, 0 if okay
118 _ucprop_load(char *paths, int reload)
121 unsigned long size, i;
124 if (_ucprop_size > 0) {
127 * The character properties have already been loaded.
132 * Unload the current character property data in preparation for
133 * loading a new copy. Only the first array has to be deallocated
134 * because all the memory for the arrays is allocated as a single
137 free((char *) _ucprop_offsets);
141 if ((in = _ucopenfile(paths, "ctype.dat", "rb")) == 0)
147 fread((char *) &hdr, sizeof(_ucheader_t), 1, in);
149 if (hdr.bom == 0xfffe) {
150 hdr.cnt = endian_short(hdr.cnt);
151 hdr.size.bytes = endian_long(hdr.size.bytes);
154 if ((_ucprop_size = hdr.cnt) == 0) {
160 * Allocate all the storage needed for the lookup table.
162 _ucprop_offsets = (unsigned short *) malloc(hdr.size.bytes);
165 * Calculate the offset into the storage for the ranges. The offsets
166 * array is on a 4-byte boundary and one larger than the value provided in
167 * the header count field. This means the offset to the ranges must be
168 * calculated after aligning the count to a 4-byte boundary.
170 if ((size = ((hdr.cnt + 1) * sizeof(unsigned short))) & 3)
171 size += 4 - (size & 3);
173 _ucprop_ranges = (unsigned long *) (_ucprop_offsets + size);
176 * Load the offset array.
178 fread((char *) _ucprop_offsets, sizeof(unsigned short), size, in);
181 * Do an endian swap if necessary. Don't forget there is an extra node on
182 * the end with the final index.
184 if (hdr.bom == 0xfffe) {
185 for (i = 0; i <= _ucprop_size; i++)
186 _ucprop_offsets[i] = endian_short(_ucprop_offsets[i]);
190 * Load the ranges. The number of elements is in the last array position
193 fread((char *) _ucprop_ranges, sizeof(unsigned long),
194 _ucprop_offsets[_ucprop_size], in);
199 * Do an endian swap if necessary.
201 if (hdr.bom == 0xfffe) {
202 for (i = 0; i < _ucprop_offsets[_ucprop_size]; i++)
203 _ucprop_ranges[i] = endian_long(_ucprop_ranges[i]);
211 if (_ucprop_size == 0)
215 * Only need to free the offsets because the memory is allocated as a
218 free((char *) _ucprop_offsets);
223 _ucprop_lookup(unsigned long code, unsigned long n)
227 if (_ucprop_size == 0)
231 * There is an extra node on the end of the offsets to allow this routine
232 * to work right. If the index is 0xffff, then there are no nodes for the
235 if ((l = _ucprop_offsets[n]) == 0xffff)
239 * Locate the next offset that is not 0xffff. The sentinel at the end of
240 * the array is the max index value.
243 n + m < _ucprop_size && _ucprop_offsets[n + m] == 0xffff; m++) ;
245 r = _ucprop_offsets[n + m] - 1;
249 * Determine a "mid" point and adjust to make sure the mid point is at
250 * the beginning of a range pair.
254 if (code > _ucprop_ranges[m + 1])
256 else if (code < _ucprop_ranges[m])
258 else if (code >= _ucprop_ranges[m] && code <= _ucprop_ranges[m + 1])
265 ucisprop(unsigned long code, unsigned long mask1, unsigned long mask2)
269 if (mask1 == 0 && mask2 == 0)
272 for (i = 0; mask1 && i < 32; i++) {
273 if ((mask1 & masks32[i]) && _ucprop_lookup(code, i))
277 for (i = 32; mask2 && i < _ucprop_size; i++) {
278 if ((mask2 & masks32[i & 31]) && _ucprop_lookup(code, i))
285 /**************************************************************************
287 * Support for case mapping.
289 **************************************************************************/
291 static unsigned long _uccase_size;
292 static unsigned short _uccase_len[2];
293 static unsigned long *_uccase_map;
296 * Return -1 on error, 0 if okay
299 _uccase_load(char *paths, int reload)
305 if (_uccase_size > 0) {
308 * The case mappings have already been loaded.
312 free((char *) _uccase_map);
316 if ((in = _ucopenfile(paths, "case.dat", "rb")) == 0)
322 fread((char *) &hdr, sizeof(_ucheader_t), 1, in);
324 if (hdr.bom == 0xfffe) {
325 hdr.cnt = endian_short(hdr.cnt);
326 hdr.size.len[0] = endian_short(hdr.size.len[0]);
327 hdr.size.len[1] = endian_short(hdr.size.len[1]);
331 * Set the node count and lengths of the upper and lower case mapping
334 _uccase_size = hdr.cnt * 3;
335 _uccase_len[0] = hdr.size.len[0] * 3;
336 _uccase_len[1] = hdr.size.len[1] * 3;
338 _uccase_map = (unsigned long *)
339 malloc(_uccase_size * sizeof(unsigned long));
342 * Load the case mapping table.
344 fread((char *) _uccase_map, sizeof(unsigned long), _uccase_size, in);
347 * Do an endian swap if necessary.
349 if (hdr.bom == 0xfffe) {
350 for (i = 0; i < _uccase_size; i++)
351 _uccase_map[i] = endian_long(_uccase_map[i]);
360 if (_uccase_size == 0)
363 free((char *) _uccase_map);
368 _uccase_lookup(unsigned long code, long l, long r, int field)
373 * Do the binary search.
377 * Determine a "mid" point and adjust to make sure the mid point is at
378 * the beginning of a case mapping triple.
382 if (code > _uccase_map[m])
384 else if (code < _uccase_map[m])
386 else if (code == _uccase_map[m])
387 return _uccase_map[m + field];
394 uctoupper(unsigned long code)
402 if (ucislower(code)) {
404 * The character is lower case.
408 r = (l + _uccase_len[1]) - 3;
411 * The character is title case.
414 l = _uccase_len[0] + _uccase_len[1];
415 r = _uccase_size - 3;
417 return _uccase_lookup(code, l, r, field);
421 uctolower(unsigned long code)
429 if (ucisupper(code)) {
431 * The character is upper case.
435 r = _uccase_len[0] - 3;
438 * The character is title case.
441 l = _uccase_len[0] + _uccase_len[1];
442 r = _uccase_size - 3;
444 return _uccase_lookup(code, l, r, field);
448 uctotitle(unsigned long code)
457 * The offset will always be the same for converting to title case.
461 if (ucisupper(code)) {
463 * The character is upper case.
466 r = _uccase_len[0] - 3;
469 * The character is lower case.
472 r = (l + _uccase_len[1]) - 3;
474 return _uccase_lookup(code, l, r, field);
477 /**************************************************************************
479 * Support for compositions.
481 **************************************************************************/
483 static unsigned long _uccomp_size;
484 static unsigned long *_uccomp_data;
487 * Return -1 on error, 0 if okay
490 _uccomp_load(char *paths, int reload)
493 unsigned long size, i;
496 if (_uccomp_size > 0) {
499 * The compositions have already been loaded.
503 free((char *) _uccomp_data);
507 if ((in = _ucopenfile(paths, "comp.dat", "rb")) == 0)
513 fread((char *) &hdr, sizeof(_ucheader_t), 1, in);
515 if (hdr.bom == 0xfffe) {
516 hdr.cnt = endian_short(hdr.cnt);
517 hdr.size.bytes = endian_long(hdr.size.bytes);
520 _uccomp_size = hdr.cnt;
521 _uccomp_data = (unsigned long *) malloc(hdr.size.bytes);
524 * Read the composition data in.
526 size = hdr.size.bytes / sizeof(unsigned long);
527 fread((char *) _uccomp_data, sizeof(unsigned long), size, in);
530 * Do an endian swap if necessary.
532 if (hdr.bom == 0xfffe) {
533 for (i = 0; i < size; i++)
534 _uccomp_data[i] = endian_long(_uccomp_data[i]);
538 * Assume that the data is ordered on count, so that all compositions
539 * of length 2 come first. Only handling length 2 for now.
541 for (i = 1; i < size; i += 4)
542 if (_uccomp_data[i] != 2)
544 _uccomp_size = i - 1;
553 if (_uccomp_size == 0)
556 free((char *) _uccomp_data);
561 uccomp(unsigned long node1, unsigned long node2, unsigned long *comp)
566 r = _uccomp_size - 1;
571 if (node1 > _uccomp_data[m+2])
573 else if (node1 < _uccomp_data[m+2])
575 else if (node2 > _uccomp_data[m+3])
577 else if (node2 < _uccomp_data[m+3])
580 *comp = _uccomp_data[m];
/*
 * Compose conjoining Hangul jamo into precomposed syllables, in place.
 *
 * str - array of code points; rewritten in place with the composed text.
 * len - number of code points in str.
 *
 * Returns the number of code points left in str after composition
 * (0 when str is NULL or len is not positive).
 *
 * Two pairings are recognised while scanning left to right:
 *   L + V   -> LV syllable
 *   LV + T  -> LVT syllable
 * TBase (U+11A7) is one below the first trailing consonant, so a valid T
 * has an index strictly between 0 and TCount.  Index 0 is not a jamo and
 * index TCount would spill into the next LV syllable, so both are left
 * uncomposed.
 */
int
uccomp_hangul(unsigned long *str, int len)
{
    const int SBase = 0xAC00, LBase = 0x1100,
        VBase = 0x1161, TBase = 0x11A7,
        LCount = 19, VCount = 21, TCount = 28,
        NCount = VCount * TCount,   /* 588 */
        SCount = LCount * NCount;   /* 11172 */

    int i, rlen;
    unsigned long ch, last, lindex, sindex;

    if (str == 0 || len <= 0)
      return 0;

    last = str[0];
    rlen = 1;
    for ( i = 1; i < len; i++ ) {
        ch = str[i];

        /*
         * Check if the two current characters are L and V.  The indexes
         * are unsigned, so a value below the base wraps to a huge number
         * and fails the upper-bound test.
         */
        lindex = last - LBase;
        if (lindex < (unsigned long) LCount) {
            unsigned long vindex = ch - VBase;
            if (vindex < (unsigned long) VCount) {
                /* make syllable of form LV */
                last = SBase + (lindex * VCount + vindex) * TCount;
                str[rlen-1] = last; /* reset last */
                continue;
            }
        }

        /* check if the two current characters are LV and T */
        sindex = last - SBase;
        if (sindex < (unsigned long) SCount
            && (sindex % TCount) == 0)
        {
            unsigned long tindex = ch - TBase;
            if (0 < tindex && tindex < (unsigned long) TCount) {
                /* make syllable of form LVT */
                last += tindex;
                str[rlen-1] = last; /* reset last */
                continue;
            }
        }

        /* if neither case was true, just add the character */
        last = ch;
        str[rlen] = ch;
        rlen++;
    }
    return rlen;
}
639 uccanoncomp(unsigned long *str, int len)
642 unsigned long cl, prevcl, st, ch, co;
647 prevcl = uccombining_class(st) == 0 ? 0 : 256;
649 for (i = 1; i < len; i++) {
651 cl = uccombining_class(ch);
652 if (uccomp(st, ch, &co) && (prevcl < cl || prevcl == 0))
653 st = str[stpos] = co;
664 return uccomp_hangul(str, copos);
667 /**************************************************************************
669 * Support for decompositions.
671 **************************************************************************/
673 static unsigned long _ucdcmp_size;
674 static unsigned long *_ucdcmp_nodes;
675 static unsigned long *_ucdcmp_decomp;
678 * Return -1 on error, 0 if okay
681 _ucdcmp_load(char *paths, int reload)
684 unsigned long size, i;
687 if (_ucdcmp_size > 0) {
690 * The decompositions have already been loaded.
694 free((char *) _ucdcmp_nodes);
698 if ((in = _ucopenfile(paths, "decomp.dat", "rb")) == 0)
704 fread((char *) &hdr, sizeof(_ucheader_t), 1, in);
706 if (hdr.bom == 0xfffe) {
707 hdr.cnt = endian_short(hdr.cnt);
708 hdr.size.bytes = endian_long(hdr.size.bytes);
711 _ucdcmp_size = hdr.cnt << 1;
712 _ucdcmp_nodes = (unsigned long *) malloc(hdr.size.bytes);
713 _ucdcmp_decomp = _ucdcmp_nodes + (_ucdcmp_size + 1);
716 * Read the decomposition data in.
718 size = hdr.size.bytes / sizeof(unsigned long);
719 fread((char *) _ucdcmp_nodes, sizeof(unsigned long), size, in);
722 * Do an endian swap if necessary.
724 if (hdr.bom == 0xfffe) {
725 for (i = 0; i < size; i++)
726 _ucdcmp_nodes[i] = endian_long(_ucdcmp_nodes[i]);
735 if (_ucdcmp_size == 0)
739 * Only need to free the offsets because the memory is allocated as a
742 free((char *) _ucdcmp_nodes);
747 ucdecomp(unsigned long code, unsigned long *num, unsigned long **decomp)
752 r = _ucdcmp_nodes[_ucdcmp_size] - 1;
756 * Determine a "mid" point and adjust to make sure the mid point is at
757 * the beginning of a code+offset pair.
761 if (code > _ucdcmp_nodes[m])
763 else if (code < _ucdcmp_nodes[m])
765 else if (code == _ucdcmp_nodes[m]) {
766 *num = _ucdcmp_nodes[m + 3] - _ucdcmp_nodes[m + 1];
767 *decomp = &_ucdcmp_decomp[_ucdcmp_nodes[m + 1]];
775 ucdecomp_hangul(unsigned long code, unsigned long *num, unsigned long decomp[])
777 if (!ucishangul(code))
781 decomp[0] = 0x1100 + (unsigned long) (code / 588);
782 decomp[1] = 0x1161 + (unsigned long) ((code % 588) / 28);
783 decomp[2] = 0x11a7 + (unsigned long) (code % 28);
784 *num = (decomp[2] != 0x11a7) ? 3 : 2;
790 uccanondecomp(const unsigned long *in, int inlen,
791 unsigned long **out, int *outlen)
795 unsigned long num, class, *decomp, hangdecomp[3];
798 *out = (unsigned long *) malloc(size * sizeof(**out));
803 for (j = 0; j < (unsigned) inlen; j++) {
804 if (ucdecomp(in[j], &num, &decomp)) {
805 if ( size - i < num) {
806 size = inlen + i - j + num - 1;
807 *out = (unsigned long *) realloc(*out, size * sizeof(**out));
811 for (k = 0; k < num; k++) {
812 class = uccombining_class(decomp[k]);
814 (*out)[i] = decomp[k];
816 for (l = i; l > 0; l--)
817 if (class >= uccombining_class((*out)[l-1]))
819 AC_MEMCPY(*out + l + 1, *out + l, (i - l) * sizeof(**out));
820 (*out)[l] = decomp[k];
824 } else if (ucdecomp_hangul(in[j], &num, hangdecomp)) {
825 if (size - i < num) {
826 size = inlen + i - j + num - 1;
827 *out = (unsigned long *) realloc(*out, size * sizeof(**out));
831 for (k = 0; k < num; k++) {
832 (*out)[i] = hangdecomp[k];
837 size = inlen + i - j;
838 *out = (unsigned long *) realloc(*out, size * sizeof(**out));
842 class = uccombining_class(in[j]);
846 for (l = i; l > 0; l--)
847 if (class >= uccombining_class((*out)[l-1]))
849 AC_MEMCPY(*out + l + 1, *out + l, (i - l) * sizeof(**out));
858 /**************************************************************************
860 * Support for combining classes.
862 **************************************************************************/
864 static unsigned long _uccmcl_size;
865 static unsigned long *_uccmcl_nodes;
868 * Return -1 on error, 0 if okay
871 _uccmcl_load(char *paths, int reload)
877 if (_uccmcl_size > 0) {
880 * The combining classes have already been loaded.
884 free((char *) _uccmcl_nodes);
888 if ((in = _ucopenfile(paths, "cmbcl.dat", "rb")) == 0)
894 fread((char *) &hdr, sizeof(_ucheader_t), 1, in);
896 if (hdr.bom == 0xfffe) {
897 hdr.cnt = endian_short(hdr.cnt);
898 hdr.size.bytes = endian_long(hdr.size.bytes);
901 _uccmcl_size = hdr.cnt * 3;
902 _uccmcl_nodes = (unsigned long *) malloc(hdr.size.bytes);
905 * Read the combining classes in.
907 fread((char *) _uccmcl_nodes, sizeof(unsigned long), _uccmcl_size, in);
910 * Do an endian swap if necessary.
912 if (hdr.bom == 0xfffe) {
913 for (i = 0; i < _uccmcl_size; i++)
914 _uccmcl_nodes[i] = endian_long(_uccmcl_nodes[i]);
923 if (_uccmcl_size == 0)
926 free((char *) _uccmcl_nodes);
931 uccombining_class(unsigned long code)
936 r = _uccmcl_size - 1;
941 if (code > _uccmcl_nodes[m + 1])
943 else if (code < _uccmcl_nodes[m])
945 else if (code >= _uccmcl_nodes[m] && code <= _uccmcl_nodes[m + 1])
946 return _uccmcl_nodes[m + 2];
951 /**************************************************************************
953 * Support for numeric values.
955 **************************************************************************/
957 static unsigned long *_ucnum_nodes;
958 static unsigned long _ucnum_size;
959 static short *_ucnum_vals;
962 * Return -1 on error, 0 if okay
965 _ucnumb_load(char *paths, int reload)
968 unsigned long size, i;
971 if (_ucnum_size > 0) {
974 * The numbers have already been loaded.
978 free((char *) _ucnum_nodes);
982 if ((in = _ucopenfile(paths, "num.dat", "rb")) == 0)
988 fread((char *) &hdr, sizeof(_ucheader_t), 1, in);
990 if (hdr.bom == 0xfffe) {
991 hdr.cnt = endian_short(hdr.cnt);
992 hdr.size.bytes = endian_long(hdr.size.bytes);
995 _ucnum_size = hdr.cnt;
996 _ucnum_nodes = (unsigned long *) malloc(hdr.size.bytes);
997 _ucnum_vals = (short *) (_ucnum_nodes + _ucnum_size);
1000 * Read the number nodes and their values in.
1002 fread((char *) _ucnum_nodes, sizeof(unsigned char), hdr.size.bytes, in);
1005 * Do an endian swap if necessary.
1007 if (hdr.bom == 0xfffe) {
1008 for (i = 0; i < _ucnum_size; i++)
1009 _ucnum_nodes[i] = endian_long(_ucnum_nodes[i]);
1012 * Determine the number of values that have to be adjusted.
1014 size = (hdr.size.bytes -
1015 (_ucnum_size * (sizeof(unsigned long) << 1))) /
1018 for (i = 0; i < size; i++)
1019 _ucnum_vals[i] = endian_short(_ucnum_vals[i]);
1026 _ucnumb_unload(void)
1028 if (_ucnum_size == 0)
1031 free((char *) _ucnum_nodes);
1036 ucnumber_lookup(unsigned long code, struct ucnumber *num)
1042 r = _ucnum_size - 1;
1045 * Determine a "mid" point and adjust to make sure the mid point is at
1046 * the beginning of a code+offset pair.
1050 if (code > _ucnum_nodes[m])
1052 else if (code < _ucnum_nodes[m])
1055 vp = _ucnum_vals + _ucnum_nodes[m + 1];
1056 num->numerator = (int) *vp++;
1057 num->denominator = (int) *vp;
1065 ucdigit_lookup(unsigned long code, int *digit)
1071 r = _ucnum_size - 1;
1074 * Determine a "mid" point and adjust to make sure the mid point is at
1075 * the beginning of a code+offset pair.
1079 if (code > _ucnum_nodes[m])
1081 else if (code < _ucnum_nodes[m])
1084 vp = _ucnum_vals + _ucnum_nodes[m + 1];
1085 if (*vp == *(vp + 1)) {
1096 ucgetnumber(unsigned long code)
1098 struct ucnumber num;
1101 * Initialize with some arbitrary value, because the caller simply cannot
1102 * tell for sure if the code is a number without calling the ucisnumber()
1103 * macro before calling this function.
1105 num.numerator = num.denominator = -111;
1107 (void) ucnumber_lookup(code, &num);
1113 ucgetdigit(unsigned long code)
1118 * Initialize with some arbitrary value, because the caller simply cannot
1119 * tell for sure if the code is a number without calling the ucisdigit()
1120 * macro before calling this function.
1124 (void) ucdigit_lookup(code, &dig);
1129 /**************************************************************************
1131 * Setup and cleanup routines.
1133 **************************************************************************/
1136 * Return 0 if okay, negative on error
1139 ucdata_load(char *paths, int masks)
1143 if (masks & UCDATA_CTYPE)
1144 error |= _ucprop_load(paths, 0) < 0 ? UCDATA_CTYPE : 0;
1145 if (masks & UCDATA_CASE)
1146 error |= _uccase_load(paths, 0) < 0 ? UCDATA_CASE : 0;
1147 if (masks & UCDATA_DECOMP)
1148 error |= _ucdcmp_load(paths, 0) < 0 ? UCDATA_DECOMP : 0;
1149 if (masks & UCDATA_CMBCL)
1150 error |= _uccmcl_load(paths, 0) < 0 ? UCDATA_CMBCL : 0;
1151 if (masks & UCDATA_NUM)
1152 error |= _ucnumb_load(paths, 0) < 0 ? UCDATA_NUM : 0;
1153 if (masks & UCDATA_COMP)
1154 error |= _uccomp_load(paths, 0) < 0 ? UCDATA_COMP : 0;
1160 ucdata_unload(int masks)
1162 if (masks & UCDATA_CTYPE)
1164 if (masks & UCDATA_CASE)
1166 if (masks & UCDATA_DECOMP)
1168 if (masks & UCDATA_CMBCL)
1170 if (masks & UCDATA_NUM)
1172 if (masks & UCDATA_COMP)
1177 * Return 0 if okay, negative on error
1180 ucdata_reload(char *paths, int masks)
1184 if (masks & UCDATA_CTYPE)
1185 error |= _ucprop_load(paths, 1) < 0 ? UCDATA_CTYPE : 0;
1186 if (masks & UCDATA_CASE)
1187 error |= _uccase_load(paths, 1) < 0 ? UCDATA_CASE : 0;
1188 if (masks & UCDATA_DECOMP)
1189 error |= _ucdcmp_load(paths, 1) < 0 ? UCDATA_DECOMP : 0;
1190 if (masks & UCDATA_CMBCL)
1191 error |= _uccmcl_load(paths, 1) < 0 ? UCDATA_CMBCL : 0;
1192 if (masks & UCDATA_NUM)
1193 error |= _ucnumb_load(paths, 1) < 0 ? UCDATA_NUM : 0;
1194 if (masks & UCDATA_COMP)
1195 error |= _uccomp_load(paths, 1) < 0 ? UCDATA_COMP : 0;
1206 unsigned long i, lo, *dec;
1207 struct ucnumber num;
1214 printf("NOT WEAK\n");
1216 printf("LOWER 0x%04lX\n", uctolower(0xff3a));
1217 printf("UPPER 0x%04lX\n", uctoupper(0xff5a));
1219 if (ucisalpha(0x1d5))
1222 printf("NOT ALPHA\n");
1224 if (ucisupper(0x1d5)) {
1226 lo = uctolower(0x1d5);
1227 printf("0x%04lx\n", lo);
1228 lo = uctotitle(0x1d5);
1229 printf("0x%04lx\n", lo);
1231 printf("NOT UPPER\n");
1233 if (ucistitle(0x1d5))
1236 printf("NOT TITLE\n");
1238 if (uciscomposite(0x1d5))
1239 printf("COMPOSITE\n");
1241 printf("NOT COMPOSITE\n");
1243 if (ucdecomp(0x1d5, &lo, &dec)) {
1244 for (i = 0; i < lo; i++)
1245 printf("0x%04lx ", dec[i]);
1249 if ((lo = uccombining_class(0x41)) != 0)
1250 printf("0x41 CCL %ld\n", lo);
1252 if (ucisxdigit(0xfeff))
1253 printf("0xFEFF HEX DIGIT\n");
1255 printf("0xFEFF NOT HEX DIGIT\n");
1257 if (ucisdefined(0x10000))
1258 printf("0x10000 DEFINED\n");
1260 printf("0x10000 NOT DEFINED\n");
1262 if (ucnumber_lookup(0x30, &num)) {
1263 if (num.numerator != num.denominator)
1264 printf("UCNUMBER: 0x30 = %d/%d\n", num.numerator, num.denominator);
1266 printf("UCNUMBER: 0x30 = %d\n", num.numerator);
1268 printf("UCNUMBER: 0x30 NOT A NUMBER\n");
1270 if (ucnumber_lookup(0xbc, &num)) {
1271 if (num.numerator != num.denominator)
1272 printf("UCNUMBER: 0xbc = %d/%d\n", num.numerator, num.denominator);
1274 printf("UCNUMBER: 0xbc = %d\n", num.numerator);
1276 printf("UCNUMBER: 0xbc NOT A NUMBER\n");
1279 if (ucnumber_lookup(0xff19, &num)) {
1280 if (num.numerator != num.denominator)
1281 printf("UCNUMBER: 0xff19 = %d/%d\n", num.numerator, num.denominator);
1283 printf("UCNUMBER: 0xff19 = %d\n", num.numerator);
1285 printf("UCNUMBER: 0xff19 NOT A NUMBER\n");
1287 if (ucnumber_lookup(0x4e00, &num)) {
1288 if (num.numerator != num.denominator)
1289 printf("UCNUMBER: 0x4e00 = %d/%d\n", num.numerator, num.denominator);
1291 printf("UCNUMBER: 0x4e00 = %d\n", num.numerator);
1293 printf("UCNUMBER: 0x4e00 NOT A NUMBER\n");
1295 if (ucdigit_lookup(0x06f9, &dig))
1296 printf("UCDIGIT: 0x6f9 = %d\n", dig);
1298 printf("UCDIGIT: 0x6f9 NOT A NUMBER\n");
1300 dig = ucgetdigit(0x0969);
1301 printf("UCGETDIGIT: 0x969 = %d\n", dig);
1303 num = ucgetnumber(0x30);
1304 if (num.numerator != num.denominator)
1305 printf("UCGETNUMBER: 0x30 = %d/%d\n", num.numerator, num.denominator);
1307 printf("UCGETNUMBER: 0x30 = %d\n", num.numerator);
1309 num = ucgetnumber(0xbc);
1310 if (num.numerator != num.denominator)
1311 printf("UCGETNUMBER: 0xbc = %d/%d\n", num.numerator, num.denominator);
1313 printf("UCGETNUMBER: 0xbc = %d\n", num.numerator);
1315 num = ucgetnumber(0xff19);
1316 if (num.numerator != num.denominator)
1317 printf("UCGETNUMBER: 0xff19 = %d/%d\n", num.numerator, num.denominator);
1319 printf("UCGETNUMBER: 0xff19 = %d\n", num.numerator);