2 /* This work is part of OpenLDAP Software <http://www.openldap.org/>.
4 * Copyright 1998-2004 The OpenLDAP Foundation.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted only as authorized by the OpenLDAP
11 * A copy of this license is available in file LICENSE in the
12 * top-level directory of the distribution or, alternatively, at
13 * <http://www.OpenLDAP.org/license.html>.
15 /* Copyright 2001 Computing Research Labs, New Mexico State University
17 * Permission is hereby granted, free of charge, to any person obtaining a
18 * copy of this software and associated documentation files (the "Software"),
19 * to deal in the Software without restriction, including without limitation
20 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
21 * and/or sell copies of the Software, and to permit persons to whom the
22 * Software is furnished to do so, subject to the following conditions:
24 * The above copyright notice and this permission notice shall be included in
25 * all copies or substantial portions of the Software.
27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
28 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
29 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
30 * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
31 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
32 * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
33 * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
35 /* $Id: ucdata.c,v 1.4 2001/01/02 18:46:20 mleisher Exp $" */
38 #include "ldap_config.h"
41 #include <ac/stdlib.h>
42 #include <ac/string.h>
43 #include <ac/unistd.h>
50 /**************************************************************************
52 * Miscellaneous types, data, and support functions.
54 **************************************************************************/
66 * A simple array of 32-bit masks for lookup.
68 static ac_uint4 masks32[32] = {
69 0x00000001UL, 0x00000002UL, 0x00000004UL, 0x00000008UL,
70 0x00000010UL, 0x00000020UL, 0x00000040UL, 0x00000080UL,
71 0x00000100UL, 0x00000200UL, 0x00000400UL, 0x00000800UL,
72 0x00001000UL, 0x00002000UL, 0x00004000UL, 0x00008000UL,
73 0x00010000UL, 0x00020000UL, 0x00040000UL, 0x00080000UL,
74 0x00100000UL, 0x00200000UL, 0x00400000UL, 0x00800000UL,
75 0x01000000UL, 0x02000000UL, 0x04000000UL, 0x08000000UL,
76 0x10000000UL, 0x20000000UL, 0x40000000UL, 0x80000000UL
/*
 * Byte-order swapping helpers, used when a data file's byte-order mark
 * reads back as 0xfffe.
 *
 * Every byte is masked after it is shifted down, so a signed argument
 * (for example a negative short) cannot leak sign-extension bits into the
 * result.  The argument is evaluated several times: do not pass an
 * expression with side effects.
 */
#define endian_short(cc) ((((cc) >> 8) & 0xff) | (((cc) & 0xff) << 8))
#define endian_long(cc) ((((cc) & 0xff) << 24)|((((cc) >> 8) & 0xff) << 16)|\
                        ((((cc) >> 16) & 0xff) << 8)|(((cc) >> 24) & 0xff))
84 _ucopenfile(char *paths, char *filename, char *mode)
87 char *fp, *dp, *pp, path[BUFSIZ];
89 if (filename == 0 || *filename == 0)
95 while (*dp && *dp != ':')
104 if ((f = fopen(path, mode)) != 0)
114 /**************************************************************************
116 * Support for the character properties.
118 **************************************************************************/
120 static ac_uint4 _ucprop_size;
121 static ac_uint2 *_ucprop_offsets;
122 static ac_uint4 *_ucprop_ranges;
125 * Return -1 on error, 0 if okay
128 _ucprop_load(char *paths, int reload)
134 if (_ucprop_size > 0) {
137 * The character properties have already been loaded.
142 * Unload the current character property data in preparation for
143 * loading a new copy. Only the first array has to be deallocated
144 * because all the memory for the arrays is allocated as a single
147 free((char *) _ucprop_offsets);
151 if ((in = _ucopenfile(paths, "ctype.dat", "rb")) == 0)
157 fread((char *) &hdr, sizeof(_ucheader_t), 1, in);
159 if (hdr.bom == 0xfffe) {
160 hdr.cnt = endian_short(hdr.cnt);
161 hdr.size.bytes = endian_long(hdr.size.bytes);
164 if ((_ucprop_size = hdr.cnt) == 0) {
170 * Allocate all the storage needed for the lookup table.
172 _ucprop_offsets = (ac_uint2 *) malloc(hdr.size.bytes);
175 * Calculate the offset into the storage for the ranges. The offsets
176 * array is on a 4-byte boundary and one larger than the value provided in
177 * the header count field. This means the offset to the ranges must be
178 * calculated after aligning the count to a 4-byte boundary.
180 if ((size = ((hdr.cnt + 1) * sizeof(ac_uint2))) & 3)
181 size += 4 - (size & 3);
183 _ucprop_ranges = (ac_uint4 *) (_ucprop_offsets + size);
186 * Load the offset array.
188 fread((char *) _ucprop_offsets, sizeof(ac_uint2), size, in);
191 * Do an endian swap if necessary. Don't forget there is an extra node on
192 * the end with the final index.
194 if (hdr.bom == 0xfffe) {
195 for (i = 0; i <= _ucprop_size; i++)
196 _ucprop_offsets[i] = endian_short(_ucprop_offsets[i]);
200 * Load the ranges. The number of elements is in the last array position
203 fread((char *) _ucprop_ranges, sizeof(ac_uint4),
204 _ucprop_offsets[_ucprop_size], in);
209 * Do an endian swap if necessary.
211 if (hdr.bom == 0xfffe) {
212 for (i = 0; i < _ucprop_offsets[_ucprop_size]; i++)
213 _ucprop_ranges[i] = endian_long(_ucprop_ranges[i]);
221 if (_ucprop_size == 0)
225 * Only need to free the offsets because the memory is allocated as a
228 free((char *) _ucprop_offsets);
233 _ucprop_lookup(ac_uint4 code, ac_uint4 n)
237 if (_ucprop_size == 0)
241 * There is an extra node on the end of the offsets to allow this routine
242 * to work right. If the index is 0xffff, then there are no nodes for the
245 if ((l = _ucprop_offsets[n]) == 0xffff)
249 * Locate the next offset that is not 0xffff. The sentinel at the end of
250 * the array is the max index value.
253 n + m < _ucprop_size && _ucprop_offsets[n + m] == 0xffff; m++) ;
255 r = _ucprop_offsets[n + m] - 1;
259 * Determine a "mid" point and adjust to make sure the mid point is at
260 * the beginning of a range pair.
264 if (code > _ucprop_ranges[m + 1])
266 else if (code < _ucprop_ranges[m])
268 else if (code >= _ucprop_ranges[m] && code <= _ucprop_ranges[m + 1])
275 ucisprop(ac_uint4 code, ac_uint4 mask1, ac_uint4 mask2)
279 if (mask1 == 0 && mask2 == 0)
282 for (i = 0; mask1 && i < 32; i++) {
283 if ((mask1 & masks32[i]) && _ucprop_lookup(code, i))
287 for (i = 32; mask2 && i < _ucprop_size; i++) {
288 if ((mask2 & masks32[i & 31]) && _ucprop_lookup(code, i))
295 /**************************************************************************
297 * Support for case mapping.
299 **************************************************************************/
301 static ac_uint4 _uccase_size;
302 static ac_uint2 _uccase_len[2];
303 static ac_uint4 *_uccase_map;
306 * Return -1 on error, 0 if okay
309 _uccase_load(char *paths, int reload)
315 if (_uccase_size > 0) {
318 * The case mappings have already been loaded.
322 free((char *) _uccase_map);
326 if ((in = _ucopenfile(paths, "case.dat", "rb")) == 0)
332 fread((char *) &hdr, sizeof(_ucheader_t), 1, in);
334 if (hdr.bom == 0xfffe) {
335 hdr.cnt = endian_short(hdr.cnt);
336 hdr.size.len[0] = endian_short(hdr.size.len[0]);
337 hdr.size.len[1] = endian_short(hdr.size.len[1]);
341 * Set the node count and lengths of the upper and lower case mapping
344 _uccase_size = hdr.cnt * 3;
345 _uccase_len[0] = hdr.size.len[0] * 3;
346 _uccase_len[1] = hdr.size.len[1] * 3;
348 _uccase_map = (ac_uint4 *)
349 malloc(_uccase_size * sizeof(ac_uint4));
352 * Load the case mapping table.
354 fread((char *) _uccase_map, sizeof(ac_uint4), _uccase_size, in);
357 * Do an endian swap if necessary.
359 if (hdr.bom == 0xfffe) {
360 for (i = 0; i < _uccase_size; i++)
361 _uccase_map[i] = endian_long(_uccase_map[i]);
370 if (_uccase_size == 0)
373 free((char *) _uccase_map);
378 _uccase_lookup(ac_uint4 code, long l, long r, int field)
383 * Do the binary search.
387 * Determine a "mid" point and adjust to make sure the mid point is at
388 * the beginning of a case mapping triple.
392 if (code > _uccase_map[m])
394 else if (code < _uccase_map[m])
396 else if (code == _uccase_map[m])
397 return _uccase_map[m + field];
404 uctoupper(ac_uint4 code)
412 if (ucislower(code)) {
414 * The character is lower case.
418 r = (l + _uccase_len[1]) - 3;
421 * The character is title case.
424 l = _uccase_len[0] + _uccase_len[1];
425 r = _uccase_size - 3;
427 return _uccase_lookup(code, l, r, field);
431 uctolower(ac_uint4 code)
439 if (ucisupper(code)) {
441 * The character is upper case.
445 r = _uccase_len[0] - 3;
448 * The character is title case.
451 l = _uccase_len[0] + _uccase_len[1];
452 r = _uccase_size - 3;
454 return _uccase_lookup(code, l, r, field);
458 uctotitle(ac_uint4 code)
467 * The offset will always be the same for converting to title case.
471 if (ucisupper(code)) {
473 * The character is upper case.
476 r = _uccase_len[0] - 3;
479 * The character is lower case.
482 r = (l + _uccase_len[1]) - 3;
484 return _uccase_lookup(code, l, r, field);
487 /**************************************************************************
489 * Support for compositions.
491 **************************************************************************/
493 static ac_uint4 _uccomp_size;
494 static ac_uint4 *_uccomp_data;
497 * Return -1 on error, 0 if okay
500 _uccomp_load(char *paths, int reload)
506 if (_uccomp_size > 0) {
509 * The compositions have already been loaded.
513 free((char *) _uccomp_data);
517 if ((in = _ucopenfile(paths, "comp.dat", "rb")) == 0)
523 fread((char *) &hdr, sizeof(_ucheader_t), 1, in);
525 if (hdr.bom == 0xfffe) {
526 hdr.cnt = endian_short(hdr.cnt);
527 hdr.size.bytes = endian_long(hdr.size.bytes);
530 _uccomp_size = hdr.cnt;
531 _uccomp_data = (ac_uint4 *) malloc(hdr.size.bytes);
534 * Read the composition data in.
536 size = hdr.size.bytes / sizeof(ac_uint4);
537 fread((char *) _uccomp_data, sizeof(ac_uint4), size, in);
540 * Do an endian swap if necessary.
542 if (hdr.bom == 0xfffe) {
543 for (i = 0; i < size; i++)
544 _uccomp_data[i] = endian_long(_uccomp_data[i]);
548 * Assume that the data is ordered on count, so that all compositions
549 * of length 2 come first. Only handling length 2 for now.
551 for (i = 1; i < size; i += 4)
552 if (_uccomp_data[i] != 2)
554 _uccomp_size = i - 1;
563 if (_uccomp_size == 0)
566 free((char *) _uccomp_data);
571 uccomp(ac_uint4 node1, ac_uint4 node2, ac_uint4 *comp)
576 r = _uccomp_size - 1;
581 if (node1 > _uccomp_data[m+2])
583 else if (node1 < _uccomp_data[m+2])
585 else if (node2 > _uccomp_data[m+3])
587 else if (node2 < _uccomp_data[m+3])
590 *comp = _uccomp_data[m];
598 uccomp_hangul(ac_uint4 *str, int len)
600 const int SBase = 0xAC00, LBase = 0x1100,
601 VBase = 0x1161, TBase = 0x11A7,
602 LCount = 19, VCount = 21, TCount = 28,
603 NCount = VCount * TCount, /* 588 */
604 SCount = LCount * NCount; /* 11172 */
607 ac_uint4 ch, last, lindex, sindex;
611 for ( i = 1; i < len; i++ ) {
614 /* check if two current characters are L and V */
615 lindex = last - LBase;
616 if (lindex < (ac_uint4) LCount) {
617 ac_uint4 vindex = ch - VBase;
618 if (vindex < (ac_uint4) VCount) {
619 /* make syllable of form LV */
620 last = SBase + (lindex * VCount + vindex) * TCount;
621 str[rlen-1] = last; /* reset last */
626 /* check if two current characters are LV and T */
627 sindex = last - SBase;
628 if (sindex < (ac_uint4) SCount
629 && (sindex % TCount) == 0)
631 ac_uint4 tindex = ch - TBase;
632 if (tindex <= (ac_uint4) TCount) {
633 /* make syllable of form LVT */
635 str[rlen-1] = last; /* reset last */
640 /* if neither case was true, just add the character */
649 uccanoncomp(ac_uint4 *str, int len)
652 ac_uint4 cl, prevcl, st, ch, co;
657 prevcl = uccombining_class(st) == 0 ? 0 : 256;
659 for (i = 1; i < len; i++) {
661 cl = uccombining_class(ch);
662 if (uccomp(st, ch, &co) && (prevcl < cl || prevcl == 0))
663 st = str[stpos] = co;
674 return uccomp_hangul(str, copos);
677 /**************************************************************************
679 * Support for decompositions.
681 **************************************************************************/
683 static ac_uint4 _ucdcmp_size;
684 static ac_uint4 *_ucdcmp_nodes;
685 static ac_uint4 *_ucdcmp_decomp;
687 static ac_uint4 _uckdcmp_size;
688 static ac_uint4 *_uckdcmp_nodes;
689 static ac_uint4 *_uckdcmp_decomp;
692 * Return -1 on error, 0 if okay
695 _ucdcmp_load(char *paths, int reload)
701 if (_ucdcmp_size > 0) {
704 * The decompositions have already been loaded.
708 free((char *) _ucdcmp_nodes);
712 if ((in = _ucopenfile(paths, "decomp.dat", "rb")) == 0)
718 fread((char *) &hdr, sizeof(_ucheader_t), 1, in);
720 if (hdr.bom == 0xfffe) {
721 hdr.cnt = endian_short(hdr.cnt);
722 hdr.size.bytes = endian_long(hdr.size.bytes);
725 _ucdcmp_size = hdr.cnt << 1;
726 _ucdcmp_nodes = (ac_uint4 *) malloc(hdr.size.bytes);
727 _ucdcmp_decomp = _ucdcmp_nodes + (_ucdcmp_size + 1);
730 * Read the decomposition data in.
732 size = hdr.size.bytes / sizeof(ac_uint4);
733 fread((char *) _ucdcmp_nodes, sizeof(ac_uint4), size, in);
736 * Do an endian swap if necessary.
738 if (hdr.bom == 0xfffe) {
739 for (i = 0; i < size; i++)
740 _ucdcmp_nodes[i] = endian_long(_ucdcmp_nodes[i]);
747 * Return -1 on error, 0 if okay
750 _uckdcmp_load(char *paths, int reload)
756 if (_uckdcmp_size > 0) {
759 * The decompositions have already been loaded.
763 free((char *) _uckdcmp_nodes);
767 if ((in = _ucopenfile(paths, "kdecomp.dat", "rb")) == 0)
773 fread((char *) &hdr, sizeof(_ucheader_t), 1, in);
775 if (hdr.bom == 0xfffe) {
776 hdr.cnt = endian_short(hdr.cnt);
777 hdr.size.bytes = endian_long(hdr.size.bytes);
780 _uckdcmp_size = hdr.cnt << 1;
781 _uckdcmp_nodes = (ac_uint4 *) malloc(hdr.size.bytes);
782 _uckdcmp_decomp = _uckdcmp_nodes + (_uckdcmp_size + 1);
785 * Read the decomposition data in.
787 size = hdr.size.bytes / sizeof(ac_uint4);
788 fread((char *) _uckdcmp_nodes, sizeof(ac_uint4), size, in);
791 * Do an endian swap if necessary.
793 if (hdr.bom == 0xfffe) {
794 for (i = 0; i < size; i++)
795 _uckdcmp_nodes[i] = endian_long(_uckdcmp_nodes[i]);
804 if (_ucdcmp_size == 0)
808 * Only need to free the offsets because the memory is allocated as a
811 free((char *) _ucdcmp_nodes);
816 _uckdcmp_unload(void)
818 if (_uckdcmp_size == 0)
822 * Only need to free the offsets because the memory is allocated as a
825 free((char *) _uckdcmp_nodes);
830 ucdecomp(ac_uint4 code, ac_uint4 *num, ac_uint4 **decomp)
834 if (code < _ucdcmp_nodes[0]) {
839 r = _ucdcmp_nodes[_ucdcmp_size] - 1;
843 * Determine a "mid" point and adjust to make sure the mid point is at
844 * the beginning of a code+offset pair.
848 if (code > _ucdcmp_nodes[m])
850 else if (code < _ucdcmp_nodes[m])
852 else if (code == _ucdcmp_nodes[m]) {
853 *num = _ucdcmp_nodes[m + 3] - _ucdcmp_nodes[m + 1];
854 *decomp = &_ucdcmp_decomp[_ucdcmp_nodes[m + 1]];
862 uckdecomp(ac_uint4 code, ac_uint4 *num, ac_uint4 **decomp)
866 if (code < _uckdcmp_nodes[0]) {
871 r = _uckdcmp_nodes[_uckdcmp_size] - 1;
875 * Determine a "mid" point and adjust to make sure the mid point is at
876 * the beginning of a code+offset pair.
880 if (code > _uckdcmp_nodes[m])
882 else if (code < _uckdcmp_nodes[m])
884 else if (code == _uckdcmp_nodes[m]) {
885 *num = _uckdcmp_nodes[m + 3] - _uckdcmp_nodes[m + 1];
886 *decomp = &_uckdcmp_decomp[_uckdcmp_nodes[m + 1]];
894 ucdecomp_hangul(ac_uint4 code, ac_uint4 *num, ac_uint4 decomp[])
896 if (!ucishangul(code))
900 decomp[0] = 0x1100 + (ac_uint4) (code / 588);
901 decomp[1] = 0x1161 + (ac_uint4) ((code % 588) / 28);
902 decomp[2] = 0x11a7 + (ac_uint4) (code % 28);
903 *num = (decomp[2] != 0x11a7) ? 3 : 2;
908 /* mode == 0 for canonical, mode == 1 for compatibility */
910 uccanoncompatdecomp(const ac_uint4 *in, int inlen,
911 ac_uint4 **out, int *outlen, short mode, void *ctx)
915 ac_uint4 num, class, *decomp, hangdecomp[3];
918 *out = (ac_uint4 *) ber_memalloc_x(size * sizeof(**out), ctx);
923 for (j = 0; j < (unsigned) inlen; j++) {
924 if (mode ? uckdecomp(in[j], &num, &decomp) : ucdecomp(in[j], &num, &decomp)) {
925 if ( size - i < num) {
926 size = inlen + i - j + num - 1;
927 *out = (ac_uint4 *) ber_memrealloc_x(*out, size * sizeof(**out), ctx );
931 for (k = 0; k < num; k++) {
932 class = uccombining_class(decomp[k]);
934 (*out)[i] = decomp[k];
936 for (l = i; l > 0; l--)
937 if (class >= uccombining_class((*out)[l-1]))
939 AC_MEMCPY(*out + l + 1, *out + l, (i - l) * sizeof(**out));
940 (*out)[l] = decomp[k];
944 } else if (ucdecomp_hangul(in[j], &num, hangdecomp)) {
945 if (size - i < num) {
946 size = inlen + i - j + num - 1;
947 *out = (ac_uint4 *) ber_memrealloc_x(*out, size * sizeof(**out), ctx);
951 for (k = 0; k < num; k++) {
952 (*out)[i] = hangdecomp[k];
957 size = inlen + i - j;
958 *out = (ac_uint4 *) ber_memrealloc_x(*out, size * sizeof(**out), ctx);
962 class = uccombining_class(in[j]);
966 for (l = i; l > 0; l--)
967 if (class >= uccombining_class((*out)[l-1]))
969 AC_MEMCPY(*out + l + 1, *out + l, (i - l) * sizeof(**out));
979 uccanondecomp(const ac_uint4 *in, int inlen,
980 ac_uint4 **out, int *outlen, void *ctx)
982 return uccanoncompatdecomp(in, inlen, out, outlen, 0, ctx);
986 uccompatdecomp(const ac_uint4 *in, int inlen,
987 ac_uint4 **out, int *outlen, void *ctx)
989 return uccanoncompatdecomp(in, inlen, out, outlen, 1, ctx);
992 /**************************************************************************
994 * Support for combining classes.
996 **************************************************************************/
998 static ac_uint4 _uccmcl_size;
999 static ac_uint4 *_uccmcl_nodes;
1002 * Return -1 on error, 0 if okay
1005 _uccmcl_load(char *paths, int reload)
1011 if (_uccmcl_size > 0) {
1014 * The combining classes have already been loaded.
1018 free((char *) _uccmcl_nodes);
1022 if ((in = _ucopenfile(paths, "cmbcl.dat", "rb")) == 0)
1028 fread((char *) &hdr, sizeof(_ucheader_t), 1, in);
1030 if (hdr.bom == 0xfffe) {
1031 hdr.cnt = endian_short(hdr.cnt);
1032 hdr.size.bytes = endian_long(hdr.size.bytes);
1035 _uccmcl_size = hdr.cnt * 3;
1036 _uccmcl_nodes = (ac_uint4 *) malloc(hdr.size.bytes);
1039 * Read the combining classes in.
1041 fread((char *) _uccmcl_nodes, sizeof(ac_uint4), _uccmcl_size, in);
1044 * Do an endian swap if necessary.
1046 if (hdr.bom == 0xfffe) {
1047 for (i = 0; i < _uccmcl_size; i++)
1048 _uccmcl_nodes[i] = endian_long(_uccmcl_nodes[i]);
1055 _uccmcl_unload(void)
1057 if (_uccmcl_size == 0)
1060 free((char *) _uccmcl_nodes);
1065 uccombining_class(ac_uint4 code)
1070 r = _uccmcl_size - 1;
1075 if (code > _uccmcl_nodes[m + 1])
1077 else if (code < _uccmcl_nodes[m])
1079 else if (code >= _uccmcl_nodes[m] && code <= _uccmcl_nodes[m + 1])
1080 return _uccmcl_nodes[m + 2];
1085 /**************************************************************************
1087 * Support for numeric values.
1089 **************************************************************************/
1091 static ac_uint4 *_ucnum_nodes;
1092 static ac_uint4 _ucnum_size;
1093 static short *_ucnum_vals;
1096 * Return -1 on error, 0 if okay
1099 _ucnumb_load(char *paths, int reload)
1105 if (_ucnum_size > 0) {
1108 * The numbers have already been loaded.
1112 free((char *) _ucnum_nodes);
1116 if ((in = _ucopenfile(paths, "num.dat", "rb")) == 0)
1122 fread((char *) &hdr, sizeof(_ucheader_t), 1, in);
1124 if (hdr.bom == 0xfffe) {
1125 hdr.cnt = endian_short(hdr.cnt);
1126 hdr.size.bytes = endian_long(hdr.size.bytes);
1129 _ucnum_size = hdr.cnt;
1130 _ucnum_nodes = (ac_uint4 *) malloc(hdr.size.bytes);
1131 _ucnum_vals = (short *) (_ucnum_nodes + _ucnum_size);
1134 * Read the combining classes in.
1136 fread((char *) _ucnum_nodes, sizeof(unsigned char), hdr.size.bytes, in);
1139 * Do an endian swap if necessary.
1141 if (hdr.bom == 0xfffe) {
1142 for (i = 0; i < _ucnum_size; i++)
1143 _ucnum_nodes[i] = endian_long(_ucnum_nodes[i]);
1146 * Determine the number of values that have to be adjusted.
1148 size = (hdr.size.bytes -
1149 (_ucnum_size * (sizeof(ac_uint4) << 1))) /
1152 for (i = 0; i < size; i++)
1153 _ucnum_vals[i] = endian_short(_ucnum_vals[i]);
1160 _ucnumb_unload(void)
1162 if (_ucnum_size == 0)
1165 free((char *) _ucnum_nodes);
1170 ucnumber_lookup(ac_uint4 code, struct ucnumber *num)
1176 r = _ucnum_size - 1;
1179 * Determine a "mid" point and adjust to make sure the mid point is at
1180 * the beginning of a code+offset pair.
1184 if (code > _ucnum_nodes[m])
1186 else if (code < _ucnum_nodes[m])
1189 vp = _ucnum_vals + _ucnum_nodes[m + 1];
1190 num->numerator = (int) *vp++;
1191 num->denominator = (int) *vp;
1199 ucdigit_lookup(ac_uint4 code, int *digit)
1205 r = _ucnum_size - 1;
1208 * Determine a "mid" point and adjust to make sure the mid point is at
1209 * the beginning of a code+offset pair.
1213 if (code > _ucnum_nodes[m])
1215 else if (code < _ucnum_nodes[m])
1218 vp = _ucnum_vals + _ucnum_nodes[m + 1];
1219 if (*vp == *(vp + 1)) {
1230 ucgetnumber(ac_uint4 code)
1232 struct ucnumber num;
1235 * Initialize with some arbitrary value, because the caller simply cannot
1236 * tell for sure if the code is a number without calling the ucisnumber()
1237 * macro before calling this function.
1239 num.numerator = num.denominator = -111;
1241 (void) ucnumber_lookup(code, &num);
1247 ucgetdigit(ac_uint4 code)
1252 * Initialize with some arbitrary value, because the caller simply cannot
1253 * tell for sure if the code is a number without calling the ucisdigit()
1254 * macro before calling this function.
1258 (void) ucdigit_lookup(code, &dig);
1263 /**************************************************************************
1265 * Setup and cleanup routines.
1267 **************************************************************************/
1270 * Return 0 if okay, negative on error
1273 ucdata_load(char *paths, int masks)
1277 if (masks & UCDATA_CTYPE)
1278 error |= _ucprop_load(paths, 0) < 0 ? UCDATA_CTYPE : 0;
1279 if (masks & UCDATA_CASE)
1280 error |= _uccase_load(paths, 0) < 0 ? UCDATA_CASE : 0;
1281 if (masks & UCDATA_DECOMP)
1282 error |= _ucdcmp_load(paths, 0) < 0 ? UCDATA_DECOMP : 0;
1283 if (masks & UCDATA_CMBCL)
1284 error |= _uccmcl_load(paths, 0) < 0 ? UCDATA_CMBCL : 0;
1285 if (masks & UCDATA_NUM)
1286 error |= _ucnumb_load(paths, 0) < 0 ? UCDATA_NUM : 0;
1287 if (masks & UCDATA_COMP)
1288 error |= _uccomp_load(paths, 0) < 0 ? UCDATA_COMP : 0;
1289 if (masks & UCDATA_KDECOMP)
1290 error |= _uckdcmp_load(paths, 0) < 0 ? UCDATA_KDECOMP : 0;
1296 ucdata_unload(int masks)
1298 if (masks & UCDATA_CTYPE)
1300 if (masks & UCDATA_CASE)
1302 if (masks & UCDATA_DECOMP)
1304 if (masks & UCDATA_CMBCL)
1306 if (masks & UCDATA_NUM)
1308 if (masks & UCDATA_COMP)
1310 if (masks & UCDATA_KDECOMP)
1315 * Return 0 if okay, negative on error
1318 ucdata_reload(char *paths, int masks)
1322 if (masks & UCDATA_CTYPE)
1323 error |= _ucprop_load(paths, 1) < 0 ? UCDATA_CTYPE : 0;
1324 if (masks & UCDATA_CASE)
1325 error |= _uccase_load(paths, 1) < 0 ? UCDATA_CASE : 0;
1326 if (masks & UCDATA_DECOMP)
1327 error |= _ucdcmp_load(paths, 1) < 0 ? UCDATA_DECOMP : 0;
1328 if (masks & UCDATA_CMBCL)
1329 error |= _uccmcl_load(paths, 1) < 0 ? UCDATA_CMBCL : 0;
1330 if (masks & UCDATA_NUM)
1331 error |= _ucnumb_load(paths, 1) < 0 ? UCDATA_NUM : 0;
1332 if (masks & UCDATA_COMP)
1333 error |= _uccomp_load(paths, 1) < 0 ? UCDATA_COMP : 0;
1334 if (masks & UCDATA_KDECOMP)
1335 error |= _uckdcmp_load(paths, 1) < 0 ? UCDATA_KDECOMP : 0;
1346 ac_uint4 i, lo, *dec;
1347 struct ucnumber num;
1354 printf("NOT WEAK\n");
1356 printf("LOWER 0x%04lX\n", uctolower(0xff3a));
1357 printf("UPPER 0x%04lX\n", uctoupper(0xff5a));
1359 if (ucisalpha(0x1d5))
1362 printf("NOT ALPHA\n");
1364 if (ucisupper(0x1d5)) {
1366 lo = uctolower(0x1d5);
1367 printf("0x%04lx\n", lo);
1368 lo = uctotitle(0x1d5);
1369 printf("0x%04lx\n", lo);
1371 printf("NOT UPPER\n");
1373 if (ucistitle(0x1d5))
1376 printf("NOT TITLE\n");
1378 if (uciscomposite(0x1d5))
1379 printf("COMPOSITE\n");
1381 printf("NOT COMPOSITE\n");
1383 if (ucdecomp(0x1d5, &lo, &dec)) {
1384 for (i = 0; i < lo; i++)
1385 printf("0x%04lx ", dec[i]);
1389 if ((lo = uccombining_class(0x41)) != 0)
1390 printf("0x41 CCL %ld\n", lo);
1392 if (ucisxdigit(0xfeff))
1393 printf("0xFEFF HEX DIGIT\n");
1395 printf("0xFEFF NOT HEX DIGIT\n");
1397 if (ucisdefined(0x10000))
1398 printf("0x10000 DEFINED\n");
1400 printf("0x10000 NOT DEFINED\n");
1402 if (ucnumber_lookup(0x30, &num)) {
1403 if (num.numerator != num.denominator)
1404 printf("UCNUMBER: 0x30 = %d/%d\n", num.numerator, num.denominator);
1406 printf("UCNUMBER: 0x30 = %d\n", num.numerator);
1408 printf("UCNUMBER: 0x30 NOT A NUMBER\n");
1410 if (ucnumber_lookup(0xbc, &num)) {
1411 if (num.numerator != num.denominator)
1412 printf("UCNUMBER: 0xbc = %d/%d\n", num.numerator, num.denominator);
1414 printf("UCNUMBER: 0xbc = %d\n", num.numerator);
1416 printf("UCNUMBER: 0xbc NOT A NUMBER\n");
1419 if (ucnumber_lookup(0xff19, &num)) {
1420 if (num.numerator != num.denominator)
1421 printf("UCNUMBER: 0xff19 = %d/%d\n", num.numerator, num.denominator);
1423 printf("UCNUMBER: 0xff19 = %d\n", num.numerator);
1425 printf("UCNUMBER: 0xff19 NOT A NUMBER\n");
1427 if (ucnumber_lookup(0x4e00, &num)) {
1428 if (num.numerator != num.denominator)
1429 printf("UCNUMBER: 0x4e00 = %d/%d\n", num.numerator, num.denominator);
1431 printf("UCNUMBER: 0x4e00 = %d\n", num.numerator);
1433 printf("UCNUMBER: 0x4e00 NOT A NUMBER\n");
1435 if (ucdigit_lookup(0x06f9, &dig))
1436 printf("UCDIGIT: 0x6f9 = %d\n", dig);
1438 printf("UCDIGIT: 0x6f9 NOT A NUMBER\n");
1440 dig = ucgetdigit(0x0969);
1441 printf("UCGETDIGIT: 0x969 = %d\n", dig);
1443 num = ucgetnumber(0x30);
1444 if (num.numerator != num.denominator)
1445 printf("UCGETNUMBER: 0x30 = %d/%d\n", num.numerator, num.denominator);
1447 printf("UCGETNUMBER: 0x30 = %d\n", num.numerator);
1449 num = ucgetnumber(0xbc);
1450 if (num.numerator != num.denominator)
1451 printf("UCGETNUMBER: 0xbc = %d/%d\n", num.numerator, num.denominator);
1453 printf("UCGETNUMBER: 0xbc = %d\n", num.numerator);
1455 num = ucgetnumber(0xff19);
1456 if (num.numerator != num.denominator)
1457 printf("UCGETNUMBER: 0xff19 = %d/%d\n", num.numerator, num.denominator);
1459 printf("UCGETNUMBER: 0xff19 = %d\n", num.numerator);