3 * Copyright 1999 Computing Research Labs, New Mexico State University
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
19 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
20 * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
21 * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 /* $Id: ucgendat.c,v 1.3 1999/10/07 20:49:56 mleisher Exp $ */
29 #include <ac/string.h>
30 #include <ac/unistd.h>
32 #define ishdigit(cc) (((cc) >= '0' && (cc) <= '9') ||\
33 ((cc) >= 'A' && (cc) <= 'F') ||\
34 ((cc) >= 'a' && (cc) <= 'f'))
37 * A header written to the output file with the byte-order-mark and the number
40 static unsigned short hdr[2] = {0xfeff, 0};
43 #define NEEDPROPS (NUMPROPS + (4 - (NUMPROPS & 3)))
51 * List of properties expected to be found in the Unicode Character Database
52 * including some implementation specific properties.
54 * The implementation specific properties are:
55 * Cm = Composed (can be decomposed)
57 * Sy = Symmetric (has left and right forms)
62 * Cp = Defined character
64 static _prop_t props[NUMPROPS] = {
65 {"Mn", 2}, {"Mc", 2}, {"Me", 2}, {"Nd", 2}, {"Nl", 2}, {"No", 2},
66 {"Zs", 2}, {"Zl", 2}, {"Zp", 2}, {"Cc", 2}, {"Cf", 2}, {"Cs", 2},
67 {"Co", 2}, {"Cn", 2}, {"Lu", 2}, {"Ll", 2}, {"Lt", 2}, {"Lm", 2},
68 {"Lo", 2}, {"Pc", 2}, {"Pd", 2}, {"Ps", 2}, {"Pe", 2}, {"Po", 2},
69 {"Sm", 2}, {"Sc", 2}, {"Sk", 2}, {"So", 2}, {"L", 1}, {"R", 1},
70 {"EN", 2}, {"ES", 2}, {"ET", 2}, {"AN", 2}, {"CS", 2}, {"B", 1},
71 {"S", 1}, {"WS", 2}, {"ON", 2},
72 {"Cm", 2}, {"Nb", 2}, {"Sy", 2}, {"Hd", 2}, {"Qm", 2}, {"Mr", 2},
73 {"Ss", 2}, {"Cp", 2}, {"Pi", 2}, {"Pf", 2}
77 unsigned long *ranges;
82 static _ranges_t proptbl[NUMPROPS];
85 * Make sure this array is sized to be on a 4-byte boundary at compile time.
87 static unsigned short propcnt[NEEDPROPS];
90 * Array used to collect a decomposition before adding it to the decomposition
93 static unsigned long dectmp[64];
94 static unsigned long dectmp_size;
100 unsigned long *decomp;
104 * List of decompositions. Created and expanded in order as the characters are
107 static _decomp_t *decomps;
108 static unsigned long decomps_used;
109 static unsigned long decomps_size;
112 * Types and lists for handling lists of case mappings.
116 unsigned long other1;
117 unsigned long other2;
120 static _case_t *upper;
121 static _case_t *lower;
122 static _case_t *title;
123 static unsigned long upper_used;
124 static unsigned long upper_size;
125 static unsigned long lower_used;
126 static unsigned long lower_size;
127 static unsigned long title_used;
128 static unsigned long title_size;
131 * Array used to collect case mappings before adding them to a list.
133 static unsigned long cases[3];
136 * An array to hold ranges for combining classes.
138 static unsigned long *ccl;
139 static unsigned long ccl_used;
140 static unsigned long ccl_size;
143 * Structures for handling numbers.
156 * Arrays to hold the mapping of codes to numbers.
158 static _codeidx_t *ncodes;
159 static unsigned long ncodes_used;
160 static unsigned long ncodes_size;
163 static unsigned long nums_used;
164 static unsigned long nums_size;
167 * Array for holding numbers.
170 static unsigned long nums_used;
171 static unsigned long nums_size;
174 add_range(unsigned long start, unsigned long end, char *p1, char *p2)
180 for (k = 0; k < 2; k++) {
192 for (i = 0; i < NUMPROPS; i++) {
193 if (props[i].len == len && memcmp(props[i].name, name, len) == 0)
203 * Resize the range list if necessary.
205 if (rlp->used == rlp->size) {
207 rlp->ranges = (unsigned long *)
208 malloc(sizeof(unsigned long) << 3);
210 rlp->ranges = (unsigned long *)
211 realloc((char *) rlp->ranges,
212 sizeof(unsigned long) * (rlp->size + 8));
217 * If this is the first code for this property list, just add it
220 if (rlp->used == 0) {
221 rlp->ranges[0] = start;
222 rlp->ranges[1] = end;
228 * Optimize the case of adding the range to the end.
231 if (start > rlp->ranges[j]) {
233 rlp->ranges[j++] = start;
234 rlp->ranges[j++] = end;
240 * Need to locate the insertion point.
243 i < rlp->used && start > rlp->ranges[i + 1] + 1; i += 2) ;
246 * If the start value lies in the current range, then simply set the
247 * new end point of the range to the end value passed as a parameter.
249 if (rlp->ranges[i] <= start && start <= rlp->ranges[i + 1] + 1) {
250 rlp->ranges[i + 1] = end;
255 * Shift following values up by two.
257 for (j = rlp->used; j > i; j -= 2) {
258 rlp->ranges[j] = rlp->ranges[j - 2];
259 rlp->ranges[j + 1] = rlp->ranges[j - 1];
263 * Add the new range at the insertion point.
265 rlp->ranges[i] = start;
266 rlp->ranges[i + 1] = end;
272 ordered_range_insert(unsigned long c, char *name, int len)
282 * Deal with directionality codes introduced in Unicode 3.0.
285 if (memcmp(name, "AL", 2) == 0) {
287 * Mark the Arabic letters as having RTL directionality.
291 } else if (memcmp(name, "BN", 2) == 0) {
293 * Mark the control characters as being Other Neutrals.
298 } else if (len == 3 &&
299 (memcmp(name, "NSM", 3) == 0 || memcmp(name, "PDF", 3) == 0 ||
300 memcmp(name, "LRE", 3) == 0 || memcmp(name, "LRO", 3) == 0 ||
301 memcmp(name, "RLE", 3) == 0 || memcmp(name, "RLO", 3) == 0)) {
303 * Mark all of these as Other Neutral to preserve compatibility with
310 for (i = 0; i < NUMPROPS; i++) {
311 if (props[i].len == len && memcmp(props[i].name, name, len) == 0)
319 * Have a match, so insert the code in order.
324 * Resize the range list if necessary.
326 if (rlp->used == rlp->size) {
328 rlp->ranges = (unsigned long *)
329 malloc(sizeof(unsigned long) << 3);
331 rlp->ranges = (unsigned long *)
332 realloc((char *) rlp->ranges,
333 sizeof(unsigned long) * (rlp->size + 8));
338 * If this is the first code for this property list, just add it
341 if (rlp->used == 0) {
342 rlp->ranges[0] = rlp->ranges[1] = c;
348 * Optimize the cases of extending the last range and adding new ranges to
353 s = rlp->ranges[j - 1];
357 * Extend the last range.
365 * Start another range on the end.
368 rlp->ranges[j] = rlp->ranges[j + 1] = c;
375 * The code is a duplicate of a code in the last range, so just return.
380 * The code should be inserted somewhere before the last range in the
381 * list. Locate the insertion point.
384 i < rlp->used && c > rlp->ranges[i + 1] + 1; i += 2) ;
387 e = rlp->ranges[i + 1];
391 * Simply extend the current range.
393 rlp->ranges[i + 1] = c;
396 * Add a new entry at the current location. Shift the current entry and
397 * all entries after it up by one range (two slots) to make room.
399 for (j = rlp->used; j > i; j -= 2) {
400 rlp->ranges[j] = rlp->ranges[j - 2];
401 rlp->ranges[j + 1] = rlp->ranges[j - 1];
403 rlp->ranges[i] = rlp->ranges[i + 1] = c;
410 add_decomp(unsigned long code)
412 unsigned long i, j, size;
415 * Add the code to the composite property.
417 ordered_range_insert(code, "Cm", 2);
420 * Locate the insertion point for the code.
422 for (i = 0; i < decomps_used && code > decomps[i].code; i++) ;
425 * Allocate space for a new decomposition.
427 if (decomps_used == decomps_size) {
428 if (decomps_size == 0)
429 decomps = (_decomp_t *) malloc(sizeof(_decomp_t) << 3);
431 decomps = (_decomp_t *)
432 realloc((char *) decomps,
433 sizeof(_decomp_t) * (decomps_size + 8));
434 (void) memset((char *) (decomps + decomps_size), '\0',
435 sizeof(_decomp_t) << 3);
439 if (i < decomps_used && code != decomps[i].code) {
441 * Shift the decomps up by one if the codes don't match.
443 for (j = decomps_used; j > i; j--)
444 (void) AC_MEMCPY((char *) &decomps[j], (char *) &decomps[j - 1],
449 * Insert or replace a decomposition.
451 size = dectmp_size + (4 - (dectmp_size & 3));
452 if (decomps[i].size < size) {
453 if (decomps[i].size == 0)
454 decomps[i].decomp = (unsigned long *)
455 malloc(sizeof(unsigned long) * size);
457 decomps[i].decomp = (unsigned long *)
458 realloc((char *) decomps[i].decomp,
459 sizeof(unsigned long) * size);
460 decomps[i].size = size;
463 if (decomps[i].code != code)
466 decomps[i].code = code;
467 decomps[i].used = dectmp_size;
468 (void) AC_MEMCPY((char *) decomps[i].decomp, (char *) dectmp,
469 sizeof(unsigned long) * dectmp_size);
474 add_title(unsigned long code)
479 * Always map the code to itself.
483 if (title_used == title_size) {
485 title = (_case_t *) malloc(sizeof(_case_t) << 3);
487 title = (_case_t *) realloc((char *) title,
488 sizeof(_case_t) * (title_size + 8));
493 * Locate the insertion point.
495 for (i = 0; i < title_used && code > title[i].key; i++) ;
497 if (i < title_used) {
499 * Shift the array up by one.
501 for (j = title_used; j > i; j--)
502 (void) AC_MEMCPY((char *) &title[j], (char *) &title[j - 1],
506 title[i].key = cases[2]; /* Title */
507 title[i].other1 = cases[0]; /* Upper */
508 title[i].other2 = cases[1]; /* Lower */
514 add_upper(unsigned long code)
519 * Always map the code to itself.
524 * If the title case character is not present, then make it the same as
530 if (upper_used == upper_size) {
532 upper = (_case_t *) malloc(sizeof(_case_t) << 3);
534 upper = (_case_t *) realloc((char *) upper,
535 sizeof(_case_t) * (upper_size + 8));
540 * Locate the insertion point.
542 for (i = 0; i < upper_used && code > upper[i].key; i++) ;
544 if (i < upper_used) {
546 * Shift the array up by one.
548 for (j = upper_used; j > i; j--)
549 (void) AC_MEMCPY((char *) &upper[j], (char *) &upper[j - 1],
553 upper[i].key = cases[0]; /* Upper */
554 upper[i].other1 = cases[1]; /* Lower */
555 upper[i].other2 = cases[2]; /* Title */
561 add_lower(unsigned long code)
566 * Always map the code to itself.
571 * If the title case character is empty, then make it the same as the
577 if (lower_used == lower_size) {
579 lower = (_case_t *) malloc(sizeof(_case_t) << 3);
581 lower = (_case_t *) realloc((char *) lower,
582 sizeof(_case_t) * (lower_size + 8));
587 * Locate the insertion point.
589 for (i = 0; i < lower_used && code > lower[i].key; i++) ;
591 if (i < lower_used) {
593 * Shift the array up by one.
595 for (j = lower_used; j > i; j--)
596 (void) AC_MEMCPY((char *) &lower[j], (char *) &lower[j - 1],
600 lower[i].key = cases[1]; /* Lower */
601 lower[i].other1 = cases[0]; /* Upper */
602 lower[i].other2 = cases[2]; /* Title */
608 ordered_ccl_insert(unsigned long c, unsigned long ccl_code)
612 if (ccl_used == ccl_size) {
614 ccl = (unsigned long *) malloc(sizeof(unsigned long) * 24);
616 ccl = (unsigned long *)
617 realloc((char *) ccl, sizeof(unsigned long) * (ccl_size + 24));
622 * Optimize adding the first item.
632 * Handle the special case of extending the range on the end. This
633 * requires that the combining class codes are the same.
635 if (ccl_code == ccl[ccl_used - 1] && c == ccl[ccl_used - 2] + 1) {
636 ccl[ccl_used - 2] = c;
641 * Handle the special case of adding another range on the end.
643 if (c > ccl[ccl_used - 2] + 1 ||
644 (c == ccl[ccl_used - 2] + 1 && ccl_code != ccl[ccl_used - 1])) {
647 ccl[ccl_used++] = ccl_code;
652 * Locate either the insertion point or range for the code.
654 for (i = 0; i < ccl_used && c > ccl[i + 1] + 1; i += 3) ;
656 if (ccl_code == ccl[i + 2] && c == ccl[i + 1] + 1) {
658 * Extend an existing range.
662 } else if (c < ccl[i]) {
664 * Start a new range before the current location.
666 for (j = ccl_used; j > i; j -= 3) {
668 ccl[j - 1] = ccl[j - 4];
669 ccl[j - 2] = ccl[j - 5];
671 ccl[i] = ccl[i + 1] = c;
672 ccl[i + 2] = ccl_code;
677 * Adds a number if it does not already exist and returns an index value
681 make_number(short num, short denom)
686 * Determine if the number already exists.
688 for (n = 0; n < nums_used; n++) {
689 if (nums[n].numerator == num && nums[n].denominator == denom)
693 if (nums_used == nums_size) {
695 nums = (_num_t *) malloc(sizeof(_num_t) << 3);
697 nums = (_num_t *) realloc((char *) nums,
698 sizeof(_num_t) * (nums_size + 8));
703 nums[n].numerator = num;
704 nums[n].denominator = denom;
/*
 * add_number: associate `code` with the number num/denom.  The ncodes array
 * is scanned in ascending code order; an entry that already holds `code` has
 * its number index replaced, otherwise the new entry is placed at the scan
 * position after shifting later entries up by one.  The array grows in steps
 * of 8 entries when it is full.
 */
710 add_number(unsigned long code, short num, short denom)
715 * Insert the code in order.
717 for (i = 0; i < ncodes_used && code > ncodes[i].code; i++) ;
720 * Handle the case of the codes matching and simply replace the number
721 * that was there before.
/*
 * The scan above can stop with i == ncodes_used (code is larger than every
 * stored code), so bound i before reading ncodes[i].  Testing only
 * ncodes_used > 0 would read one element past the entries in use, and past
 * the allocation when the array is full.
 */
723 if (i < ncodes_used && code == ncodes[i].code) {
724 ncodes[i].idx = make_number(num, denom);
729 * Resize the array if necessary.
731 if (ncodes_used == ncodes_size) {
732 if (ncodes_size == 0)
733 ncodes = (_codeidx_t *) malloc(sizeof(_codeidx_t) << 3);
735 ncodes = (_codeidx_t *)
736 realloc((char *) ncodes, sizeof(_codeidx_t) * (ncodes_size + 8));
742 * Shift things around to insert the code if necessary.
744 if (i < ncodes_used) {
745 for (j = ncodes_used; j > i; j--) {
746 ncodes[j].code = ncodes[j - 1].code;
747 ncodes[j].idx = ncodes[j - 1].idx;
750 ncodes[i].code = code;
751 ncodes[i].idx = make_number(num, denom);
757 * This routine assumes that the line is a valid Unicode Character Database
763 unsigned long i, lineno, skip, code, ccl_code;
764 short wnum, neg, number[2];
765 char line[512], *s, *e;
768 while (fscanf(in, "%[^\n]\n", line) != EOF) {
772 * Skip blank lines and lines that start with a '#'.
774 if (line[0] == 0 || line[0] == '#')
778 * If lines need to be skipped, do it here.
786 * Collect the code. The code can be up to 6 hex digits in length to
787 * allow surrogates to be specified.
789 for (s = line, i = code = 0; *s != ';' && i < 6; i++, s++) {
791 if (*s >= '0' && *s <= '9')
793 else if (*s >= 'A' && *s <= 'F')
794 code += (*s - 'A') + 10;
795 else if (*s >= 'a' && *s <= 'f')
796 code += (*s - 'a') + 10;
800 * Handle the following special cases:
801 * 1. 4E00-9FA5 CJK Ideographs.
802 * 2. AC00-D7A3 Hangul Syllables.
803 * 3. D800-DFFF Surrogates.
804 * 4. E000-F8FF Private Use Area.
805 * 5. F900-FA2D Han compatibility.
810 * The Han ideographs.
812 add_range(0x4e00, 0x9fff, "Lo", "L");
815 * Add the characters to the defined category.
817 add_range(0x4e00, 0x9fa5, "Cp", 0);
823 * The Hangul syllables.
825 add_range(0xac00, 0xd7a3, "Lo", "L");
828 * Add the characters to the defined category.
830 add_range(0xac00, 0xd7a3, "Cp", 0);
836 * Make a range of all surrogates and assume some default
839 add_range(0x010000, 0x10ffff, "Cs", "L");
844 * The Private Use area. Add with a default set of properties.
846 add_range(0xe000, 0xf8ff, "Co", "L");
851 * The CJK compatibility area.
853 add_range(0xf900, 0xfaff, "Lo", "L");
856 * Add the characters to the defined category.
858 add_range(0xf900, 0xfaff, "Cp", 0);
867 * Add the code to the defined category.
869 ordered_range_insert(code, "Cp", 2);
872 * Locate the first character property field.
874 for (i = 0; *s != 0 && i < 2; s++) {
878 for (e = s; *e && *e != ';'; e++) ;
880 ordered_range_insert(code, s, e - s);
883 * Locate the combining class code.
885 for (s = e; *s != 0 && i < 3; s++) {
891 * Convert the combining class code from decimal.
893 for (ccl_code = 0, e = s; *e && *e != ';'; e++)
894 ccl_code = (ccl_code * 10) + (*e - '0');
897 * Add the code if it is not 0.
900 ordered_ccl_insert(code, ccl_code);
903 * Locate the second character property field.
905 for (s = e; *s != 0 && i < 4; s++) {
909 for (e = s; *e && *e != ';'; e++) ;
911 ordered_range_insert(code, s, e - s);
914 * Check for a decomposition.
917 if (*s != ';' && *s != '<') {
919 * Collect the codes of the decomposition.
921 for (dectmp_size = 0; *s != ';'; ) {
923 * Skip all leading non-hex digits.
925 while (!ishdigit(*s))
928 for (dectmp[dectmp_size] = 0; ishdigit(*s); s++) {
929 dectmp[dectmp_size] <<= 4;
930 if (*s >= '0' && *s <= '9')
931 dectmp[dectmp_size] += *s - '0';
932 else if (*s >= 'A' && *s <= 'F')
933 dectmp[dectmp_size] += (*s - 'A') + 10;
934 else if (*s >= 'a' && *s <= 'f')
935 dectmp[dectmp_size] += (*s - 'a') + 10;
941 * If there is more than one code in the temporary decomposition
942 * array, then add the character with its decomposition.
949 * Skip to the number field.
951 for (i = 0; i < 3 && *s; s++) {
957 * Scan the number in.
959 number[0] = number[1] = 0;
960 for (e = s, neg = wnum = 0; *e && *e != ';'; e++) {
968 * Move to the denominator of the fraction.
976 number[wnum] = (number[wnum] * 10) + (*e - '0');
981 * Adjust the denominator in case of integers and add the number.
984 number[1] = number[0];
986 add_number(code, number[0], number[1]);
990 * Skip to the start of the possible case mappings.
992 for (s = e, i = 0; i < 4 && *s; s++) {
998 * Collect the case mappings.
1000 cases[0] = cases[1] = cases[2] = 0;
1001 for (i = 0; i < 3; i++) {
1002 while (ishdigit(*s)) {
1004 if (*s >= '0' && *s <= '9')
1005 cases[i] += *s - '0';
1006 else if (*s >= 'A' && *s <= 'F')
1007 cases[i] += (*s - 'A') + 10;
1008 else if (*s >= 'a' && *s <= 'f')
1009 cases[i] += (*s - 'a') + 10;
1015 if (cases[0] && cases[1])
1017 * Add the upper and lower mappings for a title case character.
1022 * Add the lower and title case mappings for the upper case
1028 * Add the upper and title case mappings for the lower case
1036 find_decomp(unsigned long code)
1041 r = decomps_used - 1;
1044 if (code > decomps[m].code)
1046 else if (code < decomps[m].code)
1055 decomp_it(_decomp_t *d)
1060 for (i = 0; i < d->used; i++) {
1061 if ((dp = find_decomp(d->decomp[i])) != 0)
1064 dectmp[dectmp_size++] = d->decomp[i];
1069 * Expand all decompositions by recursively decomposing each character
1070 * in the decomposition.
1077 for (i = 0; i < decomps_used; i++) {
1079 decomp_it(&decomps[i]);
1080 if (dectmp_size > 0)
1081 add_decomp(decomps[i].code);
1086 write_cdata(char *opath)
1089 unsigned long i, idx, bytes, nprops;
1090 unsigned short casecnt[2];
1093 /*****************************************************************
1095 * Generate the ctype data.
1097 *****************************************************************/
1100 * Open the ctype.dat file.
1102 sprintf(path, "%s/ctype.dat", opath);
1103 if ((out = fopen(path, "wb")) == 0)
1107 * Collect the offsets for the properties. The offsets array is
1108 * on a 4-byte boundary to keep things efficient for architectures
1109 * that need such a thing.
1111 for (i = idx = 0; i < NUMPROPS; i++) {
1112 propcnt[i] = (proptbl[i].used != 0) ? idx : 0xffff;
1113 idx += proptbl[i].used;
1117 * Add the sentinel index which is used by the binary search as the upper
1118 * bound for a search.
1123 * Record the actual number of property lists. This may be different than
1124 * the number of offsets actually written because of aligning on a 4-byte
1130 * Calculate the byte count needed and pad the property counts array to a
1133 if ((bytes = sizeof(unsigned short) * (NUMPROPS + 1)) & 3)
1134 bytes += 4 - (bytes & 3);
1135 nprops = bytes / sizeof(unsigned short);
1136 bytes += sizeof(unsigned long) * idx;
1141 fwrite((char *) hdr, sizeof(unsigned short), 2, out);
1144 * Write the byte count.
1146 fwrite((char *) &bytes, sizeof(unsigned long), 1, out);
1149 * Write the property list counts.
1151 fwrite((char *) propcnt, sizeof(unsigned short), nprops, out);
1154 * Write the property lists.
1156 for (i = 0; i < NUMPROPS; i++) {
1157 if (proptbl[i].used > 0)
1158 fwrite((char *) proptbl[i].ranges, sizeof(unsigned long),
1159 proptbl[i].used, out);
1164 /*****************************************************************
1166 * Generate the case mapping data.
1168 *****************************************************************/
1171 * Open the case.dat file.
1173 sprintf(path, "%s/case.dat", opath);
1174 if ((out = fopen(path, "wb")) == 0)
1178 * Write the case mapping tables.
1180 hdr[1] = upper_used + lower_used + title_used;
1181 casecnt[0] = upper_used;
1182 casecnt[1] = lower_used;
1187 fwrite((char *) hdr, sizeof(unsigned short), 2, out);
1190 * Write the upper and lower case table sizes.
1192 fwrite((char *) casecnt, sizeof(unsigned short), 2, out);
1196 * Write the upper case table.
1198 fwrite((char *) upper, sizeof(_case_t), upper_used, out);
1202 * Write the lower case table.
1204 fwrite((char *) lower, sizeof(_case_t), lower_used, out);
1208 * Write the title case table.
1210 fwrite((char *) title, sizeof(_case_t), title_used, out);
1214 /*****************************************************************
1216 * Generate the decomposition data.
1218 *****************************************************************/
1221 * Fully expand all decompositions before generating the output file.
1226 * Open the decomp.dat file.
1228 sprintf(path, "%s/decomp.dat", opath);
1229 if ((out = fopen(path, "wb")) == 0)
1232 hdr[1] = decomps_used;
1237 fwrite((char *) hdr, sizeof(unsigned short), 2, out);
1240 * Write a temporary byte count which will be calculated as the
1241 * decompositions are written out.
1244 fwrite((char *) &bytes, sizeof(unsigned long), 1, out);
1248 * Write the list of decomp nodes.
1250 for (i = idx = 0; i < decomps_used; i++) {
1251 fwrite((char *) &decomps[i].code, sizeof(unsigned long), 1, out);
1252 fwrite((char *) &idx, sizeof(unsigned long), 1, out);
1253 idx += decomps[i].used;
1257 * Write the sentinel index as the last decomp node.
1259 fwrite((char *) &idx, sizeof(unsigned long), 1, out);
1262 * Write the decompositions themselves.
1264 for (i = 0; i < decomps_used; i++)
1265 fwrite((char *) decomps[i].decomp, sizeof(unsigned long),
1266 decomps[i].used, out);
1269 * Seek back to the beginning and write the byte count.
1271 bytes = (sizeof(unsigned long) * idx) +
1272 (sizeof(unsigned long) * ((hdr[1] << 1) + 1));
1273 fseek(out, sizeof(unsigned short) << 1, 0L);
1274 fwrite((char *) &bytes, sizeof(unsigned long), 1, out);
1279 /*****************************************************************
1281 * Generate the combining class data.
1283 *****************************************************************/
1286 * Open the cmbcl.dat file.
1288 sprintf(path, "%s/cmbcl.dat", opath);
1289 if ((out = fopen(path, "wb")) == 0)
1293 * Set the number of ranges used. Each range has a combining class which
1294 * means each entry is a 3-tuple.
1296 hdr[1] = ccl_used / 3;
1301 fwrite((char *) hdr, sizeof(unsigned short), 2, out);
1304 * Write out the byte count to maintain header size.
1306 bytes = ccl_used * sizeof(unsigned long);
1307 fwrite((char *) &bytes, sizeof(unsigned long), 1, out);
1311 * Write the combining class ranges out.
1313 fwrite((char *) ccl, sizeof(unsigned long), ccl_used, out);
1317 /*****************************************************************
1319 * Generate the number data.
1321 *****************************************************************/
1324 * Open the num.dat file.
1326 sprintf(path, "%s/num.dat", opath);
1327 if ((out = fopen(path, "wb")) == 0)
1331 * The count part of the header will be the total number of codes that
1334 hdr[1] = (unsigned short) (ncodes_used << 1);
1335 bytes = (ncodes_used * sizeof(_codeidx_t)) + (nums_used * sizeof(_num_t));
1340 fwrite((char *) hdr, sizeof(unsigned short), 2, out);
1343 * Write out the byte count to maintain header size.
1345 fwrite((char *) &bytes, sizeof(unsigned long), 1, out);
1348 * Now, if number mappings exist, write them out.
1350 if (ncodes_used > 0) {
1351 fwrite((char *) ncodes, sizeof(_codeidx_t), ncodes_used, out);
1352 fwrite((char *) nums, sizeof(_num_t), nums_used, out);
1359 main(int argc, char *argv[])
1364 if ((prog = strrchr(argv[0], '/')) != 0)
1376 if (argv[0][0] == '-' && argv[0][1] == 'o') {
1381 if (in != stdin && in != NULL)
1383 if ((in = fopen(argv[0], "rb")) == 0)
1384 fprintf(stderr, "%s: unable to open ctype file %s\n",