X-Git-Url: https://git.sur5r.net/?a=blobdiff_plain;f=libraries%2Fliblunicode%2Fucdata%2Fucgendat.c;h=cc4b8ba19687d24a5e6ef0b42849a225dc488ddf;hb=9cdb7b18a929d546a7681d3ac0f830821069c5a5;hp=06ebc5da959d1b7ffe64093cd1cd68e3c499d902;hpb=597ddd3a80bea0bf0df010807733a3b00c2ca650;p=openldap diff --git a/libraries/liblunicode/ucdata/ucgendat.c b/libraries/liblunicode/ucdata/ucgendat.c index 06ebc5da95..cc4b8ba196 100644 --- a/libraries/liblunicode/ucdata/ucgendat.c +++ b/libraries/liblunicode/ucdata/ucgendat.c @@ -1,5 +1,18 @@ -/* - * Copyright 1999 Computing Research Labs, New Mexico State University +/* $OpenLDAP$ */ +/* This work is part of OpenLDAP Software . + * + * Copyright 1998-2013 The OpenLDAP Foundation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * . + */ +/* Copyright 2001 Computing Research Labs, New Mexico State University * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -19,21 +32,26 @@ * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR * THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef lint -#ifdef __GNUC__ -static char rcsid[] __attribute__ ((unused)) = "$Id: ucgendat.c,v 1.3 1999/10/07 20:49:56 mleisher Exp $"; -#else -static char rcsid[] = "$Id: ucgendat.c,v 1.3 1999/10/07 20:49:56 mleisher Exp $"; -#endif -#endif +/* $Id: ucgendat.c,v 1.4 2001/01/02 18:46:20 mleisher Exp $" */ #include "portable.h" +#include "ldap_config.h" #include -#include +#include +#include #include #include +#include + +#include + +#ifndef HARDCODE_DATA +#define HARDCODE_DATA 1 +#endif + +#undef ishdigit #define ishdigit(cc) (((cc) >= '0' && (cc) <= '9') ||\ ((cc) >= 'A' && (cc) <= 'F') ||\ ((cc) >= 'a' && (cc) <= 'f')) @@ -42,9 +60,9 @@ static char rcsid[] = "$Id: ucgendat.c,v 1.3 1999/10/07 20:49:56 mleisher Exp $" * A header written to the output file with the byte-order-mark and the number * of property nodes. */ -static unsigned short hdr[2] = {0xfeff, 0}; +static ac_uint2 hdr[2] = {0xfeff, 0}; -#define NUMPROPS 49 +#define NUMPROPS 50 #define NEEDPROPS (NUMPROPS + (4 - (NUMPROPS & 3))) typedef struct { @@ -75,13 +93,13 @@ static _prop_t props[NUMPROPS] = { {"EN", 2}, {"ES", 2}, {"ET", 2}, {"AN", 2}, {"CS", 2}, {"B", 1}, {"S", 1}, {"WS", 2}, {"ON", 2}, {"Cm", 2}, {"Nb", 2}, {"Sy", 2}, {"Hd", 2}, {"Qm", 2}, {"Mr", 2}, - {"Ss", 2}, {"Cp", 2}, {"Pi", 2}, {"Pf", 2} + {"Ss", 2}, {"Cp", 2}, {"Pi", 2}, {"Pf", 2}, {"AL", 2} }; typedef struct { - unsigned long *ranges; - unsigned short used; - unsigned short size; + ac_uint4 *ranges; + ac_uint2 used; + ac_uint2 size; } _ranges_t; static _ranges_t proptbl[NUMPROPS]; @@ -89,67 +107,92 @@ static _ranges_t proptbl[NUMPROPS]; /* * Make sure this array is sized to be on a 4-byte boundary at compile time. */ -static unsigned short propcnt[NEEDPROPS]; +static ac_uint2 propcnt[NEEDPROPS]; /* * Array used to collect a decomposition before adding it to the decomposition * table. */ -static unsigned long dectmp[64]; -static unsigned long dectmp_size; +static ac_uint4 dectmp[64]; +static ac_uint4 dectmp_size; typedef struct { - unsigned long code; - unsigned short size; - unsigned short used; - unsigned long *decomp; + ac_uint4 code; + ac_uint2 size; + ac_uint2 used; + ac_uint4 *decomp; } _decomp_t; /* * List of decomposition. Created and expanded in order as the characters are - * encountered. + * encountered. First list contains canonical mappings, second also includes + * compatibility mappings. */ static _decomp_t *decomps; -static unsigned long decomps_used; -static unsigned long decomps_size; +static ac_uint4 decomps_used; +static ac_uint4 decomps_size; + +static _decomp_t *kdecomps; +static ac_uint4 kdecomps_used; +static ac_uint4 kdecomps_size; + +/* + * Composition exclusion table stuff. + */ +#define COMPEX_SET(c) (compexs[(c) >> 5] |= (1 << ((c) & 31))) +#define COMPEX_TEST(c) (compexs[(c) >> 5] & (1 << ((c) & 31))) +static ac_uint4 compexs[8192]; + +/* + * Struct for holding a composition pair, and array of composition pairs + */ +typedef struct { + ac_uint4 comp; + ac_uint4 count; + ac_uint4 code1; + ac_uint4 code2; +} _comp_t; + +static _comp_t *comps; +static ac_uint4 comps_used; /* * Types and lists for handling lists of case mappings. */ typedef struct { - unsigned long key; - unsigned long other1; - unsigned long other2; + ac_uint4 key; + ac_uint4 other1; + ac_uint4 other2; } _case_t; static _case_t *upper; static _case_t *lower; static _case_t *title; -static unsigned long upper_used; -static unsigned long upper_size; -static unsigned long lower_used; -static unsigned long lower_size; -static unsigned long title_used; -static unsigned long title_size; +static ac_uint4 upper_used; +static ac_uint4 upper_size; +static ac_uint4 lower_used; +static ac_uint4 lower_size; +static ac_uint4 title_used; +static ac_uint4 title_size; /* * Array used to collect case mappings before adding them to a list. */ -static unsigned long cases[3]; +static ac_uint4 cases[3]; /* * An array to hold ranges for combining classes. */ -static unsigned long *ccl; -static unsigned long ccl_used; -static unsigned long ccl_size; +static ac_uint4 *ccl; +static ac_uint4 ccl_used; +static ac_uint4 ccl_size; /* * Structures for handling numbers. */ typedef struct { - unsigned long code; - unsigned long idx; + ac_uint4 code; + ac_uint4 idx; } _codeidx_t; typedef struct { @@ -161,22 +204,22 @@ typedef struct { * Arrays to hold the mapping of codes to numbers. */ static _codeidx_t *ncodes; -static unsigned long ncodes_used; -static unsigned long ncodes_size; +static ac_uint4 ncodes_used; +static ac_uint4 ncodes_size; static _num_t *nums; -static unsigned long nums_used; -static unsigned long nums_size; +static ac_uint4 nums_used; +static ac_uint4 nums_size; /* * Array for holding numbers. */ static _num_t *nums; -static unsigned long nums_used; -static unsigned long nums_size; +static ac_uint4 nums_used; +static ac_uint4 nums_size; static void -add_range(unsigned long start, unsigned long end, char *p1, char *p2) +add_range(ac_uint4 start, ac_uint4 end, char *p1, char *p2) { int i, j, k, len; _ranges_t *rlp; @@ -209,12 +252,12 @@ add_range(unsigned long start, unsigned long end, char *p1, char *p2) */ if (rlp->used == rlp->size) { if (rlp->size == 0) - rlp->ranges = (unsigned long *) - malloc(sizeof(unsigned long) << 3); + rlp->ranges = (ac_uint4 *) + malloc(sizeof(ac_uint4) << 3); else - rlp->ranges = (unsigned long *) + rlp->ranges = (ac_uint4 *) realloc((char *) rlp->ranges, - sizeof(unsigned long) * (rlp->size + 8)); + sizeof(ac_uint4) * (rlp->size + 8)); rlp->size += 8; } @@ -274,10 +317,10 @@ add_range(unsigned long start, unsigned long end, char *p1, char *p2) } static void -ordered_range_insert(unsigned long c, char *name, int len) +ordered_range_insert(ac_uint4 c, char *name, int len) { int i, j; - unsigned long s, e; + ac_uint4 s, e; _ranges_t *rlp; if (len == 0) @@ -286,24 +329,11 @@ ordered_range_insert(unsigned long c, char *name, int len) /* * Deal with directionality codes introduced in Unicode 3.0. */ - if (len == 2) { - if (memcmp(name, "AL", 2) == 0) { - /* - * Mark the Arabic letters as having RTL directionality. - */ - len = 1; - name = "R"; - } else if (memcmp(name, "BN", 2) == 0) { - /* - * Mark the control characters as being Other Neutrals. - */ - len = 2; - name = "ON"; - } - } else if (len == 3 && - (memcmp(name, "NSM", 3) == 0 || memcmp(name, "PDF", 3) == 0 || - memcmp(name, "LRE", 3) == 0 || memcmp(name, "LRO", 3) == 0 || - memcmp(name, "RLE", 3) == 0 || memcmp(name, "RLO", 3) == 0)) { + if ((len == 2 && memcmp(name, "BN", 2) == 0) || + (len == 3 && + (memcmp(name, "NSM", 3) == 0 || memcmp(name, "PDF", 3) == 0 || + memcmp(name, "LRE", 3) == 0 || memcmp(name, "LRO", 3) == 0 || + memcmp(name, "RLE", 3) == 0 || memcmp(name, "RLO", 3) == 0))) { /* * Mark all of these as Other Neutral to preserve compatibility with * older versions. @@ -330,12 +360,12 @@ ordered_range_insert(unsigned long c, char *name, int len) */ if (rlp->used == rlp->size) { if (rlp->size == 0) - rlp->ranges = (unsigned long *) - malloc(sizeof(unsigned long) << 3); + rlp->ranges = (ac_uint4 *) + malloc(sizeof(ac_uint4) << 3); else - rlp->ranges = (unsigned long *) + rlp->ranges = (ac_uint4 *) realloc((char *) rlp->ranges, - sizeof(unsigned long) * (rlp->size + 8)); + sizeof(ac_uint4) * (rlp->size + 8)); rlp->size += 8; } @@ -412,41 +442,56 @@ ordered_range_insert(unsigned long c, char *name, int len) } static void -add_decomp(unsigned long code) +add_decomp(ac_uint4 code, short compat) { - unsigned long i, j, size; - + ac_uint4 i, j, size; + _decomp_t **pdecomps; + ac_uint4 *pdecomps_used; + ac_uint4 *pdecomps_size; + + if (compat) { + pdecomps = &kdecomps; + pdecomps_used = &kdecomps_used; + pdecomps_size = &kdecomps_size; + } else { + pdecomps = &decomps; + pdecomps_used = &decomps_used; + pdecomps_size = &decomps_size; + } + /* * Add the code to the composite property. */ - ordered_range_insert(code, "Cm", 2); + if (!compat) { + ordered_range_insert(code, "Cm", 2); + } /* * Locate the insertion point for the code. */ - for (i = 0; i < decomps_used && code > decomps[i].code; i++) ; + for (i = 0; i < *pdecomps_used && code > (*pdecomps)[i].code; i++) ; /* * Allocate space for a new decomposition. */ - if (decomps_used == decomps_size) { - if (decomps_size == 0) - decomps = (_decomp_t *) malloc(sizeof(_decomp_t) << 3); + if (*pdecomps_used == *pdecomps_size) { + if (*pdecomps_size == 0) + *pdecomps = (_decomp_t *) malloc(sizeof(_decomp_t) << 3); else - decomps = (_decomp_t *) - realloc((char *) decomps, - sizeof(_decomp_t) * (decomps_size + 8)); - (void) memset((char *) (decomps + decomps_size), 0, + *pdecomps = (_decomp_t *) + realloc((char *) *pdecomps, + sizeof(_decomp_t) * (*pdecomps_size + 8)); + (void) memset((char *) (*pdecomps + *pdecomps_size), '\0', sizeof(_decomp_t) << 3); - decomps_size += 8; + *pdecomps_size += 8; } - if (i < decomps_used && code != decomps[i].code) { + if (i < *pdecomps_used && code != (*pdecomps)[i].code) { /* * Shift the decomps up by one if the codes don't match. */ - for (j = decomps_used; j > i; j--) - (void) memcpy((char *) &decomps[j], (char *) &decomps[j - 1], + for (j = *pdecomps_used; j > i; j--) + (void) AC_MEMCPY((char *) &(*pdecomps)[j], (char *) &(*pdecomps)[j - 1], sizeof(_decomp_t)); } @@ -454,31 +499,37 @@ add_decomp(unsigned long code) * Insert or replace a decomposition. */ size = dectmp_size + (4 - (dectmp_size & 3)); - if (decomps[i].size < size) { - if (decomps[i].size == 0) - decomps[i].decomp = (unsigned long *) - malloc(sizeof(unsigned long) * size); + if ((*pdecomps)[i].size < size) { + if ((*pdecomps)[i].size == 0) + (*pdecomps)[i].decomp = (ac_uint4 *) + malloc(sizeof(ac_uint4) * size); else - decomps[i].decomp = (unsigned long *) - realloc((char *) decomps[i].decomp, - sizeof(unsigned long) * size); - decomps[i].size = size; + (*pdecomps)[i].decomp = (ac_uint4 *) + realloc((char *) (*pdecomps)[i].decomp, + sizeof(ac_uint4) * size); + (*pdecomps)[i].size = size; } - if (decomps[i].code != code) - decomps_used++; + if ((*pdecomps)[i].code != code) + (*pdecomps_used)++; - decomps[i].code = code; - decomps[i].used = dectmp_size; - (void) memcpy((char *) decomps[i].decomp, (char *) dectmp, - sizeof(unsigned long) * dectmp_size); + (*pdecomps)[i].code = code; + (*pdecomps)[i].used = dectmp_size; + (void) AC_MEMCPY((char *) (*pdecomps)[i].decomp, (char *) dectmp, + sizeof(ac_uint4) * dectmp_size); + /* + * NOTICE: This needs changing later so it is more general than simply + * pairs. This calculation is done here to simplify allocation elsewhere. + */ + if (!compat && dectmp_size == 2) + comps_used++; } static void -add_title(unsigned long code) +add_title(ac_uint4 code) { - unsigned long i, j; + ac_uint4 i, j; /* * Always map the code to itself. @@ -504,7 +555,7 @@ add_title(unsigned long code) * Shift the array up by one. */ for (j = title_used; j > i; j--) - (void) memcpy((char *) &title[j], (char *) &title[j - 1], + (void) AC_MEMCPY((char *) &title[j], (char *) &title[j - 1], sizeof(_case_t)); } @@ -516,9 +567,9 @@ add_title(unsigned long code) } static void -add_upper(unsigned long code) +add_upper(ac_uint4 code) { - unsigned long i, j; + ac_uint4 i, j; /* * Always map the code to itself. @@ -551,7 +602,7 @@ add_upper(unsigned long code) * Shift the array up by one. */ for (j = upper_used; j > i; j--) - (void) memcpy((char *) &upper[j], (char *) &upper[j - 1], + (void) AC_MEMCPY((char *) &upper[j], (char *) &upper[j - 1], sizeof(_case_t)); } @@ -563,9 +614,9 @@ add_upper(unsigned long code) } static void -add_lower(unsigned long code) +add_lower(ac_uint4 code) { - unsigned long i, j; + ac_uint4 i, j; /* * Always map the code to itself. @@ -598,7 +649,7 @@ add_lower(unsigned long code) * Shift the array up by one. */ for (j = lower_used; j > i; j--) - (void) memcpy((char *) &lower[j], (char *) &lower[j - 1], + (void) AC_MEMCPY((char *) &lower[j], (char *) &lower[j - 1], sizeof(_case_t)); } @@ -610,16 +661,16 @@ add_lower(unsigned long code) } static void -ordered_ccl_insert(unsigned long c, unsigned long ccl_code) +ordered_ccl_insert(ac_uint4 c, ac_uint4 ccl_code) { - unsigned long i, j; + ac_uint4 i, j; if (ccl_used == ccl_size) { if (ccl_size == 0) - ccl = (unsigned long *) malloc(sizeof(unsigned long) * 24); + ccl = (ac_uint4 *) malloc(sizeof(ac_uint4) * 24); else - ccl = (unsigned long *) - realloc((char *) ccl, sizeof(unsigned long) * (ccl_size + 24)); + ccl = (ac_uint4 *) + realloc((char *) ccl, sizeof(ac_uint4) * (ccl_size + 24)); ccl_size += 24; } @@ -682,10 +733,10 @@ ordered_ccl_insert(unsigned long c, unsigned long ccl_code) * Adds a number if it does not already exist and returns an index value * multiplied by 2. */ -static unsigned long +static ac_uint4 make_number(short num, short denom) { - unsigned long n; + ac_uint4 n; /* * Determine if the number already exists. @@ -712,9 +763,9 @@ make_number(short num, short denom) } static void -add_number(unsigned long code, short num, short denom) +add_number(ac_uint4 code, short num, short denom) { - unsigned long i, j; + ac_uint4 i, j; /* * Insert the code in order. @@ -725,7 +776,7 @@ add_number(unsigned long code, short num, short denom) * Handle the case of the codes matching and simply replace the number * that was there before. */ - if (ncodes_used > 0 && code == ncodes[i].code) { + if (i < ncodes_used && code == ncodes[i].code) { ncodes[i].idx = make_number(num, denom); return; } @@ -765,12 +816,13 @@ add_number(unsigned long code, short num, short denom) static void read_cdata(FILE *in) { - unsigned long i, lineno, skip, code, ccl_code; - short wnum, neg, number[2]; + ac_uint4 i, lineno, skip, code, ccl_code; + short wnum, neg, number[2], compat; char line[512], *s, *e; lineno = skip = 0; - while (fscanf(in, "%[^\n]\n", line) != EOF) { + while (fgets(line, sizeof(line), in)) { + if( (s=strchr(line, '\n')) ) *s = '\0'; lineno++; /* @@ -808,8 +860,17 @@ read_cdata(FILE *in) * 3. D800-DFFF Surrogates. * 4. E000-F8FF Private Use Area. * 5. F900-FA2D Han compatibility. + * ...Plus additional ranges in newer Unicode versions... */ switch (code) { + case 0x3400: + /* CJK Ideograph Extension A */ + add_range(0x3400, 0x4db5, "Lo", "L"); + + add_range(0x3400, 0x4db5, "Cp", 0); + + skip = 1; + break; case 0x4e00: /* * The Han ideographs. @@ -863,6 +924,26 @@ read_cdata(FILE *in) add_range(0xf900, 0xfaff, "Cp", 0); skip = 1; + break; + case 0x20000: + /* CJK Ideograph Extension B */ + add_range(0x20000, 0x2a6d6, "Lo", "L"); + + add_range(0x20000, 0x2a6d6, "Cp", 0); + + skip = 1; + break; + case 0xf0000: + /* Plane 15 private use */ + add_range(0xf0000, 0xffffd, "Co", "L"); + skip = 1; + break; + + case 0x100000: + /* Plane 16 private use */ + add_range(0x100000, 0x10fffd, "Co", "L"); + skip = 1; + break; } if (skip) @@ -919,7 +1000,14 @@ read_cdata(FILE *in) * Check for a decomposition. */ s = ++e; - if (*s != ';' && *s != '<') { + if (*s != ';') { + compat = *s == '<'; + if (compat) { + /* + * Skip compatibility formatting tag. + */ + while (*s++ != '>'); + } /* * Collect the codes of the decomposition. */ @@ -928,7 +1016,7 @@ read_cdata(FILE *in) * Skip all leading non-hex digits. */ while (!ishdigit(*s)) - s++; + s++; for (dectmp[dectmp_size] = 0; ishdigit(*s); s++) { dectmp[dectmp_size] <<= 4; @@ -943,11 +1031,15 @@ read_cdata(FILE *in) } /* - * If there is more than one code in the temporary decomposition - * array, then add the character with its decomposition. + * If there are any codes in the temporary decomposition array, + * then add the character with its decomposition. */ - if (dectmp_size > 1) - add_decomp(code); + if (dectmp_size > 0) { + if (!compat) { + add_decomp(code, 0); + } + add_decomp(code, 1); + } } /* @@ -986,7 +1078,7 @@ read_cdata(FILE *in) * Adjust the denominator in case of integers and add the number. */ if (wnum == 0) - number[1] = number[0]; + number[1] = 1; add_number(code, number[0], number[1]); } @@ -1038,33 +1130,35 @@ read_cdata(FILE *in) } static _decomp_t * -find_decomp(unsigned long code) +find_decomp(ac_uint4 code, short compat) { long l, r, m; - + _decomp_t *decs; + l = 0; - r = decomps_used - 1; + r = (compat ? kdecomps_used : decomps_used) - 1; + decs = compat ? kdecomps : decomps; while (l <= r) { m = (l + r) >> 1; - if (code > decomps[m].code) + if (code > decs[m].code) l = m + 1; - else if (code < decomps[m].code) + else if (code < decs[m].code) r = m - 1; else - return &decomps[m]; + return &decs[m]; } return 0; } static void -decomp_it(_decomp_t *d) +decomp_it(_decomp_t *d, short compat) { - unsigned long i; + ac_uint4 i; _decomp_t *dp; for (i = 0; i < d->used; i++) { - if ((dp = find_decomp(d->decomp[i])) != 0) - decomp_it(dp); + if ((dp = find_decomp(d->decomp[i], compat)) != 0) + decomp_it(dp, compat); else dectmp[dectmp_size++] = d->decomp[i]; } @@ -1077,23 +1171,126 @@ decomp_it(_decomp_t *d) static void expand_decomp(void) { - unsigned long i; + ac_uint4 i; for (i = 0; i < decomps_used; i++) { dectmp_size = 0; - decomp_it(&decomps[i]); + decomp_it(&decomps[i], 0); + if (dectmp_size > 0) + add_decomp(decomps[i].code, 0); + } + + for (i = 0; i < kdecomps_used; i++) { + dectmp_size = 0; + decomp_it(&kdecomps[i], 1); if (dectmp_size > 0) - add_decomp(decomps[i].code); + add_decomp(kdecomps[i].code, 1); } } +static int +cmpcomps(const void *v_comp1, const void *v_comp2) +{ + const _comp_t *comp1 = v_comp1, *comp2 = v_comp2; + long diff = comp1->code1 - comp2->code1; + + if (!diff) + diff = comp1->code2 - comp2->code2; + return (int) diff; +} + +/* + * Load composition exclusion data + */ +static void +read_compexdata(FILE *in) +{ + ac_uint2 i; + ac_uint4 code; + char line[512], *s; + + (void) memset((char *) compexs, 0, sizeof(compexs)); + + while (fgets(line, sizeof(line), in)) { + if( (s=strchr(line, '\n')) ) *s = '\0'; + /* + * Skip blank lines and lines that start with a '#'. + */ + if (line[0] == 0 || line[0] == '#') + continue; + + /* + * Collect the code. Assume max 6 digits + */ + + for (s = line, i = code = 0; *s != '#' && i < 6; i++, s++) { + if (isspace((unsigned char)*s)) break; + code <<= 4; + if (*s >= '0' && *s <= '9') + code += *s - '0'; + else if (*s >= 'A' && *s <= 'F') + code += (*s - 'A') + 10; + else if (*s >= 'a' && *s <= 'f') + code += (*s - 'a') + 10; + } + COMPEX_SET(code); + } +} + +/* + * Creates array of compositions from decomposition array + */ +static void +create_comps(void) +{ + ac_uint4 i, cu; + + comps = (_comp_t *) malloc(comps_used * sizeof(_comp_t)); + + for (i = cu = 0; i < decomps_used; i++) { + if (decomps[i].used != 2 || COMPEX_TEST(decomps[i].code)) + continue; + comps[cu].comp = decomps[i].code; + comps[cu].count = 2; + comps[cu].code1 = decomps[i].decomp[0]; + comps[cu].code2 = decomps[i].decomp[1]; + cu++; + } + comps_used = cu; + qsort(comps, comps_used, sizeof(_comp_t), cmpcomps); +} + +#if HARDCODE_DATA +static void +write_case(FILE *out, _case_t *tab, int num, int first) +{ + int i; + + for (i=0; i 0) { + for (j=0; j 0) - fwrite((char *) proptbl[i].ranges, sizeof(unsigned long), + fwrite((char *) proptbl[i].ranges, sizeof(ac_uint4), proptbl[i].used, out); } fclose(out); +#endif /***************************************************************** * @@ -1172,10 +1407,41 @@ write_cdata(char *opath) * *****************************************************************/ +#if HARDCODE_DATA + fprintf(out, PREF "ac_uint4 _uccase_size = %ld;\n\n", + (long) (upper_used + lower_used + title_used)); + + fprintf(out, PREF "ac_uint2 _uccase_len[2] = {%ld, %ld};\n\n", + (long) upper_used, (long) lower_used); + fprintf(out, PREF "ac_uint4 _uccase_map[] = {"); + + if (upper_used > 0) + /* + * Write the upper case table. + */ + write_case(out, upper, upper_used, 1); + + if (lower_used > 0) + /* + * Write the lower case table. + */ + write_case(out, lower, lower_used, !upper_used); + + if (title_used > 0) + /* + * Write the title case table. + */ + write_case(out, title, title_used, !(upper_used||lower_used)); + + if (!(upper_used || lower_used || title_used)) + fprintf(out, "\t0"); + + fprintf(out, "\n};\n\n"); +#else /* * Open the case.dat file. */ - sprintf(path, "%s/case.dat", opath); + snprintf(path, sizeof path, "%s" LDAP_DIRSEP "case.dat", opath); if ((out = fopen(path, "wb")) == 0) return; @@ -1189,12 +1455,12 @@ write_cdata(char *opath) /* * Write the header. */ - fwrite((char *) hdr, sizeof(unsigned short), 2, out); + fwrite((char *) hdr, sizeof(ac_uint2), 2, out); /* * Write the upper and lower case table sizes. */ - fwrite((char *) casecnt, sizeof(unsigned short), 2, out); + fwrite((char *) casecnt, sizeof(ac_uint2), 2, out); if (upper_used > 0) /* @@ -1215,7 +1481,68 @@ write_cdata(char *opath) fwrite((char *) title, sizeof(_case_t), title_used, out); fclose(out); +#endif + /***************************************************************** + * + * Generate the composition data. + * + *****************************************************************/ + + /* + * Create compositions from decomposition data + */ + create_comps(); + +#if HARDCODE_DATA + fprintf(out, PREF "ac_uint4 _uccomp_size = %ld;\n\n", + comps_used * 4L); + + fprintf(out, PREF "ac_uint4 _uccomp_data[] = {"); + + /* + * Now, if comps exist, write them out. + */ + if (comps_used > 0) { + for (i=0; i 0) + fwrite((char *) comps, sizeof(_comp_t), comps_used, out); + + fclose(out); +#endif + /***************************************************************** * * Generate the decomposition data. @@ -1227,10 +1554,47 @@ write_cdata(char *opath) */ expand_decomp(); +#if HARDCODE_DATA + fprintf(out, PREF "ac_uint4 _ucdcmp_size = %ld;\n\n", + decomps_used * 2L); + + fprintf(out, PREF "ac_uint4 _ucdcmp_nodes[] = {"); + + if (decomps_used) { + /* + * Write the list of decomp nodes. + */ + for (i = idx = 0; i < decomps_used; i++) { + fprintf(out, "\n\t0x%08lx, 0x%08lx,", + (unsigned long) decomps[i].code, (unsigned long) idx); + idx += decomps[i].used; + } + + /* + * Write the sentinel index as the last decomp node. + */ + fprintf(out, "\n\t0x%08lx\n};\n\n", (unsigned long) idx); + + fprintf(out, PREF "ac_uint4 _ucdcmp_decomp[] = {"); + /* + * Write the decompositions themselves. + */ + k = 0; + for (i = 0; i < decomps_used; i++) + for (j=0; j 0) { + /* + * Write the combining class ranges out. + */ + for (i = 0; i 0) /* * Write the combining class ranges out. */ - fwrite((char *) ccl, sizeof(unsigned long), ccl_used, out); + fwrite((char *) ccl, sizeof(ac_uint4), ccl_used, out); fclose(out); +#endif /***************************************************************** * @@ -1325,10 +1803,45 @@ write_cdata(char *opath) * *****************************************************************/ +#if HARDCODE_DATA + fprintf(out, PREF "ac_uint4 _ucnum_size = %lu;\n\n", + (unsigned long)ncodes_used<<1); + + fprintf(out, PREF "ac_uint4 _ucnum_nodes[] = {"); + + /* + * Now, if number mappings exist, write them out. + */ + if (ncodes_used > 0) { + for (i = 0; i 0) { - if (argv[0][0] == '-' && argv[0][1] == 'o') { - argc--; - argv++; - opath = argv[0]; + if (argv[0][0] == '-') { + switch (argv[0][1]) { + case 'o': + argc--; + argv++; + opath = argv[0]; + break; + case 'x': + argc--; + argv++; + if ((in = fopen(argv[0], "r")) == 0) + fprintf(stderr, + "%s: unable to open composition exclusion file %s\n", + prog, argv[0]); + else { + read_compexdata(in); + fclose(in); + in = 0; + } + break; + default: + usage(prog); + } } else { if (in != stdin && in != NULL) fclose(in); - if ((in = fopen(argv[0], "rb")) == 0) + if ((in = fopen(argv[0], "r")) == 0) fprintf(stderr, "%s: unable to open ctype file %s\n", prog, argv[0]); else { read_cdata(in); fclose(in); in = 0; - } + } } argc--; argv++;