X-Git-Url: https://git.sur5r.net/?a=blobdiff_plain;f=libraries%2Fliblunicode%2Fucdata%2Fucgendat.c;h=3be66a670eb34df44ed6984e32a4ab437131cac4;hb=a7c595088c1561c4f08932a97f9a716ae9dabd9c;hp=591fcc4f1049717db0e33800f75e3e22e8817249;hpb=e08bc054c7f308f19ebc19d19f29f70ca9147348;p=openldap diff --git a/libraries/liblunicode/ucdata/ucgendat.c b/libraries/liblunicode/ucdata/ucgendat.c index 591fcc4f10..3be66a670e 100644 --- a/libraries/liblunicode/ucdata/ucgendat.c +++ b/libraries/liblunicode/ucdata/ucgendat.c @@ -1,10 +1,18 @@ /* $OpenLDAP$ */ -/* - * Copyright 2000-2002 The OpenLDAP Foundation, All Rights Reserved. - * COPYING RESTRICTIONS APPLY, see COPYRIGHT file +/* This work is part of OpenLDAP Software . + * + * Copyright 1998-2012 The OpenLDAP Foundation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * . */ -/* - * Copyright 2001 Computing Research Labs, New Mexico State University +/* Copyright 2001 Computing Research Labs, New Mexico State University * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -30,10 +38,19 @@ #include "ldap_config.h" #include +#include #include #include #include +#include + +#include + +#ifndef HARDCODE_DATA +#define HARDCODE_DATA 1 +#endif + #undef ishdigit #define ishdigit(cc) (((cc) >= '0' && (cc) <= '9') ||\ ((cc) >= 'A' && (cc) <= 'F') ||\ @@ -43,7 +60,7 @@ * A header written to the output file with the byte-order-mark and the number * of property nodes. */ -static unsigned short hdr[2] = {0xfeff, 0}; +static ac_uint2 hdr[2] = {0xfeff, 0}; #define NUMPROPS 50 #define NEEDPROPS (NUMPROPS + (4 - (NUMPROPS & 3))) @@ -80,9 +97,9 @@ static _prop_t props[NUMPROPS] = { }; typedef struct { - unsigned long *ranges; - unsigned short used; - unsigned short size; + ac_uint4 *ranges; + ac_uint2 used; + ac_uint2 size; } _ranges_t; static _ranges_t proptbl[NUMPROPS]; @@ -90,20 +107,20 @@ static _ranges_t proptbl[NUMPROPS]; /* * Make sure this array is sized to be on a 4-byte boundary at compile time. */ -static unsigned short propcnt[NEEDPROPS]; +static ac_uint2 propcnt[NEEDPROPS]; /* * Array used to collect a decomposition before adding it to the decomposition * table. */ -static unsigned long dectmp[64]; -static unsigned long dectmp_size; +static ac_uint4 dectmp[64]; +static ac_uint4 dectmp_size; typedef struct { - unsigned long code; - unsigned short size; - unsigned short used; - unsigned long *decomp; + ac_uint4 code; + ac_uint2 size; + ac_uint2 used; + ac_uint4 *decomp; } _decomp_t; /* @@ -112,70 +129,70 @@ typedef struct { * compatibility mappings. */ static _decomp_t *decomps; -static unsigned long decomps_used; -static unsigned long decomps_size; +static ac_uint4 decomps_used; +static ac_uint4 decomps_size; static _decomp_t *kdecomps; -static unsigned long kdecomps_used; -static unsigned long kdecomps_size; +static ac_uint4 kdecomps_used; +static ac_uint4 kdecomps_size; /* * Composition exclusion table stuff. */ #define COMPEX_SET(c) (compexs[(c) >> 5] |= (1 << ((c) & 31))) #define COMPEX_TEST(c) (compexs[(c) >> 5] & (1 << ((c) & 31))) -static unsigned long compexs[2048]; +static ac_uint4 compexs[8192]; /* * Struct for holding a composition pair, and array of composition pairs */ typedef struct { - unsigned long comp; - unsigned long count; - unsigned long code1; - unsigned long code2; + ac_uint4 comp; + ac_uint4 count; + ac_uint4 code1; + ac_uint4 code2; } _comp_t; static _comp_t *comps; -static unsigned long comps_used; +static ac_uint4 comps_used; /* * Types and lists for handling lists of case mappings. */ typedef struct { - unsigned long key; - unsigned long other1; - unsigned long other2; + ac_uint4 key; + ac_uint4 other1; + ac_uint4 other2; } _case_t; static _case_t *upper; static _case_t *lower; static _case_t *title; -static unsigned long upper_used; -static unsigned long upper_size; -static unsigned long lower_used; -static unsigned long lower_size; -static unsigned long title_used; -static unsigned long title_size; +static ac_uint4 upper_used; +static ac_uint4 upper_size; +static ac_uint4 lower_used; +static ac_uint4 lower_size; +static ac_uint4 title_used; +static ac_uint4 title_size; /* * Array used to collect case mappings before adding them to a list. */ -static unsigned long cases[3]; +static ac_uint4 cases[3]; /* * An array to hold ranges for combining classes. */ -static unsigned long *ccl; -static unsigned long ccl_used; -static unsigned long ccl_size; +static ac_uint4 *ccl; +static ac_uint4 ccl_used; +static ac_uint4 ccl_size; /* * Structures for handling numbers. */ typedef struct { - unsigned long code; - unsigned long idx; + ac_uint4 code; + ac_uint4 idx; } _codeidx_t; typedef struct { @@ -187,22 +204,22 @@ typedef struct { * Arrays to hold the mapping of codes to numbers. */ static _codeidx_t *ncodes; -static unsigned long ncodes_used; -static unsigned long ncodes_size; +static ac_uint4 ncodes_used; +static ac_uint4 ncodes_size; static _num_t *nums; -static unsigned long nums_used; -static unsigned long nums_size; +static ac_uint4 nums_used; +static ac_uint4 nums_size; /* * Array for holding numbers. */ static _num_t *nums; -static unsigned long nums_used; -static unsigned long nums_size; +static ac_uint4 nums_used; +static ac_uint4 nums_size; static void -add_range(unsigned long start, unsigned long end, char *p1, char *p2) +add_range(ac_uint4 start, ac_uint4 end, char *p1, char *p2) { int i, j, k, len; _ranges_t *rlp; @@ -235,12 +252,12 @@ add_range(unsigned long start, unsigned long end, char *p1, char *p2) */ if (rlp->used == rlp->size) { if (rlp->size == 0) - rlp->ranges = (unsigned long *) - malloc(sizeof(unsigned long) << 3); + rlp->ranges = (ac_uint4 *) + malloc(sizeof(ac_uint4) << 3); else - rlp->ranges = (unsigned long *) + rlp->ranges = (ac_uint4 *) realloc((char *) rlp->ranges, - sizeof(unsigned long) * (rlp->size + 8)); + sizeof(ac_uint4) * (rlp->size + 8)); rlp->size += 8; } @@ -300,10 +317,10 @@ add_range(unsigned long start, unsigned long end, char *p1, char *p2) } static void -ordered_range_insert(unsigned long c, char *name, int len) +ordered_range_insert(ac_uint4 c, char *name, int len) { int i, j; - unsigned long s, e; + ac_uint4 s, e; _ranges_t *rlp; if (len == 0) @@ -343,12 +360,12 @@ ordered_range_insert(unsigned long c, char *name, int len) */ if (rlp->used == rlp->size) { if (rlp->size == 0) - rlp->ranges = (unsigned long *) - malloc(sizeof(unsigned long) << 3); + rlp->ranges = (ac_uint4 *) + malloc(sizeof(ac_uint4) << 3); else - rlp->ranges = (unsigned long *) + rlp->ranges = (ac_uint4 *) realloc((char *) rlp->ranges, - sizeof(unsigned long) * (rlp->size + 8)); + sizeof(ac_uint4) * (rlp->size + 8)); rlp->size += 8; } @@ -425,12 +442,12 @@ ordered_range_insert(unsigned long c, char *name, int len) } static void -add_decomp(unsigned long code, short compat) +add_decomp(ac_uint4 code, short compat) { - unsigned long i, j, size; + ac_uint4 i, j, size; _decomp_t **pdecomps; - unsigned long *pdecomps_used; - unsigned long *pdecomps_size; + ac_uint4 *pdecomps_used; + ac_uint4 *pdecomps_size; if (compat) { pdecomps = &kdecomps; @@ -484,12 +501,12 @@ add_decomp(unsigned long code, short compat) size = dectmp_size + (4 - (dectmp_size & 3)); if ((*pdecomps)[i].size < size) { if ((*pdecomps)[i].size == 0) - (*pdecomps)[i].decomp = (unsigned long *) - malloc(sizeof(unsigned long) * size); + (*pdecomps)[i].decomp = (ac_uint4 *) + malloc(sizeof(ac_uint4) * size); else - (*pdecomps)[i].decomp = (unsigned long *) + (*pdecomps)[i].decomp = (ac_uint4 *) realloc((char *) (*pdecomps)[i].decomp, - sizeof(unsigned long) * size); + sizeof(ac_uint4) * size); (*pdecomps)[i].size = size; } @@ -499,7 +516,7 @@ add_decomp(unsigned long code, short compat) (*pdecomps)[i].code = code; (*pdecomps)[i].used = dectmp_size; (void) AC_MEMCPY((char *) (*pdecomps)[i].decomp, (char *) dectmp, - sizeof(unsigned long) * dectmp_size); + sizeof(ac_uint4) * dectmp_size); /* * NOTICE: This needs changing later so it is more general than simply @@ -510,9 +527,9 @@ add_decomp(unsigned long code, short compat) } static void -add_title(unsigned long code) +add_title(ac_uint4 code) { - unsigned long i, j; + ac_uint4 i, j; /* * Always map the code to itself. @@ -550,9 +567,9 @@ add_title(unsigned long code) } static void -add_upper(unsigned long code) +add_upper(ac_uint4 code) { - unsigned long i, j; + ac_uint4 i, j; /* * Always map the code to itself. @@ -597,9 +614,9 @@ add_upper(unsigned long code) } static void -add_lower(unsigned long code) +add_lower(ac_uint4 code) { - unsigned long i, j; + ac_uint4 i, j; /* * Always map the code to itself. @@ -644,16 +661,16 @@ add_lower(unsigned long code) } static void -ordered_ccl_insert(unsigned long c, unsigned long ccl_code) +ordered_ccl_insert(ac_uint4 c, ac_uint4 ccl_code) { - unsigned long i, j; + ac_uint4 i, j; if (ccl_used == ccl_size) { if (ccl_size == 0) - ccl = (unsigned long *) malloc(sizeof(unsigned long) * 24); + ccl = (ac_uint4 *) malloc(sizeof(ac_uint4) * 24); else - ccl = (unsigned long *) - realloc((char *) ccl, sizeof(unsigned long) * (ccl_size + 24)); + ccl = (ac_uint4 *) + realloc((char *) ccl, sizeof(ac_uint4) * (ccl_size + 24)); ccl_size += 24; } @@ -716,10 +733,10 @@ ordered_ccl_insert(unsigned long c, unsigned long ccl_code) * Adds a number if it does not already exist and returns an index value * multiplied by 2. */ -static unsigned long +static ac_uint4 make_number(short num, short denom) { - unsigned long n; + ac_uint4 n; /* * Determine if the number already exists. @@ -746,9 +763,9 @@ make_number(short num, short denom) } static void -add_number(unsigned long code, short num, short denom) +add_number(ac_uint4 code, short num, short denom) { - unsigned long i, j; + ac_uint4 i, j; /* * Insert the code in order. @@ -799,12 +816,13 @@ add_number(unsigned long code, short num, short denom) static void read_cdata(FILE *in) { - unsigned long i, lineno, skip, code, ccl_code; + ac_uint4 i, lineno, skip, code, ccl_code; short wnum, neg, number[2], compat; char line[512], *s, *e; lineno = skip = 0; - while (fscanf(in, "%[^\n]\n", line) != EOF) { + while (fgets(line, sizeof(line), in)) { + if( (s=strchr(line, '\n')) ) *s = '\0'; lineno++; /* @@ -842,8 +860,17 @@ read_cdata(FILE *in) * 3. D800-DFFF Surrogates. * 4. E000-F8FF Private Use Area. * 5. F900-FA2D Han compatibility. + * ...Plus additional ranges in newer Unicode versions... */ switch (code) { + case 0x3400: + /* CJK Ideograph Extension A */ + add_range(0x3400, 0x4db5, "Lo", "L"); + + add_range(0x3400, 0x4db5, "Cp", 0); + + skip = 1; + break; case 0x4e00: /* * The Han ideographs. @@ -897,6 +924,26 @@ read_cdata(FILE *in) add_range(0xf900, 0xfaff, "Cp", 0); skip = 1; + break; + case 0x20000: + /* CJK Ideograph Extension B */ + add_range(0x20000, 0x2a6d6, "Lo", "L"); + + add_range(0x20000, 0x2a6d6, "Cp", 0); + + skip = 1; + break; + case 0xf0000: + /* Plane 15 private use */ + add_range(0xf0000, 0xffffd, "Co", "L"); + skip = 1; + break; + + case 0x100000: + /* Plane 16 private use */ + add_range(0x100000, 0x10fffd, "Co", "L"); + skip = 1; + break; } if (skip) @@ -1031,7 +1078,7 @@ read_cdata(FILE *in) * Adjust the denominator in case of integers and add the number. */ if (wnum == 0) - number[1] = number[0]; + number[1] = 1; add_number(code, number[0], number[1]); } @@ -1083,7 +1130,7 @@ read_cdata(FILE *in) } static _decomp_t * -find_decomp(unsigned long code, short compat) +find_decomp(ac_uint4 code, short compat) { long l, r, m; _decomp_t *decs; @@ -1106,7 +1153,7 @@ find_decomp(unsigned long code, short compat) static void decomp_it(_decomp_t *d, short compat) { - unsigned long i; + ac_uint4 i; _decomp_t *dp; for (i = 0; i < d->used; i++) { @@ -1124,7 +1171,7 @@ decomp_it(_decomp_t *d, short compat) static void expand_decomp(void) { - unsigned long i; + ac_uint4 i; for (i = 0; i < decomps_used; i++) { dectmp_size = 0; @@ -1142,8 +1189,9 @@ expand_decomp(void) } static int -cmpcomps(_comp_t *comp1, _comp_t *comp2) +cmpcomps(const void *v_comp1, const void *v_comp2) { + const _comp_t *comp1 = v_comp1, *comp2 = v_comp2; long diff = comp1->code1 - comp2->code1; if (!diff) @@ -1157,12 +1205,14 @@ cmpcomps(_comp_t *comp1, _comp_t *comp2) static void read_compexdata(FILE *in) { - unsigned short i, code; + ac_uint2 i; + ac_uint4 code; char line[512], *s; - (void) memset((char *) compexs, 0, sizeof(unsigned long) << 11); + (void) memset((char *) compexs, 0, sizeof(compexs)); - while (fscanf(in, "%[^\n]\n", line) != EOF) { + while (fgets(line, sizeof(line), in)) { + if( (s=strchr(line, '\n')) ) *s = '\0'; /* * Skip blank lines and lines that start with a '#'. */ @@ -1170,10 +1220,11 @@ read_compexdata(FILE *in) continue; /* - * Collect the code. Assume max 4 digits + * Collect the code. Assume max 6 digits */ - for (s = line, i = code = 0; *s != '#' && i < 4; i++, s++) { + for (s = line, i = code = 0; *s != '#' && i < 6; i++, s++) { + if (isspace((unsigned char)*s)) break; code <<= 4; if (*s >= '0' && *s <= '9') code += *s - '0'; @@ -1192,7 +1243,7 @@ read_compexdata(FILE *in) static void create_comps(void) { - unsigned long i, cu; + ac_uint4 i, cu; comps = (_comp_t *) malloc(comps_used * sizeof(_comp_t)); @@ -1206,17 +1257,40 @@ create_comps(void) cu++; } comps_used = cu; - qsort(comps, comps_used, sizeof(_comp_t), - (int (*)(const void *, const void *)) cmpcomps); + qsort(comps, comps_used, sizeof(_comp_t), cmpcomps); } +#if HARDCODE_DATA +static void +write_case(FILE *out, _case_t *tab, int num, int first) +{ + int i; + + for (i=0; i 0) { + for (j=0; j 0) - fwrite((char *) proptbl[i].ranges, sizeof(unsigned long), + fwrite((char *) proptbl[i].ranges, sizeof(ac_uint4), proptbl[i].used, out); } fclose(out); +#endif /***************************************************************** * @@ -1295,10 +1407,41 @@ write_cdata(char *opath) * *****************************************************************/ +#if HARDCODE_DATA + fprintf(out, PREF "ac_uint4 _uccase_size = %ld;\n\n", + (long) (upper_used + lower_used + title_used)); + + fprintf(out, PREF "ac_uint2 _uccase_len[2] = {%ld, %ld};\n\n", + (long) upper_used, (long) lower_used); + fprintf(out, PREF "ac_uint4 _uccase_map[] = {"); + + if (upper_used > 0) + /* + * Write the upper case table. + */ + write_case(out, upper, upper_used, 1); + + if (lower_used > 0) + /* + * Write the lower case table. + */ + write_case(out, lower, lower_used, !upper_used); + + if (title_used > 0) + /* + * Write the title case table. + */ + write_case(out, title, title_used, !(upper_used||lower_used)); + + if (!(upper_used || lower_used || title_used)) + fprintf(out, "\t0"); + + fprintf(out, "\n};\n\n"); +#else /* * Open the case.dat file. */ - sprintf(path, "%s%scase.dat", opath, LDAP_DIRSEP); + snprintf(path, sizeof path, "%s" LDAP_DIRSEP "case.dat", opath); if ((out = fopen(path, "wb")) == 0) return; @@ -1312,12 +1455,12 @@ write_cdata(char *opath) /* * Write the header. */ - fwrite((char *) hdr, sizeof(unsigned short), 2, out); + fwrite((char *) hdr, sizeof(ac_uint2), 2, out); /* * Write the upper and lower case table sizes. */ - fwrite((char *) casecnt, sizeof(unsigned short), 2, out); + fwrite((char *) casecnt, sizeof(ac_uint2), 2, out); if (upper_used > 0) /* @@ -1338,6 +1481,7 @@ write_cdata(char *opath) fwrite((char *) title, sizeof(_case_t), title_used, out); fclose(out); +#endif /***************************************************************** * @@ -1350,24 +1494,45 @@ write_cdata(char *opath) */ create_comps(); +#if HARDCODE_DATA + fprintf(out, PREF "ac_uint4 _uccomp_size = %ld;\n\n", + comps_used * 4L); + + fprintf(out, PREF "ac_uint4 _uccomp_data[] = {"); + + /* + * Now, if comps exist, write them out. + */ + if (comps_used > 0) { + for (i=0; i 0) { + /* + * Write the combining class ranges out. + */ + for (i = 0; i 0) /* * Write the combining class ranges out. */ - fwrite((char *) ccl, sizeof(unsigned long), ccl_used, out); + fwrite((char *) ccl, sizeof(ac_uint4), ccl_used, out); fclose(out); +#endif /***************************************************************** * @@ -1540,10 +1803,45 @@ write_cdata(char *opath) * *****************************************************************/ +#if HARDCODE_DATA + fprintf(out, PREF "ac_uint4 _ucnum_size = %lu;\n\n", + (unsigned long)ncodes_used<<1); + + fprintf(out, PREF "ac_uint4 _ucnum_nodes[] = {"); + + /* + * Now, if number mappings exist, write them out. + */ + if (ncodes_used > 0) { + for (i = 0; i