static unsigned long *_ucdcmp_nodes;
static unsigned long *_ucdcmp_decomp;
+static unsigned long _uckdcmp_size;
+static unsigned long *_uckdcmp_nodes;
+static unsigned long *_uckdcmp_decomp;
+
/*
* Return -1 on error, 0 if okay
*/
return 0;
}
+/*
+ * Load the compatibility decomposition table from "kdecomp.dat".
+ *
+ * paths is handed to _ucopenfile() to locate the file.  When a table is
+ * already loaded it is kept unless reload is non-zero, in which case it is
+ * freed and the file is read again.
+ *
+ * Return -1 on error, 0 if okay.  On error no table is left loaded.
+ */
+static int
+_uckdcmp_load(char *paths, int reload)
+{
+    FILE *in;
+    unsigned long size, nsize, i;
+    unsigned long *nodes;
+    _ucheader_t hdr;
+
+    if (_uckdcmp_size > 0) {
+        if (!reload)
+            /*
+             * The decompositions have already been loaded.
+             */
+            return 0;
+
+        /*
+         * Drop the old table and clear the pointers so that a failed
+         * reload does not leave them dangling.
+         */
+        free((char *) _uckdcmp_nodes);
+        _uckdcmp_nodes = 0;
+        _uckdcmp_decomp = 0;
+        _uckdcmp_size = 0;
+    }
+
+    if ((in = _ucopenfile(paths, "kdecomp.dat", "rb")) == 0)
+        return -1;
+
+    /*
+     * Load the header.  A short read would leave hdr indeterminate, so
+     * treat it as an error.
+     */
+    if (fread((char *) &hdr, sizeof(_ucheader_t), 1, in) != 1) {
+        fclose(in);
+        return -1;
+    }
+
+    if (hdr.bom == 0xfffe) {
+        hdr.cnt = endian_short(hdr.cnt);
+        hdr.size.bytes = endian_long(hdr.size.bytes);
+    }
+
+    /*
+     * Two node slots (code, offset) per entry; one sentinel slot follows
+     * them, and the decomposition data follows the sentinel.
+     */
+    nsize = hdr.cnt << 1;
+    size = hdr.size.bytes / sizeof(unsigned long);
+
+    if (nsize == 0) {
+        /*
+         * Empty table: nothing to load, and nothing is left allocated.
+         */
+        fclose(in);
+        return 0;
+    }
+
+    /*
+     * The data block has to hold every node slot plus the sentinel,
+     * otherwise later lookups would index past the allocation.
+     */
+    if (size < nsize + 1) {
+        fclose(in);
+        return -1;
+    }
+
+    nodes = (unsigned long *) malloc(hdr.size.bytes);
+    if (nodes == 0) {
+        fclose(in);
+        return -1;
+    }
+
+    /*
+     * Read the decomposition data in.
+     */
+    if (fread((char *) nodes, sizeof(unsigned long), size, in) != size) {
+        free((char *) nodes);
+        fclose(in);
+        return -1;
+    }
+    fclose(in);
+
+    /*
+     * Do an endian swap if necessary.
+     */
+    if (hdr.bom == 0xfffe) {
+        for (i = 0; i < size; i++)
+            nodes[i] = endian_long(nodes[i]);
+    }
+
+    /*
+     * Publish the table only once it has been read completely.
+     */
+    _uckdcmp_nodes = nodes;
+    _uckdcmp_decomp = nodes + (nsize + 1);
+    _uckdcmp_size = nsize;
+    return 0;
+}
+
static void
_ucdcmp_unload(void)
{
_ucdcmp_size = 0;
}
+/*
+ * Release the compatibility decomposition table, if one is loaded.
+ */
+static void
+_uckdcmp_unload(void)
+{
+    if (_uckdcmp_size == 0)
+        return;
+
+    /*
+     * Only need to free the offsets because the memory is allocated as a
+     * single block.  _uckdcmp_decomp points into that same block, so both
+     * pointers are cleared rather than left dangling.
+     */
+    free((char *) _uckdcmp_nodes);
+    _uckdcmp_nodes = 0;
+    _uckdcmp_decomp = 0;
+    _uckdcmp_size = 0;
+}
+
int
ucdecomp(unsigned long code, unsigned long *num, unsigned long **decomp)
{
long l, r, m;
+ if (code < _ucdcmp_nodes[0]) {
+ return 0;
+ }
+
l = 0;
r = _ucdcmp_nodes[_ucdcmp_size] - 1;
return 0;
}
+/*
+ * Look up the compatibility decomposition of code.
+ *
+ * On a match, *num receives the number of code points in the decomposition,
+ * *decomp is pointed at them inside the loaded table (not a copy), and 1 is
+ * returned.  Returns 0, leaving *num and *decomp untouched, when code has no
+ * entry or when no table is loaded.
+ */
+int
+uckdecomp(unsigned long code, unsigned long *num, unsigned long **decomp)
+{
+    long l, r, m;
+    unsigned long end;
+
+    /*
+     * Nothing loaded (or already unloaded): the node pointer must not be
+     * dereferenced.
+     */
+    if (_uckdcmp_size == 0)
+        return 0;
+
+    if (code < _uckdcmp_nodes[0]) {
+        return 0;
+    }
+
+    /*
+     * The node array holds _uckdcmp_size slots of code+offset pairs followed
+     * by one sentinel offset, so the last pair starts at _uckdcmp_size - 2.
+     * The sentinel's value is an offset into the decomposition data, not a
+     * position in the node array, so it is not usable as the search bound.
+     */
+    l = 0;
+    r = (long) _uckdcmp_size - 2;
+
+    while (l <= r) {
+        /*
+         * Determine a "mid" point and adjust to make sure the mid point is at
+         * the beginning of a code+offset pair.
+         */
+        m = (l + r) >> 1;
+        m -= (m & 1);
+        if (code > _uckdcmp_nodes[m])
+            l = m + 2;
+        else if (code < _uckdcmp_nodes[m])
+            r = m - 2;
+        else {
+            /*
+             * The decomposition ends where the next one starts.  For the
+             * last pair the "next" offset is the sentinel, which sits
+             * directly after the pair instead of in another pair.
+             */
+            if ((unsigned long) m + 2 == _uckdcmp_size)
+                end = _uckdcmp_nodes[_uckdcmp_size];
+            else
+                end = _uckdcmp_nodes[m + 3];
+            *num = end - _uckdcmp_nodes[m + 1];
+            *decomp = &_uckdcmp_decomp[_uckdcmp_nodes[m + 1]];
+            return 1;
+        }
+    }
+    return 0;
+}
+
int
ucdecomp_hangul(unsigned long code, unsigned long *num, unsigned long decomp[])
{
return 1;
}
-int
-uccanondecomp(const unsigned long *in, int inlen,
- unsigned long **out, int *outlen)
+/* mode == 0 for canonical, mode == 1 for compatibility */
+static int
+uccanoncompatdecomp(const unsigned long *in, int inlen,
+ unsigned long **out, int *outlen, short mode)
{
int l, size;
unsigned i, j, k;
i = 0;
for (j = 0; j < (unsigned) inlen; j++) {
- if (ucdecomp(in[j], &num, &decomp)) {
+ if (mode ? uckdecomp(in[j], &num, &decomp) : ucdecomp(in[j], &num, &decomp)) {
if ( size - i < num) {
size = inlen + i - j + num - 1;
*out = (unsigned long *) realloc(*out, size * sizeof(**out));
return *outlen = i;
}
+/*
+ * Canonical decomposition entry point: forwards its arguments to
+ * uccanoncompatdecomp() with the mode fixed to 0 (canonical) and returns
+ * that function's result unchanged.
+ */
+int
+uccanondecomp(const unsigned long *in, int inlen,
+              unsigned long **out, int *outlen)
+{
+    const short canonical_mode = 0;
+
+    return uccanoncompatdecomp(in, inlen, out, outlen, canonical_mode);
+}
+
+/*
+ * Compatibility decomposition entry point: forwards its arguments to
+ * uccanoncompatdecomp() with the mode fixed to 1 (compatibility) and
+ * returns that function's result unchanged.
+ */
+int
+uccompatdecomp(const unsigned long *in, int inlen,
+               unsigned long **out, int *outlen)
+{
+    const short compat_mode = 1;
+
+    return uccanoncompatdecomp(in, inlen, out, outlen, compat_mode);
+}
+
/**************************************************************************
*
* Support for combining classes.
error |= _ucnumb_load(paths, 0) < 0 ? UCDATA_NUM : 0;
if (masks & UCDATA_COMP)
error |= _uccomp_load(paths, 0) < 0 ? UCDATA_COMP : 0;
+ if (masks & UCDATA_KDECOMP)
+ error |= _uckdcmp_load(paths, 0) < 0 ? UCDATA_KDECOMP : 0;
return -error;
}
_ucnumb_unload();
if (masks & UCDATA_COMP)
_uccomp_unload();
+ if (masks & UCDATA_KDECOMP)
+ _uckdcmp_unload();
}
/*
error |= _ucnumb_load(paths, 1) < 0 ? UCDATA_NUM : 0;
if (masks & UCDATA_COMP)
error |= _uccomp_load(paths, 1) < 0 ? UCDATA_COMP : 0;
+ if (masks & UCDATA_KDECOMP)
+ error |= _uckdcmp_load(paths, 1) < 0 ? UCDATA_KDECOMP : 0;
return -error;
}
/*
* List of decomposition. Created and expanded in order as the characters are
- * encountered.
+ * encountered. First list contains canonical mappings, second also includes
+ * compatibility mappings.
*/
static _decomp_t *decomps;
static unsigned long decomps_used;
static unsigned long decomps_size;
+static _decomp_t *kdecomps;
+static unsigned long kdecomps_used;
+static unsigned long kdecomps_size;
+
/*
* Composition exclusion table stuff.
*/
}
static void
-add_decomp(unsigned long code)
+add_decomp(unsigned long code, short compat)
{
unsigned long i, j, size;
-
+ _decomp_t **pdecomps;
+ unsigned long *pdecomps_used;
+ unsigned long *pdecomps_size;
+
+ if (compat) {
+ pdecomps = &kdecomps;
+ pdecomps_used = &kdecomps_used;
+ pdecomps_size = &kdecomps_size;
+ } else {
+ pdecomps = &decomps;
+ pdecomps_used = &decomps_used;
+ pdecomps_size = &decomps_size;
+ }
+
/*
* Add the code to the composite property.
*/
- ordered_range_insert(code, "Cm", 2);
+ if (!compat) {
+ ordered_range_insert(code, "Cm", 2);
+ }
/*
* Locate the insertion point for the code.
*/
- for (i = 0; i < decomps_used && code > decomps[i].code; i++) ;
+ for (i = 0; i < *pdecomps_used && code > (*pdecomps)[i].code; i++) ;
/*
* Allocate space for a new decomposition.
*/
- if (decomps_used == decomps_size) {
- if (decomps_size == 0)
- decomps = (_decomp_t *) malloc(sizeof(_decomp_t) << 3);
+ if (*pdecomps_used == *pdecomps_size) {
+ if (*pdecomps_size == 0)
+ *pdecomps = (_decomp_t *) malloc(sizeof(_decomp_t) << 3);
else
- decomps = (_decomp_t *)
- realloc((char *) decomps,
- sizeof(_decomp_t) * (decomps_size + 8));
- (void) memset((char *) (decomps + decomps_size), '\0',
+ *pdecomps = (_decomp_t *)
+ realloc((char *) *pdecomps,
+ sizeof(_decomp_t) * (*pdecomps_size + 8));
+ (void) memset((char *) (*pdecomps + *pdecomps_size), '\0',
sizeof(_decomp_t) << 3);
- decomps_size += 8;
+ *pdecomps_size += 8;
}
- if (i < decomps_used && code != decomps[i].code) {
+ if (i < *pdecomps_used && code != (*pdecomps)[i].code) {
/*
* Shift the decomps up by one if the codes don't match.
*/
- for (j = decomps_used; j > i; j--)
- (void) AC_MEMCPY((char *) &decomps[j], (char *) &decomps[j - 1],
+ for (j = *pdecomps_used; j > i; j--)
+ (void) AC_MEMCPY((char *) &(*pdecomps)[j], (char *) &(*pdecomps)[j - 1],
sizeof(_decomp_t));
}
* Insert or replace a decomposition.
*/
size = dectmp_size + (4 - (dectmp_size & 3));
- if (decomps[i].size < size) {
- if (decomps[i].size == 0)
- decomps[i].decomp = (unsigned long *)
+ if ((*pdecomps)[i].size < size) {
+ if ((*pdecomps)[i].size == 0)
+ (*pdecomps)[i].decomp = (unsigned long *)
malloc(sizeof(unsigned long) * size);
else
- decomps[i].decomp = (unsigned long *)
- realloc((char *) decomps[i].decomp,
+ (*pdecomps)[i].decomp = (unsigned long *)
+ realloc((char *) (*pdecomps)[i].decomp,
sizeof(unsigned long) * size);
- decomps[i].size = size;
+ (*pdecomps)[i].size = size;
}
- if (decomps[i].code != code)
- decomps_used++;
+ if ((*pdecomps)[i].code != code)
+ (*pdecomps_used)++;
- decomps[i].code = code;
- decomps[i].used = dectmp_size;
- (void) AC_MEMCPY((char *) decomps[i].decomp, (char *) dectmp,
+ (*pdecomps)[i].code = code;
+ (*pdecomps)[i].used = dectmp_size;
+ (void) AC_MEMCPY((char *) (*pdecomps)[i].decomp, (char *) dectmp,
sizeof(unsigned long) * dectmp_size);
/*
* NOTICE: This needs changing later so it is more general than simply
* pairs. This calculation is done here to simplify allocation elsewhere.
*/
- if (dectmp_size == 2)
+ if (!compat && dectmp_size == 2)
comps_used++;
}
read_cdata(FILE *in)
{
unsigned long i, lineno, skip, code, ccl_code;
- short wnum, neg, number[2];
+ short wnum, neg, number[2], compat;
char line[512], *s, *e;
lineno = skip = 0;
* Check for a decomposition.
*/
s = ++e;
- if (*s != ';' && *s != '<') {
+ if (*s != ';') {
+ compat = *s == '<';
+ if (compat) {
+ /*
+ * Skip compatibility formatting tag.
+ */
+ while (*s++ != '>');
+ }
/*
* Collect the codes of the decomposition.
*/
* Skip all leading non-hex digits.
*/
while (!ishdigit(*s))
- s++;
+ s++;
for (dectmp[dectmp_size] = 0; ishdigit(*s); s++) {
dectmp[dectmp_size] <<= 4;
* If there are any codes in the temporary decomposition array,
* then add the character with its decomposition.
*/
- if (dectmp_size > 0)
- add_decomp(code);
+ if (dectmp_size > 0) {
+ if (!compat) {
+ add_decomp(code, 0);
+ }
+ add_decomp(code, 1);
+ }
}
/*
}
static _decomp_t *
-find_decomp(unsigned long code)
+find_decomp(unsigned long code, short compat)
{
long l, r, m;
-
+ _decomp_t *decs;
+
l = 0;
- r = decomps_used - 1;
+ r = (compat ? kdecomps_used : decomps_used) - 1;
+ decs = compat ? kdecomps : decomps;
while (l <= r) {
m = (l + r) >> 1;
- if (code > decomps[m].code)
+ if (code > decs[m].code)
l = m + 1;
- else if (code < decomps[m].code)
+ else if (code < decs[m].code)
r = m - 1;
else
- return &decomps[m];
+ return &decs[m];
}
return 0;
}
static void
-decomp_it(_decomp_t *d)
+decomp_it(_decomp_t *d, short compat)
{
unsigned long i;
_decomp_t *dp;
for (i = 0; i < d->used; i++) {
- if ((dp = find_decomp(d->decomp[i])) != 0)
- decomp_it(dp);
+ if ((dp = find_decomp(d->decomp[i], compat)) != 0)
+ decomp_it(dp, compat);
else
dectmp[dectmp_size++] = d->decomp[i];
}
for (i = 0; i < decomps_used; i++) {
dectmp_size = 0;
- decomp_it(&decomps[i]);
+ decomp_it(&decomps[i], 0);
if (dectmp_size > 0)
- add_decomp(decomps[i].code);
+ add_decomp(decomps[i].code, 0);
+ }
+
+ for (i = 0; i < kdecomps_used; i++) {
+ dectmp_size = 0;
+ decomp_it(&kdecomps[i], 1);
+ if (dectmp_size > 0)
+ add_decomp(kdecomps[i].code, 1);
}
}
fclose(out);
}
+    /*
+     * Open the kdecomp.dat file.
+     */
+    sprintf(path, "%s%skdecomp.dat", opath, LDAP_DIRSEP);
+    if ((out = fopen(path, "wb")) == 0)
+        return;
+
+    hdr[1] = kdecomps_used;
+
+    /*
+     * Write the header.
+     */
+    fwrite((char *) hdr, sizeof(unsigned short), 2, out);
+
+    /*
+     * Write a temporary byte count which will be calculated as the
+     * decompositions are written out.
+     */
+    bytes = 0;
+    fwrite((char *) &bytes, sizeof(unsigned long), 1, out);
+
+    if (kdecomps_used) {
+        /*
+         * Write the list of kdecomp nodes: each is the code followed by the
+         * running offset of its decomposition in the data that follows.
+         */
+        for (i = idx = 0; i < kdecomps_used; i++) {
+            fwrite((char *) &kdecomps[i].code, sizeof(unsigned long), 1, out);
+            fwrite((char *) &idx, sizeof(unsigned long), 1, out);
+            idx += kdecomps[i].used;
+        }
+
+        /*
+         * Write the sentinel index as the last decomp node.
+         */
+        fwrite((char *) &idx, sizeof(unsigned long), 1, out);
+
+        /*
+         * Write the decompositions themselves.
+         */
+        for (i = 0; i < kdecomps_used; i++)
+            fwrite((char *) kdecomps[i].decomp, sizeof(unsigned long),
+                   kdecomps[i].used, out);
+
+        /*
+         * Seek back to the beginning and write the byte count.
+         */
+        bytes = (sizeof(unsigned long) * idx) +
+            (sizeof(unsigned long) * ((hdr[1] << 1) + 1));
+        fseek(out, sizeof(unsigned short) << 1, 0L);
+        fwrite((char *) &bytes, sizeof(unsigned long), 1, out);
+    }
+
+    /*
+     * Close the file whether or not any decompositions were written, so
+     * that the header-only file is flushed and the stream is not leaked
+     * when the list is empty.
+     */
+    fclose(out);
+
/*****************************************************************
*
* Generate the combining class data.