git.sur5r.net Git - openldap/blob - libraries/liblunicode/ucdata/ucgendat.c

   1 /* $OpenLDAP$ */
   2 /*
   3  * Copyright 2000-2002 The OpenLDAP Foundation, All Rights Reserved.
   4  * COPYING RESTRICTIONS APPLY, see COPYRIGHT file
   5  */
   6 /*
   7  * Copyright 2001 Computing Research Labs, New Mexico State University
   8  *
   9  * Permission is hereby granted, free of charge, to any person obtaining a
  10  * copy of this software and associated documentation files (the "Software"),
  11  * to deal in the Software without restriction, including without limitation
  12  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  13  * and/or sell copies of the Software, and to permit persons to whom the
  14  * Software is furnished to do so, subject to the following conditions:
  15  *
  16  * The above copyright notice and this permission notice shall be included in
  17  * all copies or substantial portions of the Software.
  18  *
  19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  20  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  21  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  22  * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
  23  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
  24  * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
  25  * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  26  */
  27 /* $Id: ucgendat.c,v 1.4 2001/01/02 18:46:20 mleisher Exp $" */
  28
  29 #include "portable.h"
  30 #include "ldap_config.h"
  31
  32 #include <stdio.h>
  33 #include <ac/bytes.h>
  34 #include <ac/stdlib.h>
  35 #include <ac/string.h>
  36 #include <ac/unistd.h>
  37
  38 #undef ishdigit
  39 #define ishdigit(cc) (((cc) >= '0' && (cc) <= '9') ||\
  40                       ((cc) >= 'A' && (cc) <= 'F') ||\
  41                       ((cc) >= 'a' && (cc) <= 'f'))
  42
  43 /*
  44  * A header written to the output file with the byte-order-mark and the number
  45  * of property nodes.
  46  */
  47 static unsigned short hdr[2] = {0xfeff, 0};
  48
  49 #define NUMPROPS 50
  50 #define NEEDPROPS (NUMPROPS + (4 - (NUMPROPS & 3)))
  51
  52 typedef struct {
  53     char *name;
  54     int len;
  55 } _prop_t;
  56
  57 /*
  58  * List of properties expected to be found in the Unicode Character Database
  59  * including some implementation specific properties.
  60  *
  61  * The implementation specific properties are:
  62  * Cm = Composed (can be decomposed)
  63  * Nb = Non-breaking
  64  * Sy = Symmetric (has left and right forms)
  65  * Hd = Hex digit
  66  * Qm = Quote marks
  67  * Mr = Mirroring
  68  * Ss = Space, other
  69  * Cp = Defined character
  70  */
  71 static _prop_t props[NUMPROPS] = {
  72     {"Mn", 2}, {"Mc", 2}, {"Me", 2}, {"Nd", 2}, {"Nl", 2}, {"No", 2},
  73     {"Zs", 2}, {"Zl", 2}, {"Zp", 2}, {"Cc", 2}, {"Cf", 2}, {"Cs", 2},
  74     {"Co", 2}, {"Cn", 2}, {"Lu", 2}, {"Ll", 2}, {"Lt", 2}, {"Lm", 2},
  75     {"Lo", 2}, {"Pc", 2}, {"Pd", 2}, {"Ps", 2}, {"Pe", 2}, {"Po", 2},
  76     {"Sm", 2}, {"Sc", 2}, {"Sk", 2}, {"So", 2}, {"L",  1}, {"R",  1},
  77     {"EN", 2}, {"ES", 2}, {"ET", 2}, {"AN", 2}, {"CS", 2}, {"B",  1},
  78     {"S",  1}, {"WS", 2}, {"ON", 2},
  79     {"Cm", 2}, {"Nb", 2}, {"Sy", 2}, {"Hd", 2}, {"Qm", 2}, {"Mr", 2},
  80     {"Ss", 2}, {"Cp", 2}, {"Pi", 2}, {"Pf", 2}, {"AL", 2}
  81 };
  82
  83 typedef struct {
  84     unsigned long *ranges;
  85     unsigned short used;
  86     unsigned short size;
  87 } _ranges_t;
  88
  89 static _ranges_t proptbl[NUMPROPS];
  90
  91 /*
  92  * Make sure this array is sized to be on a 4-byte boundary at compile time.
  93  */
  94 static unsigned short propcnt[NEEDPROPS];
  95
  96 /*
  97  * Array used to collect a decomposition before adding it to the decomposition
  98  * table.
  99  */
 100 static unsigned long dectmp[64];
 101 static unsigned long dectmp_size;
 102
 103 typedef struct {
 104     unsigned long code;
 105     unsigned short size;
 106     unsigned short used;
 107     unsigned long *decomp;
 108 } _decomp_t;
 109
 110 /*
 111  * List of decomposition.  Created and expanded in order as the characters are
 112  * encountered. First list contains canonical mappings, second also includes
 113  * compatibility mappings.
 114  */
 115 static _decomp_t *decomps;
 116 static unsigned long decomps_used;
 117 static unsigned long decomps_size;
 118
 119 static _decomp_t *kdecomps;
 120 static unsigned long kdecomps_used;
 121 static unsigned long kdecomps_size;
 122
 123 /*
 124  * Composition exclusion table stuff.
 125  */
 126 #define COMPEX_SET(c) (compexs[(c) >> 5] |= (1 << ((c) & 31)))
 127 #define COMPEX_TEST(c) (compexs[(c) >> 5] & (1 << ((c) & 31)))
 128 static unsigned long compexs[2048];
 129
 130 /*
 131  * Struct for holding a composition pair, and array of composition pairs
 132  */
 133 typedef struct {
 134     unsigned long comp;
 135     unsigned long count;
 136     unsigned long code1;
 137     unsigned long code2;
 138 } _comp_t;
 139
 140 static _comp_t *comps;
 141 static unsigned long comps_used;
 142
 143 /*
 144  * Types and lists for handling lists of case mappings.
 145  */
 146 typedef struct {
 147     unsigned long key;
 148     unsigned long other1;
 149     unsigned long other2;
 150 } _case_t;
 151
 152 static _case_t *upper;
 153 static _case_t *lower;
 154 static _case_t *title;
 155 static unsigned long upper_used;
 156 static unsigned long upper_size;
 157 static unsigned long lower_used;
 158 static unsigned long lower_size;
 159 static unsigned long title_used;
 160 static unsigned long title_size;
 161
 162 /*
 163  * Array used to collect case mappings before adding them to a list.
 164  */
 165 static unsigned long cases[3];
 166
 167 /*
 168  * An array to hold ranges for combining classes.
 169  */
 170 static unsigned long *ccl;
 171 static unsigned long ccl_used;
 172 static unsigned long ccl_size;
 173
 174 /*
 175  * Structures for handling numbers.
 176  */
 177 typedef struct {
 178     unsigned long code;
 179     unsigned long idx;
 180 } _codeidx_t;
 181
 182 typedef struct {
 183     short numerator;
 184     short denominator;
 185 } _num_t;
 186
 187 /*
 188  * Arrays to hold the mapping of codes to numbers.
 189  */
 190 static _codeidx_t *ncodes;
 191 static unsigned long ncodes_used;
 192 static unsigned long ncodes_size;
 193
 194 static _num_t *nums;
 195 static unsigned long nums_used;
 196 static unsigned long nums_size;
 197
 198 /*
 199  * Array for holding numbers.
 200  */
 201 static _num_t *nums;
 202 static unsigned long nums_used;
 203 static unsigned long nums_size;
 204
 205 static void
 206 add_range(unsigned long start, unsigned long end, char *p1, char *p2)
 207 {
 208     int i, j, k, len;
 209     _ranges_t *rlp;
 210     char *name;
 211
 212     for (k = 0; k < 2; k++) {
 213         if (k == 0) {
 214             name = p1;
 215             len = 2;
 216         } else {
 217             if (p2 == 0)
 218               break;
 219
 220             name = p2;
 221             len = 1;
 222         }
 223
 224         for (i = 0; i < NUMPROPS; i++) {
 225             if (props[i].len == len && memcmp(props[i].name, name, len) == 0)
 226               break;
 227         }
 228
 229         if (i == NUMPROPS)
 230           continue;
 231
 232         rlp = &proptbl[i];
 233
 234         /*
 235          * Resize the range list if necessary.
 236          */
 237         if (rlp->used == rlp->size) {
 238             if (rlp->size == 0)
 239               rlp->ranges = (unsigned long *)
 240                   malloc(sizeof(unsigned long) << 3);
 241             else
 242               rlp->ranges = (unsigned long *)
 243                   realloc((char *) rlp->ranges,
 244                           sizeof(unsigned long) * (rlp->size + 8));
 245             rlp->size += 8;
 246         }
 247
 248         /*
 249          * If this is the first code for this property list, just add it
 250          * and return.
 251          */
 252         if (rlp->used == 0) {
 253             rlp->ranges[0] = start;
 254             rlp->ranges[1] = end;
 255             rlp->used += 2;
 256             continue;
 257         }
 258
 259         /*
 260          * Optimize the case of adding the range to the end.
 261          */
 262         j = rlp->used - 1;
 263         if (start > rlp->ranges[j]) {
 264             j = rlp->used;
 265             rlp->ranges[j++] = start;
 266             rlp->ranges[j++] = end;
 267             rlp->used = j;
 268             continue;
 269         }
 270
 271         /*
 272          * Need to locate the insertion point.
 273          */
 274         for (i = 0;
 275              i < rlp->used && start > rlp->ranges[i + 1] + 1; i += 2) ;
 276
 277         /*
 278          * If the start value lies in the current range, then simply set the
 279          * new end point of the range to the end value passed as a parameter.
 280          */
 281         if (rlp->ranges[i] <= start && start <= rlp->ranges[i + 1] + 1) {
 282             rlp->ranges[i + 1] = end;
 283             return;
 284         }
 285
 286         /*
 287          * Shift following values up by two.
 288          */
 289         for (j = rlp->used; j > i; j -= 2) {
 290             rlp->ranges[j] = rlp->ranges[j - 2];
 291             rlp->ranges[j + 1] = rlp->ranges[j - 1];
 292         }
 293
 294         /*
 295          * Add the new range at the insertion point.
 296          */
 297         rlp->ranges[i] = start;
 298         rlp->ranges[i + 1] = end;
 299         rlp->used += 2;
 300     }
 301 }
 302
 303 static void
 304 ordered_range_insert(unsigned long c, char *name, int len)
 305 {
 306     int i, j;
 307     unsigned long s, e;
 308     _ranges_t *rlp;
 309
 310     if (len == 0)
 311       return;
 312
 313     /*
 314      * Deal with directionality codes introduced in Unicode 3.0.
 315      */
 316     if ((len == 2 && memcmp(name, "BN", 2) == 0) ||
 317         (len == 3 &&
 318          (memcmp(name, "NSM", 3) == 0 || memcmp(name, "PDF", 3) == 0 ||
 319           memcmp(name, "LRE", 3) == 0 || memcmp(name, "LRO", 3) == 0 ||
 320           memcmp(name, "RLE", 3) == 0 || memcmp(name, "RLO", 3) == 0))) {
 321         /*
 322          * Mark all of these as Other Neutral to preserve compatibility with
 323          * older versions.
 324          */
 325         len = 2;
 326         name = "ON";
 327     }
 328
 329     for (i = 0; i < NUMPROPS; i++) {
 330         if (props[i].len == len && memcmp(props[i].name, name, len) == 0)
 331           break;
 332     }
 333
 334     if (i == NUMPROPS)
 335       return;
 336
 337     /*
 338      * Have a match, so insert the code in order.
 339      */
 340     rlp = &proptbl[i];
 341
 342     /*
 343      * Resize the range list if necessary.
 344      */
 345     if (rlp->used == rlp->size) {
 346         if (rlp->size == 0)
 347           rlp->ranges = (unsigned long *)
 348               malloc(sizeof(unsigned long) << 3);
 349         else
 350           rlp->ranges = (unsigned long *)
 351               realloc((char *) rlp->ranges,
 352                       sizeof(unsigned long) * (rlp->size + 8));
 353         rlp->size += 8;
 354     }
 355
 356     /*
 357      * If this is the first code for this property list, just add it
 358      * and return.
 359      */
 360     if (rlp->used == 0) {
 361         rlp->ranges[0] = rlp->ranges[1] = c;
 362         rlp->used += 2;
 363         return;
 364     }
 365
 366     /*
 367      * Optimize the cases of extending the last range and adding new ranges to
 368      * the end.
 369      */
 370     j = rlp->used - 1;
 371     e = rlp->ranges[j];
 372     s = rlp->ranges[j - 1];
 373
 374     if (c == e + 1) {
 375         /*
 376          * Extend the last range.
 377          */
 378         rlp->ranges[j] = c;
 379         return;
 380     }
 381
 382     if (c > e + 1) {
 383         /*
 384          * Start another range on the end.
 385          */
 386         j = rlp->used;
 387         rlp->ranges[j] = rlp->ranges[j + 1] = c;
 388         rlp->used += 2;
 389         return;
 390     }
 391
 392     if (c >= s)
 393       /*
 394        * The code is a duplicate of a code in the last range, so just return.
 395        */
 396       return;
 397
 398     /*
 399      * The code should be inserted somewhere before the last range in the
 400      * list.  Locate the insertion point.
 401      */
 402     for (i = 0;
 403          i < rlp->used && c > rlp->ranges[i + 1] + 1; i += 2) ;
 404
 405     s = rlp->ranges[i];
 406     e = rlp->ranges[i + 1];
 407
 408     if (c == e + 1)
 409       /*
 410        * Simply extend the current range.
 411        */
 412       rlp->ranges[i + 1] = c;
 413     else if (c < s) {
 414         /*
 415          * Add a new entry before the current location.  Shift all entries
 416          * before the current one up by one to make room.
 417          */
 418         for (j = rlp->used; j > i; j -= 2) {
 419             rlp->ranges[j] = rlp->ranges[j - 2];
 420             rlp->ranges[j + 1] = rlp->ranges[j - 1];
 421         }
 422         rlp->ranges[i] = rlp->ranges[i + 1] = c;
 423
 424         rlp->used += 2;
 425     }
 426 }
 427
 428 static void
 429 add_decomp(unsigned long code, short compat)
 430 {
 431     unsigned long i, j, size;
 432     _decomp_t **pdecomps;
 433     unsigned long *pdecomps_used;
 434     unsigned long *pdecomps_size;
 435
 436     if (compat) {
 437         pdecomps = &kdecomps;
 438         pdecomps_used = &kdecomps_used;
 439         pdecomps_size = &kdecomps_size;
 440     } else {
 441         pdecomps = &decomps;
 442         pdecomps_used = &decomps_used;
 443         pdecomps_size = &decomps_size;
 444     }
 445
 446     /*
 447      * Add the code to the composite property.
 448      */
 449     if (!compat) {
 450         ordered_range_insert(code, "Cm", 2);
 451     }
 452
 453     /*
 454      * Locate the insertion point for the code.
 455      */
 456     for (i = 0; i < *pdecomps_used && code > (*pdecomps)[i].code; i++) ;
 457
 458     /*
 459      * Allocate space for a new decomposition.
 460      */
 461     if (*pdecomps_used == *pdecomps_size) {
 462         if (*pdecomps_size == 0)
 463           *pdecomps = (_decomp_t *) malloc(sizeof(_decomp_t) << 3);
 464         else
 465           *pdecomps = (_decomp_t *)
 466               realloc((char *) *pdecomps,
 467                       sizeof(_decomp_t) * (*pdecomps_size + 8));
 468         (void) memset((char *) (*pdecomps + *pdecomps_size), '\0',
 469                       sizeof(_decomp_t) << 3);
 470         *pdecomps_size += 8;
 471     }
 472
 473     if (i < *pdecomps_used && code != (*pdecomps)[i].code) {
 474         /*
 475          * Shift the decomps up by one if the codes don't match.
 476          */
 477         for (j = *pdecomps_used; j > i; j--)
 478           (void) AC_MEMCPY((char *) &(*pdecomps)[j], (char *) &(*pdecomps)[j - 1],
 479                         sizeof(_decomp_t));
 480     }
 481
 482     /*
 483      * Insert or replace a decomposition.
 484      */
 485     size = dectmp_size + (4 - (dectmp_size & 3));
 486     if ((*pdecomps)[i].size < size) {
 487         if ((*pdecomps)[i].size == 0)
 488           (*pdecomps)[i].decomp = (unsigned long *)
 489               malloc(sizeof(unsigned long) * size);
 490         else
 491           (*pdecomps)[i].decomp = (unsigned long *)
 492               realloc((char *) (*pdecomps)[i].decomp,
 493                       sizeof(unsigned long) * size);
 494         (*pdecomps)[i].size = size;
 495     }
 496
 497     if ((*pdecomps)[i].code != code)
 498       (*pdecomps_used)++;
 499
 500     (*pdecomps)[i].code = code;
 501     (*pdecomps)[i].used = dectmp_size;
 502     (void) AC_MEMCPY((char *) (*pdecomps)[i].decomp, (char *) dectmp,
 503                   sizeof(unsigned long) * dectmp_size);
 504
 505     /*
 506      * NOTICE: This needs changing later so it is more general than simply
 507      * pairs.  This calculation is done here to simplify allocation elsewhere.
 508      */
 509     if (!compat && dectmp_size == 2)
 510       comps_used++;
 511 }
 512
 513 static void
 514 add_title(unsigned long code)
 515 {
 516     unsigned long i, j;
 517
 518     /*
 519      * Always map the code to itself.
 520      */
 521     cases[2] = code;
 522
 523     if (title_used == title_size) {
 524         if (title_size == 0)
 525           title = (_case_t *) malloc(sizeof(_case_t) << 3);
 526         else
 527           title = (_case_t *) realloc((char *) title,
 528                                       sizeof(_case_t) * (title_size + 8));
 529         title_size += 8;
 530     }
 531
 532     /*
 533      * Locate the insertion point.
 534      */
 535     for (i = 0; i < title_used && code > title[i].key; i++) ;
 536
 537     if (i < title_used) {
 538         /*
 539          * Shift the array up by one.
 540          */
 541         for (j = title_used; j > i; j--)
 542           (void) AC_MEMCPY((char *) &title[j], (char *) &title[j - 1],
 543                         sizeof(_case_t));
 544     }
 545
 546     title[i].key = cases[2];    /* Title */
 547     title[i].other1 = cases[0]; /* Upper */
 548     title[i].other2 = cases[1]; /* Lower */
 549
 550     title_used++;
 551 }
 552
 553 static void
 554 add_upper(unsigned long code)
 555 {
 556     unsigned long i, j;
 557
 558     /*
 559      * Always map the code to itself.
 560      */
 561     cases[0] = code;
 562
 563     /*
 564      * If the title case character is not present, then make it the same as
 565      * the upper case.
 566      */
 567     if (cases[2] == 0)
 568       cases[2] = code;
 569
 570     if (upper_used == upper_size) {
 571         if (upper_size == 0)
 572           upper = (_case_t *) malloc(sizeof(_case_t) << 3);
 573         else
 574           upper = (_case_t *) realloc((char *) upper,
 575                                       sizeof(_case_t) * (upper_size + 8));
 576         upper_size += 8;
 577     }
 578
 579     /*
 580      * Locate the insertion point.
 581      */
 582     for (i = 0; i < upper_used && code > upper[i].key; i++) ;
 583
 584     if (i < upper_used) {
 585         /*
 586          * Shift the array up by one.
 587          */
 588         for (j = upper_used; j > i; j--)
 589           (void) AC_MEMCPY((char *) &upper[j], (char *) &upper[j - 1],
 590                         sizeof(_case_t));
 591     }
 592
 593     upper[i].key = cases[0];    /* Upper */
 594     upper[i].other1 = cases[1]; /* Lower */
 595     upper[i].other2 = cases[2]; /* Title */
 596
 597     upper_used++;
 598 }
 599
 600 static void
 601 add_lower(unsigned long code)
 602 {
 603     unsigned long i, j;
 604
 605     /*
 606      * Always map the code to itself.
 607      */
 608     cases[1] = code;
 609
 610     /*
 611      * If the title case character is empty, then make it the same as the
 612      * upper case.
 613      */
 614     if (cases[2] == 0)
 615       cases[2] = cases[0];
 616
 617     if (lower_used == lower_size) {
 618         if (lower_size == 0)
 619           lower = (_case_t *) malloc(sizeof(_case_t) << 3);
 620         else
 621           lower = (_case_t *) realloc((char *) lower,
 622                                       sizeof(_case_t) * (lower_size + 8));
 623         lower_size += 8;
 624     }
 625
 626     /*
 627      * Locate the insertion point.
 628      */
 629     for (i = 0; i < lower_used && code > lower[i].key; i++) ;
 630
 631     if (i < lower_used) {
 632         /*
 633          * Shift the array up by one.
 634          */
 635         for (j = lower_used; j > i; j--)
 636           (void) AC_MEMCPY((char *) &lower[j], (char *) &lower[j - 1],
 637                         sizeof(_case_t));
 638     }
 639
 640     lower[i].key = cases[1];    /* Lower */
 641     lower[i].other1 = cases[0]; /* Upper */
 642     lower[i].other2 = cases[2]; /* Title */
 643
 644     lower_used++;
 645 }
 646
 647 static void
 648 ordered_ccl_insert(unsigned long c, unsigned long ccl_code)
 649 {
 650     unsigned long i, j;
 651
 652     if (ccl_used == ccl_size) {
 653         if (ccl_size == 0)
 654           ccl = (unsigned long *) malloc(sizeof(unsigned long) * 24);
 655         else
 656           ccl = (unsigned long *)
 657               realloc((char *) ccl, sizeof(unsigned long) * (ccl_size + 24));
 658         ccl_size += 24;
 659     }
 660
 661     /*
 662      * Optimize adding the first item.
 663      */
 664     if (ccl_used == 0) {
 665         ccl[0] = ccl[1] = c;
 666         ccl[2] = ccl_code;
 667         ccl_used += 3;
 668         return;
 669     }
 670
 671     /*
 672      * Handle the special case of extending the range on the end.  This
 673      * requires that the combining class codes are the same.
 674      */
 675     if (ccl_code == ccl[ccl_used - 1] && c == ccl[ccl_used - 2] + 1) {
 676         ccl[ccl_used - 2] = c;
 677         return;
 678     }
 679
 680     /*
 681      * Handle the special case of adding another range on the end.
 682      */
 683     if (c > ccl[ccl_used - 2] + 1 ||
 684         (c == ccl[ccl_used - 2] + 1 && ccl_code != ccl[ccl_used - 1])) {
 685         ccl[ccl_used++] = c;
 686         ccl[ccl_used++] = c;
 687         ccl[ccl_used++] = ccl_code;
 688         return;
 689     }
 690
 691     /*
 692      * Locate either the insertion point or range for the code.
 693      */
 694     for (i = 0; i < ccl_used && c > ccl[i + 1] + 1; i += 3) ;
 695
 696     if (ccl_code == ccl[i + 2] && c == ccl[i + 1] + 1) {
 697         /*
 698          * Extend an existing range.
 699          */
 700         ccl[i + 1] = c;
 701         return;
 702     } else if (c < ccl[i]) {
 703         /*
 704          * Start a new range before the current location.
 705          */
 706         for (j = ccl_used; j > i; j -= 3) {
 707             ccl[j] = ccl[j - 3];
 708             ccl[j - 1] = ccl[j - 4];
 709             ccl[j - 2] = ccl[j - 5];
 710         }
 711         ccl[i] = ccl[i + 1] = c;
 712         ccl[i + 2] = ccl_code;
 713     }
 714 }
 715
 716 /*
 717  * Adds a number if it does not already exist and returns an index value
 718  * multiplied by 2.
 719  */
 720 static unsigned long
 721 make_number(short num, short denom)
 722 {
 723     unsigned long n;
 724
 725     /*
 726      * Determine if the number already exists.
 727      */
 728     for (n = 0; n < nums_used; n++) {
 729         if (nums[n].numerator == num && nums[n].denominator == denom)
 730           return n << 1;
 731     }
 732
 733     if (nums_used == nums_size) {
 734         if (nums_size == 0)
 735           nums = (_num_t *) malloc(sizeof(_num_t) << 3);
 736         else
 737           nums = (_num_t *) realloc((char *) nums,
 738                                     sizeof(_num_t) * (nums_size + 8));
 739         nums_size += 8;
 740     }
 741
 742     n = nums_used++;
 743     nums[n].numerator = num;
 744     nums[n].denominator = denom;
 745
 746     return n << 1;
 747 }
 748
 749 static void
 750 add_number(unsigned long code, short num, short denom)
 751 {
 752     unsigned long i, j;
 753
 754     /*
 755      * Insert the code in order.
 756      */
 757     for (i = 0; i < ncodes_used && code > ncodes[i].code; i++) ;
 758
 759     /*
 760      * Handle the case of the codes matching and simply replace the number
 761      * that was there before.
 762      */
 763     if (i < ncodes_used && code == ncodes[i].code) {
 764         ncodes[i].idx = make_number(num, denom);
 765         return;
 766     }
 767
 768     /*
 769      * Resize the array if necessary.
 770      */
 771     if (ncodes_used == ncodes_size) {
 772         if (ncodes_size == 0)
 773           ncodes = (_codeidx_t *) malloc(sizeof(_codeidx_t) << 3);
 774         else
 775           ncodes = (_codeidx_t *)
 776               realloc((char *) ncodes, sizeof(_codeidx_t) * (ncodes_size + 8));
 777
 778         ncodes_size += 8;
 779     }
 780
 781     /*
 782      * Shift things around to insert the code if necessary.
 783      */
 784     if (i < ncodes_used) {
 785         for (j = ncodes_used; j > i; j--) {
 786             ncodes[j].code = ncodes[j - 1].code;
 787             ncodes[j].idx = ncodes[j - 1].idx;
 788         }
 789     }
 790     ncodes[i].code = code;
 791     ncodes[i].idx = make_number(num, denom);
 792
 793     ncodes_used++;
 794 }
 795
 796 /*
 797  * This routine assumes that the line is a valid Unicode Character Database
 798  * entry.
 799  */
 800 static void
 801 read_cdata(FILE *in)
 802 {
 803     unsigned long i, lineno, skip, code, ccl_code;
 804     short wnum, neg, number[2], compat;
 805     char line[512], *s, *e;
 806
 807     lineno = skip = 0;
 808     while (!feof(in)) {
 809                 if( fscanf(in, "%[^\n]\n", line) != 1) break;
 810         lineno++;
 811
 812         /*
 813          * Skip blank lines and lines that start with a '#'.
 814          */
 815         if (line[0] == 0 || line[0] == '#')
 816           continue;
 817
 818         /*
 819          * If lines need to be skipped, do it here.
 820          */
 821         if (skip) {
 822             skip--;
 823             continue;
 824         }
 825
 826         /*
 827          * Collect the code.  The code can be up to 6 hex digits in length to
 828          * allow surrogates to be specified.
 829          */
 830         for (s = line, i = code = 0; *s != ';' && i < 6; i++, s++) {
 831             code <<= 4;
 832             if (*s >= '0' && *s <= '9')
 833               code += *s - '0';
 834             else if (*s >= 'A' && *s <= 'F')
 835               code += (*s - 'A') + 10;
 836             else if (*s >= 'a' && *s <= 'f')
 837               code += (*s - 'a') + 10;
 838         }
 839
 840         /*
 841          * Handle the following special cases:
 842          * 1. 4E00-9FA5 CJK Ideographs.
 843          * 2. AC00-D7A3 Hangul Syllables.
 844          * 3. D800-DFFF Surrogates.
 845          * 4. E000-F8FF Private Use Area.
 846          * 5. F900-FA2D Han compatibility.
 847          */
 848         switch (code) {
 849           case 0x4e00:
 850             /*
 851              * The Han ideographs.
 852              */
 853             add_range(0x4e00, 0x9fff, "Lo", "L");
 854
 855             /*
 856              * Add the characters to the defined category.
 857              */
 858             add_range(0x4e00, 0x9fa5, "Cp", 0);
 859
 860             skip = 1;
 861             break;
 862           case 0xac00:
 863             /*
 864              * The Hangul syllables.
 865              */
 866             add_range(0xac00, 0xd7a3, "Lo", "L");
 867
 868             /*
 869              * Add the characters to the defined category.
 870              */
 871             add_range(0xac00, 0xd7a3, "Cp", 0);
 872
 873             skip = 1;
 874             break;
 875           case 0xd800:
 876             /*
 877              * Make a range of all surrogates and assume some default
 878              * properties.
 879              */
 880             add_range(0x010000, 0x10ffff, "Cs", "L");
 881             skip = 5;
 882             break;
 883           case 0xe000:
 884             /*
 885              * The Private Use area.  Add with a default set of properties.
 886              */
 887             add_range(0xe000, 0xf8ff, "Co", "L");
 888             skip = 1;
 889             break;
 890           case 0xf900:
 891             /*
 892              * The CJK compatibility area.
 893              */
 894             add_range(0xf900, 0xfaff, "Lo", "L");
 895
 896             /*
 897              * Add the characters to the defined category.
 898              */
 899             add_range(0xf900, 0xfaff, "Cp", 0);
 900
 901             skip = 1;
 902         }
 903
 904         if (skip)
 905           continue;
 906
 907         /*
 908          * Add the code to the defined category.
 909          */
 910         ordered_range_insert(code, "Cp", 2);
 911
 912         /*
 913          * Locate the first character property field.
 914          */
 915         for (i = 0; *s != 0 && i < 2; s++) {
 916             if (*s == ';')
 917               i++;
 918         }
 919         for (e = s; *e && *e != ';'; e++) ;
 920
 921         ordered_range_insert(code, s, e - s);
 922
 923         /*
 924          * Locate the combining class code.
 925          */
 926         for (s = e; *s != 0 && i < 3; s++) {
 927             if (*s == ';')
 928               i++;
 929         }
 930
 931         /*
 932          * Convert the combining class code from decimal.
 933          */
 934         for (ccl_code = 0, e = s; *e && *e != ';'; e++)
 935           ccl_code = (ccl_code * 10) + (*e - '0');
 936
 937         /*
 938          * Add the code if it not 0.
 939          */
 940         if (ccl_code != 0)
 941           ordered_ccl_insert(code, ccl_code);
 942
 943         /*
 944          * Locate the second character property field.
 945          */
 946         for (s = e; *s != 0 && i < 4; s++) {
 947             if (*s == ';')
 948               i++;
 949         }
 950         for (e = s; *e && *e != ';'; e++) ;
 951
 952         ordered_range_insert(code, s, e - s);
 953
 954         /*
 955          * Check for a decomposition.
 956          */
 957         s = ++e;
 958         if (*s != ';') {
 959             compat = *s == '<';
 960             if (compat) {
 961                 /*
 962                  * Skip compatibility formatting tag.
 963                  */
 964                 while (*s++ != '>');
 965             }
 966             /*
 967              * Collect the codes of the decomposition.
 968              */
 969             for (dectmp_size = 0; *s != ';'; ) {
 970                 /*
 971                  * Skip all leading non-hex digits.
 972                  */
 973                 while (!ishdigit(*s))
 974                   s++;
 975
 976                 for (dectmp[dectmp_size] = 0; ishdigit(*s); s++) {
 977                     dectmp[dectmp_size] <<= 4;
 978                     if (*s >= '0' && *s <= '9')
 979                       dectmp[dectmp_size] += *s - '0';
 980                     else if (*s >= 'A' && *s <= 'F')
 981                       dectmp[dectmp_size] += (*s - 'A') + 10;
 982                     else if (*s >= 'a' && *s <= 'f')
 983                       dectmp[dectmp_size] += (*s - 'a') + 10;
 984                 }
 985                 dectmp_size++;
 986             }
 987
 988             /*
 989              * If there are any codes in the temporary decomposition array,
 990              * then add the character with its decomposition.
 991              */
 992             if (dectmp_size > 0) {
 993                 if (!compat) {
 994                     add_decomp(code, 0);
 995                 }
 996                 add_decomp(code, 1);
 997             }
 998         }
 999
1000         /*
1001          * Skip to the number field.
1002          */
1003         for (i = 0; i < 3 && *s; s++) {
1004             if (*s == ';')
1005               i++;
1006         }
1007
1008         /*
1009          * Scan the number in.
1010          */
1011         number[0] = number[1] = 0;
1012         for (e = s, neg = wnum = 0; *e && *e != ';'; e++) {
1013             if (*e == '-') {
1014                 neg = 1;
1015                 continue;
1016             }
1017
1018             if (*e == '/') {
1019                 /*
1020                  * Move the the denominator of the fraction.
1021                  */
1022                 if (neg)
1023                   number[wnum] *= -1;
1024                 neg = 0;
1025                 e++;
1026                 wnum++;
1027             }
1028             number[wnum] = (number[wnum] * 10) + (*e - '0');
1029         }
1030
1031         if (e > s) {
1032             /*
1033              * Adjust the denominator in case of integers and add the number.
1034              */
1035             if (wnum == 0)
1036               number[1] = number[0];
1037
1038             add_number(code, number[0], number[1]);
1039         }
1040
1041         /*
1042          * Skip to the start of the possible case mappings.
1043          */
1044         for (s = e, i = 0; i < 4 && *s; s++) {
1045             if (*s == ';')
1046               i++;
1047         }
1048
1049         /*
1050          * Collect the case mappings.
1051          */
1052         cases[0] = cases[1] = cases[2] = 0;
1053         for (i = 0; i < 3; i++) {
1054             while (ishdigit(*s)) {
1055                 cases[i] <<= 4;
1056                 if (*s >= '0' && *s <= '9')
1057                   cases[i] += *s - '0';
1058                 else if (*s >= 'A' && *s <= 'F')
1059                   cases[i] += (*s - 'A') + 10;
1060                 else if (*s >= 'a' && *s <= 'f')
1061                   cases[i] += (*s - 'a') + 10;
1062                 s++;
1063             }
1064             if (*s == ';')
1065               s++;
1066         }
1067         if (cases[0] && cases[1])
1068           /*
1069            * Add the upper and lower mappings for a title case character.
1070            */
1071           add_title(code);
1072         else if (cases[1])
1073           /*
1074            * Add the lower and title case mappings for the upper case
1075            * character.
1076            */
1077           add_upper(code);
1078         else if (cases[0])
1079           /*
1080            * Add the upper and title case mappings for the lower case
1081            * character.
1082            */
1083           add_lower(code);
1084     }
1085 }
1086
1087 static _decomp_t *
1088 find_decomp(unsigned long code, short compat)
1089 {
1090     long l, r, m;
1091     _decomp_t *decs;
1092
1093     l = 0;
1094     r = (compat ? kdecomps_used : decomps_used) - 1;
1095     decs = compat ? kdecomps : decomps;
1096     while (l <= r) {
1097         m = (l + r) >> 1;
1098         if (code > decs[m].code)
1099           l = m + 1;
1100         else if (code < decs[m].code)
1101           r = m - 1;
1102         else
1103           return &decs[m];
1104     }
1105     return 0;
1106 }
1107
1108 static void
1109 decomp_it(_decomp_t *d, short compat)
1110 {
1111     unsigned long i;
1112     _decomp_t *dp;
1113
1114     for (i = 0; i < d->used; i++) {
1115         if ((dp = find_decomp(d->decomp[i], compat)) != 0)
1116           decomp_it(dp, compat);
1117         else
1118           dectmp[dectmp_size++] = d->decomp[i];
1119     }
1120 }
1121
1122 /*
1123  * Expand all decompositions by recursively decomposing each character
1124  * in the decomposition.
1125  */
1126 static void
1127 expand_decomp(void)
1128 {
1129     unsigned long i;
1130
1131     for (i = 0; i < decomps_used; i++) {
1132         dectmp_size = 0;
1133         decomp_it(&decomps[i], 0);
1134         if (dectmp_size > 0)
1135           add_decomp(decomps[i].code, 0);
1136     }
1137
1138     for (i = 0; i < kdecomps_used; i++) {
1139         dectmp_size = 0;
1140         decomp_it(&kdecomps[i], 1);
1141         if (dectmp_size > 0)
1142           add_decomp(kdecomps[i].code, 1);
1143     }
1144 }
1145
1146 static int
1147 cmpcomps(_comp_t *comp1, _comp_t *comp2)
1148 {
1149     long diff = comp1->code1 - comp2->code1;
1150
1151     if (!diff)
1152         diff = comp1->code2 - comp2->code2;
1153     return (int) diff;
1154 }
1155
1156 /*
1157  * Load composition exclusion data
1158  */
1159 static void
1160 read_compexdata(FILE *in)
1161 {
1162     unsigned short i, code;
1163     char line[512], *s;
1164
1165     (void) memset((char *) compexs, 0, sizeof(unsigned long) << 11);
1166
1167     while (!feof(in)) {
1168                 if( fscanf(in, "%[^\n]\n", line) != 1) break;
1169         /*
1170          * Skip blank lines and lines that start with a '#'.
1171          */
1172         if (line[0] == 0 || line[0] == '#')
1173             continue;
1174
1175         /*
1176          * Collect the code.  Assume max 4 digits
1177          */
1178
1179         for (s = line, i = code = 0; *s != '#' && i < 4; i++, s++) {
1180             code <<= 4;
1181             if (*s >= '0' && *s <= '9')
1182                 code += *s - '0';
1183             else if (*s >= 'A' && *s <= 'F')
1184                 code += (*s - 'A') + 10;
1185             else if (*s >= 'a' && *s <= 'f')
1186                 code += (*s - 'a') + 10;
1187         }
1188         COMPEX_SET(code);
1189     }
1190 }
1191
1192 /*
1193  * Creates array of compositions from decomposition array
1194  */
1195 static void
1196 create_comps(void)
1197 {
1198     unsigned long i, cu;
1199
1200     comps = (_comp_t *) malloc(comps_used * sizeof(_comp_t));
1201
1202     for (i = cu = 0; i < decomps_used; i++) {
1203         if (decomps[i].used != 2 || COMPEX_TEST(decomps[i].code))
1204             continue;
1205         comps[cu].comp = decomps[i].code;
1206         comps[cu].count = 2;
1207         comps[cu].code1 = decomps[i].decomp[0];
1208         comps[cu].code2 = decomps[i].decomp[1];
1209         cu++;
1210     }
1211     comps_used = cu;
1212     qsort(comps, comps_used, sizeof(_comp_t),
1213           (int (*)(const void *, const void *)) cmpcomps);
1214 }
1215
1216 static void
1217 write_cdata(char *opath)
1218 {
1219     FILE *out;
1220         ac_uint4 bytes;
1221     unsigned long i, idx, nprops;
1222     unsigned short casecnt[2];
1223     char path[BUFSIZ];
1224
1225     /*****************************************************************
1226      *
1227      * Generate the ctype data.
1228      *
1229      *****************************************************************/
1230
1231     /*
1232      * Open the ctype.dat file.
1233      */
1234     snprintf(path, sizeof path, "%s" LDAP_DIRSEP "ctype.dat", opath);
1235     if ((out = fopen(path, "wb")) == 0)
1236       return;
1237
1238     /*
1239      * Collect the offsets for the properties.  The offsets array is
1240      * on a 4-byte boundary to keep things efficient for architectures
1241      * that need such a thing.
1242      */
1243     for (i = idx = 0; i < NUMPROPS; i++) {
1244         propcnt[i] = (proptbl[i].used != 0) ? idx : 0xffff;
1245         idx += proptbl[i].used;
1246     }
1247
1248     /*
1249      * Add the sentinel index which is used by the binary search as the upper
1250      * bound for a search.
1251      */
1252     propcnt[i] = idx;
1253
1254     /*
1255      * Record the actual number of property lists.  This may be different than
1256      * the number of offsets actually written because of aligning on a 4-byte
1257      * boundary.
1258      */
1259     hdr[1] = NUMPROPS;
1260
1261     /*
1262      * Calculate the byte count needed and pad the property counts array to a
1263      * 4-byte boundary.
1264      */
1265     if ((bytes = sizeof(unsigned short) * (NUMPROPS + 1)) & 3)
1266       bytes += 4 - (bytes & 3);
1267     nprops = bytes / sizeof(unsigned short);
1268     bytes += sizeof(unsigned long) * idx;
1269
1270     /*
1271      * Write the header.
1272      */
1273     fwrite((char *) hdr, sizeof(ac_uint4), 2, out);
1274
1275     /*
1276      * Write the byte count.
1277      */
1278     fwrite((char *) &bytes, sizeof(unsigned long), 1, out);
1279
1280     /*
1281      * Write the property list counts.
1282      */
1283     fwrite((char *) propcnt, sizeof(unsigned short), nprops, out);
1284
1285     /*
1286      * Write the property lists.
1287      */
1288     for (i = 0; i < NUMPROPS; i++) {
1289         if (proptbl[i].used > 0)
1290           fwrite((char *) proptbl[i].ranges, sizeof(unsigned long),
1291                  proptbl[i].used, out);
1292     }
1293
1294     fclose(out);
1295
1296     /*****************************************************************
1297      *
1298      * Generate the case mapping data.
1299      *
1300      *****************************************************************/
1301
1302     /*
1303      * Open the case.dat file.
1304      */
1305     snprintf(path, sizeof path, "%s" LDAP_DIRSEP "case.dat", opath);
1306     if ((out = fopen(path, "wb")) == 0)
1307       return;
1308
1309     /*
1310      * Write the case mapping tables.
1311      */
1312     hdr[1] = upper_used + lower_used + title_used;
1313     casecnt[0] = upper_used;
1314     casecnt[1] = lower_used;
1315
1316     /*
1317      * Write the header.
1318      */
1319     fwrite((char *) hdr, sizeof(unsigned short), 2, out);
1320
1321     /*
1322      * Write the upper and lower case table sizes.
1323      */
1324     fwrite((char *) casecnt, sizeof(unsigned short), 2, out);
1325
1326     if (upper_used > 0)
1327       /*
1328        * Write the upper case table.
1329        */
1330       fwrite((char *) upper, sizeof(_case_t), upper_used, out);
1331
1332     if (lower_used > 0)
1333       /*
1334        * Write the lower case table.
1335        */
1336       fwrite((char *) lower, sizeof(_case_t), lower_used, out);
1337
1338     if (title_used > 0)
1339       /*
1340        * Write the title case table.
1341        */
1342       fwrite((char *) title, sizeof(_case_t), title_used, out);
1343
1344     fclose(out);
1345
1346     /*****************************************************************
1347      *
1348      * Generate the composition data.
1349      *
1350      *****************************************************************/
1351
1352     /*
1353      * Create compositions from decomposition data
1354      */
1355     create_comps();
1356
1357     /*
1358      * Open the comp.dat file.
1359      */
1360     snprintf(path, sizeof path, "%s" LDAP_DIRSEP "comp.dat", opath);
1361     if ((out = fopen(path, "wb")) == 0)
1362         return;
1363
1364     /*
1365      * Write the header.
1366      */
1367     hdr[1] = (unsigned short) comps_used * 4;
1368     fwrite((char *) hdr, sizeof(unsigned short), 2, out);
1369
1370     /*
1371      * Write out the byte count to maintain header size.
1372      */
1373     bytes = comps_used * sizeof(_comp_t);
1374     fwrite((char *) &bytes, sizeof(unsigned long), 1, out);
1375
1376     /*
1377      * Now, if comps exist, write them out.
1378      */
1379     if (comps_used > 0)
1380         fwrite((char *) comps, sizeof(_comp_t), comps_used, out);
1381
1382     fclose(out);
1383
1384     /*****************************************************************
1385      *
1386      * Generate the decomposition data.
1387      *
1388      *****************************************************************/
1389
1390     /*
1391      * Fully expand all decompositions before generating the output file.
1392      */
1393     expand_decomp();
1394
1395     /*
1396      * Open the decomp.dat file.
1397      */
1398     snprintf(path, sizeof path, "%s" LDAP_DIRSEP "decomp.dat", opath);
1399     if ((out = fopen(path, "wb")) == 0)
1400       return;
1401
1402     hdr[1] = decomps_used;
1403
1404     /*
1405      * Write the header.
1406      */
1407     fwrite((char *) hdr, sizeof(unsigned short), 2, out);
1408
1409     /*
1410      * Write a temporary byte count which will be calculated as the
1411      * decompositions are written out.
1412      */
1413     bytes = 0;
1414     fwrite((char *) &bytes, sizeof(unsigned long), 1, out);
1415
1416     if (decomps_used) {
1417         /*
1418          * Write the list of decomp nodes.
1419          */
1420         for (i = idx = 0; i < decomps_used; i++) {
1421             fwrite((char *) &decomps[i].code, sizeof(unsigned long), 1, out);
1422             fwrite((char *) &idx, sizeof(unsigned long), 1, out);
1423             idx += decomps[i].used;
1424         }
1425
1426         /*
1427          * Write the sentinel index as the last decomp node.
1428          */
1429         fwrite((char *) &idx, sizeof(unsigned long), 1, out);
1430
1431         /*
1432          * Write the decompositions themselves.
1433          */
1434         for (i = 0; i < decomps_used; i++)
1435           fwrite((char *) decomps[i].decomp, sizeof(unsigned long),
1436                  decomps[i].used, out);
1437
1438         /*
1439          * Seek back to the beginning and write the byte count.
1440          */
1441         bytes = (sizeof(unsigned long) * idx) +
1442             (sizeof(unsigned long) * ((hdr[1] << 1) + 1));
1443         fseek(out, sizeof(unsigned short) << 1, 0L);
1444         fwrite((char *) &bytes, sizeof(unsigned long), 1, out);
1445
1446         fclose(out);
1447     }
1448
1449     /*
1450      * Open the kdecomp.dat file.
1451      */
1452     snprintf(path, sizeof path, "%s" LDAP_DIRSEP "kdecomp.dat", opath);
1453     if ((out = fopen(path, "wb")) == 0)
1454       return;
1455
1456     hdr[1] = kdecomps_used;
1457
1458     /*
1459      * Write the header.
1460      */
1461     fwrite((char *) hdr, sizeof(unsigned short), 2, out);
1462
1463     /*
1464      * Write a temporary byte count which will be calculated as the
1465      * decompositions are written out.
1466      */
1467     bytes = 0;
1468     fwrite((char *) &bytes, sizeof(unsigned long), 1, out);
1469
1470     if (kdecomps_used) {
1471         /*
1472          * Write the list of kdecomp nodes.
1473          */
1474         for (i = idx = 0; i < kdecomps_used; i++) {
1475             fwrite((char *) &kdecomps[i].code, sizeof(unsigned long), 1, out);
1476             fwrite((char *) &idx, sizeof(unsigned long), 1, out);
1477             idx += kdecomps[i].used;
1478         }
1479
1480         /*
1481          * Write the sentinel index as the last decomp node.
1482          */
1483         fwrite((char *) &idx, sizeof(unsigned long), 1, out);
1484
1485         /*
1486          * Write the decompositions themselves.
1487          */
1488         for (i = 0; i < kdecomps_used; i++)
1489           fwrite((char *) kdecomps[i].decomp, sizeof(unsigned long),
1490                  kdecomps[i].used, out);
1491
1492         /*
1493          * Seek back to the beginning and write the byte count.
1494          */
1495         bytes = (sizeof(unsigned long) * idx) +
1496             (sizeof(unsigned long) * ((hdr[1] << 1) + 1));
1497         fseek(out, sizeof(unsigned short) << 1, 0L);
1498         fwrite((char *) &bytes, sizeof(unsigned long), 1, out);
1499
1500         fclose(out);
1501     }
1502
1503     /*****************************************************************
1504      *
1505      * Generate the combining class data.
1506      *
1507      *****************************************************************/
1508
1509     /*
1510      * Open the cmbcl.dat file.
1511      */
1512     snprintf(path, sizeof path, "%s" LDAP_DIRSEP "cmbcl.dat", opath);
1513     if ((out = fopen(path, "wb")) == 0)
1514       return;
1515
1516     /*
1517      * Set the number of ranges used.  Each range has a combining class which
1518      * means each entry is a 3-tuple.
1519      */
1520     hdr[1] = ccl_used / 3;
1521
1522     /*
1523      * Write the header.
1524      */
1525     fwrite((char *) hdr, sizeof(unsigned short), 2, out);
1526
1527     /*
1528      * Write out the byte count to maintain header size.
1529      */
1530     bytes = ccl_used * sizeof(unsigned long);
1531     fwrite((char *) &bytes, sizeof(unsigned long), 1, out);
1532
1533     if (ccl_used > 0)
1534       /*
1535        * Write the combining class ranges out.
1536        */
1537       fwrite((char *) ccl, sizeof(unsigned long), ccl_used, out);
1538
1539     fclose(out);
1540
1541     /*****************************************************************
1542      *
1543      * Generate the number data.
1544      *
1545      *****************************************************************/
1546
1547     /*
1548      * Open the num.dat file.
1549      */
1550     snprintf(path, sizeof path, "%s" LDAP_DIRSEP "num.dat", opath);
1551     if ((out = fopen(path, "wb")) == 0)
1552       return;
1553
1554     /*
1555      * The count part of the header will be the total number of codes that
1556      * have numbers.
1557      */
1558     hdr[1] = (unsigned short) (ncodes_used << 1);
1559     bytes = (ncodes_used * sizeof(_codeidx_t)) + (nums_used * sizeof(_num_t));
1560
1561     /*
1562      * Write the header.
1563      */
1564     fwrite((char *) hdr, sizeof(unsigned short), 2, out);
1565
1566     /*
1567      * Write out the byte count to maintain header size.
1568      */
1569     fwrite((char *) &bytes, sizeof(unsigned long), 1, out);
1570
1571     /*
1572      * Now, if number mappings exist, write them out.
1573      */
1574     if (ncodes_used > 0) {
1575         fwrite((char *) ncodes, sizeof(_codeidx_t), ncodes_used, out);
1576         fwrite((char *) nums, sizeof(_num_t), nums_used, out);
1577     }
1578
1579     fclose(out);
1580 }
1581
/*
 * Print the command line synopsis and option descriptions to stderr and
 * terminate the program with exit status 1.  Does not return.
 *
 * prog is the program name shown in the synopsis.
 */
static void
usage(char *prog)
{
    fprintf(stderr,
            "Usage: %s [-o output-directory|-x composition-exclusions]"
            " datafile1 datafile2 ...\n\n",
            prog);
    fputs("-o output-directory\n\t\tWrite the output files to a different"
          " directory (default: .).\n"
          "-x composition-exclusion\n\t\tFile of composition codes"
          " that should be excluded.\n",
          stderr);
    exit(1);
}
1596
1597 int
1598 main(int argc, char *argv[])
1599 {
1600     FILE *in;
1601     char *prog, *opath;
1602
1603     if ((prog = strrchr(argv[0], *LDAP_DIRSEP)) != 0)
1604       prog++;
1605     else
1606       prog = argv[0];
1607
1608     opath = 0;
1609     in = stdin;
1610
1611     argc--;
1612     argv++;
1613
1614     while (argc > 0) {
1615         if (argv[0][0] == '-') {
1616             switch (argv[0][1]) {
1617               case 'o':
1618                 argc--;
1619                 argv++;
1620                 opath = argv[0];
1621                 break;
1622               case 'x':
1623                 argc--;
1624                 argv++;
1625                 if ((in = fopen(argv[0], "rb")) == 0)
1626                   fprintf(stderr,
1627                           "%s: unable to open composition exclusion file %s\n",
1628                           prog, argv[0]);
1629                 else {
1630                     read_compexdata(in);
1631                     fclose(in);
1632                     in = 0;
1633                 }
1634                 break;
1635               default:
1636                 usage(prog);
1637             }
1638         } else {
1639             if (in != stdin && in != NULL)
1640               fclose(in);
1641             if ((in = fopen(argv[0], "rb")) == 0)
1642               fprintf(stderr, "%s: unable to open ctype file %s\n",
1643                       prog, argv[0]);
1644             else {
1645                 read_cdata(in);
1646                 fclose(in);
1647                 in = 0;
1648             }
1649         }
1650         argc--;
1651         argv++;
1652     }
1653
1654     if (opath == 0)
1655       opath = ".";
1656     write_cdata(opath);
1657
1658     return 0;
1659 }