git.sur5r.net Git - openldap/blob - libraries/liblunicode/ucdata/ucgendat.c

   1 /* $OpenLDAP$ */
   2 /*
   3  * Copyright 2000-2002 The OpenLDAP Foundation, All Rights Reserved.
   4  * COPYING RESTRICTIONS APPLY, see COPYRIGHT file
   5  */
   6 /*
   7  * Copyright 2001 Computing Research Labs, New Mexico State University
   8  *
   9  * Permission is hereby granted, free of charge, to any person obtaining a
  10  * copy of this software and associated documentation files (the "Software"),
  11  * to deal in the Software without restriction, including without limitation
  12  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  13  * and/or sell copies of the Software, and to permit persons to whom the
  14  * Software is furnished to do so, subject to the following conditions:
  15  *
  16  * The above copyright notice and this permission notice shall be included in
  17  * all copies or substantial portions of the Software.
  18  *
  19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  20  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  21  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  22  * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
  23  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
  24  * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
  25  * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  26  */
  27 /* $Id: ucgendat.c,v 1.4 2001/01/02 18:46:20 mleisher Exp $" */
  28
  29 #include "portable.h"
  30 #include "ldap_config.h"
  31
  32 #include <stdio.h>
  33 #include <ac/stdlib.h>
  34 #include <ac/string.h>
  35 #include <ac/unistd.h>
  36
  37 #include <ac/bytes.h>
  38
  39 #include <lutil.h>
  40
  41 #undef ishdigit
  42 #define ishdigit(cc) (((cc) >= '0' && (cc) <= '9') ||\
  43                       ((cc) >= 'A' && (cc) <= 'F') ||\
  44                       ((cc) >= 'a' && (cc) <= 'f'))
  45
  46 /*
  47  * A header written to the output file with the byte-order-mark and the number
  48  * of property nodes.
  49  */
  50 static unsigned short hdr[2] = {0xfeff, 0};
  51
  52 #define NUMPROPS 50
  53 #define NEEDPROPS (NUMPROPS + (4 - (NUMPROPS & 3)))
  54
  55 typedef struct {
  56     char *name;
  57     int len;
  58 } _prop_t;
  59
  60 /*
  61  * List of properties expected to be found in the Unicode Character Database
  62  * including some implementation specific properties.
  63  *
  64  * The implementation specific properties are:
  65  * Cm = Composed (can be decomposed)
  66  * Nb = Non-breaking
  67  * Sy = Symmetric (has left and right forms)
  68  * Hd = Hex digit
  69  * Qm = Quote marks
  70  * Mr = Mirroring
  71  * Ss = Space, other
  72  * Cp = Defined character
  73  */
  74 static _prop_t props[NUMPROPS] = {
  75     {"Mn", 2}, {"Mc", 2}, {"Me", 2}, {"Nd", 2}, {"Nl", 2}, {"No", 2},
  76     {"Zs", 2}, {"Zl", 2}, {"Zp", 2}, {"Cc", 2}, {"Cf", 2}, {"Cs", 2},
  77     {"Co", 2}, {"Cn", 2}, {"Lu", 2}, {"Ll", 2}, {"Lt", 2}, {"Lm", 2},
  78     {"Lo", 2}, {"Pc", 2}, {"Pd", 2}, {"Ps", 2}, {"Pe", 2}, {"Po", 2},
  79     {"Sm", 2}, {"Sc", 2}, {"Sk", 2}, {"So", 2}, {"L",  1}, {"R",  1},
  80     {"EN", 2}, {"ES", 2}, {"ET", 2}, {"AN", 2}, {"CS", 2}, {"B",  1},
  81     {"S",  1}, {"WS", 2}, {"ON", 2},
  82     {"Cm", 2}, {"Nb", 2}, {"Sy", 2}, {"Hd", 2}, {"Qm", 2}, {"Mr", 2},
  83     {"Ss", 2}, {"Cp", 2}, {"Pi", 2}, {"Pf", 2}, {"AL", 2}
  84 };
  85
  86 typedef struct {
  87     unsigned long *ranges;
  88     unsigned short used;
  89     unsigned short size;
  90 } _ranges_t;
  91
  92 static _ranges_t proptbl[NUMPROPS];
  93
  94 /*
  95  * Make sure this array is sized to be on a 4-byte boundary at compile time.
  96  */
  97 static unsigned short propcnt[NEEDPROPS];
  98
  99 /*
 100  * Array used to collect a decomposition before adding it to the decomposition
 101  * table.
 102  */
 103 static unsigned long dectmp[64];
 104 static unsigned long dectmp_size;
 105
 106 typedef struct {
 107     unsigned long code;
 108     unsigned short size;
 109     unsigned short used;
 110     unsigned long *decomp;
 111 } _decomp_t;
 112
 113 /*
 114  * List of decomposition.  Created and expanded in order as the characters are
 115  * encountered. First list contains canonical mappings, second also includes
 116  * compatibility mappings.
 117  */
 118 static _decomp_t *decomps;
 119 static unsigned long decomps_used;
 120 static unsigned long decomps_size;
 121
 122 static _decomp_t *kdecomps;
 123 static unsigned long kdecomps_used;
 124 static unsigned long kdecomps_size;
 125
 126 /*
 127  * Composition exclusion table stuff.
 128  */
 129 #define COMPEX_SET(c) (compexs[(c) >> 5] |= (1 << ((c) & 31)))
 130 #define COMPEX_TEST(c) (compexs[(c) >> 5] & (1 << ((c) & 31)))
 131 static unsigned long compexs[2048];
 132
 133 /*
 134  * Struct for holding a composition pair, and array of composition pairs
 135  */
 136 typedef struct {
 137     unsigned long comp;
 138     unsigned long count;
 139     unsigned long code1;
 140     unsigned long code2;
 141 } _comp_t;
 142
 143 static _comp_t *comps;
 144 static unsigned long comps_used;
 145
 146 /*
 147  * Types and lists for handling lists of case mappings.
 148  */
 149 typedef struct {
 150     unsigned long key;
 151     unsigned long other1;
 152     unsigned long other2;
 153 } _case_t;
 154
 155 static _case_t *upper;
 156 static _case_t *lower;
 157 static _case_t *title;
 158 static unsigned long upper_used;
 159 static unsigned long upper_size;
 160 static unsigned long lower_used;
 161 static unsigned long lower_size;
 162 static unsigned long title_used;
 163 static unsigned long title_size;
 164
 165 /*
 166  * Array used to collect case mappings before adding them to a list.
 167  */
 168 static unsigned long cases[3];
 169
 170 /*
 171  * An array to hold ranges for combining classes.
 172  */
 173 static unsigned long *ccl;
 174 static unsigned long ccl_used;
 175 static unsigned long ccl_size;
 176
 177 /*
 178  * Structures for handling numbers.
 179  */
 180 typedef struct {
 181     unsigned long code;
 182     unsigned long idx;
 183 } _codeidx_t;
 184
 185 typedef struct {
 186     short numerator;
 187     short denominator;
 188 } _num_t;
 189
 190 /*
 191  * Arrays to hold the mapping of codes to numbers.
 192  */
 193 static _codeidx_t *ncodes;
 194 static unsigned long ncodes_used;
 195 static unsigned long ncodes_size;
 196
 197 static _num_t *nums;
 198 static unsigned long nums_used;
 199 static unsigned long nums_size;
 200
 201 /*
 202  * Array for holding numbers.
 203  */
 204 static _num_t *nums;
 205 static unsigned long nums_used;
 206 static unsigned long nums_size;
 207
 208 static void
 209 add_range(unsigned long start, unsigned long end, char *p1, char *p2)
 210 {
 211     int i, j, k, len;
 212     _ranges_t *rlp;
 213     char *name;
 214
 215     for (k = 0; k < 2; k++) {
 216         if (k == 0) {
 217             name = p1;
 218             len = 2;
 219         } else {
 220             if (p2 == 0)
 221               break;
 222
 223             name = p2;
 224             len = 1;
 225         }
 226
 227         for (i = 0; i < NUMPROPS; i++) {
 228             if (props[i].len == len && memcmp(props[i].name, name, len) == 0)
 229               break;
 230         }
 231
 232         if (i == NUMPROPS)
 233           continue;
 234
 235         rlp = &proptbl[i];
 236
 237         /*
 238          * Resize the range list if necessary.
 239          */
 240         if (rlp->used == rlp->size) {
 241             if (rlp->size == 0)
 242               rlp->ranges = (unsigned long *)
 243                   malloc(sizeof(unsigned long) << 3);
 244             else
 245               rlp->ranges = (unsigned long *)
 246                   realloc((char *) rlp->ranges,
 247                           sizeof(unsigned long) * (rlp->size + 8));
 248             rlp->size += 8;
 249         }
 250
 251         /*
 252          * If this is the first code for this property list, just add it
 253          * and return.
 254          */
 255         if (rlp->used == 0) {
 256             rlp->ranges[0] = start;
 257             rlp->ranges[1] = end;
 258             rlp->used += 2;
 259             continue;
 260         }
 261
 262         /*
 263          * Optimize the case of adding the range to the end.
 264          */
 265         j = rlp->used - 1;
 266         if (start > rlp->ranges[j]) {
 267             j = rlp->used;
 268             rlp->ranges[j++] = start;
 269             rlp->ranges[j++] = end;
 270             rlp->used = j;
 271             continue;
 272         }
 273
 274         /*
 275          * Need to locate the insertion point.
 276          */
 277         for (i = 0;
 278              i < rlp->used && start > rlp->ranges[i + 1] + 1; i += 2) ;
 279
 280         /*
 281          * If the start value lies in the current range, then simply set the
 282          * new end point of the range to the end value passed as a parameter.
 283          */
 284         if (rlp->ranges[i] <= start && start <= rlp->ranges[i + 1] + 1) {
 285             rlp->ranges[i + 1] = end;
 286             return;
 287         }
 288
 289         /*
 290          * Shift following values up by two.
 291          */
 292         for (j = rlp->used; j > i; j -= 2) {
 293             rlp->ranges[j] = rlp->ranges[j - 2];
 294             rlp->ranges[j + 1] = rlp->ranges[j - 1];
 295         }
 296
 297         /*
 298          * Add the new range at the insertion point.
 299          */
 300         rlp->ranges[i] = start;
 301         rlp->ranges[i + 1] = end;
 302         rlp->used += 2;
 303     }
 304 }
 305
 306 static void
 307 ordered_range_insert(unsigned long c, char *name, int len)
 308 {
 309     int i, j;
 310     unsigned long s, e;
 311     _ranges_t *rlp;
 312
 313     if (len == 0)
 314       return;
 315
 316     /*
 317      * Deal with directionality codes introduced in Unicode 3.0.
 318      */
 319     if ((len == 2 && memcmp(name, "BN", 2) == 0) ||
 320         (len == 3 &&
 321          (memcmp(name, "NSM", 3) == 0 || memcmp(name, "PDF", 3) == 0 ||
 322           memcmp(name, "LRE", 3) == 0 || memcmp(name, "LRO", 3) == 0 ||
 323           memcmp(name, "RLE", 3) == 0 || memcmp(name, "RLO", 3) == 0))) {
 324         /*
 325          * Mark all of these as Other Neutral to preserve compatibility with
 326          * older versions.
 327          */
 328         len = 2;
 329         name = "ON";
 330     }
 331
 332     for (i = 0; i < NUMPROPS; i++) {
 333         if (props[i].len == len && memcmp(props[i].name, name, len) == 0)
 334           break;
 335     }
 336
 337     if (i == NUMPROPS)
 338       return;
 339
 340     /*
 341      * Have a match, so insert the code in order.
 342      */
 343     rlp = &proptbl[i];
 344
 345     /*
 346      * Resize the range list if necessary.
 347      */
 348     if (rlp->used == rlp->size) {
 349         if (rlp->size == 0)
 350           rlp->ranges = (unsigned long *)
 351               malloc(sizeof(unsigned long) << 3);
 352         else
 353           rlp->ranges = (unsigned long *)
 354               realloc((char *) rlp->ranges,
 355                       sizeof(unsigned long) * (rlp->size + 8));
 356         rlp->size += 8;
 357     }
 358
 359     /*
 360      * If this is the first code for this property list, just add it
 361      * and return.
 362      */
 363     if (rlp->used == 0) {
 364         rlp->ranges[0] = rlp->ranges[1] = c;
 365         rlp->used += 2;
 366         return;
 367     }
 368
 369     /*
 370      * Optimize the cases of extending the last range and adding new ranges to
 371      * the end.
 372      */
 373     j = rlp->used - 1;
 374     e = rlp->ranges[j];
 375     s = rlp->ranges[j - 1];
 376
 377     if (c == e + 1) {
 378         /*
 379          * Extend the last range.
 380          */
 381         rlp->ranges[j] = c;
 382         return;
 383     }
 384
 385     if (c > e + 1) {
 386         /*
 387          * Start another range on the end.
 388          */
 389         j = rlp->used;
 390         rlp->ranges[j] = rlp->ranges[j + 1] = c;
 391         rlp->used += 2;
 392         return;
 393     }
 394
 395     if (c >= s)
 396       /*
 397        * The code is a duplicate of a code in the last range, so just return.
 398        */
 399       return;
 400
 401     /*
 402      * The code should be inserted somewhere before the last range in the
 403      * list.  Locate the insertion point.
 404      */
 405     for (i = 0;
 406          i < rlp->used && c > rlp->ranges[i + 1] + 1; i += 2) ;
 407
 408     s = rlp->ranges[i];
 409     e = rlp->ranges[i + 1];
 410
 411     if (c == e + 1)
 412       /*
 413        * Simply extend the current range.
 414        */
 415       rlp->ranges[i + 1] = c;
 416     else if (c < s) {
 417         /*
 418          * Add a new entry before the current location.  Shift all entries
 419          * before the current one up by one to make room.
 420          */
 421         for (j = rlp->used; j > i; j -= 2) {
 422             rlp->ranges[j] = rlp->ranges[j - 2];
 423             rlp->ranges[j + 1] = rlp->ranges[j - 1];
 424         }
 425         rlp->ranges[i] = rlp->ranges[i + 1] = c;
 426
 427         rlp->used += 2;
 428     }
 429 }
 430
 431 static void
 432 add_decomp(unsigned long code, short compat)
 433 {
 434     unsigned long i, j, size;
 435     _decomp_t **pdecomps;
 436     unsigned long *pdecomps_used;
 437     unsigned long *pdecomps_size;
 438
 439     if (compat) {
 440         pdecomps = &kdecomps;
 441         pdecomps_used = &kdecomps_used;
 442         pdecomps_size = &kdecomps_size;
 443     } else {
 444         pdecomps = &decomps;
 445         pdecomps_used = &decomps_used;
 446         pdecomps_size = &decomps_size;
 447     }
 448
 449     /*
 450      * Add the code to the composite property.
 451      */
 452     if (!compat) {
 453         ordered_range_insert(code, "Cm", 2);
 454     }
 455
 456     /*
 457      * Locate the insertion point for the code.
 458      */
 459     for (i = 0; i < *pdecomps_used && code > (*pdecomps)[i].code; i++) ;
 460
 461     /*
 462      * Allocate space for a new decomposition.
 463      */
 464     if (*pdecomps_used == *pdecomps_size) {
 465         if (*pdecomps_size == 0)
 466           *pdecomps = (_decomp_t *) malloc(sizeof(_decomp_t) << 3);
 467         else
 468           *pdecomps = (_decomp_t *)
 469               realloc((char *) *pdecomps,
 470                       sizeof(_decomp_t) * (*pdecomps_size + 8));
 471         (void) memset((char *) (*pdecomps + *pdecomps_size), '\0',
 472                       sizeof(_decomp_t) << 3);
 473         *pdecomps_size += 8;
 474     }
 475
 476     if (i < *pdecomps_used && code != (*pdecomps)[i].code) {
 477         /*
 478          * Shift the decomps up by one if the codes don't match.
 479          */
 480         for (j = *pdecomps_used; j > i; j--)
 481           (void) AC_MEMCPY((char *) &(*pdecomps)[j], (char *) &(*pdecomps)[j - 1],
 482                         sizeof(_decomp_t));
 483     }
 484
 485     /*
 486      * Insert or replace a decomposition.
 487      */
 488     size = dectmp_size + (4 - (dectmp_size & 3));
 489     if ((*pdecomps)[i].size < size) {
 490         if ((*pdecomps)[i].size == 0)
 491           (*pdecomps)[i].decomp = (unsigned long *)
 492               malloc(sizeof(unsigned long) * size);
 493         else
 494           (*pdecomps)[i].decomp = (unsigned long *)
 495               realloc((char *) (*pdecomps)[i].decomp,
 496                       sizeof(unsigned long) * size);
 497         (*pdecomps)[i].size = size;
 498     }
 499
 500     if ((*pdecomps)[i].code != code)
 501       (*pdecomps_used)++;
 502
 503     (*pdecomps)[i].code = code;
 504     (*pdecomps)[i].used = dectmp_size;
 505     (void) AC_MEMCPY((char *) (*pdecomps)[i].decomp, (char *) dectmp,
 506                   sizeof(unsigned long) * dectmp_size);
 507
 508     /*
 509      * NOTICE: This needs changing later so it is more general than simply
 510      * pairs.  This calculation is done here to simplify allocation elsewhere.
 511      */
 512     if (!compat && dectmp_size == 2)
 513       comps_used++;
 514 }
 515
 516 static void
 517 add_title(unsigned long code)
 518 {
 519     unsigned long i, j;
 520
 521     /*
 522      * Always map the code to itself.
 523      */
 524     cases[2] = code;
 525
 526     if (title_used == title_size) {
 527         if (title_size == 0)
 528           title = (_case_t *) malloc(sizeof(_case_t) << 3);
 529         else
 530           title = (_case_t *) realloc((char *) title,
 531                                       sizeof(_case_t) * (title_size + 8));
 532         title_size += 8;
 533     }
 534
 535     /*
 536      * Locate the insertion point.
 537      */
 538     for (i = 0; i < title_used && code > title[i].key; i++) ;
 539
 540     if (i < title_used) {
 541         /*
 542          * Shift the array up by one.
 543          */
 544         for (j = title_used; j > i; j--)
 545           (void) AC_MEMCPY((char *) &title[j], (char *) &title[j - 1],
 546                         sizeof(_case_t));
 547     }
 548
 549     title[i].key = cases[2];    /* Title */
 550     title[i].other1 = cases[0]; /* Upper */
 551     title[i].other2 = cases[1]; /* Lower */
 552
 553     title_used++;
 554 }
 555
 556 static void
 557 add_upper(unsigned long code)
 558 {
 559     unsigned long i, j;
 560
 561     /*
 562      * Always map the code to itself.
 563      */
 564     cases[0] = code;
 565
 566     /*
 567      * If the title case character is not present, then make it the same as
 568      * the upper case.
 569      */
 570     if (cases[2] == 0)
 571       cases[2] = code;
 572
 573     if (upper_used == upper_size) {
 574         if (upper_size == 0)
 575           upper = (_case_t *) malloc(sizeof(_case_t) << 3);
 576         else
 577           upper = (_case_t *) realloc((char *) upper,
 578                                       sizeof(_case_t) * (upper_size + 8));
 579         upper_size += 8;
 580     }
 581
 582     /*
 583      * Locate the insertion point.
 584      */
 585     for (i = 0; i < upper_used && code > upper[i].key; i++) ;
 586
 587     if (i < upper_used) {
 588         /*
 589          * Shift the array up by one.
 590          */
 591         for (j = upper_used; j > i; j--)
 592           (void) AC_MEMCPY((char *) &upper[j], (char *) &upper[j - 1],
 593                         sizeof(_case_t));
 594     }
 595
 596     upper[i].key = cases[0];    /* Upper */
 597     upper[i].other1 = cases[1]; /* Lower */
 598     upper[i].other2 = cases[2]; /* Title */
 599
 600     upper_used++;
 601 }
 602
 603 static void
 604 add_lower(unsigned long code)
 605 {
 606     unsigned long i, j;
 607
 608     /*
 609      * Always map the code to itself.
 610      */
 611     cases[1] = code;
 612
 613     /*
 614      * If the title case character is empty, then make it the same as the
 615      * upper case.
 616      */
 617     if (cases[2] == 0)
 618       cases[2] = cases[0];
 619
 620     if (lower_used == lower_size) {
 621         if (lower_size == 0)
 622           lower = (_case_t *) malloc(sizeof(_case_t) << 3);
 623         else
 624           lower = (_case_t *) realloc((char *) lower,
 625                                       sizeof(_case_t) * (lower_size + 8));
 626         lower_size += 8;
 627     }
 628
 629     /*
 630      * Locate the insertion point.
 631      */
 632     for (i = 0; i < lower_used && code > lower[i].key; i++) ;
 633
 634     if (i < lower_used) {
 635         /*
 636          * Shift the array up by one.
 637          */
 638         for (j = lower_used; j > i; j--)
 639           (void) AC_MEMCPY((char *) &lower[j], (char *) &lower[j - 1],
 640                         sizeof(_case_t));
 641     }
 642
 643     lower[i].key = cases[1];    /* Lower */
 644     lower[i].other1 = cases[0]; /* Upper */
 645     lower[i].other2 = cases[2]; /* Title */
 646
 647     lower_used++;
 648 }
 649
 650 static void
 651 ordered_ccl_insert(unsigned long c, unsigned long ccl_code)
 652 {
 653     unsigned long i, j;
 654
 655     if (ccl_used == ccl_size) {
 656         if (ccl_size == 0)
 657           ccl = (unsigned long *) malloc(sizeof(unsigned long) * 24);
 658         else
 659           ccl = (unsigned long *)
 660               realloc((char *) ccl, sizeof(unsigned long) * (ccl_size + 24));
 661         ccl_size += 24;
 662     }
 663
 664     /*
 665      * Optimize adding the first item.
 666      */
 667     if (ccl_used == 0) {
 668         ccl[0] = ccl[1] = c;
 669         ccl[2] = ccl_code;
 670         ccl_used += 3;
 671         return;
 672     }
 673
 674     /*
 675      * Handle the special case of extending the range on the end.  This
 676      * requires that the combining class codes are the same.
 677      */
 678     if (ccl_code == ccl[ccl_used - 1] && c == ccl[ccl_used - 2] + 1) {
 679         ccl[ccl_used - 2] = c;
 680         return;
 681     }
 682
 683     /*
 684      * Handle the special case of adding another range on the end.
 685      */
 686     if (c > ccl[ccl_used - 2] + 1 ||
 687         (c == ccl[ccl_used - 2] + 1 && ccl_code != ccl[ccl_used - 1])) {
 688         ccl[ccl_used++] = c;
 689         ccl[ccl_used++] = c;
 690         ccl[ccl_used++] = ccl_code;
 691         return;
 692     }
 693
 694     /*
 695      * Locate either the insertion point or range for the code.
 696      */
 697     for (i = 0; i < ccl_used && c > ccl[i + 1] + 1; i += 3) ;
 698
 699     if (ccl_code == ccl[i + 2] && c == ccl[i + 1] + 1) {
 700         /*
 701          * Extend an existing range.
 702          */
 703         ccl[i + 1] = c;
 704         return;
 705     } else if (c < ccl[i]) {
 706         /*
 707          * Start a new range before the current location.
 708          */
 709         for (j = ccl_used; j > i; j -= 3) {
 710             ccl[j] = ccl[j - 3];
 711             ccl[j - 1] = ccl[j - 4];
 712             ccl[j - 2] = ccl[j - 5];
 713         }
 714         ccl[i] = ccl[i + 1] = c;
 715         ccl[i + 2] = ccl_code;
 716     }
 717 }
 718
 719 /*
 720  * Adds a number if it does not already exist and returns an index value
 721  * multiplied by 2.
 722  */
 723 static unsigned long
 724 make_number(short num, short denom)
 725 {
 726     unsigned long n;
 727
 728     /*
 729      * Determine if the number already exists.
 730      */
 731     for (n = 0; n < nums_used; n++) {
 732         if (nums[n].numerator == num && nums[n].denominator == denom)
 733           return n << 1;
 734     }
 735
 736     if (nums_used == nums_size) {
 737         if (nums_size == 0)
 738           nums = (_num_t *) malloc(sizeof(_num_t) << 3);
 739         else
 740           nums = (_num_t *) realloc((char *) nums,
 741                                     sizeof(_num_t) * (nums_size + 8));
 742         nums_size += 8;
 743     }
 744
 745     n = nums_used++;
 746     nums[n].numerator = num;
 747     nums[n].denominator = denom;
 748
 749     return n << 1;
 750 }
 751
 752 static void
 753 add_number(unsigned long code, short num, short denom)
 754 {
 755     unsigned long i, j;
 756
 757     /*
 758      * Insert the code in order.
 759      */
 760     for (i = 0; i < ncodes_used && code > ncodes[i].code; i++) ;
 761
 762     /*
 763      * Handle the case of the codes matching and simply replace the number
 764      * that was there before.
 765      */
 766     if (i < ncodes_used && code == ncodes[i].code) {
 767         ncodes[i].idx = make_number(num, denom);
 768         return;
 769     }
 770
 771     /*
 772      * Resize the array if necessary.
 773      */
 774     if (ncodes_used == ncodes_size) {
 775         if (ncodes_size == 0)
 776           ncodes = (_codeidx_t *) malloc(sizeof(_codeidx_t) << 3);
 777         else
 778           ncodes = (_codeidx_t *)
 779               realloc((char *) ncodes, sizeof(_codeidx_t) * (ncodes_size + 8));
 780
 781         ncodes_size += 8;
 782     }
 783
 784     /*
 785      * Shift things around to insert the code if necessary.
 786      */
 787     if (i < ncodes_used) {
 788         for (j = ncodes_used; j > i; j--) {
 789             ncodes[j].code = ncodes[j - 1].code;
 790             ncodes[j].idx = ncodes[j - 1].idx;
 791         }
 792     }
 793     ncodes[i].code = code;
 794     ncodes[i].idx = make_number(num, denom);
 795
 796     ncodes_used++;
 797 }
 798
 799 /*
 800  * This routine assumes that the line is a valid Unicode Character Database
 801  * entry.
 802  */
 803 static void
 804 read_cdata(FILE *in)
 805 {
 806     unsigned long i, lineno, skip, code, ccl_code;
 807     short wnum, neg, number[2], compat;
 808     char line[512], *s, *e;
 809
 810     lineno = skip = 0;
 811     while (fgets(line, sizeof(line), in)) {
 812         if( (s=strchr(line, '\n')) ) *s = '\0';
 813         lineno++;
 814
 815         /*
 816          * Skip blank lines and lines that start with a '#'.
 817          */
 818         if (line[0] == 0 || line[0] == '#')
 819           continue;
 820
 821         /*
 822          * If lines need to be skipped, do it here.
 823          */
 824         if (skip) {
 825             skip--;
 826             continue;
 827         }
 828
 829         /*
 830          * Collect the code.  The code can be up to 6 hex digits in length to
 831          * allow surrogates to be specified.
 832          */
 833         for (s = line, i = code = 0; *s != ';' && i < 6; i++, s++) {
 834             code <<= 4;
 835             if (*s >= '0' && *s <= '9')
 836               code += *s - '0';
 837             else if (*s >= 'A' && *s <= 'F')
 838               code += (*s - 'A') + 10;
 839             else if (*s >= 'a' && *s <= 'f')
 840               code += (*s - 'a') + 10;
 841         }
 842
 843         /*
 844          * Handle the following special cases:
 845          * 1. 4E00-9FA5 CJK Ideographs.
 846          * 2. AC00-D7A3 Hangul Syllables.
 847          * 3. D800-DFFF Surrogates.
 848          * 4. E000-F8FF Private Use Area.
 849          * 5. F900-FA2D Han compatibility.
 850          */
 851         switch (code) {
 852           case 0x4e00:
 853             /*
 854              * The Han ideographs.
 855              */
 856             add_range(0x4e00, 0x9fff, "Lo", "L");
 857
 858             /*
 859              * Add the characters to the defined category.
 860              */
 861             add_range(0x4e00, 0x9fa5, "Cp", 0);
 862
 863             skip = 1;
 864             break;
 865           case 0xac00:
 866             /*
 867              * The Hangul syllables.
 868              */
 869             add_range(0xac00, 0xd7a3, "Lo", "L");
 870
 871             /*
 872              * Add the characters to the defined category.
 873              */
 874             add_range(0xac00, 0xd7a3, "Cp", 0);
 875
 876             skip = 1;
 877             break;
 878           case 0xd800:
 879             /*
 880              * Make a range of all surrogates and assume some default
 881              * properties.
 882              */
 883             add_range(0x010000, 0x10ffff, "Cs", "L");
 884             skip = 5;
 885             break;
 886           case 0xe000:
 887             /*
 888              * The Private Use area.  Add with a default set of properties.
 889              */
 890             add_range(0xe000, 0xf8ff, "Co", "L");
 891             skip = 1;
 892             break;
 893           case 0xf900:
 894             /*
 895              * The CJK compatibility area.
 896              */
 897             add_range(0xf900, 0xfaff, "Lo", "L");
 898
 899             /*
 900              * Add the characters to the defined category.
 901              */
 902             add_range(0xf900, 0xfaff, "Cp", 0);
 903
 904             skip = 1;
 905         }
 906
 907         if (skip)
 908           continue;
 909
 910         /*
 911          * Add the code to the defined category.
 912          */
 913         ordered_range_insert(code, "Cp", 2);
 914
 915         /*
 916          * Locate the first character property field.
 917          */
 918         for (i = 0; *s != 0 && i < 2; s++) {
 919             if (*s == ';')
 920               i++;
 921         }
 922         for (e = s; *e && *e != ';'; e++) ;
 923
 924         ordered_range_insert(code, s, e - s);
 925
 926         /*
 927          * Locate the combining class code.
 928          */
 929         for (s = e; *s != 0 && i < 3; s++) {
 930             if (*s == ';')
 931               i++;
 932         }
 933
 934         /*
 935          * Convert the combining class code from decimal.
 936          */
 937         for (ccl_code = 0, e = s; *e && *e != ';'; e++)
 938           ccl_code = (ccl_code * 10) + (*e - '0');
 939
 940         /*
 941          * Add the code if it not 0.
 942          */
 943         if (ccl_code != 0)
 944           ordered_ccl_insert(code, ccl_code);
 945
 946         /*
 947          * Locate the second character property field.
 948          */
 949         for (s = e; *s != 0 && i < 4; s++) {
 950             if (*s == ';')
 951               i++;
 952         }
 953         for (e = s; *e && *e != ';'; e++) ;
 954
 955         ordered_range_insert(code, s, e - s);
 956
 957         /*
 958          * Check for a decomposition.
 959          */
 960         s = ++e;
 961         if (*s != ';') {
 962             compat = *s == '<';
 963             if (compat) {
 964                 /*
 965                  * Skip compatibility formatting tag.
 966                  */
 967                 while (*s++ != '>');
 968             }
 969             /*
 970              * Collect the codes of the decomposition.
 971              */
 972             for (dectmp_size = 0; *s != ';'; ) {
 973                 /*
 974                  * Skip all leading non-hex digits.
 975                  */
 976                 while (!ishdigit(*s))
 977                   s++;
 978
 979                 for (dectmp[dectmp_size] = 0; ishdigit(*s); s++) {
 980                     dectmp[dectmp_size] <<= 4;
 981                     if (*s >= '0' && *s <= '9')
 982                       dectmp[dectmp_size] += *s - '0';
 983                     else if (*s >= 'A' && *s <= 'F')
 984                       dectmp[dectmp_size] += (*s - 'A') + 10;
 985                     else if (*s >= 'a' && *s <= 'f')
 986                       dectmp[dectmp_size] += (*s - 'a') + 10;
 987                 }
 988                 dectmp_size++;
 989             }
 990
 991             /*
 992              * If there are any codes in the temporary decomposition array,
 993              * then add the character with its decomposition.
 994              */
 995             if (dectmp_size > 0) {
 996                 if (!compat) {
 997                     add_decomp(code, 0);
 998                 }
 999                 add_decomp(code, 1);
1000             }
1001         }
1002
1003         /*
1004          * Skip to the number field.
1005          */
1006         for (i = 0; i < 3 && *s; s++) {
1007             if (*s == ';')
1008               i++;
1009         }
1010
1011         /*
1012          * Scan the number in.
1013          */
1014         number[0] = number[1] = 0;
1015         for (e = s, neg = wnum = 0; *e && *e != ';'; e++) {
1016             if (*e == '-') {
1017                 neg = 1;
1018                 continue;
1019             }
1020
1021             if (*e == '/') {
1022                 /*
1023                  * Move the the denominator of the fraction.
1024                  */
1025                 if (neg)
1026                   number[wnum] *= -1;
1027                 neg = 0;
1028                 e++;
1029                 wnum++;
1030             }
1031             number[wnum] = (number[wnum] * 10) + (*e - '0');
1032         }
1033
1034         if (e > s) {
1035             /*
1036              * Adjust the denominator in case of integers and add the number.
1037              */
1038             if (wnum == 0)
1039               number[1] = number[0];
1040
1041             add_number(code, number[0], number[1]);
1042         }
1043
1044         /*
1045          * Skip to the start of the possible case mappings.
1046          */
1047         for (s = e, i = 0; i < 4 && *s; s++) {
1048             if (*s == ';')
1049               i++;
1050         }
1051
1052         /*
1053          * Collect the case mappings.
1054          */
1055         cases[0] = cases[1] = cases[2] = 0;
1056         for (i = 0; i < 3; i++) {
1057             while (ishdigit(*s)) {
1058                 cases[i] <<= 4;
1059                 if (*s >= '0' && *s <= '9')
1060                   cases[i] += *s - '0';
1061                 else if (*s >= 'A' && *s <= 'F')
1062                   cases[i] += (*s - 'A') + 10;
1063                 else if (*s >= 'a' && *s <= 'f')
1064                   cases[i] += (*s - 'a') + 10;
1065                 s++;
1066             }
1067             if (*s == ';')
1068               s++;
1069         }
1070         if (cases[0] && cases[1])
1071           /*
1072            * Add the upper and lower mappings for a title case character.
1073            */
1074           add_title(code);
1075         else if (cases[1])
1076           /*
1077            * Add the lower and title case mappings for the upper case
1078            * character.
1079            */
1080           add_upper(code);
1081         else if (cases[0])
1082           /*
1083            * Add the upper and title case mappings for the lower case
1084            * character.
1085            */
1086           add_lower(code);
1087     }
1088 }
1089
1090 static _decomp_t *
1091 find_decomp(unsigned long code, short compat)
1092 {
1093     long l, r, m;
1094     _decomp_t *decs;
1095
1096     l = 0;
1097     r = (compat ? kdecomps_used : decomps_used) - 1;
1098     decs = compat ? kdecomps : decomps;
1099     while (l <= r) {
1100         m = (l + r) >> 1;
1101         if (code > decs[m].code)
1102           l = m + 1;
1103         else if (code < decs[m].code)
1104           r = m - 1;
1105         else
1106           return &decs[m];
1107     }
1108     return 0;
1109 }
1110
1111 static void
1112 decomp_it(_decomp_t *d, short compat)
1113 {
1114     unsigned long i;
1115     _decomp_t *dp;
1116
1117     for (i = 0; i < d->used; i++) {
1118         if ((dp = find_decomp(d->decomp[i], compat)) != 0)
1119           decomp_it(dp, compat);
1120         else
1121           dectmp[dectmp_size++] = d->decomp[i];
1122     }
1123 }
1124
1125 /*
1126  * Expand all decompositions by recursively decomposing each character
1127  * in the decomposition.
1128  */
1129 static void
1130 expand_decomp(void)
1131 {
1132     unsigned long i;
1133
1134     for (i = 0; i < decomps_used; i++) {
1135         dectmp_size = 0;
1136         decomp_it(&decomps[i], 0);
1137         if (dectmp_size > 0)
1138           add_decomp(decomps[i].code, 0);
1139     }
1140
1141     for (i = 0; i < kdecomps_used; i++) {
1142         dectmp_size = 0;
1143         decomp_it(&kdecomps[i], 1);
1144         if (dectmp_size > 0)
1145           add_decomp(kdecomps[i].code, 1);
1146     }
1147 }
1148
1149 static int
1150 cmpcomps(_comp_t *comp1, _comp_t *comp2)
1151 {
1152     long diff = comp1->code1 - comp2->code1;
1153
1154     if (!diff)
1155         diff = comp1->code2 - comp2->code2;
1156     return (int) diff;
1157 }
1158
1159 /*
1160  * Load composition exclusion data
1161  */
1162 static void
1163 read_compexdata(FILE *in)
1164 {
1165     unsigned short i, code;
1166     char line[512], *s;
1167
1168     (void) memset((char *) compexs, 0, sizeof(unsigned long) << 11);
1169
1170     while (fgets(line, sizeof(line), in)) {
1171         if( (s=strchr(line, '\n')) ) *s = '\0';
1172         /*
1173          * Skip blank lines and lines that start with a '#'.
1174          */
1175         if (line[0] == 0 || line[0] == '#')
1176             continue;
1177
1178         /*
1179          * Collect the code.  Assume max 4 digits
1180          */
1181
1182         for (s = line, i = code = 0; *s != '#' && i < 4; i++, s++) {
1183             code <<= 4;
1184             if (*s >= '0' && *s <= '9')
1185                 code += *s - '0';
1186             else if (*s >= 'A' && *s <= 'F')
1187                 code += (*s - 'A') + 10;
1188             else if (*s >= 'a' && *s <= 'f')
1189                 code += (*s - 'a') + 10;
1190         }
1191         COMPEX_SET(code);
1192     }
1193 }
1194
1195 /*
1196  * Creates array of compositions from decomposition array
1197  */
1198 static void
1199 create_comps(void)
1200 {
1201     unsigned long i, cu;
1202
1203     comps = (_comp_t *) malloc(comps_used * sizeof(_comp_t));
1204
1205     for (i = cu = 0; i < decomps_used; i++) {
1206         if (decomps[i].used != 2 || COMPEX_TEST(decomps[i].code))
1207             continue;
1208         comps[cu].comp = decomps[i].code;
1209         comps[cu].count = 2;
1210         comps[cu].code1 = decomps[i].decomp[0];
1211         comps[cu].code2 = decomps[i].decomp[1];
1212         cu++;
1213     }
1214     comps_used = cu;
1215     qsort(comps, comps_used, sizeof(_comp_t),
1216           (int (*)(const void *, const void *)) cmpcomps);
1217 }
1218
1219 static void
1220 write_cdata(char *opath)
1221 {
1222     FILE *out;
1223         ac_uint4 bytes;
1224     unsigned long i, idx, nprops;
1225     unsigned short casecnt[2];
1226     char path[BUFSIZ];
1227
1228     /*****************************************************************
1229      *
1230      * Generate the ctype data.
1231      *
1232      *****************************************************************/
1233
1234     /*
1235      * Open the ctype.dat file.
1236      */
1237     snprintf(path, sizeof path, "%s" LDAP_DIRSEP "ctype.dat", opath);
1238     if ((out = fopen(path, "wb")) == 0)
1239       return;
1240
1241     /*
1242      * Collect the offsets for the properties.  The offsets array is
1243      * on a 4-byte boundary to keep things efficient for architectures
1244      * that need such a thing.
1245      */
1246     for (i = idx = 0; i < NUMPROPS; i++) {
1247         propcnt[i] = (proptbl[i].used != 0) ? idx : 0xffff;
1248         idx += proptbl[i].used;
1249     }
1250
1251     /*
1252      * Add the sentinel index which is used by the binary search as the upper
1253      * bound for a search.
1254      */
1255     propcnt[i] = idx;
1256
1257     /*
1258      * Record the actual number of property lists.  This may be different than
1259      * the number of offsets actually written because of aligning on a 4-byte
1260      * boundary.
1261      */
1262     hdr[1] = NUMPROPS;
1263
1264     /*
1265      * Calculate the byte count needed and pad the property counts array to a
1266      * 4-byte boundary.
1267      */
1268     if ((bytes = sizeof(unsigned short) * (NUMPROPS + 1)) & 3)
1269       bytes += 4 - (bytes & 3);
1270     nprops = bytes / sizeof(unsigned short);
1271     bytes += sizeof(unsigned long) * idx;
1272
1273     /*
1274      * Write the header.
1275      */
1276     fwrite((char *) hdr, sizeof(ac_uint2), 2, out);
1277
1278     /*
1279      * Write the byte count.
1280      */
1281     fwrite((char *) &bytes, sizeof(unsigned long), 1, out);
1282
1283     /*
1284      * Write the property list counts.
1285      */
1286     fwrite((char *) propcnt, sizeof(unsigned short), nprops, out);
1287
1288     /*
1289      * Write the property lists.
1290      */
1291     for (i = 0; i < NUMPROPS; i++) {
1292         if (proptbl[i].used > 0)
1293           fwrite((char *) proptbl[i].ranges, sizeof(unsigned long),
1294                  proptbl[i].used, out);
1295     }
1296
1297     fclose(out);
1298
1299     /*****************************************************************
1300      *
1301      * Generate the case mapping data.
1302      *
1303      *****************************************************************/
1304
1305     /*
1306      * Open the case.dat file.
1307      */
1308     snprintf(path, sizeof path, "%s" LDAP_DIRSEP "case.dat", opath);
1309     if ((out = fopen(path, "wb")) == 0)
1310       return;
1311
1312     /*
1313      * Write the case mapping tables.
1314      */
1315     hdr[1] = upper_used + lower_used + title_used;
1316     casecnt[0] = upper_used;
1317     casecnt[1] = lower_used;
1318
1319     /*
1320      * Write the header.
1321      */
1322     fwrite((char *) hdr, sizeof(unsigned short), 2, out);
1323
1324     /*
1325      * Write the upper and lower case table sizes.
1326      */
1327     fwrite((char *) casecnt, sizeof(unsigned short), 2, out);
1328
1329     if (upper_used > 0)
1330       /*
1331        * Write the upper case table.
1332        */
1333       fwrite((char *) upper, sizeof(_case_t), upper_used, out);
1334
1335     if (lower_used > 0)
1336       /*
1337        * Write the lower case table.
1338        */
1339       fwrite((char *) lower, sizeof(_case_t), lower_used, out);
1340
1341     if (title_used > 0)
1342       /*
1343        * Write the title case table.
1344        */
1345       fwrite((char *) title, sizeof(_case_t), title_used, out);
1346
1347     fclose(out);
1348
1349     /*****************************************************************
1350      *
1351      * Generate the composition data.
1352      *
1353      *****************************************************************/
1354
1355     /*
1356      * Create compositions from decomposition data
1357      */
1358     create_comps();
1359
1360     /*
1361      * Open the comp.dat file.
1362      */
1363     snprintf(path, sizeof path, "%s" LDAP_DIRSEP "comp.dat", opath);
1364     if ((out = fopen(path, "wb")) == 0)
1365         return;
1366
1367     /*
1368      * Write the header.
1369      */
1370     hdr[1] = (unsigned short) comps_used * 4;
1371     fwrite((char *) hdr, sizeof(unsigned short), 2, out);
1372
1373     /*
1374      * Write out the byte count to maintain header size.
1375      */
1376     bytes = comps_used * sizeof(_comp_t);
1377     fwrite((char *) &bytes, sizeof(unsigned long), 1, out);
1378
1379     /*
1380      * Now, if comps exist, write them out.
1381      */
1382     if (comps_used > 0)
1383         fwrite((char *) comps, sizeof(_comp_t), comps_used, out);
1384
1385     fclose(out);
1386
1387     /*****************************************************************
1388      *
1389      * Generate the decomposition data.
1390      *
1391      *****************************************************************/
1392
1393     /*
1394      * Fully expand all decompositions before generating the output file.
1395      */
1396     expand_decomp();
1397
1398     /*
1399      * Open the decomp.dat file.
1400      */
1401     snprintf(path, sizeof path, "%s" LDAP_DIRSEP "decomp.dat", opath);
1402     if ((out = fopen(path, "wb")) == 0)
1403       return;
1404
1405     hdr[1] = decomps_used;
1406
1407     /*
1408      * Write the header.
1409      */
1410     fwrite((char *) hdr, sizeof(unsigned short), 2, out);
1411
1412     /*
1413      * Write a temporary byte count which will be calculated as the
1414      * decompositions are written out.
1415      */
1416     bytes = 0;
1417     fwrite((char *) &bytes, sizeof(unsigned long), 1, out);
1418
1419     if (decomps_used) {
1420         /*
1421          * Write the list of decomp nodes.
1422          */
1423         for (i = idx = 0; i < decomps_used; i++) {
1424             fwrite((char *) &decomps[i].code, sizeof(unsigned long), 1, out);
1425             fwrite((char *) &idx, sizeof(unsigned long), 1, out);
1426             idx += decomps[i].used;
1427         }
1428
1429         /*
1430          * Write the sentinel index as the last decomp node.
1431          */
1432         fwrite((char *) &idx, sizeof(unsigned long), 1, out);
1433
1434         /*
1435          * Write the decompositions themselves.
1436          */
1437         for (i = 0; i < decomps_used; i++)
1438           fwrite((char *) decomps[i].decomp, sizeof(unsigned long),
1439                  decomps[i].used, out);
1440
1441         /*
1442          * Seek back to the beginning and write the byte count.
1443          */
1444         bytes = (sizeof(unsigned long) * idx) +
1445             (sizeof(unsigned long) * ((hdr[1] << 1) + 1));
1446         fseek(out, sizeof(unsigned short) << 1, 0L);
1447         fwrite((char *) &bytes, sizeof(unsigned long), 1, out);
1448
1449         fclose(out);
1450     }
1451
1452     /*
1453      * Open the kdecomp.dat file.
1454      */
1455     snprintf(path, sizeof path, "%s" LDAP_DIRSEP "kdecomp.dat", opath);
1456     if ((out = fopen(path, "wb")) == 0)
1457       return;
1458
1459     hdr[1] = kdecomps_used;
1460
1461     /*
1462      * Write the header.
1463      */
1464     fwrite((char *) hdr, sizeof(unsigned short), 2, out);
1465
1466     /*
1467      * Write a temporary byte count which will be calculated as the
1468      * decompositions are written out.
1469      */
1470     bytes = 0;
1471     fwrite((char *) &bytes, sizeof(unsigned long), 1, out);
1472
1473     if (kdecomps_used) {
1474         /*
1475          * Write the list of kdecomp nodes.
1476          */
1477         for (i = idx = 0; i < kdecomps_used; i++) {
1478             fwrite((char *) &kdecomps[i].code, sizeof(unsigned long), 1, out);
1479             fwrite((char *) &idx, sizeof(unsigned long), 1, out);
1480             idx += kdecomps[i].used;
1481         }
1482
1483         /*
1484          * Write the sentinel index as the last decomp node.
1485          */
1486         fwrite((char *) &idx, sizeof(unsigned long), 1, out);
1487
1488         /*
1489          * Write the decompositions themselves.
1490          */
1491         for (i = 0; i < kdecomps_used; i++)
1492           fwrite((char *) kdecomps[i].decomp, sizeof(unsigned long),
1493                  kdecomps[i].used, out);
1494
1495         /*
1496          * Seek back to the beginning and write the byte count.
1497          */
1498         bytes = (sizeof(unsigned long) * idx) +
1499             (sizeof(unsigned long) * ((hdr[1] << 1) + 1));
1500         fseek(out, sizeof(unsigned short) << 1, 0L);
1501         fwrite((char *) &bytes, sizeof(unsigned long), 1, out);
1502
1503         fclose(out);
1504     }
1505
1506     /*****************************************************************
1507      *
1508      * Generate the combining class data.
1509      *
1510      *****************************************************************/
1511
1512     /*
1513      * Open the cmbcl.dat file.
1514      */
1515     snprintf(path, sizeof path, "%s" LDAP_DIRSEP "cmbcl.dat", opath);
1516     if ((out = fopen(path, "wb")) == 0)
1517       return;
1518
1519     /*
1520      * Set the number of ranges used.  Each range has a combining class which
1521      * means each entry is a 3-tuple.
1522      */
1523     hdr[1] = ccl_used / 3;
1524
1525     /*
1526      * Write the header.
1527      */
1528     fwrite((char *) hdr, sizeof(unsigned short), 2, out);
1529
1530     /*
1531      * Write out the byte count to maintain header size.
1532      */
1533     bytes = ccl_used * sizeof(unsigned long);
1534     fwrite((char *) &bytes, sizeof(unsigned long), 1, out);
1535
1536     if (ccl_used > 0)
1537       /*
1538        * Write the combining class ranges out.
1539        */
1540       fwrite((char *) ccl, sizeof(unsigned long), ccl_used, out);
1541
1542     fclose(out);
1543
1544     /*****************************************************************
1545      *
1546      * Generate the number data.
1547      *
1548      *****************************************************************/
1549
1550     /*
1551      * Open the num.dat file.
1552      */
1553     snprintf(path, sizeof path, "%s" LDAP_DIRSEP "num.dat", opath);
1554     if ((out = fopen(path, "wb")) == 0)
1555       return;
1556
1557     /*
1558      * The count part of the header will be the total number of codes that
1559      * have numbers.
1560      */
1561     hdr[1] = (unsigned short) (ncodes_used << 1);
1562     bytes = (ncodes_used * sizeof(_codeidx_t)) + (nums_used * sizeof(_num_t));
1563
1564     /*
1565      * Write the header.
1566      */
1567     fwrite((char *) hdr, sizeof(unsigned short), 2, out);
1568
1569     /*
1570      * Write out the byte count to maintain header size.
1571      */
1572     fwrite((char *) &bytes, sizeof(unsigned long), 1, out);
1573
1574     /*
1575      * Now, if number mappings exist, write them out.
1576      */
1577     if (ncodes_used > 0) {
1578         fwrite((char *) ncodes, sizeof(_codeidx_t), ncodes_used, out);
1579         fwrite((char *) nums, sizeof(_num_t), nums_used, out);
1580     }
1581
1582     fclose(out);
1583 }
1584
/*
 * Print the command-line synopsis and option descriptions to stderr,
 * using `prog` as the program name, then terminate with exit status 1.
 * This function does not return.
 */
static void
usage(char *prog)
{
    /*
     * Synopsis line.
     */
    fprintf(stderr,
            "Usage: %s [-o output-directory|-x composition-exclusions]"
            " datafile1 datafile2 ...\n\n",
            prog);

    /*
     * One entry per option.
     */
    fputs("-o output-directory\n\t\tWrite the output files to a different"
          " directory (default: .).\n",
          stderr);
    fputs("-x composition-exclusion\n\t\tFile of composition codes"
          " that should be excluded.\n",
          stderr);

    exit(1);
}
1599
/*
 * Program entry point.
 *
 * Arguments are processed from left to right:
 *   -o DIR   remember DIR as the output directory
 *   -x FILE  load FILE with read_compexdata()
 *   other    treat the argument as a data file and load it with
 *            read_cdata()
 * Any other option prints the usage text and exits.  A file that
 * cannot be opened is reported on stderr and skipped.
 *
 * After all arguments are handled the tables are written with
 * write_cdata() to the chosen directory, or to "." if -o was not given.
 *
 * -o and -x need a following argument.  If it is missing the usage text
 * is printed and the program exits, instead of passing a null pointer
 * on as a file name or silently ignoring the option.
 */
int
main(int argc, char *argv[])
{
    FILE *in;
    char *prog, *opath;

    prog = lutil_progname( "ucgendat", argc, argv );

    opath = 0;
    in = stdin;

    /*
     * Step over the program name.
     */
    argc--;
    argv++;

    while (argc > 0) {
        if (argv[0][0] == '-') {
            switch (argv[0][1]) {
              case 'o':
                /*
                 * The directory must follow the option.
                 */
                if (argc < 2)
                  usage(prog);
                argc--;
                argv++;
                opath = argv[0];
                break;
              case 'x':
                /*
                 * The exclusion file name must follow the option.
                 */
                if (argc < 2)
                  usage(prog);
                argc--;
                argv++;
                if ((in = fopen(argv[0], "rb")) == 0)
                  fprintf(stderr,
                          "%s: unable to open composition exclusion file %s\n",
                          prog, argv[0]);
                else {
                    read_compexdata(in);
                    fclose(in);
                    in = 0;
                }
                break;
              default:
                usage(prog);
            }
        } else {
            /*
             * Release a stream left over from an earlier argument, but
             * never close stdin.
             */
            if (in != stdin && in != NULL)
              fclose(in);
            if ((in = fopen(argv[0], "rb")) == 0)
              fprintf(stderr, "%s: unable to open ctype file %s\n",
                      prog, argv[0]);
            else {
                read_cdata(in);
                fclose(in);
                in = 0;
            }
        }
        argc--;
        argv++;
    }

    if (opath == 0)
      opath = ".";
    write_cdata(opath);

    return 0;
}