git.sur5r.net Git - openldap/blob - build/unproto/tok_class.c

   1 /*++
   2 /* NAME
   3 /*      tok_class 3
   4 /* SUMMARY
   5 /*      token classification
   6 /* PACKAGE
   7 /*      unproto
   8 /* SYNOPSIS
   9 /*      #include "token.h"
  10 /*
  11 /*      void tok_unget(t)
  12 /*      struct token *t;
  13 /*
  14 /*      struct token *tok_class()
  15 /* DESCRIPTION
  16 /*      tok_class() collects single and composite tokens, and
  17 /*      recognizes keywords.
  18 /*      At present, the only composite tokens are ()-delimited,
  19 /*      comma-separated lists, and non-whitespace tokens with attached
  20 /*      whitespace or comment tokens.
  21 /*
  22 /*      Source transformations are: __DATE__ and __TIME__ are rewritten
  23 /*      to string constants with the current date and time, respectively.
  24 /*      Multiple string constants are concatenated. Optionally, "void *"
  25 /*      is mapped to "char *", and plain "void" to "int".
  26 /*
  27 /*      tok_unget() implements an arbitrary amount of token pushback.
  28 /*      Only tokens obtained through tok_class() should be given to
  29 /*      tok_unget(). This function accepts a list of tokens in
  30 /*      last-read-first order.
  31 /* DIAGNOSTICS
  32 /*      The code complains if input terminates in the middle of a list.
  33 /* BUGS
  34 /*      Does not preserve white space at the beginning of a list element
  35 /*      or after the end of a list.
  36 /* AUTHOR(S)
  37 /*      Wietse Venema
  38 /*      Eindhoven University of Technology
  39 /*      Department of Mathematics and Computer Science
  40 /*      Den Dolech 2, P.O. Box 513, 5600 MB Eindhoven, The Netherlands
  41 /* LAST MODIFICATION
  42 /*      92/01/15 21:53:02
  43 /* VERSION/RELEASE
  44 /*      1.4
  45 /*--*/
  46
/* SCCS-style identification string ("@(#)" marker) for this source file. */
static char class_sccsid[] = "@(#) tok_class.c 1.4 92/01/15 21:53:02";
  48
  49 /* C library */
  50
#include <stdio.h>
#include <string.h>

extern char *strcpy();
extern long time();
extern char *ctime();
  56
  57 /* Application-specific stuff */
  58
  59 #include "error.h"
  60 #include "vstring.h"
  61 #include "token.h"
  62 #include "symbol.h"
  63
  64 static struct token *tok_list();
  65 static void tok_list_struct();
  66 static void tok_list_append();
  67 static void tok_strcat();
  68 static void tok_time();
  69 static void tok_date();
  70 static void tok_space_append();
  71
  72 #if defined(MAP_VOID_STAR) || defined(MAP_VOID)
  73 static void tok_void();                 /* rewrite void keyword */
  74 #endif
  75
  76 static struct token *tok_buf = 0;       /* token push-back storage */
  77
  78 /* TOK_PREPEND - add token to LIFO queue, return head */
  79
  80 #define TOK_PREPEND(list,t) (t->next = list, list = t)
  81
  82 /* tok_space_append - append trailing space except at start of or after list */
  83
  84 static void tok_space_append(list, t)
  85 register struct token *list;
  86 register struct token *t;
  87 {
  88
  89     /*
  90      * The head/tail fields of a token do triple duty. They are used to keep
  91      * track of the members that make up a (list); to keep track of the
  92      * non-blank tokens that make up one list member; and, finally, to tack
  93      * whitespace and comment tokens onto the non-blank tokens that make up
  94      * one list member.
  95      *
  96      * Within a (list), white space and comment tokens are always tacked onto
  97      * the non-blank tokens to avoid parsing complications later on. For this
  98      * reason, blanks and comments at the beginning of a list member are
  99      * discarded because there is no token to tack them onto. (Well, we could
 100      * start each list member with a dummy token, but that would mess up the
 101      * whole unprototyper).
 102      *
 103      * Blanks or comments that follow a (list) are discarded, because the
 104      * head/tail fields of a (list) are already being used for other
 105      * purposes.
 106      *
 107      * Newlines within a (list) are discarded because they can mess up the
 108      * output when we rewrite function headers. The output routines will
 109      * regenerate discarded newlines, anyway.
 110      */
 111
 112     if (list == 0 || list->tokno == TOK_LIST) {
 113         tok_free(t);
 114     } else {
 115         tok_list_append(list, t);
 116     }
 117 }
 118
 119 /* tok_class - discriminate single tokens, keywords, and composite tokens */
 120
 121 struct token *tok_class()
 122 {
 123     register struct token *t;
 124     register struct symbol *s;
 125
 126     /*
 127      * Use push-back token, if available. Push-back tokens are already
 128      * canonical and can be passed on to the caller without further
 129      * inspection.
 130      */
 131
 132     if (t = tok_buf) {
 133         tok_buf = t->next;
 134         t->next = 0;
 135         return (t);
 136     }
 137     /* Read a new token and canonicalize it. */
 138
 139     if (t = tok_get()) {
 140         switch (t->tokno) {
 141         case '(':                               /* beginning of list */
 142             t = tok_list(t);
 143             break;
 144         case TOK_WORD:                          /* look up keyword */
 145             if ((s = sym_find(t->vstr->str))) {
 146                 switch (s->type) {
 147                 case TOK_TIME:                  /* map __TIME__ to string */
 148                     tok_time(t);
 149                     tok_strcat(t);              /* look for more strings */
 150                     break;
 151                 case TOK_DATE:                  /* map __DATE__ to string */
 152                     tok_date(t);
 153                     tok_strcat(t);              /* look for more strings */
 154                     break;
 155 #if defined(MAP_VOID_STAR) || defined(MAP_VOID)
 156                 case TOK_VOID:                  /* optionally map void types */
 157                     tok_void(t);
 158                     break;
 159 #endif
 160                 default:                        /* other keyword */
 161                     t->tokno = s->type;
 162                     break;
 163                 }
 164             }
 165             break;
 166         case '"':                               /* string, look for more */
 167             tok_strcat(t);
 168             break;
 169         }
 170     }
 171     return (t);
 172 }
 173
 174 /* tok_list - collect ()-delimited, comma-separated list of tokens */
 175
 176 static struct token *tok_list(t)
 177 struct token *t;
 178 {
 179     register struct token *list = tok_alloc();
 180     char   *filename;
 181     int     lineno;
 182
 183     /* Save context of '(' for diagnostics. */
 184
 185     filename = t->path;
 186     lineno = t->line;
 187
 188     list->tokno = TOK_LIST;
 189     list->head = list->tail = t;
 190     list->path = t->path;
 191     list->line = t->line;
 192 #ifdef DEBUG
 193     strcpy(list->vstr->str, "LIST");
 194 #endif
 195
 196     /*
 197      * Read until the matching ')' is found, accounting for structured stuff
 198      * (enclosed by '{' and '}' tokens). Break the list up at each ',' token,
 199      * and try to preserve as much whitespace as possible. Newlines are
 200      * discarded so that they will not mess up the layout when we rewrite
 201      * argument lists. The output routines will regenerate discarded
 202      * newlines.
 203      */
 204
 205     while (t = tok_class()) {                   /* skip blanks */
 206         switch (t->tokno) {
 207         case ')':                               /* end of list */
 208             tok_list_append(list, t);
 209             return (list);
 210         case '{':                               /* struct/union type */
 211             tok_list_struct(list->tail, t);
 212             break;
 213         case TOK_WSPACE:                        /* preserve trailing blanks */
 214             tok_space_append(list->tail->tail, t);      /* except after list */
 215             break;
 216         case '\n':                              /* fix newlines later */
 217             tok_free(t);
 218             break;
 219         case ',':                               /* list separator */
 220             tok_list_append(list, t);
 221             break;
 222         default:                                /* other */
 223             tok_list_append(list->tail, t);
 224             break;
 225         }
 226     }
 227     error_where(filename, lineno, "unmatched '('");
 228     return (list);                              /* do not waste any data */
 229 }
 230
 231 /* tok_list_struct - collect structured type info within list */
 232
 233 static void tok_list_struct(list, t)
 234 register struct token *list;
 235 register struct token *t;
 236 {
 237     char   *filename;
 238     int     lineno;
 239
 240     /*
 241      * Save context of '{' for diagnostics. This routine is called by the one
 242      * that collects list members. If the '}' is not found, the list
 243      * collector will not see the closing ')' either.
 244      */
 245
 246     filename = t->path;
 247     lineno = t->line;
 248
 249     tok_list_append(list, t);
 250
 251     /*
 252      * Collect tokens until the matching '}' is found. Try to preserve as
 253      * much whitespace as possible. Newlines are discarded so that they do
 254      * not interfere when rewriting argument lists. The output routines will
 255      * regenerate discarded newlines.
 256      */
 257
 258     while (t = tok_class()) {
 259         switch (t->tokno) {
 260         case TOK_WSPACE:                        /* preserve trailing blanks */
 261             tok_space_append(list->tail, t);    /* except after list */
 262             break;
 263         case '\n':                              /* fix newlines later */
 264             tok_free(t);
 265             break;
 266         case '{':                               /* recurse */
 267             tok_list_struct(list, t);
 268             break;
 269         case '}':                               /* done */
 270             tok_list_append(list, t);
 271             return;
 272         default:                                /* other */
 273             tok_list_append(list, t);
 274             break;
 275         }
 276     }
 277     error_where(filename, lineno, "unmatched '{'");
 278 }
 279
 280 /* tok_strcat - concatenate multiple string constants */
 281
 282 static void tok_strcat(t1)
 283 register struct token *t1;
 284 {
 285     register struct token *t2;
 286     register struct token *lookahead = 0;
 287
 288     /*
 289      * Read ahead past whitespace, comments and newlines. If we find a string
 290      * token, concatenate it with the previous one and push back the
 291      * intervening tokens (thus preserving as much information as possible).
 292      * If we find something else, push back all lookahead tokens.
 293      */
 294
 295 #define PUSHBACK_AND_RETURN { if (lookahead) tok_unget(lookahead); return; }
 296
 297     while (t2 = tok_class()) {
 298         switch (t2->tokno) {
 299         case TOK_WSPACE:                        /* read past comments/blanks */
 300         case '\n':                              /* read past newlines */
 301             TOK_PREPEND(lookahead, t2);
 302             break;
 303         case '"':                               /* concatenate string tokens */
 304             if (vs_strcpy(t1->vstr,
 305                           t1->vstr->str + strlen(t1->vstr->str) - 1,
 306                           t2->vstr->str + 1) == 0)
 307                 fatal("out of memory");
 308             tok_free(t2);
 309             PUSHBACK_AND_RETURN;
 310         default:                                /* something else, push back */
 311             tok_unget(t2);
 312             PUSHBACK_AND_RETURN;
 313         }
 314     }
 315     PUSHBACK_AND_RETURN;                        /* hit EOF */
 316 }
 317
 318 #if defined(MAP_VOID_STAR) || defined(MAP_VOID)
 319
 320 /* tok_void - support for compilers that have problems with "void" */
 321
 322 static void tok_void(t)
 323 register struct token *t;
 324 {
 325     register struct token *t2;
 326     register struct token *lookahead = 0;
 327
 328     /*
 329      * Look ahead beyond whitespace, comments and newlines until we see a '*'
 330      * token. If one is found, replace "void" by "char". If we find something
 331      * else, and if "void" should always be mapped, replace "void" by "int".
 332      * Always push back the lookahead tokens.
 333      *
 334      * XXX The code also replaces the (void) argument list; this must be
 335      * accounted for later on. The alternative would be to add (in unproto.c)
 336      * TOK_VOID cases all over the place and that would be too error-prone.
 337      */
 338
 339 #define PUSHBACK_AND_RETURN { if (lookahead) tok_unget(lookahead); return; }
 340
 341     while (t2 = tok_class()) {
 342         switch (TOK_PREPEND(lookahead, t2)->tokno) {
 343         case TOK_WSPACE:                        /* read past comments/blanks */
 344         case '\n':                              /* read past newline */
 345             break;
 346         case '*':                               /* "void *" -> "char *" */
 347             if (vs_strcpy(t->vstr, t->vstr->str, "char") == 0)
 348                 fatal("out of memory");
 349             PUSHBACK_AND_RETURN;
 350         default:
 351 #ifdef MAP_VOID                                 /* plain "void" -> "int" */
 352             if (vs_strcpy(t->vstr, t->vstr->str, "int") == 0)
 353                 fatal("out of memory");
 354 #endif
 355             PUSHBACK_AND_RETURN;
 356         }
 357     }
 358     PUSHBACK_AND_RETURN;                        /* hit EOF */
 359 }
 360
 361 #endif
 362
 363 /* tok_time - rewrite __TIME__ to "hh:mm:ss" string constant */
 364
 365 static void tok_time(t)
 366 struct token *t;
 367 {
 368     long    now;
 369     char   *cp;
 370     char    buf[BUFSIZ];
 371
 372     /*
 373      * Using sprintf() to select parts of a string is gross, but this should
 374      * be fast enough.
 375      */
 376
 377     (void) time(&now);
 378     cp = ctime(&now);
 379     sprintf(buf, "\"%.8s\"", cp + 11);
 380     if (vs_strcpy(t->vstr, t->vstr->str, buf) == 0)
 381         fatal("out of memory");
 382     t->tokno = buf[0];
 383 }
 384
 385 /* tok_date - rewrite __DATE__ to "Mmm dd yyyy" string constant */
 386
 387 static void tok_date(t)
 388 struct token *t;
 389 {
 390     long    now;
 391     char   *cp;
 392     char    buf[BUFSIZ];
 393
 394     /*
 395      * Using sprintf() to select parts of a string is gross, but this should
 396      * be fast enough.
 397      */
 398
 399     (void) time(&now);
 400     cp = ctime(&now);
 401     sprintf(buf, "\"%.3s %.2s %.4s\"", cp + 4, cp + 8, cp + 20);
 402     if (vs_strcpy(t->vstr, t->vstr->str, buf) == 0)
 403         fatal("out of memory");
 404     t->tokno = buf[0];
 405 }
 406
 407 /* tok_unget - push back one or more possibly composite tokens */
 408
 409 void    tok_unget(t)
 410 register struct token *t;
 411 {
 412     register struct token *next;
 413
 414     do {
 415         next = t->next;
 416         TOK_PREPEND(tok_buf, t);
 417     } while (t = next);
 418 }
 419
 420 /* tok_list_append - append data to list */
 421
 422 static void tok_list_append(h, t)
 423 struct token *h;
 424 struct token *t;
 425 {
 426     if (h->head == 0) {
 427         h->head = h->tail = t;
 428     } else {
 429         h->tail->next = t;
 430         h->tail = t;
 431     }
 432 }