git.sur5r.net Git - cc65/blob - src/cc65/scanner.c

   1 /*****************************************************************************/
   2 /*                                                                           */
   3 /*                                 scanner.c                                 */
   4 /*                                                                           */
   5 /*                      Scanner for the cc65 C compiler                      */
   6 /*                                                                           */
   7 /*                                                                           */
   8 /*                                                                           */
   9 /* (C) 1998-2010, Ullrich von Bassewitz                                      */
  10 /*                Roemerstrasse 52                                           */
  11 /*                D-70794 Filderstadt                                        */
  12 /* EMail:         uz@cc65.org                                                */
  13 /*                                                                           */
  14 /*                                                                           */
  15 /* This software is provided 'as-is', without any expressed or implied       */
  16 /* warranty.  In no event will the authors be held liable for any damages    */
  17 /* arising from the use of this software.                                    */
  18 /*                                                                           */
  19 /* Permission is granted to anyone to use this software for any purpose,     */
  20 /* including commercial applications, and to alter it and redistribute it    */
  21 /* freely, subject to the following restrictions:                            */
  22 /*                                                                           */
  23 /* 1. The origin of this software must not be misrepresented; you must not   */
  24 /*    claim that you wrote the original software. If you use this software   */
  25 /*    in a product, an acknowledgment in the product documentation would be  */
  26 /*    appreciated but is not required.                                       */
  27 /* 2. Altered source versions must be plainly marked as such, and must not   */
  28 /*    be misrepresented as being the original software.                      */
  29 /* 3. This notice may not be removed or altered from any source              */
  30 /*    distribution.                                                          */
  31 /*                                                                           */
  32 /*****************************************************************************/
  33
  34
  35
  36 #include <stdio.h>
  37 #include <stdlib.h>
  38 #include <string.h>
  39 #include <errno.h>
  40 #include <ctype.h>
  41 #include <math.h>
  42
  43 /* common */
  44 #include "chartype.h"
  45 #include "fp.h"
  46 #include "tgttrans.h"
  47
  48 /* cc65 */
  49 #include "datatype.h"
  50 #include "error.h"
  51 #include "function.h"
  52 #include "global.h"
  53 #include "hexval.h"
  54 #include "ident.h"
  55 #include "input.h"
  56 #include "litpool.h"
  57 #include "preproc.h"
  58 #include "scanner.h"
  59 #include "standard.h"
  60 #include "symtab.h"
  61
  62
  63
  64 /*****************************************************************************/
  65 /*                                   data                                    */
  66 /*****************************************************************************/
  67
  68
  69
Token CurTok;           /* The current token; NextToken() copies NextTok here */
Token NextTok;          /* The next (lookahead) token, filled by the scan functions */
  72
  73
  74
/* Language standard flags for keywords: one bit per standard, at the bit
** position given by the corresponding STD_xxx value. FindKey() tests the
** bit selected by the active standard against the Std member of a keyword.
*/
enum {
    TT_C89      = 0x01 << STD_C89,      /* Token valid in C89 */
    TT_C99      = 0x01 << STD_C99,      /* Token valid in C99 */
    TT_CC65     = 0x01 << STD_CC65      /* Token valid in cc65 */
};
  81
  82 /* Token table */
  83 static const struct Keyword {
  84     char*           Key;        /* Keyword name */
  85     unsigned char   Tok;        /* The token */
  86     unsigned char   Std;        /* Token supported in which standards? */
  87 } Keywords [] = {
  88     { "_Pragma",        TOK_PRAGMA,     TT_C89 | TT_C99 | TT_CC65  },   /* !! */
  89     { "__AX__",         TOK_AX,         TT_C89 | TT_C99 | TT_CC65  },
  90     { "__A__",          TOK_A,          TT_C89 | TT_C99 | TT_CC65  },
  91     { "__EAX__",        TOK_EAX,        TT_C89 | TT_C99 | TT_CC65  },
  92     { "__X__",          TOK_X,          TT_C89 | TT_C99 | TT_CC65  },
  93     { "__Y__",          TOK_Y,          TT_C89 | TT_C99 | TT_CC65  },
  94     { "__asm__",        TOK_ASM,        TT_C89 | TT_C99 | TT_CC65  },
  95     { "__attribute__",  TOK_ATTRIBUTE,  TT_C89 | TT_C99 | TT_CC65  },
  96     { "__cdecl__",      TOK_CDECL,      TT_C89 | TT_C99 | TT_CC65  },
  97     { "__far__",        TOK_FAR,        TT_C89 | TT_C99 | TT_CC65  },
  98     { "__fastcall__",   TOK_FASTCALL,   TT_C89 | TT_C99 | TT_CC65  },
  99     { "__inline__",     TOK_INLINE,     TT_C89 | TT_C99 | TT_CC65  },
 100     { "__near__",       TOK_NEAR,       TT_C89 | TT_C99 | TT_CC65  },
 101     { "asm",            TOK_ASM,                          TT_CC65  },
 102     { "auto",           TOK_AUTO,       TT_C89 | TT_C99 | TT_CC65  },
 103     { "break",          TOK_BREAK,      TT_C89 | TT_C99 | TT_CC65  },
 104     { "case",           TOK_CASE,       TT_C89 | TT_C99 | TT_CC65  },
 105     { "cdecl",          TOK_CDECL,                        TT_CC65  },
 106     { "char",           TOK_CHAR,       TT_C89 | TT_C99 | TT_CC65  },
 107     { "const",          TOK_CONST,      TT_C89 | TT_C99 | TT_CC65  },
 108     { "continue",       TOK_CONTINUE,   TT_C89 | TT_C99 | TT_CC65  },
 109     { "default",        TOK_DEFAULT,    TT_C89 | TT_C99 | TT_CC65  },
 110     { "do",             TOK_DO,         TT_C89 | TT_C99 | TT_CC65  },
 111     { "double",         TOK_DOUBLE,     TT_C89 | TT_C99 | TT_CC65  },
 112     { "else",           TOK_ELSE,       TT_C89 | TT_C99 | TT_CC65  },
 113     { "enum",           TOK_ENUM,       TT_C89 | TT_C99 | TT_CC65  },
 114     { "extern",         TOK_EXTERN,     TT_C89 | TT_C99 | TT_CC65  },
 115     { "far",            TOK_FAR,                          TT_CC65  },
 116     { "fastcall",       TOK_FASTCALL,                     TT_CC65  },
 117     { "float",          TOK_FLOAT,      TT_C89 | TT_C99 | TT_CC65  },
 118     { "for",            TOK_FOR,        TT_C89 | TT_C99 | TT_CC65  },
 119     { "goto",           TOK_GOTO,       TT_C89 | TT_C99 | TT_CC65  },
 120     { "if",             TOK_IF,         TT_C89 | TT_C99 | TT_CC65  },
 121     { "inline",         TOK_INLINE,              TT_C99 | TT_CC65  },
 122     { "int",            TOK_INT,        TT_C89 | TT_C99 | TT_CC65  },
 123     { "long",           TOK_LONG,       TT_C89 | TT_C99 | TT_CC65  },
 124     { "near",           TOK_NEAR,                         TT_CC65  },
 125     { "register",       TOK_REGISTER,   TT_C89 | TT_C99 | TT_CC65  },
 126     { "restrict",       TOK_RESTRICT,            TT_C99 | TT_CC65  },
 127     { "return",         TOK_RETURN,     TT_C89 | TT_C99 | TT_CC65  },
 128     { "short",          TOK_SHORT,      TT_C89 | TT_C99 | TT_CC65  },
 129     { "signed",         TOK_SIGNED,     TT_C89 | TT_C99 | TT_CC65  },
 130     { "sizeof",         TOK_SIZEOF,     TT_C89 | TT_C99 | TT_CC65  },
 131     { "static",         TOK_STATIC,     TT_C89 | TT_C99 | TT_CC65  },
 132     { "struct",         TOK_STRUCT,     TT_C89 | TT_C99 | TT_CC65  },
 133     { "switch",         TOK_SWITCH,     TT_C89 | TT_C99 | TT_CC65  },
 134     { "typedef",        TOK_TYPEDEF,    TT_C89 | TT_C99 | TT_CC65  },
 135     { "union",          TOK_UNION,      TT_C89 | TT_C99 | TT_CC65  },
 136     { "unsigned",       TOK_UNSIGNED,   TT_C89 | TT_C99 | TT_CC65  },
 137     { "void",           TOK_VOID,       TT_C89 | TT_C99 | TT_CC65  },
 138     { "volatile",       TOK_VOLATILE,   TT_C89 | TT_C99 | TT_CC65  },
 139     { "while",          TOK_WHILE,      TT_C89 | TT_C99 | TT_CC65  },
 140 };
 141 #define KEY_COUNT       (sizeof (Keywords) / sizeof (Keywords [0]))
 142
 143
 144
 145 /* Stuff for determining the type of an integer constant */
 146 #define IT_INT          0x01
 147 #define IT_UINT         0x02
 148 #define IT_LONG         0x04
 149 #define IT_ULONG        0x08
 150
 151
 152
 153 /*****************************************************************************/
 154 /*                                   code                                    */
 155 /*****************************************************************************/
 156
 157
 158
 159 static int CmpKey (const void* Key, const void* Elem)
 160 /* Compare function for bsearch */
 161 {
 162     return strcmp ((const char*) Key, ((const struct Keyword*) Elem)->Key);
 163 }
 164
 165
 166
 167 static token_t FindKey (const char* Key)
 168 /* Find a keyword and return the token. Return IDENT if the token is not a
 169 ** keyword.
 170 */
 171 {
 172     struct Keyword* K;
 173     K = bsearch (Key, Keywords, KEY_COUNT, sizeof (Keywords [0]), CmpKey);
 174     if (K && (K->Std & (0x01 << IS_Get (&Standard))) != 0) {
 175         return K->Tok;
 176     } else {
 177         return TOK_IDENT;
 178     }
 179 }
 180
 181
 182
 183 static int SkipWhite (void)
 184 /* Skip white space in the input stream, reading and preprocessing new lines
 185 ** if necessary. Return 0 if end of file is reached, return 1 otherwise.
 186 */
 187 {
 188     while (1) {
 189         while (CurC == '\0') {
 190             if (NextLine () == 0) {
 191                 return 0;
 192             }
 193             Preprocess ();
 194         }
 195         if (IsSpace (CurC)) {
 196             NextChar ();
 197         } else {
 198             return 1;
 199         }
 200     }
 201 }
 202
 203
 204
 205 int TokIsFuncSpec (const Token* T)
 206 /* Return true if the token is a function specifier */
 207 {
 208     return (T->Tok == TOK_INLINE)   ||
 209            (T->Tok == TOK_FASTCALL) || (T->Tok == TOK_CDECL) ||
 210            (T->Tok == TOK_NEAR)     || (T->Tok == TOK_FAR);
 211 }
 212
 213
 214
 215 void SymName (char* S)
 216 /* Read a symbol from the input stream. The first character must have been
 217 ** checked before calling this function. The buffer is expected to be at
 218 ** least of size MAX_IDENTLEN+1.
 219 */
 220 {
 221     unsigned Len = 0;
 222     do {
 223         if (Len < MAX_IDENTLEN) {
 224             ++Len;
 225             *S++ = CurC;
 226         }
 227         NextChar ();
 228     } while (IsIdent (CurC) || IsDigit (CurC));
 229     *S = '\0';
 230 }
 231
 232
 233
 234 int IsSym (char* S)
 235 /* If a symbol follows, read it and return 1, otherwise return 0 */
 236 {
 237     if (IsIdent (CurC)) {
 238         SymName (S);
 239         return 1;
 240     } else {
 241         return 0;
 242     }
 243 }
 244
 245
 246
 247 static void UnknownChar (char C)
 248 /* Error message for unknown character */
 249 {
 250     Error ("Invalid input character with code %02X", C & 0xFF);
 251     NextChar ();                        /* Skip */
 252 }
 253
 254
 255
static void SetTok (int tok)
/* Store tok as the type of the lookahead token (NextTok.Tok) and advance the
** input by one character. NextToken() uses this for tokens whose last
** character is still the current input character.
*/
{
    NextTok.Tok = tok;
    NextChar ();
}
 262
 263
 264
 265 static int ParseChar (void)
 266 /* Parse a character. Converts escape chars into character codes. */
 267 {
 268     int C;
 269     int HadError;
 270     int Count;
 271
 272     /* Check for escape chars */
 273     if (CurC == '\\') {
 274         NextChar ();
 275         switch (CurC) {
 276             case '?':
 277                 C = '\?';
 278                 break;
 279             case 'a':
 280                 C = '\a';
 281                 break;
 282             case 'b':
 283                 C = '\b';
 284                 break;
 285             case 'f':
 286                 C = '\f';
 287                 break;
 288             case 'r':
 289                 C = '\r';
 290                 break;
 291             case 'n':
 292                 C = '\n';
 293                 break;
 294             case 't':
 295                 C = '\t';
 296                 break;
 297             case 'v':
 298                 C = '\v';
 299                 break;
 300             case '\"':
 301                 C = '\"';
 302                 break;
 303             case '\'':
 304                 C = '\'';
 305                 break;
 306             case '\\':
 307                 C = '\\';
 308                 break;
 309             case 'x':
 310             case 'X':
 311                 /* Hex character constant */
 312                 if (!IsXDigit (NextC)) {
 313                     Error ("\\x used with no following hex digits");
 314                     C = ' ';
 315                 } else {
 316                     HadError = 0;
 317                     C = 0;
 318                     while (IsXDigit (NextC)) {
 319                         if ((C << 4) >= 256) {
 320                             if (!HadError) {
 321                                 Error ("Hex character constant out of range");
 322                                 HadError = 1;
 323                             }
 324                         } else {
 325                             C = (C << 4) | HexVal (NextC);
 326                         }
 327                         NextChar ();
 328                     }
 329                 }
 330                 break;
 331             case '0':
 332             case '1':
 333             case '2':
 334             case '3':
 335             case '4':
 336             case '5':
 337             case '6':
 338             case '7':
 339                 /* Octal constant */
 340                 Count = 1;
 341                 C = HexVal (CurC);
 342                 while (IsODigit (NextC) && Count++ < 3) {
 343                     C = (C << 3) | HexVal (NextC);
 344                     NextChar ();
 345                 }
 346                 if (C >= 256)
 347                     Error ("Octal character constant out of range");
 348                 break;
 349             default:
 350                 C = CurC;
 351                 Error ("Illegal escaped character: 0x%02X", CurC);
 352                 break;
 353         }
 354     } else {
 355         C = CurC;
 356     }
 357
 358     /* Skip the character read */
 359     NextChar ();
 360
 361     /* Do correct sign extension */
 362     return SignExtendChar (C);
 363 }
 364
 365
 366
 367 static void CharConst (void)
 368 /* Parse a character constant. */
 369 {
 370     int C;
 371
 372     /* Skip the quote */
 373     NextChar ();
 374
 375     /* Get character */
 376     C = ParseChar ();
 377
 378     /* Check for closing quote */
 379     if (CurC != '\'') {
 380         Error ("`\'' expected");
 381     } else {
 382         /* Skip the quote */
 383         NextChar ();
 384     }
 385
 386     /* Setup values and attributes */
 387     NextTok.Tok  = TOK_CCONST;
 388
 389     /* Translate into target charset */
 390     NextTok.IVal = SignExtendChar (TgtTranslateChar (C));
 391
 392     /* Character constants have type int */
 393     NextTok.Type = type_int;
 394 }
 395
 396
 397
 398 static void StringConst (void)
 399 /* Parse a quoted string */
 400 {
 401     /* String buffer */
 402     StrBuf S = AUTO_STRBUF_INITIALIZER;
 403
 404     /* Assume next token is a string constant */
 405     NextTok.Tok  = TOK_SCONST;
 406
 407     /* Concatenate strings. If at least one of the concenated strings is a wide
 408     ** character literal, the whole string is a wide char literal, otherwise
 409     ** it's a normal string literal.
 410     */
 411     while (1) {
 412
 413         /* Check if this is a normal or a wide char string */
 414         if (CurC == 'L' && NextC == '\"') {
 415             /* Wide character literal */
 416             NextTok.Tok = TOK_WCSCONST;
 417             NextChar ();
 418             NextChar ();
 419         } else if (CurC == '\"') {
 420             /* Skip the quote char */
 421             NextChar ();
 422         } else {
 423             /* No string */
 424             break;
 425         }
 426
 427         /* Read until end of string */
 428         while (CurC != '\"') {
 429             if (CurC == '\0') {
 430                 Error ("Unexpected newline");
 431                 break;
 432             }
 433             SB_AppendChar (&S, ParseChar ());
 434         }
 435
 436         /* Skip closing quote char if there was one */
 437         NextChar ();
 438
 439         /* Skip white space, read new input */
 440         SkipWhite ();
 441
 442     }
 443
 444     /* Terminate the string */
 445     SB_AppendChar (&S, '\0');
 446
 447     /* Add the whole string to the literal pool */
 448     NextTok.SVal = AddLiteralStr (&S);
 449
 450     /* Free the buffer */
 451     SB_Done (&S);
 452 }
 453
 454
 455
 456 static void NumericConst (void)
 457 /* Parse a numeric constant */
 458 {
 459     unsigned Base;              /* Temporary number base */
 460     unsigned Prefix;            /* Base according to prefix */
 461     StrBuf   S = STATIC_STRBUF_INITIALIZER;
 462     int      IsFloat;
 463     char     C;
 464     unsigned DigitVal;
 465     unsigned long IVal;         /* Value */
 466
 467     /* Check for a leading hex or octal prefix and determine the possible
 468     ** integer types.
 469     */
 470     if (CurC == '0') {
 471         /* Gobble 0 and examine next char */
 472         NextChar ();
 473         if (toupper (CurC) == 'X') {
 474             Base = Prefix = 16;
 475             NextChar ();        /* gobble "x" */
 476         } else {
 477             Base = 10;          /* Assume 10 for now - see below */
 478             Prefix = 8;         /* Actual prefix says octal */
 479         }
 480     } else {
 481         Base  = Prefix = 10;
 482     }
 483
 484     /* Because floating point numbers don't have octal prefixes (a number
 485     ** with a leading zero is decimal), we first have to read the number
 486     ** before converting it, so we can determine if it's a float or an
 487     ** integer.
 488     */
 489     while (IsXDigit (CurC) && HexVal (CurC) < Base) {
 490         SB_AppendChar (&S, CurC);
 491         NextChar ();
 492     }
 493     SB_Terminate (&S);
 494
 495     /* The following character tells us if we have an integer or floating
 496     ** point constant. Note: Hexadecimal floating point constants aren't
 497     ** supported in C89.
 498     */
 499     IsFloat = (CurC == '.' ||
 500                (Base == 10 && toupper (CurC) == 'E') ||
 501                (Base == 16 && toupper (CurC) == 'P' && IS_Get (&Standard) >= STD_C99));
 502
 503     /* If we don't have a floating point type, an octal prefix results in an
 504     ** octal base.
 505     */
 506     if (!IsFloat && Prefix == 8) {
 507         Base = 8;
 508     }
 509
 510     /* Since we do now know the correct base, convert the remembered input
 511     ** into a number.
 512     */
 513     SB_Reset (&S);
 514     IVal = 0;
 515     while ((C = SB_Get (&S)) != '\0') {
 516         DigitVal = HexVal (C);
 517         if (DigitVal >= Base) {
 518             Error ("Numeric constant contains digits beyond the radix");
 519         }
 520         IVal = (IVal * Base) + DigitVal;
 521     }
 522
 523     /* We don't need the string buffer any longer */
 524     SB_Done (&S);
 525
 526     /* Distinguish between integer and floating point constants */
 527     if (!IsFloat) {
 528
 529         unsigned Types;
 530         int      HaveSuffix;
 531
 532         /* Check for a suffix and determine the possible types */
 533         HaveSuffix = 1;
 534         if (toupper (CurC) == 'U') {
 535             /* Unsigned type */
 536             NextChar ();
 537             if (toupper (CurC) != 'L') {
 538                 Types = IT_UINT | IT_ULONG;
 539             } else {
 540                 NextChar ();
 541                 Types = IT_ULONG;
 542             }
 543         } else if (toupper (CurC) == 'L') {
 544             /* Long type */
 545             NextChar ();
 546             if (toupper (CurC) != 'U') {
 547                 Types = IT_LONG | IT_ULONG;
 548             } else {
 549                 NextChar ();
 550                 Types = IT_ULONG;
 551             }
 552         } else {
 553             HaveSuffix = 0;
 554             if (Prefix == 10) {
 555                 /* Decimal constants are of any type but uint */
 556                 Types = IT_INT | IT_LONG | IT_ULONG;
 557             } else {
 558                 /* Octal or hex constants are of any type */
 559                 Types = IT_INT | IT_UINT | IT_LONG | IT_ULONG;
 560             }
 561         }
 562
 563         /* Check the range to determine the type */
 564         if (IVal > 0x7FFF) {
 565             /* Out of range for int */
 566             Types &= ~IT_INT;
 567             /* If the value is in the range 0x8000..0xFFFF, unsigned int is not
 568             ** allowed, and we don't have a type specifying suffix, emit a
 569             ** warning, because the constant is of type long.
 570             */
 571             if (IVal <= 0xFFFF && (Types & IT_UINT) == 0 && !HaveSuffix) {
 572                 Warning ("Constant is long");
 573             }
 574         }
 575         if (IVal > 0xFFFF) {
 576             /* Out of range for unsigned int */
 577             Types &= ~IT_UINT;
 578         }
 579         if (IVal > 0x7FFFFFFF) {
 580             /* Out of range for long int */
 581             Types &= ~IT_LONG;
 582         }
 583
 584         /* Now set the type string to the smallest type in types */
 585         if (Types & IT_INT) {
 586             NextTok.Type = type_int;
 587         } else if (Types & IT_UINT) {
 588             NextTok.Type = type_uint;
 589         } else if (Types & IT_LONG) {
 590             NextTok.Type = type_long;
 591         } else {
 592             NextTok.Type = type_ulong;
 593         }
 594
 595         /* Set the value and the token */
 596         NextTok.IVal = IVal;
 597         NextTok.Tok  = TOK_ICONST;
 598
 599     } else {
 600
 601         /* Float constant */
 602         Double FVal = FP_D_FromInt (IVal);      /* Convert to double */
 603
 604         /* Check for a fractional part and read it */
 605         if (CurC == '.') {
 606
 607             Double Scale;
 608
 609             /* Skip the dot */
 610             NextChar ();
 611
 612             /* Read fractional digits */
 613             Scale  = FP_D_Make (1.0);
 614             while (IsXDigit (CurC) && (DigitVal = HexVal (CurC)) < Base) {
 615                 /* Get the value of this digit */
 616                 Double FracVal = FP_D_Div (FP_D_FromInt (DigitVal * Base), Scale);
 617                 /* Add it to the float value */
 618                 FVal = FP_D_Add (FVal, FracVal);
 619                 /* Scale base */
 620                 Scale = FP_D_Mul (Scale, FP_D_FromInt (DigitVal));
 621                 /* Skip the digit */
 622                 NextChar ();
 623             }
 624         }
 625
 626         /* Check for an exponent and read it */
 627         if ((Base == 16 && toupper (CurC) == 'F') ||
 628             (Base == 10 && toupper (CurC) == 'E')) {
 629
 630             unsigned Digits;
 631             unsigned Exp;
 632
 633             /* Skip the exponent notifier */
 634             NextChar ();
 635
 636             /* Read an optional sign */
 637             if (CurC == '-') {
 638                 NextChar ();
 639             } else if (CurC == '+') {
 640                 NextChar ();
 641             }
 642
 643             /* Read exponent digits. Since we support only 32 bit floats
 644             ** with a maximum exponent of +-/127, we read the exponent
 645             ** part as integer with up to 3 digits and drop the remainder.
 646             ** This avoids an overflow of Exp. The exponent is always
 647             ** decimal, even for hex float consts.
 648             */
 649             Digits = 0;
 650             Exp    = 0;
 651             while (IsDigit (CurC)) {
 652                 if (++Digits <= 3) {
 653                     Exp = Exp * 10 + HexVal (CurC);
 654                 }
 655                 NextChar ();
 656             }
 657
 658             /* Check for errors: We must have exponent digits, and not more
 659             ** than three.
 660             */
 661             if (Digits == 0) {
 662                 Error ("Floating constant exponent has no digits");
 663             } else if (Digits > 3) {
 664                 Warning ("Floating constant exponent is too large");
 665             }
 666
 667             /* Scale the exponent and adjust the value accordingly */
 668             if (Exp) {
 669                 FVal = FP_D_Mul (FVal, FP_D_Make (pow (10, Exp)));
 670             }
 671         }
 672
 673         /* Check for a suffix and determine the type of the constant */
 674         if (toupper (CurC) == 'F') {
 675             NextChar ();
 676             NextTok.Type = type_float;
 677         } else {
 678             NextTok.Type = type_double;
 679         }
 680
 681         /* Set the value and the token */
 682         NextTok.FVal = FVal;
 683         NextTok.Tok  = TOK_FCONST;
 684
 685     }
 686 }
 687
 688
 689
 690 void NextToken (void)
 691 /* Get next token from input stream */
 692 {
 693     ident token;
 694
 695     /* We have to skip white space here before shifting tokens, since the
 696     ** tokens and the current line info is invalid at startup and will get
 697     ** initialized by reading the first time from the file. Remember if
 698     ** we were at end of input and handle that later.
 699     */
 700     int GotEOF = (SkipWhite() == 0);
 701
 702     /* Current token is the lookahead token */
 703     if (CurTok.LI) {
 704         ReleaseLineInfo (CurTok.LI);
 705     }
 706     CurTok = NextTok;
 707
 708     /* When reading the first time from the file, the line info in NextTok,
 709     ** which was copied to CurTok is invalid. Since the information from
 710     ** the token is used for error messages, we must make it valid.
 711     */
 712     if (CurTok.LI == 0) {
 713         CurTok.LI = UseLineInfo (GetCurLineInfo ());
 714     }
 715
 716     /* Remember the starting position of the next token */
 717     NextTok.LI = UseLineInfo (GetCurLineInfo ());
 718
 719     /* Now handle end of input. */
 720     if (GotEOF) {
 721         /* End of file reached */
 722         NextTok.Tok = TOK_CEOF;
 723         return;
 724     }
 725
 726     /* Determine the next token from the lookahead */
 727     if (IsDigit (CurC) || (CurC == '.' && IsDigit (NextC))) {
 728         /* A number */
 729         NumericConst ();
 730         return;
 731     }
 732
 733     /* Check for wide character literals */
 734     if (CurC == 'L' && NextC == '\"') {
 735         StringConst ();
 736         return;
 737     }
 738
 739     /* Check for keywords and identifiers */
 740     if (IsSym (token)) {
 741
 742         /* Check for a keyword */
 743         if ((NextTok.Tok = FindKey (token)) != TOK_IDENT) {
 744             /* Reserved word found */
 745             return;
 746         }
 747         /* No reserved word, check for special symbols */
 748         if (token[0] == '_' && token[1] == '_') {
 749             /* Special symbols */
 750             if (strcmp (token+2, "FILE__") == 0) {
 751                 NextTok.SVal = AddLiteral (GetCurrentFile());
 752                 NextTok.Tok  = TOK_SCONST;
 753                 return;
 754             } else if (strcmp (token+2, "LINE__") == 0) {
 755                 NextTok.Tok  = TOK_ICONST;
 756                 NextTok.IVal = GetCurrentLine();
 757                 NextTok.Type = type_int;
 758                 return;
 759             } else if (strcmp (token+2, "func__") == 0) {
 760                 /* __func__ is only defined in functions */
 761                 if (CurrentFunc) {
 762                     NextTok.SVal = AddLiteral (F_GetFuncName (CurrentFunc));
 763                     NextTok.Tok  = TOK_SCONST;
 764                     return;
 765                 }
 766             }
 767         }
 768
 769         /* No reserved word but identifier */
 770         strcpy (NextTok.Ident, token);
 771         NextTok.Tok = TOK_IDENT;
 772         return;
 773     }
 774
 775     /* Monstrous switch statement ahead... */
 776     switch (CurC) {
 777
 778         case '!':
 779             NextChar ();
 780             if (CurC == '=') {
 781                 SetTok (TOK_NE);
 782             } else {
 783                 NextTok.Tok = TOK_BOOL_NOT;
 784             }
 785             break;
 786
 787         case '\"':
 788             StringConst ();
 789             break;
 790
 791         case '%':
 792             NextChar ();
 793             if (CurC == '=') {
 794                 SetTok (TOK_MOD_ASSIGN);
 795             } else {
 796                 NextTok.Tok = TOK_MOD;
 797             }
 798             break;
 799
 800         case '&':
 801             NextChar ();
 802             switch (CurC) {
 803                 case '&':
 804                     SetTok (TOK_BOOL_AND);
 805                     break;
 806                 case '=':
 807                     SetTok (TOK_AND_ASSIGN);
 808                     break;
 809                 default:
 810                     NextTok.Tok = TOK_AND;
 811             }
 812             break;
 813
 814         case '\'':
 815             CharConst ();
 816             break;
 817
 818         case '(':
 819             SetTok (TOK_LPAREN);
 820             break;
 821
 822         case ')':
 823             SetTok (TOK_RPAREN);
 824             break;
 825
 826         case '*':
 827             NextChar ();
 828             if (CurC == '=') {
 829                 SetTok (TOK_MUL_ASSIGN);
 830             } else {
 831                 NextTok.Tok = TOK_STAR;
 832             }
 833             break;
 834
 835         case '+':
 836             NextChar ();
 837             switch (CurC) {
 838                 case '+':
 839                     SetTok (TOK_INC);
 840                     break;
 841                 case '=':
 842                     SetTok (TOK_PLUS_ASSIGN);
 843                     break;
 844                 default:
 845                     NextTok.Tok = TOK_PLUS;
 846             }
 847             break;
 848
 849         case ',':
 850             SetTok (TOK_COMMA);
 851             break;
 852
 853         case '-':
 854             NextChar ();
 855             switch (CurC) {
 856                 case '-':
 857                     SetTok (TOK_DEC);
 858                     break;
 859                 case '=':
 860                     SetTok (TOK_MINUS_ASSIGN);
 861                     break;
 862                 case '>':
 863                     SetTok (TOK_PTR_REF);
 864                     break;
 865                 default:
 866                     NextTok.Tok = TOK_MINUS;
 867             }
 868             break;
 869
 870         case '.':
 871             NextChar ();
 872             if (CurC == '.') {
 873                 NextChar ();
 874                 if (CurC == '.') {
 875                     SetTok (TOK_ELLIPSIS);
 876                 } else {
 877                     UnknownChar (CurC);
 878                 }
 879             } else {
 880                 NextTok.Tok = TOK_DOT;
 881             }
 882             break;
 883
 884         case '/':
 885             NextChar ();
 886             if (CurC == '=') {
 887                 SetTok (TOK_DIV_ASSIGN);
 888             } else {
 889                 NextTok.Tok = TOK_DIV;
 890             }
 891             break;
 892
 893         case ':':
 894             SetTok (TOK_COLON);
 895             break;
 896
 897         case ';':
 898             SetTok (TOK_SEMI);
 899             break;
 900
 901         case '<':
 902             NextChar ();
 903             switch (CurC) {
 904                 case '=':
 905                     SetTok (TOK_LE);
 906                     break;
 907                 case '<':
 908                     NextChar ();
 909                     if (CurC == '=') {
 910                         SetTok (TOK_SHL_ASSIGN);
 911                     } else {
 912                         NextTok.Tok = TOK_SHL;
 913                     }
 914                     break;
 915                 default:
 916                     NextTok.Tok = TOK_LT;
 917             }
 918             break;
 919
 920         case '=':
 921             NextChar ();
 922             if (CurC == '=') {
 923                 SetTok (TOK_EQ);
 924             } else {
 925                 NextTok.Tok = TOK_ASSIGN;
 926             }
 927             break;
 928
 929         case '>':
 930             NextChar ();
 931             switch (CurC) {
 932                 case '=':
 933                     SetTok (TOK_GE);
 934                     break;
 935                 case '>':
 936                     NextChar ();
 937                     if (CurC == '=') {
 938                         SetTok (TOK_SHR_ASSIGN);
 939                     } else {
 940                         NextTok.Tok = TOK_SHR;
 941                     }
 942                     break;
 943                 default:
 944                     NextTok.Tok = TOK_GT;
 945             }
 946             break;
 947
 948         case '?':
 949             SetTok (TOK_QUEST);
 950             break;
 951
 952         case '[':
 953             SetTok (TOK_LBRACK);
 954             break;
 955
 956         case ']':
 957             SetTok (TOK_RBRACK);
 958             break;
 959
 960         case '^':
 961             NextChar ();
 962             if (CurC == '=') {
 963                 SetTok (TOK_XOR_ASSIGN);
 964             } else {
 965                 NextTok.Tok = TOK_XOR;
 966             }
 967             break;
 968
 969         case '{':
 970             SetTok (TOK_LCURLY);
 971             break;
 972
 973         case '|':
 974             NextChar ();
 975             switch (CurC) {
 976                 case '|':
 977                     SetTok (TOK_BOOL_OR);
 978                     break;
 979                 case '=':
 980                     SetTok (TOK_OR_ASSIGN);
 981                     break;
 982                 default:
 983                     NextTok.Tok = TOK_OR;
 984             }
 985             break;
 986
 987         case '}':
 988             SetTok (TOK_RCURLY);
 989             break;
 990
 991         case '~':
 992             SetTok (TOK_COMP);
 993             break;
 994
 995         default:
 996             UnknownChar (CurC);
 997
 998     }
 999
1000 }
1001
1002
1003
1004 void SkipTokens (const token_t* TokenList, unsigned TokenCount)
1005 /* Skip tokens until we reach TOK_CEOF or a token in the given token list.
1006 ** This routine is used for error recovery.
1007 */
1008 {
1009     while (CurTok.Tok != TOK_CEOF) {
1010
1011         /* Check if the current token is in the token list */
1012         unsigned I;
1013         for (I = 0; I < TokenCount; ++I) {
1014             if (CurTok.Tok == TokenList[I]) {
1015                 /* Found a token in the list */
1016                 return;
1017             }
1018         }
1019
1020         /* Not in the list: Skip it */
1021         NextToken ();
1022
1023     }
1024 }
1025
1026
1027
1028 int Consume (token_t Token, const char* ErrorMsg)
1029 /* Eat token if it is the next in the input stream, otherwise print an error
1030 ** message. Returns true if the token was found and false otherwise.
1031 */
1032 {
1033     if (CurTok.Tok == Token) {
1034         NextToken ();
1035         return 1;
1036     } else {
1037         Error ("%s", ErrorMsg);
1038         return 0;
1039     }
1040 }
1041
1042
1043
1044 int ConsumeColon (void)
1045 /* Check for a colon and skip it. */
1046 {
1047     return Consume (TOK_COLON, "`:' expected");
1048 }
1049
1050
1051
1052 int ConsumeSemi (void)
1053 /* Check for a semicolon and skip it. */
1054 {
1055     /* Try do be smart about typos... */
1056     if (CurTok.Tok == TOK_SEMI) {
1057         NextToken ();
1058         return 1;
1059     } else {
1060         Error ("`;' expected");
1061         if (CurTok.Tok == TOK_COLON || CurTok.Tok == TOK_COMMA) {
1062             NextToken ();
1063         }
1064         return 0;
1065     }
1066 }
1067
1068
1069
1070 int ConsumeComma (void)
1071 /* Check for a comma and skip it. */
1072 {
1073     /* Try do be smart about typos... */
1074     if (CurTok.Tok == TOK_COMMA) {
1075         NextToken ();
1076         return 1;
1077     } else {
1078         Error ("`,' expected");
1079         if (CurTok.Tok == TOK_SEMI) {
1080             NextToken ();
1081         }
1082         return 0;
1083     }
1084 }
1085
1086
1087
1088 int ConsumeLParen (void)
1089 /* Check for a left parenthesis and skip it */
1090 {
1091     return Consume (TOK_LPAREN, "`(' expected");
1092 }
1093
1094
1095
1096 int ConsumeRParen (void)
1097 /* Check for a right parenthesis and skip it */
1098 {
1099     return Consume (TOK_RPAREN, "`)' expected");
1100 }
1101
1102
1103
1104 int ConsumeLBrack (void)
1105 /* Check for a left bracket and skip it */
1106 {
1107     return Consume (TOK_LBRACK, "`[' expected");
1108 }
1109
1110
1111
1112 int ConsumeRBrack (void)
1113 /* Check for a right bracket and skip it */
1114 {
1115     return Consume (TOK_RBRACK, "`]' expected");
1116 }
1117
1118
1119
1120 int ConsumeLCurly (void)
1121 /* Check for a left curly brace and skip it */
1122 {
1123     return Consume (TOK_LCURLY, "`{' expected");
1124 }
1125
1126
1127
1128 int ConsumeRCurly (void)
1129 /* Check for a right curly brace and skip it */
1130 {
1131     return Consume (TOK_RCURLY, "`}' expected");
1132 }