git.sur5r.net Git - cc65/blob - src/cc65/scanner.c

   1 /*****************************************************************************/
   2 /*                                                                           */
   3 /*                                 scanner.c                                 */
   4 /*                                                                           */
   5 /*                    Lexical scanner for the C compiler                     */
   6 /*                                                                           */
   7 /*                                                                           */
   8 /*                                                                           */
   9 /* (C) 1998-2009, Ullrich von Bassewitz                                      */
  10 /*                Roemerstrasse 52                                           */
  11 /*                D-70794 Filderstadt                                        */
  12 /* EMail:         uz@cc65.org                                                */
  13 /*                                                                           */
  14 /*                                                                           */
  15 /* This software is provided 'as-is', without any expressed or implied       */
  16 /* warranty.  In no event will the authors be held liable for any damages    */
  17 /* arising from the use of this software.                                    */
  18 /*                                                                           */
  19 /* Permission is granted to anyone to use this software for any purpose,     */
  20 /* including commercial applications, and to alter it and redistribute it    */
  21 /* freely, subject to the following restrictions:                            */
  22 /*                                                                           */
  23 /* 1. The origin of this software must not be misrepresented; you must not   */
  24 /*    claim that you wrote the original software. If you use this software   */
  25 /*    in a product, an acknowledgment in the product documentation would be  */
  26 /*    appreciated but is not required.                                       */
  27 /* 2. Altered source versions must be plainly marked as such, and must not   */
  28 /*    be misrepresented as being the original software.                      */
  29 /* 3. This notice may not be removed or altered from any source              */
  30 /*    distribution.                                                          */
  31 /*                                                                           */
  32 /*****************************************************************************/
  33
  34
  35
  36 #include <stdio.h>
  37 #include <stdlib.h>
  38 #include <string.h>
  39 #include <errno.h>
  40 #include <ctype.h>
  41 #include <math.h>
  42
  43 /* common */
  44 #include "chartype.h"
  45 #include "fp.h"
  46 #include "tgttrans.h"
  47
  48 /* cc65 */
  49 #include "datatype.h"
  50 #include "error.h"
  51 #include "function.h"
  52 #include "global.h"
  53 #include "hexval.h"
  54 #include "ident.h"
  55 #include "input.h"
  56 #include "litpool.h"
  57 #include "preproc.h"
  58 #include "scanner.h"
  59 #include "standard.h"
  60 #include "symtab.h"
  61
  62
  63
  64 /*****************************************************************************/
  65 /*                                   data                                    */
  66 /*****************************************************************************/
  67
  68
  69
/* The scanner keeps two tokens: NextToken() copies NextTok into CurTok and
 * then scans the input to fill NextTok again.
 */
Token CurTok;           /* The current token */
Token NextTok;          /* The next token */
  72
  73
  74
/* Token types. Each flag is a single bit whose position is the numeric value
 * of the corresponding language standard constant, so FindKey() can test a
 * keyword against the active standard with (0x01 << IS_Get (&Standard)).
 */
enum {
    TT_C89      = 0x01 << STD_C89,      /* Token valid in C89 */
    TT_C99      = 0x01 << STD_C99,      /* Token valid in C99 */
    TT_CC65     = 0x01 << STD_CC65      /* Token valid in cc65 */
};
  81
  82 /* Token table */
  83 static const struct Keyword {
  84     char*           Key;        /* Keyword name */
  85     unsigned char   Tok;        /* The token */
  86     unsigned char   Std;        /* Token supported in which standards? */
  87 } Keywords [] = {
  88     { "_Pragma",        TOK_PRAGMA,     TT_C89 | TT_C99 | TT_CC65  },   /* !! */
  89     { "__AX__",         TOK_AX,         TT_C89 | TT_C99 | TT_CC65  },
  90     { "__A__",          TOK_A,          TT_C89 | TT_C99 | TT_CC65  },
  91     { "__EAX__",        TOK_EAX,        TT_C89 | TT_C99 | TT_CC65  },
  92     { "__X__",          TOK_X,          TT_C89 | TT_C99 | TT_CC65  },
  93     { "__Y__",          TOK_Y,          TT_C89 | TT_C99 | TT_CC65  },
  94     { "__asm__",        TOK_ASM,        TT_C89 | TT_C99 | TT_CC65  },
  95     { "__attribute__",  TOK_ATTRIBUTE,  TT_C89 | TT_C99 | TT_CC65  },
  96     { "__far__",        TOK_FAR,        TT_C89 | TT_C99 | TT_CC65  },
  97     { "__fastcall__",   TOK_FASTCALL,   TT_C89 | TT_C99 | TT_CC65  },
  98     { "__inline__",     TOK_INLINE,     TT_C89 | TT_C99 | TT_CC65  },
  99     { "__near__",       TOK_NEAR,       TT_C89 | TT_C99 | TT_CC65  },
 100     { "asm",            TOK_ASM,                          TT_CC65  },
 101     { "auto",           TOK_AUTO,       TT_C89 | TT_C99 | TT_CC65  },
 102     { "break",          TOK_BREAK,      TT_C89 | TT_C99 | TT_CC65  },
 103     { "case",           TOK_CASE,       TT_C89 | TT_C99 | TT_CC65  },
 104     { "char",           TOK_CHAR,       TT_C89 | TT_C99 | TT_CC65  },
 105     { "const",          TOK_CONST,      TT_C89 | TT_C99 | TT_CC65  },
 106     { "continue",       TOK_CONTINUE,   TT_C89 | TT_C99 | TT_CC65  },
 107     { "default",        TOK_DEFAULT,    TT_C89 | TT_C99 | TT_CC65  },
 108     { "do",             TOK_DO,         TT_C89 | TT_C99 | TT_CC65  },
 109     { "double",         TOK_DOUBLE,     TT_C89 | TT_C99 | TT_CC65  },
 110     { "else",           TOK_ELSE,       TT_C89 | TT_C99 | TT_CC65  },
 111     { "enum",           TOK_ENUM,       TT_C89 | TT_C99 | TT_CC65  },
 112     { "extern",         TOK_EXTERN,     TT_C89 | TT_C99 | TT_CC65  },
 113     { "far",            TOK_FAR,                          TT_CC65  },
 114     { "fastcall",       TOK_FASTCALL,                     TT_CC65  },
 115     { "float",          TOK_FLOAT,      TT_C89 | TT_C99 | TT_CC65  },
 116     { "for",            TOK_FOR,        TT_C89 | TT_C99 | TT_CC65  },
 117     { "goto",           TOK_GOTO,       TT_C89 | TT_C99 | TT_CC65  },
 118     { "if",             TOK_IF,         TT_C89 | TT_C99 | TT_CC65  },
 119     { "inline",         TOK_INLINE,              TT_C99 | TT_CC65  },
 120     { "int",            TOK_INT,        TT_C89 | TT_C99 | TT_CC65  },
 121     { "long",           TOK_LONG,       TT_C89 | TT_C99 | TT_CC65  },
 122     { "near",           TOK_NEAR,                         TT_CC65  },
 123     { "register",       TOK_REGISTER,   TT_C89 | TT_C99 | TT_CC65  },
 124     { "restrict",       TOK_RESTRICT,            TT_C99 | TT_CC65  },
 125     { "return",         TOK_RETURN,     TT_C89 | TT_C99 | TT_CC65  },
 126     { "short",          TOK_SHORT,      TT_C89 | TT_C99 | TT_CC65  },
 127     { "signed",         TOK_SIGNED,     TT_C89 | TT_C99 | TT_CC65  },
 128     { "sizeof",         TOK_SIZEOF,     TT_C89 | TT_C99 | TT_CC65  },
 129     { "static",         TOK_STATIC,     TT_C89 | TT_C99 | TT_CC65  },
 130     { "struct",         TOK_STRUCT,     TT_C89 | TT_C99 | TT_CC65  },
 131     { "switch",         TOK_SWITCH,     TT_C89 | TT_C99 | TT_CC65  },
 132     { "typedef",        TOK_TYPEDEF,    TT_C89 | TT_C99 | TT_CC65  },
 133     { "union",          TOK_UNION,      TT_C89 | TT_C99 | TT_CC65  },
 134     { "unsigned",       TOK_UNSIGNED,   TT_C89 | TT_C99 | TT_CC65  },
 135     { "void",           TOK_VOID,       TT_C89 | TT_C99 | TT_CC65  },
 136     { "volatile",       TOK_VOLATILE,   TT_C89 | TT_C99 | TT_CC65  },
 137     { "while",          TOK_WHILE,      TT_C89 | TT_C99 | TT_CC65  },
 138 };
 139 #define KEY_COUNT       (sizeof (Keywords) / sizeof (Keywords [0]))
 140
 141
 142
/* Stuff for determining the type of an integer constant. NumericConst()
 * builds a bit set of the candidate types from the suffix and prefix, clears
 * the flags of all types the value does not fit into and finally picks the
 * first remaining one in the order int, unsigned int, long, unsigned long.
 */
#define IT_INT          0x01    /* Constant may be of type int */
#define IT_UINT         0x02    /* Constant may be of type unsigned int */
#define IT_LONG         0x04    /* Constant may be of type long */
#define IT_ULONG        0x08    /* Constant may be of type unsigned long */
 148
 149
 150
 151 /*****************************************************************************/
 152 /*                                   code                                    */
 153 /*****************************************************************************/
 154
 155
 156
 157 static int CmpKey (const void* Key, const void* Elem)
 158 /* Compare function for bsearch */
 159 {
 160     return strcmp ((const char*) Key, ((const struct Keyword*) Elem)->Key);
 161 }
 162
 163
 164
 165 static token_t FindKey (const char* Key)
 166 /* Find a keyword and return the token. Return IDENT if the token is not a
 167  * keyword.
 168  */
 169 {
 170     struct Keyword* K;
 171     K = bsearch (Key, Keywords, KEY_COUNT, sizeof (Keywords [0]), CmpKey);
 172     if (K && (K->Std & (0x01 << IS_Get (&Standard))) != 0) {
 173         return K->Tok;
 174     } else {
 175         return TOK_IDENT;
 176     }
 177 }
 178
 179
 180
 181 static int SkipWhite (void)
 182 /* Skip white space in the input stream, reading and preprocessing new lines
 183  * if necessary. Return 0 if end of file is reached, return 1 otherwise.
 184  */
 185 {
 186     while (1) {
 187         while (CurC == '\0') {
 188             if (NextLine () == 0) {
 189                 return 0;
 190             }
 191             Preprocess ();
 192         }
 193         if (IsSpace (CurC)) {
 194             NextChar ();
 195         } else {
 196             return 1;
 197         }
 198     }
 199 }
 200
 201
 202
 203 int TokIsFuncSpec (const Token* T)
 204 /* Return true if the token is a function specifier */
 205 {
 206     return (T->Tok == TOK_INLINE) || (T->Tok == TOK_FASTCALL) ||
 207            (T->Tok == TOK_NEAR)   || (T->Tok == TOK_FAR);
 208 }
 209
 210
 211
 212 void SymName (char* S)
 213 /* Read a symbol from the input stream. The first character must have been
 214  * checked before calling this function. The buffer is expected to be at
 215  * least of size MAX_IDENTLEN+1.
 216  */
 217 {
 218     unsigned Len = 0;
 219     do {
 220         if (Len < MAX_IDENTLEN) {
 221             ++Len;
 222             *S++ = CurC;
 223         }
 224         NextChar ();
 225     } while (IsIdent (CurC) || IsDigit (CurC));
 226     *S = '\0';
 227 }
 228
 229
 230
 231 int IsSym (char* S)
 232 /* If a symbol follows, read it and return 1, otherwise return 0 */
 233 {
 234     if (IsIdent (CurC)) {
 235         SymName (S);
 236         return 1;
 237     } else {
 238         return 0;
 239     }
 240 }
 241
 242
 243
 244 static void UnknownChar (char C)
 245 /* Error message for unknown character */
 246 {
 247     Error ("Invalid input character with code %02X", C & 0xFF);
 248     NextChar ();                        /* Skip */
 249 }
 250
 251
 252
 253 static void SetTok (int tok)
 254 /* Set NextTok.Tok and bump line ptr */
 255 {
 256     NextTok.Tok = tok;
 257     NextChar ();
 258 }
 259
 260
 261
 262 static int ParseChar (void)
 263 /* Parse a character. Converts escape chars into character codes. */
 264 {
 265     int C;
 266     int HadError;
 267
 268     /* Check for escape chars */
 269     if (CurC == '\\') {
 270         NextChar ();
 271         switch (CurC) {
 272             case '?':
 273                 C = '\?';
 274                 break;
 275             case 'a':
 276                 C = '\a';
 277                 break;
 278             case 'b':
 279                 C = '\b';
 280                 break;
 281             case 'f':
 282                 C = '\f';
 283                 break;
 284             case 'r':
 285                 C = '\r';
 286                 break;
 287             case 'n':
 288                 C = '\n';
 289                 break;
 290             case 't':
 291                 C = '\t';
 292                 break;
 293             case 'v':
 294                 C = '\v';
 295                 break;
 296             case '\"':
 297                 C = '\"';
 298                 break;
 299             case '\'':
 300                 C = '\'';
 301                 break;
 302             case '\\':
 303                 C = '\\';
 304                 break;
 305             case 'x':
 306             case 'X':
 307                 /* Hex character constant */
 308                 if (!IsXDigit (NextC)) {
 309                     Error ("\\x used with no following hex digits");
 310                     C = ' ';
 311                 } else {
 312                     HadError = 0;
 313                     C = 0;
 314                     while (IsXDigit (NextC)) {
 315                         if ((C << 4) >= 256) {
 316                             if (!HadError) {
 317                                 Error ("Hex character constant out of range");
 318                                 HadError = 1;
 319                             }
 320                         } else {
 321                             C = (C << 4) | HexVal (NextC);
 322                         }
 323                         NextChar ();
 324                     }
 325                 }
 326                 break;
 327             case '0':
 328             case '1':
 329             case '2':
 330             case '3':
 331             case '4':
 332             case '5':
 333             case '6':
 334             case '7':
 335                 /* Octal constant */
 336                 HadError = 0;
 337                 C = HexVal (CurC);
 338                 while (IsODigit (NextC)) {
 339                     if ((C << 3) >= 256) {
 340                         if (!HadError) {
 341                             Error ("Octal character constant out of range");
 342                             HadError = 1;
 343                         }
 344                     } else {
 345                         C = (C << 3) | HexVal (NextC);
 346                     }
 347                     NextChar ();
 348                 }
 349                 break;
 350             default:
 351                 Error ("Illegal character constant");
 352                 C = ' ';
 353                 /* Try to do error recovery, otherwise the compiler will spit
 354                  * out thousands of errors in this place and abort.
 355                  */
 356                 if (CurC != '\'' && CurC != '\0') {
 357                     while (NextC != '\'' && NextC != '\"' && NextC != '\0') {
 358                         NextChar ();
 359                     }
 360                 }
 361                 break;
 362         }
 363     } else {
 364         C = CurC;
 365     }
 366
 367     /* Skip the character read */
 368     NextChar ();
 369
 370     /* Do correct sign extension */
 371     return SignExtendChar (C);
 372 }
 373
 374
 375
 376 static void CharConst (void)
 377 /* Parse a character constant. */
 378 {
 379     int C;
 380
 381     /* Skip the quote */
 382     NextChar ();
 383
 384     /* Get character */
 385     C = ParseChar ();
 386
 387     /* Check for closing quote */
 388     if (CurC != '\'') {
 389         Error ("`\'' expected");
 390     } else {
 391         /* Skip the quote */
 392         NextChar ();
 393     }
 394
 395     /* Setup values and attributes */
 396     NextTok.Tok  = TOK_CCONST;
 397
 398     /* Translate into target charset */
 399     NextTok.IVal = SignExtendChar (TgtTranslateChar (C));
 400
 401     /* Character constants have type int */
 402     NextTok.Type = type_int;
 403 }
 404
 405
 406
 407 static void StringConst (void)
 408 /* Parse a quoted string */
 409 {
 410     /* String buffer */
 411     StrBuf S = AUTO_STRBUF_INITIALIZER;
 412
 413     /* Concatenate strings. If at least one of the concenated strings is a wide
 414      * character literal, the whole string is a wide char literal, otherwise
 415      * it's a normal string literal.
 416      */
 417     while (1) {
 418
 419         /* Check if this is a normal or a wide char string */
 420         if (CurC == 'L' && NextC == '\"') {
 421             /* Wide character literal */
 422             NextTok.Tok = TOK_WCSCONST;
 423             NextChar ();
 424             NextChar ();
 425         } else if (CurC == '\"') {
 426             /* Skip the quote char */
 427             NextChar ();
 428         } else {
 429             /* No string */
 430             break;
 431         }
 432
 433         /* Read until end of string */
 434         while (CurC != '\"') {
 435             if (CurC == '\0') {
 436                 Error ("Unexpected newline");
 437                 break;
 438             }
 439             SB_AppendChar (&S, ParseChar ());
 440         }
 441
 442         /* Skip closing quote char if there was one */
 443         NextChar ();
 444
 445         /* Skip white space, read new input */
 446         SkipWhite ();
 447
 448     }
 449
 450     /* Terminate the string */
 451     SB_AppendChar (&S, '\0');
 452
 453     /* Add the whole string to the literal pool */
 454     NextTok.IVal = AddLiteralStr (&S);
 455     NextTok.Tok  = TOK_SCONST;
 456
 457     /* Free the buffer */
 458     SB_Done (&S);
 459 }
 460
 461
 462
 463 static void NumericConst (void)
 464 /* Parse a numeric constant */
 465 {
 466     unsigned Base;              /* Temporary number base */
 467     unsigned Prefix;            /* Base according to prefix */
 468     StrBuf   S = STATIC_STRBUF_INITIALIZER;
 469     int      IsFloat;
 470     char     C;
 471     unsigned DigitVal;
 472     unsigned long IVal;         /* Value */
 473
 474     /* Check for a leading hex or octal prefix and determine the possible
 475      * integer types.
 476      */
 477     if (CurC == '0') {
 478         /* Gobble 0 and examine next char */
 479         NextChar ();
 480         if (toupper (CurC) == 'X') {
 481             Base = Prefix = 16;
 482             NextChar ();        /* gobble "x" */
 483         } else {
 484             Base = 10;          /* Assume 10 for now - see below */
 485             Prefix = 8;         /* Actual prefix says octal */
 486         }
 487     } else {
 488         Base  = Prefix = 10;
 489     }
 490
 491     /* Because floating point numbers don't have octal prefixes (a number
 492      * with a leading zero is decimal), we first have to read the number
 493      * before converting it, so we can determine if it's a float or an
 494      * integer.
 495      */
 496     while (IsXDigit (CurC) && HexVal (CurC) < Base) {
 497         SB_AppendChar (&S, CurC);
 498         NextChar ();
 499     }
 500     SB_Terminate (&S);
 501
 502     /* The following character tells us if we have an integer or floating
 503      * point constant. Note: Hexadecimal floating point constants aren't
 504      * supported in C89.
 505      */
 506     IsFloat = (CurC == '.' ||
 507                (Base == 10 && toupper (CurC) == 'E') ||
 508                (Base == 16 && toupper (CurC) == 'P' && IS_Get (&Standard) >= STD_C99));
 509
 510     /* If we don't have a floating point type, an octal prefix results in an
 511      * octal base.
 512      */
 513     if (!IsFloat && Prefix == 8) {
 514         Base = 8;
 515     }
 516
 517     /* Since we do now know the correct base, convert the remembered input
 518      * into a number.
 519      */
 520     SB_Reset (&S);
 521     IVal = 0;
 522     while ((C = SB_Get (&S)) != '\0') {
 523         DigitVal = HexVal (C);
 524         if (DigitVal >= Base) {
 525             Error ("Numeric constant contains digits beyond the radix");
 526         }
 527         IVal = (IVal * Base) + DigitVal;
 528     }
 529
 530     /* We don't need the string buffer any longer */
 531     SB_Done (&S);
 532
 533     /* Distinguish between integer and floating point constants */
 534     if (!IsFloat) {
 535
 536         unsigned Types;
 537         int      HaveSuffix;
 538
 539         /* Check for a suffix and determine the possible types */
 540         HaveSuffix = 1;
 541         if (toupper (CurC) == 'U') {
 542             /* Unsigned type */
 543             NextChar ();
 544             if (toupper (CurC) != 'L') {
 545                 Types = IT_UINT | IT_ULONG;
 546             } else {
 547                 NextChar ();
 548                 Types = IT_ULONG;
 549             }
 550         } else if (toupper (CurC) == 'L') {
 551             /* Long type */
 552             NextChar ();
 553             if (toupper (CurC) != 'U') {
 554                 Types = IT_LONG | IT_ULONG;
 555             } else {
 556                 NextChar ();
 557                 Types = IT_ULONG;
 558             }
 559         } else {
 560             HaveSuffix = 0;
 561             if (Prefix == 10) {
 562                 /* Decimal constants are of any type but uint */
 563                 Types = IT_INT | IT_LONG | IT_ULONG;
 564             } else {
 565                 /* Octal or hex constants are of any type */
 566                 Types = IT_INT | IT_UINT | IT_LONG | IT_ULONG;
 567             }
 568         }
 569
 570         /* Check the range to determine the type */
 571         if (IVal > 0x7FFF) {
 572             /* Out of range for int */
 573             Types &= ~IT_INT;
 574             /* If the value is in the range 0x8000..0xFFFF, unsigned int is not
 575              * allowed, and we don't have a type specifying suffix, emit a
 576              * warning, because the constant is of type long.
 577              */
 578             if (IVal <= 0xFFFF && (Types & IT_UINT) == 0 && !HaveSuffix) {
 579                 Warning ("Constant is long");
 580             }
 581         }
 582         if (IVal > 0xFFFF) {
 583             /* Out of range for unsigned int */
 584             Types &= ~IT_UINT;
 585         }
 586         if (IVal > 0x7FFFFFFF) {
 587             /* Out of range for long int */
 588             Types &= ~IT_LONG;
 589         }
 590
 591         /* Now set the type string to the smallest type in types */
 592         if (Types & IT_INT) {
 593             NextTok.Type = type_int;
 594         } else if (Types & IT_UINT) {
 595             NextTok.Type = type_uint;
 596         } else if (Types & IT_LONG) {
 597             NextTok.Type = type_long;
 598         } else {
 599             NextTok.Type = type_ulong;
 600         }
 601
 602         /* Set the value and the token */
 603         NextTok.IVal = IVal;
 604         NextTok.Tok  = TOK_ICONST;
 605
 606     } else {
 607
 608         /* Float constant */
 609         Double FVal = FP_D_FromInt (IVal);      /* Convert to double */
 610
 611         /* Check for a fractional part and read it */
 612         if (CurC == '.') {
 613
 614             Double Scale;
 615
 616             /* Skip the dot */
 617             NextChar ();
 618
 619             /* Read fractional digits */
 620             Scale  = FP_D_Make (1.0);
 621             while (IsXDigit (CurC) && (DigitVal = HexVal (CurC)) < Base) {
 622                 /* Get the value of this digit */
 623                 Double FracVal = FP_D_Div (FP_D_FromInt (DigitVal * Base), Scale);
 624                 /* Add it to the float value */
 625                 FVal = FP_D_Add (FVal, FracVal);
 626                 /* Scale base */
 627                 Scale = FP_D_Mul (Scale, FP_D_FromInt (DigitVal));
 628                 /* Skip the digit */
 629                 NextChar ();
 630             }
 631         }
 632
 633         /* Check for an exponent and read it */
 634         if ((Base == 16 && toupper (CurC) == 'F') ||
 635             (Base == 10 && toupper (CurC) == 'E')) {
 636
 637             int Sign;
 638             unsigned Digits;
 639             unsigned Exp;
 640
 641             /* Skip the exponent notifier */
 642             NextChar ();
 643
 644             /* Read an optional sign */
 645             Sign = 1;
 646             if (CurC == '-') {
 647                 Sign = -1;
 648                 NextChar ();
 649             } else if (CurC == '+') {
 650                 NextChar ();
 651             }
 652
 653             /* Read exponent digits. Since we support only 32 bit floats
 654              * with a maximum exponent of +-/127, we read the exponent
 655              * part as integer with up to 3 digits and drop the remainder.
 656              * This avoids an overflow of Exp. The exponent is always
 657              * decimal, even for hex float consts.
 658              */
 659             Digits = 0;
 660             Exp    = 0;
 661             while (IsDigit (CurC)) {
 662                 if (++Digits <= 3) {
 663                     Exp = Exp * 10 + HexVal (CurC);
 664                 }
 665                 NextChar ();
 666             }
 667
 668             /* Check for errors: We must have exponent digits, and not more
 669              * than three.
 670              */
 671             if (Digits == 0) {
 672                 Error ("Floating constant exponent has no digits");
 673             } else if (Digits > 3) {
 674                 Warning ("Floating constant exponent is too large");
 675             }
 676
 677             /* Scale the exponent and adjust the value accordingly */
 678             if (Exp) {
 679                 FVal = FP_D_Mul (FVal, FP_D_Make (pow (10, Exp)));
 680             }
 681         }
 682
 683         /* Check for a suffix and determine the type of the constant */
 684         if (toupper (CurC) == 'F') {
 685             NextChar ();
 686             NextTok.Type = type_float;
 687         } else {
 688             NextTok.Type = type_double;
 689         }
 690
 691         /* Set the value and the token */
 692         NextTok.FVal = FVal;
 693         NextTok.Tok  = TOK_FCONST;
 694
 695     }
 696 }
 697
 698
 699
 700 void NextToken (void)
 701 /* Get next token from input stream */
 702 {
 703     ident token;
 704
 705     /* We have to skip white space here before shifting tokens, since the
 706      * tokens and the current line info is invalid at startup and will get
 707      * initialized by reading the first time from the file. Remember if
 708      * we were at end of input and handle that later.
 709      */
 710     int GotEOF = (SkipWhite() == 0);
 711
 712     /* Current token is the lookahead token */
 713     if (CurTok.LI) {
 714         ReleaseLineInfo (CurTok.LI);
 715     }
 716     CurTok = NextTok;
 717
 718     /* When reading the first time from the file, the line info in NextTok,
 719      * which was copied to CurTok is invalid. Since the information from
 720      * the token is used for error messages, we must make it valid.
 721      */
 722     if (CurTok.LI == 0) {
 723         CurTok.LI = UseLineInfo (GetCurLineInfo ());
 724     }
 725
 726     /* Remember the starting position of the next token */
 727     NextTok.LI = UseLineInfo (GetCurLineInfo ());
 728
 729     /* Now handle end of input. */
 730     if (GotEOF) {
 731         /* End of file reached */
 732         NextTok.Tok = TOK_CEOF;
 733         return;
 734     }
 735
 736     /* Determine the next token from the lookahead */
 737     if (IsDigit (CurC) || (CurC == '.' && IsDigit (NextC))) {
 738         /* A number */
 739         NumericConst ();
 740         return;
 741     }
 742
 743     /* Check for wide character literals */
 744     if (CurC == 'L' && NextC == '\"') {
 745         StringConst ();
 746         return;
 747     }
 748
 749     /* Check for keywords and identifiers */
 750     if (IsSym (token)) {
 751
 752         /* Check for a keyword */
 753         if ((NextTok.Tok = FindKey (token)) != TOK_IDENT) {
 754             /* Reserved word found */
 755             return;
 756         }
 757         /* No reserved word, check for special symbols */
 758         if (token[0] == '_' && token[1] == '_') {
 759             /* Special symbols */
 760             if (strcmp (token+2, "FILE__") == 0) {
 761                 NextTok.IVal = AddLiteral (GetCurrentFile());
 762                 NextTok.Tok  = TOK_SCONST;
 763                 return;
 764             } else if (strcmp (token+2, "LINE__") == 0) {
 765                 NextTok.Tok  = TOK_ICONST;
 766                 NextTok.IVal = GetCurrentLine();
 767                 NextTok.Type = type_int;
 768                 return;
 769             } else if (strcmp (token+2, "func__") == 0) {
 770                 /* __func__ is only defined in functions */
 771                 if (CurrentFunc) {
 772                     NextTok.IVal = AddLiteral (F_GetFuncName (CurrentFunc));
 773                     NextTok.Tok  = TOK_SCONST;
 774                     return;
 775                 }
 776             }
 777         }
 778
 779         /* No reserved word but identifier */
 780         strcpy (NextTok.Ident, token);
 781         NextTok.Tok = TOK_IDENT;
 782         return;
 783     }
 784
 785     /* Monstrous switch statement ahead... */
 786     switch (CurC) {
 787
 788         case '!':
 789             NextChar ();
 790             if (CurC == '=') {
 791                 SetTok (TOK_NE);
 792             } else {
 793                 NextTok.Tok = TOK_BOOL_NOT;
 794             }
 795             break;
 796
 797         case '\"':
 798             StringConst ();
 799             break;
 800
 801         case '%':
 802             NextChar ();
 803             if (CurC == '=') {
 804                 SetTok (TOK_MOD_ASSIGN);
 805             } else {
 806                 NextTok.Tok = TOK_MOD;
 807             }
 808             break;
 809
 810         case '&':
 811             NextChar ();
 812             switch (CurC) {
 813                 case '&':
 814                     SetTok (TOK_BOOL_AND);
 815                     break;
 816                 case '=':
 817                     SetTok (TOK_AND_ASSIGN);
 818                     break;
 819                 default:
 820                     NextTok.Tok = TOK_AND;
 821             }
 822             break;
 823
 824         case '\'':
 825             CharConst ();
 826             break;
 827
 828         case '(':
 829             SetTok (TOK_LPAREN);
 830             break;
 831
 832         case ')':
 833             SetTok (TOK_RPAREN);
 834             break;
 835
 836         case '*':
 837             NextChar ();
 838             if (CurC == '=') {
 839                 SetTok (TOK_MUL_ASSIGN);
 840             } else {
 841                 NextTok.Tok = TOK_STAR;
 842             }
 843             break;
 844
 845         case '+':
 846             NextChar ();
 847             switch (CurC) {
 848                 case '+':
 849                     SetTok (TOK_INC);
 850                     break;
 851                 case '=':
 852                     SetTok (TOK_PLUS_ASSIGN);
 853                     break;
 854                 default:
 855                     NextTok.Tok = TOK_PLUS;
 856             }
 857             break;
 858
 859         case ',':
 860             SetTok (TOK_COMMA);
 861             break;
 862
 863         case '-':
 864             NextChar ();
 865             switch (CurC) {
 866                 case '-':
 867                     SetTok (TOK_DEC);
 868                     break;
 869                 case '=':
 870                     SetTok (TOK_MINUS_ASSIGN);
 871                     break;
 872                 case '>':
 873                     SetTok (TOK_PTR_REF);
 874                     break;
 875                 default:
 876                     NextTok.Tok = TOK_MINUS;
 877             }
 878             break;
 879
 880         case '.':
 881             NextChar ();
 882             if (CurC == '.') {
 883                 NextChar ();
 884                 if (CurC == '.') {
 885                     SetTok (TOK_ELLIPSIS);
 886                 } else {
 887                     UnknownChar (CurC);
 888                 }
 889             } else {
 890                 NextTok.Tok = TOK_DOT;
 891             }
 892             break;
 893
 894         case '/':
 895             NextChar ();
 896             if (CurC == '=') {
 897                 SetTok (TOK_DIV_ASSIGN);
 898             } else {
 899                 NextTok.Tok = TOK_DIV;
 900             }
 901             break;
 902
 903         case ':':
 904             SetTok (TOK_COLON);
 905             break;
 906
 907         case ';':
 908             SetTok (TOK_SEMI);
 909             break;
 910
 911         case '<':
 912             NextChar ();
 913             switch (CurC) {
 914                 case '=':
 915                     SetTok (TOK_LE);
 916                     break;
 917                 case '<':
 918                     NextChar ();
 919                     if (CurC == '=') {
 920                         SetTok (TOK_SHL_ASSIGN);
 921                     } else {
 922                         NextTok.Tok = TOK_SHL;
 923                     }
 924                     break;
 925                 default:
 926                     NextTok.Tok = TOK_LT;
 927             }
 928             break;
 929
 930         case '=':
 931             NextChar ();
 932             if (CurC == '=') {
 933                 SetTok (TOK_EQ);
 934             } else {
 935                 NextTok.Tok = TOK_ASSIGN;
 936             }
 937             break;
 938
 939         case '>':
 940             NextChar ();
 941             switch (CurC) {
 942                 case '=':
 943                     SetTok (TOK_GE);
 944                     break;
 945                 case '>':
 946                     NextChar ();
 947                     if (CurC == '=') {
 948                         SetTok (TOK_SHR_ASSIGN);
 949                     } else {
 950                         NextTok.Tok = TOK_SHR;
 951                     }
 952                     break;
 953                 default:
 954                     NextTok.Tok = TOK_GT;
 955             }
 956             break;
 957
 958         case '?':
 959             SetTok (TOK_QUEST);
 960             break;
 961
 962         case '[':
 963             SetTok (TOK_LBRACK);
 964             break;
 965
 966         case ']':
 967             SetTok (TOK_RBRACK);
 968             break;
 969
 970         case '^':
 971             NextChar ();
 972             if (CurC == '=') {
 973                 SetTok (TOK_XOR_ASSIGN);
 974             } else {
 975                 NextTok.Tok = TOK_XOR;
 976             }
 977             break;
 978
 979         case '{':
 980             SetTok (TOK_LCURLY);
 981             break;
 982
 983         case '|':
 984             NextChar ();
 985             switch (CurC) {
 986                 case '|':
 987                     SetTok (TOK_BOOL_OR);
 988                     break;
 989                 case '=':
 990                     SetTok (TOK_OR_ASSIGN);
 991                     break;
 992                 default:
 993                     NextTok.Tok = TOK_OR;
 994             }
 995             break;
 996
 997         case '}':
 998             SetTok (TOK_RCURLY);
 999             break;
1000
1001         case '~':
1002             SetTok (TOK_COMP);
1003             break;
1004
1005         default:
1006             UnknownChar (CurC);
1007
1008     }
1009
1010 }
1011
1012
1013
1014 void SkipTokens (const token_t* TokenList, unsigned TokenCount)
1015 /* Skip tokens until we reach TOK_CEOF or a token in the given token list.
1016  * This routine is used for error recovery.
1017  */
1018 {
1019     while (CurTok.Tok != TOK_CEOF) {
1020
1021         /* Check if the current token is in the token list */
1022         unsigned I;
1023         for (I = 0; I < TokenCount; ++I) {
1024             if (CurTok.Tok == TokenList[I]) {
1025                 /* Found a token in the list */
1026                 return;
1027             }
1028         }
1029
1030         /* Not in the list: Skip it */
1031         NextToken ();
1032
1033     }
1034 }
1035
1036
1037
1038 int Consume (token_t Token, const char* ErrorMsg)
1039 /* Eat token if it is the next in the input stream, otherwise print an error
1040  * message. Returns true if the token was found and false otherwise.
1041  */
1042 {
1043     if (CurTok.Tok == Token) {
1044         NextToken ();
1045         return 1;
1046     } else {
1047         Error ("%s", ErrorMsg);
1048         return 0;
1049     }
1050 }
1051
1052
1053
1054 int ConsumeColon (void)
1055 /* Check for a colon and skip it. */
1056 {
1057     return Consume (TOK_COLON, "`:' expected");
1058 }
1059
1060
1061
1062 int ConsumeSemi (void)
1063 /* Check for a semicolon and skip it. */
1064 {
1065     /* Try do be smart about typos... */
1066     if (CurTok.Tok == TOK_SEMI) {
1067         NextToken ();
1068         return 1;
1069     } else {
1070         Error ("`;' expected");
1071         if (CurTok.Tok == TOK_COLON || CurTok.Tok == TOK_COMMA) {
1072             NextToken ();
1073         }
1074         return 0;
1075     }
1076 }
1077
1078
1079
1080 int ConsumeComma (void)
1081 /* Check for a comma and skip it. */
1082 {
1083     /* Try do be smart about typos... */
1084     if (CurTok.Tok == TOK_COMMA) {
1085         NextToken ();
1086         return 1;
1087     } else {
1088         Error ("`,' expected");
1089         if (CurTok.Tok == TOK_SEMI) {
1090             NextToken ();
1091         }
1092         return 0;
1093     }
1094 }
1095
1096
1097
1098 int ConsumeLParen (void)
1099 /* Check for a left parenthesis and skip it */
1100 {
1101     return Consume (TOK_LPAREN, "`(' expected");
1102 }
1103
1104
1105
1106 int ConsumeRParen (void)
1107 /* Check for a right parenthesis and skip it */
1108 {
1109     return Consume (TOK_RPAREN, "`)' expected");
1110 }
1111
1112
1113
1114 int ConsumeLBrack (void)
1115 /* Check for a left bracket and skip it */
1116 {
1117     return Consume (TOK_LBRACK, "`[' expected");
1118 }
1119
1120
1121
1122 int ConsumeRBrack (void)
1123 /* Check for a right bracket and skip it */
1124 {
1125     return Consume (TOK_RBRACK, "`]' expected");
1126 }
1127
1128
1129
1130 int ConsumeLCurly (void)
1131 /* Check for a left curly brace and skip it */
1132 {
1133     return Consume (TOK_LCURLY, "`{' expected");
1134 }
1135
1136
1137
1138 int ConsumeRCurly (void)
1139 /* Check for a right curly brace and skip it */
1140 {
1141     return Consume (TOK_RCURLY, "`}' expected");
1142 }
1143
1144
1145