git.sur5r.net Git - cc65/blob - src/cc65/scanner.c

   1 /*
   2  * scanner.c
   3  *
   4  * Ullrich von Bassewitz, 07.06.1998
   5  */
   6
   7
   8
   9 #include <stdio.h>
  10 #include <stdlib.h>
  11 #include <string.h>
  12 #include <errno.h>
  13 #include <ctype.h>
  14
  15 #include "ctrans.h"
  16 #include "datatype.h"
  17 #include "error.h"
  18 #include "function.h"
  19 #include "global.h"
  20 #include "ident.h"
  21 #include "input.h"
  22 #include "litpool.h"
  23 #include "preproc.h"
  24 #include "symtab.h"
  25 #include "util.h"
  26 #include "scanner.h"
  27
  28
  29
  30 /*****************************************************************************/
  31 /*                                   data                                    */
  32 /*****************************************************************************/
  33
  34
  35
  36 Token CurTok;           /* The current token */
  37 Token NextTok;          /* The next token */
  38
  39
  40
  41 /* Token types */
  42 #define TT_C    0               /* ANSI C token */
  43 #define TT_EXT  1               /* cc65 extension */
  44
  45 /* Token table */
  46 static const struct Keyword {
  47     char*           Key;        /* Keyword name */
  48     unsigned char   Tok;        /* The token */
  49     unsigned char   Type;       /* Token type */
  50 } Keywords [] = {
  51     { "__AX__",         TOK_AX,         TT_C    },
  52     { "__EAX__",        TOK_EAX,        TT_C    },
  53     { "__asm__",        TOK_ASM,        TT_C    },
  54     { "__attribute__",  TOK_ATTRIBUTE,  TT_C    },
  55     { "__fastcall__",   TOK_FASTCALL,   TT_C    },
  56     { "asm",            TOK_ASM,        TT_EXT  },
  57     { "auto",           TOK_AUTO,       TT_C    },
  58     { "break",          TOK_BREAK,      TT_C    },
  59     { "case",           TOK_CASE,       TT_C    },
  60     { "char",           TOK_CHAR,       TT_C    },
  61     { "const",          TOK_CONST,      TT_C    },
  62     { "continue",       TOK_CONTINUE,   TT_C    },
  63     { "default",        TOK_DEFAULT,    TT_C    },
  64     { "do",             TOK_DO,         TT_C    },
  65     { "double",         TOK_DOUBLE,     TT_C    },
  66     { "else",           TOK_ELSE,       TT_C    },
  67     { "enum",           TOK_ENUM,       TT_C    },
  68     { "extern",         TOK_EXTERN,     TT_C    },
  69     { "fastcall",       TOK_FASTCALL,   TT_EXT  },
  70     { "float",          TOK_FLOAT,      TT_C    },
  71     { "for",            TOK_FOR,        TT_C    },
  72     { "goto",           TOK_GOTO,       TT_C    },
  73     { "if",             TOK_IF,         TT_C    },
  74     { "int",            TOK_INT,        TT_C    },
  75     { "long",           TOK_LONG,       TT_C    },
  76     { "register",       TOK_REGISTER,   TT_C    },
  77     { "return",         TOK_RETURN,     TT_C    },
  78     { "short",          TOK_SHORT,      TT_C    },
  79     { "signed",         TOK_SIGNED,     TT_C    },
  80     { "sizeof",         TOK_SIZEOF,     TT_C    },
  81     { "static",         TOK_STATIC,     TT_C    },
  82     { "struct",         TOK_STRUCT,     TT_C    },
  83     { "switch",         TOK_SWITCH,     TT_C    },
  84     { "typedef",        TOK_TYPEDEF,    TT_C    },
  85     { "union",          TOK_UNION,      TT_C    },
  86     { "unsigned",       TOK_UNSIGNED,   TT_C    },
  87     { "void",           TOK_VOID,       TT_C    },
  88     { "volatile",       TOK_VOLATILE,   TT_C    },
  89     { "while",          TOK_WHILE,      TT_C    },
  90 };
  91 #define KEY_COUNT       (sizeof (Keywords) / sizeof (Keywords [0]))
  92
  93
  94
  95 /* Stuff for determining the type of an integer constant */
  96 #define IT_INT          0x01
  97 #define IT_UINT         0x02
  98 #define IT_LONG         0x04
  99 #define IT_ULONG        0x08
 100
 101
 102
 103 /*****************************************************************************/
 104 /*                                   code                                    */
 105 /*****************************************************************************/
 106
 107
 108
 109 static int CmpKey (const void* Key, const void* Elem)
 110 /* Compare function for bsearch */
 111 {
 112     return strcmp ((const char*) Key, ((const struct Keyword*) Elem)->Key);
 113 }
 114
 115
 116
 117 static int FindKey (const char* Key)
 118 /* Find a keyword and return the token. Return IDENT if the token is not a
 119  * keyword.
 120  */
 121 {
 122     struct Keyword* K;
 123     K = bsearch (Key, Keywords, KEY_COUNT, sizeof (Keywords [0]), CmpKey);
 124     if (K && (K->Type != TT_EXT || ANSI == 0)) {
 125         return K->Tok;
 126     } else {
 127         return TOK_IDENT;
 128     }
 129 }
 130
 131
 132
 133 static int SkipWhite (void)
 134 /* Skip white space in the input stream, reading and preprocessing new lines
 135  * if necessary. Return 0 if end of file is reached, return 1 otherwise.
 136  */
 137 {
 138     while (1) {
 139         while (CurC == 0) {
 140             if (NextLine () == 0) {
 141                 return 0;
 142             }
 143             Preprocess ();
 144         }
 145         if (CurC == ' ' || CurC == '\r') {
 146             NextChar ();
 147         } else {
 148             return 1;
 149         }
 150     }
 151 }
 152
 153
 154
 155 void SymName (char* s)
 156 /* Get symbol from input stream */
 157 {
 158     unsigned k = 0;
 159     do {
 160         if (k != MAX_IDENTLEN) {
 161             ++k;
 162             *s++ = CurC;
 163         }
 164         NextChar ();
 165     } while (IsIdent (CurC) || isdigit (CurC));
 166     *s = '\0';
 167 }
 168
 169
 170
 171 int IsSym (char *s)
 172 /* Get symbol from input stream or return 0 if not a symbol. */
 173 {
 174     if (IsIdent (CurC)) {
 175         SymName (s);
 176         return 1;
 177     } else {
 178         return 0;
 179     }
 180 }
 181
 182
 183
 184 static void unknown (char C)
 185 /* Error message for unknown character */
 186 {
 187     Error (ERR_INVALID_CHAR, C);
 188     NextChar ();                        /* Skip */
 189 }
 190
 191
 192
 193 static unsigned hexval (int c)
 194 /* Convert a hex digit into a value */
 195 {
 196     if (!isxdigit (c)) {
 197         Error (ERR_ILLEGAL_HEX_DIGIT);
 198     }
 199     if (isdigit (c)) {
 200         return c - '0';
 201     } else {
 202         return toupper (c) - 'A' + 10;
 203     }
 204 }
 205
 206
 207
 208 static void SetTok (int tok)
 209 /* set nxttok and bump line ptr */
 210 {
 211     nxttok = tok;
 212     NextChar ();
 213 }
 214
 215
 216
 217 static int SignExtendChar (int C)
 218 /* Do correct sign extension of a character */
 219 {
 220     if (SignedChars && (C & 0x80) != 0) {
 221         return C | ~0xFF;
 222     } else {
 223         return C & 0xFF;
 224     }
 225 }
 226
 227
 228
 229 static int ParseChar (void)
 230 /* Parse a character. Converts \n into EOL, etc. */
 231 {
 232     int i;
 233     unsigned val;
 234     int C;
 235
 236     /* Check for escape chars */
 237     if (CurC == '\\') {
 238         NextChar ();
 239         switch (CurC) {
 240             case 'b':
 241                 C = '\b';
 242                 break;
 243             case 'f':
 244                 C = '\f';
 245                 break;
 246             case 'r':
 247                 C = '\r';
 248                 break;
 249             case 'n':
 250                 C = '\n';
 251                 break;
 252             case 't':
 253                 C = '\t';
 254                 break;
 255             case '\"':
 256                 C = '\"';
 257                 break;
 258             case '\'':
 259                 C = '\'';
 260                 break;
 261             case '\\':
 262                 C = '\\';
 263                 break;
 264             case 'x':
 265             case 'X':
 266                 /* Hex character constant */
 267                 NextChar ();
 268                 val = hexval (CurC) << 4;
 269                 NextChar ();
 270                 C = val | hexval (CurC);        /* Do not translate */
 271                 break;
 272             case '0':
 273             case '1':
 274                 /* Octal constant */
 275                 i = 0;
 276                 C = CurC - '0';
 277                 while (NextC >= '0' && NextC <= '7' && i++ < 4) {
 278                     NextChar ();
 279                     C = (C << 3) | (CurC - '0');
 280                 }
 281                 break;
 282             default:
 283                 Error (ERR_ILLEGAL_CHARCONST);
 284                 C = ' ';
 285                 break;
 286         }
 287     } else {
 288         C = CurC;
 289     }
 290
 291     /* Skip the character read */
 292     NextChar ();
 293
 294     /* Do correct sign extension */
 295     return SignExtendChar (C);
 296 }
 297
 298
 299
 300 static void CharConst (void)
 301 /* Parse a character constant. */
 302 {
 303     int C;
 304
 305     /* Skip the quote */
 306     NextChar ();
 307
 308     /* Get character */
 309     C = ParseChar ();
 310
 311     /* Check for closing quote */
 312     if (CurC != '\'') {
 313         Error (ERR_QUOTE_EXPECTED);
 314     } else {
 315         /* Skip the quote */
 316         NextChar ();
 317     }
 318
 319     /* Setup values and attributes */
 320     nxttok  = TOK_CCONST;
 321     nxtval  = SignExtendChar (ctrans (C));      /* Translate into target charset */
 322     nxttype = type_int;                         /* Character constants have type int */
 323 }
 324
 325
 326
 327 static void StringConst (void)
 328 /* Parse a quoted string */
 329 {
 330     nxtval = GetLiteralOffs ();
 331     nxttok = TOK_SCONST;
 332
 333     /* Be sure to concatenate strings */
 334     while (CurC == '\"') {
 335
 336         /* Skip the quote char */
 337         NextChar ();
 338
 339         while (CurC != '\"') {
 340             if (CurC == '\0') {
 341                 Error (ERR_UNEXPECTED_NEWLINE);
 342                 break;
 343             }
 344             AddLiteralChar (ParseChar ());
 345         }
 346
 347         /* Skip closing quote char if there was one */
 348         NextChar ();
 349
 350         /* Skip white space, read new input */
 351         SkipWhite ();
 352
 353     }
 354
 355     /* Terminate the string */
 356     AddLiteralChar ('\0');
 357 }
 358
 359
 360
 361 void NextToken (void)
 362 /* Get next token from input stream */
 363 {
 364     ident token;
 365
 366     /* Current token is the lookahead token */
 367     CurTok = NextTok;
 368
 369     /* Remember the starting position of the next token */
 370     NextTok.Pos = GetCurrentLine();
 371
 372     /* Skip spaces and read the next line if needed */
 373     if (SkipWhite () == 0) {
 374         /* End of file reached */
 375         nxttok = TOK_CEOF;
 376         return;
 377     }
 378
 379     /* Determine the next token from the lookahead */
 380     if (isdigit (CurC)) {
 381
 382         /* A number */
 383         int HaveSuffix;         /* True if we have a type suffix */
 384         unsigned types;         /* Possible types */
 385         unsigned base;
 386         unsigned long k;        /* Value */
 387
 388         k     = 0;
 389         base  = 10;
 390         types = IT_INT | IT_LONG | IT_ULONG;
 391
 392         if (CurC == '0') {
 393             /* Octal or hex constants may also be of type unsigned int */
 394             types = IT_INT | IT_UINT | IT_LONG | IT_ULONG;
 395             /* gobble 0 and examin next char */
 396             NextChar ();
 397             if (toupper (CurC) == 'X') {
 398                 base = 16;
 399                 nxttype = type_uint;
 400                 NextChar ();    /* gobble "x" */
 401             } else {
 402                 base = 8;
 403             }
 404         }
 405         while (1) {
 406             if (isdigit (CurC)) {
 407                 k = k * base + (CurC - '0');
 408             } else if (base == 16 && isxdigit (CurC)) {
 409                 k = (k << 4) + hexval (CurC);
 410             } else {
 411                 break;          /* not digit */
 412             }
 413             NextChar ();        /* gobble char */
 414         }
 415
 416         /* Check for a suffix */
 417         HaveSuffix = 1;
 418         if (CurC == 'u' || CurC == 'U') {
 419             /* Unsigned type */
 420             NextChar ();
 421             if (toupper (CurC) != 'L') {
 422                 types = IT_UINT | IT_ULONG;
 423             } else {
 424                 NextChar ();
 425                 types = IT_ULONG;
 426             }
 427         } else if (CurC == 'l' || CurC == 'L') {
 428             /* Long type */
 429             NextChar ();
 430             if (toupper (CurC) != 'U') {
 431                 types = IT_LONG | IT_ULONG;
 432             } else {
 433                 NextChar ();
 434                 types = IT_ULONG;
 435             }
 436         } else {
 437             HaveSuffix = 0;
 438         }
 439
 440         /* Check the range to determine the type */
 441         if (k > 0x7FFF) {
 442             /* Out of range for int */
 443             types &= ~IT_INT;
 444             /* If the value is in the range 0x8000..0xFFFF, unsigned int is not
 445              * allowed, and we don't have a type specifying suffix, emit a
 446              * warning.
 447              */
 448             if (k <= 0xFFFF && (types & IT_UINT) == 0 && !HaveSuffix) {
 449                 Warning (WARN_CONSTANT_IS_LONG);
 450             }
 451         }
 452         if (k > 0xFFFF) {
 453             /* Out of range for unsigned int */
 454             types &= ~IT_UINT;
 455         }
 456         if (k > 0x7FFFFFFF) {
 457             /* Out of range for long int */
 458             types &= ~IT_LONG;
 459         }
 460
 461         /* Now set the type string to the smallest type in types */
 462         if (types & IT_INT) {
 463             nxttype = type_int;
 464         } else if (types & IT_UINT) {
 465             nxttype = type_uint;
 466         } else if (types & IT_LONG) {
 467             nxttype = type_long;
 468         } else {
 469             nxttype = type_ulong;
 470         }
 471
 472         /* Set the value and the token */
 473         nxtval = k;
 474         nxttok = TOK_ICONST;
 475         return;
 476     }
 477
 478     if (IsSym (token)) {
 479
 480         /* Check for a keyword */
 481         if ((nxttok = FindKey (token)) != TOK_IDENT) {
 482             /* Reserved word found */
 483             return;
 484         }
 485         /* No reserved word, check for special symbols */
 486         if (token [0] == '_') {
 487             /* Special symbols */
 488             if (strcmp (token, "__FILE__") == 0) {
 489                 nxtval = AddLiteral (GetCurrentFile());
 490                 nxttok = TOK_SCONST;
 491                 return;
 492             } else if (strcmp (token, "__LINE__") == 0) {
 493                 nxttok  = TOK_ICONST;
 494                 nxtval  = GetCurrentLine();
 495                 nxttype = type_int;
 496                 return;
 497             } else if (strcmp (token, "__fixargs__") == 0) {
 498                 nxttok  = TOK_ICONST;
 499                 nxtval  = GetParamSize (CurrentFunc);
 500                 nxttype = type_uint;
 501                 return;
 502             } else if (strcmp (token, "__func__") == 0) {
 503                 /* __func__ is only defined in functions */
 504                 if (CurrentFunc) {
 505                     nxtval = AddLiteral (GetFuncName (CurrentFunc));
 506                     nxttok = TOK_SCONST;
 507                     return;
 508                 }
 509             }
 510         }
 511
 512         /* No reserved word but identifier */
 513         strcpy (NextTok.Ident, token);
 514         NextTok.Tok = TOK_IDENT;
 515         return;
 516     }
 517
 518     /* Monstrous switch statement ahead... */
 519     switch (CurC) {
 520
 521         case '!':
 522             NextChar ();
 523             if (CurC == '=') {
 524                 SetTok (TOK_NE);
 525             } else {
 526                 nxttok = TOK_BOOL_NOT;
 527             }
 528             break;
 529
 530         case '\"':
 531             StringConst ();
 532             break;
 533
 534         case '%':
 535             NextChar ();
 536             if (CurC == '=') {
 537                 SetTok (TOK_MOD_ASSIGN);
 538             } else {
 539                 nxttok = TOK_MOD;
 540             }
 541             break;
 542
 543         case '&':
 544             NextChar ();
 545             switch (CurC) {
 546                 case '&':
 547                     SetTok (TOK_BOOL_AND);
 548                     break;
 549                 case '=':
 550                     SetTok (TOK_AND_ASSIGN);
 551                     break;
 552                 default:
 553                     nxttok = TOK_AND;
 554             }
 555             break;
 556
 557         case '\'':
 558             CharConst ();
 559             break;
 560
 561         case '(':
 562             SetTok (TOK_LPAREN);
 563             break;
 564
 565         case ')':
 566             SetTok (TOK_RPAREN);
 567             break;
 568
 569         case '*':
 570             NextChar ();
 571             if (CurC == '=') {
 572                 SetTok (TOK_MUL_ASSIGN);
 573             } else {
 574                 nxttok = TOK_STAR;
 575             }
 576             break;
 577
 578         case '+':
 579             NextChar ();
 580             switch (CurC) {
 581                 case '+':
 582                     SetTok (TOK_INC);
 583                     break;
 584                 case '=':
 585                     SetTok (TOK_PLUS_ASSIGN);
 586                     break;
 587                 default:
 588                     nxttok = TOK_PLUS;
 589             }
 590             break;
 591
 592         case ',':
 593             SetTok (TOK_COMMA);
 594             break;
 595
 596         case '-':
 597             NextChar ();
 598             switch (CurC) {
 599                 case '-':
 600                     SetTok (TOK_DEC);
 601                     break;
 602                 case '=':
 603                     SetTok (TOK_MINUS_ASSIGN);
 604                     break;
 605                 case '>':
 606                     SetTok (TOK_PTR_REF);
 607                     break;
 608                 default:
 609                     nxttok = TOK_MINUS;
 610             }
 611             break;
 612
 613         case '.':
 614             NextChar ();
 615             if (CurC == '.') {
 616                 NextChar ();
 617                 if (CurC == '.') {
 618                     SetTok (TOK_ELLIPSIS);
 619                 } else {
 620                     unknown (CurC);
 621                 }
 622             } else {
 623                 nxttok = TOK_DOT;
 624             }
 625             break;
 626
 627         case '/':
 628             NextChar ();
 629             if (CurC == '=') {
 630                 SetTok (TOK_DIV_ASSIGN);
 631             } else {
 632                 nxttok = TOK_DIV;
 633             }
 634             break;
 635
 636         case ':':
 637             SetTok (TOK_COLON);
 638             break;
 639
 640         case ';':
 641             SetTok (TOK_SEMI);
 642             break;
 643
 644         case '<':
 645             NextChar ();
 646             switch (CurC) {
 647                 case '=':
 648                     SetTok (TOK_LE);
 649                     break;
 650                 case '<':
 651                     NextChar ();
 652                     if (CurC == '=') {
 653                         SetTok (TOK_SHL_ASSIGN);
 654                     } else {
 655                         nxttok = TOK_SHL;
 656                     }
 657                     break;
 658                 default:
 659                     nxttok = TOK_LT;
 660             }
 661             break;
 662
 663         case '=':
 664             NextChar ();
 665             if (CurC == '=') {
 666                 SetTok (TOK_EQ);
 667             } else {
 668                 nxttok = TOK_ASSIGN;
 669             }
 670             break;
 671
 672         case '>':
 673             NextChar ();
 674             switch (CurC) {
 675                 case '=':
 676                     SetTok (TOK_GE);
 677                     break;
 678                 case '>':
 679                     NextChar ();
 680                     if (CurC == '=') {
 681                         SetTok (TOK_SHR_ASSIGN);
 682                     } else {
 683                         nxttok = TOK_SHR;
 684                     }
 685                     break;
 686                 default:
 687                     nxttok = TOK_GT;
 688             }
 689             break;
 690
 691         case '?':
 692             SetTok (TOK_QUEST);
 693             break;
 694
 695         case '[':
 696             SetTok (TOK_LBRACK);
 697             break;
 698
 699         case ']':
 700             SetTok (TOK_RBRACK);
 701             break;
 702
 703         case '^':
 704             NextChar ();
 705             if (CurC == '=') {
 706                 SetTok (TOK_XOR_ASSIGN);
 707             } else {
 708                 nxttok = TOK_XOR;
 709             }
 710             break;
 711
 712         case '{':
 713             SetTok (TOK_LCURLY);
 714             break;
 715
 716         case '|':
 717             NextChar ();
 718             switch (CurC) {
 719                 case '|':
 720                     SetTok (TOK_BOOL_OR);
 721                     break;
 722                 case '=':
 723                     SetTok (TOK_OR_ASSIGN);
 724                     break;
 725                 default:
 726                     nxttok = TOK_OR;
 727             }
 728             break;
 729
 730         case '}':
 731             SetTok (TOK_RCURLY);
 732             break;
 733
 734         case '~':
 735             SetTok (TOK_COMP);
 736             break;
 737
 738         case '#':
 739             /* Skip it and following whitespace */
 740             do {
 741                 NextChar ();
 742             } while (CurC == ' ');
 743             if (!IsSym (token) || strcmp (token, "pragma") != 0) {
 744                 /* OOPS - should not happen */
 745                 Error (ERR_CPP_DIRECTIVE_EXPECTED);
 746             }
 747             nxttok = TOK_PRAGMA;
 748             break;
 749
 750         default:
 751             unknown (CurC);
 752
 753     }
 754
 755 }
 756
 757
 758
 759 void Consume (token_t Token, unsigned ErrNum)
 760 /* Eat token if it is the next in the input stream, otherwise print an error
 761  * message.
 762  */
 763 {
 764     if (curtok == Token) {
 765         NextToken ();
 766     } else {
 767         Error (ErrNum);
 768     }
 769 }
 770
 771
 772
 773 void ConsumeColon (void)
 774 /* Check for a colon and skip it. */
 775 {
 776     Consume (TOK_COLON, ERR_COLON_EXPECTED);
 777 }
 778
 779
 780
 781 void ConsumeSemi (void)
 782 /* Check for a semicolon and skip it. */
 783 {
 784     /* Try do be smart about typos... */
 785     if (curtok == TOK_SEMI) {
 786         NextToken ();
 787     } else {
 788         Error (ERR_SEMICOLON_EXPECTED);
 789         if (curtok == TOK_COLON || curtok == TOK_COMMA) {
 790             NextToken ();
 791         }
 792     }
 793 }
 794
 795
 796
 797 void ConsumeLParen (void)
 798 /* Check for a left parenthesis and skip it */
 799 {
 800     Consume (TOK_LPAREN, ERR_LPAREN_EXPECTED);
 801 }
 802
 803
 804
 805 void ConsumeRParen (void)
 806 /* Check for a right parenthesis and skip it */
 807 {
 808     Consume (TOK_RPAREN, ERR_RPAREN_EXPECTED);
 809 }
 810
 811
 812
 813 void ConsumeLBrack (void)
 814 /* Check for a left bracket and skip it */
 815 {
 816     Consume (TOK_LBRACK, ERR_LBRACK_EXPECTED);
 817 }
 818
 819
 820
 821 void ConsumeRBrack (void)
 822 /* Check for a right bracket and skip it */
 823 {
 824     Consume (TOK_RBRACK, ERR_RBRACK_EXPECTED);
 825 }
 826
 827
 828
 829 void ConsumeLCurly (void)
 830 /* Check for a left curly brace and skip it */
 831 {
 832     Consume (TOK_LCURLY, ERR_LCURLY_EXPECTED);
 833 }
 834
 835
 836
 837 void ConsumeRCurly (void)
 838 /* Check for a right curly brace and skip it */
 839 {
 840     Consume (TOK_RCURLY, ERR_RCURLY_EXPECTED);
 841 }
 842
 843
 844