git.sur5r.net Git - cc65/blob - src/cc65/scanner.c

   1 /*
   2  * scanner.c
   3  *
   4  * Ullrich von Bassewitz, 07.06.1998
   5  */
   6
   7
   8
   9 #include <stdio.h>
  10 #include <stdlib.h>
  11 #include <string.h>
  12 #include <errno.h>
  13 #include <ctype.h>
  14
  15 /* common */
  16 #include "chartype.h"
  17 #include "tgttrans.h"
  18
  19 /* cc65 */
  20 #include "datatype.h"
  21 #include "error.h"
  22 #include "function.h"
  23 #include "global.h"
  24 #include "ident.h"
  25 #include "input.h"
  26 #include "litpool.h"
  27 #include "preproc.h"
  28 #include "symtab.h"
  29 #include "util.h"
  30 #include "scanner.h"
  31
  32
  33
  34 /*****************************************************************************/
  35 /*                                   data                                    */
  36 /*****************************************************************************/
  37
  38
  39
  40 Token CurTok;           /* The current token */
  41 Token NextTok;          /* The next token */
  42
  43
  44
  45 /* Token types */
  46 #define TT_C    0               /* ANSI C token */
  47 #define TT_EXT  1               /* cc65 extension */
  48
  49 /* Token table */
  50 static const struct Keyword {
  51     char*           Key;        /* Keyword name */
  52     unsigned char   Tok;        /* The token */
  53     unsigned char   Type;       /* Token type */
  54 } Keywords [] = {
  55     { "__A__",          TOK_A,          TT_C    },
  56     { "__AX__",         TOK_AX,         TT_C    },
  57     { "__EAX__",        TOK_EAX,        TT_C    },
  58     { "__X__",          TOK_X,          TT_C    },
  59     { "__Y__",          TOK_Y,          TT_C    },
  60     { "__asm__",        TOK_ASM,        TT_C    },
  61     { "__attribute__",  TOK_ATTRIBUTE,  TT_C    },
  62     { "__far__",        TOK_FAR,        TT_C    },
  63     { "__fastcall__",   TOK_FASTCALL,   TT_C    },
  64     { "asm",            TOK_ASM,        TT_EXT  },
  65     { "auto",           TOK_AUTO,       TT_C    },
  66     { "break",          TOK_BREAK,      TT_C    },
  67     { "case",           TOK_CASE,       TT_C    },
  68     { "char",           TOK_CHAR,       TT_C    },
  69     { "const",          TOK_CONST,      TT_C    },
  70     { "continue",       TOK_CONTINUE,   TT_C    },
  71     { "default",        TOK_DEFAULT,    TT_C    },
  72     { "do",             TOK_DO,         TT_C    },
  73     { "double",         TOK_DOUBLE,     TT_C    },
  74     { "else",           TOK_ELSE,       TT_C    },
  75     { "enum",           TOK_ENUM,       TT_C    },
  76     { "extern",         TOK_EXTERN,     TT_C    },
  77     { "far",            TOK_FAR,        TT_EXT  },
  78     { "fastcall",       TOK_FASTCALL,   TT_EXT  },
  79     { "float",          TOK_FLOAT,      TT_C    },
  80     { "for",            TOK_FOR,        TT_C    },
  81     { "goto",           TOK_GOTO,       TT_C    },
  82     { "if",             TOK_IF,         TT_C    },
  83     { "int",            TOK_INT,        TT_C    },
  84     { "long",           TOK_LONG,       TT_C    },
  85     { "register",       TOK_REGISTER,   TT_C    },
  86     { "return",         TOK_RETURN,     TT_C    },
  87     { "short",          TOK_SHORT,      TT_C    },
  88     { "signed",         TOK_SIGNED,     TT_C    },
  89     { "sizeof",         TOK_SIZEOF,     TT_C    },
  90     { "static",         TOK_STATIC,     TT_C    },
  91     { "struct",         TOK_STRUCT,     TT_C    },
  92     { "switch",         TOK_SWITCH,     TT_C    },
  93     { "typedef",        TOK_TYPEDEF,    TT_C    },
  94     { "union",          TOK_UNION,      TT_C    },
  95     { "unsigned",       TOK_UNSIGNED,   TT_C    },
  96     { "void",           TOK_VOID,       TT_C    },
  97     { "volatile",       TOK_VOLATILE,   TT_C    },
  98     { "while",          TOK_WHILE,      TT_C    },
  99 };
 100 #define KEY_COUNT       (sizeof (Keywords) / sizeof (Keywords [0]))
 101
 102
 103
 104 /* Stuff for determining the type of an integer constant */
 105 #define IT_INT          0x01
 106 #define IT_UINT         0x02
 107 #define IT_LONG         0x04
 108 #define IT_ULONG        0x08
 109
 110
 111
 112 /*****************************************************************************/
 113 /*                                   code                                    */
 114 /*****************************************************************************/
 115
 116
 117
 118 static int CmpKey (const void* Key, const void* Elem)
 119 /* Compare function for bsearch */
 120 {
 121     return strcmp ((const char*) Key, ((const struct Keyword*) Elem)->Key);
 122 }
 123
 124
 125
 126 static int FindKey (const char* Key)
 127 /* Find a keyword and return the token. Return IDENT if the token is not a
 128  * keyword.
 129  */
 130 {
 131     struct Keyword* K;
 132     K = bsearch (Key, Keywords, KEY_COUNT, sizeof (Keywords [0]), CmpKey);
 133     if (K && (K->Type != TT_EXT || ANSI == 0)) {
 134         return K->Tok;
 135     } else {
 136         return TOK_IDENT;
 137     }
 138 }
 139
 140
 141
 142 static int SkipWhite (void)
 143 /* Skip white space in the input stream, reading and preprocessing new lines
 144  * if necessary. Return 0 if end of file is reached, return 1 otherwise.
 145  */
 146 {
 147     while (1) {
 148         while (CurC == 0) {
 149             if (NextLine () == 0) {
 150                 return 0;
 151             }
 152             Preprocess ();
 153         }
 154         if (CurC == ' ' || CurC == '\r') {
 155             NextChar ();
 156         } else {
 157             return 1;
 158         }
 159     }
 160 }
 161
 162
 163
 164 void SymName (char* s)
 165 /* Get symbol from input stream */
 166 {
 167     unsigned k = 0;
 168     do {
 169         if (k != MAX_IDENTLEN) {
 170             ++k;
 171             *s++ = CurC;
 172         }
 173         NextChar ();
 174     } while (IsIdent (CurC) || IsDigit (CurC));
 175     *s = '\0';
 176 }
 177
 178
 179
 180 int IsSym (char *s)
 181 /* Get symbol from input stream or return 0 if not a symbol. */
 182 {
 183     if (IsIdent (CurC)) {
 184         SymName (s);
 185         return 1;
 186     } else {
 187         return 0;
 188     }
 189 }
 190
 191
 192
 193 static void unknown (char C)
 194 /* Error message for unknown character */
 195 {
 196     Error ("Invalid input character with code %02X", C & 0xFF);
 197     NextChar ();                        /* Skip */
 198 }
 199
 200
 201
 202 static unsigned hexval (int c)
 203 /* Convert a hex digit into a value */
 204 {
 205     if (!IsXDigit (c)) {
 206         Error ("Invalid hexadecimal digit: `%c'", c);
 207     }
 208     if (IsDigit (c)) {
 209         return c - '0';
 210     } else {
 211         return toupper (c) - 'A' + 10;
 212     }
 213 }
 214
 215
 216
 217 static void SetTok (int tok)
 218 /* set nxttok and bump line ptr */
 219 {
 220     nxttok = tok;
 221     NextChar ();
 222 }
 223
 224
 225
 226 static int SignExtendChar (int C)
 227 /* Do correct sign extension of a character */
 228 {
 229     if (SignedChars && (C & 0x80) != 0) {
 230         return C | ~0xFF;
 231     } else {
 232         return C & 0xFF;
 233     }
 234 }
 235
 236
 237
 238 static int ParseChar (void)
 239 /* Parse a character. Converts \n into EOL, etc. */
 240 {
 241     int i;
 242     unsigned val;
 243     int C;
 244
 245     /* Check for escape chars */
 246     if (CurC == '\\') {
 247         NextChar ();
 248         switch (CurC) {
 249             case 'b':
 250                 C = '\b';
 251                 break;
 252             case 'f':
 253                 C = '\f';
 254                 break;
 255             case 'r':
 256                 C = '\r';
 257                 break;
 258             case 'n':
 259                 C = '\n';
 260                 break;
 261             case 't':
 262                 C = '\t';
 263                 break;
 264             case '\"':
 265                 C = '\"';
 266                 break;
 267             case '\'':
 268                 C = '\'';
 269                 break;
 270             case '\\':
 271                 C = '\\';
 272                 break;
 273             case 'x':
 274             case 'X':
 275                 /* Hex character constant */
 276                 NextChar ();
 277                 val = hexval (CurC) << 4;
 278                 NextChar ();
 279                 C = val | hexval (CurC);        /* Do not translate */
 280                 break;
 281             case '0':
 282             case '1':
 283                 /* Octal constant */
 284                 i = 0;
 285                 C = CurC - '0';
 286                 while (NextC >= '0' && NextC <= '7' && i++ < 4) {
 287                     NextChar ();
 288                     C = (C << 3) | (CurC - '0');
 289                 }
 290                 break;
 291             default:
 292                 Error ("Illegal character constant");
 293                 C = ' ';
 294                 break;
 295         }
 296     } else {
 297         C = CurC;
 298     }
 299
 300     /* Skip the character read */
 301     NextChar ();
 302
 303     /* Do correct sign extension */
 304     return SignExtendChar (C);
 305 }
 306
 307
 308
 309 static void CharConst (void)
 310 /* Parse a character constant. */
 311 {
 312     int C;
 313
 314     /* Skip the quote */
 315     NextChar ();
 316
 317     /* Get character */
 318     C = ParseChar ();
 319
 320     /* Check for closing quote */
 321     if (CurC != '\'') {
 322         Error ("`\'' expected");
 323     } else {
 324         /* Skip the quote */
 325         NextChar ();
 326     }
 327
 328     /* Setup values and attributes */
 329     nxttok  = TOK_CCONST;
 330
 331     /* Translate into target charset */
 332     nxtval  = SignExtendChar (TgtTranslateChar (C));
 333
 334     /* Character constants have type int */
 335     nxttype = type_int;
 336 }
 337
 338
 339
 340 static void StringConst (void)
 341 /* Parse a quoted string */
 342 {
 343     nxtval = GetLiteralOffs ();
 344     nxttok = TOK_SCONST;
 345
 346     /* Be sure to concatenate strings */
 347     while (CurC == '\"') {
 348
 349         /* Skip the quote char */
 350         NextChar ();
 351
 352         while (CurC != '\"') {
 353             if (CurC == '\0') {
 354                 Error ("Unexpected newline");
 355                 break;
 356             }
 357             AddLiteralChar (ParseChar ());
 358         }
 359
 360         /* Skip closing quote char if there was one */
 361         NextChar ();
 362
 363         /* Skip white space, read new input */
 364         SkipWhite ();
 365
 366     }
 367
 368     /* Terminate the string */
 369     AddLiteralChar ('\0');
 370 }
 371
 372
 373
 374 void NextToken (void)
 375 /* Get next token from input stream */
 376 {
 377     ident token;
 378
 379     /* Current token is the lookahead token */
 380     CurTok = NextTok;
 381
 382     /* Remember the starting position of the next token */
 383     NextTok.Pos = GetCurrentLine();
 384
 385     /* Skip spaces and read the next line if needed */
 386     if (SkipWhite () == 0) {
 387         /* End of file reached */
 388         nxttok = TOK_CEOF;
 389         return;
 390     }
 391
 392     /* Determine the next token from the lookahead */
 393     if (IsDigit (CurC)) {
 394
 395         /* A number */
 396         int HaveSuffix;         /* True if we have a type suffix */
 397         unsigned types;         /* Possible types */
 398         unsigned base;
 399         unsigned long k;        /* Value */
 400
 401         k     = 0;
 402         base  = 10;
 403         types = IT_INT | IT_LONG | IT_ULONG;
 404
 405         if (CurC == '0') {
 406             /* Octal or hex constants may also be of type unsigned int */
 407             types = IT_INT | IT_UINT | IT_LONG | IT_ULONG;
 408             /* gobble 0 and examin next char */
 409             NextChar ();
 410             if (toupper (CurC) == 'X') {
 411                 base = 16;
 412                 nxttype = type_uint;
 413                 NextChar ();    /* gobble "x" */
 414             } else {
 415                 base = 8;
 416             }
 417         }
 418         while (1) {
 419             if (IsDigit (CurC)) {
 420                 k = k * base + (CurC - '0');
 421             } else if (base == 16 && IsXDigit (CurC)) {
 422                 k = (k << 4) + hexval (CurC);
 423             } else {
 424                 break;          /* not digit */
 425             }
 426             NextChar ();        /* gobble char */
 427         }
 428
 429         /* Check for a suffix */
 430         HaveSuffix = 1;
 431         if (CurC == 'u' || CurC == 'U') {
 432             /* Unsigned type */
 433             NextChar ();
 434             if (toupper (CurC) != 'L') {
 435                 types = IT_UINT | IT_ULONG;
 436             } else {
 437                 NextChar ();
 438                 types = IT_ULONG;
 439             }
 440         } else if (CurC == 'l' || CurC == 'L') {
 441             /* Long type */
 442             NextChar ();
 443             if (toupper (CurC) != 'U') {
 444                 types = IT_LONG | IT_ULONG;
 445             } else {
 446                 NextChar ();
 447                 types = IT_ULONG;
 448             }
 449         } else {
 450             HaveSuffix = 0;
 451         }
 452
 453         /* Check the range to determine the type */
 454         if (k > 0x7FFF) {
 455             /* Out of range for int */
 456             types &= ~IT_INT;
 457             /* If the value is in the range 0x8000..0xFFFF, unsigned int is not
 458              * allowed, and we don't have a type specifying suffix, emit a
 459              * warning.
 460              */
 461             if (k <= 0xFFFF && (types & IT_UINT) == 0 && !HaveSuffix) {
 462                 Warning ("Constant is long");
 463             }
 464         }
 465         if (k > 0xFFFF) {
 466             /* Out of range for unsigned int */
 467             types &= ~IT_UINT;
 468         }
 469         if (k > 0x7FFFFFFF) {
 470             /* Out of range for long int */
 471             types &= ~IT_LONG;
 472         }
 473
 474         /* Now set the type string to the smallest type in types */
 475         if (types & IT_INT) {
 476             nxttype = type_int;
 477         } else if (types & IT_UINT) {
 478             nxttype = type_uint;
 479         } else if (types & IT_LONG) {
 480             nxttype = type_long;
 481         } else {
 482             nxttype = type_ulong;
 483         }
 484
 485         /* Set the value and the token */
 486         nxtval = k;
 487         nxttok = TOK_ICONST;
 488         return;
 489     }
 490
 491     if (IsSym (token)) {
 492
 493         /* Check for a keyword */
 494         if ((nxttok = FindKey (token)) != TOK_IDENT) {
 495             /* Reserved word found */
 496             return;
 497         }
 498         /* No reserved word, check for special symbols */
 499         if (token [0] == '_') {
 500             /* Special symbols */
 501             if (strcmp (token, "__FILE__") == 0) {
 502                 nxtval = AddLiteral (GetCurrentFile());
 503                 nxttok = TOK_SCONST;
 504                 return;
 505             } else if (strcmp (token, "__LINE__") == 0) {
 506                 nxttok  = TOK_ICONST;
 507                 nxtval  = GetCurrentLine();
 508                 nxttype = type_int;
 509                 return;
 510             } else if (strcmp (token, "__fixargs__") == 0) {
 511                 nxttok  = TOK_ICONST;
 512                 nxtval  = GetParamSize (CurrentFunc);
 513                 nxttype = type_uint;
 514                 return;
 515             } else if (strcmp (token, "__func__") == 0) {
 516                 /* __func__ is only defined in functions */
 517                 if (CurrentFunc) {
 518                     nxtval = AddLiteral (GetFuncName (CurrentFunc));
 519                     nxttok = TOK_SCONST;
 520                     return;
 521                 }
 522             }
 523         }
 524
 525         /* No reserved word but identifier */
 526         strcpy (NextTok.Ident, token);
 527         NextTok.Tok = TOK_IDENT;
 528         return;
 529     }
 530
 531     /* Monstrous switch statement ahead... */
 532     switch (CurC) {
 533
 534         case '!':
 535             NextChar ();
 536             if (CurC == '=') {
 537                 SetTok (TOK_NE);
 538             } else {
 539                 nxttok = TOK_BOOL_NOT;
 540             }
 541             break;
 542
 543         case '\"':
 544             StringConst ();
 545             break;
 546
 547         case '%':
 548             NextChar ();
 549             if (CurC == '=') {
 550                 SetTok (TOK_MOD_ASSIGN);
 551             } else {
 552                 nxttok = TOK_MOD;
 553             }
 554             break;
 555
 556         case '&':
 557             NextChar ();
 558             switch (CurC) {
 559                 case '&':
 560                     SetTok (TOK_BOOL_AND);
 561                     break;
 562                 case '=':
 563                     SetTok (TOK_AND_ASSIGN);
 564                     break;
 565                 default:
 566                     nxttok = TOK_AND;
 567             }
 568             break;
 569
 570         case '\'':
 571             CharConst ();
 572             break;
 573
 574         case '(':
 575             SetTok (TOK_LPAREN);
 576             break;
 577
 578         case ')':
 579             SetTok (TOK_RPAREN);
 580             break;
 581
 582         case '*':
 583             NextChar ();
 584             if (CurC == '=') {
 585                 SetTok (TOK_MUL_ASSIGN);
 586             } else {
 587                 nxttok = TOK_STAR;
 588             }
 589             break;
 590
 591         case '+':
 592             NextChar ();
 593             switch (CurC) {
 594                 case '+':
 595                     SetTok (TOK_INC);
 596                     break;
 597                 case '=':
 598                     SetTok (TOK_PLUS_ASSIGN);
 599                     break;
 600                 default:
 601                     nxttok = TOK_PLUS;
 602             }
 603             break;
 604
 605         case ',':
 606             SetTok (TOK_COMMA);
 607             break;
 608
 609         case '-':
 610             NextChar ();
 611             switch (CurC) {
 612                 case '-':
 613                     SetTok (TOK_DEC);
 614                     break;
 615                 case '=':
 616                     SetTok (TOK_MINUS_ASSIGN);
 617                     break;
 618                 case '>':
 619                     SetTok (TOK_PTR_REF);
 620                     break;
 621                 default:
 622                     nxttok = TOK_MINUS;
 623             }
 624             break;
 625
 626         case '.':
 627             NextChar ();
 628             if (CurC == '.') {
 629                 NextChar ();
 630                 if (CurC == '.') {
 631                     SetTok (TOK_ELLIPSIS);
 632                 } else {
 633                     unknown (CurC);
 634                 }
 635             } else {
 636                 nxttok = TOK_DOT;
 637             }
 638             break;
 639
 640         case '/':
 641             NextChar ();
 642             if (CurC == '=') {
 643                 SetTok (TOK_DIV_ASSIGN);
 644             } else {
 645                 nxttok = TOK_DIV;
 646             }
 647             break;
 648
 649         case ':':
 650             SetTok (TOK_COLON);
 651             break;
 652
 653         case ';':
 654             SetTok (TOK_SEMI);
 655             break;
 656
 657         case '<':
 658             NextChar ();
 659             switch (CurC) {
 660                 case '=':
 661                     SetTok (TOK_LE);
 662                     break;
 663                 case '<':
 664                     NextChar ();
 665                     if (CurC == '=') {
 666                         SetTok (TOK_SHL_ASSIGN);
 667                     } else {
 668                         nxttok = TOK_SHL;
 669                     }
 670                     break;
 671                 default:
 672                     nxttok = TOK_LT;
 673             }
 674             break;
 675
 676         case '=':
 677             NextChar ();
 678             if (CurC == '=') {
 679                 SetTok (TOK_EQ);
 680             } else {
 681                 nxttok = TOK_ASSIGN;
 682             }
 683             break;
 684
 685         case '>':
 686             NextChar ();
 687             switch (CurC) {
 688                 case '=':
 689                     SetTok (TOK_GE);
 690                     break;
 691                 case '>':
 692                     NextChar ();
 693                     if (CurC == '=') {
 694                         SetTok (TOK_SHR_ASSIGN);
 695                     } else {
 696                         nxttok = TOK_SHR;
 697                     }
 698                     break;
 699                 default:
 700                     nxttok = TOK_GT;
 701             }
 702             break;
 703
 704         case '?':
 705             SetTok (TOK_QUEST);
 706             break;
 707
 708         case '[':
 709             SetTok (TOK_LBRACK);
 710             break;
 711
 712         case ']':
 713             SetTok (TOK_RBRACK);
 714             break;
 715
 716         case '^':
 717             NextChar ();
 718             if (CurC == '=') {
 719                 SetTok (TOK_XOR_ASSIGN);
 720             } else {
 721                 nxttok = TOK_XOR;
 722             }
 723             break;
 724
 725         case '{':
 726             SetTok (TOK_LCURLY);
 727             break;
 728
 729         case '|':
 730             NextChar ();
 731             switch (CurC) {
 732                 case '|':
 733                     SetTok (TOK_BOOL_OR);
 734                     break;
 735                 case '=':
 736                     SetTok (TOK_OR_ASSIGN);
 737                     break;
 738                 default:
 739                     nxttok = TOK_OR;
 740             }
 741             break;
 742
 743         case '}':
 744             SetTok (TOK_RCURLY);
 745             break;
 746
 747         case '~':
 748             SetTok (TOK_COMP);
 749             break;
 750
 751         case '#':
 752             /* Skip it and following whitespace */
 753             do {
 754                 NextChar ();
 755             } while (CurC == ' ');
 756             if (!IsSym (token) || strcmp (token, "pragma") != 0) {
 757                 /* OOPS - should not happen */
 758                 Error ("Preprocessor directive expected");
 759             }
 760             nxttok = TOK_PRAGMA;
 761             break;
 762
 763         default:
 764             unknown (CurC);
 765
 766     }
 767
 768 }
 769
 770
 771
 772 void Consume (token_t Token, const char* ErrorMsg)
 773 /* Eat token if it is the next in the input stream, otherwise print an error
 774  * message.
 775  */
 776 {
 777     if (curtok == Token) {
 778         NextToken ();
 779     } else {
 780         Error (ErrorMsg);
 781     }
 782 }
 783
 784
 785
 786 void ConsumeColon (void)
 787 /* Check for a colon and skip it. */
 788 {
 789     Consume (TOK_COLON, "`:' expected");
 790 }
 791
 792
 793
 794 void ConsumeSemi (void)
 795 /* Check for a semicolon and skip it. */
 796 {
 797     /* Try do be smart about typos... */
 798     if (curtok == TOK_SEMI) {
 799         NextToken ();
 800     } else {
 801         Error ("`;' expected");
 802         if (curtok == TOK_COLON || curtok == TOK_COMMA) {
 803             NextToken ();
 804         }
 805     }
 806 }
 807
 808
 809
 810 void ConsumeComma (void)
 811 /* Check for a comma and skip it. */
 812 {
 813     /* Try do be smart about typos... */
 814     if (CurTok.Tok == TOK_COMMA) {
 815         NextToken ();
 816     } else {
 817         Error ("`,' expected");
 818         if (CurTok.Tok == TOK_SEMI) {
 819             NextToken ();
 820         }
 821     }
 822 }
 823
 824
 825
 826 void ConsumeLParen (void)
 827 /* Check for a left parenthesis and skip it */
 828 {
 829     Consume (TOK_LPAREN, "`(' expected");
 830 }
 831
 832
 833
 834 void ConsumeRParen (void)
 835 /* Check for a right parenthesis and skip it */
 836 {
 837     Consume (TOK_RPAREN, "`)' expected");
 838 }
 839
 840
 841
 842 void ConsumeLBrack (void)
 843 /* Check for a left bracket and skip it */
 844 {
 845     Consume (TOK_LBRACK, "`[' expected");
 846 }
 847
 848
 849
 850 void ConsumeRBrack (void)
 851 /* Check for a right bracket and skip it */
 852 {
 853     Consume (TOK_RBRACK, "`]' expected");
 854 }
 855
 856
 857
 858 void ConsumeLCurly (void)
 859 /* Check for a left curly brace and skip it */
 860 {
 861     Consume (TOK_LCURLY, "`{' expected");
 862 }
 863
 864
 865
 866 void ConsumeRCurly (void)
 867 /* Check for a right curly brace and skip it */
 868 {
 869     Consume (TOK_RCURLY, "`}' expected");
 870 }
 871
 872
 873