git.sur5r.net Git - cc65/blob - src/cc65/scanner.c

   1 /*
   2  * scanner.c
   3  *
   4  * Ullrich von Bassewitz, 07.06.1998
   5  */
   6
   7
   8
   9 #include <stdio.h>
  10 #include <stdlib.h>
  11 #include <string.h>
  12 #include <errno.h>
  13 #include <ctype.h>
  14
  15 /* common */
  16 #include "tgttrans.h"
  17
  18 /* cc65 */
  19 #include "datatype.h"
  20 #include "error.h"
  21 #include "function.h"
  22 #include "global.h"
  23 #include "ident.h"
  24 #include "input.h"
  25 #include "litpool.h"
  26 #include "preproc.h"
  27 #include "symtab.h"
  28 #include "util.h"
  29 #include "scanner.h"
  30
  31
  32
  33 /*****************************************************************************/
  34 /*                                   data                                    */
  35 /*****************************************************************************/
  36
  37
  38
Token CurTok;           /* The current token; NextToken () copies NextTok here */
Token NextTok;          /* The next (lookahead) token, filled in by NextToken () */
  41
  42
  43
  44 /* Token types */
  45 #define TT_C    0               /* ANSI C token */
  46 #define TT_EXT  1               /* cc65 extension */
  47
  48 /* Token table */
  49 static const struct Keyword {
  50     char*           Key;        /* Keyword name */
  51     unsigned char   Tok;        /* The token */
  52     unsigned char   Type;       /* Token type */
  53 } Keywords [] = {
  54     { "__A__",          TOK_A,          TT_C    },
  55     { "__AX__",         TOK_AX,         TT_C    },
  56     { "__EAX__",        TOK_EAX,        TT_C    },
  57     { "__X__",          TOK_X,          TT_C    },
  58     { "__Y__",          TOK_Y,          TT_C    },
  59     { "__asm__",        TOK_ASM,        TT_C    },
  60     { "__attribute__",  TOK_ATTRIBUTE,  TT_C    },
  61     { "__far__",        TOK_FAR,        TT_C    },
  62     { "__fastcall__",   TOK_FASTCALL,   TT_C    },
  63     { "asm",            TOK_ASM,        TT_EXT  },
  64     { "auto",           TOK_AUTO,       TT_C    },
  65     { "break",          TOK_BREAK,      TT_C    },
  66     { "case",           TOK_CASE,       TT_C    },
  67     { "char",           TOK_CHAR,       TT_C    },
  68     { "const",          TOK_CONST,      TT_C    },
  69     { "continue",       TOK_CONTINUE,   TT_C    },
  70     { "default",        TOK_DEFAULT,    TT_C    },
  71     { "do",             TOK_DO,         TT_C    },
  72     { "double",         TOK_DOUBLE,     TT_C    },
  73     { "else",           TOK_ELSE,       TT_C    },
  74     { "enum",           TOK_ENUM,       TT_C    },
  75     { "extern",         TOK_EXTERN,     TT_C    },
  76     { "far",            TOK_FAR,        TT_EXT  },
  77     { "fastcall",       TOK_FASTCALL,   TT_EXT  },
  78     { "float",          TOK_FLOAT,      TT_C    },
  79     { "for",            TOK_FOR,        TT_C    },
  80     { "goto",           TOK_GOTO,       TT_C    },
  81     { "if",             TOK_IF,         TT_C    },
  82     { "int",            TOK_INT,        TT_C    },
  83     { "long",           TOK_LONG,       TT_C    },
  84     { "register",       TOK_REGISTER,   TT_C    },
  85     { "return",         TOK_RETURN,     TT_C    },
  86     { "short",          TOK_SHORT,      TT_C    },
  87     { "signed",         TOK_SIGNED,     TT_C    },
  88     { "sizeof",         TOK_SIZEOF,     TT_C    },
  89     { "static",         TOK_STATIC,     TT_C    },
  90     { "struct",         TOK_STRUCT,     TT_C    },
  91     { "switch",         TOK_SWITCH,     TT_C    },
  92     { "typedef",        TOK_TYPEDEF,    TT_C    },
  93     { "union",          TOK_UNION,      TT_C    },
  94     { "unsigned",       TOK_UNSIGNED,   TT_C    },
  95     { "void",           TOK_VOID,       TT_C    },
  96     { "volatile",       TOK_VOLATILE,   TT_C    },
  97     { "while",          TOK_WHILE,      TT_C    },
  98 };
  99 #define KEY_COUNT       (sizeof (Keywords) / sizeof (Keywords [0]))
 100
 101
 102
/* Stuff for determining the type of an integer constant. The values are bit
 * flags that are or'ed together to form the set of types a constant may have;
 * types that cannot hold the value are removed from the set.
 */
#define IT_INT          0x01    /* Constant may be of type int */
#define IT_UINT         0x02    /* Constant may be of type unsigned int */
#define IT_LONG         0x04    /* Constant may be of type long */
#define IT_ULONG        0x08    /* Constant may be of type unsigned long */
 108
 109
 110
 111 /*****************************************************************************/
 112 /*                                   code                                    */
 113 /*****************************************************************************/
 114
 115
 116
 117 static int CmpKey (const void* Key, const void* Elem)
 118 /* Compare function for bsearch */
 119 {
 120     return strcmp ((const char*) Key, ((const struct Keyword*) Elem)->Key);
 121 }
 122
 123
 124
 125 static int FindKey (const char* Key)
 126 /* Find a keyword and return the token. Return IDENT if the token is not a
 127  * keyword.
 128  */
 129 {
 130     struct Keyword* K;
 131     K = bsearch (Key, Keywords, KEY_COUNT, sizeof (Keywords [0]), CmpKey);
 132     if (K && (K->Type != TT_EXT || ANSI == 0)) {
 133         return K->Tok;
 134     } else {
 135         return TOK_IDENT;
 136     }
 137 }
 138
 139
 140
 141 static int SkipWhite (void)
 142 /* Skip white space in the input stream, reading and preprocessing new lines
 143  * if necessary. Return 0 if end of file is reached, return 1 otherwise.
 144  */
 145 {
 146     while (1) {
 147         while (CurC == 0) {
 148             if (NextLine () == 0) {
 149                 return 0;
 150             }
 151             Preprocess ();
 152         }
 153         if (CurC == ' ' || CurC == '\r') {
 154             NextChar ();
 155         } else {
 156             return 1;
 157         }
 158     }
 159 }
 160
 161
 162
 163 void SymName (char* s)
 164 /* Get symbol from input stream */
 165 {
 166     unsigned k = 0;
 167     do {
 168         if (k != MAX_IDENTLEN) {
 169             ++k;
 170             *s++ = CurC;
 171         }
 172         NextChar ();
 173     } while (IsIdent (CurC) || isdigit (CurC));
 174     *s = '\0';
 175 }
 176
 177
 178
 179 int IsSym (char *s)
 180 /* Get symbol from input stream or return 0 if not a symbol. */
 181 {
 182     if (IsIdent (CurC)) {
 183         SymName (s);
 184         return 1;
 185     } else {
 186         return 0;
 187     }
 188 }
 189
 190
 191
static void unknown (char C)
/* Output the error ERR_INVALID_CHAR for the character C, then skip one
 * character of the input so that scanning can continue.
 */
{
    Error (ERR_INVALID_CHAR, C);
    NextChar ();                        /* Skip */
}
 198
 199
 200
 201 static unsigned hexval (int c)
 202 /* Convert a hex digit into a value */
 203 {
 204     if (!isxdigit (c)) {
 205         Error (ERR_ILLEGAL_HEX_DIGIT);
 206     }
 207     if (isdigit (c)) {
 208         return c - '0';
 209     } else {
 210         return toupper (c) - 'A' + 10;
 211     }
 212 }
 213
 214
 215
static void SetTok (int tok)
/* Set the next token (nxttok) to tok and skip the current input character.
 * NextToken () uses this when the last character of the token is still the
 * current character.
 */
{
    nxttok = tok;
    NextChar ();
}
 222
 223
 224
 225 static int SignExtendChar (int C)
 226 /* Do correct sign extension of a character */
 227 {
 228     if (SignedChars && (C & 0x80) != 0) {
 229         return C | ~0xFF;
 230     } else {
 231         return C & 0xFF;
 232     }
 233 }
 234
 235
 236
 237 static int ParseChar (void)
 238 /* Parse a character. Converts \n into EOL, etc. */
 239 {
 240     int i;
 241     unsigned val;
 242     int C;
 243
 244     /* Check for escape chars */
 245     if (CurC == '\\') {
 246         NextChar ();
 247         switch (CurC) {
 248             case 'b':
 249                 C = '\b';
 250                 break;
 251             case 'f':
 252                 C = '\f';
 253                 break;
 254             case 'r':
 255                 C = '\r';
 256                 break;
 257             case 'n':
 258                 C = '\n';
 259                 break;
 260             case 't':
 261                 C = '\t';
 262                 break;
 263             case '\"':
 264                 C = '\"';
 265                 break;
 266             case '\'':
 267                 C = '\'';
 268                 break;
 269             case '\\':
 270                 C = '\\';
 271                 break;
 272             case 'x':
 273             case 'X':
 274                 /* Hex character constant */
 275                 NextChar ();
 276                 val = hexval (CurC) << 4;
 277                 NextChar ();
 278                 C = val | hexval (CurC);        /* Do not translate */
 279                 break;
 280             case '0':
 281             case '1':
 282                 /* Octal constant */
 283                 i = 0;
 284                 C = CurC - '0';
 285                 while (NextC >= '0' && NextC <= '7' && i++ < 4) {
 286                     NextChar ();
 287                     C = (C << 3) | (CurC - '0');
 288                 }
 289                 break;
 290             default:
 291                 Error (ERR_ILLEGAL_CHARCONST);
 292                 C = ' ';
 293                 break;
 294         }
 295     } else {
 296         C = CurC;
 297     }
 298
 299     /* Skip the character read */
 300     NextChar ();
 301
 302     /* Do correct sign extension */
 303     return SignExtendChar (C);
 304 }
 305
 306
 307
 308 static void CharConst (void)
 309 /* Parse a character constant. */
 310 {
 311     int C;
 312
 313     /* Skip the quote */
 314     NextChar ();
 315
 316     /* Get character */
 317     C = ParseChar ();
 318
 319     /* Check for closing quote */
 320     if (CurC != '\'') {
 321         Error (ERR_QUOTE_EXPECTED);
 322     } else {
 323         /* Skip the quote */
 324         NextChar ();
 325     }
 326
 327     /* Setup values and attributes */
 328     nxttok  = TOK_CCONST;
 329
 330     /* Translate into target charset */
 331     nxtval  = SignExtendChar (TgtTranslateChar (C));
 332
 333     /* Character constants have type int */
 334     nxttype = type_int;
 335 }
 336
 337
 338
 339 static void StringConst (void)
 340 /* Parse a quoted string */
 341 {
 342     nxtval = GetLiteralOffs ();
 343     nxttok = TOK_SCONST;
 344
 345     /* Be sure to concatenate strings */
 346     while (CurC == '\"') {
 347
 348         /* Skip the quote char */
 349         NextChar ();
 350
 351         while (CurC != '\"') {
 352             if (CurC == '\0') {
 353                 Error (ERR_UNEXPECTED_NEWLINE);
 354                 break;
 355             }
 356             AddLiteralChar (ParseChar ());
 357         }
 358
 359         /* Skip closing quote char if there was one */
 360         NextChar ();
 361
 362         /* Skip white space, read new input */
 363         SkipWhite ();
 364
 365     }
 366
 367     /* Terminate the string */
 368     AddLiteralChar ('\0');
 369 }
 370
 371
 372
 373 void NextToken (void)
 374 /* Get next token from input stream */
 375 {
 376     ident token;
 377
 378     /* Current token is the lookahead token */
 379     CurTok = NextTok;
 380
 381     /* Remember the starting position of the next token */
 382     NextTok.Pos = GetCurrentLine();
 383
 384     /* Skip spaces and read the next line if needed */
 385     if (SkipWhite () == 0) {
 386         /* End of file reached */
 387         nxttok = TOK_CEOF;
 388         return;
 389     }
 390
 391     /* Determine the next token from the lookahead */
 392     if (isdigit (CurC)) {
 393
 394         /* A number */
 395         int HaveSuffix;         /* True if we have a type suffix */
 396         unsigned types;         /* Possible types */
 397         unsigned base;
 398         unsigned long k;        /* Value */
 399
 400         k     = 0;
 401         base  = 10;
 402         types = IT_INT | IT_LONG | IT_ULONG;
 403
 404         if (CurC == '0') {
 405             /* Octal or hex constants may also be of type unsigned int */
 406             types = IT_INT | IT_UINT | IT_LONG | IT_ULONG;
 407             /* gobble 0 and examin next char */
 408             NextChar ();
 409             if (toupper (CurC) == 'X') {
 410                 base = 16;
 411                 nxttype = type_uint;
 412                 NextChar ();    /* gobble "x" */
 413             } else {
 414                 base = 8;
 415             }
 416         }
 417         while (1) {
 418             if (isdigit (CurC)) {
 419                 k = k * base + (CurC - '0');
 420             } else if (base == 16 && isxdigit (CurC)) {
 421                 k = (k << 4) + hexval (CurC);
 422             } else {
 423                 break;          /* not digit */
 424             }
 425             NextChar ();        /* gobble char */
 426         }
 427
 428         /* Check for a suffix */
 429         HaveSuffix = 1;
 430         if (CurC == 'u' || CurC == 'U') {
 431             /* Unsigned type */
 432             NextChar ();
 433             if (toupper (CurC) != 'L') {
 434                 types = IT_UINT | IT_ULONG;
 435             } else {
 436                 NextChar ();
 437                 types = IT_ULONG;
 438             }
 439         } else if (CurC == 'l' || CurC == 'L') {
 440             /* Long type */
 441             NextChar ();
 442             if (toupper (CurC) != 'U') {
 443                 types = IT_LONG | IT_ULONG;
 444             } else {
 445                 NextChar ();
 446                 types = IT_ULONG;
 447             }
 448         } else {
 449             HaveSuffix = 0;
 450         }
 451
 452         /* Check the range to determine the type */
 453         if (k > 0x7FFF) {
 454             /* Out of range for int */
 455             types &= ~IT_INT;
 456             /* If the value is in the range 0x8000..0xFFFF, unsigned int is not
 457              * allowed, and we don't have a type specifying suffix, emit a
 458              * warning.
 459              */
 460             if (k <= 0xFFFF && (types & IT_UINT) == 0 && !HaveSuffix) {
 461                 Warning (WARN_CONSTANT_IS_LONG);
 462             }
 463         }
 464         if (k > 0xFFFF) {
 465             /* Out of range for unsigned int */
 466             types &= ~IT_UINT;
 467         }
 468         if (k > 0x7FFFFFFF) {
 469             /* Out of range for long int */
 470             types &= ~IT_LONG;
 471         }
 472
 473         /* Now set the type string to the smallest type in types */
 474         if (types & IT_INT) {
 475             nxttype = type_int;
 476         } else if (types & IT_UINT) {
 477             nxttype = type_uint;
 478         } else if (types & IT_LONG) {
 479             nxttype = type_long;
 480         } else {
 481             nxttype = type_ulong;
 482         }
 483
 484         /* Set the value and the token */
 485         nxtval = k;
 486         nxttok = TOK_ICONST;
 487         return;
 488     }
 489
 490     if (IsSym (token)) {
 491
 492         /* Check for a keyword */
 493         if ((nxttok = FindKey (token)) != TOK_IDENT) {
 494             /* Reserved word found */
 495             return;
 496         }
 497         /* No reserved word, check for special symbols */
 498         if (token [0] == '_') {
 499             /* Special symbols */
 500             if (strcmp (token, "__FILE__") == 0) {
 501                 nxtval = AddLiteral (GetCurrentFile());
 502                 nxttok = TOK_SCONST;
 503                 return;
 504             } else if (strcmp (token, "__LINE__") == 0) {
 505                 nxttok  = TOK_ICONST;
 506                 nxtval  = GetCurrentLine();
 507                 nxttype = type_int;
 508                 return;
 509             } else if (strcmp (token, "__fixargs__") == 0) {
 510                 nxttok  = TOK_ICONST;
 511                 nxtval  = GetParamSize (CurrentFunc);
 512                 nxttype = type_uint;
 513                 return;
 514             } else if (strcmp (token, "__func__") == 0) {
 515                 /* __func__ is only defined in functions */
 516                 if (CurrentFunc) {
 517                     nxtval = AddLiteral (GetFuncName (CurrentFunc));
 518                     nxttok = TOK_SCONST;
 519                     return;
 520                 }
 521             }
 522         }
 523
 524         /* No reserved word but identifier */
 525         strcpy (NextTok.Ident, token);
 526         NextTok.Tok = TOK_IDENT;
 527         return;
 528     }
 529
 530     /* Monstrous switch statement ahead... */
 531     switch (CurC) {
 532
 533         case '!':
 534             NextChar ();
 535             if (CurC == '=') {
 536                 SetTok (TOK_NE);
 537             } else {
 538                 nxttok = TOK_BOOL_NOT;
 539             }
 540             break;
 541
 542         case '\"':
 543             StringConst ();
 544             break;
 545
 546         case '%':
 547             NextChar ();
 548             if (CurC == '=') {
 549                 SetTok (TOK_MOD_ASSIGN);
 550             } else {
 551                 nxttok = TOK_MOD;
 552             }
 553             break;
 554
 555         case '&':
 556             NextChar ();
 557             switch (CurC) {
 558                 case '&':
 559                     SetTok (TOK_BOOL_AND);
 560                     break;
 561                 case '=':
 562                     SetTok (TOK_AND_ASSIGN);
 563                     break;
 564                 default:
 565                     nxttok = TOK_AND;
 566             }
 567             break;
 568
 569         case '\'':
 570             CharConst ();
 571             break;
 572
 573         case '(':
 574             SetTok (TOK_LPAREN);
 575             break;
 576
 577         case ')':
 578             SetTok (TOK_RPAREN);
 579             break;
 580
 581         case '*':
 582             NextChar ();
 583             if (CurC == '=') {
 584                 SetTok (TOK_MUL_ASSIGN);
 585             } else {
 586                 nxttok = TOK_STAR;
 587             }
 588             break;
 589
 590         case '+':
 591             NextChar ();
 592             switch (CurC) {
 593                 case '+':
 594                     SetTok (TOK_INC);
 595                     break;
 596                 case '=':
 597                     SetTok (TOK_PLUS_ASSIGN);
 598                     break;
 599                 default:
 600                     nxttok = TOK_PLUS;
 601             }
 602             break;
 603
 604         case ',':
 605             SetTok (TOK_COMMA);
 606             break;
 607
 608         case '-':
 609             NextChar ();
 610             switch (CurC) {
 611                 case '-':
 612                     SetTok (TOK_DEC);
 613                     break;
 614                 case '=':
 615                     SetTok (TOK_MINUS_ASSIGN);
 616                     break;
 617                 case '>':
 618                     SetTok (TOK_PTR_REF);
 619                     break;
 620                 default:
 621                     nxttok = TOK_MINUS;
 622             }
 623             break;
 624
 625         case '.':
 626             NextChar ();
 627             if (CurC == '.') {
 628                 NextChar ();
 629                 if (CurC == '.') {
 630                     SetTok (TOK_ELLIPSIS);
 631                 } else {
 632                     unknown (CurC);
 633                 }
 634             } else {
 635                 nxttok = TOK_DOT;
 636             }
 637             break;
 638
 639         case '/':
 640             NextChar ();
 641             if (CurC == '=') {
 642                 SetTok (TOK_DIV_ASSIGN);
 643             } else {
 644                 nxttok = TOK_DIV;
 645             }
 646             break;
 647
 648         case ':':
 649             SetTok (TOK_COLON);
 650             break;
 651
 652         case ';':
 653             SetTok (TOK_SEMI);
 654             break;
 655
 656         case '<':
 657             NextChar ();
 658             switch (CurC) {
 659                 case '=':
 660                     SetTok (TOK_LE);
 661                     break;
 662                 case '<':
 663                     NextChar ();
 664                     if (CurC == '=') {
 665                         SetTok (TOK_SHL_ASSIGN);
 666                     } else {
 667                         nxttok = TOK_SHL;
 668                     }
 669                     break;
 670                 default:
 671                     nxttok = TOK_LT;
 672             }
 673             break;
 674
 675         case '=':
 676             NextChar ();
 677             if (CurC == '=') {
 678                 SetTok (TOK_EQ);
 679             } else {
 680                 nxttok = TOK_ASSIGN;
 681             }
 682             break;
 683
 684         case '>':
 685             NextChar ();
 686             switch (CurC) {
 687                 case '=':
 688                     SetTok (TOK_GE);
 689                     break;
 690                 case '>':
 691                     NextChar ();
 692                     if (CurC == '=') {
 693                         SetTok (TOK_SHR_ASSIGN);
 694                     } else {
 695                         nxttok = TOK_SHR;
 696                     }
 697                     break;
 698                 default:
 699                     nxttok = TOK_GT;
 700             }
 701             break;
 702
 703         case '?':
 704             SetTok (TOK_QUEST);
 705             break;
 706
 707         case '[':
 708             SetTok (TOK_LBRACK);
 709             break;
 710
 711         case ']':
 712             SetTok (TOK_RBRACK);
 713             break;
 714
 715         case '^':
 716             NextChar ();
 717             if (CurC == '=') {
 718                 SetTok (TOK_XOR_ASSIGN);
 719             } else {
 720                 nxttok = TOK_XOR;
 721             }
 722             break;
 723
 724         case '{':
 725             SetTok (TOK_LCURLY);
 726             break;
 727
 728         case '|':
 729             NextChar ();
 730             switch (CurC) {
 731                 case '|':
 732                     SetTok (TOK_BOOL_OR);
 733                     break;
 734                 case '=':
 735                     SetTok (TOK_OR_ASSIGN);
 736                     break;
 737                 default:
 738                     nxttok = TOK_OR;
 739             }
 740             break;
 741
 742         case '}':
 743             SetTok (TOK_RCURLY);
 744             break;
 745
 746         case '~':
 747             SetTok (TOK_COMP);
 748             break;
 749
 750         case '#':
 751             /* Skip it and following whitespace */
 752             do {
 753                 NextChar ();
 754             } while (CurC == ' ');
 755             if (!IsSym (token) || strcmp (token, "pragma") != 0) {
 756                 /* OOPS - should not happen */
 757                 Error (ERR_CPP_DIRECTIVE_EXPECTED);
 758             }
 759             nxttok = TOK_PRAGMA;
 760             break;
 761
 762         default:
 763             unknown (CurC);
 764
 765     }
 766
 767 }
 768
 769
 770
 771 void Consume (token_t Token, unsigned ErrNum)
 772 /* Eat token if it is the next in the input stream, otherwise print an error
 773  * message.
 774  */
 775 {
 776     if (curtok == Token) {
 777         NextToken ();
 778     } else {
 779         Error (ErrNum);
 780     }
 781 }
 782
 783
 784
void ConsumeColon (void)
/* Check for a colon and skip it. If the current token is not a colon,
 * Consume () outputs the error ERR_COLON_EXPECTED instead.
 */
{
    Consume (TOK_COLON, ERR_COLON_EXPECTED);
}
 790
 791
 792
 793 void ConsumeSemi (void)
 794 /* Check for a semicolon and skip it. */
 795 {
 796     /* Try do be smart about typos... */
 797     if (curtok == TOK_SEMI) {
 798         NextToken ();
 799     } else {
 800         Error (ERR_SEMICOLON_EXPECTED);
 801         if (curtok == TOK_COLON || curtok == TOK_COMMA) {
 802             NextToken ();
 803         }
 804     }
 805 }
 806
 807
 808
void ConsumeLParen (void)
/* Check for a left parenthesis and skip it. If the current token is not a
 * left parenthesis, Consume () outputs the error ERR_LPAREN_EXPECTED instead.
 */
{
    Consume (TOK_LPAREN, ERR_LPAREN_EXPECTED);
}
 814
 815
 816
void ConsumeRParen (void)
/* Check for a right parenthesis and skip it. If the current token is not a
 * right parenthesis, Consume () outputs the error ERR_RPAREN_EXPECTED instead.
 */
{
    Consume (TOK_RPAREN, ERR_RPAREN_EXPECTED);
}
 822
 823
 824
void ConsumeLBrack (void)
/* Check for a left bracket and skip it. If the current token is not a left
 * bracket, Consume () outputs the error ERR_LBRACK_EXPECTED instead.
 */
{
    Consume (TOK_LBRACK, ERR_LBRACK_EXPECTED);
}
 830
 831
 832
void ConsumeRBrack (void)
/* Check for a right bracket and skip it. If the current token is not a right
 * bracket, Consume () outputs the error ERR_RBRACK_EXPECTED instead.
 */
{
    Consume (TOK_RBRACK, ERR_RBRACK_EXPECTED);
}
 838
 839
 840
void ConsumeLCurly (void)
/* Check for a left curly brace and skip it. If the current token is not a
 * left curly brace, Consume () outputs the error ERR_LCURLY_EXPECTED instead.
 */
{
    Consume (TOK_LCURLY, ERR_LCURLY_EXPECTED);
}
 846
 847
 848
void ConsumeRCurly (void)
/* Check for a right curly brace and skip it. If the current token is not a
 * right curly brace, Consume () outputs the error ERR_RCURLY_EXPECTED instead.
 */
{
    Consume (TOK_RCURLY, ERR_RCURLY_EXPECTED);
}
 854
 855
 856