X-Git-Url: https://git.sur5r.net/?a=blobdiff_plain;f=src%2Fcc65%2Fscanner.c;h=73ce35aed1db9887338b47ef4d4b3a0aacda35fe;hb=763a35911411adbe46e07b57b6161205a51e16b6;hp=445d11405e32362a5f2ea0f602eaec9d011bb4f1;hpb=04ee693c00802bb541934803ac947e316e33d166;p=cc65 diff --git a/src/cc65/scanner.c b/src/cc65/scanner.c index 445d11405..73ce35aed 100644 --- a/src/cc65/scanner.c +++ b/src/cc65/scanner.c @@ -1,8 +1,35 @@ -/* - * scanner.c - * - * Ullrich von Bassewitz, 07.06.1998 - */ +/*****************************************************************************/ +/* */ +/* scanner.c */ +/* */ +/* Source file line info structure */ +/* */ +/* */ +/* */ +/* (C) 1998-2003 Ullrich von Bassewitz */ +/* Römerstrasse 52 */ +/* D-70794 Filderstadt */ +/* EMail: uz@cc65.org */ +/* */ +/* */ +/* This software is provided 'as-is', without any expressed or implied */ +/* warranty. In no event will the authors be held liable for any damages */ +/* arising from the use of this software. */ +/* */ +/* Permission is granted to anyone to use this software for any purpose, */ +/* including commercial applications, and to alter it and redistribute it */ +/* freely, subject to the following restrictions: */ +/* */ +/* 1. The origin of this software must not be misrepresented; you must not */ +/* claim that you wrote the original software. If you use this software */ +/* in a product, an acknowledgment in the product documentation would be */ +/* appreciated but is not required. */ +/* 2. Altered source versions must be plainly marked as such, and must not */ +/* be misrepresented as being the original software. */ +/* 3. This notice may not be removed or altered from any source */ +/* distribution. */ +/* */ +/*****************************************************************************/ @@ -12,11 +39,16 @@ #include #include -#include "ctrans.h" +/* common */ +#include "chartype.h" +#include "tgttrans.h" + +/* cc65 */ #include "datatype.h" #include "error.h" #include "function.h" #include "global.h" +#include "hexval.h" #include "ident.h" #include "input.h" #include "litpool.h" @@ -48,11 +80,17 @@ static const struct Keyword { unsigned char Tok; /* The token */ unsigned char Type; /* Token type */ } Keywords [] = { - { "__AX__", TOK_AX, TT_C }, - { "__EAX__", TOK_EAX, TT_C }, - { "__asm__", TOK_ASM, TT_C }, - { "__attribute__", TOK_ATTRIBUTE, TT_C }, - { "__fastcall__", TOK_FASTCALL, TT_C }, + { "_Pragma", TOK_PRAGMA, TT_C }, + { "__AX__", TOK_AX, TT_C }, + { "__A__", TOK_A, TT_C }, + { "__EAX__", TOK_EAX, TT_C }, + { "__X__", TOK_X, TT_C }, + { "__Y__", TOK_Y, TT_C }, + { "__asm__", TOK_ASM, TT_C }, + { "__attribute__", TOK_ATTRIBUTE, TT_C }, + { "__far__", TOK_FAR, TT_C }, + { "__fastcall__", TOK_FASTCALL, TT_C }, + { "__near__", TOK_NEAR, TT_C }, { "asm", TOK_ASM, TT_EXT }, { "auto", TOK_AUTO, TT_C }, { "break", TOK_BREAK, TT_C }, @@ -66,6 +104,7 @@ static const struct Keyword { { "else", TOK_ELSE, TT_C }, { "enum", TOK_ENUM, TT_C }, { "extern", TOK_EXTERN, TT_C }, + { "far", TOK_FAR, TT_EXT }, { "fastcall", TOK_FASTCALL, TT_EXT }, { "float", TOK_FLOAT, TT_C }, { "for", TOK_FOR, TT_C }, @@ -73,7 +112,9 @@ static const struct Keyword { { "if", TOK_IF, TT_C }, { "int", TOK_INT, TT_C }, { "long", TOK_LONG, TT_C }, + { "near", TOK_NEAR, TT_EXT }, { "register", TOK_REGISTER, TT_C }, + { "restrict", TOK_RESTRICT, TT_C }, { "return", TOK_RETURN, TT_C }, { "short", TOK_SHORT, TT_C }, { "signed", TOK_SIGNED, TT_C }, @@ -129,7 +170,7 @@ static int FindKey (const char* Key) } - + static int SkipWhite (void) /* Skip white space in the input stream, reading and preprocessing new lines * if necessary. Return 0 if end of file is reached, return 1 otherwise. @@ -142,7 +183,7 @@ static int SkipWhite (void) } Preprocess (); } - if (CurC == ' ' || CurC == '\r') { + if (IsSpace (CurC)) { NextChar (); } else { return 1; @@ -162,7 +203,7 @@ void SymName (char* s) *s++ = CurC; } NextChar (); - } while (IsIdent (CurC) || isdigit (CurC)); + } while (IsIdent (CurC) || IsDigit (CurC)); *s = '\0'; } @@ -181,107 +222,108 @@ int IsSym (char *s) -static void unknown (char C) +static void UnknownChar (char C) /* Error message for unknown character */ { - Error (ERR_INVALID_CHAR, C); + Error ("Invalid input character with code %02X", C & 0xFF); NextChar (); /* Skip */ } -static unsigned hexval (int c) -/* Convert a hex digit into a value */ -{ - if (!isxdigit (c)) { - Error (ERR_ILLEGAL_HEX_DIGIT); - } - if (isdigit (c)) { - return c - '0'; - } else { - return toupper (c) - 'A' + 10; - } -} - - - static void SetTok (int tok) -/* set nxttok and bump line ptr */ +/* Set NextTok.Tok and bump line ptr */ { - nxttok = tok; + NextTok.Tok = tok; NextChar (); } -static int SignExtendChar (int C) -/* Do correct sign extension of a character */ -{ - if (SignedChars && (C & 0x80) != 0) { - return C | ~0xFF; - } else { - return C & 0xFF; - } -} - - - static int ParseChar (void) /* Parse a character. Converts \n into EOL, etc. */ { - int i; - unsigned val; + int I; + unsigned Val; int C; /* Check for escape chars */ if (CurC == '\\') { NextChar (); switch (CurC) { + case '?': + C = '\?'; + break; + case 'a': + C = '\a'; + break; case 'b': C = '\b'; - break; + break; case 'f': - C = '\f'; - break; + C = '\f'; + break; case 'r': - C = '\r'; - break; + C = '\r'; + break; case 'n': - C = '\n'; - break; + C = '\n'; + break; case 't': - C = '\t'; - break; + C = '\t'; + break; + case 'v': + C = '\v'; + break; case '\"': - C = '\"'; - break; + C = '\"'; + break; case '\'': - C = '\''; - break; + C = '\''; + break; case '\\': - C = '\\'; - break; + C = '\\'; + break; case 'x': case 'X': - /* Hex character constant */ - NextChar (); - val = hexval (CurC) << 4; - NextChar (); - C = val | hexval (CurC); /* Do not translate */ - break; + /* Hex character constant */ + NextChar (); + Val = HexVal (CurC) << 4; + NextChar (); + C = Val | HexVal (CurC); /* Do not translate */ + break; case '0': case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': /* Octal constant */ - i = 0; - C = CurC - '0'; - while (NextC >= '0' && NextC <= '7' && i++ < 4) { - NextChar (); - C = (C << 3) | (CurC - '0'); + I = 0; + Val = CurC - '0'; + while (NextC >= '0' && NextC <= '7' && ++I <= 3) { + NextChar (); + Val = (Val << 3) | (CurC - '0'); } + C = (int) Val; + if (Val >= 256) { + Error ("Character constant out of range"); + C = ' '; + } break; default: - Error (ERR_ILLEGAL_CHARCONST); + Error ("Illegal character constant"); C = ' '; + /* Try to do error recovery, otherwise the compiler will spit + * out thousands of errors in this place and abort. + */ + if (CurC != '\'' && CurC != '\0') { + while (NextC != '\'' && NextC != '\"' && NextC != '\0') { + NextChar (); + } + } break; } } else { @@ -310,16 +352,20 @@ static void CharConst (void) /* Check for closing quote */ if (CurC != '\'') { - Error (ERR_QUOTE_EXPECTED); + Error ("`\'' expected"); } else { /* Skip the quote */ NextChar (); } /* Setup values and attributes */ - nxttok = TOK_CCONST; - nxtval = SignExtendChar (ctrans (C)); /* Translate into target charset */ - nxttype = type_int; /* Character constants have type int */ + NextTok.Tok = TOK_CCONST; + + /* Translate into target charset */ + NextTok.IVal = SignExtendChar (TgtTranslateChar (C)); + + /* Character constants have type int */ + NextTok.Type = type_int; } @@ -327,8 +373,8 @@ static void CharConst (void) static void StringConst (void) /* Parse a quoted string */ { - nxtval = GetLiteralOffs (); - nxttok = TOK_SCONST; + NextTok.IVal = GetLiteralPoolOffs (); + NextTok.Tok = TOK_SCONST; /* Be sure to concatenate strings */ while (CurC == '\"') { @@ -338,7 +384,7 @@ static void StringConst (void) while (CurC != '\"') { if (CurC == '\0') { - Error (ERR_UNEXPECTED_NEWLINE); + Error ("Unexpected newline"); break; } AddLiteralChar (ParseChar ()); @@ -363,30 +409,49 @@ void NextToken (void) { ident token; + /* We have to skip white space here before shifting tokens, since the + * tokens and the current line info is invalid at startup and will get + * initialized by reading the first time from the file. Remember if + * we were at end of input and handle that later. + */ + int GotEOF = (SkipWhite() == 0); + /* Current token is the lookahead token */ + if (CurTok.LI) { + ReleaseLineInfo (CurTok.LI); + } CurTok = NextTok; + /* When reading the first time from the file, the line info in NextTok, + * which was copied to CurTok is invalid. Since the information from + * the token is used for error messages, we must make it valid. + */ + if (CurTok.LI == 0) { + CurTok.LI = UseLineInfo (GetCurLineInfo ()); + } + /* Remember the starting position of the next token */ - NextTok.Pos = GetCurrentLine(); + NextTok.LI = UseLineInfo (GetCurLineInfo ()); - /* Skip spaces and read the next line if needed */ - if (SkipWhite () == 0) { + /* Now handle end of input. */ + if (GotEOF) { /* End of file reached */ - nxttok = TOK_CEOF; + NextTok.Tok = TOK_CEOF; return; } /* Determine the next token from the lookahead */ - if (isdigit (CurC)) { + if (IsDigit (CurC)) { /* A number */ int HaveSuffix; /* True if we have a type suffix */ unsigned types; /* Possible types */ - unsigned base; - unsigned long k; /* Value */ + unsigned Base; + unsigned DigitVal; + unsigned long k; /* Value */ k = 0; - base = 10; + Base = 10; types = IT_INT | IT_LONG | IT_ULONG; if (CurC == '0') { @@ -395,23 +460,30 @@ void NextToken (void) /* gobble 0 and examin next char */ NextChar (); if (toupper (CurC) == 'X') { - base = 16; - nxttype = type_uint; + Base = 16; + NextTok.Type = type_uint; NextChar (); /* gobble "x" */ } else { - base = 8; + Base = 8; } } - while (1) { - if (isdigit (CurC)) { - k = k * base + (CurC - '0'); - } else if (base == 16 && isxdigit (CurC)) { - k = (k << 4) + hexval (CurC); - } else { - break; /* not digit */ - } - NextChar (); /* gobble char */ - } + while (IsXDigit (CurC) && (DigitVal = HexVal (CurC)) < Base) { + k = k * Base + DigitVal; + NextChar (); + } + /* Check for errorneous digits */ + if (Base == 8 && IsDigit (CurC)) { + Error ("Numeric constant contains digits beyond the radix"); + /* Do error recovery */ + do { + NextChar (); + } while (IsDigit (CurC)); + } else if (Base != 16 && IsXDigit (CurC)) { + Error ("Nondigits in number and not hexadecimal"); + do { + NextChar (); + } while (IsXDigit (CurC)); + } /* Check for a suffix */ HaveSuffix = 1; @@ -446,7 +518,7 @@ void NextToken (void) * warning. */ if (k <= 0xFFFF && (types & IT_UINT) == 0 && !HaveSuffix) { - Warning (WARN_CONSTANT_IS_LONG); + Warning ("Constant is long"); } } if (k > 0xFFFF) { @@ -460,50 +532,45 @@ void NextToken (void) /* Now set the type string to the smallest type in types */ if (types & IT_INT) { - nxttype = type_int; + NextTok.Type = type_int; } else if (types & IT_UINT) { - nxttype = type_uint; + NextTok.Type = type_uint; } else if (types & IT_LONG) { - nxttype = type_long; + NextTok.Type = type_long; } else { - nxttype = type_ulong; + NextTok.Type = type_ulong; } /* Set the value and the token */ - nxtval = k; - nxttok = TOK_ICONST; + NextTok.IVal = k; + NextTok.Tok = TOK_ICONST; return; } if (IsSym (token)) { /* Check for a keyword */ - if ((nxttok = FindKey (token)) != TOK_IDENT) { + if ((NextTok.Tok = FindKey (token)) != TOK_IDENT) { /* Reserved word found */ return; } /* No reserved word, check for special symbols */ if (token [0] == '_') { /* Special symbols */ - if (strcmp (token, "__FILE__") == 0) { - nxtval = AddLiteral (GetCurrentFile()); - nxttok = TOK_SCONST; + if (strcmp (token, "__FILE__") == 0) { + NextTok.IVal = AddLiteral (GetCurrentFile()); + NextTok.Tok = TOK_SCONST; return; } else if (strcmp (token, "__LINE__") == 0) { - nxttok = TOK_ICONST; - nxtval = GetCurrentLine(); - nxttype = type_int; - return; - } else if (strcmp (token, "__fixargs__") == 0) { - nxttok = TOK_ICONST; - nxtval = GetParamSize (CurrentFunc); - nxttype = type_uint; + NextTok.Tok = TOK_ICONST; + NextTok.IVal = GetCurrentLine(); + NextTok.Type = type_int; return; } else if (strcmp (token, "__func__") == 0) { /* __func__ is only defined in functions */ if (CurrentFunc) { - nxtval = AddLiteral (GetFuncName (CurrentFunc)); - nxttok = TOK_SCONST; + NextTok.IVal = AddLiteral (F_GetFuncName (CurrentFunc)); + NextTok.Tok = TOK_SCONST; return; } } @@ -523,7 +590,7 @@ void NextToken (void) if (CurC == '=') { SetTok (TOK_NE); } else { - nxttok = TOK_BOOL_NOT; + NextTok.Tok = TOK_BOOL_NOT; } break; @@ -536,7 +603,7 @@ void NextToken (void) if (CurC == '=') { SetTok (TOK_MOD_ASSIGN); } else { - nxttok = TOK_MOD; + NextTok.Tok = TOK_MOD; } break; @@ -550,7 +617,7 @@ void NextToken (void) SetTok (TOK_AND_ASSIGN); break; default: - nxttok = TOK_AND; + NextTok.Tok = TOK_AND; } break; @@ -571,7 +638,7 @@ void NextToken (void) if (CurC == '=') { SetTok (TOK_MUL_ASSIGN); } else { - nxttok = TOK_STAR; + NextTok.Tok = TOK_STAR; } break; @@ -585,7 +652,7 @@ void NextToken (void) SetTok (TOK_PLUS_ASSIGN); break; default: - nxttok = TOK_PLUS; + NextTok.Tok = TOK_PLUS; } break; @@ -606,7 +673,7 @@ void NextToken (void) SetTok (TOK_PTR_REF); break; default: - nxttok = TOK_MINUS; + NextTok.Tok = TOK_MINUS; } break; @@ -617,10 +684,10 @@ void NextToken (void) if (CurC == '.') { SetTok (TOK_ELLIPSIS); } else { - unknown (CurC); + UnknownChar (CurC); } } else { - nxttok = TOK_DOT; + NextTok.Tok = TOK_DOT; } break; @@ -629,7 +696,7 @@ void NextToken (void) if (CurC == '=') { SetTok (TOK_DIV_ASSIGN); } else { - nxttok = TOK_DIV; + NextTok.Tok = TOK_DIV; } break; @@ -652,11 +719,11 @@ void NextToken (void) if (CurC == '=') { SetTok (TOK_SHL_ASSIGN); } else { - nxttok = TOK_SHL; + NextTok.Tok = TOK_SHL; } break; default: - nxttok = TOK_LT; + NextTok.Tok = TOK_LT; } break; @@ -665,7 +732,7 @@ void NextToken (void) if (CurC == '=') { SetTok (TOK_EQ); } else { - nxttok = TOK_ASSIGN; + NextTok.Tok = TOK_ASSIGN; } break; @@ -680,11 +747,11 @@ void NextToken (void) if (CurC == '=') { SetTok (TOK_SHR_ASSIGN); } else { - nxttok = TOK_SHR; + NextTok.Tok = TOK_SHR; } break; default: - nxttok = TOK_GT; + NextTok.Tok = TOK_GT; } break; @@ -705,7 +772,7 @@ void NextToken (void) if (CurC == '=') { SetTok (TOK_XOR_ASSIGN); } else { - nxttok = TOK_XOR; + NextTok.Tok = TOK_XOR; } break; @@ -723,7 +790,7 @@ void NextToken (void) SetTok (TOK_OR_ASSIGN); break; default: - nxttok = TOK_OR; + NextTok.Tok = TOK_OR; } break; @@ -735,20 +802,8 @@ void NextToken (void) SetTok (TOK_COMP); break; - case '#': - /* Skip it and following whitespace */ - do { - NextChar (); - } while (CurC == ' '); - if (!IsSym (token) || strcmp (token, "pragma") != 0) { - /* OOPS - should not happen */ - Error (ERR_CPP_DIRECTIVE_EXPECTED); - } - nxttok = TOK_PRAGMA; - break; - default: - unknown (CurC); + UnknownChar (CurC); } @@ -756,88 +811,134 @@ void NextToken (void) -void Consume (token_t Token, unsigned ErrNum) +void SkipTokens (const token_t* TokenList, unsigned TokenCount) +/* Skip tokens until we reach TOK_CEOF or a token in the given token list. + * This routine is used for error recovery. + */ +{ + while (CurTok.Tok != TOK_CEOF) { + + /* Check if the current token is in the token list */ + unsigned I; + for (I = 0; I < TokenCount; ++I) { + if (CurTok.Tok == TokenList[I]) { + /* Found a token in the list */ + return; + } + } + + /* Not in the list: Skip it */ + NextToken (); + + } +} + + + +int Consume (token_t Token, const char* ErrorMsg) /* Eat token if it is the next in the input stream, otherwise print an error - * message. + * message. Returns true if the token was found and false otherwise. */ { - if (curtok == Token) { + if (CurTok.Tok == Token) { NextToken (); + return 1; } else { - Error (ErrNum); + Error (ErrorMsg); + return 0; } } -void ConsumeColon (void) +int ConsumeColon (void) /* Check for a colon and skip it. */ { - Consume (TOK_COLON, ERR_COLON_EXPECTED); + return Consume (TOK_COLON, "`:' expected"); } -void ConsumeSemi (void) +int ConsumeSemi (void) /* Check for a semicolon and skip it. */ { /* Try do be smart about typos... */ - if (curtok == TOK_SEMI) { - NextToken (); + if (CurTok.Tok == TOK_SEMI) { + NextToken (); + return 1; + } else { + Error ("`;' expected"); + if (CurTok.Tok == TOK_COLON || CurTok.Tok == TOK_COMMA) { + NextToken (); + } + return 0; + } +} + + + +int ConsumeComma (void) +/* Check for a comma and skip it. */ +{ + /* Try do be smart about typos... */ + if (CurTok.Tok == TOK_COMMA) { + NextToken (); + return 1; } else { - Error (ERR_SEMICOLON_EXPECTED); - if (curtok == TOK_COLON || curtok == TOK_COMMA) { + Error ("`,' expected"); + if (CurTok.Tok == TOK_SEMI) { NextToken (); } + return 0; } } -void ConsumeLParen (void) +int ConsumeLParen (void) /* Check for a left parenthesis and skip it */ { - Consume (TOK_LPAREN, ERR_LPAREN_EXPECTED); + return Consume (TOK_LPAREN, "`(' expected"); } -void ConsumeRParen (void) +int ConsumeRParen (void) /* Check for a right parenthesis and skip it */ { - Consume (TOK_RPAREN, ERR_RPAREN_EXPECTED); + return Consume (TOK_RPAREN, "`)' expected"); } -void ConsumeLBrack (void) +int ConsumeLBrack (void) /* Check for a left bracket and skip it */ { - Consume (TOK_LBRACK, ERR_LBRACK_EXPECTED); + return Consume (TOK_LBRACK, "`[' expected"); } -void ConsumeRBrack (void) +int ConsumeRBrack (void) /* Check for a right bracket and skip it */ { - Consume (TOK_RBRACK, ERR_RBRACK_EXPECTED); + return Consume (TOK_RBRACK, "`]' expected"); } -void ConsumeLCurly (void) +int ConsumeLCurly (void) /* Check for a left curly brace and skip it */ { - Consume (TOK_LCURLY, ERR_LCURLY_EXPECTED); + return Consume (TOK_LCURLY, "`{' expected"); } -void ConsumeRCurly (void) +int ConsumeRCurly (void) /* Check for a right curly brace and skip it */ { - Consume (TOK_RCURLY, ERR_RCURLY_EXPECTED); + return Consume (TOK_RCURLY, "`}' expected"); }