-/*
- * scanner.c
- *
- * Ullrich von Bassewitz, 07.06.1998
- */
+/*****************************************************************************/
+/* */
+/* scanner.c */
+/* */
+/* Lexical scanner (tokenizer) for the cc65 C compiler */
+/* */
+/* */
+/* */
+/* (C) 1998-2001 Ullrich von Bassewitz */
+/* Wacholderweg 14 */
+/* D-70597 Stuttgart */
+/* EMail: uz@musoftware.de */
+/* */
+/* */
+/* This software is provided 'as-is', without any expressed or implied */
+/* warranty. In no event will the authors be held liable for any damages */
+/* arising from the use of this software. */
+/* */
+/* Permission is granted to anyone to use this software for any purpose, */
+/* including commercial applications, and to alter it and redistribute it */
+/* freely, subject to the following restrictions: */
+/* */
+/* 1. The origin of this software must not be misrepresented; you must not */
+/* claim that you wrote the original software. If you use this software */
+/* in a product, an acknowledgment in the product documentation would be */
+/* appreciated but is not required. */
+/* 2. Altered source versions must be plainly marked as such, and must not */
+/* be misrepresented as being the original software. */
+/* 3. This notice may not be removed or altered from any source */
+/* distribution. */
+/* */
+/*****************************************************************************/
#include <errno.h>
#include <ctype.h>
-#include "ctrans.h"
+/* common */
+#include "chartype.h"
+#include "tgttrans.h"
+
+/* cc65 */
#include "datatype.h"
#include "error.h"
#include "function.h"
#include "global.h"
#include "ident.h"
#include "input.h"
-#include "io.h"
#include "litpool.h"
#include "preproc.h"
#include "symtab.h"
#define TT_EXT 1 /* cc65 extension */
/* Token table */
-static struct Keyword {
+static const struct Keyword {
char* Key; /* Keyword name */
unsigned char Tok; /* The token */
unsigned char Type; /* Token type */
} Keywords [] = {
+ { "__A__", TOK_A, TT_C },
{ "__AX__", TOK_AX, TT_C },
{ "__EAX__", TOK_EAX, TT_C },
+ { "__X__", TOK_X, TT_C },
+ { "__Y__", TOK_Y, TT_C },
{ "__asm__", TOK_ASM, TT_C },
{ "__attribute__", TOK_ATTRIBUTE, TT_C },
+ { "__far__", TOK_FAR, TT_C },
{ "__fastcall__", TOK_FASTCALL, TT_C },
{ "asm", TOK_ASM, TT_EXT },
{ "auto", TOK_AUTO, TT_C },
{ "else", TOK_ELSE, TT_C },
{ "enum", TOK_ENUM, TT_C },
{ "extern", TOK_EXTERN, TT_C },
+ { "far", TOK_FAR, TT_EXT },
{ "fastcall", TOK_FASTCALL, TT_EXT },
{ "float", TOK_FLOAT, TT_C },
{ "for", TOK_FOR, TT_C },
-static int FindKey (char* Key)
+static int FindKey (const char* Key)
/* Find a keyword and return the token. Return IDENT if the token is not a
* keyword.
*/
-static int skipwhite (void)
+static int SkipWhite (void)
/* Skip white space in the input stream, reading and preprocessing new lines
* if necessary. Return 0 if end of file is reached, return 1 otherwise.
*/
{
while (1) {
- while (*lptr == 0) {
+ while (CurC == 0) {
if (NextLine () == 0) {
return 0;
}
- preprocess ();
+ Preprocess ();
}
- if (*lptr == ' ' || *lptr == '\r') {
- ++lptr;
+ if (IsSpace (CurC)) {
+ NextChar ();
} else {
return 1;
}
-void symname (char *s)
+void SymName (char* s)
/* Get a symbol from the input stream and store it in s. The current input
 * character is taken unconditionally as the first character, then input is
 * consumed for as long as identifier characters or digits follow. At most
 * MAX_IDENTLEN characters are stored; excess characters are skipped but not
 * stored. The result is always zero terminated, so s must provide room for
 * MAX_IDENTLEN+1 characters.
 */
{
unsigned k = 0;
do {
- if (k != MAX_IDENTLEN) {
- ++k;
- *s++ = *lptr;
+ if (k != MAX_IDENTLEN) {
+ ++k;
+ *s++ = CurC;
}
- ++lptr;
- } while (IsIdent (*lptr) || isdigit (*lptr));
+ NextChar ();
+ } while (IsIdent (CurC) || IsDigit (CurC));
*s = '\0';
}
-int issym (char *s)
+int IsSym (char *s)
/* Get symbol from input stream or return 0 if not a symbol. */
{
- if (IsIdent (*lptr)) {
- symname (s);
+ if (IsIdent (CurC)) {
+ SymName (s);
return 1;
} else {
return 0;
-static void unknown (unsigned char c)
+static void UnknownChar (char C)
/* Report an invalid input character and skip it. The character code is
 * masked to 8 bits before printing, so a negative char value is shown as
 * two hex digits instead of being sign extended.
 */
{
- Error (ERR_INVALID_CHAR, c);
- gch (); /* Skip */
+ Error ("Invalid input character with code %02X", C & 0xFF);
+ NextChar (); /* Skip */
}
static unsigned hexval (int c)
/* Convert a hex digit into a value */
{
- if (!isxdigit (c)) {
- Error (ERR_ILLEGAL_HEX_DIGIT);
+ if (!IsXDigit (c)) {
+ Error ("Invalid hexadecimal digit: `%c'", c);
}
- if (isdigit (c)) {
+ if (IsDigit (c)) {
return c - '0';
} else {
return toupper (c) - 'A' + 10;
static void SetTok (int tok)
-/* set nxttok and bump line ptr */
+/* Set NextTok.Tok to tok and skip the current input character */
{
- nxttok = tok;
- ++lptr;
+ NextTok.Tok = tok;
+ NextChar ();
}
-static int parsechar (int c)
+static int ParseChar (void)
/* Parse one character of a character or string constant, starting at the
 * current input character, and return its value after sign extension via
 * SignExtendChar. A backslash introduces an escape sequence: b, f, r, n, t,
 * double quote, single quote and backslash map to the matching C escape,
 * x or X reads two hex digits, and an octal digit reads an octal value. Any
 * other escape is reported as an illegal character constant and yields a
 * blank. The character read (or the last character of the escape sequence)
 * is skipped before returning.
 */
{
int i;
- int val;
+ unsigned val;
+ int C;
/* A backslash starts an escape sequence; anything else is taken literally */
- if (c == '\\') {
- switch (c = gch ()) {
+ if (CurC == '\\') {
+ NextChar ();
+ switch (CurC) {
case 'b':
- c = '\b';
+ C = '\b';
break;
- case 'f':
- c = '\f';
+ case 'f':
+ C = '\f';
break;
case 'r':
- c = '\r';
+ C = '\r';
break;
case 'n':
- c = '\n';
+ C = '\n';
break;
case 't':
- c = '\t';
+ C = '\t';
break;
case '\"':
- c = '\"';
+ C = '\"';
break;
case '\'':
- c = '\'';
+ C = '\'';
break;
case '\\':
- c = '\\';
+ C = '\\';
break;
case 'x':
case 'X':
/* Hex character constant: exactly two characters after the x are read
 * as the high and low nibble. hexval reports an error for a character
 * that is not a hex digit but still returns a value.
 */
- val = hexval (gch ()) << 4;
- c = val | hexval (gch ()); /* Do not translate */
+ NextChar ();
+ val = hexval (CurC) << 4;
+ NextChar ();
+ C = val | hexval (CurC); /* Do not translate */
break;
case '0':
case '1':
/* Octal constant: the first digit was matched by the case label, more
 * digits are consumed while the lookahead NextC is an octal digit.
 * NOTE(review): only the labels 0 and 1 are visible here, and the
 * i++ < 4 bound admits up to four further digits (five in total),
 * whereas a C octal escape has at most three digits - confirm whether
 * labels 2 to 7 exist elsewhere and whether the bound is intended.
 */
i = 0;
- val = c - '0';
- while ((c = *lptr) >= '0' && c <= '7' && i++ < 4) {
- val = (val << 3) | (c - '0');
- gch ();
- }
- c = val; /* Do not translate */
+ C = CurC - '0';
+ while (NextC >= '0' && NextC <= '7' && i++ < 4) {
+ NextChar ();
+ C = (C << 3) | (CurC - '0');
+ }
+ break;
+ default:
+ Error ("Illegal character constant");
+ C = ' ';
break;
- default:
- Error (ERR_ILLEGAL_CHARCONST);
- }
+ }
+ } else {
+ C = CurC;
}
+ /* Skip the character read */
+ NextChar ();
+
/* Do correct sign extension */
- return SignExtendChar (c);
+ return SignExtendChar (C);
}
static void CharConst (void)
/* Parse a character constant. The opening quote is skipped, one (possibly
 * escaped) character is read with ParseChar, and the closing quote is
 * skipped if present; a missing closing quote is reported as an error and
 * nothing is skipped in that case. NextTok is then set up as a TOK_CCONST
 * of type int whose value is the character translated into the target
 * character set and sign extended.
 * NOTE(review): ParseChar already returns a sign extended value and marks
 * hex and octal escapes as not to be translated, yet the value is passed
 * through TgtTranslateChar here regardless - confirm this is intended.
 */
{
- int c;
+ int C;
/* Skip the quote */
- ++lptr;
+ NextChar ();
/* Get character */
- c = parsechar (cgch ());
+ C = ParseChar ();
/* Check for closing quote */
- if (cgch () != '\'') {
- Error (ERR_QUOTE_EXPECTED);
+ if (CurC != '\'') {
+ Error ("`\'' expected");
+ } else {
+ /* Skip the quote */
+ NextChar ();
}
/* Setup values and attributes */
- nxttok = TOK_CCONST;
- nxtval = SignExtendChar (ctrans (c)); /* Translate into target charset */
- nxttype = type_int; /* Character constants have type int */
+ NextTok.Tok = TOK_CCONST;
+
+ /* Translate into target charset */
+ NextTok.IVal = SignExtendChar (TgtTranslateChar (C));
+
+ /* Character constants have type int */
+ NextTok.Type = type_int;
}
static void StringConst (void)
/* Parse a quoted string */
{
- nxtval = GetLiteralOffs ();
- nxttok = TOK_SCONST;
+ NextTok.IVal = GetLiteralPoolOffs ();
+ NextTok.Tok = TOK_SCONST;
/* Be sure to concatenate strings */
- while (*lptr == '\"') {
+ while (CurC == '\"') {
/* Skip the quote char */
- ++lptr;
+ NextChar ();
- while (*lptr != '\"') {
- if (*lptr == 0) {
- Error (ERR_UNEXPECTED_NEWLINE);
+ while (CurC != '\"') {
+ if (CurC == '\0') {
+ Error ("Unexpected newline");
break;
}
- AddLiteralChar (parsechar (gch()));
+ AddLiteralChar (ParseChar ());
}
/* Skip closing quote char if there was one */
- cgch ();
+ NextChar ();
/* Skip white space, read new input */
- skipwhite ();
+ SkipWhite ();
}
void NextToken (void)
/* Get next token from input stream */
{
- char c;
ident token;
+ /* We have to skip white space here before shifting tokens, since the
+ * tokens and the current line info is invalid at startup and will get
+ * initialized by reading the first time from the file. Remember if
+ * we were at end of input and handle that later.
+ */
+ int GotEOF = (SkipWhite() == 0);
+
/* Current token is the lookahead token */
+ if (CurTok.LI) {
+ ReleaseLineInfo (CurTok.LI);
+ }
CurTok = NextTok;
/* Remember the starting position of the next token */
- NextTok.Pos = GetCurrentLine();
+ NextTok.LI = UseLineInfo (GetCurLineInfo ());
- /* Skip spaces and read the next line if needed */
- if (skipwhite () == 0) {
+ /* Now handle end of input. */
+ if (GotEOF) {
/* End of file reached */
- nxttok = TOK_CEOF;
+ NextTok.Tok = TOK_CEOF;
return;
}
/* Determine the next token from the lookahead */
- c = *lptr;
- if (isdigit (c)) {
+ if (IsDigit (CurC)) {
/* A number */
int HaveSuffix; /* True if we have a type suffix */
base = 10;
types = IT_INT | IT_LONG | IT_ULONG;
- if (c == '0') {
+ if (CurC == '0') {
/* Octal or hex constants may also be of type unsigned int */
types = IT_INT | IT_UINT | IT_LONG | IT_ULONG;
/* gobble 0 and examine next char */
- if (toupper (*++lptr) == 'X') {
+ NextChar ();
+ if (toupper (CurC) == 'X') {
base = 16;
- nxttype = type_uint;
- ++lptr; /* gobble "x" */
+ NextTok.Type = type_uint;
+ NextChar (); /* gobble "x" */
} else {
base = 8;
}
}
while (1) {
- c = *lptr;
- if (isdigit (c)) {
- k = k * base + (c - '0');
- } else if (base == 16 && isxdigit (c)) {
- k = (k << 4) + hexval (c);
+ if (IsDigit (CurC)) {
+ k = k * base + (CurC - '0');
+ } else if (base == 16 && IsXDigit (CurC)) {
+ k = (k << 4) + hexval (CurC);
} else {
break; /* not digit */
}
- ++lptr; /* gobble char */
+ NextChar (); /* gobble char */
}
/* Check for a suffix */
HaveSuffix = 1;
- c = toupper (*lptr);
- if (c == 'U') {
+ if (CurC == 'u' || CurC == 'U') {
/* Unsigned type */
- ++lptr;
- if (toupper (*lptr) != 'L') {
+ NextChar ();
+ if (toupper (CurC) != 'L') {
types = IT_UINT | IT_ULONG;
} else {
- ++lptr;
+ NextChar ();
types = IT_ULONG;
}
- } else if (c == 'L') {
+ } else if (CurC == 'l' || CurC == 'L') {
/* Long type */
- ++lptr;
- if (toupper (*lptr) != 'U') {
+ NextChar ();
+ if (toupper (CurC) != 'U') {
types = IT_LONG | IT_ULONG;
} else {
- ++lptr;
+ NextChar ();
types = IT_ULONG;
}
} else {
* warning.
*/
if (k <= 0xFFFF && (types & IT_UINT) == 0 && !HaveSuffix) {
- Warning (WARN_CONSTANT_IS_LONG);
+ Warning ("Constant is long");
}
}
if (k > 0xFFFF) {
/* Now set the type string to the smallest type in types */
if (types & IT_INT) {
- nxttype = type_int;
+ NextTok.Type = type_int;
} else if (types & IT_UINT) {
- nxttype = type_uint;
+ NextTok.Type = type_uint;
} else if (types & IT_LONG) {
- nxttype = type_long;
+ NextTok.Type = type_long;
} else {
- nxttype = type_ulong;
+ NextTok.Type = type_ulong;
}
/* Set the value and the token */
- nxtval = k;
- nxttok = TOK_ICONST;
+ NextTok.IVal = k;
+ NextTok.Tok = TOK_ICONST;
return;
}
- if (issym (token)) {
+ if (IsSym (token)) {
/* Check for a keyword */
- if ((nxttok = FindKey (token)) != TOK_IDENT) {
+ if ((NextTok.Tok = FindKey (token)) != TOK_IDENT) {
/* Reserved word found */
return;
}
if (token [0] == '_') {
/* Special symbols */
if (strcmp (token, "__FILE__") == 0) {
- nxtval = AddLiteral (GetCurrentFile());
- nxttok = TOK_SCONST;
+ NextTok.IVal = AddLiteral (GetCurrentFile());
+ NextTok.Tok = TOK_SCONST;
return;
} else if (strcmp (token, "__LINE__") == 0) {
- nxttok = TOK_ICONST;
- nxtval = GetCurrentLine();
- nxttype = type_int;
- return;
- } else if (strcmp (token, "__fixargs__") == 0) {
- nxttok = TOK_ICONST;
- nxtval = GetParamSize (CurrentFunc);
- nxttype = type_uint;
+ NextTok.Tok = TOK_ICONST;
+ NextTok.IVal = GetCurrentLine();
+ NextTok.Type = type_int;
return;
} else if (strcmp (token, "__func__") == 0) {
/* __func__ is only defined in functions */
if (CurrentFunc) {
- nxtval = AddLiteral (GetFuncName (CurrentFunc));
- nxttok = TOK_SCONST;
+ NextTok.IVal = AddLiteral (GetFuncName (CurrentFunc));
+ NextTok.Tok = TOK_SCONST;
return;
}
}
}
/* Monstrous switch statement ahead... */
- switch (c) {
+ switch (CurC) {
case '!':
- if (*++lptr == '=') {
+ NextChar ();
+ if (CurC == '=') {
SetTok (TOK_NE);
} else {
- nxttok = TOK_BOOL_NOT;
+ NextTok.Tok = TOK_BOOL_NOT;
}
break;
break;
case '%':
- if (*++lptr == '=') {
+ NextChar ();
+ if (CurC == '=') {
SetTok (TOK_MOD_ASSIGN);
} else {
- nxttok = TOK_MOD;
+ NextTok.Tok = TOK_MOD;
}
break;
case '&':
- switch (*++lptr) {
+ NextChar ();
+ switch (CurC) {
case '&':
SetTok (TOK_BOOL_AND);
break;
SetTok (TOK_AND_ASSIGN);
break;
default:
- nxttok = TOK_AND;
+ NextTok.Tok = TOK_AND;
}
break;
break;
case '*':
- if (*++lptr == '=') {
+ NextChar ();
+ if (CurC == '=') {
SetTok (TOK_MUL_ASSIGN);
} else {
- nxttok = TOK_STAR;
+ NextTok.Tok = TOK_STAR;
}
break;
case '+':
- switch (*++lptr) {
+ NextChar ();
+ switch (CurC) {
case '+':
SetTok (TOK_INC);
break;
SetTok (TOK_PLUS_ASSIGN);
break;
default:
- nxttok = TOK_PLUS;
+ NextTok.Tok = TOK_PLUS;
}
break;
break;
case '-':
- switch (*++lptr) {
+ NextChar ();
+ switch (CurC) {
case '-':
SetTok (TOK_DEC);
break;
SetTok (TOK_PTR_REF);
break;
default:
- nxttok = TOK_MINUS;
+ NextTok.Tok = TOK_MINUS;
}
break;
case '.':
- if (*++lptr == '.') {
- if (*++lptr == '.') {
+ NextChar ();
+ if (CurC == '.') {
+ NextChar ();
+ if (CurC == '.') {
SetTok (TOK_ELLIPSIS);
} else {
- unknown (*lptr);
+ UnknownChar (CurC);
}
} else {
- nxttok = TOK_DOT;
+ NextTok.Tok = TOK_DOT;
}
break;
case '/':
- if (*++lptr == '=') {
+ NextChar ();
+ if (CurC == '=') {
SetTok (TOK_DIV_ASSIGN);
} else {
- nxttok = TOK_DIV;
+ NextTok.Tok = TOK_DIV;
}
break;
break;
case '<':
- switch (*++lptr) {
+ NextChar ();
+ switch (CurC) {
case '=':
SetTok (TOK_LE);
break;
case '<':
- if (*++lptr == '=') {
+ NextChar ();
+ if (CurC == '=') {
SetTok (TOK_SHL_ASSIGN);
} else {
- nxttok = TOK_SHL;
+ NextTok.Tok = TOK_SHL;
}
break;
default:
- nxttok = TOK_LT;
+ NextTok.Tok = TOK_LT;
}
break;
case '=':
- if (*++lptr == '=') {
+ NextChar ();
+ if (CurC == '=') {
SetTok (TOK_EQ);
} else {
- nxttok = TOK_ASSIGN;
+ NextTok.Tok = TOK_ASSIGN;
}
break;
case '>':
- switch (*++lptr) {
+ NextChar ();
+ switch (CurC) {
case '=':
SetTok (TOK_GE);
break;
case '>':
- if (*++lptr == '=') {
+ NextChar ();
+ if (CurC == '=') {
SetTok (TOK_SHR_ASSIGN);
} else {
- nxttok = TOK_SHR;
+ NextTok.Tok = TOK_SHR;
}
break;
default:
- nxttok = TOK_GT;
+ NextTok.Tok = TOK_GT;
}
break;
break;
case '^':
- if (*++lptr == '=') {
+ NextChar ();
+ if (CurC == '=') {
SetTok (TOK_XOR_ASSIGN);
} else {
- nxttok = TOK_XOR;
+ NextTok.Tok = TOK_XOR;
}
break;
break;
case '|':
- switch (*++lptr) {
+ NextChar ();
+ switch (CurC) {
case '|':
SetTok (TOK_BOOL_OR);
break;
SetTok (TOK_OR_ASSIGN);
break;
default:
- nxttok = TOK_OR;
+ NextTok.Tok = TOK_OR;
}
break;
break;
case '#':
- while (*++lptr == ' ') ; /* Skip it and following whitespace */
- if (!issym (token) || strcmp (token, "pragma") != 0) {
+ /* Skip it and following whitespace */
+ do {
+ NextChar ();
+ } while (CurC == ' ');
+ if (!IsSym (token) || strcmp (token, "pragma") != 0) {
/* OOPS - should not happen */
- Error (ERR_CPP_DIRECTIVE_EXPECTED);
+ Error ("Preprocessor directive expected");
}
- nxttok = TOK_PRAGMA;
+ NextTok.Tok = TOK_PRAGMA;
break;
default:
- unknown (c);
+ UnknownChar (CurC);
}
-void Consume (token_t Token, unsigned ErrNum)
+void SkipTokens (const token_t* TokenList, unsigned TokenCount)
+/* Skip tokens until we reach TOK_CEOF or a token in the given token list.
+ * TokenList points to TokenCount tokens to stop at. On return, the current
+ * token is either TOK_CEOF or one of the listed tokens; if the current token
+ * already matches on entry, nothing is skipped. With TokenCount zero, all
+ * tokens up to TOK_CEOF are skipped.
+ * This routine is used for error recovery.
+ */
+{
+ while (CurTok.Tok != TOK_CEOF) {
+
+ /* Check if the current token is in the token list */
+ unsigned I;
+ for (I = 0; I < TokenCount; ++I) {
+ if (CurTok.Tok == TokenList[I]) {
+ /* Found a token in the list */
+ return;
+ }
+ }
+
+ /* Not in the list: Skip it */
+ NextToken ();
+
+ }
+}
+
+
+
+void Consume (token_t Token, const char* ErrorMsg)
/* Eat token if it is the next in the input stream, otherwise print an error
 * message. Token is the expected token; ErrorMsg is the plain text reported
 * when the current token differs. In the error case no token is consumed.
 * ErrorMsg is passed as an argument to a fixed format instead of being used
 * as the format string itself, so a percent sign in the message cannot be
 * misread as a conversion specification.
 */
{
- if (curtok == Token) {
+ if (CurTok.Tok == Token) {
NextToken ();
} else {
- Error (ErrNum);
+ Error ("%s", ErrorMsg);
}
}
void ConsumeColon (void)
/* Check for a colon and skip it. If the current token is not a colon, an
 * error is reported through Consume and no token is skipped.
 */
{
- Consume (TOK_COLON, ERR_COLON_EXPECTED);
+ Consume (TOK_COLON, "`:' expected");
}
/* Check for a semicolon and skip it. */
{
/* Try to be smart about typos... */
- if (curtok == TOK_SEMI) {
+ if (CurTok.Tok == TOK_SEMI) {
+ NextToken ();
+ } else {
+ Error ("`;' expected");
+ if (CurTok.Tok == TOK_COLON || CurTok.Tok == TOK_COMMA) {
+ NextToken ();
+ }
+ }
+}
+
+
+
+void ConsumeComma (void)
+/* Check for a comma and skip it. */
+{
+ /* Try to be smart about typos... */
+ if (CurTok.Tok == TOK_COMMA) {
NextToken ();
} else {
- Error (ERR_SEMICOLON_EXPECTED);
- if (curtok == TOK_COLON || curtok == TOK_COMMA) {
+ Error ("`,' expected");
+ if (CurTok.Tok == TOK_SEMI) {
NextToken ();
}
}
void ConsumeLParen (void)
/* Check for a left parenthesis and skip it. If the current token is not a
 * left parenthesis, an error is reported through Consume and no token is
 * skipped.
 */
{
- Consume (TOK_LPAREN, ERR_LPAREN_EXPECTED);
+ Consume (TOK_LPAREN, "`(' expected");
}
void ConsumeRParen (void)
/* Check for a right parenthesis and skip it. If the current token is not a
 * right parenthesis, an error is reported through Consume and no token is
 * skipped.
 */
{
- Consume (TOK_RPAREN, ERR_RPAREN_EXPECTED);
+ Consume (TOK_RPAREN, "`)' expected");
}
void ConsumeLBrack (void)
/* Check for a left bracket and skip it. If the current token is not a left
 * bracket, an error is reported through Consume and no token is skipped.
 */
{
- Consume (TOK_LBRACK, ERR_LBRACK_EXPECTED);
+ Consume (TOK_LBRACK, "`[' expected");
}
void ConsumeRBrack (void)
/* Check for a right bracket and skip it. If the current token is not a right
 * bracket, an error is reported through Consume and no token is skipped.
 */
{
- Consume (TOK_RBRACK, ERR_RBRACK_EXPECTED);
+ Consume (TOK_RBRACK, "`]' expected");
}
void ConsumeLCurly (void)
/* Check for a left curly brace and skip it. If the current token is not a
 * left curly brace, an error is reported through Consume and no token is
 * skipped.
 */
{
- Consume (TOK_LCURLY, ERR_LCURLY_EXPECTED);
+ Consume (TOK_LCURLY, "`{' expected");
}
void ConsumeRCurly (void)
/* Check for a right curly brace and skip it. If the current token is not a
 * right curly brace, an error is reported through Consume and no token is
 * skipped.
 */
{
- Consume (TOK_RCURLY, ERR_RCURLY_EXPECTED);
+ Consume (TOK_RCURLY, "`}' expected");
}