-/*
- * scanner.c
- *
- * Ullrich von Bassewitz, 07.06.1998
- */
+/*****************************************************************************/
+/* */
+/* scanner.c */
+/* */
+/* Lexical scanner (tokenizer) for the cc65 C compiler */
+/* */
+/* */
+/* */
+/* (C) 1998-2010, Ullrich von Bassewitz */
+/* Roemerstrasse 52 */
+/* D-70794 Filderstadt */
+/* EMail: uz@cc65.org */
+/* */
+/* */
+/* This software is provided 'as-is', without any expressed or implied */
+/* warranty. In no event will the authors be held liable for any damages */
+/* arising from the use of this software. */
+/* */
+/* Permission is granted to anyone to use this software for any purpose, */
+/* including commercial applications, and to alter it and redistribute it */
+/* freely, subject to the following restrictions: */
+/* */
+/* 1. The origin of this software must not be misrepresented; you must not */
+/* claim that you wrote the original software. If you use this software */
+/* in a product, an acknowledgment in the product documentation would be */
+/* appreciated but is not required. */
+/* 2. Altered source versions must be plainly marked as such, and must not */
+/* be misrepresented as being the original software. */
+/* 3. This notice may not be removed or altered from any source */
+/* distribution. */
+/* */
+/*****************************************************************************/
#include <string.h>
#include <errno.h>
#include <ctype.h>
+#include <math.h>
+
+/* common */
+#include "chartype.h"
+#include "fp.h"
+#include "tgttrans.h"
-#include "ctrans.h"
+/* cc65 */
#include "datatype.h"
#include "error.h"
#include "function.h"
#include "global.h"
+#include "hexval.h"
#include "ident.h"
-#include "io.h"
+#include "input.h"
#include "litpool.h"
#include "preproc.h"
-#include "symtab.h"
-#include "util.h"
#include "scanner.h"
+#include "standard.h"
+#include "symtab.h"
/*****************************************************************************/
-/* data */
+/* data */
/*****************************************************************************/
-Token CurTok; /* The current token */
-Token NextTok; /* The next token */
+Token CurTok; /* The current token */
+Token NextTok; /* The next token */
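/* Token types: one bit per language standard, shifted to the position given
** by the matching STD_xxx constant, so that several standards can be or'ed
** together in the Std field of a keyword table entry.
*/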
-#define TT_C 0 /* ANSI C token */
-#define TT_EXT 1 /* cc65 extension */
+enum {
+ TT_C89 = 0x01 << STD_C89, /* Token valid in C89 */
+ TT_C99 = 0x01 << STD_C99, /* Token valid in C99 */
+ TT_CC65 = 0x01 << STD_CC65 /* Token valid in cc65 */
+};
/* Token table. FindKey() searches it with bsearch(), so the entries must stay
** sorted by keyword name in the order that CmpKey defines.
*/
-static struct Keyword {
- char* Key; /* Keyword name */
- unsigned char Tok; /* The token */
- unsigned char Type; /* Token type */
+static const struct Keyword {
+    const char*     Key;    /* Keyword name (a string literal, never written) */
+    unsigned char   Tok;    /* The token */
+    unsigned char   Std;    /* Token supported in which standards? */
} Keywords [] = {
- { "__AX__", TOK_AX, TT_C },
- { "__EAX__", TOK_EAX, TT_C },
- { "__asm__", TOK_ASM, TT_C },
- { "__attribute__", TOK_ATTRIBUTE, TT_C },
- { "__fastcall__", TOK_FASTCALL, TT_C },
- { "asm", TOK_ASM, TT_EXT },
- { "auto", TOK_AUTO, TT_C },
- { "break", TOK_BREAK, TT_C },
- { "case", TOK_CASE, TT_C },
- { "char", TOK_CHAR, TT_C },
- { "const", TOK_CONST, TT_C },
- { "continue", TOK_CONTINUE, TT_C },
- { "default", TOK_DEFAULT, TT_C },
- { "do", TOK_DO, TT_C },
- { "double", TOK_DOUBLE, TT_C },
- { "else", TOK_ELSE, TT_C },
- { "enum", TOK_ENUM, TT_C },
- { "extern", TOK_EXTERN, TT_C },
- { "fastcall", TOK_FASTCALL, TT_EXT },
- { "float", TOK_FLOAT, TT_C },
- { "for", TOK_FOR, TT_C },
- { "goto", TOK_GOTO, TT_C },
- { "if", TOK_IF, TT_C },
- { "int", TOK_INT, TT_C },
- { "long", TOK_LONG, TT_C },
- { "register", TOK_REGISTER, TT_C },
- { "return", TOK_RETURN, TT_C },
- { "short", TOK_SHORT, TT_C },
- { "signed", TOK_SIGNED, TT_C },
- { "sizeof", TOK_SIZEOF, TT_C },
- { "static", TOK_STATIC, TT_C },
- { "struct", TOK_STRUCT, TT_C },
- { "switch", TOK_SWITCH, TT_C },
- { "typedef", TOK_TYPEDEF, TT_C },
- { "union", TOK_UNION, TT_C },
- { "unsigned", TOK_UNSIGNED, TT_C },
- { "void", TOK_VOID, TT_C },
- { "volatile", TOK_VOLATILE, TT_C },
- { "while", TOK_WHILE, TT_C },
+    { "_Pragma", TOK_PRAGMA, TT_C89 | TT_C99 | TT_CC65 }, /* !! */
+    { "__AX__", TOK_AX, TT_C89 | TT_C99 | TT_CC65 },
+    { "__A__", TOK_A, TT_C89 | TT_C99 | TT_CC65 },
+    { "__EAX__", TOK_EAX, TT_C89 | TT_C99 | TT_CC65 },
+    { "__X__", TOK_X, TT_C89 | TT_C99 | TT_CC65 },
+    { "__Y__", TOK_Y, TT_C89 | TT_C99 | TT_CC65 },
+    { "__asm__", TOK_ASM, TT_C89 | TT_C99 | TT_CC65 },
+    { "__attribute__", TOK_ATTRIBUTE, TT_C89 | TT_C99 | TT_CC65 },
+    { "__cdecl__", TOK_CDECL, TT_C89 | TT_C99 | TT_CC65 },
+    { "__far__", TOK_FAR, TT_C89 | TT_C99 | TT_CC65 },
+    { "__fastcall__", TOK_FASTCALL, TT_C89 | TT_C99 | TT_CC65 },
+    { "__inline__", TOK_INLINE, TT_C89 | TT_C99 | TT_CC65 },
+    { "__near__", TOK_NEAR, TT_C89 | TT_C99 | TT_CC65 },
+    { "asm", TOK_ASM, TT_CC65 },
+    { "auto", TOK_AUTO, TT_C89 | TT_C99 | TT_CC65 },
+    { "break", TOK_BREAK, TT_C89 | TT_C99 | TT_CC65 },
+    { "case", TOK_CASE, TT_C89 | TT_C99 | TT_CC65 },
+    { "cdecl", TOK_CDECL, TT_CC65 },
+    { "char", TOK_CHAR, TT_C89 | TT_C99 | TT_CC65 },
+    { "const", TOK_CONST, TT_C89 | TT_C99 | TT_CC65 },
+    { "continue", TOK_CONTINUE, TT_C89 | TT_C99 | TT_CC65 },
+    { "default", TOK_DEFAULT, TT_C89 | TT_C99 | TT_CC65 },
+    { "do", TOK_DO, TT_C89 | TT_C99 | TT_CC65 },
+    { "double", TOK_DOUBLE, TT_C89 | TT_C99 | TT_CC65 },
+    { "else", TOK_ELSE, TT_C89 | TT_C99 | TT_CC65 },
+    { "enum", TOK_ENUM, TT_C89 | TT_C99 | TT_CC65 },
+    { "extern", TOK_EXTERN, TT_C89 | TT_C99 | TT_CC65 },
+    { "far", TOK_FAR, TT_CC65 },
+    { "fastcall", TOK_FASTCALL, TT_CC65 },
+    { "float", TOK_FLOAT, TT_C89 | TT_C99 | TT_CC65 },
+    { "for", TOK_FOR, TT_C89 | TT_C99 | TT_CC65 },
+    { "goto", TOK_GOTO, TT_C89 | TT_C99 | TT_CC65 },
+    { "if", TOK_IF, TT_C89 | TT_C99 | TT_CC65 },
+    { "inline", TOK_INLINE, TT_C99 | TT_CC65 },
+    { "int", TOK_INT, TT_C89 | TT_C99 | TT_CC65 },
+    { "long", TOK_LONG, TT_C89 | TT_C99 | TT_CC65 },
+    { "near", TOK_NEAR, TT_CC65 },
+    { "register", TOK_REGISTER, TT_C89 | TT_C99 | TT_CC65 },
+    { "restrict", TOK_RESTRICT, TT_C99 | TT_CC65 },
+    { "return", TOK_RETURN, TT_C89 | TT_C99 | TT_CC65 },
+    { "short", TOK_SHORT, TT_C89 | TT_C99 | TT_CC65 },
+    { "signed", TOK_SIGNED, TT_C89 | TT_C99 | TT_CC65 },
+    { "sizeof", TOK_SIZEOF, TT_C89 | TT_C99 | TT_CC65 },
+    { "static", TOK_STATIC, TT_C89 | TT_C99 | TT_CC65 },
+    { "struct", TOK_STRUCT, TT_C89 | TT_C99 | TT_CC65 },
+    { "switch", TOK_SWITCH, TT_C89 | TT_C99 | TT_CC65 },
+    { "typedef", TOK_TYPEDEF, TT_C89 | TT_C99 | TT_CC65 },
+    { "union", TOK_UNION, TT_C89 | TT_C99 | TT_CC65 },
+    { "unsigned", TOK_UNSIGNED, TT_C89 | TT_C99 | TT_CC65 },
+    { "void", TOK_VOID, TT_C89 | TT_C99 | TT_CC65 },
+    { "volatile", TOK_VOLATILE, TT_C89 | TT_C99 | TT_CC65 },
+    { "while", TOK_WHILE, TT_C89 | TT_C99 | TT_CC65 },
};
-#define KEY_COUNT (sizeof (Keywords) / sizeof (Keywords [0]))
+#define KEY_COUNT (sizeof (Keywords) / sizeof (Keywords [0]))
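/* Bit flags naming the candidate types of an integer constant. Several flags
** may be set at once; the set is narrowed down by the suffix and the value
** range of the constant.
*/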
-#define IT_INT 0x01
-#define IT_UINT 0x02
-#define IT_LONG 0x04
-#define IT_ULONG 0x08
+#define IT_INT 0x01
+#define IT_UINT 0x02
+#define IT_LONG 0x04
+#define IT_ULONG 0x08
/*****************************************************************************/
-/* code */
+/* code */
/*****************************************************************************/
-static int FindKey (char* Key)
+static token_t FindKey (const char* Key)
/* Find a keyword and return the token. Return IDENT if the token is not a
- * keyword.
- */
+** keyword, or if it is a keyword that is not enabled in the currently
+** selected language standard.
+*/
{
-struct Keyword* K;
+    /* The keyword table is const, so only look at the entry found through a
+    ** pointer to const.
+    */
+    const struct Keyword* K;
K = bsearch (Key, Keywords, KEY_COUNT, sizeof (Keywords [0]), CmpKey);
- if (K && (K->Type != TT_EXT || ANSI == 0)) {
- return K->Tok;
+    /* Each entry carries one bit per standard. Accept the keyword only if
+    ** the bit that belongs to the active standard is set.
+    */
+    if (K && (K->Std & (0x01 << IS_Get (&Standard))) != 0) {
+        return K->Tok;
} else {
- return TOK_IDENT;
+        return TOK_IDENT;
}
}
-static int skipwhite (void)
+static int SkipWhite (void)
/* Skip white space in the input stream, reading and preprocessing new lines
- * if necessary. Return 0 if end of file is reached, return 1 otherwise.
- */
+** if necessary. Return 0 if end of file is reached, return 1 otherwise.
+*/
{
while (1) {
- while (*lptr == 0) {
- if (readline () == 0) {
- return 0;
- }
- preprocess ();
- }
- if (*lptr == ' ' || *lptr == '\r') {
- ++lptr;
- } else {
- return 1;
- }
+ while (CurC == '\0') { /* Nothing left on the current line */
+ if (NextLine () == 0) {
+ return 0; /* No further line: end of input */
+ }
+ Preprocess (); /* Preprocess the line just read */
+ }
+ if (IsSpace (CurC)) {
+ NextChar (); /* Skip one white space character */
+ } else {
+ return 1; /* CurC is the first non white space character */
+ }
}
}
-void symname (char *s)
-/* Get symbol from input stream */
+int TokIsFuncSpec (const Token* T)
+/* Return true if the token is a function specifier, that is, if its Tok
+** field is one of TOK_INLINE, TOK_FASTCALL, TOK_CDECL, TOK_NEAR or TOK_FAR.
+*/
{
- unsigned k = 0;
- do {
- if (k != MAX_IDENTLEN) {
- ++k;
- *s++ = *lptr;
- }
- ++lptr;
- } while (IsIdent (*lptr) || isdigit (*lptr));
- *s = '\0';
+ return (T->Tok == TOK_INLINE) ||
+ (T->Tok == TOK_FASTCALL) || (T->Tok == TOK_CDECL) ||
+ (T->Tok == TOK_NEAR) || (T->Tok == TOK_FAR);
}
-int issym (char *s)
-/* Get symbol from input stream or return 0 if not a symbol. */
+void SymName (char* S)
+/* Read a symbol from the input stream. The first character must have been
+** checked before calling this function. The buffer is expected to be at
+** least of size MAX_IDENTLEN+1. Symbols longer than MAX_IDENTLEN are
+** consumed completely, but only the first MAX_IDENTLEN characters are
+** stored. The result in S is always zero terminated.
+*/
{
- if (IsIdent (*lptr)) {
- symname (s);
- return 1;
- } else {
- return 0;
- }
-}
-
-
-
-static void unknown (unsigned char c)
-/* Error message for unknown character */
-{
- Error (ERR_INVALID_CHAR, c);
- gch (); /* Skip */
+ unsigned Len = 0;
+ do {
+ if (Len < MAX_IDENTLEN) { /* Store only while there is room left */
+ ++Len;
+ *S++ = CurC;
+ }
+ NextChar (); /* Excess characters are skipped nevertheless */
+ } while (IsIdent (CurC) || IsDigit (CurC));
+ *S = '\0';
}
-static unsigned hexval (int c)
-/* Convert a hex digit into a value */
+int IsSym (char* S)
+/* If the current character passes the IsIdent() check, read the symbol that
+** starts there into S using SymName() and return 1. Otherwise return 0
+** without consuming input or writing to S. S must satisfy the buffer size
+** requirement of SymName().
+*/
{
- if (!isxdigit (c)) {
- Error (ERR_ILLEGAL_HEX_DIGIT);
- }
- if (isdigit (c)) {
- return c - '0';
+ if (IsIdent (CurC)) {
+ SymName (S);
+ return 1;
} else {
- return toupper (c) - 'A' + 10;
+ return 0;
}
}
-static void SetTok (int tok)
-/* set nxttok and bump line ptr */
+static void UnknownChar (char C)
/* Report C as an invalid input character, printing its code as two hex
** digits, then skip the current input character.
*/
{
- nxttok = tok;
- ++lptr;
+ Error ("Invalid input character with code %02X", C & 0xFF); /* Mask keeps a negative char from printing sign extended */
+ NextChar (); /* Skip */
}
-static int SignExtendChar (int C)
-/* Do correct sign extension of a character */
+static void SetTok (int tok)
+/* Set NextTok.Tok to tok and skip the current input character. Used for
+** tokens whose last character is still the current one.
+*/
{
- if (SignedChars && (C & 0x80) != 0) {
- return C | ~0xFF;
- } else {
- return C & 0xFF;
- }
+ NextTok.Tok = tok;
+ NextChar ();
}
-static int parsechar (int c)
-/* Parse a character. Converts \n into EOL, etc. */
+static int ParseChar (void)
+/* Parse the character at the current input position and return its code.
+** A backslash introduces an escape sequence, which is converted into the
+** character code it stands for. All input characters that belong to the
+** parsed character are skipped. The result is passed through
+** SignExtendChar() before it is returned.
+*/
{
- int i;
- int val;
+    int C;
+    int HadError;
+    int Count;
/* Check for escape chars */
- if (c == '\\') {
- switch (c = gch ()) {
- case 'b':
- c = '\b';
- break;
- case 'f':
- c = '\f';
- break;
- case 'r':
- c = '\r';
- break;
- case 'n':
- c = '\n';
- break;
- case 't':
- c = '\t';
- break;
- case '\"':
- c = '\"';
- break;
- case '\'':
- c = '\'';
- break;
- case '\\':
- c = '\\';
- break;
- case 'x':
- case 'X':
- /* Hex character constant */
- val = hexval (gch ()) << 4;
- c = val | hexval (gch ()); /* Do not translate */
- break;
- case '0':
- case '1':
- /* Octal constant */
- i = 0;
- val = c - '0';
- while ((c = *lptr) >= '0' && c <= '7' && i++ < 4) {
- val = (val << 3) | (c - '0');
- gch ();
- }
- c = val; /* Do not translate */
- break;
- default:
- Error (ERR_ILLEGAL_CHARCONST);
- }
+    if (CurC == '\\') {
+        NextChar ();
+        switch (CurC) {
+            case '?':
+                C = '\?';
+                break;
+            case 'a':
+                C = '\a';
+                break;
+            case 'b':
+                C = '\b';
+                break;
+            case 'f':
+                C = '\f';
+                break;
+            case 'r':
+                C = '\r';
+                break;
+            case 'n':
+                C = '\n';
+                break;
+            case 't':
+                C = '\t';
+                break;
+            case 'v':
+                C = '\v';
+                break;
+            case '\"':
+                C = '\"';
+                break;
+            case '\'':
+                C = '\'';
+                break;
+            case '\\':
+                C = '\\';
+                break;
+            case 'x':
+            case 'X':
+                /* Hex character constant */
+                if (!IsXDigit (NextC)) {
+                    Error ("\\x used with no following hex digits");
+                    C = ' ';
+                } else {
+                    /* Consume all hex digits that follow, but stop
+                    ** accumulating (and complain once) as soon as another
+                    ** digit would push the value beyond 8 bits.
+                    */
+                    HadError = 0;
+                    C = 0;
+                    while (IsXDigit (NextC)) {
+                        if ((C << 4) >= 256) {
+                            if (!HadError) {
+                                Error ("Hex character constant out of range");
+                                HadError = 1;
+                            }
+                        } else {
+                            C = (C << 4) | HexVal (NextC);
+                        }
+                        NextChar ();
+                    }
+                }
+                break;
+            case '0':
+            case '1':
+            case '2':
+            case '3':
+            case '4':
+            case '5':
+            case '6':
+            case '7':
+                /* Octal constant: at most three digits, the first one is
+                ** the current character.
+                */
+                Count = 1;
+                C = HexVal (CurC);
+                while (IsODigit (NextC) && Count++ < 3) {
+                    C = (C << 3) | HexVal (NextC);
+                    NextChar ();
+                }
+                if (C >= 256) {
+                    Error ("Octal character constant out of range");
+                }
+                break;
+            default:
+                C = CurC;
+                /* Mask the character as UnknownChar() does, so that a
+                ** negative character code isn't printed sign extended.
+                */
+                Error ("Illegal escaped character: 0x%02X", CurC & 0xFF);
+                break;
+        }
+    } else {
+        C = CurC;
}
+    /* Skip the character read */
+    NextChar ();
+
/* Do correct sign extension */
- return SignExtendChar (c);
+    return SignExtendChar (C);
}
static void CharConst (void)
/* Parse a character constant. On entry, the current character is the opening
** quote. The result is stored in NextTok as a TOK_CCONST of type int, with
** the character translated into the target character set.
*/
{
- int c;
+ int C;
/* Skip the quote */
- ++lptr;
+ NextChar ();
/* Get character */
- c = parsechar (cgch ());
+ C = ParseChar ();
/* Check for closing quote. If it is missing, complain and leave the input
** position where it is.
*/
- if (cgch () != '\'') {
- Error (ERR_QUOTE_EXPECTED);
+ if (CurC != '\'') {
+ Error ("'\'' expected");
+ } else {
+ /* Skip the quote */
+ NextChar ();
}
/* Setup values and attributes */
- nxttok = TOK_CCONST;
- nxtval = SignExtendChar (ctrans (c)); /* Translate into target charset */
- nxttype = type_int; /* Character constants have type int */
+ NextTok.Tok = TOK_CCONST;
+
+ /* Translate into target charset */
+ NextTok.IVal = SignExtendChar (TgtTranslateChar (C));
+
+ /* Character constants have type int */
+ NextTok.Type = type_int;
}
static void StringConst (void)
/* Parse a quoted string, including any directly following string literals,
** which are concatenated to it. The result is added to the literal pool and
** stored in NextTok.
*/
{
- nxtval = GetLiteralOffs ();
- nxttok = TOK_SCONST;
+ /* String buffer */
+ StrBuf S = AUTO_STRBUF_INITIALIZER;
+
+ /* Assume next token is a string constant */
+ NextTok.Tok = TOK_SCONST;
- /* Be sure to concatenate strings */
- while (*lptr == '\"') {
+ /* Concatenate strings. If at least one of the concatenated strings is a
+ ** wide character literal, the whole string is a wide char literal,
+ ** otherwise it's a normal string literal.
+ */
+ while (1) {
+
+ /* Check if this is a normal or a wide char string */
+ if (CurC == 'L' && NextC == '\"') {
+ /* Wide character literal */
+ NextTok.Tok = TOK_WCSCONST;
+ NextChar ();
+ NextChar ();
+ } else if (CurC == '\"') {
+ /* Skip the quote char */
+ NextChar ();
+ } else {
+ /* No string */
+ break;
+ }
+
+ /* Read until end of string */
+ while (CurC != '\"') {
+ if (CurC == '\0') {
+ Error ("Unexpected newline");
+ break;
+ }
+ SB_AppendChar (&S, ParseChar ());
+ }
+
+ /* Skip closing quote char if there was one */
+ NextChar ();
+
+ /* Skip white space, read new input */
+ SkipWhite ();
+
+ }
+
+ /* Terminate the string */
+ SB_AppendChar (&S, '\0');
+
+ /* Add the whole string to the literal pool */
+ NextTok.SVal = AddLiteralStr (&S);
+
+ /* Free the buffer */
+ SB_Done (&S);
+}
- /* Skip the quote char */
- ++lptr;
- while (*lptr != '\"') {
- if (*lptr == 0) {
- Error (ERR_UNEXPECTED_NEWLINE);
- break;
- }
- AddLiteralChar (parsechar (gch()));
- }
- /* Skip closing quote char if there was one */
- cgch ();
+static void NumericConst (void)
+/* Parse a numeric constant at the current input position and store the
+** result in NextTok: Integer constants yield TOK_ICONST with IVal and Type
+** set, floating point constants yield TOK_FCONST with FVal and Type set.
+*/
+{
+    unsigned Base;              /* Temporary number base */
+    unsigned Prefix;            /* Base according to prefix */
+    StrBuf   S = STATIC_STRBUF_INITIALIZER;
+    int      IsFloat;
+    char     C;
+    unsigned DigitVal;
+    unsigned long IVal;         /* Value */
+
+    /* Check for a leading hex or octal prefix and determine the possible
+    ** integer types.
+    */
+    if (CurC == '0') {
+        /* Gobble 0 and examine next char */
+        NextChar ();
+        if (toupper (CurC) == 'X') {
+            Base = Prefix = 16;
+            NextChar ();        /* gobble "x" */
+        } else {
+            Base = 10;          /* Assume 10 for now - see below */
+            Prefix = 8;         /* Actual prefix says octal */
+        }
+    } else {
+        Base = Prefix = 10;
+    }
- /* Skip white space, read new input */
- skipwhite ();
+    /* Because floating point numbers don't have octal prefixes (a number
+    ** with a leading zero is decimal), we first have to read the number
+    ** before converting it, so we can determine if it's a float or an
+    ** integer.
+    */
+    while (IsXDigit (CurC) && HexVal (CurC) < Base) {
+        SB_AppendChar (&S, CurC);
+        NextChar ();
+    }
+    SB_Terminate (&S);
+
+    /* The following character tells us if we have an integer or floating
+    ** point constant. Note: Hexadecimal floating point constants aren't
+    ** supported in C89.
+    */
+    IsFloat = (CurC == '.' ||
+               (Base == 10 && toupper (CurC) == 'E') ||
+               (Base == 16 && toupper (CurC) == 'P' && IS_Get (&Standard) >= STD_C99));
+
+    /* If we don't have a floating point type, an octal prefix results in an
+    ** octal base.
+    */
+    if (!IsFloat && Prefix == 8) {
+        Base = 8;
+    }
+    /* Since we do now know the correct base, convert the remembered input
+    ** into a number.
+    */
+    SB_Reset (&S);
+    IVal = 0;
+    while ((C = SB_Get (&S)) != '\0') {
+        DigitVal = HexVal (C);
+        if (DigitVal >= Base) {
+            Error ("Numeric constant contains digits beyond the radix");
+        }
+        IVal = (IVal * Base) + DigitVal;
}
- /* Terminate the string */
- AddLiteralChar ('\0');
+    /* We don't need the string buffer any longer */
+    SB_Done (&S);
+
+    /* Distinguish between integer and floating point constants */
+    if (!IsFloat) {
+
+        unsigned Types;
+        int      HaveSuffix;
+
+        /* Check for a suffix and determine the possible types */
+        HaveSuffix = 1;
+        if (toupper (CurC) == 'U') {
+            /* Unsigned type */
+            NextChar ();
+            if (toupper (CurC) != 'L') {
+                Types = IT_UINT | IT_ULONG;
+            } else {
+                NextChar ();
+                Types = IT_ULONG;
+            }
+        } else if (toupper (CurC) == 'L') {
+            /* Long type */
+            NextChar ();
+            if (toupper (CurC) != 'U') {
+                Types = IT_LONG | IT_ULONG;
+            } else {
+                NextChar ();
+                Types = IT_ULONG;
+            }
+        } else {
+            HaveSuffix = 0;
+            if (Prefix == 10) {
+                /* Decimal constants are of any type but uint */
+                Types = IT_INT | IT_LONG | IT_ULONG;
+            } else {
+                /* Octal or hex constants are of any type */
+                Types = IT_INT | IT_UINT | IT_LONG | IT_ULONG;
+            }
+        }
+
+        /* Check the range to determine the type */
+        if (IVal > 0x7FFF) {
+            /* Out of range for int */
+            Types &= ~IT_INT;
+            /* If the value is in the range 0x8000..0xFFFF, unsigned int is not
+            ** allowed, and we don't have a type specifying suffix, emit a
+            ** warning, because the constant is of type long.
+            */
+            if (IVal <= 0xFFFF && (Types & IT_UINT) == 0 && !HaveSuffix) {
+                Warning ("Constant is long");
+            }
+        }
+        if (IVal > 0xFFFF) {
+            /* Out of range for unsigned int */
+            Types &= ~IT_UINT;
+        }
+        if (IVal > 0x7FFFFFFF) {
+            /* Out of range for long int */
+            Types &= ~IT_LONG;
+        }
+
+        /* Now set the type string to the smallest type in types */
+        if (Types & IT_INT) {
+            NextTok.Type = type_int;
+        } else if (Types & IT_UINT) {
+            NextTok.Type = type_uint;
+        } else if (Types & IT_LONG) {
+            NextTok.Type = type_long;
+        } else {
+            NextTok.Type = type_ulong;
+        }
+
+        /* Set the value and the token */
+        NextTok.IVal = IVal;
+        NextTok.Tok  = TOK_ICONST;
+
+    } else {
+
+        /* Float constant */
+        Double FVal = FP_D_FromInt (IVal);      /* Convert to double */
+
+        /* Check for a fractional part and read it */
+        if (CurC == '.') {
+
+            Double Scale;
+
+            /* Skip the dot */
+            NextChar ();
+
+            /* Read fractional digits. Scale holds the weight divisor of the
+            ** current digit: Base for the first digit after the dot,
+            ** Base*Base for the second and so on.
+            */
+            Scale = FP_D_Make (1.0);
+            while (IsXDigit (CurC) && (DigitVal = HexVal (CurC)) < Base) {
+                Double FracVal;
+                /* Advance the divisor to the position of this digit */
+                Scale = FP_D_Mul (Scale, FP_D_FromInt (Base));
+                /* Get the value of this digit */
+                FracVal = FP_D_Div (FP_D_FromInt (DigitVal), Scale);
+                /* Add it to the float value */
+                FVal = FP_D_Add (FVal, FracVal);
+                /* Skip the digit */
+                NextChar ();
+            }
+        }
+
+        /* Check for an exponent and read it. Hex float constants use 'P' as
+        ** exponent notifier, since 'E' and 'F' are valid hex digits.
+        */
+        if ((Base == 16 && toupper (CurC) == 'P') ||
+            (Base == 10 && toupper (CurC) == 'E')) {
+
+            unsigned Digits;
+            unsigned Exp;
+            int      Negative = 0;
+
+            /* Skip the exponent notifier */
+            NextChar ();
+
+            /* Read an optional sign */
+            if (CurC == '-') {
+                Negative = 1;
+                NextChar ();
+            } else if (CurC == '+') {
+                NextChar ();
+            }
+
+            /* Read exponent digits. Since we support only 32 bit floats
+            ** with a maximum exponent of +/-127, we read the exponent
+            ** part as integer with up to 3 digits and drop the remainder.
+            ** This avoids an overflow of Exp. The exponent is always
+            ** decimal, even for hex float consts.
+            */
+            Digits = 0;
+            Exp    = 0;
+            while (IsDigit (CurC)) {
+                if (++Digits <= 3) {
+                    Exp = Exp * 10 + HexVal (CurC);
+                }
+                NextChar ();
+            }
+
+            /* Check for errors: We must have exponent digits, and not more
+            ** than three.
+            */
+            if (Digits == 0) {
+                Error ("Floating constant exponent has no digits");
+            } else if (Digits > 3) {
+                Warning ("Floating constant exponent is too large");
+            }
+
+            /* Scale the value by the exponent, honoring its sign. Decimal
+            ** constants have a power of 10 exponent, hex constants a power
+            ** of 2 exponent.
+            */
+            if (Exp) {
+                double ExpBase = (Base == 16)? 2.0 : 10.0;
+                double ExpVal  = Negative? -(double) Exp : (double) Exp;
+                FVal = FP_D_Mul (FVal, FP_D_Make (pow (ExpBase, ExpVal)));
+            }
+        }
+
+        /* Check for a suffix and determine the type of the constant */
+        if (toupper (CurC) == 'F') {
+            NextChar ();
+            NextTok.Type = type_float;
+        } else {
+            NextTok.Type = type_double;
+        }
+
+        /* Set the value and the token */
+        NextTok.FVal = FVal;
+        NextTok.Tok  = TOK_FCONST;
+
+    }
}
void NextToken (void)
/* Get next token from input stream */
{
- char c;
ident token;
+ /* We have to skip white space here before shifting tokens, since the
+ ** tokens and the current line info is invalid at startup and will get
+ ** initialized by reading the first time from the file. Remember if
+ ** we were at end of input and handle that later.
+ */
+ int GotEOF = (SkipWhite() == 0);
+
/* Current token is the lookahead token */
+ if (CurTok.LI) {
+ ReleaseLineInfo (CurTok.LI);
+ }
CurTok = NextTok;
+ /* When reading the first time from the file, the line info in NextTok,
+ ** which was copied to CurTok is invalid. Since the information from
+ ** the token is used for error messages, we must make it valid.
+ */
+ if (CurTok.LI == 0) {
+ CurTok.LI = UseLineInfo (GetCurLineInfo ());
+ }
+
/* Remember the starting position of the next token */
- NextTok.Pos = ln;
+ NextTok.LI = UseLineInfo (GetCurLineInfo ());
- /* Skip spaces and read the next line if needed */
- if (skipwhite () == 0) {
- /* End of file reached */
- nxttok = TOK_CEOF;
- return;
+ /* Now handle end of input. */
+ if (GotEOF) {
+ /* End of file reached */
+ NextTok.Tok = TOK_CEOF;
+ return;
}
/* Determine the next token from the lookahead */
- c = *lptr;
- if (isdigit (c)) {
-
- /* A number */
- int HaveSuffix; /* True if we have a type suffix */
- unsigned types; /* Possible types */
- unsigned base;
- unsigned long k; /* Value */
-
- k = 0;
- base = 10;
- types = IT_INT | IT_LONG | IT_ULONG;
-
- if (c == '0') {
- /* Octal or hex constants may also be of type unsigned int */
- types = IT_INT | IT_UINT | IT_LONG | IT_ULONG;
- /* gobble 0 and examin next char */
- if (toupper (*++lptr) == 'X') {
- base = 16;
- nxttype = type_uint;
- ++lptr; /* gobble "x" */
- } else {
- base = 8;
- }
- }
- while (1) {
- c = *lptr;
- if (isdigit (c)) {
- k = k * base + (c - '0');
- } else if (base == 16 && isxdigit (c)) {
- k = (k << 4) + hexval (c);
- } else {
- break; /* not digit */
- }
- ++lptr; /* gobble char */
- }
-
- /* Check for a suffix */
- HaveSuffix = 1;
- c = toupper (*lptr);
- if (c == 'U') {
- /* Unsigned type */
- ++lptr;
- if (toupper (*lptr) != 'L') {
- types = IT_UINT | IT_ULONG;
- } else {
- ++lptr;
- types = IT_ULONG;
- }
- } else if (c == 'L') {
- /* Long type */
- ++lptr;
- if (toupper (*lptr) != 'U') {
- types = IT_LONG | IT_ULONG;
- } else {
- ++lptr;
- types = IT_ULONG;
- }
- } else {
- HaveSuffix = 0;
- }
-
- /* Check the range to determine the type */
- if (k > 0x7FFF) {
- /* Out of range for int */
- types &= ~IT_INT;
- /* If the value is in the range 0x8000..0xFFFF, unsigned int is not
- * allowed, and we don't have a type specifying suffix, emit a
- * warning.
- */
- if (k <= 0xFFFF && (types & IT_UINT) == 0 && !HaveSuffix) {
- Warning (WARN_CONSTANT_IS_LONG);
- }
- }
- if (k > 0xFFFF) {
- /* Out of range for unsigned int */
- types &= ~IT_UINT;
- }
- if (k > 0x7FFFFFFF) {
- /* Out of range for long int */
- types &= ~IT_LONG;
- }
-
- /* Now set the type string to the smallest type in types */
- if (types & IT_INT) {
- nxttype = type_int;
- } else if (types & IT_UINT) {
- nxttype = type_uint;
- } else if (types & IT_LONG) {
- nxttype = type_long;
- } else {
- nxttype = type_ulong;
- }
-
- /* Set the value and the token */
- nxtval = k;
- nxttok = TOK_ICONST;
- return;
+ if (IsDigit (CurC) || (CurC == '.' && IsDigit (NextC))) {
+ /* A number */
+ NumericConst ();
+ return;
}
- if (issym (token)) {
-
- /* Check for a keyword */
- if ((nxttok = FindKey (token)) != TOK_IDENT) {
- /* Reserved word found */
- return;
- }
- /* No reserved word, check for special symbols */
- if (token [0] == '_') {
- /* Special symbols */
- if (strcmp (token, "__FILE__") == 0) {
- nxtval = AddLiteral (fin);
- nxttok = TOK_SCONST;
- return;
- } else if (strcmp (token, "__LINE__") == 0) {
- nxttok = TOK_ICONST;
- nxtval = ln;
- nxttype = type_int;
- return;
- } else if (strcmp (token, "__fixargs__") == 0) {
- nxttok = TOK_ICONST;
- nxtval = GetParamSize (CurrentFunc);
- nxttype = type_uint;
- return;
- } else if (strcmp (token, "__func__") == 0) {
- /* __func__ is only defined in functions */
- if (CurrentFunc) {
- nxtval = AddLiteral (GetFuncName (CurrentFunc));
- nxttok = TOK_SCONST;
- return;
- }
- }
- }
-
- /* No reserved word but identifier */
- strcpy (NextTok.Ident, token);
- NextTok.Tok = TOK_IDENT;
- return;
+ /* Check for wide character literals */
+ if (CurC == 'L' && NextC == '\"') {
+ StringConst ();
+ return;
+ }
+
+ /* Check for keywords and identifiers */
+ if (IsSym (token)) {
+
+ /* Check for a keyword */
+ if ((NextTok.Tok = FindKey (token)) != TOK_IDENT) {
+ /* Reserved word found */
+ return;
+ }
+ /* No reserved word, check for special symbols */
+ if (token[0] == '_' && token[1] == '_') {
+ /* Special symbols */
+ if (strcmp (token+2, "FILE__") == 0) {
+ NextTok.SVal = AddLiteral (GetCurrentFile());
+ NextTok.Tok = TOK_SCONST;
+ return;
+ } else if (strcmp (token+2, "LINE__") == 0) {
+ NextTok.Tok = TOK_ICONST;
+ NextTok.IVal = GetCurrentLine();
+ NextTok.Type = type_int;
+ return;
+ } else if (strcmp (token+2, "func__") == 0) {
+ /* __func__ is only defined in functions */
+ if (CurrentFunc) {
+ NextTok.SVal = AddLiteral (F_GetFuncName (CurrentFunc));
+ NextTok.Tok = TOK_SCONST;
+ return;
+ }
+ }
+ }
+
+ /* No reserved word but identifier */
+ strcpy (NextTok.Ident, token);
+ NextTok.Tok = TOK_IDENT;
+ return;
}
/* Monstrous switch statement ahead... */
- switch (c) {
-
- case '!':
- if (*++lptr == '=') {
- SetTok (TOK_NE);
- } else {
- nxttok = TOK_BOOL_NOT;
- }
- break;
-
- case '\"':
- StringConst ();
- break;
-
- case '%':
- if (*++lptr == '=') {
- SetTok (TOK_MOD_ASSIGN);
- } else {
- nxttok = TOK_MOD;
- }
- break;
-
- case '&':
- switch (*++lptr) {
- case '&':
- SetTok (TOK_BOOL_AND);
- break;
- case '=':
- SetTok (TOK_AND_ASSIGN);
- break;
- default:
- nxttok = TOK_AND;
- }
- break;
-
- case '\'':
- CharConst ();
- break;
-
- case '(':
- SetTok (TOK_LPAREN);
- break;
-
- case ')':
- SetTok (TOK_RPAREN);
- break;
-
- case '*':
- if (*++lptr == '=') {
- SetTok (TOK_MUL_ASSIGN);
- } else {
- nxttok = TOK_STAR;
- }
- break;
-
- case '+':
- switch (*++lptr) {
- case '+':
- SetTok (TOK_INC);
- break;
- case '=':
- SetTok (TOK_PLUS_ASSIGN);
- break;
- default:
- nxttok = TOK_PLUS;
- }
- break;
-
- case ',':
- SetTok (TOK_COMMA);
- break;
-
- case '-':
- switch (*++lptr) {
- case '-':
- SetTok (TOK_DEC);
- break;
- case '=':
- SetTok (TOK_MINUS_ASSIGN);
- break;
- case '>':
- SetTok (TOK_PTR_REF);
- break;
- default:
- nxttok = TOK_MINUS;
- }
- break;
-
- case '.':
- if (*++lptr == '.') {
- if (*++lptr == '.') {
- SetTok (TOK_ELLIPSIS);
- } else {
- unknown (*lptr);
- }
- } else {
- nxttok = TOK_DOT;
- }
- break;
-
- case '/':
- if (*++lptr == '=') {
- SetTok (TOK_DIV_ASSIGN);
- } else {
- nxttok = TOK_DIV;
- }
- break;
-
- case ':':
- SetTok (TOK_COLON);
- break;
-
- case ';':
- SetTok (TOK_SEMI);
- break;
-
- case '<':
- switch (*++lptr) {
- case '=':
- SetTok (TOK_LE);
- break;
- case '<':
- if (*++lptr == '=') {
- SetTok (TOK_SHL_ASSIGN);
- } else {
- nxttok = TOK_SHL;
- }
- break;
- default:
- nxttok = TOK_LT;
- }
- break;
-
- case '=':
- if (*++lptr == '=') {
- SetTok (TOK_EQ);
- } else {
- nxttok = TOK_ASSIGN;
- }
- break;
-
- case '>':
- switch (*++lptr) {
- case '=':
- SetTok (TOK_GE);
- break;
- case '>':
- if (*++lptr == '=') {
- SetTok (TOK_SHR_ASSIGN);
- } else {
- nxttok = TOK_SHR;
- }
- break;
- default:
- nxttok = TOK_GT;
- }
- break;
-
- case '?':
- SetTok (TOK_QUEST);
- break;
-
- case '[':
- SetTok (TOK_LBRACK);
- break;
-
- case ']':
- SetTok (TOK_RBRACK);
- break;
-
- case '^':
- if (*++lptr == '=') {
- SetTok (TOK_XOR_ASSIGN);
- } else {
- nxttok = TOK_XOR;
- }
- break;
-
- case '{':
- SetTok (TOK_LCURLY);
- break;
+ switch (CurC) {
+
+ case '!':
+ NextChar ();
+ if (CurC == '=') {
+ SetTok (TOK_NE);
+ } else {
+ NextTok.Tok = TOK_BOOL_NOT;
+ }
+ break;
+
+ case '\"':
+ StringConst ();
+ break;
+
+ case '%':
+ NextChar ();
+ if (CurC == '=') {
+ SetTok (TOK_MOD_ASSIGN);
+ } else {
+ NextTok.Tok = TOK_MOD;
+ }
+ break;
+
+ case '&':
+ NextChar ();
+ switch (CurC) {
+ case '&':
+ SetTok (TOK_BOOL_AND);
+ break;
+ case '=':
+ SetTok (TOK_AND_ASSIGN);
+ break;
+ default:
+ NextTok.Tok = TOK_AND;
+ }
+ break;
+
+ case '\'':
+ CharConst ();
+ break;
+
+ case '(':
+ SetTok (TOK_LPAREN);
+ break;
+
+ case ')':
+ SetTok (TOK_RPAREN);
+ break;
+
+ case '*':
+ NextChar ();
+ if (CurC == '=') {
+ SetTok (TOK_MUL_ASSIGN);
+ } else {
+ NextTok.Tok = TOK_STAR;
+ }
+ break;
+
+ case '+':
+ NextChar ();
+ switch (CurC) {
+ case '+':
+ SetTok (TOK_INC);
+ break;
+ case '=':
+ SetTok (TOK_PLUS_ASSIGN);
+ break;
+ default:
+ NextTok.Tok = TOK_PLUS;
+ }
+ break;
+
+ case ',':
+ SetTok (TOK_COMMA);
+ break;
+
+ case '-':
+ NextChar ();
+ switch (CurC) {
+ case '-':
+ SetTok (TOK_DEC);
+ break;
+ case '=':
+ SetTok (TOK_MINUS_ASSIGN);
+ break;
+ case '>':
+ SetTok (TOK_PTR_REF);
+ break;
+ default:
+ NextTok.Tok = TOK_MINUS;
+ }
+ break;
+
+ case '.':
+ NextChar ();
+ if (CurC == '.') {
+ NextChar ();
+ if (CurC == '.') {
+ SetTok (TOK_ELLIPSIS);
+ } else {
+ UnknownChar (CurC);
+ }
+ } else {
+ NextTok.Tok = TOK_DOT;
+ }
+ break;
+
+ case '/':
+ NextChar ();
+ if (CurC == '=') {
+ SetTok (TOK_DIV_ASSIGN);
+ } else {
+ NextTok.Tok = TOK_DIV;
+ }
+ break;
+
+ case ':':
+ SetTok (TOK_COLON);
+ break;
+
+ case ';':
+ SetTok (TOK_SEMI);
+ break;
+
+ case '<':
+ NextChar ();
+ switch (CurC) {
+ case '=':
+ SetTok (TOK_LE);
+ break;
+ case '<':
+ NextChar ();
+ if (CurC == '=') {
+ SetTok (TOK_SHL_ASSIGN);
+ } else {
+ NextTok.Tok = TOK_SHL;
+ }
+ break;
+ default:
+ NextTok.Tok = TOK_LT;
+ }
+ break;
+
+ case '=':
+ NextChar ();
+ if (CurC == '=') {
+ SetTok (TOK_EQ);
+ } else {
+ NextTok.Tok = TOK_ASSIGN;
+ }
+ break;
+
+ case '>':
+ NextChar ();
+ switch (CurC) {
+ case '=':
+ SetTok (TOK_GE);
+ break;
+ case '>':
+ NextChar ();
+ if (CurC == '=') {
+ SetTok (TOK_SHR_ASSIGN);
+ } else {
+ NextTok.Tok = TOK_SHR;
+ }
+ break;
+ default:
+ NextTok.Tok = TOK_GT;
+ }
+ break;
+
+ case '?':
+ SetTok (TOK_QUEST);
+ break;
+
+ case '[':
+ SetTok (TOK_LBRACK);
+ break;
+
+ case ']':
+ SetTok (TOK_RBRACK);
+ break;
+
+ case '^':
+ NextChar ();
+ if (CurC == '=') {
+ SetTok (TOK_XOR_ASSIGN);
+ } else {
+ NextTok.Tok = TOK_XOR;
+ }
+ break;
+
+ case '{':
+ SetTok (TOK_LCURLY);
+ break;
case '|':
- switch (*++lptr) {
- case '|':
- SetTok (TOK_BOOL_OR);
- break;
- case '=':
- SetTok (TOK_OR_ASSIGN);
- break;
- default:
- nxttok = TOK_OR;
- }
- break;
-
- case '}':
- SetTok (TOK_RCURLY);
- break;
-
- case '~':
- SetTok (TOK_COMP);
- break;
-
- case '#':
- while (*++lptr == ' ') ; /* Skip it and following whitespace */
- if (!issym (token) || strcmp (token, "pragma") != 0) {
- /* OOPS - should not happen */
- Error (ERR_CPP_DIRECTIVE_EXPECTED);
- }
- nxttok = TOK_PRAGMA;
- break;
-
- default:
- unknown (c);
+ NextChar ();
+ switch (CurC) {
+ case '|':
+ SetTok (TOK_BOOL_OR);
+ break;
+ case '=':
+ SetTok (TOK_OR_ASSIGN);
+ break;
+ default:
+ NextTok.Tok = TOK_OR;
+ }
+ break;
+
+ case '}':
+ SetTok (TOK_RCURLY);
+ break;
+
+ case '~':
+ SetTok (TOK_COMP);
+ break;
+
+ default:
+ UnknownChar (CurC);
}
-void Consume (token_t Token, unsigned ErrNum)
+void SkipTokens (const token_t* TokenList, unsigned TokenCount)
+/* Skip tokens until we reach TOK_CEOF or a token in the given token list.
+** This routine is used for error recovery. TokenList must contain at least
+** TokenCount entries. On return, the current token is either TOK_CEOF or
+** the token from the list that stopped the search; it is not consumed.
+*/
+{
+ while (CurTok.Tok != TOK_CEOF) {
+
+ /* Check if the current token is in the token list */
+ unsigned I;
+ for (I = 0; I < TokenCount; ++I) {
+ if (CurTok.Tok == TokenList[I]) {
+ /* Found a token in the list */
+ return;
+ }
+ }
+
+ /* Not in the list: Skip it */
+ NextToken ();
+
+ }
+}
+
+
+
+int Consume (token_t Token, const char* ErrorMsg)
/* Eat Token if it is the current token (CurTok.Tok), otherwise print an error
- * message.
- */
+** message. Returns true if the token was found and false otherwise.
+*/
{
- if (curtok == Token) {
- NextToken ();
+ if (CurTok.Tok == Token) {
+ NextToken ();
+ return 1;
} else {
- Error (ErrNum);
+ Error ("%s", ErrorMsg); /* "%s" keeps ErrorMsg from being used as a format string */
+ return 0;
}
}
-void ConsumeColon (void)
+int ConsumeColon (void)
/* Check for a colon and skip it. Returns true if the colon was found. If it
** is missing, an error is printed and false is returned.
*/
{
- Consume (TOK_COLON, ERR_COLON_EXPECTED);
+ return Consume (TOK_COLON, "':' expected");
}
-void ConsumeSemi (void)
+int ConsumeSemi (void)
/* Check for a semicolon and skip it. Returns true if the semicolon was found.
** If it is missing, an error is printed and false is returned; a colon or
** comma found in its place is skipped.
*/
{
/* Try to be smart about typos: a colon or comma in place of the semicolon
** is assumed to be a typo and is skipped after the error was reported.
*/
- if (curtok == TOK_SEMI) {
- NextToken ();
+ if (CurTok.Tok == TOK_SEMI) {
+ NextToken ();
+ return 1;
+ } else {
+ Error ("';' expected");
+ if (CurTok.Tok == TOK_COLON || CurTok.Tok == TOK_COMMA) {
+ NextToken ();
+ }
+ return 0;
+ }
+}
+
+
+
+int ConsumeComma (void)
+/* Check for a comma and skip it. Returns true if the comma was found. If it
+** is missing, an error is printed and false is returned; a semicolon found
+** in its place is skipped.
+*/
+{
+ /* Try to be smart about typos: a semicolon in place of the comma is
+ ** assumed to be a typo and is skipped after the error was reported.
+ */
+ if (CurTok.Tok == TOK_COMMA) {
+ NextToken ();
+ return 1;
} else {
- Error (ERR_SEMICOLON_EXPECTED);
- if (curtok == TOK_COLON || curtok == TOK_COMMA) {
- NextToken ();
- }
+ Error ("',' expected");
+ if (CurTok.Tok == TOK_SEMI) {
+ NextToken ();
+ }
+ return 0;
}
}
-void ConsumeLParen (void)
+int ConsumeLParen (void)
/* Check for a left parenthesis and skip it. Returns true if it was found. If
** it is missing, an error is printed and false is returned.
*/
{
- Consume (TOK_LPAREN, ERR_LPAREN_EXPECTED);
+ return Consume (TOK_LPAREN, "'(' expected");
}
-void ConsumeRParen (void)
+int ConsumeRParen (void)
/* Check for a right parenthesis and skip it. Returns true if it was found. If
** it is missing, an error is printed and false is returned.
*/
{
- Consume (TOK_RPAREN, ERR_RPAREN_EXPECTED);
+ return Consume (TOK_RPAREN, "')' expected");
}
-void ConsumeLBrack (void)
+int ConsumeLBrack (void)
/* Check for a left bracket and skip it. Returns true if it was found. If it
** is missing, an error is printed and false is returned.
*/
{
- Consume (TOK_LBRACK, ERR_LBRACK_EXPECTED);
+ return Consume (TOK_LBRACK, "'[' expected");
}
-void ConsumeRBrack (void)
+int ConsumeRBrack (void)
/* Check for a right bracket and skip it. Returns true if it was found. If it
** is missing, an error is printed and false is returned.
*/
{
- Consume (TOK_RBRACK, ERR_RBRACK_EXPECTED);
+ return Consume (TOK_RBRACK, "']' expected");
}
-void ConsumeLCurly (void)
+int ConsumeLCurly (void)
/* Check for a left curly brace and skip it. Returns true if it was found. If
** it is missing, an error is printed and false is returned.
*/
{
- Consume (TOK_LCURLY, ERR_LCURLY_EXPECTED);
+ return Consume (TOK_LCURLY, "'{' expected");
}
-void ConsumeRCurly (void)
+int ConsumeRCurly (void)
/* Check for a right curly brace and skip it. Returns true if it was found. If
** it is missing, an error is printed and false is returned.
*/
{
- Consume (TOK_RCURLY, ERR_RCURLY_EXPECTED);
+ return Consume (TOK_RCURLY, "'}' expected");
}
-
-
-