X-Git-Url: https://git.sur5r.net/?a=blobdiff_plain;f=src%2Fcc65%2Fscanner.c;h=695085e9418dfd47e20533d1e8527924c5f294c3;hb=a6b04f6e97b59ca0ad1cc98dbd91ad600c0414a8;hp=27b6a15d10f6407a9277de29bdc7670d012d00b8;hpb=9cc25f13b6aabc4fd299c54c9c38c5825689eb47;p=cc65 diff --git a/src/cc65/scanner.c b/src/cc65/scanner.c index 27b6a15d1..695085e94 100644 --- a/src/cc65/scanner.c +++ b/src/cc65/scanner.c @@ -1,8 +1,35 @@ -/* - * scanner.c - * - * Ullrich von Bassewitz, 07.06.1998 - */ +/*****************************************************************************/ +/* */ +/* scanner.c */ +/* */ +/* Source file line info structure */ +/* */ +/* */ +/* */ +/* (C) 1998-2010, Ullrich von Bassewitz */ +/* Roemerstrasse 52 */ +/* D-70794 Filderstadt */ +/* EMail: uz@cc65.org */ +/* */ +/* */ +/* This software is provided 'as-is', without any expressed or implied */ +/* warranty. In no event will the authors be held liable for any damages */ +/* arising from the use of this software. */ +/* */ +/* Permission is granted to anyone to use this software for any purpose, */ +/* including commercial applications, and to alter it and redistribute it */ +/* freely, subject to the following restrictions: */ +/* */ +/* 1. The origin of this software must not be misrepresented; you must not */ +/* claim that you wrote the original software. If you use this software */ +/* in a product, an acknowledgment in the product documentation would be */ +/* appreciated but is not required. */ +/* 2. Altered source versions must be plainly marked as such, and must not */ +/* be misrepresented as being the original software. */ +/* 3. This notice may not be removed or altered from any source */ +/* distribution. */ +/* */ +/*****************************************************************************/ @@ -11,97 +38,120 @@ #include #include #include +#include + +/* common */ +#include "chartype.h" +#include "fp.h" +#include "tgttrans.h" -#include "ctrans.h" +/* cc65 */ #include "datatype.h" #include "error.h" #include "function.h" #include "global.h" +#include "hexval.h" #include "ident.h" -#include "io.h" +#include "input.h" #include "litpool.h" #include "preproc.h" -#include "symtab.h" -#include "util.h" #include "scanner.h" +#include "standard.h" +#include "symtab.h" /*****************************************************************************/ -/* data */ +/* data */ /*****************************************************************************/ -Token CurTok; /* The current token */ -Token NextTok; /* The next token */ +Token CurTok; /* The current token */ +Token NextTok; /* The next token */ /* Token types */ -#define TT_C 0 /* ANSI C token */ -#define TT_EXT 1 /* cc65 extension */ +enum { + TT_C89 = 0x01 << STD_C89, /* Token valid in C89 */ + TT_C99 = 0x01 << STD_C99, /* Token valid in C99 */ + TT_CC65 = 0x01 << STD_CC65 /* Token valid in cc65 */ +}; /* Token table */ -static struct Keyword { - char* Key; /* Keyword name */ - unsigned char Tok; /* The token */ - unsigned char Type; /* Token type */ +static const struct Keyword { + char* Key; /* Keyword name */ + unsigned char Tok; /* The token */ + unsigned char Std; /* Token supported in which standards? */ } Keywords [] = { - { "__AX__", TOK_AX, TT_C }, - { "__EAX__", TOK_EAX, TT_C }, - { "__asm__", TOK_ASM, TT_C }, - { "__attribute__", TOK_ATTRIBUTE, TT_C }, - { "__fastcall__", TOK_FASTCALL, TT_C }, - { "asm", TOK_ASM, TT_EXT }, - { "auto", TOK_AUTO, TT_C }, - { "break", TOK_BREAK, TT_C }, - { "case", TOK_CASE, TT_C }, - { "char", TOK_CHAR, TT_C }, - { "const", TOK_CONST, TT_C }, - { "continue", TOK_CONTINUE, TT_C }, - { "default", TOK_DEFAULT, TT_C }, - { "do", TOK_DO, TT_C }, - { "double", TOK_DOUBLE, TT_C }, - { "else", TOK_ELSE, TT_C }, - { "enum", TOK_ENUM, TT_C }, - { "extern", TOK_EXTERN, TT_C }, - { "fastcall", TOK_FASTCALL, TT_EXT }, - { "float", TOK_FLOAT, TT_C }, - { "for", TOK_FOR, TT_C }, - { "goto", TOK_GOTO, TT_C }, - { "if", TOK_IF, TT_C }, - { "int", TOK_INT, TT_C }, - { "long", TOK_LONG, TT_C }, - { "register", TOK_REGISTER, TT_C }, - { "return", TOK_RETURN, TT_C }, - { "short", TOK_SHORT, TT_C }, - { "signed", TOK_SIGNED, TT_C }, - { "sizeof", TOK_SIZEOF, TT_C }, - { "static", TOK_STATIC, TT_C }, - { "struct", TOK_STRUCT, TT_C }, - { "switch", TOK_SWITCH, TT_C }, - { "typedef", TOK_TYPEDEF, TT_C }, - { "union", TOK_UNION, TT_C }, - { "unsigned", TOK_UNSIGNED, TT_C }, - { "void", TOK_VOID, TT_C }, - { "volatile", TOK_VOLATILE, TT_C }, - { "while", TOK_WHILE, TT_C }, + { "_Pragma", TOK_PRAGMA, TT_C89 | TT_C99 | TT_CC65 }, /* !! */ + { "__AX__", TOK_AX, TT_C89 | TT_C99 | TT_CC65 }, + { "__A__", TOK_A, TT_C89 | TT_C99 | TT_CC65 }, + { "__EAX__", TOK_EAX, TT_C89 | TT_C99 | TT_CC65 }, + { "__X__", TOK_X, TT_C89 | TT_C99 | TT_CC65 }, + { "__Y__", TOK_Y, TT_C89 | TT_C99 | TT_CC65 }, + { "__asm__", TOK_ASM, TT_C89 | TT_C99 | TT_CC65 }, + { "__attribute__", TOK_ATTRIBUTE, TT_C89 | TT_C99 | TT_CC65 }, + { "__cdecl__", TOK_CDECL, TT_C89 | TT_C99 | TT_CC65 }, + { "__far__", TOK_FAR, TT_C89 | TT_C99 | TT_CC65 }, + { "__fastcall__", TOK_FASTCALL, TT_C89 | TT_C99 | TT_CC65 }, + { "__inline__", TOK_INLINE, TT_C89 | TT_C99 | TT_CC65 }, + { "__near__", TOK_NEAR, TT_C89 | TT_C99 | TT_CC65 }, + { "asm", TOK_ASM, TT_CC65 }, + { "auto", TOK_AUTO, TT_C89 | TT_C99 | TT_CC65 }, + { "break", TOK_BREAK, TT_C89 | TT_C99 | TT_CC65 }, + { "case", TOK_CASE, TT_C89 | TT_C99 | TT_CC65 }, + { "cdecl", TOK_CDECL, TT_CC65 }, + { "char", TOK_CHAR, TT_C89 | TT_C99 | TT_CC65 }, + { "const", TOK_CONST, TT_C89 | TT_C99 | TT_CC65 }, + { "continue", TOK_CONTINUE, TT_C89 | TT_C99 | TT_CC65 }, + { "default", TOK_DEFAULT, TT_C89 | TT_C99 | TT_CC65 }, + { "do", TOK_DO, TT_C89 | TT_C99 | TT_CC65 }, + { "double", TOK_DOUBLE, TT_C89 | TT_C99 | TT_CC65 }, + { "else", TOK_ELSE, TT_C89 | TT_C99 | TT_CC65 }, + { "enum", TOK_ENUM, TT_C89 | TT_C99 | TT_CC65 }, + { "extern", TOK_EXTERN, TT_C89 | TT_C99 | TT_CC65 }, + { "far", TOK_FAR, TT_CC65 }, + { "fastcall", TOK_FASTCALL, TT_CC65 }, + { "float", TOK_FLOAT, TT_C89 | TT_C99 | TT_CC65 }, + { "for", TOK_FOR, TT_C89 | TT_C99 | TT_CC65 }, + { "goto", TOK_GOTO, TT_C89 | TT_C99 | TT_CC65 }, + { "if", TOK_IF, TT_C89 | TT_C99 | TT_CC65 }, + { "inline", TOK_INLINE, TT_C99 | TT_CC65 }, + { "int", TOK_INT, TT_C89 | TT_C99 | TT_CC65 }, + { "long", TOK_LONG, TT_C89 | TT_C99 | TT_CC65 }, + { "near", TOK_NEAR, TT_CC65 }, + { "register", TOK_REGISTER, TT_C89 | TT_C99 | TT_CC65 }, + { "restrict", TOK_RESTRICT, TT_C99 | TT_CC65 }, + { "return", TOK_RETURN, TT_C89 | TT_C99 | TT_CC65 }, + { "short", TOK_SHORT, TT_C89 | TT_C99 | TT_CC65 }, + { "signed", TOK_SIGNED, TT_C89 | TT_C99 | TT_CC65 }, + { "sizeof", TOK_SIZEOF, TT_C89 | TT_C99 | TT_CC65 }, + { "static", TOK_STATIC, TT_C89 | TT_C99 | TT_CC65 }, + { "struct", TOK_STRUCT, TT_C89 | TT_C99 | TT_CC65 }, + { "switch", TOK_SWITCH, TT_C89 | TT_C99 | TT_CC65 }, + { "typedef", TOK_TYPEDEF, TT_C89 | TT_C99 | TT_CC65 }, + { "union", TOK_UNION, TT_C89 | TT_C99 | TT_CC65 }, + { "unsigned", TOK_UNSIGNED, TT_C89 | TT_C99 | TT_CC65 }, + { "void", TOK_VOID, TT_C89 | TT_C99 | TT_CC65 }, + { "volatile", TOK_VOLATILE, TT_C89 | TT_C99 | TT_CC65 }, + { "while", TOK_WHILE, TT_C89 | TT_C99 | TT_CC65 }, }; -#define KEY_COUNT (sizeof (Keywords) / sizeof (Keywords [0])) +#define KEY_COUNT (sizeof (Keywords) / sizeof (Keywords [0])) /* Stuff for determining the type of an integer constant */ -#define IT_INT 0x01 -#define IT_UINT 0x02 -#define IT_LONG 0x04 -#define IT_ULONG 0x08 +#define IT_INT 0x01 +#define IT_UINT 0x02 +#define IT_LONG 0x04 +#define IT_ULONG 0x08 /*****************************************************************************/ -/* code */ +/* code */ /*****************************************************************************/ @@ -114,175 +164,202 @@ static int CmpKey (const void* Key, const void* Elem) -static int FindKey (char* Key) +static token_t FindKey (const char* Key) /* Find a keyword and return the token. Return IDENT if the token is not a - * keyword. - */ +** keyword. +*/ { struct Keyword* K; K = bsearch (Key, Keywords, KEY_COUNT, sizeof (Keywords [0]), CmpKey); - if (K && (K->Type != TT_EXT || ANSI == 0)) { - return K->Tok; + if (K && (K->Std & (0x01 << IS_Get (&Standard))) != 0) { + return K->Tok; } else { - return TOK_IDENT; + return TOK_IDENT; } } -static int skipwhite (void) +static int SkipWhite (void) /* Skip white space in the input stream, reading and preprocessing new lines - * if necessary. Return 0 if end of file is reached, return 1 otherwise. - */ +** if necessary. Return 0 if end of file is reached, return 1 otherwise. +*/ { while (1) { - while (*lptr == 0) { - if (readline () == 0) { - return 0; - } - preprocess (); - } - if (*lptr == ' ' || *lptr == '\r') { - ++lptr; - } else { - return 1; - } + while (CurC == '\0') { + if (NextLine () == 0) { + return 0; + } + Preprocess (); + } + if (IsSpace (CurC)) { + NextChar (); + } else { + return 1; + } } } -void symname (char *s) -/* Get symbol from input stream */ +int TokIsFuncSpec (const Token* T) +/* Return true if the token is a function specifier */ { - unsigned k = 0; - do { - if (k != MAX_IDENTLEN) { - ++k; - *s++ = *lptr; - } - ++lptr; - } while (IsIdent (*lptr) || isdigit (*lptr)); - *s = '\0'; + return (T->Tok == TOK_INLINE) || + (T->Tok == TOK_FASTCALL) || (T->Tok == TOK_CDECL) || + (T->Tok == TOK_NEAR) || (T->Tok == TOK_FAR); } -int issym (char *s) -/* Get symbol from input stream or return 0 if not a symbol. */ +void SymName (char* S) +/* Read a symbol from the input stream. The first character must have been +** checked before calling this function. The buffer is expected to be at +** least of size MAX_IDENTLEN+1. +*/ { - if (IsIdent (*lptr)) { - symname (s); - return 1; - } else { - return 0; - } -} - - - -static void unknown (unsigned char c) -/* Error message for unknown character */ -{ - Error (ERR_INVALID_CHAR, c); - gch (); /* Skip */ + unsigned Len = 0; + do { + if (Len < MAX_IDENTLEN) { + ++Len; + *S++ = CurC; + } + NextChar (); + } while (IsIdent (CurC) || IsDigit (CurC)); + *S = '\0'; } -static unsigned hexval (int c) -/* Convert a hex digit into a value */ +int IsSym (char* S) +/* If a symbol follows, read it and return 1, otherwise return 0 */ { - if (!isxdigit (c)) { - Error (ERR_ILLEGAL_HEX_DIGIT); - } - if (isdigit (c)) { - return c - '0'; + if (IsIdent (CurC)) { + SymName (S); + return 1; } else { - return toupper (c) - 'A' + 10; + return 0; } } -static void SetTok (int tok) -/* set nxttok and bump line ptr */ +static void UnknownChar (char C) +/* Error message for unknown character */ { - nxttok = tok; - ++lptr; + Error ("Invalid input character with code %02X", C & 0xFF); + NextChar (); /* Skip */ } -static int SignExtendChar (int C) -/* Do correct sign extension of a character */ +static void SetTok (int tok) +/* Set NextTok.Tok and bump line ptr */ { - if (SignedChars && (C & 0x80) != 0) { - return C | ~0xFF; - } else { - return C & 0xFF; - } + NextTok.Tok = tok; + NextChar (); } -static int parsechar (int c) -/* Parse a character. Converts \n into EOL, etc. */ +static int ParseChar (void) +/* Parse a character. Converts escape chars into character codes. */ { - int i; - int val; + int C; + int HadError; + int Count; /* Check for escape chars */ - if (c == '\\') { - switch (c = gch ()) { - case 'b': - c = '\b'; - break; - case 'f': - c = '\f'; - break; - case 'r': - c = '\r'; - break; - case 'n': - c = '\n'; - break; - case 't': - c = '\t'; - break; - case '\"': - c = '\"'; - break; - case '\'': - c = '\''; - break; - case '\\': - c = '\\'; - break; - case 'x': - case 'X': - /* Hex character constant */ - val = hexval (gch ()) << 4; - c = val | hexval (gch ()); /* Do not translate */ - break; - case '0': - case '1': - /* Octal constant */ - i = 0; - val = c - '0'; - while ((c = *lptr) >= '0' && c <= '7' && i++ < 4) { - val = (val << 3) | (c - '0'); - gch (); - } - c = val; /* Do not translate */ - break; - default: - Error (ERR_ILLEGAL_CHARCONST); - } + if (CurC == '\\') { + NextChar (); + switch (CurC) { + case '?': + C = '\?'; + break; + case 'a': + C = '\a'; + break; + case 'b': + C = '\b'; + break; + case 'f': + C = '\f'; + break; + case 'r': + C = '\r'; + break; + case 'n': + C = '\n'; + break; + case 't': + C = '\t'; + break; + case 'v': + C = '\v'; + break; + case '\"': + C = '\"'; + break; + case '\'': + C = '\''; + break; + case '\\': + C = '\\'; + break; + case 'x': + case 'X': + /* Hex character constant */ + if (!IsXDigit (NextC)) { + Error ("\\x used with no following hex digits"); + C = ' '; + } else { + HadError = 0; + C = 0; + while (IsXDigit (NextC)) { + if ((C << 4) >= 256) { + if (!HadError) { + Error ("Hex character constant out of range"); + HadError = 1; + } + } else { + C = (C << 4) | HexVal (NextC); + } + NextChar (); + } + } + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + /* Octal constant */ + Count = 1; + C = HexVal (CurC); + while (IsODigit (NextC) && Count++ < 3) { + C = (C << 3) | HexVal (NextC); + NextChar (); + } + if (C >= 256) + Error ("Octal character constant out of range"); + break; + default: + C = CurC; + Error ("Illegal escaped character: 0x%02X", CurC); + break; + } + } else { + C = CurC; } + /* Skip the character read */ + NextChar (); + /* Do correct sign extension */ - return SignExtendChar (c); + return SignExtendChar (C); } @@ -290,23 +367,30 @@ static int parsechar (int c) static void CharConst (void) /* Parse a character constant. */ { - int c; + int C; /* Skip the quote */ - ++lptr; + NextChar (); /* Get character */ - c = parsechar (cgch ()); + C = ParseChar (); /* Check for closing quote */ - if (cgch () != '\'') { - Error (ERR_QUOTE_EXPECTED); + if (CurC != '\'') { + Error ("'\'' expected"); + } else { + /* Skip the quote */ + NextChar (); } /* Setup values and attributes */ - nxttok = TOK_CCONST; - nxtval = SignExtendChar (ctrans (c)); /* Translate into target charset */ - nxttype = type_int; /* Character constants have type int */ + NextTok.Tok = TOK_CCONST; + + /* Translate into target charset */ + NextTok.IVal = SignExtendChar (TgtTranslateChar (C)); + + /* Character constants have type int */ + NextTok.Type = type_int; } @@ -314,33 +398,291 @@ static void CharConst (void) static void StringConst (void) /* Parse a quoted string */ { - nxtval = GetLiteralOffs (); - nxttok = TOK_SCONST; + /* String buffer */ + StrBuf S = AUTO_STRBUF_INITIALIZER; + + /* Assume next token is a string constant */ + NextTok.Tok = TOK_SCONST; - /* Be sure to concatenate strings */ - while (*lptr == '\"') { + /* Concatenate strings. If at least one of the concenated strings is a wide + ** character literal, the whole string is a wide char literal, otherwise + ** it's a normal string literal. + */ + while (1) { + + /* Check if this is a normal or a wide char string */ + if (CurC == 'L' && NextC == '\"') { + /* Wide character literal */ + NextTok.Tok = TOK_WCSCONST; + NextChar (); + NextChar (); + } else if (CurC == '\"') { + /* Skip the quote char */ + NextChar (); + } else { + /* No string */ + break; + } + + /* Read until end of string */ + while (CurC != '\"') { + if (CurC == '\0') { + Error ("Unexpected newline"); + break; + } + SB_AppendChar (&S, ParseChar ()); + } + + /* Skip closing quote char if there was one */ + NextChar (); + + /* Skip white space, read new input */ + SkipWhite (); + + } + + /* Terminate the string */ + SB_AppendChar (&S, '\0'); + + /* Add the whole string to the literal pool */ + NextTok.SVal = AddLiteralStr (&S); + + /* Free the buffer */ + SB_Done (&S); +} - /* Skip the quote char */ - ++lptr; - while (*lptr != '\"') { - if (*lptr == 0) { - Error (ERR_UNEXPECTED_NEWLINE); - break; - } - AddLiteralChar (parsechar (gch())); - } - /* Skip closing quote char if there was one */ - cgch (); +static void NumericConst (void) +/* Parse a numeric constant */ +{ + unsigned Base; /* Temporary number base */ + unsigned Prefix; /* Base according to prefix */ + StrBuf S = STATIC_STRBUF_INITIALIZER; + int IsFloat; + char C; + unsigned DigitVal; + unsigned long IVal; /* Value */ + + /* Check for a leading hex or octal prefix and determine the possible + ** integer types. + */ + if (CurC == '0') { + /* Gobble 0 and examine next char */ + NextChar (); + if (toupper (CurC) == 'X') { + Base = Prefix = 16; + NextChar (); /* gobble "x" */ + } else { + Base = 10; /* Assume 10 for now - see below */ + Prefix = 8; /* Actual prefix says octal */ + } + } else { + Base = Prefix = 10; + } - /* Skip white space, read new input */ - skipwhite (); + /* Because floating point numbers don't have octal prefixes (a number + ** with a leading zero is decimal), we first have to read the number + ** before converting it, so we can determine if it's a float or an + ** integer. + */ + while (IsXDigit (CurC) && HexVal (CurC) < Base) { + SB_AppendChar (&S, CurC); + NextChar (); + } + SB_Terminate (&S); + + /* The following character tells us if we have an integer or floating + ** point constant. Note: Hexadecimal floating point constants aren't + ** supported in C89. + */ + IsFloat = (CurC == '.' || + (Base == 10 && toupper (CurC) == 'E') || + (Base == 16 && toupper (CurC) == 'P' && IS_Get (&Standard) >= STD_C99)); + + /* If we don't have a floating point type, an octal prefix results in an + ** octal base. + */ + if (!IsFloat && Prefix == 8) { + Base = 8; + } + /* Since we do now know the correct base, convert the remembered input + ** into a number. + */ + SB_Reset (&S); + IVal = 0; + while ((C = SB_Get (&S)) != '\0') { + DigitVal = HexVal (C); + if (DigitVal >= Base) { + Error ("Numeric constant contains digits beyond the radix"); + } + IVal = (IVal * Base) + DigitVal; } - /* Terminate the string */ - AddLiteralChar ('\0'); + /* We don't need the string buffer any longer */ + SB_Done (&S); + + /* Distinguish between integer and floating point constants */ + if (!IsFloat) { + + unsigned Types; + int HaveSuffix; + + /* Check for a suffix and determine the possible types */ + HaveSuffix = 1; + if (toupper (CurC) == 'U') { + /* Unsigned type */ + NextChar (); + if (toupper (CurC) != 'L') { + Types = IT_UINT | IT_ULONG; + } else { + NextChar (); + Types = IT_ULONG; + } + } else if (toupper (CurC) == 'L') { + /* Long type */ + NextChar (); + if (toupper (CurC) != 'U') { + Types = IT_LONG | IT_ULONG; + } else { + NextChar (); + Types = IT_ULONG; + } + } else { + HaveSuffix = 0; + if (Prefix == 10) { + /* Decimal constants are of any type but uint */ + Types = IT_INT | IT_LONG | IT_ULONG; + } else { + /* Octal or hex constants are of any type */ + Types = IT_INT | IT_UINT | IT_LONG | IT_ULONG; + } + } + + /* Check the range to determine the type */ + if (IVal > 0x7FFF) { + /* Out of range for int */ + Types &= ~IT_INT; + /* If the value is in the range 0x8000..0xFFFF, unsigned int is not + ** allowed, and we don't have a type specifying suffix, emit a + ** warning, because the constant is of type long. + */ + if (IVal <= 0xFFFF && (Types & IT_UINT) == 0 && !HaveSuffix) { + Warning ("Constant is long"); + } + } + if (IVal > 0xFFFF) { + /* Out of range for unsigned int */ + Types &= ~IT_UINT; + } + if (IVal > 0x7FFFFFFF) { + /* Out of range for long int */ + Types &= ~IT_LONG; + } + + /* Now set the type string to the smallest type in types */ + if (Types & IT_INT) { + NextTok.Type = type_int; + } else if (Types & IT_UINT) { + NextTok.Type = type_uint; + } else if (Types & IT_LONG) { + NextTok.Type = type_long; + } else { + NextTok.Type = type_ulong; + } + + /* Set the value and the token */ + NextTok.IVal = IVal; + NextTok.Tok = TOK_ICONST; + + } else { + + /* Float constant */ + Double FVal = FP_D_FromInt (IVal); /* Convert to double */ + + /* Check for a fractional part and read it */ + if (CurC == '.') { + + Double Scale; + + /* Skip the dot */ + NextChar (); + + /* Read fractional digits */ + Scale = FP_D_Make (1.0); + while (IsXDigit (CurC) && (DigitVal = HexVal (CurC)) < Base) { + /* Get the value of this digit */ + Double FracVal = FP_D_Div (FP_D_FromInt (DigitVal * Base), Scale); + /* Add it to the float value */ + FVal = FP_D_Add (FVal, FracVal); + /* Scale base */ + Scale = FP_D_Mul (Scale, FP_D_FromInt (DigitVal)); + /* Skip the digit */ + NextChar (); + } + } + + /* Check for an exponent and read it */ + if ((Base == 16 && toupper (CurC) == 'F') || + (Base == 10 && toupper (CurC) == 'E')) { + + unsigned Digits; + unsigned Exp; + + /* Skip the exponent notifier */ + NextChar (); + + /* Read an optional sign */ + if (CurC == '-') { + NextChar (); + } else if (CurC == '+') { + NextChar (); + } + + /* Read exponent digits. Since we support only 32 bit floats + ** with a maximum exponent of +-/127, we read the exponent + ** part as integer with up to 3 digits and drop the remainder. + ** This avoids an overflow of Exp. The exponent is always + ** decimal, even for hex float consts. + */ + Digits = 0; + Exp = 0; + while (IsDigit (CurC)) { + if (++Digits <= 3) { + Exp = Exp * 10 + HexVal (CurC); + } + NextChar (); + } + + /* Check for errors: We must have exponent digits, and not more + ** than three. + */ + if (Digits == 0) { + Error ("Floating constant exponent has no digits"); + } else if (Digits > 3) { + Warning ("Floating constant exponent is too large"); + } + + /* Scale the exponent and adjust the value accordingly */ + if (Exp) { + FVal = FP_D_Mul (FVal, FP_D_Make (pow (10, Exp))); + } + } + + /* Check for a suffix and determine the type of the constant */ + if (toupper (CurC) == 'F') { + NextChar (); + NextTok.Type = type_float; + } else { + NextTok.Type = type_double; + } + + /* Set the value and the token */ + NextTok.FVal = FVal; + NextTok.Tok = TOK_FCONST; + + } } @@ -348,378 +690,310 @@ static void StringConst (void) void NextToken (void) /* Get next token from input stream */ { - char c; ident token; + /* We have to skip white space here before shifting tokens, since the + ** tokens and the current line info is invalid at startup and will get + ** initialized by reading the first time from the file. Remember if + ** we were at end of input and handle that later. + */ + int GotEOF = (SkipWhite() == 0); + /* Current token is the lookahead token */ + if (CurTok.LI) { + ReleaseLineInfo (CurTok.LI); + } CurTok = NextTok; + /* When reading the first time from the file, the line info in NextTok, + ** which was copied to CurTok is invalid. Since the information from + ** the token is used for error messages, we must make it valid. + */ + if (CurTok.LI == 0) { + CurTok.LI = UseLineInfo (GetCurLineInfo ()); + } + /* Remember the starting position of the next token */ - NextTok.Pos = ln; + NextTok.LI = UseLineInfo (GetCurLineInfo ()); - /* Skip spaces and read the next line if needed */ - if (skipwhite () == 0) { - /* End of file reached */ - nxttok = TOK_CEOF; - return; + /* Now handle end of input. */ + if (GotEOF) { + /* End of file reached */ + NextTok.Tok = TOK_CEOF; + return; } /* Determine the next token from the lookahead */ - c = *lptr; - if (isdigit (c)) { - - /* A number */ - int HaveSuffix; /* True if we have a type suffix */ - unsigned types; /* Possible types */ - unsigned base; - unsigned long k; /* Value */ - - k = 0; - base = 10; - types = IT_INT | IT_LONG | IT_ULONG; - - if (c == '0') { - /* Octal or hex constants may also be of type unsigned int */ - types = IT_INT | IT_UINT | IT_LONG | IT_ULONG; - /* gobble 0 and examin next char */ - if (toupper (*++lptr) == 'X') { - base = 16; - nxttype = type_uint; - ++lptr; /* gobble "x" */ - } else { - base = 8; - } - } - while (1) { - c = *lptr; - if (isdigit (c)) { - k = k * base + (c - '0'); - } else if (base == 16 && isxdigit (c)) { - k = (k << 4) + hexval (c); - } else { - break; /* not digit */ - } - ++lptr; /* gobble char */ - } - - /* Check for a suffix */ - HaveSuffix = 1; - c = toupper (*lptr); - if (c == 'U') { - /* Unsigned type */ - ++lptr; - if (toupper (*lptr) != 'L') { - types = IT_UINT | IT_ULONG; - } else { - ++lptr; - types = IT_ULONG; - } - } else if (c == 'L') { - /* Long type */ - ++lptr; - if (toupper (*lptr) != 'U') { - types = IT_LONG | IT_ULONG; - } else { - ++lptr; - types = IT_ULONG; - } - } else { - HaveSuffix = 0; - } - - /* Check the range to determine the type */ - if (k > 0x7FFF) { - /* Out of range for int */ - types &= ~IT_INT; - /* If the value is in the range 0x8000..0xFFFF, unsigned int is not - * allowed, and we don't have a type specifying suffix, emit a - * warning. - */ - if (k <= 0xFFFF && (types & IT_UINT) == 0 && !HaveSuffix) { - Warning (WARN_CONSTANT_IS_LONG); - } - } - if (k > 0xFFFF) { - /* Out of range for unsigned int */ - types &= ~IT_UINT; - } - if (k > 0x7FFFFFFF) { - /* Out of range for long int */ - types &= ~IT_LONG; - } - - /* Now set the type string to the smallest type in types */ - if (types & IT_INT) { - nxttype = type_int; - } else if (types & IT_UINT) { - nxttype = type_uint; - } else if (types & IT_LONG) { - nxttype = type_long; - } else { - nxttype = type_ulong; - } - - /* Set the value and the token */ - nxtval = k; - nxttok = TOK_ICONST; - return; + if (IsDigit (CurC) || (CurC == '.' && IsDigit (NextC))) { + /* A number */ + NumericConst (); + return; } - if (issym (token)) { - - /* Check for a keyword */ - if ((nxttok = FindKey (token)) != TOK_IDENT) { - /* Reserved word found */ - return; - } - /* No reserved word, check for special symbols */ - if (token [0] == '_') { - /* Special symbols */ - if (strcmp (token, "__FILE__") == 0) { - nxtval = AddLiteral (fin); - nxttok = TOK_SCONST; - return; - } else if (strcmp (token, "__LINE__") == 0) { - nxttok = TOK_ICONST; - nxtval = ln; - nxttype = type_int; - return; - } else if (strcmp (token, "__fixargs__") == 0) { - nxttok = TOK_ICONST; - nxtval = GetParamSize (CurrentFunc); - nxttype = type_uint; - return; - } else if (strcmp (token, "__func__") == 0) { - /* __func__ is only defined in functions */ - if (CurrentFunc) { - nxtval = AddLiteral (GetFuncName (CurrentFunc)); - nxttok = TOK_SCONST; - return; - } - } - } - - /* No reserved word but identifier */ - strcpy (NextTok.Ident, token); - NextTok.Tok = TOK_IDENT; - return; + /* Check for wide character literals */ + if (CurC == 'L' && NextC == '\"') { + StringConst (); + return; + } + + /* Check for keywords and identifiers */ + if (IsSym (token)) { + + /* Check for a keyword */ + if ((NextTok.Tok = FindKey (token)) != TOK_IDENT) { + /* Reserved word found */ + return; + } + /* No reserved word, check for special symbols */ + if (token[0] == '_' && token[1] == '_') { + /* Special symbols */ + if (strcmp (token+2, "FILE__") == 0) { + NextTok.SVal = AddLiteral (GetCurrentFile()); + NextTok.Tok = TOK_SCONST; + return; + } else if (strcmp (token+2, "LINE__") == 0) { + NextTok.Tok = TOK_ICONST; + NextTok.IVal = GetCurrentLine(); + NextTok.Type = type_int; + return; + } else if (strcmp (token+2, "func__") == 0) { + /* __func__ is only defined in functions */ + if (CurrentFunc) { + NextTok.SVal = AddLiteral (F_GetFuncName (CurrentFunc)); + NextTok.Tok = TOK_SCONST; + return; + } + } + } + + /* No reserved word but identifier */ + strcpy (NextTok.Ident, token); + NextTok.Tok = TOK_IDENT; + return; } /* Monstrous switch statement ahead... */ - switch (c) { - - case '!': - if (*++lptr == '=') { - SetTok (TOK_NE); - } else { - nxttok = TOK_BOOL_NOT; - } - break; - - case '\"': - StringConst (); - break; - - case '%': - if (*++lptr == '=') { - SetTok (TOK_MOD_ASSIGN); - } else { - nxttok = TOK_MOD; - } - break; - - case '&': - switch (*++lptr) { - case '&': - SetTok (TOK_BOOL_AND); - break; - case '=': - SetTok (TOK_AND_ASSIGN); - break; - default: - nxttok = TOK_AND; - } - break; - - case '\'': - CharConst (); - break; - - case '(': - SetTok (TOK_LPAREN); - break; - - case ')': - SetTok (TOK_RPAREN); - break; - - case '*': - if (*++lptr == '=') { - SetTok (TOK_MUL_ASSIGN); - } else { - nxttok = TOK_STAR; - } - break; - - case '+': - switch (*++lptr) { - case '+': - SetTok (TOK_INC); - break; - case '=': - SetTok (TOK_PLUS_ASSIGN); - break; - default: - nxttok = TOK_PLUS; - } - break; - - case ',': - SetTok (TOK_COMMA); - break; - - case '-': - switch (*++lptr) { - case '-': - SetTok (TOK_DEC); - break; - case '=': - SetTok (TOK_MINUS_ASSIGN); - break; - case '>': - SetTok (TOK_PTR_REF); - break; - default: - nxttok = TOK_MINUS; - } - break; - - case '.': - if (*++lptr == '.') { - if (*++lptr == '.') { - SetTok (TOK_ELLIPSIS); - } else { - unknown (*lptr); - } - } else { - nxttok = TOK_DOT; - } - break; - - case '/': - if (*++lptr == '=') { - SetTok (TOK_DIV_ASSIGN); - } else { - nxttok = TOK_DIV; - } - break; - - case ':': - SetTok (TOK_COLON); - break; - - case ';': - SetTok (TOK_SEMI); - break; - - case '<': - switch (*++lptr) { - case '=': - SetTok (TOK_LE); - break; - case '<': - if (*++lptr == '=') { - SetTok (TOK_SHL_ASSIGN); - } else { - nxttok = TOK_SHL; - } - break; - default: - nxttok = TOK_LT; - } - break; - - case '=': - if (*++lptr == '=') { - SetTok (TOK_EQ); - } else { - nxttok = TOK_ASSIGN; - } - break; - - case '>': - switch (*++lptr) { - case '=': - SetTok (TOK_GE); - break; - case '>': - if (*++lptr == '=') { - SetTok (TOK_SHR_ASSIGN); - } else { - nxttok = TOK_SHR; - } - break; - default: - nxttok = TOK_GT; - } - break; - - case '?': - SetTok (TOK_QUEST); - break; - - case '[': - SetTok (TOK_LBRACK); - break; - - case ']': - SetTok (TOK_RBRACK); - break; - - case '^': - if (*++lptr == '=') { - SetTok (TOK_XOR_ASSIGN); - } else { - nxttok = TOK_XOR; - } - break; - - case '{': - SetTok (TOK_LCURLY); - break; + switch (CurC) { + + case '!': + NextChar (); + if (CurC == '=') { + SetTok (TOK_NE); + } else { + NextTok.Tok = TOK_BOOL_NOT; + } + break; + + case '\"': + StringConst (); + break; + + case '%': + NextChar (); + if (CurC == '=') { + SetTok (TOK_MOD_ASSIGN); + } else { + NextTok.Tok = TOK_MOD; + } + break; + + case '&': + NextChar (); + switch (CurC) { + case '&': + SetTok (TOK_BOOL_AND); + break; + case '=': + SetTok (TOK_AND_ASSIGN); + break; + default: + NextTok.Tok = TOK_AND; + } + break; + + case '\'': + CharConst (); + break; + + case '(': + SetTok (TOK_LPAREN); + break; + + case ')': + SetTok (TOK_RPAREN); + break; + + case '*': + NextChar (); + if (CurC == '=') { + SetTok (TOK_MUL_ASSIGN); + } else { + NextTok.Tok = TOK_STAR; + } + break; + + case '+': + NextChar (); + switch (CurC) { + case '+': + SetTok (TOK_INC); + break; + case '=': + SetTok (TOK_PLUS_ASSIGN); + break; + default: + NextTok.Tok = TOK_PLUS; + } + break; + + case ',': + SetTok (TOK_COMMA); + break; + + case '-': + NextChar (); + switch (CurC) { + case '-': + SetTok (TOK_DEC); + break; + case '=': + SetTok (TOK_MINUS_ASSIGN); + break; + case '>': + SetTok (TOK_PTR_REF); + break; + default: + NextTok.Tok = TOK_MINUS; + } + break; + + case '.': + NextChar (); + if (CurC == '.') { + NextChar (); + if (CurC == '.') { + SetTok (TOK_ELLIPSIS); + } else { + UnknownChar (CurC); + } + } else { + NextTok.Tok = TOK_DOT; + } + break; + + case '/': + NextChar (); + if (CurC == '=') { + SetTok (TOK_DIV_ASSIGN); + } else { + NextTok.Tok = TOK_DIV; + } + break; + + case ':': + SetTok (TOK_COLON); + break; + + case ';': + SetTok (TOK_SEMI); + break; + + case '<': + NextChar (); + switch (CurC) { + case '=': + SetTok (TOK_LE); + break; + case '<': + NextChar (); + if (CurC == '=') { + SetTok (TOK_SHL_ASSIGN); + } else { + NextTok.Tok = TOK_SHL; + } + break; + default: + NextTok.Tok = TOK_LT; + } + break; + + case '=': + NextChar (); + if (CurC == '=') { + SetTok (TOK_EQ); + } else { + NextTok.Tok = TOK_ASSIGN; + } + break; + + case '>': + NextChar (); + switch (CurC) { + case '=': + SetTok (TOK_GE); + break; + case '>': + NextChar (); + if (CurC == '=') { + SetTok (TOK_SHR_ASSIGN); + } else { + NextTok.Tok = TOK_SHR; + } + break; + default: + NextTok.Tok = TOK_GT; + } + break; + + case '?': + SetTok (TOK_QUEST); + break; + + case '[': + SetTok (TOK_LBRACK); + break; + + case ']': + SetTok (TOK_RBRACK); + break; + + case '^': + NextChar (); + if (CurC == '=') { + SetTok (TOK_XOR_ASSIGN); + } else { + NextTok.Tok = TOK_XOR; + } + break; + + case '{': + SetTok (TOK_LCURLY); + break; case '|': - switch (*++lptr) { - case '|': - SetTok (TOK_BOOL_OR); - break; - case '=': - SetTok (TOK_OR_ASSIGN); - break; - default: - nxttok = TOK_OR; - } - break; - - case '}': - SetTok (TOK_RCURLY); - break; - - case '~': - SetTok (TOK_COMP); - break; - - case '#': - while (*++lptr == ' ') ; /* Skip it and following whitespace */ - if (!issym (token) || strcmp (token, "pragma") != 0) { - /* OOPS - should not happen */ - Error (ERR_CPP_DIRECTIVE_EXPECTED); - } - nxttok = TOK_PRAGMA; - break; - - default: - unknown (c); + NextChar (); + switch (CurC) { + case '|': + SetTok (TOK_BOOL_OR); + break; + case '=': + SetTok (TOK_OR_ASSIGN); + break; + default: + NextTok.Tok = TOK_OR; + } + break; + + case '}': + SetTok (TOK_RCURLY); + break; + + case '~': + SetTok (TOK_COMP); + break; + + default: + UnknownChar (CurC); } @@ -727,89 +1001,132 @@ void NextToken (void) -void Consume (token_t Token, unsigned ErrNum) +void SkipTokens (const token_t* TokenList, unsigned TokenCount) +/* Skip tokens until we reach TOK_CEOF or a token in the given token list. +** This routine is used for error recovery. +*/ +{ + while (CurTok.Tok != TOK_CEOF) { + + /* Check if the current token is in the token list */ + unsigned I; + for (I = 0; I < TokenCount; ++I) { + if (CurTok.Tok == TokenList[I]) { + /* Found a token in the list */ + return; + } + } + + /* Not in the list: Skip it */ + NextToken (); + + } +} + + + +int Consume (token_t Token, const char* ErrorMsg) /* Eat token if it is the next in the input stream, otherwise print an error - * message. - */ +** message. Returns true if the token was found and false otherwise. +*/ { - if (curtok == Token) { - NextToken (); + if (CurTok.Tok == Token) { + NextToken (); + return 1; } else { - Error (ErrNum); + Error ("%s", ErrorMsg); + return 0; } } -void ConsumeColon (void) +int ConsumeColon (void) /* Check for a colon and skip it. */ { - Consume (TOK_COLON, ERR_COLON_EXPECTED); + return Consume (TOK_COLON, "':' expected"); } -void ConsumeSemi (void) +int ConsumeSemi (void) /* Check for a semicolon and skip it. */ { /* Try do be smart about typos... */ - if (curtok == TOK_SEMI) { - NextToken (); + if (CurTok.Tok == TOK_SEMI) { + NextToken (); + return 1; + } else { + Error ("';' expected"); + if (CurTok.Tok == TOK_COLON || CurTok.Tok == TOK_COMMA) { + NextToken (); + } + return 0; + } +} + + + +int ConsumeComma (void) +/* Check for a comma and skip it. */ +{ + /* Try do be smart about typos... */ + if (CurTok.Tok == TOK_COMMA) { + NextToken (); + return 1; } else { - Error (ERR_SEMICOLON_EXPECTED); - if (curtok == TOK_COLON || curtok == TOK_COMMA) { - NextToken (); - } + Error ("',' expected"); + if (CurTok.Tok == TOK_SEMI) { + NextToken (); + } + return 0; } } -void ConsumeLParen (void) +int ConsumeLParen (void) /* Check for a left parenthesis and skip it */ { - Consume (TOK_LPAREN, ERR_LPAREN_EXPECTED); + return Consume (TOK_LPAREN, "'(' expected"); } -void ConsumeRParen (void) +int ConsumeRParen (void) /* Check for a right parenthesis and skip it */ { - Consume (TOK_RPAREN, ERR_RPAREN_EXPECTED); + return Consume (TOK_RPAREN, "')' expected"); } -void ConsumeLBrack (void) +int ConsumeLBrack (void) /* Check for a left bracket and skip it */ { - Consume (TOK_LBRACK, ERR_LBRACK_EXPECTED); + return Consume (TOK_LBRACK, "'[' expected"); } -void ConsumeRBrack (void) +int ConsumeRBrack (void) /* Check for a right bracket and skip it */ { - Consume (TOK_RBRACK, ERR_RBRACK_EXPECTED); + return Consume (TOK_RBRACK, "']' expected"); } -void ConsumeLCurly (void) +int ConsumeLCurly (void) /* Check for a left curly brace and skip it */ { - Consume (TOK_LCURLY, ERR_LCURLY_EXPECTED); + return Consume (TOK_LCURLY, "'{' expected"); } -void ConsumeRCurly (void) +int ConsumeRCurly (void) /* Check for a right curly brace and skip it */ { - Consume (TOK_RCURLY, ERR_RCURLY_EXPECTED); + return Consume (TOK_RCURLY, "'}' expected"); } - - -