1 /*****************************************************************************/
5 /* Source file line info structure */
9 /* (C) 1998-2010, Ullrich von Bassewitz */
10 /* Roemerstrasse 52 */
11 /* D-70794 Filderstadt */
12 /* EMail: uz@cc65.org */
15 /* This software is provided 'as-is', without any expressed or implied */
16 /* warranty. In no event will the authors be held liable for any damages */
17 /* arising from the use of this software. */
19 /* Permission is granted to anyone to use this software for any purpose, */
20 /* including commercial applications, and to alter it and redistribute it */
21 /* freely, subject to the following restrictions: */
23 /* 1. The origin of this software must not be misrepresented; you must not */
24 /* claim that you wrote the original software. If you use this software */
25 /* in a product, an acknowledgment in the product documentation would be */
26 /* appreciated but is not required. */
27 /* 2. Altered source versions must be plainly marked as such, and must not */
28 /* be misrepresented as being the original software. */
29 /* 3. This notice may not be removed or altered from any source */
32 /*****************************************************************************/
64 /*****************************************************************************/
66 /*****************************************************************************/
70 Token CurTok; /* The current token */
71 Token NextTok; /* The next token */
77 TT_C89 = 0x01 << STD_C89, /* Token valid in C89 */
78 TT_C99 = 0x01 << STD_C99, /* Token valid in C99 */
79 TT_CC65 = 0x01 << STD_CC65 /* Token valid in cc65 */
83 static const struct Keyword {
84 char* Key; /* Keyword name */
85 unsigned char Tok; /* The token */
86 unsigned char Std; /* Token supported in which standards? */
88 { "_Pragma", TOK_PRAGMA, TT_C89 | TT_C99 | TT_CC65 }, /* !! */
89 { "__AX__", TOK_AX, TT_C89 | TT_C99 | TT_CC65 },
90 { "__A__", TOK_A, TT_C89 | TT_C99 | TT_CC65 },
91 { "__EAX__", TOK_EAX, TT_C89 | TT_C99 | TT_CC65 },
92 { "__X__", TOK_X, TT_C89 | TT_C99 | TT_CC65 },
93 { "__Y__", TOK_Y, TT_C89 | TT_C99 | TT_CC65 },
94 { "__asm__", TOK_ASM, TT_C89 | TT_C99 | TT_CC65 },
95 { "__attribute__", TOK_ATTRIBUTE, TT_C89 | TT_C99 | TT_CC65 },
96 { "__cdecl__", TOK_CDECL, TT_C89 | TT_C99 | TT_CC65 },
97 { "__far__", TOK_FAR, TT_C89 | TT_C99 | TT_CC65 },
98 { "__fastcall__", TOK_FASTCALL, TT_C89 | TT_C99 | TT_CC65 },
99 { "__inline__", TOK_INLINE, TT_C89 | TT_C99 | TT_CC65 },
100 { "__near__", TOK_NEAR, TT_C89 | TT_C99 | TT_CC65 },
101 { "asm", TOK_ASM, TT_CC65 },
102 { "auto", TOK_AUTO, TT_C89 | TT_C99 | TT_CC65 },
103 { "break", TOK_BREAK, TT_C89 | TT_C99 | TT_CC65 },
104 { "case", TOK_CASE, TT_C89 | TT_C99 | TT_CC65 },
105 { "cdecl", TOK_CDECL, TT_CC65 },
106 { "char", TOK_CHAR, TT_C89 | TT_C99 | TT_CC65 },
107 { "const", TOK_CONST, TT_C89 | TT_C99 | TT_CC65 },
108 { "continue", TOK_CONTINUE, TT_C89 | TT_C99 | TT_CC65 },
109 { "default", TOK_DEFAULT, TT_C89 | TT_C99 | TT_CC65 },
110 { "do", TOK_DO, TT_C89 | TT_C99 | TT_CC65 },
111 { "double", TOK_DOUBLE, TT_C89 | TT_C99 | TT_CC65 },
112 { "else", TOK_ELSE, TT_C89 | TT_C99 | TT_CC65 },
113 { "enum", TOK_ENUM, TT_C89 | TT_C99 | TT_CC65 },
114 { "extern", TOK_EXTERN, TT_C89 | TT_C99 | TT_CC65 },
115 { "far", TOK_FAR, TT_CC65 },
116 { "fastcall", TOK_FASTCALL, TT_CC65 },
117 { "float", TOK_FLOAT, TT_C89 | TT_C99 | TT_CC65 },
118 { "for", TOK_FOR, TT_C89 | TT_C99 | TT_CC65 },
119 { "goto", TOK_GOTO, TT_C89 | TT_C99 | TT_CC65 },
120 { "if", TOK_IF, TT_C89 | TT_C99 | TT_CC65 },
121 { "inline", TOK_INLINE, TT_C99 | TT_CC65 },
122 { "int", TOK_INT, TT_C89 | TT_C99 | TT_CC65 },
123 { "long", TOK_LONG, TT_C89 | TT_C99 | TT_CC65 },
124 { "near", TOK_NEAR, TT_CC65 },
125 { "register", TOK_REGISTER, TT_C89 | TT_C99 | TT_CC65 },
126 { "restrict", TOK_RESTRICT, TT_C99 | TT_CC65 },
127 { "return", TOK_RETURN, TT_C89 | TT_C99 | TT_CC65 },
128 { "short", TOK_SHORT, TT_C89 | TT_C99 | TT_CC65 },
129 { "signed", TOK_SIGNED, TT_C89 | TT_C99 | TT_CC65 },
130 { "sizeof", TOK_SIZEOF, TT_C89 | TT_C99 | TT_CC65 },
131 { "static", TOK_STATIC, TT_C89 | TT_C99 | TT_CC65 },
132 { "struct", TOK_STRUCT, TT_C89 | TT_C99 | TT_CC65 },
133 { "switch", TOK_SWITCH, TT_C89 | TT_C99 | TT_CC65 },
134 { "typedef", TOK_TYPEDEF, TT_C89 | TT_C99 | TT_CC65 },
135 { "union", TOK_UNION, TT_C89 | TT_C99 | TT_CC65 },
136 { "unsigned", TOK_UNSIGNED, TT_C89 | TT_C99 | TT_CC65 },
137 { "void", TOK_VOID, TT_C89 | TT_C99 | TT_CC65 },
138 { "volatile", TOK_VOLATILE, TT_C89 | TT_C99 | TT_CC65 },
139 { "while", TOK_WHILE, TT_C89 | TT_C99 | TT_CC65 },
141 #define KEY_COUNT (sizeof (Keywords) / sizeof (Keywords [0]))
145 /* Stuff for determining the type of an integer constant */
149 #define IT_ULONG 0x08
153 /*****************************************************************************/
155 /*****************************************************************************/
159 static int CmpKey (const void* Key, const void* Elem)
160 /* Compare function for bsearch */
162 return strcmp ((const char*) Key, ((const struct Keyword*) Elem)->Key);
167 static token_t FindKey (const char* Key)
168 /* Find a keyword and return the token. Return IDENT if the token is not a
173 K = bsearch (Key, Keywords, KEY_COUNT, sizeof (Keywords [0]), CmpKey);
174 if (K && (K->Std & (0x01 << IS_Get (&Standard))) != 0) {
183 static int SkipWhite (void)
184 /* Skip white space in the input stream, reading and preprocessing new lines
185 * if necessary. Return 0 if end of file is reached, return 1 otherwise.
189 while (CurC == '\0') {
190 if (NextLine () == 0) {
195 if (IsSpace (CurC)) {
205 int TokIsFuncSpec (const Token* T)
206 /* Return true if the token is a function specifier */
208 return (T->Tok == TOK_INLINE) ||
209 (T->Tok == TOK_FASTCALL) || (T->Tok == TOK_CDECL) ||
210 (T->Tok == TOK_NEAR) || (T->Tok == TOK_FAR);
215 void SymName (char* S)
216 /* Read a symbol from the input stream. The first character must have been
217 * checked before calling this function. The buffer is expected to be at
218 * least of size MAX_IDENTLEN+1.
223 if (Len < MAX_IDENTLEN) {
228 } while (IsIdent (CurC) || IsDigit (CurC));
235 /* If a symbol follows, read it and return 1, otherwise return 0 */
237 if (IsIdent (CurC)) {
247 static void UnknownChar (char C)
248 /* Error message for unknown character */
250 Error ("Invalid input character with code %02X", C & 0xFF);
251 NextChar (); /* Skip */
256 static void SetTok (int tok)
257 /* Set NextTok.Tok and bump line ptr */
265 static int ParseChar (void)
266 /* Parse a character. Converts escape chars into character codes. */
271 /* Check for escape chars */
310 /* Hex character constant */
311 if (!IsXDigit (NextC)) {
312 Error ("\\x used with no following hex digits");
317 while (IsXDigit (NextC)) {
318 if ((C << 4) >= 256) {
320 Error ("Hex character constant out of range");
324 C = (C << 4) | HexVal (NextC);
341 while (IsODigit (NextC)) {
342 if ((C << 3) >= 256) {
344 Error ("Octal character constant out of range");
348 C = (C << 3) | HexVal (NextC);
354 Error ("Illegal character constant");
356 /* Try to do error recovery, otherwise the compiler will spit
357 * out thousands of errors in this place and abort.
359 if (CurC != '\'' && CurC != '\0') {
360 while (NextC != '\'' && NextC != '\"' && NextC != '\0') {
370 /* Skip the character read */
373 /* Do correct sign extension */
374 return SignExtendChar (C);
379 static void CharConst (void)
380 /* Parse a character constant. */
390 /* Check for closing quote */
392 Error ("`\'' expected");
398 /* Setup values and attributes */
399 NextTok.Tok = TOK_CCONST;
401 /* Translate into target charset */
402 NextTok.IVal = SignExtendChar (TgtTranslateChar (C));
404 /* Character constants have type int */
405 NextTok.Type = type_int;
410 static void StringConst (void)
411 /* Parse a quoted string */
414 StrBuf S = AUTO_STRBUF_INITIALIZER;
416 /* Assume next token is a string constant */
417 NextTok.Tok = TOK_SCONST;
419 /* Concatenate strings. If at least one of the concatenated strings is a wide
420 * character literal, the whole string is a wide char literal, otherwise
421 * it's a normal string literal.
425 /* Check if this is a normal or a wide char string */
426 if (CurC == 'L' && NextC == '\"') {
427 /* Wide character literal */
428 NextTok.Tok = TOK_WCSCONST;
431 } else if (CurC == '\"') {
432 /* Skip the quote char */
439 /* Read until end of string */
440 while (CurC != '\"') {
442 Error ("Unexpected newline");
445 SB_AppendChar (&S, ParseChar ());
448 /* Skip closing quote char if there was one */
451 /* Skip white space, read new input */
456 /* Terminate the string */
457 SB_AppendChar (&S, '\0');
459 /* Add the whole string to the literal pool */
460 NextTok.SVal = AddLiteralStr (&S);
462 /* Free the buffer */
468 static void NumericConst (void)
469 /* Parse a numeric constant */
471 unsigned Base; /* Temporary number base */
472 unsigned Prefix; /* Base according to prefix */
473 StrBuf S = STATIC_STRBUF_INITIALIZER;
477 unsigned long IVal; /* Value */
479 /* Check for a leading hex or octal prefix and determine the possible
483 /* Gobble 0 and examine next char */
485 if (toupper (CurC) == 'X') {
487 NextChar (); /* gobble "x" */
489 Base = 10; /* Assume 10 for now - see below */
490 Prefix = 8; /* Actual prefix says octal */
496 /* Because floating point numbers don't have octal prefixes (a number
497 * with a leading zero is decimal), we first have to read the number
498 * before converting it, so we can determine if it's a float or an
501 while (IsXDigit (CurC) && HexVal (CurC) < Base) {
502 SB_AppendChar (&S, CurC);
507 /* The following character tells us if we have an integer or floating
508 * point constant. Note: Hexadecimal floating point constants aren't
511 IsFloat = (CurC == '.' ||
512 (Base == 10 && toupper (CurC) == 'E') ||
513 (Base == 16 && toupper (CurC) == 'P' && IS_Get (&Standard) >= STD_C99));
515 /* If we don't have a floating point type, an octal prefix results in an
518 if (!IsFloat && Prefix == 8) {
522 /* Since we now know the correct base, convert the remembered input
527 while ((C = SB_Get (&S)) != '\0') {
528 DigitVal = HexVal (C);
529 if (DigitVal >= Base) {
530 Error ("Numeric constant contains digits beyond the radix");
532 IVal = (IVal * Base) + DigitVal;
535 /* We don't need the string buffer any longer */
538 /* Distinguish between integer and floating point constants */
544 /* Check for a suffix and determine the possible types */
546 if (toupper (CurC) == 'U') {
549 if (toupper (CurC) != 'L') {
550 Types = IT_UINT | IT_ULONG;
555 } else if (toupper (CurC) == 'L') {
558 if (toupper (CurC) != 'U') {
559 Types = IT_LONG | IT_ULONG;
567 /* Decimal constants are of any type but uint */
568 Types = IT_INT | IT_LONG | IT_ULONG;
570 /* Octal or hex constants are of any type */
571 Types = IT_INT | IT_UINT | IT_LONG | IT_ULONG;
575 /* Check the range to determine the type */
577 /* Out of range for int */
579 /* If the value is in the range 0x8000..0xFFFF, unsigned int is not
580 * allowed, and we don't have a type specifying suffix, emit a
581 * warning, because the constant is of type long.
583 if (IVal <= 0xFFFF && (Types & IT_UINT) == 0 && !HaveSuffix) {
584 Warning ("Constant is long");
588 /* Out of range for unsigned int */
591 if (IVal > 0x7FFFFFFF) {
592 /* Out of range for long int */
596 /* Now set the type string to the smallest type in types */
597 if (Types & IT_INT) {
598 NextTok.Type = type_int;
599 } else if (Types & IT_UINT) {
600 NextTok.Type = type_uint;
601 } else if (Types & IT_LONG) {
602 NextTok.Type = type_long;
604 NextTok.Type = type_ulong;
607 /* Set the value and the token */
609 NextTok.Tok = TOK_ICONST;
614 Double FVal = FP_D_FromInt (IVal); /* Convert to double */
616 /* Check for a fractional part and read it */
624 /* Read fractional digits */
625 Scale = FP_D_Make (1.0);
626 while (IsXDigit (CurC) && (DigitVal = HexVal (CurC)) < Base) {
627 /* Get the value of this digit */
628 Double FracVal = FP_D_Div (FP_D_FromInt (DigitVal * Base), Scale);
629 /* Add it to the float value */
630 FVal = FP_D_Add (FVal, FracVal);
632 Scale = FP_D_Mul (Scale, FP_D_FromInt (DigitVal));
638 /* Check for an exponent and read it */
639 if ((Base == 16 && toupper (CurC) == 'F') ||
640 (Base == 10 && toupper (CurC) == 'E')) {
646 /* Skip the exponent notifier */
649 /* Read an optional sign */
654 } else if (CurC == '+') {
658 /* Read exponent digits. Since we support only 32 bit floats
659 * with a maximum exponent of +/-127, we read the exponent
660 * part as integer with up to 3 digits and drop the remainder.
661 * This avoids an overflow of Exp. The exponent is always
662 * decimal, even for hex float consts.
666 while (IsDigit (CurC)) {
668 Exp = Exp * 10 + HexVal (CurC);
673 /* Check for errors: We must have exponent digits, and not more
677 Error ("Floating constant exponent has no digits");
678 } else if (Digits > 3) {
679 Warning ("Floating constant exponent is too large");
682 /* Scale the exponent and adjust the value accordingly */
684 FVal = FP_D_Mul (FVal, FP_D_Make (pow (10, Exp)));
688 /* Check for a suffix and determine the type of the constant */
689 if (toupper (CurC) == 'F') {
691 NextTok.Type = type_float;
693 NextTok.Type = type_double;
696 /* Set the value and the token */
698 NextTok.Tok = TOK_FCONST;
705 void NextToken (void)
706 /* Get next token from input stream */
710 /* We have to skip white space here before shifting tokens, since the
711 * tokens and the current line info are invalid at startup and will get
712 * initialized by reading the first time from the file. Remember if
713 * we were at end of input and handle that later.
715 int GotEOF = (SkipWhite() == 0);
717 /* Current token is the lookahead token */
719 ReleaseLineInfo (CurTok.LI);
723 /* When reading the first time from the file, the line info in NextTok,
724 * which was copied to CurTok is invalid. Since the information from
725 * the token is used for error messages, we must make it valid.
727 if (CurTok.LI == 0) {
728 CurTok.LI = UseLineInfo (GetCurLineInfo ());
731 /* Remember the starting position of the next token */
732 NextTok.LI = UseLineInfo (GetCurLineInfo ());
734 /* Now handle end of input. */
736 /* End of file reached */
737 NextTok.Tok = TOK_CEOF;
741 /* Determine the next token from the lookahead */
742 if (IsDigit (CurC) || (CurC == '.' && IsDigit (NextC))) {
748 /* Check for wide character literals */
749 if (CurC == 'L' && NextC == '\"') {
754 /* Check for keywords and identifiers */
757 /* Check for a keyword */
758 if ((NextTok.Tok = FindKey (token)) != TOK_IDENT) {
759 /* Reserved word found */
762 /* No reserved word, check for special symbols */
763 if (token[0] == '_' && token[1] == '_') {
764 /* Special symbols */
765 if (strcmp (token+2, "FILE__") == 0) {
766 NextTok.SVal = AddLiteral (GetCurrentFile());
767 NextTok.Tok = TOK_SCONST;
769 } else if (strcmp (token+2, "LINE__") == 0) {
770 NextTok.Tok = TOK_ICONST;
771 NextTok.IVal = GetCurrentLine();
772 NextTok.Type = type_int;
774 } else if (strcmp (token+2, "func__") == 0) {
775 /* __func__ is only defined in functions */
777 NextTok.SVal = AddLiteral (F_GetFuncName (CurrentFunc));
778 NextTok.Tok = TOK_SCONST;
784 /* No reserved word but identifier */
785 strcpy (NextTok.Ident, token);
786 NextTok.Tok = TOK_IDENT;
790 /* Monstrous switch statement ahead... */
798 NextTok.Tok = TOK_BOOL_NOT;
809 SetTok (TOK_MOD_ASSIGN);
811 NextTok.Tok = TOK_MOD;
819 SetTok (TOK_BOOL_AND);
822 SetTok (TOK_AND_ASSIGN);
825 NextTok.Tok = TOK_AND;
844 SetTok (TOK_MUL_ASSIGN);
846 NextTok.Tok = TOK_STAR;
857 SetTok (TOK_PLUS_ASSIGN);
860 NextTok.Tok = TOK_PLUS;
875 SetTok (TOK_MINUS_ASSIGN);
878 SetTok (TOK_PTR_REF);
881 NextTok.Tok = TOK_MINUS;
890 SetTok (TOK_ELLIPSIS);
895 NextTok.Tok = TOK_DOT;
902 SetTok (TOK_DIV_ASSIGN);
904 NextTok.Tok = TOK_DIV;
925 SetTok (TOK_SHL_ASSIGN);
927 NextTok.Tok = TOK_SHL;
931 NextTok.Tok = TOK_LT;
940 NextTok.Tok = TOK_ASSIGN;
953 SetTok (TOK_SHR_ASSIGN);
955 NextTok.Tok = TOK_SHR;
959 NextTok.Tok = TOK_GT;
978 SetTok (TOK_XOR_ASSIGN);
980 NextTok.Tok = TOK_XOR;
992 SetTok (TOK_BOOL_OR);
995 SetTok (TOK_OR_ASSIGN);
998 NextTok.Tok = TOK_OR;
1003 SetTok (TOK_RCURLY);
1019 void SkipTokens (const token_t* TokenList, unsigned TokenCount)
1020 /* Skip tokens until we reach TOK_CEOF or a token in the given token list.
1021 * This routine is used for error recovery.
1024 while (CurTok.Tok != TOK_CEOF) {
1026 /* Check if the current token is in the token list */
1028 for (I = 0; I < TokenCount; ++I) {
1029 if (CurTok.Tok == TokenList[I]) {
1030 /* Found a token in the list */
1035 /* Not in the list: Skip it */
1043 int Consume (token_t Token, const char* ErrorMsg)
1044 /* Eat token if it is the next in the input stream, otherwise print an error
1045 * message. Returns true if the token was found and false otherwise.
1048 if (CurTok.Tok == Token) {
1052 Error ("%s", ErrorMsg);
1059 int ConsumeColon (void)
1060 /* Check for a colon and skip it. */
1062 return Consume (TOK_COLON, "`:' expected");
1067 int ConsumeSemi (void)
1068 /* Check for a semicolon and skip it. */
1070 /* Try to be smart about typos... */
1071 if (CurTok.Tok == TOK_SEMI) {
1075 Error ("`;' expected");
1076 if (CurTok.Tok == TOK_COLON || CurTok.Tok == TOK_COMMA) {
1085 int ConsumeComma (void)
1086 /* Check for a comma and skip it. */
1088 /* Try to be smart about typos... */
1089 if (CurTok.Tok == TOK_COMMA) {
1093 Error ("`,' expected");
1094 if (CurTok.Tok == TOK_SEMI) {
1103 int ConsumeLParen (void)
1104 /* Check for a left parenthesis and skip it */
1106 return Consume (TOK_LPAREN, "`(' expected");
1111 int ConsumeRParen (void)
1112 /* Check for a right parenthesis and skip it */
1114 return Consume (TOK_RPAREN, "`)' expected");
1119 int ConsumeLBrack (void)
1120 /* Check for a left bracket and skip it */
1122 return Consume (TOK_LBRACK, "`[' expected");
1127 int ConsumeRBrack (void)
1128 /* Check for a right bracket and skip it */
1130 return Consume (TOK_RBRACK, "`]' expected");
1135 int ConsumeLCurly (void)
1136 /* Check for a left curly brace and skip it */
1138 return Consume (TOK_LCURLY, "`{' expected");
1143 int ConsumeRCurly (void)
1144 /* Check for a right curly brace and skip it */
1146 return Consume (TOK_RCURLY, "`}' expected");