1 /*****************************************************************************/
5 /* The scanner for the ca65 macroassembler */
9 /* (C) 1998-2003 Ullrich von Bassewitz */
10 /* Römerstrasse 52 */
11 /* D-70794 Filderstadt */
12 /* EMail: uz@cc65.org */
15 /* This software is provided 'as-is', without any expressed or implied */
16 /* warranty. In no event will the authors be held liable for any damages */
17 /* arising from the use of this software. */
19 /* Permission is granted to anyone to use this software for any purpose, */
20 /* including commercial applications, and to alter it and redistribute it */
21 /* freely, subject to the following restrictions: */
23 /* 1. The origin of this software must not be misrepresented; you must not */
24 /* claim that you wrote the original software. If you use this software */
25 /* in a product, an acknowledgment in the product documentation would be */
26 /* appreciated but is not required. */
27 /* 2. Altered source versions must be plainly marked as such, and must not */
28 /* be misrepresented as being the original software. */
29 /* 3. This notice may not be removed or altered from any source */
32 /*****************************************************************************/
41 #include <sys/types.h> /* EMX needs this */
65 /*****************************************************************************/
67 /*****************************************************************************/
71 enum Token Tok = TOK_NONE; /* Current token (initially TOK_NONE) */
72 int WS; /* Flag: non-zero if blanks preceded the current token; assigned
            * from IsBlank() in NextRawTok */
73 long IVal; /* Integer attribute of the current token (see TokHasIVal) */
74 char SVal [MAX_STR_LEN+1]; /* String attribute of the current token (see
                               * TokHasSVal); sized for MAX_STR_LEN characters
                               * plus the terminating NUL */
76 FilePos CurPos = { 0, 0, 0 }; /* Name and position in current file */
80 /* Struct to handle include files. Note: The length of the input line may
81 * not exceed 255+1, since the column is stored in the file position struct
82 * as a character. Increasing this value means changing the FilePos struct,
83 * and the read and write routines in the assembler and linker.
85 typedef struct InputFile_ InputFile;
87 FILE* F; /* Input file descriptor */
88 FilePos Pos; /* Position in file */
89 enum Token Tok; /* Last token */
90 int C; /* Last character */
91 char Line[256]; /* The current input line */
92 InputFile* Next; /* Linked list of input files */
95 /* Struct to handle textual input data */
96 typedef struct InputData_ InputData;
98 char* Data; /* Pointer to the data */
99 const char* Pos; /* Pointer to current position */
100 int Malloced; /* Memory was malloced */
101 enum Token Tok; /* Last token */
102 int C; /* Last character */
103 InputData* Next; /* Linked list of input data */
106 /* Current input variables. All of them are private to the scanner. */
107 static InputFile* IFile = 0; /* Current input file; entries are chained
                                  * through InputFile.Next */
108 static InputData* IData = 0; /* Current input memory data; entries are
                                  * chained through InputData.Next */
109 static unsigned ICount = 0; /* Count of input files; also handed to
                                 * NewListingLine for every line read */
110 static int C = 0; /* Current input character, updated by NextChar */
112 /* Force end of assembly */
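115 /* List of dot keywords with the corresponding tokens. The entries must stay
 * sorted by keyword in strcmp() order, because FindDotKeyword() looks them up
 * with bsearch() using CmpDotKeyword() as comparison function. */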
117 const char* Key; /* MUST be first field */
122 { ".ADDR", TOK_ADDR },
123 { ".ALIGN", TOK_ALIGN },
124 { ".AND", TOK_BAND },
125 { ".ASCIIZ", TOK_ASCIIZ },
126 { ".ASSERT", TOK_ASSERT },
127 { ".AUTOIMPORT", TOK_AUTOIMPORT },
128 { ".BITAND", TOK_AND },
129 { ".BITNOT", TOK_NOT },
130 { ".BITOR", TOK_OR },
131 { ".BITXOR", TOK_XOR },
132 { ".BLANK", TOK_BLANK },
134 { ".BYT", TOK_BYTE },
135 { ".BYTE", TOK_BYTE },
136 { ".CASE", TOK_CASE },
137 { ".CHARMAP", TOK_CHARMAP },
138 { ".CODE", TOK_CODE },
139 { ".CONCAT", TOK_CONCAT },
140 { ".CONDES", TOK_CONDES },
141 { ".CONST", TOK_CONST },
142 { ".CONSTRUCTOR", TOK_CONSTRUCTOR },
144 { ".DATA", TOK_DATA },
146 { ".DBYT", TOK_DBYT },
147 { ".DEBUGINFO", TOK_DEBUGINFO },
148 { ".DEF", TOK_DEFINED },
149 { ".DEFINE", TOK_DEFINE },
150 { ".DEFINED", TOK_DEFINED },
151 { ".DESTRUCTOR", TOK_DESTRUCTOR },
152 { ".DWORD", TOK_DWORD },
153 { ".ELSE", TOK_ELSE },
154 { ".ELSEIF", TOK_ELSEIF },
156 { ".ENDIF", TOK_ENDIF },
157 { ".ENDMAC", TOK_ENDMACRO },
158 { ".ENDMACRO", TOK_ENDMACRO },
159 { ".ENDPROC", TOK_ENDPROC },
160 { ".ENDREP", TOK_ENDREP },
161 { ".ENDREPEAT", TOK_ENDREP },
162 { ".ERROR", TOK_ERROR },
163 { ".EXITMAC", TOK_EXITMACRO },
164 { ".EXITMACRO", TOK_EXITMACRO },
165 { ".EXPORT", TOK_EXPORT },
166 { ".EXPORTZP", TOK_EXPORTZP },
167 { ".FARADDR", TOK_FARADDR },
168 { ".FEATURE", TOK_FEATURE },
169 { ".FILEOPT", TOK_FILEOPT },
170 { ".FOPT", TOK_FILEOPT },
171 { ".FORCEIMPORT", TOK_FORCEIMPORT },
172 { ".FORCEWORD", TOK_FORCEWORD },
173 { ".GLOBAL", TOK_GLOBAL },
174 { ".GLOBALZP", TOK_GLOBALZP },
178 { ".IFBLANK", TOK_IFBLANK },
179 { ".IFCONST", TOK_IFCONST },
180 { ".IFDEF", TOK_IFDEF },
181 { ".IFNBLANK", TOK_IFNBLANK },
182 { ".IFNCONST", TOK_IFNCONST },
183 { ".IFNDEF", TOK_IFNDEF },
184 { ".IFNREF", TOK_IFNREF },
185 { ".IFP02", TOK_IFP02 },
186 { ".IFP816", TOK_IFP816 },
187 { ".IFPC02", TOK_IFPC02 },
188 { ".IFPSC02", TOK_IFPSC02 },
189 { ".IFREF", TOK_IFREF },
190 { ".IMPORT", TOK_IMPORT },
191 { ".IMPORTZP", TOK_IMPORTZP },
192 { ".INCBIN", TOK_INCBIN },
193 { ".INCLUDE", TOK_INCLUDE },
194 { ".LEFT", TOK_LEFT },
195 { ".LINECONT", TOK_LINECONT },
196 { ".LIST", TOK_LIST },
197 { ".LISTBYTES", TOK_LISTBYTES },
198 { ".LOCAL", TOK_LOCAL },
199 { ".LOCALCHAR", TOK_LOCALCHAR },
200 { ".MAC", TOK_MACRO },
201 { ".MACPACK", TOK_MACPACK },
202 { ".MACRO", TOK_MACRO },
203 { ".MATCH", TOK_MATCH },
206 { ".NOT", TOK_BNOT },
207 { ".NULL", TOK_NULL },
212 { ".P816", TOK_P816 },
213 { ".PAGELEN", TOK_PAGELENGTH },
214 { ".PAGELENGTH", TOK_PAGELENGTH },
215 { ".PARAMCOUNT", TOK_PARAMCOUNT },
216 { ".PC02", TOK_PC02 },
217 { ".POPSEG", TOK_POPSEG },
218 { ".PROC", TOK_PROC },
219 { ".PSC02", TOK_PSC02 },
220 { ".PUSHSEG", TOK_PUSHSEG },
221 { ".REF", TOK_REFERENCED },
222 { ".REFERENCED", TOK_REFERENCED },
223 { ".RELOC", TOK_RELOC },
224 { ".REPEAT", TOK_REPEAT },
226 { ".RIGHT", TOK_RIGHT },
227 { ".RODATA", TOK_RODATA },
228 { ".SEGMENT", TOK_SEGMENT },
229 { ".SETCPU", TOK_SETCPU },
232 { ".SMART", TOK_SMART },
233 { ".STRAT", TOK_STRAT },
234 { ".STRING", TOK_STRING },
235 { ".STRLEN", TOK_STRLEN },
236 { ".SUNPLUS", TOK_SUNPLUS },
237 { ".TCOUNT", TOK_TCOUNT },
238 { ".TIME", TOK_TIME },
239 { ".VERSION", TOK_VERSION },
240 { ".WARNING", TOK_WARNING },
241 { ".WORD", TOK_WORD },
242 { ".XMATCH", TOK_XMATCH },
243 { ".XOR", TOK_BXOR },
244 { ".ZEROPAGE", TOK_ZEROPAGE },
249 /*****************************************************************************/
251 /*****************************************************************************/
255 static void NextChar (void);
256 /* Read the next character from the input file */
260 /*****************************************************************************/
261 /* Character classification functions */
262 /*****************************************************************************/
266 static int IsIdChar (int C)
267 /* Return true if C is a valid character inside an identifier. Among the
 * conditions shown here, alphanumeric characters always qualify, while '@'
 * and '$' qualify only when the AtInIdents and DollarInIdents flags,
 * respectively, are set. Note that the parameter C shadows the file-scope
 * variable C that holds the current input character.
 */
269 return IsAlNum (C) ||
271 (C == '@' && AtInIdents) ||
272 (C == '$' && DollarInIdents);
277 static int IsIdStart (int C)
278 /* Return true if C may start an identifier, that is, if it is a letter
 * (as decided by IsAlpha) or an underscore. Unlike IsIdChar, this check does
 * not consult the AtInIdents or DollarInIdents flags. The parameter C shadows
 * the file-scope variable of the same name.
 */
280 return IsAlpha (C) || C == '_';
285 /*****************************************************************************/
287 /*****************************************************************************/
291 void NewInputFile (const char* Name)
292 /* Open a new input file */
297 /* First try to open the file */
298 F = fopen (Name, "r");
303 /* Error (fatal error if this is the main file) */
305 Fatal (FAT_CANNOT_OPEN_INPUT, Name, strerror (errno));
308 /* We are on include level. Search for the file in the include
311 PathName = FindInclude (Name);
312 if (PathName == 0 || (F = fopen (PathName, "r")) == 0) {
313 /* Not found or cannot open, print an error and bail out */
314 Error (ERR_CANNOT_OPEN_INCLUDE, Name, strerror (errno));
317 /* Free the allocated memory */
322 /* check again if we do now have an open file */
327 /* Stat the file and remember the values */
329 if (fstat (fileno (F), &Buf) != 0) {
330 Fatal (FAT_CANNOT_STAT_INPUT, Name, strerror (errno));
333 /* Add the file to the input file table and remember the index */
334 FileIdx = AddFile (Name, Buf.st_size, Buf.st_mtime);
336 /* Create a new state variable and initialize it */
337 I = xmalloc (sizeof (*I));
341 I->Pos.Name = FileIdx;
346 /* Use the new file */
358 void DoneInputFile (void)
359 /* Close the current input file */
363 /* Restore the old token */
367 /* Save a pointer to the current struct, then set it back */
371 /* Cleanup the current stuff */
379 void NewInputData (char* Data, int Malloced)
380 /* Add a chunk of input data to the input stream */
384 /* Create a new state variable and initialize it */
385 I = xmalloc (sizeof (*I));
388 I->Malloced = Malloced;
392 /* Use the new data */
402 static void DoneInputData (void)
403 /* End the current input data stream */
407 /* Restore the old token */
411 /* Save a pointer to the current struct, then set it back */
415 /* Cleanup the current stuff */
424 static unsigned DigitVal (unsigned char C)
425 /* Convert a digit character into its numeric value */
/* Letter digits are mapped case-insensitively: 'A' or 'a' yields 10, 'B' or
 * 'b' yields 11, and so on. The expression relies on the letters being
 * contiguous in the execution character set.
 */
430 return toupper (C) - 'A' + 10;
436 static void NextChar (void)
437 /* Read the next character from the input file */
439 /* If we have an input data structure, read from there */
444 /* End of input data, will set to last file char */
450 /* Check for end of line, read the next line if needed */
451 while (IFile->Line [IFile->Pos.Col] == '\0') {
453 /* End of current line reached, read next line */
454 if (fgets (IFile->Line, sizeof (IFile->Line), IFile->F) == 0) {
455 /* End of file. Add an empty line to the listing. This is a
456 * small hack needed to keep the PC output in sync.
458 NewListingLine ("", IFile->Pos.Name, ICount);
467 /* Remember the new line for the listing */
468 NewListingLine (IFile->Line, IFile->Pos.Name, ICount);
472 /* Return the next character from the file */
473 C = IFile->Line [IFile->Pos.Col++];
480 void LocaseSVal (void)
481 /* Convert the string in SVal to lower case, in place */
/* NOTE(review): SVal is an array of plain char. If char is signed and SVal
 * holds a character with a negative value, passing it to tolower() is
 * undefined behavior; a cast to unsigned char would avoid that. Confirm
 * whether such characters can reach this point.
 */
485 SVal [I] = tolower (SVal [I]);
492 void UpcaseSVal (void)
493 /* Convert the string in SVal to upper case, in place */
/* NOTE(review): SVal is an array of plain char. If char is signed and SVal
 * holds a character with a negative value, passing it to toupper() is
 * undefined behavior; a cast to unsigned char would avoid that. Confirm
 * whether such characters can reach this point.
 */
497 SVal [I] = toupper (SVal [I]);
504 static int CmpDotKeyword (const void* K1, const void* K2)
505 /* Compare function for the dot keyword search. It is the callback handed to
 * bsearch() by FindDotKeyword. Both K1 and K2 are treated as pointers to
 * struct DotKeyword, and the result is that of strcmp() on the two Key
 * strings: negative, zero or positive. The comparison is therefore case
 * sensitive, and the keyword table has to be sorted in strcmp() order.
 */
/* The casts drop the const qualifier, but the objects are only read. */
507 return strcmp (((struct DotKeyword*)K1)->Key, ((struct DotKeyword*)K2)->Key);
512 static unsigned char FindDotKeyword (void)
513 /* Find the dot keyword in SVal. Return the corresponding token if found,
514 * return TOK_NONE if not found.
517 static const struct DotKeyword K = { SVal, 0 };
518 struct DotKeyword* R;
520 /* If we aren't in ignore case mode, we have to uppercase the keyword */
525 /* Search for the keyword */
526 R = bsearch (&K, DotKeywords, sizeof (DotKeywords) / sizeof (DotKeywords [0]),
527 sizeof (DotKeywords [0]), CmpDotKeyword);
537 static void ReadIdent (unsigned Index)
538 /* Read an identifier from the current input position into Ident. Filling SVal
539 * starts at Index with the current character in C. It is assumed that any
540 * characters already filled in are ok, and the character in C is checked.
543 /* Read the identifier */
545 if (Index < MAX_STR_LEN) {
549 } while (IsIdChar (C));
552 /* If we should ignore case, convert the identifier to upper case */
560 static unsigned ReadStringConst (int StringTerm)
561 /* Read a string constant into SVal. Check for maximum string length and all
562 * other stuff. The length of the string is returned.
567 /* Skip the leading string terminator */
570 /* Read the string */
573 if (C == StringTerm) {
576 if (C == '\n' || C == EOF) {
577 Error (ERR_NEWLINE_IN_STRING);
581 /* Check for string length, print an error message once */
582 if (I == MAX_STR_LEN) {
583 Error (ERR_STRING_TOO_LONG);
584 } else if (I < MAX_STR_LEN) {
589 /* Skip the character */
593 /* Skip the trailing terminator */
596 /* Terminate the string */
597 if (I >= MAX_STR_LEN) {
602 /* Return the length of the string */
608 void NextRawTok (void)
609 /* Read the next raw token from the input stream */
611 /* If we've a forced end of assembly, don't read further */
618 /* Check if we have tokens from another input source */
619 if (InputFromStack ()) {
624 /* Skip whitespace, remember if we had some */
625 if ((WS = IsBlank (C)) != 0) {
628 } while (IsBlank (C));
631 /* If we're reading from the file, update the location from where the
632 * next token will be read. If we're reading from input data, keep the
639 /* Hex number or PC symbol? */
643 /* Hex digit must follow or DollarIsPC must be enabled */
649 Error (ERR_HEX_DIGIT_EXPECTED);
653 /* Read the number */
655 while (IsXDigit (C)) {
656 if (IVal & 0xF0000000) {
657 Error (ERR_NUM_OVERFLOW);
660 IVal = (IVal << 4) + DigitVal (C);
664 /* This is an integer constant */
673 /* 0 or 1 must follow */
675 Error (ERR_01_EXPECTED);
678 /* Read the number */
680 while (IsBDigit (C)) {
681 if (IVal & 0x80000000) {
682 Error (ERR_NUM_OVERFLOW);
685 IVal = (IVal << 1) + DigitVal (C);
689 /* This is an integer constant */
694 /* Decimal number? */
697 /* Read the number */
699 while (IsDigit (C)) {
700 if (IVal > (long) (0xFFFFFFFFUL / 10)) {
701 Error (ERR_NUM_OVERFLOW);
704 IVal = (IVal * 10) + DigitVal (C);
708 /* This is an integer constant */
713 /* Control command? */
716 /* Remember and skip the dot */
720 /* Check if it's just a dot */
721 if (!IsIdStart (C)) {
728 /* Read the remainder of the identifier */
731 /* Dot keyword, search for it */
732 Tok = FindDotKeyword ();
733 if (Tok == TOK_NONE) {
735 if (LeadingDotInIdents) {
736 /* An identifier with a dot */
739 /* Invalid pseudo instruction */
740 Error (ERR_PSEUDO_EXPECTED);
750 if (C == LocalStart) {
752 /* Read the identifier */
755 /* Start character alone is not enough */
756 if (SVal [1] == '\0') {
757 Error (ERR_IDENT_EXPECTED);
767 /* Identifier or keyword? */
770 /* Read the identifier */
773 /* Check for special names */
774 if (SVal[1] == '\0') {
775 switch (toupper (SVal [0])) {
780 Tok = TOK_OVERRIDE_ABS;
789 Tok = TOK_OVERRIDE_FAR;
810 Tok = TOK_OVERRIDE_ZP;
822 /* Search for an opcode */
823 IVal = FindInstruction (SVal);
825 /* This is a mnemonic */
827 } else if (IsDefine (SVal)) {
828 /* This is a define style macro - expand it */
838 /* Ok, let's do the switch */
927 while (C != '\n' && C != EOF) {
962 } else if (C == '<') {
965 } else if (C == '>') {
988 } else if (C == '>') {
1002 /* Hack: If we allow ' as terminating character for strings, read
1003 * the following stuff as a string, and check for a one character
1006 if (LooseStringTerm) {
1007 if (ReadStringConst ('\'') == 1) {
1014 /* Always a character constant */
1016 if (C == '\n' || C == EOF) {
1017 Error (ERR_ILLEGAL_CHARCON);
1024 Error (ERR_ILLEGAL_CHARCON);
1032 ReadStringConst ('\"');
1037 /* Line continuation? */
1041 /* Handle as white space */
1055 /* Check if we have any open .IFs in this file */
1057 /* Check if we have any open token lists in this file */
1060 /* If this was an include file, then close it and handle like a
1061 * separator. Do not close the main file, but return EOF.
1072 /* If we go here, we could not identify the current character. Skip it
1075 Error (ERR_INVALID_CHAR, C & 0xFF);
1082 int TokHasSVal (enum Token Tok)
1083 /* Return true if the given token has an attached SVal, i.e. a string
 * attribute. This is the case for identifiers (TOK_IDENT) and string
 * constants (TOK_STRCON) only. The parameter Tok shadows the global variable
 * of the same name, so the function inspects its argument, not the current
 * token.
 */
1085 return (Tok == TOK_IDENT || Tok == TOK_STRCON);
1090 int TokHasIVal (enum Token Tok)
1091 /* Return true if the given token has an attached IVal, i.e. an integer
 * attribute. This is the case for integer constants (TOK_INTCON), character
 * constants (TOK_CHARCON) and mnemonics (TOK_MNEMO); for a mnemonic the
 * scanner stores the result of FindInstruction in IVal. The parameter Tok
 * shadows the global variable of the same name.
 */
1093 return (Tok == TOK_INTCON || Tok == TOK_CHARCON || Tok == TOK_MNEMO);
1098 int GetSubKey (const char** Keys, unsigned Count)
1099 /* Search for a subkey in a table of keywords. The current token must be an
1100 * identifier and all keys must be in upper case. The identifier will be
1101 * uppercased in the process. The function returns the index of the keyword,
1102 * or -1 if the keyword was not found.
1107 /* Must have an identifier */
1108 PRECONDITION (Tok == TOK_IDENT);
1110 /* If we aren't in ignore case mode, we have to uppercase the identifier */
1115 /* Do a linear search (a binary search is not worth the effort) */
1116 for (I = 0; I < Count; ++I) {
1117 if (strcmp (SVal, Keys [I]) == 0) {
1129 void InitScanner (const char* InFile)
1130 /* Initialize the scanner. InFile is the name of the input file to start
 * with; it is handed unchanged to NewInputFile, which opens it.
 */
1132 /* Open the input file */
1133 NewInputFile (InFile);
1138 void DoneScanner (void)
1139 /* Release scanner resources */