1 /*****************************************************************************/
5 /* The scanner for the ca65 macroassembler */
9 /* (C) 1998-2000 Ullrich von Bassewitz */
11 /* D-70597 Stuttgart */
12 /* EMail: uz@musoftware.de */
15 /* This software is provided 'as-is', without any expressed or implied */
16 /* warranty. In no event will the authors be held liable for any damages */
17 /* arising from the use of this software. */
19 /* Permission is granted to anyone to use this software for any purpose, */
20 /* including commercial applications, and to alter it and redistribute it */
21 /* freely, subject to the following restrictions: */
23 /* 1. The origin of this software must not be misrepresented; you must not */
24 /* claim that you wrote the original software. If you use this software */
25 /* in a product, an acknowledgment in the product documentation would be */
26 /* appreciated but is not required. */
27 /* 2. Altered source versions must be plainly marked as such, and must not */
28 /* be misrepresented as being the original software. */
29 /* 3. This notice may not be removed or altered from any source */
32 /*****************************************************************************/
63 /*****************************************************************************/
65 /*****************************************************************************/
69 enum Token Tok = TOK_NONE; /* Current token (initially TOK_NONE) */
70 int WS; /* Flag: Blanks or tabs preceded the current token */
71 long IVal; /* Integer token attribute */
72 char SVal [MAX_STR_LEN+1]; /* String token attribute (MAX_STR_LEN chars plus terminator) */
74 FilePos CurPos = { 0, 0, 0 }; /* Name and position in current file */
78 /* Struct to handle include files. Note: The length of the input line may
79 * not exceed 255+1, since the column is stored in the file position struct
80 * as a character. Increasing this value means changing the FilePos struct,
81 * and the read and write routines in the assembler and linker.
83 typedef struct InputFile_ InputFile;
85 FILE* F; /* Input file stream */
86 FilePos Pos; /* Position in file */
87 enum Token Tok; /* Last token */
88 int C; /* Last character */
89 char Line[256]; /* The current input line (filled by fgets in NextChar) */
90 InputFile* Next; /* Linked list of input files */
93 /* Struct to handle textual input data (in-memory text, see NewInputData) */
94 typedef struct InputData_ InputData;
96 const char* Data; /* Pointer to the data */
97 const char* Pos; /* Pointer to current position */
98 int Malloced; /* Flag as passed to NewInputData: memory was malloced */
99 enum Token Tok; /* Last token */
100 int C; /* Last character */
101 InputData* Next; /* Linked list of input data */
104 /* Current input variables */
105 static InputFile* IFile = 0; /* Current input file */
106 static InputData* IData = 0; /* Current input memory data */
107 static unsigned ICount = 0; /* Count of input files (passed to NewListingLine) */
108 static int C = 0; /* Current input character (set by NextChar) */
110 /* Force end of assembly */
113 /* List of dot keywords with the corresponding tokens. The entries must stay sorted by Key in strcmp order, because FindDotKeyword looks them up with bsearch. */
115 const char* Key; /* MUST be first field */
120 { "ADDR", TOK_ADDR },
121 { "ALIGN", TOK_ALIGN },
123 { "ASCIIZ", TOK_ASCIIZ },
124 { "AUTOIMPORT", TOK_AUTOIMPORT },
125 { "BITAND", TOK_AND },
126 { "BITNOT", TOK_NOT },
128 { "BITXOR", TOK_XOR },
129 { "BLANK", TOK_BLANK },
132 { "BYTE", TOK_BYTE },
133 { "CASE", TOK_CASE },
134 { "CODE", TOK_CODE },
135 { "CONCAT", TOK_CONCAT },
136 { "CONDES", TOK_CONDES },
137 { "CONST", TOK_CONST },
138 { "CONSTRUCTOR", TOK_CONSTRUCTOR },
140 { "DATA", TOK_DATA },
142 { "DBYT", TOK_DBYT },
143 { "DEBUGINFO", TOK_DEBUGINFO },
144 { "DEF", TOK_DEFINED },
145 { "DEFINE", TOK_DEFINE },
146 { "DEFINED", TOK_DEFINED },
147 { "DESTRUCTOR", TOK_DESTRUCTOR },
148 { "DWORD", TOK_DWORD },
149 { "ELSE", TOK_ELSE },
150 { "ELSEIF", TOK_ELSEIF },
152 { "ENDIF", TOK_ENDIF },
153 { "ENDMAC", TOK_ENDMACRO },
154 { "ENDMACRO", TOK_ENDMACRO },
155 { "ENDPROC", TOK_ENDPROC },
156 { "ENDREP", TOK_ENDREP },
157 { "ENDREPEAT", TOK_ENDREP },
158 { "ERROR", TOK_ERROR },
159 { "EXITMAC", TOK_EXITMACRO },
160 { "EXITMACRO", TOK_EXITMACRO },
161 { "EXPORT", TOK_EXPORT },
162 { "EXPORTZP", TOK_EXPORTZP },
163 { "FARADDR", TOK_FARADDR },
164 { "FEATURE", TOK_FEATURE },
165 { "FILEOPT", TOK_FILEOPT },
166 { "FOPT", TOK_FILEOPT },
167 { "FORCEWORD", TOK_FORCEWORD },
168 { "GLOBAL", TOK_GLOBAL },
169 { "GLOBALZP", TOK_GLOBALZP },
173 { "IFBLANK", TOK_IFBLANK },
174 { "IFCONST", TOK_IFCONST },
175 { "IFDEF", TOK_IFDEF },
176 { "IFNBLANK", TOK_IFNBLANK },
177 { "IFNCONST", TOK_IFNCONST },
178 { "IFNDEF", TOK_IFNDEF },
179 { "IFNREF", TOK_IFNREF },
180 { "IFP02", TOK_IFP02 },
181 { "IFP816", TOK_IFP816 },
182 { "IFPC02", TOK_IFPC02 },
183 { "IFREF", TOK_IFREF },
184 { "IMPORT", TOK_IMPORT },
185 { "IMPORTZP", TOK_IMPORTZP },
186 { "INCBIN", TOK_INCBIN },
187 { "INCLUDE", TOK_INCLUDE },
188 { "LEFT", TOK_LEFT },
189 { "LINECONT", TOK_LINECONT },
190 { "LIST", TOK_LIST },
191 { "LISTBYTES", TOK_LISTBYTES },
192 { "LOCAL", TOK_LOCAL },
193 { "LOCALCHAR", TOK_LOCALCHAR },
194 { "MAC", TOK_MACRO },
195 { "MACPACK", TOK_MACPACK },
196 { "MACRO", TOK_MACRO },
197 { "MATCH", TOK_MATCH },
201 { "NULL", TOK_NULL },
206 { "P816", TOK_P816 },
207 { "PAGELEN", TOK_PAGELENGTH },
208 { "PAGELENGTH", TOK_PAGELENGTH },
209 { "PARAMCOUNT", TOK_PARAMCOUNT },
210 { "PC02", TOK_PC02 },
211 { "PROC", TOK_PROC },
212 { "REF", TOK_REFERENCED },
213 { "REFERENCED", TOK_REFERENCED },
214 { "RELOC", TOK_RELOC },
215 { "REPEAT", TOK_REPEAT },
217 { "RIGHT", TOK_RIGHT },
218 { "RODATA", TOK_RODATA },
219 { "SEGMENT", TOK_SEGMENT },
222 { "SMART", TOK_SMART },
223 { "STRAT", TOK_STRAT },
224 { "STRING", TOK_STRING },
225 { "STRLEN", TOK_STRLEN },
226 { "SUNPLUS", TOK_SUNPLUS },
227 { "TCOUNT", TOK_TCOUNT },
228 { "WARNING", TOK_WARNING },
229 { "WORD", TOK_WORD },
230 { "XMATCH", TOK_XMATCH },
232 { "ZEROPAGE", TOK_ZEROPAGE },
237 /*****************************************************************************/
239 /*****************************************************************************/
243 static void NextChar (void);
244 /* Read the next character from the current input (input data or file) into C */
248 /*****************************************************************************/
249 /* Character classification functions */
250 /*****************************************************************************/
254 static int IsBlank (int C)
255 /* Return true (non-zero) if C is a space or a horizontal tab */
257 return (C == ' ' || C == '\t');
262 static int IsDigit (int C)
263 /* Return true if the character is a decimal digit */
270 static int IsXDigit (int C)
271 /* Return true if the character is a hexadecimal digit */
278 static int IsDDigit (int C)
279 /* Return true if the character is a binary (dual) digit, i.e. '0' or '1' */
281 return (C == '0' || C == '1');
286 static int IsIdChar (int C)
287 /* Return true if the character is a valid character for an identifier. '@' and '$' only count when AtInIdents resp. DollarInIdents is set. NOTE(review): NextChar takes C from a plain char buffer, so bytes above 127 may arrive as negative values, which isalnum is not defined for - confirm. */
289 return isalnum (C) ||
291 (C == '@' && AtInIdents) ||
292 (C == '$' && DollarInIdents);
297 static int IsIdStart (int C)
298 /* Return true if the character may start an identifier (a letter or '_') */
300 return isalpha (C) || C == '_';
305 /*****************************************************************************/
307 /*****************************************************************************/
311 void NewInputFile (const char* Name)
312 /* Open the input file Name, stat it, register it with AddFile and make it the current input file */
317 /* First try to open the file */
318 F = fopen (Name, "r");
323 /* Error (fatal error if this is the main file) */
325 Fatal (FAT_CANNOT_OPEN_INPUT, Name, strerror (errno));
328 /* We are on include level. Search for the file in the include
331 PathName = FindInclude (Name);
332 if (PathName == 0 || (F = fopen (PathName, "r")) == 0) {
333 /* Not found or cannot open, print an error and bail out. NOTE(review): errno may be stale if FindInclude returned 0 without setting it - confirm */
334 Error (ERR_CANNOT_OPEN_INCLUDE, Name, strerror (errno));
337 /* Free the allocated memory */
342 /* Check again whether we now have an open file */
347 /* Stat the file and remember the values */
349 if (fstat (fileno (F), &Buf) != 0) {
350 Fatal (FAT_CANNOT_STAT_INPUT, Name, strerror (errno));
353 /* Add the file to the input file table and remember the index */
354 FileIdx = AddFile (Name, Buf.st_size, Buf.st_mtime);
356 /* Create a new state variable and initialize it */
357 I = xmalloc (sizeof (*I));
361 I->Pos.Name = FileIdx;
366 /* Use the new file */
378 void DoneInputFile (void)
379 /* Close the current input file */
383 /* Restore the old token */
387 /* Save a pointer to the current struct, then set it back */
391 /* Clean up the current stuff */
399 void NewInputData (const char* Data, int Malloced)
400 /* Add a chunk of textual input data to the input stream and make it the current input. Malloced is recorded with the chunk. */
404 /* Create a new state variable and initialize it */
405 I = xmalloc (sizeof (*I));
408 I->Malloced = Malloced;
412 /* Use the new data */
422 static void DoneInputData (void)
423 /* End the current input data stream */
427 /* Restore the old token */
431 /* Save a pointer to the current struct, then set it back */
435 /* Clean up the current stuff */
444 static unsigned DigitVal (unsigned char C)
445 /* Convert a digit into its numerical representation; letters are mapped case-insensitively, 'A' giving 10 */
450 return toupper (C) - 'A' + 10;
456 static void NextChar (void)
457 /* Read the next character from the current input (input data if present, else the input file) into C */
459 /* If we have an input data structure, read from there */
464 /* End of input data, will set to last file char */
470 /* Check for end of line, read the next line if needed */
471 while (IFile->Line [IFile->Pos.Col] == '\0') {
473 /* End of current line reached, read next line. fgets stores at most sizeof (Line) - 1 characters, so a longer source line is delivered in several pieces. */
474 if (fgets (IFile->Line, sizeof (IFile->Line), IFile->F) == 0) {
475 /* End of file. Add an empty line to the listing. This is a
476 * small hack needed to keep the PC output in sync.
478 NewListingLine ("", IFile->Pos.Name, ICount);
487 /* Remember the new line for the listing */
488 NewListingLine (IFile->Line, IFile->Pos.Name, ICount);
492 /* Fetch the next character from the line buffer and advance the column */
493 C = IFile->Line [IFile->Pos.Col++];
500 void LocaseSVal (void)
501 /* Make SVal lower case (in place, via tolower). NOTE(review): SVal is plain char; if char is signed, bytes above 127 reach tolower as negative values - confirm */
505 SVal [I] = tolower (SVal [I]);
512 void UpcaseSVal (void)
513 /* Make SVal upper case (in place, via toupper). NOTE(review): SVal is plain char; if char is signed, bytes above 127 reach toupper as negative values - confirm */
517 SVal [I] = toupper (SVal [I]);
524 static int CmpDotKeyword (const void* K1, const void* K2)
525 /* Compare function for the dot keyword search: orders two struct DotKeyword entries by Key using strcmp */
527 return strcmp (((struct DotKeyword*)K1)->Key, ((struct DotKeyword*)K2)->Key);
532 static unsigned char FindDotKeyword (void)
533 /* Find the dot keyword in SVal. Return the corresponding token if found,
534 * return TOK_NONE if not found. NOTE(review): the return type is unsigned char while the result is assigned to enum Token Tok - confirm all tokens fit.
537 static const struct DotKeyword K = { SVal, 0 };
538 struct DotKeyword* R;
540 /* If we aren't in ignore case mode, we have to uppercase the keyword */
545 /* Search for the keyword (binary search, relies on DotKeywords being sorted by Key) */
546 R = bsearch (&K, DotKeywords, sizeof (DotKeywords) / sizeof (DotKeywords [0]),
547 sizeof (DotKeywords [0]), CmpDotKeyword);
557 static void ReadIdent (void)
558 /* Read an identifier from the current input position into Ident (presumably SVal, which NextRawTok inspects afterwards - confirm). It is
559 * assumed that the first character has already been checked.
562 /* Read the identifier, continuing while IsIdChar accepts the current character */
565 if (I < MAX_STR_LEN) {
569 } while (IsIdChar (C));
572 /* If we should ignore case, convert the identifier to upper case */
580 static unsigned ReadStringConst (int StringTerm)
581 /* Read a string constant delimited by StringTerm into SVal. Check for maximum string length and all
582 * other stuff. The length of the string is returned.
587 /* Skip the leading string terminator */
590 /* Read the string up to StringTerm; a newline or EOF inside the string is an error */
593 if (C == StringTerm) {
596 if (C == '\n' || C == EOF) {
597 Error (ERR_NEWLINE_IN_STRING);
601 /* Check for string length, print an error message once (only when I == MAX_STR_LEN) */
602 if (I == MAX_STR_LEN) {
603 Error (ERR_STRING_TOO_LONG);
604 } else if (I < MAX_STR_LEN) {
609 /* Skip the character */
613 /* Skip the trailing terminator */
616 /* Terminate the string */
617 if (I >= MAX_STR_LEN) {
622 /* Return the length of the string */
628 void NextRawTok (void)
629 /* Read the next raw token from the input stream into Tok (attributes go to IVal/SVal, the whitespace flag to WS) */
631 /* If we've a forced end of assembly, don't read further */
638 /* Check if we have tokens from another input source */
639 if (InputFromStack ()) {
644 /* Skip whitespace, remember if we had some */
645 if ((WS = IsBlank (C)) != 0) {
648 } while (IsBlank (C));
651 /* If we're reading from the file, update the location from where the
652 * next token will be read. If we're reading from input data, keep the
659 /* Hex number or PC symbol? */
663 /* Hex digit must follow or DollarIsPC must be enabled */
669 Error (ERR_HEX_DIGIT_EXPECTED);
673 /* Read the number */
675 while (IsXDigit (C)) {
676 if (IVal & 0xF0000000) {
677 Error (ERR_NUM_OVERFLOW);
680 IVal = (IVal << 4) + DigitVal (C);
684 /* This is an integer constant */
693 /* 0 or 1 must follow */
695 Error (ERR_01_EXPECTED);
698 /* Read the number */
700 while (IsDDigit (C)) {
701 if (IVal & 0x80000000) {
702 Error (ERR_NUM_OVERFLOW);
705 IVal = (IVal << 1) + DigitVal (C);
709 /* This is an integer constant */
714 /* Decimal number? */
717 /* Read the number. NOTE(review): a value of exactly 0xFFFFFFFF / 10 passes the check below, so the result can end up slightly above 0xFFFFFFFF - confirm */
719 while (IsDigit (C)) {
720 if (IVal > (0xFFFFFFFF / 10)) {
721 Error (ERR_NUM_OVERFLOW);
724 IVal = (IVal * 10) + DigitVal (C);
728 /* This is an integer constant */
733 /* Control command? */
738 if (!IsIdStart (C)) {
739 Error (ERR_PSEUDO_EXPECTED);
740 /* Try to read an identifier */
744 /* Read the identifier */
747 /* Search the keyword */
748 Tok = FindDotKeyword ();
749 if (Tok == TOK_NONE) {
751 Error (ERR_PSEUDO_EXPECTED);
758 if (C == LocalStart) {
760 /* Read the identifier */
763 /* Start character alone is not enough */
764 if (SVal [1] == '\0') {
765 Error (ERR_IDENT_EXPECTED);
775 /* Identifier or keyword? */
778 /* Read the identifier */
781 /* Check for special names */
782 if (SVal [1] == '\0') {
783 switch (toupper (SVal [0])) {
807 /* Search for an opcode */
808 IVal = FindInstruction (SVal);
810 /* This is a mnemonic */
812 } else if (IsDefine (SVal)) {
813 /* This is a define style macro - expand it */
823 /* Ok, let's do the switch */
912 while (C != '\n' && C != EOF) {
947 } else if (C == '<') {
950 } else if (C == '>') {
973 } else if (C == '>') {
987 /* Hack: If we allow ' as terminating character for strings, read
988 * the following stuff as a string, and check for a one character
991 if (LooseStringTerm) {
992 if (ReadStringConst ('\'') == 1) {
999 /* Always a character constant */
1001 if (C == '\n' || C == EOF) {
1002 Error (ERR_ILLEGAL_CHARCON);
1009 Error (ERR_ILLEGAL_CHARCON);
1017 ReadStringConst ('\"');
1022 /* Line continuation? */
1026 /* Handle as white space */
1040 /* Check if we have any open .IFs in this file */
1042 /* Check if we have any open token lists in this file */
1045 /* If this was an include file, then close it and handle like a
1046 * separator. Do not close the main file, but return EOF.
1057 /* If we go here, we could not identify the current character. Skip it
1060 Error (ERR_INVALID_CHAR, C & 0xFF);
1067 int TokHasSVal (enum Token Tok)
1068 /* Return true if the given token has an attached SVal (TOK_IDENT or TOK_STRCON) */
1070 return (Tok == TOK_IDENT || Tok == TOK_STRCON);
1075 int TokHasIVal (enum Token Tok)
1076 /* Return true if the given token has an attached IVal (TOK_INTCON, TOK_CHARCON or TOK_MNEMO) */
1078 return (Tok == TOK_INTCON || Tok == TOK_CHARCON || Tok == TOK_MNEMO);
1083 int GetSubKey (const char** Keys, unsigned Count)
1084 /* Search for a subkey in a table of keywords. The current token must be an
1085 * identifier and all keys must be in upper case. The identifier will be
1086 * uppercased in the process. The function returns the index of the keyword,
1087 * or -1 if the keyword was not found.
1092 /* Must have an identifier */
1093 PRECONDITION (Tok == TOK_IDENT);
1095 /* If we aren't in ignore case mode, we have to uppercase the identifier */
1100 /* Do a linear search over the Count entries of Keys, comparing against SVal (a binary search is not worth the effort) */
1101 for (I = 0; I < Count; ++I) {
1102 if (strcmp (SVal, Keys [I]) == 0) {
1114 void InitScanner (const char* InFile)
1115 /* Initialize the scanner, open the given input file InFile via NewInputFile */
1117 /* Open the input file */
1118 NewInputFile (InFile);
1123 void DoneScanner (void)
1124 /* Release scanner resources */