1 /*****************************************************************************/
5 /* The scanner for the ca65 macroassembler */
9 /* (C) 1998-2000 Ullrich von Bassewitz */
11 /* D-70597 Stuttgart */
12 /* EMail: uz@musoftware.de */
15 /* This software is provided 'as-is', without any expressed or implied */
16 /* warranty. In no event will the authors be held liable for any damages */
17 /* arising from the use of this software. */
19 /* Permission is granted to anyone to use this software for any purpose, */
20 /* including commercial applications, and to alter it and redistribute it */
21 /* freely, subject to the following restrictions: */
23 /* 1. The origin of this software must not be misrepresented; you must not */
24 /* claim that you wrote the original software. If you use this software */
25 /* in a product, an acknowledgment in the product documentation would be */
26 /* appreciated but is not required. */
27 /* 2. Altered source versions must be plainly marked as such, and must not */
28 /* be misrepresented as being the original software. */
29 /* 3. This notice may not be removed or altered from any source */
32 /*****************************************************************************/
56 /*****************************************************************************/
58 /*****************************************************************************/
62 enum Token Tok = TOK_NONE; /* Current token */
63 int WS; /* Flag: Whitespace before token */
64 long IVal; /* Integer token attribute */
65 char SVal [MAX_STR_LEN+1]; /* String token attribute */
67 FilePos CurPos = { 0, 0, 0 }; /* Name and position in current file */
71 /* Struct to handle include files. Note: The length of the input line may
72 * not exceed 255+1, since the column is stored in the file position struct
73 * as a character. Increasing this value means changing the FilePos struct,
74 * and the read and write routines in the assembler and linker.
76 typedef struct InputFile_ InputFile;
78 FILE* F; /* Input file descriptor */
79 FilePos Pos; /* Position in file */
80 enum Token Tok; /* Last token */
81 int C; /* Last character */
82 char Line[256]; /* The current input line */
83 InputFile* Next; /* Linked list of input files */
86 /* Struct to handle textual input data */
87 typedef struct InputData_ InputData;
89 const char* Data; /* Pointer to the data */
90 const char* Pos; /* Pointer to current position */
91 int Malloced; /* Memory was malloced */
92 enum Token Tok; /* Last token */
93 int C; /* Last character */
94 InputData* Next; /* Linked list of input data */
97 /* List of input files */
99 unsigned long MTime; /* Time of last modification */
100 unsigned long Size; /* Size of file */
101 const char* Name; /* Name of file */
102 } Files [MAX_INPUT_FILES];
103 static unsigned FileCount = 0;
105 /* Current input variables */
106 static InputFile* IFile = 0;
107 static InputData* IData = 0;
108 static unsigned ICount = 0; /* Count of input files */
111 /* Force end of assembly */
114 /* List of dot keywords with the corresponding tokens */
116 const char* Key; /* MUST be first field */
121 { "ADDR", TOK_ADDR },
122 { "ALIGN", TOK_ALIGN },
124 { "ASCIIZ", TOK_ASCIIZ },
125 { "AUTOIMPORT", TOK_AUTOIMPORT },
126 { "BITAND", TOK_AND },
127 { "BITNOT", TOK_NOT },
129 { "BITXOR", TOK_XOR },
130 { "BLANK", TOK_BLANK },
132 { "BYTE", TOK_BYTE },
133 { "CASE", TOK_CASE },
134 { "CODE", TOK_CODE },
135 { "CONST", TOK_CONST },
137 { "DATA", TOK_DATA },
138 { "DBYT", TOK_DBYT },
139 { "DEBUGINFO", TOK_DEBUGINFO },
140 { "DEF", TOK_DEFINED },
141 { "DEFINE", TOK_DEFINE },
142 { "DEFINED", TOK_DEFINED },
143 { "DWORD", TOK_DWORD },
144 { "ELSE", TOK_ELSE },
145 { "ELSEIF", TOK_ELSEIF },
147 { "ENDIF", TOK_ENDIF },
148 { "ENDMAC", TOK_ENDMACRO },
149 { "ENDMACRO", TOK_ENDMACRO },
150 { "ENDPROC", TOK_ENDPROC },
151 { "ENDREP", TOK_ENDREP },
152 { "ENDREPEAT", TOK_ENDREP },
153 { "ERROR", TOK_ERROR },
154 { "EXITMAC", TOK_EXITMACRO },
155 { "EXITMACRO", TOK_EXITMACRO },
156 { "EXPORT", TOK_EXPORT },
157 { "EXPORTZP", TOK_EXPORTZP },
158 { "FARADDR", TOK_FARADDR },
159 { "FEATURE", TOK_FEATURE },
160 { "FILEOPT", TOK_FILEOPT },
161 { "FOPT", TOK_FILEOPT },
162 { "GLOBAL", TOK_GLOBAL },
163 { "GLOBALZP", TOK_GLOBALZP },
167 { "IFBLANK", TOK_IFBLANK },
168 { "IFCONST", TOK_IFCONST },
169 { "IFDEF", TOK_IFDEF },
170 { "IFNBLANK", TOK_IFNBLANK },
171 { "IFNCONST", TOK_IFNCONST },
172 { "IFNDEF", TOK_IFNDEF },
173 { "IFNREF", TOK_IFNREF },
174 { "IFP02", TOK_IFP02 },
175 { "IFP816", TOK_IFP816 },
176 { "IFPC02", TOK_IFPC02 },
177 { "IFREF", TOK_IFREF },
178 { "IMPORT", TOK_IMPORT },
179 { "IMPORTZP", TOK_IMPORTZP },
180 { "INCBIN", TOK_INCBIN },
181 { "INCLUDE", TOK_INCLUDE },
182 { "LINECONT", TOK_LINECONT },
183 { "LIST", TOK_LIST },
184 { "LISTBYTES", TOK_LISTBYTES },
185 { "LOCAL", TOK_LOCAL },
186 { "LOCALCHAR", TOK_LOCALCHAR },
187 { "MAC", TOK_MACRO },
188 { "MACPACK", TOK_MACPACK },
189 { "MACRO", TOK_MACRO },
190 { "MATCH", TOK_MATCH },
193 { "NULL", TOK_NULL },
198 { "P816", TOK_P816 },
199 { "PAGELEN", TOK_PAGELENGTH },
200 { "PAGELENGTH", TOK_PAGELENGTH },
201 { "PARAMCOUNT", TOK_PARAMCOUNT },
202 { "PC02", TOK_PC02 },
203 { "PROC", TOK_PROC },
204 { "REF", TOK_REFERENCED },
205 { "REFERENCED", TOK_REFERENCED },
206 { "RELOC", TOK_RELOC },
207 { "REPEAT", TOK_REPEAT },
209 { "RODATA", TOK_RODATA },
210 { "SEGMENT", TOK_SEGMENT },
213 { "SMART", TOK_SMART },
214 { "STRING", TOK_STRING },
215 { "SUNPLUS", TOK_SUNPLUS },
216 { "WORD", TOK_WORD },
217 { "XMATCH", TOK_XMATCH },
219 { "ZEROPAGE", TOK_ZEROPAGE },
224 /*****************************************************************************/
226 /*****************************************************************************/
230 static void NextChar (void);
231 /* Read the next character from the input file */
235 /*****************************************************************************/
236 /* Character classification functions */
237 /*****************************************************************************/
241 static int IsBlank (int C)
242 /* Return true if the character is a space (' ') or a horizontal tab ('\t') */
244 return (C == ' ' || C == '\t');
249 static int IsDigit (int C)
250 /* Return true if the character is a digit */
257 static int IsXDigit (int C)
258 /* Return true if the character is a hexadecimal digit */
265 static int IsDDigit (int C)
266 /* Return true if the character is a binary digit ('0' or '1') */
268 return (C == '0' || C == '1');
273 static int IsIdChar (int C)
274 /* Return true if C may appear inside an identifier; '@' is accepted only if AtInIdents is set, '$' only if DollarInIdents is set */
276 return isalnum (C) ||
278 (C == '@' && AtInIdents) ||
279 (C == '$' && DollarInIdents);
284 static int IsIdStart (int C)
285 /* Return true if the character may start an identifier (a letter or '_') */
287 return isalpha (C) || C == '_';
292 /*****************************************************************************/
294 /*****************************************************************************/
298 const char* GetFileName (unsigned char Name)
299 /* Return the name of the input file with the 1-based index Name (Name may not exceed FileCount) */
301 PRECONDITION (Name <= FileCount);
303 /* Name was defined outside any file scope, use the name of the first
304 * file instead. Errors are then reported with a file position of
305 * line zero in the first file.
307 if (FileCount == 0) {
308 /* No files defined until now */
309 return "(outside file scope)";
311 return Files [0].Name;
314 return Files [Name-1].Name;
320 void NewInputFile (const char* Name)
321 /* Open a new input file and record its name, modification time and size in Files */
326 /* Insert a copy of the filename into the list */
327 if (FileCount >= MAX_INPUT_FILES) {
328 Fatal (FAT_MAX_INPUT_FILES);
330 Files [FileCount].Name = StrDup (Name);
332 /* First try to open the file */
333 F = fopen (Name, "r");
336 /* Open failed: fatal error for the main file, ordinary error for an include file (the name copy is released again) */
338 Fatal (FAT_CANNOT_OPEN_INPUT, Name, strerror (errno));
340 Error (ERR_CANNOT_OPEN_INCLUDE, Name, strerror (errno));
341 Xfree (Files [FileCount].Name);
346 /* Stat the file and remember its modification time and size */
348 if (fstat (fileno (F), &Buf) != 0) {
349 Fatal (FAT_CANNOT_STAT_INPUT, Name, strerror (errno));
351 Files [FileCount].MTime = Buf.st_mtime;
352 Files [FileCount].Size = Buf.st_size;
355 /* Create a new state variable and initialize it */
356 I = Xmalloc (sizeof (*I));
360 I->Pos.Name = FileCount;
365 /* Use the new file */
377 void DoneInputFile (void)
378 /* Close the current input file */
382 /* Restore the old token */
386 /* Save a pointer to the current struct, then set it back */
390 /* Cleanup the current stuff */
398 void NewInputData (const char* Data, int Malloced)
399 /* Add a chunk of textual input data to the input stream. Malloced is recorded in the new InputData and flags whether the memory was malloced */
403 /* Create a new state variable and initialize it */
404 I = Xmalloc (sizeof (*I));
407 I->Malloced = Malloced;
411 /* Use the new data */
421 static void DoneInputData (void)
422 /* End the current input data stream */
426 /* Restore the old token */
430 /* Save a pointer to the current struct, then set it back */
434 /* Cleanup the current stuff */
443 static unsigned DigitVal (unsigned char C)
444 /* Convert a digit into its numerical representation; letters count from 'A' == 10, regardless of case */
449 return toupper (C) - 'A' + 10;
455 static void NextChar (void)
456 /* Read the next character into C, from the input data if there is any, otherwise from the current input file */
458 /* If we have an input data structure, read from there */
463 /* End of input data, will set to last file char */
469 /* Check for end of line, read the next line if needed */
470 while (IFile->Line [IFile->Pos.Col] == '\0') {
472 /* End of current line reached, read next line (fgets stores at most sizeof (Line) - 1 characters, so a longer source line arrives in pieces) */
473 if (fgets (IFile->Line, sizeof (IFile->Line), IFile->F) == 0) {
474 /* End of file. Add an empty line to the listing. This is a
475 * small hack needed to keep the PC output in sync.
477 NewListingLine ("", IFile->Pos.Name, ICount);
486 /* Remember the new line for the listing */
487 NewListingLine (IFile->Line, IFile->Pos.Name, ICount);
491 /* Return the next character from the file */
492 C = IFile->Line [IFile->Pos.Col++];
499 void UpcaseSVal (void)
500 /* Convert SVal to upper case in place. NOTE(review): SVal is plain char, while toupper expects an unsigned char value or EOF - confirm no negative values reach it */
504 SVal [I] = toupper (SVal [I]);
511 static int CmpDotKeyword (const void* K1, const void* K2)
512 /* Compare function for the dot keyword search: orders two struct DotKeyword entries by strcmp of their Key fields */
514 return strcmp (((struct DotKeyword*)K1)->Key, ((struct DotKeyword*)K2)->Key);
519 static unsigned char FindDotKeyword (void)
520 /* Find the dot keyword in SVal. Return the corresponding token if found,
521 * return TOK_NONE if not found.
524 static const struct DotKeyword K = { SVal, 0 };
525 struct DotKeyword* R;
527 /* If we aren't in ignore case mode, we have to uppercase the keyword */
532 /* Binary search for the keyword; this relies on DotKeywords being sorted by Key in strcmp order */
533 R = bsearch (&K, DotKeywords, sizeof (DotKeywords) / sizeof (DotKeywords [0]),
534 sizeof (DotKeywords [0]), CmpDotKeyword);
544 static void ReadIdent (void)
545 /* Read an identifier from the current input position into SVal. It is
546 * assumed that the first character has already been checked.
549 /* Read the identifier */
552 if (I < MAX_STR_LEN) {
556 } while (IsIdChar (C));
559 /* If we should ignore case, convert the identifier to upper case */
567 static unsigned ReadStringConst (int StringTerm)
568 /* Read a string constant delimited by StringTerm into SVal. A newline or EOF
569 * inside the string and a string longer than MAX_STR_LEN are reported as errors. The length of the string is returned.
574 /* Skip the leading string terminator */
577 /* Read the string */
580 if (C == StringTerm) {
583 if (C == '\n' || C == EOF) {
584 Error (ERR_NEWLINE_IN_STRING);
588 /* Check for string length, print an error message once */
589 if (I == MAX_STR_LEN) {
590 Error (ERR_STRING_TOO_LONG);
591 } else if (I < MAX_STR_LEN) {
596 /* Skip the character */
600 /* Skip the trailing terminator */
603 /* Terminate the string */
604 if (I >= MAX_STR_LEN) {
609 /* Return the length of the string */
616 /* Read the next raw token from the input stream */
618 /* If the end of assembly has been forced, don't read any further */
624 /* If we're expanding a macro, the tokens come from the macro expansion */
630 /* Skip whitespace, remember if we had some */
631 if ((WS = IsBlank (C)) != 0) {
634 } while (IsBlank (C));
637 /* If we're reading from the file, update the location from where the
638 * next token will be read. If we're reading from input data, keep the
645 /* Hex number or PC symbol? */
649 /* Hex digit must follow or DollarIsPC must be enabled */
655 Error (ERR_HEX_DIGIT_EXPECTED);
659 /* Read the number */
661 while (IsXDigit (C)) {
662 if (IVal & 0xF0000000) {
663 Error (ERR_NUM_OVERFLOW);
666 IVal = (IVal << 4) + DigitVal (C);
670 /* This is an integer constant */
679 /* 0 or 1 must follow */
681 Error (ERR_01_EXPECTED);
684 /* Read the number */
686 while (IsDDigit (C)) {
687 if (IVal & 0x80000000) {
688 Error (ERR_NUM_OVERFLOW);
691 IVal = (IVal << 1) + DigitVal (C);
695 /* This is an integer constant */
700 /* Decimal number? */
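703 /* Read the number. NOTE(review): IVal == 0xFFFFFFFF / 10 passes the overflow test below, so a following digit of 6..9 yields a value above 0xFFFFFFFF without an error - confirm whether this is intended */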
705 while (IsDigit (C)) {
706 if (IVal > (0xFFFFFFFF / 10)) {
707 Error (ERR_NUM_OVERFLOW);
710 IVal = (IVal * 10) + DigitVal (C);
714 /* This is an integer constant */
719 /* Control command? */
724 if (!IsIdStart (C)) {
725 Error (ERR_PSEUDO_EXPECTED);
726 /* Try to read an identifier */
730 /* Read the identifier */
733 /* Search the keyword */
734 Tok = FindDotKeyword ();
735 if (Tok == TOK_NONE) {
737 Error (ERR_PSEUDO_EXPECTED);
744 if (C == LocalStart) {
746 /* Read the identifier */
749 /* Start character alone is not enough */
750 if (SVal [1] == '\0') {
751 Error (ERR_IDENT_EXPECTED);
761 /* Identifier or keyword? */
764 /* Read the identifier */
767 /* Check for special names */
768 if (SVal [1] == '\0') {
769 switch (toupper (SVal [0])) {
793 /* Search for an opcode */
794 IVal = FindInstruction (SVal);
796 /* This is a mnemonic */
798 } else if (IsDefine (SVal)) {
799 /* This is a define style macro - expand it */
811 /* Ok, let's do the switch */
900 while (C != '\n' && C != EOF) {
935 } else if (C == '<') {
938 } else if (C == '>') {
961 } else if (C == '>') {
975 /* Hack: If we allow ' as terminating character for strings, read
976 * the following stuff as a string, and check for a one character
979 if (LooseStringTerm) {
980 if (ReadStringConst ('\'') == 1) {
987 /* Always a character constant */
989 if (C == '\n' || C == EOF) {
990 Error (ERR_ILLEGAL_CHARCON);
997 Error (ERR_ILLEGAL_CHARCON);
1005 ReadStringConst ('\"');
1010 /* Line continuation? */
1014 /* Handle as white space */
1028 /* Check if we have any open .IFs in this file */
1031 /* If this was an include file, then close it and handle like a
1032 * separator. Do not close the main file, but return EOF.
1043 /* If we get here, we could not identify the current character. Skip it
1046 Error (ERR_INVALID_CHAR, C & 0xFF);
1053 void Consume (enum Token Expected, unsigned ErrMsg)
1054 /* Consume Expected, print an error if we don't find it */
1056 if (Tok == Expected) {
1065 void ConsumeSep (void)
1066 /* Consume a separator token; any token other than TOK_SEP or TOK_EOF is reported as ERR_TOO_MANY_CHARS */
1068 /* Accept an EOF as separator */
1069 if (Tok != TOK_EOF) {
1070 if (Tok != TOK_SEP) {
1071 Error (ERR_TOO_MANY_CHARS);
1081 void ConsumeLParen (void)
1082 /* Consume a left paren */
1084 Consume (TOK_LPAREN, ERR_LPAREN_EXPECTED);
1089 void ConsumeRParen (void)
1090 /* Consume a right paren */
1092 Consume (TOK_RPAREN, ERR_RPAREN_EXPECTED);
1097 void ConsumeComma (void)
1098 /* Consume a comma */
1100 Consume (TOK_COMMA, ERR_COMMA_EXPECTED);
1105 void SkipUntilSep (void)
1106 /* Skip tokens until we reach a line separator or the end of file */
1108 while (Tok != TOK_SEP && Tok != TOK_EOF) {
1115 int TokHasSVal (enum Token Tok)
1116 /* Return true if the given token has an attached SVal */
1118 return (Tok == TOK_IDENT || Tok == TOK_STRCON);
1123 int TokHasIVal (enum Token Tok)
1124 /* Return true if the given token has an attached IVal */
1126 return (Tok == TOK_INTCON || Tok == TOK_CHARCON || Tok == TOK_MNEMO);
1131 int GetSubKey (const char** Keys, unsigned Count)
1132 /* Search for a subkey in a table of keywords. The current token must be an
1133 * identifier and all keys must be in upper case. The identifier will be
1134 * uppercased in the process. The function returns the index of the keyword,
1135 * or -1 if the keyword was not found.
1140 /* Must have an identifier */
1141 PRECONDITION (Tok == TOK_IDENT);
1143 /* If we aren't in ignore case mode, we have to uppercase the identifier */
1148 /* Do a linear search (a binary search is not worth the effort) */
1149 for (I = 0; I < Count; ++I) {
1150 if (strcmp (SVal, Keys [I]) == 0) {
1162 void WriteFiles (void)
1163 /* Write the list of input files to the object file */
1167 /* Tell the obj file module that we're about to start the file list */
1170 /* Write the file count. NOTE(review): written via ObjWrite8, presumably a single byte - confirm MAX_INPUT_FILES keeps FileCount within that range */
1171 ObjWrite8 (FileCount);
1173 /* Write modification time, size and name of each file, in that order */
1174 for (I = 0; I < FileCount; ++I) {
1175 ObjWrite32 (Files [I].MTime);
1176 ObjWrite32 (Files [I].Size);
1177 ObjWriteStr (Files [I].Name);
1180 /* Done writing files */
1186 void InitScanner (const char* InFile)
1187 /* Initialize the scanner, open the given input file */
1189 /* Open the input file */
1190 NewInputFile (InFile);
1195 void DoneScanner (void)
1196 /* Release scanner resources */