1 /*****************************************************************************/
5 /* The scanner for the ca65 macroassembler */
9 /* (C) 1998-2005 Ullrich von Bassewitz */
11 /* D-70794 Filderstadt */
12 /* EMail: uz@cc65.org */
15 /* This software is provided 'as-is', without any expressed or implied */
16 /* warranty. In no event will the authors be held liable for any damages */
17 /* arising from the use of this software. */
19 /* Permission is granted to anyone to use this software for any purpose, */
20 /* including commercial applications, and to alter it and redistribute it */
21 /* freely, subject to the following restrictions: */
23 /* 1. The origin of this software must not be misrepresented; you must not */
24 /* claim that you wrote the original software. If you use this software */
25 /* in a product, an acknowledgment in the product documentation would be */
26 /* appreciated but is not required. */
27 /* 2. Altered source versions must be plainly marked as such, and must not */
28 /* be misrepresented as being the original software. */
29 /* 3. This notice may not be removed or altered from any source */
32 /*****************************************************************************/
41 #include <sys/types.h> /* EMX needs this */
66 /*****************************************************************************/
68 /*****************************************************************************/
72 enum Token Tok = TOK_NONE; /* Current token */
73 int WS; /* Flag: Whitespace before token */
74 long IVal; /* Integer token attribute */
75 char SVal[MAX_STR_LEN+1]; /* String token attribute */
77 FilePos CurPos = { 0, 0, 0 }; /* Name and position in current file */
81 /* Struct to handle include files. */
82 typedef struct InputFile InputFile;
84 FILE* F; /* Input file descriptor */
85 FilePos Pos; /* Position in file */
86 enum Token Tok; /* Last token */
87 int C; /* Last character */
88 char Line[256]; /* The current input line */
89 InputFile* Next; /* Linked list of input files */
92 /* Struct to handle textual input data */
93 typedef struct InputData InputData;
95 char* Data; /* Pointer to the data */
96 const char* Pos; /* Pointer to current position */
97 int Malloced; /* Memory was malloced */
98 enum Token Tok; /* Last token */
99 int C; /* Last character */
100 InputData* Next; /* Linked list of input data */
103 /* Current input variables */
104 static InputFile* IFile = 0; /* Current input file */
105 static InputData* IData = 0; /* Current input memory data */
106 static unsigned ICount = 0; /* Count of input files */
107 static int C = 0; /* Current input character */
109 /* Force end of assembly */
112 /* List of dot keywords with the corresponding tokens */
114 const char* Key; /* MUST be first field */
119 { ".ADDR", TOK_ADDR },
120 { ".ALIGN", TOK_ALIGN },
121 { ".AND", TOK_BOOLAND },
122 { ".ASCIIZ", TOK_ASCIIZ },
123 { ".ASSERT", TOK_ASSERT },
124 { ".AUTOIMPORT", TOK_AUTOIMPORT },
125 { ".BANKBYTE", TOK_BANKBYTE },
126 { ".BITAND", TOK_AND },
127 { ".BITNOT", TOK_NOT },
128 { ".BITOR", TOK_OR },
129 { ".BITXOR", TOK_XOR },
130 { ".BLANK", TOK_BLANK },
132 { ".BYT", TOK_BYTE },
133 { ".BYTE", TOK_BYTE },
134 { ".CASE", TOK_CASE },
135 { ".CHARMAP", TOK_CHARMAP },
136 { ".CODE", TOK_CODE },
137 { ".CONCAT", TOK_CONCAT },
138 { ".CONDES", TOK_CONDES },
139 { ".CONST", TOK_CONST },
140 { ".CONSTRUCTOR", TOK_CONSTRUCTOR },
142 { ".DATA", TOK_DATA },
144 { ".DBYT", TOK_DBYT },
145 { ".DEBUGINFO", TOK_DEBUGINFO },
146 { ".DEF", TOK_DEFINED },
147 { ".DEFINE", TOK_DEFINE },
148 { ".DEFINED", TOK_DEFINED },
149 { ".DESTRUCTOR", TOK_DESTRUCTOR },
150 { ".DWORD", TOK_DWORD },
151 { ".ELSE", TOK_ELSE },
152 { ".ELSEIF", TOK_ELSEIF },
154 { ".ENDENUM", TOK_ENDENUM },
155 { ".ENDIF", TOK_ENDIF },
156 { ".ENDMAC", TOK_ENDMACRO },
157 { ".ENDMACRO", TOK_ENDMACRO },
158 { ".ENDPROC", TOK_ENDPROC },
159 { ".ENDREP", TOK_ENDREP },
160 { ".ENDREPEAT", TOK_ENDREP },
161 { ".ENDSCOPE", TOK_ENDSCOPE },
162 { ".ENDSTRUCT", TOK_ENDSTRUCT },
163 { ".ENDUNION", TOK_ENDUNION },
164 { ".ENUM", TOK_ENUM },
165 { ".ERROR", TOK_ERROR },
166 { ".EXITMAC", TOK_EXITMACRO },
167 { ".EXITMACRO", TOK_EXITMACRO },
168 { ".EXPORT", TOK_EXPORT },
169 { ".EXPORTZP", TOK_EXPORTZP },
170 { ".FARADDR", TOK_FARADDR },
171 { ".FEATURE", TOK_FEATURE },
172 { ".FILEOPT", TOK_FILEOPT },
173 { ".FOPT", TOK_FILEOPT },
174 { ".FORCEIMPORT", TOK_FORCEIMPORT },
175 { ".FORCEWORD", TOK_FORCEWORD },
176 { ".GLOBAL", TOK_GLOBAL },
177 { ".GLOBALZP", TOK_GLOBALZP },
178 { ".HIBYTE", TOK_HIBYTE },
179 { ".HIWORD", TOK_HIWORD },
182 { ".IDENT", TOK_MAKEIDENT },
184 { ".IFBLANK", TOK_IFBLANK },
185 { ".IFCONST", TOK_IFCONST },
186 { ".IFDEF", TOK_IFDEF },
187 { ".IFNBLANK", TOK_IFNBLANK },
188 { ".IFNCONST", TOK_IFNCONST },
189 { ".IFNDEF", TOK_IFNDEF },
190 { ".IFNREF", TOK_IFNREF },
191 { ".IFP02", TOK_IFP02 },
192 { ".IFP816", TOK_IFP816 },
193 { ".IFPC02", TOK_IFPC02 },
194 { ".IFPSC02", TOK_IFPSC02 },
195 { ".IFREF", TOK_IFREF },
196 { ".IMPORT", TOK_IMPORT },
197 { ".IMPORTZP", TOK_IMPORTZP },
198 { ".INCBIN", TOK_INCBIN },
199 { ".INCLUDE", TOK_INCLUDE },
200 { ".INTERRUPTOR", TOK_INTERRUPTOR },
201 { ".LEFT", TOK_LEFT },
202 { ".LINECONT", TOK_LINECONT },
203 { ".LIST", TOK_LIST },
204 { ".LISTBYTES", TOK_LISTBYTES },
205 { ".LOBYTE", TOK_LOBYTE },
206 { ".LOCAL", TOK_LOCAL },
207 { ".LOCALCHAR", TOK_LOCALCHAR },
208 { ".LOWORD", TOK_LOWORD },
209 { ".MAC", TOK_MACRO },
210 { ".MACPACK", TOK_MACPACK },
211 { ".MACRO", TOK_MACRO },
212 { ".MATCH", TOK_MATCH },
215 { ".NOT", TOK_BOOLNOT },
216 { ".NULL", TOK_NULL },
217 { ".OR", TOK_BOOLOR },
221 { ".P816", TOK_P816 },
222 { ".PAGELEN", TOK_PAGELENGTH },
223 { ".PAGELENGTH", TOK_PAGELENGTH },
224 { ".PARAMCOUNT", TOK_PARAMCOUNT },
225 { ".PC02", TOK_PC02 },
226 { ".POPSEG", TOK_POPSEG },
227 { ".PROC", TOK_PROC },
228 { ".PSC02", TOK_PSC02 },
229 { ".PUSHSEG", TOK_PUSHSEG },
230 { ".REF", TOK_REFERENCED },
231 { ".REFERENCED", TOK_REFERENCED },
232 { ".RELOC", TOK_RELOC },
233 { ".REPEAT", TOK_REPEAT },
235 { ".RIGHT", TOK_RIGHT },
236 { ".RODATA", TOK_RODATA },
237 { ".SCOPE", TOK_SCOPE },
238 { ".SEGMENT", TOK_SEGMENT },
240 { ".SETCPU", TOK_SETCPU },
243 { ".SIZEOF", TOK_SIZEOF },
244 { ".SMART", TOK_SMART },
245 { ".SPRINTF", TOK_SPRINTF },
246 { ".STRAT", TOK_STRAT },
247 { ".STRING", TOK_STRING },
248 { ".STRLEN", TOK_STRLEN },
249 { ".STRUCT", TOK_STRUCT },
250 { ".SUNPLUS", TOK_SUNPLUS },
252 { ".TCOUNT", TOK_TCOUNT },
253 { ".TIME", TOK_TIME },
254 { ".UNION", TOK_UNION },
255 { ".VERSION", TOK_VERSION },
256 { ".WARNING", TOK_WARNING },
257 { ".WORD", TOK_WORD },
258 { ".XMATCH", TOK_XMATCH },
259 { ".XOR", TOK_BOOLXOR },
260 { ".ZEROPAGE", TOK_ZEROPAGE },
265 /*****************************************************************************/
267 /*****************************************************************************/
271 static void NextChar (void);
272 /* Read the next character from the input file */
276 /*****************************************************************************/
277 /* Character classification functions */
278 /*****************************************************************************/
283 /* Return true if the character is a valid character for an identifier */
285 return IsAlNum (C) ||
287 (C == '@' && AtInIdents) ||
288 (C == '$' && DollarInIdents);
int IsIdStart (int C)
/* Check whether C is allowed as the first character of an identifier. This
 * holds for everything IsAlpha accepts and for the underscore. The result is
 * 1 if C qualifies and 0 otherwise.
 */
{
    if (IsAlpha (C)) {
        return 1;
    }
    return (C == '_');
}
301 /*****************************************************************************/
303 /*****************************************************************************/
307 void NewInputFile (const char* Name)
308 /* Open a new input file */
313 /* First try to open the file */
314 F = fopen (Name, "r");
319 /* Error (fatal error if this is the main file) */
321 Fatal ("Cannot open input file `%s': %s", Name, strerror (errno));
324 /* We are on include level. Search for the file in the include
327 PathName = FindInclude (Name);
328 if (PathName == 0 || (F = fopen (PathName, "r")) == 0) {
329 /* Not found or cannot open, print an error and bail out */
330 Error ("Cannot open include file `%s': %s", Name, strerror (errno));
333 /* Free the allocated memory */
338 /* check again if we do now have an open file */
343 /* Stat the file and remember the values */
345 if (fstat (fileno (F), &Buf) != 0) {
346 Fatal ("Cannot stat input file `%s': %s", Name, strerror (errno));
349 /* Add the file to the input file table and remember the index */
350 FileIdx = AddFile (Name, Buf.st_size, Buf.st_mtime);
352 /* Create a new state variable and initialize it */
353 I = xmalloc (sizeof (*I));
357 I->Pos.Name = FileIdx;
362 /* Use the new file */
367 /* Read the first character from the new file */
370 /* Setup the next token so it will be skipped on the next call to
380 void DoneInputFile (void)
381 /* Close the current input file */
385 /* Restore the old token */
389 /* Save a pointer to the current struct, then set it back */
393 /* Cleanup the current stuff */
401 void NewInputData (char* Data, int Malloced)
402 /* Add a chunk of input data to the input stream */
406 /* Create a new state variable and initialize it */
407 I = xmalloc (sizeof (*I));
410 I->Malloced = Malloced;
414 /* Use the new data */
418 /* Read the first character from the new file */
421 /* Setup the next token so it will be skipped on the next call to
429 static void DoneInputData (void)
430 /* End the current input data stream */
434 /* Restore the old token */
438 /* Save a pointer to the current struct, then set it back */
442 /* Cleanup the current stuff */
451 static unsigned DigitVal (unsigned char C)
/* Convert a digit into its numerical representation */
457 return toupper (C) - 'A' + 10;
463 static void NextChar (void)
464 /* Read the next character from the input file */
466 /* If we have an input data structure, read from there */
471 /* End of input data */
477 /* Check for end of line, read the next line if needed */
478 while (IFile->Line [IFile->Pos.Col] == '\0') {
480 unsigned Len, Removed;
482 /* End of current line reached, read next line */
483 if (fgets (IFile->Line, sizeof (IFile->Line), IFile->F) == 0) {
484 /* End of file. Add an empty line to the listing. This is a
485 * small hack needed to keep the PC output in sync.
487 NewListingLine ("", IFile->Pos.Name, ICount);
492 /* For better handling of files with unusual line endings (DOS
 * files that are accidentally translated on Unix for example),
494 * first remove all whitespace at the end, then add a single
497 Len = strlen (IFile->Line);
499 while (Len > 0 && IsSpace (IFile->Line[Len-1])) {
504 IFile->Line[Len+0] = '\n';
505 IFile->Line[Len+1] = '\0';
512 /* Remember the new line for the listing */
513 NewListingLine (IFile->Line, IFile->Pos.Name, ICount);
517 /* Return the next character from the file */
518 C = IFile->Line [IFile->Pos.Col++];
525 void LocaseSVal (void)
526 /* Make SVal lower case */
530 SVal [I] = tolower (SVal [I]);
537 void UpcaseSVal (void)
538 /* Make SVal upper case */
542 SVal [I] = toupper (SVal [I]);
549 static int CmpDotKeyword (const void* K1, const void* K2)
550 /* Compare function for the dot keyword search */
552 return strcmp (((struct DotKeyword*)K1)->Key, ((struct DotKeyword*)K2)->Key);
557 static unsigned char FindDotKeyword (void)
558 /* Find the dot keyword in SVal. Return the corresponding token if found,
559 * return TOK_NONE if not found.
562 static const struct DotKeyword K = { SVal, 0 };
563 struct DotKeyword* R;
565 /* If we aren't in ignore case mode, we have to uppercase the keyword */
570 /* Search for the keyword */
571 R = bsearch (&K, DotKeywords, sizeof (DotKeywords) / sizeof (DotKeywords [0]),
572 sizeof (DotKeywords [0]), CmpDotKeyword);
582 static void ReadIdent (unsigned Index)
583 /* Read an identifier from the current input position into Ident. Filling SVal
584 * starts at Index with the current character in C. It is assumed that any
585 * characters already filled in are ok, and the character in C is checked.
588 /* Read the identifier */
590 if (Index < MAX_STR_LEN) {
594 } while (IsIdChar (C));
597 /* If we should ignore case, convert the identifier to upper case */
605 static unsigned ReadStringConst (int StringTerm)
606 /* Read a string constant into SVal. Check for maximum string length and all
607 * other stuff. The length of the string is returned.
612 /* Skip the leading string terminator */
615 /* Read the string */
618 if (C == StringTerm) {
621 if (C == '\n' || C == EOF) {
622 Error ("Newline in string constant");
626 /* Check for string length, print an error message once */
627 if (I == MAX_STR_LEN) {
628 Error ("Maximum string size exceeded");
629 } else if (I < MAX_STR_LEN) {
634 /* Skip the character */
638 /* Skip the trailing terminator */
641 /* Terminate the string */
642 if (I >= MAX_STR_LEN) {
647 /* Return the length of the string */
653 static int Sweet16Reg (const char* Ident)
654 /* Check if the given identifier is a sweet16 register. Return -1 if this is
655 * not the case, return the register number otherwise.
661 if (Ident[0] != 'r' && Ident[0] != 'R') {
664 if (!IsDigit (Ident[1])) {
668 if (sscanf (Ident+1, "%u%c", &RegNum, &Check) != 1 || RegNum > 15) {
669 /* Invalid register */
673 /* The register number is valid */
679 void NextRawTok (void)
680 /* Read the next raw token from the input stream */
682 /* If we've a forced end of assembly, don't read further */
689 /* Check if we have tokens from another input source */
690 if (InputFromStack ()) {
695 /* Skip whitespace, remember if we had some */
696 if ((WS = IsBlank (C)) != 0) {
699 } while (IsBlank (C));
702 /* If we're reading from the file, update the location from where the
703 * next token will be read. If we're reading from input data, keep the
710 /* Hex number or PC symbol? */
714 /* Hex digit must follow or DollarIsPC must be enabled */
720 Error ("Hexadecimal digit expected");
724 /* Read the number */
726 while (IsXDigit (C)) {
727 if (IVal & 0xF0000000) {
728 Error ("Overflow in hexadecimal number");
731 IVal = (IVal << 4) + DigitVal (C);
735 /* This is an integer constant */
744 /* 0 or 1 must follow */
746 Error ("Binary digit expected");
749 /* Read the number */
751 while (IsBDigit (C)) {
752 if (IVal & 0x80000000) {
753 Error ("Overflow in binary number");
756 IVal = (IVal << 1) + DigitVal (C);
760 /* This is an integer constant */
775 /* Ignore leading zeros */
780 /* Read the number into Buf counting the digits */
782 while (IsXDigit (C)) {
784 /* Buf is big enough to allow any decimal and hex number to
785 * overflow, so ignore excess digits here, they will be detected
786 * when we convert the value.
788 if (Digits < sizeof (Buf)) {
795 /* Allow zilog/intel style hex numbers with a 'h' suffix */
796 if (C == 'h' || C == 'H') {
799 Max = 0xFFFFFFFFUL / 16;
802 Max = 0xFFFFFFFFUL / 10;
805 /* Convert the number using the given base */
807 for (I = 0; I < Digits; ++I) {
809 Error ("Number out of range");
813 DVal = DigitVal (Buf[I]);
815 Error ("Invalid digits in number");
819 IVal = (IVal * Base) + DVal;
822 /* This is an integer constant */
827 /* Control command? */
830 /* Remember and skip the dot */
833 /* Check if it's just a dot */
834 if (!IsIdStart (C)) {
841 /* Read the remainder of the identifier */
845 /* Dot keyword, search for it */
846 Tok = FindDotKeyword ();
847 if (Tok == TOK_NONE) {
850 if (!LeadingDotInIdents) {
851 /* Invalid pseudo instruction */
852 Error ("`%s' is not a recognized control command", SVal);
856 /* An identifier with a dot. Check if it's a define style
859 if (IsDefine (SVal)) {
860 /* This is a define style macro - expand it */
865 /* Just an identifier with a dot */
873 /* Indirect op for sweet16 cpu. Must check this before checking for local
874 * symbols, because these may also use the '@' symbol.
876 if (CPU == CPU_SWEET16 && C == '@') {
883 if (C == LocalStart) {
885 /* Read the identifier */
888 /* Start character alone is not enough */
889 if (SVal [1] == '\0') {
890 Error ("Invalid cheap local symbol");
894 /* A local identifier */
895 Tok = TOK_LOCAL_IDENT;
900 /* Identifier or keyword? */
903 /* Read the identifier */
906 /* Check for special names. Bail out if we have identified the type of
907 * the token. Go on if the token is an identifier.
909 if (SVal[1] == '\0') {
910 switch (toupper (SVal [0])) {
915 Tok = TOK_OVERRIDE_ABS;
924 Tok = TOK_OVERRIDE_FAR;
944 Tok = TOK_OVERRIDE_ZP;
953 } else if (CPU == CPU_SWEET16 && (IVal = Sweet16Reg (SVal)) >= 0) {
955 /* A sweet16 register number in sweet16 mode */
961 /* Check for define style macro */
962 if (IsDefine (SVal)) {
963 /* Macro - expand it */
973 /* Ok, let's do the switch */
1028 Tok = TOK_NAMESPACE;
1067 while (C != '\n' && C != EOF) {
1112 } else if (C == '<') {
1115 } else if (C == '>') {
1138 } else if (C == '>') {
1152 /* Hack: If we allow ' as terminating character for strings, read
1153 * the following stuff as a string, and check for a one character
1156 if (LooseStringTerm) {
1157 if (ReadStringConst ('\'') == 1) {
1164 /* Always a character constant */
1166 if (C == EOF || IsControl (C)) {
1167 Error ("Illegal character constant");
1174 if (!MissingCharTerm) {
1175 Error ("Illegal character constant");
1184 ReadStringConst ('\"');
1189 /* Line continuation? */
1193 /* Handle as white space */
1207 /* Check if we have any open .IFs in this file */
1209 /* Check if we have any open token lists in this file */
1212 /* If this was an include file, then close it and read the next
1213 * token. When an include file is opened, the last token of the
1214 * old file is not skipped, to prevent the lookahead to read
1215 * the next line of the old input file. So we do effectively
1216 * skip the last token in the old file (the file name of the
1217 * include statement).
1218 * In case of the main file, do not close it, but return EOF.
1221 /* Input came from internal data */
1224 } else if (ICount > 1) {
1234 /* If we go here, we could not identify the current character. Skip it
1237 Error ("Invalid input character: 0x%02X", C & 0xFF);
1244 int TokHasSVal (enum Token Tok)
1245 /* Return true if the given token has an attached SVal */
1247 return (Tok == TOK_IDENT || TOK_LOCAL_IDENT || Tok == TOK_STRCON);
1252 int TokHasIVal (enum Token Tok)
1253 /* Return true if the given token has an attached IVal */
1255 return (Tok == TOK_INTCON || Tok == TOK_CHARCON || Tok == TOK_REG);
1260 int GetSubKey (const char** Keys, unsigned Count)
1261 /* Search for a subkey in a table of keywords. The current token must be an
1262 * identifier and all keys must be in upper case. The identifier will be
1263 * uppercased in the process. The function returns the index of the keyword,
1264 * or -1 if the keyword was not found.
1269 /* Must have an identifier */
1270 PRECONDITION (Tok == TOK_IDENT);
1272 /* If we aren't in ignore case mode, we have to uppercase the identifier */
1277 /* Do a linear search (a binary search is not worth the effort) */
1278 for (I = 0; I < Count; ++I) {
1279 if (strcmp (SVal, Keys [I]) == 0) {
1291 unsigned char ParseAddrSize (void)
1292 /* Check if the next token is a keyword that denotes an address size specifier.
1293 * If so, return the corresponding address size constant, otherwise output an
1294 * error message and return ADDR_SIZE_DEFAULT.
1297 static const char* Keys[] = {
1298 "DIRECT", "ZEROPAGE", "ZP",
1299 "ABSOLUTE", "ABS", "NEAR",
1304 /* Check for an identifier */
1305 if (Tok != TOK_IDENT) {
1306 Error ("Address size specifier expected");
1307 return ADDR_SIZE_DEFAULT;
1310 /* Search for the attribute */
1311 switch (GetSubKey (Keys, sizeof (Keys) / sizeof (Keys [0]))) {
1314 case 2: return ADDR_SIZE_ZP;
1317 case 5: return ADDR_SIZE_ABS;
1318 case 6: return ADDR_SIZE_FAR;
1320 case 8: return ADDR_SIZE_LONG;
1322 Error ("Address size specifier expected");
1323 return ADDR_SIZE_DEFAULT;
void InitScanner (const char* InFile)
/* Set up the scanner for use. InFile names the input file to open; opening
 * it is delegated to NewInputFile.
 */
{
    NewInputFile (InFile);
}
1338 void DoneScanner (void)
1339 /* Release scanner resources */