git.sur5r.net Git - cc65/blob - src/ca65/scanner.c

   1 /*****************************************************************************/
   2 /*                                                                           */
   3 /*                                 scanner.c                                 */
   4 /*                                                                           */
   5 /*                  The scanner for the ca65 macroassembler                  */
   6 /*                                                                           */
   7 /*                                                                           */
   8 /*                                                                           */
   9 /* (C) 1998-2008 Ullrich von Bassewitz                                       */
  10 /*               Roemerstrasse 52                                            */
  11 /*               D-70794 Filderstadt                                         */
  12 /* EMail:        uz@cc65.org                                                 */
  13 /*                                                                           */
  14 /*                                                                           */
  15 /* This software is provided 'as-is', without any expressed or implied       */
  16 /* warranty.  In no event will the authors be held liable for any damages    */
  17 /* arising from the use of this software.                                    */
  18 /*                                                                           */
  19 /* Permission is granted to anyone to use this software for any purpose,     */
  20 /* including commercial applications, and to alter it and redistribute it    */
  21 /* freely, subject to the following restrictions:                            */
  22 /*                                                                           */
  23 /* 1. The origin of this software must not be misrepresented; you must not   */
  24 /*    claim that you wrote the original software. If you use this software   */
  25 /*    in a product, an acknowledgment in the product documentation would be  */
  26 /*    appreciated but is not required.                                       */
  27 /* 2. Altered source versions must be plainly marked as such, and must not   */
  28 /*    be misrepresented as being the original software.                      */
  29 /* 3. This notice may not be removed or altered from any source              */
  30 /*    distribution.                                                          */
  31 /*                                                                           */
  32 /*****************************************************************************/
  33
  34
  35
  36 #include <stdio.h>
  37 #include <stdlib.h>
  38 #include <string.h>
  39 #include <ctype.h>
  40 #include <errno.h>
  41 #include <sys/types.h>          /* EMX needs this */
  42 #include <sys/stat.h>
  43
  44 /* common */
  45 #include "addrsize.h"
  46 #include "attrib.h"
  47 #include "chartype.h"
  48 #include "check.h"
  49 #include "fname.h"
  50 #include "xmalloc.h"
  51
  52 /* ca65 */
  53 #include "condasm.h"
  54 #include "error.h"
  55 #include "filetab.h"
  56 #include "global.h"
  57 #include "incpath.h"
  58 #include "instr.h"
  59 #include "istack.h"
  60 #include "listing.h"
  61 #include "macro.h"
  62 #include "toklist.h"
  63 #include "scanner.h"
  64
  65
  66
  67 /*****************************************************************************/
  68 /*                                   Data                                    */
  69 /*****************************************************************************/
  70
  71
  72
/* Scanner results. These variables have external linkage; the scanner
 * functions in this file fill them in for every token that is read.
 */
Token Tok = TOK_NONE;                   /* Current token */
int WS;                                 /* Flag: Whitespace before token */
long IVal;                              /* Integer token attribute (value of a
                                         * TOK_INTCON)
                                         */
StrBuf SVal = STATIC_STRBUF_INITIALIZER;/* String token attribute (text of
                                         * identifiers, keywords and strings)
                                         */

FilePos CurPos = { 0, 0, 0 };           /* Name and position in current file,
                                         * updated by the MarkStart function
                                         * of a file source
                                         */
  79
  80
  81
/* Struct to handle include files. One instance lives inside each CharSource
 * that reads from a file.
 */
typedef struct InputFile InputFile;
struct InputFile {
    FILE*           F;                  /* Input file descriptor */
    FilePos         Pos;                /* Position in file (file table index,
                                         * line and column of the next char)
                                         */
    Token           Tok;                /* Last token (not referenced in the
                                         * part of the file visible here;
                                         * CharSource has its own Tok field)
                                         */
    int             C;                  /* Last character (see note on Tok) */
    char            Line[256];          /* The current input line; longer
                                         * lines are read in several pieces
                                         */
    InputFile*      Next;               /* Linked list of input files */
};
  92
/* Struct to handle textual input data. One instance lives inside each
 * CharSource that reads from a zero terminated memory buffer.
 */
typedef struct InputData InputData;
struct InputData {
    char*           Text;               /* Pointer to the text data */
    const char*     Pos;                /* Pointer to current position */
    int             Malloced;           /* Memory was malloced; if set, Text
                                         * is freed when the source is closed
                                         */
    Token           Tok;                /* Last token (not referenced in the
                                         * part of the file visible here;
                                         * CharSource has its own Tok field)
                                         */
    int             C;                  /* Last character (see note on Tok) */
    InputData*      Next;               /* Linked list of input data */
};
 103
/* Input source: Either file or data (forward declaration, the struct itself
 * is defined below).
 */
typedef struct CharSource CharSource;

/* Set of input functions. Each kind of input source supplies one constant
 * table of these, and the scanner calls through it.
 */
typedef struct CharSourceFunctions CharSourceFunctions;
struct CharSourceFunctions {
    void (*MarkStart) (CharSource*);    /* Mark the start pos of a token */
    void (*NextChar) (CharSource*);     /* Read next char from input into the
                                         * scanner's current character
                                         */
    void (*Done) (CharSource*);         /* Close input source; called right
                                         * before the source is freed
                                         */
};
 114
/* Input source: Either file or data. Sources form a stack linked through
 * Next, with the active one on top.
 */
struct CharSource {
    CharSource*                 Next;   /* Linked list of char sources */
    Token                       Tok;    /* Last token of the source below,
                                         * saved when this one was activated
                                         */
    int                         C;      /* Last character, saved together
                                         * with Tok
                                         */
    const CharSourceFunctions*  Func;   /* Pointer to function table */
    union {
        InputFile               File;   /* File data */
        InputData               Data;   /* Textual data */
    }                           V;      /* Which member is valid is decided
                                         * by the function table in Func
                                         */
};
 126
/* Current input variables */
static CharSource* Source       = 0;    /* Current char source (top of the
                                         * stack of sources)
                                         */
static unsigned     FCount      = 0;    /* Count of input files that are
                                         * currently open
                                         */
static int          C           = 0;    /* Current input character, EOF at
                                         * the end of a source
                                         */

/* Force end of assembly. When set, NextRawTok() reports TOK_EOF without
 * reading any further input.
 */
int               ForcedEnd     = 0;
 134
/* List of dot keywords with the corresponding tokens.
 *
 * CAUTION: The table is searched with bsearch() using strcmp() on the Key
 * field, so the entries must stay sorted in ascending strcmp() order (note
 * that digits sort before letters, which is why ".A16" precedes ".A8").
 * All keys are upper case; the lookup uppercases the search key first.
 * Several spellings may map to the same token (".BYT" and ".BYTE").
 */
struct DotKeyword {
    const char* Key;                    /* MUST be first field */
    Token       Tok;
} DotKeywords [] = {
    { ".A16",           TOK_A16         },
    { ".A8",            TOK_A8          },
    { ".ADDR",          TOK_ADDR        },
    { ".ALIGN",         TOK_ALIGN       },
    { ".AND",           TOK_BOOLAND     },
    { ".ASCIIZ",        TOK_ASCIIZ      },
    { ".ASSERT",        TOK_ASSERT      },
    { ".AUTOIMPORT",    TOK_AUTOIMPORT  },
    { ".BANKBYTE",      TOK_BANKBYTE    },
    { ".BITAND",        TOK_AND         },
    { ".BITNOT",        TOK_NOT         },
    { ".BITOR",         TOK_OR          },
    { ".BITXOR",        TOK_XOR         },
    { ".BLANK",         TOK_BLANK       },
    { ".BSS",           TOK_BSS         },
    { ".BYT",           TOK_BYTE        },
    { ".BYTE",          TOK_BYTE        },
    { ".CASE",          TOK_CASE        },
    { ".CHARMAP",       TOK_CHARMAP     },
    { ".CODE",          TOK_CODE        },
    { ".CONCAT",        TOK_CONCAT      },
    { ".CONDES",        TOK_CONDES      },
    { ".CONST",         TOK_CONST       },
    { ".CONSTRUCTOR",   TOK_CONSTRUCTOR },
    { ".CPU",           TOK_CPU         },
    { ".DATA",          TOK_DATA        },
    { ".DBG",           TOK_DBG         },
    { ".DBYT",          TOK_DBYT        },
    { ".DEBUGINFO",     TOK_DEBUGINFO   },
    { ".DEF",           TOK_DEFINED     },
    { ".DEFINE",        TOK_DEFINE      },
    { ".DEFINED",       TOK_DEFINED     },
    { ".DESTRUCTOR",    TOK_DESTRUCTOR  },
    { ".DWORD",         TOK_DWORD       },
    { ".ELSE",          TOK_ELSE        },
    { ".ELSEIF",        TOK_ELSEIF      },
    { ".END",           TOK_END         },
    { ".ENDENUM",       TOK_ENDENUM     },
    { ".ENDIF",         TOK_ENDIF       },
    { ".ENDMAC",        TOK_ENDMACRO    },
    { ".ENDMACRO",      TOK_ENDMACRO    },
    { ".ENDPROC",       TOK_ENDPROC     },
    { ".ENDREP",        TOK_ENDREP      },
    { ".ENDREPEAT",     TOK_ENDREP      },
    { ".ENDSCOPE",      TOK_ENDSCOPE    },
    { ".ENDSTRUCT",     TOK_ENDSTRUCT   },
    { ".ENDUNION",      TOK_ENDUNION    },
    { ".ENUM",          TOK_ENUM        },
    { ".ERROR",         TOK_ERROR       },
    { ".EXITMAC",       TOK_EXITMACRO   },
    { ".EXITMACRO",     TOK_EXITMACRO   },
    { ".EXPORT",        TOK_EXPORT      },
    { ".EXPORTZP",      TOK_EXPORTZP    },
    { ".FARADDR",       TOK_FARADDR     },
    { ".FEATURE",       TOK_FEATURE     },
    { ".FILEOPT",       TOK_FILEOPT     },
    { ".FOPT",          TOK_FILEOPT     },
    { ".FORCEIMPORT",   TOK_FORCEIMPORT },
    { ".FORCEWORD",     TOK_FORCEWORD   },
    { ".GLOBAL",        TOK_GLOBAL      },
    { ".GLOBALZP",      TOK_GLOBALZP    },
    { ".HIBYTE",        TOK_HIBYTE      },
    { ".HIWORD",        TOK_HIWORD      },
    { ".I16",           TOK_I16         },
    { ".I8",            TOK_I8          },
    { ".IDENT",         TOK_MAKEIDENT   },
    { ".IF",            TOK_IF          },
    { ".IFBLANK",       TOK_IFBLANK     },
    { ".IFCONST",       TOK_IFCONST     },
    { ".IFDEF",         TOK_IFDEF       },
    { ".IFNBLANK",      TOK_IFNBLANK    },
    { ".IFNCONST",      TOK_IFNCONST    },
    { ".IFNDEF",        TOK_IFNDEF      },
    { ".IFNREF",        TOK_IFNREF      },
    { ".IFP02",         TOK_IFP02       },
    { ".IFP816",        TOK_IFP816      },
    { ".IFPC02",        TOK_IFPC02      },
    { ".IFPSC02",       TOK_IFPSC02     },
    { ".IFREF",         TOK_IFREF       },
    { ".IMPORT",        TOK_IMPORT      },
    { ".IMPORTZP",      TOK_IMPORTZP    },
    { ".INCBIN",        TOK_INCBIN      },
    { ".INCLUDE",       TOK_INCLUDE     },
    { ".INTERRUPTOR",   TOK_INTERRUPTOR },
    { ".LEFT",          TOK_LEFT        },
    { ".LINECONT",      TOK_LINECONT    },
    { ".LIST",          TOK_LIST        },
    { ".LISTBYTES",     TOK_LISTBYTES   },
    { ".LOBYTE",        TOK_LOBYTE      },
    { ".LOCAL",         TOK_LOCAL       },
    { ".LOCALCHAR",     TOK_LOCALCHAR   },
    { ".LOWORD",        TOK_LOWORD      },
    { ".MAC",           TOK_MACRO       },
    { ".MACPACK",       TOK_MACPACK     },
    { ".MACRO",         TOK_MACRO       },
    { ".MATCH",         TOK_MATCH       },
    { ".MID",           TOK_MID         },
    { ".MOD",           TOK_MOD         },
    { ".NOT",           TOK_BOOLNOT     },
    { ".NULL",          TOK_NULL        },
    { ".OR",            TOK_BOOLOR      },
    { ".ORG",           TOK_ORG         },
    { ".OUT",           TOK_OUT         },
    { ".P02",           TOK_P02         },
    { ".P816",          TOK_P816        },
    { ".PAGELEN",       TOK_PAGELENGTH  },
    { ".PAGELENGTH",    TOK_PAGELENGTH  },
    { ".PARAMCOUNT",    TOK_PARAMCOUNT  },
    { ".PC02",          TOK_PC02        },
    { ".POPSEG",        TOK_POPSEG      },
    { ".PROC",          TOK_PROC        },
    { ".PSC02",         TOK_PSC02       },
    { ".PUSHSEG",       TOK_PUSHSEG     },
    { ".REF",           TOK_REFERENCED  },
    { ".REFERENCED",    TOK_REFERENCED  },
    { ".RELOC",         TOK_RELOC       },
    { ".REPEAT",        TOK_REPEAT      },
    { ".RES",           TOK_RES         },
    { ".RIGHT",         TOK_RIGHT       },
    { ".RODATA",        TOK_RODATA      },
    { ".SCOPE",         TOK_SCOPE       },
    { ".SEGMENT",       TOK_SEGMENT     },
    { ".SET",           TOK_SET         },
    { ".SETCPU",        TOK_SETCPU      },
    { ".SHL",           TOK_SHL         },
    { ".SHR",           TOK_SHR         },
    { ".SIZEOF",        TOK_SIZEOF      },
    { ".SMART",         TOK_SMART       },
    { ".SPRINTF",       TOK_SPRINTF     },
    { ".STRAT",         TOK_STRAT       },
    { ".STRING",        TOK_STRING      },
    { ".STRLEN",        TOK_STRLEN      },
    { ".STRUCT",        TOK_STRUCT      },
    { ".SUNPLUS",       TOK_SUNPLUS     },
    { ".TAG",           TOK_TAG         },
    { ".TCOUNT",        TOK_TCOUNT      },
    { ".TIME",          TOK_TIME        },
    { ".UNION",         TOK_UNION       },
    { ".VERSION",       TOK_VERSION     },
    { ".WARNING",       TOK_WARNING     },
    { ".WORD",          TOK_WORD        },
    { ".XMATCH",        TOK_XMATCH      },
    { ".XOR",           TOK_BOOLXOR     },
    { ".ZEROPAGE",      TOK_ZEROPAGE    },
};
 285
 286
 287
 288 /*****************************************************************************/
 289 /*                            CharSource functions                           */
 290 /*****************************************************************************/
 291
 292
 293
 294 static void UseCharSource (CharSource* S)
 295 /* Initialize a new input source and start to use it. */
 296 {
 297     /* Remember the current input char and token */
 298     S->Tok      = Tok;
 299     S->C        = C;
 300
 301     /* Use the new input source */
 302     S->Next     = Source;
 303     Source      = S;
 304
 305     /* Read the first character from the new file */
 306     S->Func->NextChar (S);
 307
 308     /* Setup the next token so it will be skipped on the next call to
 309      * NextRawTok().
 310      */
 311     Tok = TOK_SEP;
 312 }
 313
 314
 315
 316 static void DoneCharSource (void)
 317 /* Close the top level character source */
 318 {
 319     CharSource* S;
 320
 321     /* First, call the type specific function */
 322     Source->Func->Done (Source);
 323
 324     /* Restore the old token */
 325     Tok = Source->Tok;
 326     C   = Source->C;
 327
 328     /* Remember the last stacked input source */
 329     S = Source->Next;
 330
 331     /* Delete the top level one ... */
 332     xfree (Source);
 333
 334     /* ... and use the one before */
 335     Source = S;
 336 }
 337
 338
 339
 340 /*****************************************************************************/
 341 /*                            InputFile functions                            */
 342 /*****************************************************************************/
 343
 344
 345
 346 static void IFMarkStart (CharSource* S)
 347 /* Mark the start of the next token */
 348 {
 349     CurPos = S->V.File.Pos;
 350 }
 351
 352
 353
 354 static void IFNextChar (CharSource* S)
 355 /* Read the next character from the input file */
 356 {
 357     /* Check for end of line, read the next line if needed */
 358     while (S->V.File.Line [S->V.File.Pos.Col] == '\0') {
 359
 360         unsigned Len, Removed;
 361
 362         /* End of current line reached, read next line */
 363         if (fgets (S->V.File.Line, sizeof (S->V.File.Line), S->V.File.F) == 0) {
 364             /* End of file. Add an empty line to the listing. This is a
 365              * small hack needed to keep the PC output in sync.
 366              */
 367             NewListingLine ("", S->V.File.Pos.Name, FCount);
 368             C = EOF;
 369             return;
 370         }
 371
 372         /* For better handling of files with unusual line endings (DOS
 373          * files that are accidently translated on Unix for example),
 374          * first remove all whitespace at the end, then add a single
 375          * newline.
 376          */
 377         Len = strlen (S->V.File.Line);
 378         Removed = 0;
 379         while (Len > 0 && IsSpace (S->V.File.Line[Len-1])) {
 380             ++Removed;
 381             --Len;
 382         }
 383         if (Removed) {
 384             S->V.File.Line[Len+0] = '\n';
 385             S->V.File.Line[Len+1] = '\0';
 386         }
 387
 388         /* One more line */
 389         S->V.File.Pos.Line++;
 390         S->V.File.Pos.Col = 0;
 391
 392         /* Remember the new line for the listing */
 393         NewListingLine (S->V.File.Line, S->V.File.Pos.Name, FCount);
 394
 395     }
 396
 397     /* Return the next character from the file */
 398     C = S->V.File.Line [S->V.File.Pos.Col++];
 399 }
 400
 401
 402
 403 void IFDone (CharSource* S)
 404 /* Close the current input file */
 405 {
 406     /* We're at the end of an include file. Check if we have any
 407      * open .IFs, or any open token lists in this file. This
 408      * enforcement is artificial, using conditionals that start
 409      * in one file and end in another are uncommon, and don't
 410      * allowing these things will help finding errors.
 411      */
 412     CheckOpenIfs ();
 413
 414     /* Close the input file and decrement the file count. We will ignore
 415      * errors here, since we were just reading from the file.
 416      */
 417     (void) fclose (S->V.File.F);
 418     --FCount;
 419 }
 420
 421
 422
/* Set of input file handling functions. The initializers are positional and
 * must follow the member order of CharSourceFunctions: MarkStart, NextChar,
 * Done.
 */
static const CharSourceFunctions IFFunc = {
    IFMarkStart,
    IFNextChar,
    IFDone
};
 429
 430
 431
 432 void NewInputFile (const char* Name)
 433 /* Open a new input file */
 434 {
 435     char* PathName = 0;
 436
 437     /* First try to open the file */
 438     FILE* F = fopen (Name, "r");
 439     if (F == 0) {
 440
 441         /* Error (fatal error if this is the main file) */
 442         if (FCount == 0) {
 443             Fatal ("Cannot open input file `%s': %s", Name, strerror (errno));
 444         }
 445
 446         /* We are on include level. Search for the file in the include
 447          * directories.
 448          */
 449         PathName = FindInclude (Name);
 450         if (PathName == 0 || (F = fopen (PathName, "r")) == 0) {
 451             /* Not found or cannot open, print an error and bail out */
 452             Error ("Cannot open include file `%s': %s", Name, strerror (errno));
 453         }
 454
 455         /* Use the path name from now on */
 456         Name = PathName;
 457     }
 458
 459     /* check again if we do now have an open file */
 460     if (F != 0) {
 461
 462         StrBuf          NameBuf;
 463         unsigned        FileIdx;
 464         CharSource*     S;
 465
 466         /* Stat the file and remember the values. There a race condition here,
 467          * since we cannot use fileno() (non standard identifier in standard
 468          * header file), and therefore not fstat. When using stat with the
 469          * file name, there's a risk that the file was deleted and recreated
 470          * while it was open. Since mtime and size are only used to check
 471          * if a file has changed in the debugger, we will ignore this problem
 472          * here.
 473          */
 474         struct stat Buf;
 475         if (stat (Name, &Buf) != 0) {
 476             Fatal ("Cannot stat input file `%s': %s", Name, strerror (errno));
 477         }
 478
 479         /* Add the file to the input file table and remember the index */
 480         FileIdx = AddFile (SB_InitFromString (&NameBuf, Name), Buf.st_size, Buf.st_mtime);
 481
 482         /* Create a new input source variable and initialize it */
 483         S                   = xmalloc (sizeof (*S));
 484         S->Func             = &IFFunc;
 485         S->V.File.F         = F;
 486         S->V.File.Pos.Line  = 0;
 487         S->V.File.Pos.Col   = 0;
 488         S->V.File.Pos.Name  = FileIdx;
 489         S->V.File.Line[0]   = '\0';
 490
 491         /* Count active input files */
 492         ++FCount;
 493
 494         /* Use this input source */
 495         UseCharSource (S);
 496     }
 497
 498     /* Free an allocated name buffer */
 499     xfree (PathName);
 500 }
 501
 502
 503
 504 /*****************************************************************************/
 505 /*                            InputData functions                            */
 506 /*****************************************************************************/
 507
 508
 509
static void IDMarkStart (CharSource* S attribute ((unused)))
/* Mark the start of the next token. Textual input data carries no file
 * position, so this function intentionally leaves CurPos untouched; it
 * exists only to fill the MarkStart slot of the function table.
 */
{
    /* Nothing to do here */
}
 515
 516
 517
 518 static void IDNextChar (CharSource* S)
 519 /* Read the next character from the input text */
 520 {
 521     C = *S->V.Data.Pos++;
 522     if (C == '\0') {
 523         /* End of input data */
 524         --S->V.Data.Pos;
 525         C = EOF;
 526     }
 527 }
 528
 529
 530
 531 void IDDone (CharSource* S)
 532 /* Close the current input data */
 533 {
 534     /* Cleanup the current stuff */
 535     if (S->V.Data.Malloced) {
 536         xfree (S->V.Data.Text);
 537     }
 538 }
 539
 540
 541
/* Set of input data handling functions. The initializers are positional and
 * must follow the member order of CharSourceFunctions: MarkStart, NextChar,
 * Done.
 */
static const CharSourceFunctions IDFunc = {
    IDMarkStart,
    IDNextChar,
    IDDone
};
 548
 549
 550
 551 void NewInputData (char* Text, int Malloced)
 552 /* Add a chunk of input data to the input stream */
 553 {
 554     CharSource* S;
 555
 556     /* Create a new input source variable and initialize it */
 557     S                   = xmalloc (sizeof (*S));
 558     S->Func             = &IDFunc;
 559     S->V.Data.Text      = Text;
 560     S->V.Data.Pos       = Text;
 561     S->V.Data.Malloced  = Malloced;
 562
 563     /* Use this input source */
 564     UseCharSource (S);
 565 }
 566
 567
 568
 569 /*****************************************************************************/
 570 /*                    Character classification functions                     */
 571 /*****************************************************************************/
 572
 573
 574
 575 int IsIdChar (int C)
 576 /* Return true if the character is a valid character for an identifier */
 577 {
 578     return IsAlNum (C)                  ||
 579            (C == '_')                   ||
 580            (C == '@' && AtInIdents)     ||
 581            (C == '$' && DollarInIdents);
 582 }
 583
 584
 585
 586 int IsIdStart (int C)
 587 /* Return true if the character may start an identifier */
 588 {
 589     return IsAlpha (C) || C == '_';
 590 }
 591
 592
 593
 594 /*****************************************************************************/
 595 /*                                   Code                                    */
 596 /*****************************************************************************/
 597
 598
 599
 600 static unsigned DigitVal (unsigned char C)
 601 /* Convert a digit into it's numerical representation */
 602 {
 603     if (IsDigit (C)) {
 604         return C - '0';
 605     } else {
 606         return toupper (C) - 'A' + 10;
 607     }
 608 }
 609
 610
 611
 612 static void NextChar (void)
 613 /* Read the next character from the input file */
 614 {
 615     Source->Func->NextChar (Source);
 616 }
 617
 618
 619
void LocaseSVal (void)
/* Make SVal lower case. The conversion is applied to the global string
 * attribute SVal by means of SB_ToLower().
 */
{
    SB_ToLower (&SVal);
}
 625
 626
 627
void UpcaseSVal (void)
/* Make SVal upper case. The conversion is applied to the global string
 * attribute SVal by means of SB_ToUpper().
 */
{
    SB_ToUpper (&SVal);
}
 633
 634
 635
 636 static int CmpDotKeyword (const void* K1, const void* K2)
 637 /* Compare function for the dot keyword search */
 638 {
 639     return strcmp (((struct DotKeyword*)K1)->Key, ((struct DotKeyword*)K2)->Key);
 640 }
 641
 642
 643
 644 static unsigned char FindDotKeyword (void)
 645 /* Find the dot keyword in SVal. Return the corresponding token if found,
 646  * return TOK_NONE if not found.
 647  */
 648 {
 649     struct DotKeyword K;
 650     struct DotKeyword* R;
 651
 652     /* Initialize K */
 653     K.Key = SB_GetConstBuf (&SVal);
 654     K.Tok = 0;
 655
 656     /* If we aren't in ignore case mode, we have to uppercase the keyword */
 657     if (!IgnoreCase) {
 658         UpcaseSVal ();
 659     }
 660
 661     /* Search for the keyword */
 662     R = bsearch (&K, DotKeywords, sizeof (DotKeywords) / sizeof (DotKeywords [0]),
 663                  sizeof (DotKeywords [0]), CmpDotKeyword);
 664     if (R != 0) {
 665         return R->Tok;
 666     } else {
 667         return TOK_NONE;
 668     }
 669 }
 670
 671
 672
 673 static void ReadIdent (void)
 674 /* Read an identifier from the current input position into Ident. Filling SVal
 675  * starts at the current position with the next character in C. It is assumed
 676  * that any characters already filled in are ok, and the character in C is
 677  * checked.
 678  */
 679 {
 680     /* Read the identifier */
 681     do {
 682         SB_AppendChar (&SVal, C);
 683         NextChar ();
 684     } while (IsIdChar (C));
 685     SB_Terminate (&SVal);
 686
 687     /* If we should ignore case, convert the identifier to upper case */
 688     if (IgnoreCase) {
 689         UpcaseSVal ();
 690     }
 691 }
 692
 693
 694
 695 static void ReadStringConst (int StringTerm)
 696 /* Read a string constant into SVal. */
 697 {
 698     /* Skip the leading string terminator */
 699     NextChar ();
 700
 701     /* Read the string */
 702     while (1) {
 703         if (C == StringTerm) {
 704             break;
 705         }
 706         if (C == '\n' || C == EOF) {
 707             Error ("Newline in string constant");
 708             break;
 709         }
 710
 711         /* Append the char to the string */
 712         SB_AppendChar (&SVal, C);
 713
 714         /* Skip the character */
 715         NextChar ();
 716     }
 717
 718     /* Skip the trailing terminator */
 719     NextChar ();
 720
 721     /* Terminate the string */
 722     SB_Terminate (&SVal);
 723 }
 724
 725
 726
 727 static int Sweet16Reg (const StrBuf* Id)
 728 /* Check if the given identifier is a sweet16 register. Return -1 if this is
 729  * not the case, return the register number otherwise.
 730  */
 731 {
 732     unsigned RegNum;
 733     char Check;
 734
 735     if (SB_GetLen (Id) < 2) {
 736         return -1;
 737     }
 738     if (toupper (SB_AtUnchecked (Id, 0)) != 'R') {
 739         return -1;
 740     }
 741     if (!IsDigit (SB_AtUnchecked (Id, 1))) {
 742         return -1;
 743     }
 744
 745     if (sscanf (SB_GetConstBuf (Id)+1, "%u%c", &RegNum, &Check) != 1 || RegNum > 15) {
 746         /* Invalid register */
 747         return -1;
 748     }
 749
 750     /* The register number is valid */
 751     return (int) RegNum;
 752 }
 753
 754
 755
 756 void NextRawTok (void)
 757 /* Read the next raw token from the input stream */
 758 {
 759     /* If we've a forced end of assembly, don't read further */
 760     if (ForcedEnd) {
 761         Tok = TOK_EOF;
 762         return;
 763     }
 764
 765 Restart:
 766     /* Check if we have tokens from another input source */
 767     if (InputFromStack ()) {
 768         return;
 769     }
 770
 771 Again:
 772     /* Skip whitespace, remember if we had some */
 773     if ((WS = IsBlank (C)) != 0) {
 774         do {
 775             NextChar ();
 776         } while (IsBlank (C));
 777     }
 778
 779     /* Mark the file position of the next token */
 780     Source->Func->MarkStart (Source);
 781
 782     /* Clear the string attribute */
 783     SB_Clear (&SVal);
 784
 785     /* Hex number or PC symbol? */
 786     if (C == '$') {
 787         NextChar ();
 788
 789         /* Hex digit must follow or DollarIsPC must be enabled */
 790         if (!IsXDigit (C)) {
 791             if (DollarIsPC) {
 792                 Tok = TOK_PC;
 793                 return;
 794             } else {
 795                 Error ("Hexadecimal digit expected");
 796             }
 797         }
 798
 799         /* Read the number */
 800         IVal = 0;
 801         while (IsXDigit (C)) {
 802             if (IVal & 0xF0000000) {
 803                 Error ("Overflow in hexadecimal number");
 804                 IVal = 0;
 805             }
 806             IVal = (IVal << 4) + DigitVal (C);
 807             NextChar ();
 808         }
 809
 810         /* This is an integer constant */
 811         Tok = TOK_INTCON;
 812         return;
 813     }
 814
 815     /* Binary number? */
 816     if (C == '%') {
 817         NextChar ();
 818
 819         /* 0 or 1 must follow */
 820         if (!IsBDigit (C)) {
 821             Error ("Binary digit expected");
 822         }
 823
 824         /* Read the number */
 825         IVal = 0;
 826         while (IsBDigit (C)) {
 827             if (IVal & 0x80000000) {
 828                 Error ("Overflow in binary number");
 829                 IVal = 0;
 830             }
 831             IVal = (IVal << 1) + DigitVal (C);
 832             NextChar ();
 833         }
 834
 835         /* This is an integer constant */
 836         Tok = TOK_INTCON;
 837         return;
 838     }
 839
 840     /* Number? */
 841     if (IsDigit (C)) {
 842
 843         char Buf[16];
 844         unsigned Digits;
 845         unsigned Base;
 846         unsigned I;
 847         long     Max;
 848         unsigned DVal;
 849
 850         /* Ignore leading zeros */
 851         while (C == '0') {
 852             NextChar ();
 853         }
 854
 855         /* Read the number into Buf counting the digits */
 856         Digits = 0;
 857         while (IsXDigit (C)) {
 858
 859             /* Buf is big enough to allow any decimal and hex number to
 860              * overflow, so ignore excess digits here, they will be detected
 861              * when we convert the value.
 862              */
 863             if (Digits < sizeof (Buf)) {
 864                 Buf[Digits++] = C;
 865             }
 866
 867             NextChar ();
 868         }
 869
 870         /* Allow zilog/intel style hex numbers with a 'h' suffix */
 871         if (C == 'h' || C == 'H') {
 872             NextChar ();
 873             Base = 16;
 874             Max  = 0xFFFFFFFFUL / 16;
 875         } else {
 876             Base = 10;
 877             Max  = 0xFFFFFFFFUL / 10;
 878         }
 879
 880         /* Convert the number using the given base */
 881         IVal = 0;
 882         for (I = 0; I < Digits; ++I) {
 883             if (IVal > Max) {
 884                 Error ("Number out of range");
 885                 IVal = 0;
 886                 break;
 887             }
 888             DVal = DigitVal (Buf[I]);
 889             if (DVal > Base) {
 890                 Error ("Invalid digits in number");
 891                 IVal = 0;
 892                 break;
 893             }
 894             IVal = (IVal * Base) + DVal;
 895         }
 896
 897         /* This is an integer constant */
 898         Tok = TOK_INTCON;
 899         return;
 900     }
 901
 902     /* Control command? */
 903     if (C == '.') {
 904
 905         /* Remember and skip the dot */
 906         NextChar ();
 907
 908         /* Check if it's just a dot */
 909         if (!IsIdStart (C)) {
 910
 911             /* Just a dot */
 912             Tok = TOK_DOT;
 913
 914         } else {
 915
 916             /* Read the remainder of the identifier */
 917             SB_AppendChar (&SVal, '.');
 918             ReadIdent ();
 919
 920             /* Dot keyword, search for it */
 921             Tok = FindDotKeyword ();
 922             if (Tok == TOK_NONE) {
 923
 924                 /* Not found */
 925                 if (!LeadingDotInIdents) {
 926                     /* Invalid pseudo instruction */
 927                     Error ("`%m%p' is not a recognized control command", &SVal);
 928                     goto Again;
 929                 }
 930
 931                 /* An identifier with a dot. Check if it's a define style
 932                  * macro.
 933                  */
 934                 if (IsDefine (&SVal)) {
 935                     /* This is a define style macro - expand it */
 936                     MacExpandStart ();
 937                     goto Restart;
 938                 }
 939
 940                 /* Just an identifier with a dot */
 941                 Tok = TOK_IDENT;
 942             }
 943
 944         }
 945         return;
 946     }
 947
 948     /* Indirect op for sweet16 cpu. Must check this before checking for local
 949      * symbols, because these may also use the '@' symbol.
 950      */
 951     if (CPU == CPU_SWEET16 && C == '@') {
 952         NextChar ();
 953         Tok = TOK_AT;
 954         return;
 955     }
 956
 957     /* Local symbol? */
 958     if (C == LocalStart) {
 959
 960         /* Read the identifier. */
 961         ReadIdent ();
 962
 963         /* Start character alone is not enough */
 964         if (SB_GetLen (&SVal) == 1) {
 965             Error ("Invalid cheap local symbol");
 966             goto Again;
 967         }
 968
 969         /* A local identifier */
 970         Tok = TOK_LOCAL_IDENT;
 971         return;
 972     }
 973
 974
 975     /* Identifier or keyword? */
 976     if (IsIdStart (C)) {
 977
 978         /* Read the identifier */
 979         ReadIdent ();
 980
 981         /* Check for special names. Bail out if we have identified the type of
 982          * the token. Go on if the token is an identifier.
 983          */
 984         if (SB_GetLen (&SVal) == 1) {
 985             switch (toupper (SB_AtUnchecked (&SVal, 0))) {
 986
 987                 case 'A':
 988                     if (C == ':') {
 989                         NextChar ();
 990                         Tok = TOK_OVERRIDE_ABS;
 991                     } else {
 992                         Tok = TOK_A;
 993                     }
 994                     return;
 995
 996                 case 'F':
 997                     if (C == ':') {
 998                         NextChar ();
 999                         Tok = TOK_OVERRIDE_FAR;
1000                         return;
1001                     }
1002                     break;
1003
1004                 case 'S':
1005                     Tok = TOK_S;
1006                     return;
1007
1008                 case 'X':
1009                     Tok = TOK_X;
1010                     return;
1011
1012                 case 'Y':
1013                     Tok = TOK_Y;
1014                     return;
1015
1016                 case 'Z':
1017                     if (C == ':') {
1018                         NextChar ();
1019                         Tok = TOK_OVERRIDE_ZP;
1020                         return;
1021                     }
1022                     break;
1023
1024                 default:
1025                     break;
1026             }
1027
1028         } else if (CPU == CPU_SWEET16 && (IVal = Sweet16Reg (&SVal)) >= 0) {
1029
1030             /* A sweet16 register number in sweet16 mode */
1031             Tok = TOK_REG;
1032             return;
1033
1034         }
1035
1036         /* Check for define style macro */
1037         if (IsDefine (&SVal)) {
1038             /* Macro - expand it */
1039             MacExpandStart ();
1040             goto Restart;
1041         } else {
1042             /* An identifier */
1043             Tok = TOK_IDENT;
1044         }
1045         return;
1046     }
1047
1048     /* Ok, let's do the switch */
1049 CharAgain:
1050     switch (C) {
1051
1052         case '+':
1053             NextChar ();
1054             Tok = TOK_PLUS;
1055             return;
1056
1057         case '-':
1058             NextChar ();
1059             Tok = TOK_MINUS;
1060             return;
1061
1062         case '/':
1063             NextChar ();
1064             if (C != '*') {
1065                 Tok = TOK_DIV;
1066             } else {
1067                 /* Remember the position, then skip the '*' */
1068                 FilePos Pos = CurPos;
1069                 NextChar ();
1070                 do {
1071                     while (C !=  '*') {
1072                         if (C == EOF) {
1073                             PError (&Pos, "Unterminated comment");
1074                             goto Again;
1075                         }
1076                         NextChar ();
1077                     }
1078                     NextChar ();
1079                 } while (C != '/');
1080                 NextChar ();
1081                 goto Again;
1082             }
1083             return;
1084
1085         case '*':
1086             NextChar ();
1087             Tok = TOK_MUL;
1088             return;
1089
1090         case '^':
1091             NextChar ();
1092             Tok = TOK_XOR;
1093             return;
1094
1095         case '&':
1096             NextChar ();
1097             if (C == '&') {
1098                 NextChar ();
1099                 Tok = TOK_BOOLAND;
1100             } else {
1101                 Tok = TOK_AND;
1102             }
1103             return;
1104
1105         case '|':
1106             NextChar ();
1107             if (C == '|') {
1108                 NextChar ();
1109                 Tok = TOK_BOOLOR;
1110             } else {
1111                 Tok = TOK_OR;
1112             }
1113             return;
1114
1115         case ':':
1116             NextChar ();
1117             switch (C) {
1118
1119                 case ':':
1120                     NextChar ();
1121                     Tok = TOK_NAMESPACE;
1122                     break;
1123
1124                 case '-':
1125                     IVal = 0;
1126                     do {
1127                         --IVal;
1128                         NextChar ();
1129                     } while (C == '-');
1130                     Tok = TOK_ULABEL;
1131                     break;
1132
1133                 case '+':
1134                     IVal = 0;
1135                     do {
1136                         ++IVal;
1137                         NextChar ();
1138                     } while (C == '+');
1139                     Tok = TOK_ULABEL;
1140                     break;
1141
1142                 case '=':
1143                     NextChar ();
1144                     Tok = TOK_ASSIGN;
1145                     break;
1146
1147                 default:
1148                     Tok = TOK_COLON;
1149                     break;
1150             }
1151             return;
1152
1153         case ',':
1154             NextChar ();
1155             Tok = TOK_COMMA;
1156             return;
1157
1158         case ';':
1159             NextChar ();
1160             while (C != '\n' && C != EOF) {
1161                 NextChar ();
1162             }
1163             goto CharAgain;
1164
1165         case '#':
1166             NextChar ();
1167             Tok = TOK_HASH;
1168             return;
1169
1170         case '(':
1171             NextChar ();
1172             Tok = TOK_LPAREN;
1173             return;
1174
1175         case ')':
1176             NextChar ();
1177             Tok = TOK_RPAREN;
1178             return;
1179
1180         case '[':
1181             NextChar ();
1182             Tok = TOK_LBRACK;
1183             return;
1184
1185         case ']':
1186             NextChar ();
1187             Tok = TOK_RBRACK;
1188             return;
1189
1190         case '{':
1191             NextChar ();
1192             Tok = TOK_LCURLY;
1193             return;
1194
1195         case '}':
1196             NextChar ();
1197             Tok = TOK_RCURLY;
1198             return;
1199
1200         case '<':
1201             NextChar ();
1202             if (C == '=') {
1203                 NextChar ();
1204                 Tok = TOK_LE;
1205             } else if (C == '<') {
1206                 NextChar ();
1207                 Tok = TOK_SHL;
1208             } else if (C == '>') {
1209                 NextChar ();
1210                 Tok = TOK_NE;
1211             } else {
1212                 Tok = TOK_LT;
1213             }
1214             return;
1215
1216         case '=':
1217             NextChar ();
1218             Tok = TOK_EQ;
1219             return;
1220
1221         case '!':
1222             NextChar ();
1223             Tok = TOK_BOOLNOT;
1224             return;
1225
1226         case '>':
1227             NextChar ();
1228             if (C == '=') {
1229                 NextChar ();
1230                 Tok = TOK_GE;
1231             } else if (C == '>') {
1232                 NextChar ();
1233                 Tok = TOK_SHR;
1234             } else {
1235                 Tok = TOK_GT;
1236             }
1237             return;
1238
1239         case '~':
1240             NextChar ();
1241             Tok = TOK_NOT;
1242             return;
1243
1244         case '\'':
1245             /* Hack: If we allow ' as terminating character for strings, read
1246              * the following stuff as a string, and check for a one character
1247              * string later.
1248              */
1249             if (LooseStringTerm) {
1250                 ReadStringConst ('\'');
1251                 if (SB_GetLen (&SVal) == 1) {
1252                     IVal = SB_AtUnchecked (&SVal, 0);
1253                     Tok = TOK_CHARCON;
1254                 } else {
1255                     Tok = TOK_STRCON;
1256                 }
1257             } else {
1258                 /* Always a character constant */
1259                 NextChar ();
1260                 if (C == EOF || IsControl (C)) {
1261                     Error ("Illegal character constant");
1262                     goto CharAgain;
1263                 }
1264                 IVal = C;
1265                 Tok = TOK_CHARCON;
1266                 NextChar ();
1267                 if (C != '\'') {
1268                     if (!MissingCharTerm) {
1269                         Error ("Illegal character constant");
1270                     }
1271                 } else {
1272                     NextChar ();
1273                 }
1274             }
1275             return;
1276
1277         case '\"':
1278             ReadStringConst ('\"');
1279             Tok = TOK_STRCON;
1280             return;
1281
1282         case '\\':
1283             /* Line continuation? */
1284             if (LineCont) {
1285                 NextChar ();
1286                 if (C == '\n') {
1287                     /* Handle as white space */
1288                     NextChar ();
1289                     C = ' ';
1290                     goto Again;
1291                 }
1292             }
1293             break;
1294
1295         case '\n':
1296             NextChar ();
1297             Tok = TOK_SEP;
1298             return;
1299
1300         case EOF:
1301             CheckInputStack ();
1302             /* In case of the main file, do not close it, but return EOF. */
1303             if (Source && Source->Next) {
1304                 DoneCharSource ();
1305                 goto Again;
1306             } else {
1307                 Tok = TOK_EOF;
1308             }
1309             return;
1310     }
1311
1312     /* If we go here, we could not identify the current character. Skip it
1313      * and try again.
1314      */
1315     Error ("Invalid input character: 0x%02X", C & 0xFF);
1316     NextChar ();
1317     goto Again;
1318 }
1319
1320
1321
1322 int GetSubKey (const char** Keys, unsigned Count)
1323 /* Search for a subkey in a table of keywords. The current token must be an
1324  * identifier and all keys must be in upper case. The identifier will be
1325  * uppercased in the process. The function returns the index of the keyword,
1326  * or -1 if the keyword was not found.
1327  */
1328 {
1329     unsigned I;
1330
1331     /* Must have an identifier */
1332     PRECONDITION (Tok == TOK_IDENT);
1333
1334     /* If we aren't in ignore case mode, we have to uppercase the identifier */
1335     if (!IgnoreCase) {
1336         UpcaseSVal ();
1337     }
1338
1339     /* Do a linear search (a binary search is not worth the effort) */
1340     for (I = 0; I < Count; ++I) {
1341         if (SB_CompareStr (&SVal, Keys [I]) == 0) {
1342             /* Found it */
1343             return I;
1344         }
1345     }
1346
1347     /* Not found */
1348     return -1;
1349 }
1350
1351
1352
1353 unsigned char ParseAddrSize (void)
1354 /* Check if the next token is a keyword that denotes an address size specifier.
1355  * If so, return the corresponding address size constant, otherwise output an
1356  * error message and return ADDR_SIZE_DEFAULT.
1357  */
1358 {
1359     static const char* Keys[] = {
1360         "DIRECT", "ZEROPAGE", "ZP",
1361         "ABSOLUTE", "ABS", "NEAR",
1362         "FAR",
1363         "LONG", "DWORD",
1364     };
1365
1366     /* Check for an identifier */
1367     if (Tok != TOK_IDENT) {
1368         Error ("Address size specifier expected");
1369         return ADDR_SIZE_DEFAULT;
1370     }
1371
1372     /* Search for the attribute */
1373     switch (GetSubKey (Keys, sizeof (Keys) / sizeof (Keys [0]))) {
1374         case 0:
1375         case 1:
1376         case 2: return ADDR_SIZE_ZP;
1377         case 3:
1378         case 4:
1379         case 5: return ADDR_SIZE_ABS;
1380         case 6: return ADDR_SIZE_FAR;
1381         case 7:
1382         case 8: return ADDR_SIZE_LONG;
1383         default:
1384             Error ("Address size specifier expected");
1385             return ADDR_SIZE_DEFAULT;
1386     }
1387 }
1388
1389
1390
void InitScanner (const char* InFile)
/* Set up the scanner for a run. InFile is the name of the input file; it is
 * passed on to NewInputFile, which opens it as input for the scanner.
 */
{
    NewInputFile (InFile);
}
1397
1398
1399
void DoneScanner (void)
/* Shut the scanner down and release its resources. This is done by calling
 * DoneCharSource, the same routine the scanner uses to drop a nested input
 * source once it has reached its end.
 */
{
    DoneCharSource ();
}
1405
1406
1407