git.sur5r.net Git - cc65/blob - src/ca65/scanner.c

   1 /*****************************************************************************/
   2 /*                                                                           */
   3 /*                                 scanner.c                                 */
   4 /*                                                                           */
   5 /*                  The scanner for the ca65 macroassembler                  */
   6 /*                                                                           */
   7 /*                                                                           */
   8 /*                                                                           */
   9 /* (C) 1998-2010, Ullrich von Bassewitz                                      */
  10 /*                Roemerstrasse 52                                           */
  11 /*                D-70794 Filderstadt                                        */
  12 /* EMail:         uz@cc65.org                                                */
  13 /*                                                                           */
  14 /*                                                                           */
  15 /* This software is provided 'as-is', without any expressed or implied       */
  16 /* warranty.  In no event will the authors be held liable for any damages    */
  17 /* arising from the use of this software.                                    */
  18 /*                                                                           */
  19 /* Permission is granted to anyone to use this software for any purpose,     */
  20 /* including commercial applications, and to alter it and redistribute it    */
  21 /* freely, subject to the following restrictions:                            */
  22 /*                                                                           */
  23 /* 1. The origin of this software must not be misrepresented; you must not   */
  24 /*    claim that you wrote the original software. If you use this software   */
  25 /*    in a product, an acknowledgment in the product documentation would be  */
  26 /*    appreciated but is not required.                                       */
  27 /* 2. Altered source versions must be plainly marked as such, and must not   */
  28 /*    be misrepresented as being the original software.                      */
  29 /* 3. This notice may not be removed or altered from any source              */
  30 /*    distribution.                                                          */
  31 /*                                                                           */
  32 /*****************************************************************************/
  33
  34
  35
  36 #include <stdio.h>
  37 #include <stdlib.h>
  38 #include <string.h>
  39 #include <ctype.h>
  40 #include <errno.h>
  41 #include <sys/types.h>          /* EMX needs this */
  42 #include <sys/stat.h>
  43
  44 /* common */
  45 #include "addrsize.h"
  46 #include "attrib.h"
  47 #include "chartype.h"
  48 #include "check.h"
  49 #include "fname.h"
  50 #include "xmalloc.h"
  51
  52 /* ca65 */
  53 #include "condasm.h"
  54 #include "error.h"
  55 #include "filetab.h"
  56 #include "global.h"
  57 #include "incpath.h"
  58 #include "instr.h"
  59 #include "istack.h"
  60 #include "listing.h"
  61 #include "macro.h"
  62 #include "toklist.h"
  63 #include "scanner.h"
  64
  65
  66
  67 /*****************************************************************************/
  68 /*                                   Data                                    */
  69 /*****************************************************************************/
  70
  71
  72
/* Attributes of the token that was scanned most recently. Tok is saved in
 * and restored from the active CharSource whenever an input source is
 * pushed or popped.
 */
Token Tok = TOK_NONE;                   /* Current token */
int WS;                                 /* Flag: Whitespace before token */
long IVal;                              /* Integer token attribute (value of an integer constant) */
StrBuf SVal = STATIC_STRBUF_INITIALIZER;/* String token attribute */

/* Set from the file position of the active input file each time the start
 * of a token is marked (see IFMarkStart).
 */
FilePos CurPos = { 0, 0, 0 };           /* Name and position in current file */
  79
  80
  81
/* Struct to handle include files. An instance is embedded in every
 * CharSource that reads from a file.
 */
typedef struct InputFile InputFile;
struct InputFile {
    FILE*           F;                  /* Input file stream, closed by IFDone */
    FilePos         Pos;                /* Position in file (file index, line, column) */
    Token           Tok;                /* Last token */
    int             C;                  /* Last character */
    char            Line[256];          /* The current input line, filled by fgets */
    int             IncSearchPath;      /* True if we've added an include search path */
    int             BinSearchPath;      /* True if we've added a binary include search path */
    InputFile*      Next;               /* Linked list of input files */
};
  94
/* Struct to handle textual input data. An instance is embedded in every
 * CharSource that reads from a NUL terminated memory buffer.
 */
typedef struct InputData InputData;
struct InputData {
    char*           Text;               /* Pointer to the text data */
    const char*     Pos;                /* Pointer to current position within Text */
    int             Malloced;           /* Memory was malloced, IDDone must free Text */
    Token           Tok;                /* Last token */
    int             C;                  /* Last character */
    InputData*      Next;               /* Linked list of input data */
};
 105
/* Input source: Either file or data. Forward declared here because the
 * function table below takes pointers to it.
 */
typedef struct CharSource CharSource;

/* Set of input functions. Each kind of char source supplies one constant
 * table of these; the scanner only talks to a source through this table.
 */
typedef struct CharSourceFunctions CharSourceFunctions;
struct CharSourceFunctions {
    void (*MarkStart) (CharSource*);    /* Mark the start pos of a token */
    void (*NextChar) (CharSource*);     /* Read next char from input into C */
    void (*Done) (CharSource*);         /* Close input source */
};
 116
/* Input source: Either file or data. Sources form a stack that is linked
 * through Next; the global Source points to the top of this stack.
 */
struct CharSource {
    CharSource*                 Next;   /* Linked list of char sources */
    Token                       Tok;    /* Token that was current when this source was pushed */
    int                         C;      /* Character that was current when this source was pushed */
    const CharSourceFunctions*  Func;   /* Pointer to function table */
    union {
        InputFile               File;   /* File data */
        InputData               Data;   /* Textual data */
    }                           V;      /* Which member is valid is implied by Func */
};
 128
/* Current input variables */
static CharSource* Source       = 0;    /* Current char source (top of the source stack) */
static unsigned     FCount      = 0;    /* Count of input files that are currently open */
static int          C           = 0;    /* Current input character, EOF at end of a source */

/* Force end of assembly. If non zero, NextRawTok returns TOK_EOF without
 * reading any more input.
 */
int               ForcedEnd     = 0;
 136
/* List of dot keywords with the corresponding tokens. FindDotKeyword looks
 * up entries using bsearch with a strcmp based compare function, so the
 * entries MUST be kept sorted by Key in strcmp order. Several keys are
 * aliases that map to the same token (".BYT"/".BYTE", ".MAC"/".MACRO", ...).
 */
struct DotKeyword {
    const char* Key;                    /* MUST be first field */
    Token       Tok;
} DotKeywords [] = {
    { ".A16",           TOK_A16         },
    { ".A8",            TOK_A8          },
    { ".ADDR",          TOK_ADDR        },
    { ".ALIGN",         TOK_ALIGN       },
    { ".AND",           TOK_BOOLAND     },
    { ".ASCIIZ",        TOK_ASCIIZ      },
    { ".ASSERT",        TOK_ASSERT      },
    { ".AUTOIMPORT",    TOK_AUTOIMPORT  },
    { ".BANKBYTE",      TOK_BANKBYTE    },
    { ".BANKBYTES",     TOK_BANKBYTES   },
    { ".BITAND",        TOK_AND         },
    { ".BITNOT",        TOK_NOT         },
    { ".BITOR",         TOK_OR          },
    { ".BITXOR",        TOK_XOR         },
    { ".BLANK",         TOK_BLANK       },
    { ".BSS",           TOK_BSS         },
    { ".BYT",           TOK_BYTE        },
    { ".BYTE",          TOK_BYTE        },
    { ".CASE",          TOK_CASE        },
    { ".CHARMAP",       TOK_CHARMAP     },
    { ".CODE",          TOK_CODE        },
    { ".CONCAT",        TOK_CONCAT      },
    { ".CONDES",        TOK_CONDES      },
    { ".CONST",         TOK_CONST       },
    { ".CONSTRUCTOR",   TOK_CONSTRUCTOR },
    { ".CPU",           TOK_CPU         },
    { ".DATA",          TOK_DATA        },
    { ".DBG",           TOK_DBG         },
    { ".DBYT",          TOK_DBYT        },
    { ".DEBUGINFO",     TOK_DEBUGINFO   },
    { ".DEF",           TOK_DEFINED     },
    { ".DEFINE",        TOK_DEFINE      },
    { ".DEFINED",       TOK_DEFINED     },
    { ".DESTRUCTOR",    TOK_DESTRUCTOR  },
    { ".DWORD",         TOK_DWORD       },
    { ".ELSE",          TOK_ELSE        },
    { ".ELSEIF",        TOK_ELSEIF      },
    { ".END",           TOK_END         },
    { ".ENDENUM",       TOK_ENDENUM     },
    { ".ENDIF",         TOK_ENDIF       },
    { ".ENDMAC",        TOK_ENDMACRO    },
    { ".ENDMACRO",      TOK_ENDMACRO    },
    { ".ENDPROC",       TOK_ENDPROC     },
    { ".ENDREP",        TOK_ENDREP      },
    { ".ENDREPEAT",     TOK_ENDREP      },
    { ".ENDSCOPE",      TOK_ENDSCOPE    },
    { ".ENDSTRUCT",     TOK_ENDSTRUCT   },
    { ".ENDUNION",      TOK_ENDUNION    },
    { ".ENUM",          TOK_ENUM        },
    { ".ERROR",         TOK_ERROR       },
    { ".EXITMAC",       TOK_EXITMACRO   },
    { ".EXITMACRO",     TOK_EXITMACRO   },
    { ".EXPORT",        TOK_EXPORT      },
    { ".EXPORTZP",      TOK_EXPORTZP    },
    { ".FARADDR",       TOK_FARADDR     },
    { ".FEATURE",       TOK_FEATURE     },
    { ".FILEOPT",       TOK_FILEOPT     },
    { ".FOPT",          TOK_FILEOPT     },
    { ".FORCEIMPORT",   TOK_FORCEIMPORT },
    { ".FORCEWORD",     TOK_FORCEWORD   },
    { ".GLOBAL",        TOK_GLOBAL      },
    { ".GLOBALZP",      TOK_GLOBALZP    },
    { ".HIBYTE",        TOK_HIBYTE      },
    { ".HIBYTES",       TOK_HIBYTES     },
    { ".HIWORD",        TOK_HIWORD      },
    { ".I16",           TOK_I16         },
    { ".I8",            TOK_I8          },
    { ".IDENT",         TOK_MAKEIDENT   },
    { ".IF",            TOK_IF          },
    { ".IFBLANK",       TOK_IFBLANK     },
    { ".IFCONST",       TOK_IFCONST     },
    { ".IFDEF",         TOK_IFDEF       },
    { ".IFNBLANK",      TOK_IFNBLANK    },
    { ".IFNCONST",      TOK_IFNCONST    },
    { ".IFNDEF",        TOK_IFNDEF      },
    { ".IFNREF",        TOK_IFNREF      },
    { ".IFP02",         TOK_IFP02       },
    { ".IFP816",        TOK_IFP816      },
    { ".IFPC02",        TOK_IFPC02      },
    { ".IFPSC02",       TOK_IFPSC02     },
    { ".IFREF",         TOK_IFREF       },
    { ".IMPORT",        TOK_IMPORT      },
    { ".IMPORTZP",      TOK_IMPORTZP    },
    { ".INCBIN",        TOK_INCBIN      },
    { ".INCLUDE",       TOK_INCLUDE     },
    { ".INTERRUPTOR",   TOK_INTERRUPTOR },
    { ".LEFT",          TOK_LEFT        },
    { ".LINECONT",      TOK_LINECONT    },
    { ".LIST",          TOK_LIST        },
    { ".LISTBYTES",     TOK_LISTBYTES   },
    { ".LOBYTE",        TOK_LOBYTE      },
    { ".LOBYTES",       TOK_LOBYTES     },
    { ".LOCAL",         TOK_LOCAL       },
    { ".LOCALCHAR",     TOK_LOCALCHAR   },
    { ".LOWORD",        TOK_LOWORD      },
    { ".MAC",           TOK_MACRO       },
    { ".MACPACK",       TOK_MACPACK     },
    { ".MACRO",         TOK_MACRO       },
    { ".MATCH",         TOK_MATCH       },
    { ".MAX",           TOK_MAX         },
    { ".MID",           TOK_MID         },
    { ".MIN",           TOK_MIN         },
    { ".MOD",           TOK_MOD         },
    { ".NOT",           TOK_BOOLNOT     },
    { ".NULL",          TOK_NULL        },
    { ".OR",            TOK_BOOLOR      },
    { ".ORG",           TOK_ORG         },
    { ".OUT",           TOK_OUT         },
    { ".P02",           TOK_P02         },
    { ".P816",          TOK_P816        },
    { ".PAGELEN",       TOK_PAGELENGTH  },
    { ".PAGELENGTH",    TOK_PAGELENGTH  },
    { ".PARAMCOUNT",    TOK_PARAMCOUNT  },
    { ".PC02",          TOK_PC02        },
    { ".POPCPU",        TOK_POPCPU      },
    { ".POPSEG",        TOK_POPSEG      },
    { ".PROC",          TOK_PROC        },
    { ".PSC02",         TOK_PSC02       },
    { ".PUSHCPU",       TOK_PUSHCPU     },
    { ".PUSHSEG",       TOK_PUSHSEG     },
    { ".REF",           TOK_REFERENCED  },
    { ".REFERENCED",    TOK_REFERENCED  },
    { ".RELOC",         TOK_RELOC       },
    { ".REPEAT",        TOK_REPEAT      },
    { ".RES",           TOK_RES         },
    { ".RIGHT",         TOK_RIGHT       },
    { ".RODATA",        TOK_RODATA      },
    { ".SCOPE",         TOK_SCOPE       },
    { ".SEGMENT",       TOK_SEGMENT     },
    { ".SET",           TOK_SET         },
    { ".SETCPU",        TOK_SETCPU      },
    { ".SHL",           TOK_SHL         },
    { ".SHR",           TOK_SHR         },
    { ".SIZEOF",        TOK_SIZEOF      },
    { ".SMART",         TOK_SMART       },
    { ".SPRINTF",       TOK_SPRINTF     },
    { ".STRAT",         TOK_STRAT       },
    { ".STRING",        TOK_STRING      },
    { ".STRLEN",        TOK_STRLEN      },
    { ".STRUCT",        TOK_STRUCT      },
    { ".SUNPLUS",       TOK_SUNPLUS     },
    { ".TAG",           TOK_TAG         },
    { ".TCOUNT",        TOK_TCOUNT      },
    { ".TIME",          TOK_TIME        },
    { ".UNION",         TOK_UNION       },
    { ".VERSION",       TOK_VERSION     },
    { ".WARNING",       TOK_WARNING     },
    { ".WORD",          TOK_WORD        },
    { ".XMATCH",        TOK_XMATCH      },
    { ".XOR",           TOK_BOOLXOR     },
    { ".ZEROPAGE",      TOK_ZEROPAGE    },
};
 294
 295
 296
 297 /*****************************************************************************/
 298 /*                            CharSource functions                           */
 299 /*****************************************************************************/
 300
 301
 302
 303 static void UseCharSource (CharSource* S)
 304 /* Initialize a new input source and start to use it. */
 305 {
 306     /* Remember the current input char and token */
 307     S->Tok      = Tok;
 308     S->C        = C;
 309
 310     /* Use the new input source */
 311     S->Next     = Source;
 312     Source      = S;
 313
 314     /* Read the first character from the new file */
 315     S->Func->NextChar (S);
 316
 317     /* Setup the next token so it will be skipped on the next call to
 318      * NextRawTok().
 319      */
 320     Tok = TOK_SEP;
 321 }
 322
 323
 324
 325 static void DoneCharSource (void)
 326 /* Close the top level character source */
 327 {
 328     CharSource* S;
 329
 330     /* First, call the type specific function */
 331     Source->Func->Done (Source);
 332
 333     /* Restore the old token */
 334     Tok = Source->Tok;
 335     C   = Source->C;
 336
 337     /* Remember the last stacked input source */
 338     S = Source->Next;
 339
 340     /* Delete the top level one ... */
 341     xfree (Source);
 342
 343     /* ... and use the one before */
 344     Source = S;
 345 }
 346
 347
 348
 349 /*****************************************************************************/
 350 /*                            InputFile functions                            */
 351 /*****************************************************************************/
 352
 353
 354
 355 static void IFMarkStart (CharSource* S)
 356 /* Mark the start of the next token */
 357 {
 358     CurPos = S->V.File.Pos;
 359 }
 360
 361
 362
 363 static void IFNextChar (CharSource* S)
 364 /* Read the next character from the input file */
 365 {
 366     /* Check for end of line, read the next line if needed */
 367     while (S->V.File.Line [S->V.File.Pos.Col] == '\0') {
 368
 369         unsigned Len, Removed;
 370
 371         /* End of current line reached, read next line */
 372         if (fgets (S->V.File.Line, sizeof (S->V.File.Line), S->V.File.F) == 0) {
 373             /* End of file. Add an empty line to the listing. This is a
 374              * small hack needed to keep the PC output in sync.
 375              */
 376             NewListingLine ("", S->V.File.Pos.Name, FCount);
 377             C = EOF;
 378             return;
 379         }
 380
 381         /* For better handling of files with unusual line endings (DOS
 382          * files that are accidently translated on Unix for example),
 383          * first remove all whitespace at the end, then add a single
 384          * newline.
 385          */
 386         Len = strlen (S->V.File.Line);
 387         Removed = 0;
 388         while (Len > 0 && IsSpace (S->V.File.Line[Len-1])) {
 389             ++Removed;
 390             --Len;
 391         }
 392         if (Removed) {
 393             S->V.File.Line[Len+0] = '\n';
 394             S->V.File.Line[Len+1] = '\0';
 395         }
 396
 397         /* One more line */
 398         S->V.File.Pos.Line++;
 399         S->V.File.Pos.Col = 0;
 400
 401         /* Remember the new line for the listing */
 402         NewListingLine (S->V.File.Line, S->V.File.Pos.Name, FCount);
 403
 404     }
 405
 406     /* Return the next character from the file */
 407     C = S->V.File.Line [S->V.File.Pos.Col++];
 408 }
 409
 410
 411
 412 void IFDone (CharSource* S)
 413 /* Close the current input file */
 414 {
 415     /* We're at the end of an include file. Check if we have any
 416      * open .IFs, or any open token lists in this file. This
 417      * enforcement is artificial, using conditionals that start
 418      * in one file and end in another are uncommon, and don't
 419      * allowing these things will help finding errors.
 420      */
 421     CheckOpenIfs ();
 422
 423     /* If we've added search paths for this file, remove them */
 424     if (S->V.File.IncSearchPath) {
 425         PopSearchPath (IncSearchPath);
 426     }
 427     if (S->V.File.BinSearchPath) {
 428         PopSearchPath (BinSearchPath);
 429     }
 430
 431     /* Close the input file and decrement the file count. We will ignore
 432      * errors here, since we were just reading from the file.
 433      */
 434     (void) fclose (S->V.File.F);
 435     --FCount;
 436 }
 437
 438
 439
/* Set of input file handling functions. NewInputFile installs this table
 * in every char source it creates.
 */
static const CharSourceFunctions IFFunc = {
    IFMarkStart,                        /* MarkStart */
    IFNextChar,                         /* NextChar */
    IFDone                              /* Done */
};
 446
 447
 448
int NewInputFile (const char* Name)
/* Open a new input file. Returns true if the file could be successfully opened
 * and false otherwise. The first file opened (FCount == 0) is the main file
 * and is opened by name; any later file is treated as an include file and
 * is searched for in the include search path. On success, the file becomes
 * the active character source.
 */
{
    int         RetCode = 0;            /* Return code. Assume an error. */
    char*       PathName = 0;           /* Result of SearchFile, freed at ExitPoint */
    FILE*       F;
    struct stat Buf;
    StrBuf      NameBuf;                /* No need to initialize */
    StrBuf      Path = AUTO_STRBUF_INITIALIZER;
    unsigned    FileIdx;
    CharSource* S;


    /* If this is the main file, just try to open it. If it's an include file,
     * search for it using the include path list.
     */
    if (FCount == 0) {
        /* Main file */
        F = fopen (Name, "r");
        if (F == 0) {
            Fatal ("Cannot open input file `%s': %s", Name, strerror (errno));
        }
    } else {
        /* We are on include level. Search for the file in the include
         * directories.
         */
        PathName = SearchFile (IncSearchPath, Name);
        if (PathName == 0 || (F = fopen (PathName, "r")) == 0) {
            /* Not found or cannot open, print an error and bail out.
             * NOTE(review): if SearchFile returned 0, errno may not
             * describe the failure - confirm that SearchFile sets it.
             */
            Error ("Cannot open include file `%s': %s", Name, strerror (errno));
            goto ExitPoint;
        }

        /* Use the path name from now on */
        Name = PathName;
    }

    /* Stat the file and remember the values. There's a race condition here,
     * since we cannot use fileno() (non standard identifier in standard
     * header file), and therefore not fstat. When using stat with the
     * file name, there's a risk that the file was deleted and recreated
     * while it was open. Since mtime and size are only used to check
     * if a file has changed in the debugger, we will ignore this problem
     * here.
     */
    if (stat (Name, &Buf) != 0) {
        Fatal ("Cannot stat input file `%s': %s", Name, strerror (errno));
    }

    /* Add the file to the input file table and remember the index */
    FileIdx = AddFile (SB_InitFromString (&NameBuf, Name),
                       (FCount == 0)? FT_MAIN : FT_INCLUDE,
                       Buf.st_size, Buf.st_mtime);

    /* Create a new input source variable and initialize it. The empty line
     * together with column zero makes the first call to IFNextChar read a
     * line from the file.
     */
    S                   = xmalloc (sizeof (*S));
    S->Func             = &IFFunc;
    S->V.File.F         = F;
    S->V.File.Pos.Line  = 0;
    S->V.File.Pos.Col   = 0;
    S->V.File.Pos.Name  = FileIdx;
    S->V.File.Line[0]   = '\0';

    /* Push the path for this file onto the include search lists. The path
     * is the part of Name in front of the position returned by FindName.
     * The results are remembered so IFDone knows whether to pop the paths.
     */
    SB_CopyBuf (&Path, Name, FindName (Name) - Name);
    SB_Terminate (&Path);
    S->V.File.IncSearchPath = PushSearchPath (IncSearchPath, SB_GetConstBuf (&Path));
    S->V.File.BinSearchPath = PushSearchPath (BinSearchPath, SB_GetConstBuf (&Path));
    SB_Done (&Path);

    /* Count active input files */
    ++FCount;

    /* Use this input source */
    UseCharSource (S);

    /* File successfully opened */
    RetCode = 1;

ExitPoint:
    /* Free an allocated name buffer. PathName is still zero for the main
     * file.
     */
    xfree (PathName);

    /* Return the success code */
    return RetCode;
}
 537
 538
 539
 540 /*****************************************************************************/
 541 /*                            InputData functions                            */
 542 /*****************************************************************************/
 543
 544
 545
static void IDMarkStart (CharSource* S attribute ((unused)))
/* Mark the start of the next token. Textual input data keeps no file
 * position, so CurPos is left as it is.
 */
{
    /* Nothing to do here */
}
 551
 552
 553
 554 static void IDNextChar (CharSource* S)
 555 /* Read the next character from the input text */
 556 {
 557     C = *S->V.Data.Pos++;
 558     if (C == '\0') {
 559         /* End of input data */
 560         --S->V.Data.Pos;
 561         C = EOF;
 562     }
 563 }
 564
 565
 566
 567 void IDDone (CharSource* S)
 568 /* Close the current input data */
 569 {
 570     /* Cleanup the current stuff */
 571     if (S->V.Data.Malloced) {
 572         xfree (S->V.Data.Text);
 573     }
 574 }
 575
 576
 577
/* Set of input data handling functions. NewInputData installs this table
 * in every char source it creates.
 */
static const CharSourceFunctions IDFunc = {
    IDMarkStart,                        /* MarkStart */
    IDNextChar,                         /* NextChar */
    IDDone                              /* Done */
};
 584
 585
 586
 587 void NewInputData (char* Text, int Malloced)
 588 /* Add a chunk of input data to the input stream */
 589 {
 590     CharSource* S;
 591
 592     /* Create a new input source variable and initialize it */
 593     S                   = xmalloc (sizeof (*S));
 594     S->Func             = &IDFunc;
 595     S->V.Data.Text      = Text;
 596     S->V.Data.Pos       = Text;
 597     S->V.Data.Malloced  = Malloced;
 598
 599     /* Use this input source */
 600     UseCharSource (S);
 601 }
 602
 603
 604
 605 /*****************************************************************************/
 606 /*                    Character classification functions                     */
 607 /*****************************************************************************/
 608
 609
 610
 611 int IsIdChar (int C)
 612 /* Return true if the character is a valid character for an identifier */
 613 {
 614     return IsAlNum (C)                  ||
 615            (C == '_')                   ||
 616            (C == '@' && AtInIdents)     ||
 617            (C == '$' && DollarInIdents);
 618 }
 619
 620
 621
 622 int IsIdStart (int C)
 623 /* Return true if the character may start an identifier */
 624 {
 625     return IsAlpha (C) || C == '_';
 626 }
 627
 628
 629
 630 /*****************************************************************************/
 631 /*                                   Code                                    */
 632 /*****************************************************************************/
 633
 634
 635
 636 static unsigned DigitVal (unsigned char C)
 637 /* Convert a digit into it's numerical representation */
 638 {
 639     if (IsDigit (C)) {
 640         return C - '0';
 641     } else {
 642         return toupper (C) - 'A' + 10;
 643     }
 644 }
 645
 646
 647
static void NextChar (void)
/* Read the next character from the active char source, which may be a file
 * or textual data. The call is dispatched through the function table of the
 * source.
 */
{
    Source->Func->NextChar (Source);
}
 653
 654
 655
void LocaseSVal (void)
/* Make SVal, the string attribute of the current token, lower case */
{
    SB_ToLower (&SVal);
}
 661
 662
 663
void UpcaseSVal (void)
/* Make SVal, the string attribute of the current token, upper case */
{
    SB_ToUpper (&SVal);
}
 669
 670
 671
 672 static int CmpDotKeyword (const void* K1, const void* K2)
 673 /* Compare function for the dot keyword search */
 674 {
 675     return strcmp (((struct DotKeyword*)K1)->Key, ((struct DotKeyword*)K2)->Key);
 676 }
 677
 678
 679
 680 static unsigned char FindDotKeyword (void)
 681 /* Find the dot keyword in SVal. Return the corresponding token if found,
 682  * return TOK_NONE if not found.
 683  */
 684 {
 685     struct DotKeyword K;
 686     struct DotKeyword* R;
 687
 688     /* Initialize K */
 689     K.Key = SB_GetConstBuf (&SVal);
 690     K.Tok = 0;
 691
 692     /* If we aren't in ignore case mode, we have to uppercase the keyword */
 693     if (!IgnoreCase) {
 694         UpcaseSVal ();
 695     }
 696
 697     /* Search for the keyword */
 698     R = bsearch (&K, DotKeywords, sizeof (DotKeywords) / sizeof (DotKeywords [0]),
 699                  sizeof (DotKeywords [0]), CmpDotKeyword);
 700     if (R != 0) {
 701         return R->Tok;
 702     } else {
 703         return TOK_NONE;
 704     }
 705 }
 706
 707
 708
 709 static void ReadIdent (void)
 710 /* Read an identifier from the current input position into Ident. Filling SVal
 711  * starts at the current position with the next character in C. It is assumed
 712  * that any characters already filled in are ok, and the character in C is
 713  * checked.
 714  */
 715 {
 716     /* Read the identifier */
 717     do {
 718         SB_AppendChar (&SVal, C);
 719         NextChar ();
 720     } while (IsIdChar (C));
 721     SB_Terminate (&SVal);
 722
 723     /* If we should ignore case, convert the identifier to upper case */
 724     if (IgnoreCase) {
 725         UpcaseSVal ();
 726     }
 727 }
 728
 729
 730
 731 static void ReadStringConst (int StringTerm)
 732 /* Read a string constant into SVal. */
 733 {
 734     /* Skip the leading string terminator */
 735     NextChar ();
 736
 737     /* Read the string */
 738     while (1) {
 739         if (C == StringTerm) {
 740             break;
 741         }
 742         if (C == '\n' || C == EOF) {
 743             Error ("Newline in string constant");
 744             break;
 745         }
 746
 747         /* Append the char to the string */
 748         SB_AppendChar (&SVal, C);
 749
 750         /* Skip the character */
 751         NextChar ();
 752     }
 753
 754     /* Skip the trailing terminator */
 755     NextChar ();
 756
 757     /* Terminate the string */
 758     SB_Terminate (&SVal);
 759 }
 760
 761
 762
 763 static int Sweet16Reg (const StrBuf* Id)
 764 /* Check if the given identifier is a sweet16 register. Return -1 if this is
 765  * not the case, return the register number otherwise.
 766  */
 767 {
 768     unsigned RegNum;
 769     char Check;
 770
 771     if (SB_GetLen (Id) < 2) {
 772         return -1;
 773     }
 774     if (toupper (SB_AtUnchecked (Id, 0)) != 'R') {
 775         return -1;
 776     }
 777     if (!IsDigit (SB_AtUnchecked (Id, 1))) {
 778         return -1;
 779     }
 780
 781     if (sscanf (SB_GetConstBuf (Id)+1, "%u%c", &RegNum, &Check) != 1 || RegNum > 15) {
 782         /* Invalid register */
 783         return -1;
 784     }
 785
 786     /* The register number is valid */
 787     return (int) RegNum;
 788 }
 789
 790
 791
 792 void NextRawTok (void)
 793 /* Read the next raw token from the input stream */
 794 {
 795     /* If we've a forced end of assembly, don't read further */
 796     if (ForcedEnd) {
 797         Tok = TOK_EOF;
 798         return;
 799     }
 800
 801 Restart:
 802     /* Check if we have tokens from another input source */
 803     if (InputFromStack ()) {
 804         return;
 805     }
 806
 807 Again:
 808     /* Skip whitespace, remember if we had some */
 809     if ((WS = IsBlank (C)) != 0) {
 810         do {
 811             NextChar ();
 812         } while (IsBlank (C));
 813     }
 814
 815     /* Mark the file position of the next token */
 816     Source->Func->MarkStart (Source);
 817
 818     /* Clear the string attribute */
 819     SB_Clear (&SVal);
 820
 821     /* Hex number or PC symbol? */
 822     if (C == '$') {
 823         NextChar ();
 824
 825         /* Hex digit must follow or DollarIsPC must be enabled */
 826         if (!IsXDigit (C)) {
 827             if (DollarIsPC) {
 828                 Tok = TOK_PC;
 829                 return;
 830             } else {
 831                 Error ("Hexadecimal digit expected");
 832             }
 833         }
 834
 835         /* Read the number */
 836         IVal = 0;
 837         while (IsXDigit (C)) {
 838             if (IVal & 0xF0000000) {
 839                 Error ("Overflow in hexadecimal number");
 840                 IVal = 0;
 841             }
 842             IVal = (IVal << 4) + DigitVal (C);
 843             NextChar ();
 844         }
 845
 846         /* This is an integer constant */
 847         Tok = TOK_INTCON;
 848         return;
 849     }
 850
 851     /* Binary number? */
 852     if (C == '%') {
 853         NextChar ();
 854
 855         /* 0 or 1 must follow */
 856         if (!IsBDigit (C)) {
 857             Error ("Binary digit expected");
 858         }
 859
 860         /* Read the number */
 861         IVal = 0;
 862         while (IsBDigit (C)) {
 863             if (IVal & 0x80000000) {
 864                 Error ("Overflow in binary number");
 865                 IVal = 0;
 866             }
 867             IVal = (IVal << 1) + DigitVal (C);
 868             NextChar ();
 869         }
 870
 871         /* This is an integer constant */
 872         Tok = TOK_INTCON;
 873         return;
 874     }
 875
 876     /* Number? */
 877     if (IsDigit (C)) {
 878
 879         char Buf[16];
 880         unsigned Digits;
 881         unsigned Base;
 882         unsigned I;
 883         long     Max;
 884         unsigned DVal;
 885
 886         /* Ignore leading zeros */
 887         while (C == '0') {
 888             NextChar ();
 889         }
 890
 891         /* Read the number into Buf counting the digits */
 892         Digits = 0;
 893         while (IsXDigit (C)) {
 894
 895             /* Buf is big enough to allow any decimal and hex number to
 896              * overflow, so ignore excess digits here, they will be detected
 897              * when we convert the value.
 898              */
 899             if (Digits < sizeof (Buf)) {
 900                 Buf[Digits++] = C;
 901             }
 902
 903             NextChar ();
 904         }
 905
 906         /* Allow zilog/intel style hex numbers with a 'h' suffix */
 907         if (C == 'h' || C == 'H') {
 908             NextChar ();
 909             Base = 16;
 910             Max  = 0xFFFFFFFFUL / 16;
 911         } else {
 912             Base = 10;
 913             Max  = 0xFFFFFFFFUL / 10;
 914         }
 915
 916         /* Convert the number using the given base */
 917         IVal = 0;
 918         for (I = 0; I < Digits; ++I) {
 919             if (IVal > Max) {
 920                 Error ("Number out of range");
 921                 IVal = 0;
 922                 break;
 923             }
 924             DVal = DigitVal (Buf[I]);
 925             if (DVal > Base) {
 926                 Error ("Invalid digits in number");
 927                 IVal = 0;
 928                 break;
 929             }
 930             IVal = (IVal * Base) + DVal;
 931         }
 932
 933         /* This is an integer constant */
 934         Tok = TOK_INTCON;
 935         return;
 936     }
 937
 938     /* Control command? */
 939     if (C == '.') {
 940
 941         /* Remember and skip the dot */
 942         NextChar ();
 943
 944         /* Check if it's just a dot */
 945         if (!IsIdStart (C)) {
 946
 947             /* Just a dot */
 948             Tok = TOK_DOT;
 949
 950         } else {
 951
 952             /* Read the remainder of the identifier */
 953             SB_AppendChar (&SVal, '.');
 954             ReadIdent ();
 955
 956             /* Dot keyword, search for it */
 957             Tok = FindDotKeyword ();
 958             if (Tok == TOK_NONE) {
 959
 960                 /* Not found */
 961                 if (!LeadingDotInIdents) {
 962                     /* Invalid pseudo instruction */
 963                     Error ("`%m%p' is not a recognized control command", &SVal);
 964                     goto Again;
 965                 }
 966
 967                 /* An identifier with a dot. Check if it's a define style
 968                  * macro.
 969                  */
 970                 if (IsDefine (&SVal)) {
 971                     /* This is a define style macro - expand it */
 972                     MacExpandStart ();
 973                     goto Restart;
 974                 }
 975
 976                 /* Just an identifier with a dot */
 977                 Tok = TOK_IDENT;
 978             }
 979
 980         }
 981         return;
 982     }
 983
 984     /* Indirect op for sweet16 cpu. Must check this before checking for local
 985      * symbols, because these may also use the '@' symbol.
 986      */
 987     if (CPU == CPU_SWEET16 && C == '@') {
 988         NextChar ();
 989         Tok = TOK_AT;
 990         return;
 991     }
 992
 993     /* Local symbol? */
 994     if (C == LocalStart) {
 995
 996         /* Read the identifier. */
 997         ReadIdent ();
 998
 999         /* Start character alone is not enough */
1000         if (SB_GetLen (&SVal) == 1) {
1001             Error ("Invalid cheap local symbol");
1002             goto Again;
1003         }
1004
1005         /* A local identifier */
1006         Tok = TOK_LOCAL_IDENT;
1007         return;
1008     }
1009
1010
1011     /* Identifier or keyword? */
1012     if (IsIdStart (C)) {
1013
1014         /* Read the identifier */
1015         ReadIdent ();
1016
1017         /* Check for special names. Bail out if we have identified the type of
1018          * the token. Go on if the token is an identifier.
1019          */
1020         if (SB_GetLen (&SVal) == 1) {
1021             switch (toupper (SB_AtUnchecked (&SVal, 0))) {
1022
1023                 case 'A':
1024                     if (C == ':') {
1025                         NextChar ();
1026                         Tok = TOK_OVERRIDE_ABS;
1027                     } else {
1028                         Tok = TOK_A;
1029                     }
1030                     return;
1031
1032                 case 'F':
1033                     if (C == ':') {
1034                         NextChar ();
1035                         Tok = TOK_OVERRIDE_FAR;
1036                         return;
1037                     }
1038                     break;
1039
1040                 case 'S':
1041                     if (CPU == CPU_65816) {
1042                         Tok = TOK_S;
1043                         return;
1044                     }
1045                     break;
1046
1047                 case 'X':
1048                     Tok = TOK_X;
1049                     return;
1050
1051                 case 'Y':
1052                     Tok = TOK_Y;
1053                     return;
1054
1055                 case 'Z':
1056                     if (C == ':') {
1057                         NextChar ();
1058                         Tok = TOK_OVERRIDE_ZP;
1059                         return;
1060                     }
1061                     break;
1062
1063                 default:
1064                     break;
1065             }
1066
1067         } else if (CPU == CPU_SWEET16 && (IVal = Sweet16Reg (&SVal)) >= 0) {
1068
1069             /* A sweet16 register number in sweet16 mode */
1070             Tok = TOK_REG;
1071             return;
1072
1073         }
1074
1075         /* Check for define style macro */
1076         if (IsDefine (&SVal)) {
1077             /* Macro - expand it */
1078             MacExpandStart ();
1079             goto Restart;
1080         } else {
1081             /* An identifier */
1082             Tok = TOK_IDENT;
1083         }
1084         return;
1085     }
1086
1087     /* Ok, let's do the switch */
1088 CharAgain:
1089     switch (C) {
1090
1091         case '+':
1092             NextChar ();
1093             Tok = TOK_PLUS;
1094             return;
1095
1096         case '-':
1097             NextChar ();
1098             Tok = TOK_MINUS;
1099             return;
1100
1101         case '/':
1102             NextChar ();
1103             if (C != '*') {
1104                 Tok = TOK_DIV;
1105             } else if (CComments) {
1106                 /* Remember the position, then skip the '*' */
1107                 FilePos Pos = CurPos;
1108                 NextChar ();
1109                 do {
1110                     while (C !=  '*') {
1111                         if (C == EOF) {
1112                             PError (&Pos, "Unterminated comment");
1113                             goto CharAgain;
1114                         }
1115                         NextChar ();
1116                     }
1117                     NextChar ();
1118                 } while (C != '/');
1119                 NextChar ();
1120                 goto Again;
1121             }
1122             return;
1123
1124         case '*':
1125             NextChar ();
1126             Tok = TOK_MUL;
1127             return;
1128
1129         case '^':
1130             NextChar ();
1131             Tok = TOK_XOR;
1132             return;
1133
1134         case '&':
1135             NextChar ();
1136             if (C == '&') {
1137                 NextChar ();
1138                 Tok = TOK_BOOLAND;
1139             } else {
1140                 Tok = TOK_AND;
1141             }
1142             return;
1143
1144         case '|':
1145             NextChar ();
1146             if (C == '|') {
1147                 NextChar ();
1148                 Tok = TOK_BOOLOR;
1149             } else {
1150                 Tok = TOK_OR;
1151             }
1152             return;
1153
1154         case ':':
1155             NextChar ();
1156             switch (C) {
1157
1158                 case ':':
1159                     NextChar ();
1160                     Tok = TOK_NAMESPACE;
1161                     break;
1162
1163                 case '-':
1164                     IVal = 0;
1165                     do {
1166                         --IVal;
1167                         NextChar ();
1168                     } while (C == '-');
1169                     Tok = TOK_ULABEL;
1170                     break;
1171
1172                 case '+':
1173                     IVal = 0;
1174                     do {
1175                         ++IVal;
1176                         NextChar ();
1177                     } while (C == '+');
1178                     Tok = TOK_ULABEL;
1179                     break;
1180
1181                 case '=':
1182                     NextChar ();
1183                     Tok = TOK_ASSIGN;
1184                     break;
1185
1186                 default:
1187                     Tok = TOK_COLON;
1188                     break;
1189             }
1190             return;
1191
1192         case ',':
1193             NextChar ();
1194             Tok = TOK_COMMA;
1195             return;
1196
1197         case ';':
1198             NextChar ();
1199             while (C != '\n' && C != EOF) {
1200                 NextChar ();
1201             }
1202             goto CharAgain;
1203
1204         case '#':
1205             NextChar ();
1206             Tok = TOK_HASH;
1207             return;
1208
1209         case '(':
1210             NextChar ();
1211             Tok = TOK_LPAREN;
1212             return;
1213
1214         case ')':
1215             NextChar ();
1216             Tok = TOK_RPAREN;
1217             return;
1218
1219         case '[':
1220             NextChar ();
1221             Tok = TOK_LBRACK;
1222             return;
1223
1224         case ']':
1225             NextChar ();
1226             Tok = TOK_RBRACK;
1227             return;
1228
1229         case '{':
1230             NextChar ();
1231             Tok = TOK_LCURLY;
1232             return;
1233
1234         case '}':
1235             NextChar ();
1236             Tok = TOK_RCURLY;
1237             return;
1238
1239         case '<':
1240             NextChar ();
1241             if (C == '=') {
1242                 NextChar ();
1243                 Tok = TOK_LE;
1244             } else if (C == '<') {
1245                 NextChar ();
1246                 Tok = TOK_SHL;
1247             } else if (C == '>') {
1248                 NextChar ();
1249                 Tok = TOK_NE;
1250             } else {
1251                 Tok = TOK_LT;
1252             }
1253             return;
1254
1255         case '=':
1256             NextChar ();
1257             Tok = TOK_EQ;
1258             return;
1259
1260         case '!':
1261             NextChar ();
1262             Tok = TOK_BOOLNOT;
1263             return;
1264
1265         case '>':
1266             NextChar ();
1267             if (C == '=') {
1268                 NextChar ();
1269                 Tok = TOK_GE;
1270             } else if (C == '>') {
1271                 NextChar ();
1272                 Tok = TOK_SHR;
1273             } else {
1274                 Tok = TOK_GT;
1275             }
1276             return;
1277
1278         case '~':
1279             NextChar ();
1280             Tok = TOK_NOT;
1281             return;
1282
1283         case '\'':
1284             /* Hack: If we allow ' as terminating character for strings, read
1285              * the following stuff as a string, and check for a one character
1286              * string later.
1287              */
1288             if (LooseStringTerm) {
1289                 ReadStringConst ('\'');
1290                 if (SB_GetLen (&SVal) == 1) {
1291                     IVal = SB_AtUnchecked (&SVal, 0);
1292                     Tok = TOK_CHARCON;
1293                 } else {
1294                     Tok = TOK_STRCON;
1295                 }
1296             } else {
1297                 /* Always a character constant */
1298                 NextChar ();
1299                 if (C == EOF || IsControl (C)) {
1300                     Error ("Illegal character constant");
1301                     goto CharAgain;
1302                 }
1303                 IVal = C;
1304                 Tok = TOK_CHARCON;
1305                 NextChar ();
1306                 if (C != '\'') {
1307                     if (!MissingCharTerm) {
1308                         Error ("Illegal character constant");
1309                     }
1310                 } else {
1311                     NextChar ();
1312                 }
1313             }
1314             return;
1315
1316         case '\"':
1317             ReadStringConst ('\"');
1318             Tok = TOK_STRCON;
1319             return;
1320
1321         case '\\':
1322             /* Line continuation? */
1323             if (LineCont) {
1324                 NextChar ();
1325                 if (C == '\n') {
1326                     /* Handle as white space */
1327                     NextChar ();
1328                     C = ' ';
1329                     goto Again;
1330                 }
1331             }
1332             break;
1333
1334         case '\n':
1335             NextChar ();
1336             Tok = TOK_SEP;
1337             return;
1338
1339         case EOF:
1340             CheckInputStack ();
1341             /* In case of the main file, do not close it, but return EOF. */
1342             if (Source && Source->Next) {
1343                 DoneCharSource ();
1344                 goto Again;
1345             } else {
1346                 Tok = TOK_EOF;
1347             }
1348             return;
1349     }
1350
1351     /* If we go here, we could not identify the current character. Skip it
1352      * and try again.
1353      */
1354     Error ("Invalid input character: 0x%02X", C & 0xFF);
1355     NextChar ();
1356     goto Again;
1357 }
1358
1359
1360
1361 int GetSubKey (const char** Keys, unsigned Count)
1362 /* Search for a subkey in a table of keywords. The current token must be an
1363  * identifier and all keys must be in upper case. The identifier will be
1364  * uppercased in the process. The function returns the index of the keyword,
1365  * or -1 if the keyword was not found.
1366  */
1367 {
1368     unsigned I;
1369
1370     /* Must have an identifier */
1371     PRECONDITION (Tok == TOK_IDENT);
1372
1373     /* If we aren't in ignore case mode, we have to uppercase the identifier */
1374     if (!IgnoreCase) {
1375         UpcaseSVal ();
1376     }
1377
1378     /* Do a linear search (a binary search is not worth the effort) */
1379     for (I = 0; I < Count; ++I) {
1380         if (SB_CompareStr (&SVal, Keys [I]) == 0) {
1381             /* Found it */
1382             return I;
1383         }
1384     }
1385
1386     /* Not found */
1387     return -1;
1388 }
1389
1390
1391
1392 unsigned char ParseAddrSize (void)
1393 /* Check if the next token is a keyword that denotes an address size specifier.
1394  * If so, return the corresponding address size constant, otherwise output an
1395  * error message and return ADDR_SIZE_DEFAULT.
1396  */
1397 {
1398     unsigned char AddrSize;
1399
1400     /* Check for an identifier */
1401     if (Tok != TOK_IDENT) {
1402         Error ("Address size specifier expected");
1403         return ADDR_SIZE_DEFAULT;
1404     }
1405
1406     /* Convert the attribute */
1407     AddrSize = AddrSizeFromStr (SB_GetConstBuf (&SVal));
1408     if (AddrSize == ADDR_SIZE_INVALID) {
1409         Error ("Address size specifier expected");
1410         AddrSize = ADDR_SIZE_DEFAULT;
1411     }
1412
1413     /* Done */
1414     return AddrSize;
1415 }
1416
1417
1418
void InitScanner (const char* InFile)
/* Set up the scanner for use. The only step needed is to make InFile the
 * input by passing its name to NewInputFile.
 */
{
    /* Hand the file name over to the input file handling */
    NewInputFile (InFile);
}
1425
1426
1427
void DoneScanner (void)
/* Shut down the scanner and free what it holds. This is done with a single
 * call to DoneCharSource.
 */
{
    DoneCharSource ();
}
1433
1434
1435