git.sur5r.net Git - cc65/blob - src/ca65/scanner.c

   1 /*****************************************************************************/
   2 /*                                                                           */
   3 /*                                 scanner.c                                 */
   4 /*                                                                           */
   5 /*                  The scanner for the ca65 macroassembler                  */
   6 /*                                                                           */
   7 /*                                                                           */
   8 /*                                                                           */
   9 /* (C) 1998-2010, Ullrich von Bassewitz                                      */
  10 /*                Roemerstrasse 52                                           */
  11 /*                D-70794 Filderstadt                                        */
  12 /* EMail:         uz@cc65.org                                                */
  13 /*                                                                           */
  14 /*                                                                           */
  15 /* This software is provided 'as-is', without any expressed or implied       */
  16 /* warranty.  In no event will the authors be held liable for any damages    */
  17 /* arising from the use of this software.                                    */
  18 /*                                                                           */
  19 /* Permission is granted to anyone to use this software for any purpose,     */
  20 /* including commercial applications, and to alter it and redistribute it    */
  21 /* freely, subject to the following restrictions:                            */
  22 /*                                                                           */
  23 /* 1. The origin of this software must not be misrepresented; you must not   */
  24 /*    claim that you wrote the original software. If you use this software   */
  25 /*    in a product, an acknowledgment in the product documentation would be  */
  26 /*    appreciated but is not required.                                       */
  27 /* 2. Altered source versions must be plainly marked as such, and must not   */
  28 /*    be misrepresented as being the original software.                      */
  29 /* 3. This notice may not be removed or altered from any source              */
  30 /*    distribution.                                                          */
  31 /*                                                                           */
  32 /*****************************************************************************/
  33
  34
  35
  36 #include <stdio.h>
  37 #include <stdlib.h>
  38 #include <string.h>
  39 #include <ctype.h>
  40 #include <errno.h>
  41 #include <sys/types.h>          /* EMX needs this */
  42 #include <sys/stat.h>
  43
  44 /* common */
  45 #include "addrsize.h"
  46 #include "attrib.h"
  47 #include "chartype.h"
  48 #include "check.h"
  49 #include "fname.h"
  50 #include "xmalloc.h"
  51
  52 /* ca65 */
  53 #include "condasm.h"
  54 #include "error.h"
  55 #include "filetab.h"
  56 #include "global.h"
  57 #include "incpath.h"
  58 #include "instr.h"
  59 #include "istack.h"
  60 #include "listing.h"
  61 #include "macro.h"
  62 #include "toklist.h"
  63 #include "scanner.h"
  64
  65
  66
  67 /*****************************************************************************/
  68 /*                                   Data                                    */
  69 /*****************************************************************************/
  70
  71
  72
/* Attributes of the current token. These are the scanner's output variables:
 * Tok holds the token itself, the other variables carry additional data
 * that is set by the scanner depending on the token read.
 */
Token Tok = TOK_NONE;                   /* Current token */
int WS;                                 /* Flag: Whitespace before token */
long IVal;                              /* Integer token attribute */
StrBuf SVal = STATIC_STRBUF_INITIALIZER;/* String token attribute */

/* File position of the current token. For file input it is copied from the
 * position of the input file whenever the start of a token is marked.
 */
FilePos CurPos = { 0, 0, 0 };           /* Name and position in current file */
  79
  80
  81
/* Struct to handle include files. One of these is embedded in each CharSource
 * that reads from a file. Input is read line by line into Line, and Pos.Col
 * is used as index of the next character within this line.
 * NOTE(review): The Tok and C members are not referenced in the part of the
 * file visible here (the saved token and character live in CharSource), and
 * Next seems to be superseded by CharSource.Next - confirm before relying on
 * them.
 */
typedef struct InputFile InputFile;
struct InputFile {
    FILE*           F;                  /* Input file descriptor */
    FilePos         Pos;                /* Position in file */
    Token           Tok;                /* Last token */
    int             C;                  /* Last character */
    char            Line[256];          /* The current input line */
    InputFile*      Next;               /* Linked list of input files */
};
  92
/* Struct to handle textual input data. One of these is embedded in each
 * CharSource that reads from a zero terminated string in memory. Text points
 * to the start of the string (and is freed when the source is closed if
 * Malloced is set), Pos walks over the string while it is read.
 * NOTE(review): The Tok, C and Next members are not referenced in the part
 * of the file visible here (CharSource has members with the same purpose) -
 * confirm before relying on them.
 */
typedef struct InputData InputData;
struct InputData {
    char*           Text;               /* Pointer to the text data */
    const char*     Pos;                /* Pointer to current position */
    int             Malloced;           /* Memory was malloced */
    Token           Tok;                /* Last token */
    int             C;                  /* Last character */
    InputData*      Next;               /* Linked list of input data */
};
 103
/* Input source: Either file or data. Forward declaration, because the
 * function table below needs the type in its prototypes.
 */
typedef struct CharSource CharSource;

/* Set of input functions. Each kind of input source supplies one constant
 * table of this type. The scanner calls the input source only through these
 * pointers, passing the source itself as argument.
 */
typedef struct CharSourceFunctions CharSourceFunctions;
struct CharSourceFunctions {
    void (*MarkStart) (CharSource*);    /* Mark the start pos of a token */
    void (*NextChar) (CharSource*);     /* Read next char from input */
    void (*Done) (CharSource*);         /* Close input source */
};
 114
/* Input source: Either file or data. Input sources form a stack that is
 * linked via Next, with the active source on top. Tok and C hold the token
 * and the input character that were current when this source was activated;
 * they are restored when the source is closed. Which member of the union V
 * is valid depends on the kind of source and is known to the functions in
 * the Func table.
 */
struct CharSource {
    CharSource*                 Next;   /* Linked list of char sources */
    Token                       Tok;    /* Last token */
    int                         C;      /* Last character */
    const CharSourceFunctions*  Func;   /* Pointer to function table */
    union {
        InputFile               File;   /* File data */
        InputData               Data;   /* Textual data */
    }                           V;
};
 126
/* Current input variables. Source is the top of the stack of input sources,
 * FCount is incremented for each file that is opened and decremented when
 * a file is closed, and C is the lookahead character all scanner functions
 * work with (EOF if the current source is exhausted).
 */
static CharSource* Source       = 0;    /* Current char source */
static unsigned     FCount      = 0;    /* Count of input files */
static int          C           = 0;    /* Current input character */

/* Force end of assembly. If this flag is set, NextRawTok will deliver
 * TOK_EOF without reading any more input.
 */
int               ForcedEnd     = 0;
 134
/* List of dot keywords with the corresponding tokens. The table is searched
 * with bsearch() using strcmp() on the keys, so the entries MUST be kept
 * sorted in strcmp() order (digits sort before letters, a key sorts before
 * any longer key it is a prefix of). All keys are upper case and include the
 * leading dot. Several keys may map to the same token (for example .BYT and
 * .BYTE).
 */
struct DotKeyword {
    const char* Key;                    /* MUST be first field */
    Token       Tok;
} DotKeywords [] = {
    { ".A16",           TOK_A16         },
    { ".A8",            TOK_A8          },
    { ".ADDR",          TOK_ADDR        },
    { ".ALIGN",         TOK_ALIGN       },
    { ".AND",           TOK_BOOLAND     },
    { ".ASCIIZ",        TOK_ASCIIZ      },
    { ".ASSERT",        TOK_ASSERT      },
    { ".AUTOIMPORT",    TOK_AUTOIMPORT  },
    { ".BANKBYTE",      TOK_BANKBYTE    },
    { ".BANKBYTES",     TOK_BANKBYTES   },
    { ".BITAND",        TOK_AND         },
    { ".BITNOT",        TOK_NOT         },
    { ".BITOR",         TOK_OR          },
    { ".BITXOR",        TOK_XOR         },
    { ".BLANK",         TOK_BLANK       },
    { ".BSS",           TOK_BSS         },
    { ".BYT",           TOK_BYTE        },
    { ".BYTE",          TOK_BYTE        },
    { ".CASE",          TOK_CASE        },
    { ".CHARMAP",       TOK_CHARMAP     },
    { ".CODE",          TOK_CODE        },
    { ".CONCAT",        TOK_CONCAT      },
    { ".CONDES",        TOK_CONDES      },
    { ".CONST",         TOK_CONST       },
    { ".CONSTRUCTOR",   TOK_CONSTRUCTOR },
    { ".CPU",           TOK_CPU         },
    { ".DATA",          TOK_DATA        },
    { ".DBG",           TOK_DBG         },
    { ".DBYT",          TOK_DBYT        },
    { ".DEBUGINFO",     TOK_DEBUGINFO   },
    { ".DEF",           TOK_DEFINED     },
    { ".DEFINE",        TOK_DEFINE      },
    { ".DEFINED",       TOK_DEFINED     },
    { ".DESTRUCTOR",    TOK_DESTRUCTOR  },
    { ".DWORD",         TOK_DWORD       },
    { ".ELSE",          TOK_ELSE        },
    { ".ELSEIF",        TOK_ELSEIF      },
    { ".END",           TOK_END         },
    { ".ENDENUM",       TOK_ENDENUM     },
    { ".ENDIF",         TOK_ENDIF       },
    { ".ENDMAC",        TOK_ENDMACRO    },
    { ".ENDMACRO",      TOK_ENDMACRO    },
    { ".ENDPROC",       TOK_ENDPROC     },
    { ".ENDREP",        TOK_ENDREP      },
    { ".ENDREPEAT",     TOK_ENDREP      },
    { ".ENDSCOPE",      TOK_ENDSCOPE    },
    { ".ENDSTRUCT",     TOK_ENDSTRUCT   },
    { ".ENDUNION",      TOK_ENDUNION    },
    { ".ENUM",          TOK_ENUM        },
    { ".ERROR",         TOK_ERROR       },
    { ".EXITMAC",       TOK_EXITMACRO   },
    { ".EXITMACRO",     TOK_EXITMACRO   },
    { ".EXPORT",        TOK_EXPORT      },
    { ".EXPORTZP",      TOK_EXPORTZP    },
    { ".FARADDR",       TOK_FARADDR     },
    { ".FEATURE",       TOK_FEATURE     },
    { ".FILEOPT",       TOK_FILEOPT     },
    { ".FOPT",          TOK_FILEOPT     },
    { ".FORCEIMPORT",   TOK_FORCEIMPORT },
    { ".FORCEWORD",     TOK_FORCEWORD   },
    { ".GLOBAL",        TOK_GLOBAL      },
    { ".GLOBALZP",      TOK_GLOBALZP    },
    { ".HIBYTE",        TOK_HIBYTE      },
    { ".HIBYTES",       TOK_HIBYTES     },
    { ".HIWORD",        TOK_HIWORD      },
    { ".I16",           TOK_I16         },
    { ".I8",            TOK_I8          },
    { ".IDENT",         TOK_MAKEIDENT   },
    { ".IF",            TOK_IF          },
    { ".IFBLANK",       TOK_IFBLANK     },
    { ".IFCONST",       TOK_IFCONST     },
    { ".IFDEF",         TOK_IFDEF       },
    { ".IFNBLANK",      TOK_IFNBLANK    },
    { ".IFNCONST",      TOK_IFNCONST    },
    { ".IFNDEF",        TOK_IFNDEF      },
    { ".IFNREF",        TOK_IFNREF      },
    { ".IFP02",         TOK_IFP02       },
    { ".IFP816",        TOK_IFP816      },
    { ".IFPC02",        TOK_IFPC02      },
    { ".IFPSC02",       TOK_IFPSC02     },
    { ".IFREF",         TOK_IFREF       },
    { ".IMPORT",        TOK_IMPORT      },
    { ".IMPORTZP",      TOK_IMPORTZP    },
    { ".INCBIN",        TOK_INCBIN      },
    { ".INCLUDE",       TOK_INCLUDE     },
    { ".INTERRUPTOR",   TOK_INTERRUPTOR },
    { ".LEFT",          TOK_LEFT        },
    { ".LINECONT",      TOK_LINECONT    },
    { ".LIST",          TOK_LIST        },
    { ".LISTBYTES",     TOK_LISTBYTES   },
    { ".LOBYTE",        TOK_LOBYTE      },
    { ".LOBYTES",       TOK_LOBYTES     },
    { ".LOCAL",         TOK_LOCAL       },
    { ".LOCALCHAR",     TOK_LOCALCHAR   },
    { ".LOWORD",        TOK_LOWORD      },
    { ".MAC",           TOK_MACRO       },
    { ".MACPACK",       TOK_MACPACK     },
    { ".MACRO",         TOK_MACRO       },
    { ".MATCH",         TOK_MATCH       },
    { ".MAX",           TOK_MAX         },
    { ".MID",           TOK_MID         },
    { ".MIN",           TOK_MIN         },
    { ".MOD",           TOK_MOD         },
    { ".NOT",           TOK_BOOLNOT     },
    { ".NULL",          TOK_NULL        },
    { ".OR",            TOK_BOOLOR      },
    { ".ORG",           TOK_ORG         },
    { ".OUT",           TOK_OUT         },
    { ".P02",           TOK_P02         },
    { ".P816",          TOK_P816        },
    { ".PAGELEN",       TOK_PAGELENGTH  },
    { ".PAGELENGTH",    TOK_PAGELENGTH  },
    { ".PARAMCOUNT",    TOK_PARAMCOUNT  },
    { ".PC02",          TOK_PC02        },
    { ".POPCPU",        TOK_POPCPU      },
    { ".POPSEG",        TOK_POPSEG      },
    { ".PROC",          TOK_PROC        },
    { ".PSC02",         TOK_PSC02       },
    { ".PUSHCPU",       TOK_PUSHCPU     },
    { ".PUSHSEG",       TOK_PUSHSEG     },
    { ".REF",           TOK_REFERENCED  },
    { ".REFERENCED",    TOK_REFERENCED  },
    { ".RELOC",         TOK_RELOC       },
    { ".REPEAT",        TOK_REPEAT      },
    { ".RES",           TOK_RES         },
    { ".RIGHT",         TOK_RIGHT       },
    { ".RODATA",        TOK_RODATA      },
    { ".SCOPE",         TOK_SCOPE       },
    { ".SEGMENT",       TOK_SEGMENT     },
    { ".SET",           TOK_SET         },
    { ".SETCPU",        TOK_SETCPU      },
    { ".SHL",           TOK_SHL         },
    { ".SHR",           TOK_SHR         },
    { ".SIZEOF",        TOK_SIZEOF      },
    { ".SMART",         TOK_SMART       },
    { ".SPRINTF",       TOK_SPRINTF     },
    { ".STRAT",         TOK_STRAT       },
    { ".STRING",        TOK_STRING      },
    { ".STRLEN",        TOK_STRLEN      },
    { ".STRUCT",        TOK_STRUCT      },
    { ".SUNPLUS",       TOK_SUNPLUS     },
    { ".TAG",           TOK_TAG         },
    { ".TCOUNT",        TOK_TCOUNT      },
    { ".TIME",          TOK_TIME        },
    { ".UNION",         TOK_UNION       },
    { ".VERSION",       TOK_VERSION     },
    { ".WARNING",       TOK_WARNING     },
    { ".WORD",          TOK_WORD        },
    { ".XMATCH",        TOK_XMATCH      },
    { ".XOR",           TOK_BOOLXOR     },
    { ".ZEROPAGE",      TOK_ZEROPAGE    },
};
 292
 293
 294
 295 /*****************************************************************************/
 296 /*                            CharSource functions                           */
 297 /*****************************************************************************/
 298
 299
 300
 301 static void UseCharSource (CharSource* S)
 302 /* Initialize a new input source and start to use it. */
 303 {
 304     /* Remember the current input char and token */
 305     S->Tok      = Tok;
 306     S->C        = C;
 307
 308     /* Use the new input source */
 309     S->Next     = Source;
 310     Source      = S;
 311
 312     /* Read the first character from the new file */
 313     S->Func->NextChar (S);
 314
 315     /* Setup the next token so it will be skipped on the next call to
 316      * NextRawTok().
 317      */
 318     Tok = TOK_SEP;
 319 }
 320
 321
 322
 323 static void DoneCharSource (void)
 324 /* Close the top level character source */
 325 {
 326     CharSource* S;
 327
 328     /* First, call the type specific function */
 329     Source->Func->Done (Source);
 330
 331     /* Restore the old token */
 332     Tok = Source->Tok;
 333     C   = Source->C;
 334
 335     /* Remember the last stacked input source */
 336     S = Source->Next;
 337
 338     /* Delete the top level one ... */
 339     xfree (Source);
 340
 341     /* ... and use the one before */
 342     Source = S;
 343 }
 344
 345
 346
 347 /*****************************************************************************/
 348 /*                            InputFile functions                            */
 349 /*****************************************************************************/
 350
 351
 352
 353 static void IFMarkStart (CharSource* S)
 354 /* Mark the start of the next token */
 355 {
 356     CurPos = S->V.File.Pos;
 357 }
 358
 359
 360
 361 static void IFNextChar (CharSource* S)
 362 /* Read the next character from the input file */
 363 {
 364     /* Check for end of line, read the next line if needed */
 365     while (S->V.File.Line [S->V.File.Pos.Col] == '\0') {
 366
 367         unsigned Len, Removed;
 368
 369         /* End of current line reached, read next line */
 370         if (fgets (S->V.File.Line, sizeof (S->V.File.Line), S->V.File.F) == 0) {
 371             /* End of file. Add an empty line to the listing. This is a
 372              * small hack needed to keep the PC output in sync.
 373              */
 374             NewListingLine ("", S->V.File.Pos.Name, FCount);
 375             C = EOF;
 376             return;
 377         }
 378
 379         /* For better handling of files with unusual line endings (DOS
 380          * files that are accidently translated on Unix for example),
 381          * first remove all whitespace at the end, then add a single
 382          * newline.
 383          */
 384         Len = strlen (S->V.File.Line);
 385         Removed = 0;
 386         while (Len > 0 && IsSpace (S->V.File.Line[Len-1])) {
 387             ++Removed;
 388             --Len;
 389         }
 390         if (Removed) {
 391             S->V.File.Line[Len+0] = '\n';
 392             S->V.File.Line[Len+1] = '\0';
 393         }
 394
 395         /* One more line */
 396         S->V.File.Pos.Line++;
 397         S->V.File.Pos.Col = 0;
 398
 399         /* Remember the new line for the listing */
 400         NewListingLine (S->V.File.Line, S->V.File.Pos.Name, FCount);
 401
 402     }
 403
 404     /* Return the next character from the file */
 405     C = S->V.File.Line [S->V.File.Pos.Col++];
 406 }
 407
 408
 409
 410 void IFDone (CharSource* S)
 411 /* Close the current input file */
 412 {
 413     /* We're at the end of an include file. Check if we have any
 414      * open .IFs, or any open token lists in this file. This
 415      * enforcement is artificial, using conditionals that start
 416      * in one file and end in another are uncommon, and don't
 417      * allowing these things will help finding errors.
 418      */
 419     CheckOpenIfs ();
 420
 421     /* Close the input file and decrement the file count. We will ignore
 422      * errors here, since we were just reading from the file.
 423      */
 424     (void) fclose (S->V.File.F);
 425     --FCount;
 426 }
 427
 428
 429
/* Set of input file handling functions. The initializers are positional and
 * must follow the member order of struct CharSourceFunctions (MarkStart,
 * NextChar, Done).
 */
static const CharSourceFunctions IFFunc = {
    IFMarkStart,
    IFNextChar,
    IFDone
};
 436
 437
 438
 439 int NewInputFile (const char* Name)
 440 /* Open a new input file. Returns true if the file could be successfully opened
 441  * and false otherwise.
 442  */
 443 {
 444     int RetCode = 0;            /* Return code. Assume an error. */
 445     char* PathName = 0;
 446
 447     /* First try to open the file */
 448     FILE* F = fopen (Name, "r");
 449     if (F == 0) {
 450
 451         /* Error (fatal error if this is the main file) */
 452         if (FCount == 0) {
 453             Fatal ("Cannot open input file `%s': %s", Name, strerror (errno));
 454         }
 455
 456         /* We are on include level. Search for the file in the include
 457          * directories.
 458          */
 459         PathName = FindInclude (Name, INC_STD);
 460         if (PathName == 0 || (F = fopen (PathName, "r")) == 0) {
 461             /* Not found or cannot open, print an error and bail out */
 462             Error ("Cannot open include file `%s': %s", Name, strerror (errno));
 463             goto ExitPoint;
 464         }
 465
 466         /* Use the path name from now on */
 467         Name = PathName;
 468     }
 469
 470     /* check again if we do now have an open file */
 471     if (F != 0) {
 472
 473         StrBuf          NameBuf;
 474         unsigned        FileIdx;
 475         CharSource*     S;
 476
 477         /* Stat the file and remember the values. There a race condition here,
 478          * since we cannot use fileno() (non standard identifier in standard
 479          * header file), and therefore not fstat. When using stat with the
 480          * file name, there's a risk that the file was deleted and recreated
 481          * while it was open. Since mtime and size are only used to check
 482          * if a file has changed in the debugger, we will ignore this problem
 483          * here.
 484          */
 485         struct stat Buf;
 486         if (stat (Name, &Buf) != 0) {
 487             Fatal ("Cannot stat input file `%s': %s", Name, strerror (errno));
 488         }
 489
 490         /* Add the file to the input file table and remember the index */
 491         FileIdx = AddFile (SB_InitFromString (&NameBuf, Name),
 492                            (FCount == 0)? FT_MAIN : FT_INCLUDE,
 493                            Buf.st_size, Buf.st_mtime);
 494
 495         /* Create a new input source variable and initialize it */
 496         S                   = xmalloc (sizeof (*S));
 497         S->Func             = &IFFunc;
 498         S->V.File.F         = F;
 499         S->V.File.Pos.Line  = 0;
 500         S->V.File.Pos.Col   = 0;
 501         S->V.File.Pos.Name  = FileIdx;
 502         S->V.File.Line[0]   = '\0';
 503
 504         /* Count active input files */
 505         ++FCount;
 506
 507         /* Use this input source */
 508         UseCharSource (S);
 509     }
 510
 511     /* File successfully opened */
 512     RetCode = 1;
 513
 514 ExitPoint:
 515     /* Free an allocated name buffer */
 516     xfree (PathName);
 517
 518     /* Return the success code */
 519     return RetCode;
 520 }
 521
 522
 523
 524 /*****************************************************************************/
 525 /*                            InputData functions                            */
 526 /*****************************************************************************/
 527
 528
 529
static void IDMarkStart (CharSource* S attribute ((unused)))
/* Mark the start of the next token. Textual input data carries no file
 * position, so CurPos is left untouched.
 */
{
    /* Nothing to do here */
}
 535
 536
 537
 538 static void IDNextChar (CharSource* S)
 539 /* Read the next character from the input text */
 540 {
 541     C = *S->V.Data.Pos++;
 542     if (C == '\0') {
 543         /* End of input data */
 544         --S->V.Data.Pos;
 545         C = EOF;
 546     }
 547 }
 548
 549
 550
 551 void IDDone (CharSource* S)
 552 /* Close the current input data */
 553 {
 554     /* Cleanup the current stuff */
 555     if (S->V.Data.Malloced) {
 556         xfree (S->V.Data.Text);
 557     }
 558 }
 559
 560
 561
/* Set of input data handling functions. The initializers are positional and
 * must follow the member order of struct CharSourceFunctions (MarkStart,
 * NextChar, Done).
 */
static const CharSourceFunctions IDFunc = {
    IDMarkStart,
    IDNextChar,
    IDDone
};
 568
 569
 570
 571 void NewInputData (char* Text, int Malloced)
 572 /* Add a chunk of input data to the input stream */
 573 {
 574     CharSource* S;
 575
 576     /* Create a new input source variable and initialize it */
 577     S                   = xmalloc (sizeof (*S));
 578     S->Func             = &IDFunc;
 579     S->V.Data.Text      = Text;
 580     S->V.Data.Pos       = Text;
 581     S->V.Data.Malloced  = Malloced;
 582
 583     /* Use this input source */
 584     UseCharSource (S);
 585 }
 586
 587
 588
 589 /*****************************************************************************/
 590 /*                    Character classification functions                     */
 591 /*****************************************************************************/
 592
 593
 594
 595 int IsIdChar (int C)
 596 /* Return true if the character is a valid character for an identifier */
 597 {
 598     return IsAlNum (C)                  ||
 599            (C == '_')                   ||
 600            (C == '@' && AtInIdents)     ||
 601            (C == '$' && DollarInIdents);
 602 }
 603
 604
 605
 606 int IsIdStart (int C)
 607 /* Return true if the character may start an identifier */
 608 {
 609     return IsAlpha (C) || C == '_';
 610 }
 611
 612
 613
 614 /*****************************************************************************/
 615 /*                                   Code                                    */
 616 /*****************************************************************************/
 617
 618
 619
 620 static unsigned DigitVal (unsigned char C)
 621 /* Convert a digit into it's numerical representation */
 622 {
 623     if (IsDigit (C)) {
 624         return C - '0';
 625     } else {
 626         return toupper (C) - 'A' + 10;
 627     }
 628 }
 629
 630
 631
 632 static void NextChar (void)
 633 /* Read the next character from the input file */
 634 {
 635     Source->Func->NextChar (Source);
 636 }
 637
 638
 639
void LocaseSVal (void)
/* Make SVal, the string attribute of the current token, lower case by
 * calling SB_ToLower on it.
 */
{
    SB_ToLower (&SVal);
}
 645
 646
 647
void UpcaseSVal (void)
/* Make SVal, the string attribute of the current token, upper case by
 * calling SB_ToUpper on it.
 */
{
    SB_ToUpper (&SVal);
}
 653
 654
 655
 656 static int CmpDotKeyword (const void* K1, const void* K2)
 657 /* Compare function for the dot keyword search */
 658 {
 659     return strcmp (((struct DotKeyword*)K1)->Key, ((struct DotKeyword*)K2)->Key);
 660 }
 661
 662
 663
 664 static unsigned char FindDotKeyword (void)
 665 /* Find the dot keyword in SVal. Return the corresponding token if found,
 666  * return TOK_NONE if not found.
 667  */
 668 {
 669     struct DotKeyword K;
 670     struct DotKeyword* R;
 671
 672     /* Initialize K */
 673     K.Key = SB_GetConstBuf (&SVal);
 674     K.Tok = 0;
 675
 676     /* If we aren't in ignore case mode, we have to uppercase the keyword */
 677     if (!IgnoreCase) {
 678         UpcaseSVal ();
 679     }
 680
 681     /* Search for the keyword */
 682     R = bsearch (&K, DotKeywords, sizeof (DotKeywords) / sizeof (DotKeywords [0]),
 683                  sizeof (DotKeywords [0]), CmpDotKeyword);
 684     if (R != 0) {
 685         return R->Tok;
 686     } else {
 687         return TOK_NONE;
 688     }
 689 }
 690
 691
 692
 693 static void ReadIdent (void)
 694 /* Read an identifier from the current input position into Ident. Filling SVal
 695  * starts at the current position with the next character in C. It is assumed
 696  * that any characters already filled in are ok, and the character in C is
 697  * checked.
 698  */
 699 {
 700     /* Read the identifier */
 701     do {
 702         SB_AppendChar (&SVal, C);
 703         NextChar ();
 704     } while (IsIdChar (C));
 705     SB_Terminate (&SVal);
 706
 707     /* If we should ignore case, convert the identifier to upper case */
 708     if (IgnoreCase) {
 709         UpcaseSVal ();
 710     }
 711 }
 712
 713
 714
 715 static void ReadStringConst (int StringTerm)
 716 /* Read a string constant into SVal. */
 717 {
 718     /* Skip the leading string terminator */
 719     NextChar ();
 720
 721     /* Read the string */
 722     while (1) {
 723         if (C == StringTerm) {
 724             break;
 725         }
 726         if (C == '\n' || C == EOF) {
 727             Error ("Newline in string constant");
 728             break;
 729         }
 730
 731         /* Append the char to the string */
 732         SB_AppendChar (&SVal, C);
 733
 734         /* Skip the character */
 735         NextChar ();
 736     }
 737
 738     /* Skip the trailing terminator */
 739     NextChar ();
 740
 741     /* Terminate the string */
 742     SB_Terminate (&SVal);
 743 }
 744
 745
 746
 747 static int Sweet16Reg (const StrBuf* Id)
 748 /* Check if the given identifier is a sweet16 register. Return -1 if this is
 749  * not the case, return the register number otherwise.
 750  */
 751 {
 752     unsigned RegNum;
 753     char Check;
 754
 755     if (SB_GetLen (Id) < 2) {
 756         return -1;
 757     }
 758     if (toupper (SB_AtUnchecked (Id, 0)) != 'R') {
 759         return -1;
 760     }
 761     if (!IsDigit (SB_AtUnchecked (Id, 1))) {
 762         return -1;
 763     }
 764
 765     if (sscanf (SB_GetConstBuf (Id)+1, "%u%c", &RegNum, &Check) != 1 || RegNum > 15) {
 766         /* Invalid register */
 767         return -1;
 768     }
 769
 770     /* The register number is valid */
 771     return (int) RegNum;
 772 }
 773
 774
 775
 776 void NextRawTok (void)
 777 /* Read the next raw token from the input stream */
 778 {
 779     /* If we've a forced end of assembly, don't read further */
 780     if (ForcedEnd) {
 781         Tok = TOK_EOF;
 782         return;
 783     }
 784
 785 Restart:
 786     /* Check if we have tokens from another input source */
 787     if (InputFromStack ()) {
 788         return;
 789     }
 790
 791 Again:
 792     /* Skip whitespace, remember if we had some */
 793     if ((WS = IsBlank (C)) != 0) {
 794         do {
 795             NextChar ();
 796         } while (IsBlank (C));
 797     }
 798
 799     /* Mark the file position of the next token */
 800     Source->Func->MarkStart (Source);
 801
 802     /* Clear the string attribute */
 803     SB_Clear (&SVal);
 804
 805     /* Hex number or PC symbol? */
 806     if (C == '$') {
 807         NextChar ();
 808
 809         /* Hex digit must follow or DollarIsPC must be enabled */
 810         if (!IsXDigit (C)) {
 811             if (DollarIsPC) {
 812                 Tok = TOK_PC;
 813                 return;
 814             } else {
 815                 Error ("Hexadecimal digit expected");
 816             }
 817         }
 818
 819         /* Read the number */
 820         IVal = 0;
 821         while (IsXDigit (C)) {
 822             if (IVal & 0xF0000000) {
 823                 Error ("Overflow in hexadecimal number");
 824                 IVal = 0;
 825             }
 826             IVal = (IVal << 4) + DigitVal (C);
 827             NextChar ();
 828         }
 829
 830         /* This is an integer constant */
 831         Tok = TOK_INTCON;
 832         return;
 833     }
 834
 835     /* Binary number? */
 836     if (C == '%') {
 837         NextChar ();
 838
 839         /* 0 or 1 must follow */
 840         if (!IsBDigit (C)) {
 841             Error ("Binary digit expected");
 842         }
 843
 844         /* Read the number */
 845         IVal = 0;
 846         while (IsBDigit (C)) {
 847             if (IVal & 0x80000000) {
 848                 Error ("Overflow in binary number");
 849                 IVal = 0;
 850             }
 851             IVal = (IVal << 1) + DigitVal (C);
 852             NextChar ();
 853         }
 854
 855         /* This is an integer constant */
 856         Tok = TOK_INTCON;
 857         return;
 858     }
 859
 860     /* Number? */
 861     if (IsDigit (C)) {
 862
 863         char Buf[16];
 864         unsigned Digits;
 865         unsigned Base;
 866         unsigned I;
 867         long     Max;
 868         unsigned DVal;
 869
 870         /* Ignore leading zeros */
 871         while (C == '0') {
 872             NextChar ();
 873         }
 874
 875         /* Read the number into Buf counting the digits */
 876         Digits = 0;
 877         while (IsXDigit (C)) {
 878
 879             /* Buf is big enough to allow any decimal and hex number to
 880              * overflow, so ignore excess digits here, they will be detected
 881              * when we convert the value.
 882              */
 883             if (Digits < sizeof (Buf)) {
 884                 Buf[Digits++] = C;
 885             }
 886
 887             NextChar ();
 888         }
 889
 890         /* Allow zilog/intel style hex numbers with a 'h' suffix */
 891         if (C == 'h' || C == 'H') {
 892             NextChar ();
 893             Base = 16;
 894             Max  = 0xFFFFFFFFUL / 16;
 895         } else {
 896             Base = 10;
 897             Max  = 0xFFFFFFFFUL / 10;
 898         }
 899
 900         /* Convert the number using the given base */
 901         IVal = 0;
 902         for (I = 0; I < Digits; ++I) {
 903             if (IVal > Max) {
 904                 Error ("Number out of range");
 905                 IVal = 0;
 906                 break;
 907             }
 908             DVal = DigitVal (Buf[I]);
 909             if (DVal > Base) {
 910                 Error ("Invalid digits in number");
 911                 IVal = 0;
 912                 break;
 913             }
 914             IVal = (IVal * Base) + DVal;
 915         }
 916
 917         /* This is an integer constant */
 918         Tok = TOK_INTCON;
 919         return;
 920     }
 921
 922     /* Control command? */
 923     if (C == '.') {
 924
 925         /* Remember and skip the dot */
 926         NextChar ();
 927
 928         /* Check if it's just a dot */
 929         if (!IsIdStart (C)) {
 930
 931             /* Just a dot */
 932             Tok = TOK_DOT;
 933
 934         } else {
 935
 936             /* Read the remainder of the identifier */
 937             SB_AppendChar (&SVal, '.');
 938             ReadIdent ();
 939
 940             /* Dot keyword, search for it */
 941             Tok = FindDotKeyword ();
 942             if (Tok == TOK_NONE) {
 943
 944                 /* Not found */
 945                 if (!LeadingDotInIdents) {
 946                     /* Invalid pseudo instruction */
 947                     Error ("`%m%p' is not a recognized control command", &SVal);
 948                     goto Again;
 949                 }
 950
 951                 /* An identifier with a dot. Check if it's a define style
 952                  * macro.
 953                  */
 954                 if (IsDefine (&SVal)) {
 955                     /* This is a define style macro - expand it */
 956                     MacExpandStart ();
 957                     goto Restart;
 958                 }
 959
 960                 /* Just an identifier with a dot */
 961                 Tok = TOK_IDENT;
 962             }
 963
 964         }
 965         return;
 966     }
 967
 968     /* Indirect op for sweet16 cpu. Must check this before checking for local
 969      * symbols, because these may also use the '@' symbol.
 970      */
 971     if (CPU == CPU_SWEET16 && C == '@') {
 972         NextChar ();
 973         Tok = TOK_AT;
 974         return;
 975     }
 976
 977     /* Local symbol? */
 978     if (C == LocalStart) {
 979
 980         /* Read the identifier. */
 981         ReadIdent ();
 982
 983         /* Start character alone is not enough */
 984         if (SB_GetLen (&SVal) == 1) {
 985             Error ("Invalid cheap local symbol");
 986             goto Again;
 987         }
 988
 989         /* A local identifier */
 990         Tok = TOK_LOCAL_IDENT;
 991         return;
 992     }
 993
 994
 995     /* Identifier or keyword? */
 996     if (IsIdStart (C)) {
 997
 998         /* Read the identifier */
 999         ReadIdent ();
1000
1001         /* Check for special names. Bail out if we have identified the type of
1002          * the token. Go on if the token is an identifier.
1003          */
1004         if (SB_GetLen (&SVal) == 1) {
1005             switch (toupper (SB_AtUnchecked (&SVal, 0))) {
1006
1007                 case 'A':
1008                     if (C == ':') {
1009                         NextChar ();
1010                         Tok = TOK_OVERRIDE_ABS;
1011                     } else {
1012                         Tok = TOK_A;
1013                     }
1014                     return;
1015
1016                 case 'F':
1017                     if (C == ':') {
1018                         NextChar ();
1019                         Tok = TOK_OVERRIDE_FAR;
1020                         return;
1021                     }
1022                     break;
1023
1024                 case 'S':
1025                     if (CPU == CPU_65816) {
1026                         Tok = TOK_S;
1027                         return;
1028                     }
1029                     break;
1030
1031                 case 'X':
1032                     Tok = TOK_X;
1033                     return;
1034
1035                 case 'Y':
1036                     Tok = TOK_Y;
1037                     return;
1038
1039                 case 'Z':
1040                     if (C == ':') {
1041                         NextChar ();
1042                         Tok = TOK_OVERRIDE_ZP;
1043                         return;
1044                     }
1045                     break;
1046
1047                 default:
1048                     break;
1049             }
1050
1051         } else if (CPU == CPU_SWEET16 && (IVal = Sweet16Reg (&SVal)) >= 0) {
1052
1053             /* A sweet16 register number in sweet16 mode */
1054             Tok = TOK_REG;
1055             return;
1056
1057         }
1058
1059         /* Check for define style macro */
1060         if (IsDefine (&SVal)) {
1061             /* Macro - expand it */
1062             MacExpandStart ();
1063             goto Restart;
1064         } else {
1065             /* An identifier */
1066             Tok = TOK_IDENT;
1067         }
1068         return;
1069     }
1070
1071     /* Ok, let's do the switch */
1072 CharAgain:
1073     switch (C) {
1074
1075         case '+':
1076             NextChar ();
1077             Tok = TOK_PLUS;
1078             return;
1079
1080         case '-':
1081             NextChar ();
1082             Tok = TOK_MINUS;
1083             return;
1084
1085         case '/':
1086             NextChar ();
1087             if (C != '*') {
1088                 Tok = TOK_DIV;
1089             } else if (CComments) {
1090                 /* Remember the position, then skip the '*' */
1091                 FilePos Pos = CurPos;
1092                 NextChar ();
1093                 do {
1094                     while (C !=  '*') {
1095                         if (C == EOF) {
1096                             PError (&Pos, "Unterminated comment");
1097                             goto CharAgain;
1098                         }
1099                         NextChar ();
1100                     }
1101                     NextChar ();
1102                 } while (C != '/');
1103                 NextChar ();
1104                 goto Again;
1105             }
1106             return;
1107
1108         case '*':
1109             NextChar ();
1110             Tok = TOK_MUL;
1111             return;
1112
1113         case '^':
1114             NextChar ();
1115             Tok = TOK_XOR;
1116             return;
1117
1118         case '&':
1119             NextChar ();
1120             if (C == '&') {
1121                 NextChar ();
1122                 Tok = TOK_BOOLAND;
1123             } else {
1124                 Tok = TOK_AND;
1125             }
1126             return;
1127
1128         case '|':
1129             NextChar ();
1130             if (C == '|') {
1131                 NextChar ();
1132                 Tok = TOK_BOOLOR;
1133             } else {
1134                 Tok = TOK_OR;
1135             }
1136             return;
1137
1138         case ':':
1139             NextChar ();
1140             switch (C) {
1141
1142                 case ':':
1143                     NextChar ();
1144                     Tok = TOK_NAMESPACE;
1145                     break;
1146
1147                 case '-':
1148                     IVal = 0;
1149                     do {
1150                         --IVal;
1151                         NextChar ();
1152                     } while (C == '-');
1153                     Tok = TOK_ULABEL;
1154                     break;
1155
1156                 case '+':
1157                     IVal = 0;
1158                     do {
1159                         ++IVal;
1160                         NextChar ();
1161                     } while (C == '+');
1162                     Tok = TOK_ULABEL;
1163                     break;
1164
1165                 case '=':
1166                     NextChar ();
1167                     Tok = TOK_ASSIGN;
1168                     break;
1169
1170                 default:
1171                     Tok = TOK_COLON;
1172                     break;
1173             }
1174             return;
1175
1176         case ',':
1177             NextChar ();
1178             Tok = TOK_COMMA;
1179             return;
1180
1181         case ';':
1182             NextChar ();
1183             while (C != '\n' && C != EOF) {
1184                 NextChar ();
1185             }
1186             goto CharAgain;
1187
1188         case '#':
1189             NextChar ();
1190             Tok = TOK_HASH;
1191             return;
1192
1193         case '(':
1194             NextChar ();
1195             Tok = TOK_LPAREN;
1196             return;
1197
1198         case ')':
1199             NextChar ();
1200             Tok = TOK_RPAREN;
1201             return;
1202
1203         case '[':
1204             NextChar ();
1205             Tok = TOK_LBRACK;
1206             return;
1207
1208         case ']':
1209             NextChar ();
1210             Tok = TOK_RBRACK;
1211             return;
1212
1213         case '{':
1214             NextChar ();
1215             Tok = TOK_LCURLY;
1216             return;
1217
1218         case '}':
1219             NextChar ();
1220             Tok = TOK_RCURLY;
1221             return;
1222
1223         case '<':
1224             NextChar ();
1225             if (C == '=') {
1226                 NextChar ();
1227                 Tok = TOK_LE;
1228             } else if (C == '<') {
1229                 NextChar ();
1230                 Tok = TOK_SHL;
1231             } else if (C == '>') {
1232                 NextChar ();
1233                 Tok = TOK_NE;
1234             } else {
1235                 Tok = TOK_LT;
1236             }
1237             return;
1238
1239         case '=':
1240             NextChar ();
1241             Tok = TOK_EQ;
1242             return;
1243
1244         case '!':
1245             NextChar ();
1246             Tok = TOK_BOOLNOT;
1247             return;
1248
1249         case '>':
1250             NextChar ();
1251             if (C == '=') {
1252                 NextChar ();
1253                 Tok = TOK_GE;
1254             } else if (C == '>') {
1255                 NextChar ();
1256                 Tok = TOK_SHR;
1257             } else {
1258                 Tok = TOK_GT;
1259             }
1260             return;
1261
1262         case '~':
1263             NextChar ();
1264             Tok = TOK_NOT;
1265             return;
1266
1267         case '\'':
1268             /* Hack: If we allow ' as terminating character for strings, read
1269              * the following stuff as a string, and check for a one character
1270              * string later.
1271              */
1272             if (LooseStringTerm) {
1273                 ReadStringConst ('\'');
1274                 if (SB_GetLen (&SVal) == 1) {
1275                     IVal = SB_AtUnchecked (&SVal, 0);
1276                     Tok = TOK_CHARCON;
1277                 } else {
1278                     Tok = TOK_STRCON;
1279                 }
1280             } else {
1281                 /* Always a character constant */
1282                 NextChar ();
1283                 if (C == EOF || IsControl (C)) {
1284                     Error ("Illegal character constant");
1285                     goto CharAgain;
1286                 }
1287                 IVal = C;
1288                 Tok = TOK_CHARCON;
1289                 NextChar ();
1290                 if (C != '\'') {
1291                     if (!MissingCharTerm) {
1292                         Error ("Illegal character constant");
1293                     }
1294                 } else {
1295                     NextChar ();
1296                 }
1297             }
1298             return;
1299
1300         case '\"':
1301             ReadStringConst ('\"');
1302             Tok = TOK_STRCON;
1303             return;
1304
1305         case '\\':
1306             /* Line continuation? */
1307             if (LineCont) {
1308                 NextChar ();
1309                 if (C == '\n') {
1310                     /* Handle as white space */
1311                     NextChar ();
1312                     C = ' ';
1313                     goto Again;
1314                 }
1315             }
1316             break;
1317
1318         case '\n':
1319             NextChar ();
1320             Tok = TOK_SEP;
1321             return;
1322
1323         case EOF:
1324             CheckInputStack ();
1325             /* In case of the main file, do not close it, but return EOF. */
1326             if (Source && Source->Next) {
1327                 DoneCharSource ();
1328                 goto Again;
1329             } else {
1330                 Tok = TOK_EOF;
1331             }
1332             return;
1333     }
1334
1335     /* If we go here, we could not identify the current character. Skip it
1336      * and try again.
1337      */
1338     Error ("Invalid input character: 0x%02X", C & 0xFF);
1339     NextChar ();
1340     goto Again;
1341 }
1342
1343
1344
1345 int GetSubKey (const char** Keys, unsigned Count)
1346 /* Search for a subkey in a table of keywords. The current token must be an
1347  * identifier and all keys must be in upper case. The identifier will be
1348  * uppercased in the process. The function returns the index of the keyword,
1349  * or -1 if the keyword was not found.
1350  */
1351 {
1352     unsigned I;
1353
1354     /* Must have an identifier */
1355     PRECONDITION (Tok == TOK_IDENT);
1356
1357     /* If we aren't in ignore case mode, we have to uppercase the identifier */
1358     if (!IgnoreCase) {
1359         UpcaseSVal ();
1360     }
1361
1362     /* Do a linear search (a binary search is not worth the effort) */
1363     for (I = 0; I < Count; ++I) {
1364         if (SB_CompareStr (&SVal, Keys [I]) == 0) {
1365             /* Found it */
1366             return I;
1367         }
1368     }
1369
1370     /* Not found */
1371     return -1;
1372 }
1373
1374
1375
1376 unsigned char ParseAddrSize (void)
1377 /* Check if the next token is a keyword that denotes an address size specifier.
1378  * If so, return the corresponding address size constant, otherwise output an
1379  * error message and return ADDR_SIZE_DEFAULT.
1380  */
1381 {
1382     unsigned char AddrSize;
1383
1384     /* Check for an identifier */
1385     if (Tok != TOK_IDENT) {
1386         Error ("Address size specifier expected");
1387         return ADDR_SIZE_DEFAULT;
1388     }
1389
1390     /* Convert the attribute */
1391     AddrSize = AddrSizeFromStr (SB_GetConstBuf (&SVal));
1392     if (AddrSize == ADDR_SIZE_INVALID) {
1393         Error ("Address size specifier expected");
1394         AddrSize = ADDR_SIZE_DEFAULT;
1395     }
1396
1397     /* Done */
1398     return AddrSize;
1399 }
1400
1401
1402
void InitScanner (const char* InFile)
/* Prepare the scanner for use: InFile is passed to NewInputFile, which makes
 * it the input file to read from.
 */
{
    /* Start reading from the given file */
    NewInputFile (InFile);
}
1409
1410
1411
void DoneScanner (void)
/* Shut down the scanner by calling DoneCharSource to drop the current
 * character source.
 */
{
    /* Dispose of the character source that is still active */
    DoneCharSource ();
}
1417
1418
1419