git.sur5r.net Git - cc65/blob - src/ca65/scanner.c

   1 /*****************************************************************************/
   2 /*                                                                           */
   3 /*                                 scanner.c                                 */
   4 /*                                                                           */
   5 /*                  The scanner for the ca65 macroassembler                  */
   6 /*                                                                           */
   7 /*                                                                           */
   8 /*                                                                           */
   9 /* (C) 1998-2005 Ullrich von Bassewitz                                       */
  10 /*               Römerstraße 52                                              */
  11 /*               D-70794 Filderstadt                                         */
  12 /* EMail:        uz@cc65.org                                                 */
  13 /*                                                                           */
  14 /*                                                                           */
  15 /* This software is provided 'as-is', without any expressed or implied       */
  16 /* warranty.  In no event will the authors be held liable for any damages    */
  17 /* arising from the use of this software.                                    */
  18 /*                                                                           */
  19 /* Permission is granted to anyone to use this software for any purpose,     */
  20 /* including commercial applications, and to alter it and redistribute it    */
  21 /* freely, subject to the following restrictions:                            */
  22 /*                                                                           */
  23 /* 1. The origin of this software must not be misrepresented; you must not   */
  24 /*    claim that you wrote the original software. If you use this software   */
  25 /*    in a product, an acknowledgment in the product documentation would be  */
  26 /*    appreciated but is not required.                                       */
  27 /* 2. Altered source versions must be plainly marked as such, and must not   */
  28 /*    be misrepresented as being the original software.                      */
  29 /* 3. This notice may not be removed or altered from any source              */
  30 /*    distribution.                                                          */
  31 /*                                                                           */
  32 /*****************************************************************************/
  33
  34
  35
  36 #include <stdio.h>
  37 #include <stdlib.h>
  38 #include <string.h>
  39 #include <ctype.h>
  40 #include <errno.h>
  41 #include <sys/types.h>          /* EMX needs this */
  42 #include <sys/stat.h>
  43
  44 /* common */
  45 #include "addrsize.h"
  46 #include "chartype.h"
  47 #include "check.h"
  48 #include "fname.h"
  49 #include "xmalloc.h"
  50
  51 /* ca65 */
  52 #include "condasm.h"
  53 #include "error.h"
  54 #include "filetab.h"
  55 #include "global.h"
  56 #include "incpath.h"
  57 #include "instr.h"
  58 #include "istack.h"
  59 #include "listing.h"
  60 #include "macro.h"
  61 #include "toklist.h"
  62 #include "scanner.h"
  63
  64
  65
  66 /*****************************************************************************/
  67 /*                                   Data                                    */
  68 /*****************************************************************************/
  69
  70
  71
/* Attributes of the current token. They are exported and overwritten by the
 * scanner whenever a new token is read.
 */
enum Token Tok = TOK_NONE;              /* Current token */
int WS;                                 /* Flag: Whitespace before token */
long IVal;                              /* Integer token attribute (numeric
                                         * constants, sweet16 register number)
                                         */
char SVal[MAX_STR_LEN+1];               /* String token attribute: identifier
                                         * or string constant text, always
                                         * NUL terminated
                                         */

FilePos CurPos = { 0, 0, 0 };           /* Name and position in current file */
  78
  79
  80
/* Struct to handle include files. One instance exists per open input file;
 * the instances form a stack linked through Next with the innermost file
 * first.
 */
typedef struct InputFile InputFile;
struct InputFile {
    FILE*           F;                  /* Input file descriptor */
    FilePos         Pos;                /* Position in file */
    enum Token      Tok;                /* Token that was current when the
                                         * file was opened, restored on close
                                         */
    int             C;                  /* Input character that was current
                                         * when the file was opened, restored
                                         * on close
                                         */
    char            Line[256];          /* The current input line */
    InputFile*      Next;               /* Linked list of input files */
};
  91
/* Struct to handle textual input data. The data is a NUL terminated string
 * that is scanned in place of the current file until its end is reached.
 */
typedef struct InputData InputData;
struct InputData {
    char*           Data;               /* Pointer to the data */
    const char*     Pos;                /* Pointer to current position */
    int             Malloced;           /* Memory was malloced, so Data is
                                         * freed when the input ends
                                         */
    enum Token      Tok;                /* Token that was current when the
                                         * data was pushed, restored at end
                                         */
    int             C;                  /* Input character that was current
                                         * when the data was pushed
                                         */
    InputData*      Next;               /* Linked list of input data */
};
 102
/* Current input variables. If IData is set, characters are read from there,
 * otherwise from IFile.
 */
static InputFile* IFile         = 0;    /* Current input file */
static InputData* IData         = 0;    /* Current input memory data */
static unsigned   ICount        = 0;    /* Count of input files, zero until
                                         * the main file has been opened
                                         */
static int        C             = 0;    /* Current input character */

/* Force end of assembly. When set, NextRawTok delivers TOK_EOF without
 * reading any more input.
 */
int               ForcedEnd     = 0;
 111
/* List of dot keywords with the corresponding tokens. The table is searched
 * with bsearch() using strcmp() on the Key field, so the entries MUST be kept
 * sorted in strcmp() order (note that digits sort before upper case letters).
 * Several keys are aliases that map to the same token.
 */
struct DotKeyword {
    const char* Key;                    /* MUST be first field */
    enum Token  Tok;
} DotKeywords [] = {
    { ".A16",           TOK_A16         },
    { ".A8",            TOK_A8          },
    { ".ADDR",          TOK_ADDR        },
    { ".ALIGN",         TOK_ALIGN       },
    { ".AND",           TOK_BOOLAND     },
    { ".ASCIIZ",        TOK_ASCIIZ      },
    { ".ASSERT",        TOK_ASSERT      },
    { ".AUTOIMPORT",    TOK_AUTOIMPORT  },
    { ".BANKBYTE",      TOK_BANKBYTE    },
    { ".BITAND",        TOK_AND         },
    { ".BITNOT",        TOK_NOT         },
    { ".BITOR",         TOK_OR          },
    { ".BITXOR",        TOK_XOR         },
    { ".BLANK",         TOK_BLANK       },
    { ".BSS",           TOK_BSS         },
    { ".BYT",           TOK_BYTE        },
    { ".BYTE",          TOK_BYTE        },
    { ".CASE",          TOK_CASE        },
    { ".CHARMAP",       TOK_CHARMAP     },
    { ".CODE",          TOK_CODE        },
    { ".CONCAT",        TOK_CONCAT      },
    { ".CONDES",        TOK_CONDES      },
    { ".CONST",         TOK_CONST       },
    { ".CONSTRUCTOR",   TOK_CONSTRUCTOR },
    { ".CPU",           TOK_CPU         },
    { ".DATA",          TOK_DATA        },
    { ".DBG",           TOK_DBG         },
    { ".DBYT",          TOK_DBYT        },
    { ".DEBUGINFO",     TOK_DEBUGINFO   },
    { ".DEF",           TOK_DEFINED     },
    { ".DEFINE",        TOK_DEFINE      },
    { ".DEFINED",       TOK_DEFINED     },
    { ".DESTRUCTOR",    TOK_DESTRUCTOR  },
    { ".DWORD",         TOK_DWORD       },
    { ".ELSE",          TOK_ELSE        },
    { ".ELSEIF",        TOK_ELSEIF      },
    { ".END",           TOK_END         },
    { ".ENDENUM",       TOK_ENDENUM     },
    { ".ENDIF",         TOK_ENDIF       },
    { ".ENDMAC",        TOK_ENDMACRO    },
    { ".ENDMACRO",      TOK_ENDMACRO    },
    { ".ENDPROC",       TOK_ENDPROC     },
    { ".ENDREP",        TOK_ENDREP      },
    { ".ENDREPEAT",     TOK_ENDREP      },
    { ".ENDSCOPE",      TOK_ENDSCOPE    },
    { ".ENDSTRUCT",     TOK_ENDSTRUCT   },
    { ".ENDUNION",      TOK_ENDUNION    },
    { ".ENUM",          TOK_ENUM        },
    { ".ERROR",         TOK_ERROR       },
    { ".EXITMAC",       TOK_EXITMACRO   },
    { ".EXITMACRO",     TOK_EXITMACRO   },
    { ".EXPORT",        TOK_EXPORT      },
    { ".EXPORTZP",      TOK_EXPORTZP    },
    { ".FARADDR",       TOK_FARADDR     },
    { ".FEATURE",       TOK_FEATURE     },
    { ".FILEOPT",       TOK_FILEOPT     },
    { ".FOPT",          TOK_FILEOPT     },
    { ".FORCEIMPORT",   TOK_FORCEIMPORT },
    { ".FORCEWORD",     TOK_FORCEWORD   },
    { ".GLOBAL",        TOK_GLOBAL      },
    { ".GLOBALZP",      TOK_GLOBALZP    },
    { ".HIBYTE",        TOK_HIBYTE      },
    { ".HIWORD",        TOK_HIWORD      },
    { ".I16",           TOK_I16         },
    { ".I8",            TOK_I8          },
    { ".IDENT",         TOK_MAKEIDENT   },
    { ".IF",            TOK_IF          },
    { ".IFBLANK",       TOK_IFBLANK     },
    { ".IFCONST",       TOK_IFCONST     },
    { ".IFDEF",         TOK_IFDEF       },
    { ".IFNBLANK",      TOK_IFNBLANK    },
    { ".IFNCONST",      TOK_IFNCONST    },
    { ".IFNDEF",        TOK_IFNDEF      },
    { ".IFNREF",        TOK_IFNREF      },
    { ".IFP02",         TOK_IFP02       },
    { ".IFP816",        TOK_IFP816      },
    { ".IFPC02",        TOK_IFPC02      },
    { ".IFPSC02",       TOK_IFPSC02     },
    { ".IFREF",         TOK_IFREF       },
    { ".IMPORT",        TOK_IMPORT      },
    { ".IMPORTZP",      TOK_IMPORTZP    },
    { ".INCBIN",        TOK_INCBIN      },
    { ".INCLUDE",       TOK_INCLUDE     },
    { ".INTERRUPTOR",   TOK_INTERRUPTOR },
    { ".LEFT",          TOK_LEFT        },
    { ".LINECONT",      TOK_LINECONT    },
    { ".LIST",          TOK_LIST        },
    { ".LISTBYTES",     TOK_LISTBYTES   },
    { ".LOBYTE",        TOK_LOBYTE      },
    { ".LOCAL",         TOK_LOCAL       },
    { ".LOCALCHAR",     TOK_LOCALCHAR   },
    { ".LOWORD",        TOK_LOWORD      },
    { ".MAC",           TOK_MACRO       },
    { ".MACPACK",       TOK_MACPACK     },
    { ".MACRO",         TOK_MACRO       },
    { ".MATCH",         TOK_MATCH       },
    { ".MID",           TOK_MID         },
    { ".MOD",           TOK_MOD         },
    { ".NOT",           TOK_BOOLNOT     },
    { ".NULL",          TOK_NULL        },
    { ".OR",            TOK_BOOLOR      },
    { ".ORG",           TOK_ORG         },
    { ".OUT",           TOK_OUT         },
    { ".P02",           TOK_P02         },
    { ".P816",          TOK_P816        },
    { ".PAGELEN",       TOK_PAGELENGTH  },
    { ".PAGELENGTH",    TOK_PAGELENGTH  },
    { ".PARAMCOUNT",    TOK_PARAMCOUNT  },
    { ".PC02",          TOK_PC02        },
    { ".POPSEG",        TOK_POPSEG      },
    { ".PROC",          TOK_PROC        },
    { ".PSC02",         TOK_PSC02       },
    { ".PUSHSEG",       TOK_PUSHSEG     },
    { ".REF",           TOK_REFERENCED  },
    { ".REFERENCED",    TOK_REFERENCED  },
    { ".RELOC",         TOK_RELOC       },
    { ".REPEAT",        TOK_REPEAT      },
    { ".RES",           TOK_RES         },
    { ".RIGHT",         TOK_RIGHT       },
    { ".RODATA",        TOK_RODATA      },
    { ".SCOPE",         TOK_SCOPE       },
    { ".SEGMENT",       TOK_SEGMENT     },
    { ".SET",           TOK_SET         },
    { ".SETCPU",        TOK_SETCPU      },
    { ".SHL",           TOK_SHL         },
    { ".SHR",           TOK_SHR         },
    { ".SIZEOF",        TOK_SIZEOF      },
    { ".SMART",         TOK_SMART       },
    { ".SPRINTF",       TOK_SPRINTF     },
    { ".STRAT",         TOK_STRAT       },
    { ".STRING",        TOK_STRING      },
    { ".STRLEN",        TOK_STRLEN      },
    { ".STRUCT",        TOK_STRUCT      },
    { ".SUNPLUS",       TOK_SUNPLUS     },
    { ".TAG",           TOK_TAG         },
    { ".TCOUNT",        TOK_TCOUNT      },
    { ".TIME",          TOK_TIME        },
    { ".UNION",         TOK_UNION       },
    { ".VERSION",       TOK_VERSION     },
    { ".WARNING",       TOK_WARNING     },
    { ".WORD",          TOK_WORD        },
    { ".XMATCH",        TOK_XMATCH      },
    { ".XOR",           TOK_BOOLXOR     },
    { ".ZEROPAGE",      TOK_ZEROPAGE    },
};
 262
 263
 264
 265 /*****************************************************************************/
 266 /*                                 Forwards                                  */
 267 /*****************************************************************************/
 268
 269
 270
static void NextChar (void);
/* Read the next character from the current input (memory data if there is
 * any, the current file otherwise) into C. Declared here because the input
 * setup functions call it before it is defined.
 */
 273
 274
 275
 276 /*****************************************************************************/
 277 /*                    Character classification functions                     */
 278 /*****************************************************************************/
 279
 280
 281
 282 int IsIdChar (int C)
 283 /* Return true if the character is a valid character for an identifier */
 284 {
 285     return IsAlNum (C)                  ||
 286            (C == '_')                   ||
 287            (C == '@' && AtInIdents)     ||
 288            (C == '$' && DollarInIdents);
 289 }
 290
 291
 292
 293 int IsIdStart (int C)
 294 /* Return true if the character may start an identifier */
 295 {
 296     return IsAlpha (C) || C == '_';
 297 }
 298
 299
 300
 301 /*****************************************************************************/
 302 /*                                   Code                                    */
 303 /*****************************************************************************/
 304
 305
 306
 307 void NewInputFile (const char* Name)
 308 /* Open a new input file */
 309 {
 310     InputFile* I;
 311     FILE* F;
 312
 313     /* First try to open the file */
 314     F = fopen (Name, "r");
 315     if (F == 0) {
 316
 317         char* PathName;
 318
 319         /* Error (fatal error if this is the main file) */
 320         if (ICount == 0) {
 321             Fatal ("Cannot open input file `%s': %s", Name, strerror (errno));
 322         }
 323
 324         /* We are on include level. Search for the file in the include
 325          * directories.
 326          */
 327         PathName = FindInclude (Name);
 328         if (PathName == 0 || (F = fopen (PathName, "r")) == 0) {
 329             /* Not found or cannot open, print an error and bail out */
 330             Error ("Cannot open include file `%s': %s", Name, strerror (errno));
 331         }
 332
 333         /* Free the allocated memory */
 334         xfree (PathName);
 335
 336     }
 337
 338     /* check again if we do now have an open file */
 339     if (F != 0) {
 340
 341         unsigned FileIdx;
 342
 343         /* Stat the file and remember the values */
 344         struct stat Buf;
 345         if (fstat (fileno (F), &Buf) != 0) {
 346             Fatal ("Cannot stat input file `%s': %s", Name, strerror (errno));
 347         }
 348
 349         /* Add the file to the input file table and remember the index */
 350         FileIdx = AddFile (Name, Buf.st_size, Buf.st_mtime);
 351
 352         /* Create a new state variable and initialize it */
 353         I           = xmalloc (sizeof (*I));
 354         I->F        = F;
 355         I->Pos.Line = 0;
 356         I->Pos.Col  = 0;
 357         I->Pos.Name = FileIdx;
 358         I->Tok      = Tok;
 359         I->C        = C;
 360         I->Line[0]  = '\0';
 361
 362         /* Use the new file */
 363         I->Next     = IFile;
 364         IFile       = I;
 365         ++ICount;
 366
 367         /* Read the first character from the new file */
 368         NextChar ();
 369
 370         /* Setup the next token so it will be skipped on the next call to
 371          * NextRawTok().
 372          */
 373         Tok = TOK_SEP;
 374
 375     }
 376 }
 377
 378
 379
 380 void DoneInputFile (void)
 381 /* Close the current input file */
 382 {
 383     InputFile* I;
 384
 385     /* Restore the old token */
 386     Tok = IFile->Tok;
 387     C   = IFile->C;
 388
 389     /* Save a pointer to the current struct, then set it back */
 390     I     = IFile;
 391     IFile = I->Next;
 392
 393     /* Cleanup the current stuff */
 394     fclose (I->F);
 395     xfree (I);
 396     --ICount;
 397 }
 398
 399
 400
 401 void NewInputData (char* Data, int Malloced)
 402 /* Add a chunk of input data to the input stream */
 403 {
 404     InputData* I;
 405
 406     /* Create a new state variable and initialize it */
 407     I           = xmalloc (sizeof (*I));
 408     I->Data     = Data;
 409     I->Pos      = Data;
 410     I->Malloced = Malloced;
 411     I->Tok      = Tok;
 412     I->C        = C;
 413
 414     /* Use the new data */
 415     I->Next     = IData;
 416     IData       = I;
 417
 418     /* Read the first character from the new file */
 419     NextChar ();
 420
 421     /* Setup the next token so it will be skipped on the next call to
 422      * NextRawTok().
 423      */
 424     Tok = TOK_SEP;
 425 }
 426
 427
 428
 429 static void DoneInputData (void)
 430 /* End the current input data stream */
 431 {
 432     InputData* I;
 433
 434     /* Restore the old token */
 435     Tok = IData->Tok;
 436     C   = IData->C;
 437
 438     /* Save a pointer to the current struct, then set it back */
 439     I     = IData;
 440     IData = I->Next;
 441
 442     /* Cleanup the current stuff */
 443     if (I->Malloced) {
 444         xfree (I->Data);
 445     }
 446     xfree (I);
 447 }
 448
 449
 450
 451 static unsigned DigitVal (unsigned char C)
 452 /* Convert a digit into it's numerical representation */
 453 {
 454     if (IsDigit (C)) {
 455         return C - '0';
 456     } else {
 457         return toupper (C) - 'A' + 10;
 458     }
 459 }
 460
 461
 462
 463 static void NextChar (void)
 464 /* Read the next character from the input file */
 465 {
 466     /* If we have an input data structure, read from there */
 467     if (IData) {
 468
 469         C = *IData->Pos++;
 470         if (C == '\0') {
 471             /* End of input data */
 472             C = EOF;
 473         }
 474
 475     } else {
 476
 477         /* Check for end of line, read the next line if needed */
 478         while (IFile->Line [IFile->Pos.Col] == '\0') {
 479
 480             unsigned Len, Removed;
 481
 482             /* End of current line reached, read next line */
 483             if (fgets (IFile->Line, sizeof (IFile->Line), IFile->F) == 0) {
 484                 /* End of file. Add an empty line to the listing. This is a
 485                  * small hack needed to keep the PC output in sync.
 486                  */
 487                 NewListingLine ("", IFile->Pos.Name, ICount);
 488                 C = EOF;
 489                 return;
 490             }
 491
 492             /* For better handling of files with unusual line endings (DOS
 493              * files that are accidently translated on Unix for example),
 494              * first remove all whitespace at the end, then add a single
 495              * newline.
 496              */
 497             Len = strlen (IFile->Line);
 498             Removed = 0;
 499             while (Len > 0 && IsSpace (IFile->Line[Len-1])) {
 500                 ++Removed;
 501                 --Len;
 502             }
 503             if (Removed) {
 504                 IFile->Line[Len+0] = '\n';
 505                 IFile->Line[Len+1] = '\0';
 506             }
 507
 508             /* One more line */
 509             IFile->Pos.Line++;
 510             IFile->Pos.Col = 0;
 511
 512             /* Remember the new line for the listing */
 513             NewListingLine (IFile->Line, IFile->Pos.Name, ICount);
 514
 515         }
 516
 517         /* Return the next character from the file */
 518         C = IFile->Line [IFile->Pos.Col++];
 519
 520     }
 521 }
 522
 523
 524
 525 void LocaseSVal (void)
 526 /* Make SVal lower case */
 527 {
 528     unsigned I = 0;
 529     while (SVal [I]) {
 530         SVal [I] = tolower (SVal [I]);
 531         ++I;
 532     }
 533 }
 534
 535
 536
 537 void UpcaseSVal (void)
 538 /* Make SVal upper case */
 539 {
 540     unsigned I = 0;
 541     while (SVal [I]) {
 542         SVal [I] = toupper (SVal [I]);
 543         ++I;
 544     }
 545 }
 546
 547
 548
 549 static int CmpDotKeyword (const void* K1, const void* K2)
 550 /* Compare function for the dot keyword search */
 551 {
 552     return strcmp (((struct DotKeyword*)K1)->Key, ((struct DotKeyword*)K2)->Key);
 553 }
 554
 555
 556
 557 static unsigned char FindDotKeyword (void)
 558 /* Find the dot keyword in SVal. Return the corresponding token if found,
 559  * return TOK_NONE if not found.
 560  */
 561 {
 562     static const struct DotKeyword K = { SVal, 0 };
 563     struct DotKeyword* R;
 564
 565     /* If we aren't in ignore case mode, we have to uppercase the keyword */
 566     if (!IgnoreCase) {
 567         UpcaseSVal ();
 568     }
 569
 570     /* Search for the keyword */
 571     R = bsearch (&K, DotKeywords, sizeof (DotKeywords) / sizeof (DotKeywords [0]),
 572                  sizeof (DotKeywords [0]), CmpDotKeyword);
 573     if (R != 0) {
 574         return R->Tok;
 575     } else {
 576         return TOK_NONE;
 577     }
 578 }
 579
 580
 581
 582 static void ReadIdent (unsigned Index)
 583 /* Read an identifier from the current input position into Ident. Filling SVal
 584  * starts at Index with the current character in C. It is assumed that any
 585  * characters already filled in are ok, and the character in C is checked.
 586  */
 587 {
 588     /* Read the identifier */
 589     do {
 590         if (Index < MAX_STR_LEN) {
 591             SVal [Index++] = C;
 592         }
 593         NextChar ();
 594     } while (IsIdChar (C));
 595     SVal [Index] = '\0';
 596
 597     /* If we should ignore case, convert the identifier to upper case */
 598     if (IgnoreCase) {
 599         UpcaseSVal ();
 600     }
 601 }
 602
 603
 604
 605 static unsigned ReadStringConst (int StringTerm)
 606 /* Read a string constant into SVal. Check for maximum string length and all
 607  * other stuff. The length of the string is returned.
 608  */
 609 {
 610     unsigned I;
 611
 612     /* Skip the leading string terminator */
 613     NextChar ();
 614
 615     /* Read the string */
 616     I = 0;
 617     while (1) {
 618         if (C == StringTerm) {
 619             break;
 620         }
 621         if (C == '\n' || C == EOF) {
 622             Error ("Newline in string constant");
 623             break;
 624         }
 625
 626         /* Check for string length, print an error message once */
 627         if (I == MAX_STR_LEN) {
 628             Error ("Maximum string size exceeded");
 629         } else if (I < MAX_STR_LEN) {
 630             SVal [I] = C;
 631         }
 632         ++I;
 633
 634         /* Skip the character */
 635         NextChar ();
 636     }
 637
 638     /* Skip the trailing terminator */
 639     NextChar ();
 640
 641     /* Terminate the string */
 642     if (I >= MAX_STR_LEN) {
 643         I = MAX_STR_LEN;
 644     }
 645     SVal [I] = '\0';
 646
 647     /* Return the length of the string */
 648     return I;
 649 }
 650
 651
 652
 653 static int Sweet16Reg (const char* Ident)
 654 /* Check if the given identifier is a sweet16 register. Return -1 if this is
 655  * not the case, return the register number otherwise.
 656  */
 657 {
 658     unsigned RegNum;
 659     char Check;
 660
 661     if (Ident[0] != 'r' && Ident[0] != 'R') {
 662         return -1;
 663     }
 664     if (!IsDigit (Ident[1])) {
 665         return -1;
 666     }
 667
 668     if (sscanf (Ident+1, "%u%c", &RegNum, &Check) != 1 || RegNum > 15) {
 669         /* Invalid register */
 670         return -1;
 671     }
 672
 673     /* The register number is valid */
 674     return (int) RegNum;
 675 }
 676
 677
 678
 679 void NextRawTok (void)
 680 /* Read the next raw token from the input stream */
 681 {
 682     /* If we've a forced end of assembly, don't read further */
 683     if (ForcedEnd) {
 684         Tok = TOK_EOF;
 685         return;
 686     }
 687
 688 Restart:
 689     /* Check if we have tokens from another input source */
 690     if (InputFromStack ()) {
 691         return;
 692     }
 693
 694 Again:
 695     /* Skip whitespace, remember if we had some */
 696     if ((WS = IsBlank (C)) != 0) {
 697         do {
 698             NextChar ();
 699         } while (IsBlank (C));
 700     }
 701
 702     /* If we're reading from the file, update the location from where the
 703      * next token will be read. If we're reading from input data, keep the
 704      * current position.
 705      */
 706     if (IData == 0) {
 707         CurPos = IFile->Pos;
 708     }
 709
 710     /* Hex number or PC symbol? */
 711     if (C == '$') {
 712         NextChar ();
 713
 714         /* Hex digit must follow or DollarIsPC must be enabled */
 715         if (!IsXDigit (C)) {
 716             if (DollarIsPC) {
 717                 Tok = TOK_PC;
 718                 return;
 719             } else {
 720                 Error ("Hexadecimal digit expected");
 721             }
 722         }
 723
 724         /* Read the number */
 725         IVal = 0;
 726         while (IsXDigit (C)) {
 727             if (IVal & 0xF0000000) {
 728                 Error ("Overflow in hexadecimal number");
 729                 IVal = 0;
 730             }
 731             IVal = (IVal << 4) + DigitVal (C);
 732             NextChar ();
 733         }
 734
 735         /* This is an integer constant */
 736         Tok = TOK_INTCON;
 737         return;
 738     }
 739
 740     /* Binary number? */
 741     if (C == '%') {
 742         NextChar ();
 743
 744         /* 0 or 1 must follow */
 745         if (!IsBDigit (C)) {
 746             Error ("Binary digit expected");
 747         }
 748
 749         /* Read the number */
 750         IVal = 0;
 751         while (IsBDigit (C)) {
 752             if (IVal & 0x80000000) {
 753                 Error ("Overflow in binary number");
 754                 IVal = 0;
 755             }
 756             IVal = (IVal << 1) + DigitVal (C);
 757             NextChar ();
 758         }
 759
 760         /* This is an integer constant */
 761         Tok = TOK_INTCON;
 762         return;
 763     }
 764
 765     /* Number? */
 766     if (IsDigit (C)) {
 767
 768         char Buf[16];
 769         unsigned Digits;
 770         unsigned Base;
 771         unsigned I;
 772         long     Max;
 773         unsigned DVal;
 774
 775         /* Ignore leading zeros */
 776         while (C == '0') {
 777             NextChar ();
 778         }
 779
 780         /* Read the number into Buf counting the digits */
 781         Digits = 0;
 782         while (IsXDigit (C)) {
 783
 784             /* Buf is big enough to allow any decimal and hex number to
 785              * overflow, so ignore excess digits here, they will be detected
 786              * when we convert the value.
 787              */
 788             if (Digits < sizeof (Buf)) {
 789                 Buf[Digits++] = C;
 790             }
 791
 792             NextChar ();
 793         }
 794
 795         /* Allow zilog/intel style hex numbers with a 'h' suffix */
 796         if (C == 'h' || C == 'H') {
 797             NextChar ();
 798             Base = 16;
 799             Max  = 0xFFFFFFFFUL / 16;
 800         } else {
 801             Base = 10;
 802             Max  = 0xFFFFFFFFUL / 10;
 803         }
 804
 805         /* Convert the number using the given base */
 806         IVal = 0;
 807         for (I = 0; I < Digits; ++I) {
 808             if (IVal > Max) {
 809                 Error ("Number out of range");
 810                 IVal = 0;
 811                 break;
 812             }
 813             DVal = DigitVal (Buf[I]);
 814             if (DVal > Base) {
 815                 Error ("Invalid digits in number");
 816                 IVal = 0;
 817                 break;
 818             }
 819             IVal = (IVal * Base) + DVal;
 820         }
 821
 822         /* This is an integer constant */
 823         Tok = TOK_INTCON;
 824         return;
 825     }
 826
 827     /* Control command? */
 828     if (C == '.') {
 829
 830         /* Remember and skip the dot */
 831         NextChar ();
 832
 833         /* Check if it's just a dot */
 834         if (!IsIdStart (C)) {
 835
 836             /* Just a dot */
 837             Tok = TOK_DOT;
 838
 839         } else {
 840
 841             /* Read the remainder of the identifier */
 842             SVal[0] = '.';
 843             ReadIdent (1);
 844
 845             /* Dot keyword, search for it */
 846             Tok = FindDotKeyword ();
 847             if (Tok == TOK_NONE) {
 848
 849                 /* Not found */
 850                 if (!LeadingDotInIdents) {
 851                     /* Invalid pseudo instruction */
 852                     Error ("`%s' is not a recognized control command", SVal);
 853                     goto Again;
 854                 }
 855
 856                 /* An identifier with a dot. Check if it's a define style
 857                  * macro.
 858                  */
 859                 if (IsDefine (SVal)) {
 860                     /* This is a define style macro - expand it */
 861                     MacExpandStart ();
 862                     goto Restart;
 863                 }
 864
 865                 /* Just an identifier with a dot */
 866                 Tok = TOK_IDENT;
 867             }
 868
 869         }
 870         return;
 871     }
 872
 873     /* Indirect op for sweet16 cpu. Must check this before checking for local
 874      * symbols, because these may also use the '@' symbol.
 875      */
 876     if (CPU == CPU_SWEET16 && C == '@') {
 877         NextChar ();
 878         Tok = TOK_AT;
 879         return;
 880     }
 881
 882     /* Local symbol? */
 883     if (C == LocalStart) {
 884
 885         /* Read the identifier */
 886         ReadIdent (0);
 887
 888         /* Start character alone is not enough */
 889         if (SVal [1] == '\0') {
 890             Error ("Invalid cheap local symbol");
 891             goto Again;
 892         }
 893
 894         /* A local identifier */
 895         Tok = TOK_LOCAL_IDENT;
 896         return;
 897     }
 898
 899
 900     /* Identifier or keyword? */
 901     if (IsIdStart (C)) {
 902
 903         /* Read the identifier */
 904         ReadIdent (0);
 905
 906         /* Check for special names. Bail out if we have identified the type of
 907          * the token. Go on if the token is an identifier.
 908          */
 909         if (SVal[1] == '\0') {
 910             switch (toupper (SVal [0])) {
 911
 912                 case 'A':
 913                     if (C == ':') {
 914                         NextChar ();
 915                         Tok = TOK_OVERRIDE_ABS;
 916                     } else {
 917                         Tok = TOK_A;
 918                     }
 919                     return;
 920
 921                 case 'F':
 922                     if (C == ':') {
 923                         NextChar ();
 924                         Tok = TOK_OVERRIDE_FAR;
 925                         return;
 926                     }
 927                     break;
 928
 929                 case 'S':
 930                     Tok = TOK_S;
 931                     return;
 932
 933                 case 'X':
 934                     Tok = TOK_X;
 935                     return;
 936
 937                 case 'Y':
 938                     Tok = TOK_Y;
 939                     return;
 940
 941                 case 'Z':
 942                     if (C == ':') {
 943                         NextChar ();
 944                         Tok = TOK_OVERRIDE_ZP;
 945                         return;
 946                     }
 947                     break;
 948
 949                 default:
 950                     break;
 951             }
 952
 953         } else if (CPU == CPU_SWEET16 && (IVal = Sweet16Reg (SVal)) >= 0) {
 954
 955             /* A sweet16 register number in sweet16 mode */
 956             Tok = TOK_REG;
 957             return;
 958
 959         }
 960
 961         /* Check for define style macro */
 962         if (IsDefine (SVal)) {
 963             /* Macro - expand it */
 964             MacExpandStart ();
 965             goto Restart;
 966         } else {
 967             /* An identifier */
 968             Tok = TOK_IDENT;
 969         }
 970         return;
 971     }
 972
 973     /* Ok, let's do the switch */
 974 CharAgain:
 975     switch (C) {
 976
 977         case '+':
 978             NextChar ();
 979             Tok = TOK_PLUS;
 980             return;
 981
 982         case '-':
 983             NextChar ();
 984             Tok = TOK_MINUS;
 985             return;
 986
 987         case '/':
 988             NextChar ();
 989             Tok = TOK_DIV;
 990             return;
 991
 992         case '*':
 993             NextChar ();
 994             Tok = TOK_MUL;
 995             return;
 996
 997         case '^':
 998             NextChar ();
 999             Tok = TOK_XOR;
1000             return;
1001
1002         case '&':
1003             NextChar ();
1004             if (C == '&') {
1005                 NextChar ();
1006                 Tok = TOK_BOOLAND;
1007             } else {
1008                 Tok = TOK_AND;
1009             }
1010             return;
1011
1012         case '|':
1013             NextChar ();
1014             if (C == '|') {
1015                 NextChar ();
1016                 Tok = TOK_BOOLOR;
1017             } else {
1018                 Tok = TOK_OR;
1019             }
1020             return;
1021
1022         case ':':
1023             NextChar ();
1024             switch (C) {
1025
1026                 case ':':
1027                     NextChar ();
1028                     Tok = TOK_NAMESPACE;
1029                     break;
1030
1031                 case '-':
1032                     IVal = 0;
1033                     do {
1034                         --IVal;
1035                         NextChar ();
1036                     } while (C == '-');
1037                     Tok = TOK_ULABEL;
1038                     break;
1039
1040                 case '+':
1041                     IVal = 0;
1042                     do {
1043                         ++IVal;
1044                         NextChar ();
1045                     } while (C == '+');
1046                     Tok = TOK_ULABEL;
1047                     break;
1048
1049                 case '=':
1050                     NextChar ();
1051                     Tok = TOK_ASSIGN;
1052                     break;
1053
1054                 default:
1055                     Tok = TOK_COLON;
1056                     break;
1057             }
1058             return;
1059
1060         case ',':
1061             NextChar ();
1062             Tok = TOK_COMMA;
1063             return;
1064
1065         case ';':
1066             NextChar ();
1067             while (C != '\n' && C != EOF) {
1068                 NextChar ();
1069             }
1070             goto CharAgain;
1071
1072         case '#':
1073             NextChar ();
1074             Tok = TOK_HASH;
1075             return;
1076
1077         case '(':
1078             NextChar ();
1079             Tok = TOK_LPAREN;
1080             return;
1081
1082         case ')':
1083             NextChar ();
1084             Tok = TOK_RPAREN;
1085             return;
1086
1087         case '[':
1088             NextChar ();
1089             Tok = TOK_LBRACK;
1090             return;
1091
1092         case ']':
1093             NextChar ();
1094             Tok = TOK_RBRACK;
1095             return;
1096
1097         case '{':
1098             NextChar ();
1099             Tok = TOK_LCURLY;
1100             return;
1101
1102         case '}':
1103             NextChar ();
1104             Tok = TOK_RCURLY;
1105             return;
1106
1107         case '<':
1108             NextChar ();
1109             if (C == '=') {
1110                 NextChar ();
1111                 Tok = TOK_LE;
1112             } else if (C == '<') {
1113                 NextChar ();
1114                 Tok = TOK_SHL;
1115             } else if (C == '>') {
1116                 NextChar ();
1117                 Tok = TOK_NE;
1118             } else {
1119                 Tok = TOK_LT;
1120             }
1121             return;
1122
1123         case '=':
1124             NextChar ();
1125             Tok = TOK_EQ;
1126             return;
1127
1128         case '!':
1129             NextChar ();
1130             Tok = TOK_BOOLNOT;
1131             return;
1132
1133         case '>':
1134             NextChar ();
1135             if (C == '=') {
1136                 NextChar ();
1137                 Tok = TOK_GE;
1138             } else if (C == '>') {
1139                 NextChar ();
1140                 Tok = TOK_SHR;
1141             } else {
1142                 Tok = TOK_GT;
1143             }
1144             return;
1145
1146         case '~':
1147             NextChar ();
1148             Tok = TOK_NOT;
1149             return;
1150
1151         case '\'':
1152             /* Hack: If we allow ' as terminating character for strings, read
1153              * the following stuff as a string, and check for a one character
1154              * string later.
1155              */
1156             if (LooseStringTerm) {
1157                 if (ReadStringConst ('\'') == 1) {
1158                     IVal = SVal[0];
1159                     Tok = TOK_CHARCON;
1160                 } else {
1161                     Tok = TOK_STRCON;
1162                 }
1163             } else {
1164                 /* Always a character constant */
1165                 NextChar ();
1166                 if (C == EOF || IsControl (C)) {
1167                     Error ("Illegal character constant");
1168                     goto CharAgain;
1169                 }
1170                 IVal = C;
1171                 Tok = TOK_CHARCON;
1172                 NextChar ();
1173                 if (C != '\'') {
1174                     if (!MissingCharTerm) {
1175                         Error ("Illegal character constant");
1176                     }
1177                 } else {
1178                     NextChar ();
1179                 }
1180             }
1181             return;
1182
1183         case '\"':
1184             ReadStringConst ('\"');
1185             Tok = TOK_STRCON;
1186             return;
1187
1188         case '\\':
1189             /* Line continuation? */
1190             if (LineCont) {
1191                 NextChar ();
1192                 if (C == '\n') {
1193                     /* Handle as white space */
1194                     NextChar ();
1195                     C = ' ';
1196                     goto Again;
1197                 }
1198             }
1199             break;
1200
1201         case '\n':
1202             NextChar ();
1203             Tok = TOK_SEP;
1204             return;
1205
1206         case EOF:
1207             /* Check if we have any open .IFs in this file */
1208             CheckOpenIfs ();
1209             /* Check if we have any open token lists in this file */
1210             CheckInputStack ();
1211
1212             /* If this was an include file, then close it and read the next
1213              * token. When an include file is opened, the last token of the
1214              * old file is not skipped, to prevent the lookahead to read
1215              * the next line of the old input file. So we do effectively
1216              * skip the last token in the old file (the file name of the
1217              * include statement).
1218              * In case of the main file, do not close it, but return EOF.
1219              */
1220             if (IData) {
1221                 /* Input came from internal data */
1222                 DoneInputData ();
1223                 goto Again;
1224             } else if (ICount > 1) {
1225                 DoneInputFile ();
1226                 goto Again;
1227             } else {
1228                 Tok = TOK_EOF;
1229             }
1230             return;
1231
1232     }
1233
1234     /* If we go here, we could not identify the current character. Skip it
1235      * and try again.
1236      */
1237     Error ("Invalid input character: 0x%02X", C & 0xFF);
1238     NextChar ();
1239     goto Again;
1240 }
1241
1242
1243
1244 int TokHasSVal (enum Token Tok)
1245 /* Return true if the given token has an attached SVal */
1246 {
1247     return (Tok == TOK_IDENT || TOK_LOCAL_IDENT || Tok == TOK_STRCON);
1248 }
1249
1250
1251
1252 int TokHasIVal (enum Token Tok)
1253 /* Return true if the given token has an attached IVal */
1254 {
1255     return (Tok == TOK_INTCON || Tok == TOK_CHARCON || Tok == TOK_REG);
1256 }
1257
1258
1259
1260 int GetSubKey (const char** Keys, unsigned Count)
1261 /* Search for a subkey in a table of keywords. The current token must be an
1262  * identifier and all keys must be in upper case. The identifier will be
1263  * uppercased in the process. The function returns the index of the keyword,
1264  * or -1 if the keyword was not found.
1265  */
1266 {
1267     unsigned I;
1268
1269     /* Must have an identifier */
1270     PRECONDITION (Tok == TOK_IDENT);
1271
1272     /* If we aren't in ignore case mode, we have to uppercase the identifier */
1273     if (!IgnoreCase) {
1274         UpcaseSVal ();
1275     }
1276
1277     /* Do a linear search (a binary search is not worth the effort) */
1278     for (I = 0; I < Count; ++I) {
1279         if (strcmp (SVal, Keys [I]) == 0) {
1280             /* Found it */
1281             return I;
1282         }
1283     }
1284
1285     /* Not found */
1286     return -1;
1287 }
1288
1289
1290
1291 unsigned char ParseAddrSize (void)
1292 /* Check if the next token is a keyword that denotes an address size specifier.
1293  * If so, return the corresponding address size constant, otherwise output an
1294  * error message and return ADDR_SIZE_DEFAULT.
1295  */
1296 {
1297     static const char* Keys[] = {
1298         "DIRECT", "ZEROPAGE", "ZP",
1299         "ABSOLUTE", "ABS", "NEAR",
1300         "FAR",
1301         "LONG", "DWORD",
1302     };
1303
1304     /* Check for an identifier */
1305     if (Tok != TOK_IDENT) {
1306         Error ("Address size specifier expected");
1307         return ADDR_SIZE_DEFAULT;
1308     }
1309
1310     /* Search for the attribute */
1311     switch (GetSubKey (Keys, sizeof (Keys) / sizeof (Keys [0]))) {
1312         case 0:
1313         case 1:
1314         case 2: return ADDR_SIZE_ZP;
1315         case 3:
1316         case 4:
1317         case 5: return ADDR_SIZE_ABS;
1318         case 6: return ADDR_SIZE_FAR;
1319         case 7:
1320         case 8: return ADDR_SIZE_LONG;
1321         default:
1322             Error ("Address size specifier expected");
1323             return ADDR_SIZE_DEFAULT;
1324     }
1325 }
1326
1327
1328
void InitScanner (const char* InFile)
/* Set up the scanner for use. InFile is the name of the input file; it is
 * handed to NewInputFile, which makes it the current input.
 */
{
    /* Start reading from the given file */
    NewInputFile (InFile);
}
1335
1336
1337
void DoneScanner (void)
/* Shut down the scanner and release its resources by closing the current
 * input file.
 */
{
    /* Counterpart to the NewInputFile call made when the scanner was set up */
    DoneInputFile ();
}
1343
1344
1345