git.sur5r.net Git - cc65/blob - src/ca65/scanner.c

   1 /*****************************************************************************/
   2 /*                                                                           */
   3 /*                                 scanner.c                                 */
   4 /*                                                                           */
   5 /*                  The scanner for the ca65 macroassembler                  */
   6 /*                                                                           */
   7 /*                                                                           */
   8 /*                                                                           */
   9 /* (C) 1998-2004 Ullrich von Bassewitz                                       */
  10 /*               Römerstraße 52                                              */
  11 /*               D-70794 Filderstadt                                         */
  12 /* EMail:        uz@cc65.org                                                 */
  13 /*                                                                           */
  14 /*                                                                           */
  15 /* This software is provided 'as-is', without any expressed or implied       */
  16 /* warranty.  In no event will the authors be held liable for any damages    */
  17 /* arising from the use of this software.                                    */
  18 /*                                                                           */
  19 /* Permission is granted to anyone to use this software for any purpose,     */
  20 /* including commercial applications, and to alter it and redistribute it    */
  21 /* freely, subject to the following restrictions:                            */
  22 /*                                                                           */
  23 /* 1. The origin of this software must not be misrepresented; you must not   */
  24 /*    claim that you wrote the original software. If you use this software   */
  25 /*    in a product, an acknowledgment in the product documentation would be  */
  26 /*    appreciated but is not required.                                       */
  27 /* 2. Altered source versions must be plainly marked as such, and must not   */
  28 /*    be misrepresented as being the original software.                      */
  29 /* 3. This notice may not be removed or altered from any source              */
  30 /*    distribution.                                                          */
  31 /*                                                                           */
  32 /*****************************************************************************/
  33
  34
  35
  36 #include <stdio.h>
  37 #include <stdlib.h>
  38 #include <string.h>
  39 #include <ctype.h>
  40 #include <errno.h>
  41 #include <sys/types.h>          /* EMX needs this */
  42 #include <sys/stat.h>
  43
  44 /* common */
  45 #include "addrsize.h"
  46 #include "chartype.h"
  47 #include "check.h"
  48 #include "fname.h"
  49 #include "xmalloc.h"
  50
  51 /* ca65 */
  52 #include "condasm.h"
  53 #include "error.h"
  54 #include "filetab.h"
  55 #include "global.h"
  56 #include "incpath.h"
  57 #include "instr.h"
  58 #include "istack.h"
  59 #include "listing.h"
  60 #include "macro.h"
  61 #include "toklist.h"
  62 #include "scanner.h"
  63
  64
  65
  66 /*****************************************************************************/
  67 /*                                   Data                                    */
  68 /*****************************************************************************/
  69
  70
  71
  72 enum Token Tok = TOK_NONE;              /* Current token, TOK_NONE until the first one is read */
  73 int WS;                                 /* Flag: Whitespace before token */
  74 long IVal;                              /* Integer token attribute (value of numeric constants) */
  75 char SVal[MAX_STR_LEN+1];               /* String token attribute, MAX_STR_LEN chars plus NUL */
  76
  77 FilePos CurPos = { 0, 0, 0 };           /* Name and position in current file */
  78
  79
  80
  81 /* Struct to handle include files. Each open file gets one of these; they are
      * kept in a singly linked list via Next with the current file at the head.
      */
  82 typedef struct InputFile InputFile;
  83 struct InputFile {
  84     FILE*           F;                  /* Input file descriptor */
  85     FilePos         Pos;                /* Position in file; Pos.Col indexes Line */
  86     enum Token      Tok;                /* Token saved on open, restored on close */
  87     int             C;                  /* Character saved on open, restored on close */
  88     char            Line[256];          /* The current input line */
  89     InputFile*      Next;               /* Linked list of input files */
  90 };
  91
  92 /* Struct to handle textual input data. The data is a NUL terminated string
      * that is read character by character in preference to the input file.
      */
  93 typedef struct InputData InputData;
  94 struct InputData {
  95     char*           Data;               /* Pointer to the data */
  96     const char*     Pos;                /* Pointer to current position */
  97     int             Malloced;           /* Memory was malloced; Data is freed when done */
  98     enum Token      Tok;                /* Token saved on push, restored when done */
  99     int             C;                  /* Character saved on push, restored when done */
 100     InputData*      Next;               /* Linked list of input data */
 101 };
 102
 103 /* Current input variables. If IData is set, characters are taken from there,
      * otherwise from IFile.
      */
 104 static InputFile* IFile         = 0;    /* Current input file */
 105 static InputData* IData         = 0;    /* Current input memory data */
 106 static unsigned   ICount        = 0;    /* Count of input files */
 107 static int        C             = 0;    /* Current input character */
 108
 109 /* Force end of assembly. When set, NextRawTok delivers TOK_EOF only. */
 110 int               ForcedEnd     = 0;
 111
 112 /* List of dot keywords with the corresponding tokens. The table is searched
      * with bsearch() using strcmp() on the Key field, so the entries MUST be
      * kept sorted in ascending strcmp() order. Keys are written in upper case.
      * Several spellings may map to the same token (.BYT/.BYTE, .MAC/.MACRO ...).
      */
 113 struct DotKeyword {
 114     const char* Key;                    /* MUST be first field */
 115     enum Token  Tok;                    /* Token delivered for this keyword */
 116 } DotKeywords [] = {
 117     { ".A16",           TOK_A16         },
 118     { ".A8",            TOK_A8          },
 119     { ".ADDR",          TOK_ADDR        },
 120     { ".ALIGN",         TOK_ALIGN       },
 121     { ".AND",           TOK_BOOLAND     },
 122     { ".ASCIIZ",        TOK_ASCIIZ      },
 123     { ".ASSERT",        TOK_ASSERT      },
 124     { ".AUTOIMPORT",    TOK_AUTOIMPORT  },
 125     { ".BANKBYTE",      TOK_BANKBYTE    },
 126     { ".BITAND",        TOK_AND         },
 127     { ".BITNOT",        TOK_NOT         },
 128     { ".BITOR",         TOK_OR          },
 129     { ".BITXOR",        TOK_XOR         },
 130     { ".BLANK",         TOK_BLANK       },
 131     { ".BSS",           TOK_BSS         },
 132     { ".BYT",           TOK_BYTE        },
 133     { ".BYTE",          TOK_BYTE        },
 134     { ".CASE",          TOK_CASE        },
 135     { ".CHARMAP",       TOK_CHARMAP     },
 136     { ".CODE",          TOK_CODE        },
 137     { ".CONCAT",        TOK_CONCAT      },
 138     { ".CONDES",        TOK_CONDES      },
 139     { ".CONST",         TOK_CONST       },
 140     { ".CONSTRUCTOR",   TOK_CONSTRUCTOR },
 141     { ".CPU",           TOK_CPU         },
 142     { ".DATA",          TOK_DATA        },
 143     { ".DBG",           TOK_DBG         },
 144     { ".DBYT",          TOK_DBYT        },
 145     { ".DEBUGINFO",     TOK_DEBUGINFO   },
 146     { ".DEF",           TOK_DEFINED     },
 147     { ".DEFINE",        TOK_DEFINE      },
 148     { ".DEFINED",       TOK_DEFINED     },
 149     { ".DESTRUCTOR",    TOK_DESTRUCTOR  },
 150     { ".DWORD",         TOK_DWORD       },
 151     { ".ELSE",          TOK_ELSE        },
 152     { ".ELSEIF",        TOK_ELSEIF      },
 153     { ".END",           TOK_END         },
 154     { ".ENDENUM",       TOK_ENDENUM     },
 155     { ".ENDIF",         TOK_ENDIF       },
 156     { ".ENDMAC",        TOK_ENDMACRO    },
 157     { ".ENDMACRO",      TOK_ENDMACRO    },
 158     { ".ENDPROC",       TOK_ENDPROC     },
 159     { ".ENDREP",        TOK_ENDREP      },
 160     { ".ENDREPEAT",     TOK_ENDREP      },
 161     { ".ENDSCOPE",      TOK_ENDSCOPE    },
 162     { ".ENDSTRUCT",     TOK_ENDSTRUCT   },
 163     { ".ENDUNION",      TOK_ENDUNION    },
 164     { ".ENUM",          TOK_ENUM        },
 165     { ".ERROR",         TOK_ERROR       },
 166     { ".EXITMAC",       TOK_EXITMACRO   },
 167     { ".EXITMACRO",     TOK_EXITMACRO   },
 168     { ".EXPORT",        TOK_EXPORT      },
 169     { ".EXPORTZP",      TOK_EXPORTZP    },
 170     { ".FARADDR",       TOK_FARADDR     },
 171     { ".FEATURE",       TOK_FEATURE     },
 172     { ".FILEOPT",       TOK_FILEOPT     },
 173     { ".FOPT",          TOK_FILEOPT     },
 174     { ".FORCEIMPORT",   TOK_FORCEIMPORT },
 175     { ".FORCEWORD",     TOK_FORCEWORD   },
 176     { ".GLOBAL",        TOK_GLOBAL      },
 177     { ".GLOBALZP",      TOK_GLOBALZP    },
 178     { ".HIBYTE",        TOK_HIBYTE      },
 179     { ".HIWORD",        TOK_HIWORD      },
 180     { ".I16",           TOK_I16         },
 181     { ".I8",            TOK_I8          },
 182     { ".IF",            TOK_IF          },
 183     { ".IFBLANK",       TOK_IFBLANK     },
 184     { ".IFCONST",       TOK_IFCONST     },
 185     { ".IFDEF",         TOK_IFDEF       },
 186     { ".IFNBLANK",      TOK_IFNBLANK    },
 187     { ".IFNCONST",      TOK_IFNCONST    },
 188     { ".IFNDEF",        TOK_IFNDEF      },
 189     { ".IFNREF",        TOK_IFNREF      },
 190     { ".IFP02",         TOK_IFP02       },
 191     { ".IFP816",        TOK_IFP816      },
 192     { ".IFPC02",        TOK_IFPC02      },
 193     { ".IFPSC02",       TOK_IFPSC02     },
 194     { ".IFREF",         TOK_IFREF       },
 195     { ".IMPORT",        TOK_IMPORT      },
 196     { ".IMPORTZP",      TOK_IMPORTZP    },
 197     { ".INCBIN",        TOK_INCBIN      },
 198     { ".INCLUDE",       TOK_INCLUDE     },
 199     { ".INTERRUPTOR",   TOK_INTERRUPTOR },
 200     { ".LEFT",          TOK_LEFT        },
 201     { ".LINECONT",      TOK_LINECONT    },
 202     { ".LIST",          TOK_LIST        },
 203     { ".LISTBYTES",     TOK_LISTBYTES   },
 204     { ".LOBYTE",        TOK_LOBYTE      },
 205     { ".LOCAL",         TOK_LOCAL       },
 206     { ".LOCALCHAR",     TOK_LOCALCHAR   },
 207     { ".LOWORD",        TOK_LOWORD      },
 208     { ".MAC",           TOK_MACRO       },
 209     { ".MACPACK",       TOK_MACPACK     },
 210     { ".MACRO",         TOK_MACRO       },
 211     { ".MATCH",         TOK_MATCH       },
 212     { ".MID",           TOK_MID         },
 213     { ".MOD",           TOK_MOD         },
 214     { ".NOT",           TOK_BOOLNOT     },
 215     { ".NULL",          TOK_NULL        },
 216     { ".OR",            TOK_BOOLOR      },
 217     { ".ORG",           TOK_ORG         },
 218     { ".OUT",           TOK_OUT         },
 219     { ".P02",           TOK_P02         },
 220     { ".P816",          TOK_P816        },
 221     { ".PAGELEN",       TOK_PAGELENGTH  },
 222     { ".PAGELENGTH",    TOK_PAGELENGTH  },
 223     { ".PARAMCOUNT",    TOK_PARAMCOUNT  },
 224     { ".PC02",          TOK_PC02        },
 225     { ".POPSEG",        TOK_POPSEG      },
 226     { ".PROC",          TOK_PROC        },
 227     { ".PSC02",         TOK_PSC02       },
 228     { ".PUSHSEG",       TOK_PUSHSEG     },
 229     { ".REF",           TOK_REFERENCED  },
 230     { ".REFERENCED",    TOK_REFERENCED  },
 231     { ".RELOC",         TOK_RELOC       },
 232     { ".REPEAT",        TOK_REPEAT      },
 233     { ".RES",           TOK_RES         },
 234     { ".RIGHT",         TOK_RIGHT       },
 235     { ".RODATA",        TOK_RODATA      },
 236     { ".SCOPE",         TOK_SCOPE       },
 237     { ".SEGMENT",       TOK_SEGMENT     },
 238     { ".SETCPU",        TOK_SETCPU      },
 239     { ".SHL",           TOK_SHL         },
 240     { ".SHR",           TOK_SHR         },
 241     { ".SIZEOF",        TOK_SIZEOF      },
 242     { ".SMART",         TOK_SMART       },
 243     { ".STRAT",         TOK_STRAT       },
 244     { ".STRING",        TOK_STRING      },
 245     { ".STRLEN",        TOK_STRLEN      },
 246     { ".STRUCT",        TOK_STRUCT      },
 247     { ".SUNPLUS",       TOK_SUNPLUS     },
 248     { ".TAG",           TOK_TAG         },
 249     { ".TCOUNT",        TOK_TCOUNT      },
 250     { ".TIME",          TOK_TIME        },
 251     { ".UNION",         TOK_UNION       },
 252     { ".VERSION",       TOK_VERSION     },
 253     { ".WARNING",       TOK_WARNING     },
 254     { ".WORD",          TOK_WORD        },
 255     { ".XMATCH",        TOK_XMATCH      },
 256     { ".XOR",           TOK_BOOLXOR     },
 257     { ".ZEROPAGE",      TOK_ZEROPAGE    },
 258 };
 259
 260
 261
 262 /*****************************************************************************/
 263 /*                                 Forwards                                  */
 264 /*****************************************************************************/
 265
 266
 267
 268 static void NextChar (void);
 269 /* Read the next character from the input file */
 270
 271
 272
 273 /*****************************************************************************/
 274 /*                    Character classification functions                     */
 275 /*****************************************************************************/
 276
 277
 278
 279 static int IsIdChar (int C)
 280 /* Return true if the character is a valid character for an identifier */
 281 {
 282     return IsAlNum (C)                  ||
 283            (C == '_')                   ||
 284            (C == '@' && AtInIdents)     ||
 285            (C == '$' && DollarInIdents);
 286 }
 287
 288
 289
 290 static int IsIdStart (int C)
 291 /* Return true if the character may start an identifier */
 292 {
 293     return IsAlpha (C) || C == '_';
 294 }
 295
 296
 297
 298 /*****************************************************************************/
 299 /*                                   Code                                    */
 300 /*****************************************************************************/
 301
 302
 303
 304 void NewInputFile (const char* Name)
 305 /* Open a new input file */
 306 {
 307     InputFile* I;
 308     FILE* F;
 309
 310     /* First try to open the file */
 311     F = fopen (Name, "r");
 312     if (F == 0) {
 313
 314         char* PathName;
 315
 316         /* Error (fatal error if this is the main file) */
 317         if (ICount == 0) {
 318             Fatal ("Cannot open input file `%s': %s", Name, strerror (errno));
 319         }
 320
 321         /* We are on include level. Search for the file in the include
 322          * directories.
 323          */
 324         PathName = FindInclude (Name);
 325         if (PathName == 0 || (F = fopen (PathName, "r")) == 0) {
 326             /* Not found or cannot open, print an error and bail out */
 327             Error ("Cannot open include file `%s': %s", Name, strerror (errno));
 328         }
 329
 330         /* Free the allocated memory */
 331         xfree (PathName);
 332
 333     }
 334
 335     /* check again if we do now have an open file */
 336     if (F != 0) {
 337
 338         unsigned FileIdx;
 339
 340         /* Stat the file and remember the values */
 341         struct stat Buf;
 342         if (fstat (fileno (F), &Buf) != 0) {
 343             Fatal ("Cannot stat input file `%s': %s", Name, strerror (errno));
 344         }
 345
 346         /* Add the file to the input file table and remember the index */
 347         FileIdx = AddFile (Name, Buf.st_size, Buf.st_mtime);
 348
 349         /* Create a new state variable and initialize it */
 350         I           = xmalloc (sizeof (*I));
 351         I->F        = F;
 352         I->Pos.Line = 0;
 353         I->Pos.Col  = 0;
 354         I->Pos.Name = FileIdx;
 355         I->Tok      = Tok;
 356         I->C        = C;
 357         I->Line[0]  = '\0';
 358
 359         /* Use the new file */
 360         I->Next     = IFile;
 361         IFile       = I;
 362         ++ICount;
 363
 364         /* Prime the pump */
 365         NextChar ();
 366     }
 367 }
 368
 369
 370
 371 void DoneInputFile (void)
 372 /* Close the current input file */
 373 {
 374     InputFile* I;
 375
 376     /* Restore the old token */
 377     Tok = IFile->Tok;
 378     C   = IFile->C;
 379
 380     /* Save a pointer to the current struct, then set it back */
 381     I     = IFile;
 382     IFile = I->Next;
 383
 384     /* Cleanup the current stuff */
 385     fclose (I->F);
 386     xfree (I);
 387     --ICount;
 388 }
 389
 390
 391
 392 void NewInputData (char* Data, int Malloced)
 393 /* Add a chunk of input data to the input stream */
 394 {
 395     InputData* I;
 396
 397     /* Create a new state variable and initialize it */
 398     I           = xmalloc (sizeof (*I));
 399     I->Data     = Data;
 400     I->Pos      = Data;
 401     I->Malloced = Malloced;
 402     I->Tok      = Tok;
 403     I->C        = C;
 404
 405     /* Use the new data */
 406     I->Next     = IData;
 407     IData       = I;
 408
 409     /* Prime the pump */
 410     NextChar ();
 411 }
 412
 413
 414
 415 static void DoneInputData (void)
 416 /* End the current input data stream */
 417 {
 418     InputData* I;
 419
 420     /* Restore the old token */
 421     Tok = IData->Tok;
 422     C   = IData->C;
 423
 424     /* Save a pointer to the current struct, then set it back */
 425     I     = IData;
 426     IData = I->Next;
 427
 428     /* Cleanup the current stuff */
 429     if (I->Malloced) {
 430         xfree (I->Data);
 431     }
 432     xfree (I);
 433 }
 434
 435
 436
 437 static unsigned DigitVal (unsigned char C)
 438 /* Convert a digit into it's numerical representation */
 439 {
 440     if (IsDigit (C)) {
 441         return C - '0';
 442     } else {
 443         return toupper (C) - 'A' + 10;
 444     }
 445 }
 446
 447
 448
 449 static void NextChar (void)
 450 /* Read the next character from the input file */
 451 {
 452     /* If we have an input data structure, read from there */
 453     if (IData) {
 454
 455         C = *IData->Pos++;
 456         if (C == '\0') {
 457             /* End of input data, will set to last file char */
 458             DoneInputData ();
 459         }
 460
 461     } else {
 462
 463         /* Check for end of line, read the next line if needed */
 464         while (IFile->Line [IFile->Pos.Col] == '\0') {
 465
 466             unsigned Len, Removed;
 467
 468             /* End of current line reached, read next line */
 469             if (fgets (IFile->Line, sizeof (IFile->Line), IFile->F) == 0) {
 470                 /* End of file. Add an empty line to the listing. This is a
 471                  * small hack needed to keep the PC output in sync.
 472                  */
 473                 NewListingLine ("", IFile->Pos.Name, ICount);
 474                 C = EOF;
 475                 return;
 476             }
 477
 478             /* For better handling of files with unusual line endings (DOS
 479              * files that are accidently translated on Unix for example),
 480              * first remove all whitespace at the end, then add a single
 481              * newline.
 482              */
 483             Len = strlen (IFile->Line);
 484             Removed = 0;
 485             while (Len > 0 && IsSpace (IFile->Line[Len-1])) {
 486                 ++Removed;
 487                 --Len;
 488             }
 489             if (Removed) {
 490                 IFile->Line[Len+0] = '\n';
 491                 IFile->Line[Len+1] = '\0';
 492             }
 493
 494             /* One more line */
 495             IFile->Pos.Line++;
 496             IFile->Pos.Col = 0;
 497
 498             /* Remember the new line for the listing */
 499             NewListingLine (IFile->Line, IFile->Pos.Name, ICount);
 500
 501         }
 502
 503         /* Return the next character from the file */
 504         C = IFile->Line [IFile->Pos.Col++];
 505
 506     }
 507 }
 508
 509
 510
 511 void LocaseSVal (void)
 512 /* Make SVal lower case */
 513 {
 514     unsigned I = 0;
 515     while (SVal [I]) {
 516         SVal [I] = tolower (SVal [I]);
 517         ++I;
 518     }
 519 }
 520
 521
 522
 523 void UpcaseSVal (void)
 524 /* Make SVal upper case */
 525 {
 526     unsigned I = 0;
 527     while (SVal [I]) {
 528         SVal [I] = toupper (SVal [I]);
 529         ++I;
 530     }
 531 }
 532
 533
 534
 535 static int CmpDotKeyword (const void* K1, const void* K2)
 536 /* Compare function for the dot keyword search */
 537 {
 538     return strcmp (((struct DotKeyword*)K1)->Key, ((struct DotKeyword*)K2)->Key);
 539 }
 540
 541
 542
 543 static unsigned char FindDotKeyword (void)
 544 /* Find the dot keyword in SVal. Return the corresponding token if found,
 545  * return TOK_NONE if not found.
 546  */
 547 {
 548     static const struct DotKeyword K = { SVal, 0 };
 549     struct DotKeyword* R;
 550
 551     /* If we aren't in ignore case mode, we have to uppercase the keyword */
 552     if (!IgnoreCase) {
 553         UpcaseSVal ();
 554     }
 555
 556     /* Search for the keyword */
 557     R = bsearch (&K, DotKeywords, sizeof (DotKeywords) / sizeof (DotKeywords [0]),
 558                  sizeof (DotKeywords [0]), CmpDotKeyword);
 559     if (R != 0) {
 560         return R->Tok;
 561     } else {
 562         return TOK_NONE;
 563     }
 564 }
 565
 566
 567
 568 static void ReadIdent (unsigned Index)
 569 /* Read an identifier from the current input position into Ident. Filling SVal
 570  * starts at Index with the current character in C. It is assumed that any
 571  * characters already filled in are ok, and the character in C is checked.
 572  */
 573 {
 574     /* Read the identifier */
 575     do {
 576         if (Index < MAX_STR_LEN) {
 577             SVal [Index++] = C;
 578         }
 579         NextChar ();
 580     } while (IsIdChar (C));
 581     SVal [Index] = '\0';
 582
 583     /* If we should ignore case, convert the identifier to upper case */
 584     if (IgnoreCase) {
 585         UpcaseSVal ();
 586     }
 587 }
 588
 589
 590
 591 static unsigned ReadStringConst (int StringTerm)
 592 /* Read a string constant into SVal. Check for maximum string length and all
 593  * other stuff. The length of the string is returned.
 594  */
 595 {
 596     unsigned I;
 597
 598     /* Skip the leading string terminator */
 599     NextChar ();
 600
 601     /* Read the string */
 602     I = 0;
 603     while (1) {
 604         if (C == StringTerm) {
 605             break;
 606         }
 607         if (C == '\n' || C == EOF) {
 608             Error ("Newline in string constant");
 609             break;
 610         }
 611
 612         /* Check for string length, print an error message once */
 613         if (I == MAX_STR_LEN) {
 614             Error ("Maximum string size exceeded");
 615         } else if (I < MAX_STR_LEN) {
 616             SVal [I] = C;
 617         }
 618         ++I;
 619
 620         /* Skip the character */
 621         NextChar ();
 622     }
 623
 624     /* Skip the trailing terminator */
 625     NextChar ();
 626
 627     /* Terminate the string */
 628     if (I >= MAX_STR_LEN) {
 629         I = MAX_STR_LEN;
 630     }
 631     SVal [I] = '\0';
 632
 633     /* Return the length of the string */
 634     return I;
 635 }
 636
 637
 638
 639 static int Sweet16Reg (const char* Ident)
 640 /* Check if the given identifier is a sweet16 register. Return -1 if this is
 641  * not the case, return the register number otherwise.
 642  */
 643 {
 644     unsigned RegNum;
 645     char Check;
 646
 647     if (Ident[0] != 'r' && Ident[0] != 'R') {
 648         return -1;
 649     }
 650     if (!IsDigit (Ident[1])) {
 651         return -1;
 652     }
 653
 654     if (sscanf (Ident+1, "%u%c", &RegNum, &Check) != 1 || RegNum > 15) {
 655         /* Invalid register */
 656         return -1;
 657     }
 658
 659     /* The register number is valid */
 660     return (int) RegNum;
 661 }
 662
 663
 664
 665 void NextRawTok (void)
 666 /* Read the next raw token from the input stream */
 667 {
 668     /* If we've a forced end of assembly, don't read further */
 669     if (ForcedEnd) {
 670         Tok = TOK_EOF;
 671         return;
 672     }
 673
 674 Restart:
 675     /* Check if we have tokens from another input source */
 676     if (InputFromStack ()) {
 677         return;
 678     }
 679
 680 Again:
 681     /* Skip whitespace, remember if we had some */
 682     if ((WS = IsBlank (C)) != 0) {
 683         do {
 684             NextChar ();
 685         } while (IsBlank (C));
 686     }
 687
 688     /* If we're reading from the file, update the location from where the
 689      * next token will be read. If we're reading from input data, keep the
 690      * current position.
 691      */
 692     if (IData == 0) {
 693         CurPos = IFile->Pos;
 694     }
 695
 696     /* Hex number or PC symbol? */
 697     if (C == '$') {
 698         NextChar ();
 699
 700         /* Hex digit must follow or DollarIsPC must be enabled */
 701         if (!IsXDigit (C)) {
 702             if (DollarIsPC) {
 703                 Tok = TOK_PC;
 704                 return;
 705             } else {
 706                 Error ("Hexadecimal digit expected");
 707             }
 708         }
 709
 710         /* Read the number */
 711         IVal = 0;
 712         while (IsXDigit (C)) {
 713             if (IVal & 0xF0000000) {
 714                 Error ("Overflow in hexadecimal number");
 715                 IVal = 0;
 716             }
 717             IVal = (IVal << 4) + DigitVal (C);
 718             NextChar ();
 719         }
 720
 721         /* This is an integer constant */
 722         Tok = TOK_INTCON;
 723         return;
 724     }
 725
 726     /* Binary number? */
 727     if (C == '%') {
 728         NextChar ();
 729
 730         /* 0 or 1 must follow */
 731         if (!IsBDigit (C)) {
 732             Error ("Binary digit expected");
 733         }
 734
 735         /* Read the number */
 736         IVal = 0;
 737         while (IsBDigit (C)) {
 738             if (IVal & 0x80000000) {
 739                 Error ("Overflow in binary number");
 740                 IVal = 0;
 741             }
 742             IVal = (IVal << 1) + DigitVal (C);
 743             NextChar ();
 744         }
 745
 746         /* This is an integer constant */
 747         Tok = TOK_INTCON;
 748         return;
 749     }
 750
 751     /* Number? */
 752     if (IsDigit (C)) {
 753
 754         char Buf[16];
 755         unsigned Digits;
 756         unsigned Base;
 757         unsigned I;
 758         long     Max;
 759         unsigned DVal;
 760
 761         /* Ignore leading zeros */
 762         while (C == '0') {
 763             NextChar ();
 764         }
 765
 766         /* Read the number into Buf counting the digits */
 767         Digits = 0;
 768         while (IsXDigit (C)) {
 769
 770             /* Buf is big enough to allow any decimal and hex number to
 771              * overflow, so ignore excess digits here, they will be detected
 772              * when we convert the value.
 773              */
 774             if (Digits < sizeof (Buf)) {
 775                 Buf[Digits++] = C;
 776             }
 777
 778             NextChar ();
 779         }
 780
 781         /* Allow zilog/intel style hex numbers with a 'h' suffix */
 782         if (C == 'h' || C == 'H') {
 783             NextChar ();
 784             Base = 16;
 785             Max  = 0xFFFFFFFFUL / 16;
 786         } else {
 787             Base = 10;
 788             Max  = 0xFFFFFFFFUL / 10;
 789         }
 790
 791         /* Convert the number using the given base */
 792         IVal = 0;
 793         for (I = 0; I < Digits; ++I) {
 794             if (IVal > Max) {
 795                 Error ("Number out of range");
 796                 IVal = 0;
 797                 break;
 798             }
 799             DVal = DigitVal (Buf[I]);
 800             if (DVal > Base) {
 801                 Error ("Invalid digits in number");
 802                 IVal = 0;
 803                 break;
 804             }
 805             IVal = (IVal * Base) + DVal;
 806         }
 807
 808         /* This is an integer constant */
 809         Tok = TOK_INTCON;
 810         return;
 811     }
 812
 813     /* Control command? */
 814     if (C == '.') {
 815
 816         /* Remember and skip the dot */
 817         NextChar ();
 818
 819         /* Check if it's just a dot */
 820         if (!IsIdStart (C)) {
 821
 822             /* Just a dot */
 823             Tok = TOK_DOT;
 824
 825         } else {
 826
 827             /* Read the remainder of the identifier */
 828             SVal[0] = '.';
 829             ReadIdent (1);
 830
 831             /* Dot keyword, search for it */
 832             Tok = FindDotKeyword ();
 833             if (Tok == TOK_NONE) {
 834
 835                 /* Not found */
 836                 if (!LeadingDotInIdents) {
 837                     /* Invalid pseudo instruction */
 838                     Error ("`%s' is not a recognized control command", SVal);
 839                     goto Again;
 840                 }
 841
 842                 /* An identifier with a dot. Check if it's a define style
 843                  * macro.
 844                  */
 845                 if (IsDefine (SVal)) {
 846                     /* This is a define style macro - expand it */
 847                     MacExpandStart ();
 848                     goto Restart;
 849                 }
 850
 851                 /* Just an identifier with a dot */
 852                 Tok = TOK_IDENT;
 853             }
 854
 855         }
 856         return;
 857     }
 858
 859     /* Indirect op for sweet16 cpu. Must check this before checking for local
 860      * symbols, because these may also use the '@' symbol.
 861      */
 862     if (CPU == CPU_SWEET16 && C == '@') {
 863         NextChar ();
 864         Tok = TOK_AT;
 865         return;
 866     }
 867
 868     /* Local symbol? */
 869     if (C == LocalStart) {
 870
 871         /* Read the identifier */
 872         ReadIdent (0);
 873
 874         /* Start character alone is not enough */
 875         if (SVal [1] == '\0') {
 876             Error ("Invalid cheap local symbol");
 877             goto Again;
 878         }
 879
 880         /* A local identifier */
 881         Tok = TOK_LOCAL_IDENT;
 882         return;
 883     }
 884
 885
 886     /* Identifier or keyword? */
 887     if (IsIdStart (C)) {
 888
 889         /* Read the identifier */
 890         ReadIdent (0);
 891
 892         /* Check for special names. Bail out if we have identified the type of
 893          * the token. Go on if the token is an identifier.
 894          */
 895         if (SVal[1] == '\0') {
 896             switch (toupper (SVal [0])) {
 897
 898                 case 'A':
 899                     if (C == ':') {
 900                         NextChar ();
 901                         Tok = TOK_OVERRIDE_ABS;
 902                     } else {
 903                         Tok = TOK_A;
 904                     }
 905                     return;
 906
 907                 case 'F':
 908                     if (C == ':') {
 909                         NextChar ();
 910                         Tok = TOK_OVERRIDE_FAR;
 911                         return;
 912                     }
 913                     break;
 914
 915                 case 'S':
 916                     Tok = TOK_S;
 917                     return;
 918
 919                 case 'X':
 920                     Tok = TOK_X;
 921                     return;
 922
 923                 case 'Y':
 924                     Tok = TOK_Y;
 925                     return;
 926
 927                 case 'Z':
 928                     if (C == ':') {
 929                         NextChar ();
 930                         Tok = TOK_OVERRIDE_ZP;
 931                         return;
 932                     }
 933                     break;
 934
 935                 default:
 936                     break;
 937             }
 938
 939         } else if (CPU == CPU_SWEET16 && (IVal = Sweet16Reg (SVal)) >= 0) {
 940
 941             /* A sweet16 register number in sweet16 mode */
 942             Tok = TOK_REG;
 943             return;
 944
 945         }
 946
 947         /* Check for define style macro */
 948         if (IsDefine (SVal)) {
 949             /* Macro - expand it */
 950             MacExpandStart ();
 951             goto Restart;
 952         } else {
 953             /* An identifier */
 954             Tok = TOK_IDENT;
 955         }
 956         return;
 957     }
 958
 959     /* Ok, let's do the switch */
 960 CharAgain:
 961     switch (C) {
 962
 963         case '+':
 964             NextChar ();
 965             Tok = TOK_PLUS;
 966             return;
 967
 968         case '-':
 969             NextChar ();
 970             Tok = TOK_MINUS;
 971             return;
 972
 973         case '/':
 974             NextChar ();
 975             Tok = TOK_DIV;
 976             return;
 977
 978         case '*':
 979             NextChar ();
 980             Tok = TOK_MUL;
 981             return;
 982
 983         case '^':
 984             NextChar ();
 985             Tok = TOK_XOR;
 986             return;
 987
 988         case '&':
 989             NextChar ();
 990             if (C == '&') {
 991                 NextChar ();
 992                 Tok = TOK_BOOLAND;
 993             } else {
 994                 Tok = TOK_AND;
 995             }
 996             return;
 997
 998         case '|':
 999             NextChar ();
1000             if (C == '|') {
1001                 NextChar ();
1002                 Tok = TOK_BOOLOR;
1003             } else {
1004                 Tok = TOK_OR;
1005             }
1006             return;
1007
1008         case ':':
1009             NextChar ();
1010             switch (C) {
1011
1012                 case ':':
1013                     NextChar ();
1014                     Tok = TOK_NAMESPACE;
1015                     break;
1016
1017                 case '-':
1018                     IVal = 0;
1019                     do {
1020                         --IVal;
1021                         NextChar ();
1022                     } while (C == '-');
1023                     Tok = TOK_ULABEL;
1024                     break;
1025
1026                 case '+':
1027                     IVal = 0;
1028                     do {
1029                         ++IVal;
1030                         NextChar ();
1031                     } while (C == '+');
1032                     Tok = TOK_ULABEL;
1033                     break;
1034
1035                 case '=':
1036                     NextChar ();
1037                     Tok = TOK_ASSIGN;
1038                     break;
1039
1040                 default:
1041                     Tok = TOK_COLON;
1042                     break;
1043             }
1044             return;
1045
1046         case ',':
1047             NextChar ();
1048             Tok = TOK_COMMA;
1049             return;
1050
1051         case ';':
1052             NextChar ();
1053             while (C != '\n' && C != EOF) {
1054                 NextChar ();
1055             }
1056             goto CharAgain;
1057
1058         case '#':
1059             NextChar ();
1060             Tok = TOK_HASH;
1061             return;
1062
1063         case '(':
1064             NextChar ();
1065             Tok = TOK_LPAREN;
1066             return;
1067
1068         case ')':
1069             NextChar ();
1070             Tok = TOK_RPAREN;
1071             return;
1072
1073         case '[':
1074             NextChar ();
1075             Tok = TOK_LBRACK;
1076             return;
1077
1078         case ']':
1079             NextChar ();
1080             Tok = TOK_RBRACK;
1081             return;
1082
1083         case '{':
1084             NextChar ();
1085             Tok = TOK_LCURLY;
1086             return;
1087
1088         case '}':
1089             NextChar ();
1090             Tok = TOK_RCURLY;
1091             return;
1092
1093         case '<':
1094             NextChar ();
1095             if (C == '=') {
1096                 NextChar ();
1097                 Tok = TOK_LE;
1098             } else if (C == '<') {
1099                 NextChar ();
1100                 Tok = TOK_SHL;
1101             } else if (C == '>') {
1102                 NextChar ();
1103                 Tok = TOK_NE;
1104             } else {
1105                 Tok = TOK_LT;
1106             }
1107             return;
1108
1109         case '=':
1110             NextChar ();
1111             Tok = TOK_EQ;
1112             return;
1113
1114         case '!':
1115             NextChar ();
1116             Tok = TOK_BOOLNOT;
1117             return;
1118
1119         case '>':
1120             NextChar ();
1121             if (C == '=') {
1122                 NextChar ();
1123                 Tok = TOK_GE;
1124             } else if (C == '>') {
1125                 NextChar ();
1126                 Tok = TOK_SHR;
1127             } else {
1128                 Tok = TOK_GT;
1129             }
1130             return;
1131
1132         case '~':
1133             NextChar ();
1134             Tok = TOK_NOT;
1135             return;
1136
1137         case '\'':
1138             /* Hack: If we allow ' as terminating character for strings, read
1139              * the following stuff as a string, and check for a one character
1140              * string later.
1141              */
1142             if (LooseStringTerm) {
1143                 if (ReadStringConst ('\'') == 1) {
1144                     IVal = SVal[0];
1145                     Tok = TOK_CHARCON;
1146                 } else {
1147                     Tok = TOK_STRCON;
1148                 }
1149             } else {
1150                 /* Always a character constant */
1151                 NextChar ();
1152                 if (C == EOF || IsControl (C)) {
1153                     Error ("Illegal character constant");
1154                     goto CharAgain;
1155                 }
1156                 IVal = C;
1157                 Tok = TOK_CHARCON;
1158                 NextChar ();
1159                 if (C != '\'') {
1160                     if (!MissingCharTerm) {
1161                         Error ("Illegal character constant");
1162                     }
1163                 } else {
1164                     NextChar ();
1165                 }
1166             }
1167             return;
1168
1169         case '\"':
1170             ReadStringConst ('\"');
1171             Tok = TOK_STRCON;
1172             return;
1173
1174         case '\\':
1175             /* Line continuation? */
1176             if (LineCont) {
1177                 NextChar ();
1178                 if (C == '\n') {
1179                     /* Handle as white space */
1180                     NextChar ();
1181                     C = ' ';
1182                     goto Again;
1183                 }
1184             }
1185             break;
1186
1187         case '\n':
1188             NextChar ();
1189             Tok = TOK_SEP;
1190             return;
1191
1192         case EOF:
1193             /* Check if we have any open .IFs in this file */
1194             CheckOpenIfs ();
1195             /* Check if we have any open token lists in this file */
1196             CheckInputStack ();
1197
1198             /* If this was an include file, then close it and handle like a
1199              * separator. Do not close the main file, but return EOF.
1200              */
1201             if (ICount > 1) {
1202                 DoneInputFile ();
1203             } else {
1204                 Tok = TOK_EOF;
1205             }
1206             return;
1207
1208     }
1209
1210     /* If we go here, we could not identify the current character. Skip it
1211      * and try again.
1212      */
1213     Error ("Invalid input character: 0x%02X", C & 0xFF);
1214     NextChar ();
1215     goto Again;
1216 }
1217
1218
1219
1220 int TokHasSVal (enum Token Tok)
1221 /* Return true if the given token has an attached SVal */
1222 {
1223     return (Tok == TOK_IDENT || TOK_LOCAL_IDENT || Tok == TOK_STRCON);
1224 }
1225
1226
1227
1228 int TokHasIVal (enum Token Tok)
1229 /* Return true if the given token has an attached IVal */
1230 {
1231     return (Tok == TOK_INTCON || Tok == TOK_CHARCON || Tok == TOK_REG);
1232 }
1233
1234
1235
1236 int GetSubKey (const char** Keys, unsigned Count)
1237 /* Search for a subkey in a table of keywords. The current token must be an
1238  * identifier and all keys must be in upper case. The identifier will be
1239  * uppercased in the process. The function returns the index of the keyword,
1240  * or -1 if the keyword was not found.
1241  */
1242 {
1243     unsigned I;
1244
1245     /* Must have an identifier */
1246     PRECONDITION (Tok == TOK_IDENT);
1247
1248     /* If we aren't in ignore case mode, we have to uppercase the identifier */
1249     if (!IgnoreCase) {
1250         UpcaseSVal ();
1251     }
1252
1253     /* Do a linear search (a binary search is not worth the effort) */
1254     for (I = 0; I < Count; ++I) {
1255         if (strcmp (SVal, Keys [I]) == 0) {
1256             /* Found it */
1257             return I;
1258         }
1259     }
1260
1261     /* Not found */
1262     return -1;
1263 }
1264
1265
1266
1267 unsigned char ParseAddrSize (void)
1268 /* Check if the next token is a keyword that denotes an address size specifier.
1269  * If so, return the corresponding address size constant, otherwise output an
1270  * error message and return ADDR_SIZE_DEFAULT.
1271  */
1272 {
1273     static const char* Keys[] = {
1274         "DIRECT", "ZEROPAGE", "ZP",
1275         "ABSOLUTE", "ABS", "NEAR",
1276         "FAR",
1277         "LONG", "DWORD",
1278     };
1279
1280     /* Check for an identifier */
1281     if (Tok != TOK_IDENT) {
1282         Error ("Address size specifier expected");
1283         return ADDR_SIZE_DEFAULT;
1284     }
1285
1286     /* Search for the attribute */
1287     switch (GetSubKey (Keys, sizeof (Keys) / sizeof (Keys [0]))) {
1288         case 0:
1289         case 1:
1290         case 2: return ADDR_SIZE_ZP;
1291         case 3:
1292         case 4:
1293         case 5: return ADDR_SIZE_ABS;
1294         case 6: return ADDR_SIZE_FAR;
1295         case 7:
1296         case 8: return ADDR_SIZE_LONG;
1297         default:
1298             Error ("Address size specifier expected");
1299             return ADDR_SIZE_DEFAULT;
1300     }
1301 }
1302
1303
1304
void InitScanner (const char* InFile)
/* Set up the scanner for use. InFile is the name of the input file, which is
 * handed to NewInputFile to be opened.
 */
{
    /* Opening the input file is all that is needed here */
    NewInputFile (InFile);
}
1311
1312
1313
void DoneScanner (void)
/* Shut down the scanner and release its resources. This is done by calling
 * DoneInputFile.
 */
{
    /* Finish the input file */
    DoneInputFile ();
}
1319
1320
1321