4 * (c) Copyright 2001-2005, Ullrich von Bassewitz <uz@cc65.org>
5 * 2005-01-24, Greg King <gngking@erols.com>
7 * This is the basic layer for all scanf-type functions. It should be
8 * rewritten in assembly, at some time in the future. So, some of the code
9 * is not as elegant as it could be.
24 /* _scanf() can give EOF to these functions. But the macros can't
25 ** understand it, so they are removed.
32 extern void __fastcall__ _seterrno (unsigned char code);
34 #pragma staticlocals(on)
38 /*****************************************************************************/
39 /* SetJmp return codes */
40 /*****************************************************************************/
45 RC_OK, /* setjmp() call */
46 RC_NOCONV, /* No conversion possible */
47 RC_EOF /* EOF reached */
52 /*****************************************************************************/
54 /*****************************************************************************/
58 static const char* format; /* Copy of function argument */
59 static const struct scanfdata* D_; /* Copy of function argument */
60 static va_list ap; /* Copy of function argument */
61 static jmp_buf JumpBuf; /* "Label" that is used for failures */
62 static char F; /* Character from format string */
63 static unsigned CharCount; /* Characters read so far */
64 static int C; /* Character from input */
65 static unsigned Width; /* Maximum field width */
66 static long IntVal; /* Converted int value */
67 static int Assignments; /* Number of assignments */
68 static unsigned char IntBytes; /* Number of bytes-1 for int conversions */
71 static bool Converted; /* Some object was converted */
72 static bool Positive; /* Flag for positive value */
73 static bool NoAssign; /* Suppress assignment */
74 static bool Invert; /* Do we need to invert the charset? */
75 static unsigned char CharSet[(1+UCHAR_MAX)/CHAR_BIT];
76 static const unsigned char Bits[CHAR_BIT] = {
77 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80
80 /* We need C to be 16 bits since we cannot check for EOF otherwise.
81 * Unfortunately, this makes the code quite a bit larger, even though for most
82 * purposes, checking the low byte would be enough, since if C is EOF, the
83 * low byte will not match any useful character anyway (at least for the
84 * supported platforms - I know that this is not portable). So the following
85 * macro is used to access just the low byte of C.
87 #define CHAR(c) (*((unsigned char*)&(c)))
91 /*****************************************************************************/
93 /*****************************************************************************/
97 /* We don't want the optimizer to ruin our "perfect" ;-)
100 #pragma optimize (push, off)
102 static unsigned FindBit (void)
103 /* Locate the character's bit in the charset array.
104 * < .A - Argument character
105 * > .X - Offset of the byte in the character-set mask
110 asm ("lsr a"); /* Divide by CHAR_BIT */
113 asm ("tax"); /* Byte's offset */
115 asm ("and #%b", CHAR_BIT-1);
116 asm ("tay"); /* Bit's offset */
117 asm ("lda %v,y", Bits);
118 return (unsigned) __AX__;
121 #pragma optimize (pop)
124 static void __fastcall__ AddCharToSet (unsigned char /* C */)
125 /* Set the given bit in the character set */
128 asm ("ora %v,x", CharSet);
129 asm ("sta %v,x", CharSet);
134 #pragma optimize (push, off)
136 static unsigned char IsCharInSet (void)
137 /* Check if the char. is part of the character set. */
139 /* Get the character from C. */
142 asm ("bne L1"); /* EOF never is in the set */
145 asm ("and %v,x", CharSet);
148 return (unsigned char) __AX__;
151 #pragma optimize (pop)
155 static void InvertCharSet (void)
156 /* Invert the character set */
158 asm ("ldy #%b", sizeof (CharSet) - 1);
160 asm ("lda %v,y", CharSet);
162 asm ("sta %v,y", CharSet);
169 /*****************************************************************************/
171 /*****************************************************************************/
175 static void PushBack (void)
176 /* Push back the last (unused) character, provided it is not EOF. */
178 /* Get the character from C. */
179 /* Only the high-byte needs to be checked for EOF. */
181 asm ("bne %g", Done);
184 /* Put unget()'s first argument on the stack. */
187 /* Copy D into the zero-page. */
188 (const struct scanfdata*) __AX__ = D_;
192 /* Copy the unget vector to jmpvec. */
193 asm ("ldy #%b", offsetof (struct scanfdata, unget));
194 asm ("lda (ptr1),y");
195 asm ("sta jmpvec+1");
197 asm ("lda (ptr1),y");
198 asm ("sta jmpvec+2");
200 /* Load D->data into __AX__. */
201 asm ("ldy #%b", offsetof (struct scanfdata, data) + 1);
202 asm ("lda (ptr1),y");
205 asm ("lda (ptr1),y");
207 /* Call the unget routine. */
210 /* Take back that character's count. */
211 asm ("lda %v", CharCount);
212 asm ("bne %g", Yank);
213 asm ("dec %v+1", CharCount);
215 asm ("dec %v", CharCount);
223 static void ReadChar (void)
224 /* Get an input character, count characters */
228 asm ("ldx %v+1", D_);
232 /* Copy the get vector to jmpvec */
233 asm ("ldy #%b", offsetof (struct scanfdata, get));
234 asm ("lda (ptr1),y");
235 asm ("sta jmpvec+1");
237 asm ("lda (ptr1),y");
238 asm ("sta jmpvec+2");
240 /* Load D->data into __AX__ */
241 asm ("ldy #%b", offsetof (struct scanfdata, data) + 1);
242 asm ("lda (ptr1),y");
245 asm ("lda (ptr1),y");
247 /* Call the get routine */
250 /* Assign the result to C */
254 /* If C is EOF, don't bump the character counter.
255 * Only the high-byte needs to be checked.
258 asm ("beq %g", Done);
260 /* Must bump CharCount. */
261 asm ("inc %v", CharCount);
262 asm ("bne %g", Done);
263 asm ("inc %v+1", CharCount);
271 #pragma optimize (push, off)
273 static void __fastcall__ Error (unsigned char /* Code */)
274 /* Does a longjmp using the given code */
277 (char*) __AX__ = JumpBuf;
281 asm ("jmp %v", longjmp);
284 #pragma optimize (pop)
288 static void CheckEnd (void)
289 /* Stop a scan if it prematurely reaches the end of a string or a file. */
291 /* Only the high-byte needs to be checked for EOF. */
293 asm ("beq %g", Done);
302 static void SkipWhite (void)
303 /* Skip white space in the input; stop at the first non-white-space character */
305 while ((bool) isspace (C)) {
312 #pragma optimize (push, off)
314 static void ReadSign (void)
315 /* Read an optional sign and skip it. Store 1 in Positive if the value is
316 * positive, store 0 otherwise.
319 /* We can ignore the high byte of C here, since if it is EOF, the lower
320 * byte won't match anyway.
324 asm ("bne %g", NotNeg);
327 asm ("sta %v", Converted);
328 asm ("jsr %v", ReadChar);
329 asm ("lda #$00"); /* Flag as negative */
330 asm ("beq %g", Store);
336 asm ("sta %v", Converted);
337 asm ("jsr %v", ReadChar); /* Skip the + sign */
339 asm ("lda #$01"); /* Flag as positive */
341 asm ("sta %v", Positive);
344 #pragma optimize (pop)
348 static unsigned char __fastcall__ HexVal (char C)
349 /* Convert a digit to a value */
351 return (bool) isdigit (C) ?
353 (char) tolower ((int) C) - ('a' - 10);
358 static void __fastcall__ ReadInt (unsigned char Base)
359 /* Read an integer, and store it into IntVal. */
361 unsigned char Val, CharCount = 0;
363 /* Read the integer value */
365 while ((bool) isxdigit (C) && ++Width != 0
366 && (Val = HexVal ((char) C)) < Base) {
368 IntVal = IntVal * (long) Base + (long) Val;
372 /* If we didn't convert anything, it's a failure. */
373 if (CharCount == 0) {
377 /* Another conversion */
383 static void AssignInt (void)
384 /* Assign the integer value in IntVal to the next argument. The function makes
385 * several non-portable assumptions, to reduce code size:
386 * - signed and unsigned types have the same representation.
387 * - short and int have the same representation.
388 * - all pointer types have the same representation.
391 if (NoAssign == false) {
393 /* Get the next argument pointer */
394 (void*) __AX__ = va_arg (ap, void*);
396 /* Put the argument pointer into the zero-page. */
400 /* Get the number of bytes-1 to copy */
401 asm ("ldy %v", IntBytes);
403 /* Assign the integer value */
404 Loop: asm ("lda %v,y", IntVal);
405 asm ("sta (ptr1),y");
407 asm ("bpl %g", Loop);
409 /* Another assignment */
410 asm ("inc %v", Assignments);
411 asm ("bne %g", Done);
412 asm ("inc %v+1", Assignments);
419 static void __fastcall__ ScanInt (unsigned char Base)
420 /* Scan an integer including white space, sign and optional base spec,
421 * and store it into IntVal.
424 /* Skip whitespace */
427 /* Read an optional sign */
430 /* If Base is unknown (zero), figure it out */
432 if (CHAR (C) == '0') {
444 /* Restart at the beginning of the number because it might
445 * be only a single zero digit (which already was read).
455 /* Read the integer value */
459 if (Positive == false) {
463 /* Assign the value to the next argument unless suppressed */
469 static char GetFormat (void)
470 /* Pick up the next character from the format string. */
472 /* return (F = *format++); */
473 (const char*) __AX__ = format;
475 asm ("stx regsave+1");
478 asm ("lda (regsave),y");
480 return (F = (char) __AX__);
485 int __fastcall__ _scanf (const struct scanfdata* D,
486 const char* format_, va_list ap_)
487 /* This is the routine used to do the actual work. It is called from several
488 * types of wrappers to implement the actual ISO xxscanf functions.
492 bool HaveWidth; /* True if a width was given */
493 bool Match; /* True if a character-set has any matches */
494 char Start; /* Walks over a range */
496 /* Place copies of the arguments into global variables. This is not very
497 * nice, but on a 6502 platform it gives better code, since the values
498 * do not have to be passed as parameters.
504 /* Initialize variables */
509 /* Set up the jump "label". CheckEnd() will use that label when EOF
510 * is reached. ReadInt() will use it when number-conversion fails.
512 if ((unsigned char) setjmp (JumpBuf) == RC_OK) {
515 /* Get the next input character */
518 /* Walk over the format string */
519 while (GetFormat ()) {
521 /* Check for a conversion */
524 /* Check for a match */
525 if ((bool) isspace ((int) F)) {
527 /* Special white space handling: Any whitespace in the
528 * format string matches any amount of whitespace including
529 * none(!). So this match will never fail.
536 /* ### Note: The opposite test (C == F)
537 ** would be optimized into buggy code!
541 /* A mismatch -- we will stop scanning the input,
542 * and return the number of assigned conversions.
547 /* A match -- get the next input character, and continue. */
552 /* A conversion. Skip the percent sign. */
553 /* 0. Check for %% */
554 if (GetFormat () == '%') {
558 /* 1. Assignment suppression */
559 NoAssign = (F == '*');
564 /* 2. Maximum field width */
566 HaveWidth = (bool) isdigit (F);
570 /* ### Non portable ### */
571 Width = Width * 10 + (F & 0x0F);
572 } while ((bool) isdigit (GetFormat ()));
575 /* Invalid specification */
576 /* Note: This method of leaving the function might seem
577 * to be crude, but it optimizes very well because
578 * the four exits can share this code.
585 /* Increment-and-test makes better code than test-and-decrement
586 * does. So, change the width into a form that can be used in
591 /* 3. Length modifier */
592 IntBytes = sizeof(int) - 1;
595 if (*format == 'h') {
596 IntBytes = sizeof(char) - 1;
603 if (*format == 'l') {
604 /* Treat long long as long */
608 case 'j': /* intmax_t */
609 IntBytes = sizeof(long) - 1;
612 case 'z': /* size_t */
613 case 't': /* ptrdiff_t */
614 /* Same size as int */
616 case 'L': /* long double - ignore this one */
620 /* 4. Conversion specifier */
622 /* 'd' and 'u' conversions are actually the same, since the
623 * standard says that even the 'u' specifier allows an
624 * optionally signed integer.
626 case 'd': /* Optionally signed decimal integer */
632 /* Optionally signed integer with a base */
637 /* Optionally signed octal integer */
643 /* Optionally signed hexadecimal integer */
648 /* Whitespace-terminated string */
650 CheckEnd (); /* Is it an input failure? */
651 Converted = true; /* No, conversion will succeed */
652 if (NoAssign == false) {
653 S = va_arg (ap, char*);
656 && (bool) isspace (C) == false
658 if (NoAssign == false) {
663 /* Terminate the string just read */
664 if (NoAssign == false) {
671 /* Fixed-length string, NOT zero-terminated */
672 if (HaveWidth == false) {
673 /* No width given, default is 1 */
676 CheckEnd (); /* Is it an input failure? */
677 Converted = true; /* No, at least 1 char. available */
678 if (NoAssign == false) {
679 S = va_arg (ap, char*);
680 /* ## This loop is convenient for us, but it isn't
681 * standard C. The standard implies that a failure
682 * shouldn't put anything into the array argument.
685 CheckEnd (); /* Is it a matching failure? */
691 /* Just skip as many chars as given */
693 CheckEnd (); /* Is it a matching failure? */
700 /* String using characters from a set */
702 memset (CharSet, 0, sizeof (CharSet));
703 /* Skip the left-bracket, and test for inversion. */
704 Invert = (GetFormat () == '^');
709 /* Empty sets aren't allowed; so, a right-bracket
710 * at the beginning must be a member of the set.
715 /* Read the characters that are part of the set */
716 while (F != '\0' && F != ']') {
717 if (*format == '-') { /* Look ahead at next char. */
718 /* A range. Get start and end, skip the '-' */
721 switch (GetFormat ()) {
724 /* '-' as last char means: include '-' */
725 AddCharToSet (Start);
729 /* Include all characters
730 * that are in the range.
733 AddCharToSet (Start);
739 /* Get next char after range */
743 /* Just a character */
749 /* Don't go beyond the end of the format string. */
750 /* (Maybe, this should mean an invalid specification.) */
755 /* Invert the set if requested */
760 /* We have the set in CharSet. Read characters and
761 * store them into a string while they are part of
765 if (NoAssign == false) {
766 S = va_arg (ap, char*);
768 while (IsCharInSet () && ++Width) {
769 if (NoAssign == false) {
772 Match = Converted = true;
775 /* At least one character must match the set. */
776 if (Match == false) {
779 if (NoAssign == false) {
786 /* Pointer, general format is 0xABCD.
787 * %hhp --> zero-page pointer
788 * %hp --> near pointer
789 * %lp --> far pointer
792 if (CHAR (C) != '0') {
810 /* Store the number of characters consumed so far
811 * (the read-ahead character hasn't been consumed).
813 IntVal = (long) (CharCount - (C == EOF ? 0u : 1u));
815 /* Don't count it. */
816 if (NoAssign == false) {
823 /* Wide characters */
833 /* Optionally signed float */
835 /* Those 2 groups aren't implemented. */
842 /* Invalid specification */
853 /* Coming here means a failure. If that happens at EOF, with no
854 * conversion attempts, then it is considered an error; otherwise,
855 * the number of assignments is returned (the default behaviour).
857 if (C == EOF && Converted == false) {
858 Assignments = EOF; /* Special case: error */
862 /* Put the read-ahead character back into the input stream. */
865 /* Return the number of conversion-and-assignments. */