4 * (c) Copyright 2001-2005, Ullrich von Bassewitz <uz@cc65.org>
5 * 2005-01-24, Greg King <gngking@erols.com>
7 * This is the basic layer for all scanf-type functions. It should be
8 * rewritten in assembly, at some time in the future. So, some of the code
9 * is not as elegant as it could be.
24 /* _scanf() can give EOF to these functions. But, the macros can't
25 ** understand it; so, they are removed.
32 #pragma static-locals(on)
36 /*****************************************************************************/
37 /* SetJmp return codes */
38 /*****************************************************************************/
43 RC_OK, /* setjmp() call */
44 RC_NOCONV, /* No conversion possible */
45 RC_EOF /* EOF reached */
50 /*****************************************************************************/
52 /*****************************************************************************/
56 static const char* format; /* Copy of function argument */
57 static const struct scanfdata* D_; /* Copy of function argument */
58 static va_list ap; /* Copy of function argument */
59 static jmp_buf JumpBuf; /* "Label" that is used for failures */
60 static char F; /* Character from format string */
61 static unsigned CharCount; /* Characters read so far */
62 static int C; /* Character from input */
63 static unsigned Width; /* Maximum field width */
64 static long IntVal; /* Converted int value */
65 static int Assignments; /* Number of assignments */
66 static unsigned char IntBytes; /* Number of bytes-1 for int conversions */
69 static bool Converted; /* Some object was converted */
70 static bool Positive; /* Flag for positive value */
71 static bool NoAssign; /* Suppress assignment */
72 static bool Invert; /* Do we need to invert the charset? */
73 static unsigned char CharSet[(1+UCHAR_MAX)/CHAR_BIT];
74 static const unsigned char Bits[CHAR_BIT] = {
75 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80
78 /* We need C to be 16 bits since we cannot check for EOF otherwise.
79 * Unfortunately, this causes the code to be quite a bit larger, even if for most
80 * purposes, checking the low byte would be enough, since if C is EOF, the
81 * low byte will not match any useful character anyway (at least for the
82 * supported platforms - I know that this is not portable). So the following
83 * macro is used to access just the low byte of C.
85 #define CHAR(c) (*((unsigned char*)&(c)))
89 /*****************************************************************************/
91 /*****************************************************************************/
95 /* We don't want the optimizer to ruin our "perfect" ;-)
98 #pragma optimize (push, off)
100 static unsigned FindBit (void)
101 /* Locate the character's bit in the charset array.
102 * < .A - Argument character
103 * > .X - Offset of the byte in the character-set mask
108 asm ("lsr a"); /* Divide by CHAR_BIT */
111 asm ("tax"); /* Byte's offset */
113 asm ("and #%b", CHAR_BIT-1);
114 asm ("tay"); /* Bit's offset */
115 asm ("lda %v,y", Bits);
116 return (unsigned) __AX__;
119 #pragma optimize (pop)
122 static void __fastcall__ AddCharToSet (unsigned char /* C */)
123 /* Set the given bit in the character set */
126 asm ("ora %v,x", CharSet);
127 asm ("sta %v,x", CharSet);
132 #pragma optimize (push, off)
134 static unsigned char IsCharInSet (void)
135 /* Check if the char. is part of the character set. */
137 /* Get the character from C. */
140 asm ("bne L1"); /* EOF never is in the set */
143 asm ("and %v,x", CharSet);
146 return (unsigned char) __AX__;
149 #pragma optimize (pop)
153 static void InvertCharSet (void)
154 /* Invert the character set */
156 asm ("ldy #%b", sizeof (CharSet) - 1);
158 asm ("lda %v,y", CharSet);
160 asm ("sta %v,y", CharSet);
167 /*****************************************************************************/
169 /*****************************************************************************/
173 static void PushBack (void)
174 /* Push back the last (unused) character, provided it is not EOF. */
176 /* Get the character from C. */
177 /* Only the high-byte needs to be checked for EOF. */
179 asm ("bne %g", Done);
182 /* Put unget()'s first argument on the stack. */
185 /* Copy D into the zero-page. */
186 (const struct scanfdata*) __AX__ = D_;
190 /* Copy the unget vector to jmpvec. */
191 asm ("ldy #%b", offsetof (struct scanfdata, unget));
192 asm ("lda (ptr1),y");
193 asm ("sta jmpvec+1");
195 asm ("lda (ptr1),y");
196 asm ("sta jmpvec+2");
198 /* Load D->data into __AX__. */
199 asm ("ldy #%b", offsetof (struct scanfdata, data) + 1);
200 asm ("lda (ptr1),y");
203 asm ("lda (ptr1),y");
205 /* Call the unget routine. */
208 /* Take back that character's count. */
209 asm ("lda %v", CharCount);
210 asm ("bne %g", Yank);
211 asm ("dec %v+1", CharCount);
213 asm ("dec %v", CharCount);
221 static void ReadChar (void)
222 /* Get an input character, count characters */
226 asm ("ldx %v+1", D_);
230 /* Copy the get vector to jmpvec */
231 asm ("ldy #%b", offsetof (struct scanfdata, get));
232 asm ("lda (ptr1),y");
233 asm ("sta jmpvec+1");
235 asm ("lda (ptr1),y");
236 asm ("sta jmpvec+2");
238 /* Load D->data into __AX__ */
239 asm ("ldy #%b", offsetof (struct scanfdata, data) + 1);
240 asm ("lda (ptr1),y");
243 asm ("lda (ptr1),y");
245 /* Call the get routine */
248 /* Assign the result to C */
252 /* If C is EOF, don't bump the character counter.
253 * Only the high-byte needs to be checked.
256 asm ("beq %g", Done);
258 /* Must bump CharCount. */
259 asm ("inc %v", CharCount);
260 asm ("bne %g", Done);
261 asm ("inc %v+1", CharCount);
269 #pragma optimize (push, off)
271 static void __fastcall__ Error (unsigned char /* Code */)
272 /* Does a longjmp using the given code */
275 (char*) __AX__ = JumpBuf;
279 asm ("jmp %v", longjmp);
282 #pragma optimize (pop)
286 static void CheckEnd (void)
287 /* Stop a scan if it prematurely reaches the end of a string or a file. */
289 /* Only the high-byte needs to be checked for EOF. */
291 asm ("beq %g", Done);
300 static void SkipWhite (void)
301 /* Skip white space in the input; C is left holding the first non-white character (or EOF) */
303 while ((bool) isspace (C)) {
310 #pragma optimize (push, off)
312 static void ReadSign (void)
313 /* Read an optional sign and skip it. Store 1 in Positive if the value is
314 * positive, store 0 otherwise.
317 /* We can ignore the high byte of C here, since if it is EOF, the lower
318 * byte won't match anyway.
322 asm ("bne %g", NotNeg);
325 asm ("sta %v", Converted);
326 asm ("jsr %v", ReadChar);
327 asm ("lda #$00"); /* Flag as negative */
328 asm ("beq %g", Store);
334 asm ("sta %v", Converted);
335 asm ("jsr %v", ReadChar); /* Skip the + sign */
337 asm ("lda #$01"); /* Flag as positive */
339 asm ("sta %v", Positive);
342 #pragma optimize (pop)
346 static unsigned char __fastcall__ HexVal (char C)
347 /* Convert a digit to a value */
349 return (bool) isdigit (C) ?
351 (char) tolower ((int) C) - ('a' - 10);
356 static void __fastcall__ ReadInt (unsigned char Base)
357 /* Read an integer, and store it into IntVal. */
359 unsigned char Val, CharCount = 0;
361 /* Read the integer value */
363 while ((bool) isxdigit (C) && ++Width != 0
364 && (Val = HexVal ((char) C)) < Base) {
366 IntVal = IntVal * (long) Base + (long) Val;
370 /* If we didn't convert anything, it's a failure. */
371 if (CharCount == 0) {
375 /* Another conversion */
381 static void AssignInt (void)
382 /* Assign the integer value in IntVal to the next argument. The function makes
383 * several non-portable assumptions, to reduce code size:
384 * - signed and unsigned types have the same representation.
385 * - short and int have the same representation.
386 * - all pointer types have the same representation.
389 if (NoAssign == false) {
391 /* Get the next argument pointer */
392 (void*) __AX__ = va_arg (ap, void*);
394 /* Put the argument pointer into the zero-page. */
398 /* Get the number of bytes-1 to copy */
399 asm ("ldy %v", IntBytes);
401 /* Assign the integer value */
402 Loop: asm ("lda %v,y", IntVal);
403 asm ("sta (ptr1),y");
405 asm ("bpl %g", Loop);
407 /* Another assignment */
408 asm ("inc %v", Assignments);
409 asm ("bne %g", Done);
410 asm ("inc %v+1", Assignments);
417 static void __fastcall__ ScanInt (unsigned char Base)
418 /* Scan an integer including white space, sign and optional base spec,
419 * and store it into IntVal.
422 /* Skip whitespace */
425 /* Read an optional sign */
428 /* If Base is unknown (zero), figure it out */
430 if (CHAR (C) == '0') {
442 /* Restart at the beginning of the number because it might
443 * be only a single zero digit (which already was read).
453 /* Read the integer value */
457 if (Positive == false) {
461 /* Assign the value to the next argument unless suppressed */
467 static char GetFormat (void)
468 /* Pick up the next character from the format string. */
470 /* return (F = *format++); */
471 (const char*) __AX__ = format;
473 asm ("stx regsave+1");
476 asm ("lda (regsave),y");
478 return (F = (char) __AX__);
483 int __fastcall__ _scanf (const struct scanfdata* D,
484 const char* format_, va_list ap_)
485 /* This is the routine used to do the actual work. It is called from several
486 * types of wrappers to implement the actual ISO xxscanf functions.
490 bool HaveWidth; /* True if a width was given */
491 bool Match; /* True if a character-set has any matches */
492 char Start; /* Walks over a range */
494 /* Place copies of the arguments into global variables. This is not very
495 * nice, but on a 6502 platform it gives better code, since the values
496 * do not have to be passed as parameters.
502 /* Initialize variables */
507 /* Set up the jump "label". CheckEnd() will use that label when EOF
508 * is reached. ReadInt() will use it when number-conversion fails.
510 if ((unsigned char) setjmp (JumpBuf) == RC_OK) {
513 /* Get the next input character */
516 /* Walk over the format string */
517 while (GetFormat ()) {
519 /* Check for a conversion */
522 /* Check for a match */
523 if ((bool) isspace ((int) F)) {
525 /* Special white space handling: Any whitespace in the
526 * format string matches any amount of whitespace including
527 * none(!). So this match will never fail.
534 /* ### Note: The opposite test (C == F)
535 ** would be optimized into buggy code!
539 /* A mismatch -- we will stop scanning the input,
540 * and return the number of assigned conversions.
545 /* A match -- get the next input character, and continue. */
550 /* A conversion. Skip the percent sign. */
551 /* 0. Check for %% */
552 if (GetFormat () == '%') {
556 /* 1. Assignment suppression */
557 NoAssign = (F == '*');
562 /* 2. Maximum field width */
564 HaveWidth = (bool) isdigit (F);
568 /* ### Non portable ### */
569 Width = Width * 10 + (F & 0x0F);
570 } while ((bool) isdigit (GetFormat ()));
573 /* Invalid specification */
574 /* Note: This method of leaving the function might seem
575 * to be crude, but it optimizes very well because
576 * the four exits can share this code.
583 /* Increment-and-test makes better code than test-and-decrement
584 * does. So, change the width into a form that can be used in
589 /* 3. Length modifier */
590 IntBytes = sizeof(int) - 1;
593 if (*format == 'h') {
594 IntBytes = sizeof(char) - 1;
601 if (*format == 'l') {
602 /* Treat long long as long */
606 case 'j': /* intmax_t */
607 IntBytes = sizeof(long) - 1;
610 case 'z': /* size_t */
611 case 't': /* ptrdiff_t */
612 /* Same size as int */
614 case 'L': /* long double - ignore this one */
618 /* 4. Conversion specifier */
620 /* 'd' and 'u' conversions are actually the same, since the
621 * standard says that even the 'u' modifier allows an
622 * optionally signed integer.
624 case 'd': /* Optionally signed decimal integer */
630 /* Optionally signed integer with a base */
635 /* Optionally signed octal integer */
641 /* Optionally signed hexadecimal integer */
646 /* Whitespace-terminated string */
648 CheckEnd (); /* Is it an input failure? */
649 Converted = true; /* No, conversion will succeed */
650 if (NoAssign == false) {
651 S = va_arg (ap, char*);
654 && (bool) isspace (C) == false
656 if (NoAssign == false) {
661 /* Terminate the string just read */
662 if (NoAssign == false) {
669 /* Fixed-length string, NOT zero-terminated */
670 if (HaveWidth == false) {
671 /* No width given, default is 1 */
674 CheckEnd (); /* Is it an input failure? */
675 Converted = true; /* No, at least 1 char. available */
676 if (NoAssign == false) {
677 S = va_arg (ap, char*);
678 /* ## This loop is convenient for us, but it isn't
679 * standard C. The standard implies that a failure
680 * shouldn't put anything into the array argument.
683 CheckEnd (); /* Is it a matching failure? */
689 /* Just skip as many chars as given */
691 CheckEnd (); /* Is it a matching failure? */
698 /* String using characters from a set */
700 memset (CharSet, 0, sizeof (CharSet));
701 /* Skip the left-bracket, and test for inversion. */
702 Invert = (GetFormat () == '^');
707 /* Empty sets aren't allowed; so, a right-bracket
708 * at the beginning must be a member of the set.
713 /* Read the characters that are part of the set */
714 while (F != '\0' && F != ']') {
715 if (*format == '-') { /* Look ahead at next char. */
716 /* A range. Get start and end, skip the '-' */
719 switch (GetFormat ()) {
722 /* '-' as last char means: include '-' */
723 AddCharToSet (Start);
727 /* Include all characters
728 * that are in the range.
731 AddCharToSet (Start);
737 /* Get next char after range */
741 /* Just a character */
747 /* Don't go beyond the end of the format string. */
748 /* (Maybe, this should mean an invalid specification.) */
753 /* Invert the set if requested */
758 /* We have the set in CharSet. Read characters and
759 * store them into a string while they are part of
763 if (NoAssign == false) {
764 S = va_arg (ap, char*);
766 while (IsCharInSet () && ++Width) {
767 if (NoAssign == false) {
770 Match = Converted = true;
773 /* At least one character must match the set. */
774 if (Match == false) {
777 if (NoAssign == false) {
784 /* Pointer, general format is 0xABCD.
785 * %hhp --> zero-page pointer
786 * %hp --> near pointer
787 * %lp --> far pointer
790 if (CHAR (C) != '0') {
808 /* Store the number of characters consumed so far
809 * (the read-ahead character hasn't been consumed).
811 IntVal = (long) (CharCount - (C == EOF ? 0u : 1u));
813 /* Don't count it. */
814 if (NoAssign == false) {
821 /* Wide characters */
831 /* Optionally signed float */
833 /* Those 2 groups aren't implemented. */
840 /* Invalid specification */
851 /* Coming here means a failure. If that happens at EOF, with no
852 * conversion attempts, then it is considered an error; otherwise,
853 * the number of assignments is returned (the default behaviour).
855 if (C == EOF && Converted == false) {
856 Assignments = EOF; /* Special case: error */
860 /* Put the read-ahead character back into the input stream. */
863 /* Return the number of conversion-and-assignments. */