From 1217289737588e65b088b3535428b27c7287d699 Mon Sep 17 00:00:00 2001 From: Manuel Novoa III Date: Fri, 1 Aug 2003 20:08:59 +0000 Subject: Add a new *scanf implementation, including the *wscanf functions. Should be standards compliant, with several optional features, including support for hexadecimal float notation, locale awareness, glibc-like locale-specific digit grouping with the `'' flag, and positional arg support. I tested it pretty well (finding several bugs in glibc's scanf in the process), but it is brand new so be aware. The *wprintf functions now support floating point output. Also, a couple of bugs were squashed. Finally, %a/%A conversions are now implemented. Implement the glibc xlocale interface for thread-specific locale support. Also add the various *_l(args, locale_t loc_arg) funcs. NOTE!!! setlocale() is NOT threadsafe! NOTE!!! The strto{floating point} conversion functions are now locale aware. They also now support hexadecimal floating point notation. Add the wcsto{floating point} conversion functions. Fix a bug in mktime() related to dst. Note that unlike glibc's mktime, uClibc's version always normalizes the struct tm before attempting to determine the correct dst setting if tm_isdst == -1 on entry. Add a stub version of the libintl functions. (untested) Fixed a known memory leak in setlocale() related to the collation data. Add lots of new config options (which Erik agreed to sort out :-), including finally exposing some of the stripped down stdio configs. Be careful with those though, as they haven't been tested in a long time. (temporary) GOTCHAs... The ctype functions are currently incorrect for 8-bit locales. They will be fixed shortly. The ctype functions are now table-based, resulting in larger statically linked binaries. I'll be adding an option to use the old approach in the stub locale configuration. 
--- libc/stdio/scanf.c | 2404 ++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 1860 insertions(+), 544 deletions(-) (limited to 'libc/stdio/scanf.c') diff --git a/libc/stdio/scanf.c b/libc/stdio/scanf.c index 9ac3d3c9c..48e9344c0 100644 --- a/libc/stdio/scanf.c +++ b/libc/stdio/scanf.c @@ -1,130 +1,178 @@ - -/* - * Modified by Manuel Novoa III Mar 13, 2001 - * - * The vfscanf routine was completely rewritten to add features and remove - * bugs. The function __strtold, based on my strtod code in stdlib, was - * added to provide floating point support for the scanf functions. - * - * So far they pass the test cases from glibc-2.1.3, except in two instances. - * In one case, the test appears to be broken. The other case is something - * I need to research further. This version of scanf assumes it can only - * peek one character ahead. Apparently, glibc looks further. The difference - * can be seen when parsing a floating point value in the character - * sequence "100ergs". glibc is able to back up before the 'e' and return - * a value of 100, whereas this scanf reports a bad match with the stream - * pointer at 'r'. A similar situation can also happen when parsing hex - * values prefixed by 0x or 0X; a failure would occur for "0xg". In order to - * fix this, I need to rework the "ungetc" machinery in stdio.c again. - * I do have one reference though, that seems to imply scanf has a single - * character of lookahead. - * - * May 20, 2001 - * - * Quote from ANSI/ISO C99 standard: - * - * fscanf pushes back at most one input character onto the input stream. - * Therefore, some sequences that are acceptable to strtod, strtol, etc., - * are unacceptable to fscanf. - * - * So uClibc's *scanf functions conform to the standard, and glibc's - * implementation doesn't for the "100ergs" case mentioned above. +/* Copyright (C) 2002, 2003 Manuel Novoa III * - * Sep 6, 2002 - * Patch from Tero_Lyytikäinen to fix bug in matchchar case. 
+ * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. * - * May 15, 2003 - * Hopefully fix handling of 0 bytes with %s, %c, and %[ specifiers. + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. * - * July 17, 2003 - * Bug fix from Peter Kjellerstedt . vfscanf was - * not setting the FILE bufread member to flag the end of the buffer. - * Also, do not set bufgetc member if getc macro support is disabled. + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the Free + * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +/* Aug 1, 2003 + * New *scanf implementation with lots of bug fixes and *wscanf support. + * Also now optionally supports hexadecimal float notation, positional + * args, and glibc locale-specific digit grouping. Should now be + * standards compliant. + */ + + #define _ISOC99_SOURCE /* for LLONG_MAX primarily... */ #define _GNU_SOURCE #define _STDIO_UTILITY +#include #include #include #include #include #include #include +#include +#include +#include + +#ifdef __UCLIBC_HAS_WCHAR__ +#include +#include +#include +#endif /* __UCLIBC_HAS_WCHAR__ */ + +#include +#include + +#include +#include #ifdef __STDIO_THREADSAFE #include #include #endif /* __STDIO_THREADSAFE */ -#ifdef L_scanf -#ifdef __STDC__ -int scanf(const char *fmt, ...) 
+#ifdef __UCLIBC_HAS_FLOATS__ +#include +#include +#endif /* __UCLIBC_HAS_FLOATS__ */ + +#ifdef __UCLIBC_HAS_SCANF_GLIBC_A_FLAG__ +#ifdef L_vfscanf +/* only emit this once */ +#warning Forcing undef of __UCLIBC_HAS_SCANF_GLIBC_A_FLAG__ until implemented! +#endif +#undef __UCLIBC_HAS_SCANF_GLIBC_A_FLAG__ +#endif + +extern void _store_inttype(void *dest, int desttype, uintmax_t val); + +#ifdef LLONG_MAX + +extern unsigned long long +_stdlib_strto_ll(register const char * __restrict str, + char ** __restrict endptr, int base, int sflag); +#if (ULLONG_MAX == UINTMAX_MAX) +#define STRTOUIM(s,e,b,sf) _stdlib_strto_ll(s,e,b,sf) +#endif + +#else /* LLONG_MAX */ + +extern unsigned long +_stdlib_strto_l(register const char * __restrict str, + char ** __restrict endptr, int base, int sflag); + +#if (ULONG_MAX == UINTMAX_MAX) +#define STRTOUIM(s,e,b,sf) _stdlib_strto_l(s,e,b,sf) +#endif + +#endif /* LLONG_MAX */ + +#ifndef STRTOUIM +#error STRTOUIM conversion function is undefined! +#endif + +/**********************************************************************/ + +/* The standards require EOF < 0. */ +#if EOF >= CHAR_MIN +#define __isdigit_char_or_EOF(C) __isdigit_char((C)) #else -int scanf(fmt, va_alist) -__const char *fmt; -va_dcl +#define __isdigit_char_or_EOF(C) __isdigit_int((C)) #endif + +/**********************************************************************/ +#ifdef L_fscanf + +int fscanf(FILE * __restrict stream, const char * __restrict format, ...) { - va_list ptr; + va_list arg; int rv; - va_start(ptr, fmt); - rv = vfscanf(stdin, fmt, ptr); - va_end(ptr); + va_start(arg, format); + rv = vfscanf(stream, format, arg); + va_end(arg); + return rv; } -#endif -#ifdef L_sscanf -#if !defined(__STDIO_BUFFERS) && !defined(__STDIO_GLIBC_CUSTOM_STREAMS) -#warning skipping sscanf since no buffering and no custom streams! -#else +#endif +/**********************************************************************/ +#ifdef L_scanf -int sscanf(const char *sp, const char *fmt, ...) 
+int scanf(const char * __restrict format, ...) { - va_list ptr; + va_list arg; int rv; - va_start(ptr, fmt); - rv = vsscanf(sp, fmt, ptr); - va_end(ptr); + va_start(arg, format); + rv = vfscanf(stdin, format, arg); + va_end(arg); + return rv; } #endif -#endif +/**********************************************************************/ +#ifdef L_sscanf -#ifdef L_fscanf -#ifdef __STDC__ -int fscanf(FILE * fp, const char *fmt, ...) -#else -int fscanf(fp, fmt, va_alist) -FILE *fp; -__const char *fmt; -va_dcl -#endif +#if defined(__STDIO_BUFFERS) || defined(__STDIO_GLIBC_CUSTOM_STREAMS) + +int sscanf(const char * __restrict str, const char * __restrict format, ...) { - va_list ptr; + va_list arg; int rv; - va_start(ptr, fmt); - rv = vfscanf(fp, fmt, ptr); - va_end(ptr); + va_start(arg, format); + rv = vsscanf(str, format, arg); + va_end(arg); + return rv; } -#endif +#else /* defined(__STDIO_BUFFERS) || defined(__STDIO_GLIBC_CUSTOM_STREAMS) */ +#warning Skipping sscanf since no buffering and no custom streams! +#endif /* defined(__STDIO_BUFFERS) || defined(__STDIO_GLIBC_CUSTOM_STREAMS) */ + +#endif +/**********************************************************************/ #ifdef L_vscanf -int vscanf(fmt, ap) -__const char *fmt; -va_list ap; + +int vscanf(const char * __restrict format, va_list arg) { - return vfscanf(stdin, fmt, ap); + return vfscanf(stdin, format, arg); } -#endif +#endif +/**********************************************************************/ #ifdef L_vsscanf + +#ifdef __UCLIBC_MJN3_ONLY__ +#warning WISHLIST: Implement vsscanf for non-buffered and no custom stream case. +#endif /* __UCLIBC_MJN3_ONLY__ */ + #ifdef __STDIO_BUFFERS int vsscanf(__const char *sp, __const char *fmt, va_list ap) { @@ -165,598 +213,1866 @@ int vsscanf(__const char *sp, __const char *fmt, va_list ap) return rv; } #else /* __STDIO_GLIBC_CUSTOM_STREAMS */ -#warning skipping vsscanf since no buffering and no custom streams! 
+#warning Skipping vsscanf since no buffering and no custom streams! #endif /* __STDIO_GLIBC_CUSTOM_STREAMS */ #endif /* __STDIO_BUFFERS */ + #endif +/**********************************************************************/ +#ifdef L_fwscanf -#ifdef L_vfscanf +int fwscanf(FILE * __restrict stream, const wchar_t * __restrict format, ...) +{ + va_list arg; + int rv; -#include -#include -#include + va_start(arg, format); + rv = vfwscanf(stream, format, arg); + va_end(arg); + + return rv; +} + +#endif +/**********************************************************************/ +#ifdef L_wscanf -static int valid_digit(char c, char base) +int wscanf(const wchar_t * __restrict format, ...) { - if (base == 16) { - return isxdigit(c); - } else { - return (__isdigit(c) && (c < '0' + base)); - } + va_list arg; + int rv; + + va_start(arg, format); + rv = vfwscanf(stdin, format, arg); + va_end(arg); + + return rv; } -extern unsigned long -_stdlib_strto_l(register const char * __restrict str, - char ** __restrict endptr, int base, int sflag); -#ifdef LLONG_MAX -extern unsigned long long -_stdlib_strto_ll(register const char * __restrict str, - char ** __restrict endptr, int base, int sflag); +#endif +/**********************************************************************/ +#ifdef L_swscanf + +#ifdef __STDIO_BUFFERS + +int swscanf(const wchar_t * __restrict str, const wchar_t * __restrict format, + ...) +{ + va_list arg; + int rv; + + va_start(arg, format); + rv = vswscanf(str, format, arg); + va_end(arg); + + return rv; +} +#else /* __STDIO_BUFFERS */ +#warning Skipping swscanf since no buffering! 
+#endif /* __STDIO_BUFFERS */ + +#endif +/**********************************************************************/ +#ifdef L_vwscanf + +int vwscanf(const wchar_t * __restrict format, va_list arg) +{ + return vfwscanf(stdin, format, arg); +} + +#endif +/**********************************************************************/ +#ifdef L_vswscanf + +#ifdef __STDIO_BUFFERS + +int vswscanf(const wchar_t * __restrict str, const wchar_t * __restrict format, + va_list arg) +{ + FILE f; + + f.filedes = -3; /* FAKE STREAM TO SUPPORT *wscanf! */ + f.modeflags = (__FLAG_WIDE|__FLAG_READONLY|__FLAG_READING); + f.bufpos = (char *) str; + f.bufend = (char *)(str + wcslen(str)); + f.ungot_width[0] = 0; +#ifdef __STDIO_THREADSAFE + f.user_locking = 0; + __stdio_init_mutex(&f.lock); +#endif + + + return vfwscanf(&f, format, arg); +} +#else /* __STDIO_BUFFERS */ +#warning Skipping vswscanf since no buffering! +#endif /* __STDIO_BUFFERS */ + +#endif +/**********************************************************************/ +/**********************************************************************/ + + + +/* float layout 0123456789012345678901 repeat n for "l[" */ +#define SPEC_CHARS "npxXoudifFeEgGaACSncs[" +/* npxXoudif eEgG CS cs[ */ + +/* NOTE: Ordering is important! In particular, CONV_LEFTBRACKET + * must immediately precede CONV_c. 
*/ + +enum { + CONV_n = 0, + CONV_p, + CONV_x, CONV_X, CONV_o, CONV_u, CONV_d, CONV_i, + CONV_f, CONV_F, CONV_e, CONV_E, CONV_g, CONV_G, CONV_a, CONV_A, + CONV_C, CONV_S, CONV_LEFTBRACKET, CONV_c, CONV_s, CONV_leftbracket, + CONV_percent, CONV_whitespace /* not in SPEC_* and no flags */ +}; + +#ifdef __UCLIBC_HAS_FLOATS__ +#ifdef __UCLIBC_HAS_HEXADECIMAL_FLOATS__ +/* p x X o u d i f F e E g G a A */ +#define SPEC_BASE { 16, 16, 16, 8, 10, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0 } +#else +/* p x X o u d i f F e E g G a A */ +#define SPEC_BASE { 16, 16, 16, 8, 10, 10, 0, 10, 10, 10, 10, 10, 10, 10, 10 } +#endif +#else /* __UCLIBC_HAS_FLOATS__ */ +/* p x X o u d i f F e E g G a A */ +#define SPEC_BASE { 16, 16, 16, 8, 10, 10, 0 } +#endif /* __UCLIBC_HAS_FLOATS__ */ + +#ifdef __UCLIBC_MJN3_ONLY__ +#ifdef L_vfscanf +/* emit once */ +#warning CONSIDER: Add a '0' flag to eat 0 padding when grouping? +#endif +#endif /* __UCLIBC_MJN3_ONLY__ */ + +#define SPEC_FLAGS "*'I" + +enum { + FLAG_SURPRESS = 0x10, /* MUST BE 1ST!! See DO_FLAGS. */ + FLAG_THOUSANDS = 0x20, + FLAG_I18N = 0x40, /* only works for d, i, u */ + FLAG_MALLOC = 0x80, /* only works for s, S, and [ (and l[)*/ +}; + + +#define SPEC_RANGES { CONV_n, CONV_p, CONV_i, CONV_A, \ + CONV_C, CONV_LEFTBRACKET, \ + CONV_c, CONV_leftbracket } + +/* Note: We treat L and ll as synonymous... for ints and floats. 
*/ + +#define SPEC_ALLOWED_FLAGS { \ + /* n */ (0x0f|FLAG_SURPRESS), \ + /* p */ ( 0|FLAG_SURPRESS), \ + /* oxXudi */ (0x0f|FLAG_SURPRESS|FLAG_THOUSANDS|FLAG_I18N), \ + /* fFeEgGaA */ (0x0c|FLAG_SURPRESS|FLAG_THOUSANDS|FLAG_I18N), \ + /* C */ ( 0|FLAG_SURPRESS), \ + /* S and l[ */ ( 0|FLAG_SURPRESS|FLAG_MALLOC), \ + /* c */ (0x04|FLAG_SURPRESS), \ + /* s and [ */ (0x04|FLAG_SURPRESS|FLAG_MALLOC), \ +} + + +/**********************************************************************/ +/* + * In order to ease translation to what arginfo and _print_info._flags expect, + * we map: 0:int 1:char 2:longlong 4:long 8:short + * and then _flags |= (((q << 7) + q) & 0x701) and argtype |= (_flags & 0x701) + */ + +/* TODO -- Fix the table below to take into account stdint.h. */ +/* #ifndef LLONG_MAX */ +/* #error fix QUAL_CHARS for no long long! Affects 'L', 'j', 'q', 'll'. */ +/* #else */ +/* #if LLONG_MAX != INTMAX_MAX */ +/* #error fix QUAL_CHARS intmax_t entry 'j'! */ +/* #endif */ +/* #endif */ + +#ifdef PDS +#error PDS already defined! +#endif +#ifdef SS +#error SS already defined! +#endif +#ifdef IMS +#error IMS already defined! +#endif + +#if PTRDIFF_MAX == INT_MAX +#define PDS 0 +#elif PTRDIFF_MAX == LONG_MAX +#define PDS 4 +#elif defined(LLONG_MAX) && (PTRDIFF_MAX == LLONG_MAX) +#define PDS 8 +#else +#error fix QUAL_CHARS ptrdiff_t entry 't'! +#endif + +#if SIZE_MAX == UINT_MAX +#define SS 0 +#elif SIZE_MAX == ULONG_MAX +#define SS 4 +#elif defined(LLONG_MAX) && (SIZE_MAX == ULLONG_MAX) +#define SS 8 +#else +#error fix QUAL_CHARS size_t entries 'z', 'Z'! +#endif + +#if INTMAX_MAX == INT_MAX +#define IMS 0 +#elif INTMAX_MAX == LONG_MAX +#define IMS 4 +#elif defined(LLONG_MAX) && (INTMAX_MAX == LLONG_MAX) +#define IMS 8 +#else +#error fix QUAL_CHARS ptrdiff_t entry 't'! +#endif + +#define QUAL_CHARS { \ + /* j:(u)intmax_t z:(s)size_t t:ptrdiff_t \0:int q:long_long */ \ + 'h', 'l', 'L', 'j', 'z', 't', 'q', 0, \ + 2, 4, 8, IMS, SS, PDS, 8, 0, /* TODO -- fix!!! 
*/\ + 1, 8 } + + +/**********************************************************************/ + +#ifdef L_vfwscanf +#if WINT_MIN > EOF +#error Unfortunately, we currently need wint_t to be able to store EOF. Sorry. +#endif +#define W_EOF WEOF +#define Wint wint_t +#define Wchar wchar_t +#define Wuchar __uwchar_t +#define ISSPACE(C) iswspace((C)) +#define VFSCANF vfwscanf +#define GETC(SC) (SC)->sc_getc((SC)) +#else +typedef unsigned char __uchar_t; +#define W_EOF EOF +#define Wint int +#define Wchar char +#define Wuchar __uchar_t +#define ISSPACE(C) isspace((C)) +#define VFSCANF vfscanf +#ifdef __UCLIBC_HAS_WCHAR__ +#define GETC(SC) (SC)->sc_getc((SC)) +#else /* __UCLIBC_HAS_WCHAR__ */ +#define GETC(SC) getc_unlocked((SC)->fp) +#endif /* __UCLIBC_HAS_WCHAR__ */ #endif struct scan_cookie { + Wint cc; + Wint ungot_char; FILE *fp; int nread; int width; - int width_flag; - int ungot_char; - int ungot_flag; - int app_ungot; + +#ifdef __UCLIBC_HAS_WCHAR__ + wchar_t app_ungot; /* Match FILE struct member type. */ + unsigned char ungot_wchar_width; +#else /* __UCLIBC_HAS_WCHAR__ */ + unsigned char app_ungot; /* Match FILE struct member type. 
*/ +#endif /* __UCLIBC_HAS_WCHAR__ */ + + char ungot_flag; + +#ifdef __UCLIBC_HAS_WCHAR__ + char ungot_wflag; /* vfwscanf */ + char mb_fail; /* vfscanf */ + mbstate_t mbstate; /* vfscanf */ + wint_t wc; + wint_t ungot_wchar; /* to support __scan_getc */ + int (*sc_getc)(struct scan_cookie *); +#endif /* __UCLIBC_HAS_WCHAR__ */ + +#ifdef __UCLIBC_HAS_GLIBC_DIGIT_GROUPING__ + const char *grouping; + const unsigned char *thousands_sep; + int tslen; +#ifdef __UCLIBC_HAS_WCHAR__ + wchar_t thousands_sep_wc; +#endif /* __UCLIBC_HAS_WCHAR__ */ +#endif /* __UCLIBC_HAS_GLIBC_DIGIT_GROUPING__ */ + +#ifdef __UCLIBC_HAS_FLOATS__ + const unsigned char *decpt; + int decpt_len; +#ifdef __UCLIBC_HAS_WCHAR__ + wchar_t decpt_wc; +#endif /* __UCLIBC_HAS_WCHAR__ */ + const unsigned char *fake_decpt; +#endif /* __UCLIBC_HAS_FLOATS__ */ + }; -static const char qual[] = "hl" /* "jtz" */ "Lq"; -/* char = -2, short = -1, int = 0, long = 1, long long = 2 */ -static const char qsz[] = { -1, 1, 2, 2 }; +typedef struct { +#if defined(NL_ARGMAX) && (NL_ARGMAX > 0) +#if NL_ARGMAX > 10 +#warning NL_ARGMAX > 10, and space is allocated on the stack for positional args. +#endif + void *pos_args[NL_ARGMAX]; + int num_pos_args; /* Must start at -1. 
*/ + int cur_pos_arg; +#endif /* defined(NL_ARGMAX) && (NL_ARGMAX > 0) */ + void *cur_ptr; + const unsigned char *fmt; + int cnt, dataargtype, conv_num, max_width; + unsigned char store, flags; +} psfs_t; /* parse scanf format state */ + + +/**********************************************************************/ +/**********************************************************************/ + +extern void __init_scan_cookie(register struct scan_cookie *sc, + register FILE *fp); +extern int __scan_getc(register struct scan_cookie *sc); +extern void __scan_ungetc(register struct scan_cookie *sc); #ifdef __UCLIBC_HAS_FLOATS__ -static int __strtold(long double *ld, struct scan_cookie *sc); - /*01234567890123456 */ -static const char spec[] = "%n[csoupxXidfeEgG"; -#else -static const char spec[] = "%n[csoupxXid"; +extern int __scan_strtold(long double *ld, struct scan_cookie *sc); +#endif /* __UCLIBC_HAS_FLOATS__ */ + +extern int __psfs_parse_spec(psfs_t *psfs); +extern int __psfs_do_numeric(psfs_t *psfs, struct scan_cookie *sc); + +/**********************************************************************/ +#ifdef L___scan_cookie + +#ifdef __UCLIBC_MJN3_ONLY__ +#warning TODO: Remove dependence on decpt_str and fake_decpt in stub locale mode. +#endif +#ifndef __UCLIBC_HAS_LOCALE__ +static const char decpt_str[] = "."; #endif -/* radix[i] <-> spec[i+5] o u p x X i d */ -static const char radix[] = { 8, 10, 16, 16, 16, 0, 10 }; -static void init_scan_cookie(register struct scan_cookie *sc, - register FILE *fp) +void __init_scan_cookie(register struct scan_cookie *sc, + register FILE *fp) { sc->fp = fp; sc->nread = 0; - sc->width_flag = 0; sc->ungot_flag = 0; sc->app_ungot = ((fp->modeflags & __MASK_UNGOT) ? 
fp->ungot[1] : 0); -} +#ifdef __UCLIBC_HAS_WCHAR__ + sc->ungot_wflag = 0; /* vfwscanf */ + sc->mb_fail = 0; +#endif /* __UCLIBC_HAS_WCHAR__ */ + +#ifdef __UCLIBC_HAS_GLIBC_DIGIT_GROUPING__ + if (*(sc->grouping = __UCLIBC_CURLOCALE_DATA.grouping)) { + sc->thousands_sep = __UCLIBC_CURLOCALE_DATA.thousands_sep; + sc->tslen = __UCLIBC_CURLOCALE_DATA.thousands_sep_len; +#ifdef __UCLIBC_HAS_WCHAR__ + sc->thousands_sep_wc = __UCLIBC_CURLOCALE_DATA.thousands_sep_wc; +#endif /* __UCLIBC_HAS_WCHAR__ */ + } +#endif /* __UCLIBC_HAS_GLIBC_DIGIT_GROUPING__ */ + +#ifdef __UCLIBC_HAS_FLOATS__ +#ifdef __UCLIBC_HAS_LOCALE__ + sc->decpt = __UCLIBC_CURLOCALE_DATA.decimal_point; + sc->decpt_len = __UCLIBC_CURLOCALE_DATA.decimal_point_len; +#else /* __UCLIBC_HAS_LOCALE__ */ + sc->fake_decpt = sc->decpt = decpt_str; + sc->decpt_len = 1; +#endif /* __UCLIBC_HAS_LOCALE__ */ +#ifdef __UCLIBC_HAS_WCHAR__ +#ifdef __UCLIBC_HAS_LOCALE__ + sc->decpt_wc = __UCLIBC_CURLOCALE_DATA.decimal_point_wc; +#else + sc->decpt_wc = '.'; +#endif +#endif /* __UCLIBC_HAS_WCHAR__ */ +#endif /* __UCLIBC_HAS_FLOATS__ */ -/* TODO -- what about literal '\0' chars in a file??? 
*/ +} -static int scan_getc_nw(register struct scan_cookie *sc) +int __scan_getc(register struct scan_cookie *sc) { + int c; + +#ifdef __UCLIBC_HAS_WCHAR__ + assert(!sc->mb_fail); +#endif /* __UCLIBC_HAS_WCHAR__ */ + + sc->cc = EOF; + + if (--sc->width < 0) { + sc->ungot_flag |= 2; + return -1; + } + if (sc->ungot_flag == 0) { - sc->ungot_char = getc(sc->fp); + if ((c = GETC(sc)) == EOF) { + sc->ungot_flag |= 2; + return -1; + } + sc->ungot_char = c; } else { + assert(sc->ungot_flag == 1); sc->ungot_flag = 0; } - if (sc->ungot_char > 0) { - ++sc->nread; - } - sc->width_flag = 0; - return sc->ungot_char; + + ++sc->nread; + return sc->cc = sc->ungot_char; } -static int scan_getc(register struct scan_cookie *sc) +void __scan_ungetc(register struct scan_cookie *sc) { - if (sc->ungot_flag == 0) { - sc->ungot_char = getc(sc->fp); - } - sc->width_flag = 1; - if (--sc->width < 0) { + ++sc->width; + if (sc->ungot_flag == 2) { /* last was EOF */ + sc->ungot_flag = 0; + sc->cc = sc->ungot_char; + } else if (sc->ungot_flag == 0) { sc->ungot_flag = 1; - return -1; - } - sc->ungot_flag = 0; - if (sc->ungot_char > 0) { - ++sc->nread; + --sc->nread; + } else { + assert(0); } - return sc->ungot_char; } -static void scan_ungetc(register struct scan_cookie *sc) +#endif +/**********************************************************************/ +#ifdef L___psfs_parse_spec + +#ifdef SPEC_FLAGS +static const unsigned char spec_flags[] = SPEC_FLAGS; +#endif /* SPEC_FLAGS */ +static const unsigned char spec_chars[] = SPEC_CHARS; +static const unsigned char qual_chars[] = QUAL_CHARS; +static const unsigned char spec_ranges[] = SPEC_RANGES; +static const unsigned short spec_allowed[] = SPEC_ALLOWED_FLAGS; + +int __psfs_parse_spec(register psfs_t *psfs) { - if (sc->ungot_flag != 0) { - assert(sc->width < 0); - return; + const unsigned char *p; + const unsigned char *fmt0 = psfs->fmt; + int i; +#ifdef SPEC_FLAGS + int j; +#endif +#if defined(NL_ARGMAX) && (NL_ARGMAX > 0) + unsigned char fail = 
0; + + i = 0; /* Do this here to avoid a warning. */ + + if (!__isdigit_char(*psfs->fmt)) { /* Not a positional arg. */ + fail = 1; + goto DO_FLAGS; } - if (sc->width_flag) { - ++sc->width; + + /* parse the positional arg (or width) value */ + do { + if (i <= ((INT_MAX - 9)/10)) { + i = (i * 10) + (*psfs->fmt++ - '0'); + } + } while (__isdigit_char(*psfs->fmt)); + + if (*psfs->fmt != '$') { /* This is a max field width. */ + if (psfs->num_pos_args >= 0) { /* Already saw a pos arg! */ + goto ERROR_EINVAL; + } + psfs->max_width = i; + psfs->num_pos_args = -2; + goto DO_QUALIFIER; } - sc->ungot_flag = 1; - if (sc->ungot_char > 0) { /* not EOF or EOS */ - --sc->nread; + ++psfs->fmt; /* Advance past '$'. */ +#endif /* defined(NL_ARGMAX) && (NL_ARGMAX > 0) */ + +#if defined(SPEC_FLAGS) || (defined(NL_ARGMAX) && (NL_ARGMAX > 0)) + DO_FLAGS: +#endif /* defined(SPEC_FLAGS) || (defined(NL_ARGMAX) && (NL_ARGMAX > 0)) */ +#ifdef SPEC_FLAGS + p = spec_flags; + j = FLAG_SURPRESS; + do { + if (*p == *psfs->fmt) { + ++psfs->fmt; + psfs->flags |= j; + goto DO_FLAGS; + } + j += j; + } while (*++p); + + if (psfs->flags & FLAG_SURPRESS) { /* Suppress assignment. */ + psfs->store = 0; + goto DO_WIDTH; } -} +#else /* SPEC_FLAGS */ + if (*psfs->fmt == '*') { /* Suppress assignment. */ + ++psfs->fmt; + psfs->store = 0; + goto DO_WIDTH; + } +#endif /* SPEC_FLAGS */ -static void kill_scan_cookie(register struct scan_cookie *sc) -{ - if (sc->ungot_flag) { - ungetc(sc->ungot_char,sc->fp); - /* Deal with distiction between user and scanf ungots. */ - if (sc->nread == 0) { /* Only one char was read... app ungot? */ - sc->fp->ungot[1] = sc->app_ungot; /* restore ungot state. */ + +#if defined(NL_ARGMAX) && (NL_ARGMAX > 0) + if (fail) { + /* Must be a non-positional arg */ + if (psfs->num_pos_args >= 0) { /* Already saw a pos arg! 
*/ + goto ERROR_EINVAL; } + psfs->num_pos_args = -2; + } else { + if ((psfs->num_pos_args == -2) || (((unsigned int)(--i)) >= NL_ARGMAX)) { + /* Already saw a non-pos arg or (0-based) num too large. */ + goto ERROR_EINVAL; + } + psfs->cur_pos_arg = i; } -} +#endif /* defined(NL_ARGMAX) && (NL_ARGMAX > 0) */ -int vfscanf(FILE *fp, const char *format, va_list ap) -{ -#define STRTO_L_(s,e,b,sf) _stdlib_strto_ll(s,e,b,sf) -#define MAX_DIGITS 64 -#define UV_TYPE unsigned long long -#define V_TYPE long long -#ifdef __UCLIBC_HAS_FLOATS__ - long double ld; -#endif - UV_TYPE uv; - struct scan_cookie sc; - register unsigned const char *fmt; - const char *p; - register unsigned char *b; - void *vp; - int cc, i, cnt; - signed char lval; - unsigned char store, usflag, base, invert, r0, r1; - unsigned char buf[MAX_DIGITS+2]; - unsigned char scanset[UCHAR_MAX + 1]; + DO_WIDTH: + for (i = 0 ; __isdigit_char(*psfs->fmt) ; ) { + if (i <= ((INT_MAX - 9)/10)) { + i = (i * 10) + (*psfs->fmt++ - '0'); + psfs->max_width = i; + } + } - __STDIO_THREADLOCK(fp); +#if defined(NL_ARGMAX) && (NL_ARGMAX > 0) + DO_QUALIFIER: +#endif /* defined(NL_ARGMAX) && (NL_ARGMAX > 0) */ + p = qual_chars; + do { + if (*psfs->fmt == *p) { + ++psfs->fmt; + break; + } + } while (*++p); + if ((p - qual_chars < 2) && (*psfs->fmt == *p)) { + p += ((sizeof(qual_chars)-2) / 2); + ++psfs->fmt; + } + psfs->dataargtype = ((int)(p[(sizeof(qual_chars)-2) / 2])) << 8; - init_scan_cookie(&sc,fp); +#ifdef __UCLIBC_MJN3_ONLY__ +#warning CONSIDER: Should we validate that psfs->max_width > 0 in __psfs_parse_spec()? It would avoid whitespace consumption... +#warning CONSIDER: Should INT_MAX be a valid width (%c/%C)? See __psfs_parse_spec(). +#endif /* __UCLIBC_MJN3_ONLY__ */ - fmt = (unsigned const char *) format; - cnt = 0; + p = spec_chars; + do { + if (*psfs->fmt == *p) { + int p_m_spec_chars = p - spec_chars; - while (*fmt) { - store = 1; - lval = 0; - sc.width = INT_MAX; - if (*fmt == '%') { /* Conversion specification. 
*/ - ++fmt; - if (*fmt == '*') { /* Suppress assignment. */ - store = 0; - ++fmt; +#ifdef __UCLIBC_HAS_SCANF_GLIBC_A_FLAG__ +#error implement gnu a flag + if ((*p == 'a') + && ((psfs->fmt[1] == '[') || ((psfs->fmt[1]|0x20) == 's')) + ) { /* Assumes ascii for 's' and 'S' test. */ + psfs->flags |= FLAG_MALLOC; + ++psfs->fmt; + ++p; + continue; /* The related conversions follow 'a'. */ } - for (i = 0 ; __isdigit(*fmt) ; sc.width = i) { - i = (i * 10) + (*fmt++ - '0'); /* Get specified width. */ +#endif /* __UCLIBC_HAS_SCANF_GLIBC_A_FLAG__ */ + + for (p = spec_ranges; p_m_spec_chars > *p ; ++p) {} + if (((psfs->dataargtype >> 8) | psfs->flags) + & ~spec_allowed[(int)(p - spec_ranges)] + ) { + goto ERROR_EINVAL; } - for (i = 0 ; i < sizeof(qual) ; i++) { /* Optional qualifier. */ - if (qual[i] == *fmt) { - ++fmt; - lval += qsz[i]; - if ((i < 2) && (qual[i] == *fmt)) { /* Double h or l. */ - ++fmt; - lval += qsz[i]; + + if ((p_m_spec_chars >= CONV_c) + && (psfs->dataargtype & PA_FLAG_LONG)) { + p_m_spec_chars -= 3; /* lc -> C, ls -> S, l[ -> ?? */ + } + + psfs->conv_num = p_m_spec_chars; + return psfs->fmt - fmt0; + } + if (!*++p) { + ERROR_EINVAL: + __set_errno(EINVAL); + return -1; + } + } while(1); + + assert(0); +} + +#endif +/**********************************************************************/ +#if defined(L_vfscanf) || defined(L_vfwscanf) + +#ifdef __UCLIBC_HAS_WCHAR__ +#ifdef L_vfscanf +static int sc_getc(register struct scan_cookie *sc) +{ + return getc(sc->fp); +} + +static int scan_getwc(register struct scan_cookie *sc) +{ + size_t r; + int width; + wchar_t wc[1]; + char b[1]; + + if (--sc->width < 0) { + sc->ungot_flag |= 2; + return -1; + } + + width = sc->width; /* Preserve width. */ + sc->width = INT_MAX; /* MB_CUR_MAX can invoke a function. */ + + r = (size_t)(-1); + while (__scan_getc(sc) >= 0) { + *b = sc->cc; + + r = mbrtowc(wc, b, 1, &sc->mbstate); + if (((ssize_t) r) >= 0) { /* Successful completion of a wc. 
*/ + sc->wc = *wc; + goto SUCCESS; + } else if (r == ((size_t) -2)) { + /* Potentially valid but incomplete. */ + continue; + } + break; + } + + /* If we reach here, either r == ((size_t)-1) and + * mbrtowc set errno to EILSEQ, or r == ((size_t)-2) + * and stream is in an error state or at EOF with a + * partially complete wchar. */ + __set_errno(EILSEQ); /* In case of incomplete conversion. */ + sc->mb_fail = 1; + + SUCCESS: + sc->width = width; /* Restore width. */ + + return (int)((ssize_t) r); +} + +#endif /* L_vfscanf */ + +#ifdef L_vfwscanf + +/* This gets called by __scan_getc. __scan_getc is called by vfwscanf + * when the next wide char is expected to be valid ascii (digits). + */ +static int sc_getc(register struct scan_cookie *sc) +{ + wint_t wc; + + if (sc->fp->filedes == -3) { + if (sc->fp->bufpos < sc->fp->bufend) { + wc = *((wchar_t *)(sc->fp->bufpos)); + sc->fp->bufpos += sizeof(wchar_t); + } else { + sc->fp->modeflags |= __FLAG_EOF; + return EOF; + } + } else if ((wc = fgetwc_unlocked(sc->fp)) == WEOF) { + return EOF; + } + + sc->ungot_wflag = 1; + sc->ungot_wchar = wc; + sc->ungot_wchar_width = sc->fp->ungot_width[0]; + +#ifdef __UCLIBC_HAS_GLIBC_DIGIT_GROUPING__ + if (wc == sc->thousands_sep_wc) { + wc = ','; + } else +#endif /* __UCLIBC_HAS_GLIBC_DIGIT_GROUPING__ */ +#ifdef __UCLIBC_HAS_FLOATS__ + if (wc == sc->decpt_wc) { + wc = '.'; + } else +#endif /* __UCLIBC_HAS_FLOATS__ */ + if (!__isascii(wc)) { + wc = '?'; + } + sc->wc = sc->ungot_char = wc; + + return (int) wc; +} + +static int scan_getwc(register struct scan_cookie *sc) +{ + wint_t wc; + + sc->wc = WEOF; + + if (--sc->width < 0) { + sc->ungot_flag |= 2; + return -1; + } + + if (sc->ungot_flag == 0) { + + if (sc->fp->filedes == -3) { + if (sc->fp->bufpos < sc->fp->bufend) { + wc = *((wchar_t *)(sc->fp->bufpos)); + sc->fp->bufpos += sizeof(wchar_t); + } else { + sc->ungot_flag |= 2; + return -1; + } + } else if ((wc = fgetwc_unlocked(sc->fp)) == WEOF) { + sc->ungot_flag |= 2; + return 
-1; + } + sc->ungot_wflag = 1; + sc->ungot_char = wc; + sc->ungot_wchar_width = sc->fp->ungot_width[0]; + } else { + assert(sc->ungot_flag == 1); + sc->ungot_flag = 0; + } + + ++sc->nread; + sc->wc = sc->ungot_char; + + return 0; +} + + +#endif /* L_vfwscanf */ +#endif /* __UCLIBC_HAS_WCHAR__ */ + +static __inline void kill_scan_cookie(register struct scan_cookie *sc) +{ +#ifdef L_vfscanf + + if (sc->ungot_flag & 1) { + ungetc(sc->ungot_char, sc->fp); + /* Deal with distiction between user and scanf ungots. */ + if (sc->nread == 0) { /* Only one char was read... app ungot? */ + sc->fp->ungot[1] = sc->app_ungot; /* restore ungot state. */ + } else { + sc->fp->ungot[1] = 0; + } + } + +#else + + if ((sc->ungot_wflag & 1) && (sc->fp->filedes != -3) && (sc->fp->state.mask == 0)) { + ungetwc(sc->ungot_char, sc->fp); + /* Deal with distiction between user and scanf ungots. */ + if (sc->nread == 0) { /* Only one char was read... app ungot? */ + sc->fp->ungot[1] = sc->app_ungot; /* restore ungot state. */ + } else { + sc->fp->ungot[1] = 0; + } + sc->fp->ungot_width[1] = sc->ungot_wchar_width; + } + +#endif +} + +#ifdef L_vfwscanf +#ifdef __UCLIBC_HAS_FLOATS__ +static const char fake_decpt_str[] = "."; +#endif +#ifdef __UCLIBC_HAS_GLIBC_DIGIT_GROUPING__ +static const char fake_thousands_sep_str[] = ","; +#endif +#endif /* L_vfwscanf */ + + +int VFSCANF (FILE *__restrict fp, const Wchar *__restrict format, va_list arg) +{ + const Wuchar *fmt; + unsigned char *b; + + +#ifdef L_vfwscanf + wchar_t wbuf[1]; + wchar_t *wb; +#endif /* L_vfwscanf */ + +#ifdef __UCLIBC_HAS_WCHAR__ + mbstate_t mbstate; +#endif /* __UCLIBC_HAS_WCHAR__ */ + + struct scan_cookie sc; + psfs_t psfs; + + int i; + +#warning fix MAX_DIGITS. we do not do binary, so...! +#define MAX_DIGITS 65 /* Allow one leading 0. */ + unsigned char buf[MAX_DIGITS+2]; +#ifdef L_vfscanf + unsigned char scanset[UCHAR_MAX + 1]; + unsigned char invert; /* Careful! Meaning changes. 
*/ +#endif /* L_vfscanf */ + unsigned char fail; + +#ifdef __UCLIBC_MJN3_ONLY__ +#warning TODO: Make checking of the format string in C locale an option. +#endif + /* To support old programs, don't check mb validity if in C locale. */ +#if defined(__UCLIBC_HAS_LOCALE__) && !defined(L_vfwscanf) + /* ANSI/ISO C99 requires format string to be a valid multibyte string + * beginning and ending in its initial shift state. */ + if (((__UCLIBC_CURLOCALE_DATA).encoding) != __ctype_encoding_7_bit) { + mbstate.mask = 0; /* Initialize the mbstate. */ + const char *p = format; + if (mbsrtowcs(NULL, &p, SIZE_MAX, &mbstate) == ((size_t)(-1))) { + __set_errno(EINVAL); /* Format string is invalid. */ + return 0; + } + } +#endif /* defined(__UCLIBC_HAS_LOCALE__) && !defined(L_vfwscanf) */ + +#if defined(NL_ARGMAX) && (NL_ARGMAX > 0) + psfs.num_pos_args = -1; /* Must start at -1. */ + /* Initialize positional arg ptrs to NULL. */ + memset(psfs.pos_args, 0, sizeof(psfs.pos_args)); +#endif /* defined(NL_ARGMAX) && (NL_ARGMAX > 0) */ + + __STDIO_THREADLOCK(fp); + + __init_scan_cookie(&sc,fp); +#ifdef __UCLIBC_HAS_WCHAR__ + sc.sc_getc = sc_getc; + sc.ungot_wchar_width = sc.fp->ungot_width[1]; + +#ifdef L_vfwscanf + +#ifdef __UCLIBC_HAS_GLIBC_DIGIT_GROUPING__ + if (*sc.grouping) { + sc.thousands_sep = fake_thousands_sep_str; + sc.tslen = 1; + } +#endif /* __UCLIBC_HAS_GLIBC_DIGIT_GROUPING__ */ + +#ifdef __UCLIBC_HAS_FLOATS__ + sc.fake_decpt = fake_decpt_str; +#endif /* __UCLIBC_HAS_FLOATS__ */ + +#else /* L_vfwscanf */ + +#ifdef __UCLIBC_HAS_FLOATS__ + sc.fake_decpt = sc.decpt; +#endif /* __UCLIBC_HAS_FLOATS__ */ + +#endif /* L_vfwscanf */ + +#endif /* __UCLIBC_HAS_WCHAR__ */ + psfs.cnt = 0; + + /* Note: If we ever wanted to support non-nice codesets, we + * would really need to do a mb->wc conversion here in the + * vfscanf case. Related changes would have to be made in + * the code that follows... basicly wherever fmt appears. 
*/ + for (fmt = (const Wuchar *) format ; *fmt ; /* ++fmt */) { + + psfs.store = 1; + psfs.flags = 0; +#ifndef NDEBUG + psfs.cur_ptr = NULL; /* Debugging aid. */ +#endif /* NDEBUG */ + + + sc.ungot_flag &= 1; /* Clear (possible fake) EOF. */ + sc.width = psfs.max_width = INT_MAX; + + /* Note: According to the standards, vfscanf does use isspace + * here. So, if we did a mb->wc conversion, we would have to do + * something like + * ((((__uwchar_t)wc) < UCHAR_MAX) && isspace(wc)) + * because wc might not be in the allowed domain. */ + if (ISSPACE(*fmt)) { + do { + ++fmt; + } while (ISSPACE(*fmt)); + --fmt; + psfs.conv_num = CONV_whitespace; + goto DO_WHITESPACE; + } + + if (*fmt == '%') { /* Conversion specification. */ + if (*++fmt == '%') { /* Remember, '%' eats whitespace too. */ + psfs.conv_num = CONV_percent; + goto DO_CONVERSION; + } + + +#ifdef L_vfscanf + psfs.fmt = fmt; +#else /* L_vfscanf */ + { + const __uwchar_t *wf = fmt; + psfs.fmt = b = buf; + + while (*wf && __isascii(*wf) && (b < buf + sizeof(buf) - 1)) { + *b++ = *wf++; + } +#ifdef __UCLIBC_HAS_SCANF_GLIBC_A_FLAG__ +#error this is wrong... we need to ched in __psfs_parse_spec instead since this checks last char in buffer and conversion my have stopped before it. + if ((*b == 'a') && ((*wf == '[') || ((*wf|0x20) == 's'))) { + goto DONE; /* Spec was excessively long. */ + } +#endif /* __UCLIBC_HAS_SCANF_GLIBC_A_FLAG__ */ + *b = 0; + if (b == buf) { /* Bad conversion specifier! */ + goto DONE; + } + } +#endif /* L_vfscanf */ + if ((i = __psfs_parse_spec(&psfs)) < 0) { /* Bad conversion specifier! 
*/ + goto DONE; + } + fmt += i; + +#if defined(NL_ARGMAX) && (NL_ARGMAX > 0) + if (psfs.store) { + if (psfs.num_pos_args == -2) { + psfs.cur_ptr = va_arg(arg, void *); + } else { + while (psfs.cur_pos_arg > psfs.num_pos_args) { + psfs.pos_args[++psfs.num_pos_args] = va_arg(arg, void *); } - break; + psfs.cur_ptr = psfs.pos_args[psfs.cur_pos_arg]; + } + } +#else /* defined(NL_ARGMAX) && (NL_ARGMAX > 0) */ + psfs.cur_ptr = va_arg(arg, void *); +#endif /* defined(NL_ARGMAX) && (NL_ARGMAX > 0) */ + + DO_CONVERSION: + /* First, consume white-space if not n, c, [, C, or l[. */ + if ((((1L << CONV_n)|(1L << CONV_C)|(1L << CONV_c) + |(1L << CONV_LEFTBRACKET)|(1L << CONV_leftbracket)) + & (1L << psfs.conv_num)) == 0 + ) { + DO_WHITESPACE: + while ((__scan_getc(&sc) >= 0) +#ifdef L_vfscanf + && isspace(sc.cc) +#else /* L_vfscanf */ + && iswspace(sc.wc) +#endif /* L_vfscanf */ + ) {} + __scan_ungetc(&sc); + if (psfs.conv_num == CONV_whitespace) { + goto NEXT_FMT; + } + } + + sc.width = psfs.max_width; /* Now limit the max width. */ + + if (sc.width == 0) { /* 0 width is forbidden. */ + goto DONE; + } + + + if (psfs.conv_num == CONV_percent) { + goto MATCH_CHAR; + } + + if (psfs.conv_num == CONV_n) { + if (psfs.store) { + _store_inttype(psfs.cur_ptr, psfs.dataargtype, + (uintmax_t) sc.nread); } + goto NEXT_FMT; } - for (p = spec ; *p ; p++) { /* Process format specifier. */ - if (*fmt != *p) continue; - if (p-spec < 1) { /* % - match a '%'*/ - goto matchchar; + + if (psfs.conv_num <= CONV_A) { /* pointer, integer, or float spec */ +#ifdef L_vfscanf + if (__psfs_do_numeric(&psfs, &sc) < 0) { /* Num conv failed! 
*/ + goto DONE; } - if (p-spec < 2) { /* n - store number of chars read */ - *(va_arg(ap, int *)) = sc.nread; - scan_getc_nw(&sc); - goto nextfmt; + goto NEXT_FMT; +#else + int r = __psfs_do_numeric(&psfs, &sc); + if (sc.ungot_wflag == 1) { /* fix up '?', '.', and ',' hacks */ + sc.cc = sc.ungot_char = sc.ungot_wchar; } - if (p-spec > 3) { /* skip white space if not c or [ */ - do { - i = scan_getc_nw(&sc); - } while (__isspace(i)); - scan_ungetc(&sc); + if (r < 0) { + goto DONE; + } + goto NEXT_FMT; +#endif + } + + /* Do string conversions here since they are not common code. */ + + +#ifdef L_vfscanf + + if +#ifdef __UCLIBC_HAS_WCHAR__ + (psfs.conv_num >= CONV_LEFTBRACKET) +#else /* __UCLIBC_HAS_WCHAR__ */ + (psfs.conv_num >= CONV_c) +#endif /* __UCLIBC_HAS_WCHAR__ */ + { + b = (psfs.store ? ((unsigned char *) psfs.cur_ptr) : buf); + fail = 1; + + + if (psfs.conv_num == CONV_c) { + if (sc.width == INT_MAX) { + sc.width = 1; + } + + while (__scan_getc(&sc) >= 0) { + *b = sc.cc; + b += psfs.store; + } + __scan_ungetc(&sc); + if (sc.width > 0) { /* Failed to read all required. */ + goto DONE; + } + psfs.cnt += psfs.store; + goto NEXT_FMT; } - if (p-spec < 5) { /* [,c,s - string conversions */ + + if (psfs.conv_num == CONV_s) { + /* Yes, believe it or not, a %s conversion can store nuls. */ + while ((__scan_getc(&sc) >= 0) && !isspace(sc.cc)) { + *b = sc.cc; + b += psfs.store; + fail = 0; + } + } else { +#ifdef __UCLIBC_HAS_WCHAR__ + assert((psfs.conv_num == CONV_LEFTBRACKET) || \ + (psfs.conv_num == CONV_leftbracket)); +#else /* __UCLIBC_HAS_WCHAR__ */ + assert((psfs.conv_num == CONV_leftbracket)); +#endif /* __UCLIBC_HAS_WCHAR__ */ + invert = 0; - if (*p == 'c') { + + if (*++fmt == '^') { + ++fmt; invert = 1; - if (sc.width == INT_MAX) { - sc.width = 1; - } - } - for (i=0 ; i<= UCHAR_MAX ; i++) { - scanset[i] = ((*p == 's') ? 
(__isspace(i) == 0) : 0); + } + memset(scanset, invert, sizeof(scanset)); + invert = 1-invert; + + if (*fmt == ']') { + scanset[(int)(']')] = invert; + ++fmt; } - if (*p == '[') { /* need to build a scanset */ - if (*++fmt == '^') { - invert = 1; - ++fmt; - } - if (*fmt == ']') { - scanset[(int)']'] = 1; - ++fmt; + + while (*fmt != ']') { + if (!*fmt) { /* No closing ']'. */ + goto DONE; } - r0 = 0; - while (*fmt && *fmt !=']') { /* build scanset */ - if ((*fmt == '-') && r0 && (fmt[1] != ']')) { - /* range */ - ++fmt; - if (*fmt < r0) { - r1 = r0; - r0 = *fmt; - } else { - r1 = *fmt; - } - for (i=r0 ; i<= r1 ; i++) { - scanset[i] = 1; - } - r0 = 0; - } else { - r0 = *fmt; - scanset[r0] = 1; - } + if ((*fmt == '-') && (fmt[1] != ']') + && (fmt[-1] < fmt[1]) /* sorted? */ + ) { /* range */ ++fmt; + i = fmt[-2]; + /* Note: scanset[i] should already have been done + * in the previous iteration. */ + do { + scanset[++i] = invert; + } while (i < *fmt); + /* Safe to fall through, and a bit smaller. */ } - if (!*fmt) { /* format string exhausted! */ - goto done; - } + /* literal char */ + scanset[(int) *fmt] = invert; + ++fmt; } - /* ok -- back to common work */ - if (sc.width <= 0) { - goto done; + +#ifdef __UCLIBC_HAS_WCHAR__ + if (psfs.conv_num == CONV_LEFTBRACKET) { + goto DO_LEFTBRACKET; } - if (store) { - b = va_arg(ap, unsigned char *); - } else { - b = buf; +#endif /* __UCLIBC_HAS_WCHAR__ */ + + + while (__scan_getc(&sc) >= 0) { + if (!scanset[sc.cc]) { + break; + } + *b = sc.cc; + b += psfs.store; + fail = 0; } - cc = scan_getc(&sc); - if (cc < 0) { - scan_ungetc(&sc); - goto done; /* return EOF if cnt == 0 */ + } + /* Common tail for processing of %s and %[. */ + + __scan_ungetc(&sc); + if (fail) { /* nothing stored! */ + goto DONE; + } + *b = 0; /* Nul-terminate string. */ + psfs.cnt += psfs.store; + goto NEXT_FMT; + } + +#ifdef __UCLIBC_HAS_WCHAR__ + DO_LEFTBRACKET: /* Need to do common wide init. 
*/ + if (psfs.conv_num >= CONV_C) { + wchar_t wbuf[1]; + wchar_t *wb; + + sc.mbstate.mask = 0; + + wb = (psfs.store ? ((wchar_t *) psfs.cur_ptr) : wbuf); + fail = 1; + + if (psfs.conv_num == CONV_C) { + if (sc.width == INT_MAX) { + sc.width = 1; } - if (*p == 'c') { - goto c_spec; + + while (scan_getwc(&sc) >= 0) { + assert(sc.width >= 0); + *wb = sc.wc; + wb += psfs.store; } - i = 0; - while ((cc>=0) && (scanset[cc] != invert)) { - c_spec: - i = 1; /* yes, we stored something */ - *b = cc; - b += store; - cc = scan_getc(&sc); + + __scan_ungetc(&sc); + if (sc.width > 0) { /* Failed to read all required. */ + goto DONE; } - if (i==0) { - scan_ungetc(&sc); - goto done; /* return cnt */ + psfs.cnt += psfs.store; + goto NEXT_FMT; + } + + + if (psfs.conv_num == CONV_S) { + /* Yes, believe it or not, a %s conversion can store nuls. */ + while ((scan_getwc(&sc) >= 0) + && ((((__uwchar_t)(sc.wc)) > UCHAR_MAX) + || !isspace(sc.wc)) + ) { + *wb = sc.wc; + wb += psfs.store; + fail = 0; } - if (*p != 'c') { /* nul-terminate the stored string */ - *b = 0; + } else { + assert(psfs.conv_num == CONV_LEFTBRACKET); + + while (scan_getwc(&sc) >= 0) { + if (((__uwchar_t) sc.wc) <= UCHAR_MAX) { + if (!scanset[sc.wc]) { + break; + } + } else if (invert) { + break; + } + *wb = sc.wc; + wb += psfs.store; + fail = 0; } - cnt += store; - goto nextfmt; } - if (p-spec < 12) { /* o,u,p,x,X,i,d - (un)signed integer */ - if (*p == 'p') { - /* assume pointer same size as int or long. */ - lval = (sizeof(char *) == sizeof(long)); + /* Common tail for processing of %ls and %l[. */ + + __scan_ungetc(&sc); + if (fail || sc.mb_fail) { /* Nothing stored or mb error. */ + goto DONE; + } + *wb = 0; /* Nul-terminate string. */ + psfs.cnt += psfs.store; + goto NEXT_FMT; + + } + +#endif /* __UCLIBC_HAS_WCHAR__ */ +#else /* L_vfscanf */ + + if (psfs.conv_num >= CONV_C) { + b = buf; + wb = wbuf; + if (psfs.conv_num >= CONV_c) { + mbstate.mask = 0; /* Initialize the mbstate. 
*/ + if (psfs.store) { + b = (unsigned char *) psfs.cur_ptr; } - usflag = ((p-spec) < 10); /* (1)0 if (un)signed */ - base = radix[(int)(p-spec) - 5]; - b = buf; - if (sc.width <= 0) { - goto done; + } else { + if (psfs.store) { + wb = (wchar_t *) psfs.cur_ptr; } - cc = scan_getc(&sc); - if ((cc == '+') || (cc == '-')) { /* Handle leading sign.*/ - *b++ = cc; - cc = scan_getc(&sc); + } + fail = 1; + + + if ((psfs.conv_num == CONV_C) || (psfs.conv_num == CONV_c)) { + if (sc.width == INT_MAX) { + sc.width = 1; } - if (cc == '0') { /* Possibly set base and handle prefix. */ - if ((base == 0) || (base == 16)) { - cc = scan_getc(&sc); - if ((cc == 'x') || (cc == 'X')) { - /* We're committed to base 16 now. */ - base = 16; - cc = scan_getc(&sc); - } else { /* oops... back up */ - scan_ungetc(&sc); - cc = '0'; - if (base == 0) { - base = 8; - } + + while (scan_getwc(&sc) >= 0) { + if (psfs.conv_num == CONV_C) { + *wb = sc.wc; + wb += psfs.store; + } else { + i = wcrtomb(b, sc.wc, &mbstate); + if (i < 0) { /* Conversion failure. */ + goto DONE_DO_UNGET; + } + if (psfs.store) { + b += i; } } } - if (base == 0) { /* Default to base 10 */ - base = 10; - } - /* At this point, we're ready to start reading digits. */ - if (cc == '0') { - *b++ = cc; /* Store first leading 0 */ - do { /* but ignore others. */ - cc = scan_getc(&sc); - } while (cc == '0'); + __scan_ungetc(&sc); + if (sc.width > 0) { /* Failed to read all required. */ + goto DONE; } - while (valid_digit(cc,base)) { /* Now for nonzero digits.*/ - if (b - buf < MAX_DIGITS) { - *b++ = cc; + psfs.cnt += psfs.store; + goto NEXT_FMT; + } + + if ((psfs.conv_num == CONV_S) || (psfs.conv_num == CONV_s)) { + /* Yes, believe it or not, a %s conversion can store nuls. */ + while ((scan_getwc(&sc) >= 0) && !iswspace(sc.wc)) { + if (psfs.conv_num == CONV_S) { + *wb = sc.wc; + wb += psfs.store; + } else { + i = wcrtomb(b, sc.wc, &mbstate); + if (i < 0) { /* Conversion failure. 
*/ + goto DONE_DO_UNGET; + } + if (psfs.store) { + b += i; + } } - cc = scan_getc(&sc); + fail = 0; } - *b = 0; /* null-terminate */ - if ((b == buf) || (*--b == '+') || (*b == '-')) { - scan_ungetc(&sc); - goto done; /* No digits! */ + } else { + const wchar_t *sss; + const wchar_t *ssp; + unsigned char invert = 0; + + assert((psfs.conv_num == CONV_LEFTBRACKET) + || (psfs.conv_num == CONV_leftbracket)); + + if (*++fmt == '^') { + ++fmt; + invert = 1; } - if (store) { - if (*buf == '-') { - usflag = 0; + sss = (const wchar_t *) fmt; + if (*fmt == ']') { + ++fmt; + } + while (*fmt != ']') { + if (!*fmt) { /* No closing ']'. */ + goto DONE; } - uv = STRTO_L_(buf, NULL, base, 1-usflag); - vp = va_arg(ap, void *); - switch (lval) { - case 2: /* If no long long, treat as long . */ - *((unsigned long long *)vp) = uv; - break; - case 1: -#if ULONG_MAX == UINT_MAX - case 0: /* int and long int are the same */ -#endif - if (usflag) { - if (uv > ULONG_MAX) { - uv = ULONG_MAX; - } - } else if (((V_TYPE)uv) > LONG_MAX) { - uv = LONG_MAX; - } else if (((V_TYPE)uv) < LONG_MIN) { - uv = (UV_TYPE) LONG_MIN; - } - *((unsigned long *)vp) = (unsigned long)uv; - break; -#if ULONG_MAX != UINT_MAX - case 0: /* int and long int are different */ - if (usflag) { - if (uv > UINT_MAX) { - uv = UINT_MAX; - } - } else if (((V_TYPE)uv) > INT_MAX) { - uv = INT_MAX; - } else if (((V_TYPE)uv) < INT_MIN) { - uv = (UV_TYPE) INT_MIN; - } - *((unsigned int *)vp) = (unsigned int)uv; - break; -#endif - case (signed char)(-1): - if (usflag) { - if (uv > USHRT_MAX) { - uv = USHRT_MAX; - } - } else if (((V_TYPE)uv) > SHRT_MAX) { - uv = SHRT_MAX; - } else if (((V_TYPE)uv) < SHRT_MIN) { - uv = (UV_TYPE) SHRT_MIN; - } - *((unsigned short *)vp) = (unsigned short)uv; - break; - case (signed char)(-2): - if (usflag) { - if (uv > UCHAR_MAX) { - uv = UCHAR_MAX; + if ((*fmt == '-') && (fmt[1] != ']') + && (fmt[-1] < fmt[1]) /* sorted? */ + ) { /* range */ + ++fmt; + } + ++fmt; + } + /* Ok... a valid scanset spec. 
*/ + + while (scan_getwc(&sc) >= 0) { + ssp = sss; + do { /* We know sss < fmt. */ + if (*ssp == '-') { /* possible range... */ + /* Note: We accept a-c-e (ordered) as + * equivalent to a-e. */ + if (ssp > sss) { + if ((++ssp < (const wchar_t *) fmt) + && (ssp[-2] < *ssp) /* sorted? */ + ) { /* yes */ + if ((sc.wc >= ssp[-2]) + && (sc.wc <= *ssp)) { + break; + } + continue; /* not in range */ } - } else if (((V_TYPE)uv) > CHAR_MAX) { - uv = CHAR_MAX; - } else if (((V_TYPE)uv) < CHAR_MIN) { - uv = (UV_TYPE) CHAR_MIN; + --ssp; /* oops... '-' at end, so back up */ } - *((unsigned char *)vp) = (unsigned char) uv; + /* false alarm... a literal '-' */ + } + if (sc.wc == *ssp) { /* Matched literal char. */ break; - default: - assert(0); + } + } while (++ssp < (const wchar_t *) fmt); + + if ((ssp == (const wchar_t *) fmt) ^ invert) { + /* no match and not inverting + * or match and inverting */ + break; } - ++cnt; - } - goto nextfmt; - } -#ifdef __UCLIBC_HAS_FLOATS__ - else { /* floating point */ - if (sc.width <= 0) { - goto done; - } - if (__strtold(&ld, &sc)) { /* Success! */ - if (store) { - vp = va_arg(ap, void *); - switch (lval) { - case 2: - *((long double *)vp) = ld; - break; - case 1: - *((double *)vp) = (double) ld; - break; - case 0: - *((float *)vp) = (float) ld; - break; - default: /* Illegal qualifier! */ - assert(0); - goto done; + if (psfs.conv_num == CONV_LEFTBRACKET) { + *wb = sc.wc; + wb += psfs.store; + } else { + i = wcrtomb(b, sc.wc, &mbstate); + if (i < 0) { /* Conversion failure. */ + goto DONE_DO_UNGET; + } + if (psfs.store) { + b += i; } - ++cnt; } - goto nextfmt; + fail = 0; } } -#else - assert(0); -#endif - goto done; - } - /* Unrecognized specifier! */ - goto RETURN_cnt; - } if (__isspace(*fmt)) { /* Consume all whitespace. */ - do { - i = scan_getc_nw(&sc); - } while (__isspace(i)); - } else { /* Match the current fmt char. 
*/ - matchchar: - if (scan_getc_nw(&sc) != *fmt) { - scan_ungetc(&sc); - goto done; + /* Common tail for processing of %s and %[. */ + + __scan_ungetc(&sc); + if (fail) { /* nothing stored! */ + goto DONE; + } + *wb = 0; /* Nul-terminate string. */ + *b = 0; + psfs.cnt += psfs.store; + goto NEXT_FMT; } - scan_getc_nw(&sc); + +#endif /* L_vfscanf */ + + assert(0); + goto DONE; + } /* conversion specification */ + + MATCH_CHAR: + if (__scan_getc(&sc) != *fmt) { +#ifdef L_vfwscanf + DONE_DO_UNGET: +#endif /* L_vfwscanf */ + __scan_ungetc(&sc); + goto DONE; } - nextfmt: - scan_ungetc(&sc); + + NEXT_FMT: ++fmt; } - done: /* end of scan */ - kill_scan_cookie(&sc); - - if ((sc.ungot_char <= 0) && (cnt == 0) && (*fmt)) { - cnt = EOF; + DONE: + if ((psfs.cnt == 0) && (*fmt) && __FEOF_OR_FERROR(fp)) { + psfs.cnt = EOF; /* Yes, vfwscanf also returns EOF. */ } - RETURN_cnt: + kill_scan_cookie(&sc); + +/* RETURN_cnt: */ __STDIO_THREADUNLOCK(fp); - return (cnt); + return psfs.cnt; } +#endif +/**********************************************************************/ +#ifdef L___psfs_do_numeric + +static const unsigned char spec_base[] = SPEC_BASE; +static const unsigned char nil_string[] = "(nil)"; -/*****************************************************************************/ +int __psfs_do_numeric(psfs_t *psfs, struct scan_cookie *sc) +{ + unsigned char *b; + const unsigned char *p; #ifdef __UCLIBC_HAS_FLOATS__ + int exp_adjust = 0; +#endif +#warning fix MAX_DIGITS. we do not do binary, so...! +#define MAX_DIGITS 65 /* Allow one leading 0. */ +#warning fix buf! 
+ unsigned char buf[MAX_DIGITS+2+ 100]; + unsigned char usflag, base; + unsigned char nonzero = 0; + unsigned char seendigit = 0; + -#include +#ifndef __UCLIBC_HAS_FLOATS__ + if (psfs->conv_num > CONV_i) { /* floating point */ + goto DONE; + } +#endif -#define MAX_SIG_DIGITS 20 -#define MAX_IGNORED_DIGITS 2000 -#define MAX_ALLOWED_EXP (MAX_SIG_DIGITS + MAX_IGNORED_DIGITS + LDBL_MAX_10_EXP) + base = spec_base[psfs->conv_num - CONV_p]; + usflag = (psfs->conv_num <= CONV_u); /* (1)0 if (un)signed */ + b = buf; -#if LDBL_DIG > MAX_SIG_DIGITS -#error need to adjust MAX_SIG_DIGITS -#endif -#include -#if MAX_ALLOWED_EXP > INT_MAX -#error size assumption violated for MAX_ALLOWED_EXP + if (psfs->conv_num == CONV_p) { /* Pointer */ + p = nil_string; + do { + if ((__scan_getc(sc) < 0) || (*p != sc->cc)) { + __scan_ungetc(sc); + if (p > nil_string) { /* failed */ + /* We matched at least the '(' so even if we + * are at eof, we can not match a pointer. */ + goto DONE; + } + break; + } + if (!*++p) { /* Matched (nil), so no unget necessary. */ + if (psfs->store) { + ++psfs->cnt; + _store_inttype(psfs->cur_ptr, psfs->dataargtype, + (uintmax_t) NULL); + } + return 0; + } + } while (1); + +#ifdef __UCLIBC_MJN3_ONLY__ +#warning CONSIDER: Should we require a 0x prefix and disallow +/- for pointer %p? +#endif /* __UCLIBC_MJN3_ONLY__ */ + } + + __scan_getc(sc); + if ((sc->cc == '+') || (sc->cc == '-')) { /* Handle leading sign.*/ + *b++ = sc->cc; + __scan_getc(sc); + } + + if ((base & 0xef) == 0) { /* 0xef is ~16, so 16 or 0. */ + if (sc->cc == '0') { /* Possibly set base and handle prefix. */ + __scan_getc(sc); + if ((sc->cc|0x20) == 'x') { /* Assumes ascii.. x or X. */ + if ((__scan_getc(sc) < 0) +#ifdef __UCLIBC_HAS_WCHAR__ + && !sc->ungot_wflag /* wc outside char range */ +#endif /* __UCLIBC_HAS_WCHAR__ */ + ) { + /* Note! 'x' at end of file|field is special. + * While this looks like I'm 'unget'ing twice, + * EOF and end of field are handled specially + * by the scan_* funcs. 
*/ + __scan_ungetc(sc); + goto DO_NO_0X; + } + base = 16; /* Base 16 for sure now. */ +#ifdef __UCLIBC_HAS_HEXADECIMAL_FLOATS__ + /* The prefix is required for hexadecimal floats. */ + *b++ = '0'; + *b++ = 'x'; +#endif /* __UCLIBC_HAS_HEXADECIMAL_FLOATS__ */ + } else { /* oops... back up */ + DO_NO_0X: + __scan_ungetc(sc); + sc->cc = '0'; /* NASTY HACK! */ + + base = (base >> 1) + 8; /* 0->8, 16->16. no 'if' */ +#ifdef __UCLIBC_HAS_FLOATS__ + if (psfs->conv_num > CONV_i) { /* floating point */ + base = 10; + } #endif + } + } else if (!base) { + base = 10; + } + } -int __strtold(long double *ld, struct scan_cookie *sc) -{ - long double number; - long double p10; - int exponent_power; - int exponent_temp; - int negative; - int num_digits; - int since_decimal; - int c; + /***************** digit grouping **********************/ +#ifdef __UCLIBC_HAS_GLIBC_DIGIT_GROUPING__ + + if ((psfs->flags & FLAG_THOUSANDS) && (base == 10) + && *(p = sc->grouping) + ) { - c = scan_getc(sc); /* Decrements width. */ - - negative = 0; - switch(c) { /* Handle optional sign. */ - case '-': negative = 1; /* Fall through to get next char. */ - case '+': c = scan_getc(sc); - } - - number = 0.; - num_digits = -1; - exponent_power = 0; - since_decimal = INT_MIN; - - LOOP: - while (__isdigit(c)) { /* Process string of digits. */ - ++since_decimal; - if (num_digits < 0) { /* First time through? */ - ++num_digits; /* We've now seen a digit. */ - } - if (num_digits || (c != '0')) { /* had/have nonzero */ - ++num_digits; - if (num_digits <= MAX_SIG_DIGITS) { /* Is digit significant? */ - number = number * 10. + (c - '0'); + int nblk1, nblk2, nbmax, lastblock, pass, i; + + +#ifdef __UCLIBC_MJN3_ONLY__ +#warning CONSIDER: Should we initalize the grouping blocks in __init_scan_cookie()? 
+#endif /* __UCLIBC_MJN3_ONLY__ */ + nbmax = nblk2 = nblk1 = *p; + if (*++p) { + nblk2 = *p; + if (nbmax < nblk2) { + nbmax = nblk2; } + assert(!*++p); } - c = scan_getc(sc); - } - - if ((c == '.') && (since_decimal < 0)) { /* If no previous decimal pt, */ - since_decimal = 0; /* save position of decimal point */ - c = scan_getc(sc); /* and process rest of digits */ - goto LOOP; - } - - if (num_digits<0) { /* Must have at least one digit. */ - goto FAIL; - } - - if (num_digits > MAX_SIG_DIGITS) { /* Adjust exp for skipped digits. */ - exponent_power += num_digits - MAX_SIG_DIGITS; - } - - if (since_decimal >= 0) { /* Adjust exponent for decimal point. */ - exponent_power -= since_decimal; - } - - if (negative) { /* Correct for sign. */ - number = -number; - negative = 0; /* Reset for exponent processing below. */ - } - - /* Process an exponent string. */ - if (c == 'e' || c == 'E') { - c = scan_getc(sc); - switch(c) { /* Handle optional sign. */ - case '-': negative = 1; /* Fall through to get next char. */ - case '+': c = scan_getc(sc); - } - - num_digits = 0; - exponent_temp = 0; - while (__isdigit(c)) { /* Process string of digits. */ - if (exponent_temp < MAX_ALLOWED_EXP) { /* overflow check */ - exponent_temp = exponent_temp * 10 + (c - '0'); + + /* Note: for printf, if 0 and \' flags appear then + * grouping is done before 0-padding. Should we + * strip leading 0's first? Or add a 0 flag? */ + + /* For vfwscanf, sc_getc translates, so the value of sc->cc is + * either EOF or a char. */ + + if (!__isdigit_char_or_EOF(sc->cc)) { /* No starting digit! */ +#ifdef __UCLIBC_HAS_FLOATS__ + if (psfs->conv_num > CONV_i) { /* floating point */ + goto NO_STARTING_DIGIT; } - c = scan_getc(sc); - ++num_digits; +#endif + goto DONE_DO_UNGET; } - if (num_digits == 0) { /* Were there no exp digits? 
*/ - goto FAIL; - } /* else */ - if (negative) { - exponent_power -= exponent_temp; - } else { - exponent_power += exponent_temp; + if (sc->cc == '0') { + seendigit = 1; + *b++ = '0'; /* Store the first 0. */ +#ifdef __UCLIBC_MJN3_ONLY__ +#warning CONSIDER: Should leading 0s be skipped before digit grouping? (printf 0 pad) +#endif /* __UCLIBC_MJN3_ONLY__ */ +#if 0 + do { /* But ignore all subsequent 0s. */ + __scan_getc(sc); + } while (sc->cc == '0'); +#endif } - } + pass = 0; + lastblock = 0; + do { + i = 0; + while (__isdigit_char_or_EOF(sc->cc)) { + seendigit = 1; + if (i == nbmax) { /* too many digits for a block */ +#ifdef __UCLIBC_HAS_SCANF_LENIENT_DIGIT_GROUPING__ + if (!pass) { /* treat as nongrouped */ + if (nonzero) { + goto DO_NO_GROUP; + } + goto DO_TRIM_LEADING_ZEROS; + } +#endif + if (nbmax > nblk1) { + goto DONE_DO_UNGET; + } + goto DONE_GROUPING_DO_UNGET; /* nbmax == nblk1 */ + } + ++i; + + if (nonzero || (sc->cc != '0')) { + if (b < buf + MAX_DIGITS) { + *b++ = sc->cc; + nonzero = 1; +#ifdef __UCLIBC_HAS_FLOATS__ + } else { + ++exp_adjust; +#endif + } + } + + __scan_getc(sc); + } + + if (i) { /* we saw digits */ + if ((i == nblk2) || ((i < nblk2) && !pass)) { + /* (possible) outer grp */ + p = sc->thousands_sep; + if (*p == sc->cc) { /* first byte matches... */ + /* so check if grouping mb char */ + /* Since 1st matched, either match or fail now + * unless EOF (yuk) */ + __scan_getc(sc); + MBG_LOOP: + if (!*++p) { /* is a grouping mb char */ + lastblock = i; + ++pass; + continue; + } + if (*p == sc->cc) { + __scan_getc(sc); + goto MBG_LOOP; + } + /* bad grouping mb char! */ + __scan_ungetc(sc); + if ((sc->cc >= 0) || (p > sc->thousands_sep + 1)) { +#ifdef __UCLIBC_HAS_FLOATS__ + /* We failed to match a thousep mb char, and + * we've read too much to recover. But if + * this is a floating point conversion and + * the initial portion of the decpt mb char + * matches, then we may still be able to + * recover. 
*/ + int k = p - sc->thousands_sep - 1; + + if ((psfs->conv_num > CONV_i) /* float conversion */ + && (!pass || (i == nblk1)) /* possible last */ + && !memcmp(sc->thousands_sep, sc->fake_decpt, k) + /* and prefix matched, so could be decpt */ + ) { + __scan_getc(sc); + p = sc->fake_decpt + k; + do { + if (!*++p) { + strcpy(b, sc->decpt); + b += sc->decpt_len; + goto GOT_DECPT; + } + if (*p != sc->cc) { + __scan_ungetc(sc); + break; /* failed */ + } + __scan_getc(sc); + } while (1); + } +#endif /* __UCLIBC_HAS_FLOATS__ */ + goto DONE; + } + /* was EOF and 1st, so recoverable. */ + } + } + if ((i == nblk1) || ((i < nblk1) && !pass)) { + /* got an inner group */ + goto DONE_GROUPING_DO_UNGET; + } + if (i > nblk1) { + /* An inner group if we can back up a bit. */ + if ((i - nblk1) <= (sc->ungot_flag ^ 1)) { + assert(sc->cc < 0); + --b; + goto DO_RECOVER_GROUP; + } + } + + /* (0 < i < nblk1) && (pass > 0) so prev group char + * So we have an unrecoverable situation. */ + goto DONE_DO_UNGET; + } /* i != 0 */ + + assert(pass); + + /* No next group. Can we back up past grouping mb char? */ + if ((pass == 1) || (nblk1 == nblk2)) { + if (!i && (sc->tslen == 1) && (sc->cc < 0)) { + /* No digits, grouping mb char is len 1, and EOF*/ + DO_RECOVER_GROUP: + if (sc->ungot_flag & 2) { + __scan_ungetc(sc); + } + goto DONE_GROUPING_DO_UNGET; + } + } + goto DONE_DO_UNGET; + } while (1); + + assert(0); /* Should never get here. */ + } + +#endif /***************** digit grouping **********************/ + + /* Not grouping so first trim all but one leading 0. */ +#ifdef __UCLIBC_HAS_SCANF_LENIENT_DIGIT_GROUPING__ + DO_TRIM_LEADING_ZEROS: +#endif /* __UCLIBC_HAS_SCANF_LENIENT_DIGIT_GROUPING__ */ + if (sc->cc == '0') { + seendigit = 1; + *b++ = '0'; /* Store the first 0. */ + do { /* But ignore all subsequent 0s. 
*/ + __scan_getc(sc); + } while (sc->cc == '0'); + } + +#ifdef __UCLIBC_HAS_SCANF_LENIENT_DIGIT_GROUPING__ + DO_NO_GROUP: +#endif /* __UCLIBC_HAS_SCANF_LENIENT_DIGIT_GROUPING__ */ + /* At this point, we're ready to start reading digits. */ - if (number != 0.) { - /* Now scale the result. */ - exponent_temp = exponent_power; - p10 = 10.; +#define valid_digit(cc,base) (isxdigit(cc) && ((base == 16) || (cc - '0' < base))) - if (exponent_temp < 0) { - exponent_temp = -exponent_temp; + while (valid_digit(sc->cc,base)) { /* Now for significant digits.*/ + if (b - buf < MAX_DIGITS) { + nonzero = seendigit = 1; /* Set nonzero too; 0s were trimmed above. */ + *b++ = sc->cc; +#ifdef __UCLIBC_HAS_FLOATS__ + } else { + ++exp_adjust; +#endif } + __scan_getc(sc); + } - while (exponent_temp) { - if (exponent_temp & 1) { - if (exponent_power < 0) { - number /= p10; - } else { - number *= p10; +#ifdef __UCLIBC_HAS_GLIBC_DIGIT_GROUPING__ + DONE_GROUPING_DO_UNGET: +#endif /* __UCLIBC_HAS_GLIBC_DIGIT_GROUPING__ */ + if (psfs->conv_num <= CONV_i) { /* integer conversion */ + __scan_ungetc(sc); + *b = 0; /* null-terminate */ + if (!seendigit) { + goto DONE; /* No digits! */ + } + if (psfs->store) { + if (*buf == '-') { + usflag = 0; + } + ++psfs->cnt; + _store_inttype(psfs->cur_ptr, psfs->dataargtype, + (uintmax_t) STRTOUIM(buf, NULL, base, 1-usflag)); + } + return 0; + } + +#ifdef __UCLIBC_HAS_FLOATS__ + + /* At this point, we have everything left of the decimal point or exponent. 
*/ +#ifdef __UCLIBC_HAS_GLIBC_DIGIT_GROUPING__ + NO_STARTING_DIGIT: +#endif + p = sc->fake_decpt; + do { + if (!*p) { + strcpy(b, sc->decpt); + b += sc->decpt_len; + break; + } + if (*p != sc->cc) { + if (p > sc->fake_decpt) { + if ((sc->cc >= 0) && (p > sc->fake_decpt + 1)) { + goto DONE_DO_UNGET; /* failed */ } + + __scan_ungetc(sc); + } - exponent_temp >>= 1; - p10 *= p10; + goto DO_DIGIT_CHECK; + } + ++p; + __scan_getc(sc); + } while (1); + +#ifdef __UCLIBC_HAS_GLIBC_DIGIT_GROUPING__ + GOT_DECPT: +#endif + if (!nonzero) { + if (sc->cc == '0') { + assert(exp_adjust == 0); + *b++ = '0'; + ++exp_adjust; + seendigit = 1; + do { + --exp_adjust; + __scan_getc(sc); + } while (sc->cc == '0'); } } - *ld = number; - return 1; - FAIL: - scan_ungetc(sc); - return 0; -} + while (valid_digit(sc->cc,base)) { /* Process fractional digits.*/ + if (b - buf < MAX_DIGITS) { + seendigit = 1; + *b++ = sc->cc; + } + __scan_getc(sc); + } + + DO_DIGIT_CHECK: + /* Hmm... no decimal point. */ + if (!seendigit) { + static const unsigned char nan_inf_str[] = "an\0nfinity"; + + if (base == 16) { /* We had a prefix, but no digits! */ + goto DONE_DO_UNGET; + } + + /* Avoid tolower problems for INFINITY in the tr_TR locale. (yuk)*/ +#undef TOLOWER +#define TOLOWER(C) ((C)|0x20) + + switch (TOLOWER(sc->cc)) { + case 'i': + p = nan_inf_str + 3; + break; + case 'n': + p = nan_inf_str; + break; + default: + /* No digits and not inf or nan. */ + goto DONE_DO_UNGET; + } + + *b++ = sc->cc; + + do { + __scan_getc(sc); + if (TOLOWER(sc->cc) == *p) { + *b++ = sc->cc; + ++p; + continue; + } + if (!*p || (p == nan_inf_str + 5)) { /* match nan/infinity or inf */ + goto GOT_FLOAT; + } + /* Unrecoverable. Even if on 1st char, we had no digits. */ + goto DONE_DO_UNGET; + } while (1); + } + + /* If we get here, we had some digits. */ + + if ( +#ifdef __UCLIBC_HAS_HEXADECIMAL_FLOATS__ + ((base == 16) && (((sc->cc)|0x20) == 'p')) || +#endif + (((sc->cc)|0x20) == 'e') + ) { /* Process an exponent. 
*/ + *b++ = sc->cc; + + __scan_getc(sc); + if (sc->cc < 0) { /* EOF... recoverable */ + --b; + goto GOT_FLOAT; + } + + if ((sc->cc == '+') || (sc->cc == '-')) { /* Signed exponent? */ + *b++ = sc->cc; + __scan_getc(sc); + } + +#warning fix MAX_EXP_DIGITS! +#define MAX_EXP_DIGITS 20 + assert(seendigit); + seendigit = 0; + nonzero = 0; + + if (sc->cc == '0') { + seendigit = 1; + *b++ = '0'; + do { + __scan_getc(sc); + } while (sc->cc == '0'); + } + + while (__isdigit_char_or_EOF(sc->cc)) { /* Exponent digits (base 10).*/ + if (seendigit < MAX_EXP_DIGITS) { + ++seendigit; + *b++ = sc->cc; + } + __scan_getc(sc); + } + + if (!seendigit) { /* No digits. Unrecoverable. */ + goto DONE_DO_UNGET; + } + } + + + GOT_FLOAT: + + *b = 0; + { + __fpmax_t x; + char *e; + x = __strtofpmax(buf, &e, exp_adjust); + assert(!*e); + if (psfs->store) { + if (psfs->dataargtype & PA_FLAG_LONG_LONG) { + *((long double *)psfs->cur_ptr) = (long double) x; + } else if (psfs->dataargtype & PA_FLAG_LONG) { + *((double *)psfs->cur_ptr) = (double) x; + } else { + *((float *)psfs->cur_ptr) = (float) x; + } + ++psfs->cnt; + } + __scan_ungetc(sc); + return 0; + } #endif /* __UCLIBC_HAS_FLOATS__ */ + + DONE_DO_UNGET: + __scan_ungetc(sc); + DONE: + return -1; + +} #endif +/**********************************************************************/ -- cgit v1.2.3