From 1217289737588e65b088b3535428b27c7287d699 Mon Sep 17 00:00:00 2001 From: Manuel Novoa III Date: Fri, 1 Aug 2003 20:08:59 +0000 Subject: Add a new *scanf implementation, including the *wscanf functions. Should be standards compliant, with several optional features, including support for hexadecimal float notation, locale awareness, glibc-like locale-specific digit grouping with the `'' flag, and positional arg support. I tested it pretty well (finding several bugs in glibc's scanf in the process), but it is brand new so be aware. The *wprintf functions now support floating point output. Also, a couple of bugs were squashed. Finally, %a/%A conversions are now implemented. Implement the glibc xlocale interface for thread-specific locale support. Also add the various *_l(args, locale_t loc_arg) funcs. NOTE!!! setlocale() is NOT threadsafe! NOTE!!! The strto{floating point} conversion functions are now locale aware. They also now support hexadecimal floating point notation. Add the wcsto{floating point} conversion functions. Fix a bug in mktime() related to dst. Note that unlike glibc's mktime, uClibc's version always normalizes the struct tm before attempting to determine the correct dst setting if tm_isdst == -1 on entry. Add a stub version of the libintl functions. (untested) Fixed a known memory leak in setlocale() related to the collation data. Add lots of new config options (which Erik agreed to sort out :-), including finally exposing some of the stripped down stdio configs. Be careful with those though, as they haven't been tested in a long time. (temporary) GOTCHAs... The ctype functions are currently incorrect for 8-bit locales. They will be fixed shortly. The ctype functions are now table-based, resulting in larger statically linked binaries. I'll be adding an option to use the old approach in the stub locale configuration. 
--- include/ctype.h | 443 ++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 348 insertions(+), 95 deletions(-) (limited to 'include/ctype.h') diff --git a/include/ctype.h b/include/ctype.h index c6faf3d9b..23ff199e4 100644 --- a/include/ctype.h +++ b/include/ctype.h @@ -1,129 +1,382 @@ -/* Copyright (C) 2002 Manuel Novoa III +/* Copyright (C) 1991,92,93,95,96,97,98,99,2001,02 + Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +/* + * ISO C99 Standard 7.4: Character handling + */ + +#ifndef _CTYPE_H +#define _CTYPE_H 1 + +#include +#include + +__BEGIN_DECLS + +#ifndef _ISbit +/* These are all the characteristics of characters. + If there get to be more than 16 distinct characteristics, + many things must be changed that use `__uint16_t's. */ + +# define _ISbit(bit) (1 << (bit)) + +enum +{ + _ISupper = _ISbit (0), /* UPPERCASE. */ + _ISlower = _ISbit (1), /* lowercase. */ + _ISalpha = _ISbit (2), /* Alphabetic. */ + _ISdigit = _ISbit (3), /* Numeric. */ + _ISxdigit = _ISbit (4), /* Hexadecimal numeric. */ + _ISspace = _ISbit (5), /* Whitespace. */ + _ISprint = _ISbit (6), /* Printing. */ + _ISgraph = _ISbit (7), /* Graphical. */ + _ISblank = _ISbit (8), /* Blank (usually SPC and TAB). 
*/ + _IScntrl = _ISbit (9), /* Control character. */ + _ISpunct = _ISbit (10), /* Punctuation. */ + _ISalnum = _ISbit (11) /* Alphanumeric. */ +}; +#else +#error _ISbit already defined! +#endif /* ! _ISbit */ + +#include + +#ifdef __UCLIBC_HAS_CTYPE_SIGNED__ +# define __UCLIBC_CTYPE_IN_TO_DOMAIN(c) (((unsigned int)((c) + 128)) < 384) + +#else /* __UCLIBC_HAS_CTYPE_SIGNED__ */ +# define __UCLIBC_CTYPE_IN_TO_DOMAIN(c) (((unsigned int)(c)) < 256) + +#endif /* __UCLIBC_HAS_CTYPE_SIGNED__ */ + +/* In the thread-specific locale model (see `uselocale' in ) + we cannot use global variables for these as was done in the past. + Instead, the following accessor functions return the address of + each variable, which is local to the current thread if multithreaded. + + These point into arrays of 384, so they can be indexed by any `unsigned + char' value [0,255]; by EOF (-1); or by any `signed char' value + [-128,-1). ISO C requires that the ctype functions work for `unsigned + char' values and for EOF; we also support negative `signed char' values + for broken old programs. The case conversion arrays are of `int's + rather than `unsigned char's because tolower (EOF) must be EOF, which + doesn't fit into an `unsigned char'. But today more important is that + the arrays are also used for multi-byte character sets. */ + +/* uClibc differences: + * + * When __UCLIBC_HAS_CTYPE_SIGNED is defined, * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. + * The upper and lower mapping arrays are type int16_t, so that + * they may store all char values plus EOF. The glibc reasoning + * given above for these being type int is questionable, as the + * ctype mapping functions map from the set of (unsigned) char + * and EOF back into the set. 
They have no awareness of multi-byte + * or wide characters. * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. + * Otherwise, * - * You should have received a copy of the GNU Library General Public - * License along with this library; if not, write to the Free - * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * The ctype array is defined for -1..255. + * The upper and lower mapping arrays are defined for 0..255. + * The upper and lower mapping arrays are type unsigned char. */ -/* NOTE: It is assumed here and throughout the library that the underlying - * char encoding for the portable C character set is ASCII (host & target). */ +/* Pointers to the default C-locale data. */ +extern const __uint16_t *__C_ctype_b; +extern const __ctype_touplow_t *__C_ctype_toupper; +extern const __ctype_touplow_t *__C_ctype_tolower; -#ifndef _CTYPE_H -#define _CTYPE_H +#ifdef __UCLIBC_HAS_XLOCALE__ -#include -#include +extern __const __uint16_t **__ctype_b_loc (void) + __attribute__ ((__const)); +extern __const __ctype_touplow_t **__ctype_tolower_loc (void) + __attribute__ ((__const)); +extern __const __ctype_touplow_t **__ctype_toupper_loc (void) + __attribute__ ((__const)); -__BEGIN_DECLS +#define __UCLIBC_CTYPE_B (*__ctype_b_loc()) +#define __UCLIBC_CTYPE_TOLOWER (*__ctype_tolower_loc()) +#define __UCLIBC_CTYPE_TOUPPER (*__ctype_toupper_loc()) -extern int isalnum(int c) __THROW; -extern int isalpha(int c) __THROW; -#ifdef __USE_ISOC99 -extern int isblank(int c) __THROW; -#endif -extern int iscntrl(int c) __THROW; -extern int isdigit(int c) __THROW; -extern int isgraph(int c) __THROW; -extern int islower(int c) __THROW; -extern int isprint(int c) __THROW; -extern int ispunct(int c) __THROW; -extern int isspace(int c) __THROW; -extern int isupper(int c) 
__THROW; -extern int isxdigit(int c) __THROW; - -extern int tolower(int c) __THROW; -extern int toupper(int c) __THROW; +#else /* __UCLIBC_HAS_XLOCALE__ */ -#if defined __USE_SVID || defined __USE_MISC || defined __USE_XOPEN -extern int isascii(int c) __THROW; -extern int toascii(int c) __THROW; -#endif +/* Pointers to the current global locale data in use. */ +extern const __uint16_t *__ctype_b; +extern const __ctype_touplow_t *__ctype_toupper; +extern const __ctype_touplow_t *__ctype_tolower; + +#define __UCLIBC_CTYPE_B (__ctype_b) +#define __UCLIBC_CTYPE_TOLOWER (__ctype_tolower) +#define __UCLIBC_CTYPE_TOUPPER (__ctype_toupper) + +#endif /* __UCLIBC_HAS_XLOCALE__ */ + +#define __isctype(c, type) \ + ((__UCLIBC_CTYPE_B)[(int) (c)] & (__uint16_t) type) + +#define __isascii(c) (((c) & ~0x7f) == 0) /* If C is a 7 bit value. */ +#define __toascii(c) ((c) & 0x7f) /* Mask off high bits. */ -/* The following are included for compatibility with older versions of - * uClibc; but now they're only visible if MISC funcctionality is requested. - * However, as they are locale-independent, the hidden macro versions are - * always present. */ #ifdef __USE_MISC -extern int isxlower(int c) __THROW; /* uClibc-specific. */ -extern int isxupper(int c) __THROW; /* uClibc-specific. */ + +/* The following are included for compatibility with older versions of + * uClibc; but now they're only visible if MISC funcctionality is requested. */ +extern int isxlower(int c) __THROW; +extern int isxupper(int c) __THROW; + +/* isdigit() is really locale-invariant, so provide some small fast macros. + * These are uClibc-specific. */ +#define __isdigit_char(C) (((unsigned char)((C) - '0')) <= 9) +#define __isdigit_int(C) (((unsigned int)((C) - '0')) <= 9) + #endif -/* Next, some ctype macros which are valid for all supported locales. */ -/* WARNING: isspace and isblank need to be reverified if more 8-bit codesets - * are added!!! But isdigit and isxdigit are always valid. 
*/ +#define __exctype(name) extern int name (int) __THROW -#define __isspace(c) __C_isspace(c) -#define __isblank(c) __C_isblank(c) +__BEGIN_NAMESPACE_STD -#define __isdigit(c) __C_isdigit(c) -#define __isxdigit(c) __C_isxdigit(c) +/* The following names are all functions: + int isCHARACTERISTIC(int c); + which return nonzero iff C has CHARACTERISTIC. + For the meaning of the characteristic names, see the `enum' above. */ +__exctype (isalnum); +__exctype (isalpha); +__exctype (iscntrl); +__exctype (isdigit); +__exctype (islower); +__exctype (isgraph); +__exctype (isprint); +__exctype (ispunct); +__exctype (isspace); +__exctype (isupper); +__exctype (isxdigit); -/* Now some non-ansi/iso c99 macros. */ -#define __isascii(c) (((c) & ~0x7f) == 0) -#define __toascii(c) ((c) & 0x7f) -#define _toupper(c) ((c) ^ 0x20) -#define _tolower(c) ((c) | 0x20) +/* Return the lowercase version of C. */ +extern int tolower (int __c) __THROW; +/* Return the uppercase version of C. */ +extern int toupper (int __c) __THROW; -/* For compatibility with older versions of uClibc. Are these ever used? */ -#define __isxlower(c) __C_isxlower(c) /* uClibc-specific. */ -#define __isxupper(c) __C_isxupper(c) /* uClibc-specific. */ +__END_NAMESPACE_STD -/* Apparently, glibc implements things as macros if __NO_CTYPE isn't defined. - * If we don't have locale support, we'll do the same. Otherwise, we'll - * only use macros for the supported-locale-invariant cases. */ -#if 0 -/* Currently broken, since masking macros, other than getc and putc, must - * evaluate their args exactly once. Will be fixed by the next release. mjn3 */ -/* #ifndef __NO_CTYPE */ -#define isdigit(c) __isdigit(c) -#define isxdigit(c) __isxdigit(c) -#define isspace(c) __isspace(c) -#ifdef __USE_ISOC99 -#define isblank(c) __isblank(c) -#endif +/* ISO C99 introduced one new function. 
*/ +#ifdef __USE_ISOC99 +__BEGIN_NAMESPACE_C99 -#if defined __USE_SVID || defined __USE_MISC || defined __USE_XOPEN -#define isascii(c) __isascii(c) -#define toascii(c) __toascii(c) +__exctype (isblank); + +__END_NAMESPACE_C99 #endif -#ifdef __USE_MISC -#define isxlower(c) __C_isxlower(c) /* uClibc-specific. */ -#define isxupper(c) __C_isxupper(c) /* uClibc-specific. */ +#ifdef __USE_GNU +/* Test C for a set of character classes according to MASK. */ +extern int isctype (int __c, int __mask) __THROW; #endif -/* TODO - Should test for 8-bit codesets instead, but currently impossible. */ -#ifndef __UCLIBC_HAS_LOCALE__ +#if defined __USE_SVID || defined __USE_MISC || defined __USE_XOPEN + +/* Return nonzero iff C is in the ASCII set + (i.e., is no more than 7 bits wide). */ +extern int isascii (int __c) __THROW; + +/* Return the part of C that is in the ASCII set + (i.e., the low-order 7 bits of C). */ +extern int toascii (int __c) __THROW; + +/* These are the same as `toupper' and `tolower' except that they do not + check the argument for being in the range of a `char'. */ +__exctype (_toupper); +__exctype (_tolower); +#endif /* Use SVID or use misc. */ + +/* This code is needed for the optimized mapping functions. */ +#define __tobody(c, f, a, args) \ + (__extension__ \ + ({ int __res; \ + if (sizeof (c) > 1) \ + { \ + if (__builtin_constant_p (c)) \ + { \ + int __c = (c); \ + __res = __UCLIBC_CTYPE_IN_TO_DOMAIN(__c) ? 
(a)[__c] : __c; \ + } \ + else \ + __res = f args; \ + } \ + else \ + __res = (a)[(int) (c)]; \ + __res; })) + +#if !defined __NO_CTYPE && !defined __cplusplus +# define isalnum(c) __isctype((c), _ISalnum) +# define isalpha(c) __isctype((c), _ISalpha) +# define iscntrl(c) __isctype((c), _IScntrl) +# define isdigit(c) __isctype((c), _ISdigit) +# define islower(c) __isctype((c), _ISlower) +# define isgraph(c) __isctype((c), _ISgraph) +# define isprint(c) __isctype((c), _ISprint) +# define ispunct(c) __isctype((c), _ISpunct) +# define isspace(c) __isctype((c), _ISspace) +# define isupper(c) __isctype((c), _ISupper) +# define isxdigit(c) __isctype((c), _ISxdigit) + +# ifdef __USE_ISOC99 +# define isblank(c) __isctype((c), _ISblank) +# endif + +# ifdef __USE_EXTERN_INLINES +extern __inline int +tolower (int __c) __THROW +{ + return __UCLIBC_CTYPE_IN_TO_DOMAIN(__c) ? (__UCLIBC_CTYPE_TOLOWER)[__c] : __c; +} + +extern __inline int +toupper (int __c) __THROW +{ + return __UCLIBC_CTYPE_IN_TO_DOMAIN(__c) ? (__UCLIBC_CTYPE_TOUPPER)[__c] : __c; +} +# endif + +# if __GNUC__ >= 2 && defined __OPTIMIZE__ && !defined __cplusplus +# define tolower(c) __tobody (c, tolower, __UCLIBC_CTYPE_TOLOWER, (c)) +# define toupper(c) __tobody (c, toupper, __UCLIBC_CTYPE_TOUPPER, (c)) +# endif /* Optimizing gcc */ + +# if defined __USE_SVID || defined __USE_MISC || defined __USE_XOPEN +# define isascii(c) __isascii (c) +# define toascii(c) __toascii (c) + +# define _tolower(c) ((int) (__UCLIBC_CTYPE_TOLOWER)[(int) (c)]) +# define _toupper(c) ((int) (__UCLIBC_CTYPE_TOUPPER)[(int) (c)]) +# endif + +#endif /* Not __NO_CTYPE. */ + + +#if defined(__USE_GNU) && defined(__UCLIBC_HAS_XLOCALE__) +/* The concept of one static locale per category is not very well + thought out. Many applications will need to process its data using + information from several different locales. Another application is + the implementation of the internationalization handling in the + upcoming ISO C++ standard library. 
To support this another set of + the functions using locale data exist which have an additional + argument. + + Attention: all these functions are *not* standardized in any form. + This is a proof-of-concept implementation. */ + +/* Structure for reentrant locale using functions. This is an + (almost) opaque type for the user level programs. */ +# include + +/* These definitions are similar to the ones above but all functions + take as an argument a handle for the locale which shall be used. */ +# define __isctype_l(c, type, locale) \ + ((locale)->__ctype_b[(int) (c)] & (__uint16_t) type) + +# define __exctype_l(name) \ + extern int name (int, __locale_t) __THROW + +/* The following names are all functions: + int isCHARACTERISTIC(int c, locale_t *locale); + which return nonzero iff C has CHARACTERISTIC. + For the meaning of the characteristic names, see the `enum' above. */ +__exctype_l (isalnum_l); +__exctype_l (isalpha_l); +__exctype_l (iscntrl_l); +__exctype_l (isdigit_l); +__exctype_l (islower_l); +__exctype_l (isgraph_l); +__exctype_l (isprint_l); +__exctype_l (ispunct_l); +__exctype_l (isspace_l); +__exctype_l (isupper_l); +__exctype_l (isxdigit_l); + +__exctype_l (isblank_l); + + +/* Return the lowercase version of C in locale L. */ +extern int __tolower_l (int __c, __locale_t __l) __THROW; +extern int tolower_l (int __c, __locale_t __l) __THROW; + +/* Return the uppercase version of C. 
*/ +extern int __toupper_l (int __c, __locale_t __l) __THROW; +extern int toupper_l (int __c, __locale_t __l) __THROW; + +# if __GNUC__ >= 2 && defined __OPTIMIZE__ && !defined __cplusplus +# define __tolower_l(c, locale) \ + __tobody (c, __tolower_l, (locale)->__ctype_tolower, (c, locale)) +# define __toupper_l(c, locale) \ + __tobody (c, __toupper_l, (locale)->__ctype_toupper, (c, locale)) +# define tolower_l(c, locale) __tolower_l ((c), (locale)) +# define toupper_l(c, locale) __toupper_l ((c), (locale)) +# endif /* Optimizing gcc */ + + +# ifndef __NO_CTYPE +# define __isalnum_l(c,l) __isctype_l((c), _ISalnum, (l)) +# define __isalpha_l(c,l) __isctype_l((c), _ISalpha, (l)) +# define __iscntrl_l(c,l) __isctype_l((c), _IScntrl, (l)) +# define __isdigit_l(c,l) __isctype_l((c), _ISdigit, (l)) +# define __islower_l(c,l) __isctype_l((c), _ISlower, (l)) +# define __isgraph_l(c,l) __isctype_l((c), _ISgraph, (l)) +# define __isprint_l(c,l) __isctype_l((c), _ISprint, (l)) +# define __ispunct_l(c,l) __isctype_l((c), _ISpunct, (l)) +# define __isspace_l(c,l) __isctype_l((c), _ISspace, (l)) +# define __isupper_l(c,l) __isctype_l((c), _ISupper, (l)) +# define __isxdigit_l(c,l) __isctype_l((c), _ISxdigit, (l)) + +# define __isblank_l(c,l) __isctype_l((c), _ISblank, (l)) + +# if defined __USE_SVID || defined __USE_MISC || defined __USE_XOPEN +# define __isascii_l(c,l) ((l), __isascii (c)) +# define __toascii_l(c,l) ((l), __toascii (c)) +# endif + +# define isalnum_l(c,l) __isalnum_l ((c), (l)) +# define isalpha_l(c,l) __isalpha_l ((c), (l)) +# define iscntrl_l(c,l) __iscntrl_l ((c), (l)) +# define isdigit_l(c,l) __isdigit_l ((c), (l)) +# define islower_l(c,l) __islower_l ((c), (l)) +# define isgraph_l(c,l) __isgraph_l ((c), (l)) +# define isprint_l(c,l) __isprint_l ((c), (l)) +# define ispunct_l(c,l) __ispunct_l ((c), (l)) +# define isspace_l(c,l) __isspace_l ((c), (l)) +# define isupper_l(c,l) __isupper_l ((c), (l)) +# define isxdigit_l(c,l) __isxdigit_l ((c), (l)) -#define 
isalnum(c) __C_isalnum(c) -#define isalpha(c) __C_isalpha(c) -#define iscntrl(c) __C_iscntrl(c) -#define isgraph(c) __C_isgraph(c) -#define islower(c) __C_islower(c) -#define isprint(c) __C_isprint(c) -#define ispunct(c) __C_ispunct(c) -#define isupper(c) __C_isupper(c) +# define isblank_l(c,l) __isblank_l ((c), (l)) -#define tolower(c) __C_tolower(c) -#define toupper(c) __C_toupper(c) +# if defined __USE_SVID || defined __USE_MISC || defined __USE_XOPEN +# define isascii_l(c,l) __isascii_l ((c), (l)) +# define toascii_l(c,l) __toascii_l ((c), (l)) +# endif -#endif /* __UCLIBC_HAS_LOCALE__ */ +# endif /* Not __NO_CTYPE. */ -#endif /* __NO_CTYPE */ +#endif /* Use GNU. */ __END_DECLS -#endif /* _CTYPE_H */ +#endif /* ctype.h */ -- cgit v1.2.3