From d07fdf8b9ece2c4339b325921add50792077bf97 Mon Sep 17 00:00:00 2001 From: Manuel Novoa III Date: Mon, 6 May 2002 07:37:32 +0000 Subject: New locale support (in development). Supports LC_CTYPE, LC_NUMERIC, LC_TIME, LC_MONETARY, and LC_MESSAGES for the SUSv3 items. Also, nl_langinfo() when real locale support is enabled. New implementation of ctype.h. New implementation of wctype.h. New implementation of most of the string functions (smaller). New implementation of the wcs/wmem functions. These are untested, but they're also just preprocessor-modified versions of the corresponding str/mem functions. Tweaked qsort and new bsearch. Stuff still pending: stdlib.h and wchar.h mb<->wc functions. I actually have working versions of the stdlib ones, but the reentrant versions from wchar.h require some reworking. Basic replacement and translit support for wc->mb conversions. (groundwork laid). Simple-minded collate support such as was provided by the previous locale implementation. (mostly done -- 8-bit codesets only) Shared mmapping of the locale data and strerror message text. 
--- libc/sysdeps/linux/common/bits/.cvsignore | 1 + libc/sysdeps/linux/common/bits/uClibc_ctype.h | 250 +++++++++++++++++++++++ libc/sysdeps/linux/common/bits/uClibc_locale.h | 261 +++++++++++++++++++++++++ 3 files changed, 512 insertions(+) create mode 100644 libc/sysdeps/linux/common/bits/.cvsignore create mode 100644 libc/sysdeps/linux/common/bits/uClibc_ctype.h create mode 100644 libc/sysdeps/linux/common/bits/uClibc_locale.h (limited to 'libc/sysdeps/linux/common') diff --git a/libc/sysdeps/linux/common/bits/.cvsignore b/libc/sysdeps/linux/common/bits/.cvsignore new file mode 100644 index 000000000..02b6bca59 --- /dev/null +++ b/libc/sysdeps/linux/common/bits/.cvsignore @@ -0,0 +1 @@ +uClibc_locale_data.h diff --git a/libc/sysdeps/linux/common/bits/uClibc_ctype.h b/libc/sysdeps/linux/common/bits/uClibc_ctype.h new file mode 100644 index 000000000..875070452 --- /dev/null +++ b/libc/sysdeps/linux/common/bits/uClibc_ctype.h @@ -0,0 +1,250 @@ +/* Copyright (C) 2002 Manuel Novoa III + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the Free + * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/* ATTENTION! ATTENTION! ATTENTION! ATTENTION! ATTENTION! + * + * Besides uClibc, I'm using this code in my libc for elks, which is + * a 16-bit environment with a fairly limited compiler. 
It would make + * things much easier for me if this file isn't modified unnecessarily. + * In particular, please put any new or replacement functions somewhere + * else, and modify the makefile to use your version instead. + * Thanks. Manuel + * + * ATTENTION! ATTENTION! ATTENTION! ATTENTION! ATTENTION! */ + +#if !defined(_CTYPE_H) && !defined(_WCTYPE_H) +#error Always include <{w}ctype.h> rather than <bits/uClibc_ctype.h> +#endif + +#ifndef _BITS_CTYPE_H +#define _BITS_CTYPE_H + +/* Taking advantage of the C99 mutual-exclusion guarantees for the various + * (w)ctype classes, including the descriptions of printing and control + * (w)chars, we can place each in one of the following mutually-exclusive + * subsets. Since there are fewer than 16, we can store the data for + * each (w)char in a nibble. In contrast, glibc uses an unsigned int + * per (w)char, with one bit flag for each is* type. While this allows + * a simple '&' operation to determine the type vs. a range test and a + * little special handling for the "blank" and "xdigit" types in my + * approach, it also uses 8 times the space for the tables on the typical + * 32-bit archs we support. */ +enum { + __CTYPE_unclassified = 0, + __CTYPE_alpha_nonupper_nonlower, + __CTYPE_alpha_lower, + __CTYPE_alpha_upper_lower, + __CTYPE_alpha_upper, + __CTYPE_digit, + __CTYPE_punct, + __CTYPE_graph, + __CTYPE_print_space_nonblank, + __CTYPE_print_space_blank, + __CTYPE_space_nonblank_noncntrl, + __CTYPE_space_blank_noncntrl, + __CTYPE_cntrl_space_nonblank, + __CTYPE_cntrl_space_blank, + __CTYPE_cntrl_nonspace, +}; + +/* Some macros that test for various (w)ctype classes when passed one of the + * designator values enumerated above. 
*/ +#define __CTYPE_isalnum(D) ((unsigned int)((D)-1) <= (__CTYPE_digit-1)) +#define __CTYPE_isalpha(D) ((unsigned int)((D)-1) <= (__CTYPE_alpha_upper-1)) +#define __CTYPE_isblank(D) \ + ((((unsigned int)((D) - __CTYPE_print_space_nonblank)) <= 5) && ((D) & 1)) +#define __CTYPE_iscntrl(D) (((unsigned int)((D) - __CTYPE_cntrl_space_nonblank)) <= 2) +#define __CTYPE_isdigit(D) ((D) == __CTYPE_digit) +#define __CTYPE_isgraph(D) ((unsigned int)((D)-1) <= (__CTYPE_graph-1)) +#define __CTYPE_islower(D) (((unsigned int)((D) - __CTYPE_alpha_lower)) <= 1) +#define __CTYPE_isprint(D) ((unsigned int)((D)-1) <= (__CTYPE_print_space_blank-1)) +#define __CTYPE_ispunct(D) ((D) == __CTYPE_punct) +#define __CTYPE_isspace(D) (((unsigned int)((D) - __CTYPE_print_space_nonblank)) <= 5) +#define __CTYPE_isupper(D) (((unsigned int)((D) - __CTYPE_alpha_upper_lower)) <= 1) +/* #define __CTYPE_isxdigit(D) -- isxdigit is untestable this way. + * But that's ok as isxdigit() (and isdigit() too) are locale-invariant. */ + +/* The values for wctype_t. */ +enum { + _CTYPE_unclassified = 0, + _CTYPE_isalnum, + _CTYPE_isalpha, + _CTYPE_isblank, + _CTYPE_iscntrl, + _CTYPE_isdigit, + _CTYPE_isgraph, + _CTYPE_islower, + _CTYPE_isprint, + _CTYPE_ispunct, + _CTYPE_isspace, + _CTYPE_isupper, + _CTYPE_isxdigit /* _MUST_ be last of the standard classes! */ +}; + + +/* The following is used to implement wctype(), but it is defined + * here because the ordering must agree with that of the enumeration + * above (ignoring unclassified). */ +#define __CTYPE_TYPESTRING \ + "\6alnum\0\6alpha\0\6blank\0\6cntrl\0\6digit\0\6graph\0\6lower\0" \ + "\6print\0\6punct\0\6space\0\6upper\0\7xdigit\0\0" + +/* Used in implementing iswctype(), but defined here as it must agree + * in ordering with the string above. */ +#define __CTYPE_RANGES \ + 0, -1, /* unclassified */ \ + 1, __CTYPE_digit - 1, /* alnum */ \ + 1, __CTYPE_alpha_upper - 1, /* alpha */ \ + __CTYPE_print_space_blank, 5, /* blank -- also must be odd! 
*/ \ + __CTYPE_cntrl_space_nonblank, 2, /* cntrl */ \ + __CTYPE_digit, 0, /* digit */ \ + 1, __CTYPE_graph - 1, /* graph */ \ + __CTYPE_alpha_lower, 1, /* lower */ \ + 1, __CTYPE_print_space_blank - 1, /* print */ \ + __CTYPE_punct, 0, /* punct */ \ + __CTYPE_print_space_nonblank, 5, /* space */ \ + __CTYPE_alpha_upper_lower, 1, /* upper */ \ + /* No entry for xdigit as it is handled specially. */ + +#define _CTYPE_iswalnum _CTYPE_isalnum +#define _CTYPE_iswalpha _CTYPE_isalpha +#define _CTYPE_iswblank _CTYPE_isblank +#define _CTYPE_iswcntrl _CTYPE_iscntrl +#define _CTYPE_iswdigit _CTYPE_isdigit +#define _CTYPE_iswgraph _CTYPE_isgraph +#define _CTYPE_iswlower _CTYPE_islower +#define _CTYPE_iswprint _CTYPE_isprint +#define _CTYPE_iswpunct _CTYPE_ispunct +#define _CTYPE_iswspace _CTYPE_isspace +#define _CTYPE_iswupper _CTYPE_isupper +#define _CTYPE_iswxdigit _CTYPE_isxdigit + +/* The following is used to implement wctrans(). */ + +enum { + _CTYPE_tolower = 1, + _CTYPE_toupper, + _CTYPE_totitle +}; + +#define __CTYPE_TRANSTRING "\10tolower\0\10toupper\0\10totitle\0\0" + +/* Now define some ctype macros valid for the C/POSIX locale. */ + +/* ASCII ords of \t, \f, \n, \r, and \v are 9, 12, 10, 13, 11 respectively. */ +#define __C_isspace(c) \ + ((sizeof(c) == sizeof(char)) \ + ? ((((c) == ' ') || (((unsigned char)((c) - 9)) <= (13 - 9)))) \ + : ((((c) == ' ') || (((unsigned int)((c) - 9)) <= (13 - 9))))) +#define __C_isblank(c) (((c) == ' ') || ((c) == '\t')) +#define __C_isdigit(c) \ + ((sizeof(c) == sizeof(char)) \ + ? (((unsigned char)((c) - '0')) < 10) \ + : (((unsigned int)((c) - '0')) < 10)) +#define __C_isxdigit(c) \ + (__C_isdigit(c) \ + || ((sizeof(c) == sizeof(char)) \ + ? (((unsigned char)((((c)) | 0x20) - 'a')) < 6) \ + : (((unsigned int)((((c)) | 0x20) - 'a')) < 6))) +#define __C_iscntrl(c) \ + ((sizeof(c) == sizeof(char)) \ + ? 
((((unsigned char)(c)) < 0x20) || ((c) == 0x7f)) \ + : ((((unsigned int)(c)) < 0x20) || ((c) == 0x7f))) +#define __C_isalpha(c) \ + ((sizeof(c) == sizeof(char)) \ + ? (((unsigned char)(((c) | 0x20) - 'a')) < 26) \ + : (((unsigned int)(((c) | 0x20) - 'a')) < 26)) +#define __C_isalnum(c) (__C_isalpha(c) || __C_isdigit(c)) +#define __C_isprint(c) \ + ((sizeof(c) == sizeof(char)) \ + ? (((unsigned char)((c) - 0x20)) <= (0x7e - 0x20)) \ + : (((unsigned int)((c) - 0x20)) <= (0x7e - 0x20))) +#define __C_islower(c) \ + ((sizeof(c) == sizeof(char)) \ + ? (((unsigned char)((c) - 'a')) < 26) \ + : (((unsigned int)((c) - 'a')) < 26)) +#define __C_isupper(c) \ + ((sizeof(c) == sizeof(char)) \ + ? (((unsigned char)((c) - 'A')) < 26) \ + : (((unsigned int)((c) - 'A')) < 26)) +#define __C_ispunct(c) \ + ((!__C_isalnum(c)) \ + && ((sizeof(c) == sizeof(char)) \ + ? (((unsigned char)((c) - 0x21)) <= (0x7e - 0x21)) \ + : (((unsigned int)((c) - 0x21)) <= (0x7e - 0x21)))) +#define __C_isgraph(c) \ + ((sizeof(c) == sizeof(char)) \ + ? (((unsigned char)((c) - 0x21)) <= (0x7e - 0x21)) \ + : (((unsigned int)((c) - 0x21)) <= (0x7e - 0x21))) + +#define __C_tolower(c) (__C_isupper(c) ? ((c) | 0x20) : (c)) +#define __C_toupper(c) (__C_islower(c) ? ((c) ^ 0x20) : (c)) + +#define __C_isxlower(c) \ + (__C_isdigit(c) \ + || ((sizeof(c) == sizeof(char)) \ + ? (((unsigned char)(((c)) - 'a')) < 6) \ + : (((unsigned int)(((c)) - 'a')) < 6))) +#define __C_isxupper(c) \ + (__C_isdigit(c) \ + || ((sizeof(c) == sizeof(char)) \ + ? (((unsigned char)(((c)) - 'A')) < 6) \ + : (((unsigned int)(((c)) - 'A')) < 6))) + +/* TODO: Replace the above with expressions like the following? */ +/* #define __C_isdigit(c) ((sizeof(c) == sizeof(char)) \ */ +/* ? (((unsigned char)((c) - '0')) < 10) \ */ +/* : (((unsigned int)((c) - '0')) < 10)) */ + +/* Similarly, define some wctype macros valid for the C/POSIX locale. */ + +/* First, we need some way to make sure the arg is in range. 
*/ +#define __C_classed(c) \ + ((sizeof(c) <= sizeof(int)) || ((c) == ((unsigned char)(c)))) + +#define __C_iswspace(c) (__C_classed(c) && __C_isspace(c)) +#define __C_iswblank(c) (__C_classed(c) && __C_isblank(c)) +#define __C_iswdigit(c) (__C_classed(c) && __C_isdigit(c)) +#define __C_iswxdigit(c) (__C_classed(c) && __C_isxdigit(c)) +#define __C_iswcntrl(c) (__C_classed(c) && __C_iscntrl(c)) +#define __C_iswalpha(c) (__C_classed(c) && __C_isalpha(c)) +#define __C_iswalnum(c) (__C_classed(c) && __C_isalnum(c)) +#define __C_iswprint(c) (__C_classed(c) && __C_isprint(c)) +#define __C_iswlower(c) (__C_classed(c) && __C_islower(c)) +#define __C_iswupper(c) (__C_classed(c) && __C_isupper(c)) +#define __C_iswpunct(c) (__C_classed(c) && __C_ispunct(c)) +#define __C_iswgraph(c) (__C_classed(c) && __C_isgraph(c)) +#define __C_towlower(c) \ + ((__C_classed(c) && __C_isupper(c)) ? ((c) | 0x20) : (c)) +#define __C_towupper(c) \ + ((__C_classed(c) && __C_islower(c)) ? ((c) ^ 0x20) : (c)) + +/* Now define some macros to avoid the extra wrapper-function call. 
*/ +#define __iswalnum(c) iswctype(c, _CTYPE_iswalnum) +#define __iswalpha(c) iswctype(c, _CTYPE_iswalpha) +#define __iswblank(c) iswctype(c, _CTYPE_iswblank) +#define __iswcntrl(c) iswctype(c, _CTYPE_iswcntrl) +#define __iswgraph(c) iswctype(c, _CTYPE_iswgraph) +#define __iswlower(c) iswctype(c, _CTYPE_iswlower) +#define __iswprint(c) iswctype(c, _CTYPE_iswprint) +#define __iswpunct(c) iswctype(c, _CTYPE_iswpunct) +#define __iswspace(c) iswctype(c, _CTYPE_iswspace) +#define __iswupper(c) iswctype(c, _CTYPE_iswupper) +#define __iswdigit(c) __C_iswdigit(c) +#define __iswxdigit(c) __C_iswxdigit(c) + +#endif /* _BITS_CTYPE_H */ diff --git a/libc/sysdeps/linux/common/bits/uClibc_locale.h b/libc/sysdeps/linux/common/bits/uClibc_locale.h new file mode 100644 index 000000000..2a35d38ec --- /dev/null +++ b/libc/sysdeps/linux/common/bits/uClibc_locale.h @@ -0,0 +1,261 @@ +/* Copyright (C) 2002 Manuel Novoa III + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the Free + * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/* ATTENTION! ATTENTION! ATTENTION! ATTENTION! ATTENTION! + * + * Besides uClibc, I'm using this code in my libc for elks, which is + * a 16-bit environment with a fairly limited compiler. It would make + * things much easier for me if this file isn't modified unnecessarily. 
+ * In particular, please put any new or replacement functions somewhere + * else, and modify the makefile to use your version instead. + * Thanks. Manuel + * + * ATTENTION! ATTENTION! ATTENTION! ATTENTION! ATTENTION! */ + +#ifndef _UCLIBC_LOCALE_H +#define _UCLIBC_LOCALE_H + +/**********************************************************************/ +/* uClibc compatibilty stuff */ + +#ifdef __UCLIBC_HAS_WCHAR__ +#define __WCHAR_ENABLED +#endif + + +#ifdef __UCLIBC_HAS_LOCALE__ + +#undef __LOCALE_C_ONLY + +#else /* __UCLIBC_HAS_LOCALE__ */ + +#define __LOCALE_C_ONLY + +#endif /* __UCLIBC_HAS_LOCALE__ */ + +/**********************************************************************/ + +#define __NL_ITEM_CATEGORY_SHIFT (8) +#define __NL_ITEM_INDEX_MASK (0xff) + +/* TODO: Make sure these agree with the locale mmap file gererator! */ + +#define __LC_CTYPE 0 +#define __LC_NUMERIC 1 +#define __LC_MONETARY 2 +#define __LC_TIME 3 +#define __LC_COLLATE 4 +#define __LC_MESSAGES 5 +#define __LC_ALL 6 + +/**********************************************************************/ +#if defined(_LIBC) && !defined(__LOCALE_C_ONLY) + +#include +#include +#include + +/* TODO: This really needs to be somewhere else... */ +#include +#include + +#if WCHAR_MIN == 0 +typedef wchar_t __uwchar_t; +#elif WCHAR_MAX <= USHRT_MAX +typedef unsigned short __uwchar_t; +#elif WCHAR_MAX <= UINT_MAX +typedef unsigned int __uwchar_t; +#elif WCHAR_MAX <= ULONG_MAX +typedef unsigned long __uwchar_t; +#elif defined(ULLONG_MAX) && (WCHAR_MAX <= ULLONG_MAX) +typedef unsigned long long __uwchar_t; +#elif WCHAR_MAX <= UINT_MAX +typedef uintmax_t __uwchar_t; +#else +#error Can not determine an appropriate type for __uwchar_t! +#endif + + + +extern void _locale_set(const unsigned char *p); +extern void _locale_init(void); + +/* TODO: assumes 8-bit chars!!! 
*/ + +enum { + __ctype_encoding_7_bit, /* C/POSIX */ + __ctype_encoding_utf8, /* UTF-8 */ + __ctype_encoding_8_bit /* for 8-bit codeset locales */ +}; + +#define LOCALE_STRING_SIZE (2 * __LC_ALL + 2) + + /* + * '#' + 2_per_category + '\0' + * {locale row # : 0 = C|POSIX} + 0x8001 + * encoded in two chars as (((N+1) >> 8) | 0x80) and ((N+1) & 0xff) + * so decode is ((((uint16_t)(*s & 0x7f)) << 8) + s[1]) - 1 + * + * Note: 0s are not used as they are nul-terminators for strings. + * Note: 0xff, 0xff is the encoding for a non-selected locale. + * (see setlocale() below). + * In particular, C/POSIX locale is '#' + "\x80\x01"}*LC_ALL + nul. + */ + + +/* static unsigned char cur_locale[LOCALE_STRING_SIZE]; */ + +typedef struct { +/* int tables_loaded; */ +/* unsigned char lctypes[LOCALE_STRING_SIZE]; */ + unsigned char cur_locale[LOCALE_STRING_SIZE]; + + /* NL_LANGINFO stuff. BEWARE ORDERING!!! must agree with NL_* constants! */ + /* Also, numeric must be followed by monetary and the items must be in + * the "struct lconv" order. */ + + uint16_t category_offsets[__LC_ALL]; /* TODO -- fix? */ + unsigned char category_item_count[__LC_ALL]; /* TODO - fix */ + + /* ctype */ + unsigned char encoding; /* C/POSIX, 8-bit, UTF-8 */ + unsigned char mb_cur_max; /* determined by encoding _AND_ translit!!! */ + + const char *codeset; + +#ifdef __CTYPE_HAS_8_BIT_LOCALES + const unsigned char *idx8ctype; + const unsigned char *tbl8ctype; + const unsigned char *idx8uplow; + const unsigned char *tbl8uplow; +#ifdef __WCHAR_ENABLED + const unsigned char *idx8c2wc; + const uint16_t *tbl8c2wc; /* char > 0x7f to wide char */ + const unsigned char *idx8wc2c; + const unsigned char *tbl8wc2c; + /* translit */ +#endif /* __WCHAR_ENABLED */ +#endif /* __CTYPE_HAS_8_BIT_LOCALES */ +#ifdef __WCHAR_ENABLED + const unsigned char *tblwctype; + const unsigned char *tblwuplow; + const unsigned char *tblwcomb; + const int16_t *tblwuplow_diff; /* yes... signed */ + /* width?? 
*/ +#endif /* __WCHAR_ENABLED */ + + /* numeric */ + const char *decimal_point; + const char *thousands_sep; + const char *grouping; + + /* monetary */ + const char *int_curr_symbol; + const char *currency_symbol; + const char *mon_decimal_point; + const char *mon_thousands_sep; + const char *mon_grouping; + const char *positive_sign; + const char *negative_sign; + const char *int_frac_digits; + const char *frac_digits; + const char *p_cs_precedes; + const char *p_sep_by_space; + const char *n_cs_precedes; + const char *n_sep_by_space; + const char *p_sign_posn; + const char *n_sign_posn; + const char *int_p_cs_precedes; + const char *int_p_sep_by_space; + const char *int_n_cs_precedes; + const char *int_n_sep_by_space; + const char *int_p_sign_posn; + const char *int_n_sign_posn; + + const char *crncystr; /* not returned by localeconv */ + + /* time */ + const char *abday_1; + const char *abday_2; + const char *abday_3; + const char *abday_4; + const char *abday_5; + const char *abday_6; + const char *abday_7; + + const char *day_1; + const char *day_2; + const char *day_3; + const char *day_4; + const char *day_5; + const char *day_6; + const char *day_7; + + const char *abmon_1; + const char *abmon_2; + const char *abmon_3; + const char *abmon_4; + const char *abmon_5; + const char *abmon_6; + const char *abmon_7; + const char *abmon_8; + const char *abmon_9; + const char *abmon_10; + const char *abmon_11; + const char *abmon_12; + + const char *mon_1; + const char *mon_2; + const char *mon_3; + const char *mon_4; + const char *mon_5; + const char *mon_6; + const char *mon_7; + const char *mon_8; + const char *mon_9; + const char *mon_10; + const char *mon_11; + const char *mon_12; + + const char *am_str; + const char *pm_str; + + const char *d_t_fmt; + const char *d_fmt; + const char *t_fmt; + const char *t_fmt_ampm; + const char *era; + + const char *era_year; /* non SUSv3 */ + const char *era_d_fmt; + const char *alt_digits; + const char *era_d_t_fmt; + const 
char *era_t_fmt; + + /* collate */ + + /* messages */ + const char *yesexpr; + const char *noexpr; + +} __locale_t; + +extern __locale_t __global_locale; + +#endif /* defined(_LIBC) && !defined(__LOCALE_C_ONLY) */ + +#endif /* _UCLIBC_LOCALE_H */ -- cgit v1.2.3