From 9c6f2391ed8dd72d9e13db459d149de7bc707567 Mon Sep 17 00:00:00 2001 From: Manuel Novoa III Date: Thu, 28 Aug 2003 17:16:53 +0000 Subject: Create a typedef for the ctype bitmask table entries. Hack a fix for ctype support of 8-bit codeset locales. Note: toupper/tolower mappings do not handle the special cases for the tr_TR and az_AZ locales, since the wide versions currently do not handle them either. That will be addressed when I rewrite the data generation tools and the libc locale code. --- include/ctype.h | 12 +-- libc/misc/ctype/ctype.c | 36 ++----- libc/misc/locale/locale.c | 124 ++++++++++++++++++++++-- libc/sysdeps/linux/common/bits/uClibc_locale.h | 7 +- libc/sysdeps/linux/common/bits/uClibc_touplow.h | 10 ++ 5 files changed, 148 insertions(+), 41 deletions(-) diff --git a/include/ctype.h b/include/ctype.h index 23ff199e4..361f160f8 100644 --- a/include/ctype.h +++ b/include/ctype.h @@ -32,7 +32,7 @@ __BEGIN_DECLS #ifndef _ISbit /* These are all the characteristics of characters. If there get to be more than 16 distinct characteristics, - many things must be changed that use `__uint16_t's. */ + __ctype_mask_t will need to be adjusted. */ # define _ISbit(bit) (1 << (bit)) @@ -98,13 +98,13 @@ enum */ /* Pointers to the default C-locale data. */ -extern const __uint16_t *__C_ctype_b; +extern const __ctype_mask_t *__C_ctype_b; extern const __ctype_touplow_t *__C_ctype_toupper; extern const __ctype_touplow_t *__C_ctype_tolower; #ifdef __UCLIBC_HAS_XLOCALE__ -extern __const __uint16_t **__ctype_b_loc (void) +extern __const __ctype_mask_t **__ctype_b_loc (void) __attribute__ ((__const)); extern __const __ctype_touplow_t **__ctype_tolower_loc (void) __attribute__ ((__const)); @@ -118,7 +118,7 @@ extern __const __ctype_touplow_t **__ctype_toupper_loc (void) #else /* __UCLIBC_HAS_XLOCALE__ */ /* Pointers to the current global locale data in use. 
*/ -extern const __uint16_t *__ctype_b; +extern const __ctype_mask_t *__ctype_b; extern const __ctype_touplow_t *__ctype_toupper; extern const __ctype_touplow_t *__ctype_tolower; @@ -129,7 +129,7 @@ extern const __ctype_touplow_t *__ctype_tolower; #endif /* __UCLIBC_HAS_XLOCALE__ */ #define __isctype(c, type) \ - ((__UCLIBC_CTYPE_B)[(int) (c)] & (__uint16_t) type) + ((__UCLIBC_CTYPE_B)[(int) (c)] & (__ctype_mask_t) type) #define __isascii(c) (((c) & ~0x7f) == 0) /* If C is a 7 bit value. */ #define __toascii(c) ((c) & 0x7f) /* Mask off high bits. */ @@ -292,7 +292,7 @@ toupper (int __c) __THROW /* These definitions are similar to the ones above but all functions take as an argument a handle for the locale which shall be used. */ # define __isctype_l(c, type, locale) \ - ((locale)->__ctype_b[(int) (c)] & (__uint16_t) type) + ((locale)->__ctype_b[(int) (c)] & (__ctype_mask_t) type) # define __exctype_l(name) \ extern int name (int, __locale_t) __THROW diff --git a/libc/misc/ctype/ctype.c b/libc/misc/ctype/ctype.c index a89e1e75c..13095015d 100644 --- a/libc/misc/ctype/ctype.c +++ b/libc/misc/ctype/ctype.c @@ -378,7 +378,7 @@ int isctype(int c, int mask) #ifdef __UCLIBC_HAS_XLOCALE__ -const uint16_t **__ctype_b_loc(void) +const __ctype_mask_t **__ctype_b_loc(void) { return &(__UCLIBC_CURLOCALE_DATA).__ctype_b; } @@ -415,7 +415,7 @@ const __ctype_touplow_t **__ctype_toupper_loc(void) /**********************************************************************/ #ifdef L___C_ctype_b -const uint16_t __C_ctype_b_data[] = { +const __ctype_mask_t __C_ctype_b_data[] = { #ifdef __UCLIBC_HAS_CTYPE_SIGNED__ /* -128 M-^@ */ 0, /* -127 M-^A */ 0, @@ -804,19 +804,11 @@ const uint16_t __C_ctype_b_data[] = { /* 255 M-^? 
*/ 0 }; -const uint16_t *__C_ctype_b = __C_ctype_b_data + 1 -#ifdef __UCLIBC_HAS_CTYPE_SIGNED__ - + 127 -#endif - ; +const __ctype_mask_t *__C_ctype_b = __C_ctype_b_data + __UCLIBC_CTYPE_B_TBL_OFFSET; #ifndef __UCLIBC_HAS_XLOCALE__ -const uint16_t *__ctype_b = __C_ctype_b_data + 1 -#ifdef __UCLIBC_HAS_CTYPE_SIGNED__ - + 127 -#endif - ; +const __ctype_mask_t *__ctype_b = __C_ctype_b_data + __UCLIBC_CTYPE_B_TBL_OFFSET; #endif @@ -926,18 +918,12 @@ const __ctype_touplow_t __C_ctype_tolower_data[] = { }; const __ctype_touplow_t *__C_ctype_tolower = __C_ctype_tolower_data -#ifdef __UCLIBC_HAS_CTYPE_SIGNED__ - + 128 -#endif - ; + + __UCLIBC_CTYPE_TO_TBL_OFFSET; #ifndef __UCLIBC_HAS_XLOCALE__ const __ctype_touplow_t *__ctype_tolower = __C_ctype_tolower_data -#ifdef __UCLIBC_HAS_CTYPE_SIGNED__ - + 128 -#endif - ; + + __UCLIBC_CTYPE_TO_TBL_OFFSET; #endif @@ -1047,18 +1033,12 @@ const __ctype_touplow_t __C_ctype_toupper_data[] = { }; const __ctype_touplow_t *__C_ctype_toupper = __C_ctype_toupper_data -#ifdef __UCLIBC_HAS_CTYPE_SIGNED__ - + 128 -#endif - ; + + __UCLIBC_CTYPE_TO_TBL_OFFSET; #ifndef __UCLIBC_HAS_XLOCALE__ const __ctype_touplow_t *__ctype_toupper = __C_ctype_toupper_data -#ifdef __UCLIBC_HAS_CTYPE_SIGNED__ - + 128 -#endif - ; + + __UCLIBC_CTYPE_TO_TBL_OFFSET; #endif diff --git a/libc/misc/locale/locale.c b/libc/misc/locale/locale.c index 8f0cbd415..cbed01146 100644 --- a/libc/misc/locale/locale.c +++ b/libc/misc/locale/locale.c @@ -54,7 +54,6 @@ #include #include #include -#warning devel code #include #undef __LOCALE_C_ONLY @@ -639,6 +638,122 @@ int _locale_set_l(const unsigned char *p, __locale_t base) base->idx8wc2c = c8b->idx8wc2c; /* translit */ #endif /* __UCLIBC_HAS_WCHAR__ */ + + /* What follows is fairly bloated, but it is just a hack + * to get the 8-bit codeset ctype stuff functioning. + * All of this will be replaced in the next generation + * of locale support anyway... 
*/ + + memcpy(base->__ctype_b_data, + __C_ctype_b - __UCLIBC_CTYPE_B_TBL_OFFSET, + (256 + __UCLIBC_CTYPE_B_TBL_OFFSET) + * sizeof(__ctype_mask_t)); + memcpy(base->__ctype_tolower_data, + __C_ctype_tolower - __UCLIBC_CTYPE_TO_TBL_OFFSET, + (256 + __UCLIBC_CTYPE_TO_TBL_OFFSET) + * sizeof(__ctype_touplow_t)); + memcpy(base->__ctype_toupper_data, + __C_ctype_toupper - __UCLIBC_CTYPE_TO_TBL_OFFSET, + (256 + __UCLIBC_CTYPE_TO_TBL_OFFSET) + * sizeof(__ctype_touplow_t)); + +#define Cctype_TBL_MASK ((1 << __LOCALE_DATA_Cctype_IDX_SHIFT) - 1) +#define Cctype_IDX_OFFSET (128 >> __LOCALE_DATA_Cctype_IDX_SHIFT) + + { + int u; + __ctype_mask_t m; + + for (u=0 ; u < 128 ; u++) { +#ifdef __LOCALE_DATA_Cctype_PACKED + c = base->tbl8ctype + [ ((int)(c8b->idx8ctype + [(u >> __LOCALE_DATA_Cctype_IDX_SHIFT) ]) + << (__LOCALE_DATA_Cctype_IDX_SHIFT - 1)) + + ((u & Cctype_TBL_MASK) >> 1)]; + c = (u & 1) ? (c >> 4) : (c & 0xf); +#else + c = base->tbl8ctype + [ ((int)(c8b->idx8ctype + [(u >> __LOCALE_DATA_Cctype_IDX_SHIFT) ]) + << __LOCALE_DATA_Cctype_IDX_SHIFT) + + (u & Cctype_TBL_MASK) ]; +#endif + + m = base->code2flag[c]; + + base->__ctype_b_data + [128 + __UCLIBC_CTYPE_B_TBL_OFFSET + u] + = m; + +#ifdef __UCLIBC_HAS_CTYPE_SIGNED__ + if (((signed char)(128 + u)) != -1) { + base->__ctype_b_data[__UCLIBC_CTYPE_B_TBL_OFFSET + + ((signed char)(128 + u))] + = m; + } +#endif + + base->__ctype_tolower_data + [128 + __UCLIBC_CTYPE_TO_TBL_OFFSET + u] + = 128 + u; + base->__ctype_toupper_data + [128 + __UCLIBC_CTYPE_TO_TBL_OFFSET + u] + = 128 + u; + + if (m & (_ISlower|_ISupper)) { + c = base->tbl8uplow + [ ((int)(c8b->idx8uplow + [u >> __LOCALE_DATA_Cuplow_IDX_SHIFT]) + << __LOCALE_DATA_Cuplow_IDX_SHIFT) + + ((128 + u) + & ((1 << __LOCALE_DATA_Cuplow_IDX_SHIFT) + - 1)) ]; + if (m & _ISlower) { + base->__ctype_toupper_data + [128 + __UCLIBC_CTYPE_TO_TBL_OFFSET + u] + = (unsigned char)(128 + u + c); +#ifdef __UCLIBC_HAS_CTYPE_SIGNED__ + if (((signed char)(128 + u)) != -1) { + 
base->__ctype_toupper_data + [__UCLIBC_CTYPE_TO_TBL_OFFSET + + ((signed char)(128 + u))] + = (unsigned char)(128 + u + c); + } +#endif + } else { + base->__ctype_tolower_data + [128 + __UCLIBC_CTYPE_TO_TBL_OFFSET + u] + = (unsigned char)(128 + u - c); +#ifdef __UCLIBC_HAS_CTYPE_SIGNED__ + if (((signed char)(128 + u)) != -1) { + base->__ctype_tolower_data + [__UCLIBC_CTYPE_TO_TBL_OFFSET + + ((signed char)(128 + u))] + = (unsigned char)(128 + u - c); + } +#endif + } + } + } + } + +#ifdef __UCLIBC_HAS_XLOCALE__ + base->__ctype_b = base->__ctype_b_data + + __UCLIBC_CTYPE_B_TBL_OFFSET; + base->__ctype_tolower = base->__ctype_tolower_data + + __UCLIBC_CTYPE_TO_TBL_OFFSET; + base->__ctype_toupper = base->__ctype_toupper_data + + __UCLIBC_CTYPE_TO_TBL_OFFSET; +#else /* __UCLIBC_HAS_XLOCALE__ */ + __ctype_b = base->__ctype_b_data + + __UCLIBC_CTYPE_B_TBL_OFFSET; + __ctype_tolower = base->__ctype_tolower_data + + __UCLIBC_CTYPE_TO_TBL_OFFSET; + __ctype_toupper = base->__ctype_toupper_data + + __UCLIBC_CTYPE_TO_TBL_OFFSET; +#endif /* __UCLIBC_HAS_XLOCALE__ */ + #endif /* __CTYPE_HAS_8_BIT_LOCALES */ } #ifdef __UCLIBC_MJN3_ONLY__ @@ -741,11 +856,8 @@ void _locale_init_l(__locale_t base) /* width?? */ #endif /* __UCLIBC_HAS_WCHAR__ */ - - -#ifdef __UCLIBC_MJN3_ONLY__ -#warning wrong for now, but always set ctype arrays to global C version -#endif + /* Initially, set things up to use the global C ctype tables. + * This is correct for C (ASCII) and UTF-8 based locales (except tr_TR). 
*/ #ifdef __UCLIBC_HAS_XLOCALE__ base->__ctype_b = __C_ctype_b; base->__ctype_tolower = __C_ctype_tolower; diff --git a/libc/sysdeps/linux/common/bits/uClibc_locale.h b/libc/sysdeps/linux/common/bits/uClibc_locale.h index bf642b511..88226c638 100644 --- a/libc/sysdeps/linux/common/bits/uClibc_locale.h +++ b/libc/sysdeps/linux/common/bits/uClibc_locale.h @@ -141,11 +141,16 @@ typedef struct { typedef struct { #ifdef __UCLIBC_HAS_XLOCALE__ - const __uint16_t *__ctype_b; + const __ctype_mask_t *__ctype_b; const __ctype_touplow_t *__ctype_tolower; const __ctype_touplow_t *__ctype_toupper; #endif + /* For now, just embed this in the structure. */ + __ctype_mask_t __ctype_b_data[256 + __UCLIBC_CTYPE_B_TBL_OFFSET]; + __ctype_touplow_t __ctype_tolower_data[256 + __UCLIBC_CTYPE_TO_TBL_OFFSET]; + __ctype_touplow_t __ctype_toupper_data[256 + __UCLIBC_CTYPE_TO_TBL_OFFSET]; + /* int tables_loaded; */ /* unsigned char lctypes[LOCALE_STRING_SIZE]; */ unsigned char cur_locale[LOCALE_STRING_SIZE]; diff --git a/libc/sysdeps/linux/common/bits/uClibc_touplow.h b/libc/sysdeps/linux/common/bits/uClibc_touplow.h index 75d508546..d79d3cde7 100644 --- a/libc/sysdeps/linux/common/bits/uClibc_touplow.h +++ b/libc/sysdeps/linux/common/bits/uClibc_touplow.h @@ -34,10 +34,20 @@ /* glibc uses the equivalent of - typedef __int32_t __ctype_touplow_t; */ +typedef __uint16_t __ctype_mask_t; + #ifdef __UCLIBC_HAS_CTYPE_SIGNED__ + typedef __int16_t __ctype_touplow_t; +#define __UCLIBC_CTYPE_B_TBL_OFFSET 128 +#define __UCLIBC_CTYPE_TO_TBL_OFFSET 128 + #else /* __UCLIBC_HAS_CTYPE_SIGNED__ */ + typedef unsigned char __ctype_touplow_t; +#define __UCLIBC_CTYPE_B_TBL_OFFSET 1 +#define __UCLIBC_CTYPE_TO_TBL_OFFSET 0 + #endif /* __UCLIBC_HAS_CTYPE_SIGNED__ */ #endif /* _UCLIBC_TOUPLOW_H */ -- cgit v1.2.3