summaryrefslogtreecommitdiff
path: root/extra/locale/gen_wctype.c
diff options
context:
space:
mode:
authorManuel Novoa III <mjn3@codepoet.org>2002-05-06 07:37:32 +0000
committerManuel Novoa III <mjn3@codepoet.org>2002-05-06 07:37:32 +0000
commitd07fdf8b9ece2c4339b325921add50792077bf97 (patch)
treeb0886656bdd854728f2d1c05597368c4739ecc1b /extra/locale/gen_wctype.c
parent7f09a14cabbec158d683542e53f53ccfe75031fa (diff)
New locale support (in development). Supports LC_CTYPE, LC_NUMERIC,
LC_TIME, LC_MONETARY, and LC_MESSAGES for the SUSv3 items. Also, nl_langinfo() when real locale support is enabled. New implementation of ctype.h. New implementation of wctype.h. New implementation of most of the string functions (smaller). New implementation of the wcs/wmem functions. These are untested, but they're also just preprocessor-modified versions of the corresponding str/mem functions. Tweaked qsort and new bsearch. Stuff still pending: stdlib.h and wchar.h mb<->wc functions. I actually have working versions of the stdlib ones, but the reentrant versions from wchar.h require some reworking. Basic replacement and translit support for wc->mb conversions. (groundwork laid). Simple-minded collate support such as was provided by the previous locale implementation. (mostly done -- 8-bit codesets only) Shared mmapping of the locale data and strerror message text.
Diffstat (limited to 'extra/locale/gen_wctype.c')
-rw-r--r--extra/locale/gen_wctype.c833
1 files changed, 833 insertions, 0 deletions
diff --git a/extra/locale/gen_wctype.c b/extra/locale/gen_wctype.c
new file mode 100644
index 000000000..9508a3bbf
--- /dev/null
+++ b/extra/locale/gen_wctype.c
@@ -0,0 +1,833 @@
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <locale.h>
+#include <wctype.h>
+#include <limits.h>
+#include <stdint.h>
+#include <wchar.h>
+#include <ctype.h>
+
+
+/* 0x9 : space blank */
+/* 0xa : space */
+/* 0xb : space */
+/* 0xc : space */
+/* 0xd : space */
+/* 0x20 : space blank */
+/* 0x1680 : space blank */
+/* 0x2000 : space blank */
+/* 0x2001 : space blank */
+/* 0x2002 : space blank */
+/* 0x2003 : space blank */
+/* 0x2004 : space blank */
+/* 0x2005 : space blank */
+/* 0x2006 : space blank */
+/* 0x2008 : space blank */
+/* 0x2009 : space blank */
+/* 0x200a : space blank */
+/* 0x200b : space blank */
+/* 0x2028 : space */
+/* 0x2029 : space */
+/* 0x3000 : space blank */
+
+/* typecount[ 0] = 88670 C_alpha_nonupper_nonlower */
+/* typecount[ 1] = 742 C_alpha_lower */
+/* typecount[ 2] = 4 C_alpha_upper_lower */
+/* typecount[ 3] = 731 C_alpha_upper */
+/* typecount[ 4] = 10 C_digit */
+/* typecount[ 5] = 10270 C_punct */
+/* typecount[ 6] = 0 C_graph */
+/* typecount[ 7] = 0 C_print_space_nonblank */
+/* typecount[ 8] = 14 C_print_space_blank */
+/* typecount[ 9] = 0 C_space_nonblank_noncntrl */
+/* typecount[10] = 0 C_space_blank_noncntrl */
+/* typecount[11] = 6 C_cntrl_space_nonblank */
+/* typecount[12] = 1 C_cntrl_space_blank */
+/* typecount[13] = 60 C_cntrl_nonspace */
+/* typecount[14] = 96100 C_unclassified */
+/* typecount[15] = 0 empty_slot */
+
+
+
+/* RANGE is the highest wide-character value covered by the generated
+ * tables; main() classifies every value from 0 through RANGE.
+ *
+ * As written the first branch is active (0x2ffff).  Changing the "#if 1"
+ * to "#if 0" skips the disabled "#elif 0" branch as well and selects the
+ * 16-bit limit (0xffff); the 0x1ffff setting is only used if that
+ * "#elif 0" is also changed to "#elif 1". */
+#if 1
+#define RANGE 0x2ffffUL
+#elif 0
+#define RANGE 0x1ffffUL
+#else
+#define RANGE 0xffffUL /* Restrict for 16-bit wchar_t... */
+#endif
+
+/* Classification codes.
+ *
+ * Printable names for the 16 possible nibble values, listed in the same
+ * order as the __CTYPE_* enumerators that follow.  main() indexes this
+ * array with the class number when it prints the per-class counts.  The
+ * final entry names the one nibble value that no class uses. */
+
+static const char *typename[] = {
+ "C_unclassified",
+ "C_alpha_nonupper_nonlower",
+ "C_alpha_lower",
+ "C_alpha_upper_lower",
+ "C_alpha_upper",
+ "C_digit",
+ "C_punct",
+ "C_graph",
+ "C_print_space_nonblank",
+ "C_print_space_blank",
+ "C_space_nonblank_noncntrl",
+ "C_space_blank_noncntrl",
+ "C_cntrl_space_nonblank",
+ "C_cntrl_space_blank",
+ "C_cntrl_nonspace",
+ "empty_slot"
+};
+
+/* Taking advantage of the C99 mutual-exclusion guarantees for the various
+ * (w)ctype classes, including the descriptions of printing and control
+ * (w)chars, we can place each in one of the following mutually-exclusive
+ * subsets. Since there are fewer than 16, we can store the data for
+ * each (w)char in a nibble. In contrast, glibc uses an unsigned int
+ * per (w)char, with one bit flag for each is* type. While this allows
+ * a simple '&' operation to determine the type vs. a range test and a
+ * little special handling for the "blank" and "xdigit" types in my
+ * approach, it also uses 8 times the space for the tables on the typical
+ * 32-bit archs we support.
+ *
+ * The numeric order is significant: the __CTYPE_is*() macros classify by
+ * range tests on these values, and __CTYPE_isblank() additionally relies
+ * on the "blank" member of each of the three space pairs being odd. */
+enum {
+ __CTYPE_unclassified = 0,
+ __CTYPE_alpha_nonupper_nonlower, /* 1 */
+ __CTYPE_alpha_lower, /* 2 */
+ __CTYPE_alpha_upper_lower, /* 3: both upper and lower */
+ __CTYPE_alpha_upper, /* 4 */
+ __CTYPE_digit, /* 5 */
+ __CTYPE_punct, /* 6 */
+ __CTYPE_graph, /* 7 */
+ __CTYPE_print_space_nonblank, /* 8 */
+ __CTYPE_print_space_blank, /* 9 */
+ __CTYPE_space_nonblank_noncntrl, /* 10 */
+ __CTYPE_space_blank_noncntrl, /* 11 */
+ __CTYPE_cntrl_space_nonblank, /* 12 */
+ __CTYPE_cntrl_space_blank, /* 13 */
+ __CTYPE_cntrl_nonspace, /* 14 */
+};
+
+/* Some macros that test for various (w)ctype classes when passed one of the
+ * designator values enumerated above.
+ *
+ * Most of them are a single unsigned range check: the lower bound is
+ * subtracted and the result converted to unsigned, so a value below the
+ * bound wraps to a huge number and one "<=" covers both ends of the range.
+ *
+ * Every use of a macro argument is parenthesized so that an expression
+ * argument (for example "a | b") is classified as a whole instead of being
+ * re-associated with the operators inside the macro body. */
+#define __CTYPE_isalnum(D) ((unsigned int)((D)-1) <= (__CTYPE_digit-1))
+#define __CTYPE_isalpha(D) ((unsigned int)((D)-1) <= (__CTYPE_alpha_upper-1))
+/* The six space classes are three (nonblank, blank) pairs; the blank member
+ * of each pair has an odd value. */
+#define __CTYPE_isblank(D) \
+ ((((unsigned int)((D) - __CTYPE_print_space_nonblank)) <= 5) && ((D) & 1))
+#define __CTYPE_iscntrl(D) (((unsigned int)((D) - __CTYPE_cntrl_space_nonblank)) <= 2)
+#define __CTYPE_isdigit(D) ((D) == __CTYPE_digit)
+#define __CTYPE_isgraph(D) ((unsigned int)((D)-1) <= (__CTYPE_graph-1))
+#define __CTYPE_islower(D) (((unsigned int)((D) - __CTYPE_alpha_lower)) <= 1)
+#define __CTYPE_isprint(D) ((unsigned int)((D)-1) <= (__CTYPE_print_space_blank-1))
+#define __CTYPE_ispunct(D) ((D) == __CTYPE_punct)
+#define __CTYPE_isspace(D) (((unsigned int)((D) - __CTYPE_print_space_nonblank)) <= 5)
+#define __CTYPE_isupper(D) (((unsigned int)((D) - __CTYPE_alpha_upper_lower)) <= 1)
+/* D is the class code and X the character itself: hex letters are found by
+ * folding X to lower case with |0x20 and range-testing against 'a'..'f'. */
+#define __CTYPE_isxdigit(D,X) \
+ (__CTYPE_isdigit(D) || (((unsigned int)(((X)|0x20) - 'a')) <= 5))
+
+/* Stand-ins for the isw*() calls, used by main() when it compares the
+ * nibble classification with the C library.  They deliberately ignore
+ * their argument (mywxdigit excepted) and test the class code held in a
+ * variable named d that must be in scope wherever they are used. */
+#define mywalnum(x) __CTYPE_isalnum(d)
+#define mywalpha(x) __CTYPE_isalpha(d)
+#define mywblank(x) __CTYPE_isblank(d)
+#define mywcntrl(x) __CTYPE_iscntrl(d)
+#define mywdigit(x) __CTYPE_isdigit(d)
+#define mywgraph(x) __CTYPE_isgraph(d)
+#define mywlower(x) __CTYPE_islower(d)
+#define mywprint(x) __CTYPE_isprint(d)
+#define mywpunct(x) __CTYPE_ispunct(d)
+#define mywspace(x) __CTYPE_isspace(d)
+#define mywupper(x) __CTYPE_isupper(d)
+#define mywxdigit(x) __CTYPE_isxdigit(d,x)
+/* One distinct (tolower, toupper) offset pair.  main() stores
+ * towlower(c) - c in l and towupper(c) - c in u, and the upper/lower table
+ * holds, for each character, an index into the array of these pairs. */
+typedef struct {
+ short l;
+ short u;
+} uldiff_entry;
+/* Result of compressing one flat byte table into three levels; the fields
+ * are filled in by newopt().  ii, ti and ut point into one buffer, in that
+ * order and back to back (newopt() sets ti = ii + ii_len and
+ * ut = ti + ti_len), which is why output_table() can emit them as a
+ * single array. */
+typedef struct {
+ uint16_t ii_len; /* bytes in ii */
+ uint16_t ti_len; /* bytes in ti */
+ uint16_t ut_len; /* bytes in ut */
+
+ unsigned char ii_shift; /* log2 of the number of entries per ti block */
+ unsigned char ti_shift; /* log2 of the number of entries per ut block (callers adjust it for packed entries) */
+
+ unsigned char *ii; /* top-level index: selects a ti block */
+ unsigned char *ti; /* second-level index: selects a ut block */
+ unsigned char *ut; /* the distinct data blocks */
+} table_data;
+
+
+/* Write LEN bytes of DATA to FP as C initializer elements, starting a new
+ * line before every 12th element counted from the start of this run. */
+static void output_table_bytes(FILE *fp, const unsigned char *data, size_t len)
+{
+	size_t k;
+
+	for (k = 0 ; k < len ; k++) {
+		if (k % 12 == 0) {
+			fprintf(fp, "\n");
+		}
+		fprintf(fp, " %#04x,", data[k]);
+	}
+}
+
+/* Emit one compressed table to FP as C source.
+ *
+ * NAME is spliced into every identifier (WC<NAME>_II_LEN and so on).  The
+ * five length/shift macros are always emitted; the data array, holding the
+ * ii, ti and ut levels one after another, is wrapped in
+ * "#ifdef WANT_WC<NAME>_data".  Nothing is returned and write errors are
+ * not detected here. */
+void output_table(FILE *fp, const char *name, table_data *tbl)
+{
+	const size_t total = (size_t) tbl->ii_len + tbl->ti_len + tbl->ut_len;
+
+	/* Sizes of the three levels. */
+	fprintf(fp, "#define WC%s_II_LEN %7u\n", name, tbl->ii_len);
+	fprintf(fp, "#define WC%s_TI_LEN %7u\n", name, tbl->ti_len);
+	fprintf(fp, "#define WC%s_UT_LEN %7u\n", name, tbl->ut_len);
+
+	/* Shifts needed to split a character into the three indices. */
+	fprintf(fp, "#define WC%s_II_SHIFT %7u\n", name, tbl->ii_shift);
+	fprintf(fp, "#define WC%s_TI_SHIFT %7u\n", name, tbl->ti_shift);
+
+	fprintf(fp, "\n#ifdef WANT_WC%s_data\n", name);
+
+	fprintf(fp, "\nstatic const unsigned char WC%s_data[%zu] = {", name, total);
+	/* Each level restarts the 12-per-line count. */
+	output_table_bytes(fp, tbl->ii, tbl->ii_len);
+	output_table_bytes(fp, tbl->ti, tbl->ti_len);
+	output_table_bytes(fp, tbl->ut, tbl->ut_len);
+	fprintf(fp, "\n};\n\n");
+
+	fprintf(fp, "#endif /* WANT_WC%s_data */\n\n", name);
+}
+
+/* Print the shifts, the three level sizes and their sum for TBL on stdout
+ * (progress/diagnostic output only). */
+static void dump_table_data(table_data *tbl)
+{
+	const int index_len = tbl->ii_len;
+	const int block_len = tbl->ti_len;
+	const int data_len = tbl->ut_len;
+
+	printf("ii_shift = %d ti_shift = %d\n", tbl->ii_shift, tbl->ti_shift);
+	printf("ii_len = %d ti_len = %d ut_len = %d\n", index_len, block_len, data_len);
+	printf("total = %d\n", index_len + block_len + data_len);
+}
+
+/* Block length, in bytes, that nu_memcmp() compares.  The caller sets it
+ * before handing nu_memcmp() to qsort(). */
+static size_t nu_val;
+
+/* qsort() comparison callback.  a and b each point at an array element
+ * that holds a pointer to a block of nu_val bytes; the elements are
+ * ordered by the contents of those blocks, as memcmp() orders them. */
+int nu_memcmp(const void *a, const void *b)
+{
+	unsigned char *const *lhs = a;
+	unsigned char *const *rhs = b;
+
+	return memcmp(*lhs, *rhs, nu_val);
+}
+
+static size_t newopt(unsigned char *ut, size_t usize, int shift, table_data *tbl);
+
+#define MAXTO 255 /* Restrict to minimal unsigned char max. */
+
+/* Build the wide-character classification, case-mapping, combining and
+ * width tables from the host C library and write them to wctables.h.
+ *
+ * Each command-line argument is a locale name.  The tables are built once,
+ * from the first locale that setlocale() accepts.  Every accepted locale
+ * (that first one included) is then checked against the tables over the
+ * whole code range; disagreements found in that pass are printed on stdout
+ * but are not fatal, whereas a disagreement found while building is.
+ *
+ * Returns EXIT_SUCCESS once wctables.h has been written, and EXIT_FAILURE
+ * if no locale could be used, an internal assumption fails, memory runs
+ * out, or the output file cannot be opened or completely written. */
+int main(int argc, char **argv)
+{
+	long int u, l, tt;
+	size_t smallest, t;
+	unsigned int c;
+	unsigned int d;
+	int i, n;
+	int ul_count = 0;
+	uldiff_entry uldiff[MAXTO];
+	table_data cttable;
+	table_data ultable;
+	table_data combtable;
+	table_data widthtable;
+
+	unsigned char wct[(RANGE/2)+1];		/* wctype table (nibble per wchar) */
+	unsigned char ult[RANGE+1];		/* upper/lower table */
+	unsigned char combt[(RANGE/4)+1];	/* combining */
+	unsigned char widtht[(RANGE/4)+1];	/* width */
+	wctrans_t totitle;
+	wctype_t is_comb, is_comb3;
+
+	long int typecount[16];
+	int built = 0;
+
+	setvbuf(stdout, NULL, _IONBF, 0);
+
+	while (--argc) {
+		if (!setlocale(LC_CTYPE, *++argv)) {
+			printf("setlocale(LC_CTYPE,%s) failed!\n", *argv);
+			continue;
+		}
+
+		if (!(totitle = wctrans("totitle"))) {
+			printf("no totitle transformation.\n");
+		}
+		if (!(is_comb = wctype("combining"))) {
+			printf("no combining wctype.\n");
+		}
+		if (!(is_comb3 = wctype("combining_level3"))) {
+			printf("no combining_level3 wctype.\n");
+		}
+
+		if (!built) {
+			built = 1;
+			/* Entry 0 is the "no case mapping" pair. */
+			ul_count = 1;
+			uldiff[0].u = uldiff[0].l = 0;
+
+			memset(wct, 0, sizeof(wct));
+			memset(combt, 0, sizeof(combt));
+			memset(widtht, 0, sizeof(widtht));
+
+			for (i = 0 ; i < 16 ; i++) {
+				typecount[i] = 0;
+			}
+
+			for (c=0 ; c <= RANGE ; c++) {
+				/* Reduce the C library's answers to one class code. */
+				if (iswdigit(c)) {
+					d = __CTYPE_digit;
+				} else if (iswalpha(c)) {
+					d = __CTYPE_alpha_nonupper_nonlower;
+					if (iswlower(c)) {
+						d = __CTYPE_alpha_lower;
+						if (iswupper(c)) {
+							d = __CTYPE_alpha_upper_lower;
+						}
+					} else if (iswupper(c)) {
+						d = __CTYPE_alpha_upper;
+					}
+				} else if (iswpunct(c)) {
+					d = __CTYPE_punct;
+				} else if (iswgraph(c)) {
+					d = __CTYPE_graph;
+				} else if (iswprint(c)) {
+					d = __CTYPE_print_space_nonblank;
+					if (iswblank(c)) {
+						d = __CTYPE_print_space_blank;
+					}
+				} else if (iswspace(c) && !iswcntrl(c)) {
+					d = __CTYPE_space_nonblank_noncntrl;
+					if (iswblank(c)) {
+						d = __CTYPE_space_blank_noncntrl;
+					}
+				} else if (iswcntrl(c)) {
+					d = __CTYPE_cntrl_nonspace;
+					if (iswspace(c)) {
+						d = __CTYPE_cntrl_space_nonblank;
+						if (iswblank(c)) {
+							d = __CTYPE_cntrl_space_blank;
+						}
+					}
+				} else {
+					d = __CTYPE_unclassified;
+				}
+
+				++typecount[d];
+
+#if 0
+				if (iswspace(c)) {
+					if (iswblank(c)) {
+						printf("%#8x : space blank\n", c);
+					} else {
+						printf("%#8x : space\n", c);
+					}
+				}
+#endif
+
+#if 0
+				if (c < 256) {
+					unsigned int glibc;
+
+					glibc = 0;
+					if (isalnum(c)) ++glibc; glibc <<= 1;
+					if (isalpha(c)) ++glibc; glibc <<= 1;
+					if (isblank(c)) ++glibc; glibc <<= 1;
+					if (iscntrl(c)) ++glibc; glibc <<= 1;
+					if (isdigit(c)) ++glibc; glibc <<= 1;
+					if (isgraph(c)) ++glibc; glibc <<= 1;
+					if (islower(c)) ++glibc; glibc <<= 1;
+					if (isprint(c)) ++glibc; glibc <<= 1;
+					if (ispunct(c)) ++glibc; glibc <<= 1;
+					if (isspace(c)) ++glibc; glibc <<= 1;
+					if (isupper(c)) ++glibc; glibc <<= 1;
+					if (isxdigit(c)) ++glibc;
+					printf("%#8x : ctype %#4x\n", c, glibc);
+				}
+#endif
+
+#if 1
+				/* Paranoid checking... */
+				{
+					unsigned int glibc;
+					unsigned int mine;
+
+					/* One bit per is* answer, library side... */
+					glibc = 0;
+					if (iswalnum(c)) ++glibc; glibc <<= 1;
+					if (iswalpha(c)) ++glibc; glibc <<= 1;
+					if (iswblank(c)) ++glibc; glibc <<= 1;
+					if (iswcntrl(c)) ++glibc; glibc <<= 1;
+					if (iswdigit(c)) ++glibc; glibc <<= 1;
+					if (iswgraph(c)) ++glibc; glibc <<= 1;
+					if (iswlower(c)) ++glibc; glibc <<= 1;
+					if (iswprint(c)) ++glibc; glibc <<= 1;
+					if (iswpunct(c)) ++glibc; glibc <<= 1;
+					if (iswspace(c)) ++glibc; glibc <<= 1;
+					if (iswupper(c)) ++glibc; glibc <<= 1;
+					if (iswxdigit(c)) ++glibc;
+
+					/* ...and the same bits derived from d alone. */
+					mine = 0;
+					if (mywalnum(c)) ++mine; mine <<= 1;
+					if (mywalpha(c)) ++mine; mine <<= 1;
+					if (mywblank(c)) ++mine; mine <<= 1;
+					if (mywcntrl(c)) ++mine; mine <<= 1;
+					if (mywdigit(c)) ++mine; mine <<= 1;
+					if (mywgraph(c)) ++mine; mine <<= 1;
+					if (mywlower(c)) ++mine; mine <<= 1;
+					if (mywprint(c)) ++mine; mine <<= 1;
+					if (mywpunct(c)) ++mine; mine <<= 1;
+					if (mywspace(c)) ++mine; mine <<= 1;
+					if (mywupper(c)) ++mine; mine <<= 1;
+					if (mywxdigit(c)) ++mine;
+
+					if (glibc != mine) {
+						printf("%#8x : glibc %#4x != %#4x mine %u\n", c, glibc, mine, d);
+						return EXIT_FAILURE;
+					}
+
+#if 0
+					if (iswctype(c,is_comb) || iswctype(c,is_comb3)) {
+/*						if (!iswpunct(c)) { */
+							printf("%#8x : %d %d %#4x\n",
+								   c, iswctype(c,is_comb),iswctype(c,is_comb3), glibc);
+/*						} */
+					}
+#endif
+
+				}
+#endif
+
+				/* Two bits per character, four characters per byte. */
+				combt[c/4] |= ((((!!iswctype(c,is_comb)) << 1) | !!iswctype(c,is_comb3))
+							   << ((c & 3) << 1));
+/*				comb3t[c/8] |= ((!!iswctype(c,is_comb3)) << (c & 7)); */
+				/* wcwidth() returns -1 for non-printable characters.  Keep
+				 * only the low two bits (so -1 is stored as 3): shifting a
+				 * negative int is undefined and would also overwrite the
+				 * other fields packed into the same byte.
+				 * NOTE(review): confirm the table's consumer expects 3 to
+				 * mean "no width". */
+				widtht[c/4] |= (((unsigned int) wcwidth(c) & 3U) << ((c & 3) << 1));
+
+				if (c & 1) {	/* Use the high nibble for odd numbered wchars. */
+					d <<= 4;
+				}
+				wct[c/2] |= d;
+
+				l = towlower(c) - c;
+				u = towupper(c) - c;
+				ult[c] = 0;
+				if (l || u) {
+					if ((l != (short)l) || (u != (short)u)) {
+						printf("range assumption error! %x %ld %ld\n", c, l, u);
+						return EXIT_FAILURE;
+					}
+					for (i=0 ; i < ul_count ; i++) {
+						if ((l == uldiff[i].l) && (u == uldiff[i].u)) {
+							goto found;
+						}
+					}
+					/* New pair: make sure there is room before storing it
+					 * (uldiff has MAXTO slots, indices 0..MAXTO-1).  Here
+					 * i == ul_count, the index the pair is about to get. */
+					if (ul_count >= MAXTO) {
+						printf("too many touppers/tolowers!\n");
+						return EXIT_FAILURE;
+					}
+					uldiff[ul_count].l = l;
+					uldiff[ul_count].u = u;
+					++ul_count;
+				found:
+					ult[c] = i;
+				}
+			}
+
+			for (i = 0 ; i < 16 ; i++) {
+				printf("typecount[%2d] = %8ld %s\n", i, typecount[i], typename[i]);
+			}
+
+			/* For each table: measure every block shift with tbl.ii == NULL,
+			 * allocate the smallest size found, then rerun newopt() at the
+			 * winning shift to fill the buffer. */
+			printf("optimizing is* table..\n");
+			n = -1;
+			smallest = SIZE_MAX;
+			cttable.ii = NULL;
+			for (i=0 ; i < 14 ; i++) {
+				t = newopt(wct, (RANGE/2)+1, i, &cttable);
+				if (smallest >= t) {
+					n = i;
+					smallest = t;
+/*				} else { */
+/*					break; */
+				}
+			}
+			printf("smallest = %zu\n", smallest);
+			if (!(cttable.ii = malloc(smallest))) {
+				printf("couldn't allocate space!\n");
+				return EXIT_FAILURE;
+			}
+			smallest = SIZE_MAX;
+			newopt(wct, (RANGE/2)+1, n, &cttable);
+			++cttable.ti_shift;	/* correct for nibble mode */
+
+
+
+			printf("optimizing u/l-to table..\n");
+			smallest = SIZE_MAX;
+			ultable.ii = NULL;
+			for (i=0 ; i < 14 ; i++) {
+				t = newopt(ult, RANGE+1, i, &ultable);
+				if (smallest >= t) {
+					n = i;
+					smallest = t;
+/*				} else { */
+/*					break; */
+				}
+			}
+			/* %zu needs size_t arguments, so do the arithmetic in size_t. */
+			printf("%zu (smallest) + %zu (u/l diffs) = %zu\n",
+				   smallest, (size_t) 4 * ul_count, smallest + (size_t) 4 * ul_count);
+			printf("smallest = %zu\n", smallest);
+			if (!(ultable.ii = malloc(smallest))) {
+				printf("couldn't allocate space!\n");
+				return EXIT_FAILURE;
+			}
+			smallest = SIZE_MAX;
+			newopt(ult, RANGE+1, n, &ultable);
+
+
+			printf("optimizing comb table..\n");
+			smallest = SIZE_MAX;
+			combtable.ii = NULL;
+			for (i=0 ; i < 14 ; i++) {
+				t = newopt(combt, sizeof(combt), i, &combtable);
+				if (smallest >= t) {
+					n = i;
+					smallest = t;
+/*				} else { */
+/*					break; */
+				}
+			}
+			printf("smallest = %zu\n", smallest);
+			if (!(combtable.ii = malloc(smallest))) {
+				printf("couldn't allocate space!\n");
+				return EXIT_FAILURE;
+			}
+			smallest = SIZE_MAX;
+			newopt(combt, sizeof(combt), n, &combtable);
+			combtable.ti_shift += 4;	/* correct for 4 entries per */
+
+
+			printf("optimizing width table..\n");
+			smallest = SIZE_MAX;
+			widthtable.ii = NULL;
+			for (i=0 ; i < 14 ; i++) {
+				t = newopt(widtht, sizeof(widtht), i, &widthtable);
+				if (smallest >= t) {
+					n = i;
+					smallest = t;
+/*				} else { */
+/*					break; */
+				}
+			}
+			printf("smallest = %zu\n", smallest);
+			if (!(widthtable.ii = malloc(smallest))) {
+				printf("couldn't allocate space!\n");
+				return EXIT_FAILURE;
+			}
+			smallest = SIZE_MAX;
+			newopt(widtht, sizeof(widtht), n, &widthtable);
+			widthtable.ti_shift += 4;	/* correct for 4 entries per */
+
+
+#if 0
+			printf("optimizing comb3 table..\n");
+			smallest = SIZE_MAX;
+			comb3table.ii = NULL;
+			for (i=0 ; i < 14 ; i++) {
+				t = newopt(comb3t, sizeof(comb3t), i, &comb3table);
+				if (smallest >= t) {
+					n = i;
+					smallest = t;
+/*				} else { */
+/*					break; */
+				}
+			}
+			printf("smallest = %zu\n", smallest);
+			if (!(comb3table.ii = malloc(smallest))) {
+				printf("couldn't allocate space!\n");
+				return EXIT_FAILURE;
+			}
+			smallest = SIZE_MAX;
+			newopt(comb3t, sizeof(comb3t), n, &comb3table);
+			comb3table.ti_shift += 8;	/* correct for 4 entries per */
+#endif
+
+			dump_table_data(&cttable);
+			dump_table_data(&ultable);
+			dump_table_data(&combtable);
+			dump_table_data(&widthtable);
+		}
+
+		printf("verifying for %s...\n", *argv);
+#if RANGE == 0xffffU
+		for (c=0 ; c <= 0xffffUL ; c++)
+#else
+		for (c=0 ; c <= 0x10ffffUL ; c++)
+#endif
+		{
+			unsigned int glibc;
+			unsigned int mine;
+			unsigned int upper, lower;
+
+#if 0
+#if RANGE < 0x10000UL
+			if (c == 0x10000UL) {
+				c = 0x30000UL;	/* skip 1st and 2nd sup planes */
+			}
+#elif RANGE < 0x20000UL
+			if (c == 0x20000UL) {
+				c = 0x30000UL;	/* skip 2nd sup planes */
+			}
+#endif
+#endif
+
+			glibc = 0;
+			if (iswalnum(c)) ++glibc; glibc <<= 1;
+			if (iswalpha(c)) ++glibc; glibc <<= 1;
+			if (iswblank(c)) ++glibc; glibc <<= 1;
+			if (iswcntrl(c)) ++glibc; glibc <<= 1;
+			if (iswdigit(c)) ++glibc; glibc <<= 1;
+			if (iswgraph(c)) ++glibc; glibc <<= 1;
+			if (iswlower(c)) ++glibc; glibc <<= 1;
+			if (iswprint(c)) ++glibc; glibc <<= 1;
+			if (iswpunct(c)) ++glibc; glibc <<= 1;
+			if (iswspace(c)) ++glibc; glibc <<= 1;
+			if (iswupper(c)) ++glibc; glibc <<= 1;
+			if (iswxdigit(c)) ++glibc;
+
+			{
+				unsigned int u;
+				/* Zero-initialized: the lookups below are skipped for
+				 * c > RANGE, yet the diagnostics may still print these. */
+				int n = 0, sc = 0;
+				int i0 = 0, i1 = 0;
+
+				u = c;
+				if (u <= RANGE) {
+					/* Three-level lookup: ii -> ti -> ut, then pick the
+					 * nibble for this character out of the data byte. */
+					sc = u & ((1 << cttable.ti_shift) - 1);
+					u >>= cttable.ti_shift;
+					n = u & ((1 << cttable.ii_shift) - 1);
+					u >>= cttable.ii_shift;
+
+					i0 = cttable.ii[u];
+					i0 <<= cttable.ii_shift;
+					i1 = cttable.ti[i0 + n];
+					i1 <<= (cttable.ti_shift-1);
+					d = cttable.ut[i1 + (sc >> 1)];
+
+					if (sc & 1) {
+						d >>= 4;
+					}
+					d &= 0x0f;
+				} else if ((((unsigned int)(c - 0xe0020UL)) <= 0x5f) || (c == 0xe0001UL)){
+					d = __CTYPE_punct;
+				} else if (((unsigned int)(c - 0xf0000UL)) < 0x20000UL) {
+					if ((c & 0xffffU) <= 0xfffdU) {
+						d = __CTYPE_punct;
+					} else {
+						d = __CTYPE_unclassified;
+					}
+				} else {
+					d = __CTYPE_unclassified;
+				}
+
+				mine = 0;
+				if (mywalnum(c)) ++mine; mine <<= 1;
+				if (mywalpha(c)) ++mine; mine <<= 1;
+				if (mywblank(c)) ++mine; mine <<= 1;
+				if (mywcntrl(c)) ++mine; mine <<= 1;
+				if (mywdigit(c)) ++mine; mine <<= 1;
+				if (mywgraph(c)) ++mine; mine <<= 1;
+				if (mywlower(c)) ++mine; mine <<= 1;
+				if (mywprint(c)) ++mine; mine <<= 1;
+				if (mywpunct(c)) ++mine; mine <<= 1;
+				if (mywspace(c)) ++mine; mine <<= 1;
+				if (mywupper(c)) ++mine; mine <<= 1;
+				if (mywxdigit(c)) ++mine;
+
+				if (glibc != mine) {
+					printf("%#8x : glibc %#4x != %#4x mine %d\n", c, glibc, mine, d);
+					if (c < 0x30000UL) {
+						printf("sc=%#x u=%#x n=%#x i0=%#x i1=%#x\n", sc, u, n, i0, i1);
+					}
+				}
+				upper = lower = u = c;
+				if (u <= RANGE) {
+					sc = u & ((1 << ultable.ti_shift) - 1);
+					u >>= ultable.ti_shift;
+					n = u & ((1 << ultable.ii_shift) - 1);
+					u >>= ultable.ii_shift;
+
+					i0 = ultable.ii[u];
+					i0 <<= ultable.ii_shift;
+					i1 = ultable.ti[i0 + n];
+					i1 <<= (ultable.ti_shift);
+					i1 += sc;
+					i0 = ultable.ut[i1];
+					upper = c + uldiff[i0].u;
+					lower = c + uldiff[i0].l;
+				}
+
+				if (towupper(c) != upper) {
+					printf("%#8x : towupper glibc %#4x != %#4x mine\n",
+						   c, towupper(c), upper);
+				}
+
+				if (towlower(c) != lower) {
+					printf("%#8x : towlower glibc %#4x != %#4x mine i0 = %d\n",
+						   c, towlower(c), lower, i0);
+				}
+
+				if (totitle && ((tt = towctrans(c, totitle)) != upper)) {
+					printf("%#8x : totitle glibc %#4lx != %#4x mine i0 = %d\n",
+						   c, tt, upper, i0);
+				}
+			}
+
+
+			if ((c & 0xfff) == 0xfff) printf(".");
+		}
+		printf("done\n");
+	}
+
+	/* Without at least one accepted locale the tables were never built;
+	 * writing them out would read uninitialized memory. */
+	if (!built) {
+		printf("no usable locale given; tables not built.\n");
+		return EXIT_FAILURE;
+	}
+
+	if (1) {
+		FILE *fp;
+
+		if (!(fp = fopen("wctables.h", "w"))) {
+			printf("couldn't open wctables.h!\n");
+			return EXIT_FAILURE;
+		}
+
+		fprintf(fp, "#define WC_TABLE_DOMAIN_MAX %#8lx\n\n",
+				(unsigned long) RANGE);
+		output_table(fp, "ctype", &cttable);
+		output_table(fp, "uplow", &ultable);
+
+
+#warning fix the upper bound on the upper/lower tables... save 200 bytes or so
+		fprintf(fp, "#define WCuplow_diffs %7u\n", ul_count);
+		fprintf(fp, "\n#ifdef WANT_WCuplow_diff_data\n\n");
+		fprintf(fp, "\nstatic const short WCuplow_diff_data[%zu] = {",
+				2 * (size_t) ul_count);
+		for (i=0 ; i < ul_count ; i++) {
+			if (i % 4 == 0) {
+				fprintf(fp, "\n");
+			}
+			fprintf(fp, " %6d, %6d,", uldiff[i].u, uldiff[i].l);
+		}
+		fprintf(fp, "\n};\n\n");
+		fprintf(fp, "#endif /* WANT_WCuplow_diff_data */\n\n");
+
+
+		output_table(fp, "comb", &combtable);
+		output_table(fp, "width", &widthtable);
+
+		/* fclose() flushes; a failure here means the header is incomplete. */
+		if (fclose(fp)) {
+			printf("error writing wctables.h!\n");
+			return EXIT_FAILURE;
+		}
+	}
+
+	return EXIT_SUCCESS;
+}
+
+/* Compress the flat byte table UT (USIZE bytes) by cutting it into blocks
+ * of 2^SHIFT bytes, keeping one copy of each distinct block, and replacing
+ * the table with a byte-per-block index of block numbers.  When called
+ * from outside (not from itself) the resulting index is compressed once
+ * more in the same way, trying index shifts 1..13 and keeping the
+ * smallest total.
+ *
+ * Returns the total size in bytes of the compressed form, or SIZE_MAX when
+ * the table is empty at this shift or has more than 255 distinct blocks
+ * (a block number must fit in a byte).
+ *
+ * If TBL is NULL or tbl->ii is NULL the call only measures.  If tbl->ii
+ * points at a buffer -- main() sizes it with the value returned by a
+ * measuring call at the same SHIFT -- the three levels are stored there
+ * back to back and the length and shift fields of TBL are filled in.
+ *
+ * Relies on a static depth flag and on the file-scope nu_val consumed by
+ * nu_memcmp(), and keeps two RANGE-sized arrays on the stack in each of
+ * its (at most two) nested activations. */
+size_t newopt(unsigned char *ut, size_t usize, int shift, table_data *tbl)
+{
+	static int recurse = 0;
+	unsigned char *ti[RANGE+1];	/* table index */
+	size_t numblocks;
+	size_t blocksize;
+	size_t uniq;
+	size_t i, j;
+	size_t smallest, t;
+	unsigned char *ii_save;
+	int uniqblock[256];		/* position in ti[] of each distinct block */
+	unsigned char uit[RANGE+1];	/* block number for every block of ut */
+	int shift2;
+
+	ii_save = NULL;
+	blocksize = (size_t) 1 << shift;
+	numblocks = usize >> shift;
+
+	/* Nothing to index; ti[0] below would be read uninitialized. */
+	if (numblocks == 0) {
+		return SIZE_MAX;
+	}
+
+	/* init table index */
+	for (i=j=0 ; i < numblocks ; i++) {
+		ti[i] = ut + j;
+		j += blocksize;
+	}
+
+	/* sort */
+	nu_val = blocksize;
+	qsort(ti, numblocks, sizeof(unsigned char *), nu_memcmp);
+
+	/* Walk the sorted blocks, numbering each run of equal blocks. */
+	uniq = 1;
+	uniqblock[0] = 0;	/* the first sorted block is the first distinct one */
+	uit[(ti[0]-ut)/blocksize] = 0;
+	for (i=1 ; i < numblocks ; i++) {
+		int order = memcmp(ti[i-1], ti[i], blocksize);
+
+		if (order < 0) {
+			if (++uniq > 255) {
+				break;
+			}
+			uniqblock[uniq - 1] = i;
+		} else if (order > 0) {
+			printf("bad sort %zu!\n", i);
+			abort();
+		}
+		uit[(ti[i]-ut)/blocksize] = uniq - 1;
+	}
+
+	smallest = SIZE_MAX;
+	shift2 = -1;
+	if (uniq <= 255) {
+		smallest = numblocks + uniq * blocksize;
+		if (!recurse) {
+			++recurse;
+			for (j=1 ; j < 14 ; j++) {
+				if ((numblocks >> j) < 2) break;
+				/* Hide the output buffer so the nested call only measures. */
+				if (tbl) {
+					ii_save = tbl->ii;
+					tbl->ii = NULL;
+				}
+				if ((t = newopt(uit, numblocks, j, tbl)) < SIZE_MAX) {
+					t += uniq * blocksize;
+				}
+				if (tbl) {
+					tbl->ii = ii_save;
+				}
+				if (smallest >= t) {
+					shift2 = j;
+					smallest = t;
+					if (!tbl || !tbl->ii) {
+						printf("ishift %d tshift %d size %zu\n",
+							   shift2, shift, t);
+					}
+/*				} else { */
+/*					break; */
+				}
+			}
+			--recurse;
+		}
+	} else {
+		return SIZE_MAX;
+	}
+
+	if (tbl && tbl->ii) {
+		if (recurse) {
+			/* Nested fill call: this level's index is ii, its blocks are ti. */
+			tbl->ii_shift = shift;
+			tbl->ii_len = numblocks;
+			memcpy(tbl->ii, uit, numblocks);
+			tbl->ti = tbl->ii + tbl->ii_len;
+			tbl->ti_len = uniq * blocksize;
+			for (i=0 ; i < uniq ; i++) {
+				memcpy(tbl->ti + i * blocksize, ti[uniqblock[i]], blocksize);
+			}
+		} else {
+			/* No index shift was usable (index too short, or every
+			 * candidate had too many distinct blocks): a shift of -1
+			 * must not reach the nested call. */
+			if (shift2 < 0) {
+				printf("no usable ishift for tshift %d!\n", shift);
+				abort();
+			}
+			++recurse;
+			printf("setting ishift %d tshift %d\n",
+				   shift2, shift);
+			newopt(uit, numblocks, shift2, tbl);
+			--recurse;
+			tbl->ti_shift = shift;
+			tbl->ut_len = uniq * blocksize;
+			tbl->ut = tbl->ti + tbl->ti_len;
+			for (i=0 ; i < uniq ; i++) {
+				memcpy(tbl->ut + i * blocksize, ti[uniqblock[i]], blocksize);
+			}
+		}
+	}
+	return smallest;
+}