OSDN Git Service

New locale support (in development). Supports LC_CTYPE, LC_NUMERIC,
[uclinux-h8/uClibc.git] / extra / locale / gen_wctype.c
1
2 #define _GNU_SOURCE
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <string.h>
6 #include <locale.h>
7 #include <wctype.h>
8 #include <limits.h>
9 #include <stdint.h>
10 #include <wchar.h>
11 #include <ctype.h>
12
13
14 /*       0x9 : space  blank */
15 /*       0xa : space */
16 /*       0xb : space */
17 /*       0xc : space */
18 /*       0xd : space */
19 /*      0x20 : space  blank */
20 /*    0x1680 : space  blank */
21 /*    0x2000 : space  blank */
22 /*    0x2001 : space  blank */
23 /*    0x2002 : space  blank */
24 /*    0x2003 : space  blank */
25 /*    0x2004 : space  blank */
26 /*    0x2005 : space  blank */
27 /*    0x2006 : space  blank */
28 /*    0x2008 : space  blank */
29 /*    0x2009 : space  blank */
30 /*    0x200a : space  blank */
31 /*    0x200b : space  blank */
32 /*    0x2028 : space */
33 /*    0x2029 : space */
34 /*    0x3000 : space  blank */
35
36 /*  typecount[ 0] =    88670  C_alpha_nonupper_nonlower */
37 /*  typecount[ 1] =      742  C_alpha_lower */
38 /*  typecount[ 2] =        4  C_alpha_upper_lower */
39 /*  typecount[ 3] =      731  C_alpha_upper */
40 /*  typecount[ 4] =       10  C_digit */
41 /*  typecount[ 5] =    10270  C_punct */
42 /*  typecount[ 6] =        0  C_graph */
43 /*  typecount[ 7] =        0  C_print_space_nonblank */
44 /*  typecount[ 8] =       14  C_print_space_blank */
45 /*  typecount[ 9] =        0  C_space_nonblank_noncntrl */
46 /*  typecount[10] =        0  C_space_blank_noncntrl */
47 /*  typecount[11] =        6  C_cntrl_space_nonblank */
48 /*  typecount[12] =        1  C_cntrl_space_blank */
49 /*  typecount[13] =       60  C_cntrl_nonspace */
50 /*  typecount[14] =    96100  C_unclassified */
51 /*  typecount[15] =        0  empty_slot */
52
53
54
/* RANGE is the highest wide-character value (inclusive) covered by the
 * generated tables: main() scans 0..RANGE and sizes its work arrays from it.
 * Set to #if 0 to restrict wchars to 16 bits. */
#if 1
#define RANGE 0x2ffffUL
#elif 0
#define RANGE 0x1ffffUL
#else
#define RANGE 0xffffUL                  /* Restrict for 16-bit wchar_t... */
#endif
63
64 /* Classification codes. */
65
/* Printable name of each classification code, indexed by the code's numeric
 * value (0..14); slot 15 is an unused nibble value. */
static const char *typename[16] = {
	[0]  = "C_unclassified",
	[1]  = "C_alpha_nonupper_nonlower",
	[2]  = "C_alpha_lower",
	[3]  = "C_alpha_upper_lower",
	[4]  = "C_alpha_upper",
	[5]  = "C_digit",
	[6]  = "C_punct",
	[7]  = "C_graph",
	[8]  = "C_print_space_nonblank",
	[9]  = "C_print_space_blank",
	[10] = "C_space_nonblank_noncntrl",
	[11] = "C_space_blank_noncntrl",
	[12] = "C_cntrl_space_nonblank",
	[13] = "C_cntrl_space_blank",
	[14] = "C_cntrl_nonspace",
	[15] = "empty_slot"
};
84
/* Taking advantage of the C99 mutual-exclusion guarantees for the various
 * (w)ctype classes, including the descriptions of printing and control
 * (w)chars, we can place each (w)char in exactly one of the following
 * mutually-exclusive subsets.  Since there are fewer than 16 subsets, the
 * data for each (w)char fits in a nibble.  In contrast, glibc uses an
 * unsigned int per (w)char, with one bit flag for each is* type.  While
 * that allows a simple '&' operation to determine the type (vs. a range
 * test plus a little special handling for the "blank" and "xdigit" types
 * in this approach), it also uses 8 times the space for the tables on the
 * typical 32-bit archs we support.
 *
 * The numeric order matters: every test macro below is a range check on
 * these values, so do not reorder the enumerators. */
enum {
	__CTYPE_unclassified = 0,
	__CTYPE_alpha_nonupper_nonlower,	/*  1 */
	__CTYPE_alpha_lower,			/*  2 */
	__CTYPE_alpha_upper_lower,		/*  3 */
	__CTYPE_alpha_upper,			/*  4 */
	__CTYPE_digit,				/*  5 */
	__CTYPE_punct,				/*  6 */
	__CTYPE_graph,				/*  7 */
	__CTYPE_print_space_nonblank,		/*  8 */
	__CTYPE_print_space_blank,		/*  9 */
	__CTYPE_space_nonblank_noncntrl,	/* 10 */
	__CTYPE_space_blank_noncntrl,		/* 11 */
	__CTYPE_cntrl_space_nonblank,		/* 12 */
	__CTYPE_cntrl_space_blank,		/* 13 */
	__CTYPE_cntrl_nonspace,			/* 14 */
};

/* Macros that test for the various (w)ctype classes when passed one of the
 * designator values enumerated above.  The argument is fully parenthesized
 * so that an expression such as (x & 0xf) can be passed safely.  Note that
 * __CTYPE_isblank() expands D twice, so avoid arguments with side effects.
 *
 * The (unsigned int) casts turn each two-sided range test into a single
 * comparison: values below the lower bound wrap around to huge numbers. */
#define __CTYPE_isalnum(D)		((unsigned int)((D) - 1) <= (__CTYPE_digit - 1))
#define __CTYPE_isalpha(D)		((unsigned int)((D) - 1) <= (__CTYPE_alpha_upper - 1))
/* The "space" designators are 8..13; the blank ones are the odd values. */
#define __CTYPE_isblank(D) \
	((((unsigned int)((D) - __CTYPE_print_space_nonblank)) <= 5) && ((D) & 1))
#define __CTYPE_iscntrl(D)		(((unsigned int)((D) - __CTYPE_cntrl_space_nonblank)) <= 2)
#define __CTYPE_isdigit(D)		((D) == __CTYPE_digit)
#define __CTYPE_isgraph(D)		((unsigned int)((D) - 1) <= (__CTYPE_graph - 1))
#define __CTYPE_islower(D)		(((unsigned int)((D) - __CTYPE_alpha_lower)) <= 1)
#define __CTYPE_isprint(D)		((unsigned int)((D) - 1) <= (__CTYPE_print_space_blank - 1))
#define __CTYPE_ispunct(D)		((D) == __CTYPE_punct)
#define __CTYPE_isspace(D)		(((unsigned int)((D) - __CTYPE_print_space_nonblank)) <= 5)
#define __CTYPE_isupper(D)		(((unsigned int)((D) - __CTYPE_alpha_upper_lower)) <= 1)
/* xdigit cannot be derived from the designator alone: X is the character
 * itself, folded to lower case by |0x20 and range-checked against 'a'..'f'. */
#define __CTYPE_isxdigit(D,X) \
	(__CTYPE_isdigit(D) || (((unsigned int)(((X)|0x20) - 'a')) <= 5))

/* Table-based counterparts of the isw*() functions.  These deliberately
 * ignore their argument (except mywxdigit) and instead read a variable
 * named `d` that must be in scope at the point of use and must hold the
 * designator of the character being tested. */
#define mywalnum(x)		__CTYPE_isalnum(d)
#define mywalpha(x)		__CTYPE_isalpha(d)
#define mywblank(x)		__CTYPE_isblank(d)
#define mywcntrl(x)		__CTYPE_iscntrl(d)
#define mywdigit(x)		__CTYPE_isdigit(d)
#define mywgraph(x)		__CTYPE_isgraph(d)
#define mywlower(x)		__CTYPE_islower(d)
#define mywprint(x)		__CTYPE_isprint(d)
#define mywpunct(x)		__CTYPE_ispunct(d)
#define mywspace(x)		__CTYPE_isspace(d)
#define mywupper(x)		__CTYPE_isupper(d)
#define mywxdigit(x)		__CTYPE_isxdigit(d,x)
142
/* One distinct pair of case-conversion offsets: adding `l` to a character
 * yields its lower-case form and adding `u` yields its upper-case form. */
typedef struct {
	short l, u;
} uldiff_entry;
147
/* A three-level lookup table.  A character value is split into high, middle
 * and low bit fields; `ii` is indexed by the high bits, `ti` by
 * (ii entry << ii_shift) + middle bits, and `ut` by
 * (ti entry << ti_shift) + low bits. */
typedef struct {
	/* Number of bytes in each of the three levels. */
	uint16_t ii_len, ti_len, ut_len;

	/* Width in bits of the middle (ii_shift) and low (ti_shift) fields. */
	unsigned char ii_shift, ti_shift;

	/* The three levels themselves. */
	unsigned char *ii;
	unsigned char *ti;
	unsigned char *ut;
} table_data;
160
161
162 void output_table(FILE *fp, const char *name, table_data *tbl)
163 {
164         size_t i;
165
166         fprintf(fp, "#define WC%s_II_LEN    %7u\n", name, tbl->ii_len);
167         fprintf(fp, "#define WC%s_TI_LEN    %7u\n", name, tbl->ti_len);
168         fprintf(fp, "#define WC%s_UT_LEN    %7u\n", name, tbl->ut_len);
169
170         fprintf(fp, "#define WC%s_II_SHIFT  %7u\n", name, tbl->ii_shift);
171         fprintf(fp, "#define WC%s_TI_SHIFT  %7u\n", name, tbl->ti_shift);
172
173         fprintf(fp, "\n#ifdef WANT_WC%s_data\n", name);
174
175         i = tbl->ii_len + tbl->ti_len + tbl->ut_len;
176         fprintf(fp, "\nstatic const unsigned char WC%s_data[%zu] = {", name, i);
177         for (i=0 ; i < tbl->ii_len ; i++) {
178                 if (i % 12 == 0) {
179                         fprintf(fp, "\n");
180                 }
181                 fprintf(fp, " %#04x,", tbl->ii[i]);
182         }
183         for (i=0 ; i < tbl->ti_len ; i++) {
184                 if (i % 12 == 0) {
185                         fprintf(fp, "\n");
186                 }
187                 fprintf(fp, " %#04x,", tbl->ti[i]);
188         }
189         for (i=0 ; i < tbl->ut_len ; i++) {
190                 if (i % 12 == 0) {
191                         fprintf(fp, "\n");
192                 }
193                 fprintf(fp, " %#04x,", tbl->ut[i]);
194         }
195         fprintf(fp, "\n};\n\n");
196
197         fprintf(fp, "#endif /* WANT_WC%s_data */\n\n", name);
198 }
199
200 static void dump_table_data(table_data *tbl)
201 {
202         printf("ii_shift = %d  ti_shift = %d\n"
203                    "ii_len = %d  ti_len = %d  ut_len = %d\n"
204                    "total = %d\n",
205                    tbl->ii_shift, tbl->ti_shift,
206                    tbl->ii_len, tbl->ti_len, tbl->ut_len,
207                    (int) tbl->ii_len + (int) tbl->ti_len + (int) tbl->ut_len);
208 }
209
/* Block length, in bytes, compared by nu_memcmp().  Must be set before
 * sorting, because qsort() offers no way to pass it to the comparator. */
static size_t nu_val;

/* qsort() comparator for an array of `unsigned char *`: orders the pointers
 * by the first nu_val bytes of the blocks they point to. */
int nu_memcmp(const void *a, const void *b)
{
	const unsigned char *const *lhs = a;
	const unsigned char *const *rhs = b;

	return memcmp(*lhs, *rhs, nu_val);
}
217
218 static size_t newopt(unsigned char *ut, size_t usize, int shift, table_data *tbl);
219
220 #define MAXTO           255                     /* Restrict to minimal unsigned char max. */
221
222 int main(int argc, char **argv)
223 {
224         long int u, l, tt;
225         size_t smallest, t;
226         unsigned int c;
227         unsigned int d;
228         int i, n;
229         int ul_count = 0;
230         uldiff_entry uldiff[MAXTO];
231         table_data cttable;
232         table_data ultable;
233         table_data combtable;
234         table_data widthtable;
235
236         unsigned char wct[(RANGE/2)+1]; /* wctype table (nibble per wchar) */
237         unsigned char ult[RANGE+1];     /* upper/lower table */
238         unsigned char combt[(RANGE/4)+1];       /* combining */
239         unsigned char widtht[(RANGE/4)+1];      /* width */
240         wctrans_t totitle;
241         wctype_t is_comb, is_comb3;
242
243         long int typecount[16];
244         int built = 0;
245
246         setvbuf(stdout, NULL, _IONBF, 0);
247
248         while (--argc) {
249                 if (!setlocale(LC_CTYPE, *++argv)) {
250                         printf("setlocale(LC_CTYPE,%s) failed!\n", *argv);
251                         continue;
252                 }
253
254                 if (!(totitle = wctrans("totitle"))) {
255                         printf("no totitle transformation.\n");
256                 }
257                 if (!(is_comb = wctype("combining"))) {
258                         printf("no combining wctype.\n");
259                 }
260                 if (!(is_comb3 = wctype("combining_level3"))) {
261                         printf("no combining_level3 wctype.\n");
262                 }
263
264                 if (!built) {
265                 built = 1;
266                 ul_count = 1;
267                 uldiff[0].u = uldiff[0].l = 0;
268
269                 memset(wct, 0, sizeof(wct));
270                 memset(combt, 0, sizeof(combt));
271                 memset(widtht, 0, sizeof(widtht));
272
273                 for (i = 0 ; i < 16 ; i++) {
274                         typecount[i] = 0;
275                 }
276
277                 for (c=0 ; c <= RANGE ; c++) {
278                         if (iswdigit(c)) {
279                                 d = __CTYPE_digit;
280                         } else if (iswalpha(c)) {
281                                 d = __CTYPE_alpha_nonupper_nonlower;
282                                 if (iswlower(c)) {
283                                         d = __CTYPE_alpha_lower;
284                                         if (iswupper(c)) {
285                                                 d = __CTYPE_alpha_upper_lower;
286                                         }
287                                 } else if (iswupper(c)) {
288                                         d = __CTYPE_alpha_upper;
289                                 }
290                         } else if (iswpunct(c)) {
291                                 d = __CTYPE_punct;
292                         } else if (iswgraph(c)) {
293                                 d = __CTYPE_graph;
294                         } else if (iswprint(c)) {
295                                 d = __CTYPE_print_space_nonblank;
296                                 if (iswblank(c)) {
297                                         d = __CTYPE_print_space_blank;
298                                 }
299                         } else if (iswspace(c) && !iswcntrl(c)) {
300                                 d = __CTYPE_space_nonblank_noncntrl;
301                                 if (iswblank(c)) {
302                                         d = __CTYPE_space_blank_noncntrl;
303                                 }
304                         } else if (iswcntrl(c)) {
305                                 d = __CTYPE_cntrl_nonspace;
306                                 if (iswspace(c)) {
307                                         d = __CTYPE_cntrl_space_nonblank;
308                                         if (iswblank(c)) {
309                                                 d = __CTYPE_cntrl_space_blank;
310                                         }
311                                 }
312                         } else {
313                                 d = __CTYPE_unclassified;
314                         }
315
316                         ++typecount[d];
317
318 #if 0
319                         if (iswspace(c)) {
320                                 if (iswblank(c)) {
321                                         printf("%#8x : space  blank\n", c);
322                                 } else {
323                                         printf("%#8x : space\n", c);
324                                 }
325                         }
326 #endif
327
328 #if 0
329                         if (c < 256) {
330                                 unsigned int glibc;
331
332                                 glibc = 0;
333                                 if (isalnum(c)) ++glibc; glibc <<= 1;
334                                 if (isalpha(c)) ++glibc; glibc <<= 1;
335                                 if (isblank(c)) ++glibc; glibc <<= 1;
336                                 if (iscntrl(c)) ++glibc; glibc <<= 1;
337                                 if (isdigit(c)) ++glibc; glibc <<= 1;
338                                 if (isgraph(c)) ++glibc; glibc <<= 1;
339                                 if (islower(c)) ++glibc; glibc <<= 1;
340                                 if (isprint(c)) ++glibc; glibc <<= 1;
341                                 if (ispunct(c)) ++glibc; glibc <<= 1;
342                                 if (isspace(c)) ++glibc; glibc <<= 1;
343                                 if (isupper(c)) ++glibc; glibc <<= 1;
344                                 if (isxdigit(c)) ++glibc;
345                                 printf("%#8x : ctype %#4x\n", c, glibc);
346                         }
347 #endif
348
349 #if 1
350                         /* Paranoid checking... */
351                         {
352                                 unsigned int glibc;
353                                 unsigned int mine;
354
355                                 glibc = 0;
356                                 if (iswalnum(c)) ++glibc; glibc <<= 1;
357                                 if (iswalpha(c)) ++glibc; glibc <<= 1;
358                                 if (iswblank(c)) ++glibc; glibc <<= 1;
359                                 if (iswcntrl(c)) ++glibc; glibc <<= 1;
360                                 if (iswdigit(c)) ++glibc; glibc <<= 1;
361                                 if (iswgraph(c)) ++glibc; glibc <<= 1;
362                                 if (iswlower(c)) ++glibc; glibc <<= 1;
363                                 if (iswprint(c)) ++glibc; glibc <<= 1;
364                                 if (iswpunct(c)) ++glibc; glibc <<= 1;
365                                 if (iswspace(c)) ++glibc; glibc <<= 1;
366                                 if (iswupper(c)) ++glibc; glibc <<= 1;
367                                 if (iswxdigit(c)) ++glibc;
368
369                                 mine = 0;
370                                 if (mywalnum(c)) ++mine; mine <<= 1;
371                                 if (mywalpha(c)) ++mine; mine <<= 1;
372                                 if (mywblank(c)) ++mine; mine <<= 1;
373                                 if (mywcntrl(c)) ++mine; mine <<= 1;
374                                 if (mywdigit(c)) ++mine; mine <<= 1;
375                                 if (mywgraph(c)) ++mine; mine <<= 1;
376                                 if (mywlower(c)) ++mine; mine <<= 1;
377                                 if (mywprint(c)) ++mine; mine <<= 1;
378                                 if (mywpunct(c)) ++mine; mine <<= 1;
379                                 if (mywspace(c)) ++mine; mine <<= 1;
380                                 if (mywupper(c)) ++mine; mine <<= 1;
381                                 if (mywxdigit(c)) ++mine;
382
383                                 if (glibc != mine) {
384                                         printf("%#8x : glibc %#4x != %#4x mine  %u\n", c, glibc, mine, d);
385                                         return EXIT_FAILURE;
386                                 }
387
388 #if 0
389                                 if (iswctype(c,is_comb) || iswctype(c,is_comb3)) {
390 /*                                      if (!iswpunct(c)) { */
391                                                 printf("%#8x : %d %d %#4x\n",
392                                                            c, iswctype(c,is_comb),iswctype(c,is_comb3), glibc);
393 /*                                      } */
394                                 }
395 #endif
396
397                         }
398 #endif
399
400                         combt[c/4] |= ((((!!iswctype(c,is_comb)) << 1) | !!iswctype(c,is_comb3))
401                                                    << ((c & 3) << 1));
402 /*                      comb3t[c/8] |= ((!!iswctype(c,is_comb3)) << (c & 7)); */
403                         widtht[c/4] |= (wcwidth(c) << ((c & 3) << 1));
404
405                         if (c & 1) {    /* Use the high nibble for odd numbered wchars. */
406                                 d <<= 4;
407                         }
408                         wct[c/2] |= d;
409
410                         l = towlower(c) - c;
411                         u = towupper(c) - c;
412                         ult[c] = 0;
413                         if (l || u) {
414                                 if ((l != (short)l) || (u != (short)u)) {
415                                         printf("range assumption error!  %x  %ld  %ld\n", c, l, u);
416                                         return EXIT_FAILURE;
417                                 }
418                                 for (i=0 ; i < ul_count ; i++) {
419                                         if ((l == uldiff[i].l) && (u == uldiff[i].u)) {
420                                                 goto found;
421                                         }
422                                 }
423                                 uldiff[ul_count].l = l;
424                                 uldiff[ul_count].u = u;
425                                 ++ul_count;
426                                 if (ul_count > MAXTO) {
427                                         printf("too many touppers/tolowers!\n");
428                                         return EXIT_FAILURE;
429                                 }
430                         found:
431                                 ult[c] = i;
432                         }
433                 }
434
435                 for (i = 0 ; i < 16 ; i++) {
436                         printf("typecount[%2d] = %8ld  %s\n", i, typecount[i], typename[i]);
437                 }
438
439                 printf("optimizing is* table..\n");
440                 n = -1;
441                 smallest = SIZE_MAX;
442                 cttable.ii = NULL;
443                 for (i=0 ; i < 14 ; i++) {
444                         t = newopt(wct, (RANGE/2)+1, i, &cttable);
445                         if (smallest >= t) {
446                                 n = i;
447                                 smallest = t;
448 /*                      } else { */
449 /*                              break; */
450                         }
451                 }
452                 printf("smallest = %zu\n", smallest);
453                 if (!(cttable.ii = malloc(smallest))) {
454                         printf("couldn't allocate space!\n");
455                         return EXIT_FAILURE;
456                 }
457                 smallest = SIZE_MAX;
458                 newopt(wct, (RANGE/2)+1, n, &cttable);
459                 ++cttable.ti_shift;             /* correct for nibble mode */
460
461
462
463                 printf("optimizing u/l-to table..\n");
464                 smallest = SIZE_MAX;
465                 ultable.ii = NULL;
466                 for (i=0 ; i < 14 ; i++) {
467                         t = newopt(ult, RANGE+1, i, &ultable);
468                         if (smallest >= t) {
469                                 n = i;
470                                 smallest = t;
471 /*                      } else { */
472 /*                              break; */
473                         }
474                 }
475                 printf("%zu (smallest) + %zu (u/l diffs) = %zu\n",
476                            smallest, 4 * ul_count, smallest + 4 * ul_count);
477                 printf("smallest = %zu\n", smallest);
478                 if (!(ultable.ii = malloc(smallest))) {
479                         printf("couldn't allocate space!\n");
480                         return EXIT_FAILURE;
481                 }
482                 smallest = SIZE_MAX;
483                 newopt(ult, RANGE+1, n, &ultable);
484
485
486                 printf("optimizing comb table..\n");
487                 smallest = SIZE_MAX;
488                 combtable.ii = NULL;
489                 for (i=0 ; i < 14 ; i++) {
490                         t = newopt(combt, sizeof(combt), i, &combtable);
491                         if (smallest >= t) {
492                                 n = i;
493                                 smallest = t;
494 /*                      } else { */
495 /*                              break; */
496                         }
497                 }
498                 printf("smallest = %zu\n", smallest);
499                 if (!(combtable.ii = malloc(smallest))) {
500                         printf("couldn't allocate space!\n");
501                         return EXIT_FAILURE;
502                 }
503                 smallest = SIZE_MAX;
504                 newopt(combt, sizeof(combt), n, &combtable);
505                 combtable.ti_shift += 4; /* correct for 4 entries per */
506
507
508                 printf("optimizing width table..\n");
509                 smallest = SIZE_MAX;
510                 widthtable.ii = NULL;
511                 for (i=0 ; i < 14 ; i++) {
512                         t = newopt(widtht, sizeof(widtht), i, &widthtable);
513                         if (smallest >= t) {
514                                 n = i;
515                                 smallest = t;
516 /*                      } else { */
517 /*                              break; */
518                         }
519                 }
520                 printf("smallest = %zu\n", smallest);
521                 if (!(widthtable.ii = malloc(smallest))) {
522                         printf("couldn't allocate space!\n");
523                         return EXIT_FAILURE;
524                 }
525                 smallest = SIZE_MAX;
526                 newopt(widtht, sizeof(widtht), n, &widthtable);
527                 widthtable.ti_shift += 4; /* correct for 4 entries per */
528
529
530 #if 0
531                 printf("optimizing comb3 table..\n");
532                 smallest = SIZE_MAX;
533                 comb3table.ii = NULL;
534                 for (i=0 ; i < 14 ; i++) {
535                         t = newopt(comb3t, sizeof(comb3t), i, &comb3table);
536                         if (smallest >= t) {
537                                 n = i;
538                                 smallest = t;
539 /*                      } else { */
540 /*                              break; */
541                         }
542                 }
543                 printf("smallest = %zu\n", smallest);
544                 if (!(comb3table.ii = malloc(smallest))) {
545                         printf("couldn't allocate space!\n");
546                         return EXIT_FAILURE;
547                 }
548                 smallest = SIZE_MAX;
549                 newopt(comb3t, sizeof(comb3t), n, &comb3table);
550                 comb3table.ti_shift += 8; /* correct for 4 entries per */
551 #endif
552
553                 dump_table_data(&cttable);
554                 dump_table_data(&ultable);
555                 dump_table_data(&combtable);
556                 dump_table_data(&widthtable);
557                 }
558
559                 printf("verifying for %s...\n", *argv);
560 #if RANGE == 0xffffU
561                 for (c=0 ; c <= 0xffffUL ; c++)
562 #else
563                 for (c=0 ; c <= 0x10ffffUL ; c++)
564 #endif
565                         {
566                         unsigned int glibc;
567                         unsigned int mine;
568                         unsigned int upper, lower;
569
570 #if 0
571 #if RANGE < 0x10000UL
572                         if (c == 0x10000UL) {
573                                 c = 0x30000UL;  /* skip 1st and 2nd sup planes */
574                         }
575 #elif RANGE < 0x20000UL
576                         if (c == 0x20000UL) {
577                                 c = 0x30000UL;  /* skip 2nd sup planes */
578                         }
579 #endif
580 #endif
581
582                         glibc = 0;
583                         if (iswalnum(c)) ++glibc; glibc <<= 1;
584                         if (iswalpha(c)) ++glibc; glibc <<= 1;
585                         if (iswblank(c)) ++glibc; glibc <<= 1;
586                         if (iswcntrl(c)) ++glibc; glibc <<= 1;
587                         if (iswdigit(c)) ++glibc; glibc <<= 1;
588                         if (iswgraph(c)) ++glibc; glibc <<= 1;
589                         if (iswlower(c)) ++glibc; glibc <<= 1;
590                         if (iswprint(c)) ++glibc; glibc <<= 1;
591                         if (iswpunct(c)) ++glibc; glibc <<= 1;
592                         if (iswspace(c)) ++glibc; glibc <<= 1;
593                         if (iswupper(c)) ++glibc; glibc <<= 1;
594                         if (iswxdigit(c)) ++glibc;
595
596                         {
597                                 unsigned int u;
598                                 int n, sc;
599                                 int i0, i1;
600
601                                 u = c;
602                                 if (u <= RANGE) {
603                                         sc = u & ((1 << cttable.ti_shift) - 1);
604                                         u >>= cttable.ti_shift;
605                                         n = u & ((1 << cttable.ii_shift) - 1);
606                                         u >>= cttable.ii_shift;
607
608                                         i0 = cttable.ii[u];
609                                         i0 <<= cttable.ii_shift;
610                                         i1 = cttable.ti[i0 + n];
611                                         i1 <<= (cttable.ti_shift-1);
612                                         d = cttable.ut[i1 + (sc >> 1)];
613
614                                         if (sc & 1) {
615                                                 d >>= 4;
616                                         }
617                                         d &= 0x0f;
618                                 } else if ((((unsigned int)(c - 0xe0020UL)) <= 0x5f) || (c == 0xe0001UL)){
619                                         d = __CTYPE_punct;
620                                 } else if (((unsigned int)(c - 0xf0000UL)) < 0x20000UL) {
621                                         if ((c & 0xffffU) <= 0xfffdU) {
622                                                 d = __CTYPE_punct;
623                                         } else {
624                                                 d = __CTYPE_unclassified;
625                                         }
626                                 } else {
627                                         d = __CTYPE_unclassified;
628                                 }
629
630                         mine = 0;
631                         if (mywalnum(c)) ++mine; mine <<= 1;
632                         if (mywalpha(c)) ++mine; mine <<= 1;
633                         if (mywblank(c)) ++mine; mine <<= 1;
634                         if (mywcntrl(c)) ++mine; mine <<= 1;
635                         if (mywdigit(c)) ++mine; mine <<= 1;
636                         if (mywgraph(c)) ++mine; mine <<= 1;
637                         if (mywlower(c)) ++mine; mine <<= 1;
638                         if (mywprint(c)) ++mine; mine <<= 1;
639                         if (mywpunct(c)) ++mine; mine <<= 1;
640                         if (mywspace(c)) ++mine; mine <<= 1;
641                         if (mywupper(c)) ++mine; mine <<= 1;
642                         if (mywxdigit(c)) ++mine;
643
644                         if (glibc != mine) {
645                                 printf("%#8x : glibc %#4x != %#4x mine %d\n", c, glibc, mine, d);
646                                 if (c < 0x30000UL) {
647                                         printf("sc=%#x u=%#x n=%#x i0=%#x i1=%#x\n", sc, u, n, i0, i1);
648                                 }
649                         }
650                                 upper = lower = u = c;
651                                 if (u <= RANGE) {
652                                         sc = u & ((1 << ultable.ti_shift) - 1);
653                                         u >>= ultable.ti_shift;
654                                         n = u & ((1 << ultable.ii_shift) - 1);
655                                         u >>= ultable.ii_shift;
656
657                                         i0 = ultable.ii[u];
658                                         i0 <<= ultable.ii_shift;
659                                         i1 = ultable.ti[i0 + n];
660                                         i1 <<= (ultable.ti_shift);
661                                         i1 += sc;
662                                         i0 = ultable.ut[i1];
663                                         upper = c + uldiff[i0].u;
664                                         lower = c + uldiff[i0].l;
665                                 }
666
667                         if (towupper(c) != upper) {
668                                 printf("%#8x : towupper glibc %#4x != %#4x mine\n",
669                                            c, towupper(c), upper);
670                         }
671                                 
672                         if (towlower(c) != lower) {
673                                 printf("%#8x : towlower glibc %#4x != %#4x mine   i0 = %d\n",
674                                            c, towlower(c), lower, i0);
675                         }
676
677                         if (totitle && ((tt = towctrans(c, totitle)) != upper)) {
678                                 printf("%#8x : totitle glibc %#4lx != %#4x mine   i0 = %d\n",
679                                            c, tt, upper, i0);
680                         }
681                         }
682
683
684                         if ((c & 0xfff) == 0xfff) printf(".");
685                 }
686                 printf("done\n");
687         }
688
689         if (1) {
690                 FILE *fp;
691
692                 if (!(fp = fopen("wctables.h", "w"))) {
693                         printf("couldn't open wctables.h!\n");
694                         return EXIT_FAILURE;
695                 }
696
697                 fprintf(fp, "#define WC_TABLE_DOMAIN_MAX  %#8lx\n\n",
698                                 (unsigned long) RANGE);
699                 output_table(fp, "ctype", &cttable);
700                 output_table(fp, "uplow", &ultable);
701         
702
703 #warning fix the upper bound on the upper/lower tables... save 200 bytes or so
704                 fprintf(fp, "#define WCuplow_diffs  %7u\n", ul_count);
705                 fprintf(fp, "\n#ifdef WANT_WCuplow_diff_data\n\n");
706                 fprintf(fp, "\nstatic const short WCuplow_diff_data[%zu] = {",
707                            2 * (size_t) ul_count);
708                 for (i=0 ; i < ul_count ; i++) {
709                         if (i % 4 == 0) {
710                                 fprintf(fp, "\n");
711                         }
712                         fprintf(fp, " %6d, %6d,", uldiff[i].u, uldiff[i].l);
713                 }
714                 fprintf(fp, "\n};\n\n");
715                 fprintf(fp, "#endif /* WANT_WCuplow_diff_data */\n\n");
716
717
718                 output_table(fp, "comb", &combtable);
719                 output_table(fp, "width", &widthtable);
720
721                 fclose(fp);
722         }
723
724         return EXIT_SUCCESS;
725 }
726
727 size_t newopt(unsigned char *ut, size_t usize, int shift, table_data *tbl)
728 {
729         static int recurse = 0;
730         unsigned char *ti[RANGE+1];     /* table index */
731         size_t numblocks;
732         size_t blocksize;
733         size_t uniq;
734         size_t i, j;
735         size_t smallest, t;
736         unsigned char *ii_save;
737         int uniqblock[256];
738         unsigned char uit[RANGE+1];
739         int shift2;
740
741         ii_save = NULL;
742         blocksize = 1 << shift;
743         numblocks = usize >> shift;
744
745         /* init table index */
746         for (i=j=0 ; i < numblocks ; i++) {
747                 ti[i] = ut + j;
748                 j += blocksize;
749         }
750
751         /* sort */
752         nu_val = blocksize;
753         qsort(ti, numblocks, sizeof(unsigned char *), nu_memcmp);
754         
755         uniq = 1;
756         uit[(ti[0]-ut)/blocksize] = 0;
757         for (i=1 ; i < numblocks ; i++) {
758                 if (memcmp(ti[i-1], ti[i], blocksize) < 0) {
759                         if (++uniq > 255) {
760                                 break;
761                         }
762                         uniqblock[uniq - 1] = i;
763                 }
764 #if 1
765                 else if (memcmp(ti[i-1], ti[i], blocksize) > 0) {
766                         printf("bad sort %i!\n", i);
767                         abort();
768                 }
769 #endif
770                 uit[(ti[i]-ut)/blocksize] = uniq - 1;
771         }
772
773         smallest = SIZE_MAX;
774         shift2 = -1;
775         if (uniq <= 255) {
776                 smallest = numblocks + uniq * blocksize;
777                 if (!recurse) {
778                         ++recurse;
779                         for (j=1 ; j < 14 ; j++) {
780                                 if ((numblocks >> j) < 2) break;
781                                 if (tbl) {
782                                         ii_save = tbl->ii;
783                                         tbl->ii = NULL;
784                                 }
785                                 if ((t = newopt(uit, numblocks, j, tbl)) < SIZE_MAX) {
786                                         t += uniq * blocksize;
787                                 }
788                                 if (tbl) {
789                                         tbl->ii = ii_save;
790                                 }
791                                 if (smallest >= t) {
792                                         shift2 = j;
793                                         smallest = t;
794                                         if (!tbl->ii) {
795                                                 printf("ishift %zu  tshift %zu  size %zu\n",
796                                                            shift2, shift, t);
797                                         }
798 /*                              } else { */
799 /*                                      break; */
800                                 }
801                         }
802                         --recurse;
803                 }
804         } else {
805                 return SIZE_MAX;
806         }
807
808         if (tbl->ii) {
809                 if (recurse) {
810                         tbl->ii_shift = shift;
811                         tbl->ii_len = numblocks;
812                         memcpy(tbl->ii, uit, numblocks);
813                         tbl->ti = tbl->ii + tbl->ii_len;
814                         tbl->ti_len = uniq * blocksize;
815                         for (i=0 ; i < uniq ; i++) {
816                                 memcpy(tbl->ti + i * blocksize, ti[uniqblock[i]], blocksize);
817                         }
818                 } else {
819                         ++recurse;
820                         printf("setting ishift %zu  tshift %zu\n",
821                                                            shift2, shift);
822                         newopt(uit, numblocks, shift2, tbl);
823                         --recurse;
824                         tbl->ti_shift = shift;
825                         tbl->ut_len = uniq * blocksize;
826                         tbl->ut = tbl->ti + tbl->ti_len;
827                         for (i=0 ; i < uniq ; i++) {
828                                 memcpy(tbl->ut + i * blocksize, ti[uniqblock[i]], blocksize);
829                         }
830                 }
831         }
832         return smallest;
833 }