OSDN Git Service

Michael Troß writes: Yes, the missing initialization of uniqblock seems to cause...
[uclinux-h8/uClibc.git] / extra / locale / gen_wctype.c
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <string.h>
4 #include <locale.h>
5 #include <wctype.h>
6 #include <limits.h>
7 #include <stdint.h>
8 #include <wchar.h>
9 #include <ctype.h>
10
11 #ifndef _CTYPE_H
12 #define _CTYPE_H
13 #endif
14 #ifndef _WCTYPE_H
15 #define _WCTYPE_H
16 #endif
17 #include <bits/uClibc_ctype.h>
18
19 /*       0x9 : space  blank */
20 /*       0xa : space */
21 /*       0xb : space */
22 /*       0xc : space */
23 /*       0xd : space */
24 /*      0x20 : space  blank */
25 /*    0x1680 : space  blank */
26 /*    0x2000 : space  blank */
27 /*    0x2001 : space  blank */
28 /*    0x2002 : space  blank */
29 /*    0x2003 : space  blank */
30 /*    0x2004 : space  blank */
31 /*    0x2005 : space  blank */
32 /*    0x2006 : space  blank */
33 /*    0x2008 : space  blank */
34 /*    0x2009 : space  blank */
35 /*    0x200a : space  blank */
36 /*    0x200b : space  blank */
37 /*    0x2028 : space */
38 /*    0x2029 : space */
39 /*    0x3000 : space  blank */
40
41 /*  typecount[ 0] =    88670  C_alpha_nonupper_nonlower */
42 /*  typecount[ 1] =      742  C_alpha_lower */
43 /*  typecount[ 2] =        4  C_alpha_upper_lower */
44 /*  typecount[ 3] =      731  C_alpha_upper */
45 /*  typecount[ 4] =       10  C_digit */
46 /*  typecount[ 5] =    10270  C_punct */
47 /*  typecount[ 6] =        0  C_graph */
48 /*  typecount[ 7] =        0  C_print_space_nonblank */
49 /*  typecount[ 8] =       14  C_print_space_blank */
50 /*  typecount[ 9] =        0  C_space_nonblank_noncntrl */
51 /*  typecount[10] =        0  C_space_blank_noncntrl */
52 /*  typecount[11] =        6  C_cntrl_space_nonblank */
53 /*  typecount[12] =        1  C_cntrl_space_blank */
54 /*  typecount[13] =       60  C_cntrl_nonspace */
55 /*  typecount[14] =    96100  C_unclassified */
56 /*  typecount[15] =        0  empty_slot */
57
58
59
60 /* Set to #if 0 to restrict wchars to 16 bits. */
61 #if 1
62 #define RANGE 0x2ffffUL
63 #elif 0
64 #define RANGE 0x1ffffUL
65 #else
66 #define RANGE 0xffffUL                  /* Restrict for 16-bit wchar_t... */
67 #endif
68
69 #if 0
70 /* Classification codes. */
71
72 static const char *typename[] = {
73         "C_unclassified",
74         "C_alpha_nonupper_nonlower",
75         "C_alpha_lower",
76         "C_alpha_upper_lower",
77         "C_alpha_upper",
78         "C_digit",
79         "C_punct",
80         "C_graph",
81         "C_print_space_nonblank",
82         "C_print_space_blank",
83         "C_space_nonblank_noncntrl",
84         "C_space_blank_noncntrl",
85         "C_cntrl_space_nonblank",
86         "C_cntrl_space_blank",
87         "C_cntrl_nonspace",
88         "empty_slot"
89 };
90 #endif
91
92 #if 0
93 /* Taking advantage of the C99 mutual-exclusion guarantees for the various
94  * (w)ctype classes, including the descriptions of printing and control
95  * (w)chars, we can place each in one of the following mutually-exclusive
96  * subsets.  Since there are fewer than 16, we can store the data for
97  * each (w)char in a nibble. In contrast, glibc uses an unsigned int
98  * per (w)char, with one bit flag for each is* type.  While this allows
99  * a simple '&' operation to determine the type vs. a range test and a
100  * little special handling for the "blank" and "xdigit" types in my
101  * approach, it also uses 8 times the space for the tables on the typical
102  * 32-bit archs we support. */
103 enum {
104         __CTYPE_unclassified = 0,
105         __CTYPE_alpha_nonupper_nonlower,
106         __CTYPE_alpha_lower,
107         __CTYPE_alpha_upper_lower,
108         __CTYPE_alpha_upper,
109         __CTYPE_digit,
110         __CTYPE_punct,
111         __CTYPE_graph,
112         __CTYPE_print_space_nonblank,
113         __CTYPE_print_space_blank,
114         __CTYPE_space_nonblank_noncntrl,
115         __CTYPE_space_blank_noncntrl,
116         __CTYPE_cntrl_space_nonblank,
117         __CTYPE_cntrl_space_blank,
118         __CTYPE_cntrl_nonspace,
119 };
120 #endif
121
/* Hex-digit test built from a class code D and the character value X:
 * true when D is a digit class, or when X, with bit 0x20 forced on
 * (folding 'A'..'F' onto 'a'..'f'), lies in 'a'..'f'. */
#define __CTYPE_isxdigit(D,X) \
        (__CTYPE_isdigit(D) || (((unsigned int)(((X)|0x20) - 'a')) <= 5))

/* Table-based counterparts of the isw*() predicates.
 *
 * NOTE: apart from mywxdigit, these macros ignore their argument x.  They
 * all expand to a test on an identifier named `d`, which must be a
 * variable in scope at the point of use holding the class code of the
 * character being tested. */
#define mywalnum(x)             __CTYPE_isalnum(d)
#define mywalpha(x)             __CTYPE_isalpha(d)
#define mywblank(x)     __CTYPE_isblank(d)
#define mywcntrl(x)             __CTYPE_iscntrl(d)
#define mywdigit(x)             __CTYPE_isdigit(d)
#define mywgraph(x)             __CTYPE_isgraph(d)
#define mywlower(x)             __CTYPE_islower(d)
#define mywprint(x)             __CTYPE_isprint(d)
#define mywpunct(x)             __CTYPE_ispunct(d)
#define mywspace(x)             __CTYPE_isspace(d)
#define mywupper(x)             __CTYPE_isupper(d)
#define mywxdigit(x)    __CTYPE_isxdigit(d,x)
137
/* One distinct (tolower, toupper) displacement pair.  main() stores
 * towlower(c) - c in `l` and towupper(c) - c in `u`, after checking that
 * both differences fit in a short. */
typedef struct {
        short l;        /* offset added to a wchar to get its lowercase form */
        short u;        /* offset added to a wchar to get its uppercase form */
} uldiff_entry;
142
/* Description of one compressed lookup table.
 *
 * As used by the verification code in main(), a lookup goes through three
 * levels: ii[] is indexed by the wchar shifted right by
 * (ti_shift + ii_shift); the value found, shifted left by ii_shift and
 * combined with the middle bits, indexes ti[]; that value, shifted left by
 * ti_shift and combined with the low bits, indexes ut[].
 * output_table() emits the three arrays back to back, in this order. */
typedef struct {
        /* Number of bytes in ii[], ti[] and ut[] respectively. */
        uint16_t ii_len;
        uint16_t ti_len;
        uint16_t ut_len;

        /* Bit widths of the middle (ii_shift) and low (ti_shift) index fields. */
        unsigned char ii_shift;
        unsigned char ti_shift;

        unsigned char *ii;      /* first-level index */
        unsigned char *ti;      /* second-level index */
        unsigned char *ut;      /* final data blocks */
} table_data;
155
156
157 void output_table(FILE *fp, const char *name, table_data *tbl)
158 {
159         size_t i;
160
161         fprintf(fp, "#define __LOCALE_DATA_WC%s_II_LEN    %7u\n", name, tbl->ii_len);
162         fprintf(fp, "#define __LOCALE_DATA_WC%s_TI_LEN    %7u\n", name, tbl->ti_len);
163         fprintf(fp, "#define __LOCALE_DATA_WC%s_UT_LEN    %7u\n", name, tbl->ut_len);
164
165         fprintf(fp, "#define __LOCALE_DATA_WC%s_II_SHIFT  %7u\n", name, tbl->ii_shift);
166         fprintf(fp, "#define __LOCALE_DATA_WC%s_TI_SHIFT  %7u\n", name, tbl->ti_shift);
167
168         fprintf(fp, "\n#ifdef WANT_WC%s_data\n", name);
169
170         i = tbl->ii_len + tbl->ti_len + tbl->ut_len;
171         fprintf(fp, "\nstatic const unsigned char __LOCALE_DATA_WC%s_data[%zu] = {", name, i);
172         for (i=0 ; i < tbl->ii_len ; i++) {
173                 if (i % 12 == 0) {
174                         fprintf(fp, "\n");
175                 }
176                 fprintf(fp, " %#04x,", tbl->ii[i]);
177         }
178         for (i=0 ; i < tbl->ti_len ; i++) {
179                 if (i % 12 == 0) {
180                         fprintf(fp, "\n");
181                 }
182                 fprintf(fp, " %#04x,", tbl->ti[i]);
183         }
184         for (i=0 ; i < tbl->ut_len ; i++) {
185                 if (i % 12 == 0) {
186                         fprintf(fp, "\n");
187                 }
188                 fprintf(fp, " %#04x,", tbl->ut[i]);
189         }
190         fprintf(fp, "\n};\n\n");
191
192         fprintf(fp, "#endif /* WANT_WC%s_data */\n\n", name);
193 }
194
195 static void dump_table_data(table_data *tbl)
196 {
197         printf("ii_shift = %d  ti_shift = %d\n"
198                    "ii_len = %d  ti_len = %d  ut_len = %d\n"
199                    "total = %d\n",
200                    tbl->ii_shift, tbl->ti_shift,
201                    tbl->ii_len, tbl->ti_len, tbl->ut_len,
202                    (int) tbl->ii_len + (int) tbl->ti_len + (int) tbl->ut_len);
203 }
204
/* Block length, in bytes, used by nu_memcmp(); set it before sorting. */
static size_t nu_val;

/* qsort()-style comparator for arrays of pointers to unsigned char blocks.
 * a and b each point at a block pointer; the two blocks are ordered by
 * memcmp() over their first nu_val bytes. */
int nu_memcmp(const void *a, const void *b)
{
        const unsigned char *const *lhs = a;
        const unsigned char *const *rhs = b;

        return memcmp(*lhs, *rhs, nu_val);
}
212
213 static size_t newopt(unsigned char *ut, size_t usize, int shift, table_data *tbl);
214
215 #define MAXTO           255                     /* Restrict to minimal unsigned char max. */
216
217 int main(int argc, char **argv)
218 {
219         long int u, l, tt;
220         size_t smallest, t;
221         unsigned int c;
222         unsigned int d;
223         int i, n;
224         int ul_count = 0;
225         uldiff_entry uldiff[MAXTO];
226         table_data cttable;
227         table_data ultable;
228         table_data combtable;
229         table_data widthtable;
230         long int last_comb = 0;
231
232         unsigned char wct[(RANGE/2)+1]; /* wctype table (nibble per wchar) */
233         unsigned char ult[RANGE+1];     /* upper/lower table */
234         unsigned char combt[(RANGE/4)+1];       /* combining */
235         unsigned char widtht[(RANGE/4)+1];      /* width */
236         wctrans_t totitle;
237         wctype_t is_comb, is_comb3;
238
239         long int typecount[16];
240         const char *typename[16];
241         static const char empty_slot[] = "empty_slot";
242         int built = 0;
243
244 #define INIT_TYPENAME(X) typename[__CTYPE_##X] = "C_" #X
245
246         for (i=0 ; i < 16 ; i++) {
247                 typename[i] = empty_slot;
248         }
249
250         INIT_TYPENAME(unclassified);
251         INIT_TYPENAME(alpha_nonupper_nonlower);
252         INIT_TYPENAME(alpha_lower);
253         INIT_TYPENAME(alpha_upper_lower);
254         INIT_TYPENAME(alpha_upper);
255         INIT_TYPENAME(digit);
256         INIT_TYPENAME(punct);
257         INIT_TYPENAME(graph);
258         INIT_TYPENAME(print_space_nonblank);
259         INIT_TYPENAME(print_space_blank);
260         INIT_TYPENAME(space_nonblank_noncntrl);
261         INIT_TYPENAME(space_blank_noncntrl);
262         INIT_TYPENAME(cntrl_space_nonblank);
263         INIT_TYPENAME(cntrl_space_blank);
264         INIT_TYPENAME(cntrl_nonspace);
265
266         setvbuf(stdout, NULL, _IONBF, 0);
267
268         while (--argc) {
269                 if (!setlocale(LC_CTYPE, *++argv)) {
270                         printf("setlocale(LC_CTYPE,%s) failed!  Skipping this locale...\n", *argv);
271                         continue;
272                 }
273
274                 if (!(totitle = wctrans("totitle"))) {
275                         printf("no totitle transformation.\n");
276                 }
277                 if (!(is_comb = wctype("combining"))) {
278                         printf("no combining wctype.\n");
279                 }
280                 if (!(is_comb3 = wctype("combining_level3"))) {
281                         printf("no combining_level3 wctype.\n");
282                 }
283
284                 if (!built) {
285                 built = 1;
286                 ul_count = 1;
287                 uldiff[0].u = uldiff[0].l = 0;
288
289                 memset(wct, 0, sizeof(wct));
290                 memset(combt, 0, sizeof(combt));
291                 memset(widtht, 0, sizeof(widtht));
292
293                 for (i = 0 ; i < 16 ; i++) {
294                         typecount[i] = 0;
295                 }
296
297                 for (c=0 ; c <= RANGE ; c++) {
298                         if (iswdigit(c)) {
299                                 d = __CTYPE_digit;
300                         } else if (iswalpha(c)) {
301                                 d = __CTYPE_alpha_nonupper_nonlower;
302                                 if (iswlower(c)) {
303                                         d = __CTYPE_alpha_lower;
304                                         if (iswupper(c)) {
305                                                 d = __CTYPE_alpha_upper_lower;
306                                         }
307                                 } else if (iswupper(c)) {
308                                         d = __CTYPE_alpha_upper;
309                                 }
310                         } else if (iswpunct(c)) {
311                                 d = __CTYPE_punct;
312                         } else if (iswgraph(c)) {
313                                 d = __CTYPE_graph;
314                         } else if (iswprint(c)) {
315                                 d = __CTYPE_print_space_nonblank;
316                                 if (iswblank(c)) {
317                                         d = __CTYPE_print_space_blank;
318                                 }
319                         } else if (iswspace(c) && !iswcntrl(c)) {
320                                 d = __CTYPE_space_nonblank_noncntrl;
321                                 if (iswblank(c)) {
322                                         d = __CTYPE_space_blank_noncntrl;
323                                 }
324                         } else if (iswcntrl(c)) {
325                                 d = __CTYPE_cntrl_nonspace;
326                                 if (iswspace(c)) {
327                                         d = __CTYPE_cntrl_space_nonblank;
328                                         if (iswblank(c)) {
329                                                 d = __CTYPE_cntrl_space_blank;
330                                         }
331                                 }
332                         } else {
333                                 d = __CTYPE_unclassified;
334                         }
335
336                         ++typecount[d];
337
338 #if 0
339                         if (iswspace(c)) {
340                                 if (iswblank(c)) {
341                                         printf("%#8x : space  blank\n", c);
342                                 } else {
343                                         printf("%#8x : space\n", c);
344                                 }
345                         }
346 #endif
347
348 #if 0
349                         if (c < 256) {
350                                 unsigned int glibc;
351
352                                 glibc = 0;
353                                 if (isalnum(c)) ++glibc; glibc <<= 1;
354                                 if (isalpha(c)) ++glibc; glibc <<= 1;
355                                 if (isblank(c)) ++glibc; glibc <<= 1;
356                                 if (iscntrl(c)) ++glibc; glibc <<= 1;
357                                 if (isdigit(c)) ++glibc; glibc <<= 1;
358                                 if (isgraph(c)) ++glibc; glibc <<= 1;
359                                 if (islower(c)) ++glibc; glibc <<= 1;
360                                 if (isprint(c)) ++glibc; glibc <<= 1;
361                                 if (ispunct(c)) ++glibc; glibc <<= 1;
362                                 if (isspace(c)) ++glibc; glibc <<= 1;
363                                 if (isupper(c)) ++glibc; glibc <<= 1;
364                                 if (isxdigit(c)) ++glibc;
365                                 printf("%#8x : ctype %#4x\n", c, glibc);
366                         }
367 #endif
368
369 #if 1
370                         /* Paranoid checking... */
371                         {
372                                 unsigned int glibc;
373                                 unsigned int mine;
374
375                                 glibc = 0;
376                                 if (iswalnum(c)) ++glibc; glibc <<= 1;
377                                 if (iswalpha(c)) ++glibc; glibc <<= 1;
378                                 if (iswblank(c)) ++glibc; glibc <<= 1;
379                                 if (iswcntrl(c)) ++glibc; glibc <<= 1;
380                                 if (iswdigit(c)) ++glibc; glibc <<= 1;
381                                 if (iswgraph(c)) ++glibc; glibc <<= 1;
382                                 if (iswlower(c)) ++glibc; glibc <<= 1;
383                                 if (iswprint(c)) ++glibc; glibc <<= 1;
384                                 if (iswpunct(c)) ++glibc; glibc <<= 1;
385                                 if (iswspace(c)) ++glibc; glibc <<= 1;
386                                 if (iswupper(c)) ++glibc; glibc <<= 1;
387                                 if (iswxdigit(c)) ++glibc;
388
389                                 mine = 0;
390                                 if (mywalnum(c)) ++mine; mine <<= 1;
391                                 if (mywalpha(c)) ++mine; mine <<= 1;
392                                 if (mywblank(c)) ++mine; mine <<= 1;
393                                 if (mywcntrl(c)) ++mine; mine <<= 1;
394                                 if (mywdigit(c)) ++mine; mine <<= 1;
395                                 if (mywgraph(c)) ++mine; mine <<= 1;
396                                 if (mywlower(c)) ++mine; mine <<= 1;
397                                 if (mywprint(c)) ++mine; mine <<= 1;
398                                 if (mywpunct(c)) ++mine; mine <<= 1;
399                                 if (mywspace(c)) ++mine; mine <<= 1;
400                                 if (mywupper(c)) ++mine; mine <<= 1;
401                                 if (mywxdigit(c)) ++mine;
402
403                                 if (glibc != mine) {
404                                         printf("%#8x : glibc %#4x != %#4x mine  %u\n", c, glibc, mine, d);
405                                         return EXIT_FAILURE;
406                                 }
407
408 #if 0
409                                 if (iswctype(c,is_comb) || iswctype(c,is_comb3)) {
410 /*                                      if (!iswpunct(c)) { */
411                                                 printf("%#8x : %d %d %#4x\n",
412                                                            c, iswctype(c,is_comb),iswctype(c,is_comb3), glibc);
413 /*                                      } */
414                                 }
415 #endif
416 #if 0
417                                 if (iswctype(c,is_comb) || iswctype(c,is_comb3)) {
418                                         if (!last_comb) {
419                                                 printf("%#8x - ", c);
420                                                 last_comb = c;
421                                         } else if (last_comb + 1 < c) {
422                                                 printf("%#8x\n%#8x - ", last_comb, c);
423                                                 last_comb = c;
424                                         } else {
425                                                 last_comb = c;
426                                         }
427                                 }
428 #endif
429                         }
430 #endif
431
432                         combt[c/4] |= ((((!!iswctype(c,is_comb)) << 1) | !!iswctype(c,is_comb3))
433                                                    << ((c & 3) << 1));
434 /*                      comb3t[c/8] |= ((!!iswctype(c,is_comb3)) << (c & 7)); */
435
436 /*                      widtht[c/4] |= (wcwidth(c) << ((c & 3) << 1)); */
437
438                         if (c & 1) {    /* Use the high nibble for odd numbered wchars. */
439                                 d <<= 4;
440                         }
441                         wct[c/2] |= d;
442
443                         l = (long)(int) towlower(c) - c;
444                         u = (long)(int) towupper(c) - c;
445                         ult[c] = 0;
446                         if (l || u) {
447                                 if ((l != (short)l) || (u != (short)u)) {
448                                         printf("range assumption error!  %x  %ld  %ld\n", c, l, u);
449                                         return EXIT_FAILURE;
450                                 }
451                                 for (i=0 ; i < ul_count ; i++) {
452                                         if ((l == uldiff[i].l) && (u == uldiff[i].u)) {
453                                                 goto found;
454                                         }
455                                 }
456                                 uldiff[ul_count].l = l;
457                                 uldiff[ul_count].u = u;
458                                 ++ul_count;
459                                 if (ul_count > MAXTO) {
460                                         printf("too many touppers/tolowers!\n");
461                                         return EXIT_FAILURE;
462                                 }
463                         found:
464                                 ult[c] = i;
465                         }
466                 }
467
468                 for (i = 0 ; i < 16 ; i++) {
469                         printf("typecount[%2d] = %8ld  %s\n", i, typecount[i], typename[i]);
470                 }
471
472                 printf("optimizing is* table..\n");
473                 n = -1;
474                 smallest = SIZE_MAX;
475                 cttable.ii = NULL;
476                 for (i=0 ; i < 14 ; i++) {
477                         t = newopt(wct, (RANGE/2)+1, i, &cttable);
478                         if (smallest >= t) {
479                                 n = i;
480                                 smallest = t;
481 /*                      } else { */
482 /*                              break; */
483                         }
484                 }
485                 printf("smallest = %zu\n", smallest);
486                 if (!(cttable.ii = malloc(smallest))) {
487                         printf("couldn't allocate space!\n");
488                         return EXIT_FAILURE;
489                 }
490                 smallest = SIZE_MAX;
491                 newopt(wct, (RANGE/2)+1, n, &cttable);
492                 ++cttable.ti_shift;             /* correct for nibble mode */
493
494
495
496                 printf("optimizing u/l-to table..\n");
497                 smallest = SIZE_MAX;
498                 ultable.ii = NULL;
499                 for (i=0 ; i < 14 ; i++) {
500                         t = newopt(ult, RANGE+1, i, &ultable);
501                         if (smallest >= t) {
502                                 n = i;
503                                 smallest = t;
504 /*                      } else { */
505 /*                              break; */
506                         }
507                 }
508                 printf("%zu (smallest) + %zu (u/l diffs) = %zu\n",
509                            smallest, 4 * ul_count, smallest + 4 * ul_count);
510                 printf("smallest = %zu\n", smallest);
511                 if (!(ultable.ii = malloc(smallest))) {
512                         printf("couldn't allocate space!\n");
513                         return EXIT_FAILURE;
514                 }
515                 smallest = SIZE_MAX;
516                 newopt(ult, RANGE+1, n, &ultable);
517
518
519 #if 0
520                 printf("optimizing comb table..\n");
521                 smallest = SIZE_MAX;
522                 combtable.ii = NULL;
523                 for (i=0 ; i < 14 ; i++) {
524                         t = newopt(combt, sizeof(combt), i, &combtable);
525                         if (smallest >= t) {
526                                 n = i;
527                                 smallest = t;
528 /*                      } else { */
529 /*                              break; */
530                         }
531                 }
532                 printf("smallest = %zu\n", smallest);
533                 if (!(combtable.ii = malloc(smallest))) {
534                         printf("couldn't allocate space!\n");
535                         return EXIT_FAILURE;
536                 }
537                 smallest = SIZE_MAX;
538                 newopt(combt, sizeof(combt), n, &combtable);
539                 combtable.ti_shift += 4; /* correct for 4 entries per */
540 #endif
541
542
543 #if 0
544                 printf("optimizing width table..\n");
545                 smallest = SIZE_MAX;
546                 widthtable.ii = NULL;
547                 for (i=0 ; i < 14 ; i++) {
548                         t = newopt(widtht, sizeof(widtht), i, &widthtable);
549                         if (smallest >= t) {
550                                 n = i;
551                                 smallest = t;
552 /*                      } else { */
553 /*                              break; */
554                         }
555                 }
556                 printf("smallest = %zu\n", smallest);
557                 if (!(widthtable.ii = malloc(smallest))) {
558                         printf("couldn't allocate space!\n");
559                         return EXIT_FAILURE;
560                 }
561                 smallest = SIZE_MAX;
562                 newopt(widtht, sizeof(widtht), n, &widthtable);
563                 widthtable.ti_shift += 4; /* correct for 4 entries per */
564 #endif
565
566 #if 0
567                 printf("optimizing comb3 table..\n");
568                 smallest = SIZE_MAX;
569                 comb3table.ii = NULL;
570                 for (i=0 ; i < 14 ; i++) {
571                         t = newopt(comb3t, sizeof(comb3t), i, &comb3table);
572                         if (smallest >= t) {
573                                 n = i;
574                                 smallest = t;
575 /*                      } else { */
576 /*                              break; */
577                         }
578                 }
579                 printf("smallest = %zu\n", smallest);
580                 if (!(comb3table.ii = malloc(smallest))) {
581                         printf("couldn't allocate space!\n");
582                         return EXIT_FAILURE;
583                 }
584                 smallest = SIZE_MAX;
585                 newopt(comb3t, sizeof(comb3t), n, &comb3table);
586                 comb3table.ti_shift += 8; /* correct for 4 entries per */
587 #endif
588
589                 dump_table_data(&cttable);
590                 dump_table_data(&ultable);
591                 dump_table_data(&combtable);
592                 }
593
594                 printf("verifying for %s...\n", *argv);
595 #if RANGE == 0xffffU
596                 for (c=0 ; c <= 0xffffUL ; c++)
597 #else
598                 for (c=0 ; c <= 0x10ffffUL ; c++)
599 #endif
600                         {
601                         unsigned int glibc;
602                         unsigned int mine;
603                         unsigned int upper, lower;
604
605 #if 0
606 #if RANGE < 0x10000UL
607                         if (c == 0x10000UL) {
608                                 c = 0x30000UL;  /* skip 1st and 2nd sup planes */
609                         }
610 #elif RANGE < 0x20000UL
611                         if (c == 0x20000UL) {
612                                 c = 0x30000UL;  /* skip 2nd sup planes */
613                         }
614 #endif
615 #endif
616
617                         glibc = 0;
618                         if (iswalnum(c)) ++glibc; glibc <<= 1;
619                         if (iswalpha(c)) ++glibc; glibc <<= 1;
620                         if (iswblank(c)) ++glibc; glibc <<= 1;
621                         if (iswcntrl(c)) ++glibc; glibc <<= 1;
622                         if (iswdigit(c)) ++glibc; glibc <<= 1;
623                         if (iswgraph(c)) ++glibc; glibc <<= 1;
624                         if (iswlower(c)) ++glibc; glibc <<= 1;
625                         if (iswprint(c)) ++glibc; glibc <<= 1;
626                         if (iswpunct(c)) ++glibc; glibc <<= 1;
627                         if (iswspace(c)) ++glibc; glibc <<= 1;
628                         if (iswupper(c)) ++glibc; glibc <<= 1;
629                         if (iswxdigit(c)) ++glibc;
630
631                         {
632                                 unsigned int u;
633                                 int n, sc;
634                                 int i0, i1;
635
636                                 u = c;
637                                 if (u <= RANGE) {
638                                         sc = u & ((1 << cttable.ti_shift) - 1);
639                                         u >>= cttable.ti_shift;
640                                         n = u & ((1 << cttable.ii_shift) - 1);
641                                         u >>= cttable.ii_shift;
642
643                                         i0 = cttable.ii[u];
644                                         i0 <<= cttable.ii_shift;
645                                         i1 = cttable.ti[i0 + n];
646                                         i1 <<= (cttable.ti_shift-1);
647                                         d = cttable.ut[i1 + (sc >> 1)];
648
649                                         if (sc & 1) {
650                                                 d >>= 4;
651                                         }
652                                         d &= 0x0f;
653                                 } else if ((((unsigned int)(c - 0xe0020UL)) <= 0x5f) || (c == 0xe0001UL)){
654                                         d = __CTYPE_punct;
655                                 } else if (((unsigned int)(c - 0xf0000UL)) < 0x20000UL) {
656                                         if ((c & 0xffffU) <= 0xfffdU) {
657                                                 d = __CTYPE_punct;
658                                         } else {
659                                                 d = __CTYPE_unclassified;
660                                         }
661                                 } else {
662                                         d = __CTYPE_unclassified;
663                                 }
664
665                         mine = 0;
666                         if (mywalnum(c)) ++mine; mine <<= 1;
667                         if (mywalpha(c)) ++mine; mine <<= 1;
668                         if (mywblank(c)) ++mine; mine <<= 1;
669                         if (mywcntrl(c)) ++mine; mine <<= 1;
670                         if (mywdigit(c)) ++mine; mine <<= 1;
671                         if (mywgraph(c)) ++mine; mine <<= 1;
672                         if (mywlower(c)) ++mine; mine <<= 1;
673                         if (mywprint(c)) ++mine; mine <<= 1;
674                         if (mywpunct(c)) ++mine; mine <<= 1;
675                         if (mywspace(c)) ++mine; mine <<= 1;
676                         if (mywupper(c)) ++mine; mine <<= 1;
677                         if (mywxdigit(c)) ++mine;
678
679                         if (glibc != mine) {
680                                 printf("%#8x : glibc %#4x != %#4x mine %d\n", c, glibc, mine, d);
681                                 if (c < 0x30000UL) {
682                                         printf("sc=%#x u=%#x n=%#x i0=%#x i1=%#x\n", sc, u, n, i0, i1);
683                                 }
684                         }
685                                 upper = lower = u = c;
686                                 if (u <= RANGE) {
687                                         sc = u & ((1 << ultable.ti_shift) - 1);
688                                         u >>= ultable.ti_shift;
689                                         n = u & ((1 << ultable.ii_shift) - 1);
690                                         u >>= ultable.ii_shift;
691
692                                         i0 = ultable.ii[u];
693                                         i0 <<= ultable.ii_shift;
694                                         i1 = ultable.ti[i0 + n];
695                                         i1 <<= (ultable.ti_shift);
696                                         i1 += sc;
697                                         i0 = ultable.ut[i1];
698                                         upper = c + uldiff[i0].u;
699                                         lower = c + uldiff[i0].l;
700                                 }
701
702                         if (towupper(c) != upper) {
703                                 printf("%#8x : towupper glibc %#4x != %#4x mine\n",
704                                            c, towupper(c), upper);
705                         }
706                                 
707                         if (towlower(c) != lower) {
708                                 printf("%#8x : towlower glibc %#4x != %#4x mine   i0 = %d\n",
709                                            c, towlower(c), lower, i0);
710                         }
711
712                         if (totitle && ((tt = towctrans(c, totitle)) != upper)) {
713                                 printf("%#8x : totitle glibc %#4lx != %#4x mine   i0 = %d\n",
714                                            c, tt, upper, i0);
715                         }
716                         }
717
718
719                         if ((c & 0xfff) == 0xfff) printf(".");
720                 }
721                 printf("done\n");
722         }
723
724         if (1) {
725                 FILE *fp;
726
727                 if (!(fp = fopen("wctables.h", "w"))) {
728                         printf("couldn't open wctables.h!\n");
729                         return EXIT_FAILURE;
730                 }
731
732                 fprintf(fp, "#define __LOCALE_DATA_WC_TABLE_DOMAIN_MAX  %#8lx\n\n",
733                                 (unsigned long) RANGE);
734                 output_table(fp, "ctype", &cttable);
735                 output_table(fp, "uplow", &ultable);
736         
737
738 #warning fix the upper bound on the upper/lower tables... save 200 bytes or so
739                 fprintf(fp, "#define __LOCALE_DATA_WCuplow_diffs  %7u\n", ul_count);
740                 fprintf(fp, "\n#ifdef WANT_WCuplow_diff_data\n\n");
741                 fprintf(fp, "\nstatic const short __LOCALE_DATA_WCuplow_diff_data[%zu] = {",
742                            2 * (size_t) ul_count);
743                 for (i=0 ; i < ul_count ; i++) {
744                         if (i % 4 == 0) {
745                                 fprintf(fp, "\n");
746                         }
747                         fprintf(fp, " %6d, %6d,", uldiff[i].u, uldiff[i].l);
748                 }
749                 fprintf(fp, "\n};\n\n");
750                 fprintf(fp, "#endif /* WANT_WCuplow_diff_data */\n\n");
751
752
753 /*              output_table(fp, "comb", &combtable); */
754 /*              output_table(fp, "width", &widthtable); */
755
756                 fclose(fp);
757         }
758
759         return EXIT_SUCCESS;
760 }
761
762 size_t newopt(unsigned char *ut, size_t usize, int shift, table_data *tbl)
763 {
764         static int recurse = 0;
765         unsigned char *ti[RANGE+1];     /* table index */
766         size_t numblocks;
767         size_t blocksize;
768         size_t uniq;
769         size_t i, j;
770         size_t smallest, t;
771         unsigned char *ii_save;
772         int uniqblock[256];
773         unsigned char uit[RANGE+1];
774         int shift2;
775
776         memset(uniqblock, 0x00, sizeof(uniqblock));
777
778         ii_save = NULL;
779         blocksize = 1 << shift;
780         numblocks = usize >> shift;
781
782         /* init table index */
783         for (i=j=0 ; i < numblocks ; i++) {
784                 ti[i] = ut + j;
785                 j += blocksize;
786         }
787
788         /* sort */
789         nu_val = blocksize;
790         qsort(ti, numblocks, sizeof(unsigned char *), nu_memcmp);
791         
792         uniq = 1;
793         uit[(ti[0]-ut)/blocksize] = 0;
794         for (i=1 ; i < numblocks ; i++) {
795                 if (memcmp(ti[i-1], ti[i], blocksize) < 0) {
796                         if (++uniq > 255) {
797                                 break;
798                         }
799                         uniqblock[uniq - 1] = i;
800                 }
801 #if 1
802                 else if (memcmp(ti[i-1], ti[i], blocksize) > 0) {
803                         printf("bad sort %i!\n", i);
804                         abort();
805                 }
806 #endif
807                 uit[(ti[i]-ut)/blocksize] = uniq - 1;
808         }
809
810         smallest = SIZE_MAX;
811         shift2 = -1;
812         if (uniq <= 255) {
813                 smallest = numblocks + uniq * blocksize;
814                 if (!recurse) {
815                         ++recurse;
816                         for (j=1 ; j < 14 ; j++) {
817                                 if ((numblocks >> j) < 2) break;
818                                 if (tbl) {
819                                         ii_save = tbl->ii;
820                                         tbl->ii = NULL;
821                                 }
822                                 if ((t = newopt(uit, numblocks, j, tbl)) < SIZE_MAX) {
823                                         t += uniq * blocksize;
824                                 }
825                                 if (tbl) {
826                                         tbl->ii = ii_save;
827                                 }
828                                 if (smallest >= t) {
829                                         shift2 = j;
830                                         smallest = t;
831                                         if (!tbl->ii) {
832                                                 printf("ishift %zu  tshift %zu  size %zu\n",
833                                                            shift2, shift, t);
834                                         }
835 /*                              } else { */
836 /*                                      break; */
837                                 }
838                         }
839                         --recurse;
840                 }
841         } else {
842                 return SIZE_MAX;
843         }
844
845         if (tbl->ii) {
846                 if (recurse) {
847                         tbl->ii_shift = shift;
848                         tbl->ii_len = numblocks;
849                         memcpy(tbl->ii, uit, numblocks);
850                         tbl->ti = tbl->ii + tbl->ii_len;
851                         tbl->ti_len = uniq * blocksize;
852                         for (i=0 ; i < uniq ; i++) {
853                                 memcpy(tbl->ti + i * blocksize, ti[uniqblock[i]], blocksize);
854                         }
855                 } else {
856                         ++recurse;
857                         printf("setting ishift %zu  tshift %zu\n",
858                                                            shift2, shift);
859                         newopt(uit, numblocks, shift2, tbl);
860                         --recurse;
861                         tbl->ti_shift = shift;
862                         tbl->ut_len = uniq * blocksize;
863                         tbl->ut = tbl->ti + tbl->ti_len;
864                         for (i=0 ; i < uniq ; i++) {
865                                 memcpy(tbl->ut + i * blocksize, ti[uniqblock[i]], blocksize);
866                         }
867                 }
868         }
869         return smallest;
870 }