OSDN Git Service

extra/locale/gen_wctype.c: remove __CTYPE_isalnum and friends;
[uclinux-h8/uClibc.git] / extra / locale / gen_wctype.c
1 /*
2  * Copyright (C) 2000-2006 Erik Andersen <andersen@uclibc.org>
3  *
4  * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
5  */
6 #define _GNU_SOURCE
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <locale.h>
11 #include <wctype.h>
12 #include <limits.h>
13 #include <stdint.h>
14 #include <wchar.h>
15 #include <ctype.h>
16
17 #include "include/bits/uClibc_charclass.h"
18
19 /*       0x9 : space  blank */
20 /*       0xa : space */
21 /*       0xb : space */
22 /*       0xc : space */
23 /*       0xd : space */
24 /*      0x20 : space  blank */
25 /*    0x1680 : space  blank */
26 /*    0x2000 : space  blank */
27 /*    0x2001 : space  blank */
28 /*    0x2002 : space  blank */
29 /*    0x2003 : space  blank */
30 /*    0x2004 : space  blank */
31 /*    0x2005 : space  blank */
32 /*    0x2006 : space  blank */
33 /*    0x2008 : space  blank */
34 /*    0x2009 : space  blank */
35 /*    0x200a : space  blank */
36 /*    0x200b : space  blank */
37 /*    0x2028 : space */
38 /*    0x2029 : space */
39 /*    0x3000 : space  blank */
40
41 /*  typecount[ 0] =    88670  C_alpha_nonupper_nonlower */
42 /*  typecount[ 1] =      742  C_alpha_lower */
43 /*  typecount[ 2] =        4  C_alpha_upper_lower */
44 /*  typecount[ 3] =      731  C_alpha_upper */
45 /*  typecount[ 4] =       10  C_digit */
46 /*  typecount[ 5] =    10270  C_punct */
47 /*  typecount[ 6] =        0  C_graph */
48 /*  typecount[ 7] =        0  C_print_space_nonblank */
49 /*  typecount[ 8] =       14  C_print_space_blank */
50 /*  typecount[ 9] =        0  C_space_nonblank_noncntrl */
51 /*  typecount[10] =        0  C_space_blank_noncntrl */
52 /*  typecount[11] =        6  C_cntrl_space_nonblank */
53 /*  typecount[12] =        1  C_cntrl_space_blank */
54 /*  typecount[13] =       60  C_cntrl_nonspace */
55 /*  typecount[14] =    96100  C_unclassified */
56 /*  typecount[15] =        0  empty_slot */
57
58
59 /* Set to #if 0 to restrict wchars to 16 bits. */
60 #if 1
61 #define RANGE 0x2ffffUL
62 #elif 0
63 #define RANGE 0x1ffffUL
64 #else
65 #define RANGE 0xffffUL                  /* Restrict for 16-bit wchar_t... */
66 #endif
67
/* Predicates that test a character-class designator D (one of the
 * __CTYPE_* values; presumably declared in the included
 * uClibc_charclass.h) for membership in each (w)ctype class.  They depend
 * on the numeric ordering of the __CTYPE_* designators, which lets most
 * classes be decided with a single unsigned range check.  C is the wide
 * character itself; only mywxdigit() inspects it.
 *
 * Every use of a macro argument is parenthesized so that an expression
 * argument (e.g. "x & 0xf") is evaluated as a unit. */
#define mywalnum(D,C) ((unsigned)((D) - 1) <= (__CTYPE_digit - 1))
#define mywalpha(D,C) ((unsigned)((D) - 1) <= (__CTYPE_alpha_upper - 1))
/* Within the six "space" designators, the odd-valued ones are the blanks. */
#define mywblank(D,C) ((unsigned)((D) - __CTYPE_print_space_nonblank) <= 5 && ((D) & 1))
#define mywcntrl(D,C) ((unsigned)((D) - __CTYPE_cntrl_space_nonblank) <= 2)
#define mywdigit(D,C) ((D) == __CTYPE_digit)
#define mywgraph(D,C) ((unsigned)((D) - 1) <= (__CTYPE_graph - 1))
#define mywlower(D,C) ((unsigned)((D) - __CTYPE_alpha_lower) <= 1)
#define mywprint(D,C) ((unsigned)((D) - 1) <= (__CTYPE_print_space_blank - 1))
#define mywpunct(D,C) ((D) == __CTYPE_punct)
#define mywspace(D,C) ((unsigned)((D) - __CTYPE_print_space_nonblank) <= 5)
#define mywupper(D,C) ((unsigned)((D) - __CTYPE_alpha_upper_lower) <= 1)
/* isxdigit cannot be derived from the designator alone, so the character
 * value C is tested against [a-fA-F] directly.
 * But that's ok as isxdigit() (and isdigit() too) are locale-invariant. */
#define mywxdigit(D,C) (mywdigit(D,C) || (unsigned)(((C) | 0x20) - 'a') <= 5)
84
/* One distinct pair of case-conversion offsets.  main() collects these in
 * uldiff[] and stores, per wchar, the index of its pair in the upper/lower
 * table. */
85 typedef struct {
86         short l;        /* towlower(c) - c */
87         short u;        /* towupper(c) - c */
88 } uldiff_entry;
89
/* A three-level lookup table as emitted by output_table(): ii is indexed by
 * the high bits of the wchar, its value (shifted by ii_shift) plus the middle
 * bits indexes ti, and that value (shifted by ti_shift) plus the low bits
 * indexes ut, which holds the actual data. */
90 typedef struct {
91         uint16_t ii_len;        /* number of entries in ii */
92         uint16_t ti_len;        /* number of entries in ti */
93         uint16_t ut_len;        /* number of entries in ut */
94
95         unsigned char ii_shift; /* bit count of the middle index field */
96         unsigned char ti_shift; /* bit count of the low index field */
97
98         unsigned char *ii;      /* first-level index */
99         unsigned char *ti;      /* second-level index */
100         unsigned char *ut;     /* final data bytes */
101 } table_data;
102
/* Verbosity level; main() increments it once for every "-v" argument. */
static unsigned verbose;
/* printf-style diagnostic written to stderr only when verbose is nonzero.
 * The do { } while (0) wrapper makes the expansion a single statement, so
 * the macro cannot capture an "else" that belongs to an enclosing "if".
 * Standard __VA_ARGS__ is used; at least a format string must be passed. */
#define verbose_msg(...) do { if (verbose) fprintf(stderr, __VA_ARGS__); } while (0)
105
106 void output_table(FILE *fp, const char *name, table_data *tbl)
107 {
108         size_t i;
109
110         fprintf(fp, "#define __LOCALE_DATA_WC%s_II_LEN    %7u\n", name, tbl->ii_len);
111         fprintf(fp, "#define __LOCALE_DATA_WC%s_TI_LEN    %7u\n", name, tbl->ti_len);
112         fprintf(fp, "#define __LOCALE_DATA_WC%s_UT_LEN    %7u\n", name, tbl->ut_len);
113
114         fprintf(fp, "#define __LOCALE_DATA_WC%s_II_SHIFT  %7u\n", name, tbl->ii_shift);
115         fprintf(fp, "#define __LOCALE_DATA_WC%s_TI_SHIFT  %7u\n", name, tbl->ti_shift);
116
117         fprintf(fp, "\n#ifdef WANT_WC%s_data\n", name);
118
119         i = tbl->ii_len + tbl->ti_len + tbl->ut_len;
120         fprintf(fp, "\nstatic const unsigned char __LOCALE_DATA_WC%s_data[%zu] = {", name, i);
121         for (i = 0; i < tbl->ii_len; i++) {
122                 if (i % 12 == 0) {
123                         fprintf(fp, "\n");
124                 }
125                 fprintf(fp, " %#04x,", tbl->ii[i]);
126         }
127         for (i = 0; i < tbl->ti_len; i++) {
128                 if (i % 12 == 0) {
129                         fprintf(fp, "\n");
130                 }
131                 fprintf(fp, " %#04x,", tbl->ti[i]);
132         }
133         for (i = 0; i < tbl->ut_len; i++) {
134                 if (i % 12 == 0) {
135                         fprintf(fp, "\n");
136                 }
137                 fprintf(fp, " %#04x,", tbl->ut[i]);
138         }
139         fprintf(fp, "\n};\n\n");
140
141         fprintf(fp, "#endif /* WANT_WC%s_data */\n\n", name);
142 }
143
144 static void dump_table_data(table_data *tbl)
145 {
146         verbose_msg("ii_shift = %d  ti_shift = %d\n"
147                    "ii_len = %d  ti_len = %d  ut_len = %d\n"
148                    "total = %d\n",
149                    tbl->ii_shift, tbl->ti_shift,
150                    tbl->ii_len, tbl->ti_len, tbl->ut_len,
151                    (int) tbl->ii_len + (int) tbl->ti_len + (int) tbl->ut_len);
152 }
153
/* For sorting the blocks of unsigned chars: number of bytes nu_memcmp()
 * compares per block.  Must be set before the comparator is used. */
static size_t nu_val;

/* Comparator with the signature qsort() expects (NOTE(review): the call
 * site is outside this view -- confirm).  A and B each point to a pointer
 * to a block of unsigned chars; the first nu_val bytes of the two blocks
 * are compared with memcmp() and its result is returned unchanged.
 * The pointers are read through const-qualified types, so the const on the
 * arguments is no longer cast away. */
int nu_memcmp(const void *a, const void *b)
{
        const unsigned char *const *lhs = a;
        const unsigned char *const *rhs = b;

        return memcmp(*lhs, *rhs, nu_val);
}
161
/* Table optimizer; its definition is outside this view.  main() calls it
 * once per candidate shift (0..13) with tbl->ii == NULL, keeps the shift
 * with the smallest return value, allocates that many bytes for tbl->ii and
 * then calls it again with the chosen shift.
 * NOTE(review): that the return value is the total table size in bytes, and
 * that a NULL tbl->ii means "measure only", is inferred from those call
 * sites -- confirm against the definition. */
162 static size_t newopt(unsigned char *ut, size_t usize, int shift, table_data *tbl);
163
/* Capacity of main()'s uldiff[] array; indices into that array are stored in
 * the unsigned char upper/lower table, hence the limit. */
164 #define MAXTO           255                     /* Restrict to minimal unsigned char max. */
165
166 int main(int argc, char **argv)
167 {
168         long int u, l, tt;
169         size_t smallest, t;
170         unsigned int c;
171         unsigned int d;
172         int i, n;
173         int ul_count = 0;
174         uldiff_entry uldiff[MAXTO];
175         table_data cttable;
176         table_data ultable;
177 #if 0
178         table_data combtable;
179         table_data widthtable;
180         long int last_comb = 0;
181 #endif
182         unsigned char wct[(RANGE/2)+1]; /* wctype table (nibble per wchar) */
183         unsigned char ult[RANGE+1];     /* upper/lower table */
184         unsigned char combt[(RANGE/4)+1];       /* combining */
185         unsigned char widtht[(RANGE/4)+1];      /* width */
186         wctrans_t totitle;
187         wctype_t is_comb, is_comb3;
188
189         long int typecount[16];
190         const char *typename[16];
191         static const char empty_slot[] = "empty_slot";
192         int built = 0;
193
194 #define INIT_TYPENAME(X) typename[__CTYPE_##X] = "C_" #X
195
196         for (i = 0; i < 16; i++) {
197                 typename[i] = empty_slot;
198         }
199
200         INIT_TYPENAME(unclassified);
201         INIT_TYPENAME(alpha_nonupper_nonlower);
202         INIT_TYPENAME(alpha_lower);
203         INIT_TYPENAME(alpha_upper_lower);
204         INIT_TYPENAME(alpha_upper);
205         INIT_TYPENAME(digit);
206         INIT_TYPENAME(punct);
207         INIT_TYPENAME(graph);
208         INIT_TYPENAME(print_space_nonblank);
209         INIT_TYPENAME(print_space_blank);
210         INIT_TYPENAME(space_nonblank_noncntrl);
211         INIT_TYPENAME(space_blank_noncntrl);
212         INIT_TYPENAME(cntrl_space_nonblank);
213         INIT_TYPENAME(cntrl_space_blank);
214         INIT_TYPENAME(cntrl_nonspace);
215
216         memset(&cttable, 0, sizeof(table_data));
217         memset(&ultable, 0, sizeof(table_data));
218 #if 0
219         memset(combtable, 0, sizeof(table_data));
220         memset(widthtable, 0, sizeof(table_data));
221 #endif
222         setvbuf(stdout, NULL, _IONBF, 0);
223
224         while (--argc) {
225                 ++argv;
226                 if (!strcmp(*argv, "-v")) {
227                         ++verbose;
228                         continue;
229                 }
230                 if (!setlocale(LC_CTYPE, *argv)) {
231                         verbose_msg("setlocale(LC_CTYPE,%s) failed!  Skipping this locale...\n", *argv);
232                         continue;
233                 }
234
235                 if (!(totitle = wctrans("totitle"))) {
236                         verbose_msg("no totitle transformation.\n");
237                 }
238                 if (!(is_comb = wctype("combining"))) {
239                         verbose_msg("no combining wctype.\n");
240                 }
241                 if (!(is_comb3 = wctype("combining_level3"))) {
242                         verbose_msg("no combining_level3 wctype.\n");
243                 }
244
245                 if (!built) {
246                         built = 1;
247                         ul_count = 1;
248                         uldiff[0].u = uldiff[0].l = 0;
249
250                         memset(wct, 0, sizeof(wct));
251                         memset(combt, 0, sizeof(combt));
252                         memset(widtht, 0, sizeof(widtht));
253
254                         for (i = 0; i < 16; i++) {
255                                 typecount[i] = 0;
256                         }
257
258                         for (c = 0; c <= RANGE; c++) {
259                                 if (iswdigit(c)) {
260                                         d = __CTYPE_digit;
261                                 } else if (iswalpha(c)) {
262                                         d = __CTYPE_alpha_nonupper_nonlower;
263                                         if (iswlower(c)) {
264                                                 d = __CTYPE_alpha_lower;
265                                                 if (iswupper(c)) {
266                                                         d = __CTYPE_alpha_upper_lower;
267                                                 }
268                                         } else if (iswupper(c)) {
269                                                 d = __CTYPE_alpha_upper;
270                                         }
271                                 } else if (iswpunct(c)) {
272                                         d = __CTYPE_punct;
273                                 } else if (iswgraph(c)) {
274                                         d = __CTYPE_graph;
275                                 } else if (iswprint(c)) {
276                                         d = __CTYPE_print_space_nonblank;
277                                         if (iswblank(c)) {
278                                                 d = __CTYPE_print_space_blank;
279                                         }
280                                 } else if (iswspace(c) && !iswcntrl(c)) {
281                                         d = __CTYPE_space_nonblank_noncntrl;
282                                         if (iswblank(c)) {
283                                                 d = __CTYPE_space_blank_noncntrl;
284                                         }
285                                 } else if (iswcntrl(c)) {
286                                         d = __CTYPE_cntrl_nonspace;
287                                         if (iswspace(c)) {
288                                                 d = __CTYPE_cntrl_space_nonblank;
289                                                 if (iswblank(c)) {
290                                                         d = __CTYPE_cntrl_space_blank;
291                                                 }
292                                         }
293                                 } else {
294                                         d = __CTYPE_unclassified;
295                                 }
296
297                                 ++typecount[d];
298 #if 0
299                                 if (iswspace(c)) {
300                                         if (iswblank(c)) {
301                                                 verbose_msg("%#8x : space  blank\n", c);
302                                         } else {
303                                                 verbose_msg("%#8x : space\n", c);
304                                         }
305                                 }
306 #endif
307 #if 0
308                                 if (c < 256) {
309                                         unsigned int glibc;
310
311                                         glibc = 0;
312                                         if (isalnum(c)) ++glibc; glibc <<= 1;
313                                         if (isalpha(c)) ++glibc; glibc <<= 1;
314                                         if (isblank(c)) ++glibc; glibc <<= 1;
315                                         if (iscntrl(c)) ++glibc; glibc <<= 1;
316                                         if (isdigit(c)) ++glibc; glibc <<= 1;
317                                         if (isgraph(c)) ++glibc; glibc <<= 1;
318                                         if (islower(c)) ++glibc; glibc <<= 1;
319                                         if (isprint(c)) ++glibc; glibc <<= 1;
320                                         if (ispunct(c)) ++glibc; glibc <<= 1;
321                                         if (isspace(c)) ++glibc; glibc <<= 1;
322                                         if (isupper(c)) ++glibc; glibc <<= 1;
323                                         if (isxdigit(c)) ++glibc;
324                                         verbose_msg("%#8x : ctype %#4x\n", c, glibc);
325                                 }
326 #endif
327 #if 1
328                                 /* Paranoid checking... */
329                                 {
330                                         unsigned int glibc;
331                                         unsigned int mine;
332
333                                         glibc = 0;
334                                         if (iswalnum(c)) ++glibc; glibc <<= 1;
335                                         if (iswalpha(c)) ++glibc; glibc <<= 1;
336                                         if (iswblank(c)) ++glibc; glibc <<= 1;
337                                         if (iswcntrl(c)) ++glibc; glibc <<= 1;
338                                         if (iswdigit(c)) ++glibc; glibc <<= 1;
339                                         if (iswgraph(c)) ++glibc; glibc <<= 1;
340                                         if (iswlower(c)) ++glibc; glibc <<= 1;
341                                         if (iswprint(c)) ++glibc; glibc <<= 1;
342                                         if (iswpunct(c)) ++glibc; glibc <<= 1;
343                                         if (iswspace(c)) ++glibc; glibc <<= 1;
344                                         if (iswupper(c)) ++glibc; glibc <<= 1;
345                                         if (iswxdigit(c)) ++glibc;
346
347                                         mine = 0;
348                                         if (mywalnum(d,c)) ++mine; mine <<= 1;
349                                         if (mywalpha(d,c)) ++mine; mine <<= 1;
350                                         if (mywblank(d,c)) ++mine; mine <<= 1;
351                                         if (mywcntrl(d,c)) ++mine; mine <<= 1;
352                                         if (mywdigit(d,c)) ++mine; mine <<= 1;
353                                         if (mywgraph(d,c)) ++mine; mine <<= 1;
354                                         if (mywlower(d,c)) ++mine; mine <<= 1;
355                                         if (mywprint(d,c)) ++mine; mine <<= 1;
356                                         if (mywpunct(d,c)) ++mine; mine <<= 1;
357                                         if (mywspace(d,c)) ++mine; mine <<= 1;
358                                         if (mywupper(d,c)) ++mine; mine <<= 1;
359                                         if (mywxdigit(d,c)) ++mine;
360
361                                         if (glibc != mine) {
362                                                 verbose_msg("%#8x : glibc %#4x != %#4x mine  %u\n", c, glibc, mine, d);
363                                                 return EXIT_FAILURE;
364                                         }
365 #if 0
366                                         if (iswctype(c,is_comb) || iswctype(c,is_comb3)) {
367 /*                                              if (!iswpunct(c)) { */
368                                                         verbose_msg("%#8x : %d %d %#4x\n",
369                                                                    c, iswctype(c,is_comb),iswctype(c,is_comb3), glibc);
370 /*                                              } */
371                                         }
372 #endif
373 #if 0
374                                         if (iswctype(c,is_comb) || iswctype(c,is_comb3)) {
375                                                 if (!last_comb) {
376                                                         verbose_msg("%#8x - ", c);
377                                                         last_comb = c;
378                                                 } else if (last_comb + 1 < c) {
379                                                         verbose_msg("%#8x\n%#8x - ", last_comb, c);
380                                                         last_comb = c;
381                                                 } else {
382                                                         last_comb = c;
383                                                 }
384                                         }
385 #endif
386                                 }
387 #endif
388
389                                 combt[c/4] |= ((((!!iswctype(c,is_comb)) << 1) | !!iswctype(c,is_comb3))
390                                                    << ((c & 3) << 1));
391 /*                              comb3t[c/8] |= ((!!iswctype(c,is_comb3)) << (c & 7)); */
392
393 /*                              widtht[c/4] |= (wcwidth(c) << ((c & 3) << 1)); */
394
395                                 if (c & 1) {    /* Use the high nibble for odd numbered wchars. */
396                                         d <<= 4;
397                                 }
398                                 wct[c/2] |= d;
399
400                                 l = (long)(int) towlower(c) - c;
401                                 u = (long)(int) towupper(c) - c;
402                                 ult[c] = 0;
403                                 if (l || u) {
404                                         if ((l != (short)l) || (u != (short)u)) {
405                                                 verbose_msg("range assumption error!  %x  %ld  %ld\n", c, l, u);
406                                                 return EXIT_FAILURE;
407                                         }
408                                         for (i = 0; i < ul_count; i++) {
409                                                 if ((l == uldiff[i].l) && (u == uldiff[i].u)) {
410                                                         goto found;
411                                                 }
412                                         }
413                                         uldiff[ul_count].l = l;
414                                         uldiff[ul_count].u = u;
415                                         ++ul_count;
416                                         if (ul_count > MAXTO) {
417                                                 verbose_msg("too many touppers/tolowers!\n");
418                                                 return EXIT_FAILURE;
419                                         }
420  found:
421                                         ult[c] = i;
422                                 }
423                         }
424
425                         for (i = 0; i < 16; i++) {
426                                 verbose_msg("typecount[%2d] = %8ld  %s\n", i, typecount[i], typename[i]);
427                         }
428
429                         verbose_msg("optimizing is* table..\n");
430                         n = -1;
431                         smallest = SIZE_MAX;
432                         cttable.ii = NULL;
433                         for (i = 0; i < 14; i++) {
434                                 t = newopt(wct, (RANGE/2)+1, i, &cttable);
435                                 if (smallest >= t) {
436                                         n = i;
437                                         smallest = t;
438 /*                              } else { */
439 /*                                      break; */
440                                 }
441                         }
442                         verbose_msg("smallest = %zu\n", smallest);
443                         if (!(cttable.ii = malloc(smallest))) {
444                                 verbose_msg("couldn't allocate space!\n");
445                                 return EXIT_FAILURE;
446                         }
447                         smallest = SIZE_MAX;
448                         newopt(wct, (RANGE/2)+1, n, &cttable);
449                         ++cttable.ti_shift;             /* correct for nibble mode */
450
451                         verbose_msg("optimizing u/l-to table..\n");
452                         smallest = SIZE_MAX;
453                         ultable.ii = NULL;
454                         for (i = 0; i < 14; i++) {
455                                 t = newopt(ult, RANGE+1, i, &ultable);
456                                 if (smallest >= t) {
457                                         n = i;
458                                         smallest = t;
459 /*                              } else { */
460 /*                                      break; */
461                                 }
462                         }
463                         verbose_msg("%lu (smallest) + %lu (u/l diffs) = %lu\n",
464                                 (unsigned long) smallest,
465                                 (unsigned long) (4 * ul_count),
466                                 (unsigned long) (smallest + 4 * ul_count)
467                         );
468                         verbose_msg("smallest = %zu\n", smallest);
469                         if (!(ultable.ii = malloc(smallest))) {
470                                 verbose_msg("couldn't allocate space!\n");
471                                 return EXIT_FAILURE;
472                         }
473                         smallest = SIZE_MAX;
474                         newopt(ult, RANGE+1, n, &ultable);
475 #if 0
476                         verbose_msg("optimizing comb table..\n");
477                         smallest = SIZE_MAX;
478                         combtable.ii = NULL;
479                         for (i = 0; i < 14; i++) {
480                                 t = newopt(combt, sizeof(combt), i, &combtable);
481                                 if (smallest >= t) {
482                                         n = i;
483                                         smallest = t;
484 /*                              } else { */
485 /*                                      break; */
486                                 }
487                         }
488                         verbose_msg("smallest = %zu\n", smallest);
489                         if (!(combtable.ii = malloc(smallest))) {
490                                 verbose_msg("couldn't allocate space!\n");
491                                 return EXIT_FAILURE;
492                         }
493                         smallest = SIZE_MAX;
494                         newopt(combt, sizeof(combt), n, &combtable);
495                         combtable.ti_shift += 4; /* correct for 4 entries per */
496 #endif
497 #if 0
498                         verbose_msg("optimizing width table..\n");
499                         smallest = SIZE_MAX;
500                         widthtable.ii = NULL;
501                         for (i = 0; i < 14; i++) {
502                                 t = newopt(widtht, sizeof(widtht), i, &widthtable);
503                                 if (smallest >= t) {
504                                         n = i;
505                                         smallest = t;
506 /*                              } else { */
507 /*                                      break; */
508                                 }
509                         }
510                         verbose_msg("smallest = %zu\n", smallest);
511                         if (!(widthtable.ii = malloc(smallest))) {
512                                 verbose_msg("couldn't allocate space!\n");
513                                 return EXIT_FAILURE;
514                         }
515                         smallest = SIZE_MAX;
516                         newopt(widtht, sizeof(widtht), n, &widthtable);
517                         widthtable.ti_shift += 4; /* correct for 4 entries per */
518 #endif
519 #if 0
520                         verbose_msg("optimizing comb3 table..\n");
521                         smallest = SIZE_MAX;
522                         comb3table.ii = NULL;
523                         for (i = 0; i < 14; i++) {
524                                 t = newopt(comb3t, sizeof(comb3t), i, &comb3table);
525                                 if (smallest >= t) {
526                                         n = i;
527                                         smallest = t;
528 /*                              } else { */
529 /*                                      break; */
530                                 }
531                         }
532                         verbose_msg("smallest = %zu\n", smallest);
533                         if (!(comb3table.ii = malloc(smallest))) {
534                                 verbose_msg("couldn't allocate space!\n");
535                                 return EXIT_FAILURE;
536                         }
537                         smallest = SIZE_MAX;
538                         newopt(comb3t, sizeof(comb3t), n, &comb3table);
539                         comb3table.ti_shift += 8; /* correct for 4 entries per */
540 #endif
541
542                         dump_table_data(&cttable);
543                         dump_table_data(&ultable);
544 #if 0
545                         dump_table_data(&combtable);
546 #endif
547                 }
548
549                 verbose_msg("verifying for %s...\n", *argv);
550 #if RANGE == 0xffffU
551                 for (c = 0; c <= 0xffffUL; c++)
552 #else
553                 for (c = 0; c <= 0x10ffffUL; c++)
554 #endif
555                 {
556                         unsigned int glibc;
557                         unsigned int mine;
558                         unsigned int upper, lower;
559
560 #if 0
561 #if RANGE < 0x10000UL
562                         if (c == 0x10000UL) {
563                                 c = 0x30000UL;  /* skip 1st and 2nd sup planes */
564                         }
565 #elif RANGE < 0x20000UL
566                         if (c == 0x20000UL) {
567                                 c = 0x30000UL;  /* skip 2nd sup planes */
568                         }
569 #endif
570 #endif
571                         glibc = 0;
572                         if (iswalnum(c)) ++glibc; glibc <<= 1;
573                         if (iswalpha(c)) ++glibc; glibc <<= 1;
574                         if (iswblank(c)) ++glibc; glibc <<= 1;
575                         if (iswcntrl(c)) ++glibc; glibc <<= 1;
576                         if (iswdigit(c)) ++glibc; glibc <<= 1;
577                         if (iswgraph(c)) ++glibc; glibc <<= 1;
578                         if (iswlower(c)) ++glibc; glibc <<= 1;
579                         if (iswprint(c)) ++glibc; glibc <<= 1;
580                         if (iswpunct(c)) ++glibc; glibc <<= 1;
581                         if (iswspace(c)) ++glibc; glibc <<= 1;
582                         if (iswupper(c)) ++glibc; glibc <<= 1;
583                         if (iswxdigit(c)) ++glibc;
584
585                         {
586                                 unsigned int u;
587                                 int n = 0, sc = 0; /* = 0 for verbose_msg only */
588                                 int i0 = 0, i1 = 0;
589
590                                 u = c;
591                                 if (u <= RANGE) {
592                                         sc = u & ((1 << cttable.ti_shift) - 1);
593                                         u >>= cttable.ti_shift;
594                                         n = u & ((1 << cttable.ii_shift) - 1);
595                                         u >>= cttable.ii_shift;
596
597                                         i0 = cttable.ii[u];
598                                         i0 <<= cttable.ii_shift;
599                                         i1 = cttable.ti[i0 + n];
600                                         i1 <<= (cttable.ti_shift - 1);
601                                         d = cttable.ut[i1 + (sc >> 1)];
602
603                                         if (sc & 1) {
604                                                 d >>= 4;
605                                         }
606                                         d &= 0x0f;
607                                 } else if (((unsigned)(c - 0xe0020UL) <= 0x5f) || (c == 0xe0001UL)) {
608                                         d = __CTYPE_punct;
609                                 } else if ((unsigned)(c - 0xf0000UL) < 0x20000UL) {
610                                         if ((c & 0xffffU) <= 0xfffdU) {
611                                                 d = __CTYPE_punct;
612                                         } else {
613                                                 d = __CTYPE_unclassified;
614                                         }
615                                 } else {
616                                         d = __CTYPE_unclassified;
617                                 }
618
619                                 mine = 0;
620                                 if (mywalnum(d,c)) ++mine; mine <<= 1;
621                                 if (mywalpha(d,c)) ++mine; mine <<= 1;
622                                 if (mywblank(d,c)) ++mine; mine <<= 1;
623                                 if (mywcntrl(d,c)) ++mine; mine <<= 1;
624                                 if (mywdigit(d,c)) ++mine; mine <<= 1;
625                                 if (mywgraph(d,c)) ++mine; mine <<= 1;
626                                 if (mywlower(d,c)) ++mine; mine <<= 1;
627                                 if (mywprint(d,c)) ++mine; mine <<= 1;
628                                 if (mywpunct(d,c)) ++mine; mine <<= 1;
629                                 if (mywspace(d,c)) ++mine; mine <<= 1;
630                                 if (mywupper(d,c)) ++mine; mine <<= 1;
631                                 if (mywxdigit(d,c)) ++mine;
632
633                                 if (glibc != mine) {
634                                         verbose_msg("%#8x : glibc %#4x != %#4x mine %d\n", c, glibc, mine, d);
635                                         if (c < 0x30000UL) {
636                                                 verbose_msg("sc=%#x u=%#x n=%#x i0=%#x i1=%#x\n", sc, u, n, i0, i1);
637                                         }
638                                 }
639
640                                 upper = lower = u = c;
641                                 if (u <= RANGE) {
642                                         sc = u & ((1 << ultable.ti_shift) - 1);
643                                         u >>= ultable.ti_shift;
644                                         n = u & ((1 << ultable.ii_shift) - 1);
645                                         u >>= ultable.ii_shift;
646
647                                         i0 = ultable.ii[u];
648                                         i0 <<= ultable.ii_shift;
649                                         i1 = ultable.ti[i0 + n];
650                                         i1 <<= (ultable.ti_shift);
651                                         i1 += sc;
652                                         i0 = ultable.ut[i1];
653                                         upper = c + uldiff[i0].u;
654                                         lower = c + uldiff[i0].l;
655                                 }
656
657                                 if (towupper(c) != upper) {
658                                         verbose_msg("%#8x : towupper glibc %#4x != %#4x mine\n",
659                                                    c, towupper(c), upper);
660                                 }
661
662                                 if (towlower(c) != lower) {
663                                         verbose_msg("%#8x : towlower glibc %#4x != %#4x mine   i0 = %d\n",
664                                                    c, towlower(c), lower, i0);
665                                 }
666
667                                 if (totitle && ((tt = towctrans(c, totitle)) != upper)) {
668                                         verbose_msg("%#8x : totitle glibc %#4lx != %#4x mine   i0 = %d\n",
669                                                    c, tt, upper, i0);
670                                 }
671                         }
672
673                         if ((c & 0xfff) == 0xfff) verbose_msg(".");
674                 }
675                 verbose_msg("done\n");
676         }
677
678         if (built) {
679                 FILE *fp;
680
681                 if (!(fp = fopen("wctables.h", "w"))) {
682                         verbose_msg("cannot open output file 'wctables.h'!\n");
683                         return EXIT_FAILURE;
684                 }
685
686                 fprintf(fp, "#define __LOCALE_DATA_WC_TABLE_DOMAIN_MAX  %#8lx\n\n",
687                                 (unsigned long) RANGE);
688                 output_table(fp, "ctype", &cttable);
689                 output_table(fp, "uplow", &ultable);
690
691 #warning fix the upper bound on the upper/lower tables... save 200 bytes or so
692                 fprintf(fp, "#define __LOCALE_DATA_WCuplow_diffs  %7u\n", ul_count);
693                 fprintf(fp, "\n#ifdef WANT_WCuplow_diff_data\n\n");
694                 fprintf(fp, "\nstatic const short __LOCALE_DATA_WCuplow_diff_data[%zu] = {",
695                            2 * (size_t) ul_count);
696                 for (i = 0; i < ul_count; i++) {
697                         if (i % 4 == 0) {
698                                 fprintf(fp, "\n");
699                         }
700                         fprintf(fp, " %6d, %6d,", uldiff[i].u, uldiff[i].l);
701                 }
702                 fprintf(fp, "\n};\n\n");
703                 fprintf(fp, "#endif /* WANT_WCuplow_diff_data */\n\n");
704
705 /*              output_table(fp, "comb", &combtable); */
706 /*              output_table(fp, "width", &widthtable); */
707
708                 fclose(fp);
709         }
710
711         return !built;
712 }
713
714 size_t newopt(unsigned char *ut, size_t usize, int shift, table_data *tbl)
715 {
716         static int recurse;
717         unsigned char *ti[RANGE+1];     /* table index */
718         size_t numblocks;
719         size_t blocksize;
720         size_t uniq;
721         size_t i, j;
722         size_t smallest, t;
723         unsigned char *ii_save;
724         int uniqblock[256];
725         unsigned char uit[RANGE+1];
726         int shift2;
727
728         memset(uniqblock, 0x00, sizeof(uniqblock));
729
730         ii_save = NULL;
731         blocksize = 1 << shift;
732         numblocks = usize >> shift;
733
734         /* init table index */
735         for (i=j = 0; i < numblocks; i++) {
736                 ti[i] = ut + j;
737                 j += blocksize;
738         }
739
740         /* sort */
741         nu_val = blocksize;
742         qsort(ti, numblocks, sizeof(unsigned char *), nu_memcmp);
743
744         uniq = 1;
745         uit[(ti[0]-ut)/blocksize] = 0;
746         for (i=1; i < numblocks; i++) {
747                 if (memcmp(ti[i-1], ti[i], blocksize) < 0) {
748                         if (++uniq > 255) {
749                                 break;
750                         }
751                         uniqblock[uniq - 1] = i;
752                 }
753 #if 1
754                 else if (memcmp(ti[i-1], ti[i], blocksize) > 0) {
755                         verbose_msg("bad sort %li!\n", (long) i);
756                         abort();
757                 }
758 #endif
759                 uit[(ti[i]-ut)/blocksize] = uniq - 1;
760         }
761
762         smallest = SIZE_MAX;
763         shift2 = -1;
764
765         if (uniq > 255)
766                 return SIZE_MAX;
767
768         smallest = numblocks + uniq * blocksize;
769         if (!recurse) {
770                 ++recurse;
771                 for (j=1; j < 14; j++) {
772                         if ((numblocks >> j) < 2) break;
773                         if (tbl) {
774                                 ii_save = tbl->ii;
775                                 tbl->ii = NULL;
776                         }
777                         if ((t = newopt(uit, numblocks, j, tbl)) < SIZE_MAX) {
778                                 t += uniq * blocksize;
779                         }
780                         if (tbl) {
781                                 tbl->ii = ii_save;
782                         }
783                         if (smallest >= t) {
784                                 shift2 = j;
785                                 smallest = t;
786                                 if (!tbl->ii) {
787                                         verbose_msg("ishift %u  tshift %u  size %lu\n",
788                                                    shift2, shift, (unsigned long) t);
789                                 }
790 /*                      } else { */
791 /*                              break; */
792                         }
793                 }
794                 --recurse;
795         }
796
797         if (tbl->ii) {
798                 if (recurse) {
799                         tbl->ii_shift = shift;
800                         tbl->ii_len = numblocks;
801                         memcpy(tbl->ii, uit, numblocks);
802                         tbl->ti = tbl->ii + tbl->ii_len;
803                         tbl->ti_len = uniq * blocksize;
804                         for (i = 0; i < uniq; i++) {
805                                 memcpy(tbl->ti + i * blocksize, ti[uniqblock[i]], blocksize);
806                         }
807                 } else {
808                         ++recurse;
809                         verbose_msg("setting ishift %u  tshift %u\n",
810                                                            shift2, shift);
811                         newopt(uit, numblocks, shift2, tbl);
812                         --recurse;
813                         tbl->ti_shift = shift;
814                         tbl->ut_len = uniq * blocksize;
815                         tbl->ut = tbl->ti + tbl->ti_len;
816                         for (i = 0; i < uniq; i++) {
817                                 memcpy(tbl->ut + i * blocksize, ti[uniqblock[i]], blocksize);
818                         }
819                 }
820         }
821         return smallest;
822 }
823 /* vi: set sw=4 ts=4: */