OSDN Git Service

UCLIBC_CTYPE_HEADER define is removed, as it always
[uclinux-h8/uClibc.git] / extra / locale / gen_wctype.c
1 /*
2  * Copyright (C) 2000-2006 Erik Andersen <andersen@uclibc.org>
3  *
4  * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
5  */
6 #define _GNU_SOURCE
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <locale.h>
11 #include <wctype.h>
12 #include <limits.h>
13 #include <stdint.h>
14 #include <wchar.h>
15 #include <ctype.h>
16
17 #include "include/bits/uClibc_charclass.h"
18
19 /*       0x9 : space  blank */
20 /*       0xa : space */
21 /*       0xb : space */
22 /*       0xc : space */
23 /*       0xd : space */
24 /*      0x20 : space  blank */
25 /*    0x1680 : space  blank */
26 /*    0x2000 : space  blank */
27 /*    0x2001 : space  blank */
28 /*    0x2002 : space  blank */
29 /*    0x2003 : space  blank */
30 /*    0x2004 : space  blank */
31 /*    0x2005 : space  blank */
32 /*    0x2006 : space  blank */
33 /*    0x2008 : space  blank */
34 /*    0x2009 : space  blank */
35 /*    0x200a : space  blank */
36 /*    0x200b : space  blank */
37 /*    0x2028 : space */
38 /*    0x2029 : space */
39 /*    0x3000 : space  blank */
40
41 /*  typecount[ 0] =    88670  C_alpha_nonupper_nonlower */
42 /*  typecount[ 1] =      742  C_alpha_lower */
43 /*  typecount[ 2] =        4  C_alpha_upper_lower */
44 /*  typecount[ 3] =      731  C_alpha_upper */
45 /*  typecount[ 4] =       10  C_digit */
46 /*  typecount[ 5] =    10270  C_punct */
47 /*  typecount[ 6] =        0  C_graph */
48 /*  typecount[ 7] =        0  C_print_space_nonblank */
49 /*  typecount[ 8] =       14  C_print_space_blank */
50 /*  typecount[ 9] =        0  C_space_nonblank_noncntrl */
51 /*  typecount[10] =        0  C_space_blank_noncntrl */
52 /*  typecount[11] =        6  C_cntrl_space_nonblank */
53 /*  typecount[12] =        1  C_cntrl_space_blank */
54 /*  typecount[13] =       60  C_cntrl_nonspace */
55 /*  typecount[14] =    96100  C_unclassified */
56 /*  typecount[15] =        0  empty_slot */
57
58
/*
 * RANGE is the largest wide-character value covered by the generated tables.
 * Set to #if 0 to restrict wchars to 16 bits.
 */
#if 1
#define RANGE 0x2ffffUL
#elif 0
#define RANGE 0x1ffffUL
#else
#define RANGE 0xffffUL			/* Restrict for 16-bit wchar_t... */
#endif

/*
 * Some macros that test for various (w)ctype classes when passed one of the
 * __CTYPE_* class designator values.  Each test is a single unsigned range
 * check, so the results depend on the numeric ordering of the __CTYPE_*
 * constants (declared outside this file).  The argument is parenthesized so
 * that arbitrary expressions may be passed safely.
 */
#define __CTYPE_isalnum(D)		((unsigned int)((D)-1) <= (__CTYPE_digit-1))
#define __CTYPE_isalpha(D)		((unsigned int)((D)-1) <= (__CTYPE_alpha_upper-1))
#define __CTYPE_isblank(D) \
	((((unsigned int)((D) - __CTYPE_print_space_nonblank)) <= 5) && ((D) & 1))
#define __CTYPE_iscntrl(D)		(((unsigned int)((D) - __CTYPE_cntrl_space_nonblank)) <= 2)
#define __CTYPE_isdigit(D)		((D) == __CTYPE_digit)
#define __CTYPE_isgraph(D)		((unsigned int)((D)-1) <= (__CTYPE_graph-1))
#define __CTYPE_islower(D)		(((unsigned int)((D) - __CTYPE_alpha_lower)) <= 1)
#define __CTYPE_isprint(D)		((unsigned int)((D)-1) <= (__CTYPE_print_space_blank-1))
#define __CTYPE_ispunct(D)		((D) == __CTYPE_punct)
#define __CTYPE_isspace(D)		(((unsigned int)((D) - __CTYPE_print_space_nonblank)) <= 5)
#define __CTYPE_isupper(D)		(((unsigned int)((D) - __CTYPE_alpha_upper_lower)) <= 1)
/*  #define __CTYPE_isxdigit(D) -- isxdigit is untestable this way.
 *  But that's ok as isxdigit() (and isdigit() too) are locale-invariant. */
#define __CTYPE_isxdigit(D,X) \
	(__CTYPE_isdigit(D) || (((unsigned int)(((X)|0x20) - 'a')) <= 5))

/*
 * Table-based counterparts of the isw*() functions.
 * NOTE: these deliberately ignore their argument (except mywxdigit) and
 * instead read a variable named 'd' -- the class designator of the character
 * being tested -- which must be in scope at the point of use.
 */
#define mywalnum(x)		__CTYPE_isalnum(d)
#define mywalpha(x)		__CTYPE_isalpha(d)
#define mywblank(x)		__CTYPE_isblank(d)
#define mywcntrl(x)		__CTYPE_iscntrl(d)
#define mywdigit(x)		__CTYPE_isdigit(d)
#define mywgraph(x)		__CTYPE_isgraph(d)
#define mywlower(x)		__CTYPE_islower(d)
#define mywprint(x)		__CTYPE_isprint(d)
#define mywpunct(x)		__CTYPE_ispunct(d)
#define mywspace(x)		__CTYPE_isspace(d)
#define mywupper(x)		__CTYPE_isupper(d)
#define mywxdigit(x)	__CTYPE_isxdigit(d,x)

/* One distinct (towlower(c) - c, towupper(c) - c) pair of case deltas. */
typedef struct {
	short l;		/* delta to the lowercase form */
	short u;		/* delta to the uppercase form */
} uldiff_entry;

/*
 * A packed three-level lookup table.  A value is found by splitting the
 * index into three bit fields: the top bits select an entry of ii, that
 * entry (shifted left by ii_shift) plus the middle bits selects an entry
 * of ti, and that entry (shifted left by ti_shift) plus the low bits
 * selects the datum in ut.
 */
typedef struct {
	uint16_t ii_len;	/* number of bytes in ii */
	uint16_t ti_len;	/* number of bytes in ti */
	uint16_t ut_len;	/* number of bytes in ut */

	unsigned char ii_shift;	/* width in bits of the middle index field */
	unsigned char ti_shift;	/* width in bits of the low index field */

	unsigned char *ii;
	unsigned char *ti;
	unsigned char *ut;
} table_data;

/* Verbosity level; incremented once per "-v" on the command line. */
static unsigned verbose;

/*
 * printf-style diagnostic to stderr, emitted only when verbose is nonzero.
 * Wrapped in do/while(0) so that it behaves as one statement and cannot
 * capture an 'else' belonging to a surrounding 'if'.
 */
#define verbose_msg(...) \
	do { if (verbose) fprintf(stderr, __VA_ARGS__); } while (0)
120
121 void output_table(FILE *fp, const char *name, table_data *tbl)
122 {
123         size_t i;
124
125         fprintf(fp, "#define __LOCALE_DATA_WC%s_II_LEN    %7u\n", name, tbl->ii_len);
126         fprintf(fp, "#define __LOCALE_DATA_WC%s_TI_LEN    %7u\n", name, tbl->ti_len);
127         fprintf(fp, "#define __LOCALE_DATA_WC%s_UT_LEN    %7u\n", name, tbl->ut_len);
128
129         fprintf(fp, "#define __LOCALE_DATA_WC%s_II_SHIFT  %7u\n", name, tbl->ii_shift);
130         fprintf(fp, "#define __LOCALE_DATA_WC%s_TI_SHIFT  %7u\n", name, tbl->ti_shift);
131
132         fprintf(fp, "\n#ifdef WANT_WC%s_data\n", name);
133
134         i = tbl->ii_len + tbl->ti_len + tbl->ut_len;
135         fprintf(fp, "\nstatic const unsigned char __LOCALE_DATA_WC%s_data[%zu] = {", name, i);
136         for (i=0 ; i < tbl->ii_len ; i++) {
137                 if (i % 12 == 0) {
138                         fprintf(fp, "\n");
139                 }
140                 fprintf(fp, " %#04x,", tbl->ii[i]);
141         }
142         for (i=0 ; i < tbl->ti_len ; i++) {
143                 if (i % 12 == 0) {
144                         fprintf(fp, "\n");
145                 }
146                 fprintf(fp, " %#04x,", tbl->ti[i]);
147         }
148         for (i=0 ; i < tbl->ut_len ; i++) {
149                 if (i % 12 == 0) {
150                         fprintf(fp, "\n");
151                 }
152                 fprintf(fp, " %#04x,", tbl->ut[i]);
153         }
154         fprintf(fp, "\n};\n\n");
155
156         fprintf(fp, "#endif /* WANT_WC%s_data */\n\n", name);
157 }
158
159 static void dump_table_data(table_data *tbl)
160 {
161         verbose_msg("ii_shift = %d  ti_shift = %d\n"
162                    "ii_len = %d  ti_len = %d  ut_len = %d\n"
163                    "total = %d\n",
164                    tbl->ii_shift, tbl->ti_shift,
165                    tbl->ii_len, tbl->ti_len, tbl->ut_len,
166                    (int) tbl->ii_len + (int) tbl->ti_len + (int) tbl->ut_len);
167 }
168
/* Length in bytes of the blocks compared by nu_memcmp(); set before use. */
static size_t nu_val;

/*
 * Comparison callback for sorting blocks of unsigned chars.  a and b each
 * point to a pointer to a block; the first nu_val bytes of the two blocks
 * are compared and the memcmp() result is returned.
 */
int nu_memcmp(const void *a, const void *b)
{
	unsigned char *const *lhs = a;
	unsigned char *const *rhs = b;

	return memcmp(*lhs, *rhs, nu_val);
}
176
177 static size_t newopt(unsigned char *ut, size_t usize, int shift, table_data *tbl);
178
179 #define MAXTO           255                     /* Restrict to minimal unsigned char max. */
180
181 int main(int argc, char **argv)
182 {
183         long int u, l, tt;
184         size_t smallest, t;
185         unsigned int c;
186         unsigned int d;
187         int i, n;
188         int ul_count = 0;
189         uldiff_entry uldiff[MAXTO];
190         table_data cttable;
191         table_data ultable;
192 #if 0
193         table_data combtable;
194         table_data widthtable;
195         long int last_comb = 0;
196 #endif
197         unsigned char wct[(RANGE/2)+1]; /* wctype table (nibble per wchar) */
198         unsigned char ult[RANGE+1];     /* upper/lower table */
199         unsigned char combt[(RANGE/4)+1];       /* combining */
200         unsigned char widtht[(RANGE/4)+1];      /* width */
201         wctrans_t totitle;
202         wctype_t is_comb, is_comb3;
203
204         long int typecount[16];
205         const char *typename[16];
206         static const char empty_slot[] = "empty_slot";
207         int built = 0;
208
209 #define INIT_TYPENAME(X) typename[__CTYPE_##X] = "C_" #X
210
211         for (i=0 ; i < 16 ; i++) {
212                 typename[i] = empty_slot;
213         }
214
215         INIT_TYPENAME(unclassified);
216         INIT_TYPENAME(alpha_nonupper_nonlower);
217         INIT_TYPENAME(alpha_lower);
218         INIT_TYPENAME(alpha_upper_lower);
219         INIT_TYPENAME(alpha_upper);
220         INIT_TYPENAME(digit);
221         INIT_TYPENAME(punct);
222         INIT_TYPENAME(graph);
223         INIT_TYPENAME(print_space_nonblank);
224         INIT_TYPENAME(print_space_blank);
225         INIT_TYPENAME(space_nonblank_noncntrl);
226         INIT_TYPENAME(space_blank_noncntrl);
227         INIT_TYPENAME(cntrl_space_nonblank);
228         INIT_TYPENAME(cntrl_space_blank);
229         INIT_TYPENAME(cntrl_nonspace);
230
231         memset(&cttable, 0, sizeof(table_data));
232         memset(&ultable, 0, sizeof(table_data));
233 #if 0
234         memset(combtable, 0, sizeof table_data);
235         memset(widthtable, 0, sizeof table_data);
236 #endif
237         setvbuf(stdout, NULL, _IONBF, 0);
238
239         while (--argc) {
240                 ++argv;
241                 if (!strcmp(*argv, "-v")) {
242                         ++verbose;
243                         continue;
244                 } else if (!setlocale(LC_CTYPE, *argv)) {
245                         verbose_msg("setlocale(LC_CTYPE,%s) failed!  Skipping this locale...\n", *argv);
246                         continue;
247                 }
248
249                 if (!(totitle = wctrans("totitle"))) {
250                         verbose_msg("no totitle transformation.\n");
251                 }
252                 if (!(is_comb = wctype("combining"))) {
253                         verbose_msg("no combining wctype.\n");
254                 }
255                 if (!(is_comb3 = wctype("combining_level3"))) {
256                         verbose_msg("no combining_level3 wctype.\n");
257                 }
258
259                 if (!built) {
260                 built = 1;
261                 ul_count = 1;
262                 uldiff[0].u = uldiff[0].l = 0;
263
264                 memset(wct, 0, sizeof(wct));
265                 memset(combt, 0, sizeof(combt));
266                 memset(widtht, 0, sizeof(widtht));
267
268                 for (i = 0 ; i < 16 ; i++) {
269                         typecount[i] = 0;
270                 }
271
272                 for (c=0 ; c <= RANGE ; c++) {
273                         if (iswdigit(c)) {
274                                 d = __CTYPE_digit;
275                         } else if (iswalpha(c)) {
276                                 d = __CTYPE_alpha_nonupper_nonlower;
277                                 if (iswlower(c)) {
278                                         d = __CTYPE_alpha_lower;
279                                         if (iswupper(c)) {
280                                                 d = __CTYPE_alpha_upper_lower;
281                                         }
282                                 } else if (iswupper(c)) {
283                                         d = __CTYPE_alpha_upper;
284                                 }
285                         } else if (iswpunct(c)) {
286                                 d = __CTYPE_punct;
287                         } else if (iswgraph(c)) {
288                                 d = __CTYPE_graph;
289                         } else if (iswprint(c)) {
290                                 d = __CTYPE_print_space_nonblank;
291                                 if (iswblank(c)) {
292                                         d = __CTYPE_print_space_blank;
293                                 }
294                         } else if (iswspace(c) && !iswcntrl(c)) {
295                                 d = __CTYPE_space_nonblank_noncntrl;
296                                 if (iswblank(c)) {
297                                         d = __CTYPE_space_blank_noncntrl;
298                                 }
299                         } else if (iswcntrl(c)) {
300                                 d = __CTYPE_cntrl_nonspace;
301                                 if (iswspace(c)) {
302                                         d = __CTYPE_cntrl_space_nonblank;
303                                         if (iswblank(c)) {
304                                                 d = __CTYPE_cntrl_space_blank;
305                                         }
306                                 }
307                         } else {
308                                 d = __CTYPE_unclassified;
309                         }
310
311                         ++typecount[d];
312
313 #if 0
314                         if (iswspace(c)) {
315                                 if (iswblank(c)) {
316                                         verbose_msg("%#8x : space  blank\n", c);
317                                 } else {
318                                         verbose_msg("%#8x : space\n", c);
319                                 }
320                         }
321 #endif
322
323 #if 0
324                         if (c < 256) {
325                                 unsigned int glibc;
326
327                                 glibc = 0;
328                                 if (isalnum(c)) ++glibc; glibc <<= 1;
329                                 if (isalpha(c)) ++glibc; glibc <<= 1;
330                                 if (isblank(c)) ++glibc; glibc <<= 1;
331                                 if (iscntrl(c)) ++glibc; glibc <<= 1;
332                                 if (isdigit(c)) ++glibc; glibc <<= 1;
333                                 if (isgraph(c)) ++glibc; glibc <<= 1;
334                                 if (islower(c)) ++glibc; glibc <<= 1;
335                                 if (isprint(c)) ++glibc; glibc <<= 1;
336                                 if (ispunct(c)) ++glibc; glibc <<= 1;
337                                 if (isspace(c)) ++glibc; glibc <<= 1;
338                                 if (isupper(c)) ++glibc; glibc <<= 1;
339                                 if (isxdigit(c)) ++glibc;
340                                 verbose_msg("%#8x : ctype %#4x\n", c, glibc);
341                         }
342 #endif
343
344 #if 1
345                         /* Paranoid checking... */
346                         {
347                                 unsigned int glibc;
348                                 unsigned int mine;
349
350                                 glibc = 0;
351                                 if (iswalnum(c)) ++glibc; glibc <<= 1;
352                                 if (iswalpha(c)) ++glibc; glibc <<= 1;
353                                 if (iswblank(c)) ++glibc; glibc <<= 1;
354                                 if (iswcntrl(c)) ++glibc; glibc <<= 1;
355                                 if (iswdigit(c)) ++glibc; glibc <<= 1;
356                                 if (iswgraph(c)) ++glibc; glibc <<= 1;
357                                 if (iswlower(c)) ++glibc; glibc <<= 1;
358                                 if (iswprint(c)) ++glibc; glibc <<= 1;
359                                 if (iswpunct(c)) ++glibc; glibc <<= 1;
360                                 if (iswspace(c)) ++glibc; glibc <<= 1;
361                                 if (iswupper(c)) ++glibc; glibc <<= 1;
362                                 if (iswxdigit(c)) ++glibc;
363
364                                 mine = 0;
365                                 if (mywalnum(c)) ++mine; mine <<= 1;
366                                 if (mywalpha(c)) ++mine; mine <<= 1;
367                                 if (mywblank(c)) ++mine; mine <<= 1;
368                                 if (mywcntrl(c)) ++mine; mine <<= 1;
369                                 if (mywdigit(c)) ++mine; mine <<= 1;
370                                 if (mywgraph(c)) ++mine; mine <<= 1;
371                                 if (mywlower(c)) ++mine; mine <<= 1;
372                                 if (mywprint(c)) ++mine; mine <<= 1;
373                                 if (mywpunct(c)) ++mine; mine <<= 1;
374                                 if (mywspace(c)) ++mine; mine <<= 1;
375                                 if (mywupper(c)) ++mine; mine <<= 1;
376                                 if (mywxdigit(c)) ++mine;
377
378                                 if (glibc != mine) {
379                                         verbose_msg("%#8x : glibc %#4x != %#4x mine  %u\n", c, glibc, mine, d);
380                                         return EXIT_FAILURE;
381                                 }
382
383 #if 0
384                                 if (iswctype(c,is_comb) || iswctype(c,is_comb3)) {
385 /*                                      if (!iswpunct(c)) { */
386                                                 verbose_msg("%#8x : %d %d %#4x\n",
387                                                            c, iswctype(c,is_comb),iswctype(c,is_comb3), glibc);
388 /*                                      } */
389                                 }
390 #endif
391 #if 0
392                                 if (iswctype(c,is_comb) || iswctype(c,is_comb3)) {
393                                         if (!last_comb) {
394                                                 verbose_msg("%#8x - ", c);
395                                                 last_comb = c;
396                                         } else if (last_comb + 1 < c) {
397                                                 verbose_msg("%#8x\n%#8x - ", last_comb, c);
398                                                 last_comb = c;
399                                         } else {
400                                                 last_comb = c;
401                                         }
402                                 }
403 #endif
404                         }
405 #endif
406
407                         combt[c/4] |= ((((!!iswctype(c,is_comb)) << 1) | !!iswctype(c,is_comb3))
408                                                    << ((c & 3) << 1));
409 /*                      comb3t[c/8] |= ((!!iswctype(c,is_comb3)) << (c & 7)); */
410
411 /*                      widtht[c/4] |= (wcwidth(c) << ((c & 3) << 1)); */
412
413                         if (c & 1) {    /* Use the high nibble for odd numbered wchars. */
414                                 d <<= 4;
415                         }
416                         wct[c/2] |= d;
417
418                         l = (long)(int) towlower(c) - c;
419                         u = (long)(int) towupper(c) - c;
420                         ult[c] = 0;
421                         if (l || u) {
422                                 if ((l != (short)l) || (u != (short)u)) {
423                                         verbose_msg("range assumption error!  %x  %ld  %ld\n", c, l, u);
424                                         return EXIT_FAILURE;
425                                 }
426                                 for (i=0 ; i < ul_count ; i++) {
427                                         if ((l == uldiff[i].l) && (u == uldiff[i].u)) {
428                                                 goto found;
429                                         }
430                                 }
431                                 uldiff[ul_count].l = l;
432                                 uldiff[ul_count].u = u;
433                                 ++ul_count;
434                                 if (ul_count > MAXTO) {
435                                         verbose_msg("too many touppers/tolowers!\n");
436                                         return EXIT_FAILURE;
437                                 }
438                         found:
439                                 ult[c] = i;
440                         }
441                 }
442
443                 for (i = 0 ; i < 16 ; i++) {
444                         verbose_msg("typecount[%2d] = %8ld  %s\n", i, typecount[i], typename[i]);
445                 }
446
447                 verbose_msg("optimizing is* table..\n");
448                 n = -1;
449                 smallest = SIZE_MAX;
450                 cttable.ii = NULL;
451                 for (i=0 ; i < 14 ; i++) {
452                         t = newopt(wct, (RANGE/2)+1, i, &cttable);
453                         if (smallest >= t) {
454                                 n = i;
455                                 smallest = t;
456 /*                      } else { */
457 /*                              break; */
458                         }
459                 }
460                 verbose_msg("smallest = %zu\n", smallest);
461                 if (!(cttable.ii = malloc(smallest))) {
462                         verbose_msg("couldn't allocate space!\n");
463                         return EXIT_FAILURE;
464                 }
465                 smallest = SIZE_MAX;
466                 newopt(wct, (RANGE/2)+1, n, &cttable);
467                 ++cttable.ti_shift;             /* correct for nibble mode */
468
469
470
471                 verbose_msg("optimizing u/l-to table..\n");
472                 smallest = SIZE_MAX;
473                 ultable.ii = NULL;
474                 for (i=0 ; i < 14 ; i++) {
475                         t = newopt(ult, RANGE+1, i, &ultable);
476                         if (smallest >= t) {
477                                 n = i;
478                                 smallest = t;
479 /*                      } else { */
480 /*                              break; */
481                         }
482                 }
483                 verbose_msg("%zu (smallest) + %zu (u/l diffs) = %zu\n",
484                            smallest, 4 * ul_count, smallest + 4 * ul_count);
485                 verbose_msg("smallest = %zu\n", smallest);
486                 if (!(ultable.ii = malloc(smallest))) {
487                         verbose_msg("couldn't allocate space!\n");
488                         return EXIT_FAILURE;
489                 }
490                 smallest = SIZE_MAX;
491                 newopt(ult, RANGE+1, n, &ultable);
492
493
494 #if 0
495                 verbose_msg("optimizing comb table..\n");
496                 smallest = SIZE_MAX;
497                 combtable.ii = NULL;
498                 for (i=0 ; i < 14 ; i++) {
499                         t = newopt(combt, sizeof(combt), i, &combtable);
500                         if (smallest >= t) {
501                                 n = i;
502                                 smallest = t;
503 /*                      } else { */
504 /*                              break; */
505                         }
506                 }
507                 verbose_msg("smallest = %zu\n", smallest);
508                 if (!(combtable.ii = malloc(smallest))) {
509                         verbose_msg("couldn't allocate space!\n");
510                         return EXIT_FAILURE;
511                 }
512                 smallest = SIZE_MAX;
513                 newopt(combt, sizeof(combt), n, &combtable);
514                 combtable.ti_shift += 4; /* correct for 4 entries per */
515 #endif
516
517
518 #if 0
519                 verbose_msg("optimizing width table..\n");
520                 smallest = SIZE_MAX;
521                 widthtable.ii = NULL;
522                 for (i=0 ; i < 14 ; i++) {
523                         t = newopt(widtht, sizeof(widtht), i, &widthtable);
524                         if (smallest >= t) {
525                                 n = i;
526                                 smallest = t;
527 /*                      } else { */
528 /*                              break; */
529                         }
530                 }
531                 verbose_msg("smallest = %zu\n", smallest);
532                 if (!(widthtable.ii = malloc(smallest))) {
533                         verbose_msg("couldn't allocate space!\n");
534                         return EXIT_FAILURE;
535                 }
536                 smallest = SIZE_MAX;
537                 newopt(widtht, sizeof(widtht), n, &widthtable);
538                 widthtable.ti_shift += 4; /* correct for 4 entries per */
539 #endif
540
541 #if 0
542                 verbose_msg("optimizing comb3 table..\n");
543                 smallest = SIZE_MAX;
544                 comb3table.ii = NULL;
545                 for (i=0 ; i < 14 ; i++) {
546                         t = newopt(comb3t, sizeof(comb3t), i, &comb3table);
547                         if (smallest >= t) {
548                                 n = i;
549                                 smallest = t;
550 /*                      } else { */
551 /*                              break; */
552                         }
553                 }
554                 verbose_msg("smallest = %zu\n", smallest);
555                 if (!(comb3table.ii = malloc(smallest))) {
556                         verbose_msg("couldn't allocate space!\n");
557                         return EXIT_FAILURE;
558                 }
559                 smallest = SIZE_MAX;
560                 newopt(comb3t, sizeof(comb3t), n, &comb3table);
561                 comb3table.ti_shift += 8; /* correct for 4 entries per */
562 #endif
563
564                 dump_table_data(&cttable);
565                 dump_table_data(&ultable);
566 #if 0
567                 dump_table_data(&combtable);
568 #endif
569                 }
570
571                 verbose_msg("verifying for %s...\n", *argv);
572 #if RANGE == 0xffffU
573                 for (c=0 ; c <= 0xffffUL ; c++)
574 #else
575                 for (c=0 ; c <= 0x10ffffUL ; c++)
576 #endif
577                         {
578                         unsigned int glibc;
579                         unsigned int mine;
580                         unsigned int upper, lower;
581
582 #if 0
583 #if RANGE < 0x10000UL
584                         if (c == 0x10000UL) {
585                                 c = 0x30000UL;  /* skip 1st and 2nd sup planes */
586                         }
587 #elif RANGE < 0x20000UL
588                         if (c == 0x20000UL) {
589                                 c = 0x30000UL;  /* skip 2nd sup planes */
590                         }
591 #endif
592 #endif
593
594                         glibc = 0;
595                         if (iswalnum(c)) ++glibc; glibc <<= 1;
596                         if (iswalpha(c)) ++glibc; glibc <<= 1;
597                         if (iswblank(c)) ++glibc; glibc <<= 1;
598                         if (iswcntrl(c)) ++glibc; glibc <<= 1;
599                         if (iswdigit(c)) ++glibc; glibc <<= 1;
600                         if (iswgraph(c)) ++glibc; glibc <<= 1;
601                         if (iswlower(c)) ++glibc; glibc <<= 1;
602                         if (iswprint(c)) ++glibc; glibc <<= 1;
603                         if (iswpunct(c)) ++glibc; glibc <<= 1;
604                         if (iswspace(c)) ++glibc; glibc <<= 1;
605                         if (iswupper(c)) ++glibc; glibc <<= 1;
606                         if (iswxdigit(c)) ++glibc;
607
608                         {
609                                 unsigned int u;
610                                 int n, sc;
611                                 int i0, i1;
612
613                                 u = c;
614                                 if (u <= RANGE) {
615                                         sc = u & ((1 << cttable.ti_shift) - 1);
616                                         u >>= cttable.ti_shift;
617                                         n = u & ((1 << cttable.ii_shift) - 1);
618                                         u >>= cttable.ii_shift;
619
620                                         i0 = cttable.ii[u];
621                                         i0 <<= cttable.ii_shift;
622                                         i1 = cttable.ti[i0 + n];
623                                         i1 <<= (cttable.ti_shift-1);
624                                         d = cttable.ut[i1 + (sc >> 1)];
625
626                                         if (sc & 1) {
627                                                 d >>= 4;
628                                         }
629                                         d &= 0x0f;
630                                 } else if ((((unsigned int)(c - 0xe0020UL)) <= 0x5f) || (c == 0xe0001UL)){
631                                         d = __CTYPE_punct;
632                                 } else if (((unsigned int)(c - 0xf0000UL)) < 0x20000UL) {
633                                         if ((c & 0xffffU) <= 0xfffdU) {
634                                                 d = __CTYPE_punct;
635                                         } else {
636                                                 d = __CTYPE_unclassified;
637                                         }
638                                 } else {
639                                         d = __CTYPE_unclassified;
640                                 }
641
642                         mine = 0;
643                         if (mywalnum(c)) ++mine; mine <<= 1;
644                         if (mywalpha(c)) ++mine; mine <<= 1;
645                         if (mywblank(c)) ++mine; mine <<= 1;
646                         if (mywcntrl(c)) ++mine; mine <<= 1;
647                         if (mywdigit(c)) ++mine; mine <<= 1;
648                         if (mywgraph(c)) ++mine; mine <<= 1;
649                         if (mywlower(c)) ++mine; mine <<= 1;
650                         if (mywprint(c)) ++mine; mine <<= 1;
651                         if (mywpunct(c)) ++mine; mine <<= 1;
652                         if (mywspace(c)) ++mine; mine <<= 1;
653                         if (mywupper(c)) ++mine; mine <<= 1;
654                         if (mywxdigit(c)) ++mine;
655
656                         if (glibc != mine) {
657                                 verbose_msg("%#8x : glibc %#4x != %#4x mine %d\n", c, glibc, mine, d);
658                                 if (c < 0x30000UL) {
659                                         verbose_msg("sc=%#x u=%#x n=%#x i0=%#x i1=%#x\n", sc, u, n, i0, i1);
660                                 }
661                         }
662                                 upper = lower = u = c;
663                                 if (u <= RANGE) {
664                                         sc = u & ((1 << ultable.ti_shift) - 1);
665                                         u >>= ultable.ti_shift;
666                                         n = u & ((1 << ultable.ii_shift) - 1);
667                                         u >>= ultable.ii_shift;
668
669                                         i0 = ultable.ii[u];
670                                         i0 <<= ultable.ii_shift;
671                                         i1 = ultable.ti[i0 + n];
672                                         i1 <<= (ultable.ti_shift);
673                                         i1 += sc;
674                                         i0 = ultable.ut[i1];
675                                         upper = c + uldiff[i0].u;
676                                         lower = c + uldiff[i0].l;
677                                 }
678
679                         if (towupper(c) != upper) {
680                                 verbose_msg("%#8x : towupper glibc %#4x != %#4x mine\n",
681                                            c, towupper(c), upper);
682                         }
683
684                         if (towlower(c) != lower) {
685                                 verbose_msg("%#8x : towlower glibc %#4x != %#4x mine   i0 = %d\n",
686                                            c, towlower(c), lower, i0);
687                         }
688
689                         if (totitle && ((tt = towctrans(c, totitle)) != upper)) {
690                                 verbose_msg("%#8x : totitle glibc %#4lx != %#4x mine   i0 = %d\n",
691                                            c, tt, upper, i0);
692                         }
693                         }
694
695
696                         if ((c & 0xfff) == 0xfff) verbose_msg(".");
697                 }
698                 verbose_msg("done\n");
699         }
700
701         if (built) {
702                 FILE *fp;
703
704                 if (!(fp = fopen("wctables.h", "w"))) {
705                         verbose_msg("cannot open output file 'wctables.h'!\n");
706                         return EXIT_FAILURE;
707                 }
708
709                 fprintf(fp, "#define __LOCALE_DATA_WC_TABLE_DOMAIN_MAX  %#8lx\n\n",
710                                 (unsigned long) RANGE);
711                 output_table(fp, "ctype", &cttable);
712                 output_table(fp, "uplow", &ultable);
713
714
715 #warning fix the upper bound on the upper/lower tables... save 200 bytes or so
716                 fprintf(fp, "#define __LOCALE_DATA_WCuplow_diffs  %7u\n", ul_count);
717                 fprintf(fp, "\n#ifdef WANT_WCuplow_diff_data\n\n");
718                 fprintf(fp, "\nstatic const short __LOCALE_DATA_WCuplow_diff_data[%zu] = {",
719                            2 * (size_t) ul_count);
720                 for (i=0 ; i < ul_count ; i++) {
721                         if (i % 4 == 0) {
722                                 fprintf(fp, "\n");
723                         }
724                         fprintf(fp, " %6d, %6d,", uldiff[i].u, uldiff[i].l);
725                 }
726                 fprintf(fp, "\n};\n\n");
727                 fprintf(fp, "#endif /* WANT_WCuplow_diff_data */\n\n");
728
729
730 /*              output_table(fp, "comb", &combtable); */
731 /*              output_table(fp, "width", &widthtable); */
732
733                 fclose(fp);
734         }
735
736         return !built;
737 }
738
739 size_t newopt(unsigned char *ut, size_t usize, int shift, table_data *tbl)
740 {
741         static int recurse;
742         unsigned char *ti[RANGE+1];     /* table index */
743         size_t numblocks;
744         size_t blocksize;
745         size_t uniq;
746         size_t i, j;
747         size_t smallest, t;
748         unsigned char *ii_save;
749         int uniqblock[256];
750         unsigned char uit[RANGE+1];
751         int shift2;
752
753         memset(uniqblock, 0x00, sizeof(uniqblock));
754
755         ii_save = NULL;
756         blocksize = 1 << shift;
757         numblocks = usize >> shift;
758
759         /* init table index */
760         for (i=j=0 ; i < numblocks ; i++) {
761                 ti[i] = ut + j;
762                 j += blocksize;
763         }
764
765         /* sort */
766         nu_val = blocksize;
767         qsort(ti, numblocks, sizeof(unsigned char *), nu_memcmp);
768
769         uniq = 1;
770         uit[(ti[0]-ut)/blocksize] = 0;
771         for (i=1 ; i < numblocks ; i++) {
772                 if (memcmp(ti[i-1], ti[i], blocksize) < 0) {
773                         if (++uniq > 255) {
774                                 break;
775                         }
776                         uniqblock[uniq - 1] = i;
777                 }
778 #if 1
779                 else if (memcmp(ti[i-1], ti[i], blocksize) > 0) {
780                         verbose_msg("bad sort %i!\n", i);
781                         abort();
782                 }
783 #endif
784                 uit[(ti[i]-ut)/blocksize] = uniq - 1;
785         }
786
787         smallest = SIZE_MAX;
788         shift2 = -1;
789         if (uniq <= 255) {
790                 smallest = numblocks + uniq * blocksize;
791                 if (!recurse) {
792                         ++recurse;
793                         for (j=1 ; j < 14 ; j++) {
794                                 if ((numblocks >> j) < 2) break;
795                                 if (tbl) {
796                                         ii_save = tbl->ii;
797                                         tbl->ii = NULL;
798                                 }
799                                 if ((t = newopt(uit, numblocks, j, tbl)) < SIZE_MAX) {
800                                         t += uniq * blocksize;
801                                 }
802                                 if (tbl) {
803                                         tbl->ii = ii_save;
804                                 }
805                                 if (smallest >= t) {
806                                         shift2 = j;
807                                         smallest = t;
808                                         if (!tbl->ii) {
809                                                 verbose_msg("ishift %zu  tshift %zu  size %zu\n",
810                                                            shift2, shift, t);
811                                         }
812 /*                              } else { */
813 /*                                      break; */
814                                 }
815                         }
816                         --recurse;
817                 }
818         } else {
819                 return SIZE_MAX;
820         }
821
822         if (tbl->ii) {
823                 if (recurse) {
824                         tbl->ii_shift = shift;
825                         tbl->ii_len = numblocks;
826                         memcpy(tbl->ii, uit, numblocks);
827                         tbl->ti = tbl->ii + tbl->ii_len;
828                         tbl->ti_len = uniq * blocksize;
829                         for (i=0 ; i < uniq ; i++) {
830                                 memcpy(tbl->ti + i * blocksize, ti[uniqblock[i]], blocksize);
831                         }
832                 } else {
833                         ++recurse;
834                         verbose_msg("setting ishift %zu  tshift %zu\n",
835                                                            shift2, shift);
836                         newopt(uit, numblocks, shift2, tbl);
837                         --recurse;
838                         tbl->ti_shift = shift;
839                         tbl->ut_len = uniq * blocksize;
840                         tbl->ut = tbl->ti + tbl->ti_len;
841                         for (i=0 ; i < uniq ; i++) {
842                                 memcpy(tbl->ut + i * blocksize, ti[uniqblock[i]], blocksize);
843                         }
844                 }
845         }
846         return smallest;
847 }
848 /* vi: set sw=4 ts=4: */