OSDN Git Service

- trim any trailing whitespace
[uclinux-h8/uClibc.git] / extra / locale / gen_wctype.c
1 /*
2  * Copyright (C) 2000-2006 Erik Andersen <andersen@uclibc.org>
3  *
4  * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
5  */
6 #define _GNU_SOURCE
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <locale.h>
11 #include <wctype.h>
12 #include <limits.h>
13 #include <stdint.h>
14 #include <wchar.h>
15 #include <ctype.h>
16
17 #ifndef _CTYPE_H
18 #define _CTYPE_H
19 #endif
20 #ifndef _WCTYPE_H
21 #define _WCTYPE_H
22 #endif
23 #include UCLIBC_CTYPE_HEADER
24
25 /*       0x9 : space  blank */
26 /*       0xa : space */
27 /*       0xb : space */
28 /*       0xc : space */
29 /*       0xd : space */
30 /*      0x20 : space  blank */
31 /*    0x1680 : space  blank */
32 /*    0x2000 : space  blank */
33 /*    0x2001 : space  blank */
34 /*    0x2002 : space  blank */
35 /*    0x2003 : space  blank */
36 /*    0x2004 : space  blank */
37 /*    0x2005 : space  blank */
38 /*    0x2006 : space  blank */
39 /*    0x2008 : space  blank */
40 /*    0x2009 : space  blank */
41 /*    0x200a : space  blank */
42 /*    0x200b : space  blank */
43 /*    0x2028 : space */
44 /*    0x2029 : space */
45 /*    0x3000 : space  blank */
46
47 /*  typecount[ 0] =    88670  C_alpha_nonupper_nonlower */
48 /*  typecount[ 1] =      742  C_alpha_lower */
49 /*  typecount[ 2] =        4  C_alpha_upper_lower */
50 /*  typecount[ 3] =      731  C_alpha_upper */
51 /*  typecount[ 4] =       10  C_digit */
52 /*  typecount[ 5] =    10270  C_punct */
53 /*  typecount[ 6] =        0  C_graph */
54 /*  typecount[ 7] =        0  C_print_space_nonblank */
55 /*  typecount[ 8] =       14  C_print_space_blank */
56 /*  typecount[ 9] =        0  C_space_nonblank_noncntrl */
57 /*  typecount[10] =        0  C_space_blank_noncntrl */
58 /*  typecount[11] =        6  C_cntrl_space_nonblank */
59 /*  typecount[12] =        1  C_cntrl_space_blank */
60 /*  typecount[13] =       60  C_cntrl_nonspace */
61 /*  typecount[14] =    96100  C_unclassified */
62 /*  typecount[15] =        0  empty_slot */
63
64
65
/* RANGE is the highest wide-character value covered by the generated tables
 * (main() builds its tables for 0..RANGE inclusive).  As written, the first
 * branch is active, so RANGE is 0x2ffff; the 0x1ffff branch is never
 * selected unless its "#elif 0" is edited as well. */
66 /* Set to #if 0 to restrict wchars to 16 bits. */
67 #if 1
68 #define RANGE 0x2ffffUL
69 #elif 0
70 #define RANGE 0x1ffffUL
71 #else
72 #define RANGE 0xffffUL                  /* Restrict for 16-bit wchar_t... */
73 #endif
74
/* Disabled copy of the class-name table.  main() builds its own typename[]
 * at run time, indexed by the __CTYPE_* constants of the included ctype
 * header.
 * NOTE(review): the sample typecount output recorded near the top of this
 * file lists C_alpha_nonupper_nonlower at index 0 and C_unclassified at
 * index 14, so the order below looks stale -- confirm before re-enabling. */
75 #if 0
76 /* Classification codes. */
77
78 static const char *typename[] = {
79         "C_unclassified",
80         "C_alpha_nonupper_nonlower",
81         "C_alpha_lower",
82         "C_alpha_upper_lower",
83         "C_alpha_upper",
84         "C_digit",
85         "C_punct",
86         "C_graph",
87         "C_print_space_nonblank",
88         "C_print_space_blank",
89         "C_space_nonblank_noncntrl",
90         "C_space_blank_noncntrl",
91         "C_cntrl_space_nonblank",
92         "C_cntrl_space_blank",
93         "C_cntrl_nonspace",
94         "empty_slot"
95 };
96 #endif
97
98 #if 0
99 /* Taking advantage of the C99 mutual-exclusion guarantees for the various
100  * (w)ctype classes, including the descriptions of printing and control
101  * (w)chars, we can place each in one of the following mutually-exclusive
102  * subsets.  Since there are fewer than 16, we can store the data for
103  * each (w)char in a nibble.  In contrast, glibc uses an unsigned int
104  * per (w)char, with one bit flag for each is* type.  While this allows
105  * a simple '&' operation to determine the type vs. a range test and a
106  * little special handling for the "blank" and "xdigit" types in my
107  * approach, it also uses 8 times the space for the tables on the typical
108  * 32-bit archs we support. */
/* This enum is compiled out; the live __CTYPE_* constants are expected to
 * come from the header included via UCLIBC_CTYPE_HEADER.
 * NOTE(review): the sample typecount output recorded near the top of this
 * file puts C_unclassified at index 14 rather than 0, so the numbering
 * below may no longer match that header -- confirm before re-enabling. */
109 enum {
110         __CTYPE_unclassified = 0,
111         __CTYPE_alpha_nonupper_nonlower,
112         __CTYPE_alpha_lower,
113         __CTYPE_alpha_upper_lower,
114         __CTYPE_alpha_upper,
115         __CTYPE_digit,
116         __CTYPE_punct,
117         __CTYPE_graph,
118         __CTYPE_print_space_nonblank,
119         __CTYPE_print_space_blank,
120         __CTYPE_space_nonblank_noncntrl,
121         __CTYPE_space_blank_noncntrl,
122         __CTYPE_cntrl_space_nonblank,
123         __CTYPE_cntrl_space_blank,
124         __CTYPE_cntrl_nonspace,
125 };
126 #endif
127
/* Hex-digit test on a (class code, character value) pair: true when class
 * code D passes __CTYPE_isdigit, or when X with bit 0x20 set lies in
 * 'a'..'f' (the OR folds ASCII upper case onto lower case so one unsigned
 * range check covers both).  __CTYPE_isdigit is not defined in this file;
 * presumably it comes from the header included via UCLIBC_CTYPE_HEADER. */
128 #define __CTYPE_isxdigit(D,X) \
129         (__CTYPE_isdigit(D) || (((unsigned int)(((X)|0x20) - 'a')) <= 5))
130
/* Table-side counterparts of the isw*() predicates, used by main() to
 * cross-check the generated class codes against the host libc.
 * CAUTION: every macro except mywxdigit ignores its argument x; they all
 * expand to a test on a variable literally named `d`, which must be in
 * scope at the point of use and hold the class code of the character. */
131 #define mywalnum(x)             __CTYPE_isalnum(d)
132 #define mywalpha(x)             __CTYPE_isalpha(d)
133 #define mywblank(x)     __CTYPE_isblank(d)
134 #define mywcntrl(x)             __CTYPE_iscntrl(d)
135 #define mywdigit(x)             __CTYPE_isdigit(d)
136 #define mywgraph(x)             __CTYPE_isgraph(d)
137 #define mywlower(x)             __CTYPE_islower(d)
138 #define mywprint(x)             __CTYPE_isprint(d)
139 #define mywpunct(x)             __CTYPE_ispunct(d)
140 #define mywspace(x)             __CTYPE_isspace(d)
141 #define mywupper(x)             __CTYPE_isupper(d)
142 #define mywxdigit(x)    __CTYPE_isxdigit(d,x)
143
/* One distinct pair of case-conversion offsets.  main() stores each pair
 * once in its uldiff[] array and records, per character, the index of the
 * matching pair. */
144 typedef struct {
145         short l;        /* towlower(c) - c */
146         short u;        /* towupper(c) - c */
147 } uldiff_entry;
148
/* Description of one multi-level lookup table.  As consumed by the
 * verification code in main(): the character value is split into a low
 * ti_shift-bit part, a middle ii_shift-bit part and the remaining high
 * bits; the high bits index ii, that result (shifted by ii_shift) plus the
 * middle part indexes ti, and that result (shifted by ti_shift) plus the
 * low part selects the entry in ut.  For the nibble-packed ctype table
 * main() bumps ti_shift by one after building.
 * The three arrays are written out back to back by output_table().
 * NOTE(review): the fields are presumably filled in by newopt(); its body
 * is not visible here -- confirm. */
149 typedef struct {
150         uint16_t ii_len;        /* number of bytes in ii */
151         uint16_t ti_len;        /* number of bytes in ti */
152         uint16_t ut_len;        /* number of bytes in ut */
153
154         unsigned char ii_shift; /* width in bits of the middle index part */
155         unsigned char ti_shift; /* width in bits of the low index part */
156
157         unsigned char *ii;      /* first-level index */
158         unsigned char *ti;      /* second-level index */
159         unsigned char *ut;      /* final data bytes */
160 } table_data;
161
162
163 void output_table(FILE *fp, const char *name, table_data *tbl)
164 {
165         size_t i;
166
167         fprintf(fp, "#define __LOCALE_DATA_WC%s_II_LEN    %7u\n", name, tbl->ii_len);
168         fprintf(fp, "#define __LOCALE_DATA_WC%s_TI_LEN    %7u\n", name, tbl->ti_len);
169         fprintf(fp, "#define __LOCALE_DATA_WC%s_UT_LEN    %7u\n", name, tbl->ut_len);
170
171         fprintf(fp, "#define __LOCALE_DATA_WC%s_II_SHIFT  %7u\n", name, tbl->ii_shift);
172         fprintf(fp, "#define __LOCALE_DATA_WC%s_TI_SHIFT  %7u\n", name, tbl->ti_shift);
173
174         fprintf(fp, "\n#ifdef WANT_WC%s_data\n", name);
175
176         i = tbl->ii_len + tbl->ti_len + tbl->ut_len;
177         fprintf(fp, "\nstatic const unsigned char __LOCALE_DATA_WC%s_data[%zu] = {", name, i);
178         for (i=0 ; i < tbl->ii_len ; i++) {
179                 if (i % 12 == 0) {
180                         fprintf(fp, "\n");
181                 }
182                 fprintf(fp, " %#04x,", tbl->ii[i]);
183         }
184         for (i=0 ; i < tbl->ti_len ; i++) {
185                 if (i % 12 == 0) {
186                         fprintf(fp, "\n");
187                 }
188                 fprintf(fp, " %#04x,", tbl->ti[i]);
189         }
190         for (i=0 ; i < tbl->ut_len ; i++) {
191                 if (i % 12 == 0) {
192                         fprintf(fp, "\n");
193                 }
194                 fprintf(fp, " %#04x,", tbl->ut[i]);
195         }
196         fprintf(fp, "\n};\n\n");
197
198         fprintf(fp, "#endif /* WANT_WC%s_data */\n\n", name);
199 }
200
201 static void dump_table_data(table_data *tbl)
202 {
203         printf("ii_shift = %d  ti_shift = %d\n"
204                    "ii_len = %d  ti_len = %d  ut_len = %d\n"
205                    "total = %d\n",
206                    tbl->ii_shift, tbl->ti_shift,
207                    tbl->ii_len, tbl->ti_len, tbl->ut_len,
208                    (int) tbl->ii_len + (int) tbl->ti_len + (int) tbl->ut_len);
209 }
210
/* For sorting the blocks of unsigned chars: number of bytes nu_memcmp()
 * compares per block.  Must be set before nu_memcmp() is used. */
static size_t nu_val;

/*
 * qsort()-style comparator for an array of pointers to byte blocks.
 *
 *   a, b - each points at an element of the sorted array, i.e. at a
 *          pointer to the first byte of a block
 *
 * Returns the memcmp() ordering of the first nu_val bytes of the two
 * blocks.  The arguments are read through const-qualified pointers so the
 * comparator no longer casts away the const that qsort() hands it.
 */
int nu_memcmp(const void *a, const void *b)
{
	const unsigned char *const *lhs = a;
	const unsigned char *const *rhs = b;

	return memcmp(*lhs, *rhs, nu_val);
}
218
/* Forward declaration: main() calls newopt() before its definition, which
 * appears later in this file. */
219 static size_t newopt(unsigned char *ut, size_t usize, int shift, table_data *tbl);
220
/* Capacity of main()'s uldiff[] array.  Indices into that array are stored
 * in an unsigned char table, so they have to fit in one byte. */
221 #define MAXTO           255                     /* Restrict to minimal unsigned char max. */
222
223 int main(int argc, char **argv)
224 {
225         long int u, l, tt;
226         size_t smallest, t;
227         unsigned int c;
228         unsigned int d;
229         int i, n;
230         int ul_count = 0;
231         uldiff_entry uldiff[MAXTO];
232         table_data cttable;
233         table_data ultable;
234         table_data combtable;
235         table_data widthtable;
236         long int last_comb = 0;
237
238         unsigned char wct[(RANGE/2)+1]; /* wctype table (nibble per wchar) */
239         unsigned char ult[RANGE+1];     /* upper/lower table */
240         unsigned char combt[(RANGE/4)+1];       /* combining */
241         unsigned char widtht[(RANGE/4)+1];      /* width */
242         wctrans_t totitle;
243         wctype_t is_comb, is_comb3;
244
245         long int typecount[16];
246         const char *typename[16];
247         static const char empty_slot[] = "empty_slot";
248         int built = 0;
249
250 #define INIT_TYPENAME(X) typename[__CTYPE_##X] = "C_" #X
251
252         for (i=0 ; i < 16 ; i++) {
253                 typename[i] = empty_slot;
254         }
255
256         INIT_TYPENAME(unclassified);
257         INIT_TYPENAME(alpha_nonupper_nonlower);
258         INIT_TYPENAME(alpha_lower);
259         INIT_TYPENAME(alpha_upper_lower);
260         INIT_TYPENAME(alpha_upper);
261         INIT_TYPENAME(digit);
262         INIT_TYPENAME(punct);
263         INIT_TYPENAME(graph);
264         INIT_TYPENAME(print_space_nonblank);
265         INIT_TYPENAME(print_space_blank);
266         INIT_TYPENAME(space_nonblank_noncntrl);
267         INIT_TYPENAME(space_blank_noncntrl);
268         INIT_TYPENAME(cntrl_space_nonblank);
269         INIT_TYPENAME(cntrl_space_blank);
270         INIT_TYPENAME(cntrl_nonspace);
271
272         setvbuf(stdout, NULL, _IONBF, 0);
273
274         while (--argc) {
275                 if (!setlocale(LC_CTYPE, *++argv)) {
276                         printf("setlocale(LC_CTYPE,%s) failed!  Skipping this locale...\n", *argv);
277                         continue;
278                 }
279
280                 if (!(totitle = wctrans("totitle"))) {
281                         printf("no totitle transformation.\n");
282                 }
283                 if (!(is_comb = wctype("combining"))) {
284                         printf("no combining wctype.\n");
285                 }
286                 if (!(is_comb3 = wctype("combining_level3"))) {
287                         printf("no combining_level3 wctype.\n");
288                 }
289
290                 if (!built) {
291                 built = 1;
292                 ul_count = 1;
293                 uldiff[0].u = uldiff[0].l = 0;
294
295                 memset(wct, 0, sizeof(wct));
296                 memset(combt, 0, sizeof(combt));
297                 memset(widtht, 0, sizeof(widtht));
298
299                 for (i = 0 ; i < 16 ; i++) {
300                         typecount[i] = 0;
301                 }
302
303                 for (c=0 ; c <= RANGE ; c++) {
304                         if (iswdigit(c)) {
305                                 d = __CTYPE_digit;
306                         } else if (iswalpha(c)) {
307                                 d = __CTYPE_alpha_nonupper_nonlower;
308                                 if (iswlower(c)) {
309                                         d = __CTYPE_alpha_lower;
310                                         if (iswupper(c)) {
311                                                 d = __CTYPE_alpha_upper_lower;
312                                         }
313                                 } else if (iswupper(c)) {
314                                         d = __CTYPE_alpha_upper;
315                                 }
316                         } else if (iswpunct(c)) {
317                                 d = __CTYPE_punct;
318                         } else if (iswgraph(c)) {
319                                 d = __CTYPE_graph;
320                         } else if (iswprint(c)) {
321                                 d = __CTYPE_print_space_nonblank;
322                                 if (iswblank(c)) {
323                                         d = __CTYPE_print_space_blank;
324                                 }
325                         } else if (iswspace(c) && !iswcntrl(c)) {
326                                 d = __CTYPE_space_nonblank_noncntrl;
327                                 if (iswblank(c)) {
328                                         d = __CTYPE_space_blank_noncntrl;
329                                 }
330                         } else if (iswcntrl(c)) {
331                                 d = __CTYPE_cntrl_nonspace;
332                                 if (iswspace(c)) {
333                                         d = __CTYPE_cntrl_space_nonblank;
334                                         if (iswblank(c)) {
335                                                 d = __CTYPE_cntrl_space_blank;
336                                         }
337                                 }
338                         } else {
339                                 d = __CTYPE_unclassified;
340                         }
341
342                         ++typecount[d];
343
344 #if 0
345                         if (iswspace(c)) {
346                                 if (iswblank(c)) {
347                                         printf("%#8x : space  blank\n", c);
348                                 } else {
349                                         printf("%#8x : space\n", c);
350                                 }
351                         }
352 #endif
353
354 #if 0
355                         if (c < 256) {
356                                 unsigned int glibc;
357
358                                 glibc = 0;
359                                 if (isalnum(c)) ++glibc; glibc <<= 1;
360                                 if (isalpha(c)) ++glibc; glibc <<= 1;
361                                 if (isblank(c)) ++glibc; glibc <<= 1;
362                                 if (iscntrl(c)) ++glibc; glibc <<= 1;
363                                 if (isdigit(c)) ++glibc; glibc <<= 1;
364                                 if (isgraph(c)) ++glibc; glibc <<= 1;
365                                 if (islower(c)) ++glibc; glibc <<= 1;
366                                 if (isprint(c)) ++glibc; glibc <<= 1;
367                                 if (ispunct(c)) ++glibc; glibc <<= 1;
368                                 if (isspace(c)) ++glibc; glibc <<= 1;
369                                 if (isupper(c)) ++glibc; glibc <<= 1;
370                                 if (isxdigit(c)) ++glibc;
371                                 printf("%#8x : ctype %#4x\n", c, glibc);
372                         }
373 #endif
374
375 #if 1
376                         /* Paranoid checking... */
377                         {
378                                 unsigned int glibc;
379                                 unsigned int mine;
380
381                                 glibc = 0;
382                                 if (iswalnum(c)) ++glibc; glibc <<= 1;
383                                 if (iswalpha(c)) ++glibc; glibc <<= 1;
384                                 if (iswblank(c)) ++glibc; glibc <<= 1;
385                                 if (iswcntrl(c)) ++glibc; glibc <<= 1;
386                                 if (iswdigit(c)) ++glibc; glibc <<= 1;
387                                 if (iswgraph(c)) ++glibc; glibc <<= 1;
388                                 if (iswlower(c)) ++glibc; glibc <<= 1;
389                                 if (iswprint(c)) ++glibc; glibc <<= 1;
390                                 if (iswpunct(c)) ++glibc; glibc <<= 1;
391                                 if (iswspace(c)) ++glibc; glibc <<= 1;
392                                 if (iswupper(c)) ++glibc; glibc <<= 1;
393                                 if (iswxdigit(c)) ++glibc;
394
395                                 mine = 0;
396                                 if (mywalnum(c)) ++mine; mine <<= 1;
397                                 if (mywalpha(c)) ++mine; mine <<= 1;
398                                 if (mywblank(c)) ++mine; mine <<= 1;
399                                 if (mywcntrl(c)) ++mine; mine <<= 1;
400                                 if (mywdigit(c)) ++mine; mine <<= 1;
401                                 if (mywgraph(c)) ++mine; mine <<= 1;
402                                 if (mywlower(c)) ++mine; mine <<= 1;
403                                 if (mywprint(c)) ++mine; mine <<= 1;
404                                 if (mywpunct(c)) ++mine; mine <<= 1;
405                                 if (mywspace(c)) ++mine; mine <<= 1;
406                                 if (mywupper(c)) ++mine; mine <<= 1;
407                                 if (mywxdigit(c)) ++mine;
408
409                                 if (glibc != mine) {
410                                         printf("%#8x : glibc %#4x != %#4x mine  %u\n", c, glibc, mine, d);
411                                         return EXIT_FAILURE;
412                                 }
413
414 #if 0
415                                 if (iswctype(c,is_comb) || iswctype(c,is_comb3)) {
416 /*                                      if (!iswpunct(c)) { */
417                                                 printf("%#8x : %d %d %#4x\n",
418                                                            c, iswctype(c,is_comb),iswctype(c,is_comb3), glibc);
419 /*                                      } */
420                                 }
421 #endif
422 #if 0
423                                 if (iswctype(c,is_comb) || iswctype(c,is_comb3)) {
424                                         if (!last_comb) {
425                                                 printf("%#8x - ", c);
426                                                 last_comb = c;
427                                         } else if (last_comb + 1 < c) {
428                                                 printf("%#8x\n%#8x - ", last_comb, c);
429                                                 last_comb = c;
430                                         } else {
431                                                 last_comb = c;
432                                         }
433                                 }
434 #endif
435                         }
436 #endif
437
438                         combt[c/4] |= ((((!!iswctype(c,is_comb)) << 1) | !!iswctype(c,is_comb3))
439                                                    << ((c & 3) << 1));
440 /*                      comb3t[c/8] |= ((!!iswctype(c,is_comb3)) << (c & 7)); */
441
442 /*                      widtht[c/4] |= (wcwidth(c) << ((c & 3) << 1)); */
443
444                         if (c & 1) {    /* Use the high nibble for odd numbered wchars. */
445                                 d <<= 4;
446                         }
447                         wct[c/2] |= d;
448
449                         l = (long)(int) towlower(c) - c;
450                         u = (long)(int) towupper(c) - c;
451                         ult[c] = 0;
452                         if (l || u) {
453                                 if ((l != (short)l) || (u != (short)u)) {
454                                         printf("range assumption error!  %x  %ld  %ld\n", c, l, u);
455                                         return EXIT_FAILURE;
456                                 }
457                                 for (i=0 ; i < ul_count ; i++) {
458                                         if ((l == uldiff[i].l) && (u == uldiff[i].u)) {
459                                                 goto found;
460                                         }
461                                 }
462                                 uldiff[ul_count].l = l;
463                                 uldiff[ul_count].u = u;
464                                 ++ul_count;
465                                 if (ul_count > MAXTO) {
466                                         printf("too many touppers/tolowers!\n");
467                                         return EXIT_FAILURE;
468                                 }
469                         found:
470                                 ult[c] = i;
471                         }
472                 }
473
474                 for (i = 0 ; i < 16 ; i++) {
475                         printf("typecount[%2d] = %8ld  %s\n", i, typecount[i], typename[i]);
476                 }
477
478                 printf("optimizing is* table..\n");
479                 n = -1;
480                 smallest = SIZE_MAX;
481                 cttable.ii = NULL;
482                 for (i=0 ; i < 14 ; i++) {
483                         t = newopt(wct, (RANGE/2)+1, i, &cttable);
484                         if (smallest >= t) {
485                                 n = i;
486                                 smallest = t;
487 /*                      } else { */
488 /*                              break; */
489                         }
490                 }
491                 printf("smallest = %zu\n", smallest);
492                 if (!(cttable.ii = malloc(smallest))) {
493                         printf("couldn't allocate space!\n");
494                         return EXIT_FAILURE;
495                 }
496                 smallest = SIZE_MAX;
497                 newopt(wct, (RANGE/2)+1, n, &cttable);
498                 ++cttable.ti_shift;             /* correct for nibble mode */
499
500
501
502                 printf("optimizing u/l-to table..\n");
503                 smallest = SIZE_MAX;
504                 ultable.ii = NULL;
505                 for (i=0 ; i < 14 ; i++) {
506                         t = newopt(ult, RANGE+1, i, &ultable);
507                         if (smallest >= t) {
508                                 n = i;
509                                 smallest = t;
510 /*                      } else { */
511 /*                              break; */
512                         }
513                 }
514                 printf("%zu (smallest) + %zu (u/l diffs) = %zu\n",
515                            smallest, 4 * ul_count, smallest + 4 * ul_count);
516                 printf("smallest = %zu\n", smallest);
517                 if (!(ultable.ii = malloc(smallest))) {
518                         printf("couldn't allocate space!\n");
519                         return EXIT_FAILURE;
520                 }
521                 smallest = SIZE_MAX;
522                 newopt(ult, RANGE+1, n, &ultable);
523
524
525 #if 0
526                 printf("optimizing comb table..\n");
527                 smallest = SIZE_MAX;
528                 combtable.ii = NULL;
529                 for (i=0 ; i < 14 ; i++) {
530                         t = newopt(combt, sizeof(combt), i, &combtable);
531                         if (smallest >= t) {
532                                 n = i;
533                                 smallest = t;
534 /*                      } else { */
535 /*                              break; */
536                         }
537                 }
538                 printf("smallest = %zu\n", smallest);
539                 if (!(combtable.ii = malloc(smallest))) {
540                         printf("couldn't allocate space!\n");
541                         return EXIT_FAILURE;
542                 }
543                 smallest = SIZE_MAX;
544                 newopt(combt, sizeof(combt), n, &combtable);
545                 combtable.ti_shift += 4; /* correct for 4 entries per */
546 #endif
547
548
549 #if 0
550                 printf("optimizing width table..\n");
551                 smallest = SIZE_MAX;
552                 widthtable.ii = NULL;
553                 for (i=0 ; i < 14 ; i++) {
554                         t = newopt(widtht, sizeof(widtht), i, &widthtable);
555                         if (smallest >= t) {
556                                 n = i;
557                                 smallest = t;
558 /*                      } else { */
559 /*                              break; */
560                         }
561                 }
562                 printf("smallest = %zu\n", smallest);
563                 if (!(widthtable.ii = malloc(smallest))) {
564                         printf("couldn't allocate space!\n");
565                         return EXIT_FAILURE;
566                 }
567                 smallest = SIZE_MAX;
568                 newopt(widtht, sizeof(widtht), n, &widthtable);
569                 widthtable.ti_shift += 4; /* correct for 4 entries per */
570 #endif
571
572 #if 0
573                 printf("optimizing comb3 table..\n");
574                 smallest = SIZE_MAX;
575                 comb3table.ii = NULL;
576                 for (i=0 ; i < 14 ; i++) {
577                         t = newopt(comb3t, sizeof(comb3t), i, &comb3table);
578                         if (smallest >= t) {
579                                 n = i;
580                                 smallest = t;
581 /*                      } else { */
582 /*                              break; */
583                         }
584                 }
585                 printf("smallest = %zu\n", smallest);
586                 if (!(comb3table.ii = malloc(smallest))) {
587                         printf("couldn't allocate space!\n");
588                         return EXIT_FAILURE;
589                 }
590                 smallest = SIZE_MAX;
591                 newopt(comb3t, sizeof(comb3t), n, &comb3table);
592                 comb3table.ti_shift += 8; /* correct for 4 entries per */
593 #endif
594
595                 dump_table_data(&cttable);
596                 dump_table_data(&ultable);
597                 dump_table_data(&combtable);
598                 }
599
600                 printf("verifying for %s...\n", *argv);
601 #if RANGE == 0xffffU
602                 for (c=0 ; c <= 0xffffUL ; c++)
603 #else
604                 for (c=0 ; c <= 0x10ffffUL ; c++)
605 #endif
606                         {
607                         unsigned int glibc;
608                         unsigned int mine;
609                         unsigned int upper, lower;
610
611 #if 0
612 #if RANGE < 0x10000UL
613                         if (c == 0x10000UL) {
614                                 c = 0x30000UL;  /* skip 1st and 2nd sup planes */
615                         }
616 #elif RANGE < 0x20000UL
617                         if (c == 0x20000UL) {
618                                 c = 0x30000UL;  /* skip 2nd sup planes */
619                         }
620 #endif
621 #endif
622
623                         glibc = 0;
624                         if (iswalnum(c)) ++glibc; glibc <<= 1;
625                         if (iswalpha(c)) ++glibc; glibc <<= 1;
626                         if (iswblank(c)) ++glibc; glibc <<= 1;
627                         if (iswcntrl(c)) ++glibc; glibc <<= 1;
628                         if (iswdigit(c)) ++glibc; glibc <<= 1;
629                         if (iswgraph(c)) ++glibc; glibc <<= 1;
630                         if (iswlower(c)) ++glibc; glibc <<= 1;
631                         if (iswprint(c)) ++glibc; glibc <<= 1;
632                         if (iswpunct(c)) ++glibc; glibc <<= 1;
633                         if (iswspace(c)) ++glibc; glibc <<= 1;
634                         if (iswupper(c)) ++glibc; glibc <<= 1;
635                         if (iswxdigit(c)) ++glibc;
636
637                         {
638                                 unsigned int u;
639                                 int n, sc;
640                                 int i0, i1;
641
642                                 u = c;
643                                 if (u <= RANGE) {
644                                         sc = u & ((1 << cttable.ti_shift) - 1);
645                                         u >>= cttable.ti_shift;
646                                         n = u & ((1 << cttable.ii_shift) - 1);
647                                         u >>= cttable.ii_shift;
648
649                                         i0 = cttable.ii[u];
650                                         i0 <<= cttable.ii_shift;
651                                         i1 = cttable.ti[i0 + n];
652                                         i1 <<= (cttable.ti_shift-1);
653                                         d = cttable.ut[i1 + (sc >> 1)];
654
655                                         if (sc & 1) {
656                                                 d >>= 4;
657                                         }
658                                         d &= 0x0f;
659                                 } else if ((((unsigned int)(c - 0xe0020UL)) <= 0x5f) || (c == 0xe0001UL)){
660                                         d = __CTYPE_punct;
661                                 } else if (((unsigned int)(c - 0xf0000UL)) < 0x20000UL) {
662                                         if ((c & 0xffffU) <= 0xfffdU) {
663                                                 d = __CTYPE_punct;
664                                         } else {
665                                                 d = __CTYPE_unclassified;
666                                         }
667                                 } else {
668                                         d = __CTYPE_unclassified;
669                                 }
670
671                         mine = 0;
672                         if (mywalnum(c)) ++mine; mine <<= 1;
673                         if (mywalpha(c)) ++mine; mine <<= 1;
674                         if (mywblank(c)) ++mine; mine <<= 1;
675                         if (mywcntrl(c)) ++mine; mine <<= 1;
676                         if (mywdigit(c)) ++mine; mine <<= 1;
677                         if (mywgraph(c)) ++mine; mine <<= 1;
678                         if (mywlower(c)) ++mine; mine <<= 1;
679                         if (mywprint(c)) ++mine; mine <<= 1;
680                         if (mywpunct(c)) ++mine; mine <<= 1;
681                         if (mywspace(c)) ++mine; mine <<= 1;
682                         if (mywupper(c)) ++mine; mine <<= 1;
683                         if (mywxdigit(c)) ++mine;
684
685                         if (glibc != mine) {
686                                 printf("%#8x : glibc %#4x != %#4x mine %d\n", c, glibc, mine, d);
687                                 if (c < 0x30000UL) {
688                                         printf("sc=%#x u=%#x n=%#x i0=%#x i1=%#x\n", sc, u, n, i0, i1);
689                                 }
690                         }
691                                 upper = lower = u = c;
692                                 if (u <= RANGE) {
693                                         sc = u & ((1 << ultable.ti_shift) - 1);
694                                         u >>= ultable.ti_shift;
695                                         n = u & ((1 << ultable.ii_shift) - 1);
696                                         u >>= ultable.ii_shift;
697
698                                         i0 = ultable.ii[u];
699                                         i0 <<= ultable.ii_shift;
700                                         i1 = ultable.ti[i0 + n];
701                                         i1 <<= (ultable.ti_shift);
702                                         i1 += sc;
703                                         i0 = ultable.ut[i1];
704                                         upper = c + uldiff[i0].u;
705                                         lower = c + uldiff[i0].l;
706                                 }
707
708                         if (towupper(c) != upper) {
709                                 printf("%#8x : towupper glibc %#4x != %#4x mine\n",
710                                            c, towupper(c), upper);
711                         }
712
713                         if (towlower(c) != lower) {
714                                 printf("%#8x : towlower glibc %#4x != %#4x mine   i0 = %d\n",
715                                            c, towlower(c), lower, i0);
716                         }
717
718                         if (totitle && ((tt = towctrans(c, totitle)) != upper)) {
719                                 printf("%#8x : totitle glibc %#4lx != %#4x mine   i0 = %d\n",
720                                            c, tt, upper, i0);
721                         }
722                         }
723
724
725                         if ((c & 0xfff) == 0xfff) printf(".");
726                 }
727                 printf("done\n");
728         }
729
730         if (1) {
731                 FILE *fp;
732
733                 if (!(fp = fopen("wctables.h", "w"))) {
734                         printf("couldn't open wctables.h!\n");
735                         return EXIT_FAILURE;
736                 }
737
738                 fprintf(fp, "#define __LOCALE_DATA_WC_TABLE_DOMAIN_MAX  %#8lx\n\n",
739                                 (unsigned long) RANGE);
740                 output_table(fp, "ctype", &cttable);
741                 output_table(fp, "uplow", &ultable);
742
743
744 #warning fix the upper bound on the upper/lower tables... save 200 bytes or so
745                 fprintf(fp, "#define __LOCALE_DATA_WCuplow_diffs  %7u\n", ul_count);
746                 fprintf(fp, "\n#ifdef WANT_WCuplow_diff_data\n\n");
747                 fprintf(fp, "\nstatic const short __LOCALE_DATA_WCuplow_diff_data[%zu] = {",
748                            2 * (size_t) ul_count);
749                 for (i=0 ; i < ul_count ; i++) {
750                         if (i % 4 == 0) {
751                                 fprintf(fp, "\n");
752                         }
753                         fprintf(fp, " %6d, %6d,", uldiff[i].u, uldiff[i].l);
754                 }
755                 fprintf(fp, "\n};\n\n");
756                 fprintf(fp, "#endif /* WANT_WCuplow_diff_data */\n\n");
757
758
759 /*              output_table(fp, "comb", &combtable); */
760 /*              output_table(fp, "width", &widthtable); */
761
762                 fclose(fp);
763         }
764
765         return EXIT_SUCCESS;
766 }
767
768 size_t newopt(unsigned char *ut, size_t usize, int shift, table_data *tbl)
769 {
770         static int recurse;
771         unsigned char *ti[RANGE+1];     /* table index */
772         size_t numblocks;
773         size_t blocksize;
774         size_t uniq;
775         size_t i, j;
776         size_t smallest, t;
777         unsigned char *ii_save;
778         int uniqblock[256];
779         unsigned char uit[RANGE+1];
780         int shift2;
781
782         memset(uniqblock, 0x00, sizeof(uniqblock));
783
784         ii_save = NULL;
785         blocksize = 1 << shift;
786         numblocks = usize >> shift;
787
788         /* init table index */
789         for (i=j=0 ; i < numblocks ; i++) {
790                 ti[i] = ut + j;
791                 j += blocksize;
792         }
793
794         /* sort */
795         nu_val = blocksize;
796         qsort(ti, numblocks, sizeof(unsigned char *), nu_memcmp);
797
798         uniq = 1;
799         uit[(ti[0]-ut)/blocksize] = 0;
800         for (i=1 ; i < numblocks ; i++) {
801                 if (memcmp(ti[i-1], ti[i], blocksize) < 0) {
802                         if (++uniq > 255) {
803                                 break;
804                         }
805                         uniqblock[uniq - 1] = i;
806                 }
807 #if 1
808                 else if (memcmp(ti[i-1], ti[i], blocksize) > 0) {
809                         printf("bad sort %i!\n", i);
810                         abort();
811                 }
812 #endif
813                 uit[(ti[i]-ut)/blocksize] = uniq - 1;
814         }
815
816         smallest = SIZE_MAX;
817         shift2 = -1;
818         if (uniq <= 255) {
819                 smallest = numblocks + uniq * blocksize;
820                 if (!recurse) {
821                         ++recurse;
822                         for (j=1 ; j < 14 ; j++) {
823                                 if ((numblocks >> j) < 2) break;
824                                 if (tbl) {
825                                         ii_save = tbl->ii;
826                                         tbl->ii = NULL;
827                                 }
828                                 if ((t = newopt(uit, numblocks, j, tbl)) < SIZE_MAX) {
829                                         t += uniq * blocksize;
830                                 }
831                                 if (tbl) {
832                                         tbl->ii = ii_save;
833                                 }
834                                 if (smallest >= t) {
835                                         shift2 = j;
836                                         smallest = t;
837                                         if (!tbl->ii) {
838                                                 printf("ishift %zu  tshift %zu  size %zu\n",
839                                                            shift2, shift, t);
840                                         }
841 /*                              } else { */
842 /*                                      break; */
843                                 }
844                         }
845                         --recurse;
846                 }
847         } else {
848                 return SIZE_MAX;
849         }
850
851         if (tbl->ii) {
852                 if (recurse) {
853                         tbl->ii_shift = shift;
854                         tbl->ii_len = numblocks;
855                         memcpy(tbl->ii, uit, numblocks);
856                         tbl->ti = tbl->ii + tbl->ii_len;
857                         tbl->ti_len = uniq * blocksize;
858                         for (i=0 ; i < uniq ; i++) {
859                                 memcpy(tbl->ti + i * blocksize, ti[uniqblock[i]], blocksize);
860                         }
861                 } else {
862                         ++recurse;
863                         printf("setting ishift %zu  tshift %zu\n",
864                                                            shift2, shift);
865                         newopt(uit, numblocks, shift2, tbl);
866                         --recurse;
867                         tbl->ti_shift = shift;
868                         tbl->ut_len = uniq * blocksize;
869                         tbl->ut = tbl->ti + tbl->ti_len;
870                         for (i=0 ; i < uniq ; i++) {
871                                 memcpy(tbl->ut + i * blocksize, ti[uniqblock[i]], blocksize);
872                         }
873                 }
874         }
875         return smallest;
876 }