OSDN Git Service

- avoid segfaulting when rlimit stack is set to low for gen_wctype by calling setrlim...
[uclinux-h8/uClibc.git] / extra / locale / gen_wctype.c
1
2 #define _GNU_SOURCE
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <string.h>
6 #include <locale.h>
7 #include <wctype.h>
8 #include <limits.h>
9 #include <stdint.h>
10 #include <wchar.h>
11 #include <ctype.h>
12
13 #ifdef __linux__
14 #include <sys/resource.h>
15 #endif
16
17 #ifndef _CTYPE_H
18 #define _CTYPE_H
19 #endif
20 #ifndef _WCTYPE_H
21 #define _WCTYPE_H
22 #endif
23 #include "../../libc/sysdeps/linux/common/bits/uClibc_ctype.h"
24
25 /*       0x9 : space  blank */
26 /*       0xa : space */
27 /*       0xb : space */
28 /*       0xc : space */
29 /*       0xd : space */
30 /*      0x20 : space  blank */
31 /*    0x1680 : space  blank */
32 /*    0x2000 : space  blank */
33 /*    0x2001 : space  blank */
34 /*    0x2002 : space  blank */
35 /*    0x2003 : space  blank */
36 /*    0x2004 : space  blank */
37 /*    0x2005 : space  blank */
38 /*    0x2006 : space  blank */
39 /*    0x2008 : space  blank */
40 /*    0x2009 : space  blank */
41 /*    0x200a : space  blank */
42 /*    0x200b : space  blank */
43 /*    0x2028 : space */
44 /*    0x2029 : space */
45 /*    0x3000 : space  blank */
46
47 /*  typecount[ 0] =    88670  C_alpha_nonupper_nonlower */
48 /*  typecount[ 1] =      742  C_alpha_lower */
49 /*  typecount[ 2] =        4  C_alpha_upper_lower */
50 /*  typecount[ 3] =      731  C_alpha_upper */
51 /*  typecount[ 4] =       10  C_digit */
52 /*  typecount[ 5] =    10270  C_punct */
53 /*  typecount[ 6] =        0  C_graph */
54 /*  typecount[ 7] =        0  C_print_space_nonblank */
55 /*  typecount[ 8] =       14  C_print_space_blank */
56 /*  typecount[ 9] =        0  C_space_nonblank_noncntrl */
57 /*  typecount[10] =        0  C_space_blank_noncntrl */
58 /*  typecount[11] =        6  C_cntrl_space_nonblank */
59 /*  typecount[12] =        1  C_cntrl_space_blank */
60 /*  typecount[13] =       60  C_cntrl_nonspace */
61 /*  typecount[14] =    96100  C_unclassified */
62 /*  typecount[15] =        0  empty_slot */
63
64
65
66 /* Set to #if 0 to restrict wchars to 16 bits. */
67 #if 1
68 #define RANGE 0x2ffffUL
69 #elif 0
70 #define RANGE 0x1ffffUL
71 #else
72 #define RANGE 0xffffUL                  /* Restrict for 16-bit wchar_t... */
73 #endif
74
75 #if 0
76 /* Classification codes. */
77
78 static const char *typename[] = {
79         "C_unclassified",
80         "C_alpha_nonupper_nonlower",
81         "C_alpha_lower",
82         "C_alpha_upper_lower",
83         "C_alpha_upper",
84         "C_digit",
85         "C_punct",
86         "C_graph",
87         "C_print_space_nonblank",
88         "C_print_space_blank",
89         "C_space_nonblank_noncntrl",
90         "C_space_blank_noncntrl",
91         "C_cntrl_space_nonblank",
92         "C_cntrl_space_blank",
93         "C_cntrl_nonspace",
94         "empty_slot"
95 };
96 #endif
97
98 #if 0
99 /* Taking advantage of the C99 mutual-exclusion guarantees for the various
100  * (w)ctype classes, including the descriptions of printing and control
101  * (w)chars, we can place each in one of the following mutually-exclusive
102  * subsets.  Since there are fewer than 16, we can store the data for
103  * each (w)char in a nibble. In contrast, glibc uses an unsigned int
104  * per (w)char, with one bit flag for each is* type.  While this allows
105  * a simple '&' operation to determine the type vs. a range test and a
106  * little special handling for the "blank" and "xdigit" types in my
107  * approach, it also uses 8 times the space for the tables on the typical
108  * 32-bit archs we support. */
109 enum {
110         __CTYPE_unclassified = 0,
111         __CTYPE_alpha_nonupper_nonlower,
112         __CTYPE_alpha_lower,
113         __CTYPE_alpha_upper_lower,
114         __CTYPE_alpha_upper,
115         __CTYPE_digit,
116         __CTYPE_punct,
117         __CTYPE_graph,
118         __CTYPE_print_space_nonblank,
119         __CTYPE_print_space_blank,
120         __CTYPE_space_nonblank_noncntrl,
121         __CTYPE_space_blank_noncntrl,
122         __CTYPE_cntrl_space_nonblank,
123         __CTYPE_cntrl_space_blank,
124         __CTYPE_cntrl_nonspace,
125 };
126 #endif
127
/*
 * Hex-digit test built from a class code D and the raw character X:
 * true when __CTYPE_isdigit(D) holds, or when X, with bit 0x20 forced on,
 * lies in the range 'a'..'f'.
 */
#define __CTYPE_isxdigit(D,X) \
        (__CTYPE_isdigit(D) || (((unsigned int)(((X)|0x20) - 'a')) <= 5))

/*
 * Table-based counterparts of the isw*() predicates, used to cross-check
 * the generated classification against the host C library.
 *
 * NOTE: these deliberately do NOT classify their argument.  Each one tests
 * the class code held in a variable named `d` that must be in scope at the
 * point of use; only mywxdigit() additionally looks at the character passed
 * in.  The argument is kept so call sites read like their isw*() twins.
 */
#define mywalnum(wc)    __CTYPE_isalnum(d)
#define mywalpha(wc)    __CTYPE_isalpha(d)
#define mywblank(wc)    __CTYPE_isblank(d)
#define mywcntrl(wc)    __CTYPE_iscntrl(d)
#define mywdigit(wc)    __CTYPE_isdigit(d)
#define mywgraph(wc)    __CTYPE_isgraph(d)
#define mywlower(wc)    __CTYPE_islower(d)
#define mywprint(wc)    __CTYPE_isprint(d)
#define mywpunct(wc)    __CTYPE_ispunct(d)
#define mywspace(wc)    __CTYPE_isspace(d)
#define mywupper(wc)    __CTYPE_isupper(d)
#define mywxdigit(wc)   __CTYPE_isxdigit(d,wc)
143
/*
 * One distinct pair of case-mapping offsets.  For a wide character c the
 * generator stores towlower(c) - c in `l` and towupper(c) - c in `u`; both
 * are required to fit in a short.
 */
typedef struct {
        short l, u;     /* lower-case offset, then upper-case offset */
} uldiff_entry;
148
/*
 * Description of one compressed three-level lookup table.
 *
 * A lookup splits the character value into three bit fields: the low
 * ti_shift bits, the next ii_shift bits, and whatever remains on top.
 * The top part indexes `ii`; that entry, shifted left by ii_shift and
 * combined with the middle field, indexes `ti`; that entry, shifted left
 * by ti_shift and combined with the low field, indexes `ut`.
 */
typedef struct {
        /* Number of bytes in the ii, ti and ut arrays respectively. */
        uint16_t ii_len, ti_len, ut_len;

        /* Bit widths of the middle (ii_shift) and low (ti_shift) fields. */
        unsigned char ii_shift, ti_shift;

        /* First-level index, second-level index, and leaf data. */
        unsigned char *ii, *ti, *ut;
} table_data;
161
162
163 void output_table(FILE *fp, const char *name, table_data *tbl)
164 {
165         size_t i;
166
167         fprintf(fp, "#define __LOCALE_DATA_WC%s_II_LEN    %7u\n", name, tbl->ii_len);
168         fprintf(fp, "#define __LOCALE_DATA_WC%s_TI_LEN    %7u\n", name, tbl->ti_len);
169         fprintf(fp, "#define __LOCALE_DATA_WC%s_UT_LEN    %7u\n", name, tbl->ut_len);
170
171         fprintf(fp, "#define __LOCALE_DATA_WC%s_II_SHIFT  %7u\n", name, tbl->ii_shift);
172         fprintf(fp, "#define __LOCALE_DATA_WC%s_TI_SHIFT  %7u\n", name, tbl->ti_shift);
173
174         fprintf(fp, "\n#ifdef WANT_WC%s_data\n", name);
175
176         i = tbl->ii_len + tbl->ti_len + tbl->ut_len;
177         fprintf(fp, "\nstatic const unsigned char __LOCALE_DATA_WC%s_data[%zu] = {", name, i);
178         for (i=0 ; i < tbl->ii_len ; i++) {
179                 if (i % 12 == 0) {
180                         fprintf(fp, "\n");
181                 }
182                 fprintf(fp, " %#04x,", tbl->ii[i]);
183         }
184         for (i=0 ; i < tbl->ti_len ; i++) {
185                 if (i % 12 == 0) {
186                         fprintf(fp, "\n");
187                 }
188                 fprintf(fp, " %#04x,", tbl->ti[i]);
189         }
190         for (i=0 ; i < tbl->ut_len ; i++) {
191                 if (i % 12 == 0) {
192                         fprintf(fp, "\n");
193                 }
194                 fprintf(fp, " %#04x,", tbl->ut[i]);
195         }
196         fprintf(fp, "\n};\n\n");
197
198         fprintf(fp, "#endif /* WANT_WC%s_data */\n\n", name);
199 }
200
201 static void dump_table_data(table_data *tbl)
202 {
203         printf("ii_shift = %d  ti_shift = %d\n"
204                    "ii_len = %d  ti_len = %d  ut_len = %d\n"
205                    "total = %d\n",
206                    tbl->ii_shift, tbl->ti_shift,
207                    tbl->ii_len, tbl->ti_len, tbl->ut_len,
208                    (int) tbl->ii_len + (int) tbl->ti_len + (int) tbl->ut_len);
209 }
210
/* Number of bytes nu_memcmp() compares in each block. */
static size_t nu_val;

/*
 * Comparison callback for arrays of `unsigned char *`.
 *
 * `a` and `b` each point at an element of such an array, i.e. at a pointer
 * to a block of bytes.  The first nu_val bytes of the two blocks are
 * compared with memcmp() and its result is returned unchanged.
 */
int nu_memcmp(const void *a, const void *b)
{
        const unsigned char *const *lhs = a;
        const unsigned char *const *rhs = b;

        return memcmp(*lhs, *rhs, nu_val);
}
218
219 static size_t newopt(unsigned char *ut, size_t usize, int shift, table_data *tbl);
220
221 #define MAXTO           255                     /* Restrict to minimal unsigned char max. */
222
223 int main(int argc, char **argv)
224 {
225         long int u, l, tt;
226         size_t smallest, t;
227         unsigned int c;
228         unsigned int d;
229         int i, n;
230         int ul_count = 0;
231         uldiff_entry uldiff[MAXTO];
232         table_data cttable;
233         table_data ultable;
234         table_data combtable;
235         table_data widthtable;
236         long int last_comb = 0;
237
238         unsigned char wct[(RANGE/2)+1]; /* wctype table (nibble per wchar) */
239         unsigned char ult[RANGE+1];     /* upper/lower table */
240         unsigned char combt[(RANGE/4)+1];       /* combining */
241         unsigned char widtht[(RANGE/4)+1];      /* width */
242         wctrans_t totitle;
243         wctype_t is_comb, is_comb3;
244
245         long int typecount[16];
246         const char *typename[16];
247         static const char empty_slot[] = "empty_slot";
248         int built = 0;
249
250 #ifdef __linux__
251         struct rlimit limit;
252
253         limit.rlim_max = RLIM_INFINITY;
254         limit.rlim_cur = RLIM_INFINITY;
255         setrlimit(RLIMIT_STACK, &limit);
256 #endif
257
258 #define INIT_TYPENAME(X) typename[__CTYPE_##X] = "C_" #X
259
260         for (i=0 ; i < 16 ; i++) {
261                 typename[i] = empty_slot;
262         }
263
264         INIT_TYPENAME(unclassified);
265         INIT_TYPENAME(alpha_nonupper_nonlower);
266         INIT_TYPENAME(alpha_lower);
267         INIT_TYPENAME(alpha_upper_lower);
268         INIT_TYPENAME(alpha_upper);
269         INIT_TYPENAME(digit);
270         INIT_TYPENAME(punct);
271         INIT_TYPENAME(graph);
272         INIT_TYPENAME(print_space_nonblank);
273         INIT_TYPENAME(print_space_blank);
274         INIT_TYPENAME(space_nonblank_noncntrl);
275         INIT_TYPENAME(space_blank_noncntrl);
276         INIT_TYPENAME(cntrl_space_nonblank);
277         INIT_TYPENAME(cntrl_space_blank);
278         INIT_TYPENAME(cntrl_nonspace);
279
280         setvbuf(stdout, NULL, _IONBF, 0);
281
282         while (--argc) {
283                 if (!setlocale(LC_CTYPE, *++argv)) {
284                         printf("setlocale(LC_CTYPE,%s) failed!\n", *argv);
285                         continue;
286                 }
287
288                 if (!(totitle = wctrans("totitle"))) {
289                         printf("no totitle transformation.\n");
290                 }
291                 if (!(is_comb = wctype("combining"))) {
292                         printf("no combining wctype.\n");
293                 }
294                 if (!(is_comb3 = wctype("combining_level3"))) {
295                         printf("no combining_level3 wctype.\n");
296                 }
297
298                 if (!built) {
299                 built = 1;
300                 ul_count = 1;
301                 uldiff[0].u = uldiff[0].l = 0;
302
303                 memset(wct, 0, sizeof(wct));
304                 memset(combt, 0, sizeof(combt));
305                 memset(widtht, 0, sizeof(widtht));
306
307                 for (i = 0 ; i < 16 ; i++) {
308                         typecount[i] = 0;
309                 }
310
311                 for (c=0 ; c <= RANGE ; c++) {
312                         if (iswdigit(c)) {
313                                 d = __CTYPE_digit;
314                         } else if (iswalpha(c)) {
315                                 d = __CTYPE_alpha_nonupper_nonlower;
316                                 if (iswlower(c)) {
317                                         d = __CTYPE_alpha_lower;
318                                         if (iswupper(c)) {
319                                                 d = __CTYPE_alpha_upper_lower;
320                                         }
321                                 } else if (iswupper(c)) {
322                                         d = __CTYPE_alpha_upper;
323                                 }
324                         } else if (iswpunct(c)) {
325                                 d = __CTYPE_punct;
326                         } else if (iswgraph(c)) {
327                                 d = __CTYPE_graph;
328                         } else if (iswprint(c)) {
329                                 d = __CTYPE_print_space_nonblank;
330                                 if (iswblank(c)) {
331                                         d = __CTYPE_print_space_blank;
332                                 }
333                         } else if (iswspace(c) && !iswcntrl(c)) {
334                                 d = __CTYPE_space_nonblank_noncntrl;
335                                 if (iswblank(c)) {
336                                         d = __CTYPE_space_blank_noncntrl;
337                                 }
338                         } else if (iswcntrl(c)) {
339                                 d = __CTYPE_cntrl_nonspace;
340                                 if (iswspace(c)) {
341                                         d = __CTYPE_cntrl_space_nonblank;
342                                         if (iswblank(c)) {
343                                                 d = __CTYPE_cntrl_space_blank;
344                                         }
345                                 }
346                         } else {
347                                 d = __CTYPE_unclassified;
348                         }
349
350                         ++typecount[d];
351
352 #if 0
353                         if (iswspace(c)) {
354                                 if (iswblank(c)) {
355                                         printf("%#8x : space  blank\n", c);
356                                 } else {
357                                         printf("%#8x : space\n", c);
358                                 }
359                         }
360 #endif
361
362 #if 0
363                         if (c < 256) {
364                                 unsigned int glibc;
365
366                                 glibc = 0;
367                                 if (isalnum(c)) ++glibc; glibc <<= 1;
368                                 if (isalpha(c)) ++glibc; glibc <<= 1;
369                                 if (isblank(c)) ++glibc; glibc <<= 1;
370                                 if (iscntrl(c)) ++glibc; glibc <<= 1;
371                                 if (isdigit(c)) ++glibc; glibc <<= 1;
372                                 if (isgraph(c)) ++glibc; glibc <<= 1;
373                                 if (islower(c)) ++glibc; glibc <<= 1;
374                                 if (isprint(c)) ++glibc; glibc <<= 1;
375                                 if (ispunct(c)) ++glibc; glibc <<= 1;
376                                 if (isspace(c)) ++glibc; glibc <<= 1;
377                                 if (isupper(c)) ++glibc; glibc <<= 1;
378                                 if (isxdigit(c)) ++glibc;
379                                 printf("%#8x : ctype %#4x\n", c, glibc);
380                         }
381 #endif
382
383 #if 1
384                         /* Paranoid checking... */
385                         {
386                                 unsigned int glibc;
387                                 unsigned int mine;
388
389                                 glibc = 0;
390                                 if (iswalnum(c)) ++glibc; glibc <<= 1;
391                                 if (iswalpha(c)) ++glibc; glibc <<= 1;
392                                 if (iswblank(c)) ++glibc; glibc <<= 1;
393                                 if (iswcntrl(c)) ++glibc; glibc <<= 1;
394                                 if (iswdigit(c)) ++glibc; glibc <<= 1;
395                                 if (iswgraph(c)) ++glibc; glibc <<= 1;
396                                 if (iswlower(c)) ++glibc; glibc <<= 1;
397                                 if (iswprint(c)) ++glibc; glibc <<= 1;
398                                 if (iswpunct(c)) ++glibc; glibc <<= 1;
399                                 if (iswspace(c)) ++glibc; glibc <<= 1;
400                                 if (iswupper(c)) ++glibc; glibc <<= 1;
401                                 if (iswxdigit(c)) ++glibc;
402
403                                 mine = 0;
404                                 if (mywalnum(c)) ++mine; mine <<= 1;
405                                 if (mywalpha(c)) ++mine; mine <<= 1;
406                                 if (mywblank(c)) ++mine; mine <<= 1;
407                                 if (mywcntrl(c)) ++mine; mine <<= 1;
408                                 if (mywdigit(c)) ++mine; mine <<= 1;
409                                 if (mywgraph(c)) ++mine; mine <<= 1;
410                                 if (mywlower(c)) ++mine; mine <<= 1;
411                                 if (mywprint(c)) ++mine; mine <<= 1;
412                                 if (mywpunct(c)) ++mine; mine <<= 1;
413                                 if (mywspace(c)) ++mine; mine <<= 1;
414                                 if (mywupper(c)) ++mine; mine <<= 1;
415                                 if (mywxdigit(c)) ++mine;
416
417                                 if (glibc != mine) {
418                                         printf("%#8x : glibc %#4x != %#4x mine  %u\n", c, glibc, mine, d);
419                                         return EXIT_FAILURE;
420                                 }
421
422 #if 0
423                                 if (iswctype(c,is_comb) || iswctype(c,is_comb3)) {
424 /*                                      if (!iswpunct(c)) { */
425                                                 printf("%#8x : %d %d %#4x\n",
426                                                            c, iswctype(c,is_comb),iswctype(c,is_comb3), glibc);
427 /*                                      } */
428                                 }
429 #endif
430 #if 0
431                                 if (iswctype(c,is_comb) || iswctype(c,is_comb3)) {
432                                         if (!last_comb) {
433                                                 printf("%#8x - ", c);
434                                                 last_comb = c;
435                                         } else if (last_comb + 1 < c) {
436                                                 printf("%#8x\n%#8x - ", last_comb, c);
437                                                 last_comb = c;
438                                         } else {
439                                                 last_comb = c;
440                                         }
441                                 }
442 #endif
443                         }
444 #endif
445
446                         combt[c/4] |= ((((!!iswctype(c,is_comb)) << 1) | !!iswctype(c,is_comb3))
447                                                    << ((c & 3) << 1));
448 /*                      comb3t[c/8] |= ((!!iswctype(c,is_comb3)) << (c & 7)); */
449
450 /*                      widtht[c/4] |= (wcwidth(c) << ((c & 3) << 1)); */
451
452                         if (c & 1) {    /* Use the high nibble for odd numbered wchars. */
453                                 d <<= 4;
454                         }
455                         wct[c/2] |= d;
456
457                         l = (long)(int) towlower(c) - c;
458                         u = (long)(int) towupper(c) - c;
459                         ult[c] = 0;
460                         if (l || u) {
461                                 if ((l != (short)l) || (u != (short)u)) {
462                                         printf("range assumption error!  %x  %ld  %ld\n", c, l, u);
463                                         return EXIT_FAILURE;
464                                 }
465                                 for (i=0 ; i < ul_count ; i++) {
466                                         if ((l == uldiff[i].l) && (u == uldiff[i].u)) {
467                                                 goto found;
468                                         }
469                                 }
470                                 uldiff[ul_count].l = l;
471                                 uldiff[ul_count].u = u;
472                                 ++ul_count;
473                                 if (ul_count > MAXTO) {
474                                         printf("too many touppers/tolowers!\n");
475                                         return EXIT_FAILURE;
476                                 }
477                         found:
478                                 ult[c] = i;
479                         }
480                 }
481
482                 for (i = 0 ; i < 16 ; i++) {
483                         printf("typecount[%2d] = %8ld  %s\n", i, typecount[i], typename[i]);
484                 }
485
486                 printf("optimizing is* table..\n");
487                 n = -1;
488                 smallest = SIZE_MAX;
489                 cttable.ii = NULL;
490                 for (i=0 ; i < 14 ; i++) {
491                         t = newopt(wct, (RANGE/2)+1, i, &cttable);
492                         if (smallest >= t) {
493                                 n = i;
494                                 smallest = t;
495 /*                      } else { */
496 /*                              break; */
497                         }
498                 }
499                 printf("smallest = %zu\n", smallest);
500                 if (!(cttable.ii = malloc(smallest))) {
501                         printf("couldn't allocate space!\n");
502                         return EXIT_FAILURE;
503                 }
504                 smallest = SIZE_MAX;
505                 newopt(wct, (RANGE/2)+1, n, &cttable);
506                 ++cttable.ti_shift;             /* correct for nibble mode */
507
508
509
510                 printf("optimizing u/l-to table..\n");
511                 smallest = SIZE_MAX;
512                 ultable.ii = NULL;
513                 for (i=0 ; i < 14 ; i++) {
514                         t = newopt(ult, RANGE+1, i, &ultable);
515                         if (smallest >= t) {
516                                 n = i;
517                                 smallest = t;
518 /*                      } else { */
519 /*                              break; */
520                         }
521                 }
522                 printf("%zu (smallest) + %zu (u/l diffs) = %zu\n",
523                            smallest, 4 * ul_count, smallest + 4 * ul_count);
524                 printf("smallest = %zu\n", smallest);
525                 if (!(ultable.ii = malloc(smallest))) {
526                         printf("couldn't allocate space!\n");
527                         return EXIT_FAILURE;
528                 }
529                 smallest = SIZE_MAX;
530                 newopt(ult, RANGE+1, n, &ultable);
531
532
533 #if 0
534                 printf("optimizing comb table..\n");
535                 smallest = SIZE_MAX;
536                 combtable.ii = NULL;
537                 for (i=0 ; i < 14 ; i++) {
538                         t = newopt(combt, sizeof(combt), i, &combtable);
539                         if (smallest >= t) {
540                                 n = i;
541                                 smallest = t;
542 /*                      } else { */
543 /*                              break; */
544                         }
545                 }
546                 printf("smallest = %zu\n", smallest);
547                 if (!(combtable.ii = malloc(smallest))) {
548                         printf("couldn't allocate space!\n");
549                         return EXIT_FAILURE;
550                 }
551                 smallest = SIZE_MAX;
552                 newopt(combt, sizeof(combt), n, &combtable);
553                 combtable.ti_shift += 4; /* correct for 4 entries per */
554 #endif
555
556
557 #if 0
558                 printf("optimizing width table..\n");
559                 smallest = SIZE_MAX;
560                 widthtable.ii = NULL;
561                 for (i=0 ; i < 14 ; i++) {
562                         t = newopt(widtht, sizeof(widtht), i, &widthtable);
563                         if (smallest >= t) {
564                                 n = i;
565                                 smallest = t;
566 /*                      } else { */
567 /*                              break; */
568                         }
569                 }
570                 printf("smallest = %zu\n", smallest);
571                 if (!(widthtable.ii = malloc(smallest))) {
572                         printf("couldn't allocate space!\n");
573                         return EXIT_FAILURE;
574                 }
575                 smallest = SIZE_MAX;
576                 newopt(widtht, sizeof(widtht), n, &widthtable);
577                 widthtable.ti_shift += 4; /* correct for 4 entries per */
578 #endif
579
580 #if 0
581                 printf("optimizing comb3 table..\n");
582                 smallest = SIZE_MAX;
583                 comb3table.ii = NULL;
584                 for (i=0 ; i < 14 ; i++) {
585                         t = newopt(comb3t, sizeof(comb3t), i, &comb3table);
586                         if (smallest >= t) {
587                                 n = i;
588                                 smallest = t;
589 /*                      } else { */
590 /*                              break; */
591                         }
592                 }
593                 printf("smallest = %zu\n", smallest);
594                 if (!(comb3table.ii = malloc(smallest))) {
595                         printf("couldn't allocate space!\n");
596                         return EXIT_FAILURE;
597                 }
598                 smallest = SIZE_MAX;
599                 newopt(comb3t, sizeof(comb3t), n, &comb3table);
600                 comb3table.ti_shift += 8; /* correct for 4 entries per */
601 #endif
602
603                 dump_table_data(&cttable);
604                 dump_table_data(&ultable);
605                 dump_table_data(&combtable);
606                 }
607
608                 printf("verifying for %s...\n", *argv);
609 #if RANGE == 0xffffU
610                 for (c=0 ; c <= 0xffffUL ; c++)
611 #else
612                 for (c=0 ; c <= 0x10ffffUL ; c++)
613 #endif
614                         {
615                         unsigned int glibc;
616                         unsigned int mine;
617                         unsigned int upper, lower;
618
619 #if 0
620 #if RANGE < 0x10000UL
621                         if (c == 0x10000UL) {
622                                 c = 0x30000UL;  /* skip 1st and 2nd sup planes */
623                         }
624 #elif RANGE < 0x20000UL
625                         if (c == 0x20000UL) {
626                                 c = 0x30000UL;  /* skip 2nd sup planes */
627                         }
628 #endif
629 #endif
630
631                         glibc = 0;
632                         if (iswalnum(c)) ++glibc; glibc <<= 1;
633                         if (iswalpha(c)) ++glibc; glibc <<= 1;
634                         if (iswblank(c)) ++glibc; glibc <<= 1;
635                         if (iswcntrl(c)) ++glibc; glibc <<= 1;
636                         if (iswdigit(c)) ++glibc; glibc <<= 1;
637                         if (iswgraph(c)) ++glibc; glibc <<= 1;
638                         if (iswlower(c)) ++glibc; glibc <<= 1;
639                         if (iswprint(c)) ++glibc; glibc <<= 1;
640                         if (iswpunct(c)) ++glibc; glibc <<= 1;
641                         if (iswspace(c)) ++glibc; glibc <<= 1;
642                         if (iswupper(c)) ++glibc; glibc <<= 1;
643                         if (iswxdigit(c)) ++glibc;
644
645                         {
646                                 unsigned int u;
647                                 int n, sc;
648                                 int i0, i1;
649
650                                 u = c;
651                                 if (u <= RANGE) {
652                                         sc = u & ((1 << cttable.ti_shift) - 1);
653                                         u >>= cttable.ti_shift;
654                                         n = u & ((1 << cttable.ii_shift) - 1);
655                                         u >>= cttable.ii_shift;
656
657                                         i0 = cttable.ii[u];
658                                         i0 <<= cttable.ii_shift;
659                                         i1 = cttable.ti[i0 + n];
660                                         i1 <<= (cttable.ti_shift-1);
661                                         d = cttable.ut[i1 + (sc >> 1)];
662
663                                         if (sc & 1) {
664                                                 d >>= 4;
665                                         }
666                                         d &= 0x0f;
667                                 } else if ((((unsigned int)(c - 0xe0020UL)) <= 0x5f) || (c == 0xe0001UL)){
668                                         d = __CTYPE_punct;
669                                 } else if (((unsigned int)(c - 0xf0000UL)) < 0x20000UL) {
670                                         if ((c & 0xffffU) <= 0xfffdU) {
671                                                 d = __CTYPE_punct;
672                                         } else {
673                                                 d = __CTYPE_unclassified;
674                                         }
675                                 } else {
676                                         d = __CTYPE_unclassified;
677                                 }
678
679                         mine = 0;
680                         if (mywalnum(c)) ++mine; mine <<= 1;
681                         if (mywalpha(c)) ++mine; mine <<= 1;
682                         if (mywblank(c)) ++mine; mine <<= 1;
683                         if (mywcntrl(c)) ++mine; mine <<= 1;
684                         if (mywdigit(c)) ++mine; mine <<= 1;
685                         if (mywgraph(c)) ++mine; mine <<= 1;
686                         if (mywlower(c)) ++mine; mine <<= 1;
687                         if (mywprint(c)) ++mine; mine <<= 1;
688                         if (mywpunct(c)) ++mine; mine <<= 1;
689                         if (mywspace(c)) ++mine; mine <<= 1;
690                         if (mywupper(c)) ++mine; mine <<= 1;
691                         if (mywxdigit(c)) ++mine;
692
693                         if (glibc != mine) {
694                                 printf("%#8x : glibc %#4x != %#4x mine %d\n", c, glibc, mine, d);
695                                 if (c < 0x30000UL) {
696                                         printf("sc=%#x u=%#x n=%#x i0=%#x i1=%#x\n", sc, u, n, i0, i1);
697                                 }
698                         }
699                                 upper = lower = u = c;
700                                 if (u <= RANGE) {
701                                         sc = u & ((1 << ultable.ti_shift) - 1);
702                                         u >>= ultable.ti_shift;
703                                         n = u & ((1 << ultable.ii_shift) - 1);
704                                         u >>= ultable.ii_shift;
705
706                                         i0 = ultable.ii[u];
707                                         i0 <<= ultable.ii_shift;
708                                         i1 = ultable.ti[i0 + n];
709                                         i1 <<= (ultable.ti_shift);
710                                         i1 += sc;
711                                         i0 = ultable.ut[i1];
712                                         upper = c + uldiff[i0].u;
713                                         lower = c + uldiff[i0].l;
714                                 }
715
716                         if (towupper(c) != upper) {
717                                 printf("%#8x : towupper glibc %#4x != %#4x mine\n",
718                                            c, towupper(c), upper);
719                         }
720                                 
721                         if (towlower(c) != lower) {
722                                 printf("%#8x : towlower glibc %#4x != %#4x mine   i0 = %d\n",
723                                            c, towlower(c), lower, i0);
724                         }
725
726                         if (totitle && ((tt = towctrans(c, totitle)) != upper)) {
727                                 printf("%#8x : totitle glibc %#4lx != %#4x mine   i0 = %d\n",
728                                            c, tt, upper, i0);
729                         }
730                         }
731
732
733                         if ((c & 0xfff) == 0xfff) printf(".");
734                 }
735                 printf("done\n");
736         }
737
738         if (1) {
739                 FILE *fp;
740
741                 if (!(fp = fopen("wctables.h", "w"))) {
742                         printf("couldn't open wctables.h!\n");
743                         return EXIT_FAILURE;
744                 }
745
746                 fprintf(fp, "#define __LOCALE_DATA_WC_TABLE_DOMAIN_MAX  %#8lx\n\n",
747                                 (unsigned long) RANGE);
748                 output_table(fp, "ctype", &cttable);
749                 output_table(fp, "uplow", &ultable);
750         
751
752 #warning fix the upper bound on the upper/lower tables... save 200 bytes or so
753                 fprintf(fp, "#define __LOCALE_DATA_WCuplow_diffs  %7u\n", ul_count);
754                 fprintf(fp, "\n#ifdef WANT_WCuplow_diff_data\n\n");
755                 fprintf(fp, "\nstatic const short __LOCALE_DATA_WCuplow_diff_data[%zu] = {",
756                            2 * (size_t) ul_count);
757                 for (i=0 ; i < ul_count ; i++) {
758                         if (i % 4 == 0) {
759                                 fprintf(fp, "\n");
760                         }
761                         fprintf(fp, " %6d, %6d,", uldiff[i].u, uldiff[i].l);
762                 }
763                 fprintf(fp, "\n};\n\n");
764                 fprintf(fp, "#endif /* WANT_WCuplow_diff_data */\n\n");
765
766
767 /*              output_table(fp, "comb", &combtable); */
768 /*              output_table(fp, "width", &widthtable); */
769
770                 fclose(fp);
771         }
772
773         return EXIT_SUCCESS;
774 }
775
776 size_t newopt(unsigned char *ut, size_t usize, int shift, table_data *tbl)
777 {
778         static int recurse = 0;
779         unsigned char *ti[RANGE+1];     /* table index */
780         size_t numblocks;
781         size_t blocksize;
782         size_t uniq;
783         size_t i, j;
784         size_t smallest, t;
785         unsigned char *ii_save;
786         int uniqblock[256];
787         unsigned char uit[RANGE+1];
788         int shift2;
789
790         ii_save = NULL;
791         blocksize = 1 << shift;
792         numblocks = usize >> shift;
793
794         /* init table index */
795         for (i=j=0 ; i < numblocks ; i++) {
796                 ti[i] = ut + j;
797                 j += blocksize;
798         }
799
800         /* sort */
801         nu_val = blocksize;
802         qsort(ti, numblocks, sizeof(unsigned char *), nu_memcmp);
803         
804         uniq = 1;
805         uit[(ti[0]-ut)/blocksize] = 0;
806         for (i=1 ; i < numblocks ; i++) {
807                 if (memcmp(ti[i-1], ti[i], blocksize) < 0) {
808                         if (++uniq > 255) {
809                                 break;
810                         }
811                         uniqblock[uniq - 1] = i;
812                 }
813 #if 1
814                 else if (memcmp(ti[i-1], ti[i], blocksize) > 0) {
815                         printf("bad sort %i!\n", i);
816                         abort();
817                 }
818 #endif
819                 uit[(ti[i]-ut)/blocksize] = uniq - 1;
820         }
821
822         smallest = SIZE_MAX;
823         shift2 = -1;
824         if (uniq <= 255) {
825                 smallest = numblocks + uniq * blocksize;
826                 if (!recurse) {
827                         ++recurse;
828                         for (j=1 ; j < 14 ; j++) {
829                                 if ((numblocks >> j) < 2) break;
830                                 if (tbl) {
831                                         ii_save = tbl->ii;
832                                         tbl->ii = NULL;
833                                 }
834                                 if ((t = newopt(uit, numblocks, j, tbl)) < SIZE_MAX) {
835                                         t += uniq * blocksize;
836                                 }
837                                 if (tbl) {
838                                         tbl->ii = ii_save;
839                                 }
840                                 if (smallest >= t) {
841                                         shift2 = j;
842                                         smallest = t;
843                                         if (!tbl->ii) {
844                                                 printf("ishift %zu  tshift %zu  size %zu\n",
845                                                            shift2, shift, t);
846                                         }
847 /*                              } else { */
848 /*                                      break; */
849                                 }
850                         }
851                         --recurse;
852                 }
853         } else {
854                 return SIZE_MAX;
855         }
856
857         if (tbl->ii) {
858                 if (recurse) {
859                         tbl->ii_shift = shift;
860                         tbl->ii_len = numblocks;
861                         memcpy(tbl->ii, uit, numblocks);
862                         tbl->ti = tbl->ii + tbl->ii_len;
863                         tbl->ti_len = uniq * blocksize;
864                         for (i=0 ; i < uniq ; i++) {
865                                 memcpy(tbl->ti + i * blocksize, ti[uniqblock[i]], blocksize);
866                         }
867                 } else {
868                         ++recurse;
869                         printf("setting ishift %zu  tshift %zu\n",
870                                                            shift2, shift);
871                         newopt(uit, numblocks, shift2, tbl);
872                         --recurse;
873                         tbl->ti_shift = shift;
874                         tbl->ut_len = uniq * blocksize;
875                         tbl->ut = tbl->ti + tbl->ti_len;
876                         for (i=0 ; i < uniq ; i++) {
877                                 memcpy(tbl->ut + i * blocksize, ti[uniqblock[i]], blocksize);
878                         }
879                 }
880         }
881         return smallest;
882 }