OSDN Git Service

6bfc40ba439b163ee78d36858aba89554dfd1424
[uclinux-h8/uClibc.git] / extra / locale / gen_wctype.c
1 /*
2  * Copyright (C) 2000-2006 Erik Andersen <andersen@uclibc.org>
3  *
4  * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
5  */
6 #define _GNU_SOURCE
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <locale.h>
11 #include <wctype.h>
12 #include <limits.h>
13 #include <stdint.h>
14 #include <wchar.h>
15 #include <ctype.h>
16
17 #ifndef _CTYPE_H
18 #define _CTYPE_H
19 #endif
20 #ifndef _WCTYPE_H
21 #define _WCTYPE_H
22 #endif
23 #include UCLIBC_CTYPE_HEADER
24
25 /*       0x9 : space  blank */
26 /*       0xa : space */
27 /*       0xb : space */
28 /*       0xc : space */
29 /*       0xd : space */
30 /*      0x20 : space  blank */
31 /*    0x1680 : space  blank */
32 /*    0x2000 : space  blank */
33 /*    0x2001 : space  blank */
34 /*    0x2002 : space  blank */
35 /*    0x2003 : space  blank */
36 /*    0x2004 : space  blank */
37 /*    0x2005 : space  blank */
38 /*    0x2006 : space  blank */
39 /*    0x2008 : space  blank */
40 /*    0x2009 : space  blank */
41 /*    0x200a : space  blank */
42 /*    0x200b : space  blank */
43 /*    0x2028 : space */
44 /*    0x2029 : space */
45 /*    0x3000 : space  blank */
46
47 /*  typecount[ 0] =    88670  C_alpha_nonupper_nonlower */
48 /*  typecount[ 1] =      742  C_alpha_lower */
49 /*  typecount[ 2] =        4  C_alpha_upper_lower */
50 /*  typecount[ 3] =      731  C_alpha_upper */
51 /*  typecount[ 4] =       10  C_digit */
52 /*  typecount[ 5] =    10270  C_punct */
53 /*  typecount[ 6] =        0  C_graph */
54 /*  typecount[ 7] =        0  C_print_space_nonblank */
55 /*  typecount[ 8] =       14  C_print_space_blank */
56 /*  typecount[ 9] =        0  C_space_nonblank_noncntrl */
57 /*  typecount[10] =        0  C_space_blank_noncntrl */
58 /*  typecount[11] =        6  C_cntrl_space_nonblank */
59 /*  typecount[12] =        1  C_cntrl_space_blank */
60 /*  typecount[13] =       60  C_cntrl_nonspace */
61 /*  typecount[14] =    96100  C_unclassified */
62 /*  typecount[15] =        0  empty_slot */
63
64
65
66 /* Set to #if 0 to restrict wchars to 16 bits. */
67 #if 1
68 #define RANGE 0x2ffffUL
69 #elif 0
70 #define RANGE 0x1ffffUL
71 #else
72 #define RANGE 0xffffUL                  /* Restrict for 16-bit wchar_t... */
73 #endif
74
75 #if 0
76 /* Classification codes. */
77
78 static const char *typename[] = {
79         "C_unclassified",
80         "C_alpha_nonupper_nonlower",
81         "C_alpha_lower",
82         "C_alpha_upper_lower",
83         "C_alpha_upper",
84         "C_digit",
85         "C_punct",
86         "C_graph",
87         "C_print_space_nonblank",
88         "C_print_space_blank",
89         "C_space_nonblank_noncntrl",
90         "C_space_blank_noncntrl",
91         "C_cntrl_space_nonblank",
92         "C_cntrl_space_blank",
93         "C_cntrl_nonspace",
94         "empty_slot"
95 };
96 #endif
97
98 #if 0
99 /* Taking advantage of the C99 mutual-exclusion guarantees for the various
100  * (w)ctype classes, including the descriptions of printing and control
101  * (w)chars, we can place each in one of the following mutually-exclusive
102  * subsets.  Since there are fewer than 16, we can store the data for
103  * each (w)char in a nibble. In contrast, glibc uses an unsigned int
104  * per (w)char, with one bit flag for each is* type.  While this allows
105  * a simple '&' operation to determine the type vs. a range test and a
106  * little special handling for the "blank" and "xdigit" types in my
107  * approach, it also uses 8 times the space for the tables on the typical
108  * 32-bit archs we support. */
109 enum {
110         __CTYPE_unclassified = 0,
111         __CTYPE_alpha_nonupper_nonlower,
112         __CTYPE_alpha_lower,
113         __CTYPE_alpha_upper_lower,
114         __CTYPE_alpha_upper,
115         __CTYPE_digit,
116         __CTYPE_punct,
117         __CTYPE_graph,
118         __CTYPE_print_space_nonblank,
119         __CTYPE_print_space_blank,
120         __CTYPE_space_nonblank_noncntrl,
121         __CTYPE_space_blank_noncntrl,
122         __CTYPE_cntrl_space_nonblank,
123         __CTYPE_cntrl_space_blank,
124         __CTYPE_cntrl_nonspace,
125 };
126 #endif
127
/* Hex-digit test.  D is a __CTYPE_* classification code and X is the
 * character value.  True when __CTYPE_isdigit(D) holds (defined outside this
 * section; presumably the digit class test), or when X with bit 0x20 set
 * lies within 'a'..'a'+5.  The unsigned cast makes values below 'a' wrap
 * around and fail the <= 5 comparison. */
128 #define __CTYPE_isxdigit(D,X) \
129         (__CTYPE_isdigit(D) || (((unsigned int)(((X)|0x20) - 'a')) <= 5))
130
/* Classification predicates used by the consistency checks in main().
 * NOTE: apart from mywxdigit, these ignore their argument x entirely and
 * expand to a test on the identifier d, so they can only be used where a
 * variable named d holding the __CTYPE_* code for the character is in
 * scope.  mywxdigit additionally passes x through as the character value. */
131 #define mywalnum(x)             __CTYPE_isalnum(d)
132 #define mywalpha(x)             __CTYPE_isalpha(d)
133 #define mywblank(x)             __CTYPE_isblank(d)
134 #define mywcntrl(x)             __CTYPE_iscntrl(d)
135 #define mywdigit(x)             __CTYPE_isdigit(d)
136 #define mywgraph(x)             __CTYPE_isgraph(d)
137 #define mywlower(x)             __CTYPE_islower(d)
138 #define mywprint(x)             __CTYPE_isprint(d)
139 #define mywpunct(x)             __CTYPE_ispunct(d)
140 #define mywspace(x)             __CTYPE_isspace(d)
141 #define mywupper(x)             __CTYPE_isupper(d)
142 #define mywxdigit(x)    __CTYPE_isxdigit(d,x)
143
/*
 * One distinct pair of case-conversion displacements.  main() records
 * towlower(c) - c in l and towupper(c) - c in u (after checking that both
 * fit in a short) and later recovers the converted character as c + l or
 * c + u.
 */
typedef struct {
	short l;	/* displacement from a wchar to its lowercase form */
	short u;	/* displacement from a wchar to its uppercase form */
} uldiff_entry;
148
/*
 * A three-level lookup table.  As used by the verification code in main(),
 * a character value is split into high, middle and low bit fields: the high
 * bits index ii, that result (shifted by ii_shift) plus the middle bits
 * indexes ti, and that result (shifted by ti_shift) plus the low bits
 * indexes ut.
 */
typedef struct {
	/* Number of bytes in each of the three arrays below. */
	uint16_t ii_len;
	uint16_t ti_len;
	uint16_t ut_len;

	/* Widths, in bits, of the middle (ii_shift) and low (ti_shift) fields. */
	unsigned char ii_shift;
	unsigned char ti_shift;

	/* The arrays themselves; output_table() emits them in this order. */
	unsigned char *ii;
	unsigned char *ti;
	unsigned char *ut;
} table_data;
161
162
163 void output_table(FILE *fp, const char *name, table_data *tbl)
164 {
165         size_t i;
166
167         fprintf(fp, "#define __LOCALE_DATA_WC%s_II_LEN    %7u\n", name, tbl->ii_len);
168         fprintf(fp, "#define __LOCALE_DATA_WC%s_TI_LEN    %7u\n", name, tbl->ti_len);
169         fprintf(fp, "#define __LOCALE_DATA_WC%s_UT_LEN    %7u\n", name, tbl->ut_len);
170
171         fprintf(fp, "#define __LOCALE_DATA_WC%s_II_SHIFT  %7u\n", name, tbl->ii_shift);
172         fprintf(fp, "#define __LOCALE_DATA_WC%s_TI_SHIFT  %7u\n", name, tbl->ti_shift);
173
174         fprintf(fp, "\n#ifdef WANT_WC%s_data\n", name);
175
176         i = tbl->ii_len + tbl->ti_len + tbl->ut_len;
177         fprintf(fp, "\nstatic const unsigned char __LOCALE_DATA_WC%s_data[%zu] = {", name, i);
178         for (i=0 ; i < tbl->ii_len ; i++) {
179                 if (i % 12 == 0) {
180                         fprintf(fp, "\n");
181                 }
182                 fprintf(fp, " %#04x,", tbl->ii[i]);
183         }
184         for (i=0 ; i < tbl->ti_len ; i++) {
185                 if (i % 12 == 0) {
186                         fprintf(fp, "\n");
187                 }
188                 fprintf(fp, " %#04x,", tbl->ti[i]);
189         }
190         for (i=0 ; i < tbl->ut_len ; i++) {
191                 if (i % 12 == 0) {
192                         fprintf(fp, "\n");
193                 }
194                 fprintf(fp, " %#04x,", tbl->ut[i]);
195         }
196         fprintf(fp, "\n};\n\n");
197
198         fprintf(fp, "#endif /* WANT_WC%s_data */\n\n", name);
199 }
200
201 static void dump_table_data(table_data *tbl)
202 {
203         printf("ii_shift = %d  ti_shift = %d\n"
204                    "ii_len = %d  ti_len = %d  ut_len = %d\n"
205                    "total = %d\n",
206                    tbl->ii_shift, tbl->ti_shift,
207                    tbl->ii_len, tbl->ti_len, tbl->ut_len,
208                    (int) tbl->ii_len + (int) tbl->ti_len + (int) tbl->ut_len);
209 }
210
/* Number of bytes nu_memcmp() compares; set it before sorting block pointers. */
static size_t nu_val;

/*
 * Comparison callback for arrays of pointers to byte blocks.
 *
 * a, b - each points at an element of such an array, i.e. at an
 *        unsigned char * that in turn points at a block.
 *
 * Returns the memcmp() ordering of the first nu_val bytes of the two blocks.
 */
int nu_memcmp(const void *a, const void *b)
{
	unsigned char *const *lhs = a;
	unsigned char *const *rhs = b;

	return memcmp(*lhs, *rhs, nu_val);
}
218
219 static size_t newopt(unsigned char *ut, size_t usize, int shift, table_data *tbl);
220
221 #define MAXTO           255                     /* Restrict to minimal unsigned char max. */
222
223 int main(int argc, char **argv)
224 {
225         long int u, l, tt;
226         size_t smallest, t;
227         unsigned int c;
228         unsigned int d;
229         int i, n;
230         int ul_count = 0;
231         uldiff_entry uldiff[MAXTO];
232         table_data cttable;
233         table_data ultable;
234 #if 0
235         table_data combtable;
236         table_data widthtable;
237         long int last_comb = 0;
238 #endif
239         unsigned char wct[(RANGE/2)+1]; /* wctype table (nibble per wchar) */
240         unsigned char ult[RANGE+1];     /* upper/lower table */
241         unsigned char combt[(RANGE/4)+1];       /* combining */
242         unsigned char widtht[(RANGE/4)+1];      /* width */
243         wctrans_t totitle;
244         wctype_t is_comb, is_comb3;
245
246         long int typecount[16];
247         const char *typename[16];
248         static const char empty_slot[] = "empty_slot";
249         int built = 0;
250
251 #define INIT_TYPENAME(X) typename[__CTYPE_##X] = "C_" #X
252
253         for (i=0 ; i < 16 ; i++) {
254                 typename[i] = empty_slot;
255         }
256
257         INIT_TYPENAME(unclassified);
258         INIT_TYPENAME(alpha_nonupper_nonlower);
259         INIT_TYPENAME(alpha_lower);
260         INIT_TYPENAME(alpha_upper_lower);
261         INIT_TYPENAME(alpha_upper);
262         INIT_TYPENAME(digit);
263         INIT_TYPENAME(punct);
264         INIT_TYPENAME(graph);
265         INIT_TYPENAME(print_space_nonblank);
266         INIT_TYPENAME(print_space_blank);
267         INIT_TYPENAME(space_nonblank_noncntrl);
268         INIT_TYPENAME(space_blank_noncntrl);
269         INIT_TYPENAME(cntrl_space_nonblank);
270         INIT_TYPENAME(cntrl_space_blank);
271         INIT_TYPENAME(cntrl_nonspace);
272
273         memset(&cttable, 0, sizeof(table_data));
274         memset(&ultable, 0, sizeof(table_data));
275 #if 0
276         memset(combtable, 0, sizeof table_data);
277         memset(widthtable, 0, sizeof table_data);
278 #endif
279         setvbuf(stdout, NULL, _IONBF, 0);
280
281         while (--argc) {
282                 if (!setlocale(LC_CTYPE, *++argv)) {
283                         printf("setlocale(LC_CTYPE,%s) failed!  Skipping this locale...\n", *argv);
284                         continue;
285                 }
286
287                 if (!(totitle = wctrans("totitle"))) {
288                         printf("no totitle transformation.\n");
289                 }
290                 if (!(is_comb = wctype("combining"))) {
291                         printf("no combining wctype.\n");
292                 }
293                 if (!(is_comb3 = wctype("combining_level3"))) {
294                         printf("no combining_level3 wctype.\n");
295                 }
296
297                 if (!built) {
298                 built = 1;
299                 ul_count = 1;
300                 uldiff[0].u = uldiff[0].l = 0;
301
302                 memset(wct, 0, sizeof(wct));
303                 memset(combt, 0, sizeof(combt));
304                 memset(widtht, 0, sizeof(widtht));
305
306                 for (i = 0 ; i < 16 ; i++) {
307                         typecount[i] = 0;
308                 }
309
310                 for (c=0 ; c <= RANGE ; c++) {
311                         if (iswdigit(c)) {
312                                 d = __CTYPE_digit;
313                         } else if (iswalpha(c)) {
314                                 d = __CTYPE_alpha_nonupper_nonlower;
315                                 if (iswlower(c)) {
316                                         d = __CTYPE_alpha_lower;
317                                         if (iswupper(c)) {
318                                                 d = __CTYPE_alpha_upper_lower;
319                                         }
320                                 } else if (iswupper(c)) {
321                                         d = __CTYPE_alpha_upper;
322                                 }
323                         } else if (iswpunct(c)) {
324                                 d = __CTYPE_punct;
325                         } else if (iswgraph(c)) {
326                                 d = __CTYPE_graph;
327                         } else if (iswprint(c)) {
328                                 d = __CTYPE_print_space_nonblank;
329                                 if (iswblank(c)) {
330                                         d = __CTYPE_print_space_blank;
331                                 }
332                         } else if (iswspace(c) && !iswcntrl(c)) {
333                                 d = __CTYPE_space_nonblank_noncntrl;
334                                 if (iswblank(c)) {
335                                         d = __CTYPE_space_blank_noncntrl;
336                                 }
337                         } else if (iswcntrl(c)) {
338                                 d = __CTYPE_cntrl_nonspace;
339                                 if (iswspace(c)) {
340                                         d = __CTYPE_cntrl_space_nonblank;
341                                         if (iswblank(c)) {
342                                                 d = __CTYPE_cntrl_space_blank;
343                                         }
344                                 }
345                         } else {
346                                 d = __CTYPE_unclassified;
347                         }
348
349                         ++typecount[d];
350
351 #if 0
352                         if (iswspace(c)) {
353                                 if (iswblank(c)) {
354                                         printf("%#8x : space  blank\n", c);
355                                 } else {
356                                         printf("%#8x : space\n", c);
357                                 }
358                         }
359 #endif
360
361 #if 0
362                         if (c < 256) {
363                                 unsigned int glibc;
364
365                                 glibc = 0;
366                                 if (isalnum(c)) ++glibc; glibc <<= 1;
367                                 if (isalpha(c)) ++glibc; glibc <<= 1;
368                                 if (isblank(c)) ++glibc; glibc <<= 1;
369                                 if (iscntrl(c)) ++glibc; glibc <<= 1;
370                                 if (isdigit(c)) ++glibc; glibc <<= 1;
371                                 if (isgraph(c)) ++glibc; glibc <<= 1;
372                                 if (islower(c)) ++glibc; glibc <<= 1;
373                                 if (isprint(c)) ++glibc; glibc <<= 1;
374                                 if (ispunct(c)) ++glibc; glibc <<= 1;
375                                 if (isspace(c)) ++glibc; glibc <<= 1;
376                                 if (isupper(c)) ++glibc; glibc <<= 1;
377                                 if (isxdigit(c)) ++glibc;
378                                 printf("%#8x : ctype %#4x\n", c, glibc);
379                         }
380 #endif
381
382 #if 1
383                         /* Paranoid checking... */
384                         {
385                                 unsigned int glibc;
386                                 unsigned int mine;
387
388                                 glibc = 0;
389                                 if (iswalnum(c)) ++glibc; glibc <<= 1;
390                                 if (iswalpha(c)) ++glibc; glibc <<= 1;
391                                 if (iswblank(c)) ++glibc; glibc <<= 1;
392                                 if (iswcntrl(c)) ++glibc; glibc <<= 1;
393                                 if (iswdigit(c)) ++glibc; glibc <<= 1;
394                                 if (iswgraph(c)) ++glibc; glibc <<= 1;
395                                 if (iswlower(c)) ++glibc; glibc <<= 1;
396                                 if (iswprint(c)) ++glibc; glibc <<= 1;
397                                 if (iswpunct(c)) ++glibc; glibc <<= 1;
398                                 if (iswspace(c)) ++glibc; glibc <<= 1;
399                                 if (iswupper(c)) ++glibc; glibc <<= 1;
400                                 if (iswxdigit(c)) ++glibc;
401
402                                 mine = 0;
403                                 if (mywalnum(c)) ++mine; mine <<= 1;
404                                 if (mywalpha(c)) ++mine; mine <<= 1;
405                                 if (mywblank(c)) ++mine; mine <<= 1;
406                                 if (mywcntrl(c)) ++mine; mine <<= 1;
407                                 if (mywdigit(c)) ++mine; mine <<= 1;
408                                 if (mywgraph(c)) ++mine; mine <<= 1;
409                                 if (mywlower(c)) ++mine; mine <<= 1;
410                                 if (mywprint(c)) ++mine; mine <<= 1;
411                                 if (mywpunct(c)) ++mine; mine <<= 1;
412                                 if (mywspace(c)) ++mine; mine <<= 1;
413                                 if (mywupper(c)) ++mine; mine <<= 1;
414                                 if (mywxdigit(c)) ++mine;
415
416                                 if (glibc != mine) {
417                                         printf("%#8x : glibc %#4x != %#4x mine  %u\n", c, glibc, mine, d);
418                                         return EXIT_FAILURE;
419                                 }
420
421 #if 0
422                                 if (iswctype(c,is_comb) || iswctype(c,is_comb3)) {
423 /*                                      if (!iswpunct(c)) { */
424                                                 printf("%#8x : %d %d %#4x\n",
425                                                            c, iswctype(c,is_comb),iswctype(c,is_comb3), glibc);
426 /*                                      } */
427                                 }
428 #endif
429 #if 0
430                                 if (iswctype(c,is_comb) || iswctype(c,is_comb3)) {
431                                         if (!last_comb) {
432                                                 printf("%#8x - ", c);
433                                                 last_comb = c;
434                                         } else if (last_comb + 1 < c) {
435                                                 printf("%#8x\n%#8x - ", last_comb, c);
436                                                 last_comb = c;
437                                         } else {
438                                                 last_comb = c;
439                                         }
440                                 }
441 #endif
442                         }
443 #endif
444
445                         combt[c/4] |= ((((!!iswctype(c,is_comb)) << 1) | !!iswctype(c,is_comb3))
446                                                    << ((c & 3) << 1));
447 /*                      comb3t[c/8] |= ((!!iswctype(c,is_comb3)) << (c & 7)); */
448
449 /*                      widtht[c/4] |= (wcwidth(c) << ((c & 3) << 1)); */
450
451                         if (c & 1) {    /* Use the high nibble for odd numbered wchars. */
452                                 d <<= 4;
453                         }
454                         wct[c/2] |= d;
455
456                         l = (long)(int) towlower(c) - c;
457                         u = (long)(int) towupper(c) - c;
458                         ult[c] = 0;
459                         if (l || u) {
460                                 if ((l != (short)l) || (u != (short)u)) {
461                                         printf("range assumption error!  %x  %ld  %ld\n", c, l, u);
462                                         return EXIT_FAILURE;
463                                 }
464                                 for (i=0 ; i < ul_count ; i++) {
465                                         if ((l == uldiff[i].l) && (u == uldiff[i].u)) {
466                                                 goto found;
467                                         }
468                                 }
469                                 uldiff[ul_count].l = l;
470                                 uldiff[ul_count].u = u;
471                                 ++ul_count;
472                                 if (ul_count > MAXTO) {
473                                         printf("too many touppers/tolowers!\n");
474                                         return EXIT_FAILURE;
475                                 }
476                         found:
477                                 ult[c] = i;
478                         }
479                 }
480
481                 for (i = 0 ; i < 16 ; i++) {
482                         printf("typecount[%2d] = %8ld  %s\n", i, typecount[i], typename[i]);
483                 }
484
485                 printf("optimizing is* table..\n");
486                 n = -1;
487                 smallest = SIZE_MAX;
488                 cttable.ii = NULL;
489                 for (i=0 ; i < 14 ; i++) {
490                         t = newopt(wct, (RANGE/2)+1, i, &cttable);
491                         if (smallest >= t) {
492                                 n = i;
493                                 smallest = t;
494 /*                      } else { */
495 /*                              break; */
496                         }
497                 }
498                 printf("smallest = %zu\n", smallest);
499                 if (!(cttable.ii = malloc(smallest))) {
500                         printf("couldn't allocate space!\n");
501                         return EXIT_FAILURE;
502                 }
503                 smallest = SIZE_MAX;
504                 newopt(wct, (RANGE/2)+1, n, &cttable);
505                 ++cttable.ti_shift;             /* correct for nibble mode */
506
507
508
509                 printf("optimizing u/l-to table..\n");
510                 smallest = SIZE_MAX;
511                 ultable.ii = NULL;
512                 for (i=0 ; i < 14 ; i++) {
513                         t = newopt(ult, RANGE+1, i, &ultable);
514                         if (smallest >= t) {
515                                 n = i;
516                                 smallest = t;
517 /*                      } else { */
518 /*                              break; */
519                         }
520                 }
521                 printf("%zu (smallest) + %zu (u/l diffs) = %zu\n",
522                            smallest, 4 * ul_count, smallest + 4 * ul_count);
523                 printf("smallest = %zu\n", smallest);
524                 if (!(ultable.ii = malloc(smallest))) {
525                         printf("couldn't allocate space!\n");
526                         return EXIT_FAILURE;
527                 }
528                 smallest = SIZE_MAX;
529                 newopt(ult, RANGE+1, n, &ultable);
530
531
532 #if 0
533                 printf("optimizing comb table..\n");
534                 smallest = SIZE_MAX;
535                 combtable.ii = NULL;
536                 for (i=0 ; i < 14 ; i++) {
537                         t = newopt(combt, sizeof(combt), i, &combtable);
538                         if (smallest >= t) {
539                                 n = i;
540                                 smallest = t;
541 /*                      } else { */
542 /*                              break; */
543                         }
544                 }
545                 printf("smallest = %zu\n", smallest);
546                 if (!(combtable.ii = malloc(smallest))) {
547                         printf("couldn't allocate space!\n");
548                         return EXIT_FAILURE;
549                 }
550                 smallest = SIZE_MAX;
551                 newopt(combt, sizeof(combt), n, &combtable);
552                 combtable.ti_shift += 4; /* correct for 4 entries per */
553 #endif
554
555
556 #if 0
557                 printf("optimizing width table..\n");
558                 smallest = SIZE_MAX;
559                 widthtable.ii = NULL;
560                 for (i=0 ; i < 14 ; i++) {
561                         t = newopt(widtht, sizeof(widtht), i, &widthtable);
562                         if (smallest >= t) {
563                                 n = i;
564                                 smallest = t;
565 /*                      } else { */
566 /*                              break; */
567                         }
568                 }
569                 printf("smallest = %zu\n", smallest);
570                 if (!(widthtable.ii = malloc(smallest))) {
571                         printf("couldn't allocate space!\n");
572                         return EXIT_FAILURE;
573                 }
574                 smallest = SIZE_MAX;
575                 newopt(widtht, sizeof(widtht), n, &widthtable);
576                 widthtable.ti_shift += 4; /* correct for 4 entries per */
577 #endif
578
579 #if 0
580                 printf("optimizing comb3 table..\n");
581                 smallest = SIZE_MAX;
582                 comb3table.ii = NULL;
583                 for (i=0 ; i < 14 ; i++) {
584                         t = newopt(comb3t, sizeof(comb3t), i, &comb3table);
585                         if (smallest >= t) {
586                                 n = i;
587                                 smallest = t;
588 /*                      } else { */
589 /*                              break; */
590                         }
591                 }
592                 printf("smallest = %zu\n", smallest);
593                 if (!(comb3table.ii = malloc(smallest))) {
594                         printf("couldn't allocate space!\n");
595                         return EXIT_FAILURE;
596                 }
597                 smallest = SIZE_MAX;
598                 newopt(comb3t, sizeof(comb3t), n, &comb3table);
599                 comb3table.ti_shift += 8; /* correct for 4 entries per */
600 #endif
601
602                 dump_table_data(&cttable);
603                 dump_table_data(&ultable);
604 #if 0
605                 dump_table_data(&combtable);
606 #endif
607                 }
608
609                 printf("verifying for %s...\n", *argv);
610 #if RANGE == 0xffffU
611                 for (c=0 ; c <= 0xffffUL ; c++)
612 #else
613                 for (c=0 ; c <= 0x10ffffUL ; c++)
614 #endif
615                         {
616                         unsigned int glibc;
617                         unsigned int mine;
618                         unsigned int upper, lower;
619
620 #if 0
621 #if RANGE < 0x10000UL
622                         if (c == 0x10000UL) {
623                                 c = 0x30000UL;  /* skip 1st and 2nd sup planes */
624                         }
625 #elif RANGE < 0x20000UL
626                         if (c == 0x20000UL) {
627                                 c = 0x30000UL;  /* skip 2nd sup planes */
628                         }
629 #endif
630 #endif
631
632                         glibc = 0;
633                         if (iswalnum(c)) ++glibc; glibc <<= 1;
634                         if (iswalpha(c)) ++glibc; glibc <<= 1;
635                         if (iswblank(c)) ++glibc; glibc <<= 1;
636                         if (iswcntrl(c)) ++glibc; glibc <<= 1;
637                         if (iswdigit(c)) ++glibc; glibc <<= 1;
638                         if (iswgraph(c)) ++glibc; glibc <<= 1;
639                         if (iswlower(c)) ++glibc; glibc <<= 1;
640                         if (iswprint(c)) ++glibc; glibc <<= 1;
641                         if (iswpunct(c)) ++glibc; glibc <<= 1;
642                         if (iswspace(c)) ++glibc; glibc <<= 1;
643                         if (iswupper(c)) ++glibc; glibc <<= 1;
644                         if (iswxdigit(c)) ++glibc;
645
646                         {
647                                 unsigned int u;
648                                 int n, sc;
649                                 int i0, i1;
650
651                                 u = c;
652                                 if (u <= RANGE) {
653                                         sc = u & ((1 << cttable.ti_shift) - 1);
654                                         u >>= cttable.ti_shift;
655                                         n = u & ((1 << cttable.ii_shift) - 1);
656                                         u >>= cttable.ii_shift;
657
658                                         i0 = cttable.ii[u];
659                                         i0 <<= cttable.ii_shift;
660                                         i1 = cttable.ti[i0 + n];
661                                         i1 <<= (cttable.ti_shift-1);
662                                         d = cttable.ut[i1 + (sc >> 1)];
663
664                                         if (sc & 1) {
665                                                 d >>= 4;
666                                         }
667                                         d &= 0x0f;
668                                 } else if ((((unsigned int)(c - 0xe0020UL)) <= 0x5f) || (c == 0xe0001UL)){
669                                         d = __CTYPE_punct;
670                                 } else if (((unsigned int)(c - 0xf0000UL)) < 0x20000UL) {
671                                         if ((c & 0xffffU) <= 0xfffdU) {
672                                                 d = __CTYPE_punct;
673                                         } else {
674                                                 d = __CTYPE_unclassified;
675                                         }
676                                 } else {
677                                         d = __CTYPE_unclassified;
678                                 }
679
680                         mine = 0;
681                         if (mywalnum(c)) ++mine; mine <<= 1;
682                         if (mywalpha(c)) ++mine; mine <<= 1;
683                         if (mywblank(c)) ++mine; mine <<= 1;
684                         if (mywcntrl(c)) ++mine; mine <<= 1;
685                         if (mywdigit(c)) ++mine; mine <<= 1;
686                         if (mywgraph(c)) ++mine; mine <<= 1;
687                         if (mywlower(c)) ++mine; mine <<= 1;
688                         if (mywprint(c)) ++mine; mine <<= 1;
689                         if (mywpunct(c)) ++mine; mine <<= 1;
690                         if (mywspace(c)) ++mine; mine <<= 1;
691                         if (mywupper(c)) ++mine; mine <<= 1;
692                         if (mywxdigit(c)) ++mine;
693
694                         if (glibc != mine) {
695                                 printf("%#8x : glibc %#4x != %#4x mine %d\n", c, glibc, mine, d);
696                                 if (c < 0x30000UL) {
697                                         printf("sc=%#x u=%#x n=%#x i0=%#x i1=%#x\n", sc, u, n, i0, i1);
698                                 }
699                         }
700                                 upper = lower = u = c;
701                                 if (u <= RANGE) {
702                                         sc = u & ((1 << ultable.ti_shift) - 1);
703                                         u >>= ultable.ti_shift;
704                                         n = u & ((1 << ultable.ii_shift) - 1);
705                                         u >>= ultable.ii_shift;
706
707                                         i0 = ultable.ii[u];
708                                         i0 <<= ultable.ii_shift;
709                                         i1 = ultable.ti[i0 + n];
710                                         i1 <<= (ultable.ti_shift);
711                                         i1 += sc;
712                                         i0 = ultable.ut[i1];
713                                         upper = c + uldiff[i0].u;
714                                         lower = c + uldiff[i0].l;
715                                 }
716
717                         if (towupper(c) != upper) {
718                                 printf("%#8x : towupper glibc %#4x != %#4x mine\n",
719                                            c, towupper(c), upper);
720                         }
721
722                         if (towlower(c) != lower) {
723                                 printf("%#8x : towlower glibc %#4x != %#4x mine   i0 = %d\n",
724                                            c, towlower(c), lower, i0);
725                         }
726
727                         if (totitle && ((tt = towctrans(c, totitle)) != upper)) {
728                                 printf("%#8x : totitle glibc %#4lx != %#4x mine   i0 = %d\n",
729                                            c, tt, upper, i0);
730                         }
731                         }
732
733
734                         if ((c & 0xfff) == 0xfff) printf(".");
735                 }
736                 printf("done\n");
737         }
738
739         if (built) {
740                 FILE *fp;
741
742                 if (!(fp = fopen("wctables.h", "w"))) {
743                         printf("cannot open output file 'wctables.h'!\n");
744                         return EXIT_FAILURE;
745                 }
746
747                 fprintf(fp, "#define __LOCALE_DATA_WC_TABLE_DOMAIN_MAX  %#8lx\n\n",
748                                 (unsigned long) RANGE);
749                 output_table(fp, "ctype", &cttable);
750                 output_table(fp, "uplow", &ultable);
751
752
753 #warning fix the upper bound on the upper/lower tables... save 200 bytes or so
754                 fprintf(fp, "#define __LOCALE_DATA_WCuplow_diffs  %7u\n", ul_count);
755                 fprintf(fp, "\n#ifdef WANT_WCuplow_diff_data\n\n");
756                 fprintf(fp, "\nstatic const short __LOCALE_DATA_WCuplow_diff_data[%zu] = {",
757                            2 * (size_t) ul_count);
758                 for (i=0 ; i < ul_count ; i++) {
759                         if (i % 4 == 0) {
760                                 fprintf(fp, "\n");
761                         }
762                         fprintf(fp, " %6d, %6d,", uldiff[i].u, uldiff[i].l);
763                 }
764                 fprintf(fp, "\n};\n\n");
765                 fprintf(fp, "#endif /* WANT_WCuplow_diff_data */\n\n");
766
767
768 /*              output_table(fp, "comb", &combtable); */
769 /*              output_table(fp, "width", &widthtable); */
770
771                 fclose(fp);
772         }
773
774         return !built;
775 }
776
777 size_t newopt(unsigned char *ut, size_t usize, int shift, table_data *tbl)
778 {
779         static int recurse;
780         unsigned char *ti[RANGE+1];     /* table index */
781         size_t numblocks;
782         size_t blocksize;
783         size_t uniq;
784         size_t i, j;
785         size_t smallest, t;
786         unsigned char *ii_save;
787         int uniqblock[256];
788         unsigned char uit[RANGE+1];
789         int shift2;
790
791         memset(uniqblock, 0x00, sizeof(uniqblock));
792
793         ii_save = NULL;
794         blocksize = 1 << shift;
795         numblocks = usize >> shift;
796
797         /* init table index */
798         for (i=j=0 ; i < numblocks ; i++) {
799                 ti[i] = ut + j;
800                 j += blocksize;
801         }
802
803         /* sort */
804         nu_val = blocksize;
805         qsort(ti, numblocks, sizeof(unsigned char *), nu_memcmp);
806
807         uniq = 1;
808         uit[(ti[0]-ut)/blocksize] = 0;
809         for (i=1 ; i < numblocks ; i++) {
810                 if (memcmp(ti[i-1], ti[i], blocksize) < 0) {
811                         if (++uniq > 255) {
812                                 break;
813                         }
814                         uniqblock[uniq - 1] = i;
815                 }
816 #if 1
817                 else if (memcmp(ti[i-1], ti[i], blocksize) > 0) {
818                         printf("bad sort %i!\n", i);
819                         abort();
820                 }
821 #endif
822                 uit[(ti[i]-ut)/blocksize] = uniq - 1;
823         }
824
825         smallest = SIZE_MAX;
826         shift2 = -1;
827         if (uniq <= 255) {
828                 smallest = numblocks + uniq * blocksize;
829                 if (!recurse) {
830                         ++recurse;
831                         for (j=1 ; j < 14 ; j++) {
832                                 if ((numblocks >> j) < 2) break;
833                                 if (tbl) {
834                                         ii_save = tbl->ii;
835                                         tbl->ii = NULL;
836                                 }
837                                 if ((t = newopt(uit, numblocks, j, tbl)) < SIZE_MAX) {
838                                         t += uniq * blocksize;
839                                 }
840                                 if (tbl) {
841                                         tbl->ii = ii_save;
842                                 }
843                                 if (smallest >= t) {
844                                         shift2 = j;
845                                         smallest = t;
846                                         if (!tbl->ii) {
847                                                 printf("ishift %zu  tshift %zu  size %zu\n",
848                                                            shift2, shift, t);
849                                         }
850 /*                              } else { */
851 /*                                      break; */
852                                 }
853                         }
854                         --recurse;
855                 }
856         } else {
857                 return SIZE_MAX;
858         }
859
860         if (tbl->ii) {
861                 if (recurse) {
862                         tbl->ii_shift = shift;
863                         tbl->ii_len = numblocks;
864                         memcpy(tbl->ii, uit, numblocks);
865                         tbl->ti = tbl->ii + tbl->ii_len;
866                         tbl->ti_len = uniq * blocksize;
867                         for (i=0 ; i < uniq ; i++) {
868                                 memcpy(tbl->ti + i * blocksize, ti[uniqblock[i]], blocksize);
869                         }
870                 } else {
871                         ++recurse;
872                         printf("setting ishift %zu  tshift %zu\n",
873                                                            shift2, shift);
874                         newopt(uit, numblocks, shift2, tbl);
875                         --recurse;
876                         tbl->ti_shift = shift;
877                         tbl->ut_len = uniq * blocksize;
878                         tbl->ut = tbl->ti + tbl->ti_len;
879                         for (i=0 ; i < uniq ; i++) {
880                                 memcpy(tbl->ut + i * blocksize, ti[uniqblock[i]], blocksize);
881                         }
882                 }
883         }
884         return smallest;
885 }
886 /* vi: set sw=4 ts=4: */