2 * Copyright (C) 2000-2006 Erik Andersen <andersen@uclibc.org>
4 * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
21 #include UCLIBC_CTYPE_HEADER
24 /* #define CTYPE_PACKED */
/*
 * Table geometry. Each *_IDX_SHIFT sets the row size of its table:
 * a row holds (1 << shift) entries and the matching per-codeset index
 * array holds (128 >> shift) entries, i.e. one index entry per row of
 * 128 character codes.
 */
25 #define UPLOW_IDX_SHIFT 3
26 /* best if 2 unpacked or 3 packed */
27 #define CTYPE_IDX_SHIFT 3
28 /* 3 or 4 are very similar */
29 #define C2WC_IDX_SHIFT 3
/* Number of entries in each per-codeset index array. */
31 #define CTYPE_IDX_LEN (128 >> (CTYPE_IDX_SHIFT))
32 #define UPLOW_IDX_LEN (128 >> (UPLOW_IDX_SHIFT))
33 #define C2WC_IDX_LEN (128 >> (C2WC_IDX_SHIFT))
/* Number of entries in one table row (the packed ctype variant is commented out). */
35 /* #ifdef CTYPE_PACKED */
36 /* #define CTYPE_ROW_LEN (1 << ((CTYPE_IDX_SHIFT)-1)) */
38 #define CTYPE_ROW_LEN (1 << (CTYPE_IDX_SHIFT))
40 #define UPLOW_ROW_LEN (1 << (UPLOW_IDX_SHIFT))
41 #define C2WC_ROW_LEN (1 << (C2WC_IDX_SHIFT))
/* Largest wide-character value accepted from the input tables (inclusive). */
45 #define MAX_WCHAR (0x2600-1)
/*
 * Row storage shared by all codesets; each table is sized for 256 rows,
 * and main() reports an error once a row counter exceeds 256.
 * NOTE(review): presumably main() deduplicates rows into these tables;
 * only the c2wc_tbl pointer setup is visible here -- confirm for the others.
 */
47 static unsigned char ctype_tbl[256 * CTYPE_ROW_LEN];
48 static unsigned char uplow_tbl[256 * UPLOW_ROW_LEN];
50 static unsigned short c2wc_tbl[256 * C2WC_ROW_LEN];
/*
 * Work arrays for the wide-char -> 8-bit lookup:
 *   tt - deduplicated (1 << TT_SHIFT)-byte blocks copied from a codeset's w2c map
 *   ti - deduplicated (1 << TI_SHIFT)-byte blocks (source presumably xi -- confirm)
 *   xi - for each w2c block (index i >> TT_SHIFT), the number of its tt block
 */
52 static unsigned char tt[MAX_WCHAR+1];
53 static unsigned char ti[MAX_WCHAR+1];
54 static unsigned char xi[MAX_WCHAR+1];
/* Number of distinct rows currently stored in each row table. */
56 static int n_ctype_rows;
57 static int n_uplow_rows;
59 static int n_c2wc_rows;
/* Emitted as __LOCALE_DATA_Cwc2c_DOMAIN_MAX in the generated header. */
64 #define RANGE MAX_WCHAR
/* Length of the per-codeset top-level (ii) index of the wc2c lookup. */
69 #define II_LEN ((MAX_WCHAR+1) >> (TT_SHIFT+TI_SHIFT))
72 unsigned long c2w[256];
73 unsigned char w2c[MAX_WCHAR+1]; /* indexed by wc in 0..MAX_WCHAR inclusive, so it needs MAX_WCHAR+1 slots */
74 unsigned char ii[II_LEN];
75 unsigned char ctype_idx[CTYPE_IDX_LEN];
76 unsigned char uplow_idx[UPLOW_IDX_LEN];
77 unsigned char c2wc_idx[C2WC_IDX_LEN];
80 int main(int argc, char **argv)
85 unsigned long max_wchar;
91 unsigned char row[256];
93 unsigned short wrow[256];
95 char codeset_list[500];
96 char codeset_index[30];
97 int codeset_list_end = 0;
100 if (!setlocale(LC_CTYPE, "en_US.UTF-8")) {
101 printf("setlocale(LC_CTYPE,\"en_US.UTF-8\") failed!\n");
105 if (!(out = fopen("c8tables.h","w"))) {
106 printf("error: couldn't open file \"c8tables.h\"\n");
112 /* User requested 8-bit codesets, but didn't list any... */
113 /* Allow to build, just so this feature can be left on in config. */
114 fprintf(out, "#ifdef __CTYPE_HAS_8_BIT_LOCALES\n");
115 fprintf(out, "#warning ignoring 8 bit codesets request"
116 " as no codesets specified.\n");
117 fprintf(out, "#endif\n");
118 fprintf(out, "#undef __CTYPE_HAS_8_BIT_LOCALES\n\n");
120 fprintf(out, "#define __LOCALE_DATA_NUM_CODESETS\t\t0\n");
121 fprintf(out, "#define __LOCALE_DATA_CODESET_LIST\t\t\"\"\n");
126 /* fprintf(out, "#define __CTYPE_HAS_8_BIT_LOCALES\t1\n\n"); */
127 fprintf(out, "#ifdef __CTYPE_HAS_8_BIT_LOCALES\n\n");
131 fprintf(out, "#undef __CTYPE_HAS_8_BIT_LOCALES\n\n");
133 fprintf(out, "#define __LOCALE_DATA_NUM_CODESETS\t\t0\n");
134 fprintf(out, "#define __LOCALE_DATA_CODESET_LIST\t\t\"\"\n");
136 fprintf(out, "#define __CTYPE_HAS_8_BIT_LOCALES\t\t1\n\n");
139 fprintf(out, "#define __LOCALE_DATA_Cctype_IDX_SHIFT\t%d\n", CTYPE_IDX_SHIFT);
140 fprintf(out, "#define __LOCALE_DATA_Cctype_IDX_LEN\t\t%d\n", CTYPE_IDX_LEN);
142 fprintf(out, "#define __LOCALE_DATA_Cctype_ROW_LEN\t\t%d\n", CTYPE_ROW_LEN >> 1);
143 fprintf(out, "#define __LOCALE_DATA_Cctype_PACKED\t\t1\n");
145 fprintf(out, "#define __LOCALE_DATA_Cctype_ROW_LEN\t\t%d\n", CTYPE_ROW_LEN);
146 fprintf(out, "#undef __LOCALE_DATA_Cctype_PACKED\n");
149 fprintf(out, "\n#define __LOCALE_DATA_Cuplow_IDX_SHIFT\t%d\n", UPLOW_IDX_SHIFT);
150 fprintf(out, "#define __LOCALE_DATA_Cuplow_IDX_LEN\t\t%d\n", UPLOW_IDX_LEN);
151 fprintf(out, "#define __LOCALE_DATA_Cuplow_ROW_LEN\t\t%d\n", UPLOW_ROW_LEN);
154 fprintf(out, "\n#define __LOCALE_DATA_Cc2wc_IDX_LEN\t\t%d\n", C2WC_IDX_LEN);
155 fprintf(out, "#define __LOCALE_DATA_Cc2wc_IDX_SHIFT\t\t%d\n", C2WC_IDX_SHIFT);
156 fprintf(out, "#define __LOCALE_DATA_Cc2wc_ROW_LEN\t\t%d\n", C2WC_ROW_LEN);
159 fprintf(out, "\ntypedef struct {\n");
160 fprintf(out, "\tunsigned char idx8ctype[%d];\n", CTYPE_IDX_LEN);
161 fprintf(out, "\tunsigned char idx8uplow[%d];\n", UPLOW_IDX_LEN);
163 fprintf(out, "\tunsigned char idx8c2wc[%d];\n", C2WC_IDX_LEN);
164 fprintf(out, "\tunsigned char idx8wc2c[%d];\n", II_LEN);
166 fprintf(out, "} __codeset_8_bit_t;\n\n");
168 fprintf(out, "#ifdef WANT_DATA\n\n");
169 fprintf(out, "static const __codeset_8_bit_t codeset_8_bit[%d] = {\n", argc-1);
173 codeset_index[0] = 0;
175 if (!(fp = fopen(*++argv,"r"))) {
176 printf("error: couldn't open file \"%s\"\n", *argv);
179 printf("processing %s... ", *argv);
186 s0 = strrchr(*argv, '/');
192 s1 = strrchr(s0, '.');
199 /* if ((numsets == 0) && strncmp("ASCII", s0, n)) { */
200 /* printf("error - first codeset isn't ASCII!\n"); */
201 /* return EXIT_FAILURE; */
204 if (numsets + 1 >= sizeof(codeset_index)) { /* slot numsets+1 is written for each accepted codeset, so it must be in bounds too */
205 printf("error - too many codesets!\n");
209 if (codeset_list_end + n + 1 + numsets + 1 + 1 >= 256) {
210 printf("error - codeset list to big!\n");
214 codeset_index[numsets+1] = codeset_index[numsets] + n+1;
215 strncpy(codeset_list + codeset_list_end, s0, n);
216 codeset_list_end += (n+1);
217 codeset_list[codeset_list_end - 1] = 0;
219 fprintf(out, "\t{ /* %.*s */", n, s0);
222 memset(&csd[numsets], 0, sizeof(charset_data)); /* memset(dst, value, len): the value and length were swapped, clearing nothing */
223 memset(xi, 0, sizeof(xi)); /* reset the tt block index before processing this codeset */
228 while (fgets(buf,sizeof(buf),fp)) {
229 if ((2 != sscanf(buf, "{ %lx , %lx", &c, &wc))
230 || (c >= 256) || (wc > MAX_WCHAR)) {
231 printf("error: scanf failure! \"%s\"\n", buf);
235 /* don't put in w2c... dynamically build tt instead. */
237 if (c <= 0x7f) { /* check the 7bit entries but don't store */
239 printf("error: c != wc in %s\n", buf);
242 csd[numsets].c2w[c] = wc;
243 csd[numsets].w2c[wc] = 0; /* ignore */
244 if (wc > max_wchar) {
248 csd[numsets].c2w[c] = wc;
249 csd[numsets].w2c[wc] = c;
250 if (wc > max_wchar) {
256 printf("%d lines ", lines);
258 for (i = 0 ; i <= MAX_WCHAR ; i += (1 << TT_SHIFT)) {
259 p = &csd[numsets].w2c[i];
260 for (j = 0 ; j < tt_num ; j++) {
261 if (!memcmp(p, &tt[j << TT_SHIFT], (1 << TT_SHIFT))) {
265 if (j == tt_num) { /* new entry */
266 memcpy(&tt[j << TT_SHIFT], p, (1 << TT_SHIFT));
269 xi[i >> TT_SHIFT] = j;
272 for (i = 0 ; i <= (MAX_WCHAR >> TT_SHIFT) ; i += (1 << TI_SHIFT)) {
274 for (j = 0 ; j < ti_num ; j++) {
275 if (!memcmp(p, &ti[j << TI_SHIFT], (1 << TI_SHIFT))) {
279 if (j == ti_num) { /* new entry */
280 memcpy(&ti[j << TI_SHIFT], p, (1 << TI_SHIFT));
283 csd[numsets].ii[i >> TI_SHIFT] = j;
284 /* printf("%d ", i >> TI_SHIFT); */
288 fprintf(out, "\n\t\t/* idx8ctype data */\n\t\t{");
289 for (i = 128 ; i < 256 ; i++) {
293 /* if (!(i & 0x7)) { */
294 /* fprintf(out, "\n"); */
297 c = csd[numsets].c2w[i];
299 if (c == 0) { /* non-existant char in codeset */
300 d = __CTYPE_unclassified;
301 } else if (iswdigit(c)) {
303 } else if (iswalpha(c)) {
304 d = __CTYPE_alpha_nonupper_nonlower;
306 d = __CTYPE_alpha_lower;
308 d = __CTYPE_alpha_upper_lower;
310 } else if (iswupper(c)) {
311 d = __CTYPE_alpha_upper;
313 } else if (iswpunct(c)) {
315 } else if (iswgraph(c)) {
317 } else if (iswprint(c)) {
318 d = __CTYPE_print_space_nonblank;
320 d = __CTYPE_print_space_blank;
322 } else if (iswspace(c) && !iswcntrl(c)) {
323 d = __CTYPE_space_nonblank_noncntrl;
325 d = __CTYPE_space_blank_noncntrl;
327 } else if (iswcntrl(c)) {
328 d = __CTYPE_cntrl_nonspace;
330 d = __CTYPE_cntrl_space_nonblank;
332 d = __CTYPE_cntrl_space_blank;
336 d = __CTYPE_unclassified;
340 row[i & (CTYPE_ROW_LEN-1)] = d;
341 if ((i & (CTYPE_ROW_LEN-1)) == (CTYPE_ROW_LEN-1)) {
343 for (j=0 ; j < n_ctype_rows ; j++) {
344 if (!memcmp(p, row, CTYPE_ROW_LEN)) {
349 if (j == n_ctype_rows) { /* new entry */
350 if (++n_ctype_rows > 256) {
351 printf("error -- to many ctype rows!\n");
354 memcpy(p, row, CTYPE_ROW_LEN);
356 csd[numsets].ctype_idx[(i - 128) >> CTYPE_IDX_SHIFT] = j; /* i runs 128..255; rebase so the index stays within CTYPE_IDX_LEN */
357 if (!((i >> CTYPE_IDX_SHIFT) & 0x7)
358 && (i != (127 + CTYPE_ROW_LEN))
360 fprintf(out, "\n\t\t ");
362 fprintf(out, " %#4x,", j);
365 fprintf(out, " %#4x,", d);
372 fprintf(out, ",\n\t\t/* idx8uplow data */\n\t\t{");
373 for (i = 128 ; i < 256 ; i++) {
375 /* if (!(i & 0x7)) { */
376 /* fprintf(out, "\n"); */
378 c = csd[numsets].c2w[i];
383 if (u >= 0x80) u = csd[numsets].w2c[u];
384 if (l >= 0x80) l = csd[numsets].w2c[l];
386 if (u == 0) u = i; /* upper is missing, so ignore */
387 if (l == 0) l = i; /* lower is missing, so ignore */
390 /* store as unsigned char and let overflow handle it. */
391 /* if ((((u-i) < CHAR_MIN) || ((u-i) > CHAR_MAX)) */
392 /* || (((i-l) < CHAR_MIN) || ((i-l) > CHAR_MAX)) */
394 /* printf("error - uplow diff out of range! %d %ld %ld\n", */
396 /* return EXIT_FAILURE; */
399 row[i & (UPLOW_ROW_LEN-1)] = ((l==i) ? (u-i) : (i-l));
400 if ((i & (UPLOW_ROW_LEN-1)) == (UPLOW_ROW_LEN-1)) {
402 for (j=0 ; j < n_uplow_rows ; j++) {
403 if (!memcmp(p, row, UPLOW_ROW_LEN)) {
408 if (j == n_uplow_rows) { /* new entry */
409 if (++n_uplow_rows > 256) {
410 printf("error -- to many uplow rows!\n");
413 memcpy(p, row, UPLOW_ROW_LEN);
415 csd[numsets].uplow_idx[(i - 128) >> UPLOW_IDX_SHIFT] = j; /* i runs 128..255; rebase so the index stays within UPLOW_IDX_LEN */
416 if (!((i >> UPLOW_IDX_SHIFT) & 0x7)
417 && (i != (127 + UPLOW_ROW_LEN))
419 fprintf(out, "\n\t\t ");
421 fprintf(out, " %#4x,", j);
425 if (!(i & 0x7) && i) {
428 fprintf(out, " %4ld,", (l==i) ? (u-i) : (i-l));
429 /* fprintf(out, " %4ld,", (l==i) ? u : l); */
431 if ((u != i) || (l != i)) {
433 fprintf(out, " %#08lx, %#08lx, %#08lx, %#08lx, %#08lx, %#08lx, \n",
437 (unsigned long) towlower(c),
439 (unsigned long) towupper(c));
442 fprintf(out, " %#08lx, %8ld, %d, %8ld, %d, %#08lx\n",
459 #else /* DO_WIDE_CHAR */
462 fprintf(out, ",\n\t\t/* idx8c2wc data */\n\t\t{");
463 for (i = 128 ; i < 256 ; i++) {
465 wrow[i & (C2WC_ROW_LEN-1)] = csd[numsets].c2w[i];
466 if ((i & (C2WC_ROW_LEN-1)) == (C2WC_ROW_LEN-1)) {
467 p = (unsigned char *) c2wc_tbl;
468 for (j=0 ; j < n_c2wc_rows ; j++) {
469 if (!memcmp(p, (char *) wrow, 2*C2WC_ROW_LEN)) {
474 if (j == n_c2wc_rows) { /* new entry */
475 if (++n_c2wc_rows > 256) {
476 printf("error -- to many c2wc rows!\n");
479 memcpy(p, (char *) wrow, 2*C2WC_ROW_LEN);
481 csd[numsets].c2wc_idx[(i - 128) >> C2WC_IDX_SHIFT] = j; /* i runs 128..255; rebase so the index stays within C2WC_IDX_LEN */
482 if (!((i >> C2WC_IDX_SHIFT) & 0x7)
483 && (i != (127 + C2WC_ROW_LEN))
485 fprintf(out, "\n\t\t ");
487 fprintf(out, " %#4x,", j);
490 if (!(i & 0x7) && i) {
493 fprintf(out, " %#6lx,", csd[numsets].c2w[i]);
496 fprintf(out, " },\n");
500 /* fprintf(out, "\nII_LEN = %d\n", II_LEN); */
501 fprintf(out, "\t\t/* idx8wc2c data */\n\t\t{");
502 for (i = 0 ; i < II_LEN ; i++) {
503 if (!(i & 0x7) && i) {
504 fprintf(out, "\n\t\t ");
506 fprintf(out, " %#4x,", csd[numsets].ii[i]);
508 fprintf(out, " }\n");
511 #endif /* DO_WIDE_CHAR */
512 fprintf(out, "\t},\n");
518 fprintf(out, "};\n");
519 fprintf(out, "\n#endif /* WANT_DATA */\n");
523 fprintf(out, "#define __LOCALE_DATA_Cwc2c_DOMAIN_MAX\t%#x\n", RANGE);
524 fprintf(out, "#define __LOCALE_DATA_Cwc2c_TI_SHIFT\t\t%d\n", TI_SHIFT);
525 fprintf(out, "#define __LOCALE_DATA_Cwc2c_TT_SHIFT\t\t%d\n", TT_SHIFT);
526 fprintf(out, "#define __LOCALE_DATA_Cwc2c_II_LEN\t\t%d\n", II_LEN);
527 fprintf(out, "#define __LOCALE_DATA_Cwc2c_TI_LEN\t\t%d\n", ti_num << TI_SHIFT);
528 fprintf(out, "#define __LOCALE_DATA_Cwc2c_TT_LEN\t\t%d\n", tt_num << TT_SHIFT);
531 fprintf(out, "\n#define __LOCALE_DATA_Cwc2c_TBL_LEN\t\t%d\n",
532 (ti_num << TI_SHIFT) + (tt_num << TT_SHIFT));
534 fprintf(out, "#ifdef WANT_DATA\n\n");
535 fprintf(out, "static const unsigned char __LOCALE_DATA_Cwc2c_data[%d] = {\n",
536 (ti_num << TI_SHIFT) + (tt_num << TT_SHIFT));
537 fprintf(out, "\t/* ti_table */\n\t");
538 for (i=0 ; i < ti_num << TI_SHIFT ; i++) {
540 fprintf(out, "\n\t");
542 fprintf(out, " %#4x,", ti[i]);
545 fprintf(out, "\t/* tt_table */\n\t");
546 for (i=0 ; i < tt_num << TT_SHIFT ; i++) {
548 fprintf(out, "\n\t");
550 fprintf(out, " %#4x,", tt[i]);
552 fprintf(out, "\n};\n");
554 fprintf(out, "\n#endif /* WANT_DATA */\n");
555 #endif /* DO_WIDE_CHAR */
557 fprintf(out, "\n#define __LOCALE_DATA_Cuplow_TBL_LEN\t\t%d\n",
558 n_uplow_rows * UPLOW_ROW_LEN);
559 fprintf(out, "\n#ifdef WANT_DATA\n\n");
561 fprintf(out, "\nstatic const unsigned char __LOCALE_DATA_Cuplow_data[%d] = {\n",
562 n_uplow_rows * UPLOW_ROW_LEN);
564 for (j=0 ; j < n_uplow_rows ; j++) {
566 for (i=0 ; i < UPLOW_ROW_LEN ; i++) {
567 fprintf(out, " %#4x,", (unsigned int)((unsigned char) p[i]));
572 fprintf(out, "};\n");
574 fprintf(out, "\n#endif /* WANT_DATA */\n");
575 fprintf(out, "\n#define __LOCALE_DATA_Cctype_TBL_LEN\t\t%d\n",
577 n_ctype_rows * CTYPE_ROW_LEN / 2
579 n_ctype_rows * CTYPE_ROW_LEN
582 fprintf(out, "\n#ifdef WANT_DATA\n\n");
585 fprintf(out, "\nstatic const unsigned char __LOCALE_DATA_Cctype_data[%d] = {\n",
587 n_ctype_rows * CTYPE_ROW_LEN / 2
589 n_ctype_rows * CTYPE_ROW_LEN
593 for (j=0 ; j < n_ctype_rows ; j++) {
595 for (i=0 ; i < CTYPE_ROW_LEN ; i++) {
597 fprintf(out, " %#4x,", (unsigned int)(p[i] + (p[i+1] << 4)));
600 fprintf(out, " %#4x,", (unsigned int)p[i]);
606 fprintf(out, "};\n");
608 fprintf(out, "\n#endif /* WANT_DATA */\n");
612 fprintf(out, "\n#define __LOCALE_DATA_Cc2wc_TBL_LEN\t\t%d\n",
613 n_c2wc_rows * C2WC_ROW_LEN);
614 fprintf(out, "\n#ifdef WANT_DATA\n\n");
616 fprintf(out, "\nstatic const unsigned short __LOCALE_DATA_Cc2wc_data[%d] = {\n",
617 n_c2wc_rows * C2WC_ROW_LEN);
618 p = (unsigned char *) c2wc_tbl;
619 for (j=0 ; j < n_c2wc_rows ; j++) {
621 for (i=0 ; i < C2WC_ROW_LEN ; i++) {
622 fprintf(out, " %#6x,", (unsigned int)(((unsigned short *)p)[i]));
627 fprintf(out, "};\n");
628 fprintf(out, "\n#endif /* WANT_DATA */\n");
629 #endif /* DO_WIDE_CHAR */
630 fprintf(out, "\n\n");
632 fprintf(out, "#define __LOCALE_DATA_NUM_CODESETS\t\t%d\n", numsets);
633 fprintf(out, "#define __LOCALE_DATA_CODESET_LIST \\\n\t\"");
634 for (i=0 ; i < numsets ; i++) {
635 fprintf(out, "\\x%02x", numsets + 1 + (unsigned char) codeset_index[i]);
636 if (((i & 7) == 7) && (i + 1 < numsets)) {
637 fprintf(out, "\" \\\n\t\"");
640 fprintf(out, "\" \\\n\t\"\\0\"");
641 for (i=0 ; i < numsets ; i++) {
642 fprintf(out, " \\\n\t\"%s\\0\"",
643 codeset_list + ((unsigned char)codeset_index[i]));
646 fprintf(out, "\n\n");
647 for (i=0 ; i < numsets ; i++) {
650 strcpy(buf, codeset_list + ((unsigned char)codeset_index[i]));
651 for (z=buf ; *z ; z++) {
656 fprintf(out, "#define __CTYPE_HAS_CODESET_%s\n", buf);
659 fprintf(out, "#define __CTYPE_HAS_CODESET_UTF_8\n");
660 #endif /* DO_WIDE_CHAR */
663 fprintf(out, "\n#endif /* __CTYPE_HAS_8_BIT_LOCALES */\n\n");
670 printf("tt_num = %d ti_num = %d\n", tt_num, ti_num);
671 printf("max_wchar = %#lx\n", max_wchar);
673 printf("size is %d * %d + %d * %d + %d * %d = %d\n",
674 tt_num, 1 << TT_SHIFT, ti_num, 1 << TI_SHIFT,
675 ((MAX_WCHAR >> (TT_SHIFT + TI_SHIFT)) + 1), numsets,
676 j = tt_num * (1 << TT_SHIFT) + ti_num * (1 << TI_SHIFT)
677 + ((MAX_WCHAR >> (TT_SHIFT + TI_SHIFT)) + 1) * numsets);
679 #endif /* DO_WIDE_CHAR */
687 printf("ctype - CTYPE_IDX_SHIFT = %d -- %d * %d + %d * %d = %d\n",
688 CTYPE_IDX_SHIFT, numsets, CTYPE_IDX_LEN, n_ctype_rows, CTYPE_ROW_LEN / i,
689 j = numsets * CTYPE_IDX_LEN + n_ctype_rows * CTYPE_ROW_LEN / i);
692 printf("uplow - UPLOW_IDX_SHIFT = %d -- %d * %d + %d * %d = %d\n",
693 UPLOW_IDX_SHIFT, numsets, UPLOW_IDX_LEN, n_uplow_rows, UPLOW_ROW_LEN,
694 j = numsets * UPLOW_IDX_LEN + n_uplow_rows * UPLOW_ROW_LEN);
699 printf("c2wc - C2WC_IDX_SHIFT = %d -- %d * %d + 2 * %d * %d = %d\n",
700 C2WC_IDX_SHIFT, numsets, C2WC_IDX_LEN, n_c2wc_rows, C2WC_ROW_LEN,
701 j = numsets * C2WC_IDX_LEN + 2 * n_c2wc_rows * C2WC_ROW_LEN);
704 #endif /* DO_WIDE_CHAR */
706 printf("total size = %d\n", total_size);
708 /* for (i=0 ; i < numsets ; i++) { */
709 /* printf("codeset_index[i] = %d codeset_list[ci[i]] = \"%s\"\n", */
710 /* (unsigned char) codeset_index[i], */
711 /* codeset_list + ((unsigned char)codeset_index[i])); */