OSDN Git Service

Replace FSF snail mail address with URLs
[uclinux-h8/uClibc.git] / libc / misc / locale / locale.c
1 /*  Copyright (C) 2002     Manuel Novoa III
2  *
3  *  This library is free software; you can redistribute it and/or
4  *  modify it under the terms of the GNU Library General Public
5  *  License as published by the Free Software Foundation; either
6  *  version 2 of the License, or (at your option) any later version.
7  *
8  *  This library is distributed in the hope that it will be useful,
9  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
10  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  *  Library General Public License for more details.
12  *
13  *  You should have received a copy of the GNU Library General Public
14  *  License along with this library; if not, see
15  *  <http://www.gnu.org/licenses/>.
16  */
17
18 /* Nov. 1, 2002
19  * Reworked setlocale() return values and locale arg processing to
20  *   be more like glibc.  Applications expecting to be able to
21  *   query locale settings should now work... at the cost of almost
22  *   doubling the size of the setlocale object code.
23  * Fixed a bug in the internal fixed-size-string locale specifier code.
24  *
25  * Dec 20, 2002
26  * Added in collation support and updated stub nl_langinfo.
27  *
28  * Aug 1, 2003
29  * Added glibc-like extended locale stuff (newlocale, duplocale, etc).
30  *
31  * Aug 18, 2003
32  * Bug in duplocale... collation data wasn't copied.
33  * Bug in newlocale... translate 1<<LC_ALL to LC_ALL_MASK.
34  * Bug in _wchar_utf8sntowcs... fix cut-n-paste error.
35  *
36  * Aug 31, 2003
37  * Hack around bg_BG bug; grouping specified but no thousands separator.
38  * Also, disable the locale link_warnings for now, as they generate a
39  * lot of noise when using libstd++.
40  */
41
42
43 /*  TODO:
44  *  Implement the shared mmap code so non-mmu platforms can use this.
45  *  Add some basic collate functionality similar to what the previous
46  *    locale support had (8-bit codesets only).
47  */
48
49 #define __CTYPE_HAS_8_BIT_LOCALES 1
50
51 #include <string.h>
52 #include <stdlib.h>
53 #include <stddef.h>
54 #include <limits.h>
55 #include <stdint.h>
56 #include <assert.h>
57 #include <errno.h>
58 #include <ctype.h>
59 #include <stdio.h>
60
61 #ifdef __UCLIBC_MJN3_ONLY__
62 #ifdef L_setlocale
63 #warning TODO: Make the link_warning()s a config option?
64 #endif
65 #endif
66 #undef link_warning
67 #define link_warning(A,B)
68
69 #undef __LOCALE_C_ONLY
70 #ifndef __UCLIBC_HAS_LOCALE__
71 #define __LOCALE_C_ONLY
72 #endif /* __UCLIBC_HAS_LOCALE__ */
73
74
75 #ifdef __LOCALE_C_ONLY
76
77 #include <locale.h>
78
79 #else  /* __LOCALE_C_ONLY */
80
81 #ifdef __UCLIBC_MJN3_ONLY__
82 #ifdef L_setlocale
83 #warning TODO: Fix the __CTYPE_HAS_8_BIT_LOCALES define at the top of the file.
84 #warning TODO: Fix __WCHAR_ENABLED.
85 #endif
86 #endif
87
88 /* Need to include this before locale.h! */
89 #include <bits/uClibc_locale.h>
90
91 #undef CODESET_LIST
92 #define CODESET_LIST                    (__locale_mmap->codeset_list)
93
94 #ifdef __UCLIBC_HAS_XLOCALE__
95 #include <locale.h>
96 #else /* __UCLIBC_HAS_XLOCALE__ */
97 /* We need this internally... */
98 #define __UCLIBC_HAS_XLOCALE__ 1
99 #include <locale.h>
100 #undef __UCLIBC_HAS_XLOCALE__
101 #endif /* __UCLIBC_HAS_XLOCALE__ */
102
103 #include <wchar.h>
104
105 #define LOCALE_NAMES                    (__locale_mmap->locale_names5)
106 #define LOCALES                                 (__locale_mmap->locales)
107 #define LOCALE_AT_MODIFIERS             (__locale_mmap->locale_at_modifiers)
108 #define CATEGORY_NAMES                  (__locale_mmap->lc_names)
109
110 #ifdef __UCLIBC_MJN3_ONLY__
111 #warning REMINDER: redo the MAX_LOCALE_STR stuff...
112 #endif
113 #define MAX_LOCALE_STR                  256 /* TODO: Only sufficient for current case. */
114 #define MAX_LOCALE_CATEGORY_STR 32 /* TODO: Only sufficient for current case. */
115 /* Note: Best if MAX_LOCALE_CATEGORY_STR is a power of 2. */
116
117 extern int _locale_set_l(const unsigned char *p, __locale_t base) attribute_hidden;
118 extern void _locale_init_l(__locale_t base) attribute_hidden;
119
120 #endif /* __LOCALE_C_ONLY */
121
122 #undef LOCALE_STRING_SIZE
123 #define LOCALE_SELECTOR_SIZE (2 * __LC_ALL + 2)
124
125 #ifdef __UCLIBC_MJN3_ONLY__
126 #ifdef L_setlocale
127 #warning TODO: Create a C locale selector string.
128 #endif
129 #endif
130 #define C_LOCALE_SELECTOR "\x23\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80"
131
132
133 #include <langinfo.h>
134 #include <nl_types.h>
135
136 /**********************************************************************/
137 #ifdef L_setlocale
138
139 #ifdef __LOCALE_C_ONLY
140
141 link_warning(setlocale,"REMINDER: The 'setlocale' function supports only C|POSIX locales.")
142
/* The only locale name this build ever reports. */
static const char C_string[] = "C";

/*
 * setlocale() for builds without locale data.
 *
 * category: any value in 0..LC_ALL is accepted; anything else fails.
 * locale:   NULL (query), "" (implementation default), "C" or "POSIX".
 *
 * Returns a pointer to the string "C" when the request is acceptable and
 * NULL otherwise.  No state is changed: the process is always in the
 * C/POSIX locale.
 */
char *setlocale(int category, register const char *locale)
{
	/* The unsigned cast also rejects negative categories. */
	if (((unsigned int) category) > LC_ALL) {
		return NULL;
	}

	/* A query, or a request for the default locale (which is C). */
	if ((locale == NULL) || (locale[0] == '\0')) {
		return (char *) C_string;
	}

	/* The two spellings of the only supported locale. */
	if ((strcmp(locale, "C") == 0) || (strcmp(locale, "POSIX") == 0)) {
		return (char *) C_string;
	}

	return NULL;
}
155
156 #else /* ---------------------------------------------- __LOCALE_C_ONLY */
157
158 #ifdef __UCLIBC_HAS_THREADS__
159 link_warning(setlocale,"REMINDER: The 'setlocale' function is _not_ threadsafe except for simple queries.")
160 #endif
161
162 #if !defined(__LOCALE_DATA_NUM_LOCALES) || (__LOCALE_DATA_NUM_LOCALES <= 1)
163 #error locales enabled, but not data other than for C locale!
164 #endif
165
166 #ifdef __UCLIBC_MJN3_ONLY__
167 #warning TODO: Move posix and utf8 strings.
168 #endif
169 static const char posix[] = "POSIX";
170 static const char utf8[] = "UTF-8";
171
172 #ifdef __UCLIBC_MJN3_ONLY__
173 #warning TODO: Fix dimensions of hr_locale.
174 #endif
175 /* Individual category strings start at
176  * hr_locale + category * MAX_LOCALE_CATEGORY_STR.  This holds for LC_ALL as well.
177  */
178 static char hr_locale[(MAX_LOCALE_CATEGORY_STR * LC_ALL) + MAX_LOCALE_STR];
179
180
181 static void update_hr_locale(const unsigned char *spec)
182 {
183         const unsigned char *loc;
184         const unsigned char *s;
185         char *n;
186         int i, category, done;
187
188         done = category = 0;
189         do {
190                 s = spec + 1;
191                 n = hr_locale + category * MAX_LOCALE_CATEGORY_STR;
192
193                 if (category == LC_ALL) {
194                         done = 1;
195                         for (i = 0 ; i < LC_ALL-1 ; i += 2) {
196                                 if ((s[i] != s[i+2]) || (s[i+1] != s[i+3])) {
197                                         goto SKIP;
198                                 }
199                         }
200                         /* All categories the same, so simplify string by using a single
201                          * category. */
202                         category = LC_CTYPE;
203                 }
204
205         SKIP:
206                 i = (category == LC_ALL) ? 0 : category;
207                 s += 2*i;
208
209                 do {
210                         if ((*s != 0xff) || (s[1] != 0xff)) {
211                                 loc = LOCALES
212                                         + __LOCALE_DATA_WIDTH_LOCALES * ((((int)(*s & 0x7f)) << 7)
213                                                                                                          + (s[1] & 0x7f));
214                                 if (category == LC_ALL) {
215                                         /* CATEGORY_NAMES is unsigned char* */
216                                         n = stpcpy(n, (char*) CATEGORY_NAMES + (int) CATEGORY_NAMES[i]);
217                                         *n++ = '=';
218                                 }
219                                 if (*loc == 0) {
220                                         *n++ = 'C';
221                                         *n = 0;
222                                 } else {
223                                         char at = 0;
224                                         memcpy(n, LOCALE_NAMES + 5*((*loc)-1), 5);
225                                         if (n[2] != '_') {
226                                                 at = n[2];
227                                                 n[2] = '_';
228                                         }
229                                         n += 5;
230                                         *n++ = '.';
231                                         if (loc[2] == 2) {
232                                                 n = stpcpy(n, utf8);
233                                         } else if (loc[2] >= 3) {
234                                                 n = stpcpy(n, (char*) CODESET_LIST + (int)(CODESET_LIST[loc[2] - 3]));
235                                         }
236                                         if (at) {
237                                                 const char *q;
238                                                 *n++ = '@';
239                                                 q = (char*) LOCALE_AT_MODIFIERS;
240                                                 do {
241                                                         if (q[1] == at) {
242                                                                 n = stpcpy(n, q+2);
243                                                                 break;
244                                                         }
245                                                         q += 2 + *q;
246                                                 } while (*q);
247                                         }
248                                 }
249                                 *n++ = ';';
250                         }
251                         s += 2;
252                 } while (++i < category);
253                 *--n = 0;               /* Remove trailing ';' and nul-terminate. */
254
255                 ++category;
256         } while (!done);
257 }
258
259 char *setlocale(int category, const char *locale)
260 {
261         if (((unsigned int)(category)) > LC_ALL) {
262 #if 0
263                 __set_errno(EINVAL);    /* glibc sets errno -- SUSv3 doesn't say. */
264 #endif
265                 return NULL;                    /* Illegal/unsupported category. */
266         }
267
268         if (locale != NULL) {           /* Not just a query... */
269                 if (!newlocale((1 << category), locale, __global_locale)) {
270                         return NULL;            /* Failed! */
271                 }
272                 update_hr_locale(__global_locale->cur_locale);
273         }
274
275         /* Either a query or a successful set, so return current locale string. */
276         return hr_locale + (category * MAX_LOCALE_CATEGORY_STR);
277 }
278
279 #endif /* __LOCALE_C_ONLY */
280
281 #endif
282 /**********************************************************************/
283 #ifdef L_localeconv
284
285 /* Note: We assume here that the compiler does the sane thing regarding
286  * placement of the fields in the struct.  If necessary, we could ensure
287  * this using an array of offsets, but at some size cost. */
288
289
290 #ifdef __LOCALE_C_ONLY
291
292 link_warning(localeconv,"REMINDER: The 'localeconv' function is hardwired for C/POSIX locale only.")
293
/* The single result object handed out by localeconv(). */
static struct lconv the_lconv;

/* "." is the decimal point; its terminating NUL (decpt + 1) serves as the
 * empty string for every other string member. */
static const char decpt[] = ".";

/*
 * localeconv() hardwired for the C/POSIX locale.
 *
 * Relies on struct lconv consisting of a run of char* members that starts
 * at the beginning of the struct and ends with negative_sign, followed by
 * a run of char members from int_frac_digits through int_n_sign_posn.
 *
 * Returns a pointer to a static object: decimal_point is ".", the other
 * string members are "", and all char members are CHAR_MAX.
 */
struct lconv *localeconv(void)
{
	char **str_field = (char **) &the_lconv;
	char *num_field;

	/* First string member gets "."... */
	*str_field = (char *) decpt;

	/* ...and every following one, up to and including negative_sign, "". */
	do {
		++str_field;
		*str_field = (char *) &decpt[1];
	} while ((char *) str_field < (char *) &the_lconv.negative_sign);

	/* All char-sized members are "not available" in the C locale. */
	num_field = &the_lconv.int_frac_digits;
	do {
		*num_field = CHAR_MAX;
		++num_field;
	} while (num_field <= &the_lconv.int_n_sign_posn);

	return &the_lconv;
}
316
317 #else /* __LOCALE_C_ONLY */
318
319 static struct lconv the_lconv;
320
321 struct lconv *localeconv(void)
322 {
323         register char *p = (char *) &the_lconv;
324         register char **q = (char **) &(__UCLIBC_CURLOCALE->decimal_point);
325
326         do {
327                 *((char **)p) = *q;
328                 p += sizeof(char **);
329                 ++q;
330         } while (p < &the_lconv.int_frac_digits);
331
332         do {
333                 *p = **q;
334                 ++p;
335                 ++q;
336         } while (p <= &the_lconv.int_n_sign_posn);
337
338         return &the_lconv;
339 }
340
341 #endif /* __LOCALE_C_ONLY */
342
343 libc_hidden_def(localeconv)
344
345 #endif
346 /**********************************************************************/
347 #if defined(L__locale_init) && !defined(__LOCALE_C_ONLY)
348
/* Storage for the global locale object. */
349 struct __uclibc_locale_struct __global_locale_data;
350
/* Handle to the global locale; setlocale() passes it to newlocale() as the
 * locale to modify. */
351 __locale_t __global_locale = &__global_locale_data;
352
353 #ifdef __UCLIBC_HAS_XLOCALE__
/* Exists only with extended-locale support and starts out referring to the
 * global locale.  NOTE(review): presumably the pointer behind
 * __UCLIBC_CURLOCALE -- confirm in bits/uClibc_locale.h. */
354 __locale_t __curlocale_var = &__global_locale_data;
355 #endif
356
357 /*----------------------------------------------------------------------*/
358 #ifdef __UCLIBC_MJN3_ONLY__
359 #warning TODO: Move utf8 and ascii strings.
360 #endif
/* Codeset names that _locale_set_l() stores in base->codeset for codeset
 * ids 2 (UTF-8) and 1 (ASCII). */
361 static const char utf8[] = "UTF-8";
362 static const char ascii[] = "ASCII";
363
/* Header found at the start of __locale_mmap->collate_data, as read by
 * init_cur_collate().  It is followed by num_base coldata_base_t records,
 * then num_der coldata_der_t records, then the tables.  The num_* table
 * sizes are added to a uint16_t pointer, so they count uint16_t units.
 * Note that the tables are laid out in the order index2weight,
 * index2ruleidx, multistart, override, ruletable, weightstr -- which is
 * not the order of the fields below. */
364 typedef struct {
365         uint16_t num_base;              /* number of coldata_base_t records */
366         uint16_t num_der;               /* number of coldata_der_t records */
367         uint16_t MAX_WEIGHTS;           /* copied to cur_collate->MAX_WEIGHTS */
368         uint16_t num_index2weight;      /* size of the index2weight table */
/* The index2ruleidx table has the same size as index2weight. */
369 #define num_index2ruleidx num_index2weight
370         uint16_t num_weightstr;         /* size of the weightstr table */
371         uint16_t num_multistart;        /* size of the multistart table */
372         uint16_t num_override;          /* size of the override table */
373         uint16_t num_ruletable;         /* size of the ruletable */
374 } coldata_header_t;
375
/* One base collation record in collate_data.  init_cur_collate() copies
 * everything before index2weight_offset verbatim into the start of the
 * __collate_t it fills in; the remaining fields are offsets (in uint16_t
 * units) into the shared tables that follow the records. */
376 typedef struct {
377         uint16_t num_weights;
378         uint16_t num_starters;
379         uint16_t ii_shift;              /* used to derive ii_mask */
380         uint16_t ti_shift;              /* used to derive ti_mask */
381         uint16_t ii_len;
382         uint16_t ti_len;
383         uint16_t max_weight;
384         uint16_t num_col_base;          /* entries copied from the index2* tables */
385         uint16_t max_col_index;         /* sizes the per-locale index2* arrays */
386         uint16_t undefined_idx;         /* replaced by the derived record's value */
387         uint16_t range_low;
388         uint16_t range_count;
389         uint16_t range_base_weight;
390         uint16_t range_rule_offset;
391
/* --- not copied: offsets into the shared tables --- */
392         uint16_t index2weight_offset;   /* into the index2weight table */
393         uint16_t index2ruleidx_offset;  /* into the index2ruleidx table */
394         uint16_t multistart_offset;     /* not read by init_cur_collate(); the
                                         * derived record's offset is used */
395         uint16_t wcs2colidt_offset_low; /* low 16 bits of the wcs2colidt offset */
396         uint16_t wcs2colidt_offset_hi;  /* high 16 bits of the wcs2colidt offset */
397 } coldata_base_t;
398
/* One derived collation record in collate_data: a base record plus the
 * per-locale adjustments applied on top of it by init_cur_collate(). */
399 typedef struct {
400         uint16_t base_idx;              /* index of the coldata_base_t to start from */
401         uint16_t undefined_idx;         /* overrides the base record's undefined_idx */
402         uint16_t overrides_offset;      /* offset into the override table */
403         uint16_t multistart_offset;     /* offset into the multistart table */
404 } coldata_der_t;
405
/*
 * Fill in *cur_collate with the collation tables for derived collation
 * number der_num, read from __locale_mmap->collate_data.
 *
 * der_num == 0 selects the C locale: only cur_collate->num_weights is set
 * (to 0) and nothing is allocated.  Otherwise der_num - 1 indexes the
 * coldata_der_t records; an out-of-range value is caught only by assert().
 *
 * Returns 1 on success and 0 if calloc() fails.  On success (der_num != 0)
 * cur_collate->index2weight is a calloc'd block that the caller owns;
 * cur_collate->index2ruleidx points into the same block and must not be
 * freed separately.
 */
406 static int init_cur_collate(int der_num, __collate_t *cur_collate)
407 {
408         const uint16_t *__locale_collate_tbl = __locale_mmap->collate_data;
409         coldata_header_t *cdh;
410         coldata_base_t *cdb;
411         coldata_der_t *cdd;
412         const uint16_t *p;
413         size_t n;
414         uint16_t i, w;
415
416 #ifdef __UCLIBC_MJN3_ONLY__
417 #warning kill of x86-specific asserts
418 #endif
419 #if 0
420         assert(sizeof(coldata_base_t) == 19*2);
421         assert(sizeof(coldata_der_t) == 4*2);
422         assert(sizeof(coldata_header_t) == 8*2);
423 #endif
424
425         if (!der_num) {                         /* C locale... special */
426                 cur_collate->num_weights = 0;
427                 return 1;
428         }
429
            /* Non-C locales are numbered from 1; make it a 0-based index. */
430         --der_num;
431
432         cdh = (coldata_header_t *) __locale_collate_tbl;
433
434 #ifdef __UCLIBC_MJN3_ONLY__
435 #warning CONSIDER: Should we assert here?
436 #endif
437 #if 0
438         if (der_num >= cdh->num_der) {
439                 return 0;
440         }
441 #else
442         assert((der_num < cdh->num_der));
443 #endif
444
            /* Layout: header, num_base base records, then the derived records.
             * The table pointer is uint16_t*, so byte sizes are halved. */
445         cdd = (coldata_der_t *)(__locale_collate_tbl
446                                                         + (sizeof(coldata_header_t)
447                                                            + cdh->num_base * sizeof(coldata_base_t)
448                                                            + der_num * sizeof(coldata_der_t)
449                                                            )/2 );
450
            /* The base record this derived record builds on. */
451         cdb = (coldata_base_t *)(__locale_collate_tbl
452                                                          + (sizeof(coldata_header_t)
453                                                                 + cdd->base_idx * sizeof(coldata_base_t)
454                                                                 )/2 );
455
            /* Take the leading fields of the base record as-is, then apply the
             * derived record's undefined_idx on top. */
456         memcpy(cur_collate, cdb, offsetof(coldata_base_t,index2weight_offset));
457         cur_collate->undefined_idx = cdd->undefined_idx;
458
459         cur_collate->ti_mask = (1 << cur_collate->ti_shift)-1;
460         cur_collate->ii_mask = (1 << cur_collate->ii_shift)-1;
461
462 /*      fflush(stdout); */
463 /*      fprintf(stderr,"base=%d  num_col_base: %d  %d\n", cdd->base_idx ,cur_collate->num_col_base, cdb->num_col_base); */
464
            /* n: offset (in uint16_t units) of the first table, i.e. just past
             * all base and derived records.  It is advanced past each table in
             * turn below. */
465         n = (sizeof(coldata_header_t) + cdh->num_base * sizeof(coldata_base_t)
466                  + cdh->num_der * sizeof(coldata_der_t))/2;
467
468 /*      fprintf(stderr,"n   = %d\n", n); */
469         cur_collate->index2weight_tbl = __locale_collate_tbl + n + cdb->index2weight_offset;
470 /*      fprintf(stderr,"i2w = %d\n", n + cdb->index2weight_offset); */
471         n += cdh->num_index2weight;
472         cur_collate->index2ruleidx_tbl = __locale_collate_tbl + n + cdb->index2ruleidx_offset;
473 /*      fprintf(stderr,"i2r = %d\n", n + cdb->index2ruleidx_offset); */
474         n += cdh->num_index2ruleidx;
            /* multistart and overrides are located via the derived record. */
475         cur_collate->multistart_tbl = __locale_collate_tbl + n + cdd->multistart_offset;
476 /*      fprintf(stderr,"mts = %d\n", n + cdb->multistart_offset); */
477         n += cdh->num_multistart;
478         cur_collate->overrides_tbl = __locale_collate_tbl + n + cdd->overrides_offset;
479 /*      fprintf(stderr,"ovr = %d\n", n + cdd->overrides_offset); */
480         n += cdh->num_override;
481         cur_collate->ruletable = __locale_collate_tbl + n;
482 /*      fprintf(stderr, "rtb = %d\n", n); */
483         n += cdh->num_ruletable;
484         cur_collate->weightstr = __locale_collate_tbl + n;
485 /*      fprintf(stderr,"wts = %d\n", n); */
486         n += cdh->num_weightstr;
            /* The wcs2colidt offset is 32 bits wide, stored as two halves. */
487         cur_collate->wcs2colidt_tbl = __locale_collate_tbl + n
488                 + (((unsigned long)(cdb->wcs2colidt_offset_hi)) << 16)
489                 + cdb->wcs2colidt_offset_low;
490 /*      fprintf(stderr,"wcs = %lu\n", n + (((unsigned long)(cdb->wcs2colidt_offset_hi)) << 16) */
491 /*                      + cdb->wcs2colidt_offset_low); */
492
493         cur_collate->MAX_WEIGHTS = cdh->MAX_WEIGHTS;
494
495 #ifdef __UCLIBC_MJN3_ONLY__
496 #warning CONSIDER: Fix the +1 by increasing max_col_index?
497 #warning CONSIDER: Since this collate info is dependent only on LC_COLLATE ll_cc and not on codeset, we could just globally allocate this for each in a table
498 #endif
499
            /* One zeroed allocation holds both per-locale arrays: index2weight
             * (max_col_index + 1 entries) immediately followed by index2ruleidx
             * (same size). */
500         cur_collate->index2weight = calloc(2*cur_collate->max_col_index+2,
501                                                                            sizeof(uint16_t));
502         if (!cur_collate->index2weight) {
503                 return 0;
504         }
505         cur_collate->index2ruleidx = cur_collate->index2weight
506                 + cur_collate->max_col_index + 1;
507
            /* Start from the base record's values for the first num_col_base
             * entries. */
508         memcpy(cur_collate->index2weight, cur_collate->index2weight_tbl,
509                    cur_collate->num_col_base * sizeof(uint16_t));
510         memcpy(cur_collate->index2ruleidx, cur_collate->index2ruleidx_tbl,
511                    cur_collate->num_col_base * sizeof(uint16_t));
512
513         /* now do the overrides */
            /* Part 1: groups of the form { count (> 1), first weight,
             * count x (1-based index, ruleidx) }.  Entries in a group receive
             * consecutive weights starting at the group's first weight. */
514         p = cur_collate->overrides_tbl;
515         while (*p > 1) {
516 /*              fprintf(stderr, "processing override -- count = %d\n", *p); */
517                 n = *p++;
518                 w = *p++;
519                 do {
520                         i = *p++;
521 /*                      fprintf(stderr, "       i=%d (%#x) w=%d *p=%d\n", i, i, w, *p); */
522                         cur_collate->index2weight[i-1] = w++;
523                         cur_collate->index2ruleidx[i-1] = *p++;
524                 } while (--n);
525         }
            /* Part 2: a 1 introduces individual (1-based index, weight, ruleidx)
             * triples, ended by an index of 0. */
526         assert(*p == 1);
527         while (*++p) {
528                 i = *p;
529 /*              fprintf(stderr, "       i=%d (%#x) w=%d *p=%d\n", i, i, p[1], p[2]); */
530                 cur_collate->index2weight[i-1] = *++p;
531                 cur_collate->index2ruleidx[i-1] = *++p;
532         }
533
534
            /* Walk every multistart entry up to its terminator.  Nothing in
             * *cur_collate is modified here; only the locals p and n change.
             * NOTE(review): this looks like the skeleton of the commented-out
             * debug trace -- confirm before removing. */
535         for (i=0 ; i < cur_collate->multistart_tbl[0] ; i++) {
536                 p = cur_collate->multistart_tbl;
537 /*              fprintf(stderr, "%2d of %2d: %d ", i,  cur_collate->multistart_tbl[0], p[i]); */
538                 p += p[i];
539
540                 do {
541                         n = *p++;
542                         do {
543                                 if (!*p) {              /* found it */
544 /*                                      fprintf(stderr, "found: n=%d (%#lx) |%.*ls|\n", n, (int) *cs->s, n, cs->s); */
545 /*                                      fprintf(stderr, ": %d - single\n", n); */
546                                         goto FOUND;
547                                 }
548                                 /* the lookup check here is safe since we're assured that *p is a valid colidex */
549 /*                              fprintf(stderr, "lookup(%lc)==%d  *p==%d\n", cs->s[n], lookup(cs->s[n]), (int) *p); */
550 /*                              fprintf(stderr, ": %d - ", n); */
                                    /* Skip the rest of this zero-terminated run. */
551                                 do {
552 /*                                      fprintf(stderr, "%d|",  *p); */
553                                 } while (*p++);
554                                 break;
555                         } while (1);
556                 } while (1);
557         FOUND:
558                 continue;
559         }
560
561         return 1;
562 }
563
564 int attribute_hidden _locale_set_l(const unsigned char *p, __locale_t base)
565 {
566         const char **x;
567         unsigned char *s = base->cur_locale + 1;
568         const size_t *stp;
569         const unsigned char *r;
570         const uint16_t *io;
571         const uint16_t *ii;
572         const unsigned char *d;
573         int row;                                        /* locale row */
574         int crow;                                       /* category row */
575         int len;
576         int c;
577         int i = 0;
578         __collate_t newcol;
579
580         ++p;
581
582         newcol.index2weight = NULL;
583         if ((p[2*LC_COLLATE] != s[2*LC_COLLATE])
584                 || (p[2*LC_COLLATE + 1] != s[2*LC_COLLATE + 1])
585                 ) {
586                 row = (((int)(*p & 0x7f)) << 7) + (p[1] & 0x7f);
587                 assert(row < __LOCALE_DATA_NUM_LOCALES);
588                 if (!init_cur_collate(__locale_mmap->locales[ __LOCALE_DATA_WIDTH_LOCALES
589                                                                                                           * row + 3 + LC_COLLATE ],
590                                                           &newcol)
591                         ) {
592                         return 0;                       /* calloc failed. */
593                 }
594                 free(base->collate.index2weight);
595                 memcpy(&base->collate, &newcol, sizeof(__collate_t));
596         }
597
598         do {
599                 if ((*p != *s) || (p[1] != s[1])) {
600                         row = (((int)(*p & 0x7f)) << 7) + (p[1] & 0x7f);
601                         assert(row < __LOCALE_DATA_NUM_LOCALES);
602
603                         *s = *p;
604                         s[1] = p[1];
605
606                         if ((i != LC_COLLATE)
607                                 && ((len = __locale_mmap->lc_common_item_offsets_LEN[i]) != 0)
608                                 ) {
609                                 crow = __locale_mmap->locales[ __LOCALE_DATA_WIDTH_LOCALES * row
610                                                                                            + 3 + i ]
611                                         * len;
612
613                                 x = (const char **)(((char *) base)
614                                     + base->category_offsets[i]);
615
616                                 stp = __locale_mmap->lc_common_tbl_offsets + 4*i;
617                                 r = (const unsigned char *)( ((char *)__locale_mmap) + *stp );
618                                 io = (const uint16_t *)( ((char *)__locale_mmap) + *++stp );
619                                 ii = (const uint16_t *)( ((char *)__locale_mmap) + *++stp );
620                                 d = (const unsigned char *)( ((char *)__locale_mmap) + *++stp );
621                                 for (c = 0; c < len; c++) {
622                                         x[c] = (char*)(d + ii[r[crow + c] + io[c]]);
623                                 }
624                         }
625                         if (i == LC_CTYPE) {
626                                 c = __locale_mmap->locales[ __LOCALE_DATA_WIDTH_LOCALES * row
627                                                                                         + 2 ]; /* codeset */
628                                 if (c <= 2) {
629                                         if (c == 2) {
630                                                 base->codeset = utf8;
631                                                 base->encoding = __ctype_encoding_utf8;
632                                                 /* TODO - fix for bcc */
633                                                 base->mb_cur_max = 6;
634                                         } else {
635                                                 assert(c == 1);
636                                                 base->codeset = ascii;
637                                                 base->encoding = __ctype_encoding_7_bit;
638                                                 base->mb_cur_max = 1;
639                                         }
640                                 } else {
641                                         const __codeset_8_bit_t *c8b;
642                                         r = CODESET_LIST;
643                                         c -= 3;
644                                         base->codeset = (char *) (r + r[c]);
645                                         base->encoding = __ctype_encoding_8_bit;
646 #ifdef __UCLIBC_MJN3_ONLY__
647 #warning REMINDER: update 8 bit mb_cur_max when translit implemented!
648 #endif
649                                         /* TODO - update when translit implemented! */
650                                         base->mb_cur_max = 1;
651                                         c8b = __locale_mmap->codeset_8_bit + c;
652 #ifdef __CTYPE_HAS_8_BIT_LOCALES
653                                         base->idx8ctype = c8b->idx8ctype;
654                                         base->idx8uplow = c8b->idx8uplow;
655 #ifdef __UCLIBC_HAS_WCHAR__
656                                         base->idx8c2wc = c8b->idx8c2wc;
657                                         base->idx8wc2c = c8b->idx8wc2c;
658                                         /* translit  */
659 #endif /* __UCLIBC_HAS_WCHAR__ */
660
661                                         /* What follows is fairly bloated, but it is just a hack
662                                          * to get the 8-bit codeset ctype stuff functioning.
663                                          * All of this will be replaced in the next generation
664                                          * of locale support anyway... */
665
666                                         memcpy(base->__ctype_b_data,
667                                                    __C_ctype_b - __UCLIBC_CTYPE_B_TBL_OFFSET,
668                                                    (256 + __UCLIBC_CTYPE_B_TBL_OFFSET)
669                                                    * sizeof(__ctype_mask_t));
670                                         memcpy(base->__ctype_tolower_data,
671                                                    __C_ctype_tolower - __UCLIBC_CTYPE_TO_TBL_OFFSET,
672                                                    (256 + __UCLIBC_CTYPE_TO_TBL_OFFSET)
673                                                    * sizeof(__ctype_touplow_t));
674                                         memcpy(base->__ctype_toupper_data,
675                                                    __C_ctype_toupper - __UCLIBC_CTYPE_TO_TBL_OFFSET,
676                                                    (256 + __UCLIBC_CTYPE_TO_TBL_OFFSET)
677                                                    * sizeof(__ctype_touplow_t));
678
679 #define Cctype_TBL_MASK         ((1 << __LOCALE_DATA_Cctype_IDX_SHIFT) - 1)
680 #define Cctype_IDX_OFFSET       (128 >> __LOCALE_DATA_Cctype_IDX_SHIFT)
681
682                                         {
683                                                 int u;
684                                                 __ctype_mask_t m;
685
686                                                 for (u=0 ; u < 128 ; u++) {
687 #ifdef __LOCALE_DATA_Cctype_PACKED
688                                                         c = base->tbl8ctype
689                                                                 [ ((int)(c8b->idx8ctype
690                                                                                  [(u >> __LOCALE_DATA_Cctype_IDX_SHIFT) ])
691                                                                    << (__LOCALE_DATA_Cctype_IDX_SHIFT - 1))
692                                                                   + ((u & Cctype_TBL_MASK) >> 1)];
693                                                         c = (u & 1) ? (c >> 4) : (c & 0xf);
694 #else
695                                                         c = base->tbl8ctype
696                                                                 [ ((int)(c8b->idx8ctype
697                                                                                  [(u >> __LOCALE_DATA_Cctype_IDX_SHIFT) ])
698                                                                    << __LOCALE_DATA_Cctype_IDX_SHIFT)
699                                                                   + (u & Cctype_TBL_MASK) ];
700 #endif
701
702                                                         m = base->code2flag[c];
703
704                                                         base->__ctype_b_data
705                                                                 [128 + __UCLIBC_CTYPE_B_TBL_OFFSET + u]
706                                                                 = m;
707
708 #ifdef __UCLIBC_HAS_CTYPE_SIGNED__
709                                                         if (((signed char)(128 + u)) != -1) {
710                                                                 base->__ctype_b_data[__UCLIBC_CTYPE_B_TBL_OFFSET
711                                                                                                          + ((signed char)(128 + u))]
712                                                                         = m;
713                                                         }
714 #endif
715
716                                                         base->__ctype_tolower_data
717                                                                 [128 + __UCLIBC_CTYPE_TO_TBL_OFFSET + u]
718                                                                 = 128 + u;
719                                                         base->__ctype_toupper_data
720                                                                 [128 + __UCLIBC_CTYPE_TO_TBL_OFFSET + u]
721                                                                 = 128 + u;
722
723                                                         if (m & (_ISlower|_ISupper)) {
724                                                                 c = base->tbl8uplow
725                                                                         [ ((int)(c8b->idx8uplow
726                                                                                          [u >> __LOCALE_DATA_Cuplow_IDX_SHIFT])
727                                                                            << __LOCALE_DATA_Cuplow_IDX_SHIFT)
728                                                                           + ((128 + u)
729                                                                                  & ((1 << __LOCALE_DATA_Cuplow_IDX_SHIFT)
730                                                                                         - 1)) ];
731                                                                 if (m & _ISlower) {
732                                                                         base->__ctype_toupper_data
733                                                                                 [128 + __UCLIBC_CTYPE_TO_TBL_OFFSET + u]
734                                                                                 = (unsigned char)(128 + u + c);
735 #ifdef __UCLIBC_HAS_CTYPE_SIGNED__
736                                                                         if (((signed char)(128 + u)) != -1) {
737                                                                                 base->__ctype_toupper_data
738                                                                                         [__UCLIBC_CTYPE_TO_TBL_OFFSET
739                                                                                          + ((signed char)(128 + u))]
740                                                                                         = (unsigned char)(128 + u + c);
741                                                                         }
742 #endif
743                                                                 } else {
744                                                                         base->__ctype_tolower_data
745                                                                                 [128 + __UCLIBC_CTYPE_TO_TBL_OFFSET + u]
746                                                                                 = (unsigned char)(128 + u - c);
747 #ifdef __UCLIBC_HAS_CTYPE_SIGNED__
748                                                                         if (((signed char)(128 + u)) != -1) {
749                                                                                 base->__ctype_tolower_data
750                                                                                         [__UCLIBC_CTYPE_TO_TBL_OFFSET
751                                                                                          + ((signed char)(128 + u))]
752                                                                                         = (unsigned char)(128 + u - c);
753                                                                         }
754 #endif
755                                                                 }
756                                                         }
757                                                 }
758                                         }
759
760 #ifdef __UCLIBC_HAS_XLOCALE__
761                                         base->__ctype_b = base->__ctype_b_data
762                                                 + __UCLIBC_CTYPE_B_TBL_OFFSET;
763                                         base->__ctype_tolower = base->__ctype_tolower_data
764                                                 + __UCLIBC_CTYPE_TO_TBL_OFFSET;
765                                         base->__ctype_toupper = base->__ctype_toupper_data
766                                                 + __UCLIBC_CTYPE_TO_TBL_OFFSET;
767 #else /* __UCLIBC_HAS_XLOCALE__ */
768                                         __ctype_b = base->__ctype_b_data
769                                                 + __UCLIBC_CTYPE_B_TBL_OFFSET;
770                                         __ctype_tolower = base->__ctype_tolower_data
771                                                 + __UCLIBC_CTYPE_TO_TBL_OFFSET;
772                                         __ctype_toupper = base->__ctype_toupper_data
773                                                 + __UCLIBC_CTYPE_TO_TBL_OFFSET;
774 #endif /* __UCLIBC_HAS_XLOCALE__ */
775
776 #endif /* __CTYPE_HAS_8_BIT_LOCALES */
777                                 }
778 #ifdef __UCLIBC_MJN3_ONLY__
779 #warning TODO: Put the outdigit string length in the locale_mmap object.
780 #endif
781                                 d = base->outdigit_length;
782                                 x = &base->outdigit0_mb;
783                                 for (c = 0 ; c < 10 ; c++) {
784                                         ((unsigned char *)d)[c] = strlen(x[c]);
785                                         assert(d[c] > 0);
786                                 }
787                         } else if (i == LC_NUMERIC) {
788                                 assert(LC_NUMERIC > LC_CTYPE); /* Need ctype initialized. */
789
790                                 base->decimal_point_len
791                                         = __locale_mbrtowc_l(&base->decimal_point_wc,
792                                                                                         base->decimal_point, base);
793                                 assert(base->decimal_point_len > 0);
794                                 assert(base->decimal_point[base->decimal_point_len] == 0);
795
796                                 if (*base->grouping) {
797                                         base->thousands_sep_len
798                                                 = __locale_mbrtowc_l(&base->thousands_sep_wc,
799                                                                                          base->thousands_sep, base);
800 #if 1
801 #ifdef __UCLIBC_MJN3_ONLY__
802 #warning TODO: Remove hack involving grouping without a thousep char (bg_BG).
803 #endif
804                                         assert(base->thousands_sep_len >= 0);
805                                         if (base->thousands_sep_len == 0) {
806                                                 base->grouping = base->thousands_sep; /* empty string */
807                                         }
808                                         assert(base->thousands_sep[base->thousands_sep_len] == 0);
809 #else
810                                         assert(base->thousands_sep_len > 0);
811                                         assert(base->thousands_sep[base->thousands_sep_len] == 0);
812 #endif
813                                 }
814
815 /*                      } else if (i == LC_COLLATE) { */
816 /*                              init_cur_collate(__locale_mmap->locales[ __LOCALE_DATA_WIDTH_LOCALES */
817 /*                                                                                                               * row + 3 + i ], */
818 /*                                                               &base->collate); */
819                         }
820                 }
821                 ++i;
822                 p += 2;
823                 s += 2;
824         } while (i < LC_ALL);
825
826         return 1;
827 }
828
/* Translation table from a 4-bit character-class code to the _IS* flag mask
 * stored in the ctype "b" table.  _locale_init_l() installs it as
 * base->code2flag, and the 8-bit codeset setup indexes it with the class
 * code it extracted from tbl8ctype.
 * Only 15 initializers are listed; the 16th element (index 15) is therefore
 * implicitly zero, i.e. no flags. */
829 static const uint16_t __code2flag[16] = {
830         0,                                                      /* unclassified = 0 */
831         _ISprint|_ISgraph|_ISalnum|_ISalpha, /* alpha_nonupper_nonlower */
832         _ISprint|_ISgraph|_ISalnum|_ISalpha|_ISlower, /* alpha_lower */
833         _ISprint|_ISgraph|_ISalnum|_ISalpha|_ISlower|_ISupper, /* alpha_upper_lower */
834         _ISprint|_ISgraph|_ISalnum|_ISalpha|_ISupper, /* alpha_upper */
835         _ISprint|_ISgraph|_ISalnum|_ISdigit, /* digit */
836         _ISprint|_ISgraph|_ISpunct,     /* punct */
837         _ISprint|_ISgraph,                      /* graph */
838         _ISprint|_ISspace,                      /* print_space_nonblank */
839         _ISprint|_ISspace|_ISblank,     /* print_space_blank */
840                  _ISspace,                      /* space_nonblank_noncntrl */
841                  _ISspace|_ISblank,     /* space_blank_noncntrl */
842         _IScntrl|_ISspace,                      /* cntrl_space_nonblank */
843         _IScntrl|_ISspace|_ISblank,     /* cntrl_space_blank */
844         _IScntrl                                        /* cntrl_nonspace */
845 };
846
847 void attribute_hidden _locale_init_l(__locale_t base)
848 {
849         memset(base->cur_locale, 0, LOCALE_SELECTOR_SIZE);
850         base->cur_locale[0] = '#';
851
852         memcpy(base->category_item_count,
853                    __locale_mmap->lc_common_item_offsets_LEN,
854                    LC_ALL);
855
856         ++base->category_item_count[0]; /* Increment for codeset entry. */
857         base->category_offsets[0] = offsetof(struct __uclibc_locale_struct, outdigit0_mb);
858         base->category_offsets[1] = offsetof(struct __uclibc_locale_struct, decimal_point);
859         base->category_offsets[2] = offsetof(struct __uclibc_locale_struct, int_curr_symbol);
860         base->category_offsets[3] = offsetof(struct __uclibc_locale_struct, abday_1);
861 /*      base->category_offsets[4] = offsetof(struct __uclibc_locale_struct, collate???); */
862         base->category_offsets[5] = offsetof(struct __uclibc_locale_struct, yesexpr);
863
864 #ifdef __CTYPE_HAS_8_BIT_LOCALES
865         base->tbl8ctype
866                 = (const unsigned char *) &__locale_mmap->tbl8ctype;
867         base->tbl8uplow
868                 = (const unsigned char *) &__locale_mmap->tbl8uplow;
869 #ifdef __UCLIBC_HAS_WCHAR__
870         base->tbl8c2wc
871                 = (const uint16_t *) &__locale_mmap->tbl8c2wc;
872         base->tbl8wc2c
873                 = (const unsigned char *) &__locale_mmap->tbl8wc2c;
874         /* translit  */
875 #endif /* __UCLIBC_HAS_WCHAR__ */
876 #endif /* __CTYPE_HAS_8_BIT_LOCALES */
877 #ifdef __UCLIBC_HAS_WCHAR__
878         base->tblwctype
879                 = (const unsigned char *) &__locale_mmap->tblwctype;
880         base->tblwuplow
881                 = (const unsigned char *) &__locale_mmap->tblwuplow;
882         base->tblwuplow_diff
883                 = (const int16_t *) &__locale_mmap->tblwuplow_diff;
884 /*      base->tblwcomb */
885 /*              = (const unsigned char *) &__locale_mmap->tblwcomb; */
886         /* width?? */
887 #endif /* __UCLIBC_HAS_WCHAR__ */
888
889         /* Initially, set things up to use the global C ctype tables.
890          * This is correct for C (ASCII) and UTF-8 based locales (except tr_TR). */
891 #ifdef __UCLIBC_HAS_XLOCALE__
892         base->__ctype_b = __C_ctype_b;
893         base->__ctype_tolower = __C_ctype_tolower;
894         base->__ctype_toupper = __C_ctype_toupper;
895 #else /* __UCLIBC_HAS_XLOCALE__ */
896         __ctype_b = __C_ctype_b;
897         __ctype_tolower = __C_ctype_tolower;
898         __ctype_toupper = __C_ctype_toupper;
899 #endif /* __UCLIBC_HAS_XLOCALE__ */
900
901 #ifdef __UCLIBC_MJN3_ONLY__
902 #warning TODO: Initialize code2flag correctly based on locale_mmap.
903 #endif
904         base->code2flag = __code2flag;
905
906         _locale_set_l((unsigned char*) C_LOCALE_SELECTOR, base);
907 }
908
/* Initialize the global locale object by running _locale_init_l() on
 * __global_locale.  Takes no arguments and returns nothing. */
909 void _locale_init(void)
910 {
911         /* TODO: mmap the locale file  */
912
913         /* TODO - ??? */
914         _locale_init_l(__global_locale);
915 }
916
917 #endif
918 /**********************************************************************/
919 #if defined(L_nl_langinfo) || defined(L_nl_langinfo_l)
920
921 #ifdef __LOCALE_C_ONLY
922
923 /* We need to index 320 bytes of data, so you might initially think we
924  * need to store the offsets in shorts.  But since the offset of the
925  * 64th item is 182, we'll store "offset - 2*64" for all items >= 64
926  * and always calculate the data offset as "offset[i] + 2*(i & 64)".
927  * This allows us to pack the data offsets in an unsigned char while
928  * also avoiding an "if".
929  *
930  * Note: Category order is assumed to be:
931  *   ctype, numeric, monetary, time, collate, messages, all
932  */
933
934 #define C_LC_ALL 6
935
936 /* Combine the data to avoid size penalty for separate char arrays when
937  * compiler aligns objects.  The original code is left in as documentation. */
938 #define cat_start nl_data
939 #define C_locale_data (nl_data + C_LC_ALL + 1 + 90)
940
/* Packed C-locale data for nl_langinfo(), three sections back to back:
 *   - C_LC_ALL + 1 bytes: for each category, the index of its first entry
 *     in the item-offset section (the last byte is the end sentinel).  The
 *     very first byte is 0, which is why the start of this array can also
 *     be returned as an empty string.
 *   - 90 bytes: one offset per item into the string section; nl_langinfo()
 *     adds 2*(i & 64) to the stored value for item i.
 *   - 320 bytes: the NUL-terminated strings themselves. */
941 static const unsigned char nl_data[C_LC_ALL + 1 + 90 + 320] = {
942 /* static const char cat_start[LC_ALL + 1] = { */
943         '\x00', '\x0b', '\x0e', '\x24', '\x56', '\x56', '\x5a',
944 /* }; */
945 /* static const char item_offset[90] = { */
946         '\x00', '\x02', '\x04', '\x06', '\x08', '\x0a', '\x0c', '\x0e',
947         '\x10', '\x12', '\x14', '\x1a', '\x1b', '\x1b', '\x1b', '\x1b',
948         '\x1b', '\x1b', '\x1b', '\x1b', '\x1b', '\x1c', '\x1c', '\x1c',
949         '\x1c', '\x1c', '\x1c', '\x1c', '\x1c', '\x1c', '\x1c', '\x1c',
950         '\x1c', '\x1c', '\x1c', '\x1e', '\x20', '\x24', '\x28', '\x2c',
951         '\x30', '\x34', '\x38', '\x3c', '\x43', '\x4a', '\x52', '\x5c',
952         '\x65', '\x6c', '\x75', '\x79', '\x7d', '\x81', '\x85', '\x89',
953         '\x8d', '\x91', '\x95', '\x99', '\x9d', '\xa1', '\xa5', '\xad',
954         '\x36', '\x3c', '\x42', '\x46', '\x4b', '\x50', '\x57', '\x61',
955         '\x69', '\x72', '\x7b', '\x7e', '\x81', '\x96', '\x9f', '\xa8',
956         '\xb3', '\xb3', '\xb3', '\xb3', '\xb3', '\xb3', '\xb4', '\xba',
957         '\xbf', '\xbf',
958 /* }; */
959 /* static const char C_locale_data[320] = { */
960            '0', '\x00',    '1', '\x00',    '2', '\x00',    '3', '\x00',
961            '4', '\x00',    '5', '\x00',    '6', '\x00',    '7', '\x00',
962            '8', '\x00',    '9', '\x00',    'A',    'S',    'C',    'I',
963            'I', '\x00',    '.', '\x00', '\x7f', '\x00',    '-', '\x00',
964            'S',    'u',    'n', '\x00',    'M',    'o',    'n', '\x00',
965            'T',    'u',    'e', '\x00',    'W',    'e',    'd', '\x00',
966            'T',    'h',    'u', '\x00',    'F',    'r',    'i', '\x00',
967            'S',    'a',    't', '\x00',    'S',    'u',    'n',    'd',
968            'a',    'y', '\x00',    'M',    'o',    'n',    'd',    'a',
969            'y', '\x00',    'T',    'u',    'e',    's',    'd',    'a',
970            'y', '\x00',    'W',    'e',    'd',    'n',    'e',    's',
971            'd',    'a',    'y', '\x00',    'T',    'h',    'u',    'r',
972            's',    'd',    'a',    'y', '\x00',    'F',    'r',    'i',
973            'd',    'a',    'y', '\x00',    'S',    'a',    't',    'u',
974            'r',    'd',    'a',    'y', '\x00',    'J',    'a',    'n',
975         '\x00',    'F',    'e',    'b', '\x00',    'M',    'a',    'r',
976         '\x00',    'A',    'p',    'r', '\x00',    'M',    'a',    'y',
977         '\x00',    'J',    'u',    'n', '\x00',    'J',    'u',    'l',
978         '\x00',    'A',    'u',    'g', '\x00',    'S',    'e',    'p',
979         '\x00',    'O',    'c',    't', '\x00',    'N',    'o',    'v',
980         '\x00',    'D',    'e',    'c', '\x00',    'J',    'a',    'n',
981            'u',    'a',    'r',    'y', '\x00',    'F',    'e',    'b',
982            'r',    'u',    'a',    'r',    'y', '\x00',    'M',    'a',
983            'r',    'c',    'h', '\x00',    'A',    'p',    'r',    'i',
984            'l', '\x00',    'M',    'a',    'y', '\x00',    'J',    'u',
985            'n',    'e', '\x00',    'J',    'u',    'l',    'y', '\x00',
986            'A',    'u',    'g',    'u',    's',    't', '\x00',    'S',
987            'e',    'p',    't',    'e',    'm',    'b',    'e',    'r',
988         '\x00',    'O',    'c',    't',    'o',    'b',    'e',    'r',
989         '\x00',    'N',    'o',    'v',    'e',    'm',    'b',    'e',
990            'r', '\x00',    'D',    'e',    'c',    'e',    'm',    'b',
991            'e',    'r', '\x00',    'A',    'M', '\x00',    'P',    'M',
992         '\x00',    '%',    'a',    ' ',    '%',    'b',    ' ',    '%',
993            'e',    ' ',    '%',    'H',    ':',    '%',    'M',    ':',
994            '%',    'S',    ' ',    '%',    'Y', '\x00',    '%',    'm',
995            '/',    '%',    'd',    '/',    '%',    'y', '\x00',    '%',
996            'H',    ':',    '%',    'M',    ':',    '%',    'S', '\x00',
997            '%',    'I',    ':',    '%',    'M',    ':',    '%',    'S',
998            ' ',    '%',    'p', '\x00',    '^',    '[',    'y',    'Y',
999            ']', '\x00',    '^',    '[',    'n',    'N',    ']', '\x00',
1000 };
1001
1002 char *nl_langinfo(nl_item item)
1003 {
1004         unsigned int c;
1005         unsigned int i;
1006
1007         if ((c = _NL_ITEM_CATEGORY(item)) < C_LC_ALL) {
1008                 if ((i = cat_start[c] + _NL_ITEM_INDEX(item)) < cat_start[c+1]) {
1009 /*                      return (char *) C_locale_data + item_offset[i] + (i & 64); */
1010                         return (char *) C_locale_data + nl_data[C_LC_ALL+1+i] + 2*(i & 64);
1011                 }
1012         }
1013         return (char *) cat_start;      /* Conveniently, this is the empty string. */
1014 }
1015 libc_hidden_def(nl_langinfo)
1016
1017 #else /* __LOCALE_C_ONLY */
1018
1019 #if defined(__UCLIBC_HAS_XLOCALE__) && !defined(__UCLIBC_DO_XLOCALE)
1020
1021
1022
/* Non-"_l" entry point used in extended-locale builds: forwards the query to
 * nl_langinfo_l() with the locale given by __UCLIBC_CURLOCALE and returns
 * its result unchanged. */
1023 char *nl_langinfo(nl_item item)
1024 {
1025         return nl_langinfo_l(item, __UCLIBC_CURLOCALE);
1026 }
1027 libc_hidden_def(nl_langinfo)
1028
1029 #else /* defined(__UCLIBC_HAS_XLOCALE__) && !defined(__UCLIBC_DO_XLOCALE) */
1030
1031 libc_hidden_proto(__XL_NPP(nl_langinfo))
1032
1033 static const char empty[] = "";
1034
1035 char *__XL_NPP(nl_langinfo)(nl_item item __LOCALE_PARAM )
1036 {
1037         unsigned int c = _NL_ITEM_CATEGORY(item);
1038         unsigned int i = _NL_ITEM_INDEX(item);
1039
1040         if ((c < LC_ALL) && (i < __LOCALE_PTR->category_item_count[c])) {
1041                 return ((char **)(((char *) __LOCALE_PTR)
1042                                                   + __LOCALE_PTR->category_offsets[c]))[i];
1043         }
1044
1045         return (char *) empty;
1046 }
1047 libc_hidden_def(__XL_NPP(nl_langinfo))
1048
1049 #endif /* defined(__UCLIBC_HAS_XLOCALE__) && !defined(__UCLIBC_DO_XLOCALE) */
1050
1051 #endif /* __LOCALE_C_ONLY */
1052
1053 #endif
1054 /**********************************************************************/
1055 #ifdef L_newlocale
1056
1057 #warning mask defines for extra locale categories
1058
1059 #ifdef __UCLIBC_MJN3_ONLY__
1060 #warning TODO: Move posix and utf8 strings.
1061 #endif
/* Locale name that find_locale() accepts as a synonym for "C"; newlocale()
 * also uses it as the last fallback when no environment variable is set. */
1062 static const char posix[] = "POSIX";
/* Codeset name that find_locale() compares (case-insensitively) against the
 * text following "ll_CC." in a locale name. */
1063 static const char utf8[] = "UTF-8";
1064
1065 static int find_locale(int category_mask, const char *p,
1066                                            unsigned char *new_locale)
1067 {
1068         int i;
1069         const unsigned char *s;
1070         uint16_t n;
1071         unsigned char lang_cult, codeset;
1072
1073 #if defined(__LOCALE_DATA_AT_MODIFIERS_LENGTH) && 1
1074         /* Support standard locale handling for @-modifiers. */
1075
1076 #ifdef __UCLIBC_MJN3_ONLY__
1077 #warning REMINDER: Fix buf size in find_locale.
1078 #endif
1079         char buf[18];   /* TODO: 7+{max codeset name length} */
1080         const char *q;
1081
1082         if ((q = strchr(p,'@')) != NULL) {
1083                 if ((((size_t)((q-p)-5)) > (sizeof(buf) - 5)) || (p[2] != '_')) {
1084                         return 0;
1085                 }
1086                 /* locale name at least 5 chars long and 3rd char is '_' */
1087                 s = LOCALE_AT_MODIFIERS;
1088                 do {
1089                         if (!strcmp((char*) (s + 2), q + 1)) {
1090                                 break;
1091                         }
1092                         s += 2 + *s;            /* TODO - fix this throughout */
1093                 } while (*s);
1094                 if (!*s) {
1095                         return 0;
1096                 }
1097                 assert(q - p < sizeof(buf));
1098                 memcpy(buf, p, q-p);
1099                 buf[q-p] = 0;
1100                 buf[2] = s[1];
1101                 p = buf;
1102         }
1103 #endif
1104
1105         lang_cult = codeset = 0;        /* Assume C and default codeset.  */
1106         if (((*p == 'C') && !p[1]) || !strcmp(p, posix)) {
1107                 goto FIND_LOCALE;
1108         }
1109
1110         if ((strlen(p) > 5) && (p[5] == '.')) { /* Codeset in locale name? */
1111                 /* TODO: maybe CODESET_LIST + *s ??? */
1112                 /* 7bit is 1, UTF-8 is 2, 8-bit is >= 3 */
1113                 codeset = 2;
1114                 if (strcasecmp(utf8, p + 6) != 0) {/* TODO - fix! */
1115                         s = CODESET_LIST;
1116                         do {
1117                                 ++codeset;              /* Increment codeset first. */
1118                                 if (!strcmp((char*) CODESET_LIST + *s, p + 6)) {
1119                                         goto FIND_LANG_CULT;
1120                                 }
1121                         } while (*++s);
1122                         return 0;                       /* No matching codeset! */
1123                 }
1124         }
1125
1126  FIND_LANG_CULT:                                /* Find language_culture number. */
1127         s = LOCALE_NAMES;
1128         do {                                            /* TODO -- do a binary search? */
1129                 /* TODO -- fix gen_mmap!*/
1130                 ++lang_cult;                    /* Increment first since C/POSIX is 0. */
1131                 if (!strncmp((char*) s, p, 5)) { /* Found a matching locale name; */
1132                         goto FIND_LOCALE;
1133                 }
1134                 s += 5;
1135         } while (lang_cult < __LOCALE_DATA_NUM_LOCALE_NAMES);
1136         return 0;                                       /* No matching language_culture! */
1137
1138  FIND_LOCALE:                                   /* Find locale row matching name and codeset */
1139         s = LOCALES;
1140         n = 0;
1141         do {                                            /* TODO -- do a binary search? */
1142                 if ((lang_cult == *s) && ((codeset == s[1]) || (codeset == s[2]))) {
1143                         i = 1;
1144                         s = new_locale + 1;
1145                         do {
1146                                 if (category_mask & i) {
1147                                         /* Encode current locale row number. */
1148                                         ((unsigned char *) s)[0] = (n >> 7) | 0x80;
1149                                         ((unsigned char *) s)[1] = (n & 0x7f) | 0x80;
1150                                 }
1151                                 s += 2;
1152                                 i += i;
1153                         } while (i < (1 << LC_ALL));
1154
1155                         return i;                       /* Return non-zero */
1156                 }
1157                 s += __LOCALE_DATA_WIDTH_LOCALES;
1158                 ++n;
1159         } while (n <= __LOCALE_DATA_NUM_LOCALES); /* We started at 1!!! */
1160
1161         return 0;                                       /* Unsupported locale. */
1162 }
1163
1164 static unsigned char *composite_locale(int category_mask, const char *locale,
1165                                                                            unsigned char *new_locale)
1166 {
1167         char buf[MAX_LOCALE_STR];
1168         char *t;
1169         char *e;
1170         int c;
1171         int component_mask;
1172
1173         if (!strchr(locale,'=')) {
1174                 if (!find_locale(category_mask, locale, new_locale)) {
1175                         return NULL;
1176                 }
1177                 return new_locale;
1178         }
1179
1180         if (strlen(locale) >= sizeof(buf)) {
1181                 return NULL;
1182         }
1183         stpcpy(buf, locale);
1184
1185         component_mask = 0;
1186         t = strtok_r(buf, "=", &e);     /* This can't fail because of strchr test above. */
1187         do {
1188                 c = 0;
1189                 /* CATEGORY_NAMES is unsigned char* */
1190                 while (strcmp((char*) CATEGORY_NAMES + (int) CATEGORY_NAMES[c], t)) {
1191                         if (++c == LC_ALL) { /* Unknown category name! */
1192                                 return NULL;
1193                         }
1194                 }
1195                 t = strtok_r(NULL, ";", &e);
1196                 c = (1 << c);
1197                 if (component_mask & c) { /* Multiple components for one category. */
1198                         return NULL;
1199                 }
1200                 component_mask |= c;
1201                 if ((category_mask & c) && (!t || !find_locale(c, t, new_locale))) {
1202                         return NULL;
1203                 }
1204         } while ((t = strtok_r(NULL, "=", &e)) != NULL);
1205
1206         if (category_mask & ~component_mask) { /* Category component(s) missing. */
1207                 return NULL;
1208         }
1209
1210         return new_locale;
1211 }
1212
1213 __locale_t newlocale(int category_mask, const char *locale, __locale_t base)
1214 {
1215         const char *p;
1216         int i, j, k;
1217         unsigned char new_selector[LOCALE_SELECTOR_SIZE];
1218
1219         if (category_mask == (1 << LC_ALL)) {
1220                 category_mask = LC_ALL_MASK;
1221         }
1222
1223         if (!locale || ((unsigned)(category_mask) > LC_ALL_MASK)) {
1224  INVALID:
1225                 __set_errno(EINVAL);
1226                 return NULL; /* No locale or illegal/unsupported category. */
1227         }
1228
1229 #ifdef __UCLIBC_MJN3_ONLY__
1230 #warning TODO: Rename cur_locale to locale_selector.
1231 #endif
1232         strcpy((char *) new_selector,
1233                    (base ? (char *) base->cur_locale : C_LOCALE_SELECTOR));
1234
1235         if (!locale[0]) {       /* locale == "", so check environment. */
1236                 const char *envstr[4];
1237
1238                 envstr[0] = "LC_ALL";
1239                 envstr[1] = NULL;
1240                 envstr[2] = "LANG";
1241                 envstr[3] = posix;
1242
1243                 i = 1;
1244                 k = 0;
1245                 do {
1246                         if (category_mask & i) {
1247                                 /* Note: SUSv3 doesn't define a fallback mechanism here.
1248                                  * So, if LC_ALL is invalid, we do _not_ continue trying
1249                                  * the other environment vars. */
1250                                 envstr[1] = (char*) CATEGORY_NAMES + CATEGORY_NAMES[k];
1251                                 j = 0;
1252                                 while (1) {
1253                                         p = envstr[j];
1254                                         if (++j >= 4)
1255                                                 break; /* now p == "POSIX" */
1256                                         p = getenv(p);
1257                                         if (p && p[0])
1258                                                 break;
1259                                 };
1260
1261                                 /* The user set something... is it valid? */
1262                                 /* Note: Since we don't support user-supplied locales and
1263                                  * alternate paths, we don't need to worry about special
1264                                  * handling for suid/sgid apps. */
1265                                 if (!find_locale(i, p, new_selector)) {
1266                                         goto INVALID;
1267                                 }
1268                         }
1269                         i += i;
1270                 } while (++k < LC_ALL);
1271         } else if (!composite_locale(category_mask, locale, new_selector)) {
1272                 goto INVALID;
1273         }
1274
1275 #ifdef __UCLIBC_MJN3_ONLY__
1276 #warning TODO: Do a compatible codeset check!
1277 #endif
1278
1279         /* If we get here, the new selector corresponds to a valid locale. */
1280
1281 #ifdef __UCLIBC_MJN3_ONLY__
1282 #warning CONSIDER: Probably want a _locale_new func to allow for caching of locales.
1283 #endif
1284 #if 0
1285         if (base) {
1286                 _locale_set_l(new_selector, base);
1287         } else {
1288                 base = _locale_new(new_selector);
1289         }
1290 #else
1291         if (!base) {
1292                 base = calloc(1, sizeof(struct __uclibc_locale_struct));
1293                 if (base == NULL)
1294                         return base;
1295                 _locale_init_l(base);
1296         }
1297
1298         _locale_set_l(new_selector, base);
1299 #endif
1300
1301         return base;
1302 }
1303 #ifdef __UCLIBC_HAS_XLOCALE__
1304 libc_hidden_def(newlocale)
1305 #endif
1306
1307 #endif
1308 /**********************************************************************/
1309 #ifdef L_duplocale
1310
1311
1312 #ifdef __UCLIBC_MJN3_ONLY__
1313 #warning REMINDER: When we allocate ctype tables, remember to dup them.
1314 #endif
1315
1316 __locale_t duplocale(__locale_t dataset)
1317 {
1318         __locale_t r;
1319         uint16_t * i2w;
1320         size_t n;
1321
1322         assert(dataset != LC_GLOBAL_LOCALE);
1323
1324         r = malloc(sizeof(struct __uclibc_locale_struct));
1325         if (r != NULL) {
1326                 n = 2 * dataset->collate.max_col_index + 2;
1327                 i2w = calloc(n, sizeof(uint16_t));
1328                 if (i2w != NULL) {
1329                         memcpy(r, dataset, sizeof(struct __uclibc_locale_struct));
1330                         r->collate.index2weight = i2w;
1331                         memcpy(i2w, dataset->collate.index2weight, n * sizeof(uint16_t));
1332                 } else {
1333                         free(r);
1334                         r = NULL;
1335                 }
1336         }
1337         return r;
1338 }
1339
1340 #endif
1341 /**********************************************************************/
1342 #ifdef L_freelocale
1343
1344 #ifdef __UCLIBC_MJN3_ONLY__
1345 #warning REMINDER: When we allocate ctype tables, remember to free them.
1346 #endif
1347
/* Release a locale object: frees its collation index2weight array and then
 * the object itself.
 * dataset must be neither the global locale object nor the LC_GLOBAL_LOCALE
 * handle; both conditions are only checked with assert(). */
1348 void freelocale(__locale_t dataset)
1349 {
1350         assert(dataset != __global_locale);
1351         assert(dataset != LC_GLOBAL_LOCALE);
1352
1353         free(dataset->collate.index2weight); /* Free collation data. */
1354         free(dataset);                          /* Free locale */
1355 }
1356
1357 #endif
1358 /**********************************************************************/
1359 #ifdef L_uselocale
1360
1361 __locale_t uselocale(__locale_t dataset)
1362 {
1363         __locale_t old;
1364
1365         if (!dataset) {
1366                 old = __UCLIBC_CURLOCALE;
1367         } else {
1368                 if (dataset == LC_GLOBAL_LOCALE) {
1369                         dataset = __global_locale;
1370                 }
1371 #ifdef __UCLIBC_HAS_THREADS__
1372                 old = __curlocale_set(dataset);
1373 #else
1374                 old = __curlocale_var;
1375                 __curlocale_var = dataset;
1376 #endif
1377         }
1378
1379         if (old == __global_locale) {
1380                 return LC_GLOBAL_LOCALE;
1381         }
1382         return old;
1383 }
1384 libc_hidden_def(uselocale)
1385
1386 #endif
1387 /**********************************************************************/
1388 #ifdef L___curlocale
1389
1390 #ifdef __UCLIBC_HAS_THREADS__
1391
1392 __locale_t weak_const_function __curlocale(void)
1393 {
1394         return __curlocale_var; /* This is overriden by the thread version. */
1395 }
1396 libc_hidden_weak(__curlocale)
1397
1398 __locale_t weak_function __curlocale_set(__locale_t newloc)
1399 {
1400         __locale_t oldloc = __curlocale_var;
1401         assert(newloc != LC_GLOBAL_LOCALE);
1402         __curlocale_var = newloc;
1403         return oldloc;
1404 }
1405 libc_hidden_weak(__curlocale_set)
1406
1407 #endif
1408
1409 #endif
1410 /**********************************************************************/
1411 #ifdef L___locale_mbrtowc_l
1412
1413 /* NOTE: This returns an int... not size_t.  Also, it is not a general
1414  * routine.  It is actually a very stripped-down version of mbrtowc
1415  * that takes a __locale_t arg.  This is used by strcoll and strxfrm.
1416  * It is also used above to generate wchar_t versions of the decimal point
1417  * and thousands separator. */
1418
1419
1420 #ifndef __CTYPE_HAS_UTF_8_LOCALES
1421 #warning __CTYPE_HAS_UTF_8_LOCALES not set!
1422 #endif
1423 #ifndef __CTYPE_HAS_8_BIT_LOCALES
1424 #warning __CTYPE_HAS_8_BIT_LOCALES not set!
1425 #endif
1426
1427 #define Cc2wc_IDX_SHIFT         __LOCALE_DATA_Cc2wc_IDX_SHIFT
1428 #define Cc2wc_ROW_LEN           __LOCALE_DATA_Cc2wc_ROW_LEN
1429
1430 extern size_t _wchar_utf8sntowcs(wchar_t *__restrict pwc, size_t wn,
1431                                                  const char **__restrict src, size_t n,
1432                                                  mbstate_t *ps, int allow_continuation) attribute_hidden;
1433
1434 int attribute_hidden __locale_mbrtowc_l(wchar_t *__restrict dst,
1435                                            const char *__restrict src,
1436                                            __locale_t loc )
1437 {
1438 #ifdef __CTYPE_HAS_UTF_8_LOCALES
1439         if (loc->encoding == __ctype_encoding_utf8) {
1440                 mbstate_t ps;
1441                 const char *p = src;
1442                 size_t r;
1443                 ps.__mask = 0;
1444                 r = _wchar_utf8sntowcs(dst, 1, &p, SIZE_MAX, &ps, 1);
1445                 return (r == 1) ? (p-src) : r; /* Need to return 0 if nul char. */
1446         }
1447 #endif
1448
1449 #ifdef __CTYPE_HAS_8_BIT_LOCALES
1450         assert((loc->encoding == __ctype_encoding_7_bit) || (loc->encoding == __ctype_encoding_8_bit));
1451 #else
1452         assert(loc->encoding == __ctype_encoding_7_bit);
1453 #endif
1454
1455         if ((*dst = ((unsigned char)(*src))) < 0x80) {  /* ASCII... */
1456                 return (*src != 0);
1457         }
1458
1459 #ifdef __CTYPE_HAS_8_BIT_LOCALES
1460         if (loc->encoding == __ctype_encoding_8_bit) {
1461                 wchar_t wc = *dst - 0x80;
1462                 *dst = loc->tbl8c2wc[
1463                                                 (loc->idx8c2wc[wc >> Cc2wc_IDX_SHIFT]
1464                                                  << Cc2wc_IDX_SHIFT) + (wc & (Cc2wc_ROW_LEN - 1))];
1465                 if (*dst) {
1466                         return 1;
1467                 }
1468         }
1469 #endif
1470
1471         return -1;
1472 }
1473
1474 #endif
1475 /**********************************************************************/