1 /* Copyright (C) 2002 Manuel Novoa III
3 * This library is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU Library General Public
5 * License as published by the Free Software Foundation; either
6 * version 2 of the License, or (at your option) any later version.
8 * This library is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Library General Public License for more details.
13 * You should have received a copy of the GNU Library General Public
14 * License along with this library; if not, write to the Free
15 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 * Implement the shared mmap code so non-mmu platforms can use this.
20 * Add some basic collate functionality similar to what the previous
21 * locale support had (8-bit codesets only).
33 #ifndef __LOCALE_C_ONLY
/* Shorthand for the current locale specification kept in the global
 * locale object; the code below treats it as a NUL-terminated string. */
35 #define CUR_LOCALE_SPEC (__global_locale.cur_locale)
/* Shorthand for the codeset list inside the locale data that
 * __locale_mmap points to. */
37 #define CODESET_LIST (__locale_mmap->codeset_list)
39 /* TODO: Optional... See below. */
/* Enables the sections of setlocale() that are wrapped in
 * #ifdef __LOCALE_STRICTER_SETLOCALE. */
40 #define __LOCALE_STRICTER_SETLOCALE
42 #endif /* __LOCALE_C_ONLY */
44 /**********************************************************************/
47 #ifdef __LOCALE_C_ONLY
/* NOTE(review): link_warning is defined elsewhere; presumably it attaches
 * the quoted message to any program that links against setlocale. */
49 link_warning(setlocale,"the 'setlocale' function supports only C|POSIX locales")
/* The one locale name this build can report. */
51 static const char C_string[] = "C";
/*
 * setlocale() for builds that only support the C/POSIX locale.
 *
 * category: locale category; accepted when its unsigned value is <= LC_ALL.
 * locale:   NULL (query), "" (default), "C" or "POSIX".
 *
 * Returns a pointer to the constant string "C" when both the category and
 * the locale argument are acceptable.  The other arm of the conditional
 * expression is not visible in this excerpt (presumably NULL - confirm).
 */
53 char *setlocale(int category, register const char *locale)
/* The unsigned cast makes negative categories fail the range check too. */
55 return ( (((unsigned int)(category)) <= LC_ALL)
56 && ( (!locale) /* Request for locale category string. */
57 || (!*locale) /* Implementation-defined default is C. */
58 || ((*locale == 'C') && !locale[1])
59 || (!strcmp(locale, "POSIX"))) )
60 ? (char *) C_string /* Always in C/POSIX locale. */
64 #else /* ---------------------------------------------- __LOCALE_C_ONLY */
/* Refuse to build the full-locale code when no locale besides C exists. */
66 #if !defined(NUM_LOCALES) || (NUM_LOCALES <= 1)
67 #error locales enabled, but not data other than for C locale!
/* Static buffer whose address setlocale() hands back to the caller. */
70 static unsigned char setlocale_buf[LOCALE_STRING_SIZE];
/* Shorthands for tables inside the locale data that __locale_mmap
 * points to. */
73 #define LOCALE_NAMES (__locale_mmap->locale_names5)
74 #define LOCALES (__locale_mmap->locales)
75 #define LOCALE_AT_MODIFIERS (__locale_mmap->locale_at_modifiers)
76 #define CATEGORY_NAMES (__locale_mmap->lc_names)
/* Alternative spelling of the C locale accepted by find_locale(). */
78 static const char posix[] = "POSIX";
/*
 * find_locale: look up the locale named by p and record its row number in
 * new_locale for the requested category (every category below LC_ALL when
 * category == LC_ALL).
 *
 * Visible stages:
 *   1. optional '@' modifier handling (only when LOCALE_AT_MODIFIERS_LENGTH
 *      is defined),
 *   2. "C" / "POSIX" shortcut,
 *   3. codeset lookup when p[5] == '.',
 *   4. language_culture lookup on the first 5 characters of p,
 *   5. search of the locale rows for a matching (lang_cult, codeset) pair.
 *
 * Returns 0 when the codeset, the language_culture or the locale row cannot
 * be matched; returns a non-zero value on success.
 *
 * NOTE(review): several statements of this function are not visible in
 * this excerpt; the comments below describe only the lines that are.
 */
80 static int find_locale(int category, const char *p, unsigned char *new_locale)
83 const unsigned char *s;
85 unsigned char lang_cult, codeset;
87 #if defined(LOCALE_AT_MODIFIERS_LENGTH) && 1
88 /* Support standard locale handling for @-modifiers. */
89 char buf[18]; /* TODO: 7+{max codeset name length} */
92 if ((q = strchr(p,'@')) != NULL) {
/* The cast to size_t turns a too-short prefix (q-p < 5) into a huge value,
 * so one comparison rejects both too-short and too-long names. */
93 if ((((size_t)((q-p)-5)) > (sizeof(buf) - 5)) || (p[2] != '_')) {
96 /* locale name at least 5 chars long and 3rd char is '_' */
97 s = LOCALE_AT_MODIFIERS;
/* The modifier text of an entry starts at offset 2; it is compared with
 * the text following '@'. */
99 if (!strcmp(s+2, q+1)) {
/* Advance to the next entry: 2 bytes plus the value of the first byte. */
102 s += 2 + *s; /* TODO - fix this throughout */
114 lang_cult = codeset = 0; /* Assume C and default codeset. */
115 if (((*p == 'C') && !p[1]) || !strcmp(p, posix)) {
119 if (p[5] == '.') { /* Codeset specified in locale name? */
120 /* TODO: maybe CODESET_LIST + *s ??? */
121 /* 7bit is 1, UTF-8 is 2, 8-bit is >= 3 */
123 if (strcmp("UTF-8",p+6) != 0) {/* TODO - fix! */
126 ++codeset; /* Increment codeset first. */
/* *s is used as an offset into CODESET_LIST for the candidate name. */
127 if (!strcmp(CODESET_LIST+*s, p+6)) {
131 return 0; /* No matching codeset! */
135 FIND_LANG_CULT: /* Find language_culture number. */
137 do { /* TODO -- do a binary search? */
138 /* TODO -- fix gen_mmap!*/
139 ++lang_cult; /* Increment first since C/POSIX is 0. */
140 if (!strncmp(s,p,5)) { /* Found a matching locale name; */
144 } while (lang_cult < NUM_LOCALE_NAMES);
145 return 0; /* No matching language_culture! */
147 FIND_LOCALE: /* Find locale row matching name and codeset */
150 do { /* TODO -- do a binary search? */
/* A row matches when its first byte is the language_culture number and the
 * codeset equals either its second or its third byte. */
151 if ((lang_cult == *s) && ((codeset == s[1]) || (codeset == s[2]))) {
/* LC_ALL starts at index 0 and the loop below then walks every index below
 * LC_ALL; a single category runs the loop body exactly once. */
152 i = ((category == LC_ALL) ? 0 : category);
/* s is re-pointed at new_locale; the pre-increments below store the pair
 * at new_locale[2*i + 1] and new_locale[2*i + 2]. */
153 s = new_locale + 2*i;
155 /* Encode current locale row number. */
156 *((unsigned char *) ++s) = (n >> 8) | 0x80;
157 *((unsigned char *) ++s) = n & 0xff;
158 } while (++i < category);
/* i has been incremented at least once, so it is >= 1 here. */
160 return i; /* Return non-zero */
164 } while (n <= NUM_LOCALES); /* We started at 1!!! */
166 return 0; /* Unsupported locale. */
/*
 * setlocale() for builds with real locale data.
 *
 * category: locale category; values above LC_ALL (as unsigned) are rejected
 *           with a NULL return.
 * locale:   NULL  - query: the current specification is copied into
 *                   setlocale_buf and that buffer is returned;
 *           ""    - take the name from the environment (LC_ALL, then the
 *                   category's own variable, then LANG);
 *           "#.." - a value previously returned by this function;
 *           other - a locale name resolved through find_locale().
 *
 * On the set paths a working copy (new_locale) of the current specification
 * is updated and then installed with _locale_set().
 *
 * NOTE(review): the failure returns and the final return statement are not
 * visible in this excerpt.
 */
169 char *setlocale(int category, const char *locale)
171 const unsigned char *p;
175 unsigned char new_locale[LOCALE_STRING_SIZE];
/* The unsigned cast also rejects negative categories. */
177 if (((unsigned int)(category)) > LC_ALL) {
178 /* TODO - set errno? SUSv3 doesn't say to. */
179 return NULL; /* Illegal/unsupported category. */
/* One bit per category; the LC_ALL adjustment is on lines not visible here. */
182 lc_mask = 1 << category;
183 if (category == LC_ALL) {
187 if (!locale) { /* Request for locale category string... */
189 strcpy(setlocale_buf, CUR_LOCALE_SPEC);
190 #ifdef __LOCALE_STRICTER_SETLOCALE
191 /* The standard says you can only use the string returned to restore
192 * the category (categories) requested. This could be optional.
193 * See below as well. */
/* Skip the leading byte of the specification string. */
194 s = setlocale_buf + 1;
/* Sentinel bit: once it has been shifted down to bit 0 the mask equals 1
 * and the loop below stops, i.e. after LC_ALL iterations. */
195 lc_mask |= (1 << LC_ALL);
197 if (!(lc_mask & 1)) {
198 /* Encode non-selected locale flag. */
202 } while ((lc_mask >>= 1) > 1);
203 #endif /* __LOCALE_STRICTER_SETLOCALE */
204 return (char *) setlocale_buf;
207 strcpy(new_locale, CUR_LOCALE_SPEC); /* Start with current. */
209 if (!*locale) { /* locale == "", so check environment. */
210 i = ((category == LC_ALL) ? 0 : category);
212 /* Note: SUSv3 doesn't define a fallback mechanism here. So,
213 * if LC_ALL is invalid, we do _not_ continue trying the other
214 * environment vars. */
215 if (!(p = getenv("LC_ALL"))) {
/* CATEGORY_NAMES[i] is used as an offset, from the start of the same table,
 * to the environment variable name of category i. */
216 if (!(p = getenv(CATEGORY_NAMES + CATEGORY_NAMES[i]))) {
217 if (!(p = getenv("LANG"))) {
223 /* The user set something... is it valid? */
224 /* Note: Since we don't support user-supplied locales and
225 * alternate paths, we don't need to worry about special
226 * handling for suid/sgid apps. */
227 if (!find_locale(i, p, new_locale)) {
230 } while (++i < category);
231 } else if (*locale == '#') { /* Previously returned value. */
/* NOTE(review): assert() compiles to nothing when NDEBUG is defined, so
 * this length check on a caller-supplied string is not a runtime guarantee. */
232 assert(strlen(locale) == LOCALE_STRING_SIZE - 1);
234 i = ((category == LC_ALL) ? 0 : category);
236 s = new_locale + 2*i;
238 #ifdef __LOCALE_STRICTER_SETLOCALE
239 /* Only set categories that were selected in the previous
240 * return value. Could be optional. See above as well.
241 * NOTE: This still isn't quite right for non-LC_ALL
242 * as it only checks the category selected to set. */
/* Two 0xff bytes: presumably the "non-selected" marker written by the
 * query path above - confirm against the lines not visible here. */
243 if ((*p == 0xff) && (p[1] == 0xff)) {
246 #endif /* __LOCALE_STRICTER_SETLOCALE */
247 /* Note: Validate settings below. */
250 } while (++i < category);
251 } else if (!find_locale(category, locale, new_locale)) {
256 /* TODO: Ok, everything checks out, so install the new locale. */
257 _locale_set(new_locale);
259 /* Everything ok, so make a copy in setlocale_buf and return. */
263 #endif /* __LOCALE_C_ONLY */
266 /**********************************************************************/
269 /* Note: We assume here that the compiler does the sane thing regarding
270 * placement of the fields in the struct. If necessary, we could ensure
271 * this using an array of offsets but at some size cost. */
273 #ifdef __LOCALE_C_ONLY
/* NOTE(review): link_warning is defined elsewhere; presumably it attaches
 * the quoted message to any program that links against localeconv. */
275 link_warning(localeconv,"the 'localeconv' function is hardwired for C/POSIX locale only")
/* Static result object; localeconv() fills it through a byte pointer. */
277 static struct lconv the_lconv;
/* "." for the first pointer field; decpt+1 addresses the terminating NUL
 * and so serves as the empty string for the following pointer fields. */
279 static const char decpt[] = ".";
/*
 * localeconv() for builds that only support the C/POSIX locale.
 *
 * Fills the_lconv field by field through a byte pointer and therefore
 * depends on the members of struct lconv being laid out in order.
 * NOTE(review): the loop openers, the body of the second loop and the
 * return statement are not visible in this excerpt.
 */
281 struct lconv *localeconv(void)
283 register char *p = (char *)(&the_lconv);
/* First pointer-sized slot of the struct gets ".". */
285 *((char **)p) = (char *) decpt;
/* Each later pointer slot, up to the bound tested below, gets "". */
287 p += sizeof(char **);
288 *((char **)p) = (char *) (decpt+1);
289 } while (p < (char *) &the_lconv.negative_sign);
/* Second pass covers the char members int_frac_digits .. int_n_sign_posn. */
291 p = (&the_lconv.int_frac_digits);
295 } while (p <= &the_lconv.int_n_sign_posn);
300 #else /* __LOCALE_C_ONLY */
/* Static result object; localeconv() fills it through a byte pointer. */
302 static struct lconv the_lconv;
/*
 * localeconv() for builds with real locale data.
 *
 * Walks the_lconv with p while q walks the pointer members of
 * __global_locale starting at decimal_point; this depends on both structs
 * keeping their members in matching order.
 * NOTE(review): the statements inside both loops and the return statement
 * are not visible in this excerpt.
 */
304 struct lconv *localeconv(void)
306 register char *p = (char *) &the_lconv;
307 register char **q = (char **) &__global_locale.decimal_point;
/* First loop: pointer-sized steps until int_frac_digits is reached. */
311 p += sizeof(char **);
313 } while (p < &the_lconv.int_frac_digits);
/* Second loop: members int_frac_digits .. int_n_sign_posn (inclusive). */
319 } while (p <= &the_lconv.int_n_sign_posn);
324 #endif /* __LOCALE_C_ONLY */
327 /**********************************************************************/
328 #ifdef L__locale_init
330 #ifndef __LOCALE_C_ONLY
/* Locale specification for the C locale: '#' (0x23) followed by six
 * two-byte entries 0x80 0x01.  _locale_set() decodes each entry as
 * ((b0 & 0x7f) << 8) + b1 - 1, which yields row 0. */
332 #define C_LOCALE_SELECTOR "\x23\x80\x01\x80\x01\x80\x01\x80\x01\x80\x01\x80\x01"
333 #define LOCALE_INIT_FAILED "locale init failed!\n"
/* Shorthand for the current locale specification string. */
335 #define CUR_LOCALE_SPEC (__global_locale.cur_locale)
/* The process-wide locale object, defined here. */
337 __locale_t __global_locale;
/*
 * _locale_init: prepare __global_locale and install the C locale.
 *
 * Visible steps: reset the current specification to "#" followed by zero
 * bytes, copy the per-category item counts from the locale data, record
 * where each category's items start inside __locale_t, point the ctype and
 * wide-character table pointers at the locale data, and finally call
 * _locale_set() with the C locale selector.
 */
339 void _locale_init(void)
341 /* TODO: mmap the locale file */
/* Zero bytes never match a selector entry, so the first _locale_set() call
 * sees every category as changed. */
344 memset(CUR_LOCALE_SPEC, 0, LOCALE_STRING_SIZE);
345 CUR_LOCALE_SPEC[0] = '#';
/* The size argument of this memcpy is on a line not visible here. */
347 memcpy(__global_locale.category_item_count,
348 __locale_mmap->lc_common_item_offsets_LEN,
/* Byte offset within __locale_t of the first item of each category;
 * index 4 (collate) is left unset. */
351 __global_locale.category_offsets[0] = offsetof(__locale_t, codeset);
352 __global_locale.category_offsets[1] = offsetof(__locale_t, decimal_point);
353 __global_locale.category_offsets[2] = offsetof(__locale_t, int_curr_symbol);
354 __global_locale.category_offsets[3] = offsetof(__locale_t, abday_1);
355 /* __global_locale.category_offsets[4] = offsetof(__locale_t, collate???); */
356 __global_locale.category_offsets[5] = offsetof(__locale_t, yesexpr);
/* Table pointers refer directly into the locale data; nothing is copied. */
358 #ifdef __CTYPE_HAS_8_BIT_LOCALES
359 __global_locale.tbl8ctype
360 = (const unsigned char *) &__locale_mmap->tbl8ctype;
361 __global_locale.tbl8uplow
362 = (const unsigned char *) &__locale_mmap->tbl8uplow;
363 #ifdef __WCHAR_ENABLED
364 __global_locale.tbl8c2wc
365 = (const uint16_t *) &__locale_mmap->tbl8c2wc;
366 __global_locale.tbl8wc2c
367 = (const unsigned char *) &__locale_mmap->tbl8wc2c;
369 #endif /* __WCHAR_ENABLED */
370 #endif /* __CTYPE_HAS_8_BIT_LOCALES */
371 #ifdef __WCHAR_ENABLED
372 __global_locale.tblwctype
373 = (const unsigned char *) &__locale_mmap->tblwctype;
374 __global_locale.tblwuplow
375 = (const unsigned char *) &__locale_mmap->tblwuplow;
376 __global_locale.tblwuplow_diff
377 = (const uint16_t *) &__locale_mmap->tblwuplow_diff;
378 __global_locale.tblwcomb
379 = (const unsigned char *) &__locale_mmap->tblwcomb;
381 #endif /* __WCHAR_ENABLED */
/* Start out in the C locale. */
383 _locale_set(C_LOCALE_SELECTOR);
/* Codeset names reported for the two built-in encodings. */
386 static const char ascii[] = "ASCII";
387 static const char utf8[] = "UTF-8";
/*
 * _locale_set: make the locale described by p the current one.
 *
 * p: locale specification; its leading byte is skipped and each category is
 *    described by a two-byte entry that is compared with the entry in
 *    CUR_LOCALE_SPEC.
 *
 * For every category whose entry differs, the locale row is decoded and the
 * matching members of __global_locale are refreshed: codeset, encoding and
 * mb_cur_max on one branch, the category's item pointers on the other.
 *
 * NOTE(review): the loop opener, the branch conditions and the pointer
 * advances are on lines not visible in this excerpt.
 */
389 void _locale_set(const unsigned char *p)
/* s starts just past the first byte of the current specification. */
392 unsigned char *s = CUR_LOCALE_SPEC + 1;
394 const unsigned char *r;
397 const unsigned char *d;
398 int row; /* locale row */
399 int crow; /* category row */
/* Only categories whose two-byte entry changed are processed. */
406 if ((*p != *s) || (p[1] != s[1])) {
/* Inverse of the encoding used in find_locale(): drop the 0x80 flag,
 * combine the two bytes and convert the 1-based number to a 0-based row. */
407 row = (((int)(*p & 0x7f)) << 8) + p[1] - 1;
409 assert(row < NUM_LOCALES);
415 c = __locale_mmap->locales[ WIDTH_LOCALES * row + 2 ]; /* codeset */
/* UTF-8 codeset. */
418 __global_locale.codeset = utf8;
419 __global_locale.encoding = __ctype_encoding_utf8;
420 /* TODO - fix for bcc */
421 __global_locale.mb_cur_max = 6;
/* 7-bit ASCII codeset. */
424 __global_locale.codeset = ascii;
425 __global_locale.encoding = __ctype_encoding_7_bit;
426 __global_locale.mb_cur_max = 1;
/* 8-bit codesets. */
429 const codeset_8_bit_t *c8b;
/* c is rebased by 3 and then indexes both the name offsets in r and the
 * codeset_8_bit array; r itself is assigned on a line not visible here. */
431 __global_locale.codeset = r + r[c -= 3];
432 __global_locale.encoding = __ctype_encoding_8_bit;
433 #ifdef __UCLIBC_MJN3_ONLY__
434 #warning REMINDER: update 8 bit mb_cur_max when trasnlit implemented!
436 /* TODO - update when translit implemented! */
437 __global_locale.mb_cur_max = 1;
438 c8b = __locale_mmap->codeset_8_bit + c;
439 #ifdef __CTYPE_HAS_8_BIT_LOCALES
440 __global_locale.idx8ctype = c8b->idx8ctype;
441 __global_locale.idx8uplow = c8b->idx8uplow;
442 #ifdef __WCHAR_ENABLED
443 __global_locale.idx8c2wc = c8b->idx8c2wc;
444 __global_locale.idx8wc2c = c8b->idx8wc2c;
446 #endif /* __WCHAR_ENABLED */
447 #endif /* __CTYPE_HAS_8_BIT_LOCALES */
/* Categories with at least one common item: rebuild their item pointers.
 * The tail of the crow assignment below is on a line not visible here. */
450 } else if ((len = __locale_mmap->lc_common_item_offsets_LEN[i]) != 0) {
451 crow = __locale_mmap->locales[ WIDTH_LOCALES * row + 3 + i ]
/* x addresses the first item pointer of category i in __global_locale. */
453 x = (const char **)(((char *) &__global_locale)
454 + __global_locale.category_offsets[i]);
/* Four consecutive offsets per category, each relative to the start of the
 * locale data: r, io, ii and d, in that order. */
455 stp = __locale_mmap->lc_common_tbl_offsets + 4*i;
456 r = (const unsigned char *)( ((char *)__locale_mmap) + *stp );
457 io = (const uint16_t *)( ((char *)__locale_mmap) + *++stp );
458 ii = (const uint16_t *)( ((char *)__locale_mmap) + *++stp );
459 d = (const unsigned char *)( ((char *)__locale_mmap) + *++stp );
/* Item c: r gives a per-row index, io[c] the item's base into ii, and ii
 * the offset of the string inside d. */
460 for (c=0 ; c < len ; c++) {
461 *(x + c) = d + ii[ r[crow + c] + io[c] ];
469 } while (i < LC_ALL);
472 #endif /* __LOCALE_C_ONLY */
475 /**********************************************************************/
478 #include <langinfo.h>
479 #include <nl_types.h>
481 #ifdef __LOCALE_C_ONLY
483 /* We need to index 300 bytes of data, so you might initially think we
484 * need to store the offsets in shorts. But since the offset of the
485 * 64th item is 231, we'll store "offset - 64" for all items >= 64
486 * and always calculate the data offset as "offset[i] + (i & 64)".
487 * This allows us to pack the data offsets in an unsigned char while
488 * also avoiding an "if".
490 * Note: Category order is assumed to be:
491 * ctype, numeric, monetary, time, collate, messages, all
496 /* Combine the data to avoid size penalty for separate char arrays when
497 * compiler aligns objects. The original code is left in as documentation. */
/* cat_start is the first C_LC_ALL + 1 bytes of nl_data; C_locale_data is
 * the string area that follows the 78 item offsets. */
498 #define cat_start nl_data
499 #define C_locale_data (nl_data + C_LC_ALL + 1 + 78)
/* One array holding three tables back to back: category start indices,
 * per-item string offsets, and the C locale strings themselves. */
501 static const unsigned char nl_data[C_LC_ALL + 1 + 78 + 300] = {
/* Category c owns the item indices cat_start[c] .. cat_start[c+1] - 1
 * (this is how nl_langinfo() bounds the item index). */
502 /* static const unsigned char cat_start[C_LC_ALL + 1] = { */
503 '\x00', '\x01', '\x04', '\x1a', '\x4c', '\x4c', '\x4e',
/* Offset of each item's string within C_locale_data; entries for items 64
 * and above are stored minus 64, and nl_langinfo() adds (i & 64) back. */
505 /* static const unsigned char item_offset[78] = { */
506 '\x00', '\x06', '\x07', '\x07', '\x07', '\x07', '\x07', '\x07',
507 '\x07', '\x07', '\x07', '\x08', '\x08', '\x08', '\x08', '\x08',
508 '\x08', '\x08', '\x08', '\x08', '\x08', '\x08', '\x08', '\x08',
509 '\x08', '\x0a', '\x0c', '\x10', '\x14', '\x18', '\x1c', '\x20',
510 '\x24', '\x28', '\x2f', '\x36', '\x3e', '\x48', '\x51', '\x58',
511 '\x61', '\x65', '\x69', '\x6d', '\x71', '\x75', '\x79', '\x7d',
512 '\x81', '\x85', '\x89', '\x8d', '\x91', '\x99', '\xa2', '\xa8',
513 '\xae', '\xb2', '\xb7', '\xbc', '\xc3', '\xcd', '\xd5', '\xde',
514 '\xa7', '\xaa', '\xad', '\xc2', '\xcb', '\xd4', '\xdf', '\xdf',
515 '\xdf', '\xdf', '\xdf', '\xdf', '\xe0', '\xe6',
/* NUL-terminated strings of the C locale, stored one after another. */
517 /* static const unsigned char C_locale_data[300] = { */
518 'A', 'S', 'C', 'I', 'I', '\x00', '.', '\x00',
519 '\x7f', '\x00', '-', '\x00', 'S', 'u', 'n', '\x00',
520 'M', 'o', 'n', '\x00', 'T', 'u', 'e', '\x00',
521 'W', 'e', 'd', '\x00', 'T', 'h', 'u', '\x00',
522 'F', 'r', 'i', '\x00', 'S', 'a', 't', '\x00',
523 'S', 'u', 'n', 'd', 'a', 'y', '\x00', 'M',
524 'o', 'n', 'd', 'a', 'y', '\x00', 'T', 'u',
525 'e', 's', 'd', 'a', 'y', '\x00', 'W', 'e',
526 'd', 'n', 'e', 's', 'd', 'a', 'y', '\x00',
527 'T', 'h', 'u', 'r', 's', 'd', 'a', 'y',
528 '\x00', 'F', 'r', 'i', 'd', 'a', 'y', '\x00',
529 'S', 'a', 't', 'u', 'r', 'd', 'a', 'y',
530 '\x00', 'J', 'a', 'n', '\x00', 'F', 'e', 'b',
531 '\x00', 'M', 'a', 'r', '\x00', 'A', 'p', 'r',
532 '\x00', 'M', 'a', 'y', '\x00', 'J', 'u', 'n',
533 '\x00', 'J', 'u', 'l', '\x00', 'A', 'u', 'g',
534 '\x00', 'S', 'e', 'p', '\x00', 'O', 'c', 't',
535 '\x00', 'N', 'o', 'v', '\x00', 'D', 'e', 'c',
536 '\x00', 'J', 'a', 'n', 'u', 'a', 'r', 'y',
537 '\x00', 'F', 'e', 'b', 'r', 'u', 'a', 'r',
538 'y', '\x00', 'M', 'a', 'r', 'c', 'h', '\x00',
539 'A', 'p', 'r', 'i', 'l', '\x00', 'M', 'a',
540 'y', '\x00', 'J', 'u', 'n', 'e', '\x00', 'J',
541 'u', 'l', 'y', '\x00', 'A', 'u', 'g', 'u',
542 's', 't', '\x00', 'S', 'e', 'p', 't', 'e',
543 'm', 'b', 'e', 'r', '\x00', 'O', 'c', 't',
544 'o', 'b', 'e', 'r', '\x00', 'N', 'o', 'v',
545 'e', 'm', 'b', 'e', 'r', '\x00', 'D', 'e',
546 'c', 'e', 'm', 'b', 'e', 'r', '\x00', 'A',
547 'M', '\x00', 'P', 'M', '\x00', '%', 'a', ' ',
548 '%', 'b', ' ', '%', 'e', ' ', '%', 'H',
549 ':', '%', 'M', ':', '%', 'S', ' ', '%',
550 'Y', '\x00', '%', 'm', '/', '%', 'd', '/',
551 '%', 'y', '\x00', '%', 'H', ':', '%', 'M',
552 ':', '%', 'S', '\x00', '%', 'I', ':', '%',
553 'M', ':', '%', 'S', ' ', '%', 'p', '\x00',
554 '^', '[', 'y', 'Y', ']', '\x00', '^', '[',
555 'n', 'N', ']', '\x00',
/*
 * nl_langinfo() for builds that only support the C/POSIX locale.
 *
 * item: an nl_item that is split into a category (_NL_ITEM_CATEGORY) and an
 *       index within that category (_NL_ITEM_INDEX).
 *
 * Returns the item's string from the C locale table, or an empty string
 * when the category is not below C_LC_ALL or the index lies outside the
 * category's range.
 */
558 char *nl_langinfo(nl_item item)
563 if ((c = _NL_ITEM_CATEGORY(item)) < C_LC_ALL) {
/* i is the global item index; it must stay below the start index of the
 * next category. */
564 if ((i = cat_start[c] + _NL_ITEM_INDEX(item)) < cat_start[c+1]) {
565 /* return (char *) C_locale_data + item_offset[i] + (i & 64); */
/* (i & 64) adds back the 64 that was subtracted from the stored offsets of
 * items 64 and above. */
566 return (char *) C_locale_data + nl_data[C_LC_ALL+1+i] + (i & 64);
/* The first byte of cat_start is '\x00'. */
569 return (char *) cat_start; /* Conveniently, this is the empty string. */
572 #else /* __LOCALE_C_ONLY */
/* Returned for any item that is out of range. */
574 static const char empty[] = "";
/*
 * nl_langinfo() for builds with real locale data.
 *
 * item: an nl_item that is split into a category (_NL_ITEM_CATEGORY) and an
 *       index within that category (_NL_ITEM_INDEX).
 *
 * Returns the string pointer stored for that item in __global_locale, or an
 * empty string when the category is not below LC_ALL or the index is not
 * below the category's item count.
 */
576 char *nl_langinfo(nl_item item)
578 unsigned int c = _NL_ITEM_CATEGORY(item);
579 unsigned int i = _NL_ITEM_INDEX(item);
581 if ((c < LC_ALL) && (i < __global_locale.category_item_count[c])) {
/* category_offsets[c] is the byte offset in __global_locale of the first
 * item pointer of category c; the i-th pointer after it is returned. */
582 return ((char **)(((char *) &__global_locale)
583 + __global_locale.category_offsets[c]))[i];
586 return (char *) empty;
589 #endif /* __LOCALE_C_ONLY */
592 /**********************************************************************/