1 /* nlsfuncs.cc: NLS helper functions
3 Copyright 2010 Red Hat, Inc.
5 This file is part of Cygwin.
7 This software is a copyrighted work licensed under the terms of the
8 Cygwin license. Please consult the file "CYGWIN_LICENSE" for
22 /* Internal headers from newlib */
23 #include "../locale/timelocal.h"
24 #include "../locale/lctype.h"
25 #include "../locale/lnumeric.h"
26 #include "../locale/lmonetary.h"
27 #include "../locale/lmessages.h"
/* Call-site shorthands.  _LC(x) expands to two arguments: the address of the
   write cursor lc_<x>_ptr and the number of bytes between that cursor and
   lc_<x>_end.  getlocaleinfo, eval_datetimefmt and charfromwchar forward to
   the double-underscore helpers and expect `lcid' plus the matching cursor
   and end variables (for charfromwchar also _<x>_locale) to be in scope where
   they are used.  has_modifier(x) is true when x is non-empty and equal to
   the string `modifier' that is in scope at the point of use. */
31 #define _LC(x) &lc_##x##_ptr,lc_##x##_end-lc_##x##_ptr
33 #define getlocaleinfo(category,type) \
34 __getlocaleinfo(lcid,(type),_LC(category))
35 #define eval_datetimefmt(type,flags) \
36 __eval_datetimefmt(lcid,(type),(flags),&lc_time_ptr,\
37 lc_time_end-lc_time_ptr)
38 #define charfromwchar(category,in) \
39 __charfromwchar (_##category##_locale->in,_LC(category),\
42 #define has_modifier(x) ((x)[0] && !strcmp (modifier, (x)))
44 /* Vista and later. Not defined in w32api yet. */
46 WINBASEAPI LCID WINAPI LocaleNameToLCID (LPCWSTR, DWORD);
/* One-entry cache: the locale name most recently handled by
   __get_lcid_from_locale and the LCID recorded for it. */
49 static char last_locale[ENCODING_LEN + 1];
50 static LCID last_lcid;
/* Resolve a POSIX locale name (language_TERRITORY with optional .charset and
   @modifier parts) to a Windows LCID.  The name and the result are cached in
   last_locale and last_lcid; (LCID) -1 is recorded when no LCID could be
   determined. */
52 /* Fetch LCID from POSIX locale specifier.
60 __get_lcid_from_locale (const char *name)
62 char locale[ENCODING_LEN + 1];
66 /* Speed up reusing the same locale as before, for instance in LC_ALL case. */
67 if (!strcmp (name, last_locale))
69 debug_printf ("LCID=0x%04x", last_lcid);
/* NOTE(review): both copies below are unbounded stpcpy calls into buffers of
   ENCODING_LEN + 1 bytes; presumably the caller guarantees that name fits -
   confirm. */
72 stpcpy (last_locale, name);
73 stpcpy (locale, name);
74 /* Store modifier for later use. */
75 const char *modifier = strchr (last_locale, '@') ? : "";
76 /* Drop charset and modifier */
77 c = strchr (locale, '.');
79 c = strchr (locale, '@');
82 /* "POSIX" already converted to "C" in loadlocale. */
83 if (!strcmp (locale, "C"))
85 c = strchr (locale, '_');
87 return last_lcid = (LCID) -1;
88 if (wincap.has_localenames ())
90 wchar_t wlocale[ENCODING_LEN + 1];
92 /* Convert to RFC 4646 syntax which is the standard for the locale names
93 replacing LCIDs starting with Vista. */
95 mbstowcs (wlocale, locale, ENCODING_LEN + 1);
96 lcid = LocaleNameToLCID (wlocale, 0);
99 /* Unfortunately there are a couple of locales for which no form
100 without a Script part per RFC 4646 exists.
101 Linux also supports no_NO which is equivalent to nb_NO. */
105 } sc_only_locale[] = {
106 { "az-AZ" , L"az-Latn-AZ" },
107 { "bs-BA" , L"bs-Latn-BA" },
108 { "ha-NG" , L"ha-Latn-NG" },
109 { "iu-CA" , L"iu-Latn-CA" },
110 { "mn-CN" , L"mn-Mong-CN" },
111 { "no-NO" , L"nb-NO" },
112 { "sr-BA" , L"sr-Cyrl-BA" },
113 { "sr-CS" , L"sr-Cyrl-CS" },
114 { "sr-ME" , L"sr-Cyrl-ME" },
115 { "sr-RS" , L"sr-Cyrl-RS" },
116 { "tg-TJ" , L"tg-Cyrl-TJ" },
117 { "tzm-DZ", L"tzm-Latn-DZ" },
118 { "uz-UZ" , L"uz-Latn-UZ" },
/* The first-letter comparison in the loop condition ends the scan early;
   this relies on sc_only_locale staying sorted by first character. */
121 for (int i = 0; sc_only_locale[i].loc
122 && sc_only_locale[i].loc[0] <= locale[0]; ++i)
123 if (!strcmp (locale, sc_only_locale[i].loc))
125 lcid = LocaleNameToLCID (sc_only_locale[i].wloc, 0);
126 if (!strncmp (locale, "sr-", 3))
128 /* Vista/2K8 is missing sr-ME and sr-RS. It has only the
129 deprecated sr-CS. So we map ME and RS to CS here. */
131 lcid = LocaleNameToLCID (L"sr-Cyrl-CS", 0);
132 /* "@latin" modifier for the sr_XY locales changes
133 collation behaviour so lcid should accommodate that
134 by being set to the Latin sublang. */
135 if (lcid != 0 && has_modifier ("@latin"))
136 lcid = MAKELANGID (lcid & 0x3ff, (lcid >> 10) - 1);
138 else if (!strncmp (locale, "uz-", 3))
140 /* Equivalent for "@cyrillic" modifier in uz_UZ locale */
141 if (lcid != 0 && has_modifier ("@cyrillic"))
142 lcid = MAKELANGID (lcid & 0x3ff, (lcid >> 10) + 1);
147 last_lcid = lcid ?: (LCID) -1;
148 debug_printf ("LCID=0x%04x", last_lcid);
151 /* Pre-Vista we have to loop through the LCID values and see if they
152 match language and TERRITORY. */
154 /* locale now points to the language, c points to the TERRITORY */
155 const char *language = locale;
156 const char *territory = c;
160 /* In theory the lang part takes 10 bits (0x3ff), but up to Windows 2003 R2
161 the highest lang value is 0x81. */
162 for (lang = 1; lang <= 0x81; ++lang)
163 if (GetLocaleInfo (lang, LOCALE_SISO639LANGNAME, iso, 10)
164 && !strcmp (language, iso))
172 /* In theory the sublang part takes 6 bits (0x3f), but up to
173 Windows 2003 R2 the highest sublang value is 0x14. */
174 for (sublang = 1; sublang <= 0x14; ++sublang)
176 lcid = (sublang << 10) | lang;
177 if (GetLocaleInfo (lcid, LOCALE_SISO3166CTRYNAME, iso, 10)
178 && !strcmp (territory, iso))
184 if (lcid == 0 && territory)
186 /* Unfortunately there are four language LCID number areas representing
187 multiple languages. Fortunately only two of them already existed
188 pre-Vista. The concealed languages have to be tested explicitly,
189 since they are not caught by the above loops.
190 This also enables the serbian ISO 3166 territory codes which have
191 been changed post 2003, and maps them to the old wrong (SP was never
192 a valid ISO 3166 code) territory code sr_SP which fortunately has the
193 same LCID as the newer sr_CS.
194 Linux also supports no_NO which is equivalent to nb_NO. */
198 } ambiguous_locale[] = {
199 { "bs_BA", MAKELANGID (LANG_BOSNIAN, 0x05) },
200 { "nn_NO", MAKELANGID (LANG_NORWEGIAN, SUBLANG_NORWEGIAN_NYNORSK) },
201 { "no_NO", MAKELANGID (LANG_NORWEGIAN, SUBLANG_NORWEGIAN_BOKMAL) },
202 { "sr_BA", MAKELANGID (LANG_BOSNIAN,
203 SUBLANG_SERBIAN_BOSNIA_HERZEGOVINA_CYRILLIC) },
204 { "sr_CS", MAKELANGID (LANG_SERBIAN, SUBLANG_SERBIAN_CYRILLIC) },
205 { "sr_ME", MAKELANGID (LANG_SERBIAN, SUBLANG_SERBIAN_CYRILLIC) },
206 { "sr_RS", MAKELANGID (LANG_SERBIAN, SUBLANG_SERBIAN_CYRILLIC) },
207 { "sr_SP", MAKELANGID (LANG_SERBIAN, SUBLANG_SERBIAN_CYRILLIC) },
/* As with sc_only_locale, the early exit on the first letter relies on
   ambiguous_locale staying sorted by first character. */
211 for (int i = 0; ambiguous_locale[i].loc
212 && ambiguous_locale[i].loc[0] <= locale[0]; ++i)
213 if (!strcmp (locale, ambiguous_locale[i].loc)
214 && GetLocaleInfo (ambiguous_locale[i].lcid, LOCALE_SISO639LANGNAME,
217 lcid = ambiguous_locale[i].lcid;
218 /* "@latin" modifier for the sr_XY locales changes collation
219 behaviour so lcid should accommodate that by being set to
220 the Latin sublang. */
221 if (!strncmp (locale, "sr_", 3) && has_modifier ("@latin"))
222 lcid = MAKELANGID (lcid & 0x3ff, (lcid >> 10) - 1);
226 else if (lcid == 0x0443) /* uz_UZ (Uzbek/Uzbekistan) */
228 /* Equivalent for "@cyrillic" modifier in uz_UZ locale */
/* NOTE(review): the lcid != 0 test looks redundant if this statement is only
   reached with lcid == 0x0443 - confirm. */
229 if (lcid != 0 && has_modifier ("@cyrillic"))
230 lcid = MAKELANGID (lcid & 0x3ff, (lcid >> 10) + 1);
232 last_lcid = lcid ?: (LCID) -1;
233 debug_printf ("LCID=0x%04x", last_lcid);
237 /* Never returns -1. Just skips invalid chars instead. Only if return_invalid
238 is set, s==NULL returns -1 since then it's used to recognize invalid strings
239 in the used charset. */
/* f_wctomb is called for one wide character at a time with buf as output and
   reports (size_t) -1 for a character it cannot encode.  The conversion state
   is zeroed before the first call and again after a failed conversion. */
241 lc_wcstombs (wctomb_p f_wctomb, const char *charset,
242 char *s, const wchar_t *pwcs, size_t n,
243 bool return_invalid = false)
248 size_t i, bytes, num_to_copy;
251 memset (&state, 0, sizeof state);
254 size_t num_bytes = 0;
257 bytes = f_wctomb (_REENT, buf, *pwcs++, charset, &state);
258 if (bytes != (size_t) -1)
260 else if (return_invalid)
267 bytes = f_wctomb (_REENT, buf, *pwcs, charset, &state);
268 if (bytes == (size_t) -1)
270 memset (&state, 0, sizeof state);
/* Copy min (n, bytes) bytes of the converted character. */
274 num_to_copy = (n > bytes ? bytes : n);
275 for (i = 0; i < num_to_copy; ++i)
/* NOTE(review): the (n >= bytes) term presumably discounts a terminating NUL
   that was stored last - confirm. */
279 return ptr - s - (n >= bytes);
/* lc_mbstowcs: convert the multibyte string s to wide characters with
   f_mbtowc and charset, starting from a zeroed conversion state.  t walks the
   input; every f_mbtowc call is told that 6 bytes are available, so s is
   presumably required to be NUL-terminated - confirm.  A (size_t) -1 result
   from f_mbtowc is tested explicitly. */
286 /* Never returns -1. Invalid sequences are translated to replacement
289 lc_mbstowcs (mbtowc_p f_mbtowc, const char *charset,
290 wchar_t *pwcs, const char *s, size_t n)
293 char *t = (char *) s;
297 memset (&state, 0, sizeof state);
302 bytes = f_mbtowc (_REENT, pwcs, t, 6 /* fake, always enough */,
304 if (bytes == (size_t) -1)
/* Comparator handed to bsearch for the lc_era and lc_msg tables.  a and b
   are each treated as a pointer to a char * (the locale name that comes first
   in the search key), and the two names are compared with strcmp. */
325 locale_cmp (const void *a, const void *b)
327 char **la = (char **) a;
328 char **lb = (char **) b;
329 return strcmp (*la, *lb);
332 /* Helper function to work around reallocs which move blocks even if they shrink.
333 Cygwin's realloc is not doing this, but tcsh's, for instance. All lc_foo
334 structures consist entirely of pointers so they are practically pointer
335 arrays. What we do here is just treat the lc_foo pointers as char ** and
336 rebase all char * pointers within, up to the given size of the structure. */
338 rebase_locale_buf (const void *ptrv, const char *newbase, const char *oldbase,
341 const char **ptrs = (const char **) ptrv;
342 const char **ptrsend = (const char **) ptrvend;
/* Shift every pointer slot in [ptrv, ptrvend) by the distance between the
   old and the new buffer. */
343 while (ptrs < ptrsend)
344 *ptrs++ += newbase - oldbase;
/* Fetch locale item `type' for lcid as a wide string stored at *ptr.  size is
   the available room in bytes and is handed to GetLocaleInfoW in wchar_t
   units; *ptr is then advanced past the num wide characters GetLocaleInfoW
   reported. */
348 __getlocaleinfo (LCID lcid, LCTYPE type, char **ptr, size_t size)
/* NOTE(review): x % 1 is always 0, so this guard can never fire.
   __set_lc_ctype_from_win aligns its wchar_t writes with % 2; presumably % 2
   was intended here as well - confirm. */
353 if ((uintptr_t) *ptr % 1)
355 ret = (wchar_t *) *ptr;
356 num = GetLocaleInfoW (lcid, type, ret, size / sizeof (wchar_t));
357 *ptr = (char *) (ret + num);
/* Convert the wide string `in' to the target charset with lc_wcstombs,
   writing at most size bytes at *ptr.  ret is set to the start of the
   converted string and num receives lc_wcstombs' result. */
362 __charfromwchar (const wchar_t *in, char **ptr, size_t size,
363 wctomb_p f_wctomb, const char *charset)
368 num = lc_wcstombs (f_wctomb, charset, ret = *ptr, in, size);
/* Read a numeric locale item: LOCALE_RETURN_NUMBER is or'ed into type and val
   serves as the output buffer.  Yields val, or 0 when GetLocaleInfoW
   returns 0. */
374 getlocaleint (LCID lcid, LCTYPE type)
377 return GetLocaleInfoW (lcid, type | LOCALE_RETURN_NUMBER, (PWCHAR) &val,
378 sizeof val) ? val : 0;
/* Flag bits tested by __eval_datetimefmt. */
383 DT_AMPM = 0x01, /* Enforce 12 hour time format. */
384 DT_ABBREV = 0x02, /* Enforce abbreviated month and day names. */
/* Read the Windows date/time picture `type' for lcid and rewrite it as a
   wide string at *ptr.  The letter tables below hold the replacement letters
   chosen per picture character; flags (DT_AMPM, DT_ABBREV) influence that
   choice. */
388 __eval_datetimefmt (LCID lcid, LCTYPE type, dt_flags flags, char **ptr,
394 const wchar_t *day_str = L"edaA";
395 const wchar_t *mon_str = L"mmbB";
396 const wchar_t *year_str = L"yyyY";
397 const wchar_t *hour12_str = L"lI";
398 const wchar_t *hour24_str = L"kH";
399 const wchar_t *t_str;
/* NOTE(review): x % 1 is always 0, so this guard can never fire.
   __set_lc_ctype_from_win aligns its wchar_t writes with % 2; presumably % 2
   was intended here as well - confirm. */
401 if ((uintptr_t) *ptr % 1)
403 wchar_t *ret = (wchar_t *) *ptr;
404 wchar_t *p = (wchar_t *) *ptr;
405 GetLocaleInfoW (lcid, type, buf, 80);
406 for (wchar_t *fmt = buf; *fmt; ++fmt)
/* Advance fmt up to the next single quote or the end of the picture. */
413 while (fmt[1] && *++fmt != L'\'')
419 t_str = (fc == L'd' ? day_str : fc == L'M' ? mon_str : year_str);
/* Count how often the picture character repeats; idx ends up as the run
   length minus one. */
420 for (idx = 0; fmt[1] == fc; ++idx, ++fmt);
423 if ((flags & DT_ABBREV) && fc != L'y' && idx == 3)
/* DT_AMPM forces the 12 hour letters even for a 24 hour picture. */
433 t_str = (fc == L'h' || (flags & DT_AMPM) ? hour12_str : hour24_str);
449 *p++ = (fc == L'm' ? L'M' : fc == L's' ? L'S' : L'p');
466 /* Convert Windows grouping format into POSIX grouping format. */
/* The grouping string for `type' is read with the ANSI API into a 10 byte
   local buffer and walked character by character; characters outside
   '0'..'9' are singled out by the test inside the loop. */
468 conv_grouping (LCID lcid, LCTYPE type, char **lc_ptr)
470 char buf[10]; /* Per MSDN max size of LOCALE_SGROUPING element incl. NUL */
475 GetLocaleInfoA (lcid, type, buf, 10);
476 /* Convert Windows grouping format into POSIX grouping format. */
477 for (char *c = buf; *c; ++c)
479 if (*c < '0' || *c > '9')
496 /* Called from newlib's setlocale() via __time_load_locale() if category
497 is LC_TIME. Returns LC_TIME values fetched from Windows locale data
498 in the structure pointed to by _time_locale. This is subsequently
499 accessed by functions like nl_langinfo, strftime, strptime. */
/* name is the POSIX locale name, charset the name of the target character
   set and f_wctomb the converter used together with it.  All strings are
   written into one malloc'ed buffer (MAX_TIME_BUFFER_SIZE bytes to start
   with) and _time_locale's members point into that buffer. */
501 __set_lc_time_from_win (const char *name,
502 const struct lc_time_T *_C_time_locale,
503 struct lc_time_T *_time_locale,
504 char **lc_time_buf, wctomb_p f_wctomb,
507 LCID lcid = __get_lcid_from_locale (name);
508 if (lcid == (LCID) -1)
510 if (!lcid && !strcmp (charset, "ASCII"))
513 # define MAX_TIME_BUFFER_SIZE 4096
515 char *new_lc_time_buf = (char *) malloc (MAX_TIME_BUFFER_SIZE);
516 const char *lc_time_end = new_lc_time_buf + MAX_TIME_BUFFER_SIZE;
/* NOTE(review): lc_time_end was already derived from new_lc_time_buf above,
   before this NULL check. */
518 if (!new_lc_time_buf)
520 char *lc_time_ptr = new_lc_time_buf;
522 /* C.foo is just a copy of "C" with fixed charset. */
524 memcpy (_time_locale, _C_time_locale, sizeof (struct lc_time_T));
526 _time_locale->codeset = lc_time_ptr;
527 lc_time_ptr = stpcpy (lc_time_ptr, charset) + 1;
531 char locale[ENCODING_LEN + 1];
/* NOTE(review): unbounded strcpy into ENCODING_LEN + 1 bytes; presumably
   name is known to fit - confirm. */
532 strcpy (locale, name);
533 /* Removes the charset from the locale and attach the modifier to the
534 language_TERRITORY part. */
535 char *c = strchr (locale, '.');
539 char *c2 = strchr (c + 1, '@');
540 /* Ignore @cjknarrow modifier since it's a very personal thing between
541 Cygwin and newlib... */
542 if (c2 && strcmp (c2, "@cjknarrow"))
543 memmove (c, c2, strlen (c2) + 1);
545 /* Now search in the alphabetically ordered lc_era array for the
547 lc_era_t locale_key = { locale, NULL, NULL, NULL, NULL, NULL ,
548 NULL, NULL, NULL, NULL, NULL };
549 lc_era_t *era = (lc_era_t *) bsearch ((void *) &locale_key, (void *) lc_era,
550 sizeof lc_era / sizeof *lc_era,
551 sizeof *lc_era, locale_cmp);
554 for (int i = 0; i < 12; ++i)
556 _time_locale->wmon[i] = getlocaleinfo (time,
557 LOCALE_SABBREVMONTHNAME1 + i);
558 _time_locale->mon[i] = charfromwchar (time, wmon[i]);
560 /* month and alt_month */
561 for (int i = 0; i < 12; ++i)
563 _time_locale->wmonth[i] = getlocaleinfo (time, LOCALE_SMONTHNAME1 + i);
564 _time_locale->month[i] = _time_locale->alt_month[i]
565 = charfromwchar (time, wmonth[i]);
/* Windows' seventh day name goes to index 0; day names 1..6 follow at
   indexes 1..6.  The same layout is used for the full names further down. */
568 _time_locale->wwday[0] = getlocaleinfo (time, LOCALE_SABBREVDAYNAME7);
569 _time_locale->wday[0] = charfromwchar (time, wwday[0]);
570 for (int i = 0; i < 6; ++i)
572 _time_locale->wwday[i + 1] = getlocaleinfo (time,
573 LOCALE_SABBREVDAYNAME1 + i);
574 _time_locale->wday[i + 1] = charfromwchar (time, wwday[i + 1]);
577 _time_locale->wweekday[0] = getlocaleinfo (time, LOCALE_SDAYNAME7);
578 _time_locale->weekday[0] = charfromwchar (time, wweekday[0]);
579 for (int i = 0; i < 6; ++i)
581 _time_locale->wweekday[i + 1] = getlocaleinfo (time,
582 LOCALE_SDAYNAME1 + i);
583 _time_locale->weekday[i + 1] = charfromwchar (time, wweekday[i + 1]);
/* For each format below, a non-empty entry in the era table takes precedence
   over the format derived from the Windows locale data. */
587 if (era && *era->t_fmt)
589 _time_locale->wX_fmt = (const wchar_t *) lc_time_ptr;
590 lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wX_fmt,
594 _time_locale->wX_fmt = eval_datetimefmt (LOCALE_STIMEFORMAT, DT_DEFAULT);
595 _time_locale->X_fmt = charfromwchar (time, wX_fmt);
597 if (era && *era->d_fmt)
599 _time_locale->wx_fmt = (const wchar_t *) lc_time_ptr;
600 lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wx_fmt,
604 _time_locale->wx_fmt = eval_datetimefmt (LOCALE_SSHORTDATE, DT_DEFAULT);
605 _time_locale->x_fmt = charfromwchar (time, wx_fmt);
607 if (era && *era->d_t_fmt)
609 _time_locale->wc_fmt = (const wchar_t *) lc_time_ptr;
610 lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wc_fmt,
615 _time_locale->wc_fmt = eval_datetimefmt (LOCALE_SLONGDATE, DT_ABBREV);
/* Overwrite the wide character just before lc_time_ptr (presumably the
   terminating NUL of the date format) with a space, so that the time format
   evaluated next continues the same string. */
616 ((wchar_t *) lc_time_ptr)[-1] = L' ';
617 eval_datetimefmt (LOCALE_STIMEFORMAT, DT_DEFAULT);
619 _time_locale->c_fmt = charfromwchar (time, wc_fmt);
621 _time_locale->wam_pm[0] = getlocaleinfo (time, LOCALE_S1159);
622 _time_locale->wam_pm[1] = getlocaleinfo (time, LOCALE_S2359);
623 _time_locale->am_pm[0] = charfromwchar (time, wam_pm[0]);
624 _time_locale->am_pm[1] = charfromwchar (time, wam_pm[1]);
626 if (era && *era->date_fmt)
628 _time_locale->wdate_fmt = (const wchar_t *) lc_time_ptr;
629 lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wdate_fmt,
/* Without an era-specific entry, wdate_fmt shares the wc_fmt string. */
633 _time_locale->wdate_fmt = _time_locale->wc_fmt;
634 _time_locale->date_fmt = charfromwchar (time, wdate_fmt);
/* md_order becomes "dm" when the LOCALE_IDATE value starts with '1',
   otherwise "md". */
638 GetLocaleInfoW (lcid, LOCALE_IDATE, buf, 80);
639 _time_locale->md_order = (const char *) lc_time_ptr;
640 lc_time_ptr = stpcpy (lc_time_ptr, *buf == L'1' ? "dm" : "md") + 1;
645 _time_locale->wampm_fmt = (const wchar_t *) lc_time_ptr;
646 lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wampm_fmt,
647 era->t_fmt_ampm) + 1);
650 _time_locale->wampm_fmt = eval_datetimefmt (LOCALE_STIMEFORMAT, DT_AMPM);
651 _time_locale->ampm_fmt = charfromwchar (time, wampm_fmt);
655 /* Evaluate string length in target charset. Characters invalid in the
656 target charset are simply ignored, as on Linux. */
658 len += lc_wcstombs (f_wctomb, charset, NULL, era->era, 0) + 1;
659 len += lc_wcstombs (f_wctomb, charset, NULL, era->era_d_fmt, 0) + 1;
660 len += lc_wcstombs (f_wctomb, charset, NULL, era->era_d_t_fmt, 0) + 1;
661 len += lc_wcstombs (f_wctomb, charset, NULL, era->era_t_fmt, 0) + 1;
662 len += lc_wcstombs (f_wctomb, charset, NULL, era->alt_digits, 0) + 1;
663 len += (wcslen (era->era) + 1) * sizeof (wchar_t);
664 len += (wcslen (era->era_d_fmt) + 1) * sizeof (wchar_t);
665 len += (wcslen (era->era_d_t_fmt) + 1) * sizeof (wchar_t);
666 len += (wcslen (era->era_t_fmt) + 1) * sizeof (wchar_t);
667 len += (wcslen (era->alt_digits) + 1) * sizeof (wchar_t);
669 /* Make sure data fits into the buffer */
670 if (lc_time_ptr + len > lc_time_end)
672 len = lc_time_ptr + len - new_lc_time_buf;
673 char *tmp = (char *) realloc (new_lc_time_buf, len);
/* If realloc moved the block, fix up the pointers already stored in
   _time_locale, then re-derive the cursor and the end from the new base. */
678 if (tmp != new_lc_time_buf)
679 rebase_locale_buf (_time_locale, tmp, new_lc_time_buf,
681 lc_time_ptr = tmp + (lc_time_ptr - new_lc_time_buf);
682 new_lc_time_buf = tmp;
683 lc_time_end = new_lc_time_buf + len;
690 _time_locale->wera = (const wchar_t *) lc_time_ptr;
691 lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wera,
693 _time_locale->era = charfromwchar (time, wera);
695 _time_locale->wera_d_fmt = (const wchar_t *) lc_time_ptr;
696 lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wera_d_fmt,
697 era->era_d_fmt) + 1);
698 _time_locale->era_d_fmt = charfromwchar (time, wera_d_fmt);
700 _time_locale->wera_d_t_fmt = (const wchar_t *) lc_time_ptr;
701 lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wera_d_t_fmt,
702 era->era_d_t_fmt) + 1);
703 _time_locale->era_d_t_fmt = charfromwchar (time, wera_d_t_fmt);
705 _time_locale->wera_t_fmt = (const wchar_t *) lc_time_ptr;
706 lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wera_t_fmt,
707 era->era_t_fmt) + 1);
708 _time_locale->era_t_fmt = charfromwchar (time, wera_t_fmt);
710 _time_locale->walt_digits = (const wchar_t *) lc_time_ptr;
711 lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->walt_digits,
712 era->alt_digits) + 1);
713 _time_locale->alt_digits = charfromwchar (time, walt_digits);
/* Here the era related members all share one empty string at lc_time_ptr. */
719 _time_locale->wera_d_fmt =
720 _time_locale->wera_d_t_fmt =
721 _time_locale->wera_t_fmt =
722 _time_locale->walt_digits = (const wchar_t *) lc_time_ptr;
724 _time_locale->era_d_fmt =
725 _time_locale->era_d_t_fmt =
726 _time_locale->era_t_fmt =
727 _time_locale->alt_digits = (const char *) lc_time_ptr;
728 /* Twice, to make sure wide char strings are correctly terminated. */
729 *lc_time_ptr++ = '\0';
730 *lc_time_ptr++ = '\0';
/* Shrink the buffer to the bytes actually used. */
734 char *tmp = (char *) realloc (new_lc_time_buf, lc_time_ptr - new_lc_time_buf);
737 free (new_lc_time_buf);
740 if (tmp != new_lc_time_buf)
741 rebase_locale_buf (_time_locale, tmp, new_lc_time_buf,
749 /* Called from newlib's setlocale() via __ctype_load_locale() if category
750 is LC_CTYPE. Returns LC_CTYPE values fetched from Windows locale data
751 in the structure pointed to by _ctype_locale. This is subsequently
752 accessed by functions like nl_langinfo, localeconv, printf, etc. */
/* The codeset name, mb_cur_max and the native digits are written into one
   malloc'ed buffer of MAX_CTYPE_BUFFER_SIZE bytes; _ctype_locale's members
   point into that buffer. */
754 __set_lc_ctype_from_win (const char *name,
755 const struct lc_ctype_T *_C_ctype_locale,
756 struct lc_ctype_T *_ctype_locale,
757 char **lc_ctype_buf, wctomb_p f_wctomb,
758 const char *charset, int mb_cur_max)
760 LCID lcid = __get_lcid_from_locale (name);
761 if (lcid == (LCID) -1)
763 if (!lcid && !strcmp (charset, "ASCII"))
766 # define MAX_CTYPE_BUFFER_SIZE 256
768 char *new_lc_ctype_buf = (char *) malloc (MAX_CTYPE_BUFFER_SIZE);
770 if (!new_lc_ctype_buf)
772 char *lc_ctype_ptr = new_lc_ctype_buf;
773 /* C.foo is just a copy of "C" with fixed charset. */
775 memcpy (_ctype_locale, _C_ctype_locale, sizeof (struct lc_ctype_T));
777 _ctype_locale->codeset = lc_ctype_ptr;
778 lc_ctype_ptr = stpcpy (lc_ctype_ptr, charset) + 1;
/* mb_cur_max is stored as a single byte followed by a NUL. */
780 _ctype_locale->mb_cur_max = lc_ctype_ptr;
781 *lc_ctype_ptr++ = mb_cur_max;
782 *lc_ctype_ptr++ = '\0';
785 /* outdigits and woutdigits */
787 GetLocaleInfoW (lcid, LOCALE_SNATIVEDIGITS, digits, 11);
788 for (int i = 0; i <= 9; ++i)
792 /* Make sure the wchar_t's are always 2 byte aligned. */
793 if ((uintptr_t) lc_ctype_ptr % 2)
795 wchar_t *woutdig = (wchar_t *) lc_ctype_ptr;
796 _ctype_locale->woutdigits[i] = (const wchar_t *) woutdig;
797 *woutdig++ = digits[i];
799 lc_ctype_ptr = (char *) woutdig;
800 _ctype_locale->outdigits[i] = lc_ctype_ptr;
801 memset (&state, 0, sizeof state);
/* NOTE(review): the f_wctomb result is added to the cursor directly; a
   (size_t) -1 result for a digit that cannot be encoded would move the
   cursor backwards unless that case is handled elsewhere - confirm. */
802 lc_ctype_ptr += f_wctomb (_REENT, lc_ctype_ptr, digits[i], charset,
804 *lc_ctype_ptr++ = '\0';
/* Shrink the buffer to the bytes actually used. */
808 char *tmp = (char *) realloc (new_lc_ctype_buf,
809 lc_ctype_ptr - new_lc_ctype_buf);
812 free (new_lc_ctype_buf);
/* If realloc moved the block, fix up the pointers stored in _ctype_locale. */
815 if (tmp != new_lc_ctype_buf)
816 rebase_locale_buf (_ctype_locale, tmp, new_lc_ctype_buf,
819 free (*lc_ctype_buf);
824 /* Called from newlib's setlocale() via __numeric_load_locale() if category
825 is LC_NUMERIC. Returns LC_NUMERIC values fetched from Windows locale data
826 in the structure pointed to by _numeric_locale. This is subsequently
827 accessed by functions like nl_langinfo, localeconv, printf, etc. */
/* Decimal point, thousands separator, grouping and codeset are written into
   one malloc'ed buffer of MAX_NUMERIC_BUFFER_SIZE bytes; on completion the
   buffer previously held in *lc_numeric_buf is freed and replaced. */
829 __set_lc_numeric_from_win (const char *name,
830 const struct lc_numeric_T *_C_numeric_locale,
831 struct lc_numeric_T *_numeric_locale,
832 char **lc_numeric_buf, wctomb_p f_wctomb,
835 LCID lcid = __get_lcid_from_locale (name);
836 if (lcid == (LCID) -1)
838 if (!lcid && !strcmp (charset, "ASCII"))
841 # define MAX_NUMERIC_BUFFER_SIZE 256
843 char *new_lc_numeric_buf = (char *) malloc (MAX_NUMERIC_BUFFER_SIZE);
844 const char *lc_numeric_end = new_lc_numeric_buf + MAX_NUMERIC_BUFFER_SIZE;
/* NOTE(review): lc_numeric_end was already derived from new_lc_numeric_buf
   above, before this NULL check. */
846 if (!new_lc_numeric_buf)
848 char *lc_numeric_ptr = new_lc_numeric_buf;
849 /* C.foo is just a copy of "C" with fixed charset. */
851 memcpy (_numeric_locale, _C_numeric_locale, sizeof (struct lc_numeric_T));
/* Each item is fetched as a wide string first and then converted to the
   target charset. */
855 _numeric_locale->wdecimal_point = getlocaleinfo (numeric, LOCALE_SDECIMAL);
856 _numeric_locale->decimal_point = charfromwchar (numeric, wdecimal_point);
858 _numeric_locale->wthousands_sep = getlocaleinfo (numeric, LOCALE_STHOUSAND);
859 _numeric_locale->thousands_sep = charfromwchar (numeric, wthousands_sep);
861 _numeric_locale->grouping = conv_grouping (lcid, LOCALE_SGROUPING,
865 _numeric_locale->codeset = lc_numeric_ptr;
866 lc_numeric_ptr = stpcpy (lc_numeric_ptr, charset) + 1;
/* Shrink the buffer to the bytes actually used. */
868 char *tmp = (char *) realloc (new_lc_numeric_buf,
869 lc_numeric_ptr - new_lc_numeric_buf);
872 free (new_lc_numeric_buf);
/* If realloc moved the block, fix up the pointers stored in
   _numeric_locale. */
875 if (tmp != new_lc_numeric_buf)
876 rebase_locale_buf (_numeric_locale, tmp, new_lc_numeric_buf,
877 _numeric_locale + 1);
/* Release the previous buffer and publish the new one. */
879 free (*lc_numeric_buf);
880 *lc_numeric_buf = tmp;
884 /* Called from newlib's setlocale() via __monetary_load_locale() if category
885 is LC_MONETARY. Returns LC_MONETARY values fetched from Windows locale data
886 in the structure pointed to by _monetary_locale. This is subsequently
887 accessed by functions like nl_langinfo, localeconv, printf, etc. */
/* All strings and the single-byte numeric items are written into one
   malloc'ed buffer of MAX_MONETARY_BUFFER_SIZE bytes; on completion the
   buffer previously held in *lc_monetary_buf is freed and replaced. */
889 __set_lc_monetary_from_win (const char *name,
890 const struct lc_monetary_T *_C_monetary_locale,
891 struct lc_monetary_T *_monetary_locale,
892 char **lc_monetary_buf, wctomb_p f_wctomb,
895 LCID lcid = __get_lcid_from_locale (name);
896 if (lcid == (LCID) -1)
898 if (!lcid && !strcmp (charset, "ASCII"))
901 # define MAX_MONETARY_BUFFER_SIZE 512
903 char *new_lc_monetary_buf = (char *) malloc (MAX_MONETARY_BUFFER_SIZE);
904 const char *lc_monetary_end = new_lc_monetary_buf + MAX_MONETARY_BUFFER_SIZE;
/* NOTE(review): lc_monetary_end was already derived from new_lc_monetary_buf
   above, before this NULL check. */
906 if (!new_lc_monetary_buf)
908 char *lc_monetary_ptr = new_lc_monetary_buf;
909 /* C.foo is just a copy of "C" with fixed charset. */
911 memcpy (_monetary_locale, _C_monetary_locale, sizeof (struct lc_monetary_T));
914 /* int_curr_symbol */
915 _monetary_locale->wint_curr_symbol = getlocaleinfo (monetary,
917 /* No spacing char means space. */
918 if (!_monetary_locale->wint_curr_symbol[3])
920 wchar_t *wc = (wchar_t *) _monetary_locale->wint_curr_symbol + 3;
923 lc_monetary_ptr = (char *) wc;
925 _monetary_locale->int_curr_symbol = charfromwchar (monetary,
927 /* currency_symbol */
928 _monetary_locale->wcurrency_symbol = getlocaleinfo (monetary,
930 /* As on Linux: If the currency_symbol can't be represented in the
931 given charset, use int_curr_symbol. */
/* The probe passes a NULL destination and return_invalid = true, so
   lc_wcstombs answers (size_t) -1 for a string it cannot represent. */
932 if (lc_wcstombs (f_wctomb, charset, NULL,
933 _monetary_locale->wcurrency_symbol,
934 0, true) == (size_t) -1)
935 _monetary_locale->currency_symbol = _monetary_locale->int_curr_symbol;
937 _monetary_locale->currency_symbol = charfromwchar (monetary,
939 /* mon_decimal_point */
940 _monetary_locale->wmon_decimal_point = getlocaleinfo (monetary,
941 LOCALE_SMONDECIMALSEP);
942 _monetary_locale->mon_decimal_point = charfromwchar (monetary,
944 /* mon_thousands_sep */
945 _monetary_locale->wmon_thousands_sep = getlocaleinfo (monetary,
946 LOCALE_SMONTHOUSANDSEP);
947 _monetary_locale->mon_thousands_sep = charfromwchar (monetary,
950 _monetary_locale->mon_grouping = conv_grouping (lcid, LOCALE_SMONGROUPING,
953 _monetary_locale->wpositive_sign = getlocaleinfo (monetary,
954 LOCALE_SPOSITIVESIGN);
955 _monetary_locale->positive_sign = charfromwchar (monetary, wpositive_sign);
957 _monetary_locale->wnegative_sign = getlocaleinfo (monetary,
958 LOCALE_SNEGATIVESIGN);
959 _monetary_locale->negative_sign = charfromwchar (monetary, wnegative_sign);
/* Each numeric item below is stored as one byte in the buffer, and the
   matching member (or pair of members) points at that byte. */
960 /* int_frac_digits */
961 *lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_IINTLCURRDIGITS);
962 _monetary_locale->int_frac_digits = lc_monetary_ptr++;
964 *lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_ICURRDIGITS);
965 _monetary_locale->frac_digits = lc_monetary_ptr++;
966 /* p_cs_precedes and int_p_cs_precedes */
967 *lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_IPOSSYMPRECEDES);
968 _monetary_locale->p_cs_precedes
969 = _monetary_locale->int_p_cs_precedes = lc_monetary_ptr++;
970 /* p_sep_by_space and int_p_sep_by_space */
971 *lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_IPOSSEPBYSPACE);
972 _monetary_locale->p_sep_by_space
973 = _monetary_locale->int_p_sep_by_space = lc_monetary_ptr++;
974 /* n_cs_precedes and int_n_cs_precedes */
975 *lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_INEGSYMPRECEDES);
976 _monetary_locale->n_cs_precedes
977 = _monetary_locale->int_n_cs_precedes = lc_monetary_ptr++;
978 /* n_sep_by_space and int_n_sep_by_space */
979 *lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_INEGSEPBYSPACE);
980 _monetary_locale->n_sep_by_space
981 = _monetary_locale->int_n_sep_by_space = lc_monetary_ptr++;
982 /* p_sign_posn and int_p_sign_posn */
983 *lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_IPOSSIGNPOSN);
984 _monetary_locale->p_sign_posn
985 = _monetary_locale->int_p_sign_posn = lc_monetary_ptr++;
986 /* n_sign_posn and int_n_sign_posn */
987 *lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_INEGSIGNPOSN);
988 _monetary_locale->n_sign_posn
989 = _monetary_locale->int_n_sign_posn = lc_monetary_ptr++;
992 _monetary_locale->codeset = lc_monetary_ptr;
993 lc_monetary_ptr = stpcpy (lc_monetary_ptr, charset) + 1;
/* Shrink the buffer to the bytes actually used. */
995 char *tmp = (char *) realloc (new_lc_monetary_buf,
996 lc_monetary_ptr - new_lc_monetary_buf);
999 free (new_lc_monetary_buf);
/* If realloc moved the block, fix up the pointers stored in
   _monetary_locale. */
1002 if (tmp != new_lc_monetary_buf)
1003 rebase_locale_buf (_monetary_locale, tmp, new_lc_monetary_buf,
1004 _monetary_locale + 1);
/* NOTE(review): free accepts NULL, so this guard is not needed; the numeric
   and ctype loaders call free on their old buffer unconditionally. */
1005 if (*lc_monetary_buf)
1006 free (*lc_monetary_buf);
1007 *lc_monetary_buf = tmp;
/* Fill _messages_locale for the locale `name'.  The codeset name and the
   yes/no expressions and strings taken from the lc_msg table are stored in
   one malloc'ed buffer, both converted to charset and as wide strings; the
   buffer is sized up front and published through *lc_messages_buf. */
1012 __set_lc_messages_from_win (const char *name,
1013 const struct lc_messages_T *_C_messages_locale,
1014 struct lc_messages_T *_messages_locale,
1015 char **lc_messages_buf,
1016 wctomb_p f_wctomb, const char *charset)
1018 LCID lcid = __get_lcid_from_locale (name);
1019 if (lcid == (LCID) -1)
1021 if (!lcid && !strcmp (charset, "ASCII"))
1024 char locale[ENCODING_LEN + 1];
1026 lc_msg_t *msg = NULL;
1028 /* C.foo is just a copy of "C" with fixed charset. */
1030 memcpy (_messages_locale, _C_messages_locale, sizeof (struct lc_messages_T));
/* NOTE(review): unbounded strcpy into ENCODING_LEN + 1 bytes; presumably
   name is known to fit - confirm. */
1033 strcpy (locale, name);
1034 /* Removes the charset from the locale and attach the modifier to the
1035 language_TERRITORY part. */
1036 c = strchr (locale, '.');
1040 c2 = strchr (c + 1, '@');
1041 /* Ignore @cjknarrow modifier since it's a very personal thing between
1042 Cygwin and newlib... */
1043 if (c2 && strcmp (c2, "@cjknarrow"))
1044 memmove (c, c2, strlen (c2) + 1);
1046 /* Now search in the alphabetically ordered lc_msg array for the
1048 lc_msg_t locale_key = { locale, NULL, NULL, NULL, NULL };
1049 msg = (lc_msg_t *) bsearch ((void *) &locale_key, (void *) lc_msg,
1050 sizeof lc_msg / sizeof *lc_msg,
1051 sizeof *lc_msg, locale_cmp);
1056 /* Evaluate string length in target charset. Characters invalid in the
1057 target charset are simply ignored, as on Linux. */
1059 len += (strlen (charset) + 1);
1062 len += lc_wcstombs (f_wctomb, charset, NULL, msg->yesexpr, 0) + 1;
1063 len += lc_wcstombs (f_wctomb, charset, NULL, msg->noexpr, 0) + 1;
1064 len += lc_wcstombs (f_wctomb, charset, NULL, msg->yesstr, 0) + 1;
1065 len += lc_wcstombs (f_wctomb, charset, NULL, msg->nostr, 0) + 1;
1066 len += (wcslen (msg->yesexpr) + 1) * sizeof (wchar_t);
1067 len += (wcslen (msg->noexpr) + 1) * sizeof (wchar_t);
1068 len += (wcslen (msg->yesstr) + 1) * sizeof (wchar_t);
1069 len += (wcslen (msg->nostr) + 1) * sizeof (wchar_t);
1074 char *new_lc_messages_buf = (char *) malloc (len);
1075 const char *lc_messages_end = new_lc_messages_buf + len;
1077 if (!new_lc_messages_buf)
1080 c = new_lc_messages_buf;
1082 _messages_locale->codeset = c;
1083 c = stpcpy (c, charset) + 1;
/* Multibyte strings first; each conversion reports its length, which moves c
   past the string and its NUL to where the next one starts. */
1086 _messages_locale->yesexpr = (const char *) c;
1087 len = lc_wcstombs (f_wctomb, charset, c, msg->yesexpr, lc_messages_end - c);
1088 _messages_locale->noexpr = (const char *) (c += len + 1);
1089 len = lc_wcstombs (f_wctomb, charset, c, msg->noexpr, lc_messages_end - c);
1090 _messages_locale->yesstr = (const char *) (c += len + 1);
1091 len = lc_wcstombs (f_wctomb, charset, c, msg->yesstr, lc_messages_end - c);
1092 _messages_locale->nostr = (const char *) (c += len + 1);
1093 len = lc_wcstombs (f_wctomb, charset, c, msg->nostr, lc_messages_end - c);
/* NOTE(review): c % 1 is always 0, so this guard can never fire before c is
   used as a wchar_t pointer.  If it is changed to % 2, make sure the buffer
   length computed above also accounts for the padding byte - confirm. */
1095 if ((uintptr_t) c % 1)
1097 wchar_t *wc = (wchar_t *) c;
1098 _messages_locale->wyesexpr = (const wchar_t *) wc;
1099 wc = wcpcpy (wc, msg->yesexpr) + 1;
1100 _messages_locale->wnoexpr = (const wchar_t *) wc;
1101 wc = wcpcpy (wc, msg->noexpr) + 1;
1102 _messages_locale->wyesstr = (const wchar_t *) wc;
1103 wc = wcpcpy (wc, msg->yesstr) + 1;
1104 _messages_locale->wnostr = (const wchar_t *) wc;
1105 wcpcpy (wc, msg->nostr);
/* Release the previous buffer and publish the new one. */
1108 if (*lc_messages_buf)
1109 free (*lc_messages_buf);
1110 *lc_messages_buf = new_lc_messages_buf;
/* LC_COLLATE state written by __collate_load_locale: the LCID, the
   multibyte-to-wide converter and the charset name that strcoll and strxfrm
   use for their conversions and comparisons.  The initial values are LCID 0,
   __ascii_mbtowc and "ASCII". */
1114 LCID collate_lcid = 0;
1115 static mbtowc_p collate_mbtowc = __ascii_mbtowc;
1116 char collate_charset[ENCODING_LEN + 1] = "ASCII";
1118 /* Called from newlib's setlocale() if category is LC_COLLATE. Stores
1119 LC_COLLATE locale information. This is subsequently accessed by the
1120 below functions strcoll, strxfrm, wcscoll, wcsxfrm. */
1122 __collate_load_locale (const char *name, mbtowc_p f_mbtowc, const char *charset)
1124 LCID lcid = __get_lcid_from_locale (name);
1125 if (lcid == (LCID) -1)
/* Record the LCID, the converter and the charset name for later use. */
1127 collate_lcid = lcid;
1128 collate_mbtowc = f_mbtowc;
/* NOTE(review): unbounded copy into collate_charset (ENCODING_LEN + 1
   bytes); presumably charset is known to fit - confirm. */
1129 stpcpy (collate_charset, charset);
/* Accessor with C linkage for the charset name held in collate_charset. */
1133 extern "C" const char *
1134 __get_current_collate_codeset (void)
1136 return collate_charset;
1139 /* We use the Windows functions for locale-specific string comparison and
1140 transformation. The advantage is that we don't need any files with
1141 collation information. */
1143 wcscoll (const wchar_t *ws1, const wchar_t *ws2)
1148 return wcscmp (ws1, ws2);
/* Both strings are passed as NUL-terminated (length -1). */
1149 ret = CompareStringW (collate_lcid, 0, ws1, -1, ws2, -1);
/* CompareStringW reports CSTR_LESS_THAN, CSTR_EQUAL or CSTR_GREATER_THAN;
   subtracting CSTR_EQUAL turns that into a negative, zero or positive
   result. */
1152 return ret - CSTR_EQUAL;
/* Multibyte counterpart of wcscoll: both strings are converted to wide
   characters with the collate converter and charset, then compared with
   CompareStringW. */
1156 strcoll (const char *s1, const char *s2)
1164 return strcmp (s1, s2);
1165 /* The ANSI version of CompareString uses the default charset of the lcid,
1166 so we must use the Unicode version. */
/* Each string is measured first (NULL destination, n of 0) and then
   converted.  A buffer is malloc'ed only when more than NT_MAX_PATH wide
   characters are needed. */
1167 n1 = lc_mbstowcs (collate_mbtowc, collate_charset, NULL, s1, 0) + 1;
1168 ws1 = (n1 > NT_MAX_PATH ? (wchar_t *) malloc (n1 * sizeof (wchar_t))
1170 lc_mbstowcs (collate_mbtowc, collate_charset, ws1, s1, n1);
1171 n2 = lc_mbstowcs (collate_mbtowc, collate_charset, NULL, s2, 0) + 1;
1172 ws2 = (n2 > NT_MAX_PATH ? (wchar_t *) malloc (n2 * sizeof (wchar_t))
1174 lc_mbstowcs (collate_mbtowc, collate_charset, ws2, s2, n2);
1175 ret = CompareStringW (collate_lcid, 0, ws1, -1, ws2, -1);
1176 if (n1 > NT_MAX_PATH)
1178 if (n2 > NT_MAX_PATH)
/* Subtracting CSTR_EQUAL turns CompareStringW's result into a negative, zero
   or positive value. */
1182 return ret - CSTR_EQUAL;
/* Transform ws2 into a sort key stored in ws1 (room for wsn wide characters)
   by means of LCMapStringW with LCMAP_SORTKEY | LCMAP_BYTEREV. */
1186 wcsxfrm (wchar_t *ws1, const wchar_t *ws2, size_t wsn)
1191 return wcslcpy (ws1, ws2, wsn);
/* The destination size is handed over in bytes. */
1192 ret = LCMapStringW (collate_lcid, LCMAP_SORTKEY | LCMAP_BYTEREV,
1193 ws2, -1, ws1, wsn * sizeof (wchar_t));
1194 /* LCMapStringW returns byte count including the terminating NUL character,
1195 wcsxfrm is supposed to return length in wchar_t excluding the NUL.
1196 Since the array is only single byte NUL-terminated we must make sure
1197 the result is wchar_t-NUL terminated. */
/* Convert the byte count to wchar_t units; the + 1 rounds up when wchar_t is
   two bytes wide. */
1200 ret = (ret + 1) / sizeof (wchar_t);
1206 if (GetLastError () != ERROR_INSUFFICIENT_BUFFER)
/* Transform the multibyte string S2 into a sort key stored in S1, which
   has room for SN bytes.
   One path falls back to strlcpy (NOTE(review): its condition is not
   visible in this excerpt).  Otherwise S2 is converted to wide characters
   with lc_mbstowcs (length query first, then the actual conversion) and
   LCMapStringW writes the LCMAP_SORTKEY byte string for collate_lcid
   directly into S1.
   Conversion buffers of more than NT_MAX_PATH wide characters are taken
   from malloc.
   NOTE(review): the alternative buffer source, the statement guarded by
   the trailing size check and the handling of a failed LCMapStringW call
   are not visible in this excerpt.  */
1212 strxfrm (char *s1, const char *s2, size_t sn)
1220 return strlcpy (s1, s2, sn);
1221 /* The ANSI version of LCMapString uses the default charset of the lcid,
1222 so we must use the Unicode version. */
1223 n2 = lc_mbstowcs (collate_mbtowc, collate_charset, NULL, s2, 0) + 1;
1224 ws2 = (n2 > NT_MAX_PATH ? (wchar_t *) malloc (n2 * sizeof (wchar_t))
1226 lc_mbstowcs (collate_mbtowc, collate_charset, ws2, s2, n2);
1227 /* The sort key is a NUL-terminated byte string. */
1228 ret = LCMapStringW (collate_lcid, LCMAP_SORTKEY, ws2, -1, (PWCHAR) s1, sn);
1229 if (n2 > NT_MAX_PATH)
1233 if (GetLastError () != ERROR_INSUFFICIENT_BUFFER)
1237 /* LCMapStringW returns byte count including the terminating NUL character.
1238 strxfrm is supposed to return length excluding the NUL. */
1242 /* Fetch default ANSI codepage from locale info and generate a setlocale
1243 compatible character set code. Called from newlib's setlocale(), if the
1244 charset isn't given explicitly in the POSIX compatible locale specifier. */
/* LOCALE is the POSIX locale specifier; CHARSET receives the selected
   charset name.
   The locale is resolved to an LCID with __get_lcid_from_locale.  An LCID
   of 0 ("C") or (LCID) -1 (invalid) is handled separately; otherwise the
   default ANSI codepage is fetched as a number into cp with GetLocaleInfoW.
   The chains of LCID tests below pick the charset string cs for locales
   that need something other than the codepage default, e.g. be_BY yields
   "UTF-8" with the "@latin" modifier and "CP1251" without it.  The
   has_modifier tests compare against the local `modifier', which is the
   part of LOCALE starting at '@', or "" if there is none.
   The result is copied into CHARSET with stpcpy.
   NOTE(review): no length check on CHARSET is visible, so the caller has
   to supply a large enough buffer.  The codepage dispatch around the LCID
   tests and most of the cs assignments are not visible in this excerpt.  */
1246 __set_charset_from_locale (const char *locale, char *charset)
1249 LCID lcid = __get_lcid_from_locale (locale);
1252 /* "C" locale, or invalid locale? */
1253 if (lcid == 0 || lcid == (LCID) -1)
1255 else if (!GetLocaleInfoW (lcid,
1256 LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER,
1257 (PWCHAR) &cp, sizeof cp))
1259 /* Translate codepage and lcid to a charset closely aligned with the default
1260 charsets defined in Glibc. */
1262 const char *modifier = strchr (locale, '@') ?: "";
1284 if (lcid == 0x081a /* sr_CS (Serbian Language/Former
1285 Serbia and Montenegro) */
1286 || lcid == 0x181a /* sr_BA (Serbian Language/Bosnia
1288 || lcid == 0x241a /* sr_RS (Serbian Language/Serbia) */
1289 || lcid == 0x2c1a /* sr_ME (Serbian Language/Montenegro)*/
1290 || lcid == 0x0442) /* tk_TM (Turkmen/Turkmenistan) */
1292 else if (lcid == 0x041c) /* sq_AL (Albanian/Albania) */
1298 if (lcid == 0x0c1a /* sr_CS (Serbian Language/Former
1299 Serbia and Montenegro) */
1300 || lcid == 0x1c1a /* sr_BA (Serbian Language/Bosnia
1302 || lcid == 0x281a /* sr_RS (Serbian Language/Serbia) */
1303 || lcid == 0x301a /* sr_ME (Serbian Language/Montenegro)*/
1304 || lcid == 0x0440 /* ky_KG (Kyrgyz/Kyrgyzstan) */
1305 || lcid == 0x0843 /* uz_UZ (Uzbek/Uzbekistan) */
1306 /* tt_RU (Tatar/Russia),
1308 || (lcid == 0x0444 && has_modifier ("@iqtelif"))
1309 || lcid == 0x0450) /* mn_MN (Mongolian/Mongolia) */
1311 else if (lcid == 0x0423) /* be_BY (Belarusian/Belarus) */
1312 cs = has_modifier ("@latin") ? "UTF-8" : "CP1251";
1313 else if (lcid == 0x0402) /* bg_BG (Bulgarian/Bulgaria) */
1315 else if (lcid == 0x0422) /* uk_UA (Ukrainian/Ukraine) */
1321 if (lcid == 0x0452) /* cy_GB (Welsh/Great Britain) */
1323 else if (lcid == 0x4009 /* en_IN (English/India) */
1324 || lcid == 0x0464 /* fil_PH (Filipino/Philippines) */
1325 || lcid == 0x0462 /* fy_NL (Frisian/Netherlands) */
1326 || lcid == 0x0468 /* ha_NG (Hausa/Nigeria) */
1327 || lcid == 0x0470 /* ig_NG (Igbo/Nigeria) */
1328 || lcid == 0x046c /* nso_ZA (Northern Sotho/South Africa) */
1329 || lcid == 0x0487 /* rw_RW (Kinyarwanda/Rwanda) */
1330 || lcid == 0x043b /* se_NO (Northern Saami/Norway) */
1331 || lcid == 0x0432 /* tn_ZA (Tswana/South Africa) */
1332 || lcid == 0x0488 /* wo_SN (Wolof/Senegal) */
1333 || lcid == 0x046a /* yo_NG (Yoruba/Nigeria) */
1334 || lcid == 0x085d) /* iu_CA (Inuktitut/Canada) */
1336 else if (lcid == 0x042e) /* hsb_DE (Upper Sorbian/Germany) */
1338 else if (lcid == 0x0491 /* gd_GB (Scots Gaelic/Great Britain) */
1339 || (has_modifier ("@euro")
1340 && GetLocaleInfoW (lcid, LOCALE_SINTLSYMBOL, wbuf, 9)
1341 && !wcsncmp (wbuf, L"EUR", 3)))
1350 if (lcid == 0x042c) /* az_AZ (Azeri/Azerbaijan) */
1352 else if (lcid == 0x0443) /* uz_UZ (Uzbek/Uzbekistan) */
1361 if (lcid == 0x0429 /* fa_IR (Persian/Iran) */
1362 || lcid == 0x0480 /* ug_CN (Uyghur/China) */
1363 || lcid == 0x0420) /* ur_PK (Urdu/Pakistan) */
1369 if (lcid == 0x0425) /* et_EE (Estonian/Estonia) */
1376 if (lcid == 0x0481) /* mi_NZ (Maori/New Zealand) */
1378 else if (lcid == 0x043a) /* mt_MT (Maltese/Malta) */
1380 else if (lcid == 0x0437) /* ka_GE (Georgian/Georgia) */
1382 else if (lcid == 0x043f) /* kk_KZ (Kazakh/Kazakhstan) */
1388 stpcpy (charset, cs);
1391 /* This function is called from newlib's loadlocale if the locale identifier
1392 was invalid, one way or the other. It looks for the file
1394 /usr/share/locale/locale.alias
1396 which is part of the gettext package, and if it finds the locale alias
1397 in that file, it replaces the locale with the correct locale string from
1400 If successful, it returns a pointer to new_locale, NULL otherwise.*/
/* LOCALE is the locale name to look up; NEW_LOCALE receives the
   replacement string via strcpy when a matching alias is found.
   The lookup compares wide strings: LOCALE is converted to wlocale, each
   alias read from the file is converted to walias, and wcscmp decides.
   Both wide buffers hold ENCODING_LEN characters plus the NUL and are
   explicitly terminated after conversion.
   NOTE(review): the fopen failure check, the assignments of `alias' and
   `replace', the loop exits and the fclose/return are not visible in this
   excerpt.  */
1402 __set_locale_from_locale_alias (const char *locale, char *new_locale)
1404 wchar_t wlocale[ENCODING_LEN + 1];
1405 wchar_t walias[ENCODING_LEN + 1];
1406 #define LOCALE_ALIAS_LINE_LEN 255
1407 char alias_buf[LOCALE_ALIAS_LINE_LEN + 1], *c;
1409 const char *alias, *replace;
1412 FILE *fp = fopen ("/usr/share/locale/locale.alias", "rt");
1415 /* The incoming locale is given in the application charset, or in
1416 the Cygwin internal charset. We try both. */
1417 if (mbstowcs (wlocale, locale, ENCODING_LEN + 1) == (size_t) -1)
1418 sys_mbstowcs (wlocale, ENCODING_LEN + 1, locale);
1419 wlocale[ENCODING_LEN] = L'\0';
1420 /* Ignore @cjknarrow modifier since it's a very personal thing between
1421 Cygwin and newlib... */
1422 if ((wc = wcschr (wlocale, L'@')) && !wcscmp (wc + 1, L"cjknarrow"))
/* Read the alias file line by line, at most LOCALE_ALIAS_LINE_LEN
   characters at a time.  */
1424 while (fgets (alias_buf, LOCALE_ALIAS_LINE_LEN + 1, fp))
1426 alias_buf[LOCALE_ALIAS_LINE_LEN] = '\0';
1427 c = strrchr (alias_buf, '\n');
/* Skip leading blanks and tabs; the test that follows detects empty lines
   and '#' comment lines.  */
1431 c += strspn (c, " \t");
1432 if (!*c || *c == '#')
/* Each strcspn step moves over one blank-delimited field and each strspn
   step over the blanks that follow it.  */
1435 c += strcspn (c, " \t");
1437 c += strspn (c, " \t");
1441 c += strcspn (c, " \t");
/* The replacement must not be longer than ENCODING_LEN characters.
   NOTE(review): the statement guarded by this test is not visible here.  */
1443 if (strlen (replace) > ENCODING_LEN)
1445 /* The file is latin1 encoded */
1446 lc_mbstowcs (__iso_mbtowc, "ISO-8859-1", walias, alias, ENCODING_LEN + 1);
1447 walias[ENCODING_LEN] = L'\0';
1448 if (!wcscmp (wlocale, walias))
1450 ret = strcpy (new_locale, replace);
/* Check that the codepage needed by the current multibyte conversion
   function is available.  RET is the locale string returned by
   _setlocale_r; callers treat a non-NULL result as success.
   The check is only made when wincap.has_always_all_codepages () is
   false.  The active __mbtowc is compared with the SJIS, EUC-JP, GBK, KR
   and Big5 converters to pick a codepage number cp, which is then probed
   with GetCPInfo; a failure with ERROR_INVALID_PARAMETER marks the
   codepage as unavailable.
   NOTE(review): the codepage number assigned in each branch and the
   return statements are not visible in this excerpt.  */
1459 check_codepage (char *ret)
1461 if (!wincap.has_always_all_codepages ())
1463 /* Prior to Windows Vista, many codepages are not installed by
1464 default, or can be deinstalled. The following codepages require
1465 that the respective conversion tables are installed into the OS.
1466 So we check if they are installed and if not, setlocale should
1470 if (__mbtowc == __sjis_mbtowc)
1472 else if (__mbtowc == __eucjp_mbtowc)
1474 else if (__mbtowc == __gbk_mbtowc)
1476 else if (__mbtowc == __kr_mbtowc)
1478 else if (__mbtowc == __big5_mbtowc)
1480 if (cp && !GetCPInfo (cp, &cpi)
1481 && GetLastError () == ERROR_INVALID_PARAMETER)
1487 /* Can be called via cygwin_internal (CW_INTERNAL_SETLOCALE) for applications
1488 which really (think they) know what they are doing. */
/* Switch Cygwin's internal charset (cygheap->locale) to the charset
   reported by __locale_charset () and re-encode the cached POSIX CWD and
   the PATH environment variable accordingly.
   Sequence: convert PATH and the CWD to wide characters while the old
   charset is still active, install the new conversion functions and
   charset name, then convert both back.  */
1490 internal_setlocale ()
1492 /* Each setlocale from the environment potentially changes the
1493 multibyte representation of the CWD. Therefore we have to
1494 reevaluate the CWD's posix path and store in the new charset.
1495 Same for the PATH environment variable. */
1496 /* FIXME: Other buffered paths might be affected as well. */
1497 /* FIXME: It could be necessary to convert the entire environment,
1501 wchar_t *w_path = NULL, *w_cwd;
1503 /* Don't do anything if the charset hasn't actually changed. */
1504 if (strcmp (cygheap->locale.charset, __locale_charset ()) == 0)
1507 debug_printf ("Cygwin charset changed from %s to %s",
1508 cygheap->locale.charset, __locale_charset ());
1509 /* Fetch PATH and CWD and convert to wchar_t in previous charset. */
1510 path = getenv ("PATH");
1511 if (path && *path) /* $PATH can be potentially unset. */
1513 w_path = tp.w_get ();
1514 sys_mbstowcs (w_path, 32768, path);
1516 w_cwd = tp.w_get ();
/* The CWD lock is held from reading the old POSIX path until
   reset_posix () below has stored it again.  */
1517 cwdstuff::cwd_lock.acquire ();
1518 sys_mbstowcs (w_cwd, 32768, cygheap->cwd.get_posix ());
1519 /* Set charset for internal conversion functions. */
/* A charset name starting with 'A' (ASCII) selects the UTF-8 converters;
   the assignments of __mbtowc / __wctomb further down presumably form the
   else branch (NOTE(review): the `else' line is not visible here).  */
1520 if (*__locale_charset () == 'A'/*SCII*/)
1522 cygheap->locale.mbtowc = __utf8_mbtowc;
1523 cygheap->locale.wctomb = __utf8_wctomb;
1527 cygheap->locale.mbtowc = __mbtowc;
1528 cygheap->locale.wctomb = __wctomb;
1530 strcpy (cygheap->locale.charset, __locale_charset ());
1531 /* Restore CWD and PATH in new charset. */
1532 cygheap->cwd.reset_posix (w_cwd);
1533 cwdstuff::cwd_lock.release ();
/* w_path is still NULL when PATH was unset or empty above.
   NOTE(review): presumably the statements below are guarded by a w_path
   test that is not visible in this excerpt.  */
1536 char *c_path = tp.c_get ();
1537 sys_wcstombs (c_path, 32768, w_path);
1538 setenv ("PATH", c_path, 1);
1542 /* Called from dll_crt0_1, before fetching the command line from Windows.
1543 Set the internal charset according to the environment locale settings.
1544 Check if a required codepage is available, and only switch internal
1546 Make sure to reset the application locale to "C" per POSIX. */
/* Set LC_CTYPE from the environment (empty locale string) and, if that
   succeeds and check_codepage accepts the result, switch the internal
   charset via internal_setlocale.
   NOTE(review): any statements following the call to internal_setlocale
   are not visible in this excerpt.  */
1548 initial_setlocale ()
1550 char *ret = _setlocale_r (_REENT, LC_CTYPE, "");
1551 if (ret && check_codepage (ret))
1552 internal_setlocale ();
1555 /* Like newlib's setlocale, but additionally check if the charset needs
1556 OS support and the required codepage is actually installed. If codepage
1557 is not available, revert to previous locale and return NULL. For details
1558 about codepage availability, see the comment in check_codepage() above. */
/* CATEGORY and LOCALE are passed unchanged to _setlocale_r.
   `old' has room for LC_MESSAGES + 1 locale names of ENCODING_LEN
   characters, each with one extra character for the "/" separator and one
   more besides.  It is filled with the current setting only when LOCALE
   is non-NULL and wincap.has_always_all_codepages () is false.
   If the new locale was set but check_codepage returns NULL, the category
   is set back to `old'.
   NOTE(review): the return statement is not visible in this excerpt.  */
1560 setlocale (int category, const char *locale)
1562 char old[(LC_MESSAGES + 1) * (ENCODING_LEN + 1/*"/"*/ + 1)];
1563 if (locale && !wincap.has_always_all_codepages ())
1564 stpcpy (old, _setlocale_r (_REENT, category, NULL));
1565 char *ret = _setlocale_r (_REENT, category, locale);
1566 if (ret && locale && !(ret = check_codepage (ret)))
1567 _setlocale_r (_REENT, category, old);