1 /* nlsfuncs.cc: NLS helper functions
3 Copyright 2010 Red Hat, Inc.
5 This file is part of Cygwin.
7 This software is a copyrighted work licensed under the terms of the
8 Cygwin license. Please consult the file "CYGWIN_LICENSE" for
22 /* Internal headers from newlib */
23 #include "../locale/timelocal.h"
24 #include "../locale/lctype.h"
25 #include "../locale/lnumeric.h"
26 #include "../locale/lmonetary.h"
27 #include "../locale/lmessages.h"
/* Helper macros and file-level state for the locale loaders below.
   The macros expand to code that uses variables of the expanding scope:
   lcid, modifier, and the per-category cursor/limit pair
   lc_<category>_ptr and lc_<category>_end.
   _LC(x) yields two arguments: the address of the cursor and the number
   of bytes between the cursor and the limit.
   has_modifier(x) is true when x is non-empty and equal to modifier.  */
31 #define _LC(x) &lc_##x##_ptr,lc_##x##_end-lc_##x##_ptr
33 #define getlocaleinfo(category,type) \
34 __getlocaleinfo(lcid,(type),_LC(category))
35 #define eval_datetimefmt(type,flags) \
36 __eval_datetimefmt(lcid,(type),(flags),&lc_time_ptr,\
37 lc_time_end-lc_time_ptr)
38 #define charfromwchar(category,in) \
39 __charfromwchar (_##category##_locale->in,_LC(category),\
42 #define has_modifier(x) ((x)[0] && !strcmp (modifier, (x)))
44 /* Vista and later. Not defined in w32api yet. */
46 WINBASEAPI LCID WINAPI LocaleNameToLCID (LPCWSTR, DWORD);
/* Name and result of the most recent __get_lcid_from_locale lookup; used
   there to short-cut repeated lookups of the same locale name.  */
49 static char last_locale[ENCODING_LEN + 1];
50 static LCID last_lcid;
/* Map the POSIX locale specifier NAME to a Windows LCID.
   The name and the result of the previous lookup are cached in
   last_locale and last_lcid.  The charset and modifier parts of NAME are
   cut off before the lookup; the modifier is kept separately for the
   "@latin" and "@cyrillic" sublanguage adjustments.  If
   wincap.has_localenames () is true, the lookup goes through
   LocaleNameToLCID, otherwise it iterates over language and sublanguage
   ids with GetLocaleInfo.  A lookup that yields no LCID is recorded as
   (LCID) -1 in last_lcid.  */
52 /* Fetch LCID from POSIX locale specifier.
60 __get_lcid_from_locale (const char *name)
62 char locale[ENCODING_LEN + 1];
66 /* Speed up reusing the same locale as before, for instance in LC_ALL case. */
67 if (!strcmp (name, last_locale))
69 debug_printf ("LCID=0x%04x", last_lcid);
/* NOTE(review): both copies below are unbounded while the destination
   buffers hold ENCODING_LEN + 1 bytes; presumably the caller guarantees
   that NAME is at most ENCODING_LEN characters long -- confirm.  */
72 stpcpy (last_locale, name);
73 stpcpy (locale, name);
74 /* Store modifier for later use. */
75 const char *modifier = strchr (last_locale, '@') ? : "";
76 /* Drop charset and modifier */
77 c = strchr (locale, '.');
79 c = strchr (locale, '@');
82 /* "POSIX" already converted to "C" in loadlocale. */
83 if (!strcmp (locale, "C"))
85 c = strchr (locale, '_');
87 return last_lcid = (LCID) -1;
88 if (wincap.has_localenames ())
90 wchar_t wlocale[ENCODING_LEN + 1];
92 /* Convert to RFC 4646 syntax which is the standard for the locale names
93 replacing LCIDs starting with Vista. */
95 mbstowcs (wlocale, locale, ENCODING_LEN + 1);
96 lcid = LocaleNameToLCID (wlocale, 0);
99 /* Unfortunately there are a couple of locales for which no form
100 without a Script part per RFC 4646 exists.
101 Linux also supports no_NO which is equivalent to nb_NO. */
105 } sc_only_locale[] = {
106 { "az-AZ" , L"az-Latn-AZ" },
107 { "bs-BA" , L"bs-Latn-BA" },
108 { "ha-NG" , L"ha-Latn-NG" },
109 { "iu-CA" , L"iu-Latn-CA" },
110 { "mn-CN" , L"mn-Mong-CN" },
111 { "no-NO" , L"nb-NO" },
112 { "sr-BA" , L"sr-Cyrl-BA" },
113 { "sr-CS" , L"sr-Cyrl-CS" },
114 { "sr-ME" , L"sr-Cyrl-ME" },
115 { "sr-RS" , L"sr-Cyrl-RS" },
116 { "tg-TJ" , L"tg-Cyrl-TJ" },
117 { "tzm-DZ", L"tzm-Latn-DZ" },
118 { "uz-UZ" , L"uz-Latn-UZ" },
/* The loop condition stops at the first entry whose initial letter sorts
   after the initial letter of locale, so it relies on the table above
   being sorted by name.  */
121 for (int i = 0; sc_only_locale[i].loc
122 && sc_only_locale[i].loc[0] <= locale[0]; ++i)
123 if (!strcmp (locale, sc_only_locale[i].loc))
125 lcid = LocaleNameToLCID (sc_only_locale[i].wloc, 0);
126 if (!strncmp (locale, "sr-", 3))
128 /* Vista/2K8 is missing sr-ME and sr-RS. It has only the
129 deprecated sr-CS. So we map ME and RS to CS here. */
131 lcid = LocaleNameToLCID (L"sr-Cyrl-CS", 0);
132 /* "@latin" modifier for the sr_XY locales changes
133 collation behaviour so lcid should accommodate that
134 by being set to the Latin sublang. */
135 if (lcid != 0 && has_modifier ("@latin"))
136 lcid = MAKELANGID (lcid & 0x3ff, (lcid >> 10) - 1);
138 else if (!strncmp (locale, "uz-", 3))
140 /* Equivalent for "@cyrillic" modifier in uz_UZ locale */
141 if (lcid != 0 && has_modifier ("@cyrillic"))
142 lcid = MAKELANGID (lcid & 0x3ff, (lcid >> 10) + 1);
/* A zero lcid (nothing found) is recorded as (LCID) -1.  */
147 last_lcid = lcid ?: (LCID) -1;
148 debug_printf ("LCID=0x%04x", last_lcid);
151 /* Pre-Vista we have to loop through the LCID values and see if they
152 match language and TERRITORY. */
154 /* locale now points to the language, c points to the TERRITORY */
155 const char *language = locale;
156 const char *territory = c;
160 /* In theory the lang part takes 10 bits (0x3ff), but up to Windows 2003 R2
161 the highest lang value is 0x81. */
162 for (lang = 1; lang <= 0x81; ++lang)
163 if (GetLocaleInfo (lang, LOCALE_SISO639LANGNAME, iso, 10)
164 && !strcmp (language, iso))
172 /* In theory the sublang part takes 7 bits (0x3f), but up to
173 Windows 2003 R2 the highest sublang value is 0x14. */
174 for (sublang = 1; sublang <= 0x14; ++sublang)
176 lcid = (sublang << 10) | lang;
177 if (GetLocaleInfo (lcid, LOCALE_SISO3166CTRYNAME, iso, 10)
178 && !strcmp (territory, iso))
184 if (lcid == 0 && territory)
186 /* Unfortunately there are four language LCID number areas representing
187 multiple languages. Fortunately only two of them already existed
188 pre-Vista. The concealed languages have to be tested explicitly,
189 since they are not caught by the above loops.
190 This also enables the serbian ISO 3166 territory codes which have
191 been changed post 2003, and maps them to the old wrong (SP was never
192 a valid ISO 3166 code) territory code sr_SP which fortunately has the
193 same LCID as the newer sr_CS.
194 Linux also supports no_NO which is equivalent to nb_NO. */
198 } ambiguous_locale[] = {
199 { "bs_BA", MAKELANGID (LANG_BOSNIAN, 0x05) },
200 { "nn_NO", MAKELANGID (LANG_NORWEGIAN, SUBLANG_NORWEGIAN_NYNORSK) },
201 { "no_NO", MAKELANGID (LANG_NORWEGIAN, SUBLANG_NORWEGIAN_BOKMAL) },
202 { "sr_BA", MAKELANGID (LANG_BOSNIAN,
203 SUBLANG_SERBIAN_BOSNIA_HERZEGOVINA_CYRILLIC) },
204 { "sr_CS", MAKELANGID (LANG_SERBIAN, SUBLANG_SERBIAN_CYRILLIC) },
205 { "sr_ME", MAKELANGID (LANG_SERBIAN, SUBLANG_SERBIAN_CYRILLIC) },
206 { "sr_RS", MAKELANGID (LANG_SERBIAN, SUBLANG_SERBIAN_CYRILLIC) },
207 { "sr_SP", MAKELANGID (LANG_SERBIAN, SUBLANG_SERBIAN_CYRILLIC) },
/* Same sorted-table scan as for sc_only_locale: stop at the first entry
   whose initial letter sorts after the initial letter of locale.  */
211 for (int i = 0; ambiguous_locale[i].loc
212 && ambiguous_locale[i].loc[0] <= locale[0]; ++i)
213 if (!strcmp (locale, ambiguous_locale[i].loc)
214 && GetLocaleInfo (ambiguous_locale[i].lcid, LOCALE_SISO639LANGNAME,
217 lcid = ambiguous_locale[i].lcid;
218 /* "@latin" modifier for the sr_XY locales changes collation
219 behaviour so lcid should accommodate that by being set to
220 the Latin sublang. */
221 if (!strncmp (locale, "sr_", 3) && has_modifier ("@latin"))
222 lcid = MAKELANGID (lcid & 0x3ff, (lcid >> 10) - 1);
226 else if (lcid == 0x0443) /* uz_UZ (Uzbek/Uzbekistan) */
228 /* Equivalent for "@cyrillic" modifier in uz_UZ locale */
229 if (lcid != 0 && has_modifier ("@cyrillic"))
230 lcid = MAKELANGID (lcid & 0x3ff, (lcid >> 10) + 1);
/* A zero lcid (nothing found) is recorded as (LCID) -1.  */
232 last_lcid = lcid ?: (LCID) -1;
233 debug_printf ("LCID=0x%04x", last_lcid);
/* Convert the wide character string PWCS to the charset CHARSET by calling
   the conversion function F_WCTOMB once per wide character, starting from
   a zeroed conversion state.  N limits the number of bytes copied to S.
   RETURN_INVALID defaults to false.  */
237 /* Never returns -1. Just skips invalid chars instead. Only if return_invalid
238 is set, s==NULL returns -1 since then it's used to recognize invalid strings
239 in the used charset. */
241 lc_wcstombs (wctomb_p f_wctomb, const char *charset,
242 char *s, const wchar_t *pwcs, size_t n,
243 bool return_invalid = false)
248 size_t i, bytes, num_to_copy;
251 memset (&state, 0, sizeof state);
254 size_t num_bytes = 0;
257 bytes = f_wctomb (_REENT, buf, *pwcs++, charset, &state);
258 if (bytes != (size_t) -1)
260 else if (return_invalid)
267 bytes = f_wctomb (_REENT, buf, *pwcs, charset, &state);
268 if (bytes == (size_t) -1)
/* Presumably the state is reset here so that a failed conversion does not
   affect the following characters -- confirm.  */
270 memset (&state, 0, sizeof state);
/* Copy at most the N bytes that are still permitted.  */
274 num_to_copy = (n > bytes ? bytes : n);
275 for (i = 0; i < num_to_copy; ++i)
279 return ptr - s - (n >= bytes);
/* Counterpart of lc_wcstombs: convert the multibyte string S in charset
   CHARSET to wide characters in PWCS by calling F_MBTOWC, starting from a
   zeroed conversion state.  A result of (size_t) -1 from F_MBTOWC is
   handled separately.  */
286 /* Never returns -1. Invalid sequences are translated to replacement
289 lc_mbstowcs (mbtowc_p f_mbtowc, const char *charset,
290 wchar_t *pwcs, const char *s, size_t n)
293 char *t = (char *) s;
297 memset (&state, 0, sizeof state);
302 bytes = f_mbtowc (_REENT, pwcs, t, 6 /* fake, always enough */,
304 if (bytes == (size_t) -1)
/* Comparison callback handed to bsearch for the lc_era and lc_msg tables.
   Both arguments are treated as pointers to a char pointer and the two
   strings are compared with strcmp.  */
325 locale_cmp (const void *a, const void *b)
327 char **la = (char **) a;
328 char **lb = (char **) b;
329 return strcmp (*la, *lb);
332 /* Helper function to workaround reallocs which move blocks even if they shrink.
333 Cygwin's realloc is not doing this, but tcsh's, for instance. All lc_foo
334 structures consist entirely of pointers so they are practically pointer
335 arrays. What we do here is just treat the lc_foo pointers as char ** and
336 rebase all char * pointers within, up to the given size of the structure. */
/* PTRV and PTRVEND delimit the pointer array to fix up, OLDBASE and OLDEND
   delimit the old buffer, NEWBASE is the start of the new buffer.  */
338 rebase_locale_buf (const void *ptrv, const void *ptrvend, const char *newbase,
339 const char *oldbase, const char *oldend)
341 const char **ptrsend = (const char **) ptrvend;
342 for (const char **ptrs = (const char **) ptrv; ptrs < ptrsend; ++ptrs)
/* Only pointers into the old buffer are moved; anything else is left
   untouched.  */
343 if (*ptrs >= oldbase && *ptrs < oldend)
344 *ptrs += newbase - oldbase;
/* Fetch the locale item TYPE of LCID as a wide string into the buffer at
   *PTR by calling GetLocaleInfoW.  SIZE is the available space in bytes
   and is converted to a wchar_t count for the call.  Afterwards *PTR is
   advanced by the number of wide characters GetLocaleInfoW reported.  */
348 __getlocaleinfo (LCID lcid, LCTYPE type, char **ptr, size_t size)
/* NOTE(review): `x % 1' is always 0, so this alignment test can never be
   true.  The LC_CTYPE code in this file tests `% 2' for the same purpose;
   presumably `% 2' was intended here as well -- confirm, and check that
   SIZE accounts for the skipped byte.  */
353 if ((uintptr_t) *ptr % 1)
355 ret = (wchar_t *) *ptr;
356 num = GetLocaleInfoW (lcid, type, ret, size / sizeof (wchar_t));
357 *ptr = (char *) (ret + num);
/* Convert the wide string IN to the charset CHARSET with lc_wcstombs,
   writing the result to the buffer at *PTR, which has SIZE bytes left.  */
362 __charfromwchar (const wchar_t *in, char **ptr, size_t size,
363 wctomb_p f_wctomb, const char *charset)
368 num = lc_wcstombs (f_wctomb, charset, ret = *ptr, in, size);
/* Fetch the locale item TYPE of LCID as a number by adding
   LOCALE_RETURN_NUMBER to the type.  Returns 0 if GetLocaleInfoW fails.
   NOTE(review): sizeof val is passed as the buffer size, while
   GetLocaleInfoW takes that argument as a character count -- confirm this
   is intended.  */
374 getlocaleint (LCID lcid, LCTYPE type)
377 return GetLocaleInfoW (lcid, type | LOCALE_RETURN_NUMBER, (PWCHAR) &val,
378 sizeof val) ? val : 0;
/* Flag values for the FLAGS argument of __eval_datetimefmt.  */
383 DT_AMPM = 0x01, /* Enforce 12 hour time format. */
384 DT_ABBREV = 0x02, /* Enforce abbreviated month and day names. */
/* Read the Windows date or time format string TYPE of LCID with
   GetLocaleInfoW and translate it into a wide string written at *PTR.
   The *_str tables below hold the conversion letters emitted for runs of
   the Windows format characters 'd', 'M', 'y' and for the hour fields;
   presumably these are strftime conversion characters -- confirm.
   FLAGS is a combination of DT_AMPM and DT_ABBREV.  */
388 __eval_datetimefmt (LCID lcid, LCTYPE type, dt_flags flags, char **ptr,
394 const wchar_t *day_str = L"edaA";
395 const wchar_t *mon_str = L"mmbB";
396 const wchar_t *year_str = L"yyyY";
397 const wchar_t *hour12_str = L"lI";
398 const wchar_t *hour24_str = L"kH";
399 const wchar_t *t_str;
/* NOTE(review): `x % 1' is always 0, so this alignment test can never be
   true.  The LC_CTYPE code in this file tests `% 2' for the same purpose;
   presumably `% 2' was intended here as well -- confirm.  */
401 if ((uintptr_t) *ptr % 1)
403 wchar_t *ret = (wchar_t *) *ptr;
404 wchar_t *p = (wchar_t *) *ptr;
405 GetLocaleInfoW (lcid, type, buf, 80);
406 for (wchar_t *fmt = buf; *fmt; ++fmt)
/* Advance up to the next single quote character.  */
413 while (fmt[1] && *++fmt != L'\'')
419 t_str = (fc == L'd' ? day_str : fc == L'M' ? mon_str : year_str);
/* Count how often the format character repeats; idx ends up as the
   number of repetitions after the first occurrence.  */
420 for (idx = 0; fmt[1] == fc; ++idx, ++fmt);
423 if ((flags & DT_ABBREV) && fc != L'y' && idx == 3)
/* DT_AMPM forces the 12 hour table even for a 24 hour format.  */
433 t_str = (fc == L'h' || (flags & DT_AMPM) ? hour12_str : hour24_str);
449 *p++ = (fc == L'm' ? L'M' : fc == L's' ? L'S' : L'p');
466 /* Convert Windows grouping format into POSIX grouping format. */
/* The Windows grouping string for TYPE is read with GetLocaleInfoA and
   then walked character by character.
   NOTE(review): characters outside '0'..'9' are presumably the separators
   of the Windows format -- confirm.  */
468 conv_grouping (LCID lcid, LCTYPE type, char **lc_ptr)
470 char buf[10]; /* Per MSDN max size of LOCALE_SGROUPING element incl. NUL */
475 GetLocaleInfoA (lcid, type, buf, 10);
476 /* Convert Windows grouping format into POSIX grouping format. */
477 for (char *c = buf; *c; ++c)
479 if (*c < '0' || *c > '9')
496 /* Called from newlib's setlocale() via __time_load_locale() if category
497 is LC_TIME. Returns LC_TIME values fetched from Windows locale data
498 in the structure pointed to by _time_locale. This is subsequently
499 accessed by functions like nl_langinfo, strftime, strptime. */
/* All strings are stored in one malloc'ed buffer of MAX_TIME_BUFFER_SIZE
   bytes; lc_time_ptr is the write cursor and lc_time_end the limit.  For
   most fields the wide string is stored first and then converted to the
   target charset.  Entries of the lc_era table, looked up by locale name,
   take precedence over the formats derived from Windows.  At the end the
   buffer is shrunk with realloc and the pointers in _time_locale are
   rebased if the block moved.  */
501 __set_lc_time_from_win (const char *name,
502 const struct lc_time_T *_C_time_locale,
503 struct lc_time_T *_time_locale,
504 char **lc_time_buf, wctomb_p f_wctomb,
507 LCID lcid = __get_lcid_from_locale (name);
508 if (lcid == (LCID) -1)
510 if (!lcid && !strcmp (charset, "ASCII"))
513 # define MAX_TIME_BUFFER_SIZE 4096
515 char *new_lc_time_buf = (char *) malloc (MAX_TIME_BUFFER_SIZE);
/* NOTE(review): the end pointer is computed before the result of malloc
   is checked for NULL below.  */
516 const char *lc_time_end = new_lc_time_buf + MAX_TIME_BUFFER_SIZE;
518 if (!new_lc_time_buf)
520 char *lc_time_ptr = new_lc_time_buf;
522 /* C.foo is just a copy of "C" with fixed charset. */
524 memcpy (_time_locale, _C_time_locale, sizeof (struct lc_time_T));
526 _time_locale->codeset = lc_time_ptr;
527 lc_time_ptr = stpcpy (lc_time_ptr, charset) + 1;
531 char locale[ENCODING_LEN + 1];
532 strcpy (locale, name);
533 /* Removes the charset from the locale and attach the modifier to the
534 language_TERRITORY part. */
535 char *c = strchr (locale, '.');
539 char *c2 = strchr (c + 1, '@');
540 /* Ignore @cjknarrow modifier since it's a very personal thing between
541 Cygwin and newlib... */
542 if (c2 && strcmp (c2, "@cjknarrow"))
543 memmove (c, c2, strlen (c2) + 1);
545 /* Now search in the alphabetically ordered lc_era array for the
547 lc_era_t locale_key = { locale, NULL, NULL, NULL, NULL, NULL ,
548 NULL, NULL, NULL, NULL, NULL };
549 lc_era_t *era = (lc_era_t *) bsearch ((void *) &locale_key, (void *) lc_era,
550 sizeof lc_era / sizeof *lc_era,
551 sizeof *lc_era, locale_cmp);
554 /* Windows has a bug in Japanese and Korean locales. In these
555 locales, strings returned for LOCALE_SABBREVMONTHNAME* are missing
556 the suffix representing a month. Unfortunately this is not
557 documented in English. A Japanese article describing the problem
558 is http://msdn.microsoft.com/ja-jp/library/cc422084.aspx
559 The workaround is to use LOCALE_SMONTHNAME* in these locales,
560 even for the abbreviated month name. */
561 const LCTYPE mon_base =
562 lcid == MAKELANGID (LANG_JAPANESE, SUBLANG_JAPANESE_JAPAN)
563 || lcid == MAKELANGID (LANG_KOREAN, SUBLANG_KOREAN)
564 ? LOCALE_SMONTHNAME1 : LOCALE_SABBREVMONTHNAME1;
565 for (int i = 0; i < 12; ++i)
567 _time_locale->wmon[i] = getlocaleinfo (time, mon_base + i);
568 _time_locale->mon[i] = charfromwchar (time, wmon[i]);
570 /* month and alt_month */
571 for (int i = 0; i < 12; ++i)
573 _time_locale->wmonth[i] = getlocaleinfo (time, LOCALE_SMONTHNAME1 + i);
574 _time_locale->month[i] = _time_locale->alt_month[i]
575 = charfromwchar (time, wmonth[i]);
/* Index 0 is filled from Windows day name 7 and indices 1 to 6 from day
   names 1 to 6.  */
578 _time_locale->wwday[0] = getlocaleinfo (time, LOCALE_SABBREVDAYNAME7);
579 _time_locale->wday[0] = charfromwchar (time, wwday[0]);
580 for (int i = 0; i < 6; ++i)
582 _time_locale->wwday[i + 1] = getlocaleinfo (time,
583 LOCALE_SABBREVDAYNAME1 + i);
584 _time_locale->wday[i + 1] = charfromwchar (time, wwday[i + 1]);
587 _time_locale->wweekday[0] = getlocaleinfo (time, LOCALE_SDAYNAME7);
588 _time_locale->weekday[0] = charfromwchar (time, wweekday[0]);
589 for (int i = 0; i < 6; ++i)
591 _time_locale->wweekday[i + 1] = getlocaleinfo (time,
592 LOCALE_SDAYNAME1 + i);
593 _time_locale->weekday[i + 1] = charfromwchar (time, wweekday[i + 1]);
597 if (era && *era->t_fmt)
599 _time_locale->wX_fmt = (const wchar_t *) lc_time_ptr;
600 lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wX_fmt,
604 _time_locale->wX_fmt = eval_datetimefmt (LOCALE_STIMEFORMAT, DT_DEFAULT);
605 _time_locale->X_fmt = charfromwchar (time, wX_fmt);
607 if (era && *era->d_fmt)
609 _time_locale->wx_fmt = (const wchar_t *) lc_time_ptr;
610 lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wx_fmt,
614 _time_locale->wx_fmt = eval_datetimefmt (LOCALE_SSHORTDATE, DT_DEFAULT);
615 _time_locale->x_fmt = charfromwchar (time, wx_fmt);
617 if (era && *era->d_t_fmt)
619 _time_locale->wc_fmt = (const wchar_t *) lc_time_ptr;
620 lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wc_fmt,
625 _time_locale->wc_fmt = eval_datetimefmt (LOCALE_SLONGDATE, DT_ABBREV);
/* The wide character just before the cursor, presumably the terminating
   NUL of the date format, is replaced by a space so that the time format
   written next continues the same string.  */
626 ((wchar_t *) lc_time_ptr)[-1] = L' ';
627 eval_datetimefmt (LOCALE_STIMEFORMAT, DT_DEFAULT);
629 _time_locale->c_fmt = charfromwchar (time, wc_fmt);
631 _time_locale->wam_pm[0] = getlocaleinfo (time, LOCALE_S1159);
632 _time_locale->wam_pm[1] = getlocaleinfo (time, LOCALE_S2359);
633 _time_locale->am_pm[0] = charfromwchar (time, wam_pm[0]);
634 _time_locale->am_pm[1] = charfromwchar (time, wam_pm[1]);
636 if (era && *era->date_fmt)
638 _time_locale->wdate_fmt = (const wchar_t *) lc_time_ptr;
639 lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wdate_fmt,
643 _time_locale->wdate_fmt = _time_locale->wc_fmt;
644 _time_locale->date_fmt = charfromwchar (time, wdate_fmt);
/* md_order is "dm" if the LOCALE_IDATE string starts with '1', otherwise
   "md".  */
648 GetLocaleInfoW (lcid, LOCALE_IDATE, buf, 80);
649 _time_locale->md_order = (const char *) lc_time_ptr;
650 lc_time_ptr = stpcpy (lc_time_ptr, *buf == L'1' ? "dm" : "md") + 1;
655 _time_locale->wampm_fmt = (const wchar_t *) lc_time_ptr;
656 lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wampm_fmt,
657 era->t_fmt_ampm) + 1);
660 _time_locale->wampm_fmt = eval_datetimefmt (LOCALE_STIMEFORMAT, DT_AMPM);
661 _time_locale->ampm_fmt = charfromwchar (time, wampm_fmt);
665 /* Evaluate string length in target charset. Characters invalid in the
666 target charset are simply ignored, as on Linux. */
668 len += lc_wcstombs (f_wctomb, charset, NULL, era->era, 0) + 1;
669 len += lc_wcstombs (f_wctomb, charset, NULL, era->era_d_fmt, 0) + 1;
670 len += lc_wcstombs (f_wctomb, charset, NULL, era->era_d_t_fmt, 0) + 1;
671 len += lc_wcstombs (f_wctomb, charset, NULL, era->era_t_fmt, 0) + 1;
672 len += lc_wcstombs (f_wctomb, charset, NULL, era->alt_digits, 0) + 1;
673 len += (wcslen (era->era) + 1) * sizeof (wchar_t);
674 len += (wcslen (era->era_d_fmt) + 1) * sizeof (wchar_t);
675 len += (wcslen (era->era_d_t_fmt) + 1) * sizeof (wchar_t);
676 len += (wcslen (era->era_t_fmt) + 1) * sizeof (wchar_t);
677 len += (wcslen (era->alt_digits) + 1) * sizeof (wchar_t);
679 /* Make sure data fits into the buffer */
680 if (lc_time_ptr + len > lc_time_end)
/* Grow the buffer to the bytes used so far plus the era data, then fix up
   the cursor, the limit and, if the block moved, the stored pointers.  */
682 len = lc_time_ptr + len - new_lc_time_buf;
683 char *tmp = (char *) realloc (new_lc_time_buf, len);
688 if (tmp != new_lc_time_buf)
689 rebase_locale_buf (_time_locale, _time_locale + 1, tmp,
690 new_lc_time_buf, lc_time_ptr);
691 lc_time_ptr = tmp + (lc_time_ptr - new_lc_time_buf);
692 new_lc_time_buf = tmp;
693 lc_time_end = new_lc_time_buf + len;
700 _time_locale->wera = (const wchar_t *) lc_time_ptr;
701 lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wera,
703 _time_locale->era = charfromwchar (time, wera);
705 _time_locale->wera_d_fmt = (const wchar_t *) lc_time_ptr;
706 lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wera_d_fmt,
707 era->era_d_fmt) + 1);
708 _time_locale->era_d_fmt = charfromwchar (time, wera_d_fmt);
710 _time_locale->wera_d_t_fmt = (const wchar_t *) lc_time_ptr;
711 lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wera_d_t_fmt,
712 era->era_d_t_fmt) + 1);
713 _time_locale->era_d_t_fmt = charfromwchar (time, wera_d_t_fmt);
715 _time_locale->wera_t_fmt = (const wchar_t *) lc_time_ptr;
716 lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wera_t_fmt,
717 era->era_t_fmt) + 1);
718 _time_locale->era_t_fmt = charfromwchar (time, wera_t_fmt);
720 _time_locale->walt_digits = (const wchar_t *) lc_time_ptr;
721 lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->walt_digits,
722 era->alt_digits) + 1);
723 _time_locale->alt_digits = charfromwchar (time, walt_digits);
/* Without era data all era fields share one empty string stored at the
   cursor.  */
729 _time_locale->wera_d_fmt =
730 _time_locale->wera_d_t_fmt =
731 _time_locale->wera_t_fmt =
732 _time_locale->walt_digits = (const wchar_t *) lc_time_ptr;
734 _time_locale->era_d_fmt =
735 _time_locale->era_d_t_fmt =
736 _time_locale->era_t_fmt =
737 _time_locale->alt_digits = (const char *) lc_time_ptr;
738 /* Twice, to make sure wide char strings are correctly terminated. */
739 *lc_time_ptr++ = '\0';
740 *lc_time_ptr++ = '\0';
/* Shrink the buffer to the bytes actually used.  */
744 char *tmp = (char *) realloc (new_lc_time_buf, lc_time_ptr - new_lc_time_buf);
747 free (new_lc_time_buf);
750 if (tmp != new_lc_time_buf)
751 rebase_locale_buf (_time_locale, _time_locale + 1, tmp,
752 new_lc_time_buf, lc_time_ptr);
759 /* Called from newlib's setlocale() via __ctype_load_locale() if category
760 is LC_CTYPE. Returns LC_CTYPE values fetched from Windows locale data
761 in the structure pointed to by _ctype_locale. This is subsequently
762 accessed by functions like nl_langinfo, localeconv, printf, etc. */
/* The strings are stored in a malloc'ed buffer of MAX_CTYPE_BUFFER_SIZE
   bytes with lc_ctype_ptr as the write cursor.  At the end the buffer is
   shrunk with realloc, the pointers in _ctype_locale are rebased if the
   block moved, and the buffer previously stored in *lc_ctype_buf is
   freed.  */
764 __set_lc_ctype_from_win (const char *name,
765 const struct lc_ctype_T *_C_ctype_locale,
766 struct lc_ctype_T *_ctype_locale,
767 char **lc_ctype_buf, wctomb_p f_wctomb,
768 const char *charset, int mb_cur_max)
770 LCID lcid = __get_lcid_from_locale (name);
771 if (lcid == (LCID) -1)
773 if (!lcid && !strcmp (charset, "ASCII"))
776 # define MAX_CTYPE_BUFFER_SIZE 256
778 char *new_lc_ctype_buf = (char *) malloc (MAX_CTYPE_BUFFER_SIZE);
780 if (!new_lc_ctype_buf)
782 char *lc_ctype_ptr = new_lc_ctype_buf;
783 /* C.foo is just a copy of "C" with fixed charset. */
785 memcpy (_ctype_locale, _C_ctype_locale, sizeof (struct lc_ctype_T));
787 _ctype_locale->codeset = lc_ctype_ptr;
788 lc_ctype_ptr = stpcpy (lc_ctype_ptr, charset) + 1;
/* mb_cur_max is stored as a single byte followed by a NUL.  */
790 _ctype_locale->mb_cur_max = lc_ctype_ptr;
791 *lc_ctype_ptr++ = mb_cur_max;
792 *lc_ctype_ptr++ = '\0';
795 /* outdigits and woutdigits */
797 GetLocaleInfoW (lcid, LOCALE_SNATIVEDIGITS, digits, 11);
798 for (int i = 0; i <= 9; ++i)
802 /* Make sure the wchar_t's are always 2 byte aligned. */
803 if ((uintptr_t) lc_ctype_ptr % 2)
805 wchar_t *woutdig = (wchar_t *) lc_ctype_ptr;
806 _ctype_locale->woutdigits[i] = (const wchar_t *) woutdig;
807 *woutdig++ = digits[i];
809 lc_ctype_ptr = (char *) woutdig;
810 _ctype_locale->outdigits[i] = lc_ctype_ptr;
811 memset (&state, 0, sizeof state);
/* NOTE(review): the result of f_wctomb is added to the cursor without a
   check.  lc_wcstombs compares that result against (size_t) -1, so a digit
   that cannot be converted to CHARSET would presumably move the cursor
   backwards -- confirm.  */
812 lc_ctype_ptr += f_wctomb (_REENT, lc_ctype_ptr, digits[i], charset,
814 *lc_ctype_ptr++ = '\0';
/* Shrink the buffer to the bytes actually used.  */
818 char *tmp = (char *) realloc (new_lc_ctype_buf,
819 lc_ctype_ptr - new_lc_ctype_buf);
822 free (new_lc_ctype_buf);
825 if (tmp != new_lc_ctype_buf)
826 rebase_locale_buf (_ctype_locale, _ctype_locale + 1, tmp,
827 new_lc_ctype_buf, lc_ctype_ptr);
829 free (*lc_ctype_buf);
834 /* Called from newlib's setlocale() via __numeric_load_locale() if category
835 is LC_NUMERIC. Returns LC_NUMERIC values fetched from Windows locale data
836 in the structure pointed to by _numeric_locale. This is subsequently
837 accessed by functions like nl_langinfo, localeconv, printf, etc. */
/* The strings are stored in a malloc'ed buffer of MAX_NUMERIC_BUFFER_SIZE
   bytes; lc_numeric_ptr is the write cursor and lc_numeric_end the limit.
   At the end the buffer is shrunk with realloc, the pointers in
   _numeric_locale are rebased if the block moved, and the new buffer
   replaces the one in *lc_numeric_buf, which is freed.  */
839 __set_lc_numeric_from_win (const char *name,
840 const struct lc_numeric_T *_C_numeric_locale,
841 struct lc_numeric_T *_numeric_locale,
842 char **lc_numeric_buf, wctomb_p f_wctomb,
845 LCID lcid = __get_lcid_from_locale (name);
846 if (lcid == (LCID) -1)
848 if (!lcid && !strcmp (charset, "ASCII"))
851 # define MAX_NUMERIC_BUFFER_SIZE 256
853 char *new_lc_numeric_buf = (char *) malloc (MAX_NUMERIC_BUFFER_SIZE);
/* NOTE(review): the end pointer is computed before the result of malloc
   is checked for NULL below.  */
854 const char *lc_numeric_end = new_lc_numeric_buf + MAX_NUMERIC_BUFFER_SIZE;
856 if (!new_lc_numeric_buf)
858 char *lc_numeric_ptr = new_lc_numeric_buf;
859 /* C.foo is just a copy of "C" with fixed charset. */
861 memcpy (_numeric_locale, _C_numeric_locale, sizeof (struct lc_numeric_T));
865 _numeric_locale->wdecimal_point = getlocaleinfo (numeric, LOCALE_SDECIMAL);
866 _numeric_locale->decimal_point = charfromwchar (numeric, wdecimal_point);
868 _numeric_locale->wthousands_sep = getlocaleinfo (numeric, LOCALE_STHOUSAND);
869 _numeric_locale->thousands_sep = charfromwchar (numeric, wthousands_sep);
871 _numeric_locale->grouping = conv_grouping (lcid, LOCALE_SGROUPING,
875 _numeric_locale->codeset = lc_numeric_ptr;
876 lc_numeric_ptr = stpcpy (lc_numeric_ptr, charset) + 1;
/* Shrink the buffer to the bytes actually used.  */
878 char *tmp = (char *) realloc (new_lc_numeric_buf,
879 lc_numeric_ptr - new_lc_numeric_buf);
882 free (new_lc_numeric_buf);
885 if (tmp != new_lc_numeric_buf)
886 rebase_locale_buf (_numeric_locale, _numeric_locale + 1, tmp,
887 new_lc_numeric_buf, lc_numeric_ptr);
889 free (*lc_numeric_buf);
890 *lc_numeric_buf = tmp;
894 /* Called from newlib's setlocale() via __monetary_load_locale() if category
895 is LC_MONETARY. Returns LC_MONETARY values fetched from Windows locale data
896 in the structure pointed to by _monetary_locale. This is subsequently
897 accessed by functions like nl_langinfo, localeconv, printf, etc. */
/* The strings are stored in a malloc'ed buffer of MAX_MONETARY_BUFFER_SIZE
   bytes; lc_monetary_ptr is the write cursor and lc_monetary_end the
   limit.  The numeric items are stored as single bytes, and for the
   p_ and n_ items the national and the int_ field point to the same byte.
   At the end the buffer is shrunk with realloc, the pointers in
   _monetary_locale are rebased if the block moved, and the new buffer
   replaces the one in *lc_monetary_buf.  */
899 __set_lc_monetary_from_win (const char *name,
900 const struct lc_monetary_T *_C_monetary_locale,
901 struct lc_monetary_T *_monetary_locale,
902 char **lc_monetary_buf, wctomb_p f_wctomb,
905 LCID lcid = __get_lcid_from_locale (name);
906 if (lcid == (LCID) -1)
908 if (!lcid && !strcmp (charset, "ASCII"))
911 # define MAX_MONETARY_BUFFER_SIZE 512
913 char *new_lc_monetary_buf = (char *) malloc (MAX_MONETARY_BUFFER_SIZE);
/* NOTE(review): the end pointer is computed before the result of malloc
   is checked for NULL below.  */
914 const char *lc_monetary_end = new_lc_monetary_buf + MAX_MONETARY_BUFFER_SIZE;
916 if (!new_lc_monetary_buf)
918 char *lc_monetary_ptr = new_lc_monetary_buf;
919 /* C.foo is just a copy of "C" with fixed charset. */
921 memcpy (_monetary_locale, _C_monetary_locale, sizeof (struct lc_monetary_T));
924 /* int_curr_symbol */
925 _monetary_locale->wint_curr_symbol = getlocaleinfo (monetary,
927 /* No spacing char means space. */
928 if (!_monetary_locale->wint_curr_symbol[3])
930 wchar_t *wc = (wchar_t *) _monetary_locale->wint_curr_symbol + 3;
933 lc_monetary_ptr = (char *) wc;
935 _monetary_locale->int_curr_symbol = charfromwchar (monetary,
937 /* currency_symbol */
938 _monetary_locale->wcurrency_symbol = getlocaleinfo (monetary,
940 /* As on Linux: If the currency_symbol can't be represented in the
941 given charset, use int_curr_symbol. */
942 if (lc_wcstombs (f_wctomb, charset, NULL,
943 _monetary_locale->wcurrency_symbol,
944 0, true) == (size_t) -1)
945 _monetary_locale->currency_symbol = _monetary_locale->int_curr_symbol;
947 _monetary_locale->currency_symbol = charfromwchar (monetary,
949 /* mon_decimal_point */
950 _monetary_locale->wmon_decimal_point = getlocaleinfo (monetary,
951 LOCALE_SMONDECIMALSEP);
952 _monetary_locale->mon_decimal_point = charfromwchar (monetary,
954 /* mon_thousands_sep */
955 _monetary_locale->wmon_thousands_sep = getlocaleinfo (monetary,
956 LOCALE_SMONTHOUSANDSEP);
957 _monetary_locale->mon_thousands_sep = charfromwchar (monetary,
960 _monetary_locale->mon_grouping = conv_grouping (lcid, LOCALE_SMONGROUPING,
963 _monetary_locale->wpositive_sign = getlocaleinfo (monetary,
964 LOCALE_SPOSITIVESIGN);
965 _monetary_locale->positive_sign = charfromwchar (monetary, wpositive_sign);
967 _monetary_locale->wnegative_sign = getlocaleinfo (monetary,
968 LOCALE_SNEGATIVESIGN);
969 _monetary_locale->negative_sign = charfromwchar (monetary, wnegative_sign);
970 /* int_frac_digits */
971 *lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_IINTLCURRDIGITS);
972 _monetary_locale->int_frac_digits = lc_monetary_ptr++;
974 *lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_ICURRDIGITS);
975 _monetary_locale->frac_digits = lc_monetary_ptr++;
976 /* p_cs_precedes and int_p_cs_precedes */
977 *lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_IPOSSYMPRECEDES);
978 _monetary_locale->p_cs_precedes
979 = _monetary_locale->int_p_cs_precedes = lc_monetary_ptr++;
980 /* p_sep_by_space and int_p_sep_by_space */
981 *lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_IPOSSEPBYSPACE);
982 _monetary_locale->p_sep_by_space
983 = _monetary_locale->int_p_sep_by_space = lc_monetary_ptr++;
984 /* n_cs_precedes and int_n_cs_precedes */
985 *lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_INEGSYMPRECEDES);
986 _monetary_locale->n_cs_precedes
987 = _monetary_locale->int_n_cs_precedes = lc_monetary_ptr++;
988 /* n_sep_by_space and int_n_sep_by_space */
989 *lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_INEGSEPBYSPACE);
990 _monetary_locale->n_sep_by_space
991 = _monetary_locale->int_n_sep_by_space = lc_monetary_ptr++;
992 /* p_sign_posn and int_p_sign_posn */
993 *lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_IPOSSIGNPOSN);
994 _monetary_locale->p_sign_posn
995 = _monetary_locale->int_p_sign_posn = lc_monetary_ptr++;
996 /* n_sign_posn and int_n_sign_posn */
997 *lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_INEGSIGNPOSN);
998 _monetary_locale->n_sign_posn
999 = _monetary_locale->int_n_sign_posn = lc_monetary_ptr++;
1002 _monetary_locale->codeset = lc_monetary_ptr;
1003 lc_monetary_ptr = stpcpy (lc_monetary_ptr, charset) + 1;
/* Shrink the buffer to the bytes actually used.  */
1005 char *tmp = (char *) realloc (new_lc_monetary_buf,
1006 lc_monetary_ptr - new_lc_monetary_buf);
1009 free (new_lc_monetary_buf);
1012 if (tmp != new_lc_monetary_buf)
1013 rebase_locale_buf (_monetary_locale, _monetary_locale + 1, tmp,
1014 new_lc_monetary_buf, lc_monetary_ptr);
/* NOTE(review): free (NULL) is a no-op, so this guard is redundant; the
   LC_CTYPE and LC_NUMERIC variants call free without it.  */
1015 if (*lc_monetary_buf)
1016 free (*lc_monetary_buf);
1017 *lc_monetary_buf = tmp;
/* Fill the structure pointed to by _messages_locale for the locale NAME.
   The yes/no strings come from the lc_msg table, looked up by locale name
   with bsearch, not from Windows locale data.  The required size is
   computed first, then one buffer of exactly that size is allocated which
   holds the codeset name, the four strings converted to CHARSET and the
   four wide strings.  The new buffer replaces the one in
   *lc_messages_buf.  */
1022 __set_lc_messages_from_win (const char *name,
1023 const struct lc_messages_T *_C_messages_locale,
1024 struct lc_messages_T *_messages_locale,
1025 char **lc_messages_buf,
1026 wctomb_p f_wctomb, const char *charset)
1028 LCID lcid = __get_lcid_from_locale (name);
1029 if (lcid == (LCID) -1)
1031 if (!lcid && !strcmp (charset, "ASCII"))
1034 char locale[ENCODING_LEN + 1];
1036 lc_msg_t *msg = NULL;
1038 /* C.foo is just a copy of "C" with fixed charset. */
1040 memcpy (_messages_locale, _C_messages_locale, sizeof (struct lc_messages_T));
1043 strcpy (locale, name);
1044 /* Removes the charset from the locale and attach the modifier to the
1045 language_TERRITORY part. */
1046 c = strchr (locale, '.');
1050 c2 = strchr (c + 1, '@');
1051 /* Ignore @cjknarrow modifier since it's a very personal thing between
1052 Cygwin and newlib... */
1053 if (c2 && strcmp (c2, "@cjknarrow"))
1054 memmove (c, c2, strlen (c2) + 1);
1056 /* Now search in the alphabetically ordered lc_msg array for the
1058 lc_msg_t locale_key = { locale, NULL, NULL, NULL, NULL };
1059 msg = (lc_msg_t *) bsearch ((void *) &locale_key, (void *) lc_msg,
1060 sizeof lc_msg / sizeof *lc_msg,
1061 sizeof *lc_msg, locale_cmp);
1066 /* Evaluate string length in target charset. Characters invalid in the
1067 target charset are simply ignored, as on Linux. */
1069 len += (strlen (charset) + 1);
1072 len += lc_wcstombs (f_wctomb, charset, NULL, msg->yesexpr, 0) + 1;
1073 len += lc_wcstombs (f_wctomb, charset, NULL, msg->noexpr, 0) + 1;
1074 len += lc_wcstombs (f_wctomb, charset, NULL, msg->yesstr, 0) + 1;
1075 len += lc_wcstombs (f_wctomb, charset, NULL, msg->nostr, 0) + 1;
1076 len += (wcslen (msg->yesexpr) + 1) * sizeof (wchar_t);
1077 len += (wcslen (msg->noexpr) + 1) * sizeof (wchar_t);
1078 len += (wcslen (msg->yesstr) + 1) * sizeof (wchar_t);
1079 len += (wcslen (msg->nostr) + 1) * sizeof (wchar_t);
1084 char *new_lc_messages_buf = (char *) malloc (len);
/* NOTE(review): the end pointer is computed before the result of malloc
   is checked for NULL below.  */
1085 const char *lc_messages_end = new_lc_messages_buf + len;
1087 if (!new_lc_messages_buf)
1090 c = new_lc_messages_buf;
1092 _messages_locale->codeset = c;
1093 c = stpcpy (c, charset) + 1;
/* Each converted string starts right behind the NUL of the previous
   one.  */
1096 _messages_locale->yesexpr = (const char *) c;
1097 len = lc_wcstombs (f_wctomb, charset, c, msg->yesexpr, lc_messages_end - c);
1098 _messages_locale->noexpr = (const char *) (c += len + 1);
1099 len = lc_wcstombs (f_wctomb, charset, c, msg->noexpr, lc_messages_end - c);
1100 _messages_locale->yesstr = (const char *) (c += len + 1);
1101 len = lc_wcstombs (f_wctomb, charset, c, msg->yesstr, lc_messages_end - c);
1102 _messages_locale->nostr = (const char *) (c += len + 1);
1103 len = lc_wcstombs (f_wctomb, charset, c, msg->nostr, lc_messages_end - c);
/* NOTE(review): `x % 1' is always 0, so this alignment test can never be
   true.  The LC_CTYPE code in this file tests `% 2' for the same purpose;
   presumably `% 2' was intended.  Before changing it, confirm that the
   length computed above reserves a byte for the alignment padding.  */
1105 if ((uintptr_t) c % 1)
1107 wchar_t *wc = (wchar_t *) c;
1108 _messages_locale->wyesexpr = (const wchar_t *) wc;
1109 wc = wcpcpy (wc, msg->yesexpr) + 1;
1110 _messages_locale->wnoexpr = (const wchar_t *) wc;
1111 wc = wcpcpy (wc, msg->noexpr) + 1;
1112 _messages_locale->wyesstr = (const wchar_t *) wc;
1113 wc = wcpcpy (wc, msg->yesstr) + 1;
1114 _messages_locale->wnostr = (const wchar_t *) wc;
1115 wcpcpy (wc, msg->nostr);
/* NOTE(review): free (NULL) is a no-op, so this guard is redundant.  */
1118 if (*lc_messages_buf)
1119 free (*lc_messages_buf);
1120 *lc_messages_buf = new_lc_messages_buf;
/* Current LC_COLLATE state, set by __collate_load_locale: the LCID handed
   to CompareStringW and LCMapStringW, and the multibyte-to-wide conversion
   function and charset name used by strcoll and strxfrm.  The initial
   values are 0, __ascii_mbtowc and "ASCII".  */
1124 LCID collate_lcid = 0;
1125 static mbtowc_p collate_mbtowc = __ascii_mbtowc;
1126 char collate_charset[ENCODING_LEN + 1] = "ASCII";
1128 /* Called from newlib's setlocale() if category is LC_COLLATE. Stores
1129 LC_COLLATE locale information. This is subsequently accessed by the
1130 below functions strcoll, strxfrm, wcscoll, wcsxfrm. */
1132 __collate_load_locale (const char *name, mbtowc_p f_mbtowc, const char *charset)
1134 LCID lcid = __get_lcid_from_locale (name);
1135 if (lcid == (LCID) -1)
1137 collate_lcid = lcid;
1138 collate_mbtowc = f_mbtowc;
/* NOTE(review): unbounded copy into a buffer of ENCODING_LEN + 1 bytes;
   presumably CHARSET is always short enough -- confirm.  */
1139 stpcpy (collate_charset, charset);
/* Return the charset name stored by __collate_load_locale.  The result
   points to the global collate_charset buffer.  */
1143 extern "C" const char *
1144 __get_current_collate_codeset (void)
1146 return collate_charset;
1149 /* We use the Windows functions for locale-specific string comparison and
1150 transformation. The advantage is that we don't need any files with
1151 collation information. */
/* Compare the wide strings WS1 and WS2.  One path falls back to a plain
   wcscmp; otherwise CompareStringW is called with collate_lcid.  */
1153 wcscoll (const wchar_t *ws1, const wchar_t *ws2)
1158 return wcscmp (ws1, ws2);
1159 ret = CompareStringW (collate_lcid, 0, ws1, -1, ws2, -1);
/* Subtracting CSTR_EQUAL turns the CompareStringW result into a value
   that is negative, zero or positive.  */
1162 return ret - CSTR_EQUAL;
/* Compare the multibyte strings S1 and S2.  One path falls back to a plain
   strcmp; otherwise both strings are converted to wide characters with
   collate_mbtowc and collate_charset and compared with CompareStringW.
   Conversion buffers of more than NT_MAX_PATH wide characters are
   allocated with malloc.  */
1166 strcoll (const char *s1, const char *s2)
1174 return strcmp (s1, s2);
1175 /* The ANSI version of CompareString uses the default charset of the lcid,
1176 so we must use the Unicode version. */
/* The first call with a NULL destination only determines the length.  */
1177 n1 = lc_mbstowcs (collate_mbtowc, collate_charset, NULL, s1, 0) + 1;
1178 ws1 = (n1 > NT_MAX_PATH ? (wchar_t *) malloc (n1 * sizeof (wchar_t))
1180 lc_mbstowcs (collate_mbtowc, collate_charset, ws1, s1, n1);
1181 n2 = lc_mbstowcs (collate_mbtowc, collate_charset, NULL, s2, 0) + 1;
1182 ws2 = (n2 > NT_MAX_PATH ? (wchar_t *) malloc (n2 * sizeof (wchar_t))
1184 lc_mbstowcs (collate_mbtowc, collate_charset, ws2, s2, n2);
1185 ret = CompareStringW (collate_lcid, 0, ws1, -1, ws2, -1);
1186 if (n1 > NT_MAX_PATH)
1188 if (n2 > NT_MAX_PATH)
/* Subtracting CSTR_EQUAL turns the CompareStringW result into a value
   that is negative, zero or positive.  */
1192 return ret - CSTR_EQUAL;
/* Transform the wide string WS2 into a sort key stored in WS1, which has room
   for WSN wchar_t elements, using the collation LCID in collate_lcid.
   Declarations, the test selecting the wcslcpy path and the error handling
   are not visible in this excerpt. */
1196 wcsxfrm (wchar_t *ws1, const wchar_t *ws2, size_t wsn)
/* Plain copy path; the test that selects it is not visible here. */
1201 return wcslcpy (ws1, ws2, wsn);
/* For LCMAP_SORTKEY the destination size is given in bytes, hence
   wsn * sizeof (wchar_t).  LCMAP_BYTEREV requests byte reversal of the
   result. */
1202 ret = LCMapStringW (collate_lcid, LCMAP_SORTKEY | LCMAP_BYTEREV,
1203 ws2, -1, ws1, wsn * sizeof (wchar_t));
1204 /* LCMapStringW returns byte count including the terminating NUL character,
1205 wcsxfrm is supposed to return length in wchar_t excluding the NUL.
1206 Since the array is only single byte NUL-terminated we must make sure
1207 the result is wchar_t-NUL terminated. */
/* Round the byte count up to whole wchar_t units. */
1210 ret = (ret + 1) / sizeof (wchar_t);
/* A too-small buffer is distinguished from other LCMapStringW failures; the
   statements handling either case are not visible here. */
1216 if (GetLastError () != ERROR_INSUFFICIENT_BUFFER)
/* Transform the multibyte string S2 into a sort key stored in S1, which has
   room for SN bytes, using the collation LCID in collate_lcid.  S2 is first
   converted to wide characters with lc_mbstowcs.  Declarations, the guarding
   tests, the buffer branch for short strings and the return statements are
   not visible in this excerpt. */
1222 strxfrm (char *s1, const char *s2, size_t sn)
/* Plain copy path; the test that selects it is not visible here. */
1230 return strlcpy (s1, s2, sn);
1231 /* The ANSI version of LCMapString uses the default charset of the lcid,
1232 so we must use the Unicode version. */
/* The call with a NULL destination is used to size the buffer; the + 1
   presumably accounts for the terminating NUL -- confirm against
   lc_mbstowcs. */
1233 n2 = lc_mbstowcs (collate_mbtowc, collate_charset, NULL, s2, 0) + 1;
/* Conversions needing more than NT_MAX_PATH wide chars get a malloc'ed
   buffer; the alternative for shorter strings is on a line not visible here.
   NOTE(review): no check of the malloc result is visible. */
1234 ws2 = (n2 > NT_MAX_PATH ? (wchar_t *) malloc (n2 * sizeof (wchar_t))
1236 lc_mbstowcs (collate_mbtowc, collate_charset, ws2, s2, n2);
1237 /* The sort key is a NUL-terminated byte string. */
/* The destination is really the byte buffer S1; the cast only satisfies the
   LCMapStringW prototype, and SN is passed as its size. */
1238 ret = LCMapStringW (collate_lcid, LCMAP_SORTKEY, ws2, -1, (PWCHAR) s1, sn);
/* Same size test as for the allocation above; presumably it guards the
   release of the malloc'ed buffer (the guarded statement is not visible). */
1239 if (n2 > NT_MAX_PATH)
/* A too-small buffer is distinguished from other LCMapStringW failures; the
   statements handling either case are not visible here. */
1243 if (GetLastError () != ERROR_INSUFFICIENT_BUFFER)
1247 /* LCMapStringW returns byte count including the terminating NUL character.
1248 strxfrm is supposed to return length excluding the NUL. */
1252 /* Fetch default ANSI codepage from locale info and generate a setlocale
1253 compatible character set code. Called from newlib's setlocale(), if the
1254 charset isn't given explicitly in the POSIX compatible locale specifier. */
/* LOCALE is the POSIX locale specifier; CHARSET is the caller's buffer that
   receives the chosen charset name.  The LCID of LOCALE and its default ANSI
   codepage are looked up, and the result is refined by the per-LCID tests
   below.  Only the LCID tests are visible in this excerpt: the dispatch on
   the codepage, most assignments of the charset and the declarations are on
   lines that are not shown. */
1256 __set_charset_from_locale (const char *locale, char *charset)
1259 LCID lcid = __get_lcid_from_locale (locale);
1262 /* "C" locale, or invalid locale? */
1263 if (lcid == 0 || lcid == (LCID) -1)
/* LOCALE_RETURN_NUMBER asks GetLocaleInfoW to store the codepage as a number
   in CP instead of as a string, hence the pointer cast. */
1265 else if (!GetLocaleInfoW (lcid,
1266 LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER,
1267 (PWCHAR) &cp, sizeof cp))
1269 /* Translate codepage and lcid to a charset closely aligned with the default
1270 charsets defined in Glibc. */
/* MODIFIER is the "@..." suffix of LOCALE including the '@', or "" if there
   is none.  It is the string the has_modifier macro compares against. */
1272 const char *modifier = strchr (locale, '@') ?: "";
1294 if (lcid == 0x081a /* sr_CS (Serbian Language/Former
1295 Serbia and Montenegro) */
1296 || lcid == 0x181a /* sr_BA (Serbian Language/Bosnia
1298 || lcid == 0x241a /* sr_RS (Serbian Language/Serbia) */
1299 || lcid == 0x2c1a /* sr_ME (Serbian Language/Montenegro)*/
1300 || lcid == 0x0442) /* tk_TM (Turkmen/Turkmenistan) */
1302 else if (lcid == 0x041c) /* sq_AL (Albanian/Albania) */
1308 if (lcid == 0x0c1a /* sr_CS (Serbian Language/Former
1309 Serbia and Montenegro) */
1310 || lcid == 0x1c1a /* sr_BA (Serbian Language/Bosnia
1312 || lcid == 0x281a /* sr_RS (Serbian Language/Serbia) */
1313 || lcid == 0x301a /* sr_ME (Serbian Language/Montenegro)*/
1314 || lcid == 0x0440 /* ky_KG (Kyrgyz/Kyrgyzstan) */
1315 || lcid == 0x0843 /* uz_UZ (Uzbek/Uzbekistan) */
1316 /* tt_RU (Tatar/Russia),
1318 || (lcid == 0x0444 && has_modifier ("@iqtelif"))
1319 || lcid == 0x0450) /* mn_MN (Mongolian/Mongolia) */
1321 else if (lcid == 0x0423) /* be_BY (Belarusian/Belarus) */
1322 cs = has_modifier ("@latin") ? "UTF-8" : "CP1251";
1323 else if (lcid == 0x0402) /* bg_BG (Bulgarian/Bulgaria) */
1325 else if (lcid == 0x0422) /* uk_UA (Ukrainian/Ukraine) */
1331 if (lcid == 0x0452) /* cy_GB (Welsh/Great Britain) */
1333 else if (lcid == 0x4009 /* en_IN (English/India) */
1334 || lcid == 0x0464 /* fil_PH (Filipino/Philippines) */
1335 || lcid == 0x0462 /* fy_NL (Frisian/Netherlands) */
1336 || lcid == 0x0468 /* ha_NG (Hausa/Nigeria) */
1337 || lcid == 0x0470 /* ig_NG (Igbo/Nigeria) */
1338 || lcid == 0x046c /* nso_ZA (Northern Sotho/South Africa) */
1339 || lcid == 0x0487 /* rw_RW (Kinyarwanda/Rwanda) */
1340 || lcid == 0x043b /* se_NO (Northern Saami/Norway) */
1341 || lcid == 0x0432 /* tn_ZA (Tswana/South Africa) */
1342 || lcid == 0x0488 /* wo_SN (Wolof/Senegal) */
1343 || lcid == 0x046a /* yo_NG (Yoruba/Nigeria) */
1344 || lcid == 0x085d) /* iu_CA (Inuktitut/Canada) */
1346 else if (lcid == 0x042e) /* hsb_DE (Upper Sorbian/Germany) */
1348 else if (lcid == 0x0491 /* gd_GB (Scots Gaelic/Great Britain) */
1349 || (has_modifier ("@euro")
1350 && GetLocaleInfoW (lcid, LOCALE_SINTLSYMBOL, wbuf, 9)
1351 && !wcsncmp (wbuf, L"EUR", 3)))
1360 if (lcid == 0x042c) /* az_AZ (Azeri/Azerbaijan) */
1362 else if (lcid == 0x0443) /* uz_UZ (Uzbek/Uzbekistan) */
1371 if (lcid == 0x0429 /* fa_IR (Persian/Iran) */
1372 || lcid == 0x0480 /* ug_CN (Uyghur/China) */
1373 || lcid == 0x0420) /* ur_PK (Urdu/Pakistan) */
1379 if (lcid == 0x0425) /* et_EE (Estonian/Estonia) */
1386 if (lcid == 0x0481) /* mi_NZ (Maori/New Zealand) */
1388 else if (lcid == 0x043a) /* mt_MT (Maltese/Malta) */
1390 else if (lcid == 0x0437) /* ka_GE (Georgian/Georgia) */
1392 else if (lcid == 0x043f) /* kk_KZ (Kazakh/Kazakhstan) */
/* Hand the selected charset name back to the caller.
   NOTE(review): the copy is unbounded -- presumably CHARSET is at least
   ENCODING_LEN + 1 bytes; confirm against the caller. */
1398 stpcpy (charset, cs);
1401 /* This function is called from newlib's loadlocale if the locale identifier
1402 was invalid, one way or the other. It looks for the file
1404 /usr/share/locale/locale.alias
1406 which is part of the gettext package, and if it finds the locale alias
1407 in that file, it replaces the locale with the correct locale string from
1410 If successful, it returns a pointer to new_locale, NULL otherwise.*/
/* LOCALE is the identifier to look up and NEW_LOCALE the caller's buffer for
   the replacement.  The comparison is done on wide-character strings.
   Several statements (declarations, loop control, closing the file, the
   return) are not visible in this excerpt. */
1412 __set_locale_from_locale_alias (const char *locale, char *new_locale)
1414 wchar_t wlocale[ENCODING_LEN + 1];
1415 wchar_t walias[ENCODING_LEN + 1];
/* Size of the line buffer: fgets below reads at most this many characters
   per call. */
1416 #define LOCALE_ALIAS_LINE_LEN 255
1417 char alias_buf[LOCALE_ALIAS_LINE_LEN + 1], *c;
1419 const char *alias, *replace;
/* NOTE(review): the NULL check of fp and the matching fclose are not visible
   in this excerpt -- confirm they exist in the full source. */
1422 FILE *fp = fopen ("/usr/share/locale/locale.alias", "rt");
1425 /* The incoming locale is given in the application charset, or in
1426 the Cygwin internal charset. We try both. */
1427 if (mbstowcs (wlocale, locale, ENCODING_LEN + 1) == (size_t) -1)
1428 sys_mbstowcs (wlocale, ENCODING_LEN + 1, locale);
/* Force termination in case the conversion filled the whole buffer. */
1429 wlocale[ENCODING_LEN] = L'\0';
1430 /* Ignore @cjknarrow modifier since it's a very personal thing between
1431 Cygwin and newlib... */
1432 if ((wc = wcschr (wlocale, L'@')) && !wcscmp (wc + 1, L"cjknarrow"))
/* Scan the alias file line by line. */
1434 while (fgets (alias_buf, LOCALE_ALIAS_LINE_LEN + 1, fp))
1436 alias_buf[LOCALE_ALIAS_LINE_LEN] = '\0';
/* Locate the trailing newline; what is done with it is not visible here. */
1437 c = strrchr (alias_buf, '\n');
/* Skip leading blanks and tabs, then ignore empty lines and '#' comments. */
1441 c += strspn (c, " \t");
1442 if (!*c || *c == '#')
/* The strcspn/strspn pairs step over one blank-delimited word and the
   whitespace after it.  ALIAS and REPLACE are presumably set to the first
   and second word on lines not visible here. */
1445 c += strcspn (c, " \t");
1447 c += strspn (c, " \t");
1451 c += strcspn (c, " \t");
/* Replacement longer than ENCODING_LEN: the guarded statement is not visible
   (presumably the line is skipped -- confirm). */
1453 if (strlen (replace) > ENCODING_LEN)
1455 /* The file is latin1 encoded */
1456 lc_mbstowcs (__iso_mbtowc, "ISO-8859-1", walias, alias, ENCODING_LEN + 1);
1457 walias[ENCODING_LEN] = L'\0';
/* Alias matches the requested locale: copy the replacement to NEW_LOCALE.
   strcpy returns its destination, so RET points to NEW_LOCALE. */
1458 if (!wcscmp (wlocale, walias))
1460 ret = strcpy (new_locale, replace);
/* Check whether the Windows codepage needed by the current multibyte
   conversion function (__mbtowc) is usable, by probing it with GetCPInfo.
   RET is the setlocale result under test.  The setlocale wrapper in this
   file treats a NULL result as "codepage not available"; the return
   statements, the declarations of cp and cpi and the codepage assignments
   for the individual branches are not visible in this excerpt. */
1469 check_codepage (char *ret)
/* The probing is only done on systems where wincap reports that not all
   codepages are always present. */
1471 if (!wincap.has_always_all_codepages ())
1473 /* Prior to Windows Vista, many codepages are not installed by
1474 default, or can be deinstalled. The following codepages require
1475 that the respective conversion tables are installed into the OS.
1476 So we check if they are installed and if not, setlocale should
1480 if (__mbtowc == __sjis_mbtowc)
1482 else if (__mbtowc == __eucjp_mbtowc)
1484 else if (__mbtowc == __gbk_mbtowc)
1486 else if (__mbtowc == __kr_mbtowc)
1488 else if (__mbtowc == __big5_mbtowc)
1490 if (cp && !GetCPInfo (cp, &cpi)
1491 && GetLastError () == ERROR_INVALID_PARAMETER)
1497 /* Can be called via cygwin_internal (CW_INTERNAL_SETLOCALE) for application
1498 which really (think they) know what they are doing. */
/* Switch Cygwin's internal charset (cygheap->locale) to the charset reported
   by __locale_charset (), and re-encode the POSIX CWD and $PATH, which are
   stored as multibyte strings, in the new charset.  Some declarations
   (e.g. of tp and path), the early return and the branch structure are not
   visible in this excerpt. */
1500 internal_setlocale ()
1502 /* Each setlocale from the environment potentially changes the
1503 multibyte representation of the CWD. Therefore we have to
1504 reevaluate the CWD's posix path and store in the new charset.
1505 Same for the PATH environment variable. */
1506 /* FIXME: Other buffered paths might be affected as well. */
1507 /* FIXME: It could be necessary to convert the entire environment,
1511 wchar_t *w_path = NULL, *w_cwd;
1513 /* Don't do anything if the charset hasn't actually changed. */
1514 if (strcmp (cygheap->locale.charset, __locale_charset ()) == 0)
1517 debug_printf ("Cygwin charset changed from %s to %s",
1518 cygheap->locale.charset, __locale_charset ());
1519 /* Fetch PATH and CWD and convert to wchar_t in previous charset. */
1520 path = getenv ("PATH");
1521 if (path && *path) /* $PATH can be potentially unset. */
/* 32768 is the element count passed for the buffers obtained from tp;
   presumably it matches their size -- confirm. */
1523 w_path = tp.w_get ();
1524 sys_mbstowcs (w_path, 32768, path);
1526 w_cwd = tp.w_get ();
/* The CWD lock is held from here until the release call below, i.e. across
   the charset switch and the rewrite of the POSIX CWD. */
1527 cwdstuff::cwd_lock.acquire ();
1528 sys_mbstowcs (w_cwd, 32768, cygheap->cwd.get_posix ());
1529 /* Set charset for internal conversion functions. */
/* Only the first character is tested.  Going by the visible assignments, a
   charset starting with 'A' (ASCII) appears to select the UTF-8 conversion
   functions, anything else the current __mbtowc/__wctomb. */
1530 if (*__locale_charset () == 'A'/*SCII*/)
1532 cygheap->locale.mbtowc = __utf8_mbtowc;
1533 cygheap->locale.wctomb = __utf8_wctomb;
1537 cygheap->locale.mbtowc = __mbtowc;
1538 cygheap->locale.wctomb = __wctomb;
1540 strcpy (cygheap->locale.charset, __locale_charset ());
1541 /* Restore CWD and PATH in new charset. */
1542 cygheap->cwd.reset_posix (w_cwd);
1543 cwdstuff::cwd_lock.release ();
/* Convert the saved wide-character PATH back to multibyte and overwrite the
   environment variable.  The test guarding this part is not visible;
   presumably it is skipped when w_path is still NULL -- confirm. */
1546 char *c_path = tp.c_get ();
1547 sys_wcstombs (c_path, 32768, w_path);
1548 setenv ("PATH", c_path, 1);
1552 /* Called from dll_crt0_1, before fetching the command line from Windows.
1553 Set the internal charset according to the environment locale settings.
1554 Check if a required codepage is available, and only switch internal
1556 Make sure to reset the application locale to "C" per POSIX. */
/* The reset to "C" mentioned above is not visible in this excerpt. */
1558 initial_setlocale ()
/* An empty locale string selects LC_CTYPE from the environment. */
1560 char *ret = _setlocale_r (_REENT, LC_CTYPE, "");
/* Switch the internal charset only if setlocale succeeded and
   check_codepage returned non-NULL. */
1561 if (ret && check_codepage (ret))
1562 internal_setlocale ();
1565 /* Like newlib's setlocale, but additionally check if the charset needs
1566 OS support and the required codepage is actually installed. If codepage
1567 is not available, revert to previous locale and return NULL. For details
1568 about codepage availability, see the comment in check_codepage() above. */
/* CATEGORY and LOCALE are passed through to _setlocale_r.  The return type
   and the final return statement are not visible in this excerpt. */
1570 setlocale (int category, const char *locale)
/* Buffer for the previous locale string: one slot of ENCODING_LEN + 2 bytes
   for each category value up to LC_MESSAGES, the extra byte per slot being
   for the "/" named in the inline comment. */
1572 char old[(LC_MESSAGES + 1) * (ENCODING_LEN + 1/*"/"*/ + 1)];
/* Save the current setting only when a change is requested on a system
   where codepages may be missing.  OLD stays uninitialized otherwise.
   NOTE(review): this relies on check_codepage never returning NULL when
   has_always_all_codepages () is true -- confirm. */
1573 if (locale && !wincap.has_always_all_codepages ())
1574 stpcpy (old, _setlocale_r (_REENT, category, NULL));
1575 char *ret = _setlocale_r (_REENT, category, locale);
/* If the new locale was set but check_codepage returns NULL, RET becomes
   NULL and the saved locale is restored. */
1576 if (ret && locale && !(ret = check_codepage (ret)))
1577 _setlocale_r (_REENT, category, old);