winsup/cygwin/nlsfuncs.cc

   1 /* nlsfuncs.cc: NLS helper functions
   2
   3    Copyright 2010 Red Hat, Inc.
   4
   5 This file is part of Cygwin.
   6
   7 This software is a copyrighted work licensed under the terms of the
   8 Cygwin license.  Please consult the file "CYGWIN_LICENSE" for
   9 details. */
  10
  11 #include "winsup.h"
  12 #include <winnls.h>
  13 #include <stdio.h>
  14 #include <stdlib.h>
  15 #include <locale.h>
  16 #include <wchar.h>
  17 #include "path.h"
  18 #include "fhandler.h"
  19 #include "dtable.h"
  20 #include "cygheap.h"
  21 #include "tls_pbuf.h"
  22 /* Internal headers from newlib */
  23 #include "../locale/timelocal.h"
  24 #include "../locale/lctype.h"
  25 #include "../locale/lnumeric.h"
  26 #include "../locale/lmonetary.h"
  27 #include "../locale/lmessages.h"
  28 #include "lc_msg.h"
  29 #include "lc_era.h"
  30
     /* _LC(x) expands to two comma-separated arguments: the address of the
        caller's local write pointer lc_<x>_ptr, and the number of bytes
        between that pointer and the caller's local lc_<x>_end. */
  31 #define _LC(x)  &lc_##x##_ptr,lc_##x##_end-lc_##x##_ptr
  32
     /* Shorthands for the helper functions defined further below.  They are
        not self-contained: they pick up `lcid', `f_wctomb', `charset', the
        lc_<category>_ptr/lc_<category>_end buffer variables and the
        _<category>_locale structure pointer from the scope they are used
        in. */
  33 #define getlocaleinfo(category,type) \
  34             __getlocaleinfo(lcid,(type),_LC(category))
  35 #define eval_datetimefmt(type,flags) \
  36             __eval_datetimefmt(lcid,(type),(flags),&lc_time_ptr,\
  37                                lc_time_end-lc_time_ptr)
  38 #define charfromwchar(category,in) \
  39             __charfromwchar (_##category##_locale->in,_LC(category),\
  40                              f_wctomb,charset)
  41
     /* True if X is a non-empty string equal to the `modifier' string of the
        scope the macro is used in. */
  42 #define has_modifier(x) ((x)[0] && !strcmp (modifier, (x)))
  43
  44 /* Vista and later.  Not defined in w32api yet. */
  45 extern "C" {
  46 WINBASEAPI LCID WINAPI LocaleNameToLCID (LPCWSTR, DWORD);
  47 };
  48
     /* One-entry cache used by __get_lcid_from_locale: the locale name of the
        previous lookup and the value returned for it. */
  49 static char last_locale[ENCODING_LEN + 1];
  50 static LCID last_lcid;
  51
  52 /* Fetch LCID from POSIX locale specifier.
  53    Return values:
  54
  55      -1: Invalid locale
  56       0: C or POSIX
  57      >0: LCID

        The name of the last lookup and its result are kept in last_locale
        and last_lcid, so a repeated call with the same name returns at once.
        NAME is copied into buffers of ENCODING_LEN + 1 bytes without a
        length check.
  58 */
  59 static LCID
  60 __get_lcid_from_locale (const char *name)
  61 {
  62   char locale[ENCODING_LEN + 1];
  63   char *c;
  64   LCID lcid;
  65
  66   /* Speed up reusing the same locale as before, for instance in LC_ALL case. */
  67   if (!strcmp (name, last_locale))
  68     {
  69       debug_printf ("LCID=0x%04x", last_lcid);
  70       return last_lcid;
  71     }
  72   stpcpy (last_locale, name);
  73   stpcpy (locale, name);
  74   /* Store modifier for later use. */
  75   const char *modifier = strchr (last_locale, '@') ? : "";
  76   /* Drop charset and modifier */
  77   c = strchr (locale, '.');
  78   if (!c)
  79     c = strchr (locale, '@');
  80   if (c)
  81     *c = '\0';
  82   /* "POSIX" already converted to "C" in loadlocale. */
  83   if (!strcmp (locale, "C"))
  84     return last_lcid = 0;
       /* Anything else has to be of the form language_TERRITORY. */
  85   c = strchr (locale, '_');
  86   if (!c)
  87     return last_lcid = (LCID) -1;
  88   if (wincap.has_localenames ())
  89     {
  90       wchar_t wlocale[ENCODING_LEN + 1];
  91
  92       /* Convert to RFC 4646 syntax which is the standard for the locale names
  93          replacing LCIDs starting with Vista. */
  94       *c = '-';
  95       mbstowcs (wlocale, locale, ENCODING_LEN + 1);
  96       lcid = LocaleNameToLCID (wlocale, 0);
  97       if (lcid == 0)
  98         {
  99           /* Unfortunately there are a couple of locales for which no form
 100              without a Script part per RFC 4646 exists.
 101              Linux also supports no_NO which is equivalent to nb_NO. */
 102           struct {
 103             const char    *loc;
 104             const wchar_t *wloc;
 105           } sc_only_locale[] = {
 106             { "az-AZ" , L"az-Latn-AZ"  },
 107             { "bs-BA" , L"bs-Latn-BA"  },
 108             { "ha-NG" , L"ha-Latn-NG"  },
 109             { "iu-CA" , L"iu-Latn-CA"  },
 110             { "mn-CN" , L"mn-Mong-CN"  },
 111             { "no-NO" , L"nb-NO"       },
 112             { "sr-BA" , L"sr-Cyrl-BA"  },
 113             { "sr-CS" , L"sr-Cyrl-CS"  },
 114             { "sr-ME" , L"sr-Cyrl-ME"  },
 115             { "sr-RS" , L"sr-Cyrl-RS"  },
 116             { "tg-TJ" , L"tg-Cyrl-TJ"  },
 117             { "tzm-DZ", L"tzm-Latn-DZ" },
 118             { "uz-UZ" , L"uz-Latn-UZ"  },
 119             { NULL    , NULL           }
 120           };
               /* The table is ordered by first character, which allows to
                  stop scanning as soon as an entry starts with a character
                  greater than locale[0]. */
 121           for (int i = 0; sc_only_locale[i].loc
 122                           && sc_only_locale[i].loc[0] <= locale[0]; ++i)
 123             if (!strcmp (locale, sc_only_locale[i].loc))
 124               {
 125                 lcid = LocaleNameToLCID (sc_only_locale[i].wloc, 0);
 126                 if (!strncmp (locale, "sr-", 3))
 127                   {
 128                     /* Vista/2K8 is missing sr-ME and sr-RS.  It has only the
 129                        deprecated sr-CS.  So we map ME and RS to CS here. */
 130                     if (lcid == 0)
 131                       lcid = LocaleNameToLCID (L"sr-Cyrl-CS", 0);
 132                     /* "@latin" modifier for the sr_XY locales changes
 133                         collation behaviour so lcid should accommodate that
 134                         by being set to the Latin sublang. */
                         /* Keeps the primary language (low 10 bits) and uses
                            the sublanguage value one below the current one. */
 135                     if (lcid != 0 && has_modifier ("@latin"))
 136                       lcid = MAKELANGID (lcid & 0x3ff, (lcid >> 10) - 1);
 137                   }
 138                 else if (!strncmp (locale, "uz-", 3))
 139                   {
 140                     /* Equivalent for "@cyrillic" modifier in uz_UZ locale */
                         /* Here the sublanguage value one above is used. */
 141                     if (lcid != 0 && has_modifier ("@cyrillic"))
 142                       lcid = MAKELANGID (lcid & 0x3ff, (lcid >> 10) + 1);
 143                   }
 144                 break;
 145               }
 146         }
           /* A still unknown locale is reported (and cached) as invalid. */
 147       last_lcid = lcid ?: (LCID) -1;
 148       debug_printf ("LCID=0x%04x", last_lcid);
 149       return last_lcid;
 150     }
 151   /* Pre-Vista we have to loop through the LCID values and see if they
 152      match language and TERRITORY. */
 153   *c++ = '\0';
 154   /* locale now points to the language, c points to the TERRITORY */
 155   const char *language = locale;
 156   const char *territory = c;
 157   LCID lang, sublang;
 158   char iso[10];
 159
 160   /* In theory the lang part takes 10 bits (0x3ff), but up to Windows 2003 R2
 161      the highest lang value is 0x81. */
 162   for (lang = 1; lang <= 0x81; ++lang)
 163     if (GetLocaleInfo (lang, LOCALE_SISO639LANGNAME, iso, 10)
 164         && !strcmp (language, iso))
 165       break;
 166   if (lang > 0x81)
 167     lcid = 0;
       /* NOTE(review): territory was just set from c, which is non-NULL at this
          point, so this branch looks unreachable -- confirm. */
 168   else if (!territory)
 169     lcid = lang;
 170   else
 171     {
 172       /* In theory the sublang part takes 7 bits (0x3f), but up to
 173          Windows 2003 R2 the highest sublang value is 0x14. */
 174       for (sublang = 1; sublang <= 0x14; ++sublang)
 175         {
 176           lcid = (sublang << 10) | lang;
 177           if (GetLocaleInfo (lcid, LOCALE_SISO3166CTRYNAME, iso, 10)
 178               && !strcmp (territory, iso))
 179             break;
 180         }
 181       if (sublang > 0x14)
 182         lcid = 0;
 183     }
 184   if (lcid == 0 && territory)
 185     {
 186       /* Unfortunately there are four language LCID number areas representing
 187          multiple languages.  Fortunately only two of them already existed
 188          pre-Vista.  The concealed languages have to be tested explicitly,
 189          since they are not caught by the above loops.
 190          This also enables the serbian ISO 3166 territory codes which have
 191          been changed post 2003, and maps them to the old wrong (SP was never
 192          a valid ISO 3166 code) territory code sr_SP which fortunately has the
 193          same LCID as the newer sr_CS.
 194          Linux also supports no_NO which is equivalent to nb_NO. */
 195       struct {
 196         const char *loc;
 197         LCID        lcid;
 198       } ambiguous_locale[] = {
 199         { "bs_BA", MAKELANGID (LANG_BOSNIAN, 0x05)                          },
 200         { "nn_NO", MAKELANGID (LANG_NORWEGIAN, SUBLANG_NORWEGIAN_NYNORSK)   },
 201         { "no_NO", MAKELANGID (LANG_NORWEGIAN, SUBLANG_NORWEGIAN_BOKMAL)    },
 202         { "sr_BA", MAKELANGID (LANG_BOSNIAN,
 203                                SUBLANG_SERBIAN_BOSNIA_HERZEGOVINA_CYRILLIC) },
 204         { "sr_CS", MAKELANGID (LANG_SERBIAN, SUBLANG_SERBIAN_CYRILLIC)      },
 205         { "sr_ME", MAKELANGID (LANG_SERBIAN, SUBLANG_SERBIAN_CYRILLIC)      },
 206         { "sr_RS", MAKELANGID (LANG_SERBIAN, SUBLANG_SERBIAN_CYRILLIC)      },
 207         { "sr_SP", MAKELANGID (LANG_SERBIAN, SUBLANG_SERBIAN_CYRILLIC)      },
 208         { NULL,    0 },
 209       };
           /* Glue language and territory together again for the table lookup. */
 210       *--c = '_';
 211       for (int i = 0; ambiguous_locale[i].loc
 212                       && ambiguous_locale[i].loc[0] <= locale[0]; ++i)
 213         if (!strcmp (locale, ambiguous_locale[i].loc)
 214             && GetLocaleInfo (ambiguous_locale[i].lcid, LOCALE_SISO639LANGNAME,
 215                               iso, 10))
 216           {
 217             lcid = ambiguous_locale[i].lcid;
 218             /* "@latin" modifier for the sr_XY locales changes collation
 219                behaviour so lcid should accommodate that by being set to
 220                the Latin sublang. */
 221             if (!strncmp (locale, "sr_", 3) && has_modifier ("@latin"))
 222               lcid = MAKELANGID (lcid & 0x3ff, (lcid >> 10) - 1);
 223             break;
 224           }
 225     }
 226   else if (lcid == 0x0443)              /* uz_UZ (Uzbek/Uzbekistan) */
 227     {
 228       /* Equivalent for "@cyrillic" modifier in uz_UZ locale */
 229       if (lcid != 0 && has_modifier ("@cyrillic"))
 230         lcid = MAKELANGID (lcid & 0x3ff, (lcid >> 10) + 1);
 231     }
       /* A still unknown locale is reported (and cached) as invalid. */
 232   last_lcid = lcid ?: (LCID) -1;
 233   debug_printf ("LCID=0x%04x", last_lcid);
 234   return last_lcid;
 235 }
 236
 237 /* Never returns -1.  Just skips invalid chars instead.  Only if return_invalid
 238    is set, s==NULL returns -1 since then it's used to recognize invalid strings
 239    in the used charset. */
 240 static size_t
 241 lc_wcstombs (wctomb_p f_wctomb, const char *charset,
 242              char *s, const wchar_t *pwcs, size_t n,
 243              bool return_invalid = false)
 244 {
 245   char *ptr = s;
 246   size_t max = n;
 247   char buf[8];
 248   size_t i, bytes, num_to_copy;
 249   mbstate_t state;
 250
 251   memset (&state, 0, sizeof state);
 252   if (s == NULL)
 253     {
 254       size_t num_bytes = 0;
 255       while (*pwcs != 0)
 256         {
 257           bytes = f_wctomb (_REENT, buf, *pwcs++, charset, &state);
 258           if (bytes != (size_t) -1)
 259             num_bytes += bytes;
 260           else if (return_invalid)
 261             return (size_t) -1;
 262         }
 263       return num_bytes;
 264     }
 265   while (n > 0)
 266     {
 267       bytes = f_wctomb (_REENT, buf, *pwcs, charset, &state);
 268       if (bytes == (size_t) -1)
 269         {
 270           memset (&state, 0, sizeof state);
 271           ++pwcs;
 272           continue;
 273         }
 274       num_to_copy = (n > bytes ? bytes : n);
 275       for (i = 0; i < num_to_copy; ++i)
 276         *ptr++ = buf[i];
 277
 278       if (*pwcs == 0x00)
 279         return ptr - s - (n >= bytes);
 280       ++pwcs;
 281       n -= num_to_copy;
 282     }
 283   return max;
 284 }
 285
 286 /* Never returns -1.  Invalid sequences are translated to replacement
 287    wide-chars. */
 288 static size_t
 289 lc_mbstowcs (mbtowc_p f_mbtowc, const char *charset,
 290              wchar_t *pwcs, const char *s, size_t n)
 291 {
 292   size_t ret = 0;
 293   char *t = (char *) s;
 294   size_t bytes;
 295   mbstate_t state;
 296
 297   memset (&state, 0, sizeof state);
 298   if (!pwcs)
 299     n = 1;
 300   while (n > 0)
 301     {
 302       bytes = f_mbtowc (_REENT, pwcs, t, 6 /* fake, always enough */,
 303                         charset, &state);
 304       if (bytes == (size_t) -1)
 305         {
 306           state.__count = 0;
 307           bytes = 1;
 308           if (pwcs)
 309             *pwcs = L' ';
 310         }
 311       else if (bytes == 0)
 312         break;
 313       t += bytes;
 314       ++ret;
 315       if (pwcs)
 316         {
 317           ++pwcs;
 318           --n;
 319         }
 320     }
 321   return ret;
 322 }
 323
 324 /* bsearch comparison function.  A and B both point to a pointer to a
 325    locale name; the names are compared with strcmp. */
 326 static int
 327 locale_cmp (const void *a, const void *b)
 328 {
 329   return strcmp (*(const char *const *) a, *(const char *const *) b);
 330 }
 331
 332 /* Helper function to workaround reallocs which move blocks even if they shrink.
 333    Cygwin's realloc is not doing this, but tcsh's, for instance.  All lc_foo
 334    structures consist entirely of pointers so they are practically pointer
 335    arrays.  What we do here is just treat the lc_foo pointers as char ** and
 336    rebase all char * pointers within, up to the given size of the structure. */
 337 static void
 338 rebase_locale_buf (const void *ptrv, const void *ptrvend, const char *newbase,
 339                    const char *oldbase, const char *oldend)
 340 {
 341   const char **ptrsend = (const char **) ptrvend;
 342   for (const char **ptrs = (const char **) ptrv; ptrs < ptrsend; ++ptrs)
 343     if (*ptrs >= oldbase && *ptrs < oldend)
 344       *ptrs += newbase - oldbase;
 345 }
 346
     /* Fetch the locale information TYPE of locale LCID as wide-char string.
        The string is stored at the next 2 byte aligned address at or behind
        *PTR, where SIZE bytes are available.  On return, *PTR points behind
        the stored string including its terminating NUL.  Returns a pointer to
        the stored string.  If the Windows call fails, an empty string is
        stored, so callers never see uninitialized buffer content. */
 347 static wchar_t *
 348 __getlocaleinfo (LCID lcid, LCTYPE type, char **ptr, size_t size)
 349 {
 350   size_t num;
 351   wchar_t *ret;
 352
       /* Make sure the wchar_t's are always 2 byte aligned.  The padding byte
          is taken from the available space. */
 353   if ((uintptr_t) *ptr % 2)
         {
 354       ++*ptr;
           if (size)
             --size;
         }
 355   ret = (wchar_t *) *ptr;
       /* Don't call GetLocaleInfoW with a zero-sized buffer.  It would just
          return the required size without storing anything. */
       size_t cch = size / sizeof (wchar_t);
 356   num = cch ? GetLocaleInfoW (lcid, type, ret, cch) : 0;
       if (!num && cch)
         {
           *ret = L'\0';
           num = 1;
         }
 357   *ptr = (char *) (ret + num);
 358   return ret;
 359 }
 360
 361 /* Convert wide-char string IN to CHARSET, storing it at *PTR (SIZE bytes
 362    left).  *PTR is moved behind the result, which is returned. */
 363 static char *
 364 __charfromwchar (const wchar_t *in, char **ptr, size_t size,
 365                  wctomb_p f_wctomb, const char *charset)
 366 {
 367   char *const start = *ptr;
 368
 369   *ptr = start + lc_wcstombs (f_wctomb, charset, start, in, size) + 1;
 370   return start;
 371 }
 372
     /* Fetch the locale information TYPE of locale LCID as a number rather
        than as a string.  Returns 0 if the Windows call fails. */
 373 static UINT
 374 getlocaleint (LCID lcid, LCTYPE type)
 375 {
 376   UINT val;
       /* With LOCALE_RETURN_NUMBER the buffer receives a DWORD, but its size
          still has to be given in WCHARs, not in bytes. */
 377   return GetLocaleInfoW (lcid, type | LOCALE_RETURN_NUMBER, (PWCHAR) &val,
 378                          sizeof val / sizeof (WCHAR)) ? val : 0;
 379 }
 380
     /* Flags modifying the format conversion in __eval_datetimefmt.  The
        values are single bits and can be tested with `&'. */
 381 enum dt_flags {
 382   DT_DEFAULT    = 0,      /* No special treatment. */
 383   DT_AMPM       = 1 << 0, /* Enforce 12 hour time format. */
 384   DT_ABBREV     = 1 << 1, /* Enforce abbreviated month and day names. */
 385 };
 386
 387 static wchar_t *
 388 __eval_datetimefmt (LCID lcid, LCTYPE type, dt_flags flags, char **ptr,
 389                     size_t size)
 390 {
 391   wchar_t buf[80];
 392   wchar_t fc;
 393   size_t idx;
 394   const wchar_t *day_str = L"edaA";
 395   const wchar_t *mon_str = L"mmbB";
 396   const wchar_t *year_str = L"yyyY";
 397   const wchar_t *hour12_str = L"lI";
 398   const wchar_t *hour24_str = L"kH";
 399   const wchar_t *t_str;
 400
 401   if ((uintptr_t) *ptr % 1)
 402     ++*ptr;
 403   wchar_t *ret = (wchar_t *) *ptr;
 404   wchar_t *p = (wchar_t *) *ptr;
 405   GetLocaleInfoW (lcid, type, buf, 80);
 406   for (wchar_t *fmt = buf; *fmt; ++fmt)
 407     switch (fc = *fmt)
 408       {
 409       case L'\'':
 410         if (fmt[1] == L'\'')
 411           *p++ = L'\'';
 412         else
 413           while (fmt[1] && *++fmt != L'\'')
 414             *p++ = *fmt;
 415         break;
 416       case L'd':
 417       case L'M':
 418       case L'y':
 419         t_str = (fc == L'd' ? day_str : fc == L'M' ? mon_str : year_str);
 420         for (idx = 0; fmt[1] == fc; ++idx, ++fmt);
 421         if (idx > 3)
 422           idx = 3;
 423         if ((flags & DT_ABBREV) && fc != L'y' && idx == 3)
 424           idx = 2;
 425         *p++ = L'%';
 426         *p++ = t_str[idx];
 427         break;
 428       case L'g':
 429         /* TODO */
 430         break;
 431       case L'h':
 432       case L'H':
 433         t_str = (fc == L'h' || (flags & DT_AMPM) ? hour12_str : hour24_str);
 434         idx = 0;
 435         if (fmt[1] == fc)
 436           {
 437             ++fmt;
 438             idx = 1;
 439           }
 440         *p++ = L'%';
 441         *p++ = t_str[idx];
 442         break;
 443       case L'm':
 444       case L's':
 445       case L't':
 446         if (fmt[1] == fc)
 447           ++fmt;
 448         *p++ = L'%';
 449         *p++ = (fc == L'm' ? L'M' : fc == L's' ? L'S' : L'p');
 450         break;
 451       case L'\t':
 452       case L'\n':
 453       case L'%':
 454         *p++ = L'%';
 455         *p++ = fc;
 456         break;
 457       default:
 458         *p++ = *fmt;
 459         break;
 460       }
 461   *p++ = L'\0';
 462   *ptr = (char *) p;
 463   return ret;
 464 }
 465
 466 /* Convert Windows grouping format into POSIX grouping format. */
 467 static char *
 468 conv_grouping (LCID lcid, LCTYPE type, char **lc_ptr)
 469 {
 470   char buf[10]; /* Per MSDN max size of LOCALE_SGROUPING element incl. NUL */
 471   bool repeat = false;
 472   char *ptr = *lc_ptr;
 473   char *ret = ptr;
 474
 475   GetLocaleInfoA (lcid, type, buf, 10);
 476   /* Convert Windows grouping format into POSIX grouping format. */
 477   for (char *c = buf; *c; ++c)
 478     {
 479       if (*c < '0' || *c > '9')
 480         continue;
 481       char val = *c - '0';
 482       if (!val)
 483         {
 484           repeat = true;
 485           break;
 486         }
 487       *ptr++ = val;
 488     }
 489   if (!repeat)
 490     *ptr++ = CHAR_MAX;
 491   *ptr++ = '\0';
 492   *lc_ptr = ptr;
 493   return ret;
 494 }
 495
 496 /* Called from newlib's setlocale() via __time_load_locale() if category
 497    is LC_TIME.  Returns LC_TIME values fetched from Windows locale data
 498    in the structure pointed to by _time_locale.  This is subsequently
 499    accessed by functions like nl_langinfo, strftime, strptime.

        All strings are stored in a single malloc'ed buffer.  On success a
        buffer previously stored in *lc_time_buf is freed and replaced by
        the new one.

        Return values:

          -1: Invalid locale or out of memory
           0: "C" locale with ASCII charset, nothing has been changed
           1: Success  */
 500 extern "C" int
 501 __set_lc_time_from_win (const char *name,
 502                         const struct lc_time_T *_C_time_locale,
 503                         struct lc_time_T *_time_locale,
 504                         char **lc_time_buf, wctomb_p f_wctomb,
 505                         const char *charset)
 506 {
 507   LCID lcid = __get_lcid_from_locale (name);
 508   if (lcid == (LCID) -1)
 509     return lcid;
 510   if (!lcid && !strcmp (charset, "ASCII"))
 511     return 0;
 512
 513 # define MAX_TIME_BUFFER_SIZE   4096
 514
 515   char *new_lc_time_buf = (char *) malloc (MAX_TIME_BUFFER_SIZE);
 516   const char *lc_time_end = new_lc_time_buf + MAX_TIME_BUFFER_SIZE;
 517
 518   if (!new_lc_time_buf)
 519     return -1;
       /* lc_time_ptr always points to the first unused byte of the buffer.
          The getlocaleinfo, eval_datetimefmt and charfromwchar macros advance
          it implicitly. */
 520   char *lc_time_ptr = new_lc_time_buf;
 521
 522   /* C.foo is just a copy of "C" with fixed charset. */
 523   if (!lcid)
 524     memcpy (_time_locale, _C_time_locale, sizeof (struct lc_time_T));
 525   /* codeset */
 526   _time_locale->codeset = lc_time_ptr;
 527   lc_time_ptr = stpcpy (lc_time_ptr, charset) + 1;
 528
 529   if (lcid)
 530     {
 531       char locale[ENCODING_LEN + 1];
 532       strcpy (locale, name);
 533       /* Removes the charset from the locale and attach the modifier to the
 534          language_TERRITORY part. */
 535       char *c = strchr (locale, '.');
 536       if (c)
 537         {
 538           *c = '\0';
 539           char *c2 = strchr (c + 1, '@');
 540           /* Ignore @cjknarrow modifier since it's a very personal thing between
 541              Cygwin and newlib... */
 542           if (c2 && strcmp (c2, "@cjknarrow"))
 543             memmove (c, c2, strlen (c2) + 1);
 544         }
 545       /* Now search in the alphabetically ordered lc_era array for the
 546          locale. */
 547       lc_era_t locale_key = { locale, NULL, NULL, NULL, NULL, NULL ,
 548                                       NULL, NULL, NULL, NULL, NULL };
 549       lc_era_t *era = (lc_era_t *) bsearch ((void *) &locale_key, (void *) lc_era,
 550                                             sizeof lc_era / sizeof *lc_era,
 551                                             sizeof *lc_era, locale_cmp);
 552
 553       /* mon */
 554       /* Windows has a bug in Japanese and Korean locales.  In these
 555          locales, strings returned for LOCALE_SABBREVMONTHNAME* are missing
 556          the suffix representing a month.  Unfortunately this is not
 557          documented in English.  A Japanese article describing the problem
 558          is http://msdn.microsoft.com/ja-jp/library/cc422084.aspx
 559          The workaround is to use LOCALE_SMONTHNAME* in these locales,
 560          even for the abbreviated month name. */
 561       const LCTYPE mon_base =
 562                 lcid == MAKELANGID (LANG_JAPANESE, SUBLANG_JAPANESE_JAPAN)
 563                 || lcid == MAKELANGID (LANG_KOREAN, SUBLANG_KOREAN)
 564                 ? LOCALE_SMONTHNAME1 : LOCALE_SABBREVMONTHNAME1;
 565       for (int i = 0; i < 12; ++i)
 566         {
 567           _time_locale->wmon[i] = getlocaleinfo (time, mon_base + i);
 568           _time_locale->mon[i] = charfromwchar (time, wmon[i]);
 569         }
 570       /* month and alt_month */
 571       for (int i = 0; i < 12; ++i)
 572         {
 573           _time_locale->wmonth[i] = getlocaleinfo (time, LOCALE_SMONTHNAME1 + i);
 574           _time_locale->month[i] = _time_locale->alt_month[i]
 575                                  = charfromwchar (time, wmonth[i]);
 576         }
 577       /* wday */
           /* Entry 0 is taken from the Windows day name 7, entries 1 to 6 from
              the Windows day names 1 to 6. */
 578       _time_locale->wwday[0] = getlocaleinfo (time, LOCALE_SABBREVDAYNAME7);
 579       _time_locale->wday[0] = charfromwchar (time, wwday[0]);
 580       for (int i = 0; i < 6; ++i)
 581         {
 582           _time_locale->wwday[i + 1] = getlocaleinfo (time,
 583                                                       LOCALE_SABBREVDAYNAME1 + i);
 584           _time_locale->wday[i + 1] = charfromwchar (time, wwday[i + 1]);
 585         }
 586       /* weekday */
 587       _time_locale->wweekday[0] = getlocaleinfo (time, LOCALE_SDAYNAME7);
 588       _time_locale->weekday[0] = charfromwchar (time, wweekday[0]);
 589       for (int i = 0; i < 6; ++i)
 590         {
 591           _time_locale->wweekday[i + 1] = getlocaleinfo (time,
 592                                                          LOCALE_SDAYNAME1 + i);
 593           _time_locale->weekday[i + 1] = charfromwchar (time, wweekday[i + 1]);
 594         }
 595       size_t len;
           /* For the following formats a non-empty entry in the lc_era table
              takes precedence over the format derived from Windows. */
 596       /* X_fmt */
 597       if (era && *era->t_fmt)
 598         {
 599           _time_locale->wX_fmt = (const wchar_t *) lc_time_ptr;
 600           lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wX_fmt,
 601                                           era->t_fmt) + 1);
 602         }
 603       else
 604         _time_locale->wX_fmt = eval_datetimefmt (LOCALE_STIMEFORMAT, DT_DEFAULT);
 605       _time_locale->X_fmt = charfromwchar (time, wX_fmt);
 606       /* x_fmt */
 607       if (era && *era->d_fmt)
 608         {
 609           _time_locale->wx_fmt = (const wchar_t *) lc_time_ptr;
 610           lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wx_fmt,
 611                                           era->d_fmt) + 1);
 612         }
 613       else
 614         _time_locale->wx_fmt = eval_datetimefmt (LOCALE_SSHORTDATE, DT_DEFAULT);
 615       _time_locale->x_fmt = charfromwchar (time, wx_fmt);
 616       /* c_fmt */
 617       if (era && *era->d_t_fmt)
 618         {
 619           _time_locale->wc_fmt = (const wchar_t *) lc_time_ptr;
 620           lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wc_fmt,
 621                                           era->d_t_fmt) + 1);
 622         }
 623       else
 624         {
               /* Replace the NUL terminating the date format with a space and
                  let the time format follow right behind it, so that wc_fmt
                  is the string "<date format> <time format>". */
 625           _time_locale->wc_fmt = eval_datetimefmt (LOCALE_SLONGDATE, DT_ABBREV);
 626           ((wchar_t *) lc_time_ptr)[-1] = L' ';
 627           eval_datetimefmt (LOCALE_STIMEFORMAT, DT_DEFAULT);
 628         }
 629       _time_locale->c_fmt = charfromwchar (time, wc_fmt);
 630       /* AM/PM */
 631       _time_locale->wam_pm[0] = getlocaleinfo (time, LOCALE_S1159);
 632       _time_locale->wam_pm[1] = getlocaleinfo (time, LOCALE_S2359);
 633       _time_locale->am_pm[0] = charfromwchar (time, wam_pm[0]);
 634       _time_locale->am_pm[1] = charfromwchar (time, wam_pm[1]);
 635       /* date_fmt */
 636       if (era && *era->date_fmt)
 637         {
 638           _time_locale->wdate_fmt = (const wchar_t *) lc_time_ptr;
 639           lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wdate_fmt,
 640                                           era->date_fmt) + 1);
 641         }
 642       else
 643         _time_locale->wdate_fmt = _time_locale->wc_fmt;
 644       _time_locale->date_fmt = charfromwchar (time, wdate_fmt);
 645       /* md */
 646       {
             /* A LOCALE_IDATE string starting with '1' selects "dm", anything
                else "md". */
 647         wchar_t buf[80];
 648         GetLocaleInfoW (lcid, LOCALE_IDATE, buf, 80);
 649         _time_locale->md_order = (const char *) lc_time_ptr;
 650         lc_time_ptr = stpcpy (lc_time_ptr, *buf == L'1' ? "dm" : "md") + 1;
 651       }
 652       /* ampm_fmt */
           /* Unlike above, the lc_era entry is used here even if it's empty. */
 653       if (era)
 654         {
 655           _time_locale->wampm_fmt = (const wchar_t *) lc_time_ptr;
 656           lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wampm_fmt,
 657                                           era->t_fmt_ampm) + 1);
 658         }
 659       else
 660         _time_locale->wampm_fmt = eval_datetimefmt (LOCALE_STIMEFORMAT, DT_AMPM);
 661       _time_locale->ampm_fmt = charfromwchar (time, wampm_fmt);
 662
 663       if (era)
 664         {
 665           /* Evaluate string length in target charset.  Characters invalid in the
 666              target charset are simply ignored, as on Linux. */
 667           len = 0;
 668           len += lc_wcstombs (f_wctomb, charset, NULL, era->era, 0) + 1;
 669           len += lc_wcstombs (f_wctomb, charset, NULL, era->era_d_fmt, 0) + 1;
 670           len += lc_wcstombs (f_wctomb, charset, NULL, era->era_d_t_fmt, 0) + 1;
 671           len += lc_wcstombs (f_wctomb, charset, NULL, era->era_t_fmt, 0) + 1;
 672           len += lc_wcstombs (f_wctomb, charset, NULL, era->alt_digits, 0) + 1;
 673           len += (wcslen (era->era) + 1) * sizeof (wchar_t);
 674           len += (wcslen (era->era_d_fmt) + 1) * sizeof (wchar_t);
 675           len += (wcslen (era->era_d_t_fmt) + 1) * sizeof (wchar_t);
 676           len += (wcslen (era->era_t_fmt) + 1) * sizeof (wchar_t);
 677           len += (wcslen (era->alt_digits) + 1) * sizeof (wchar_t);
 678
 679           /* Make sure data fits into the buffer */
 680           if (lc_time_ptr + len > lc_time_end)
 681             {
 682               len = lc_time_ptr + len - new_lc_time_buf;
 683               char *tmp = (char *) realloc (new_lc_time_buf, len);
                   /* If the buffer can't be enlarged, just go without the era
                      information. */
 684               if (!tmp)
 685                 era = NULL;
 686               else
 687                 {
                       /* The block has been moved, so the pointers stored in
                          _time_locale so far have to follow. */
 688                   if (tmp != new_lc_time_buf)
 689                     rebase_locale_buf (_time_locale, _time_locale + 1, tmp,
 690                                        new_lc_time_buf, lc_time_ptr);
 691                   lc_time_ptr = tmp + (lc_time_ptr - new_lc_time_buf);
 692                   new_lc_time_buf = tmp;
 693                   lc_time_end = new_lc_time_buf + len;
 694                 }
 695             }
 696           /* Copy over */
 697           if (era)
 698             {
 699               /* era */
 700               _time_locale->wera = (const wchar_t *) lc_time_ptr;
 701               lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wera,
 702                                               era->era) + 1);
 703               _time_locale->era = charfromwchar (time, wera);
 704               /* era_d_fmt */
 705               _time_locale->wera_d_fmt = (const wchar_t *) lc_time_ptr;
 706               lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wera_d_fmt,
 707                                               era->era_d_fmt) + 1);
 708               _time_locale->era_d_fmt = charfromwchar (time, wera_d_fmt);
 709               /* era_d_t_fmt */
 710               _time_locale->wera_d_t_fmt = (const wchar_t *) lc_time_ptr;
 711               lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wera_d_t_fmt,
 712                                               era->era_d_t_fmt) + 1);
 713               _time_locale->era_d_t_fmt = charfromwchar (time, wera_d_t_fmt);
 714               /* era_t_fmt */
 715               _time_locale->wera_t_fmt = (const wchar_t *) lc_time_ptr;
 716               lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wera_t_fmt,
 717                                               era->era_t_fmt) + 1);
 718               _time_locale->era_t_fmt = charfromwchar (time, wera_t_fmt);
 719               /* alt_digits */
 720               _time_locale->walt_digits = (const wchar_t *) lc_time_ptr;
 721               lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->walt_digits,
 722                                               era->alt_digits) + 1);
 723               _time_locale->alt_digits = charfromwchar (time, walt_digits);
 724             }
 725         }
 726       if (!era)
 727         {
               /* No era information: let all era related members point to one
                  shared empty string. */
 728           _time_locale->wera =
 729           _time_locale->wera_d_fmt =
 730           _time_locale->wera_d_t_fmt =
 731           _time_locale->wera_t_fmt =
 732           _time_locale->walt_digits = (const wchar_t *) lc_time_ptr;
 733           _time_locale->era =
 734           _time_locale->era_d_fmt =
 735           _time_locale->era_d_t_fmt =
 736           _time_locale->era_t_fmt =
 737           _time_locale->alt_digits = (const char *) lc_time_ptr;
 738           /* Twice, to make sure wide char strings are correctly terminated. */
 739           *lc_time_ptr++ = '\0';
 740           *lc_time_ptr++ = '\0';
 741         }
 742     }
 743
       /* Shrink the buffer to the size actually used. */
 744   char *tmp = (char *) realloc (new_lc_time_buf, lc_time_ptr - new_lc_time_buf);
 745   if (!tmp)
 746     {
 747       free (new_lc_time_buf);
 748       return -1;
 749     }
 750   if (tmp != new_lc_time_buf)
 751     rebase_locale_buf (_time_locale, _time_locale + 1, tmp,
 752                        new_lc_time_buf, lc_time_ptr);
       /* Release the buffer of the previous LC_TIME setting, if any. */
 753   if (*lc_time_buf)
 754     free (*lc_time_buf);
 755   *lc_time_buf = tmp;
 756   return 1;
 757 }
 758
 759 /* Called from newlib's setlocale() via __ctype_load_locale() if category
 760    is LC_CTYPE.  Returns LC_CTYPE values fetched from Windows locale data
 761    in the structure pointed to by _ctype_locale.  This is subsequently
 762    accessed by functions like nl_langinfo, localeconv, printf, etc. */
 763 extern "C" int
 764 __set_lc_ctype_from_win (const char *name,
 765                          const struct lc_ctype_T *_C_ctype_locale,
 766                          struct lc_ctype_T *_ctype_locale,
 767                          char **lc_ctype_buf, wctomb_p f_wctomb,
 768                          const char *charset, int mb_cur_max)
 769 {
 770   LCID lcid = __get_lcid_from_locale (name);
 771   if (lcid == (LCID) -1)
 772     return lcid;
 773   if (!lcid && !strcmp (charset, "ASCII"))
 774     return 0;
 775
 776 # define MAX_CTYPE_BUFFER_SIZE  256
 777
 778   char *new_lc_ctype_buf = (char *) malloc (MAX_CTYPE_BUFFER_SIZE);
 779
 780   if (!new_lc_ctype_buf)
 781     return -1;
 782   char *lc_ctype_ptr = new_lc_ctype_buf;
 783   /* C.foo is just a copy of "C" with fixed charset. */
 784   if (!lcid)
 785     memcpy (_ctype_locale, _C_ctype_locale, sizeof (struct lc_ctype_T));
 786   /* codeset */
 787   _ctype_locale->codeset = lc_ctype_ptr;
 788   lc_ctype_ptr = stpcpy (lc_ctype_ptr, charset) + 1;
 789   /* mb_cur_max */
 790   _ctype_locale->mb_cur_max = lc_ctype_ptr;
 791   *lc_ctype_ptr++ = mb_cur_max;
 792   *lc_ctype_ptr++ = '\0';
 793   if (lcid)
 794     {
 795       /* outdigits and woutdigits */
 796       wchar_t digits[11];
 797       GetLocaleInfoW (lcid, LOCALE_SNATIVEDIGITS, digits, 11);
 798       for (int i = 0; i <= 9; ++i)
 799         {
 800           mbstate_t state;
 801
 802           /* Make sure the wchar_t's are always 2 byte aligned. */
 803           if ((uintptr_t) lc_ctype_ptr % 2)
 804             ++lc_ctype_ptr;
 805           wchar_t *woutdig = (wchar_t *) lc_ctype_ptr;
 806           _ctype_locale->woutdigits[i] = (const wchar_t *) woutdig;
 807           *woutdig++ = digits[i];
 808           *woutdig++ = L'\0';
 809           lc_ctype_ptr = (char *) woutdig;
 810           _ctype_locale->outdigits[i] = lc_ctype_ptr;
 811           memset (&state, 0, sizeof state);
 812           lc_ctype_ptr += f_wctomb (_REENT, lc_ctype_ptr, digits[i], charset,
 813                                       &state);
 814           *lc_ctype_ptr++ = '\0';
 815         }
 816     }
 817
 818   char *tmp = (char *) realloc (new_lc_ctype_buf,
 819                                 lc_ctype_ptr - new_lc_ctype_buf);
 820   if (!tmp)
 821     {
 822       free (new_lc_ctype_buf);
 823       return -1;
 824     }
 825   if (tmp != new_lc_ctype_buf)
 826     rebase_locale_buf (_ctype_locale, _ctype_locale + 1, tmp,
 827                        new_lc_ctype_buf, lc_ctype_ptr);
 828   if (*lc_ctype_buf)
 829     free (*lc_ctype_buf);
 830   *lc_ctype_buf = tmp;
 831   return 1;
 832 }
 833
 834 /* Called from newlib's setlocale() via __numeric_load_locale() if category
 835    is LC_NUMERIC.  Returns LC_NUMERIC values fetched from Windows locale data
 836    in the structure pointed to by _numeric_locale.  This is subsequently
 837    accessed by functions like nl_langinfo, localeconv, printf, etc. */
 838 extern "C" int
 839 __set_lc_numeric_from_win (const char *name,
 840                            const struct lc_numeric_T *_C_numeric_locale,
 841                            struct lc_numeric_T *_numeric_locale,
 842                            char **lc_numeric_buf, wctomb_p f_wctomb,
 843                            const char *charset)
 844 {
 845   LCID lcid = __get_lcid_from_locale (name);
 846   if (lcid == (LCID) -1)
 847     return lcid;
 848   if (!lcid && !strcmp (charset, "ASCII"))
 849     return 0;
 850
 851 # define MAX_NUMERIC_BUFFER_SIZE        256
 852
 853   char *new_lc_numeric_buf = (char *) malloc (MAX_NUMERIC_BUFFER_SIZE);
 854   const char *lc_numeric_end = new_lc_numeric_buf + MAX_NUMERIC_BUFFER_SIZE;
 855
 856   if (!new_lc_numeric_buf)
 857     return -1;
 858   char *lc_numeric_ptr = new_lc_numeric_buf;
 859   /* C.foo is just a copy of "C" with fixed charset. */
 860   if (!lcid)
 861     memcpy (_numeric_locale, _C_numeric_locale, sizeof (struct lc_numeric_T));
 862   else
 863     {
 864       /* decimal_point */
 865       _numeric_locale->wdecimal_point = getlocaleinfo (numeric, LOCALE_SDECIMAL);
 866       _numeric_locale->decimal_point = charfromwchar (numeric, wdecimal_point);
 867       /* thousands_sep */
 868       _numeric_locale->wthousands_sep = getlocaleinfo (numeric, LOCALE_STHOUSAND);
 869       _numeric_locale->thousands_sep = charfromwchar (numeric, wthousands_sep);
 870       /* grouping */
 871       _numeric_locale->grouping = conv_grouping (lcid, LOCALE_SGROUPING,
 872                                                  &lc_numeric_ptr);
 873     }
 874   /* codeset */
 875   _numeric_locale->codeset = lc_numeric_ptr;
 876   lc_numeric_ptr = stpcpy (lc_numeric_ptr, charset) + 1;
 877
 878   char *tmp = (char *) realloc (new_lc_numeric_buf,
 879                                 lc_numeric_ptr - new_lc_numeric_buf);
 880   if (!tmp)
 881     {
 882       free (new_lc_numeric_buf);
 883       return -1;
 884     }
 885   if (tmp != new_lc_numeric_buf)
 886     rebase_locale_buf (_numeric_locale, _numeric_locale + 1, tmp,
 887                        new_lc_numeric_buf, lc_numeric_ptr);
 888   if (*lc_numeric_buf)
 889     free (*lc_numeric_buf);
 890   *lc_numeric_buf = tmp;
 891   return 1;
 892 }
 893
 894 /* Called from newlib's setlocale() via __monetary_load_locale() if category
 895    is LC_MONETARY.  Returns LC_MONETARY values fetched from Windows locale data
 896    in the structure pointed to by _monetary_locale.  This is subsequently
 897    accessed by functions like nl_langinfo, localeconv, printf, etc. */
 898 extern "C" int
 899 __set_lc_monetary_from_win (const char *name,
 900                             const struct lc_monetary_T *_C_monetary_locale,
 901                             struct lc_monetary_T *_monetary_locale,
 902                             char **lc_monetary_buf, wctomb_p f_wctomb,
 903                             const char *charset)
 904 {
 905   LCID lcid = __get_lcid_from_locale (name);
 906   if (lcid == (LCID) -1)
 907     return lcid;
 908   if (!lcid && !strcmp (charset, "ASCII"))
 909     return 0;
 910
 911 # define MAX_MONETARY_BUFFER_SIZE       512
 912
 913   char *new_lc_monetary_buf = (char *) malloc (MAX_MONETARY_BUFFER_SIZE);
 914   const char *lc_monetary_end = new_lc_monetary_buf + MAX_MONETARY_BUFFER_SIZE;
 915
 916   if (!new_lc_monetary_buf)
 917     return -1;
 918   char *lc_monetary_ptr = new_lc_monetary_buf;
 919   /* C.foo is just a copy of "C" with fixed charset. */
 920   if (!lcid)
 921     memcpy (_monetary_locale, _C_monetary_locale, sizeof (struct lc_monetary_T));
 922   else
 923     {
 924       /* int_curr_symbol */
 925       _monetary_locale->wint_curr_symbol = getlocaleinfo (monetary,
 926                                                           LOCALE_SINTLSYMBOL);
 927       /* No spacing char means space. */
 928       if (!_monetary_locale->wint_curr_symbol[3])
 929         {
 930           wchar_t *wc = (wchar_t *) _monetary_locale->wint_curr_symbol + 3;
 931           *wc++ = L' ';
 932           *wc++ = L'\0';
 933           lc_monetary_ptr = (char *) wc;
 934         }
 935       _monetary_locale->int_curr_symbol = charfromwchar (monetary,
 936                                                          wint_curr_symbol);
 937       /* currency_symbol */
 938       _monetary_locale->wcurrency_symbol = getlocaleinfo (monetary,
 939                                                           LOCALE_SCURRENCY);
 940       /* As on Linux:  If the currency_symbol can't be represented in the
 941          given charset, use int_curr_symbol. */
 942       if (lc_wcstombs (f_wctomb, charset, NULL,
 943                        _monetary_locale->wcurrency_symbol,
 944                        0, true) == (size_t) -1)
 945         _monetary_locale->currency_symbol = _monetary_locale->int_curr_symbol;
 946       else
 947         _monetary_locale->currency_symbol = charfromwchar (monetary,
 948                                                            wcurrency_symbol);
 949       /* mon_decimal_point */
 950       _monetary_locale->wmon_decimal_point = getlocaleinfo (monetary,
 951                                                             LOCALE_SMONDECIMALSEP);
 952       _monetary_locale->mon_decimal_point = charfromwchar (monetary,
 953                                                            wmon_decimal_point);
 954       /* mon_thousands_sep */
 955       _monetary_locale->wmon_thousands_sep = getlocaleinfo (monetary,
 956                                                             LOCALE_SMONTHOUSANDSEP);
 957       _monetary_locale->mon_thousands_sep = charfromwchar (monetary,
 958                                                            wmon_thousands_sep);
 959       /* mon_grouping */
 960       _monetary_locale->mon_grouping = conv_grouping (lcid, LOCALE_SMONGROUPING,
 961                                                       &lc_monetary_ptr);
 962       /* positive_sign */
 963       _monetary_locale->wpositive_sign = getlocaleinfo (monetary,
 964                                                         LOCALE_SPOSITIVESIGN);
 965       _monetary_locale->positive_sign = charfromwchar (monetary, wpositive_sign);
 966       /* negative_sign */
 967       _monetary_locale->wnegative_sign = getlocaleinfo (monetary,
 968                                                         LOCALE_SNEGATIVESIGN);
 969       _monetary_locale->negative_sign = charfromwchar (monetary, wnegative_sign);
 970       /* int_frac_digits */
 971       *lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_IINTLCURRDIGITS);
 972       _monetary_locale->int_frac_digits = lc_monetary_ptr++;
 973       /* frac_digits */
 974       *lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_ICURRDIGITS);
 975       _monetary_locale->frac_digits = lc_monetary_ptr++;
 976       /* p_cs_precedes and int_p_cs_precedes */
 977       *lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_IPOSSYMPRECEDES);
 978       _monetary_locale->p_cs_precedes
 979             = _monetary_locale->int_p_cs_precedes = lc_monetary_ptr++;
 980       /* p_sep_by_space and int_p_sep_by_space */
 981       *lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_IPOSSEPBYSPACE);
 982       _monetary_locale->p_sep_by_space
 983             = _monetary_locale->int_p_sep_by_space = lc_monetary_ptr++;
 984       /* n_cs_precedes and int_n_cs_precedes */
 985       *lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_INEGSYMPRECEDES);
 986       _monetary_locale->n_cs_precedes
 987             = _monetary_locale->int_n_cs_precedes = lc_monetary_ptr++;
 988       /* n_sep_by_space and int_n_sep_by_space */
 989       *lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_INEGSEPBYSPACE);
 990       _monetary_locale->n_sep_by_space
 991             = _monetary_locale->int_n_sep_by_space = lc_monetary_ptr++;
 992       /* p_sign_posn and int_p_sign_posn */
 993       *lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_IPOSSIGNPOSN);
 994       _monetary_locale->p_sign_posn
 995             = _monetary_locale->int_p_sign_posn = lc_monetary_ptr++;
 996       /* n_sign_posn and int_n_sign_posn */
 997       *lc_monetary_ptr = (char) getlocaleint (lcid, LOCALE_INEGSIGNPOSN);
 998       _monetary_locale->n_sign_posn
 999             = _monetary_locale->int_n_sign_posn = lc_monetary_ptr++;
1000     }
1001   /* codeset */
1002   _monetary_locale->codeset = lc_monetary_ptr;
1003   lc_monetary_ptr = stpcpy (lc_monetary_ptr, charset) + 1;
1004
1005   char *tmp = (char *) realloc (new_lc_monetary_buf,
1006                                 lc_monetary_ptr - new_lc_monetary_buf);
1007   if (!tmp)
1008     {
1009       free (new_lc_monetary_buf);
1010       return -1;
1011     }
1012   if (tmp != new_lc_monetary_buf)
1013     rebase_locale_buf (_monetary_locale, _monetary_locale + 1, tmp,
1014                        new_lc_monetary_buf, lc_monetary_ptr);
1015   if (*lc_monetary_buf)
1016     free (*lc_monetary_buf);
1017   *lc_monetary_buf = tmp;
1018   return 1;
1019 }
1020
1021 extern "C" int
1022 __set_lc_messages_from_win (const char *name,
1023                             const struct lc_messages_T *_C_messages_locale,
1024                             struct lc_messages_T *_messages_locale,
1025                             char **lc_messages_buf,
1026                             wctomb_p f_wctomb, const char *charset)
1027 {
1028   LCID lcid = __get_lcid_from_locale (name);
1029   if (lcid == (LCID) -1)
1030     return lcid;
1031   if (!lcid && !strcmp (charset, "ASCII"))
1032     return 0;
1033
1034   char locale[ENCODING_LEN + 1];
1035   char *c, *c2;
1036   lc_msg_t *msg = NULL;
1037
1038   /* C.foo is just a copy of "C" with fixed charset. */
1039   if (!lcid)
1040     memcpy (_messages_locale, _C_messages_locale, sizeof (struct lc_messages_T));
1041   else
1042     {
1043       strcpy (locale, name);
1044       /* Removes the charset from the locale and attach the modifer to the
1045          language_TERRITORY part. */
1046       c = strchr (locale, '.');
1047       if (c)
1048         {
1049           *c = '\0';
1050           c2 = strchr (c + 1, '@');
1051           /* Ignore @cjknarrow modifier since it's a very personal thing between
1052              Cygwin and newlib... */
1053           if (c2 && strcmp (c2, "@cjknarrow"))
1054             memmove (c, c2, strlen (c2) + 1);
1055         }
1056       /* Now search in the alphabetically order lc_msg array for the
1057          locale. */
1058       lc_msg_t locale_key = { locale, NULL, NULL, NULL, NULL };
1059       msg = (lc_msg_t *) bsearch ((void *) &locale_key, (void *) lc_msg,
1060                                   sizeof lc_msg / sizeof *lc_msg,
1061                                   sizeof *lc_msg, locale_cmp);
1062       if (!msg)
1063         return 0;
1064     }
1065
1066   /* Evaluate string length in target charset.  Characters invalid in the
1067      target charset are simply ignored, as on Linux. */
1068   size_t len = 0;
1069   len += (strlen (charset) + 1);
1070   if (lcid)
1071     {
1072       len += lc_wcstombs (f_wctomb, charset, NULL, msg->yesexpr, 0) + 1;
1073       len += lc_wcstombs (f_wctomb, charset, NULL, msg->noexpr, 0) + 1;
1074       len += lc_wcstombs (f_wctomb, charset, NULL, msg->yesstr, 0) + 1;
1075       len += lc_wcstombs (f_wctomb, charset, NULL, msg->nostr, 0) + 1;
1076       len += (wcslen (msg->yesexpr) + 1) * sizeof (wchar_t);
1077       len += (wcslen (msg->noexpr) + 1) * sizeof (wchar_t);
1078       len += (wcslen (msg->yesstr) + 1) * sizeof (wchar_t);
1079       len += (wcslen (msg->nostr) + 1) * sizeof (wchar_t);
1080       if (len % 1)
1081         ++len;
1082     }
1083   /* Allocate. */
1084   char *new_lc_messages_buf = (char *) malloc (len);
1085   const char *lc_messages_end = new_lc_messages_buf + len;
1086
1087   if (!new_lc_messages_buf)
1088     return -1;
1089   /* Copy over. */
1090   c = new_lc_messages_buf;
1091   /* codeset */
1092   _messages_locale->codeset = c;
1093   c = stpcpy (c, charset) + 1;
1094   if (lcid)
1095     {
1096       _messages_locale->yesexpr = (const char *) c;
1097       len = lc_wcstombs (f_wctomb, charset, c, msg->yesexpr, lc_messages_end - c);
1098       _messages_locale->noexpr = (const char *) (c += len + 1);
1099       len = lc_wcstombs (f_wctomb, charset, c, msg->noexpr, lc_messages_end - c);
1100       _messages_locale->yesstr = (const char *) (c += len + 1);
1101       len = lc_wcstombs (f_wctomb, charset, c, msg->yesstr, lc_messages_end - c);
1102       _messages_locale->nostr = (const char *) (c += len + 1);
1103       len = lc_wcstombs (f_wctomb, charset, c, msg->nostr, lc_messages_end - c);
1104       c += len + 1;
1105       if ((uintptr_t) c % 1)
1106         ++c;
1107       wchar_t *wc = (wchar_t *) c;
1108       _messages_locale->wyesexpr = (const wchar_t *) wc;
1109       wc = wcpcpy (wc, msg->yesexpr) + 1;
1110       _messages_locale->wnoexpr = (const wchar_t *) wc;
1111       wc = wcpcpy (wc, msg->noexpr) + 1;
1112       _messages_locale->wyesstr = (const wchar_t *) wc;
1113       wc = wcpcpy (wc, msg->yesstr) + 1;
1114       _messages_locale->wnostr = (const wchar_t *) wc;
1115       wcpcpy (wc, msg->nostr);
1116     }
1117   /* Aftermath. */
1118   if (*lc_messages_buf)
1119     free (*lc_messages_buf);
1120   *lc_messages_buf = new_lc_messages_buf;
1121   return 1;
1122 }
1123
1124 LCID collate_lcid = 0;
1125 static mbtowc_p collate_mbtowc = __ascii_mbtowc;
1126 char collate_charset[ENCODING_LEN + 1] = "ASCII";
1127
1128 /* Called from newlib's setlocale() if category is LC_COLLATE.  Stores
1129    LC_COLLATE locale information.  This is subsequently accessed by the
1130    below functions strcoll, strxfrm, wcscoll, wcsxfrm. */
1131 extern "C" int
1132 __collate_load_locale (const char *name, mbtowc_p f_mbtowc, const char *charset)
1133 {
1134   LCID lcid = __get_lcid_from_locale (name);
1135   if (lcid == (LCID) -1)
1136     return -1;
1137   collate_lcid = lcid;
1138   collate_mbtowc = f_mbtowc;
1139   stpcpy (collate_charset, charset);
1140   return 0;
1141 }
1142
1143 extern "C" const char *
1144 __get_current_collate_codeset (void)
1145 {
1146   return collate_charset;
1147 }
1148
1149 /* We use the Windows functions for locale-specific string comparison and
1150    transformation.  The advantage is that we don't need any files with
1151    collation information. */
1152 extern "C" int
1153 wcscoll (const wchar_t *ws1, const wchar_t *ws2)
1154 {
1155   int ret;
1156
1157   if (!collate_lcid)
1158     return wcscmp (ws1, ws2);
1159   ret = CompareStringW (collate_lcid, 0, ws1, -1, ws2, -1);
1160   if (!ret)
1161     set_errno (EINVAL);
1162   return ret - CSTR_EQUAL;
1163 }
1164
1165 extern "C" int
1166 strcoll (const char *s1, const char *s2)
1167 {
1168   size_t n1, n2;
1169   wchar_t *ws1, *ws2;
1170   tmp_pathbuf tp;
1171   int ret;
1172
1173   if (!collate_lcid)
1174     return strcmp (s1, s2);
1175   /* The ANSI version of CompareString uses the default charset of the lcid,
1176      so we must use the Unicode version. */
1177   n1 = lc_mbstowcs (collate_mbtowc, collate_charset, NULL, s1, 0) + 1;
1178   ws1 = (n1 > NT_MAX_PATH ? (wchar_t *) malloc (n1 * sizeof (wchar_t))
1179                           : tp.w_get ());
1180   lc_mbstowcs (collate_mbtowc, collate_charset, ws1, s1, n1);
1181   n2 = lc_mbstowcs (collate_mbtowc, collate_charset, NULL, s2, 0) + 1;
1182   ws2 = (n2 > NT_MAX_PATH ? (wchar_t *) malloc (n2 * sizeof (wchar_t))
1183                           : tp.w_get ());
1184   lc_mbstowcs (collate_mbtowc, collate_charset, ws2, s2, n2);
1185   ret = CompareStringW (collate_lcid, 0, ws1, -1, ws2, -1);
1186   if (n1 > NT_MAX_PATH)
1187     free (ws1);
1188   if (n2 > NT_MAX_PATH)
1189     free (ws2);
1190   if (!ret)
1191     set_errno (EINVAL);
1192   return ret - CSTR_EQUAL;
1193 }
1194
1195 extern "C" size_t
1196 wcsxfrm (wchar_t *ws1, const wchar_t *ws2, size_t wsn)
1197 {
1198   size_t ret;
1199
1200   if (!collate_lcid)
1201     return wcslcpy (ws1, ws2, wsn);
1202   ret = LCMapStringW (collate_lcid, LCMAP_SORTKEY | LCMAP_BYTEREV,
1203                       ws2, -1, ws1, wsn * sizeof (wchar_t));
1204   /* LCMapStringW returns byte count including the terminating NUL character,
1205      wcsxfrm is supposed to return length in wchar_t excluding the NUL.
1206      Since the array is only single byte NUL-terminated we must make sure
1207      the result is wchar_t-NUL terminated. */
1208   if (ret)
1209     {
1210       ret = (ret + 1) / sizeof (wchar_t);
1211       if (ret >= wsn)
1212         return wsn;
1213       ws1[ret] = L'\0';
1214       return ret;
1215     }
1216   if (GetLastError () != ERROR_INSUFFICIENT_BUFFER)
1217     set_errno (EINVAL);
1218   return wsn;
1219 }
1220
1221 extern "C" size_t
1222 strxfrm (char *s1, const char *s2, size_t sn)
1223 {
1224   size_t ret;
1225   size_t n2;
1226   wchar_t *ws2;
1227   tmp_pathbuf tp;
1228
1229   if (!collate_lcid)
1230     return strlcpy (s1, s2, sn);
1231   /* The ANSI version of LCMapString uses the default charset of the lcid,
1232      so we must use the Unicode version. */
1233   n2 = lc_mbstowcs (collate_mbtowc, collate_charset, NULL, s2, 0) + 1;
1234   ws2 = (n2 > NT_MAX_PATH ? (wchar_t *) malloc (n2 * sizeof (wchar_t))
1235                           : tp.w_get ());
1236   lc_mbstowcs (collate_mbtowc, collate_charset, ws2, s2, n2);
1237   /* The sort key is a NUL-terminated byte string. */
1238   ret = LCMapStringW (collate_lcid, LCMAP_SORTKEY, ws2, -1, (PWCHAR) s1, sn);
1239   if (n2 > NT_MAX_PATH)
1240     free (ws2);
1241   if (ret == 0)
1242     {
1243       if (GetLastError () != ERROR_INSUFFICIENT_BUFFER)
1244         set_errno (EINVAL);
1245       return sn;
1246     }
1247   /* LCMapStringW returns byte count including the terminating NUL character.
1248      strxfrm is supposed to return length excluding the NUL. */
1249   return ret - 1;
1250 }
1251
1252 /* Fetch default ANSI codepage from locale info and generate a setlocale
1253    compatible character set code.  Called from newlib's setlocale(), if the
1254    charset isn't given explicitely in the POSIX compatible locale specifier. */
1255 extern "C" void
1256 __set_charset_from_locale (const char *locale, char *charset)
1257 {
1258   UINT cp;
1259   LCID lcid = __get_lcid_from_locale (locale);
1260   wchar_t wbuf[9];
1261
1262   /* "C" locale, or invalid locale? */
1263   if (lcid == 0 || lcid == (LCID) -1)
1264     cp = 20127;
1265   else if (!GetLocaleInfoW (lcid,
1266                             LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER,
1267                             (PWCHAR) &cp, sizeof cp))
1268     cp = 0;
1269   /* Translate codepage and lcid to a charset closely aligned with the default
1270      charsets defined in Glibc. */
1271   const char *cs;
1272   const char *modifier = strchr (locale, '@') ?: "";
1273   switch (cp)
1274     {
1275     case 20127:
1276       cs = "ASCII";
1277       break;
1278     case 874:
1279       cs = "CP874";
1280       break;
1281     case 932:
1282       cs = "EUCJP";
1283       break;
1284     case 936:
1285       cs = "GB2312";
1286       break;
1287     case 949:
1288       cs = "EUCKR";
1289       break;
1290     case 950:
1291       cs = "BIG5";
1292       break;
1293     case 1250:
1294       if (lcid == 0x081a                /* sr_CS (Serbian Language/Former
1295                                                   Serbia and Montenegro) */
1296           || lcid == 0x181a             /* sr_BA (Serbian Language/Bosnia
1297                                                   and Herzegovina) */
1298           || lcid == 0x241a             /* sr_RS (Serbian Language/Serbia) */
1299           || lcid == 0x2c1a             /* sr_ME (Serbian Language/Montenegro)*/
1300           || lcid == 0x0442)            /* tk_TM (Turkmen/Turkmenistan) */
1301         cs = "UTF-8";
1302       else if (lcid == 0x041c)          /* sq_AL (Albanian/Albania) */
1303         cs = "ISO-8859-1";
1304       else
1305         cs = "ISO-8859-2";
1306       break;
1307     case 1251:
1308       if (lcid == 0x0c1a                /* sr_CS (Serbian Language/Former
1309                                                   Serbia and Montenegro) */
1310           || lcid == 0x1c1a             /* sr_BA (Serbian Language/Bosnia
1311                                                   and Herzegovina) */
1312           || lcid == 0x281a             /* sr_RS (Serbian Language/Serbia) */
1313           || lcid == 0x301a             /* sr_ME (Serbian Language/Montenegro)*/
1314           || lcid == 0x0440             /* ky_KG (Kyrgyz/Kyrgyzstan) */
1315           || lcid == 0x0843             /* uz_UZ (Uzbek/Uzbekistan) */
1316                                         /* tt_RU (Tatar/Russia),
1317                                                  IQTElif alphabet */
1318           || (lcid == 0x0444 && has_modifier ("@iqtelif"))
1319           || lcid == 0x0450)            /* mn_MN (Mongolian/Mongolia) */
1320         cs = "UTF-8";
1321       else if (lcid == 0x0423)          /* be_BY (Belarusian/Belarus) */
1322         cs = has_modifier ("@latin") ? "UTF-8" : "CP1251";
1323       else if (lcid == 0x0402)          /* bg_BG (Bulgarian/Bulgaria) */
1324         cs = "CP1251";
1325       else if (lcid == 0x0422)          /* uk_UA (Ukrainian/Ukraine) */
1326         cs = "KOI8-U";
1327       else
1328         cs = "ISO-8859-5";
1329       break;
1330     case 1252:
1331       if (lcid == 0x0452)               /* cy_GB (Welsh/Great Britain) */
1332         cs = "ISO-8859-14";
1333       else if (lcid == 0x4009           /* en_IN (English/India) */
1334                || lcid == 0x0464        /* fil_PH (Filipino/Philippines) */
1335                || lcid == 0x0462        /* fy_NL (Frisian/Netherlands) */
1336                || lcid == 0x0468        /* ha_NG (Hausa/Nigeria) */
1337                || lcid == 0x0470        /* ig_NG (Igbo/Nigeria) */
1338                || lcid == 0x046c        /* nso_ZA (Northern Sotho/South Africa) */
1339                || lcid == 0x0487        /* rw_RW (Kinyarwanda/Rwanda) */
1340                || lcid == 0x043b        /* se_NO (Northern Saami/Norway) */
1341                || lcid == 0x0432        /* tn_ZA (Tswana/South Africa) */
1342                || lcid == 0x0488        /* wo_SN (Wolof/Senegal) */
1343                || lcid == 0x046a        /* yo_NG (Yoruba/Nigeria) */
1344                || lcid == 0x085d)       /* iu_CA (Inuktitut/Canada) */
1345         cs = "UTF-8";
1346       else if (lcid == 0x042e)          /* hsb_DE (Upper Sorbian/Germany) */
1347         cs = "ISO-8859-2";
1348       else if (lcid == 0x0491           /* gd_GB (Scots Gaelic/Great Britain) */
1349                || (has_modifier ("@euro")
1350                    && GetLocaleInfoW (lcid, LOCALE_SINTLSYMBOL, wbuf, 9)
1351                    && !wcsncmp (wbuf, L"EUR", 3)))
1352         cs = "ISO-8859-15";
1353       else
1354         cs = "ISO-8859-1";
1355       break;
1356     case 1253:
1357       cs = "ISO-8859-7";
1358       break;
1359     case 1254:
1360       if (lcid == 0x042c)               /* az_AZ (Azeri/Azerbaijan) */
1361         cs = "UTF-8";
1362       else if (lcid == 0x0443)          /* uz_UZ (Uzbek/Uzbekistan) */
1363         cs = "ISO-8859-1";
1364       else
1365         cs = "ISO-8859-9";
1366       break;
1367     case 1255:
1368       cs = "ISO-8859-8";
1369       break;
1370     case 1256:
1371       if (lcid == 0x0429                /* fa_IR (Persian/Iran) */
1372           || lcid == 0x0480             /* ug_CN (Uyghur/China) */
1373           || lcid == 0x0420)            /* ur_PK (Urdu/Pakistan) */
1374         cs = "UTF-8";
1375       else
1376         cs = "ISO-8859-6";
1377       break;
1378     case 1257:
1379       if (lcid == 0x0425)               /* et_EE (Estonian/Estonia) */
1380         cs = "ISO-8859-15";
1381       else
1382         cs = "ISO-8859-13";
1383       break;
1384     case 1258:
1385     default:
1386       if (lcid == 0x0481)               /* mi_NZ (Maori/New Zealand) */
1387         cs = "ISO-8859-13";
1388       else if (lcid == 0x043a)          /* mt_MT (Maltese/Malta) */
1389         cs = "ISO-8859-3";
1390       else if (lcid == 0x0437)          /* ka_GE (Georgian/Georgia) */
1391         cs = "GEORGIAN-PS";
1392       else if (lcid == 0x043f)          /* kk_KZ (Kazakh/Kazakhstan) */
1393         cs = "PT154";
1394       else
1395         cs = "UTF-8";
1396       break;
1397     }
1398   stpcpy (charset, cs);
1399 }
1400
1401 /* This function is called from newlib's loadlocale if the locale identifier
1402    was invalid, one way or the other.  It looks for the file
1403
1404      /usr/share/locale/locale.alias
1405
1406    which is part of the gettext package, and if it finds the locale alias
1407    in that file, it replaces the locale with the correct locale string from
1408    that file.
1409
1410    If successful, it returns a pointer to new_locale, NULL otherwise.*/
1411 extern "C" char *
1412 __set_locale_from_locale_alias (const char *locale, char *new_locale)
1413 {
1414   wchar_t wlocale[ENCODING_LEN + 1];
1415   wchar_t walias[ENCODING_LEN + 1];
1416 #define LOCALE_ALIAS_LINE_LEN 255
1417   char alias_buf[LOCALE_ALIAS_LINE_LEN + 1], *c;
1418   wchar_t *wc;
1419   const char *alias, *replace;
1420   char *ret = NULL;
1421
1422   FILE *fp = fopen ("/usr/share/locale/locale.alias", "rt");
1423   if (!fp)
1424     return NULL;
1425   /* The incoming locale is given in the application charset, or in
1426      the Cygwin internal charset.  We try both. */
1427   if (mbstowcs (wlocale, locale, ENCODING_LEN + 1) == (size_t) -1)
1428     sys_mbstowcs (wlocale, ENCODING_LEN + 1, locale);
1429   wlocale[ENCODING_LEN] = L'\0';
1430   /* Ignore @cjknarrow modifier since it's a very personal thing between
1431      Cygwin and newlib... */
1432   if ((wc = wcschr (wlocale, L'@')) && !wcscmp (wc + 1, L"cjknarrow"))
1433     *wc = L'\0';
1434   while (fgets (alias_buf, LOCALE_ALIAS_LINE_LEN + 1, fp))
1435     {
1436       alias_buf[LOCALE_ALIAS_LINE_LEN] = '\0';
1437       c = strrchr (alias_buf, '\n');
1438       if (c)
1439         *c = '\0';
1440       c = alias_buf;
1441       c += strspn (c, " \t");
1442       if (!*c || *c == '#')
1443         continue;
1444       alias = c;
1445       c += strcspn (c, " \t");
1446       *c++ = '\0';
1447       c += strspn (c, " \t");
1448       if (*c == '#')
1449         continue;
1450       replace = c;
1451       c += strcspn (c, " \t");
1452       *c++ = '\0';
1453       if (strlen (replace) > ENCODING_LEN)
1454         continue;
1455       /* The file is latin1 encoded */
1456       lc_mbstowcs (__iso_mbtowc, "ISO-8859-1", walias, alias, ENCODING_LEN + 1);
1457       walias[ENCODING_LEN] = L'\0';
1458       if (!wcscmp (wlocale, walias))
1459         {
1460           ret = strcpy (new_locale, replace);
1461           break;
1462         }
1463     }
1464   fclose (fp);
1465   return ret;
1466 }
1467
1468 static char *
1469 check_codepage (char *ret)
1470 {
1471   if (!wincap.has_always_all_codepages ())
1472     {
1473       /* Prior to Windows Vista, many codepages are not installed by
1474          default, or can be deinstalled.  The following codepages require
1475          that the respective conversion tables are installed into the OS.
1476          So we check if they are installed and if not, setlocale should
1477          fail. */
1478       CPINFO cpi;
1479       UINT cp = 0;
1480       if (__mbtowc == __sjis_mbtowc)
1481         cp = 932;
1482       else if (__mbtowc == __eucjp_mbtowc)
1483         cp = 20932;
1484       else if (__mbtowc == __gbk_mbtowc)
1485         cp = 936;
1486       else if (__mbtowc == __kr_mbtowc)
1487         cp = 949;
1488       else if (__mbtowc == __big5_mbtowc)
1489         cp = 950;
1490       if (cp && !GetCPInfo (cp, &cpi)
1491           && GetLastError () == ERROR_INVALID_PARAMETER)
1492         return NULL;
1493     }
1494   return ret;
1495 }
1496
1497 /* Can be called via cygwin_internal (CW_INTERNAL_SETLOCALE) for application
1498    which really (think they) know what they are doing. */
1499 extern "C" void
1500 internal_setlocale ()
1501 {
1502   /* Each setlocale from the environment potentially changes the
1503      multibyte representation of the CWD.  Therefore we have to
1504      reevaluate the CWD's posix path and store in the new charset.
1505      Same for the PATH environment variable. */
1506   /* FIXME: Other buffered paths might be affected as well. */
1507   /* FIXME: It could be necessary to convert the entire environment,
1508             not just PATH. */
1509   tmp_pathbuf tp;
1510   char *path;
1511   wchar_t *w_path = NULL, *w_cwd;
1512
1513   /* Don't do anything if the charset hasn't actually changed. */
1514   if (strcmp (cygheap->locale.charset, __locale_charset ()) == 0)
1515     return;
1516
1517   debug_printf ("Cygwin charset changed from %s to %s",
1518                 cygheap->locale.charset, __locale_charset ());
1519   /* Fetch PATH and CWD and convert to wchar_t in previous charset. */
1520   path = getenv ("PATH");
1521   if (path && *path)    /* $PATH can be potentially unset. */
1522     {
1523       w_path = tp.w_get ();
1524       sys_mbstowcs (w_path, 32768, path);
1525     }
1526   w_cwd = tp.w_get ();
1527   cwdstuff::cwd_lock.acquire ();
1528   sys_mbstowcs (w_cwd, 32768, cygheap->cwd.get_posix ());
1529   /* Set charset for internal conversion functions. */
1530   if (*__locale_charset () == 'A'/*SCII*/)
1531     {
1532       cygheap->locale.mbtowc = __utf8_mbtowc;
1533       cygheap->locale.wctomb = __utf8_wctomb;
1534     }
1535   else
1536     {
1537       cygheap->locale.mbtowc = __mbtowc;
1538       cygheap->locale.wctomb = __wctomb;
1539     }
1540   strcpy (cygheap->locale.charset, __locale_charset ());
1541   /* Restore CWD and PATH in new charset. */
1542   cygheap->cwd.reset_posix (w_cwd);
1543   cwdstuff::cwd_lock.release ();
1544   if (w_path)
1545     {
1546       char *c_path = tp.c_get ();
1547       sys_wcstombs (c_path, 32768, w_path);
1548       setenv ("PATH", c_path, 1);
1549     }
1550 }
1551
1552 /* Called from dll_crt0_1, before fetching the command line from Windows.
1553    Set the internal charset according to the environment locale settings.
1554    Check if a required codepage is available, and only switch internal
1555    charset if so.
1556    Make sure to reset the application locale to "C" per POSIX. */
1557 void
1558 initial_setlocale ()
1559 {
1560   char *ret = _setlocale_r (_REENT, LC_CTYPE, "");
1561   if (ret && check_codepage (ret))
1562     internal_setlocale ();
1563 }
1564
1565 /* Like newlib's setlocale, but additionally check if the charset needs
1566    OS support and the required codepage is actually installed.  If codepage
1567    is not available, revert to previous locale and return NULL.  For details
1568    about codepage availability, see the comment in check_codepage() above. */
1569 extern "C" char *
1570 setlocale (int category, const char *locale)
1571 {
1572   char old[(LC_MESSAGES + 1) * (ENCODING_LEN + 1/*"/"*/ + 1)];
1573   if (locale && !wincap.has_always_all_codepages ())
1574     stpcpy (old, _setlocale_r (_REENT, category, NULL));
1575   char *ret = _setlocale_r (_REENT, category, locale);
1576   if (ret && locale && !(ret = check_codepage (ret)))
1577     _setlocale_r (_REENT, category, old);
1578   return ret;
1579 }