1 /*-----------------------------------------------------------------------
3 * PostgreSQL locale utilities
5 * Portions Copyright (c) 2002-2011, PostgreSQL Global Development Group
7 * src/backend/utils/adt/pg_locale.c
9 *-----------------------------------------------------------------------
13 * Here is how the locale stuff is handled: LC_COLLATE and LC_CTYPE
14 * are fixed at CREATE DATABASE time, stored in pg_database, and cannot
15 * be changed. Thus, the effects of strcoll(), strxfrm(), isupper(),
16 * toupper(), etc. are always in the same fixed locale.
18 * LC_MESSAGES is settable at run time and will take effect
21 * The other categories, LC_MONETARY, LC_NUMERIC, and LC_TIME are also
22 * settable at run-time. However, we don't actually set those locale
23 * categories permanently. This would have bizarre effects like no
24 * longer accepting standard floating-point literals in some locales.
25 * Instead, we only set the locales briefly when needed, cache the
26 * required information obtained from localeconv(), and set them back.
27 * The cached information is only used by the formatting functions
28 * (to_char, etc.) and the money type. For the user, this should all be
31 * !!! NOW HEAR THIS !!!
33 * We've been bitten repeatedly by this bug, so let's try to keep it in
34 * mind in future: on some platforms, the locale functions return pointers
35 * to static data that will be overwritten by any later locale function.
36 * Thus, for example, the obvious-looking sequence
37 * save = setlocale(category, NULL);
38 * if (!setlocale(category, value))
40 * setlocale(category, save);
41 * DOES NOT WORK RELIABLY: on some platforms the second setlocale() call
42 * will change the memory save is pointing at. To do this sort of thing
43 * safely, you *must* pstrdup what setlocale returns the first time.
45 * FYI, The Open Group locale standard is defined here:
47 * http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html
57 #include "catalog/pg_collation.h"
58 #include "catalog/pg_control.h"
59 #include "mb/pg_wchar.h"
60 #include "utils/hsearch.h"
61 #include "utils/memutils.h"
62 #include "utils/pg_locale.h"
63 #include "utils/syscache.h"
67 * This Windows file defines StrNCpy. We don't need it here, so we undefine
68 * it to keep the compiler quiet, and undefine it again after the file is
69 * included, so we don't accidentally use theirs.
/* Element count of the scratch buffers that receive strftime()/wcsftime() output in this file. */
78 #define MAX_L10N_DATA 80
/* Locale-name settings; PGLC_localeconv() applies locale_monetary temporarily while it builds its cache. */
82 char *locale_messages;
83 char *locale_monetary;
87 /* lc_time localization cache */
/* Filled by cache_locale_time(): the day arrays are indexed by tm_wday, the month arrays by tm_mon. */
88 char *localized_abbrev_days[7];
89 char *localized_full_days[7];
90 char *localized_abbrev_months[12];
91 char *localized_full_months[12];
93 /* indicates whether locale information cache is valid */
/* Cleared by the assign_locale_* hooks; set again once PGLC_localeconv() or cache_locale_time() has rebuilt its cache. */
94 static bool CurrentLocaleConvValid = false;
95 static bool CurrentLCTimeValid = false;
97 /* Environment variable storage area */
/* pg_perm_setlocale() formats "LC_xxx=value" into the buffer that matches the category it was given. */
/* NOTE(review): static storage is presumably needed because the buffer is handed to putenv() -- confirm. */
99 #define LC_ENV_BUFSIZE (NAMEDATALEN + 20)
101 static char lc_collate_envbuf[LC_ENV_BUFSIZE];
102 static char lc_ctype_envbuf[LC_ENV_BUFSIZE];
105 static char lc_messages_envbuf[LC_ENV_BUFSIZE];
107 static char lc_monetary_envbuf[LC_ENV_BUFSIZE];
108 static char lc_numeric_envbuf[LC_ENV_BUFSIZE];
109 static char lc_time_envbuf[LC_ENV_BUFSIZE];
111 /* Cache for collation-related knowledge */
115 Oid collid; /* hash key: pg_collation OID */
116 bool collate_is_c; /* is collation's LC_COLLATE C? */
117 bool ctype_is_c; /* is collation's LC_CTYPE C? */
118 bool flags_valid; /* true if above flags are valid */
119 pg_locale_t locale; /* locale_t struct, or 0 if not valid */
120 } collation_cache_entry;
/* NULL until lookup_collation_cache() creates the hash table on its first call. */
122 static HTAB *collation_cache = NULL;
/* Forward declaration; the definition further down sits under the same preprocessor condition. */
125 #if defined(WIN32) && defined(LC_MESSAGES)
126 static char *IsoLocaleName(const char *); /* MSVC specific */
133 * This is identical to the libc function setlocale(), with the addition
134 * that if the operation is successful, the corresponding LC_XXX environment
135 * variable is set to match. By setting the environment variable, we ensure
136 * that any subsequent use of setlocale(..., "") will preserve the settings
137 * made through this routine. Of course, LC_ALL must also be unset to fully
138 * ensure that, but that has to be done elsewhere after all the individual
139 * LC_XXX variables have been set correctly. (Thank you Perl for making this
/*
 * Apply `locale` to `category` with setlocale() and, when that succeeds,
 * format "LC_xxx=value" into the static buffer belonging to the category.
 * A failed setlocale() result is returned to the caller immediately.
 */
143 pg_perm_setlocale(int category, const char *locale)
150 result = setlocale(category, locale);
154 * On Windows, setlocale(LC_MESSAGES) does not work, so just assume that
155 * the given value is good and set it in the environment variables. We
156 * must ignore attempts to set to "", which means "keep using the old
157 * environment value".
160 if (category == LC_MESSAGES)
/* On this path the caller's string stands in for a setlocale() result. */
162 result = (char *) locale;
163 if (locale == NULL || locale[0] == '\0')
168 result = setlocale(category, locale);
172 return result; /* fall out immediately on failure */
/* From here on: choose the environment variable name and its static buffer for the category. */
177 envvar = "LC_COLLATE";
178 envbuf = lc_collate_envbuf;
182 envbuf = lc_ctype_envbuf;
186 envvar = "LC_MESSAGES";
187 envbuf = lc_messages_envbuf;
/* The value stored in the environment is the ISO-style name produced by IsoLocaleName(). */
/* NOTE(review): the assignment on line 191 looks like the fallback for a NULL translation -- confirm. */
189 result = IsoLocaleName(locale);
191 result = (char *) locale;
194 #endif /* LC_MESSAGES */
196 envvar = "LC_MONETARY";
197 envbuf = lc_monetary_envbuf;
200 envvar = "LC_NUMERIC";
201 envbuf = lc_numeric_envbuf;
205 envbuf = lc_time_envbuf;
208 elog(FATAL, "unrecognized LC category: %d", category);
209 envvar = NULL; /* keep compiler quiet */
/* NOTE(review): snprintf() already reserves room for the terminator, so the "- 1" only gives up one usable byte. */
/* NOTE(review): the return value is not checked, so an over-long "name=value" is truncated silently. */
214 snprintf(envbuf, LC_ENV_BUFSIZE - 1, "%s=%s", envvar, result);
224 * Is the locale name valid for the locale category?
/*
 * Probe whether setlocale() accepts `value` for `category`: remember the
 * current setting, try the new one, then switch back to the remembered one.
 * The outcome of the probe is held in ret.
 */
227 check_locale(int category, const char *value)
/* Passing NULL only queries the current setting; nothing is changed yet. */
232 save = setlocale(category, NULL);
234 return false; /* won't happen, we hope */
236 /* save may be pointing at a modifiable scratch variable, see above */
237 save = pstrdup(save);
239 /* set the locale with setlocale, to see if it accepts it. */
240 ret = (setlocale(category, value) != NULL);
/* NOTE(review): the result of this restoring call is ignored; if it failed, the probed locale would stay active. */
/* NOTE(review): no release of the pstrdup'd copy is visible in this excerpt -- confirm it is freed. */
242 setlocale(category, save); /* assume this won't fail */
250 * GUC check/assign hooks
252 * For most locale categories, the assign hook doesn't actually set the locale
253 * permanently, just reset flags so that the next use will cache the
254 * appropriate values. (See explanation at the top of this file.)
256 * Note: we accept value = "" as selecting the postmaster's environment
257 * value, whatever it was (so long as the environment setting is legal).
258 * This will have been locked down by an earlier call to pg_perm_setlocale.
/* Check hook: the proposed value is valid iff check_locale() accepts it for LC_MONETARY; extra and source are not used. */
261 check_locale_monetary(char **newval, void **extra, GucSource source)
263 return check_locale(LC_MONETARY, *newval);
/* Assign hook: does not call setlocale(); it only invalidates the cache that PGLC_localeconv() rebuilds on its next call. */
267 assign_locale_monetary(const char *newval, void *extra)
269 CurrentLocaleConvValid = false;
/* Check hook: same validation as above, for LC_NUMERIC. */
273 check_locale_numeric(char **newval, void **extra, GucSource source)
275 return check_locale(LC_NUMERIC, *newval);
/* Assign hook: numeric and monetary data share one cache, so the same flag is cleared. */
279 assign_locale_numeric(const char *newval, void *extra)
281 CurrentLocaleConvValid = false;
/* Check hook: same validation as above, for LC_TIME. */
285 check_locale_time(char **newval, void **extra, GucSource source)
287 return check_locale(LC_TIME, *newval);
/* Assign hook: invalidates the day/month name cache that cache_locale_time() rebuilds. */
291 assign_locale_time(const char *newval, void *extra)
293 CurrentLCTimeValid = false;
297 * We allow LC_MESSAGES to actually be set globally.
299 * Note: we normally disallow value = "" because it wouldn't have consistent
300 * semantics (it'd effectively just use the previous value). However, this
301 * is the value passed for PGC_S_DEFAULT, so don't complain in that case,
302 * not even if the attempted setting fails due to invalid environment value.
303 * The idea there is just to accept the environment setting *if possible*
304 * during startup, until we can read the proper value from postgresql.conf.
/* Check hook for the LC_MESSAGES setting; extra is not used. */
307 check_locale_messages(char **newval, void **extra, GucSource source)
/* The empty string is special-cased, and a value coming from PGC_S_DEFAULT is told apart from any other source. */
309 if (**newval == '\0')
311 if (source == PGC_S_DEFAULT)
318 * LC_MESSAGES category does not exist everywhere, but accept it anyway
320 * On Windows, we can't even check the value, so accept blindly
/* Real validation happens only where LC_MESSAGES exists and the platform is not Windows. */
322 #if defined(LC_MESSAGES) && !defined(WIN32)
323 return check_locale(LC_MESSAGES, *newval);
/* Assign hook: unlike the monetary/numeric/time hooks, this one applies the value through pg_perm_setlocale(); extra is not used. */
330 assign_locale_messages(const char *newval, void *extra)
333 * LC_MESSAGES category does not exist everywhere, but accept it anyway.
334 * We ignore failure, as per comment above.
/* The (void) cast makes the discarded return value explicit. */
337 (void) pg_perm_setlocale(LC_MESSAGES, newval);
343 * Frees the malloced content of a struct lconv. (But not the struct
/*
 * Release the string members of *s with free(); the struct itself is left
 * alone. PGLC_localeconv() calls this on its static cache before refilling it.
 */
/* The non-NULL guards are not required for correctness, since free(NULL) is a no-op. */
/* NOTE(review): no member is reset to NULL after being freed in the lines shown; see the caller for why that matters. */
347 free_struct_lconv(struct lconv * s)
352 if (s->currency_symbol)
353 free(s->currency_symbol);
354 if (s->decimal_point)
355 free(s->decimal_point);
358 if (s->thousands_sep)
359 free(s->thousands_sep);
360 if (s->int_curr_symbol)
361 free(s->int_curr_symbol);
362 if (s->mon_decimal_point)
363 free(s->mon_decimal_point);
365 free(s->mon_grouping);
366 if (s->mon_thousands_sep)
367 free(s->mon_thousands_sep);
368 if (s->negative_sign)
369 free(s->negative_sign);
370 if (s->positive_sign)
371 free(s->positive_sign);
376 * Return a strdup'ed string converted from the specified encoding to the
/*
 * Convert `str` from `encoding` to the database encoding.
 * `str` must not be NULL: strlen() is applied to it directly.
 */
380 db_encoding_strdup(int encoding, const char *str)
385 /* convert the string to the database encoding */
/* The input length is measured with strlen(), so `str` has to be NUL-terminated. */
386 pstr = (char *) pg_do_encoding_conversion(
387 (unsigned char *) str, strlen(str),
388 encoding, GetDatabaseEncoding());
398 * Return the POSIX lconv struct (contains number/money formatting
399 * information) with locale information for all categories.
/*
 * Return a pointer to a function-static struct lconv holding numeric and
 * monetary formatting data. The struct is rebuilt only while
 * CurrentLocaleConvValid is false; otherwise the cached copy is returned.
 * Callers must not free the result.
 */
402 PGLC_localeconv(void)
404 static struct lconv CurrentLocaleConv;
405 struct lconv *extlconv;
406 char *save_lc_monetary;
407 char *save_lc_numeric;
417 /* Did we do it already? */
418 if (CurrentLocaleConvValid)
419 return &CurrentLocaleConv;
/* The cache is stale: release the strings of the previous build before refilling. */
/* NOTE(review): the freed members keep their old pointer values until the assignments further down. */
/* NOTE(review): if an error is raised in between, the next call would free them again -- confirm no reset occurs in lines not shown. */
421 free_struct_lconv(&CurrentLocaleConv);
423 /* Save user's values of monetary and numeric locales */
424 save_lc_monetary = setlocale(LC_MONETARY, NULL);
425 if (save_lc_monetary)
426 save_lc_monetary = pstrdup(save_lc_monetary);
428 save_lc_numeric = setlocale(LC_NUMERIC, NULL);
430 save_lc_numeric = pstrdup(save_lc_numeric);
435 * Ideally, monetary and numeric local symbols could be returned in any
436 * server encoding. Unfortunately, the WIN32 API does not allow
437 * setlocale() to return values in a codepage/CTYPE that uses more than
438 * two bytes per character, like UTF-8:
440 * http://msdn.microsoft.com/en-us/library/x99tb11d.aspx
442 * Evidently, LC_CTYPE allows us to control the encoding used for strings
443 * returned by localeconv(). The Open Group standard, mentioned at the
444 * top of this C file, doesn't explicitly state this.
446 * Therefore, we set LC_CTYPE to match LC_NUMERIC or LC_MONETARY (which
447 * cannot be UTF8), call localeconv(), and then convert from the
448 * numeric/monitary LC_CTYPE to the server encoding. One example use of
449 * this is for the Euro symbol.
451 * Perhaps someday we will use GetLocaleInfoW() which returns values in
452 * UTF16 and convert from that.
455 /* save user's value of ctype locale */
456 save_lc_ctype = setlocale(LC_CTYPE, NULL);
458 save_lc_ctype = pstrdup(save_lc_ctype);
460 /* use numeric to set the ctype */
461 setlocale(LC_CTYPE, locale_numeric);
464 /* Get formatting information for numeric */
465 setlocale(LC_NUMERIC, locale_numeric);
466 extlconv = localeconv();
467 encoding = pg_get_encoding_from_locale(locale_numeric, true);
/* The numeric fields are copied into locals now because extlconv is reassigned by the second localeconv() call below. */
469 decimal_point = db_encoding_strdup(encoding, extlconv->decimal_point);
470 thousands_sep = db_encoding_strdup(encoding, extlconv->thousands_sep);
/* grouping is copied with plain strdup(), without encoding conversion. */
/* NOTE(review): strdup() may return NULL; no check is visible here or for mon_grouping below. */
471 grouping = strdup(extlconv->grouping);
474 /* use monetary to set the ctype */
475 setlocale(LC_CTYPE, locale_monetary);
478 /* Get formatting information for monetary */
479 setlocale(LC_MONETARY, locale_monetary);
480 extlconv = localeconv();
481 encoding = pg_get_encoding_from_locale(locale_monetary, true);
484 * Must copy all values since restoring internal settings may overwrite
485 * localeconv()'s results.
/* The struct copy brings over the non-pointer members; every string member is then replaced by a private copy. */
487 CurrentLocaleConv = *extlconv;
488 CurrentLocaleConv.decimal_point = decimal_point;
489 CurrentLocaleConv.grouping = grouping;
490 CurrentLocaleConv.thousands_sep = thousands_sep;
491 CurrentLocaleConv.int_curr_symbol = db_encoding_strdup(encoding, extlconv->int_curr_symbol);
492 CurrentLocaleConv.currency_symbol = db_encoding_strdup(encoding, extlconv->currency_symbol);
493 CurrentLocaleConv.mon_decimal_point = db_encoding_strdup(encoding, extlconv->mon_decimal_point);
494 CurrentLocaleConv.mon_grouping = strdup(extlconv->mon_grouping);
495 CurrentLocaleConv.mon_thousands_sep = db_encoding_strdup(encoding, extlconv->mon_thousands_sep);
496 CurrentLocaleConv.negative_sign = db_encoding_strdup(encoding, extlconv->negative_sign);
497 CurrentLocaleConv.positive_sign = db_encoding_strdup(encoding, extlconv->positive_sign);
499 /* Try to restore internal settings */
500 if (save_lc_monetary)
502 setlocale(LC_MONETARY, save_lc_monetary);
503 pfree(save_lc_monetary);
508 setlocale(LC_NUMERIC, save_lc_numeric);
509 pfree(save_lc_numeric);
513 /* Try to restore internal ctype settings */
516 setlocale(LC_CTYPE, save_lc_ctype);
517 pfree(save_lc_ctype);
/* The cache is marked valid only after every field is filled and the saved locales were put back. */
521 CurrentLocaleConvValid = true;
522 return &CurrentLocaleConv;
527 * On WIN32, strftime() returns the encoding in CP_ACP (the default
528 * operating system codepage for that computer), which is likely different
529 * from SERVER_ENCODING. This is especially important in Japanese versions
530 * of Windows which will use SJIS encoding, which we don't support as a
533 * So, instead of using strftime(), use wcsftime() to return the value in
534 * wide characters (internally UTF16) and then convert it to the appropriate
537 * Note that this only affects the calls to strftime() in this file, which are
538 * used to get the locale-aware strings. Other parts of the backend use
539 * pg_strftime(), which isn't locale-aware and does not need to be replaced.
/*
 * Windows replacement for strftime(): format `tm` with wcsftime() into a
 * wide-character scratch buffer, convert that to UTF-8 in `dst`, and, when
 * the database encoding is not UTF-8, convert once more and copy the result
 * back into `dst`.
 */
542 strftime_win32(char *dst, size_t dstlen, const wchar_t *format, const struct tm * tm)
545 wchar_t wbuf[MAX_L10N_DATA];
548 encoding = GetDatabaseEncoding();
550 len = wcsftime(wbuf, MAX_L10N_DATA, format, tm);
554 * strftime call failed - return 0 with the contents of dst
/* The input length is the count from wcsftime(), so the terminator is not part of the conversion. */
/* NOTE(review): the full dstlen is offered as output space; if a terminator is then stored at dst[len] (not visible here), */
/* a conversion that yields exactly dstlen bytes would write one byte past the buffer -- confirm. */
559 len = WideCharToMultiByte(CP_UTF8, 0, wbuf, len, dst, dstlen, NULL, NULL);
562 "could not convert string to UTF-8:error %lu", GetLastError());
565 if (encoding != PG_UTF8)
568 (char *) pg_do_encoding_conversion((unsigned char *) dst,
569 len, PG_UTF8, encoding);
/* strlcpy() bounds the copy by dstlen, so a longer converted string is truncated to fit dst. */
573 strlcpy(dst, convstr, dstlen);
581 /* redefine strftime() */
/* L##c pastes an L prefix onto the third argument, so every strftime() format below this point must be a string literal. */
582 #define strftime(a,b,c,d) strftime_win32(a,b,L##c,d)
587 * Update the lc_time localization cache variables if needed.
/*
 * Rebuild the localized day and month name arrays when CurrentLCTimeValid is
 * false: switch LC_TIME (and LC_CTYPE on the path described in the WIN32
 * comment below) to locale_time, format each name with strftime(), store
 * copies in TopMemoryContext, then restore the saved locale settings.
 */
590 cache_locale_time(void)
595 char buf[MAX_L10N_DATA];
603 /* did we do this already? */
604 if (CurrentLCTimeValid)
607 elog(DEBUG3, "cache_locale_time() executed; locale: \"%s\"", locale_time);
609 /* save user's value of time locale */
610 save_lc_time = setlocale(LC_TIME, NULL);
612 save_lc_time = pstrdup(save_lc_time);
617 * On WIN32, there is no way to get locale-specific time values in a
618 * specified locale, like we do for monetary/numeric. We can only get
619 * CP_ACP (see strftime_win32) or UTF16. Therefore, we get UTF16 and
620 * convert it to the database locale. However, wcsftime() internally uses
621 * LC_CTYPE, so we set it here. See the WIN32 comment near the top of
625 /* save user's value of ctype locale */
626 save_lc_ctype = setlocale(LC_CTYPE, NULL);
628 save_lc_ctype = pstrdup(save_lc_ctype);
630 /* use lc_time to set the ctype */
631 setlocale(LC_CTYPE, locale_time);
634 setlocale(LC_TIME, locale_time);
/* The current time only supplies a filled-in struct tm; tm_wday, tm_mon and tm_mday are overwritten in the loops. */
/* NOTE(review): no NULL check on the localtime() result is visible before it is dereferenced -- confirm. */
636 timenow = time(NULL);
637 timeinfo = localtime(&timenow);
/* Day names: index i is used as tm_wday. */
640 for (i = 0; i < 7; i++)
642 timeinfo->tm_wday = i;
643 strftime(buf, MAX_L10N_DATA, "%a", timeinfo);
/* The new copy is made before the old one is freed, then the array slot is switched over. */
644 ptr = MemoryContextStrdup(TopMemoryContext, buf);
645 if (localized_abbrev_days[i])
646 pfree(localized_abbrev_days[i]);
647 localized_abbrev_days[i] = ptr;
649 strftime(buf, MAX_L10N_DATA, "%A", timeinfo);
650 ptr = MemoryContextStrdup(TopMemoryContext, buf);
651 if (localized_full_days[i])
652 pfree(localized_full_days[i]);
653 localized_full_days[i] = ptr;
656 /* localized months */
657 for (i = 0; i < 12; i++)
659 timeinfo->tm_mon = i;
660 timeinfo->tm_mday = 1; /* make sure we don't have invalid date */
661 strftime(buf, MAX_L10N_DATA, "%b", timeinfo);
662 ptr = MemoryContextStrdup(TopMemoryContext, buf);
663 if (localized_abbrev_months[i])
664 pfree(localized_abbrev_months[i]);
665 localized_abbrev_months[i] = ptr;
667 strftime(buf, MAX_L10N_DATA, "%B", timeinfo);
668 ptr = MemoryContextStrdup(TopMemoryContext, buf);
669 if (localized_full_months[i])
670 pfree(localized_full_months[i]);
671 localized_full_months[i] = ptr;
674 /* try to restore internal settings */
677 setlocale(LC_TIME, save_lc_time);
682 /* try to restore internal ctype settings */
685 setlocale(LC_CTYPE, save_lc_ctype);
686 pfree(save_lc_ctype);
/* The cache is marked valid only after all 38 strings have been replaced. */
690 CurrentLCTimeValid = true;
694 #if defined(WIN32) && defined(LC_MESSAGES)
696 * Convert Windows locale name to the ISO formatted one
699 * This function returns NULL if conversion is impossible,
700 * otherwise returns the pointer to a static area which
701 * contains the ISO formatted locale name.
/*
 * Translate a Windows locale name into "language_COUNTRY" form using the
 * ISO 639 language and ISO 3166 country names reported by GetLocaleInfoA().
 * The result lives in a function-static buffer, so each call overwrites the
 * string returned by the previous one. Compilers older than VC8.0 get NULL.
 */
705 IsoLocaleName(const char *winlocname)
707 #if (_MSC_VER >= 1400) /* VC8.0 or later */
708 static char iso_lc_messages[32];
709 _locale_t loct = NULL;
/* "C" and "POSIX", compared case-insensitively, both map to the fixed name "C" without consulting Windows. */
711 if (pg_strcasecmp("c", winlocname) == 0 ||
712 pg_strcasecmp("posix", winlocname) == 0)
714 strcpy(iso_lc_messages, "C");
715 return iso_lc_messages;
718 loct = _create_locale(LC_CTYPE, winlocname);
/* The locale identifier is read out of the locale object's internal handle table. */
725 lcid = loct->locinfo->lc_handle[LC_CTYPE];
/* Substitute identifier: US English with default sorting. */
727 lcid = MAKELCID(MAKELANGID(LANG_ENGLISH, SUBLANG_ENGLISH_US), SORT_DEFAULT);
730 if (!GetLocaleInfoA(lcid, LOCALE_SISO639LANGNAME, isolang, sizeof(isolang)))
732 if (!GetLocaleInfoA(lcid, LOCALE_SISO3166CTRYNAME, isocrty, sizeof(isocrty)))
/* NOTE(review): snprintf() already leaves room for the terminator, so the "- 1" only gives up one usable byte. */
734 snprintf(iso_lc_messages, sizeof(iso_lc_messages) - 1, "%s_%s", isolang, isocrty);
735 return iso_lc_messages;
739 return NULL; /* Not supported on this version of msvc/mingw */
740 #endif /* _MSC_VER >= 1400 */
742 #endif /* WIN32 && LC_MESSAGES */
746 * Cache mechanism for collation information.
748 * We cache two flags: whether the collation's LC_COLLATE or LC_CTYPE is C
749 * (or POSIX), so we can optimize a few code paths in various places.
750 * For the built-in C and POSIX collations, we can know that without even
751 * doing a cache lookup, but we want to support aliases for C/POSIX too.
752 * For the "default" collation, there are separate static cache variables,
753 * since consulting the pg_collation catalog doesn't tell us what we need.
755 * Also, if a pg_locale_t has been requested for a collation, we cache that
756 * for the life of a backend.
758 * Note that some code relies on the flags not reporting false negatives
759 * (that is, saying it's not C when it is). For example, char2wchar()
760 * could fail if the locale is C, so str_tolower() shouldn't call it
763 * Note that we currently lack any way to flush the cache. Since we don't
764 * support ALTER COLLATION, this is OK. The worst case is that someone
765 * drops a collation, and a useless cache entry hangs around in existing
/*
 * Find or create the cache entry for `collation`. The hash table is created
 * on the first call. When `set_flags` is true and the entry's flags are not
 * yet valid, the collation's collcollate and collctype names are read from
 * the syscache to fill collate_is_c and ctype_is_c.
 * The default collation must not be passed in (asserted below).
 */
769 static collation_cache_entry *
770 lookup_collation_cache(Oid collation, bool set_flags)
772 collation_cache_entry *cache_entry;
775 Assert(OidIsValid(collation));
776 Assert(collation != DEFAULT_COLLATION_OID);
778 if (collation_cache == NULL)
780 /* First time through, initialize the hash table */
783 memset(&ctl, 0, sizeof(ctl));
784 ctl.keysize = sizeof(Oid);
785 ctl.entrysize = sizeof(collation_cache_entry);
/* NOTE(review): HASH_FUNCTION means ctl.hash must be set; that assignment is not among the lines shown -- confirm. */
787 collation_cache = hash_create("Collation cache", 100, &ctl,
788 HASH_ELEM | HASH_FUNCTION);
/* HASH_ENTER creates the entry when it is absent; `found` reports whether it already existed. */
791 cache_entry = hash_search(collation_cache, &collation, HASH_ENTER, &found);
795 * Make sure cache entry is marked invalid, in case we fail before
798 cache_entry->flags_valid = false;
799 cache_entry->locale = 0;
802 if (set_flags && !cache_entry->flags_valid)
804 /* Attempt to set the flags */
806 Form_pg_collation collform;
807 const char *collcollate;
808 const char *collctype;
810 tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collation));
811 if (!HeapTupleIsValid(tp))
812 elog(ERROR, "cache lookup failed for collation %u", collation);
813 collform = (Form_pg_collation) GETSTRUCT(tp);
815 collcollate = NameStr(collform->collcollate);
816 collctype = NameStr(collform->collctype);
/* strcmp() is an exact, case-sensitive match: only the spellings "C" and "POSIX" set these flags. */
818 cache_entry->collate_is_c = ((strcmp(collcollate, "C") == 0) ||
819 (strcmp(collcollate, "POSIX") == 0));
820 cache_entry->ctype_is_c = ((strcmp(collctype, "C") == 0) ||
821 (strcmp(collctype, "POSIX") == 0));
/* Set last, so a failure above leaves the entry marked as not yet valid. */
823 cache_entry->flags_valid = true;
833 * Detect whether collation's LC_COLLATE property is C
/*
 * Report whether `collation` has C/POSIX LC_COLLATE behaviour. Four cases
 * are handled in order: invalid OID, the default collation (asks
 * setlocale() once and caches the answer), the built-in C/POSIX collation
 * OIDs, and everything else through lookup_collation_cache().
 */
836 lc_collate_is_c(Oid collation)
839 * If we're asked about "collation 0", return false, so that the code will
840 * go into the non-C path and report that the collation is bogus.
842 if (!OidIsValid(collation))
846 * If we're asked about the default collation, we have to inquire of the C
847 * library. Cache the result so we only have to compute it once.
849 if (collation == DEFAULT_COLLATION_OID)
/* -1 is the initial value, meaning nothing has been cached yet; the cached answer is returned as a bool. */
851 static int result = -1;
855 return (bool) result;
/* Passing NULL only queries the current LC_COLLATE name; nothing is changed. */
856 localeptr = setlocale(LC_COLLATE, NULL);
858 elog(ERROR, "invalid LC_COLLATE setting");
860 if (strcmp(localeptr, "C") == 0)
862 else if (strcmp(localeptr, "POSIX") == 0)
866 return (bool) result;
870 * If we're asked about the built-in C/POSIX collations, we know that.
872 if (collation == C_COLLATION_OID ||
873 collation == POSIX_COLLATION_OID)
877 * Otherwise, we have to consult pg_collation, but we cache that.
/* set_flags = true guarantees collate_is_c has been computed before it is read. */
879 return (lookup_collation_cache(collation, true))->collate_is_c;
883 * Detect whether collation's LC_CTYPE property is C
/*
 * Report whether `collation` has C/POSIX LC_CTYPE behaviour. Same structure
 * as lc_collate_is_c(), but it queries LC_CTYPE and reads ctype_is_c.
 */
886 lc_ctype_is_c(Oid collation)
889 * If we're asked about "collation 0", return false, so that the code will
890 * go into the non-C path and report that the collation is bogus.
892 if (!OidIsValid(collation))
896 * If we're asked about the default collation, we have to inquire of the C
897 * library. Cache the result so we only have to compute it once.
899 if (collation == DEFAULT_COLLATION_OID)
/* This static is separate from the one in lc_collate_is_c(); -1 is the initial value, meaning nothing cached yet. */
901 static int result = -1;
905 return (bool) result;
/* Passing NULL only queries the current LC_CTYPE name; nothing is changed. */
906 localeptr = setlocale(LC_CTYPE, NULL);
908 elog(ERROR, "invalid LC_CTYPE setting");
910 if (strcmp(localeptr, "C") == 0)
912 else if (strcmp(localeptr, "POSIX") == 0)
916 return (bool) result;
920 * If we're asked about the built-in C/POSIX collations, we know that.
922 if (collation == C_COLLATION_OID ||
923 collation == POSIX_COLLATION_OID)
927 * Otherwise, we have to consult pg_collation, but we cache that.
/* set_flags = true guarantees ctype_is_c has been computed before it is read. */
929 return (lookup_collation_cache(collation, true))->ctype_is_c;
934 * Create a locale_t from a collation OID. Results are cached for the
935 * lifetime of the backend. Thus, do not free the result with freelocale().
937 * As a special optimization, the default/database collation returns 0.
938 * Callers should then revert to the non-locale_t-enabled code path.
939 * In fact, they shouldn't call this function at all when they are dealing
940 * with the default locale. That can save quite a bit in hotspots.
941 * Also, callers should avoid calling this before going down a C/POSIX
942 * fastpath, because such a fastpath should work even on platforms without
943 * locale_t support in the C library.
945 * For simplicity, we always generate COLLATE + CTYPE even though we
946 * might only need one of them. Since this is called only once per session,
947 * it shouldn't cost much.
/*
 * Return the pg_locale_t for `collid`, creating it on first request and
 * storing it in the collation cache entry. The default collation yields 0.
 * Platforms without locale_t raise a "not supported" error instead.
 */
950 pg_newlocale_from_collation(Oid collid)
952 collation_cache_entry *cache_entry;
954 /* Callers must pass a valid OID */
955 Assert(OidIsValid(collid));
957 /* Return 0 for "default" collation, just in case caller forgets */
958 if (collid == DEFAULT_COLLATION_OID)
959 return (pg_locale_t) 0;
/* set_flags = false: only the entry itself is needed here, not the is-C flags. */
961 cache_entry = lookup_collation_cache(collid, false);
/* 0 is the "not built yet" marker that lookup_collation_cache() stores in the locale field. */
963 if (cache_entry->locale == 0)
965 /* We haven't computed this yet in this session, so do it */
968 Form_pg_collation collform;
969 const char *collcollate;
970 const char *collctype;
973 tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
974 if (!HeapTupleIsValid(tp))
975 elog(ERROR, "cache lookup failed for collation %u", collid);
976 collform = (Form_pg_collation) GETSTRUCT(tp);
978 collcollate = NameStr(collform->collcollate);
979 collctype = NameStr(collform->collctype);
981 if (strcmp(collcollate, collctype) == 0)
983 /* Normal case where they're the same */
/* Two creation calls are visible: newlocale() with both category masks, and _create_locale() with LC_ALL. */
985 result = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collcollate,
988 result = _create_locale(LC_ALL, collcollate);
/* The messages use %m, so they report the errno left behind by the failed creation call. */
992 (errcode_for_file_access(),
993 errmsg("could not create locale \"%s\": %m",
999 /* We need two newlocale() steps */
/* First a locale with only the collate category, then the ctype category is layered on with loc1 as the base. */
1002 loc1 = newlocale(LC_COLLATE_MASK, collcollate, NULL);
1005 (errcode_for_file_access(),
1006 errmsg("could not create locale \"%s\": %m",
/* NOTE(review): if this second step fails, loc1 does not appear to be released before the error is raised -- confirm. */
1008 result = newlocale(LC_CTYPE_MASK, collctype, loc1);
1011 (errcode_for_file_access(),
1012 errmsg("could not create locale \"%s\": %m",
1017 * XXX The _create_locale() API doesn't appear to support this.
1018 * Could perhaps be worked around by changing pg_locale_t to
1019 * contain two separate fields.
1022 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1023 errmsg("collations with different collate and ctype values are not supported on this platform")));
/* Stored in the cache entry, so later calls for this collation skip the creation path. */
1027 cache_entry->locale = result;
/* The tuple is released only after collcollate/collctype, which point into it, are no longer needed. */
1029 ReleaseSysCache(tp);
1030 #else /* not HAVE_LOCALE_T */
1033 * For platforms that don't support locale_t, we can't do anything
1034 * with non-default collations.
1037 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1038 errmsg("nondefault collations are not supported on this platform")));
1039 #endif /* not HAVE_LOCALE_T */
1042 return cache_entry->locale;
1047 * These functions convert from/to libc's wchar_t, *not* pg_wchar_t.
1048 * Therefore we keep them here rather than with the mbutils code.
1051 #ifdef USE_WIDE_UPPER_LOWER
1054 * wchar2char --- convert wide characters to multibyte format
1056 * This has the same API as the standard wcstombs_l() function; in particular,
1057 * tolen is the maximum number of bytes to store at *to, and *from must be
1058 * zero-terminated. The output will be zero-terminated iff there is room.
/*
 * Convert the zero-terminated wide string `from` into multibyte form in `to`,
 * writing at most `tolen` bytes. Three paths are visible: the Windows API
 * for UTF-8 databases, plain wcstombs() for locale 0, and wcstombs_l() (or
 * a temporary uselocale() switch) for any other locale.
 */
1061 wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
1071 * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, and
1072 * for some reason mbstowcs and wcstombs won't do this for us, so we use
1073 * MultiByteToWideChar().
/* This direction (wide to multibyte) uses WideCharToMultiByte(); MultiByteToWideChar() is the call used by char2wchar(). */
1075 if (GetDatabaseEncoding() == PG_UTF8)
/* A source length of -1 tells the API to process up to and including the zero terminator. */
1077 result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
1079 /* A zero return is failure */
1084 Assert(result <= tolen);
1085 /* Microsoft counts the zero terminator in the result */
1091 if (locale == (pg_locale_t) 0)
1093 /* Use wcstombs directly for the default locale */
1094 result = wcstombs(to, from, tolen);
1098 #ifdef HAVE_LOCALE_T
1099 #ifdef HAVE_WCSTOMBS_L
1100 /* Use wcstombs_l for nondefault locales */
1101 result = wcstombs_l(to, from, tolen, locale);
1102 #else /* !HAVE_WCSTOMBS_L */
1103 /* We have to temporarily set the locale as current ... ugh */
/* uselocale() returns the previously active locale, which is reinstated right after the conversion. */
1104 locale_t save_locale = uselocale(locale);
1106 result = wcstombs(to, from, tolen);
1108 uselocale(save_locale);
1109 #endif /* HAVE_WCSTOMBS_L */
1110 #else /* !HAVE_LOCALE_T */
1111 /* Can't have locale != 0 without HAVE_LOCALE_T */
1112 elog(ERROR, "wcstombs_l is not available");
1113 result = 0; /* keep compiler quiet */
1114 #endif /* HAVE_LOCALE_T */
1121 * char2wchar --- convert multibyte characters to wide characters
1123 * This has almost the API of mbstowcs_l(), except that *from need not be
1124 * null-terminated; instead, the number of input bytes is specified as
1125 * fromlen. Also, we ereport() rather than returning -1 for invalid
1126 * input encoding. tolen is the maximum number of wchar_t's to store at *to.
1127 * The output will be zero-terminated iff there is room.
/*
 * Convert `fromlen` bytes at `from` into wide characters in `to`, storing at
 * most `tolen` wchar_t's. The input need not be zero-terminated: the
 * non-Windows paths work on a terminated copy. Invalid multibyte input is
 * reported with an error rather than through a return value.
 */
1130 char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
1139 /* See WIN32 "Unicode" comment above */
1140 if (GetDatabaseEncoding() == PG_UTF8)
1142 /* Win32 API does not work for zero-length input */
/* Only tolen - 1 output slots are offered, which keeps one free for the terminator appended afterwards. */
1147 result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
1148 /* A zero return is failure */
1155 Assert(result < tolen);
1156 /* Append trailing null wchar (MultiByteToWideChar() does not) */
1163 /* mbstowcs requires ending '\0' */
/* pnstrdup() produces a zero-terminated copy of the first fromlen bytes. */
1164 char *str = pnstrdup(from, fromlen);
1166 if (locale == (pg_locale_t) 0)
1168 /* Use mbstowcs directly for the default locale */
1169 result = mbstowcs(to, str, tolen);
1173 #ifdef HAVE_LOCALE_T
/* NOTE(review): the guard tests HAVE_WCSTOMBS_L although the call is mbstowcs_l() -- confirm both are always available together. */
1174 #ifdef HAVE_WCSTOMBS_L
1175 /* Use mbstowcs_l for nondefault locales */
1176 result = mbstowcs_l(to, str, tolen, locale);
1177 #else /* !HAVE_WCSTOMBS_L */
1178 /* We have to temporarily set the locale as current ... ugh */
/* uselocale() returns the previously active locale, which is reinstated right after the conversion. */
1179 locale_t save_locale = uselocale(locale);
1181 result = mbstowcs(to, str, tolen);
1183 uselocale(save_locale);
1184 #endif /* HAVE_WCSTOMBS_L */
1185 #else /* !HAVE_LOCALE_T */
1186 /* Can't have locale != 0 without HAVE_LOCALE_T */
1187 elog(ERROR, "mbstowcs_l is not available");
1188 result = 0; /* keep compiler quiet */
1189 #endif /* HAVE_LOCALE_T */
1198 * Invalid multibyte character encountered. We try to give a useful
1199 * error message by letting pg_verifymbstr check the string. But it's
1200 * possible that the string is OK to us, and not OK to mbstowcs ---
1201 * this suggests that the LC_CTYPE locale is different from the
1202 * database encoding. Give a generic error message if verifymbstr
1203 * can't find anything wrong.
/* Verification runs on the caller's original bytes (from, fromlen), not on the terminated copy. */
1205 pg_verifymbstr(from, fromlen, false); /* might not return */
1206 /* but if it does ... */
1208 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
1209 errmsg("invalid multibyte character for locale"),
1210 errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
1216 #endif /* USE_WIDE_UPPER_LOWER */