OSDN Git Service

Tighten the check in initdb and CREATE DATABASE that the chosen encoding
author Heikki Linnakangas <heikki.linnakangas@iki.fi>
Tue, 23 Sep 2008 10:58:03 +0000 (10:58 +0000)
committer Heikki Linnakangas <heikki.linnakangas@iki.fi>
Tue, 23 Sep 2008 10:58:03 +0000 (10:58 +0000)
matches the encoding of the locale. LC_COLLATE is now checked in addition
to LC_CTYPE.

doc/src/sgml/charset.sgml
src/backend/commands/dbcommands.c
src/bin/initdb/initdb.c

index c012294..0aeef78 100644 (file)
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/charset.sgml,v 2.88 2008/09/23 09:20:34 heikki Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/charset.sgml,v 2.89 2008/09/23 10:58:03 heikki Exp $ -->
 
 <chapter id="charset">
  <title>Localization</>
@@ -320,10 +320,10 @@ initdb --locale=sv_SE
 
   <para>
    An important restriction, however, is that each database's character set
-   must be compatible with the database's <envar>LC_CTYPE</> setting.
-   When <envar>LC_CTYPE</> is <literal>C</> or <literal>POSIX</>, any
-   character set is allowed, but for other settings of <envar>LC_CTYPE</>
-   there is only one character set that will work correctly.
+   must be compatible with the database's <envar>LC_CTYPE</> and
+   <envar>LC_COLLATE</> locale settings. For <literal>C</> or
+   <literal>POSIX</> locale, any character set is allowed, but for other
+   locales there is only one character set that will work correctly.
   </para>
 
    <sect2 id="multibyte-charset-supported">
index ce3754f..29dc073 100644 (file)
@@ -13,7 +13,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/commands/dbcommands.c,v 1.211 2008/09/23 09:20:35 heikki Exp $
+ *       $PostgreSQL: pgsql/src/backend/commands/dbcommands.c,v 1.212 2008/09/23 10:58:03 heikki Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -118,6 +118,7 @@ createdb(const CreatedbStmt *stmt)
        int                     encoding = -1;
        int                     dbconnlimit = -1;
        int                     ctype_encoding;
+       int                     collate_encoding;
        int                     notherbackends;
        int                     npreparedxacts;
        createdb_failure_params fparms;
@@ -334,6 +335,7 @@ createdb(const CreatedbStmt *stmt)
         * Note: if you change this policy, fix initdb to match.
         */
        ctype_encoding = pg_get_encoding_from_locale(dbctype);
+       collate_encoding = pg_get_encoding_from_locale(dbcollate);
 
        if (!(ctype_encoding == encoding ||
                  ctype_encoding == PG_SQL_ASCII ||
@@ -345,9 +347,22 @@ createdb(const CreatedbStmt *stmt)
                                (errmsg("encoding %s does not match locale %s",
                                                pg_encoding_to_char(encoding),
                                                dbctype),
-                        errdetail("The chosen LC_CTYPE setting requires encoding %s.",
+                        errdetail("The chosen CTYPE setting requires encoding %s.",
                                           pg_encoding_to_char(ctype_encoding))));
 
+       if (!(collate_encoding == encoding ||
+                 collate_encoding == PG_SQL_ASCII ||
+#ifdef WIN32
+                 encoding == PG_UTF8 ||
+#endif
+                 (encoding == PG_SQL_ASCII && superuser())))
+               ereport(ERROR,
+                               (errmsg("encoding %s does not match locale %s",
+                                               pg_encoding_to_char(encoding),
+                                               dbcollate),
+                        errdetail("The chosen COLLATE setting requires encoding %s.",
+                                          pg_encoding_to_char(collate_encoding))));
+
        /*
         * Check that the new locale is compatible with the source database.
         *
index a4bd34c..4caee3e 100644 (file)
@@ -42,7 +42,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  * Portions taken from FreeBSD.
  *
- * $PostgreSQL: pgsql/src/bin/initdb/initdb.c,v 1.160 2008/09/23 09:20:37 heikki Exp $
+ * $PostgreSQL: pgsql/src/bin/initdb/initdb.c,v 1.161 2008/09/23 10:58:03 heikki Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -188,7 +188,8 @@ static void trapsig(int signum);
 static void check_ok(void);
 static char *escape_quotes(const char *src);
 static int     locale_date_order(const char *locale);
-static bool chklocale(const char *locale);
+static bool check_locale_name(const char *locale);
+static bool check_locale_encoding(const char *locale, int encoding);
 static void setlocales(void);
 static void usage(const char *progname);
 
@@ -2187,7 +2188,7 @@ locale_date_order(const char *locale)
  * this should match the backend check_locale() function
  */
 static bool
-chklocale(const char *locale)
+check_locale_name(const char *locale)
 {
        bool            ret;
        int                     category = LC_CTYPE;
@@ -2212,6 +2213,50 @@ chklocale(const char *locale)
 }
 
 /*
+ * check if the chosen encoding matches the encoding required by the locale
+ *
+ * this should match the similar check in the backend createdb() function
+ */
+static bool
+check_locale_encoding(const char *locale, int user_enc)
+{
+       int                     locale_enc;
+
+       locale_enc = pg_get_encoding_from_locale(locale);
+
+       /* We allow selection of SQL_ASCII --- see notes in createdb() */
+       if (!(locale_enc == user_enc ||
+                 locale_enc == PG_SQL_ASCII ||
+                 user_enc == PG_SQL_ASCII
+#ifdef WIN32
+
+       /*
+        * On win32, if the encoding chosen is UTF8, all locales are OK
+        * (assuming the actual locale name passed the checks above). This is
+        * because UTF8 is a pseudo-codepage, that we convert to UTF16 before
+        * doing any operations on, and UTF16 supports all locales.
+        */
+                 || user_enc == PG_UTF8
+#endif
+                 ))
+       {
+               fprintf(stderr, _("%s: encoding mismatch\n"), progname);
+               fprintf(stderr,
+                          _("The encoding you selected (%s) and the encoding that the\n"
+                         "selected locale uses (%s) do not match.  This would lead to\n"
+                       "misbehavior in various character string processing functions.\n"
+                          "Rerun %s and either do not specify an encoding explicitly,\n"
+                                "or choose a matching combination.\n"),
+                               pg_encoding_to_char(user_enc),
+                               pg_encoding_to_char(locale_enc),
+                               progname);
+               return false;
+       }
+       return true;
+}
+
+
+/*
  * set up the locale variables
  *
  * assumes we have called setlocale(LC_ALL,"")
@@ -2241,17 +2286,17 @@ setlocales(void)
         * override absent/invalid config settings from initdb's locale settings
         */
 
-       if (strlen(lc_ctype) == 0 || !chklocale(lc_ctype))
+       if (strlen(lc_ctype) == 0 || !check_locale_name(lc_ctype))
                lc_ctype = xstrdup(setlocale(LC_CTYPE, NULL));
-       if (strlen(lc_collate) == 0 || !chklocale(lc_collate))
+       if (strlen(lc_collate) == 0 || !check_locale_name(lc_collate))
                lc_collate = xstrdup(setlocale(LC_COLLATE, NULL));
-       if (strlen(lc_numeric) == 0 || !chklocale(lc_numeric))
+       if (strlen(lc_numeric) == 0 || !check_locale_name(lc_numeric))
                lc_numeric = xstrdup(setlocale(LC_NUMERIC, NULL));
-       if (strlen(lc_time) == 0 || !chklocale(lc_time))
+       if (strlen(lc_time) == 0 || !check_locale_name(lc_time))
                lc_time = xstrdup(setlocale(LC_TIME, NULL));
-       if (strlen(lc_monetary) == 0 || !chklocale(lc_monetary))
+       if (strlen(lc_monetary) == 0 || !check_locale_name(lc_monetary))
                lc_monetary = xstrdup(setlocale(LC_MONETARY, NULL));
-       if (strlen(lc_messages) == 0 || !chklocale(lc_messages))
+       if (strlen(lc_messages) == 0 || !check_locale_name(lc_messages))
 #if defined(LC_MESSAGES) && !defined(WIN32)
        {
                /* when available get the current locale setting */
@@ -2452,6 +2497,7 @@ main(int argc, char *argv[])
                                                                 * environment */
        char            bin_dir[MAXPGPATH];
        char       *pg_data_native;
+       int                     user_enc;
 
 #ifdef WIN32
        char       *restrict_env;
@@ -2868,44 +2914,12 @@ main(int argc, char *argv[])
                }
        }
        else
-       {
-               int                     user_enc;
-               int                     ctype_enc;
-
                encodingid = get_encoding_id(encoding);
-               user_enc = atoi(encodingid);
-
-               ctype_enc = pg_get_encoding_from_locale(lc_ctype);
 
-               /* We allow selection of SQL_ASCII --- see notes in createdb() */
-               if (!(ctype_enc == user_enc ||
-                         ctype_enc == PG_SQL_ASCII ||
-                         user_enc == PG_SQL_ASCII
-#ifdef WIN32
-
-               /*
-                * On win32, if the encoding chosen is UTF8, all locales are OK
-                * (assuming the actual locale name passed the checks above). This is
-                * because UTF8 is a pseudo-codepage, that we convert to UTF16 before
-                * doing any operations on, and UTF16 supports all locales.
-                */
-                         || user_enc == PG_UTF8
-#endif
-                         ))
-               {
-                       fprintf(stderr, _("%s: encoding mismatch\n"), progname);
-                       fprintf(stderr,
-                          _("The encoding you selected (%s) and the encoding that the\n"
-                         "selected locale uses (%s) do not match.  This would lead to\n"
-                       "misbehavior in various character string processing functions.\n"
-                          "Rerun %s and either do not specify an encoding explicitly,\n"
-                                "or choose a matching combination.\n"),
-                                       pg_encoding_to_char(user_enc),
-                                       pg_encoding_to_char(ctype_enc),
-                                       progname);
-                       exit(1);
-               }
-       }
+       user_enc = atoi(encodingid);
+       if (!check_locale_encoding(lc_ctype, user_enc) ||
+               !check_locale_encoding(lc_collate, user_enc))
+               exit(1); /* check_locale_encoding printed the error */
 
        if (strlen(default_text_search_config) == 0)
        {