OSDN Git Service

Support for KOI8U encoding
author Peter Eisentraut <peter_e@gmx.net>
Tue, 10 Feb 2009 19:29:39 +0000 (19:29 +0000)
committer Peter Eisentraut <peter_e@gmx.net>
Tue, 10 Feb 2009 19:29:39 +0000 (19:29 +0000)
doc/src/sgml/charset.sgml
src/backend/utils/mb/Unicode/UCS_to_most.pl
src/backend/utils/mb/Unicode/koi8u_to_utf8.map [new file with mode: 0644]
src/backend/utils/mb/Unicode/utf8_to_koi8u.map [new file with mode: 0644]
src/backend/utils/mb/conversion_procs/Makefile
src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c
src/backend/utils/mb/encnames.c
src/backend/utils/mb/wchar.c
src/include/mb/pg_wchar.h
src/port/chklocale.c

diff --git a/doc/src/sgml/charset.sgml b/doc/src/sgml/charset.sgml
index a77d389..d9194b2 100644 (file)
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/charset.sgml,v 2.90 2008/09/24 16:30:26 momjian Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/charset.sgml,v 2.91 2009/02/10 19:29:39 petere Exp $ -->
 
 <chapter id="charset">
  <title>Localization</>
@@ -457,12 +457,20 @@ initdb --locale=sv_SE
          <entry></entry>
         </row>
         <row>
-         <entry><literal>KOI8</literal></entry>
-         <entry><acronym>KOI</acronym>8-R(U)</entry>
-         <entry>Cyrillic</entry>
+         <entry><literal>KOI8R</literal></entry>
+         <entry><acronym>KOI</acronym>8-R</entry>
+         <entry>Cyrillic (Russian)</entry>
          <entry>Yes</entry>
          <entry>1</entry>
-         <entry><literal>KOI8R</></entry>
+         <entry><literal>KOI8</></entry>
+        </row>
+        <row>
+         <entry><literal>KOI8U</literal></entry>
+         <entry><acronym>KOI</acronym>8-U</entry>
+         <entry>Cyrillic (Ukrainian)</entry>
+         <entry>Yes</entry>
+         <entry>1</entry>
+         <entry></entry>
         </row>
         <row>
          <entry><literal>LATIN1</literal></entry>
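diff --git a/src/backend/utils/mb/Unicode/UCS_to_most.pl b/src/backend/utils/mb/Unicode/UCS_to_most.pl
index 8ffde56..0e245d1 100644 (file)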
@@ -2,7 +2,7 @@
 #
 # Copyright (c) 2001-2009, PostgreSQL Global Development Group
 #
-# $PostgreSQL: pgsql/src/backend/utils/mb/Unicode/UCS_to_most.pl,v 1.6 2009/02/10 16:36:55 petere Exp $
+# $PostgreSQL: pgsql/src/backend/utils/mb/Unicode/UCS_to_most.pl,v 1.7 2009/02/10 19:29:39 petere Exp $
 #
 # Generate UTF-8 <--> character code conversion tables from
 # map files provided by Unicode organization.
@@ -43,6 +43,7 @@ require "ucs2utf.pl";
        'ISO8859_15' => '8859-15.TXT',
        'ISO8859_16' => '8859-16.TXT',
        'KOI8R' => 'KOI8-R.TXT',
+       'KOI8U' => 'KOI8-U.TXT',
        'GBK' => 'CP936.TXT',
        'UHC' => 'CP949.TXT',
        'JOHAB' => 'JOHAB.TXT',
diff --git a/src/backend/utils/mb/Unicode/koi8u_to_utf8.map b/src/backend/utils/mb/Unicode/koi8u_to_utf8.map
new file mode 100644 (file)
index 0000000..659f486
--- /dev/null
@@ -0,0 +1,130 @@
+static pg_local_to_utf LUmapKOI8U[ 128 ] = {
+  {0x0080, 0xe29480},
+  {0x0081, 0xe29482},
+  {0x0082, 0xe2948c},
+  {0x0083, 0xe29490},
+  {0x0084, 0xe29494},
+  {0x0085, 0xe29498},
+  {0x0086, 0xe2949c},
+  {0x0087, 0xe294a4},
+  {0x0088, 0xe294ac},
+  {0x0089, 0xe294b4},
+  {0x008a, 0xe294bc},
+  {0x008b, 0xe29680},
+  {0x008c, 0xe29684},
+  {0x008d, 0xe29688},
+  {0x008e, 0xe2968c},
+  {0x008f, 0xe29690},
+  {0x0090, 0xe29691},
+  {0x0091, 0xe29692},
+  {0x0092, 0xe29693},
+  {0x0093, 0xe28ca0},
+  {0x0094, 0xe296a0},
+  {0x0095, 0xe28899},
+  {0x0096, 0xe2889a},
+  {0x0097, 0xe28988},
+  {0x0098, 0xe289a4},
+  {0x0099, 0xe289a5},
+  {0x009a, 0xc2a0},
+  {0x009b, 0xe28ca1},
+  {0x009c, 0xc2b0},
+  {0x009d, 0xc2b2},
+  {0x009e, 0xc2b7},
+  {0x009f, 0xc3b7},
+  {0x00a0, 0xe29590},
+  {0x00a1, 0xe29591},
+  {0x00a2, 0xe29592},
+  {0x00a3, 0xd191},
+  {0x00a4, 0xd194},
+  {0x00a5, 0xe29594},
+  {0x00a6, 0xd196},
+  {0x00a7, 0xd197},
+  {0x00a8, 0xe29597},
+  {0x00a9, 0xe29598},
+  {0x00aa, 0xe29599},
+  {0x00ab, 0xe2959a},
+  {0x00ac, 0xe2959b},
+  {0x00ad, 0xd291},
+  {0x00ae, 0xe2959d},
+  {0x00af, 0xe2959e},
+  {0x00b0, 0xe2959f},
+  {0x00b1, 0xe295a0},
+  {0x00b2, 0xe295a1},
+  {0x00b3, 0xd081},
+  {0x00b4, 0xd084},
+  {0x00b5, 0xe295a3},
+  {0x00b6, 0xd086},
+  {0x00b7, 0xd087},
+  {0x00b8, 0xe295a6},
+  {0x00b9, 0xe295a7},
+  {0x00ba, 0xe295a8},
+  {0x00bb, 0xe295a9},
+  {0x00bc, 0xe295aa},
+  {0x00bd, 0xd290},
+  {0x00be, 0xe295ac},
+  {0x00bf, 0xc2a9},
+  {0x00c0, 0xd18e},
+  {0x00c1, 0xd0b0},
+  {0x00c2, 0xd0b1},
+  {0x00c3, 0xd186},
+  {0x00c4, 0xd0b4},
+  {0x00c5, 0xd0b5},
+  {0x00c6, 0xd184},
+  {0x00c7, 0xd0b3},
+  {0x00c8, 0xd185},
+  {0x00c9, 0xd0b8},
+  {0x00ca, 0xd0b9},
+  {0x00cb, 0xd0ba},
+  {0x00cc, 0xd0bb},
+  {0x00cd, 0xd0bc},
+  {0x00ce, 0xd0bd},
+  {0x00cf, 0xd0be},
+  {0x00d0, 0xd0bf},
+  {0x00d1, 0xd18f},
+  {0x00d2, 0xd180},
+  {0x00d3, 0xd181},
+  {0x00d4, 0xd182},
+  {0x00d5, 0xd183},
+  {0x00d6, 0xd0b6},
+  {0x00d7, 0xd0b2},
+  {0x00d8, 0xd18c},
+  {0x00d9, 0xd18b},
+  {0x00da, 0xd0b7},
+  {0x00db, 0xd188},
+  {0x00dc, 0xd18d},
+  {0x00dd, 0xd189},
+  {0x00de, 0xd187},
+  {0x00df, 0xd18a},
+  {0x00e0, 0xd0ae},
+  {0x00e1, 0xd090},
+  {0x00e2, 0xd091},
+  {0x00e3, 0xd0a6},
+  {0x00e4, 0xd094},
+  {0x00e5, 0xd095},
+  {0x00e6, 0xd0a4},
+  {0x00e7, 0xd093},
+  {0x00e8, 0xd0a5},
+  {0x00e9, 0xd098},
+  {0x00ea, 0xd099},
+  {0x00eb, 0xd09a},
+  {0x00ec, 0xd09b},
+  {0x00ed, 0xd09c},
+  {0x00ee, 0xd09d},
+  {0x00ef, 0xd09e},
+  {0x00f0, 0xd09f},
+  {0x00f1, 0xd0af},
+  {0x00f2, 0xd0a0},
+  {0x00f3, 0xd0a1},
+  {0x00f4, 0xd0a2},
+  {0x00f5, 0xd0a3},
+  {0x00f6, 0xd096},
+  {0x00f7, 0xd092},
+  {0x00f8, 0xd0ac},
+  {0x00f9, 0xd0ab},
+  {0x00fa, 0xd097},
+  {0x00fb, 0xd0a8},
+  {0x00fc, 0xd0ad},
+  {0x00fd, 0xd0a9},
+  {0x00fe, 0xd0a7},
+  {0x00ff, 0xd0aa}
+};
diff --git a/src/backend/utils/mb/Unicode/utf8_to_koi8u.map b/src/backend/utils/mb/Unicode/utf8_to_koi8u.map
new file mode 100644 (file)
index 0000000..7f262a4
--- /dev/null
@@ -0,0 +1,130 @@
+static pg_utf_to_local ULmapKOI8U[ 128 ] = {
+  {0xc2a0, 0x009a},
+  {0xc2a9, 0x00bf},
+  {0xc2b0, 0x009c},
+  {0xc2b2, 0x009d},
+  {0xc2b7, 0x009e},
+  {0xc3b7, 0x009f},
+  {0xd081, 0x00b3},
+  {0xd084, 0x00b4},
+  {0xd086, 0x00b6},
+  {0xd087, 0x00b7},
+  {0xd090, 0x00e1},
+  {0xd091, 0x00e2},
+  {0xd092, 0x00f7},
+  {0xd093, 0x00e7},
+  {0xd094, 0x00e4},
+  {0xd095, 0x00e5},
+  {0xd096, 0x00f6},
+  {0xd097, 0x00fa},
+  {0xd098, 0x00e9},
+  {0xd099, 0x00ea},
+  {0xd09a, 0x00eb},
+  {0xd09b, 0x00ec},
+  {0xd09c, 0x00ed},
+  {0xd09d, 0x00ee},
+  {0xd09e, 0x00ef},
+  {0xd09f, 0x00f0},
+  {0xd0a0, 0x00f2},
+  {0xd0a1, 0x00f3},
+  {0xd0a2, 0x00f4},
+  {0xd0a3, 0x00f5},
+  {0xd0a4, 0x00e6},
+  {0xd0a5, 0x00e8},
+  {0xd0a6, 0x00e3},
+  {0xd0a7, 0x00fe},
+  {0xd0a8, 0x00fb},
+  {0xd0a9, 0x00fd},
+  {0xd0aa, 0x00ff},
+  {0xd0ab, 0x00f9},
+  {0xd0ac, 0x00f8},
+  {0xd0ad, 0x00fc},
+  {0xd0ae, 0x00e0},
+  {0xd0af, 0x00f1},
+  {0xd0b0, 0x00c1},
+  {0xd0b1, 0x00c2},
+  {0xd0b2, 0x00d7},
+  {0xd0b3, 0x00c7},
+  {0xd0b4, 0x00c4},
+  {0xd0b5, 0x00c5},
+  {0xd0b6, 0x00d6},
+  {0xd0b7, 0x00da},
+  {0xd0b8, 0x00c9},
+  {0xd0b9, 0x00ca},
+  {0xd0ba, 0x00cb},
+  {0xd0bb, 0x00cc},
+  {0xd0bc, 0x00cd},
+  {0xd0bd, 0x00ce},
+  {0xd0be, 0x00cf},
+  {0xd0bf, 0x00d0},
+  {0xd180, 0x00d2},
+  {0xd181, 0x00d3},
+  {0xd182, 0x00d4},
+  {0xd183, 0x00d5},
+  {0xd184, 0x00c6},
+  {0xd185, 0x00c8},
+  {0xd186, 0x00c3},
+  {0xd187, 0x00de},
+  {0xd188, 0x00db},
+  {0xd189, 0x00dd},
+  {0xd18a, 0x00df},
+  {0xd18b, 0x00d9},
+  {0xd18c, 0x00d8},
+  {0xd18d, 0x00dc},
+  {0xd18e, 0x00c0},
+  {0xd18f, 0x00d1},
+  {0xd191, 0x00a3},
+  {0xd194, 0x00a4},
+  {0xd196, 0x00a6},
+  {0xd197, 0x00a7},
+  {0xd290, 0x00bd},
+  {0xd291, 0x00ad},
+  {0xe28899, 0x0095},
+  {0xe2889a, 0x0096},
+  {0xe28988, 0x0097},
+  {0xe289a4, 0x0098},
+  {0xe289a5, 0x0099},
+  {0xe28ca0, 0x0093},
+  {0xe28ca1, 0x009b},
+  {0xe29480, 0x0080},
+  {0xe29482, 0x0081},
+  {0xe2948c, 0x0082},
+  {0xe29490, 0x0083},
+  {0xe29494, 0x0084},
+  {0xe29498, 0x0085},
+  {0xe2949c, 0x0086},
+  {0xe294a4, 0x0087},
+  {0xe294ac, 0x0088},
+  {0xe294b4, 0x0089},
+  {0xe294bc, 0x008a},
+  {0xe29590, 0x00a0},
+  {0xe29591, 0x00a1},
+  {0xe29592, 0x00a2},
+  {0xe29594, 0x00a5},
+  {0xe29597, 0x00a8},
+  {0xe29598, 0x00a9},
+  {0xe29599, 0x00aa},
+  {0xe2959a, 0x00ab},
+  {0xe2959b, 0x00ac},
+  {0xe2959d, 0x00ae},
+  {0xe2959e, 0x00af},
+  {0xe2959f, 0x00b0},
+  {0xe295a0, 0x00b1},
+  {0xe295a1, 0x00b2},
+  {0xe295a3, 0x00b5},
+  {0xe295a6, 0x00b8},
+  {0xe295a7, 0x00b9},
+  {0xe295a8, 0x00ba},
+  {0xe295a9, 0x00bb},
+  {0xe295aa, 0x00bc},
+  {0xe295ac, 0x00be},
+  {0xe29680, 0x008b},
+  {0xe29684, 0x008c},
+  {0xe29688, 0x008d},
+  {0xe2968c, 0x008e},
+  {0xe29690, 0x008f},
+  {0xe29691, 0x0090},
+  {0xe29692, 0x0091},
+  {0xe29693, 0x0092},
+  {0xe296a0, 0x0094}
+};
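diff --git a/src/backend/utils/mb/conversion_procs/Makefile b/src/backend/utils/mb/conversion_procs/Makefile
index 051e885..2d0b375 100644 (file)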
@@ -4,7 +4,7 @@
 #    Makefile for utils/mb/conversion_procs
 #
 # IDENTIFICATION
-#    $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/Makefile,v 1.20 2008/08/23 20:31:37 momjian Exp $
+#    $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/Makefile,v 1.21 2009/02/10 19:29:39 petere Exp $
 #
 #-------------------------------------------------------------------------
 
@@ -84,6 +84,8 @@ CONVERSIONS = \
                utf8_to_big5 UTF8 BIG5 utf8_to_big5 utf8_and_big5 \
                utf8_to_koi8_r  UTF8 KOI8R utf8_to_koi8r utf8_and_cyrillic \
                koi8_r_to_utf8  KOI8R UTF8 koi8r_to_utf8 utf8_and_cyrillic \
+               utf8_to_koi8_u  UTF8 KOI8U utf8_to_koi8u utf8_and_cyrillic \
+               koi8_u_to_utf8  KOI8U UTF8 koi8u_to_utf8 utf8_and_cyrillic \
                utf8_to_windows_866 UTF8 WIN866 utf8_to_win utf8_and_win \
                windows_866_to_utf8 WIN866 UTF8 win_to_utf8 utf8_and_win \
                utf8_to_windows_874 UTF8 WIN874 utf8_to_win utf8_and_win \
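diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c b/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c
index 0f22256..0231df6 100644 (file)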
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c,v 1.23 2009/01/29 19:23:40 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c,v 1.24 2009/02/10 19:29:39 petere Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "mb/pg_wchar.h"
 #include "../../Unicode/utf8_to_koi8r.map"
 #include "../../Unicode/koi8r_to_utf8.map"
+#include "../../Unicode/utf8_to_koi8u.map"
+#include "../../Unicode/koi8u_to_utf8.map"
 
 PG_MODULE_MAGIC;
 
 PG_FUNCTION_INFO_V1(utf8_to_koi8r);
 PG_FUNCTION_INFO_V1(koi8r_to_utf8);
 
+PG_FUNCTION_INFO_V1(utf8_to_koi8u);
+PG_FUNCTION_INFO_V1(koi8u_to_utf8);
+
 extern Datum utf8_to_koi8r(PG_FUNCTION_ARGS);
 extern Datum koi8r_to_utf8(PG_FUNCTION_ARGS);
 
+extern Datum utf8_to_koi8u(PG_FUNCTION_ARGS);
+extern Datum koi8u_to_utf8(PG_FUNCTION_ARGS);
+
 /* ----------
  * conv_proc(
  *             INTEGER,        -- source encoding id
@@ -65,3 +73,33 @@ koi8r_to_utf8(PG_FUNCTION_ARGS)
 
        PG_RETURN_VOID();
 }
+
+Datum
+utf8_to_koi8u(PG_FUNCTION_ARGS)
+{
+       unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
+       unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
+       int                     len = PG_GETARG_INT32(4);
+
+       CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_KOI8U);
+
+       UtfToLocal(src, dest, ULmapKOI8U, NULL,
+                        sizeof(ULmapKOI8U) / sizeof(pg_utf_to_local), 0, PG_KOI8U, len);
+
+       PG_RETURN_VOID();
+}
+
+Datum
+koi8u_to_utf8(PG_FUNCTION_ARGS)
+{
+       unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
+       unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
+       int                     len = PG_GETARG_INT32(4);
+
+       CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8U, PG_UTF8);
+
+       LocalToUtf(src, dest, LUmapKOI8U, NULL,
+                        sizeof(LUmapKOI8U) / sizeof(pg_local_to_utf), 0, PG_KOI8U, len);
+
+       PG_RETURN_VOID();
+}
diff --git a/src/backend/utils/mb/encnames.c b/src/backend/utils/mb/encnames.c
index 0167dff..c441f98 100644 (file)
@@ -2,7 +2,7 @@
  * Encoding names and routines for work with it. All
  * in this file is shared bedween FE and BE.
  *
- * $PostgreSQL: pgsql/src/backend/utils/mb/encnames.c,v 1.37 2007/11/15 21:14:40 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/mb/encnames.c,v 1.38 2009/02/10 19:29:39 petere Exp $
  */
 #ifdef FRONTEND
 #include "postgres_fe.h"
@@ -123,6 +123,9 @@ pg_encname  pg_encname_tbl[] =
                "koi8r", PG_KOI8R
        },                                                      /* KOI8-R; RFC1489 */
        {
+               "koi8u", PG_KOI8U
+       },                                                      /* KOI8-U; RFC2319 */
+       {
                "latin1", PG_LATIN1
        },                                                      /* alias for ISO-8859-1 */
        {
@@ -366,7 +369,7 @@ pg_enc2name pg_enc2name_tbl[] =
                "WIN874", PG_WIN874
        },
        {
-               "KOI8", PG_KOI8R
+               "KOI8R", PG_KOI8R
        },
        {
                "WIN1251", PG_WIN1251
@@ -402,6 +405,9 @@ pg_enc2name pg_enc2name_tbl[] =
                "WIN1257", PG_WIN1257
        },
        {
+               "KOI8U", PG_KOI8U
+       },
+       {
                "SJIS", PG_SJIS
        },
        {
diff --git a/src/backend/utils/mb/wchar.c b/src/backend/utils/mb/wchar.c
index d14d37d..7b7ebf6 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * conversion functions between pg_wchar and multibyte streams.
  * Tatsuo Ishii
- * $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.70 2009/02/10 16:44:44 petere Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.71 2009/02/10 19:29:39 petere Exp $
  *
  */
 /* can be used in either frontend or backend */
@@ -1373,6 +1373,7 @@ pg_wchar_tbl pg_wchar_table[] = {
        {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},           /* PG_WIN1254 */
        {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},           /* PG_WIN1255 */
        {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},           /* PG_WIN1257 */
+       {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1},           /* PG_KOI8U */
        {0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifier, 2},        /* PG_SJIS */
        {0, pg_big5_mblen, pg_big5_dsplen, pg_big5_verifier, 2},        /* PG_BIG5 */
        {0, pg_gbk_mblen, pg_gbk_dsplen, pg_gbk_verifier, 2},           /* PG_GBK */
diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h
index 65c99a2..12820d4 100644 (file)
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.83 2009/01/29 19:23:42 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.84 2009/02/10 19:29:39 petere Exp $
  *
  *     NOTES
  *             This is used both by the backend and by libpq, but should not be
@@ -202,6 +202,7 @@ typedef enum pg_enc
        PG_WIN1254,                                     /* windows-1254 */
        PG_WIN1255,                                     /* windows-1255 */
        PG_WIN1257,                                     /* windows-1257 */
+       PG_KOI8U,                                       /* KOI8-U */
        /* PG_ENCODING_BE_LAST points to the above entry */
 
        /* followings are for client encoding only */
@@ -216,7 +217,7 @@ typedef enum pg_enc
 
 } pg_enc;
 
-#define PG_ENCODING_BE_LAST PG_WIN1257
+#define PG_ENCODING_BE_LAST PG_KOI8U
 
 /*
  * Please use these tests before access to pg_encconv_tbl[]
diff --git a/src/port/chklocale.c b/src/port/chklocale.c
index 74f440a..864071d 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/port/chklocale.c,v 1.10 2009/01/01 17:24:04 momjian Exp $
+ *       $PostgreSQL: pgsql/src/port/chklocale.c,v 1.11 2009/02/10 19:29:39 petere Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -123,6 +123,9 @@ static const struct encoding_match encoding_match_list[] = {
        {PG_KOI8R, "KOI8-R"},
        {PG_KOI8R, "CP20866"},
 
+       {PG_KOI8U, "KOI8-U"},
+       {PG_KOI8U, "CP21866"},
+
        {PG_WIN866, "CP866"},
        {PG_WIN874, "CP874"},
        {PG_WIN1250, "CP1250"},