From e7fb9f18bfe545070f7b1761b2ff7779c19f331f Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Mon, 14 Mar 2005 18:31:25 +0000 Subject: [PATCH] Add support for Win1252 encoding. Roland Volkmann --- doc/src/sgml/charset.sgml | 25 +++- doc/src/sgml/func.sgml | 14 ++- src/backend/utils/mb/Unicode/utf8_to_win1252.map | 126 ++++++++++++++++++++ src/backend/utils/mb/Unicode/win1252_to_utf8.map | 130 +++++++++++++++++++++ src/backend/utils/mb/conversion_procs/Makefile | 18 +-- .../utf8_and_iso8859/utf8_and_iso8859.c | 4 +- .../utf8_and_win1250/utf8_and_win1250.c | 14 +-- .../mb/conversion_procs/utf8_and_win1252/Makefile | 12 ++ .../utf8_and_win1252/utf8_and_win1252.c | 69 +++++++++++ .../utf8_and_win1256/utf8_and_win1256.c | 14 +-- .../utf8_and_win874/utf8_and_win874.c | 14 +-- src/backend/utils/mb/encnames.c | 11 +- src/backend/utils/mb/wchar.c | 3 +- src/bin/initdb/initdb.c | 3 +- src/include/mb/pg_wchar.h | 5 +- src/test/regress/expected/conversion.out | 26 +++++ src/test/regress/sql/conversion.sql | 6 + 17 files changed, 451 insertions(+), 43 deletions(-) create mode 100644 src/backend/utils/mb/Unicode/utf8_to_win1252.map create mode 100644 src/backend/utils/mb/Unicode/win1252_to_utf8.map create mode 100644 src/backend/utils/mb/conversion_procs/utf8_and_win1252/Makefile create mode 100644 src/backend/utils/mb/conversion_procs/utf8_and_win1252/utf8_and_win1252.c diff --git a/doc/src/sgml/charset.sgml b/doc/src/sgml/charset.sgml index 9832f948b4..9bfd78e911 100644 --- a/doc/src/sgml/charset.sgml +++ b/doc/src/sgml/charset.sgml @@ -1,4 +1,4 @@ - + Localization</> @@ -537,6 +537,13 @@ initdb --locale=sv_SE <row> <entry><literal>WIN1251</literal></entry> <entry>Windows CP1251</entry> <entry>Cyrillic</entry> <entry>1</entry> <entry><literal>WIN</></entry> + </row> + <row> + <entry><literal>WIN1252</literal></entry> + <entry>Windows CP1252</entry> + <entry>Western European</entry> + <entry>1</entry> + <entry></entry> @@ -676,7 +683,7 @@ $ <userinput>psql -l</userinput> 
<tbody> <row> <entry><literal>BIG5</literal></entry> - <entry><emphasis>not available as a server encoding</emphasis> + <entry><emphasis>not supported as a server encoding</emphasis> </entry> </row> <row> @@ -711,12 +718,12 @@ $ <userinput>psql -l</userinput> </row> <row> <entry><literal>GB18030</literal></entry> - <entry><emphasis>not available as a server encoding</emphasis> + <entry><emphasis>not supported as a server encoding</emphasis> </entry> </row> <row> <entry><literal>GBK</literal></entry> - <entry><emphasis>not available as a server encoding</emphasis> + <entry><emphasis>not supported as a server encoding</emphasis> </entry> </row> <row> @@ -847,7 +854,7 @@ $ <userinput>psql -l</userinput> </row> <row> <entry><literal>SJIS</literal></entry> - <entry><emphasis>not available as a server encoding</emphasis> + <entry><emphasis>not supported as a server encoding</emphasis> </entry> </row> <row> @@ -859,7 +866,7 @@ $ <userinput>psql -l</userinput> </row> <row> <entry><literal>UHC</literal></entry> - <entry><emphasis>not available as a server encoding</emphasis> + <entry><emphasis>not supported as a server encoding</emphasis> </entry> </row> <row> @@ -902,6 +909,12 @@ $ <userinput>psql -l</userinput> </entry> </row> <row> + <entry><literal>WIN1252</literal></entry> + <entry><emphasis>WIN1252</emphasis>, + <literal>UTF8</literal> + </entry> + </row> + <row> <entry><literal>WIN1256</literal></entry> <entry><emphasis>WIN1256</emphasis>, <literal>UTF8</literal> diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 7da2e8ad4a..c96ef83b16 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -1,5 +1,5 @@ <!-- -$PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.240 2005/03/07 04:30:49 momjian Exp $ +$PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.241 2005/03/14 18:31:19 momjian Exp $ PostgreSQL documentation --> @@ -1991,6 +1991,12 @@ PostgreSQL documentation </row> <row> + <entry><literal>utf8_to_windows_1252</literal></entry> + 
<entry><literal>UTF8</literal></entry> + <entry><literal>WIN1252</literal></entry> + </row> + + <row> <entry><literal>utf8_to_windows_1256</literal></entry> <entry><literal>UTF8</literal></entry> <entry><literal>WIN1256</literal></entry> @@ -2057,6 +2063,12 @@ PostgreSQL documentation </row> <row> + <entry><literal>windows_1252_to_utf8</literal></entry> + <entry><literal>WIN1252</literal></entry> + <entry><literal>UTF8</literal></entry> + </row> + + <row> <entry><literal>windows_1256_to_utf8</literal></entry> <entry><literal>WIN1256</literal></entry> <entry><literal>UTF8</literal></entry> diff --git a/src/backend/utils/mb/Unicode/utf8_to_win1252.map b/src/backend/utils/mb/Unicode/utf8_to_win1252.map new file mode 100644 index 0000000000..6dc3c6c07e --- /dev/null +++ b/src/backend/utils/mb/Unicode/utf8_to_win1252.map @@ -0,0 +1,126 @@ +static pg_utf_to_local ULmapWIN1252[ 124 ] = { + {0x0000, 0x0081}, + {0xc2a0, 0x00a0}, + {0xc2a1, 0x00a1}, + {0xc2a2, 0x00a2}, + {0xc2a3, 0x00a3}, + {0xc2a4, 0x00a4}, + {0xc2a5, 0x00a5}, + {0xc2a6, 0x00a6}, + {0xc2a7, 0x00a7}, + {0xc2a8, 0x00a8}, + {0xc2a9, 0x00a9}, + {0xc2aa, 0x00aa}, + {0xc2ab, 0x00ab}, + {0xc2ac, 0x00ac}, + {0xc2ad, 0x00ad}, + {0xc2ae, 0x00ae}, + {0xc2af, 0x00af}, + {0xc2b0, 0x00b0}, + {0xc2b1, 0x00b1}, + {0xc2b2, 0x00b2}, + {0xc2b3, 0x00b3}, + {0xc2b4, 0x00b4}, + {0xc2b5, 0x00b5}, + {0xc2b6, 0x00b6}, + {0xc2b7, 0x00b7}, + {0xc2b8, 0x00b8}, + {0xc2b9, 0x00b9}, + {0xc2ba, 0x00ba}, + {0xc2bb, 0x00bb}, + {0xc2bc, 0x00bc}, + {0xc2bd, 0x00bd}, + {0xc2be, 0x00be}, + {0xc2bf, 0x00bf}, + {0xc380, 0x00c0}, + {0xc381, 0x00c1}, + {0xc382, 0x00c2}, + {0xc383, 0x00c3}, + {0xc384, 0x00c4}, + {0xc385, 0x00c5}, + {0xc386, 0x00c6}, + {0xc387, 0x00c7}, + {0xc388, 0x00c8}, + {0xc389, 0x00c9}, + {0xc38a, 0x00ca}, + {0xc38b, 0x00cb}, + {0xc38c, 0x00cc}, + {0xc38d, 0x00cd}, + {0xc38e, 0x00ce}, + {0xc38f, 0x00cf}, + {0xc390, 0x00d0}, + {0xc391, 0x00d1}, + {0xc392, 0x00d2}, + {0xc393, 0x00d3}, + {0xc394, 0x00d4}, + {0xc395, 0x00d5}, + 
{0xc396, 0x00d6}, + {0xc397, 0x00d7}, + {0xc398, 0x00d8}, + {0xc399, 0x00d9}, + {0xc39a, 0x00da}, + {0xc39b, 0x00db}, + {0xc39c, 0x00dc}, + {0xc39d, 0x00dd}, + {0xc39e, 0x00de}, + {0xc39f, 0x00df}, + {0xc3a0, 0x00e0}, + {0xc3a1, 0x00e1}, + {0xc3a2, 0x00e2}, + {0xc3a3, 0x00e3}, + {0xc3a4, 0x00e4}, + {0xc3a5, 0x00e5}, + {0xc3a6, 0x00e6}, + {0xc3a7, 0x00e7}, + {0xc3a8, 0x00e8}, + {0xc3a9, 0x00e9}, + {0xc3aa, 0x00ea}, + {0xc3ab, 0x00eb}, + {0xc3ac, 0x00ec}, + {0xc3ad, 0x00ed}, + {0xc3ae, 0x00ee}, + {0xc3af, 0x00ef}, + {0xc3b0, 0x00f0}, + {0xc3b1, 0x00f1}, + {0xc3b2, 0x00f2}, + {0xc3b3, 0x00f3}, + {0xc3b4, 0x00f4}, + {0xc3b5, 0x00f5}, + {0xc3b6, 0x00f6}, + {0xc3b7, 0x00f7}, + {0xc3b8, 0x00f8}, + {0xc3b9, 0x00f9}, + {0xc3ba, 0x00fa}, + {0xc3bb, 0x00fb}, + {0xc3bc, 0x00fc}, + {0xc3bd, 0x00fd}, + {0xc3be, 0x00fe}, + {0xc3bf, 0x00ff}, + {0xc592, 0x008c}, + {0xc593, 0x009c}, + {0xc5a0, 0x008a}, + {0xc5a1, 0x009a}, + {0xc5b8, 0x009f}, + {0xc5bd, 0x008e}, + {0xc5be, 0x009e}, + {0xc692, 0x0083}, + {0xcb86, 0x0088}, + {0xcb9c, 0x0098}, + {0xe28093, 0x0096}, + {0xe28094, 0x0097}, + {0xe28098, 0x0091}, + {0xe28099, 0x0092}, + {0xe2809a, 0x0082}, + {0xe2809c, 0x0093}, + {0xe2809d, 0x0094}, + {0xe2809e, 0x0084}, + {0xe280a0, 0x0086}, + {0xe280a1, 0x0087}, + {0xe280a2, 0x0095}, + {0xe280a6, 0x0085}, + {0xe280b0, 0x0089}, + {0xe280b9, 0x008b}, + {0xe280ba, 0x009b}, + {0xe282ac, 0x0080}, + {0xe284a2, 0x0099} +}; diff --git a/src/backend/utils/mb/Unicode/win1252_to_utf8.map b/src/backend/utils/mb/Unicode/win1252_to_utf8.map new file mode 100644 index 0000000000..636baf859f --- /dev/null +++ b/src/backend/utils/mb/Unicode/win1252_to_utf8.map @@ -0,0 +1,130 @@ +static pg_local_to_utf LUmapWIN1252[ 128 ] = { + {0x0080, 0xe282ac}, + {0x0081, 0x0000}, + {0x0082, 0xe2809a}, + {0x0083, 0xc692}, + {0x0084, 0xe2809e}, + {0x0085, 0xe280a6}, + {0x0086, 0xe280a0}, + {0x0087, 0xe280a1}, + {0x0088, 0xcb86}, + {0x0089, 0xe280b0}, + {0x008a, 0xc5a0}, + {0x008b, 0xe280b9}, + {0x008c, 0xc592}, + {0x008d, 
0x0000}, + {0x008e, 0xc5bd}, + {0x008f, 0x0000}, + {0x0090, 0x0000}, + {0x0091, 0xe28098}, + {0x0092, 0xe28099}, + {0x0093, 0xe2809c}, + {0x0094, 0xe2809d}, + {0x0095, 0xe280a2}, + {0x0096, 0xe28093}, + {0x0097, 0xe28094}, + {0x0098, 0xcb9c}, + {0x0099, 0xe284a2}, + {0x009a, 0xc5a1}, + {0x009b, 0xe280ba}, + {0x009c, 0xc593}, + {0x009d, 0x0000}, + {0x009e, 0xc5be}, + {0x009f, 0xc5b8}, + {0x00a0, 0xc2a0}, + {0x00a1, 0xc2a1}, + {0x00a2, 0xc2a2}, + {0x00a3, 0xc2a3}, + {0x00a4, 0xc2a4}, + {0x00a5, 0xc2a5}, + {0x00a6, 0xc2a6}, + {0x00a7, 0xc2a7}, + {0x00a8, 0xc2a8}, + {0x00a9, 0xc2a9}, + {0x00aa, 0xc2aa}, + {0x00ab, 0xc2ab}, + {0x00ac, 0xc2ac}, + {0x00ad, 0xc2ad}, + {0x00ae, 0xc2ae}, + {0x00af, 0xc2af}, + {0x00b0, 0xc2b0}, + {0x00b1, 0xc2b1}, + {0x00b2, 0xc2b2}, + {0x00b3, 0xc2b3}, + {0x00b4, 0xc2b4}, + {0x00b5, 0xc2b5}, + {0x00b6, 0xc2b6}, + {0x00b7, 0xc2b7}, + {0x00b8, 0xc2b8}, + {0x00b9, 0xc2b9}, + {0x00ba, 0xc2ba}, + {0x00bb, 0xc2bb}, + {0x00bc, 0xc2bc}, + {0x00bd, 0xc2bd}, + {0x00be, 0xc2be}, + {0x00bf, 0xc2bf}, + {0x00c0, 0xc380}, + {0x00c1, 0xc381}, + {0x00c2, 0xc382}, + {0x00c3, 0xc383}, + {0x00c4, 0xc384}, + {0x00c5, 0xc385}, + {0x00c6, 0xc386}, + {0x00c7, 0xc387}, + {0x00c8, 0xc388}, + {0x00c9, 0xc389}, + {0x00ca, 0xc38a}, + {0x00cb, 0xc38b}, + {0x00cc, 0xc38c}, + {0x00cd, 0xc38d}, + {0x00ce, 0xc38e}, + {0x00cf, 0xc38f}, + {0x00d0, 0xc390}, + {0x00d1, 0xc391}, + {0x00d2, 0xc392}, + {0x00d3, 0xc393}, + {0x00d4, 0xc394}, + {0x00d5, 0xc395}, + {0x00d6, 0xc396}, + {0x00d7, 0xc397}, + {0x00d8, 0xc398}, + {0x00d9, 0xc399}, + {0x00da, 0xc39a}, + {0x00db, 0xc39b}, + {0x00dc, 0xc39c}, + {0x00dd, 0xc39d}, + {0x00de, 0xc39e}, + {0x00df, 0xc39f}, + {0x00e0, 0xc3a0}, + {0x00e1, 0xc3a1}, + {0x00e2, 0xc3a2}, + {0x00e3, 0xc3a3}, + {0x00e4, 0xc3a4}, + {0x00e5, 0xc3a5}, + {0x00e6, 0xc3a6}, + {0x00e7, 0xc3a7}, + {0x00e8, 0xc3a8}, + {0x00e9, 0xc3a9}, + {0x00ea, 0xc3aa}, + {0x00eb, 0xc3ab}, + {0x00ec, 0xc3ac}, + {0x00ed, 0xc3ad}, + {0x00ee, 0xc3ae}, + {0x00ef, 0xc3af}, + {0x00f0, 
0xc3b0}, + {0x00f1, 0xc3b1}, + {0x00f2, 0xc3b2}, + {0x00f3, 0xc3b3}, + {0x00f4, 0xc3b4}, + {0x00f5, 0xc3b5}, + {0x00f6, 0xc3b6}, + {0x00f7, 0xc3b7}, + {0x00f8, 0xc3b8}, + {0x00f9, 0xc3b9}, + {0x00fa, 0xc3ba}, + {0x00fb, 0xc3bb}, + {0x00fc, 0xc3bc}, + {0x00fd, 0xc3bd}, + {0x00fe, 0xc3be}, + {0x00ff, 0xc3bf} +}; diff --git a/src/backend/utils/mb/conversion_procs/Makefile b/src/backend/utils/mb/conversion_procs/Makefile index 8df4a00891..18009da163 100644 --- a/src/backend/utils/mb/conversion_procs/Makefile +++ b/src/backend/utils/mb/conversion_procs/Makefile @@ -4,7 +4,7 @@ # Makefile for utils/mb/conversion_procs # # IDENTIFICATION -# $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/Makefile,v 1.12 2005/03/07 04:30:52 momjian Exp $ +# $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/Makefile,v 1.13 2005/03/14 18:31:21 momjian Exp $ # #------------------------------------------------------------------------- @@ -24,7 +24,7 @@ DIRS = \ utf8_and_euc_jp utf8_and_euc_kr utf8_and_euc_tw utf8_and_gb18030 \ utf8_and_gbk utf8_and_iso8859 utf8_and_iso8859_1 utf8_and_johab \ utf8_and_sjis utf8_and_win1258 utf8_and_uhc utf8_and_win1250 \ - utf8_and_win1256 utf8_and_win874 + utf8_and_win1252 utf8_and_win1256 utf8_and_win874 # conversion_name source_encoding destination_encoding function object CONVERSIONS = \ @@ -86,6 +86,8 @@ CONVERSIONS = \ koi8_r_to_utf8 KOI8R UTF8 koi8r_to_utf8 utf8_and_cyrillic \ utf8_to_windows_1251 UTF8 WIN1251 utf8_to_win1251 utf8_and_cyrillic \ windows_1251_to_utf8 WIN1251 UTF8 win1251_to_utf8 utf8_and_cyrillic \ + utf8_to_windows_1252 UTF8 WIN1252 utf8_to_win1252 utf8_and_win1252 \ + windows_1252_to_utf8 WIN1252 UTF8 win1252_to_utf8 utf8_and_win1252 \ utf8_to_windows_866 UTF8 WIN866 utf8_to_win866 utf8_and_cyrillic \ windows_866_to_utf8 WIN866 UTF8 win866_to_utf8 utf8_and_cyrillic \ euc_cn_to_utf8 EUC_CN UTF8 euc_cn_to_utf8 utf8_and_euc_cn \ @@ -136,12 +138,12 @@ CONVERSIONS = \ utf8_to_win1258 UTF8 WIN1258 utf8_to_win1258 
utf8_and_win1258 \ uhc_to_utf8 UHC UTF8 uhc_to_utf8 utf8_and_uhc \ utf8_to_uhc UTF8 UHC utf8_to_uhc utf8_and_uhc \ - utf8_to_windows_1250 UTF8 WIN1250 utf_to_win1250 utf8_and_win1250 \ - windows_1250_to_utf8 WIN1250 UTF8 win1250_to_utf utf8_and_win1250 \ - utf8_to_windows_1256 UTF8 WIN1256 utf_to_win1256 utf8_and_win1256 \ - windows_1256_to_utf8 WIN1256 UTF8 win1256_to_utf utf8_and_win1256 \ - utf8_to_windows_874 UTF8 WIN874 utf_to_win874 utf8_and_win874 \ - windows_874_to_utf8 WIN874 UTF8 win874_to_utf utf8_and_win874 + utf8_to_windows_1250 UTF8 WIN1250 utf8_to_win1250 utf8_and_win1250 \ + windows_1250_to_utf8 WIN1250 UTF8 win1250_to_utf8 utf8_and_win1250 \ + utf8_to_windows_1256 UTF8 WIN1256 utf8_to_win1256 utf8_and_win1256 \ + windows_1256_to_utf8 WIN1256 UTF8 win1256_to_utf8 utf8_and_win1256 \ + utf8_to_windows_874 UTF8 WIN874 utf8_to_win874 utf8_and_win874 \ + windows_874_to_utf8 WIN874 UTF8 win874_to_utf8 utf8_and_win874 all: $(SQLSCRIPT) @for dir in $(DIRS); do $(MAKE) -C $$dir $@ || exit; done diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c b/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c index ab830129e1..bdc0254f25 100644 --- a/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c +++ b/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c @@ -6,7 +6,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c,v 1.12 2005/03/07 04:30:54 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c,v 1.13 2005/03/14 18:31:21 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -108,7 +108,7 @@ static pg_conv_map maps[] = { {PG_WIN1258}, /* Windows-1258 */ {PG_WIN874}, /* windows-874 */ {PG_KOI8R}, /* KOI8-R */ - {PG_WIN1251}, /* 
windows-1251 (was: WIN) */ + {PG_WIN1251}, /* windows-1251 */ {PG_WIN866}, /* (MS-DOS CP866) */ {PG_ISO_8859_5, LUmapISO8859_5, ULmapISO8859_5, sizeof(LUmapISO8859_5) / sizeof(pg_local_to_utf), diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_win1250/utf8_and_win1250.c b/src/backend/utils/mb/conversion_procs/utf8_and_win1250/utf8_and_win1250.c index bdbeaa741d..74154b0cae 100644 --- a/src/backend/utils/mb/conversion_procs/utf8_and_win1250/utf8_and_win1250.c +++ b/src/backend/utils/mb/conversion_procs/utf8_and_win1250/utf8_and_win1250.c @@ -6,7 +6,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_win1250/utf8_and_win1250.c,v 1.10 2005/03/07 04:30:54 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_win1250/utf8_and_win1250.c,v 1.11 2005/03/14 18:31:21 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -17,11 +17,11 @@ #include "../../Unicode/utf8_to_win1250.map" #include "../../Unicode/win1250_to_utf8.map" -PG_FUNCTION_INFO_V1(utf_to_win1250); -PG_FUNCTION_INFO_V1(win1250_to_utf); +PG_FUNCTION_INFO_V1(utf8_to_win1250); +PG_FUNCTION_INFO_V1(win1250_to_utf8); -extern Datum utf_to_win1250(PG_FUNCTION_ARGS); -extern Datum win1250_to_utf(PG_FUNCTION_ARGS); +extern Datum utf8_to_win1250(PG_FUNCTION_ARGS); +extern Datum win1250_to_utf8(PG_FUNCTION_ARGS); /* ---------- * conv_proc( @@ -35,7 +35,7 @@ extern Datum win1250_to_utf(PG_FUNCTION_ARGS); */ Datum -utf_to_win1250(PG_FUNCTION_ARGS) +utf8_to_win1250(PG_FUNCTION_ARGS) { unsigned char *src = PG_GETARG_CSTRING(2); unsigned char *dest = PG_GETARG_CSTRING(3); @@ -52,7 +52,7 @@ utf_to_win1250(PG_FUNCTION_ARGS) } Datum -win1250_to_utf(PG_FUNCTION_ARGS) +win1250_to_utf8(PG_FUNCTION_ARGS) { unsigned char *src = PG_GETARG_CSTRING(2); unsigned char *dest = PG_GETARG_CSTRING(3); diff --git 
a/src/backend/utils/mb/conversion_procs/utf8_and_win1252/Makefile b/src/backend/utils/mb/conversion_procs/utf8_and_win1252/Makefile new file mode 100644 index 0000000000..06c85a390d --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/utf8_and_win1252/Makefile @@ -0,0 +1,12 @@ +#------------------------------------------------------------------------- +# +# $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_win1252/Makefile,v 1.3 2005/03/14 18:31:22 momjian Exp $ +# +#------------------------------------------------------------------------- +subdir = src/backend/utils/mb/conversion_procs/utf8_and_win1252 +top_builddir = ../../../../../.. +include $(top_builddir)/src/Makefile.global + +NAME := utf8_and_win1252 + +include $(srcdir)/../proc.mk diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_win1252/utf8_and_win1252.c b/src/backend/utils/mb/conversion_procs/utf8_and_win1252/utf8_and_win1252.c new file mode 100644 index 0000000000..de298d747e --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/utf8_and_win1252/utf8_and_win1252.c @@ -0,0 +1,69 @@ +/*------------------------------------------------------------------------- + * + * WIN1252 and UTF8 + * + * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_win1252/utf8_and_win1252.c,v 1.3 2005/03/14 18:31:22 momjian Exp $ + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "fmgr.h" +#include "mb/pg_wchar.h" +#include "../../Unicode/utf8_to_win1252.map" +#include "../../Unicode/win1252_to_utf8.map" + +PG_FUNCTION_INFO_V1(utf8_to_win1252); +PG_FUNCTION_INFO_V1(win1252_to_utf8); + +extern Datum utf8_to_win1252(PG_FUNCTION_ARGS); +extern Datum win1252_to_utf8(PG_FUNCTION_ARGS); + +/* ---------- + * conv_proc( + * INTEGER, -- source encoding 
id + * INTEGER, -- destination encoding id + * CSTRING, -- source string (null terminated C string) + * CSTRING, -- destination string (null terminated C string) + * INTEGER -- source string length + * ) returns VOID; + * ---------- + */ + +Datum +utf8_to_win1252(PG_FUNCTION_ARGS) +{ + unsigned char *src = PG_GETARG_CSTRING(2); + unsigned char *dest = PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + + Assert(PG_GETARG_INT32(0) == PG_UTF8); + Assert(PG_GETARG_INT32(1) == PG_WIN1252); + Assert(len >= 0); + + UtfToLocal(src, dest, ULmapWIN1252, + sizeof(ULmapWIN1252) / sizeof(pg_utf_to_local), len); + + PG_RETURN_VOID(); +} + +Datum +win1252_to_utf8(PG_FUNCTION_ARGS) +{ + unsigned char *src = PG_GETARG_CSTRING(2); + unsigned char *dest = PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + + Assert(PG_GETARG_INT32(0) == PG_WIN1252); + Assert(PG_GETARG_INT32(1) == PG_UTF8); + Assert(len >= 0); + + LocalToUtf(src, dest, LUmapWIN1252, + sizeof(LUmapWIN1252) / sizeof(pg_local_to_utf), PG_WIN1252, len); + + PG_RETURN_VOID(); +} diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_win1256/utf8_and_win1256.c b/src/backend/utils/mb/conversion_procs/utf8_and_win1256/utf8_and_win1256.c index 6af8e6fcff..35029b612e 100644 --- a/src/backend/utils/mb/conversion_procs/utf8_and_win1256/utf8_and_win1256.c +++ b/src/backend/utils/mb/conversion_procs/utf8_and_win1256/utf8_and_win1256.c @@ -6,7 +6,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_win1256/utf8_and_win1256.c,v 1.10 2005/03/07 04:30:54 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_win1256/utf8_and_win1256.c,v 1.11 2005/03/14 18:31:22 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -17,11 +17,11 @@ #include "../../Unicode/utf8_to_win1256.map" #include "../../Unicode/win1256_to_utf8.map" 
-PG_FUNCTION_INFO_V1(utf_to_win1256); -PG_FUNCTION_INFO_V1(win1256_to_utf); +PG_FUNCTION_INFO_V1(utf8_to_win1256); +PG_FUNCTION_INFO_V1(win1256_to_utf8); -extern Datum utf_to_win1256(PG_FUNCTION_ARGS); -extern Datum win1256_to_utf(PG_FUNCTION_ARGS); +extern Datum utf8_to_win1256(PG_FUNCTION_ARGS); +extern Datum win1256_to_utf8(PG_FUNCTION_ARGS); /* ---------- * conv_proc( @@ -35,7 +35,7 @@ extern Datum win1256_to_utf(PG_FUNCTION_ARGS); */ Datum -utf_to_win1256(PG_FUNCTION_ARGS) +utf8_to_win1256(PG_FUNCTION_ARGS) { unsigned char *src = PG_GETARG_CSTRING(2); unsigned char *dest = PG_GETARG_CSTRING(3); @@ -52,7 +52,7 @@ utf_to_win1256(PG_FUNCTION_ARGS) } Datum -win1256_to_utf(PG_FUNCTION_ARGS) +win1256_to_utf8(PG_FUNCTION_ARGS) { unsigned char *src = PG_GETARG_CSTRING(2); unsigned char *dest = PG_GETARG_CSTRING(3); diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_win874/utf8_and_win874.c b/src/backend/utils/mb/conversion_procs/utf8_and_win874/utf8_and_win874.c index 449a5c3323..d128db9fa4 100644 --- a/src/backend/utils/mb/conversion_procs/utf8_and_win874/utf8_and_win874.c +++ b/src/backend/utils/mb/conversion_procs/utf8_and_win874/utf8_and_win874.c @@ -6,7 +6,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_win874/utf8_and_win874.c,v 1.10 2005/03/07 04:30:55 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_win874/utf8_and_win874.c,v 1.11 2005/03/14 18:31:22 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -17,11 +17,11 @@ #include "../../Unicode/utf8_to_win874.map" #include "../../Unicode/win874_to_utf8.map" -PG_FUNCTION_INFO_V1(utf_to_win874); -PG_FUNCTION_INFO_V1(win874_to_utf); +PG_FUNCTION_INFO_V1(utf8_to_win874); +PG_FUNCTION_INFO_V1(win874_to_utf8); -extern Datum utf_to_win874(PG_FUNCTION_ARGS); -extern Datum win874_to_utf(PG_FUNCTION_ARGS); +extern Datum 
utf8_to_win874(PG_FUNCTION_ARGS); +extern Datum win874_to_utf8(PG_FUNCTION_ARGS); /* ---------- * conv_proc( @@ -35,7 +35,7 @@ extern Datum win874_to_utf(PG_FUNCTION_ARGS); */ Datum -utf_to_win874(PG_FUNCTION_ARGS) +utf8_to_win874(PG_FUNCTION_ARGS) { unsigned char *src = PG_GETARG_CSTRING(2); unsigned char *dest = PG_GETARG_CSTRING(3); @@ -52,7 +52,7 @@ utf_to_win874(PG_FUNCTION_ARGS) } Datum -win874_to_utf(PG_FUNCTION_ARGS) +win874_to_utf8(PG_FUNCTION_ARGS) { unsigned char *src = PG_GETARG_CSTRING(2); unsigned char *dest = PG_GETARG_CSTRING(3); diff --git a/src/backend/utils/mb/encnames.c b/src/backend/utils/mb/encnames.c index b04866ec76..5c0b15fd74 100644 --- a/src/backend/utils/mb/encnames.c +++ b/src/backend/utils/mb/encnames.c @@ -2,7 +2,7 @@ * Encoding names and routines for work with it. All * in this file is shared bedween FE and BE. * - * $PostgreSQL: pgsql/src/backend/utils/mb/encnames.c,v 1.24 2005/03/13 01:26:30 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/mb/encnames.c,v 1.25 2005/03/14 18:31:20 momjian Exp $ */ #ifdef FRONTEND #include "postgres_fe.h" @@ -194,6 +194,9 @@ pg_encname pg_encname_tbl[] = "win1251", PG_WIN1251 }, /* alias for Windows-1251 */ { + "win1252", PG_WIN1252 + }, /* alias for Windows-1252 */ + { "win1256", PG_WIN1256 }, /* alias for Windows-1256 */ { @@ -224,6 +227,9 @@ pg_encname pg_encname_tbl[] = "windows1251", PG_WIN1251 }, /* Windows-1251; Microsoft */ { + "windows1252", PG_WIN1252 + }, /* Windows-1252; Microsoft */ + { "windows1256", PG_WIN1256 }, /* Windows-1256; Microsoft */ { @@ -335,6 +341,9 @@ pg_enc2name pg_enc2name_tbl[] = "WIN1251", PG_WIN1251 }, { + "WIN1252", PG_WIN1252 + }, + { "ISO_8859_5", PG_ISO_8859_5 }, { diff --git a/src/backend/utils/mb/wchar.c b/src/backend/utils/mb/wchar.c index d981db1dd4..7ff16809d8 100644 --- a/src/backend/utils/mb/wchar.c +++ b/src/backend/utils/mb/wchar.c @@ -1,7 +1,7 @@ /* * conversion functions between pg_wchar and multibyte streams. 
* Tatsuo Ishii - * $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.42 2005/03/14 00:19:13 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.43 2005/03/14 18:31:20 momjian Exp $ * * WIN1250 client encoding updated by Pavel Behal * @@ -738,6 +738,7 @@ pg_wchar_tbl pg_wchar_table[] = { {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 20; PG_WIN874 */ {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 21; PG_KOI8 */ {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 22; PG_WIN1251 */ + {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 22; PG_WIN1252 */ {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 23; PG_WIN866 */ {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 24; ISO-8859-5 */ {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 25; ISO-8859-6 */ diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index b5203c197c..0a48b08d24 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -39,7 +39,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * Portions taken from FreeBSD. 
* - * $PostgreSQL: pgsql/src/bin/initdb/initdb.c,v 1.78 2005/03/11 15:36:27 momjian Exp $ + * $PostgreSQL: pgsql/src/bin/initdb/initdb.c,v 1.79 2005/03/14 18:31:23 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -764,6 +764,7 @@ struct encoding_match encoding_match_list[] = { {PG_LATIN10, "ISO8859-16"}, {PG_LATIN10, "iso885916"}, + {PG_WIN1252, "CP1252"}, {PG_WIN1256, "CP1256"}, {PG_WIN1258, "CP1258"}, #ifdef NOT_VERIFIED diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h index d13eda5e76..41ca953a40 100644 --- a/src/include/mb/pg_wchar.h +++ b/src/include/mb/pg_wchar.h @@ -1,4 +1,4 @@ -/* $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.57 2005/03/07 04:30:55 momjian Exp $ */ +/* $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.58 2005/03/14 18:31:24 momjian Exp $ */ #ifndef PG_WCHAR_H #define PG_WCHAR_H @@ -172,7 +172,8 @@ typedef enum pg_enc PG_WIN866, /* (MS-DOS CP866) */ PG_WIN874, /* windows-874 */ PG_KOI8R, /* KOI8-R */ - PG_WIN1251, /* windows-1251 (was: WIN) */ + PG_WIN1251, /* windows-1251 */ + PG_WIN1252, /* windows-1252 */ PG_ISO_8859_5, /* ISO-8859-5 */ PG_ISO_8859_6, /* ISO-8859-6 */ PG_ISO_8859_7, /* ISO-8859-7 */ diff --git a/src/test/regress/expected/conversion.out b/src/test/regress/expected/conversion.out index ea2984a227..1130d127fa 100644 --- a/src/test/regress/expected/conversion.out +++ b/src/test/regress/expected/conversion.out @@ -784,6 +784,32 @@ SELECT CONVERT('foo', 'WIN1251', 'UTF8'); foo (1 row) +-- UTF8 --> WIN1252 +SELECT CONVERT('foo' USING utf8_to_windows_1252); + convert_using +--------------- + foo +(1 row) + +SELECT CONVERT('foo', 'UTF8', 'WIN1252'); + convert +--------- + foo +(1 row) + +-- WIN1252 --> UTF8 +SELECT CONVERT('foo' USING windows_1252_to_utf8); + convert_using +--------------- + foo +(1 row) + +SELECT CONVERT('foo', 'WIN1252', 'UTF8'); + convert +--------- + foo +(1 row) + -- UTF8 --> WIN866 SELECT CONVERT('foo' USING utf8_to_windows_866); 
convert_using diff --git a/src/test/regress/sql/conversion.sql b/src/test/regress/sql/conversion.sql index 765064bc4f..b172d764ad 100644 --- a/src/test/regress/sql/conversion.sql +++ b/src/test/regress/sql/conversion.sql @@ -201,6 +201,12 @@ SELECT CONVERT('foo', 'UTF8', 'WIN1251'); -- WIN1251 --> UTF8 SELECT CONVERT('foo' USING windows_1251_to_utf8); SELECT CONVERT('foo', 'WIN1251', 'UTF8'); +-- UTF8 --> WIN1252 +SELECT CONVERT('foo' USING utf8_to_windows_1252); +SELECT CONVERT('foo', 'UTF8', 'WIN1252'); +-- WIN1252 --> UTF8 +SELECT CONVERT('foo' USING windows_1252_to_utf8); +SELECT CONVERT('foo', 'WIN1252', 'UTF8'); -- UTF8 --> WIN866 SELECT CONVERT('foo' USING utf8_to_windows_866); SELECT CONVERT('foo', 'UTF8', 'WIN866'); -- 2.11.0