OSDN Git Service

FIRST REPOSITORY
[eos/hostdependOTHERS.git] / I386LINUX / util / I386LINUX / doc / postgresql / html / multibyte.html
1 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
2 <HTML
3 ><HEAD
4 ><TITLE
5 >Character Set Support</TITLE
6 ><META
7 NAME="GENERATOR"
8 CONTENT="Modular DocBook HTML Stylesheet Version 1.7"><LINK
9 REV="MADE"
10 HREF="mailto:pgsql-docs@postgresql.org"><LINK
11 REL="HOME"
12 TITLE="PostgreSQL 7.4.1 Documentation"
13 HREF="index.html"><LINK
14 REL="UP"
15 TITLE="Localization"
16 HREF="charset.html"><LINK
17 REL="PREVIOUS"
18 TITLE="Localization"
19 HREF="charset.html"><LINK
20 REL="NEXT"
21 TITLE="Routine Database Maintenance Tasks"
22 HREF="maintenance.html"><LINK
23 REL="STYLESHEET"
24 TYPE="text/css"
25 HREF="stylesheet.css"><META
26 NAME="creation"
27 CONTENT="2003-12-22T03:48:47"></HEAD
28 ><BODY
29 CLASS="SECT1"
30 ><DIV
31 CLASS="NAVHEADER"
32 ><TABLE
33 SUMMARY="Header navigation table"
34 WIDTH="100%"
35 BORDER="0"
36 CELLPADDING="0"
37 CELLSPACING="0"
38 ><TR
39 ><TH
40 COLSPAN="5"
41 ALIGN="center"
42 VALIGN="bottom"
43 >PostgreSQL 7.4.1 Documentation</TH
44 ></TR
45 ><TR
46 ><TD
47 WIDTH="10%"
48 ALIGN="left"
49 VALIGN="top"
50 ><A
51 HREF="charset.html"
52 ACCESSKEY="P"
53 >Prev</A
54 ></TD
55 ><TD
56 WIDTH="10%"
57 ALIGN="left"
58 VALIGN="top"
59 ><A
60 HREF="charset.html"
61 >Fast Backward</A
62 ></TD
63 ><TD
64 WIDTH="60%"
65 ALIGN="center"
66 VALIGN="bottom"
67 >Chapter 20. Localization</TD
68 ><TD
69 WIDTH="10%"
70 ALIGN="right"
71 VALIGN="top"
72 ><A
73 HREF="charset.html"
74 >Fast Forward</A
75 ></TD
76 ><TD
77 WIDTH="10%"
78 ALIGN="right"
79 VALIGN="top"
80 ><A
81 HREF="maintenance.html"
82 ACCESSKEY="N"
83 >Next</A
84 ></TD
85 ></TR
86 ></TABLE
87 ><HR
88 ALIGN="LEFT"
89 WIDTH="100%"></DIV
90 ><DIV
91 CLASS="SECT1"
92 ><H1
93 CLASS="SECT1"
94 ><A
95 NAME="MULTIBYTE"
96 >20.2. Character Set Support</A
97 ></H1
98 ><A
99 NAME="AEN18176"
100 ></A
101 ><P
102 >   The character set support in <SPAN
103 CLASS="PRODUCTNAME"
104 >PostgreSQL</SPAN
105 >
106    allows you to store text in a variety of character sets, including
107    single-byte character sets such as the ISO 8859 series and
108    multiple-byte character sets such as <ACRONYM
109 CLASS="ACRONYM"
110 >EUC</ACRONYM
111 > (Extended Unix
112    Code), Unicode, and Mule internal code.  All character sets can be
113    used transparently throughout the server.  (If you use extension
114    functions from other sources, this depends on whether their authors
115    wrote the code correctly.)  The default character set is selected while
116    initializing your <SPAN
117 CLASS="PRODUCTNAME"
118 >PostgreSQL</SPAN
119 > database
120    cluster using <TT
121 CLASS="COMMAND"
122 >initdb</TT
123 >.  It can be overridden when you
124    create a database using <TT
125 CLASS="COMMAND"
126 >createdb</TT
127 > or by using the
128    SQL command <TT
129 CLASS="COMMAND"
130 >CREATE DATABASE</TT
131 >. Thus, you can have multiple
132    databases, each with a different character set.
133   </P
134 ><DIV
135 CLASS="SECT2"
136 ><H2
137 CLASS="SECT2"
138 ><A
139 NAME="AEN18185"
140 >20.2.1. Supported Character Sets</A
141 ></H2
142 ><P
143 >     <A
144 HREF="multibyte.html#CHARSET-TABLE"
145 >Table 20-1</A
146 > shows the character sets available
147      for use in the server.
148     </P
149 ><DIV
150 CLASS="TABLE"
151 ><A
152 NAME="CHARSET-TABLE"
153 ></A
154 ><P
155 ><B
156 >Table 20-1. Server Character Sets</B
157 ></P
158 ><TABLE
159 BORDER="1"
160 CLASS="CALSTABLE"
161 ><COL><COL><THEAD
162 ><TR
163 ><TH
164 >Name</TH
165 ><TH
166 >Description</TH
167 ></TR
168 ></THEAD
169 ><TBODY
170 ><TR
171 ><TD
172 ><TT
173 CLASS="LITERAL"
174 >SQL_ASCII</TT
175 ></TD
176 ><TD
177 ><ACRONYM
178 CLASS="ACRONYM"
179 >ASCII</ACRONYM
180 ></TD
181 ></TR
182 ><TR
183 ><TD
184 ><TT
185 CLASS="LITERAL"
186 >EUC_JP</TT
187 ></TD
188 ><TD
189 >Japanese <ACRONYM
190 CLASS="ACRONYM"
191 >EUC</ACRONYM
192 ></TD
193 ></TR
194 ><TR
195 ><TD
196 ><TT
197 CLASS="LITERAL"
198 >EUC_CN</TT
199 ></TD
200 ><TD
201 >Chinese <ACRONYM
202 CLASS="ACRONYM"
203 >EUC</ACRONYM
204 ></TD
205 ></TR
206 ><TR
207 ><TD
208 ><TT
209 CLASS="LITERAL"
210 >EUC_KR</TT
211 ></TD
212 ><TD
213 >Korean <ACRONYM
214 CLASS="ACRONYM"
215 >EUC</ACRONYM
216 ></TD
217 ></TR
218 ><TR
219 ><TD
220 ><TT
221 CLASS="LITERAL"
222 >JOHAB</TT
223 ></TD
224 ><TD
225 >Korean <ACRONYM
226 CLASS="ACRONYM"
227 >EUC</ACRONYM
228 > (Hangul base)</TD
229 ></TR
230 ><TR
231 ><TD
232 ><TT
233 CLASS="LITERAL"
234 >EUC_TW</TT
235 ></TD
236 ><TD
237 >Taiwan <ACRONYM
238 CLASS="ACRONYM"
239 >EUC</ACRONYM
240 ></TD
241 ></TR
242 ><TR
243 ><TD
244 ><TT
245 CLASS="LITERAL"
246 >UNICODE</TT
247 ></TD
248 ><TD
249 >Unicode (<ACRONYM
250 CLASS="ACRONYM"
251 >UTF</ACRONYM
252 >-8)</TD
253 ></TR
254 ><TR
255 ><TD
256 ><TT
257 CLASS="LITERAL"
258 >MULE_INTERNAL</TT
259 ></TD
260 ><TD
261 >Mule internal code</TD
262 ></TR
263 ><TR
264 ><TD
265 ><TT
266 CLASS="LITERAL"
267 >LATIN1</TT
268 ></TD
269 ><TD
270 >ISO 8859-1/<ACRONYM
271 CLASS="ACRONYM"
272 >ECMA</ACRONYM
273 > 94 (Latin alphabet no.1)</TD
274 ></TR
275 ><TR
276 ><TD
277 ><TT
278 CLASS="LITERAL"
279 >LATIN2</TT
280 ></TD
281 ><TD
282 >ISO 8859-2/<ACRONYM
283 CLASS="ACRONYM"
284 >ECMA</ACRONYM
285 > 94 (Latin alphabet no.2)</TD
286 ></TR
287 ><TR
288 ><TD
289 ><TT
290 CLASS="LITERAL"
291 >LATIN3</TT
292 ></TD
293 ><TD
294 >ISO 8859-3/<ACRONYM
295 CLASS="ACRONYM"
296 >ECMA</ACRONYM
297 > 94 (Latin alphabet no.3)</TD
298 ></TR
299 ><TR
300 ><TD
301 ><TT
302 CLASS="LITERAL"
303 >LATIN4</TT
304 ></TD
305 ><TD
306 >ISO 8859-4/<ACRONYM
307 CLASS="ACRONYM"
308 >ECMA</ACRONYM
309 > 94 (Latin alphabet no.4)</TD
310 ></TR
311 ><TR
312 ><TD
313 ><TT
314 CLASS="LITERAL"
315 >LATIN5</TT
316 ></TD
317 ><TD
318 >ISO 8859-9/<ACRONYM
319 CLASS="ACRONYM"
320 >ECMA</ACRONYM
321 > 128 (Latin alphabet no.5)</TD
322 ></TR
323 ><TR
324 ><TD
325 ><TT
326 CLASS="LITERAL"
327 >LATIN6</TT
328 ></TD
329 ><TD
330 >ISO 8859-10/<ACRONYM
331 CLASS="ACRONYM"
332 >ECMA</ACRONYM
333 > 144 (Latin alphabet no.6)</TD
334 ></TR
335 ><TR
336 ><TD
337 ><TT
338 CLASS="LITERAL"
339 >LATIN7</TT
340 ></TD
341 ><TD
342 >ISO 8859-13 (Latin alphabet no.7)</TD
343 ></TR
344 ><TR
345 ><TD
346 ><TT
347 CLASS="LITERAL"
348 >LATIN8</TT
349 ></TD
350 ><TD
351 >ISO 8859-14 (Latin alphabet no.8)</TD
352 ></TR
353 ><TR
354 ><TD
355 ><TT
356 CLASS="LITERAL"
357 >LATIN9</TT
358 ></TD
359 ><TD
360 >ISO 8859-15 (Latin alphabet no.9)</TD
361 ></TR
362 ><TR
363 ><TD
364 ><TT
365 CLASS="LITERAL"
366 >LATIN10</TT
367 ></TD
368 ><TD
369 >ISO 8859-16/<ACRONYM
370 CLASS="ACRONYM"
371 >ASRO</ACRONYM
372 > SR 14111 (Latin alphabet no.10)</TD
373 ></TR
374 ><TR
375 ><TD
376 ><TT
377 CLASS="LITERAL"
378 >ISO_8859_5</TT
379 ></TD
380 ><TD
381 >ISO 8859-5/<ACRONYM
382 CLASS="ACRONYM"
383 >ECMA</ACRONYM
384 > 113 (Latin/Cyrillic)</TD
385 ></TR
386 ><TR
387 ><TD
388 ><TT
389 CLASS="LITERAL"
390 >ISO_8859_6</TT
391 ></TD
392 ><TD
393 >ISO 8859-6/<ACRONYM
394 CLASS="ACRONYM"
395 >ECMA</ACRONYM
396 > 114 (Latin/Arabic)</TD
397 ></TR
398 ><TR
399 ><TD
400 ><TT
401 CLASS="LITERAL"
402 >ISO_8859_7</TT
403 ></TD
404 ><TD
405 >ISO 8859-7/<ACRONYM
406 CLASS="ACRONYM"
407 >ECMA</ACRONYM
408 > 118 (Latin/Greek)</TD
409 ></TR
410 ><TR
411 ><TD
412 ><TT
413 CLASS="LITERAL"
414 >ISO_8859_8</TT
415 ></TD
416 ><TD
417 >ISO 8859-8/<ACRONYM
418 CLASS="ACRONYM"
419 >ECMA</ACRONYM
420 > 121 (Latin/Hebrew)</TD
421 ></TR
422 ><TR
423 ><TD
424 ><TT
425 CLASS="LITERAL"
426 >KOI8</TT
427 ></TD
428 ><TD
429 ><ACRONYM
430 CLASS="ACRONYM"
431 >KOI</ACRONYM
432 >8-R(U)</TD
433 ></TR
434 ><TR
435 ><TD
436 ><TT
437 CLASS="LITERAL"
438 >WIN</TT
439 ></TD
440 ><TD
441 >Windows CP1251</TD
442 ></TR
443 ><TR
444 ><TD
445 ><TT
446 CLASS="LITERAL"
447 >ALT</TT
448 ></TD
449 ><TD
450 >Windows CP866</TD
451 ></TR
452 ><TR
453 ><TD
454 ><TT
455 CLASS="LITERAL"
456 >WIN1256</TT
457 ></TD
458 ><TD
459 >Windows CP1256 (Arabic)</TD
460 ></TR
461 ><TR
462 ><TD
463 ><TT
464 CLASS="LITERAL"
465 >TCVN</TT
466 ></TD
467 ><TD
468 ><ACRONYM
469 CLASS="ACRONYM"
470 >TCVN</ACRONYM
471 >-5712/Windows CP1258 (Vietnamese)</TD
472 ></TR
473 ><TR
474 ><TD
475 ><TT
476 CLASS="LITERAL"
477 >WIN874</TT
478 ></TD
479 ><TD
480 >Windows CP874 (Thai)</TD
481 ></TR
482 ></TBODY
483 ></TABLE
484 ></DIV
485 ><DIV
486 CLASS="IMPORTANT"
487 ><BLOCKQUOTE
488 CLASS="IMPORTANT"
489 ><P
490 ><B
491 >Important: </B
492 >      Before <SPAN
493 CLASS="PRODUCTNAME"
494 >PostgreSQL</SPAN
495 > 7.2, <TT
496 CLASS="LITERAL"
497 >LATIN5</TT
498 >
499       mistakenly meant ISO 8859-5.  From 7.2 on, <TT
500 CLASS="LITERAL"
501 >LATIN5</TT
502 >
503       means ISO 8859-9. If you have a <TT
504 CLASS="LITERAL"
505 >LATIN5</TT
506 > database
507       created on 7.1 or earlier and want to migrate to 7.2 or later,
508       you should be careful about this change.
509      </P
510 ></BLOCKQUOTE
511 ></DIV
512 ><P
513 >      Not all <ACRONYM
514 CLASS="ACRONYM"
515 >API</ACRONYM
516 >s support all the listed character sets. For example, the
517       <SPAN
518 CLASS="PRODUCTNAME"
519 >PostgreSQL</SPAN
520 >
521       JDBC driver does not support <TT
522 CLASS="LITERAL"
523 >MULE_INTERNAL</TT
524 >, <TT
525 CLASS="LITERAL"
526 >LATIN6</TT
527 >,
528       <TT
529 CLASS="LITERAL"
530 >LATIN8</TT
531 >, and <TT
532 CLASS="LITERAL"
533 >LATIN10</TT
534 >.
535      </P
536 ></DIV
537 ><DIV
538 CLASS="SECT2"
539 ><H2
540 CLASS="SECT2"
541 ><A
542 NAME="AEN18342"
543 >20.2.2. Setting the Character Set</A
544 ></H2
545 ><P
546 >     <TT
547 CLASS="COMMAND"
548 >initdb</TT
549 > defines the default character set
550      for a <SPAN
551 CLASS="PRODUCTNAME"
552 >PostgreSQL</SPAN
553 > cluster. For example,
554
555 </P><PRE
556 CLASS="SCREEN"
557 >initdb -E EUC_JP</PRE
558 ><P>
559
560      sets the default character set (encoding) to
561      <TT
562 CLASS="LITERAL"
563 >EUC_JP</TT
564 > (Extended Unix Code for Japanese).  You
565      can use <VAR
566 CLASS="OPTION"
567 >--encoding</VAR
568 > instead of
569      <VAR
570 CLASS="OPTION"
571 >-E</VAR
572 > if you prefer to type longer option strings.
573      If no <VAR
574 CLASS="OPTION"
575 >-E</VAR
576 > or <VAR
577 CLASS="OPTION"
578 >--encoding</VAR
579 > option is
580      given, <TT
581 CLASS="LITERAL"
582 >SQL_ASCII</TT
583 > is used.
584     </P
585 ><P
586 >     You can create a database with a different character set:
587
588 </P><PRE
589 CLASS="SCREEN"
590 >createdb -E EUC_KR korean</PRE
591 ><P>
592
593      This will create a database named <TT
594 CLASS="LITERAL"
595 >korean</TT
596 > that
597      uses the character set <TT
598 CLASS="LITERAL"
599 >EUC_KR</TT
600 >.  Another way to
601      accomplish this is to use this SQL command:
602
603 </P><PRE
604 CLASS="PROGRAMLISTING"
605 >CREATE DATABASE korean WITH ENCODING 'EUC_KR';</PRE
606 ><P>
607
608      The encoding for a database is stored in the system catalog
609      <TT
610 CLASS="LITERAL"
611 >pg_database</TT
612 >.  You can see that by using the
613      <VAR
614 CLASS="OPTION"
615 >-l</VAR
616 > option or the <TT
617 CLASS="COMMAND"
618 >\l</TT
619 > command
620      of <TT
621 CLASS="COMMAND"
622 >psql</TT
623 >.
624
625 </P><PRE
626 CLASS="SCREEN"
627 >$ <KBD
628 CLASS="USERINPUT"
629 >psql -l</KBD
630 >
631             List of databases
632    Database    |  Owner  |   Encoding    
633 ---------------+---------+---------------
634  euc_cn        | t-ishii | EUC_CN
635  euc_jp        | t-ishii | EUC_JP
636  euc_kr        | t-ishii | EUC_KR
637  euc_tw        | t-ishii | EUC_TW
638  mule_internal | t-ishii | MULE_INTERNAL
639  regression    | t-ishii | SQL_ASCII
640  template1     | t-ishii | EUC_JP
641  test          | t-ishii | EUC_JP
642  unicode       | t-ishii | UNICODE
643 (9 rows)</PRE
644 ><P>
645     </P
646 ></DIV
647 ><DIV
648 CLASS="SECT2"
649 ><H2
650 CLASS="SECT2"
651 ><A
652 NAME="AEN18365"
653 >20.2.3. Automatic Character Set Conversion Between Server and Client</A
654 ></H2
655 ><P
656 >     <SPAN
657 CLASS="PRODUCTNAME"
658 >PostgreSQL</SPAN
659 > supports automatic
660      character set conversion between server and client for certain
661      character sets. The conversion information is stored in the
662      <TT
663 CLASS="LITERAL"
664 >pg_conversion</TT
665 > system catalog. You can create a new
666      conversion by using the SQL command <TT
667 CLASS="COMMAND"
668 >CREATE
669      CONVERSION</TT
670 >. <SPAN
671 CLASS="PRODUCTNAME"
672 >PostgreSQL</SPAN
673 > comes with some
674      predefined conversions. They are listed in <A
675 HREF="multibyte.html#MULTIBYTE-TRANSLATION-TABLE"
676 >Table 20-2</A
677 >.
678     </P
679 ><DIV
680 CLASS="TABLE"
681 ><A
682 NAME="MULTIBYTE-TRANSLATION-TABLE"
683 ></A
684 ><P
685 ><B
686 >Table 20-2. Client/Server Character Set Conversions</B
687 ></P
688 ><TABLE
689 BORDER="1"
690 CLASS="CALSTABLE"
691 ><COL><COL><THEAD
692 ><TR
693 ><TH
694 >Server Character Set</TH
695 ><TH
696 >Available Client Character Sets</TH
697 ></TR
698 ></THEAD
699 ><TBODY
700 ><TR
701 ><TD
702 ><TT
703 CLASS="LITERAL"
704 >SQL_ASCII</TT
705 ></TD
706 ><TD
707 ><TT
708 CLASS="LITERAL"
709 >SQL_ASCII</TT
710 >, <TT
711 CLASS="LITERAL"
712 >UNICODE</TT
713 >, <TT
714 CLASS="LITERAL"
715 >MULE_INTERNAL</TT
716 >
717          </TD
718 ></TR
719 ><TR
720 ><TD
721 ><TT
722 CLASS="LITERAL"
723 >EUC_JP</TT
724 ></TD
725 ><TD
726 ><TT
727 CLASS="LITERAL"
728 >EUC_JP</TT
729 >, <TT
730 CLASS="LITERAL"
731 >SJIS</TT
732 >,
733          <TT
734 CLASS="LITERAL"
735 >UNICODE</TT
736 >, <TT
737 CLASS="LITERAL"
738 >MULE_INTERNAL</TT
739 >
740          </TD
741 ></TR
742 ><TR
743 ><TD
744 ><TT
745 CLASS="LITERAL"
746 >EUC_CN</TT
747 ></TD
748 ><TD
749 ><TT
750 CLASS="LITERAL"
751 >EUC_CN</TT
752 >, <TT
753 CLASS="LITERAL"
754 >UNICODE</TT
755 >, <TT
756 CLASS="LITERAL"
757 >MULE_INTERNAL</TT
758 >
759          </TD
760 ></TR
761 ><TR
762 ><TD
763 ><TT
764 CLASS="LITERAL"
765 >EUC_KR</TT
766 ></TD
767 ><TD
768 ><TT
769 CLASS="LITERAL"
770 >EUC_KR</TT
771 >, <TT
772 CLASS="LITERAL"
773 >UNICODE</TT
774 >, <TT
775 CLASS="LITERAL"
776 >MULE_INTERNAL</TT
777 >
778          </TD
779 ></TR
780 ><TR
781 ><TD
782 ><TT
783 CLASS="LITERAL"
784 >JOHAB</TT
785 ></TD
786 ><TD
787 ><TT
788 CLASS="LITERAL"
789 >JOHAB</TT
790 >, <TT
791 CLASS="LITERAL"
792 >UNICODE</TT
793 >
794          </TD
795 ></TR
796 ><TR
797 ><TD
798 ><TT
799 CLASS="LITERAL"
800 >EUC_TW</TT
801 ></TD
802 ><TD
803 ><TT
804 CLASS="LITERAL"
805 >EUC_TW</TT
806 >, <TT
807 CLASS="LITERAL"
808 >BIG5</TT
809 >,
810          <TT
811 CLASS="LITERAL"
812 >UNICODE</TT
813 >, <TT
814 CLASS="LITERAL"
815 >MULE_INTERNAL</TT
816 >
817          </TD
818 ></TR
819 ><TR
820 ><TD
821 ><TT
822 CLASS="LITERAL"
823 >LATIN1</TT
824 ></TD
825 ><TD
826 ><TT
827 CLASS="LITERAL"
828 >LATIN1</TT
829 >, <TT
830 CLASS="LITERAL"
831 >UNICODE</TT
832 >,
833          <TT
834 CLASS="LITERAL"
835 >MULE_INTERNAL</TT
836 >
837          </TD
838 ></TR
839 ><TR
840 ><TD
841 ><TT
842 CLASS="LITERAL"
843 >LATIN2</TT
844 ></TD
845 ><TD
846 ><TT
847 CLASS="LITERAL"
848 >LATIN2</TT
849 >, <TT
850 CLASS="LITERAL"
851 >WIN1250</TT
852 >,
853          <TT
854 CLASS="LITERAL"
855 >UNICODE</TT
856 >,
857          <TT
858 CLASS="LITERAL"
859 >MULE_INTERNAL</TT
860 >
861          </TD
862 ></TR
863 ><TR
864 ><TD
865 ><TT
866 CLASS="LITERAL"
867 >LATIN3</TT
868 ></TD
869 ><TD
870 ><TT
871 CLASS="LITERAL"
872 >LATIN3</TT
873 >, <TT
874 CLASS="LITERAL"
875 >UNICODE</TT
876 >,
877          <TT
878 CLASS="LITERAL"
879 >MULE_INTERNAL</TT
880 >
881          </TD
882 ></TR
883 ><TR
884 ><TD
885 ><TT
886 CLASS="LITERAL"
887 >LATIN4</TT
888 ></TD
889 ><TD
890 ><TT
891 CLASS="LITERAL"
892 >LATIN4</TT
893 >, <TT
894 CLASS="LITERAL"
895 >UNICODE</TT
896 >,
897          <TT
898 CLASS="LITERAL"
899 >MULE_INTERNAL</TT
900 >
901          </TD
902 ></TR
903 ><TR
904 ><TD
905 ><TT
906 CLASS="LITERAL"
907 >LATIN5</TT
908 ></TD
909 ><TD
910 ><TT
911 CLASS="LITERAL"
912 >LATIN5</TT
913 >, <TT
914 CLASS="LITERAL"
915 >UNICODE</TT
916 >
917          </TD
918 ></TR
919 ><TR
920 ><TD
921 ><TT
922 CLASS="LITERAL"
923 >LATIN6</TT
924 ></TD
925 ><TD
926 ><TT
927 CLASS="LITERAL"
928 >LATIN6</TT
929 >, <TT
930 CLASS="LITERAL"
931 >UNICODE</TT
932 >,
933          <TT
934 CLASS="LITERAL"
935 >MULE_INTERNAL</TT
936 >
937          </TD
938 ></TR
939 ><TR
940 ><TD
941 ><TT
942 CLASS="LITERAL"
943 >LATIN7</TT
944 ></TD
945 ><TD
946 ><TT
947 CLASS="LITERAL"
948 >LATIN7</TT
949 >, <TT
950 CLASS="LITERAL"
951 >UNICODE</TT
952 >,
953          <TT
954 CLASS="LITERAL"
955 >MULE_INTERNAL</TT
956 >
957          </TD
958 ></TR
959 ><TR
960 ><TD
961 ><TT
962 CLASS="LITERAL"
963 >LATIN8</TT
964 ></TD
965 ><TD
966 ><TT
967 CLASS="LITERAL"
968 >LATIN8</TT
969 >, <TT
970 CLASS="LITERAL"
971 >UNICODE</TT
972 >,
973          <TT
974 CLASS="LITERAL"
975 >MULE_INTERNAL</TT
976 >
977          </TD
978 ></TR
979 ><TR
980 ><TD
981 ><TT
982 CLASS="LITERAL"
983 >LATIN9</TT
984 ></TD
985 ><TD
986 ><TT
987 CLASS="LITERAL"
988 >LATIN9</TT
989 >, <TT
990 CLASS="LITERAL"
991 >UNICODE</TT
992 >,
993          <TT
994 CLASS="LITERAL"
995 >MULE_INTERNAL</TT
996 >
997          </TD
998 ></TR
999 ><TR
1000 ><TD
1001 ><TT
1002 CLASS="LITERAL"
1003 >LATIN10</TT
1004 ></TD
1005 ><TD
1006 ><TT
1007 CLASS="LITERAL"
1008 >LATIN10</TT
1009 >, <TT
1010 CLASS="LITERAL"
1011 >UNICODE</TT
1012 >,
1013          <TT
1014 CLASS="LITERAL"
1015 >MULE_INTERNAL</TT
1016 >
1017          </TD
1018 ></TR
1019 ><TR
1020 ><TD
1021 ><TT
1022 CLASS="LITERAL"
1023 >ISO_8859_5</TT
1024 ></TD
1025 ><TD
1026 ><TT
1027 CLASS="LITERAL"
1028 >ISO_8859_5</TT
1029 >,
1030          <TT
1031 CLASS="LITERAL"
1032 >UNICODE</TT
1033 >,
1034          <TT
1035 CLASS="LITERAL"
1036 >MULE_INTERNAL</TT
1037 >,
1038          <TT
1039 CLASS="LITERAL"
1040 >WIN</TT
1041 >,
1042          <TT
1043 CLASS="LITERAL"
1044 >ALT</TT
1045 >,
1046          <TT
1047 CLASS="LITERAL"
1048 >KOI8</TT
1049 >
1050          </TD
1051 ></TR
1052 ><TR
1053 ><TD
1054 ><TT
1055 CLASS="LITERAL"
1056 >ISO_8859_6</TT
1057 ></TD
1058 ><TD
1059 ><TT
1060 CLASS="LITERAL"
1061 >ISO_8859_6</TT
1062 >,
1063          <TT
1064 CLASS="LITERAL"
1065 >UNICODE</TT
1066 >
1067          </TD
1068 ></TR
1069 ><TR
1070 ><TD
1071 ><TT
1072 CLASS="LITERAL"
1073 >ISO_8859_7</TT
1074 ></TD
1075 ><TD
1076 ><TT
1077 CLASS="LITERAL"
1078 >ISO_8859_7</TT
1079 >,
1080          <TT
1081 CLASS="LITERAL"
1082 >UNICODE</TT
1083 >
1084          </TD
1085 ></TR
1086 ><TR
1087 ><TD
1088 ><TT
1089 CLASS="LITERAL"
1090 >ISO_8859_8</TT
1091 ></TD
1092 ><TD
1093 ><TT
1094 CLASS="LITERAL"
1095 >ISO_8859_8</TT
1096 >,
1097          <TT
1098 CLASS="LITERAL"
1099 >UNICODE</TT
1100 >
1101          </TD
1102 ></TR
1103 ><TR
1104 ><TD
1105 ><TT
1106 CLASS="LITERAL"
1107 >UNICODE</TT
1108 ></TD
1109 ><TD
1110 >        <TT
1111 CLASS="LITERAL"
1112 >EUC_JP</TT
1113 >, <TT
1114 CLASS="LITERAL"
1115 >SJIS</TT
1116 >, 
1117          <TT
1118 CLASS="LITERAL"
1119 >EUC_KR</TT
1120 >, <TT
1121 CLASS="LITERAL"
1122 >UHC</TT
1123 >, <TT
1124 CLASS="LITERAL"
1125 >JOHAB</TT
1126 >,
1127          <TT
1128 CLASS="LITERAL"
1129 >EUC_CN</TT
1130 >, <TT
1131 CLASS="LITERAL"
1132 >GBK</TT
1133 >,
1134          <TT
1135 CLASS="LITERAL"
1136 >EUC_TW</TT
1137 >, <TT
1138 CLASS="LITERAL"
1139 >BIG5</TT
1140 >, 
1141          <TT
1142 CLASS="LITERAL"
1143 >LATIN1</TT
1144 > to <TT
1145 CLASS="LITERAL"
1146 >LATIN10</TT
1147 >, 
1148          <TT
1149 CLASS="LITERAL"
1150 >ISO_8859_5</TT
1151 >, 
1152          <TT
1153 CLASS="LITERAL"
1154 >ISO_8859_6</TT
1155 >,
1156          <TT
1157 CLASS="LITERAL"
1158 >ISO_8859_7</TT
1159 >, 
1160          <TT
1161 CLASS="LITERAL"
1162 >ISO_8859_8</TT
1163 >, 
1164          <TT
1165 CLASS="LITERAL"
1166 >WIN</TT
1167 >, <TT
1168 CLASS="LITERAL"
1169 >ALT</TT
1170 >, 
1171          <TT
1172 CLASS="LITERAL"
1173 >KOI8</TT
1174 >, 
1175          <TT
1176 CLASS="LITERAL"
1177 >WIN1256</TT
1178 >,
1179          <TT
1180 CLASS="LITERAL"
1181 >TCVN</TT
1182 >,
1183          <TT
1184 CLASS="LITERAL"
1185 >WIN874</TT
1186 >,
1187          <TT
1188 CLASS="LITERAL"
1189 >GB18030</TT
1190 >,
1191          <TT
1192 CLASS="LITERAL"
1193 >WIN1250</TT
1194 >
1195          </TD
1196 ></TR
1197 ><TR
1198 ><TD
1199 ><TT
1200 CLASS="LITERAL"
1201 >MULE_INTERNAL</TT
1202 ></TD
1203 ><TD
1204 ><TT
1205 CLASS="LITERAL"
1206 >EUC_JP</TT
1207 >, <TT
1208 CLASS="LITERAL"
1209 >SJIS</TT
1210 >, <TT
1211 CLASS="LITERAL"
1212 >EUC_KR</TT
1213 >, <TT
1214 CLASS="LITERAL"
1215 >EUC_CN</TT
1216 >, 
1217           <TT
1218 CLASS="LITERAL"
1219 >EUC_TW</TT
1220 >, <TT
1221 CLASS="LITERAL"
1222 >BIG5</TT
1223 >, <TT
1224 CLASS="LITERAL"
1225 >LATIN1</TT
1226 > to <TT
1227 CLASS="LITERAL"
1228 >LATIN5</TT
1229 >, 
1230           <TT
1231 CLASS="LITERAL"
1232 >WIN</TT
1233 >, <TT
1234 CLASS="LITERAL"
1235 >ALT</TT
1236 >,
1237          <TT
1238 CLASS="LITERAL"
1239 >WIN1250</TT
1240 >,
1241           <TT
1245 CLASS="LITERAL"
1246 >ISO_8859_5</TT
1247 >, <TT
1248 CLASS="LITERAL"
1249 >KOI8</TT
1250 ></TD
1251 ></TR
1252 ><TR
1253 ><TD
1254 ><TT
1255 CLASS="LITERAL"
1256 >KOI8</TT
1257 ></TD
1258 ><TD
1259 ><TT
1260 CLASS="LITERAL"
1261 >ISO_8859_5</TT
1262 >, <TT
1263 CLASS="LITERAL"
1264 >WIN</TT
1265 >, 
1266          <TT
1267 CLASS="LITERAL"
1268 >ALT</TT
1269 >, <TT
1270 CLASS="LITERAL"
1271 >KOI8</TT
1272 >,
1273          <TT
1274 CLASS="LITERAL"
1275 >UNICODE</TT
1276 >, <TT
1277 CLASS="LITERAL"
1278 >MULE_INTERNAL</TT
1279 >
1280          </TD
1281 ></TR
1282 ><TR
1283 ><TD
1284 ><TT
1285 CLASS="LITERAL"
1286 >WIN</TT
1287 ></TD
1288 ><TD
1289 ><TT
1290 CLASS="LITERAL"
1291 >ISO_8859_5</TT
1292 >, <TT
1293 CLASS="LITERAL"
1294 >WIN</TT
1295 >, 
1296          <TT
1297 CLASS="LITERAL"
1298 >ALT</TT
1299 >, <TT
1300 CLASS="LITERAL"
1301 >KOI8</TT
1302 >,
1303          <TT
1304 CLASS="LITERAL"
1305 >UNICODE</TT
1306 >, <TT
1307 CLASS="LITERAL"
1308 >MULE_INTERNAL</TT
1309 >
1310          </TD
1311 ></TR
1312 ><TR
1313 ><TD
1314 ><TT
1315 CLASS="LITERAL"
1316 >ALT</TT
1317 ></TD
1318 ><TD
1319 ><TT
1320 CLASS="LITERAL"
1321 >ISO_8859_5</TT
1322 >, <TT
1323 CLASS="LITERAL"
1324 >WIN</TT
1325 >, 
1326          <TT
1327 CLASS="LITERAL"
1328 >ALT</TT
1329 >, <TT
1330 CLASS="LITERAL"
1331 >KOI8</TT
1332 >,
1333          <TT
1334 CLASS="LITERAL"
1335 >UNICODE</TT
1336 >, <TT
1337 CLASS="LITERAL"
1338 >MULE_INTERNAL</TT
1339 >
1340          </TD
1341 ></TR
1342 ><TR
1343 ><TD
1344 ><TT
1345 CLASS="LITERAL"
1346 >WIN1256</TT
1347 ></TD
1348 ><TD
1349 ><TT
1350 CLASS="LITERAL"
1351 >WIN1256</TT
1352 >,
1353          <TT
1354 CLASS="LITERAL"
1355 >UNICODE</TT
1356 >
1357          </TD
1358 ></TR
1359 ><TR
1360 ><TD
1361 ><TT
1362 CLASS="LITERAL"
1363 >TCVN</TT
1364 ></TD
1365 ><TD
1366 ><TT
1367 CLASS="LITERAL"
1368 >TCVN</TT
1369 >,
1370          <TT
1371 CLASS="LITERAL"
1372 >UNICODE</TT
1373 >
1374          </TD
1375 ></TR
1376 ><TR
1377 ><TD
1378 ><TT
1379 CLASS="LITERAL"
1380 >WIN874</TT
1381 ></TD
1382 ><TD
1383 ><TT
1384 CLASS="LITERAL"
1385 >WIN874</TT
1386 >,
1387          <TT
1388 CLASS="LITERAL"
1389 >UNICODE</TT
1390 >
1391          </TD
1392 ></TR
1393 ></TBODY
1394 ></TABLE
1395 ></DIV
1396 ><P
1397 >     To enable the automatic character set conversion, you have to
1398      tell <SPAN
1399 CLASS="PRODUCTNAME"
1400 >PostgreSQL</SPAN
1401 > the character set
1402      (encoding) you would like to use in the client. There are several
1403      ways to accomplish this:
1404
1405      <P
1406 ></P
1407 ></P><UL
1408 ><LI
1409 ><P
1410 >       Using the <TT
1411 CLASS="COMMAND"
1412 >\encoding</TT
1413 > command in
1414         <SPAN
1415 CLASS="APPLICATION"
1416 >psql</SPAN
1417 >.
1418         <TT
1419 CLASS="COMMAND"
1420 >\encoding</TT
1421 > allows you to change client
1422         encoding on the fly. For
1423         example, to change the encoding to <TT
1424 CLASS="LITERAL"
1425 >SJIS</TT
1426 >, type:
1427
1428 </P><PRE
1429 CLASS="PROGRAMLISTING"
1430 >\encoding SJIS</PRE
1431 ><P>
1432        </P
1433 ></LI
1434 ><LI
1435 ><P
1436 >       Using <SPAN
1437 CLASS="APPLICATION"
1438 >libpq</SPAN
1439 > functions.
1440         <TT
1441 CLASS="COMMAND"
1442 >\encoding</TT
1443 > actually calls
1444         <CODE
1445 CLASS="FUNCTION"
1446 >PQsetClientEncoding()</CODE
1447 > for this purpose.
1448
1449 </P><PRE
1450 CLASS="SYNOPSIS"
1451 >int PQsetClientEncoding(PGconn *<VAR
1452 CLASS="REPLACEABLE"
1453 >conn</VAR
1454 >, const char *<VAR
1455 CLASS="REPLACEABLE"
1456 >encoding</VAR
1457 >);</PRE
1458 ><P>
1459
1460         where <VAR
1461 CLASS="REPLACEABLE"
1462 >conn</VAR
1463 > is a connection to the server,
1464         and <VAR
1465 CLASS="REPLACEABLE"
1466 >encoding</VAR
1467 > is the encoding you
1468         want to use. If the function successfully sets the encoding, it returns 0,
1469         otherwise -1. The current encoding for this connection can be determined by
1470         using:
1471
1472 </P><PRE
1473 CLASS="SYNOPSIS"
1474 >int PQclientEncoding(const PGconn *<VAR
1475 CLASS="REPLACEABLE"
1476 >conn</VAR
1477 >);</PRE
1478 ><P>
1479
1480         Note that it returns the encoding ID, not a symbolic string
1481         such as <TT
1482 CLASS="LITERAL"
1483 >EUC_JP</TT
1484 >. To convert an encoding ID to an encoding name, you
1485         can use:
1486
1487 </P><PRE
1488 CLASS="SYNOPSIS"
1489 >char *pg_encoding_to_char(int <VAR
1490 CLASS="REPLACEABLE"
1491 >encoding_id</VAR
1492 >);</PRE
1493 ><P>
1494        </P
1495 ></LI
1496 ><LI
1497 ><P
1498 >       Using <TT
1499 CLASS="COMMAND"
1500 >SET client_encoding TO</TT
1501 >.
1502
1503         Setting the client encoding can be done with this SQL command:
1504
1505 </P><PRE
1506 CLASS="PROGRAMLISTING"
1507 >SET CLIENT_ENCODING TO '<VAR
1508 CLASS="REPLACEABLE"
1509 >value</VAR
1510 >';</PRE
1511 ><P>
1512
1513         You can also use the more standard SQL syntax <TT
1514 CLASS="LITERAL"
1515 >SET NAMES</TT
1516 > for this purpose:
1517
1518 </P><PRE
1519 CLASS="PROGRAMLISTING"
1520 >SET NAMES '<VAR
1521 CLASS="REPLACEABLE"
1522 >value</VAR
1523 >';</PRE
1524 ><P>
1525
1526         To query the current client encoding:
1527
1528 </P><PRE
1529 CLASS="PROGRAMLISTING"
1530 >SHOW client_encoding;</PRE
1531 ><P>
1532
1533         To return to the default encoding:
1534
1535 </P><PRE
1536 CLASS="PROGRAMLISTING"
1537 >RESET client_encoding;</PRE
1538 ><P>
1539        </P
1540 ></LI
1541 ><LI
1542 ><P
1543 >       Using <TT
1544 CLASS="ENVAR"
1545 >PGCLIENTENCODING</TT
1546 >.
1547
1548         If environment variable <TT
1549 CLASS="ENVAR"
1550 >PGCLIENTENCODING</TT
1551 > is defined
1552         in the client's environment, that client encoding is automatically
1553         selected when a connection to the server is made.  (This can subsequently
1554         be overridden using any of the other methods mentioned above.)
1555        </P
1556 ></LI
1557 ><LI
1558 ><P
1559 >       Using the configuration variable <VAR
1560 CLASS="VARNAME"
1561 >client_encoding</VAR
1562 >.
1563
1564       If the <VAR
1565 CLASS="VARNAME"
1566 >client_encoding</VAR
1567 > variable in <TT
1568 CLASS="FILENAME"
1569 >postgresql.conf</TT
1570 > is set, that
1571       client encoding is automatically selected when a connection to the
1572       server is made.  (This can subsequently be overridden using any of the
1573       other methods mentioned above.)
1574        </P
1575 ></LI
1576 ></UL
1577 ><P>
1578     </P
1579 ><P
1580 >     If the conversion of a particular character is not possible --
1581      suppose you chose <TT
1582 CLASS="LITERAL"
1583 >EUC_JP</TT
1584 > for the server and
1585      <TT
1586 CLASS="LITERAL"
1587 >LATIN1</TT
1588 > for the client, then some Japanese
1589      characters cannot be converted to <TT
1590 CLASS="LITERAL"
1591 >LATIN1</TT
1592 > -- it
1593      is transformed to its hexadecimal byte values in parentheses,
1594      e.g., <TT
1595 CLASS="LITERAL"
1596 >(826C)</TT
1597 >.
1598     </P
1599 ></DIV
1600 ><DIV
1601 CLASS="SECT2"
1602 ><H2
1603 CLASS="SECT2"
1604 ><A
1605 NAME="AEN18664"
1606 >20.2.4. Further Reading</A
1607 ></H2
1608 ><P
1609 >     These are good sources to start learning about various kinds of encoding
1610      systems.
1611
1612      <P
1613 ></P
1614 ></P><DIV
1615 CLASS="VARIABLELIST"
1616 ><DL
1617 ><DT
1618 ><A
1619 HREF="ftp://ftp.ora.com/pub/examples/nutshell/ujip/doc/cjk.inf"
1620 TARGET="_top"
1621 >ftp://ftp.ora.com/pub/examples/nutshell/ujip/doc/cjk.inf</A
1622 ></DT
1623 ><DD
1624 ><P
1625 >         Detailed explanations of <TT
1626 CLASS="LITERAL"
1627 >EUC_JP</TT
1628 >,
1629          <TT
1630 CLASS="LITERAL"
1631 >EUC_CN</TT
1632 >, <TT
1633 CLASS="LITERAL"
1634 >EUC_KR</TT
1635 >,
1636          <TT
1637 CLASS="LITERAL"
1638 >EUC_TW</TT
1639 > appear in section 3.2.
1640         </P
1641 ></DD
1642 ><DT
1643 ><A
1644 HREF="http://www.unicode.org/"
1645 TARGET="_top"
1646 >http://www.unicode.org/</A
1647 ></DT
1648 ><DD
1649 ><P
1650          The web site of the Unicode Consortium.
1651         </P
1652 ></DD
1653 ><DT
1654 >RFC 2044</DT
1655 ><DD
1656 ><P
1657 >        <ACRONYM
1658 CLASS="ACRONYM"
1659 >UTF</ACRONYM
1660 >-8 is defined here.
1661         </P
1662 ></DD
1663 ></DL
1664 ></DIV
1665 ><P>
1666     </P
1667 ></DIV
1668 ></DIV
1669 ><DIV
1670 CLASS="NAVFOOTER"
1671 ><HR
1672 ALIGN="LEFT"
1673 WIDTH="100%"><TABLE
1674 SUMMARY="Footer navigation table"
1675 WIDTH="100%"
1676 BORDER="0"
1677 CELLPADDING="0"
1678 CELLSPACING="0"
1679 ><TR
1680 ><TD
1681 WIDTH="33%"
1682 ALIGN="left"
1683 VALIGN="top"
1684 ><A
1685 HREF="charset.html"
1686 ACCESSKEY="P"
1687 >Prev</A
1688 ></TD
1689 ><TD
1690 WIDTH="34%"
1691 ALIGN="center"
1692 VALIGN="top"
1693 ><A
1694 HREF="index.html"
1695 ACCESSKEY="H"
1696 >Home</A
1697 ></TD
1698 ><TD
1699 WIDTH="33%"
1700 ALIGN="right"
1701 VALIGN="top"
1702 ><A
1703 HREF="maintenance.html"
1704 ACCESSKEY="N"
1705 >Next</A
1706 ></TD
1707 ></TR
1708 ><TR
1709 ><TD
1710 WIDTH="33%"
1711 ALIGN="left"
1712 VALIGN="top"
1713 >Localization</TD
1714 ><TD
1715 WIDTH="34%"
1716 ALIGN="center"
1717 VALIGN="top"
1718 ><A
1719 HREF="charset.html"
1720 ACCESSKEY="U"
1721 >Up</A
1722 ></TD
1723 ><TD
1724 WIDTH="33%"
1725 ALIGN="right"
1726 VALIGN="top"
1727 >Routine Database Maintenance Tasks</TD
1728 ></TR
1729 ></TABLE
1730 ></DIV
1731 ></BODY
1732 ></HTML
1733 >