OSDN Git Service

ba10a56cdb85bd9a08fc58bc2b29deca7453850f
[pg-rex/syncrep.git] / src / backend / utils / mb / conversion_procs / euc_jis_2004_and_shift_jis_2004 / euc_jis_2004_and_shift_jis_2004.c
1 /*-------------------------------------------------------------------------
2  *
3  *        EUC_JIS_2004, SHIFT_JIS_2004
4  *
5  * Copyright (c) 2007-2009, PostgreSQL Global Development Group
6  *
7  * IDENTIFICATION
8  *        $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/euc_jis_2004_and_shift_jis_2004/euc_jis_2004_and_shift_jis_2004.c,v 1.5 2009/01/01 17:23:51 momjian Exp $
9  *
10  *-------------------------------------------------------------------------
11  */
12
13 #include "postgres.h"
14 #include "fmgr.h"
15 #include "mb/pg_wchar.h"
16
17 PG_MODULE_MAGIC;
18
19 PG_FUNCTION_INFO_V1(euc_jis_2004_to_shift_jis_2004);
20 PG_FUNCTION_INFO_V1(shift_jis_2004_to_euc_jis_2004);
21
22 extern Datum euc_jis_2004_to_shift_jis_2004(PG_FUNCTION_ARGS);
23 extern Datum shift_jis_2004_to_euc_jis_2004(PG_FUNCTION_ARGS);
24
25 static void euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len);
26 static void shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len);
27
28 /* ----------
29  * conv_proc(
30  *              INTEGER,        -- source encoding id
31  *              INTEGER,        -- destination encoding id
32  *              CSTRING,        -- source string (null terminated C string)
33  *              CSTRING,        -- destination string (null terminated C string)
34  *              INTEGER         -- source string length
35  * ) returns VOID;
36  * ----------
37  */
38
39 Datum
40 euc_jis_2004_to_shift_jis_2004(PG_FUNCTION_ARGS)
41 {
42         unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
43         unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
44         int                     len = PG_GETARG_INT32(4);
45
46         Assert(PG_GETARG_INT32(0) == PG_EUC_JIS_2004);
47         Assert(PG_GETARG_INT32(1) == PG_SHIFT_JIS_2004);
48         Assert(len >= 0);
49
50         euc_jis_20042shift_jis_2004(src, dest, len);
51
52         PG_RETURN_VOID();
53 }
54
55 Datum
56 shift_jis_2004_to_euc_jis_2004(PG_FUNCTION_ARGS)
57 {
58         unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
59         unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
60         int                     len = PG_GETARG_INT32(4);
61
62         Assert(PG_GETARG_INT32(0) == PG_SHIFT_JIS_2004);
63         Assert(PG_GETARG_INT32(1) == PG_EUC_JIS_2004);
64         Assert(len >= 0);
65
66         shift_jis_20042euc_jis_2004(src, dest, len);
67
68         PG_RETURN_VOID();
69 }
70
71 /*
72  * EUC_JIS_2004 -> SHIFT_JIS_2004
73  */
74 static void
75 euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len)
76 {
77         int                     c1,
78                                 ku,
79                                 ten;
80         int                     l;
81
82         while (len > 0)
83         {
84                 c1 = *euc;
85                 if (!IS_HIGHBIT_SET(c1))
86                 {
87                         /* ASCII */
88                         if (c1 == 0)
89                                 report_invalid_encoding(PG_EUC_JIS_2004,
90                                                                                 (const char *) euc, len);
91                         *p++ = c1;
92                         euc++;
93                         len--;
94                         continue;
95                 }
96
97                 l = pg_encoding_verifymb(PG_EUC_JIS_2004, (const char *) euc, len);
98
99                 if (l < 0)
100                         report_invalid_encoding(PG_EUC_JIS_2004,
101                                                                         (const char *) euc, len);
102
103                 if (c1 == SS2 && l == 2)        /* JIS X 0201 kana? */
104                 {
105                         *p++ = euc[1];
106                 }
107                 else if (c1 == SS3 && l == 3)   /* JIS X 0213 plane 2? */
108                 {
109                         ku = euc[1] - 0xa0;
110                         ten = euc[2] - 0xa0;
111
112                         switch (ku)
113                         {
114                                 case 1:
115                                 case 3:
116                                 case 4:
117                                 case 5:
118                                 case 8:
119                                 case 12:
120                                 case 13:
121                                 case 14:
122                                 case 15:
123                                         *p++ = ((ku + 0x1df) >> 1) - (ku >> 3) * 3;
124                                         break;
125                                 default:
126                                         if (ku >= 78 && ku <= 94)
127                                         {
128                                                 *p++ = (ku + 0x19b) >> 1;
129                                         }
130                                         else
131                                                 report_invalid_encoding(PG_EUC_JIS_2004,
132                                                                                                 (const char *) euc, len);
133                         }
134
135                         if (ku % 2)
136                         {
137                                 if (ten >= 1 && ten <= 63)
138                                         *p++ = ten + 0x3f;
139                                 else if (ten >= 64 && ten <= 94)
140                                         *p++ = ten + 0x40;
141                                 else
142                                         report_invalid_encoding(PG_EUC_JIS_2004,
143                                                                                         (const char *) euc, len);
144                         }
145                         else
146                                 *p++ = ten + 0x9e;
147                 }
148
149                 else if (l == 2)                /* JIS X 0213 plane 1? */
150                 {
151                         ku = c1 - 0xa0;
152                         ten = euc[1] - 0xa0;
153
154                         if (ku >= 1 && ku <= 62)
155                                 *p++ = (ku + 0x101) >> 1;
156                         else if (ku >= 63 && ku <= 94)
157                                 *p++ = (ku + 0x181) >> 1;
158                         else
159                                 report_invalid_encoding(PG_EUC_JIS_2004,
160                                                                                 (const char *) euc, len);
161
162                         if (ku % 2)
163                         {
164                                 if (ten >= 1 && ten <= 63)
165                                         *p++ = ten + 0x3f;
166                                 else if (ten >= 64 && ten <= 94)
167                                         *p++ = ten + 0x40;
168                                 else
169                                         report_invalid_encoding(PG_EUC_JIS_2004,
170                                                                                         (const char *) euc, len);
171                         }
172                         else
173                                 *p++ = ten + 0x9e;
174                 }
175                 else
176                         report_invalid_encoding(PG_EUC_JIS_2004,
177                                                                         (const char *) euc, len);
178
179                 euc += l;
180                 len -= l;
181         }
182         *p = '\0';
183 }
184
/*
 * Decode the second byte of a two-byte SHIFT_JIS_2004 character.
 *
 * Returns the "ten" number (1..94) encoded by b, or -1 if b is not a valid
 * second byte.  *ku is set to the parity of the "ku" that b implies:
 *	 *ku = 0: "ku" is even
 *	 *ku = 1: "ku" is odd
 * On error *ku is set to 0 so that it always holds a defined value.
 */
static int
get_ten(int b, int *ku)
{
	/* upper range: cells 1..94 of an even "ku" */
	if (b >= 0x9f && b <= 0xfc)
	{
		*ku = 0;
		return b - 0x9e;
	}

	/* 0x80..0x9e: cells 64..94 of an odd "ku" */
	if (b >= 0x80 && b <= 0x9e)
	{
		*ku = 1;
		return b - 0x40;
	}

	/* 0x40..0x7e: cells 1..63 of an odd "ku" */
	if (b >= 0x40 && b <= 0x7e)
	{
		*ku = 1;
		return b - 0x3f;
	}

	/* anything else (including 0x7f) is not a valid second byte */
	*ku = 0;					/* keep compiler quiet */
	return -1;
}
217
218 /*
219  * SHIFT_JIS_2004 ---> EUC_JIS_2004
220  */
221
222 static void
223 shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len)
224 {
225         int                     c1,
226                                 c2;
227         int                     ku,
228                                 ten,
229                                 kubun;
230         int                     plane;
231         int                     l;
232
233         while (len > 0)
234         {
235                 c1 = *sjis;
236                 c2 = sjis[1];
237
238                 if (!IS_HIGHBIT_SET(c1))
239                 {
240                         /* ASCII */
241                         if (c1 == 0)
242                                 report_invalid_encoding(PG_SHIFT_JIS_2004,
243                                                                                 (const char *) sjis, len);
244                         *p++ = c1;
245                         sjis++;
246                         len--;
247                         continue;
248                 }
249
250                 l = pg_encoding_verifymb(PG_SHIFT_JIS_2004, (const char *) sjis, len);
251
252                 if (l < 0)
253                         report_invalid_encoding(PG_SHIFT_JIS_2004,
254                                                                         (const char *) sjis, len);
255
256                 if (c1 >= 0xa1 && c1 <= 0xdf && l == 1)
257                 {
258                         /* JIS X0201 (1 byte kana) */
259                         *p++ = SS2;
260                         *p++ = c1;
261                 }
262                 else if (l == 2)
263                 {
264                         plane = 1;
265                         ku = 1;
266                         ten = 1;
267
268                         /*
269                          * JIS X 0213
270                          */
271                         if (c1 >= 0x81 && c1 <= 0x9f)           /* plane 1 1ku-62ku */
272                         {
273                                 ku = (c1 << 1) - 0x100;
274                                 ten = get_ten(c2, &kubun);
275                                 if (ten < 0)
276                                         report_invalid_encoding(PG_SHIFT_JIS_2004,
277                                                                                         (const char *) sjis, len);
278                                 ku -= kubun;
279                         }
280                         else if (c1 >= 0xe0 && c1 <= 0xef)      /* plane 1 62ku-94ku */
281                         {
282                                 ku = (c1 << 1) - 0x180;
283                                 ten = get_ten(c2, &kubun);
284                                 if (ten < 0)
285                                         report_invalid_encoding(PG_SHIFT_JIS_2004,
286
287                                                                                         (const char *) sjis, len);
288                                 ku -= kubun;
289                         }
290                         else if (c1 >= 0xf0 && c1 <= 0xf3)      /* plane 2
291                                                                                                  * 1,3,4,5,8,12,13,14,15 ku */
292                         {
293                                 plane = 2;
294                                 ten = get_ten(c2, &kubun);
295                                 if (ten < 0)
296                                         report_invalid_encoding(PG_SHIFT_JIS_2004,
297                                                                                         (const char *) sjis, len);
298                                 switch (c1)
299                                 {
300                                         case 0xf0:
301                                                 ku = kubun == 0 ? 8 : 1;
302                                                 break;
303                                         case 0xf1:
304                                                 ku = kubun == 0 ? 4 : 3;
305                                                 break;
306                                         case 0xf2:
307                                                 ku = kubun == 0 ? 12 : 5;
308                                                 break;
309                                         default:
310                                                 ku = kubun == 0 ? 14 : 13;
311                                                 break;
312                                 }
313                         }
314                         else if (c1 >= 0xf4 && c1 <= 0xfc)      /* plane 2 78-94ku */
315                         {
316                                 plane = 2;
317                                 ten = get_ten(c2, &kubun);
318                                 if (ten < 0)
319                                         report_invalid_encoding(PG_SHIFT_JIS_2004,
320                                                                                         (const char *) sjis, len);
321                                 if (c1 == 0xf4 && kubun == 1)
322                                         ku = 15;
323                                 else
324                                         ku = (c1 << 1) - 0x19a - kubun;
325                         }
326                         else
327                                 report_invalid_encoding(PG_SHIFT_JIS_2004,
328                                                                                 (const char *) sjis, len);
329
330                         if (plane == 2)
331                                 *p++ = SS3;
332
333                         *p++ = ku + 0xa0;
334                         *p++ = ten + 0xa0;
335                 }
336                 sjis += l;
337                 len -= l;
338         }
339         *p = '\0';
340 }