OSDN Git Service

Re-run pgindent, fixing a problem where comment lines after a blank
[pg-rex/syncrep.git] / src / backend / utils / mb / conversion_procs / euc_tw_and_big5 / big5.c
/*
 * conversion between BIG5 and Mule Internal Code (CNS 11643-1992
 * plane 1 and plane 2).
 * This program is partially copied from lv (Multilingual file viewer)
 * and slightly modified. lv is written and copyrighted by NARITA Tomio
 * (nrt@web.ad.jp).
 *
 * 1999/1/15 Tatsuo Ishii
 *
 * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/euc_tw_and_big5/big5.c,v 1.7 2005/11/22 18:17:26 momjian Exp $
 */
12
13 /* can be used in either frontend or backend */
14 #include "postgres_fe.h"
15
16 #include "mb/pg_wchar.h"
17
/*
 * One entry of a range-mapping table.
 *
 * "code" is the first code of a range in the source encoding and "peer"
 * is the code it corresponds to in the target encoding.  A peer of 0
 * marks a range that has no mapping.
 */
typedef struct
{
	unsigned short code;		/* first source code of the range */
	unsigned short peer;		/* matching target code, or 0 if unmapped */
} codes_t;
23
24 /* map Big5 Level 1 to CNS 11643-1992 Plane 1 */
25 static codes_t big5Level1ToCnsPlane1[25] = {    /* range */
26         {0xA140, 0x2121},
27         {0xA1F6, 0x2258},
28         {0xA1F7, 0x2257},
29         {0xA1F8, 0x2259},
30         {0xA2AF, 0x2421},
31         {0xA3C0, 0x4221},
32         {0xa3e1, 0x0000},
33         {0xA440, 0x4421},
34         {0xACFE, 0x5753},
35         {0xacff, 0x0000},
36         {0xAD40, 0x5323},
37         {0xAFD0, 0x5754},
38         {0xBBC8, 0x6B51},
39         {0xBE52, 0x6B50},
40         {0xBE53, 0x6F5C},
41         {0xC1AB, 0x7536},
42         {0xC2CB, 0x7535},
43         {0xC2CC, 0x7737},
44         {0xC361, 0x782E},
45         {0xC3B9, 0x7865},
46         {0xC3BA, 0x7864},
47         {0xC3BB, 0x7866},
48         {0xC456, 0x782D},
49         {0xC457, 0x7962},
50         {0xc67f, 0x0000}
51 };
52
53 /* map CNS 11643-1992 Plane 1 to Big5 Level 1 */
54 static codes_t cnsPlane1ToBig5Level1[26] = {    /* range */
55         {0x2121, 0xA140},
56         {0x2257, 0xA1F7},
57         {0x2258, 0xA1F6},
58         {0x2259, 0xA1F8},
59         {0x234f, 0x0000},
60         {0x2421, 0xA2AF},
61         {0x2571, 0x0000},
62         {0x4221, 0xA3C0},
63         {0x4242, 0x0000},
64         {0x4421, 0xA440},
65         {0x5323, 0xAD40},
66         {0x5753, 0xACFE},
67         {0x5754, 0xAFD0},
68         {0x6B50, 0xBE52},
69         {0x6B51, 0xBBC8},
70         {0x6F5C, 0xBE53},
71         {0x7535, 0xC2CB},
72         {0x7536, 0xC1AB},
73         {0x7737, 0xC2CC},
74         {0x782D, 0xC456},
75         {0x782E, 0xC361},
76         {0x7864, 0xC3BA},
77         {0x7865, 0xC3B9},
78         {0x7866, 0xC3BB},
79         {0x7962, 0xC457},
80         {0x7d4c, 0x0000}
81 };
82
83 /* map Big5 Level 2 to CNS 11643-1992 Plane 2 */
84 static codes_t big5Level2ToCnsPlane2[48] = {    /* range */
85         {0xC940, 0x2121},
86         {0xc94a, 0x0000},
87         {0xC94B, 0x212B},
88         {0xC96C, 0x214D},
89         {0xC9BE, 0x214C},
90         {0xC9BF, 0x217D},
91         {0xC9ED, 0x224E},
92         {0xCAF7, 0x224D},
93         {0xCAF8, 0x2439},
94         {0xD77A, 0x3F6A},
95         {0xD77B, 0x387E},
96         {0xDBA7, 0x3F6B},
97         {0xDDFC, 0x4176},
98         {0xDDFD, 0x4424},
99         {0xE8A3, 0x554C},
100         {0xE976, 0x5723},
101         {0xEB5B, 0x5A29},
102         {0xEBF1, 0x554B},
103         {0xEBF2, 0x5B3F},
104         {0xECDE, 0x5722},
105         {0xECDF, 0x5C6A},
106         {0xEDAA, 0x5D75},
107         {0xEEEB, 0x642F},
108         {0xEEEC, 0x6039},
109         {0xF056, 0x5D74},
110         {0xF057, 0x6243},
111         {0xF0CB, 0x5A28},
112         {0xF0CC, 0x6337},
113         {0xF163, 0x6430},
114         {0xF16B, 0x6761},
115         {0xF16C, 0x6438},
116         {0xF268, 0x6934},
117         {0xF269, 0x6573},
118         {0xF2C3, 0x664E},
119         {0xF375, 0x6762},
120         {0xF466, 0x6935},
121         {0xF4B5, 0x664D},
122         {0xF4B6, 0x6962},
123         {0xF4FD, 0x6A4C},
124         {0xF663, 0x6A4B},
125         {0xF664, 0x6C52},
126         {0xF977, 0x7167},
127         {0xF9C4, 0x7166},
128         {0xF9C5, 0x7234},
129         {0xF9C6, 0x7240},
130         {0xF9C7, 0x7235},
131         {0xF9D2, 0x7241},
132         {0xf9d6, 0x0000}
133 };
134
135 /* map CNS 11643-1992 Plane 2 to Big5 Level 2 */
136 static codes_t cnsPlane2ToBig5Level2[49] = {    /* range */
137         {0x2121, 0xC940},
138         {0x212B, 0xC94B},
139         {0x214C, 0xC9BE},
140         {0x214D, 0xC96C},
141         {0x217D, 0xC9BF},
142         {0x224D, 0xCAF7},
143         {0x224E, 0xC9ED},
144         {0x2439, 0xCAF8},
145         {0x387E, 0xD77B},
146         {0x3F6A, 0xD77A},
147         {0x3F6B, 0xDBA7},
148         {0x4424, 0x0000},
149         {0x4176, 0xDDFC},
150         {0x4177, 0x0000},
151         {0x4424, 0xDDFD},
152         {0x554B, 0xEBF1},
153         {0x554C, 0xE8A3},
154         {0x5722, 0xECDE},
155         {0x5723, 0xE976},
156         {0x5A28, 0xF0CB},
157         {0x5A29, 0xEB5B},
158         {0x5B3F, 0xEBF2},
159         {0x5C6A, 0xECDF},
160         {0x5D74, 0xF056},
161         {0x5D75, 0xEDAA},
162         {0x6039, 0xEEEC},
163         {0x6243, 0xF057},
164         {0x6337, 0xF0CC},
165         {0x642F, 0xEEEB},
166         {0x6430, 0xF163},
167         {0x6438, 0xF16C},
168         {0x6573, 0xF269},
169         {0x664D, 0xF4B5},
170         {0x664E, 0xF2C3},
171         {0x6761, 0xF16B},
172         {0x6762, 0xF375},
173         {0x6934, 0xF268},
174         {0x6935, 0xF466},
175         {0x6962, 0xF4B6},
176         {0x6A4B, 0xF663},
177         {0x6A4C, 0xF4FD},
178         {0x6C52, 0xF664},
179         {0x7166, 0xF9C4},
180         {0x7167, 0xF977},
181         {0x7234, 0xF9C5},
182         {0x7235, 0xF9C7},
183         {0x7240, 0xF9C6},
184         {0x7241, 0xF9D2},
185         {0x7245, 0x0000}
186 };
187
/*
 * Big Five Level 1 Correspondence to CNS 11643-1992 Plane 4
 *
 * Each row is {Big5 code, CNS code}.  The table is only read, so it is
 * declared const.
 */
static const unsigned short b1c4[][2] = {
	{0xC879, 0x2123},
	{0xC87B, 0x2124},
	{0xC87D, 0x212A},
	{0xC8A2, 0x2152}
};
195
/*
 * Big Five Level 2 Correspondence to CNS 11643-1992 Plane 3
 *
 * Each row is {Big5 code, CNS code}.  The table is only read, so it is
 * declared const.
 */
static const unsigned short b2c3[][2] = {
	{0xF9D6, 0x4337},
	{0xF9D7, 0x4F50},
	{0xF9D8, 0x444E},
	{0xF9D9, 0x504A},
	{0xF9DA, 0x2C5D},
	{0xF9DB, 0x3D7E},
	{0xF9DC, 0x4B5C}
};
206
207 static unsigned short BinarySearchRange
208                         (codes_t *array, int high, unsigned short code)
209 {
210         int                     low,
211                                 mid,
212                                 distance,
213                                 tmp;
214
215         low = 0;
216         mid = high >> 1;
217
218         for (; low <= high; mid = (low + high) >> 1)
219         {
220                 if ((array[mid].code <= code) && (array[mid + 1].code > code))
221                 {
222                         if (0 == array[mid].peer)
223                                 return 0;
224                         if (code >= 0xa140U)
225                         {
226                                 /* big5 to cns */
227                                 tmp = ((code & 0xff00) - (array[mid].code & 0xff00)) >> 8;
228                                 high = code & 0x00ff;
229                                 low = array[mid].code & 0x00ff;
230
231                                 /*
232                                  * NOTE: big5 high_byte: 0xa1-0xfe, low_byte: 0x40-0x7e,
233                                  * 0xa1-0xfe (radicals: 0x00-0x3e, 0x3f-0x9c) big5 radix is
234                                  * 0x9d.                                         [region_low, region_high] We
235                                  * should remember big5 has two different regions (above).
236                                  * There is a bias for the distance between these regions.
237                                  * 0xa1 - 0x7e + bias = 1 (Distance between 0xa1 and 0x7e is
238                                  * 1.) bias = - 0x22.
239                                  */
240                                 distance = tmp * 0x9d + high - low +
241                                         (high >= 0xa1 ? (low >= 0xa1 ? 0 : -0x22)
242                                          : (low >= 0xa1 ? +0x22 : 0));
243
244                                 /*
245                                  * NOTE: we have to convert the distance into a code point.
246                                  * The code point's low_byte is 0x21 plus mod_0x5e. In the
247                                  * first, we extract the mod_0x5e of the starting code point,
248                                  * subtracting 0x21, and add distance to it. Then we calculate
249                                  * again mod_0x5e of them, and restore the final codepoint,
250                                  * adding 0x21.
251                                  */
252                                 tmp = (array[mid].peer & 0x00ff) + distance - 0x21;
253                                 tmp = (array[mid].peer & 0xff00) + ((tmp / 0x5e) << 8)
254                                         + 0x21 + tmp % 0x5e;
255                                 return tmp;
256                         }
257                         else
258                         {
259                                 /* cns to big5 */
260                                 tmp = ((code & 0xff00) - (array[mid].code & 0xff00)) >> 8;
261
262                                 /*
263                                  * NOTE: ISO charsets ranges between 0x21-0xfe (94charset).
264                                  * Its radix is 0x5e. But there is no distance bias like big5.
265                                  */
266                                 distance = tmp * 0x5e
267                                         + ((int) (code & 0x00ff) - (int) (array[mid].code & 0x00ff));
268
269                                 /*
270                                  * NOTE: Similar to big5 to cns conversion, we extract
271                                  * mod_0x9d and restore mod_0x9d into a code point.
272                                  */
273                                 low = array[mid].peer & 0x00ff;
274                                 tmp = low + distance - (low >= 0xa1 ? 0x62 : 0x40);
275                                 low = tmp % 0x9d;
276                                 tmp = (array[mid].peer & 0xff00) + ((tmp / 0x9d) << 8)
277                                         + (low > 0x3e ? 0x62 : 0x40) + low;
278                                 return tmp;
279                         }
280                 }
281                 else if (array[mid].code > code)
282                         high = mid - 1;
283                 else
284                         low = mid + 1;
285         }
286
287         return 0;
288 }
289
290
291 unsigned short
292 BIG5toCNS(unsigned short big5, unsigned char *lc)
293 {
294         unsigned short cns = 0;
295         int                     i;
296
297         if (big5 < 0xc940U)
298         {
299                 /* level 1 */
300
301                 for (i = 0; i < sizeof(b1c4) / (sizeof(unsigned short) * 2); i++)
302                 {
303                         if (b1c4[i][0] == big5)
304                         {
305                                 *lc = LC_CNS11643_4;
306                                 return (b1c4[i][1] | 0x8080U);
307                         }
308                 }
309
310                 if (0 < (cns = BinarySearchRange(big5Level1ToCnsPlane1, 23, big5)))
311                         *lc = LC_CNS11643_1;
312         }
313         else if (big5 == 0xc94aU)
314         {
315                 /* level 2 */
316                 *lc = LC_CNS11643_1;
317                 cns = 0x4442;
318         }
319         else
320         {
321                 /* level 2 */
322                 for (i = 0; i < sizeof(b2c3) / (sizeof(unsigned short) * 2); i++)
323                 {
324                         if (b2c3[i][0] == big5)
325                         {
326                                 *lc = LC_CNS11643_3;
327                                 return (b2c3[i][1] | 0x8080U);
328                         }
329                 }
330
331                 if (0 < (cns = BinarySearchRange(big5Level2ToCnsPlane2, 46, big5)))
332                         *lc = LC_CNS11643_2;
333         }
334
335         if (0 == cns)
336         {                                                       /* no mapping Big5 to CNS 11643-1992 */
337                 *lc = 0;
338                 return (unsigned short) '?';
339         }
340
341         return cns | 0x8080;
342 }
343
344 unsigned short
345 CNStoBIG5(unsigned short cns, unsigned char lc)
346 {
347         int                     i;
348         unsigned int big5 = 0;
349
350         cns &= 0x7f7f;
351
352         switch (lc)
353         {
354                 case LC_CNS11643_1:
355                         big5 = BinarySearchRange(cnsPlane1ToBig5Level1, 24, cns);
356                         break;
357                 case LC_CNS11643_2:
358                         big5 = BinarySearchRange(cnsPlane2ToBig5Level2, 47, cns);
359                         break;
360                 case LC_CNS11643_3:
361                         for (i = 0; i < sizeof(b2c3) / (sizeof(unsigned short) * 2); i++)
362                         {
363                                 if (b2c3[i][1] == cns)
364                                         return (b2c3[i][0]);
365                         }
366                         break;
367                 case LC_CNS11643_4:
368                         for (i = 0; i < sizeof(b1c4) / (sizeof(unsigned short) * 2); i++)
369                         {
370                                 if (b1c4[i][1] == cns)
371                                         return (b1c4[i][0]);
372                         }
373                 default:
374                         break;
375         }
376         return big5;
377 }