2 * conversion between BIG5 and Mule Internal Code (CNS 11643-1992
3 * plane 1 and plane 2).
4 * This program is partially copied from lv(Multilingual file viewer)
5 * and slightly modified. lv is written and copyrighted by NARITA Tomio
8 * 1999/1/15 Tatsuo Ishii
10 * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/euc_tw_and_big5/big5.c,v 1.7 2005/11/22 18:17:26 momjian Exp $
13 /* can be used in either frontend or backend */
14 #include "postgres_fe.h"
16 #include "mb/pg_wchar.h"
24 /* map Big5 Level 1 to CNS 11643-1992 Plane 1 */
25 static codes_t big5Level1ToCnsPlane1[25] = { /* range */
53 /* map CNS 11643-1992 Plane 1 to Big5 Level 1 */
54 static codes_t cnsPlane1ToBig5Level1[26] = { /* range */
83 /* map Big5 Level 2 to CNS 11643-1992 Plane 2 */
84 static codes_t big5Level2ToCnsPlane2[48] = { /* range */
135 /* map CNS 11643-1992 Plane 2 to Big5 Level 2 */
136 static codes_t cnsPlane2ToBig5Level2[49] = { /* range */
188 /* Big Five Level 1 Correspondence to CNS 11643-1992 Plane 4 */
189 static unsigned short b1c4[][2] = {
196 /* Big Five Level 2 Correspondence to CNS 11643-1992 Plane 3 */
197 static unsigned short b2c3[][2] = {
/*
 * BinarySearchRange
 *
 * Binary-search a codes_t range table for the entry whose range contains
 * `code`, then derive the converted code from that entry's `peer` plus the
 * distance of `code` from the start of the range.
 *
 * array - range table; each entry carries a `code` (start of a range) and
 *         a `peer` (the corresponding start in the other encoding).
 * high  - highest index the search may settle on.  The hit test also reads
 *         array[mid + 1], so the table must hold at least one entry after
 *         index `high`; the callers in this file pass (table size - 2).
 * code  - the two-byte code to convert.
 *
 * NOTE(review): the return statements are not visible in this view, so
 * the value produced for an unmapped code is not documented here.
 */
207 static unsigned short BinarySearchRange
208 (codes_t *array, int high, unsigned short code)
218 for (; low <= high; mid = (low + high) >> 1)
/* hit: code lies in [array[mid].code, array[mid + 1].code) */
220 if ((array[mid].code <= code) && (array[mid + 1].code > code))
/* NOTE(review): a zero peer presumably marks a range with no counterpart; the branch body is not visible here -- confirm */
222 if (0 == array[mid].peer)
/* tmp = difference between the high byte of code and the high byte of the range start */
227 tmp = ((code & 0xff00) - (array[mid].code & 0xff00)) >> 8;
/* from here on `high` and `low` are reused as low bytes, no longer as search bounds */
228 high = code & 0x00ff;
229 low = array[mid].code & 0x00ff;
232 * NOTE: big5 high_byte: 0xa1-0xfe, low_byte: 0x40-0x7e,
233 * 0xa1-0xfe (radicals: 0x00-0x3e, 0x3f-0x9c) big5 radix is
234 * 0x9d. [region_low, region_high] We
235 * should remember big5 has two different regions (above).
236 * There is a bias for the distance between these regions.
237 * 0xa1 - 0x7e + bias = 1 (Distance between 0xa1 and 0x7e is
/* the 0x22 bias is applied only when the two low bytes fall in different regions; its sign depends on which one is in the upper (>= 0xa1) region */
240 distance = tmp * 0x9d + high - low +
241 (high >= 0xa1 ? (low >= 0xa1 ? 0 : -0x22)
242 : (low >= 0xa1 ? +0x22 : 0));
245 * NOTE: we have to convert the distance into a code point.
246 * The code point's low_byte is 0x21 plus mod_0x5e. In the
247 * first, we extract the mod_0x5e of the starting code point,
248 * subtracting 0x21, and add distance to it. Then we calculate
249 * again mod_0x5e of them, and restore the final codepoint,
252 tmp = (array[mid].peer & 0x00ff) + distance - 0x21;
253 tmp = (array[mid].peer & 0xff00) + ((tmp / 0x5e) << 8)
/* high-byte difference again, this time feeding the radix-0x5e distance below */
260 tmp = ((code & 0xff00) - (array[mid].code & 0xff00)) >> 8;
263 * NOTE: ISO charsets ranges between 0x21-0xfe (94charset).
264 * Its radix is 0x5e. But there is no distance bias like big5.
266 distance = tmp * 0x5e
267 + ((int) (code & 0x00ff) - (int) (array[mid].code & 0x00ff));
270 * NOTE: Similar to big5 to cns conversion, we extract
271 * mod_0x9d and restore mod_0x9d into a code point.
273 low = array[mid].peer & 0x00ff;
/* rebase the peer's low byte onto the 0x00-0x9c index: 0x40.. maps to 0x00.., 0xa1.. maps to 0x3f.. */
274 tmp = low + distance - (low >= 0xa1 ? 0x62 : 0x40);
/* reverse of the rebasing above: an index above 0x3e gets 0x62 added back, otherwise 0x40 */
276 tmp = (array[mid].peer & 0xff00) + ((tmp / 0x9d) << 8)
277 + (low > 0x3e ? 0x62 : 0x40) + low;
/* range start is above code; NOTE(review): the bound adjustment for this case is not visible here */
281 else if (array[mid].code > code)
/*
 * BIG5toCNS
 *
 * Map a two-byte Big5 code to a CNS 11643-1992 code.  The lookups visible
 * here are, in source order: the b1c4 exception pairs, the
 * big5Level1ToCnsPlane1 range table, the b2c3 exception pairs and the
 * big5Level2ToCnsPlane2 range table.  A hit in an exception table is
 * returned with 0x8080 OR'ed in; when nothing maps, '?' is returned.
 *
 * big5 - the Big5 code to convert.
 * lc   - NOTE(review): presumably receives the charset/plane of the
 *        result; no write through it is visible in this view -- confirm.
 */
292 BIG5toCNS(unsigned short big5, unsigned char *lc)
294 unsigned short cns = 0;
/* b1c4 holds {Big5, CNS} pairs, so the bound is the number of pairs */
301 for (i = 0; i < sizeof(b1c4) / (sizeof(unsigned short) * 2); i++)
303 if (b1c4[i][0] == big5)
306 return (b1c4[i][1] | 0x8080U);
/* a nonzero search result counts as a successful range-table mapping */
310 if (0 < (cns = BinarySearchRange(big5Level1ToCnsPlane1, 23, big5)))
/* NOTE(review): single code handled outside the tables; its branch body is not visible here */
313 else if (big5 == 0xc94aU)
/* same pair-count bound as above, for the b2c3 exception pairs */
322 for (i = 0; i < sizeof(b2c3) / (sizeof(unsigned short) * 2); i++)
324 if (b2c3[i][0] == big5)
327 return (b2c3[i][1] | 0x8080U);
331 if (0 < (cns = BinarySearchRange(big5Level2ToCnsPlane2, 46, big5)))
336 { /* no mapping Big5 to CNS 11643-1992 */
338 return (unsigned short) '?';
345 CNStoBIG5(unsigned short cns, unsigned char lc)
348 unsigned int big5 = 0;
355 big5 = BinarySearchRange(cnsPlane1ToBig5Level1, 24, cns);
358 big5 = BinarySearchRange(cnsPlane2ToBig5Level2, 47, cns);
361 for (i = 0; i < sizeof(b2c3) / (sizeof(unsigned short) * 2); i++)
363 if (b2c3[i][1] == cns)
368 for (i = 0; i < sizeof(b1c4) / (sizeof(unsigned short) * 2); i++)
370 if (b1c4[i][1] == cns)