2 unicode-jp.c -- JIS X 0208 <=> UCS-2 converter
3 written by N. Tsuchimura
6 #include <ptexenc/c-auto.h>
7 #include <ptexenc/unicode-jp.h>
8 #include <ptexenc/kanjicnv.h>
9 #include <ptexenc/ptexenc.h>
11 #if defined(KANJI_ICONV) && defined(HAVE_ICONV_H)
13 #include <stdlib.h> /* for atexit() */
/* Marker for a conversion descriptor that is not open; the converters
   below compare against it before calling iconv(). */
15 #define ICONV_INVALID ((iconv_t)(-1))
/* Encoding names handed to iconv_open() for the JIS side and the
   Unicode side of the conversion. */
16 #define ICONV_JIS "ISO-2022-JP"
17 #define ICONV_UNI "UCS-2BE"
/* Checked by the converters so that open_iconv() is only called lazily,
   on first use. */
19 static int inited = FALSE;
/* Conversion descriptors: JIS -> UCS-2 and UCS-2 -> JIS.  They stay
   ICONV_INVALID until open_iconv() assigns them. */
20 static iconv_t i_jis2ucs2 = ICONV_INVALID;
21 static iconv_t i_ucs22jis = ICONV_INVALID;
24 static void close_iconv(void)
25 #else /* HAVE_ATEXIT */
26 static void close_iconv(int dummy1, void *dummy2)
27 #endif /* HAVE_ATEXIT */
29 if (i_jis2ucs2 != ICONV_INVALID) iconv_close(i_jis2ucs2);
30 if (i_ucs22jis != ICONV_INVALID) iconv_close(i_ucs22jis);
33 static void open_iconv(void)
36 i_jis2ucs2 = iconv_open(ICONV_UNI, ICONV_JIS);
37 i_ucs22jis = iconv_open(ICONV_JIS, ICONV_UNI);
41 #endif /* HAVE_ATEXIT */
43 on_exit(close_iconv, NULL);
44 #endif /* HAVE_ON_EXIT */
47 /* convert a JIS X 0208 char to UCS-2 */
48 static int JIStoUCS2native(int jis)
50 unsigned char jis_seq[8] = {
51 0x1b, 0x24, 0x42, /* JIS X 0208-1983 */
52 (unsigned char)HI(jis), (unsigned char)LO(jis),
53 0x1b, 0x28, 0x42, /* ASCII */
55 unsigned char unicode[2];
56 size_t from = sizeof jis_seq;
57 size_t to = sizeof unicode;
58 char *from_ptr = (char *)jis_seq;
59 char *to_ptr = (char *)unicode;
61 if (!inited) open_iconv();
62 if (i_jis2ucs2 == ICONV_INVALID) return 0;
64 if (iconv(i_jis2ucs2, &from_ptr, &from, &to_ptr, &to) == (size_t)(-1)) {
67 return HILO(unicode[0], unicode[1]);
70 /* convert a UCS-2 char to JIS X 0208 */
71 static int UCS2toJISnative(int ucs2)
73 unsigned char unicode[2] = {
74 (unsigned char)HI(ucs2), (unsigned char)LO(ucs2),
76 unsigned char jis_seq[10] = { 0,0,0,0,0,0,0,0,0,0 };
77 size_t from = sizeof unicode;
78 size_t to = sizeof jis_seq;
79 char *from_ptr = (char *)unicode;
80 char *to_ptr = (char *)jis_seq;
81 unsigned char *ret = jis_seq;
83 if (!inited) open_iconv();
84 if (i_ucs22jis == ICONV_INVALID) return 0;
86 if (iconv(i_ucs22jis, &from_ptr, &from, &to_ptr, &to) == (size_t)(-1)) {
89 if (ret[0] == 0x1b) ret += 3;
90 return HILO(ret[0], ret[1]);
93 #else /* KANJI_ICONV && HAVE_ICONV_H */
98 /* convert a JIS X 0208 char to UCS-2 */
99 static int JIStoUCS2native(int jis)
104 low = LO(jis) - 0x21;
105 if (0 <= hi && hi < MAXJIS &&
106 0 <= low && low < 94) return (int)UnicodeTbl[hi][low];
110 /* convert a UCS-2 char to JIS X 0208 */
111 static int UCS2toJISnative(int ucs2)
115 for (i=0; i<MAXJIS; i++) {
116 for (j=0; j<94; j++) {
117 if (UnicodeTbl[i][j] == ucs2) {
118 return HILO(i, j) + 0x2121;
124 #endif /* KANJI_ICONV && HAVE_ICONV_H */
128 http://homepage3.nifty.com/ttk/comp/tex/jis_uni_variation_uptex.html
129 http://hp.vector.co.jp/authors/VA010341/unicode/
130 http://www.jca.apc.org/~earthian/aozora/0213/jisx0213code.zip
132 static unsigned short int variation[] = {
133 /* JIS X 0208, UCS-2(1), UCS-2(2), ..., 0(sentinel) */
134 /* UCS-2(1) is used for JIS -> UCS conversion if is_internalUPTEX */
/* Records follow one another directly.  JIStoUCS2() and UCS2toJIS() walk
   them in order and stop at a record whose first element is 0.
   UCS2toJIS() maps every UCS-2 value listed in a record back to that
   record's JIS code. */
135 0x2131 /* 1-17 */, 0xFFE3, 0x203E, 0,
136 0x213D /* 1-29 */, 0x2015, 0x2014, 0,
137 0x2141 /* 1-33 */, 0x301C, 0xFF5E, 0,
138 0x2142 /* 1-34 */, 0x2016, 0x2225, 0,
139 0x2144 /* 1-36 */, 0x2026, 0x22EF, 0,
140 0x215D /* 1-61 */, 0x2212, 0xFF0D, 0,
141 0x216F /* 1-79 */, 0xFFE5, 0x00A5, 0,
142 0x2171 /* 1-81 */, 0xFFE0, 0x00A2, 0,
143 0x2172 /* 1-82 */, 0xFFE1, 0x00A3, 0,
144 0x224C /* 2-44 */, 0xFFE2, 0x00AC, 0,
148 if is_internalUPTEX, force JIS X 0208 -> UCS2 conversion as follows:
149 JIS code (men-ku) -> UCS ( Character Name )
150 0x2131 ( 1-17 ) -> U+FFE3 ( FULLWIDTH MACRON )
151 0x213D ( 1-29 ) -> U+2015 ( HORIZONTAL BAR )
152 0x2141 ( 1-33 ) -> U+301C ( WAVE DASH )
153 0x2142 ( 1-34 ) -> U+2016 ( DOUBLE VERTICAL LINE )
154 0x2144 ( 1-36 ) -> U+2026 ( HORIZONTAL ELLIPSIS )
155 0x215D ( 1-61 ) -> U+2212 ( MINUS SIGN )
156 0x216F ( 1-79 ) -> U+FFE5 ( FULLWIDTH YEN SIGN )
157 0x2171 ( 1-81 ) -> U+FFE0 ( FULLWIDTH CENT SIGN )
158 0x2172 ( 1-82 ) -> U+FFE1 ( FULLWIDTH POUND SIGN )
159 0x224C ( 2-44 ) -> U+FFE2 ( FULLWIDTH NOT SIGN )
164 http://developer.apple.com/technotes/tn/tn1150table.html
/* Lookup table used by get_voiced_sound() when its `semi` argument is
   false.  It is read as (code, result) pairs and the scan ends at a pair
   whose first element is 0. */
166 static unsigned short int voiced_sounds[] = {
219 /* semi voiced sound of kana */
/* Lookup table used by get_voiced_sound() when its `semi` argument is
   true.  It is read as (code, result) pairs and the scan ends at a pair
   whose first element is 0. */
220 static unsigned short int semi_voiced_sounds[] = {
235 /* convert a JIS X 0208 char to UCS-2 */
236 int JIStoUCS2(int jis)
240 /* first: variation table */
241 if (is_internalUPTEX()) {
242 for (i=0; variation[i]!=0; i=j+1) {
243 if (variation[i] == jis) return variation[i+1];
244 for (j=i+3; variation[j]!=0; j++) ;
248 /* second: UnicodeTbl[][] */
249 return JIStoUCS2native(jis);
252 /* convert a UCS-2 char to JIS X 0208 */
253 int UCS2toJIS(int ucs2)
257 /* first: variation table */
258 for (i=0; variation[i]!=0; i=j+1) {
259 for (j=i+1; variation[j]!=0; j++) {
260 if (variation[j] == ucs2) return variation[i];
264 /* second: UnicodeTbl[][] */
265 return UCS2toJISnative(ucs2);
269 /* for U+3099 or U+309A */
270 int get_voiced_sound(int ucs2, boolean semi)
273 unsigned short int *table;
275 if (semi) table = semi_voiced_sounds;
276 else table = voiced_sounds;
277 for (i=0; table[i]!=0; i+=2) {
278 if (ucs2 == table[i]) return table[i+1];
285 int main(int argc, char **argv) {
287 for (hi=0; hi<MAXJIS; hi++) {
288 for (low=0; low<94; low++) {
289 int jis = HILO(hi, low) + 0x2121;
290 int uni = JIStoUCS2(jis);
291 int jis2 = UCS2toJIS(uni);
292 if (uni != 0 && jis != jis2) {
293 printf("0x%x(%c%c) uni=0x%04x, jis2=0x%04x\n",
294 jis, HI(jis)|0x80, LO(jis)|0x80, uni, jis2);