OSDN Git Service

Merge branch 'master' of git.osdn.net:/gitroot/hengband/hengband
[hengband/hengband.git] / src / locale / japanese.c
1 /*!
2  *  @file japanese.c
3  *  @brief 日本語処理関数
4  *  @date 2014/07/07
5  */
6
7 #include "locale/japanese.h"
8 #include "util/string-processor.h"
9 #include "view/display-messages.h"
10
11 #ifdef JP
12
13 typedef struct convert_key convert_key;
14
15 struct convert_key
16 {
17         concptr key1;
18         concptr key2;
19 };
20
21 static const convert_key s2j_table[] = {
22         {"mb","nb"}, {"mp","np"}, {"mv","nv"}, {"mm","nm"},
23         {"x","ks"},
24         /* sindar:シンダール  parantir:パランティア  feanor:フェアノール */
25         {"ar$","a-ru$"}, {"ir$","ia$"}, {"or$","o-ru$"},
26         {"ra","ラ"}, {"ri","リ"}, {"ru","ル"}, {"re","レ"}, {"ro","ロ"},
27         {"ir","ia"}, {"ur","ua"}, {"er","ea"}, {"ar","aル"},
28         {"sha","シャ"}, {"shi","シ"}, {"shu","シュ"}, {"she","シェ"}, {"sho","ショ"},
29         {"tha","サ"}, {"thi","シ"}, {"thu","ス"}, {"the","セ"}, {"tho","ソ"},
30         {"cha","ハ"}, {"chi","ヒ"}, {"chu","フ"}, {"che","ヘ"}, {"cho","ホ"},
31         {"dha","ザ"}, {"dhi","ジ"}, {"dhu","ズ"}, {"dhe","ゼ"}, {"dho","ゾ"},
32         {"ba","バ"}, {"bi","ビ"}, {"bu","ブ"}, {"be","ベ"}, {"bo","ボ"},
33         {"ca","カ"}, {"ci","キ"}, {"cu","ク"}, {"ce","ケ"}, {"co","コ"},
34         {"da","ダ"}, {"di","ディ"}, {"du","ドゥ"}, {"de","デ"}, {"do","ド"},
35         {"fa","ファ"}, {"fi","フィ"}, {"fu","フ"}, {"fe","フェ"}, {"fo","フォ"},
36         {"ga","ガ"}, {"gi","ギ"}, {"gu","グ"}, {"ge","ゲ"}, {"go","ゴ"},
37         {"ha","ハ"}, {"hi","ヒ"}, {"hu","フ"}, {"he","ヘ"}, {"ho","ホ"},
38         {"ja","ジャ"}, {"ji","ジ"}, {"ju","ジュ"}, {"je","ジェ"}, {"jo","ジョ"},
39         {"ka","カ"}, {"ki","キ"}, {"ku","ク"}, {"ke","ケ"}, {"ko","コ"},
40         {"la","ラ"}, {"li","リ"}, {"lu","ル"}, {"le","レ"}, {"lo","ロ"},
41         {"ma","マ"}, {"mi","ミ"}, {"mu","ム"}, {"me","メ"}, {"mo","モ"},
42         {"na","ナ"}, {"ni","ニ"}, {"nu","ヌ"}, {"ne","ネ"}, {"no","ノ"},
43         {"pa","パ"}, {"pi","ピ"}, {"pu","プ"}, {"pe","ペ"}, {"po","ポ"},
44         {"qu","ク"},
45         {"sa","サ"}, {"si","シ"}, {"su","ス"}, {"se","セ"}, {"so","ソ"},
46         {"ta","タ"}, {"ti","ティ"}, {"tu","トゥ"}, {"te","テ"}, {"to","ト"},
47         {"va","ヴァ"}, {"vi","ヴィ"}, {"vu","ヴ"}, {"ve","ヴェ"}, {"vo","ヴォ"},
48         {"wa","ワ"}, {"wi","ウィ"}, {"wu","ウ"}, {"we","ウェ"}, {"wo","ウォ"},
49         {"ya","ヤ"}, {"yu","ユ"}, {"yo","ヨ"},
50         {"za","ザ"}, {"zi","ジ"}, {"zu","ズ"}, {"ze","ゼ"}, {"zo","ゾ"},
51         {"dh","ズ"}, {"ch","フ"}, {"th","ス"},
52         {"b","ブ"}, {"c","ク"}, {"d","ド"}, {"f","フ"}, {"g","グ"},
53         {"h","フ"}, {"j","ジュ"}, {"k","ク"}, {"l","ル"}, {"m","ム"},
54         {"n","ン"}, {"p","プ"}, {"q","ク"}, {"r","ル"}, {"s","ス"},
55         {"t","ト"}, {"v","ヴ"}, {"w","ウ"}, {"y","イ"},
56         {"a","ア"}, {"i","イ"}, {"u","ウ"}, {"e","エ"}, {"o","オ"},
57         {"-","ー"},
58         {NULL,NULL}
59 };
60
61 /*!
62  * @brief シンダリンを日本語の読みに変換する
63  * @param kana 変換後の日本語文字列ポインタ
64  * @param sindarin 変換前のシンダリン文字列ポインタ
65  * @return なし
66  * @details
67  */
68 void sindarin_to_kana(char *kana, concptr sindarin)
69 {
70         char buf[256];
71         int idx;
72
73         sprintf(kana, "%s$", sindarin);
74         for (idx = 0; kana[idx]; idx++)
75                 if (isupper(kana[idx])) kana[idx] = (char)tolower(kana[idx]);
76
77         for (idx = 0; s2j_table[idx].key1 != NULL; idx++)
78         {
79                 concptr pat1 = s2j_table[idx].key1;
80                 concptr pat2 = s2j_table[idx].key2;
81                 int len = strlen(pat1);
82                 char *src = kana;
83                 char *dest = buf;
84
85                 while (*src)
86                 {
87                         if (strncmp(src, pat1, len) == 0)
88                         {
89                                 strcpy(dest, pat2);
90                                 src += len;
91                                 dest += strlen(pat2);
92                         }
93                         else
94                         {
95                                 if (iskanji(*src))
96                                 {
97                                         *dest = *src;
98                                         src++;
99                                         dest++;
100                                 }
101                                 *dest = *src;
102                                 src++;
103                                 dest++;
104                         }
105                 }
106
107                 *dest = 0;
108                 strcpy(kana, buf);
109         }
110
111         idx = 0;
112
113         while (kana[idx] != '$') idx++;
114
115         kana[idx] = '\0';
116 }
117
118
/*! Japanese verb conjugation table (e.g. 打つ > 打って, 打ち)
 * JVERB_AND: 殴る,蹴る > 殴り,蹴る
 * JVERB_TO:  殴る,蹴る > 殴って蹴る
 * JVERB_OR:  殴る,蹴る > 殴ったり蹴ったり
 *
 * "from" is the dictionary-form ending that is matched against the end of the
 * verb; "to" holds its three replacements, indexed by (flag - 1) in jverb().
 * Rows are tried in order, so the two-character endings must stay ahead of the
 * one-character ones. The final row (from == NULL) is the sentinel; its "to"
 * strings are appended by jverb() when no ending matched.
 */
static const struct jverb_table_t {
	const char* from;
	const char* to[3];
} jverb_table[] = {
	{ "する", {"し", "して", "した"}},
	{ "いる", {"いて", "いて", "いた"}},

	{ "える", {"え", "えて", "えた"}},
	{ "ける", {"け", "けて", "けた"}},
	/* Fixed: the second form used to be "えて" (copy/paste slip from the "える" row) */
	{ "げる", {"げ", "げて", "げた"}},
	{ "せる", {"せ", "せて", "せた"}},
	{ "ぜる", {"ぜ", "ぜて", "ぜた"}},
	/* NOTE(review): "てって"/"てった" looks unusual next to the other -eru rows - confirm */
	{ "てる", {"て", "てって", "てった"}},
	{ "でる", {"で", "でて", "でた"}},
	{ "ねる", {"ね", "ねて", "ねた"}},
	{ "へる", {"へ", "へて", "へた"}},
	{ "べる", {"べ", "べて", "べた"}},
	{ "める", {"め", "めて", "めた"}},
	{ "れる", {"れ", "れて", "れた"}},

	{ "う", {"い", "って", "った"}},
	{ "く", {"き", "いて", "いた"}},
	{ "ぐ", {"ぎ", "いで", "いだ"}},
	{ "す", {"し", "して", "した"}},
	{ "ず", {"じ", "じて", "じた"}},
	{ "つ", {"ち", "って", "った"}},
	{ "づ", {"ぢ", "って", "った"}},
	/* NOTE(review): standard conjugation of -ぬ verbs is "んで"/"んだ" - confirm before changing */
	{ "ぬ", {"に", "ねて", "ねた"}},
	{ "ふ", {"ひ", "へて", "へた"}},
	{ "ぶ", {"び", "んで", "んだ"}},
	{ "む", {"み", "んで", "んだ"}},
	{ "る", {"り", "って", "った"}},
	{ NULL, {"そして", "ことにより", "ことや"}},
};
157
158 /*!
159  * @brief jverb_table_tに従って動詞を活用する
160  * @param in 変換元文字列ポインタ
161  * @param out 変換先文字列ポインタ
162  * @param flag 変換種類を指定(JVERB_AND/JVERB_TO/JVERB_OR)
163  * @return なし
164  * @details
165  */
166 void jverb(concptr in, char *out, int flag)
167 {
168         const struct jverb_table_t * p;
169         int in_len = strlen(in);
170
171         strcpy(out, in);
172
173         for (p = jverb_table; p->from; p++) {
174                 int from_len = strlen(p->from);
175                 if (strncmp(&in[in_len-from_len], p->from, from_len) == 0) {
176                         strcpy(&out[in_len - from_len], p->to[flag - 1]);
177                         break;
178                 }
179         }
180
181         if (p->from == NULL)
182                 strcpy(&out[in_len], p->to[flag - 1]);
183 }
184
185 /*!
186  * @brief 文字コードをSJISからEUCに変換する / Convert SJIS string to EUC string
187  * @param str 変換する文字列のポインタ
188  * @return なし
189  * @details
190  */
191 void sjis2euc(char *str)
192 {
193         int i;
194         unsigned char c1, c2;
195         unsigned char *tmp;
196
197         int len = strlen(str);
198
199         C_MAKE(tmp, len+1, byte);
200
201         for (i = 0; i < len; i++)
202         {
203                 c1 = str[i];
204                 if (c1 & 0x80)
205                 {
206                         i++;
207                         c2 = str[i];
208                         if (c2 >= 0x9f)
209                         {
210                                 c1 = c1 * 2 - (c1 >= 0xe0 ? 0xe0 : 0x60);
211                                 c2 += 2;
212                         }
213                         else
214                         {
215                                 c1 = c1 * 2 - (c1 >= 0xe0 ? 0xe1 : 0x61);
216                                 c2 += 0x60 + (c2 < 0x7f);
217                         }
218                         tmp[i - 1] = c1;
219                         tmp[i] = c2;
220                 }
221                 else
222                         tmp[i] = c1;
223         }
224         tmp[len] = 0;
225         strcpy(str, (char *)tmp);
226
227         C_KILL(tmp, len+1, byte);
228 }  
229
230
231 /*!
232  * @brief 文字コードをEUCからSJISに変換する / Convert EUC string to SJIS string
233  * @param str 変換する文字列のポインタ
234  * @return なし
235  * @details
236  */
237 void euc2sjis(char *str)
238 {
239         int i;
240         unsigned char c1, c2;
241         unsigned char *tmp;
242         
243         int len = strlen(str);
244
245         C_MAKE(tmp, len+1, byte);
246
247         for (i = 0; i < len; i++)
248         {
249                 c1 = str[i];
250                 if (c1 & 0x80)
251                 {
252                         i++;
253                         c2 = str[i];
254                         if (c1 % 2)
255                         {
256                                 c1 = (c1 >> 1) + (c1 < 0xdf ? 0x31 : 0x71);
257                                 c2 -= 0x60 + (c2 < 0xe0);
258                         }
259                         else
260                         {
261                                 c1 = (c1 >> 1) + (c1 < 0xdf ? 0x30 : 0x70);
262                                 c2 -= 2;
263                         }
264
265                         tmp[i - 1] = c1;
266                         tmp[i] = c2;
267                 }
268                 else
269                         tmp[i] = c1;
270         }
271         tmp[len] = 0;
272         strcpy(str, (char *)tmp);
273
274         C_KILL(tmp, len+1, byte);
275 }  
276
277
278 /*!
279  * @brief strを環境に合った文字コードに変換し、変換前の文字コードを返す。strの長さに制限はない。
280  * @param str 変換する文字列のポインタ
281  * @return 
282  * 0: Unknown<br>
283  * 1: ASCII (Never known to be ASCII in this function.)<br>
284  * 2: EUC<br>
285  * 3: SJIS<br>
286  */
287 byte codeconv(char *str)
288 {
289         byte code = 0;
290         int i;
291
292         for (i = 0; str[i]; i++)
293         {
294                 unsigned char c1;
295                 unsigned char c2;
296
297                 /* First byte */
298                 c1 = str[i];
299
300                 /* ASCII? */
301                 if (!(c1 & 0x80)) continue;
302
303                 /* Second byte */
304                 i++;
305                 c2 = str[i];
306
307                 if (((0xa1 <= c1 && c1 <= 0xdf) || (0xfd <= c1 && c1 <= 0xfe)) &&
308                     (0xa1 <= c2 && c2 <= 0xfe))
309                 {
310                         /* Only EUC is allowed */
311                         if (!code)
312                         {
313                                 /* EUC */
314                                 code = 2;
315                         }
316
317                         /* Broken string? */
318                         else if (code != 2)
319                         {
320                                 /* No conversion */
321                                 return 0;
322                         }
323                 }
324
325                 else if (((0x81 <= c1 && c1 <= 0x9f) &&
326                           ((0x40 <= c2 && c2 <= 0x7e) || (0x80 <= c2 && c2 <= 0xfc))) ||
327                          ((0xe0 <= c1 && c1 <= 0xfc) &&
328                           (0x40 <= c2 && c2 <= 0x7e)))
329                 {
330                         /* Only SJIS is allowed */
331                         if (!code)
332                         {
333                                 /* SJIS */
334                                 code = 3;
335                         }
336
337                         /* Broken string? */
338                         else if (code != 3)
339                         {
340                                 /* No conversion */
341                                 return 0;
342                         }
343                 }
344         }
345
346
347         switch (code)
348         {
349 #ifdef EUC
350         case 3:
351                 /* SJIS -> EUC */
352                 sjis2euc(str);
353                 break;
354 #endif
355
356 #ifdef SJIS
357         case 2:
358                 /* EUC -> SJIS */
359                 euc2sjis(str);
360
361                 break;
362 #endif
363         }
364
365         /* Return kanji code */
366         return code;
367 }
368
369 /*!
370  * @brief 文字列sのxバイト目が漢字の1バイト目かどうか判定する
371  * @param s 判定する文字列のポインタ
372  * @param x 判定する位置(バイト)
373  * @return 漢字の1バイト目ならばTRUE
374  */
375 bool iskanji2(concptr s, int x)
376 {
377         int i;
378
379         for (i = 0; i < x; i++)
380         {
381                 if (iskanji(s[i])) i++;
382         }
383         if ((x == i) && iskanji(s[x])) return TRUE;
384
385         return FALSE;
386 }
387
388 /*!
389  * @brief 文字列の文字コードがASCIIかどうかを判定する
390  * @param str 判定する文字列へのポインタ
391  * @return 文字列の文字コードがASCIIならTRUE、そうでなければFALSE
392  */
393 static bool is_ascii_str(concptr str)
394 {
395         for (;*str; str++) {
396                 int ch = *str;
397                 if (!(0x00 < ch && ch <= 0x7f))
398                         return FALSE;
399         }
400         return TRUE;
401 }
402
403 /*!
404  * @brief 文字列の文字コードがUTF-8かどうかを判定する
405  * @param str 判定する文字列へのポインタ
406  * @return 文字列の文字コードがUTF-8ならTRUE、そうでなければFALSE
407  */
408 static bool is_utf8_str(concptr str)
409 {
410         const unsigned char* p;
411         for (p = (const unsigned char*)str; *p; p++) {
412                 int subseq_num = 0;
413                 if (0x00 < *p && *p <= 0x7f) continue;
414                 
415                 if ((*p & 0xe0) == 0xc0) subseq_num = 1;
416                 if ((*p & 0xf0) == 0xe0) subseq_num = 2;
417                 if ((*p & 0xf8) == 0xf0) subseq_num = 3;
418
419                 if (subseq_num == 0) return FALSE;
420                 while (subseq_num--) {
421                         p++;
422                         if (!*p || (*p & 0xc0) != 0x80) return FALSE;
423                 }
424         }
425         return TRUE;
426 }
427
428 #if defined(EUC)
429 #include <iconv.h>
430
/*!
 * UTF-8 byte sequences that are rewritten before the UTF-8 -> EUC conversion.
 * Every replacement has the same length (3 bytes) as the sequence it replaces,
 * so the rewrite can be done in place. The bytes are stored as unsigned char
 * because all of them are above 0x7f.
 */
static const struct ms_to_jis_unicode_conv_t {
	unsigned char from[3];
	unsigned char to[3];
} ms_to_jis_unicode_conv[] = {
	{{0xef, 0xbd, 0x9e}, {0xe3, 0x80, 0x9c}}, /* FULLWIDTH TILDE -> WAVE DASH */
	{{0xef, 0xbc, 0x8d}, {0xe2, 0x88, 0x92}}, /* FULLWIDTH HYPHEN-MINUS -> MINUS SIGN */
};

/*!
 * @brief Helper for builds whose system encoding is EUC: rewrite, in place, the
 *        UTF-8 sequences listed in ms_to_jis_unicode_conv
 * @param str UTF-8 string to rewrite
 * @return None
 * @details
 * The string is walked one UTF-8 sequence at a time. The walk never steps past
 * the terminating NUL, even if the last sequence is truncated.
 */
static void ms_to_jis_unicode(char* str)
{
	unsigned char *p = (unsigned char *)str;

	while (*p) {
		int subseq_num = 0; /* number of bytes following the lead byte */

		if ((*p & 0xe0) == 0xc0) subseq_num = 1;
		else if ((*p & 0xf0) == 0xe0) subseq_num = 2;
		else if ((*p & 0xf8) == 0xf0) subseq_num = 3;

		/* Only complete 3-byte sequences can match a table entry */
		if (subseq_num == 2 && p[1] != '\0' && p[2] != '\0') {
			size_t i;
			for (i = 0; i < sizeof(ms_to_jis_unicode_conv) / sizeof(ms_to_jis_unicode_conv[0]); ++i) {
				const struct ms_to_jis_unicode_conv_t *c = &ms_to_jis_unicode_conv[i];
				if (memcmp(p, c->from, 3) == 0) {
					memcpy(p, c->to, 3);
					break;
				}
			}
		}

		/* Step over the lead byte and its trailing bytes, stopping at NUL */
		p++;
		while (subseq_num-- > 0 && *p) p++;
	}
}
467
468 #elif defined(SJIS) && defined(WINDOWS)
469 #include <Windows.h>
470 #endif
471 /*!
472  * @brief 文字コードがUTF-8の文字列をシステムの文字コードに変換する
473  * @param utf8_str 変換するUTF-8の文字列へのポインタ
474  * @param sys_str_buffer 変換したシステムの文字コードの文字列を格納するバッファへのポインタ
475  * @param sys_str_buflen 変換したシステムの文字コードの文字列を格納するバッファの長さ
476  * @return 変換に成功した場合TRUE、失敗した場合FALSEを返す
477  */
478 static bool utf8_to_sys(char* utf8_str, char* sys_str_buffer, size_t sys_str_buflen)
479 {
480 #if defined(EUC)
481
482         iconv_t cd = iconv_open("EUC-JP", "UTF-8");
483         size_t utf8_len = strlen(utf8_str) + 1; /* include termination character */
484         char *from = utf8_str;
485         int ret;
486
487         ms_to_jis_unicode(utf8_str);
488         ret = iconv(cd, &from, &utf8_len, &sys_str_buffer, &sys_str_buflen);
489         iconv_close(cd);
490         return (ret >= 0);
491
492 #elif defined(SJIS) && defined(WINDOWS)
493
494         LPWSTR utf16buf;
495         int input_len = strlen(utf8_str) + 1; /* include termination character */
496
497         C_MAKE(utf16buf, input_len, WCHAR);
498
499         /* UTF-8 -> UTF-16 */
500         if (MultiByteToWideChar( CP_UTF8, 0, utf8_str, input_len, utf16buf, input_len) == 0) {
501                 C_KILL(utf16buf, input_len, WCHAR);
502                 return FALSE;
503         }
504
505         /* UTF-8 -> SJIS(CP932) */
506         if (WideCharToMultiByte( CP_ACP, 0, utf16buf, -1, sys_str_buffer, sys_str_buflen, NULL, NULL ) == 0) {
507                 C_KILL(utf16buf, input_len, WCHAR);
508                 return FALSE;
509         }
510
511         C_KILL(utf16buf, input_len, WCHAR);
512         return TRUE;
513
514 #endif
515 }
516
517 /*!
518  * @brief 受け取った文字列の文字コードを推定し、システムの文字コードへ変換する
519  * @param strbuf 変換する文字列を格納したバッファへのポインタ。
520  *               バッファは変換した文字列で上書きされる。
521  *               UTF-8からSJISもしくはEUCへの変換を想定しているのでバッファの長さが足りなくなることはない。
522  * @param buflen バッファの長さ。
523  * @return なし
524  */
525 void guess_convert_to_system_encoding(char* strbuf, int buflen)
526 {
527         if (is_ascii_str(strbuf)) return;
528
529         if (is_utf8_str(strbuf)) {
530                 char* work;
531                 C_MAKE(work, buflen, char);
532                 angband_strcpy(work, strbuf, buflen);
533                 if (!utf8_to_sys(work, strbuf, buflen)) {
534                         msg_print("警告:文字コードの変換に失敗しました");
535                         msg_print(NULL);
536                 }
537                 C_KILL(work, buflen, char);
538         }
539 }
540
541 #endif /* JP */