From 5b056aad4cc5688068d021d7b60561ee8190f0c8 Mon Sep 17 00:00:00 2001
From: Habu
Date: Wed, 26 Feb 2014 23:41:05 +0900
Subject: [PATCH] =?utf8?q?=E3=83=95=E3=82=A1=E3=82=A4=E3=83=AB=E3=81=AE?=
 =?utf8?q?=E8=AA=AD=E3=81=BF=E8=BE=BC=E3=81=BF=E6=99=82=E3=81=AB=E6=96=87?=
 =?utf8?q?=E5=AD=97=E3=82=B3=E3=83=BC=E3=83=89=E3=82=92=E9=81=A9=E5=88=87?=
 =?utf8?q?=E3=81=AA=E3=82=82=E3=81=AE=E3=81=AB=E5=A4=89=E6=8F=9B=E3=81=99?=
 =?utf8?q?=E3=82=8B=E3=82=88=E3=81=86=E3=81=AB=E4=BF=AE=E6=AD=A3?=
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

ファイルを読み込み時に文字コードがUTF-8かどうかを推測し、UTF-8ならば
システムの文字コード(WindowsならCP932、Linux/UNIXならEUC-JP)に
変換して読み込むようにした。
---
 src/externs.h  |  1 +
 src/japanese.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 src/util.c     |  1 +
 3 files changed, 90 insertions(+), 1 deletion(-)

diff --git a/src/externs.h b/src/externs.h
index e45958aaf..5be477e9d 100644
--- a/src/externs.h
+++ b/src/externs.h
@@ -1763,6 +1763,7 @@ extern void sjis2euc(char *str);
 extern void euc2sjis(char *str);
 extern byte codeconv(char *str);
 extern bool iskanji2(cptr s, int x);
+extern void guess_convert_to_system_encoding(char* strbuf, int buflen);
 #endif
 
 #ifdef WORLD_SCORE
diff --git a/src/japanese.c b/src/japanese.c
index 49d0a37bc..832e3aeb7 100644
--- a/src/japanese.c
+++ b/src/japanese.c
@@ -357,5 +357,92 @@ bool iskanji2(cptr s, int x)
 	return FALSE;
 }
 
-#endif /* JP */
+/*!
+ * @brief Check whether a string consists solely of ASCII characters
+ * @param str Pointer to the NUL-terminated string to check
+ * @return TRUE if every byte before the terminator is in 0x01-0x7f, FALSE otherwise
+ */
+static bool is_ascii_str(cptr str)
+{
+	const unsigned char *p = (const unsigned char *)str;
+	while (*p) {
+		if (*p++ > 0x7f) return FALSE;
+	}
+	return TRUE;
+}
+
+/*!
+ * @brief Check whether a string has the byte structure of UTF-8
+ * @param str Pointer to the NUL-terminated string to check
+ * @return TRUE if every byte is ASCII or part of a lead byte followed by
+ * the expected number of continuation bytes, FALSE otherwise
+ * @note Only the lead/continuation bit patterns are checked; overlong forms
+ * and out-of-range code points are not rejected.
+ */
+static bool is_utf8_str(cptr str)
+{
+	const unsigned char* p;
+	for (p = (const unsigned char*)str; *p; p++) {
+		int subseq_num = 0;
+		if (0x00 < *p && *p <= 0x7f) continue;
+
+		/* The lead byte's high bits give the number of continuation bytes */
+		if ((*p & 0xe0) == 0xc0) subseq_num = 1;
+		if ((*p & 0xf0) == 0xe0) subseq_num = 2;
+		if ((*p & 0xf8) == 0xf0) subseq_num = 3;
+
+		/* A stray continuation byte or an invalid lead byte */
+		if (subseq_num == 0) return FALSE;
+		while (subseq_num--) {
+			p++;
+			/* Every continuation byte must look like 10xxxxxx */
+			if (!*p || (*p & 0xc0) != 0x80) return FALSE;
+		}
+	}
+	return TRUE;
+}
+
+/*!
+ * @brief Convert a UTF-8 string to the system (ANSI code page) encoding
+ * @param utf8_str Pointer to the NUL-terminated UTF-8 string to convert
+ * @param sys_str_buffer Buffer that receives the converted, NUL-terminated string
+ * @param sys_str_buflen Size of sys_str_buffer in bytes
+ * @return None
+ * @details If either conversion step fails, or the result including its
+ * terminator does not fit in sys_str_buflen bytes, sys_str_buffer is left
+ * unmodified.
+ */
+#ifdef SJIS
+#ifdef WINDOWS
+#include <windows.h>
+static void utf8_to_sys(cptr utf8_str, char* sys_str_buffer, size_t sys_str_buflen)
+{
+	LPWSTR utf16buf;
+	int utf16len;
+	int syslen;
+
+	/* A source length of -1 makes the API count and copy the terminating NUL too */
+	utf16len = MultiByteToWideChar(CP_UTF8, 0, (LPCSTR)utf8_str, -1, NULL, 0);
+	if (utf16len <= 0) return;
+
+	C_MAKE(utf16buf, utf16len, WCHAR);
+
+	if (MultiByteToWideChar(CP_UTF8, 0, (LPCSTR)utf8_str, -1, utf16buf, utf16len) > 0) {
+		/* Query the required size first so an undersized buffer is detected before writing */
+		syslen = WideCharToMultiByte(CP_ACP, 0, (LPCWSTR)utf16buf, -1, NULL, 0, NULL, NULL);
+		if (0 < syslen && (size_t)syslen <= sys_str_buflen) {
+			WideCharToMultiByte(CP_ACP, 0, (LPCWSTR)utf16buf, -1, (LPSTR)sys_str_buffer, syslen, NULL, NULL);
+		}
+	}
+
+	C_KILL(utf16buf, utf16len, WCHAR);
+}
+#endif
+#endif
+/*!
+ * @brief Guess the encoding of a string and convert it to the system encoding
+ * @param strbuf Buffer holding the NUL-terminated string to convert.
+ * The buffer is overwritten with the converted string.
+ * @param buflen Size of the buffer in bytes.
+ * @return None
+ * @details Pure-ASCII strings and strings that do not look like UTF-8 are
+ * left untouched. The conversion is performed only when SJIS and WINDOWS are
+ * both defined, because utf8_to_sys() is only defined for that configuration;
+ * on other builds the string is left as it is.
+ */
+void guess_convert_to_system_encoding(char* strbuf, int buflen)
+{
+	if (buflen <= 0) return;
+	if (is_ascii_str(strbuf)) return;
+	if (!is_utf8_str(strbuf)) return;
+
+#if defined(SJIS) && defined(WINDOWS)
+	{
+		char* work;
+		C_MAKE(work, buflen, char);
+
+		/* strncpy() does not terminate on truncation, so terminate explicitly */
+		strncpy(work, strbuf, buflen - 1);
+		work[buflen - 1] = '\0';
+
+		utf8_to_sys(work, strbuf, buflen);
+		C_KILL(work, buflen, char);
+	}
+#endif
+}
+
+#endif /* JP */
diff --git a/src/util.c b/src/util.c
index 5ef7f93b8..d67359ba4 100644
--- a/src/util.c
+++ b/src/util.c
@@ -452,6 +452,7 @@ errr my_fgets(FILE *fff, char *buf, huge n)
 	/* Read a line */
 	if (fgets(tmp, 1024, fff))
 	{
+		guess_convert_to_system_encoding(tmp, sizeof(tmp));
 		/* Convert weirdness */
 		for (s = tmp; *s; s++)
 		{
-- 
2.11.0