From b4d963be0aa62a84f6f9bd37297ff2147c6e4322 Mon Sep 17 00:00:00 2001 From: Takashi Sawanaka Date: Sun, 4 Dec 2016 16:37:19 +0900 Subject: [PATCH] Retry detecting the encoding of a file using all of its content when an encoding error occurs. Fixes github-issue#13. --- Src/MergeDoc.cpp | 6 ++++++ Src/codepage_detect.cpp | 8 +------- Src/codepage_detect.h | 5 ++++- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/Src/MergeDoc.cpp b/Src/MergeDoc.cpp index 63c22bc7f..3e8ac577a 100644 --- a/Src/MergeDoc.cpp +++ b/Src/MergeDoc.cpp @@ -2469,6 +2469,12 @@ DWORD CMergeDoc::LoadOneFile(int index, String filename, bool readOnly, const St m_pRescanFileInfo[index]->Update(filename); loadSuccess = LoadFile(filename.c_str(), index, readOnly, encoding); + if (FileLoadResult::IsLossy(loadSuccess)) + { + m_ptBuf[index]->FreeAll(); + loadSuccess = LoadFile(filename.c_str(), index, readOnly, + GuessCodepageEncoding(filename, GetOptionsMgr()->GetInt(OPT_CP_DETECT), -1)); + } } else { diff --git a/Src/codepage_detect.cpp b/Src/codepage_detect.cpp index 35668baac..338fafd6a 100644 --- a/Src/codepage_detect.cpp +++ b/Src/codepage_detect.cpp @@ -25,9 +25,6 @@ # define strncasecmp(a, b, n) _strnicmp((a), (b), (n)) #endif -/** @brief Buffer size used in this file. */ -static const int BufSize = 65536; - /** * @brief Prefixes to handle when searching for codepage names * NB: prefixes ending in '-' must go first! @@ -211,8 +208,6 @@ static unsigned GuessEncoding_from_bytes(const String& ext, const char *src, siz } if (guessEncodingType & 1) { - if (len > BufSize) - len = BufSize; String lower_ext = string_makelower(ext); if (lower_ext == _T(".rc")) { @@ -236,10 +231,9 @@ static unsigned GuessEncoding_from_bytes(const String& ext, const char *src, siz * @param [in] bGuessEncoding Try to guess codepage (not just unicode encoding). * @return Structure getting the encoding info. 
*/ -FileTextEncoding GuessCodepageEncoding(const String& filepath, int guessEncodingType) +FileTextEncoding GuessCodepageEncoding(const String& filepath, int guessEncodingType, int mapmaxlen) { FileTextEncoding encoding; - const int mapmaxlen = BufSize; CMarkdown::FileImage fi(filepath.c_str(), mapmaxlen); encoding.SetCodepage(ucr::getDefaultCodepage()); encoding.m_bom = false; diff --git a/Src/codepage_detect.h b/Src/codepage_detect.h index 2f5f67f31..19dbda659 100644 --- a/Src/codepage_detect.h +++ b/Src/codepage_detect.h @@ -8,4 +8,7 @@ #include "UnicodeString.h" #include "FileTextEncoding.h" -FileTextEncoding GuessCodepageEncoding(const String& filepath, int guessEncodingType); +/** @brief Buffer size used in this file. */ +static const int BufSize = 65536; + +FileTextEncoding GuessCodepageEncoding(const String& filepath, int guessEncodingType, int mapmaxlen = BufSize); -- 2.11.0