From 6654e89b87e7a8cf96ee5be823495a7a0b4557d0 Mon Sep 17 00:00:00 2001 From: Perry Rapp Date: Sat, 31 Dec 2005 19:30:18 +0000 Subject: [PATCH] PATCH: [ 1394247 ] Fix GuessEncoding recognition of codepages BUG: [ 1394234 ] GuessEncodings fails to recognize CP-437 Src: codepage_detect.cpp --- Src/Changes.txt | 5 +++ Src/codepage_detect.cpp | 99 ++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 87 insertions(+), 17 deletions(-) diff --git a/Src/Changes.txt b/Src/Changes.txt index 4d8a41a62..c14599760 100644 --- a/Src/Changes.txt +++ b/Src/Changes.txt @@ -1,3 +1,8 @@ +2005-12-31 Perry + PATCH: [ 1394247 ] Fix GuessEncoding recognition of codepages + BUG: [ 1394234 ] GuessEncodings fails to recognize CP-437 + Src: codepage_detect.cpp + 2005-12-30 Perry PATCH: [ 1391732 ] Do not offer to unify codepages in UNICODE build Also introduces new error IDS_DIFFERENT_UNICODINGS and new diff --git a/Src/codepage_detect.cpp b/Src/codepage_detect.cpp index da77ac1f4..6dc4b57f6 100644 --- a/Src/codepage_detect.cpp +++ b/Src/codepage_detect.cpp @@ -21,6 +21,85 @@ static char THIS_FILE[] = __FILE__; #endif +/** + * @brief Prefixes to handle when searching for codepage names + */ +static LPCTSTR f_wincp_prefixes[] = +{ + _T("WINDOWS-") + , _T("WINDOWS") + , _T("CP") + , _T("CP-") + , _T("MSDOS") + , _T("MSDOS-") +}; + +/** + * @brief Is string non-empty and comprised entirely of numbers? + */ +static bool +isNumeric(const CString & str) +{ + if (str.IsEmpty()) + return false; + for (int i=0; i prefix.GetLength()) + { + CString encpref = encodingName.Left(prefix.GetLength()); + encpref.MakeUpper(); + if (prefix == encpref) + { + // encoding is, eg, "windows-1251" + CString remainder = encodingName.Mid(prefix.GetLength()); + // remainder is, eg, "1251" + if (isNumeric(remainder)) + { + unsigned codepage = _ttoi(remainder); + if (codepage) + encodingId = GetEncodingIdFromCodePage(codepage); + return encodingId; + } + } + } + } + + return 0; // failed +} /** * @brief Parser for HTML files to find encoding information @@ -46,14 +125,7 @@ static unsigned demoGuessEncoding_html(const char *src, size_t len) { pchValue[cchValue] = '\0'; // Is it an encoding name known to charsets module ? - unsigned encodingId = GetEncodingIdFromName(pchValue); - if (encodingId == 0) - { - if (unsigned codepage = atoi(pchValue)) - { - encodingId = GetEncodingIdFromCodePage(codepage); - } - } + unsigned encodingId = FindEncodingIdFromNameOrAlias(pchValue); if (encodingId) { return GetEncodingCodePageFromId(encodingId); @@ -79,15 +151,8 @@ static unsigned demoGuessEncoding_xml(const char *src, size_t len) CMarkdown::String encoding = xml.GetAttribute("encoding"); if (encoding.A) { - // Is it an encoding name known to charsets module ? - unsigned encodingId = GetEncodingIdFromName(encoding.A); - if (encodingId == 0) - { - if (unsigned codepage = atoi(encoding.A)) - { - encodingId = GetEncodingIdFromCodePage(codepage); - } - } + // Is it an encoding name we can find in charsets module ? + unsigned encodingId = FindEncodingIdFromNameOrAlias(encoding.A); if (encodingId) { return GetEncodingCodePageFromId(encodingId); -- 2.11.0