From: Yurii Hordiienko <45203053+hordi@users.noreply.github.com> Date: Thu, 24 Nov 2022 13:51:40 +0000 (-0500) Subject: Update unicoder.cpp (#1584) X-Git-Tag: 2.16.24+jp-2~2^2~5 X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=c966793684f79c2de6f1a33b12416759d54f19f5;p=winmerge-jp%2Fwinmerge-jp.git Update unicoder.cpp (#1584) CheckForInvalidUtf8 performance improvement --- diff --git a/Src/Common/unicoder.cpp b/Src/Common/unicoder.cpp index e499db296..37f2381ec 100644 --- a/Src/Common/unicoder.cpp +++ b/Src/Common/unicoder.cpp @@ -1134,18 +1134,19 @@ bool CheckForInvalidUtf8(const char* pBuffer, size_t size) for (unsigned char* pb = (unsigned char*)pBuffer, *end = pb + size; pb < end;) { unsigned c = *pb++; - if ((c == 0xC0) || (c == 0xC1) || (c >= 0xF5)) - return true; - + if (!(c & 0x80)) continue; + + if ((c >= 0xF5) || (c == 0xC0) || (c == 0xC1)) + return true; - uint32_t v = 0x80808080; + uint32_t v = 0x80808000; //1st 0-byte covers scenario if no any next "if" fired at all if ((c & 0xE0) == 0xC0) { if (pb == end) return true; - reinterpret_cast<uint8_t*>(&v)[0] = *pb++; + *reinterpret_cast<uint8_t*>(&v) = *pb++; } else if ((c & 0xF0) == 0xE0) { @@ -1164,8 +1165,6 @@ bool CheckForInvalidUtf8(const char* pBuffer, size_t size) reinterpret_cast<uint8_t*>(&v)[2] = pb[2]; pb += 3; } - else - return true; if ((v & (0xC0C0C0C0)) != 0x80808080) return true;