OSDN Git Service

Fix issue #1838: End of line diff is a bit wanky (#1849)
authorTakashi Sawanaka <sdottaka@users.sourceforge.net>
Sat, 20 May 2023 01:53:59 +0000 (10:53 +0900)
committerGitHub <noreply@github.com>
Sat, 20 May 2023 01:53:59 +0000 (10:53 +0900)
Externals/crystaledit/editlib/ccrystaltextview.cpp
Src/MergeEditView.cpp
Src/stringdiffs.cpp
Src/stringdiffsi.h
Testing/GoogleTest/StringDiffs/stringdiffs_test.cpp

index 17cec32..f8168b9 100644 (file)
@@ -1350,7 +1350,7 @@ DrawLineHelperImpl (CPoint & ptOrigin, const CRect & rcClip, int nColorIndex,
 bool CCrystalTextView::
 GetSelectionLeftRight(int nLineIndex, int& nSelLeft, int& nSelRight)
 {
-    int nLineLength = GetLineLength (nLineIndex);
+    int nLineLength = GetViewableLineLength (nLineIndex);
     nSelLeft = 0;
     nSelRight = 0;
     if ( !m_bRectangularSelection )
index b98695b..4f2e641 100644 (file)
@@ -687,7 +687,7 @@ std::vector<TEXTBLOCK> CMergeEditView::GetAdditionalTextBlocks (int nLineIndex)
                for (int pane = 0; pane < pDoc->m_nBuffers; pane++)
                {
                        begin[pane] = (worddiffs[i].beginline[pane] < nLineIndex) ? 0 : worddiffs[i].begin[pane];
-                       end[pane]   = (worddiffs[i].endline[pane]   > nLineIndex) ? GetGroupView(pane)->GetLineLength(nLineIndex) : worddiffs[i].end[pane];
+                       end[pane]   = (worddiffs[i].endline[pane]   > nLineIndex) ? GetGroupView(pane)->GetViewableLineLength(nLineIndex) : worddiffs[i].end[pane];
                        if (worddiffs[i].beginline[pane] == worddiffs[i].endline[pane] &&
                                worddiffs[i].begin[pane] == worddiffs[i].end[pane])
                                deleted = true;
index b90dddc..3f1953e 100644 (file)
@@ -25,7 +25,7 @@ static tchar_t BreakCharDefaults[] = _T(",.;:");
 static const int TimeoutMilliSeconds = 500;
 
 static bool isSafeWhitespace(tchar_t ch);
-static bool isWordBreak(int breakType, const tchar_t *str, int index, bool ignore_numbers);
+static bool isWordBreak(int breakType, const tchar_t *str, int index);
 
 void Init()
 {
@@ -419,67 +419,43 @@ stringdiffs::BuildWordsArray(const String & str) const
 
        size_t sLen = str.length();
        assert(sLen < INT_MAX);
-       int iLen = static_cast<int>(sLen);
-
-       // state when we are looking for next word
-inspace:
-       if (isSafeWhitespace(str[i])) 
-       {
-               i = pIterChar->next();
-               goto inspace;
-       }
-       if (begin < i)
-       {
-               // just finished a word
-               // e is first word character (space or at end)
-               int e = i - 1;
 
-               words.push_back(word(begin, e, dlspace, Hash(str, begin, e, 0)));
-       }
-       if (i == iLen)
+       if (str.empty())
                return words;
-       begin = i;
-       goto inword;
 
-       // state when we are inside a word
-inword:
-       bool atspace=false;
-       if (i == iLen || ((atspace = isSafeWhitespace(str[i])) != 0) || isWordBreak(m_breakType, str.c_str(), i, m_ignore_numbers))
+       int break_type = 0;
+       int prev_break_type = 0;
+       int iLen = static_cast<int>(sLen);
+       for (; i < iLen;)
        {
-               if (begin<i)
+               break_type = dlword;
+               tchar_t ch = str[i];
+               if (ch == '\r' || ch == '\n')
                {
-                       // just finished a word
-                       // e is first non-word character (space or at end)
-                       int e = i-1;
-                       
-                       words.push_back(word(begin, e, dlword, Hash(str, begin, e, 0)));
+                       break_type = dleol;
                }
-               if (i == iLen)
+               else if (isSafeWhitespace(ch))
                {
-                       return words;
+                       break_type = dlspace;
                }
-               else if (atspace)
+               else if (isWordBreak(m_breakType, str.c_str(), i))
                {
-                       begin = i;
-                       goto inspace;
+                       break_type = dlbreak;
                }
-               else
+               else if (m_ignore_numbers && tc::istdigit(ch))
+               {
+                       break_type = dlnumber;
+               }
+               if (i > 0 && (break_type != prev_break_type || break_type == dlbreak || (prev_break_type == dleol && !(str[i - 1] == '\r' && ch == '\n'))))
                {
-                       // start a new word because we hit a non-whitespace word break (eg, a comma)
-                       // but, we have to put each word break character into its own word
-                       int break_type = (m_ignore_numbers && tc::istdigit(str[i]))
-                               ? dlnumber
-                               : dlbreak;
-
-                       int inext = pIterChar->next();
-                       words.push_back(word(i, inext - 1, break_type, Hash(str, i, inext - 1, 0)));
-                       i = inext;
+                       words.push_back(word(begin, i - 1, prev_break_type, Hash(str, begin, i - 1, 0)));
                        begin = i;
-                       goto inword;
                }
+               i = pIterChar->next();
+               prev_break_type = break_type;
        }
-       i = pIterChar->next();
-       goto inword; // safe even if we're at the end or no longer in a word
+       words.push_back(word(begin, i - 1, break_type, Hash(str, begin, i - 1, 0)));
+       return words;
 }
 
 /**
@@ -490,13 +466,6 @@ inword:
 void
 stringdiffs::PopulateDiffs()
 {
-       auto IsEOLorEmpty = [](const String& text, size_t begin, size_t end) -> bool {
-               if (end - begin + 1 > 2)
-                       return false;
-               String str = text.substr(begin, end - begin + 1);
-               return (str.empty() || str == _T("\r\n") || str == _T("\n") || str == _T("\r"));
-       };
-       
        for (int i=0; i< (int)m_wdiffs.size(); ++i)
        {
                bool skipIt = false;
@@ -513,13 +482,6 @@ stringdiffs::PopulateDiffs()
                                skipIt = true;
                        }
                }
-               else
-               {
-                       if (!m_eol_sensitive &&
-                               IsEOLorEmpty(m_str1, m_wdiffs[i].begin[0], m_wdiffs[i].end[0]) &&
-                               IsEOLorEmpty(m_str2, m_wdiffs[i].begin[1], m_wdiffs[i].end[1]))
-                               skipIt = true;
-               }
                if (!skipIt)
                {
                        // Should never have a pair where both are missing
@@ -579,6 +541,11 @@ stringdiffs::AreWordsSame(const word& word1, const word& word2) const
                if (tc::istdigit(m_str1[word1.start]) && tc::istdigit(m_str2[word2.start]))
                        return true;
        }
+       if (!this->m_eol_sensitive)
+       {
+               if (IsEOL(word1) && IsEOL(word2))
+                       return true;
+       }
 
        if (word1.hash != word2.hash)
                return false;
@@ -780,18 +747,16 @@ static inline bool IsLeadByte(tchar_t ch)
 static inline bool
 isSafeWhitespace(tchar_t ch)
 {
-       return tc::istspace((unsigned)ch) && !IsLeadByte(ch);
+       return tc::istspace((unsigned)ch) && !IsLeadByte(ch) && (ch != '\r' && ch != '\n');
 }
 
 /**
  * @brief Is it a non-whitespace wordbreak character (ie, punctuation)?
  */
 static bool
-isWordBreak(int breakType, const tchar_t *str, int index, bool ignore_numbers)
+isWordBreak(int breakType, const tchar_t *str, int index)
 {
        tchar_t ch = str[index];
-       if (ignore_numbers && tc::istdigit(ch))
-               return true;
        // breakType==1 means break also on punctuation
        if ((ch & 0xff00) == 0)
        {
index 2e3b666..c60df7d 100644 (file)
@@ -23,8 +23,8 @@ enum
 {
        dlword = 0,
        dlspace,
+       dleol,
        dlbreak, 
-       dlinsert,
        dlnumber,
 };
 /**
@@ -92,18 +92,11 @@ private:
                return (word1.bBreak == dlnumber);
        }
        /**
-        * @brief Is this block a break?
+        * @brief Is this block an EOL?
         */
-       inline bool IsBreak(const word & word1) const
+       inline bool IsEOL(const word & word1) const
        {
-               return (word1.bBreak == dlbreak || word1.bBreak == dlspace);
-       }
-       /**
-        * @brief Is this block an empty (insert) one?
-        */
-       inline bool IsInsert(const word & word1) const
-       {
-               return (word1.bBreak == dlinsert);
+               return (word1.bBreak == dleol);
        }
        bool BuildWordDiffList_DP();
        int dp(std::vector<char> & edscript);
index 69ad20d..c7ae258 100644 (file)
@@ -675,7 +675,22 @@ namespace
 
        TEST_F(StringDiffsTest, IgnoreEOL3)
        {
-               std::vector<strdiff::wdiff> diffs = strdiff::ComputeWordDiffs(_T("abc\r\n"), _T("abc"), true, false, 0, false, 0, true);
+               std::vector<strdiff::wdiff> diffs = strdiff::ComputeWordDiffs(_T("abc\r\n\r\n"), _T("abc\n\n"), true, false, 0, false, 0, true);
+               EXPECT_EQ(0, diffs.size());
+       }
+
+       TEST_F(StringDiffsTest, IgnoreEOL4)
+       {
+               std::vector<strdiff::wdiff> diffs = strdiff::ComputeWordDiffs(_T("abc\r\r"), _T("abc\n\n"), true, false, 0, false, 0, true);
+               EXPECT_EQ(0, diffs.size());
+       }
+
+       TEST_F(StringDiffsTest, IgnoreEOL5)
+       {
+               // GitHub issue #1838 End of line diff is a bit wanky
+               std::vector<strdiff::wdiff> diffs = strdiff::ComputeWordDiffs(_T("abc\r\ndef\r\n"), _T("abc\ndef\n"), true, false, 0, false, 0, true);
+               EXPECT_EQ(0, diffs.size());
+               diffs = strdiff::ComputeWordDiffs(_T("  abc  \r\ndef\r\n  "), _T("  abc  \ndef\n  "), true, false, 0, false, 0, true);
                EXPECT_EQ(0, diffs.size());
        }
 
@@ -715,12 +730,23 @@ namespace
                pDiff = &diffs[0];
                if (diffs.size() == 1)
                {
-                       EXPECT_EQ(7, pDiff->begin[0]);
+                       EXPECT_EQ(6, pDiff->begin[0]);
                        EXPECT_EQ(7, pDiff->end[0]);
                        EXPECT_EQ(5, pDiff->begin[1]);
                        EXPECT_EQ(5, pDiff->end[1]);
                }
 
+               diffs = strdiff::ComputeWordDiffs(_T("ab  12 34"), _T("aB 2 c"), false, true, 1, true, 0, true);
+               EXPECT_EQ(1, diffs.size());
+               pDiff = &diffs[0];
+               if (diffs.size() == 1)
+               {
+                       EXPECT_EQ(7, pDiff->begin[0]);
+                       EXPECT_EQ(8, pDiff->end[0]);
+                       EXPECT_EQ(5, pDiff->begin[1]);
+                       EXPECT_EQ(5, pDiff->end[1]);
+               }
+
        }
 
        TEST_F(StringDiffsTest, CompareCase)