Fix issue #1838: End of line diff is a bit wanky (#1849)

author Takashi Sawanaka <sdottaka@users.sourceforge.net>

Sat, 20 May 2023 01:53:59 +0000 (10:53 +0900)

committer GitHub <noreply@github.com>

Sat, 20 May 2023 01:53:59 +0000 (10:53 +0900)
author Takashi Sawanaka <sdottaka@users.sourceforge.net>
Sat, 20 May 2023 01:53:59 +0000 (10:53 +0900)
committer GitHub <noreply@github.com>
Sat, 20 May 2023 01:53:59 +0000 (10:53 +0900)
diff --git a/Externals/crystaledit/editlib/ccrystaltextview.cpp b/Externals/crystaledit/editlib/ccrystaltextview.cpp

index 17cec32..f8168b9 100644 (file)
--- a/Externals/crystaledit/editlib/ccrystaltextview.cpp
+++ b/Externals/crystaledit/editlib/ccrystaltextview.cpp
@@ -1350,7 +1350,7 @@ DrawLineHelperImpl (CPoint & ptOrigin, const CRect & rcClip, int nColorIndex,
  bool CCrystalTextView::
  GetSelectionLeftRight(int nLineIndex, int& nSelLeft, int& nSelRight)
  {
-    int nLineLength = GetLineLength (nLineIndex);
+    int nLineLength = GetViewableLineLength (nLineIndex);
      nSelLeft = 0;
      nSelRight = 0;
      if ( !m_bRectangularSelection )
diff --git a/Src/MergeEditView.cpp b/Src/MergeEditView.cpp

index b98695b..4f2e641 100644 (file)
--- a/Src/MergeEditView.cpp
+++ b/Src/MergeEditView.cpp
@@ -687,7 +687,7 @@ std::vector<TEXTBLOCK> CMergeEditView::GetAdditionalTextBlocks (int nLineIndex)
                 for (int pane = 0; pane < pDoc->m_nBuffers; pane++)
                 {
                         begin[pane] = (worddiffs[i].beginline[pane] < nLineIndex) ? 0 : worddiffs[i].begin[pane];
-                       end[pane]   = (worddiffs[i].endline[pane]   > nLineIndex) ? GetGroupView(pane)->GetLineLength(nLineIndex) : worddiffs[i].end[pane];
+                       end[pane]   = (worddiffs[i].endline[pane]   > nLineIndex) ? GetGroupView(pane)->GetViewableLineLength(nLineIndex) : worddiffs[i].end[pane];
                         if (worddiffs[i].beginline[pane] == worddiffs[i].endline[pane] &&
                                 worddiffs[i].begin[pane] == worddiffs[i].end[pane])
                                 deleted = true;
diff --git a/Src/stringdiffs.cpp b/Src/stringdiffs.cpp

index b90dddc..3f1953e 100644 (file)
--- a/Src/stringdiffs.cpp
+++ b/Src/stringdiffs.cpp
@@ -25,7 +25,7 @@ static tchar_t BreakCharDefaults[] = _T(",.;:");
  static const int TimeoutMilliSeconds = 500;
  
  static bool isSafeWhitespace(tchar_t ch);
-static bool isWordBreak(int breakType, const tchar_t *str, int index, bool ignore_numbers);
+static bool isWordBreak(int breakType, const tchar_t *str, int index);
  
  void Init()
  {
@@ -419,67 +419,43 @@ stringdiffs::BuildWordsArray(const String & str) const
  
         size_t sLen = str.length();
         assert(sLen < INT_MAX);
-       int iLen = static_cast<int>(sLen);
-
-       // state when we are looking for next word
-inspace:
-       if (isSafeWhitespace(str[i])) 
-       {
-               i = pIterChar->next();
-               goto inspace;
-       }
-       if (begin < i)
-       {
-               // just finished a word
-               // e is first word character (space or at end)
-               int e = i - 1;
  
-               words.push_back(word(begin, e, dlspace, Hash(str, begin, e, 0)));
-       }
-       if (i == iLen)
+       if (str.empty())
                 return words;
-       begin = i;
-       goto inword;
  
-       // state when we are inside a word
-inword:
-       bool atspace=false;
-       if (i == iLen || ((atspace = isSafeWhitespace(str[i])) != 0) || isWordBreak(m_breakType, str.c_str(), i, m_ignore_numbers))
+       int break_type = 0;
+       int prev_break_type = 0;
+       int iLen = static_cast<int>(sLen);
+       for (; i < iLen;)
         {
-               if (begin<i)
+               break_type = dlword;
+               tchar_t ch = str[i];
+               if (ch == '\r' || ch == '\n')
                 {
-                       // just finished a word
-                       // e is first non-word character (space or at end)
-                       int e = i-1;
-                       
-                       words.push_back(word(begin, e, dlword, Hash(str, begin, e, 0)));
+                       break_type = dleol;
                 }
-               if (i == iLen)
+               else if (isSafeWhitespace(ch))
                 {
-                       return words;
+                       break_type = dlspace;
                 }
-               else if (atspace)
+               else if (isWordBreak(m_breakType, str.c_str(), i))
                 {
-                       begin = i;
-                       goto inspace;
+                       break_type = dlbreak;
                 }
-               else
+               else if (m_ignore_numbers && tc::istdigit(ch))
+               {
+                       break_type = dlnumber;
+               }
+               if (i > 0 && (break_type != prev_break_type || break_type == dlbreak || (prev_break_type == dleol && !(str[i - 1] == '\r' && ch == '\n'))))
                 {
-                       // start a new word because we hit a non-whitespace word break (eg, a comma)
-                       // but, we have to put each word break character into its own word
-                       int break_type = (m_ignore_numbers && tc::istdigit(str[i]))
-                               ? dlnumber
-                               : dlbreak;
-
-                       int inext = pIterChar->next();
-                       words.push_back(word(i, inext - 1, break_type, Hash(str, i, inext - 1, 0)));
-                       i = inext;
+                       words.push_back(word(begin, i - 1, prev_break_type, Hash(str, begin, i - 1, 0)));
                         begin = i;
-                       goto inword;
                 }
+               i = pIterChar->next();
+               prev_break_type = break_type;
         }
-       i = pIterChar->next();
-       goto inword; // safe even if we're at the end or no longer in a word
+       words.push_back(word(begin, i - 1, break_type, Hash(str, begin, i - 1, 0)));
+       return words;
  }
  
  /**
@@ -490,13 +466,6 @@ inword:
  void
  stringdiffs::PopulateDiffs()
  {
-       auto IsEOLorEmpty = [](const String& text, size_t begin, size_t end) -> bool {
-               if (end - begin + 1 > 2)
-                       return false;
-               String str = text.substr(begin, end - begin + 1);
-               return (str.empty() || str == _T("\r\n") || str == _T("\n") || str == _T("\r"));
-       };
-       
         for (int i=0; i< (int)m_wdiffs.size(); ++i)
         {
                 bool skipIt = false;
@@ -513,13 +482,6 @@ stringdiffs::PopulateDiffs()
                                 skipIt = true;
                         }
                 }
-               else
-               {
-                       if (!m_eol_sensitive &&
-                               IsEOLorEmpty(m_str1, m_wdiffs[i].begin[0], m_wdiffs[i].end[0]) &&
-                               IsEOLorEmpty(m_str2, m_wdiffs[i].begin[1], m_wdiffs[i].end[1]))
-                               skipIt = true;
-               }
                 if (!skipIt)
                 {
                         // Should never have a pair where both are missing
@@ -579,6 +541,11 @@ stringdiffs::AreWordsSame(const word& word1, const word& word2) const
                 if (tc::istdigit(m_str1[word1.start]) && tc::istdigit(m_str2[word2.start]))
                         return true;
         }
+       if (!this->m_eol_sensitive)
+       {
+               if (IsEOL(word1) && IsEOL(word2))
+                       return true;
+       }
  
         if (word1.hash != word2.hash)
                 return false;
@@ -780,18 +747,16 @@ static inline bool IsLeadByte(tchar_t ch)
  static inline bool
  isSafeWhitespace(tchar_t ch)
  {
-       return tc::istspace((unsigned)ch) && !IsLeadByte(ch);
+       return tc::istspace((unsigned)ch) && !IsLeadByte(ch) && (ch != '\r' && ch != '\n');
  }
  
  /**
   * @brief Is it a non-whitespace wordbreak character (ie, punctuation)?
   */
  static bool
-isWordBreak(int breakType, const tchar_t *str, int index, bool ignore_numbers)
+isWordBreak(int breakType, const tchar_t *str, int index)
  {
         tchar_t ch = str[index];
-       if (ignore_numbers && tc::istdigit(ch))
-               return true;
         // breakType==1 means break also on punctuation
         if ((ch & 0xff00) == 0)
         {
diff --git a/Src/stringdiffsi.h b/Src/stringdiffsi.h

index 2e3b666..c60df7d 100644 (file)
--- a/Src/stringdiffsi.h
+++ b/Src/stringdiffsi.h
@@ -23,8 +23,8 @@ enum
  {
         dlword = 0,
         dlspace,
+       dleol,
         dlbreak, 
-       dlinsert,
         dlnumber,
  };
  /**
@@ -92,18 +92,11 @@ private:
                 return (word1.bBreak == dlnumber);
         }
         /**
-        * @brief Is this block a break?
+        * @brief Is this block an EOL?
          */
-       inline bool IsBreak(const word & word1) const
+       inline bool IsEOL(const word & word1) const
         {
-               return (word1.bBreak == dlbreak || word1.bBreak == dlspace);
-       }
-       /**
-        * @brief Is this block an empty (insert) one?
-        */
-       inline bool IsInsert(const word & word1) const
-       {
-               return (word1.bBreak == dlinsert);
+               return (word1.bBreak == dleol);
         }
         bool BuildWordDiffList_DP();
         int dp(std::vector<char> & edscript);
diff --git a/Testing/GoogleTest/StringDiffs/stringdiffs_test.cpp b/Testing/GoogleTest/StringDiffs/stringdiffs_test.cpp

index 69ad20d..c7ae258 100644 (file)
--- a/Testing/GoogleTest/StringDiffs/stringdiffs_test.cpp
+++ b/Testing/GoogleTest/StringDiffs/stringdiffs_test.cpp
@@ -675,7 +675,22 @@ namespace
  
         TEST_F(StringDiffsTest, IgnoreEOL3)
         {
-               std::vector<strdiff::wdiff> diffs = strdiff::ComputeWordDiffs(_T("abc\r\n"), _T("abc"), true, false, 0, false, 0, true);
+               std::vector<strdiff::wdiff> diffs = strdiff::ComputeWordDiffs(_T("abc\r\n\r\n"), _T("abc\n\n"), true, false, 0, false, 0, true);
+               EXPECT_EQ(0, diffs.size());
+       }
+
+       TEST_F(StringDiffsTest, IgnoreEOL4)
+       {
+               std::vector<strdiff::wdiff> diffs = strdiff::ComputeWordDiffs(_T("abc\r\r"), _T("abc\n\n"), true, false, 0, false, 0, true);
+               EXPECT_EQ(0, diffs.size());
+       }
+
+       TEST_F(StringDiffsTest, IgnoreEOL5)
+       {
+               // GitHub issue #1838 End of line diff is a bit wanky
+               std::vector<strdiff::wdiff> diffs = strdiff::ComputeWordDiffs(_T("abc\r\ndef\r\n"), _T("abc\ndef\n"), true, false, 0, false, 0, true);
+               EXPECT_EQ(0, diffs.size());
+               diffs = strdiff::ComputeWordDiffs(_T("  abc  \r\ndef\r\n  "), _T("  abc  \ndef\n  "), true, false, 0, false, 0, true);
                 EXPECT_EQ(0, diffs.size());
         }
  
@@ -715,12 +730,23 @@ namespace
                 pDiff = &diffs[0];
                 if (diffs.size() == 1)
                 {
-                       EXPECT_EQ(7, pDiff->begin[0]);
+                       EXPECT_EQ(6, pDiff->begin[0]);
                         EXPECT_EQ(7, pDiff->end[0]);
                         EXPECT_EQ(5, pDiff->begin[1]);
                         EXPECT_EQ(5, pDiff->end[1]);
                 }
  
+               diffs = strdiff::ComputeWordDiffs(_T("ab  12 34"), _T("aB 2 c"), false, true, 1, true, 0, true);
+               EXPECT_EQ(1, diffs.size());
+               pDiff = &diffs[0];
+               if (diffs.size() == 1)
+               {
+                       EXPECT_EQ(7, pDiff->begin[0]);
+                       EXPECT_EQ(8, pDiff->end[0]);
+                       EXPECT_EQ(5, pDiff->begin[1]);
+                       EXPECT_EQ(5, pDiff->end[1]);
+               }
+
         }
  
         TEST_F(StringDiffsTest, CompareCase)
author	Takashi Sawanaka <sdottaka@users.sourceforge.net>
	Sat, 20 May 2023 01:53:59 +0000 (10:53 +0900)
committer	GitHub <noreply@github.com>
	Sat, 20 May 2023 01:53:59 +0000 (10:53 +0900)
Externals/crystaledit/editlib/ccrystaltextview.cpp		patch \| blob \| history
Src/MergeEditView.cpp		patch \| blob \| history
Src/stringdiffs.cpp		patch \| blob \| history
Src/stringdiffsi.h		patch \| blob \| history
Testing/GoogleTest/StringDiffs/stringdiffs_test.cpp		patch \| blob \| history