2 * @file stringdiffs.cpp
4 * @brief Implementation file for ComputeWordDiffs (q.v.)
9 #include "stringdiffs.h"
13 #include "CompareOptions.h"
14 #include "stringdiffsi.h"
// File-scope state for word-break configuration and the diff time budget.
// NOTE(review): neither flag below is read or written in the lines visible
// here; presumably they track one-time setup and whether a custom break set
// is installed - confirm against the hidden lines.
22 static bool Initialized;
23 static bool CustomChars;
// Break-character set that isWordBreak searches with _tcschr.
24 static TCHAR *BreakChars;
// Built-in break characters: comma, period, semicolon and colon.
25 static TCHAR BreakCharDefaults[] = _T(",.;:");
// Limit, in milliseconds, that onp compares its elapsed running time against.
26 static int TimeoutMilliSeconds = 500;
// Forward declarations of file-local helpers defined further down.
28 static bool isSafeWhitespace(TCHAR ch);
29 static bool isWordBreak(int breakType, const TCHAR *str, int index);
// NOTE(review): the routine enclosing the next statement is not visible in
// this view; the statement points BreakChars at the built-in default set.
33 BreakChars = &BreakCharDefaults[0];
// Installs a caller-supplied break-character set.
// @param breakChars  characters to use; BreakChars is pointed at a _tcsdup
//                    duplicate of this string, not at the argument itself.
// NOTE(review): the lines between the signature and the assignment are not
// visible here - confirm that a previously duplicated set is released before
// it is overwritten, otherwise each call leaks the earlier copy.
48 void SetBreakChars(const TCHAR *breakChars)
56 BreakChars = _tcsdup(breakChars);
// Two-string convenience overload: packs str1, str2 and an empty third string
// into a three-element array and forwards to the nFiles overload with
// nFiles = 2, passing every option through unchanged.
60 ComputeWordDiffs(const String& str1, const String& str2,
61 bool case_sensitive, bool eol_sensitive, int whitespace, int breakType, bool byte_level)
63 String strs[3] = {str1, str2, _T("")};
64 return ComputeWordDiffs(2, strs, case_sensitive, eol_sensitive, whitespace, breakType, byte_level);
// Predicate handed to Make3wayDiff: compares the pane-0 and pane-2 ranges of
// a three-way diff entry. Stores the string array and the case flag.
69 Comp02Functor(const String *strs, bool case_sensitive) :
70 strs_(strs), case_sensitive_(case_sensitive)
// @param wd3  diff entry whose begin/end slots 0 and 2 are compared.
73 bool operator()(const wdiff &wd3)
// Inclusive ranges, hence the + 1.
75 size_t wlen0 = wd3.end[0] - wd3.begin[0] + 1;
76 size_t wlen2 = wd3.end[2] - wd3.begin[2] + 1;
// NOTE(review): both comparisons below span wlen0 characters only; wlen2 is
// not used in the visible lines, so presumably a hidden line rejects
// wlen0 != wlen2 first - confirm.
// Exact comparison of the raw characters.
81 if (memcmp(&strs_[0][wd3.begin[0]], &strs_[2][wd3.begin[2]], wlen0 * sizeof(TCHAR)) != 0)
// Case-insensitive comparison (presumably selected when case_sensitive_ is
// false; the selecting condition is not visible here).
86 if (_tcsnicmp(&strs_[0][wd3.begin[0]], &strs_[2][wd3.begin[2]], wlen0) != 0)
96 * @brief Construct our worker object and tell it to do the work
// Computes in-line differences for two or three strings and collects them in
// a local vector of wdiff. Each comparison follows the same sequence on a
// stringdiffs worker: BuildWordDiffList, wordLevelToByteLevel, PopulateDiffs.
// NOTE(review): the conditions selecting the two-way path, the first
// three-way branch, and the guard around each wordLevelToByteLevel call
// (presumably byte_level) are on lines not visible in this view.
99 ComputeWordDiffs(int nFiles, const String str[3],
100 bool case_sensitive, bool eol_sensitive, int whitespace, int breakType, bool byte_level)
102 std::vector<wdiff> diffs;
// Plain comparison of str[0] against str[1].
105 stringdiffs sdiffs(str[0], str[1], case_sensitive, eol_sensitive, whitespace, breakType, &diffs);
106 // Hash all words in both lines and then compare them word by word
107 // storing differences into m_wdiffs
108 sdiffs.BuildWordDiffList();
111 sdiffs.wordLevelToByteLevel();
113 // Now copy m_wdiffs into caller-supplied m_pDiffs (coalescing adjacents if possible)
114 sdiffs.PopulateDiffs();
// Comparison of str[1] against str[2]; the worker reports its results in
// slots 0 and 1, so they are shifted up to slots 1 and 2 afterwards.
121 stringdiffs sdiffs(str[1], str[2], case_sensitive, eol_sensitive, whitespace, breakType, &diffs);
122 sdiffs.BuildWordDiffList();
124 sdiffs.wordLevelToByteLevel();
125 sdiffs.PopulateDiffs();
126 for (size_t i = 0; i < diffs.size(); i++)
128 wdiff& diff = diffs[i];
// Slot 1 must be copied to slot 2 before slot 0 overwrites slot 1.
129 diff.begin[2] = diff.begin[1];
130 diff.begin[1] = diff.begin[0];
132 diff.end[2] = diff.end[1];
133 diff.end[1] = diff.end[0];
// Middle string empty: compare str[0] against str[2] and move the slot-1
// results into slot 2; slot 0 already holds the str[0] range.
137 else if (str[1].empty())
139 stringdiffs sdiffs(str[0], str[2], case_sensitive, eol_sensitive, whitespace, breakType, &diffs);
140 sdiffs.BuildWordDiffList();
142 sdiffs.wordLevelToByteLevel();
143 sdiffs.PopulateDiffs();
144 for (size_t i = 0; i < diffs.size(); i++)
146 wdiff& diff = diffs[i];
147 diff.begin[2] = diff.begin[1];
148 //diff.begin[0] = diff.begin[0];
150 diff.end[2] = diff.end[1];
151 //diff.end[0] = diff.end[0];
// Third string empty: compare str[0] against str[1]; slots 0 and 1 are
// already in place, so the visible part of the loop changes nothing.
155 else if (str[2].empty())
157 stringdiffs sdiffs(str[0], str[1], case_sensitive, eol_sensitive, whitespace, breakType, &diffs);
158 sdiffs.BuildWordDiffList();
160 sdiffs.wordLevelToByteLevel();
161 sdiffs.PopulateDiffs();
162 for (size_t i = 0; i < diffs.size(); i++)
164 wdiff& diff = diffs[i];
165 //diff.begin[1] = diff.begin[1];
166 //diff.begin[0] = diff.begin[0];
168 //diff.end[1] = diff.end[1];
169 //diff.end[0] = diff.end[0];
// General three-way case: the middle string str[1] is compared against each
// neighbour and the two result lists are merged by Make3wayDiff.
// NOTE(review): both workers receive a literal 0 for the whitespace argument
// rather than the caller's whitespace setting - confirm this is intended.
175 std::vector<wdiff> diffs10, diffs12;
176 stringdiffs sdiffs10(str[1], str[0], case_sensitive, eol_sensitive, 0, breakType, &diffs10);
177 stringdiffs sdiffs12(str[1], str[2], case_sensitive, eol_sensitive, 0, breakType, &diffs12);
178 // Hash all words in both lines and then compare them word by word
179 // storing differences into m_wdiffs
180 sdiffs10.BuildWordDiffList();
181 sdiffs12.BuildWordDiffList();
184 sdiffs10.wordLevelToByteLevel();
185 sdiffs12.wordLevelToByteLevel();
187 // Now copy m_wdiffs into caller-supplied m_pDiffs (coalescing adjacents if possible)
188 sdiffs10.PopulateDiffs();
189 sdiffs12.PopulateDiffs();
// Comp02Functor supplies the pane-0 versus pane-2 comparison for the merge.
191 Make3wayDiff(diffs, diffs10, diffs12,
192 Comp02Functor(str, case_sensitive), false);
199 * @brief stringdiffs constructor simply loads all members from arguments
// Stores the comparison options in the matching members.
// NOTE(review): the initializers for the two strings and for pDiffs sit on
// lines not visible in this view.
201 stringdiffs::stringdiffs(const String & str1, const String & str2,
202 bool case_sensitive, bool eol_sensitive, int whitespace, int breakType,
203 std::vector<wdiff> * pDiffs)
206 , m_case_sensitive(case_sensitive)
207 , m_eol_sensitive(eol_sensitive)
208 , m_whitespace(whitespace)
209 , m_breakType(breakType)
211 , m_matchblock(true) // Change to false to get word to word compare
217 * The destructor frees all diffs added to the vectors.
// NOTE(review): the destructor body is not visible in this view, so what it
// releases cannot be confirmed from here.
219 stringdiffs::~stringdiffs()
// Logging helper, compiled only when STRINGDIFF_LOGGING is defined: writes
// every entry of m_wdiffs to the debugger output.
223 #ifdef STRINGDIFF_LOGGING
225 stringdiffs::debugoutput()
227 for (size_t i = 0; i < m_wdiffs.size(); i++)
// Inclusive start and end offsets of the entry on each side.
232 int s1 = m_wdiffs[i].begin[0];
233 int e1 = m_wdiffs[i].end[0];
234 int s2 = m_wdiffs[i].begin[1];
235 int e2 = m_wdiffs[i].end[1];
237 int len1 = e1 - s1 + 1;
238 int len2 = e2 - s2 + 1;
// Each side is extracted either in full or cut to 50 characters
// (presumably chosen from len1 / len2; the test is not visible here).
241 str1 = m_str1.substr(s1 ,e1 - s1 + 1);
243 str1 = m_str1.substr(s1, 50);
246 str2 = m_str2.substr(s2, e2- s2 + 1);
248 str2 = m_str2.substr(s2, 50);
// NOTE(review): the declaration and size of buf are not visible; wsprintf
// does no bounds checking of its own - confirm buf is large enough.
250 wsprintf(buf, _T("left= %s, %d,%d,\nright= %s, %d,%d \n"),
251 str1.c_str(), s1, e1, str2.c_str(), s2, e2);
252 OutputDebugString(buf);
// Builds m_wdiffs from the edit script produced by onp.
// The script is consumed one operation at a time: a minus sign marks a word
// present only in m_words1, a plus sign a word present only in m_words2 and
// an exclamation mark a replaced word pair.
// NOTE(review): the declarations and stepping of the word indices i and j,
// the handling of other script characters, and what follows a successful
// IsSpace test (presumably the entry is skipped) are not visible here.
258 stringdiffs::BuildWordDiffList_DP()
260 std::vector<char> edscript;
262 //if (dp(edscript) <= 0)
// A negative result from onp is tested for; the visible code of onp deletes
// its buffers early only on the timeout path.
264 if (onp(edscript) < 0)
268 for (size_t k = 0; k < edscript.size(); k++)
// Word only on side 1.
271 if (edscript[k] == '-')
273 if (m_whitespace == WHITESPACE_IGNORE_ALL)
275 if (IsSpace(m_words1[i]))
282 s1 = m_words1[i].start;
283 e1 = m_words1[i].end;
// Side 2 has nothing to show: its range starts just after the previous word.
284 s2 = m_words2[j-1].end+1;
286 m_wdiffs.push_back(wdiff(s1, e1, s2, e2));
// Word only on side 2 (mirror image of the case above).
289 else if (edscript[k] == '+')
291 if (m_whitespace == WHITESPACE_IGNORE_ALL)
293 if (IsSpace(m_words2[j]))
300 s1 = m_words1[i-1].end+1;
302 s2 = m_words2[j].start;
303 e2 = m_words2[j].end;
304 m_wdiffs.push_back(wdiff(s1, e1, s2, e2));
// Word replaced: both sides contribute their full word range.
307 else if (edscript[k] == '!')
309 if (m_whitespace == WHITESPACE_IGNORE_CHANGE || m_whitespace == WHITESPACE_IGNORE_ALL)
311 if (IsSpace(m_words1[i]) && IsSpace(m_words2[j]))
318 s1 = m_words1[i].start;
319 e1 = m_words1[i].end;
320 s2 = m_words2[j].start;
321 e2 = m_words2[j].end ;
322 m_wdiffs.push_back(wdiff(s1, e1, s2, e2));
330 #ifdef STRINGDIFF_LOGGING
337 * @brief Add all different elements between lines to the wdiff list
// Splits both strings into words and fills m_wdiffs with their differences.
// BuildWordDiffList_DP is called beneath two word-count checks (below 20480
// and below 2048 on both sides). A single entry spanning from the first
// word start to the last word end on each side is also pushed.
// NOTE(review): the exact nesting of the two size checks and the condition
// guarding the whole-line entry (presumably that succeeded is still false)
// are on lines not visible in this view.
340 stringdiffs::BuildWordDiffList()
342 m_words1 = BuildWordsArray(m_str1);
343 m_words2 = BuildWordsArray(m_str2);
345 bool succeeded = false;
347 if (m_words1.size() < 20480 && m_words2.size() < 20480)
349 if (m_words1.size() < 2048 && m_words2.size() < 2048)
352 succeeded = BuildWordDiffList_DP();
// Whole-line entry: first word start to last word end on both sides.
356 int s1 = m_words1[0].start;
357 int e1 = m_words1[m_words1.size() - 1].end;
358 int s2 = m_words2[0].start;
359 int e2 = m_words2[m_words2.size() - 1].end;
360 m_wdiffs.push_back(wdiff(s1, e1, s2, e2));
367 * @brief Break line into constituent words
// Splits str into a sequence of word records. Three kinds are produced:
// whitespace runs (dlspace), ordinary words (dlword) and single break
// characters (dlbreak). Positions are stepped with an ICU character break
// iterator, and each record carries a hash of its text.
// @param str  line to split.
// @return the word records, preceded by one empty placeholder record.
// NOTE(review): the labels, the definition of e and atspace, and several
// branch bodies are on lines not visible in this view.
369 std::vector<stringdiffs::word>
370 stringdiffs::BuildWordsArray(const String & str)
372 std::vector<word> words;
373 int i = 0, begin = 0;
374 ICUBreakIterator *pIterChar = ICUBreakIterator::getCharacterBreakIterator(reinterpret_cast<const UChar *>(str.c_str()), static_cast<int32_t>(str.length()));
// Offsets are kept as int, so the length must fit.
376 size_t sLen = str.length();
377 assert(sLen < INT_MAX);
378 int iLen = static_cast<int>(sLen);
// Placeholder record at index 0 (start 0, end -1, i.e. empty); onp sizes its
// problem as size() - 1 and snake reads words at index + 1.
381 words.push_back(word(0, -1, 0, 0));
383 // state when we are looking for next word
385 if (isSafeWhitespace(str[i]))
387 i = pIterChar->next();
392 // just finished a whitespace run (stored below as a dlspace word)
393 // e is first word character (space or at end)
396 words.push_back(word(begin, e, dlspace, Hash(str, begin, e, 0)));
403 // state when we are inside a word
// A word ends at the end of the string, at whitespace, or at a break
// character; the end-of-string test comes first so str[i] is only examined
// by isWordBreak for i below iLen.
406 if (i == iLen || ((atspace = isSafeWhitespace(str[i])) != 0) || isWordBreak(m_breakType, str.c_str(), i))
410 // just finished a word
411 // e is first non-word character (space or at end)
414 words.push_back(word(begin, e, dlword, Hash(str, begin, e, 0)));
427 // start a new word because we hit a non-whitespace word break (eg, a comma)
428 // but, we have to put each word break character into its own word
429 int inext = pIterChar->next();
430 words.push_back(word(i, inext - 1, dlbreak, Hash(str, i, inext - 1, 0)));
436 i = pIterChar->next();
437 goto inword; // safe even if we're at the end or no longer in a word
441 * @brief Populate m_pDiffs from m_wdiffs (combining adjacent diffs)
443 * Doing the combining of adjacent diffs here keeps some complexity out of BuildWordsArray.
// Copies m_wdiffs into the caller list m_pDiffs. An entry that is directly
// followed on both sides by the next entry is folded into that next entry
// instead of being stored on its own.
// NOTE(review): the statements that skip an entry (after merging, or after
// the end-of-line test below) are on lines not visible in this view.
446 stringdiffs::PopulateDiffs()
// True when the inclusive range [begin, end] of text is empty or is exactly
// one line ending. Ranges longer than two characters are tested first
// (presumably rejected; the statement after the test is not visible).
448 auto IsEOLorEmpty = [](const String& text, size_t begin, size_t end) -> bool {
449 if (end - begin + 1 > 2)
451 String str = text.substr(begin, end - begin + 1);
452 return (str.empty() || str == _T("\r\n") || str == _T("\n") || str == _T("\r"));
455 for (int i=0; i< (int)m_wdiffs.size(); ++i)
458 // combine it with next ?
459 if (i+1< (int)m_wdiffs.size())
// Contiguous means the next entry begins one past this entry on both sides.
461 if (m_wdiffs[i].end[0] + 1 == m_wdiffs[i+1].begin[0]
462 && m_wdiffs[i].end[1] + 1 == m_wdiffs[i+1].begin[1])
464 // diff[i] and diff[i+1] are contiguous
465 // so combine them into diff[i+1] and ignore diff[i]
466 m_wdiffs[i+1].begin[0] = m_wdiffs[i].begin[0];
467 m_wdiffs[i+1].begin[1] = m_wdiffs[i].begin[1];
// When line endings are not significant, an entry that is only a line
// ending or empty on both sides is tested for here.
473 if (!m_eol_sensitive &&
474 IsEOLorEmpty(m_str1, m_wdiffs[i].begin[0], m_wdiffs[i].end[0]) &&
475 IsEOLorEmpty(m_str2, m_wdiffs[i].begin[1], m_wdiffs[i].end[1]))
480 // Should never have a pair where both are missing
481 assert(m_wdiffs[i].begin[0]>=0 || m_wdiffs[i].begin[1]>=0);
483 // Store the diff[i] in the caller list (m_pDiffs)
484 m_pDiffs->push_back(wdiff(m_wdiffs[i]));
491 /* Rotate a value n bits to the left. */
// UINT_BIT is the width of unsigned in bits; ROL assumes 0 < n < UINT_BIT,
// since a shift by the full width is undefined in C++.
492 #define UINT_BIT (sizeof (unsigned) * CHAR_BIT)
493 #define ROL(v, n) ((v) << (n) | (v) >> (UINT_BIT - (n)))
494 /* Given a hash value and a new character, return a new hash value. */
495 #define HASH(h, c) ((c) + ROL (h, 7))
// Hashes the characters of str in the inclusive range [begin, end],
// starting from the seed h.
// @param str    text containing the word.
// @param begin  index of the first character.
// @param end    index of the last character (inclusive).
// @param h      starting hash value.
// NOTE(review): the HASH applications and the return statement are on lines
// not visible here; the upper-cased character is presumably used when
// m_case_sensitive is false.
498 stringdiffs::Hash(const String & str, int begin, int end, unsigned h) const
500 for (int i = begin; i <= end; ++i)
// NOTE(review): the value is cast to unsigned and then stored back into a
// TCHAR, so the cast has no lasting effect on the stored type.
502 TCHAR ch = static_cast<unsigned>(str[i]);
503 if (m_case_sensitive)
509 ch = static_cast<unsigned>(_totupper(ch));
518 * @brief Compare two words (by reference to original strings)
// Compares a word of m_str1 with a word of m_str2.
// @param word1  word record indexing into m_str1.
// @param word2  word record indexing into m_str2.
// The checks run from cheapest to most expensive: whitespace handling,
// hash, length, then a character-by-character comparison via caseMatch.
// NOTE(review): the return statements are on lines not visible in this view.
521 stringdiffs::AreWordsSame(const word & word1, const word & word2) const
// Outside WHITESPACE_COMPARE_ALL, two whitespace words are tested for first.
523 if (this->m_whitespace != WHITESPACE_COMPARE_ALL)
525 if (IsSpace(word1) && IsSpace(word2))
528 if (word1.hash != word2.hash)
530 if (word1.length() != word2.length())
// Lengths are equal here, so word1.length() bounds both sides.
532 for (int i=0; i<word1.length(); ++i)
534 if (!caseMatch(m_str1[word1.start+i], m_str2[word2.start+i]))
541 * @brief Return true if characters match
// Compares two characters, honouring m_case_sensitive.
// NOTE(review): the case-sensitive branch body is not visible here
// (presumably a direct equality test).
544 stringdiffs::caseMatch(TCHAR ch1, TCHAR ch2) const
546 if (m_case_sensitive)
// Case-insensitive path: compare the upper-cased forms.
549 return _totupper(ch1)==_totupper(ch2);
553 * @brief An O(NP) Sequence Comparison Algorithm. Sun Wu, Udi Manber, Gene Myers
// Computes an edit script between m_words1 and m_words2 and appends it to
// edscript, one character per operation: equals sign for a match, minus and
// plus for one-sided words, and exclamation mark where a one-sided word is
// directly followed by the opposite kind.
// @param edscript  receives the operations.
// NOTE(review): the return statements, the declarations of k, p, i, count
// and DELTA, and the condition that swaps M and N are on lines not visible
// in this view; BuildWordDiffList_DP tests the result for being negative.
556 stringdiffs::onp(std::vector<char> &edscript)
// NOTE(review): system_clock is a wall clock and may be adjusted while this
// runs; steady_clock is the monotonic choice for measuring elapsed time.
558 auto start = std::chrono::system_clock::now();
// Word counts exclude one record per side (size() - 1).
560 int M = static_cast<int>(m_words1.size() - 1);
561 int N = static_cast<int>(m_words2.size() - 1);
562 bool exchanged = false;
// Swapped assignment of the sizes (presumably taken when the first sequence
// is the longer one, together with setting exchanged).
565 M = static_cast<int>(m_words2.size() - 1);
566 N = static_cast<int>(m_words1.size() - 1);
// Both arrays are offset by M+1 so that indices -(M+1) .. (N+1) are valid.
// NOTE(review): these are raw new[] allocations released by hand below; an
// exception thrown in between (for example from push_back) would leak them.
569 int *fp = (new int[(M+1) + 1 + (N+1)]) + (M+1);
570 struct EditScriptElem { int op; int neq; int pk; int pi; };
571 std::vector<EditScriptElem> *es = (new std::vector<EditScriptElem>[(M+1) + 1 + (N+1)]) + (M+1);
// Records how fp[k] was reached: from diagonal k-1 or from diagonal k+1,
// the number of matches that followed (neq), and a back-link (pk, pi) to
// the element it continues from.
574 auto addEditScriptElem = [&es, &fp](int k) {
576 if (fp[k - 1] + 1 > fp[k + 1])
579 ese.neq = fp[k] - (fp[k - 1] + 1);
585 ese.neq = fp[k] - fp[k + 1];
588 ese.pi = static_cast<int>(es[ese.pk].size() - 1);
589 es[k].push_back(ese);
// Threshold for count, tested before the clock is read.
592 const int COUNTMAX = 100000;
// Visits every valid index of fp.
595 for (k = -(M+1); k <= (N+1); k++)
// Diagonals below DELTA, in ascending order.
601 for (k = -p; k <= DELTA-1; k++)
603 fp[k] = snake(k, std::max(fp[k-1] + 1, fp[k+1]), exchanged);
604 addEditScriptElem(k);
// Diagonals above DELTA, in descending order.
607 for (k = DELTA + p; k >= DELTA+1; k--)
609 fp[k] = snake(k, std::max(fp[k-1] + 1, fp[k+1]), exchanged);
610 addEditScriptElem(k);
// Same step outside the two loops (presumably for k = DELTA, given the
// loop-exit test on fp[k] below).
614 fp[k] = snake(k, std::max(fp[k-1] + 1, fp[k+1]), exchanged);
615 addEditScriptElem(k);
// Time budget check against TimeoutMilliSeconds; on timeout both arrays are
// released here.
618 if (count > COUNTMAX)
621 auto end = std::chrono::system_clock::now();
622 auto msec = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
623 if (msec > TimeoutMilliSeconds)
625 delete [] (es - (M+1));
626 delete [] (fp - (M+1));
630 } while (fp[k] != N);
// Walk the back-links from the last element on diagonal DELTA; each element
// first contributes neq entries (pushed on a line not visible here) and then
// its own op. The sequence comes out backwards and is reversed afterwards.
634 std::vector<char> ses;
636 for (k = DELTA, i = static_cast<int>(es[DELTA].size() - 1); i >= 0;)
638 EditScriptElem& esi = es[k][i];
639 for (int j = 0; j < esi.neq; ++j)
641 ses.push_back(static_cast<char>(esi.op));
645 std::reverse(ses.begin(), ses.end());
// Translate to the caller's alphabet, starting at index 1. When the inputs
// were exchanged, plus and minus swap meaning.
// NOTE(review): i is an int while ses.size() is unsigned in the two
// look-ahead tests below (mixed signed/unsigned comparison).
648 for (i = 1; i < static_cast<int>(ses.size()); i++)
653 if (i + 1 < ses.size() && ses[i + 1] == '-')
655 edscript.push_back('!');
661 edscript.push_back(exchanged ? '-' : '+');
666 if (i + 1 < ses.size() && ses[i + 1] == '+')
668 edscript.push_back('!');
674 edscript.push_back(exchanged ? '+' : '-');
679 edscript.push_back('=');
// Undo the M+1 offset before releasing the arrays.
683 delete [] (es - (M+1));
684 delete [] (fp - (M+1));
// Extends a run of equal words along diagonal k starting at row y.
// @param k          diagonal being followed.
// @param y          starting position on the N-sized sequence.
// @param exchanged  true when onp swapped the roles of the two word lists.
// NOTE(review): the initialisation of x (presumably y - k) and the return
// statement are on lines not visible in this view.
690 stringdiffs::snake(int k, int y, bool exchanged)
692 int M = static_cast<int>(exchanged ? m_words2.size() - 1 : m_words1.size() - 1);
693 int N = static_cast<int>(exchanged ? m_words1.size() - 1 : m_words2.size() - 1);
// Words are read at index + 1. AreWordsSame always receives the m_words1
// entry first, so the x and y roles swap when exchanged is set.
695 while (x < M && y < N && (exchanged ? AreWordsSame(m_words1[y + 1], m_words2[x + 1]) : AreWordsSame(m_words1[x + 1], m_words2[y + 1]))) {
696 x = x + 1; y = y + 1;
702 * @brief Return true if chars match
704 * Caller must not call this for lead bytes
// Compares len characters of ch1 and ch2.
// @param ch1, ch2  start of the two character runs.
// @param len       number of TCHAR units to compare.
// @param casitive  case sensitivity flag (presumably selects the memcmp
//                  path when true; the test itself is not visible here).
707 matchchar(const TCHAR *ch1, const TCHAR *ch2, size_t len, bool casitive)
// Exact comparison of the raw characters.
710 return memcmp(ch1, ch2, len * sizeof(TCHAR)) == 0;
// Otherwise compare the upper-cased characters one by one.
711 for (size_t i = 0; i < len; ++i)
713 if (_totupper(ch1[i]) != _totupper(ch2[i]))
720 /** Does character introduce a multicharacter character? */
// NOTE(review): lines between the signature and the return are not visible
// (presumably a separate path for Unicode builds - confirm).
721 static inline bool IsLeadByte(TCHAR ch)
// True only when a multibyte code page is active and ch is a DBCS lead byte.
726 return _getmbcp() && IsDBCSLeadByte(ch);
731 * @brief Is it whitespace (excludes all lead & trail bytes)?
// True when ch is classified as whitespace by _istspace and is not a lead
// byte according to IsLeadByte.
734 isSafeWhitespace(TCHAR ch)
736 return _istspace((unsigned)ch) && !IsLeadByte(ch);
740 * @brief Is it a non-whitespace wordbreak character (ie, punctuation)?
// Tests whether the character at str[index] is a break character.
// @param breakType  0 for whitespace only, 1 to break on punctuation too.
// @param str        text being scanned.
// @param index      position of the character to test.
// NOTE(review): the breakType test and the result for characters outside
// the low range are on lines not visible in this view.
743 isWordBreak(int breakType, const TCHAR *str, int index)
745 TCHAR ch = str[index];
746 // breakType==1 means break also on punctuation
// Only characters whose high byte is zero are looked up in BreakChars.
747 if ((ch & 0xff00) == 0)
749 // TCHAR nextCh = str[index + 1];
750 // breakType==0 means whitespace only
753 return _tcschr(BreakChars, ch) != nullptr;
// Disabled alternatives kept for reference: fullwidth and ideographic
// punctuation, and a character-type transition test.
758 // ch==0xff0c/* Fullwidth Comma */ ||
759 // ch==0xff0e/* Fullwidth Full Stop */ ||
760 // ch==0xff1b/* Fullwidth Semicolon */ ||
761 // ch==0xff1a/* Fullwidth Colon */ ||
762 // ch==0x3002/* Ideographic Full Stop */ ||
763 // ch==0x3001/* Ideographic Comma */
766 // WORD wCharType, wCharTypeNext;
767 // GetStringTypeW(CT_CTYPE3, &ch, 1, &wCharType);
768 // TCHAR nextCh = str[index + 1];
769 // GetStringTypeW(CT_CTYPE3, &nextCh, 1, &wCharTypeNext);
770 // return (wCharType != wCharTypeNext);
778 * @brief advance current pointer over whitespace, until not whitespace or beyond end
779 * @param pcurrent [in,out] current location (to be advanced)
780 * @param end [in] last valid position (only go one beyond this)
// Moves *pcurrent forward one TCHAR at a time while it is at or before end
// and points at whitespace, so it can stop one position past end.
783 AdvanceOverWhitespace(const TCHAR **pcurrent, const TCHAR *end)
785 // advance over whitespace
786 while (*pcurrent <= end && isSafeWhitespace(**pcurrent))
787 ++(*pcurrent); // DBCS safe because of isSafeWhitespace above
791 * @brief Compute begin1,begin2,end1,end2 to display byte difference between strings str1 & str2
792 * @param casitive [in] true for case-sensitive, false for case-insensitive
793 * @param xwhite [in] This governs whether we handle whitespace specially (see WHITESPACE_COMPARE_ALL, WHITESPACE_IGNORE_CHANGE, WHITESPACE_IGNORE_ALL)
794 * @param [out] begin return -1 if not found or pos of equal
795 * @param [out] end return -1 if not found or pos of equal valid if begin1 >=0
796 * @param [in] equal false: search for a diff, true: search for equal
799 * Assumes whitespace is never leadbyte or trailbyte!
// Narrows a differing pair of strings to the character range that actually
// differs: a forward scan advances py1/py2 past the common prefix, a reverse
// scan retreats pz1/pz2 past the common suffix, and the remaining ranges are
// reported through begin[] and end[] (inclusive offsets, -1 for none).
// NOTE(review): loop headers, several branch bodies and the early returns
// are on lines not visible in this view.
802 stringdiffs::ComputeByteDiff(const String & str1, const String & str2,
803 bool casitive, int xwhite,
804 int begin[2], int end[2], bool equal)
806 // Set to sane values
807 // Also this way can distinguish if we set begin[0] to -1 for no diff in line
808 begin[0] = end[0] = begin[1] = end[1] = 0;
810 int len1 = static_cast<int>(str1.length());
811 int len2 = static_cast<int>(str2.length());
813 const TCHAR *pbeg1 = str1.c_str();
814 const TCHAR *pbeg2 = str2.c_str();
// Four separate character iterators: one per string for the forward scan
// and one per string for the reverse scan.
816 ICUBreakIterator *pIterCharBegin1 = ICUBreakIterator::getCharacterBreakIterator(reinterpret_cast<const UChar *>(pbeg1), static_cast<int32_t>(len1));
817 ICUBreakIterator *pIterCharBegin2 = ICUBreakIterator::getCharacterBreakIterator<2>(reinterpret_cast<const UChar *>(pbeg2), static_cast<int32_t>(len2));
818 ICUBreakIterator *pIterCharEnd1 = ICUBreakIterator::getCharacterBreakIterator<3>(reinterpret_cast<const UChar *>(pbeg1), static_cast<int32_t>(len1));
819 ICUBreakIterator *pIterCharEnd2 = ICUBreakIterator::getCharacterBreakIterator<4>(reinterpret_cast<const UChar *>(pbeg2), static_cast<int32_t>(len2));
// Empty input on either side is handled separately (body not visible).
821 if (len1 == 0 || len2 == 0)
833 // cursors from front, which we advance to beginning of difference
834 const TCHAR *py1 = pbeg1;
835 const TCHAR *py2 = pbeg2;
837 // pen1,pen2 point to the last valid character (broken multibyte lead chars don't count)
838 const TCHAR *pen1 = pbeg1 + (len1 > 0 ? pIterCharEnd1->preceding(len1) : 0);
839 const TCHAR *pen2 = pbeg2 + (len2 > 0 ? pIterCharEnd2->preceding(len2) : 0);
// Length, in TCHAR units, of the last character on each side.
840 size_t glyphlenz1 = pbeg1 + len1 - pen1;
841 size_t glyphlenz2 = pbeg2 + len2 - pen2;
843 if (xwhite != WHITESPACE_COMPARE_ALL)
845 // Ignore leading and trailing whitespace
846 // by advancing py1 and py2
847 // and retreating pen1 and pen2
848 while (py1 < pen1 && isSafeWhitespace(*py1))
849 py1 = pbeg1 + pIterCharBegin1->next();
850 while (py2 < pen2 && isSafeWhitespace(*py2))
851 py2 = pbeg2 + pIterCharBegin2->next();
// NOTE(review): pbeg1[len1] and pbeg2[len2] index the terminators returned
// by c_str(), which are both zero, and this point is reached with both
// lengths non-zero if the empty-input test above returns early - so the
// second half of this condition looks like it can never be true. Confirm.
852 if ((pen1 < pbeg1 + len1 - 1 || pen2 < pbeg2 + len2 -1)
853 && (!len1 || !len2 || pbeg1[len1] != pbeg2[len2]))
855 // mismatched broken multibyte ends
859 while (pen1 > py1 && isSafeWhitespace(*pen1))
860 pen1 = pbeg1 + pIterCharEnd1->previous();
861 while (pen2 > py2 && isSafeWhitespace(*pen2))
862 pen2 = pbeg2 + pIterCharEnd2->previous();
865 //check for exception of empty string on one side
866 //In that case display all as a diff
867 if (!equal && (((py1 == pen1) && isSafeWhitespace(*pen1)) ||
868 ((py2 == pen2) && isSafeWhitespace(*pen2))))
876 // Advance over matching beginnings of lines
877 // Advance py1 & py2 from beginning until find difference or end
880 // Potential difference extends from py1 to pen1 and py2 to pen2
882 // Check if either side finished
883 if (py1 > pen1 && py2 > pen2)
885 begin[0] = end[0] = begin[1] = end[1] = -1;
888 if (py1 > pen1 || py2 > pen2)
893 // handle all the whitespace logic (due to WinMerge whitespace settings)
894 if (xwhite && py1 < pen1 && isSafeWhitespace(*py1))
896 if (xwhite==WHITESPACE_IGNORE_CHANGE && !isSafeWhitespace(*py2))
898 // py1 is white but py2 is not
899 // in WHITESPACE_IGNORE_CHANGE mode,
900 // this doesn't qualify as skippable whitespace
901 break; // done with forward search
903 // gobble up all whitespace in current area
// NOTE(review): AdvanceOverWhitespace moves py1/py2 directly and does not
// advance pIterCharBegin1/2, while the glyph step further down takes its
// next position from those iterators - confirm the two stay in step.
904 AdvanceOverWhitespace(&py1, pen1); // will go beyond end
905 AdvanceOverWhitespace(&py2, pen2); // will go beyond end
909 if (xwhite && py2 < pen2 && isSafeWhitespace(*py2))
911 if (xwhite==WHITESPACE_IGNORE_CHANGE && !isSafeWhitespace(*py1))
913 // py2 is white but py1 is not
914 // in WHITESPACE_IGNORE_CHANGE mode,
915 // this doesn't qualify as skippable whitespace
916 break; // done with forward search
918 // gobble up all whitespace in current area
919 AdvanceOverWhitespace(&py1, pen1); // will go beyond end
920 AdvanceOverWhitespace(&py2, pen2); // will go beyond end
// Compare one whole character (possibly several TCHAR units) per side.
924 const TCHAR* py1next = pbeg1 + pIterCharBegin1->next();
925 const TCHAR* py2next = pbeg2 + pIterCharBegin2->next();
926 size_t glyphleny1 = py1next - py1;
927 size_t glyphleny2 = py2next - py2;
928 if (glyphleny1 != glyphleny2 || !matchchar(py1, py2, glyphleny1, casitive))
929 break; // done with forward search
934 // Potential difference extends from py1 to pen1 and py2 to pen2
936 // Store results of advance into return variables (begin[0] & begin[1])
937 // -1 in a begin variable means no visible diff area
938 begin[0] = static_cast<int>(py1 - pbeg1);
939 begin[1] = static_cast<int>(py2 - pbeg2);
// Cursors from the back, which retreat to the end of the difference.
941 const TCHAR *pz1 = pen1;
942 const TCHAR *pz2 = pen2;
944 // Retreat over matching ends of lines
945 // Retreat pz1 & pz2 from end until find difference or beginning
948 // Check if either side finished
949 if (pz1 < py1 && pz2 < py2)
951 begin[0] = end[0] = begin[1] = end[1] = -1;
954 if (pz1 < py1 || pz2 < py2)
959 // handle all the whitespace logic (due to WinMerge whitespace settings)
960 if (xwhite && pz1 > py1 && isSafeWhitespace(*pz1))
// NOTE(review): the literal 1 here presumably stands for
// WHITESPACE_IGNORE_CHANGE, which the forward scan spells out - confirm.
962 if (xwhite==1 && !isSafeWhitespace(*pz2))
963 break; // done with reverse search
964 // gobble up all whitespace in current area
965 while (pz1 > py1 && isSafeWhitespace(*pz1))
966 pz1 = pbeg1 + pIterCharEnd1->previous();
967 while (pz2 > py2 && isSafeWhitespace(*pz2))
968 pz2 = pbeg2 + pIterCharEnd2->previous();
972 if (xwhite && pz2 > py2 && isSafeWhitespace(*pz2))
975 break; // done with reverse search
976 while (pz2 > py2 && isSafeWhitespace(*pz2))
977 pz2 = pbeg2 + pIterCharEnd2->previous();
// Compare the current trailing characters, then step both cursors back by
// one character and remember the length of the character stepped onto.
981 if (glyphlenz1 != glyphlenz2 || !matchchar(pz1, pz2, glyphlenz1, casitive))
982 break; // done with reverse search
983 const TCHAR* pz1next = pz1;
984 const TCHAR* pz2next = pz2;
// At the very first character there is nothing before it, so the cursor is
// simply moved one unit before the start.
985 pz1 = (pz1 > pbeg1) ? pbeg1 + pIterCharEnd1->preceding(static_cast<int32_t>(pz1 - pbeg1)) : pz1 - 1;
986 pz2 = (pz2 > pbeg2) ? pbeg2 + pIterCharEnd2->preceding(static_cast<int32_t>(pz2 - pbeg2)) : pz2 - 1;
987 glyphlenz1 = pz1next - pz1;
988 glyphlenz2 = pz2next - pz2;
989 // Now do real character match
992 /* if (*pz1 == '\r' && *(pz1+1) == '\n')
997 else if (*pz2 == '\r' && *(pz2+1) == '\n')
1002 if (*(pbeg1-1) == '\r' && *pbeg1 == '\n')
1007 else if (*(pbeg2-1) == '\r' && *pbeg2 == '\n')
1013 // Store results of advance into return variables (end[0] & end[1])
1014 end[0] = static_cast<int>(pz1 - pbeg1 + glyphlenz1 - 1);
1015 end[1] = static_cast<int>(pz2 - pbeg2 + glyphlenz2 - 1);
1017 // Check if difference region was empty
1018 if (begin[0] == end[0] + 1 && begin[1] == end[1] + 1)
1019 begin[0] = -1; // no diff
1023 * @brief adjust the range of the specified word diffs down to byte(char) level.
1024 * Takes no arguments; it works on the members m_str1 and m_str2 (the lines to be compared),
1025 * m_case_sensitive (true for case-sensitive, false for case-insensitive) and
1026 * m_whitespace, which governs whether we handle whitespace specially
1027 * (see WHITESPACE_COMPARE_ALL, WHITESPACE_IGNORE_CHANGE, WHITESPACE_IGNORE_ALL)
// Narrows every entry of m_wdiffs from word granularity to the characters
// that actually differ, by running ComputeByteDiff on the two substrings
// the entry covers and translating the result back to line offsets.
// NOTE(review): the tests that choose between the two outcomes per side
// (presumably on begin[] being negative) are not visible in this view.
1029 void stringdiffs::wordLevelToByteLevel()
1031 for (size_t i = 0; i < m_wdiffs.size(); i++)
1033 int begin[3], end[3];
1034 wdiff& diff = m_wdiffs[i];
1035 String str1_2, str2_2;
// Inclusive ranges, hence the + 1 on the lengths.
1036 str1_2 = m_str1.substr(diff.begin[0], diff.end[0] - diff.begin[0] + 1);
1037 str2_2 = m_str2.substr(diff.begin[1], diff.end[1] - diff.begin[1] + 1);
// The offsets returned in begin/end are relative to the substrings.
1038 ComputeByteDiff(str1_2, str2_2, m_case_sensitive, m_whitespace, begin, end, false);
1041 // no visible diff on side1
1042 diff.end[0] = diff.begin[0] - 1;
// end is rebased first because it needs the original diff.begin, which the
// following line overwrites.
1046 diff.end[0] = diff.begin[0] + end[0];
1047 diff.begin[0] += begin[0];
1051 // no visible diff on side2
1052 diff.end[1] = diff.begin[1] - 1;
// Same ordering constraint as for side 1.
1056 diff.end[1] = diff.begin[1] + end[1];
1057 diff.begin[1] += begin[1];