2 * @file stringdiffs.cpp
4 * @brief Implementation file for ComputeWordDiffs (q.v.)
9 #include "stringdiffs.h"
13 #include "CompareOptions.h"
14 #include "stringdiffsi.h"
// File-scope state for word-break handling and the diff time limit.
// NOTE(review): the code that reads/writes Initialized and CustomChars is not visible in this
// excerpt; presumably CustomChars records that BreakChars points at a _tcsdup'ed copy — confirm.
22 static bool Initialized;
23 static bool CustomChars;
// Current set of punctuation characters treated as word breaks; isWordBreak searches it with _tcschr.
24 static TCHAR *BreakChars;
// Built-in break characters; initialization code in this file points BreakChars at this array.
25 static TCHAR BreakCharDefaults[] = _T(",.;:");
// Limit, in milliseconds, that stringdiffs::onp compares its elapsed time against.
26 static int TimeoutMilliSeconds = 500;
// Forward declarations of file-local helpers defined further down.
28 static bool isSafeWhitespace(TCHAR ch);
29 static bool isWordBreak(int breakType, const TCHAR *str, int index, bool ignore_numbers);
33 BreakChars = &BreakCharDefaults[0];
// Install a caller-supplied set of word-break characters.
// @param breakChars  string of break characters; BreakChars is set to a copy made with _tcsdup.
// NOTE(review): the statements between the signature and the assignment are not visible here;
// confirm that a previously duplicated buffer is released before BreakChars is overwritten.
48 void SetBreakChars(const TCHAR *breakChars)
56 BreakChars = _tcsdup(breakChars);
// Two-string convenience overload: packs str1 and str2 (plus an empty third slot) into an
// array and forwards to the nFiles overload with nFiles = 2; all options pass through unchanged.
60 ComputeWordDiffs(const String& str1, const String& str2,
61 bool case_sensitive, bool eol_sensitive, int whitespace, bool ignore_numbers, int breakType, bool byte_level)
// The array always has three slots; the third is an empty string for this two-way compare.
63 String strs[3] = {str1, str2, _T("")};
64 return ComputeWordDiffs(2, strs, case_sensitive, eol_sensitive, whitespace, ignore_numbers, breakType, byte_level);
// Store the string array pointer and the case-sensitivity flag used by operator().
// The pointer is kept as-is (no copy), so the array must outlive the functor.
69 Comp02Functor(const String *strs, bool case_sensitive) :
70 strs_(strs), case_sensitive_(case_sensitive)
// Compare the text ranges that a 3-way diff record assigns to strings 0 and 2.
// NOTE(review): the return statements and the use of wlen2 are not visible in this excerpt —
// confirm which result means "equal" and that the two lengths are compared before the buffers.
73 bool operator()(const wdiff &wd3)
// Lengths (in TCHARs) of the inclusive [begin, end] ranges in slots 0 and 2.
75 size_t wlen0 = wd3.end[0] - wd3.begin[0] + 1;
76 size_t wlen2 = wd3.end[2] - wd3.begin[2] + 1;
// Exact comparison of wlen0 characters from each range.
81 if (memcmp(&strs_[0][wd3.begin[0]], &strs_[2][wd3.begin[2]], wlen0 * sizeof(TCHAR)) != 0)
// Case-insensitive comparison of the same ranges.
86 if (_tcsnicmp(&strs_[0][wd3.begin[0]], &strs_[2][wd3.begin[2]], wlen0) != 0)
96 * @brief Construct our worker object and tell it to do the work
// Compute the differing ranges between the strings in str (two-way or three-way).
// @param nFiles  number of strings being compared. NOTE(review): the branch on nFiles is not visible here.
// @param str  array of strings; elements 0, 1 and 2 are read below.
// @param case_sensitive, eol_sensitive, whitespace, ignore_numbers, breakType  forwarded to the stringdiffs workers.
// @param byte_level  NOTE(review): presumably gates the wordLevelToByteLevel() calls; the guarding conditions are not visible.
99 ComputeWordDiffs(int nFiles, const String *str,
100 bool case_sensitive, bool eol_sensitive, int whitespace, bool ignore_numbers, int breakType, bool byte_level)
// Result list; the workers below are handed a pointer to the list they should fill.
102 std::vector<wdiff> diffs;
// Two-string comparison: str[0] against str[1].
105 stringdiffs sdiffs(str[0], str[1], case_sensitive, eol_sensitive, whitespace, ignore_numbers, breakType, &diffs);
106 // Hash all words in both lines and then compare them word by word
107 // storing differences into m_wdiffs
108 sdiffs.BuildWordDiffList();
// Narrow each word-level diff down to the differing characters.
111 sdiffs.wordLevelToByteLevel();
113 // Now copy m_wdiffs into caller-supplied m_pDiffs (coalescing adjacents if possible)
114 sdiffs.PopulateDiffs();
// str[1] against str[2]. NOTE(review): the condition selecting this branch is not visible;
// by analogy with the branches below, presumably str[0].empty().
121 stringdiffs sdiffs(str[1], str[2], case_sensitive, eol_sensitive, whitespace, ignore_numbers, breakType, &diffs);
122 sdiffs.BuildWordDiffList();
124 sdiffs.wordLevelToByteLevel();
125 sdiffs.PopulateDiffs();
// The worker reported the pair in slots 0/1; shift them to slots 1/2 so they line up with str[1]/str[2].
126 for (size_t i = 0; i < diffs.size(); i++)
128 wdiff& diff = diffs[i];
129 diff.begin[2] = diff.begin[1];
130 diff.begin[1] = diff.begin[0];
132 diff.end[2] = diff.end[1];
133 diff.end[1] = diff.end[0];
// Middle string empty: compare str[0] against str[2].
137 else if (str[1].empty())
139 stringdiffs sdiffs(str[0], str[2], case_sensitive, eol_sensitive, whitespace, ignore_numbers, breakType, &diffs);
140 sdiffs.BuildWordDiffList();
142 sdiffs.wordLevelToByteLevel();
143 sdiffs.PopulateDiffs();
// Copy the second side from slot 1 to slot 2 so it lines up with str[2]; slot 0 is left as reported.
144 for (size_t i = 0; i < diffs.size(); i++)
146 wdiff& diff = diffs[i];
147 diff.begin[2] = diff.begin[1];
148 //diff.begin[0] = diff.begin[0];
150 diff.end[2] = diff.end[1];
151 //diff.end[0] = diff.end[0];
// Third string empty: compare str[0] against str[1]; the visible loop body holds only
// commented-out self-assignments, i.e. slots 0/1 are used as reported.
155 else if (str[2].empty())
157 stringdiffs sdiffs(str[0], str[1], case_sensitive, eol_sensitive, whitespace, ignore_numbers, breakType, &diffs);
158 sdiffs.BuildWordDiffList();
160 sdiffs.wordLevelToByteLevel();
161 sdiffs.PopulateDiffs();
162 for (size_t i = 0; i < diffs.size(); i++)
164 wdiff& diff = diffs[i];
165 //diff.begin[1] = diff.begin[1];
166 //diff.begin[0] = diff.begin[0];
168 //diff.end[1] = diff.end[1];
169 //diff.end[0] = diff.end[0];
// General three-way case: diff the middle string against each neighbour, then merge the two lists.
// NOTE(review): both workers are given whitespace = 0 rather than the caller's whitespace argument — confirm this is intended.
175 std::vector<wdiff> diffs10, diffs12;
176 stringdiffs sdiffs10(str[1], str[0], case_sensitive, eol_sensitive, 0, ignore_numbers, breakType, &diffs10);
177 stringdiffs sdiffs12(str[1], str[2], case_sensitive, eol_sensitive, 0, ignore_numbers, breakType, &diffs12);
178 // Hash all words in both lines and then compare them word by word
179 // storing differences into m_wdiffs
180 sdiffs10.BuildWordDiffList();
181 sdiffs12.BuildWordDiffList();
184 sdiffs10.wordLevelToByteLevel();
185 sdiffs12.wordLevelToByteLevel();
187 // Now copy m_wdiffs into caller-supplied m_pDiffs (coalescing adjacents if possible)
188 sdiffs10.PopulateDiffs();
189 sdiffs12.PopulateDiffs();
// Merge the pairwise lists into diffs; Comp02Functor compares the str[0] and str[2] ranges of a merged record.
191 Make3wayDiff(diffs, diffs10, diffs12,
192 Comp02Functor(str, case_sensitive), false);
// Compare two strings under the given comparison options.
// @return the result of String::compare with str2 (or its normalized copy) as the receiver and
//         str1 (or its normalized copy) as the argument.
198 int Compare(const String& str1, const String& str2,
199 bool case_sensitive, bool eol_sensitive, int whitespace, bool ignore_numbers)
// Fast path: no option relaxes the comparison, so compare the strings as they are.
201 if (case_sensitive && eol_sensitive && whitespace == WHITESPACE_COMPARE_ALL && !ignore_numbers)
202 return str2.compare(str1);
// Work on copies so the inputs stay untouched.
203 String s1 = str1, s2 = str2;
// Lower-case both copies. NOTE(review): the guarding condition is not visible; presumably !case_sensitive.
206 s1 = strutils::makelower(s1);
207 s2 = strutils::makelower(s2);
// Whitespace options: space/tab is replaced by a single space (IGNORE_CHANGE) or by nothing (IGNORE_ALL).
// NOTE(review): strutils::replace_chars is defined elsewhere — confirm whether it collapses runs or maps per character.
209 if (whitespace == WHITESPACE_IGNORE_CHANGE)
211 strutils::replace_chars(s1, _T(" \t"), _T(" "));
212 strutils::replace_chars(s2, _T(" \t"), _T(" "));
214 else if (whitespace == WHITESPACE_IGNORE_ALL)
216 strutils::replace_chars(s1, _T(" \t"), _T(""));
217 strutils::replace_chars(s2, _T(" \t"), _T(""));
// Line-ending normalization. NOTE(review): guard not visible; presumably applied when !eol_sensitive.
221 strutils::replace_chars(s1, _T("\r\n"), _T("\n"));
222 strutils::replace_chars(s2, _T("\r\n"), _T("\n"));
// Digit removal. NOTE(review): guard not visible; presumably applied when ignore_numbers is set.
226 strutils::replace_chars(s1, _T("0123456789"), _T(""));
227 strutils::replace_chars(s2, _T("0123456789"), _T(""));
// Compare the normalized copies, with the same receiver/argument order as the fast path.
229 return s2.compare(s1);
233 * @brief stringdiffs constructor simply loads all members from arguments
// Store the comparison options for one pair of strings.
// NOTE(review): some member initializers (e.g. for the two strings and the output pointer) fall
// outside the visible lines of this excerpt.
235 stringdiffs::stringdiffs(const String & str1, const String & str2,
236 bool case_sensitive, bool eol_sensitive, int whitespace, bool ignore_numbers, int breakType,
237 std::vector<wdiff> * pDiffs)
240 , m_case_sensitive(case_sensitive)
241 , m_eol_sensitive(eol_sensitive)
242 , m_whitespace(whitespace)
243 , m_ignore_numbers(ignore_numbers)
244 , m_breakType(breakType)
246 , m_matchblock(true) // Change to false to get word to word compare
252 * The destructor frees all diffs added to the vectors.
// Defaulted destructor: no hand-written cleanup; members are destroyed by their own destructors.
254 stringdiffs::~stringdiffs() = default;
256 #ifdef STRINGDIFF_LOGGING
// Debug helper: formats each entry of m_wdiffs and sends it to OutputDebugString.
258 stringdiffs::debugoutput()
260 for (size_t i = 0; i < m_wdiffs.size(); i++)
// Inclusive begin/end offsets of this diff on the first side (index 0) and the second side (index 1).
265 int s1 = m_wdiffs[i].begin[0];
266 int e1 = m_wdiffs[i].end[0];
267 int s2 = m_wdiffs[i].begin[1];
268 int e2 = m_wdiffs[i].end[1];
// Lengths of the two ranges.
270 int len1 = e1 - s1 + 1;
271 int len2 = e2 - s2 + 1;
// Extract the text for each side: one branch takes the whole range, the other at most 50 characters.
// NOTE(review): the condition choosing between the branches is not visible here.
274 str1 = m_str1.substr(s1 ,e1 - s1 + 1);
276 str1 = m_str1.substr(s1, 50);
279 str2 = m_str2.substr(s2, e2- s2 + 1);
281 str2 = m_str2.substr(s2, 50);
// NOTE(review): buf's declaration is not visible — confirm it is large enough for the formatted text.
283 wsprintf(buf, _T("left= %s, %d,%d,\nright= %s, %d,%d \n"),
284 str1.c_str(), s1, e1, str2.c_str(), s2, e2);
285 OutputDebugString(buf);
// Translate the edit script produced by onp() into word-level diff records in m_wdiffs.
// NOTE(review): the declarations of i, j, s1, e1, s2, e2, the index updates and the return
// statements are not visible in this excerpt.
291 stringdiffs::BuildWordDiffList_DP()
293 std::vector<char> edscript;
295 //if (dp(edscript) <= 0)
// A negative result from onp() takes a separate path (body not visible).
297 if (onp(edscript) < 0)
301 for (size_t k = 0; k < edscript.size(); k++)
// '-' : the diff record covers word i of m_words1.
304 if (edscript[k] == '-')
// Whitespace words get a separate test when all whitespace is ignored (the action taken is not visible).
306 if (m_whitespace == WHITESPACE_IGNORE_ALL)
308 if (IsSpace(m_words1[i]))
// Likewise for number words when numbers are ignored.
314 if (m_ignore_numbers && IsNumber(m_words1[i]))
// Side 1 spans word i; side 2 starts just past word j-1.
320 s1 = m_words1[i].start;
321 e1 = m_words1[i].end;
322 s2 = m_words2[j-1].end+1;
324 m_wdiffs.emplace_back(s1, e1, s2, e2);
// '+' : mirror image — the diff record covers word j of m_words2.
327 else if (edscript[k] == '+')
329 if (m_whitespace == WHITESPACE_IGNORE_ALL)
331 if (IsSpace(m_words2[j]))
338 if (m_ignore_numbers && IsNumber(m_words2[j]))
// Side 1 starts just past word i-1; side 2 spans word j.
344 s1 = m_words1[i-1].end+1;
346 s2 = m_words2[j].start;
347 e2 = m_words2[j].end;
348 m_wdiffs.emplace_back(s1, e1, s2, e2);
// '!' : word i on side 1 is paired with a different word j on side 2.
351 else if (edscript[k] == '!')
// Whitespace-vs-whitespace and number-vs-number pairs get a separate test under the matching ignore option.
353 if (m_whitespace == WHITESPACE_IGNORE_CHANGE || m_whitespace == WHITESPACE_IGNORE_ALL)
355 if (IsSpace(m_words1[i]) && IsSpace(m_words2[j]))
361 if (m_ignore_numbers && IsNumber(m_words1[i]) && IsNumber(m_words2[j]))
// Each side spans exactly its own word.
367 s1 = m_words1[i].start;
368 e1 = m_words1[i].end;
369 s2 = m_words2[j].start;
370 e2 = m_words2[j].end ;
371 m_wdiffs.emplace_back(s1, e1, s2, e2);
379 #ifdef STRINGDIFF_LOGGING
386 * @brief Add all different elements between lines to the wdiff list
// Tokenize both strings and fill m_wdiffs with their word-level differences.
389 stringdiffs::BuildWordDiffList()
// Split each string into word records.
391 m_words1 = BuildWordsArray(m_str1);
392 m_words2 = BuildWordsArray(m_str2);
394 bool succeeded = false;
// Word-count thresholds. NOTE(review): the statements that separate the two checks are not
// visible, so what differs between the "< 20480" and "< 2048" cases cannot be told from here.
396 if (m_words1.size() < 20480 && m_words2.size() < 20480)
398 if (m_words1.size() < 2048 && m_words2.size() < 2048)
401 succeeded = BuildWordDiffList_DP();
// One diff spanning from the first word's start to the last word's end on both sides.
// NOTE(review): presumably the fallback when succeeded is false — the guarding condition is not visible.
405 int s1 = m_words1[0].start;
406 int e1 = m_words1[m_words1.size() - 1].end;
407 int s2 = m_words2[0].start;
408 int e2 = m_words2[m_words2.size() - 1].end;
409 m_wdiffs.emplace_back(s1, e1, s2, e2);
416 * @brief Break line into constituent words
// Split str into word records: whitespace runs (dlspace), words (dlword) and single break
// characters (dlbreak / dlnumber).
// @param str  the line to tokenize.
// NOTE(review): the labels, most gotos and the return statement fall outside the visible lines.
418 std::vector<stringdiffs::word>
419 stringdiffs::BuildWordsArray(const String & str)
421 std::vector<word> words;
422 int i = 0, begin = 0;
// Character-boundary iterator over str; i is advanced with pIterChar->next().
423 ICUBreakIterator *pIterChar = ICUBreakIterator::getCharacterBreakIterator(reinterpret_cast<const UChar *>(str.c_str()), static_cast<int32_t>(str.length()));
// Positions are tracked as int, so the length is asserted to fit before narrowing.
425 size_t sLen = str.length();
426 assert(sLen < INT_MAX);
427 int iLen = static_cast<int>(sLen);
// First element is a placeholder with an empty range (end = -1).
// NOTE(review): presumably because onp()/snake() index the word arrays starting at 1 — confirm.
430 words.push_back(word(0, -1, 0, 0));
432 // state when we are looking for next word
434 if (isSafeWhitespace(str[i]))
436 i = pIterChar->next();
441 // just finished a word
442 // e is first word character (space or at end)
// Record [begin, e] as a whitespace word.
445 words.push_back(word(begin, e, dlspace, Hash(str, begin, e, 0)));
452 // state when we are inside a word
// A word ends at end of string, at whitespace (remembered in atspace) or at a break character.
455 if (i == iLen || ((atspace = isSafeWhitespace(str[i])) != 0) || isWordBreak(m_breakType, str.c_str(), i, m_ignore_numbers))
459 // just finished a word
460 // e is first non-word character (space or at end)
// Record [begin, e] as an ordinary word.
463 words.push_back(word(begin, e, dlword, Hash(str, begin, e, 0)));
476 // start a new word because we hit a non-whitespace word break (eg, a comma)
477 // but, we have to put each word break character into its own word
478 int break_type = dlbreak;
// With ignore_numbers, a digit at the break position is tagged dlnumber instead of dlbreak.
479 if (m_ignore_numbers && _istdigit(str[i]))
481 break_type = dlnumber;
// The break character runs from i up to (not including) the next character boundary.
483 int inext = pIterChar->next();
484 words.push_back(word(i, inext - 1, break_type, Hash(str, i, inext - 1, 0)));
490 i = pIterChar->next();
491 goto inword; // safe even if we're at the end or no longer in a word
495 * @brief Populate m_pDiffs from m_wdiffs (combining adjacent diffs)
497 * Doing the combining of adjacent diffs here keeps some complexity out of BuildWordsArray.
// Copy the entries of m_wdiffs into *m_pDiffs, folding an entry into its successor when the two
// are contiguous on both sides.
500 stringdiffs::PopulateDiffs()
// Helper: tests whether text[begin..end] (inclusive) is empty or exactly one end-of-line sequence.
502 auto IsEOLorEmpty = [](const String& text, size_t begin, size_t end) -> bool {
// Ranges longer than two characters are handled by an early exit (statement not visible).
503 if (end - begin + 1 > 2)
505 String str = text.substr(begin, end - begin + 1);
506 return (str.empty() || str == _T("\r\n") || str == _T("\n") || str == _T("\r"));
509 for (int i=0; i< (int)m_wdiffs.size(); ++i)
512 // combine it with next ?
513 if (i+1< (int)m_wdiffs.size())
515 if (m_wdiffs[i].end[0] + 1 == m_wdiffs[i+1].begin[0]
516 && m_wdiffs[i].end[1] + 1 == m_wdiffs[i+1].begin[1])
518 // diff[i] and diff[i+1] are contiguous
519 // so combine them into diff[i+1] and ignore diff[i]
520 m_wdiffs[i+1].begin[0] = m_wdiffs[i].begin[0];
521 m_wdiffs[i+1].begin[1] = m_wdiffs[i].begin[1];
// When line endings are not significant, a diff whose two sides are both empty or a bare EOL is
// tested here. NOTE(review): the action taken is not visible; presumably the diff is skipped.
527 if (!m_eol_sensitive &&
528 IsEOLorEmpty(m_str1, m_wdiffs[i].begin[0], m_wdiffs[i].end[0]) &&
529 IsEOLorEmpty(m_str2, m_wdiffs[i].begin[1], m_wdiffs[i].end[1]))
534 // Should never have a pair where both are missing
535 assert(m_wdiffs[i].begin[0]>=0 || m_wdiffs[i].begin[1]>=0);
537 // Store the diff[i] in the caller list (m_pDiffs)
538 m_pDiffs->emplace_back(m_wdiffs[i]);
545 /* Rotate a value n bits to the left. */
// UINT_BIT is the width of unsigned in bits.
546 #define UINT_BIT (sizeof (unsigned) * CHAR_BIT)
// ROL evaluates v and n twice each. NOTE(review): n must lie strictly between 0 and UINT_BIT
// for both shifts to be well-defined; the only use visible here passes 7.
547 #define ROL(v, n) ((v) << (n) | (v) >> (UINT_BIT - (n)))
548 /* Given a hash value and a new character, return a new hash value. */
549 #define HASH(h, c) ((c) + ROL (h, 7))
// Hash the characters str[begin..end] (inclusive), starting from the seed h.
// NOTE(review): the statements that fold ch into h and return the result are not visible here.
552 stringdiffs::Hash(const String & str, int begin, int end, unsigned h) const
554 for (int i = begin; i <= end; ++i)
// NOTE(review): the value is cast to unsigned but stored in a TCHAR, so the cast does not widen ch.
556 TCHAR ch = static_cast<unsigned>(str[i]);
557 if (m_case_sensitive)
// Otherwise the lower-cased character is used.
563 ch = static_cast<unsigned>(_totlower(ch));
572 * @brief Compare two words (by reference to original strings)
// Decide whether word1 (a range in m_str1) and word2 (a range in m_str2) count as the same word
// under the current options.
// NOTE(review): the return statements are not visible; the outcome of each test below is
// presumed from its condition.
575 stringdiffs::AreWordsSame(const word & word1, const word & word2) const
// Unless all whitespace is compared, two whitespace words are tested together first.
577 if (this->m_whitespace != WHITESPACE_COMPARE_ALL)
579 if (IsSpace(word1) && IsSpace(word2))
// With ignore_numbers, only the first character of each word is checked for being a digit.
582 if (m_ignore_numbers)
584 auto a = m_str1[word1.start];
585 auto b = m_str2[word2.start];
586 if (_istdigit(a) && _istdigit(b))
// Cheap checks on hash and length come before the character-by-character comparison.
591 if (word1.hash != word2.hash)
593 if (word1.length() != word2.length())
595 for (int i=0; i<word1.length(); ++i)
597 if (!caseMatch(m_str1[word1.start+i], m_str2[word2.start+i]))
604 * @brief Return true if characters match
// Compare two characters, honouring m_case_sensitive.
// NOTE(review): the case-sensitive return is not visible; presumably a plain ch1 == ch2.
607 stringdiffs::caseMatch(TCHAR ch1, TCHAR ch2) const
609 if (m_case_sensitive)
// Case-insensitive path: compare the lower-cased forms.
612 return _totlower(ch1)==_totlower(ch2);
616 * @brief An O(NP) Sequence Comparison Algorithm. Sun Wu, Udi Manber, Gene Myers
// Compute an edit script between m_words1 and m_words2.
// @param edscript [out]  receives the characters '=', '!', '+' and '-' pushed at the end of this function.
// NOTE(review): the return statements are not visible in this excerpt; BuildWordDiffList_DP
// treats a negative result as a special case.
619 stringdiffs::onp(std::vector<char> &edscript)
// Start time for the elapsed-time check in the main loop.
// NOTE(review): system_clock may be adjusted while running; steady_clock is the monotonic
// clock meant for measuring intervals.
621 auto start = std::chrono::system_clock::now();
// Each count is one less than the array size.
623 int M = static_cast<int>(m_words1.size() - 1);
624 int N = static_cast<int>(m_words2.size() - 1);
625 bool exchanged = false;
// M and N taken the other way round. NOTE(review): the guarding condition and the update of
// exchanged are not visible; presumably done when M > N.
628 M = static_cast<int>(m_words2.size() - 1);
629 N = static_cast<int>(m_words1.size() - 1);
// fp is offset by M+1 so it can be indexed with k in [-(M+1), N+1].
// NOTE(review): raw new[] — an exception thrown before the delete[] calls would leak fp and es.
632 int *fp = (new int[(M+1) + 1 + (N+1)]) + (M+1);
// One record per fp update: the operation, a count derived from fp (neq), and a link (pk, pi)
// to a record on another diagonal.
633 struct EditScriptElem { int op; int neq; int pk; int pi; };
// es uses the same offset indexing as fp.
634 std::vector<EditScriptElem> *es = (new std::vector<EditScriptElem>[(M+1) + 1 + (N+1)]) + (M+1);
// Appends the record for diagonal k; called right after fp[k] has been updated.
637 auto addEditScriptElem = [&es, &fp](int k) {
// Same comparison as the std::max() in the main loop: which neighbouring diagonal supplied the start value.
639 if (fp[k - 1] + 1 > fp[k + 1])
642 ese.neq = fp[k] - (fp[k - 1] + 1);
648 ese.neq = fp[k] - fp[k + 1];
// Link to the newest record currently stored on diagonal pk.
651 ese.pi = static_cast<int>(es[ese.pk].size() - 1);
652 es[k].push_back(ese);
// Cap that count is checked against below.
655 const int COUNTMAX = 100000;
// Visit every valid index of fp (the loop body is not visible).
658 for (k = -(M+1); k <= (N+1); k++)
// One round: diagonals from -p up to DELTA-1, then from DELTA+p down to DELTA+1, then one more
// update (presumably for k == DELTA; the assignment is not visible).
664 for (k = -p; k <= DELTA-1; k++)
666 fp[k] = snake(k, std::max(fp[k-1] + 1, fp[k+1]), M, N, exchanged);
667 addEditScriptElem(k);
670 for (k = DELTA + p; k >= DELTA+1; k--)
672 fp[k] = snake(k, std::max(fp[k-1] + 1, fp[k+1]), M, N, exchanged);
673 addEditScriptElem(k);
677 fp[k] = snake(k, std::max(fp[k-1] + 1, fp[k+1]), M, N, exchanged);
678 addEditScriptElem(k);
// Bail-out checks on iteration count and elapsed time; the visible timeout path frees both arrays.
681 if (count > COUNTMAX)
684 auto end = std::chrono::system_clock::now();
685 auto msec = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
686 if (msec > TimeoutMilliSeconds)
688 delete [] (es - (M+1));
689 delete [] (fp - (M+1));
693 } while (fp[k] != N);
// Walk the records back from the last one on diagonal DELTA, collecting operations in reverse,
// then reverse the collected sequence.
697 std::vector<char> ses;
699 for (k = DELTA, i = static_cast<int>(es[DELTA].size() - 1); i >= 0;)
701 EditScriptElem& esi = es[k][i];
702 for (int j = 0; j < esi.neq; ++j)
704 ses.push_back(static_cast<char>(esi.op));
708 std::reverse(ses.begin(), ses.end());
// Translate ses (from index 1 on) into edscript: an operation followed by its opposite becomes
// '!', and '+' / '-' are swapped when exchanged is set.
711 for (i = 1; i < static_cast<int>(ses.size()); i++)
716 if (static_cast<size_t>(i + 1) < ses.size() && ses[i + 1] == '-')
718 edscript.push_back('!');
724 edscript.push_back(exchanged ? '-' : '+');
729 if (static_cast<size_t>(i + 1) < ses.size() && ses[i + 1] == '+')
731 edscript.push_back('!');
737 edscript.push_back(exchanged ? '+' : '-');
742 edscript.push_back('=');
// Undo the M+1 offset before releasing the arrays.
746 delete [] (es - (M+1));
747 delete [] (fp - (M+1));
// Extend a path along diagonal k while the next words on both sides are the same.
// The two loops differ only in which of x / y indexes m_words1 and which indexes m_words2.
// NOTE(review): the computation of x, the loop bodies, the branch that picks a loop (presumably
// on exchanged) and the return value are not visible here.
753 stringdiffs::snake(int k, int y, int M, int N, bool exchanged)
758 while (x < M && y < N && AreWordsSame(m_words1[y + 1], m_words2[x + 1])) {
764 while (x < M && y < N && AreWordsSame(m_words1[x + 1], m_words2[y + 1])) {
772 * @brief Return true if chars match
774 * Caller must not call this for lead bytes
// Compare len characters starting at ch1 and ch2.
// NOTE(review): the guard in front of the memcmp is not visible; presumably it is taken when
// casitive is true. The return statements of the loop path are not visible either.
777 matchchar(const TCHAR *ch1, const TCHAR *ch2, size_t len, bool casitive)
// Exact comparison of the raw characters.
780 return memcmp(ch1, ch2, len * sizeof(TCHAR)) == 0;
// Otherwise compare character by character after lower-casing both sides.
781 for (size_t i = 0; i < len; ++i)
783 if (_totlower(ch1[i]) != _totlower(ch2[i]))
790 /** Does character introduce a multicharacter character? */
791 static inline bool IsLeadByte(TCHAR ch)
// True only when _getmbcp() is non-zero and ch is a DBCS lead byte.
// NOTE(review): the lines between the signature and this return are not visible.
796 return _getmbcp() && IsDBCSLeadByte(ch);
801 * @brief Is it whitespace (excludes all lead & trail bytes)?
// True when ch is whitespace according to _istspace and is not a lead byte.
804 isSafeWhitespace(TCHAR ch)
806 return _istspace((unsigned)ch) && !IsLeadByte(ch);
810 * @brief Is it a non-whitespace wordbreak character (ie, punctuation)?
// Decide whether the character at str[index] is a non-whitespace word break.
// @param breakType  0: whitespace only; 1: also punctuation (per the comments in the body).
// @param str, index  the character examined is str[index].
// @param ignore_numbers  when set, digits are tested first.
// NOTE(review): most return statements are not visible in this excerpt.
813 isWordBreak(int breakType, const TCHAR *str, int index, bool ignore_numbers)
815 TCHAR ch = str[index];
816 if (ignore_numbers && _istdigit(ch))
818 // breakType==1 means break also on punctuation
// Characters whose bits 8-15 are all zero reach the BreakChars lookup below.
819 if ((ch & 0xff00) == 0)
821 // TCHAR nextCh = str[index + 1];
822 // breakType==0 means whitespace only
825 return _tcschr(BreakChars, ch) != nullptr;
830 // ch==0xff0c/* Fullwidth Comma */ ||
831 // ch==0xff0e/* Fullwidth Full Stop */ ||
832 // ch==0xff1b/* Fullwidth Semicolon */ ||
833 // ch==0xff1a/* Fullwidth Colon */ ||
834 // ch==0x3002/* Ideographic Full Stop */ ||
835 // ch==0x3001/* Ideographic Comma */
838 // WORD wCharType, wCharTypeNext;
839 // GetStringTypeW(CT_CTYPE3, &ch, 1, &wCharType);
840 // TCHAR nextCh = str[index + 1];
841 // GetStringTypeW(CT_CTYPE3, &nextCh, 1, &wCharTypeNext);
842 // return (wCharType != wCharTypeNext);
// Classify ch with GetStringTypeW and test for upper-case, lower-case or digit
// (the result returned in each case is not visible).
845 GetStringTypeW(CT_CTYPE1, &ch, 1, &wCharType);
846 if ((wCharType & (C1_UPPER | C1_LOWER | C1_DIGIT)) != 0)
854 * @brief advance current pointer over whitespace, until not whitespace or beyond end
855 * @param pcurrent [in,out] current location (to be advanced)
856 * @param end [in] last valid position (only go one beyond this)
// Move *pcurrent forward while it is at or before end and points at whitespace.
// On return *pcurrent is either at a non-whitespace character or one past end.
859 AdvanceOverWhitespace(const TCHAR **pcurrent, const TCHAR *end)
861 // advance over whitespace
862 while (*pcurrent <= end && isSafeWhitespace(**pcurrent))
863 ++(*pcurrent); // DBCS safe because of isSafeWhitespace above
867 * @brief Compute begin1,begin2,end1,end2 to display byte difference between strings str1 & str2
868 * @param casitive [in] true for case-sensitive, false for case-insensitive
869 * @param xwhite [in] This governs whether we handle whitespace specially (see WHITESPACE_COMPARE_ALL, WHITESPACE_IGNORE_CHANGE, WHITESPACE_IGNORE_ALL)
870 * @param [out] begin return -1 if not found or pos of equal
871 * @param [out] end return -1 if not found or pos of equal valid if begin1 >=0
872 * @param [in] equal false: search for a diff, true: search for equal
875 * Assumes whitespace is never leadbyte or trailbyte!
// Trim the common prefix and suffix of str1 and str2 and report what remains in begin[]/end[].
// Index 0 of begin/end refers to str1, index 1 to str2; end values are inclusive.
// NOTE(review): loop headers, several branch bodies and the return statements are not visible
// in this excerpt.
878 stringdiffs::ComputeByteDiff(const String & str1, const String & str2,
879 bool casitive, int xwhite,
880 int begin[2], int end[2], bool equal)
882 // Set to sane values
883 // Also this way can distinguish if we set begin[0] to -1 for no diff in line
884 begin[0] = end[0] = begin[1] = end[1] = 0;
886 int len1 = static_cast<int>(str1.length());
887 int len2 = static_cast<int>(str2.length());
889 const TCHAR *pbeg1 = str1.c_str();
890 const TCHAR *pbeg2 = str2.c_str();
// Four character-boundary iterators: one forward and one backward cursor for each string.
892 ICUBreakIterator *pIterCharBegin1 = ICUBreakIterator::getCharacterBreakIterator(reinterpret_cast<const UChar *>(pbeg1), static_cast<int32_t>(len1));
893 ICUBreakIterator *pIterCharBegin2 = ICUBreakIterator::getCharacterBreakIterator<2>(reinterpret_cast<const UChar *>(pbeg2), static_cast<int32_t>(len2));
894 ICUBreakIterator *pIterCharEnd1 = ICUBreakIterator::getCharacterBreakIterator<3>(reinterpret_cast<const UChar *>(pbeg1), static_cast<int32_t>(len1));
895 ICUBreakIterator *pIterCharEnd2 = ICUBreakIterator::getCharacterBreakIterator<4>(reinterpret_cast<const UChar *>(pbeg2), static_cast<int32_t>(len2));
// An empty string on either side is handled up front (body not visible).
897 if (len1 == 0 || len2 == 0)
909 // cursors from front, which we advance to beginning of difference
910 const TCHAR *py1 = pbeg1;
911 const TCHAR *py2 = pbeg2;
913 // pen1,pen2 point to the last valid character (broken multibyte lead chars don't count)
914 const TCHAR *pen1 = pbeg1 + (len1 > 0 ? pIterCharEnd1->preceding(len1) : 0);
915 const TCHAR *pen2 = pbeg2 + (len2 > 0 ? pIterCharEnd2->preceding(len2) : 0);
// Number of TCHARs occupied by the last character of each string.
916 size_t glyphlenz1 = pbeg1 + len1 - pen1;
917 size_t glyphlenz2 = pbeg2 + len2 - pen2;
919 if (xwhite != WHITESPACE_COMPARE_ALL)
921 // Ignore leading and trailing whitespace
922 // by advancing py1 and py2
923 // and retreating pen1 and pen2
924 while (py1 < pen1 && isSafeWhitespace(*py1))
925 py1 = pbeg1 + pIterCharBegin1->next();
926 while (py2 < pen2 && isSafeWhitespace(*py2))
927 py2 = pbeg2 + pIterCharBegin2->next();
// NOTE(review): pbeg1/pbeg2 come from c_str() and len1/len2 from length(), so pbeg1[len1] and
// pbeg2[len2] are both the terminating NUL; the second half of this condition therefore looks
// always false — confirm the intended indices.
928 if ((pen1 < pbeg1 + len1 - 1 || pen2 < pbeg2 + len2 -1)
929 && (pbeg1[len1] != pbeg2[len2]))
931 // mismatched broken multibyte ends
935 while (pen1 > py1 && isSafeWhitespace(*pen1))
936 pen1 = pbeg1 + pIterCharEnd1->previous();
937 while (pen2 > py2 && isSafeWhitespace(*pen2))
938 pen2 = pbeg2 + pIterCharEnd2->previous();
941 //check for exception of empty string on one side
942 //In that case display all as a diff
943 if (!equal && (((py1 == pen1) && isSafeWhitespace(*pen1)) ||
944 ((py2 == pen2) && isSafeWhitespace(*pen2))))
952 // Advance over matching beginnings of lines
953 // Advance py1 & py2 from beginning until find difference or end
956 // Potential difference extends from py1 to pen1 and py2 to pen2
958 // Check if either side finished
// Both cursors ran past their ends: all four outputs are set to -1.
959 if (py1 > pen1 && py2 > pen2)
961 begin[0] = end[0] = begin[1] = end[1] = -1;
964 if (py1 > pen1 || py2 > pen2)
969 // handle all the whitespace logic (due to WinMerge whitespace settings)
970 if (xwhite && py1 < pen1 && isSafeWhitespace(*py1))
972 if (xwhite==WHITESPACE_IGNORE_CHANGE && !isSafeWhitespace(*py2))
974 // py1 is white but py2 is not
975 // in WHITESPACE_IGNORE_CHANGE mode,
976 // this doesn't qualify as skippable whitespace
977 break; // done with forward search
979 // gobble up all whitespace in current area
980 AdvanceOverWhitespace(&py1, pen1); // will go beyond end
981 AdvanceOverWhitespace(&py2, pen2); // will go beyond end
985 if (xwhite && py2 < pen2 && isSafeWhitespace(*py2))
987 if (xwhite==WHITESPACE_IGNORE_CHANGE && !isSafeWhitespace(*py1))
989 // py2 is white but py1 is not
990 // in WHITESPACE_IGNORE_CHANGE mode,
991 // this doesn't qualify as skippable whitespace
992 break; // done with forward search
994 // gobble up all whitespace in current area
995 AdvanceOverWhitespace(&py1, pen1); // will go beyond end
996 AdvanceOverWhitespace(&py2, pen2); // will go beyond end
// Compare one whole character from each side; lengths must agree before the contents are compared.
1000 const TCHAR* py1next = pbeg1 + pIterCharBegin1->next();
1001 const TCHAR* py2next = pbeg2 + pIterCharBegin2->next();
1002 size_t glyphleny1 = py1next - py1;
1003 size_t glyphleny2 = py2next - py2;
1004 if (glyphleny1 != glyphleny2 || !matchchar(py1, py2, glyphleny1, casitive))
1005 break; // done with forward search
1010 // Potential difference extends from py1 to pen1 and py2 to pen2
1012 // Store results of advance into return variables (begin[0] & begin[1])
1013 // -1 in a begin variable means no visible diff area
1014 begin[0] = static_cast<int>(py1 - pbeg1);
1015 begin[1] = static_cast<int>(py2 - pbeg2);
// Backward cursors start at the last character of each (possibly whitespace-trimmed) string.
1017 const TCHAR *pz1 = pen1;
1018 const TCHAR *pz2 = pen2;
1020 // Retreat over matching ends of lines
1021 // Retreat pz1 & pz2 from end until find difference or beginning
1024 // Check if either side finished
1025 if (pz1 < py1 && pz2 < py2)
1027 begin[0] = end[0] = begin[1] = end[1] = -1;
1030 if (pz1 < py1 || pz2 < py2)
1035 // handle all the whitespace logic (due to WinMerge whitespace settings)
1036 if (xwhite && pz1 > py1 && isSafeWhitespace(*pz1))
// NOTE(review): literal 1 here where the forward scan spells WHITESPACE_IGNORE_CHANGE —
// presumably the same value; confirm.
1038 if (xwhite==1 && !isSafeWhitespace(*pz2))
1039 break; // done with reverse search
1040 // gobble up all whitespace in current area
1041 while (pz1 > py1 && isSafeWhitespace(*pz1))
1042 pz1 = pbeg1 + pIterCharEnd1->previous();
1043 while (pz2 > py2 && isSafeWhitespace(*pz2))
1044 pz2 = pbeg2 + pIterCharEnd2->previous();
1048 if (xwhite && pz2 > py2 && isSafeWhitespace(*pz2))
1051 break; // done with reverse search
1052 while (pz2 > py2 && isSafeWhitespace(*pz2))
1053 pz2 = pbeg2 + pIterCharEnd2->previous();
1057 if (glyphlenz1 != glyphlenz2 || !matchchar(pz1, pz2, glyphlenz1, casitive))
1058 break; // done with reverse search
// Step both cursors back one character and recompute the character lengths.
1059 const TCHAR* pz1next = pz1;
1060 const TCHAR* pz2next = pz2;
1061 pz1 = (pz1 > pbeg1) ? pbeg1 + pIterCharEnd1->preceding(static_cast<int32_t>(pz1 - pbeg1)) : pz1 - 1;
1062 pz2 = (pz2 > pbeg2) ? pbeg2 + pIterCharEnd2->preceding(static_cast<int32_t>(pz2 - pbeg2)) : pz2 - 1;
1063 glyphlenz1 = pz1next - pz1;
1064 glyphlenz2 = pz2next - pz2;
1065 // Now do real character match
1068 /* if (*pz1 == '\r' && *(pz1+1) == '\n')
1073 else if (*pz2 == '\r' && *(pz2+1) == '\n')
1078 if (*(pbeg1-1) == '\r' && *pbeg1 == '\n')
1083 else if (*(pbeg2-1) == '\r' && *pbeg2 == '\n')
1089 // Store results of advance into return variables (end[0] & end[1])
1090 end[0] = static_cast<int>(pz1 - pbeg1 + glyphlenz1 - 1);
1091 end[1] = static_cast<int>(pz2 - pbeg2 + glyphlenz2 - 1);
1093 // Check if difference region was empty
1094 if (begin[0] == end[0] + 1 && begin[1] == end[1] + 1)
1095 begin[0] = -1; // no diff
1099 * @brief adjust the range of the specified word diffs down to byte(char) level.
1100 * Operates on the member strings m_str1 and m_str2 (the lines to be compared).
1101 * Uses m_case_sensitive: true for case-sensitive, false for case-insensitive
1102 * Uses m_whitespace: this governs whether we handle whitespace specially
1103 * (see WHITESPACE_COMPARE_ALL, WHITESPACE_IGNORE_CHANGE, WHITESPACE_IGNORE_ALL)
// Shrink every entry of m_wdiffs from word boundaries to the characters that actually differ.
// Takes no arguments: it reads m_str1, m_str2, m_case_sensitive and m_whitespace.
1105 void stringdiffs::wordLevelToByteLevel()
1107 for (size_t i = 0; i < m_wdiffs.size(); i++)
// Offsets returned by ComputeByteDiff are relative to the extracted substrings.
1109 int begin[3], end[3];
1110 wdiff& diff = m_wdiffs[i];
1111 String str1_2, str2_2;
// Extract the text covered by this diff on each side.
1112 str1_2 = m_str1.substr(diff.begin[0], diff.end[0] - diff.begin[0] + 1);
1113 str2_2 = m_str2.substr(diff.begin[1], diff.end[1] - diff.begin[1] + 1);
1114 ComputeByteDiff(str1_2, str2_2, m_case_sensitive, m_whitespace, begin, end, false);
// NOTE(review): the conditions selecting between the "no visible diff" and the adjusting
// branches are not visible; presumably they test begin[n] == -1.
1117 // no visible diff on side1
1118 diff.end[0] = diff.begin[0] - 1;
// end is updated before begin because the new end is computed from the old begin.
1122 diff.end[0] = diff.begin[0] + end[0];
1123 diff.begin[0] += begin[0];
1127 // no visible diff on side2
1128 diff.end[1] = diff.begin[1] - 1;
1132 diff.end[1] = diff.begin[1] + end[1];
1133 diff.begin[1] += begin[1];