2 * @file MergeDocDiffSync.cpp
4 * @brief Code to layout diff blocks, to find matching lines, and insert ghost lines
7 // RCS ID line follows -- this is updated by CVS
16 #include "MergeLineFlags.h"
21 static char THIS_FILE[] = __FILE__;
27 * @brief Divide diff blocks to match lines in diff blocks.
29 void CMergeDoc::AdjustDiffBlocks()
32 int nDiffCount = m_diffList.GetSize();
34 // Go through and do our best to line up lines within each diff block
35 // between left side and right side
38 for (nDiff = 0; nDiff < nDiffCount; nDiff++)
40 const DIFFRANGE & diffrange = *m_diffList.DiffRangeAt(nDiff);
41 // size map correctly (it will hold one entry for each left-side line
42 int nlines0 = diffrange.end0 - diffrange.begin0 + 1;
43 int nlines1 = diffrange.end1 - diffrange.begin1 + 1;
44 if (nlines0>0 && nlines1>0)
46 // Call worker to do all lines in block
47 int lo0 = 0, hi0 = nlines0-1;
48 int lo1 = 0, hi1 = nlines1-1;
50 diffmap.InitDiffMap(nlines0);
51 AdjustDiffBlock(diffmap, diffrange, lo0, hi0, lo1, hi1);
54 std::vector<int>::const_iterator iter = diffmap.m_map.begin();
56 while (iter != diffmap.m_map.end())
59 wsprintf(buf, _T("begin0=%d begin1=%d diffmap[%d]=%d\n"),
60 diffrange.begin0, diffrange.begin1, i, iter);
61 OutputDebugString(buf);
69 int line0, line1, lineend0;
70 for (line0 = 0, line1 = 0; line0 < nlines0;)
72 const int map_line0 = diffmap.m_map[line0];
73 if (map_line0 == DiffMap::GHOST_MAP_ENTRY ||
74 map_line0 == DiffMap::BAD_MAP_ENTRY)
76 for (lineend0 = line0; lineend0 < nlines0; lineend0++)
78 if (map_line0 != DiffMap::GHOST_MAP_ENTRY &&
79 map_line0 != DiffMap::BAD_MAP_ENTRY)
82 dr.begin0 = diffrange.begin0 + line0;
83 dr.begin1 = diffrange.begin1 + line1;
84 dr.end0 = diffrange.begin0 + lineend0 - 1;
85 dr.end1 = dr.begin1 - 1;
86 dr.blank0 = dr.blank1 = -1;
88 newDiffList.AddDiff(dr);
93 if (map_line0 > line1)
95 dr.begin0 = diffrange.begin0 + line0;
96 dr.begin1 = diffrange.begin1 + line1;
97 dr.end0 = dr.begin0 - 1;
98 dr.end1 = diffrange.begin1 + map_line0 - 1;
99 dr.blank0 = dr.blank1 = -1;
101 newDiffList.AddDiff(dr);
105 for (lineend0 = line0 + 1; lineend0 < nlines0; lineend0++)
107 if (map_line0 != diffmap.m_map[lineend0 - 1] + 1)
110 dr.begin0 = diffrange.begin0 + line0;
111 dr.begin1 = diffrange.begin1 + line1;
112 dr.end0 = diffrange.begin0 + lineend0 - 1;
113 dr.end1 = diffrange.begin1 + diffmap.m_map[lineend0 - 1];
114 dr.blank0 = dr.blank1 = -1;
115 dr.op = diffrange.op == OP_TRIVIAL ? OP_TRIVIAL : OP_DIFF;
116 newDiffList.AddDiff(dr);
118 line1 = diffmap.m_map[lineend0 - 1] + 1;
123 dr.begin0 = diffrange.begin0 + line0;
124 dr.begin1 = diffrange.begin1 + line1;
125 dr.end0 = dr.begin0 - 1;
126 dr.end1 = diffrange.begin1 + hi1;
127 dr.blank0 = dr.blank1 = -1;
128 dr.op = diffrange.op == OP_TRIVIAL ? OP_TRIVIAL : OP_DIFF;
129 newDiffList.AddDiff(dr);
134 newDiffList.AddDiff(diffrange);
138 // recreate m_diffList
140 nDiffCount = newDiffList.GetSize();
141 for (nDiff = 0; nDiff < nDiffCount; nDiff++)
145 DIFFRANGE di = *newDiffList.DiffRangeAt(nDiff);
146 wsprintf(buf, _T("%d: begin0=%d end0=%d begin1=%d end1=%d\n"), nDiff,
147 di.begin0, di.end0, di.begin1, di.end1);
148 OutputDebugString(buf);
150 m_diffList.AddDiff(*newDiffList.DiffRangeAt(nDiff));
155 * @brief Return cost of making strings equal
157 * The cost of making them equal is the measure of their dissimilarity
158 * which is their Levenshtein distance.
160 int CMergeDoc::GetMatchCost(const String &sLine0, const String &sLine1)
162 DIFFOPTIONS diffOptions = {0};
163 m_diffWrapper.GetOptions(&diffOptions);
165 // Options that affect comparison
166 bool casitive = !diffOptions.bIgnoreCase;
167 int xwhite = diffOptions.nIgnoreWhitespace;
168 int breakType = GetBreakType(); // whitespace only or include punctuation
169 bool byteColoring = GetByteColoringOption();
171 vector<wdiff*> worddiffs;
172 sd_ComputeWordDiffs(sLine0, sLine1, casitive, xwhite, breakType, byteColoring, &worddiffs);
174 int nDiffLenSum = 0, nDiffLen0, nDiffLen1;
176 int nCount = worddiffs.size();
177 for (i = 0; i < nCount; i++)
179 nDiffLen0 = worddiffs[i]->end[0] - worddiffs[i]->start[0] + 1;
180 nDiffLen1 = worddiffs[i]->end[1] - worddiffs[i]->start[1] + 1;
181 nDiffLenSum += (nDiffLen0 > nDiffLen1) ? nDiffLen0 : nDiffLen1;
184 while (!worddiffs.empty())
186 delete worddiffs.back();
187 worddiffs.pop_back();
194 * @brief Map lines from left to right for specified range in diff block, as best we can
195 * Map left side range [lo0;hi0] to right side range [lo1;hi1]
196 * (ranges include ends)
199 * Find best match, and use that to split problem into two parts (above & below match)
200 * and call ourselves recursively to solve each smaller problem
202 void CMergeDoc::AdjustDiffBlock(DiffMap & diffMap, const DIFFRANGE & diffrange, int lo0, int hi0, int lo1, int hi1)
204 // Map & lo & hi numbers are all relative to this block
205 // We need to know offsets to find actual line strings from buffer
206 int offset0 = diffrange.begin0;
207 int offset1 = diffrange.begin1;
209 // # of lines on left and right
210 int lines0 = hi0 - lo0 + 1;
211 int lines1 = hi1 - lo1 + 1;
214 ASSERT(lines0 > 0 && lines1 > 0);
216 // shortcut special case
217 if (lines0 == 1 && lines1 == 1)
219 diffMap.m_map[lo0] = hi1;
223 // Bail out if range is large
224 if (lines0 > 15 || lines1 > 15)
226 // Do simple 1:1 mapping
227 // but try to stay within original block
228 // Cannot be outside both sides, so just test for outside one side
232 // right side is off the bottom
233 // so put it as close to bottom as possible
234 tlo = hi1 - (hi0-lo0);
239 // put it as close to top as possible
241 thi = lo1 + (hi0-lo0);
244 for (int w=0; w<lines0; ++w)
247 diffMap.m_map[w] = w + tlo;
249 diffMap.m_map[w] = DiffMap::GHOST_MAP_ENTRY;
255 int ibest=-1, isavings=0x7fffffff, itarget=-1;
256 CString sLine0, sLine1;
257 for (int i=lo0; i<=hi0; ++i)
259 m_ptBuf[0]->GetLine(offset0 + i, sLine0);
260 for (int j=lo1; j<=hi1; ++j)
262 m_ptBuf[1]->GetLine(offset1 + j, sLine1);
263 int savings = GetMatchCost((LPCTSTR)sLine0, (LPCTSTR)sLine1);
265 // Need to penalize assignments that push us outside the box
266 // further than is required
267 if (savings < isavings)
275 // I think we have to find at least one
276 // because cost (Levenshtein distance) is less than or equal to maxlen
277 // so cost is always nonnegative
280 ASSERT(diffMap.m_map[ibest] == DiffMap::BAD_MAP_ENTRY);
282 diffMap.m_map[ibest] = itarget;
284 // Half of the remaining problem is below our match
289 AdjustDiffBlock(diffMap, diffrange, lo0, ibest-1, lo1, itarget-1);
293 // No target space for the ones below our match
294 for (int x = lo0; x < ibest; ++x)
296 diffMap.m_map[x] = DiffMap::GHOST_MAP_ENTRY;
301 // Half of the remaining problem is above our match
306 AdjustDiffBlock(diffMap, diffrange, ibest + 1, hi0,
311 // No target space for the ones above our match
312 for (int x = ibest + 1; x <= hi0; ++x)
314 diffMap.m_map[x] = DiffMap::GHOST_MAP_ENTRY;