OSDN Git Service

Convert CMergeDoc::GetMatchCost() to use String:s as parameters.
[winmerge-jp/winmerge-jp.git] / Src / MergeDocDiffSync.cpp
1 /** 
2  * @file  MergeDocDiffSync.cpp
3  *
4  * @brief Code to layout diff blocks, to find matching lines, and insert ghost lines
5  *
6  */
7 // RCS ID line follows -- this is updated by CVS
8 // $Id$
9
10 #include "StdAfx.h"
11 #include <vector>
12 #include "MergeDoc.h"
13
14 #include "Merge.h"
15 #include "DiffList.h"
16 #include "MergeLineFlags.h"
17
18 #ifdef _DEBUG
19 #define new DEBUG_NEW
20 #undef THIS_FILE
21 static char THIS_FILE[] = __FILE__;
22 #endif
23
24 using std::vector;
25
26 /**
27  * @brief Divide diff blocks to match lines in diff blocks.
28  */
29 void CMergeDoc::AdjustDiffBlocks()
30 {
31         int nDiff;
32         int nDiffCount = m_diffList.GetSize();
33
34         // Go through and do our best to line up lines within each diff block
35         // between left side and right side
36         DiffList newDiffList;
37         newDiffList.Clear();
38         for (nDiff = 0; nDiff < nDiffCount; nDiff++)
39         {
40                 const DIFFRANGE & diffrange = *m_diffList.DiffRangeAt(nDiff);
41                 // size map correctly (it will hold one entry for each left-side line
42                 int nlines0 = diffrange.end0 - diffrange.begin0 + 1;
43                 int nlines1 = diffrange.end1 - diffrange.begin1 + 1;
44                 if (nlines0>0 && nlines1>0)
45                 {
46                         // Call worker to do all lines in block
47                         int lo0 = 0, hi0 = nlines0-1;
48                         int lo1 = 0, hi1 = nlines1-1;
49                         DiffMap diffmap;
50                         diffmap.InitDiffMap(nlines0);
51                         AdjustDiffBlock(diffmap, diffrange, lo0, hi0, lo1, hi1);
52
53 #ifdef _DEBUG
54                         std::vector<int>::const_iterator iter = diffmap.m_map.begin();
55                         int i = 0;
56                         while (iter != diffmap.m_map.end())
57                         {
58                                 TCHAR buf[256];
59                                 wsprintf(buf, _T("begin0=%d begin1=%d diffmap[%d]=%d\n"),
60                                                 diffrange.begin0, diffrange.begin1, i, iter);
61                                 OutputDebugString(buf);
62                                 iter++;
63                                 i++;
64                         }
65 #endif
66
67                         // divide diff blocks
68                         DIFFRANGE dr;
69                         int line0, line1, lineend0;
70                         for (line0 = 0, line1 = 0; line0 < nlines0;)
71                         {
72                                 const int map_line0 = diffmap.m_map[line0];
73                                 if (map_line0 == DiffMap::GHOST_MAP_ENTRY ||
74                                                 map_line0 == DiffMap::BAD_MAP_ENTRY)
75                                 {
76                                         for (lineend0 = line0; lineend0 < nlines0; lineend0++)
77                                         {
78                                                 if (map_line0 != DiffMap::GHOST_MAP_ENTRY &&
79                                                                 map_line0 != DiffMap::BAD_MAP_ENTRY)
80                                                         break;
81                                         }
82                                         dr.begin0  = diffrange.begin0 + line0;
83                                         dr.begin1  = diffrange.begin1 + line1;
84                                         dr.end0    = diffrange.begin0 + lineend0 - 1;
85                                         dr.end1    = dr.begin1 - 1;
86                                         dr.blank0  = dr.blank1 = -1;
87                                         dr.op      = OP_DIFF;
88                                         newDiffList.AddDiff(dr);
89                                         line0 = lineend0;
90                                 }
91                                 else
92                                 {
93                                         if (map_line0 > line1)
94                                         {
95                                                 dr.begin0  = diffrange.begin0 + line0;
96                                                 dr.begin1  = diffrange.begin1 + line1;
97                                                 dr.end0    = dr.begin0 - 1;
98                                                 dr.end1    = diffrange.begin1 + map_line0 - 1;
99                                                 dr.blank0  = dr.blank1 = -1;
100                                                 dr.op      = OP_DIFF;
101                                                 newDiffList.AddDiff(dr);
102                                                 line1 = map_line0;
103                                         } 
104
105                                         for (lineend0 = line0 + 1; lineend0 < nlines0; lineend0++)
106                                         {
107                                                 if (map_line0 != diffmap.m_map[lineend0 - 1] + 1)
108                                                         break;
109                                         }
110                                         dr.begin0  = diffrange.begin0 + line0;
111                                         dr.begin1  = diffrange.begin1 + line1;
112                                         dr.end0    = diffrange.begin0 + lineend0 - 1;
113                                         dr.end1    = diffrange.begin1 + diffmap.m_map[lineend0 - 1];
114                                         dr.blank0  = dr.blank1 = -1;
115                                         dr.op      = diffrange.op == OP_TRIVIAL ? OP_TRIVIAL : OP_DIFF;
116                                         newDiffList.AddDiff(dr);
117                                         line0 = lineend0;
118                                         line1 = diffmap.m_map[lineend0 - 1] + 1;
119                                 }
120                         }
121                         if (line1 <= hi1)
122                         {
123                                 dr.begin0  = diffrange.begin0 + line0;
124                                 dr.begin1  = diffrange.begin1 + line1;
125                                 dr.end0    = dr.begin0 - 1;
126                                 dr.end1    = diffrange.begin1 + hi1;
127                                 dr.blank0  = dr.blank1 = -1;
128                                 dr.op      = diffrange.op == OP_TRIVIAL ? OP_TRIVIAL : OP_DIFF;
129                                 newDiffList.AddDiff(dr);
130                         }
131                 }
132                 else
133                 {
134                         newDiffList.AddDiff(diffrange);
135                 }
136         }
137
138         // recreate m_diffList
139         m_diffList.Clear();
140         nDiffCount = newDiffList.GetSize();
141         for (nDiff = 0; nDiff < nDiffCount; nDiff++)
142         {
143 #ifdef _DEBUG
144                 TCHAR buf[256];
145                 DIFFRANGE di = *newDiffList.DiffRangeAt(nDiff);
146                 wsprintf(buf, _T("%d: begin0=%d end0=%d begin1=%d end1=%d\n"), nDiff,
147                                 di.begin0, di.end0, di.begin1, di.end1);
148                 OutputDebugString(buf);
149 #endif
150                 m_diffList.AddDiff(*newDiffList.DiffRangeAt(nDiff));
151         }
152 }
153
154 /**
155  * @brief Return cost of making strings equal
156  *
157  * The cost of making them equal is the measure of their dissimilarity
158  * which is their Levenshtein distance.
159  */
160 int CMergeDoc::GetMatchCost(const String &sLine0, const String &sLine1)
161 {
162         DIFFOPTIONS diffOptions = {0};
163         m_diffWrapper.GetOptions(&diffOptions);
164
165         // Options that affect comparison
166         bool casitive = !diffOptions.bIgnoreCase;
167         int xwhite = diffOptions.nIgnoreWhitespace;
168         int breakType = GetBreakType(); // whitespace only or include punctuation
169         bool byteColoring = GetByteColoringOption();
170
171         vector<wdiff*> worddiffs;
172         sd_ComputeWordDiffs(sLine0, sLine1, casitive, xwhite, breakType, byteColoring, &worddiffs);
173
174         int nDiffLenSum = 0, nDiffLen0, nDiffLen1;
175         int i;
176         int nCount = worddiffs.size();
177         for (i = 0; i < nCount; i++)
178         {
179                 nDiffLen0 = worddiffs[i]->end[0] - worddiffs[i]->start[0] + 1;
180                 nDiffLen1 = worddiffs[i]->end[1] - worddiffs[i]->start[1] + 1;
181                 nDiffLenSum += (nDiffLen0 > nDiffLen1) ? nDiffLen0 : nDiffLen1;
182         }
183
184         while (!worddiffs.empty())
185         {
186                 delete worddiffs.back();
187                 worddiffs.pop_back();
188         }
189
190         return nDiffLenSum;
191 }
192
193 /**
194  * @brief Map lines from left to right for specified range in diff block, as best we can
195  * Map left side range [lo0;hi0] to right side range [lo1;hi1]
196  * (ranges include ends)
197  *
198  * Algorithm:
199  * Find best match, and use that to split problem into two parts (above & below match)
200  * and call ourselves recursively to solve each smaller problem
201  */
202 void CMergeDoc::AdjustDiffBlock(DiffMap & diffMap, const DIFFRANGE & diffrange, int lo0, int hi0, int lo1, int hi1)
203 {
204         // Map & lo & hi numbers are all relative to this block
205         // We need to know offsets to find actual line strings from buffer
206         int offset0 = diffrange.begin0;
207         int offset1 = diffrange.begin1;
208
209         // # of lines on left and right
210         int lines0 = hi0 - lo0 + 1;
211         int lines1 = hi1 - lo1 + 1;
212
213
214         ASSERT(lines0 > 0 && lines1 > 0);
215
216         // shortcut special case
217         if (lines0 == 1 && lines1 == 1)
218         {
219                 diffMap.m_map[lo0] = hi1;
220                 return;
221         }
222
223         // Bail out if range is large
224         if (lines0 > 15 || lines1 > 15)
225         {
226                 // Do simple 1:1 mapping
227                 // but try to stay within original block
228                 // Cannot be outside both sides, so just test for outside one side
229                 int tlo=0, thi=0;
230                 if (lo1 < 0)
231                 {
232                         // right side is off the bottom
233                         // so put it as close to bottom as possible
234                         tlo = hi1 - (hi0-lo0);
235                         thi = hi1;
236                 }
237                 else
238                 {
239                         // put it as close to top as possible
240                         tlo = lo1;
241                         thi = lo1 + (hi0-lo0);
242                 }
243
244                 for (int w=0; w<lines0; ++w)
245                 {
246                         if (w < lines1)
247                                 diffMap.m_map[w] = w + tlo;
248                         else
249                                 diffMap.m_map[w] = DiffMap::GHOST_MAP_ENTRY;
250                 }
251                 return;
252         }
253
254         // Find best fit
255         int ibest=-1, isavings=0x7fffffff, itarget=-1;
256         CString sLine0, sLine1;
257         for (int i=lo0; i<=hi0; ++i)
258         {
259                 m_ptBuf[0]->GetLine(offset0 + i, sLine0);
260                 for (int j=lo1; j<=hi1; ++j)
261                 {
262                         m_ptBuf[1]->GetLine(offset1 + j, sLine1);
263                         int savings = GetMatchCost((LPCTSTR)sLine0, (LPCTSTR)sLine1);
264                         // TODO
265                         // Need to penalize assignments that push us outside the box
266                         // further than is required
267                         if (savings < isavings)
268                         {
269                                 ibest = i;
270                                 itarget = j;
271                                 isavings = savings;
272                         }
273                 }
274         }
275         // I think we have to find at least one
276         // because cost (Levenshtein distance) is less than or equal to maxlen
277         // so cost is always nonnegative
278         ASSERT(ibest >= 0);
279
280         ASSERT(diffMap.m_map[ibest] == DiffMap::BAD_MAP_ENTRY);
281
282         diffMap.m_map[ibest] = itarget;
283
284         // Half of the remaining problem is below our match
285         if (lo0 < ibest)
286         {
287                 if (lo1 < itarget)
288                 {
289                         AdjustDiffBlock(diffMap, diffrange, lo0, ibest-1, lo1, itarget-1);
290                 }
291                 else
292                 {
293                         // No target space for the ones below our match
294                         for (int x = lo0; x < ibest; ++x)
295                         {
296                                 diffMap.m_map[x] = DiffMap::GHOST_MAP_ENTRY;
297                         }
298                 }
299         }
300
301         // Half of the remaining problem is above our match
302         if (hi0 > ibest)
303         {
304                 if (itarget < hi1)
305                 {
306                         AdjustDiffBlock(diffMap, diffrange, ibest + 1, hi0,
307                                         itarget + 1, hi1);
308                 }
309                 else
310                 {
311                         // No target space for the ones above our match
312                         for (int x = ibest + 1; x <= hi0; ++x)
313                         {
314                                 diffMap.m_map[x] = DiffMap::GHOST_MAP_ENTRY;
315                         }
316                 }
317         }
318 }