2 * @file DiffTextBuffer.cpp
4 * @brief Implementation file for CDiffTextBuffer
7 // ID line follows -- this is updated by SVN
8 // $Id: DiffTextBuffer.cpp 7082 2010-01-03 22:15:50Z sdottaka $
11 #include "DiffTextBuffer.h"
12 #include <Poco/Exception.h>
19 #include "OptionsDef.h"
20 #include "OptionsMgr.h"
21 #include "Environment.h"
22 #include "MergeLineFlags.h"
24 #include "FileTransform.h"
25 #include "FileTextEncoding.h"
26 #include "codepage_detect.h"
29 using Poco::Exception;
34 static char THIS_FILE[] = __FILE__;
// Forward declarations of file-local (static) helper functions.
37 static bool IsTextFileStylePure(const UniMemFile::txtstats & stats);
38 static CString GetLineByteTimeReport(UINT lines, __int64 bytes,
39 const COleDateTime & start);
40 static void EscapeControlChars(CString &s);
41 static LPCTSTR GetEol(const CString &str);
42 static CRLFSTYLE GetTextFileStyle(const UniMemFile::txtstats & stats);
45 * @brief Check if file has only one EOL type.
46 * @param [in] stats File's text stats.
47 * @return true if only one EOL type is found, false otherwise.
49 static bool IsTextFileStylePure(const UniMemFile::txtstats & stats)
62 * @brief Return a string giving #lines and #bytes and how much time elapsed.
63 * @param [in] lines Count of lines.
64 * @param [in] bytes Count of bytes.
65 * @param [in] start Time used.
66 * @return Formatted string.
68 static CString GetLineByteTimeReport(UINT lines, __int64 bytes,
69 const COleDateTime & start)
71 String sLines = locality::NumToLocaleStr((int)lines);
72 String sBytes = locality::NumToLocaleStr(bytes);
73 COleDateTimeSpan duration = COleDateTime::GetCurrentTime() - start;
74 String sMinutes = locality::NumToLocaleStr((int)duration.GetTotalMinutes());
76 str.Format(_T("%s lines (%s byte) saved in %sm%02ds")
77 , sLines.c_str(), sBytes.c_str(), sMinutes.c_str()
78 , duration.GetSeconds()
84 * @brief Escape control characters.
85 * @param [in,out] s Line of text excluding eol chars.
87 * @note Escape sequences follow the pattern
88 * (leadin character, high nibble, low nibble, leadout character).
89 * The leadin character is '\x0F'. The leadout character is a backslash.
// Escapes control characters of s in place. The visible steps form two
// phases: first the required length is computed, then the string buffer is
// resized and rewritten from the end towards the start.
91 static void EscapeControlChars(CString &s)
93 // Compute buffer length required for escaping
// n starts as the current length and grows by 3 for each character to escape.
94 int n = s.GetLength();
100 // Is it a control character in the range 0..31 except TAB?
// (c & ~0x1F) is zero only when no bit above the low five is set.
101 if (!(c & ~_T('\x1F')) && c != _T('\t'))
103 n += 3; // Need 3 extra characters to escape
106 // Reallocate accordingly
108 LPTSTR p = s.GetBufferSetLength(n);
109 // Copy/translate characters starting at end of string
113 // Is it a control character in the range 0..31 except TAB?
114 if (!(c & ~_T('\x1F')) && c != _T('\t'))
116 // Bitwise OR with 0x100 so _itot() will output 3 hex digits
// The three digits plus _itot()'s terminating zero fill the four slots
// that end at p + n - 1.
117 _itot(0x100 | c, p + n - 4, 16);
118 // Replace terminating zero with leadout character
120 // Prepare to replace 1st hex digit with leadin character
129 * @brief Get file's EOL type.
130 * @param [in] stats File's text stats.
133 static CRLFSTYLE GetTextFileStyle(const UniMemFile::txtstats & stats)
135 // Check if file has more than one EOL type.
136 if (!IsTextFileStylePure(stats))
137 return CRLF_STYLE_MIXED;
138 else if (stats.ncrlfs >= stats.nlfs)
140 if (stats.ncrlfs >= stats.ncrs)
141 return CRLF_STYLE_DOS;
143 return CRLF_STYLE_MAC;
147 if (stats.nlfs >= stats.ncrs)
148 return CRLF_STYLE_UNIX;
150 return CRLF_STYLE_MAC;
155 * @brief Constructor.
156 * @param [in] pDoc Owning CMergeDoc.
157 * @param [in] pane Pane number this buffer is associated with.
159 CDiffTextBuffer::CDiffTextBuffer(CMergeDoc * pDoc, int pane)
162 , m_unpackerSubcode(0)
168 * @brief Get a line from the buffer.
169 * @param [in] nLineIndex Index of the line to get.
170 * @param [out] strLine Returns line text in the index.
// Copies the characters of line nLineIndex into strLine. The length comes
// from GetLineLength(), not GetFullLineLength(), so the EOL is not copied.
172 bool CDiffTextBuffer::GetLine(int nLineIndex, CString &strLine) const
174 int nLineLength = CCrystalTextBuffer::GetLineLength(nLineIndex);
177 else if (nLineLength == 0)
// Non-empty line: request a buffer of nLineLength + 1 characters, copy
// exactly nLineLength characters, then set the string length explicitly
// (the copy itself does not write a terminator).
181 _tcsncpy(strLine.GetBuffer(nLineLength + 1),
182 CCrystalTextBuffer::GetLineChars(nLineIndex), nLineLength);
183 strLine.ReleaseBuffer(nLineLength);
189 * @brief Set the buffer modified status.
190 * @param [in] bModified New modified status, true if buffer has been
191 * modified since last saving.
193 void CDiffTextBuffer::SetModified(bool bModified /*= true*/)
195 CCrystalTextBuffer::SetModified (bModified);
196 m_pOwnerDoc->SetModifiedFlag (bModified);
200 * @brief Get a line (with EOL bytes) from the buffer.
201 * This function is like GetLine() but it also includes line's EOL to the
203 * @param [in] nLineIndex Index of the line to get.
204 * @param [out] strLine Returns line text in the index. Existing content
205 * of this string is overwritten.
// Copies the full text of line nLineIndex into strLine, using the length
// reported by GetFullLineLength().
207 bool CDiffTextBuffer::GetFullLine(int nLineIndex, CString &strLine) const
209 int cchText = GetFullLineLength(nLineIndex);
// Size the destination to exactly cchText characters, then copy the raw
// characters; the byte count is scaled by sizeof(TCHAR).
215 LPTSTR pchText = strLine.GetBufferSetLength(cchText);
216 memcpy(pchText, GetLineChars(nLineIndex), cchText * sizeof(TCHAR));
220 void CDiffTextBuffer::AddUndoRecord(bool bInsert, const CPoint & ptStartPos,
221 const CPoint & ptEndPos, LPCTSTR pszText, int cchText,
222 int nLinesToValidate, int nActionType /*= CE_ACTION_UNKNOWN*/,
223 CDWordArray *paSavedRevisonNumbers)
225 CGhostTextBuffer::AddUndoRecord(bInsert, ptStartPos, ptEndPos, pszText,
226 cchText, nLinesToValidate, nActionType, paSavedRevisonNumbers);
227 if (m_aUndoBuf[m_nUndoPosition - 1].m_dwFlags & UNDO_BEGINGROUP)
229 m_pOwnerDoc->undoTgt.erase(m_pOwnerDoc->curUndo, m_pOwnerDoc->undoTgt.end());
230 m_pOwnerDoc->undoTgt.push_back(m_pOwnerDoc->GetView(m_nThisPane));
231 m_pOwnerDoc->curUndo = m_pOwnerDoc->undoTgt.end();
235 * @brief Checks if a flag is set for line.
236 * @param [in] line Index (0-based) for line.
237 * @param [in] flag Flag to check.
238 * @return true if flag is set, false otherwise.
240 bool CDiffTextBuffer::FlagIsSet(UINT line, DWORD flag) const
242 return ((m_aLines[line].m_dwFlags & flag) == flag);
246 Remove blank lines and clear winmerge flags
247 (2003-06-21, Perry: I don't understand why this is necessary, but if this isn't
248 done, more and more gray lines appear in the file)
249 (2003-07-31, Laoran I don't understand either why it is necessary, but it works
250 fine, so let's go on with it)
252 void CDiffTextBuffer::prepareForRescan()
254 RemoveAllGhostLines();
255 for (int ct = GetLineCount() - 1; ct >= 0; --ct)
258 LF_INVISIBLE | LF_DIFF | LF_TRIVIAL | LF_MOVED | LF_SNP,
259 false, false, false);
264 * @brief Called when line has been edited.
265 * After editing a line, we don't know if there is a diff or not.
266 * So we clear the LF_DIFF flag (and it is more easy to read during edition).
267 * Rescan will set the proper color.
268 * @param [in] nLine Line that has been edited.
271 void CDiffTextBuffer::OnNotifyLineHasBeenEdited(int nLine)
273 SetLineFlag(nLine, LF_DIFF, false, false, false);
274 SetLineFlag(nLine, LF_TRIVIAL, false, false, false);
275 SetLineFlag(nLine, LF_MOVED, false, false, false);
276 SetLineFlag(nLine, LF_SNP, false, false, false);
277 CGhostTextBuffer::OnNotifyLineHasBeenEdited(nLine);
281 * @brief Set the folder for temp files.
282 * @param [in] path Temp files folder.
284 void CDiffTextBuffer::SetTempPath(const String &path)
286 m_strTempPath = path;
290 * @brief Is the buffer initialized?
291 * @return true if the buffer is initialized, false otherwise.
293 bool CDiffTextBuffer::IsInitialized() const
299 * @brief Load file from disk into buffer
301 * @param [in] pszFileNameInit File to load
302 * @param [in] infoUnpacker Unpacker plugin
303 * @param [in] sToFindUnpacker String for finding unpacker plugin
304 * @param [out] readOnly Loading was lossy so file should be read-only
305 * @param [in] nCrlfStyle EOL style used
306 * @param [in] encoding Encoding used
307 * @param [out] sError Error message returned
308 * @return FRESULT_OK when loading succeeds, or one of the following (listed in files.h):
309 * - FRESULT_OK_IMPURE : load OK, but the EOL are of different types
310 * - FRESULT_ERROR_UNPACK : plugin failed to unpack
311 * - FRESULT_ERROR : loading failed, sError contains error message
312 * - FRESULT_BINARY : file is binary file
313 * @note If this method fails, it calls InitNew so the CDiffTextBuffer is in a valid state
// Loads a file into the buffer. The visible steps are: unpack the file
// through the plugin layer, open the (possibly transformed) file through the
// UniFile interface, append it line by line, then record EOL style, undo
// state and encoding, and finally remove the temp file unpacking may have
// created.
315 int CDiffTextBuffer::LoadFromFile(LPCTSTR pszFileNameInit,
316 PackingInfo * infoUnpacker, LPCTSTR sToFindUnpacker, bool & readOnly,
317 CRLFSTYLE nCrlfStyle, const FileTextEncoding & encoding, CString &sError)
// Loading expects an empty line array.
320 ASSERT(m_aLines.size() == 0);
322 // Unpacking the file here, save the result in a temporary file
// sFileName starts as the original name and is passed to the unpacking
// calls; a later comparison with pszFileNameInit decides whether a temp
// file has to be deleted.
323 String sFileName(pszFileNameInit);
// Two unpacking variants follow: one takes sToFindUnpacker (used when the
// plugin is still to be scanned for), the other uses infoUnpacker as is.
324 if (infoUnpacker->bToBeScanned)
326 if (!FileTransform_Unpacking(sFileName, sToFindUnpacker, infoUnpacker,
329 InitNew(); // leave crystal editor in valid, empty state
330 return FileLoadResult::FRESULT_ERROR_UNPACK;
335 if (!FileTransform_Unpacking(sFileName, infoUnpacker, &m_unpackerSubcode))
337 InitNew(); // leave crystal editor in valid, empty state
338 return FileLoadResult::FRESULT_ERROR_UNPACK;
341 // we use the same unpacker for both files, so it must be defined after first file
342 ASSERT(infoUnpacker->bToBeScanned != PLUGIN_AUTO);
343 // we will load the transformed file
344 LPCTSTR pszFileName = sFileName.c_str();
347 DWORD nRetVal = FileLoadResult::FRESULT_OK;
349 // Set encoding based on extension, if we know one
350 paths_SplitFilename(pszFileName, NULL, NULL, &sExt);
351 CCrystalTextView::TextDefinition *def =
352 CCrystalTextView::GetTextType(sExt.c_str());
// An encoding of -1 in the text definition means "none specified".
353 if (def && def->encoding != -1)
354 m_nSourceEncoding = def->encoding;
// Prefer a UniFile supplied by the unpacker info.
356 UniFile *pufile = infoUnpacker->pufile;
// NOTE(review): raw new. Presumably this runs only when no UniFile was
// supplied; confirm that every exit path (including the goto below)
// releases it, and only when it was allocated here.
358 pufile = new UniMemFile;
360 // Now we only use the UniFile interface
361 // which is something we could implement for HTTP and/or FTP files
363 if (!pufile->OpenReadOnly(pszFileName))
365 nRetVal = FileLoadResult::FRESULT_ERROR;
366 UniFile::UniError uniErr = pufile->GetLastUniError();
367 if (uniErr.HasError())
369 sError = uniErr.GetError().c_str();
371 InitNew(); // leave crystal editor in valid, empty state
372 goto LoadFromFileExit;
// A named plugin was applied, so the encoding is guessed again from the
// file that is actually opened.
376 if (infoUnpacker->pluginName.length() > 0)
378 // re-detect codepage
379 int iGuessEncodingType = GetOptionsMgr()->GetInt(OPT_CP_DETECT);
380 FileTextEncoding encoding2 = GuessCodepageEncoding(pszFileName, iGuessEncodingType);
381 pufile->SetUnicoding(encoding2.m_unicoding);
382 pufile->SetCodepage(encoding2.m_codepage);
383 pufile->SetBom(encoding2.m_bom);
389 // If the file is not unicode file, use the codepage we were given to
390 // interpret the 8-bit characters. If the file is unicode file,
391 // determine its type (IsUnicode() does that).
392 if (encoding.m_unicoding == ucr::NONE || !pufile->IsUnicode())
393 pufile->SetCodepage(encoding.m_codepage);
400 UINT next_line_report = 100; // for trace messages
401 UINT next_line_multiple = 5; // for trace messages
403 COleDateTime start = COleDateTime::GetCurrentTime(); // for trace messages
405 // Manually grow line array exponentially
406 UINT arraysize = 500;
407 m_aLines.resize(arraysize);
409 // preveol must be initialized for empty files
// ReadString() returning false marks the end of input.
414 done = !pufile->ReadString(sline, eol, &lossy);
416 // if last line had no eol, we can quit
417 if (done && preveol.empty())
419 // but if last line had eol, we add an extra (empty) line to buffer
// The line array is full: grow it before appending.
422 if (lineno == arraysize)
424 // For smaller sizes use exponential growth, but for larger
425 // sizes grow by constant ratio. Unlimited exponential growth
426 // easily runs out of memory.
427 if (arraysize < 100 * 1024)
430 arraysize += 100 * 1024;
431 m_aLines.resize(arraysize);
434 sline += eol; // TODO: opportunity for optimization, as CString append is terrible
437 // TODO: Should record lossy status of line
439 AppendLine(lineno, sline.c_str(), sline.length());
444 // send occasional line counts to trace
445 // (at 100, 500, 1000, 5000, etc)
446 if (lineno == next_line_report)
448 __int64 dwBytesRead = pufile->GetPosition();
449 COleDateTimeSpan duration = COleDateTime::GetCurrentTime() - start;
450 if (duration.GetTotalMinutes() > 0)
452 CString strace = GetLineByteTimeReport(lineno, dwBytesRead, start);
453 TRACE(_T("%s\n"), (LPCTSTR)strace);
// Alternating multipliers 5 and 2 yield the thresholds 100, 500, 1000, 5000...
455 next_line_report = next_line_multiple * next_line_report;
456 next_line_multiple = (next_line_multiple == 5) ? 2 : 5;
462 // Send report of duration to trace (if it took a while)
463 COleDateTime end = COleDateTime::GetCurrentTime();
464 COleDateTimeSpan duration = end - start;
465 if (duration.GetTotalMinutes() > 0)
467 __int64 dwBytesRead = pufile->GetPosition();
468 CString strace = GetLineByteTimeReport(lineno, dwBytesRead, start);
469 TRACE(_T("%s\n"), (LPCTSTR)strace);
473 // fix array size (due to our manual exponential growth)
474 m_aLines.resize(lineno);
477 //Try to determine current CRLF mode (most frequent)
478 if (nCrlfStyle == CRLF_STYLE_AUTOMATIC)
480 nCrlfStyle = GetTextFileStyle(pufile->GetTxtStats());
// NOTE(review): the literal bounds 0..3 presumably span the concrete
// CRLFSTYLE values; confirm against the enum definition.
482 ASSERT(nCrlfStyle >= 0 && nCrlfStyle <= 3);
483 SetCRLFMode(nCrlfStyle);
485 // At least one empty line must present
486 // (view does not work for empty buffers)
487 ASSERT(m_aLines.size() > 0);
// Reset the undo bookkeeping for the freshly loaded content.
491 m_bUndoGroup = m_bUndoBeginGroup = false;
492 m_nSyncPosition = m_nUndoPosition = 0;
493 ASSERT(m_aUndoBuf.size() == 0);
494 m_ptLastChange.x = m_ptLastChange.y = -1;
497 // flags don't need initialization because 0 is the default value
499 // Set the return value : OK + info if the file is impure
500 // A pure file is a file where EOL are consistent (all DOS, or all UNIX, or all MAC)
501 // An impure file is a file with several EOL types
502 // WinMerge may display impure files, but the default option is to unify the EOL
503 // We return this info to the caller, so it may display a confirmation box
504 if (IsTextFileStylePure(pufile->GetTxtStats()))
505 nRetVal = FileLoadResult::FRESULT_OK;
507 nRetVal = FileLoadResult::FRESULT_OK_IMPURE;
509 // stash original encoding away
510 m_encoding.m_unicoding = pufile->GetUnicoding();
511 m_encoding.m_bom = pufile->HasBom();
512 m_encoding.m_codepage = pufile->GetCodepage();
// Characters lost during conversion are reported through a result modifier.
514 if (pufile->GetTxtStats().nlosses)
516 FileLoadResult::AddModifier(nRetVal, FileLoadResult::FRESULT_LOSSY);
522 // close the file now to free the handle
526 // delete the file that unpacking may have created
// The names differ only when unpacking redirected sFileName to another file.
527 if (_tcscmp(pszFileNameInit, pszFileName) != 0)
531 TFile(pszFileName).remove();
// A failure to remove the temp file is logged.
535 LogErrorStringUTF8(e.displayText());
542 * @brief Saves file from buffer to disk
544 * @param bTempFile : false if we are saving user files and
545 * true if we are saving working temp files for the diff engine
547 * @return SAVE_DONE or an error code (list in MergeDoc.h)
// Writes lines of the buffer to disk. The visible steps are: resolve the EOL
// style, open either the target file or an intermediate temp file, write the
// lines, run the packer on the intermediate file, copy it over the target,
// and update the modified/revision bookkeeping.
549 int CDiffTextBuffer::SaveToFile (const String& pszFileName,
550 bool bTempFile, String & sError, PackingInfo * infoUnpacker /*= NULL*/,
551 CRLFSTYLE nCrlfStyle /*= CRLF_STYLE_AUTOMATIC*/,
552 bool bClearModifiedFlag /*= true*/,
553 bool bForceUTF8 /*= false*/,
554 int nStartLine /*= 0*/, int nLines /*= -1*/)
556 ASSERT (nCrlfStyle == CRLF_STYLE_AUTOMATIC || nCrlfStyle == CRLF_STYLE_DOS ||
557 nCrlfStyle == CRLF_STYLE_UNIX || nCrlfStyle == CRLF_STYLE_MAC);
// Default range: everything from nStartLine to the end of the buffer.
561 nLines = m_aLines.size() - nStartLine;
563 if (pszFileName.empty())
564 return SAVE_FAILED; // No filename, cannot save...
// && binds tighter than ||, so this condition reads:
// (automatic style && mixed EOL not allowed) || (unpacker disallows mixed EOL)
566 if (nCrlfStyle == CRLF_STYLE_AUTOMATIC &&
567 !GetOptionsMgr()->GetBool(OPT_ALLOW_MIXED_EOL) ||
568 infoUnpacker && infoUnpacker->disallowMixedEOL)
570 // get the default nCrlfStyle of the CDiffTextBuffer
571 nCrlfStyle = GetCRLFMode();
572 ASSERT(nCrlfStyle >= 0 && nCrlfStyle <= 3);
575 bool bOpenSuccess = true;
576 bool bSaveSuccess = false;
// bForceUTF8 overrides the stored encoding with UTF-8 plus BOM.
579 file.SetUnicoding(bForceUTF8 ? ucr::UTF8 : m_encoding.m_unicoding);
580 file.SetBom(bForceUTF8 ? true : m_encoding.m_bom);
581 file.SetCodepage(bForceUTF8 ? CP_UTF8 : m_encoding.m_codepage);
583 String sIntermediateFilename; // used when !bTempFile
// Either the target file is created directly...
587 bOpenSuccess = !!file.OpenCreate(pszFileName);
// ...or an intermediate file in the temp folder is created first.
591 sIntermediateFilename = env_GetTempFileName(m_strTempPath,
593 if (sIntermediateFilename.empty())
594 return SAVE_FAILED; //Nothing to do if even tempfile name fails
595 bOpenSuccess = !!file.OpenCreate(sIntermediateFilename);
// Open failed: fetch the error text and log it with the name that was used.
600 UniFile::UniError uniErr = file.GetLastUniError();
601 if (uniErr.HasError())
603 sError = uniErr.GetError().c_str();
605 LogErrorString(string_format(_T("Opening file %s failed: %s"),
606 pszFileName.c_str(), sError.c_str()));
608 LogErrorString(string_format(_T("Opening file %s failed: %s"),
609 sIntermediateFilename.c_str(), sError.c_str()));
616 // line loop : get each real line and write it in the file
618 CString sEol = GetStringEol(nCrlfStyle);
// NOTE(review): line is size_t while nStartLine/nLines and lastRealLine are
// int, so the comparisons below mix signed and unsigned values.
619 for (size_t line = nStartLine; line < nStartLine + nLines; ++line)
// Ghost lines get special treatment (presumably skipped; the statement that
// follows is not shown here).
621 if (GetLineFlags(line) & LF_GHOST)
624 // get the characters of the line (excluding EOL)
625 if (GetLineLength(line) > 0)
627 int nLineLength = GetLineLength(line);
628 void *pszBuf = sLine.GetBuffer(nLineLength);
629 memcpy(pszBuf, GetLineChars(line), nLineLength * sizeof(TCHAR));
630 sLine.ReleaseBuffer(nLineLength);
636 EscapeControlChars(sLine);
638 int lastRealLine = ApparentLastRealLine();
639 if (line == lastRealLine || lastRealLine == -1 )
641 // last real line is never EOL terminated
642 ASSERT (_tcslen(GetLineEol(line)) == 0);
643 // write the line and exit loop
644 String tmpLine(sLine);
645 file.WriteString(tmpLine);
649 // normal real line : append an EOL
650 if (nCrlfStyle == CRLF_STYLE_AUTOMATIC || nCrlfStyle == CRLF_STYLE_MIXED)
652 // either the EOL of the line (when preserve original EOL chars is on)
653 sLine += GetLineEol(line);
657 // or the default EOL for this file
661 // write this line to the file (codeset or unicode conversions are done there)
662 String tmpLine(sLine);
663 file.WriteString(tmpLine);
669 // If we are saving user files
670 // we need an unpacker/packer, at least a "do nothing" one
671 ASSERT(infoUnpacker != NULL);
672 // repack the file here, overwrite the temporary file we did save in
673 String csTempFileName = sIntermediateFilename;
// Hand the subcode stored in m_unpackerSubcode back to the packer.
674 infoUnpacker->subcode = m_unpackerSubcode;
675 if (!FileTransform_Packing(csTempFileName, *infoUnpacker))
// Packing failed: remove the intermediate file; a removal error is logged.
679 TFile(sIntermediateFilename).remove();
683 LogErrorStringUTF8(e.displayText());
685 // returns now, don't overwrite the original file
686 return SAVE_PACK_FAILED;
688 // the temp filename may have changed during packing
689 if (csTempFileName != sIntermediateFilename)
693 TFile(sIntermediateFilename).remove();
697 LogErrorStringUTF8(e.displayText());
699 sIntermediateFilename = csTempFileName;
702 // Write tempfile over original file
// This local TFile hides the name of the output file object used above.
705 TFile file(sIntermediateFilename);
706 file.copyTo(pszFileName);
708 if (bClearModifiedFlag)
// The sync position marks the undo position that matches the saved state.
711 m_nSyncPosition = m_nUndoPosition;
715 // remember revision number on save
716 m_dwRevisionNumberOnSave = m_dwCurrentRevisionNumber;
718 // redraw line revision marks
719 UpdateViews (NULL, NULL, UPDATE_FLAGSONLY);
723 LogErrorStringUTF8(e.displayText());
728 if (bClearModifiedFlag)
731 m_nSyncPosition = m_nUndoPosition;
742 /// Replace line (removing any eol, and only including one if in strText)
743 void CDiffTextBuffer::ReplaceFullLines(CDiffTextBuffer& dbuf, CDiffTextBuffer& sbuf, CCrystalTextView * pSource, int nLineBegin, int nLineEnd, int nAction /*=CE_ACTION_UNKNOWN*/)
747 if (nLineBegin != nLineEnd || sbuf.GetLineLength(nLineEnd) > 0)
748 sbuf.GetTextWithoutEmptys(nLineBegin, 0, nLineEnd, sbuf.GetLineLength(nLineEnd), strText);
749 strText += sbuf.GetLineEol(nLineEnd);
751 if (nLineBegin != nLineEnd || dbuf.GetFullLineLength(nLineEnd) > 0)
753 int nLineEndSource = nLineEnd < dbuf.GetLineCount() ? nLineEnd : dbuf.GetLineCount();
754 if (nLineEnd+1 < GetLineCount())
755 dbuf.DeleteText(pSource, nLineBegin, 0, nLineEndSource + 1, 0, nAction);
757 dbuf.DeleteText(pSource, nLineBegin, 0, nLineEndSource, dbuf.GetLineLength(nLineEndSource), nAction);
760 if (int cchText = strText.GetLength())
761 dbuf.InsertText(pSource, nLineBegin, 0, strText, cchText, endl,endc, nAction);
765 bool CDiffTextBuffer::curUndoGroup()
767 return (m_aUndoBuf.size() != 0 && m_aUndoBuf[0].m_dwFlags&UNDO_BEGINGROUP);