2 * @file DiffTextBuffer.cpp
4 * @brief Implementation file for CDiffTextBuffer
7 // ID line follows -- this is updated by SVN
8 // $Id: DiffTextBuffer.cpp 7082 2010-01-03 22:15:50Z sdottaka $
15 #include "coretools.h"
17 #include "OptionsDef.h"
18 #include "Environment.h"
19 #include "MergeLineFlags.h"
21 #include "FileTransform.h"
22 #include "FileTextEncoding.h"
23 #include "DiffTextBuffer.h"
24 #include "codepage_detect.h"
// File-local array initialized from the __FILE__ macro.
29 static char THIS_FILE[] = __FILE__;
// Prototypes for file-local (static) helper functions.
32 static bool IsTextFileStylePure(const UniMemFile::txtstats & stats);
33 static CString GetLineByteTimeReport(UINT lines, __int64 bytes,
34 const COleDateTime & start);
35 static void EscapeControlChars(CString &s);
36 static LPCTSTR GetEol(const CString &str);
37 static CRLFSTYLE GetTextFileStyle(const UniMemFile::txtstats & stats);
40 * @brief Check if file has only one EOL type.
41 * @param [in] stats File's text stats.
42 * @return true if only one EOL type is found, false otherwise.
// Callers in this file treat a false result as "more than one EOL type":
// GetTextFileStyle() then returns CRLF_STYLE_MIXED and LoadFromFile() reports
// FRESULT_OK_IMPURE.
44 static bool IsTextFileStylePure(const UniMemFile::txtstats & stats)
57 * @brief Return a string giving #lines and #bytes and how much time elapsed.
58 * @param [in] lines Count of lines.
59 * @param [in] bytes Count of bytes.
60 * @param [in] start Time used.
61 * @return Formatted string.
63 static CString GetLineByteTimeReport(UINT lines, __int64 bytes,
64 const COleDateTime & start)
66 String sLines = locality::NumToLocaleStr((int)lines);
67 String sBytes = locality::NumToLocaleStr(bytes);
68 COleDateTimeSpan duration = COleDateTime::GetCurrentTime() - start;
69 String sMinutes = locality::NumToLocaleStr((int)duration.GetTotalMinutes());
71 str.Format(_T("%s lines (%s byte) saved in %sm%02ds")
72 , sLines.c_str(), sBytes.c_str(), sMinutes.c_str()
73 , duration.GetSeconds()
79 * @brief Escape control characters.
80 * @param [in,out] s Line of text excluding eol chars.
82 * @note Escape sequences follow the pattern
83 * (leadin character, high nibble, low nibble, leadout character).
84 * The leadin character is '\x0F'. The leadout character is a backslash.
// Works in two passes: the first pass only computes how long the escaped
// string will be, the second rewrites the characters after the string buffer
// has been resized to that length.
86 static void EscapeControlChars(CString &s)
88 // Compute buffer length required for escaping
89 int n = s.GetLength();
95 // Is it a control character in the range 0..31 except TAB?
// (c & ~0x1F) is zero only when no bit above the low five bits is set.
96 if (!(c & ~_T('\x1F')) && c != _T('\t'))
98 n += 3; // Need 3 extra characters to escape
101 // Reallocate accordingly
103 LPTSTR p = s.GetBufferSetLength(n);
104 // Copy/translate characters starting at end of string
// NOTE(review): presumably the copy runs backwards so the longer escaped text
// can be written into the same buffer without overwriting characters that
// have not been processed yet - confirm.
108 // Is it a control character in the range 0..31 except TAB?
109 if (!(c & ~_T('\x1F')) && c != _T('\t'))
111 // Bitwise OR with 0x100 so _itot() will output 3 hex digits
// The digits are written into the four characters ending at p + n.
112 _itot(0x100 | c, p + n - 4, 16);
113 // Replace terminating zero with leadout character
115 // Prepare to replace 1st hex digit with leadin character
124 * @brief Get file's EOL type.
125 * @param [in] stats File's text stats.
128 static CRLFSTYLE GetTextFileStyle(const UniMemFile::txtstats & stats)
130 // Check if file has more than one EOL type.
131 if (!IsTextFileStylePure(stats))
132 return CRLF_STYLE_MIXED;
133 else if (stats.ncrlfs >= stats.nlfs)
135 if (stats.ncrlfs >= stats.ncrs)
136 return CRLF_STYLE_DOS;
138 return CRLF_STYLE_MAC;
142 if (stats.nlfs >= stats.ncrs)
143 return CRLF_STYLE_UNIX;
145 return CRLF_STYLE_MAC;
150 * @brief Constructor.
151 * @param [in] pDoc Owning CMergeDoc.
152 * @param [in] pane Pane number this buffer is associated with.
154 CDiffTextBuffer::CDiffTextBuffer(CMergeDoc * pDoc, int pane)
// m_unpackerSubcode starts at 0. LoadFromFile() passes its address to
// FileTransform_Unpacking(), and SaveToFile() copies it into
// infoUnpacker->subcode before packing.
157 , m_unpackerSubcode(0)
163 * @brief Get a line from the buffer.
164 * @param [in] nLineIndex Index of the line to get.
165 * @param [out] strLine Returns line text in the index.
// Copies the characters of one line into strLine. The length and the
// characters are both read through the CCrystalTextBuffer accessors.
167 BOOL CDiffTextBuffer::GetLine(int nLineIndex, CString &strLine) const
169 int nLineLength = CCrystalTextBuffer::GetLineLength(nLineIndex);
// A zero-length line is handled as a separate case from the copy below.
172 else if (nLineLength == 0)
// Request room for one character more than the line length, copy exactly
// nLineLength characters, then fix the string length with ReleaseBuffer().
176 _tcsncpy(strLine.GetBuffer(nLineLength + 1),
177 CCrystalTextBuffer::GetLineChars(nLineIndex), nLineLength);
178 strLine.ReleaseBuffer(nLineLength);
184 * @brief Set the buffer modified status.
185 * @param [in] bModified New modified status, TRUE if buffer has been
186 * modified since last saving.
// Forwards the new status to CCrystalTextBuffer, then sets the same status on
// the owner document through SetModifiedFlag().
188 void CDiffTextBuffer::SetModified(BOOL bModified /*= TRUE*/)
190 CCrystalTextBuffer::SetModified (bModified);
191 m_pOwnerDoc->SetModifiedFlag (bModified);
195 * @brief Get a line (with EOL bytes) from the buffer.
196 * This function is like GetLine() but it also includes line's EOL to the
198 * @param [in] nLineIndex Index of the line to get.
199 * @param [out] strLine Returns line text in the index. Existing content
200 * of this string is overwritten.
// The length comes from GetFullLineLength(); strLine is resized to exactly
// that many characters and filled with a raw copy of the line's characters.
202 BOOL CDiffTextBuffer::GetFullLine(int nLineIndex, CString &strLine) const
204 int cchText = GetFullLineLength(nLineIndex);
// GetBufferSetLength() sets the string length to cchText and returns the
// writable buffer.
210 LPTSTR pchText = strLine.GetBufferSetLength(cchText);
// memcpy counts bytes, hence the scaling by sizeof(TCHAR).
211 memcpy(pchText, GetLineChars(nLineIndex), cchText * sizeof(TCHAR));
215 void CDiffTextBuffer::AddUndoRecord(BOOL bInsert, const CPoint & ptStartPos,
216 const CPoint & ptEndPos, LPCTSTR pszText, int cchText,
217 int nLinesToValidate, int nActionType /*= CE_ACTION_UNKNOWN*/,
218 CDWordArray *paSavedRevisonNumbers)
220 CGhostTextBuffer::AddUndoRecord(bInsert, ptStartPos, ptEndPos, pszText,
221 cchText, nLinesToValidate, nActionType, paSavedRevisonNumbers);
222 if (m_aUndoBuf[m_nUndoPosition - 1].m_dwFlags & UNDO_BEGINGROUP)
224 m_pOwnerDoc->undoTgt.erase(m_pOwnerDoc->curUndo, m_pOwnerDoc->undoTgt.end());
225 m_pOwnerDoc->undoTgt.push_back(m_pOwnerDoc->GetView(m_nThisPane));
226 m_pOwnerDoc->curUndo = m_pOwnerDoc->undoTgt.end();
230 * @brief Checks if a flag is set for line.
231 * @param [in] line Index (0-based) for line.
232 * @param [in] flag Flag to check.
233 * @return TRUE if flag is set, FALSE otherwise.
235 BOOL CDiffTextBuffer::FlagIsSet(UINT line, DWORD flag) const
237 return ((m_aLines[line].m_dwFlags & flag) == flag);
241 Remove blank lines and clear winmerge flags
242 (2003-06-21, Perry: I don't understand why this is necessary, but if this isn't
243 done, more and more gray lines appear in the file)
244 (2003-07-31, Laoran I don't understand either why it is necessary, but it works
245 fine, so let's go on with it)
247 void CDiffTextBuffer::prepareForRescan()
// Ghost lines are removed first, so the loop only visits the remaining lines.
249 RemoveAllGhostLines();
// Walk the lines from the last one down to index 0.
250 for (int ct = GetLineCount() - 1; ct >= 0; --ct)
// Flag mask applied to each line. The three FALSE arguments have the same
// shape as the SetLineFlag() calls in OnNotifyLineHasBeenEdited().
// NOTE(review): presumably these flags are cleared through SetLineFlag() - confirm.
253 LF_INVISIBLE | LF_DIFF | LF_TRIVIAL | LF_MOVED | LF_SNP,
254 FALSE, FALSE, FALSE);
259 * @brief Called when line has been edited.
260 * After editing a line, we don't know if there is a diff or not.
261 * So we clear the LF_DIFF flag (and it is more easy to read during edition).
262 * Rescan will set the proper color.
263 * @param [in] nLine Line that has been edited.
266 void CDiffTextBuffer::OnNotifyLineHasBeenEdited(int nLine)
268 SetLineFlag(nLine, LF_DIFF, FALSE, FALSE, FALSE);
269 SetLineFlag(nLine, LF_TRIVIAL, FALSE, FALSE, FALSE);
270 SetLineFlag(nLine, LF_MOVED, FALSE, FALSE, FALSE);
271 SetLineFlag(nLine, LF_SNP, FALSE, FALSE, FALSE);
272 CGhostTextBuffer::OnNotifyLineHasBeenEdited(nLine);
276 * @brief Set the folder for temp files.
277 * @param [in] path Temp files folder.
// Stores a copy of the path. SaveToFile() passes m_strTempPath to
// env_GetTempFileName() when it needs an intermediate file.
279 void CDiffTextBuffer::SetTempPath(const String &path)
281 m_strTempPath = path;
285 * @brief Is the buffer initialized?
286 * @return TRUE if the buffer is initialized, FALSE otherwise.
// Const query: takes no arguments and does not modify the buffer.
// NOTE(review): the state this reports is presumably reset by InitNew(),
// which LoadFromFile() calls on failure - confirm against the base class.
288 bool CDiffTextBuffer::IsInitialized() const
294 * @brief Load file from disk into buffer
296 * @param [in] pszFileNameInit File to load
297 * @param [in] infoUnpacker Unpacker plugin
298 * @param [in] sToFindUnpacker String for finding unpacker plugin
299 * @param [out] readOnly Loading was lossy so file should be read-only
300 * @param [in] nCrlfStyle EOL style used
301 * @param [in] encoding Encoding used
302 * @param [out] sError Error message returned
303 * @return FRESULT_OK when loading succeeds or (list in files.h):
304 * - FRESULT_OK_IMPURE : load OK, but the EOL are of different types
305 * - FRESULT_ERROR_UNPACK : plugin failed to unpack
306 * - FRESULT_ERROR : loading failed, sError contains error message
307 * - FRESULT_BINARY : file is binary file
308 * @note If this method fails, it calls InitNew so the CDiffTextBuffer is in a valid state
// Overall flow: unpack the file (possibly into a temporary file), open the
// result through the UniFile interface, read it line by line into m_aLines,
// set the CRLF mode, reset the undo state, record the encoding, and finally
// delete the temporary file if unpacking created one.
310 int CDiffTextBuffer::LoadFromFile(LPCTSTR pszFileNameInit,
311 PackingInfo * infoUnpacker, LPCTSTR sToFindUnpacker, BOOL & readOnly,
312 CRLFSTYLE nCrlfStyle, const FileTextEncoding & encoding, CString &sError)
// The buffer is expected to be empty when loading starts.
315 ASSERT(m_aLines.size() == 0);
317 // Unpacking the file here, save the result in a temporary file
318 String sFileName(pszFileNameInit);
// Two unpacking paths: when bToBeScanned is set, sToFindUnpacker is passed
// along to FileTransform_Unpacking(); otherwise the unpacker described by
// infoUnpacker is used directly. Either failure resets the buffer and
// returns FRESULT_ERROR_UNPACK.
319 if (infoUnpacker->bToBeScanned)
321 if (!FileTransform_Unpacking(sFileName, sToFindUnpacker, infoUnpacker,
324 InitNew(); // leave crystal editor in valid, empty state
325 return FileLoadResult::FRESULT_ERROR_UNPACK;
330 if (!FileTransform_Unpacking(sFileName, infoUnpacker, &m_unpackerSubcode))
332 InitNew(); // leave crystal editor in valid, empty state
333 return FileLoadResult::FRESULT_ERROR_UNPACK;
336 // we use the same unpacker for both files, so it must be defined after first file
337 ASSERT(infoUnpacker->bToBeScanned != PLUGIN_AUTO);
338 // we will load the transformed file
// pszFileName points into sFileName, which unpacking may have changed.
339 LPCTSTR pszFileName = sFileName.c_str();
// NOTE(review): nRetVal is a DWORD while the function's return type is int.
342 DWORD nRetVal = FileLoadResult::FRESULT_OK;
344 // Set encoding based on extension, if we know one
345 SplitFilename(pszFileName, NULL, NULL, &sExt);
346 CCrystalTextView::TextDefinition *def =
347 CCrystalTextView::GetTextType(sExt.c_str());
// An encoding of -1 in the text definition means "no encoding known".
348 if (def && def->encoding != -1)
349 m_nSourceEncoding = def->encoding;
// Start from the UniFile carried by the unpacker info.
351 UniFile *pufile = infoUnpacker->pufile;
// NOTE(review): raw new; no matching delete is visible in this function -
// confirm the exit path releases the object only when it was allocated here.
353 pufile = new UniMemFile;
355 // Now we only use the UniFile interface
356 // which is something we could implement for HTTP and/or FTP files
// Open failure: report the UniFile error text through sError, reset the
// buffer and jump to the common exit label.
358 if (!pufile->OpenReadOnly(pszFileName))
360 nRetVal = FileLoadResult::FRESULT_ERROR;
361 UniFile::UniError uniErr = pufile->GetLastUniError();
362 if (uniErr.HasError())
364 sError = uniErr.GetError().c_str();
366 InitNew(); // leave crystal editor in valid, empty state
367 goto LoadFromFileExit;
// When a plugin name is set, the encoding is guessed again from the file
// that is actually being loaded.
371 if (infoUnpacker->pluginName.length() > 0)
373 // re-detect codepage
374 FileTextEncoding encoding2;
375 int iGuessEncodingType = GetOptionsMgr()->GetInt(OPT_CP_DETECT);
376 GuessCodepageEncoding(pszFileName, &encoding2,
378 pufile->SetUnicoding(encoding2.m_unicoding);
379 pufile->SetCodepage(encoding2.m_codepage);
380 pufile->SetBom(encoding2.m_bom);
386 // If the file is not unicode file, use the codepage we were given to
387 // interpret the 8-bit characters. If the file is unicode file,
388 // determine its type (IsUnicode() does that).
389 if (encoding.m_unicoding == ucr::NONE || !pufile->IsUnicode())
390 pufile->SetCodepage(encoding.m_codepage);
// Progress is traced at line counts 100, 500, 1000, 5000, ...: the next
// threshold is the current one times a multiplier alternating between 5 and 2.
396 UINT next_line_report = 100; // for trace messages
397 UINT next_line_multiple = 5; // for trace messages
398 COleDateTime start = COleDateTime::GetCurrentTime(); // for trace messages
400 // Manually grow line array exponentially
401 UINT arraysize = 500;
402 m_aLines.resize(arraysize);
404 // preveol must be initialized for empty files
// ReadString() returns the line text and its EOL separately and reports
// lossy conversion through the third argument.
409 done = !pufile->ReadString(sline, eol, &lossy);
411 // if last line had no eol, we can quit
412 if (done && preveol.empty())
414 // but if last line had eol, we add an extra (empty) line to buffer
// The line array is full: grow it before appending.
417 if (lineno == arraysize)
419 // For smaller sizes use exponential growth, but for larger
420 // sizes grow by constant ratio. Unlimited exponential growth
421 // easily runs out of memory.
422 if (arraysize < 100 * 1024)
425 arraysize += 100 * 1024;
426 m_aLines.resize(arraysize);
429 sline += eol; // TODO: opportunity for optimization, as CString append is terrible
432 // TODO: Should record lossy status of line
434 AppendLine(lineno, sline.c_str(), sline.length());
439 // send occasional line counts to trace
440 // (at 100, 500, 1000, 5000, etc)
441 if (lineno == next_line_report)
443 __int64 dwBytesRead = pufile->GetPosition();
444 COleDateTimeSpan duration = COleDateTime::GetCurrentTime() - start;
// NOTE(review): GetLineByteTimeReport() formats its text with the word
// "saved" although it is called here while loading - confirm the wording.
445 if (duration.GetTotalMinutes() > 0)
447 CString strace = GetLineByteTimeReport(lineno, dwBytesRead, start);
448 TRACE(_T("%s\n"), (LPCTSTR)strace);
450 next_line_report = next_line_multiple * next_line_report;
451 next_line_multiple = (next_line_multiple == 5) ? 2 : 5;
457 // Send report of duration to trace (if it took a while)
458 COleDateTime end = COleDateTime::GetCurrentTime();
459 COleDateTimeSpan duration = end - start;
460 if (duration.GetTotalMinutes() > 0)
462 __int64 dwBytesRead = pufile->GetPosition();
463 CString strace = GetLineByteTimeReport(lineno, dwBytesRead, start);
464 TRACE(_T("%s\n"), (LPCTSTR)strace);
468 // fix array size (due to our manual exponential growth)
469 m_aLines.resize(lineno);
472 //Try to determine current CRLF mode (most frequent)
473 if (nCrlfStyle == CRLF_STYLE_AUTOMATIC)
475 nCrlfStyle = GetTextFileStyle(pufile->GetTxtStats());
477 ASSERT(nCrlfStyle >= 0 && nCrlfStyle <= 3);
478 SetCRLFMode(nCrlfStyle);
480 // At least one empty line must be present
481 // (view does not work for empty buffers)
482 ASSERT(m_aLines.size() > 0);
// Reset undo bookkeeping for the freshly loaded buffer.
486 m_bUndoGroup = m_bUndoBeginGroup = FALSE;
487 m_nSyncPosition = m_nUndoPosition = 0;
488 ASSERT(m_aUndoBuf.size() == 0);
489 m_ptLastChange.x = m_ptLastChange.y = -1;
492 // flags don't need initialization because 0 is the default value
494 // Set the return value : OK + info if the file is impure
495 // A pure file is a file where EOL are consistent (all DOS, or all UNIX, or all MAC)
496 // An impure file is a file with several EOL types
497 // WinMerge may display impure files, but the default option is to unify the EOL
498 // We return this info to the caller, so it may display a confirmation box
499 if (IsTextFileStylePure(pufile->GetTxtStats()))
500 nRetVal = FileLoadResult::FRESULT_OK;
502 nRetVal = FileLoadResult::FRESULT_OK_IMPURE;
504 // stash original encoding away
505 m_encoding.m_unicoding = pufile->GetUnicoding();
506 m_encoding.m_bom = pufile->HasBom();
507 m_encoding.m_codepage = pufile->GetCodepage();
// A non-zero loss count adds the FRESULT_LOSSY modifier to the result.
509 if (pufile->GetTxtStats().nlosses)
511 FileLoadResult::AddModifier(nRetVal, FileLoadResult::FRESULT_LOSSY);
517 // close the file now to free the handle
521 // delete the file that unpacking may have created
// A loaded name that differs from the name passed in means unpacking wrote
// a separate file; a failed delete is only logged.
522 if (_tcscmp(pszFileNameInit, pszFileName) != 0)
523 if (!::DeleteFile(pszFileName))
525 LogErrorString(Fmt(_T("DeleteFile(%s) failed: %s"),
526 pszFileName, GetSysError(GetLastError()).c_str()));
533 * @brief Saves file from buffer to disk
535 * @param bTempFile : FALSE if we are saving user files and
536 * TRUE if we are saving working temp files for the diff engine
538 * @return SAVE_DONE or an error code (list in MergeDoc.h)
// Overall flow: validate the arguments, choose the EOL style, open either the
// target file itself or an intermediate temp file, write every non-ghost
// line, and - for an intermediate file - pack it and copy it over the target.
540 int CDiffTextBuffer::SaveToFile (LPCTSTR pszFileName,
541 BOOL bTempFile, String & sError, PackingInfo * infoUnpacker /*= NULL*/,
542 CRLFSTYLE nCrlfStyle /*= CRLF_STYLE_AUTOMATIC*/,
543 BOOL bClearModifiedFlag /*= TRUE*/,
544 BOOL bForceUTF8 /*= FALSE*/)
// CRLF_STYLE_MIXED is not accepted as an input style.
546 ASSERT (nCrlfStyle == CRLF_STYLE_AUTOMATIC || nCrlfStyle == CRLF_STYLE_DOS ||
547 nCrlfStyle == CRLF_STYLE_UNIX || nCrlfStyle == CRLF_STYLE_MAC);
550 if (!pszFileName || _tcslen(pszFileName) == 0)
551 return SAVE_FAILED; // No filename, cannot save...
// NOTE(review): && binds tighter than ||, so this reads as
// (AUTOMATIC && !ALLOW_MIXED_EOL) || (infoUnpacker && disallowMixedEOL).
// The second alternative replaces even an explicitly requested nCrlfStyle
// with the buffer's own mode - confirm this grouping is intended.
553 if (nCrlfStyle == CRLF_STYLE_AUTOMATIC &&
554 !GetOptionsMgr()->GetBool(OPT_ALLOW_MIXED_EOL) ||
555 infoUnpacker && infoUnpacker->disallowMixedEOL)
557 // get the default nCrlfStyle of the CDiffTextBuffer
558 nCrlfStyle = GetCRLFMode();
559 ASSERT(nCrlfStyle >= 0 && nCrlfStyle <= 3);
562 BOOL bOpenSuccess = TRUE;
563 BOOL bSaveSuccess = FALSE;
// bForceUTF8 overrides the encoding remembered from loading: UTF-8 with BOM.
566 file.SetUnicoding(bForceUTF8 ? ucr::UTF8 : m_encoding.m_unicoding);
567 file.SetBom(bForceUTF8 ? true : m_encoding.m_bom);
568 file.SetCodepage(bForceUTF8 ? CP_UTF8 : m_encoding.m_codepage);
570 String sIntermediateFilename; // used when !bTempFile
// Either the target file is created directly ...
574 bOpenSuccess = !!file.OpenCreate(pszFileName);
// ... or an intermediate file is created under m_strTempPath.
578 sIntermediateFilename = env_GetTempFileName(m_strTempPath.c_str(),
580 if (sIntermediateFilename.empty())
581 return SAVE_FAILED; //Nothing to do if even tempfile name fails
582 bOpenSuccess = !!file.OpenCreate(sIntermediateFilename.c_str());
// Open failure: return the UniFile error text in sError and log it together
// with the name of the file that was being opened.
587 UniFile::UniError uniErr = file.GetLastUniError();
588 if (uniErr.HasError())
590 sError = uniErr.GetError().c_str();
592 LogErrorString(Fmt(_T("Opening file %s failed: %s"),
593 pszFileName, sError.c_str()));
595 LogErrorString(Fmt(_T("Opening file %s failed: %s"),
596 sIntermediateFilename.c_str(), sError.c_str()));
603 // line loop : get each real line and write it in the file
605 CString sEol = GetStringEol(nCrlfStyle);
606 const size_t nLineCount = m_aLines.size();
// NOTE(review): 'line' is an int compared against a size_t count
// (signed/unsigned comparison).
607 for (int line = 0; line < nLineCount; ++line)
// Lines carrying the LF_GHOST flag are tested for here; per the loop comment
// above, only real lines are written.
609 if (GetLineFlags(line) & LF_GHOST)
612 // get the characters of the line (excluding EOL)
613 if (GetLineLength(line) > 0)
615 int nLineLength = GetLineLength(line);
616 void *pszBuf = sLine.GetBuffer(nLineLength);
617 memcpy(pszBuf, GetLineChars(line), nLineLength * sizeof(TCHAR));
618 sLine.ReleaseBuffer(nLineLength);
624 EscapeControlChars(sLine);
// NOTE(review): ApparentLastRealLine() is evaluated on every iteration;
// nothing visible in this loop modifies the buffer, so it looks
// loop-invariant - confirm before hoisting.
626 int lastRealLine = ApparentLastRealLine();
627 if (line == lastRealLine || lastRealLine == -1 )
629 // last real line is never EOL terminated
630 ASSERT (_tcslen(GetLineEol(line)) == 0);
631 // write the line and exit loop
632 String tmpLine(sLine);
633 file.WriteString(tmpLine);
637 // normal real line : append an EOL
// CRLF_STYLE_MIXED can only reach this point via GetCRLFMode() above, since
// the entry ASSERT excludes it as an argument.
638 if (nCrlfStyle == CRLF_STYLE_AUTOMATIC || nCrlfStyle == CRLF_STYLE_MIXED)
640 // either the EOL of the line (when preserve original EOL chars is on)
641 sLine += GetLineEol(line);
645 // or the default EOL for this file
649 // write this line to the file (codeset or unicode conversions are done there)
650 String tmpLine(sLine);
651 file.WriteString(tmpLine);
657 // If we are saving user files
658 // we need an unpacker/packer, at least a "do nothing" one
659 ASSERT(infoUnpacker != NULL);
660 // repack the file here, overwrite the temporary file we did save in
661 String csTempFileName = sIntermediateFilename;
// Hand the subcode remembered from unpacking back to the packer.
662 infoUnpacker->subcode = m_unpackerSubcode;
663 if (!FileTransform_Packing(csTempFileName, *infoUnpacker))
// Packing failed: remove the intermediate file (a failed delete is only logged).
665 if (!::DeleteFile(sIntermediateFilename.c_str()))
667 LogErrorString(Fmt(_T("DeleteFile(%s) failed: %s"),
668 sIntermediateFilename.c_str(), GetSysError(GetLastError()).c_str()));
670 // returns now, don't overwrite the original file
671 return SAVE_PACK_FAILED;
673 // the temp filename may have changed during packing
674 if (csTempFileName != sIntermediateFilename)
676 if (!::DeleteFile(sIntermediateFilename.c_str()))
678 LogErrorString(Fmt(_T("DeleteFile(%s) failed: %s"),
679 sIntermediateFilename.c_str(), GetSysError(GetLastError()).c_str()));
681 sIntermediateFilename = csTempFileName;
684 // Write tempfile over original file
// The third CopyFile argument is FALSE, so an existing target is overwritten.
685 if (::CopyFile(sIntermediateFilename.c_str(), pszFileName, FALSE))
687 if (!::DeleteFile(sIntermediateFilename.c_str()))
689 LogErrorString(Fmt(_T("DeleteFile(%s) failed: %s"),
690 sIntermediateFilename.c_str(), GetSysError(GetLastError()).c_str()));
692 if (bClearModifiedFlag)
// The saved state becomes the undo sync point.
695 m_nSyncPosition = m_nUndoPosition;
699 // remember revision number on save
700 m_dwRevisionNumberOnSave = m_dwCurrentRevisionNumber;
702 // redraw line revision marks
703 UpdateViews (NULL, NULL, UPDATE_FLAGSONLY);
// Copy failed: return the system error text in sError and log it.
707 sError = GetSysError(GetLastError());
708 LogErrorString(Fmt(_T("CopyFile(%s, %s) failed: %s"),
709 sIntermediateFilename.c_str(), pszFileName, sError.c_str()));
714 if (bClearModifiedFlag)
717 m_nSyncPosition = m_nUndoPosition;
728 /// Replace line (removing any eol, and only including one if in strText)
// Replaces lines nLineBegin..nLineEnd of dbuf with the text of the same line
// range taken from sbuf: the source text is collected first, the destination
// range is deleted, then the collected text is inserted at nLineBegin.
729 void CDiffTextBuffer::ReplaceFullLines(CDiffTextBuffer& dbuf, CDiffTextBuffer& sbuf, CCrystalTextView * pSource, int nLineBegin, int nLineEnd, int nAction /*=CE_ACTION_UNKNOWN*/)
// Collect the source text; this is skipped only for a single empty line.
733 if (nLineBegin != nLineEnd || sbuf.GetLineLength(nLineEnd) > 0)
734 sbuf.GetTextWithoutEmptys(nLineBegin, 0, nLineEnd, sbuf.GetLineLength(nLineEnd), strText);
// The EOL of the last source line is appended in either case.
735 strText += sbuf.GetLineEol(nLineEnd);
// Delete the destination range unless it is a single line with no characters
// at all (EOL included).
737 if (nLineBegin != nLineEnd || dbuf.GetFullLineLength(nLineEnd) > 0)
// NOTE(review): the clamp uses GetLineCount() itself, not GetLineCount() - 1,
// so nLineEndSource can equal the line count - confirm that the
// GetLineLength(nLineEndSource) call below is valid for that value.
739 int nLineEndSource = nLineEnd < dbuf.GetLineCount() ? nLineEnd : dbuf.GetLineCount();
// NOTE(review): this GetLineCount() is called on 'this', while every other
// call here goes through dbuf - confirm callers always pass *this as dbuf.
740 if (nLineEnd+1 < GetLineCount())
// A following line exists: delete up to the start of the next line.
741 dbuf.DeleteText(pSource, nLineBegin, 0, nLineEndSource + 1, 0, nAction);
// Otherwise delete up to the end of the last line's characters.
743 dbuf.DeleteText(pSource, nLineBegin, 0, nLineEndSource, dbuf.GetLineLength(nLineEndSource), nAction);
// Insert only when there is text to insert.
746 if (int cchText = strText.GetLength())
747 dbuf.InsertText(pSource, nLineBegin, 0, strText, cchText, endl,endc, nAction);
751 bool CDiffTextBuffer::curUndoGroup()
753 return (m_aUndoBuf.size() != 0 && m_aUndoBuf[0].m_dwFlags&UNDO_BEGINGROUP);