From 048bdb7a085147e2293db20222a70dc0f80db93a Mon Sep 17 00:00:00 2001 From: Jochen Tucht Date: Fri, 15 Jul 2005 08:22:13 +0000 Subject: [PATCH] PATCH: [ 1225880 ] Project file parsing based on CMarkdown class --- Src/ProjectFile.cpp | 142 ++++++++++++++++++++------------------------- Src/markdown.cpp | 164 +++++++++++++++++++++++++++++++++++++++++++++++++--- Src/markdown.h | 21 +++++-- 3 files changed, 233 insertions(+), 94 deletions(-) diff --git a/Src/ProjectFile.cpp b/Src/ProjectFile.cpp index 7715e7ad1..bb4926311 100755 --- a/Src/ProjectFile.cpp +++ b/Src/ProjectFile.cpp @@ -24,6 +24,7 @@ #include "stdafx.h" #include "ProjectFile.h" +#include "markdown.h" ProjectFile::ProjectFile() { @@ -31,104 +32,85 @@ ProjectFile::ProjectFile() } /** + * @brief Get message from exception into sError, or else throw it. + */ +static BOOL NTAPI False(CException *e, CString *sError) +{ + if (sError == NULL) + throw e; + TCHAR szError[1024]; + e->GetErrorMessage(szError, 1024); + *sError = szError; + e->Delete(); + return FALSE; +} + +/** * @brief Open given path-file and read data from it to member variables. */ BOOL ProjectFile::Read(LPCTSTR path, CString *sError) { - ASSERT(sError != NULL); - CFile file; - CFileException e; - - if (!file.Open(path, CFile::modeRead, &e)) + try { - TCHAR szError[1024]; - e.GetErrorMessage(szError, 1024); - *sError = szError; - return FALSE; - } - - char buf[4096] = {0}; - TCHAR buf2[4096] = {0}; - TCHAR tmpPath[MAX_PATH] = {0}; - UINT bytesRead = file.Read(buf, 4095); - - USES_CONVERSION; - _tcsncpy(buf2, A2T(buf), 4096); - - if (_tcsstr(buf2, _T(""))) - { - TCHAR *pProject = _tcsstr(buf2, _T("")); - - if (pProject) + CMarkdown::EntityMap entities; + entities.Load(); + CMarkdown::File xmlfile = path; + if (xmlfile.pImage == NULL) { - TCHAR *pPaths = _tcsstr(buf2, _T("")); - TCHAR *pLeft = _tcsstr(buf2, _T("")); - TCHAR *pRight = _tcsstr(buf2, _T("")); - TCHAR *pFilter = _tcsstr(buf2, _T("")); - TCHAR *pSubs = _tcsstr(buf2, _T("")); - - CString subs; - GetVal(pPaths, pLeft, &m_leftFile, _T(""), _T(""), buf2); - GetVal(pPaths, pRight, &m_rightFile, _T(""), _T(""), buf2); - GetVal(pPaths, pFilter, &m_filter, _T(""), _T(""), buf2); - if (GetVal(pPaths, pSubs, &subs, _T(""), _T(""), buf2)) - m_subfolders = _ttoi(subs); + CFileException::ThrowOsError(GetLastError(), path); } + // If encoding is other than UTF-8, assume CP_ACP + CMarkdown::String encoding = CMarkdown(xmlfile).Move("?xml").GetAttribute("encoding"); + UINT codepage = lstrcmpiA(encoding.A, "UTF-8") == 0 ? CP_UTF8 : CP_ACP; + + CMarkdown project = CMarkdown(xmlfile).Move("project").Pop(); + CMarkdown paths = CMarkdown(project).Move("paths").Pop(); + m_leftFile = CMarkdown::String(CMarkdown(paths).Move("left").GetInnerText()->Unicode(codepage)->Resolve(entities)).W; + m_rightFile = CMarkdown::String(CMarkdown(paths).Move("right").GetInnerText()->Unicode(codepage)->Resolve(entities)).W; + m_filter = CMarkdown::String(CMarkdown(paths).Move("filter").GetInnerText()->Unicode(codepage)->Resolve(entities)).W; + sscanf(CMarkdown::String(CMarkdown(paths).Move("subfolders").GetInnerText()).A, "%d", &m_subfolders); + } + catch (CException *e) + { + return False(e, sError); } - - file.Close(); - return TRUE; } /** * @brief Save data from member variables to path-file. - * @note paths are converted to ASCII + * @note paths are converted to UTF-8 */ BOOL ProjectFile::Save(LPCTSTR path, CString *sError) { - UINT flags = CFile::modeCreate | CFile::modeWrite; - CFile file; - CFileException e; - - if (!file.Open(path, flags,&e)) + try { - TCHAR szError[1024]; - e.GetErrorMessage(szError, 1024); - *sError = szError; - - return FALSE; + static const char szFormat[] + ( + "\n" + "\n" + "\t\n" + "\t\t%s\n" + "\t\t%s\n" + "\t\t%s\n" + "\t\t%d\n" + "\t\n" + "\n" + ); + fprintf + ( + CStdioFile(path, CFile::modeCreate|CFile::modeWrite|CFile::typeText).m_pStream, + szFormat, + CMarkdown::String(CMarkdown::HSTR(GetLeft().AllocSysString())->Entities()->Octets(CP_UTF8)).A, + CMarkdown::String(CMarkdown::HSTR(GetRight().AllocSysString())->Entities()->Octets(CP_UTF8)).A, + CMarkdown::String(CMarkdown::HSTR(GetFilter().AllocSysString())->Entities()->Octets(CP_UTF8)).A, + GetSubfolders() ? 1 : 0 + ); + } + catch (CException *e) + { + return False(e, sError); } - - TCHAR buf2[4096] = {0}; - - _tcscpy(buf2,_T("\n\n\t\n\t\t")); - - _tcscat(buf2,_T("")); - _tcscat(buf2,GetLeft()); - _tcscat(buf2,_T("\n\t\t")); - _tcscat(buf2,_T("")); - _tcscat(buf2,GetRight()); - _tcscat(buf2,_T("\n\t\t")); - _tcscat(buf2,_T("")); - _tcscat(buf2,GetFilter()); - _tcscat(buf2,_T("\n\t\t")); - _tcscat(buf2,_T("")); - _tcscat(buf2,GetSubfolders() ? _T("1") : _T("0")); - _tcscat(buf2,_T("\n")); - - _tcscat(buf2,_T("\t\n")); - - // convert the string from unicode to ascii, because Read is expecting ascii - char buf[4096] = {0}; - - USES_CONVERSION; - strncpy(buf, T2A(buf2), 4096); - - - file.Write(buf,strlen(buf)); - file.Close(); - return TRUE; } diff --git a/Src/markdown.cpp b/Src/markdown.cpp index d612b2107..70284a441 100644 --- a/Src/markdown.cpp +++ b/Src/markdown.cpp @@ -65,6 +65,15 @@ DATE: BY: DESCRIPTION: ========== ================== ================================================ 2005/01/15 Jochen Tucht Created 2005/02/26 Jochen Tucht Load iconv.dll through DLLPSTUB +2005/03/20 Jochen Tucht Add IgnoreCase option for ASCII-7 tag/attr names. + Add HtmlUTags option to check for (potentially) + unbalanced HTML tags. Html option is combination + of the above. Using these options imposes + performance penalty, so avoid it if you can. + New flag CMarkdown::FileImage::Handle makes + CMarkdown::FileImage::FileImage() accept a + handle rather than a filename. +2005/06/22 Jochen Tucht New method CMarkdown::_HSTR::Entities(). */ #include "stdafx.h" @@ -256,6 +265,55 @@ CMarkdown::HSTR CMarkdown::_HSTR::Resolve(const CMarkdown::EntityMap &map) return H; } +CMarkdown::HSTR CMarkdown::_HSTR::Entities() +{ + HSTR H = this; + BSTR p, q = H->B; + while (*(p = q)) + { + OLECHAR *value = 0; + switch (*p) + { + case '&': value = L"&"; break; + case '"': value = L"""; break; + case '\'': value = L"'"; break; + case '<' : value = L"<"; break; + case '>' : value = L">"; break; + } + ++q; + if (value) + { + int i = p - H->B; + int j = q - H->B; + int cchValue = lstrlenW(value); + if (int cchGrow = cchValue - 1) + { + BSTR B = H->B; + int b = SysStringLen(B); + size_t cbMove = (b - j) * sizeof(OLECHAR); + if (cchGrow < 0) + { + memmove(q + cchGrow, q, cbMove); + } + if (!SysReAllocStringLen(&B, B, b + cchGrow)) + { + continue; + } + H = (HSTR)B; + p = H->B + i; + q = H->B + j; + if (cchGrow > 0) + { + memmove(q + cchGrow, q, cbMove); + } + } + memcpy(p, value, cchValue * sizeof(OLECHAR)); + q = p + cchValue; + } + } + return H; +} + CMarkdown::HSTR CMarkdown::_HSTR::Trim(const OLECHAR *pszTrimChars) { HSTR H = this; @@ -267,8 +325,57 @@ CMarkdown::HSTR CMarkdown::_HSTR::Trim(const OLECHAR *pszTrimChars) return H; } -CMarkdown::CMarkdown(const char *upper, const char *ahead): -first(0), lower(0), upper(upper), ahead(ahead) +//This is a hopefully complete list of the 36 (?) (potentially) unbalanced HTML +//tags. It is based on tags.c from Tidy library, +//"http://cvs.sourceforge.net/viewcvs.py/*checkout*/tidy/tidy/src/tags.c?rev=1.55". +//It should include all tags from tag_defs[] array which are flagged either +//CM_EMPTY (no closing tag) or CM_OPT (optional closing tag). + +static const char htmlUTags[] +( + "area\0" + "base\0" + "basefont\0" + "body\0" + "br\0" + "col\0" + "colgroup\0" + "dd\0" + "dt\0" + "frame\0" + "head\0" + "hr\0" + "html\0" + "img\0" + "input\0" + "isindex\0" + "li\0" + "link\0" + "meta\0" + "optgroup\0" + "option\0" + "p\0" + "param\0" + "tbody\0" + "td\0" + "tfoot\0" + "th\0" + "thead\0" + "tr\0" + "nextid\0" + /* proprietary elements */ + "bgsound\0" //MICROSOFT + "embed\0" //NETSCAPE + "keygen\0" //NETSCAPE + "marquee\0" //MICROSOFT + "spacer\0" //NETSCAPE + "wbr\0" //PROPRIETARY +); + +CMarkdown::CMarkdown(const char *upper, const char *ahead, unsigned flags): +first(0), lower(0), upper(upper), ahead(ahead), +memcmp(flags & IgnoreCase ? ::memicmp : ::memcmp), +utags(flags & HtmlUTags ? htmlUTags : NULL) { if (CMarkdown::ahead > CMarkdown::upper) { @@ -285,6 +392,25 @@ CMarkdown::operator bool() ); } +int CMarkdown::FindTag(const char *tags, const char *markup) +{ + while (int len = lstrlenA(tags)) + { + unsigned char c; + if + ( + ahead - markup > len + && memcmp(markup, tags, len) == 0 + && (isspace(c = markup[len]) || c == '[' || c == '>' || c == '"' || c == '\'' || c == '=') + ) + { + return len; + } + tags += len + 1; + } + return 0; +} + void CMarkdown::Scan() { if (first == upper && *this) @@ -355,7 +481,7 @@ void CMarkdown::Scan() } break; case '>': - if (upper[-2] == '/') + if (upper[-2] == '/' || utags && FindTag(utags, first + 1)) --depth; break; case '<': @@ -369,9 +495,21 @@ void CMarkdown::Scan() CMarkdown &CMarkdown::Move() { Scan(); - while (*this && *upper != '<') + for (;;) { - ++upper; + while (*this && *upper != '<') + { + ++upper; + } + if (utags && MAKEWORD(upper[0], upper[1]) == MAKEWORD('<', '/')) + { + if (int utlen = FindTag(utags, upper + 2)) + { + upper += 2 + utlen; + continue; + } + } + break; } first = lower = upper; return *this; @@ -434,7 +572,7 @@ bool CMarkdown::Pull() { ++lower; } - if (lower[-1] != '/' && lower[-1] != '?') + if (lower[-1] != '/' && lower[-1] != '?' && !(utags && FindTag(utags, first + 1))) { upper = lower; return true; @@ -480,7 +618,7 @@ CMarkdown::HSTR CMarkdown::GetTagName() } else { - while (q < ahead && !isspace(c = *q) && c != '[' && c != '>' && c != '"' && c != '\'' && c != '=' ) + while (q < ahead && !isspace(c = *q) && c != '[' && c != '>' && c != '"' && c != '\'' && c != '=') { ++q; } @@ -712,7 +850,12 @@ LPVOID NTAPI CMarkdown::FileImage::MapFile(HANDLE hFile, DWORD dwSize) CMarkdown::FileImage::FileImage(LPCTSTR path, DWORD trunc, int flags): pImage(NULL) { - HANDLE hFile = CreateFile(path, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, 0, 0); + HANDLE hFile + ( + flags & Handle + ? HANDLE(path) + : CreateFile(path, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, 0, 0) + ); if (hFile != INVALID_HANDLE_VALUE) { cbImage = GetFileSize(hFile, 0); @@ -756,7 +899,10 @@ pImage(NULL) } } } - CloseHandle(hFile); + if (!(flags & Handle)) + { + CloseHandle(hFile); + } } if (pImage == NULL) { diff --git a/Src/markdown.h b/Src/markdown.h index dd1fe4c9b..fad9bb0a1 100644 --- a/Src/markdown.h +++ b/Src/markdown.h @@ -122,6 +122,7 @@ public: // Convert(converter) converts string using an ICONV descriptor _HSTR *Convert(const Converter &); _HSTR *Resolve(const EntityMap &); + _HSTR *Entities(); _HSTR *Trim(const OLECHAR *); } *HSTR; union String @@ -182,7 +183,13 @@ public: const char *lower; // beginning of enclosed text (valid after Move) const char *upper; // end of enclosed text (initially beginning of file) const char *ahead; // last char of file - CMarkdown(const char *upper, const char *ahead); + enum + { + IgnoreCase = 0x01, + HtmlUTags = 0x02, // check for unbalanced tags + Html = IgnoreCase|HtmlUTags // shortcut + }; + CMarkdown(const char *upper, const char *ahead, unsigned flags = 0); operator bool(); // is node ahead? void Scan(); // find closing tag CMarkdown &Move(); // move to next node @@ -196,6 +203,9 @@ public: HSTR GetOuterText(); // text including enclosing tags HSTR GetAttribute(const char *, const void * = 0); // random or enumerate private: + int (__cdecl *const memcmp)(const void *, const void *, size_t); + const char *const utags; + int FindTag(const char *, const char *); class Token; }; @@ -207,7 +217,8 @@ public: LPVOID pImage; enum { - Octets = 1 + Octets = 0x10, + Handle = 0x20 }; FileImage(LPCTSTR, DWORD trunc = 0, int flags = 0); ~FileImage(); @@ -218,9 +229,9 @@ class CMarkdown::File : public CMarkdown::FileImage, public CMarkdown { // Construct CMarkdown object from file. public: - File(LPCTSTR path, DWORD trunc = 0): - CMarkdown::FileImage(path, trunc, Octets), - CMarkdown((const char *)pImage, (const char *)pImage + cbImage) + File(LPCTSTR path, DWORD trunc = 0, unsigned flags = Octets): + CMarkdown::FileImage(path, trunc, flags), + CMarkdown((const char *)pImage, (const char *)pImage + cbImage, flags) { } }; -- 2.11.0