OSDN Git Service

PATCH: [ 1225880 ] Project file parsing based on CMarkdown class
author Jochen Tucht <jtuc@users.sourceforge.net>
Fri, 15 Jul 2005 08:22:13 +0000 (08:22 +0000)
committer Jochen Tucht <jtuc@users.sourceforge.net>
Fri, 15 Jul 2005 08:22:13 +0000 (08:22 +0000)
Src/ProjectFile.cpp
Src/markdown.cpp
Src/markdown.h

index 7715e7a..bb49263 100755 (executable)
@@ -24,6 +24,7 @@
 
 #include "stdafx.h"
 #include "ProjectFile.h"
+#include "markdown.h"
 
 ProjectFile::ProjectFile()
 {
@@ -31,104 +32,85 @@ ProjectFile::ProjectFile()
 }
 
 /** 
+ * @brief Get message from exception into sError, or else throw it.
+ * @param e Caught exception object; deleted here after its message has been copied.
+ * @param sError Receives the exception's error message. If NULL, e is thrown
+ *  on to the caller instead (and is not deleted here).
+ * @return Always FALSE, so a catch handler can write: return False(e, sError);
+ */
+static BOOL NTAPI False(CException *e, CString *sError)
+{
+       if (sError == NULL) // nowhere to store the message: hand the exception on
+               throw e;
+       TCHAR szError[1024]; // buffer handed to GetErrorMessage together with its capacity
+       e->GetErrorMessage(szError, 1024);
+       *sError = szError;
+       e->Delete(); // message has been copied, the exception object is no longer needed
+       return FALSE;
+}
+
+/** 
  * @brief Open given path-file and read data from it to member variables.
  */
 BOOL ProjectFile::Read(LPCTSTR path, CString *sError)
 {
-       ASSERT(sError != NULL);
-       CFile file;
-       CFileException e;
-
-       if (!file.Open(path, CFile::modeRead, &e))
+       try
        {
-               TCHAR szError[1024];
-               e.GetErrorMessage(szError, 1024);
-               *sError = szError;
-               return FALSE;
-       }
-
-       char buf[4096] = {0};
-       TCHAR buf2[4096] = {0};
-       TCHAR tmpPath[MAX_PATH] = {0};
-       UINT bytesRead = file.Read(buf, 4095);
-
-       USES_CONVERSION;
-       _tcsncpy(buf2, A2T(buf), 4096);
-
-       if (_tcsstr(buf2, _T("<?xml")) && _tcsstr(buf2, _T("?>")))
-       {
-               TCHAR *pProject = _tcsstr(buf2, _T("<project>"));
-               
-               if (pProject)
+               CMarkdown::EntityMap entities;
+               entities.Load();
+               CMarkdown::File xmlfile = path;
+               if (xmlfile.pImage == NULL)
                {
-                       TCHAR *pPaths = _tcsstr(buf2, _T("<paths>"));
-                       TCHAR *pLeft = _tcsstr(buf2, _T("<left>"));
-                       TCHAR *pRight = _tcsstr(buf2, _T("<right>"));
-                       TCHAR *pFilter = _tcsstr(buf2, _T("<filter>"));
-                       TCHAR *pSubs = _tcsstr(buf2, _T("<subfolders>"));
-
-                       CString subs;
-                       GetVal(pPaths, pLeft, &m_leftFile, _T("<left>"), _T("</left>"), buf2);
-                       GetVal(pPaths, pRight, &m_rightFile, _T("<right>"), _T("</right>"), buf2);
-                       GetVal(pPaths, pFilter, &m_filter, _T("<filter>"), _T("</filter>"), buf2);
-                       if (GetVal(pPaths, pSubs, &subs, _T("<subfolders>"), _T("</subfolders>"), buf2))
-                               m_subfolders = _ttoi(subs);
+                       CFileException::ThrowOsError(GetLastError(), path);
                }
+               // If encoding is other than UTF-8, assume CP_ACP
+               CMarkdown::String encoding = CMarkdown(xmlfile).Move("?xml").GetAttribute("encoding");
+               UINT codepage = lstrcmpiA(encoding.A, "UTF-8") == 0 ? CP_UTF8 : CP_ACP;
+
+               CMarkdown project = CMarkdown(xmlfile).Move("project").Pop();
+               CMarkdown paths = CMarkdown(project).Move("paths").Pop();
+               m_leftFile = CMarkdown::String(CMarkdown(paths).Move("left").GetInnerText()->Unicode(codepage)->Resolve(entities)).W;
+               m_rightFile = CMarkdown::String(CMarkdown(paths).Move("right").GetInnerText()->Unicode(codepage)->Resolve(entities)).W;
+               m_filter = CMarkdown::String(CMarkdown(paths).Move("filter").GetInnerText()->Unicode(codepage)->Resolve(entities)).W;
+               sscanf(CMarkdown::String(CMarkdown(paths).Move("subfolders").GetInnerText()).A, "%d", &m_subfolders);
+       }
+       catch (CException *e)
+       {
+               return False(e, sError);
        }
-
-       file.Close();
-
        return TRUE;
 }
 
 /** 
  * @brief Save data from member variables to path-file.
- * @note paths are converted to ASCII
+ * @note paths are converted to UTF-8
  */
 BOOL ProjectFile::Save(LPCTSTR path, CString *sError)
 {
-       UINT flags = CFile::modeCreate | CFile::modeWrite;
-       CFile file;
-       CFileException e;
-
-       if (!file.Open(path, flags,&e))
+       try
        {
-               TCHAR szError[1024];
-               e.GetErrorMessage(szError, 1024);
-               *sError = szError;
-               
-               return FALSE;
+               static const char szFormat[]
+               (
+                       "<?xml version='1.0' encoding='UTF-8'?>\n"
+                       "<project>\n"
+                       "\t<paths>\n"
+                       "\t\t<left>%s</left>\n"
+                       "\t\t<right>%s</right>\n"
+                       "\t\t<filter>%s</filter>\n"
+                       "\t\t<subfolders>%d</subfolders>\n"
+                       "\t</paths>\n"
+                       "</project>\n"
+               );
+               fprintf
+               (
+                       CStdioFile(path, CFile::modeCreate|CFile::modeWrite|CFile::typeText).m_pStream,
+                       szFormat,
+                       CMarkdown::String(CMarkdown::HSTR(GetLeft().AllocSysString())->Entities()->Octets(CP_UTF8)).A,
+                       CMarkdown::String(CMarkdown::HSTR(GetRight().AllocSysString())->Entities()->Octets(CP_UTF8)).A,
+                       CMarkdown::String(CMarkdown::HSTR(GetFilter().AllocSysString())->Entities()->Octets(CP_UTF8)).A,
+                       GetSubfolders() ? 1 : 0
+               );
+       }
+       catch (CException *e)
+       {
+               return False(e, sError);
        }
-
-       TCHAR buf2[4096] = {0};
-       
-       _tcscpy(buf2,_T("<?xml version=\"1.0\"?>\n<project>\n\t<paths>\n\t\t"));
-       
-       _tcscat(buf2,_T("<left>"));
-       _tcscat(buf2,GetLeft());
-       _tcscat(buf2,_T("</left>\n\t\t"));
-       _tcscat(buf2,_T("<right>"));
-       _tcscat(buf2,GetRight());
-       _tcscat(buf2,_T("</right>\n\t\t"));
-       _tcscat(buf2,_T("<filter>"));
-       _tcscat(buf2,GetFilter());
-       _tcscat(buf2,_T("</filter>\n\t\t"));
-       _tcscat(buf2,_T("<subfolders>"));
-       _tcscat(buf2,GetSubfolders() ? _T("1") : _T("0"));
-       _tcscat(buf2,_T("</subfolders>\n"));
-       
-       _tcscat(buf2,_T("\t</paths>\n</project>"));
-
-       // convert the string from unicode to ascii, because Read is expecting ascii
-       char buf[4096] = {0};
-       
-       USES_CONVERSION;
-       strncpy(buf, T2A(buf2), 4096);
-
-
-       file.Write(buf,strlen(buf));
-       file.Close();
-
        return TRUE;
 }
 
index d612b21..70284a4 100644 (file)
@@ -65,6 +65,15 @@ DATE:                BY:                                     DESCRIPTION:
 ==========     ==================      ================================================
 2005/01/15     Jochen Tucht            Created
 2005/02/26     Jochen Tucht            Load iconv.dll through DLLPSTUB
+2005/03/20     Jochen Tucht            Add IgnoreCase option for ASCII-7 tag/attr names.
+                                                               Add HtmlUTags option to check for (potentially)
+                                                               unbalanced HTML tags. Html option is combination
+                                                               of the above. Using these options imposes
+                                                               a performance penalty, so avoid it if you can.
+                                                               New flag CMarkdown::FileImage::Handle makes
+                                                               CMarkdown::FileImage::FileImage() accept a
+                                                               handle rather than a filename.
+2005/06/22     Jochen Tucht            New method CMarkdown::_HSTR::Entities().
 */
 
 #include "stdafx.h"
@@ -256,6 +265,55 @@ CMarkdown::HSTR CMarkdown::_HSTR::Resolve(const CMarkdown::EntityMap &map)
        return H;
 }
 
+CMarkdown::HSTR CMarkdown::_HSTR::Entities() // Replaces ampersand, double quote, apostrophe, less-than and greater-than with their entity references; returns the resulting (possibly reallocated) string
+{
+       HSTR H = this;
+       BSTR p, q = H->B; // p: character under inspection, q: where scanning continues
+       while (*(p = q)) // stop at the terminating zero
+       {
+               OLECHAR *value = 0; // replacement text, or 0 if the character needs no escaping
+               switch (*p)
+               {
+               case '&': value = L"&amp;"; break;
+               case '"': value = L"&quot;"; break;
+               case '\'': value = L"&apos;"; break;
+               case '<' : value = L"&lt;"; break;
+               case '>' : value = L"&gt;"; break;
+               }
+               ++q;
+               if (value)
+               {
+                       int i = p - H->B; // remember positions as offsets, since the string is reallocated below
+                       int j = q - H->B;
+                       int cchValue = lstrlenW(value);
+                       if (int cchGrow = cchValue - 1) // one character is replaced by cchValue characters
+                       {
+                               BSTR B = H->B;
+                               int b = SysStringLen(B);
+                               size_t cbMove = (b - j) * sizeof(OLECHAR); // size in bytes of the tail after the replaced character (terminator not included)
+                               if (cchGrow < 0) // NOTE(review): every entity in the switch above is longer than one character, so this shrink path looks unreachable - confirm
+                               {
+                                       memmove(q + cchGrow, q, cbMove);
+                               }
+                               if (!SysReAllocStringLen(&B, B, b + cchGrow))
+                               {
+                                       continue; // resize failed: leave this character unescaped and keep scanning
+                               }
+                               H = (HSTR)B; // adopt the resized string and rebase p and q from the saved offsets
+                               p = H->B + i;
+                               q = H->B + j;
+                               if (cchGrow > 0)
+                               {
+                                       memmove(q + cchGrow, q, cbMove); // shift the tail right to make room for the entity
+                               }
+                       }
+                       memcpy(p, value, cchValue * sizeof(OLECHAR)); // write the entity over the original character and the gap
+                       q = p + cchValue; // continue after the inserted entity, so its own ampersand is not escaped again
+               }
+       }
+       return H;
+}
+
 CMarkdown::HSTR CMarkdown::_HSTR::Trim(const OLECHAR *pszTrimChars)
 {
        HSTR H = this;
@@ -267,8 +325,57 @@ CMarkdown::HSTR CMarkdown::_HSTR::Trim(const OLECHAR *pszTrimChars)
        return H;
 }
 
-CMarkdown::CMarkdown(const char *upper, const char *ahead):
-first(0), lower(0), upper(upper), ahead(ahead)
+//This is a hopefully complete list of the 36 (?) (potentially) unbalanced HTML
+//tags. It is based on tags.c from Tidy library,
+//"http://cvs.sourceforge.net/viewcvs.py/*checkout*/tidy/tidy/src/tags.c?rev=1.55".
+//It should include all tags from tag_defs[] array which are flagged either
+//CM_EMPTY (no closing tag) or CM_OPT (optional closing tag).
+
+static const char htmlUTags[] // Tag names stored back to back, each ended by its own NUL; the literal's implicit terminator after the last name forms the empty string at which FindTag() stops
+(
+       "area\0"
+       "base\0"
+       "basefont\0"
+       "body\0"
+       "br\0"
+       "col\0"
+       "colgroup\0"
+       "dd\0"
+       "dt\0"
+       "frame\0"
+       "head\0"
+       "hr\0"
+       "html\0"
+       "img\0"
+       "input\0"
+       "isindex\0"
+       "li\0"
+       "link\0"
+       "meta\0"
+       "optgroup\0"
+       "option\0"
+       "p\0"
+       "param\0"
+       "tbody\0"
+       "td\0"
+       "tfoot\0"
+       "th\0"
+       "thead\0"
+       "tr\0"
+       "nextid\0"
+       /* proprietary elements */
+       "bgsound\0"     //MICROSOFT
+       "embed\0"       //NETSCAPE
+       "keygen\0"      //NETSCAPE
+       "marquee\0"     //MICROSOFT
+       "spacer\0"      //NETSCAPE
+       "wbr\0"         //PROPRIETARY
+);
+
+CMarkdown::CMarkdown(const char *upper, const char *ahead, unsigned flags):
+first(0), lower(0), upper(upper), ahead(ahead),
+memcmp(flags & IgnoreCase ? ::memicmp : ::memcmp),
+utags(flags & HtmlUTags ? htmlUTags : NULL)
 {
        if (CMarkdown::ahead > CMarkdown::upper)
        {
@@ -285,6 +392,25 @@ CMarkdown::operator bool()
        );
 }
 
+int CMarkdown::FindTag(const char *tags, const char *markup) // Tests whether markup begins with one of the names in tags (NUL-separated names, list ended by an empty name); returns the length of the matching name, or 0 if none matches
+{
+       while (int len = lstrlenA(tags)) // a zero-length name marks the end of the list
+       {
+               unsigned char c;
+               if
+               (
+                       ahead - markup > len // at least one character must follow the name before ahead
+               &&      memcmp(markup, tags, len) == 0 // memcmp is the comparison function pointer member selected in the constructor, so the match may ignore case
+               &&      (isspace(c = markup[len]) || c == '[' || c == '>' || c == '"' || c == '\'' || c == '=') // the name must end at a delimiter, so a listed name that is merely a prefix of the markup's tag name does not match
+               )
+               {
+                       return len;
+               }
+               tags += len + 1; // advance past this name and its NUL
+       }
+       return 0;
+}
+
 void CMarkdown::Scan()
 {
        if (first == upper && *this)
@@ -355,7 +481,7 @@ void CMarkdown::Scan()
                                }
                                break;
                        case '>':
-                               if (upper[-2] == '/')
+                               if (upper[-2] == '/' || utags && FindTag(utags, first + 1))
                                        --depth;
                                break;
                        case '<':
@@ -369,9 +495,21 @@ void CMarkdown::Scan()
 CMarkdown &CMarkdown::Move()
 {
        Scan();
-       while (*this && *upper != '<')
+       for (;;)
        {
-               ++upper;
+               while (*this && *upper != '<')
+               {
+                       ++upper;
+               }
+               if (utags && MAKEWORD(upper[0], upper[1]) == MAKEWORD('<', '/'))
+               {
+                       if (int utlen = FindTag(utags, upper + 2))
+                       {
+                               upper += 2 + utlen;
+                               continue;
+                       }
+               }
+               break;
        }
        first = lower = upper;
        return *this;
@@ -434,7 +572,7 @@ bool CMarkdown::Pull()
                {
                        ++lower;
                }
-               if (lower[-1] != '/' && lower[-1] != '?')
+               if (lower[-1] != '/' && lower[-1] != '?' && !(utags && FindTag(utags, first + 1)))
                {
                        upper = lower;
                        return true;
@@ -480,7 +618,7 @@ CMarkdown::HSTR CMarkdown::GetTagName()
                }
                else
                {
-                       while (q < ahead && !isspace(c = *q) && c != '[' && c != '>' && c != '"' && c != '\'' && c != '=' )
+                       while (q < ahead && !isspace(c = *q) && c != '[' && c != '>' && c != '"' && c != '\'' && c != '=')
                        {
                                ++q;
                        }
@@ -712,7 +850,12 @@ LPVOID NTAPI CMarkdown::FileImage::MapFile(HANDLE hFile, DWORD dwSize)
 CMarkdown::FileImage::FileImage(LPCTSTR path, DWORD trunc, int flags):
 pImage(NULL)
 {
-       HANDLE hFile = CreateFile(path, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, 0, 0);
+       HANDLE hFile
+       (
+               flags & Handle
+       ?       HANDLE(path)
+       :       CreateFile(path, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, 0, 0)
+       );
        if (hFile != INVALID_HANDLE_VALUE)
        {
                cbImage = GetFileSize(hFile, 0);
@@ -756,7 +899,10 @@ pImage(NULL)
                                }
                        }
                }
-               CloseHandle(hFile);
+               if (!(flags & Handle))
+               {
+                       CloseHandle(hFile);
+               }
        }
        if (pImage == NULL)
        {
index dd1fe4c..fad9bb0 100644 (file)
@@ -122,6 +122,7 @@ public:
                // Convert(converter) converts string using an ICONV descriptor
                _HSTR *Convert(const Converter &);
                _HSTR *Resolve(const EntityMap &);
+               _HSTR *Entities();
                _HSTR *Trim(const OLECHAR *);
        } *HSTR;
        union String
@@ -182,7 +183,13 @@ public:
        const char *lower;      // beginning of enclosed text (valid after Move)
        const char *upper;      // end of enclosed text (initially beginning of file)
        const char *ahead;      // last char of file
-       CMarkdown(const char *upper, const char *ahead);
+       enum
+       {
+               IgnoreCase = 0x01,
+               HtmlUTags = 0x02,                       // check for unbalanced tags
+               Html = IgnoreCase|HtmlUTags     // shortcut
+       };
+       CMarkdown(const char *upper, const char *ahead, unsigned flags = 0);
        operator bool();                                // is node ahead?
        void Scan();                                    // find closing tag
        CMarkdown &Move();                              // move to next node
@@ -196,6 +203,9 @@ public:
        HSTR GetOuterText();                    // text including enclosing tags
        HSTR GetAttribute(const char *, const void * = 0); // random or enumerate
 private:
+       int (__cdecl *const memcmp)(const void *, const void *, size_t);
+       const char *const utags;
+       int FindTag(const char *, const char *);
        class Token;
 };
 
@@ -207,7 +217,8 @@ public:
        LPVOID pImage;
        enum
        {
-               Octets = 1
+               Octets = 0x10,
+               Handle = 0x20
        };
        FileImage(LPCTSTR, DWORD trunc = 0, int flags = 0);
        ~FileImage();
@@ -218,9 +229,9 @@ class CMarkdown::File : public CMarkdown::FileImage, public CMarkdown
 {
 //     Construct CMarkdown object from file.
 public:
-       File(LPCTSTR path, DWORD trunc = 0):
-       CMarkdown::FileImage(path, trunc, Octets),
-       CMarkdown((const char *)pImage, (const char *)pImage + cbImage)
+       File(LPCTSTR path, DWORD trunc = 0, unsigned flags = Octets):
+       CMarkdown::FileImage(path, trunc, flags),
+       CMarkdown((const char *)pImage, (const char *)pImage + cbImage, flags)
        {
        }
 };