OSDN Git Service

Fix the issue where the Apache Tika plugin becomes enabled again when reopening the...
[winmerge-jp/winmerge-jp.git] / Src / markdown.cpp
1 /* markdown.cpp: Pull-parse XML sources
2  * Copyright (c) 2005 Jochen Tucht
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * This library is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with this library; if not, write to the Free Software
16  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17  *
18  * OS/Libs:     Win32/STL/shlwapi/iconv
19  *                      iconv.dll is loaded on demand, and is not required as long as
20  *                      program doesn't call iconv based methods.
21  *
22  * Remarks:     Pull-parsing is a very simple way to parse XML. It does not require
23  *                      callback functions, and it does not build object trees in memory. It
24  *                      just travels through plain source.
25  *
26  *                      This library reads source text from memory. It can safely operate
27  *                      on memory mapped files, as it does not require text to be zero-
28  *                      terminated. It will also read most of the usual meta stuff (<? ?>,
29  *                      <!-- -->, <![ []]>, and DTD tags), but applying meta information is
30  *                      left to the caller. Thus, the library does not exactly implement an
31  *                      XML parser. It just helps reading XML.
32  *                      
33  *                      This library is not modeled after an existing pull parsing API,
34  *                      so don't expect to find the same methods you've seen elsewhere.
35  *                      In particular, this library does not follow XmlPull's event model,
36  *                      but attempts to be somewhat closer to a tree-based API.
37  *                      For simplicity, this library does not perform any validation, nor
38  *                      provide error handling other than returning empty text in case it
39  *                      fails to retrieve something.
40  *
41  *                      The name of the core class, CMarkdown, actually was going to be
42  *                      CMarkup when I came across another XML tool with same name on
43  *                      CodeProject. Like TinyXml and XMLite, and unlike CMarkdown, CMarkup
44  *                      follows DOM-like approach, suffering from considerable memory
45  *                      footprint. Anyway, class name CMarkdown somewhat reflects the nature
46  *                      of pull-parsing, pulling down the leaves of an XML tree so programs
47  *                      can reach them from a flat loop, rather than climb up the tree and
48  *                      push the leaves to some callback function, or preprocess the entire
49  *                      tree in some way before allowing programs to retrieve content.
50  *
51  *                      Recommended reading:
52  *
53  *                      www.sosnoski.com/articles/parsing1.html (SAX2 Basics)
54  *                      www.sosnoski.com/articles/parsing2.html (SAX vs Pull)
55  *                      www.sosnoski.com/articles/parsing3.html (Performance)
56  *                      www.xml.com/pub/a/2002/08/14/xmlpull.html (XMLPULL API)
57  *                      www.xml.com/pub/a/2002/09/25/xmlpull.html (response to above)
58  *                      www.stylusstudio.com/xmldev/200205/post61120.html (discussion)
59  *
60  *                      There are lots of related articles on the web, though.
61
62 Please mind 2. b) of the GNU LGPL terms, and log your changes below.
63
64 DATE:           BY:                                     DESCRIPTION:
65 ==========      ==================      ================================================
66 2005-01-15      Jochen Tucht            Created
67 2005-02-26      Jochen Tucht            Load iconv.dll through DLLPSTUB
68 2005-03-20      Jochen Tucht            Add IgnoreCase option for ASCII-7 tag/attr names.
69                                                                 Add HtmlUTags option to check for (potentially)
70                                                                 unbalanced HTML tags. Html option is combination
71                                                                 of the above. Using these options imposes
72                                                                 performance penalty, so avoid it if you can.
73                                                                 New flag CMarkdown::FileImage::Handle makes
74                                                                 CMarkdown::FileImage::FileImage() accept a
75                                                                 handle rather than a filename.
76 2005-06-22      Jochen Tucht            New method CMarkdown::_HSTR::Entities().
77 2005-07-29      Jochen Tucht            ByteOrder detection for 16/32 bit encodings
78 2005-09-09      Jochen Tucht            Patch by Takashi Sawanaka fixes crash due to
79                                                                 reading beyond end of text with HtmlUTags option
80 2005-12-04      Jochen Tucht            Fix UTF-8 signature detection
81                                                                 Strip bogus trailing slash in name of empty tag
82 2008-08-27      Jochen Neubeck          Replace MFC CMap by STL std::map
83 */
84
85 #include "pch.h"
86 #include "markdown.h"
87 #include <cstring>
88 #include <cstdint>
89 #include <Poco/ByteOrder.h>
90 #include <Poco/NumberParser.h>
91 #include <Poco/SharedMemory.h>
92 #include "unicoder.h"
93 #include "TFile.h"
94
95 #ifndef MAKEWORD
96 #define MAKEWORD(a, b)      ((unsigned short)(((unsigned char)((unsigned)(a) & 0xff)) | ((unsigned short)((unsigned char)((unsigned)(b) & 0xff))) << 8))
97 #define MAKELONG(a, b)      ((unsigned)(((unsigned short)((unsigned)(a) & 0xffff)) | ((unsigned)((unsigned short)((unsigned)(b) & 0xffff))) << 16))
98 #define LOWORD(l)           ((unsigned short)((unsigned)(l) & 0xffff))
99 #define HIWORD(l)           ((unsigned short)((unsigned)(l) >> 16))
100 #define LOBYTE(w)           ((unsigned char)((unsigned)(w) & 0xff))
101 #define HIBYTE(w)           ((unsigned char)((unsigned)(w) >> 8))
102 #endif
103
104 using Poco::ByteOrder;
105 using Poco::NumberParser;
106 using Poco::SharedMemory;
107 using Poco::File;
108
109 void CMarkdown::Load(EntityMap &entityMap)
110 {
111         entityMap["amp"] = "&";
112         entityMap["quot"] = "\"";
113         entityMap["apos"] = "'";
114         entityMap["lt"] = "<";
115         entityMap["gt"] = ">";
116 }
117
118 void CMarkdown::Load(EntityMap &entityMap, int dummy)
119 {
120         while (Move("!ENTITY"))
121         {
122                 std::string hstrValue;
123                 std::string hstrKey = GetAttribute(0, &hstrValue);
124                 if (!hstrKey.empty())
125                 {
126                         entityMap[hstrKey] = hstrValue;
127                 }
128         }
129 }
130
131 std::string CMarkdown::Resolve(const EntityMap &map, const std::string& v)
132 {
133         std::string ret(v);
134         char *p, *q = &ret[0];
135         while ((p = strchr(q, '&')) != nullptr && (q = strchr(p, ';')) != nullptr)
136         {
137                 *q = '\0';
138                 char *key = p + 1;
139                 std::string value;
140                 if (*key == '#')
141                 {
142                         unsigned ordinal = '?';
143                         *key = '0';
144                         if (NumberParser::tryParseHex(key, ordinal))
145                                 value.assign(1, static_cast<std::string::value_type>(ordinal));
146                         *key = '#';
147                 }
148                 else
149                 {
150                         EntityMap::const_iterator p1 = map.find(key);
151                         if (p1 != map.end())
152                                 value = p1->second;
153                 }
154                 *q = ';';
155                 ++q;
156                 size_t cchValue = value.length();
157                 if (cchValue != 0)
158                 {
159                         size_t i = p - &ret[0];
160                         size_t j = q - &ret[0];
161                         size_t cchKey = q - p;
162                         if (cchValue != cchKey)
163                         {
164                                 size_t b = ret.length();
165                                 size_t cbMove = (b - j) * sizeof(char);
166                                 if (cchKey > cchValue)
167                                 {
168                                         size_t cchGrow = cchKey - cchValue;
169                                         memmove(q - cchGrow, q, cbMove);
170                                         ret.resize(b - cchGrow);
171                                 }
172                                 p = &ret[0] + i;
173                                 q = &ret[0] + j;
174                                 if (cchValue > cchKey)
175                                 {
176                                         size_t cchGrow = cchValue - cchKey;
177                                         ret.resize(b + cchGrow);
178                                         memmove(q + cchGrow, q, cbMove);
179                                 }
180                         }
181                         memcpy(p, value.c_str(), cchValue * sizeof(char));
182                         q = p + cchValue;
183                 }
184         }
185         return ret;
186 }
187
188 std::string CMarkdown::Entities(const std::string& v)
189 {
190         std::string ret(v);
191         char *p, *q = &ret[0];
192         while (*(p = q))
193         {
194                 char *value = nullptr;
195                 switch (*p)
196                 {
197                 case '&': value = "&amp;"; break;
198                 case '"': value = "&quot;"; break;
199                 case '\'': value = "&apos;"; break;
200                 case '<' : value = "&lt;"; break;
201                 case '>' : value = "&gt;"; break;
202                 }
203                 ++q;
204                 if (value != nullptr)
205                 {
206                         size_t cchValue = strlen(value);
207                         if (cchValue > 1)
208                         {
209                                 ptrdiff_t i = p - &ret[0];
210                                 ptrdiff_t j = q - &ret[0];
211                                 size_t b = ret.length();
212                                 ret.resize(b + cchValue - 1);
213                                 p = &ret[0] + i;
214                                 q = &ret[0] + j;
215                                 memmove(q + cchValue - 1, q, (b - j) * sizeof(char));
216                         }
217                         memcpy(p, value, cchValue * sizeof(char));
218                         q = p + cchValue;
219                 }
220         }
221         return ret;
222 }
223
// Names of the 36 HTML tags that may legitimately appear without a matching
// closing tag. The list follows tags.c of the Tidy library
// ("http://cvs.sourceforge.net/viewcvs.py/*checkout*/tidy/tidy/src/tags.c?rev=1.55")
// and is meant to cover every tag_defs[] entry flagged CM_EMPTY (no closing
// tag) or CM_OPT (optional closing tag).
//
// Layout: the names are stored back to back, each terminated by NUL; the
// implicit terminator of the array itself yields the empty name that ends
// the list.

static const char htmlUTags[] =
	"area\0"
	"base\0"
	"basefont\0"
	"body\0"
	"br\0"
	"col\0"
	"colgroup\0"
	"dd\0"
	"dt\0"
	"frame\0"
	"head\0"
	"hr\0"
	"html\0"
	"img\0"
	"input\0"
	"isindex\0"
	"li\0"
	"link\0"
	"meta\0"
	"optgroup\0"
	"option\0"
	"p\0"
	"param\0"
	"tbody\0"
	"td\0"
	"tfoot\0"
	"th\0"
	"thead\0"
	"tr\0"
	"nextid\0"
	// proprietary elements
	"bgsound\0"     // Microsoft
	"embed\0"       // Netscape
	"keygen\0"      // Netscape
	"marquee\0"     // Microsoft
	"spacer\0"      // Netscape
	"wbr\0";        // proprietary
270
271 CMarkdown::CMarkdown(const char *upper, const char *ahead, unsigned flags):
272 first(nullptr), lower(nullptr), upper(upper), ahead(ahead),
273 memcmp(flags & IgnoreCase ? ::_memicmp : ::memcmp),
274 utags(flags & HtmlUTags ? htmlUTags : nullptr)
275 {
276         if (CMarkdown::ahead > CMarkdown::upper)
277         {
278                 --CMarkdown::ahead;
279         }
280 }
281
282 CMarkdown::operator bool()
283 {
284         return upper < ahead &&
285         (
286                 MAKEWORD(upper[0], upper[1]) != MAKEWORD('<', '/')
287         &&      MAKEWORD(upper[0], upper[1]) != MAKEWORD(']', '>')
288         );
289 }
290
291 size_t CMarkdown::FindTag(const char *tags, const char *markup) const
292 {
293         while (ptrdiff_t len = strlen(tags))
294         {
295                 unsigned char c;
296                 if
297                 (
298                         (ahead - markup) > len
299                 &&      memcmp(markup, tags, len) == 0
300                 &&      (isspace(c = markup[len]) || c == '[' || c == '>' || c == '"' || c == '\'' || c == '=')
301                 )
302                 {
303                         return len;
304                 }
305                 tags += len + 1;
306         }
307         return 0;
308 }
309
310 void CMarkdown::Scan()
311 {
312         if (first == upper && *this)
313         {
314                 int depth = 0;
315                 do
316                 {
317                         switch (*upper++)
318                         {
319                         case '/':
320                                 if (upper[-2] == '<')
321                                         depth -= 2;
322                                 break;
323                         case '?':
324                                 if (upper[-2] == '<')
325                                 {
326                                         do
327                                         {
328                                         } while (upper <= ahead && (*upper++ != '>' || upper[-2] != '?'));
329                                         --depth;
330                                 }
331                                 break;
332                         case '!':
333                                 if (upper[-2] == '<' && upper <= ahead)
334                                 {
335                                         if (*upper == '-')
336                                         {
337                                                 do
338                                                 {
339                                                 } while (upper <= ahead && (*upper++ != '>' || upper[-2] != '-' || upper[-3] != '-'));
340                                                 --depth;
341                                         }
342                                         else if (*upper == '[')
343                                         {
344                                                 do
345                                                 {
346                                                 } while (upper <= ahead && (*upper++ != '>' || upper[-2] != ']' || upper[-3] != ']'));
347                                                 --depth;
348                                         }
349                                         else
350                                         {
351                                                 int quoting = 0;
352                                                 do
353                                                 {
354                                                         switch (*upper)
355                                                         {
356                                                         case '"':
357                                                                 if (!(quoting & 1))
358                                                                         quoting ^= 2;
359                                                                 break;
360                                                         case '\'': 
361                                                                 if (!(quoting & 2))
362                                                                         quoting ^= 1;
363                                                                 break;
364                                                         case '<':
365                                                         case '[':
366                                                                 if (!quoting)
367                                                                         ++depth;
368                                                                 break;
369                                                         case ']':
370                                                         case '>':
371                                                                 if (!quoting)
372                                                                         --depth;
373                                                                 break;
374                                                         }
375                                                 } while (++upper <= ahead && depth);
376                                         }
377                                 }
378                                 break;
379                         case '>':
380                                 if (upper[-2] == '/' || utags && FindTag(utags, first + 1))
381                                         --depth;
382                                 break;
383                         case '<':
384                                 ++depth;
385                                 break;
386                         }
387                 } while (upper <= ahead && depth);
388         }
389 }
390
391 CMarkdown &CMarkdown::Move()
392 {
393         Scan();
394         for (;;)
395         {
396                 while (*this && *upper != '<')
397                 {
398                         ++upper;
399                 }
400                 if (utags != nullptr && upper < ahead && *upper == '<')
401                 {
402                         size_t utlen = FindTag(utags, upper + 2);
403                         if (utlen != 0)
404                         {
405                                 upper += 2 + utlen;
406                                 continue;
407                         }
408                 }
409                 break;
410         }
411         first = lower = upper;
412         return *this;
413 }
414
415 CMarkdown &CMarkdown::Move(const char *name)
416 {
417         while (Move())
418         {
419                 const char *q = lower;
420                 const char *p = q + 1;
421                 unsigned char c;
422                 do
423                 {
424                         ++q;
425                 } while (q <= ahead && !isspace(c = *q) && c != '[' && c != '>' && c != '"' && c != '\'' && c != '=');
426                 size_t length = q - p;
427                 if (memcmp(p, name, length) == 0 && name[length] == '\0')
428                 {
429                         break;
430                 }
431         }
432         return *this;
433 }
434
435 bool CMarkdown::Pull()
436 {
437         if (lower < ahead && (*lower != '<' || ++lower < ahead))
438         {
439                 if (first[1] == '!')
440                 {
441                         if (first[2] != '[' && first[2] != '-')
442                         {
443                                 // neither CDATA nor comment: assume DTD tag
444                                 unsigned quoting = 0;
445                                 while (lower < ahead && (quoting || *lower != '[' && *lower != '>'))
446                                 {
447                                         switch (*lower)
448                                         {
449                                         case '"':
450                                                 if (!(quoting & 1))
451                                                         quoting ^= 2;
452                                                 break;
453                                         case '\'': 
454                                                 if (!(quoting & 2))
455                                                         quoting ^= 1;
456                                                 break;
457                                         }
458                                         ++lower;
459                                 }
460                                 if (*lower == '[')
461                                 {
462                                         upper = lower;
463                                         return true;
464                                 }
465                         }
466                         return false;
467                 }
468                 while (lower < ahead && *lower != '>')
469                 {
470                         ++lower;
471                 }
472                 if (lower[-1] != '/' && lower[-1] != '?' && !(utags && FindTag(utags, first + 1)))
473                 {
474                         upper = lower;
475                         return true;
476                 }
477         }
478         return false;
479 }
480
481 CMarkdown &CMarkdown::Pop()
482 {
483         if (!Pull())
484         {
485                 upper = ahead;
486         }
487         return *this;
488 }
489
490 bool CMarkdown::Push()
491 {
492         if (upper < ahead)
493         {
494                 switch MAKEWORD(upper[0], upper[1])
495                 {
496                 case MAKEWORD('<', '/'):
497                 case MAKEWORD(']', '>'):
498                         upper += 2;
499                         return true;
500                 }
501         }
502         return false;
503 }
504
505 std::string CMarkdown::GetTagName() const
506 {
507         const char *p = first;
508         const char *q = first;
509         if (q < ahead && (p = ++q) < ahead)
510         {
511                 if (*q == '!' && (*++q == '-' || *q == '['))
512                 {
513                         ++q;
514                 }
515                 else
516                 {
517                         unsigned char c;
518                         while (q < ahead && !isspace(c = *q) && c != '[' && c != '>' && c != '"' && c != '\'' && c != '=' && c != '/')
519                         {
520                                 ++q;
521                         }
522                 }
523         }
524         return std::string(p, q - p);
525 }
526
527 std::string CMarkdown::GetTagText() const
528 {
529         const char *p = first, *q = first;
530         if (q < ahead && (p = ++q) < ahead && (*q != '!' || ++q < ahead))
531         {
532                 if (*q == '-' || *q == '[')
533                 {
534                         ++q;
535                 }
536                 else
537                 {
538                         unsigned quoting = 0;
539                         while (q < ahead && (quoting || (*q != '[' && *q != '<' && *q != '>' && *q != '/')))
540                         {
541                                 switch (*q)
542                                 {
543                                 case '"':
544                                         if (!(quoting & 1))
545                                                 quoting ^= 2;
546                                         break;
547                                 case '\'': 
548                                         if (!(quoting & 2))
549                                                 quoting ^= 1;
550                                         break;
551                                 }
552                                 ++q;
553                         }
554                 }
555         }
556         return std::string(p, q - p);
557 }
558
559 std::string CMarkdown::GetInnerText()
560 {
561         Scan();
562         const char *p = first;
563         const char *q = upper;
564         char bracket = '>';
565         if (p < upper && ++p < upper && *p == '!' && ++p < upper)
566         {
567                 bracket = *p;
568                 if (bracket != '-')
569                 {
570                         bracket = '[';
571                 }
572         }
573         p = lower;
574         unsigned quoting = 0;
575         while (p < upper && (quoting || *p != bracket))
576         {
577                 switch (*p)
578                 {
579                 case '"':
580                         if (!(quoting & 1))
581                                 quoting ^= 2;
582                         break;
583                 case '\'': 
584                         if (!(quoting & 2))
585                                 quoting ^= 1;
586                         break;
587                 }
588                 ++p;
589         }
590         if (p < q && p < --q && p < --q)
591         {
592                 ++p;
593         }
594         return std::string(p, q - p);
595 }
596
597 std::string CMarkdown::GetOuterText()
598 {
599         Scan();
600         const char *q = upper;
601         if (q > first)
602         {
603                 while (q[-1] != '>' && q <= ahead)
604                 {
605                         ++q;
606                 }
607         }
608         return std::string(lower, q - first);
609 }
610
611 class CMarkdown::Token
612 {
613 public:
614         const char *lower;
615         const char *upper;
616         int IsSpecial(const char *, const char *);
617 };
618
619 int CMarkdown::Token::IsSpecial(const char *p, const char *ahead)
620 {
621         while (p <= ahead && isspace((unsigned char)*p))
622         {
623                 ++p;
624         }
625         lower = p;
626         int special = 1;
627         while (p <= ahead && !isspace((unsigned char)*p))
628         {
629                 switch (char c = *p)
630                 {
631                 case '"':
632                 case '\'':
633                         if (special && p < ahead)
634                         {
635                                 do
636                                 {
637                                         ++p;
638                                 } while (p < ahead && *p != c);
639                         }
640                         [[fallthrough]];
641                 case '/':
642                 case '=':
643                 case '<':
644                 case '>':
645                 case '[':
646                 case ']':
647                         upper = p + special;
648                         return special;
649                 }
650                 ++p;
651                 special = 0;
652         }
653         upper = p;
654         return special;
655 }
656
657 std::string CMarkdown::GetAttribute(const char *key, std::string *pv)
658 {
659         const char *name = 0;
660         size_t cname = 0;
661         const char *value = 0;
662         size_t cvalue = 0;
663         bool equals = false;
664         const char *p = lower;
665         Token token;
666         do
667         {
668                 if (token.IsSpecial(p, ahead))
669                 {
670                         switch (*token.lower)
671                         {
672                         case '=':
673                                 equals = true;
674                                 break;
675                         case '"':
676                         case '\'':
677                                 equals = false;
678                                 cvalue = token.upper - (value = token.lower); 
679                                 if (cvalue >= 2)
680                                 {
681                                         ++value;
682                                         cvalue -= 2;
683                                 }
684                                 break;
685                         case '[':
686                         case '>':
687                                 token.upper = token.lower;
688                                 break;
689                         }
690                 }
691                 else if (token.upper != token.lower)
692                 {
693                         if (equals)
694                         {
695                                 equals = false;
696                                 cvalue = token.upper - (value = token.lower);
697                         }
698                         else
699                         {
700                                 cname = token.upper - (name = token.lower);
701                         }
702                 }
703                 p = token.upper;
704                 if (name && value)
705                 {
706                         if (key == nullptr)
707                         {
708                                 lower = p;
709                                 *pv = std::string(value, cvalue);
710                                 return std::string(name, cname);
711                         }
712                         if (memcmp(name, key, cname) == 0 && key[cname] == '\0')
713                         {
714                                 return std::string(value, cvalue);
715                         }
716                         name = value = 0;
717                 }
718         } while (token.upper != token.lower);
719         if (key == nullptr)
720         {
721                 lower = p;
722                 return "";
723         }
724         return pv ? *pv : "";
725 }
726
727 int CMarkdown::FileImage::GuessByteOrder(unsigned dwBOM)
728 {
729         int nByteOrder = 0;
730         if (dwBOM)
731         {
732                 unsigned short wBOM = LOWORD(dwBOM);
733                 unsigned short wBOMhigh = HIWORD(dwBOM);
734                 nByteOrder = 2;
735                 if (wBOM == 0 || wBOMhigh == 0)
736                 {
737                         wBOM |= wBOMhigh;
738                         nByteOrder = 4;
739                 }
740                 if (wBOM == 0xFEFF || wBOM == 0xFFFE)
741                 {
742                         nByteOrder += 8 + static_cast<int>((char *)memchr(&dwBOM, 0xFF, 4) - (char *)&dwBOM);
743                 }
744                 else if (LOBYTE(wBOM) == 0 || HIBYTE(wBOM) == 0)
745                 {
746                         unsigned char cBOM = LOBYTE(wBOM) | HIBYTE(wBOM);
747                         nByteOrder += static_cast<int>((char *)memchr(&dwBOM, cBOM, 4) - (char *)&dwBOM);
748                 }
749                 else if ((dwBOM & 0xFFFFFF) == 0xBFBBEF)
750                 {
751                         nByteOrder = 8 + 1;
752                 }
753                 else
754                 {
755                         nByteOrder = 1;
756                 }
757         }
758         return nByteOrder;
759 }
760
761 CMarkdown::FileImage::FileImage(const tchar_t *path, size_t trunc, unsigned flags)
762 : pImage(nullptr), cbImage(0), nByteOrder(0), m_pSharedMemory(nullptr), pCopy(nullptr)
763 {
764         if (flags & Mapping)
765         {
766                 pImage = (void *)(path);
767                 cbImage = trunc;
768         }
769         else if (path != nullptr)
770         {
771                 try
772                 {
773                         TFile file(path);
774                         m_pSharedMemory = new SharedMemory(file, SharedMemory::AM_READ);
775                         pImage = m_pSharedMemory->begin();
776                         cbImage = m_pSharedMemory->end() - static_cast<char *>(pImage);
777                 }
778                 catch (...)
779                 {
780                 }
781         }
782         if (pImage == nullptr)
783         {
784                 cbImage = 0;
785         }
786         else if (cbImage >= 4 && (flags & Octets & (nByteOrder = GuessByteOrder(*(unsigned *)pImage))))
787         {
788                 switch (nByteOrder)
789                 {
790                 case 2 + 1:
791                 case 2 + 1 + 8:
792                         // big endian: swab first
793                         cbImage &= ~1UL;
794                         pCopy = new(std::nothrow) unsigned char[cbImage];
795                         if (pCopy != nullptr)
796                         {
797                                 for (size_t i = 0; i < cbImage / 2; ++i)
798                                         *((uint16_t *)pCopy + i) = Poco::ByteOrder::flipBytes(*((uint16_t *)pImage + i));
799                         }
800
801                         delete m_pSharedMemory;
802                         m_pSharedMemory = nullptr;
803                         pImage = pCopy;
804                         if (pImage != nullptr)
805                         {
806                                 [[fallthrough]];
807                         case 2 + 0:
808                         case 2 + 0 + 8:
809                                 // little endian
810                                 size_t cchImage = cbImage / 2;
811                                 uint16_t *pchImage = (uint16_t *)pImage;
812                                 if (nByteOrder & 8)
813                                 {
814                                         ++pchImage;
815                                         --cchImage;
816                                 }
817                                 cbImage = ucr::Utf8len_of_string(pchImage, cchImage);
818                                 pCopy = new(std::nothrow) unsigned char[cbImage];
819                                 if (pCopy != nullptr)
820                                 {
821                                         uint16_t *pu16;
822                                         unsigned char *pu8;
823                                         for (pu16 = (uint16_t *)pchImage, pu8 = (unsigned char *)pCopy; pu16 < pchImage + cchImage; ++pu16)
824                                                 pu8 += ucr::Ucs4_to_Utf8(*pu16, pu8);
825                                 }
826                                 delete m_pSharedMemory;
827                                 m_pSharedMemory = nullptr;
828                                 pImage = pCopy;
829                         }
830                         break;
831                 case 4 + 1:
832                 case 4 + 1 + 8:
833                 case 4 + 2:
834                 case 4 + 2 + 8:
835                         // odd word endianness: swab first
836                         cbImage &= ~3UL;
837                         pCopy = new(std::nothrow) unsigned char[cbImage];
838                         if (pCopy != nullptr)
839                         {
840                                 for (size_t i = 0; i < cbImage / 2; ++i)
841                                         *((uint16_t *)pCopy + i) = Poco::ByteOrder::flipBytes(*((uint16_t *)pImage + i));
842                         }
843                         delete m_pSharedMemory;
844                         m_pSharedMemory = nullptr;
845                         pImage = pCopy;
846                         if (pImage != nullptr)
847                         {
848                                 [[fallthrough]];
849                         case 4 + 0:
850                         case 4 + 0 + 8:
851                         case 4 + 3:
852                         case 4 + 3 + 8:
853                                 size_t cchImage = cbImage;
854                                 char *pchImage = (char *)pImage;
855                                 if (nByteOrder & 8)
856                                 {
857                                         pchImage += 4;
858                                         cchImage -= 4;
859                                 }
860                                 unsigned uch;
861                                 cbImage = 0;
862                                 for (size_t i = 0; i < cchImage; i += 4)
863                                 {
864                                         memcpy(&uch, pchImage + i, 4);
865                                         if (nByteOrder & 2)
866                                                 uch = ByteOrder::fromBigEndian(uch);
867                                         else
868                                                 uch = ByteOrder::fromLittleEndian(uch);
869                                         cbImage += ucr::Utf8len_fromCodepoint(uch);
870                                 }
871                                 void *pCopy2 = new(std::nothrow) unsigned char[cbImage];
872                                 if (pCopy2 != nullptr)
873                                 {
874                                         cbImage = 0;
875                                         for (size_t i = 0; i < cchImage; i += 4)
876                                         {
877                                                 memcpy(&uch, pchImage + i, 4);
878                                                 if (nByteOrder & 2)
879                                                         uch = ByteOrder::fromBigEndian(uch);
880                                                 else
881                                                         uch = ByteOrder::fromLittleEndian(uch);
882                                                 cbImage += ucr::Ucs4_to_Utf8(uch, (unsigned char *)pCopy2 + cbImage);
883                                         }
884                                 }
885                                 delete m_pSharedMemory;
886                                 m_pSharedMemory = nullptr;
887                                 pImage = pCopy2;
888                                 delete [] pCopy;
889                                 pCopy = pCopy2;
890                         }
891                         break;
892                 }
893         }
894 }
895
896 CMarkdown::FileImage::~FileImage()
897 {
898         delete m_pSharedMemory;
899         delete [] pCopy;
900 }