Src/markdown.cpp

   1 /* markdown.cpp: Pull-parse XML sources
   2  * Copyright (c) 2005 Jochen Tucht
   3  *
   4  * This library is free software; you can redistribute it and/or
   5  * modify it under the terms of the GNU Lesser General Public
   6  * License as published by the Free Software Foundation; either
   7  * version 2.1 of the License, or (at your option) any later version.
   8  *
   9  * This library is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12  * Lesser General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU Lesser General Public
  15  * License along with this library; if not, write to the Free Software
  16  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  17  *
  18  * OS/Libs:     Win32/STL/shlwapi/iconv
  19  *                      iconv.dll is loaded on demand, and is not required as long as
  20  *                      program doesn't call iconv based methods.
  21  *
  22  * Remarks:     Pull-parsing is a very simple way to parse XML. It does not require
  23  *                      callback functions, and it does not build object trees in memory. It
  24  *                      just travels through plain source.
  25  *
  26  *                      This library reads source text from memory. It can safely operate
  27  *                      on memory mapped files, as it does not require text to be zero-
  28  *                      terminated. It will also read most of the usual meta stuff (<? ?>,
  29  *                      <!-- -->, <![ []]>, and DTD tags), but applying meta information is
  30  *                      left to the caller. Thus, the library does not exactly implement an
  31  *                      XML parser. It just helps reading XML.
  32  *
  33  *                      This library is not modeled after an existing pull parsing API,
  34  *                      so don't expect to find the same methods you've seen elsewhere.
  35  *                      In particular, this library does not follow XmlPull's event model,
  36  *                      but attempts to be somewhat closer to a tree-based API.
  37  *                      For simplicity, this library does not perform any validation, nor
  38  *                      provide error handling other than returning empty text in case it
  39  *                      fails to retrieve something.
  40  *
  41  *                      The name of the core class, CMarkdown, actually was going to be
  42  *                      CMarkup when I came across another XML tool with same name on
  43  *                      CodeProject. Like TinyXml and XMLite, and unlike CMarkdown, CMarkup
  44  *                      follows DOM-like approach, suffering from considerable memory
  45  *                      footprint. Anyway, class name CMarkdown somewhat reflects the nature
  46  *                      of pull-parsing, pulling down the leaves of an XML tree so programs
  47  *                      can reach them from a flat loop, rather than climb up the tree and
  48  *                      push the leaves to some callback function, or preprocess the entire
  49  *                      tree in some way before allowing programs to retrieve content.
  50  *
  51  *                      Recommended reading:
  52  *
  53  *                      www.sosnoski.com/articles/parsing1.html (SAX2 Basics)
  54  *                      www.sosnoski.com/articles/parsing2.html (SAX vs Pull)
  55  *                      www.sosnoski.com/articles/parsing3.html (Performance)
  56  *                      www.xml.com/pub/a/2002/08/14/xmlpull.html (XMLPULL API)
  57  *                      www.xml.com/pub/a/2002/09/25/xmlpull.html (response to above)
  58  *                      www.stylusstudio.com/xmldev/200205/post61120.html (discussion)
  59  *
  60  *                      There are lots of related articles on the web, though.
  61
  62 Please mind 2. b) of the GNU LGPL terms, and log your changes below.
  63
  64 DATE:           BY:                                     DESCRIPTION:
  65 ==========      ==================      ================================================
  66 2005-01-15      Jochen Tucht            Created
  67 2005-02-26      Jochen Tucht            Load iconv.dll through DLLPSTUB
  68 2005-03-20      Jochen Tucht            Add IgnoreCase option for ASCII-7 tag/attr names.
  69                                                                 Add HtmlUTags option to check for (potentially)
  70                                                                 unbalanced HTML tags. Html option is combination
  71                                                                 of the above. Using these options imposes
  72                                                                 performance penalty, so avoid it if you can.
  73                                                                 New flag CMarkdown::FileImage::Handle makes
  74                                                                 CMarkdown::FileImage::FileImage() accept a
  75                                                                 handle rather than a filename.
  76 2005-06-22      Jochen Tucht            New method CMarkdown::_HSTR::Entities().
  77 2005-07-29      Jochen Tucht            ByteOrder detection for 16/32 bit encodings
  78 2005-09-09      Jochen Tucht            Patch by Takashi Sawanaka fixes crash due to
  79                                                                 reading beyond end of text with HtmlUTags option
  80 2005-12-04      Jochen Tucht            Fix UTF-8 signature detection
  81                                                                 Strip bogus trailing slash in name of empty tag
  82 2008-08-27      Jochen Neubeck          Replace MFC CMap by STL std::map
  83 */
  84
  85 #include "pch.h"
  86 #include "markdown.h"
  87 #include <cstring>
  88 #include <cstdint>
  89 #include <Poco/ByteOrder.h>
  90 #include <Poco/NumberParser.h>
  91 #include <Poco/SharedMemory.h>
  92 #include "unicoder.h"
  93 #include "TFile.h"
  94
  95 #define MAKEWORD(a, b)      ((unsigned short)(((unsigned char)((unsigned)(a) & 0xff)) | ((unsigned short)((unsigned char)((unsigned)(b) & 0xff))) << 8))
  96 #define MAKELONG(a, b)      ((unsigned)(((unsigned short)((unsigned)(a) & 0xffff)) | ((unsigned)((unsigned short)((unsigned)(b) & 0xffff))) << 16))
  97 #define LOWORD(l)           ((unsigned short)((unsigned)(l) & 0xffff))
  98 #define HIWORD(l)           ((unsigned short)((unsigned)(l) >> 16))
  99 #define LOBYTE(w)           ((unsigned char)((unsigned)(w) & 0xff))
 100 #define HIBYTE(w)           ((unsigned char)((unsigned)(w) >> 8))
 101
 102 using Poco::ByteOrder;
 103 using Poco::NumberParser;
 104 using Poco::SharedMemory;
 105 using Poco::File;
 106
 107 void CMarkdown::Load(EntityMap &entityMap)
 108 {
 109         entityMap["amp"] = "&";
 110         entityMap["quot"] = "\"";
 111         entityMap["apos"] = "'";
 112         entityMap["lt"] = "<";
 113         entityMap["gt"] = ">";
 114 }
 115
 116 void CMarkdown::Load(EntityMap &entityMap, int dummy)
 117 {
 118         while (Move("!ENTITY"))
 119         {
 120                 std::string hstrValue;
 121                 std::string hstrKey = GetAttribute(0, &hstrValue);
 122                 if (!hstrKey.empty())
 123                 {
 124                         entityMap[hstrKey] = hstrValue;
 125                 }
 126         }
 127 }
 128
 129 std::string CMarkdown::Resolve(const EntityMap &map, const std::string& v)
 130 {
 131         std::string ret(v);
 132         char *p, *q = &ret[0];
 133         while ((p = strchr(q, '&')) != nullptr && (q = strchr(p, ';')) != nullptr)
 134         {
 135                 *q = '\0';
 136                 char *key = p + 1;
 137                 std::string value;
 138                 if (*key == '#')
 139                 {
 140                         unsigned ordinal = '?';
 141                         *key = '0';
 142                         if (NumberParser::tryParseHex(key, ordinal))
 143                                 value.assign(1, static_cast<std::string::value_type>(ordinal));
 144                         *key = '#';
 145                 }
 146                 else
 147                 {
 148                         EntityMap::const_iterator p1 = map.find(key);
 149                         if (p1 != map.end())
 150                                 value = p1->second;
 151                 }
 152                 *q = ';';
 153                 ++q;
 154                 size_t cchValue = value.length();
 155                 if (cchValue != 0)
 156                 {
 157                         size_t i = p - &ret[0];
 158                         size_t j = q - &ret[0];
 159                         size_t cchKey = q - p;
 160                         if (cchValue != cchKey)
 161                         {
 162                                 size_t b = ret.length();
 163                                 size_t cbMove = (b - j) * sizeof(char);
 164                                 if (cchKey > cchValue)
 165                                 {
 166                                         size_t cchGrow = cchKey - cchValue;
 167                                         memmove(q - cchGrow, q, cbMove);
 168                                         ret.resize(b - cchGrow);
 169                                 }
 170                                 p = &ret[0] + i;
 171                                 q = &ret[0] + j;
 172                                 if (cchValue > cchKey)
 173                                 {
 174                                         size_t cchGrow = cchValue - cchKey;
 175                                         ret.resize(b + cchGrow);
 176                                         memmove(q + cchGrow, q, cbMove);
 177                                 }
 178                         }
 179                         memcpy(p, value.c_str(), cchValue * sizeof(char));
 180                         q = p + cchValue;
 181                 }
 182         }
 183         return ret;
 184 }
 185
 186 std::string CMarkdown::Entities(const std::string& v)
 187 {
 188         std::string ret(v);
 189         char *p, *q = &ret[0];
 190         while (*(p = q))
 191         {
 192                 char *value = nullptr;
 193                 switch (*p)
 194                 {
 195                 case '&': value = "&amp;"; break;
 196                 case '"': value = "&quot;"; break;
 197                 case '\'': value = "&apos;"; break;
 198                 case '<' : value = "&lt;"; break;
 199                 case '>' : value = "&gt;"; break;
 200                 }
 201                 ++q;
 202                 if (value != nullptr)
 203                 {
 204                         size_t cchValue = strlen(value);
 205                         if (cchValue > 1)
 206                         {
 207                                 ptrdiff_t i = p - &ret[0];
 208                                 ptrdiff_t j = q - &ret[0];
 209                                 size_t b = v.length();
 210                                 ret.resize(b + cchValue - 1);
 211                                 p = &ret[0] + i;
 212                                 q = &ret[0] + j;
 213                                 memmove(q + cchValue - 1, q, (b - j) * sizeof(char));
 214                         }
 215                         memcpy(p, value, cchValue * sizeof(char));
 216                         q = p + cchValue;
 217                 }
 218         }
 219         return ret;
 220 }
 221
 222 //This is a hopefully complete list of the 36 (?) (potentially) unbalanced HTML
 223 //tags. It is based on tags.c from Tidy library,
 224 //"http://cvs.sourceforge.net/viewcvs.py/*checkout*/tidy/tidy/src/tags.c?rev=1.55".
 225 //It should include all tags from tag_defs[] array which are flagged either
 226 //CM_EMPTY (no closing tag) or CM_OPT (optional closing tag).
 227
 228 static const char htmlUTags[] =
 229 (
 230         "area\0"
 231         "base\0"
 232         "basefont\0"
 233         "body\0"
 234         "br\0"
 235         "col\0"
 236         "colgroup\0"
 237         "dd\0"
 238         "dt\0"
 239         "frame\0"
 240         "head\0"
 241         "hr\0"
 242         "html\0"
 243         "img\0"
 244         "input\0"
 245         "isindex\0"
 246         "li\0"
 247         "link\0"
 248         "meta\0"
 249         "optgroup\0"
 250         "option\0"
 251         "p\0"
 252         "param\0"
 253         "tbody\0"
 254         "td\0"
 255         "tfoot\0"
 256         "th\0"
 257         "thead\0"
 258         "tr\0"
 259         "nextid\0"
 260         /* proprietary elements */
 261         "bgsound\0"     //MICROSOFT
 262         "embed\0"       //NETSCAPE
 263         "keygen\0"      //NETSCAPE
 264         "marquee\0"     //MICROSOFT
 265         "spacer\0"      //NETSCAPE
 266         "wbr\0"         //PROPRIETARY
 267 );
 268
 269 CMarkdown::CMarkdown(const char *upper, const char *ahead, unsigned flags):
 270 first(nullptr), lower(nullptr), upper(upper), ahead(ahead),
 271 memcmp(flags & IgnoreCase ? ::_memicmp : ::memcmp),
 272 utags(flags & HtmlUTags ? htmlUTags : nullptr)
 273 {
 274         if (CMarkdown::ahead > CMarkdown::upper)
 275         {
 276                 --CMarkdown::ahead;
 277         }
 278 }
 279
 280 CMarkdown::operator bool()
 281 {
 282         return upper < ahead &&
 283         (
 284                 MAKEWORD(upper[0], upper[1]) != MAKEWORD('<', '/')
 285         &&      MAKEWORD(upper[0], upper[1]) != MAKEWORD(']', '>')
 286         );
 287 }
 288
 289 size_t CMarkdown::FindTag(const char *tags, const char *markup) const
 290 {
 291         while (ptrdiff_t len = strlen(tags))
 292         {
 293                 unsigned char c;
 294                 if
 295                 (
 296                         (ahead - markup) > len
 297                 &&      memcmp(markup, tags, len) == 0
 298                 &&      (isspace(c = markup[len]) || c == '[' || c == '>' || c == '"' || c == '\'' || c == '=')
 299                 )
 300                 {
 301                         return len;
 302                 }
 303                 tags += len + 1;
 304         }
 305         return 0;
 306 }
 307
 308 void CMarkdown::Scan()
 309 {
 310         if (first == upper && *this)
 311         {
 312                 int depth = 0;
 313                 do
 314                 {
 315                         switch (*upper++)
 316                         {
 317                         case '/':
 318                                 if (upper[-2] == '<')
 319                                         depth -= 2;
 320                                 break;
 321                         case '?':
 322                                 if (upper[-2] == '<')
 323                                 {
 324                                         do
 325                                         {
 326                                         } while (upper <= ahead && (*upper++ != '>' || upper[-2] != '?'));
 327                                         --depth;
 328                                 }
 329                                 break;
 330                         case '!':
 331                                 if (upper[-2] == '<' && upper <= ahead)
 332                                 {
 333                                         if (*upper == '-')
 334                                         {
 335                                                 do
 336                                                 {
 337                                                 } while (upper <= ahead && (*upper++ != '>' || upper[-2] != '-' || upper[-3] != '-'));
 338                                                 --depth;
 339                                         }
 340                                         else if (*upper == '[')
 341                                         {
 342                                                 do
 343                                                 {
 344                                                 } while (upper <= ahead && (*upper++ != '>' || upper[-2] != ']' || upper[-3] != ']'));
 345                                                 --depth;
 346                                         }
 347                                         else
 348                                         {
 349                                                 int quoting = 0;
 350                                                 do
 351                                                 {
 352                                                         switch (*upper)
 353                                                         {
 354                                                         case '"':
 355                                                                 if (!(quoting & 1))
 356                                                                         quoting ^= 2;
 357                                                                 break;
 358                                                         case '\'':
 359                                                                 if (!(quoting & 2))
 360                                                                         quoting ^= 1;
 361                                                                 break;
 362                                                         case '<':
 363                                                         case '[':
 364                                                                 if (!quoting)
 365                                                                         ++depth;
 366                                                                 break;
 367                                                         case ']':
 368                                                         case '>':
 369                                                                 if (!quoting)
 370                                                                         --depth;
 371                                                                 break;
 372                                                         }
 373                                                 } while (++upper <= ahead && depth);
 374                                         }
 375                                 }
 376                                 break;
 377                         case '>':
 378                                 if (upper[-2] == '/' || utags && FindTag(utags, first + 1))
 379                                         --depth;
 380                                 break;
 381                         case '<':
 382                                 ++depth;
 383                                 break;
 384                         }
 385                 } while (upper <= ahead && depth);
 386         }
 387 }
 388
 389 CMarkdown &CMarkdown::Move()
 390 {
 391         Scan();
 392         for (;;)
 393         {
 394                 while (*this && *upper != '<')
 395                 {
 396                         ++upper;
 397                 }
 398                 if (utags != nullptr && upper < ahead && *upper == '<')
 399                 {
 400                         size_t utlen = FindTag(utags, upper + 2);
 401                         if (utlen != 0)
 402                         {
 403                                 upper += 2 + utlen;
 404                                 continue;
 405                         }
 406                 }
 407                 break;
 408         }
 409         first = lower = upper;
 410         return *this;
 411 }
 412
 413 CMarkdown &CMarkdown::Move(const char *name)
 414 {
 415         while (Move())
 416         {
 417                 const char *q = lower;
 418                 const char *p = q + 1;
 419                 unsigned char c;
 420                 do
 421                 {
 422                         ++q;
 423                 } while (q <= ahead && !isspace(c = *q) && c != '[' && c != '>' && c != '"' && c != '\'' && c != '=');
 424                 size_t length = q - p;
 425                 if (memcmp(p, name, length) == 0 && name[length] == '\0')
 426                 {
 427                         break;
 428                 }
 429         }
 430         return *this;
 431 }
 432
 433 bool CMarkdown::Pull()
 434 {
 435         if (lower < ahead && (*lower != '<' || ++lower < ahead))
 436         {
 437                 if (first[1] == '!')
 438                 {
 439                         if (first[2] != '[' && first[2] != '-')
 440                         {
 441                                 // neither CDATA nor comment: assume DTD tag
 442                                 unsigned quoting = 0;
 443                                 while (lower < ahead && (quoting || *lower != '[' && *lower != '>'))
 444                                 {
 445                                         switch (*lower)
 446                                         {
 447                                         case '"':
 448                                                 if (!(quoting & 1))
 449                                                         quoting ^= 2;
 450                                                 break;
 451                                         case '\'':
 452                                                 if (!(quoting & 2))
 453                                                         quoting ^= 1;
 454                                                 break;
 455                                         }
 456                                         ++lower;
 457                                 }
 458                                 if (*lower == '[')
 459                                 {
 460                                         upper = lower;
 461                                         return true;
 462                                 }
 463                         }
 464                         return false;
 465                 }
 466                 while (lower < ahead && *lower != '>')
 467                 {
 468                         ++lower;
 469                 }
 470                 if (lower[-1] != '/' && lower[-1] != '?' && !(utags && FindTag(utags, first + 1)))
 471                 {
 472                         upper = lower;
 473                         return true;
 474                 }
 475         }
 476         return false;
 477 }
 478
 479 CMarkdown &CMarkdown::Pop()
 480 {
 481         if (!Pull())
 482         {
 483                 upper = ahead;
 484         }
 485         return *this;
 486 }
 487
 488 bool CMarkdown::Push()
 489 {
 490         if (upper < ahead)
 491         {
 492                 switch MAKEWORD(upper[0], upper[1])
 493                 {
 494                 case MAKEWORD('<', '/'):
 495                 case MAKEWORD(']', '>'):
 496                         upper += 2;
 497                         return true;
 498                 }
 499         }
 500         return false;
 501 }
 502
 503 std::string CMarkdown::GetTagName() const
 504 {
 505         const char *p = first;
 506         const char *q = first;
 507         if (q < ahead && (p = ++q) < ahead)
 508         {
 509                 if (*q == '!' && (*++q == '-' || *q == '['))
 510                 {
 511                         ++q;
 512                 }
 513                 else
 514                 {
 515                         unsigned char c;
 516                         while (q < ahead && !isspace(c = *q) && c != '[' && c != '>' && c != '"' && c != '\'' && c != '=' && c != '/')
 517                         {
 518                                 ++q;
 519                         }
 520                 }
 521         }
 522         return std::string(p, q - p);
 523 }
 524
 525 std::string CMarkdown::GetTagText() const
 526 {
 527         const char *p = first, *q = first;
 528         if (q < ahead && (p = ++q) < ahead && (*q != '!' || ++q < ahead))
 529         {
 530                 if (*q == '-' || *q == '[')
 531                 {
 532                         ++q;
 533                 }
 534                 else
 535                 {
 536                         unsigned quoting = 0;
 537                         while (q < ahead && (quoting || (*q != '[' && *q != '<' && *q != '>' && *q != '/')))
 538                         {
 539                                 switch (*q)
 540                                 {
 541                                 case '"':
 542                                         if (!(quoting & 1))
 543                                                 quoting ^= 2;
 544                                         break;
 545                                 case '\'':
 546                                         if (!(quoting & 2))
 547                                                 quoting ^= 1;
 548                                         break;
 549                                 }
 550                                 ++q;
 551                         }
 552                 }
 553         }
 554         return std::string(p, q - p);
 555 }
 556
 557 std::string CMarkdown::GetInnerText()
 558 {
 559         Scan();
 560         const char *p = first;
 561         const char *q = upper;
 562         char bracket = '>';
 563         if (p < upper && ++p < upper && *p == '!' && ++p < upper)
 564         {
 565                 bracket = *p;
 566                 if (bracket != '-')
 567                 {
 568                         bracket = '[';
 569                 }
 570         }
 571         p = lower;
 572         unsigned quoting = 0;
 573         while (p < upper && (quoting || *p != bracket))
 574         {
 575                 switch (*p)
 576                 {
 577                 case '"':
 578                         if (!(quoting & 1))
 579                                 quoting ^= 2;
 580                         break;
 581                 case '\'':
 582                         if (!(quoting & 2))
 583                                 quoting ^= 1;
 584                         break;
 585                 }
 586                 ++p;
 587         }
 588         if (p < q && p < --q && p < --q)
 589         {
 590                 ++p;
 591         }
 592         return std::string(p, q - p);
 593 }
 594
 595 std::string CMarkdown::GetOuterText()
 596 {
 597         Scan();
 598         const char *q = upper;
 599         if (q > first)
 600         {
 601                 while (q[-1] != '>' && q <= ahead)
 602                 {
 603                         ++q;
 604                 }
 605         }
 606         return std::string(lower, q - first);
 607 }
 608
 609 class CMarkdown::Token
 610 {
 611 public:
 612         const char *lower;
 613         const char *upper;
 614         int IsSpecial(const char *, const char *);
 615 };
 616
 617 int CMarkdown::Token::IsSpecial(const char *p, const char *ahead)
 618 {
 619         while (p <= ahead && isspace((unsigned char)*p))
 620         {
 621                 ++p;
 622         }
 623         lower = p;
 624         int special = 1;
 625         while (p <= ahead && !isspace((unsigned char)*p))
 626         {
 627                 switch (char c = *p)
 628                 {
 629                 case '"':
 630                 case '\'':
 631                         if (special && p < ahead)
 632                         {
 633                                 do
 634                                 {
 635                                         ++p;
 636                                 } while (p < ahead && *p != c);
 637                         }
 638                         // fall through
 639                 case '/':
 640                 case '=':
 641                 case '<':
 642                 case '>':
 643                 case '[':
 644                 case ']':
 645                         upper = p + special;
 646                         return special;
 647                 }
 648                 ++p;
 649                 special = 0;
 650         }
 651         upper = p;
 652         return special;
 653 }
 654
 655 std::string CMarkdown::GetAttribute(const char *key, std::string *pv)
 656 {
 657         const char *name = 0;
 658         size_t cname = 0;
 659         const char *value = 0;
 660         size_t cvalue = 0;
 661         bool equals = false;
 662         const char *p = lower;
 663         Token token;
 664         do
 665         {
 666                 if (token.IsSpecial(p, ahead))
 667                 {
 668                         switch (*token.lower)
 669                         {
 670                         case '=':
 671                                 equals = true;
 672                                 break;
 673                         case '"':
 674                         case '\'':
 675                                 equals = false;
 676                                 cvalue = token.upper - (value = token.lower);
 677                                 if (cvalue >= 2)
 678                                 {
 679                                         ++value;
 680                                         cvalue -= 2;
 681                                 }
 682                                 break;
 683                         case '[':
 684                         case '>':
 685                                 token.upper = token.lower;
 686                                 break;
 687                         }
 688                 }
 689                 else if (token.upper != token.lower)
 690                 {
 691                         if (equals)
 692                         {
 693                                 equals = false;
 694                                 cvalue = token.upper - (value = token.lower);
 695                         }
 696                         else
 697                         {
 698                                 cname = token.upper - (name = token.lower);
 699                         }
 700                 }
 701                 p = token.upper;
 702                 if (name && value)
 703                 {
 704                         if (key == nullptr)
 705                         {
 706                                 lower = p;
 707                                 *pv = std::string(value, cvalue);
 708                                 return std::string(name, cname);
 709                         }
 710                         if (memcmp(name, key, cname) == 0 && key[cname] == '\0')
 711                         {
 712                                 return std::string(value, cvalue);
 713                         }
 714                         name = value = 0;
 715                 }
 716         } while (token.upper != token.lower);
 717         if (key == nullptr)
 718         {
 719                 lower = p;
 720                 return "";
 721         }
 722         return pv ? *pv : "";
 723 }
 724
 725 int CMarkdown::FileImage::GuessByteOrder(unsigned dwBOM)
 726 {
 727         int nByteOrder = 0;
 728         if (dwBOM)
 729         {
 730                 unsigned short wBOM = LOWORD(dwBOM);
 731                 unsigned short wBOMhigh = HIWORD(dwBOM);
 732                 nByteOrder = 2;
 733                 if (wBOM == 0 || wBOMhigh == 0)
 734                 {
 735                         wBOM |= wBOMhigh;
 736                         nByteOrder = 4;
 737                 }
 738                 if (wBOM == 0xFEFF || wBOM == 0xFFFE)
 739                 {
 740                         nByteOrder += 8 + static_cast<int>((char *)memchr(&dwBOM, 0xFF, 4) - (char *)&dwBOM);
 741                 }
 742                 else if (LOBYTE(wBOM) == 0 || HIBYTE(wBOM) == 0)
 743                 {
 744                         unsigned char cBOM = LOBYTE(wBOM) | HIBYTE(wBOM);
 745                         nByteOrder += static_cast<int>((char *)memchr(&dwBOM, cBOM, 4) - (char *)&dwBOM);
 746                 }
 747                 else if ((dwBOM & 0xFFFFFF) == 0xBFBBEF)
 748                 {
 749                         nByteOrder = 8 + 1;
 750                 }
 751                 else
 752                 {
 753                         nByteOrder = 1;
 754                 }
 755         }
 756         return nByteOrder;
 757 }
 758
 759 CMarkdown::FileImage::FileImage(const TCHAR *path, size_t trunc, unsigned flags)
 760 : pImage(nullptr), cbImage(0), nByteOrder(0), m_pSharedMemory(nullptr), pCopy(nullptr)
 761 {
 762         if (flags & Mapping)
 763         {
 764                 pImage = (void *)(path);
 765                 cbImage = trunc;
 766         }
 767         else
 768         {
 769                 try
 770                 {
 771                         TFile file(path);
 772                         m_pSharedMemory = new SharedMemory(file, SharedMemory::AM_READ);
 773                         pImage = m_pSharedMemory->begin();
 774                         cbImage = m_pSharedMemory->end() - m_pSharedMemory->begin();
 775                 }
 776                 catch (...)
 777                 {
 778                 }
 779         }
 780         if (pImage == nullptr)
 781         {
 782                 cbImage = 0;
 783         }
 784         else if (cbImage >= 4 && (flags & Octets & (nByteOrder = GuessByteOrder(*(unsigned *)pImage))))
 785         {
 786                 switch (nByteOrder)
 787                 {
 788                 case 2 + 1:
 789                 case 2 + 1 + 8:
 790                         // big endian: swab first
 791                         cbImage &= ~1UL;
 792                         pCopy = new unsigned char[cbImage];
 793                         if (pCopy != nullptr)
 794                         {
 795                                 for (size_t i = 0; i < cbImage / 2; ++i)
 796                                         *((uint16_t *)pCopy + i) = Poco::ByteOrder::flipBytes(*((uint16_t *)pImage + i));
 797                         }
 798
 799                         delete m_pSharedMemory;
 800                         pImage = pCopy;
 801                         if (pImage != nullptr)
 802                         {
 803                         case 2 + 0:
 804                         case 2 + 0 + 8:
 805                                 // little endian
 806                                 size_t cchImage = cbImage / 2;
 807                                 uint16_t *pchImage = (uint16_t *)pImage;
 808                                 if (nByteOrder & 8)
 809                                 {
 810                                         ++pchImage;
 811                                         --cchImage;
 812                                 }
 813                                 cbImage = ucr::Utf8len_of_string(pchImage, cchImage);
 814                                 pCopy = new unsigned char[cbImage];
 815                                 if (pCopy != nullptr)
 816                                 {
 817                                         uint16_t *pu16;
 818                                         unsigned char *pu8;
 819                                         for (pu16 = (uint16_t *)pchImage, pu8 = (unsigned char *)pCopy; pu16 < pchImage + cchImage; ++pu16)
 820                                                 pu8 += ucr::Ucs4_to_Utf8(*pu16, pu8);
 821                                 }
 822                                 delete m_pSharedMemory;
 823                                 m_pSharedMemory = nullptr;
 824                                 pImage = pCopy;
 825                         }
 826                         break;
 827                 case 4 + 1:
 828                 case 4 + 1 + 8:
 829                 case 4 + 2:
 830                 case 4 + 2 + 8:
 831                         // odd word endianness: swab first
 832                         cbImage &= ~3UL;
 833                         pCopy = new unsigned char[cbImage];
 834                         if (pCopy != nullptr)
 835                         {
 836                                 for (size_t i = 0; i < cbImage / 2; ++i)
 837                                         *((uint16_t *)pCopy + i) = Poco::ByteOrder::flipBytes(*((uint16_t *)pImage + i));
 838                         }
 839                         delete m_pSharedMemory;
 840                         m_pSharedMemory = nullptr;
 841                         pImage = pCopy;
 842                         if (pImage != nullptr)
 843                         {
 844                         case 4 + 0:
 845                         case 4 + 0 + 8:
 846                         case 4 + 3:
 847                         case 4 + 3 + 8:
 848                                 size_t cchImage = cbImage;
 849                                 char *pchImage = (char *)pImage;
 850                                 if (nByteOrder & 8)
 851                                 {
 852                                         pchImage += 4;
 853                                         cchImage -= 4;
 854                                 }
 855                                 unsigned uch;
 856                                 cbImage = 0;
 857                                 for (size_t i = 0; i < cchImage; i += 4)
 858                                 {
 859                                         memcpy(&uch, pchImage + i, 4);
 860                                         if (nByteOrder & 2)
 861                                                 uch = ByteOrder::fromBigEndian(uch);
 862                                         else
 863                                                 uch = ByteOrder::fromLittleEndian(uch);
 864                                         cbImage += ucr::Utf8len_fromCodepoint(uch);
 865                                 }
 866                                 void *pCopy2 = new unsigned char[cbImage];
 867                                 if (pCopy2 != nullptr)
 868                                 {
 869                                         cbImage = 0;
 870                                         for (size_t i = 0; i < cchImage; i += 4)
 871                                         {
 872                                                 memcpy(&uch, pchImage + i, 4);
 873                                                 if (nByteOrder & 2)
 874                                                         uch = ByteOrder::fromBigEndian(uch);
 875                                                 else
 876                                                         uch = ByteOrder::fromLittleEndian(uch);
 877                                                 cbImage += ucr::Ucs4_to_Utf8(uch, (unsigned char *)pCopy2 + cbImage);
 878                                         }
 879                                 }
 880                                 delete m_pSharedMemory;
 881                                 m_pSharedMemory = nullptr;
 882                                 pImage = pCopy2;
 883                                 delete [] pCopy;
 884                                 pCopy = pCopy2;
 885                         }
 886                         break;
 887                 }
 888         }
 889 }
 890
 891 CMarkdown::FileImage::~FileImage()
 892 {
 893         delete m_pSharedMemory;
 894         delete [] pCopy;
 895 }