2 // PukiWiki - Yet another WikiWikiWeb clone
5 // 2002-2016 PukiWiki Development Team
6 // 2001-2002 Originally written by yu-ji
7 // License: GPL v2 or (at your option) any later version
9 // function 'convert_html()', wiki text parser
10 // and related classes-and-functions
// Convert wiki source into HTML by building a Body element tree.
// $lines: wiki text as a string or as an array of lines; a non-array value is split on "\n".
// Side effect: stores md5 of the joined source of $vars['page'] in the global $digest.
// Returns the string produced by $body->toString().
12 function convert_html($lines)
14 global $vars, $digest;
// Static counter: incremented on every call and handed to the Body constructor
15 static $contents_id = 0;
18 $digest = md5(join('', get_source($vars['page'])));
20 if (! is_array($lines)) $lines = explode("\n", $lines);
22 $body = new Body(++$contents_id);
// NOTE(review): original line 23 is not visible in this extract - presumably where $lines is passed to $body; confirm
25 return $body->toString();
32 var $elements; // References of childs
33 var $last; // Insert new one at the back of the $last
// Start with no children; the insertion point ($last) is this element itself
function __construct()
{
	$this->last = & $this;
	$this->elements = array();
}
// Remember the containing element (by reference).
// add() hands an object over to this parent when the element itself cannot contain it.
function setParent(& $parent)
{
	$this->parent = & $parent;
}
// Attach $obj to the nearest element that accepts it.
// If canContain() rejects $obj here, the request is passed up to the parent;
// otherwise $obj is inserted here. Returns whatever the chosen insert() returns.
function & add(& $obj)
{
	if (! $this->canContain($obj)) {
		// Not allowed here: let the parent decide
		return $this->parent->add($obj);
	}

	return $this->insert($obj);
}
// Append $obj as a child of this element.
// $obj gets this element as its parent, is stored by reference in $elements,
// and this element's $last is re-bound to $obj's $last, which is also the return value.
function & insert(& $obj)
{
	// Order matters: the child learns its parent before it is stored
	$obj->setParent($this);
	$this->elements[] = & $obj;

	// Reference assignment on purpose; callers bind the result with '=&'
	return $this->last = & $obj->last;
}
67 function canContain($obj)
// Enclose $string in <$tag$param>...</$tag>.
// $param is inserted verbatim after the tag name (callers pass it with a leading space).
// When $canomit is TRUE and $string compares equal to '', nothing is emitted at all.
function wrap($string, $tag, $param = '', $canomit = TRUE)
{
	if ($canomit && $string == '') {
		return '';
	}

	$open  = '<' . $tag . $param . '>';
	$close = '</' . $tag . '>';

	return $open . $string . $close;
}
81 foreach (array_keys($this->elements) as $key)
82 $ret[] = $this->elements[$key]->toString();
83 return join("\n", $ret);
86 function dump($indent = 0)
88 $ret = str_repeat(' ', $indent) . get_class($this) . "\n";
90 foreach (array_keys($this->elements) as $key) {
91 $ret .= is_object($this->elements[$key]) ?
92 $this->elements[$key]->dump($indent) : '';
93 //str_repeat(' ', $indent) . $this->elements[$key];
// Returns inline-related object
// A line starting with '~' becomes a Paragraph (the '~' is replaced by a space);
// any other line becomes an Inline.
// The object is held in a variable before returning: a reference-returning
// function must return a variable, not the result of a 'new' expression.
function & Factory_Inline($text)
{
	// Check the first letter of the line
	if (substr($text, 0, 1) === '~') {
		$obj = new Paragraph(' ' . substr($text, 1));
	} else {
		$obj = new Inline($text);
	}

	return $obj;
}
// Definition list line: the left-trimmed text is split at the first '|' into two parts.
// Without a '|' the line is handled by Factory_Inline(); otherwise a DList is built
// from the two parts. $root is part of the common factory signature and is not used here.
// The DList is held in a variable before returning: a reference-returning
// function must return a variable, not the result of a 'new' expression.
function & Factory_DList(& $root, $text)
{
	$out = explode('|', ltrim($text), 2);
	if (count($out) < 2) {
		return Factory_Inline($text);
	}

	$obj = new DList($out);
	return $obj;
}
// '|'-separated table
// A row must start with '|' and end with '|' plus an optional type letter
// (h, f or c, either case). $out[1] receives the cell text, $out[2] the type letter.
// Lines that do not match are handled by Factory_Inline().
// $root is part of the common factory signature and is not used here.
// The Table is held in a variable before returning: a reference-returning
// function must return a variable, not the result of a 'new' expression.
function & Factory_Table(& $root, $text)
{
	if (! preg_match('/^\|(.+)\|([hHfFcC]?)$/', $text, $out)) {
		return Factory_Inline($text);
	}

	$obj = new Table($out);
	return $obj;
}
130 // Comma-separated table
// Returns either the result of Factory_Inline($text) or a YTable row.
// NOTE(review): the condition that selects between the two returns is not visible in this extract.
131 function & Factory_YTable(& $root, $text)
134 return Factory_Inline($text);
// The first character of $text is dropped; the rest is split into cells by csv_explode() on ','
136 return new YTable(csv_explode(',', substr($text, 1)));
// Build a Div for a block-plugin line ('#name' or '#name(args)') whose name passes
// exist_plugin_convert(); the last visible statement returns a Paragraph of the raw text.
// NOTE(review): several original lines of this function are not visible in this extract,
// so the exact branch structure should be confirmed against the full file.
140 function & Factory_Div(& $root, $text)
144 // Seems block plugin?
145 if (PKWKEXP_DISABLE_MULTILINE_PLUGIN_HACK) {
// Single-line form: $matches[1] = plugin name, $matches[2] = text inside the parentheses (optional)
147 if (preg_match('/^\#([^\(]+)(?:\((.*)\))?/', $text, $matches) &&
148 exist_plugin_convert($matches[1])) {
149 return new Div($matches);
// Multiline-aware form (presumably the else-branch): additionally captures a trailing run of '{' as $matches[3]
153 if(preg_match('/^#([^\(\{]+)(?:\(([^\r]*)\))?(\{*)/', $text, $matches) &&
154 exist_plugin_convert($matches[1])) {
// Number of opening braces that followed the plugin call
155 $len = strlen($matches[3]);
158 return new Div($matches); // Seems legacy block plugin
// Look for a body delimited by "\r" between $len opening and $len closing braces
159 } else if (preg_match('/\{{' . $len . '}\s*\r(.*)\r\}{' . $len . '}/', $text, $body)) {
// Append the body to the argument string, wrapped in "\r" delimiters
160 $matches[2] .= "\r" . $body[1] . "\r";
161 return new Div($matches); // Seems multiline-enabled block plugin
// Last visible return: the raw text becomes a Paragraph
166 return new Paragraph($text);
170 class Inline extends Element
172 function Inline($text)
174 $this->__construct($text);
176 function __construct($text)
178 parent::__construct();
179 $this->elements[] = trim((substr($text, 0, 1) == "\n") ?
180 $text : make_link($text));
183 function & insert(& $obj)
185 $this->elements[] = $obj->elements[0];
189 function canContain($obj)
191 return is_a($obj, 'Inline');
197 return join(($line_break ? '<br />' . "\n" : "\n"), $this->elements);
200 function & toPara($class = '')
202 $obj = new Paragraph('', $class);
208 // Paragraph: blank-line-separated sentences
209 class Paragraph extends Element
213 function Paragraph($text, $param = '')
215 $this->__construct($text, $param);
217 function __construct($text, $param = '')
219 parent::__construct();
220 $this->param = $param;
221 if ($text == '') return;
223 if (substr($text, 0, 1) == '~')
224 $text = ' ' . substr($text, 1);
226 $this->insert(Factory_Inline($text));
229 function canContain($obj)
231 return is_a($obj, 'Inline');
236 return $this->wrap(parent::toString(), 'p', $this->param);
243 class Heading extends Element
249 function Heading(& $root, $text)
251 $this->__construct($root, $text);
253 function __construct(& $root, $text)
255 parent::__construct();
257 $this->level = min(3, strspn($text, '*'));
258 list($text, $this->msg_top, $this->id) = $root->getAnchor($text, $this->level);
259 $this->insert(Factory_Inline($text));
260 $this->level++; // h2,h3,h4
263 function & insert(& $obj)
265 parent::insert($obj);
266 return $this->last = & $this;
269 function canContain(& $obj)
276 return $this->msg_top . $this->wrap(parent::toString(),
277 'h' . $this->level, ' id="' . $this->id . '"');
283 class HRule extends Element
285 function HRule(& $root, $text)
287 $this->__construct($root, $text);
289 function __construct(& $root, $text)
291 parent::__construct();
294 function canContain(& $obj)
306 // Lists (UL, OL, DL)
307 class ListContainer extends Element
314 function ListContainer($tag, $tag2, $head, $text)
316 $this->__construct($tag, $tag2, $head, $text);
318 function __construct($tag, $tag2, $head, $text)
320 parent::__construct();
324 $this->level = min(3, strspn($text, $head));
325 $text = ltrim(substr($text, $this->level));
327 parent::insert(new ListElement($this->level, $tag2));
329 $this->last = & $this->last->insert(Factory_Inline($text));
332 function canContain(& $obj)
334 return (! is_a($obj, 'ListContainer')
335 || ($this->tag == $obj->tag && $this->level == $obj->level));
338 function setParent(& $parent)
340 parent::setParent($parent);
342 $step = $this->level;
343 if (isset($parent->parent) && is_a($parent->parent, 'ListContainer'))
344 $step -= $parent->parent->level;
346 $this->style = sprintf(pkwk_list_attrs_template(), $this->level, $step);
349 function & insert(& $obj)
351 if (! is_a($obj, get_class($this)))
352 return $this->last = & $this->last->insert($obj);
354 // Break if no elements found (BugTrack/524)
355 if (count($obj->elements) == 1 && empty($obj->elements[0]->elements))
356 return $this->last->parent; // up to ListElement
359 foreach(array_keys($obj->elements) as $key)
360 parent::insert($obj->elements[$key]);
367 return $this->wrap(parent::toString(), $this->tag, $this->style);
371 class ListElement extends Element
373 function ListElement($level, $head)
375 $this->__construct($level, $head);
377 function __construct($level, $head)
379 parent::__construct();
380 $this->level = $level;
384 function canContain(& $obj)
386 return (! is_a($obj, 'ListContainer') || ($obj->level > $this->level));
391 return $this->wrap(parent::toString(), $this->head);
// Unordered list: a ListContainer using the 'ul' / 'li' tags and '-' as the line marker
class UList extends ListContainer
{
	// PHP4-style constructor name; simply forwards to __construct()
	function UList(& $root, $text)
	{
		$this->__construct($root, $text);
	}

	// $root is accepted so that all block classes share one constructor signature;
	// it is not used here. $text is the raw wiki line including its '-' markers.
	function __construct(& $root, $text)
	{
		parent::__construct('ul', 'li', '-', $text);
	}
}
// Ordered list: a ListContainer using the 'ol' / 'li' tags and '+' as the line marker
class OList extends ListContainer
{
	// PHP4-style constructor name; simply forwards to __construct()
	function OList(& $root, $text)
	{
		$this->__construct($root, $text);
	}

	// $root is accepted so that all block classes share one constructor signature;
	// it is not used here. $text is the raw wiki line including its '+' markers.
	function __construct(& $root, $text)
	{
		parent::__construct('ol', 'li', '+', $text);
	}
}
425 // : definition1 | description1
426 // : definition2 | description2
427 // : definition3 | description3
428 class DList extends ListContainer
432 $this->__construct($out);
434 function __construct($out)
436 parent::__construct('dl', 'dt', ':', $out[0]);
437 $this->last = & Element::insert(new ListElement($this->level, 'dd'));
439 $this->last = & $this->last->insert(Factory_Inline($out[1]));
444 // > like E-mail text
445 class BQuote extends Element
449 function BQuote(& $root, $text)
451 $this->__construct($root, $text);
453 function __construct(& $root, $text)
455 parent::__construct();
457 $head = substr($text, 0, 1);
458 $this->level = min(3, strspn($text, $head));
459 $text = ltrim(substr($text, $this->level));
461 if ($head == '<') { // Blockquote close
462 $level = $this->level;
464 $this->last = & $this->end($root, $level);
466 $this->last = & $this->last->insert(Factory_Inline($text));
468 $this->insert(Factory_Inline($text));
472 function canContain(& $obj)
474 return (! is_a($obj, get_class($this)) || $obj->level >= $this->level);
477 function & insert(& $obj)
479 // BugTrack/521, BugTrack/545
480 if (is_a($obj, 'inline'))
481 return parent::insert($obj->toPara(' class="quotation"'));
483 if (is_a($obj, 'BQuote') && $obj->level == $this->level && count($obj->elements)) {
484 $obj = & $obj->elements[0];
485 if (is_a($this->last, 'Paragraph') && count($obj->elements))
486 $obj = & $obj->elements[0];
488 return parent::insert($obj);
493 return $this->wrap(parent::toString(), 'blockquote');
496 function & end(& $root, $level)
498 $parent = & $root->last;
500 while (is_object($parent)) {
501 if (is_a($parent, 'BQuote') && $parent->level == $level)
502 return $parent->parent;
503 $parent = & $parent->parent;
509 class TableCell extends Element
511 var $tag = 'td'; // {td|th}
514 var $style; // is array('width'=>, 'align'=>...);
516 function TableCell($text, $is_template = FALSE)
518 $this->__construct($text, $is_template);
520 function __construct($text, $is_template = FALSE)
522 parent::__construct();
523 $this->style = $matches = array();
525 while (preg_match('/^(?:(LEFT|CENTER|RIGHT)|(BG)?COLOR\(([#\w]+)\)|SIZE\((\d+)\)):(.*)$/',
528 $this->style['align'] = 'text-align:' . strtolower($matches[1]) . ';';
530 } else if ($matches[3]) {
531 $name = $matches[2] ? 'background-color' : 'color';
532 $this->style[$name] = $name . ':' . htmlsc($matches[3]) . ';';
534 } else if ($matches[4]) {
535 $this->style['size'] = 'font-size:' . htmlsc($matches[4]) . 'px;';
539 if ($is_template && is_numeric($text))
540 $this->style['width'] = 'width:' . $text . 'px;';
544 } else if ($text == '~') {
546 } else if (substr($text, 0, 1) == '~') {
548 $text = substr($text, 1);
551 if ($text != '' && $text{0} == '#') {
552 // Try using Div class for this $text
553 $obj = & Factory_Div($this, $text);
554 if (is_a($obj, 'Paragraph'))
555 $obj = & $obj->elements[0];
557 $obj = & Factory_Inline($text);
563 function setStyle(& $style)
565 foreach ($style as $key=>$value)
566 if (! isset($this->style[$key]))
567 $this->style[$key] = $value;
572 if ($this->rowspan == 0 || $this->colspan == 0) return '';
574 $param = ' class="style_' . $this->tag . '"';
575 if ($this->rowspan > 1)
576 $param .= ' rowspan="' . $this->rowspan . '"';
577 if ($this->colspan > 1) {
578 $param .= ' colspan="' . $this->colspan . '"';
579 unset($this->style['width']);
581 if (! empty($this->style))
582 $param .= ' style="' . join(' ', $this->style) . '"';
584 return $this->wrap(parent::toString(), $this->tag, $param, FALSE);
588 // | title1 | title2 | title3 |
589 // | cell1 | cell2 | cell3 |
590 // | cell4 | cell5 | cell6 |
591 class Table extends Element
595 var $col; // number of column
599 $this->__construct($out);
601 function __construct($out)
603 parent::__construct();
605 $cells = explode('|', $out[1]);
606 $this->col = count($cells);
607 $this->type = strtolower($out[2]);
608 $this->types = array($this->type);
609 $is_template = ($this->type == 'c');
611 foreach ($cells as $cell)
612 $row[] = new TableCell($cell, $is_template);
613 $this->elements[] = $row;
616 function canContain(& $obj)
618 return is_a($obj, 'Table') && ($obj->col == $this->col);
621 function & insert(& $obj)
623 $this->elements[] = $obj->elements[0];
624 $this->types[] = $obj->type;
630 static $parts = array('h'=>'thead', 'f'=>'tfoot', ''=>'tbody');
632 // Set rowspan (from bottom, to top)
633 for ($ncol = 0; $ncol < $this->col; $ncol++) {
635 foreach (array_reverse(array_keys($this->elements)) as $nrow) {
636 $row = & $this->elements[$nrow];
637 if ($row[$ncol]->rowspan == 0) {
641 $row[$ncol]->rowspan = $rowspan;
644 $this->types[$nrow + $rowspan] = $this->types[$nrow];
649 // Set colspan and style
651 foreach (array_keys($this->elements) as $nrow) {
652 $row = & $this->elements[$nrow];
653 if ($this->types[$nrow] == 'c')
656 foreach (array_keys($row) as $ncol) {
657 if ($row[$ncol]->colspan == 0) {
661 $row[$ncol]->colspan = $colspan;
662 if ($stylerow !== NULL) {
663 $row[$ncol]->setStyle($stylerow[$ncol]->style);
664 // Inherits column style
666 $row[$ncol - $colspan]->setStyle($stylerow[$ncol]->style);
674 foreach ($parts as $type => $part)
677 foreach (array_keys($this->elements) as $nrow) {
678 if ($this->types[$nrow] != $type)
680 $row = & $this->elements[$nrow];
682 foreach (array_keys($row) as $ncol)
683 $row_string .= $row[$ncol]->toString();
684 $part_string .= $this->wrap($row_string, 'tr') . "\n";
686 $string .= $this->wrap($part_string, $part);
688 $string = $this->wrap($string, 'table', ' class="style_table" cellspacing="1" border="0"');
690 return $this->wrap($string, 'div', ' class="ie5"');
694 // , cell1 , cell2 , cell3
695 // , cell4 , cell5 , cell6
696 // , cell7 , right,==
698 class YTable extends Element
700 var $col; // Number of columns
702 function YTable($row = array('cell1 ', ' cell2 ', ' cell3'))
704 $this->__construct($row);
706 // TODO: Seems unable to show literal '==' without tricks.
707 // But it will be incompatible.
708 // TODO: Why toString() or toXHTML() here
709 function __construct($row = array('cell1 ', ' cell2 ', ' cell3'))
711 parent::__construct();
716 $matches = $_value = $_align = array();
717 foreach($row as $cell) {
718 if (preg_match('/^(\s+)?(.+?)(\s+)?$/', $cell, $matches)) {
719 if ($matches[2] == '==') {
724 $_value[] = $matches[2];
725 if ($matches[1] == '') {
726 $_align[] = ''; // left
727 } else if (isset($matches[3])) {
728 $_align[] = 'center';
739 for ($i = 0; $i < $col; $i++) {
740 if ($_value[$i] === FALSE) continue;
742 while (isset($_value[$i + $colspan]) && $_value[$i + $colspan] === FALSE) ++$colspan;
743 $colspan = ($colspan > 1) ? ' colspan="' . $colspan . '"' : '';
744 $align = $_align[$i] ? ' style="text-align:' . $_align[$i] . '"' : '';
745 $str[] = '<td class="style_td"' . $align . $colspan . '>';
746 $str[] = make_link($_value[$i]);
748 unset($_value[$i], $_align[$i]);
752 $this->elements[] = implode('', $str);
755 function canContain(& $obj)
757 return is_a($obj, 'YTable') && ($obj->col == $this->col);
760 function & insert(& $obj)
762 $this->elements[] = $obj->elements[0];
769 foreach ($this->elements as $str) {
770 $rows .= "\n" . '<tr class="style_tr">' . $str . '</tr>' . "\n";
772 $rows = $this->wrap($rows, 'table', ' class="style_table" cellspacing="1" border="0"');
773 return $this->wrap($rows, 'div', ' class="ie5"');
777 // ' 'Space-beginning sentence
778 // ' 'Space-beginning sentence
779 // ' 'Space-beginning sentence
780 class Pre extends Element
782 function Pre(& $root, $text)
784 $this->__construct($root, $text);
786 function __construct(& $root, $text)
788 global $preformat_ltrim;
789 parent::__construct();
790 $this->elements[] = htmlsc(
791 (! $preformat_ltrim || $text == '' || $text{0} != ' ') ? $text : substr($text, 1));
794 function canContain(& $obj)
796 return is_a($obj, 'Pre');
799 function & insert(& $obj)
801 $this->elements[] = $obj->elements[0];
807 return $this->wrap(join("\n", $this->elements), 'pre');
811 // Block plugin: #something (started with '#')
812 class Div extends Element
819 $this->__construct($out);
821 function __construct($out)
823 parent::__construct();
824 list(, $this->name, $this->param) = array_pad($out, 3, '');
827 function canContain(& $obj)
835 return do_plugin_convert($this->name, $this->param);
839 // LEFT:/CENTER:/RIGHT:
840 class Align extends Element
844 function Align($align)
846 $this->__construct($align);
848 function __construct($align)
850 parent::__construct();
851 $this->align = $align;
854 function canContain(& $obj)
856 return is_a($obj, 'Inline');
861 return $this->wrap(parent::toString(), 'div', ' style="text-align:' . $this->align . '"');
866 class Body extends Element
872 var $classes = array(
877 var $factories = array(
885 $this->__construct($id);
887 function __construct($id)
890 $this->contents = new Element();
891 $this->contents_last = & $this->contents;
892 parent::__construct();
895 function parse(& $lines)
897 $this->last = & $this;
900 while (! empty($lines)) {
901 $line = array_shift($lines);
904 if (substr($line, 0, 2) == '//') continue;
906 if (preg_match('/^(LEFT|CENTER|RIGHT):(.*)$/', $line, $matches)) {
907 // <div style="text-align:...">
908 $this->last = & $this->last->add(new Align(strtolower($matches[1])));
909 if ($matches[2] == '') continue;
913 $line = rtrim($line, "\r\n");
917 $this->last = & $this;
922 if (substr($line, 0, 4) == '----') {
923 $this->insert(new HRule($this, $line));
927 // Multiline-enabled block plugin
928 if (! PKWKEXP_DISABLE_MULTILINE_PLUGIN_HACK &&
929 preg_match('/^#[^{]+(\{\{+)\s*$/', $line, $matches)) {
930 $len = strlen($matches[1]);
931 $line .= "\r"; // Delimiter
932 while (! empty($lines)) {
933 $next_line = preg_replace("/[\r\n]*$/", '', array_shift($lines));
934 if (preg_match('/\}{' . $len . '}/', $next_line)) {
938 $line .= $next_line .= "\r"; // Delimiter
943 // The first character
948 $this->insert(new Heading($this, $line));
953 if ($head == ' ' || $head == "\t") {
954 $this->last = & $this->last->add(new Pre($this, $line));
959 if (substr($line, -1) == '~')
960 $line = substr($line, 0, -1) . "\r";
963 if (isset($this->classes[$head])) {
964 $classname = $this->classes[$head];
965 $this->last = & $this->last->add(new $classname($this, $line));
970 if (isset($this->factories[$head])) {
971 $factoryname = 'Factory_' . $this->factories[$head];
972 $this->last = & $this->last->add($factoryname($this, $line));
977 $this->last = & $this->last->add(Factory_Inline($line));
981 function getAnchor($text, $level)
983 global $top, $_symbol_anchor;
985 // Heading id (auto-generated)
986 $autoid = 'content_' . $this->id . '_' . $this->count;
989 // Heading id (specified by users)
990 $id = make_heading($text, FALSE); // Cut fixed-anchor from $text
996 $anchor = ' &aname(' . $id . ',super,full,nouserselect){' . $_symbol_anchor . '};';
1001 // Add 'page contents' link to its heading
1002 $this->contents_last = & $this->contents_last->add(new Contents_UList($text, $level, $id));
1005 return array($text . $anchor, $this->count > 1 ? "\n" . $top : '', $autoid);
1008 function & insert(& $obj)
1010 if (is_a($obj, 'Inline')) $obj = & $obj->toPara();
1011 return parent::insert($obj);
1018 $text = parent::toString();
1021 $text = preg_replace_callback('/<#_contents_>/',
1022 array(& $this, 'replace_contents'), $text);
1024 return $text . "\n";
1027 function replace_contents($arr)
1029 $contents = '<div class="contents">' . "\n" .
1030 '<a id="contents_' . $this->id . '"></a>' . "\n" .
1031 $this->contents->toString() . "\n" .
// List entry for the page's table of contents: a ListContainer ('ul' / 'li')
// holding a single link of the form <a href="#id">text</a>.
1037 class Contents_UList extends ListContainer
// PHP4-style constructor name; simply forwards to __construct()
1039 function Contents_UList($text, $level, $id)
1041 $this->__construct($text, $level, $id);
// $text: heading text, $level: number of '-' markers handed to ListContainer, $id: anchor id used in the href
1043 function __construct($text, $level, $id)
1045 // Reformatting $text
1046 // A line started with "\n" means "preformatted" ... X(
// NOTE(review): make_heading() presumably rewrites $text in place (by reference) - confirm against its definition
1047 make_heading($text);
// The leading "\n" makes Inline::__construct() store the text as-is instead of passing it to make_link()
1048 $text = "\n" . '<a href="#' . $id . '">' . $text . '</a>' . "\n";
1049 parent::__construct('ul', 'li', '-', str_repeat('-', $level));
1050 $this->insert(Factory_Inline($text));
// Recomputes $this->style after the base setParent() has run
1053 function setParent(& $parent)
1055 parent::setParent($parent);
// $step: this level, reduced by the level of the grandparent when that is a ListContainer
1056 $step = $this->level;
1057 if (isset($parent->parent) && is_a($parent->parent, 'ListContainer')) {
1058 $step -= $parent->parent->level;
// When $step still equals the own level the indent is 1, otherwise $step itself
1060 $indent_level = ($step == $this->level ? 1 : $step);
1061 $this->style = sprintf(pkwk_list_attrs_template(), $this->level, $indent_level);