2 // PukiWiki - Yet another WikiWikiWeb clone
3 // $Id: convert_html.php,v 1.21 2011/01/25 15:01:01 henoheno Exp $
5 // 2002-2005 PukiWiki Developers Team
6 // 2001-2002 Originally written by yu-ji
7 // License: GPL v2 or (at your option) any later version
9 // function 'convert_html()', wiki text parser
10 // and related classes-and-functions
// Entry point of the wiki text parser: builds a Body for the given text and
// returns the result of Body::toString().
//   $lines : array of lines, or a single string (split on "\n" when not an array)
// Side effect: writes the global $digest.
// NOTE(review): the embedded numbering jumps from 22 to 25, so whatever sits
// between creating $body and returning it is not visible in this listing
// (presumably the call that feeds $lines to the Body -- confirm).
12 function convert_html($lines)
14 global $vars, $digest;
// Shared across calls; each Body receives the next value (++$contents_id)
15 static $contents_id = 0;
// MD5 over the joined result of get_source() for the page named in $vars['page']
18 $digest = md5(join('', get_source($vars['page'])));
20 if (! is_array($lines)) $lines = explode("\n", $lines);
22 $body = new Body(++$contents_id);
25 return $body->toString();
32 var $elements; // References of children
33 var $last; // Insert new one at the back of the $last
// NOTE(review): the header of the routine owning the next two lines is not
// visible in this listing (presumably the constructor -- confirm).
// Start with no children; $last is bound by reference to this object itself.
37 $this->elements = array();
38 $this->last = & $this;
// Binds $this->parent, by reference, to the element passed in.
// add() falls back to $this->parent when this element cannot hold an object.
41 function setParent(& $parent)
43 $this->parent = & $parent;
// Places $obj at the nearest level of the tree that accepts it.
// If canContain($obj) is false here, the request is forwarded to
// $this->parent->add(); otherwise $obj goes through $this->insert().
// Returns whatever the chosen insert()/add() call returns.
function & add(& $obj)
{
	// Not acceptable at this level: let the parent decide
	if (! $this->canContain($obj)) {
		return $this->parent->add($obj);
	}

	return $this->insert($obj);
}
// Appends $obj as a child of this element.
// Steps: make this element the parent of $obj, store a reference to $obj in
// $this->elements, then rebind $this->last to $obj->last.
// The return value is the result of that reference assignment.
55 function & insert(& $obj)
57 $obj->setParent($this);
58 $this->elements[] = & $obj;
// $this->last now aliases $obj->last (bound by reference, not copied)
60 return $this->last = & $obj->last;
// Tells add() whether $obj may be inserted into this element.
// NOTE(review): the body of this method is not visible in this listing.
63 function canContain($obj)
// Encloses $string in '<' . $tag . $param . '>' ... '</' . $tag . '>'.
//   $param   : text placed right after the tag name (callers pass it with a
//              leading space, e.g. ' class="..."')
//   $canomit : when TRUE and $string == '' (loose comparison), '' is returned
//              and no tag is emitted at all
function wrap($string, $tag, $param = '', $canomit = TRUE)
{
	if ($canomit && $string == '') {
		return '';
	}

	$open  = '<' . $tag . $param . '>';
	$close = '</' . $tag . '>';

	return $open . $string . $close;
}
// NOTE(review): the method header and the initialisation of $ret are not
// visible in this listing (presumably this is toString() -- confirm).
// Collects toString() of every child and joins the pieces with "\n".
77 foreach (array_keys($this->elements) as $key)
78 $ret[] = $this->elements[$key]->toString();
79 return join("\n", $ret);
// Builds a text outline of the element tree.
// The first line is $indent spaces followed by this object's class name; each
// child that is an object contributes its own dump(), non-objects contribute ''.
// NOTE(review): embedded lines 85 and 90-92 are missing from this listing, so
// any change to $indent and the final return are not visible here.
82 function dump($indent = 0)
84 $ret = str_repeat(' ', $indent) . get_class($this) . "\n";
86 foreach (array_keys($this->elements) as $key) {
87 $ret .= is_object($this->elements[$key]) ?
88 $this->elements[$key]->dump($indent) : '';
89 //str_repeat(' ', $indent) . $this->elements[$key];
// Returns inline-related object
//   $text : one line of wiki text
// A line whose first character is '~' becomes a Paragraph, with the '~'
// replaced by a single space; any other line becomes an Inline.
function & Factory_Inline($text)
{
	// Check the first letter of the line
	if (substr($text, 0, 1) == '~') {
		$obj = new Paragraph(' ' . substr($text, 1));
	} else {
		$obj = new Inline($text);
	}

	// A by-reference function must return a variable: returning the 'new'
	// expression directly raises "Only variable references should be
	// returned by reference". Plain '$obj = new ...' is the form this file
	// already uses elsewhere (e.g. in Inline::toPara()).
	return $obj;
}
// Returns a definition-list object, or an inline object when the line has no
// '|' separating the term from its description.
//   $root : not used by this function
//   $text : one line of wiki text; it is left-trimmed, then split at the first '|'
function & Factory_DList(& $root, $text)
{
	$out = explode('|', ltrim($text), 2);
	if (count($out) < 2) {
		// No '|' found: treat the line as ordinary inline text
		return Factory_Inline($text);
	}

	// Return a variable rather than the 'new' expression; a by-reference
	// function returning an expression raises "Only variable references
	// should be returned by reference".
	$obj = new DList($out);
	return $obj;
}
// '|'-separated table
//   $root : not used by this function
//   $text : one line of wiki text
// The line must start with '|' and end with '|' optionally followed by one of
// h, H, f, F, c, C; otherwise it is handled as inline text.
// The regex captures (1) everything between the outer bars and (2) the
// optional trailing letter, and the whole match array is given to Table.
function & Factory_Table(& $root, $text)
{
	if (! preg_match('/^\|(.+)\|([hHfFcC]?)$/', $text, $out)) {
		return Factory_Inline($text);
	}

	// Return a variable rather than the 'new' expression; a by-reference
	// function returning an expression raises "Only variable references
	// should be returned by reference".
	$obj = new Table($out);
	return $obj;
}
// Comma-separated table
//   $root : not used in the visible lines
//   $text : one line of wiki text
// NOTE(review): the condition guarding the inline fallback (embedded line 129)
// is not visible in this listing.
127 function & Factory_YTable(& $root, $text)
130 return Factory_Inline($text);
// Drop the first character, split the rest on ',' via csv_explode(), and
// hand the resulting cells to YTable
132 return new YTable(csv_explode(',', substr($text, 1)));
// Returns a Div (block plugin) when $text looks like '#name' or '#name(args)'
// and exist_plugin_convert() accepts the name; otherwise returns a Paragraph.
//   $root : not used in the visible lines
//   $text : one line; in the multiline case it carries "\r"-delimited body text
// NOTE(review): several lines of this function are missing from the listing
// (embedded 137-139, 142, 146-148, 152-153, 158-161); in particular the test
// that selects the "legacy" return below is not visible.
136 function & Factory_Div(& $root, $text)
140 // Seems block plugin?
141 if (PKWKEXP_DISABLE_MULTILINE_PLUGIN_HACK) {
// Captures: [1] plugin name (up to the first '('), [2] optional argument text
143 if (preg_match('/^\#([^\(]+)(?:\((.*)\))?/', $text, $matches) &&
144 exist_plugin_convert($matches[1])) {
145 return new Div($matches);
// Captures: [1] name (stops at '(' or '{'), [2] optional arguments without "\r",
// [3] the run of opening braces, possibly empty
149 if(preg_match('/^#([^\(\{]+)(?:\(([^\r]*)\))?(\{*)/', $text, $matches) &&
150 exist_plugin_convert($matches[1])) {
// Number of opening braces; the closing run must have the same length
151 $len = strlen($matches[3]);
154 return new Div($matches); // Seems legacy block plugin
155 } else if (preg_match('/\{{' . $len . '}\s*\r(.*)\r\}{' . $len . '}/', $text, $body)) {
// Append the brace-enclosed body to the argument text, "\r"-delimited
156 $matches[2] .= "\r" . $body[1] . "\r";
157 return new Div($matches); // Seems multiline-enabled block plugin
162 return new Paragraph($text);
// Inline text. Unlike other elements, $this->elements holds strings here:
// the constructor stores text, and the join() below concatenates it directly.
// NOTE(review): several lines of this class are missing from the listing
// (including the toString() header and the tails of insert() and toPara()).
166 class Inline extends Element
168 function Inline($text)
// Text starting with "\n" is stored as given; anything else goes through
// make_link() first. Both are trimmed.
171 $this->elements[] = trim((substr($text, 0, 1) == "\n") ?
172 $text : make_link($text));
// Merges another Inline by taking over its first stored string
175 function & insert(& $obj)
177 $this->elements[] = $obj->elements[0];
181 function canContain($obj)
183 return is_a($obj, 'Inline');
// Joins the stored strings with '<br />' + "\n" when $line_break is truthy,
// otherwise with "\n" alone. Where $line_break is set is not visible here.
189 return join(($line_break ? '<br />' . "\n" : "\n"), $this->elements);
// Starts from an empty Paragraph carrying $class as its parameter;
// the remaining steps are not visible in this listing.
192 function & toPara($class = '')
194 $obj = new Paragraph('', $class);
200 // Paragraph: blank-line-separated sentences
// Rendered as a <p> element whose attribute text is $this->param.
// NOTE(review): some lines are missing from the listing (e.g. the toString()
// header before the last return).
201 class Paragraph extends Element
//   $text  : initial text; an empty string creates a paragraph without content
//   $param : attribute text passed on to wrap()
205 function Paragraph($text, $param = '')
208 $this->param = $param;
209 if ($text == '') return;
// A leading '~' is replaced by a single space
211 if (substr($text, 0, 1) == '~')
212 $text = ' ' . substr($text, 1);
214 $this->insert(Factory_Inline($text));
217 function canContain($obj)
219 return is_a($obj, 'Inline');
// wrap() is called with its default $canomit, so empty content yields ''
224 return $this->wrap(parent::toString(), 'p', $this->param);
// Heading line (starts with '*'); rendered as <h2>..<h4> with an id attribute.
// NOTE(review): some lines are missing from the listing (property
// declarations, the body of canContain(), the toString() header).
231 class Heading extends Element
//   $root : object providing getAnchor($text, $level)
//   $text : the heading line
237 function Heading(& $root, $text)
// Level = number of leading '*', capped at 3
241 $this->level = min(3, strspn($text, '*'));
// getAnchor() returns the rewritten text, a prefix string and the heading id
242 list($text, $this->msg_top, $this->id) = $root->getAnchor($text, $this->level);
243 $this->insert(Factory_Inline($text));
244 $this->level++; // h2,h3,h4
// Inserts as usual, then points $last back at the heading itself
247 function & insert(& $obj)
249 parent::insert($obj);
250 return $this->last = & $this;
253 function canContain(& $obj)
// Output: $msg_top followed by <hN id="...">content</hN>
260 return $this->msg_top . $this->wrap(parent::toString(),
261 'h' . $this->level, ' id="' . $this->id . '"');
// Horizontal rule; Body::parse() creates one for a line starting with '----'.
// NOTE(review): only the method headers are visible in this listing; the
// bodies and any rendering method are missing.
267 class HRule extends Element
269 function HRule(& $root, $text)
274 function canContain(& $obj)
286 // Lists (UL, OL, DL)
// Common base of the list classes. Holds ListElement children and renders
// itself as <$tag ...style...>.
// NOTE(review): several lines are missing from the listing (property
// declarations, where $this->tag is assigned, the end of insert(), the
// toString() header).
287 class ListContainer extends Element
//   $tag  : container tag; also selects the margin globals below
//   $tag2 : tag of the ListElement created for the first item
//   $head : marker character counted at the start of $text
//   $text : the source line
296 function ListContainer($tag, $tag2, $head, $text)
// Margins come from globals named after the tag: $_<tag>_margin and
// $_<tag>_left_margin (variable variables)
300 $var_margin = '_' . $tag . '_margin';
301 $var_left_margin = '_' . $tag . '_left_margin';
302 global $$var_margin, $$var_left_margin;
304 $this->margin = $$var_margin;
305 $this->left_margin = $$var_left_margin;
// Level = number of leading markers, capped at 3; the markers and the
// whitespace after them are then removed from the text
309 $this->level = min(3, strspn($text, $head));
310 $text = ltrim(substr($text, $this->level));
312 parent::insert(new ListElement($this->level, $tag2));
// NOTE(review): embedded line 313 is missing; a guard may precede this line
314 $this->last = & $this->last->insert(Factory_Inline($text));
// Accepts anything that is not a list, and lists with the same tag and level
317 function canContain(& $obj)
319 return (! is_a($obj, 'ListContainer')
320 || ($this->tag == $obj->tag && $this->level == $obj->level));
// Besides recording the parent, computes $this->style from the nesting depth
323 function setParent(& $parent)
325 global $_list_pad_str;
327 parent::setParent($parent);
// $step = this level minus the level of the enclosing list, if there is one
329 $step = $this->level;
330 if (isset($parent->parent) && is_a($parent->parent, 'ListContainer'))
331 $step -= $parent->parent->level;
333 $margin = $this->margin * $step;
// $step is unchanged only when nothing was subtracted above; then the
// left margin is added as well
334 if ($step == $this->level)
335 $margin += $this->left_margin;
337 $this->style = sprintf($_list_pad_str, $this->level, $margin, $margin);
340 function & insert(& $obj)
// Objects of a different class go into the current last element
342 if (! is_a($obj, get_class($this)))
343 return $this->last = & $this->last->insert($obj);
345 // Break if no elements found (BugTrack/524)
346 if (count($obj->elements) == 1 && empty($obj->elements[0]->elements))
347 return $this->last->parent; // up to ListElement
// Same class: adopt the other list's children instead of nesting it
350 foreach(array_keys($obj->elements) as $key)
351 parent::insert($obj->elements[$key]);
358 return $this->wrap(parent::toString(), $this->tag, $this->style);
// One item of a list; rendered as <$this->head>...</$this->head>.
// NOTE(review): some lines are missing from the listing (where $this->head is
// assigned, the toString() header).
362 class ListElement extends Element
//   $level : nesting level of the owning list
//   $head  : tag name for this item (callers pass 'li', 'dt' or 'dd')
364 function ListElement($level, $head)
367 $this->level = $level;
// Accepts non-list objects, and lists only when they are nested deeper
371 function canContain(& $obj)
373 return (! is_a($obj, 'ListContainer') || ($obj->level > $this->level));
378 return $this->wrap(parent::toString(), $this->head);
// Unordered list: a ListContainer configured with tag 'ul', item tag 'li'
// and '-' as the marker character.
class UList extends ListContainer
{
	// $root is accepted because Body::parse() constructs block classes as
	// new $classname($this, $line); it is not used here.
	function UList(& $root, $text)
	{
		parent::ListContainer('ul', 'li', '-', $text);
	}
}
// Ordered list: a ListContainer configured with tag 'ol', item tag 'li'
// and '+' as the marker character.
class OList extends ListContainer
{
	// $root is accepted because Body::parse() constructs block classes as
	// new $classname($this, $line); it is not used here.
	function OList(& $root, $text)
	{
		parent::ListContainer('ol', 'li', '+', $text);
	}
}
404 // : definition1 | description1
405 // : definition2 | description2
406 // : definition3 | description3
// Definition list. $out[0] is the term part and $out[1] the description part
// (Factory_DList() splits the line at the first '|').
// NOTE(review): the constructor header and embedded line 413 are missing from
// this listing.
407 class DList extends ListContainer
// The term becomes a 'dt' item of a 'dl' container, marker ':'
411 parent::ListContainer('dl', 'dt', ':', $out[0]);
// Then a 'dd' item is added for the description, bypassing
// ListContainer::insert() by calling Element::insert() directly
412 $this->last = & Element::insert(new ListElement($this->level, 'dd'));
414 $this->last = & $this->last->insert(Factory_Inline($out[1]));
419 // > like E-mail text
// Block quote, rendered as <blockquote>. A line whose first character is '<'
// closes a quote instead of opening one.
// NOTE(review): several lines are missing from the listing (property
// declarations, the conditions around embedded lines 434-439, the toString()
// header, the end of end()).
420 class BQuote extends Element
//   $root : root object whose ->last is walked by end()
//   $text : the source line, starting with the marker characters
424 function BQuote(& $root, $text)
// Level = length of the run of the first character, capped at 3
428 $head = substr($text, 0, 1);
429 $this->level = min(3, strspn($text, $head));
430 $text = ltrim(substr($text, $this->level));
432 if ($head == '<') { // Blockquote close
433 $level = $this->level;
// Continue after the quote of that level: end() yields its parent
435 $this->last = & $this->end($root, $level);
437 $this->last = & $this->last->insert(Factory_Inline($text));
439 $this->insert(Factory_Inline($text));
// Accepts anything that is not of this class, and quotes of the same or a
// deeper level
443 function canContain(& $obj)
445 return (! is_a($obj, get_class($this)) || $obj->level >= $this->level);
448 function & insert(& $obj)
450 // BugTrack/521, BugTrack/545
// Inline text is turned into a paragraph with class "quotation"
451 if (is_a($obj, 'inline'))
452 return parent::insert($obj->toPara(' class="quotation"'));
// A quote of the same level is unwrapped to its first child; when the
// current last element is a Paragraph, it is unwrapped one level further
454 if (is_a($obj, 'BQuote') && $obj->level == $this->level && count($obj->elements)) {
455 $obj = & $obj->elements[0];
456 if (is_a($this->last, 'Paragraph') && count($obj->elements))
457 $obj = & $obj->elements[0];
459 return parent::insert($obj);
464 return $this->wrap(parent::toString(), 'blockquote');
// Walks up from $root->last through the ->parent links and returns the
// parent of the first BQuote found with the given level.
// NOTE(review): what is returned when no such quote exists is not visible.
467 function & end(& $root, $level)
469 $parent = & $root->last;
471 while (is_object($parent)) {
472 if (is_a($parent, 'BQuote') && $parent->level == $level)
473 return $parent->parent;
474 $parent = & $parent->parent;
// One cell of a '|'-separated table; rendered as <td> or <th>.
// NOTE(review): many lines are missing from the listing (the rowspan/colspan
// declarations, parts of the constructor's branches, the toString() header).
480 class TableCell extends Element
482 var $tag = 'td'; // {td|th}
485 var $style; // is array('width'=>, 'align'=>...);
//   $text        : raw cell text
//   $is_template : TRUE for a format row (Table passes type == 'c')
487 function TableCell($text, $is_template = FALSE)
490 $this->style = $matches = array();
// Leading 'LEFT:', 'CENTER:', 'RIGHT:', 'COLOR(x):', 'BGCOLOR(x):' and
// 'SIZE(n):' prefixes are turned into CSS declarations in $this->style.
// Captures: [1] alignment, [2] 'BG' when present, [3] colour, [4] size,
// [5] the text after the prefix
492 while (preg_match('/^(?:(LEFT|CENTER|RIGHT)|(BG)?COLOR\(([#\w]+)\)|SIZE\((\d+)\)):(.*)$/',
495 $this->style['align'] = 'text-align:' . strtolower($matches[1]) . ';';
497 } else if ($matches[3]) {
498 $name = $matches[2] ? 'background-color' : 'color';
499 $this->style[$name] = $name . ':' . htmlsc($matches[3]) . ';';
501 } else if ($matches[4]) {
502 $this->style['size'] = 'font-size:' . htmlsc($matches[4]) . 'px;';
// In a format row, a purely numeric cell sets the column width in pixels
506 if ($is_template && is_numeric($text))
507 $this->style['width'] = 'width:' . $text . 'px;';
509 // NOTE(review): the branches that go with the next conditions are only
510 // partly visible; their effect cannot be described from this listing.
511 } else if ($text == '~') {
513 } else if (substr($text, 0, 1) == '~') {
515 $text = substr($text, 1);
// NOTE(review): the '$text{0}' curly-brace offset syntax was deprecated in
// PHP 7.4 and removed in PHP 8.0; '$text[0]' is the equivalent spelling.
518 if ($text != '' && $text{0} == '#') {
519 // Try using Div class for this $text
520 $obj = & Factory_Div($this, $text);
// Factory_Div() falls back to a Paragraph; use its first child in that case
521 if (is_a($obj, 'Paragraph'))
522 $obj = & $obj->elements[0];
524 $obj = & Factory_Inline($text);
// Copies entries of $style into this cell's style without overwriting keys
// the cell already has
530 function setStyle(& $style)
532 foreach ($style as $key=>$value)
533 if (! isset($this->style[$key]))
534 $this->style[$key] = $value;
// A span of 0 produces no output for this cell
539 if ($this->rowspan == 0 || $this->colspan == 0) return '';
541 $param = ' class="style_' . $this->tag . '"';
542 if ($this->rowspan > 1)
543 $param .= ' rowspan="' . $this->rowspan . '"';
544 if ($this->colspan > 1) {
545 $param .= ' colspan="' . $this->colspan . '"';
// A cell spanning several columns drops its own width
546 unset($this->style['width']);
548 if (! empty($this->style))
549 $param .= ' style="' . join(' ', $this->style) . '"';
// $canomit is FALSE: the tag is emitted even when the content is empty
551 return $this->wrap(parent::toString(), $this->tag, $param, FALSE);
555 // | title1 | title2 | title3 |
556 // | cell1 | cell2 | cell3 |
557 // | cell4 | cell5 | cell6 |
// '|'-separated table. $this->elements holds rows, each row an array of
// TableCell objects; $this->types holds one type letter per row.
// NOTE(review): many lines are missing from the listing (the constructor
// header, the toString() header, the initialisation and update of $rowspan,
// $colspan, $stylerow, $string, $part_string and $row_string, and several
// branch bodies). Comments below describe only what is visible.
558 class Table extends Element
562 var $col; // number of columns
// $out is the match array from Factory_Table(): [1] cell text, [2] type letter
568 $cells = explode('|', $out[1]);
569 $this->col = count($cells);
570 $this->type = strtolower($out[2]);
571 $this->types = array($this->type);
// Type 'c' marks a format row
572 $is_template = ($this->type == 'c');
574 foreach ($cells as $cell)
575 $row[] = new TableCell($cell, $is_template);
576 $this->elements[] = $row;
// Only rows with the same number of columns are merged into this table
579 function canContain(& $obj)
581 return is_a($obj, 'Table') && ($obj->col == $this->col);
// Takes over the single row of $obj together with its type
584 function & insert(& $obj)
586 $this->elements[] = $obj->elements[0];
587 $this->types[] = $obj->type;
// Row type letter => table section tag
593 static $parts = array('h'=>'thead', 'f'=>'tfoot', ''=>'tbody');
595 // Set rowspan (from bottom, to top)
596 for ($ncol = 0; $ncol < $this->col; $ncol++) {
598 foreach (array_reverse(array_keys($this->elements)) as $nrow) {
599 $row = & $this->elements[$nrow];
600 if ($row[$ncol]->rowspan == 0) {
604 $row[$ncol]->rowspan = $rowspan;
607 $this->types[$nrow + $rowspan] = $this->types[$nrow];
612 // Set colspan and style
614 foreach (array_keys($this->elements) as $nrow) {
615 $row = & $this->elements[$nrow];
616 if ($this->types[$nrow] == 'c')
619 foreach (array_keys($row) as $ncol) {
620 if ($row[$ncol]->colspan == 0) {
624 $row[$ncol]->colspan = $colspan;
625 if ($stylerow !== NULL) {
626 $row[$ncol]->setStyle($stylerow[$ncol]->style);
627 // Inherits column style
629 $row[$ncol - $colspan]->setStyle($stylerow[$ncol]->style);
// Emit the sections in the order of $parts, each containing the rows whose
// type matches
637 foreach ($parts as $type => $part)
640 foreach (array_keys($this->elements) as $nrow) {
641 if ($this->types[$nrow] != $type)
643 $row = & $this->elements[$nrow];
645 foreach (array_keys($row) as $ncol)
646 $row_string .= $row[$ncol]->toString();
647 $part_string .= $this->wrap($row_string, 'tr');
649 $string .= $this->wrap($part_string, $part);
651 $string = $this->wrap($string, 'table', ' class="style_table" cellspacing="1" border="0"');
653 return $this->wrap($string, 'div', ' class="ie5"');
657 // , cell1 , cell2 , cell3
658 // , cell4 , cell5 , cell6
659 // , cell7 , right,==
// Comma-separated table. Each row is stored in $this->elements as a ready-made
// string of <td> cells; a cell containing only '==' extends the cell before it.
// NOTE(review): many lines are missing from the listing (where $col and
// $this->col are set, the handling of '==' and of the other alignment cases,
// the initial value of $colspan, the closing </td>, the toString() header).
661 class YTable extends Element
663 var $col; // Number of columns
665 // TODO: Seems unable to show literal '==' without tricks.
666 // But it will be incompatible.
667 // TODO: Why toString() or toXHTML() here
668 function YTable($row = array('cell1 ', ' cell2 ', ' cell3'))
675 $matches = $_value = $_align = array();
676 foreach($row as $cell) {
// Captures: [1] leading whitespace, [2] the cell text, [3] trailing whitespace
677 if (preg_match('/^(\s+)?(.+?)(\s+)?$/', $cell, $matches)) {
678 if ($matches[2] == '==') {
683 $_value[] = $matches[2];
// No leading whitespace: default (left) alignment
684 if ($matches[1] == '') {
685 $_align[] = ''; // left
// Whitespace on both sides: centred
686 } else if (isset($matches[3])) {
687 $_align[] = 'center';
698 for ($i = 0; $i < $col; $i++) {
// FALSE entries are skipped here and counted into the preceding cell's
// colspan by the loop below
699 if ($_value[$i] === FALSE) continue;
701 while (isset($_value[$i + $colspan]) && $_value[$i + $colspan] === FALSE) ++$colspan;
702 $colspan = ($colspan > 1) ? ' colspan="' . $colspan . '"' : '';
703 $align = $_align[$i] ? ' style="text-align:' . $_align[$i] . '"' : '';
704 $str[] = '<td class="style_td"' . $align . $colspan . '>';
705 $str[] = make_link($_value[$i]);
707 unset($_value[$i], $_align[$i]);
711 $this->elements[] = implode('', $str);
// Only rows with the same number of columns are merged into this table
714 function canContain(& $obj)
716 return is_a($obj, 'YTable') && ($obj->col == $this->col);
// Takes over the single rendered row of $obj
719 function & insert(& $obj)
721 $this->elements[] = $obj->elements[0];
// Wraps every stored row string in <tr class="style_tr">, then the whole in
// the table and div wrappers
728 foreach ($this->elements as $str) {
729 $rows .= "\n" . '<tr class="style_tr">' . $str . '</tr>' . "\n";
731 $rows = $this->wrap($rows, 'table', ' class="style_table" cellspacing="1" border="0"');
732 return $this->wrap($rows, 'div', ' class="ie5"');
736 // ' 'Space-beginning sentence
737 // ' 'Space-beginning sentence
738 // ' 'Space-beginning sentence
// Preformatted block, rendered as <pre>. $this->elements holds escaped strings,
// one per source line.
// NOTE(review): some lines are missing from the listing (embedded 744, 757 and
// the toString() header).
739 class Pre extends Element
//   $root : not used in the visible lines
//   $text : the source line
741 function Pre(& $root, $text)
743 global $preformat_ltrim;
// With $preformat_ltrim enabled, one leading space is removed before the
// text is escaped with htmlsc(); otherwise the text is escaped unchanged.
// NOTE(review): the '$text{0}' curly-brace offset syntax was deprecated in
// PHP 7.4 and removed in PHP 8.0; '$text[0]' is the equivalent spelling.
745 $this->elements[] = htmlsc(
746 (! $preformat_ltrim || $text == '' || $text{0} != ' ') ? $text : substr($text, 1));
// Consecutive Pre lines are merged into one block
749 function canContain(& $obj)
751 return is_a($obj, 'Pre');
// Takes over the single escaped line of $obj
754 function & insert(& $obj)
756 $this->elements[] = $obj->elements[0];
762 return $this->wrap(join("\n", $this->elements), 'pre');
766 // Block plugin: #something (started with '#')
// Output is whatever do_plugin_convert() returns for the plugin name and
// parameter text.
// NOTE(review): the constructor header, the body of canContain() and the
// toString() header are missing from this listing.
767 class Div extends Element
// $out is a regex match array: [1] plugin name, [2] parameter text.
// array_pad() supplies '' for entries the match did not produce.
775 list(, $this->name, $this->param) = array_pad($out, 3, '');
778 function canContain(& $obj)
786 return do_plugin_convert($this->name, $this->param);
790 // LEFT:/CENTER:/RIGHT:
// Alignment block, rendered as <div style="text-align:...">.
// NOTE(review): some lines are missing from the listing (embedded 796-797 and
// the toString() header).
791 class Align extends Element
//   $align : value placed after 'text-align:' (Body::parse() passes the
//            lower-cased keyword)
795 function Align($align)
798 $this->align = $align;
801 function canContain(& $obj)
803 return is_a($obj, 'Inline');
// wrap() is called with its default $canomit, so empty content yields ''
808 return $this->wrap(parent::toString(), 'div', ' style="text-align:' . $this->align . '"');
// Root element of a parsed page. parse() turns source lines into child
// elements; getAnchor() numbers headings and collects a table of contents in
// $this->contents; toString() renders everything.
// NOTE(review): many lines are missing from the listing (property
// declarations, the contents of the two arrays, the constructor header, and
// several conditions, 'continue' statements and returns inside the methods).
// Comments below describe only what is visible.
813 class Body extends Element
// First character of a line => class name; parse() instantiates it as
// new $classname($this, $line)
819 var $classes = array(
// First character of a line => suffix of a 'Factory_' function; parse() calls
// it as Factory_<suffix>($this, $line)
824 var $factories = array(
// The table of contents lives in its own Element tree
833 $this->contents = new Element();
834 $this->contents_last = & $this->contents;
// Consumes $lines (the array is emptied by array_shift) and builds the tree.
838 function parse(& $lines)
840 $this->last = & $this;
843 while (! empty($lines)) {
844 $line = array_shift($lines);
// Lines starting with '//' are skipped
847 if (substr($line, 0, 2) == '//') continue;
849 if (preg_match('/^(LEFT|CENTER|RIGHT):(.*)$/', $line, $matches)) {
850 // <div style="text-align:...">
851 $this->last = & $this->last->add(new Align(strtolower($matches[1])));
// Nothing after the prefix: the Align block stays open for following lines
852 if ($matches[2] == '') continue;
856 $line = rtrim($line, "\r\n");
// NOTE(review): the condition guarding this reset is not visible
860 $this->last = & $this;
865 if (substr($line, 0, 4) == '----') {
866 $this->insert(new HRule($this, $line));
870 // Multiline-enabled block plugin
// A plugin line ending in two or more '{' collects the following lines,
// "\r"-delimited, until a line containing the same number of '}'
871 if (! PKWKEXP_DISABLE_MULTILINE_PLUGIN_HACK &&
872 preg_match('/^#[^{]+(\{\{+)\s*$/', $line, $matches)) {
873 $len = strlen($matches[1]);
874 $line .= "\r"; // Delimiter
875 while (! empty($lines)) {
876 $next_line = preg_replace("/[\r\n]*$/", '', array_shift($lines));
877 if (preg_match('/\}{' . $len . '}/', $next_line)) {
881 $line .= $next_line .= "\r"; // Delimiter
886 // The first character
891 $this->insert(new Heading($this, $line));
896 if ($head == ' ' || $head == "\t") {
897 $this->last = & $this->last->add(new Pre($this, $line));
// A trailing '~' is replaced by "\r"
902 if (substr($line, -1) == '~')
903 $line = substr($line, 0, -1) . "\r";
906 if (isset($this->classes[$head])) {
907 $classname = $this->classes[$head];
908 $this->last = & $this->last->add(new $classname($this, $line));
913 if (isset($this->factories[$head])) {
914 $factoryname = 'Factory_' . $this->factories[$head];
915 $this->last = & $this->last->add($factoryname($this, $line));
// Anything else is inline text
920 $this->last = & $this->last->add(Factory_Inline($line));
// Prepares a heading.
//   $text  : heading source text
//   $level : heading level, forwarded to Contents_UList
// Returns array(text with anchor markup appended, "\n" . $top for every
// heading after the first or '' for the first, auto-generated id).
// NOTE(review): where $this->count is incremented and the conditions around
// $anchor are not visible in this listing.
924 function getAnchor($text, $level)
926 global $top, $_symbol_anchor;
928 // Heading id (auto-generated)
929 $autoid = 'content_' . $this->id . '_' . $this->count;
932 // Heading id (specified by users)
933 $id = make_heading($text, FALSE); // Cut fixed-anchor from $text
939 $anchor = ' &aname(' . $id . ',super,full){' . $_symbol_anchor . '};';
944 // Add 'page contents' link to its heading
945 $this->contents_last = & $this->contents_last->add(new Contents_UList($text, $level, $id));
948 return array($text . $anchor, $this->count > 1 ? "\n" . $top : '', $autoid);
// Inline objects are converted to paragraphs before being inserted
951 function & insert(& $obj)
953 if (is_a($obj, 'Inline')) $obj = & $obj->toPara();
954 return parent::insert($obj);
961 $text = parent::toString();
// Every '<#_contents_>' placeholder is replaced via replace_contents()
964 $text = preg_replace_callback('/<#_contents_>/',
965 array(& $this, 'replace_contents'), $text);
// Callback for the placeholder: a div of class "contents" holding an anchor
// named contents_<id> and the rendered table of contents.
// NOTE(review): the end of the expression and the return are not visible.
970 function replace_contents($arr)
972 $contents = '<div class="contents">' . "\n" .
973 '<a id="contents_' . $this->id . '"></a>' . "\n" .
974 $this->contents->toString() . "\n" .
// One entry of the table of contents: a 'ul' list item holding a link to the
// heading's anchor. Body::getAnchor() creates one per heading.
// NOTE(review): some lines are missing from the listing (embedded 986 and
// 1001-1002), and the class may continue past the last visible line.
980 class Contents_UList extends ListContainer
//   $text  : heading text used as the link label
//   $level : heading level; becomes the list nesting level
//   $id    : anchor id the link points to
982 function Contents_UList($text, $level, $id)
984 // Reformatting $text
985 // A line started with "\n" means "preformatted" ... X(
// The leading "\n" makes Inline store the markup as given instead of
// passing it through make_link()
987 $text = "\n" . '<a href="#' . $id . '">' . $text . '</a>' . "\n";
// The text argument is just $level '-' markers, which sets the list level
988 parent::ListContainer('ul', 'li', '-', str_repeat('-', $level));
989 $this->insert(Factory_Inline($text));
// Same purpose as ListContainer::setParent(), with a different margin rule:
// the left margin always applies, and the per-level margin is added once when
// $step is unchanged, or $step times otherwise.
992 function setParent(& $parent)
994 global $_list_pad_str;
996 parent::setParent($parent);
997 $step = $this->level;
998 $margin = $this->left_margin;
999 if (isset($parent->parent) && is_a($parent->parent, 'ListContainer')) {
1000 $step -= $parent->parent->level;
1003 $margin += $this->margin * ($step == $this->level ? 1 : $step);
1004 $this->style = sprintf($_list_pad_str, $this->level, $margin, $margin);