/**
 * Entity::hen()
 * htmlentities wrapper
 *
 * Entities already present in the input are decoded before the whole string
 * is encoded again, so text that is already escaped is not escaped twice.
 *
 * @static
 * @param	string	$string		target string
 * @param	string	$quotation	quotation mode. please refer to the argument of PHP built-in htmlentities
 * @return	string	escaped string
 */
static public function hen($string, $quotation=ENT_QUOTES)
{
	/*
	 * we can use 'double_encode' flag instead of this when dropping supports for PHP 5.2.2 or lower
	 */
	$plain = html_entity_decode($string, $quotation, i18n::get_current_charset());
	$escaped = htmlentities($plain, $quotation, i18n::get_current_charset());

	return (string) $escaped;
}
\r
/**
 * Entity::hsc()
 * htmlspecialchars wrapper
 *
 * The input is first run through htmlspecialchars_decode() and then encoded
 * again, so text that is already escaped is not escaped twice.
 *
 * NOTE: htmlspecialchars_decode() is an ASCII-to-ASCII conversion
 *       and its target strings consist of only a few letters,
 *       so calling it without a character set causes no problems.
 *
 * @static
 * @param	string	$string		target string
 * @param	string	$quotation	quotation mode. please refer to the argument of PHP built-in htmlspecialchars
 * @return	string	escaped string
 */
static public function hsc($string, $quotation=ENT_QUOTES)
{
	/*
	 * we can use 'double_encode' flag instead of this when dropping supports for PHP 5.2.2 or lower
	 */
	$plain = htmlspecialchars_decode($string, $quotation);
	$escaped = htmlspecialchars($plain, $quotation, i18n::get_current_charset());

	return (string) $escaped;
}
\r
/**
 * Entity::strip_tags()
 * Strip HTML tags from a string
 *
 * This function is a bit more intelligent than a regular call to strip_tags(),
 * because it also deletes the contents of certain tags (del, script, style)
 * and cleans up any unneeded whitespace.
 *
 * @static
 * @param	String	$string	target string
 * @return	String	string with stripped tags
 */
static public function strip_tags($string)
{
	/* applied one after another, in this order */
	$patterns = array(
		"#<del[^>]*>.+<\/del[^>]*>#isU",
		"#<script[^>]*>.+<\/script[^>]*>#isU",
		"#<style[^>]*>.+<\/style[^>]*>#isU",
		'#>#',
		'#<#'
	);
	/* the last two pad every tag with blanks so adjacent words do not get glued together */
	$replacements = array('', '', '', '> ', ' <');

	$stripped = preg_replace($patterns, $replacements, $string);

	/* the unqualified call below is PHP's built-in strip_tags(), not this method */
	$stripped = strip_tags($stripped);

	return trim(preg_replace("#\s+#", " ", $stripped));
}
\r
/**
 * Entity::shorten()
 * shortens a text string to maxlength.
 * $suffix is what needs to be added at the end (end length is <= $maxlength)
 *
 * The purpose is to limit the width of string for rendered screen in web browser.
 * So it depends on style sheet, browser's rendering scheme, client's system font.
 *
 * NOTE: In general, non-Latin font such as Japanese, Chinese, Cyrillic have two times as width as Latin fonts,
 *       but this is not always correct, for example, rendered by proportional font.
 *
 * The string is decoded before it is measured, so one entity counts as one
 * character. Afterwards every character that was written as an entity in the
 * input is turned back into that entity.
 *
 * NOTE: the recovery step works per character, not per position: if a character
 *       occurs both as an entity and literally, all of its occurrences
 *       (including any inside $suffix) come back as the entity.
 *
 * @static
 * @param	string	$string		target string which may include HTML entities
 * @param	integer	$maxlength	maximum length of return string which includes suffix
 * @param	string	$suffix		added in the end of shortened-string
 * @return	string	shortened string
 */
static public function shorten($string, $maxlength, $suffix)
{
	/* 1. store html entities as a "decoded character => entity" map */
	$recover = array();
	$found = array();
	/* preg_match_all(): every entity has to be collected, not only the first one */
	if ( preg_match_all('#&[^&]+?;#', $string, $found) )
	{
		foreach ( array_unique($found[0]) as $encoded_entity )
		{
			$decoded_entity = html_entity_decode($encoded_entity, ENT_QUOTES, i18n::get_current_charset());

			/* unknown entities decode to themselves and need no recovery */
			if ( $decoded_entity === '' || $decoded_entity === $encoded_entity )
			{
				continue;
			}
			/* the first spelling wins when one character is written in several ways */
			if ( !array_key_exists($decoded_entity, $recover) )
			{
				$recover[$decoded_entity] = $encoded_entity;
			}
		}
	}

	/* 2. decode string */
	$string = html_entity_decode($string, ENT_QUOTES, i18n::get_current_charset());

	/* 3. shorten string and add suffix if string length is longer */
	/* never hand a negative length to substr when the suffix alone exceeds $maxlength */
	$limit = max(0, $maxlength - i18n::strlen($suffix));
	if ( i18n::strlen($string) > $limit )
	{
		$string = i18n::substr($string, 0, $limit);
		$string .= $suffix;
	}

	/* 4. recover entities */
	if ( count($recover) > 0 )
	{
		/*
		 * strtr() replaces in a single pass and takes the keys literally, so
		 * characters such as "(" or "#" are safe and an "&" introduced by one
		 * replacement is never encoded again by another.
		 */
		$string = strtr($string, $recover);
	}

	return $string;
}
\r
/**
 * Entity::highlight()
 * highlights a specific query in a given HTML text (not within HTML tags)
 *
 * The text is split into pairs of "tag + text following the tag"; only the
 * text parts are run through the expression(s), the tags are copied verbatim.
 * A dummy comment is put in front of the text so that leading text, which has
 * no tag before it, is picked up as well; the dummy itself is not output.
 *
 * @static
 * @param	string	$text		text to be highlighted
 * @param	string	$expression	regular expression to be matched (can be an array of expressions as well)
 * @param	string	$highlight	highlight to be used (use \\0 to indicate the matched expression)
 * @return	string	highlighted text
 */
static public function highlight($text, $expression, $highlight)
{
	/* nothing to do; an empty array of expressions is covered here too */
	if ( !$highlight || !$expression )
	{
		return $text;
	}

	$expressions = is_array($expression) ? $expression : array($expression);

	$text = "<!--h-->{$text}";
	$matches = array();
	preg_match_all('#(<[^>]+>)([^<>]*)#', $text, $matches);

	$result = '';
	$count = count($matches[2]);

	for ( $i = 0; $i < $count; $i++ )
	{
		/* index 0 is the dummy comment added above */
		if ( $i != 0 )
		{
			$result .= $matches[1][$i];
		}

		$segment = $matches[2][$i];
		foreach ( $expressions as $regex )
		{
			$replaced = preg_replace("#{$regex}#i", $highlight, $segment);

			/* preg_replace() returns NULL on a broken expression: keep the text instead of losing it */
			if ( $replaced !== NULL )
			{
				$segment = $replaced;
			}
		}
		$result .= $segment;
	}

	return $result;
}
\r
/**
 * Entity::anchor_footnoting()
 * change strings with footnoting generated from anchor elements
 *
 * Every anchor element is replaced by its text followed by a running number
 * in brackets, the remaining tags are stripped, and the link targets are
 * listed below the text, one "[number] url" line per anchor.
 * A string without anchor elements is returned unchanged.
 *
 * @static
 * @param	String	$string	strings which includes html elements
 * @return	String	string with footnotes
 */
static public function anchor_footnoting($string)
{
	/* 1. detect anchor elements */
	$anchors = array();
	/* PREG_SET_ORDER: one entry per anchor, holding (whole element, href, text) */
	if ( !preg_match_all("#<a[^>]*href=[\"\']([^\"^']*)[\"\'][^>]*>([^<]*)<\/a>#i", $string, $anchors, PREG_SET_ORDER) )
	{
		return $string;
	}

	/* 2. add footnotes */
	$footnotes = "\n\n";
	$count = 1;
	foreach ( $anchors as $anchor )
	{
		/*
		 * literal replacement of the first remaining occurrence;
		 * the element must not be used as a pattern because it may
		 * include characters which are special in regular expressions
		 */
		$position = strpos($string, $anchor[0]);
		if ( $position !== FALSE )
		{
			$string = substr_replace($string, "{$anchor[2]} [{$count}] ", $position, strlen($anchor[0]));
		}
		$footnotes .= "[{$count}] {$anchor[1]}\n";
		$count++;
	}

	/*
	 * the unqualified call below is PHP's built-in strip_tags().
	 * The footnotes are appended afterwards so that URLs are kept as they are.
	 */
	return strip_tags($string) . $footnotes;
}
\r
214 * NOTE: Obsoleted functions
\r
/**
 * Entity::named_to_numeric()
 * convert named character references into numeric character references
 *
 * Every "&name" sequence is handed to Entity::_named() together with the
 * ";" and/or "=" which directly follows it.
 *
 * NOTE: this used the "e" pattern modifier, which does not exist any more
 *       since PHP 7.0. A callback does the same job on every PHP version.
 *
 * @static
 * @param	String	$string	target string
 * @return	String	converted string
 */
static public function named_to_numeric ($string)
{
	return preg_replace_callback(
		'/(&[0-9A-Za-z]+)(;?\=?|([^A-Za-z0-9\;\:\.\-\_]))/',
		array(__CLASS__, '_named_to_numeric_callback'),
		$string
	);
}

/**
 * Entity::_named_to_numeric_callback()
 * preg_replace_callback() handler of Entity::named_to_numeric()
 *
 * @static
 * @param	Array	$matches	1: "&name", 2: trailing ";" and/or "=", 3: trailing other character (may be missing)
 * @return	String	replacement for the whole match
 */
static private function _named_to_numeric_callback($matches)
{
	$trailing = isset($matches[3]) ? $matches[3] : '';
	return self::_named($matches[1], $matches[2]) . $trailing;
}
\r
/**
 * Entity::normalize_numeric()
 * normalize numeric character references
 *
 * 1. decimal references are rewritten as hexadecimal ones ("&#233" -> "&#xe9;")
 * 2. hexadecimal references lose their leading zeros, get upper case digits
 *    and a terminating semicolon ("&#x00e9" -> "&#xE9;")
 * 3. the result is translated with the 'Windows-1252' table of Entity::$entities
 *
 * NOTE: this used the "e" pattern modifier, which does not exist any more
 *       since PHP 7.0. Callbacks do the same job on every PHP version.
 *
 * @static
 * @param	String	$string	target string
 * @return	String	normalized string
 */
static public function normalize_numeric ($string)
{
	$string = preg_replace_callback('/&#([0-9]+)(;)?/', array(__CLASS__, '_normalize_numeric_decimal'), $string);
	$string = preg_replace_callback('/&#[Xx](0)*([0-9A-Fa-f]+)(;?|([^A-Za-z0-9\;\:\.\-\_]))/', array(__CLASS__, '_normalize_numeric_hexadecimal'), $string);
	$string = strtr($string, self::$entities['Windows-1252']);

	return $string;
}

/**
 * Entity::_normalize_numeric_decimal()
 * preg_replace_callback() handler: decimal reference to hexadecimal reference
 *
 * @static
 * @param	Array	$matches	1: decimal digits
 * @return	String	hexadecimal reference with semicolon
 */
static private function _normalize_numeric_decimal($matches)
{
	return '&#x' . dechex((int) $matches[1]) . ';';
}

/**
 * Entity::_normalize_numeric_hexadecimal()
 * preg_replace_callback() handler: tidy up a hexadecimal reference
 *
 * @static
 * @param	Array	$matches	2: hexadecimal digits without leading zeros, 4: trailing character (may be missing)
 * @return	String	normalized hexadecimal reference
 */
static private function _normalize_numeric_hexadecimal($matches)
{
	$trailing = isset($matches[4]) ? $matches[4] : '';
	return '&#x' . strtoupper($matches[2]) . ';' . $trailing;
}
\r
/**
 * Entity::numeric_to_utf8()
 * convert decimal and hexadecimal numeric character references into UTF-8 characters
 *
 * 1. decimal references are rewritten as hexadecimal ones
 * 2. hexadecimal references lose their leading zeros, get upper case digits
 *    and a terminating semicolon
 * 3. every normalized reference is replaced by the result of Entity::_hex_to_utf8()
 *
 * NOTE: this used the "e" pattern modifier, which does not exist any more
 *       since PHP 7.0. Callbacks do the same job on every PHP version.
 *
 * @static
 * @param	String	$string	target string
 * @return	String	converted string
 */
static public function numeric_to_utf8 ($string)
{
	$string = preg_replace_callback('/&#([0-9]+)(;)?/', array(__CLASS__, '_numeric_to_utf8_decimal'), $string);
	$string = preg_replace_callback('/&#[Xx](0)*([0-9A-Fa-f]+)(;?|([^A-Za-z0-9\;\:\.\-\_]))/', array(__CLASS__, '_numeric_to_utf8_hexadecimal'), $string);
	$string = preg_replace_callback('/&#x([0-9A-Fa-f]+);/', array(__CLASS__, '_numeric_to_utf8_character'), $string);

	return $string;
}

/**
 * Entity::_numeric_to_utf8_decimal()
 * preg_replace_callback() handler: decimal reference to hexadecimal reference
 *
 * @static
 * @param	Array	$matches	1: decimal digits
 * @return	String	hexadecimal reference with semicolon
 */
static private function _numeric_to_utf8_decimal($matches)
{
	return '&#x' . dechex((int) $matches[1]) . ';';
}

/**
 * Entity::_numeric_to_utf8_hexadecimal()
 * preg_replace_callback() handler: tidy up a hexadecimal reference
 *
 * @static
 * @param	Array	$matches	2: hexadecimal digits without leading zeros, 4: trailing character (may be missing)
 * @return	String	normalized hexadecimal reference
 */
static private function _numeric_to_utf8_hexadecimal($matches)
{
	$trailing = isset($matches[4]) ? $matches[4] : '';
	return '&#x' . strtoupper($matches[2]) . ';' . $trailing;
}

/**
 * Entity::_numeric_to_utf8_character()
 * preg_replace_callback() handler: normalized reference to UTF-8 character
 *
 * @static
 * @param	Array	$matches	1: hexadecimal digits
 * @return	String	UTF-8 character
 */
static private function _numeric_to_utf8_character($matches)
{
	return self::_hex_to_utf8($matches[1]);
}
\r
/**
 * Entity::numeric_to_named()
 * convert decimal and hexadecimal numeric character references into named character references
 *
 * 1. hexadecimal references are rewritten as decimal ones
 * 2. decimal references are looked up in the flipped 'named_to_numeric'
 *    table of Entity::$entities; unknown references are left as they are
 *
 * The reference is looked up as a whole, so that "&#1600;" is not mistaken
 * for "&#160" followed by "0;".
 *
 * NOTE: this used the "e" pattern modifier, which does not exist any more
 *       since PHP 7.0. Callbacks do the same job on every PHP version.
 * NOTE(review): the lookup assumes that the values of the 'named_to_numeric'
 *       table are decimal references without semicolon ("&#160"), which is
 *       what step 1 prepares the string for - confirm against the table.
 *
 * @static
 * @param	String	$string	target string
 * @return	String	converted string
 */
static public function numeric_to_named ($string)
{
	$string = preg_replace_callback('/&#[Xx]([0-9A-Fa-f]+)/', array(__CLASS__, '_numeric_to_named_decimal'), $string);
	$string = preg_replace_callback('/&#[0-9]+/', array(__CLASS__, '_numeric_to_named_lookup'), $string);

	return $string;
}

/**
 * Entity::_numeric_to_named_decimal()
 * preg_replace_callback() handler: hexadecimal reference to decimal reference
 *
 * @static
 * @param	Array	$matches	1: hexadecimal digits
 * @return	String	decimal reference without semicolon
 */
static private function _numeric_to_named_decimal($matches)
{
	return '&#' . hexdec($matches[1]);
}

/**
 * Entity::_numeric_to_named_lookup()
 * preg_replace_callback() handler: decimal reference to named reference
 *
 * @static
 * @param	Array	$matches	0: decimal reference without semicolon
 * @return	String	named reference, or the unchanged match if there is none
 */
static private function _numeric_to_named_lookup($matches)
{
	/* flip the table only once per request */
	static $numeric_to_named = NULL;
	if ( $numeric_to_named === NULL )
	{
		$numeric_to_named = array_flip(self::$entities['named_to_numeric']);
	}

	if ( array_key_exists($matches[0], $numeric_to_named) )
	{
		return $numeric_to_named[$matches[0]];
	}
	return $matches[0];
}
\r
/**
 * Entity::specialchars()
 * convert the HTML special characters to named character references
 *
 * Character references which are already in the string are kept as they are:
 * they are swapped for a placeholder before the special characters get
 * converted and are put back afterwards.
 *
 * The apostrophe becomes "&apos;" for XML. "&apos;" is not defined in
 * HTML 4, so every other type gets the numeric reference "&#39;".
 *
 * @static
 * @param	String	$string	target string
 * @param	String	$type	'xml' (default), or anything else for HTML
 * @return	String	converted string
 */
static public function specialchars ($string, $type = 'xml')
{
	$specialchars = array(
		'"'	=> '&quot;',
		'&'	=> '&amp;',
		'<'	=> '&lt;',
		'>'	=> '&gt;'
	);

	if ( $type != 'xml' )
	{
		$specialchars["'"] = '&#39;';
	}
	else
	{
		$specialchars["'"] = '&apos;';
	}

	/* protect existing references from the "&" conversion below */
	$string = preg_replace('/&(#?[Xx]?[0-9A-Za-z]+);/', "[[[ENTITY:\\1]]]", $string);
	$string = strtr($string, $specialchars);
	$string = preg_replace('/\[\[\[ENTITY\:([^\]]+)\]\]\]/', "&\\1;", $string);

	return $string;
}
\r
/**
 * Entity::_hex_to_utf8()
 * convert a code point, given as hexadecimal digits, to its UTF-8 byte sequence
 *
 * Values which are not valid Unicode scalar values (surrogates U+D800-U+DFFF
 * and everything above U+10FFFF) cannot be expressed in UTF-8; they result
 * in U+FFFD REPLACEMENT CHARACTER.
 *
 * @static
 * @param	String	$s	hexadecimal digits of the code point, e.g. "20AC"
 * @return	String	UTF-8 encoded character
 */
static public function _hex_to_utf8($s)
{
	$c = hexdec($s);

	if ( $c > 0x10FFFF || ($c >= 0xD800 && $c <= 0xDFFF) )
	{
		/* U+FFFD */
		return "\xEF\xBF\xBD";
	}

	if ( $c < 0x80 )
	{
		/* 1 byte: 0xxxxxxx */
		return chr($c);
	}
	if ( $c < 0x800 )
	{
		/* 2 bytes: 110xxxxx 10xxxxxx */
		return chr(0xC0 | ($c >> 6))
		     . chr(0x80 | ($c & 0x3F));
	}
	if ( $c < 0x10000 )
	{
		/* 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx */
		return chr(0xE0 | ($c >> 12))
		     . chr(0x80 | (($c >> 6) & 0x3F))
		     . chr(0x80 | ($c & 0x3F));
	}

	/* 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
	return chr(0xF0 | ($c >> 18))
	     . chr(0x80 | (($c >> 12) & 0x3F))
	     . chr(0x80 | (($c >> 6) & 0x3F))
	     . chr(0x80 | ($c & 0x3F));
}
\r
/**
 * Entity::_named()
 * convert a named character reference to its numeric character reference
 *
 * The longest leading part of $entity which is a key of the
 * 'named_to_numeric' table of Entity::$entities is replaced by the table
 * value plus ";"; whatever is left of $entity is appended unchanged.
 * If no leading part is known, $entity is returned as it is, terminated
 * with ";" only if it was followed by exactly ";".
 *
 * @param	String	$entity	"&" followed by the alphanumeric name
 * @param	String	$extra	the ";" and/or "=" which directly followed $entity
 * @return	String	converted reference
 */
function _named($entity, $extra)
{
	/* "&name=" is returned as it is */
	if ( $extra == '=' )
	{
		return $entity . '=';
	}

	/* try the whole name first, then drop one character from the end at a time */
	for ( $length = i18n::strlen($entity); $length > 0; $length-- )
	{
		$check = i18n::substr($entity, 0, $length);
		if ( !array_key_exists($check, self::$entities['named_to_numeric']) )
		{
			continue;
		}
		return self::$entities['named_to_numeric'][$check] . ';' . i18n::substr($entity, $length);
	}

	/* unknown name */
	return ( $extra != ';' ) ? $entity : "{$entity};";
}
\r
368 * Entity::$entities
\r
370 * HTML 4.01 Specification
\r
371 * @link http://www.w3.org/TR/html4/sgml/entities.html
\r
372 * @see 24 Character entity references in HTML 4
\r
374 * XHTML™ 1.0 The Extensible HyperText Markup Language (Second Edition)
\r
375 * A Reformulation of HTML 4 in XML 1.0
\r
376 * @link http://www.w3.org/TR/xhtml1/
\r
377 * @see 4.12. Entity references as hex values
\r
378 * @see C.16. The Named Character Reference '
\r
383 static private $entities = array (
\r
384 'named_to_numeric' => array (
\r
385 ' ' => ' ',
\r
386 '¡' => '¡',
\r
387 '¢' => '¢',
\r
388 '£' => '£',
\r
389 '¤' => '¤',
\r
390 '¥' => '¥',
\r
391 '¦' => '¦',
\r
392 '§' => '§',
\r
393 '¨' => '¨',
\r
394 '©' => '©',
\r
395 'ª' => 'ª',
\r
396 '«' => '«',
\r
397 '¬' => '¬',
\r
398 '­' => '­',
\r
399 '®' => '®',
\r
400 '¯' => '¯',
\r
401 '°' => '°',
\r
402 '±' => '±',
\r
403 '²' => '²',
\r
404 '³' => '³',
\r
405 '´' => '´',
\r
406 'µ' => 'µ',
\r
407 '¶' => '¶',
\r
408 '·' => '·',
\r
409 '¸' => '¸',
\r
410 '¹' => '¹',
\r
411 'º' => 'º',
\r
412 '»' => '»',
\r
413 '¼' => '¼',
\r
414 '½' => '½',
\r
415 '¾' => '¾',
\r
416 '¿' => '¿',
\r
417 'À' => 'À',
\r
418 'Á' => 'Á',
\r
419 'Â' => 'Â',
\r
420 'Ã' => 'Ã',
\r
421 'Ä' => 'Ä',
\r
422 'Å' => 'Å',
\r
423 'Æ' => 'Æ',
\r
424 'Ç' => 'Ç',
\r
425 'È' => 'È',
\r
426 'É' => 'É',
\r
427 'Ê' => 'Ê',
\r
428 'Ë' => 'Ë',
\r
429 'Ì' => 'Ì',
\r
430 'Í' => 'Í',
\r
431 'Î' => 'Î',
\r
432 'Ï' => 'Ï',
\r
433 'Ð' => 'Ð',
\r
434 'Ñ' => 'Ñ',
\r
435 'Ò' => 'Ò',
\r
436 'Ó' => 'Ó',
\r
437 'Ô' => 'Ô',
\r
438 'Õ' => 'Õ',
\r
439 'Ö' => 'Ö',
\r
440 '×' => '×',
\r
441 'Ø' => 'Ø',
\r
442 'Ù' => 'Ù',
\r
443 'Ú' => 'Ú',
\r
444 'Û' => 'Û',
\r
445 'Ü' => 'Ü',
\r
446 'Ý' => 'Ý',
\r
447 'Þ' => 'Þ',
\r
448 'ß' => 'ß',
\r
449 'à' => 'à',
\r
450 'á' => 'á',
\r
451 'â' => 'â',
\r
452 'ã' => 'ã',
\r
453 'ä' => 'ä',
\r
454 'å' => 'å',
\r
455 'æ' => 'æ',
\r
456 'ç' => 'ç',
\r
457 'è' => 'è',
\r
458 'é' => 'é',
\r
459 'ê' => 'ê',
\r
460 'ë' => 'ë',
\r
461 'ì' => 'ì',
\r
462 'í' => 'í',
\r
463 'î' => 'î',
\r
464 'ï' => 'ï',
\r
465 'ð' => 'ð',
\r
466 'ñ' => 'ñ',
\r
467 'ò' => 'ò',
\r
468 'ó' => 'ó',
\r
469 'ô' => 'ô',
\r
470 'õ' => 'õ',
\r
471 'ö' => 'ö',
\r
472 '÷' => '÷',
\r
473 'ø' => 'ø',
\r
474 'ù' => 'ù',
\r
475 'ú' => 'ú',
\r
476 'û' => 'û',
\r
477 'ü' => 'ü',
\r
478 'ý' => 'ý',
\r
479 'þ' => 'þ',
\r
480 'ÿ' => 'ÿ',
\r
481 '&OElig' => 'Œ',
\r
482 '&oelig' => 'å',
\r
483 '&Scaron' => 'Š',
\r
484 '&scaron' => 'š',
\r
485 '&Yuml' => 'Ÿ',
\r
486 '&circ' => 'ˆ',
\r
487 '&tilde' => '˜',
\r
488 '&esnp' => ' ',
\r
489 '&emsp' => ' ',
\r
490 '&thinsp' => ' ',
\r
491 '&zwnj' => '‌',
\r
492 '&zwj' => '‍',
\r
493 '&lrm' => '‎',
\r
494 '&rlm' => '‏',
\r
495 '&ndash' => '–',
\r
496 '&mdash' => '—',
\r
497 '&lsquo' => '‘',
\r
498 '&rsquo' => '’',
\r
499 '&sbquo' => '‚',
\r
500 '&ldquo' => '“',
\r
501 '&rdquo' => '”',
\r
502 '&bdquo' => '„',
\r
503 '&dagger' => '†',
\r
504 '&Dagger' => '‡',
\r
505 '&permil' => '‰',
\r
506 '&lsaquo' => '‹',
\r
507 '&rsaquo' => '›',
\r
508 '&euro' => '€',
\r
509 '&fnof' => 'ƒ',
\r
510 '&Alpha' => 'Α',
\r
511 '&Beta' => 'Β',
\r
512 '&Gamma' => 'Γ',
\r
513 '&Delta' => 'Δ',
\r
514 '&Epsilon' => 'Ε',
\r
515 '&Zeta' => 'Ζ',
\r
516 '&Eta' => 'Η',
\r
517 '&Theta' => 'Θ',
\r
518 '&Iota' => 'Ι',
\r
519 '&Kappa' => 'Κ',
\r
520 '&Lambda' => 'Λ',
\r
521 '&Mu' => 'Μ',
\r
522 '&Nu' => 'Ν',
\r
523 '&Xi' => 'Ξ',
\r
524 '&Omicron' => 'Ο',
\r
525 '&Pi' => 'Π',
\r
526 '&Rho' => 'Ρ',
\r
527 '&Sigma' => 'Σ',
\r
528 '&Tau' => 'Τ',
\r
529 '&Upsilon' => 'Υ',
\r
530 '&Phi' => 'Φ',
\r
531 '&Chi' => 'Χ',
\r
532 '&Psi' => 'Ψ',
\r
533 '&Omega' => 'Ω',
\r
534 '&alpha' => 'α',
\r
535 '&beta' => 'β',
\r
536 '&gamma' => 'γ',
\r
537 '&delta' => 'δ',
\r
538 '&epsilon' => 'ε',
\r
539 '&zeta' => 'ζ',
\r
540 '&eta' => 'η',
\r
541 '&theta' => 'θ',
\r
542 '&iota' => 'ι',
\r
543 '&kappa' => 'κ',
\r
544 '&lambda' => 'λ',
\r
545 '&mu' => 'μ',
\r
546 '&nu' => 'ν',
\r
547 '&xi' => 'ξ',
\r
548 '&omicron' => 'ο',
\r
549 '&pi' => 'π',
\r
550 '&rho' => 'ρ',
\r
551 '&sigmaf' => 'ς',
\r
552 '&sigma' => 'σ',
\r
553 '&tau' => 'τ',
\r
554 '&upsilon' => 'υ',
\r
555 '&phi' => 'φ',
\r
556 '&chi' => 'χ',
\r
557 '&psi' => 'ψ',
\r
558 '&omega' => 'ω',
\r
559 '&thetasym' => 'ϑ',
\r
560 '&upsih' => 'ϒ',
\r
561 '&piv' => 'ϖ',
\r
562 '&bull' => '•',
\r
563 '&hellip' => '…',
\r
564 '&prime' => '′',
\r
565 '&Prime' => '″',
\r
566 '&oline' => '‾',
\r
567 '&frasl' => '⁄',
\r
568 '&weierp' => '℘',
\r
569 '&image' => 'ℑ',
\r
570 '&real' => 'ℜ',
\r
571 '&trade' => 'ℒ',
\r
572 '&alefsym' => 'ℵ',
\r
573 '&larr' => '←',
\r
574 '&uarr' => '↑',
\r
575 '&rarr' => '→',
\r
576 '&darr' => '↓',
\r
577 '&harr' => '↔',
\r
578 '&crarr' => '↵',
\r
579 '&lArr' => '⇐',
\r
580 '&uArr' => '⇑',
\r
581 '&rArr' => '⇒',
\r
582 '&dArr' => '⇓',
\r
583 '&hArr' => '⇔',
\r
584 '&forall' => '∀',
\r
585 '&part' => '∂',
\r
586 '&exist' => '∃',
\r
587 '&empty' => '∅',
\r
588 '&nabla' => '∇',
\r
589 '&isin' => '∈',
\r
590 '¬in' => '∉',
\r
591 '&ni' => '∋',
\r
592 '&prod' => '∏',
\r
593 '&sum' => '∑',
\r
594 '&minus' => '−',
\r
595 '&lowast' => '∗',
\r
596 '&radic' => '√',
\r
597 '&prop' => '∝',
\r
598 '&infin' => '∞',
\r
599 '&ang' => '∠',
\r
600 '&and' => '∧',
\r
601 '&or' => '∨',
\r
602 '&cap' => '∩',
\r
603 '&cup' => '∪',
\r
604 '&int' => '∫',
\r
605 '&there4' => '∴',
\r
606 '&sim' => '∼',
\r
607 '&cong' => '≅',
\r
608 '&asymp' => '≈',
\r
609 '&ne' => '≠',
\r
610 '&equiv' => '≡',
\r
611 '&le' => '≤',
\r
612 '&ge' => '≥',
\r
613 '&sub' => '⊂',
\r
614 '&sup' => '⊃',
\r
615 '&nsub' => '⊄',
\r
616 '&sube' => '⊆',
\r
617 '&supe' => '⊇',
\r
618 '&oplus' => '⊕',
\r
619 '&otimes' => '⊖',
\r
620 '&perp' => '⊥',
\r
621 '&sdot' => '⋅',
\r
622 '&lceil' => '⍨',
\r
623 '&rceil' => '⌉',
\r
624 '&lfloor' => '⌊',
\r
625 '&rfloor' => '⌋',
\r
626 '&lang' => '〈',
\r
627 '&rang' => '⌰',
\r
628 '&loz' => '◊',
\r
629 '&spades' => '♠',
\r
630 '&clubs' => '♣',
\r
631 '&hearts' => '♥',
\r
632 '&diams' => '♦'
\r
634 'Windows-1252' => array(
\r
635 '€' => '€',
\r
636 '‚' => '‚',
\r
637 'ƒ' => 'ƒ',
\r
638 '„' => '„',
\r
639 '…' => '…',
\r
640 '†' => '†',
\r
641 '‡' => '‡',
\r
642 'ˆ' => 'ˆ',
\r
643 '‰' => '‰',
\r
644 'Š' => 'Š',
\r
645 '‹' => '‹',
\r
646 'Œ' => 'Œ',
\r
647 'Ž' => 'Ž',
\r
648 '‘' => '‘',
\r
649 '’' => '’',
\r
650 '“' => '“',
\r
651 '”' => '”',
\r
652 '•' => '•',
\r
653 '–' => '–',
\r
654 '—' => '—',
\r
655 '˜' => '˜',
\r
656 '™' => '™',
\r
657 'š' => 'š',
\r
658 '›' => '›',
\r
659 'œ' => 'œ',
\r
660 'ž' => 'ž',
\r
661 'Ÿ' => 'Ÿ',
\r