-<?php\r
-\r
-class Entity\r
-{\r
- /**\r
- * Entity::hen\r
- * htmlentities wrapper\r
- * \r
- * @static\r
- * @access public\r
- * @param string $string target string\r
- * @param string $quotation quotation mode. please refer to the argument of PHP built-in htmlentities\r
- * @return string escaped string\r
- */\r
- static public function hen($string, $quotation=ENT_QUOTES)\r
- {\r
- /*\r
- * we can use 'double_encode' flag instead of this when dropping supports for PHP 5.2.2 or lower\r
- */\r
- $string = html_entity_decode($string, $quotation, i18n::get_current_charset());\r
- return (string) htmlentities($string, $quotation, i18n::get_current_charset());\r
- }\r
- \r
- /**\r
- * Entity::hsc\r
- * htmlspecialchars wrapper\r
- * \r
- * NOTE: htmlspecialchars_decode() is ASCII-to-ACII conversion\r
- * and its target string consists of several letters.\r
- * There are no problems.\r
- * \r
- * @static\r
- * @access public\r
- * @param string $string target string\r
- * @param string $quotation quotation mode. please refer to the argument of PHP built-in htmlspecialchars\r
- * @return string escaped string\r
- * \r
- */\r
- static public function hsc($string, $quotation=ENT_QUOTES)\r
- {\r
- /*\r
- * we can use 'double_encode' flag instead of this when dropping supports for PHP 5.2.2 or lower\r
- */\r
- $string = htmlspecialchars_decode($string, $quotation);\r
- return (string) htmlspecialchars($string, $quotation, i18n::get_current_charset());\r
- }\r
- \r
- /**\r
- * Entity::strip_tags()\r
- * Strip HTML tags from a string\r
- * \r
- * This function is a bit more intelligent than a regular call to strip_tags(),\r
- * because it also deletes the contents of certain tags and cleans up any\r
- * unneeded whitespace.\r
- * \r
- * @static\r
- * @param String $string target string\r
- * @return String string with stripped tags\r
- */\r
- static public function strip_tags($string)\r
- {\r
- $string = preg_replace("#<del[^>]*>.+<\/del[^>]*>#isU", '', $string);\r
- $string = preg_replace("#<script[^>]*>.+<\/script[^>]*>#isU", '', $string);\r
- $string = preg_replace("#<style[^>]*>.+<\/style[^>]*>#isU", '', $string);\r
- $string = preg_replace('#>#', '> ', $string);\r
- $string = preg_replace('#<#', ' <', $string);\r
- $string = strip_tags($string);\r
- $string = preg_replace("#\s+#", " ", $string);\r
- $string = trim($string);\r
- return $string;\r
- }\r
- \r
- /**\r
- * shortens a text string to maxlength.\r
- * $suffix is what needs to be added at the end (end length is <= $maxlength)\r
- *\r
- * The purpose is to limit the width of string for rendered screen in web browser.\r
- * So it depends on style sheet, browser's rendering scheme, client's system font.\r
- *\r
- * NOTE: In general, non-Latin font such as Japanese, Chinese, Cyrillic have two times as width as Latin fonts,\r
- * but this is not always correct, for example, rendered by proportional font.\r
- *\r
- * @static\r
- * @param string $escaped_string target string\r
- * @param integer $maxlength maximum length of return string which includes suffix\r
- * @param string $suffix added in the end of shortened-string\r
- * @return string\r
- */\r
- static public function shorten($string, $maxlength, $suffix)\r
- {\r
- static $flag;\r
- \r
- $decoded_entities_pcre = array();\r
- $encoded_entities = array();\r
- \r
- /* 1. store html entities */\r
- preg_match('#&[^&]+?;#', $string, $encoded_entities);\r
- if ( !$encoded_entities )\r
- {\r
- $flag = FALSE;\r
- }\r
- else\r
- {\r
- $flag = TRUE;\r
- }\r
- if ( $flag )\r
- {\r
- foreach ( $encoded_entities as $encoded_entity )\r
- {\r
- $decoded_entities_pcre[] = '#' . html_entity_decode($encoded_entity, ENT_QUOTES, i18n::get_current_charset()) . '#';\r
- }\r
- }\r
- \r
- /* 2. decode string */\r
- $string = html_entity_decode($string, ENT_QUOTES, i18n::get_current_charset());\r
- \r
- /* 3. shorten string and add suffix if string length is longer */\r
- if ( i18n::strlen($string) > $maxlength - i18n::strlen($suffix) )\r
- {\r
- $string = i18n::substr($string, 0, $maxlength - i18n::strlen($suffix) );\r
- $string .= $suffix;\r
- }\r
- \r
- /* 4. recover entities */\r
- if ( $flag )\r
- {\r
- $string = preg_replace($decoded_entities_pcre, $encoded_entities, $string);\r
- }\r
- \r
- return $string;\r
- }\r
- \r
- /**\r
- * Entity::highlight()\r
- * highlights a specific query in a given HTML text (not within HTML tags)\r
- * \r
- * @static\r
- * @param string $text text to be highlighted\r
- * @param string $expression regular expression to be matched (can be an array of expressions as well)\r
- * @param string $highlight highlight to be used (use \\0 to indicate the matched expression)\r
- * @return string\r
- */\r
- static public function highlight($text, $expression, $highlight)\r
- {\r
- if ( !$highlight || !$expression )\r
- {\r
- return $text;\r
- }\r
- \r
- if ( is_array($expression) && (count($expression) == 0) )\r
- {\r
- return $text;\r
- }\r
- \r
- $text = "<!--h-->{$text}";\r
- preg_match_all('#(<[^>]+>)([^<>]*)#', $text, $matches);\r
- $result = '';\r
- $count = count($matches[2]);\r
- \r
- for ( $i = 0; $i < $count; $i++ )\r
- {\r
- if ( $i != 0 )\r
- {\r
- $result .= $matches[1][$i];\r
- }\r
- \r
- if ( is_array($expression) )\r
- {\r
- foreach ( $expression as $regex )\r
- {\r
- $matches[2][$i] = preg_replace("#{$regex}#i", $highlight, $matches[2][$i]);\r
- }\r
- $result .= $matches[2][$i];\r
- }\r
- else\r
- {\r
- $result .= preg_replace("#{$expression}#i", $highlight, $matches[2][$i]);\r
- }\r
- }\r
- return $result;\r
- }\r
- \r
- /**\r
- * Entity::anchor_footnoting()\r
- * change strings with footnoticing generated from anchor elements\r
- * \r
- * @static\r
- * @param String $string strings which includes html elements\r
- * @return String string with footnotes\r
- */\r
- static public function anchor_footnoting($string)\r
- {\r
- /* 1. detect anchor elements */\r
- $anchors = array();\r
- if ( !preg_match_all("#<a[^>]*href=[\"\']([^\"^']*)[\"\'][^>]*>([^<]*)<\/a>#i", $subject, $anchors) )\r
- {\r
- return $string;\r
- }\r
- \r
- /* 2. add footnotes */\r
- $string .= "\n\n";\r
- $count = 1;\r
- foreach ( $anchors as $anchor )\r
- {\r
- preg_replace("#{$anchor[0]}#", "{$anchor[2]} [{$count}] ", $subject);\r
- $subject .= "[{$count}] {$anchor[1]}\n";\r
- $count++;\r
- }\r
- \r
- return strip_tags($ascii);\r
- }\r
- \r
- /*\r
- * NOTE: Obsoleted functions\r
- */\r
- \r
- /**\r
- * Entity::named_to_numeric()\r
- * \r
- * @deprecated\r
- * @param String $string\r
- */\r
- function named_to_numeric ($string)\r
- {\r
- $string = preg_replace('/(&[0-9A-Za-z]+)(;?\=?|([^A-Za-z0-9\;\:\.\-\_]))/e', "Entity::_named('\\1', '\\2') . '\\3'", $string);\r
- return $string;\r
- }\r
- \r
- /**\r
- * Entity::named_to_numeric()\r
- * \r
- * @deprecated\r
- * @param String $string\r
- */\r
- function normalize_numeric ($string) {\r
- $string = preg_replace('/&#([0-9]+)(;)?/e', "'&#x'.dechex('\\1').';'", $string);\r
- $string = preg_replace('/&#[Xx](0)*([0-9A-Fa-f]+)(;?|([^A-Za-z0-9\;\:\.\-\_]))/e', "'&#x' . strtoupper('\\2') . ';\\4'", $string);\r
- $string = strtr($string, self::$entities['Windows-1252']);\r
- return $string;\r
- }\r
- \r
- /**\r
- * Entity::numeric_to_utf8()\r
- * \r
- * @deprecated\r
- * @param String $string\r
- */\r
- function numeric_to_utf8 ($string) {\r
- $string = preg_replace('/&#([0-9]+)(;)?/e', "'&#x'.dechex('\\1').';'", $string);\r
- $string = preg_replace('/&#[Xx](0)*([0-9A-Fa-f]+)(;?|([^A-Za-z0-9\;\:\.\-\_]))/e', "'&#x' . strtoupper('\\2') . ';\\4'", $string);\r
- $string = preg_replace('/&#x([0-9A-Fa-f]+);/e', "Entity::_hex_to_utf8('\\1')", $string); \r
- return $string; \r
- }\r
- \r
- /**\r
- * Entity::numeric_to_named()\r
- * convert decimal and hexadecimal numeric character references into named character references\r
- * \r
- * @deprecated\r
- * @param String $string\r
- */\r
- function numeric_to_named ($string)\r
- {\r
- $string = preg_replace('/&#[Xx]([0-9A-Fa-f]+)/e', "'&#'.hexdec('\\1')", $string);\r
- $string = strtr($string, array_flip(self::$entities['named_to_numeric']));\r
- return $string; \r
- }\r
- \r
- /**\r
- * Entity::specialchars()\r
- * convert HTML entities to named character reference\r
- * \r
- * @deprecated\r
- * @param String $string\r
- */\r
- function specialchars ($string, $type = 'xml')\r
- {\r
- $specialchars = array(\r
- '"' => '"',\r
- '&' => '&',\r
- '<' => '<',\r
- '>' => '>'\r
- );\r
- if ( $type != 'xml' )\r
- {\r
- $specialchars["'"] = ''';\r
- }\r
- else\r
- {\r
- $specialchars["'"] = ''';\r
- }\r
- \r
- $string = preg_replace('/&(#?[Xx]?[0-9A-Za-z]+);/', "[[[ENTITY:\\1]]]", $string);\r
- $string = strtr($string, $specialchars);\r
- $string = preg_replace('/\[\[\[ENTITY\:([^\]]+)\]\]\]/', "&\\1;", $string); \r
- return $string;\r
- }\r
- \r
- /**\r
- * Entity::_hex_to_utf8()\r
- * convert decimal numeric character references to hexadecimal numeric character references\r
- * \r
- * @deprecated\r
- * @param String $string\r
- */\r
- function _hex_to_utf8($s)\r
- {\r
- $c = hexdec($s);\r
- \r
- if ( $c < 0x80 )\r
- {\r
- $str = chr($c);\r
- }\r
- else if ( $c < 0x800 )\r
- {\r
- $str = chr(0xC0 | $c>>6) . chr(0x80 | $c & 0x3F);\r
- }\r
- else if ( $c < 0x10000 )\r
- {\r
- $str = chr(0xE0 | $c>>12) . chr(0x80 | $c>>6 & 0x3F) . chr(0x80 | $c & 0x3F);\r
- }\r
- else if ( $c < 0x200000 )\r
- {\r
- $str = chr(0xF0 | $c>>18) . chr(0x80 | $c>>12 & 0x3F) . chr(0x80 | $c>>6 & 0x3F) . chr(0x80 | $c & 0x3F);\r
- }\r
- return $str;\r
- }\r
- \r
- /**\r
- * Entity::_named()\r
- * convert entities to named character reference\r
- * \r
- * @deprecated\r
- * @param String $string\r
- * @param String $extra\r
- * @return \r
- */\r
- function _named($entity, $extra)\r
- {\r
- if ( $extra == '=' )\r
- {\r
- return $entity . '=';\r
- }\r
- \r
- $length = i18n::strlen($entity);\r
- \r
- while ( $length > 0 )\r
- {\r
- $check = i18n::substr($entity, 0, $length);\r
- if ( array_key_exists($check, self::$entities['named_to_numeric']) )\r
- {\r
- return self::$entities['named_to_numeric'][$check] . ';' . i18n::substr($entity, $length);\r
- }\r
- $length--;\r
- }\r
- \r
- if ( $extra != ';' )\r
- {\r
- return $entity;\r
- }\r
- else\r
- {\r
- return "{$entity};";\r
- }\r
- }\r
- \r
- /**\r
- * ENTITIY::$entities\r
- * \r
- * HTML 4.01 Specification\r
- * @link http://www.w3.org/TR/html4/sgml/entities.html\r
- * @see 24 Character entity references in HTML 4\r
- * \r
- * XHTML™ 1.0 The Extensible HyperText Markup Language (Second Edition)\r
- * A Reformulation of HTML 4 in XML 1.0\r
- * @link http://www.w3.org/TR/xhtml1/\r
- * @see 4.12. Entity references as hex values\r
- * @see C.16. The Named Character Reference '\r
- * \r
- * @static\r
- * @deprecated\r
- */\r
- static private $entities = array (\r
- 'named_to_numeric' => array (\r
- ' ' => ' ',\r
- '¡' => '¡',\r
- '¢' => '¢',\r
- '£' => '£',\r
- '¤' => '¤',\r
- '¥' => '¥',\r
- '¦' => '¦',\r
- '§' => '§',\r
- '¨' => '¨',\r
- '©' => '©',\r
- 'ª' => 'ª',\r
- '«' => '«',\r
- '¬' => '¬',\r
- '­' => '­',\r
- '®' => '®',\r
- '¯' => '¯',\r
- '°' => '°',\r
- '±' => '±',\r
- '²' => '²',\r
- '³' => '³',\r
- '´' => '´',\r
- 'µ' => 'µ',\r
- '¶' => '¶',\r
- '·' => '·',\r
- '¸' => '¸',\r
- '¹' => '¹',\r
- 'º' => 'º',\r
- '»' => '»',\r
- '¼' => '¼',\r
- '½' => '½',\r
- '¾' => '¾',\r
- '¿' => '¿',\r
- 'À' => 'À',\r
- 'Á' => 'Á',\r
- 'Â' => 'Â',\r
- 'Ã' => 'Ã',\r
- 'Ä' => 'Ä',\r
- 'Å' => 'Å',\r
- 'Æ' => 'Æ',\r
- 'Ç' => 'Ç',\r
- 'È' => 'È',\r
- 'É' => 'É',\r
- 'Ê' => 'Ê',\r
- 'Ë' => 'Ë',\r
- 'Ì' => 'Ì',\r
- 'Í' => 'Í',\r
- 'Î' => 'Î',\r
- 'Ï' => 'Ï',\r
- 'Ð' => 'Ð',\r
- 'Ñ' => 'Ñ',\r
- 'Ò' => 'Ò',\r
- 'Ó' => 'Ó',\r
- 'Ô' => 'Ô',\r
- 'Õ' => 'Õ',\r
- 'Ö' => 'Ö',\r
- '×' => '×',\r
- 'Ø' => 'Ø',\r
- 'Ù' => 'Ù',\r
- 'Ú' => 'Ú',\r
- 'Û' => 'Û',\r
- 'Ü' => 'Ü',\r
- 'Ý' => 'Ý',\r
- 'Þ' => 'Þ',\r
- 'ß' => 'ß',\r
- 'à' => 'à',\r
- 'á' => 'á',\r
- 'â' => 'â',\r
- 'ã' => 'ã',\r
- 'ä' => 'ä',\r
- 'å' => 'å',\r
- 'æ' => 'æ',\r
- 'ç' => 'ç',\r
- 'è' => 'è',\r
- 'é' => 'é',\r
- 'ê' => 'ê',\r
- 'ë' => 'ë',\r
- 'ì' => 'ì',\r
- 'í' => 'í',\r
- 'î' => 'î',\r
- 'ï' => 'ï',\r
- 'ð' => 'ð',\r
- 'ñ' => 'ñ',\r
- 'ò' => 'ò',\r
- 'ó' => 'ó',\r
- 'ô' => 'ô',\r
- 'õ' => 'õ',\r
- 'ö' => 'ö',\r
- '÷' => '÷',\r
- 'ø' => 'ø',\r
- 'ù' => 'ù',\r
- 'ú' => 'ú',\r
- 'û' => 'û',\r
- 'ü' => 'ü',\r
- 'ý' => 'ý',\r
- 'þ' => 'þ',\r
- 'ÿ' => 'ÿ',\r
- '&OElig' => 'Œ',\r
- '&oelig' => 'å',\r
- '&Scaron' => 'Š',\r
- '&scaron' => 'š',\r
- '&Yuml' => 'Ÿ',\r
- '&circ' => 'ˆ',\r
- '&tilde' => '˜',\r
- '&esnp' => ' ',\r
- '&emsp' => ' ',\r
- '&thinsp' => ' ',\r
- '&zwnj' => '‌',\r
- '&zwj' => '‍',\r
- '&lrm' => '‎',\r
- '&rlm' => '‏',\r
- '&ndash' => '–',\r
- '&mdash' => '—',\r
- '&lsquo' => '‘',\r
- '&rsquo' => '’',\r
- '&sbquo' => '‚',\r
- '&ldquo' => '“',\r
- '&rdquo' => '”',\r
- '&bdquo' => '„',\r
- '&dagger' => '†',\r
- '&Dagger' => '‡',\r
- '&permil' => '‰',\r
- '&lsaquo' => '‹',\r
- '&rsaquo' => '›',\r
- '&euro' => '€',\r
- '&fnof' => 'ƒ',\r
- '&Alpha' => 'Α',\r
- '&Beta' => 'Β',\r
- '&Gamma' => 'Γ',\r
- '&Delta' => 'Δ',\r
- '&Epsilon' => 'Ε',\r
- '&Zeta' => 'Ζ',\r
- '&Eta' => 'Η',\r
- '&Theta' => 'Θ',\r
- '&Iota' => 'Ι',\r
- '&Kappa' => 'Κ',\r
- '&Lambda' => 'Λ',\r
- '&Mu' => 'Μ',\r
- '&Nu' => 'Ν',\r
- '&Xi' => 'Ξ',\r
- '&Omicron' => 'Ο',\r
- '&Pi' => 'Π',\r
- '&Rho' => 'Ρ',\r
- '&Sigma' => 'Σ',\r
- '&Tau' => 'Τ',\r
- '&Upsilon' => 'Υ',\r
- '&Phi' => 'Φ',\r
- '&Chi' => 'Χ',\r
- '&Psi' => 'Ψ',\r
- '&Omega' => 'Ω',\r
- '&alpha' => 'α',\r
- '&beta' => 'β',\r
- '&gamma' => 'γ',\r
- '&delta' => 'δ',\r
- '&epsilon' => 'ε',\r
- '&zeta' => 'ζ',\r
- '&eta' => 'η',\r
- '&theta' => 'θ',\r
- '&iota' => 'ι',\r
- '&kappa' => 'κ',\r
- '&lambda' => 'λ',\r
- '&mu' => 'μ',\r
- '&nu' => 'ν',\r
- '&xi' => 'ξ',\r
- '&omicron' => 'ο',\r
- '&pi' => 'π',\r
- '&rho' => 'ρ',\r
- '&sigmaf' => 'ς',\r
- '&sigma' => 'σ',\r
- '&tau' => 'τ',\r
- '&upsilon' => 'υ',\r
- '&phi' => 'φ',\r
- '&chi' => 'χ',\r
- '&psi' => 'ψ',\r
- '&omega' => 'ω',\r
- '&thetasym' => 'ϑ',\r
- '&upsih' => 'ϒ',\r
- '&piv' => 'ϖ',\r
- '&bull' => '•',\r
- '&hellip' => '…',\r
- '&prime' => '′',\r
- '&Prime' => '″',\r
- '&oline' => '‾',\r
- '&frasl' => '⁄',\r
- '&weierp' => '℘',\r
- '&image' => 'ℑ',\r
- '&real' => 'ℜ',\r
- '&trade' => 'ℒ',\r
- '&alefsym' => 'ℵ',\r
- '&larr' => '←',\r
- '&uarr' => '↑',\r
- '&rarr' => '→',\r
- '&darr' => '↓',\r
- '&harr' => '↔',\r
- '&crarr' => '↵',\r
- '&lArr' => '⇐',\r
- '&uArr' => '⇑',\r
- '&rArr' => '⇒',\r
- '&dArr' => '⇓',\r
- '&hArr' => '⇔',\r
- '&forall' => '∀',\r
- '&part' => '∂',\r
- '&exist' => '∃',\r
- '&empty' => '∅',\r
- '&nabla' => '∇',\r
- '&isin' => '∈',\r
- '¬in' => '∉',\r
- '&ni' => '∋',\r
- '&prod' => '∏',\r
- '&sum' => '∑',\r
- '&minus' => '−',\r
- '&lowast' => '∗',\r
- '&radic' => '√',\r
- '&prop' => '∝',\r
- '&infin' => '∞',\r
- '&ang' => '∠',\r
- '&and' => '∧',\r
- '&or' => '∨',\r
- '&cap' => '∩',\r
- '&cup' => '∪',\r
- '&int' => '∫',\r
- '&there4' => '∴',\r
- '&sim' => '∼',\r
- '&cong' => '≅',\r
- '&asymp' => '≈',\r
- '&ne' => '≠',\r
- '&equiv' => '≡',\r
- '&le' => '≤',\r
- '&ge' => '≥',\r
- '&sub' => '⊂',\r
- '&sup' => '⊃',\r
- '&nsub' => '⊄',\r
- '&sube' => '⊆',\r
- '&supe' => '⊇',\r
- '&oplus' => '⊕',\r
- '&otimes' => '⊖',\r
- '&perp' => '⊥',\r
- '&sdot' => '⋅',\r
- '&lceil' => '⍨',\r
- '&rceil' => '⌉',\r
- '&lfloor' => '⌊',\r
- '&rfloor' => '⌋',\r
- '&lang' => '〈',\r
- '&rang' => '⌰',\r
- '&loz' => '◊',\r
- '&spades' => '♠',\r
- '&clubs' => '♣',\r
- '&hearts' => '♥',\r
- '&diams' => '♦'\r
- ),\r
- 'Windows-1252' => array(\r
- '€' => '€',\r
- '‚' => '‚',\r
- 'ƒ' => 'ƒ',\r
- '„' => '„',\r
- '…' => '…',\r
- '†' => '†',\r
- '‡' => '‡',\r
- 'ˆ' => 'ˆ',\r
- '‰' => '‰',\r
- 'Š' => 'Š',\r
- '‹' => '‹',\r
- 'Œ' => 'Œ',\r
- 'Ž' => 'Ž',\r
- '‘' => '‘',\r
- '’' => '’',\r
- '“' => '“',\r
- '”' => '”',\r
- '•' => '•',\r
- '–' => '–',\r
- '—' => '—',\r
- '˜' => '˜',\r
- '™' => '™',\r
- 'š' => 'š',\r
- '›' => '›',\r
- 'œ' => 'œ',\r
- 'ž' => 'ž',\r
- 'Ÿ' => 'Ÿ',\r
- )\r
- );\r
-}\r
+<?php
+
+class Entity
+{
+ /**
+  * Entity::hen
+  * htmlentities wrapper
+  *
+  * Entities already present in $string are decoded first and the decoded
+  * text is then encoded again. Both steps use the charset reported by
+  * i18n::get_current_charset().
+  *
+  * @static
+  * @access public
+  * @param string $string target string
+  * @param integer $quotation quote-handling flag, handed unchanged to html_entity_decode() and htmlentities() (default ENT_QUOTES)
+  * @return string escaped string
+  */
+ static public function hen($string, $quotation=ENT_QUOTES)
+ {
+     /*
+      * Decoding before encoding stands in for the 'double_encode' flag of
+      * htmlentities(); the flag can replace this once PHP 5.2.2 or lower
+      * no longer has to be supported.
+      */
+     $decoded = html_entity_decode($string, $quotation, i18n::get_current_charset());
+     $encoded = htmlentities($decoded, $quotation, i18n::get_current_charset());
+
+     return (string) $encoded;
+ }
+
+ /**
+  * Entity::hsc
+  * htmlspecialchars wrapper
+  *
+  * Special-character entities already present in $string are decoded first
+  * and the decoded text is then encoded again.
+  *
+  * NOTE: htmlspecialchars_decode() is an ASCII-to-ASCII conversion whose
+  * targets are a few fixed character sequences, so no charset is passed to it.
+  *
+  * @static
+  * @access public
+  * @param string $string target string
+  * @param integer $quotation quote-handling flag, handed unchanged to htmlspecialchars_decode() and htmlspecialchars() (default ENT_QUOTES)
+  * @return string escaped string
+  */
+ static public function hsc($string, $quotation=ENT_QUOTES)
+ {
+     /*
+      * Decoding before encoding stands in for the 'double_encode' flag of
+      * htmlspecialchars(); the flag can replace this once PHP 5.2.2 or lower
+      * no longer has to be supported.
+      */
+     $plain = htmlspecialchars_decode($string, $quotation);
+     $escaped = htmlspecialchars($plain, $quotation, i18n::get_current_charset());
+
+     return (string) $escaped;
+ }
+
+ /**
+  * Entity::strip_tags()
+  * Strip HTML tags from a string
+  *
+  * This function does more than a plain call to PHP's strip_tags():
+  *  1. <del>, <script> and <style> elements are removed together with their content,
+  *  2. a space is inserted around every remaining tag so that text from
+  *     neighbouring elements does not run together,
+  *  3. the tags themselves are removed,
+  *  4. runs of whitespace are collapsed to one space and the result is trimmed.
+  *
+  * @static
+  * @param String $string target string
+  * @return String string with stripped tags
+  */
+ static public function strip_tags($string)
+ {
+     foreach ( array('del', 'script', 'style') as $tag )
+     {
+         /*
+          * \b keeps the tag name exact: without it "<del" also matched
+          * "<details>" and the content of that element was thrown away.
+          */
+         $pattern = '#<' . $tag . '\b[^>]*>.+</' . $tag . '\b[^>]*>#isU';
+         $without = preg_replace($pattern, '', $string);
+
+         /* preg_replace() yields NULL on a PCRE error; keep the text in that case */
+         if ( $without !== NULL )
+         {
+             $string = $without;
+         }
+     }
+
+     /* '>' is padded first, then '<' (same order as two consecutive replacements) */
+     $string = str_replace(array('>', '<'), array('> ', ' <'), $string);
+
+     /* unqualified call: this is PHP's built-in strip_tags(), not this method */
+     $string = strip_tags($string);
+
+     $collapsed = preg_replace('#\s+#', ' ', $string);
+     if ( $collapsed !== NULL )
+     {
+         $string = $collapsed;
+     }
+
+     return trim($string);
+ }
+
+ /**
+  * Entity::shorten()
+  * shortens a text string to maxlength.
+  * $suffix is appended when the text had to be cut (end length is <= $maxlength,
+  * counting every HTML entity as one character)
+  *
+  * The purpose is to limit the width of string for rendered screen in web browser.
+  * So it depends on style sheet, browser's rendering scheme, client's system font.
+  *
+  * NOTE: In general, non-Latin font such as Japanese, Chinese, Cyrillic have two times as width as Latin fonts,
+  * but this is not always correct, for example, rendered by proportional font.
+  *
+  * NOTE: entities are restored by character after the cut, so a literal
+  * character that is also written as an entity elsewhere in $string comes
+  * back as that entity.
+  *
+  * @static
+  * @param string $string target string, may contain HTML entities
+  * @param integer $maxlength maximum length of return string which includes suffix
+  * @param string $suffix added in the end of shortened-string
+  * @return string
+  */
+ static public function shorten($string, $maxlength, $suffix)
+ {
+     $charset = i18n::get_current_charset();
+
+     /* 1. remember every entity in the string (not just the first one) */
+     $entity_map = array();
+     $found = array();
+     if ( preg_match_all('/&(?:#[0-9]+|#[Xx][0-9A-Fa-f]+|[A-Za-z][0-9A-Za-z]*);/', $string, $found) )
+     {
+         foreach ( array_unique($found[0]) as $encoded )
+         {
+             $decoded = html_entity_decode($encoded, ENT_QUOTES, $charset);
+
+             /*
+              * entities that html_entity_decode() leaves untouched need no
+              * restoring; when one character has several spellings the first wins
+              */
+             if ( $decoded !== '' && $decoded !== $encoded && !isset($entity_map[$decoded]) )
+             {
+                 $entity_map[$decoded] = $encoded;
+             }
+         }
+     }
+
+     /* 2. decode string so that an entity counts as a single character */
+     $string = html_entity_decode($string, ENT_QUOTES, $charset);
+
+     /*
+      * 3. shorten only when the string does not fit into $maxlength
+      * NOTE(review): assumes i18n::strlen()/i18n::substr() work on characters
+      * of the current charset - confirm against the i18n class
+      */
+     $is_cut = FALSE;
+     if ( i18n::strlen($string) > $maxlength )
+     {
+         /* never hand a negative length to substr when the suffix alone is too long */
+         $keep = max(0, $maxlength - i18n::strlen($suffix));
+         $string = i18n::substr($string, 0, $keep);
+         $is_cut = TRUE;
+     }
+
+     /* 4. recover entities; strtr() needs no regex quoting of the decoded characters */
+     if ( $entity_map )
+     {
+         $string = strtr($string, $entity_map);
+     }
+
+     /* the suffix is appended last so that it is passed through untouched */
+     if ( $is_cut )
+     {
+         $string .= $suffix;
+     }
+
+     return $string;
+ }
+
+ /**
+  * Entity::highlight()
+  * highlights a specific query in a given HTML text (not within HTML tags)
+  *
+  * The text is split into tags and the text runs between them; only the
+  * text runs are searched. Matching is case-insensitive. An expression that
+  * is empty or fails to compile is ignored instead of destroying the text.
+  *
+  * @static
+  * @param string $text text to be highlighted
+  * @param string $expression regular expression to be matched, without delimiters (can be an array of expressions as well)
+  * @param string $highlight highlight to be used (use \\0 to indicate the matched expression)
+  * @return string
+  */
+ static public function highlight($text, $expression, $highlight)
+ {
+     /* an empty array is falsy too, so this also covers "no expressions" */
+     if ( !$highlight || !$expression )
+     {
+         return $text;
+     }
+
+     $patterns = array();
+     foreach ( (is_array($expression) ? $expression : array($expression)) as $regex )
+     {
+         $regex = (string) $regex;
+         if ( $regex === '' )
+         {
+             /* an empty pattern would match between every two characters */
+             continue;
+         }
+
+         /* '#' is the delimiter used below: escape it unless the caller already did */
+         $patterns[] = '#' . preg_replace('/(?<!\\\\)#/', '\\\\#', $regex) . '#i';
+     }
+     if ( !$patterns )
+     {
+         return $text;
+     }
+
+     /* the dummy comment makes the text before the first real tag a regular match */
+     $text = "<!--h-->{$text}";
+     $matches = array();
+     preg_match_all('#(<[^>]+>)([^<>]*)#', $text, $matches);
+
+     $result = '';
+     foreach ( $matches[2] as $i => $segment )
+     {
+         /* index 0 is the dummy comment and must not be emitted */
+         if ( $i != 0 )
+         {
+             $result .= $matches[1][$i];
+         }
+
+         foreach ( $patterns as $pattern )
+         {
+             $replaced = preg_replace($pattern, $highlight, $segment);
+
+             /* NULL signals a PCRE error (e.g. invalid expression): keep the segment */
+             if ( $replaced !== NULL )
+             {
+                 $segment = $replaced;
+             }
+         }
+         $result .= $segment;
+     }
+
+     return $result;
+ }
+
+ /**
+  * Entity::anchor_footnoting()
+  * turns anchor elements into numbered footnotes
+  *
+  * Every <a href="...">text</a> is replaced by "text [n] " and a line
+  * "[n] url" is appended after a blank line at the end of the string.
+  * Afterwards all remaining tags are removed with PHP's strip_tags().
+  * A string without anchors is returned unchanged (tags included).
+  *
+  * @static
+  * @param String $string strings which includes html elements
+  * @return String string with footnotes
+  */
+ static public function anchor_footnoting($string)
+ {
+     /*
+      * 1. detect anchor elements
+      * PREG_SET_ORDER gives one entry per anchor: [0] whole element, [1] href, [2] link text
+      */
+     $anchors = array();
+     $pattern = '#<a\b[^>]*href=["\']([^"\']*)["\'][^>]*>([^<]*)</a>#i';
+     if ( !preg_match_all($pattern, $string, $anchors, PREG_SET_ORDER) )
+     {
+         return $string;
+     }
+
+     /* 2. add footnotes */
+     $footnotes = '';
+     $count = 1;
+     foreach ( $anchors as $anchor )
+     {
+         /*
+          * plain string replacement of the first remaining occurrence: the
+          * anchor's HTML is no valid regex, and identical anchors must each
+          * get their own number
+          */
+         $position = strpos($string, $anchor[0]);
+         if ( $position !== FALSE )
+         {
+             $string = substr_replace($string, "{$anchor[2]} [{$count}] ", $position, strlen($anchor[0]));
+         }
+
+         $footnotes .= "[{$count}] {$anchor[1]}\n";
+         $count++;
+     }
+     $string .= "\n\n" . $footnotes;
+
+     /* built-in strip_tags(): Entity::strip_tags() would collapse the line breaks */
+     return strip_tags($string);
+ }
+
+ /*
+  * NOTE: the functions below are obsolete and kept for backward compatibility only
+  */
+
+ /**
+  * Entity::named_to_numeric()
+  * replaces named character references by looking each candidate up
+  * with Entity::_named()
+  *
+  * The former implementation relied on the /e pattern modifier, which PHP 7
+  * removed; the same replacement is now done with a callback.
+  *
+  * @static
+  * @deprecated
+  * @param String $string
+  * @return String
+  */
+ static public function named_to_numeric ($string)
+ {
+     return preg_replace_callback(
+         '/(&[0-9A-Za-z]+)(;?\=?|([^A-Za-z0-9\;\:\.\-\_]))/',
+         array(__CLASS__, '_named_reference_callback'),
+         $string
+     );
+ }
+
+ /**
+  * Entity::_named_reference_callback()
+  * preg_replace_callback() handler of Entity::named_to_numeric()
+  *
+  * @static
+  * @param Array $match [1] '&' plus name, [2] what follows the name, [3] optional trailing character
+  * @return String replacement for the whole match
+  */
+ static private function _named_reference_callback($match)
+ {
+     /* group 3 is absent whenever the first alternative of group 2 matched */
+     $trailing = isset($match[3]) ? $match[3] : '';
+
+     return self::_named($match[1], $match[2]) . $trailing;
+ }
+
+ /**
+  * Entity::normalize_numeric()
+  * brings numeric character references into one canonical form
+  *
+  * Decimal references become hexadecimal ones, hexadecimal references lose
+  * their leading zeros, get upper-case digits and a terminating ';'.
+  * Finally the 'Windows-1252' replacement table of this class is applied.
+  *
+  * The former implementation relied on the /e pattern modifier, which PHP 7
+  * removed; the same replacements are now done with callbacks.
+  *
+  * @static
+  * @deprecated
+  * @param String $string
+  * @return String
+  */
+ static public function normalize_numeric ($string)
+ {
+     $string = self::_canonical_numeric_references($string);
+     $string = strtr($string, self::$entities['Windows-1252']);
+     return $string;
+ }
+
+ /**
+  * Entity::numeric_to_utf8()
+  * replaces numeric character references by the UTF-8 encoded character
+  *
+  * References are first brought into the canonical '&#xHEX;' form and each
+  * of them is then converted with Entity::_hex_to_utf8().
+  *
+  * @static
+  * @deprecated
+  * @param String $string
+  * @return String
+  */
+ static public function numeric_to_utf8 ($string)
+ {
+     $string = self::_canonical_numeric_references($string);
+     $string = preg_replace_callback(
+         '/&#x([0-9A-Fa-f]+);/',
+         array(__CLASS__, '_hex_reference_to_utf8'),
+         $string
+     );
+     return $string;
+ }
+
+ /**
+  * Entity::_canonical_numeric_references()
+  * shared first two steps of normalize_numeric() and numeric_to_utf8()
+  *
+  * @static
+  * @param String $string
+  * @return String string whose numeric references all read '&#xHEX;'
+  */
+ static private function _canonical_numeric_references($string)
+ {
+     $string = preg_replace_callback(
+         '/&#([0-9]+)(;)?/',
+         array(__CLASS__, '_dec_reference_to_hex'),
+         $string
+     );
+     $string = preg_replace_callback(
+         '/&#[Xx](0)*([0-9A-Fa-f]+)(;?|([^A-Za-z0-9\;\:\.\-\_]))/',
+         array(__CLASS__, '_canonical_hex_reference'),
+         $string
+     );
+     return $string;
+ }
+
+ /**
+  * callback: '&#160' or '&#160;' becomes '&#xa0;'
+  *
+  * @static
+  * @param Array $match [1] decimal digits
+  * @return String
+  */
+ static private function _dec_reference_to_hex($match)
+ {
+     return '&#x' . dechex((int) $match[1]) . ';';
+ }
+
+ /**
+  * callback: '&#X00a0' becomes '&#xA0;'
+  *
+  * @static
+  * @param Array $match [2] hex digits without leading zeros, [4] optional trailing character
+  * @return String
+  */
+ static private function _canonical_hex_reference($match)
+ {
+     /* group 4 is absent whenever the first alternative of group 3 matched */
+     $trailing = isset($match[4]) ? $match[4] : '';
+
+     return '&#x' . strtoupper($match[2]) . ';' . $trailing;
+ }
+
+ /**
+  * callback: '&#xA0;' becomes the character itself
+  *
+  * @static
+  * @param Array $match [1] hex digits
+  * @return String
+  */
+ static private function _hex_reference_to_utf8($match)
+ {
+     return self::_hex_to_utf8($match[1]);
+ }
+
+ /**
+  * Entity::numeric_to_named()
+  * convert decimal and hexadecimal numeric character references into named character references
+  *
+  * Hexadecimal references are rewritten as decimal ones first; the flipped
+  * 'named_to_numeric' table of this class then maps them to names.
+  *
+  * The former implementation relied on the /e pattern modifier, which PHP 7
+  * removed; the same replacement is now done with a callback.
+  *
+  * @static
+  * @deprecated
+  * @param String $string
+  * @return String
+  */
+ static public function numeric_to_named ($string)
+ {
+     $string = preg_replace_callback(
+         '/&#[Xx]([0-9A-Fa-f]+)/',
+         array(__CLASS__, '_hex_reference_to_dec'),
+         $string
+     );
+     $string = strtr($string, array_flip(self::$entities['named_to_numeric']));
+     return $string;
+ }
+
+ /**
+  * callback: '&#xA0' becomes '&#160'
+  *
+  * @static
+  * @param Array $match [1] hex digits
+  * @return String
+  */
+ static private function _hex_reference_to_dec($match)
+ {
+     return '&#' . hexdec($match[1]);
+ }
+
+ /**
+  * Entity::specialchars()
+  * escape the characters " & < > ' while leaving character references
+  * that are already present in the string untouched
+  *
+  * The apostrophe becomes &apos; for $type 'xml' and &#39; for any other
+  * type.
+  *
+  * @static
+  * @deprecated
+  * @param String $string
+  * @param String $type 'xml' (default) or anything else for HTML output
+  * @return String
+  */
+ static public function specialchars ($string, $type = 'xml')
+ {
+     $specialchars = array(
+         '"' => '&quot;',
+         '<' => '&lt;',
+         '>' => '&gt;',
+         "'" => ( $type != 'xml' ) ? '&#39;' : '&apos;'
+     );
+
+     /*
+      * Escape only ampersands that do not start a reference. The lookahead
+      * replaces the former "[[[ENTITY:...]]]" placeholder round trip, which
+      * rewrote input that happened to contain such a placeholder.
+      */
+     $string = preg_replace('/&(?!#?[Xx]?[0-9A-Za-z]+;)/', '&amp;', $string);
+
+     /* the table holds no '&', so the references produced above stay intact */
+     $string = strtr($string, $specialchars);
+     return $string;
+ }
+
+ /**
+  * Entity::_hex_to_utf8()
+  * encode the code point given as hexadecimal digits as a UTF-8 byte sequence
+  *
+  * Values that are no Unicode scalar value (surrogates D800-DFFF and
+  * everything above 10FFFF) yield an empty string.
+  *
+  * @static
+  * @deprecated
+  * @param String $s hexadecimal digits without any prefix, e.g. '20AC'
+  * @return String one to four bytes, or '' for an invalid code point
+  */
+ static public function _hex_to_utf8($s)
+ {
+     $c = hexdec($s);
+
+     /* checked first: hexdec() returns a float for very large input */
+     if ( $c > 0x10FFFF || ($c >= 0xD800 && $c <= 0xDFFF) )
+     {
+         return '';
+     }
+
+     if ( $c < 0x80 )
+     {
+         return chr($c);
+     }
+
+     if ( $c < 0x800 )
+     {
+         return chr(0xC0 | ($c >> 6))
+              . chr(0x80 | ($c & 0x3F));
+     }
+
+     if ( $c < 0x10000 )
+     {
+         return chr(0xE0 | ($c >> 12))
+              . chr(0x80 | (($c >> 6) & 0x3F))
+              . chr(0x80 | ($c & 0x3F));
+     }
+
+     return chr(0xF0 | ($c >> 18))
+          . chr(0x80 | (($c >> 12) & 0x3F))
+          . chr(0x80 | (($c >> 6) & 0x3F))
+          . chr(0x80 | ($c & 0x3F));
+ }
+
+ /**
+  * Entity::_named()
+  * look up a named character reference in the 'named_to_numeric' table
+  *
+  * The longest leading part of $entity that is a key of the table is
+  * replaced by the table value plus ';'; whatever follows that part is
+  * appended unchanged. Without any hit $entity is returned as it came,
+  * with the ';' put back when $extra was ';'.
+  *
+  * Declared static because Entity::named_to_numeric() calls it statically.
+  *
+  * @static
+  * @deprecated
+  * @param String $entity candidate reference: '&' followed by letters/digits, without ';'
+  * @param String $extra what followed the candidate in the text ('', ';', '=' or ';=')
+  * @return String replacement text
+  */
+ static public function _named($entity, $extra)
+ {
+     /* '&name=' is left alone - presumably a query string parameter, not a reference */
+     if ( $extra === '=' )
+     {
+         return $entity . '=';
+     }
+
+     $table = self::$entities['named_to_numeric'];
+
+     /* try ever shorter prefixes so that the longest known name wins */
+     for ( $length = i18n::strlen($entity); $length > 0; $length-- )
+     {
+         $check = i18n::substr($entity, 0, $length);
+         if ( array_key_exists($check, $table) )
+         {
+             return $table[$check] . ';' . i18n::substr($entity, $length);
+         }
+     }
+
+     return ( $extra === ';' ) ? "{$entity};" : $entity;
+ }
+
+ /**
+ * Entity::$entities
+ *
+ * HTML 4.01 Specification
+ * @link http://www.w3.org/TR/html4/sgml/entities.html
+ * @see 24 Character entity references in HTML 4
+ *
+ * XHTML™ 1.0 The Extensible HyperText Markup Language (Second Edition)
+ * A Reformulation of HTML 4 in XML 1.0
+ * @link http://www.w3.org/TR/xhtml1/
+ * @see 4.12. Entity references as hex values
+ * @see C.16. The Named Character Reference &apos;
+ *
+ * @static
+ * @deprecated
+ */
+ static private $entities = array (
+ 'named_to_numeric' => array (
+ ' ' => ' ',
+ '¡' => '¡',
+ '¢' => '¢',
+ '£' => '£',
+ '¤' => '¤',
+ '¥' => '¥',
+ '¦' => '¦',
+ '§' => '§',
+ '¨' => '¨',
+ '©' => '©',
+ 'ª' => 'ª',
+ '«' => '«',
+ '¬' => '¬',
+ '­' => '­',
+ '®' => '®',
+ '¯' => '¯',
+ '°' => '°',
+ '±' => '±',
+ '²' => '²',
+ '³' => '³',
+ '´' => '´',
+ 'µ' => 'µ',
+ '¶' => '¶',
+ '·' => '·',
+ '¸' => '¸',
+ '¹' => '¹',
+ 'º' => 'º',
+ '»' => '»',
+ '¼' => '¼',
+ '½' => '½',
+ '¾' => '¾',
+ '¿' => '¿',
+ 'À' => 'À',
+ 'Á' => 'Á',
+ 'Â' => 'Â',
+ 'Ã' => 'Ã',
+ 'Ä' => 'Ä',
+ 'Å' => 'Å',
+ 'Æ' => 'Æ',
+ 'Ç' => 'Ç',
+ 'È' => 'È',
+ 'É' => 'É',
+ 'Ê' => 'Ê',
+ 'Ë' => 'Ë',
+ 'Ì' => 'Ì',
+ 'Í' => 'Í',
+ 'Î' => 'Î',
+ 'Ï' => 'Ï',
+ 'Ð' => 'Ð',
+ 'Ñ' => 'Ñ',
+ 'Ò' => 'Ò',
+ 'Ó' => 'Ó',
+ 'Ô' => 'Ô',
+ 'Õ' => 'Õ',
+ 'Ö' => 'Ö',
+ '×' => '×',
+ 'Ø' => 'Ø',
+ 'Ù' => 'Ù',
+ 'Ú' => 'Ú',
+ 'Û' => 'Û',
+ 'Ü' => 'Ü',
+ 'Ý' => 'Ý',
+ 'Þ' => 'Þ',
+ 'ß' => 'ß',
+ 'à' => 'à',
+ 'á' => 'á',
+ 'â' => 'â',
+ 'ã' => 'ã',
+ 'ä' => 'ä',
+ 'å' => 'å',
+ 'æ' => 'æ',
+ 'ç' => 'ç',
+ 'è' => 'è',
+ 'é' => 'é',
+ 'ê' => 'ê',
+ 'ë' => 'ë',
+ 'ì' => 'ì',
+ 'í' => 'í',
+ 'î' => 'î',
+ 'ï' => 'ï',
+ 'ð' => 'ð',
+ 'ñ' => 'ñ',
+ 'ò' => 'ò',
+ 'ó' => 'ó',
+ 'ô' => 'ô',
+ 'õ' => 'õ',
+ 'ö' => 'ö',
+ '÷' => '÷',
+ 'ø' => 'ø',
+ 'ù' => 'ù',
+ 'ú' => 'ú',
+ 'û' => 'û',
+ 'ü' => 'ü',
+ 'ý' => 'ý',
+ 'þ' => 'þ',
+ 'ÿ' => 'ÿ',
+ '&OElig' => 'Œ',
+ '&oelig' => 'å',
+ '&Scaron' => 'Š',
+ '&scaron' => 'š',
+ '&Yuml' => 'Ÿ',
+ '&circ' => 'ˆ',
+ '&tilde' => '˜',
+ '&esnp' => ' ',
+ '&emsp' => ' ',
+ '&thinsp' => ' ',
+ '&zwnj' => '‌',
+ '&zwj' => '‍',
+ '&lrm' => '‎',
+ '&rlm' => '‏',
+ '&ndash' => '–',
+ '&mdash' => '—',
+ '&lsquo' => '‘',
+ '&rsquo' => '’',
+ '&sbquo' => '‚',
+ '&ldquo' => '“',
+ '&rdquo' => '”',
+ '&bdquo' => '„',
+ '&dagger' => '†',
+ '&Dagger' => '‡',
+ '&permil' => '‰',
+ '&lsaquo' => '‹',
+ '&rsaquo' => '›',
+ '&euro' => '€',
+ '&fnof' => 'ƒ',
+ '&Alpha' => 'Α',
+ '&Beta' => 'Β',
+ '&Gamma' => 'Γ',
+ '&Delta' => 'Δ',
+ '&Epsilon' => 'Ε',
+ '&Zeta' => 'Ζ',
+ '&Eta' => 'Η',
+ '&Theta' => 'Θ',
+ '&Iota' => 'Ι',
+ '&Kappa' => 'Κ',
+ '&Lambda' => 'Λ',
+ '&Mu' => 'Μ',
+ '&Nu' => 'Ν',
+ '&Xi' => 'Ξ',
+ '&Omicron' => 'Ο',
+ '&Pi' => 'Π',
+ '&Rho' => 'Ρ',
+ '&Sigma' => 'Σ',
+ '&Tau' => 'Τ',
+ '&Upsilon' => 'Υ',
+ '&Phi' => 'Φ',
+ '&Chi' => 'Χ',
+ '&Psi' => 'Ψ',
+ '&Omega' => 'Ω',
+ '&alpha' => 'α',
+ '&beta' => 'β',
+ '&gamma' => 'γ',
+ '&delta' => 'δ',
+ '&epsilon' => 'ε',
+ '&zeta' => 'ζ',
+ '&eta' => 'η',
+ '&theta' => 'θ',
+ '&iota' => 'ι',
+ '&kappa' => 'κ',
+ '&lambda' => 'λ',
+ '&mu' => 'μ',
+ '&nu' => 'ν',
+ '&xi' => 'ξ',
+ '&omicron' => 'ο',
+ '&pi' => 'π',
+ '&rho' => 'ρ',
+ '&sigmaf' => 'ς',
+ '&sigma' => 'σ',
+ '&tau' => 'τ',
+ '&upsilon' => 'υ',
+ '&phi' => 'φ',
+ '&chi' => 'χ',
+ '&psi' => 'ψ',
+ '&omega' => 'ω',
+ '&thetasym' => 'ϑ',
+ '&upsih' => 'ϒ',
+ '&piv' => 'ϖ',
+ '&bull' => '•',
+ '&hellip' => '…',
+ '&prime' => '′',
+ '&Prime' => '″',
+ '&oline' => '‾',
+ '&frasl' => '⁄',
+ '&weierp' => '℘',
+ '&image' => 'ℑ',
+ '&real' => 'ℜ',
+ '&trade' => 'ℒ',
+ '&alefsym' => 'ℵ',
+ '&larr' => '←',
+ '&uarr' => '↑',
+ '&rarr' => '→',
+ '&darr' => '↓',
+ '&harr' => '↔',
+ '&crarr' => '↵',
+ '&lArr' => '⇐',
+ '&uArr' => '⇑',
+ '&rArr' => '⇒',
+ '&dArr' => '⇓',
+ '&hArr' => '⇔',
+ '&forall' => '∀',
+ '&part' => '∂',
+ '&exist' => '∃',
+ '&empty' => '∅',
+ '&nabla' => '∇',
+ '&isin' => '∈',
+ '¬in' => '∉',
+ '&ni' => '∋',
+ '&prod' => '∏',
+ '&sum' => '∑',
+ '&minus' => '−',
+ '&lowast' => '∗',
+ '&radic' => '√',
+ '&prop' => '∝',
+ '&infin' => '∞',
+ '&ang' => '∠',
+ '&and' => '∧',
+ '&or' => '∨',
+ '&cap' => '∩',
+ '&cup' => '∪',
+ '&int' => '∫',
+ '&there4' => '∴',
+ '&sim' => '∼',
+ '&cong' => '≅',
+ '&asymp' => '≈',
+ '&ne' => '≠',
+ '&equiv' => '≡',
+ '&le' => '≤',
+ '&ge' => '≥',
+ '&sub' => '⊂',
+ '&sup' => '⊃',
+ '&nsub' => '⊄',
+ '&sube' => '⊆',
+ '&supe' => '⊇',
+ '&oplus' => '⊕',
+ '&otimes' => '⊖',
+ '&perp' => '⊥',
+ '&sdot' => '⋅',
+ '&lceil' => '⍨',
+ '&rceil' => '⌉',
+ '&lfloor' => '⌊',
+ '&rfloor' => '⌋',
+ '&lang' => '〈',
+ '&rang' => '⌰',
+ '&loz' => '◊',
+ '&spades' => '♠',
+ '&clubs' => '♣',
+ '&hearts' => '♥',
+ '&diams' => '♦'
+ ),
+ 'Windows-1252' => array(
+ '€' => '€',
+ '‚' => '‚',
+ 'ƒ' => 'ƒ',
+ '„' => '„',
+ '…' => '…',
+ '†' => '†',
+ '‡' => '‡',
+ 'ˆ' => 'ˆ',
+ '‰' => '‰',
+ 'Š' => 'Š',
+ '‹' => '‹',
+ 'Œ' => 'Œ',
+ 'Ž' => 'Ž',
+ '‘' => '‘',
+ '’' => '’',
+ '“' => '“',
+ '”' => '”',
+ '•' => '•',
+ '–' => '–',
+ '—' => '—',
+ '˜' => '˜',
+ '™' => '™',
+ 'š' => 'š',
+ '›' => '›',
+ 'œ' => 'œ',
+ 'ž' => 'ž',
+ 'Ÿ' => 'Ÿ',
+ )
+ );
+}