-<?php
-
-class ENTITY
-{
- /**
- * ENTITY::hen
- * htmlentities wrapper
- *
- * @static
- * @access public
- * @param string $string target string
- * @param string $quotation quotation mode. please refer to the argument of PHP built-in htmlentities
- * @return string escaped string
- */
- static public function hen($string, $quotation=ENT_QUOTES)
- {
- /*
- * we can use 'double_encode' flag instead of this when dropping supports for PHP 5.2.2 or lower
- */
- $string = html_entity_decode($string, $quotation, i18n::get_current_charset());
- return (string) htmlentities($string, $quotation, i18n::get_current_charset());
- }
-
- /**
- * ENTITY::hsc
- * htmlspecialchars wrapper
- *
- * NOTE: htmlspecialchars_decode() is ASCII-to-ACII conversion
- * and its target string consists of several letters.
- * There are no problems.
- *
- * @static
- * @access public
- * @param string $string target string
- * @param string $quotation quotation mode. please refer to the argument of PHP built-in htmlspecialchars
- * @return string escaped string
- *
- */
- static public function hsc($string, $quotation=ENT_QUOTES)
- {
- /*
- * we can use 'double_encode' flag instead of this when dropping supports for PHP 5.2.2 or lower
- */
- $string = htmlspecialchars_decode($string, $quotation);
- return (string) htmlspecialchars($string, $quotation, i18n::get_current_charset());
- }
-
- /**
- * ENTITY::strip_tags()
- * Strip HTML tags from a string
- *
- * This function is a bit more intelligent than a regular call to strip_tags(),
- * because it also deletes the contents of certain tags and cleans up any
- * unneeded whitespace.
- *
- * @static
- * @param String $string target string
- * @return String string with stripped tags
- */
- static public function strip_tags($string)
- {
- $string = preg_replace("#<del[^>]*>.+<\/del[^>]*>#isU", '', $string);
- $string = preg_replace("#<script[^>]*>.+<\/script[^>]*>#isU", '', $string);
- $string = preg_replace("#<style[^>]*>.+<\/style[^>]*>#isU", '', $string);
- $string = preg_replace('#>#', '> ', $string);
- $string = preg_replace('#<#', ' <', $string);
- $string = strip_tags($string);
- $string = preg_replace("#\s+#", " ", $string);
- $string = trim($string);
- return $string;
- }
-
- /**
- * shortens a text string to maxlength.
- * $suffix is what needs to be added at the end (end length is <= $maxlength)
- *
- * The purpose is to limit the width of string for rendered screen in web browser.
- * So it depends on style sheet, browser's rendering scheme, client's system font.
- *
- * NOTE: In general, non-Latin font such as Japanese, Chinese, Cyrillic have two times as width as Latin fonts,
- * but this is not always correct, for example, rendered by proportional font.
- *
- * @static
- * @param string $escaped_string target string
- * @param integer $maxlength maximum length of return string which includes suffix
- * @param string $suffix added in the end of shortened-string
- * @return string
- */
- static public function shorten($string, $maxlength, $suffix)
- {
- static $flag;
-
- $decoded_entities_pcre = array();
- $encoded_entities = array();
-
- /* 1. store html entities */
- preg_match('#&[^&]+?;#', $string, $encoded_entities);
- if ( !$encoded_entities )
- {
- $flag = FALSE;
- }
- else
- {
- $flag = TRUE;
- }
- if ( $flag )
- {
- foreach ( $encoded_entities as $encoded_entity )
- {
- $decoded_entities_pcre[] = '#' . html_entity_decode($encoded_entity, ENT_QUOTES, i18n::get_current_charset()) . '#';
- }
- }
-
- /* 2. decode string */
- $string = html_entity_decode($string, ENT_QUOTES, i18n::get_current_charset());
-
- /* 3. shorten string and add suffix if string length is longer */
- if ( i18n::strlen($string) > $maxlength - i18n::strlen($suffix) )
- {
- $string = i18n::substr($string, 0, $maxlength - i18n::strlen($suffix) );
- $string .= $suffix;
- }
-
- /* 4. recover entities */
- if ( $flag )
- {
- $string = preg_replace($decoded_entities_pcre, $encoded_entities, $string);
- }
-
- return $string;
- }
-
- /**
- * ENTITY::highlight()
- * highlights a specific query in a given HTML text (not within HTML tags)
- *
- * @static
- * @param string $text text to be highlighted
- * @param string $expression regular expression to be matched (can be an array of expressions as well)
- * @param string $highlight highlight to be used (use \\0 to indicate the matched expression)
- * @return string
- */
- static public function highlight($text, $expression, $highlight)
- {
- if ( !$highlight || !$expression )
- {
- return $text;
- }
-
- if ( is_array($expression) && (count($expression) == 0) )
- {
- return $text;
- }
-
- $text = "<!--h-->{$text}";
- preg_match_all('#(<[^>]+>)([^<>]*)#', $text, $matches);
- $result = '';
- $count = count($matches[2]);
-
- for ( $i = 0; $i < $count; $i++ )
- {
- if ( $i != 0 )
- {
- $result .= $matches[1][$i];
- }
-
- if ( is_array($expression) )
- {
- foreach ( $expression as $regex )
- {
- $matches[2][$i] = preg_replace("#{$regex}#i", $highlight, $matches[2][$i]);
- }
- $result .= $matches[2][$i];
- }
- else
- {
- $result .= preg_replace("#{$expression}#i", $highlight, $matches[2][$i]);
- }
- }
- return $result;
- }
-
- /**
- * ENTITY::anchor_footnoting()
- * change strings with footnoticing generated from anchor elements
- *
- * @static
- * @param String $string strings which includes html elements
- * @return String string with footnotes
- */
- static public function anchor_footnoting($string)
- {
- /* 1. detect anchor elements */
- $anchors = array();
- if ( !preg_match_all("#<a[^>]*href=[\"\']([^\"^']*)[\"\'][^>]*>([^<]*)<\/a>#i", $subject, $anchors) )
- {
- return $string;
- }
-
- /* 2. add footnotes */
- $string .= "\n\n";
- $count = 1;
- foreach ( $anchors as $anchor )
- {
- preg_replace("#{$anchor[0]}#", "{$anchor[2]} [{$count}] ", $subject);
- $subject .= "[{$count}] {$anchor[1]}\n";
- $count++;
- }
-
- return strip_tags($ascii);
- }
-
- /*
- * NOTE: Obsoleted functions
- */
-
- /**
- * ENTITY::named_to_numeric()
- *
- * @deprecated
- * @param String $string
- */
- function named_to_numeric ($string)
- {
- $string = preg_replace('/(&[0-9A-Za-z]+)(;?\=?|([^A-Za-z0-9\;\:\.\-\_]))/e', "entity::_named('\\1', '\\2') . '\\3'", $string);
- return $string;
- }
-
- /**
- * ENTITY::named_to_numeric()
- *
- * @deprecated
- * @param String $string
- */
- function normalize_numeric ($string) {
- $string = preg_replace('/&#([0-9]+)(;)?/e', "'&#x'.dechex('\\1').';'", $string);
- $string = preg_replace('/&#[Xx](0)*([0-9A-Fa-f]+)(;?|([^A-Za-z0-9\;\:\.\-\_]))/e', "'&#x' . strtoupper('\\2') . ';\\4'", $string);
- $string = strtr($string, self::$entities['Windows-1252']);
- return $string;
- }
-
- /**
- * ENTITY::numeric_to_utf8()
- *
- * @deprecated
- * @param String $string
- */
- function numeric_to_utf8 ($string) {
- $string = preg_replace('/&#([0-9]+)(;)?/e', "'&#x'.dechex('\\1').';'", $string);
- $string = preg_replace('/&#[Xx](0)*([0-9A-Fa-f]+)(;?|([^A-Za-z0-9\;\:\.\-\_]))/e', "'&#x' . strtoupper('\\2') . ';\\4'", $string);
- $string = preg_replace('/&#x([0-9A-Fa-f]+);/e', "entity::_hex_to_utf8('\\1')", $string);
- return $string;
- }
-
- /**
- * ENTITY::numeric_to_named()
- * convert decimal and hexadecimal numeric character references into named character references
- *
- * @deprecated
- * @param String $string
- */
- function numeric_to_named ($string)
- {
- $string = preg_replace('/&#[Xx]([0-9A-Fa-f]+)/e', "'&#'.hexdec('\\1')", $string);
- $string = strtr($string, array_flip(self::$entities['named_to_numeric']));
- return $string;
- }
-
- /**
- * ENTITY::specialchars()
- * convert HTML entities to named character reference
- *
- * @deprecated
- * @param String $string
- */
- function specialchars ($string, $type = 'xml')
- {
- $specialchars = array(
- '"' => '"',
- '&' => '&',
- '<' => '<',
- '>' => '>'
- );
- if ( $type != 'xml' )
- {
- $specialchars["'"] = ''';
- }
- else
- {
- $specialchars["'"] = ''';
- }
-
- $string = preg_replace('/&(#?[Xx]?[0-9A-Za-z]+);/', "[[[ENTITY:\\1]]]", $string);
- $string = strtr($string, $specialchars);
- $string = preg_replace('/\[\[\[ENTITY\:([^\]]+)\]\]\]/', "&\\1;", $string);
- return $string;
- }
-
- /**
- * ENTITY::_hex_to_utf8()
- * convert decimal numeric character references to hexadecimal numeric character references
- *
- * @deprecated
- * @param String $string
- */
- function _hex_to_utf8($s)
- {
- $c = hexdec($s);
-
- if ( $c < 0x80 )
- {
- $str = chr($c);
- }
- else if ( $c < 0x800 )
- {
- $str = chr(0xC0 | $c>>6) . chr(0x80 | $c & 0x3F);
- }
- else if ( $c < 0x10000 )
- {
- $str = chr(0xE0 | $c>>12) . chr(0x80 | $c>>6 & 0x3F) . chr(0x80 | $c & 0x3F);
- }
- else if ( $c < 0x200000 )
- {
- $str = chr(0xF0 | $c>>18) . chr(0x80 | $c>>12 & 0x3F) . chr(0x80 | $c>>6 & 0x3F) . chr(0x80 | $c & 0x3F);
- }
- return $str;
- }
-
- /**
- * ENTITY::_named()
- * convert entities to named character reference
- *
- * @deprecated
- * @param String $string
- * @param String $extra
- * @return
- */
- function _named($entity, $extra)
- {
- if ( $extra == '=' )
- {
- return $entity . '=';
- }
-
- $length = i18n::strlen($entity);
-
- while ( $length > 0 )
- {
- $check = i18n::substr($entity, 0, $length);
- if ( array_key_exists($check, self::$entities['named_to_numeric']) )
- {
- return self::$entities['named_to_numeric'][$check] . ';' . i18n::substr($entity, $length);
- }
- $length--;
- }
-
- if ( $extra != ';' )
- {
- return $entity;
- }
- else
- {
- return "{$entity};";
- }
- }
-
- /**
- * ENTITIY::$entities
- *
- * HTML 4.01 Specification
- * @link http://www.w3.org/TR/html4/sgml/entities.html
- * @see 24 Character entity references in HTML 4
- *
- * XHTML™ 1.0 The Extensible HyperText Markup Language (Second Edition)
- * A Reformulation of HTML 4 in XML 1.0
- * @link http://www.w3.org/TR/xhtml1/
- * @see 4.12. Entity references as hex values
- * @see C.16. The Named Character Reference '
- *
- * @static
- * @deprecated
- */
- static private $entities = array (
- 'named_to_numeric' => array (
- ' ' => ' ',
- '¡' => '¡',
- '¢' => '¢',
- '£' => '£',
- '¤' => '¤',
- '¥' => '¥',
- '¦' => '¦',
- '§' => '§',
- '¨' => '¨',
- '©' => '©',
- 'ª' => 'ª',
- '«' => '«',
- '¬' => '¬',
- '­' => '­',
- '®' => '®',
- '¯' => '¯',
- '°' => '°',
- '±' => '±',
- '²' => '²',
- '³' => '³',
- '´' => '´',
- 'µ' => 'µ',
- '¶' => '¶',
- '·' => '·',
- '¸' => '¸',
- '¹' => '¹',
- 'º' => 'º',
- '»' => '»',
- '¼' => '¼',
- '½' => '½',
- '¾' => '¾',
- '¿' => '¿',
- 'À' => 'À',
- 'Á' => 'Á',
- 'Â' => 'Â',
- 'Ã' => 'Ã',
- 'Ä' => 'Ä',
- 'Å' => 'Å',
- 'Æ' => 'Æ',
- 'Ç' => 'Ç',
- 'È' => 'È',
- 'É' => 'É',
- 'Ê' => 'Ê',
- 'Ë' => 'Ë',
- 'Ì' => 'Ì',
- 'Í' => 'Í',
- 'Î' => 'Î',
- 'Ï' => 'Ï',
- 'Ð' => 'Ð',
- 'Ñ' => 'Ñ',
- 'Ò' => 'Ò',
- 'Ó' => 'Ó',
- 'Ô' => 'Ô',
- 'Õ' => 'Õ',
- 'Ö' => 'Ö',
- '×' => '×',
- 'Ø' => 'Ø',
- 'Ù' => 'Ù',
- 'Ú' => 'Ú',
- 'Û' => 'Û',
- 'Ü' => 'Ü',
- 'Ý' => 'Ý',
- 'Þ' => 'Þ',
- 'ß' => 'ß',
- 'à' => 'à',
- 'á' => 'á',
- 'â' => 'â',
- 'ã' => 'ã',
- 'ä' => 'ä',
- 'å' => 'å',
- 'æ' => 'æ',
- 'ç' => 'ç',
- 'è' => 'è',
- 'é' => 'é',
- 'ê' => 'ê',
- 'ë' => 'ë',
- 'ì' => 'ì',
- 'í' => 'í',
- 'î' => 'î',
- 'ï' => 'ï',
- 'ð' => 'ð',
- 'ñ' => 'ñ',
- 'ò' => 'ò',
- 'ó' => 'ó',
- 'ô' => 'ô',
- 'õ' => 'õ',
- 'ö' => 'ö',
- '÷' => '÷',
- 'ø' => 'ø',
- 'ù' => 'ù',
- 'ú' => 'ú',
- 'û' => 'û',
- 'ü' => 'ü',
- 'ý' => 'ý',
- 'þ' => 'þ',
- 'ÿ' => 'ÿ',
- '&OElig' => 'Œ',
- '&oelig' => 'å',
- '&Scaron' => 'Š',
- '&scaron' => 'š',
- '&Yuml' => 'Ÿ',
- '&circ' => 'ˆ',
- '&tilde' => '˜',
- '&esnp' => ' ',
- '&emsp' => ' ',
- '&thinsp' => ' ',
- '&zwnj' => '‌',
- '&zwj' => '‍',
- '&lrm' => '‎',
- '&rlm' => '‏',
- '&ndash' => '–',
- '&mdash' => '—',
- '&lsquo' => '‘',
- '&rsquo' => '’',
- '&sbquo' => '‚',
- '&ldquo' => '“',
- '&rdquo' => '”',
- '&bdquo' => '„',
- '&dagger' => '†',
- '&Dagger' => '‡',
- '&permil' => '‰',
- '&lsaquo' => '‹',
- '&rsaquo' => '›',
- '&euro' => '€',
- '&fnof' => 'ƒ',
- '&Alpha' => 'Α',
- '&Beta' => 'Β',
- '&Gamma' => 'Γ',
- '&Delta' => 'Δ',
- '&Epsilon' => 'Ε',
- '&Zeta' => 'Ζ',
- '&Eta' => 'Η',
- '&Theta' => 'Θ',
- '&Iota' => 'Ι',
- '&Kappa' => 'Κ',
- '&Lambda' => 'Λ',
- '&Mu' => 'Μ',
- '&Nu' => 'Ν',
- '&Xi' => 'Ξ',
- '&Omicron' => 'Ο',
- '&Pi' => 'Π',
- '&Rho' => 'Ρ',
- '&Sigma' => 'Σ',
- '&Tau' => 'Τ',
- '&Upsilon' => 'Υ',
- '&Phi' => 'Φ',
- '&Chi' => 'Χ',
- '&Psi' => 'Ψ',
- '&Omega' => 'Ω',
- '&alpha' => 'α',
- '&beta' => 'β',
- '&gamma' => 'γ',
- '&delta' => 'δ',
- '&epsilon' => 'ε',
- '&zeta' => 'ζ',
- '&eta' => 'η',
- '&theta' => 'θ',
- '&iota' => 'ι',
- '&kappa' => 'κ',
- '&lambda' => 'λ',
- '&mu' => 'μ',
- '&nu' => 'ν',
- '&xi' => 'ξ',
- '&omicron' => 'ο',
- '&pi' => 'π',
- '&rho' => 'ρ',
- '&sigmaf' => 'ς',
- '&sigma' => 'σ',
- '&tau' => 'τ',
- '&upsilon' => 'υ',
- '&phi' => 'φ',
- '&chi' => 'χ',
- '&psi' => 'ψ',
- '&omega' => 'ω',
- '&thetasym' => 'ϑ',
- '&upsih' => 'ϒ',
- '&piv' => 'ϖ',
- '&bull' => '•',
- '&hellip' => '…',
- '&prime' => '′',
- '&Prime' => '″',
- '&oline' => '‾',
- '&frasl' => '⁄',
- '&weierp' => '℘',
- '&image' => 'ℑ',
- '&real' => 'ℜ',
- '&trade' => 'ℒ',
- '&alefsym' => 'ℵ',
- '&larr' => '←',
- '&uarr' => '↑',
- '&rarr' => '→',
- '&darr' => '↓',
- '&harr' => '↔',
- '&crarr' => '↵',
- '&lArr' => '⇐',
- '&uArr' => '⇑',
- '&rArr' => '⇒',
- '&dArr' => '⇓',
- '&hArr' => '⇔',
- '&forall' => '∀',
- '&part' => '∂',
- '&exist' => '∃',
- '&empty' => '∅',
- '&nabla' => '∇',
- '&isin' => '∈',
- '¬in' => '∉',
- '&ni' => '∋',
- '&prod' => '∏',
- '&sum' => '∑',
- '&minus' => '−',
- '&lowast' => '∗',
- '&radic' => '√',
- '&prop' => '∝',
- '&infin' => '∞',
- '&ang' => '∠',
- '&and' => '∧',
- '&or' => '∨',
- '&cap' => '∩',
- '&cup' => '∪',
- '&int' => '∫',
- '&there4' => '∴',
- '&sim' => '∼',
- '&cong' => '≅',
- '&asymp' => '≈',
- '&ne' => '≠',
- '&equiv' => '≡',
- '&le' => '≤',
- '&ge' => '≥',
- '&sub' => '⊂',
- '&sup' => '⊃',
- '&nsub' => '⊄',
- '&sube' => '⊆',
- '&supe' => '⊇',
- '&oplus' => '⊕',
- '&otimes' => '⊖',
- '&perp' => '⊥',
- '&sdot' => '⋅',
- '&lceil' => '⍨',
- '&rceil' => '⌉',
- '&lfloor' => '⌊',
- '&rfloor' => '⌋',
- '&lang' => '〈',
- '&rang' => '⌰',
- '&loz' => '◊',
- '&spades' => '♠',
- '&clubs' => '♣',
- '&hearts' => '♥',
- '&diams' => '♦'
- ),
- 'Windows-1252' => array(
- '€' => '€',
- '‚' => '‚',
- 'ƒ' => 'ƒ',
- '„' => '„',
- '…' => '…',
- '†' => '†',
- '‡' => '‡',
- 'ˆ' => 'ˆ',
- '‰' => '‰',
- 'Š' => 'Š',
- '‹' => '‹',
- 'Œ' => 'Œ',
- 'Ž' => 'Ž',
- '‘' => '‘',
- '’' => '’',
- '“' => '“',
- '”' => '”',
- '•' => '•',
- '–' => '–',
- '—' => '—',
- '˜' => '˜',
- '™' => '™',
- 'š' => 'š',
- '›' => '›',
- 'œ' => 'œ',
- 'ž' => 'ž',
- 'Ÿ' => 'Ÿ',
- )
- );
-}
+<?php\r
+\r
+class ENTITY\r
+{\r
+ /**\r
+  * ENTITY::hen\r
+  * htmlentities() wrapper.\r
+  *\r
+  * The input is decoded with html_entity_decode() first and encoded with\r
+  * htmlentities() afterwards, both with the same quotation flag and the\r
+  * charset reported by i18n::get_current_charset().\r
+  *\r
+  * @static\r
+  * @access public\r
+  * @param string $string target string\r
+  * @param integer $quotation quotation flag, see the PHP built-in htmlentities() (default ENT_QUOTES)\r
+  * @return string escaped string\r
+  */\r
+ static public function hen($string, $quotation=ENT_QUOTES)\r
+ {\r
+     /*\r
+      * NOTE: this decode/encode round trip can be replaced with the\r
+      * 'double_encode' flag once PHP 5.2.2 or lower is no longer supported\r
+      */\r
+     $decoded = html_entity_decode($string, $quotation, i18n::get_current_charset());\r
+     $encoded = htmlentities($decoded, $quotation, i18n::get_current_charset());\r
+\r
+     return (string) $encoded;\r
+ }\r
+ \r
+ /**\r
+  * ENTITY::hsc\r
+  * htmlspecialchars() wrapper.\r
+  *\r
+  * The input is decoded with htmlspecialchars_decode() first and encoded\r
+  * with htmlspecialchars() afterwards.\r
+  *\r
+  * NOTE: htmlspecialchars_decode() is an ASCII-to-ASCII conversion\r
+  * and its targets consist of a few ASCII letters only,\r
+  * so it is called without a charset argument.\r
+  *\r
+  * @static\r
+  * @access public\r
+  * @param string $string target string\r
+  * @param integer $quotation quotation flag, see the PHP built-in htmlspecialchars() (default ENT_QUOTES)\r
+  * @return string escaped string\r
+  */\r
+ static public function hsc($string, $quotation=ENT_QUOTES)\r
+ {\r
+     /*\r
+      * NOTE: this decode/encode round trip can be replaced with the\r
+      * 'double_encode' flag once PHP 5.2.2 or lower is no longer supported\r
+      */\r
+     $decoded = htmlspecialchars_decode($string, $quotation);\r
+     $encoded = htmlspecialchars($decoded, $quotation, i18n::get_current_charset());\r
+\r
+     return (string) $encoded;\r
+ }\r
+ \r
+ /**\r
+  * ENTITY::strip_tags()\r
+  * Strip HTML tags from a string\r
+  *\r
+  * Unlike a bare call to the built-in strip_tags(), this also removes the\r
+  * contents of del, script and style elements and collapses every run of\r
+  * whitespace into a single space.\r
+  *\r
+  * @static\r
+  * @param String $string target string\r
+  * @return String string with stripped tags\r
+  */\r
+ static public function strip_tags($string)\r
+ {\r
+     /* elements which are removed together with whatever they enclose */\r
+     $removed_elements = array(\r
+         "#<del[^>]*>.+<\/del[^>]*>#isU",\r
+         "#<script[^>]*>.+<\/script[^>]*>#isU",\r
+         "#<style[^>]*>.+<\/style[^>]*>#isU"\r
+     );\r
+     $text = preg_replace($removed_elements, '', $string);\r
+\r
+     /* put a space around every tag so that adjacent words do not get glued together */\r
+     $text = preg_replace('#>#', '> ', $text);\r
+     $text = preg_replace('#<#', ' <', $text);\r
+\r
+     /* this is the PHP built-in function, not this method */\r
+     $text = strip_tags($text);\r
+\r
+     return trim(preg_replace("#\s+#", " ", $text));\r
+ }\r
+ \r
+ /**\r
+  * ENTITY::shorten()\r
+  * shortens a text string to maxlength.\r
+  * $suffix is what needs to be added at the end (end length is <= $maxlength)\r
+  *\r
+  * The length is measured on the entity-decoded string, so that one entity\r
+  * counts as one letter. The entities found in the original string are put\r
+  * back after shortening.\r
+  *\r
+  * The purpose is to limit the width of string for rendered screen in web browser.\r
+  * So it depends on style sheet, browser's rendering scheme, client's system font.\r
+  *\r
+  * NOTE: In general, non-Latin font such as Japanese, Chinese, Cyrillic have two times as width as Latin fonts,\r
+  * but this is not always correct, for example, rendered by proportional font.\r
+  *\r
+  * @static\r
+  * @param string $string target string, may include html entities\r
+  * @param integer $maxlength maximum length of return string which includes suffix\r
+  * @param string $suffix added in the end of shortened-string\r
+  * @return string\r
+  */\r
+ static public function shorten($string, $maxlength, $suffix)\r
+ {\r
+     $charset = i18n::get_current_charset();\r
+\r
+     /* 1. store ALL html entities, as a map of decoded letter => entity */\r
+     $entity_map = array();\r
+     $matches = array();\r
+     if ( preg_match_all('#&[^&]+?;#', $string, $matches) )\r
+     {\r
+         foreach ( array_unique($matches[0]) as $encoded_entity )\r
+         {\r
+             $decoded_entity = html_entity_decode($encoded_entity, ENT_QUOTES, $charset);\r
+\r
+             /* skip what was not an entity after all, and keep the first spelling of a letter */\r
+             if ( $decoded_entity === '' || $decoded_entity === $encoded_entity || isset($entity_map[$decoded_entity]) )\r
+             {\r
+                 continue;\r
+             }\r
+             $entity_map[$decoded_entity] = $encoded_entity;\r
+         }\r
+     }\r
+\r
+     /* 2. decode string */\r
+     $string = html_entity_decode($string, ENT_QUOTES, $charset);\r
+\r
+     /* 3. shorten string and add suffix only if the string does not fit into $maxlength */\r
+     if ( i18n::strlen($string) > $maxlength )\r
+     {\r
+         /* never hand a negative length to substr when the suffix alone is too long */\r
+         $length = max(0, $maxlength - i18n::strlen($suffix));\r
+         $string = i18n::substr($string, 0, $length) . $suffix;\r
+     }\r
+\r
+     /*\r
+      * 4. recover entities\r
+      * strtr() takes the decoded letters literally (no regular expression is\r
+      * built from them) and never rescans what it has already replaced\r
+      */\r
+     if ( $entity_map )\r
+     {\r
+         $string = strtr($string, $entity_map);\r
+     }\r
+\r
+     return $string;\r
+ }\r
+ \r
+ /**\r
+  * ENTITY::highlight()\r
+  * highlights a specific query in a given HTML text (not within HTML tags)\r
+  *\r
+  * The text is split into pairs of "tag + text following the tag" and the\r
+  * replacement is applied to the text parts only. Each expression is wrapped\r
+  * in '#' delimiters and matched case-insensitively, so it must not include\r
+  * an unescaped '#'.\r
+  *\r
+  * @static\r
+  * @param string $text text to be highlighted\r
+  * @param string $expression regular expression to be matched (can be an array of expressions as well)\r
+  * @param string $highlight highlight to be used (use \\0 to indicate the matched expression)\r
+  * @return string\r
+  */\r
+ static public function highlight($text, $expression, $highlight)\r
+ {\r
+     /* an empty string and an empty array are both "nothing to do" */\r
+     if ( !$highlight || !$expression )\r
+     {\r
+         return $text;\r
+     }\r
+\r
+     /* a single expression and an array of expressions take the same path */\r
+     $patterns = array();\r
+     foreach ( (array) $expression as $regex )\r
+     {\r
+         /* an empty expression would match between every two letters */\r
+         if ( (string) $regex === '' )\r
+         {\r
+             continue;\r
+         }\r
+         $patterns[] = "#{$regex}#i";\r
+     }\r
+     if ( !$patterns )\r
+     {\r
+         return $text;\r
+     }\r
+\r
+     /* the dummy comment lets the text in front of the first tag be captured as well */\r
+     $text = "<!--h-->{$text}";\r
+\r
+     /*\r
+      * the text part ends at the next '<' only;\r
+      * stopping at '>' as well would drop everything behind a literal '>'\r
+      */\r
+     preg_match_all('#(<[^>]+>)([^<]*)#', $text, $matches);\r
+     $result = '';\r
+     $count = count($matches[2]);\r
+\r
+     for ( $i = 0; $i < $count; $i++ )\r
+     {\r
+         /* the first tag is the dummy comment added above */\r
+         if ( $i != 0 )\r
+         {\r
+             $result .= $matches[1][$i];\r
+         }\r
+\r
+         $segment = $matches[2][$i];\r
+         foreach ( $patterns as $pattern )\r
+         {\r
+             $replaced = preg_replace($pattern, $highlight, $segment);\r
+             /* NULL means the expression failed, keep the text as it was */\r
+             if ( $replaced !== NULL )\r
+             {\r
+                 $segment = $replaced;\r
+             }\r
+         }\r
+         $result .= $segment;\r
+     }\r
+     return $result;\r
+ }\r
+ \r
+ /**\r
+  * ENTITY::anchor_footnoting()\r
+  * change strings with footnotes generated from anchor elements\r
+  *\r
+  * Each anchor element is replaced with "label [n] " and a line\r
+  * "[n] url" is appended at the end of the string. All the remaining tags\r
+  * are removed with the PHP built-in strip_tags().\r
+  * A string without any anchor element is returned as it is.\r
+  *\r
+  * @static\r
+  * @param String $string strings which includes html elements\r
+  * @return String string with footnotes\r
+  */\r
+ static public function anchor_footnoting($string)\r
+ {\r
+     /* 1. detect anchor elements, one array per anchor: 0 => element, 1 => url, 2 => label */\r
+     $anchors = array();\r
+     $pattern = '#<a[^>]*href=["\']([^"\']*)["\'][^>]*>([^<]*)</a>#i';\r
+     if ( !preg_match_all($pattern, $string, $anchors, PREG_SET_ORDER) )\r
+     {\r
+         return $string;\r
+     }\r
+\r
+     /* 2. add footnotes */\r
+     $footnotes = "\n\n";\r
+     $count = 1;\r
+     foreach ( $anchors as $anchor )\r
+     {\r
+         /*\r
+          * replace the first remaining occurrence only, so that identical\r
+          * anchors get their own numbers; the element is taken literally\r
+          */\r
+         $position = strpos($string, $anchor[0]);\r
+         if ( $position !== FALSE )\r
+         {\r
+             $string = substr_replace($string, "{$anchor[2]} [{$count}] ", $position, strlen($anchor[0]));\r
+         }\r
+         $footnotes .= "[{$count}] {$anchor[1]}\n";\r
+         $count++;\r
+     }\r
+\r
+     /* this is the PHP built-in function, which keeps the line breaks */\r
+     return strip_tags($string . $footnotes);\r
+ }\r
+ \r
+ /*\r
+ * NOTE: Obsoleted functions\r
+ */\r
+ \r
+ /**\r
+  * ENTITY::named_to_numeric()\r
+  * hand every "&name" found in the string, together with the ';' and/or '='\r
+  * following it, to ENTITY::_named() and put the result in its place\r
+  *\r
+  * NOTE: the 'e' pattern modifier used here before does not exist\r
+  * in PHP 7 or later, so a callback is used instead.\r
+  *\r
+  * @static\r
+  * @deprecated\r
+  * @param String $string\r
+  * @return String\r
+  */\r
+ static public function named_to_numeric($string)\r
+ {\r
+     return preg_replace_callback(\r
+         '/(&[0-9A-Za-z]+)(;?\=?|([^A-Za-z0-9\;\:\.\-\_]))/',\r
+         array(__CLASS__, '_named_to_numeric_callback'),\r
+         $string\r
+     );\r
+ }\r
+\r
+ /**\r
+  * ENTITY::_named_to_numeric_callback()\r
+  * callback of ENTITY::named_to_numeric(), not to be called from anywhere else\r
+  *\r
+  * @static\r
+  * @deprecated\r
+  * @param Array $matches 1 => "&name", 2 => letters following it, 3 => set only if the nested group took part in the match\r
+  * @return String\r
+  */\r
+ static public function _named_to_numeric_callback($matches)\r
+ {\r
+     $extra = isset($matches[2]) ? $matches[2] : '';\r
+     $rest = isset($matches[3]) ? $matches[3] : '';\r
+     return self::_named($matches[1], $extra) . $rest;\r
+ }\r
+ \r
+ /**\r
+  * ENTITY::normalize_numeric()\r
+  * rewrite every numeric character reference as "&#xHEX;" (upper case,\r
+  * no leading zero, with semicolon) and translate the result with the\r
+  * 'Windows-1252' table of ENTITY::$entities\r
+  *\r
+  * @static\r
+  * @deprecated\r
+  * @param String $string\r
+  * @return String\r
+  */\r
+ static public function normalize_numeric($string)\r
+ {\r
+     $string = self::_numeric_to_upper_hex($string);\r
+     return strtr($string, self::$entities['Windows-1252']);\r
+ }\r
+\r
+ /**\r
+  * ENTITY::numeric_to_utf8()\r
+  * rewrite every numeric character reference as "&#xHEX;" and replace it\r
+  * with the return value of ENTITY::_hex_to_utf8()\r
+  *\r
+  * @static\r
+  * @deprecated\r
+  * @param String $string\r
+  * @return String\r
+  */\r
+ static public function numeric_to_utf8($string)\r
+ {\r
+     $string = self::_numeric_to_upper_hex($string);\r
+     return preg_replace_callback(\r
+         '/&#x([0-9A-Fa-f]+);/',\r
+         array(__CLASS__, '_hex_reference_callback'),\r
+         $string\r
+     );\r
+ }\r
+\r
+ /**\r
+  * ENTITY::_numeric_to_upper_hex()\r
+  * the two steps shared by normalize_numeric() and numeric_to_utf8():\r
+  * decimal references become hexadecimal ones, then all the hexadecimal\r
+  * ones get upper case digits and a closing semicolon\r
+  *\r
+  * NOTE: the 'e' pattern modifier used for this before does not exist\r
+  * in PHP 7 or later, so callbacks are used instead.\r
+  *\r
+  * @static\r
+  * @deprecated\r
+  * @param String $string\r
+  * @return String\r
+  */\r
+ static public function _numeric_to_upper_hex($string)\r
+ {\r
+     $string = preg_replace_callback(\r
+         '/&#([0-9]+)(;)?/',\r
+         array(__CLASS__, '_decimal_reference_callback'),\r
+         $string\r
+     );\r
+     return preg_replace_callback(\r
+         '/&#[Xx](0)*([0-9A-Fa-f]+)(;?|([^A-Za-z0-9\;\:\.\-\_]))/',\r
+         array(__CLASS__, '_hexadecimal_reference_callback'),\r
+         $string\r
+     );\r
+ }\r
+\r
+ /**\r
+  * callback: "&#N" or "&#N;" => "&#xhex;"\r
+  *\r
+  * @static\r
+  * @deprecated\r
+  * @param Array $matches 1 => decimal digits\r
+  * @return String\r
+  */\r
+ static public function _decimal_reference_callback($matches)\r
+ {\r
+     return '&#x' . dechex((int) $matches[1]) . ';';\r
+ }\r
+\r
+ /**\r
+  * callback: "&#x00hex" => "&#xHEX;"\r
+  *\r
+  * @static\r
+  * @deprecated\r
+  * @param Array $matches 2 => hexadecimal digits without leading zeros, 4 => set only if the nested group took part in the match\r
+  * @return String\r
+  */\r
+ static public function _hexadecimal_reference_callback($matches)\r
+ {\r
+     $rest = isset($matches[4]) ? $matches[4] : '';\r
+     return '&#x' . strtoupper($matches[2]) . ';' . $rest;\r
+ }\r
+\r
+ /**\r
+  * callback: "&#xHEX;" => return value of ENTITY::_hex_to_utf8()\r
+  *\r
+  * @static\r
+  * @deprecated\r
+  * @param Array $matches 1 => hexadecimal digits\r
+  * @return String\r
+  */\r
+ static public function _hex_reference_callback($matches)\r
+ {\r
+     return self::_hex_to_utf8($matches[1]);\r
+ }\r
+ \r
+ /**\r
+  * ENTITY::numeric_to_named()\r
+  * convert decimal and hexadecimal numeric character references into named character references\r
+  *\r
+  * Hexadecimal references are rewritten as decimal ones first, then the\r
+  * flipped 'named_to_numeric' table of ENTITY::$entities is applied.\r
+  *\r
+  * NOTE: the 'e' pattern modifier used here before does not exist\r
+  * in PHP 7 or later, so a callback is used instead.\r
+  *\r
+  * @static\r
+  * @deprecated\r
+  * @param String $string\r
+  * @return String\r
+  */\r
+ static public function numeric_to_named($string)\r
+ {\r
+     $string = preg_replace_callback(\r
+         '/&#[Xx]([0-9A-Fa-f]+)/',\r
+         array(__CLASS__, '_hex_to_decimal_reference_callback'),\r
+         $string\r
+     );\r
+     return strtr($string, array_flip(self::$entities['named_to_numeric']));\r
+ }\r
+\r
+ /**\r
+  * ENTITY::_hex_to_decimal_reference_callback()\r
+  * callback of ENTITY::numeric_to_named(): "&#xHEX" => "&#N"\r
+  *\r
+  * @static\r
+  * @deprecated\r
+  * @param Array $matches 1 => hexadecimal digits\r
+  * @return String\r
+  */\r
+ static public function _hex_to_decimal_reference_callback($matches)\r
+ {\r
+     return '&#' . hexdec($matches[1]);\r
+ }\r
+ \r
+ /**\r
+  * ENTITY::specialchars()\r
+  * escape the markup letters ", &, <, > and ' without touching the\r
+  * character references which are already in the string\r
+  *\r
+  * @static\r
+  * @deprecated\r
+  * @param String $string\r
+  * @param String $type 'xml' escapes the single quotation as the named reference (apos), anything else as the numeric reference 39\r
+  * @return String\r
+  */\r
+ static public function specialchars ($string, $type = 'xml')\r
+ {\r
+     $specialchars = array(\r
+         '"' => '&quot;',\r
+         '&' => '&amp;',\r
+         '<' => '&lt;',\r
+         '>' => '&gt;'\r
+     );\r
+     if ( $type != 'xml' )\r
+     {\r
+         /* HTML 4 has no named reference for the single quotation */\r
+         $specialchars["'"] = '&#39;';\r
+     }\r
+     else\r
+     {\r
+         $specialchars["'"] = '&apos;';\r
+     }\r
+\r
+     /* hide the existing references behind a marker so that their '&' survives */\r
+     $string = preg_replace('/&(#?[Xx]?[0-9A-Za-z]+);/', "[[[ENTITY:\\1]]]", $string);\r
+     $string = strtr($string, $specialchars);\r
+     /* and turn the markers back into references */\r
+     $string = preg_replace('/\[\[\[ENTITY\:([^\]]+)\]\]\]/', "&\\1;", $string);\r
+     return $string;\r
+ }\r
+ \r
+ /**\r
+  * ENTITY::_hex_to_utf8()\r
+  * convert the hexadecimal digits of a numeric character reference\r
+  * into the UTF-8 byte sequence (one to four bytes) of that code\r
+  *\r
+  * @static\r
+  * @deprecated\r
+  * @param String $s hexadecimal digits, without "&#x" and ";"\r
+  * @return String UTF-8 bytes, or an empty string if the code is 0x200000 or above\r
+  */\r
+ static public function _hex_to_utf8($s)\r
+ {\r
+     $c = hexdec($s);\r
+\r
+     /* stays empty when none of the ranges below applies */\r
+     $str = '';\r
+\r
+     if ( $c < 0x80 )\r
+     {\r
+         /* one byte */\r
+         $str = chr($c);\r
+     }\r
+     else if ( $c < 0x800 )\r
+     {\r
+         /* two bytes */\r
+         $str = chr(0xC0 | ($c >> 6))\r
+              . chr(0x80 | ($c & 0x3F));\r
+     }\r
+     else if ( $c < 0x10000 )\r
+     {\r
+         /* three bytes */\r
+         $str = chr(0xE0 | ($c >> 12))\r
+              . chr(0x80 | (($c >> 6) & 0x3F))\r
+              . chr(0x80 | ($c & 0x3F));\r
+     }\r
+     else if ( $c < 0x200000 )\r
+     {\r
+         /* four bytes */\r
+         $str = chr(0xF0 | ($c >> 18))\r
+              . chr(0x80 | (($c >> 12) & 0x3F))\r
+              . chr(0x80 | (($c >> 6) & 0x3F))\r
+              . chr(0x80 | ($c & 0x3F));\r
+     }\r
+     return $str;\r
+ }\r
+ \r
+ /**\r
+  * ENTITY::_named()\r
+  * convert one "&name" into its numeric character reference\r
+  *\r
+  * The longest leading part of $entity which is a key of the\r
+  * 'named_to_numeric' table of ENTITY::$entities is replaced with its\r
+  * value plus ';', the rest of $entity is appended unchanged.\r
+  *\r
+  * @static\r
+  * @deprecated\r
+  * @param String $entity "&name", without the closing semicolon\r
+  * @param String $extra letters which followed $entity: '', ';', '=' or ';='\r
+  * @return String\r
+  */\r
+ static public function _named($entity, $extra)\r
+ {\r
+     /* "&name=" is returned as it is (presumably a parameter in a query string - TODO confirm) */\r
+     if ( $extra == '=' )\r
+     {\r
+         return $entity . '=';\r
+     }\r
+\r
+     $table = self::$entities['named_to_numeric'];\r
+\r
+     /* try the whole name first, then drop one letter from the end at a time */\r
+     for ( $length = i18n::strlen($entity); $length > 0; $length-- )\r
+     {\r
+         $check = i18n::substr($entity, 0, $length);\r
+         if ( array_key_exists($check, $table) )\r
+         {\r
+             return $table[$check] . ';' . i18n::substr($entity, $length);\r
+         }\r
+     }\r
+\r
+     /* unknown name: give back what was matched */\r
+     if ( $extra != ';' )\r
+     {\r
+         return $entity;\r
+     }\r
+     return "{$entity};";\r
+ }\r
+ \r
+ /**\r
+  * ENTITY::$entities\r
+  *\r
+  * 'named_to_numeric': named character reference => decimal numeric\r
+  * character reference, both WITHOUT the closing semicolon, because\r
+  * ENTITY::_named() appends it and ENTITY::numeric_to_named() leaves the\r
+  * one in the string alone. The values follow the HTML 4.01 list.\r
+  *\r
+  * 'Windows-1252': hexadecimal reference in the range 0x80-0x9F, in the form\r
+  * produced by ENTITY::normalize_numeric() (upper case, with semicolon)\r
+  * => reference to the letter which Windows-1252 has at that position.\r
+  *\r
+  * NOTE: every key and value must be written in plain ASCII as a\r
+  * reference, never as the letter itself.\r
+  *\r
+  * HTML 4.01 Specification\r
+  * @link http://www.w3.org/TR/html4/sgml/entities.html\r
+  * @see 24 Character entity references in HTML 4\r
+  *\r
+  * XHTML 1.0 The Extensible HyperText Markup Language (Second Edition)\r
+  * A Reformulation of HTML 4 in XML 1.0\r
+  * @link http://www.w3.org/TR/xhtml1/\r
+  * @see 4.12. Entity references as hex values\r
+  * @see C.16. The Named Character Reference (apos)\r
+  *\r
+  * @static\r
+  * @deprecated\r
+  */\r
+ static private $entities = array (\r
+     'named_to_numeric' => array (\r
+         '&nbsp' => '&#160',\r
+         '&iexcl' => '&#161',\r
+         '&cent' => '&#162',\r
+         '&pound' => '&#163',\r
+         '&curren' => '&#164',\r
+         '&yen' => '&#165',\r
+         '&brvbar' => '&#166',\r
+         '&sect' => '&#167',\r
+         '&uml' => '&#168',\r
+         '&copy' => '&#169',\r
+         '&ordf' => '&#170',\r
+         '&laquo' => '&#171',\r
+         '&not' => '&#172',\r
+         '&shy' => '&#173',\r
+         '&reg' => '&#174',\r
+         '&macr' => '&#175',\r
+         '&deg' => '&#176',\r
+         '&plusmn' => '&#177',\r
+         '&sup2' => '&#178',\r
+         '&sup3' => '&#179',\r
+         '&acute' => '&#180',\r
+         '&micro' => '&#181',\r
+         '&para' => '&#182',\r
+         '&middot' => '&#183',\r
+         '&cedil' => '&#184',\r
+         '&sup1' => '&#185',\r
+         '&ordm' => '&#186',\r
+         '&raquo' => '&#187',\r
+         '&frac14' => '&#188',\r
+         '&frac12' => '&#189',\r
+         '&frac34' => '&#190',\r
+         '&iquest' => '&#191',\r
+         '&Agrave' => '&#192',\r
+         '&Aacute' => '&#193',\r
+         '&Acirc' => '&#194',\r
+         '&Atilde' => '&#195',\r
+         '&Auml' => '&#196',\r
+         '&Aring' => '&#197',\r
+         '&AElig' => '&#198',\r
+         '&Ccedil' => '&#199',\r
+         '&Egrave' => '&#200',\r
+         '&Eacute' => '&#201',\r
+         '&Ecirc' => '&#202',\r
+         '&Euml' => '&#203',\r
+         '&Igrave' => '&#204',\r
+         '&Iacute' => '&#205',\r
+         '&Icirc' => '&#206',\r
+         '&Iuml' => '&#207',\r
+         '&ETH' => '&#208',\r
+         '&Ntilde' => '&#209',\r
+         '&Ograve' => '&#210',\r
+         '&Oacute' => '&#211',\r
+         '&Ocirc' => '&#212',\r
+         '&Otilde' => '&#213',\r
+         '&Ouml' => '&#214',\r
+         '&times' => '&#215',\r
+         '&Oslash' => '&#216',\r
+         '&Ugrave' => '&#217',\r
+         '&Uacute' => '&#218',\r
+         '&Ucirc' => '&#219',\r
+         '&Uuml' => '&#220',\r
+         '&Yacute' => '&#221',\r
+         '&THORN' => '&#222',\r
+         '&szlig' => '&#223',\r
+         '&agrave' => '&#224',\r
+         '&aacute' => '&#225',\r
+         '&acirc' => '&#226',\r
+         '&atilde' => '&#227',\r
+         '&auml' => '&#228',\r
+         '&aring' => '&#229',\r
+         '&aelig' => '&#230',\r
+         '&ccedil' => '&#231',\r
+         '&egrave' => '&#232',\r
+         '&eacute' => '&#233',\r
+         '&ecirc' => '&#234',\r
+         '&euml' => '&#235',\r
+         '&igrave' => '&#236',\r
+         '&iacute' => '&#237',\r
+         '&icirc' => '&#238',\r
+         '&iuml' => '&#239',\r
+         '&eth' => '&#240',\r
+         '&ntilde' => '&#241',\r
+         '&ograve' => '&#242',\r
+         '&oacute' => '&#243',\r
+         '&ocirc' => '&#244',\r
+         '&otilde' => '&#245',\r
+         '&ouml' => '&#246',\r
+         '&divide' => '&#247',\r
+         '&oslash' => '&#248',\r
+         '&ugrave' => '&#249',\r
+         '&uacute' => '&#250',\r
+         '&ucirc' => '&#251',\r
+         '&uuml' => '&#252',\r
+         '&yacute' => '&#253',\r
+         '&thorn' => '&#254',\r
+         '&yuml' => '&#255',\r
+         '&OElig' => '&#338',\r
+         '&oelig' => '&#339',\r
+         '&Scaron' => '&#352',\r
+         '&scaron' => '&#353',\r
+         '&Yuml' => '&#376',\r
+         '&circ' => '&#710',\r
+         '&tilde' => '&#732',\r
+         '&ensp' => '&#8194',\r
+         '&emsp' => '&#8195',\r
+         '&thinsp' => '&#8201',\r
+         '&zwnj' => '&#8204',\r
+         '&zwj' => '&#8205',\r
+         '&lrm' => '&#8206',\r
+         '&rlm' => '&#8207',\r
+         '&ndash' => '&#8211',\r
+         '&mdash' => '&#8212',\r
+         '&lsquo' => '&#8216',\r
+         '&rsquo' => '&#8217',\r
+         '&sbquo' => '&#8218',\r
+         '&ldquo' => '&#8220',\r
+         '&rdquo' => '&#8221',\r
+         '&bdquo' => '&#8222',\r
+         '&dagger' => '&#8224',\r
+         '&Dagger' => '&#8225',\r
+         '&permil' => '&#8240',\r
+         '&lsaquo' => '&#8249',\r
+         '&rsaquo' => '&#8250',\r
+         '&euro' => '&#8364',\r
+         '&fnof' => '&#402',\r
+         '&Alpha' => '&#913',\r
+         '&Beta' => '&#914',\r
+         '&Gamma' => '&#915',\r
+         '&Delta' => '&#916',\r
+         '&Epsilon' => '&#917',\r
+         '&Zeta' => '&#918',\r
+         '&Eta' => '&#919',\r
+         '&Theta' => '&#920',\r
+         '&Iota' => '&#921',\r
+         '&Kappa' => '&#922',\r
+         '&Lambda' => '&#923',\r
+         '&Mu' => '&#924',\r
+         '&Nu' => '&#925',\r
+         '&Xi' => '&#926',\r
+         '&Omicron' => '&#927',\r
+         '&Pi' => '&#928',\r
+         '&Rho' => '&#929',\r
+         '&Sigma' => '&#931',\r
+         '&Tau' => '&#932',\r
+         '&Upsilon' => '&#933',\r
+         '&Phi' => '&#934',\r
+         '&Chi' => '&#935',\r
+         '&Psi' => '&#936',\r
+         '&Omega' => '&#937',\r
+         '&alpha' => '&#945',\r
+         '&beta' => '&#946',\r
+         '&gamma' => '&#947',\r
+         '&delta' => '&#948',\r
+         '&epsilon' => '&#949',\r
+         '&zeta' => '&#950',\r
+         '&eta' => '&#951',\r
+         '&theta' => '&#952',\r
+         '&iota' => '&#953',\r
+         '&kappa' => '&#954',\r
+         '&lambda' => '&#955',\r
+         '&mu' => '&#956',\r
+         '&nu' => '&#957',\r
+         '&xi' => '&#958',\r
+         '&omicron' => '&#959',\r
+         '&pi' => '&#960',\r
+         '&rho' => '&#961',\r
+         '&sigmaf' => '&#962',\r
+         '&sigma' => '&#963',\r
+         '&tau' => '&#964',\r
+         '&upsilon' => '&#965',\r
+         '&phi' => '&#966',\r
+         '&chi' => '&#967',\r
+         '&psi' => '&#968',\r
+         '&omega' => '&#969',\r
+         '&thetasym' => '&#977',\r
+         '&upsih' => '&#978',\r
+         '&piv' => '&#982',\r
+         '&bull' => '&#8226',\r
+         '&hellip' => '&#8230',\r
+         '&prime' => '&#8242',\r
+         '&Prime' => '&#8243',\r
+         '&oline' => '&#8254',\r
+         '&frasl' => '&#8260',\r
+         '&weierp' => '&#8472',\r
+         '&image' => '&#8465',\r
+         '&real' => '&#8476',\r
+         '&trade' => '&#8482',\r
+         '&alefsym' => '&#8501',\r
+         '&larr' => '&#8592',\r
+         '&uarr' => '&#8593',\r
+         '&rarr' => '&#8594',\r
+         '&darr' => '&#8595',\r
+         '&harr' => '&#8596',\r
+         '&crarr' => '&#8629',\r
+         '&lArr' => '&#8656',\r
+         '&uArr' => '&#8657',\r
+         '&rArr' => '&#8658',\r
+         '&dArr' => '&#8659',\r
+         '&hArr' => '&#8660',\r
+         '&forall' => '&#8704',\r
+         '&part' => '&#8706',\r
+         '&exist' => '&#8707',\r
+         '&empty' => '&#8709',\r
+         '&nabla' => '&#8711',\r
+         '&isin' => '&#8712',\r
+         '&notin' => '&#8713',\r
+         '&ni' => '&#8715',\r
+         '&prod' => '&#8719',\r
+         '&sum' => '&#8721',\r
+         '&minus' => '&#8722',\r
+         '&lowast' => '&#8727',\r
+         '&radic' => '&#8730',\r
+         '&prop' => '&#8733',\r
+         '&infin' => '&#8734',\r
+         '&ang' => '&#8736',\r
+         '&and' => '&#8743',\r
+         '&or' => '&#8744',\r
+         '&cap' => '&#8745',\r
+         '&cup' => '&#8746',\r
+         '&int' => '&#8747',\r
+         '&there4' => '&#8756',\r
+         '&sim' => '&#8764',\r
+         '&cong' => '&#8773',\r
+         '&asymp' => '&#8776',\r
+         '&ne' => '&#8800',\r
+         '&equiv' => '&#8801',\r
+         '&le' => '&#8804',\r
+         '&ge' => '&#8805',\r
+         '&sub' => '&#8834',\r
+         '&sup' => '&#8835',\r
+         '&nsub' => '&#8836',\r
+         '&sube' => '&#8838',\r
+         '&supe' => '&#8839',\r
+         '&oplus' => '&#8853',\r
+         '&otimes' => '&#8855',\r
+         '&perp' => '&#8869',\r
+         '&sdot' => '&#8901',\r
+         '&lceil' => '&#8968',\r
+         '&rceil' => '&#8969',\r
+         '&lfloor' => '&#8970',\r
+         '&rfloor' => '&#8971',\r
+         '&lang' => '&#9001',\r
+         '&rang' => '&#9002',\r
+         '&loz' => '&#9674',\r
+         '&spades' => '&#9824',\r
+         '&clubs' => '&#9827',\r
+         '&hearts' => '&#9829',\r
+         '&diams' => '&#9830'\r
+     ),\r
+     'Windows-1252' => array(\r
+         '&#x80;' => '&#x20AC;',\r
+         '&#x82;' => '&#x201A;',\r
+         '&#x83;' => '&#x192;',\r
+         '&#x84;' => '&#x201E;',\r
+         '&#x85;' => '&#x2026;',\r
+         '&#x86;' => '&#x2020;',\r
+         '&#x87;' => '&#x2021;',\r
+         '&#x88;' => '&#x2C6;',\r
+         '&#x89;' => '&#x2030;',\r
+         '&#x8A;' => '&#x160;',\r
+         '&#x8B;' => '&#x2039;',\r
+         '&#x8C;' => '&#x152;',\r
+         '&#x8E;' => '&#x17D;',\r
+         '&#x91;' => '&#x2018;',\r
+         '&#x92;' => '&#x2019;',\r
+         '&#x93;' => '&#x201C;',\r
+         '&#x94;' => '&#x201D;',\r
+         '&#x95;' => '&#x2022;',\r
+         '&#x96;' => '&#x2013;',\r
+         '&#x97;' => '&#x2014;',\r
+         '&#x98;' => '&#x2DC;',\r
+         '&#x99;' => '&#x2122;',\r
+         '&#x9A;' => '&#x161;',\r
+         '&#x9B;' => '&#x203A;',\r
+         '&#x9C;' => '&#x153;',\r
+         '&#x9E;' => '&#x17E;',\r
+         '&#x9F;' => '&#x178;',\r
+     )\r
+ );\r
+}\r