]*>.+<\/del[^>]*>#isU", '', $string); $string = preg_replace("#]*>.+<\/script[^>]*>#isU", '', $string); $string = preg_replace("#]*>.+<\/style[^>]*>#isU", '', $string); $string = preg_replace('#>#', '> ', $string); $string = preg_replace('#<#', ' <', $string); $string = strip_tags($string); $string = preg_replace("#\s+#", " ", $string); $string = trim($string); return $string; }

    /**
     * Entity::shorten()
     * Shortens a text string to at most $maxlength characters.
     * When the text has to be cut, $suffix is appended and counted as part of
     * the limit, so the returned text (measured with entities decoded) is
     * never longer than $maxlength. Text that already fits is returned
     * without a suffix.
     *
     * The purpose is to limit the width of a string rendered in a web browser,
     * so the visual result depends on style sheet, the browser's rendering
     * scheme and the client's system font.
     *
     * NOTE: In general, non-Latin glyphs (Japanese, Chinese, Cyrillic, ...) are
     * about twice as wide as Latin glyphs, but this is not always true, for
     * example when rendered with a proportional font.
     *
     * Entities found in the input are decoded before measuring and are written
     * back afterwards: every occurrence of a character whose entity appeared in
     * the input is returned in its entity form.
     *
     * @static
     * @param string  $string    target string, may contain HTML entities
     * @param integer $maxlength maximum length of the returned string, suffix included
     * @param string  $suffix    text appended to a shortened string
     * @return string
     */
    static public function shorten($string, $maxlength, $suffix)
    {
        $charset = i18n::get_current_charset();

        /* 1. remember every entity of the input, keyed by its decoded form */
        $restore = array();
        $found = array();
        if ( preg_match_all('#&[^&]+?;#', $string, $found) )
        {
            foreach ( array_unique($found[0]) as $encoded_entity )
            {
                $decoded_entity = html_entity_decode($encoded_entity, ENT_QUOTES, $charset);

                /* skip look-alikes such as "&T is big;" that do not decode to anything */
                if ( $decoded_entity === '' || $decoded_entity === $encoded_entity )
                {
                    continue;
                }
                /* when two entities decode to the same character the first one wins */
                if ( !isset($restore[$decoded_entity]) )
                {
                    $restore[$decoded_entity] = $encoded_entity;
                }
            }
        }

        /* 2. decode string */
        $string = html_entity_decode($string, ENT_QUOTES, $charset);

        /* 3. shorten string and add suffix only if the string does not fit */
        if ( i18n::strlen($string) > $maxlength )
        {
            /* never pass a negative length, which would cut from the end instead */
            $keep = max(0, $maxlength - i18n::strlen($suffix));
            $string = i18n::substr($string, 0, $keep) . $suffix;
        }

        /*
         * 4. recover entities
         * strtr() works on literal strings in a single pass, so characters
         * such as "(" or "#" need no quoting and the "&" of a restored entity
         * is never encoded a second time.
         */
        if ( $restore )
        {
            $string = strtr($string, $restore);
        }

        return $string;
    }

    /**
     * Entity::highlight()
     * Highlights a specific query in a given HTML text. Only the text between
     * tags is searched; the tags themselves are copied through untouched.
     *
     * @static
     * @param string $text       text to be highlighted
     * @param mixed  $expression regular expression body to be matched, without delimiters
     *                           (can be an array of expressions as well); "#" is used as
     *                           delimiter and matching is case-insensitive
     * @param string $highlight  replacement to be used (use \\0 to indicate the matched expression)
     * @return string
     */
    static public function highlight($text, $expression, $highlight)
    {
        /* nothing to do without a replacement or an expression (an empty array is falsy too) */
        if ( !$highlight || !$expression )
        {
            return $text;
        }

        $expressions = is_array($expression) ? $expression : array($expression);

        /*
         * The pattern captures "tag, then the text following it". A sentinel
         * tag is put in front so that text preceding the first real tag is
         * captured as well; the sentinel itself is skipped when rebuilding.
         */
        $matches = array();
        preg_match_all('#(<[^>]+>)([^<>]*)#', '<!--h-->' . $text, $matches);

        $result = '';
        foreach ( $matches[2] as $i => $segment )
        {
            if ( $i != 0 )
            {
                $result .= $matches[1][$i];
            }

            foreach ( $expressions as $regex )
            {
                $replaced = preg_replace("#{$regex}#i", $highlight, $segment);

                /* an invalid expression yields NULL: keep the text instead of losing it */
                if ( $replaced !== NULL )
                {
                    $segment = $replaced;
                }
            }
            $result .= $segment;
        }

        return $result;
    }

    /**
     * Entity::anchor_footnoting()
     * Turns anchor elements into footnotes: each anchor is replaced by its
     * link text followed by a running number "[n]", and a list of
     * "[n] url" lines is appended after a blank line. Remaining markup is
     * stripped from the text.
     *
     * Only anchors with a quoted href and plain-text content are footnoted.
     * A string without such anchors is returned unchanged.
     *
     * @static
     * @param String $string string which includes html elements
     * @return String string with footnotes
     */
    static public function anchor_footnoting($string)
    {
        /* 1. detect anchor elements and replace them by "text [n] " */
        $footnotes = array();
        $body = preg_replace_callback(
            '#<a\b[^>]*href=["\']([^"\']*)["\'][^>]*>([^<]*)</a>#i',
            function ($anchor) use (&$footnotes)
            {
                $number = count($footnotes) + 1;
                $footnotes[] = "[{$number}] {$anchor[1]}";
                return "{$anchor[2]} [{$number}] ";
            },
            $string
        );

        if ( !$footnotes )
        {
            return $string;
        }

        /* 2. add footnotes (the URLs are appended after stripping so they stay intact) */
        return strip_tags($body) . "\n\n" . implode("\n", $footnotes) . "\n";
    }

    /*
     * NOTE: Obsoleted functions
     */

    /**
     * Entity::named_to_numeric()
     * Passes every "&name" sequence, together with the ";" and/or "=" that
     * follows it, to Entity::_named() and substitutes the result.
     *
     * @deprecated
     * @param String $string
     * @return String
     */
    function named_to_numeric ($string)
    {
        /* preg_replace_callback() replaces the /e modifier, which no longer exists as of PHP 7 */
        return preg_replace_callback(
            '/(&[0-9A-Za-z]+)(;?\=?|([^A-Za-z0-9\;\:\.\-\_]))/',
            function ($match)
            {
                /* unmatched trailing groups are not present in $match */
                $tail = isset($match[3]) ? $match[3] : '';
                return Entity::_named($match[1], $match[2]) . $tail;
            },
            $string
        );
    }

    /**
     * Entity::normalize_numeric()
     * Rewrites decimal and hexadecimal numeric character references to the
     * form "&#xHH;" (upper-case digits, no leading zeros, always terminated),
     * then applies the 'Windows-1252' translation table.
     *
     * @deprecated
     * @param String $string
     * @return String
     */
    function normalize_numeric ($string)
    {
        $string = self::_canonical_hex_references($string);
        return strtr($string, self::$entities['Windows-1252']);
    }

    /**
     * Entity::numeric_to_utf8()
     * Replaces decimal and hexadecimal numeric character references by the
     * UTF-8 encoding of the code point they denote.
     *
     * @deprecated
     * @param String $string
     * @return String
     */
    function numeric_to_utf8 ($string)
    {
        $string = self::_canonical_hex_references($string);
        return preg_replace_callback(
            '/&#x([0-9A-Fa-f]+);/',
            function ($match)
            {
                return Entity::_hex_to_utf8($match[1]);
            },
            $string
        );
    }

    /**
     * Entity::_canonical_hex_references()
     * Shared first step of normalize_numeric() and numeric_to_utf8():
     * "&#65" / "&#65;" become "&#x41;", and "&#x0041" becomes "&#x41;".
     *
     * @static
     * @param String $string
     * @return String
     */
    static private function _canonical_hex_references($string)
    {
        /* decimal -> hexadecimal, adding the terminating ";" when it is missing */
        $string = preg_replace_callback(
            '/&#([0-9]+)(;)?/',
            function ($match)
            {
                return '&#x' . dechex((int) $match[1]) . ';';
            },
            $string
        );

        /* hexadecimal: drop leading zeros, upper-case the digits, terminate */
        return preg_replace_callback(
            '/&#[Xx](0)*([0-9A-Fa-f]+)(;?|([^A-Za-z0-9\;\:\.\-\_]))/',
            function ($match)
            {
                $tail = isset($match[4]) ? $match[4] : '';
                return '&#x' . strtoupper($match[2]) . ';' . $tail;
            },
            $string
        );
    }

    /**
     * Entity::numeric_to_named()
     * Converts hexadecimal numeric character references to decimal ones and
     * then translates the string through the flipped 'named_to_numeric' table.
     *
     * NOTE(review): the translation only has an effect when the table values
     * use the same notation as the references produced here; confirm against
     * the table.
     *
     * @deprecated
     * @param String $string
     * @return String
     */
    function numeric_to_named ($string)
    {
        $string = preg_replace_callback(
            '/&#[Xx]([0-9A-Fa-f]+)/',
            function ($match)
            {
                return '&#' . hexdec($match[1]);
            },
            $string
        );
        return strtr($string, array_flip(self::$entities['named_to_numeric']));
    }

    /**
     * Entity::specialchars()
     * Escapes the markup characters " & < > ' while leaving character
     * references that are already present in the string untouched.
     * The apostrophe becomes "&apos;" for type 'xml' and "&#39;" otherwise,
     * because "&apos;" is not defined in HTML 4.
     *
     * @deprecated
     * @param String $string
     * @param String $type 'xml' (default) or anything else for HTML
     * @return String
     */
    function specialchars ($string, $type = 'xml')
    {
        $specialchars = array(
            '"' => '&quot;',
            '&' => '&amp;',
            '<' => '&lt;',
            '>' => '&gt;',
            "'" => ( $type != 'xml' ) ? '&#39;' : '&apos;'
        );

        /* hide existing references behind a placeholder so their "&" is not escaped again */
        $string = preg_replace('/&(#?[Xx]?[0-9A-Za-z]+);/', "[[[ENTITY:\\1]]]", $string);
        $string = strtr($string, $specialchars);
        $string = preg_replace('/\[\[\[ENTITY\:([^\]]+)\]\]\]/', "&\\1;", $string);

        return $string;
    }

    /**
     * Entity::_hex_to_utf8()
     * Returns the UTF-8 byte sequence for a code point given as a string of
     * hexadecimal digits. Surrogates and values above U+10FFFF cannot be
     * encoded and yield U+FFFD (replacement character).
     *
     * @deprecated
     * @param String $s hexadecimal digits of the code point
     * @return String
     */
    function _hex_to_utf8($s)
    {
        $c = hexdec($s);

        if ( $c > 0x10FFFF || ($c >= 0xD800 && $c <= 0xDFFF) )
        {
            return "\xEF\xBF\xBD";
        }
        if ( $c < 0x80 )
        {
            return chr($c);
        }
        if ( $c < 0x800 )
        {
            return chr(0xC0 | ($c >> 6))
                 . chr(0x80 | ($c & 0x3F));
        }
        if ( $c < 0x10000 )
        {
            return chr(0xE0 | ($c >> 12))
                 . chr(0x80 | (($c >> 6) & 0x3F))
                 . chr(0x80 | ($c & 0x3F));
        }
        return chr(0xF0 | ($c >> 18))
             . chr(0x80 | (($c >> 12) & 0x3F))
             . chr(0x80 | (($c >> 6) & 0x3F))
             . chr(0x80 | ($c & 0x3F));
    }

    /** * Entity::_named() * convert entities to named character reference * * @deprecated * @param String $string * @param String $extra * @return */ function _named($entity, $extra) { if ( $extra == '=' ) { return $entity . 
'='; } $length = i18n::strlen($entity); while ( $length > 0 ) { $check = i18n::substr($entity, 0, $length); if ( array_key_exists($check, self::$entities['named_to_numeric']) ) { return self::$entities['named_to_numeric'][$check] . ';' . i18n::substr($entity, $length); } $length--; } if ( $extra != ';' ) { return $entity; } else { return "{$entity};"; } } /** * ENTITIY::$entities * * HTML 4.01 Specification * @link http://www.w3.org/TR/html4/sgml/entities.html * @see 24 Character entity references in HTML 4 * * XHTML™ 1.0 The Extensible HyperText Markup Language (Second Edition) * A Reformulation of HTML 4 in XML 1.0 * @link http://www.w3.org/TR/xhtml1/ * @see 4.12. Entity references as hex values * @see C.16. The Named Character Reference ' * * @static * @deprecated */ static private $entities = array ( 'named_to_numeric' => array ( ' ' => ' ', '¡' => '¡', '¢' => '¢', '£' => '£', '¤' => '¤', '¥' => '¥', '¦' => '¦', '§' => '§', '¨' => '¨', '©' => '©', 'ª' => 'ª', '«' => '«', '¬' => '¬', '­' => '­', '®' => '®', '¯' => '¯', '°' => '°', '±' => '±', '²' => '²', '³' => '³', '´' => '´', 'µ' => 'µ', '¶' => '¶', '·' => '·', '¸' => '¸', '¹' => '¹', 'º' => 'º', '»' => '»', '¼' => '¼', '½' => '½', '¾' => '¾', '¿' => '¿', 'À' => 'À', 'Á' => 'Á', 'Â' => 'Â', 'Ã' => 'Ã', 'Ä' => 'Ä', 'Å' => 'Å', 'Æ' => 'Æ', 'Ç' => 'Ç', 'È' => 'È', 'É' => 'É', 'Ê' => 'Ê', 'Ë' => 'Ë', 'Ì' => 'Ì', 'Í' => 'Í', 'Î' => 'Î', 'Ï' => 'Ï', 'Ð' => 'Ð', 'Ñ' => 'Ñ', 'Ò' => 'Ò', 'Ó' => 'Ó', 'Ô' => 'Ô', 'Õ' => 'Õ', 'Ö' => 'Ö', '×' => '×', 'Ø' => 'Ø', 'Ù' => 'Ù', 'Ú' => 'Ú', 'Û' => 'Û', 'Ü' => 'Ü', 'Ý' => 'Ý', 'Þ' => 'Þ', 'ß' => 'ß', 'à' => 'à', 'á' => 'á', 'â' => 'â', 'ã' => 'ã', 'ä' => 'ä', 'å' => 'å', 'æ' => 'æ', 'ç' => 'ç', 'è' => 'è', 'é' => 'é', 'ê' => 'ê', 'ë' => 'ë', 'ì' => 'ì', 'í' => 'í', 'î' => 'î', 'ï' => 'ï', 'ð' => 'ð', 'ñ' => 'ñ', 'ò' => 'ò', 'ó' => 'ó', 'ô' => 'ô', 'õ' => 'õ', 'ö' => 'ö', '÷' => '÷', 'ø' => 'ø', 'ù' => 'ù', 'ú' => 'ú', 'û' => 'û', 'ü' => 'ü', 'ý' => 'ý', 'þ' => 'þ', 'ÿ' => 'ÿ', 
'&OElig' => 'Œ', '&oelig' => 'å', '&Scaron' => 'Š', '&scaron' => 'š', '&Yuml' => 'Ÿ', '&circ' => 'ˆ', '&tilde' => '˜', '&esnp' => ' ', '&emsp' => ' ', '&thinsp' => ' ', '&zwnj' => '‌', '&zwj' => '‍', '&lrm' => '‎', '&rlm' => '‏', '&ndash' => '–', '&mdash' => '—', '&lsquo' => '‘', '&rsquo' => '’', '&sbquo' => '‚', '&ldquo' => '“', '&rdquo' => '”', '&bdquo' => '„', '&dagger' => '†', '&Dagger' => '‡', '&permil' => '‰', '&lsaquo' => '‹', '&rsaquo' => '›', '&euro' => '€', '&fnof' => 'ƒ', '&Alpha' => 'Α', '&Beta' => 'Β', '&Gamma' => 'Γ', '&Delta' => 'Δ', '&Epsilon' => 'Ε', '&Zeta' => 'Ζ', '&Eta' => 'Η', '&Theta' => 'Θ', '&Iota' => 'Ι', '&Kappa' => 'Κ', '&Lambda' => 'Λ', '&Mu' => 'Μ', '&Nu' => 'Ν', '&Xi' => 'Ξ', '&Omicron' => 'Ο', '&Pi' => 'Π', '&Rho' => 'Ρ', '&Sigma' => 'Σ', '&Tau' => 'Τ', '&Upsilon' => 'Υ', '&Phi' => 'Φ', '&Chi' => 'Χ', '&Psi' => 'Ψ', '&Omega' => 'Ω', '&alpha' => 'α', '&beta' => 'β', '&gamma' => 'γ', '&delta' => 'δ', '&epsilon' => 'ε', '&zeta' => 'ζ', '&eta' => 'η', '&theta' => 'θ', '&iota' => 'ι', '&kappa' => 'κ', '&lambda' => 'λ', '&mu' => 'μ', '&nu' => 'ν', '&xi' => 'ξ', '&omicron' => 'ο', '&pi' => 'π', '&rho' => 'ρ', '&sigmaf' => 'ς', '&sigma' => 'σ', '&tau' => 'τ', '&upsilon' => 'υ', '&phi' => 'φ', '&chi' => 'χ', '&psi' => 'ψ', '&omega' => 'ω', '&thetasym' => 'ϑ', '&upsih' => 'ϒ', '&piv' => 'ϖ', '&bull' => '•', '&hellip' => '…', '&prime' => '′', '&Prime' => '″', '&oline' => '‾', '&frasl' => '⁄', '&weierp' => '℘', '&image' => 'ℑ', '&real' => 'ℜ', '&trade' => 'ℒ', '&alefsym' => 'ℵ', '&larr' => '←', '&uarr' => '↑', '&rarr' => '→', '&darr' => '↓', '&harr' => '↔', '&crarr' => '↵', '&lArr' => '⇐', '&uArr' => '⇑', '&rArr' => '⇒', '&dArr' => '⇓', '&hArr' => '⇔', '&forall' => '∀', '&part' => '∂', '&exist' => '∃', '&empty' => '∅', '&nabla' => '∇', '&isin' => '∈', '¬in' => '∉', '&ni' => '∋', '&prod' => '∏', '&sum' => '∑', '&minus' => '−', '&lowast' => '∗', '&radic' => '√', '&prop' => '∝', '&infin' => '∞', '&ang' => '∠', '&and' => '∧', '&or' => '∨', '&cap' => 
'∩', '&cup' => '∪', '&int' => '∫', '&there4' => '∴', '&sim' => '∼', '&cong' => '≅', '&asymp' => '≈', '&ne' => '≠', '&equiv' => '≡', '&le' => '≤', '&ge' => '≥', '&sub' => '⊂', '&sup' => '⊃', '&nsub' => '⊄', '&sube' => '⊆', '&supe' => '⊇', '&oplus' => '⊕', '&otimes' => '⊖', '&perp' => '⊥', '&sdot' => '⋅', '&lceil' => '⍨', '&rceil' => '⌉', '&lfloor' => '⌊', '&rfloor' => '⌋', '&lang' => '〈', '&rang' => '⌰', '&loz' => '◊', '&spades' => '♠', '&clubs' => '♣', '&hearts' => '♥', '&diams' => '♦' ), 'Windows-1252' => array( '€' => '€', '‚' => '‚', 'ƒ' => 'ƒ', '„' => '„', '…' => '…', '†' => '†', '‡' => '‡', 'ˆ' => 'ˆ', '‰' => '‰', 'Š' => 'Š', '‹' => '‹', 'Œ' => 'Œ', 'Ž' => 'Ž', '‘' => '‘', '’' => '’', '“' => '“', '”' => '”', '•' => '•', '–' => '–', '—' => '—', '˜' => '˜', '™' => '™', 'š' => 'š', '›' => '›', 'œ' => 'œ', 'ž' => 'ž', 'Ÿ' => 'Ÿ', ) ); }