3 * @author Nick Pope <nick@nickpope.me.uk>
4 * @copyright Copyright © 2010, Nick Pope
5 * @license http://www.apache.org/licenses/LICENSE-2.0 Apache License v2.0
9 require_once 'Regex.php';
12 * Twitter HitHighlighter Class
14 * Performs "hit highlighting" on tweets that have been auto-linked already.
15 * Useful with the results returned from the search API.
17 * Originally written by {@link http://github.com/mikenz Mike Cochrane}, this
18 * is based on code by {@link http://github.com/mzsanford Matt Sanford} and
19 * heavily modified by {@link http://github.com/ngnpope Nick Pope}.
21 * @author Nick Pope <nick@nickpope.me.uk>
22 * @copyright Copyright © 2010, Nick Pope
23 * @license http://www.apache.org/licenses/LICENSE-2.0 Apache License v2.0
26 class Twitter_HitHighlighter extends Twitter_Regex {
29 * The tag to surround hits with.
33 protected $tag = 'em';
36 * Provides fluent method chaining.
38 * @param string $tweet The tweet to be hit highlighted.
39 * @param bool $full_encode Whether to encode all special characters.
43 * @return Twitter_HitHighlighter
45 public static function create($tweet, $full_encode = false) {
// Fluent factory: builds the highlighter through the constructor and returns it.
// NOTE(review): $full_encode is passed as the constructor's second positional
// argument, which __construct() declares as $escape; the constructor's own
// $full_encode (third parameter) is never supplied here and keeps its default.
// Confirm whether callers depend on this before re-ordering the arguments.
46 return new self($tweet, $full_encode);
50 * Reads in a tweet to be parsed and hit highlighted.
52 * We take this opportunity to ensure that we escape user input.
54 * @see htmlspecialchars()
56 * @param string $tweet The tweet to be hit highlighted.
57 * @param bool $escape Whether to escape the tweet (default: true).
58 * @param bool $full_encode Whether to encode all special characters.
60 public function __construct($tweet, $escape = true, $full_encode = false) {
// NOTE(review): the conditions that choose between the three parent
// constructor calls below are not visible in this listing; presumably
// $escape and $full_encode select among them - confirm against the full file.
// Translate every character that has an HTML entity equivalent. The final
// `false` disables double encoding, so entities already in $tweet are kept.
63 parent::__construct(htmlentities($tweet, ENT_QUOTES, 'UTF-8', false));
// Translate only the HTML special characters; ENT_QUOTES covers both single
// and double quotes. Double encoding is disabled here as well.
65 parent::__construct(htmlspecialchars($tweet, ENT_QUOTES, 'UTF-8', false));
// Hand the tweet to the parent constructor unchanged.
68 parent::__construct($tweet);
73 * Get the highlighting tag that hits are surrounded with. The default tag is 'em'.
75 * @return string The tag name.
77 public function getTag() {
// NOTE(review): the body of this accessor is not visible in this listing;
// presumably it returns $this->tag - confirm against the full file.
82 * Set the highlighting tag to surround hits with. The default tag is 'em'.
84 * @param string $v The tag name.
86 * @return Twitter_HitHighlighter Fluent method chaining.
88 public function setTag($v) {
// NOTE(review): the body of this mutator is not visible in this listing;
// presumably it stores $v in $this->tag and returns $this - confirm against the full file.
94 * Hit highlights the tweet.
96 * @param array $hits An array containing the start and end index pairs
97 * for the highlighting.
99 * @return string The hit highlighted tweet.
101 public function addHitHighlighting(array $hits) {
// Wraps each [start, end] index pair from $hits in the configured tag.
// Indices are character positions (mb_* functions are used throughout).
// NOTE(review): several statements of this method are not visible in this
// listing (initial values of $tweet, $chunk, $chunk_index, $chunk_cursor and
// $offset, every assignment to $placed, the advance of $chunk_index, and the
// final return). Comments below describe only the visible lines.
// Nothing to highlight: hand back the stored tweet untouched.
102 if (empty($hits)) return $this->tweet;
// $tags[0] is the opening tag, $tags[1] the closing tag.
104 $tags = array('<'.$this->tag.'>', '</'.$this->tag.'>');
105 # Check whether we can simply replace or whether we need to chunk...
// Fast path: the tweet contains no '<', so tags can be spliced in directly.
106 if (strpos($this->tweet, '<') === false) {
107 $ti = 0; // tag increment (for added tags)
108 $tweet = $this->tweet;
// $ti is added to every subsequent index, so this presumably assumes the
// hits arrive in ascending, non-overlapping order - TODO confirm with callers.
109 foreach ($hits as $hit) {
// A replacement length of 0 makes this a pure insertion at the hit start.
110 $tweet = self::mb_substr_replace($tweet, $tags[0], $hit[0] + $ti, 0);
111 $ti += mb_strlen($tags[0]);
// Same for the closing tag at the hit end, shifted by everything inserted so far.
112 $tweet = self::mb_substr_replace($tweet, $tags[1], $hit[1] + $ti, 0);
113 $ti += mb_strlen($tags[1]);
// Chunked path: split on either angle bracket. The code below treats
// even-indexed chunks as text and odd-indexed chunks as tag interiors,
// re-wrapping the latter in '<' ... '>' when they are emitted.
116 $chunks = preg_split('/[<>]/iu', $this->tweet);
// True while an opening highlight tag has been placed inside the current chunk.
121 $start_in_chunk = false;
122 # Flatten the multidimensional hits array:
123 $hits_flat = array();
124 foreach ($hits as $hit) $hits_flat = array_merge($hits_flat, $hit);
125 # Loop over the hit indices:
126 for ($index = 0; $index < count($hits_flat); $index++) {
127 $hit = $hits_flat[$index];
// Even positions in the flattened list are starts (opening tag), odd are ends (closing tag).
128 $tag = $tags[$index % 2];
// Skip forward while the hit index lies at or beyond the end of the current
// chunk; $i is that end position, i.e. $offset plus the chunk's length.
130 while ($chunk !== null && $hit >= ($i = $offset + mb_strlen($chunk))) {
// Emit whatever is left of the current chunk.
131 $tweet .= mb_substr($chunk, $chunk_cursor);
// The hit falls exactly on the chunk's end and the highlight was opened in
// this same chunk (the body of this branch is not visible in this listing).
132 if ($start_in_chunk && $hit === $i) {
// Re-emit the chunk that follows, wrapped in the angle brackets removed by the split.
136 if (isset($chunks[$chunk_index+1])) $tweet .= '<' . $chunks[$chunk_index+1] . '>';
// $offset grows only by the length of the chunk just consumed; the re-emitted tag is not counted.
137 $offset += mb_strlen($chunk);
// Load the chunk at $chunk_index, or null once the chunks are exhausted.
140 $chunk = (isset($chunks[$chunk_index]) ? $chunks[$chunk_index] : null);
141 $start_in_chunk = false;
// The hit lies inside the current chunk: emit the text up to it, then the tag.
143 if (!$placed && $chunk !== null) {
// Convert the hit index into a position relative to the current chunk.
144 $hit_spot = $hit - $offset;
145 $tweet .= mb_substr($chunk, $chunk_cursor, $hit_spot - $chunk_cursor) . $tag;
146 $chunk_cursor = $hit_spot;
// Remember that an opening tag was placed here (only for start indices).
147 $start_in_chunk = ($index % 2 === 0);
150 # Ultimate fallback - hits that run off the end get a closing tag:
151 if (!$placed) $tweet .= $tag;
// After the last hit: flush the unread remainder of the current chunk...
153 if ($chunk !== null) {
154 if ($chunk_cursor < mb_strlen($chunk)) {
155 $tweet .= mb_substr($chunk, $chunk_cursor);
// ...and then every remaining chunk, restoring brackets around odd-indexed ones.
157 for ($index = $chunk_index + 1; $index < count($chunks); $index++) {
158 $tweet .= ($index % 2 === 0 ? $chunks[$index] : '<' . $chunks[$index] . '>');
166 * A multibyte-aware substring replacement function.
168 * @param string $string The string to modify.
169 * @param string $replacement The replacement string.
170 * @param int $start The start of the replacement.
171 * @param int $length The number of characters to replace.
172 * @param string $encoding The encoding of the string.
174 * @return string The modified string.
176 * @see http://www.php.net/manual/en/function.substr-replace.php#90146
178 protected static function mb_substr_replace($string, $replacement, $start, $length = null, $encoding = null) {
// Character-based counterpart of substr_replace(), used when mbstring is loaded.
// NOTE(review): the `if` lines that open the $start and $length checks are
// not visible in this listing; the notes on those two assignments are hedged.
179 if (extension_loaded('mbstring') === true) {
// Length in characters; mb_strlen() uses its default encoding when none is given.
180 $string_length = (is_null($encoding) === true) ? mb_strlen($string) : mb_strlen($string, $encoding);
// Presumably for a negative $start: count back from the end, clamped at 0.
182 $start = max(0, $string_length + $start);
// A start past the end is clamped to the end of the string.
183 } else if ($start > $string_length) {
184 $start = $string_length;
// Presumably for a negative $length: stop that many characters before the end.
187 $length = max(0, $string_length - $start + $length);
// A missing or oversized length starts out as the whole string length.
188 } else if ((is_null($length) === true) || ($length > $string_length)) {
189 $length = $string_length;
// Clip the replaced span so it never runs past the end of the string.
191 if (($start + $length) > $string_length) {
192 $length = $string_length - $start;
// Splice: text before $start, the replacement, then text after the replaced span.
194 if (is_null($encoding) === true) {
195 return mb_substr($string, 0, $start) . $replacement . mb_substr($string, $start + $length, $string_length - $start - $length);
// Same splice with the caller-supplied encoding.
197 return mb_substr($string, 0, $start, $encoding) . $replacement . mb_substr($string, $start + $length, $string_length - $start - $length, $encoding);
// Byte-based fallback via substr_replace(); presumably reached only when
// mbstring is not loaded, in which case offsets are bytes rather than characters.
199 return (is_null($length) === true) ? substr_replace($string, $replacement, $start) : substr_replace($string, $replacement, $start, $length);