1 // OpenTween - Client of Twitter
2 // Copyright (c) 2015 kim_upsilon (@kim_upsilon) <https://upsilo.net/~upsilon/>
3 // All rights reserved.
5 // This file is part of OpenTween.
7 // This program is free software; you can redistribute it and/or modify it
8 // under the terms of the GNU General Public License as published by the Free
9 // Software Foundation; either version 3 of the License, or (at your option)
12 // This program is distributed in the hope that it will be useful, but
13 // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 // You should have received a copy of the GNU General Public License along
18 // with this program. If not, see <http://www.gnu.org/licenses/>, or write to
19 // the Free Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
20 // Boston, MA 02110-1301, USA.
23 using System.Collections.Generic;
26 using System.Threading.Tasks;
32 public class TweetExtractorTest
public void ExtractUrls_Test()
{
    // Checks which substrings TweetExtractor.ExtractUrls reports as URLs
    // for http://, https:// and scheme-less inputs.
    const string httpRoot = "http://example.com/";
    const string httpWithPath = "http://example.com/hogehoge";
    const string httpsRoot = "https://example.com/";
    const string httpsWithPath = "https://example.com/hogehoge";
    const string bareHost = "example.com";
    const string bareHostWithPath = "example.com/hogehoge";
    const string ccTldHostWithPath = "example.jp/hogehoge";

    // Explicit http:// scheme.
    Assert.Equal(new[] { httpRoot }, TweetExtractor.ExtractUrls(httpRoot));
    Assert.Equal(new[] { httpWithPath }, TweetExtractor.ExtractUrls(httpWithPath));
    Assert.Equal(new[] { httpRoot }, TweetExtractor.ExtractUrls("hogehoge http://example.com/"));

    // Explicit https:// scheme.
    Assert.Equal(new[] { httpsRoot }, TweetExtractor.ExtractUrls(httpsRoot));
    Assert.Equal(new[] { httpsWithPath }, TweetExtractor.ExtractUrls(httpsWithPath));
    Assert.Equal(new[] { httpsRoot }, TweetExtractor.ExtractUrls("hogehoge https://example.com/"));

    // Scheme omitted.
    Assert.Equal(new[] { bareHost }, TweetExtractor.ExtractUrls(bareHost));
    Assert.Equal(new[] { bareHostWithPath }, TweetExtractor.ExtractUrls(bareHostWithPath));
    Assert.Equal(new[] { bareHost }, TweetExtractor.ExtractUrls("hogehoge example.com"));

    // When the scheme (http://) is omitted and the text ends with a ccTLD,
    // it is not shortened to t.co, so nothing is extracted.
    Assert.Empty(TweetExtractor.ExtractUrls("example.jp"));

    // However, when a path follows the ccTLD, it is shortened to t.co.
    Assert.Equal(new[] { ccTldHostWithPath }, TweetExtractor.ExtractUrls(ccTldHostWithPath));
}
public void ExtractUrlEntities_Test()
{
    // A single URL preceded by plain text must yield exactly one entity
    // whose Url, ExpandedUrl and DisplayUrl all equal the URL as written.
    const string expectedUrl = "http://example.com/";

    var extracted = TweetExtractor.ExtractUrlEntities("hogehoge http://example.com/");
    var urlEntity = extracted.Single();

    Assert.Equal(new[] { 9, 28 }, urlEntity.Indices);
    Assert.Equal(expectedUrl, urlEntity.Url);
    Assert.Equal(expectedUrl, urlEntity.ExpandedUrl);
    Assert.Equal(expectedUrl, urlEntity.DisplayUrl);
}
public void ExtractUrlEntities_MultipleTest()
{
    // Two URLs in one text must be reported as two entities, in order of appearance.
    var urlEntities = TweetExtractor
        .ExtractUrlEntities("hogehoge http://aaa.example.com/ http://bbb.example.com/")
        .ToArray();

    Assert.Equal(2, urlEntities.Length);

    var first = urlEntities[0];
    Assert.Equal(new[] { 9, 32 }, first.Indices);
    Assert.Equal("http://aaa.example.com/", first.Url);

    var second = urlEntities[1];
    Assert.Equal(new[] { 33, 56 }, second.Indices);
    Assert.Equal("http://bbb.example.com/", second.Url);
}
public void ExtractUrlEntities_SurrogatePairTest()
{
    // The leading emoji is a surrogate pair in UTF-16; the expected indices
    // (2..21) count it as a single position.
    const string expectedUrl = "http://example.com/";

    var extracted = TweetExtractor.ExtractUrlEntities("🍣 http://example.com/ 🍣");
    var urlEntity = extracted.Single();

    Assert.Equal(new[] { 2, 21 }, urlEntity.Indices);
    Assert.Equal(expectedUrl, urlEntity.Url);
    Assert.Equal(expectedUrl, urlEntity.ExpandedUrl);
    Assert.Equal(expectedUrl, urlEntity.DisplayUrl);
}
public void ExtractUrlEntities_CompositeCharacterTest()
{
    // Tweet containing the precomposed character é (U+00E9), which is counted as one character.
    // Reference: https://dev.twitter.com/issues/251
    const string expectedUrl = "http://example.com/";

    var extracted = TweetExtractor.ExtractUrlEntities("Caf\u00e9 http://example.com/");
    var urlEntity = extracted.Single();

    Assert.Equal(new[] { 5, 24 }, urlEntity.Indices);
    Assert.Equal(expectedUrl, urlEntity.Url);
    Assert.Equal(expectedUrl, urlEntity.ExpandedUrl);
    Assert.Equal(expectedUrl, urlEntity.DisplayUrl);
}
public void ExtractUrlEntities_CombiningCharacterSequenceTest()
{
    // Tweet containing the combining character sequence é (e + U+0301), which is counted as two characters.
    // Reference: https://dev.twitter.com/issues/251
    const string expectedUrl = "http://example.com/";

    var extracted = TweetExtractor.ExtractUrlEntities("Cafe\u0301 http://example.com/");
    var urlEntity = extracted.Single();

    Assert.Equal(new[] { 6, 25 }, urlEntity.Indices);
    Assert.Equal(expectedUrl, urlEntity.Url);
    Assert.Equal(expectedUrl, urlEntity.ExpandedUrl);
    Assert.Equal(expectedUrl, urlEntity.DisplayUrl);
}
public void ExtractMentionEntities_Test()
{
    // A single @mention after plain text must yield exactly one entity.
    var extracted = TweetExtractor.ExtractMentionEntities("hogehoge @twitterapi");
    var mention = extracted.Single();

    // Indices cover the whole "@twitterapi" span, but ScreenName does not include the "@".
    Assert.Equal(new[] { 9, 20 }, mention.Indices);
    Assert.Equal("twitterapi", mention.ScreenName);
}
public void ExtractMentionEntities_MultipleTest()
{
    // Two @mentions in one text must be reported as two entities, in order of appearance.
    var mentions = TweetExtractor
        .ExtractMentionEntities("hogehoge @twitterapi @opentween")
        .ToArray();

    Assert.Equal(2, mentions.Length);

    var first = mentions[0];
    Assert.Equal(new[] { 9, 20 }, first.Indices);
    Assert.Equal("twitterapi", first.ScreenName);

    var second = mentions[1];
    Assert.Equal(new[] { 21, 31 }, second.Indices);
    Assert.Equal("opentween", second.ScreenName);
}
public void ExtractMentionEntities_ListTest()
{
    // A list reference of the form "@user/list" must be extracted as one mention entity.
    var extracted = TweetExtractor.ExtractMentionEntities("hogehoge @twitter/developers");
    var listMention = extracted.Single();

    // Indices cover the whole "@twitter/developers" span, but ScreenName does not include the "@".
    Assert.Equal(new[] { 9, 28 }, listMention.Indices);
    Assert.Equal("twitter/developers", listMention.ScreenName);
}
public void ExtractMentionEntities_SurrogatePairTest()
{
    // The leading emoji is a surrogate pair in UTF-16; the expected indices
    // (2..13) count it as a single position.
    var extracted = TweetExtractor.ExtractMentionEntities("🍣 @twitterapi");
    var mention = extracted.Single();

    Assert.Equal(new[] { 2, 13 }, mention.Indices);
    Assert.Equal("twitterapi", mention.ScreenName);
}
public void ExtractHashtagEntities_Test()
{
    // A single hashtag after plain text must yield exactly one entity.
    var extracted = TweetExtractor.ExtractHashtagEntities("hogehoge #test");
    var hashtag = extracted.Single();

    // Indices cover the whole "#test" span, but Text does not include the "#".
    Assert.Equal(new[] { 9, 14 }, hashtag.Indices);
    Assert.Equal("test", hashtag.Text);
}
public void ExtractHashtagEntities_MultipleTest()
{
    // Two hashtags in one text must be reported as two entities, in order of appearance.
    var hashtags = TweetExtractor
        .ExtractHashtagEntities("hogehoge #test #test2")
        .ToArray();

    Assert.Equal(2, hashtags.Length);

    var first = hashtags[0];
    Assert.Equal(new[] { 9, 14 }, first.Indices);
    Assert.Equal("test", first.Text);

    var second = hashtags[1];
    Assert.Equal(new[] { 15, 21 }, second.Indices);
    Assert.Equal("test2", second.Text);
}
public void ExtractHashtagEntities_SurrogatePairTest()
{
    // The leading emoji is a surrogate pair in UTF-16; the expected indices
    // (2..8) count it as a single position.
    var extracted = TweetExtractor.ExtractHashtagEntities("🍣 #sushi");
    var hashtag = extracted.Single();

    Assert.Equal(new[] { 2, 8 }, hashtag.Indices);
    Assert.Equal("sushi", hashtag.Text);
}
public void ExtractEmojiEntities_Test()
{
    // A single emoji after plain text must yield one entity carrying the
    // emoji text and the matching twemoji image URL.
    var extracted = TweetExtractor.ExtractEmojiEntities("star ✨");
    var emoji = extracted.Single();

    Assert.Equal(new[] { 5, 6 }, emoji.Indices);
    Assert.Equal("✨", emoji.Text);
    Assert.Equal("https://twemoji.maxcdn.com/2/72x72/2728.png", emoji.Url);
}
public void ExtractEmojiEntities_SurrogatePairTest()
{
    var extracted = TweetExtractor.ExtractEmojiEntities("𠮷野家 🍚");
    var emoji = extracted.Single();

    // "𠮷" and "🍚" each take two UTF-16 code units, but the indices must be
    // counted in code points.
    Assert.Equal(new[] { 4, 5 }, emoji.Indices);
    Assert.Equal("🍚", emoji.Text);
    Assert.Equal("https://twemoji.maxcdn.com/2/72x72/1f35a.png", emoji.Url);
}
public void ExtractEmojiEntities_VariationSelector_TextStyleTest()
{
    // A character explicitly requested in text style via a variation selector:
    // U+00A9 + U+FE0E (text style). No emoji entity is expected.
    const string textStyleCopyright = "©\uFE0E";

    var extracted = TweetExtractor.ExtractEmojiEntities(textStyleCopyright);

    Assert.Empty(extracted);
}
public void ExtractEmojiEntities_VariationSelector_EmojiStyleTest()
{
    // A character explicitly requested in emoji style via a variation selector:
    // U+00A9 + U+FE0F (emoji style).
    const string emojiStyleCopyright = "©\uFE0F";

    var extracted = TweetExtractor.ExtractEmojiEntities(emojiStyleCopyright);
    var emoji = extracted.Single();

    // The indices span both code points, while Text and the URL are expected
    // to omit the variation selector.
    Assert.Equal(new[] { 0, 2 }, emoji.Indices);
    Assert.Equal("©", emoji.Text);
    Assert.Equal("https://twemoji.maxcdn.com/2/72x72/a9.png", emoji.Url);
}
public void ExtractEmojiEntities_VariationSelector_UnnecessaryEmojiStyleTest()
{
    // Case where a redundant U+FE0F is attached: U+1F363 + U+FE0F (emoji style).
    const string sushiWithSelector = "🍣\uFE0F";

    var emojiEntities = TweetExtractor.ExtractEmojiEntities(sushiWithSelector).ToArray();

    Assert.Equal(2, emojiEntities.Length);

    // First entity: the emoji itself.
    var sushi = emojiEntities[0];
    Assert.Equal(new[] { 0, 1 }, sushi.Indices);
    Assert.Equal("🍣", sushi.Text);
    Assert.Equal("https://twemoji.maxcdn.com/2/72x72/1f363.png", sushi.Url);

    // Second entity: the redundant selector, expected with empty Text and Url.
    var selector = emojiEntities[1];
    Assert.Equal(new[] { 1, 2 }, selector.Indices);
    Assert.Equal("", selector.Text);
    Assert.Equal("", selector.Url);
}
public void ExtractEmojiEntities_CombiningCharacterTest()
{
    // U+0023 U+20E3: two code points combined into one glyph (ligature).
    const string keycapNumberSign = "#⃣";

    var extracted = TweetExtractor.ExtractEmojiEntities(keycapNumberSign);
    var emoji = extracted.Single();

    Assert.Equal(new[] { 0, 2 }, emoji.Indices);
    Assert.Equal(keycapNumberSign, emoji.Text);
    Assert.Equal("https://twemoji.maxcdn.com/2/72x72/23-20e3.png", emoji.Url);
}
public void ExtractEmojiEntities_Unicode10Test()
{
    // Emoji added in Unicode 10.0 / Emoji 5.0: U+1F992 (GIRAFFE).
    const string giraffe = "🦒";

    var extracted = TweetExtractor.ExtractEmojiEntities(giraffe);
    var emoji = extracted.Single();

    Assert.Equal(new[] { 0, 1 }, emoji.Indices);
    Assert.Equal(giraffe, emoji.Text);
    Assert.Equal("https://twemoji.maxcdn.com/2/72x72/1f992.png", emoji.Url);
}
public void ExtractEmojiEntities_Unicode11Test()
{
    // Emoji added in Unicode 11.0: U+1F9B8 (SUPERHERO).
    const string superhero = "🦸";

    var extracted = TweetExtractor.ExtractEmojiEntities(superhero);
    var emoji = extracted.Single();

    Assert.Equal(new[] { 0, 1 }, emoji.Indices);
    Assert.Equal(superhero, emoji.Text);
    Assert.Equal("https://twemoji.maxcdn.com/2/72x72/1f9b8.png", emoji.Url);
}
public void ExtractEmojiEntities_Unicode12Test()
{
    // Emoji added in Unicode 12.0: U+1F9C5 (ONION).
    const string onion = "🧅";

    var extracted = TweetExtractor.ExtractEmojiEntities(onion);
    var emoji = extracted.Single();

    Assert.Equal(new[] { 0, 1 }, emoji.Indices);
    Assert.Equal(onion, emoji.Text);
    Assert.Equal("https://twemoji.maxcdn.com/2/72x72/1f9c5.png", emoji.Url);
}
public void ExtractEmojiEntities_EmojiModifiers_CombiningTest()
{
    // Ligature formed with an emoji modifier:
    // U+1F466 (BOY) + U+1F3FF (EMOJI MODIFIER FITZPATRICK TYPE-6).
    const string boyWithModifier = "👦\U0001F3FF";

    var extracted = TweetExtractor.ExtractEmojiEntities(boyWithModifier);
    var emoji = extracted.Single();

    Assert.Equal(new[] { 0, 2 }, emoji.Indices);
    Assert.Equal(boyWithModifier, emoji.Text);
    Assert.Equal("https://twemoji.maxcdn.com/2/72x72/1f466-1f3ff.png", emoji.Url);
}
public void ExtractEmojiEntities_EmojiModifiers_SingleTest()
{
    // An emoji modifier on its own is also displayed as an emoji:
    // U+1F3FF (EMOJI MODIFIER FITZPATRICK TYPE-6).
    const string loneModifier = "\U0001F3FF";

    var extracted = TweetExtractor.ExtractEmojiEntities(loneModifier);
    var emoji = extracted.Single();

    Assert.Equal(new[] { 0, 1 }, emoji.Indices);
    Assert.Equal(loneModifier, emoji.Text);
    Assert.Equal("https://twemoji.maxcdn.com/2/72x72/1f3ff.png", emoji.Url);
}
public void ExtractEmojiEntities_EmojiZWJSequenceTest()
{
    // An emoji expressed by joining several emoji with U+200D (ZERO WIDTH JOINER):
    // U+1F468 (MAN) + U+200D + U+1F3A8 (ARTIST PALETTE).
    const string manArtist = "👨\u200D🎨";

    var extracted = TweetExtractor.ExtractEmojiEntities(manArtist);
    var emoji = extracted.Single();

    Assert.Equal(new[] { 0, 3 }, emoji.Indices);
    Assert.Equal(manArtist, emoji.Text);
    Assert.Equal("https://twemoji.maxcdn.com/2/72x72/1f468-200d-1f3a8.png", emoji.Url);
}
public void ExtractEmojiEntities_EmojiZWJSequenceWithVariationSelectorTest()
{
    // An emoji joined with U+200D (ZERO WIDTH JOINER) that also contains the
    // variation selector U+FE0F. In this case U+FE0F is not removed when the URL is generated.
    // U+1F3C3 (RUNNER) + U+200D + U+2640 (FEMALE SIGN) + U+FE0F
    const string womanRunning = "🏃\u200D♀\uFE0F";

    var extracted = TweetExtractor.ExtractEmojiEntities(womanRunning);
    var emoji = extracted.Single();

    Assert.Equal(new[] { 0, 4 }, emoji.Indices);
    Assert.Equal(womanRunning, emoji.Text);
    Assert.Equal("https://twemoji.maxcdn.com/2/72x72/1f3c3-200d-2640-fe0f.png", emoji.Url);
}