OSDN Git Service

0b4a2a915b272cc5cc9be93fe87d5d38a9054ee7
[opentween/open-tween.git] / OpenTween.Tests / TweetExtractorTest.cs
1 // OpenTween - Client of Twitter
2 // Copyright (c) 2015 kim_upsilon (@kim_upsilon) <https://upsilo.net/~upsilon/>
3 // All rights reserved.
4 //
5 // This file is part of OpenTween.
6 //
7 // This program is free software; you can redistribute it and/or modify it
8 // under the terms of the GNU General Public License as published by the Free
9 // Software Foundation; either version 3 of the License, or (at your option)
10 // any later version.
11 //
12 // This program is distributed in the hope that it will be useful, but
13 // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 // for more details.
16 //
17 // You should have received a copy of the GNU General Public License along
18 // with this program. If not, see <http://www.gnu.org/licenses/>, or write to
19 // the Free Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
20 // Boston, MA 02110-1301, USA.
21
22 using System;
23 using System.Collections.Generic;
24 using System.Linq;
25 using System.Text;
26 using System.Threading.Tasks;
27 using OpenTween.Api;
28 using Xunit;
29
30 namespace OpenTween
31 {
32     public class TweetExtractorTest
33     {
34         [Fact]
35         public void ExtractUrls_Test()
36         {
37             Assert.Equal(new[] { "http://example.com/" }, TweetExtractor.ExtractUrls("http://example.com/"));
38             Assert.Equal(new[] { "http://example.com/hogehoge" }, TweetExtractor.ExtractUrls("http://example.com/hogehoge"));
39             Assert.Equal(new[] { "http://example.com/" }, TweetExtractor.ExtractUrls("hogehoge http://example.com/"));
40
41             Assert.Equal(new[] { "https://example.com/" }, TweetExtractor.ExtractUrls("https://example.com/"));
42             Assert.Equal(new[] { "https://example.com/hogehoge" }, TweetExtractor.ExtractUrls("https://example.com/hogehoge"));
43             Assert.Equal(new[] { "https://example.com/" }, TweetExtractor.ExtractUrls("hogehoge https://example.com/"));
44
45             Assert.Equal(new[] { "example.com" }, TweetExtractor.ExtractUrls("example.com"));
46             Assert.Equal(new[] { "example.com/hogehoge" }, TweetExtractor.ExtractUrls("example.com/hogehoge"));
47             Assert.Equal(new[] { "example.com" }, TweetExtractor.ExtractUrls("hogehoge example.com"));
48
49             // スキーム (http://) を省略かつ末尾が ccTLD の場合は t.co に短縮されない
50             Assert.Empty(TweetExtractor.ExtractUrls("example.jp"));
51             // ただし、末尾にパスが続く場合は t.co に短縮される
52             Assert.Equal(new[] { "example.jp/hogehoge" }, TweetExtractor.ExtractUrls("example.jp/hogehoge"));
53         }
54
55         [Fact]
56         public void ExtractUrlEntities_Test()
57         {
58             var entity = TweetExtractor.ExtractUrlEntities("hogehoge http://example.com/").Single();
59
60             Assert.Equal(new[] { 9, 28 }, entity.Indices);
61             Assert.Equal("http://example.com/", entity.Url);
62             Assert.Equal("http://example.com/", entity.ExpandedUrl);
63             Assert.Equal("http://example.com/", entity.DisplayUrl);
64         }
65
66         [Fact]
67         public void ExtractUrlEntities_MultipleTest()
68         {
69             var entities = TweetExtractor.ExtractUrlEntities("hogehoge http://aaa.example.com/ http://bbb.example.com/").ToArray();
70
71             Assert.Equal(2, entities.Length);
72             Assert.Equal(new[] { 9, 32 }, entities[0].Indices);
73             Assert.Equal("http://aaa.example.com/", entities[0].Url);
74             Assert.Equal(new[] { 33, 56 }, entities[1].Indices);
75             Assert.Equal("http://bbb.example.com/", entities[1].Url);
76         }
77
78         [Fact]
79         public void ExtractUrlEntities_SurrogatePairTest()
80         {
81             var entity = TweetExtractor.ExtractUrlEntities("🍣 http://example.com/ 🍣").Single();
82
83             Assert.Equal(new[] { 2, 21 }, entity.Indices);
84             Assert.Equal("http://example.com/", entity.Url);
85             Assert.Equal("http://example.com/", entity.ExpandedUrl);
86             Assert.Equal("http://example.com/", entity.DisplayUrl);
87         }
88
89         [Fact]
90         public void ExtractUrlEntities_CompositeCharacterTest()
91         {
92             // 合成文字 é ( \u00e9 ) を含むツイート (1文字としてカウントする)
93             // 参照: https://dev.twitter.com/issues/251
94             var entity = TweetExtractor.ExtractUrlEntities("Caf\u00e9 http://example.com/").Single();
95
96             Assert.Equal(new[] { 5, 24 }, entity.Indices);
97             Assert.Equal("http://example.com/", entity.Url);
98             Assert.Equal("http://example.com/", entity.ExpandedUrl);
99             Assert.Equal("http://example.com/", entity.DisplayUrl);
100         }
101
102         [Fact]
103         public void ExtractUrlEntities_CombiningCharacterSequenceTest()
104         {
105             // 結合文字列 é ( e + \u0301 ) を含むツイート (2文字としてカウントする)
106             // 参照: https://dev.twitter.com/issues/251
107             var entity = TweetExtractor.ExtractUrlEntities("Cafe\u0301 http://example.com/").Single();
108
109             Assert.Equal(new[] { 6, 25 }, entity.Indices);
110             Assert.Equal("http://example.com/", entity.Url);
111             Assert.Equal("http://example.com/", entity.ExpandedUrl);
112             Assert.Equal("http://example.com/", entity.DisplayUrl);
113         }
114
115         [Fact]
116         public void ExtractMentionEntities_Test()
117         {
118             var entity = TweetExtractor.ExtractMentionEntities("hogehoge @twitterapi").Single();
119
120             // Indices は「@twitterapi」の範囲を指すが、ScreenName には「@」を含めない
121             Assert.Equal(new[] { 9, 20 }, entity.Indices);
122             Assert.Equal("twitterapi", entity.ScreenName);
123         }
124
125         [Fact]
126         public void ExtractMentionEntities_MultipleTest()
127         {
128             var entities = TweetExtractor.ExtractMentionEntities("hogehoge @twitterapi @opentween").ToArray();
129
130             Assert.Equal(2, entities.Length);
131             Assert.Equal(new[] { 9, 20 }, entities[0].Indices);
132             Assert.Equal("twitterapi", entities[0].ScreenName);
133             Assert.Equal(new[] { 21, 31 }, entities[1].Indices);
134             Assert.Equal("opentween", entities[1].ScreenName);
135         }
136
137         [Fact]
138         public void ExtractMentionEntities_ListTest()
139         {
140             var entity = TweetExtractor.ExtractMentionEntities("hogehoge @twitter/developers").Single();
141
142             // Indices は「@twitter/developers」の範囲を指すが、ScreenName には「@」を含めない
143             Assert.Equal(new[] { 9, 28 }, entity.Indices);
144             Assert.Equal("twitter/developers", entity.ScreenName);
145         }
146
147         [Fact]
148         public void ExtractMentionEntities_SurrogatePairTest()
149         {
150             var entity = TweetExtractor.ExtractMentionEntities("🍣 @twitterapi").Single();
151
152             Assert.Equal(new[] { 2, 13 }, entity.Indices);
153             Assert.Equal("twitterapi", entity.ScreenName);
154         }
155
156         [Fact]
157         public void ExtractHashtagEntities_Test()
158         {
159             var entity = TweetExtractor.ExtractHashtagEntities("hogehoge #test").Single();
160
161             // Indices は「#test」の範囲を指すが、Text には「#」を含めない
162             Assert.Equal(new[] { 9, 14 }, entity.Indices);
163             Assert.Equal("test", entity.Text);
164         }
165
166         [Fact]
167         public void ExtractHashtagEntities_MultipleTest()
168         {
169             var entities = TweetExtractor.ExtractHashtagEntities("hogehoge #test #test2").ToArray();
170
171             Assert.Equal(2, entities.Length);
172             Assert.Equal(new[] { 9, 14 }, entities[0].Indices);
173             Assert.Equal("test", entities[0].Text);
174             Assert.Equal(new[] { 15, 21 }, entities[1].Indices);
175             Assert.Equal("test2", entities[1].Text);
176         }
177
178         [Fact]
179         public void ExtractHashtagEntities_SurrogatePairTest()
180         {
181             var entity = TweetExtractor.ExtractHashtagEntities("🍣 #sushi").Single();
182
183             Assert.Equal(new[] { 2, 8 }, entity.Indices);
184             Assert.Equal("sushi", entity.Text);
185         }
186
187         [Fact]
188         public void ExtractEmojiEntities_Test()
189         {
190             var entity = TweetExtractor.ExtractEmojiEntities("star ✨").Single();
191
192             Assert.Equal(new[] { 5, 6 }, entity.Indices);
193             Assert.Equal("✨", entity.Text);
194             Assert.Equal("https://twemoji.maxcdn.com/2/72x72/2728.png", entity.Url);
195         }
196
197         [Fact]
198         public void ExtractEmojiEntities_SurrogatePairTest()
199         {
200             var entity = TweetExtractor.ExtractEmojiEntities("𠮷野家 🍚").Single();
201
202             // 「𠮷」「🍚」は UTF-16 でそれぞれ 2byte になるがインデックスはコードポイント単位で数えなければならない
203             Assert.Equal(new[] { 4, 5 }, entity.Indices);
204             Assert.Equal("🍚", entity.Text);
205             Assert.Equal("https://twemoji.maxcdn.com/2/72x72/1f35a.png", entity.Url);
206         }
207
208         [Fact]
209         public void ExtractEmojiEntities_VariationSelector_TextStyleTest()
210         {
211             // 異字体セレクタを使用して明示的にテキストスタイルで表示させている文字
212             var origText = "©\uFE0E"; // U+00A9 + U+FE0E (text style)
213             var entities = TweetExtractor.ExtractEmojiEntities(origText);
214
215             Assert.Empty(entities);
216         }
217
218         [Fact]
219         public void ExtractEmojiEntities_VariationSelector_EmojiStyleTest()
220         {
221             // 異字体セレクタを使用して明示的に絵文字スタイルで表示させている文字
222             var origText = "©\uFE0F"; // U+00A9 + U+FE0F (emoji style)
223             var entity = TweetExtractor.ExtractEmojiEntities(origText).Single();
224
225             Assert.Equal(new[] { 0, 2 }, entity.Indices);
226             Assert.Equal("©", entity.Text);
227             Assert.Equal("https://twemoji.maxcdn.com/2/72x72/a9.png", entity.Url);
228         }
229
230         [Fact]
231         public void ExtractEmojiEntities_CombiningCharacterTest()
232         {
233             var origText = "#⃣"; // U+0023 U+20E3 (合字)
234             var entity = TweetExtractor.ExtractEmojiEntities(origText).Single();
235
236             Assert.Equal(new[] { 0, 2 }, entity.Indices);
237             Assert.Equal("#⃣", entity.Text);
238             Assert.Equal("https://twemoji.maxcdn.com/2/72x72/23-20e3.png", entity.Url);
239         }
240
241         [Fact]
242         public void ExtractEmojiEntities_Unicode10Test()
243         {
244             // Unicode 10.0/Emoji 5.0 で追加された絵文字
245             var origText = "🦒"; // U+1F992 (GIRAFFE)
246             var entity = TweetExtractor.ExtractEmojiEntities(origText).Single();
247
248             Assert.Equal(new[] { 0, 1 }, entity.Indices);
249             Assert.Equal("🦒", entity.Text);
250             Assert.Equal("https://twemoji.maxcdn.com/2/72x72/1f992.png", entity.Url);
251         }
252
253         [Fact]
254         public void ExtractEmojiEntities_EmojiModifiers_CombiningTest()
255         {
256             // Emoji modifiers を使用した合字 (リガチャー)
257             var origText = "👦\U0001F3FF"; // U+1F466 (BOY) + U+1F3FF (EMOJI MODIFIER FITZPATRICK TYPE-6)
258             var entity = TweetExtractor.ExtractEmojiEntities(origText).Single();
259
260             Assert.Equal(new[] { 0, 2 }, entity.Indices);
261             Assert.Equal("👦\U0001F3FF", entity.Text);
262             Assert.Equal("https://twemoji.maxcdn.com/2/72x72/1f466-1f3ff.png", entity.Url);
263         }
264
265         [Fact]
266         public void ExtractEmojiEntities_EmojiModifiers_SingleTest()
267         {
268             // Emoji modifiers は単体でも絵文字として表示される
269             var origText = "\U0001F3FF"; // U+1F3FB (EMOJI MODIFIER FITZPATRICK TYPE-6)
270             var entity = TweetExtractor.ExtractEmojiEntities(origText).Single();
271
272             Assert.Equal(new[] { 0, 1 }, entity.Indices);
273             Assert.Equal("\U0001F3FF", entity.Text);
274             Assert.Equal("https://twemoji.maxcdn.com/2/72x72/1f3ff.png", entity.Url);
275         }
276
277         [Fact]
278         public void ExtractEmojiEntities_EmojiZWJSequenceTest()
279         {
280             // 複数の絵文字を U+200D (ZERO WIDTH JOINER) で繋げて表現する絵文字
281             var origText = "👨\u200D🎨"; // U+1F468 (MAN) + U+200D + U+1F3A8 (ARTIST PALETTE)
282             var entity = TweetExtractor.ExtractEmojiEntities(origText).Single();
283
284             Assert.Equal(new[] { 0, 3 }, entity.Indices);
285             Assert.Equal("👨\u200D🎨", entity.Text);
286             Assert.Equal("https://twemoji.maxcdn.com/2/72x72/1f468-200d-1f3a8.png", entity.Url);
287         }
288
289         [Fact]
290         public void ExtractEmojiEntities_EmojiZWJSequenceWithVariationSelectorTest()
291         {
292             // 複数の絵文字を U+200D (ZERO WIDTH JOINER) で繋げて表現 + 異字体セレクタ U+FE0F を含む絵文字
293             // この場合は URL 生成時に異字体セレクタ U+FE0F を除去しない
294             var origText = "🏃\u200D♀\uFE0F"; // U+1F3C3 (RUNNER) + U+200D + U+2640 (FEMARE SIGN) + U+FE0F
295             var entity = TweetExtractor.ExtractEmojiEntities(origText).Single();
296
297             Assert.Equal(new[] { 0, 4 }, entity.Indices);
298             Assert.Equal("🏃\u200D♀\uFE0F", entity.Text);
299             Assert.Equal("https://twemoji.maxcdn.com/2/72x72/1f3c3-200d-2640-fe0f.png", entity.Url);
300         }
301     }
302 }