From e4427654b3751a1d241f7af08a0b38883e4f086d Mon Sep 17 00:00:00 2001
From: Kimura Youichi
Date: Sun, 20 May 2018 00:16:58 +0900
Subject: [PATCH] =?utf8?q?TweetExtractor=E3=81=A7=E9=80=A3=E7=B6=9A?=
 =?utf8?q?=E3=81=99=E3=82=8B=E8=A4=87=E6=95=B0=E3=81=AE=E3=83=A1=E3=83=B3?=
 =?utf8?q?=E3=82=B7=E3=83=A7=E3=83=B3=E3=82=92=E6=8A=BD=E5=87=BA=E3=81=A7?=
 =?utf8?q?=E3=81=8D=E3=81=AA=E3=81=84=E4=B8=8D=E5=85=B7=E5=90=88=E3=82=92?=
 =?utf8?q?=E4=BF=AE=E6=AD=A3?=
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

---
 OpenTween.Tests/TweetExtractorTest.cs | 36 +++++++++++++++++++++++++++++++++++
 OpenTween/TweetExtractor.cs           | 18 ++++++++----------
 2 files changed, 44 insertions(+), 10 deletions(-)

diff --git a/OpenTween.Tests/TweetExtractorTest.cs b/OpenTween.Tests/TweetExtractorTest.cs
index e20f3156..15c58eca 100644
--- a/OpenTween.Tests/TweetExtractorTest.cs
+++ b/OpenTween.Tests/TweetExtractorTest.cs
@@ -64,6 +64,18 @@ namespace OpenTween
         }

         [Fact]
+        public void ExtractUrlEntities_MultipleTest()
+        {
+            var entities = TweetExtractor.ExtractUrlEntities("hogehoge http://aaa.example.com/ http://bbb.example.com/").ToArray();
+
+            Assert.Equal(2, entities.Length);
+            Assert.Equal(new[] { 9, 32 }, entities[0].Indices);
+            Assert.Equal("http://aaa.example.com/", entities[0].Url);
+            Assert.Equal(new[] { 33, 56 }, entities[1].Indices);
+            Assert.Equal("http://bbb.example.com/", entities[1].Url);
+        }
+
+        [Fact]
         public void ExtractUrlEntities_SurrogatePairTest()
         {
             var entity = TweetExtractor.ExtractUrlEntities("✨ http://example.com/ ✨").Single();
@@ -111,6 +123,18 @@ namespace OpenTween
         }

         [Fact]
+        public void ExtractMentionEntities_MultipleTest()
+        {
+            var entities = TweetExtractor.ExtractMentionEntities("hogehoge @twitterapi @opentween").ToArray();
+
+            Assert.Equal(2, entities.Length);
+            Assert.Equal(new[] { 9, 20 }, entities[0].Indices);
+            Assert.Equal("twitterapi", entities[0].ScreenName);
+            Assert.Equal(new[] { 21, 31 }, entities[1].Indices);
+            Assert.Equal("opentween", entities[1].ScreenName);
+        }
+
+        [Fact]
         public void ExtractMentionEntities_ListTest()
         {
             var entity = TweetExtractor.ExtractMentionEntities("hogehoge @twitter/developers").Single();
@@ -129,5 +153,17 @@ namespace OpenTween
             Assert.Equal(new[] { 9, 14 }, entity.Indices);
             Assert.Equal("test", entity.Text);
         }
+
+        [Fact]
+        public void ExtractHashtagEntities_MultipleTest()
+        {
+            var entities = TweetExtractor.ExtractHashtagEntities("hogehoge #test #test2").ToArray();
+
+            Assert.Equal(2, entities.Length);
+            Assert.Equal(new[] { 9, 14 }, entities[0].Indices);
+            Assert.Equal("test", entities[0].Text);
+            Assert.Equal(new[] { 15, 21 }, entities[1].Indices);
+            Assert.Equal("test2", entities[1].Text);
+        }
     }
 }
diff --git a/OpenTween/TweetExtractor.cs b/OpenTween/TweetExtractor.cs
index eea387c8..ca00cc08 100644
--- a/OpenTween/TweetExtractor.cs
+++ b/OpenTween/TweetExtractor.cs
@@ -105,32 +105,30 @@ namespace OpenTween
         public static IEnumerable<TwitterEntityMention> ExtractMentionEntities(string text)
         {
             // リスト
-            var matchesAtList = Regex.Matches(text, @"(^|[^a-zA-Z0-9_/])([@＠][a-zA-Z0-9_]{1,20}/[a-zA-Z][a-zA-Z0-9\p{IsLatin-1Supplement}\-]{0,79})");
+            var matchesAtList = Regex.Matches(text, @"(?<=^|[^a-zA-Z0-9_/])([@＠][a-zA-Z0-9_]{1,20}/[a-zA-Z][a-zA-Z0-9\p{IsLatin-1Supplement}\-]{0,79})");
             foreach (var match in matchesAtList.Cast<Match>())
             {
-                var groupMention = match.Groups[2];
-                var startPos = groupMention.Index;
-                var endPos = startPos + groupMention.Length;
+                var startPos = match.Index;
+                var endPos = startPos + match.Length;

                 yield return new TwitterEntityMention
                 {
                     Indices = new[] { startPos, endPos },
-                    ScreenName = groupMention.Value.Substring(1), // 先頭の「@」は取り除く
+                    ScreenName = match.Value.Substring(1), // 先頭の「@」は取り除く
                 };
             }

             // 通常のメンション
-            var matchesAtUser = Regex.Matches(text, "(^|[^a-zA-Z0-9_/])([@＠][a-zA-Z0-9_]{1,20})([^a-zA-Z0-9_/]|$)");
+            var matchesAtUser = Regex.Matches(text, "(?<=^|[^a-zA-Z0-9_/])([@＠][a-zA-Z0-9_]{1,20})(?=[^a-zA-Z0-9_/]|$)");
             foreach (var match in matchesAtUser.Cast<Match>())
             {
-                var groupMention = match.Groups[2];
-                var startPos = groupMention.Index;
-                var endPos = startPos + groupMention.Length;
+                var startPos = match.Index;
+                var endPos = startPos + match.Length;

                 yield return new TwitterEntityMention
                 {
                     Indices = new[] { startPos, endPos },
-                    ScreenName = groupMention.Value.Substring(1), // 先頭の「@」は取り除く
+                    ScreenName = match.Value.Substring(1), // 先頭の「@」は取り除く
                 };
             }
         }
-- 
2.11.0