Assert.Equal("http://example.com/", entity.ExpandedUrl);
Assert.Equal("http://example.com/", entity.DisplayUrl);
}
+
+ // A plain "@user" mention is reported with its screen name (at sign included)
+ // and its start / exclusive-end character offsets in the input text.
+ [Fact]
+ public void ExtractMentionEntities_Test()
+ {
+     var mentions = TweetExtractor.ExtractMentionEntities("hogehoge @twitterapi");
+     var mention = mentions.Single();
+
+     Assert.Equal("@twitterapi", mention.ScreenName);
+     Assert.Equal(new[] { 9, 20 }, mention.Indices);
+ }
+
+ // A list mention ("@user/list") is reported as exactly one entity that spans
+ // the whole "@user/list" token.
+ [Fact]
+ public void ExtractMentionEntities_ListTest()
+ {
+     var mentions = TweetExtractor.ExtractMentionEntities("hogehoge @twitter/developers");
+     var listMention = mentions.Single();
+
+     Assert.Equal("@twitter/developers", listMention.ScreenName);
+     Assert.Equal(new[] { 9, 28 }, listMention.Indices);
+ }
+
+ // A "#tag" token is reported with its text (hash sign included) and its
+ // start / exclusive-end character offsets in the input text.
+ [Fact]
+ public void ExtractHashtagEntities_Test()
+ {
+     var hashtags = TweetExtractor.ExtractHashtagEntities("hogehoge #test");
+     var hashtag = hashtags.Single();
+
+     Assert.Equal("#test", hashtag.Text);
+     Assert.Equal(new[] { 9, 14 }, hashtag.Indices);
+ }
}
}
}
}
}
+
+ /// <summary>
+ /// Extracts the mentions contained in <paramref name="text"/> and returns them as entities.
+ /// </summary>
+ /// <remarks>
+ /// List mentions ("@user/list") are yielded first, followed by plain user mentions ("@user"),
+ /// so the sequence is not ordered by position. The method is an iterator: the text is only
+ /// scanned when the result is enumerated.
+ /// </remarks>
+ /// <param name="text">The text to scan for mentions.</param>
+ /// <returns>
+ /// One entity per mention. ScreenName holds the matched token including the leading at sign;
+ /// Indices holds the start offset and the exclusive end offset of that token.
+ /// </returns>
+ public static IEnumerable<TwitterEntityMention> ExtractMentionEntities(string text)
+ {
+     // List mentions
+     var matchesAtList = Regex.Matches(text, @"(^|[^a-zA-Z0-9_/])([@@][a-zA-Z0-9_]{1,20}/[a-zA-Z][a-zA-Z0-9\p{IsLatin-1Supplement}\-]{0,79})");
+     foreach (var match in matchesAtList.Cast<Match>())
+     {
+         var groupMention = match.Groups[2];
+         var startPos = groupMention.Index;
+         var endPos = startPos + groupMention.Length;
+
+         yield return new TwitterEntityMention
+         {
+             Indices = new[] { startPos, endPos },
+             ScreenName = groupMention.Value,
+         };
+     }
+
+     // Plain user mentions.
+     // The trailing delimiter is checked with a lookahead instead of being consumed:
+     // a consumed delimiter cannot also serve as the leading delimiter of the next
+     // match, which made the second mention in text such as "@a @b" go undetected.
+     var matchesAtUser = Regex.Matches(text, "(^|[^a-zA-Z0-9_/])([@@][a-zA-Z0-9_]{1,20})(?=[^a-zA-Z0-9_/]|$)");
+     foreach (var match in matchesAtUser.Cast<Match>())
+     {
+         var groupMention = match.Groups[2];
+         var startPos = groupMention.Index;
+         var endPos = startPos + groupMention.Length;
+
+         yield return new TwitterEntityMention
+         {
+             Indices = new[] { startPos, endPos },
+             ScreenName = groupMention.Value,
+         };
+     }
+ }
+
+ /// <summary>
+ /// Extracts the hashtags contained in <paramref name="text"/> and returns them as entities.
+ /// </summary>
+ /// <remarks>
+ /// Matching uses the Twitter.HASHTAG pattern. Capture group 2 (presumably the hash sign) and
+ /// capture group 3 (presumably the tag body) are concatenated to form the entity text.
+ /// The method is an iterator: the text is only scanned when the result is enumerated.
+ /// </remarks>
+ /// <param name="text">The text to scan for hashtags.</param>
+ /// <returns>One entity per match, with its text and its start / exclusive-end offsets.</returns>
+ public static IEnumerable<TwitterEntityHashtag> ExtractHashtagEntities(string text)
+ {
+     foreach (Match hashtagMatch in Regex.Matches(text, Twitter.HASHTAG))
+     {
+         var sharp = hashtagMatch.Groups[2];
+         var body = hashtagMatch.Groups[3];
+
+         // The entity starts at group 2 and spans the combined length of both groups.
+         var begin = sharp.Index;
+         var end = begin + sharp.Length + body.Length;
+         var indices = new[] { begin, end };
+
+         yield return new TwitterEntityHashtag
+         {
+             Text = sharp.Value + body.Value,
+             Indices = indices,
+         };
+     }
+ }
}
}
this.CheckStatusCode(res, content);
}
- // Simple holder for a pair of string indices (fromIndex, toIndex).
- private class range
- {
- public int fromIndex { get; set; } // first index of the range
- public int toIndex { get; set; } // last index of the range
- // Stores both indices as given; no validation is performed.
- public range(int fromIndex, int toIndex)
- {
- this.fromIndex = fromIndex;
- this.toIndex = toIndex;
- }
- }
- /// <summary>
- /// Converts plain text into HTML by wrapping URLs, list mentions, user mentions, hashtags and
- /// "sm"/"nm" numeric IDs in anchor tags, then passes the result through PreProcessUrl and AdjustHtml.
- /// </summary>
- /// <param name="Text">Source text; when null the method returns null.</param>
- /// <param name="AtList">Receives the lower-cased screen name of every mention found that is not already in the list.</param>
- /// <param name="media">Optional map; for each URL not already present as a key, the URL is added with the value returned by ShortUrl.Instance.ExpandUrlAsync. May be null.</param>
- /// <returns>The value of AdjustHtml(PreProcessUrl(...)) applied to the linked text, or null when Text is null.</returns>
- public async Task<string> CreateHtmlAnchorAsync(string Text, List<string> AtList, Dictionary<string, string> media)
- {
- if (Text == null) return null;
- // Swap '>' and '<' for placeholder strings so the angle brackets of the anchors inserted below
- // can be told apart from those in the original text; the placeholders are restored near the end.
- var retStr = Text.Replace(">", "<<<<<tweenだいなり>>>>>").Replace("<", "<<<<<tweenしょうなり>>>>>");
- // Regular expression for URIs
- //const string url_valid_domain = "(?<domain>(?:[^\p{P}\s][\.\-_](?=[^\p{P}\s])|[^\p{P}\s]){1,}\.[a-z]{2,}(?::[0-9]+)?)"
- //const string url_valid_general_path_chars = "[a-z0-9!*';:=+$/%#\[\]\-_,~]"
- //const string url_balance_parens = "(?:\(" + url_valid_general_path_chars + "+\))"
- //const string url_valid_url_path_ending_chars = "(?:[a-z0-9=_#/\-\+]+|" + url_balance_parens + ")"
- //const string pth = "(?:" + url_balance_parens +
- // "|@" + url_valid_general_path_chars + "+/" +
- // "|[.,]?" + url_valid_general_path_chars + "+" +
- // ")"
- //const string pth2 = "(/(?:" +
- // pth + "+" + url_valid_url_path_ending_chars + "|" +
- // pth + "+" + url_valid_url_path_ending_chars + "?|" +
- // url_valid_url_path_ending_chars +
- // ")?)?"
- //const string qry = "(?<query>\?[a-z0-9!*'();:&=+$/%#\[\]\-_.,~]*[a-z0-9_&=#])?"
- //const string rgUrl = "(?<before>(?:[^\""':!=#]|^|\:/))" +
- // "(?<url>(?<protocol>https?://)" +
- // url_valid_domain +
- // pth2 +
- // qry +
- // ")"
- //const string rgUrl = "(?<before>(?:[^\""':!=#]|^|\:/))" +
- // "(?<url>(?<protocol>https?://|www\.)" +
- // url_valid_domain +
- // pth2 +
- // qry +
- // ")"
- // Replace absolute URIs with links (rgUrl is defined outside this method)
- retStr = await new Regex(rgUrl, RegexOptions.IgnoreCase).ReplaceAsync(retStr, async mu =>
- {
- var sb = new StringBuilder(mu.Result("${before}<a href=\""));
- //if (mu.Result("${protocol}").StartsWith("w", StringComparison.OrdinalIgnoreCase))
- // sb.Append("http://");
- //}
- var url = mu.Result("${url}");
- var title = await ShortUrl.Instance.ExpandUrlAsync(url);
- sb.Append(url + "\" title=\"" + MyCommon.ConvertToReadableUrl(title) + "\">").Append(url).Append("</a>");
- if (media != null && !media.ContainsKey(url)) media.Add(url, title);
- return sb.ToString();
- });
-
- // Replace @-mention targets with links (lists)
- retStr = Regex.Replace(retStr,
- @"(^|[^a-zA-Z0-9_/])([@@]+)([a-zA-Z0-9_]{1,20}/[a-zA-Z][a-zA-Z0-9\p{IsLatin-1Supplement}\-]{0,79})",
- "$1$2<a href=\"/$3\">$3</a>");
-
- // Collect the lower-cased screen name of every mention into AtList, skipping duplicates
- var m = Regex.Match(retStr, "(^|[^a-zA-Z0-9_])[@@]([a-zA-Z0-9_]{1,20})");
- while (m.Success)
- {
- if (!AtList.Contains(m.Result("$2").ToLower())) AtList.Add(m.Result("$2").ToLower());
- m = m.NextMatch();
- }
- // Replace @-mention targets with links
- retStr = Regex.Replace(retStr,
- "(^|[^a-zA-Z0-9_/])([@@])([a-zA-Z0-9_]{1,20})",
- "$1$2<a href=\"/$3\">$3</a>");
-
- // Extract hashtags and replace them with links.
- // First record the span of every anchor inserted so far, so that hashtag matches
- // starting inside an existing anchor can be left untouched.
- var anchorRange = new List<range>();
- for (int i = 0; i < retStr.Length; i++)
- {
- var index = retStr.IndexOf("<a ", i);
- if (index > -1 && index < retStr.Length)
- {
- i = index;
- var toIndex = retStr.IndexOf("</a>", index);
- if (toIndex > -1)
- {
- anchorRange.Add(new range(index, toIndex + 3)); // toIndex + 3 is the position of the final '>' of "</a>"
- i = toIndex;
- }
- }
- }
- //retStr = Regex.Replace(retStr,
- // "(^|[^a-zA-Z0-9/&])([##])([0-9a-zA-Z_]*[a-zA-Z_]+[a-zA-Z0-9_\xc0-\xd6\xd8-\xf6\xf8-\xff]*)",
- // new MatchEvaluator(Function(mh As Match)
- // foreach (var rng in anchorRange)
- // {
- // if (mh.Index >= rng.fromIndex &&
- // mh.Index <= rng.toIndex) return mh.Result("$0");
- // }
- // if (IsNumeric(mh.Result("$3"))) return mh.Result("$0");
- // lock (LockObj)
- // {
- // _hashList.Add("#" + mh.Result("$3"))
- // }
- // return mh.Result("$1") + "<a href=\"" + _protocol + "twitter.com/search?q=%23" + mh.Result("$3") + "\">" + mh.Result("$2$3") + "</a>";
- // }),
- // RegexOptions.IgnoreCase)
- retStr = Regex.Replace(retStr,
- HASHTAG,
- new MatchEvaluator(mh =>
- {
- // A match that begins inside a recorded anchor span is returned unchanged
- foreach (var rng in anchorRange)
- {
- if (mh.Index >= rng.fromIndex &&
- mh.Index <= rng.toIndex) return mh.Result("$0");
- }
- // Record the hashtag in _hashList while holding LockObj
- lock (LockObj)
- {
- _hashList.Add("#" + mh.Result("$3"));
- }
- return mh.Result("$1") + "<a href=\"https://twitter.com/search?q=%23" + mh.Result("$3") + "\">" + mh.Result("$2$3") + "</a>";
- }),
- RegexOptions.IgnoreCase);
-
-
- // Link "sm"/"nm" followed by 1-10 digits to the corresponding nicovideo.jp watch URL
- retStr = Regex.Replace(retStr, "(^|[^a-zA-Z0-9_/&##@@>=.~])(sm|nm)([0-9]{1,10})", "$1<a href=\"http://www.nicovideo.jp/watch/$2$3\">$2$3</a>");
-
- // Restore the '>' and '<' characters that were replaced by placeholders at the start
- retStr = retStr.Replace("<<<<<tweenだいなり>>>>>", ">").Replace("<<<<<tweenしょうなり>>>>>", "<");
-
- //retStr = AdjustHtml(ShortUrl.Resolve(PreProcessUrl(retStr), true)) // IDN replacement, short-URI resolution, convert @ links from relative to absolute and add the target attribute
- retStr = AdjustHtml(PreProcessUrl(retStr)); // IDN replacement, short-URI resolution, convert @ links from relative to absolute and add the target attribute
- return retStr;
- }
-
public async Task<string> CreateHtmlAnchorAsync(string text, List<string> AtList, TwitterEntities entities, List<MediaInfo> media)
{
if (entities != null)