1 // OpenTween - Client of Twitter
2 // Copyright (c) 2012 kim_upsilon (@kim_upsilon) <https://upsilo.net/~upsilon/>
3 // All rights reserved.
5 // This file is part of OpenTween.
7 // This program is free software; you can redistribute it and/or modify it
8 // under the terms of the GNU General Public License as published by the Free
9 // Software Foundation; either version 3 of the License, or (at your option)
12 // This program is distributed in the hope that it will be useful, but
13 // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 // You should have received a copy of the GNU General Public License along
18 // with this program. If not, see <http://www.gnu.org/licenses/>, or write to
19 // the Free Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
20 // Boston, MA 02110-1301, USA.
23 using System.Collections.Generic;
26 using System.Net.Http;
28 using System.Text.RegularExpressions;
29 using System.Threading;
30 using System.Threading.Tasks;
32 namespace OpenTween.Thumbnail.Services
/// <summary>
/// Extracts a thumbnail URL by scraping the og:image or twitter:image meta tags of a page.
/// </summary>
37 class MetaThumbnailService : IThumbnailService
/// <summary>
/// Matches a <c>meta</c> element whose <c>name</c> or <c>property</c> attribute is directly
/// followed by a <c>content</c> or <c>value</c> attribute. The two attribute values are
/// captured in the <c>name</c> and <c>content</c> groups.
/// </summary>
protected static Regex metaPattern = new Regex(
    "<meta (name|property)=[\"'](?<name>.+?)[\"'] (content|value)=[\"'](?<content>.+?)[\"']");

/// <summary>
/// Meta tag names whose value is accepted as a thumbnail URL.
/// </summary>
protected static string[] propertyNames = new string[] { "twitter:image", "og:image" };
/// <summary>
/// HTTP client used for requests: the client handed to the constructor when there is one,
/// otherwise <c>HttpConnection.GlobalHttpClient</c>.
/// </summary>
protected HttpClient http
{
    get
    {
        if (this.localHttpClient != null)
            return this.localHttpClient;

        return HttpConnection.GlobalHttpClient;
    }
}

/// <summary>
/// Client supplied to the constructor; null when the caller did not provide one.
/// </summary>
private readonly HttpClient localHttpClient;

/// <summary>
/// Pattern built from the <c>urlPattern</c> constructor argument; URLs are tested against it
/// before any request is made.
/// </summary>
protected readonly Regex regex;
/// <summary>
/// Creates a service without a dedicated HTTP client (null is forwarded as the client).
/// </summary>
/// <param name="urlPattern">Regular expression that URLs handled by this service must match.</param>
public MetaThumbnailService(string urlPattern)
    : this(http: null, urlPattern: urlPattern)
{
}

/// <summary>
/// Creates a service that sends its requests through <paramref name="http"/>.
/// </summary>
/// <param name="http">HTTP client to use; may be null.</param>
/// <param name="urlPattern">Regular expression that URLs handled by this service must match.</param>
public MetaThumbnailService(HttpClient http, string urlPattern)
{
    var pattern = new Regex(urlPattern);

    this.regex = pattern;
    this.localHttpClient = http;
}
/// <summary>
/// Builds thumbnail information for <paramref name="url"/> by downloading the page and
/// reading the thumbnail URL out of its meta tags.
/// </summary>
/// <param name="url">URL to inspect; it is first tested against the pattern given to the constructor.</param>
/// <param name="post">Not referenced by any statement visible in this method.</param>
/// <param name="token">Cancellation token passed on to <c>FetchImageUrlAsync</c>.</param>
/// <returns>
/// A <c>ThumbnailInfo</c> carrying the extracted thumbnail URL, or null when no thumbnail URL
/// could be extracted from the page.
/// </returns>
// NOTE(review): this listing is missing lines between the visible statements (braces, the
// statement guarded by the first "if", the "try" that pairs with the "catch" below, and any
// further ThumbnailInfo initializers). Confirm against the complete file before changing logic.
61 public override async Task<ThumbnailInfo> GetThumbnailInfoAsync(string url, PostClass post, CancellationToken token)
// Only URLs matching the configured pattern are handled (presumably the missing guarded
// statement returns null — TODO confirm).
63 if (!this.regex.IsMatch(url))
// Download the page body as text.
68 var content = await this.FetchImageUrlAsync(url, token)
69 .ConfigureAwait(false);
// Pull the thumbnail URL out of the meta tags; give up when there is none.
71 var thumbnailUrl = this.GetThumbnailUrl(content);
72 if (string.IsNullOrEmpty(thumbnailUrl)) return null;
74 return new ThumbnailInfo
77 ThumbnailUrl = thumbnailUrl,
// A failed HTTP request is deliberately ignored here rather than propagated to the caller.
81 catch (HttpRequestException) { }
/// <summary>
/// Finds the first meta tag in <paramref name="html"/> whose name is listed in
/// <c>propertyNames</c> and returns its content as a URL.
/// </summary>
/// <param name="html">HTML source of the page to scan; may be null or empty.</param>
/// <returns>
/// The HTML-decoded content of the first matching meta tag, or null when
/// <paramref name="html"/> is null or empty or contains no matching tag.
/// </returns>
protected virtual string GetThumbnailUrl(string html)
{
    // Nothing to scan; also avoids the ArgumentNullException Regex.Matches throws for null.
    if (string.IsNullOrEmpty(html))
        return null;

    var matches = MetaThumbnailService.metaPattern.Matches(html);

    foreach (Match match in matches)
    {
        var propertyName = match.Groups["name"].Value;
        if (!MetaThumbnailService.propertyNames.Contains(propertyName))
            continue;

        // Attribute values are HTML-escaped in the page source (for example "&amp;" inside a
        // query string), so decode the value before handing it back as a URL.
        return System.Net.WebUtility.HtmlDecode(match.Groups["content"].Value);
    }

    return null;
}
/// <summary>
/// Downloads <paramref name="url"/> and returns the response body as a string.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <param name="token">Cancellation token passed to the GET request.</param>
/// <returns>The response body text.</returns>
/// <exception cref="HttpRequestException">
/// Thrown by <c>EnsureSuccessStatusCode</c> when the response status is not a success code.
/// </exception>
protected virtual async Task<string> FetchImageUrlAsync(string url, CancellationToken token)
{
    var pendingRequest = this.http.GetAsync(url, token);

    using (var response = await pendingRequest.ConfigureAwait(false))
    {
        response.EnsureSuccessStatusCode();

        var body = await response.Content.ReadAsStringAsync()
            .ConfigureAwait(false);

        return body;
    }
}