OSDN Git Service

be9decf3b9545bf06b84c0b6a89d6a54edf385fc
[tdcgexplorer/nimono-crawlers.git] / lib / bowl.rb
1 require 'net/http'
2 require 'rss'
3
# One upload on bowlroll.net, identified by a code such as "dl1234".
#
# The class methods mirror the site's Atom feed to disk and enumerate the
# upload codes it lists.  An instance mirrors the upload's HTML page, extracts
# metadata from that page with regular expressions, and downloads the attached
# archive.
#
# The metadata readers (#summary, #keywords, #description, #origname and
# everything built on them) require #load to have been called first, and raise
# NoMethodError when the expected tag is not present in the page.
class Bowl
  # Host every request is sent to.
  HOST = 'bowlroll.net'.freeze
  # Directory holding the mirrored feed and the mirrored upload pages.
  SRC_DIR = '/Volumes/uploader/src/mmdbowl'.freeze
  # Directory holding the downloaded archives.
  ARC_DIR = '/Volumes/uploader/arc/mmdbowl'.freeze

  # Captures the upload code at the very end of a feed entry link.
  CODE_IN_LINK = /(dl\d+)\z/
  # Captures the numeric part of an upload code.
  NUMBER_IN_CODE = /dl(\d+)/

  # Page text meaning "has been deleted".
  REMOVED_MARK = /削除されました/
  # Page text meaning "access restriction".
  DENIED_MARK = /アクセス制限が/
  # Page text meaning "enter the authentication key".
  LOCKED_MARK = /認証キーを入力/

  TITLE_TAG = %r(<meta property="og:title" content="(.+?)">)
  KEYWORDS_TAG = %r(<meta name="keywords" content="(.+?)">)
  DESCRIPTION_TAG = %r(<meta name="description" content="(.+?)">)
  # The label in this heading means "file name".
  ORIGNAME_TAG = %r(<h3><b>ファイル名</b>&nbsp;:&nbsp;(.+?)</h3>)

  attr_reader :code

  # Path of the locally mirrored Atom feed.
  def self.rss_path
    "#{SRC_DIR}/bowlroll.atom"
  end

  # Streams the body of +response+ into +path+.
  #
  # The data is written to "<path>.part" and renamed onto +path+ only once the
  # whole body has arrived.  A transfer that fails midway therefore never
  # leaves a truncated file at +path+; that matters because #save_src and
  # #save_arc skip any path that already exists.
  #
  # Internal helper shared by the download methods; +response+ only needs to
  # respond to +read_body+ and yield chunks.
  def self.save_body(response, path)
    partial = "#{path}.part"
    File.open(partial, 'wb') do |file|
      response.read_body { |chunk| file.write(chunk) }
    end
    File.rename(partial, path)
  ensure
    # After a successful rename the partial file is gone; this only fires on failure.
    File.delete(partial) if partial && File.exist?(partial)
  end

  # Downloads the Atom feed and stores it at .rss_path.
  #
  # NOTE(review): the body is saved whatever the HTTP status is, as before.
  def self.save_rss
    Net::HTTP.start(HOST) do |http|
      http.request_get('/feed/up?feed=ATOM&count=20&tab=MikuMikuDance') do |response|
        save_body(response, rss_path)
      end
    end
  end

  # Parses the mirrored feed and yields the upload code of each entry.
  #
  # Entries without a link, or whose link does not end in "dl<digits>", are
  # skipped instead of aborting the iteration.
  def self.each_code_in_rss
    feed = RSS::Parser.parse(File.read(rss_path))
    feed.items.each do |item|
      link = item.link
      href = link && link.href
      found = href && CODE_IN_LINK.match(href)
      yield found[1] if found
    end
  end

  # code:: upload code, e.g. "dl1234".
  def initialize(code)
    @code = code
  end

  # Builds an instance for +code+ and loads its mirrored page.
  def self.load(code)
    instance = new(code)
    instance.load
    instance
  end

  # Path of the locally mirrored HTML page of this upload.
  def src_path
    "#{SRC_DIR}/#{code}.html"
  end

  # Downloads the upload page to #src_path unconditionally.
  #
  # NOTE(review): the body is saved whatever the HTTP status is, as before;
  # the status predicates below look only at the page text.
  def save_src_0
    Net::HTTP.start(HOST) do |http|
      http.request_get("/up/#{code}") do |response|
        self.class.save_body(response, src_path)
      end
    end
  end

  # Downloads the upload page unless it is already mirrored.
  # Returns true when a download was made, false when the file existed.
  def save_src
    return false if File.exist?(src_path)

    save_src_0
    true
  end

  # Reads the mirrored page into memory and returns its text.
  #
  # The page is read explicitly as UTF-8: the patterns above are UTF-8
  # literals, so matching them against a string tagged with another locale
  # encoding (e.g. US-ASCII under LANG=C) raises instead of matching.
  def load
    @source = File.read(src_path, encoding: 'UTF-8')
  end

  # True when the page says the upload has been deleted.
  def removed?
    !REMOVED_MARK.match(@source).nil?
  end

  # True when the page shows the access-restriction notice.
  def denied?
    !DENIED_MARK.match(@source).nil?
  end

  # True when the page asks for an authentication key.
  def locked?
    !LOCKED_MARK.match(@source).nil?
  end

  # Content of the og:title meta tag.
  def summary
    TITLE_TAG.match(@source)[1]
  end

  # Every keyword except the first one.
  # NOTE(review): presumably the first keyword is a fixed site-wide entry - confirm.
  def tabs
    keywords.split(/,/)[1..-1]
  end

  # Raw, comma-separated content of the keywords meta tag.
  def keywords
    KEYWORDS_TAG.match(@source)[1]
  end

  # Content of the description meta tag.
  def description
    DESCRIPTION_TAG.match(@source)[1]
  end

  # Extension of the original file name without the leading dot,
  # or nil when the file name has no extension.
  def extname
    File.extname(origname)[1..-1]
  end

  # File name shown under the "file name" heading of the page.
  def origname
    ORIGNAME_TAG.match(@source)[1]
  end

  # Multipart boundary; only referenced by the disabled content-type header below.
  def boundary
    "myboundary"
  end

  # Headers sent with the archive download request.
  def request_header
    {
      "user-agent" => "Mozilla/5.0 (Windows; U; Windows NT 6.0; ja; rv:1.9.0.10) Gecko/2009042316 Firefox/3.0.10 (.NET CLR 3.5.30729)",
      "accept" => "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
      "accept-language" => "ja,en-us;q=0.7,en;q=0.3",
      "accept-charset" => "Shift_JIS,utf-8;q=0.7,*;q=0.7",
      "referer" => "http://#{HOST}/up/#{code}"
      # "content-type" => "multipart/form-data; boundary=#{boundary}"
    }
  end

  # Form body sent with the archive download request.
  def request_body
    "upDlVal=&upDlNum=#{number}"
  end

  # Numeric part of the upload code as an Integer.
  def number
    NUMBER_IN_CODE.match(code)[1].to_i
  end

  # Local archive file name: the upload code plus the original extension.
  def name
    "#{code}.#{extname}"
  end

  # Path the downloaded archive is stored at.
  def arc_path
    "#{ARC_DIR}/#{name}"
  end

  # Posts the download form and stores the response body at #arc_path when the
  # server answers 200.  Prints the status code and content type of the
  # response.  Returns true only for a 200 response.
  def save_arc_0
    response_code = nil
    Net::HTTP.start(HOST) do |http|
      request = Net::HTTP::Post.new('/Php/p3UpDlTop.php')
      request_header.each { |key, value| request[key] = value }
      request.body = request_body

      http.request(request) do |response|
        p response.code
        p response['content-type']
        response_code = response.code.to_i
        self.class.save_body(response, arc_path) if response_code == 200
      end
    end
    response_code == 200
  end

  # Downloads the archive unless the page reports the upload as removed,
  # restricted or key-protected, or the archive already exists locally.
  # Returns false when skipped, otherwise the result of #save_arc_0.
  def save_arc
    return false if removed? || denied? || locked?
    return false if File.exist?(arc_path)

    save_arc_0
  end
end