OSDN Git Service

set encoding
[tdcgexplorer/nimono-crawlers.git] / lib / sn_uploader.rb
1 # encoding: utf-8
2
3 require 'net/http'
4 require 'erb'
5 require 'nkf'
6 require 'user_agent'
7
# HTTP client that downloads files from an uploader-style web site into a
# local directory.
#
# All requests go to +host+ and are rooted at +root_path+; request headers
# come from UserAgent.default_http_header plus an optional Authorization
# value. Downloaded files are stored in +local_dir+ under the basename of the
# remote path, and most download methods are no-ops (returning nil) when the
# file is already present locally.
#
# A download that fails part-way removes the incomplete local file, so that a
# later run is not fooled by the "already exists" check into skipping it.
class SnUploader
  # Connection/read timeouts (seconds) used by the retrying download methods.
  OPEN_TIMEOUT = 10
  READ_TIMEOUT = 20

  # Extracts the real file name from a "count.cgi?<file>" link.
  COUNTER_LINK_RE = /count\.cgi\?(.+)/

  attr_accessor :host, :root_path, :jcode, :dlkey, :authorization, :local_dir, :base_html

  # max_retries: how many times a timed-out download is retried before the
  #              Timeout::Error is re-raised; nil (the default) retries forever.
  # retry_wait:  seconds to sleep before each retry (default 20).
  attr_accessor :max_retries, :retry_wait

  def initialize
    # URL-encoded Shift_JIS bytes of "漢字" ("%8A%BF%8E%9A"). Sent as the
    # "jcode" form field by #download -- presumably a character-code
    # detection hint for the server; verify against the CGI.
    # String#encode yields the same bytes as NKF.nkf('-Ws', ...) without
    # depending on the nkf extension.
    @jcode = ERB::Util.u("漢字".encode("Shift_JIS"))
    @dlkey = "custom"
    @authorization = nil
    @local_dir = "."
    @base_html = "upload.html"
    @max_retries = nil
    @retry_wait = 20
  end

  # Switches +jcode+ to the URL-encoded UTF-8 form of "漢字" when +kcode+
  # starts with "U" (case-insensitive). Any other value leaves +jcode+ as is.
  def kcode=(kcode)
    case kcode
    when /^U/i
      @jcode = ERB::Util.u("漢字")
    end
  end

  # Returns a fresh request-header hash: a copy of the UserAgent defaults,
  # with "Authorization" added when +authorization+ is set.
  def header
    headers = UserAgent.default_http_header.dup
    headers["Authorization"] = authorization if authorization
    headers
  end

  # Maps a remote path (or bare file name) to its location inside +local_dir+.
  # Only the basename is used, so remote directory components are dropped.
  def local_path(path)
    File.join(local_dir, File.basename(path))
  end

  # True when a file with the basename of +filename+ exists in +local_dir+.
  def local_file_exist?(filename)
    File.exist?(local_path(filename))
  end

  # Returns the matched text (md[0]) of the first entry in +local_dir+ that
  # matches +filename_re+, or false when no entry matches.
  def local_file_match?(filename_re)
    Dir.foreach(local_dir) do |entry|
      matched = filename_re.match(entry)
      return matched[0] if matched
    end
    false
  end

  # Requests file number +fileno+ through upload.cgi (mode=dl) and saves the
  # file that the returned page redirects to (the "1;URL=..." target).
  #
  # Returns nil without any request when +filename_re+ is given and already
  # matches a local file. Otherwise returns the HTTP response of the file
  # download, or nil when the page contained no redirect target.
  def download(fileno, filename_re = nil)
    return nil if filename_re && local_file_match?(filename_re)

    Net::HTTP.new(host).start do |http|
      form = "file=#{fileno}&jcode=#{jcode}&mode=dl&dlkey=#{dlkey}"
      page = http.post("#{root_path}/upload.cgi", form, header)

      md = /"1;URL=(.+?)"/.match(page.body)
      # A leading "." in the redirect target stands for root_path.
      path = md && md[1].sub(/\A\./, root_path)
      puts path
      fetch_to_file(http, path, path) if path
    end
  end

  # Fetches "<root_path>/src/<filename>.html", and for every href on that
  # page of the form "count.cgi?<name>" downloads "<root_path>/src/<name>"
  # into the local file for +filename+. Returns nil when the file already
  # exists locally.
  def download_file_through_counter(filename)
    return nil if local_file_exist?(filename)

    Net::HTTP.new(host).start do |http|
      page = http.get("#{root_path}/src/#{filename}.html", header)
      page.body.scan(/href="(.+?)"/) do |href, |
        md = COUNTER_LINK_RE.match(href)
        next unless md

        fetch_to_file(http, root_path + '/src/' + md[1], filename)
      end
    end
  end

  # Downloads "<root_path>/src/<filename>" while sending the file's ".html"
  # page as the Referer header. Uses the class timeouts and retries on
  # Timeout::Error (see +max_retries+ / +retry_wait+). Returns nil when the
  # file already exists locally.
  def download_file_through_clicker(filename)
    return nil if local_file_exist?(filename)

    http = timeout_limited_http
    request_header = header
    # NOTE(review): this produces "//" after the host whenever root_path
    # starts with "/"; kept byte-for-byte because the server side may be
    # matching on it -- confirm before normalizing.
    request_header["Referer"] = "http://#{host}/#{root_path}/src/#{filename}.html"

    with_timeout_retry do
      http.start do
        fetch_to_file(http, "#{root_path}/src/#{filename}", filename, request_header)
      end
    end
  end

  # Fetches "<root_path>/src/<filename>.html" and downloads the target of the
  # first href found on it (a leading "." is replaced by "<root_path>/src").
  # The file is saved under the basename of that target. Returns nil when
  # +filename+ already exists locally or the page has no href.
  def download_file(filename)
    return nil if local_file_exist?(filename)

    Net::HTTP.new(host).start do |http|
      page = http.get("#{root_path}/src/#{filename}.html", header)
      md = /href="(.+?)"/.match(page.body)
      path = md && md[1].sub(/\A\./, root_path + "/src")
      puts path
      fetch_to_file(http, path, path) if path
    end
  end

  # Downloads "<root_path><dir>/<filename>" with no intermediate page.
  # Returns nil when the file already exists locally.
  def download_file_directly(filename, dir = '/src')
    return nil if local_file_exist?(filename)

    path = "#{root_path}#{dir}/#{filename}"
    puts path
    Net::HTTP.new(host).start do |http|
      fetch_to_file(http, path, path)
    end
  end

  # Downloads "<root_path>/?id=<filename>" into the local file for
  # +filename+. Uses the class timeouts and retries on Timeout::Error (see
  # +max_retries+ / +retry_wait+). Returns nil when the file already exists
  # locally.
  def download_file_directly_id(filename)
    return nil if local_file_exist?(filename)

    path = "#{root_path}/?id=#{filename}"
    puts path
    http = timeout_limited_http
    with_timeout_retry do
      http.start { fetch_to_file(http, path, filename) }
    end
  end

  # Returns the body of "<root_path>/<base_html>".
  def get_base
    Net::HTTP.new(host).start do |http|
      http.get("#{root_path}/#{base_html}", header).body
    end
  end

  private

  # Streams the body of a GET for +remote_path+ into the local file for
  # +local_name+ and returns the HTTP response.
  #
  # File.open is used instead of Kernel#open because +local_name+ can come
  # from server-supplied HTML. If the transfer does not complete (any
  # exception, including an interrupt), the incomplete file is removed.
  def fetch_to_file(http, remote_path, local_name, request_header = header)
    destination = local_path(local_name)
    finished = false
    response = File.open(destination, "wb") do |file|
      http.get(remote_path, request_header) { |chunk| file.write(chunk) }
    end
    finished = true
    response
  ensure
    File.delete(destination) if destination && !finished && File.exist?(destination)
  end

  # Builds an unstarted Net::HTTP for +host+ with the class timeouts applied.
  def timeout_limited_http
    http = Net::HTTP.new(host)
    http.open_timeout = OPEN_TIMEOUT
    http.read_timeout = READ_TIMEOUT
    http
  end

  # Runs the block, retrying after +retry_wait+ seconds whenever it raises
  # Timeout::Error. Retries forever when +max_retries+ is nil; otherwise the
  # error is re-raised once +max_retries+ retries have been used up.
  def with_timeout_retry
    attempts = 0
    begin
      yield
    rescue Timeout::Error
      attempts += 1
      raise if max_retries && attempts > max_retries

      puts "timeout..."
      sleep(retry_wait)
      puts "retry"
      retry
    end
  end
end