OSDN Git Service

added sn_uploader
[tdcgexplorer/nimono-crawlers.git] / lib / sn_uploader.rb
1 require 'net/http'
2 require 'erb'
3 require 'nkf'
4 require 'user_agent'
5
# HTTP client that downloads files from an uploader site reachable at
# +host+ under +root_path+ and stores them in +local_dir+.
#
# Every download method first checks whether the target already exists
# locally and returns nil without touching the network if it does.
#
# Attributes:
# host::          host name passed to Net::HTTP.new
# root_path::     URL path prefix of the uploader on that host
# jcode::         url-encoded marker string sent as the "jcode" form field
# dlkey::         value sent as the "dlkey" form field (default "custom")
# authorization:: optional value for the "Authorization" request header
# local_dir::     directory downloaded files are written to (default ".")
# base_html::     page fetched by #get_base (default "upload.html")
class SnUploader
  # Raw bytes of the two kanji U+6F22 U+5B57 in Shift_JIS. Written as byte
  # escapes so the value does not depend on the encoding of this source file.
  KANJI_SJIS_BYTES = "\x8A\xBF\x8E\x9A".freeze

  # The same two kanji encoded as UTF-8.
  KANJI_UTF8_BYTES = "\xE6\xBC\xA2\xE5\xAD\x97".freeze

  # Matches an href that goes through count.cgi; group 1 is the query part.
  COUNTER_HREF_RE = /count\.cgi\?(.+)/

  attr_accessor :host, :root_path, :jcode, :dlkey, :authorization, :local_dir, :base_html

  def initialize
    @jcode = ERB::Util.u(KANJI_SJIS_BYTES)
    @dlkey = "custom"
    @authorization = nil
    @local_dir = "."
    @base_html = "upload.html"
  end

  # Switches +jcode+ to its UTF-8 form when +kcode+ starts with "U"
  # (case-insensitive). Any other value leaves +jcode+ unchanged.
  def kcode=(kcode)
    case kcode
    when /^U/i
      @jcode = ERB::Util.u(KANJI_UTF8_BYTES)
    end
  end

  # Returns a fresh copy of the default request headers, with the
  # "Authorization" header added when +authorization+ is set.
  def header
    headers = UserAgent.default_http_header.dup
    headers["Authorization"] = authorization if authorization
    headers
  end

  # Maps any remote or local +path+ to a file directly inside +local_dir+;
  # only the basename of +path+ is kept.
  def local_path(path)
    File.join(local_dir, File.basename(path))
  end

  # True when a file with the basename of +filename+ exists in +local_dir+.
  def local_file_exist?(filename)
    File.exist?(local_path(filename))
  end

  # Returns the matched text of the first entry of +local_dir+ that matches
  # +filename_re+, or false when no entry matches.
  def local_file_match?(filename_re)
    Dir.foreach(local_dir) do |entry|
      matched = filename_re.match(entry)
      return matched[0] if matched
    end
    false
  end

  # Requests file number +fileno+ through upload.cgi (mode=dl) and saves the
  # file the response points to.
  #
  # filename_re:: optional Regexp; when some entry of +local_dir+ already
  #               matches it, nothing is downloaded and nil is returned.
  #
  # Returns the Net::HTTPResponse of the file request, or nil when the
  # download was skipped or the response contained no target URL.
  def download(fileno, filename_re = nil)
    # A Regexp cannot be turned into a path, so the directory is scanned
    # with local_file_match? rather than probed with local_file_exist?.
    return nil if filename_re && local_file_match?(filename_re)

    Net::HTTP.new(host).start do |http|
      form = "file=#{fileno}&jcode=#{jcode}&mode=dl&dlkey=#{dlkey}"
      body = http.post("#{root_path}/upload.cgi", form, header).body

      # The target is taken from a `"1;URL=..."` attribute in the page
      # (presumably a meta refresh); a leading "." is relative to root_path.
      if md = /"1;URL=(.+?)"/.match(body)
        path = md[1]
        path.sub!(/\A\./, root_path)
      end
      puts path
      save_response(http, path, path) if path
    end
  end

  # Fetches "<root_path>/src/<filename>.html" and, for every href in it that
  # goes through count.cgi, downloads the referenced file to
  # local_path(filename). Returns nil when the file already exists locally.
  def download_file_through_counter(filename)
    return nil if local_file_exist?(filename)

    Net::HTTP.new(host).start do |http|
      text = http.get("#{root_path}/src/#{filename}.html", header).body
      text.scan(/href="(.+?)"/) do |href, |
        if md = COUNTER_HREF_RE.match(href)
          save_response(http, root_path + '/src/' + md[1], filename)
        end
      end
    end
  end

  # Fetches "<root_path>/src/<filename>.html" and downloads the target of the
  # first href found in it; a leading "." in that href is resolved against
  # "<root_path>/src". The file is stored under the basename of the href.
  #
  # Returns the Net::HTTPResponse of the file request, or nil when the file
  # already exists locally or the page contains no href.
  def download_file(filename)
    return nil if local_file_exist?(filename)

    Net::HTTP.new(host).start do |http|
      body = http.get("#{root_path}/src/#{filename}.html", header).body
      if md = /href="(.+?)"/.match(body)
        path = md[1]
        path.sub!(/\A\./, root_path + "/src")
      end
      puts path
      save_response(http, path, path) if path
    end
  end

  # Downloads "<root_path><dir>/<filename>" without consulting any HTML page.
  # Returns nil when the file already exists locally.
  def download_file_directly(filename, dir = '/src')
    return nil if local_file_exist?(filename)

    path = "#{root_path}#{dir}/#{filename}"
    puts path
    Net::HTTP.new(host).start do |http|
      save_response(http, path, path)
    end
  end

  # Downloads "<root_path>/?id=<filename>" to local_path(filename) using a
  # 10 second open timeout and a 20 second read timeout.
  #
  # NOTE: on Timeout::Error this waits 20 seconds and retries with no upper
  # bound on the number of attempts; each attempt rewrites the local file
  # from scratch. Returns nil when the file already exists locally.
  def download_file_directly_id(filename)
    return nil if local_file_exist?(filename)

    path = "#{root_path}/?id=#{filename}"
    puts path
    http = Net::HTTP.new(host)
    http.open_timeout = 10
    http.read_timeout = 20
    begin
      http.start do
        save_response(http, path, filename)
      end
    rescue Timeout::Error
      puts "timeout..."
      sleep(20)
      puts "retry"
      retry
    end
  end

  # Returns the body of "<root_path>/<base_html>".
  def get_base
    Net::HTTP.new(host).start do |http|
      http.get("#{root_path}/#{base_html}", header).body
    end
  end

  private

  # Streams the body of a GET for +remote_path+ into the file
  # local_path(local_name) and returns the Net::HTTPResponse.
  # File.open is used (not Kernel#open) so that a name derived from remote
  # HTML can never be interpreted as a command to run.
  def save_response(http, remote_path, local_name)
    File.open(local_path(local_name), "wb") do |file|
      http.get(remote_path, header) { |chunk| file.write(chunk) }
    end
  end
end