OSDN Git Service

read zip on scrap
[tdcgexplorer/nimono-crawlers.git] / bin / 3ch-idx.rb
1 #!/usr/bin/ruby
2 # download from 3ch
3 # http://www.esc-j.net/tech-arts/ta3dc/t1a931d9c1s9.html
4
5 $LOAD_PATH.unshift File.expand_path(File.dirname(__FILE__) + '/../lib')
6 require 'sn_uploader'
7
# Extracts upload file numbers from the download links found in an HTML page.
#
# Every double-quoted +href+ attribute in +text+ is inspected; those that
# contain the uploader's download URL
# (<tt>.../upload.cgi?mode=dl&file=NNN</tt>) contribute their numeric part.
# The query separator is accepted both as a raw <tt>&</tt> and as the
# HTML-escaped <tt>&amp;</tt>, since attribute values in well-formed HTML
# carry the escaped form.
#
# @param text [String] HTML source to scan
# @yield [fileno] called once per matching link, in document order
# @yieldparam fileno [String] the digits captured from the link (not converted
#   to Integer, so leading zeros are preserved)
# @return [Enumerator] when no block is given; otherwise the result of
#   String#scan with a block (the scanned string)
def each_fileno(text)
  return enum_for(:each_fileno, text) unless block_given?

  dl_prefix = Regexp.escape('http://www.esc-j.net/tech-arts/ta3dc/upload.cgi?mode=dl')
  file_re = Regexp.new(dl_prefix + '&(?:amp;)?file=(\d+)')

  # scan with a capture group yields one-element arrays; unpack the href.
  text.scan(/href="(.+?)"/) do |(href)|
    md = file_re.match(href)
    yield md[1] if md
  end
end
17
# --- Uploader client configuration -----------------------------------------
# All settings below are passed to SnUploader (from lib/sn_uploader); their
# exact effect is defined there.
uploader = SnUploader.new
uploader.host = "www.esc-j.net"
uploader.root_path = "/tech-arts/ta3dc"
uploader.base_html = "t1a931d9c1s9.html"
uploader.kcode = 'U' # presumably selects UTF-8 handling -- confirm in SnUploader
# HTTP Basic credentials: pack('m') base64-encodes and appends a newline,
# which chomp removes.
basic_credentials = ["tech:mybride"].pack('m').chomp
uploader.authorization = "Basic " + basic_credentials
uploader.local_dir = '/Volumes/uploader/arc/3ch'

# --- Sync loop ---------------------------------------------------------------
# Fetch the index page, then for each file number linked from it print a
# "<fileno>\t<status>" line and download the file unless a local file named
# TA3CHnnnn.<something> is already reported by the uploader.
each_fileno(uploader.get_base) do |fileno|
  stem = format("TA3CH%04d", fileno.to_i)
  already_present = uploader.local_file_match?(/\A#{Regexp.escape(stem)}\./)
  status = already_present ? 'exist' : 'download'
  puts "#{fileno}\t#{status}"
  uploader.download(fileno) unless already_present
end