@sleepTime = 20.0\r
@commitCount = 16\r
@savePath = if param[:path] then param[:path] else "./save" end\r
- @saveDB = if param[:db] then param[:db] else "./url.db3" end\r
+ @saveDB = if param[:db] then param[:db] else "./url.db3" end\r
@pendingURI = param[:pendingURI]\r
@pendingURI = nil if @pendingURI.to_s == ""\r
@list = URLtable.new(param={:path => @savePath, :db => @saveDB})\r
end\r
\r
def check_root(add)\r
- #p add.url.to_s\r
- #p add.referrer.to_s\r
@rootExp.each do |exp|\r
- # p exp\r
return true if exp.match(add.url.to_s)\r
return true if exp.match(add.referrer.to_s)\r
end\r
def add_nextpage_sub(path, dat)\r
return unless path\r
\r
- #p path\r
path = CGI::unescapeHTML(path.to_s)\r
return if /^(?:javascript|mailto|data|file|tel):/ni.match(path)\r
- #p "##"\r
uri = dat.uri\r
begin\r
- # p path\r
path = URI.parse( path.gsub(/[\x00-\x1F\x80-\xFF]/n){|x| '%'+x.unpack('H2')[0] } )\r
- # p path\r
rescue URI::InvalidURIError, URI::InvalidComponentError\r
# p "INV #{path}"\r
return\r
if check_cgiroot(add) then\r
add.linkCountCGI = dat.linkCountCGI+1\r
end\r
- ##add.message = path #for debug\r
\r
- #p "L #{newuri} #{add.linkCount}:#{add.linkCountCGI}"\r
return if add.linkCountCGI > @cgiMAXlink\r
\r
isroot = check_root(add)\r
- #p isroot\r
if isroot and (add.linkCount <= @rootMAXlink) then\r
- # p "U"\r
if @rootExp[0].match(add.url) or (@cgirootExp[0] and @cgirootExp[0].match(add.url)) then\r
add.priority = add.priority | 0x40000000\r
end\r
}\r
end\r
end\r
- #p path\r
end\r
\r
def add_nextpage(dat,response)\r
-=begin\r
- text = response.body\r
- #p text\r
- scriptmode = false\r
- exp = /(<script|<\/script)\b|\b(?:href|src)(?:\s*=\s*"([^\x22]*)"|=([^\x22\x27> ]+))|\burl\(([^\x29]*)\)/ni\r
- text.scan( exp ) do |t|\r
- curr=t.shift\r
- if curr=='<script'\r
- scriptmode = true\r
- end\r
- if curr=='</script'\r
- scriptmode = false\r
- end\r
- \r
- if !scriptmode then\r
- add_nextpage_sub(t[0], dat)\r
- add_nextpage_sub(t[1], dat)\r
- add_nextpage_sub(t[2], dat)\r
- end\r
- end\r
-=end\r
text = response.body\r
scriptmode = false\r
exp = /(<script|<\/script)\b|\b(href|src|value)(?:\s*=\s*"([^\x22]*)"|=([^\x22\x27> ]+))|\burl\(([^\x29]*)\)/ni\r