# 代码不多，就此贴下，轻喷。无聊时打发下时光。用法：ruby baike.rb -i 页数 -p 代理
#encoding: utf-8
require "httpclient"
require "nokogiri"
# Directory where downloaded thumbnails are stored.
IMAGE_DIR = "./baike"
# Proxy used when the command line says `-p default`.
DEFAULT_PROXY = "http://192.168.2.135:808"
# Rule printed before and after every post.
POST_SEPARATOR = "---------------------------------------------------------"

# Follows +selectors+ one after another from +node+, taking the first match
# at each step, and returns the text content of the node reached.
#
# @param node [#css] node to start from
# @param selectors [Array<String>] CSS selectors applied in order
# @return [String, nil] the text, or nil when any step matches nothing
def first_text(node, *selectors)
  selectors.each do |selector|
    node = node.css(selector).first
    return nil unless node
  end
  node.content
end

# Downloads the image referenced by a post's thumbnail node into IMAGE_DIR.
#
# @param thumb [#css] the post's "div.thumb" node
# @param client [#get] HTTP client used for the download
# @return [String, nil] path of the file written, or nil when the node has
#   no <img> with a src attribute
def save_thumbnail(thumb, client)
  img = thumb.css("img").first
  src = img && img["src"]
  return nil unless src

  # Fetch before opening the file so a failed request leaves no empty file.
  body = client.get(src).body
  filename = "#{IMAGE_DIR}/#{Time.now.to_i}.jpg"
  File.open(filename, "wb") { |file| file.write(body) }
  filename
end

# Fetches one listing page and walks through its posts interactively.
#
# For each "div.block" inside the first "div.col1" it prints the text of the
# first link in "div.detail" and the text of "div.content". When the post has
# a "div.thumb" image, the image is saved under IMAGE_DIR and handed to the
# shell's `start` command (presumably the Windows "open with default
# application" command). It then waits for a line on standard input before
# moving to the next post.
#
# @param index [Integer] listing page number to fetch
# @param client [HTTPClient] client used for all requests; a fresh one
#   without a proxy is created when omitted
# @return [Enumerable] the posts found on the page (empty when there are none)
def spidr(index = 1, client = HTTPClient.new)
  page = client.get_content("http://www.qiushibaike.com/8hr/page/#{index}?s=4593955")
  column = Nokogiri.parse(page).css("div.col1").first
  posts = column ? column.css("div.block") : []
  puts "item size #{posts.size}"
  posts.each do |item|
    puts POST_SEPARATOR
    puts first_text(item, "div.detail", "a")
    puts first_text(item, "div.content")
    thumb = item.css("div.thumb").first
    image = thumb && save_thumbnail(thumb, client)
    system "start #{image}" if image
    puts POST_SEPARATOR
    # Read from $stdin explicitly: bare `gets` goes through ARGF and would
    # try to open leftover command-line arguments as files.
    $stdin.gets
  end
end

# Consumes command-line arguments from +argv+.
#
# Recognised options (leading dashes are optional):
#   -p PROXY   proxy URL, or the word "default" for DEFAULT_PROXY
#   -i PAGE    listing page to start from (values below 1 fall back to 1)
#
# @param argv [Array<String>] argument list; it is emptied in the process
# @return [Array(String, Integer)] proxy (nil when not given) and start page
def parse_options(argv)
  proxy = nil
  start_index = 1
  while (flag = argv.shift)
    case flag
    when /\A-{0,2}p\z/
      value = argv.shift
      proxy = value == "default" ? DEFAULT_PROXY : value
    when /\A-{0,2}i\z/i
      # A missing or non-numeric value converts to 0, so clamp to page 1.
      start_index = [argv.shift.to_i, 1].max
    else
      warn "ignoring unknown argument: #{flag}"
    end
  end
  [proxy, start_index]
end

proxy, start_index = parse_options(ARGV)
Dir.mkdir(IMAGE_DIR) unless Dir.exist?(IMAGE_DIR)

client = HTTPClient.new
# Only configure a proxy when one was actually requested.
client.proxy = proxy if proxy

page = start_index
loop do
  # Stop once a page has no posts instead of requesting pages forever.
  break if spidr(page, client).empty?
  page += 1
end