#! /usr/local/bin/ruby
require 'nokogiri'
require 'open-uri'
require 'time' # Time#iso8601, used for the start/end timestamps

# Crude sequential benchmark: fetch the same page over and over with open-uri,
# printing one dot per completed request. The run ends on the first HTTP error
# response or when interrupted with Ctrl-C; either way the number of completed
# requests and the end timestamp are printed so a rate can be worked out.
fetched = 0
puts Time.now.iso8601
begin
  loop do
    # URI.open is the supported open-uri entry point; plain Kernel#open no
    # longer accepts URLs on Ruby 3.0 and later.
    URI.open('http://www.example.com', &:read)
    fetched += 1
    print '.'
  end
rescue OpenURI::HTTPError => e
  warn e.message
rescue Interrupt
  # Ctrl-C is the expected way to end the run; fall through to the summary.
ensure
  # Printed on every exit path (HTTP error, Ctrl-C, or an unexpected failure
  # that keeps propagating) so the measurement is never lost.
  puts fetched.to_s
  puts Time.now.iso8601
end
平均下来 60 秒只能抓取 100 次多一点,和浏览器比起来简直就是龟速。有的站点还会遇到 403 Forbidden。
用 em-http-request 写了一个测试用例,感觉比 open-uri 还要慢:
#! /usr/local/bin/ruby
require 'eventmachine'
require 'em-http'
require 'nokogiri'
# Fetches a handful of ruby-china.org topic pages with em-http-request, prints
# each response body, and stops the EventMachine reactor once every request
# has either succeeded or failed.
@count = 0
@topic_ids = %w{14854 11168 14769 14875}
@conn = EventMachine::HttpRequest.new('http://www.ruby-china.org')

# Options shared by every request; only :path differs per topic, so the hash
# is built once here and merged with the path inside the loop.
request_options = {
  :redirects => 5,
  :keepalive => true,
  :head => {
    'accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'accept-encoding' => 'gzip,deflate,sdch',
    'accept-language' => 'en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4',
    'cache-control' => 'max-age=0',
    'user-agent' => 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.76 Safari/537.36'
  }
}

# Marks one request as settled (success or failure) and stops the reactor
# after the last one, so the script terminates on its own.
settle = lambda do
  @count += 1
  EventMachine.stop if @count == @topic_ids.length
end

EventMachine.run do
  # With no topics there would be no callback to ever stop the reactor.
  EventMachine.stop if @topic_ids.empty?

  # NOTE(review): every request is issued on the single shared @conn, so they
  # are presumably pipelined on one socket rather than run in parallel --
  # confirm against the em-http-request docs; this may explain slow results.
  @topic_ids.each do |id|
    request = @conn.get(request_options.merge(:path => "/topics/#{id}"))

    request.errback do |failed|
      # Report the failure instead of dropping it silently.
      warn "GET /topics/#{id} failed: #{failed.error}"
      settle.call
    end

    request.callback do
      # doc = Nokogiri::HTML(request.response)
      puts request.response
      settle.call
    end
  end
end