我想抓取淘宝某个列表(list)下的所有内容。每个 link 都已经获取到了,但是当我用
hpricot 和 httpclient 去获取每个 link 的源码时,puts 出来的却是一段 js:
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
function htmlspecialchars(str){ str = str.replace(//g, '>'); str = str.replace(/"/g, '"'); str = str.replace(/'/g, '''); return str; } function bol(){ var inf = (top.location!=self.location); var qs = location.search.split("?")[location.search.split("?").length-1].split("&"); qso = {}; for(var i=0;i<qs.length if var tmpa='qs[i].split("=");' qso tu="unescape(qso.tu);" tu.length exit oi='document.createElement("iframe");' oi.id="iobj" oi.border="0;" oi.frameborder="0;" oi.style.height="1px" oi.style.width="1px" document.body.appendchild cd="oi.contentWindow.document;" cd.write><bo style="margin:0px;padding:0px"><scr src="http://js.tongji.linezing.com/1023331/tongji.js" type="text/javascript"></scr><noscr><a href="http://www.linezing.com"><img src="http://img.tongji.linezing.com/1023331/tongji.gif"></a></noscr></bo>'); window.setTimeout(function(){cd.close();if(qso.tu)location.href = unescape(qso.tu);},1000); }else{ if(qso.co&&qso.co!=""){ document.cookie="tk_trace="+(qso.co)+";path=/;domain=.taobao.com"; } if(qso.tu && (qso.tu.indexOf("http%3A%2F%2Fs.click.taobao.com%2F")===0 || qso.tu.indexOf("http%3A%2F%2Fs.click.alimama.com%2F")===0 || qso.tu.indexOf("http%3A%2F%2Fitem8.taobao.com%2F")===0 || qso.tu.indexOf("http%3A%2F%2Fshop8.taobao.com%2F")===0)){ if(!window.attachEvent){ document.write('<input style="display:none" type="button" id="exe" value="" onclick="window.location=\''+unescape(qso.tu)+'\'">'); document.getElementById('exe').click(); }else{ document.write('<a style="display:none" href="'+unescape(qso.tu)+'" id="exe"></a>'); document.getElementById('exe').click(); } } } }//end of bol() bol(); </qs.length>
我想问一下,怎样才能获取到 js 执行(填充)之后的页面源码,以便我在其中搜索字段?谢谢。