当前位置: 首页 > 后端技术 > Node.js

纯属娱乐——写个爬虫抓取whois信息

时间:2023-04-03 13:31:01 Node.js

// Target and approach
// -------------------
// The crawl target is west.cn (called "西数" in the messages below). Whois
// information can be looked up on the site at https://www.west.cn/web/whois...
// and Chrome devtools shows that the data itself comes from an API:
// https://www.west.cn/web/whois...
//
// Looking at the request cookies, the key `qtoken2016` is the one that matters
// (it is the anti-crawler marker; it was still called `qtoken` when this code
// was first written). The token is not generated on the lookup page. Instead,
// clicking "view whois" on https://www.west.cn/main/whoi... sends a request to
// https://www.west.cn/services/... which produces a fresh token. The response
// is obfuscated JavaScript, for example:
//
//   var l=[119,98,115,33,117,116,101,112,98,62,92,50,50,54,45,50,49,50,45,50,50,52,45,50,50,49,45,50,50,55,45,
//     50,51,51,45,50,49,58,45,50,50,54,45,50,50,52,45,50,49,55,45,50,50,54,45,50,49,50,45,50,50,51,45,
//     50,50,54,45,50,50,52,45,50,50,51,45,50,51,51,45,50,50,51,45,50,50,58,45,50,49,55,45,50,50,55,45,
//     50,49,50,94,60,119,98,115,33,101,99,105,107,114,62,92,57,45,53,45,50,56,45,50,49,45,50,50,45,
//     50,57,45,58,45,50,51,45,51,49,45,50,54,45,49,45,50,52,45,55,45,54,45,50,55,45,51,50,45,52,45,
//     50,58,45,50,53,45,50,45,56,45,51,94,60,119,98,115,33,99,62,35,35,60,103,112,115,33,41,100,62,49,60,
//     100,61,101,99,105,107,114,47,109,102,111,104,117,105,60,100,44,44,42,124,99,44,62,
//     84,117,115,106,111,104,47,103,115,112,110,68,105,98,115,68,112,101,102,41,
//     117,116,101,112,98,92,101,99,105,107,114,92,100,94,94,42,126,60,
//     37,47,100,112,112,108,106,102,41,40,114,117,112,108,102,111,51,49,50,55,40,45,99,45,124,113,98,117,105,59,40,48,40,126,42,60];
//   eval(function(p,a,c,k,e,d){e=function(c){return(c<a?'':e(parseInt(c/a)))+((c=c%a)>35?String.fromCharCode(c+29):c.toString(36))};
//     if(!''.replace(/^/,String)){while(c--)d[e(c)]=k[c]||e(c);k=[function(e){return d[e]}];e=function(){return'\\w+'};c=1;};
//     while(c--)if(k[c])p=p.replace(new RegExp('\\b'+e(c)+'\\b','g'),k[c]);return p;}
//     ('6 3=\'\';7(2=0;2<4.5;2++){3+=8.a(4[2]-1)};9(3)',11,11,'||i|t|l|length|var|for|String|eval|fromCharCode'.split('|'),0,{}))
//
// NOTE(review): the published copy of the sample had lost part of the encoder
// expression (`c<a?'':e(parseInt(c/a)))+((c=c%a)>`) and the space in `6 3`;
// both are restored above on the assumption that this is the usual "packer"
// boilerplate — confirm against a live response.
//
// The packed payload expands to
//   var t='';for(i=0;i<l.length;i++){t+=String.fromCharCode(l[i]-1)};eval(t)
// and, for the sample above, `t` decodes to
//   var tsdoa=[115,101,113,110,116,122,109,115,113,106,115,101,112,115,113,112,122,112,119,106,116,101];
//   var dbhjq=[8,4,17,10,11,18,9,12,20,15,0,13,6,5,16,21,3,19,14,1,7,2];
//   var b="";for (c=0;c<dbhjq.length;c++){b+=String.fromCharCode(tsdoa[dbhjq[c]])};
//   $.cookie('qtoken2016',b,{path:'/'});
// i.e. the script ends by handing the token to jQuery's cookie plugin. That can
// be handled by evaluating the script against a stand-in for `$.cookie`.
//
// Code
// ----

/**
 * Created by salamander on 2016/11/8.
 */

// NOTE: this module must stay in sloppy (non-strict) mode. The token script
// assigns to undeclared variables (`i`, `t`, `c`, `b`), which would throw a
// ReferenceError under 'use strict'.

const request = require('request');
const Q = require('q');
// NOTE(review): `datetime` is not referenced by any code in this module; the
// require is kept in case other parts of the project expect it to be loaded.
const datetime = require('locutus/php/datetime');

const getTokenUrl = 'http://www.west.cn/main/whois.asp?act=gettok&_=';
const whoisUrl = 'http://www.west.cn/web/whois/whoisinfo?domain=';

// Required fields.
// NOTE(review): nothing below reads this list, and it names `add_time` and
// `expire_date` while extractWestData() returns `expire` — confirm which
// field names downstream consumers expect.
const needFields = ['domain', 'registrar', 'country', 'mail', 'whoisinfo', 'add_time', 'registrant_name', 'expire_date'];

// Minimal jQuery imitation. The evaluated token script calls
// `$.cookie(name, value, options)`; this shim records the cookie name and
// value instead of writing a browser cookie.
const jQuery = { name: '', token: '' };
// `$` is looked up by the evaluated script, not by this file directly.
const $ = jQuery;
jQuery.cookie = function (name, value, options) {
  // Written through `jQuery` rather than `this` so the capture also works if
  // the script invokes the function detached from `$`.
  jQuery.name = name;
  jQuery.token = value;
};

/**
 * Runs the obfuscated token script and returns the cookie it tried to set.
 *
 * SECURITY: this executes code received from the remote server with the full
 * privileges of the Node.js process. It is kept because the token can only be
 * obtained by running that script, but it should only ever be pointed at a
 * trusted endpoint (ideally inside a sandbox such as a separate process).
 *
 * @param {string} script JavaScript source returned by the token endpoint
 * @returns {{name: string, value: string}|null} captured cookie, or null when
 *   the script did not set one
 */
function runTokenScript(script) {
  // Clear the previous capture so a stale token is never reused when the
  // script fails to set a new one.
  jQuery.name = '';
  jQuery.token = '';
  eval(script);
  if (!jQuery.token) {
    return null;
  }
  // Fall back to the historical cookie name if the script gave none.
  return { name: jQuery.name || 'qtoken', value: jQuery.token };
}

/**
 * Returns a copy of `options` with the proxy setting applied when given.
 *
 * @param {Object} options request options
 * @param {string} [proxy] proxy as "host:port" (no scheme)
 * @returns {Object} options to pass to `request`
 */
function withProxy(options, proxy) {
  if (!proxy) {
    return options;
  }
  return Object.assign({}, options, { proxy: 'http://' + proxy.trim() });
}

/**
 * Fetches whois information for a domain from west.cn.
 *
 * Two requests are made: the first obtains the anti-crawler token script, the
 * second queries the whois API with the token sent as a cookie.
 *
 * @param {string} domain domain name to look up
 * @param {string} [proxy] optional proxy as "host:port"
 * @returns {Promise<Object>} Q promise resolved with the object built by
 *   extractWestData(); rejected with an Error (token request failure) or a
 *   message string (all other failures)
 */
function getWestWhois(domain, proxy) {
  const defer = Q.defer();
  const firstOptions = withProxy({ url: getTokenUrl + Date.now() }, proxy);

  request(firstOptions, function (error, response, body) {
    if (error || response.statusCode !== 200) {
      // Always reject with a real reason; `error` is null on a non-200 reply.
      defer.reject(error || new Error('token request failed with HTTP status ' + response.statusCode));
      return;
    }

    // Simulate executing the page's JavaScript to obtain the token.
    let token;
    try {
      token = runTokenScript(body);
    } catch (err) {
      defer.reject('解析json错误:' + err);
      return;
    }
    if (!token) {
      defer.reject('生成令牌失败');
      return;
    }

    const options = withProxy({
      url: whoisUrl + encodeURIComponent(domain) + '&server=&refresh=1',
      headers: {
        // Use the cookie name the script asked for (currently `qtoken2016`)
        // rather than a hard-coded one, since the site has renamed it before.
        Cookie: `${token.name}=${token.value}`,
        'X-Requested-With': 'XMLHttpRequest',
      },
    }, proxy);

    request(options, function (error, response, body) {
      if (error || response.statusCode !== 200) {
        defer.reject('西数whois请求失败');
        return;
      }

      let data = null;
      try {
        data = JSON.parse(body);
      } catch (err) {
        defer.reject('解析json错误:' + err);
        return;
      }

      // `data` may legitimately parse to null or a non-object.
      if (!data || data.code !== 200) {
        defer.reject('查询西数whois失败');
        return;
      }
      defer.resolve(extractWestData(domain, data));
    });
  });

  return defer.promise;
}

/**
 * Extracts the whois fields of interest from a west.cn API response.
 *
 * @param {string} domain domain that was queried
 * @param {Object} data parsed API response
 * @returns {Object} the extracted fields plus `whoisinfo`, a JSON string of
 *   those same fields
 */
function extractWestData(domain, data) {
  const info = {
    domain: domain,
    mail: data['dom_em'],
    errcode: 0,
    country: solveCountry(domain, data['body']),
    registrant_name: data['dom_org'],
    registrar: data['registrer'],
    expire: data['expdate'],
  };
  // `whoisinfo` serialises exactly the fields above, in the same key order.
  return Object.assign({}, info, { whoisinfo: JSON.stringify(info) });
}

/**
 * Determines the registrant country.
 *
 * @param {string} domain domain that was queried
 * @param {string} html whois text/HTML from the response
 * @returns {string} 'CN' for .cn domains, otherwise the value found after the
 *   country label, or '' when it cannot be determined
 */
function solveCountry(domain, html) {
  if (typeof domain === 'string' && domain.toLowerCase().endsWith('.cn')) {
    return 'CN';
  }
  if (typeof html !== 'string') {
    return '';
  }
  // Capture up to the next whitespace or tag start.
  // NOTE(review): the label text is taken as published; confirm it matches
  // what the API actually returns.
  const result = html.match(/注册国家:\s*([^\s<]+)/);
  return result ? result[1].trim() : '';
}

// `getWhois` is the name used by callers; `getWestWhois` is exported as well
// for anyone referring to the function by its own name.
module.exports.getWhois = getWestWhois;
module.exports.getWestWhois = getWestWhois;

// Usage
// -----
//   const westWhois = require('./west_whois.js');
//   westWhois.getWhois('51nazi.com')
//     .then((info) => { console.log(info); })
//     .catch((reason) => { console.error(reason); });
//
// Result: the article closes by noting that the queried domain is one of the
// author's own, which the author intends to sell.