/*
 * ---------------------------------------------------------------------------
 * package.json
 * ---------------------------------------------------------------------------
 * {
 *   "name": "Spider",
 *   "version": "1.0.0",
 *   "description": "spider",
 *   "main": "index.js",
 *   "dependencies": {
 *     "async": "^1.5.0",
 *     "cheerio": "^0.19.0",
 *     "eventproxy": "^0.3.4",
 *     "superagent": "^1.4.0"
 *   },
 *   "devDependencies": {},
 *   "scripts": {
 *     "test": "node index",
 *     "start": "node server.js"
 *   }
 * }
 */

/* ---------------------------------------------------------------------------
 * server.js
 * ------------------------------------------------------------------------- */
var http = require("http");
var cheerio = require("cheerio");
var fs = require('fs');

// Directory that downloaded cover images are written into.
var IMG_DIR = "./downImgs/";

/**
 * Utility function that downloads a URL and invokes the callback with the
 * data.
 *
 * @param {string} url - Address to fetch with http.get.
 * @param {function(?string)} callback - Called once with the full response
 *     body, or with `null` when the request emits an error.
 */
function downloadPage(url, callback) {
    http.get(url, function (res) {
        var data = "";
        // Let the stream do the UTF-8 decoding. Appending raw Buffer chunks
        // to a string decodes each chunk on its own, which corrupts any
        // multi-byte character that straddles a chunk boundary.
        res.setEncoding("utf8");
        res.on('data', function (chunk) {
            data += chunk;
        });
        res.on("end", function () {
            callback(data);
        });
    }).on("error", function () {
        callback(null);
    });
}

/**
 * Crawl listing pages 1 through 12, extract one record per comic from each
 * page, and start a cover-image download for every record that has an image.
 *
 * Each record has the fields: tags, img, link, id, title, author, popular,
 * description.
 */
function start() {
    // The path must be plain ASCII: http.get refuses request paths that
    // contain unescaped non-ASCII characters.
    // NOTE(review): `type` / `page` are the presumed original path segments —
    // confirm against the live site.
    var url = 'http://ac.qq.com/Comic/index/type/4/page/';
    // Collects every scraped record. To persist the metadata, write it out
    // once all pages have finished, e.g.
    //   fs.writeFileSync('./output.json', JSON.stringify(arr));
    var arr = [];

    for (var i = 1; i < 13; i++) {
        downloadPage(url + i, function (data) {
            // downloadPage passes null when the request failed.
            if (!data) {
                return;
            }
            var $ = cheerio.load(data);
            $("div.ret-search-result>ul>li.ret-search-item").each(function (index, item) {
                var json = {};
                json.tags = [];
                json.img = $(item).find('img').attr('data-original');
                json.link = $(item).find('a.mod-cover-list-thumb').attr('href');
                // The id is the last path segment of the link. An item
                // without a link gets a null id instead of aborting the
                // whole page with a TypeError.
                json.id = json.link ? json.link.split('/').reverse()[0] : null;
                json.title = $(item).find('h3.ret-works-title>a').text();
                json.author = $(item).find('p.ret-works-author').text();
                json.popular = $(item).find('p.ret-works-tags>span>em').text();
                json.description = $(item).find('p.ret-works-decs').text();
                $(item).find('p.ret-works-tags>a').each(function (tagIndex, tag) {
                    json.tags.push($(tag).text());
                });
                // http.get throws on an undefined URL, so only download when
                // the item actually carries an image address.
                if (json.img) {
                    downloadImg(json.img);
                }
                arr.push(json);
                console.log("完成");
            });
        });
    }
}

/**
 * Download the image at `url` and save it under IMG_DIR with a random
 * numeric file name and a `.jpg` extension.
 *
 * NOTE(review): the request is made with the `http` module, so this presumably
 * expects plain http:// image addresses — confirm what the listing pages
 * actually provide.
 *
 * @param {string} url - Address of the image to download.
 */
function downloadImg(url) {
    console.log('string');

    // fs.writeFile does not create missing directories.
    if (!fs.existsSync(IMG_DIR)) {
        fs.mkdirSync(IMG_DIR);
    }

    http.get(url, function (res) {
        var imgData = "";
        // The response encoding must be set to binary, otherwise the
        // downloaded image cannot be opened.
        res.setEncoding("binary");
        res.on("data", function (chunk) {
            imgData += chunk;
        });
        res.on("end", function () {
            var fileName = IMG_DIR + Math.floor(Math.random() * 10000000) + '.jpg';
            fs.writeFile(fileName, imgData, "binary", function (err) {
                if (err) {
                    console.log(err);
                    // Do not report success for a failed write.
                    return;
                }
                console.log("downsuccess");
            });
        });
    }).on("error", function (err) {
        // Without a listener, a failed request raises an unhandled 'error'
        // event and takes the whole process down.
        console.log(err);
    });
}

exports.start = start;

/*
 * ---------------------------------------------------------------------------
 * The last file is index.js
 * ---------------------------------------------------------------------------
 * var server = require("./server");
 * server.start();
 */

/*
 * ---------------------------------------------------------------------------
 * Explanation
 * ---------------------------------------------------------------------------
 * 1. Import the required modules: http, cheerio and fs.
 * 2. The downloadPage function receives a URL and handles the data in a
 *    callback.
 * 3. The start function defines the URL data source. Here we use Tencent
 *    Comics.
 * 4. The for loop processes the content of each URL. Within it, the
 *    downloadImg function saves the images locally.
 */
