,全局安装typescript:npm install -g typescript 目前是2.0.3版本。这个版本不再需要使用typings命令。不过vscode捆绑的版本是1.8,需要做一些配置工作,参见本文解决方案。测试tsc命令:tsc 创建要写入的程序项目文件夹:mkdir test-typescript-spider 进入文件夹:cd test-typescript-spider 初始化项目:npm init 安装superagent和cheerio模块:npm i --save superagent cheerio 安装对应的类型声明模块:npm i -s @types/superagent --save 和 npm i -s @types/cheerio --save 在项目中安装typescript(这一步必须走):npm i --save typescript 用vscode打开项目文件夹。在该文件夹下创建一个tsconfig.json文件,将如下配置代码复制到其中:{"compilerOptions":{"target":"ES6","module":"commonjs","noEmitOnError":true,"noImplicitAny":true,"experimentalDecorators":true,"sourceMap":false,/*"sourceRoot":"./",*/"outDir":"./out"},"exclude":["node_modules"]}在vscode中打开"文件"-"首选项"-"工作区设置",在settings.json中添加如下配置(如果不做这个配置,打开项目时vscode会提示选择哪个版本的typescript):{"typescript.tsdk":"node_modules/typescript/lib"}创建api.ts文件,将以下代码复制到其中:import superagent=require('superagent');import cheerio=require('cheerio');export const remote_get=function(url:string){const promise=new Promise(function(resolve,reject){superagent.get(url).end(function(err,res){if(!err){resolve(res);}else{console.log(err);reject(err);}});});return promise;}创建app.ts文件并编写测试代码:import api=require('./api');const go=async()=>{let res=await api.remote_get('http://www.baidu.com/');console.log(res.text);};go();执行命令:tsc 然后:node out/app 观察输出是否正确。现在尝试抓取 http://cnodejs.org/ 首页上的文章链接。修改app.ts文件,代码如下:import api=require('./api');import cheerio=require('cheerio');const go=async()=>{const res=await api.remote_get('http://cnodejs.org/');const $=cheerio.load(res.text);let urls:string[]=[];let titles:string[]=[];$('.topic_title_wrapper').each((index,element)=>{titles.push($(element).find('.topic_title').first().text().trim());urls.push('http://cnodejs.org/'+$(element).find('.topic_title').first().attr('href'));});console.log(titles,urls);};go();观察输出,文章的标题和链接已经获取到了。现在尝试深入获取文章内容…[原文此处内容缺失]…/');const $=cheerio.load(res.text);$('.topic_title_wrapper').each(async(index,element)=>{let url=('http://cnodejs.org'+$(element).find('.topic_title').first().attr('href'));const res_content=await api.remote_get(url);const $_content=cheerio.load(
res_content.text);console.log($_content('.topic_content').first().text());});};go();可以发现因为访问服务器速度太快,出现了很多503错误。解决:添加helper.ts文件:export const wait_seconds=function(seconds:number){return new Promise(resolve=>setTimeout(resolve,seconds*1000));}修改api.ts文件为:import superagent=require('superagent');import cheerio=require('cheerio');export const get_index_urls=async function(){const res=await remote_get('http://cnodejs.org/');const $=cheerio.load(res.text);let urls:string[]=[];$('.topic_title_wrapper').each(async(index,element)=>{urls.push('http://cnodejs.org'+$(element).find('.topic_title').first().attr('href'));});return urls;};export const get_content=async function(url:string){const res=await remote_get(url);const $=cheerio.load(res.text);return $('.topic_content').first().text();};export const remote_get=function(url:string){const promise=new Promise(function(resolve,reject){superagent.get(url).end(function(err,res){if(!err){resolve(res);}else{console.log(err);reject(err);}});});return promise;}修改app.ts文件为:import api=require('./api');import helper=require('./helper');import cheerio=require('cheerio');const go=async()=>{let urls=await api.get_index_urls();for(let i=0;i…[原文此处内容缺失]…("Article",ArticleSchema);export=Article;修改api.ts为:import superagent=require('superagent');import cheerio=require('cheerio');import models=require('./models');const Article=models.Article;export const get_index_urls=async function(){const res=await remote_get('http://cnodejs.org/');const $=cheerio.load(res.text);let urls:string[]=[];$('.topic_title_wrapper').each((index,element)=>{urls.push('http://cnodejs.org'+$(element).find('.topic_title').first().attr('href'));});return urls;};export const fetch_content=async function(url:string){const res=await remote_get(url);const $=cheerio.load(res.text);let article=new Article();article.text=$('.topic_content').first().text();article.title=$('.topic_full_title').first().text().replace('置顶','').replace('精华','').trim();article.url=url;console.log('获取成功:'+article.title);article.save();};export const remote_get=function(url:string){return new Prom
ise((resolve,reject)=>{superagent.get(url).end(function(err,res){if(!err){resolve(res);}else{reject(err);}});});}修改app.ts为:import api=require('./api');import helper=require('./helper');import cheerio=require('cheerio');(async()=>{try{let urls=await api.get_index_urls();for(let i=0;i…[原文此处内容缺失]…(function(resolve,reject){console.log('get:'+url+', using proxy:'+proxy);let options:request.CoreOptions={headers:{'Cookie':'','User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36','Referer':'https://www.baidu.com/'},encoding:'utf-8',method:'GET',proxy:proxy,timeout:3000,};request(url,options,async function(err,response,body){console.log('got:'+url);if(!err){body=body.toString();current_retry=config.retries||0;console.log('bytes:'+body.length);resolve(body);}else{console.log(err);if(current_retry<=0){current_retry=config.retries||0;reject(err);}else{console.log('retry...('+current_retry+')');current_retry--;try{let body=await remote_get(url,proxy);resolve(body);}catch(e){reject(e);}}}});});return promise;}另外,IArticle.ts和Article.ts合并为一个文件,这样可能会更好。可以参考我的另一个模型:import mongoose=require('mongoose');interface IProxyModel{uri:string;ip:string;port:string;info:string;}export interface IProxy extends IProxyModel,mongoose.Document{}const ProxySchema=new mongoose.Schema({uri:{type:String},ip:{type:String},port:{type:String},info:{type:String},});export const Proxy=mongoose.model("Proxy",ProxySchema);导入的时候就这样写:import{IProxy,Proxy}from'./models';其中Proxy可用于new、find、where等操作:let x=new Proxy();let xx=await Proxy.find({});let xxx=await Proxy.where('aaa',123).exec();而IProxy用于实体对象的传递,如function xxx(p:IProxy){}