当前位置: 首页 > Web前端 > HTML

Node.js操作Dom,轻松hold住简单的爬虫

时间:2023-03-27 23:33:26 HTML

鍓嶈█鍓嶆鏃堕棿鍙戠幇浜嗕竴涓紑婧愰搴擄紝棰樼洰寰堟湁鎰忔€濄€傛垜鎯冲仛鎴愪竴涓狫SON鏂囦欢浣滀负鏁版嵁鍌ㄥ锛屾柟渚挎暣涓伐浣溿€備竴鍏?50澶氶亾棰橈紝鎴戣偗瀹氫笉鎯冲仛manualCV銆傛墍浠ユ垜鍐欎簡涓€涓剼鏈€傚湪鍐欒剼鏈殑杩囩▼涓紝鎴戝彂鐜颁簡涓€涓紑婧愰」鐩紝鍙互璁㎞ode.js鎿嶄綔Dom銆傛湁浜嗗畠锛屽啀鍔犱笂jQuery锛屽氨鍙互搴斿绠€鍗曠殑鐖櫕鎶撳彇鏁版嵁浜嗭紝鎵€浠ュ啓杩欑瘒鏂囩珷鍒嗕韩缁欏ぇ瀹躲€傛簮鐮佷粨搴撳湴鍧€锛欳atsAndMice/auto-script(github.com)瑙f瀽markdown鏂囦欢鑾峰彇鏁版嵁璇诲彇markdown鏂囦欢锛屽幓闄ゆ棤鐢ㄥ紑澶村唴瀹筩onstreadMd=(path)=>{letcontent=fs.readFileSync(path,{encoding:'utf-8'});content=content.split('---');//鍘绘帀寮€澶存棤鐢ㄧ殑鍐呭content.shift(0,1);returncontent;}markdown鏂囦欢涓殑姣忎竴娈靛唴瀹归兘鏄€斺€斿垎绂伙紝閭d箞鐩存帴鐢ㄥ畠鎶婃枃浠剁殑鍐呭鍒嗘垚鍑犲潡锛岀涓€鍧楀氨鏄紑澶寸殑鍐呭content.shift(0,1)鍘绘帀鍜屼娇鐢╩arkdown-it灏唌arkdown鍐呭娓叉煋鎴恏tml鍐呭锛屾鏃剁殑html鍙槸String锛宩sDom灏唄tml瀛楃涓茶浆鎴怐omconstmdIt=require('markdown-it')();constjsdom=require("jsdom");const{JSDOM}=jsdom;//...constparseMd=(md='')=>{constmdHtml=mdIt.render(md)constdom=newJSDOM(mdHtml)const{window}=dom杩斿洖绐楀彛}//...瀹屾垚markdown娓叉煋鎴恏tml瀛楃涓诧紝html瀛楃涓茶浆Dom鍚庯紝鐩存帴杩斿洖window鍙橀噺鍗冲彲銆傚紩鍏Query鎿嶄綔DomconstgetMdMapValue=(mds=[])=>{constmdMap=newMap();mds.forEach((md,index)=>{constid=index+1constwindow=parseMd(md)const$=require('jquery')(window);//...})returnArray.from(mdMap.values())}鎺ヤ笅鏉ヤ娇鐢?鏈夐拡瀵规€х殑鑾峰彇Dom鍐呭constgetMdMapValue=(mds=[])=>{constmdMap=newMap();mds.forEach((md,index)=>{constid=index+1constwindow=parseMd(md)const$=require('jquery')(window);//Newconstanswer=parseAnswer($('p'))constobj={id锛屾爣棰橈細$('h6').text()锛岀粨鏋滐細$('h4').text()锛屼唬鐮侊細$('.language-javascript').text(),answer}//濡傛灉閫夐」瑙f瀽澶辫触锛屽垯涓㈠純闂obj.options.push(option)杩斿洖}obj.options=[option]})mdMap.set(id,obj)}catch(error){console.warn('parsingerror:',error)}})returnArray.from(mdMap.values())}鍙︿竴绉嶉€昏緫鏄皢銆?em>绛夋爣绛捐浆鎹㈡垚`銆?*绛塵arkdown绗﹀彿鏉ュ鐞嗙浉鍏崇殑杈圭晫闂锛屽叿浣撲唬鐮佸氨涓嶈创浜嗭紝璇昏€呭彲浠ユ煡鐪嬫簮鐮佸啓涓皬鐖櫕.鎴戦€夋嫨鐖彇https://fabiaoqing.com/bqb/li...缃戠珯鐨勮〃鎯呭寘锛岄€昏緫寰堢畝鍗曪紝濡傛灉浣犵埇瀹屽嚑鍗佽锛屽簲璇ョ敤jsDom灏嗚姹傚搷搴旂殑鍐呭杞崲鎴恏tml鏂囦欢瀵煎叆Dom锛岀劧鍚庝娇鐢╦Query杩涜鎿嶄綔銆俢rawl.jsconstaxios=require('axios');const{JSDOM}=require('jsdom');let$=require('jquery');constfs=require('fs');constpath=require('path');(async()=>{const{data}=awaitaxios.get('https://fabiaoqing.com/bqb/lists/type/hot.html')constpage=newJSDOM(data)constwindow=page.window$=$(window)$('.bqppdiv').each(async(index,e)=>{constsrc=$(e).find('.image')[0].getAttribute('data-original')consttype=path.extname(src)constfileName=Date.now()+typeconst{data}=awaitaxios.get(src,{responseType:'stream'})甯搁噺涓嬭浇=path.join(__dirname,fileName)data.pipe(fs.createWriteStream(download))})})()杩欓噷鍙紨绀虹埇铏紝涓嶅啀娣卞叆銆傜被浼肩畝鍗曠殑鐖櫕鍙渶瑕乯query鍜宩sDom灏卞彲浠ユ悶瀹氾紝涓嶉渶瑕佸涔犲叾浠栧鏉傜殑鐖櫕宸ュ叿銆傛€荤粨閫氳繃瑙f瀽markdown鏂囦欢锛屽皢绾痬arkdown鏂囨湰瑙f瀽鎴恏tml瀛楃涓诧紝灏唄tml瀛楃涓茶浆鎹㈡垚鐪熸鐨凞om瀵硅薄锛岀劧鍚庝娇鐢╦Query鑾峰彇Dom锛屼粠鑰岃揪鍒颁俊鎭浆鎹㈢殑鐩殑鍦╩arkdown鏂囦欢涓墦鎴怞SON鏂囦欢浣滀负鏁版嵁瀛樺偍锛屾渶鍚庢紨绀篘odeJs鎿嶄綔Dom锛岀畝鍗曞啓涓埇铏綔涓虹粌涔犮€傚鏋滄垜鐨勬枃绔犲浣犳湁甯姪锛屼綘鐨勷煈嶅氨鏄鎴戞渶澶х殑鏀寔^_^銆傚彟澶栵紝娆㈣繋澶у鍏虫敞鎴戠殑鍏紬鍙枫€婂噷瑙堢ぞ銆嬶紝闄垜涓€璧锋垚闀裤€傛湰鏂囩敱mdnice澶氬钩鍙板彂甯?/p>