# Introduction: like selenium, pyppeteer can render web pages, but it is
# asynchronous.
#
# Installation:
#     pip install pyppeteer
#
# Written against Python 3.7.5.

# ---------------------------------------------------------------------------
# Example 1: render a JavaScript-driven page and count the quotes on it.
# ---------------------------------------------------------------------------
import asyncio

from pyppeteer import launch
from pyquery import PyQuery as pq


async def main():
    """Open one page in a fresh browser and print its number of `.quote` nodes."""
    browser = await launch()
    try:
        page = await browser.newPage()
        await page.goto("http://quotes.toscrape.com/js/")
        # Parse the rendered HTML (after scripts ran) rather than the raw response.
        doc = pq(await page.content())
        print("Quotes:", doc(".quote").length)
    finally:
        # Close the browser even if navigation or parsing raised.
        await browser.close()


asyncio.run(main())


# ---------------------------------------------------------------------------
# Example 2: a more complex case that blocks CSS, images, fonts, etc.
# ---------------------------------------------------------------------------
import asyncio

from pyppeteer import launch
from pyquery import PyQuery as pq


class Global:
    # Browser instance shared by every fetch() call; assigned in main().
    browser = None


async def intercept_request(req):
    """Block several resource types and let every other request continue.

    Registered as the page's "request" handler, so it is called with each
    intercepted request object.
    """
    if req.resourceType in [
        "image",
        "media",
        "eventsource",
        "websocket",
        "stylesheet",
        "font",
    ]:
        await req.abort()
    else:
        await req.continue_()


async def fetch():
    """Load one page in a new tab of the shared browser and print its link count."""
    page = await Global.browser.newPage()
    try:
        # Custom user agent.
        await page.setUserAgent(
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 Edge/16.16299"
        )
        await page.setViewport({"width": 1080, "height": 960})
        # Interception must be switched on before the handler can abort or
        # continue requests.
        await page.setRequestInterception(True)
        page.on("request", intercept_request)
        await page.goto("https://juejin.im/timeline")
        # Fixed 3-second pause before reading the DOM — presumably to let
        # client-side rendering finish; confirm whether a wait-for-selector
        # would be more appropriate.
        await asyncio.sleep(3)
        doc = pq(await page.content())
        print("Quotes:", doc("a").length)
    finally:
        # Release the tab even if any step above raised.
        await page.close()


async def main():
    """Launch the shared browser, run ten fetch() calls concurrently, then close it."""
    Global.browser = await launch()
    try:
        # Run the fetches concurrently.
        await asyncio.gather(*[fetch() for _ in range(10)])
    finally:
        await Global.browser.close()


# Use asyncio.run() here as well, matching the first example: the earlier
# asyncio.run() call closes its loop, so asking for the "current" event loop
# afterwards is not reliable.
asyncio.run(main())
