仅供交流学习之用,禁止利用本资源从事任何违反国家(地区)法律法规的活动,一律合规《网络安全法》温馨提示:仅提供思路,实际中项目,需要维护实际步骤的框架,例如代理池的可用性等细节以及核心库部署定期更新代理池进程定期爬取列表页面进程主进程定期从Redis读取列表页面任务,并将每一项丢给异步任务执行环境CentOS7.2PHP7.2Swoole4.3.5GoogleChrome78.0.3904.108ChromeDriver78.0.3904.105Composerfacebook/webdriver=1.7easyswoole/easyswoole=3.1.18easyswoole/curl=1.0.1框架和核心库部署1.安装EasySwoole3.1.18版本[root@ar414.comphpseleniumdemo]composerrequireeasyswoole/easyswoole=3.1.18[root@ar414.comphpseleniumdemo]phpvendor/easyswoole/easyswoole/bin/easyswoole安装___________|____|/____|||||_________|(_____________||___|__|/_`|/__|||||\___\\\/\///_\/_\||/_\||____|(_||\__\||_||____)|\VV/|(_)||(_)||||__/|______|\__,_||___/\__,||_____/\_/\_/\___/\___/|_|\___|__/||___/安装成功,享受!2.安装核心库facebook/webdriver,easyswoole/curl[root@ar414.comphpseleniumdemo]#composerrequirefacebook/webdriver=1.7[root@ar414.comphpseleniumdemo]#composerrequireeasyswoole/curl=1.0.13.确认运行时没有报错[root@ar414.comphpseleniumdemo]#phpeasyswoolestart|____|/____|||||_________|(_____________||___>|__|/_`|/__|||||\___\\/\///_\/_\||/_\>||____|(_||\__\||_||____)|\VV/|(_)||(_)|||__/>|______|\__,_||___/\__,||_____/\_/\_/\___/\___/|_|\___|>__/|>|___/mainserverSWOOLE_WEBlistenaddress0.0.0.0listenport9501subserver1CONSOLE=>SWOOLE_TCP@127.0.0.1:9500...定期更新代理池流程温馨提示:代理资源请自行解决。这里只是举个例子,其实没什么用。[root@ar414.comphpseleniumdemo]#catcomposer.json{"autoload":{"psr-4":{"App\\":"App/"}},"require":{"easyswoole/easyswoole":"3.1.18","facebook/webdriver":"^1.7","easyswoole/curl":"1.0.1"}}#updatecomposerautoload[root@ar414.comphpseleniumdemo]#composerdump-autoload2,创建进程目录(更新代理池作为子进程随项目启动一起运行)[root@ar414.comphpseleniumdemo]#mkdirApp/Process3.定期爬取代理池(使用RedisList类型保证最新代理IP在头部,爬虫逻辑每次从头部获取,一个代理IP只使用一次)提示:代理资源请自行解决,这里只是举例,完整的代码链接其实不是使用proxyListApi=sprintf($this->proxyListApi,$_ENV['PROXY_LIST_API'],$_ENV['PROXY_LIST_KEY']);$this->proxyListApi=sprintf($this->proxyListApi,20191231231237085,'72axxxae0fe34');}public function run($arg){$this->initProxyListApi();/*依赖composer require easyswoole/curl=1.0.1*/while(true){$ret=Curl::get($this->proxyLi
stApi);var_dump($ret);if($ret){$ret=json_decode($ret,true);if($ret['code']==10001&&isset($ret['data']['proxy_list'])&&!empty($ret['data']['proxy_list'])){foreach($ret['data']['proxy_list'] as $proxy){$proxyItem=$proxy['ip'].':'.$proxy['port'];Kv::redis()->lPush(self::PROXY_KV_KEY,$proxyItem);}}}sleep(self::TIMER);}}}4.配置代理池更新进程在项目启动时启动(完整代码链接)public static function mainServerCreate(EventRegister $register){/*更新代理池进程*/ServerManager::getInstance()->getSwooleServer()->addProcess((new\App\Process\UpdateProxyPool('UpdateProxyPool',[]))->getProcess());}定时抓取列表页面流程抓取列表页面流程(完整代码链接)getDriver();$driver->get(self::API);$listStr=$driver->getPageSource();var_dump($listStr);file_put_contents("/www/wwwroot/blog/phpseleniumdemo/listStr.html",$listStr);preg_match_all("/PD=(.*);/U",$listStr,$list);$list=array_unique($list[1]);if($list){Kv::redis()->set(self::LIST_KV_KEY,json_encode($list));}var_dump('完成');$driver->close();$driver->quit();}catch(\Throwable $throwable){$driver->close();$driver->quit();Logger::getInstance()->log($throwable->getMessage(),'ListSpiderError');var_dump($throwable->getMessage());}sleep(self::TIMER);}}}主进程定时从Redis中读取列表页面任务,并将每一项抛给异步任务执行1.完整代码链接public static function mainServerCreate(EventRegister $register){/*更新代理池进程*/ServerManager::getInstance()->getSwooleServer()->addProcess((new\App\Process\UpdateProxyPool('UpdateProxyPool',[]))->getProcess());/*列表爬取进程*/ServerManager::getInstance()->getSwooleServer()->addProcess((new\App\Process\ListSpider('ListSpider',[]))->getProcess());$register->set($register::onWorkerStart,function(\swoole_server$server,$workerId){if($workerId==0){Timer::getInstance()->loop(30000,function(){$ret=Kv::redis()->get(ListSpider::LIST_KV_KEY);if($ret){$ret=json_decode($ret,true);foreach($ret as $item){TaskManager::async(function()use($item){(new ItemSpider(true))->run($item);return true;},function()use($item){var_dump("{$item}完毕");});}}});}});}2、ItemSpider递编译代码(完整代码链接)getDriver();$itemPath=str_replace('#','/',$itemPath);$url="https://www.188-sb.com/#{$itemPath}";var_dump($url);try{$driver->get($
url);$driver->wait(ChromeDriver::WAIT_SECONDS)->until(WebDriverExpectedCondition::visibilityOfElementLocated(WebDriverBy::className('gl-MarketGroupButton_Text')));Logger::getInstance()->console("标题是'".$driver->getTitle()."'\n");Logger::getInstance()->console("当前URI是'".$driver->getCurrentURL()."'\n");$body=$driver->getPageSource();var_dump($body);$driver->close();$driver->quit();/*TODO清洗数据入库*/}catch(\Throwable $throwable){Logger::getInstance()->log($throwable->getMessage(),'Bet365ApiRun');$driver->close();$driver->quit();}return;}}3、运行[root@ar414.comphpseleniumdemo]#php easyswoole start
