当前位置: 首页 > 后端技术 > PHP

Nginx日志UserAgent数据PHP统计

时间:2023-03-29 17:26:40 PHP

转载请注明文章出处:https://tlanyan.me/stat-user-...

近期会用到爬虫,所以打算收集UserAgent(UA)数据。那我立马想到,我网站的访问日志不就是现成的优质数据源吗?于是兴高采烈地决定写一个脚本统计Nginx访问日志中的UA信息。

对于这种简单的操作,使用脚本语言就足够了,毫无疑问,必须使用最熟悉的PHP。打开vim并开始滚动。十分钟后,功能简单的统计脚本就搞定了。该脚本目前有3个功能:1、查找并整理所有UA信息;2、操作系统数据统计;3、浏览器数据统计。

程序运行截图如下:UA信息、操作系统信息、浏览器

使用脚本统计上个月的访问日志,得到如下结果:搜索引擎爬虫比较频繁,每天有几千条数据访问;Windows仍然是份额最大的操作系统,LinuxDesktop份额仍然很小;Chrome目前是浏览器领域的霸主,Firefox次之,Opera已经是小众。

最后附上PHP脚本的代码,也可以从我的Github上找到:https://github.com/tlanyan/Sc...

#!/usr/bin/php
<?php
/**
 * Collects User-Agent statistics from Nginx access logs: the most frequent
 * UA strings, plus operating-system and browser breakdowns.
 *
 * @link http://tlanyan.me
 */
/* vim: set ts=4; set sw=4; set ss=4; set expandtab; */

/**
 * Lists the access-log files found directly inside a directory.
 *
 * @param string $path Directory to scan; trailing slashes are ignored.
 * @return string[] Paths matching "*access.log*" (rotated and gzipped logs
 *                  included); empty when nothing matches.
 */
function getFileList(string $path): array
{
    // glob() returns false on error, which would violate the array return type.
    return glob(rtrim($path, "/") . "/*access.log*") ?: [];
}

/**
 * Counts how often each User-Agent string occurs across the given log files.
 *
 * @param string[] $files Log file paths.
 * @return array<string|int, int> Map of UA string => number of occurrences.
 */
function statFiles(array $files): array
{
    $stat = [];
    echo PHP_EOL, "开始读取文件...", PHP_EOL;
    foreach ($files as $file) {
        echo "读取文件:{$file}...", PHP_EOL;
        foreach (getFileContent($file) as $line) {
            $ua = getUA($line);
            if ($ua === null) {
                // Malformed line (already reported by getUA()); do not count it.
                continue;
            }
            $stat[$ua] = ($stat[$ua] ?? 0) + 1;
        }
    }
    echo "stat all files done!", PHP_EOL, PHP_EOL;

    return $stat;
}

/**
 * Reads a log file into an array of lines, transparently handling ".gz" files.
 *
 * @param string $file Path of a plain or gzip-compressed log file.
 * @return string[] The file's lines; empty when the file cannot be read.
 */
function getFileContent(string $file): array
{
    $lines = substr($file, -3) === ".gz" ? gzfile($file) : file($file);
    if ($lines === false) {
        // file()/gzfile() return false on failure; keep going with the other files.
        echo "Error! Cannot read file: {$file}", PHP_EOL;
        return [];
    }

    return $lines;
}

/**
 * Extracts the User-Agent field from one access-log line.
 *
 * Important: the Nginx log format determines where the UA sits in the line.
 * This implementation assumes the UA is the text between the fifth and sixth
 * double quote; adjust it if your log format differs.
 *
 * @param string $line One raw log line.
 * @return string|null The UA string, or null when the line has too few quotes.
 */
function getUA(string $line): ?string
{
    $offset = 0;
    // Skip past the first five double quotes; the UA starts right after the fifth.
    for ($count = 0; $count < 5; $count++) {
        $pos = strpos($line, '"', $offset);
        if ($pos === false) {
            echo "错误!未知行:{$line}", PHP_EOL;
            return null;
        }
        $offset = $pos + 1;
    }

    $end = strpos($line, '"', $offset);
    if ($end === false) {
        // Opening quote without a closing one: the line is truncated.
        echo "错误!未知行:{$line}", PHP_EOL;
        return null;
    }

    return substr($line, $offset, $end - $offset);
}

/**
 * Prints the command-line help.
 */
function usage(): void
{
    echo "Usage: php statUA.php [option] [dir]", PHP_EOL;
    echo "选项:", PHP_EOL;
    echo "-h: 显示帮助", PHP_EOL;
    echo "-v: 详细模式", PHP_EOL;
    echo "-n NUM: 列出的UA数量", PHP_EOL;
    echo "dir: 日志文件目录", PHP_EOL;
    echo PHP_EOL;
}

/**
 * Removes every UA that contains one of the filter words (case-insensitive)
 * and reports how many log records were dropped.
 *
 * @param array<string|int, int> $stat      UA => count map, modified in place.
 * @param string[]               $UAFilters Substrings identifying unwanted UAs.
 */
function filterUA(array &$stat, array $UAFilters): void
{
    $filterCount = 0;
    foreach ($stat as $ua => $count) {
        foreach ($UAFilters as $filter) {
            // Numeric-looking UAs become int keys, hence the cast.
            if (stripos((string) $ua, $filter) !== false) {
                $filterCount += $count;
                unset($stat[$ua]);
                break; // already removed; do not count it again for another filter
            }
        }
    }
    // Braces are required: "$filterCount记录" would be read as one variable name.
    echo "过滤{$filterCount}记录!", PHP_EOL;
}

/**
 * Prints each entry with its count and its share of the array's total.
 *
 * Percentages are relative to the sum of the array passed in, not to any
 * global total.
 *
 * @param array<string|int, int> $stat Label => count map.
 */
function printCount(array $stat): void
{
    $sum = array_sum($stat);
    foreach ($stat as $key => $count) {
        // Guard against an all-zero (or empty) table to avoid dividing by zero.
        $percent = $sum > 0 ? 100 * $count / $sum : 0;
        echo $key, ": ", $count, ", percent: ", sprintf("%.2f", $percent), PHP_EOL;
    }
}

/**
 * Aggregates UA counts by operating system.
 *
 * @param array<string|int, int> $UAs UA => count map.
 * @return array<string, int> Counts keyed by "Windows", "Mac OS", "Linux",
 *                            "Android", "iOS" and "other".
 */
function statOS(array $UAs): array
{
    global $debug;

    echo PHP_EOL, "stat OS...", PHP_EOL;
    $os = ["Windows", "Mac OS", "Linux", "Android", "iOS", "other"];
    $stat = array_fill_keys($os, 0);
    foreach ($UAs as $key => $count) {
        $ua = (string) $key;
        if (strpos($ua, "Windows") !== false) {
            $stat["Windows"] += $count;
        } elseif (strpos($ua, "Macintosh") !== false) {
            $stat["Mac OS"] += $count;
        } elseif (strpos($ua, "Android") !== false) {
            // Android must be checked before Linux: Android UAs also contain "Linux".
            $stat["Android"] += $count;
        } elseif (strpos($ua, "Linux") !== false) {
            $stat["Linux"] += $count;
        } elseif (
            strpos($ua, "iPhone") !== false
            || strpos($ua, "iOS") !== false
            || strpos($ua, "like Mac OS") !== false
            || strpos($ua, "Darwin") !== false
        ) {
            $stat["iOS"] += $count;
        } else {
            if ($debug) {
                echo "other: $ua, count: $count", PHP_EOL;
            }
            $stat["other"] += $count;
        }
    }

    return $stat;
}

/**
 * Aggregates UA counts by browser.
 *
 * @param array<string|int, int> $UAs UA => count map.
 * @return array<string, int> Counts keyed by "Chrome", "Firefox", "IE",
 *                            "Safari", "Edge", "Opera" and "other".
 */
function statBrowser(array $UAs): array
{
    global $debug;

    echo PHP_EOL, "stat browser...", PHP_EOL;
    $browsers = ["Chrome", "Firefox", "IE", "Safari", "Edge", "Opera", "other"];
    $stat = array_fill_keys($browsers, 0);
    foreach ($UAs as $key => $count) {
        $ua = (string) $key;
        if (strpos($ua, "MSIE") !== false || strpos($ua, "Trident/") !== false) {
            // IE 11 dropped the "MSIE" token and only advertises "Trident/".
            $stat["IE"] += $count;
        } elseif (strpos($ua, "Edg") !== false) {
            // "Edg" matches both legacy "Edge/" and Chromium-based "Edg/" tokens.
            $stat["Edge"] += $count;
        } elseif (strpos($ua, "Firefox") !== false) {
            $stat["Firefox"] += $count;
        } elseif (strpos($ua, "OPR") !== false) {
            $stat["Opera"] += $count;
        } elseif (strpos($ua, "Chrome") !== false) {
            // Chrome first, then Safari: Chrome UAs also contain "Safari".
            $stat["Chrome"] += $count;
        } elseif (strpos($ua, "Safari") !== false) {
            $stat["Safari"] += $count;
        } else {
            if ($debug) {
                echo "other: $ua, count: $count", PHP_EOL;
            }
            $stat["other"] += $count;
        }
    }

    return $stat;
}

/**
 * Parses the command line into the globals $debug, $num and $path.
 *
 * Exits with status 0 after printing help (-h) and with status 1 on invalid
 * arguments or when the target directory does not exist.
 */
function parseCmd(): void
{
    global $debug, $num, $path, $argc, $argv;

    $optind = null;
    $options = getopt("hvn:", [], $optind);

    // Several arguments but no recognised option: the invocation is malformed.
    if ($argc > 2 && empty($options)) {
        usage();
        exit(1);
    }

    if (isset($options['h'])) {
        usage();
        exit(0);
    }

    if (isset($options['v'])) {
        $debug = true;
    }

    if (isset($options['n'])) {
        // getopt() yields an array when -n is repeated; the last value wins.
        $value = is_array($options['n']) ? end($options['n']) : $options['n'];
        $num = intval($value);
        if ($num <= 0) {
            $num = 10;
        }
    }

    if ($argc === 2 && empty($options)) {
        $path = $argv[1];
    }
    // $optind is the index of the first non-option argument, i.e. the directory.
    if ($argc > $optind) {
        $path = $argv[$optind];
    }

    if (!is_dir($path)) {
        echo "无效目录:{$path}", PHP_EOL;
        exit(1);
    }

    if ($debug) {
        echo "num: $num", PHP_EOL;
        echo "verbose: ", var_export($debug, true), PHP_EOL;
        echo "path: $path", PHP_EOL;
    }
}

if (version_compare(PHP_VERSION, "7.1") < 0) {
    // exit() with a string would report success (status 0); fail explicitly.
    echo "脚本需要PHP>=7.1", PHP_EOL;
    exit(1);
}

$path = ".";
$debug = false;
$num = 10;
$UAFilters = [
    "spider",
    "bot",
    "wget",
    "curl",
];

parseCmd();

$files = getFileList($path);
if (empty($files)) {
    echo '"' . realpath($path) . '"不包含访问日志文件。', PHP_EOL;
    exit(0);
}

$allUA = statFiles($files);
if (empty($allUA)) {
    echo "no data", PHP_EOL;
    exit(0);
}

filterUA($allUA, $UAFilters);

// Sort the UAs by count, most frequent first.
uasort($allUA, function ($a, $b) {
    return $b <=> $a;
});
if ($debug) {
    print_r($allUA);
}

echo PHP_EOL, "---- top {$num} UA ----", PHP_EOL;
// preserve_keys keeps numeric-looking UA strings from being renumbered.
printCount(array_slice($allUA, 0, $num, true));
echo "------------------", PHP_EOL;

$os = statOS($allUA);
echo PHP_EOL, "操作系统计数:", PHP_EOL;
printCount($os);

$browser = statBrowser($allUA);
echo PHP_EOL, "浏览器计数:", PHP_EOL;
printCount($browser);