当前位置: 首页 > 后端技术 > PHP

2018年国家统计局省市信息简单抓取并导出csv文件(php)

时间:2023-03-29 13:50:33 PHP

2018年国家统计局省市信息简单抓取

注:代码异常处理有待完善,如有不足请指正,不喜勿喷。感谢。

<?php
/**
 * Scrapes the 2018 province and city division codes from the National Bureau
 * of Statistics site and exports them as a CSV file.
 *
 * Flow: fetch the index page -> build a "province code => province name" map
 * -> fetch one page per province -> collect one row per city -> write the CSV
 * under DOCUMENT_ROOT/csv/<date>/ and send it to the client as a download.
 */

header('Content-Type:text/html;charset=UTF-8');

// No execution time limit: one HTTP request is issued per province.
ini_set('max_execution_time', '0');

// Index page that lists every province.
$url = 'http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2018/';

$data = fetchUtf8($url);
if ($data === false) {
    exit('抓取异常');
}

preg_match_all('/provincetr\'>(.*?)<\/tr>/', $data, $matches);
// preg_match_all() always fills $matches with (possibly empty) sub-arrays, so
// the captured group has to be tested rather than $matches itself.
if (empty($matches[1])) {
    exit('匹配异常');
}

// Flat list of table cells, one per province link.
$data2show = returnArr($matches[1]);

// Province code => province name.
// NOTE(review): the tail of this loop was lost when the snippet was extracted;
// it is reconstructed from how $key / $val are consumed by the city loop below.
$province = [];
foreach ($data2show as $val) {
    if (preg_match('/=\'(\d{2})\.html/', $val, $sz)
        && preg_match('/\'>([^<]+)</', $val, $name)
    ) {
        $province[$sz[1]] = trim($name[1]);
    }
}

// Rows grouped by province code; flattened before export.
$city = [];
foreach ($province as $key => $val) {
    // Build the URL of this province's city listing.
    $cityUrl = $url . $key . '.html';
    $data = fetchUtf8($cityUrl);
    if ($data === false) {
        // Best effort: skip a province whose page could not be fetched.
        continue;
    }

    preg_match_all('/citytr\'>(.*?)<\/tr>/', $data, $matches);
    foreach ($matches[1] as $v) {
        // Each row holds two links: [0] wraps the numeric code, [1] wraps the
        // city name. Only the second one is used.
        preg_match_all('/=\'(\d{2})\/(\d{4})\.html\'>(.*?)<\/a>/', $v, $info);
        if (!isset($info[3][1])) {
            continue;
        }

        $cityName = $info[3][1];
        // Municipalities list a placeholder district instead of a city; use the
        // province name there. Both spellings are accepted because the literal
        // in the original snippet ('市区') differs from the site's '市辖区'.
        if ($cityName === '市辖区' || $cityName === '市区') {
            $cityName = $val;
        }

        $city[$key][] = [
            'province_code' => $info[1][1],
            'province_name' => $val,
            'city_code'     => $info[2][1],
            'city_name'     => $cityName,
        ];
    }
}

$cityArr = arr2ToArr1($city);
export_csv($cityArr);
exit;

/**
 * Performs an HTTP GET request with cURL.
 *
 * @param string $url Address to fetch.
 *
 * @return string|false Response body, or false when the transfer failed.
 */
function curlGet($url)
{
    $curl = curl_init();
    curl_setopt($curl, CURLOPT_URL, $url);
    // Return the body as a string instead of printing it.
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
    // The script itself has no time limit, so bound each request explicitly.
    curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, 10);
    curl_setopt($curl, CURLOPT_TIMEOUT, 30);

    $data = curl_exec($curl);
    curl_close($curl);

    return $data;
}

/**
 * Fetches a page and converts it from GBK to UTF-8.
 *
 * @param string $url Address to fetch.
 *
 * @return string|false UTF-8 page content, or false when the request or the
 *                      conversion failed.
 */
function fetchUtf8($url)
{
    $raw = curlGet($url);
    if ($raw === false || $raw === '') {
        return false;
    }

    // //IGNORE drops byte sequences that cannot be converted.
    return iconv('GBK', 'UTF-8//IGNORE', $raw);
}

/**
 * Splits table rows into their cells and returns all cells as one flat list.
 *
 * @param array $content Inner HTML of each table row.
 *
 * @return array Inner HTML of every <td> cell, in document order.
 */
function returnArr($content)
{
    $arr = [];
    foreach ($content as $key => $val) {
        // NOTE(review): the original explode() delimiter was stripped from the
        // snippet; cells are extracted with a regex instead.
        preg_match_all('/<td>(.*?)<\/td>/s', $val, $cells);
        $arr[$key] = $cells[1];
    }

    return arr2ToArr1($arr);
}

/**
 * Flattens a two-dimensional array into a one-dimensional one.
 *
 * @param array $arr Array of arrays.
 *
 * @return array Concatenation of all inner arrays (integer keys renumbered).
 */
function arr2ToArr1($arr)
{
    return array_reduce($arr, 'array_merge', []);
}

/**
 * Writes rows to a CSV file under DOCUMENT_ROOT/csv/<Y-m-d>/ and sends that
 * file to the client as a download.
 *
 * @param array $data List of rows; each row is an array of field values.
 *
 * @return void
 *
 * @throws RuntimeException When the directory or the file cannot be created.
 */
function export_csv($data)
{
    $path = $_SERVER['DOCUMENT_ROOT'] . '/csv/' . date('Y-m-d', time()) . '/';
    // Create the target directory when it does not exist yet. The second
    // is_dir() covers the case where a concurrent request created it first.
    if (!is_dir($path) && !mkdir($path, 0777, true) && !is_dir($path)) {
        throw new RuntimeException('Unable to create directory: ' . $path);
    }

    $filename = $path . time() . '.csv';
    $fp = fopen($filename, 'w');
    if ($fp === false) {
        throw new RuntimeException('Unable to open file for writing: ' . $filename);
    }

    // UTF-8 BOM so that Excel detects the encoding.
    fwrite($fp, chr(0xEF) . chr(0xBB) . chr(0xBF));
    foreach ($data as $fields) {
        // Separator, enclosure and escape are spelled out (these are the
        // defaults) to avoid the PHP 8.4 deprecation for an omitted $escape.
        fputcsv($fp, $fields, ',', '"', '\\');
    }
    fclose($fp);

    header('Content-Type:text/csv;charset=utf-8');
    // Expose only the file name, never the absolute server path.
    header('Content-Disposition:attachment;filename="' . basename($filename) . '"');
    header('Pragma:no-cache');
    header('Expires:0');

    // Send the generated file so the download is not empty.
    readfile($filename);
}