当前位置: 首页 > 后端技术 > Python

【Python】爬取全国各城市历史天气数据

时间:2023-03-26 12:05:31 Python

# Scrape historical weather data for cities across China with Python.
#
# - Scrapes daily weather data for every city nationwide for 2011-2020.
# - Data is fetched and parsed with requests + BeautifulSoup.
# - Scraping is multithreaded and driven by city name.
# - Results are saved as .xls files, organised by province.
# - A dictionary is built from the pinyin spelling of each city name.
#   NOTE(review): the article says the site spells some city pinyin wrongly,
#   so data for those cities cannot be fetched -- confirm against the site.
# - A province -> city dictionary is built, and folders/files are created
#   per province.
#
# A quick look at one page:
#   http://www.tianqihoubao.com/lishi/beijing/month/201101.html
# URL pattern:
#   http://www.tianqihoubao.com/lishi/{city pinyin}/month/{YYYYMM}.html
#
# Main code
#
# NOTE(review): the class name was lost in the scraped copy of this article
# (it was rendered as a translated word); `Crawler` is a reconstruction --
# confirm against callers.
class Crawler(threading.Thread):
    """Worker thread that downloads monthly weather tables for one city at a time.

    The class relies on module-level names defined elsewhere in the file:
    ``gLock`` (a lock guarding ``city_dict``), ``city_dict`` (work queue whose
    keys are passed to ``get_urls`` and whose values are passed to
    ``saveTocsv``), ``target_year_list`` / ``target_month_list`` (string
    fragments concatenated into the URL date), and ``start`` (a
    ``time.time()`` timestamp).
    """

    def run(self):
        """Pop cities from ``city_dict`` until it is empty, saving one file per city."""
        print("%s is running" % threading.current_thread())
        while True:
            # The context manager releases the lock even if popitem() raises.
            with gLock:
                if not city_dict:
                    # Nothing left to do: end this worker instead of spinning
                    # forever on an empty dictionary.
                    return
                city_pinyin, city_name = city_dict.popitem()

            # Collect the rows of every month for this city.
            data_ = []
            for url in self.get_urls(city_pinyin):
                try:
                    rows = self.get_data(url)
                except Exception as e:
                    # Best effort: one bad month must not abort the city.
                    print(e)
                    continue
                if rows is not None:
                    data_.extend(rows)

            self.saveTocsv(data_, city_name)

            if not city_dict:
                print("Time spent:", time.time() - start)
                # Return rather than exit(): exit() inside a thread only
                # raises SystemExit in that thread.
                return

    def get_urls(self, city_pinyin):
        """Return the monthly history URLs for ``city_pinyin``.

        One URL is produced per (year, month) pair, iterating years in the
        outer loop and months in the inner loop.
        """
        # Example: http://www.tianqihoubao.com/lishi/beijing/month/201812.html
        return [
            "http://www.tianqihoubao.com/lishi/{}/month/{}.html".format(
                city_pinyin, year + month
            )
            for year in target_year_list
            for month in target_month_list
        ]

    def get_soup(self, url):
        """Fetch ``url`` and return it parsed as BeautifulSoup, or None on failure."""
        try:
            r = requests.get(url, timeout=30)
            r.raise_for_status()  # raise HTTPError when the request failed
        except requests.RequestException as e:
            print(e)
            return None
        return BeautifulSoup(r.text, "html.parser")

    def saveTocsv(self, data, city):
        """Write ``data`` to ``./weather_data/<city>天气.xls``.

        Despite the method name, the output is written with
        ``DataFrame.to_excel``, not as CSV.
        """
        import os  # local import: only this method needs it

        # to_excel() fails if the target directory is missing.
        os.makedirs('./weather_data', exist_ok=True)
        fileName = './weather_data/' + city + '天气.xls'
        result_weather = pd.DataFrame(
            data, columns=['日期', '天气状况', '气温', '风力风向']
        )
        # NOTE(review): recent pandas releases no longer ship a writer for
        # the legacy '.xls' format -- confirm this works with the installed
        # pandas version.
        result_weather.to_excel(fileName, index=False)
        print('保存所有天气成功!')
        print('remain{}'.format(len(city_dict)))

    def get_data(self, url):
        """Return the weather table at ``url`` as an (n, 4) string array.

        Returns None when the page cannot be fetched, the expected table is
        missing, or the cell count is not a multiple of four.
        """
        print(url)
        soup = self.get_soup(url)
        if soup is None:
            return None

        container = soup.find('div', class_="wdetail")
        table = container.find('table') if container is not None else None
        if table is None:
            print("no weather table found: {}".format(url))
            return None

        # Skip the header row; strip all whitespace inside each cell.
        cells = [
            "".join(td.get_text().split())
            for tr in table.find_all("tr")[1:]
            for td in tr.find_all("td")
        ]
        try:
            return np.array(cells).reshape(-1, 4)
        except ValueError as e:
            print(e)
            return None