"""Scrape daily COVID-19 figures (nationwide, Hubei, Shanghai) into Excel.

Author's note: the last update was in mid-March. The National Health
Commission keeps changing the wording of its bulletins, so this code should
be treated as a reference only.

The helper functions read the module-level names ``sh_headers``,
``quanguo_headers``, ``all_data``, ``hubei_data`` and ``sh_data``, which are
created by the ``__main__`` section at the bottom of the file.
"""

import datetime
import re
import time

import requests
import xlwings as xw
from bs4 import BeautifulSoup
from selenium import webdriver

# NOTE(review): both list-page paths were elided ("...") in the text this
# script was recovered from; they are restored here from the visible prefix
# and suffix -- confirm them against the live sites.
SH_BASE_URL = 'http://wsjkw.sh.gov.cn'
SH_NEWS_URL = SH_BASE_URL + '/xwfb/index.html'
NHC_BASE_URL = 'http://www.nhc.gov.cn'
NHC_NEWS_URL = NHC_BASE_URL + '/xcs/yqtb/list_gzbd.shtml'

# Seconds to wait for a server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 30

USER_AGENT = (
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'
)


def _first_group(pattern, text):
    """Return capture group 1 of the first match of *pattern* in *text*.

    Raises:
        ValueError: if the pattern does not occur in the text, which is what
            happens whenever a bulletin changes its wording.
    """
    match = re.search(pattern, text)
    if match is None:
        raise ValueError('pattern not found in bulletin: {}'.format(pattern))
    return match.group(1)


def get_sh_data(url):
    """Fetch one Shanghai Health Commission bulletin and extract its figures.

    Args:
        url: address of the bulletin page.

    Returns:
        A dict with the keys '累计排除疑似', '累计确诊', '累计重症', '累计治愈',
        '累计死亡' and '累计疑似'. '累计重症' is an int (sum of the first two
        captured counts); every other value is the matched digit string.

    Raises:
        ValueError: if the article body or one of the figures is missing.
    """
    r = requests.get(url=url, headers=sh_headers, timeout=REQUEST_TIMEOUT)
    soup = BeautifulSoup(r.text, 'lxml')
    content = soup.find(
        name='div',
        attrs={'id': 'ivs_content', 'class': 'Article_content'},
    )
    if content is None:
        raise ValueError('article body not found at {}'.format(url))
    text = content.get_text()

    # NOTE(review): the wording of the Shanghai patterns is kept as found
    # (only the comma was widened to accept ASCII or full-width). In
    # particular the status pattern names "危重" twice -- verify all of
    # them against a live bulletin.
    status_pattern = (
        r'(\d+)例危重[,,](\d+)例危重[,,](\d+)例治愈出院[,,](\d+)例死亡'
    )
    status = re.search(status_pattern, text)
    if status is None:
        raise ValueError(
            'pattern not found in bulletin: {}'.format(status_pattern)
        )

    return {
        '累计排除疑似': _first_group(r'已累计排除疑似(\d+)', text),
        '累计确诊': _first_group(r'已发现确诊病例(\d+)例', text),
        '累计重症': int(status.group(1)) + int(status.group(2)),
        '累计治愈': status.group(3),
        '累计死亡': status.group(4),
        '累计疑似': _first_group(r'还有(\d+)疑似案件调查中', text),
    }


def get_sh_today_news():
    """Find today's Shanghai bulletin on the news list and parse it.

    The list is scanned from its last entry to its first; the first entry
    dated today whose title starts with '上海新增' is handed to
    ``get_sh_data``.

    Returns:
        The dict produced by ``get_sh_data``, or None when no entry dated
        today has such a title.
    """
    r = requests.get(
        url=SH_NEWS_URL, headers=sh_headers, timeout=REQUEST_TIMEOUT
    )
    soup = BeautifulSoup(r.text, 'lxml')
    today_format = datetime.datetime.today().strftime('%Y-%m-%d')
    for date_span in reversed(soup.find_all(name='span', text=today_format)):
        link = date_span.find_previous_sibling(name='a')
        if link is None:
            continue
        if link.attrs['title'].startswith('上海新增'):
            return get_sh_data(SH_BASE_URL + link.attrs['href'])
    return None


def get_all_today_news():
    """Find today's National Health Commission bulletin and parse it.

    Returns:
        The ``(all_dict, hubei_dict)`` tuple produced by
        ``get_all_today_data``.

    Raises:
        ValueError: if the list page has no entry dated today.
    """
    r = requests.get(
        NHC_NEWS_URL, headers=quanguo_headers, timeout=REQUEST_TIMEOUT
    )
    soup = BeautifulSoup(r.text, 'lxml')
    today_format = datetime.datetime.today().strftime('%Y-%m-%d')
    date_span = soup.find(name='span', text=today_format)
    if date_span is None:
        raise ValueError('no bulletin dated {}'.format(today_format))
    link = date_span.find_previous_sibling(name='a')
    if link is None:
        raise ValueError('bulletin link missing for {}'.format(today_format))
    return get_all_today_data(NHC_BASE_URL + link.attrs['href'])


def get_all_today_data(url):
    """Extract nationwide and Hubei figures from one NHC bulletin.

    Only the text of the first ``<p>`` element of the page is searched.

    Args:
        url: address of the bulletin page.

    Returns:
        A tuple ``(all_dict, hubei_dict)`` of digit strings. ``all_dict`` has
        the keys '新增疑似', '累计疑似', '累计确诊', '累计重症', '累计死亡' and
        '累计治愈'; ``hubei_dict`` has '新增疑似', '新增确诊', '新增死亡',
        '新增治愈' and '累计重症'.

    Raises:
        ValueError: if the paragraph or one of the figures is missing.
    """
    r = requests.get(url, headers=quanguo_headers, timeout=REQUEST_TIMEOUT)
    soup = BeautifulSoup(r.text, 'lxml')
    paragraph = soup.find(name='p')
    if paragraph is None:
        raise ValueError('no paragraph found at {}'.format(url))
    news = paragraph.get_text()

    # The bulletins are written in Chinese with full-width punctuation, so
    # the counter word is "例" and brackets/commas are matched in both their
    # ASCII and full-width forms.
    all_dict = {
        '新增疑似': _first_group(r'新增疑似病例(\d+)例', news),
        '累计疑似': _first_group(r'现有疑似病例(\d+)例', news),
        '累计确诊': _first_group(r'累计报告确诊病例(\d+)例', news),
        '累计重症': _first_group(r'其中[,,]?重症病例(\d+)例', news),
        '累计死亡': _first_group(r'累计死亡病例(\d+)例', news),
        '累计治愈': _first_group(r'累计治愈出院病例(\d+)例', news),
    }

    # Each Hubei pattern is anchored on the bracketed Wuhan figure that
    # follows it; only the first number is kept.
    # NOTE(review): apart from '新增确诊', the patterns do not mention Hubei
    # explicitly -- confirm they pick the intended sentence.
    hubei_dict = {
        '新增疑似': _first_group(
            r'新增疑似病例(\d+)例.*?[((]武汉(\d+)例', news),
        '新增确诊': _first_group(
            r'湖北新增确诊病例(\d+)例.*?[((]武汉(\d+)例', news),
        '新增死亡': _first_group(
            r'新增死亡病例(\d+)例.*?[((]武汉(\d+)例', news),
        '新增治愈': _first_group(
            r'新增治愈出院病例(\d+)例[((]武汉(\d+)例[))]', news),
        '累计重症': _first_group(
            r'重症(\d+)例.*?[((]武汉(\d+)例', news),
    }
    return all_dict, hubei_dict


def get_cookie(url):
    """Open *url* in Chrome and return its cookies as a header string.

    Args:
        url: page to load in the browser.

    Returns:
        The cookies as ``name=value`` pairs joined with ';'.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(url)
        time.sleep(3)  # give the page time to set its cookies
        cookies = driver.get_cookies()
    finally:
        # Always close the browser, even if loading the page failed.
        driver.quit()
    return ';'.join(
        '{}={}'.format(cookie['name'], cookie['value']) for cookie in cookies
    )


def get_into_excel():
    """Write the collected figures into the workbook '新冠病毒每日数据.xlsx'.

    Reads the module-level dicts ``hubei_data``, ``all_data`` and
    ``sh_data`` and writes one value per column into sheet 'all', then saves
    and closes the workbook.

    Raises:
        KeyError: if one of the dicts lacks an expected figure; this is
            checked before Excel is started.
    """
    cells = [
        ('C', hubei_data['新增确诊']),
        ('K', hubei_data['新增死亡']),
        ('O', hubei_data['新增治愈']),
        ('S', hubei_data['新增疑似']),
        ('AA', hubei_data['累计重症']),
        ('R', all_data['新增疑似']),
        ('AL', all_data['累计疑似']),
        ('V', all_data['累计确诊']),
        ('Z', all_data['累计重症']),
        ('AD', all_data['累计死亡']),
        ('AH', all_data['累计治愈']),
        ('AN', sh_data['累计排除疑似']),
        ('Y', sh_data['累计确诊']),
        ('AC', sh_data['累计重症']),
        ('AK', sh_data['累计治愈']),
        ('AG', sh_data['累计死亡']),
        ('AM', sh_data['累计疑似']),
    ]

    app = xw.App(visible=True, add_book=False)
    try:
        app.display_alerts = False
        app.screen_updating = False
        wb = app.books.open('新冠病毒每日数据.xlsx')
        ws = wb.sheets['all']
        # The row count of the used range is used as the target row number.
        # NOTE(review): presumably the used range starts at row 1 and today's
        # row already exists -- confirm against the workbook.
        max_row = ws.api.UsedRange.Rows.count
        for column, value in cells:
            ws.range(column + str(max_row)).value = value
        wb.save()
        wb.close()
    finally:
        # Never leave an orphaned Excel process behind.
        app.quit()


if __name__ == "__main__":
    sh_headers = {
        'User-Agent': USER_AGENT,
        # A fresh cookie is captured with a real browser on every run; a
        # cookie string copied from the browser could be hard-coded instead.
        'Cookie': get_cookie(SH_NEWS_URL),
        'Host': 'wsjkw.sh.gov.cn',
    }
    quanguo_headers = {
        'User-Agent': USER_AGENT,
        # NOTE(review): captured session cookie, kept as found; it has
        # presumably expired and needs refreshing.
        'Cookie': (
            'oHAcoULcWCQb80S=pxzexGFCvyGV4xDkaMHSyjBmzwXn5O4vfCbxFCgMDfcBaKqsFU9FHstqjFY6wJt9;'
            'yfx_c_g_u_id_10006654=_ck20020209283417867964364567575;'
            '_gscu_2059686908=81579037nbf5xc58;'
            'insert_cookie=67313298;'
            'yfx_f_l_v_t_10006654=f_t_1580606914774__r_t_1581643181169__v_t_1581678949269__r_c_14;'
            'security_session_verify=a2efd6893c3ad08675db9b0f5c365ecf;'
            'oHAcoULcWCQb80T=4Ywh2qE8IiJP44ThdpW0fs7Yqi1Hwlh9RhJHrW2WVl536y4eCIgXxGh9M8IuYUqGUCCtBO5kBc2DB6Kewd3naLK_O2bK5W3w3pcqT.uX3asTXxC2SGBqy9eV2DoGB0ZXb4uTPzPGbXebmT6xIYxbAmGbm_kZVX_nUvBL4nkAuFAVvcGLBmXr8nsdEToXztqZUlYnTjn9niwHMcg3th7XhJvFS_tckqRq5bLpvS_IKPuYn2JLraIIejlErBhA5IQhyHXFekNynv5PYgpzu2PguGccrP3c_bcg1MFViQjKVhgs_B22Nv4NxdHdiIk9GdZDZBjQ'
        ),
        'Host': 'www.nhc.gov.cn',
    }

    # 1. Nationwide and Hubei figures.
    all_data, hubei_data, sh_data = {}, {}, {}
    try:
        all_data, hubei_data = get_all_today_news()
    except Exception as exc:  # top-level boundary: report and carry on
        print('全国数据未更新')
        print('  ({!r})'.format(exc))
    else:
        print('全国数据:{}\n'
              '湖北数据:{}'.format(all_data, hubei_data))

    # 2. Shanghai figures.
    try:
        # None means that no bulletin for today was found.
        sh_data = get_sh_today_news() or {}
    except Exception as exc:  # top-level boundary: report and carry on
        print('上海数据未更新')
        print('  ({!r})'.format(exc))
    else:
        if sh_data:
            print('上海数据:{}'.format(sh_data))
        else:
            print('上海数据未更新')

    # 3. Export to Excel only when both sources delivered data.
    if sh_data and all_data:
        get_into_excel()
        print('Excel刷新成功!')
