"""Fill each row of ``test.xlsx`` with the chapter titles found at its URL.

The first column of ``Sheet1`` is read as a list of page URLs.  For every
URL the page is downloaded, the element with ``id="list"`` is located, and
the text of every ``<a>`` tag inside it is written into the same row,
starting at the second column.  The workbook is then saved in place.
"""
import openpyxl
import requests
from bs4 import BeautifulSoup

WORKBOOK_PATH = 'test.xlsx'
SHEET_NAME = 'Sheet1'
# Seconds to wait for the server; without a timeout requests.get() can
# block forever on an unresponsive host.
REQUEST_TIMEOUT = 30


def fetch_chapter_titles(url):
    """Return the text of every ``<a>`` inside the page's ``id="list"`` element.

    Args:
        url: Address of the page to download.

    Returns:
        A list of link texts in document order, or ``None`` when the page
        has no element with ``id="list"``.

    Raises:
        requests.RequestException: If the download fails or the server
            answers with an HTTP error status.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    # Name the parser explicitly: otherwise bs4 picks whichever parser is
    # installed (results can differ between machines) and emits a warning.
    # NOTE(review): response.text relies on the charset requests derives
    # from the HTTP headers -- confirm the target site declares it.
    soup = BeautifulSoup(response.text, 'html.parser')
    # Per the original author's inspection of the page, the <a> tags that
    # point to each chapter all live under the element with id "list".
    container = soup.find(id='list')
    if container is None:
        return None
    return [link.text for link in container.find_all('a')]


# Open the Excel workbook and select Sheet1.
wb = openpyxl.load_workbook(WORKBOOK_PATH)
try:
    sh = wb[SHEET_NAME]
    # Take only the first column; an empty sheet yields no cells at all.
    first_column = next(iter(sh.columns), ())
    # Process the first-column cell of each row.
    for col in first_column:
        url = col.value
        if not isinstance(url, str) or not url.strip():
            # Blank or non-text cell: there is nothing to download.
            continue
        try:
            titles = fetch_chapter_titles(url)
        except requests.RequestException as exc:
            # One unreachable page should not discard the rows already done.
            print(url + ' FAILED: ' + str(exc))
            continue
        if titles is None:
            print(url + ' FAILED: no element with id "list"')
            continue
        # Write the titles into this row, starting from the second column.
        for column_index, title in enumerate(titles, start=2):
            sh.cell(row=col.row, column=column_index, value=title)
        print(url + 'OK')
    # Save the results back into the same file.
    wb.save(WORKBOOK_PATH)
finally:
    # Close the workbook even if processing failed part-way.
    wb.close()
