# -*- coding: utf-8 -*-
"""Scrape Lagou job listings for a keyword and save them to ``py3.xls``.

Created on Mon Sep 7 21:44:39 2020

@author: ASUS

Usage note from the original post: set up the environment first (the script
imports ``requests`` and ``xlwt``), then copy this code and run it.
"""
# Link carried over from the original post (Forex FAQ):
# https://www.kaifx.cn/lists/question/

import json
import time

import requests
import xlwt

# Sheet layout: the tuple index is the column number and each entry is both the
# header text and the key read from every job record. 'positionId' and
# 'positionName' intentionally appear twice to keep the original column layout.
# 'linestaion' and 'skillLables' are spelled exactly as the script's original
# header row spells them.
COLUMNS = (
    'positionId',
    'positionName',
    'companyId',
    'companyFullName',
    'city',
    'companyLabelList',
    'companyLogo',
    'companyShortName',
    'companySize',
    'createTime',
    'district',
    'education',
    'financeStage',
    'firstType',
    'formatCreateTime',
    'industryField',
    'jobNature',
    'lastLogin',
    'latitude',
    'linestaion',
    'longitude',
    'matchScore',
    'positionAdvantage',
    'positionId',
    'positionLabels',
    'positionName',
    'secondType',
    'skillLables',
    'stationname',
    'subwayline',
    'thirdType',
    'workYear',
)

workbook = xlwt.Workbook(encoding='utf-8')
mysheet = workbook.add_sheet('mysheet')

# Row 0 holds the column headers.
for _col, _title in enumerate(COLUMNS):
    mysheet.write(0, _col, _title)


def main(kd, pages, row):
    """Fetch job listings for a keyword and write them to the sheet.

    Args:
        kd: Search keyword sent as the ``kd`` form field.
        pages: Number of result pages to fetch; pages ``1..pages`` inclusive
            are requested.
        row: Index of the first sheet row to write job records to (row 0 is
            the header row).

    Side effects:
        Prints the number of records on each page, writes one sheet row per
        record, and saves the workbook to ``py3.xls`` after the last page.

    Raises:
        KeyError: If the response JSON lacks the expected
            ``content.positionResult.result`` path or a record lacks one of
            the ``COLUMNS`` keys.
        requests.RequestException: On network errors or timeouts.
    """
    # Listing page, visited first to obtain cookies for the session.
    url1 = 'https://www.lagou.com/jobs/list_python?city=%E5%85%A8%E5%9B%BD&cl=false&fromSearch=true&labelWords=&suginput='
    # Ajax endpoint that returns the listings as JSON.
    url = "https://www.lagou.com/jobs/positionAjax.json?px=default&needAddtionalResult=false"
    # Request headers.
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Referer': 'https://www.lagou.com/jobs/list_python?px=default&city=%E5%85%A8%E5%9B%BD',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36',
        'Host': 'www.lagou.com',
    }

    # The 'pn' form field selects which page of results to fetch.
    for page in range(1, pages + 1):
        data = {'first': 'false', 'pn': page, 'kd': kd}

        # A fresh session per page: load the listing page to collect cookies,
        # then post the Ajax request with them. The context manager closes
        # the session's connections once the response has been read.
        with requests.Session() as s:
            s.get(url=url1, headers=headers, timeout=1)
            cookie = s.cookies
            respon = s.post(url=url, headers=headers, data=data,
                            cookies=cookie, timeout=3)

        # Pause between pages (presumably to avoid being rate-limited).
        time.sleep(1)

        result = json.loads(respon.text)
        info = result["content"]["positionResult"]["result"]
        print(len(info))

        for j in info:
            for col, field in enumerate(COLUMNS):
                mysheet.write(row, col, j[field])
            row += 1

    workbook.save('py3.xls')


if __name__ == "__main__":
    # Ask for the keyword and page count, then start writing at row 1
    # (just below the header row).
    kd = input('输入关键字:')
    pages = int(input('输入要爬取多少页:'))
    main(kd, pages, 1)

# Sample response (abridged):
# {"resubmitToken":null,"requestId":null,"msg":null,"success":true,"content":{"hrInfoMap":{"6187967":{"userId":11765418,"phone":null,"positionName":"招聘经理",...."pageSize":15},"code":0}
