当前位置: 首页 > 后端技术 > Python

Python爬取拉勾网

时间:2023-03-26 14:02:34 Python

又到了一年一度的招聘季,大量的工作岗位在向我们招手。今天就来看看拉勾网各个公司对python人才的需求。

```python
import jieba
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pyecharts import Geo
from wordcloud import WordCloud
import re
import requests
import matplotlib
from imageio import imread

url = "https://www.lagou.com/jobs/positionAjax.json?needAddtionalResult=false"


def data(page):
    return {"first": "true", "pn": f"{page}", "kd": "python", 'sid': '4256fece2141497bb5a8e1bfa69bcee7'}


def get_cookies():
    headers = {
        'origin': 'https://www.lagou.com',
        'referer': 'https://www.lagou.com/jobs/list_python?labelWords=&fromSearch=true&suginput=',
        'authority': 'www.lagou.com',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36',
    }
    response = requests.get('https://www.lagou.com/jobs/list_python?labelWords=&fromSearch=true&suginput=', headers=headers)
    return response.cookies.get_dict()


cookies = get_cookies()
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36',
    'host': 'www.lagou.com',
    'origin': 'https://www.lagou.com',
    'referer': 'https://www.lagou.com/jobs/list_python?labelWords=&fromSearch=true&suginput='
}


def get_data(data):
    response = requests.post(url=url, headers=headers, data=data, cookies=cookies)
    # json数据
    content = response.json()['content']['positionResult']['result']
    j = 1
    companyLabelstr = ''
    for i in content:
        city = i['city']
        companyFullName = i['companyFullName']
        companySize = i['companySize']
        education = i['education']
        positionName = i['positionName']
        salary = i['salary']
        workYear = i['workYear']
        companyLabelList = i['companyLabelList']
        if len(companyLabelList) > 0:
            companyLabelList = ''.join(companyLabelList)
        else:
            companyLabelList = ''
        '''
        companyLabelstr = companyLabelList + companyLabelstr
        print(workYear, companyLabelList)
        print(companyLabelstr)
        '''
        with open('python.csv', 'a+', encoding='utf-8') as f:
            f.write(f'{city},{companyFullName},{companySize},{education},{positionName},{salary},{workYear},{companyLabelList}\n')
        print(f'{j} item data success')
        j += 1


if __name__ == '__main__':
    for i in range(1, 11):
        params = data(i)
        get_data(params)
```

分析

XM返利https://www.kaifx.cn/broker下面的爬取文本/x...

```python
matplotlib.rcParams['font.family'] = 'SimHei'
plt.rcParams['axes.labelsize'] = 16
plt.rcParams['xtick.labelsize'] = 14
plt.rcParams['ytick.labelsize'] = 14
plt.rcParams['legend.fontsize'] = 12
plt.rcParams['figure.figsize'] = [15, 9]
data = pd.read_excel(r'C:\Users\2020\Desktop\python2.xls', encoding='utf-8')
```

1.学历

```python
data['education'].value_counts().plot(kind='bar', rot=0)
```

2.工作经历

```python
data['years'].value_counts().plot(kind='bar', rot=0, color='g')
```

3.城市分析

```python
plt.rcParams['figure.figsize'] = [15, 15]
data['city'].value_counts().plot(kind='pie', autopct='%1.2f%%', explode=np.linspace(0, 1.5, 18))
```

4.公司处理分析

(1)分词操作

```python
a = len(data['公司福利'])
str = ''
for i in range(a):
    b = data['公司福利'][i]
    if type(b) == float:
        b = ''
    str = str + b
jieba.add_word('五险一金')
jieba.add_word('牛B')
jieba.add_word('年底双薪')
jieba.add_word('带薪年假')
jieba.add_word('股票期权')
jieba.add_word('定期体检')
jieba.add_word('节日礼物')
words = jieba.lcut(str)
counts = {}
for word in words:
    counts[word] = counts.get(word, 0) + 1
items = list(counts.items())
items.sort(key=lambda x: x[1], reverse=True)
with open('词频统计', mode='w', encoding='utf-8') as f:
    for i in range(20):
        word, count = items[i]
        f.writelines('{}\t{}\n'.format(word, count))
```

(2)词云展示

```python
with open('词频统计', mode='r', encoding='utf-8') as f:
    text = f.read()
wc = WordCloud(font_path=r'C:\Users\2020\Desktop\simhei.ttf', background_color='white', width=1000, max_words=100, height=860, margin=2).generate(text)
plt.imshow(wc)
plt.axis('off')
plt.show()
```

5.全国工资水平分析

```python
data2 = list(map(lambda x: (data['city'][x], eval(re.split('k|K', data['salary'][x])[0]) * 1000), range(len(data))))
data3 = pd.DataFrame(data2, index)
data4 = list(map(lambda x: (data3.groupby(0).mean()[1].index[x], data3.groupby(0).mean()[1].values[x]), range(len(data3.groupby(0)))))
geo = Geo('全国python薪资布局', '制作人:痛', title_color='#fff', title_pos='left', width=1200, height=600, background_color='#404a59')
attr, value = geo.cast(data4)
geo.add('', attr, value, type='heatmap', is_visualmap=True, maptype='china', visual_range=[0, 300], visual_text_color='#fff')
geo.render()
```