当前位置: 首页 > 后端技术 > Python

使用python爬梨视频

时间:2023-03-26 16:53:10 Python

刚开始学习Python,不喜勿喷。第一步:去官网下载Python 3版本,下载链接:https://www.python.org/downlo... 如果是Windows系统,还需要添加一些环境变量。下面是Python爬虫pearvideo需要先下载的代码:"Mozilla/5.0(WindowsNT10.0;Win64;x64;rv:63.0)Gecko/20100101Firefox/63.0"}#url="https://www.pearvideo.com/category_6"response=requests.get(url,headers=header)html=response.textreg=''bgImgReg=''titleReg='(.*?)'contentReg='(.*?)

'timeReg='(.*?)
'video_id=re.findall(reg,html)#videoidvideo_img=re.findall(bgImgReg,html)#videoimagevideo_title=re.findall(titleReg,html)#视频标题video_time=re.findall(timeReg,html)#视频时间video_content=re.findall(contentReg,html)#视频内容video_url=[]purl_1=[]videoImg=[]videoTitle=[]videoTime=[]videoContent=[]videoUrl=[]#@getVideoUrlforiinvideo_id:video_html="http://www.pearvideo.com/{}".format(i)video_url.append(str(video_html))#videoplaybackvideo_url中j的地址数组:purl=requests.get(j).textreq='srcUrl="(.*?)"'purl_1.append(re.findall(req,purl))#videoposterimagearrayforiinvideo_img:videoImg.append(i.split("(")[1].split(")")[0])#titlearrayforiinvideo_title:videoTitle.append(i)#videoplaybacktimearrayforiinvideo_time:videoTime.append(i)#视频内容数组foriinvideoio_content:videoContent.append(i)foriinpurl_1:videoUrl.append(''.join(i))#循环获取单个内容索引数组,elinenumerate(video_id):writeTxt=videoTitle[index]+'\n'+videoUrl[index]+'\n'+videoContent[index]+'\n'+videoImg[index]+'\n'+videoTime[index]+'\n\n\n\n'#print(videoUrl[index]+videoContent[index])f=open("test2.txt",'a+')f.write(writeTxt)f.close()//下面的注释是为了下载视频海报图片到视频文件夹#path="video"#判断当前目录下是否有视频文件#ifpathnotinos.listdir():#os.mkdir(path)#urlretrieve(purl_1[index],path+"/%s.mp4"%video_title[index])defdownload():n=0whileTrue:ifn>=36:return#https://www.pearvideo.com/popular_loading.jsp?reqType=5&categoryId=10&startthis梨视频异步请求接口url="https://www.pearvideo.com/popular_loading.jsp?reqType=5&categoryId=10&start={}".format(n)n+=12time.sleep(1)video_DL(url)下载()