当前位置: 首页 > 后端技术 > Python

Python爬取网站的m3u8视频,将ts解密成mp4,合并成整体视频

时间:2023-03-26 15:06:42 Python

刚刚才学会用Python爬取网站的m3u8视频。我在爬视频的时候遇到了一些坑,记录下来。

1. 打开开发者工具,播放视频。发现只有.m3u8和.ts结尾的文件,于是下载m3u8文件,m3u8文件里面全是ts下载链接列表,还有加密密钥链接。下载密钥文件。

2. 解密加密的ts视频需要安装AES库:pip install pycryptodome。如果 from Crypto.Cipher import AES 报错,可以将 C:\Python36\Lib\site-packages\crypto 改成 C:\Python36\Lib\site-packages\Crypto。

3. Python代码:

# -*- coding: utf-8 -*-
import os
import sys
from importlib import reload
import requests
import datetime
from Crypto import Random
from Crypto.Cipher import AES


# 根据m3u8文件下载地址获取m3u8文件内容,拦截ts下载链接
def download(url):
    download_path = os.getcwd() + "\download"
    if not os.path.exists(download_path):
        os.mkdir(download_path)
    # 新建日期文件夹
    download_path = os.path.join(download_path, datetime.datetime.now().strftime('%Y%m%d_%H%M%S'))
    # 创建
    os.mkdir(download_path)
    all_content = requests.get(url).text  # 获取M3U8文件内容
    # print('all_content:', all_content)
    if "#EXTM3U" not in all_content:
        raise BaseException("非M3U8链接")
    urls = []
    with open(m3u8_path, "r") as file:
        lines = file.readlines()
        for line in lines:
            if line.endswith(".ts\n"):
                # urls.append(base_url + line.strip("\n"))
                urls.append(line.strip("\n"))
    print("地址列表", urls)
    file_line = urls
    print(file_line)
    unknown = True
    key = "26ebaf4ddfa89fad"
    for index, line in enumerate(file_line):  # 遍历ts链接列表
        # if "#EXT-X-KEY" in line:  # 寻找密码
        #     method_pos = line.find("METHOD")
        #     comma_pos = line.find(",")
        #     method = line[method_pos:comma_pos].split('=')[1]
        #     print "Decode Method:", method
        #
        #     uri_pos = line.find("URI")
        #     quotation_mark_pos = line.rfind('"')
        #     key_path = line[uri_pos:quotation_mark_pos].split('"')[1]
        #
        #     key_url = url.rsplit("/", 1)[0] + "/" + key_path  # 拼出密钥的URL
        #     res = requests.get(key_url)
        #     key = res.content
        #     print "key:", key
        # if "EXTINF" in line:  # 查找ts地址并下载
        unknown = False
        # pd_url = url.rsplit("/", 1)[0] + "/" + file_line[index + 1]  # 拼出ts段的URL
        pd_url = file_line[index]
        print("下载地址", pd_url)
        start = datetime.datetime.now().replace(microsecond=0)
        res = requests.get(pd_url)
        c_fule_name = file_line[index].rsplit("/")[-1]
        print('保存地址', download_path, '保存名称', c_fule_name)
        if len(key):  # AES解密
            end = datetime.datetime.now().replace(microsecond=0)
            print("耗时:%s" % (end - start))
            iv = Random.new().read(AES.block_size)
            cryptor = AES.new(key.encode('utf-8'), AES.MODE_CBC, iv)
            with open(os.path.join(download_path, format(index) + ".mp4"), 'ab') as f:
                f.write(cryptor.decrypt(res.content))
        else:
            with open(os.path.join(download_path, c_fule_name), 'ab') as f:
                f.write(res.content)
                f.flush()
    if unknown:
        raise BaseException("No corresponding download link found")
    else:
        print "Download complete"
    merge_file(download_path)


# Merge ts files
def merge_file(path):
    os.chdir(path)
    cmd = "copy /b * new.tmp"
    os.system(cmd)
    os.system('del /Q *.ts')
    os.system('del /Q *.mp4')
    os.rename("new.tmp", "new.mp4")


if __name__ == '__main__':
    url = "https://video.buycar5.cn/20200912/WPkXE88O/1000kb/hls/index.m3u8"
    download(url)

4. 对于没有AES加密的ts,直接下载ts合并即可。

import datetime
import requests
import os
import time

# 关闭Python urllib3包的证书认证警告
import urllib3
urllib3.disable_warnings()

# m3u8为本地文件路径
m3u8_path = "C:\\Users\\Administrator\\Desktop\\ReportDocuments\\index1.m3u8"
# request请求头,如果没有,可能会被禁止访问
header = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36"}


# 从m3u8文件中提取并生成ts文件的下载链接
def get_ts_urls(m3u8_path, base_url):
    urls = []
    with open(m3u8_path, "r") as file:
        lines = file.readlines()
        for line in lines:
            if line.endswith(".ts\n"):
                # urls.append(base_url + line.strip("\n"))
                urls.append(line.strip("\n"))
    print("地址列表", urls)
    return urls


'''
取出下载链接并下载
ts_urls:ts下载链接列表
download_path:文件的保存地址
ts文件合并命令:copy /b E:\pythonProject\tsfiles\*.ts D:\PySpider\new.mp4
'''
def download(ts_urls, download_path):
    for i in range(len(ts_urls)):
        # ts_path = download_path + "/{0}.ts".format(i)
        ts_path = "E:/pythonProject/tsfiles" + "/{0}.ts".format(i)
        print("存储地址:", ts_path)
        ts_url = ts_urls[i]
        file_name = ts_url.split("/")[-1]
        if ts_path is not None:
            print("ts_url", ts_url)
            print("Start downloading %s" % file_name)
            # time.sleep(0.5)  # 防止爬虫间隔时间过短被ban
            start = datetime.datetime.now().replace(microsecond=0)
            try:
                response = requests.get(headers=header, url=ts_url, stream=True)
            except Exception as e:
                print("Exception request:%s" % e.args)
                return
            with open(ts_path, "wb+") as file:
                for chunk in response.iter_content(chunk_size=1024):
                    if chunk:
                        file.write(chunk)
            end = datetime.datetime.now().replace(microsecond=0)
            print("耗时:%s" % (end - start))
        else:
            print("{}已经存在,开始下载下一个ts".format(file_name))
            continue


# 对下载的ts文件的路径进行排序
def file_walker(path):
    file_list = []
    for root, dirs, files in os.walk(path):  # 生成器
        for fn in files:
            p = str(root + '/' + fn)
            file_list.append(p)
    file_list.sort(key=lambda x: int(x[10:-3]))
    print(file_list)
    return file_list


# 将所有下载的ts文件合并成一个文件
# ts_path:一堆下载的ts文件的文件夹
# combine_path:合并文件的存放位置
# file_name:合并后的视频文件名
def combine(ts_path, combine_path, file_name):
    print(ts_path)
    file_list = file_walker(ts_path)
    file_path = combine_path + file_name + '.ts'
    print(file_path)
    with open(file_path, 'wb+') as fw:
        for i in range(len(file_list)):
            fw.write(open(file_list[i], 'rb').read())


if __name__ == '__main__':
    # urls = get_ts_urls(m3u8_path=m3u8_path, base_url="https://www.zhuticlub.com:65")
    urls = get_ts_urls(m3u8_path=m3u8_path, base_url="https://www.zhuticlub.com:65")
    download(urls, "./tsfiles")
    combine("./tsfiles/", "D://PySpider//", "西虹市首富")