使用Python搭建语音合成系统

时间：2023-03-14 19:25:27 科技观察

大家好，我是Python人工智能技术出身的。我一直对语音合成系统很感兴趣，一直想为自己合成一些内容，比如合成小说、把我下载的电子书播报给我听等等。

语音合成系统其实就是一个基于语音合成的工具，但是由于很多厂商都提供了这个东西的API，大大降低了开发的难度。只需调用几个API，即可实现属于自己的语音合成工具；麻雀虽小，五脏俱全。从更大的规模来看，这是一个小型的语音合成系统。

准备工作

首先我们需要在自己的电脑上安装：

- Anaconda
- Python 3.7
- Visual Studio Code

步骤

这里我们选择讯飞开放平台的WebAPI接口。

https://www.xfyun.cn/doc/tts/online_tts/API.html

首先我们去控制台创建一个应用。创建好后，点击应用进入，有应用的详细一栏。点击左侧的语音合成，然后进入右上角的下一级在线语音合成（流式版），我们需要得到3样东西：

- APPID
- APISecret
- APIKey

代码实现

接下来进行代码实现，首先安装我们需要的两个库。

```
pip install websocket-client
pip install playsound
```

接下来我们定义一个play类，里面包含4个函数：

```python
class play:
    def __init__(self):  # 初始化函数
    def play_sound(self):  # 播放音频函数
    def select_vcn(self, *arg):  # 选择下拉框设置发音人
    def xfyun_tts(self):  # 进行语音合成
```

这里需要填写刚刚从讯飞开放平台控制台获取的appid、appkey和appsecret：

```python
def __init__(self):
    self.APP_ID = 'xxx'  # 请填写你自己的appid
    self.API_KEY = 'xxx'  # 请填写你自己的appkey
    self.SECRET_KEY = 'xxx'  # 请填写你自己的appsecret
    self.root = tk.Tk()  # 初始化窗口
    self.root.title("语音合成系统")  # 窗口名称
    self.root.geometry("600x550")  # 设置窗口大小
    self.root.resizable(0, 0)
    # self.root.resizable(width=True, height=True)  # 设置窗口是否可变，宽度不可变，高度可变，默认为True
    self.lb = tk.Label(self.root, text='请选择语音发音人')  # 标签
    self.tt = tk.Text(self.root, width=77, height=30)  # 多行文本框
    self.cb = ttk.Combobox(self.root, width=12)  # 下拉列表框
    # 设置下拉列表框的内容
    self.cb['values'] = ("甜美女声-小燕", "亲切男声-许久", "知性女声-小萍", "可爱童声-许小宝", "亲切女声-小婧")
    self.cb.current(0)  # 设置当前选择状态为0，即第一项
    self.cb.bind("<<ComboboxSelected>>", self.select_vcn)
    self.tk_tts_file = tk.Label(self.root, text='生成文件名')
    self.b1 = tk.Button(self.root, text='语音合成', width=10, height=1, command=self.xfyun_tts)  # 按钮
    self.tk_play = tk.Button(self.root, text='play', width=10, height=1, command=self.play_sound)  # 按钮
    # 各个组件的位置
    self.tk_tts_file.place(x=30, y=500)
    self.b1.place(x=300, y=500)
    self.tk_play.place(x=400, y=500)
    self.lb.place(x=30, y=30)
    self.cb.place(x=154, y=30)
    self.tt.place(x=30, y=60)
    self.root.mainloop()
```

选择下拉列表框中的选项时，设置对应的发音人：

```python
def select_vcn(self, *arg):
    if self.cb.get() == '甜美女声-小燕':
        self.vcn = "xiaoyan"
    elif self.cb.get() == '亲切男声-许久':
        self.vcn = "aisjiuxu"
    elif self.cb.get() == '知性女声-小萍':
        self.vcn = "aisxping"
    elif self.cb.get() == '可爱童声-许小宝':
        self.vcn = "aisbabyxu"
    elif self.cb.get() == '亲切女声-小婧':
        self.vcn = "aisjinger"
    print(self.vcn)
```

接下来我们来魔改科大讯飞自带的Python demo，使用起来更方便。

另外，搜索公众号程序员小乐，后台回复“赚钱”，即可获得惊喜大礼包。

```python
# -*- coding:utf-8 -*-
#
# author: iflytek
#
# 本demo测试运行环境为：Windows + Python3.7
# 本demo测试运行成功时安装的第三方库及其版本如下：
#   cffi==1.12.3
#   gevent==1.4.0
#   greenlet==0.4.15
#   pycparser==2.19
#   six==1.12.0
#   websocket==0.2.1
#   websocket-client==0.56.0
# 合成小语种需要传输小语种文本、使用小语种发音人vcn、tte=unicode以及修改文本编码方式
# 错误码链接：https://www.xfyun.cn/document/error-code （code返回错误码时必看）
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
import websocket
import datetime
import hashlib
import base64
import hmac
import json
from urllib.parse import urlencode
import time
import ssl
from wsgiref.handlers import format_date_time
from datetime import datetime
from time import mktime
import _thread as thread
import os
import wave

STATUS_FIRST_FRAME = 0  # 第一帧的标识
STATUS_CONTINUE_FRAME = 1  # 中间帧的标识
STATUS_LAST_FRAME = 2  # 最后一帧的标识
PCM_PATH = "./demo.pcm"


class Ws_Param(object):
    # 初始化
    def __init__(self):
        pass

    def set_tts_params(self, text, vcn):
        if text != "":
            self.Text = text
        if vcn != "":
            self.vcn = vcn
        # 业务参数（business），更多个性化参数可以在官网查看
        self.BusinessArgs = {"bgs": 1, "aue": "raw", "auf": "audio/L16;rate=16000", "vcn": self.vcn, "tte": "utf8"}
        # 使用小语种必须使用如下方式，其中unicode指的是utf16小端的编码方式，即"UTF-16LE"
        # self.Data = {"status": 2, "text": str(base64.b64encode(self.Text.encode('utf-16')), "UTF8")}
        self.Data = {"status": 2, "text": str(base64.b64encode(self.Text.encode('utf-8')), "UTF8")}

    def set_params(self, appid, apiSecret, apiKey):
        if appid != "":
            self.APPID = appid
            # 公共参数(common)
            self.CommonArgs = {"app_id": self.APPID}
        if apiKey != "":
            self.APIKey = apiKey
        if apiSecret != "":
            self.APISecret = apiSecret

    # 生成url
    def create_url(self):
        url = 'wss://tts-api.xfyun.cn/v2/tts'
        # 生成RFC1123格式的时间戳
        now = datetime.now()
        date = format_date_time(mktime(now.timetuple()))
        # 拼接字符串
        signature_origin = "host: " + "ws-api.xfyun.cn" + "\n"
        signature_origin += "date: " + date + "\n"
        signature_origin += "GET " + "/v2/tts " + "HTTP/1.1"
        # 使用hmac-sha256进行加密
        signature_sha = hmac.new(self.APISecret.encode('utf-8'), signature_origin.encode('utf-8'),
                                 digestmod=hashlib.sha256).digest()
        signature_sha = base64.b64encode(signature_sha).decode(encoding='utf-8')
        authorization_origin = "api_key=\"%s\", algorithm=\"%s\", headers=\"%s\", signature=\"%s\"" % (
            self.APIKey, "hmac-sha256", "host date request-line", signature_sha)
        authorization = base64.b64encode(authorization_origin.encode('utf-8')).decode(encoding='utf-8')
        # 将请求的认证参数组合成字典
        v = {"authorization": authorization, "date": date, "host": "ws-api.xfyun.cn"}
        url = url + '?' + urlencode(v)
        return url


def on_message(ws, message):
    try:
        # print(message)
        try:
            message = json.loads(message)
        except Exception as e:
            print("111", e)
        code = message["code"]
        sid = message["sid"]
        audio = message["data"]["audio"]
        audio = base64.b64decode(audio)
        status = message["data"]["status"]
        print(code, sid, status)
        if status == 2:
            print("ws is closed")
            ws.close()
        if code != 0:
            errMsg = message["message"]
            print("sid:%s call error:%s code is:%s" % (sid, errMsg, code))
        else:
            with open(PCM_PATH, 'ab') as f:
                f.write(audio)
    except Exception as e:
        print("receive msg,but parse exception:", e)


# 收到websocket错误的处理
def on_error(ws, error):
    print("### error:", error)


# 收到websocket关闭的处理
def on_close(ws):
    print("### closed ###")


# 收到websocket连接建立的处理
def on_open(ws):
    def run(*args):
        d = {"common": wsParam.CommonArgs,
             "business": wsParam.BusinessArgs,
             "data": wsParam.Data,
             }
        d = json.dumps(d)
        print("------>开始发送文本数据")
        ws.send(d)
        if os.path.exists(PCM_PATH):
            os.remove(PCM_PATH)

    thread.start_new_thread(run, ())


def text2pcm(appid, apiSecret, apiKey, text, vcn, fname):
    wsParam.set_params(appid, apiSecret, apiKey)
    wsParam.set_tts_params(text, vcn)
    websocket.enableTrace(False)
    wsUrl = wsParam.create_url()
    ws = websocket.WebSocketApp(wsUrl, on_message=on_message, on_error=on_error, on_close=on_close)
    ws.on_open = on_open
    ws.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE})
    pcm2wav(PCM_PATH, fname)


def pcm2wav(fname, dstname):
    with open(fname, 'rb') as pcmfile:
        pcmdata = pcmfile.read()
        print(len(pcmdata))
    with wave.open(dstname, "wb") as wavfile:
        wavfile.setparams((1, 2, 16000, 0, 'NONE', 'NONE'))
        wavfile.writeframes(pcmdata)


wsParam = Ws_Param()
```

这样就最终实现了一个语音合成系统。目前，各种云计算和云服务发展迅速，各大公司提供丰富的资源，大大降低了人工智能开发的门槛。你无需深入了解语音合成的原理，也能快速开发出语音合成工具！

上一篇：大数据和深度学习如何让你免于在下班路上卡住一段时间？

下一篇：中兴通讯已获得了国内运营商的第一个400G商用城域网订单

使用Python搭建语音合成系统相关文章