简介:全民阅读时代已经来临。目前有2.1亿用户使用阅读软件,日活跃用户超过500万,其中19-35岁的年轻用户占比超过60%。本科及以上学历用户占比高达80%,北京、上海、广州、深圳等省会/直辖市用户占比超过80%。我习惯在微信上看书。为了方便整理书籍和导出笔记,我开发了这个小工具。 部分截图 代码思路 1.目录结构 首先来看一下整体目录结构 代码├─excel_func.py 读写excel文件├─pyqt_gui.py PyQt GUI界面└─wereader.py 微信阅读相关api excel_func.py使用xlrd和xlwt库读写excel文件 pyqt_gui.py使用PyQt绘制GUI界面 wereader.py通过抓包分析得到相关api 2.excel_func.py def write_excel_xls(path,sheet_name_list,value):#新建一个工作簿workbook=xlwt.Workbook()#获取需要写入的行数index=len(value)forsheet_nameinsheet_name_list:#在工作簿中新建一个表格 sheet=workbook.add_sheet(sheet_name)#向本工作簿中的表写入数据foriinrange(0,index):forjinrange(0,len(value[i])):sheet.write(i,j,value[i][j])#保存工作簿workbook.save(path) 该函数的代码流程是:创建一个excel文件,创建一个表,向表中写入数据 3.pyqt_gui.py class MainWindow(QMainWindow):def __init__(self,*args,**kwargs):super().__init__(*args,**kwargs)self.DomainCookies={}self.setWindowTitle('微信阅读助手')#设置窗口标题self.resize(900,600)#设置窗口大小self.setWindowFlags(Qt.WindowMinimizeButtonHint)#禁止最大化按钮self.setFixedSize(self.width(),self.height())#禁止调整窗口大小url='https://weread.qq.com/#login'#目标地址self.browser=QWebEngineView()#实例化浏览器对象QWebEngineProfile.defaultProfile().cookieStore().deleteAllCookies()#第一次运行软件时删除所有cookieQWebEngineProfile.defaultProfile().cookieStore().cookieAdded.connect(self.onCookieAdd)#cookie增加时触发self.onCookieAdd()函数self.browser.loadFinished.connect(self.onLoadFinished)#网页加载完成时触发self.onLoadFinished()函数self.browser.load(QUrl(url))#加载网页self.setCentralWidget(self.browser)#设置中央窗口 该函数的代码流程是:新建一个QT窗口,实例化QWebEngineView对象,绑定self.onCookieAdd事件,绑定self.onLoadFinished事件,加载网页 #网页加载完成事件 def onLoadFinished(self):globalUSER_VIDglobalHEADERS#获取cookiescookies=['{}={};'.format(key,value)forkey,valueinself.DomainCookies.items()]cookies=''.join(cookies)#header添加CookieHEADERS.update(Cookie=cookies)#判断是否登录微信阅读成功iflogin_success(HEADERS):print('登录微信阅读成功!')#获取用户user_vidif'wr_vid'inself.DomainCookies.keys():USER_VID=self.DomainCookies['wr_vid']print('Userid:{}'.format(USER_VID))#关闭整个qt窗口self.close()else:print('请扫描二维码登录微信阅读...') 该函数的代码流程是:网页加载完成时,检查用户是否已成功登录微信阅读。微信阅读登录成功后,关闭Q
T窗口,开始数据导出。如果登录微信阅读失败,继续等待用户扫描二维码 #添加cookies事件 def onCookieAdd(self,cookie):if'weread.qq.com'incookie.domain():name=cookie.name().data().decode('utf-8')value=cookie.value().data().decode('utf-8') if name not in self.DomainCookies:self.DomainCookies.update({name:value}) 该函数的代码流程为:保存微信阅读网址的cookie,以便后续操作 books=get_bookshelf(USER_VID,HEADERS)#获取书架上的图书books_finish_read=books['finishReadBooks']books_recent_read=books['recentBooks']books_all=books['allBooks']write_excel_xls_append(data_dir+'mybookshelf.xls','已经读过的书',books_finish_read)#追加写入excel文件 write_excel_xls_append(data_dir+'Mybookshelf.xls','Recentlyreadbooks',books_recent_read)#追加写入excel文件 write_excel_xls_append(data_dir+'Mybookshelf.xls','Allbooks',books_all)#追加写入excel文件 #获取书架上每本书的笔记 for index,book in enumerate(books_finish_read):book_id=book[0]book_name=book[1]notes=get_bookmarklist(book[0],HEADERS)withopen(note_dir+book_name+'.txt','w')asf:f.write(notes)print('导出笔记{}({}/{})'.format(note_dir+book_name+'.txt',index+1,len(books_finish_read))) 该函数的代码流程是:调用write_excel_xls_append函数,保存书籍,导出笔记 4.wereader.py def get_bookshelf(userVid,headers):"""获取书架上的所有书籍"""url="https://i.weread.qq.com/shelf/friendCommon"params=dict(userVid=userVid)r=requests.get(url,params=params,headers=headers,verify=False)ifr.ok:data=r.json()else:raiseException(r.text)books_finish_read=set()#读完的书books_recent_read=set()#最近读的书books_all=set()#所有书架上的书 for book in data['recentBooks']:ifnotbook['bookId'].isdigit():#过滤公众号 continueb=Book(book['bookId'],book['title'],book['author'],book['cover'],book['intro'],book['category'])books_recent_read.add(b)books_all=books_finish_read+books_recent_read return dict(finishReadBooks=books_finish_read,recentBooks=books_recent_read,allBooks=books_all) 这个函数的代码流程是:获取最近阅读的书籍,已经阅读的书籍,所有的书籍;过滤公众号;以字典格式保存书籍数据 def get_bookmarklist(bookId,headers):"""获取一本书的笔记,返回md文本"""url="https://i.weread.qq.com/book/bookmarklist"params=dict(bookId=bookId)r=requests.get(url,params=params,headers=headers,verify=False)ifr.ok:data=r.json()#clipboard.copy(json.dumps(data,indent=4,sor
t_keys=True))else:raiseException(r.text)chapters={c['chapterUid']:c['title']forcindata['chapters']}contents=defaultdict(list)foriteminsorted(data['updated'],key=lambdax:x['chapterUid']):#foritemindata['updated']:chapter=item['chapterUid']text=item['markText']create_time=item["createTime"]start=int(item['range'].split('-')[0])contents[chapter].append((start,text))chapters_map={title:level for level,title in get_chapters(int(bookId),headers)}res=''forcinsorted(chapters.keys()):title=chapters[c]res+='#'*chapters_map[title]+' '+title+'\n' for start,text in sorted(contents[c],key=lambda e:e[0]):res+='>'+text.strip()+'\n\n'res+='\n'returnres 该函数的代码流程是:获取某本书的笔记,将返回的字符串改写成markdown格式并输出 如何运行 #跳转到当前目录 cd 目录名 #首先卸载依赖库 pip uninstall -y -r requirement.txt #然后重新安装依赖库 pip install -r requirement.txt -i https://pypi.tuna.tsinghua.edu.cn/simple #开始运行 python pyqt_gui.py 补充 完整版源码存放在github上,如有需要请点此下载。项目持续更新,欢迎star本项目。 License The MIT License (MIT)
