当前位置: 首页 > 科技观察

手把手教你用Flask搭建ES搜索引擎(实战)

时间:2023-03-16 18:23:04 科技观察

现在正式进入正题:开始使用Flask搭建ES搜索。

1 配置文件 Config.py

```python
# coding:utf-8
import os

DB_USERNAME = 'root'
DB_PASSWORD = None  # 如果没有密码
DB_HOST = '127.0.0.1'
DB_PORT = '3306'
DB_NAME = 'flask_es'


class Config:
    SECRET_KEY = "随机字符"  # Random SECRET_KEY
    SQLALCHEMY_COMMIT_ON_TEARDOWN = True  # Automatic commit
    SQLALCHEMY_TRACK_MODIFICATIONS = True  # Automatic sql
    DEBUG = True  # 调试模式
    SQLALCHEMY_DATABASE_URI = 'mysql+pymysql://%s:%s@%s:%s/%s' % (
        DB_USERNAME, DB_PASSWORD, DB_HOST, DB_PORT, DB_NAME)  # Database URL
    MAIL_SERVER = 'smtp.qq.com'
    MAIL_POST = 465
    MAIL_USERNAME = '3417947630@qq.com'
    MAIL_PASSWORD = '邮箱授权码'
    FLASK_MAIL_SUBJECT_PREFIX = 'M_KEPLER'
    FLASK_MAIL_SENDER = MAIL_USERNAME  # 默认发件人与MAIL_USERNAME相同
```

这是一个简单的Flask配置文件。当然,连接数据库对于当前项目来说并不是必须的,我只是用Mysql做辅助。小伙伴们不用配置连接数据库,ES就够了。至于邮件通知,就看个人需要了......

2 日志 Logger.py

日志模块是工程应用中不可缺少的部分,根据不同的生产环境输出日志文件是非常有必要的。用江湖的话来说:“没有日志文件,你都不知道自己是怎么死的....”

```python
# coding=utf-8
import os
import logging
import logging.config as log_conf
import datetime
import coloredlogs

coloredlogs.DEFAULT_FIELD_STYLES = {
    'asctime': {'color': 'green'},
    'hostname': {'color': 'magenta'},
    'levelname': {'color': 'magenta', 'bold': False},
    'name': {'color': 'green'}
}

log_dir = os.path.dirname(os.path.dirname(__file__)) + '/logs'
if not os.path.exists(log_dir):
    os.mkdir(log_dir)
today = datetime.datetime.now().strftime("%Y-%m-%d")
log_path = os.path.join(log_dir, today + ".log")

log_config = {
    'version': 1.0,
    # format output
    'formatters': {
        'colored_console': {
            'format': "%(asctime)s-%(name)s-%(levelname)s-%(message)s",
            'datefmt': '%H:%M:%S'
        },
        'detail': {
            'format': '%(asctime)s-%(name)s-%(levelname)s-%(message)s',
            'datefmt': "%Y-%m-%d %H:%M:%S"  # time format
        },
    },
    'handlers': {
        'console': {
            'class': 'logging.StreamHandler',
            'level': 'DEBUG',
            'formatter': 'colored_console'
        },
        'file': {
            'class': 'logging.handlers.RotatingFileHandler',
            'maxBytes': 1024 * 1024 * 1024,
            'backupCount': 1,
            'filename': log_path,
            'level': 'INFO',
            'formatter': 'detail',
            # 'encoding': 'utf-8',  # utf8编码,预防发生编码错误
        },
    },
    'loggers': {
        'logger': {
            'handlers': ['console'],
            'level': 'DEBUG',
        },
    }
}

log_conf.dictConfig(log_config)
log_v = logging.getLogger('log')
coloredlogs.install(level='DEBUG', logger=log_v)

# # Some examples.
# logger.debug("this is a debugging message")
# logger.info("this is an informational message")
# logger.warning("this is a warning message")
# logger.error("this is an error message")
```

这里是一个我经常使用的日志配置文件,可以作为常用的日志格式直接调用,根据不同级别输出到终端或者.log文件。拿走不谢。

3 路由

对于Flask项目来说,蓝图和路由会让整个项目变得更加有趣(当然是指代码的阅读)。这里我用两个分支作为数据支撑,一个是Math入口,一个是Baike入口。数据来源于上一篇文章中的百度百科爬虫,按照深度优先的爬取方式进行爬取,放入ES中。

```python
# coding:utf8
from flask import Flask
from flask_sqlalchemy import SQLAlchemy
from app.config.config import Config
from flask_mail import Mail
from flask_wtf.csrf import CSRFProtect

app = Flask(__name__, template_folder='templates', static_folder='static')
app.config.from_object(Config)
db = SQLAlchemy(app)
db.init_app(app)
csrf = CSRFProtect(app)
mail = Mail(app)

# 生成db前不导入注册蓝图。
from app.home.baike import baike as baike_blueprint
from app.home.math import math as math_blueprint
from app.home.home import home as home_blueprint

app.register_blueprint(home_blueprint)
app.register_blueprint(math_blueprint, url_prefix="/math")
app.register_blueprint(baike_blueprint, url_prefix="/baike")
```

```python
# -*- coding:utf-8 -*-
from flask import Blueprint

baike = Blueprint("baike", __name__)
from app.home.baike import views
```

```python
# -*- coding:utf-8 -*-
from flask import Blueprint

math = Blueprint("math", __name__)
from app.home.math import views
```

声明路由并在__init__文件中初始化。下面来看看路由的实现(以Baike为例):

```python
# -*- coding:utf-8 -*-
import os
from flask_paginate import Pagination, get_page_parameter
from app.Logger.logger import log_v
from app.elasticsearchClass import elasticSearch
from app.home.forms import SearchForm
from app.home.baike import baike
from flask import request, jsonify, render_template, redirect

baike_es = elasticSearch(index_type="baike_data", index_name="baike")


@baike.route("/")
def index():
    searchForm = SearchForm()
    return render_template('baike/index.html', searchForm=searchForm)


@baike.route("/search", methods=['GET', 'POST'])
def baikeSearch():
    search_key = request.args.get("b", default=None)
    if search_key:
        searchForm = SearchForm()
        log_v.error("[+]Search Keyword: " + search_key)
        match_data = baike_es.search(search_key, count=30)
        # 翻页
        PER_PAGE = 10
        page = request.args.get(get_page_parameter(), type=int, default=1)
        start = (page - 1) * PER_PAGE
        end = start + PER_PAGE
        total = 30
        print("最大数据总量:", total)
        pagination = Pagination(page=page, start=start, end=end, total=total)
        context = {
            'match_data': match_data["hits"]["hits"][start:end],
            'pagination': pagination,
            'uid_link': "/baike/"
        }
        return render_template('data.html', q=search_key, searchForm=searchForm, **context)
    return redirect('home.index')


@baike.route('/<uid>')
def baikeSd(uid):
    base_path = os.path.abspath('app/templates/s_d/')
    old_file = os.listdir(base_path)[0]
    old_path = os.path.join(base_path, old_file)
    file_path = os.path.abspath('app/templates/s_d/{}.html'.format(uid))
    if not os.path.exists(file_path):
        log_v.debug("[-]File does not exist, renaming !!!")
        os.rename(old_path, file_path)
    match_data = baike_es.id_get_doc(uid=uid)
    return render_template('s_d/{}.html'.format(uid), match_data=match_data)
```

可以看到,我们已经成功初始化了elasticSearch类并进行了一次数据搜索;我们使用Flask的分页插件进行分页并限制单页条数,再根据Uid跳转到详情页。细心的朋友会发现我这里用了个小技巧:

```python
@baike.route('/<uid>')
def baikeSd(uid):
    base_path = os.path.abspath('app/templates/s_d/')
    old_file = os.listdir(base_path)[0]
    old_path = os.path.join(base_path, old_file)
    file_path = os.path.abspath('app/templates/s_d/{}.html'.format(uid))
    if not os.path.exists(file_path):
        log_v.debug("[-]File does not exist, renaming !!!")
        os.rename(old_path, file_path)
    match_data = baike_es.id_get_doc(uid=uid)
    return render_template('s_d/{}.html'.format(uid), match_data=match_data)
```

这样可以保证详情页模板目录中始终只保留一个html文件。

4 项目启动

一如既往地使用flask_script作为项目启动方案,确实方便。

```python
# coding:utf8
from app import app
from flask_script import Manager, Server

manage = Manager(app)
# 启动命令
manage.add_command("runserver", Server(use_debugger=True))

if __name__ == "__main__":
    manage.run()
```

在命令行窗口输入以下命令启动项目,默认端口5000,访问 http://127.0.0.1:5000 :

```
python manage.py runserver
```

使用gunicorn启动:

```
gunicorn -c gconfig.py manage:app
```

```python
# encoding:utf-8
import multiprocessing
from gevent import monkey

monkey.patch_all()
# 并行工作进程数
workers = multiprocessing.cpu_count() * 2 + 1
debug = True
reload = True  # 自动重新加载
loglevel = 'debug'
# 指定每个工作进程的线程数
threads = 2
# 转发到监听端口8000
bind = '0.0.0.0:5001'
# 设置守护进程,将进程交给supervisor管理
daemon = 'false'
# 工作模式协程
worker_class = 'gevent'
# 设置最大并发量
worker_connections = 2000
# 设置进程文件目录
pidfile = 'log/gunicorn.pid'
logfile = 'log/debug.log'
# 设置访问日志和错误信息日志路径
accesslog = 'log/gunicorn_acess.log'
errorlog = 'log/gunicorn_error.log'
```

项目截图

项目Github地址:https://github.com/GZKY-PY/Flask-ES