当前位置: 首页 > 后端技术 > Python

遍历ES节点检查分词(qbit)

时间:2023-03-26 15:37:18 Python

前言

技术栈
Elasticsearch 7.17.2
python 3.8
httpx 0.22.0
loguru 0.6.0
hao 分词器: https://github.com/tenlee2012...

有时更新 ES 分词器(tokenizer)或远程词典(remote dictionary)后，不确定是否每个节点都更新到位了。我没有找到直接的命令来验证，所以写了一个 Python 脚本来做验证。
原理是利用 index.routing.allocation.include._ip 将索引的分片分配到特定节点。
代码会创建 test_{nodeName} 索引，测试后需手动删除：DELETE test_*

代码

# encoding: utf8
# author: qbit
# date: 2022-06-16
# summary: 遍历 ES 数据节点验证分词结果
import pprint
import httpx
from loguru import logger

coordnode = 'http://192.168.2.67:9200'  # ES 协调节点地址
esuser = 'elastic'                      # ES 集群账号
espwd = 'xxxx'                          # ES 集群密码
analyzer = "hao_index_mode"             # 分词器
intext = "燕雀安知鸿鹄之志"              # 分词文本
outtext = "燕雀;安;知;鸿鹄之志;鸿鹄"     # 分词结果

def GetNodeList():
    r"""获取ES集群节点列表"""
    url = f'{coordnode}/_cat/nodes?v=true&h=name,ip,master,node.role&s=name&format=json'
    r = httpx.get(url, auth=(esuser, espwd))
    result = r.json()
    for dic in result:
        logger.debug(dic)
    return result

def CheckOneNodeAnalyzer(nodeDict: dict, expected: str):
    r"""在某个节点上创建索引并测试分词"""
    nodeName = nodeDict['name']
    nodeIP = nodeDict['ip']
    indexName = f"test_{nodeName}"
    url = f"{coordnode}/{indexName}"
    logger.info(f"{nodeName},{nodeIP},{indexName}")
    dic = {
        "settings": {
            "index": {
                "number_of_shards": 1,
                "number_of_replicas": 0,
                "routing.allocation.include._ip": nodeIP
            }
        }
    }
    r = httpx.put(url, auth=(esuser, espwd), json=dic)      # 创建索引
    logger.debug(r)
    url = f"{coordnode}/{indexName}/_analyze"
    dic = {
        "analyzer": analyzer,
        "text": intext
    }
    r = httpx.post(url, auth=(esuser, espwd), json=dic)     # 验证分词
    logger.debug(r)
    tokenList = list()
    for dic in r.json()['tokens']:
        # logger.debug(dic)
        tokenList.append(dic['token'])
    tokenLine = ';'.join(tokenList)
    logger.info(tokenLine)
    if tokenLine == expected:
        return ['ok', nodeName, nodeIP, tokenLine]
    else:
        return ['no', nodeName, nodeIP, tokenLine]

if __name__ == '__main__':
    nodeList = GetNodeList()
    okList = list()
    noList = list()
    for node in nodeList:
        if 'd' in node['node.role']:    # 数据节点
            result = CheckOneNodeAnalyzer(node, outtext)
            if result[0] == 'ok':
                okList.append(result)
            else:
                noList.append(result)
    print('------')
    logger.info(f"okList size: {len(okList)}")
    pprint.pprint(okList)
    logger.info(f"noList size: {len(noList)}")
    pprint.pprint(noList)

qbit snap