analysis-pinyin
analysis-pinyin copied to clipboard
pinyin filter 会输出空字符串 token
ES版本 5.1.1
目的
搜索 不同之处、不同、butongzhichu、butong 都能命中文档。
配置
"filter": {
"first_letter_pinyin": {
"type": "pinyin",
"keep_full_pinyin": "false"
},
"full_pinyin_no_space": {
"keep_joined_full_pinyin": "true",
"keep_first_letter": "false",
"padding_char": "",
"type": "pinyin",
"keep_full_pinyin": "false"
},
"local_synonym": {
"type": "synonym",
"synonyms_path": "synonym.txt",
"interval": "86400"
}
},
"analyzer": {
"global_analyzer": {
"filter": "local_synonym",
"tokenizer": "ik_max_word"
},
"pinyin_full_analyzer": {
"filter": [
"local_synonym",
"full_pinyin_no_space"
],
"tokenizer": "ik_max_word"
}
}
结果
# 请求
GET index_name/_analyze
{
"analyzer": "pinyin_full_analyzer",
"text": "butong"
}
# 结果
{
"tokens": [
{
"token": "bu",
"start_offset": 0,
"end_offset": 6,
"type": "ENGLISH",
"position": 0
},
{
"token": "tong",
"start_offset": 0,
"end_offset": 6,
"type": "ENGLISH",
"position": 1
},
{
"token": "", # 出现空字符串
"start_offset": 0,
"end_offset": 6,
"type": "ENGLISH",
"position": 2
}
]
}
是不是你的同义词 filter 引起的呢?我在本地去掉你的同义词 filter 后测试,没有出现空 token 的情况。另外,最新的代码里我也加上了一个处理,会忽略空的 term,可以试试看。
DELETE pboos*
PUT pboos-map-adress-1
{
"settings": {
"number_of_shards": 6,
"index.refresh_interval": "5s",
"analysis": {
"filter": {
"first_letter_pinyin": {
"type": "pinyin",
"keep_full_pinyin": "false"
},
"full_pinyin_no_space": {
"keep_joined_full_pinyin": "true",
"keep_first_letter": "false",
"padding_char": "",
"type": "pinyin",
"keep_full_pinyin": "false"
}
},
"analyzer": {
"global_analyzer": {
"tokenizer": "ik_max_word"
},
"pinyin_full_analyzer": {
"filter": [
"full_pinyin_no_space"
],
"tokenizer": "ik_max_word"
}
}
}
}
}
GET pboos-map-adress-1/_analyze
{
"text": ["butong"],
"analyzer": "pinyin_full_analyzer"
}
{
"tokens": [
{
"token": "bu",
"start_offset": 0,
"end_offset": 6,
"type": "ENGLISH",
"position": 0
},
{
"token": "tong",
"start_offset": 0,
"end_offset": 6,
"type": "ENGLISH",
"position": 1
}
]
}