analysis-pinyin
analysis-pinyin copied to clipboard
Elasticsearch 6.4.1 版本使用 elasticsearch-analysis-pinyin 6.4.1 为什么不高亮?
例如:
PUT /medcl/
{
"index" : {
"analysis" : {
"analyzer" : {
"pinyin_analyzer" : {
"tokenizer" : "my_pinyin"
}
},
"tokenizer" : {
"my_pinyin" : {
"type" : "pinyin",
"keep_separate_first_letter" : false,
"keep_full_pinyin" : true,
"keep_original" : true,
"limit_first_letter_length" : 16,
"lowercase" : true,
"remove_duplicated_term" : true
}
}
}
}
}
GET /medcl/_analyze
{
"text": ["刘德华"],
"analyzer": "pinyin_analyzer"
}
POST /medcl/folks/_mapping
{
"folks": {
"properties": {
"name": {
"type": "keyword",
"fields": {
"pinyin": {
"type": "text",
"store": false,
"term_vector": "with_offsets",
"analyzer": "pinyin_analyzer",
"boost": 10
}
}
}
}
}
}
POST /medcl/folks/andy
{"name":"刘德华"}
POST /medcl/folks/sefw
{"name":"三地发"}
POST /medcl/folks/333
{"name":"左发会"}
使用拼音查询有结果但是不高亮
GET /medcl/_search
{
"query" : {
"match": { "name.pinyin": "ldh" }
},
"highlight" : {
"order": "score",
"fields": {
"name": {
"matched_fields": ["name", "name.pinyin"]
}
}
}
}
结果:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0.3439677,
"hits": [
{
"_index": "medcl",
"_type": "folks",
"_id": "andy",
"_score": 0.3439677,
"_source": {
"name": "刘德华"
}
}
]
}
}
没有出现高亮,但是使用以下查询:
GET /medcl/_search
{
"query" : {
"match": { "name": "刘德华" }
},
"highlight" : {
"order": "score",
"fields": {
"name": {
"matched_fields": ["name", "name.pinyin"]
}
}
}
}
就出现高亮
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0.2876821,
"hits": [
{
"_index": "medcl",
"_type": "folks",
"_id": "andy",
"_score": 0.2876821,
"_source": {
"name": "刘德华"
},
"highlight": {
"name": [
"<em>刘德华</em>"
]
}
}
]
}
}
@medcl
@medcl
我也是同样的问题 @medcl 请问这个问题怎么解啊?
最近没空看啊,晚点排查
如果设置 "ignore_pinyin_offset": false,在写入一定量数据之后就会报 startOffset must be non-negative 异常,似乎是代码问题。
目前解决这个问题,可以采用ngram+pinyin filter方式,如下:
{ "settings" : { "analysis" : { "analyzer" : { "pinyin_analyzer" : { "tokenizer" : "my_ngram", "filter": [ "pinyin_filter" ] } }, "tokenizer" : { "my_ngram" : { "type" : "ngram", "min_gram" : 1, "max_gram" : 50, "token_chars" : [ "letter", "digit", "punctuation", "symbol" ] } }, "filter":{ "pinyin_filter":{ "type":"pinyin", "keep_full_pinyin":false, "keep_joined_full_pinyin":true, "keep_none_chinese_in_joined_full_pinyin":true, "none_chinese_pinyin_tokenize":false, "remove_duplicated_term":true } } } }, "mappings": { "abia321": { "properties": { "name": { "type": "text", "analyzer": "pinyin_analyzer", "search_analyzer": "standard", "term_vector": "with_positions_offsets" } } } } }