analysis-pinyin icon indicating copy to clipboard operation
analysis-pinyin copied to clipboard

pinyin filter 会出现空字符串

Open Morriaty-The-Murderer opened this issue 6 years ago • 2 comments

ES 版本：5.1.1

目的：不同之处、不同、butongzhichu、butong 都能命中文档

配置


          "filter": {
            "first_letter_pinyin": {
              "type": "pinyin",
              "keep_full_pinyin": "false"
            },
            "full_pinyin_no_space": {
              "keep_joined_full_pinyin": "true",
              "keep_first_letter": "false",
              "padding_char": "",
              "type": "pinyin",
              "keep_full_pinyin": "false"
            },
            "local_synonym": {
              "type": "synonym",
              "synonyms_path": "synonym.txt",
              "interval": "86400"
            }
          },
          "analyzer": {
            "global_analyzer": {
              "filter": "local_synonym",
              "tokenizer": "ik_max_word"
            },
            "pinyin_full_analyzer": {
              "filter": [
                "local_synonym",
                "full_pinyin_no_space"
              ],
              "tokenizer": "ik_max_word"
            }
          }
        

结果

# 请求
GET index_name/_analyze
{
  "analyzer": "pinyin_full_analyzer", 
  "text": "butong"
}

# 结果
{
  "tokens": [
    {
      "token": "bu",
      "start_offset": 0,
      "end_offset": 6,
      "type": "ENGLISH",
      "position": 0
    },
    {
      "token": "tong",
      "start_offset": 0,
      "end_offset": 6,
      "type": "ENGLISH",
      "position": 1
    },
    {
      "token": "",  # 出现空字符串
      "start_offset": 0,
      "end_offset": 6,
      "type": "ENGLISH",
      "position": 2
    }
  ]
}

Morriaty-The-Murderer avatar Sep 21 '17 07:09 Morriaty-The-Murderer

是不是你的同义词 filter 引起的呢？我本地拿掉你的同义词 filter 后测试，没有出现空 token 的情况。最新的代码里我也加上了一个处理，会忽略空的 term，可以试试看。

medcl avatar Dec 01 '17 06:12 medcl

   
DELETE pboos*    
PUT pboos-map-adress-1  
{
   "settings": {
    "number_of_shards": 6,
    "index.refresh_interval": "5s",
    "analysis": {
     "filter": {
            "first_letter_pinyin": {
              "type": "pinyin",
              "keep_full_pinyin": "false"
            },
            "full_pinyin_no_space": {
              "keep_joined_full_pinyin": "true",
              "keep_first_letter": "false",
              "padding_char": "",
              "type": "pinyin",
              "keep_full_pinyin": "false"
            }
          },
          "analyzer": {
            "global_analyzer": {
              "tokenizer": "ik_max_word"
            },
            "pinyin_full_analyzer": {
              "filter": [
                "full_pinyin_no_space"
              ],
              "tokenizer": "ik_max_word"
            }
          }
        
    }
  }
}
GET pboos-map-adress-1/_analyze
{
  "text": ["butong"],
  "analyzer": "pinyin_full_analyzer"
}

{
  "tokens": [
    {
      "token": "bu",
      "start_offset": 0,
      "end_offset": 6,
      "type": "ENGLISH",
      "position": 0
    },
    {
      "token": "tong",
      "start_offset": 0,
      "end_offset": 6,
      "type": "ENGLISH",
      "position": 1
    }
  ]
}

medcl avatar Dec 01 '17 06:12 medcl