elasticsearch-analysis-mmseg

Unknown tokenizer type [mmseg] for [mmseg_maxword]

Open zhangwonderful opened this issue 8 years ago • 7 comments

服务器环境:elasticsearch5.5.1 分词插件版本:5.5.1 客户端程序环境:spring-boot 运行客户端程序报错:Caused by: java.lang.IllegalArgumentException: Unknown tokenizer type [mmseg] for [mmseg_maxword] at org.elasticsearch.index.analysis.AnalysisRegistry.getAnalysisProvider(AnalysisRegistry.java:387) ~[elasticsearch-5.5.1.jar:5.5.1] at org.elasticsearch.index.analysis.AnalysisRegistry.buildMapping(AnalysisRegistry.java:338) ~[elasticsearch-5.5.1.jar:5.5.1] at org.elasticsearch.index.analysis.AnalysisRegistry.buildTokenizerFactories(AnalysisRegistry.java:176) ~[elasticsearch-5.5.1.jar:5.5.1] at org.elasticsearch.index.analysis.AnalysisRegistry.build(AnalysisRegistry.java:154) ~[elasticsearch-5.5.1.jar:5.5.1] at org.elasticsearch.index.IndexService.<init>(IndexService.java:145) ~[elasticsearch-5.5.1.jar:5.5.1] at org.elasticsearch.index.IndexModule.newIndexService(IndexModule.java:363) ~[elasticsearch-5.5.1.jar:5.5.1] at org.elasticsearch.indices.IndicesService.createIndexService(IndicesService.java:449) ~[elasticsearch-5.5.1.jar:5.5.1] at org.elasticsearch.indices.IndicesService.createIndex(IndicesService.java:414) ~[elasticsearch-5.5.1.jar:5.5.1] at org.elasticsearch.cluster.metadata.MetaDataCreateIndexService$1.execute(MetaDataCreateIndexService.java:366) ~[elasticsearch-5.5.1.jar:5.5.1] at org.elasticsearch.cluster.ClusterStateUpdateTask.execute(ClusterStateUpdateTask.java:45) ~[elasticsearch-5.5.1.jar:5.5.1] at org.elasticsearch.cluster.service.ClusterService.executeTasks(ClusterService.java:634) ~[elasticsearch-5.5.1.jar:5.5.1] at org.elasticsearch.cluster.service.ClusterService.calculateTaskOutputs(ClusterService.java:612) ~[elasticsearch-5.5.1.jar:5.5.1] at org.elasticsearch.cluster.service.ClusterService.runTasks(ClusterService.java:571) ~[elasticsearch-5.5.1.jar:5.5.1] at org.elasticsearch.cluster.service.ClusterService$ClusterServiceTaskBatcher.run(ClusterService.java:263) ~[elasticsearch-5.5.1.jar:5.5.1] at org.elasticsearch.cluster.service.TaskBatcher.runIfNotProcessed(TaskBatcher.java:150) 
~[elasticsearch-5.5.1.jar:5.5.1] at org.elasticsearch.cluster.service.TaskBatcher$BatchedTask.run(TaskBatcher.java:188) ~[elasticsearch-5.5.1.jar:5.5.1] at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingRunnable.run(ThreadContext.java:569) ~[elasticsearch-5.5.1.jar:5.5.1] 相关配置: { "analysis": {

"tokenizer":{
    
   "mmseg_maxword":{
      "type":"mmseg",
      "seg_type":"max_word"
   },
  "mmseg_complex":{
    "type":"mmseg",
    "seg_type":"complex"
  },
  "mmseg_simple":{
    "type":"mmseg",
    "seg_type":"simple"
  },
  "semicolon_spliter":{
    "type":"pattern",
    "seg_type":";"
  },
  "pct_spliter":{
    "type":"pattern",
    "seg_type":"[%]+"
  }
},

"filter": {

  "ngram_min_2":{
    "max_gram": 10,
    "min_gram": 2,
    "type": "nGram"
  },
  "ngram_min_1":{
    "max_gram": 10,
    "min_gram": 1,
    "type": "nGram"
  },
  "min2_length":{
    "min": 2,
    "max": 4,
    "type": "length"
  },
  "analyzer":{
    "default":{
      "type":"keyword"
    },
    "lowercase_keyword":{
       "type":"custom",
       "filter":"[standard,lowercase]",
       "tokenizer":"standard"
    },
    "lowercase_keyword_ngram_min_size1":{
      "type":"custom",
      "filter":"[ngram_min_1,standard,lowercase] ",
      "tokenizer":"nGram"
    },
    "lowercase_keyword_ngram_min_size2":{
      "type":"custom",
      "filter":"[ngram_min_2,standard,lowercase,min2_length,stop] ",
      "tokenizer":"nGram"
    },
    "lowercase_keyword_ngram":{
      "type":"custom",
      "filter":"[ngram_min_1,standard,lowercase] ",
      "tokenizer":"nGram"
    },
    "lowercase_keyword_without_standard":{
      "type":"custom",
      "filter":"[lowercase]",
      "tokenizer":"keyword"
    },
    "lowercase_whitespace":{
      "type":"custom",
      "filter":"[lowercase]",
      "tokenizer":"whitespace"
    },
  
    "mmseg":{
      "alias":"[mmseg_analyzer]",
      "type":"org.elasticsearch.index.analysis.MMsegAnalyzerProvider"
    },
    "comma_spliter":{
      "pattern":"[,|\\s]+",
      "type":"pattern"
    },
    "pct_spliter":{
      "pattern":"[%]+",
      "type":"pattern"
    },
    "custom_snowball_analyzer":{
      "language":"English",
      "type":"snowball"
    },
    "simple_english_analyzer":{
      "tokenizer":"whitespace",
      "filter":"[standard,lowercase,snowball]",
      "type":"custome"
    },
    "edge_ngram":{
      "tokenizer":"edgeNGram",
      "filter":"[lowercase]",
      "type":"custome"
    },
 
    "custom_auth_en_analyzer":{
      "tokenizer":"semicolon_spliter",
      "filter":"[standard,snowball,lowercase,trim]",
      "type":"custome"
    }
  }
}

} }

zhangwonderful avatar Aug 30 '17 08:08 zhangwonderful

请问大牛如何解决啊,我的spring boot程序启动就报错了。

zhangwonderful avatar Aug 30 '17 08:08 zhangwonderful

mmseg 这个类型已经没有了。

The plugin ships with analyzers: mmseg_maxword, mmseg_complex, mmseg_simple; tokenizers: mmseg_maxword, mmseg_complex, mmseg_simple; and token filter: cut_letter_digit.
另外,上面配置中的 "type":"custome" 要改成:
  "type":"custom"

medcl avatar Aug 30 '17 12:08 medcl

谢谢,我修改了,错误变了。 Caused by: java.lang.IllegalArgumentException: Unknown analyzer type [org.elasticsearch.index.analysis.MMsegAnalyzerProvider] for [mmseg] at org.elasticsearch.index.analysis.AnalysisRegistry.getAnalysisProvider(AnalysisRegistry.java:387) ~[elasticsearch-5.5.1.jar:5.5.1] at org.elasticsearch.index.analysis.AnalysisRegistry.buildMapping(AnalysisRegistry.java:338) ~[elasticsearch-5.5.1.jar:5.5.1] at org.elasticsearch.index.analysis.AnalysisRegistry.buildAnalyzerFactories(AnalysisRegistry.java:186) ~[elasticsearch-5.5.1.jar:5.5.1] at org.elasticsearch.index.analysis.AnalysisRegistry.build(AnalysisRegistry.java:156) ~[elasticsearch-5.5.1.jar:5.5.1] at org.elasticsearch.index.IndexService.<init>(IndexService.java:145) ~[elasticsearch-5.5.1.jar:5.5.1] at org.elasticsearch.index.IndexModule.newIndexService(IndexModule.java:363) ~[elasticsearch-5.5.1.jar:5.5.1] at org.elasticsearch.indices.IndicesService.createIndexService(IndicesService.java:449) ~[elasticsearch-5.5.1.jar:5.5.1] at org.elasticsearch.indices.IndicesService.createIndex(IndicesService.java:414) ~[elasticsearch-5.5.1.jar:5.5.1] at org.elasticsearch.cluster.metadata.MetaDataCreateIndexService$1.execute(MetaDataCreateIndexService.java:366) ~[elasticsearch-5.5.1.jar:5.5.1] at org.elasticsearch.cluster.ClusterStateUpdateTask.execute(ClusterStateUpdateTask.java:45) ~[elasticsearch-5.5.1.jar:5.5.1] at org.elasticsearch.cluster.service.ClusterService.executeTasks(ClusterService.java:634) ~[elasticsearch-5.5.1.jar:5.5.1] at org.elasticsearch.cluster.service.ClusterService.calculateTaskOutputs(ClusterService.java:612) ~[elasticsearch-5.5.1.jar:5.5.1] at org.elasticsearch.cluster.service.ClusterService.runTasks(ClusterService.java:571) ~[elasticsearch-5.5.1.jar:5.5.1] at org.elasticsearch.cluster.service.ClusterService$ClusterServiceTaskBatcher.run(ClusterService.java:263) ~[elasticsearch-5.5.1.jar:5.5.1] at org.elasticsearch.cluster.service.TaskBatcher.runIfNotProcessed(TaskBatcher.java:150) 
~[elasticsearch-5.5.1.jar:5.5.1] at org.elasticsearch.cluster.service.TaskBatcher$BatchedTask.run(TaskBatcher.java:188) ~[elasticsearch-5.5.1.jar:5.5.1]

zhangwonderful avatar Aug 30 '17 14:08 zhangwonderful

针对ik中文分词的,也报错,谢谢大神再指点一下,报错如下: Caused by: java.lang.IllegalArgumentException: Unknown analyzer type [org.elasticsearch.index.analysis.IkAnalyzerProvider] for [ik] at org.elasticsearch.index.analysis.AnalysisRegistry.getAnalysisProvider(AnalysisRegistry.java:387) ~[elasticsearch-5.5.1.jar:5.5.1] at org.elasticsearch.index.analysis.AnalysisRegistry.buildMapping(AnalysisRegistry.java:338) ~[elasticsearch-5.5.1.jar:5.5.1] at org.elasticsearch.index.analysis.AnalysisRegistry.buildAnalyzerFactories(AnalysisRegistry.java:186) ~[elasticsearch-5.5.1.jar:5.5.1] at org.elasticsearch.index.analysis.AnalysisRegistry.build(AnalysisRegistry.java:156) ~[elasticsearch-5.5.1.jar:5.5.1] at org.elasticsearch.index.IndexService.<init>(IndexService.java:145) ~[elasticsearch-5.5.1.jar:5.5.1] at org.elasticsearch.index.IndexModule.newIndexService(IndexModule.java:363) ~[elasticsearch-5.5.1.jar:5.5.1] at org.elasticsearch.indices.IndicesService.createIndexService(IndicesService.java:449) ~[elasticsearch-5.5.1.jar:5.5.1] at org.elasticsearch.indices.IndicesService.createIndex(IndicesService.java:414) ~[elasticsearch-5.5.1.jar:5.5.1] at org.elasticsearch.cluster.metadata.MetaDataCreateIndexService$1.execute(MetaDataCreateIndexService.java:366) ~[elasticsearch-5.5.1.jar:5.5.1] at org.elasticsearch.cluster.ClusterStateUpdateTask.execute(ClusterStateUpdateTask.java:45) ~[elasticsearch-5.5.1.jar:5.5.1] at org.elasticsearch.cluster.service.ClusterService.executeTasks(ClusterService.java:634) ~[elasticsearch-5.5.1.jar:5.5.1] at org.elasticsearch.cluster.service.ClusterService.calculateTaskOutputs(ClusterService.java:612) ~[elasticsearch-5.5.1.jar:5.5.1] at org.elasticsearch.cluster.service.ClusterService.runTasks(ClusterService.java:571) ~[elasticsearch-5.5.1.jar:5.5.1] at org.elasticsearch.cluster.service.ClusterService$ClusterServiceTaskBatcher.run(ClusterService.java:263) ~[elasticsearch-5.5.1.jar:5.5.1] at org.elasticsearch.cluster.service.TaskBatcher.runIfNotProcessed(TaskBatcher.java:150) 
~[elasticsearch-5.5.1.jar:5.5.1] at org.elasticsearch.cluster.service.TaskBatcher$BatchedTask.run(TaskBatcher.java:188) ~[elasticsearch-5.5.1.jar:5.5.1] at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingRunnable.run(ThreadContext.java:569) ~[elasticsearch-5.5.1.jar:5.5.1] at org.elasticsearch.common.util.concurrent.PrioritizedEsThreadPoolExecutor$TieBreakingPrioritizedRunnable.runAndClean(PrioritizedEsThreadPoolExecutor.java:247) ~[elasticsearch-5.5.1.jar:5.5.1] at org.elasticsearch.common.util.concurrent.PrioritizedEsThreadPoolExecutor$TieBreakingPrioritizedRunnable.run(PrioritizedEsThreadPoolExecutor.java:210) ~[elasticsearch-5.5.1.jar:5.5.1] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) ~[na:1.8.0_111] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) ~[na:1.8.0_111] at java.lang.Thread.run(Thread.java:745) ~[na:1.8.0_111]

我把analyzer中有关ik和mmseg的配置全部删除了,运行可以了,但是中文搜索不出来内容,字母数字组合的内容可以搜索出来。

zhangwonderful avatar Aug 31 '17 01:08 zhangwonderful

大神,在elasticsearch 5.5.1 中到底应该怎么配置settings和mappings才能保证ik和mmseg项目正常启动并实现中文分词搜索呢?我现在是中文搜索不了,其他可以,还望大神指点迷津,万分感谢。 settings.json配置如下: { "analysis": {

"tokenizer":{
    "my_pinyin":{
      "type":"pinyin",
      "first_letter":"prefix",
      "padding_char":""
    },
    "pinyin_first_letter":{
      "type":"pinyin",
      "first_letter":"only"
    },
   "mmseg_maxword":{
      "type":"mmseg_maxword",
      "seg_type":"max_word"
   },
  "mmseg_complex":{
    "type":"mmseg_complex",
    "seg_type":"complex"
  },
  "mmseg_simple":{
    "type":"mmseg_simple",
    "seg_type":"simple"
  },
  "semicolon_spliter":{
    "type":"pattern",
    "seg_type":";"
  },
  "pct_spliter":{
    "type":"pattern",
    "seg_type":"[%]+"
  }
},

"filter": {
  "ngram_min_2": {
    "max_gram": 10,
    "min_gram": 2,
    "type": "nGram"
  },
  "ngram_min_1": {
    "max_gram": 10,
    "min_gram": 1,
    "type": "nGram"
  },
  "min2_length": {
    "min": 2,
    "max": 4,
    "type": "length"
  },
  "custom_word_delimiter":{
    "type": "word_delimiter",
    "generate_word_parts": true,
    "generate_number_parts": true,
    "catenate_words": true,
    "catenate_numbers": false,
    "catenate_all": true,
    "split_on_case_change": true,
    "preserve_original": true,
    "split_on_numerics": false
  }
},
  "analyzer":{
    "default":{
      "type":"keyword",
      "tokenizer":"standard"
    },
    "lowercase_keyword":{
       "type":"custom",

       "tokenizer":"standard"
    },

    "lowercase_keyword_ngram":{
      "type":"custom",

      "tokenizer":"nGram"
    },
    "lowercase_keyword_without_standard":{
      "type":"custom",

      "tokenizer":"keyword"
    },
    "lowercase_whitespace":{
      "type":"custom",

      "tokenizer":"whitespace"
    },

    "comma_spliter":{
      "pattern":"[,|\\s]+",
      "type":"pattern",
      "tokenizer":"nGram"
    },
    "pct_spliter":{
      "pattern":"[%]+",
      "type":"pattern"
    },
    "custom_snowball_analyzer":{
      "language":"English",
      "type":"snowball",
      "tokenizer":"standard"
    },
    "simple_english_analyzer":{
      "tokenizer":"whitespace",

      "type":"custom"
    },
    "edge_ngram":{
      "tokenizer":"edgeNGram",

      "type":"custom"
    },
    "pinyin_ngram_analyzer":{
      "tokenizer":"my_pinyin",

      "type":"custom"
    },
    "pinyin_first_letter_analyzer":{
      "tokenizer":"pinyin_first_letter",

      "type":"custom"
    },
    "custom_auth_en_analyzer":{
      "tokenizer":"semicolon_spliter",

      "type":"custom"
    }
}

} }

mapping.json配置如下:

{ "_all": { "enabled": false }, "properties": { "id": { "type": "long" }, "vehicleMileage": { "type": "text", "analyzer": "mmseg_maxword", "search_analyzer": "mmseg_maxword" }, "customerName": { "type": "text", "analyzer": "mmseg_maxword", "search_analyzer": "mmseg_maxword" }, "planType": { "type": "text", "analyzer": "mmseg_maxword", "search_analyzer": "mmseg_maxword" }, "vin": { "type": "keyword" }, "saleNo": { "type": "keyword" }, "saleBy": { "type": "keyword" }, "organizationId": { "type": "keyword" }, "customerPhone": { "type": "keyword" }, "customerEmail": { "type": "keyword" }, "totalPrice": { "type": "keyword" }, "slipCreateBy": { "type": "keyword" }, "slipCreateDate": { "type": "keyword" }, "slipUpdateBy": { "type": "keyword" }, "slipUpdateDate": { "type": "keyword" }, "delFlag": { "type": "keyword" }, "slipId": { "type": "keyword" }, "slipPayState": { "type": "keyword" }, "vehicleId": { "type": "keyword" }, "vehicleCreateBy": { "type": "keyword" }, "vehicleCreateDate": { "type": "keyword" }, "vehicleUpdateBy": { "type": "keyword" }, "vehicleUpdateDate": { "type": "keyword" }, "vehicleRemarks": { "type": "keyword" }, "vehicleDelFlag": { "type": "keyword" }, "manufacturer": { "type": "keyword" }, "brand": { "type": "keyword" }, "comfuelConsumption": { "type": "keyword" }, "series": { "type": "keyword" }, "carType": { "type": "keyword" }, "carBody": { "type": "keyword" }, "vehicleName": { "type": "keyword" }, "carLevel": { "type": "keyword" }, "vehicleYear": { "type": "keyword" }, "vehiclePrice": { "type": "keyword" }, "manufacturDate": { "type": "keyword" }, "listingDate": { "type": "keyword" }, "transmissionType": { "type": "keyword" }, "gear": { "type": "keyword" }, "environmentalStandards": { "type": "keyword" }, "engine": { "type": "keyword" }, "gearbox": { "type": "keyword" }, "driceMode": { "type": "keyword" }, "frontTireSize": { "type": "keyword" }, "rearTireSize": { "type": "keyword" }, "engineType": { "type": "keyword" }, "resourceFrom": { "type": 
"keyword" } } }

zhangwonderful avatar Aug 31 '17 02:08 zhangwonderful

根本就不用配置,你这些都是多余的。mmseg_maxword、mmseg_complex、mmseg_simple 这些 analyzer 是插件自带的,直接在 mapping 里用就好了啊。

medcl avatar Sep 06 '17 03:09 medcl

好的,我调整配置再试试看,谢谢。

zhangwonderful avatar Sep 08 '17 04:09 zhangwonderful