elasticsearch-analysis-mmseg
elasticsearch-analysis-mmseg copied to clipboard
Unknown tokenizer type [mmseg] for [mmseg_maxword]
服务器环境:elasticsearch5.5.1
分词插件版本:5.5.1
客户端程序环境:spring-boot
运行客户端程序报错:Caused by: java.lang.IllegalArgumentException: Unknown tokenizer type [mmseg] for [mmseg_maxword]
at org.elasticsearch.index.analysis.AnalysisRegistry.getAnalysisProvider(AnalysisRegistry.java:387) ~[elasticsearch-5.5.1.jar:5.5.1]
at org.elasticsearch.index.analysis.AnalysisRegistry.buildMapping(AnalysisRegistry.java:338) ~[elasticsearch-5.5.1.jar:5.5.1]
at org.elasticsearch.index.analysis.AnalysisRegistry.buildTokenizerFactories(AnalysisRegistry.java:176) ~[elasticsearch-5.5.1.jar:5.5.1]
at org.elasticsearch.index.analysis.AnalysisRegistry.build(AnalysisRegistry.java:154) ~[elasticsearch-5.5.1.jar:5.5.1]
at org.elasticsearch.index.IndexService.&lt;init&gt;(...) ~[elasticsearch-5.5.1.jar:5.5.1]
"tokenizer":{
"mmseg_maxword":{
"type":"mmseg",
"seg_type":"max_word"
},
"mmseg_complex":{
"type":"mmseg",
"seg_type":"complex"
},
"mmseg_simple":{
"type":"mmseg",
"seg_type":"simple"
},
"semicolon_spliter":{
"type":"pattern",
"seg_type":";"
},
"pct_spliter":{
"type":"pattern",
"seg_type":"[%]+"
}
},
"filter": {
"ngram_min_2":{
"max_gram": 10,
"min_gram": 2,
"type": "nGram"
},
"ngram_min_1":{
"max_gram": 10,
"min_gram": 1,
"type": "nGram"
},
"min2_length":{
"min": 2,
"max": 4,
"type": "length"
},
"analyzer":{
"default":{
"type":"keyword"
},
"lowercase_keyword":{
"type":"custom",
"filter":"[standard,lowercase]",
"tokenizer":"standard"
},
"lowercase_keyword_ngram_min_size1":{
"type":"custom",
"filter":"[ngram_min_1,standard,lowercase] ",
"tokenizer":"nGram"
},
"lowercase_keyword_ngram_min_size2":{
"type":"custom",
"filter":"[ngram_min_2,standard,lowercase,min2_length,stop] ",
"tokenizer":"nGram"
},
"lowercase_keyword_ngram":{
"type":"custom",
"filter":"[ngram_min_1,standard,lowercase] ",
"tokenizer":"nGram"
},
"lowercase_keyword_without_standard":{
"type":"custom",
"filter":"[lowercase]",
"tokenizer":"keyword"
},
"lowercase_whitespace":{
"type":"custom",
"filter":"[lowercase]",
"tokenizer":"whitespace"
},
"mmseg":{
"alias":"[mmseg_analyzer]",
"type":"org.elasticsearch.index.analysis.MMsegAnalyzerProvider"
},
"comma_spliter":{
"pattern":"[,|\\s]+",
"type":"pattern"
},
"pct_spliter":{
"pattern":"[%]+",
"type":"pattern"
},
"custom_snowball_analyzer":{
"language":"English",
"type":"snowball"
},
"simple_english_analyzer":{
"tokenizer":"whitespace",
"filter":"[standard,lowercase,snowball]",
"type":"custome"
},
"edge_ngram":{
"tokenizer":"edgeNGram",
"filter":"[lowercase]",
"type":"custome"
},
"custom_auth_en_analyzer":{
"tokenizer":"semicolon_spliter",
"filter":"[standard,snowball,lowercase,trim]",
"type":"custome"
}
}
}
} }
请问大牛如何解决啊,我的spring boot程序启动就报错了。
mmseg 这个 analyzer 类型没有了。
The plugin ships with analyzers: mmseg_maxword, mmseg_complex, mmseg_simple; tokenizers: mmseg_maxword, mmseg_complex, mmseg_simple; and token filter: cut_letter_digit.
上面的配置: "type":"custome" 要改成:
"type":"custom"
谢谢,我修改了,错误变了。
Caused by: java.lang.IllegalArgumentException: Unknown analyzer type [org.elasticsearch.index.analysis.MMsegAnalyzerProvider] for [mmseg]
at org.elasticsearch.index.analysis.AnalysisRegistry.getAnalysisProvider(AnalysisRegistry.java:387) ~[elasticsearch-5.5.1.jar:5.5.1]
at org.elasticsearch.index.analysis.AnalysisRegistry.buildMapping(AnalysisRegistry.java:338) ~[elasticsearch-5.5.1.jar:5.5.1]
at org.elasticsearch.index.analysis.AnalysisRegistry.buildAnalyzerFactories(AnalysisRegistry.java:186) ~[elasticsearch-5.5.1.jar:5.5.1]
at org.elasticsearch.index.analysis.AnalysisRegistry.build(AnalysisRegistry.java:156) ~[elasticsearch-5.5.1.jar:5.5.1]
at org.elasticsearch.index.IndexService.&lt;init&gt;(...) ~[elasticsearch-5.5.1.jar:5.5.1]
针对ik中文分词的,也报错,谢谢大神再指点一下,报错如下:
Caused by: java.lang.IllegalArgumentException: Unknown analyzer type [org.elasticsearch.index.analysis.IkAnalyzerProvider] for [ik]
at org.elasticsearch.index.analysis.AnalysisRegistry.getAnalysisProvider(AnalysisRegistry.java:387) ~[elasticsearch-5.5.1.jar:5.5.1]
at org.elasticsearch.index.analysis.AnalysisRegistry.buildMapping(AnalysisRegistry.java:338) ~[elasticsearch-5.5.1.jar:5.5.1]
at org.elasticsearch.index.analysis.AnalysisRegistry.buildAnalyzerFactories(AnalysisRegistry.java:186) ~[elasticsearch-5.5.1.jar:5.5.1]
at org.elasticsearch.index.analysis.AnalysisRegistry.build(AnalysisRegistry.java:156) ~[elasticsearch-5.5.1.jar:5.5.1]
at org.elasticsearch.index.IndexService.&lt;init&gt;(...) ~[elasticsearch-5.5.1.jar:5.5.1]
我把analyzer中有关ik和mmseg的配置全部删除了,运行可以了,但是中文搜索不出来内容,字母数字组合的内容可以搜索出来。
大神,在elasticsearch 5.5.1 中到底应该怎么配置settings和mappings才能保证ik和mmseg项目正常启动并实现中文分词搜索呢?我现在是中文搜索不了,其他可以,还望大神指点迷津,万分感谢。 settings.json配置如下: { "analysis": {
"tokenizer":{
"my_pinyin":{
"type":"pinyin",
"first_letter":"prefix",
"padding_char":""
},
"pinyin_first_letter":{
"type":"pinyin",
"first_letter":"only"
},
"mmseg_maxword":{
"type":"mmseg_maxword",
"seg_type":"max_word"
},
"mmseg_complex":{
"type":"mmseg_complex",
"seg_type":"complex"
},
"mmseg_simple":{
"type":"mmseg_simple",
"seg_type":"simple"
},
"semicolon_spliter":{
"type":"pattern",
"seg_type":";"
},
"pct_spliter":{
"type":"pattern",
"seg_type":"[%]+"
}
},
"filter": {
"ngram_min_2": {
"max_gram": 10,
"min_gram": 2,
"type": "nGram"
},
"ngram_min_1": {
"max_gram": 10,
"min_gram": 1,
"type": "nGram"
},
"min2_length": {
"min": 2,
"max": 4,
"type": "length"
},
"custom_word_delimiter":{
"type": "word_delimiter",
"generate_word_parts": true,
"generate_number_parts": true,
"catenate_words": true,
"catenate_numbers": false,
"catenate_all": true,
"split_on_case_change": true,
"preserve_original": true,
"split_on_numerics": false
}
},
"analyzer":{
"default":{
"type":"keyword",
"tokenizer":"standard"
},
"lowercase_keyword":{
"type":"custom",
"tokenizer":"standard"
},
"lowercase_keyword_ngram":{
"type":"custom",
"tokenizer":"nGram"
},
"lowercase_keyword_without_standard":{
"type":"custom",
"tokenizer":"keyword"
},
"lowercase_whitespace":{
"type":"custom",
"tokenizer":"whitespace"
},
"comma_spliter":{
"pattern":"[,|\\s]+",
"type":"pattern",
"tokenizer":"nGram"
},
"pct_spliter":{
"pattern":"[%]+",
"type":"pattern"
},
"custom_snowball_analyzer":{
"language":"English",
"type":"snowball",
"tokenizer":"standard"
},
"simple_english_analyzer":{
"tokenizer":"whitespace",
"type":"custom"
},
"edge_ngram":{
"tokenizer":"edgeNGram",
"type":"custom"
},
"pinyin_ngram_analyzer":{
"tokenizer":"my_pinyin",
"type":"custom"
},
"pinyin_first_letter_analyzer":{
"tokenizer":"pinyin_first_letter",
"type":"custom"
},
"custom_auth_en_analyzer":{
"tokenizer":"semicolon_spliter",
"type":"custom"
}
}
} }
mapping.json配置如下:
{ "_all": { "enabled": false }, "properties": { "id": { "type": "long" }, "vehicleMileage": { "type": "text", "analyzer": "mmseg_maxword", "search_analyzer": "mmseg_maxword" }, "customerName": { "type": "text", "analyzer": "mmseg_maxword", "search_analyzer": "mmseg_maxword" }, "planType": { "type": "text", "analyzer": "mmseg_maxword", "search_analyzer": "mmseg_maxword" }, "vin": { "type": "keyword" }, "saleNo": { "type": "keyword" }, "saleBy": { "type": "keyword" }, "organizationId": { "type": "keyword" }, "customerPhone": { "type": "keyword" }, "customerEmail": { "type": "keyword" }, "totalPrice": { "type": "keyword" }, "slipCreateBy": { "type": "keyword" }, "slipCreateDate": { "type": "keyword" }, "slipUpdateBy": { "type": "keyword" }, "slipUpdateDate": { "type": "keyword" }, "delFlag": { "type": "keyword" }, "slipId": { "type": "keyword" }, "slipPayState": { "type": "keyword" }, "vehicleId": { "type": "keyword" }, "vehicleCreateBy": { "type": "keyword" }, "vehicleCreateDate": { "type": "keyword" }, "vehicleUpdateBy": { "type": "keyword" }, "vehicleUpdateDate": { "type": "keyword" }, "vehicleRemarks": { "type": "keyword" }, "vehicleDelFlag": { "type": "keyword" }, "manufacturer": { "type": "keyword" }, "brand": { "type": "keyword" }, "comfuelConsumption": { "type": "keyword" }, "series": { "type": "keyword" }, "carType": { "type": "keyword" }, "carBody": { "type": "keyword" }, "vehicleName": { "type": "keyword" }, "carLevel": { "type": "keyword" }, "vehicleYear": { "type": "keyword" }, "vehiclePrice": { "type": "keyword" }, "manufacturDate": { "type": "keyword" }, "listingDate": { "type": "keyword" }, "transmissionType": { "type": "keyword" }, "gear": { "type": "keyword" }, "environmentalStandards": { "type": "keyword" }, "engine": { "type": "keyword" }, "gearbox": { "type": "keyword" }, "driceMode": { "type": "keyword" }, "frontTireSize": { "type": "keyword" }, "rearTireSize": { "type": "keyword" }, "engineType": { "type": "keyword" }, "resourceFrom": { "type": 
"keyword" } } }
根本就不用配置,你这些都是多余的,这些 analyzer直接用就好了啊。
好的,我调整配置再试试看,谢谢。