diff --git a/src/main/java/org/elasticsearch/indices/analysis/smartcn/SmartChineseIndicesAnalysis.java b/src/main/java/org/elasticsearch/indices/analysis/smartcn/SmartChineseIndicesAnalysis.java new file mode 100644 index 0000000000000..de339ffe3bf71 --- /dev/null +++ b/src/main/java/org/elasticsearch/indices/analysis/smartcn/SmartChineseIndicesAnalysis.java @@ -0,0 +1,75 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.indices.analysis.smartcn; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.cn.smart.SentenceTokenizer; +import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer; +import org.apache.lucene.analysis.cn.smart.WordTokenFilter; +import org.elasticsearch.common.component.AbstractComponent; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.lucene.Lucene; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.analysis.*; +import org.elasticsearch.indices.analysis.IndicesAnalysisService; + +import java.io.Reader; + +/** + * Registers indices level analysis components so, if not explicitly configured, will be shared + * among all indices. 
+ */
+public class SmartChineseIndicesAnalysis extends AbstractComponent {
+    @Inject
+    public SmartChineseIndicesAnalysis(Settings settings, IndicesAnalysisService indicesAnalysisService) {
+        super(settings);
+
+        // Analyzer "smartcn": one SmartChineseAnalyzer instance, pre-built with INDICES scope.
+        SmartChineseAnalyzer sharedAnalyzer = new SmartChineseAnalyzer(Lucene.ANALYZER_VERSION);
+        indicesAnalysisService.analyzerProviderFactories().put("smartcn", new PreBuiltAnalyzerProviderFactory("smartcn", AnalyzerScope.INDICES, sharedAnalyzer));
+
+        // Token filter "smartcn_word": wraps the incoming stream in a WordTokenFilter.
+        TokenFilterFactory wordFilterFactory = new TokenFilterFactory() {
+            @Override
+            public String name() {
+                return "smartcn_word";
+            }
+            @Override
+            public TokenStream create(TokenStream tokenStream) {
+                return new WordTokenFilter(tokenStream);
+            }
+        };
+        indicesAnalysisService.tokenFilterFactories().put("smartcn_word", new PreBuiltTokenFilterFactoryFactory(wordFilterFactory));
+
+        // Tokenizer "smartcn_sentence": builds a SentenceTokenizer over the supplied reader.
+        TokenizerFactory sentenceTokenizerFactory = new TokenizerFactory() {
+            @Override
+            public String name() {
+                return "smartcn_sentence";
+            }
+            @Override
+            public Tokenizer create(Reader reader) {
+                return new SentenceTokenizer(reader);
+            }
+        };
+        indicesAnalysisService.tokenizerFactories().put("smartcn_sentence", new PreBuiltTokenizerFactoryFactory(sentenceTokenizerFactory));
+    }
+}
diff --git a/src/main/java/org/elasticsearch/indices/analysis/smartcn/SmartChineseIndicesAnalysisModule.java b/src/main/java/org/elasticsearch/indices/analysis/smartcn/SmartChineseIndicesAnalysisModule.java
new file mode 100644
index 0000000000000..e02ae80dfb51f
--- /dev/null
+++ b/src/main/java/org/elasticsearch/indices/analysis/smartcn/SmartChineseIndicesAnalysisModule.java
@@ -0,0 +1,32 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.indices.analysis.smartcn;
+
+import org.elasticsearch.common.inject.AbstractModule;
+
+/**
+ * Binds {@link SmartChineseIndicesAnalysis} as an eager singleton.
+ */
+public class SmartChineseIndicesAnalysisModule extends AbstractModule {
+    @Override
+    protected void configure() {
+        bind(SmartChineseIndicesAnalysis.class).asEagerSingleton();
+    }
+}
diff --git a/src/main/java/org/elasticsearch/plugin/analysis/smartcn/AnalysisSmartChinesePlugin.java b/src/main/java/org/elasticsearch/plugin/analysis/smartcn/AnalysisSmartChinesePlugin.java
index 3502e8df0498f..6e39e194b56cc 100644
--- a/src/main/java/org/elasticsearch/plugin/analysis/smartcn/AnalysisSmartChinesePlugin.java
+++ b/src/main/java/org/elasticsearch/plugin/analysis/smartcn/AnalysisSmartChinesePlugin.java
@@ -19,10 +19,15 @@
 
 package org.elasticsearch.plugin.analysis.smartcn;
 
+import org.elasticsearch.common.collect.ImmutableList;
+import org.elasticsearch.common.inject.Module;
 import org.elasticsearch.index.analysis.AnalysisModule;
 import org.elasticsearch.index.analysis.SmartChineseAnalysisBinderProcessor;
+import org.elasticsearch.indices.analysis.smartcn.SmartChineseIndicesAnalysisModule;
 import org.elasticsearch.plugins.AbstractPlugin;
 
+import java.util.Collection;
+
 /**
  *
  */
@@ -38,6 +43,14 @@ public String description() {
         return "Smart Chinese analysis support";
     }
 
+    /**
+     * Returns the module classes this plugin contributes: a single-element list holding {@link SmartChineseIndicesAnalysisModule}.
+     */
+    @Override
+    public Collection<Class<? extends Module>> modules() {
+        return ImmutableList.<Class<? extends Module>>of(SmartChineseIndicesAnalysisModule.class);
+    }
+
     public void onModule(AnalysisModule module) {
         module.addProcessor(new SmartChineseAnalysisBinderProcessor());
     }
diff --git
a/src/test/java/org/elasticsearch/index/analysis/SimpleSmartChineseIntegrationTests.java b/src/test/java/org/elasticsearch/index/analysis/SimpleSmartChineseIntegrationTests.java new file mode 100644 index 0000000000000..cca291d9e4d05 --- /dev/null +++ b/src/test/java/org/elasticsearch/index/analysis/SimpleSmartChineseIntegrationTests.java @@ -0,0 +1,62 @@ +/* + * Licensed to Elasticsearch (the "Author") under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Author licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+
+package org.elasticsearch.index.analysis;
+
+import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
+import org.elasticsearch.test.ElasticsearchIntegrationTest;
+import org.junit.Test;
+
+import java.util.concurrent.ExecutionException;
+
+import static org.hamcrest.CoreMatchers.*;
+
+@ElasticsearchIntegrationTest.ClusterScope(numNodes = 1, scope = ElasticsearchIntegrationTest.Scope.SUITE)
+public class SimpleSmartChineseIntegrationTests extends ElasticsearchIntegrationTest {
+
+    /** Asserts that the response is non-null and holds exactly {@code expected} tokens. */
+    private void assertTokenCount(AnalyzeResponse analyzed, int expected) {
+        assertThat(analyzed, notNullValue());
+        assertThat(analyzed.getTokens().size(), is(expected));
+    }
+
+    /** Runs the sample text through the "smartcn" analyzer and expects 2 tokens. */
+    @Test
+    public void testSmartcnAnalyzer() throws ExecutionException, InterruptedException {
+        assertTokenCount(client().admin().indices()
+                .prepareAnalyze("叻出色").setAnalyzer("smartcn")
+                .execute().get(), 2);
+    }
+
+    /** Runs the sample text through the "smartcn_sentence" tokenizer and expects 1 token. */
+    @Test
+    public void testSmartcnTokenizer() throws ExecutionException, InterruptedException {
+        assertTokenCount(client().admin().indices()
+                .prepareAnalyze("叻出色").setTokenizer("smartcn_sentence")
+                .execute().get(), 1);
+    }
+
+    /** Runs the sample text with only the "smartcn_word" token filter set and expects 3 tokens. */
+    @Test
+    public void testSmartcnTokenFilter() throws ExecutionException, InterruptedException {
+        assertTokenCount(client().admin().indices()
+                .prepareAnalyze("叻出色").setTokenFilters("smartcn_word")
+                .execute().get(), 3);
+    }
+}