Allow reloading of search time analyzers (elastic#43313)
Currently, changing resources (like dictionaries, synonym files, etc.) of search-time
analyzers is only possible by closing an index, changing the underlying
resource (e.g. synonym files) and then re-opening the index for the change to
take effect.

This PR adds a new API endpoint that allows triggering a reload of certain
analysis resources (currently token filters) so that they pick up changes in their
underlying file resources. To achieve this we introduce a new type of custom
analyzer (ReloadableCustomAnalyzer) that uses a ReuseStrategy that allows
swapping out analysis components. Custom analyzers that contain filters that are
marked as "updateable" will automatically choose this implementation. This PR
also adds this capability to `synonym` token filters for use in search-time
analyzers.

Relates to elastic#29051
Christoph Büscher committed Jun 27, 2019
1 parent f39619d commit 37049b1
Showing 38 changed files with 1,454 additions and 120 deletions.
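
To make the mechanism in the commit message concrete, here is a minimal, hypothetical sketch (not the `ReloadableCustomAnalyzer` from this commit) of an analyzer that keeps its analysis chain behind a volatile reference and uses a `ReuseStrategy` that never caches components, so a swapped-in chain takes effect immediately. The class, method, and interface names below are invented for illustration; the real implementation builds its chain from the registered char filter, tokenizer and token filter factories and only reloads filters marked as updateable.

[source,java]
--------------------------------------------------
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;

/**
 * Illustrative sketch only: an analyzer whose components live behind a volatile
 * reference so they can be swapped at runtime, e.g. after re-reading a synonym file.
 */
public final class SwappableAnalyzerSketch extends Analyzer {

    /** Minimal stand-in for a components holder: wraps a tokenizer with token filters. */
    public interface Components {
        TokenStream wrapFilters(Tokenizer tokenizer);
    }

    // Never reuse cached TokenStreamComponents, so a freshly swapped-in chain is used
    // on the next tokenStream() call; the commit's strategy is smarter and only rebuilds
    // when the underlying components have actually changed.
    private static final ReuseStrategy NO_REUSE = new ReuseStrategy() {
        @Override
        public TokenStreamComponents getReusableComponents(Analyzer analyzer, String fieldName) {
            return null; // always rebuild
        }

        @Override
        public void setReusableComponents(Analyzer analyzer, String fieldName, TokenStreamComponents components) {
            // intentionally not cached
        }
    };

    private volatile Components current;

    public SwappableAnalyzerSketch(Components initial) {
        super(NO_REUSE);
        this.current = initial;
    }

    /** Called from the reload path to swap in freshly built analysis components. */
    public void swapComponents(Components fresh) {
        this.current = fresh;
    }

    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
        Components components = this.current; // read the reference once for a consistent chain
        Tokenizer tokenizer = new WhitespaceTokenizer();
        return new TokenStreamComponents(tokenizer, components.wrapFilters(tokenizer));
    }
}
--------------------------------------------------
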
@@ -730,8 +730,8 @@ public void testApiNamingConventions() throws Exception {
"indices.exists_type",
"indices.get_upgrade",
"indices.put_alias",
"scripts_painless_execute",
"render_search_template"
"render_search_template",
"scripts_painless_execute"
};
//These API are not required for high-level client feature completeness
String[] notRequiredApi = new String[] {
@@ -43,6 +43,8 @@ Additional settings are:
* `expand` (defaults to `true`).
* `lenient` (defaults to `false`). If `true`, ignores exceptions while parsing the synonym configuration. Note that
only those synonym rules which cannot be parsed are ignored. For instance, consider the following request:



[source,js]
--------------------------------------------------
72 changes: 72 additions & 0 deletions docs/reference/indices/apis/reload-analyzers.asciidoc
@@ -0,0 +1,72 @@
[role="xpack"]
[testenv="basic"]
[[indices-reload-analyzers]]
== Reload Search Analyzers

experimental[]

Reloads search analyzers and their resources.

Synonym filters (both `synonym` and `synonym_graph`) can be declared as
updateable if they are only used in <<search-analyzer,search analyzers>>
with the `updateable` flag:

[source,js]
--------------------------------------------------
PUT /my_index
{
"settings": {
"index" : {
"analysis" : {
"analyzer" : {
"my_synonyms" : {
"tokenizer" : "whitespace",
"filter" : ["synonym"]
}
},
"filter" : {
"synonym" : {
"type" : "synonym",
"synonyms_path" : "analysis/synonym.txt",
"updateable" : true <1>
}
}
}
}
},
"mappings": {
"properties": {
"text": {
"type": "text",
"analyzer" : "standard",
"search_analyzer": "my_synonyms" <2>
}
}
}
}
--------------------------------------------------
// CONSOLE

<1> Mark the synonym filter as updateable.
<2> An analyzer using an updateable filter can only be used as a `search_analyzer`.

NOTE: Trying to use the above analyzer as an index analyzer will result in an error.

Using the <<indices-reload-analyzers,analyzer reload API>>, you can trigger reloading of the
synonym definition. The contents of the configured synonyms file will be reloaded and the
synonym definition the filter uses will be updated.

The `_reload_search_analyzers` API can be run on one or more indices and will trigger
reloading of the synonyms from the configured file.

NOTE: Reloading happens on every node that holds shards of the index, so it's important
to update the synonym file contents on every data node (even the ones that don't currently
hold shard copies; shards might be relocated there in the future) before calling
reload, to ensure the new state of the file is reflected everywhere in the cluster.

[source,js]
--------------------------------------------------
POST /my_index/_reload_search_analyzers
--------------------------------------------------
// CONSOLE
// TEST[s/^/PUT my_index\n/]
2 changes: 2 additions & 0 deletions docs/reference/rest-api/index.asciidoc
@@ -15,6 +15,7 @@ not be included yet.
* <<data-frame-apis,{dataframe-cap} APIs>>
* <<graph-explore-api,Graph Explore API>>
* <<freeze-index-api>>, <<unfreeze-index-api>>
* <<indices-reload-analyzers,Reload Search Analyzers API>>
* <<index-lifecycle-management-api,Index lifecycle management APIs>>
* <<licensing-apis,Licensing APIs>>
* <<ml-apis,Machine Learning APIs>>
@@ -38,4 +39,5 @@ include::{es-repo-dir}/rollup/rollup-api.asciidoc[]
include::{xes-repo-dir}/rest-api/security.asciidoc[]
include::{es-repo-dir}/indices/apis/unfreeze.asciidoc[]
include::{xes-repo-dir}/rest-api/watcher.asciidoc[]
include::{es-repo-dir}/indices/apis/reload-analyzers.asciidoc[]
include::defs.asciidoc[]
@@ -30,6 +30,7 @@
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
import org.elasticsearch.index.analysis.Analysis;
import org.elasticsearch.index.analysis.AnalysisMode;
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.CustomAnalyzer;
import org.elasticsearch.index.analysis.TokenFilterFactory;
@@ -50,6 +51,7 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
private final boolean lenient;
protected final Settings settings;
protected final Environment environment;
private final boolean updateable;

SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env,
String name, Settings settings) {
@@ -65,9 +67,15 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
this.expand = settings.getAsBoolean("expand", true);
this.lenient = settings.getAsBoolean("lenient", false);
this.format = settings.get("format", "");
this.updateable = settings.getAsBoolean("updateable", false);
this.environment = env;
}

@Override
public AnalysisMode getAnalysisMode() {
return this.updateable ? AnalysisMode.SEARCH_TIME : AnalysisMode.ALL;
}

@Override
public TokenStream create(TokenStream tokenStream) {
throw new IllegalStateException("Call createPerAnalyzerSynonymFactory to specialize this factory for an analysis chain first");
@@ -98,6 +106,11 @@ public TokenFilterFactory getSynonymFilter() {
// which doesn't support stacked input tokens
return IDENTITY_FILTER;
}

@Override
public AnalysisMode getAnalysisMode() {
return updateable ? AnalysisMode.SEARCH_TIME : AnalysisMode.ALL;
}
};
}

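Because `getAnalysisMode()` now returns `AnalysisMode.SEARCH_TIME` for updateable synonym filters, analyzer construction can reject such a filter anywhere it would also run at index time, which is the error the reload-analyzers documentation above refers to. The following is only a hypothetical sketch of such a check; the helper name and the exact validation performed in this commit are assumptions.

[source,java]
--------------------------------------------------
import java.util.List;

import org.elasticsearch.index.analysis.AnalysisMode;
import org.elasticsearch.index.analysis.TokenFilterFactory;

final class AnalysisModeCheckSketch {

    /**
     * Hypothetical validation: refuse to build an index-time analyzer that contains
     * a filter usable only at search time. The commit's real check may differ.
     */
    static void ensureUsableAtIndexTime(String analyzerName, List<TokenFilterFactory> filters) {
        for (TokenFilterFactory filter : filters) {
            if (filter.getAnalysisMode() == AnalysisMode.SEARCH_TIME) {
                throw new IllegalArgumentException("analyzer [" + analyzerName + "] contains filter ["
                        + filter.name() + "] that is only usable at search time");
            }
        }
    }
}
--------------------------------------------------
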
@@ -42,8 +42,9 @@
import org.elasticsearch.index.IndexService;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AnalysisRegistry;
import org.elasticsearch.index.analysis.AnalyzerComponents;
import org.elasticsearch.index.analysis.AnalyzerComponentsProvider;
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.CustomAnalyzer;
import org.elasticsearch.index.analysis.NameOrDefinition;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.analysis.TokenFilterFactory;
@@ -261,18 +262,23 @@ private static AnalyzeAction.DetailAnalyzeResponse detailAnalyze(AnalyzeAction.R
}
}

CustomAnalyzer customAnalyzer = null;
if (analyzer instanceof CustomAnalyzer) {
customAnalyzer = (CustomAnalyzer) analyzer;
} else if (analyzer instanceof NamedAnalyzer && ((NamedAnalyzer) analyzer).analyzer() instanceof CustomAnalyzer) {
customAnalyzer = (CustomAnalyzer) ((NamedAnalyzer) analyzer).analyzer();
// maybe unwrap analyzer from NamedAnalyzer
Analyzer potentialCustomAnalyzer = analyzer;
if (analyzer instanceof NamedAnalyzer) {
potentialCustomAnalyzer = ((NamedAnalyzer) analyzer).analyzer();
}

if (customAnalyzer != null) {
// customAnalyzer = divide charfilter, tokenizer tokenfilters
CharFilterFactory[] charFilterFactories = customAnalyzer.charFilters();
TokenizerFactory tokenizerFactory = customAnalyzer.tokenizerFactory();
TokenFilterFactory[] tokenFilterFactories = customAnalyzer.tokenFilters();
if (potentialCustomAnalyzer instanceof AnalyzerComponentsProvider) {
AnalyzerComponentsProvider customAnalyzer = (AnalyzerComponentsProvider) potentialCustomAnalyzer;
// note: this is not field-name dependent in our cases so we can leave out the argument
int positionIncrementGap = potentialCustomAnalyzer.getPositionIncrementGap("");
int offsetGap = potentialCustomAnalyzer.getOffsetGap("");
AnalyzerComponents components = customAnalyzer.getComponents();
// divide charfilter, tokenizer tokenfilters
CharFilterFactory[] charFilterFactories = components.getCharFilters();
TokenizerFactory tokenizerFactory = components.getTokenizerFactory();
TokenFilterFactory[] tokenFilterFactories = components.getTokenFilters();
String tokenizerName = components.getTokenizerName();

String[][] charFiltersTexts = new String[charFilterFactories != null ? charFilterFactories.length : 0][request.text().length];
TokenListCreator[] tokenFiltersTokenListCreator = new TokenListCreator[tokenFilterFactories != null ?
@@ -298,7 +304,7 @@ private static AnalyzeAction.DetailAnalyzeResponse detailAnalyze(AnalyzeAction.R
// analyzing only tokenizer
Tokenizer tokenizer = tokenizerFactory.create();
tokenizer.setReader(reader);
tokenizerTokenListCreator.analyze(tokenizer, customAnalyzer, includeAttributes);
tokenizerTokenListCreator.analyze(tokenizer, includeAttributes, positionIncrementGap, offsetGap);

// analyzing each tokenfilter
if (tokenFilterFactories != null) {
@@ -308,7 +314,7 @@ private static AnalyzeAction.DetailAnalyzeResponse detailAnalyze(AnalyzeAction.R
}
TokenStream stream = createStackedTokenStream(request.text()[textIndex],
charFilterFactories, tokenizerFactory, tokenFilterFactories, tokenFilterIndex + 1);
tokenFiltersTokenListCreator[tokenFilterIndex].analyze(stream, customAnalyzer, includeAttributes);
tokenFiltersTokenListCreator[tokenFilterIndex].analyze(stream, includeAttributes, positionIncrementGap, offsetGap);
}
}
}
@@ -331,8 +337,8 @@ private static AnalyzeAction.DetailAnalyzeResponse detailAnalyze(AnalyzeAction.R
tokenFilterFactories[tokenFilterIndex].name(), tokenFiltersTokenListCreator[tokenFilterIndex].getArrayTokens());
}
}
detailResponse = new AnalyzeAction.DetailAnalyzeResponse(charFilteredLists, new AnalyzeAction.AnalyzeTokenList(
customAnalyzer.getTokenizerName(), tokenizerTokenListCreator.getArrayTokens()), tokenFilterLists);
detailResponse = new AnalyzeAction.DetailAnalyzeResponse(charFilteredLists,
new AnalyzeAction.AnalyzeTokenList(tokenizerName, tokenizerTokenListCreator.getArrayTokens()), tokenFilterLists);
} else {
String name;
if (analyzer instanceof NamedAnalyzer) {
@@ -343,8 +349,8 @@ private static AnalyzeAction.DetailAnalyzeResponse detailAnalyze(AnalyzeAction.R

TokenListCreator tokenListCreator = new TokenListCreator(maxTokenCount);
for (String text : request.text()) {
tokenListCreator.analyze(analyzer.tokenStream("", text), analyzer,
includeAttributes);
tokenListCreator.analyze(analyzer.tokenStream("", text), includeAttributes, analyzer.getPositionIncrementGap(""),
analyzer.getOffsetGap(""));
}
detailResponse
= new AnalyzeAction.DetailAnalyzeResponse(new AnalyzeAction.AnalyzeTokenList(name, tokenListCreator.getArrayTokens()));
@@ -414,7 +420,7 @@ private static class TokenListCreator {
tc = new TokenCounter(maxTokenCount);
}

private void analyze(TokenStream stream, Analyzer analyzer, Set<String> includeAttributes) {
private void analyze(TokenStream stream, Set<String> includeAttributes, int positionIncrementGap, int offsetGap) {
try {
stream.reset();
CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
@@ -437,8 +443,8 @@ private void analyze(TokenStream stream, Analyzer analyzer, Set<String> includeA
lastOffset += offset.endOffset();
lastPosition += posIncr.getPositionIncrement();

lastPosition += analyzer.getPositionIncrementGap("");
lastOffset += analyzer.getOffsetGap("");
lastPosition += positionIncrementGap;
lastOffset += offsetGap;

} catch (IOException e) {
throw new ElasticsearchException("failed to analyze", e);
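The change above drops the hard cast to `CustomAnalyzer` and instead works against `AnalyzerComponentsProvider`, so any analyzer that can expose its components (including the new reloadable custom analyzer) gets the detailed per-component `_analyze` output. Inferred purely from the calls in this diff, the provider and components types expose roughly the shape sketched below; the real classes in the commit may declare additional members, and the `*Sketch` names are placeholders.

[source,java]
--------------------------------------------------
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.index.analysis.TokenizerFactory;

// Shape inferred from the calls in TransportAnalyzeAction above; not copied from the commit.
interface AnalyzerComponentsProviderSketch {
    AnalyzerComponentsSketch getComponents();
}

// Carrier for the pieces of a custom analysis chain, as consumed by detailAnalyze().
interface AnalyzerComponentsSketch {
    String getTokenizerName();
    TokenizerFactory getTokenizerFactory();
    CharFilterFactory[] getCharFilters();
    TokenFilterFactory[] getTokenFilters();
}
--------------------------------------------------
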
@@ -818,4 +818,5 @@ public interface IndicesAdminClient extends ElasticsearchClient {
* Swaps the index pointed to by an alias given all provided conditions are satisfied
*/
void rolloverIndex(RolloverRequest request, ActionListener<RolloverResponse> listener);

}
@@ -534,5 +534,4 @@ public static DeleteSnapshotRequest deleteSnapshotRequest(String repository, Str
public static SnapshotsStatusRequest snapshotsStatusRequest(String repository) {
return new SnapshotsStatusRequest(repository);
}

}
@@ -527,7 +527,6 @@ public IndexAnalyzers build(IndexSettings indexSettings,
Map<String, TokenizerFactory> tokenizerFactoryFactories,
Map<String, CharFilterFactory> charFilterFactoryFactories,
Map<String, TokenFilterFactory> tokenFilterFactoryFactories) {

Map<String, NamedAnalyzer> analyzers = new HashMap<>();
Map<String, NamedAnalyzer> normalizers = new HashMap<>();
Map<String, NamedAnalyzer> whitespaceNormalizers = new HashMap<>();
@@ -569,9 +568,11 @@ public IndexAnalyzers build(IndexSettings indexSettings,
return new IndexAnalyzers(analyzers, normalizers, whitespaceNormalizers);
}

private static NamedAnalyzer produceAnalyzer(String name, AnalyzerProvider<?> analyzerFactory,
Map<String, TokenFilterFactory> tokenFilters, Map<String, CharFilterFactory> charFilters,
Map<String, TokenizerFactory> tokenizers) {
private static NamedAnalyzer produceAnalyzer(String name,
AnalyzerProvider<?> analyzerFactory,
Map<String, TokenFilterFactory> tokenFilters,
Map<String, CharFilterFactory> charFilters,
Map<String, TokenizerFactory> tokenizers) {
/*
* Lucene defaults positionIncrementGap to 0 in all analyzers but
* Elasticsearch defaults them to 0 only before version 2.0