From a874f47dd8e66b7204cd35e3623f5e34b588f38d Mon Sep 17 00:00:00 2001 From: Benjamin Trent Date: Wed, 14 Feb 2024 08:46:08 -0500 Subject: [PATCH] Include better output in profiling & toString for automaton based queries (#105468) We have various automaton based queries that build particular automatons based on their usage. However, the input text isn't part of the `toString` output, nor the usage of the current query (wildcard, prefix, etc.). This commit adds a couple of simple queries to wrap some of our logic to make profiling and other output more readable. Here is an example without this change: ``` #(-(winlog.event_data.TargetUserName:AutomatonQuery { org.apache.lucene.util.automaton.Automaton@2d13c057} winlog.event_data.TargetUserName:AutomatonQuery { org.apache.lucene.util.automaton.Automaton@28daf002} winlog.event_data.TargetUserName:AutomatonQuery { org.apache.lucene.util.automaton.Automaton@43c3d7f8} winlog.event_data.TargetUserName:AutomatonQuery { org.apache.lucene.util.automaton.Automaton@2f52905} winlog.event_data.TargetUserName:AutomatonQuery { org.apache.lucene.util.automaton.Automaton@31d75074}) ``` We have 5 case-insensitive automatons, but we don't know which is which in the profiling output. All we know is the originating field. I don't think we can update `AutomatonQuery` directly as sometimes the automaton created mutates the term (prefix, for example) and we lose the fact that we are searching for a prefix. 
--- docs/changelog/105468.yaml | 5 +++ .../lucene/search/AutomatonQueries.java | 7 ++-- .../search/CaseInsensitivePrefixQuery.java | 34 ++++++++++++++++++ .../search/CaseInsensitiveTermQuery.java | 32 +++++++++++++++++ .../search/CaseInsensitiveWildcardQuery.java | 36 +++++++++++++++++++ .../index/mapper/StringFieldType.java | 25 ++++--------- 6 files changed, 116 insertions(+), 23 deletions(-) create mode 100644 docs/changelog/105468.yaml create mode 100644 server/src/main/java/org/elasticsearch/common/lucene/search/CaseInsensitivePrefixQuery.java create mode 100644 server/src/main/java/org/elasticsearch/common/lucene/search/CaseInsensitiveTermQuery.java create mode 100644 server/src/main/java/org/elasticsearch/common/lucene/search/CaseInsensitiveWildcardQuery.java diff --git a/docs/changelog/105468.yaml b/docs/changelog/105468.yaml new file mode 100644 index 0000000000000..0de36a71862a4 --- /dev/null +++ b/docs/changelog/105468.yaml @@ -0,0 +1,5 @@ +pr: 105468 +summary: Include better output in profiling & `toString` for automaton based queries +area: Search +type: enhancement +issues: [] diff --git a/server/src/main/java/org/elasticsearch/common/lucene/search/AutomatonQueries.java b/server/src/main/java/org/elasticsearch/common/lucene/search/AutomatonQueries.java index 2a9059a046421..d6463fb28f6cf 100644 --- a/server/src/main/java/org/elasticsearch/common/lucene/search/AutomatonQueries.java +++ b/server/src/main/java/org/elasticsearch/common/lucene/search/AutomatonQueries.java @@ -44,18 +44,17 @@ public static Automaton caseInsensitivePrefix(String s) { /** Build an automaton query accepting all terms with the specified prefix, ASCII case insensitive. */ public static AutomatonQuery caseInsensitivePrefixQuery(Term prefix) { - return new AutomatonQuery(prefix, caseInsensitivePrefix(prefix.text())); + return new CaseInsensitivePrefixQuery(prefix); } /** Build an automaton accepting all terms ASCII case insensitive. 
*/ public static AutomatonQuery caseInsensitiveTermQuery(Term term) { - BytesRef prefix = term.bytes(); - return new AutomatonQuery(term, toCaseInsensitiveString(prefix)); + return new CaseInsensitiveTermQuery(term); } /** Build an automaton matching a wildcard pattern, ASCII case insensitive. */ public static AutomatonQuery caseInsensitiveWildcardQuery(Term wildcardquery) { - return new AutomatonQuery(wildcardquery, toCaseInsensitiveWildcardAutomaton(wildcardquery)); + return new CaseInsensitiveWildcardQuery(wildcardquery); } /** String equality with support for wildcards */ diff --git a/server/src/main/java/org/elasticsearch/common/lucene/search/CaseInsensitivePrefixQuery.java b/server/src/main/java/org/elasticsearch/common/lucene/search/CaseInsensitivePrefixQuery.java new file mode 100644 index 0000000000000..e83edaf1d9e22 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/common/lucene/search/CaseInsensitivePrefixQuery.java @@ -0,0 +1,34 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.common.lucene.search; + +import org.apache.lucene.index.Term; +import org.apache.lucene.search.AutomatonQuery; +import org.apache.lucene.search.MultiTermQuery; + +import static org.elasticsearch.common.lucene.search.AutomatonQueries.caseInsensitivePrefix; + +public class CaseInsensitivePrefixQuery extends AutomatonQuery { + public CaseInsensitivePrefixQuery(Term term) { + super(term, caseInsensitivePrefix(term.text())); + } + + public CaseInsensitivePrefixQuery(Term term, int determinizeWorkLimit, boolean isBinary) { + super(term, caseInsensitivePrefix(term.text()), determinizeWorkLimit, isBinary); + } + + public CaseInsensitivePrefixQuery(Term term, int determinizeWorkLimit, boolean isBinary, MultiTermQuery.RewriteMethod rewriteMethod) { + super(term, caseInsensitivePrefix(term.text()), determinizeWorkLimit, isBinary, rewriteMethod); + } + + @Override + public String toString(String field) { + return this.getClass().getSimpleName() + "{" + field + ":" + term.text() + "}"; + } +} diff --git a/server/src/main/java/org/elasticsearch/common/lucene/search/CaseInsensitiveTermQuery.java b/server/src/main/java/org/elasticsearch/common/lucene/search/CaseInsensitiveTermQuery.java new file mode 100644 index 0000000000000..639cd365a7fe6 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/common/lucene/search/CaseInsensitiveTermQuery.java @@ -0,0 +1,32 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.common.lucene.search; + +import org.apache.lucene.index.Term; +import org.apache.lucene.search.AutomatonQuery; + +import static org.elasticsearch.common.lucene.search.AutomatonQueries.toCaseInsensitiveString; + +/** + * A case insensitive term query. + */ +public class CaseInsensitiveTermQuery extends AutomatonQuery { + /** + * Constructs a case insensitive term query. + * @param term the term to search for, created into a case insensitive automaton + */ + public CaseInsensitiveTermQuery(Term term) { + super(term, toCaseInsensitiveString(term.bytes())); + } + + @Override + public String toString(String field) { + return this.getClass().getSimpleName() + "{" + field + ":" + term.text() + "}"; + } +} diff --git a/server/src/main/java/org/elasticsearch/common/lucene/search/CaseInsensitiveWildcardQuery.java b/server/src/main/java/org/elasticsearch/common/lucene/search/CaseInsensitiveWildcardQuery.java new file mode 100644 index 0000000000000..9480ce19e6c87 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/common/lucene/search/CaseInsensitiveWildcardQuery.java @@ -0,0 +1,36 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.common.lucene.search; + +import org.apache.lucene.index.Term; +import org.apache.lucene.search.AutomatonQuery; + +import static org.elasticsearch.common.lucene.search.AutomatonQueries.toCaseInsensitiveWildcardAutomaton; + +/** + * A case insensitive wildcard query. + */ +public class CaseInsensitiveWildcardQuery extends AutomatonQuery { + /** + * Constructs a case insensitive wildcard query. 
+ * @param term the term to search for, created into a case insensitive wildcard automaton + */ + public CaseInsensitiveWildcardQuery(Term term) { + super(term, toCaseInsensitiveWildcardAutomaton(term)); + } + + public CaseInsensitiveWildcardQuery(Term term, int determinizeWorkLimit, boolean isBinary, RewriteMethod rewriteMethod) { + super(term, toCaseInsensitiveWildcardAutomaton(term), determinizeWorkLimit, isBinary, rewriteMethod); + } + + @Override + public String toString(String field) { + return this.getClass().getSimpleName() + "{" + field + ":" + term.text() + "}"; + } +} diff --git a/server/src/main/java/org/elasticsearch/index/mapper/StringFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/StringFieldType.java index 6f68c2f67bdcd..778c733c745ac 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/StringFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/StringFieldType.java @@ -10,7 +10,6 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.index.Term; -import org.apache.lucene.search.AutomatonQuery; import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.PrefixQuery; @@ -23,6 +22,8 @@ import org.apache.lucene.util.automaton.Operations; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.common.lucene.BytesRefs; +import org.elasticsearch.common.lucene.search.CaseInsensitivePrefixQuery; +import org.elasticsearch.common.lucene.search.CaseInsensitiveWildcardQuery; import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.core.Nullable; import org.elasticsearch.index.query.SearchExecutionContext; @@ -31,8 +32,6 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; -import static org.elasticsearch.common.lucene.search.AutomatonQueries.caseInsensitivePrefix; -import static org.elasticsearch.common.lucene.search.AutomatonQueries.toCaseInsensitiveWildcardAutomaton; import static 
org.elasticsearch.search.SearchService.ALLOW_EXPENSIVE_QUERIES; /** Base class for {@link MappedFieldType} implementations that use the same @@ -102,14 +101,8 @@ public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, bool Term prefix = new Term(name(), indexedValueForSearch(value)); if (caseInsensitive) { return method == null - ? new AutomatonQuery(prefix, caseInsensitivePrefix(prefix.text()), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT, false) - : new AutomatonQuery( - prefix, - caseInsensitivePrefix(prefix.text()), - Operations.DEFAULT_DETERMINIZE_WORK_LIMIT, - false, - method - ); + ? new CaseInsensitivePrefixQuery(prefix, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT, false) + : new CaseInsensitivePrefixQuery(prefix, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT, false, method); } return method == null ? new PrefixQuery(prefix) : new PrefixQuery(prefix, method); } @@ -177,14 +170,8 @@ protected Query wildcardQuery( } if (caseInsensitive) { return method == null - ? new AutomatonQuery(term, toCaseInsensitiveWildcardAutomaton(term)) - : new AutomatonQuery( - term, - toCaseInsensitiveWildcardAutomaton(term), - Operations.DEFAULT_DETERMINIZE_WORK_LIMIT, - false, - method - ); + ? new CaseInsensitiveWildcardQuery(term) + : new CaseInsensitiveWildcardQuery(term, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT, false, method); } return method == null ? new WildcardQuery(term) : new WildcardQuery(term, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT, method); }