Skip to content

Commit

Permalink
Multi term intervals: increase max_expansions (#112826)
Browse files Browse the repository at this point in the history
Currently, multi-term interval queries (prefix, wildcard, fuzzy, regexp and range) can
expand to at most 128 terms. To reach parity with the span queries that we want to deprecate,
set the maximum number of expansions to indices.query.bool.max_clause_count, the limit that span queries use.

Relates to #110491
  • Loading branch information
mayya-sharipova authored Sep 19, 2024
1 parent 9eec2c4 commit 7150729
Show file tree
Hide file tree
Showing 8 changed files with 205 additions and 49 deletions.
6 changes: 6 additions & 0 deletions docs/changelog/112826.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 112826
summary: "Multi term intervals: increase max_expansions"
area: Search
type: enhancement
issues:
- 110491
21 changes: 13 additions & 8 deletions docs/reference/query-dsl/intervals-query.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,9 @@ unstemmed ones.
==== `prefix` rule parameters

The `prefix` rule matches terms that start with a specified set of characters.
This prefix can expand to match at most 128 terms. If the prefix matches more
than 128 terms, {es} returns an error. You can use the
This prefix can expand to match at most the number of terms set by the
`indices.query.bool.max_clause_count` <<search-settings,search setting>>.
If the prefix matches more terms, {es} returns an error. You can use the
<<index-prefixes,`index-prefixes`>> option in the field mapping to avoid this
limit.

Expand All @@ -151,7 +152,8 @@ separate `analyzer` is specified.
==== `wildcard` rule parameters

The `wildcard` rule matches terms using a wildcard pattern. This pattern can
expand to match at most 128 terms. If the pattern matches more than 128 terms,
expand to match at most the number of terms set by the
`indices.query.bool.max_clause_count` <<search-settings,search setting>>.
If the pattern matches more terms,
{es} returns an error.

`pattern`::
Expand Down Expand Up @@ -184,8 +186,9 @@ The `pattern` is normalized using the search analyzer from this field, unless
==== `regexp` rule parameters

The `regexp` rule matches terms using a regular expression pattern.
This pattern can expand to match at most 128 terms.
If the pattern matches more than 128 terms, {es} returns an error.
This pattern can expand to match at most the number of terms set by the
`indices.query.bool.max_clause_count` <<search-settings,search setting>>.
If the pattern matches more terms, {es} returns an error.

`pattern`::
(Required, string) Regexp pattern used to find matching terms.
Expand Down Expand Up @@ -215,7 +218,8 @@ The `pattern` is normalized using the search analyzer from this field, unless

The `fuzzy` rule matches terms that are similar to the provided term, within an
edit distance defined by <<fuzziness>>. If the fuzzy expansion matches more than
128 terms, {es} returns an error.
the number of terms allowed by the `indices.query.bool.max_clause_count`
<<search-settings,search setting>>, {es} returns an error.

`term`::
(Required, string) The term to match
Expand Down Expand Up @@ -250,8 +254,9 @@ The `term` is normalized using the search analyzer from this field, unless
==== `range` rule parameters

The `range` rule matches terms contained within a provided range.
This range can expand to match at most 128 terms.
If the range matches more than 128 terms, {es} returns an error.
This range can expand to match at most the number of terms set by the
`indices.query.bool.max_clause_count` <<search-settings,search setting>>.
If the range matches more terms, {es} returns an error.

`gt`::
(Optional, string) Greater than: match terms greater than the provided term.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.apache.lucene.queries.intervals.IntervalsSource;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.PrefixQuery;
Expand Down Expand Up @@ -270,7 +271,11 @@ public IntervalsSource termIntervals(BytesRef term, SearchExecutionContext conte

@Override
public IntervalsSource prefixIntervals(BytesRef term, SearchExecutionContext context) {
return toIntervalsSource(Intervals.prefix(term), new PrefixQuery(new Term(name(), term)), context);
return toIntervalsSource(
Intervals.prefix(term, IndexSearcher.getMaxClauseCount()),
new PrefixQuery(new Term(name(), term)),
context
);
}

@Override
Expand All @@ -285,18 +290,18 @@ public IntervalsSource fuzzyIntervals(
new Term(name(), term),
maxDistance,
prefixLength,
128,
IndexSearcher.getMaxClauseCount(),
transpositions,
MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE
);
IntervalsSource fuzzyIntervals = Intervals.multiterm(fuzzyQuery.getAutomata(), term);
IntervalsSource fuzzyIntervals = Intervals.multiterm(fuzzyQuery.getAutomata(), IndexSearcher.getMaxClauseCount(), term);
return toIntervalsSource(fuzzyIntervals, fuzzyQuery, context);
}

@Override
public IntervalsSource wildcardIntervals(BytesRef pattern, SearchExecutionContext context) {
return toIntervalsSource(
Intervals.wildcard(pattern),
Intervals.wildcard(pattern, IndexSearcher.getMaxClauseCount()),
new MatchAllDocsQuery(), // wildcard queries can be expensive, what should the approximation be?
context
);
Expand All @@ -305,7 +310,7 @@ public IntervalsSource wildcardIntervals(BytesRef pattern, SearchExecutionContex
@Override
public IntervalsSource regexpIntervals(BytesRef pattern, SearchExecutionContext context) {
return toIntervalsSource(
Intervals.regexp(pattern),
Intervals.regexp(pattern, IndexSearcher.getMaxClauseCount()),
new MatchAllDocsQuery(), // regexp queries can be expensive, what should the approximation be?
context
);
Expand All @@ -320,7 +325,7 @@ public IntervalsSource rangeIntervals(
SearchExecutionContext context
) {
return toIntervalsSource(
Intervals.range(lowerTerm, upperTerm, includeLower, includeUpper),
Intervals.range(lowerTerm, upperTerm, includeLower, includeUpper, IndexSearcher.getMaxClauseCount()),
new MatchAllDocsQuery(), // range queries can be expensive, what should the approximation be?
context
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import org.apache.lucene.queries.intervals.IntervalsSource;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
Expand Down Expand Up @@ -152,30 +153,56 @@ public void testPhrasePrefixQuery() throws IOException {
assertNotEquals(new MatchAllDocsQuery(), SourceConfirmedTextQuery.approximate(delegate));
}

public void testTermIntervals() throws IOException {
public void testTermIntervals() {
MappedFieldType ft = new MatchOnlyTextFieldType("field");
IntervalsSource termIntervals = ft.termIntervals(new BytesRef("foo"), MOCK_CONTEXT);
assertThat(termIntervals, Matchers.instanceOf(SourceIntervalsSource.class));
assertEquals(Intervals.term(new BytesRef("foo")), ((SourceIntervalsSource) termIntervals).getIntervalsSource());
}

public void testPrefixIntervals() throws IOException {
public void testPrefixIntervals() {
MappedFieldType ft = new MatchOnlyTextFieldType("field");
IntervalsSource prefixIntervals = ft.prefixIntervals(new BytesRef("foo"), MOCK_CONTEXT);
assertThat(prefixIntervals, Matchers.instanceOf(SourceIntervalsSource.class));
assertEquals(Intervals.prefix(new BytesRef("foo")), ((SourceIntervalsSource) prefixIntervals).getIntervalsSource());
assertEquals(
Intervals.prefix(new BytesRef("foo"), IndexSearcher.getMaxClauseCount()),
((SourceIntervalsSource) prefixIntervals).getIntervalsSource()
);
}

public void testWildcardIntervals() throws IOException {
public void testWildcardIntervals() {
MappedFieldType ft = new MatchOnlyTextFieldType("field");
IntervalsSource wildcardIntervals = ft.wildcardIntervals(new BytesRef("foo"), MOCK_CONTEXT);
assertThat(wildcardIntervals, Matchers.instanceOf(SourceIntervalsSource.class));
assertEquals(Intervals.wildcard(new BytesRef("foo")), ((SourceIntervalsSource) wildcardIntervals).getIntervalsSource());
assertEquals(
Intervals.wildcard(new BytesRef("foo"), IndexSearcher.getMaxClauseCount()),
((SourceIntervalsSource) wildcardIntervals).getIntervalsSource()
);
}

/**
 * Checks that {@code regexpIntervals} on a {@code MatchOnlyTextFieldType} returns a
 * {@code SourceIntervalsSource}, and that the source it wraps equals
 * {@code Intervals.regexp} built with {@code IndexSearcher.getMaxClauseCount()}
 * as the second argument.
 */
public void testRegexpIntervals() {
MappedFieldType ft = new MatchOnlyTextFieldType("field");
IntervalsSource regexpIntervals = ft.regexpIntervals(new BytesRef("foo"), MOCK_CONTEXT);
// The field type is expected to wrap the Lucene source rather than return it directly.
assertThat(regexpIntervals, Matchers.instanceOf(SourceIntervalsSource.class));
assertEquals(
Intervals.regexp(new BytesRef("foo"), IndexSearcher.getMaxClauseCount()),
((SourceIntervalsSource) regexpIntervals).getIntervalsSource()
);
}

public void testFuzzyIntervals() throws IOException {
public void testFuzzyIntervals() {
MappedFieldType ft = new MatchOnlyTextFieldType("field");
IntervalsSource fuzzyIntervals = ft.fuzzyIntervals("foo", 1, 2, true, MOCK_CONTEXT);
assertThat(fuzzyIntervals, Matchers.instanceOf(SourceIntervalsSource.class));
}

/**
 * Checks that {@code rangeIntervals} on a {@code MatchOnlyTextFieldType} returns a
 * {@code SourceIntervalsSource}, and that the source it wraps equals
 * {@code Intervals.range} over ["foo", "foo1"] (both bounds passed as inclusive)
 * built with {@code IndexSearcher.getMaxClauseCount()} as the last argument.
 */
public void testRangeIntervals() {
MappedFieldType ft = new MatchOnlyTextFieldType("field");
IntervalsSource rangeIntervals = ft.rangeIntervals(new BytesRef("foo"), new BytesRef("foo1"), true, true, MOCK_CONTEXT);
// The field type is expected to wrap the Lucene source rather than return it directly.
assertThat(rangeIntervals, Matchers.instanceOf(SourceIntervalsSource.class));
assertEquals(
Intervals.range(new BytesRef("foo"), new BytesRef("foo1"), true, true, IndexSearcher.getMaxClauseCount()),
((SourceIntervalsSource) rangeIntervals).getIntervalsSource()
);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.PhraseQuery;
Expand Down Expand Up @@ -620,7 +621,10 @@ public IntervalsSource intervals(BytesRef term) {
return Intervals.fixField(name(), Intervals.term(term));
}
String wildcardTerm = term.utf8ToString() + "?".repeat(Math.max(0, minChars - term.length));
return Intervals.or(Intervals.fixField(name(), Intervals.wildcard(new BytesRef(wildcardTerm))), Intervals.term(term));
return Intervals.or(
Intervals.fixField(name(), Intervals.wildcard(new BytesRef(wildcardTerm), IndexSearcher.getMaxClauseCount())),
Intervals.term(term)
);
}

@Override
Expand Down Expand Up @@ -822,7 +826,7 @@ public IntervalsSource prefixIntervals(BytesRef term, SearchExecutionContext con
if (prefixFieldType != null) {
return prefixFieldType.intervals(term);
}
return Intervals.prefix(term);
return Intervals.prefix(term, IndexSearcher.getMaxClauseCount());
}

@Override
Expand All @@ -836,24 +840,30 @@ public IntervalsSource fuzzyIntervals(
if (getTextSearchInfo().hasPositions() == false) {
throw new IllegalArgumentException("Cannot create intervals over field [" + name() + "] with no positions indexed");
}
FuzzyQuery fq = new FuzzyQuery(new Term(name(), term), maxDistance, prefixLength, 128, transpositions);
return Intervals.multiterm(fq.getAutomata(), term);
FuzzyQuery fq = new FuzzyQuery(
new Term(name(), term),
maxDistance,
prefixLength,
IndexSearcher.getMaxClauseCount(),
transpositions
);
return Intervals.multiterm(fq.getAutomata(), IndexSearcher.getMaxClauseCount(), term);
}

@Override
public IntervalsSource wildcardIntervals(BytesRef pattern, SearchExecutionContext context) {
if (getTextSearchInfo().hasPositions() == false) {
throw new IllegalArgumentException("Cannot create intervals over field [" + name() + "] with no positions indexed");
}
return Intervals.wildcard(pattern);
return Intervals.wildcard(pattern, IndexSearcher.getMaxClauseCount());
}

@Override
public IntervalsSource regexpIntervals(BytesRef pattern, SearchExecutionContext context) {
if (getTextSearchInfo().hasPositions() == false) {
throw new IllegalArgumentException("Cannot create intervals over field [" + name() + "] with no positions indexed");
}
return Intervals.regexp(pattern);
return Intervals.regexp(pattern, IndexSearcher.getMaxClauseCount());
}

@Override
Expand All @@ -867,7 +877,7 @@ public IntervalsSource rangeIntervals(
if (getTextSearchInfo().hasPositions() == false) {
throw new IllegalArgumentException("Cannot create intervals over field [" + name() + "] with no positions indexed");
}
return Intervals.range(lowerTerm, upperTerm, includeLower, includeUpper);
return Intervals.range(lowerTerm, upperTerm, includeLower, includeUpper, IndexSearcher.getMaxClauseCount());
}

private void checkForPositions() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.RegexpQuery;
Expand Down Expand Up @@ -231,20 +232,26 @@ public void testTermIntervals() throws IOException {
public void testPrefixIntervals() throws IOException {
MappedFieldType ft = createFieldType();
IntervalsSource prefixIntervals = ft.prefixIntervals(new BytesRef("foo"), MOCK_CONTEXT);
assertEquals(Intervals.prefix(new BytesRef("foo")), prefixIntervals);
assertEquals(Intervals.prefix(new BytesRef("foo"), IndexSearcher.getMaxClauseCount()), prefixIntervals);
}

public void testWildcardIntervals() throws IOException {
MappedFieldType ft = createFieldType();
IntervalsSource wildcardIntervals = ft.wildcardIntervals(new BytesRef("foo"), MOCK_CONTEXT);
assertEquals(Intervals.wildcard(new BytesRef("foo")), wildcardIntervals);
assertEquals(Intervals.wildcard(new BytesRef("foo"), IndexSearcher.getMaxClauseCount()), wildcardIntervals);
}

/**
 * Checks that {@code regexpIntervals} on the field type from {@code createFieldType()}
 * returns a source equal to {@code Intervals.regexp} built with
 * {@code IndexSearcher.getMaxClauseCount()} as the second argument.
 */
public void testRegexpIntervals() {
MappedFieldType ft = createFieldType();
IntervalsSource regexpIntervals = ft.regexpIntervals(new BytesRef("foo"), MOCK_CONTEXT);
assertEquals(Intervals.regexp(new BytesRef("foo"), IndexSearcher.getMaxClauseCount()), regexpIntervals);
}

public void testFuzzyIntervals() throws IOException {
MappedFieldType ft = createFieldType();
IntervalsSource fuzzyIntervals = ft.fuzzyIntervals("foo", 1, 2, true, MOCK_CONTEXT);
FuzzyQuery fq = new FuzzyQuery(new Term("field", "foo"), 1, 2, 128, true);
IntervalsSource expectedIntervals = Intervals.multiterm(fq.getAutomata(), "foo");
IntervalsSource expectedIntervals = Intervals.multiterm(fq.getAutomata(), IndexSearcher.getMaxClauseCount(), "foo");
assertEquals(expectedIntervals, fuzzyIntervals);
}

Expand All @@ -259,6 +266,15 @@ public void testWildcardIntervalsWithIndexedPrefixes() {
ConstantScoreTextFieldType ft = createFieldType();
ft.setIndexPrefixes(1, 4);
IntervalsSource wildcardIntervals = ft.wildcardIntervals(new BytesRef("foo"), MOCK_CONTEXT);
assertEquals(Intervals.wildcard(new BytesRef("foo")), wildcardIntervals);
assertEquals(Intervals.wildcard(new BytesRef("foo"), IndexSearcher.getMaxClauseCount()), wildcardIntervals);
}

/**
 * Checks that {@code rangeIntervals} on the field type from {@code createFieldType()}
 * returns a source equal to {@code Intervals.range} over ["foo", "foo1"] (both bounds
 * passed as inclusive) built with {@code IndexSearcher.getMaxClauseCount()} as the
 * last argument.
 */
public void testRangeIntervals() {
MappedFieldType ft = createFieldType();
IntervalsSource rangeIntervals = ft.rangeIntervals(new BytesRef("foo"), new BytesRef("foo1"), true, true, MOCK_CONTEXT);
assertEquals(
Intervals.range(new BytesRef("foo"), new BytesRef("foo1"), true, true, IndexSearcher.getMaxClauseCount()),
rangeIntervals
);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
Expand Down Expand Up @@ -243,20 +244,26 @@ public void testTermIntervals() throws IOException {
public void testPrefixIntervals() throws IOException {
MappedFieldType ft = createFieldType();
IntervalsSource prefixIntervals = ft.prefixIntervals(new BytesRef("foo"), MOCK_CONTEXT);
assertEquals(Intervals.prefix(new BytesRef("foo")), prefixIntervals);
assertEquals(Intervals.prefix(new BytesRef("foo"), IndexSearcher.getMaxClauseCount()), prefixIntervals);
}

public void testWildcardIntervals() throws IOException {
public void testWildcardIntervals() {
MappedFieldType ft = createFieldType();
IntervalsSource wildcardIntervals = ft.wildcardIntervals(new BytesRef("foo"), MOCK_CONTEXT);
assertEquals(Intervals.wildcard(new BytesRef("foo")), wildcardIntervals);
assertEquals(Intervals.wildcard(new BytesRef("foo"), IndexSearcher.getMaxClauseCount()), wildcardIntervals);
}

public void testFuzzyIntervals() throws IOException {
/**
 * Checks that {@code regexpIntervals} on the field type from {@code createFieldType()}
 * returns a source equal to {@code Intervals.regexp} built with
 * {@code IndexSearcher.getMaxClauseCount()} as the second argument.
 */
public void testRegexpIntervals() {
MappedFieldType ft = createFieldType();
IntervalsSource regexpIntervals = ft.regexpIntervals(new BytesRef("foo"), MOCK_CONTEXT);
assertEquals(Intervals.regexp(new BytesRef("foo"), IndexSearcher.getMaxClauseCount()), regexpIntervals);
}

public void testFuzzyIntervals() {
MappedFieldType ft = createFieldType();
IntervalsSource fuzzyIntervals = ft.fuzzyIntervals("foo", 1, 2, true, MOCK_CONTEXT);
FuzzyQuery fq = new FuzzyQuery(new Term("field", "foo"), 1, 2, 128, true);
IntervalsSource expectedIntervals = Intervals.multiterm(fq.getAutomata(), "foo");
IntervalsSource expectedIntervals = Intervals.multiterm(fq.getAutomata(), IndexSearcher.getMaxClauseCount(), "foo");
assertEquals(expectedIntervals, fuzzyIntervals);
}

Expand All @@ -271,6 +278,15 @@ public void testWildcardIntervalsWithIndexedPrefixes() {
TextFieldType ft = createFieldType();
ft.setIndexPrefixes(1, 4);
IntervalsSource wildcardIntervals = ft.wildcardIntervals(new BytesRef("foo"), MOCK_CONTEXT);
assertEquals(Intervals.wildcard(new BytesRef("foo")), wildcardIntervals);
assertEquals(Intervals.wildcard(new BytesRef("foo"), IndexSearcher.getMaxClauseCount()), wildcardIntervals);
}

/**
 * Checks that {@code rangeIntervals} on the field type from {@code createFieldType()}
 * returns a source equal to {@code Intervals.range} over ["foo", "foo1"] (both bounds
 * passed as inclusive) built with {@code IndexSearcher.getMaxClauseCount()} as the
 * last argument.
 */
public void testRangeIntervals() {
MappedFieldType ft = createFieldType();
IntervalsSource rangeIntervals = ft.rangeIntervals(new BytesRef("foo"), new BytesRef("foo1"), true, true, MOCK_CONTEXT);
assertEquals(
Intervals.range(new BytesRef("foo"), new BytesRef("foo1"), true, true, IndexSearcher.getMaxClauseCount()),
rangeIntervals
);
}
}
Loading

0 comments on commit 7150729

Please sign in to comment.