Skip to content

Commit

Permalink
Backport/backport 15194 to 2.x
Browse files Browse the repository at this point in the history
Signed-off-by: Finn Carroll <carrofin@amazon.com>
  • Loading branch information
finnegancarroll committed Aug 20, 2024
1 parent d1adaf3 commit b21d00f
Show file tree
Hide file tree
Showing 5 changed files with 293 additions and 4 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Fix array_index_out_of_bounds_exception when indexing documents with field name containing only dot ([#15126](https://github.com/opensearch-project/OpenSearch/pull/15126))
- Fixed array field name omission in flat_object function for nested JSON ([#13620](https://github.com/opensearch-project/OpenSearch/pull/13620))
- Fix incorrect parameter names in MinHash token filter configuration handling ([#15233](https://github.com/opensearch-project/OpenSearch/pull/15233))
- Fix range aggregation optimization ignoring top level queries ([#15287](https://github.com/opensearch-project/OpenSearch/pull/15287))

### Security

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,3 +61,90 @@ setup:
- match: { aggregations.histo.buckets.8.doc_count: 1 }
- match: { aggregations.histo.buckets.12.key_as_string: "2016-06-01T00:00:00.000Z" }
- match: { aggregations.histo.buckets.12.doc_count: 1 }

---
"Date histogram aggregation w/ filter query test":
- do:
bulk:
refresh: true
index: dhisto-agg-w-query
body:
- '{"index": {}}'
- '{"routing": "route1", "date": "2024-08-12", "dow": "monday"}'
- '{"index": {}}'
- '{"routing": "route1", "date": "2024-08-14", "dow": "wednesday"}'
- '{"index": {}}'
- '{"routing": "route1", "date": "2024-08-19", "dow": "monday"}'
- '{"index": {}}'
- '{"routing": "route2", "date": "2024-08-13", "dow": "tuesday"}'
- '{"index": {}}'
- '{"routing": "route2", "date": "2024-08-15", "dow": "thursday"}'

- do:
search:
index: dhisto-agg-w-query
body:
query:
bool:
must:
match_all: {}
filter:
- terms:
routing:
- "route1"
aggregations:
weekHisto:
date_histogram:
field: date
calendar_interval: week
_source: false

- match: { hits.total.value: 3 }
- match: { aggregations.weekHisto.buckets.0.doc_count: 2 }
- match: { aggregations.weekHisto.buckets.1.doc_count: 1 }

---
"Date histogram aggregation w/ shared field range test":
- do:
bulk:
refresh: true
index: dhisto-agg-w-query
body:
- '{"index": {}}'
- '{"date": "2024-10-31"}'
- '{"index": {}}'
- '{"date": "2024-11-11"}'
- '{"index": {}}'
- '{"date": "2024-11-28"}'
- '{"index": {}}'
- '{"date": "2024-12-25"}'
- '{"index": {}}'
- '{"date": "2025-01-01"}'
- '{"index": {}}'
- '{"date": "2025-02-14"}'

- do:
search:
index: dhisto-agg-w-query
body:
profile: true
query:
range:
date:
gte: "2024-01-01"
lt: "2025-01-01"
aggregations:
monthHisto:
date_histogram:
field: date
calendar_interval: month
_source: false

- match: { hits.total.value: 4 }
- match: { aggregations.monthHisto.buckets.0.doc_count: 1 }
- match: { aggregations.monthHisto.buckets.1.doc_count: 2 }
- match: { aggregations.monthHisto.buckets.2.doc_count: 1 }
- match: { profile.shards.0.aggregations.0.debug.optimized_segments: 1 }
- match: { profile.shards.0.aggregations.0.debug.unoptimized_segments: 0 }
- match: { profile.shards.0.aggregations.0.debug.leaf_visited: 0 }
- match: { profile.shards.0.aggregations.0.debug.inner_visited: 0 }
Original file line number Diff line number Diff line change
Expand Up @@ -673,3 +673,78 @@ setup:
- match: { aggregations.my_range.buckets.3.from: 1.5 }
- is_false: aggregations.my_range.buckets.3.to
- match: { aggregations.my_range.buckets.3.doc_count: 2 }

---
"Filter query w/ aggregation test":
- do:
bulk:
refresh: true
index: range-agg-w-query
body:
- '{"index": {}}'
- '{"routing": "route1", "v": -10, "date": "2024-10-29"}'
- '{"index": {}}'
- '{"routing": "route1", "v": -5, "date": "2024-10-30"}'
- '{"index": {}}'
- '{"routing": "route1", "v": 10, "date": "2024-10-31"}'
- '{"index": {}}'
- '{"routing": "route2", "v": 15, "date": "2024-11-01"}'
- '{"index": {}}'
- '{"routing": "route2", "v": 20, "date": "2024-11-02"}'

- do:
search:
index: range-agg-w-query
body:
query:
bool:
must:
match_all: {}
filter:
- terms:
routing:
- "route1"
aggregations:
NegPosAgg:
range:
field: v
keyed: true
ranges:
- to: 0
key: "0"
- from: 0
key: "1"
_source: false

- match: { hits.total.value: 3 }
- match: { aggregations.NegPosAgg.buckets.0.doc_count: 2 }
- match: { aggregations.NegPosAgg.buckets.1.doc_count: 1 }

- do:
search:
index: range-agg-w-query
body:
query:
bool:
must:
match_all: {}
filter:
- terms:
routing:
- "route1"
aggregations:
HalloweenAgg:
date_range:
field: date
format: "yyyy-MM-dd"
keyed: true
ranges:
- to: "2024-11-01"
key: "to-october"
- from: "2024-11-01"
key: "from-september"
_source: false

- match: { hits.total.value: 3 }
- match: { aggregations.HalloweenAgg.buckets.to-october.doc_count: 3 }
- match: { aggregations.HalloweenAgg.buckets.from-september.doc_count: 0 }
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@
import java.util.function.Function;

import static org.opensearch.core.xcontent.ConstructingObjectParser.optionalConstructorArg;
import static org.opensearch.search.aggregations.bucket.filterrewrite.DateHistogramAggregatorBridge.segmentMatchAll;

/**
* Aggregate all docs that match given ranges.
Expand Down Expand Up @@ -310,8 +311,9 @@ public ScoreMode scoreMode() {

@Override
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBucketCollector sub) throws IOException {
boolean optimized = filterRewriteOptimizationContext.tryOptimize(ctx, this::incrementBucketDocCount, false);
if (optimized) throw new CollectionTerminatedException();
if (segmentMatchAll(context, ctx) && filterRewriteOptimizationContext.tryOptimize(ctx, this::incrementBucketDocCount, false)) {
throw new CollectionTerminatedException();
}

final SortedNumericDoubleValues values = valuesSource.doubleValues(ctx);
return new LeafBucketCollectorBase(sub, values) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,16 +32,23 @@

package org.opensearch.search.aggregations.bucket.range;

import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.KeywordField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.apache.lucene.tests.util.TestUtil;
Expand All @@ -54,6 +61,7 @@
import org.opensearch.index.mapper.MappedFieldType;
import org.opensearch.index.mapper.NumberFieldMapper.NumberFieldType;
import org.opensearch.index.mapper.NumberFieldMapper.NumberType;
import org.opensearch.index.mapper.ParseContext.Document;
import org.opensearch.search.aggregations.AggregationBuilder;
import org.opensearch.search.aggregations.AggregatorTestCase;
import org.opensearch.search.aggregations.CardinalityUpperBound;
Expand All @@ -65,6 +73,7 @@
import java.io.IOException;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.LinkedList;
Expand Down Expand Up @@ -457,6 +466,29 @@ public void testFloatType() throws IOException {
);
}

public void testTopLevelRangeQuery() throws IOException {
NumberFieldType fieldType = new NumberFieldType(NumberType.INTEGER.typeName(), NumberType.INTEGER);
String fieldName = fieldType.numberType().typeName();
Query query = IntPoint.newRangeQuery(fieldName, 5, 20);

testRewriteOptimizationCase(
fieldType,
new double[][] { { 0.0, 10.0 }, { 10.0, 20.0 } },
query,
new Number[] { 0.1, 4.0, 9, 11, 12, 19 },
range -> {
List<? extends InternalRange.Bucket> ranges = range.getBuckets();
assertEquals(2, ranges.size());
assertEquals("0.0-10.0", ranges.get(0).getKeyAsString());
assertEquals(1, ranges.get(0).getDocCount());
assertEquals("10.0-20.0", ranges.get(1).getKeyAsString());
assertEquals(3, ranges.get(1).getDocCount());
assertTrue(AggregationInspectionHelper.hasValue(range));
},
false
);
}

public void testUnsignedLongType() throws IOException {
testRewriteOptimizationCase(
new NumberFieldType(NumberType.UNSIGNED_LONG.typeName(), NumberType.UNSIGNED_LONG),
Expand Down Expand Up @@ -493,6 +525,76 @@ public void testUnsignedLongType() throws IOException {
);
}

/**
* Expect no optimization as top level query excludes documents.
*/
public void testTopLevelFilterTermQuery() throws IOException {
final String KEYWORD_FIELD_NAME = "route";
final NumberFieldType NUM_FIELD_TYPE = new NumberFieldType(NumberType.DOUBLE.typeName(), NumberType.DOUBLE);
final NumberType numType = NUM_FIELD_TYPE.numberType();

BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.setMinimumNumberShouldMatch(0);
builder.add(new TermQuery(new Term(KEYWORD_FIELD_NAME, "route1")), BooleanClause.Occur.MUST);
Query boolQuery = builder.build();

List<Document> docList = new ArrayList<>();
for (int i = 0; i < 3; i++)
docList.add(new Document());

docList.get(0).addAll(numType.createFields(numType.typeName(), 3.0, true, true, false));
docList.get(1).addAll(numType.createFields(numType.typeName(), 11.0, true, true, false));
docList.get(2).addAll(numType.createFields(numType.typeName(), 15.0, true, true, false));
docList.get(0).add(new KeywordField(KEYWORD_FIELD_NAME, "route1", Field.Store.NO));
docList.get(1).add(new KeywordField(KEYWORD_FIELD_NAME, "route1", Field.Store.NO));
docList.get(2).add(new KeywordField(KEYWORD_FIELD_NAME, "route2", Field.Store.NO));

testRewriteOptimizationCase(NUM_FIELD_TYPE, new double[][] { { 0.0, 10.0 }, { 10.0, 20.0 } }, boolQuery, docList, range -> {
List<? extends InternalRange.Bucket> ranges = range.getBuckets();
assertEquals(2, ranges.size());
assertEquals("0.0-10.0", ranges.get(0).getKeyAsString());
assertEquals(1, ranges.get(0).getDocCount());
assertEquals("10.0-20.0", ranges.get(1).getKeyAsString());
assertEquals(1, ranges.get(1).getDocCount());
assertTrue(AggregationInspectionHelper.hasValue(range));
}, false);
}

/**
* Expect optimization as top level query is effective match all.
*/
public void testTopLevelEffectiveMatchAll() throws IOException {
final String KEYWORD_FIELD_NAME = "route";
final NumberFieldType NUM_FIELD_TYPE = new NumberFieldType(NumberType.DOUBLE.typeName(), NumberType.DOUBLE);
final NumberType numType = NUM_FIELD_TYPE.numberType();

BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.setMinimumNumberShouldMatch(0);
builder.add(new TermQuery(new Term(KEYWORD_FIELD_NAME, "route1")), BooleanClause.Occur.MUST);
Query boolQuery = builder.build();

List<Document> docList = new ArrayList<>();
for (int i = 0; i < 3; i++)
docList.add(new Document());

docList.get(0).addAll(numType.createFields(numType.typeName(), 3.0, true, true, false));
docList.get(1).addAll(numType.createFields(numType.typeName(), 11.0, true, true, false));
docList.get(2).addAll(numType.createFields(numType.typeName(), 15.0, true, true, false));
docList.get(0).add(new KeywordField(KEYWORD_FIELD_NAME, "route1", Field.Store.NO));
docList.get(1).add(new KeywordField(KEYWORD_FIELD_NAME, "route1", Field.Store.NO));
docList.get(2).add(new KeywordField(KEYWORD_FIELD_NAME, "route1", Field.Store.NO));

testRewriteOptimizationCase(NUM_FIELD_TYPE, new double[][] { { 0.0, 10.0 }, { 10.0, 20.0 } }, boolQuery, docList, range -> {
List<? extends InternalRange.Bucket> ranges = range.getBuckets();
assertEquals(2, ranges.size());
assertEquals("0.0-10.0", ranges.get(0).getKeyAsString());
assertEquals(1, ranges.get(0).getDocCount());
assertEquals("10.0-20.0", ranges.get(1).getKeyAsString());
assertEquals(2, ranges.get(1).getDocCount());
assertTrue(AggregationInspectionHelper.hasValue(range));
}, true);
}

private void testCase(
Query query,
CheckedConsumer<RandomIndexWriter, IOException> buildIndex,
Expand Down Expand Up @@ -556,11 +658,33 @@ private void testRewriteOptimizationCase(
) throws IOException {
NumberType numberType = fieldType.numberType();
String fieldName = numberType.typeName();
List<Document> docs = new ArrayList<>();

for (Number dataPoint : dataPoints) {
Document doc = new Document();
List<Field> fieldList = numberType.createFields(fieldName, dataPoint, true, true, false);
for (Field fld : fieldList)
doc.add(fld);
docs.add(doc);
}

testRewriteOptimizationCase(fieldType, ranges, query, docs, verify, optimized);
}

private void testRewriteOptimizationCase(
NumberFieldType fieldType,
double[][] ranges,
Query query,
List<Document> documents,
Consumer<InternalRange<? extends InternalRange.Bucket, ? extends InternalRange>> verify,
boolean optimized
) throws IOException {
NumberType numberType = fieldType.numberType();
String fieldName = numberType.typeName();
try (Directory directory = newDirectory()) {
try (IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig().setCodec(TestUtil.getDefaultCodec()))) {
for (Number dataPoint : dataPoints) {
indexWriter.addDocument(numberType.createFields(fieldName, dataPoint, true, true, false));
for (Document doc : documents) {
indexWriter.addDocument(doc);
}
}

Expand Down

0 comments on commit b21d00f

Please sign in to comment.