Skip to content

Commit

Permalink
Clean up how pipeline aggs check for multi-bucket (elastic#54161)
Browse files Browse the repository at this point in the history
Pipeline aggregations like `stats_bucket`, `sum_bucket`, and
`percentiles_bucket` only operate on buckets that have multiple buckets.
This adds support for those aggregations to `geo_distance`, `ip_range`,
`auto_date_histogram`, and `rare_terms`.

This all happened because we used a marker interface to mark compatible
aggs, `MultiBucketAggregationBuilder` and it was fairly easy to forget
to implement the interface.

This replaces the marker interface with an abstract method in
`AggregationBuilder`, `bucketCardinality` which makes you return `NONE`,
`ONE`, or `MANY`. The `bucket` aggregations can check for `MANY`. At
this point `ONE` and `NONE` amount to about the same thing, but I
suspect that'll be a useful distinction when validating bucket sorts.

Closes elastic#53215
  • Loading branch information
nik9000 authored Mar 28, 2020
1 parent 1bb36d7 commit a0f7c4a
Show file tree
Hide file tree
Showing 45 changed files with 518 additions and 103 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,11 @@ protected void innerWriteTo(StreamOutput out) throws IOException {
throw new UnsupportedOperationException();
}

@Override
public BucketCardinality bucketCardinality() {
return BucketCardinality.NONE;
}

@Override
protected ValuesSourceAggregatorFactory innerBuild(QueryShardContext queryShardContext, ValuesSourceConfig config,
AggregatorFactory parent, Builder subFactoriesBuilder) throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,11 @@ protected void doWriteTo(StreamOutput out) throws IOException {
throw new UnsupportedOperationException();
}

@Override
public BucketCardinality bucketCardinality() {
return BucketCardinality.NONE;
}

@Override
protected AggregatorFactory doBuild(QueryShardContext queryShardContext, AggregatorFactory parent, Builder subfactoriesBuilder)
throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
import java.util.Map;

public class MatrixStatsAggregationBuilder
extends ArrayValuesSourceAggregationBuilder.LeafOnly<MatrixStatsAggregationBuilder> {
extends ArrayValuesSourceAggregationBuilder.LeafOnly<MatrixStatsAggregationBuilder> {
public static final String NAME = "matrix_stats";

private MultiValueMode multiValueMode = MultiValueMode.AVG;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,11 @@ public AB subAggregations(Builder subFactories) {
throw new AggregationInitializationException("Aggregator [" + name + "] of type [" +
getType() + "] cannot accept sub-aggregations");
}

@Override
public final BucketCardinality bucketCardinality() {
return BucketCardinality.NONE;
}
}

private List<String> fields = Collections.emptyList();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,10 @@ protected void innerWriteTo(StreamOutput out) throws IOException {
}

@Override
public BucketCardinality bucketCardinality() {
return BucketCardinality.ONE;
}

protected ValuesSourceAggregatorFactory innerBuild(QueryShardContext queryShardContext,
ValuesSourceConfig config,
AggregatorFactory parent,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,11 @@ protected void innerWriteTo(StreamOutput out) throws IOException {
out.writeString(childType);
}

@Override
public BucketCardinality bucketCardinality() {
return BucketCardinality.ONE;
}

@Override
protected ValuesSourceAggregatorFactory innerBuild(QueryShardContext queryShardContext,
ValuesSourceConfig config,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -360,3 +360,64 @@ setup:
- is_false: aggregations.str_terms.buckets.0.key_as_string
- match: { aggregations.str_terms.buckets.0.doc_count: 1 }
- match: { aggregations.str_terms.buckets.0.max_n.value: 3.0 }

---
"avg_bucket":
- skip:
version: " - 7.99.99"
reason: Fixed in 8.0.0 to be backported to 7.8.0
- do:
indices.create:
index: test
body:
settings:
number_of_replicas: 0
mappings:
properties:
str:
type: keyword
- do:
bulk:
index: test
refresh: true
body:
- '{"index": {}}'
- '{"str": "foo", "v": 1}'
- '{"index": {}}'
- '{"str": "foo", "v": 2}'
- '{"index": {}}'
- '{"str": "foo", "v": 3}'
- '{"index": {}}'
- '{"str": "bar", "v": 4}'
- '{"index": {}}'
- '{"str": "bar", "v": 5}'
- '{"index": {}}'
- '{"str": "baz", "v": 6}'

- do:
search:
index: test
body:
size: 0
aggs:
str_terms:
rare_terms:
field: str
max_doc_count: 2
aggs:
v:
sum:
field: v
str_terms_avg_v:
avg_bucket:
buckets_path: str_terms.v

- match: { hits.total.value: 6 }
- length: { aggregations.str_terms.buckets: 2 }
- match: { aggregations.str_terms.buckets.0.key: baz }
- match: { aggregations.str_terms.buckets.0.doc_count: 1 }
- match: { aggregations.str_terms.buckets.0.v.value: 6 }
- match: { aggregations.str_terms.buckets.1.key: bar }
- match: { aggregations.str_terms.buckets.1.doc_count: 2 }
- match: { aggregations.str_terms.buckets.1.v.value: 9 }
- match: { aggregations.str_terms_avg_v.value: 7.5 }
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
setup:
- do:
indices.create:
index: test
body:
settings:
number_of_replicas: 0
mappings:
properties:
date:
type: date
- do:
bulk:
refresh: true
index: test
body:
- '{"index": {}}'
- '{"date": "2020-03-01", "v": 1}'
- '{"index": {}}'
- '{"date": "2020-03-02", "v": 2}'
- '{"index": {}}'
- '{"date": "2020-03-08", "v": 3}'
- '{"index": {}}'
- '{"date": "2020-03-09", "v": 4}'

---
"basic":
- do:
search:
body:
size: 0
aggs:
histo:
auto_date_histogram:
field: date
buckets: 2
- match: { hits.total.value: 4 }
- length: { aggregations.histo.buckets: 2 }
- match: { aggregations.histo.buckets.0.key_as_string: "2020-03-01T00:00:00.000Z" }
- match: { aggregations.histo.buckets.0.doc_count: 2 }
- match: { aggregations.histo.buckets.1.key_as_string: "2020-03-08T00:00:00.000Z" }
- match: { aggregations.histo.buckets.1.doc_count: 2 }

---
"avg_bucket":
- skip:
version: " - 7.99.99"
reason: Fixed in 8.0.0 to be backported to 7.8.0
- do:
search:
body:
size: 0
aggs:
histo:
auto_date_histogram:
field: date
buckets: 2
aggs:
v:
sum:
field: v
histo_avg_v:
avg_bucket:
buckets_path: histo.v
- match: { hits.total.value: 4 }
- length: { aggregations.histo.buckets: 2 }
- match: { aggregations.histo.buckets.0.key_as_string: "2020-03-01T00:00:00.000Z" }
- match: { aggregations.histo.buckets.0.doc_count: 2 }
- match: { aggregations.histo.buckets.0.v.value: 3 }
- match: { aggregations.histo.buckets.1.key_as_string: "2020-03-08T00:00:00.000Z" }
- match: { aggregations.histo.buckets.1.doc_count: 2 }
- match: { aggregations.histo.buckets.1.v.value: 7 }
- match: { aggregations.histo_avg_v.value: 5 }
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
setup:
- do:
indices.create:
index: test
body:
mappings:
properties:
location:
type: geo_point
- do:
bulk:
index: test
refresh: true
body:
- '{"index": {}}'
- '{"location": {"lat" : 40.7128, "lon" : -74.0060}, "name": "New York", "population": 8623000}'
- '{"index": {}}'
- '{"location": {"lat" : 34.0522, "lon" : -118.2437}, "name": "Los Angeles", "population": 4000000}'
- '{"index": {}}'
- '{"location": {"lat" : 41.8781, "lon" : -87.6298}, "name": "Chicago", "population": 2716000}'

---
"basic":
- do:
search:
body:
size: 0
aggs:
distance:
geo_distance:
field: location
origin: "35.7796, -78.6382"
ranges:
- to: 1000000
- from: 1000000
to: 5000000
- from: 5000000
- match: { hits.total.value: 3 }
- length: { aggregations.distance.buckets: 3 }
- match: { aggregations.distance.buckets.0.key: "*-1000000.0" }
- match: { aggregations.distance.buckets.0.doc_count: 1 }
- match: { aggregations.distance.buckets.1.key: "1000000.0-5000000.0" }
- match: { aggregations.distance.buckets.1.doc_count: 2 }
- match: { aggregations.distance.buckets.2.key: "5000000.0-*" }
- match: { aggregations.distance.buckets.2.doc_count: 0 }

---
"avg_bucket":
- skip:
version: " - 7.99.99"
reason: Fixed in 8.0.0 to be backported to 7.8.0
- do:
search:
body:
size: 0
aggs:
distance:
geo_distance:
field: location
origin: "35.7796, -78.6382"
ranges:
- to: 1000000
- from: 1000000
to: 5000000
- from: 5000000
aggs:
total_population:
sum:
field: population
avg_total_population:
avg_bucket:
buckets_path: distance.total_population
- match: { hits.total.value: 3 }
- length: { aggregations.distance.buckets: 3 }
- match: { aggregations.distance.buckets.0.key: "*-1000000.0" }
- match: { aggregations.distance.buckets.0.doc_count: 1 }
- match: { aggregations.distance.buckets.0.total_population.value: 8623000.0 }
- match: { aggregations.distance.buckets.1.key: "1000000.0-5000000.0" }
- match: { aggregations.distance.buckets.1.doc_count: 2 }
- match: { aggregations.distance.buckets.1.total_population.value: 6716000.0 }
- match: { aggregations.distance.buckets.2.key: "5000000.0-*" }
- match: { aggregations.distance.buckets.2.doc_count: 0 }
- match: { aggregations.distance.buckets.2.total_population.value: 0 }
- match: { aggregations.avg_total_population.value: 7669500.0 }
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,58 @@ setup:
- match: { aggregations.ip_range.buckets.1.key: "192.168.0.0-192.169.0.0" }
- match: { aggregations.ip_range.buckets.2.key: "192.169.0.0-*" }

---
"IP Range avg_bucket":
- skip:
version: " - 7.99.99"
reason: Fixed in 8.0.0 to be backported to 7.8.0
- do:
bulk:
refresh: true
index: test
body:
- '{"index": {}}'
- '{"ip": "::1", "v": 1}'
- '{"index": {}}'
- '{"ip": "192.168.0.1", "v": 2}'
- '{"index": {}}'
- '{"ip": "192.168.0.7", "v": 3}'

- do:
search:
index: test
body:
size: 0
aggs:
range:
ip_range:
field: ip
ranges:
- to: 192.168.0.0
- from: 192.168.0.0
to: 192.169.0.0
- from: 192.169.0.0
aggs:
v:
sum:
field: v
range_avg_v:
avg_bucket:
buckets_path: range.v

- match: { hits.total.value: 3 }
- length: { aggregations.range.buckets: 3 }
- match: { aggregations.range.buckets.0.key: "*-192.168.0.0" }
- match: { aggregations.range.buckets.0.doc_count: 1 }
- match: { aggregations.range.buckets.0.v.value: 1 }
- match: { aggregations.range.buckets.1.key: "192.168.0.0-192.169.0.0" }
- match: { aggregations.range.buckets.1.doc_count: 2 }
- match: { aggregations.range.buckets.1.v.value: 5 }
- match: { aggregations.range.buckets.2.key: "192.169.0.0-*" }
- match: { aggregations.range.buckets.2.doc_count: 0 }
- match: { aggregations.range.buckets.2.v.value: 0 }
- match: { aggregations.range_avg_v.value: 3 }

---
"Date range":
- do:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,19 @@ public PipelineTree buildPipelineTree() {
return factoriesBuilder.buildPipelineTree();
}

/**
* Rough measure of how many buckets this aggregation can return. Just
* "zero", "one", and "many".
*/
public enum BucketCardinality {
NONE, ONE, MANY;
}
/**
* Do aggregations built by this builder contain buckets? If so, do they
* contain *always* contain a single bucket?
*/
public abstract BucketCardinality bucketCardinality();

/** Common xcontent fields shared among aggregator builders */
public static final class CommonFields extends ParseField.CommonFields {
public static final ParseField VALUE_TYPE = new ParseField("value_type");
Expand Down
Loading

0 comments on commit a0f7c4a

Please sign in to comment.