Skip to content

Commit

Permalink
Integration tests for diversified sampler (#77810)
Browse files Browse the repository at this point in the history
Adds an integration test for the `diversified_sampler` aggregator.
  • Loading branch information
nik9000 authored Sep 22, 2021
1 parent 407d6ce commit 3e5dbb0
Showing 1 changed file with 229 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@
# Shared fixture: creates the `test` index and loads four documents that
# every test below searches.
setup:
  # One shard only -- presumably so that scoring and sampling are
  # deterministic across runs (TODO confirm).
  - do:
      indices.create:
        index: test
        body:
          settings:
            number_of_shards: 1
          mappings:
            properties:
              # Full-text field targeted by the query_string queries.
              tags:
                type: text
              # Keyword field most tests diversify on.
              author:
                type: keyword
              # Value fed into the min/max sub-aggregations.
              number:
                type: integer
              # Integer field used to diversify on a numeric value.
              class:
                type: integer

  # Four docs: alice wrote numbers 1, 3 and 4; bob wrote number 2.
  # Only number 4 is tagged "javascript"; only number 3 is in class 2.
  - do:
      bulk:
        index: test
        refresh: true
        body:
          - '{"index": {}}'
          - '{"tags": "kibana", "author": "alice", "number": 1, "class": 1}'
          - '{"index": {}}'
          - '{"tags": "kibana", "author": "bob", "number": 2, "class": 1}'
          - '{"index": {}}'
          - '{"tags": "kibana", "author": "alice", "number": 3, "class": 2}'
          - '{"index": {}}'
          - '{"tags": "javascript", "author": "alice", "number": 4, "class": 1}'

---
# Caps the sample at a single document via shard_size: 1.
small shard_size:
  - do:
      search:
        body:
          size: 0
          query:
            query_string:
              query: 'tags:kibana OR tags:javascript'
          aggs:
            diversified:
              diversified_sampler:
                field: author
                shard_size: 1
              aggs:
                min_number:
                  min:
                    field: number
                max_number:
                  max:
                    field: number

  # All four docs match the query, but only one is sampled.
  - match: { hits.total.value: 4 }
  - match: { aggregations.diversified.doc_count: 1 }
  # The most relevant document has number 4, so it is the only one
  # aggregated and both min and max equal 4.
  - match: { aggregations.diversified.min_number.value: 4.0 }
  - match: { aggregations.diversified.max_number.value: 4.0 }

---
# Runs diversified_sampler on `author` with no optional parameters set.
defaults:
  - do:
      search:
        body:
          size: 0
          query:
            query_string:
              query: 'tags:kibana OR tags:javascript'
          aggs:
            diversified:
              diversified_sampler:
                field: author
              aggs:
                min_number:
                  min:
                    field: number
                max_number:
                  max:
                    field: number

  - match: { hits.total.value: 4 }
  # The max_docs_per_value defaults to 1 so we just get one doc for each of the two authors
  - match: { aggregations.diversified.doc_count: 2 }
  # Bob's only document is 2 so we get that as the min.
  - match: { aggregations.diversified.min_number.value: 2.0 }
  # Alice's most relevant document is 4 so we get that as the max.
  - match: { aggregations.diversified.max_number.value: 4.0 }

---
# Raises max_docs_per_value so no author's documents are dropped.
override max_docs_per_value:
  - do:
      search:
        body:
          size: 0
          query:
            query_string:
              query: 'tags:kibana OR tags:javascript'
          aggs:
            diversified:
              diversified_sampler:
                field: author
                max_docs_per_value: 3
              aggs:
                min_number:
                  min:
                    field: number
                max_number:
                  max:
                    field: number

  - match: { hits.total.value: 4 }
  # alice has three docs and bob has one, so a limit of 3 per author is
  # high enough for all four docs to be sampled.
  - match: { aggregations.diversified.doc_count: 4 }
  # With every doc sampled, min and max span the whole range 1..4.
  - match: { aggregations.diversified.min_number.value: 1.0 }
  - match: { aggregations.diversified.max_number.value: 4.0 }

---
# Diversifies on the integer `class` field instead of a keyword field.
run on number:
  - do:
      search:
        body:
          size: 0
          query:
            query_string:
              query: 'tags:kibana OR tags:javascript'
          aggs:
            diversified:
              diversified_sampler:
                field: class
              aggs:
                min_number:
                  min:
                    field: number
                max_number:
                  max:
                    field: number

  - match: { hits.total.value: 4 }
  # max_docs_per_value defaults to 1, so one doc is kept for each of the
  # two classes.
  - match: { aggregations.diversified.doc_count: 2 }
  # Class 2 contains only the doc with number 3, so that is the min.
  - match: { aggregations.diversified.min_number.value: 3.0 }
  # Class 1 holds numbers 1, 2 and 4; the expected max of 4 implies the
  # doc with number 4 is the one kept for that class.
  - match: { aggregations.diversified.max_number.value: 4.0 }

---
# Requests the `map` execution_hint while diversifying on `author`.
force map mode:
  - do:
      search:
        body:
          size: 0
          query:
            query_string:
              query: 'tags:kibana OR tags:javascript'
          aggs:
            diversified:
              diversified_sampler:
                field: author
                execution_hint: map
              aggs:
                min_number:
                  min:
                    field: number
                max_number:
                  max:
                    field: number

  - match: { hits.total.value: 4 }
  # One sampled doc per author is expected: bob's only doc (number 2)
  # gives the min and alice's doc with number 4 gives the max.
  - match: { aggregations.diversified.doc_count: 2 }
  - match: { aggregations.diversified.min_number.value: 2.0 }
  - match: { aggregations.diversified.max_number.value: 4.0 }

---
# Requests the `global_ordinals` execution_hint while diversifying on
# `author`.
force global ordinals mode:
  - do:
      search:
        body:
          size: 0
          query:
            query_string:
              query: 'tags:kibana OR tags:javascript'
          aggs:
            diversified:
              diversified_sampler:
                field: author
                execution_hint: global_ordinals
              aggs:
                min_number:
                  min:
                    field: number
                max_number:
                  max:
                    field: number

  - match: { hits.total.value: 4 }
  # One sampled doc per author is expected: bob's only doc (number 2)
  # gives the min and alice's doc with number 4 gives the max.
  - match: { aggregations.diversified.doc_count: 2 }
  - match: { aggregations.diversified.min_number.value: 2.0 }
  - match: { aggregations.diversified.max_number.value: 4.0 }

---
# Requests the `bytes_hash` execution_hint while diversifying on `author`.
# NOTE(review): the doubled "mode" in the test name looks like a typo; it
# is left unchanged in case the name is referenced elsewhere -- confirm.
enable hash mode mode:
  - do:
      search:
        body:
          size: 0
          query:
            query_string:
              query: 'tags:kibana OR tags:javascript'
          aggs:
            diversified:
              diversified_sampler:
                field: author
                execution_hint: bytes_hash
              aggs:
                min_number:
                  min:
                    field: number
                max_number:
                  max:
                    field: number

  # This mode can have hash collisions. The hash is seeded with tests.seed,
  # so the assertions below are ranges rather than exact matches.
  - match: { hits.total.value: 4 }
  # Either both authors are sampled (2 docs) or they collide (1 doc).
  - lte: { aggregations.diversified.doc_count: 2 }
  - gte: { aggregations.diversified.doc_count: 1 }
  # The min is 2 when bob's doc is sampled and 4 when only the doc with
  # number 4 is.
  - gte: { aggregations.diversified.min_number.value: 2.0 }
  - lte: { aggregations.diversified.min_number.value: 4.0 }
  # The doc with number 4 is expected in the sample in every case.
  - match: { aggregations.diversified.max_number.value: 4.0 }

0 comments on commit 3e5dbb0

Please sign in to comment.