Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Integration tests for diversified sampler #77810

Merged
merged 3 commits into from
Sep 22, 2021
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@
setup:
- do:
indices.create:
index: test
body:
settings:
number_of_shards: 1
mappings:
properties:
tags:
type: text
author:
type: keyword
number:
type: integer
class:
type: integer

- do:
bulk:
index: test
refresh: true
body:
- '{"index": {}}'
- '{"tags": "kibana", "author": "alice", "number": 1, "class": 1}'
- '{"index": {}}'
- '{"tags": "kibana", "author": "bob", "number": 2, "class": 1}'
- '{"index": {}}'
- '{"tags": "kibana", "author": "alice", "number": 3, "class": 2}'
- '{"index": {}}'
- '{"tags": "javascript", "author": "alice", "number": 4, "class": 1}'

---
small shard_size:
- do:
search:
body:
size: 0
query:
query_string:
query: 'tags:kibana OR tags:javascript'
aggs:
diversified:
diversified_sampler:
field: author
shard_size: 1
aggs:
min_number:
min:
field: number
max_number:
max:
field: number


- match: { hits.total.value: 4 }
- match: { aggregations.diversified.doc_count: 1 }
# The most relevant document has a value of 4 so we only aggregate that.
- match: { aggregations.diversified.min_number.value: 4.0 }
- match: { aggregations.diversified.max_number.value: 4.0 }

---
defaults:
- do:
search:
body:
size: 0
query:
query_string:
query: 'tags:kibana OR tags:javascript'
aggs:
diversified:
diversified_sampler:
field: author
aggs:
min_number:
min:
field: number
max_number:
max:
field: number

- match: { hits.total.value: 4 }
# The max_docs_per_value defaults to 1 so we just get one doc for each of the two authors
- match: { aggregations.diversified.doc_count: 2 }
# Bob's only document is 2 so we get that as the min.
- match: { aggregations.diversified.min_number.value: 2.0 }
# Alice's most relevant document is 2 so we get that as the max.
- match: { aggregations.diversified.max_number.value: 4.0 }

---
override max_docs_per_value:
- do:
search:
body:
size: 0
query:
query_string:
query: 'tags:kibana OR tags:javascript'
aggs:
diversified:
diversified_sampler:
field: author
max_docs_per_value: 3
aggs:
min_number:
min:
field: number
max_number:
max:
field: number

- match: { hits.total.value: 4 }
# We've bumped the max_docs_per_value high enough to get all docs
- match: { aggregations.diversified.doc_count: 4 }
- match: { aggregations.diversified.min_number.value: 1.0 }
- match: { aggregations.diversified.max_number.value: 4.0 }

---
run on number:
- do:
search:
body:
size: 0
query:
query_string:
query: 'tags:kibana OR tags:javascript'
aggs:
diversified:
diversified_sampler:
field: class
aggs:
min_number:
min:
field: number
max_number:
max:
field: number

- match: { hits.total.value: 4 }
# The max_docs_per_value defaults to 1 so we just get one doc for each of the two classes
- match: { aggregations.diversified.doc_count: 2 }
- match: { aggregations.diversified.min_number.value: 3.0 }
- match: { aggregations.diversified.max_number.value: 4.0 }

---
force map mode:
- do:
search:
body:
size: 0
query:
query_string:
query: 'tags:kibana OR tags:javascript'
aggs:
diversified:
diversified_sampler:
field: author
execution_hint: map
aggs:
min_number:
min:
field: number
max_number:
max:
field: number

- match: { hits.total.value: 4 }
- match: { aggregations.diversified.doc_count: 2 }
- match: { aggregations.diversified.min_number.value: 2.0 }
- match: { aggregations.diversified.max_number.value: 4.0 }

---
force global ordinals mode:
- do:
search:
body:
size: 0
query:
query_string:
query: 'tags:kibana OR tags:javascript'
aggs:
diversified:
diversified_sampler:
field: author
execution_hint: global_ordinals
aggs:
min_number:
min:
field: number
max_number:
max:
field: number

- match: { hits.total.value: 4 }
- match: { aggregations.diversified.doc_count: 2 }
- match: { aggregations.diversified.min_number.value: 2.0 }
- match: { aggregations.diversified.max_number.value: 4.0 }

---
enable hash mode mode:
- do:
search:
body:
size: 0
query:
query_string:
query: 'tags:kibana OR tags:javascript'
aggs:
diversified:
diversified_sampler:
field: author
execution_hint: bytes_hash
aggs:
min_number:
min:
field: number
max_number:
max:
field: number

# This mode can have hash collisions. The hash is seeded with tests.seed
# so we have to have weaker constraints on these hits
- match: { hits.total.value: 4 }
- lte: { aggregations.diversified.doc_count: 2 }
- gte: { aggregations.diversified.doc_count: 1 }
- gte: { aggregations.diversified.min_number.value: 2.0 }
- lte: { aggregations.diversified.min_number.value: 4.0 }
- match: { aggregations.diversified.max_number.value: 4.0 }