diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.aggregation/410_diversified_sampler.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.aggregation/410_diversified_sampler.yml new file mode 100644 index 0000000000000..894ac52617655 --- /dev/null +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.aggregation/410_diversified_sampler.yml @@ -0,0 +1,229 @@ +setup: + - do: + indices.create: + index: test + body: + settings: + number_of_shards: 1 + mappings: + properties: + tags: + type: text + author: + type: keyword + number: + type: integer + class: + type: integer + + - do: + bulk: + index: test + refresh: true + body: + - '{"index": {}}' + - '{"tags": "kibana", "author": "alice", "number": 1, "class": 1}' + - '{"index": {}}' + - '{"tags": "kibana", "author": "bob", "number": 2, "class": 1}' + - '{"index": {}}' + - '{"tags": "kibana", "author": "alice", "number": 3, "class": 2}' + - '{"index": {}}' + - '{"tags": "javascript", "author": "alice", "number": 4, "class": 1}' + +--- +small shard_size: + - do: + search: + body: + size: 0 + query: + query_string: + query: 'tags:kibana OR tags:javascript' + aggs: + diversified: + diversified_sampler: + field: author + shard_size: 1 + aggs: + min_number: + min: + field: number + max_number: + max: + field: number + + + - match: { hits.total.value: 4 } + - match: { aggregations.diversified.doc_count: 1 } + # The most relevant document has a value of 4 so we only aggregate that. + - match: { aggregations.diversified.min_number.value: 4.0 } + - match: { aggregations.diversified.max_number.value: 4.0 } + +--- +defaults: + - do: + search: + body: + size: 0 + query: + query_string: + query: 'tags:kibana OR tags:javascript' + aggs: + diversified: + diversified_sampler: + field: author + aggs: + min_number: + min: + field: number + max_number: + max: + field: number + + - match: { hits.total.value: 4 } + # The max_docs_per_value defaults to 1 so we just get one doc for each of the two authors + - match: { aggregations.diversified.doc_count: 2 } + # Bob's only document is 2 so we get that as the min. + - match: { aggregations.diversified.min_number.value: 2.0 } + # Alice's most relevant document is 2 so we get that as the max. + - match: { aggregations.diversified.max_number.value: 4.0 } + +--- +override max_docs_per_value: + - do: + search: + body: + size: 0 + query: + query_string: + query: 'tags:kibana OR tags:javascript' + aggs: + diversified: + diversified_sampler: + field: author + max_docs_per_value: 3 + aggs: + min_number: + min: + field: number + max_number: + max: + field: number + + - match: { hits.total.value: 4 } + # We've bumped the max_docs_per_value high enough to get all docs + - match: { aggregations.diversified.doc_count: 4 } + - match: { aggregations.diversified.min_number.value: 1.0 } + - match: { aggregations.diversified.max_number.value: 4.0 } + +--- +run on number: + - do: + search: + body: + size: 0 + query: + query_string: + query: 'tags:kibana OR tags:javascript' + aggs: + diversified: + diversified_sampler: + field: class + aggs: + min_number: + min: + field: number + max_number: + max: + field: number + + - match: { hits.total.value: 4 } + # The max_docs_per_value defaults to 1 so we just get one doc for each of the two classes + - match: { aggregations.diversified.doc_count: 2 } + - match: { aggregations.diversified.min_number.value: 3.0 } + - match: { aggregations.diversified.max_number.value: 4.0 } + +--- +force map mode: + - do: + search: + body: + size: 0 + query: + query_string: + query: 'tags:kibana OR tags:javascript' + aggs: + diversified: + diversified_sampler: + field: author + execution_hint: map + aggs: + min_number: + min: + field: number + max_number: + max: + field: number + + - match: { hits.total.value: 4 } + - match: { aggregations.diversified.doc_count: 2 } + - match: { aggregations.diversified.min_number.value: 2.0 } + - match: { aggregations.diversified.max_number.value: 4.0 } + +--- +force global ordinals mode: + - do: + search: + body: + size: 0 + query: + query_string: + query: 'tags:kibana OR tags:javascript' + aggs: + diversified: + diversified_sampler: + field: author + execution_hint: global_ordinals + aggs: + min_number: + min: + field: number + max_number: + max: + field: number + + - match: { hits.total.value: 4 } + - match: { aggregations.diversified.doc_count: 2 } + - match: { aggregations.diversified.min_number.value: 2.0 } + - match: { aggregations.diversified.max_number.value: 4.0 } + +--- +enable hash mode mode: + - do: + search: + body: + size: 0 + query: + query_string: + query: 'tags:kibana OR tags:javascript' + aggs: + diversified: + diversified_sampler: + field: author + execution_hint: bytes_hash + aggs: + min_number: + min: + field: number + max_number: + max: + field: number + + # This mode can have hash collisions. The hash is seeded with tests.seed + # so we have to have weaker constraints on these hits + - match: { hits.total.value: 4 } + - lte: { aggregations.diversified.doc_count: 2 } + - gte: { aggregations.diversified.doc_count: 1 } + - gte: { aggregations.diversified.min_number.value: 2.0 } + - lte: { aggregations.diversified.min_number.value: 4.0 } + - match: { aggregations.diversified.max_number.value: 4.0 }