From e32408d9c5594fe49a2e95c62918c0336b27dc43 Mon Sep 17 00:00:00 2001 From: Alexey Ivanov Date: Thu, 24 Oct 2024 15:38:40 +0100 Subject: [PATCH 01/22] Report JVM stats for all memory pools (97046) (#115117) (#115550) This fix allows reporting of all JVM memory pools sizes in JVM stats --- docs/changelog/115117.yaml | 6 ++++++ .../elasticsearch/monitor/jvm/GcNames.java | 15 +++++++++++++- .../elasticsearch/monitor/jvm/JvmStats.java | 5 +---- .../monitor/jvm/JvmStatsTests.java | 20 +++++++++++++++++-- 4 files changed, 39 insertions(+), 7 deletions(-) create mode 100644 docs/changelog/115117.yaml diff --git a/docs/changelog/115117.yaml b/docs/changelog/115117.yaml new file mode 100644 index 0000000000000..de2defcd46afd --- /dev/null +++ b/docs/changelog/115117.yaml @@ -0,0 +1,6 @@ +pr: 115117 +summary: Report JVM stats for all memory pools (97046) +area: Infra/Core +type: bug +issues: + - 97046 diff --git a/server/src/main/java/org/elasticsearch/monitor/jvm/GcNames.java b/server/src/main/java/org/elasticsearch/monitor/jvm/GcNames.java index 9db8e8f414d5c..3494204c330c0 100644 --- a/server/src/main/java/org/elasticsearch/monitor/jvm/GcNames.java +++ b/server/src/main/java/org/elasticsearch/monitor/jvm/GcNames.java @@ -15,8 +15,14 @@ public class GcNames { public static final String OLD = "old"; public static final String SURVIVOR = "survivor"; + private GcNames() {} + /** - * Resolves the GC type by its memory pool name ({@link java.lang.management.MemoryPoolMXBean#getName()}. 
+ * Resolves the memory area name by the memory pool name provided by {@link java.lang.management.MemoryPoolMXBean#getName()} + * + * @param poolName the name of the memory pool from {@link java.lang.management.MemoryPoolMXBean} + * @param defaultName the name to return if the pool name does not match any known memory area + * @return memory area name corresponding to the pool name or {@code defaultName} if no match is found */ public static String getByMemoryPoolName(String poolName, String defaultName) { if ("Eden Space".equals(poolName) @@ -40,6 +46,13 @@ public static String getByMemoryPoolName(String poolName, String defaultName) { return defaultName; } + /** + * Resolves the GC type by the GC name provided by {@link java.lang.management.GarbageCollectorMXBean#getName()} + * + * @param gcName the name of the GC from {@link java.lang.management.GarbageCollectorMXBean} + * @param defaultName the name to return if the GC name does not match any known GC type + * @return GC type corresponding to the GC name or {@code defaultName} if no match is found + */ public static String getByGcName(String gcName, String defaultName) { if ("Copy".equals(gcName) || "PS Scavenge".equals(gcName) || "ParNew".equals(gcName) || "G1 Young Generation".equals(gcName)) { return YOUNG; diff --git a/server/src/main/java/org/elasticsearch/monitor/jvm/JvmStats.java b/server/src/main/java/org/elasticsearch/monitor/jvm/JvmStats.java index 0a2763474b8df..e6b109207fdf3 100644 --- a/server/src/main/java/org/elasticsearch/monitor/jvm/JvmStats.java +++ b/server/src/main/java/org/elasticsearch/monitor/jvm/JvmStats.java @@ -64,10 +64,7 @@ public static JvmStats jvmStats() { List pools = new ArrayList<>(); for (MemoryPoolMXBean memoryPoolMXBean : memoryPoolMXBeans) { try { - String name = GcNames.getByMemoryPoolName(memoryPoolMXBean.getName(), null); - if (name == null) { // if we can't resolve it, its not interesting.... 
(Per Gen, Code Cache) - continue; - } + String name = GcNames.getByMemoryPoolName(memoryPoolMXBean.getName(), memoryPoolMXBean.getName()); MemoryUsage usage = memoryPoolMXBean.getUsage(); MemoryUsage peakUsage = memoryPoolMXBean.getPeakUsage(); pools.add( diff --git a/server/src/test/java/org/elasticsearch/monitor/jvm/JvmStatsTests.java b/server/src/test/java/org/elasticsearch/monitor/jvm/JvmStatsTests.java index 12fa776dd7efd..28976d803ff53 100644 --- a/server/src/test/java/org/elasticsearch/monitor/jvm/JvmStatsTests.java +++ b/server/src/test/java/org/elasticsearch/monitor/jvm/JvmStatsTests.java @@ -13,17 +13,22 @@ import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.test.ESTestCase; -import java.io.IOException; import java.util.Arrays; import java.util.List; +import java.util.Map; +import java.util.function.Function; +import java.util.stream.Collectors; +import java.util.stream.StreamSupport; import static org.hamcrest.Matchers.anyOf; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; import static org.hamcrest.Matchers.greaterThanOrEqualTo; +import static org.hamcrest.Matchers.hasKey; +import static org.hamcrest.Matchers.hasSize; public class JvmStatsTests extends ESTestCase { - public void testJvmStats() throws IOException { + public void testJvmStats() { JvmStats stats = JvmStats.jvmStats(); assertNotNull(stats); assertNotNull(stats.getUptime()); @@ -40,6 +45,17 @@ public void testJvmStats() throws IOException { assertNotNull(mem.getHeapUsedPercent()); assertThat(mem.getHeapUsedPercent(), anyOf(equalTo((short) -1), greaterThanOrEqualTo((short) 0))); + // Memory pools + Map memoryPools = StreamSupport.stream(stats.getMem().spliterator(), false) + .collect(Collectors.toMap(JvmStats.MemoryPool::getName, Function.identity())); + assertThat(memoryPools, hasKey(GcNames.YOUNG)); + assertThat(memoryPools, hasKey(GcNames.OLD)); + assertThat(memoryPools, hasKey("Metaspace")); + 
assertThat(memoryPools.keySet(), hasSize(greaterThan(3))); + for (JvmStats.MemoryPool memoryPool : memoryPools.values()) { + assertThat(memoryPool.getUsed().getBytes(), greaterThan(0L)); + } + // Threads JvmStats.Threads threads = stats.getThreads(); assertNotNull(threads); From 18ede6a59d7f6e300c53ff6bb5d40ba85d9cb8fe Mon Sep 17 00:00:00 2001 From: Panagiotis Bailis Date: Thu, 24 Oct 2024 17:39:24 +0300 Subject: [PATCH 02/22] Adding breaking change entry for retrievers (#115399) (#115548) --- docs/changelog/115399.yaml | 29 +++++++++++++++++++ .../TextSimilarityRankRetrieverBuilder.java | 2 +- .../xpack/rank/rrf/RRFRetrieverBuilder.java | 2 +- 3 files changed, 31 insertions(+), 2 deletions(-) create mode 100644 docs/changelog/115399.yaml diff --git a/docs/changelog/115399.yaml b/docs/changelog/115399.yaml new file mode 100644 index 0000000000000..9f69657a5d167 --- /dev/null +++ b/docs/changelog/115399.yaml @@ -0,0 +1,29 @@ +pr: 115399 +summary: Adding breaking change entry for retrievers +area: Search +type: breaking +issues: [] +breaking: + title: Reworking RRF retriever to be evaluated during rewrite phase + area: REST API + details: |- + In this release (8.16), we have introduced major changes to the retrievers framework + and how they can be evaluated, focusing mainly on compound retrievers + like `rrf` and `text_similarity_reranker`, which allowed us to support full + composability (i.e. any retriever can be nested under any compound retriever), + as well as supporting additional search features like collapsing, explaining, + aggregations, and highlighting. + + To ensure consistency, and given that this rework is not available until 8.16, + `rrf` and `text_similarity_reranker` retriever queries would now + throw an exception in a mixed cluster scenario, where there are nodes + both in current or later (i.e. >= 8.16) and previous ( <= 8.15) versions. 
+ + As part of the rework, we have also removed the `_rank` property from + the responses of an `rrf` retriever. + impact: |- + - Users will not be able to use the `rrf` and `text_similarity_reranker` retrievers in a mixed cluster scenario + with previous releases (i.e. prior to 8.16), and the request will throw an `IllegalArgumentException`. + - `_rank` has now been removed from the output of the `rrf` retrievers so trying to directly parse the field + will throw an exception + notable: false diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankRetrieverBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankRetrieverBuilder.java index 66631b5aa497a..94b11cdc3f825 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankRetrieverBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankRetrieverBuilder.java @@ -81,7 +81,7 @@ public static TextSimilarityRankRetrieverBuilder fromXContent(XContentParser par throw new ParsingException(parser.getTokenLocation(), "unknown retriever [" + TextSimilarityRankBuilder.NAME + "]"); } if (context.clusterSupportsFeature(TEXT_SIMILARITY_RERANKER_COMPOSITION_SUPPORTED) == false) { - throw new UnsupportedOperationException( + throw new IllegalArgumentException( "[text_similarity_reranker] retriever composition feature is not supported by all nodes in the cluster" ); } diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java index c3c9f19cde6ef..792ff4eac3893 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java +++ 
b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java @@ -83,7 +83,7 @@ public static RRFRetrieverBuilder fromXContent(XContentParser parser, RetrieverP throw new ParsingException(parser.getTokenLocation(), "unknown retriever [" + NAME + "]"); } if (context.clusterSupportsFeature(RRF_RETRIEVER_COMPOSITION_SUPPORTED) == false) { - throw new UnsupportedOperationException("[rrf] retriever composition feature is not supported by all nodes in the cluster"); + throw new IllegalArgumentException("[rrf] retriever composition feature is not supported by all nodes in the cluster"); } if (RRFRankPlugin.RANK_RRF_FEATURE.check(XPackPlugin.getSharedLicenseState()) == false) { throw LicenseUtils.newComplianceException("Reciprocal Rank Fusion (RRF)"); From 766877fbbaccc447ae8014bd265cdcbc7326b9e6 Mon Sep 17 00:00:00 2001 From: Gergely Kalapos Date: Thu, 24 Oct 2024 16:56:50 +0200 Subject: [PATCH 03/22] [otel-data] Add more kubernetes aliases (#115429) (#115556) * Add more kubernetes aliases * Update docs/changelog/115429.yaml * Review feedback --------- Co-authored-by: Elastic Machine (cherry picked from commit 37c7137f39d13ce36785c0bed01f2f058da886f8) --- docs/changelog/115429.yaml | 5 ++ .../semconv-resource-to-ecs@mappings.yaml | 48 +++++++++++++++++++ .../rest-api-spec/test/20_logs_tests.yml | 37 ++++++++++++++ 3 files changed, 90 insertions(+) create mode 100644 docs/changelog/115429.yaml diff --git a/docs/changelog/115429.yaml b/docs/changelog/115429.yaml new file mode 100644 index 0000000000000..ddf3c69183000 --- /dev/null +++ b/docs/changelog/115429.yaml @@ -0,0 +1,5 @@ +pr: 115429 +summary: "[otel-data] Add more kubernetes aliases" +area: Data streams +type: bug +issues: [] diff --git a/x-pack/plugin/otel-data/src/main/resources/component-templates/semconv-resource-to-ecs@mappings.yaml b/x-pack/plugin/otel-data/src/main/resources/component-templates/semconv-resource-to-ecs@mappings.yaml index 6645e7d282520..eb5cd6d37af83 100644 
--- a/x-pack/plugin/otel-data/src/main/resources/component-templates/semconv-resource-to-ecs@mappings.yaml +++ b/x-pack/plugin/otel-data/src/main/resources/component-templates/semconv-resource-to-ecs@mappings.yaml @@ -56,21 +56,45 @@ template: os.version: type: keyword ignore_above: 1024 + k8s.container.name: + type: keyword + ignore_above: 1024 + k8s.cronjob.name: + type: keyword + ignore_above: 1024 + k8s.daemonset.name: + type: keyword + ignore_above: 1024 k8s.deployment.name: type: keyword ignore_above: 1024 + k8s.job.name: + type: keyword + ignore_above: 1024 k8s.namespace.name: type: keyword ignore_above: 1024 + k8s.node.hostname: + type: keyword + ignore_above: 1024 k8s.node.name: type: keyword ignore_above: 1024 + k8s.node.uid: + type: keyword + ignore_above: 1024 k8s.pod.name: type: keyword ignore_above: 1024 k8s.pod.uid: type: keyword ignore_above: 1024 + k8s.replicaset.name: + type: keyword + ignore_above: 1024 + k8s.statefulset.name: + type: keyword + ignore_above: 1024 service.node.name: type: alias path: resource.attributes.service.instance.id @@ -122,6 +146,30 @@ template: kubernetes.pod.uid: type: alias path: resource.attributes.k8s.pod.uid + kubernetes.container.name: + type: alias + path: resource.attributes.k8s.container.name + kubernetes.cronjob.name: + type: alias + path: resource.attributes.k8s.cronjob.name + kubernetes.job.name: + type: alias + path: resource.attributes.k8s.job.name + kubernetes.statefulset.name: + type: alias + path: resource.attributes.k8s.statefulset.name + kubernetes.daemonset.name: + type: alias + path: resource.attributes.k8s.daemonset.name + kubernetes.replicaset.name: + type: alias + path: resource.attributes.k8s.replicaset.name + kubernetes.node.uid: + type: alias + path: resource.attributes.k8s.node.uid + kubernetes.node.hostname: + type: alias + path: resource.attributes.k8s.node.hostname # Below are non-ECS fields that may be used by Kibana. 
service.language.name: type: alias diff --git a/x-pack/plugin/otel-data/src/yamlRestTest/resources/rest-api-spec/test/20_logs_tests.yml b/x-pack/plugin/otel-data/src/yamlRestTest/resources/rest-api-spec/test/20_logs_tests.yml index 6bc0cee78be4f..63966e601a3cb 100644 --- a/x-pack/plugin/otel-data/src/yamlRestTest/resources/rest-api-spec/test/20_logs_tests.yml +++ b/x-pack/plugin/otel-data/src/yamlRestTest/resources/rest-api-spec/test/20_logs_tests.yml @@ -187,3 +187,40 @@ host.name pass-through: - length: { hits.hits: 1 } - match: { hits.hits.0.fields.resource\.attributes\.host\.name: [ "localhost" ] } - match: { hits.hits.0.fields.host\.name: [ "localhost" ] } +--- +"kubernetes.* -> resource.attributes.k8s.* aliases": + - do: + bulk: + index: logs-generic.otel-default + refresh: true + body: + - create: { } + - "@timestamp": 2024-07-18T14:48:33.467654000Z + data_stream: + dataset: generic.otel + namespace: default + resource: + attributes: + k8s.container.name: myContainerName + k8s.cronjob.name: myCronJobName + k8s.job.name: myJobName + k8s.statefulset.name: myStatefulsetName + k8s.daemonset.name: myDaemonsetName + k8s.replicaset.name: myReplicasetName + k8s.node.uid: myNodeUid + k8s.node.hostname: myNodeHostname + - is_false: errors + - do: + search: + index: logs-generic.otel-default + body: + fields: ["kubernetes.container.name", "kubernetes.cronjob.name", "kubernetes.job.name", "kubernetes.statefulset.name", "kubernetes.daemonset.name", "kubernetes.replicaset.name", "kubernetes.node.uid", "kubernetes.node.hostname" ] + - length: { hits.hits: 1 } + - match: { hits.hits.0.fields.kubernetes\.container\.name : ["myContainerName"] } + - match: { hits.hits.0.fields.kubernetes\.cronjob\.name : ["myCronJobName"] } + - match: { hits.hits.0.fields.kubernetes\.job\.name : ["myJobName"] } + - match: { hits.hits.0.fields.kubernetes\.statefulset\.name : ["myStatefulsetName"] } + - match: { hits.hits.0.fields.kubernetes\.daemonset\.name : ["myDaemonsetName"] } + - match: { 
hits.hits.0.fields.kubernetes\.replicaset\.name : ["myReplicasetName"] } + - match: { hits.hits.0.fields.kubernetes\.node\.uid : ["myNodeUid"] } + - match: { hits.hits.0.fields.kubernetes\.node\.hostname : ["myNodeHostname"] } From b852e048d4826eca4e550d3b5a8afaf2c7093e3a Mon Sep 17 00:00:00 2001 From: David Kyle Date: Thu, 24 Oct 2024 16:19:52 +0100 Subject: [PATCH 04/22] [ML] Prevent NPE if model assignment is removed while waiting to start (#115430) (#115561) --- docs/changelog/115430.yaml | 5 +++++ .../action/TransportStartTrainedModelDeploymentAction.java | 6 +++++- 2 files changed, 10 insertions(+), 1 deletion(-) create mode 100644 docs/changelog/115430.yaml diff --git a/docs/changelog/115430.yaml b/docs/changelog/115430.yaml new file mode 100644 index 0000000000000..c2903f7751012 --- /dev/null +++ b/docs/changelog/115430.yaml @@ -0,0 +1,5 @@ +pr: 115430 +summary: Prevent NPE if model assignment is removed while waiting to start +area: Machine Learning +type: bug +issues: [] diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStartTrainedModelDeploymentAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStartTrainedModelDeploymentAction.java index 0bda2de2ce9ae..5fd70ce71cd24 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStartTrainedModelDeploymentAction.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStartTrainedModelDeploymentAction.java @@ -671,7 +671,11 @@ public boolean test(ClusterState clusterState) { deploymentId ).orElse(null); if (trainedModelAssignment == null) { - // Something weird happened, it should NEVER be null... + // The assignment may be null if it was stopped by another action while waiting + this.exception = new ElasticsearchStatusException( + "Error waiting for the model deployment to start. 
The trained model assignment was removed while waiting", + RestStatus.BAD_REQUEST + ); logger.trace(() -> format("[%s] assignment was null while waiting for state [%s]", deploymentId, waitForState)); return true; } From 2b0bac3b2680c13533be0cf035d026478f262f05 Mon Sep 17 00:00:00 2001 From: Luke Whiting Date: Thu, 24 Oct 2024 16:51:59 +0100 Subject: [PATCH 05/22] Fix for race condition in interval watcher scheduler tests (#115501) (#115567) --- muted-tests.yml | 12 ------------ .../schedule/engine/TickerScheduleEngineTests.java | 12 ++++-------- 2 files changed, 4 insertions(+), 20 deletions(-) diff --git a/muted-tests.yml b/muted-tests.yml index 3664277721688..3a80d81330b65 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -351,18 +351,6 @@ tests: issue: https://github.com/elastic/elasticsearch/issues/102992 - class: org.elasticsearch.bootstrap.SpawnerNoBootstrapTests issue: https://github.com/elastic/elasticsearch/issues/114555 -- class: org.elasticsearch.xpack.watcher.trigger.schedule.engine.TickerScheduleEngineTests - method: testAddWithNoLastCheckedTimeButHasActivationTimeExecutesBeforeInitialInterval - issue: https://github.com/elastic/elasticsearch/issues/115339 -- class: org.elasticsearch.xpack.watcher.trigger.schedule.engine.TickerScheduleEngineTests - method: testAddWithLastCheckedTimeExecutesBeforeInitialInterval - issue: https://github.com/elastic/elasticsearch/issues/115356 -- class: org.elasticsearch.xpack.watcher.trigger.schedule.engine.TickerScheduleEngineTests - method: testWatchWithLastCheckedTimeExecutesBeforeInitialInterval - issue: https://github.com/elastic/elasticsearch/issues/115354 -- class: org.elasticsearch.xpack.watcher.trigger.schedule.engine.TickerScheduleEngineTests - method: testWatchWithNoLastCheckedTimeButHasActivationTimeExecutesBeforeInitialInterval - issue: https://github.com/elastic/elasticsearch/issues/115368 - class: org.elasticsearch.test.rest.yaml.RcsCcsCommonYamlTestSuiteIT method: test 
{p0=search.vectors/42_knn_search_int4_flat/Vector similarity with filter only} issue: https://github.com/elastic/elasticsearch/issues/115475 diff --git a/x-pack/plugin/watcher/src/test/java/org/elasticsearch/xpack/watcher/trigger/schedule/engine/TickerScheduleEngineTests.java b/x-pack/plugin/watcher/src/test/java/org/elasticsearch/xpack/watcher/trigger/schedule/engine/TickerScheduleEngineTests.java index 9a12b8f394eb2..ef290628c06d5 100644 --- a/x-pack/plugin/watcher/src/test/java/org/elasticsearch/xpack/watcher/trigger/schedule/engine/TickerScheduleEngineTests.java +++ b/x-pack/plugin/watcher/src/test/java/org/elasticsearch/xpack/watcher/trigger/schedule/engine/TickerScheduleEngineTests.java @@ -312,14 +312,13 @@ public void testWatchWithLastCheckedTimeExecutesBeforeInitialInterval() throws E engine.register(events -> { for (TriggerEvent ignored : events) { - if (runCount.get() == 0) { + if (runCount.getAndIncrement() == 0) { logger.info("job first fire"); firstLatch.countDown(); } else { logger.info("job second fire"); secondLatch.countDown(); } - runCount.incrementAndGet(); } }); @@ -375,14 +374,13 @@ public void testWatchWithNoLastCheckedTimeButHasActivationTimeExecutesBeforeInit engine.register(events -> { for (TriggerEvent ignored : events) { - if (runCount.get() == 0) { + if (runCount.getAndIncrement() == 0) { logger.info("job first fire"); firstLatch.countDown(); } else { logger.info("job second fire"); secondLatch.countDown(); } - runCount.incrementAndGet(); } }); @@ -428,14 +426,13 @@ public void testAddWithLastCheckedTimeExecutesBeforeInitialInterval() throws Exc engine.register(events -> { for (TriggerEvent ignored : events) { - if (runCount.get() == 0) { + if (runCount.getAndIncrement() == 0) { logger.info("job first fire"); firstLatch.countDown(); } else { logger.info("job second fire"); secondLatch.countDown(); } - runCount.incrementAndGet(); } }); @@ -492,14 +489,13 @@ public void testAddWithNoLastCheckedTimeButHasActivationTimeExecutesBeforeInitia 
engine.register(events -> { for (TriggerEvent ignored : events) { - if (runCount.get() == 0) { + if (runCount.getAndIncrement() == 0) { logger.info("job first fire"); firstLatch.countDown(); } else { logger.info("job second fire"); secondLatch.countDown(); } - runCount.incrementAndGet(); } }); From 8a6d6927a2a19f660d8ee440aa96304aec38e807 Mon Sep 17 00:00:00 2001 From: Andrei Dan Date: Thu, 24 Oct 2024 17:04:44 +0100 Subject: [PATCH 06/22] [8.x] Allow for queries on _tier to skip shards during coordinator rewrite (#114990) (#115514) * Allow for queries on _tier to skip shards during coordinator rewrite (#114990) The `_tier` metadata field was not used on the coordinator when rewriting queries in order to exclude shards that don't match. This lead to queries in the following form to continue to report failures even though the only unavailable shards were in the tier that was excluded from search (frozen tier in this example): ``` POST testing/_search { "query": { "bool": { "must_not": [ { "term": { "_tier": "data_frozen" } } ] } } } ``` This PR addresses this by having the queries that can execute on `_tier` (term, match, query string, simple query string, prefix, wildcard) execute a coordinator rewrite to exclude the indices that don't match the `_tier` query **before** attempting to reach to the shards (shards, that might not be available and raise errors). 
Fixes #114910 * Don't use getFirst * test compilation --------- Co-authored-by: Elastic Machine --- docs/changelog/114990.yaml | 6 + .../query/CoordinatorRewriteContext.java | 65 +++++++- .../CoordinatorRewriteContextProvider.java | 9 +- .../index/query/PrefixQueryBuilder.java | 18 ++- .../index/query/QueryRewriteContext.java | 21 +++ .../index/query/TermQueryBuilder.java | 18 ++- .../index/query/TermsQueryBuilder.java | 17 ++- .../index/query/WildcardQueryBuilder.java | 20 ++- .../index/query/PrefixQueryBuilderTests.java | 35 +++++ .../index/query/QueryRewriteContextTests.java | 131 ++++++++++++++++ .../index/query/TermQueryBuilderTests.java | 34 +++++ .../index/query/TermsQueryBuilderTests.java | 33 ++++ .../query/WildcardQueryBuilderTests.java | 34 +++++ .../test/AbstractBuilderTestCase.java | 15 +- .../mapper/DataTierFieldMapper.java | 26 +--- .../core/LocalStateCompositeXPackPlugin.java | 7 +- ...pshotsCanMatchOnCoordinatorIntegTests.java | 143 +++++++++++++++++- 17 files changed, 594 insertions(+), 38 deletions(-) create mode 100644 docs/changelog/114990.yaml create mode 100644 server/src/test/java/org/elasticsearch/index/query/QueryRewriteContextTests.java diff --git a/docs/changelog/114990.yaml b/docs/changelog/114990.yaml new file mode 100644 index 0000000000000..2575942d15bf5 --- /dev/null +++ b/docs/changelog/114990.yaml @@ -0,0 +1,6 @@ +pr: 114990 +summary: Allow for querries on `_tier` to skip shards in the `can_match` phase +area: Search +type: bug +issues: + - 114910 diff --git a/server/src/main/java/org/elasticsearch/index/query/CoordinatorRewriteContext.java b/server/src/main/java/org/elasticsearch/index/query/CoordinatorRewriteContext.java index 3e5deeeebae5d..964358610e074 100644 --- a/server/src/main/java/org/elasticsearch/index/query/CoordinatorRewriteContext.java +++ b/server/src/main/java/org/elasticsearch/index/query/CoordinatorRewriteContext.java @@ -9,17 +9,23 @@ package org.elasticsearch.index.query; +import 
org.apache.lucene.search.Query; import org.elasticsearch.client.internal.Client; import org.elasticsearch.cluster.metadata.DataStream; import org.elasticsearch.cluster.metadata.IndexMetadata; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.regex.Regex; import org.elasticsearch.core.Nullable; +import org.elasticsearch.index.mapper.ConstantFieldType; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.MappingLookup; +import org.elasticsearch.index.mapper.ValueFetcher; import org.elasticsearch.index.shard.IndexLongFieldRange; import org.elasticsearch.indices.DateFieldRangeInfo; import org.elasticsearch.xcontent.XContentParserConfiguration; import java.util.Collections; +import java.util.Map; import java.util.function.LongSupplier; /** @@ -30,20 +36,57 @@ * and skip the shards that don't hold queried data. See IndexMetadata for more details. */ public class CoordinatorRewriteContext extends QueryRewriteContext { + + public static final String TIER_FIELD_NAME = "_tier"; + + private static final ConstantFieldType TIER_FIELD_TYPE = new ConstantFieldType(TIER_FIELD_NAME, Map.of()) { + @Override + public ValueFetcher valueFetcher(SearchExecutionContext context, String format) { + throw new UnsupportedOperationException("fetching field values is not supported on the coordinator node"); + } + + @Override + public String typeName() { + return TIER_FIELD_NAME; + } + + @Override + protected boolean matches(String pattern, boolean caseInsensitive, QueryRewriteContext context) { + if (caseInsensitive) { + pattern = Strings.toLowercaseAscii(pattern); + } + + String tierPreference = context.getTierPreference(); + if (tierPreference == null) { + return false; + } + return Regex.simpleMatch(pattern, tierPreference); + } + + @Override + public Query existsQuery(SearchExecutionContext context) { + throw new UnsupportedOperationException("field exists query is not supported on the coordinator node"); + } + }; + private 
final DateFieldRangeInfo dateFieldRangeInfo; + private final String tier; /** * Context for coordinator search rewrites based on time ranges for the @timestamp field and/or 'event.ingested' field + * * @param parserConfig * @param client * @param nowInMillis * @param dateFieldRangeInfo range and field type info for @timestamp and 'event.ingested' + * @param tier the configured data tier (via the _tier_preference setting) for the index */ public CoordinatorRewriteContext( XContentParserConfiguration parserConfig, Client client, LongSupplier nowInMillis, - DateFieldRangeInfo dateFieldRangeInfo + DateFieldRangeInfo dateFieldRangeInfo, + String tier ) { super( parserConfig, @@ -63,10 +106,12 @@ public CoordinatorRewriteContext( null ); this.dateFieldRangeInfo = dateFieldRangeInfo; + this.tier = tier; } /** - * @param fieldName Must be one of DataStream.TIMESTAMP_FIELD_FIELD or IndexMetadata.EVENT_INGESTED_FIELD_NAME + * @param fieldName Must be one of DataStream.TIMESTAMP_FIELD_FIELD, IndexMetadata.EVENT_INGESTED_FIELD_NAME, or + * DataTierFiledMapper.NAME * @return MappedField with type for the field. Returns null if fieldName is not one of the allowed field names. */ @Nullable @@ -75,6 +120,8 @@ public MappedFieldType getFieldType(String fieldName) { return dateFieldRangeInfo.timestampFieldType(); } else if (IndexMetadata.EVENT_INGESTED_FIELD_NAME.equals(fieldName)) { return dateFieldRangeInfo.eventIngestedFieldType(); + } else if (TIER_FIELD_NAME.equals(fieldName)) { + return TIER_FIELD_TYPE; } else { return null; } @@ -99,4 +146,18 @@ public IndexLongFieldRange getFieldRange(String fieldName) { public CoordinatorRewriteContext convertToCoordinatorRewriteContext() { return this; } + + @Override + public String getTierPreference() { + // dominant branch first (tier preference is configured) + return tier.isEmpty() == false ? 
tier : null; + } + + /** + * We're holding on to the index tier in the context as otherwise we'd need + * to re-parse it from the index settings when evaluating the _tier field. + */ + public String tier() { + return tier; + } } diff --git a/server/src/main/java/org/elasticsearch/index/query/CoordinatorRewriteContextProvider.java b/server/src/main/java/org/elasticsearch/index/query/CoordinatorRewriteContextProvider.java index 67042a98db42a..feee429c3318b 100644 --- a/server/src/main/java/org/elasticsearch/index/query/CoordinatorRewriteContextProvider.java +++ b/server/src/main/java/org/elasticsearch/index/query/CoordinatorRewriteContextProvider.java @@ -52,6 +52,12 @@ public CoordinatorRewriteContext getCoordinatorRewriteContext(Index index) { return null; } DateFieldRangeInfo dateFieldRangeInfo = mappingSupplier.apply(index); + // we've now added a coordinator rewrite based on the _tier field so the requirement + // for the timestamps fields to be present is artificial (we could do a coordinator + // rewrite only based on the _tier field) and we might decide to remove this artificial + // limitation to enable coordinator rewrites based on _tier for hot and warm indices + // (currently the _tier coordinator rewrite is only available for mounted and partially mounted + // indices) if (dateFieldRangeInfo == null) { return null; } @@ -74,7 +80,8 @@ public CoordinatorRewriteContext getCoordinatorRewriteContext(Index index) { parserConfig, client, nowInMillis, - new DateFieldRangeInfo(timestampFieldType, timestampRange, dateFieldRangeInfo.eventIngestedFieldType(), eventIngestedRange) + new DateFieldRangeInfo(timestampFieldType, timestampRange, dateFieldRangeInfo.eventIngestedFieldType(), eventIngestedRange), + indexMetadata.getTierPreference().isEmpty() == false ? 
indexMetadata.getTierPreference().get(0) : "" ); } } diff --git a/server/src/main/java/org/elasticsearch/index/query/PrefixQueryBuilder.java b/server/src/main/java/org/elasticsearch/index/query/PrefixQueryBuilder.java index 24817b778a4da..fcf986191da23 100644 --- a/server/src/main/java/org/elasticsearch/index/query/PrefixQueryBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/query/PrefixQueryBuilder.java @@ -20,6 +20,7 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.xcontent.LoggingDeprecationHandler; +import org.elasticsearch.core.Nullable; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.mapper.ConstantFieldType; import org.elasticsearch.index.mapper.MappedFieldType; @@ -189,11 +190,24 @@ public String getWriteableName() { } @Override - protected QueryBuilder doIndexMetadataRewrite(QueryRewriteContext context) throws IOException { + protected QueryBuilder doIndexMetadataRewrite(QueryRewriteContext context) { MappedFieldType fieldType = context.getFieldType(this.fieldName); if (fieldType == null) { return new MatchNoneQueryBuilder("The \"" + getName() + "\" query is against a field that does not exist"); - } else if (fieldType instanceof ConstantFieldType constantFieldType) { + } + return maybeRewriteBasedOnConstantFields(fieldType, context); + } + + @Override + protected QueryBuilder doCoordinatorRewrite(CoordinatorRewriteContext coordinatorRewriteContext) { + MappedFieldType fieldType = coordinatorRewriteContext.getFieldType(this.fieldName); + // we don't rewrite a null field type to `match_none` on the coordinator because the coordinator has access + // to only a subset of fields see {@link CoordinatorRewriteContext#getFieldType} + return maybeRewriteBasedOnConstantFields(fieldType, coordinatorRewriteContext); + } + + private QueryBuilder maybeRewriteBasedOnConstantFields(@Nullable MappedFieldType fieldType, 
QueryRewriteContext context) { + if (fieldType instanceof ConstantFieldType constantFieldType) { // This logic is correct for all field types, but by only applying it to constant // fields we also have the guarantee that it doesn't perform I/O, which is important // since rewrites might happen on a network thread. diff --git a/server/src/main/java/org/elasticsearch/index/query/QueryRewriteContext.java b/server/src/main/java/org/elasticsearch/index/query/QueryRewriteContext.java index 8808cd79072f6..fce74aa60ab16 100644 --- a/server/src/main/java/org/elasticsearch/index/query/QueryRewriteContext.java +++ b/server/src/main/java/org/elasticsearch/index/query/QueryRewriteContext.java @@ -11,9 +11,12 @@ import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.ResolvedIndices; import org.elasticsearch.client.internal.Client; +import org.elasticsearch.cluster.routing.allocation.DataTier; +import org.elasticsearch.common.Strings; import org.elasticsearch.common.collect.Iterators; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.regex.Regex; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.concurrent.CountDown; import org.elasticsearch.core.Nullable; import org.elasticsearch.index.Index; @@ -407,4 +410,22 @@ public ResolvedIndices getResolvedIndices() { public PointInTimeBuilder getPointInTimeBuilder() { return pit; } + + /** + * Retrieve the first tier preference from the index setting. If the setting is not + * present, then return null. + */ + @Nullable + public String getTierPreference() { + Settings settings = getIndexSettings().getSettings(); + String value = DataTier.TIER_PREFERENCE_SETTING.get(settings); + + if (Strings.hasText(value) == false) { + return null; + } + + // Tier preference can be a comma-delimited list of tiers, ordered by preference + // It was decided we should only test the first of these potentially multiple preferences. 
+ return value.split(",")[0].trim(); + } } diff --git a/server/src/main/java/org/elasticsearch/index/query/TermQueryBuilder.java b/server/src/main/java/org/elasticsearch/index/query/TermQueryBuilder.java index 2978b3bfbf69c..113f66f3e58de 100644 --- a/server/src/main/java/org/elasticsearch/index/query/TermQueryBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/query/TermQueryBuilder.java @@ -17,6 +17,7 @@ import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.core.Nullable; import org.elasticsearch.index.mapper.ConstantFieldType; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.xcontent.ParseField; @@ -170,11 +171,24 @@ protected void addExtraXContent(XContentBuilder builder, Params params) throws I } @Override - protected QueryBuilder doIndexMetadataRewrite(QueryRewriteContext context) throws IOException { + protected QueryBuilder doIndexMetadataRewrite(QueryRewriteContext context) { MappedFieldType fieldType = context.getFieldType(this.fieldName); if (fieldType == null) { return new MatchNoneQueryBuilder("The \"" + getName() + "\" query is against a field that does not exist"); - } else if (fieldType instanceof ConstantFieldType constantFieldType) { + } + return maybeRewriteBasedOnConstantFields(fieldType, context); + } + + @Override + protected QueryBuilder doCoordinatorRewrite(CoordinatorRewriteContext coordinatorRewriteContext) { + MappedFieldType fieldType = coordinatorRewriteContext.getFieldType(this.fieldName); + // we don't rewrite a null field type to `match_none` on the coordinator because the coordinator has access + // to only a subset of fields see {@link CoordinatorRewriteContext#getFieldType} + return maybeRewriteBasedOnConstantFields(fieldType, coordinatorRewriteContext); + } + + private QueryBuilder maybeRewriteBasedOnConstantFields(@Nullable MappedFieldType fieldType, 
QueryRewriteContext context) { + if (fieldType instanceof ConstantFieldType constantFieldType) { // This logic is correct for all field types, but by only applying it to constant // fields we also have the guarantee that it doesn't perform I/O, which is important // since rewrites might happen on a network thread. diff --git a/server/src/main/java/org/elasticsearch/index/query/TermsQueryBuilder.java b/server/src/main/java/org/elasticsearch/index/query/TermsQueryBuilder.java index 4035bc02fba79..dec4090a3e6bd 100644 --- a/server/src/main/java/org/elasticsearch/index/query/TermsQueryBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/query/TermsQueryBuilder.java @@ -393,11 +393,24 @@ protected QueryBuilder doRewrite(QueryRewriteContext queryRewriteContext) throws } @Override - protected QueryBuilder doIndexMetadataRewrite(QueryRewriteContext context) throws IOException { + protected QueryBuilder doIndexMetadataRewrite(QueryRewriteContext context) { MappedFieldType fieldType = context.getFieldType(this.fieldName); if (fieldType == null) { return new MatchNoneQueryBuilder("The \"" + getName() + "\" query is against a field that does not exist"); - } else if (fieldType instanceof ConstantFieldType constantFieldType) { + } + return maybeRewriteBasedOnConstantFields(fieldType, context); + } + + @Override + protected QueryBuilder doCoordinatorRewrite(CoordinatorRewriteContext coordinatorRewriteContext) { + MappedFieldType fieldType = coordinatorRewriteContext.getFieldType(this.fieldName); + // we don't rewrite a null field type to `match_none` on the coordinator because the coordinator has access + // to only a subset of fields see {@link CoordinatorRewriteContext#getFieldType} + return maybeRewriteBasedOnConstantFields(fieldType, coordinatorRewriteContext); + } + + private QueryBuilder maybeRewriteBasedOnConstantFields(@Nullable MappedFieldType fieldType, QueryRewriteContext context) { + if (fieldType instanceof ConstantFieldType constantFieldType) { // This 
logic is correct for all field types, but by only applying it to constant // fields we also have the guarantee that it doesn't perform I/O, which is important // since rewrites might happen on a network thread. diff --git a/server/src/main/java/org/elasticsearch/index/query/WildcardQueryBuilder.java b/server/src/main/java/org/elasticsearch/index/query/WildcardQueryBuilder.java index f287812ebbc10..419195e5e5ba5 100644 --- a/server/src/main/java/org/elasticsearch/index/query/WildcardQueryBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/query/WildcardQueryBuilder.java @@ -20,6 +20,7 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.xcontent.LoggingDeprecationHandler; +import org.elasticsearch.core.Nullable; import org.elasticsearch.index.mapper.ConstantFieldType; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.query.support.QueryParsers; @@ -200,11 +201,24 @@ public static WildcardQueryBuilder fromXContent(XContentParser parser) throws IO } @Override - protected QueryBuilder doIndexMetadataRewrite(QueryRewriteContext context) throws IOException { + protected QueryBuilder doIndexMetadataRewrite(QueryRewriteContext context) { MappedFieldType fieldType = context.getFieldType(this.fieldName); if (fieldType == null) { - return new MatchNoneQueryBuilder("The \"" + getName() + "\" query is against a field that does not exist"); - } else if (fieldType instanceof ConstantFieldType constantFieldType) { + return new MatchNoneQueryBuilder("The \"" + getName() + "\" query is against a field that does not exist"); + } + return maybeRewriteBasedOnConstantFields(fieldType, context); + } + + @Override + protected QueryBuilder doCoordinatorRewrite(CoordinatorRewriteContext coordinatorRewriteContext) { + MappedFieldType fieldType = coordinatorRewriteContext.getFieldType(this.fieldName); + // we don't rewrite a null field type to 
`match_none` on the coordinator because the coordinator has access + // to only a subset of fields see {@link CoordinatorRewriteContext#getFieldType} + return maybeRewriteBasedOnConstantFields(fieldType, coordinatorRewriteContext); + } + + private QueryBuilder maybeRewriteBasedOnConstantFields(@Nullable MappedFieldType fieldType, QueryRewriteContext context) { + if (fieldType instanceof ConstantFieldType constantFieldType) { // This logic is correct for all field types, but by only applying it to constant // fields we also have the guarantee that it doesn't perform I/O, which is important // since rewrites might happen on a network thread. diff --git a/server/src/test/java/org/elasticsearch/index/query/PrefixQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/PrefixQueryBuilderTests.java index 0260fa2ef4cc8..918815f2a4f77 100644 --- a/server/src/test/java/org/elasticsearch/index/query/PrefixQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/PrefixQueryBuilderTests.java @@ -17,7 +17,9 @@ import org.apache.lucene.search.Query; import org.elasticsearch.common.ParsingException; import org.elasticsearch.core.Strings; +import org.elasticsearch.index.mapper.DateFieldMapper; import org.elasticsearch.test.AbstractQueryTestCase; +import org.hamcrest.CoreMatchers; import org.hamcrest.Matchers; import java.io.IOException; @@ -175,4 +177,37 @@ public void testMustRewrite() throws IOException { IllegalStateException e = expectThrows(IllegalStateException.class, () -> queryBuilder.toQuery(context)); assertEquals("Rewrite first", e.getMessage()); } + + public void testCoordinatorTierRewriteToMatchAll() throws IOException { + QueryBuilder query = new PrefixQueryBuilder("_tier", "data_fro"); + final String timestampFieldName = "@timestamp"; + long minTimestamp = 1685714000000L; + long maxTimestamp = 1685715000000L; + final CoordinatorRewriteContext coordinatorRewriteContext = createCoordinatorRewriteContext( + new 
DateFieldMapper.DateFieldType(timestampFieldName), + minTimestamp, + maxTimestamp, + "data_frozen" + ); + + QueryBuilder rewritten = query.rewrite(coordinatorRewriteContext); + assertThat(rewritten, CoreMatchers.instanceOf(MatchAllQueryBuilder.class)); + } + + public void testCoordinatorTierRewriteToMatchNone() throws IOException { + QueryBuilder query = QueryBuilders.boolQuery().mustNot(new PrefixQueryBuilder("_tier", "data_fro")); + final String timestampFieldName = "@timestamp"; + long minTimestamp = 1685714000000L; + long maxTimestamp = 1685715000000L; + final CoordinatorRewriteContext coordinatorRewriteContext = createCoordinatorRewriteContext( + new DateFieldMapper.DateFieldType(timestampFieldName), + minTimestamp, + maxTimestamp, + "data_frozen" + ); + + QueryBuilder rewritten = query.rewrite(coordinatorRewriteContext); + assertThat(rewritten, CoreMatchers.instanceOf(MatchNoneQueryBuilder.class)); + } + } diff --git a/server/src/test/java/org/elasticsearch/index/query/QueryRewriteContextTests.java b/server/src/test/java/org/elasticsearch/index/query/QueryRewriteContextTests.java new file mode 100644 index 0000000000000..0b2a8ab4856b3 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/index/query/QueryRewriteContextTests.java @@ -0,0 +1,131 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +package org.elasticsearch.index.query; + +import org.elasticsearch.cluster.metadata.IndexMetadata; +import org.elasticsearch.cluster.routing.allocation.DataTier; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.index.mapper.DateFieldMapper; +import org.elasticsearch.index.mapper.MappingLookup; +import org.elasticsearch.indices.DateFieldRangeInfo; +import org.elasticsearch.test.ESTestCase; + +import java.util.Collections; + +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.nullValue; + +public class QueryRewriteContextTests extends ESTestCase { + + public void testGetTierPreference() { + { + // cold->hot tier preference + IndexMetadata metadata = newIndexMeta( + "index", + Settings.builder() + .put(IndexMetadata.SETTING_VERSION_CREATED, IndexVersion.current()) + .put(DataTier.TIER_PREFERENCE, "data_cold,data_warm,data_hot") + .build() + ); + QueryRewriteContext context = new QueryRewriteContext( + parserConfig(), + null, + System::currentTimeMillis, + null, + MappingLookup.EMPTY, + Collections.emptyMap(), + new IndexSettings(metadata, Settings.EMPTY), + null, + null, + null, + null, + null, + null, + null, + null + ); + + assertThat(context.getTierPreference(), is("data_cold")); + } + + { + // missing tier preference + IndexMetadata metadata = newIndexMeta( + "index", + Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, IndexVersion.current()).build() + ); + QueryRewriteContext context = new QueryRewriteContext( + parserConfig(), + null, + System::currentTimeMillis, + null, + MappingLookup.EMPTY, + Collections.emptyMap(), + new IndexSettings(metadata, Settings.EMPTY), + null, + null, + null, + null, + null, + null, + null, + null + ); + + assertThat(context.getTierPreference(), is(nullValue())); + } + + { + // coordinator rewrite context + IndexMetadata metadata = newIndexMeta( + "index", + 
Settings.builder() + .put(IndexMetadata.SETTING_VERSION_CREATED, IndexVersion.current()) + .put(DataTier.TIER_PREFERENCE, "data_cold,data_warm,data_hot") + .build() + ); + CoordinatorRewriteContext coordinatorRewriteContext = new CoordinatorRewriteContext( + parserConfig(), + null, + System::currentTimeMillis, + new DateFieldRangeInfo(null, null, new DateFieldMapper.DateFieldType(IndexMetadata.EVENT_INGESTED_FIELD_NAME), null), + "data_frozen" + ); + + assertThat(coordinatorRewriteContext.getTierPreference(), is("data_frozen")); + } + { + // coordinator rewrite context empty tier + IndexMetadata metadata = newIndexMeta( + "index", + Settings.builder() + .put(IndexMetadata.SETTING_VERSION_CREATED, IndexVersion.current()) + .put(DataTier.TIER_PREFERENCE, "data_cold,data_warm,data_hot") + .build() + ); + CoordinatorRewriteContext coordinatorRewriteContext = new CoordinatorRewriteContext( + parserConfig(), + null, + System::currentTimeMillis, + new DateFieldRangeInfo(null, null, new DateFieldMapper.DateFieldType(IndexMetadata.EVENT_INGESTED_FIELD_NAME), null), + "" + ); + + assertThat(coordinatorRewriteContext.getTierPreference(), is(nullValue())); + } + } + + public static IndexMetadata newIndexMeta(String name, Settings indexSettings) { + return IndexMetadata.builder(name).settings(indexSettings(IndexVersion.current(), 1, 1).put(indexSettings)).build(); + } + +} diff --git a/server/src/test/java/org/elasticsearch/index/query/TermQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/TermQueryBuilderTests.java index b5cf42cf5df28..bbac216754eed 100644 --- a/server/src/test/java/org/elasticsearch/index/query/TermQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/TermQueryBuilderTests.java @@ -17,9 +17,11 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.elasticsearch.common.ParsingException; +import org.elasticsearch.index.mapper.DateFieldMapper; import 
org.elasticsearch.index.mapper.FieldTypeTestCase; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.xcontent.json.JsonStringEncoder; +import org.hamcrest.CoreMatchers; import java.io.IOException; import java.util.Locale; @@ -238,4 +240,36 @@ public void testLongTerm() throws IOException { { "term" : { "foo" : "%s" } }""", longTerm))); assertThat(e.getMessage(), containsString("term starting with [aaaaa")); } + + public void testCoordinatorTierRewriteToMatchAll() throws IOException { + QueryBuilder query = new TermQueryBuilder("_tier", "data_frozen"); + final String timestampFieldName = "@timestamp"; + long minTimestamp = 1685714000000L; + long maxTimestamp = 1685715000000L; + final CoordinatorRewriteContext coordinatorRewriteContext = createCoordinatorRewriteContext( + new DateFieldMapper.DateFieldType(timestampFieldName), + minTimestamp, + maxTimestamp, + "data_frozen" + ); + + QueryBuilder rewritten = query.rewrite(coordinatorRewriteContext); + assertThat(rewritten, CoreMatchers.instanceOf(MatchAllQueryBuilder.class)); + } + + public void testCoordinatorTierRewriteToMatchNone() throws IOException { + QueryBuilder query = QueryBuilders.boolQuery().mustNot(new TermQueryBuilder("_tier", "data_frozen")); + final String timestampFieldName = "@timestamp"; + long minTimestamp = 1685714000000L; + long maxTimestamp = 1685715000000L; + final CoordinatorRewriteContext coordinatorRewriteContext = createCoordinatorRewriteContext( + new DateFieldMapper.DateFieldType(timestampFieldName), + minTimestamp, + maxTimestamp, + "data_frozen" + ); + + QueryBuilder rewritten = query.rewrite(coordinatorRewriteContext); + assertThat(rewritten, CoreMatchers.instanceOf(MatchNoneQueryBuilder.class)); + } } diff --git a/server/src/test/java/org/elasticsearch/index/query/TermsQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/TermsQueryBuilderTests.java index 1ce69355379de..2faee7bc89eb5 100644 --- 
a/server/src/test/java/org/elasticsearch/index/query/TermsQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/TermsQueryBuilderTests.java @@ -25,6 +25,7 @@ import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.io.stream.BytesStreamOutput; import org.elasticsearch.index.get.GetResult; +import org.elasticsearch.index.mapper.DateFieldMapper; import org.elasticsearch.indices.TermsLookup; import org.elasticsearch.test.AbstractQueryTestCase; import org.elasticsearch.xcontent.XContentBuilder; @@ -317,6 +318,38 @@ public void testLongTerm() throws IOException { assertThat(e.getMessage(), containsString("term starting with [aaaaa")); } + public void testCoordinatorTierRewriteToMatchAll() throws IOException { + QueryBuilder query = new TermsQueryBuilder("_tier", "data_frozen"); + final String timestampFieldName = "@timestamp"; + long minTimestamp = 1685714000000L; + long maxTimestamp = 1685715000000L; + final CoordinatorRewriteContext coordinatorRewriteContext = createCoordinatorRewriteContext( + new DateFieldMapper.DateFieldType(timestampFieldName), + minTimestamp, + maxTimestamp, + "data_frozen" + ); + + QueryBuilder rewritten = query.rewrite(coordinatorRewriteContext); + assertThat(rewritten, CoreMatchers.instanceOf(MatchAllQueryBuilder.class)); + } + + public void testCoordinatorTierRewriteToMatchNone() throws IOException { + QueryBuilder query = QueryBuilders.boolQuery().mustNot(new TermsQueryBuilder("_tier", "data_frozen")); + final String timestampFieldName = "@timestamp"; + long minTimestamp = 1685714000000L; + long maxTimestamp = 1685715000000L; + final CoordinatorRewriteContext coordinatorRewriteContext = createCoordinatorRewriteContext( + new DateFieldMapper.DateFieldType(timestampFieldName), + minTimestamp, + maxTimestamp, + "data_frozen" + ); + + QueryBuilder rewritten = query.rewrite(coordinatorRewriteContext); + assertThat(rewritten, CoreMatchers.instanceOf(MatchNoneQueryBuilder.class)); + } + @Override 
protected QueryBuilder parseQuery(XContentParser parser) throws IOException { QueryBuilder query = super.parseQuery(parser); diff --git a/server/src/test/java/org/elasticsearch/index/query/WildcardQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/WildcardQueryBuilderTests.java index 7ee6d75a08736..182bd4d6b5b86 100644 --- a/server/src/test/java/org/elasticsearch/index/query/WildcardQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/WildcardQueryBuilderTests.java @@ -15,7 +15,9 @@ import org.apache.lucene.search.WildcardQuery; import org.elasticsearch.common.ParsingException; import org.elasticsearch.core.Strings; +import org.elasticsearch.index.mapper.DateFieldMapper; import org.elasticsearch.test.AbstractQueryTestCase; +import org.hamcrest.CoreMatchers; import java.io.IOException; import java.util.HashMap; @@ -166,4 +168,36 @@ public void testMustRewrite() throws IOException { IllegalStateException e = expectThrows(IllegalStateException.class, () -> queryBuilder.toQuery(context)); assertEquals("Rewrite first", e.getMessage()); } + + public void testCoordinatorTierRewriteToMatchAll() throws IOException { + QueryBuilder query = new WildcardQueryBuilder("_tier", "data_fr*"); + final String timestampFieldName = "@timestamp"; + long minTimestamp = 1685714000000L; + long maxTimestamp = 1685715000000L; + final CoordinatorRewriteContext coordinatorRewriteContext = createCoordinatorRewriteContext( + new DateFieldMapper.DateFieldType(timestampFieldName), + minTimestamp, + maxTimestamp, + "data_frozen" + ); + + QueryBuilder rewritten = query.rewrite(coordinatorRewriteContext); + assertThat(rewritten, CoreMatchers.instanceOf(MatchAllQueryBuilder.class)); + } + + public void testCoordinatorTierRewriteToMatchNone() throws IOException { + QueryBuilder query = QueryBuilders.boolQuery().mustNot(new WildcardQueryBuilder("_tier", "data_fro*")); + final String timestampFieldName = "@timestamp"; + long minTimestamp = 
1685714000000L; + long maxTimestamp = 1685715000000L; + final CoordinatorRewriteContext coordinatorRewriteContext = createCoordinatorRewriteContext( + new DateFieldMapper.DateFieldType(timestampFieldName), + minTimestamp, + maxTimestamp, + "data_frozen" + ); + + QueryBuilder rewritten = query.rewrite(coordinatorRewriteContext); + assertThat(rewritten, CoreMatchers.instanceOf(MatchNoneQueryBuilder.class)); + } } diff --git a/test/framework/src/main/java/org/elasticsearch/test/AbstractBuilderTestCase.java b/test/framework/src/main/java/org/elasticsearch/test/AbstractBuilderTestCase.java index 77ff194e2681d..0543bc7a78f8b 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/AbstractBuilderTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/test/AbstractBuilderTestCase.java @@ -342,6 +342,15 @@ protected static CoordinatorRewriteContext createCoordinatorRewriteContext( return serviceHolder.createCoordinatorContext(dateFieldType, min, max); } + protected static CoordinatorRewriteContext createCoordinatorRewriteContext( + DateFieldMapper.DateFieldType dateFieldType, + long min, + long max, + String tier + ) { + return serviceHolder.createCoordinatorContext(dateFieldType, min, max, tier); + } + protected static DataRewriteContext dataRewriteContext() { return serviceHolder.createDataContext(); } @@ -625,13 +634,17 @@ QueryRewriteContext createQueryRewriteContext() { } CoordinatorRewriteContext createCoordinatorContext(DateFieldMapper.DateFieldType dateFieldType, long min, long max) { + return createCoordinatorContext(dateFieldType, min, max, ""); + } + + CoordinatorRewriteContext createCoordinatorContext(DateFieldMapper.DateFieldType dateFieldType, long min, long max, String tier) { DateFieldRangeInfo timestampFieldInfo = new DateFieldRangeInfo( dateFieldType, IndexLongFieldRange.NO_SHARDS.extendWithShardRange(0, 1, ShardLongFieldRange.of(min, max)), dateFieldType, IndexLongFieldRange.NO_SHARDS.extendWithShardRange(0, 1, 
ShardLongFieldRange.of(min, max)) ); - return new CoordinatorRewriteContext(parserConfiguration, this.client, () -> nowInMillis, timestampFieldInfo); + return new CoordinatorRewriteContext(parserConfiguration, this.client, () -> nowInMillis, timestampFieldInfo, tier); } DataRewriteContext createDataContext() { diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/cluster/routing/allocation/mapper/DataTierFieldMapper.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/cluster/routing/allocation/mapper/DataTierFieldMapper.java index 527f8d1c176ec..0e185a90ed39b 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/cluster/routing/allocation/mapper/DataTierFieldMapper.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/cluster/routing/allocation/mapper/DataTierFieldMapper.java @@ -10,10 +10,8 @@ import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; -import org.elasticsearch.cluster.routing.allocation.DataTier; import org.elasticsearch.common.Strings; import org.elasticsearch.common.regex.Regex; -import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.mapper.ConstantFieldType; import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.MetadataFieldMapper; @@ -55,7 +53,7 @@ protected boolean matches(String pattern, boolean caseInsensitive, QueryRewriteC pattern = Strings.toLowercaseAscii(pattern); } - String tierPreference = getTierPreference(context); + String tierPreference = context.getTierPreference(); if (tierPreference == null) { return false; } @@ -64,7 +62,7 @@ protected boolean matches(String pattern, boolean caseInsensitive, QueryRewriteC @Override public Query existsQuery(SearchExecutionContext context) { - String tierPreference = getTierPreference(context); + String tierPreference = context.getTierPreference(); if (tierPreference == null) { return new 
MatchNoDocsQuery(); } @@ -77,26 +75,9 @@ public ValueFetcher valueFetcher(SearchExecutionContext context, String format) throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] doesn't support formats."); } - String tierPreference = getTierPreference(context); + String tierPreference = context.getTierPreference(); return tierPreference == null ? ValueFetcher.EMPTY : ValueFetcher.singleton(tierPreference); } - - /** - * Retrieve the first tier preference from the index setting. If the setting is not - * present, then return null. - */ - private static String getTierPreference(QueryRewriteContext context) { - Settings settings = context.getIndexSettings().getSettings(); - String value = DataTier.TIER_PREFERENCE_SETTING.get(settings); - - if (Strings.hasText(value) == false) { - return null; - } - - // Tier preference can be a comma-delimited list of tiers, ordered by preference - // It was decided we should only test the first of these potentially multiple preferences. 
- return value.split(",")[0].trim(); - } } public DataTierFieldMapper() { @@ -107,4 +88,5 @@ public DataTierFieldMapper() { protected String contentType() { return CONTENT_TYPE; } + } diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/LocalStateCompositeXPackPlugin.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/LocalStateCompositeXPackPlugin.java index 918976c0d3db8..1f2c89c473a62 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/LocalStateCompositeXPackPlugin.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/LocalStateCompositeXPackPlugin.java @@ -637,10 +637,15 @@ public Collection getSystemIndexDescriptors(Settings sett @Override public Map getMetadataMappers() { - return filterPlugins(MapperPlugin.class).stream() + Map pluginsMetadataMappers = filterPlugins(MapperPlugin.class).stream() .map(MapperPlugin::getMetadataMappers) .flatMap(map -> map.entrySet().stream()) .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + + // the xpack plugin itself exposes a metadata mapper so let's include it as well + Map metadataMappersIncludingXPackPlugin = new HashMap<>(pluginsMetadataMappers); + metadataMappersIncludingXPackPlugin.putAll(super.getMetadataMappers()); + return metadataMappersIncludingXPackPlugin; } @Override diff --git a/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/SearchableSnapshotsCanMatchOnCoordinatorIntegTests.java b/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/SearchableSnapshotsCanMatchOnCoordinatorIntegTests.java index faf41e7e655a8..1be31df9e693d 100644 --- a/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/SearchableSnapshotsCanMatchOnCoordinatorIntegTests.java +++ 
b/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/SearchableSnapshotsCanMatchOnCoordinatorIntegTests.java @@ -20,14 +20,18 @@ import org.elasticsearch.cluster.metadata.DataStream; import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.cluster.node.DiscoveryNodeRole; import org.elasticsearch.common.Strings; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.util.CollectionUtils; import org.elasticsearch.index.Index; import org.elasticsearch.index.mapper.DateFieldMapper; +import org.elasticsearch.index.query.BoolQueryBuilder; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.index.query.RangeQueryBuilder; +import org.elasticsearch.index.query.TermQueryBuilder; +import org.elasticsearch.index.query.TermsQueryBuilder; import org.elasticsearch.index.shard.IndexLongFieldRange; import org.elasticsearch.indices.DateFieldRangeInfo; import org.elasticsearch.indices.IndicesService; @@ -36,6 +40,7 @@ import org.elasticsearch.search.builder.SearchSourceBuilder; import org.elasticsearch.snapshots.SnapshotId; import org.elasticsearch.test.ESIntegTestCase; +import org.elasticsearch.test.NodeRoles; import org.elasticsearch.test.junit.annotations.TestIssueLogging; import org.elasticsearch.test.transport.MockTransportService; import org.elasticsearch.xcontent.XContentFactory; @@ -51,6 +56,7 @@ import java.util.stream.Collectors; import static org.elasticsearch.cluster.metadata.IndexMetadata.INDEX_ROUTING_REQUIRE_GROUP_SETTING; +import static org.elasticsearch.cluster.node.DiscoveryNode.getRolesFromSettings; import static org.elasticsearch.index.IndexSettings.INDEX_SOFT_DELETES_SETTING; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static 
org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponse; @@ -76,14 +82,24 @@ protected Collection> nodePlugins() { @Override protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) { final Settings initialSettings = super.nodeSettings(nodeOrdinal, otherSettings); - if (DiscoveryNode.canContainData(otherSettings)) { + + if (DiscoveryNode.canContainData(otherSettings) + && getRolesFromSettings(otherSettings).stream() + .anyMatch( + nr -> nr.roleName().equals(DiscoveryNodeRole.DATA_FROZEN_NODE_ROLE.roleName()) + || nr.roleName().equals(DiscoveryNodeRole.DATA_ROLE.roleName()) + )) { return Settings.builder() .put(initialSettings) // Have a shared cache of reasonable size available on each node because tests randomize over frozen and cold allocation .put(SharedBlobCacheService.SHARED_CACHE_SIZE_SETTING.getKey(), ByteSizeValue.ofMb(randomLongBetween(1, 10))) .build(); } else { - return initialSettings; + return Settings.builder() + .put(initialSettings) + // Have a shared cache of reasonable size available on each node because tests randomize over frozen and cold allocation + .putNull(SharedBlobCacheService.SHARED_CACHE_SIZE_SETTING.getKey()) + .build(); } } @@ -955,6 +971,129 @@ public void testSearchableSnapshotShardsThatHaveMatchingDataAreNotSkippedOnTheCo } } + public void testCanMatchSkipsPartiallyMountedIndicesWhenFrozenNodesUnavailable() throws Exception { + internalCluster().startMasterOnlyNode(); + internalCluster().startCoordinatingOnlyNode(Settings.EMPTY); + final String dataNodeHoldingRegularIndex = internalCluster().startNode( + NodeRoles.onlyRole(DiscoveryNodeRole.DATA_CONTENT_NODE_ROLE) + ); + final String dataNodeHoldingSearchableSnapshot = internalCluster().startNode( + NodeRoles.onlyRole(DiscoveryNodeRole.DATA_FROZEN_NODE_ROLE) + ); + + final String indexToMountInFrozen = "frozen-" + randomAlphaOfLength(10).toLowerCase(Locale.ROOT); + final int shardCount = randomIntBetween(2, 3); + 
createIndexWithTimestampAndEventIngested(indexToMountInFrozen, shardCount, Settings.EMPTY); + final int numDocsFrozenIndex = between(350, 1000); + indexRandomDocs(indexToMountInFrozen, numDocsFrozenIndex); + + final String regularIndex = "regular-" + randomAlphaOfLength(10).toLowerCase(Locale.ROOT); + createIndexWithTimestampAndEventIngested( + regularIndex, + shardCount, + Settings.builder() + .put(INDEX_ROUTING_REQUIRE_GROUP_SETTING.getConcreteSettingForNamespace("_name").getKey(), dataNodeHoldingRegularIndex) + .build() + ); + int numDocsRegularIndex = between(100, 1000); + indexDocumentsWithTimestampAndEventIngestedDates(regularIndex, numDocsRegularIndex, TIMESTAMP_TEMPLATE_WITHIN_RANGE); + + final String repositoryName = randomAlphaOfLength(10).toLowerCase(Locale.ROOT); + createRepository(repositoryName, "mock"); + + final SnapshotId snapshotId = createSnapshot(repositoryName, "snapshot-1", List.of(indexToMountInFrozen)).snapshotId(); + assertAcked(indicesAdmin().prepareDelete(indexToMountInFrozen)); + + final String partiallyMountedIndex = randomAlphaOfLength(10).toLowerCase(Locale.ROOT); + + final MountSearchableSnapshotRequest mountRequest = new MountSearchableSnapshotRequest( + TEST_REQUEST_TIMEOUT, + partiallyMountedIndex, + repositoryName, + snapshotId.getName(), + indexToMountInFrozen, + Settings.EMPTY, + Strings.EMPTY_ARRAY, + false, + MountSearchableSnapshotRequest.Storage.SHARED_CACHE + ); + client().execute(MountSearchableSnapshotAction.INSTANCE, mountRequest).actionGet(); + + ensureGreen(regularIndex, partiallyMountedIndex); + + // Stop the node holding the searchable snapshots, and since we defined + // the index allocation criteria to require the searchable snapshot + // index to be allocated in that node, the shards should remain unassigned + internalCluster().stopNode(dataNodeHoldingSearchableSnapshot); + final IndexMetadata partiallyMountedIndexMetadata = getIndexMetadata(partiallyMountedIndex); + 
waitUntilAllShardsAreUnassigned(partiallyMountedIndexMetadata.getIndex()); + + { + // term query + TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("_tier", "data_content"); + List indicesToSearch = List.of(regularIndex, partiallyMountedIndex); + SearchRequest request = new SearchRequest().indices(indicesToSearch.toArray(new String[0])) + .source(new SearchSourceBuilder().query(termQueryBuilder)); + + assertResponse(client().search(request), searchResponse -> { + // as we excluded the frozen tier we shouldn't get any failures + assertThat(searchResponse.getFailedShards(), equalTo(0)); + // we should be receiving all the hits from the index that's in the data_content tier + assertNotNull(searchResponse.getHits().getTotalHits()); + assertThat(searchResponse.getHits().getTotalHits().value, equalTo((long) numDocsRegularIndex)); + }); + } + + { + // termS query + TermsQueryBuilder termsQueryBuilder = QueryBuilders.termsQuery("_tier", "data_hot", "data_content"); + List indicesToSearch = List.of(regularIndex, partiallyMountedIndex); + SearchRequest request = new SearchRequest().indices(indicesToSearch.toArray(new String[0])) + .source(new SearchSourceBuilder().query(termsQueryBuilder)); + + assertResponse(client().search(request), searchResponse -> { + // as we excluded the frozen tier we shouldn't get any failures + assertThat(searchResponse.getFailedShards(), equalTo(0)); + // we should be receiving all the hits from the index that's in the data_content tier + assertNotNull(searchResponse.getHits().getTotalHits()); + assertThat(searchResponse.getHits().getTotalHits().value, equalTo((long) numDocsRegularIndex)); + }); + } + + { + // bool term query + BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery().mustNot(QueryBuilders.termQuery("_tier", "data_frozen")); + List indicesToSearch = List.of(regularIndex, partiallyMountedIndex); + SearchRequest request = new SearchRequest().indices(indicesToSearch.toArray(new String[0])) + .source(new 
SearchSourceBuilder().query(boolQueryBuilder)); + + assertResponse(client().search(request), searchResponse -> { + // as we excluded the frozen tier we shouldn't get any failures + assertThat(searchResponse.getFailedShards(), equalTo(0)); + // we should be receiving all the hits from the index that's in the data_content tier + assertNotNull(searchResponse.getHits().getTotalHits()); + assertThat(searchResponse.getHits().getTotalHits().value, equalTo((long) numDocsRegularIndex)); + }); + } + + { + // bool prefix, wildcard + BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery() + .mustNot(randomFrom(QueryBuilders.wildcardQuery("_tier", "dat*ozen"), QueryBuilders.prefixQuery("_tier", "data_fro"))); + List indicesToSearch = List.of(regularIndex, partiallyMountedIndex); + SearchRequest request = new SearchRequest().indices(indicesToSearch.toArray(new String[0])) + .source(new SearchSourceBuilder().query(boolQueryBuilder)); + + assertResponse(client().search(request), searchResponse -> { + // as we excluded the frozen tier we shouldn't get any failures + assertThat(searchResponse.getFailedShards(), equalTo(0)); + // we should be receiving all the hits from the index that's in the data_content tier + assertNotNull(searchResponse.getHits().getTotalHits()); + assertThat(searchResponse.getHits().getTotalHits().value, equalTo((long) numDocsRegularIndex)); + }); + } + } + private void createIndexWithTimestampAndEventIngested(String indexName, int numShards, Settings extraSettings) throws IOException { assertAcked( indicesAdmin().prepareCreate(indexName) From e847481fd6aed9c4cbff32f1097d5ff53c9a1528 Mon Sep 17 00:00:00 2001 From: Liam Thompson <32779855+leemthompo@users.noreply.github.com> Date: Thu, 24 Oct 2024 18:43:32 +0200 Subject: [PATCH 07/22] [DOCS][101] Add BYO vectors ingestion tutorial (#115112) (#115576) (cherry picked from commit d500daf2e16bb3b6fb4bdde49bbf9d93b7fec25b) --- docs/reference/images/semantic-options.svg | 62 ++++++++ 
.../search-your-data/ingest-vectors.asciidoc | 141 ++++++++++++++++++ .../search-your-data/semantic-search.asciidoc | 3 + 3 files changed, 206 insertions(+) create mode 100644 docs/reference/images/semantic-options.svg create mode 100644 docs/reference/search/search-your-data/ingest-vectors.asciidoc diff --git a/docs/reference/images/semantic-options.svg b/docs/reference/images/semantic-options.svg new file mode 100644 index 0000000000000..3bedf5307357e --- /dev/null +++ b/docs/reference/images/semantic-options.svg @@ -0,0 +1,62 @@ + + + + Elasticsearch semantic search workflows + + + + + + semantic_text + (Recommended) + + + + Inference API + + + + Model Deployment + + + Complexity: Low + Complexity: Medium + Complexity: High + + + + + + Create Inference Endpoint + + + Define Index Mapping + + + + Create Inference Endpoint + + + Configure Model Settings + + + Define Index Mapping + + + Setup Ingest Pipeline + + + + Select NLP Model + + + Deploy with Eland Client + + + Define Index Mapping + + + Setup Ingest Pipeline + + + diff --git a/docs/reference/search/search-your-data/ingest-vectors.asciidoc b/docs/reference/search/search-your-data/ingest-vectors.asciidoc new file mode 100644 index 0000000000000..f288293d2b03a --- /dev/null +++ b/docs/reference/search/search-your-data/ingest-vectors.asciidoc @@ -0,0 +1,141 @@ +[[bring-your-own-vectors]] +=== Bring your own dense vector embeddings to {es} +++++ +Bring your own dense vectors +++++ + +This tutorial demonstrates how to index documents that already have dense vector embeddings into {es}. +You'll also learn the syntax for searching these documents using a `knn` query. + +You'll find links at the end of this tutorial for more information about deploying a text embedding model in {es}, so you can generate embeddings for queries on the fly. + +[TIP] +==== +This is an advanced use case. +Refer to <> for an overview of your options for semantic search with {es}. 
+==== + +[discrete] +[[bring-your-own-vectors-create-index]] +=== Step 1: Create an index with `dense_vector` mapping + +Each document in our simple dataset will have: + +* A review: stored in a `review_text` field +* An embedding of that review: stored in a `review_vector` field +** The `review_vector` field is defined as a <> data type. + +[TIP] +==== +The `dense_vector` type automatically uses `int8_hnsw` quantization by default to reduce the memory footprint required when searching float vectors. +Learn more about balancing performance and accuracy in <>. +==== + +[source,console] +---- +PUT /amazon-reviews +{ + "mappings": { + "properties": { + "review_vector": { + "type": "dense_vector", + "dims": 8, <1> + "index": true, <2> + "similarity": "cosine" <3> + }, + "review_text": { + "type": "text" + } + } + } +} +---- +// TEST SETUP +<1> The `dims` parameter must match the length of the embedding vector. Here we're using a simple 8-dimensional embedding for readability. If not specified, `dims` will be dynamically calculated based on the first indexed document. +<2> The `index` parameter is set to `true` to enable the use of the `knn` query. +<3> The `similarity` parameter defines the similarity function used to compare the query vector to the document vectors. `cosine` is the default similarity function for `dense_vector` fields in {es}. + +[discrete] +[[bring-your-own-vectors-index-documents]] +=== Step 2: Index documents with embeddings + +[discrete] +==== Index a single document + +First, index a single document to understand the document structure. + +[source,console] +---- +PUT /amazon-reviews/_doc/1 +{ + "review_text": "This product is lifechanging! I'm telling all my friends about it.", + "review_vector": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8] <1> +} +---- +// TEST +<1> The size of the `review_vector` array is 8, matching the `dims` count specified in the mapping. 
+ +[discrete] +==== Bulk index multiple documents + +In a production scenario, you'll want to index many documents at once using the <>. + +Here's an example of indexing multiple documents in a single `_bulk` request. + +[source,console] +---- +POST /_bulk +{ "index": { "_index": "amazon-reviews", "_id": "2" } } +{ "review_text": "This product is amazing! I love it.", "review_vector": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8] } +{ "index": { "_index": "amazon-reviews", "_id": "3" } } +{ "review_text": "This product is terrible. I hate it.", "review_vector": [0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1] } +{ "index": { "_index": "amazon-reviews", "_id": "4" } } +{ "review_text": "This product is great. I can do anything with it.", "review_vector": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8] } +{ "index": { "_index": "amazon-reviews", "_id": "5" } } +{ "review_text": "This product has ruined my life and the lives of my family and friends.", "review_vector": [0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1] } +---- +// TEST[continued] + +[discrete] +[[bring-your-own-vectors-search-documents]] +=== Step 3: Search documents with embeddings + +Now you can query these document vectors using a <>. +`knn` is a type of vector search, which finds the `k` most similar documents to a query vector. +Here we're simply using a raw vector for the query text, for demonstration purposes. + +[source,console] +---- +POST /amazon-reviews/_search +{ + "retriever": { + "knn": { + "field": "review_vector", + "query_vector": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8], <1> + "k": 2, <2> + "num_candidates": 5 <3> + } + } +} +---- +// TEST[skip:flakeyknnerror] +<1> In this simple example, we're sending a raw vector as the query text. In a real-world scenario, you'll need to generate vectors for queries using an embedding model. +<2> The `k` parameter specifies the number of results to return. +<3> The `num_candidates` parameter is optional. It limits the number of candidates returned by the search node. 
This can improve performance and reduce costs. + +[discrete] +[[bring-your-own-vectors-learn-more]] +=== Learn more + +In this simple example, we're sending a raw vector for the query text. +In a real-world scenario you won't know the query text ahead of time. +You'll need to generate query vectors, on the fly, using the same embedding model that generated the document vectors. + +For this you'll need to deploy a text embedding model in {es} and use the <>. Alternatively, you can generate vectors client-side and send them directly with the search request. + +Learn how to <> for semantic search. + +[TIP] +==== +If you're just getting started with vector search in {es}, refer to <>. +==== diff --git a/docs/reference/search/search-your-data/semantic-search.asciidoc b/docs/reference/search/search-your-data/semantic-search.asciidoc index 0ef8591e42b5d..e0fb8415fee18 100644 --- a/docs/reference/search/search-your-data/semantic-search.asciidoc +++ b/docs/reference/search/search-your-data/semantic-search.asciidoc @@ -8,6 +8,8 @@ Using an NLP model enables you to extract text embeddings out of text. Embeddings are vectors that provide a numeric representation of a text. Pieces of content with similar meaning have similar representations. +image::images/semantic-options.svg[Overview of semantic search workflows in {es}] + You have several options for using NLP models in the {stack}: * use the `semantic_text` workflow (recommended) @@ -109,3 +111,4 @@ include::semantic-search-inference.asciidoc[] include::semantic-search-elser.asciidoc[] include::cohere-es.asciidoc[] include::semantic-search-deploy-model.asciidoc[] +include::ingest-vectors.asciidoc[] From 00a70699fa873e3c4ce58c00c011a14c1e36a30b Mon Sep 17 00:00:00 2001 From: Ryan Ernst Date: Thu, 24 Oct 2024 09:54:00 -0700 Subject: [PATCH 08/22] Guard blob store local directory creation with doPrivileged (#115459) (#115571) The blob store may be triggered to create a local directory while in a reduced privilege context. 
This commit guards the creation of directories with doPrivileged. --- docs/changelog/115459.yaml | 5 +++++ .../common/blobstore/fs/FsBlobStore.java | 15 ++++++++++----- 2 files changed, 15 insertions(+), 5 deletions(-) create mode 100644 docs/changelog/115459.yaml diff --git a/docs/changelog/115459.yaml b/docs/changelog/115459.yaml new file mode 100644 index 0000000000000..b20a8f765c084 --- /dev/null +++ b/docs/changelog/115459.yaml @@ -0,0 +1,5 @@ +pr: 115459 +summary: Guard blob store local directory creation with `doPrivileged` +area: Infra/Core +type: bug +issues: [] diff --git a/server/src/main/java/org/elasticsearch/common/blobstore/fs/FsBlobStore.java b/server/src/main/java/org/elasticsearch/common/blobstore/fs/FsBlobStore.java index c4240672239fa..53e3b4b4796dc 100644 --- a/server/src/main/java/org/elasticsearch/common/blobstore/fs/FsBlobStore.java +++ b/server/src/main/java/org/elasticsearch/common/blobstore/fs/FsBlobStore.java @@ -19,6 +19,8 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import java.security.AccessController; +import java.security.PrivilegedAction; import java.util.Iterator; import java.util.List; @@ -56,11 +58,14 @@ public int bufferSizeInBytes() { public BlobContainer blobContainer(BlobPath path) { Path f = buildPath(path); if (readOnly == false) { - try { - Files.createDirectories(f); - } catch (IOException ex) { - throw new ElasticsearchException("failed to create blob container", ex); - } + AccessController.doPrivileged((PrivilegedAction) () -> { + try { + Files.createDirectories(f); + } catch (IOException ex) { + throw new ElasticsearchException("failed to create blob container", ex); + } + return null; + }); } return new FsBlobContainer(this, path, f); } From 2f3f6daea569299d3f0be445bef6a07a0e494b95 Mon Sep 17 00:00:00 2001 From: Rene Groeschke Date: Thu, 24 Oct 2024 19:01:25 +0200 Subject: [PATCH 09/22] Remove unused elasticsearch cloud docker image (#115357) (#115572) --- 
.../gradle/internal/DockerBase.java | 3 --- distribution/docker/build.gradle | 25 +++---------------- .../cloud-docker-aarch64-export/build.gradle | 2 -- .../docker/cloud-docker-export/build.gradle | 2 -- .../build.gradle | 2 -- .../wolfi-ess-docker-export/build.gradle | 2 -- .../packaging/test/DockerTests.java | 11 +++----- .../test/KeystoreManagementTests.java | 5 +--- .../packaging/test/PackagingTestCase.java | 6 ++--- .../packaging/util/Distribution.java | 5 +--- .../packaging/util/docker/Docker.java | 2 +- .../packaging/util/docker/DockerRun.java | 1 - settings.gradle | 2 -- 13 files changed, 12 insertions(+), 56 deletions(-) delete mode 100644 distribution/docker/cloud-docker-aarch64-export/build.gradle delete mode 100644 distribution/docker/cloud-docker-export/build.gradle delete mode 100644 distribution/docker/wolfi-ess-docker-aarch64-export/build.gradle delete mode 100644 distribution/docker/wolfi-ess-docker-export/build.gradle diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/DockerBase.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/DockerBase.java index fa3f6c3b7400b..718ba6021679a 100644 --- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/DockerBase.java +++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/DockerBase.java @@ -21,9 +21,6 @@ public enum DockerBase { // The Iron Bank base image is UBI (albeit hardened), but we are required to parameterize the Docker build IRON_BANK("${BASE_REGISTRY}/${BASE_IMAGE}:${BASE_TAG}", "-ironbank", "yum"), - // Base image with extras for Cloud - CLOUD("ubuntu:20.04", "-cloud", "apt-get"), - // Chainguard based wolfi image with latest jdk // This is usually updated via renovatebot // spotless:off diff --git a/distribution/docker/build.gradle b/distribution/docker/build.gradle index e40ac68bbacf4..788e836f8f045 100644 --- a/distribution/docker/build.gradle +++ b/distribution/docker/build.gradle @@ -288,20 +288,6 
@@ void addBuildDockerContextTask(Architecture architecture, DockerBase base) { } } - if (base == DockerBase.CLOUD) { - // If we're performing a release build, but `build.id` hasn't been set, we can - // infer that we're not at the Docker building stage of the build, and therefore - // we should skip the beats part of the build. - String buildId = providers.systemProperty('build.id').getOrNull() - boolean includeBeats = VersionProperties.isElasticsearchSnapshot() == true || buildId != null || useDra - - if (includeBeats) { - from configurations.getByName("filebeat_${architecture.classifier}") - from configurations.getByName("metricbeat_${architecture.classifier}") - } - // For some reason, the artifact name can differ depending on what repository we used. - rename ~/((?:file|metric)beat)-.*\.tar\.gz$/, "\$1-${VersionProperties.elasticsearch}.tar.gz" - } Provider serviceProvider = GradleUtils.getBuildService( project.gradle.sharedServices, DockerSupportPlugin.DOCKER_SUPPORT_SERVICE_NAME @@ -381,7 +367,7 @@ private static List generateTags(DockerBase base, Architecture architect String image = "elasticsearch${base.suffix}" String namespace = 'elasticsearch' - if (base == DockerBase.CLOUD || base == DockerBase.CLOUD_ESS) { + if (base == DockerBase.CLOUD_ESS) { namespace += '-ci' } @@ -439,7 +425,7 @@ void addBuildDockerImageTask(Architecture architecture, DockerBase base) { } - if (base != DockerBase.IRON_BANK && base != DockerBase.CLOUD && base != DockerBase.CLOUD_ESS) { + if (base != DockerBase.IRON_BANK && base != DockerBase.CLOUD_ESS) { tasks.named("assemble").configure { dependsOn(buildDockerImageTask) } @@ -548,10 +534,6 @@ subprojects { Project subProject -> base = DockerBase.IRON_BANK } else if (subProject.name.contains('cloud-ess-')) { base = DockerBase.CLOUD_ESS - } else if (subProject.name.contains('cloud-')) { - base = DockerBase.CLOUD - } else if (subProject.name.contains('wolfi-ess')) { - base = DockerBase.WOLFI_ESS } else if
(subProject.name.contains('wolfi-')) { base = DockerBase.WOLFI } @@ -559,10 +541,9 @@ subprojects { Project subProject -> final String arch = architecture == Architecture.AARCH64 ? '-aarch64' : '' final String extension = base == DockerBase.UBI ? 'ubi.tar' : (base == DockerBase.IRON_BANK ? 'ironbank.tar' : - (base == DockerBase.CLOUD ? 'cloud.tar' : (base == DockerBase.CLOUD_ESS ? 'cloud-ess.tar' : (base == DockerBase.WOLFI ? 'wolfi.tar' : - 'docker.tar')))) + 'docker.tar'))) final String artifactName = "elasticsearch${arch}${base.suffix}_test" final String exportTaskName = taskName("export", architecture, base, 'DockerImage') diff --git a/distribution/docker/cloud-docker-aarch64-export/build.gradle b/distribution/docker/cloud-docker-aarch64-export/build.gradle deleted file mode 100644 index 537b5a093683e..0000000000000 --- a/distribution/docker/cloud-docker-aarch64-export/build.gradle +++ /dev/null @@ -1,2 +0,0 @@ -// This file is intentionally blank. All configuration of the -// export is done in the parent project. diff --git a/distribution/docker/cloud-docker-export/build.gradle b/distribution/docker/cloud-docker-export/build.gradle deleted file mode 100644 index 537b5a093683e..0000000000000 --- a/distribution/docker/cloud-docker-export/build.gradle +++ /dev/null @@ -1,2 +0,0 @@ -// This file is intentionally blank. All configuration of the -// export is done in the parent project. diff --git a/distribution/docker/wolfi-ess-docker-aarch64-export/build.gradle b/distribution/docker/wolfi-ess-docker-aarch64-export/build.gradle deleted file mode 100644 index 537b5a093683e..0000000000000 --- a/distribution/docker/wolfi-ess-docker-aarch64-export/build.gradle +++ /dev/null @@ -1,2 +0,0 @@ -// This file is intentionally blank. All configuration of the -// export is done in the parent project. 
diff --git a/distribution/docker/wolfi-ess-docker-export/build.gradle b/distribution/docker/wolfi-ess-docker-export/build.gradle deleted file mode 100644 index 537b5a093683e..0000000000000 --- a/distribution/docker/wolfi-ess-docker-export/build.gradle +++ /dev/null @@ -1,2 +0,0 @@ -// This file is intentionally blank. All configuration of the -// export is done in the parent project. diff --git a/qa/packaging/src/test/java/org/elasticsearch/packaging/test/DockerTests.java b/qa/packaging/src/test/java/org/elasticsearch/packaging/test/DockerTests.java index 4ca97bff42333..8cb8354eb5d71 100644 --- a/qa/packaging/src/test/java/org/elasticsearch/packaging/test/DockerTests.java +++ b/qa/packaging/src/test/java/org/elasticsearch/packaging/test/DockerTests.java @@ -169,10 +169,7 @@ public void test012SecurityCanBeDisabled() throws Exception { * Checks that no plugins are initially active. */ public void test020PluginsListWithNoPlugins() { - assumeTrue( - "Only applies to non-Cloud images", - distribution.packaging != Packaging.DOCKER_CLOUD && distribution().packaging != Packaging.DOCKER_CLOUD_ESS - ); + assumeTrue("Only applies to non-Cloud images", distribution().packaging != Packaging.DOCKER_CLOUD_ESS); final Installation.Executables bin = installation.executables(); final Result r = sh.run(bin.pluginTool + " list"); @@ -1116,8 +1113,8 @@ public void test170DefaultShellIsBash() { */ public void test171AdditionalCliOptionsAreForwarded() throws Exception { assumeTrue( - "Does not apply to Cloud and Cloud ESS images, because they don't use the default entrypoint", - distribution.packaging != Packaging.DOCKER_CLOUD && distribution().packaging != Packaging.DOCKER_CLOUD_ESS + "Does not apply to Cloud ESS images, because they don't use the default entrypoint", + distribution().packaging != Packaging.DOCKER_CLOUD_ESS ); runContainer(distribution(), builder().runArgs("bin/elasticsearch", "-Ecluster.name=kimchy").envVar("ELASTIC_PASSWORD", PASSWORD)); @@ -1204,7 +1201,7 @@ public 
void test310IronBankImageHasNoAdditionalLabels() throws Exception { * Check that the Cloud image contains the required Beats */ public void test400CloudImageBundlesBeats() { - assumeTrue(distribution.packaging == Packaging.DOCKER_CLOUD || distribution.packaging == Packaging.DOCKER_CLOUD_ESS); + assumeTrue(distribution.packaging == Packaging.DOCKER_CLOUD_ESS); final List contents = listContents("/opt"); assertThat("Expected beats in /opt", contents, hasItems("filebeat", "metricbeat")); diff --git a/qa/packaging/src/test/java/org/elasticsearch/packaging/test/KeystoreManagementTests.java b/qa/packaging/src/test/java/org/elasticsearch/packaging/test/KeystoreManagementTests.java index a988a446f561f..02e1ce35764cf 100644 --- a/qa/packaging/src/test/java/org/elasticsearch/packaging/test/KeystoreManagementTests.java +++ b/qa/packaging/src/test/java/org/elasticsearch/packaging/test/KeystoreManagementTests.java @@ -436,10 +436,7 @@ private void verifyKeystorePermissions() { switch (distribution.packaging) { case TAR, ZIP -> assertThat(keystore, file(File, ARCHIVE_OWNER, ARCHIVE_OWNER, p660)); case DEB, RPM -> assertThat(keystore, file(File, "root", "elasticsearch", p660)); - case DOCKER, DOCKER_UBI, DOCKER_IRON_BANK, DOCKER_CLOUD, DOCKER_CLOUD_ESS, DOCKER_WOLFI -> assertThat( - keystore, - DockerFileMatcher.file(p660) - ); + case DOCKER, DOCKER_UBI, DOCKER_IRON_BANK, DOCKER_CLOUD_ESS, DOCKER_WOLFI -> assertThat(keystore, DockerFileMatcher.file(p660)); default -> throw new IllegalStateException("Unknown Elasticsearch packaging type."); } } diff --git a/qa/packaging/src/test/java/org/elasticsearch/packaging/test/PackagingTestCase.java b/qa/packaging/src/test/java/org/elasticsearch/packaging/test/PackagingTestCase.java index 644990105f60f..b4a00ca56924a 100644 --- a/qa/packaging/src/test/java/org/elasticsearch/packaging/test/PackagingTestCase.java +++ b/qa/packaging/src/test/java/org/elasticsearch/packaging/test/PackagingTestCase.java @@ -245,7 +245,7 @@ protected static void 
install() throws Exception { installation = Packages.installPackage(sh, distribution); Packages.verifyPackageInstallation(installation, distribution, sh); } - case DOCKER, DOCKER_UBI, DOCKER_IRON_BANK, DOCKER_CLOUD, DOCKER_CLOUD_ESS, DOCKER_WOLFI -> { + case DOCKER, DOCKER_UBI, DOCKER_IRON_BANK, DOCKER_CLOUD_ESS, DOCKER_WOLFI -> { installation = Docker.runContainer(distribution); Docker.verifyContainerInstallation(installation); } @@ -335,7 +335,6 @@ public Shell.Result runElasticsearchStartCommand(String password, boolean daemon case DOCKER: case DOCKER_UBI: case DOCKER_IRON_BANK: - case DOCKER_CLOUD: case DOCKER_CLOUD_ESS: case DOCKER_WOLFI: // nothing, "installing" docker image is running it @@ -358,7 +357,6 @@ public void stopElasticsearch() throws Exception { case DOCKER: case DOCKER_UBI: case DOCKER_IRON_BANK: - case DOCKER_CLOUD: case DOCKER_CLOUD_ESS: case DOCKER_WOLFI: // nothing, "installing" docker image is running it @@ -373,7 +371,7 @@ public void awaitElasticsearchStartup(Shell.Result result) throws Exception { switch (distribution.packaging) { case TAR, ZIP -> Archives.assertElasticsearchStarted(installation); case DEB, RPM -> Packages.assertElasticsearchStarted(sh, installation); - case DOCKER, DOCKER_UBI, DOCKER_IRON_BANK, DOCKER_CLOUD, DOCKER_CLOUD_ESS, DOCKER_WOLFI -> Docker.waitForElasticsearchToStart(); + case DOCKER, DOCKER_UBI, DOCKER_IRON_BANK, DOCKER_CLOUD_ESS, DOCKER_WOLFI -> Docker.waitForElasticsearchToStart(); default -> throw new IllegalStateException("Unknown Elasticsearch packaging type."); } } diff --git a/qa/packaging/src/test/java/org/elasticsearch/packaging/util/Distribution.java b/qa/packaging/src/test/java/org/elasticsearch/packaging/util/Distribution.java index 05cef4a0818ba..11b8324384631 100644 --- a/qa/packaging/src/test/java/org/elasticsearch/packaging/util/Distribution.java +++ b/qa/packaging/src/test/java/org/elasticsearch/packaging/util/Distribution.java @@ -33,8 +33,6 @@ public Distribution(Path path) { this.packaging 
= Packaging.DOCKER_UBI; } else if (filename.endsWith(".ironbank.tar")) { this.packaging = Packaging.DOCKER_IRON_BANK; - } else if (filename.endsWith(".cloud.tar")) { - this.packaging = Packaging.DOCKER_CLOUD; } else if (filename.endsWith(".cloud-ess.tar")) { this.packaging = Packaging.DOCKER_CLOUD_ESS; } else if (filename.endsWith(".wolfi.tar")) { @@ -63,7 +61,7 @@ public boolean isPackage() { */ public boolean isDocker() { return switch (packaging) { - case DOCKER, DOCKER_UBI, DOCKER_IRON_BANK, DOCKER_CLOUD, DOCKER_CLOUD_ESS, DOCKER_WOLFI -> true; + case DOCKER, DOCKER_UBI, DOCKER_IRON_BANK, DOCKER_CLOUD_ESS, DOCKER_WOLFI -> true; default -> false; }; } @@ -77,7 +75,6 @@ public enum Packaging { DOCKER(".docker.tar", Platforms.isDocker()), DOCKER_UBI(".ubi.tar", Platforms.isDocker()), DOCKER_IRON_BANK(".ironbank.tar", Platforms.isDocker()), - DOCKER_CLOUD(".cloud.tar", Platforms.isDocker()), DOCKER_CLOUD_ESS(".cloud-ess.tar", Platforms.isDocker()), DOCKER_WOLFI(".wolfi.tar", Platforms.isDocker()); diff --git a/qa/packaging/src/test/java/org/elasticsearch/packaging/util/docker/Docker.java b/qa/packaging/src/test/java/org/elasticsearch/packaging/util/docker/Docker.java index c38eaa58f0552..0cd2823080b9b 100644 --- a/qa/packaging/src/test/java/org/elasticsearch/packaging/util/docker/Docker.java +++ b/qa/packaging/src/test/java/org/elasticsearch/packaging/util/docker/Docker.java @@ -532,7 +532,7 @@ public static void verifyContainerInstallation(Installation es) { ) ); - if (es.distribution.packaging == Packaging.DOCKER_CLOUD || es.distribution.packaging == Packaging.DOCKER_CLOUD_ESS) { + if (es.distribution.packaging == Packaging.DOCKER_CLOUD_ESS) { verifyCloudContainerInstallation(es); } } diff --git a/qa/packaging/src/test/java/org/elasticsearch/packaging/util/docker/DockerRun.java b/qa/packaging/src/test/java/org/elasticsearch/packaging/util/docker/DockerRun.java index 2b3eb7ff7a617..97adebcd21cac 100644 --- 
a/qa/packaging/src/test/java/org/elasticsearch/packaging/util/docker/DockerRun.java +++ b/qa/packaging/src/test/java/org/elasticsearch/packaging/util/docker/DockerRun.java @@ -165,7 +165,6 @@ public static String getImageName(Distribution distribution) { case DOCKER -> ""; case DOCKER_UBI -> "-ubi8"; case DOCKER_IRON_BANK -> "-ironbank"; - case DOCKER_CLOUD -> "-cloud"; case DOCKER_CLOUD_ESS -> "-cloud-ess"; case DOCKER_WOLFI -> "-wolfi"; default -> throw new IllegalStateException("Unexpected distribution packaging type: " + distribution.packaging); diff --git a/settings.gradle b/settings.gradle index b696d9635d974..1c8afae84db92 100644 --- a/settings.gradle +++ b/settings.gradle @@ -63,8 +63,6 @@ List projects = [ 'distribution:archives:linux-aarch64-tar', 'distribution:archives:linux-tar', 'distribution:docker', - 'distribution:docker:cloud-docker-export', - 'distribution:docker:cloud-docker-aarch64-export', 'distribution:docker:cloud-ess-docker-export', 'distribution:docker:cloud-ess-docker-aarch64-export', 'distribution:docker:docker-aarch64-export', From 33f4b38ba3ff855417305b88e48509d52982dfd8 Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Thu, 24 Oct 2024 14:34:33 -0400 Subject: [PATCH 10/22] [8.x] [ESQL] Enable "any type" aggregations on Date Nanos (#114438) (#115558) * [ESQL] Enable "any type" aggregations on Date Nanos (#114438) Resolves #110002 Resolves #110003 Resolves #110005 Enable Values, Count, CountDistinct, Min and Max aggregations on date nanos. In the course of addressing this, I had to make some changes to AggregateMapper where it maps types into string names. I tried to refactor this once before (#110841) but at the time we decided not to go ahead with it. That bit me while working on this, and so I am trying again to refactor it. This time I've made a more localized change, just replacing the cascading if block with a switch. 
That will cause a compile time failure when future new data types are added, unless they correctly update this section. I've also done a small refactoring on the aggregators themselves, to make the supplier function consistent with the typeResolution. --------- Co-authored-by: Elastic Machine * not sure how that happened --------- Co-authored-by: Elastic Machine --- .../src/main/resources/date_nanos.csv | 1 + .../src/main/resources/date_nanos.csv-spec | 30 +++++++++++++ .../xpack/esql/action/EsqlCapabilities.java | 5 +++ .../function/aggregate/CountDistinct.java | 40 ++++++++++-------- .../expression/function/aggregate/Max.java | 42 +++++++++---------- .../expression/function/aggregate/Min.java | 42 +++++++++---------- .../expression/function/aggregate/Values.java | 38 +++++++++-------- .../xpack/esql/planner/AggregateMapper.java | 31 ++++++-------- 8 files changed, 130 insertions(+), 99 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/date_nanos.csv b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/date_nanos.csv index 83a2f3cb1c281..4308d4eea24be 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/date_nanos.csv +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/date_nanos.csv @@ -6,4 +6,5 @@ millis:date,nanos:date_nanos,num:long 2023-10-23T13:33:34.937Z,2023-10-23T13:33:34.937193000Z,1698068014937193000 2023-10-23T12:27:28.948Z,2023-10-23T12:27:28.948000000Z,1698064048948000000 2023-10-23T12:15:03.360Z,2023-10-23T12:15:03.360103847Z,1698063303360103847 +2023-10-23T12:15:03.360Z,2023-10-23T12:15:03.360103847Z,1698063303360103847 1999-10-23T12:15:03.360Z,[2023-03-23T12:15:03.360103847Z, 2023-02-23T13:33:34.937193000Z, 2023-01-23T13:55:01.543123456Z], 0 diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/date_nanos.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/date_nanos.csv-spec index ad7149b0f742f..83c73000b3999 100644 --- 
a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/date_nanos.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/date_nanos.csv-spec @@ -214,6 +214,7 @@ l:long 1698068014937193000 1698064048948000000 1698063303360103847 +1698063303360103847 ; long to date nanos, index version @@ -229,6 +230,7 @@ d:date_nanos 2023-10-23T13:33:34.937193000Z 2023-10-23T12:27:28.948000000Z 2023-10-23T12:15:03.360103847Z +2023-10-23T12:15:03.360103847Z ; date_nanos to date nanos, index version @@ -244,6 +246,7 @@ d:date_nanos 2023-10-23T13:33:34.937193000Z 2023-10-23T12:27:28.948000000Z 2023-10-23T12:15:03.360103847Z +2023-10-23T12:15:03.360103847Z ; attempt to cast the result of a fold to date nanos @@ -263,3 +266,30 @@ ROW a = TO_DATE_NANOS(null), b = TO_DATE_NANOS(null + 1::long), c = TO_DATE_NANO a:date_nanos | b:date_nanos | c:date_nanos null | null | null ; + +Max and Min of date nanos +required_capability: date_nanos_aggregations + +FROM date_nanos | STATS max = MAX(nanos), min = MIN(nanos); + +max:date_nanos | min:date_nanos +2023-10-23T13:55:01.543123456Z | 2023-01-23T13:55:01.543123456Z +; + +Count and count distinct of date nanos +required_capability: date_nanos_aggregations + +FROM date_nanos | WHERE millis > "2020-01-01" | STATS count = COUNT(nanos), count_distinct = COUNT_DISTINCT(nanos); + +count:long | count_distinct:long +8 | 7 +; + +Values aggregation on date nanos +required_capability: date_nanos_aggregations + +FROM date_nanos | WHERE millis > "2020-01-01" | STATS v = MV_SORT(VALUES(nanos), "DESC"); + +v:date_nanos +[2023-10-23T13:55:01.543123456Z, 2023-10-23T13:53:55.832987654Z, 2023-10-23T13:52:55.015787878Z, 2023-10-23T13:51:54.732102837Z, 2023-10-23T13:33:34.937193000Z, 2023-10-23T12:27:28.948000000Z, 2023-10-23T12:15:03.360103847Z] +; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 
c5f68d64b17f5..9184a69cc30c4 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -307,6 +307,11 @@ public enum Cap { */ TO_DATE_NANOS(EsqlCorePlugin.DATE_NANOS_FEATURE_FLAG), + /** + * support aggregations on date nanos + */ + DATE_NANOS_AGGREGATIONS(EsqlCorePlugin.DATE_NANOS_FEATURE_FLAG), + /** * Support for datetime in least and greatest functions */ diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/CountDistinct.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/CountDistinct.java index 756000dfbb187..5ae162f1fbb12 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/CountDistinct.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/CountDistinct.java @@ -38,6 +38,8 @@ import java.io.IOException; import java.util.List; +import java.util.Map; +import java.util.function.BiFunction; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.DEFAULT; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND; @@ -53,6 +55,20 @@ public class CountDistinct extends AggregateFunction implements OptionalArgument CountDistinct::new ); + private static final Map, Integer, AggregatorFunctionSupplier>> SUPPLIERS = Map.ofEntries( + // Booleans ignore the precision because there are only two possible values anyway + Map.entry(DataType.BOOLEAN, (inputChannels, precision) -> new CountDistinctBooleanAggregatorFunctionSupplier(inputChannels)), + Map.entry(DataType.LONG, CountDistinctLongAggregatorFunctionSupplier::new), + Map.entry(DataType.DATETIME, CountDistinctLongAggregatorFunctionSupplier::new), + Map.entry(DataType.DATE_NANOS, 
CountDistinctLongAggregatorFunctionSupplier::new), + Map.entry(DataType.INTEGER, CountDistinctIntAggregatorFunctionSupplier::new), + Map.entry(DataType.DOUBLE, CountDistinctDoubleAggregatorFunctionSupplier::new), + Map.entry(DataType.KEYWORD, CountDistinctBytesRefAggregatorFunctionSupplier::new), + Map.entry(DataType.IP, CountDistinctBytesRefAggregatorFunctionSupplier::new), + Map.entry(DataType.VERSION, CountDistinctBytesRefAggregatorFunctionSupplier::new), + Map.entry(DataType.TEXT, CountDistinctBytesRefAggregatorFunctionSupplier::new) + ); + private static final int DEFAULT_PRECISION = 3000; private final Expression precision; @@ -102,7 +118,7 @@ public CountDistinct( Source source, @Param( name = "field", - type = { "boolean", "date", "double", "integer", "ip", "keyword", "long", "text", "version" }, + type = { "boolean", "date", "date_nanos", "double", "integer", "ip", "keyword", "long", "text", "version" }, description = "Column or literal for which to count the number of distinct values." ) Expression field, @Param( @@ -179,7 +195,7 @@ protected TypeResolution resolveType() { .and( isType( field(), - dt -> dt != DataType.UNSIGNED_LONG && dt != DataType.SOURCE, + SUPPLIERS::containsKey, sourceText(), DEFAULT, "any exact type except unsigned_long, _source, or counter types" @@ -196,23 +212,11 @@ protected TypeResolution resolveType() { public AggregatorFunctionSupplier supplier(List inputChannels) { DataType type = field().dataType(); int precision = this.precision == null ? 
DEFAULT_PRECISION : ((Number) this.precision.fold()).intValue(); - if (type == DataType.BOOLEAN) { - // Booleans ignore the precision because there are only two possible values anyway - return new CountDistinctBooleanAggregatorFunctionSupplier(inputChannels); - } - if (type == DataType.DATETIME || type == DataType.LONG) { - return new CountDistinctLongAggregatorFunctionSupplier(inputChannels, precision); - } - if (type == DataType.INTEGER) { - return new CountDistinctIntAggregatorFunctionSupplier(inputChannels, precision); - } - if (type == DataType.DOUBLE) { - return new CountDistinctDoubleAggregatorFunctionSupplier(inputChannels, precision); - } - if (DataType.isString(type) || type == DataType.IP || type == DataType.VERSION) { - return new CountDistinctBytesRefAggregatorFunctionSupplier(inputChannels, precision); + if (SUPPLIERS.containsKey(type) == false) { + // If the type checking did its job, this should never happen + throw EsqlIllegalArgumentException.illegalDataType(type); } - throw EsqlIllegalArgumentException.illegalDataType(type); + return SUPPLIERS.get(type).apply(inputChannels, precision); } @Override diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Max.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Max.java index 6119b2ce58465..ee16193efdccc 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Max.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Max.java @@ -32,16 +32,28 @@ import java.io.IOException; import java.util.List; +import java.util.Map; +import java.util.function.Function; import static java.util.Collections.emptyList; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.DEFAULT; -import static org.elasticsearch.xpack.esql.core.type.DataType.UNSIGNED_LONG; -import static 
org.elasticsearch.xpack.esql.core.type.DataType.isRepresentable; -import static org.elasticsearch.xpack.esql.core.type.DataType.isSpatial; public class Max extends AggregateFunction implements ToAggregator, SurrogateExpression { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "Max", Max::new); + private static final Map, AggregatorFunctionSupplier>> SUPPLIERS = Map.ofEntries( + Map.entry(DataType.BOOLEAN, MaxBooleanAggregatorFunctionSupplier::new), + Map.entry(DataType.LONG, MaxLongAggregatorFunctionSupplier::new), + Map.entry(DataType.DATETIME, MaxLongAggregatorFunctionSupplier::new), + Map.entry(DataType.DATE_NANOS, MaxLongAggregatorFunctionSupplier::new), + Map.entry(DataType.INTEGER, MaxIntAggregatorFunctionSupplier::new), + Map.entry(DataType.DOUBLE, MaxDoubleAggregatorFunctionSupplier::new), + Map.entry(DataType.IP, MaxIpAggregatorFunctionSupplier::new), + Map.entry(DataType.KEYWORD, MaxBytesRefAggregatorFunctionSupplier::new), + Map.entry(DataType.TEXT, MaxBytesRefAggregatorFunctionSupplier::new), + Map.entry(DataType.VERSION, MaxBytesRefAggregatorFunctionSupplier::new) + ); + @FunctionInfo( returnType = { "boolean", "double", "integer", "long", "date", "ip", "keyword", "text", "long", "version" }, description = "The maximum value of a field.", @@ -98,7 +110,7 @@ public Max replaceChildren(List newChildren) { protected TypeResolution resolveType() { return TypeResolutions.isType( field(), - t -> isRepresentable(t) && t != UNSIGNED_LONG && isSpatial(t) == false, + SUPPLIERS::containsKey, sourceText(), DEFAULT, "representable except unsigned_long and spatial types" @@ -113,25 +125,11 @@ public DataType dataType() { @Override public final AggregatorFunctionSupplier supplier(List inputChannels) { DataType type = field().dataType(); - if (type == DataType.BOOLEAN) { - return new MaxBooleanAggregatorFunctionSupplier(inputChannels); - } - if (type == DataType.LONG || type == DataType.DATETIME) { - return 
new MaxLongAggregatorFunctionSupplier(inputChannels); - } - if (type == DataType.INTEGER) { - return new MaxIntAggregatorFunctionSupplier(inputChannels); - } - if (type == DataType.DOUBLE) { - return new MaxDoubleAggregatorFunctionSupplier(inputChannels); - } - if (type == DataType.IP) { - return new MaxIpAggregatorFunctionSupplier(inputChannels); - } - if (type == DataType.VERSION || DataType.isString(type)) { - return new MaxBytesRefAggregatorFunctionSupplier(inputChannels); + if (SUPPLIERS.containsKey(type) == false) { + // If the type checking did its job, this should never happen + throw EsqlIllegalArgumentException.illegalDataType(type); } - throw EsqlIllegalArgumentException.illegalDataType(type); + return SUPPLIERS.get(type).apply(inputChannels); } @Override diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Min.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Min.java index a1492f79da393..7aaa41ea6ab11 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Min.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Min.java @@ -32,16 +32,28 @@ import java.io.IOException; import java.util.List; +import java.util.Map; +import java.util.function.Function; import static java.util.Collections.emptyList; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.DEFAULT; -import static org.elasticsearch.xpack.esql.core.type.DataType.UNSIGNED_LONG; -import static org.elasticsearch.xpack.esql.core.type.DataType.isRepresentable; -import static org.elasticsearch.xpack.esql.core.type.DataType.isSpatial; public class Min extends AggregateFunction implements ToAggregator, SurrogateExpression { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "Min", Min::new); + private static final 
Map, AggregatorFunctionSupplier>> SUPPLIERS = Map.ofEntries( + Map.entry(DataType.BOOLEAN, MinBooleanAggregatorFunctionSupplier::new), + Map.entry(DataType.LONG, MinLongAggregatorFunctionSupplier::new), + Map.entry(DataType.DATETIME, MinLongAggregatorFunctionSupplier::new), + Map.entry(DataType.DATE_NANOS, MinLongAggregatorFunctionSupplier::new), + Map.entry(DataType.INTEGER, MinIntAggregatorFunctionSupplier::new), + Map.entry(DataType.DOUBLE, MinDoubleAggregatorFunctionSupplier::new), + Map.entry(DataType.IP, MinIpAggregatorFunctionSupplier::new), + Map.entry(DataType.VERSION, MinBytesRefAggregatorFunctionSupplier::new), + Map.entry(DataType.KEYWORD, MinBytesRefAggregatorFunctionSupplier::new), + Map.entry(DataType.TEXT, MinBytesRefAggregatorFunctionSupplier::new) + ); + @FunctionInfo( returnType = { "boolean", "double", "integer", "long", "date", "ip", "keyword", "text", "long", "version" }, description = "The minimum value of a field.", @@ -98,7 +110,7 @@ public Min withFilter(Expression filter) { protected TypeResolution resolveType() { return TypeResolutions.isType( field(), - t -> isRepresentable(t) && t != UNSIGNED_LONG && isSpatial(t) == false, + SUPPLIERS::containsKey, sourceText(), DEFAULT, "representable except unsigned_long and spatial types" @@ -113,25 +125,11 @@ public DataType dataType() { @Override public final AggregatorFunctionSupplier supplier(List inputChannels) { DataType type = field().dataType(); - if (type == DataType.BOOLEAN) { - return new MinBooleanAggregatorFunctionSupplier(inputChannels); - } - if (type == DataType.LONG || type == DataType.DATETIME) { - return new MinLongAggregatorFunctionSupplier(inputChannels); - } - if (type == DataType.INTEGER) { - return new MinIntAggregatorFunctionSupplier(inputChannels); - } - if (type == DataType.DOUBLE) { - return new MinDoubleAggregatorFunctionSupplier(inputChannels); - } - if (type == DataType.IP) { - return new MinIpAggregatorFunctionSupplier(inputChannels); - } - if (type == 
DataType.VERSION || DataType.isString(type)) { - return new MinBytesRefAggregatorFunctionSupplier(inputChannels); + if (SUPPLIERS.containsKey(type) == false) { + // If the type checking did its job, this should never happen + throw EsqlIllegalArgumentException.illegalDataType(type); } - throw EsqlIllegalArgumentException.illegalDataType(type); + return SUPPLIERS.get(type).apply(inputChannels); } @Override diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Values.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Values.java index a844b981c95d6..8d576839c3c5c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Values.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Values.java @@ -29,14 +29,28 @@ import java.io.IOException; import java.util.List; +import java.util.Map; +import java.util.function.Function; import static java.util.Collections.emptyList; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.DEFAULT; -import static org.elasticsearch.xpack.esql.core.type.DataType.UNSIGNED_LONG; public class Values extends AggregateFunction implements ToAggregator { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "Values", Values::new); + private static final Map, AggregatorFunctionSupplier>> SUPPLIERS = Map.ofEntries( + Map.entry(DataType.INTEGER, ValuesIntAggregatorFunctionSupplier::new), + Map.entry(DataType.LONG, ValuesLongAggregatorFunctionSupplier::new), + Map.entry(DataType.DATETIME, ValuesLongAggregatorFunctionSupplier::new), + Map.entry(DataType.DATE_NANOS, ValuesLongAggregatorFunctionSupplier::new), + Map.entry(DataType.DOUBLE, ValuesDoubleAggregatorFunctionSupplier::new), + Map.entry(DataType.KEYWORD, ValuesBytesRefAggregatorFunctionSupplier::new), + 
Map.entry(DataType.TEXT, ValuesBytesRefAggregatorFunctionSupplier::new), + Map.entry(DataType.IP, ValuesBytesRefAggregatorFunctionSupplier::new), + Map.entry(DataType.VERSION, ValuesBytesRefAggregatorFunctionSupplier::new), + Map.entry(DataType.BOOLEAN, ValuesBooleanAggregatorFunctionSupplier::new) + ); + @FunctionInfo( returnType = { "boolean", "date", "double", "integer", "ip", "keyword", "long", "text", "version" }, preview = true, @@ -98,7 +112,7 @@ public DataType dataType() { protected TypeResolution resolveType() { return TypeResolutions.isType( field(), - dt -> DataType.isSpatial(dt) == false && dt != UNSIGNED_LONG, + SUPPLIERS::containsKey, sourceText(), DEFAULT, "any type except unsigned_long and spatial types" @@ -108,22 +122,10 @@ protected TypeResolution resolveType() { @Override public AggregatorFunctionSupplier supplier(List inputChannels) { DataType type = field().dataType(); - if (type == DataType.INTEGER) { - return new ValuesIntAggregatorFunctionSupplier(inputChannels); - } - if (type == DataType.LONG || type == DataType.DATETIME) { - return new ValuesLongAggregatorFunctionSupplier(inputChannels); - } - if (type == DataType.DOUBLE) { - return new ValuesDoubleAggregatorFunctionSupplier(inputChannels); - } - if (DataType.isString(type) || type == DataType.IP || type == DataType.VERSION) { - return new ValuesBytesRefAggregatorFunctionSupplier(inputChannels); - } - if (type == DataType.BOOLEAN) { - return new ValuesBooleanAggregatorFunctionSupplier(inputChannels); + if (SUPPLIERS.containsKey(type) == false) { + // If the type checking did its job, this should never happen + throw EsqlIllegalArgumentException.illegalDataType(type); } - // TODO cartesian_point, geo_point - throw EsqlIllegalArgumentException.illegalDataType(type); + return SUPPLIERS.get(type).apply(inputChannels); } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AggregateMapper.java 
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AggregateMapper.java index c322135198262..9bb0ab4144bed 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AggregateMapper.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AggregateMapper.java @@ -297,25 +297,18 @@ private static String dataTypeToString(DataType type, Class aggClass) { if (aggClass == Top.class && type.equals(DataType.IP)) { return "Ip"; } - if (type.equals(DataType.BOOLEAN)) { - return "Boolean"; - } else if (type.equals(DataType.INTEGER) || type.equals(DataType.COUNTER_INTEGER)) { - return "Int"; - } else if (type.equals(DataType.LONG) || type.equals(DataType.DATETIME) || type.equals(DataType.COUNTER_LONG)) { - return "Long"; - } else if (type.equals(DataType.DOUBLE) || type.equals(DataType.COUNTER_DOUBLE)) { - return "Double"; - } else if (type.equals(DataType.KEYWORD) - || type.equals(DataType.IP) - || type.equals(DataType.VERSION) - || type.equals(DataType.TEXT)) { - return "BytesRef"; - } else if (type.equals(GEO_POINT)) { - return "GeoPoint"; - } else if (type.equals(CARTESIAN_POINT)) { - return "CartesianPoint"; - } else { + + return switch (type) { + case BOOLEAN -> "Boolean"; + case INTEGER, COUNTER_INTEGER -> "Int"; + case LONG, DATETIME, COUNTER_LONG, DATE_NANOS -> "Long"; + case DOUBLE, COUNTER_DOUBLE -> "Double"; + case KEYWORD, IP, VERSION, TEXT -> "BytesRef"; + case GEO_POINT -> "GeoPoint"; + case CARTESIAN_POINT -> "CartesianPoint"; + case SEMANTIC_TEXT, UNSUPPORTED, NULL, UNSIGNED_LONG, SHORT, BYTE, FLOAT, HALF_FLOAT, SCALED_FLOAT, OBJECT, SOURCE, DATE_PERIOD, + TIME_DURATION, CARTESIAN_SHAPE, GEO_SHAPE, DOC_DATA_TYPE, TSID_DATA_TYPE, PARTIAL_AGG -> throw new EsqlIllegalArgumentException("illegal agg type: " + type.typeName()); - } + }; } } From 9506d4681523cdb099a91b792aa8c9ef39a26764 Mon Sep 17 00:00:00 2001 From: shainaraskas <58563081+shainaraskas@users.noreply.github.com> Date: Thu, 24 Oct 
2024 16:42:59 -0400 Subject: [PATCH 11/22] Make a minor change to trigger release note process (#113975) (#115592) * changelog entry (cherry picked from commit 97ed0a93bb75d0f920c976527f4f5fc0b6065beb) --- docs/changelog/113975.yaml | 19 +++++++++++++++++++ docs/reference/mapping/params/format.asciidoc | 2 +- 2 files changed, 20 insertions(+), 1 deletion(-) create mode 100644 docs/changelog/113975.yaml diff --git a/docs/changelog/113975.yaml b/docs/changelog/113975.yaml new file mode 100644 index 0000000000000..632ba038271bb --- /dev/null +++ b/docs/changelog/113975.yaml @@ -0,0 +1,19 @@ +pr: 113975 +summary: JDK locale database change +area: Mapping +type: breaking +issues: [] +breaking: + title: JDK locale database change + area: Mapping + details: | + {es} 8.16 changes the version of the JDK that is included from version 22 to version 23. This changes the locale database that is used by Elasticsearch from the COMPAT database to the CLDR database. This change can cause significant differences to the textual date formats accepted by Elasticsearch, and to calculated week-dates. + + If you run {es} 8.16 on JDK version 22 or below, it will use the COMPAT locale database to match the behavior of 8.15. However, starting with {es} 9.0, {es} will use the CLDR database regardless of JDK version it is run on. + impact: | + This affects you if you use custom date formats using textual or week-date field specifiers. If you use date fields or calculated week-dates that change between the COMPAT and CLDR databases, then this change will cause Elasticsearch to reject previously valid date fields as invalid data. You might need to modify your ingest or output integration code to account for the differences between these two JDK versions. + + Starting in version 8.15.2, Elasticsearch will log deprecation warnings if you are using date format specifiers that might change on upgrading to JDK 23. These warnings are visible in Kibana. 
+ + For detailed guidance, refer to <> and the https://ela.st/jdk-23-locales[Elastic blog]. + notable: true diff --git a/docs/reference/mapping/params/format.asciidoc b/docs/reference/mapping/params/format.asciidoc index b890e62fd0a0b..29380fca83ca5 100644 --- a/docs/reference/mapping/params/format.asciidoc +++ b/docs/reference/mapping/params/format.asciidoc @@ -65,7 +65,7 @@ affected specifiers, you may need to modify your ingest or output integration co for the differences between these two JDK versions. [[built-in-date-formats]] -==== Built In Formats +==== Built-in formats Most of the below formats have a `strict` companion format, which means that year, month and day parts of the month must use respectively 4, 2 and 2 digits From a614517120f4f7ede722dd2c30ab40536c3a630c Mon Sep 17 00:00:00 2001 From: Stanislav Malyshev Date: Thu, 24 Oct 2024 15:04:32 -0600 Subject: [PATCH 12/22] Fixing remote ENRICH by pushing the Enrich inside FragmentExec (#114665) (#115591) * Fixing remote ENRICH by pushing the Enrich inside FragmentExec * Improve handling of more complex cases such as several enriches (cherry picked from commit e789039dfa8fee60dc2615c3876295ff7c6f3b01) --- docs/changelog/114665.yaml | 6 ++ .../esql/action/CrossClustersEnrichIT.java | 102 ++++++++++++++++-- .../xpack/esql/analysis/Verifier.java | 7 -- .../xpack/esql/planner/Mapper.java | 42 ++++++++ .../optimizer/PhysicalPlanOptimizerTests.java | 63 +++++++++-- 5 files changed, 195 insertions(+), 25 deletions(-) create mode 100644 docs/changelog/114665.yaml diff --git a/docs/changelog/114665.yaml b/docs/changelog/114665.yaml new file mode 100644 index 0000000000000..b90bb799bd896 --- /dev/null +++ b/docs/changelog/114665.yaml @@ -0,0 +1,6 @@ +pr: 114665 +summary: Fixing remote ENRICH by pushing the Enrich inside `FragmentExec` +area: ES|QL +type: bug +issues: + - 105095 diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/CrossClustersEnrichIT.java 
b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/CrossClustersEnrichIT.java index 7d8bb738098d3..e8e9f45694e9c 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/CrossClustersEnrichIT.java +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/CrossClustersEnrichIT.java @@ -47,6 +47,7 @@ import java.util.Arrays; import java.util.Collection; import java.util.Collections; +import java.util.Comparator; import java.util.List; import java.util.Locale; import java.util.Map; @@ -469,27 +470,112 @@ public void testEnrichRemoteWithVendor() { } } + public void testEnrichRemoteWithVendorNoSort() { + Tuple includeCCSMetadata = randomIncludeCCSMetadata(); + Boolean requestIncludeMeta = includeCCSMetadata.v1(); + boolean responseExpectMeta = includeCCSMetadata.v2(); + + for (Enrich.Mode hostMode : List.of(Enrich.Mode.ANY, Enrich.Mode.REMOTE)) { + var query = String.format(Locale.ROOT, """ + FROM *:events,events + | LIMIT 100 + | eval ip= TO_STR(host) + | %s + | %s + | stats c = COUNT(*) by vendor + """, enrichHosts(hostMode), enrichVendors(Enrich.Mode.REMOTE)); + try (EsqlQueryResponse resp = runQuery(query, requestIncludeMeta)) { + var values = getValuesList(resp); + values.sort(Comparator.comparing(o -> (String) o.get(1), Comparator.nullsLast(Comparator.naturalOrder()))); + assertThat( + values, + equalTo( + List.of( + List.of(6L, "Apple"), + List.of(7L, "Microsoft"), + List.of(1L, "Redhat"), + List.of(2L, "Samsung"), + List.of(1L, "Sony"), + List.of(2L, "Suse"), + Arrays.asList(3L, (String) null) + ) + ) + ); + EsqlExecutionInfo executionInfo = resp.getExecutionInfo(); + assertThat(executionInfo.includeCCSMetadata(), equalTo(responseExpectMeta)); + assertThat(executionInfo.clusterAliases(), equalTo(Set.of("", "c1", "c2"))); + assertCCSExecutionInfoDetails(executionInfo); + } + } + } + public void testTopNThenEnrichRemote() { + Tuple includeCCSMetadata = 
randomIncludeCCSMetadata(); + Boolean requestIncludeMeta = includeCCSMetadata.v1(); + boolean responseExpectMeta = includeCCSMetadata.v2(); + String query = String.format(Locale.ROOT, """ FROM *:events,events | eval ip= TO_STR(host) - | SORT ip + | SORT timestamp, user, ip | LIMIT 5 - | %s + | %s | KEEP host, timestamp, user, os """, enrichHosts(Enrich.Mode.REMOTE)); - var error = expectThrows(VerificationException.class, () -> runQuery(query, randomBoolean()).close()); - assertThat(error.getMessage(), containsString("ENRICH with remote policy can't be executed after LIMIT")); + try (EsqlQueryResponse resp = runQuery(query, requestIncludeMeta)) { + assertThat( + getValuesList(resp), + equalTo( + List.of( + List.of("192.168.1.2", 1L, "andres", "Windows"), + List.of("192.168.1.3", 1L, "matthew", "MacOS"), + Arrays.asList("192.168.1.25", 1L, "park", (String) null), + List.of("192.168.1.5", 2L, "akio", "Android"), + List.of("192.168.1.6", 2L, "sergio", "iOS") + ) + ) + ); + EsqlExecutionInfo executionInfo = resp.getExecutionInfo(); + assertThat(executionInfo.includeCCSMetadata(), equalTo(responseExpectMeta)); + assertThat(executionInfo.clusterAliases(), equalTo(Set.of("", "c1", "c2"))); + assertCCSExecutionInfoDetails(executionInfo); + } } public void testLimitThenEnrichRemote() { + Tuple includeCCSMetadata = randomIncludeCCSMetadata(); + Boolean requestIncludeMeta = includeCCSMetadata.v1(); + boolean responseExpectMeta = includeCCSMetadata.v2(); + String query = String.format(Locale.ROOT, """ FROM *:events,events - | LIMIT 10 + | LIMIT 25 | eval ip= TO_STR(host) - | %s + | %s | KEEP host, timestamp, user, os """, enrichHosts(Enrich.Mode.REMOTE)); - var error = expectThrows(VerificationException.class, () -> runQuery(query, randomBoolean()).close()); - assertThat(error.getMessage(), containsString("ENRICH with remote policy can't be executed after LIMIT")); + try (EsqlQueryResponse resp = runQuery(query, requestIncludeMeta)) { + var values = getValuesList(resp); + 
values.sort( + Comparator.comparingLong((List o) -> (Long) o.get(1)) + .thenComparing(o -> (String) o.get(0)) + .thenComparing(o -> (String) o.get(2)) + ); + assertThat( + values.subList(0, 5), + equalTo( + List.of( + List.of("192.168.1.2", 1L, "andres", "Windows"), + Arrays.asList("192.168.1.25", 1L, "park", (String) null), + List.of("192.168.1.3", 1L, "matthew", "MacOS"), + List.of("192.168.1.5", 2L, "akio", "Android"), + List.of("192.168.1.5", 2L, "simon", "Android") + ) + ) + ); + EsqlExecutionInfo executionInfo = resp.getExecutionInfo(); + assertThat(executionInfo.includeCCSMetadata(), equalTo(responseExpectMeta)); + assertThat(executionInfo.clusterAliases(), equalTo(Set.of("", "c1", "c2"))); + assertCCSExecutionInfoDetails(executionInfo); + } } public void testAggThenEnrichRemote() { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java index e2717cd9af0d1..fbaf43467a2e7 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java @@ -609,22 +609,15 @@ private static void checkForSortableDataTypes(LogicalPlan p, Set localF */ private static void checkRemoteEnrich(LogicalPlan plan, Set failures) { boolean[] agg = { false }; - boolean[] limit = { false }; boolean[] enrichCoord = { false }; plan.forEachUp(UnaryPlan.class, u -> { - if (u instanceof Limit) { - limit[0] = true; // TODO: Make Limit then enrich_remote work - } if (u instanceof Aggregate) { agg[0] = true; } else if (u instanceof Enrich enrich && enrich.mode() == Enrich.Mode.COORDINATOR) { enrichCoord[0] = true; } if (u instanceof Enrich enrich && enrich.mode() == Enrich.Mode.REMOTE) { - if (limit[0]) { - failures.add(fail(enrich, "ENRICH with remote policy can't be executed after LIMIT")); - } if (agg[0]) { failures.add(fail(enrich, "ENRICH with 
remote policy can't be executed after STATS")); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/Mapper.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/Mapper.java index e571be54692c4..152c492a34433 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/Mapper.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/Mapper.java @@ -52,8 +52,10 @@ import org.elasticsearch.xpack.esql.plan.physical.RowExec; import org.elasticsearch.xpack.esql.plan.physical.ShowExec; import org.elasticsearch.xpack.esql.plan.physical.TopNExec; +import org.elasticsearch.xpack.esql.plan.physical.UnaryExec; import java.util.List; +import java.util.concurrent.atomic.AtomicBoolean; /** *

This class is part of the planner

@@ -104,6 +106,46 @@ public PhysicalPlan map(LogicalPlan p) { // // Unary Plan // + if (localMode == false && p instanceof Enrich enrich && enrich.mode() == Enrich.Mode.REMOTE) { + // When we have remote enrich, we want to put it under FragmentExec, so it would be executed remotely. + // We're only going to do it on the coordinator node. + // The way we're going to do it is as follows: + // 1. Locate FragmentExec in the tree. If we have no FragmentExec, we won't do anything. + // 2. Put this Enrich under it, removing everything that was below it previously. + // 3. Above FragmentExec, we should deal with pipeline breakers, since pipeline ops already are supposed to go under + // FragmentExec. + // 4. Aggregates can't appear here since the plan should have errored out if we have aggregate inside remote Enrich. + // 5. So we should be keeping: LimitExec, ExchangeExec, OrderExec, TopNExec (actually OrderExec probably can't happen anyway). + + var child = map(enrich.child()); + AtomicBoolean hasFragment = new AtomicBoolean(false); + + var childTransformed = child.transformUp((f) -> { + // Once we reached FragmentExec, we stuff our Enrich under it + if (f instanceof FragmentExec) { + hasFragment.set(true); + return new FragmentExec(p); + } + if (f instanceof EnrichExec enrichExec) { + // It can only be ANY because COORDINATOR would have errored out earlier, and REMOTE should be under FragmentExec + assert enrichExec.mode() == Enrich.Mode.ANY : "enrich must be in ANY mode here"; + return enrichExec.child(); + } + if (f instanceof UnaryExec unaryExec) { + if (f instanceof LimitExec || f instanceof ExchangeExec || f instanceof OrderExec || f instanceof TopNExec) { + return f; + } else { + return unaryExec.child(); + } + } + // Currently, it's either UnaryExec or LeafExec. Leaf will either resolve to FragmentExec or we'll ignore it. 
+ return f; + }); + + if (hasFragment.get()) { + return childTransformed; + } + } if (p instanceof UnaryPlan ua) { var child = map(ua.child()); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java index 964039268e30d..961c70acada7b 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java @@ -172,7 +172,7 @@ import static org.hamcrest.Matchers.nullValue; import static org.hamcrest.Matchers.startsWith; -// @TestLogging(value = "org.elasticsearch.xpack.esql:TRACE", reason = "debug") +// @TestLogging(value = "org.elasticsearch.xpack.esql:DEBUG", reason = "debug") public class PhysicalPlanOptimizerTests extends ESTestCase { private static final String PARAM_FORMATTING = "%1$s"; @@ -5851,14 +5851,14 @@ public void testEnrichBeforeLimit() { | EVAL employee_id = to_str(emp_no) | ENRICH _remote:departments | LIMIT 10"""); - var enrich = as(plan, EnrichExec.class); - assertThat(enrich.mode(), equalTo(Enrich.Mode.REMOTE)); - assertThat(enrich.concreteIndices(), equalTo(Map.of("cluster_1", ".enrich-departments-2"))); - var eval = as(enrich.child(), EvalExec.class); - var finalLimit = as(eval.child(), LimitExec.class); + var finalLimit = as(plan, LimitExec.class); var exchange = as(finalLimit.child(), ExchangeExec.class); var fragment = as(exchange.child(), FragmentExec.class); - var partialLimit = as(fragment.fragment(), Limit.class); + var enrich = as(fragment.fragment(), Enrich.class); + assertThat(enrich.mode(), equalTo(Enrich.Mode.REMOTE)); + assertThat(enrich.concreteIndices(), equalTo(Map.of("cluster_1", ".enrich-departments-2"))); + var evalFragment = as(enrich.child(), Eval.class); + var partialLimit = as(evalFragment.child(), 
Limit.class); as(partialLimit.child(), EsRelation.class); } } @@ -5901,13 +5901,21 @@ public void testLimitThenEnrich() { } public void testLimitThenEnrichRemote() { - var error = expectThrows(VerificationException.class, () -> physicalPlan(""" + var plan = physicalPlan(""" FROM test | LIMIT 10 | EVAL employee_id = to_str(emp_no) | ENRICH _remote:departments - """)); - assertThat(error.getMessage(), containsString("line 4:3: ENRICH with remote policy can't be executed after LIMIT")); + """); + var finalLimit = as(plan, LimitExec.class); + var exchange = as(finalLimit.child(), ExchangeExec.class); + var fragment = as(exchange.child(), FragmentExec.class); + var enrich = as(fragment.fragment(), Enrich.class); + assertThat(enrich.mode(), equalTo(Enrich.Mode.REMOTE)); + assertThat(enrich.concreteIndices(), equalTo(Map.of("cluster_1", ".enrich-departments-2"))); + var evalFragment = as(enrich.child(), Eval.class); + var partialLimit = as(evalFragment.child(), Limit.class); + as(partialLimit.child(), EsRelation.class); } public void testEnrichBeforeTopN() { @@ -5961,6 +5969,23 @@ public void testEnrichBeforeTopN() { var eval = as(enrich.child(), Eval.class); as(eval.child(), EsRelation.class); } + { + var plan = physicalPlan(""" + FROM test + | EVAL employee_id = to_str(emp_no) + | ENRICH _remote:departments + | SORT department + | LIMIT 10"""); + var topN = as(plan, TopNExec.class); + var exchange = as(topN.child(), ExchangeExec.class); + var fragment = as(exchange.child(), FragmentExec.class); + var partialTopN = as(fragment.fragment(), TopN.class); + var enrich = as(partialTopN.child(), Enrich.class); + assertThat(enrich.mode(), equalTo(Enrich.Mode.REMOTE)); + assertThat(enrich.concreteIndices(), equalTo(Map.of("cluster_1", ".enrich-departments-2"))); + var eval = as(enrich.child(), Eval.class); + as(eval.child(), EsRelation.class); + } } public void testEnrichAfterTopN() { @@ -6000,6 +6025,24 @@ public void testEnrichAfterTopN() { var partialTopN = 
as(fragment.fragment(), TopN.class); as(partialTopN.child(), EsRelation.class); } + { + var plan = physicalPlan(""" + FROM test + | SORT emp_no + | LIMIT 10 + | EVAL employee_id = to_str(emp_no) + | ENRICH _remote:departments + """); + var topN = as(plan, TopNExec.class); + var exchange = as(topN.child(), ExchangeExec.class); + var fragment = as(exchange.child(), FragmentExec.class); + var enrich = as(fragment.fragment(), Enrich.class); + assertThat(enrich.mode(), equalTo(Enrich.Mode.REMOTE)); + assertThat(enrich.concreteIndices(), equalTo(Map.of("cluster_1", ".enrich-departments-2"))); + var evalFragment = as(enrich.child(), Eval.class); + var partialTopN = as(evalFragment.child(), TopN.class); + as(partialTopN.child(), EsRelation.class); + } } public void testManyEnrich() { From d66b54f2b8a036ffa35fd9a5718e5aecb2c5574e Mon Sep 17 00:00:00 2001 From: Keith Massey Date: Thu, 24 Oct 2024 16:33:58 -0500 Subject: [PATCH 13/22] Fixing ingest simulate yaml rest test when global legacy template is present (#115586) (#115595) Sometimes the test framework adds a global legacy template. When this happens, a test that is using another legacy template to create an index emits a warning since the index matches two legacy templates. This PR allows that warning. 
--- .../resources/rest-api-spec/test/ingest/80_ingest_simulate.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/qa/smoke-test-ingest-with-all-dependencies/src/yamlRestTest/resources/rest-api-spec/test/ingest/80_ingest_simulate.yml b/qa/smoke-test-ingest-with-all-dependencies/src/yamlRestTest/resources/rest-api-spec/test/ingest/80_ingest_simulate.yml index 4d1a62c6f179e..baac460af00a0 100644 --- a/qa/smoke-test-ingest-with-all-dependencies/src/yamlRestTest/resources/rest-api-spec/test/ingest/80_ingest_simulate.yml +++ b/qa/smoke-test-ingest-with-all-dependencies/src/yamlRestTest/resources/rest-api-spec/test/ingest/80_ingest_simulate.yml @@ -1537,6 +1537,8 @@ setup: - not_exists: docs.0.doc.error - do: + allowed_warnings: + - "index [foo-1] matches multiple legacy templates [global, my-legacy-template], composable templates will only match a single template" indices.create: index: foo-1 - match: { acknowledged: true } From d3705e68d97505c9491210da7687adf096cb0906 Mon Sep 17 00:00:00 2001 From: Keith Massey Date: Thu, 24 Oct 2024 17:15:42 -0500 Subject: [PATCH 14/22] Fixing ingest simulate yaml rest test when there is a global legacy template (#115559) (#115599) The ingest simulate yaml rest test `Test mapping addition works with indices without templates` tests what happens when an index has a mapping but matches no template at all. However, randomly and rarely a global match-all legacy template is applied to the cluster. When this happens, the assumptions for the test fail since the index matches a template. This PR removes that global legacy template so that the test works as intended. 
Closes #115412 Closes #115472 --- .../rest-api-spec/test/ingest/80_ingest_simulate.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/qa/smoke-test-ingest-with-all-dependencies/src/yamlRestTest/resources/rest-api-spec/test/ingest/80_ingest_simulate.yml b/qa/smoke-test-ingest-with-all-dependencies/src/yamlRestTest/resources/rest-api-spec/test/ingest/80_ingest_simulate.yml index baac460af00a0..2d3fa6b568381 100644 --- a/qa/smoke-test-ingest-with-all-dependencies/src/yamlRestTest/resources/rest-api-spec/test/ingest/80_ingest_simulate.yml +++ b/qa/smoke-test-ingest-with-all-dependencies/src/yamlRestTest/resources/rest-api-spec/test/ingest/80_ingest_simulate.yml @@ -1588,6 +1588,13 @@ setup: cluster_features: ["simulate.support.non.template.mapping"] reason: "ingest simulate support for indices with mappings that didn't come from templates added in 8.17" + # A global match-everything legacy template is added to the cluster sometimes (rarely). We have to get rid of this template if it exists + # because this test is making sure we get correct behavior when an index matches *no* template: + - do: + indices.delete_template: + name: '*' + ignore: 404 + # First, make sure that validation fails before we create the index (since we are only defining to bar field but trying to index a value # for foo. - do: From e58fb83006bc38207eeb70f457ba9696c4003aa5 Mon Sep 17 00:00:00 2001 From: Ryan Ernst Date: Thu, 24 Oct 2024 15:19:52 -0700 Subject: [PATCH 15/22] Use jna cleaner thread filter in spawner tests (#115598) This commit filters out jna cleaner threads specifically in the spawner tests (which have a different set of filters from ESTestCase because they extend LuceneTestCase). 
closes #114555 --- muted-tests.yml | 2 -- .../org/elasticsearch/bootstrap/SpawnerNoBootstrapTests.java | 3 ++- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/muted-tests.yml b/muted-tests.yml index 3a80d81330b65..aff0e648a7808 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -349,8 +349,6 @@ tests: - class: org.elasticsearch.xpack.security.operator.OperatorPrivilegesIT method: testEveryActionIsEitherOperatorOnlyOrNonOperator issue: https://github.com/elastic/elasticsearch/issues/102992 -- class: org.elasticsearch.bootstrap.SpawnerNoBootstrapTests - issue: https://github.com/elastic/elasticsearch/issues/114555 - class: org.elasticsearch.test.rest.yaml.RcsCcsCommonYamlTestSuiteIT method: test {p0=search.vectors/42_knn_search_int4_flat/Vector similarity with filter only} issue: https://github.com/elastic/elasticsearch/issues/115475 diff --git a/qa/no-bootstrap-tests/src/test/java/org/elasticsearch/bootstrap/SpawnerNoBootstrapTests.java b/qa/no-bootstrap-tests/src/test/java/org/elasticsearch/bootstrap/SpawnerNoBootstrapTests.java index 168493eb52f60..d47ba685b9834 100644 --- a/qa/no-bootstrap-tests/src/test/java/org/elasticsearch/bootstrap/SpawnerNoBootstrapTests.java +++ b/qa/no-bootstrap-tests/src/test/java/org/elasticsearch/bootstrap/SpawnerNoBootstrapTests.java @@ -24,6 +24,7 @@ import org.elasticsearch.plugins.Platforms; import org.elasticsearch.plugins.PluginTestUtil; import org.elasticsearch.test.GraalVMThreadsFilter; +import org.elasticsearch.test.JnaCleanerThreadsFilter; import org.elasticsearch.test.MockLog; import java.io.IOException; @@ -50,7 +51,7 @@ * that prevents the Spawner class from doing its job. Also needs to run in a separate JVM to other * tests that extend ESTestCase for the same reason. 
*/ -@ThreadLeakFilters(filters = { GraalVMThreadsFilter.class }) +@ThreadLeakFilters(filters = { GraalVMThreadsFilter.class, JnaCleanerThreadsFilter.class }) public class SpawnerNoBootstrapTests extends LuceneTestCase { private static final String CONTROLLER_SOURCE = """ From 04572bbf8476e9ec21797512547be0f72c1df943 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Fri, 25 Oct 2024 00:20:24 +0200 Subject: [PATCH 16/22] Replace IndexNameExpressionResolver.ExpressionList with imperative logic (#115487) (#115602) The approach taken by `ExpressionList` becomes very expensive for large numbers of indices/datastreams. It implies that large lists of concrete names (as they are passed down from the transport layer via e.g. security) are copied at least twice during iteration. Removing the intermediary list and inlining the logic brings down the latency of searches targetting many shards/indices at once and allows for subsequent optimizations. The removed tests appear redundant as they tested an implementation detail of the IndexNameExpressionResolver which itself is well covered by its own tests. 
--- .../metadata/IndexNameExpressionResolver.java | 186 +++++------ .../cluster/metadata/ExpressionListTests.java | 309 ------------------ 2 files changed, 85 insertions(+), 410 deletions(-) delete mode 100644 server/src/test/java/org/elasticsearch/cluster/metadata/ExpressionListTests.java diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/IndexNameExpressionResolver.java b/server/src/main/java/org/elasticsearch/cluster/metadata/IndexNameExpressionResolver.java index 2229166a2d779..39499253c8790 100644 --- a/server/src/main/java/org/elasticsearch/cluster/metadata/IndexNameExpressionResolver.java +++ b/server/src/main/java/org/elasticsearch/cluster/metadata/IndexNameExpressionResolver.java @@ -48,7 +48,6 @@ import java.util.Collections; import java.util.HashMap; import java.util.HashSet; -import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Objects; @@ -253,7 +252,7 @@ protected static Collection resolveExpressions(Context context, String.. } else { return ExplicitResourceNameFilter.filterUnavailable( context, - DateMathExpressionResolver.resolve(context, List.of(expressions)) + DateMathExpressionResolver.resolve(context, Arrays.asList(expressions)) ); } } else { @@ -264,7 +263,10 @@ protected static Collection resolveExpressions(Context context, String.. 
} else { return WildcardExpressionResolver.resolve( context, - ExplicitResourceNameFilter.filterUnavailable(context, DateMathExpressionResolver.resolve(context, List.of(expressions))) + ExplicitResourceNameFilter.filterUnavailable( + context, + DateMathExpressionResolver.resolve(context, Arrays.asList(expressions)) + ) ); } } @@ -1294,34 +1296,51 @@ private static boolean shouldIncludeIfAlias(IndexAbstraction ia, IndexNameExpres * */ public static Collection resolve(Context context, List expressions) { - ExpressionList expressionList = new ExpressionList(context, expressions); // fast exit if there are no wildcards to evaluate - if (expressionList.hasWildcard() == false) { + if (context.getOptions().expandWildcardExpressions() == false) { + return expressions; + } + int firstWildcardIndex = 0; + for (; firstWildcardIndex < expressions.size(); firstWildcardIndex++) { + String expression = expressions.get(firstWildcardIndex); + if (isWildcard(expression)) { + break; + } + } + if (firstWildcardIndex == expressions.size()) { return expressions; } Set result = new HashSet<>(); - for (ExpressionList.Expression expression : expressionList) { - if (expression.isWildcard()) { - Stream matchingResources = matchResourcesToWildcard(context, expression.get()); + for (int i = 0; i < firstWildcardIndex; i++) { + result.add(expressions.get(i)); + } + AtomicBoolean emptyWildcardExpansion = context.getOptions().allowNoIndices() ? null : new AtomicBoolean(); + for (int i = firstWildcardIndex; i < expressions.size(); i++) { + String expression = expressions.get(i); + boolean isExclusion = i > firstWildcardIndex && expression.charAt(0) == '-'; + if (i == firstWildcardIndex || isWildcard(expression)) { + Stream matchingResources = matchResourcesToWildcard( + context, + isExclusion ? 
expression.substring(1) : expression + ); Stream matchingOpenClosedNames = expandToOpenClosed(context, matchingResources); - AtomicBoolean emptyWildcardExpansion = new AtomicBoolean(false); - if (context.getOptions().allowNoIndices() == false) { + if (emptyWildcardExpansion != null) { emptyWildcardExpansion.set(true); matchingOpenClosedNames = matchingOpenClosedNames.peek(x -> emptyWildcardExpansion.set(false)); } - if (expression.isExclusion()) { - matchingOpenClosedNames.forEachOrdered(result::remove); + if (isExclusion) { + matchingOpenClosedNames.forEach(result::remove); } else { - matchingOpenClosedNames.forEachOrdered(result::add); + matchingOpenClosedNames.forEach(result::add); } - if (emptyWildcardExpansion.get()) { - throw notFoundException(expression.get()); + if (emptyWildcardExpansion != null && emptyWildcardExpansion.get()) { + throw notFoundException(expression); } } else { - if (expression.isExclusion()) { - result.remove(expression.get()); + if (isExclusion) { + result.remove(expression.substring(1)); } else { - result.add(expression.get()); + result.add(expression); } } } @@ -1507,27 +1526,35 @@ private DateMathExpressionResolver() { // utility class } + /** + * Resolves date math expressions. If this is a noop the given {@code expressions} list is returned without copying. + * As a result callers of this method should not mutate the returned list. Mutating it may come with unexpected side effects. 
+ */ public static List resolve(Context context, List expressions) { - List result = new ArrayList<>(expressions.size()); - for (ExpressionList.Expression expression : new ExpressionList(context, expressions)) { - result.add(resolveExpression(expression, context::getStartTime)); + boolean wildcardSeen = false; + final boolean expandWildcards = context.getOptions().expandWildcardExpressions(); + String[] result = null; + for (int i = 0, n = expressions.size(); i < n; i++) { + String expression = expressions.get(i); + // accepts date-math exclusions that are of the form "-<...{}>", i.e. the "-" is outside the "<>" date-math template + boolean isExclusion = wildcardSeen && expression.startsWith("-"); + wildcardSeen = wildcardSeen || (expandWildcards && isWildcard(expression)); + String toResolve = isExclusion ? expression.substring(1) : expression; + String resolved = resolveExpression(toResolve, context::getStartTime); + if (toResolve != resolved) { + if (result == null) { + result = expressions.toArray(Strings.EMPTY_ARRAY); + } + result[i] = isExclusion ? "-" + resolved : resolved; + } } - return result; + return result == null ? expressions : Arrays.asList(result); } static String resolveExpression(String expression) { return resolveExpression(expression, System::currentTimeMillis); } - static String resolveExpression(ExpressionList.Expression expression, LongSupplier getTime) { - if (expression.isExclusion()) { - // accepts date-math exclusions that are of the form "-<...{}>", i.e.
the "-" is outside the "<>" date-math template - return "-" + resolveExpression(expression.get(), getTime); - } else { - return resolveExpression(expression.get(), getTime); - } - } - static String resolveExpression(String expression, LongSupplier getTime) { if (expression.startsWith(EXPRESSION_LEFT_BOUND) == false || expression.endsWith(EXPRESSION_RIGHT_BOUND) == false) { return expression; @@ -1689,14 +1716,35 @@ private ExplicitResourceNameFilter() { */ public static List filterUnavailable(Context context, List expressions) { ensureRemoteIndicesRequireIgnoreUnavailable(context.getOptions(), expressions); - List result = new ArrayList<>(expressions.size()); - for (ExpressionList.Expression expression : new ExpressionList(context, expressions)) { - validateAliasOrIndex(expression); - if (expression.isWildcard() || expression.isExclusion() || ensureAliasOrIndexExists(context, expression.get())) { - result.add(expression.expression()); + final boolean expandWildcards = context.getOptions().expandWildcardExpressions(); + boolean wildcardSeen = false; + List result = null; + for (int i = 0; i < expressions.size(); i++) { + String expression = expressions.get(i); + if (Strings.isEmpty(expression)) { + throw notFoundException(expression); + } + // Expressions can not start with an underscore. This is reserved for APIs. If the check gets here, the API + // does not exist and the path is interpreted as an expression. If the expression begins with an underscore, + // throw a specific error that is different from the [[IndexNotFoundException]], which is typically thrown + // if the expression can't be found. 
+ if (expression.charAt(0) == '_') { + throw new InvalidIndexNameException(expression, "must not start with '_'."); + } + final boolean isWildcard = expandWildcards && isWildcard(expression); + if (isWildcard || (wildcardSeen && expression.charAt(0) == '-') || ensureAliasOrIndexExists(context, expression)) { + if (result != null) { + result.add(expression); + } + } else { + if (result == null) { + result = new ArrayList<>(expressions.size() - 1); + result.addAll(expressions.subList(0, i)); + } } + wildcardSeen |= isWildcard; } - return result; + return result == null ? expressions : result; } /** @@ -1736,19 +1784,6 @@ private static boolean ensureAliasOrIndexExists(Context context, String name) { return true; } - private static void validateAliasOrIndex(ExpressionList.Expression expression) { - if (Strings.isEmpty(expression.expression())) { - throw notFoundException(expression.expression()); - } - // Expressions can not start with an underscore. This is reserved for APIs. If the check gets here, the API - // does not exist and the path is interpreted as an expression. If the expression begins with an underscore, - // throw a specific error that is different from the [[IndexNotFoundException]], which is typically thrown - // if the expression can't be found. - if (expression.expression().charAt(0) == '_') { - throw new InvalidIndexNameException(expression.expression(), "must not start with '_'."); - } - } - private static void ensureRemoteIndicesRequireIgnoreUnavailable(IndicesOptions options, List indexExpressions) { if (options.ignoreUnavailable()) { return; @@ -1773,57 +1808,6 @@ private static void failOnRemoteIndicesNotIgnoringUnavailable(List index } } - /** - * Used to iterate expression lists and work out which expression item is a wildcard or an exclusion. 
- */ - public static final class ExpressionList implements Iterable { - private final List expressionsList; - private final boolean hasWildcard; - - public record Expression(String expression, boolean isWildcard, boolean isExclusion) { - public String get() { - if (isExclusion()) { - // drop the leading "-" if exclusion because it is easier for callers to handle it like this - return expression().substring(1); - } else { - return expression(); - } - } - } - - /** - * Creates the expression iterable that can be used to easily check which expression item is a wildcard or an exclusion (or both). - * The {@param context} is used to check if wildcards ought to be considered or not. - */ - public ExpressionList(Context context, List expressionStrings) { - List expressionsList = new ArrayList<>(expressionStrings.size()); - boolean wildcardSeen = false; - for (String expressionString : expressionStrings) { - boolean isExclusion = expressionString.startsWith("-") && wildcardSeen; - if (context.getOptions().expandWildcardExpressions() && isWildcard(expressionString)) { - wildcardSeen = true; - expressionsList.add(new Expression(expressionString, true, isExclusion)); - } else { - expressionsList.add(new Expression(expressionString, false, isExclusion)); - } - } - this.expressionsList = expressionsList; - this.hasWildcard = wildcardSeen; - } - - /** - * Returns {@code true} if the expression contains any wildcard and the options allow wildcard expansion - */ - public boolean hasWildcard() { - return this.hasWildcard; - } - - @Override - public Iterator iterator() { - return expressionsList.iterator(); - } - } - /** * This is a context for the DateMathExpressionResolver which does not require {@code IndicesOptions} or {@code ClusterState} * since it uses only the start time to resolve expressions. 
diff --git a/server/src/test/java/org/elasticsearch/cluster/metadata/ExpressionListTests.java b/server/src/test/java/org/elasticsearch/cluster/metadata/ExpressionListTests.java deleted file mode 100644 index 1ca59ff402bd8..0000000000000 --- a/server/src/test/java/org/elasticsearch/cluster/metadata/ExpressionListTests.java +++ /dev/null @@ -1,309 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". - */ - -package org.elasticsearch.cluster.metadata; - -import org.elasticsearch.action.support.IndicesOptions; -import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver.Context; -import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver.ExpressionList; -import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver.ExpressionList.Expression; -import org.elasticsearch.core.Tuple; -import org.elasticsearch.test.ESTestCase; - -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; -import java.util.function.Supplier; - -import static org.hamcrest.Matchers.is; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - -public class ExpressionListTests extends ESTestCase { - - public void testEmpty() { - ExpressionList expressionList = new ExpressionList(getContextWithOptions(getExpandWildcardsIndicesOptions()), List.of()); - assertThat(expressionList.iterator().hasNext(), is(false)); - assertThat(expressionList.hasWildcard(), is(false)); - expressionList = new ExpressionList(getContextWithOptions(getNoExpandWildcardsIndicesOptions()), List.of()); - 
assertThat(expressionList.iterator().hasNext(), is(false)); - assertThat(expressionList.hasWildcard(), is(false)); - } - - public void testExplicitSingleNameExpression() { - for (IndicesOptions indicesOptions : List.of(getExpandWildcardsIndicesOptions(), getNoExpandWildcardsIndicesOptions())) { - for (String expressionString : List.of("non_wildcard", "-non_exclusion")) { - ExpressionList expressionList = new ExpressionList(getContextWithOptions(indicesOptions), List.of(expressionString)); - assertThat(expressionList.hasWildcard(), is(false)); - if (randomBoolean()) { - expressionList = new ExpressionList(getContextWithOptions(indicesOptions), List.of(expressionString)); - } - Iterator expressionIterator = expressionList.iterator(); - assertThat(expressionIterator.hasNext(), is(true)); - if (randomBoolean()) { - expressionIterator = expressionList.iterator(); - } - Expression expression = expressionIterator.next(); - assertThat(expression.isExclusion(), is(false)); - assertThat(expression.isWildcard(), is(false)); - assertThat(expression.get(), is(expressionString)); - assertThat(expressionIterator.hasNext(), is(false)); - } - } - } - - public void testWildcardSingleExpression() { - for (String wildcardTest : List.of("*", "a*", "*b", "a*b", "a-*b", "a*-b", "-*", "-a*", "-*b", "**", "*-*")) { - ExpressionList expressionList = new ExpressionList( - getContextWithOptions(getExpandWildcardsIndicesOptions()), - List.of(wildcardTest) - ); - assertThat(expressionList.hasWildcard(), is(true)); - if (randomBoolean()) { - expressionList = new ExpressionList(getContextWithOptions(getExpandWildcardsIndicesOptions()), List.of(wildcardTest)); - } - Iterator expressionIterator = expressionList.iterator(); - assertThat(expressionIterator.hasNext(), is(true)); - if (randomBoolean()) { - expressionIterator = expressionList.iterator(); - } - Expression expression = expressionIterator.next(); - assertThat(expression.isExclusion(), is(false)); - assertThat(expression.isWildcard(), 
is(true)); - assertThat(expression.get(), is(wildcardTest)); - assertThat(expressionIterator.hasNext(), is(false)); - } - } - - public void testWildcardLongerExpression() { - List onlyExplicits = randomList(7, () -> randomAlphaOfLengthBetween(0, 5)); - String wildcard = randomFrom("*", "*b", "-*", "*-", "c*", "a*b", "**"); - List expressionList = new ArrayList<>(onlyExplicits.size() + 1); - expressionList.addAll(randomSubsetOf(onlyExplicits)); - int wildcardPos = expressionList.size(); - expressionList.add(wildcard); - for (String item : onlyExplicits) { - if (expressionList.contains(item) == false) { - expressionList.add(item); - } - } - ExpressionList expressionIterable = new ExpressionList(getContextWithOptions(getExpandWildcardsIndicesOptions()), expressionList); - assertThat(expressionIterable.hasWildcard(), is(true)); - if (randomBoolean()) { - expressionIterable = new ExpressionList(getContextWithOptions(getExpandWildcardsIndicesOptions()), expressionList); - } - int i = 0; - for (Expression expression : expressionIterable) { - assertThat(expression.isExclusion(), is(false)); - if (i != wildcardPos) { - assertThat(expression.isWildcard(), is(false)); - } else { - assertThat(expression.isWildcard(), is(true)); - } - assertThat(expression.get(), is(expressionList.get(i++))); - } - } - - public void testWildcardsNoExclusionExpressions() { - for (List wildcardExpression : List.of( - List.of("*"), - List.of("a", "*"), - List.of("-b", "*c"), - List.of("-", "a", "c*"), - List.of("*", "a*", "*b"), - List.of("-*", "a", "b*") - )) { - ExpressionList expressionList = new ExpressionList( - getContextWithOptions(getExpandWildcardsIndicesOptions()), - wildcardExpression - ); - assertThat(expressionList.hasWildcard(), is(true)); - if (randomBoolean()) { - expressionList = new ExpressionList(getContextWithOptions(getExpandWildcardsIndicesOptions()), wildcardExpression); - } - int i = 0; - for (Expression expression : expressionList) { - assertThat(expression.isExclusion(), 
is(false)); - if (wildcardExpression.get(i).contains("*")) { - assertThat(expression.isWildcard(), is(true)); - } else { - assertThat(expression.isWildcard(), is(false)); - } - assertThat(expression.get(), is(wildcardExpression.get(i++))); - } - } - } - - public void testWildcardExpressionNoExpandOptions() { - for (List wildcardExpression : List.of( - List.of("*"), - List.of("a", "*"), - List.of("-b", "*c"), - List.of("*d", "-"), - List.of("*", "-*"), - List.of("-", "a", "c*"), - List.of("*", "a*", "*b") - )) { - ExpressionList expressionList = new ExpressionList( - getContextWithOptions(getNoExpandWildcardsIndicesOptions()), - wildcardExpression - ); - assertThat(expressionList.hasWildcard(), is(false)); - if (randomBoolean()) { - expressionList = new ExpressionList(getContextWithOptions(getNoExpandWildcardsIndicesOptions()), wildcardExpression); - } - int i = 0; - for (Expression expression : expressionList) { - assertThat(expression.isWildcard(), is(false)); - assertThat(expression.isExclusion(), is(false)); - assertThat(expression.get(), is(wildcardExpression.get(i++))); - } - } - } - - public void testSingleExclusionExpression() { - String wildcard = randomFrom("*", "*b", "-*", "*-", "c*", "a*b", "**", "*-*"); - int wildcardPos = randomIntBetween(0, 3); - String exclusion = randomFrom("-*", "-", "-c*", "-ab", "--"); - int exclusionPos = randomIntBetween(wildcardPos + 1, 7); - List exclusionExpression = new ArrayList<>(); - for (int i = 0; i < wildcardPos; i++) { - exclusionExpression.add(randomAlphaOfLengthBetween(0, 5)); - } - exclusionExpression.add(wildcard); - for (int i = wildcardPos + 1; i < exclusionPos; i++) { - exclusionExpression.add(randomAlphaOfLengthBetween(0, 5)); - } - exclusionExpression.add(exclusion); - for (int i = 0; i < randomIntBetween(0, 3); i++) { - exclusionExpression.add(randomAlphaOfLengthBetween(0, 5)); - } - ExpressionList expressionList = new ExpressionList(getContextWithOptions(getExpandWildcardsIndicesOptions()), 
exclusionExpression); - if (randomBoolean()) { - assertThat(expressionList.hasWildcard(), is(true)); - } - int i = 0; - for (Expression expression : expressionList) { - if (i == wildcardPos) { - assertThat(expression.isWildcard(), is(true)); - assertThat(expression.isExclusion(), is(false)); - assertThat(expression.get(), is(exclusionExpression.get(i++))); - } else if (i == exclusionPos) { - assertThat(expression.isExclusion(), is(true)); - assertThat(expression.isWildcard(), is(exclusionExpression.get(i).contains("*"))); - assertThat(expression.get(), is(exclusionExpression.get(i++).substring(1))); - } else { - assertThat(expression.isWildcard(), is(false)); - assertThat(expression.isExclusion(), is(false)); - assertThat(expression.get(), is(exclusionExpression.get(i++))); - } - } - } - - public void testExclusionsExpression() { - for (Tuple, List> exclusionExpression : List.of( - new Tuple<>(List.of("-a", "*", "-a"), List.of(false, false, true)), - new Tuple<>(List.of("-b*", "c", "-a"), List.of(false, false, true)), - new Tuple<>(List.of("*d", "-", "*b"), List.of(false, true, false)), - new Tuple<>(List.of("-", "--", "-*", "", "-*"), List.of(false, false, false, false, true)), - new Tuple<>(List.of("*-", "-*", "a", "-b"), List.of(false, true, false, true)), - new Tuple<>(List.of("a", "-b", "-*", "-b", "*", "-b"), List.of(false, false, false, true, false, true)), - new Tuple<>(List.of("-a", "*d", "-a", "-*b", "-b", "--"), List.of(false, false, true, true, true, true)) - )) { - ExpressionList expressionList = new ExpressionList( - getContextWithOptions(getExpandWildcardsIndicesOptions()), - exclusionExpression.v1() - ); - if (randomBoolean()) { - assertThat(expressionList.hasWildcard(), is(true)); - } - int i = 0; - for (Expression expression : expressionList) { - boolean isExclusion = exclusionExpression.v2().get(i); - assertThat(expression.isExclusion(), is(isExclusion)); - assertThat(expression.isWildcard(), is(exclusionExpression.v1().get(i).contains("*"))); - 
if (isExclusion) { - assertThat(expression.get(), is(exclusionExpression.v1().get(i++).substring(1))); - } else { - assertThat(expression.get(), is(exclusionExpression.v1().get(i++))); - } - } - } - } - - private IndicesOptions getExpandWildcardsToOpenOnlyIndicesOptions() { - return IndicesOptions.fromOptions( - randomBoolean(), - randomBoolean(), - true, - false, - randomBoolean(), - randomBoolean(), - randomBoolean(), - randomBoolean(), - randomBoolean() - ); - } - - private IndicesOptions getExpandWildcardsToCloseOnlyIndicesOptions() { - return IndicesOptions.fromOptions( - randomBoolean(), - randomBoolean(), - false, - true, - randomBoolean(), - randomBoolean(), - randomBoolean(), - randomBoolean(), - randomBoolean() - ); - } - - private IndicesOptions getExpandWildcardsToOpenCloseIndicesOptions() { - return IndicesOptions.fromOptions( - randomBoolean(), - randomBoolean(), - true, - true, - randomBoolean(), - randomBoolean(), - randomBoolean(), - randomBoolean(), - randomBoolean() - ); - } - - private IndicesOptions getExpandWildcardsIndicesOptions() { - return ESTestCase.>randomFrom( - this::getExpandWildcardsToOpenOnlyIndicesOptions, - this::getExpandWildcardsToCloseOnlyIndicesOptions, - this::getExpandWildcardsToOpenCloseIndicesOptions - ).get(); - } - - private IndicesOptions getNoExpandWildcardsIndicesOptions() { - return IndicesOptions.fromOptions( - randomBoolean(), - randomBoolean(), - false, - false, - randomBoolean(), - randomBoolean(), - randomBoolean(), - randomBoolean(), - randomBoolean() - ); - } - - private Context getContextWithOptions(IndicesOptions indicesOptions) { - Context context = mock(Context.class); - when(context.getOptions()).thenReturn(indicesOptions); - return context; - } -} From 9b31ba2d06e638e560253196e020649c48ba88f5 Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Thu, 24 Oct 2024 18:43:26 -0400 Subject: [PATCH 17/22] [8.x] [ESQL] Support date_nanos on functions that take "any" type (#114056) (#115351) * [ESQL] Support 
date_nanos on functions that take "any" type (#114056) Resolves #109998 For the most part, this is just adding tests. Greater and Least have actual production code changes - notably toEvaluator is modified to map date nanos to the long evaluator. This parallels the work done in #113961. I've added CSV tests and unit tests for all the functions listed in the original ticket. --------- Co-authored-by: Elastic Machine * Mute failing watcher test Cherry-pick https://github.com/elastic/elasticsearch/commit/f8e931d6b5e4e17ef43ac3b39e4c7c40cbc24111#diff-41386766c394f14f5f205f92bb26eb1420b80af0057c78b2842fcc7ddd3d67aaR326 For whatever reason, git cherry-pick is having some difficulty with this, so I just hand copied the mute. * pull in another mute --------- Co-authored-by: Elastic Machine --- .../src/main/resources/date_nanos.csv | 1 + .../src/main/resources/date_nanos.csv-spec | 72 ++++++++++++++++++- .../xpack/esql/action/EsqlCapabilities.java | 5 ++ .../function/scalar/conditional/Greatest.java | 8 +-- .../function/scalar/conditional/Least.java | 8 +-- .../function/scalar/multivalue/MvDedupe.java | 2 + .../function/scalar/multivalue/MvSlice.java | 2 + .../function/scalar/multivalue/MvSort.java | 4 +- .../function/scalar/nulls/Coalesce.java | 3 + .../function/AbstractFunctionTestCase.java | 12 ++-- .../scalar/conditional/CaseTests.java | 1 + .../scalar/conditional/GreatestTests.java | 15 ++++ .../scalar/conditional/LeastTests.java | 15 ++++ .../scalar/multivalue/MvDedupeTests.java | 1 + .../scalar/multivalue/MvSliceTests.java | 17 +++++ .../scalar/multivalue/MvSortTests.java | 14 ++++ .../function/scalar/nulls/CoalesceTests.java | 13 ++++ 17 files changed, 177 insertions(+), 16 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/date_nanos.csv b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/date_nanos.csv index 4308d4eea24be..26b6f055221a6 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/date_nanos.csv +++ 
b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/date_nanos.csv @@ -8,3 +8,4 @@ millis:date,nanos:date_nanos,num:long 2023-10-23T12:15:03.360Z,2023-10-23T12:15:03.360103847Z,1698063303360103847 2023-10-23T12:15:03.360Z,2023-10-23T12:15:03.360103847Z,1698063303360103847 1999-10-23T12:15:03.360Z,[2023-03-23T12:15:03.360103847Z, 2023-02-23T13:33:34.937193000Z, 2023-01-23T13:55:01.543123456Z], 0 +1999-10-22T12:15:03.360Z,[2023-03-23T12:15:03.360103847Z, 2023-03-23T12:15:03.360103847Z, 2023-03-23T12:15:03.360103847Z], 0 diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/date_nanos.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/date_nanos.csv-spec index 83c73000b3999..77883597c9f19 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/date_nanos.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/date_nanos.csv-spec @@ -38,9 +38,10 @@ nanos:date_nanos mv_min on date nanos required_capability: date_nanos_type -FROM date_nanos | SORT millis ASC | EVAL nanos = MV_MIN(nanos) | KEEP nanos | LIMIT 1; +FROM date_nanos | SORT millis ASC | WHERE millis < "2000-01-01" | EVAL nanos = MV_MIN(nanos) | KEEP nanos; nanos:date_nanos +2023-03-23T12:15:03.360103847Z 2023-01-23T13:55:01.543123456Z ; @@ -56,9 +57,10 @@ ct:integer mv_first on date nanos required_capability: date_nanos_type -FROM date_nanos | SORT millis ASC | EVAL nanos = MV_FIRST(nanos) | KEEP nanos | LIMIT 1; +FROM date_nanos | SORT millis ASC | WHERE millis < "2000-01-01" | EVAL nanos = MV_FIRST(nanos) | KEEP nanos; nanos:date_nanos +2023-03-23T12:15:03.360103847Z 2023-01-23T13:55:01.543123456Z ; @@ -267,6 +269,72 @@ a:date_nanos | b:date_nanos | c:date_nanos null | null | null ; +Coalasce date nanos +required_capability: to_date_nanos + +ROW a = COALESCE(null, TO_DATE_NANOS(1698069301543123456)); + +a:date_nanos +2023-10-23T13:55:01.543123456Z +; + +Case date nanos result +required_capability: to_date_nanos + +ROW a = CASE(false, 
TO_DATE_NANOS(0::long), TO_DATE_NANOS(1698069301543123456)); + +a:date_nanos +2023-10-23T13:55:01.543123456Z +; + +Greatest date nanos +required_capability: least_greatest_for_datenanos + +ROW a = GREATEST(TO_DATE_NANOS("2023-10-23T13:55:01.543123456"), TO_DATE_NANOS("2023-10-23T13:53:55.832987654")); + +a:date_nanos +2023-10-23T13:55:01.543123456Z +; + +Least date nanos +required_capability: least_greatest_for_datenanos + +ROW a = LEAST(TO_DATE_NANOS("2023-10-23T13:55:01.543123456"), TO_DATE_NANOS("2023-10-23T13:53:55.832987654")); + +a:date_nanos +2023-10-23T13:53:55.832987654Z +; + +mv_dedup over date nanos +required_capability: date_nanos_type + +FROM date_nanos | WHERE millis < "2000-01-01" | EVAL a = MV_DEDUPE(nanos) | SORT millis DESC | KEEP a; + +a:date_nanos +[2023-01-23T13:55:01.543123456Z, 2023-02-23T13:33:34.937193000Z, 2023-03-23T12:15:03.360103847Z] +2023-03-23T12:15:03.360103847Z +; + +mv_sort over date nanos +required_capability: date_nanos_type + +FROM date_nanos | WHERE millis < "2000-01-01" | EVAL a = MV_SORT(nanos, "asc") | SORT millis DESC | KEEP a; + +a:date_nanos +[2023-01-23T13:55:01.543123456Z, 2023-02-23T13:33:34.937193000Z, 2023-03-23T12:15:03.360103847Z] +[2023-03-23T12:15:03.360103847Z, 2023-03-23T12:15:03.360103847Z, 2023-03-23T12:15:03.360103847Z] +; + +mv_slice over date nanos +required_capability: date_nanos_type + +FROM date_nanos | WHERE millis < "2000-01-01" | EVAL a = MV_SLICE(MV_SORT(nanos, "asc"), 1, 2) | SORT millis DESC | KEEP a; + +a:date_nanos +[2023-02-23T13:33:34.937193000Z, 2023-03-23T12:15:03.360103847Z] +[2023-03-23T12:15:03.360103847Z, 2023-03-23T12:15:03.360103847Z] +; + Max and Min of date nanos required_capability: date_nanos_aggregations diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 9184a69cc30c4..bf115cc9483a4 100644 --- 
a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -307,6 +307,11 @@ public enum Cap { */ TO_DATE_NANOS(EsqlCorePlugin.DATE_NANOS_FEATURE_FLAG), + /** + * Support Least and Greatest functions on Date Nanos type + */ + LEAST_GREATEST_FOR_DATENANOS(EsqlCorePlugin.DATE_NANOS_FEATURE_FLAG), + /** * support aggregations on date nanos */ diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/Greatest.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/Greatest.java index d47ebeab4ca6c..aad2d37d414b8 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/Greatest.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/Greatest.java @@ -43,7 +43,7 @@ public class Greatest extends EsqlScalarFunction implements OptionalArgument { private DataType dataType; @FunctionInfo( - returnType = { "boolean", "date", "double", "integer", "ip", "keyword", "long", "text", "version" }, + returnType = { "boolean", "date", "date_nanos", "double", "integer", "ip", "keyword", "long", "text", "version" }, description = "Returns the maximum value from multiple columns. This is similar to <>\n" + "except it is intended to run on multiple columns at once.", note = "When run on `keyword` or `text` fields, this returns the last string in alphabetical order. " @@ -54,12 +54,12 @@ public Greatest( Source source, @Param( name = "first", - type = { "boolean", "date", "double", "integer", "ip", "keyword", "long", "text", "version" }, + type = { "boolean", "date", "date_nanos", "double", "integer", "ip", "keyword", "long", "text", "version" }, description = "First of the columns to evaluate." 
) Expression first, @Param( name = "rest", - type = { "boolean", "date", "double", "integer", "ip", "keyword", "long", "text", "version" }, + type = { "boolean", "date", "date_nanos", "double", "integer", "ip", "keyword", "long", "text", "version" }, description = "The rest of the columns to evaluate.", optional = true ) List rest @@ -152,7 +152,7 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { if (dataType == DataType.INTEGER) { return new GreatestIntEvaluator.Factory(source(), factories); } - if (dataType == DataType.LONG || dataType == DataType.DATETIME) { + if (dataType == DataType.LONG || dataType == DataType.DATETIME || dataType == DataType.DATE_NANOS) { return new GreatestLongEvaluator.Factory(source(), factories); } if (DataType.isString(dataType) || dataType == DataType.IP || dataType == DataType.VERSION || dataType == DataType.UNSUPPORTED) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/Least.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/Least.java index 81c1419dcf788..70ba9319385f3 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/Least.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/Least.java @@ -43,7 +43,7 @@ public class Least extends EsqlScalarFunction implements OptionalArgument { private DataType dataType; @FunctionInfo( - returnType = { "boolean", "date", "double", "integer", "ip", "keyword", "long", "text", "version" }, + returnType = { "boolean", "date", "date_nanos", "double", "integer", "ip", "keyword", "long", "text", "version" }, description = "Returns the minimum value from multiple columns. 
" + "This is similar to <> except it is intended to run on multiple columns at once.", examples = @Example(file = "math", tag = "least") @@ -52,12 +52,12 @@ public Least( Source source, @Param( name = "first", - type = { "boolean", "date", "double", "integer", "ip", "keyword", "long", "text", "version" }, + type = { "boolean", "date", "date_nanos", "double", "integer", "ip", "keyword", "long", "text", "version" }, description = "First of the columns to evaluate." ) Expression first, @Param( name = "rest", - type = { "boolean", "date", "double", "integer", "ip", "keyword", "long", "text", "version" }, + type = { "boolean", "date", "date_nanos", "double", "integer", "ip", "keyword", "long", "text", "version" }, description = "The rest of the columns to evaluate.", optional = true ) List rest @@ -151,7 +151,7 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { if (dataType == DataType.INTEGER) { return new LeastIntEvaluator.Factory(source(), factories); } - if (dataType == DataType.LONG || dataType == DataType.DATETIME) { + if (dataType == DataType.LONG || dataType == DataType.DATETIME || dataType == DataType.DATE_NANOS) { return new LeastLongEvaluator.Factory(source(), factories); } if (DataType.isString(dataType) || dataType == DataType.IP || dataType == DataType.VERSION || dataType == DataType.UNSUPPORTED) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvDedupe.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvDedupe.java index b17ddddb422ce..34b89b4f78997 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvDedupe.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvDedupe.java @@ -38,6 +38,7 @@ public class MvDedupe extends AbstractMultivalueFunction { "cartesian_point", "cartesian_shape", 
"date", + "date_nanos", "double", "geo_point", "geo_shape", @@ -60,6 +61,7 @@ public MvDedupe( "cartesian_point", "cartesian_shape", "date", + "date_nanos", "double", "geo_point", "geo_shape", diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvSlice.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvSlice.java index a829b6f1417b9..ef562c339dfd9 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvSlice.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvSlice.java @@ -59,6 +59,7 @@ public class MvSlice extends EsqlScalarFunction implements OptionalArgument, Eva "cartesian_point", "cartesian_shape", "date", + "date_nanos", "double", "geo_point", "geo_shape", @@ -87,6 +88,7 @@ public MvSlice( "cartesian_point", "cartesian_shape", "date", + "date_nanos", "double", "geo_point", "geo_shape", diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvSort.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvSort.java index d9e41233952de..5ca5618bf2a54 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvSort.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvSort.java @@ -69,7 +69,7 @@ public class MvSort extends EsqlScalarFunction implements OptionalArgument, Vali private static final String INVALID_ORDER_ERROR = "Invalid order value in [{}], expected one of [{}, {}] but got [{}]"; @FunctionInfo( - returnType = { "boolean", "date", "double", "integer", "ip", "keyword", "long", "text", "version" }, + returnType = { "boolean", "date", "date_nanos", "double", "integer", "ip", "keyword", "long", 
"text", "version" }, description = "Sorts a multivalued field in lexicographical order.", examples = @Example(file = "ints", tag = "mv_sort") ) @@ -77,7 +77,7 @@ public MvSort( Source source, @Param( name = "field", - type = { "boolean", "date", "double", "integer", "ip", "keyword", "long", "text", "version" }, + type = { "boolean", "date", "date_nanos", "double", "integer", "ip", "keyword", "long", "text", "version" }, description = "Multivalue expression. If `null`, the function returns `null`." ) Expression field, @Param( diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/nulls/Coalesce.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/nulls/Coalesce.java index 575bb085c41f7..6b9c8d0da025b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/nulls/Coalesce.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/nulls/Coalesce.java @@ -53,6 +53,7 @@ public class Coalesce extends EsqlScalarFunction implements OptionalArgument { "boolean", "cartesian_point", "cartesian_shape", + "date_nanos", "date", "geo_point", "geo_shape", @@ -73,6 +74,7 @@ public Coalesce( "boolean", "cartesian_point", "cartesian_shape", + "date_nanos", "date", "geo_point", "geo_shape", @@ -90,6 +92,7 @@ public Coalesce( "boolean", "cartesian_point", "cartesian_shape", + "date_nanos", "date", "geo_point", "geo_shape", diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractFunctionTestCase.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractFunctionTestCase.java index 112ba20aa6fa6..d367e0fa4f9c1 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractFunctionTestCase.java +++ 
b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractFunctionTestCase.java @@ -724,17 +724,19 @@ public static void testFunctionInfo() { for (int i = 0; i < args.size() && i < types.size(); i++) { typesFromSignature.get(i).add(types.get(i).esNameIfPossible()); } - returnFromSignature.add(entry.getValue().esNameIfPossible()); + if (DataType.UNDER_CONSTRUCTION.containsKey(entry.getValue()) == false) { + returnFromSignature.add(entry.getValue().esNameIfPossible()); + } } for (int i = 0; i < args.size(); i++) { EsqlFunctionRegistry.ArgSignature arg = args.get(i); Set annotationTypes = Arrays.stream(arg.type()) - .filter(DataType.UNDER_CONSTRUCTION::containsKey) + .filter(t -> DataType.UNDER_CONSTRUCTION.containsKey(DataType.fromNameOrAlias(t)) == false) .collect(Collectors.toCollection(TreeSet::new)); Set signatureTypes = typesFromSignature.get(i) .stream() - .filter(DataType.UNDER_CONSTRUCTION::containsKey) + .filter(t -> DataType.UNDER_CONSTRUCTION.containsKey(DataType.fromNameOrAlias(t)) == false) .collect(Collectors.toCollection(TreeSet::new)); if (signatureTypes.isEmpty()) { log.info("{}: skipping", arg.name()); @@ -748,7 +750,9 @@ public static void testFunctionInfo() { ); } - Set returnTypes = Arrays.stream(description.returnType()).collect(Collectors.toCollection(TreeSet::new)); + Set returnTypes = Arrays.stream(description.returnType()) + .filter(t -> DataType.UNDER_CONSTRUCTION.containsKey(DataType.fromNameOrAlias(t)) == false) + .collect(Collectors.toCollection(TreeSet::new)); assertEquals(returnFromSignature, returnTypes); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/CaseTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/CaseTests.java index db3fce244c9a8..fbb7c691b1d94 100644 --- 
a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/CaseTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/CaseTests.java @@ -46,6 +46,7 @@ public class CaseTests extends AbstractScalarFunctionTestCase { DataType.TEXT, DataType.BOOLEAN, DataType.DATETIME, + DataType.DATE_NANOS, DataType.DOUBLE, DataType.INTEGER, DataType.LONG, diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/GreatestTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/GreatestTests.java index 311e3e3d89149..07d6ae34dc1e7 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/GreatestTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/GreatestTests.java @@ -115,6 +115,21 @@ public static Iterable parameters() { ) ) ); + suppliers.add( + new TestCaseSupplier( + "(a, b)", + List.of(DataType.DATE_NANOS, DataType.DATE_NANOS), + () -> new TestCaseSupplier.TestCase( + List.of( + new TestCaseSupplier.TypedData(1727877348000123456L, DataType.DATE_NANOS, "a"), + new TestCaseSupplier.TypedData(1727790948000987654L, DataType.DATE_NANOS, "b") + ), + "GreatestLongEvaluator[values=[MvMax[field=Attribute[channel=0]], MvMax[field=Attribute[channel=1]]]]", + DataType.DATE_NANOS, + equalTo(1727877348000123456L) + ) + ) + ); return parameterSuppliersFromTypedData(anyNullIsNull(false, suppliers)); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/LeastTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/LeastTests.java index 69842fde90312..d95cc79dd22e0 100644 --- 
a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/LeastTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/LeastTests.java @@ -114,6 +114,21 @@ public static Iterable parameters() { ) ) ); + suppliers.add( + new TestCaseSupplier( + "(a, b)", + List.of(DataType.DATE_NANOS, DataType.DATE_NANOS), + () -> new TestCaseSupplier.TestCase( + List.of( + new TestCaseSupplier.TypedData(1727877348000123456L, DataType.DATE_NANOS, "a"), + new TestCaseSupplier.TypedData(1727790948000987654L, DataType.DATE_NANOS, "b") + ), + "LeastLongEvaluator[values=[MvMin[field=Attribute[channel=0]], MvMin[field=Attribute[channel=1]]]]", + DataType.DATE_NANOS, + equalTo(1727790948000987654L) + ) + ) + ); return parameterSuppliersFromTypedData(anyNullIsNull(false, suppliers)); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvDedupeTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvDedupeTests.java index d8d3b607efcc0..f3b44274f3ade 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvDedupeTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvDedupeTests.java @@ -39,6 +39,7 @@ public static Iterable parameters() { booleans(cases, "mv_dedupe", "MvDedupe", (size, values) -> getMatcher(values)); bytesRefs(cases, "mv_dedupe", "MvDedupe", (size, values) -> getMatcher(values)); dateTimes(cases, "mv_dedupe", "MvDedupe", (size, values) -> getMatcher(values.mapToObj(Long::valueOf))); + dateNanos(cases, "mv_dedupe", "MvDedupe", DataType.DATE_NANOS, (size, values) -> getMatcher(values.mapToObj(Long::valueOf))); doubles(cases, "mv_dedupe", "MvDedupe", (size, values) -> getMatcher(values.mapToObj(Double::valueOf))); ints(cases, 
"mv_dedupe", "MvDedupe", (size, values) -> getMatcher(values.mapToObj(Integer::valueOf))); longs(cases, "mv_dedupe", "MvDedupe", (size, values) -> getMatcher(values.mapToObj(Long::valueOf))); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvSliceTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvSliceTests.java index e5bac422805af..859c79090d62f 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvSliceTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvSliceTests.java @@ -182,6 +182,23 @@ private static void longs(List suppliers) { equalTo(start == end ? field.get(start) : field.subList(start, end + 1)) ); })); + + suppliers.add(new TestCaseSupplier(List.of(DataType.DATE_NANOS, DataType.INTEGER, DataType.INTEGER), () -> { + List field = randomList(1, 10, () -> randomLong()); + int length = field.size(); + int start = randomIntBetween(0, length - 1); + int end = randomIntBetween(start, length - 1); + return new TestCaseSupplier.TestCase( + List.of( + new TestCaseSupplier.TypedData(field, DataType.DATE_NANOS, "field"), + new TestCaseSupplier.TypedData(start, DataType.INTEGER, "start"), + new TestCaseSupplier.TypedData(end, DataType.INTEGER, "end") + ), + "MvSliceLongEvaluator[field=Attribute[channel=0], start=Attribute[channel=1], end=Attribute[channel=2]]", + DataType.DATE_NANOS, + equalTo(start == end ? 
field.get(start) : field.subList(start, end + 1)) + ); + })); } private static void doubles(List suppliers) { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvSortTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvSortTests.java index d07ed2aeae887..63f538059dddf 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvSortTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvSortTests.java @@ -110,6 +110,20 @@ private static void longs(List suppliers) { equalTo(field.size() == 1 ? field.iterator().next() : field.stream().sorted(Collections.reverseOrder()).toList()) ); })); + + suppliers.add(new TestCaseSupplier(List.of(DataType.DATE_NANOS, DataType.KEYWORD), () -> { + List field = randomList(1, 10, () -> randomLong()); + BytesRef order = new BytesRef("DESC"); + return new TestCaseSupplier.TestCase( + List.of( + new TestCaseSupplier.TypedData(field, DataType.DATE_NANOS, "field"), + new TestCaseSupplier.TypedData(order, DataType.KEYWORD, "order").forceLiteral() + ), + "MvSortLong[field=Attribute[channel=0], order=false]", + DataType.DATE_NANOS, + equalTo(field.size() == 1 ? 
field.iterator().next() : field.stream().sorted(Collections.reverseOrder()).toList()) + ); + })); } private static void doubles(List suppliers) { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/nulls/CoalesceTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/nulls/CoalesceTests.java index c9b6de64e079d..797c99992815e 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/nulls/CoalesceTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/nulls/CoalesceTests.java @@ -96,6 +96,19 @@ public static Iterable parameters() { equalTo(firstDate == null ? secondDate : firstDate) ); })); + noNullsSuppliers.add(new TestCaseSupplier(List.of(DataType.DATE_NANOS, DataType.DATE_NANOS), () -> { + Long firstDate = randomBoolean() ? null : randomNonNegativeLong(); + Long secondDate = randomNonNegativeLong(); + return new TestCaseSupplier.TestCase( + List.of( + new TestCaseSupplier.TypedData(firstDate, DataType.DATE_NANOS, "first"), + new TestCaseSupplier.TypedData(secondDate, DataType.DATE_NANOS, "second") + ), + "CoalesceEvaluator[values=[Attribute[channel=0], Attribute[channel=1]]]", + DataType.DATE_NANOS, + equalTo(firstDate == null ? secondDate : firstDate) + ); + })); List suppliers = new ArrayList<>(noNullsSuppliers); for (TestCaseSupplier s : noNullsSuppliers) { From 6c884e74778d08f0406d1ed324298c80b8f0dd92 Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Thu, 24 Oct 2024 18:43:42 -0700 Subject: [PATCH 18/22] [8.x] Add lookup index mode (#115143) (#115596) * Add lookup index mode (#115143) This change introduces a new index mode, lookup, for indices intended for lookup operations in ES|QL. Lookup indices must have a single shard and be replicated to all data nodes by default. Aside from these requirements, they function as standard indices. 
Documentation will be added later when the lookup operator in ES|QL is implemented. * default shard * minimal * compile --- .../test/indices.create/10_basic.yml | 67 ++++++ .../index/LookupIndexModeIT.java | 219 ++++++++++++++++++ .../org/elasticsearch/TransportVersions.java | 1 + .../metadata/MetadataCreateIndexService.java | 16 +- .../org/elasticsearch/index/IndexMode.java | 115 ++++++++- .../monitor/metrics/IndicesMetrics.java | 2 +- .../elasticsearch/node/NodeConstruction.java | 10 +- .../indices/CreateIndexCapabilities.java | 7 +- .../index/mapper/MapperServiceTestCase.java | 2 +- .../index/engine/FollowingEngineTests.java | 3 + 10 files changed, 436 insertions(+), 6 deletions(-) create mode 100644 server/src/internalClusterTest/java/org/elasticsearch/index/LookupIndexModeIT.java diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/10_basic.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/10_basic.yml index 8242b7cdd29e7..d0e1759073e1b 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/10_basic.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/10_basic.yml @@ -149,3 +149,70 @@ indices.exists_alias: name: logs_2022-12-31 - is_true: '' + +--- +"Create lookup index": + - requires: + test_runner_features: [ capabilities, default_shards ] + capabilities: + - method: PUT + path: /{index} + capabilities: [ lookup_index_mode ] + reason: "Support for 'lookup' index mode capability required" + - do: + indices.create: + index: "test_lookup" + body: + settings: + index.mode: lookup + + - do: + indices.get_settings: + index: test_lookup + + - match: { test_lookup.settings.index.number_of_shards: "1"} + - match: { test_lookup.settings.index.auto_expand_replicas: "0-all"} + +--- +"Create lookup index with one shard": + - requires: + test_runner_features: [ capabilities, default_shards ] + capabilities: + - method: PUT + path: /{index} + 
capabilities: [ lookup_index_mode ] + reason: "Support for 'lookup' index mode capability required" + - do: + indices.create: + index: "test_lookup" + body: + settings: + index: + mode: lookup + number_of_shards: 1 + + - do: + indices.get_settings: + index: test_lookup + + - match: { test_lookup.settings.index.number_of_shards: "1"} + - match: { test_lookup.settings.index.auto_expand_replicas: "0-all"} + +--- +"Create lookup index with two shards": + - requires: + test_runner_features: [ capabilities ] + capabilities: + - method: PUT + path: /{index} + capabilities: [ lookup_index_mode ] + reason: "Support for 'lookup' index mode capability required" + - do: + catch: /illegal_argument_exception/ + indices.create: + index: test_lookup + body: + settings: + index.mode: lookup + index.number_of_shards: 2 + diff --git a/server/src/internalClusterTest/java/org/elasticsearch/index/LookupIndexModeIT.java b/server/src/internalClusterTest/java/org/elasticsearch/index/LookupIndexModeIT.java new file mode 100644 index 0000000000000..486e562faf679 --- /dev/null +++ b/server/src/internalClusterTest/java/org/elasticsearch/index/LookupIndexModeIT.java @@ -0,0 +1,219 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +package org.elasticsearch.index; + +import org.elasticsearch.action.admin.indices.create.CreateIndexRequest; +import org.elasticsearch.action.admin.indices.create.TransportCreateIndexAction; +import org.elasticsearch.action.admin.indices.shrink.ResizeAction; +import org.elasticsearch.action.admin.indices.shrink.ResizeRequest; +import org.elasticsearch.action.admin.indices.shrink.ResizeType; +import org.elasticsearch.action.fieldcaps.FieldCapabilitiesIndexResponse; +import org.elasticsearch.action.fieldcaps.FieldCapabilitiesRequest; +import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.cluster.metadata.IndexMetadata; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.query.MatchQueryBuilder; +import org.elasticsearch.search.SearchHit; +import org.elasticsearch.test.ESIntegTestCase; + +import java.util.Map; + +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasSize; + +public class LookupIndexModeIT extends ESIntegTestCase { + + @Override + protected int numberOfShards() { + return 1; + } + + public void testBasic() { + internalCluster().ensureAtLeastNumDataNodes(1); + Settings.Builder lookupSettings = Settings.builder().put("index.mode", "lookup"); + if (randomBoolean()) { + lookupSettings.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1); + } + CreateIndexRequest createRequest = new CreateIndexRequest("hosts"); + createRequest.settings(lookupSettings); + createRequest.simpleMapping("ip", "type=ip", "os", "type=keyword"); + assertAcked(client().admin().indices().execute(TransportCreateIndexAction.TYPE, createRequest)); + Settings settings = client().admin().indices().prepareGetSettings("hosts").get().getIndexToSettings().get("hosts"); + assertThat(settings.get("index.mode"), equalTo("lookup")); + 
assertThat(settings.get("index.auto_expand_replicas"), equalTo("0-all")); + Map allHosts = Map.of( + "192.168.1.2", + "Windows", + "192.168.1.3", + "MacOS", + "192.168.1.4", + "Linux", + "192.168.1.5", + "Android", + "192.168.1.6", + "iOS", + "192.168.1.7", + "Windows", + "192.168.1.8", + "MacOS", + "192.168.1.9", + "Linux", + "192.168.1.10", + "Linux", + "192.168.1.11", + "Windows" + ); + for (Map.Entry e : allHosts.entrySet()) { + client().prepareIndex("hosts").setSource("ip", e.getKey(), "os", e.getValue()).get(); + } + refresh("hosts"); + assertAcked(client().admin().indices().prepareCreate("events").setSettings(Settings.builder().put("index.mode", "logsdb")).get()); + int numDocs = between(1, 10); + for (int i = 0; i < numDocs; i++) { + String ip = randomFrom(allHosts.keySet()); + String message = randomFrom("login", "logout", "shutdown", "restart"); + client().prepareIndex("events").setSource("@timestamp", "2024-01-01", "ip", ip, "message", message).get(); + } + refresh("events"); + // _search + { + SearchResponse resp = prepareSearch("events", "hosts").setQuery(new MatchQueryBuilder("_index_mode", "lookup")) + .setSize(10000) + .get(); + for (SearchHit hit : resp.getHits()) { + assertThat(hit.getIndex(), equalTo("hosts")); + } + assertHitCount(resp, allHosts.size()); + resp.decRef(); + } + // field_caps + { + FieldCapabilitiesRequest request = new FieldCapabilitiesRequest(); + request.indices("events", "hosts"); + request.fields("*"); + request.setMergeResults(false); + request.indexFilter(new MatchQueryBuilder("_index_mode", "lookup")); + var resp = client().fieldCaps(request).actionGet(); + assertThat(resp.getIndexResponses(), hasSize(1)); + FieldCapabilitiesIndexResponse indexResponse = resp.getIndexResponses().get(0); + assertThat(indexResponse.getIndexMode(), equalTo(IndexMode.LOOKUP)); + assertThat(indexResponse.getIndexName(), equalTo("hosts")); + } + } + + public void testRejectMoreThanOneShard() { + int numberOfShards = between(2, 5); + 
IllegalArgumentException error = expectThrows(IllegalArgumentException.class, () -> { + client().admin() + .indices() + .prepareCreate("hosts") + .setSettings(Settings.builder().put("index.mode", "lookup").put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, numberOfShards)) + .setMapping("ip", "type=ip", "os", "type=keyword") + .get(); + }); + assertThat( + error.getMessage(), + equalTo("index with [lookup] mode must have [index.number_of_shards] set to 1 or unset; provided " + numberOfShards) + ); + } + + public void testResizeLookupIndex() { + Settings.Builder createSettings = Settings.builder().put("index.mode", "lookup"); + if (randomBoolean()) { + createSettings.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1); + } + CreateIndexRequest createIndexRequest = new CreateIndexRequest("lookup-1").settings(createSettings); + assertAcked(client().admin().indices().execute(TransportCreateIndexAction.TYPE, createIndexRequest)); + client().admin().indices().prepareAddBlock(IndexMetadata.APIBlock.WRITE, "lookup-1").get(); + + ResizeRequest clone = new ResizeRequest("lookup-2", "lookup-1"); + clone.setResizeType(ResizeType.CLONE); + assertAcked(client().admin().indices().execute(ResizeAction.INSTANCE, clone).actionGet()); + Settings settings = client().admin().indices().prepareGetSettings("lookup-2").get().getIndexToSettings().get("lookup-2"); + assertThat(settings.get("index.mode"), equalTo("lookup")); + assertThat(settings.get("index.number_of_shards"), equalTo("1")); + assertThat(settings.get("index.auto_expand_replicas"), equalTo("0-all")); + + ResizeRequest split = new ResizeRequest("lookup-3", "lookup-1"); + split.setResizeType(ResizeType.SPLIT); + split.getTargetIndexRequest().settings(Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 3)); + IllegalArgumentException error = expectThrows( + IllegalArgumentException.class, + () -> client().admin().indices().execute(ResizeAction.INSTANCE, split).actionGet() + ); + assertThat( + error.getMessage(), + equalTo("index 
with [lookup] mode must have [index.number_of_shards] set to 1 or unset; provided 3") + ); + } + + public void testResizeRegularIndexToLookup() { + String dataNode = internalCluster().startDataOnlyNode(); + assertAcked( + client().admin() + .indices() + .prepareCreate("regular-1") + .setSettings( + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 2) + .put("index.routing.allocation.require._name", dataNode) + ) + .setMapping("ip", "type=ip", "os", "type=keyword") + .get() + ); + client().admin().indices().prepareAddBlock(IndexMetadata.APIBlock.WRITE, "regular-1").get(); + client().admin() + .indices() + .prepareUpdateSettings("regular-1") + .setSettings(Settings.builder().put("index.number_of_replicas", 0)) + .get(); + + ResizeRequest clone = new ResizeRequest("lookup-3", "regular-1"); + clone.setResizeType(ResizeType.CLONE); + clone.getTargetIndexRequest().settings(Settings.builder().put("index.mode", "lookup")); + IllegalArgumentException error = expectThrows( + IllegalArgumentException.class, + () -> client().admin().indices().execute(ResizeAction.INSTANCE, clone).actionGet() + ); + assertThat( + error.getMessage(), + equalTo("index with [lookup] mode must have [index.number_of_shards] set to 1 or unset; provided 2") + ); + + ResizeRequest shrink = new ResizeRequest("lookup-4", "regular-1"); + shrink.setResizeType(ResizeType.SHRINK); + shrink.getTargetIndexRequest() + .settings(Settings.builder().put("index.mode", "lookup").put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)); + + error = expectThrows( + IllegalArgumentException.class, + () -> client().admin().indices().execute(ResizeAction.INSTANCE, shrink).actionGet() + ); + assertThat(error.getMessage(), equalTo("can't change index.mode of index [regular-1] from [standard] to [lookup]")); + } + + public void testDoNotOverrideAutoExpandReplicas() { + internalCluster().ensureAtLeastNumDataNodes(1); + Settings.Builder createSettings = Settings.builder().put("index.mode", "lookup"); + if 
(randomBoolean()) { + createSettings.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1); + } + createSettings.put("index.auto_expand_replicas", "3-5"); + CreateIndexRequest createRequest = new CreateIndexRequest("hosts"); + createRequest.settings(createSettings); + createRequest.simpleMapping("ip", "type=ip", "os", "type=keyword"); + assertAcked(client().admin().indices().execute(TransportCreateIndexAction.TYPE, createRequest)); + Settings settings = client().admin().indices().prepareGetSettings("hosts").get().getIndexToSettings().get("hosts"); + assertThat(settings.get("index.mode"), equalTo("lookup")); + assertThat(settings.get("index.auto_expand_replicas"), equalTo("3-5")); + } +} diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java index 397dea9b99761..0e15a966b4dd1 100644 --- a/server/src/main/java/org/elasticsearch/TransportVersions.java +++ b/server/src/main/java/org/elasticsearch/TransportVersions.java @@ -181,6 +181,7 @@ static TransportVersion def(int id) { public static final TransportVersion INFERENCE_DONT_PERSIST_ON_READ = def(8_776_00_0); public static final TransportVersion SIMULATE_MAPPING_ADDITION = def(8_777_00_0); public static final TransportVersion INTRODUCE_ALL_APPLICABLE_SELECTOR = def(8_778_00_0); + public static final TransportVersion INDEX_MODE_LOOKUP = def(8_779_00_0); /* * STOP! READ THIS FIRST! 
No, really, diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexService.java b/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexService.java index 69e3b7b70ff82..ed029db54bf06 100644 --- a/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexService.java +++ b/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexService.java @@ -308,7 +308,12 @@ private void onlyCreateIndex( final CreateIndexClusterStateUpdateRequest request, final ActionListener listener ) { - normalizeRequestSetting(request); + try { + normalizeRequestSetting(request); + } catch (Exception e) { + listener.onFailure(e); + return; + } var delegate = new AllocationActionListener<>(listener, threadPool.getThreadContext()); submitUnbatchedTask( @@ -1599,6 +1604,15 @@ static IndexMetadata validateResize( // of if the source shards are divisible by the number of target shards IndexMetadata.getRoutingFactor(sourceMetadata.getNumberOfShards(), INDEX_NUMBER_OF_SHARDS_SETTING.get(targetIndexSettings)); } + if (targetIndexSettings.hasValue(IndexSettings.MODE.getKey())) { + IndexMode oldMode = Objects.requireNonNullElse(sourceMetadata.getIndexMode(), IndexMode.STANDARD); + IndexMode newMode = IndexSettings.MODE.get(targetIndexSettings); + if (newMode != oldMode) { + throw new IllegalArgumentException( + "can't change index.mode of index [" + sourceIndex + "] from [" + oldMode + "] to [" + newMode + "]" + ); + } + } return sourceMetadata; } diff --git a/server/src/main/java/org/elasticsearch/index/IndexMode.java b/server/src/main/java/org/elasticsearch/index/IndexMode.java index 75ec67f26dd3a..e6339344b6e5f 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexMode.java +++ b/server/src/main/java/org/elasticsearch/index/IndexMode.java @@ -9,7 +9,9 @@ package org.elasticsearch.index; +import org.elasticsearch.TransportVersions; import org.elasticsearch.cluster.metadata.IndexMetadata; +import 
org.elasticsearch.cluster.metadata.Metadata; import org.elasticsearch.cluster.metadata.MetadataCreateDataStreamService; import org.elasticsearch.cluster.routing.IndexRouting; import org.elasticsearch.common.compress.CompressedXContent; @@ -37,8 +39,10 @@ import org.elasticsearch.index.mapper.TsidExtractingIdFieldMapper; import java.io.IOException; +import java.time.Instant; import java.util.Arrays; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.Objects; import java.util.function.BooleanSupplier; @@ -308,6 +312,78 @@ public SourceFieldMapper.Mode defaultSourceMode() { public String getDefaultCodec() { return CodecService.BEST_COMPRESSION_CODEC; } + }, + LOOKUP("lookup") { + @Override + void validateWithOtherSettings(Map, Object> settings) { + final Integer providedNumberOfShards = (Integer) settings.get(IndexMetadata.INDEX_NUMBER_OF_SHARDS_SETTING); + if (providedNumberOfShards != null && providedNumberOfShards != 1) { + throw new IllegalArgumentException( + "index with [lookup] mode must have [index.number_of_shards] set to 1 or unset; provided " + providedNumberOfShards + ); + } + } + + @Override + public void validateMapping(MappingLookup lookup) {}; + + @Override + public void validateAlias(@Nullable String indexRouting, @Nullable String searchRouting) {} + + @Override + public void validateTimestampFieldMapping(boolean isDataStream, MappingLookup mappingLookup) { + + } + + @Override + public CompressedXContent getDefaultMapping(final IndexSettings indexSettings) { + return null; + } + + @Override + public TimestampBounds getTimestampBound(IndexMetadata indexMetadata) { + return null; + } + + @Override + public MetadataFieldMapper timeSeriesIdFieldMapper() { + // non time-series indices must not have a TimeSeriesIdFieldMapper + return null; + } + + @Override + public MetadataFieldMapper timeSeriesRoutingHashFieldMapper() { + // non time-series indices must not have a TimeSeriesRoutingIdFieldMapper + return null; + } + + 
@Override + public IdFieldMapper idFieldMapperWithoutFieldData() { + return ProvidedIdFieldMapper.NO_FIELD_DATA; + } + + @Override + public IdFieldMapper buildIdFieldMapper(BooleanSupplier fieldDataEnabled) { + return new ProvidedIdFieldMapper(fieldDataEnabled); + } + + @Override + public DocumentDimensions buildDocumentDimensions(IndexSettings settings) { + return DocumentDimensions.Noop.INSTANCE; + } + + @Override + public boolean shouldValidateTimestamp() { + return false; + } + + @Override + public void validateSourceFieldMapper(SourceFieldMapper sourceFieldMapper) {} + + @Override + public SourceFieldMapper.Mode defaultSourceMode() { + return SourceFieldMapper.Mode.STORED; + } }; private static final String HOST_NAME = "host.name"; @@ -370,6 +446,7 @@ private static CompressedXContent createDefaultMapping(boolean includeHostName) static final List> VALIDATE_WITH_SETTINGS = List.copyOf( Stream.concat( Stream.of( + IndexMetadata.INDEX_NUMBER_OF_SHARDS_SETTING, IndexMetadata.INDEX_ROUTING_PARTITION_SIZE_SETTING, IndexMetadata.INDEX_ROUTING_PATH, IndexSettings.TIME_SERIES_START_TIME, @@ -476,11 +553,12 @@ public static IndexMode fromString(String value) { case "standard" -> IndexMode.STANDARD; case "time_series" -> IndexMode.TIME_SERIES; case "logsdb" -> IndexMode.LOGSDB; + case "lookup" -> IndexMode.LOOKUP; default -> throw new IllegalArgumentException( "[" + value + "] is an invalid index mode, valid modes are: [" - + Arrays.stream(IndexMode.values()).map(IndexMode::toString).collect(Collectors.joining()) + + Arrays.stream(IndexMode.values()).map(IndexMode::toString).collect(Collectors.joining(",")) + "]" ); }; @@ -492,6 +570,7 @@ public static IndexMode readFrom(StreamInput in) throws IOException { case 0 -> STANDARD; case 1 -> TIME_SERIES; case 2 -> LOGSDB; + case 3 -> LOOKUP; default -> throw new IllegalStateException("unexpected index mode [" + mode + "]"); }; } @@ -501,6 +580,7 @@ public static void writeTo(IndexMode indexMode, StreamOutput out) throws 
IOExcep case STANDARD -> 0; case TIME_SERIES -> 1; case LOGSDB -> 2; + case LOOKUP -> out.getTransportVersion().onOrAfter(TransportVersions.INDEX_MODE_LOOKUP) ? 3 : 0; }; out.writeByte((byte) code); } @@ -509,4 +589,37 @@ public static void writeTo(IndexMode indexMode, StreamOutput out) throws IOExcep public String toString() { return getName(); } + + /** + * A built-in index setting provider that supplies additional index settings based on the index mode. + * Currently, only the lookup index mode provides non-empty additional settings. + */ + public static final class IndexModeSettingsProvider implements IndexSettingProvider { + @Override + public Settings getAdditionalIndexSettings( + String indexName, + String dataStreamName, + IndexMode templateIndexMode, + Metadata metadata, + Instant resolvedAt, + Settings indexTemplateAndCreateRequestSettings, + List combinedTemplateMappings + ) { + IndexMode indexMode = templateIndexMode; + if (indexMode == null) { + String modeName = indexTemplateAndCreateRequestSettings.get(IndexSettings.MODE.getKey()); + if (modeName != null) { + indexMode = IndexMode.valueOf(modeName.toUpperCase(Locale.ROOT)); + } + } + if (indexMode == LOOKUP) { + return Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_AUTO_EXPAND_REPLICAS, "0-all") + .build(); + } else { + return Settings.EMPTY; + } + } + } } diff --git a/server/src/main/java/org/elasticsearch/monitor/metrics/IndicesMetrics.java b/server/src/main/java/org/elasticsearch/monitor/metrics/IndicesMetrics.java index 11df8710fad6c..ba67bc03e1441 100644 --- a/server/src/main/java/org/elasticsearch/monitor/metrics/IndicesMetrics.java +++ b/server/src/main/java/org/elasticsearch/monitor/metrics/IndicesMetrics.java @@ -55,7 +55,7 @@ public IndicesMetrics(MeterRegistry meterRegistry, IndicesService indicesService } private static List registerAsyncMetrics(MeterRegistry registry, IndicesStatsCache cache) { - final int TOTAL_METRICS = 36; + final int 
TOTAL_METRICS = 48; List metrics = new ArrayList<>(TOTAL_METRICS); for (IndexMode indexMode : IndexMode.values()) { String name = indexMode.getName(); diff --git a/server/src/main/java/org/elasticsearch/node/NodeConstruction.java b/server/src/main/java/org/elasticsearch/node/NodeConstruction.java index 8ee51044e5f88..cce9d0cb8cdc4 100644 --- a/server/src/main/java/org/elasticsearch/node/NodeConstruction.java +++ b/server/src/main/java/org/elasticsearch/node/NodeConstruction.java @@ -80,6 +80,7 @@ import org.elasticsearch.common.settings.SettingsModule; import org.elasticsearch.common.util.BigArrays; import org.elasticsearch.common.util.PageCacheRecycler; +import org.elasticsearch.common.util.set.Sets; import org.elasticsearch.core.IOUtils; import org.elasticsearch.core.SuppressForbidden; import org.elasticsearch.core.TimeValue; @@ -108,6 +109,7 @@ import org.elasticsearch.health.node.tracker.RepositoriesHealthTracker; import org.elasticsearch.health.stats.HealthApiStats; import org.elasticsearch.http.HttpServerTransport; +import org.elasticsearch.index.IndexMode; import org.elasticsearch.index.IndexSettingProvider; import org.elasticsearch.index.IndexSettingProviders; import org.elasticsearch.index.IndexingPressure; @@ -820,7 +822,10 @@ private void construct( final var parameters = new IndexSettingProvider.Parameters(indicesService::createIndexMapperServiceForValidation); IndexSettingProviders indexSettingProviders = new IndexSettingProviders( - pluginsService.flatMap(p -> p.getAdditionalIndexSettingProviders(parameters)).collect(Collectors.toSet()) + Sets.union( + builtinIndexSettingProviders(), + pluginsService.flatMap(p -> p.getAdditionalIndexSettingProviders(parameters)).collect(Collectors.toSet()) + ) ); final ShardLimitValidator shardLimitValidator = new ShardLimitValidator(settings, clusterService); @@ -1653,4 +1658,7 @@ private Module loadPersistentTasksService( }; } + private Set builtinIndexSettingProviders() { + return Set.of(new 
IndexMode.IndexModeSettingsProvider()); + } } diff --git a/server/src/main/java/org/elasticsearch/rest/action/admin/indices/CreateIndexCapabilities.java b/server/src/main/java/org/elasticsearch/rest/action/admin/indices/CreateIndexCapabilities.java index 899486399af6b..900a352d42f30 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/admin/indices/CreateIndexCapabilities.java +++ b/server/src/main/java/org/elasticsearch/rest/action/admin/indices/CreateIndexCapabilities.java @@ -21,5 +21,10 @@ public class CreateIndexCapabilities { */ private static final String LOGSDB_INDEX_MODE_CAPABILITY = "logsdb_index_mode"; - public static Set CAPABILITIES = Set.of(LOGSDB_INDEX_MODE_CAPABILITY); + /** + * Support lookup index mode + */ + private static final String LOOKUP_INDEX_MODE_CAPABILITY = "lookup_index_mode"; + + public static Set CAPABILITIES = Set.of(LOGSDB_INDEX_MODE_CAPABILITY, LOOKUP_INDEX_MODE_CAPABILITY); } diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java index da04f30ff8023..3960aa5a91cc5 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java @@ -142,7 +142,7 @@ protected static String randomIndexOptions() { protected final DocumentMapper createDocumentMapper(XContentBuilder mappings, IndexMode indexMode) throws IOException { return switch (indexMode) { - case STANDARD -> createDocumentMapper(mappings); + case STANDARD, LOOKUP -> createDocumentMapper(mappings); case TIME_SERIES -> createTimeSeriesModeDocumentMapper(mappings); case LOGSDB -> createLogsModeDocumentMapper(mappings); }; diff --git a/x-pack/plugin/ccr/src/test/java/org/elasticsearch/xpack/ccr/index/engine/FollowingEngineTests.java 
b/x-pack/plugin/ccr/src/test/java/org/elasticsearch/xpack/ccr/index/engine/FollowingEngineTests.java index 478a0d08d6612..150eddf039cec 100644 --- a/x-pack/plugin/ccr/src/test/java/org/elasticsearch/xpack/ccr/index/engine/FollowingEngineTests.java +++ b/x-pack/plugin/ccr/src/test/java/org/elasticsearch/xpack/ccr/index/engine/FollowingEngineTests.java @@ -700,6 +700,9 @@ public void testProcessOnceOnPrimary() throws Exception { case LOGSDB: settingsBuilder.put("index.mode", IndexMode.LOGSDB.getName()); break; + case LOOKUP: + settingsBuilder.put("index.mode", IndexMode.LOOKUP.getName()); + break; default: throw new UnsupportedOperationException("Unknown index mode [" + indexMode + "]"); } From 1db03c480fdb60f2cea48056435c8c447309ab85 Mon Sep 17 00:00:00 2001 From: Kostas Krikellas <131142368+kkrik-es@users.noreply.github.com> Date: Fri, 25 Oct 2024 08:22:04 +0300 Subject: [PATCH 19/22] [8.x] Propagate root subobjects setting to downsample indexes (#115358) (#115577) * Propagate root subobjects setting to downsample indexes (#115358) * Propagate root subobjects setting to downsample indexes * exclude tests from rest compat * remove subobjects propagation (cherry picked from commit 5c1a3ada8ae7a790dfd8460c76c6a341d9d42b7a) * Update build.gradle --- .../downsample/DownsampleWithBasicRestIT.java | 40 ++ .../test/downsample/10_basic.yml | 466 +++++++++--------- 2 files changed, 278 insertions(+), 228 deletions(-) create mode 100644 x-pack/plugin/downsample/qa/rest/src/yamlRestTest/java/org/elasticsearch/xpack/downsample/DownsampleWithBasicRestIT.java diff --git a/x-pack/plugin/downsample/qa/rest/src/yamlRestTest/java/org/elasticsearch/xpack/downsample/DownsampleWithBasicRestIT.java b/x-pack/plugin/downsample/qa/rest/src/yamlRestTest/java/org/elasticsearch/xpack/downsample/DownsampleWithBasicRestIT.java new file mode 100644 index 0000000000000..8f75e76315844 --- /dev/null +++ 
b/x-pack/plugin/downsample/qa/rest/src/yamlRestTest/java/org/elasticsearch/xpack/downsample/DownsampleWithBasicRestIT.java @@ -0,0 +1,40 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.downsample; + +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; + +import org.elasticsearch.test.cluster.ElasticsearchCluster; +import org.elasticsearch.test.cluster.local.distribution.DistributionType; +import org.elasticsearch.test.rest.yaml.ClientYamlTestCandidate; +import org.elasticsearch.test.rest.yaml.ESClientYamlSuiteTestCase; +import org.junit.ClassRule; + +public class DownsampleWithBasicRestIT extends ESClientYamlSuiteTestCase { + + @ClassRule + public static ElasticsearchCluster cluster = ElasticsearchCluster.local() + .distribution(DistributionType.DEFAULT) + .setting("xpack.security.enabled", "false") + .build(); + + @Override + protected String getTestRestCluster() { + return cluster.getHttpAddresses(); + } + + public DownsampleWithBasicRestIT(final ClientYamlTestCandidate testCandidate) { + super(testCandidate); + } + + @ParametersFactory + public static Iterable parameters() throws Exception { + return ESClientYamlSuiteTestCase.createParameters(); + } + +} diff --git a/x-pack/plugin/downsample/qa/rest/src/yamlRestTest/resources/rest-api-spec/test/downsample/10_basic.yml b/x-pack/plugin/downsample/qa/rest/src/yamlRestTest/resources/rest-api-spec/test/downsample/10_basic.yml index 0bcd35cc69038..fa3560bec516e 100644 --- a/x-pack/plugin/downsample/qa/rest/src/yamlRestTest/resources/rest-api-spec/test/downsample/10_basic.yml +++ b/x-pack/plugin/downsample/qa/rest/src/yamlRestTest/resources/rest-api-spec/test/downsample/10_basic.yml @@ -16,6 +16,7 @@ setup: start_time: 2021-04-28T00:00:00Z end_time: 
2021-04-29T00:00:00Z mappings: + subobjects: false properties: "@timestamp": type: date @@ -106,6 +107,7 @@ setup: start_time: 2021-04-28T00:00:00Z end_time: 2021-04-29T00:00:00Z mappings: + subobjects: false properties: "@timestamp": type: date @@ -172,6 +174,7 @@ setup: start_time: 2021-04-28T00:00:00Z end_time: 2021-04-29T00:00:00Z mappings: + subobjects: false properties: "@timestamp": type: date @@ -237,6 +240,7 @@ setup: start_time: 2021-04-28T00:00:00Z end_time: 2021-04-29T00:00:00Z mappings: + subobjects: false properties: "@timestamp": type: date @@ -318,29 +322,29 @@ setup: - length: { hits.hits: 4 } - match: { hits.hits.0._source._doc_count: 2 } - - match: { hits.hits.0._source.k8s.pod.uid: df3145b3-0563-4d3b-a0f7-897eb2876ea9 } + - match: { hits.hits.0._source.k8s\.pod\.uid: df3145b3-0563-4d3b-a0f7-897eb2876ea9 } - match: { hits.hits.0._source.metricset: pod } - match: { hits.hits.0._source.@timestamp: 2021-04-28T18:00:00.000Z } - - match: { hits.hits.0._source.k8s.pod.multi-counter: 0 } - - match: { hits.hits.0._source.k8s.pod.scaled-counter: 0.00 } - - match: { hits.hits.0._source.k8s.pod.multi-gauge.min: 100 } - - match: { hits.hits.0._source.k8s.pod.multi-gauge.max: 102 } - - match: { hits.hits.0._source.k8s.pod.multi-gauge.sum: 607 } - - match: { hits.hits.0._source.k8s.pod.multi-gauge.value_count: 6 } - - match: { hits.hits.0._source.k8s.pod.scaled-gauge.min: 100.0 } - - match: { hits.hits.0._source.k8s.pod.scaled-gauge.max: 101.0 } - - match: { hits.hits.0._source.k8s.pod.scaled-gauge.sum: 201.0 } - - match: { hits.hits.0._source.k8s.pod.scaled-gauge.value_count: 2 } - - match: { hits.hits.0._source.k8s.pod.network.tx.min: 1434521831 } - - match: { hits.hits.0._source.k8s.pod.network.tx.max: 1434577921 } - - match: { hits.hits.0._source.k8s.pod.network.tx.value_count: 2 } - - match: { hits.hits.0._source.k8s.pod.ip: "10.10.55.56" } - - match: { hits.hits.0._source.k8s.pod.created_at: "2021-04-28T19:43:00.000Z" } - - match: { 
hits.hits.0._source.k8s.pod.number_of_containers: 1 } - - match: { hits.hits.0._source.k8s.pod.tags: ["backend", "test", "us-west2"] } - - match: { hits.hits.0._source.k8s.pod.values: [1, 1, 2] } - - is_false: hits.hits.0._source.k8s.pod.running + - match: { hits.hits.0._source.k8s\.pod\.multi-counter: 0 } + - match: { hits.hits.0._source.k8s\.pod\.scaled-counter: 0.00 } + - match: { hits.hits.0._source.k8s\.pod\.multi-gauge.min: 100 } + - match: { hits.hits.0._source.k8s\.pod\.multi-gauge.max: 102 } + - match: { hits.hits.0._source.k8s\.pod\.multi-gauge.sum: 607 } + - match: { hits.hits.0._source.k8s\.pod\.multi-gauge.value_count: 6 } + - match: { hits.hits.0._source.k8s\.pod\.scaled-gauge.min: 100.0 } + - match: { hits.hits.0._source.k8s\.pod\.scaled-gauge.max: 101.0 } + - match: { hits.hits.0._source.k8s\.pod\.scaled-gauge.sum: 201.0 } + - match: { hits.hits.0._source.k8s\.pod\.scaled-gauge.value_count: 2 } + - match: { hits.hits.0._source.k8s\.pod\.network\.tx.min: 1434521831 } + - match: { hits.hits.0._source.k8s\.pod\.network\.tx.max: 1434577921 } + - match: { hits.hits.0._source.k8s\.pod\.network\.tx.value_count: 2 } + - match: { hits.hits.0._source.k8s\.pod\.ip: "10.10.55.56" } + - match: { hits.hits.0._source.k8s\.pod\.created_at: "2021-04-28T19:43:00.000Z" } + - match: { hits.hits.0._source.k8s\.pod\.number_of_containers: 1 } + - match: { hits.hits.0._source.k8s\.pod\.tags: ["backend", "test", "us-west2"] } + - match: { hits.hits.0._source.k8s\.pod\.values: [1, 1, 2] } + - is_false: hits.hits.0._source.k8s\.pod\.running # Assert rollup index settings - do: @@ -362,21 +366,21 @@ setup: - match: { test-downsample.mappings.properties.@timestamp.type: date } - match: { test-downsample.mappings.properties.@timestamp.meta.fixed_interval: 1h } - match: { test-downsample.mappings.properties.@timestamp.meta.time_zone: UTC } - - match: { test-downsample.mappings.properties.k8s.properties.pod.properties.multi-gauge.type: aggregate_metric_double } - - match: { 
test-downsample.mappings.properties.k8s.properties.pod.properties.multi-gauge.metrics: [ "min", "max", "sum", "value_count" ] } - - match: { test-downsample.mappings.properties.k8s.properties.pod.properties.multi-gauge.default_metric: max } - - match: { test-downsample.mappings.properties.k8s.properties.pod.properties.multi-gauge.time_series_metric: gauge } - - match: { test-downsample.mappings.properties.k8s.properties.pod.properties.multi-counter.type: long } - - match: { test-downsample.mappings.properties.k8s.properties.pod.properties.multi-counter.time_series_metric: counter } - - match: { test-downsample.mappings.properties.k8s.properties.pod.properties.scaled-counter.type: scaled_float } - - match: { test-downsample.mappings.properties.k8s.properties.pod.properties.scaled-counter.scaling_factor: 100 } - - match: { test-downsample.mappings.properties.k8s.properties.pod.properties.scaled-counter.time_series_metric: counter } - - match: { test-downsample.mappings.properties.k8s.properties.pod.properties.scaled-gauge.type: aggregate_metric_double } - - match: { test-downsample.mappings.properties.k8s.properties.pod.properties.scaled-gauge.metrics: [ "min", "max", "sum", "value_count" ] } - - match: { test-downsample.mappings.properties.k8s.properties.pod.properties.scaled-gauge.default_metric: max } - - match: { test-downsample.mappings.properties.k8s.properties.pod.properties.scaled-gauge.time_series_metric: gauge } - - match: { test-downsample.mappings.properties.k8s.properties.pod.properties.uid.type: keyword } - - match: { test-downsample.mappings.properties.k8s.properties.pod.properties.uid.time_series_dimension: true } + - match: { test-downsample.mappings.properties.k8s\.pod\.multi-gauge.type: aggregate_metric_double } + - match: { test-downsample.mappings.properties.k8s\.pod\.multi-gauge.metrics: [ "min", "max", "sum", "value_count" ] } + - match: { test-downsample.mappings.properties.k8s\.pod\.multi-gauge.default_metric: max } + - match: { 
test-downsample.mappings.properties.k8s\.pod\.multi-gauge.time_series_metric: gauge } + - match: { test-downsample.mappings.properties.k8s\.pod\.multi-counter.type: long } + - match: { test-downsample.mappings.properties.k8s\.pod\.multi-counter.time_series_metric: counter } + - match: { test-downsample.mappings.properties.k8s\.pod\.scaled-counter.type: scaled_float } + - match: { test-downsample.mappings.properties.k8s\.pod\.scaled-counter.scaling_factor: 100 } + - match: { test-downsample.mappings.properties.k8s\.pod\.scaled-counter.time_series_metric: counter } + - match: { test-downsample.mappings.properties.k8s\.pod\.scaled-gauge.type: aggregate_metric_double } + - match: { test-downsample.mappings.properties.k8s\.pod\.scaled-gauge.metrics: [ "min", "max", "sum", "value_count" ] } + - match: { test-downsample.mappings.properties.k8s\.pod\.scaled-gauge.default_metric: max } + - match: { test-downsample.mappings.properties.k8s\.pod\.scaled-gauge.time_series_metric: gauge } + - match: { test-downsample.mappings.properties.k8s\.pod\.uid.type: keyword } + - match: { test-downsample.mappings.properties.k8s\.pod\.uid.time_series_dimension: true } # Assert source index has not been deleted @@ -763,18 +767,18 @@ setup: - match: { test-downsample-2.mappings.properties.@timestamp.type: date } - match: { test-downsample-2.mappings.properties.@timestamp.meta.fixed_interval: 2h } - match: { test-downsample-2.mappings.properties.@timestamp.meta.time_zone: UTC } - - match: { test-downsample-2.mappings.properties.k8s.properties.pod.properties.multi-gauge.type: aggregate_metric_double } - - match: { test-downsample-2.mappings.properties.k8s.properties.pod.properties.multi-gauge.metrics: [ "min", "max", "sum", "value_count" ] } - - match: { test-downsample-2.mappings.properties.k8s.properties.pod.properties.multi-gauge.default_metric: max } - - match: { test-downsample-2.mappings.properties.k8s.properties.pod.properties.multi-gauge.time_series_metric: gauge } - - match: { 
test-downsample-2.mappings.properties.k8s.properties.pod.properties.multi-counter.type: long } - - match: { test-downsample-2.mappings.properties.k8s.properties.pod.properties.multi-counter.time_series_metric: counter } - - match: { test-downsample-2.mappings.properties.k8s.properties.pod.properties.uid.type: keyword } - - match: { test-downsample-2.mappings.properties.k8s.properties.pod.properties.uid.time_series_dimension: true } - - match: { test-downsample-2.mappings.properties.k8s.properties.pod.properties.network.properties.tx.type: aggregate_metric_double } - - match: { test-downsample-2.mappings.properties.k8s.properties.pod.properties.network.properties.tx.metrics: [ "min", "max", "sum", "value_count" ] } - - match: { test-downsample-2.mappings.properties.k8s.properties.pod.properties.network.properties.tx.default_metric: max } - - match: { test-downsample-2.mappings.properties.k8s.properties.pod.properties.network.properties.tx.time_series_metric: gauge } + - match: { test-downsample-2.mappings.properties.k8s\.pod\.multi-gauge.type: aggregate_metric_double } + - match: { test-downsample-2.mappings.properties.k8s\.pod\.multi-gauge.metrics: [ "min", "max", "sum", "value_count" ] } + - match: { test-downsample-2.mappings.properties.k8s\.pod\.multi-gauge.default_metric: max } + - match: { test-downsample-2.mappings.properties.k8s\.pod\.multi-gauge.time_series_metric: gauge } + - match: { test-downsample-2.mappings.properties.k8s\.pod\.multi-counter.type: long } + - match: { test-downsample-2.mappings.properties.k8s\.pod\.multi-counter.time_series_metric: counter } + - match: { test-downsample-2.mappings.properties.k8s\.pod\.uid.type: keyword } + - match: { test-downsample-2.mappings.properties.k8s\.pod\.uid.time_series_dimension: true } + - match: { test-downsample-2.mappings.properties.k8s\.pod\.network\.tx.type: aggregate_metric_double } + - match: { test-downsample-2.mappings.properties.k8s\.pod\.network\.tx.metrics: [ "min", "max", "sum", "value_count" ] 
} + - match: { test-downsample-2.mappings.properties.k8s\.pod\.network\.tx.default_metric: max } + - match: { test-downsample-2.mappings.properties.k8s\.pod\.network\.tx.time_series_metric: gauge } - do: search: @@ -784,29 +788,29 @@ setup: - length: { hits.hits: 3 } - match: { hits.hits.0._source._doc_count: 4 } - - match: { hits.hits.0._source.k8s.pod.uid: df3145b3-0563-4d3b-a0f7-897eb2876ea9 } + - match: { hits.hits.0._source.k8s\.pod\.uid: df3145b3-0563-4d3b-a0f7-897eb2876ea9 } - match: { hits.hits.0._source.metricset: pod } - match: { hits.hits.0._source.@timestamp: 2021-04-28T18:00:00.000Z } - - match: { hits.hits.0._source.k8s.pod.multi-counter: 76 } - - match: { hits.hits.0._source.k8s.pod.multi-gauge.min: 95.0 } - - match: { hits.hits.0._source.k8s.pod.multi-gauge.max: 110.0 } - - match: { hits.hits.0._source.k8s.pod.multi-gauge.sum: 1209.0 } - - match: { hits.hits.0._source.k8s.pod.multi-gauge.value_count: 12 } - - match: { hits.hits.0._source.k8s.pod.network.tx.min: 1434521831 } - - match: { hits.hits.0._source.k8s.pod.network.tx.max: 1434595272 } - - match: { hits.hits.0._source.k8s.pod.network.tx.value_count: 4 } - - match: { hits.hits.0._source.k8s.pod.ip: "10.10.55.120" } - - match: { hits.hits.0._source.k8s.pod.created_at: "2021-04-28T19:45:00.000Z" } - - match: { hits.hits.0._source.k8s.pod.number_of_containers: 1 } - - match: { hits.hits.0._source.k8s.pod.tags: [ "backend", "test", "us-west1" ] } - - match: { hits.hits.0._source.k8s.pod.values: [ 1, 2, 3 ] } - - - match: { hits.hits.1._source.k8s.pod.uid: 947e4ced-1786-4e53-9e0c-5c447e959507 } + - match: { hits.hits.0._source.k8s\.pod\.multi-counter: 76 } + - match: { hits.hits.0._source.k8s\.pod\.multi-gauge.min: 95.0 } + - match: { hits.hits.0._source.k8s\.pod\.multi-gauge.max: 110.0 } + - match: { hits.hits.0._source.k8s\.pod\.multi-gauge.sum: 1209.0 } + - match: { hits.hits.0._source.k8s\.pod\.multi-gauge.value_count: 12 } + - match: { hits.hits.0._source.k8s\.pod\.network\.tx.min: 1434521831 
} + - match: { hits.hits.0._source.k8s\.pod\.network\.tx.max: 1434595272 } + - match: { hits.hits.0._source.k8s\.pod\.network\.tx.value_count: 4 } + - match: { hits.hits.0._source.k8s\.pod\.ip: "10.10.55.120" } + - match: { hits.hits.0._source.k8s\.pod\.created_at: "2021-04-28T19:45:00.000Z" } + - match: { hits.hits.0._source.k8s\.pod\.number_of_containers: 1 } + - match: { hits.hits.0._source.k8s\.pod\.tags: [ "backend", "test", "us-west1" ] } + - match: { hits.hits.0._source.k8s\.pod\.values: [ 1, 2, 3 ] } + + - match: { hits.hits.1._source.k8s\.pod\.uid: 947e4ced-1786-4e53-9e0c-5c447e959507 } - match: { hits.hits.1._source.metricset: pod } - match: { hits.hits.1._source.@timestamp: 2021-04-28T18:00:00.000Z } - match: { hits.hits.1._source._doc_count: 2 } - - match: { hits.hits.2._source.k8s.pod.uid: 947e4ced-1786-4e53-9e0c-5c447e959507 } + - match: { hits.hits.2._source.k8s\.pod\.uid: 947e4ced-1786-4e53-9e0c-5c447e959507 } - match: { hits.hits.2._source.metricset: pod } - match: { hits.hits.2._source.@timestamp: 2021-04-28T20:00:00.000Z } - match: { hits.hits.2._source._doc_count: 2 } @@ -890,16 +894,16 @@ setup: - match: { test-downsample-histogram.mappings.properties.@timestamp.type: date } - match: { test-downsample-histogram.mappings.properties.@timestamp.meta.fixed_interval: 1h } - match: { test-downsample-histogram.mappings.properties.@timestamp.meta.time_zone: UTC } - - match: { test-downsample-histogram.mappings.properties.k8s.properties.pod.properties.latency.type: histogram } - - match: { test-downsample-histogram.mappings.properties.k8s.properties.pod.properties.latency.time_series_metric: null } - - match: { test-downsample-histogram.mappings.properties.k8s.properties.pod.properties.empty-histogram.type: histogram } - - match: { test-downsample-histogram.mappings.properties.k8s.properties.pod.properties.empty-histogram.time_series_metric: null } - - match: { test-downsample-histogram.mappings.properties.k8s.properties.pod.properties.uid.type: keyword 
} - - match: { test-downsample-histogram.mappings.properties.k8s.properties.pod.properties.uid.time_series_dimension: true } - - match: { test-downsample-histogram.mappings.properties.k8s.properties.pod.properties.network.properties.tx.type: aggregate_metric_double } - - match: { test-downsample-histogram.mappings.properties.k8s.properties.pod.properties.network.properties.tx.metrics: [ "min", "max", "sum", "value_count" ] } - - match: { test-downsample-histogram.mappings.properties.k8s.properties.pod.properties.network.properties.tx.default_metric: max } - - match: { test-downsample-histogram.mappings.properties.k8s.properties.pod.properties.network.properties.tx.time_series_metric: gauge } + - match: { test-downsample-histogram.mappings.properties.k8s\.pod\.latency.type: histogram } + - match: { test-downsample-histogram.mappings.properties.k8s\.pod\.latency.time_series_metric: null } + - match: { test-downsample-histogram.mappings.properties.k8s\.pod\.empty-histogram.type: histogram } + - match: { test-downsample-histogram.mappings.properties.k8s\.pod\.empty-histogram.time_series_metric: null } + - match: { test-downsample-histogram.mappings.properties.k8s\.pod\.uid.type: keyword } + - match: { test-downsample-histogram.mappings.properties.k8s\.pod\.uid.time_series_dimension: true } + - match: { test-downsample-histogram.mappings.properties.k8s\.pod\.network\.tx.type: aggregate_metric_double } + - match: { test-downsample-histogram.mappings.properties.k8s\.pod\.network\.tx.metrics: [ "min", "max", "sum", "value_count" ] } + - match: { test-downsample-histogram.mappings.properties.k8s\.pod\.network\.tx.default_metric: max } + - match: { test-downsample-histogram.mappings.properties.k8s\.pod\.network\.tx.time_series_metric: gauge } - do: search: @@ -910,64 +914,64 @@ setup: - length: { hits.hits: 4 } - match: { hits.hits.0._source._doc_count: 2 } - - match: { hits.hits.0._source.k8s.pod.uid: df3145b3-0563-4d3b-a0f7-897eb2876ea9 } + - match: { 
hits.hits.0._source.k8s\.pod\.uid: df3145b3-0563-4d3b-a0f7-897eb2876ea9 } - match: { hits.hits.0._source.metricset: pod } - match: { hits.hits.0._source.@timestamp: 2021-04-28T18:00:00.000Z } - - length: { hits.hits.0._source.k8s.pod.latency.counts: 4 } - - match: { hits.hits.0._source.k8s.pod.latency.counts.0: 2 } - - match: { hits.hits.0._source.k8s.pod.latency.counts.1: 2 } - - match: { hits.hits.0._source.k8s.pod.latency.counts.2: 8 } - - match: { hits.hits.0._source.k8s.pod.latency.counts.3: 8 } - - length: { hits.hits.0._source.k8s.pod.latency.values: 4 } - - match: { hits.hits.0._source.k8s.pod.latency.values.0: 1.0 } - - match: { hits.hits.0._source.k8s.pod.latency.values.1: 10.0 } - - match: { hits.hits.0._source.k8s.pod.latency.values.2: 100.0 } - - match: { hits.hits.0._source.k8s.pod.latency.values.3: 1000.0 } + - length: { hits.hits.0._source.k8s\.pod\.latency.counts: 4 } + - match: { hits.hits.0._source.k8s\.pod\.latency.counts.0: 2 } + - match: { hits.hits.0._source.k8s\.pod\.latency.counts.1: 2 } + - match: { hits.hits.0._source.k8s\.pod\.latency.counts.2: 8 } + - match: { hits.hits.0._source.k8s\.pod\.latency.counts.3: 8 } + - length: { hits.hits.0._source.k8s\.pod\.latency.values: 4 } + - match: { hits.hits.0._source.k8s\.pod\.latency.values.0: 1.0 } + - match: { hits.hits.0._source.k8s\.pod\.latency.values.1: 10.0 } + - match: { hits.hits.0._source.k8s\.pod\.latency.values.2: 100.0 } + - match: { hits.hits.0._source.k8s\.pod\.latency.values.3: 1000.0 } - match: { hits.hits.1._source._doc_count: 1 } - - match: { hits.hits.1._source.k8s.pod.uid: df3145b3-0563-4d3b-a0f7-897eb2876ea9 } + - match: { hits.hits.1._source.k8s\.pod\.uid: df3145b3-0563-4d3b-a0f7-897eb2876ea9 } - match: { hits.hits.1._source.metricset: pod } - match: { hits.hits.1._source.@timestamp: 2021-04-28T19:00:00.000Z } - - length: { hits.hits.1._source.k8s.pod.latency.counts: 4 } - - match: { hits.hits.1._source.k8s.pod.latency.counts.0: 4 } - - match: { 
hits.hits.1._source.k8s.pod.latency.counts.1: 5 } - - match: { hits.hits.1._source.k8s.pod.latency.counts.2: 4 } - - match: { hits.hits.1._source.k8s.pod.latency.counts.3: 13 } - - length: { hits.hits.1._source.k8s.pod.latency.values: 4 } - - match: { hits.hits.1._source.k8s.pod.latency.values.0: 1.0 } - - match: { hits.hits.1._source.k8s.pod.latency.values.1: 10.0 } - - match: { hits.hits.1._source.k8s.pod.latency.values.2: 100.0 } - - match: { hits.hits.1._source.k8s.pod.latency.values.3: 1000.0 } + - length: { hits.hits.1._source.k8s\.pod\.latency.counts: 4 } + - match: { hits.hits.1._source.k8s\.pod\.latency.counts.0: 4 } + - match: { hits.hits.1._source.k8s\.pod\.latency.counts.1: 5 } + - match: { hits.hits.1._source.k8s\.pod\.latency.counts.2: 4 } + - match: { hits.hits.1._source.k8s\.pod\.latency.counts.3: 13 } + - length: { hits.hits.1._source.k8s\.pod\.latency.values: 4 } + - match: { hits.hits.1._source.k8s\.pod\.latency.values.0: 1.0 } + - match: { hits.hits.1._source.k8s\.pod\.latency.values.1: 10.0 } + - match: { hits.hits.1._source.k8s\.pod\.latency.values.2: 100.0 } + - match: { hits.hits.1._source.k8s\.pod\.latency.values.3: 1000.0 } - match: { hits.hits.2._source._doc_count: 2 } - - match: { hits.hits.2._source.k8s.pod.uid: 947e4ced-1786-4e53-9e0c-5c447e959507 } + - match: { hits.hits.2._source.k8s\.pod\.uid: 947e4ced-1786-4e53-9e0c-5c447e959507 } - match: { hits.hits.2._source.metricset: pod } - match: { hits.hits.2._source.@timestamp: 2021-04-28T18:00:00.000Z } - - length: { hits.hits.2._source.k8s.pod.latency.counts: 4 } - - match: { hits.hits.2._source.k8s.pod.latency.counts.0: 8 } - - match: { hits.hits.2._source.k8s.pod.latency.counts.1: 7 } - - match: { hits.hits.2._source.k8s.pod.latency.counts.2: 10 } - - match: { hits.hits.2._source.k8s.pod.latency.counts.3: 12 } - - length: { hits.hits.2._source.k8s.pod.latency.values: 4 } - - match: { hits.hits.2._source.k8s.pod.latency.values.0: 1.0 } - - match: { 
hits.hits.2._source.k8s.pod.latency.values.1: 2.0 } - - match: { hits.hits.2._source.k8s.pod.latency.values.2: 5.0 } - - match: { hits.hits.2._source.k8s.pod.latency.values.3: 10.0 } + - length: { hits.hits.2._source.k8s\.pod\.latency.counts: 4 } + - match: { hits.hits.2._source.k8s\.pod\.latency.counts.0: 8 } + - match: { hits.hits.2._source.k8s\.pod\.latency.counts.1: 7 } + - match: { hits.hits.2._source.k8s\.pod\.latency.counts.2: 10 } + - match: { hits.hits.2._source.k8s\.pod\.latency.counts.3: 12 } + - length: { hits.hits.2._source.k8s\.pod\.latency.values: 4 } + - match: { hits.hits.2._source.k8s\.pod\.latency.values.0: 1.0 } + - match: { hits.hits.2._source.k8s\.pod\.latency.values.1: 2.0 } + - match: { hits.hits.2._source.k8s\.pod\.latency.values.2: 5.0 } + - match: { hits.hits.2._source.k8s\.pod\.latency.values.3: 10.0 } - match: { hits.hits.3._source._doc_count: 2 } - - match: { hits.hits.3._source.k8s.pod.uid: 947e4ced-1786-4e53-9e0c-5c447e959507 } + - match: { hits.hits.3._source.k8s\.pod\.uid: 947e4ced-1786-4e53-9e0c-5c447e959507 } - match: { hits.hits.3._source.metricset: pod } - match: { hits.hits.3._source.@timestamp: 2021-04-28T19:00:00.000Z } - - length: { hits.hits.3._source.k8s.pod.latency.counts: 4 } - - match: { hits.hits.3._source.k8s.pod.latency.counts.0: 7 } - - match: { hits.hits.3._source.k8s.pod.latency.counts.1: 15 } - - match: { hits.hits.3._source.k8s.pod.latency.counts.2: 10 } - - match: { hits.hits.3._source.k8s.pod.latency.counts.3: 10 } - - length: { hits.hits.3._source.k8s.pod.latency.values: 4 } - - match: { hits.hits.3._source.k8s.pod.latency.values.0: 1.0 } - - match: { hits.hits.3._source.k8s.pod.latency.values.1: 2.0 } - - match: { hits.hits.3._source.k8s.pod.latency.values.2: 5.0 } - - match: { hits.hits.3._source.k8s.pod.latency.values.3: 10.0 } + - length: { hits.hits.3._source.k8s\.pod\.latency.counts: 4 } + - match: { hits.hits.3._source.k8s\.pod\.latency.counts.0: 7 } + - match: { 
hits.hits.3._source.k8s\.pod\.latency.counts.1: 15 } + - match: { hits.hits.3._source.k8s\.pod\.latency.counts.2: 10 } + - match: { hits.hits.3._source.k8s\.pod\.latency.counts.3: 10 } + - length: { hits.hits.3._source.k8s\.pod\.latency.values: 4 } + - match: { hits.hits.3._source.k8s\.pod\.latency.values.0: 1.0 } + - match: { hits.hits.3._source.k8s\.pod\.latency.values.1: 2.0 } + - match: { hits.hits.3._source.k8s\.pod\.latency.values.2: 5.0 } + - match: { hits.hits.3._source.k8s\.pod\.latency.values.3: 10.0 } --- "Downsample date_nanos timestamp field using custom format": @@ -988,6 +992,7 @@ setup: start_time: 2023-02-23T00:00:00Z end_time: 2023-02-24T00:00:00Z mappings: + subobjects: false properties: "@timestamp": type: date_nanos @@ -1048,19 +1053,19 @@ setup: - length: { hits.hits: 2 } - match: { hits.hits.0._source._doc_count: 3 } - - match: { hits.hits.0._source.k8s.pod.uid: 947e4ced-1786-4e53-9e0c-5c447e959507 } + - match: { hits.hits.0._source.k8s\.pod\.uid: 947e4ced-1786-4e53-9e0c-5c447e959507 } - match: { hits.hits.0._source.metricset: pod } - match: { hits.hits.0._source.@timestamp: 2023-02-23T12:00:00.000000000Z } - - match: { hits.hits.0._source.k8s.pod.value.min: 8.0 } - - match: { hits.hits.0._source.k8s.pod.value.max: 12.0 } - - match: { hits.hits.0._source.k8s.pod.value.sum: 30.0 } + - match: { hits.hits.0._source.k8s\.pod\.value.min: 8.0 } + - match: { hits.hits.0._source.k8s\.pod\.value.max: 12.0 } + - match: { hits.hits.0._source.k8s\.pod\.value.sum: 30.0 } - match: { hits.hits.1._source._doc_count: 1 } - - match: { hits.hits.1._source.k8s.pod.uid: 947e4ced-1786-4e53-9e0c-5c447e959507 } + - match: { hits.hits.1._source.k8s\.pod\.uid: 947e4ced-1786-4e53-9e0c-5c447e959507 } - match: { hits.hits.1._source.metricset: pod } - match: { hits.hits.1._source.@timestamp: 2023-02-23T13:00:00.000000000Z } - - match: { hits.hits.1._source.k8s.pod.value.min: 9.0 } - - match: { hits.hits.1._source.k8s.pod.value.max: 9.0 } - - match: { 
hits.hits.1._source.k8s.pod.value.sum: 9.0 } + - match: { hits.hits.1._source.k8s\.pod\.value.min: 9.0 } + - match: { hits.hits.1._source.k8s\.pod\.value.max: 9.0 } + - match: { hits.hits.1._source.k8s\.pod\.value.sum: 9.0 } - do: indices.get_mapping: @@ -1090,6 +1095,7 @@ setup: start_time: 2023-02-23T00:00:00Z end_time: 2023-02-24T00:00:00Z mappings: + subobjects: false properties: "@timestamp": type: date @@ -1150,19 +1156,19 @@ setup: - length: { hits.hits: 2 } - match: { hits.hits.0._source._doc_count: 3 } - - match: { hits.hits.0._source.k8s.pod.uid: 947e4ced-1786-4e53-9e0c-5c447e959507 } + - match: { hits.hits.0._source.k8s\.pod\.uid: 947e4ced-1786-4e53-9e0c-5c447e959507 } - match: { hits.hits.0._source.metricset: pod } - match: { hits.hits.0._source.@timestamp: 2023-02-23T12:00:00.000Z } - - match: { hits.hits.0._source.k8s.pod.value.min: 8.0 } - - match: { hits.hits.0._source.k8s.pod.value.max: 12.0 } - - match: { hits.hits.0._source.k8s.pod.value.sum: 30.0 } + - match: { hits.hits.0._source.k8s\.pod\.value.min: 8.0 } + - match: { hits.hits.0._source.k8s\.pod\.value.max: 12.0 } + - match: { hits.hits.0._source.k8s\.pod\.value.sum: 30.0 } - match: { hits.hits.1._source._doc_count: 1 } - - match: { hits.hits.1._source.k8s.pod.uid: 947e4ced-1786-4e53-9e0c-5c447e959507 } + - match: { hits.hits.1._source.k8s\.pod\.uid: 947e4ced-1786-4e53-9e0c-5c447e959507 } - match: { hits.hits.1._source.metricset: pod } - match: { hits.hits.1._source.@timestamp: 2023-02-23T13:00:00.000Z } - - match: { hits.hits.1._source.k8s.pod.value.min: 9.0 } - - match: { hits.hits.1._source.k8s.pod.value.max: 9.0 } - - match: { hits.hits.1._source.k8s.pod.value.sum: 9.0 } + - match: { hits.hits.1._source.k8s\.pod\.value.min: 9.0 } + - match: { hits.hits.1._source.k8s\.pod\.value.max: 9.0 } + - match: { hits.hits.1._source.k8s\.pod\.value.sum: 9.0 } - do: indices.get_mapping: @@ -1192,6 +1198,7 @@ setup: start_time: 2023-02-23T00:00:00Z end_time: 2023-02-27T00:00:00Z mappings: + subobjects: 
false properties: "@timestamp": type: date @@ -1251,33 +1258,33 @@ setup: - length: { hits.hits: 4 } - match: { hits.hits.0._source._doc_count: 1 } - - match: { hits.hits.0._source.k8s.pod.uid: 947e4ced-1786-4e53-9e0c-5c447e959507 } + - match: { hits.hits.0._source.k8s\.pod\.uid: 947e4ced-1786-4e53-9e0c-5c447e959507 } - match: { hits.hits.0._source.metricset: pod } - match: { hits.hits.0._source.@timestamp: 2023-02-23 } - - match: { hits.hits.0._source.k8s.pod.value.min: 10.0 } - - match: { hits.hits.0._source.k8s.pod.value.max: 10.0 } - - match: { hits.hits.0._source.k8s.pod.value.sum: 10.0 } + - match: { hits.hits.0._source.k8s\.pod\.value.min: 10.0 } + - match: { hits.hits.0._source.k8s\.pod\.value.max: 10.0 } + - match: { hits.hits.0._source.k8s\.pod\.value.sum: 10.0 } - match: { hits.hits.1._source._doc_count: 1 } - - match: { hits.hits.1._source.k8s.pod.uid: 947e4ced-1786-4e53-9e0c-5c447e959507 } + - match: { hits.hits.1._source.k8s\.pod\.uid: 947e4ced-1786-4e53-9e0c-5c447e959507 } - match: { hits.hits.1._source.metricset: pod } - match: { hits.hits.1._source.@timestamp: 2023-02-24 } - - match: { hits.hits.1._source.k8s.pod.value.min: 12.0 } - - match: { hits.hits.1._source.k8s.pod.value.max: 12.0 } - - match: { hits.hits.1._source.k8s.pod.value.sum: 12.0 } + - match: { hits.hits.1._source.k8s\.pod\.value.min: 12.0 } + - match: { hits.hits.1._source.k8s\.pod\.value.max: 12.0 } + - match: { hits.hits.1._source.k8s\.pod\.value.sum: 12.0 } - match: { hits.hits.2._source._doc_count: 1 } - - match: { hits.hits.2._source.k8s.pod.uid: 947e4ced-1786-4e53-9e0c-5c447e959507 } + - match: { hits.hits.2._source.k8s\.pod\.uid: 947e4ced-1786-4e53-9e0c-5c447e959507 } - match: { hits.hits.2._source.metricset: pod } - match: { hits.hits.2._source.@timestamp: 2023-02-25 } - - match: { hits.hits.2._source.k8s.pod.value.min: 8.0 } - - match: { hits.hits.2._source.k8s.pod.value.max: 8.0 } - - match: { hits.hits.2._source.k8s.pod.value.sum: 8.0 } + - match: { 
hits.hits.2._source.k8s\.pod\.value.min: 8.0 } + - match: { hits.hits.2._source.k8s\.pod\.value.max: 8.0 } + - match: { hits.hits.2._source.k8s\.pod\.value.sum: 8.0 } - match: { hits.hits.3._source._doc_count: 1 } - - match: { hits.hits.3._source.k8s.pod.uid: 947e4ced-1786-4e53-9e0c-5c447e959507 } + - match: { hits.hits.3._source.k8s\.pod\.uid: 947e4ced-1786-4e53-9e0c-5c447e959507 } - match: { hits.hits.3._source.metricset: pod } - match: { hits.hits.3._source.@timestamp: 2023-02-26 } - - match: { hits.hits.3._source.k8s.pod.value.min: 9.0 } - - match: { hits.hits.3._source.k8s.pod.value.max: 9.0 } - - match: { hits.hits.3._source.k8s.pod.value.sum: 9.0 } + - match: { hits.hits.3._source.k8s\.pod\.value.min: 9.0 } + - match: { hits.hits.3._source.k8s\.pod\.value.max: 9.0 } + - match: { hits.hits.3._source.k8s\.pod\.value.sum: 9.0 } --- "Downsample object field": @@ -1304,48 +1311,48 @@ setup: - length: { hits.hits: 4 } - match: { hits.hits.0._source._doc_count: 2 } - - match: { hits.hits.0._source.k8s.pod.uid: df3145b3-0563-4d3b-a0f7-897eb2876ea9 } + - match: { hits.hits.0._source.k8s\.pod\.uid: df3145b3-0563-4d3b-a0f7-897eb2876ea9 } - match: { hits.hits.0._source.metricset: pod } - match: { hits.hits.0._source.@timestamp: "2021-04-28T18:00:00.000Z" } - - match: { hits.hits.0._source.k8s.pod.name: "dog" } - - match: { hits.hits.0._source.k8s.pod.value.min: 9.0 } - - match: { hits.hits.0._source.k8s.pod.value.max: 16.0 } - - match: { hits.hits.0._source.k8s.pod.value.sum: 25.0 } - - match: { hits.hits.0._source.k8s.pod.agent.id: "second" } - - match: { hits.hits.0._source.k8s.pod.agent.version: "2.1.7" } + - match: { hits.hits.0._source.k8s\.pod\.name: "dog" } + - match: { hits.hits.0._source.k8s\.pod\.value.min: 9.0 } + - match: { hits.hits.0._source.k8s\.pod\.value.max: 16.0 } + - match: { hits.hits.0._source.k8s\.pod\.value.sum: 25.0 } + - match: { hits.hits.0._source.k8s\.pod\.agent\.id: "second" } + - match: { hits.hits.0._source.k8s\.pod\.agent\.version: 
"2.1.7" } - match: { hits.hits.1._source._doc_count: 2 } - - match: { hits.hits.1._source.k8s.pod.uid: df3145b3-0563-4d3b-a0f7-897eb2876ea9 } + - match: { hits.hits.1._source.k8s\.pod\.uid: df3145b3-0563-4d3b-a0f7-897eb2876ea9 } - match: { hits.hits.1._source.metricset: pod } - match: { hits.hits.1._source.@timestamp: "2021-04-28T19:00:00.000Z" } - - match: { hits.hits.1._source.k8s.pod.name: "dog" } - - match: { hits.hits.1._source.k8s.pod.value.min: 17.0 } - - match: { hits.hits.1._source.k8s.pod.value.max: 25.0 } - - match: { hits.hits.1._source.k8s.pod.value.sum: 42.0 } - - match: { hits.hits.1._source.k8s.pod.agent.id: "second" } - - match: { hits.hits.1._source.k8s.pod.agent.version: "2.1.7" } + - match: { hits.hits.1._source.k8s\.pod\.name: "dog" } + - match: { hits.hits.1._source.k8s\.pod\.value.min: 17.0 } + - match: { hits.hits.1._source.k8s\.pod\.value.max: 25.0 } + - match: { hits.hits.1._source.k8s\.pod\.value.sum: 42.0 } + - match: { hits.hits.1._source.k8s\.pod\.agent\.id: "second" } + - match: { hits.hits.1._source.k8s\.pod\.agent\.version: "2.1.7" } - match: { hits.hits.2._source._doc_count: 2 } - - match: { hits.hits.2._source.k8s.pod.uid: 947e4ced-1786-4e53-9e0c-5c447e959507 } + - match: { hits.hits.2._source.k8s\.pod\.uid: 947e4ced-1786-4e53-9e0c-5c447e959507 } - match: { hits.hits.2._source.metricset: pod } - match: { hits.hits.2._source.@timestamp: "2021-04-28T18:00:00.000Z" } - - match: { hits.hits.2._source.k8s.pod.name: "cat" } - - match: { hits.hits.2._source.k8s.pod.value.min: 10.0 } - - match: { hits.hits.2._source.k8s.pod.value.max: 20.0 } - - match: { hits.hits.2._source.k8s.pod.value.sum: 30.0 } - - match: { hits.hits.2._source.k8s.pod.agent.id: "first" } - - match: { hits.hits.2._source.k8s.pod.agent.version: "2.0.4" } + - match: { hits.hits.2._source.k8s\.pod\.name: "cat" } + - match: { hits.hits.2._source.k8s\.pod\.value.min: 10.0 } + - match: { hits.hits.2._source.k8s\.pod\.value.max: 20.0 } + - match: { 
hits.hits.2._source.k8s\.pod\.value.sum: 30.0 } + - match: { hits.hits.2._source.k8s\.pod\.agent\.id: "first" } + - match: { hits.hits.2._source.k8s\.pod\.agent\.version: "2.0.4" } - match: { hits.hits.3._source._doc_count: 2 } - - match: { hits.hits.3._source.k8s.pod.uid: 947e4ced-1786-4e53-9e0c-5c447e959507 } + - match: { hits.hits.3._source.k8s\.pod\.uid: 947e4ced-1786-4e53-9e0c-5c447e959507 } - match: { hits.hits.3._source.metricset: pod } - match: { hits.hits.3._source.@timestamp: "2021-04-28T20:00:00.000Z" } - - match: { hits.hits.3._source.k8s.pod.name: "cat" } - - match: { hits.hits.3._source.k8s.pod.value.min: 12.0 } - - match: { hits.hits.3._source.k8s.pod.value.max: 15.0 } - - match: { hits.hits.3._source.k8s.pod.value.sum: 27.0 } - - match: { hits.hits.3._source.k8s.pod.agent.id: "first" } - - match: { hits.hits.3._source.k8s.pod.agent.version: "2.0.4" } + - match: { hits.hits.3._source.k8s\.pod\.name: "cat" } + - match: { hits.hits.3._source.k8s\.pod\.value.min: 12.0 } + - match: { hits.hits.3._source.k8s\.pod\.value.max: 15.0 } + - match: { hits.hits.3._source.k8s\.pod\.value.sum: 27.0 } + - match: { hits.hits.3._source.k8s\.pod\.agent\.id: "first" } + - match: { hits.hits.3._source.k8s\.pod\.agent\.version: "2.0.4" } --- "Downsample empty and missing labels": @@ -1372,40 +1379,40 @@ setup: - length: { hits.hits: 3 } - match: { hits.hits.2._source._doc_count: 4 } - - match: { hits.hits.2._source.k8s.pod.uid: 947e4ced-1786-4e53-9e0c-5c447e959507 } + - match: { hits.hits.2._source.k8s\.pod\.uid: 947e4ced-1786-4e53-9e0c-5c447e959507 } - match: { hits.hits.2._source.metricset: pod } - match: { hits.hits.2._source.@timestamp: "2021-04-28T18:00:00.000Z" } - - match: { hits.hits.2._source.k8s.pod.name: "cat" } - - match: { hits.hits.2._source.k8s.pod.value.min: 10.0 } - - match: { hits.hits.2._source.k8s.pod.value.max: 40.0 } - - match: { hits.hits.2._source.k8s.pod.value.sum: 100.0 } - - match: { hits.hits.2._source.k8s.pod.value.value_count: 4 } - - match: 
{ hits.hits.2._source.k8s.pod.label: "abc" } - - match: { hits.hits.2._source.k8s.pod.unmapped: "abc" } + - match: { hits.hits.2._source.k8s\.pod\.name: "cat" } + - match: { hits.hits.2._source.k8s\.pod\.value.min: 10.0 } + - match: { hits.hits.2._source.k8s\.pod\.value.max: 40.0 } + - match: { hits.hits.2._source.k8s\.pod\.value.sum: 100.0 } + - match: { hits.hits.2._source.k8s\.pod\.value.value_count: 4 } + - match: { hits.hits.2._source.k8s\.pod\.label: "abc" } + - match: { hits.hits.2._source.k8s\.pod\.unmapped: "abc" } - match: { hits.hits.1._source._doc_count: 4 } - - match: { hits.hits.1._source.k8s.pod.uid: 947e4ced-1786-4e53-9e0c-5c447e9597ab } + - match: { hits.hits.1._source.k8s\.pod\.uid: 947e4ced-1786-4e53-9e0c-5c447e9597ab } - match: { hits.hits.1._source.metricset: pod } - match: { hits.hits.1._source.@timestamp: "2021-04-28T18:00:00.000Z" } - - match: { hits.hits.1._source.k8s.pod.name: "cat" } - - match: { hits.hits.1._source.k8s.pod.value.min: 10.0 } - - match: { hits.hits.1._source.k8s.pod.value.max: 40.0 } - - match: { hits.hits.1._source.k8s.pod.value.sum: 100.0 } - - match: { hits.hits.1._source.k8s.pod.value.value_count: 4 } - - match: { hits.hits.1._source.k8s.pod.label: null } - - match: { hits.hits.1._source.k8s.pod.unmapped: null } + - match: { hits.hits.1._source.k8s\.pod\.name: "cat" } + - match: { hits.hits.1._source.k8s\.pod\.value.min: 10.0 } + - match: { hits.hits.1._source.k8s\.pod\.value.max: 40.0 } + - match: { hits.hits.1._source.k8s\.pod\.value.sum: 100.0 } + - match: { hits.hits.1._source.k8s\.pod\.value.value_count: 4 } + - match: { hits.hits.1._source.k8s\.pod\.label: null } + - match: { hits.hits.1._source.k8s\.pod\.unmapped: null } - match: { hits.hits.0._source._doc_count: 4 } - - match: { hits.hits.0._source.k8s.pod.uid: df3145b3-0563-4d3b-a0f7-897eb2876ea9 } + - match: { hits.hits.0._source.k8s\.pod\.uid: df3145b3-0563-4d3b-a0f7-897eb2876ea9 } - match: { hits.hits.0._source.metricset: pod } - match: { 
hits.hits.0._source.@timestamp: "2021-04-28T18:00:00.000Z" } - - match: { hits.hits.0._source.k8s.pod.name: "dog" } - - match: { hits.hits.0._source.k8s.pod.value.min: 10.0 } - - match: { hits.hits.0._source.k8s.pod.value.max: 40.0 } - - match: { hits.hits.0._source.k8s.pod.value.sum: 100.0 } - - match: { hits.hits.0._source.k8s.pod.value.value_count: 4 } - - match: { hits.hits.0._source.k8s.pod.label: "xyz" } - - match: { hits.hits.0._source.k8s.pod.unmapped: "xyz" } + - match: { hits.hits.0._source.k8s\.pod\.name: "dog" } + - match: { hits.hits.0._source.k8s\.pod\.value.min: 10.0 } + - match: { hits.hits.0._source.k8s\.pod\.value.max: 40.0 } + - match: { hits.hits.0._source.k8s\.pod\.value.sum: 100.0 } + - match: { hits.hits.0._source.k8s\.pod\.value.value_count: 4 } + - match: { hits.hits.0._source.k8s\.pod\.label: "xyz" } + - match: { hits.hits.0._source.k8s\.pod\.unmapped: "xyz" } --- @@ -1427,6 +1434,7 @@ setup: start_time: 2021-04-28T00:00:00Z end_time: 2021-04-29T00:00:00Z mappings: + subobjects: false properties: "@timestamp": type: date @@ -1495,45 +1503,45 @@ setup: - match: { hits.hits.0._source._doc_count: 2 } - match: { hits.hits.0._source.metricset: pod } - - match: { hits.hits.0._source.k8s.pod.name: dog } - - match: { hits.hits.0._source.k8s.pod.value: 20 } - - match: { hits.hits.0._source.k8s.pod.uid: df3145b3-0563-4d3b-a0f7-897eb2876ea9 } - - match: { hits.hits.0._source.k8s.pod.label: foo } + - match: { hits.hits.0._source.k8s\.pod\.name: dog } + - match: { hits.hits.0._source.k8s\.pod\.value: 20 } + - match: { hits.hits.0._source.k8s\.pod\.uid: df3145b3-0563-4d3b-a0f7-897eb2876ea9 } + - match: { hits.hits.0._source.k8s\.pod\.label: foo } - match: { hits.hits.0._source.@timestamp: 2021-04-28T18:00:00.000Z } - match: { hits.hits.1._source._doc_count: 2 } - match: { hits.hits.1._source.metricset: pod } - - match: { hits.hits.1._source.k8s.pod.name: fox } - - match: { hits.hits.1._source.k8s.pod.value: 20 } - - match: { 
hits.hits.1._source.k8s.pod.uid: 7393ef8e-489c-11ee-be56-0242ac120002 } - - match: { hits.hits.1._source.k8s.pod.label: bar } + - match: { hits.hits.1._source.k8s\.pod\.name: fox } + - match: { hits.hits.1._source.k8s\.pod\.value: 20 } + - match: { hits.hits.1._source.k8s\.pod\.uid: 7393ef8e-489c-11ee-be56-0242ac120002 } + - match: { hits.hits.1._source.k8s\.pod\.label: bar } - match: { hits.hits.1._source.@timestamp: 2021-04-28T18:00:00.000Z } - match: { hits.hits.2._source._doc_count: 2 } - match: { hits.hits.2._source.metricset: pod } - - match: { hits.hits.2._source.k8s.pod.name: cat } - - match: { hits.hits.2._source.k8s.pod.value: 20 } - - match: { hits.hits.2._source.k8s.pod.uid: 947e4ced-1786-4e53-9e0c-5c447e959507 } + - match: { hits.hits.2._source.k8s\.pod\.name: cat } + - match: { hits.hits.2._source.k8s\.pod\.value: 20 } + - match: { hits.hits.2._source.k8s\.pod\.uid: 947e4ced-1786-4e53-9e0c-5c447e959507 } # NOTE: when downsampling a label field we propagate the last (most-recent timestamp-wise) non-null value, # ignoring/skipping null values. 
Here the last document has a value that hits ignore_above ("foofoo") and, # as a result, we propagate the value of the previous document ("foo") - - match: { hits.hits.2._source.k8s.pod.label: foo } + - match: { hits.hits.2._source.k8s\.pod\.label: foo } - match: { hits.hits.2._source.@timestamp: 2021-04-28T18:00:00.000Z } - match: { hits.hits.3._source._doc_count: 2 } - match: { hits.hits.3._source.metricset: pod } - - match: { hits.hits.3._source.k8s.pod.name: cow } - - match: { hits.hits.3._source.k8s.pod.value: 20 } - - match: { hits.hits.3._source.k8s.pod.uid: a81ef23a-489c-11ee-be56-0242ac120005 } - - match: { hits.hits.3._source.k8s.pod.label: null } + - match: { hits.hits.3._source.k8s\.pod\.name: cow } + - match: { hits.hits.3._source.k8s\.pod\.value: 20 } + - match: { hits.hits.3._source.k8s\.pod\.uid: a81ef23a-489c-11ee-be56-0242ac120005 } + - match: { hits.hits.3._source.k8s\.pod\.label: null } - match: { hits.hits.3._source.@timestamp: 2021-04-28T18:00:00.000Z } - do: indices.get_mapping: index: test-downsample-label-ignore-above - - match: { test-downsample-label-ignore-above.mappings.properties.k8s.properties.pod.properties.label.type: keyword } - - match: { test-downsample-label-ignore-above.mappings.properties.k8s.properties.pod.properties.label.ignore_above: 3 } + - match: { test-downsample-label-ignore-above.mappings.properties.k8s\.pod\.label.type: keyword } + - match: { test-downsample-label-ignore-above.mappings.properties.k8s\.pod\.label.ignore_above: 3 } --- "Downsample index with empty dimension": @@ -1555,6 +1563,7 @@ setup: start_time: 2021-04-28T00:00:00Z end_time: 2021-04-29T00:00:00Z mappings: + subobjects: false properties: "@timestamp": type: date @@ -1612,11 +1621,11 @@ setup: - length: { hits.hits: 2 } - match: { hits.hits.0._source._doc_count: 3 } - - match: { hits.hits.0._source.k8s.pod.name: cat } - - match: { hits.hits.0._source.k8s.pod.empty: null } + - match: { hits.hits.0._source.k8s\.pod\.name: cat } + - match: { 
hits.hits.0._source.k8s\.pod\.empty: null } - match: { hits.hits.1._source._doc_count: 1 } - - match: { hits.hits.1._source.k8s.pod.name: cat } - - match: { hits.hits.1._source.k8s.pod.empty: "" } + - match: { hits.hits.1._source.k8s\.pod\.name: cat } + - match: { hits.hits.1._source.k8s\.pod\.empty: "" } --- "Downsample index with empty dimension on routing path": @@ -1638,6 +1647,7 @@ setup: start_time: 2021-04-28T00:00:00Z end_time: 2021-04-29T00:00:00Z mappings: + subobjects: false properties: "@timestamp": type: date @@ -1695,8 +1705,8 @@ setup: - length: { hits.hits: 2 } - match: { hits.hits.0._source._doc_count: 3 } - - match: { hits.hits.0._source.k8s.pod.name: cat } - - match: { hits.hits.0._source.k8s.pod.empty: null } + - match: { hits.hits.0._source.k8s\.pod\.name: cat } + - match: { hits.hits.0._source.k8s\.pod\.empty: null } - match: { hits.hits.1._source._doc_count: 1 } - - match: { hits.hits.1._source.k8s.pod.name: cat } - - match: { hits.hits.1._source.k8s.pod.empty: "" } + - match: { hits.hits.1._source.k8s\.pod\.name: cat } + - match: { hits.hits.1._source.k8s\.pod\.empty: "" } From a73e9727770398127ea3c221d25847873e2d0c2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?= Date: Fri, 25 Oct 2024 09:40:40 +0200 Subject: [PATCH 20/22] [DOCS] Adds stream inference API docs (#115333) (#115623) Co-authored-by: Pat Whelan --- .../inference/inference-apis.asciidoc | 2 + .../inference/stream-inference.asciidoc | 122 ++++++++++++++++++ 2 files changed, 124 insertions(+) create mode 100644 docs/reference/inference/stream-inference.asciidoc diff --git a/docs/reference/inference/inference-apis.asciidoc b/docs/reference/inference/inference-apis.asciidoc index ddcff1abc7dce..1206cb02ba89a 100644 --- a/docs/reference/inference/inference-apis.asciidoc +++ b/docs/reference/inference/inference-apis.asciidoc @@ -19,6 +19,7 @@ the following APIs to manage {infer} models and perform {infer}: * <> * <> * <> +* <> * <> 
[[inference-landscape]] @@ -56,6 +57,7 @@ include::delete-inference.asciidoc[] include::get-inference.asciidoc[] include::post-inference.asciidoc[] include::put-inference.asciidoc[] +include::stream-inference.asciidoc[] include::update-inference.asciidoc[] include::service-alibabacloud-ai-search.asciidoc[] include::service-amazon-bedrock.asciidoc[] diff --git a/docs/reference/inference/stream-inference.asciidoc b/docs/reference/inference/stream-inference.asciidoc new file mode 100644 index 0000000000000..e66acd630cb3e --- /dev/null +++ b/docs/reference/inference/stream-inference.asciidoc @@ -0,0 +1,122 @@ +[role="xpack"] +[[stream-inference-api]] +=== Stream inference API + +Streams a chat completion response. + +IMPORTANT: The {infer} APIs enable you to use certain services, such as built-in {ml} models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. +For built-in models and models uploaded through Eland, the {infer} APIs offer an alternative way to use and manage trained models. +However, if you do not plan to use the {infer} APIs to use these models or if you want to use non-NLP models, use the <>. + + +[discrete] +[[stream-inference-api-request]] +==== {api-request-title} + +`POST /_inference//_stream` + +`POST /_inference///_stream` + + +[discrete] +[[stream-inference-api-prereqs]] +==== {api-prereq-title} + +* Requires the `monitor_inference` <> +(the built-in `inference_admin` and `inference_user` roles grant this privilege) +* You must use a client that supports streaming. + + +[discrete] +[[stream-inference-api-desc]] +==== {api-description-title} + +The stream {infer} API enables real-time responses for completion tasks by delivering answers incrementally, reducing response times during computation. +It only works with the `completion` task type. 
+ + +[discrete] +[[stream-inference-api-path-params]] +==== {api-path-parms-title} + +``:: +(Required, string) +The unique identifier of the {infer} endpoint. + + +``:: +(Optional, string) +The type of {infer} task that the model performs. + + +[discrete] +[[stream-inference-api-request-body]] +==== {api-request-body-title} + +`input`:: +(Required, string or array of strings) +The text on which you want to perform the {infer} task. +`input` can be a single string or an array. ++ +-- +[NOTE] +==== +Inference endpoints for the `completion` task type currently only support a +single string as input. +==== +-- + + +[discrete] +[[stream-inference-api-example]] +==== {api-examples-title} + +The following example performs a completion on the example question with streaming. + + +[source,console] +------------------------------------------------------------ +POST _inference/completion/openai-completion/_stream +{ + "input": "What is Elastic?" +} +------------------------------------------------------------ +// TEST[skip:TBD] + + +The API returns the following response: + + +[source,txt] +------------------------------------------------------------ +event: message +data: { + "completion":[{ + "delta":"Elastic" + }] +} + +event: message +data: { + "completion":[{ + "delta":" is" + }, + { + "delta":" a" + } + ] +} + +event: message +data: { + "completion":[{ + "delta":" software" + }, + { + "delta":" company" + }] +} + +(...) +------------------------------------------------------------ +// NOTCONSOLE From 4782ee552df73e50e97d24b85caaf72bcc9714e9 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Fri, 25 Oct 2024 09:54:54 +0200 Subject: [PATCH 21/22] Lazy initialize HttpRouteStatsTracker in MethodHandlers (#114107) (#115620) We use about 1M for the route stats trackers instances per ES instance. Making this lazy init should come at a trivial overhead and in fact makes the computation of the node stats cheaper by saving spurious sums on 0-valued long adders. 
--- .../elasticsearch/http/HttpRouteStats.java | 2 + .../elasticsearch/rest/MethodHandlers.java | 42 ++++++++++++++----- .../elasticsearch/rest/RestController.java | 25 +++++------ 3 files changed, 46 insertions(+), 23 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/http/HttpRouteStats.java b/server/src/main/java/org/elasticsearch/http/HttpRouteStats.java index 5be1ae9312c46..a15b929fd3c1b 100644 --- a/server/src/main/java/org/elasticsearch/http/HttpRouteStats.java +++ b/server/src/main/java/org/elasticsearch/http/HttpRouteStats.java @@ -49,6 +49,8 @@ public record HttpRouteStats( long[] responseTimeHistogram ) implements Writeable, ToXContentObject { + public static final HttpRouteStats EMPTY = new HttpRouteStats(0, 0, new long[0], 0, 0, new long[0], new long[0]); + public HttpRouteStats(StreamInput in) throws IOException { this(in.readVLong(), in.readVLong(), in.readVLongArray(), in.readVLong(), in.readVLong(), in.readVLongArray(), in.readVLongArray()); } diff --git a/server/src/main/java/org/elasticsearch/rest/MethodHandlers.java b/server/src/main/java/org/elasticsearch/rest/MethodHandlers.java index a947ddce2b9f3..2f53f48f9ae5b 100644 --- a/server/src/main/java/org/elasticsearch/rest/MethodHandlers.java +++ b/server/src/main/java/org/elasticsearch/rest/MethodHandlers.java @@ -13,6 +13,8 @@ import org.elasticsearch.http.HttpRouteStats; import org.elasticsearch.http.HttpRouteStatsTracker; +import java.lang.invoke.MethodHandles; +import java.lang.invoke.VarHandle; import java.util.EnumMap; import java.util.Map; import java.util.Set; @@ -25,7 +27,18 @@ final class MethodHandlers { private final String path; private final Map> methodHandlers; - private final HttpRouteStatsTracker statsTracker = new HttpRouteStatsTracker(); + @SuppressWarnings("unused") // only accessed via #STATS_TRACKER_HANDLE, lazy initialized because instances consume non-trivial heap + private volatile HttpRouteStatsTracker statsTracker; + + private static final VarHandle 
STATS_TRACKER_HANDLE; + + static { + try { + STATS_TRACKER_HANDLE = MethodHandles.lookup().findVarHandle(MethodHandlers.class, "statsTracker", HttpRouteStatsTracker.class); + } catch (NoSuchFieldException | IllegalAccessException e) { + throw new ExceptionInInitializerError(e); + } + } MethodHandlers(String path) { this.path = path; @@ -73,19 +86,26 @@ Set getValidMethods() { return methodHandlers.keySet(); } - public void addRequestStats(int contentLength) { - statsTracker.addRequestStats(contentLength); - } - - public void addResponseStats(long contentLength) { - statsTracker.addResponseStats(contentLength); + public HttpRouteStats getStats() { + var tracker = existingStatsTracker(); + if (tracker == null) { + return HttpRouteStats.EMPTY; + } + return tracker.getStats(); } - public void addResponseTime(long timeMillis) { - statsTracker.addResponseTime(timeMillis); + public HttpRouteStatsTracker statsTracker() { + var tracker = existingStatsTracker(); + if (tracker == null) { + var newTracker = new HttpRouteStatsTracker(); + if ((tracker = (HttpRouteStatsTracker) STATS_TRACKER_HANDLE.compareAndExchange(this, null, newTracker)) == null) { + tracker = newTracker; + } + } + return tracker; } - public HttpRouteStats getStats() { - return statsTracker.getStats(); + private HttpRouteStatsTracker existingStatsTracker() { + return (HttpRouteStatsTracker) STATS_TRACKER_HANDLE.getAcquire(this); } } diff --git a/server/src/main/java/org/elasticsearch/rest/RestController.java b/server/src/main/java/org/elasticsearch/rest/RestController.java index 924cd361c671d..760facd561abf 100644 --- a/server/src/main/java/org/elasticsearch/rest/RestController.java +++ b/server/src/main/java/org/elasticsearch/rest/RestController.java @@ -36,6 +36,7 @@ import org.elasticsearch.core.TimeValue; import org.elasticsearch.http.HttpHeadersValidationException; import org.elasticsearch.http.HttpRouteStats; +import org.elasticsearch.http.HttpRouteStatsTracker; import 
org.elasticsearch.http.HttpServerTransport; import org.elasticsearch.indices.breaker.CircuitBreakerService; import org.elasticsearch.rest.RestHandler.Route; @@ -914,7 +915,7 @@ public void sendResponse(RestResponse response) { private static final class ResourceHandlingHttpChannel extends DelegatingRestChannel { private final CircuitBreakerService circuitBreakerService; private final int contentLength; - private final MethodHandlers methodHandlers; + private final HttpRouteStatsTracker statsTracker; private final long startTime; private final AtomicBoolean closed = new AtomicBoolean(); @@ -927,7 +928,7 @@ private static final class ResourceHandlingHttpChannel extends DelegatingRestCha super(delegate); this.circuitBreakerService = circuitBreakerService; this.contentLength = contentLength; - this.methodHandlers = methodHandlers; + this.statsTracker = methodHandlers.statsTracker(); this.startTime = rawRelativeTimeInMillis(); } @@ -936,12 +937,12 @@ public void sendResponse(RestResponse response) { boolean success = false; try { close(); - methodHandlers.addRequestStats(contentLength); - methodHandlers.addResponseTime(rawRelativeTimeInMillis() - startTime); + statsTracker.addRequestStats(contentLength); + statsTracker.addResponseTime(rawRelativeTimeInMillis() - startTime); if (response.isChunked() == false) { - methodHandlers.addResponseStats(response.content().length()); + statsTracker.addResponseStats(response.content().length()); } else { - final var responseLengthRecorder = new ResponseLengthRecorder(methodHandlers); + final var responseLengthRecorder = new ResponseLengthRecorder(statsTracker); final var headers = response.getHeaders(); response = RestResponse.chunked( response.status(), @@ -976,11 +977,11 @@ private void close() { } } - private static class ResponseLengthRecorder extends AtomicReference implements Releasable { + private static class ResponseLengthRecorder extends AtomicReference implements Releasable { private long responseLength; - private 
ResponseLengthRecorder(MethodHandlers methodHandlers) { - super(methodHandlers); + private ResponseLengthRecorder(HttpRouteStatsTracker routeStatsTracker) { + super(routeStatsTracker); } @Override @@ -988,11 +989,11 @@ public void close() { // closed just before sending the last chunk, and also when the whole RestResponse is closed since the client might abort the // connection before we send the last chunk, in which case we won't have recorded the response in the // stats yet; thus we need run-once semantics here: - final var methodHandlers = getAndSet(null); - if (methodHandlers != null) { + final var routeStatsTracker = getAndSet(null); + if (routeStatsTracker != null) { // if we started sending chunks then we're closed on the transport worker, no need for sync assert responseLength == 0L || Transports.assertTransportThread(); - methodHandlers.addResponseStats(responseLength); + routeStatsTracker.addResponseStats(responseLength); } } From b151c14189152234341077ddbabb6b318c18d3d6 Mon Sep 17 00:00:00 2001 From: Chris Hegarty <62058229+ChrisHegarty@users.noreply.github.com> Date: Thu, 24 Oct 2024 21:43:22 +0100 Subject: [PATCH 22/22] Update BlobCacheBufferedIndexInput::readVLong to correctly handle negative long values (#115594) --- docs/changelog/115594.yaml | 6 ++++++ .../blobcache/common/BlobCacheBufferedIndexInput.java | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) create mode 100644 docs/changelog/115594.yaml diff --git a/docs/changelog/115594.yaml b/docs/changelog/115594.yaml new file mode 100644 index 0000000000000..91a6089dfb3ce --- /dev/null +++ b/docs/changelog/115594.yaml @@ -0,0 +1,6 @@ +pr: 115594 +summary: Update `BlobCacheBufferedIndexInput::readVLong` to correctly handle negative + long values +area: Search +type: bug +issues: [] diff --git a/x-pack/plugin/blob-cache/src/main/java/org/elasticsearch/blobcache/common/BlobCacheBufferedIndexInput.java 
b/x-pack/plugin/blob-cache/src/main/java/org/elasticsearch/blobcache/common/BlobCacheBufferedIndexInput.java index 95b2324d03b52..903b61bbc6279 100644 --- a/x-pack/plugin/blob-cache/src/main/java/org/elasticsearch/blobcache/common/BlobCacheBufferedIndexInput.java +++ b/x-pack/plugin/blob-cache/src/main/java/org/elasticsearch/blobcache/common/BlobCacheBufferedIndexInput.java @@ -175,7 +175,7 @@ public final int readVInt() throws IOException { @Override public final long readVLong() throws IOException { - if (9 <= buffer.remaining()) { + if (10 <= buffer.remaining()) { return ByteBufferStreamInput.readVLong(buffer); } else { return super.readVLong();