From ce25f45af01df195039f9bb99678a5a6ce487535 Mon Sep 17 00:00:00 2001 From: Pat Whelan Date: Mon, 23 Sep 2024 10:44:57 -0400 Subject: [PATCH 01/58] [ML] Remove regex (#113210) (#113380) Regex is having trouble parsing some of the larger UTF8 characters, so instead we are just going to use our non-regex parser. Fix #113179 Fix #113148 Co-authored-by: Elastic Machine --- ...rverSentEventsRestActionListenerTests.java | 49 +++++++------------ 1 file changed, 17 insertions(+), 32 deletions(-) diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/rest/ServerSentEventsRestActionListenerTests.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/rest/ServerSentEventsRestActionListenerTests.java index d80cb1e93ee12..d56b9fe21cd50 100644 --- a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/rest/ServerSentEventsRestActionListenerTests.java +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/rest/ServerSentEventsRestActionListenerTests.java @@ -47,6 +47,9 @@ import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.xcontent.ToXContent; import org.elasticsearch.xpack.core.inference.action.InferenceAction; +import org.elasticsearch.xpack.inference.external.response.streaming.ServerSentEvent; +import org.elasticsearch.xpack.inference.external.response.streaming.ServerSentEventField; +import org.elasticsearch.xpack.inference.external.response.streaming.ServerSentEventParser; import java.io.IOException; import java.nio.charset.StandardCharsets; @@ -59,7 +62,6 @@ import java.util.concurrent.Flow; import java.util.concurrent.LinkedBlockingDeque; import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; import java.util.function.Predicate; @@ -80,9 +82,7 @@ public class ServerSentEventsRestActionListenerTests extends ESIntegTestCase { private static final String NO_STREAM_ROUTE = "/_inference_no_stream"; private static final Exception expectedException = new IllegalStateException("hello there"); private static final String expectedExceptionAsServerSentEvent = """ - \uFEFF\ - event: error - data: {\ + {\ "error":{"root_cause":[{"type":"illegal_state_exception","reason":"hello there",\ "caused_by":{"type":"illegal_state_exception","reason":"hello there"}}],\ "type":"illegal_state_exception","reason":"hello there"},"status":500\ @@ -323,30 +323,16 @@ protected void releaseResources() {} } private static class RandomStringCollector { - private static final Pattern jsonPattern = Pattern.compile("^\uFEFFevent: message\ndata: \\{.*}$"); - private static final Pattern endPattern = Pattern.compile("^\uFEFFevent: message\ndata: \\[DONE\\]$"); - private final AtomicBoolean hasDoneChunk = new AtomicBoolean(false); private final Deque stringsVerified = new LinkedBlockingDeque<>(); - private volatile String previousTokens = ""; + private final ServerSentEventParser sseParser = new ServerSentEventParser(); private void collect(String str) throws IOException { - str = previousTokens + str; - String[] events = str.split("\n\n", -1); - for (var i = 0; i < events.length - 1; i++) { - var line = events[i]; - if (jsonPattern.matcher(line).matches() || expectedExceptionAsServerSentEvent.equals(line)) { - stringsVerified.offer(line); - } else if (endPattern.matcher(line).matches()) { - hasDoneChunk.set(true); - } else { - 
throw new IOException("Line does not match expected JSON message or DONE message. Line: " + line); - } - } - - previousTokens = events[events.length - 1]; - if (endPattern.matcher(previousTokens.trim()).matches()) { - hasDoneChunk.set(true); - } + sseParser.parse(str.getBytes(StandardCharsets.UTF_8)) + .stream() + .filter(event -> event.name() == ServerSentEventField.DATA) + .filter(ServerSentEvent::hasValue) + .map(ServerSentEvent::value) + .forEach(stringsVerified::offer); } } @@ -363,8 +349,8 @@ public void testResponse() { var response = callAsync(request); assertThat(response.getStatusLine().getStatusCode(), is(HttpStatus.SC_OK)); - assertThat(collector.stringsVerified.size(), equalTo(expectedTestCount)); - assertThat(collector.hasDoneChunk.get(), equalTo(true)); + assertThat(collector.stringsVerified.size(), equalTo(expectedTestCount + 1)); // normal payload count + done byte + assertThat(collector.stringsVerified.peekLast(), equalTo("[DONE]")); } private Response callAsync(Request request) { @@ -409,10 +395,9 @@ public void testOnFailure() throws IOException { } catch (ResponseException e) { var response = e.getResponse(); assertThat(response.getStatusLine().getStatusCode(), is(HttpStatus.SC_INTERNAL_SERVER_ERROR)); - assertThat( - EntityUtils.toString(response.getEntity(), StandardCharsets.UTF_8), - equalTo(expectedExceptionAsServerSentEvent + "\n\n") - ); + assertThat(EntityUtils.toString(response.getEntity(), StandardCharsets.UTF_8), equalTo(""" + \uFEFFevent: error + data:\s""" + expectedExceptionAsServerSentEvent + "\n\n")); } } @@ -431,7 +416,7 @@ public void testErrorMidStream() { var response = callAsync(request); assertThat(response.getStatusLine().getStatusCode(), is(HttpStatus.SC_OK)); // error still starts with 200-OK assertThat(collector.stringsVerified.size(), equalTo(expectedTestCount + 1)); // normal payload count + last error byte - assertThat("DONE chunk is not sent on error", collector.hasDoneChunk.get(), equalTo(false)); + assertThat("DONE chunk is not sent on error", collector.stringsVerified.stream().anyMatch("[DONE]"::equals), equalTo(false)); assertThat(collector.stringsVerified.getLast(), equalTo(expectedExceptionAsServerSentEvent)); } From 9ae2439a34bd4337d574ee6d5d39825e75e21319 Mon Sep 17 00:00:00 2001 From: Liam Thompson <32779855+leemthompo@users.noreply.github.com> Date: Mon, 23 Sep 2024 17:25:32 +0200 Subject: [PATCH 02/58] [DOCS] Add snippet tests to retriever API docs (#113289) (#113396) --- docs/reference/search/retriever.asciidoc | 70 ++++++++++++++++++------ 1 file changed, 52 insertions(+), 18 deletions(-) diff --git a/docs/reference/search/retriever.asciidoc b/docs/reference/search/retriever.asciidoc index 6d3a1a36ad407..54836ac33762d 100644 --- a/docs/reference/search/retriever.asciidoc +++ b/docs/reference/search/retriever.asciidoc @@ -81,7 +81,43 @@ retrievers) *only* the query element is allowed. 
[[standard-retriever-example]] ==== Example -[source,js] +//// +[source,console] +---- +PUT /restaurants +{ + "mappings": { + "properties": { + "region": { "type": "keyword" }, + "year": { "type": "keyword" }, + "vector": { + "type": "dense_vector", + "dims": 3 + } + } + } +} + +POST /restaurants/_bulk?refresh +{"index":{}} +{"region": "Austria", "year": "2019", "vector": [10, 22, 77]} +{"index":{}} +{"region": "France", "year": "2019", "vector": [10, 22, 78]} +{"index":{}} +{"region": "Austria", "year": "2020", "vector": [10, 22, 79]} +{"index":{}} +{"region": "France", "year": "2020", "vector": [10, 22, 80]} +---- +// TESTSETUP + +[source,console] +-------------------------------------------------- +DELETE /restaurants +-------------------------------------------------- +// TEARDOWN +//// + +[source,console] ---- GET /restaurants/_search { @@ -109,9 +145,8 @@ GET /restaurants/_search } } ---- -// NOTCONSOLE <1> Opens the `retriever` object. -<2> The `standard` retriever is used for definining traditional {es} queries. +<2> The `standard` retriever is used for defining traditional {es} queries. <3> The entry point for defining the search query. <4> The `bool` object allows for combining multiple query clauses logically. <5> The `should` array indicates conditions under which a document will match. Documents matching these conditions will increase their relevancy score. @@ -171,9 +206,9 @@ The parameters `query_vector` and `query_vector_builder` cannot be used together [[knn-retriever-example]] ==== Example -[source,js] +[source,console] ---- -GET my-embeddings/_search +GET /restaurants/_search { "retriever": { "knn": { <1> @@ -185,8 +220,7 @@ GET my-embeddings/_search } } ---- -// NOTCONSOLE - +// TEST[continued] <1> Configuration for k-nearest neighbor (knn) search, which is based on vector similarity. <2> Specifies the field name that contains the vectors. <3> The query vector against which document vectors are compared in the `knn` search. @@ -223,7 +257,7 @@ the retriever tree. A simple hybrid search example (lexical search + dense vector search) combining a `standard` retriever with a `knn` retriever using RRF: -[source,js] +[source,console] ---- GET /restaurants/_search { @@ -234,7 +268,7 @@ GET /restaurants/_search "standard": { <3> "query": { "multi_match": { - "query": "San Francisco", + "query": "Austria", "fields": [ "city", "region" @@ -258,7 +292,7 @@ GET /restaurants/_search } } ---- -// NOTCONSOLE +// TEST[continued] <1> Defines a retriever tree with an RRF retriever. <2> The sub-retriever array. <3> The first sub-retriever is a `standard` retriever. @@ -272,7 +306,7 @@ GET /restaurants/_search A more complex hybrid search example (lexical search + ELSER sparse vector search + dense vector search) using RRF: -[source,js] +[source,console] ---- GET movies/_search { @@ -316,7 +350,7 @@ GET movies/_search } } ---- -// NOTCONSOLE +// TEST[skip:uses ELSER] [[text-similarity-reranker-retriever]] ==== Text Similarity Re-ranker Retriever @@ -390,7 +424,7 @@ A text similarity re-ranker retriever is a compound retriever. Child retrievers This example enables out-of-the-box semantic search by re-ranking top documents using the Cohere Rerank API. This approach eliminate the need to generate and store embeddings for all indexed documents. This requires a <> using the `rerank` task type. 
-[source,js] +[source,console] ---- GET /index/_search { @@ -414,7 +448,7 @@ GET /index/_search } } ---- -// NOTCONSOLE +// TEST[skip:uses ML] [discrete] [[text-similarity-reranker-retriever-example-eland]] @@ -452,7 +486,7 @@ eland_import_hub_model \ + . Create an inference endpoint for the `rerank` task + -[source,js] +[source,console] ---- PUT _inference/rerank/my-msmarco-minilm-model { @@ -464,11 +498,11 @@ PUT _inference/rerank/my-msmarco-minilm-model } } ---- -// NOTCONSOLE +// TEST[skip:uses ML] + . Define a `text_similarity_rerank` retriever. + -[source,js] +[source,console] ---- POST movies/_search { @@ -490,7 +524,7 @@ POST movies/_search } } ---- -// NOTCONSOLE +// TEST[skip:uses ML] + This retriever uses a standard `match` query to search the `movie` index for films tagged with the genre "drama". It then re-ranks the results based on semantic similarity to the text in the `inference_text` parameter, using the model we uploaded to {es}. From 2b30482e76f7d35d64a8cd469dc982a8e80820e9 Mon Sep 17 00:00:00 2001 From: David Turner Date: Mon, 23 Sep 2024 16:30:47 +0100 Subject: [PATCH 03/58] Make `OpenIndexClusterStateUpdateRequest` a record (#113351) (#113390) No need to extend `IndicesClusterStateUpdateRequest`, this thing can be completely immutable. --- .../OpenIndexClusterStateUpdateRequest.java | 31 +++++++++-------- .../open/TransportOpenIndexAction.java | 33 ++++++++++--------- .../action/TransportFreezeIndexAction.java | 10 +++--- 3 files changed, 39 insertions(+), 35 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/open/OpenIndexClusterStateUpdateRequest.java b/server/src/main/java/org/elasticsearch/action/admin/indices/open/OpenIndexClusterStateUpdateRequest.java index 5967c9923be13..92e2433a9b1cc 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/open/OpenIndexClusterStateUpdateRequest.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/open/OpenIndexClusterStateUpdateRequest.java @@ -9,25 +9,24 @@ package org.elasticsearch.action.admin.indices.open; import org.elasticsearch.action.support.ActiveShardCount; -import org.elasticsearch.cluster.ack.IndicesClusterStateUpdateRequest; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.index.Index; + +import java.util.Objects; /** * Cluster state update request that allows to open one or more indices */ -public class OpenIndexClusterStateUpdateRequest extends IndicesClusterStateUpdateRequest { - - private ActiveShardCount waitForActiveShards = ActiveShardCount.DEFAULT; - - public OpenIndexClusterStateUpdateRequest() { - - } - - public ActiveShardCount waitForActiveShards() { - return waitForActiveShards; - } - - public OpenIndexClusterStateUpdateRequest waitForActiveShards(ActiveShardCount waitForActiveShards) { - this.waitForActiveShards = waitForActiveShards; - return this; +public record OpenIndexClusterStateUpdateRequest( + TimeValue masterNodeTimeout, + TimeValue ackTimeout, + ActiveShardCount waitForActiveShards, + Index[] indices +) { + public OpenIndexClusterStateUpdateRequest { + Objects.requireNonNull(masterNodeTimeout); + Objects.requireNonNull(ackTimeout); + Objects.requireNonNull(waitForActiveShards); + Objects.requireNonNull(indices); } } diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/open/TransportOpenIndexAction.java b/server/src/main/java/org/elasticsearch/action/admin/indices/open/TransportOpenIndexAction.java index 1184ad66eae0f..ffaa50f7c0f29 100644 --- 
a/server/src/main/java/org/elasticsearch/action/admin/indices/open/TransportOpenIndexAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/open/TransportOpenIndexAction.java @@ -90,23 +90,26 @@ protected void masterOperation( listener.onResponse(new OpenIndexResponse(true, true)); return; } - OpenIndexClusterStateUpdateRequest updateRequest = new OpenIndexClusterStateUpdateRequest().ackTimeout(request.ackTimeout()) - .masterNodeTimeout(request.masterNodeTimeout()) - .indices(concreteIndices) - .waitForActiveShards(request.waitForActiveShards()); - indexStateService.openIndices(updateRequest, new ActionListener<>() { + indexStateService.openIndices( + new OpenIndexClusterStateUpdateRequest( + request.masterNodeTimeout(), + request.ackTimeout(), + request.waitForActiveShards(), + concreteIndices + ), + new ActionListener<>() { + @Override + public void onResponse(ShardsAcknowledgedResponse response) { + listener.onResponse(new OpenIndexResponse(response.isAcknowledged(), response.isShardsAcknowledged())); + } - @Override - public void onResponse(ShardsAcknowledgedResponse response) { - listener.onResponse(new OpenIndexResponse(response.isAcknowledged(), response.isShardsAcknowledged())); + @Override + public void onFailure(Exception t) { + logger.debug(() -> "failed to open indices [" + Arrays.toString(concreteIndices) + "]", t); + listener.onFailure(t); + } } - - @Override - public void onFailure(Exception t) { - logger.debug(() -> "failed to open indices [" + Arrays.toString(concreteIndices) + "]", t); - listener.onFailure(t); - } - }); + ); } } diff --git a/x-pack/plugin/frozen-indices/src/main/java/org/elasticsearch/xpack/frozen/action/TransportFreezeIndexAction.java b/x-pack/plugin/frozen-indices/src/main/java/org/elasticsearch/xpack/frozen/action/TransportFreezeIndexAction.java index 39e992b0d103c..96225ecf0430c 100644 --- a/x-pack/plugin/frozen-indices/src/main/java/org/elasticsearch/xpack/frozen/action/TransportFreezeIndexAction.java +++ b/x-pack/plugin/frozen-indices/src/main/java/org/elasticsearch/xpack/frozen/action/TransportFreezeIndexAction.java @@ -145,10 +145,12 @@ private void toggleFrozenSettings( submitUnbatchedTask( "toggle-frozen-settings", new AckedClusterStateUpdateTask(Priority.URGENT, request, listener.delegateFailure((delegate, acknowledgedResponse) -> { - OpenIndexClusterStateUpdateRequest updateRequest = new OpenIndexClusterStateUpdateRequest().ackTimeout(request.ackTimeout()) - .masterNodeTimeout(request.masterNodeTimeout()) - .indices(concreteIndices) - .waitForActiveShards(request.waitForActiveShards()); + OpenIndexClusterStateUpdateRequest updateRequest = new OpenIndexClusterStateUpdateRequest( + request.masterNodeTimeout(), + request.ackTimeout(), + request.waitForActiveShards(), + concreteIndices + ); indexStateService.openIndices( updateRequest, delegate.safeMap( From fb95f7678c39d1f520b619a71d161bd91d839596 Mon Sep 17 00:00:00 2001 From: David Turner Date: Mon, 23 Sep 2024 16:36:42 +0100 Subject: [PATCH 04/58] Make `CloseIndexClusterStateUpdateRequest` a record (#113350) (#113391) No need to extend `IndicesClusterStateUpdateRequest`, this thing can be completely immutable. 
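In outline, the pattern (shown here with illustrative stand-in types rather than
the actual Elasticsearch classes) is:

```java
import java.util.Objects;

// A sketch of the refactoring: the mutable subclass with fluent setters is
// replaced by an immutable record whose compact constructor validates every
// component once, at construction time.
public record ExampleUpdateRequest(String masterNodeTimeout, String ackTimeout, String[] indices) {
    public ExampleUpdateRequest {
        Objects.requireNonNull(masterNodeTimeout);
        Objects.requireNonNull(ackTimeout);
        Objects.requireNonNull(indices);
    }
    // masterNodeTimeout(), ackTimeout() and indices() accessors are generated
    // automatically, so call sites keep the same method names as before.
}
```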
--- .../CloseIndexClusterStateUpdateRequest.java | 42 +++++++------------ .../close/TransportCloseIndexAction.java | 10 +++-- .../action/TransportFreezeIndexAction.java | 11 +++-- 3 files changed, 31 insertions(+), 32 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/close/CloseIndexClusterStateUpdateRequest.java b/server/src/main/java/org/elasticsearch/action/admin/indices/close/CloseIndexClusterStateUpdateRequest.java index 35e9b42a97ebc..de8db5c025d13 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/close/CloseIndexClusterStateUpdateRequest.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/close/CloseIndexClusterStateUpdateRequest.java @@ -9,35 +9,25 @@ package org.elasticsearch.action.admin.indices.close; import org.elasticsearch.action.support.ActiveShardCount; -import org.elasticsearch.cluster.ack.IndicesClusterStateUpdateRequest; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.index.Index; + +import java.util.Objects; /** * Cluster state update request that allows to close one or more indices */ -public class CloseIndexClusterStateUpdateRequest extends IndicesClusterStateUpdateRequest { - - private long taskId; - private ActiveShardCount waitForActiveShards = ActiveShardCount.DEFAULT; - - public CloseIndexClusterStateUpdateRequest(final long taskId) { - this.taskId = taskId; - } - - public long taskId() { - return taskId; - } - - public CloseIndexClusterStateUpdateRequest taskId(final long taskId) { - this.taskId = taskId; - return this; - } - - public ActiveShardCount waitForActiveShards() { - return waitForActiveShards; - } - - public CloseIndexClusterStateUpdateRequest waitForActiveShards(final ActiveShardCount waitForActiveShards) { - this.waitForActiveShards = waitForActiveShards; - return this; +public record CloseIndexClusterStateUpdateRequest( + TimeValue masterNodeTimeout, + TimeValue ackTimeout, + long taskId, + ActiveShardCount waitForActiveShards, + Index[] indices +) { + public CloseIndexClusterStateUpdateRequest { + Objects.requireNonNull(masterNodeTimeout); + Objects.requireNonNull(ackTimeout); + Objects.requireNonNull(waitForActiveShards); + Objects.requireNonNull(indices); } } diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/close/TransportCloseIndexAction.java b/server/src/main/java/org/elasticsearch/action/admin/indices/close/TransportCloseIndexAction.java index 2d1e7805fa59a..5a4292804fd6c 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/close/TransportCloseIndexAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/close/TransportCloseIndexAction.java @@ -120,9 +120,13 @@ protected void masterOperation( return; } - final CloseIndexClusterStateUpdateRequest closeRequest = new CloseIndexClusterStateUpdateRequest(task.getId()).ackTimeout( - request.ackTimeout() - ).masterNodeTimeout(request.masterNodeTimeout()).waitForActiveShards(request.waitForActiveShards()).indices(concreteIndices); + final CloseIndexClusterStateUpdateRequest closeRequest = new CloseIndexClusterStateUpdateRequest( + request.masterNodeTimeout(), + request.ackTimeout(), + task.getId(), + request.waitForActiveShards(), + concreteIndices + ); indexStateService.closeIndices(closeRequest, listener.delegateResponse((delegatedListener, t) -> { logger.debug(() -> "failed to close indices [" + Arrays.toString(concreteIndices) + "]", t); delegatedListener.onFailure(t); diff --git 
a/x-pack/plugin/frozen-indices/src/main/java/org/elasticsearch/xpack/frozen/action/TransportFreezeIndexAction.java b/x-pack/plugin/frozen-indices/src/main/java/org/elasticsearch/xpack/frozen/action/TransportFreezeIndexAction.java index 96225ecf0430c..83f1677229972 100644 --- a/x-pack/plugin/frozen-indices/src/main/java/org/elasticsearch/xpack/frozen/action/TransportFreezeIndexAction.java +++ b/x-pack/plugin/frozen-indices/src/main/java/org/elasticsearch/xpack/frozen/action/TransportFreezeIndexAction.java @@ -14,6 +14,7 @@ import org.elasticsearch.action.admin.indices.close.CloseIndexResponse; import org.elasticsearch.action.admin.indices.open.OpenIndexClusterStateUpdateRequest; import org.elasticsearch.action.support.ActionFilters; +import org.elasticsearch.action.support.ActiveShardCount; import org.elasticsearch.action.support.DestructiveOperations; import org.elasticsearch.action.support.master.TransportMasterNodeAction; import org.elasticsearch.cluster.AckedClusterStateUpdateTask; @@ -114,9 +115,13 @@ protected void masterOperation(Task task, FreezeRequest request, ClusterState st return; } - final CloseIndexClusterStateUpdateRequest closeRequest = new CloseIndexClusterStateUpdateRequest(task.getId()).ackTimeout( - request.ackTimeout() - ).masterNodeTimeout(request.masterNodeTimeout()).indices(concreteIndices); + final CloseIndexClusterStateUpdateRequest closeRequest = new CloseIndexClusterStateUpdateRequest( + request.masterNodeTimeout(), + request.ackTimeout(), + task.getId(), + ActiveShardCount.DEFAULT, + concreteIndices + ); indexStateService.closeIndices(closeRequest, new ActionListener<>() { @Override From d17aab75a71f4d958c86f119ef96117e8a8e5d3c Mon Sep 17 00:00:00 2001 From: Oleksandr Kolomiiets Date: Mon, 23 Sep 2024 08:45:35 -0700 Subject: [PATCH 05/58] Fix new synthetic source copy_to tests to pass on serverless (#113320) (#113388) (cherry picked from commit 6b11af38d6ed0a0ad3dd63fa6fcef6fd9bcac33a) --- .../indices.create/20_synthetic_source.yml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml index 937c5f19ae5aa..b5a9146bc54a6 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml @@ -1815,6 +1815,8 @@ synthetic_source with copy_to pointing at dynamic field: _source: mode: synthetic properties: + name: + type: keyword k: type: keyword copy_to: c.copy @@ -1829,6 +1831,7 @@ synthetic_source with copy_to pointing at dynamic field: id: 1 refresh: true body: + name: "A" k: "hello" - do: @@ -1837,6 +1840,7 @@ synthetic_source with copy_to pointing at dynamic field: id: 2 refresh: true body: + name: "B" k: ["55", "66"] - do: @@ -1845,6 +1849,7 @@ synthetic_source with copy_to pointing at dynamic field: id: 3 refresh: true body: + name: "C" k: "hello" c: copy: "zap" @@ -1852,11 +1857,13 @@ synthetic_source with copy_to pointing at dynamic field: - do: search: index: test + sort: name body: docvalue_fields: [ "c.copy.keyword" ] - match: hits.hits.0._source: + name: "A" k: "hello" - match: hits.hits.0.fields: @@ -1864,6 +1871,7 @@ synthetic_source with copy_to pointing at dynamic field: - match: hits.hits.1._source: + name: "B" k: ["55", "66"] - match: hits.hits.1.fields: @@ -1871,6 +1879,7 @@ 
synthetic_source with copy_to pointing at dynamic field: - match: hits.hits.2._source: + name: "C" k: "hello" c: copy: "zap" @@ -1892,6 +1901,8 @@ synthetic_source with copy_to pointing inside dynamic object: _source: mode: synthetic properties: + name: + type: keyword k: type: keyword copy_to: c.copy @@ -1902,6 +1913,7 @@ synthetic_source with copy_to pointing inside dynamic object: id: 1 refresh: true body: + name: "A" k: "hello" - do: @@ -1910,6 +1922,7 @@ synthetic_source with copy_to pointing inside dynamic object: id: 2 refresh: true body: + name: "B" k: ["55", "66"] - do: @@ -1918,6 +1931,7 @@ synthetic_source with copy_to pointing inside dynamic object: id: 3 refresh: true body: + name: "C" k: "hello" c: copy: "zap" @@ -1925,11 +1939,13 @@ synthetic_source with copy_to pointing inside dynamic object: - do: search: index: test + sort: name body: docvalue_fields: [ "c.copy.keyword" ] - match: hits.hits.0._source: + name: "A" k: "hello" - match: hits.hits.0.fields: @@ -1937,6 +1953,7 @@ synthetic_source with copy_to pointing inside dynamic object: - match: hits.hits.1._source: + name: "B" k: ["55", "66"] - match: hits.hits.1.fields: @@ -1944,6 +1961,7 @@ synthetic_source with copy_to pointing inside dynamic object: - match: hits.hits.2._source: + name: "C" k: "hello" c: copy: "zap" From cbe2faead8185d3347a437832ce7fb47e826546a Mon Sep 17 00:00:00 2001 From: Liam Thompson <32779855+leemthompo@users.noreply.github.com> Date: Mon, 23 Sep 2024 18:05:57 +0200 Subject: [PATCH 06/58] fix typos (#113329) (#113400) Co-authored-by: Pm Ching <41728178+pionCham@users.noreply.github.com> --- docs/internal/DistributedArchitectureGuide.md | 4 ++-- docs/plugins/development/creating-classic-plugins.asciidoc | 2 +- docs/reference/commands/cli-jvm-options.asciidoc | 2 +- docs/reference/connector/apis/connector-apis.asciidoc | 2 +- docs/reference/settings/security-settings.asciidoc | 2 +- .../troubleshooting/snapshot/corrupt-repository.asciidoc | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/internal/DistributedArchitectureGuide.md b/docs/internal/DistributedArchitectureGuide.md index 732e2e7be46fa..0114be68b9be2 100644 --- a/docs/internal/DistributedArchitectureGuide.md +++ b/docs/internal/DistributedArchitectureGuide.md @@ -252,7 +252,7 @@ changes. The cloud service will add more resources to the cluster based on Elast Elasticsearch by itself cannot automatically scale. Autoscaling recommendations are tailored for the user [based on user defined policies][], composed of data -roles (hot, frozen, etc) and [deciders][]. There's a public [webinar on autoscaling][], as well as the +roles (hot, frozen, etc.) and [deciders][]. There's a public [webinar on autoscaling][], as well as the public [Autoscaling APIs] docs. Autoscaling's current implementation is based primary on storage requirements, as well as memory capacity @@ -332,7 +332,7 @@ problems in the cluster. It uses [an algorithm defined here][]. Some examples ar [an algorithm defined here]: https://github.com/elastic/elasticsearch/blob/v8.13.2/x-pack/plugin/autoscaling/src/main/java/org/elasticsearch/xpack/autoscaling/storage/ReactiveStorageDeciderService.java#L158-L176 The `ProactiveStorageDeciderService` maintains a forecast window that [defaults to 30 minutes][]. It only -runs on data streams (ILM, rollover, etc), not regular indexes. It looks at past [index changes][] that +runs on data streams (ILM, rollover, etc.), not regular indexes. 
It looks at past [index changes][] that took place within the forecast window to [predict][] resources that will be needed shortly. [defaults to 30 minutes]: https://github.com/elastic/elasticsearch/blob/v8.13.2/x-pack/plugin/autoscaling/src/main/java/org/elasticsearch/xpack/autoscaling/storage/ProactiveStorageDeciderService.java#L32 diff --git a/docs/plugins/development/creating-classic-plugins.asciidoc b/docs/plugins/development/creating-classic-plugins.asciidoc index cc03ad51275fa..58dc00e496c2d 100644 --- a/docs/plugins/development/creating-classic-plugins.asciidoc +++ b/docs/plugins/development/creating-classic-plugins.asciidoc @@ -18,7 +18,7 @@ will refuse to start in the presence of plugins with the incorrect [discrete] ==== Classic plugin file structure -Classis plugins are ZIP files composed of JAR files and +Classic plugins are ZIP files composed of JAR files and <>, a Java properties file that describes the plugin. diff --git a/docs/reference/commands/cli-jvm-options.asciidoc b/docs/reference/commands/cli-jvm-options.asciidoc index 546884f428c12..0428ead60b626 100644 --- a/docs/reference/commands/cli-jvm-options.asciidoc +++ b/docs/reference/commands/cli-jvm-options.asciidoc @@ -3,7 +3,7 @@ ==== JVM options CLI tools run with 64MB of heap. For most tools, this value is fine. However, if -needed this can be overriden by setting the `CLI_JAVA_OPTS` environment variable. +needed this can be overridden by setting the `CLI_JAVA_OPTS` environment variable. For example, the following increases the heap size used by the `pass:a[elasticsearch-{tool-name}]` tool to 1GB. diff --git a/docs/reference/connector/apis/connector-apis.asciidoc b/docs/reference/connector/apis/connector-apis.asciidoc index 987f82f6b4ce4..3de4483adcfd1 100644 --- a/docs/reference/connector/apis/connector-apis.asciidoc +++ b/docs/reference/connector/apis/connector-apis.asciidoc @@ -82,7 +82,7 @@ beta:[] preview::[] -*Connector Service APIs* are a subset of Connector API endpoints, that represent framework-level operations defined in the https://github.com/elastic/connectors/blob/main/docs/CONNECTOR_PROTOCOL.md[Connector Protocol]. These APIs are not intended for direct connector management by users but are there to support the implementation of services that utilize the Conector Protocol to communicate with {es}. +*Connector Service APIs* are a subset of Connector API endpoints, that represent framework-level operations defined in the https://github.com/elastic/connectors/blob/main/docs/CONNECTOR_PROTOCOL.md[Connector Protocol]. These APIs are not intended for direct connector management by users but are there to support the implementation of services that utilize the Connector Protocol to communicate with {es}. [TIP] ==== diff --git a/docs/reference/settings/security-settings.asciidoc b/docs/reference/settings/security-settings.asciidoc index 7dd9d0574638c..0fc4d59e72350 100644 --- a/docs/reference/settings/security-settings.asciidoc +++ b/docs/reference/settings/security-settings.asciidoc @@ -1990,7 +1990,7 @@ idle for more than the specified timeout. The server can also set the `Keep-Alive` HTTP response header. The effective time-to-live value is the smaller value between this setting and the `Keep-Alive` -reponse header. Configure this setting to `-1` to let the server dictate the value. +response header. Configure this setting to `-1` to let the server dictate the value. If the header is not set by the server and the setting has value of `-1`, the time-to-live is infinite and connections never expire. 
// end::oidc-http-connection-pool-ttl-tag[] diff --git a/docs/reference/tab-widgets/troubleshooting/snapshot/corrupt-repository.asciidoc b/docs/reference/tab-widgets/troubleshooting/snapshot/corrupt-repository.asciidoc index b2e864aab6db9..942b0f6ba21a6 100644 --- a/docs/reference/tab-widgets/troubleshooting/snapshot/corrupt-repository.asciidoc +++ b/docs/reference/tab-widgets/troubleshooting/snapshot/corrupt-repository.asciidoc @@ -71,7 +71,7 @@ GET _snapshot/my-repo ---- // TEST[skip:we're not setting up repos in these tests] + -The reponse will look like this: +The response will look like this: + [source,console-result] ---- From f849aed0d585a7954fd18f74a5d178708382a061 Mon Sep 17 00:00:00 2001 From: David Turner Date: Mon, 23 Sep 2024 17:06:28 +0100 Subject: [PATCH 07/58] Make `PutMappingClusterStateUpdateRequest` a record (#113352) (#113392) No need to extend `IndicesClusterStateUpdateRequest`, this thing can be completely immutable. --- .../PutMappingClusterStateUpdateRequest.java | 42 ++++++++------- .../put/TransportPutMappingAction.java | 11 ++-- .../metadata/MetadataMappingServiceTests.java | 42 +++++++++++---- .../SemanticTextClusterMetadataTests.java | 53 +++++++++++-------- 4 files changed, 92 insertions(+), 56 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/mapping/put/PutMappingClusterStateUpdateRequest.java b/server/src/main/java/org/elasticsearch/action/admin/indices/mapping/put/PutMappingClusterStateUpdateRequest.java index 8a75faf36c58c..2c469f4cae490 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/mapping/put/PutMappingClusterStateUpdateRequest.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/mapping/put/PutMappingClusterStateUpdateRequest.java @@ -9,33 +9,37 @@ package org.elasticsearch.action.admin.indices.mapping.put; -import org.elasticsearch.cluster.ack.IndicesClusterStateUpdateRequest; import org.elasticsearch.common.compress.CompressedXContent; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.index.Index; import java.io.IOException; +import java.util.Objects; /** * Cluster state update request that allows to put a mapping */ -public class PutMappingClusterStateUpdateRequest extends IndicesClusterStateUpdateRequest { - - private final CompressedXContent source; - private boolean autoUpdate; - - public PutMappingClusterStateUpdateRequest(String source) throws IOException { - this.source = CompressedXContent.fromJSON(source); - } - - public CompressedXContent source() { - return source; - } - - public PutMappingClusterStateUpdateRequest autoUpdate(boolean autoUpdate) { - this.autoUpdate = autoUpdate; - return this; +public record PutMappingClusterStateUpdateRequest( + TimeValue masterNodeTimeout, + TimeValue ackTimeout, + CompressedXContent source, + boolean autoUpdate, + Index[] indices +) { + public PutMappingClusterStateUpdateRequest { + Objects.requireNonNull(masterNodeTimeout); + Objects.requireNonNull(ackTimeout); + Objects.requireNonNull(source); + Objects.requireNonNull(indices); } - public boolean autoUpdate() { - return autoUpdate; + public PutMappingClusterStateUpdateRequest( + TimeValue masterNodeTimeout, + TimeValue ackTimeout, + String source, + boolean autoUpdate, + Index... 
indices + ) throws IOException { + this(masterNodeTimeout, ackTimeout, CompressedXContent.fromJSON(source), autoUpdate, indices); } } diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/mapping/put/TransportPutMappingAction.java b/server/src/main/java/org/elasticsearch/action/admin/indices/mapping/put/TransportPutMappingAction.java index bb305c0d827fd..749470e181deb 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/mapping/put/TransportPutMappingAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/mapping/put/TransportPutMappingAction.java @@ -157,10 +157,13 @@ static void performMappingUpdate( }); final PutMappingClusterStateUpdateRequest updateRequest; try { - updateRequest = new PutMappingClusterStateUpdateRequest(request.source()).indices(concreteIndices) - .ackTimeout(request.ackTimeout()) - .masterNodeTimeout(request.masterNodeTimeout()) - .autoUpdate(autoUpdate); + updateRequest = new PutMappingClusterStateUpdateRequest( + request.masterNodeTimeout(), + request.ackTimeout(), + request.source(), + autoUpdate, + concreteIndices + ); } catch (IOException e) { wrappedListener.onFailure(e); return; diff --git a/server/src/test/java/org/elasticsearch/cluster/metadata/MetadataMappingServiceTests.java b/server/src/test/java/org/elasticsearch/cluster/metadata/MetadataMappingServiceTests.java index 75c14143ea269..111f0db8250bf 100644 --- a/server/src/test/java/org/elasticsearch/cluster/metadata/MetadataMappingServiceTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/metadata/MetadataMappingServiceTests.java @@ -14,7 +14,6 @@ import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.cluster.service.ClusterStateTaskExecutorUtils; import org.elasticsearch.common.compress.CompressedXContent; -import org.elasticsearch.index.Index; import org.elasticsearch.index.IndexService; import org.elasticsearch.index.IndexVersion; import org.elasticsearch.plugins.Plugin; @@ -43,9 +42,14 @@ public void testMappingClusterStateUpdateDoesntChangeExistingIndices() throws Ex final MetadataMappingService.PutMappingExecutor putMappingExecutor = mappingService.new PutMappingExecutor(); final ClusterService clusterService = getInstanceFromNode(ClusterService.class); // TODO - it will be nice to get a random mapping generator - final PutMappingClusterStateUpdateRequest request = new PutMappingClusterStateUpdateRequest(""" - { "properties": { "field": { "type": "text" }}}"""); - request.indices(new Index[] { indexService.index() }); + final PutMappingClusterStateUpdateRequest request = new PutMappingClusterStateUpdateRequest( + TEST_REQUEST_TIMEOUT, + TEST_REQUEST_TIMEOUT, + """ + { "properties": { "field": { "type": "text" }}}""", + false, + indexService.index() + ); final var resultingState = ClusterStateTaskExecutorUtils.executeAndAssertSuccessful( clusterService.state(), putMappingExecutor, @@ -66,8 +70,14 @@ public void testClusterStateIsNotChangedWithIdenticalMappings() throws Exception final MetadataMappingService mappingService = getInstanceFromNode(MetadataMappingService.class); final MetadataMappingService.PutMappingExecutor putMappingExecutor = mappingService.new PutMappingExecutor(); final ClusterService clusterService = getInstanceFromNode(ClusterService.class); - final PutMappingClusterStateUpdateRequest request = new PutMappingClusterStateUpdateRequest(""" - { "properties": { "field": { "type": "text" }}}""").indices(new Index[] { indexService.index() }); + final PutMappingClusterStateUpdateRequest 
request = new PutMappingClusterStateUpdateRequest( + TEST_REQUEST_TIMEOUT, + TEST_REQUEST_TIMEOUT, + """ + { "properties": { "field": { "type": "text" }}}""", + false, + indexService.index() + ); final var resultingState1 = ClusterStateTaskExecutorUtils.executeAndAssertSuccessful( clusterService.state(), putMappingExecutor, @@ -87,9 +97,14 @@ public void testMappingVersion() throws Exception { final MetadataMappingService mappingService = getInstanceFromNode(MetadataMappingService.class); final MetadataMappingService.PutMappingExecutor putMappingExecutor = mappingService.new PutMappingExecutor(); final ClusterService clusterService = getInstanceFromNode(ClusterService.class); - final PutMappingClusterStateUpdateRequest request = new PutMappingClusterStateUpdateRequest(""" - { "properties": { "field": { "type": "text" }}}"""); - request.indices(new Index[] { indexService.index() }); + final PutMappingClusterStateUpdateRequest request = new PutMappingClusterStateUpdateRequest( + TEST_REQUEST_TIMEOUT, + TEST_REQUEST_TIMEOUT, + """ + { "properties": { "field": { "type": "text" }}}""", + false, + indexService.index() + ); final var resultingState = ClusterStateTaskExecutorUtils.executeAndAssertSuccessful( clusterService.state(), putMappingExecutor, @@ -105,8 +120,13 @@ public void testMappingVersionUnchanged() throws Exception { final MetadataMappingService mappingService = getInstanceFromNode(MetadataMappingService.class); final MetadataMappingService.PutMappingExecutor putMappingExecutor = mappingService.new PutMappingExecutor(); final ClusterService clusterService = getInstanceFromNode(ClusterService.class); - final PutMappingClusterStateUpdateRequest request = new PutMappingClusterStateUpdateRequest("{ \"properties\": {}}"); - request.indices(new Index[] { indexService.index() }); + final PutMappingClusterStateUpdateRequest request = new PutMappingClusterStateUpdateRequest( + TEST_REQUEST_TIMEOUT, + TEST_REQUEST_TIMEOUT, + "{ \"properties\": {}}", + false, + indexService.index() + ); final var resultingState = ClusterStateTaskExecutorUtils.executeAndAssertSuccessful( clusterService.state(), putMappingExecutor, diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/cluster/metadata/SemanticTextClusterMetadataTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/cluster/metadata/SemanticTextClusterMetadataTests.java index 1c4a2f561ad4a..bfec2d5ac3484 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/cluster/metadata/SemanticTextClusterMetadataTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/cluster/metadata/SemanticTextClusterMetadataTests.java @@ -11,7 +11,6 @@ import org.elasticsearch.action.admin.indices.mapping.put.PutMappingClusterStateUpdateRequest; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.cluster.service.ClusterStateTaskExecutorUtils; -import org.elasticsearch.index.Index; import org.elasticsearch.index.IndexService; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.test.ESSingleNodeTestCase; @@ -46,9 +45,14 @@ public void testSingleSourceSemanticTextField() throws Exception { final MetadataMappingService.PutMappingExecutor putMappingExecutor = mappingService.new PutMappingExecutor(); final ClusterService clusterService = getInstanceFromNode(ClusterService.class); - final PutMappingClusterStateUpdateRequest request = new PutMappingClusterStateUpdateRequest(""" - { "properties": { "field": { "type": "semantic_text", "inference_id": "test_model" }}}"""); - 
request.indices(new Index[] { indexService.index() }); + final PutMappingClusterStateUpdateRequest request = new PutMappingClusterStateUpdateRequest( + TEST_REQUEST_TIMEOUT, + TEST_REQUEST_TIMEOUT, + """ + { "properties": { "field": { "type": "semantic_text", "inference_id": "test_model" }}}""", + false, + indexService.index() + ); final var resultingState = ClusterStateTaskExecutorUtils.executeAndAssertSuccessful( clusterService.state(), putMappingExecutor, @@ -63,25 +67,30 @@ public void testCopyToSemanticTextField() throws Exception { final MetadataMappingService.PutMappingExecutor putMappingExecutor = mappingService.new PutMappingExecutor(); final ClusterService clusterService = getInstanceFromNode(ClusterService.class); - final PutMappingClusterStateUpdateRequest request = new PutMappingClusterStateUpdateRequest(""" - { - "properties": { - "semantic": { - "type": "semantic_text", - "inference_id": "test_model" - }, - "copy_origin_1": { - "type": "text", - "copy_to": "semantic" - }, - "copy_origin_2": { - "type": "text", - "copy_to": "semantic" + final PutMappingClusterStateUpdateRequest request = new PutMappingClusterStateUpdateRequest( + TEST_REQUEST_TIMEOUT, + TEST_REQUEST_TIMEOUT, + """ + { + "properties": { + "semantic": { + "type": "semantic_text", + "inference_id": "test_model" + }, + "copy_origin_1": { + "type": "text", + "copy_to": "semantic" + }, + "copy_origin_2": { + "type": "text", + "copy_to": "semantic" + } + } } - } - } - """); - request.indices(new Index[] { indexService.index() }); + """, + false, + indexService.index() + ); final var resultingState = ClusterStateTaskExecutorUtils.executeAndAssertSuccessful( clusterService.state(), putMappingExecutor, From 2fac37dd6894b673c4cb3f9ab35f0db5ac51b34e Mon Sep 17 00:00:00 2001 From: Liam Thompson <32779855+leemthompo@users.noreply.github.com> Date: Mon, 23 Sep 2024 18:20:36 +0200 Subject: [PATCH 08/58] fix typos of docs/plugins (#113348) (#113404) Co-authored-by: YeonghyeonKo <46114393+YeonghyeonKO@users.noreply.github.com> --- docs/plugins/analysis-icu.asciidoc | 4 +- docs/plugins/analysis-kuromoji.asciidoc | 4 +- docs/plugins/analysis-nori.asciidoc | 2 +- .../creating-stable-plugins.asciidoc | 50 +++++++++---------- docs/plugins/discovery-azure-classic.asciidoc | 2 +- docs/plugins/discovery-gce.asciidoc | 2 +- docs/plugins/integrations.asciidoc | 4 +- docs/plugins/mapper-annotated-text.asciidoc | 2 +- docs/plugins/store-smb.asciidoc | 4 +- 9 files changed, 37 insertions(+), 37 deletions(-) diff --git a/docs/plugins/analysis-icu.asciidoc b/docs/plugins/analysis-icu.asciidoc index f6ca6ceae7ea4..da7efd2843f50 100644 --- a/docs/plugins/analysis-icu.asciidoc +++ b/docs/plugins/analysis-icu.asciidoc @@ -380,7 +380,7 @@ GET /my-index-000001/_search <3> -------------------------- -<1> The `name` field uses the `standard` analyzer, and so support full text queries. +<1> The `name` field uses the `standard` analyzer, and so supports full text queries. <2> The `name.sort` field is an `icu_collation_keyword` field that will preserve the name as a single token doc values, and applies the German ``phonebook'' order. <3> An example query which searches the `name` field and sorts on the `name.sort` field. @@ -467,7 +467,7 @@ differences. `case_first`:: Possible values: `lower` or `upper`. Useful to control which case is sorted -first when case is not ignored for strength `tertiary`. The default depends on +first when the case is not ignored for strength `tertiary`. The default depends on the collation. 
`numeric`:: diff --git a/docs/plugins/analysis-kuromoji.asciidoc b/docs/plugins/analysis-kuromoji.asciidoc index b1d1d5a751057..fa6229b9f20e8 100644 --- a/docs/plugins/analysis-kuromoji.asciidoc +++ b/docs/plugins/analysis-kuromoji.asciidoc @@ -86,7 +86,7 @@ The `kuromoji_iteration_mark` normalizes Japanese horizontal iteration marks `normalize_kanji`:: - Indicates whether kanji iteration marks should be normalize. Defaults to `true`. + Indicates whether kanji iteration marks should be normalized. Defaults to `true`. `normalize_kana`:: @@ -189,7 +189,7 @@ PUT kuromoji_sample + -- Additional expert user parameters `nbest_cost` and `nbest_examples` can be used -to include additional tokens that most likely according to the statistical model. +to include additional tokens that are most likely according to the statistical model. If both parameters are used, the largest number of both is applied. `nbest_cost`:: diff --git a/docs/plugins/analysis-nori.asciidoc b/docs/plugins/analysis-nori.asciidoc index 1a3153fa3bea5..369268bcef0cd 100644 --- a/docs/plugins/analysis-nori.asciidoc +++ b/docs/plugins/analysis-nori.asciidoc @@ -447,7 +447,7 @@ Which responds with: The `nori_number` token filter normalizes Korean numbers to regular Arabic decimal numbers in half-width characters. -Korean numbers are often written using a combination of Hangul and Arabic numbers with various kinds punctuation. +Korean numbers are often written using a combination of Hangul and Arabic numbers with various kinds of punctuation. For example, 3.2천 means 3200. This filter does this kind of normalization and allows a search for 3200 to match 3.2천 in text, but can also be used to make range facets based on the normalized numbers and so on. diff --git a/docs/plugins/development/creating-stable-plugins.asciidoc b/docs/plugins/development/creating-stable-plugins.asciidoc index c9a8a1f6c7e2a..9f98774b5a761 100644 --- a/docs/plugins/development/creating-stable-plugins.asciidoc +++ b/docs/plugins/development/creating-stable-plugins.asciidoc @@ -1,8 +1,8 @@ [[creating-stable-plugins]] === Creating text analysis plugins with the stable plugin API -Text analysis plugins provide {es} with custom {ref}/analysis.html[Lucene -analyzers, token filters, character filters, and tokenizers]. +Text analysis plugins provide {es} with custom {ref}/analysis.html[Lucene +analyzers, token filters, character filters, and tokenizers]. [discrete] ==== The stable plugin API @@ -10,7 +10,7 @@ analyzers, token filters, character filters, and tokenizers]. Text analysis plugins can be developed against the stable plugin API. This API consists of the following dependencies: -* `plugin-api` - an API used by plugin developers to implement custom {es} +* `plugin-api` - an API used by plugin developers to implement custom {es} plugins. * `plugin-analysis-api` - an API used by plugin developers to implement analysis plugins and integrate them into {es}. @@ -18,7 +18,7 @@ plugins and integrate them into {es}. core Lucene analysis interfaces like `Tokenizer`, `Analyzer`, and `TokenStream`. For new versions of {es} within the same major version, plugins built against -this API do not need to be recompiled. Future versions of the API will be +this API does not need to be recompiled. Future versions of the API will be backwards compatible and plugins are binary compatible with future versions of {es}. In other words, once you have a working artifact, you can re-use it when you upgrade {es} to a new bugfix or minor version. @@ -48,9 +48,9 @@ require code changes. 
Stable plugins are ZIP files composed of JAR files and two metadata files: -* `stable-plugin-descriptor.properties` - a Java properties file that describes +* `stable-plugin-descriptor.properties` - a Java properties file that describes the plugin. Refer to <>. -* `named_components.json` - a JSON file mapping interfaces to key-value pairs +* `named_components.json` - a JSON file mapping interfaces to key-value pairs of component names and implementation classes. Note that only JAR files at the root of the plugin are added to the classpath @@ -65,7 +65,7 @@ you use this plugin. However, you don't need Gradle to create plugins. The {es} Github repository contains {es-repo}tree/main/plugins/examples/stable-analysis[an example analysis plugin]. -The example `build.gradle` build script provides a good starting point for +The example `build.gradle` build script provides a good starting point for developing your own plugin. [discrete] @@ -77,29 +77,29 @@ Plugins are written in Java, so you need to install a Java Development Kit [discrete] ===== Step by step -. Create a directory for your project. +. Create a directory for your project. . Copy the example `build.gradle` build script to your project directory. Note that this build script uses the `elasticsearch.stable-esplugin` gradle plugin to build your plugin. . Edit the `build.gradle` build script: -** Add a definition for the `pluginApiVersion` and matching `luceneVersion` -variables to the top of the file. You can find these versions in the -`build-tools-internal/version.properties` file in the {es-repo}[Elasticsearch +** Add a definition for the `pluginApiVersion` and matching `luceneVersion` +variables to the top of the file. You can find these versions in the +`build-tools-internal/version.properties` file in the {es-repo}[Elasticsearch Github repository]. -** Edit the `name` and `description` in the `esplugin` section of the build -script. This will create the plugin descriptor file. If you're not using the -`elasticsearch.stable-esplugin` gradle plugin, refer to +** Edit the `name` and `description` in the `esplugin` section of the build +script. This will create the plugin descriptor file. If you're not using the +`elasticsearch.stable-esplugin` gradle plugin, refer to <> to create the file manually. ** Add module information. -** Ensure you have declared the following compile-time dependencies. These -dependencies are compile-time only because {es} will provide these libraries at +** Ensure you have declared the following compile-time dependencies. These +dependencies are compile-time only because {es} will provide these libraries at runtime. *** `org.elasticsearch.plugin:elasticsearch-plugin-api` *** `org.elasticsearch.plugin:elasticsearch-plugin-analysis-api` *** `org.apache.lucene:lucene-analysis-common` -** For unit testing, ensure these dependencies have also been added to the +** For unit testing, ensure these dependencies have also been added to the `build.gradle` script as `testImplementation` dependencies. -. Implement an interface from the analysis plugin API, annotating it with +. Implement an interface from the analysis plugin API, annotating it with `NamedComponent`. Refer to <> for an example. . You should now be able to assemble a plugin ZIP file by running: + @@ -107,22 +107,22 @@ runtime. ---- gradle bundlePlugin ---- -The resulting plugin ZIP file is written to the `build/distributions` +The resulting plugin ZIP file is written to the `build/distributions` directory. 
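As a concrete illustration of the implementation step above, a minimal
`NamedComponent`-annotated token filter might look like the following. Treat
this as a sketch: the class and component names are invented, and the
`org.elasticsearch.plugin` packages and `create` signature follow the example
analysis plugin referenced above, so verify them against the plugin API version
you build against.

[source,java]
----
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenStream;
import org.elasticsearch.plugin.NamedComponent;
import org.elasticsearch.plugin.analysis.TokenFilterFactory;

// Registers the filter under the name "example_lowercase"; the
// elasticsearch.stable-esplugin Gradle plugin scans for this annotation when
// generating the named_components.json file bundled into the plugin ZIP.
@NamedComponent("example_lowercase")
public class ExampleLowercaseTokenFilterFactory implements TokenFilterFactory {
    @Override
    public TokenStream create(TokenStream tokenStream) {
        return new LowerCaseFilter(tokenStream);
    }
}
----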
[discrete] ===== YAML REST tests -The Gradle `elasticsearch.yaml-rest-test` plugin enables testing of your -plugin using the {es-repo}blob/main/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/README.asciidoc[{es} yamlRestTest framework]. +The Gradle `elasticsearch.yaml-rest-test` plugin enables testing of your +plugin using the {es-repo}blob/main/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/README.asciidoc[{es} yamlRestTest framework]. These tests use a YAML-formatted domain language to issue REST requests against -an internal {es} cluster that has your plugin installed, and to check the -results of those requests. The structure of a YAML REST test directory is as +an internal {es} cluster that has your plugin installed, and to check the +results of those requests. The structure of a YAML REST test directory is as follows: -* A test suite class, defined under `src/yamlRestTest/java`. This class should +* A test suite class, defined under `src/yamlRestTest/java`. This class should extend `ESClientYamlSuiteTestCase`. -* The YAML tests themselves should be defined under +* The YAML tests themselves should be defined under `src/yamlRestTest/resources/test/`. [[plugin-descriptor-file-stable]] diff --git a/docs/plugins/discovery-azure-classic.asciidoc b/docs/plugins/discovery-azure-classic.asciidoc index aa710a2fe7ef9..b8d37f024172c 100644 --- a/docs/plugins/discovery-azure-classic.asciidoc +++ b/docs/plugins/discovery-azure-classic.asciidoc @@ -148,7 +148,7 @@ Before starting, you need to have: -- You should follow http://azure.microsoft.com/en-us/documentation/articles/linux-use-ssh-key/[this guide] to learn -how to create or use existing SSH keys. If you have already did it, you can skip the following. +how to create or use existing SSH keys. If you have already done it, you can skip the following. Here is a description on how to generate SSH keys using `openssl`: diff --git a/docs/plugins/discovery-gce.asciidoc b/docs/plugins/discovery-gce.asciidoc index 2e8cff21208e0..0a2629b7f094b 100644 --- a/docs/plugins/discovery-gce.asciidoc +++ b/docs/plugins/discovery-gce.asciidoc @@ -478,7 +478,7 @@ discovery: seed_providers: gce -------------------------------------------------- -Replaces `project_id` and `zone` with your settings. +Replace `project_id` and `zone` with your settings. To run test: diff --git a/docs/plugins/integrations.asciidoc b/docs/plugins/integrations.asciidoc index 71f237692ad35..aff4aed0becd2 100644 --- a/docs/plugins/integrations.asciidoc +++ b/docs/plugins/integrations.asciidoc @@ -91,7 +91,7 @@ Integrations are not plugins, but are external tools or modules that make it eas Elasticsearch Grails plugin. * https://hibernate.org/search/[Hibernate Search] - Integration with Hibernate ORM, from the Hibernate team. Automatic synchronization of write operations, yet exposes full Elasticsearch capabilities for queries. Can return either Elasticsearch native or re-map queries back into managed entities loaded within transaction from the reference database. + Integration with Hibernate ORM, from the Hibernate team. Automatic synchronization of write operations, yet exposes full Elasticsearch capabilities for queries. Can return either Elasticsearch native or re-map queries back into managed entities loaded within transactions from the reference database. 
* https://github.com/spring-projects/spring-data-elasticsearch[Spring Data Elasticsearch]: Spring Data implementation for Elasticsearch @@ -104,7 +104,7 @@ Integrations are not plugins, but are external tools or modules that make it eas * https://pulsar.apache.org/docs/en/io-elasticsearch[Apache Pulsar]: The Elasticsearch Sink Connector is used to pull messages from Pulsar topics - and persist the messages to a index. + and persist the messages to an index. * https://micronaut-projects.github.io/micronaut-elasticsearch/latest/guide/index.html[Micronaut Elasticsearch Integration]: Integration of Micronaut with Elasticsearch diff --git a/docs/plugins/mapper-annotated-text.asciidoc b/docs/plugins/mapper-annotated-text.asciidoc index afe8ba41da9b8..e4141e98a2285 100644 --- a/docs/plugins/mapper-annotated-text.asciidoc +++ b/docs/plugins/mapper-annotated-text.asciidoc @@ -143,7 +143,7 @@ broader positional queries e.g. finding mentions of a `Guitarist` near to `strat WARNING: Any use of `=` signs in annotation values eg `[Prince](person=Prince)` will cause the document to be rejected with a parse failure. In future we hope to have a use for -the equals signs so wil actively reject documents that contain this today. +the equals signs so will actively reject documents that contain this today. [[annotated-text-synthetic-source]] ===== Synthetic `_source` diff --git a/docs/plugins/store-smb.asciidoc b/docs/plugins/store-smb.asciidoc index 8557ef868010f..da803b4f42022 100644 --- a/docs/plugins/store-smb.asciidoc +++ b/docs/plugins/store-smb.asciidoc @@ -10,7 +10,7 @@ include::install_remove.asciidoc[] ==== Working around a bug in Windows SMB and Java on windows When using a shared file system based on the SMB protocol (like Azure File Service) to store indices, the way Lucene -open index segment files is with a write only flag. This is the _correct_ way to open the files, as they will only be +opens index segment files is with a write only flag. This is the _correct_ way to open the files, as they will only be used for writes and allows different FS implementations to optimize for it. Sadly, in windows with SMB, this disables the cache manager, causing writes to be slow. This has been described in https://issues.apache.org/jira/browse/LUCENE-6176[LUCENE-6176], but it affects each and every Java program out there!. @@ -44,7 +44,7 @@ This can be configured for all indices by adding this to the `elasticsearch.yml` index.store.type: smb_nio_fs ---- -Note that setting will be applied for newly created indices. +Note that settings will be applied for newly created indices. It can also be set on a per-index basis at index creation time: From 1ceec4b638694510c69a003fdc0af08dd8640c54 Mon Sep 17 00:00:00 2001 From: Stef Nestor <26751266+stefnestor@users.noreply.github.com> Date: Mon, 23 Sep 2024 10:40:54 -0600 Subject: [PATCH 09/58] (logger) change from error to warn for short circuiting user (#112895) (#113135) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 👋🏽 howdy, team! I believe [this](https://github.com/elastic/elasticsearch/blob/main/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/esnative/NativeUsersStore.java#L233) "error" by itself usually self-resolves and does not necessarily guarantee any remediation is required. I would like to request shifting the `logger` from `error` over to `warn` to reflect this. ```java logger.error("security index is unavailable. 
short circuiting retrieval of user [{}]", user);
```

Introduced [here](https://github.com/elastic/elasticsearch/pull/34568).

A sample confirming when action is necessary: per [this example Discuss topic](https://discuss.elastic.co/t/security-index-is-unavailable/314824), a sister log calls out the actual problem in its own error message.

(Social note: For a 24h analysis of ESS ES Cluster Logs, this was the 10th top "error" at 5% of error volume. I'd like to remove noise where errors aren't actionable so we can better monitor error trends.)

🙏 TIA!

Co-authored-by: Elastic Machine
---
 docs/changelog/112895.yaml                              | 5 +++++
 .../xpack/security/authc/esnative/NativeUsersStore.java | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)
 create mode 100644 docs/changelog/112895.yaml

diff --git a/docs/changelog/112895.yaml b/docs/changelog/112895.yaml
new file mode 100644
index 0000000000000..59d391f649280
--- /dev/null
+++ b/docs/changelog/112895.yaml
@@ -0,0 +1,5 @@
+pr: 112895
+summary: (logger) change from error to warn for short circuiting user
+area: Security
+type: enhancement
+issues: []

diff --git a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/esnative/NativeUsersStore.java b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/esnative/NativeUsersStore.java
index 698cda1683a20..d866bd2a9d229 100644
--- a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/esnative/NativeUsersStore.java
+++ b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/esnative/NativeUsersStore.java
@@ -230,7 +230,7 @@ private void getUserAndPassword(final String user, final ActionListener<UserAndPassword> listener) {
-                logger.error("security index is unavailable. short circuiting retrieval of user [{}]", user);
+                logger.warn("security index is unavailable. short circuiting retrieval of user [{}]", user);

Date: Mon, 23 Sep 2024 11:43:42 -0500
Subject: [PATCH 10/58] Deduplicate Nori and Kuromoji User Dictionary (#112768) (#113401)

Added the ability to optionally deduplicate the user dictionary.

---
 docs/changelog/112768.yaml                  |  5 ++
 docs/plugins/analysis-kuromoji.asciidoc     |  8 +-
 docs/plugins/analysis-nori.asciidoc         |  9 +-
 .../kuromoji/KuromojiTokenizerFactory.java  | 12 ++-
 .../kuromoji/KuromojiAnalysisTests.java     | 21 ++++-
 .../analysis/nori/NoriTokenizerFactory.java |  4 +-
 .../analysis/nori/NoriAnalysisTests.java    | 16 +++-
 .../index/analysis/Analysis.java            | 40 ++++++---
 .../index/analysis/AnalysisTests.java       | 89 +++++++++++++++++++
 9 files changed, 186 insertions(+), 18 deletions(-)
 create mode 100644 docs/changelog/112768.yaml

diff --git a/docs/changelog/112768.yaml b/docs/changelog/112768.yaml
new file mode 100644
index 0000000000000..13d5b8eaae38f
--- /dev/null
+++ b/docs/changelog/112768.yaml
@@ -0,0 +1,5 @@
+pr: 112768
+summary: Deduplicate Kuromoji User Dictionary
+area: Search
+type: enhancement
+issues: []

diff --git a/docs/plugins/analysis-kuromoji.asciidoc b/docs/plugins/analysis-kuromoji.asciidoc
index fa6229b9f20e8..0a167bf3f0240 100644
--- a/docs/plugins/analysis-kuromoji.asciidoc
+++ b/docs/plugins/analysis-kuromoji.asciidoc
@@ -133,6 +133,11 @@ unknown words. It can be set to:
 Whether punctuation should be discarded from the output. Defaults to `true`.
 
+`lenient`::
+
+    Whether the `user_dictionary` should be deduplicated on the provided `text`.
+    False by default causing duplicates to generate an error.
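In practice the new flag is driven entirely by analysis settings. The sketch below mirrors the `testKuromojiAnalyzerDuplicateUserDictRuleDeduplication` case added later in this patch; the analyzer name `my_analyzer` is illustrative, everything else comes from the patch itself:

```java
import org.elasticsearch.common.settings.Settings;

// A user dictionary containing an exact duplicate rule. With lenient=true the
// duplicate is logged as a warning and dropped instead of failing the index.
Settings settings = Settings.builder()
    .put("index.analysis.analyzer.my_analyzer.type", "kuromoji")
    .put("index.analysis.analyzer.my_analyzer.lenient", "true")
    .putList(
        "index.analysis.analyzer.my_analyzer.user_dictionary_rules",
        "制限スピード,制限スピード,セイゲンスピード,テスト名詞",
        "制限スピード,制限スピード,セイゲンスピード,テスト名詞" // exact duplicate, ignored when lenient
    )
    .build();
```

With `lenient` left at its default of `false`, the same duplicate fails index creation with a `Found duplicate term [...] in user dictionary at line [...]` error, which is what the updated tests assert.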
+ `user_dictionary`:: + -- @@ -221,7 +226,8 @@ PUT kuromoji_sample "type": "kuromoji_tokenizer", "mode": "extended", "discard_punctuation": "false", - "user_dictionary": "userdict_ja.txt" + "user_dictionary": "userdict_ja.txt", + "lenient": "true" } }, "analyzer": { diff --git a/docs/plugins/analysis-nori.asciidoc b/docs/plugins/analysis-nori.asciidoc index 369268bcef0cd..02980a4ed8a8c 100644 --- a/docs/plugins/analysis-nori.asciidoc +++ b/docs/plugins/analysis-nori.asciidoc @@ -58,6 +58,11 @@ It can be set to: Whether punctuation should be discarded from the output. Defaults to `true`. +`lenient`:: + + Whether the `user_dictionary` should be deduplicated on the provided `text`. + False by default causing duplicates to generate an error. + `user_dictionary`:: + -- @@ -104,7 +109,8 @@ PUT nori_sample "type": "nori_tokenizer", "decompound_mode": "mixed", "discard_punctuation": "false", - "user_dictionary": "userdict_ko.txt" + "user_dictionary": "userdict_ko.txt", + "lenient": "true" } }, "analyzer": { @@ -299,7 +305,6 @@ Which responds with: } -------------------------------------------------- - [[analysis-nori-speech]] ==== `nori_part_of_speech` token filter diff --git a/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/KuromojiTokenizerFactory.java b/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/KuromojiTokenizerFactory.java index a7fa63709d580..edb29a8f4c98e 100644 --- a/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/KuromojiTokenizerFactory.java +++ b/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/KuromojiTokenizerFactory.java @@ -33,6 +33,7 @@ public class KuromojiTokenizerFactory extends AbstractTokenizerFactory { private static final String NBEST_COST = "nbest_cost"; private static final String NBEST_EXAMPLES = "nbest_examples"; private static final String DISCARD_COMPOUND_TOKEN = "discard_compound_token"; + private static final String LENIENT = "lenient"; private final UserDictionary userDictionary; private final Mode mode; @@ -58,7 +59,15 @@ public static UserDictionary getUserDictionary(Environment env, Settings setting "It is not allowed to use [" + USER_DICT_PATH_OPTION + "] in conjunction" + " with [" + USER_DICT_RULES_OPTION + "]" ); } - List ruleList = Analysis.getWordList(env, settings, USER_DICT_PATH_OPTION, USER_DICT_RULES_OPTION, false, true); + List ruleList = Analysis.getWordList( + env, + settings, + USER_DICT_PATH_OPTION, + USER_DICT_RULES_OPTION, + LENIENT, + false, // typically don't want to remove comments as deduplication will provide better feedback + true + ); if (ruleList == null || ruleList.isEmpty()) { return null; } @@ -66,6 +75,7 @@ public static UserDictionary getUserDictionary(Environment env, Settings setting for (String line : ruleList) { sb.append(line).append(System.lineSeparator()); } + try (Reader rulesReader = new StringReader(sb.toString())) { return UserDictionary.open(rulesReader); } catch (IOException e) { diff --git a/plugins/analysis-kuromoji/src/test/java/org/elasticsearch/plugin/analysis/kuromoji/KuromojiAnalysisTests.java b/plugins/analysis-kuromoji/src/test/java/org/elasticsearch/plugin/analysis/kuromoji/KuromojiAnalysisTests.java index 1229b4f348911..f26213d86c5a9 100644 --- a/plugins/analysis-kuromoji/src/test/java/org/elasticsearch/plugin/analysis/kuromoji/KuromojiAnalysisTests.java +++ b/plugins/analysis-kuromoji/src/test/java/org/elasticsearch/plugin/analysis/kuromoji/KuromojiAnalysisTests.java 
@@ -445,7 +445,26 @@ public void testKuromojiAnalyzerDuplicateUserDictRule() throws Exception { ) .build(); IllegalArgumentException exc = expectThrows(IllegalArgumentException.class, () -> createTestAnalysis(settings)); - assertThat(exc.getMessage(), containsString("[制限スピード] in user dictionary at line [3]")); + assertThat(exc.getMessage(), containsString("[制限スピード] in user dictionary at line [4]")); + } + + public void testKuromojiAnalyzerDuplicateUserDictRuleDeduplication() throws Exception { + Settings settings = Settings.builder() + .put("index.analysis.analyzer.my_analyzer.type", "kuromoji") + .put("index.analysis.analyzer.my_analyzer.lenient", "true") + .putList( + "index.analysis.analyzer.my_analyzer.user_dictionary_rules", + "c++,c++,w,w", + "#comment", + "制限スピード,制限スピード,セイゲンスピード,テスト名詞", + "制限スピード,制限スピード,セイゲンスピード,テスト名詞" + ) + .build(); + TestAnalysis analysis = createTestAnalysis(settings); + Analyzer analyzer = analysis.indexAnalyzers.get("my_analyzer"); + try (TokenStream stream = analyzer.tokenStream("", "制限スピード")) { + assertTokenStreamContents(stream, new String[] { "制限スピード" }); + } } public void testDiscardCompoundToken() throws Exception { diff --git a/plugins/analysis-nori/src/main/java/org/elasticsearch/plugin/analysis/nori/NoriTokenizerFactory.java b/plugins/analysis-nori/src/main/java/org/elasticsearch/plugin/analysis/nori/NoriTokenizerFactory.java index 8bc53fa69c9a7..ed8458bc94043 100644 --- a/plugins/analysis-nori/src/main/java/org/elasticsearch/plugin/analysis/nori/NoriTokenizerFactory.java +++ b/plugins/analysis-nori/src/main/java/org/elasticsearch/plugin/analysis/nori/NoriTokenizerFactory.java @@ -31,6 +31,7 @@ public class NoriTokenizerFactory extends AbstractTokenizerFactory { private static final String USER_DICT_PATH_OPTION = "user_dictionary"; private static final String USER_DICT_RULES_OPTION = "user_dictionary_rules"; + private static final String LENIENT = "lenient"; private final UserDictionary userDictionary; private final KoreanTokenizer.DecompoundMode decompoundMode; @@ -54,7 +55,8 @@ public static UserDictionary getUserDictionary(Environment env, Settings setting settings, USER_DICT_PATH_OPTION, USER_DICT_RULES_OPTION, - true, + LENIENT, + false, // typically don't want to remove comments as deduplication will provide better feedback isSupportDuplicateCheck(indexSettings) ); if (ruleList == null || ruleList.isEmpty()) { diff --git a/plugins/analysis-nori/src/test/java/org/elasticsearch/plugin/analysis/nori/NoriAnalysisTests.java b/plugins/analysis-nori/src/test/java/org/elasticsearch/plugin/analysis/nori/NoriAnalysisTests.java index e1123f167da99..1709d02263eea 100644 --- a/plugins/analysis-nori/src/test/java/org/elasticsearch/plugin/analysis/nori/NoriAnalysisTests.java +++ b/plugins/analysis-nori/src/test/java/org/elasticsearch/plugin/analysis/nori/NoriAnalysisTests.java @@ -127,7 +127,7 @@ public void testNoriAnalyzerDuplicateUserDictRule() throws Exception { .build(); final IllegalArgumentException exc = expectThrows(IllegalArgumentException.class, () -> createTestAnalysis(settings)); - assertThat(exc.getMessage(), containsString("[세종] in user dictionary at line [3]")); + assertThat(exc.getMessage(), containsString("[세종] in user dictionary at line [4]")); } public void testNoriAnalyzerDuplicateUserDictRuleWithLegacyVersion() throws IOException { @@ -144,6 +144,20 @@ public void testNoriAnalyzerDuplicateUserDictRuleWithLegacyVersion() throws IOEx } } + public void testNoriAnalyzerDuplicateUserDictRuleDeduplication() throws Exception { + Settings 
settings = Settings.builder() + .put("index.analysis.analyzer.my_analyzer.type", "nori") + .put("index.analysis.analyzer.my_analyzer.lenient", "true") + .put(IndexMetadata.SETTING_VERSION_CREATED, IndexVersions.NORI_DUPLICATES) + .putList("index.analysis.analyzer.my_analyzer.user_dictionary_rules", "c++", "C쁠쁠", "세종", "세종", "세종시 세종 시") + .build(); + TestAnalysis analysis = createTestAnalysis(settings); + Analyzer analyzer = analysis.indexAnalyzers.get("my_analyzer"); + try (TokenStream stream = analyzer.tokenStream("", "세종시")) { + assertTokenStreamContents(stream, new String[] { "세종", "시" }); + } + } + public void testNoriTokenizer() throws Exception { Settings settings = Settings.builder() .put("index.analysis.tokenizer.my_tokenizer.type", "nori_tokenizer") diff --git a/server/src/main/java/org/elasticsearch/index/analysis/Analysis.java b/server/src/main/java/org/elasticsearch/index/analysis/Analysis.java index 1a90f5f110376..462490a7fceb7 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/Analysis.java +++ b/server/src/main/java/org/elasticsearch/index/analysis/Analysis.java @@ -9,6 +9,8 @@ package org.elasticsearch.index.analysis; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.ar.ArabicAnalyzer; import org.apache.lucene.analysis.bg.BulgarianAnalyzer; @@ -67,6 +69,7 @@ import java.security.AccessControlException; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Locale; @@ -78,6 +81,7 @@ public class Analysis { private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(Analysis.class); + private static final Logger logger = LogManager.getLogger(Analysis.class); public static void checkForDeprecatedVersion(String name, Settings settings) { String sVersion = settings.get("version"); @@ -267,12 +271,14 @@ public static List getWordList( Settings settings, String settingPath, String settingList, + String settingLenient, boolean removeComments, boolean checkDuplicate ) { + boolean deduplicateDictionary = settings.getAsBoolean(settingLenient, false); final List ruleList = getWordList(env, settings, settingPath, settingList, removeComments); if (ruleList != null && ruleList.isEmpty() == false && checkDuplicate) { - checkDuplicateRules(ruleList); + return deDuplicateRules(ruleList, deduplicateDictionary == false); } return ruleList; } @@ -288,24 +294,36 @@ public static List getWordList( * If the addition to the HashSet returns false, it means that item was already present in the set, indicating a duplicate. * In such a case, an IllegalArgumentException is thrown specifying the duplicate term and the line number in the original list. * + * Optionally the function will return the deduplicated list + * * @param ruleList The list of rules to check for duplicates. * @throws IllegalArgumentException If a duplicate rule is found. 
*/ - private static void checkDuplicateRules(List ruleList) { - Set dup = new HashSet<>(); - int lineNum = 0; - for (String line : ruleList) { - // ignore comments + private static List deDuplicateRules(List ruleList, boolean failOnDuplicate) { + Set duplicateKeys = new HashSet<>(); + List deduplicatedList = new ArrayList<>(); + for (int lineNum = 0; lineNum < ruleList.size(); lineNum++) { + String line = ruleList.get(lineNum); + // ignore lines beginning with # as those are comments if (line.startsWith("#") == false) { String[] values = CSVUtil.parse(line); - if (dup.add(values[0]) == false) { - throw new IllegalArgumentException( - "Found duplicate term [" + values[0] + "] in user dictionary " + "at line [" + lineNum + "]" - ); + if (duplicateKeys.add(values[0]) == false) { + if (failOnDuplicate) { + throw new IllegalArgumentException( + "Found duplicate term [" + values[0] + "] in user dictionary " + "at line [" + (lineNum + 1) + "]" + ); + } else { + logger.warn("Ignoring duplicate term [" + values[0] + "] in user dictionary " + "at line [" + (lineNum + 1) + "]"); + } + } else { + deduplicatedList.add(line); } + } else { + deduplicatedList.add(line); } - ++lineNum; } + + return Collections.unmodifiableList(deduplicatedList); } private static List loadWordList(Path path, boolean removeComments) throws IOException { diff --git a/server/src/test/java/org/elasticsearch/index/analysis/AnalysisTests.java b/server/src/test/java/org/elasticsearch/index/analysis/AnalysisTests.java index 86c268dd2a092..e05b67874ddbb 100644 --- a/server/src/test/java/org/elasticsearch/index/analysis/AnalysisTests.java +++ b/server/src/test/java/org/elasticsearch/index/analysis/AnalysisTests.java @@ -28,6 +28,7 @@ import java.util.Arrays; import java.util.List; +import static org.hamcrest.CoreMatchers.containsString; import static org.hamcrest.Matchers.is; public class AnalysisTests extends ESTestCase { @@ -104,4 +105,92 @@ public void testParseWordList() throws IOException { List wordList = Analysis.getWordList(env, nodeSettings, "foo.bar"); assertEquals(Arrays.asList("hello", "world"), wordList); } + + public void testParseDuplicates() throws IOException { + Path tempDir = createTempDir(); + Path dict = tempDir.resolve("foo.dict"); + Settings nodeSettings = Settings.builder() + .put("foo.path", tempDir.resolve(dict)) + .put("bar.list", "") + .put("soup.lenient", "true") + .put(Environment.PATH_HOME_SETTING.getKey(), tempDir) + .build(); + try (BufferedWriter writer = Files.newBufferedWriter(dict, StandardCharsets.UTF_8)) { + writer.write("# This is a test of the emergency broadcast system"); + writer.write('\n'); + writer.write("最終契約,最終契約,最終契約,カスタム名 詞"); + writer.write('\n'); + writer.write("最終契約,最終契約,最終契約,カスタム名 詞"); + writer.write('\n'); + writer.write("# This is a test of the emergency broadcast system"); + writer.write('\n'); + writer.write("最終契約,最終契約,最終契約,カスタム名 詞,extra stuff that gets discarded"); + writer.write('\n'); + } + Environment env = TestEnvironment.newEnvironment(nodeSettings); + List wordList = Analysis.getWordList(env, nodeSettings, "foo.path", "bar.list", "soup.lenient", true, true); + assertEquals(List.of("最終契約,最終契約,最終契約,カスタム名 詞"), wordList); + } + + public void testFailOnDuplicates() throws IOException { + Path tempDir = createTempDir(); + Path dict = tempDir.resolve("foo.dict"); + Settings nodeSettings = Settings.builder() + .put("foo.path", tempDir.resolve(dict)) + .put("bar.list", "") + .put("soup.lenient", "false") + .put(Environment.PATH_HOME_SETTING.getKey(), tempDir) + .build(); + try 
(BufferedWriter writer = Files.newBufferedWriter(dict, StandardCharsets.UTF_8)) { + writer.write("# This is a test of the emergency broadcast system"); + writer.write('\n'); + writer.write("最終契約,最終契約,最終契約,カスタム名 詞"); + writer.write('\n'); + writer.write("最終契,最終契,最終契約,カスタム名 詞"); + writer.write('\n'); + writer.write("# This is a test of the emergency broadcast system"); + writer.write('\n'); + writer.write("最終契約,最終契約,最終契約,カスタム名 詞,extra"); + writer.write('\n'); + } + Environment env = TestEnvironment.newEnvironment(nodeSettings); + IllegalArgumentException exc = expectThrows( + IllegalArgumentException.class, + () -> Analysis.getWordList(env, nodeSettings, "foo.path", "bar.list", "soup.lenient", false, true) + ); + assertThat(exc.getMessage(), containsString("[最終契約] in user dictionary at line [5]")); + } + + public void testParseDuplicatesWComments() throws IOException { + Path tempDir = createTempDir(); + Path dict = tempDir.resolve("foo.dict"); + Settings nodeSettings = Settings.builder() + .put("foo.path", tempDir.resolve(dict)) + .put("bar.list", "") + .put("soup.lenient", "true") + .put(Environment.PATH_HOME_SETTING.getKey(), tempDir) + .build(); + try (BufferedWriter writer = Files.newBufferedWriter(dict, StandardCharsets.UTF_8)) { + writer.write("# This is a test of the emergency broadcast system"); + writer.write('\n'); + writer.write("最終契約,最終契約,最終契約,カスタム名 詞"); + writer.write('\n'); + writer.write("最終契約,最終契約,最終契約,カスタム名 詞"); + writer.write('\n'); + writer.write("# This is a test of the emergency broadcast system"); + writer.write('\n'); + writer.write("最終契約,最終契約,最終契約,カスタム名 詞,extra"); + writer.write('\n'); + } + Environment env = TestEnvironment.newEnvironment(nodeSettings); + List wordList = Analysis.getWordList(env, nodeSettings, "foo.path", "bar.list", "soup.lenient", false, true); + assertEquals( + List.of( + "# This is a test of the emergency broadcast system", + "最終契約,最終契約,最終契約,カスタム名 詞", + "# This is a test of the emergency broadcast system" + ), + wordList + ); + } } From a443d47f803a8034b7c8be2d78bc8220d6612920 Mon Sep 17 00:00:00 2001 From: Oleksandr Kolomiiets Date: Mon, 23 Sep 2024 09:59:22 -0700 Subject: [PATCH 11/58] Fix generation of synthetic source test data for range mapper to be deterministic (#113304) (#113402) (cherry picked from commit 02084d3257a8892c3d2024d2f3a7e691d9c6bcc1) # Conflicts: # muted-tests.yml --- .../elasticsearch/index/mapper/RangeFieldMapperTests.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/index/mapper/RangeFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/RangeFieldMapperTests.java index 5676c5c92e5a8..351a3ee6a6098 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/RangeFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/RangeFieldMapperTests.java @@ -307,6 +307,9 @@ protected class TestRange> implements Comparable { builder.startObject(); - if (includeFrom && from == null && randomBoolean()) { + if (includeFrom && from == null && skipDefaultFrom) { // skip field entirely since it is equivalent to a default value } else { builder.field(fromKey, from); } - if (includeTo && to == null && randomBoolean()) { + if (includeTo && to == null && skipDefaultTo) { // skip field entirely since it is equivalent to a default value } else { builder.field(toKey, to); From bac208a154ea04853befcbe81ea9e0ce592c0727 Mon Sep 17 00:00:00 2001 From: Salvatore Campagna <93581129+salvatore-campagna@users.noreply.github.com> Date: Mon, 
23 Sep 2024 22:16:08 +0200 Subject: [PATCH 12/58] Introduce an `ignore_above` index-level setting (#113121) (#113414) Here we introduce a new index-level setting, `ignore_above`, similar to what we have for `ignore_malformed`. The setting will apply to all `keyword`, `wildcard` and `flattened` fields. Each field mapping will still be allowed to override the index-level setting using a mapping-level `ignore_above` value. (cherry picked from commit 208a1fe5714c0e49549de7aaed7a9a847e7b4a15) --- .../mapping/params/ignore-above.asciidoc | 30 +++ .../search/530_ignore_above_stored_source.yml | 214 ++++++++++++++++++ .../540_ignore_above_synthetic_source.yml | 179 +++++++++++++++ .../test/search/550_ignore_above_invalid.yml | 63 ++++++ .../common/settings/IndexScopedSettings.java | 1 + .../elasticsearch/index/IndexSettings.java | 26 +++ .../index/mapper/KeywordFieldMapper.java | 53 +++-- .../index/mapper/MapperFeatures.java | 2 + .../flattened/FlattenedFieldMapper.java | 49 ++-- .../index/mapper/KeywordFieldTypeTests.java | 1 + .../index/mapper/MultiFieldsTests.java | 1 + .../20_ignore_above_stored_source.yml | 56 +++++ .../30_ignore_above_synthetic_source.yml | 58 +++++ .../wildcard/mapper/WildcardFieldMapper.java | 68 +++--- .../test/CoreTestTranslater.java | 24 +- 15 files changed, 762 insertions(+), 63 deletions(-) create mode 100644 rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/530_ignore_above_stored_source.yml create mode 100644 rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/540_ignore_above_synthetic_source.yml create mode 100644 rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/550_ignore_above_invalid.yml create mode 100644 x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/wildcard/20_ignore_above_stored_source.yml create mode 100644 x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/wildcard/30_ignore_above_synthetic_source.yml diff --git a/docs/reference/mapping/params/ignore-above.asciidoc b/docs/reference/mapping/params/ignore-above.asciidoc index 7d04bc82dcbb3..526f2d6205961 100644 --- a/docs/reference/mapping/params/ignore-above.asciidoc +++ b/docs/reference/mapping/params/ignore-above.asciidoc @@ -57,3 +57,33 @@ NOTE: The value for `ignore_above` is the _character count_, but Lucene counts bytes. If you use UTF-8 text with many non-ASCII characters, you may want to set the limit to `32766 / 4 = 8191` since UTF-8 characters may occupy at most 4 bytes. + +[[index-mapping-ignore-above]] +=== `index.mapping.ignore_above` + +The `ignore_above` setting, typically used at the field level, can also be applied at the index level using +`index.mapping.ignore_above`. This setting lets you define a maximum string length for all applicable fields across +the index, including `keyword`, `wildcard`, and keyword values in `flattened` fields. Any values that exceed this +limit will be ignored during indexing and won’t be stored. + +This index-wide setting ensures a consistent approach to managing excessively long values. It works the same as the +field-level setting—if a string’s length goes over the specified limit, that string won’t be indexed or stored. +When dealing with arrays, each element is evaluated separately, and only the elements that exceed the limit are ignored. 
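Before the console example that follows, it may help to sketch how the index-level value threads through mapper construction: it only supplies the default for each field's `ignore_above` parameter, so an explicit mapping value still wins. A minimal sketch using `IGNORE_ABOVE_SETTING` from this patch (the field names in the comments are illustrative):

```java
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.IndexSettings;

// The index-wide value becomes the per-field default...
Settings indexSettings = Settings.builder()
    .put("index.mapping.ignore_above", 256)
    .build();
int defaultLimit = IndexSettings.IGNORE_ABOVE_SETTING.get(indexSettings); // 256
// ...which each field keeps unless its own mapping overrides it:
// "message":  { "type": "keyword" }                        -> effective limit 256
// "raw_body": { "type": "keyword", "ignore_above": 8191 }  -> effective limit 8191
```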
+ +[source,console] +-------------------------------------------------- +PUT my-index-000001 +{ + "settings": { + "index.mapping.ignore_above": 256 + } +} +-------------------------------------------------- + +In this example, all applicable fields in `my-index-000001` will ignore any strings longer than 256 characters. + +TIP: You can override this index-wide setting for specific fields by specifying a custom `ignore_above` value in the +field mapping. + +NOTE: Just like the field-level `ignore_above`, this setting only affects indexing and storage. The original values +are still available in the `_source` field if `_source` is enabled, which is the default behavior in Elasticsearch. diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/530_ignore_above_stored_source.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/530_ignore_above_stored_source.yml new file mode 100644 index 0000000000000..1730a49f743d9 --- /dev/null +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/530_ignore_above_stored_source.yml @@ -0,0 +1,214 @@ +--- +ignore_above mapping level setting: + - requires: + cluster_features: [ "mapper.ignore_above_index_level_setting" ] + reason: introduce ignore_above index level setting + - do: + indices.create: + index: test + body: + settings: + index: + mapping: + ignore_above: 10 + mappings: + properties: + keyword: + type: keyword + flattened: + type: flattened + + - do: + index: + index: test + refresh: true + id: "1" + body: { "keyword": "foo bar", "flattened": { "value": "the quick brown fox" } } + + - do: + search: + body: + fields: + - keyword + - flattened + query: + match_all: {} + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.keyword: "foo bar" } + - match: { hits.hits.0._source.flattened.value: "the quick brown fox" } + - match: { hits.hits.0.fields.keyword.0: "foo bar" } + - match: { hits.hits.0.fields.flattened: null } + +--- +ignore_above mapping level setting on arrays: + - requires: + cluster_features: [ "mapper.ignore_above_index_level_setting" ] + reason: introduce ignore_above index level setting + - do: + indices.create: + index: test + body: + settings: + index: + mapping: + ignore_above: 10 + mappings: + properties: + keyword: + type: keyword + flattened: + type: flattened + + - do: + index: + index: test + refresh: true + id: "1" + body: { "keyword": ["foo bar", "the quick brown fox"], "flattened": { "value": ["the quick brown fox", "jumps over"] } } + + - do: + search: + body: + fields: + - keyword + - flattened + query: + match_all: {} + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.keyword: ["foo bar", "the quick brown fox"] } + - match: { hits.hits.0._source.flattened.value: ["the quick brown fox", "jumps over"] } + - match: { hits.hits.0.fields.keyword.0: "foo bar" } + - match: { hits.hits.0.fields.flattened.0.value: "jumps over" } + +--- +ignore_above mapping overrides setting: + - requires: + cluster_features: [ "mapper.ignore_above_index_level_setting" ] + reason: introduce ignore_above index level setting + - do: + indices.create: + index: test + body: + settings: + index: + mapping: + ignore_above: 10 + mappings: + properties: + keyword: + type: keyword + ignore_above: 100 + flattened: + type: flattened + ignore_above: 100 + + - do: + index: + index: test + refresh: true + id: "1" + body: { "keyword": "foo bar baz foo bar baz", "flattened": { "value": "the quick brown fox" } } + + - do: + search: + body: + fields: + - keyword + - flattened + 
query: + match_all: { } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.keyword: "foo bar baz foo bar baz" } + - match: { hits.hits.0._source.flattened.value: "the quick brown fox" } + - match: { hits.hits.0.fields.keyword.0: "foo bar baz foo bar baz" } + - match: { hits.hits.0.fields.flattened.0.value: "the quick brown fox" } + +--- +ignore_above mapping overrides setting on arrays: + - requires: + cluster_features: [ "mapper.ignore_above_index_level_setting" ] + reason: introduce ignore_above index level setting + - do: + indices.create: + index: test + body: + settings: + index: + mapping: + ignore_above: 10 + mappings: + properties: + keyword: + type: keyword + ignore_above: 100 + flattened: + type: flattened + ignore_above: 100 + + - do: + index: + index: test + refresh: true + id: "1" + body: { "keyword": ["foo bar baz foo bar baz", "the quick brown fox jumps over"], "flattened": { "value": ["the quick brown fox", "jumps over the lazy dog"] } } + + - do: + search: + body: + fields: + - keyword + - flattened + query: + match_all: { } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.keyword: ["foo bar baz foo bar baz", "the quick brown fox jumps over"] } + - match: { hits.hits.0._source.flattened.value: ["the quick brown fox", "jumps over the lazy dog"] } + - match: { hits.hits.0.fields.keyword: ["foo bar baz foo bar baz", "the quick brown fox jumps over"] } + - match: { hits.hits.0.fields.flattened.0.value: ["the quick brown fox", "jumps over the lazy dog"] } + +--- +date ignore_above index level setting: + - requires: + cluster_features: [ "mapper.ignore_above_index_level_setting" ] + reason: introduce ignore_above index level setting + - do: + indices.create: + index: test + body: + settings: + index: + mapping: + ignore_above: 10 + mappings: + properties: + keyword: + type: keyword + date: + type: date + format: "yyyy-MM-dd'T'HH:mm:ss" + + - do: + index: + index: test + refresh: true + id: "1" + body: { "keyword": ["2023-09-17T15:30:00", "2023-09-17T15:31:00"], "date": ["2023-09-17T15:30:00", "2023-09-17T15:31:00"] } + + - do: + search: + body: + fields: + - keyword + - date + query: + match_all: {} + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.keyword: ["2023-09-17T15:30:00", "2023-09-17T15:31:00"] } + - match: { hits.hits.0._source.date: ["2023-09-17T15:30:00", "2023-09-17T15:31:00"] } + - match: { hits.hits.0.fields.keyword: null } + - match: { hits.hits.0.fields.date: ["2023-09-17T15:30:00","2023-09-17T15:31:00"] } diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/540_ignore_above_synthetic_source.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/540_ignore_above_synthetic_source.yml new file mode 100644 index 0000000000000..defdc8467bf8d --- /dev/null +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/540_ignore_above_synthetic_source.yml @@ -0,0 +1,179 @@ +--- +ignore_above mapping level setting: + - requires: + cluster_features: [ "mapper.ignore_above_index_level_setting" ] + reason: introduce ignore_above index level setting + - do: + indices.create: + index: test + body: + settings: + index: + mapping: + ignore_above: 10 + mappings: + _source: + mode: synthetic + properties: + keyword: + type: keyword + flattened: + type: flattened + + - do: + index: + index: test + refresh: true + id: "1" + body: { "keyword": "foo bar", "flattened": { "value": "the quick brown fox" } } + + - do: + search: + body: + fields: + - keyword + - flattened + query: + 
match_all: {} + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.keyword: "foo bar" } + - match: { hits.hits.0._source.flattened.value: "the quick brown fox" } + - match: { hits.hits.0.fields.keyword.0: "foo bar" } + +--- +ignore_above mapping level setting on arrays: + - requires: + cluster_features: [ "mapper.ignore_above_index_level_setting" ] + reason: introduce ignore_above index level setting + - do: + indices.create: + index: test + body: + settings: + index: + mapping: + ignore_above: 10 + mappings: + _source: + mode: synthetic + properties: + keyword: + type: keyword + flattened: + type: flattened + + - do: + index: + index: test + refresh: true + id: "1" + body: { "keyword": ["foo bar", "the quick brown fox"], "flattened": { "value": ["the quick brown fox", "jumps over"] } } + + - do: + search: + body: + fields: + - keyword + - flattened + query: + match_all: {} + + - length: { hits.hits: 1 } + #TODO: synthetic source field reconstruction bug (TBD: add link to the issue here) + #- match: { hits.hits.0._source.keyword: ["foo bar", "the quick brown fox"] } + - match: { hits.hits.0._source.flattened.value: ["the quick brown fox", "jumps over"] } + - match: { hits.hits.0.fields.keyword.0: "foo bar" } + - match: { hits.hits.0.fields.flattened.0.value: "jumps over" } + +--- +ignore_above mapping overrides setting: + - requires: + cluster_features: [ "mapper.ignore_above_index_level_setting" ] + reason: introduce ignore_above index level setting + - do: + indices.create: + index: test + body: + settings: + index: + mapping: + ignore_above: 10 + mappings: + _source: + mode: synthetic + properties: + keyword: + type: keyword + ignore_above: 100 + flattened: + type: flattened + ignore_above: 100 + + - do: + index: + index: test + refresh: true + id: "1" + body: { "keyword": "foo bar baz foo bar baz", "flattened": { "value": "the quick brown fox" } } + + - do: + search: + body: + fields: + - keyword + - flattened + query: + match_all: { } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.keyword: "foo bar baz foo bar baz" } + - match: { hits.hits.0._source.flattened.value: "the quick brown fox" } + - match: { hits.hits.0.fields.keyword.0: "foo bar baz foo bar baz" } + - match: { hits.hits.0.fields.flattened.0.value: "the quick brown fox" } + +--- +ignore_above mapping overrides setting on arrays: + - requires: + cluster_features: [ "mapper.ignore_above_index_level_setting" ] + reason: introduce ignore_above index level setting + - do: + indices.create: + index: test + body: + settings: + index: + mapping: + ignore_above: 10 + mappings: + _source: + mode: synthetic + properties: + keyword: + type: keyword + ignore_above: 100 + flattened: + type: flattened + ignore_above: 100 + + - do: + index: + index: test + refresh: true + id: "1" + body: { "keyword": ["foo bar baz foo bar baz", "the quick brown fox jumps over"], "flattened": { "value": ["the quick brown fox", "jumps over the lazy dog"] } } + + - do: + search: + body: + fields: + - keyword + - flattened + query: + match_all: { } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.keyword: ["foo bar baz foo bar baz", "the quick brown fox jumps over"] } + - match: { hits.hits.0._source.flattened.value: ["jumps over the lazy dog", "the quick brown fox"] } + - match: { hits.hits.0.fields.keyword: ["foo bar baz foo bar baz", "the quick brown fox jumps over"] } + - match: { hits.hits.0.fields.flattened.0.value: ["jumps over the lazy dog", "the quick brown fox"] } diff --git 
a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/550_ignore_above_invalid.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/550_ignore_above_invalid.yml new file mode 100644 index 0000000000000..3c29845871fe7 --- /dev/null +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/550_ignore_above_invalid.yml @@ -0,0 +1,63 @@ +--- +ignore_above index setting negative value: + - do: + catch: bad_request + indices.create: + index: test + body: + settings: + index: + mapping: + ignore_above: -1 + mappings: + properties: + keyword: + type: keyword + +--- +keyword ignore_above mapping setting negative value: + - requires: + cluster_features: [ "mapper.ignore_above_index_level_setting" ] + reason: introduce ignore_above index level setting + - do: + catch: bad_request + indices.create: + index: test + body: + mappings: + properties: + keyword: + ignore_above: -2 + type: keyword + +--- +flattened ignore_above mapping setting negative value: + - requires: + cluster_features: [ "mapper.ignore_above_index_level_setting" ] + reason: introduce ignore_above index level setting + - do: + catch: bad_request + indices.create: + index: test + body: + mappings: + properties: + flattened: + ignore_above: -2 + type: flattened + +--- +wildcard ignore_above mapping setting negative value: + - requires: + cluster_features: [ "mapper.ignore_above_index_level_setting" ] + reason: introduce ignore_above index level setting + - do: + catch: bad_request + indices.create: + index: test + body: + mappings: + properties: + wildcard: + ignore_above: -2 + type: wildcard diff --git a/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java b/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java index 778136cbf5d31..0258fdc77eadf 100644 --- a/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java +++ b/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java @@ -151,6 +151,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings { IndexSettings.INDEX_SEARCH_IDLE_AFTER, IndexSettings.INDEX_SEARCH_THROTTLED, IndexFieldDataService.INDEX_FIELDDATA_CACHE_KEY, + IndexSettings.IGNORE_ABOVE_SETTING, FieldMapper.IGNORE_MALFORMED_SETTING, FieldMapper.COERCE_SETTING, Store.INDEX_STORE_STATS_REFRESH_INTERVAL_SETTING, diff --git a/server/src/main/java/org/elasticsearch/index/IndexSettings.java b/server/src/main/java/org/elasticsearch/index/IndexSettings.java index 41523c6dc2c7e..c97ba3953a58d 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexSettings.java +++ b/server/src/main/java/org/elasticsearch/index/IndexSettings.java @@ -25,6 +25,7 @@ import org.elasticsearch.common.unit.ByteSizeUnit; import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.core.TimeValue; +import org.elasticsearch.features.NodeFeature; import org.elasticsearch.index.mapper.IgnoredSourceFieldMapper; import org.elasticsearch.index.mapper.Mapper; import org.elasticsearch.index.translog.Translog; @@ -700,6 +701,31 @@ public Iterator> settings() { Property.IndexSettingDeprecatedInV7AndRemovedInV8 ); + /** + * The `index.mapping.ignore_above` setting defines the maximum length for the content of a field that will be indexed + * or stored. If the length of the field’s content exceeds this limit, the field value will be ignored during indexing. + * This setting is useful for `keyword`, `flattened`, and `wildcard` fields where very large values are undesirable. 
+ * It allows users to manage the size of indexed data by skipping fields with excessively long content. As an index-level + * setting, it applies to all `keyword` and `wildcard` fields, as well as to keyword values within `flattened` fields. + * When it comes to arrays, the `ignore_above` setting applies individually to each element of the array. If any element's + * length exceeds the specified limit, only that element will be ignored during indexing, while the rest of the array will + * still be processed. This behavior is consistent with the field-level `ignore_above` setting. + * This setting can be overridden at the field level by specifying a custom `ignore_above` value in the field mapping. + *

+     * Example usage:
+     * <pre>
+     * "index.mapping.ignore_above": 256
+     * </pre>
+ */ + public static final Setting IGNORE_ABOVE_SETTING = Setting.intSetting( + "index.mapping.ignore_above", + Integer.MAX_VALUE, + 0, + Property.IndexScope, + Property.ServerlessPublic + ); + public static final NodeFeature IGNORE_ABOVE_INDEX_LEVEL_SETTING = new NodeFeature("mapper.ignore_above_index_level_setting"); + private final Index index; private final IndexVersion version; private final Logger logger; diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java index 2da8d32773733..46b1dbdce4c4b 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java @@ -79,6 +79,7 @@ import static org.apache.lucene.index.IndexWriter.MAX_TERM_LENGTH; import static org.elasticsearch.core.Strings.format; +import static org.elasticsearch.index.IndexSettings.IGNORE_ABOVE_SETTING; /** * A field mapper for keywords. This mapper accepts strings and indexes them as-is. @@ -110,8 +111,6 @@ public static class Defaults { Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER ); - - public static final int IGNORE_ABOVE = Integer.MAX_VALUE; } public static class KeywordField extends Field { @@ -158,12 +157,8 @@ public static final class Builder extends FieldMapper.DimensionBuilder { m -> toType(m).fieldType().eagerGlobalOrdinals(), false ); - private final Parameter ignoreAbove = Parameter.intParam( - "ignore_above", - true, - m -> toType(m).fieldType().ignoreAbove(), - Defaults.IGNORE_ABOVE - ); + private final Parameter ignoreAbove; + private final int ignoreAboveDefault; private final Parameter indexOptions = TextParams.keywordIndexOptions(m -> toType(m).indexOptions); private final Parameter hasNorms = TextParams.norms(false, m -> toType(m).fieldType.omitNorms() == false); @@ -193,7 +188,23 @@ public static final class Builder extends FieldMapper.DimensionBuilder { private final ScriptCompiler scriptCompiler; private final IndexVersion indexCreatedVersion; - public Builder(String name, IndexAnalyzers indexAnalyzers, ScriptCompiler scriptCompiler, IndexVersion indexCreatedVersion) { + public Builder(final String name, final MappingParserContext mappingParserContext) { + this( + name, + mappingParserContext.getIndexAnalyzers(), + mappingParserContext.scriptCompiler(), + IGNORE_ABOVE_SETTING.get(mappingParserContext.getSettings()), + mappingParserContext.getIndexSettings().getIndexVersionCreated() + ); + } + + Builder( + String name, + IndexAnalyzers indexAnalyzers, + ScriptCompiler scriptCompiler, + int ignoreAboveDefault, + IndexVersion indexCreatedVersion + ) { super(name); this.indexAnalyzers = indexAnalyzers; this.scriptCompiler = Objects.requireNonNull(scriptCompiler); @@ -220,10 +231,17 @@ public Builder(String name, IndexAnalyzers indexAnalyzers, ScriptCompiler script ); } }).precludesParameters(normalizer); + this.ignoreAboveDefault = ignoreAboveDefault; + this.ignoreAbove = Parameter.intParam("ignore_above", true, m -> toType(m).fieldType().ignoreAbove(), ignoreAboveDefault) + .addValidator(v -> { + if (v < 0) { + throw new IllegalArgumentException("[ignore_above] must be positive, got [" + v + "]"); + } + }); } public Builder(String name, IndexVersion indexCreatedVersion) { - this(name, null, ScriptCompiler.NONE, indexCreatedVersion); + this(name, null, ScriptCompiler.NONE, Integer.MAX_VALUE, indexCreatedVersion); } public Builder ignoreAbove(int ignoreAbove) { @@ -370,10 +388,7 @@ 
public KeywordFieldMapper build(MapperBuilderContext context) { private static final IndexVersion MINIMUM_COMPATIBILITY_VERSION = IndexVersion.fromId(5000099); - public static final TypeParser PARSER = new TypeParser( - (n, c) -> new Builder(n, c.getIndexAnalyzers(), c.scriptCompiler(), c.indexVersionCreated()), - MINIMUM_COMPATIBILITY_VERSION - ); + public static final TypeParser PARSER = new TypeParser(Builder::new, MINIMUM_COMPATIBILITY_VERSION); public static final class KeywordFieldType extends StringFieldType { @@ -865,6 +880,8 @@ public boolean hasNormalizer() { private final boolean isSyntheticSource; private final IndexAnalyzers indexAnalyzers; + private final int ignoreAboveDefault; + private final int ignoreAbove; private KeywordFieldMapper( String simpleName, @@ -887,6 +904,8 @@ private KeywordFieldMapper( this.scriptCompiler = builder.scriptCompiler; this.indexCreatedVersion = builder.indexCreatedVersion; this.isSyntheticSource = isSyntheticSource; + this.ignoreAboveDefault = builder.ignoreAboveDefault; + this.ignoreAbove = builder.ignoreAbove.getValue(); } @Override @@ -1004,7 +1023,9 @@ public Map indexAnalyzers() { @Override public FieldMapper.Builder getMergeBuilder() { - return new Builder(leafName(), indexAnalyzers, scriptCompiler, indexCreatedVersion).dimension(fieldType().isDimension()).init(this); + return new Builder(leafName(), indexAnalyzers, scriptCompiler, ignoreAboveDefault, indexCreatedVersion).dimension( + fieldType().isDimension() + ).init(this); } @Override @@ -1072,7 +1093,7 @@ protected BytesRef preserve(BytesRef value) { }); } - if (fieldType().ignoreAbove != Defaults.IGNORE_ABOVE) { + if (fieldType().ignoreAbove != ignoreAboveDefault) { layers.add(new CompositeSyntheticFieldLoader.StoredFieldLayer(originalName()) { @Override protected void writeValue(Object value, XContentBuilder b) throws IOException { diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java index d18c3283ef909..d2ca7a24a78fd 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java @@ -11,6 +11,7 @@ import org.elasticsearch.features.FeatureSpecification; import org.elasticsearch.features.NodeFeature; +import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.mapper.flattened.FlattenedFieldMapper; import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; @@ -41,6 +42,7 @@ public Set getFeatures() { SourceFieldMapper.SYNTHETIC_SOURCE_WITH_COPY_TO_AND_DOC_VALUES_FALSE_SUPPORT, SourceFieldMapper.SYNTHETIC_SOURCE_COPY_TO_FIX, FlattenedFieldMapper.IGNORE_ABOVE_SUPPORT, + IndexSettings.IGNORE_ABOVE_INDEX_LEVEL_SETTING, SourceFieldMapper.SYNTHETIC_SOURCE_COPY_TO_INSIDE_OBJECTS_FIX ); } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldMapper.java index 867a4a7ec39e0..9ea52752ec679 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldMapper.java @@ -82,6 +82,8 @@ import java.util.Set; import java.util.function.Function; +import static org.elasticsearch.index.IndexSettings.IGNORE_ABOVE_SETTING; + /** * A field mapper that accepts a JSON object and flattens it into a single field. 
This data type * can be a useful alternative to an 'object' mapping when the object has a large, unknown set @@ -123,6 +125,9 @@ private static Builder builder(Mapper in) { return ((FlattenedFieldMapper) in).builder; } + private final int ignoreAboveDefault; + private final int ignoreAbove; + public static class Builder extends FieldMapper.Builder { final Parameter depthLimit = Parameter.intParam( @@ -148,12 +153,8 @@ public static class Builder extends FieldMapper.Builder { m -> builder(m).eagerGlobalOrdinals.get(), false ); - private final Parameter ignoreAbove = Parameter.intParam( - "ignore_above", - true, - m -> builder(m).ignoreAbove.get(), - Integer.MAX_VALUE - ); + private final int ignoreAboveDefault; + private final Parameter ignoreAbove; private final Parameter indexOptions = TextParams.keywordIndexOptions(m -> builder(m).indexOptions.get()); private final Parameter similarity = TextParams.similarity(m -> builder(m).similarity.get()); @@ -176,7 +177,7 @@ public static class Builder extends FieldMapper.Builder { + "] are true" ); } - }).precludesParameters(ignoreAbove); + }); private final Parameter> meta = Parameter.metaParam(); @@ -184,8 +185,20 @@ public static FieldMapper.Parameter> dimensionsParam(Function builder(m).ignoreAbove.get(), ignoreAboveDefault) + .addValidator(v -> { + if (v < 0) { + throw new IllegalArgumentException("[ignore_above] must be positive, got [" + v + "]"); + } + }); + this.dimensions.precludesParameters(ignoreAbove); } @Override @@ -223,11 +236,11 @@ public FlattenedFieldMapper build(MapperBuilderContext context) { dimensions.get(), ignoreAbove.getValue() ); - return new FlattenedFieldMapper(leafName(), ft, builderParams(this, context), this); + return new FlattenedFieldMapper(leafName(), ft, builderParams(this, context), ignoreAboveDefault, this); } } - public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n)); + public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, IGNORE_ABOVE_SETTING.get(c.getSettings()))); /** * A field type that represents the values under a particular JSON key, used @@ -808,9 +821,17 @@ public void validateMatchedRoutingPath(final String routingPath) { private final FlattenedFieldParser fieldParser; private final Builder builder; - private FlattenedFieldMapper(String leafName, MappedFieldType mappedFieldType, BuilderParams builderParams, Builder builder) { + private FlattenedFieldMapper( + String leafName, + MappedFieldType mappedFieldType, + BuilderParams builderParams, + int ignoreAboveDefault, + Builder builder + ) { super(leafName, mappedFieldType, builderParams); + this.ignoreAboveDefault = ignoreAboveDefault; this.builder = builder; + this.ignoreAbove = builder.ignoreAbove.get(); this.fieldParser = new FlattenedFieldParser( mappedFieldType.name(), mappedFieldType.name() + KEYED_FIELD_SUFFIX, @@ -835,8 +856,8 @@ int depthLimit() { return builder.depthLimit.get(); } - int ignoreAbove() { - return builder.ignoreAbove.get(); + public int ignoreAbove() { + return ignoreAbove; } @Override @@ -876,7 +897,7 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio @Override public FieldMapper.Builder getMergeBuilder() { - return new Builder(leafName()).init(this); + return new Builder(leafName(), ignoreAboveDefault).init(this); } @Override diff --git a/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldTypeTests.java index 7e5cc5045c100..b4c7ea0ed9508 100644 --- 
a/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldTypeTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldTypeTests.java @@ -243,6 +243,7 @@ public void testFetchSourceValue() throws IOException { "field", createIndexAnalyzers(), ScriptCompiler.NONE, + Integer.MAX_VALUE, IndexVersion.current() ).normalizer("lowercase").build(MapperBuilderContext.root(false, false)).fieldType(); assertEquals(List.of("value"), fetchSourceValue(normalizerMapper, "VALUE")); diff --git a/server/src/test/java/org/elasticsearch/index/mapper/MultiFieldsTests.java b/server/src/test/java/org/elasticsearch/index/mapper/MultiFieldsTests.java index 06c3125648309..fd024c5d23e28 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/MultiFieldsTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/MultiFieldsTests.java @@ -63,6 +63,7 @@ private KeywordFieldMapper.Builder getKeywordFieldMapperBuilder(boolean isStored "field", IndexAnalyzers.of(Map.of(), Map.of("normalizer", Lucene.STANDARD_ANALYZER), Map.of()), ScriptCompiler.NONE, + Integer.MAX_VALUE, IndexVersion.current() ); if (isStored) { diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/wildcard/20_ignore_above_stored_source.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/wildcard/20_ignore_above_stored_source.yml new file mode 100644 index 0000000000000..252bafbdbe15a --- /dev/null +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/wildcard/20_ignore_above_stored_source.yml @@ -0,0 +1,56 @@ +--- +wildcard field type ignore_above: + - requires: + cluster_features: [ "mapper.ignore_above_index_level_setting" ] + reason: introduce ignore_above index level setting + - do: + indices.create: + index: test + body: + settings: + index: + mapping: + ignore_above: 10 + mappings: + properties: + a_wildcard: + type: wildcard + b_wildcard: + type: wildcard + ignore_above: 20 + c_wildcard: + type: wildcard + d_wildcard: + type: wildcard + ignore_above: 5 + + + + - do: + index: + index: test + refresh: true + id: "1" + body: { "a_wildcard": "foo bar", "b_wildcard": "the quick brown", "c_wildcard": ["foo", "bar", "jumps over the lazy dog"], "d_wildcard": ["foo", "bar", "the quick"]} + + - do: + search: + body: + fields: + - a_wildcard + - b_wildcard + - c_wildcard + - d_wildcard + query: + match_all: {} + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.a_wildcard: "foo bar" } + - match: { hits.hits.0._source.b_wildcard: "the quick brown" } + - match: { hits.hits.0._source.c_wildcard: ["foo", "bar", "jumps over the lazy dog"] } + - match: { hits.hits.0._source.d_wildcard: ["foo", "bar", "the quick"] } + - match: { hits.hits.0.fields.a_wildcard.0: "foo bar" } + - match: { hits.hits.0.fields.b_wildcard.0: "the quick brown" } + - match: { hits.hits.0.fields.c_wildcard: ["foo", "bar"] } + - match: { hits.hits.0.fields.d_wildcard: ["foo", "bar"] } + diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/wildcard/30_ignore_above_synthetic_source.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/wildcard/30_ignore_above_synthetic_source.yml new file mode 100644 index 0000000000000..f5c9f3d92369a --- /dev/null +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/wildcard/30_ignore_above_synthetic_source.yml @@ -0,0 +1,58 @@ +--- +wildcard field type ignore_above: + - requires: + cluster_features: [ "mapper.ignore_above_index_level_setting" ] + reason: introduce ignore_above index level setting + - do: + 
indices.create: + index: test + body: + settings: + index: + mapping: + ignore_above: 10 + mappings: + _source: + mode: synthetic + properties: + a_wildcard: + type: wildcard + b_wildcard: + type: wildcard + ignore_above: 20 + c_wildcard: + type: wildcard + d_wildcard: + type: wildcard + ignore_above: 5 + + + + - do: + index: + index: test + refresh: true + id: "1" + body: { "a_wildcard": "foo bar", "b_wildcard": "the quick brown", "c_wildcard": ["foo", "bar", "jumps over the lazy dog"], "d_wildcard": ["foo", "bar", "the quick"]} + + - do: + search: + body: + fields: + - a_wildcard + - b_wildcard + - c_wildcard + - d_wildcard + query: + match_all: {} + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.a_wildcard: "foo bar" } + - match: { hits.hits.0._source.b_wildcard: "the quick brown" } + - match: { hits.hits.0._source.c_wildcard: ["bar", "foo"] } + - match: { hits.hits.0._source.d_wildcard: ["bar", "foo", "the quick"] } + - match: { hits.hits.0.fields.a_wildcard.0: "foo bar" } + - match: { hits.hits.0.fields.b_wildcard.0: "the quick brown" } + - match: { hits.hits.0.fields.c_wildcard: ["bar", "foo"] } + - match: { hits.hits.0.fields.d_wildcard: ["bar", "foo"] } + diff --git a/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java b/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java index 8e4f56e299587..1e97e64371586 100644 --- a/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java +++ b/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java @@ -87,6 +87,8 @@ import java.util.Map; import java.util.Set; +import static org.elasticsearch.index.IndexSettings.IGNORE_ABOVE_SETTING; + /** * A {@link FieldMapper} for indexing fields with ngrams for efficient wildcard matching */ @@ -191,7 +193,6 @@ public static class Defaults { Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER ); - public static final int IGNORE_ABOVE = Integer.MAX_VALUE; } private static WildcardFieldMapper toType(FieldMapper in) { @@ -200,21 +201,28 @@ private static WildcardFieldMapper toType(FieldMapper in) { public static class Builder extends FieldMapper.Builder { - final Parameter ignoreAbove = Parameter.intParam("ignore_above", true, m -> toType(m).ignoreAbove, Defaults.IGNORE_ABOVE) - .addValidator(v -> { - if (v < 0) { - throw new IllegalArgumentException("[ignore_above] must be positive, got [" + v + "]"); - } - }); + final Parameter ignoreAbove; final Parameter nullValue = Parameter.stringParam("null_value", false, m -> toType(m).nullValue, null).acceptsNull(); final Parameter> meta = Parameter.metaParam(); final IndexVersion indexVersionCreated; - public Builder(String name, IndexVersion indexVersionCreated) { + final int ignoreAboveDefault; + + public Builder(final String name, IndexVersion indexVersionCreated) { + this(name, Integer.MAX_VALUE, indexVersionCreated); + } + + private Builder(String name, int ignoreAboveDefault, IndexVersion indexVersionCreated) { super(name); this.indexVersionCreated = indexVersionCreated; + this.ignoreAboveDefault = ignoreAboveDefault; + this.ignoreAbove = Parameter.intParam("ignore_above", true, m -> toType(m).ignoreAbove, ignoreAboveDefault).addValidator(v -> { + if (v < 0) { + throw new IllegalArgumentException("[ignore_above] must be positive, got [" + v + "]"); + } + }); } @Override @@ -236,23 +244,18 @@ Builder nullValue(String nullValue) { public WildcardFieldMapper 
build(MapperBuilderContext context) { return new WildcardFieldMapper( leafName(), - new WildcardFieldType( - context.buildFullName(leafName()), - nullValue.get(), - ignoreAbove.get(), - indexVersionCreated, - meta.get() - ), - ignoreAbove.get(), + new WildcardFieldType(context.buildFullName(leafName()), indexVersionCreated, meta.get(), this), context.isSourceSynthetic(), builderParams(this, context), - nullValue.get(), - indexVersionCreated + indexVersionCreated, + this ); } } - public static TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.indexVersionCreated())); + public static TypeParser PARSER = new TypeParser( + (n, c) -> new Builder(n, IGNORE_ABOVE_SETTING.get(c.getSettings()), c.indexVersionCreated()) + ); public static final char TOKEN_START_OR_END_CHAR = 0; public static final String TOKEN_START_STRING = Character.toString(TOKEN_START_OR_END_CHAR); @@ -263,18 +266,18 @@ public static final class WildcardFieldType extends MappedFieldType { static Analyzer lowercaseNormalizer = new LowercaseNormalizer(); private final String nullValue; - private final int ignoreAbove; private final NamedAnalyzer analyzer; + private final int ignoreAbove; - private WildcardFieldType(String name, String nullValue, int ignoreAbove, IndexVersion version, Map meta) { + private WildcardFieldType(String name, IndexVersion version, Map meta, Builder builder) { super(name, true, false, true, Defaults.TEXT_SEARCH_INFO, meta); if (version.onOrAfter(IndexVersions.V_7_10_0)) { this.analyzer = WILDCARD_ANALYZER_7_10; } else { this.analyzer = WILDCARD_ANALYZER_7_9; } - this.nullValue = nullValue; - this.ignoreAbove = ignoreAbove; + this.nullValue = builder.nullValue.getValue(); + this.ignoreAbove = builder.ignoreAbove.getValue(); } @Override @@ -889,26 +892,27 @@ protected String parseSourceValue(Object value) { NGRAM_FIELD_TYPE = freezeAndDeduplicateFieldType(ft); assert NGRAM_FIELD_TYPE.indexOptions() == IndexOptions.DOCS; } - - private final int ignoreAbove; private final String nullValue; private final IndexVersion indexVersionCreated; + + private final int ignoreAbove; + private final int ignoreAboveDefault; private final boolean storeIgnored; private WildcardFieldMapper( String simpleName, WildcardFieldType mappedFieldType, - int ignoreAbove, boolean storeIgnored, BuilderParams builderParams, - String nullValue, - IndexVersion indexVersionCreated + IndexVersion indexVersionCreated, + Builder builder ) { super(simpleName, mappedFieldType, builderParams); - this.nullValue = nullValue; - this.ignoreAbove = ignoreAbove; + this.nullValue = builder.nullValue.getValue(); this.storeIgnored = storeIgnored; this.indexVersionCreated = indexVersionCreated; + this.ignoreAbove = builder.ignoreAbove.getValue(); + this.ignoreAboveDefault = builder.ignoreAboveDefault; } @Override @@ -983,14 +987,14 @@ protected String contentType() { @Override public FieldMapper.Builder getMergeBuilder() { - return new Builder(leafName(), indexVersionCreated).init(this); + return new Builder(leafName(), ignoreAboveDefault, indexVersionCreated).init(this); } @Override protected SyntheticSourceSupport syntheticSourceSupport() { var layers = new ArrayList(); layers.add(new WildcardSyntheticFieldLoader()); - if (ignoreAbove != Defaults.IGNORE_ABOVE) { + if (ignoreAbove != ignoreAboveDefault) { layers.add(new CompositeSyntheticFieldLoader.StoredFieldLayer(originalName()) { @Override protected void writeValue(Object value, XContentBuilder b) throws IOException { diff --git 
a/x-pack/qa/runtime-fields/src/main/java/org/elasticsearch/xpack/runtimefields/test/CoreTestTranslater.java b/x-pack/qa/runtime-fields/src/main/java/org/elasticsearch/xpack/runtimefields/test/CoreTestTranslater.java index 2bea4bb247d8f..d34303ea803d6 100644 --- a/x-pack/qa/runtime-fields/src/main/java/org/elasticsearch/xpack/runtimefields/test/CoreTestTranslater.java +++ b/x-pack/qa/runtime-fields/src/main/java/org/elasticsearch/xpack/runtimefields/test/CoreTestTranslater.java @@ -222,10 +222,32 @@ public boolean modifySections(List executables) { */ protected abstract boolean modifySearch(ApiCallSection search); + private static Object getSetting(final Object map, final String... keys) { + Map current = (Map) map; + for (final String key : keys) { + if (current != null) { + current = (Map) current.get(key); + } else { + return null; + } + } + return current; + } + private boolean modifyCreateIndex(ApiCallSection createIndex) { String index = createIndex.getParams().get("index"); for (Map body : createIndex.getBodies()) { - Object settings = body.get("settings"); + final Object settings = body.get("settings"); + final Object indexMapping = getSetting(settings, "index", "mapping"); + if (indexMapping instanceof Map m) { + final Object ignoreAbove = m.get("ignore_above"); + if (ignoreAbove instanceof Integer ignoreAboveValue) { + if (ignoreAboveValue >= 0) { + // Scripts don't support ignore_above so we skip those fields + continue; + } + } + } if (settings instanceof Map && ((Map) settings).containsKey("sort.field")) { /* * You can't sort the index on a runtime field From 62d3d538a4d82a5118d8889114077ffbf8a8672e Mon Sep 17 00:00:00 2001 From: Stanislav Malyshev Date: Mon, 23 Sep 2024 14:52:04 -0600 Subject: [PATCH 13/58] Test fix: ensure we don't accidentally generate two identical histograms (#113322) (#113415) * Test fix: looks like using one value is not random enough --- .../admin/cluster/stats/CCSTelemetrySnapshotTests.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/action/admin/cluster/stats/CCSTelemetrySnapshotTests.java b/server/src/test/java/org/elasticsearch/action/admin/cluster/stats/CCSTelemetrySnapshotTests.java index 0bca6e57dc47b..e9188d9cb8f0d 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/cluster/stats/CCSTelemetrySnapshotTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/cluster/stats/CCSTelemetrySnapshotTests.java @@ -33,7 +33,7 @@ public class CCSTelemetrySnapshotTests extends AbstractWireSerializingTestCase Date: Mon, 23 Sep 2024 13:53:37 -0700 Subject: [PATCH 14/58] Unmute logsdb data generation tests (#113306) (#113321) (cherry picked from commit 413b23a9ea16206e8cb97bc99f5ab6ac578229c7) # Conflicts: # muted-tests.yml Co-authored-by: Elastic Machine --- muted-tests.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/muted-tests.yml b/muted-tests.yml index 44cb6f631ddff..20863a6f6349d 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -208,9 +208,6 @@ tests: - class: org.elasticsearch.packaging.test.WindowsServiceTests method: test33JavaChanged issue: https://github.com/elastic/elasticsearch/issues/113177 -- class: org.elasticsearch.datastreams.logsdb.qa.StandardVersusLogsIndexModeRandomDataChallengeRestIT - method: testMatchAllQuery - issue: https://github.com/elastic/elasticsearch/issues/113265 # Examples: # From 54ddc29fc7d086574b17901032b63516711b499e Mon Sep 17 00:00:00 2001 From: Tim Brooks Date: Mon, 23 Sep 2024 16:18:04 -0600 Subject: [PATCH 15/58] 
Default incremental bulk functionality to false (#113416) (#113417) This commit flips the incremental bulk setting to false. Additionally, it removes some test code which intermittently causes issues with security test cases. --- .../http/IncrementalBulkRestIT.java | 8 +++ .../action/bulk/IncrementalBulkService.java | 2 +- .../elasticsearch/test/ESIntegTestCase.java | 49 ++----------------- 3 files changed, 13 insertions(+), 46 deletions(-) diff --git a/qa/smoke-test-http/src/javaRestTest/java/org/elasticsearch/http/IncrementalBulkRestIT.java b/qa/smoke-test-http/src/javaRestTest/java/org/elasticsearch/http/IncrementalBulkRestIT.java index 2b24e53874e51..da05011696274 100644 --- a/qa/smoke-test-http/src/javaRestTest/java/org/elasticsearch/http/IncrementalBulkRestIT.java +++ b/qa/smoke-test-http/src/javaRestTest/java/org/elasticsearch/http/IncrementalBulkRestIT.java @@ -29,6 +29,14 @@ @ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.SUITE, supportsDedicatedMasters = false, numDataNodes = 2, numClientNodes = 0) public class IncrementalBulkRestIT extends HttpSmokeTestCase { + @Override + protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) { + return Settings.builder() + .put(super.nodeSettings(nodeOrdinal, otherSettings)) + .put(IncrementalBulkService.INCREMENTAL_BULK.getKey(), true) + .build(); + } + public void testBulkUriMatchingDoesNotMatchBulkCapabilitiesApi() throws IOException { Request request = new Request("GET", "/_capabilities?method=GET&path=%2F_bulk&capabilities=failure_store_status&pretty"); Response response = getRestClient().performRequest(request); diff --git a/server/src/main/java/org/elasticsearch/action/bulk/IncrementalBulkService.java b/server/src/main/java/org/elasticsearch/action/bulk/IncrementalBulkService.java index 7185c4d76265e..fc264de35f510 100644 --- a/server/src/main/java/org/elasticsearch/action/bulk/IncrementalBulkService.java +++ b/server/src/main/java/org/elasticsearch/action/bulk/IncrementalBulkService.java @@ -36,7 +36,7 @@ public class IncrementalBulkService { public static final Setting INCREMENTAL_BULK = boolSetting( "rest.incremental_bulk", - true, + false, Setting.Property.NodeScope, Setting.Property.Dynamic ); diff --git a/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java b/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java index 684236b9af661..ab0a0bf626d5d 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java @@ -26,7 +26,6 @@ import org.elasticsearch.action.ActionRequest; import org.elasticsearch.action.ActionResponse; import org.elasticsearch.action.ActionType; -import org.elasticsearch.action.DocWriteRequest; import org.elasticsearch.action.DocWriteResponse; import org.elasticsearch.action.admin.cluster.allocation.ClusterAllocationExplainRequest; import org.elasticsearch.action.admin.cluster.allocation.ClusterAllocationExplainResponse; @@ -49,8 +48,6 @@ import org.elasticsearch.action.admin.indices.template.put.PutIndexTemplateRequestBuilder; import org.elasticsearch.action.bulk.BulkRequestBuilder; import org.elasticsearch.action.bulk.BulkResponse; -import org.elasticsearch.action.bulk.IncrementalBulkService; -import org.elasticsearch.action.index.IndexRequest; import org.elasticsearch.action.index.IndexRequestBuilder; import org.elasticsearch.action.search.ClearScrollResponse; import org.elasticsearch.action.search.SearchRequest; @@ -188,7 +185,6 @@ import 
java.util.Random;
 import java.util.Set;
 import java.util.concurrent.Callable;
-import java.util.concurrent.ConcurrentLinkedQueue;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.Executor;
@@ -1774,48 +1770,11 @@ public void indexRandom(boolean forceRefresh, boolean dummyDocuments, boolean ma
             logger.info("Index [{}] docs async: [{}] bulk: [{}] partitions [{}]", builders.size(), false, true, partition.size());
             for (List<IndexRequestBuilder> segmented : partition) {
                 BulkResponse actionGet;
-                if (randomBoolean()) {
-                    BulkRequestBuilder bulkBuilder = client().prepareBulk();
-                    for (IndexRequestBuilder indexRequestBuilder : segmented) {
-                        bulkBuilder.add(indexRequestBuilder);
-                    }
-                    actionGet = bulkBuilder.get();
-                } else {
-                    IncrementalBulkService bulkService = internalCluster().getInstance(IncrementalBulkService.class);
-                    IncrementalBulkService.Handler handler = bulkService.newBulkRequest();
-
-                    ConcurrentLinkedQueue<DocWriteRequest<?>> queue = new ConcurrentLinkedQueue<>();
-                    segmented.forEach(b -> queue.add(b.request()));
-
-                    PlainActionFuture<BulkResponse> future = new PlainActionFuture<>();
-                    AtomicInteger runs = new AtomicInteger(0);
-                    Runnable r = new Runnable() {
-
-                        @Override
-                        public void run() {
-                            int toRemove = Math.min(randomIntBetween(5, 10), queue.size());
-                            ArrayList<DocWriteRequest<?>> docs = new ArrayList<>();
-                            for (int i = 0; i < toRemove; i++) {
-                                docs.add(queue.poll());
-                            }
-
-                            if (queue.isEmpty()) {
-                                handler.lastItems(docs, () -> {}, future);
-                            } else {
-                                handler.addItems(docs, () -> {}, () -> {
-                                    // Every 10 runs dispatch to new thread to prevent stackoverflow
-                                    if (runs.incrementAndGet() % 10 == 0) {
-                                        new Thread(this).start();
-                                    } else {
-                                        this.run();
-                                    }
-                                });
-                            }
-                        }
-                    };
-                    r.run();
-                    actionGet = future.actionGet();
+                BulkRequestBuilder bulkBuilder = client().prepareBulk();
+                for (IndexRequestBuilder indexRequestBuilder : segmented) {
+                    bulkBuilder.add(indexRequestBuilder);
                 }
+                actionGet = bulkBuilder.get();
                 assertThat(actionGet.hasFailures() ? actionGet.buildFailureMessage() : "", actionGet.hasFailures(), equalTo(false));
             }
         }

From d9188591a5c3fc4e3c408c567f6eb66c6823f970 Mon Sep 17 00:00:00 2001
From: Bogdan Pintea
Date: Tue, 24 Sep 2024 00:53:49 +0200
Subject: [PATCH 16/58] ESQL: add tests checking on data availability (#113292) (#113422)

This adds simple tests that check the shape of the available data to
query as a first step in troubleshooting some non-reproducible
failures.
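For illustration, the same precheck pattern expressed as a Java integration
test might look like the sketch below. This is not part of the change: the
class name is invented and the `ESIntegTestCase`/`ElasticsearchAssertions`
helper calls are assumptions; only the `test_bucket` index and its
timestamps come from the YAML tests that follow.

```java
import org.elasticsearch.test.ESIntegTestCase;

import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;

// Hypothetical sketch of the troubleshooting pattern: assert the shape of the
// data the query will see before asserting on bucketed results, so that an
// intermittent failure points at data availability, not at the bucketing logic.
public class AggsBucketDataAvailabilityIT extends ESIntegTestCase {
    public void testDataIsVisibleBeforeBucketing() throws Exception {
        // Mapping of the "ts" date field elided; it matches the YAML setup below.
        indexRandom(
            true, // force a refresh so the documents are immediately searchable
            client().prepareIndex("test_bucket").setSource("ts", "2024-07-16T08:10:00Z"),
            client().prepareIndex("test_bucket").setSource("ts", "2024-07-16T09:20:00Z"),
            client().prepareIndex("test_bucket").setSource("ts", "2024-07-16T10:30:00Z"),
            client().prepareIndex("test_bucket").setSource("ts", "2024-07-16T11:40:00Z")
        );
        // Precheck: all four documents must be visible before any bucketing assertions run.
        assertHitCount(prepareSearch("test_bucket"), 4L);
    }
}
```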
---
 .../test/esql/26_aggs_bucket.yml              | 48 +++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/26_aggs_bucket.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/26_aggs_bucket.yml
index 7d0989a6e1886..ea7684fb69a09 100644
--- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/26_aggs_bucket.yml
+++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/26_aggs_bucket.yml
@@ -30,6 +30,20 @@
           - { "index": { "_index": "test_bucket" } }
           - { "ts": "2024-07-16T11:40:00Z" }
 
+  - do:
+      allowed_warnings_regex:
+        - "No limit defined, adding default limit of \\[.*\\]"
+      esql.query:
+        body:
+          query: 'FROM test_bucket | SORT ts'
+  - match: { columns.0.name: ts }
+  - match: { columns.0.type: date }
+  - length: { values: 4 }
+  - match: { values.0.0: "2024-07-16T08:10:00.000Z" }
+  - match: { values.1.0: "2024-07-16T09:20:00.000Z" }
+  - match: { values.2.0: "2024-07-16T10:30:00.000Z" }
+  - match: { values.3.0: "2024-07-16T11:40:00.000Z" }
+
   - do:
       allowed_warnings_regex:
         - "No limit defined, adding default limit of \\[.*\\]"
@@ -119,6 +133,40 @@
           - { "index": { "_index": "test_bucket" } }
           - { "ts": "2024-09-16" }
 
+  - do:
+      allowed_warnings_regex:
+        - "No limit defined, adding default limit of \\[.*\\]"
+      esql.query:
+        body:
+          query: 'FROM test_bucket | STATS c = COUNT(*)'
+  - match: { columns.0.name: c }
+  - match: { columns.0.type: long }
+  - match: { values.0.0: 4 }
+
+  - do:
+      allowed_warnings_regex:
+        - "No limit defined, adding default limit of \\[.*\\]"
+      esql.query:
+        body:
+          query: 'FROM test_bucket | SORT ts'
+  - match: { columns.0.name: ts }
+  - match: { columns.0.type: date }
+  - length: { values: 4 }
+  - match: { values.0.0: "2024-06-16T00:00:00.000Z" }
+  - match: { values.1.0: "2024-07-16T00:00:00.000Z" }
+  - match: { values.2.0: "2024-08-16T00:00:00.000Z" }
+  - match: { values.3.0: "2024-09-16T00:00:00.000Z" }
+
+  - do:
+      allowed_warnings_regex:
+        - "No limit defined, adding default limit of \\[.*\\]"
+      esql.query:
+        body:
+          query: 'FROM test_bucket | STATS c = COUNT(*)'
+  - match: { columns.0.name: c }
+  - match: { columns.0.type: long }
+  - match: { values.0.0: 4 }
+
   - do:
       allowed_warnings_regex:
         - "No limit defined, adding default limit of \\[.*\\]"

From f7190599c239e885387de07b1b78aebda7ad938f Mon Sep 17 00:00:00 2001
From: Ignacio Vera
Date: Tue, 24 Sep 2024 08:08:52 +0200
Subject: [PATCH 17/58] Small performance improvement in h3 library (#113385) (#113429)

Changing some FDIVs into FMULs leads to performance improvements.
---
 docs/changelog/113385.yaml                    |  5 +++
 .../java/org/elasticsearch/h3/Constants.java  |  7 +++-
 .../java/org/elasticsearch/h3/CoordIJK.java   | 11 +++--
 .../java/org/elasticsearch/h3/FastMath.java   | 41 +++++++++++--------
 .../main/java/org/elasticsearch/h3/Vec2d.java | 23 ++++++-----
 .../main/java/org/elasticsearch/h3/Vec3d.java |  2 +-
 6 files changed, 57 insertions(+), 32 deletions(-)
 create mode 100644 docs/changelog/113385.yaml

diff --git a/docs/changelog/113385.yaml b/docs/changelog/113385.yaml
new file mode 100644
index 0000000000000..9cee1ebcd4f64
--- /dev/null
+++ b/docs/changelog/113385.yaml
@@ -0,0 +1,5 @@
+pr: 113385
+summary: Small performance improvement in h3 library
+area: Geo
+type: enhancement
+issues: []
diff --git a/libs/h3/src/main/java/org/elasticsearch/h3/Constants.java b/libs/h3/src/main/java/org/elasticsearch/h3/Constants.java
index 5192fe836e73d..570052700615f 100644
--- a/libs/h3/src/main/java/org/elasticsearch/h3/Constants.java
+++ b/libs/h3/src/main/java/org/elasticsearch/h3/Constants.java @@ -33,7 +33,7 @@ final class Constants { /** * 2.0 * PI */ - public static final double M_2PI = 6.28318530717958647692528676655900576839433; + public static final double M_2PI = 2.0 * Math.PI; /** * max H3 resolution; H3 version 1 has 16 resolutions, numbered 0 through 15 */ @@ -58,6 +58,11 @@ final class Constants { * square root of 7 */ public static final double M_SQRT7 = 2.6457513110645905905016157536392604257102; + + /** + * 1 / square root of 7 + */ + public static final double M_RSQRT7 = 1.0 / M_SQRT7; /** * scaling factor from hex2d resolution 0 unit length * (or distance between adjacent cell center points diff --git a/libs/h3/src/main/java/org/elasticsearch/h3/CoordIJK.java b/libs/h3/src/main/java/org/elasticsearch/h3/CoordIJK.java index e57f681fc2eae..8aae7583ef04e 100644 --- a/libs/h3/src/main/java/org/elasticsearch/h3/CoordIJK.java +++ b/libs/h3/src/main/java/org/elasticsearch/h3/CoordIJK.java @@ -39,6 +39,9 @@ */ final class CoordIJK { + /** one seventh (1/7) **/ + private static final double M_ONESEVENTH = 1.0 / 7.0; + /** CoordIJK unit vectors corresponding to the 7 H3 digits. */ private static final int[][] UNIT_VECS = { @@ -281,8 +284,8 @@ public void neighbor(int digit) { public void upAp7r() { final int i = Math.subtractExact(this.i, this.k); final int j = Math.subtractExact(this.j, this.k); - this.i = (int) Math.round((Math.addExact(Math.multiplyExact(2, i), j)) / 7.0); - this.j = (int) Math.round((Math.subtractExact(Math.multiplyExact(3, j), i)) / 7.0); + this.i = (int) Math.round((Math.addExact(Math.multiplyExact(2, i), j)) * M_ONESEVENTH); + this.j = (int) Math.round((Math.subtractExact(Math.multiplyExact(3, j), i)) * M_ONESEVENTH); this.k = 0; ijkNormalize(); } @@ -295,8 +298,8 @@ public void upAp7r() { public void upAp7() { final int i = Math.subtractExact(this.i, this.k); final int j = Math.subtractExact(this.j, this.k); - this.i = (int) Math.round((Math.subtractExact(Math.multiplyExact(3, i), j)) / 7.0); - this.j = (int) Math.round((Math.addExact(Math.multiplyExact(2, j), i)) / 7.0); + this.i = (int) Math.round((Math.subtractExact(Math.multiplyExact(3, i), j)) * M_ONESEVENTH); + this.j = (int) Math.round((Math.addExact(Math.multiplyExact(2, j), i)) * M_ONESEVENTH); this.k = 0; ijkNormalize(); } diff --git a/libs/h3/src/main/java/org/elasticsearch/h3/FastMath.java b/libs/h3/src/main/java/org/elasticsearch/h3/FastMath.java index 61d767901ae0c..760fa75535487 100644 --- a/libs/h3/src/main/java/org/elasticsearch/h3/FastMath.java +++ b/libs/h3/src/main/java/org/elasticsearch/h3/FastMath.java @@ -102,6 +102,15 @@ final class FastMath { private static final int MIN_DOUBLE_EXPONENT = -1074; private static final int MAX_DOUBLE_EXPONENT = 1023; + /** + * PI / 2.0 + */ + private static final double M_HALF_PI = Math.PI * 0.5; + /** + * PI / 4.0 + */ + private static final double M_QUARTER_PI = Math.PI * 0.25; + // -------------------------------------------------------------------------- // CONSTANTS FOR NORMALIZATIONS // -------------------------------------------------------------------------- @@ -335,7 +344,7 @@ public static double cos(double angle) { // Faster than using normalizeZeroTwoPi. angle = remainderTwoPi(angle); if (angle < 0.0) { - angle += 2 * Math.PI; + angle += Constants.M_2PI; } } // index: possibly outside tables range. @@ -366,7 +375,7 @@ public static double sin(double angle) { // Faster than using normalizeZeroTwoPi. 
angle = remainderTwoPi(angle); if (angle < 0.0) { - angle += 2 * Math.PI; + angle += Constants.M_2PI; } } int index = (int) (angle * SIN_COS_INDEXER + 0.5); @@ -387,9 +396,9 @@ public static double tan(double angle) { if (Math.abs(angle) > TAN_MAX_VALUE_FOR_INT_MODULO) { // Faster than using normalizeMinusHalfPiHalfPi. angle = remainderTwoPi(angle); - if (angle < -Math.PI / 2) { + if (angle < -M_HALF_PI) { angle += Math.PI; - } else if (angle > Math.PI / 2) { + } else if (angle > M_HALF_PI) { angle -= Math.PI; } } @@ -428,7 +437,7 @@ public static double tan(double angle) { * @return Value arccosine, in radians, in [0,PI]. */ public static double acos(double value) { - return Math.PI / 2 - FastMath.asin(value); + return M_HALF_PI - FastMath.asin(value); } /** @@ -468,7 +477,7 @@ public static double asin(double value) { return negateResult ? -result : result; } else { // value >= 1.0, or value is NaN if (value == 1.0) { - return negateResult ? -Math.PI / 2 : Math.PI / 2; + return negateResult ? -M_HALF_PI : M_HALF_PI; } else { return Double.NaN; } @@ -490,7 +499,7 @@ public static double atan(double value) { } if (value == 1.0) { // We want "exact" result for 1.0. - return negateResult ? -Math.PI / 4 : Math.PI / 4; + return negateResult ? -M_QUARTER_PI : M_QUARTER_PI; } else if (value <= ATAN_MAX_VALUE_FOR_TABS) { int index = (int) (value * ATAN_INDEXER + 0.5); double delta = value - index * ATAN_DELTA; @@ -511,7 +520,7 @@ public static double atan(double value) { if (Double.isNaN(value)) { return Double.NaN; } else { - return negateResult ? -Math.PI / 2 : Math.PI / 2; + return negateResult ? -M_HALF_PI : M_HALF_PI; } } } @@ -532,9 +541,9 @@ public static double atan2(double y, double x) { } if (x == Double.POSITIVE_INFINITY) { if (y == Double.POSITIVE_INFINITY) { - return Math.PI / 4; + return M_QUARTER_PI; } else if (y == Double.NEGATIVE_INFINITY) { - return -Math.PI / 4; + return -M_QUARTER_PI; } else if (y > 0.0) { return 0.0; } else if (y < 0.0) { @@ -551,9 +560,9 @@ public static double atan2(double y, double x) { } if (x == Double.NEGATIVE_INFINITY) { if (y == Double.POSITIVE_INFINITY) { - return 3 * Math.PI / 4; + return 3 * M_QUARTER_PI; } else if (y == Double.NEGATIVE_INFINITY) { - return -3 * Math.PI / 4; + return -3 * M_QUARTER_PI; } else if (y > 0.0) { return Math.PI; } else if (y < 0.0) { @@ -562,9 +571,9 @@ public static double atan2(double y, double x) { return Double.NaN; } } else if (y > 0.0) { - return Math.PI / 2 + FastMath.atan(-x / y); + return M_HALF_PI + FastMath.atan(-x / y); } else if (y < 0.0) { - return -Math.PI / 2 - FastMath.atan(x / y); + return -M_HALF_PI - FastMath.atan(x / y); } else { return Double.NaN; } @@ -577,9 +586,9 @@ public static double atan2(double y, double x) { } } if (y > 0.0) { - return Math.PI / 2; + return M_HALF_PI; } else if (y < 0.0) { - return -Math.PI / 2; + return -M_HALF_PI; } else { return Double.NaN; } diff --git a/libs/h3/src/main/java/org/elasticsearch/h3/Vec2d.java b/libs/h3/src/main/java/org/elasticsearch/h3/Vec2d.java index 12ce728a99967..b0c2627a5f398 100644 --- a/libs/h3/src/main/java/org/elasticsearch/h3/Vec2d.java +++ b/libs/h3/src/main/java/org/elasticsearch/h3/Vec2d.java @@ -29,8 +29,11 @@ */ final class Vec2d { - /** sin(60') */ - private static final double M_SIN60 = Constants.M_SQRT3_2; + /** 1/sin(60') **/ + private static final double M_RSIN60 = 1.0 / Constants.M_SQRT3_2; + + /** one third **/ + private static final double M_ONETHIRD = 1.0 / 3.0; private static final double VEC2D_RESOLUTION = 1e-7; @@ -133,14 
+136,14 @@ static LatLng hex2dToGeo(double x, double y, int face, int res, boolean substrat // scale for current resolution length u for (int i = 0; i < res; i++) { - r /= Constants.M_SQRT7; + r *= Constants.M_RSQRT7; } // scale accordingly if this is a substrate grid if (substrate) { r /= 3.0; if (H3Index.isResolutionClassIII(res)) { - r /= Constants.M_SQRT7; + r *= Constants.M_RSQRT7; } } @@ -181,8 +184,8 @@ static CoordIJK hex2dToCoordIJK(double x, double y) { a2 = Math.abs(y); // first do a reverse conversion - x2 = a2 / M_SIN60; - x1 = a1 + x2 / 2.0; + x2 = a2 * M_RSIN60; + x1 = a1 + x2 * 0.5; // check if we have the center of a hex m1 = (int) x1; @@ -193,8 +196,8 @@ static CoordIJK hex2dToCoordIJK(double x, double y) { r2 = x2 - m2; if (r1 < 0.5) { - if (r1 < 1.0 / 3.0) { - if (r2 < (1.0 + r1) / 2.0) { + if (r1 < M_ONETHIRD) { + if (r2 < (1.0 + r1) * 0.5) { i = m1; j = m2; } else { @@ -215,7 +218,7 @@ static CoordIJK hex2dToCoordIJK(double x, double y) { } } } else { - if (r1 < 2.0 / 3.0) { + if (r1 < 2.0 * M_ONETHIRD) { if (r2 < (1.0 - r1)) { j = m2; } else { @@ -228,7 +231,7 @@ static CoordIJK hex2dToCoordIJK(double x, double y) { i = Math.incrementExact(m1); } } else { - if (r2 < (r1 / 2.0)) { + if (r2 < (r1 * 0.5)) { i = Math.incrementExact(m1); j = m2; } else { diff --git a/libs/h3/src/main/java/org/elasticsearch/h3/Vec3d.java b/libs/h3/src/main/java/org/elasticsearch/h3/Vec3d.java index c5c4f8975597c..5973af4b51f6f 100644 --- a/libs/h3/src/main/java/org/elasticsearch/h3/Vec3d.java +++ b/libs/h3/src/main/java/org/elasticsearch/h3/Vec3d.java @@ -96,7 +96,7 @@ static long geoToH3(int res, double lat, double lon) { } } // cos(r) = 1 - 2 * sin^2(r/2) = 1 - 2 * (sqd / 4) = 1 - sqd/2 - double r = FastMath.acos(1 - sqd / 2); + double r = FastMath.acos(1 - sqd * 0.5); if (r < Constants.EPSILON) { return FaceIJK.faceIjkToH3(res, face, new CoordIJK(0, 0, 0)); From d086e149fdffd0b9a154498a6530225bba58908d Mon Sep 17 00:00:00 2001 From: David Turner Date: Tue, 24 Sep 2024 09:07:52 +0100 Subject: [PATCH 18/58] Add extra context to `TransportNodesAction` invocations (#113140) (#113201) Several `TransportNodesAction` implementations do some kind of top-level computation in addition to fanning out requests to individual nodes. Today they all have to do this once the node-level fanout is complete, but in most cases the top-level computation can happen in parallel with the fanout. This commit adds support for an additional `ActionContext` object, created when starting to process the request and exposed to `newResponseAsync()` at the end, to allow this parallelization. All implementations use `(Void) null` for this param, except for `TransportClusterStatsAction` which now parallelizes the computation of the cluster-state-based stats with the node-level fanout. 
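For a downstream action, the new hook is used roughly as in the sketch below.
This is a minimal sketch against the new `createActionContext`/
`newResponseAsync` signatures, not code from this change: the `Example*`
types, the executor field and the stats computation are placeholders.

```java
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.ActionRunnable;
import org.elasticsearch.action.FailedNodeException;
import org.elasticsearch.action.support.SubscribableListener;
import org.elasticsearch.action.support.nodes.TransportNodesAction;
import org.elasticsearch.tasks.Task;

import java.util.List;
import java.util.concurrent.Executor;

// Hypothetical subclass: kick off coordinator-side work in createActionContext
// (called as the request starts executing) and join it with the per-node
// responses in newResponseAsync (called once the fan-out has completed).
public class TransportExampleAction extends TransportNodesAction<
    ExampleRequest,
    ExampleResponse,
    ExampleNodeRequest,
    ExampleNodeResponse,
    SubscribableListener<ExampleStats>> {

    private final Executor statsExecutor; // assumed to be initialized in the (elided) constructor

    @Override
    protected SubscribableListener<ExampleStats> createActionContext(Task task, ExampleRequest request) {
        // Runs on the transport worker thread, so dispatch the expensive work instead of doing it inline.
        final var statsListener = new SubscribableListener<ExampleStats>();
        statsExecutor.execute(ActionRunnable.supply(statsListener, this::computeCoordinatorStats));
        return statsListener;
    }

    @Override
    protected void newResponseAsync(
        Task task,
        ExampleRequest request,
        SubscribableListener<ExampleStats> statsListener,
        List<ExampleNodeResponse> responses,
        List<FailedNodeException> failures,
        ActionListener<ExampleResponse> listener
    ) {
        // By this point the coordinator-side computation has been running in parallel with the fan-out.
        statsListener.andThenApply(stats -> new ExampleResponse(stats, responses, failures)).addListener(listener);
    }

    private ExampleStats computeCoordinatorStats() {
        return new ExampleStats(); // placeholder for the cluster-state-based work to overlap with the fan-out
    }

    // Constructor plus the newResponse, newNodeRequest, newNodeResponse and nodeOperation overrides elided.
}
```

Implementations with no coordinator-side work keep the default
`createActionContext`, which returns null, and simply bind the new type
parameter to `Void`.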
--- .../stats/GeoIpStatsTransportAction.java | 2 +- .../TransportNodesCapabilitiesAction.java | 3 +- .../TransportNodesFeaturesAction.java | 3 +- .../TransportNodesHotThreadsAction.java | 3 +- .../node/info/TransportNodesInfoAction.java | 3 +- ...nsportNodesReloadSecureSettingsAction.java | 3 +- .../TransportPrevalidateShardPathAction.java | 3 +- .../node/stats/TransportNodesStatsAction.java | 4 +- .../node/usage/TransportNodesUsageAction.java | 3 +- .../status/TransportNodesSnapshotsStatus.java | 3 +- .../stats/TransportClusterStatsAction.java | 136 +++++++++++++----- .../TransportFindDanglingIndexAction.java | 3 +- .../TransportListDanglingIndicesAction.java | 3 +- .../support/nodes/TransportNodesAction.java | 18 ++- ...ransportNodesListGatewayStartedShards.java | 3 +- .../stats/HealthApiStatsTransportAction.java | 3 +- .../TransportNodesListShardStoreMetadata.java | 3 +- .../node/tasks/TaskManagerTestCase.java | 2 +- .../cluster/node/tasks/TestTaskPlugin.java | 2 +- .../nodes/TransportNodesActionTests.java | 68 ++++++++- .../action/TransportAnalyticsStatsAction.java | 3 +- .../NodesDataTiersUsageTransportAction.java | 3 +- .../TransportNodeDeprecationCheckAction.java | 3 +- .../TransportDeprecationCacheResetAction.java | 3 +- .../action/EnrichCoordinatorStatsAction.java | 2 +- .../eql/plugin/TransportEqlStatsAction.java | 3 +- .../esql/plugin/TransportEsqlStatsAction.java | 3 +- ...ransportGetInferenceDiagnosticsAction.java | 3 +- .../TransportTrainedModelCacheInfoAction.java | 3 +- ...rtClearRepositoriesStatsArchiveAction.java | 3 +- .../TransportRepositoriesStatsAction.java | 3 +- ...rtSearchableSnapshotCacheStoresAction.java | 3 +- ...rchableSnapshotsNodeCachesStatsAction.java | 3 +- .../TransportClearSecurityCacheAction.java | 3 +- .../TransportClearPrivilegesCacheAction.java | 3 +- .../realm/TransportClearRealmCacheAction.java | 3 +- .../role/TransportClearRolesCacheAction.java | 3 +- ...tServiceAccountNodesCredentialsAction.java | 3 +- .../action/SpatialStatsTransportAction.java | 3 +- .../sql/plugin/TransportSqlStatsAction.java | 3 +- .../TransportGetTransformNodeStatsAction.java | 3 +- .../actions/TransportWatcherStatsAction.java | 3 +- 42 files changed, 255 insertions(+), 81 deletions(-) diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/stats/GeoIpStatsTransportAction.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/stats/GeoIpStatsTransportAction.java index c1e9b04dda907..9ebf97ca4e9ee 100644 --- a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/stats/GeoIpStatsTransportAction.java +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/stats/GeoIpStatsTransportAction.java @@ -30,7 +30,7 @@ import java.io.IOException; import java.util.List; -public class GeoIpStatsTransportAction extends TransportNodesAction { +public class GeoIpStatsTransportAction extends TransportNodesAction { private final DatabaseNodeService registry; private final GeoIpDownloaderTaskExecutor geoIpDownloaderTaskExecutor; diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/node/capabilities/TransportNodesCapabilitiesAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/node/capabilities/TransportNodesCapabilitiesAction.java index 1f772be2ed1e3..8df34d882941a 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/node/capabilities/TransportNodesCapabilitiesAction.java +++ 
b/server/src/main/java/org/elasticsearch/action/admin/cluster/node/capabilities/TransportNodesCapabilitiesAction.java @@ -38,7 +38,8 @@ public class TransportNodesCapabilitiesAction extends TransportNodesAction< NodesCapabilitiesRequest, NodesCapabilitiesResponse, TransportNodesCapabilitiesAction.NodeCapabilitiesRequest, - NodeCapability> { + NodeCapability, + Void> { public static final ActionType TYPE = new ActionType<>("cluster:monitor/nodes/capabilities"); diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/node/features/TransportNodesFeaturesAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/node/features/TransportNodesFeaturesAction.java index c0cf86288fd3f..e5e04c8490c8e 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/node/features/TransportNodesFeaturesAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/node/features/TransportNodesFeaturesAction.java @@ -33,7 +33,8 @@ public class TransportNodesFeaturesAction extends TransportNodesAction< NodesFeaturesRequest, NodesFeaturesResponse, TransportNodesFeaturesAction.NodeFeaturesRequest, - NodeFeatures> { + NodeFeatures, + Void> { public static final ActionType TYPE = new ActionType<>("cluster:monitor/nodes/features"); diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/node/hotthreads/TransportNodesHotThreadsAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/node/hotthreads/TransportNodesHotThreadsAction.java index cf3b34877afa0..f1e24258eb579 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/node/hotthreads/TransportNodesHotThreadsAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/node/hotthreads/TransportNodesHotThreadsAction.java @@ -39,7 +39,8 @@ public class TransportNodesHotThreadsAction extends TransportNodesAction< NodesHotThreadsRequest, NodesHotThreadsResponse, TransportNodesHotThreadsAction.NodeRequest, - NodeHotThreads> { + NodeHotThreads, + Void> { public static final ActionType TYPE = new ActionType<>("cluster:monitor/nodes/hot_threads"); diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/node/info/TransportNodesInfoAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/node/info/TransportNodesInfoAction.java index 9fc657feeb463..65bf763197598 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/node/info/TransportNodesInfoAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/node/info/TransportNodesInfoAction.java @@ -34,7 +34,8 @@ public class TransportNodesInfoAction extends TransportNodesAction< NodesInfoRequest, NodesInfoResponse, TransportNodesInfoAction.NodeInfoRequest, - NodeInfo> { + NodeInfo, + Void> { public static final ActionType TYPE = new ActionType<>("cluster:monitor/nodes/info"); private final NodeService nodeService; diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/node/reload/TransportNodesReloadSecureSettingsAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/node/reload/TransportNodesReloadSecureSettingsAction.java index 8f13e69a35a5c..c84df0ddfe644 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/node/reload/TransportNodesReloadSecureSettingsAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/node/reload/TransportNodesReloadSecureSettingsAction.java @@ -39,7 +39,8 @@ public class TransportNodesReloadSecureSettingsAction extends 
TransportNodesActi NodesReloadSecureSettingsRequest, NodesReloadSecureSettingsResponse, NodesReloadSecureSettingsRequest.NodeRequest, - NodesReloadSecureSettingsResponse.NodeResponse> { + NodesReloadSecureSettingsResponse.NodeResponse, + Void> { public static final ActionType TYPE = new ActionType<>("cluster:admin/nodes/reload_secure_settings"); diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/node/shutdown/TransportPrevalidateShardPathAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/node/shutdown/TransportPrevalidateShardPathAction.java index d3f59292009fa..8c49175c320fd 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/node/shutdown/TransportPrevalidateShardPathAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/node/shutdown/TransportPrevalidateShardPathAction.java @@ -44,7 +44,8 @@ public class TransportPrevalidateShardPathAction extends TransportNodesAction< PrevalidateShardPathRequest, PrevalidateShardPathResponse, NodePrevalidateShardPathRequest, - NodePrevalidateShardPathResponse> { + NodePrevalidateShardPathResponse, + Void> { public static final String ACTION_NAME = "internal:admin/indices/prevalidate_shard_path"; public static final ActionType TYPE = new ActionType<>(ACTION_NAME); diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/node/stats/TransportNodesStatsAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/node/stats/TransportNodesStatsAction.java index 0ac55291a7975..379ebe80539b4 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/node/stats/TransportNodesStatsAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/node/stats/TransportNodesStatsAction.java @@ -46,7 +46,8 @@ public class TransportNodesStatsAction extends TransportNodesAction< NodesStatsRequest, NodesStatsResponse, TransportNodesStatsAction.NodeStatsRequest, - NodeStats> { + NodeStats, + Void> { public static final ActionType TYPE = new ActionType<>("cluster:monitor/nodes/stats"); @@ -83,6 +84,7 @@ protected NodesStatsResponse newResponse(NodesStatsRequest request, List responses, List failures, ActionListener listener diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/node/usage/TransportNodesUsageAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/node/usage/TransportNodesUsageAction.java index 967f619d31f4f..a55c58568647c 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/node/usage/TransportNodesUsageAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/node/usage/TransportNodesUsageAction.java @@ -34,7 +34,8 @@ public class TransportNodesUsageAction extends TransportNodesAction< NodesUsageRequest, NodesUsageResponse, TransportNodesUsageAction.NodeUsageRequest, - NodeUsage> { + NodeUsage, + Void> { public static final ActionType TYPE = new ActionType<>("cluster:monitor/nodes/usage"); private final UsageService restUsageService; diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/status/TransportNodesSnapshotsStatus.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/status/TransportNodesSnapshotsStatus.java index 19b5894e21398..42b71e275bb1b 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/status/TransportNodesSnapshotsStatus.java +++ 
b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/status/TransportNodesSnapshotsStatus.java @@ -47,7 +47,8 @@ public class TransportNodesSnapshotsStatus extends TransportNodesAction< TransportNodesSnapshotsStatus.Request, TransportNodesSnapshotsStatus.NodesSnapshotStatus, TransportNodesSnapshotsStatus.NodeRequest, - TransportNodesSnapshotsStatus.NodeSnapshotStatus> { + TransportNodesSnapshotsStatus.NodeSnapshotStatus, + Void> { public static final String ACTION_NAME = TransportSnapshotsStatusAction.TYPE.name() + "[nodes]"; public static final ActionType TYPE = new ActionType<>(ACTION_NAME); diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java index 80f3e8c439d26..7e25fe45f6332 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java @@ -12,6 +12,7 @@ import org.apache.lucene.store.AlreadyClosedException; import org.elasticsearch.TransportVersions; import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.ActionRunnable; import org.elasticsearch.action.ActionType; import org.elasticsearch.action.FailedNodeException; import org.elasticsearch.action.admin.cluster.node.info.NodeInfo; @@ -20,6 +21,8 @@ import org.elasticsearch.action.admin.indices.stats.CommonStatsFlags; import org.elasticsearch.action.admin.indices.stats.ShardStats; import org.elasticsearch.action.support.ActionFilters; +import org.elasticsearch.action.support.RefCountingListener; +import org.elasticsearch.action.support.SubscribableListener; import org.elasticsearch.action.support.nodes.TransportNodesAction; import org.elasticsearch.cluster.ClusterSnapshotStats; import org.elasticsearch.cluster.ClusterState; @@ -31,7 +34,6 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.util.CancellableSingleObjectCache; -import org.elasticsearch.common.util.concurrent.ListenableFuture; import org.elasticsearch.common.util.concurrent.ThreadContext; import org.elasticsearch.core.UpdateForV9; import org.elasticsearch.index.IndexService; @@ -57,6 +59,7 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; +import java.util.concurrent.Executor; import java.util.function.BiFunction; import java.util.function.BooleanSupplier; @@ -64,7 +67,8 @@ public class TransportClusterStatsAction extends TransportNodesAction< ClusterStatsRequest, ClusterStatsResponse, TransportClusterStatsAction.ClusterStatsNodeRequest, - ClusterStatsNodeResponse> { + ClusterStatsNodeResponse, + SubscribableListener> { public static final ActionType TYPE = new ActionType<>("cluster:monitor/stats"); private static final CommonStatsFlags SHARD_STATS_FLAGS = new CommonStatsFlags( @@ -84,6 +88,7 @@ public class TransportClusterStatsAction extends TransportNodesAction< private final SearchUsageHolder searchUsageHolder; private final CCSUsageTelemetry ccsUsageHolder; + private final Executor clusterStateStatsExecutor; private final MetadataStatsCache mappingStatsCache; private final MetadataStatsCache analysisStatsCache; @@ -111,14 +116,32 @@ public TransportClusterStatsAction( this.repositoriesService = repositoriesService; this.searchUsageHolder = usageService.getSearchUsageHolder(); this.ccsUsageHolder = 
usageService.getCcsUsageHolder(); + this.clusterStateStatsExecutor = threadPool.executor(ThreadPool.Names.MANAGEMENT); this.mappingStatsCache = new MetadataStatsCache<>(threadPool.getThreadContext(), MappingStats::of); this.analysisStatsCache = new MetadataStatsCache<>(threadPool.getThreadContext(), AnalysisStats::of); } + @Override + protected SubscribableListener createActionContext(Task task, ClusterStatsRequest request) { + assert task instanceof CancellableTask; + final var cancellableTask = (CancellableTask) task; + final var additionalStatsListener = new SubscribableListener(); + AdditionalStats.compute( + cancellableTask, + clusterStateStatsExecutor, + clusterService, + mappingStatsCache, + analysisStatsCache, + additionalStatsListener + ); + return additionalStatsListener; + } + @Override protected void newResponseAsync( final Task task, final ClusterStatsRequest request, + final SubscribableListener additionalStatsListener, final List responses, final List failures, final ActionListener listener @@ -128,41 +151,19 @@ protected void newResponseAsync( + "the cluster state that are too slow for a transport thread" ); assert ThreadPool.assertCurrentThreadPool(ThreadPool.Names.MANAGEMENT); - assert task instanceof CancellableTask; - final CancellableTask cancellableTask = (CancellableTask) task; - final ClusterState state = clusterService.state(); - final Metadata metadata = state.metadata(); - final ClusterSnapshotStats clusterSnapshotStats = ClusterSnapshotStats.of( - state, - clusterService.threadPool().absoluteTimeInMillis() - ); - - final ListenableFuture mappingStatsStep = new ListenableFuture<>(); - final ListenableFuture analysisStatsStep = new ListenableFuture<>(); - mappingStatsCache.get(metadata, cancellableTask::isCancelled, mappingStatsStep); - analysisStatsCache.get(metadata, cancellableTask::isCancelled, analysisStatsStep); - mappingStatsStep.addListener( - listener.delegateFailureAndWrap( - (l, mappingStats) -> analysisStatsStep.addListener( - l.delegateFailureAndWrap( - (ll, analysisStats) -> ActionListener.completeWith( - ll, - () -> new ClusterStatsResponse( - System.currentTimeMillis(), - metadata.clusterUUID(), - clusterService.getClusterName(), - responses, - failures, - mappingStats, - analysisStats, - VersionStats.of(metadata, responses), - clusterSnapshotStats - ) - ) - ) - ) + additionalStatsListener.andThenApply( + additionalStats -> new ClusterStatsResponse( + System.currentTimeMillis(), + additionalStats.clusterUUID(), + clusterService.getClusterName(), + responses, + failures, + additionalStats.mappingStats(), + additionalStats.analysisStats(), + VersionStats.of(clusterService.state().metadata(), responses), + additionalStats.clusterSnapshotStats() ) - ); + ).addListener(listener); } @Override @@ -316,4 +317,67 @@ protected boolean isFresh(Long currentKey, Long newKey) { return newKey <= currentKey; } } + + public static final class AdditionalStats { + + private String clusterUUID; + private MappingStats mappingStats; + private AnalysisStats analysisStats; + private ClusterSnapshotStats clusterSnapshotStats; + + static void compute( + CancellableTask task, + Executor executor, + ClusterService clusterService, + MetadataStatsCache mappingStatsCache, + MetadataStatsCache analysisStatsCache, + ActionListener listener + ) { + executor.execute(ActionRunnable.wrap(listener, l -> { + task.ensureNotCancelled(); + final var result = new AdditionalStats(); + result.compute( + clusterService.state(), + mappingStatsCache, + analysisStatsCache, + task::isCancelled, + 
clusterService.threadPool().absoluteTimeInMillis(), + l.map(ignored -> result) + ); + })); + } + + private void compute( + ClusterState clusterState, + MetadataStatsCache mappingStatsCache, + MetadataStatsCache analysisStatsCache, + BooleanSupplier isCancelledSupplier, + long absoluteTimeInMillis, + ActionListener listener + ) { + try (var listeners = new RefCountingListener(listener)) { + final var metadata = clusterState.metadata(); + clusterUUID = metadata.clusterUUID(); + mappingStatsCache.get(metadata, isCancelledSupplier, listeners.acquire(s -> mappingStats = s)); + analysisStatsCache.get(metadata, isCancelledSupplier, listeners.acquire(s -> analysisStats = s)); + clusterSnapshotStats = ClusterSnapshotStats.of(clusterState, absoluteTimeInMillis); + } + } + + String clusterUUID() { + return clusterUUID; + } + + MappingStats mappingStats() { + return mappingStats; + } + + AnalysisStats analysisStats() { + return analysisStats; + } + + ClusterSnapshotStats clusterSnapshotStats() { + return clusterSnapshotStats; + } + } } diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/dangling/find/TransportFindDanglingIndexAction.java b/server/src/main/java/org/elasticsearch/action/admin/indices/dangling/find/TransportFindDanglingIndexAction.java index 30d6cdf932fe7..a181b059b82e3 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/dangling/find/TransportFindDanglingIndexAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/dangling/find/TransportFindDanglingIndexAction.java @@ -34,7 +34,8 @@ public class TransportFindDanglingIndexAction extends TransportNodesAction< FindDanglingIndexRequest, FindDanglingIndexResponse, NodeFindDanglingIndexRequest, - NodeFindDanglingIndexResponse> { + NodeFindDanglingIndexResponse, + Void> { public static final ActionType TYPE = new ActionType<>("cluster:admin/indices/dangling/find"); diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/dangling/list/TransportListDanglingIndicesAction.java b/server/src/main/java/org/elasticsearch/action/admin/indices/dangling/list/TransportListDanglingIndicesAction.java index 70b7ff370afd7..3410e617e3ed4 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/dangling/list/TransportListDanglingIndicesAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/dangling/list/TransportListDanglingIndicesAction.java @@ -36,7 +36,8 @@ public class TransportListDanglingIndicesAction extends TransportNodesAction< ListDanglingIndicesRequest, ListDanglingIndicesResponse, NodeListDanglingIndicesRequest, - NodeListDanglingIndicesResponse> { + NodeListDanglingIndicesResponse, + Void> { public static final ActionType TYPE = new ActionType<>("cluster:admin/indices/dangling/list"); diff --git a/server/src/main/java/org/elasticsearch/action/support/nodes/TransportNodesAction.java b/server/src/main/java/org/elasticsearch/action/support/nodes/TransportNodesAction.java index 2eed3b6263c87..89b7ec01c0406 100644 --- a/server/src/main/java/org/elasticsearch/action/support/nodes/TransportNodesAction.java +++ b/server/src/main/java/org/elasticsearch/action/support/nodes/TransportNodesAction.java @@ -30,6 +30,7 @@ import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.common.util.concurrent.EsExecutors; import org.elasticsearch.core.CheckedConsumer; +import org.elasticsearch.core.Nullable; import org.elasticsearch.core.Releasables; import org.elasticsearch.core.UpdateForV9; import 
org.elasticsearch.tasks.CancellableTask; @@ -53,7 +54,8 @@ public abstract class TransportNodesAction< NodesRequest extends BaseNodesRequest, NodesResponse extends BaseNodesResponse, NodeRequest extends TransportRequest, - NodeResponse extends BaseNodeResponse> extends TransportAction { + NodeResponse extends BaseNodeResponse, + ActionContext> extends TransportAction { private static final Logger logger = LogManager.getLogger(TransportNodesAction.class); @@ -99,6 +101,7 @@ protected void doExecute(Task task, NodesRequest request, ActionListener, Exception>>() { + final ActionContext actionContext = createActionContext(task, request); final ArrayList responses = new ArrayList<>(concreteNodes.length); final ArrayList exceptions = new ArrayList<>(0); @@ -166,7 +169,7 @@ protected CheckedConsumer, Exception> onCompletion // ref releases all happen-before here so no need to be synchronized return l -> { try (var ignored = Releasables.wrap(Iterators.map(responses.iterator(), r -> r::decRef))) { - newResponseAsync(task, request, responses, exceptions, l); + newResponseAsync(task, request, actionContext, responses, exceptions, l); } }; } @@ -187,6 +190,16 @@ private Writeable.Reader nodeResponseReader(DiscoveryNode discover return in -> TransportNodesAction.this.newNodeResponse(in, discoveryNode); } + /** + * Create an (optional) {@link ActionContext}: called when starting to execute this action, and the result passed to + * {@link #newResponseAsync} on completion. NB runs on the transport worker thread, must not do anything expensive without dispatching + * to a different executor. + */ + @Nullable + protected ActionContext createActionContext(Task task, NodesRequest request) { + return null; + } + /** * Create a new {@link NodesResponse}. This method is executed on {@link #finalExecutor}. 
* @@ -211,6 +224,7 @@ private Writeable.Reader nodeResponseReader(DiscoveryNode discover protected void newResponseAsync( Task task, NodesRequest request, + ActionContext actionContext, List responses, List failures, ActionListener listener diff --git a/server/src/main/java/org/elasticsearch/gateway/TransportNodesListGatewayStartedShards.java b/server/src/main/java/org/elasticsearch/gateway/TransportNodesListGatewayStartedShards.java index d77635af8f451..b7ddb9226ddb5 100644 --- a/server/src/main/java/org/elasticsearch/gateway/TransportNodesListGatewayStartedShards.java +++ b/server/src/main/java/org/elasticsearch/gateway/TransportNodesListGatewayStartedShards.java @@ -58,7 +58,8 @@ public class TransportNodesListGatewayStartedShards extends TransportNodesAction TransportNodesListGatewayStartedShards.Request, TransportNodesListGatewayStartedShards.NodesGatewayStartedShards, TransportNodesListGatewayStartedShards.NodeRequest, - TransportNodesListGatewayStartedShards.NodeGatewayStartedShards> { + TransportNodesListGatewayStartedShards.NodeGatewayStartedShards, + Void> { private static final Logger logger = LogManager.getLogger(TransportNodesListGatewayStartedShards.class); diff --git a/server/src/main/java/org/elasticsearch/health/stats/HealthApiStatsTransportAction.java b/server/src/main/java/org/elasticsearch/health/stats/HealthApiStatsTransportAction.java index a0325b4c467ec..4c2e809f48be9 100644 --- a/server/src/main/java/org/elasticsearch/health/stats/HealthApiStatsTransportAction.java +++ b/server/src/main/java/org/elasticsearch/health/stats/HealthApiStatsTransportAction.java @@ -29,7 +29,8 @@ public class HealthApiStatsTransportAction extends TransportNodesAction< HealthApiStatsAction.Request, HealthApiStatsAction.Response, HealthApiStatsAction.Request.Node, - HealthApiStatsAction.Response.Node> { + HealthApiStatsAction.Response.Node, + Void> { private final HealthApiStats healthApiStats; diff --git a/server/src/main/java/org/elasticsearch/indices/store/TransportNodesListShardStoreMetadata.java b/server/src/main/java/org/elasticsearch/indices/store/TransportNodesListShardStoreMetadata.java index 10d6c32585a9c..dc6b14cec3cac 100644 --- a/server/src/main/java/org/elasticsearch/indices/store/TransportNodesListShardStoreMetadata.java +++ b/server/src/main/java/org/elasticsearch/indices/store/TransportNodesListShardStoreMetadata.java @@ -60,7 +60,8 @@ public class TransportNodesListShardStoreMetadata extends TransportNodesAction< TransportNodesListShardStoreMetadata.Request, TransportNodesListShardStoreMetadata.NodesStoreFilesMetadata, TransportNodesListShardStoreMetadata.NodeRequest, - TransportNodesListShardStoreMetadata.NodeStoreFilesMetadata> { + TransportNodesListShardStoreMetadata.NodeStoreFilesMetadata, + Void> { private static final Logger logger = LogManager.getLogger(TransportNodesListShardStoreMetadata.class); diff --git a/server/src/test/java/org/elasticsearch/action/admin/cluster/node/tasks/TaskManagerTestCase.java b/server/src/test/java/org/elasticsearch/action/admin/cluster/node/tasks/TaskManagerTestCase.java index 25c7ac1d39d09..a61360dab7e36 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/cluster/node/tasks/TaskManagerTestCase.java +++ b/server/src/test/java/org/elasticsearch/action/admin/cluster/node/tasks/TaskManagerTestCase.java @@ -138,7 +138,7 @@ public int failureCount() { * Simulates node-based task that can be used to block node tasks so they are guaranteed to be registered by task manager */ abstract class AbstractTestNodesAction, NodeRequest 
extends TransportRequest> - extends TransportNodesAction { + extends TransportNodesAction { AbstractTestNodesAction( String actionName, diff --git a/server/src/test/java/org/elasticsearch/action/admin/cluster/node/tasks/TestTaskPlugin.java b/server/src/test/java/org/elasticsearch/action/admin/cluster/node/tasks/TestTaskPlugin.java index 4c0ac871d2e31..0591974437951 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/cluster/node/tasks/TestTaskPlugin.java +++ b/server/src/test/java/org/elasticsearch/action/admin/cluster/node/tasks/TestTaskPlugin.java @@ -243,7 +243,7 @@ public Task createTask(long id, String type, String action, TaskId parentTaskId, } } - public static class TransportTestTaskAction extends TransportNodesAction { + public static class TransportTestTaskAction extends TransportNodesAction { @Inject public TransportTestTaskAction(ThreadPool threadPool, ClusterService clusterService, TransportService transportService) { diff --git a/server/src/test/java/org/elasticsearch/action/support/nodes/TransportNodesActionTests.java b/server/src/test/java/org/elasticsearch/action/support/nodes/TransportNodesActionTests.java index ed347643f0e7d..4a3b060c3e1c0 100644 --- a/server/src/test/java/org/elasticsearch/action/support/nodes/TransportNodesActionTests.java +++ b/server/src/test/java/org/elasticsearch/action/support/nodes/TransportNodesActionTests.java @@ -78,7 +78,8 @@ public class TransportNodesActionTests extends ESTestCase { private TransportService transportService; public void testRequestIsSentToEachNode() { - TransportNodesAction action = getTestTransportNodesAction(); + TransportNodesAction action = + getTestTransportNodesAction(); TestNodesRequest request = new TestNodesRequest(); action.execute(null, request, new PlainActionFuture<>()); Map> capturedRequests = transport.getCapturedRequestsByTargetNodeAndClear(); @@ -89,7 +90,8 @@ public void testRequestIsSentToEachNode() { } public void testNodesSelectors() { - TransportNodesAction action = getTestTransportNodesAction(); + TransportNodesAction action = + getTestTransportNodesAction(); int numSelectors = randomIntBetween(1, 5); Set nodeSelectors = new HashSet<>(); for (int i = 0; i < numSelectors; i++) { @@ -109,7 +111,7 @@ public void testNodesSelectors() { } public void testCustomResolving() { - TransportNodesAction action = + TransportNodesAction action = getDataNodesOnlyTransportNodesAction(transportService); TestNodesRequest request = new TestNodesRequest(randomBoolean() ? 
null : generateRandomStringArray(10, 5, false, true)); action.execute(null, request, new PlainActionFuture<>()); @@ -257,6 +259,63 @@ public void testResponsesReleasedOnCancellation() { assertTrue(cancellableTask.isCancelled()); // keep task alive } + public void testActionContextReleasedOnCancellation() { + final var reachabilityChecker = new ReachabilityChecker(); + final TransportNodesAction action = + new TransportNodesAction<>( + "indices:admin/test", + clusterService, + transportService, + new ActionFilters(Collections.emptySet()), + TestNodeRequest::new, + THREAD_POOL.executor(ThreadPool.Names.GENERIC) + ) { + @Override + protected TestNodesResponse newResponse( + TestNodesRequest request, + List testNodeResponses, + List failures + ) { + return fail(null, "should not be called"); + } + + @Override + protected TestNodeRequest newNodeRequest(TestNodesRequest request) { + return new TestNodeRequest(); + } + + @Override + protected TestNodeResponse newNodeResponse(StreamInput in, DiscoveryNode node) throws IOException { + return new TestNodeResponse(in); + } + + @Override + protected TestNodeResponse nodeOperation(TestNodeRequest request, Task task) { + return new TestNodeResponse(); + } + + @Override + protected Object createActionContext(Task task, TestNodesRequest request) { + return reachabilityChecker.register(new Object()); + } + }; + + final CancellableTask cancellableTask = new CancellableTask(randomLong(), "transport", "action", "", null, emptyMap()); + final PlainActionFuture listener = new PlainActionFuture<>(); + action.execute(cancellableTask, new TestNodesRequest(), listener); + + reachabilityChecker.checkReachable(); + TaskCancelHelper.cancel(cancellableTask, "simulated"); + reachabilityChecker.ensureUnreachable(); + + for (CapturingTransport.CapturedRequest capturedRequest : transport.getCapturedRequestsAndClear()) { + transport.handleLocalError(capturedRequest.requestId(), new ElasticsearchException("simulated")); + } + + expectThrows(TaskCancelledException.class, () -> listener.actionGet(10, TimeUnit.SECONDS)); + assertTrue(cancellableTask.isCancelled()); // keep task alive + } + @BeforeClass public static void startThreadPool() { THREAD_POOL = new TestThreadPool(TransportNodesActionTests.class.getSimpleName()); @@ -341,7 +400,8 @@ private static class TestTransportNodesAction extends TransportNodesAction< TestNodesRequest, TestNodesResponse, TestNodeRequest, - TestNodeResponse> { + TestNodeResponse, + Void> { TestTransportNodesAction( ClusterService clusterService, diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/action/TransportAnalyticsStatsAction.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/action/TransportAnalyticsStatsAction.java index d20ef5abe2388..830ab3528dcca 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/action/TransportAnalyticsStatsAction.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/action/TransportAnalyticsStatsAction.java @@ -26,7 +26,8 @@ public class TransportAnalyticsStatsAction extends TransportNodesAction< AnalyticsStatsAction.Request, AnalyticsStatsAction.Response, AnalyticsStatsAction.NodeRequest, - AnalyticsStatsAction.NodeResponse> { + AnalyticsStatsAction.NodeResponse, + Void> { private final AnalyticsUsage usage; @Inject diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/datatiers/NodesDataTiersUsageTransportAction.java 
b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/datatiers/NodesDataTiersUsageTransportAction.java index eb35ba651df20..6a544f6377728 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/datatiers/NodesDataTiersUsageTransportAction.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/datatiers/NodesDataTiersUsageTransportAction.java @@ -54,7 +54,8 @@ public class NodesDataTiersUsageTransportAction extends TransportNodesAction< NodesDataTiersUsageTransportAction.NodesRequest, NodesDataTiersUsageTransportAction.NodesResponse, NodesDataTiersUsageTransportAction.NodeRequest, - NodeDataTiersUsage> { + NodeDataTiersUsage, + Void> { public static final ActionType TYPE = new ActionType<>("cluster:monitor/nodes/data_tier_usage"); public static final NodeFeature LOCALLY_PRECALCULATED_STATS_FEATURE = new NodeFeature("usage.data_tiers.precalculate_stats"); diff --git a/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/TransportNodeDeprecationCheckAction.java b/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/TransportNodeDeprecationCheckAction.java index 92b16b6a3430e..745f5e7ae8959 100644 --- a/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/TransportNodeDeprecationCheckAction.java +++ b/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/TransportNodeDeprecationCheckAction.java @@ -43,7 +43,8 @@ public class TransportNodeDeprecationCheckAction extends TransportNodesAction< NodesDeprecationCheckRequest, NodesDeprecationCheckResponse, NodesDeprecationCheckAction.NodeRequest, - NodesDeprecationCheckAction.NodeResponse> { + NodesDeprecationCheckAction.NodeResponse, + Void> { private final Settings settings; private final XPackLicenseState licenseState; diff --git a/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/logging/TransportDeprecationCacheResetAction.java b/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/logging/TransportDeprecationCacheResetAction.java index 01d9089a153fd..1a82574752fe9 100644 --- a/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/logging/TransportDeprecationCacheResetAction.java +++ b/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/logging/TransportDeprecationCacheResetAction.java @@ -28,7 +28,8 @@ public class TransportDeprecationCacheResetAction extends TransportNodesAction< DeprecationCacheResetAction.Request, DeprecationCacheResetAction.Response, DeprecationCacheResetAction.NodeRequest, - DeprecationCacheResetAction.NodeResponse> { + DeprecationCacheResetAction.NodeResponse, + Void> { private static final Logger logger = LogManager.getLogger(TransportDeprecationCacheResetAction.class); diff --git a/x-pack/plugin/enrich/src/main/java/org/elasticsearch/xpack/enrich/action/EnrichCoordinatorStatsAction.java b/x-pack/plugin/enrich/src/main/java/org/elasticsearch/xpack/enrich/action/EnrichCoordinatorStatsAction.java index 1213c439c628c..808acee58df3c 100644 --- a/x-pack/plugin/enrich/src/main/java/org/elasticsearch/xpack/enrich/action/EnrichCoordinatorStatsAction.java +++ b/x-pack/plugin/enrich/src/main/java/org/elasticsearch/xpack/enrich/action/EnrichCoordinatorStatsAction.java @@ -111,7 +111,7 @@ public void writeTo(StreamOutput out) throws IOException { } } - public static class TransportAction extends TransportNodesAction { + public static class TransportAction extends TransportNodesAction { private final 
EnrichCache enrichCache; private final EnrichCoordinatorProxyAction.Coordinator coordinator; diff --git a/x-pack/plugin/eql/src/main/java/org/elasticsearch/xpack/eql/plugin/TransportEqlStatsAction.java b/x-pack/plugin/eql/src/main/java/org/elasticsearch/xpack/eql/plugin/TransportEqlStatsAction.java index 76f3d05cdb9dc..5f1fcbbe66595 100644 --- a/x-pack/plugin/eql/src/main/java/org/elasticsearch/xpack/eql/plugin/TransportEqlStatsAction.java +++ b/x-pack/plugin/eql/src/main/java/org/elasticsearch/xpack/eql/plugin/TransportEqlStatsAction.java @@ -28,7 +28,8 @@ public class TransportEqlStatsAction extends TransportNodesAction< EqlStatsRequest, EqlStatsResponse, EqlStatsRequest.NodeStatsRequest, - EqlStatsResponse.NodeStatsResponse> { + EqlStatsResponse.NodeStatsResponse, + Void> { // the plan executor holds the metrics private final PlanExecutor planExecutor; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/TransportEsqlStatsAction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/TransportEsqlStatsAction.java index 7ed027436bbcd..985dcf118ac54 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/TransportEsqlStatsAction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/TransportEsqlStatsAction.java @@ -31,7 +31,8 @@ public class TransportEsqlStatsAction extends TransportNodesAction< EsqlStatsRequest, EsqlStatsResponse, EsqlStatsRequest.NodeStatsRequest, - EsqlStatsResponse.NodeStatsResponse> { + EsqlStatsResponse.NodeStatsResponse, + Void> { static final NodeFeature ESQL_STATS_FEATURE = new NodeFeature("esql.stats_node"); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/TransportGetInferenceDiagnosticsAction.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/TransportGetInferenceDiagnosticsAction.java index 88689035fbd8a..cdd322cfe74f3 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/TransportGetInferenceDiagnosticsAction.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/TransportGetInferenceDiagnosticsAction.java @@ -28,7 +28,8 @@ public class TransportGetInferenceDiagnosticsAction extends TransportNodesAction GetInferenceDiagnosticsAction.Request, GetInferenceDiagnosticsAction.Response, GetInferenceDiagnosticsAction.NodeRequest, - GetInferenceDiagnosticsAction.NodeResponse> { + GetInferenceDiagnosticsAction.NodeResponse, + Void> { private final HttpClientManager httpClientManager; diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportTrainedModelCacheInfoAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportTrainedModelCacheInfoAction.java index 5b76b46f66c6c..af7e1869420b6 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportTrainedModelCacheInfoAction.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportTrainedModelCacheInfoAction.java @@ -35,7 +35,8 @@ public class TransportTrainedModelCacheInfoAction extends TransportNodesAction< TrainedModelCacheInfoAction.Request, TrainedModelCacheInfoAction.Response, TransportTrainedModelCacheInfoAction.NodeModelCacheInfoRequest, - CacheInfo> { + CacheInfo, + Void> { private final ModelLoadingService modelLoadingService; diff --git 
a/x-pack/plugin/repositories-metering-api/src/main/java/org/elasticsearch/xpack/repositories/metering/action/TransportClearRepositoriesStatsArchiveAction.java b/x-pack/plugin/repositories-metering-api/src/main/java/org/elasticsearch/xpack/repositories/metering/action/TransportClearRepositoriesStatsArchiveAction.java index a7ffc096f6ffb..f138449559d25 100644 --- a/x-pack/plugin/repositories-metering-api/src/main/java/org/elasticsearch/xpack/repositories/metering/action/TransportClearRepositoriesStatsArchiveAction.java +++ b/x-pack/plugin/repositories-metering-api/src/main/java/org/elasticsearch/xpack/repositories/metering/action/TransportClearRepositoriesStatsArchiveAction.java @@ -29,7 +29,8 @@ public final class TransportClearRepositoriesStatsArchiveAction extends Transpor ClearRepositoriesMeteringArchiveRequest, RepositoriesMeteringResponse, TransportClearRepositoriesStatsArchiveAction.ClearRepositoriesStatsArchiveNodeRequest, - RepositoriesNodeMeteringResponse> { + RepositoriesNodeMeteringResponse, + Void> { private final RepositoriesService repositoriesService; diff --git a/x-pack/plugin/repositories-metering-api/src/main/java/org/elasticsearch/xpack/repositories/metering/action/TransportRepositoriesStatsAction.java b/x-pack/plugin/repositories-metering-api/src/main/java/org/elasticsearch/xpack/repositories/metering/action/TransportRepositoriesStatsAction.java index cb7d274814483..76ad89a9dfea9 100644 --- a/x-pack/plugin/repositories-metering-api/src/main/java/org/elasticsearch/xpack/repositories/metering/action/TransportRepositoriesStatsAction.java +++ b/x-pack/plugin/repositories-metering-api/src/main/java/org/elasticsearch/xpack/repositories/metering/action/TransportRepositoriesStatsAction.java @@ -27,7 +27,8 @@ public final class TransportRepositoriesStatsAction extends TransportNodesAction RepositoriesMeteringRequest, RepositoriesMeteringResponse, TransportRepositoriesStatsAction.RepositoriesNodeStatsRequest, - RepositoriesNodeMeteringResponse> { + RepositoriesNodeMeteringResponse, + Void> { private final RepositoriesService repositoriesService; diff --git a/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/action/cache/TransportSearchableSnapshotCacheStoresAction.java b/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/action/cache/TransportSearchableSnapshotCacheStoresAction.java index 446f0f433fe33..67cb5cddd9881 100644 --- a/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/action/cache/TransportSearchableSnapshotCacheStoresAction.java +++ b/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/action/cache/TransportSearchableSnapshotCacheStoresAction.java @@ -39,7 +39,8 @@ public class TransportSearchableSnapshotCacheStoresAction extends TransportNodes TransportSearchableSnapshotCacheStoresAction.Request, TransportSearchableSnapshotCacheStoresAction.NodesCacheFilesMetadata, TransportSearchableSnapshotCacheStoresAction.NodeRequest, - TransportSearchableSnapshotCacheStoresAction.NodeCacheFilesMetadata> { + TransportSearchableSnapshotCacheStoresAction.NodeCacheFilesMetadata, + Void> { public static final String ACTION_NAME = "internal:admin/xpack/searchable_snapshots/cache/store"; diff --git a/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/action/cache/TransportSearchableSnapshotsNodeCachesStatsAction.java 
b/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/action/cache/TransportSearchableSnapshotsNodeCachesStatsAction.java index c28948b4101e2..b414ff6daf713 100644 --- a/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/action/cache/TransportSearchableSnapshotsNodeCachesStatsAction.java +++ b/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/action/cache/TransportSearchableSnapshotsNodeCachesStatsAction.java @@ -47,7 +47,8 @@ public class TransportSearchableSnapshotsNodeCachesStatsAction extends Transport TransportSearchableSnapshotsNodeCachesStatsAction.NodesRequest, TransportSearchableSnapshotsNodeCachesStatsAction.NodesCachesStatsResponse, TransportSearchableSnapshotsNodeCachesStatsAction.NodeRequest, - TransportSearchableSnapshotsNodeCachesStatsAction.NodeCachesStatsResponse> { + TransportSearchableSnapshotsNodeCachesStatsAction.NodeCachesStatsResponse, + Void> { public static final String ACTION_NAME = "cluster:admin/xpack/searchable_snapshots/cache/stats"; diff --git a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/action/TransportClearSecurityCacheAction.java b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/action/TransportClearSecurityCacheAction.java index 56965274c6fab..ac06cf5f1eb67 100644 --- a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/action/TransportClearSecurityCacheAction.java +++ b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/action/TransportClearSecurityCacheAction.java @@ -33,7 +33,8 @@ public class TransportClearSecurityCacheAction extends TransportNodesAction< ClearSecurityCacheRequest, ClearSecurityCacheResponse, ClearSecurityCacheRequest.Node, - ClearSecurityCacheResponse.Node> { + ClearSecurityCacheResponse.Node, + Void> { private final CacheInvalidatorRegistry cacheInvalidatorRegistry; diff --git a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/action/privilege/TransportClearPrivilegesCacheAction.java b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/action/privilege/TransportClearPrivilegesCacheAction.java index 852144d1c2777..14868dda9f046 100644 --- a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/action/privilege/TransportClearPrivilegesCacheAction.java +++ b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/action/privilege/TransportClearPrivilegesCacheAction.java @@ -30,7 +30,8 @@ public class TransportClearPrivilegesCacheAction extends TransportNodesAction< ClearPrivilegesCacheRequest, ClearPrivilegesCacheResponse, ClearPrivilegesCacheRequest.Node, - ClearPrivilegesCacheResponse.Node> { + ClearPrivilegesCacheResponse.Node, + Void> { private final CompositeRolesStore rolesStore; private final CacheInvalidatorRegistry cacheInvalidatorRegistry; diff --git a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/action/realm/TransportClearRealmCacheAction.java b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/action/realm/TransportClearRealmCacheAction.java index 4d574c6b6c0ac..23c4e312e2f30 100644 --- a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/action/realm/TransportClearRealmCacheAction.java +++ b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/action/realm/TransportClearRealmCacheAction.java @@ -32,7 +32,8 @@ public class TransportClearRealmCacheAction extends 
TransportNodesAction< ClearRealmCacheRequest, ClearRealmCacheResponse, ClearRealmCacheRequest.Node, - ClearRealmCacheResponse.Node> { + ClearRealmCacheResponse.Node, + Void> { private final Realms realms; private final AuthenticationService authenticationService; diff --git a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/action/role/TransportClearRolesCacheAction.java b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/action/role/TransportClearRolesCacheAction.java index 82e62187f7f47..412b0d0b70da1 100644 --- a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/action/role/TransportClearRolesCacheAction.java +++ b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/action/role/TransportClearRolesCacheAction.java @@ -28,7 +28,8 @@ public class TransportClearRolesCacheAction extends TransportNodesAction< ClearRolesCacheRequest, ClearRolesCacheResponse, ClearRolesCacheRequest.Node, - ClearRolesCacheResponse.Node> { + ClearRolesCacheResponse.Node, + Void> { private final CompositeRolesStore rolesStore; diff --git a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/action/service/TransportGetServiceAccountNodesCredentialsAction.java b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/action/service/TransportGetServiceAccountNodesCredentialsAction.java index 228c606dd1e35..82ec407189595 100644 --- a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/action/service/TransportGetServiceAccountNodesCredentialsAction.java +++ b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/action/service/TransportGetServiceAccountNodesCredentialsAction.java @@ -35,7 +35,8 @@ public class TransportGetServiceAccountNodesCredentialsAction extends TransportN GetServiceAccountCredentialsNodesRequest, GetServiceAccountCredentialsNodesResponse, GetServiceAccountCredentialsNodesRequest.Node, - GetServiceAccountCredentialsNodesResponse.Node> { + GetServiceAccountCredentialsNodesResponse.Node, + Void> { private final FileServiceAccountTokenStore fileServiceAccountTokenStore; diff --git a/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/action/SpatialStatsTransportAction.java b/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/action/SpatialStatsTransportAction.java index f36ee616996ea..526b2c85c84f7 100644 --- a/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/action/SpatialStatsTransportAction.java +++ b/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/action/SpatialStatsTransportAction.java @@ -26,7 +26,8 @@ public class SpatialStatsTransportAction extends TransportNodesAction< SpatialStatsAction.Request, SpatialStatsAction.Response, SpatialStatsAction.NodeRequest, - SpatialStatsAction.NodeResponse> { + SpatialStatsAction.NodeResponse, + Void> { private final SpatialUsage usage; @Inject diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/plugin/TransportSqlStatsAction.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/plugin/TransportSqlStatsAction.java index c334c5779050a..337abf47ca0e3 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/plugin/TransportSqlStatsAction.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/plugin/TransportSqlStatsAction.java @@ -28,7 +28,8 @@ public class TransportSqlStatsAction extends TransportNodesAction< SqlStatsRequest, SqlStatsResponse, SqlStatsRequest.NodeStatsRequest, - 
SqlStatsResponse.NodeStatsResponse> { + SqlStatsResponse.NodeStatsResponse, + Void> { // the plan executor holds the metrics private final PlanExecutor planExecutor; diff --git a/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/action/TransportGetTransformNodeStatsAction.java b/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/action/TransportGetTransformNodeStatsAction.java index 83e7f55df04b0..3fd97ee49e1d0 100644 --- a/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/action/TransportGetTransformNodeStatsAction.java +++ b/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/action/TransportGetTransformNodeStatsAction.java @@ -35,7 +35,8 @@ public class TransportGetTransformNodeStatsAction extends TransportNodesAction< NodesStatsRequest, NodesStatsResponse, NodeStatsRequest, - NodeStatsResponse> { + NodeStatsResponse, + Void> { private final TransportService transportService; private final TransformScheduler scheduler; diff --git a/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/transport/actions/TransportWatcherStatsAction.java b/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/transport/actions/TransportWatcherStatsAction.java index 220415cf9d094..0c79bba50722a 100644 --- a/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/transport/actions/TransportWatcherStatsAction.java +++ b/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/transport/actions/TransportWatcherStatsAction.java @@ -36,7 +36,8 @@ public class TransportWatcherStatsAction extends TransportNodesAction< WatcherStatsRequest, WatcherStatsResponse, WatcherStatsRequest.Node, - WatcherStatsResponse.Node> { + WatcherStatsResponse.Node, + Void> { private final ExecutionService executionService; private final TriggerService triggerService; From 68c2efc7f68cfe19f37453e089b622cb23b402c0 Mon Sep 17 00:00:00 2001 From: David Turner Date: Tue, 24 Sep 2024 09:09:43 +0100 Subject: [PATCH 19/58] Detect long-running outbound tasks on network threads (#113250) (#113375) Extends the mechanism introduced in #109204 to cover slow-running outbound tasks too. 
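For reference, every outbound handler method touched below wraps its work in the same watchdog pattern, built on the ThreadWatchdog.ActivityTracker methods added in this change. A minimal sketch of that pattern, assuming org.elasticsearch.common.network.ThreadWatchdog; the `tracked` helper is illustrative, not part of the patch:

    // Outbound calls can re-enter the handler, so maybeStartActivity() only begins
    // tracking if this thread is currently idle; only the outermost caller that
    // actually started tracking balances it with stopActivity().
    static void tracked(ThreadWatchdog.ActivityTracker tracker, Runnable outboundWork) {
        final boolean startedActivity = tracker.maybeStartActivity();
        try {
            outboundWork.run();
        } finally {
            if (startedActivity) {
                tracker.stopActivity();
            }
        }
    }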
Closes #108710 Closes ES-8625 --- .../netty4/Netty4HttpServerTransport.java | 12 +-- .../transport/netty4/Netty4Transport.java | 5 +- .../netty4/Netty4WriteThrottlingHandler.java | 81 ++++++++++++++----- .../Netty4WriteThrottlingHandlerTests.java | 66 +++++++++------ .../common/network/ThreadWatchdog.java | 10 +++ .../common/network/ThreadWatchdogTests.java | 38 ++++++--- 6 files changed, 148 insertions(+), 64 deletions(-) diff --git a/modules/transport-netty4/src/main/java/org/elasticsearch/http/netty4/Netty4HttpServerTransport.java b/modules/transport-netty4/src/main/java/org/elasticsearch/http/netty4/Netty4HttpServerTransport.java index 5ed3d81392951..b971a52b7afb6 100644 --- a/modules/transport-netty4/src/main/java/org/elasticsearch/http/netty4/Netty4HttpServerTransport.java +++ b/modules/transport-netty4/src/main/java/org/elasticsearch/http/netty4/Netty4HttpServerTransport.java @@ -332,8 +332,12 @@ protected void initChannel(Channel ch) throws Exception { if (tlsConfig.isTLSEnabled()) { ch.pipeline().addLast("ssl", new SslHandler(tlsConfig.createServerSSLEngine())); } + final var threadWatchdogActivityTracker = transport.threadWatchdog.getActivityTrackerForCurrentThread(); ch.pipeline() - .addLast("chunked_writer", new Netty4WriteThrottlingHandler(transport.getThreadPool().getThreadContext())) + .addLast( + "chunked_writer", + new Netty4WriteThrottlingHandler(transport.getThreadPool().getThreadContext(), threadWatchdogActivityTracker) + ) .addLast("byte_buf_sizer", NettyByteBufSizer.INSTANCE); if (transport.readTimeoutMillis > 0) { ch.pipeline().addLast("read_timeout", new ReadTimeoutHandler(transport.readTimeoutMillis, TimeUnit.MILLISECONDS)); @@ -409,11 +413,7 @@ protected Result beginEncode(HttpResponse httpResponse, String acceptEncoding) t ch.pipeline() .addLast( "pipelining", - new Netty4HttpPipeliningHandler( - transport.pipeliningMaxEvents, - transport, - transport.threadWatchdog.getActivityTrackerForCurrentThread() - ) + new Netty4HttpPipeliningHandler(transport.pipeliningMaxEvents, transport, threadWatchdogActivityTracker) ); transport.serverAcceptedChannel(nettyHttpChannel); } diff --git a/modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/Netty4Transport.java b/modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/Netty4Transport.java index b99c76e7b0615..d8b02a0e9a0df 100644 --- a/modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/Netty4Transport.java +++ b/modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/Netty4Transport.java @@ -359,7 +359,10 @@ private void setupPipeline(Channel ch, boolean isRemoteClusterServerChannel) { if (NetworkTraceFlag.TRACE_ENABLED) { pipeline.addLast("logging", ESLoggingHandler.INSTANCE); } - pipeline.addLast("chunked_writer", new Netty4WriteThrottlingHandler(getThreadPool().getThreadContext())); + pipeline.addLast( + "chunked_writer", + new Netty4WriteThrottlingHandler(getThreadPool().getThreadContext(), threadWatchdog.getActivityTrackerForCurrentThread()) + ); pipeline.addLast( "dispatcher", new Netty4MessageInboundHandler( diff --git a/modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/Netty4WriteThrottlingHandler.java b/modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/Netty4WriteThrottlingHandler.java index 15011957040af..738da83817cb8 100644 --- a/modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/Netty4WriteThrottlingHandler.java +++ 
b/modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/Netty4WriteThrottlingHandler.java @@ -23,6 +23,7 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefIterator; import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.network.ThreadWatchdog; import org.elasticsearch.common.util.concurrent.ThreadContext; import org.elasticsearch.transport.Transports; @@ -42,31 +43,44 @@ public final class Netty4WriteThrottlingHandler extends ChannelDuplexHandler { private final Queue queuedWrites = new LinkedList<>(); private final ThreadContext threadContext; + private final ThreadWatchdog.ActivityTracker threadWatchdogActivityTracker; private WriteOperation currentWrite; - public Netty4WriteThrottlingHandler(ThreadContext threadContext) { + public Netty4WriteThrottlingHandler(ThreadContext threadContext, ThreadWatchdog.ActivityTracker threadWatchdogActivityTracker) { this.threadContext = threadContext; + this.threadWatchdogActivityTracker = threadWatchdogActivityTracker; } @Override public void write(ChannelHandlerContext ctx, Object msg, ChannelPromise promise) throws IOException { - if (msg instanceof BytesReference reference) { - if (reference.hasArray()) { - writeSingleByteBuf(ctx, Unpooled.wrappedBuffer(reference.array(), reference.arrayOffset(), reference.length()), promise); - } else { - BytesRefIterator iter = reference.iterator(); - final PromiseCombiner combiner = new PromiseCombiner(ctx.executor()); - BytesRef next; - while ((next = iter.next()) != null) { - final ChannelPromise chunkPromise = ctx.newPromise(); - combiner.add((Future) chunkPromise); - writeSingleByteBuf(ctx, Unpooled.wrappedBuffer(next.bytes, next.offset, next.length), chunkPromise); + final boolean startedActivity = threadWatchdogActivityTracker.maybeStartActivity(); + try { + if (msg instanceof BytesReference reference) { + if (reference.hasArray()) { + writeSingleByteBuf( + ctx, + Unpooled.wrappedBuffer(reference.array(), reference.arrayOffset(), reference.length()), + promise + ); + } else { + BytesRefIterator iter = reference.iterator(); + final PromiseCombiner combiner = new PromiseCombiner(ctx.executor()); + BytesRef next; + while ((next = iter.next()) != null) { + final ChannelPromise chunkPromise = ctx.newPromise(); + combiner.add((Future) chunkPromise); + writeSingleByteBuf(ctx, Unpooled.wrappedBuffer(next.bytes, next.offset, next.length), chunkPromise); + } + combiner.finish(promise); } - combiner.finish(promise); + } else { + assert msg instanceof ByteBuf; + writeSingleByteBuf(ctx, (ByteBuf) msg, promise); + } + } finally { + if (startedActivity) { + threadWatchdogActivityTracker.stopActivity(); } - } else { - assert msg instanceof ByteBuf; - writeSingleByteBuf(ctx, (ByteBuf) msg, promise); } } @@ -116,22 +130,45 @@ private void queueWrite(ByteBuf buf, ChannelPromise promise) { @Override public void channelWritabilityChanged(ChannelHandlerContext ctx) { - if (ctx.channel().isWritable()) { - doFlush(ctx); + final boolean startedActivity = threadWatchdogActivityTracker.maybeStartActivity(); + try { + if (ctx.channel().isWritable()) { + doFlush(ctx); + } + ctx.fireChannelWritabilityChanged(); + } finally { + if (startedActivity) { + threadWatchdogActivityTracker.stopActivity(); + } } - ctx.fireChannelWritabilityChanged(); } @Override public void flush(ChannelHandlerContext ctx) { - if (doFlush(ctx) == false) { - ctx.flush(); + final boolean startedActivity = threadWatchdogActivityTracker.maybeStartActivity(); + try { + if (doFlush(ctx) 
== false) { + ctx.flush(); + } + } finally { + if (startedActivity) { + threadWatchdogActivityTracker.stopActivity(); + } } } @Override public void channelInactive(ChannelHandlerContext ctx) throws Exception { - doFlush(ctx); + final boolean startedActivity = threadWatchdogActivityTracker.maybeStartActivity(); + try { + doFlush(ctx); + } finally { + if (startedActivity) { + threadWatchdogActivityTracker.stopActivity(); + } + } + + // super.channelInactive() can trigger reads which are tracked separately (and are not re-entrant) so no activity tracking here super.channelInactive(ctx); } diff --git a/modules/transport-netty4/src/test/java/org/elasticsearch/transport/netty4/Netty4WriteThrottlingHandlerTests.java b/modules/transport-netty4/src/test/java/org/elasticsearch/transport/netty4/Netty4WriteThrottlingHandlerTests.java index cf1fcbe88ea95..d87889c6a2416 100644 --- a/modules/transport-netty4/src/test/java/org/elasticsearch/transport/netty4/Netty4WriteThrottlingHandlerTests.java +++ b/modules/transport-netty4/src/test/java/org/elasticsearch/transport/netty4/Netty4WriteThrottlingHandlerTests.java @@ -18,43 +18,52 @@ import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.bytes.CompositeBytesReference; +import org.elasticsearch.common.network.ThreadWatchdog; +import org.elasticsearch.common.network.ThreadWatchdogHelper; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.concurrent.ThreadContext; import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.transport.Transports; import org.junit.After; import org.junit.Before; import java.util.List; import java.util.concurrent.CopyOnWriteArrayList; -import java.util.concurrent.ExecutionException; +import static org.hamcrest.Matchers.anyOf; +import static org.hamcrest.Matchers.emptyIterable; +import static org.hamcrest.Matchers.hasItem; import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.lessThan; import static org.hamcrest.Matchers.lessThanOrEqualTo; import static org.hamcrest.Matchers.oneOf; +import static org.hamcrest.Matchers.startsWith; public class Netty4WriteThrottlingHandlerTests extends ESTestCase { - private SharedGroupFactory.SharedGroup transportGroup; + private ThreadWatchdog threadWatchdog = new ThreadWatchdog(); @Before - public void createGroup() { - final SharedGroupFactory sharedGroupFactory = new SharedGroupFactory(Settings.EMPTY); - transportGroup = sharedGroupFactory.getTransportGroup(); + public void setFakeThreadName() { + // These tests interact with EmbeddedChannel instances directly on the test thread, so we rename it temporarily to satisfy checks + // that we're running on a transport thread + Thread.currentThread().setName(Transports.TEST_MOCK_TRANSPORT_THREAD_PREFIX + Thread.currentThread().getName()); } @After - public void stopGroup() { - transportGroup.shutdown(); + public void resetThreadName() { + final var threadName = Thread.currentThread().getName(); + assertThat(threadName, startsWith(Transports.TEST_MOCK_TRANSPORT_THREAD_PREFIX)); + Thread.currentThread().setName(threadName.substring(Transports.TEST_MOCK_TRANSPORT_THREAD_PREFIX.length())); } - public void testThrottlesLargeMessage() throws ExecutionException, InterruptedException { + public void testThrottlesLargeMessage() { final List seen = new CopyOnWriteArrayList<>(); final CapturingHandler capturingHandler = new CapturingHandler(seen); final 
EmbeddedChannel embeddedChannel = new EmbeddedChannel( capturingHandler, - new Netty4WriteThrottlingHandler(new ThreadContext(Settings.EMPTY)) + new Netty4WriteThrottlingHandler(new ThreadContext(Settings.EMPTY), threadWatchdog.getActivityTrackerForCurrentThread()) ); // we assume that the channel outbound buffer is smaller than Netty4WriteThrottlingHandler.MAX_BYTES_PER_WRITE final int writeableBytes = Math.toIntExact(embeddedChannel.bytesBeforeUnwritable()); @@ -66,11 +75,11 @@ public void testThrottlesLargeMessage() throws ExecutionException, InterruptedEx ); final Object message = wrapAsNettyOrEsBuffer(messageBytes); final ChannelPromise promise = embeddedChannel.newPromise(); - transportGroup.getLowLevelGroup().submit(() -> embeddedChannel.write(message, promise)).get(); + embeddedChannel.write(message, promise); assertThat(seen, hasSize(1)); assertSliceEquals(seen.get(0), message, 0, Netty4WriteThrottlingHandler.MAX_BYTES_PER_WRITE); assertFalse(promise.isDone()); - transportGroup.getLowLevelGroup().submit(embeddedChannel::flush).get(); + embeddedChannel.flush(); assertTrue(promise.isDone()); assertThat(seen, hasSize(fullSizeChunks + (extraChunkSize == 0 ? 0 : 1))); assertTrue(capturingHandler.didWriteAfterThrottled); @@ -84,12 +93,12 @@ public void testThrottlesLargeMessage() throws ExecutionException, InterruptedEx } } - public void testThrottleLargeCompositeMessage() throws ExecutionException, InterruptedException { + public void testThrottleLargeCompositeMessage() { final List seen = new CopyOnWriteArrayList<>(); final CapturingHandler capturingHandler = new CapturingHandler(seen); final EmbeddedChannel embeddedChannel = new EmbeddedChannel( capturingHandler, - new Netty4WriteThrottlingHandler(new ThreadContext(Settings.EMPTY)) + new Netty4WriteThrottlingHandler(new ThreadContext(Settings.EMPTY), threadWatchdog.getActivityTrackerForCurrentThread()) ); // we assume that the channel outbound buffer is smaller than Netty4WriteThrottlingHandler.MAX_BYTES_PER_WRITE final int writeableBytes = Math.toIntExact(embeddedChannel.bytesBeforeUnwritable()); @@ -105,51 +114,51 @@ public void testThrottleLargeCompositeMessage() throws ExecutionException, Inter new BytesArray(messageBytes, splitOffset, messageBytes.length - splitOffset) ); final ChannelPromise promise = embeddedChannel.newPromise(); - transportGroup.getLowLevelGroup().submit(() -> embeddedChannel.write(message, promise)).get(); + embeddedChannel.write(message, promise); assertThat(seen, hasSize(oneOf(1, 2))); assertSliceEquals(seen.get(0), message, 0, seen.get(0).readableBytes()); assertFalse(promise.isDone()); - transportGroup.getLowLevelGroup().submit(embeddedChannel::flush).get(); + embeddedChannel.flush(); assertTrue(promise.isDone()); assertThat(seen, hasSize(oneOf(fullSizeChunks, fullSizeChunks + 1))); assertTrue(capturingHandler.didWriteAfterThrottled); assertBufferEquals(Unpooled.compositeBuffer().addComponents(true, seen), message); } - public void testPassesSmallMessageDirectly() throws ExecutionException, InterruptedException { + public void testPassesSmallMessageDirectly() { final List seen = new CopyOnWriteArrayList<>(); final CapturingHandler capturingHandler = new CapturingHandler(seen); final EmbeddedChannel embeddedChannel = new EmbeddedChannel( capturingHandler, - new Netty4WriteThrottlingHandler(new ThreadContext(Settings.EMPTY)) + new Netty4WriteThrottlingHandler(new ThreadContext(Settings.EMPTY), threadWatchdog.getActivityTrackerForCurrentThread()) ); final int writeableBytes = 
Math.toIntExact(embeddedChannel.bytesBeforeUnwritable()); assertThat(writeableBytes, lessThan(Netty4WriteThrottlingHandler.MAX_BYTES_PER_WRITE)); final byte[] messageBytes = randomByteArrayOfLength(randomIntBetween(0, Netty4WriteThrottlingHandler.MAX_BYTES_PER_WRITE)); final Object message = wrapAsNettyOrEsBuffer(messageBytes); final ChannelPromise promise = embeddedChannel.newPromise(); - transportGroup.getLowLevelGroup().submit(() -> embeddedChannel.write(message, promise)).get(); + embeddedChannel.write(message, promise); assertThat(seen, hasSize(1)); // first message should be passed through straight away assertBufferEquals(seen.get(0), message); assertFalse(promise.isDone()); - transportGroup.getLowLevelGroup().submit(embeddedChannel::flush).get(); + embeddedChannel.flush(); assertTrue(promise.isDone()); assertThat(seen, hasSize(1)); assertFalse(capturingHandler.didWriteAfterThrottled); } - public void testThrottlesOnUnwritable() throws ExecutionException, InterruptedException { + public void testThrottlesOnUnwritable() { final List seen = new CopyOnWriteArrayList<>(); final EmbeddedChannel embeddedChannel = new EmbeddedChannel( new CapturingHandler(seen), - new Netty4WriteThrottlingHandler(new ThreadContext(Settings.EMPTY)) + new Netty4WriteThrottlingHandler(new ThreadContext(Settings.EMPTY), threadWatchdog.getActivityTrackerForCurrentThread()) ); final int writeableBytes = Math.toIntExact(embeddedChannel.bytesBeforeUnwritable()); assertThat(writeableBytes, lessThan(Netty4WriteThrottlingHandler.MAX_BYTES_PER_WRITE)); final byte[] messageBytes = randomByteArrayOfLength(writeableBytes + randomIntBetween(0, 10)); final Object message = wrapAsNettyOrEsBuffer(messageBytes); final ChannelPromise promise = embeddedChannel.newPromise(); - transportGroup.getLowLevelGroup().submit(() -> embeddedChannel.write(message, promise)).get(); + embeddedChannel.write(message, promise); assertThat(seen, hasSize(1)); // first message should be passed through straight away assertBufferEquals(seen.get(0), message); assertFalse(promise.isDone()); @@ -157,11 +166,11 @@ public void testThrottlesOnUnwritable() throws ExecutionException, InterruptedEx randomByteArrayOfLength(randomIntBetween(0, Netty4WriteThrottlingHandler.MAX_BYTES_PER_WRITE)) ); final ChannelPromise promiseForQueued = embeddedChannel.newPromise(); - transportGroup.getLowLevelGroup().submit(() -> embeddedChannel.write(messageToQueue, promiseForQueued)).get(); + embeddedChannel.write(messageToQueue, promiseForQueued); assertThat(seen, hasSize(1)); assertFalse(promiseForQueued.isDone()); assertFalse(promise.isDone()); - transportGroup.getLowLevelGroup().submit(embeddedChannel::flush).get(); + embeddedChannel.flush(); assertTrue(promise.isDone()); assertTrue(promiseForQueued.isDone()); } @@ -191,7 +200,7 @@ private static Object wrapAsNettyOrEsBuffer(byte[] messageBytes) { return new BytesArray(messageBytes); } - private static class CapturingHandler extends ChannelOutboundHandlerAdapter { + private class CapturingHandler extends ChannelOutboundHandlerAdapter { private final List seen; private boolean wasThrottled = false; @@ -204,6 +213,13 @@ private static class CapturingHandler extends ChannelOutboundHandlerAdapter { @Override public void write(ChannelHandlerContext ctx, Object msg, ChannelPromise promise) throws Exception { + assertThat( + ThreadWatchdogHelper.getStuckThreadNames(threadWatchdog), + // writes are re-entrant so we might already be considered stuck due to an earlier check + anyOf(emptyIterable(), 
hasItem(Thread.currentThread().getName())) + ); + assertTrue("should only write to writeable channel", ctx.channel().isWritable()); assertThat(msg, instanceOf(ByteBuf.class)); final ByteBuf buf = (ByteBuf) msg; diff --git a/server/src/main/java/org/elasticsearch/common/network/ThreadWatchdog.java index 687a8f5940bd6..5432e7cfa2676 100644 --- a/server/src/main/java/org/elasticsearch/common/network/ThreadWatchdog.java +++ b/server/src/main/java/org/elasticsearch/common/network/ThreadWatchdog.java @@ -131,6 +131,16 @@ public void startActivity() { assert isIdle(prevValue) : "thread [" + trackedThread.getName() + "] was already active"; } + public boolean maybeStartActivity() { + assert trackedThread == Thread.currentThread() : trackedThread.getName() + " vs " + Thread.currentThread().getName(); + if (isIdle(get())) { + getAndIncrement(); + return true; + } else { + return false; + } + } + public void stopActivity() { assert trackedThread == Thread.currentThread() : trackedThread.getName() + " vs " + Thread.currentThread().getName(); final var prevValue = getAndIncrement(); diff --git a/server/src/test/java/org/elasticsearch/common/network/ThreadWatchdogTests.java index 06cfddf6c9739..f8506a007bb19 100644 --- a/server/src/test/java/org/elasticsearch/common/network/ThreadWatchdogTests.java +++ b/server/src/test/java/org/elasticsearch/common/network/ThreadWatchdogTests.java @@ -49,22 +49,22 @@ public void testSimpleActivityTracking() throws InterruptedException { // step 1: thread is idle safeAwait(barrier); - activityTracker.startActivity(); + startActivity(activityTracker); safeAwait(barrier); // step 2: thread is active safeAwait(barrier); for (int i = between(1, 10); i > 0; i--) { - activityTracker.stopActivity(); - activityTracker.startActivity(); + stopActivity(activityTracker); + startActivity(activityTracker); } safeAwait(barrier); // step 3: thread still active, but made progress safeAwait(barrier); - activityTracker.stopActivity(); + stopActivity(activityTracker); safeAwait(barrier); // step 4: thread is idle again safeAwait(barrier); @@ -117,11 +117,11 @@ public void testMultipleBlockedThreads() throws InterruptedException { threads[i] = new Thread(() -> { safeAwait(barrier); final var activityTracker = watchdog.getActivityTrackerForCurrentThread(); - activityTracker.startActivity(); + startActivity(activityTracker); safeAwait(barrier); // wait for main test thread safeAwait(barrier); - activityTracker.stopActivity(); + stopActivity(activityTracker); }, threadNames.get(i)); threads[i].start(); } @@ -158,14 +158,14 @@ public void testConcurrency() throws Exception { threads[i] = new Thread(() -> { final var activityTracker = watchdog.getActivityTrackerForCurrentThread(); while (keepGoing.get()) { - activityTracker.startActivity(); + startActivity(activityTracker); try { safeAcquire(semaphore); Thread.yield(); semaphore.release(); Thread.yield(); } finally { - activityTracker.stopActivity(); + stopActivity(activityTracker); warmUpLatch.countDown(); } } @@ -233,7 +233,7 @@
public void testLoggingAndScheduling() { ) ); assertAdvanceTime(deterministicTaskQueue, Math.max(quietTimeMillis, checkIntervalMillis)); - activityTracker.stopActivity(); + stopActivity(activityTracker); MockLog.assertThatLogger( deterministicTaskQueue::runAllRunnableTasks, ThreadWatchdog.class, @@ -303,4 +303,22 @@ private static void assertAdvanceTime(DeterministicTaskQueue deterministicTaskQu deterministicTaskQueue.advanceTime(); assertEquals(expectedMillis, deterministicTaskQueue.getCurrentTimeMillis() - currentTimeMillis); } + + private static void startActivity(ThreadWatchdog.ActivityTracker activityTracker) { + if (randomBoolean()) { + activityTracker.startActivity(); + } else { + assertTrue(activityTracker.maybeStartActivity()); + } + if (randomBoolean()) { + assertFalse(activityTracker.maybeStartActivity()); + } + } + + private static void stopActivity(ThreadWatchdog.ActivityTracker activityTracker) { + if (randomBoolean()) { + assertFalse(activityTracker.maybeStartActivity()); + } + activityTracker.stopActivity(); + } } From caf94ca88a9ef2bb60639df5ab9344f17a03dcb7 Mon Sep 17 00:00:00 2001 From: David Turner Date: Tue, 24 Sep 2024 09:16:01 +0100 Subject: [PATCH 20/58] Make `AddIndexBlockClusterStateUpdateRequest` a record (#113349) (#113389) No need to extend `IndicesClusterStateUpdateRequest`, this thing can be completely immutable. --- ...ddIndexBlockClusterStateUpdateRequest.java | 38 ++++++++----------- .../TransportAddIndexBlockAction.java | 21 ++++++---- .../metadata/MetadataIndexStateService.java | 12 +++--- 3 files changed, 34 insertions(+), 37 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/readonly/AddIndexBlockClusterStateUpdateRequest.java b/server/src/main/java/org/elasticsearch/action/admin/indices/readonly/AddIndexBlockClusterStateUpdateRequest.java index beaf561bfee56..50bd3b37b4cb3 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/readonly/AddIndexBlockClusterStateUpdateRequest.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/readonly/AddIndexBlockClusterStateUpdateRequest.java @@ -8,32 +8,26 @@ */ package org.elasticsearch.action.admin.indices.readonly; -import org.elasticsearch.cluster.ack.IndicesClusterStateUpdateRequest; import org.elasticsearch.cluster.metadata.IndexMetadata.APIBlock; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.index.Index; + +import java.util.Objects; /** * Cluster state update request that allows to add a block to one or more indices */ -public class AddIndexBlockClusterStateUpdateRequest extends IndicesClusterStateUpdateRequest { - - private final APIBlock block; - private long taskId; - - public AddIndexBlockClusterStateUpdateRequest(final APIBlock block, final long taskId) { - this.block = block; - this.taskId = taskId; - } - - public long taskId() { - return taskId; - } - - public APIBlock getBlock() { - return block; - } - - public AddIndexBlockClusterStateUpdateRequest taskId(final long taskId) { - this.taskId = taskId; - return this; +public record AddIndexBlockClusterStateUpdateRequest( + TimeValue masterNodeTimeout, + TimeValue ackTimeout, + APIBlock block, + long taskId, + Index[] indices +) { + public AddIndexBlockClusterStateUpdateRequest { + Objects.requireNonNull(masterNodeTimeout); + Objects.requireNonNull(ackTimeout); + Objects.requireNonNull(block); + Objects.requireNonNull(indices); } } diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/readonly/TransportAddIndexBlockAction.java 
b/server/src/main/java/org/elasticsearch/action/admin/indices/readonly/TransportAddIndexBlockAction.java index 2b8f832b8aafd..867cd80fb68d0 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/readonly/TransportAddIndexBlockAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/readonly/TransportAddIndexBlockAction.java @@ -102,13 +102,18 @@ protected void masterOperation( return; } - final AddIndexBlockClusterStateUpdateRequest addBlockRequest = new AddIndexBlockClusterStateUpdateRequest( - request.getBlock(), - task.getId() - ).ackTimeout(request.ackTimeout()).masterNodeTimeout(request.masterNodeTimeout()).indices(concreteIndices); - indexStateService.addIndexBlock(addBlockRequest, listener.delegateResponse((delegatedListener, t) -> { - logger.debug(() -> "failed to mark indices as readonly [" + Arrays.toString(concreteIndices) + "]", t); - delegatedListener.onFailure(t); - })); + indexStateService.addIndexBlock( + new AddIndexBlockClusterStateUpdateRequest( + request.masterNodeTimeout(), + request.ackTimeout(), + request.getBlock(), + task.getId(), + concreteIndices + ), + listener.delegateResponse((delegatedListener, t) -> { + logger.debug(() -> "failed to mark indices as readonly [" + Arrays.toString(concreteIndices) + "]", t); + delegatedListener.onFailure(t); + }) + ); } } diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataIndexStateService.java b/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataIndexStateService.java index 00e7d2b05f2a3..0c33878b01229 100644 --- a/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataIndexStateService.java +++ b/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataIndexStateService.java @@ -470,7 +470,7 @@ public void addIndexBlock(AddIndexBlockClusterStateUpdateRequest request, Action } addBlocksQueue.submitTask( - "add-index-block-[" + request.getBlock().name + "]-" + Arrays.toString(concreteIndices), + "add-index-block-[" + request.block().name + "]-" + Arrays.toString(concreteIndices), new AddBlocksTask(request, listener), request.masterNodeTimeout() ); @@ -480,7 +480,7 @@ private class AddBlocksExecutor extends SimpleBatchedExecutor> executeTask(AddBlocksTask task, ClusterState clusterState) { - return addIndexBlock(task.request.indices(), clusterState, task.request.getBlock()); + return addIndexBlock(task.request.indices(), clusterState, task.request.block()); } @Override @@ -497,7 +497,7 @@ public void taskSucceeded(AddBlocksTask task, Map blockedIn .delegateFailure( (delegate2, verifyResults) -> finalizeBlocksQueue.submitTask( "finalize-index-block-[" - + task.request.getBlock().name + + task.request.block().name + "]-[" + blockedIndices.keySet().stream().map(Index::getName).collect(Collectors.joining(", ")) + "]", @@ -529,7 +529,7 @@ public Tuple> executeTask(FinalizeBlocksTask clusterState, task.blockedIndices, task.verifyResults, - task.request.getBlock() + task.request.block() ); assert finalizeResult.v2().size() == task.verifyResults.size(); return finalizeResult; @@ -797,9 +797,7 @@ private void sendVerifyShardBlockRequest( block, parentTaskId ); - if (request.ackTimeout() != null) { - shardRequest.timeout(request.ackTimeout()); - } + shardRequest.timeout(request.ackTimeout()); client.executeLocally(TransportVerifyShardIndexBlockAction.TYPE, shardRequest, listener); } } From 8c81222b6699cbf7f0a790f1ba6b53ce1151269e Mon Sep 17 00:00:00 2001 From: Simon Cooper Date: Tue, 24 Sep 2024 11:04:08 +0100 Subject: [PATCH 21/58] Change 
default locale of date processors to ENGLISH (#112796) (#113438) The default is documented as English, so this fixes the code to match the docs. Note that this mainly impacts Elasticsearch when run on JDK 23 with the CLDR locale database, since in the COMPAT database used pre-23 the root and en locales are essentially the same. --- .../java/org/elasticsearch/ingest/common/DateProcessor.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/DateProcessor.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/DateProcessor.java index bfdf87f417b60..22db5a330fb45 100644 --- a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/DateProcessor.java +++ b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/DateProcessor.java @@ -98,7 +98,7 @@ private static ZoneId newDateTimeZone(String timezone) { } private static Locale newLocale(String locale) { - return locale == null ? Locale.ROOT : LocaleUtils.parse(locale); + return locale == null ? Locale.ENGLISH : LocaleUtils.parse(locale); } @Override From d553b8bef99c26d072f6ae1f50beaeec2629f0bf Mon Sep 17 00:00:00 2001 From: Andrei Dan Date: Tue, 24 Sep 2024 13:08:31 +0300 Subject: [PATCH 22/58] Implement `parseBytesRef` for TimeSeriesRoutingHashFieldType (#113373) (#113439) This implements the `parseBytesRef` method for the `_ts_routing_hash` field so we can parse the values generated by the companion `format` method. We parse the values when fetching them from the source when the field is used as a `sort` paired with `search_after`. Before this change, sorting by `_ts_routing_hash` paired with `search_after` would yield an `UnsupportedOperationException`. (cherry picked from commit 4e5e87037074e7b4a6ccd6b729da477f99aabeae) Signed-off-by: Andrei Dan --- docs/changelog/113373.yaml | 6 +++ .../test/tsdb/25_id_generation.yml | 47 +++++++++++++++++++ .../index/mapper/MapperFeatures.java | 3 +- .../TimeSeriesRoutingHashFieldMapper.java | 9 ++++ .../search/DocValueFormatTests.java | 13 +++++ 5 files changed, 77 insertions(+), 1 deletion(-) create mode 100644 docs/changelog/113373.yaml diff --git a/docs/changelog/113373.yaml b/docs/changelog/113373.yaml new file mode 100644 index 0000000000000..cbb3829e03425 --- /dev/null +++ b/docs/changelog/113373.yaml @@ -0,0 +1,6 @@ +pr: 113373 +summary: Implement `parseBytesRef` for `TimeSeriesRoutingHashFieldType` +area: TSDB +type: bug +issues: + - 112399 diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/tsdb/25_id_generation.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/tsdb/25_id_generation.yml index 973832cf3ca73..4faa0424adb43 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/tsdb/25_id_generation.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/tsdb/25_id_generation.yml @@ -65,6 +65,9 @@ setup: --- generates a consistent id: + - requires: + cluster_features: "tsdb.ts_routing_hash_doc_value_parse_byte_ref" + reason: _tsid routing hash doc value parsing has been fixed - do: bulk: refresh: true @@ -152,6 +155,50 @@ generates a consistent id: - match: { hits.hits.8._source.@timestamp: 2021-04-28T18:52:04.467Z } - match: { hits.hits.8._source.k8s.pod.uid: 947e4ced-1786-4e53-9e0c-5c447e959507 } + - do: + search: + index: id_generation_test + body: + query: + match_all: {} + sort: ["@timestamp", "_ts_routing_hash"] + _source: true + search_after: [ "2021-04-28T18:50:03.142Z", "cn4exQ" ] + docvalue_fields: [_ts_routing_hash] + + - match:
{hits.total.value: 9} + + - match: { hits.hits.0._id: cZZNs7B9sSWsyrL5AAABeRnRGTM } + - match: { hits.hits.0._source.@timestamp: 2021-04-28T18:50:04.467Z } + - match: { hits.hits.0._source.k8s.pod.uid: 947e4ced-1786-4e53-9e0c-5c447e959507 } + + - match: { hits.hits.1._id: cn4excfoxSs_KdA5AAABeRnRYiY } + - match: { hits.hits.1._source.@timestamp: 2021-04-28T18:50:23.142Z } + - match: { hits.hits.1._source.k8s.pod.uid: df3145b3-0563-4d3b-a0f7-897eb2876ea9 } + + - match: { hits.hits.2._id: cZZNs7B9sSWsyrL5AAABeRnRZ1M } + - match: { hits.hits.2._source.@timestamp: 2021-04-28T18:50:24.467Z } + - match: { hits.hits.2._source.k8s.pod.uid: 947e4ced-1786-4e53-9e0c-5c447e959507 } + + - match: { hits.hits.3._id: cZZNs7B9sSWsyrL5AAABeRnRtXM } + - match: { hits.hits.3._source.@timestamp: 2021-04-28T18:50:44.467Z } + - match: { hits.hits.3._source.k8s.pod.uid: 947e4ced-1786-4e53-9e0c-5c447e959507 } + + - match: { hits.hits.4._id: cn4excfoxSs_KdA5AAABeRnR11Y } + - match: { hits.hits.4._source.@timestamp: 2021-04-28T18:50:53.142Z } + - match: { hits.hits.4._source.k8s.pod.uid: df3145b3-0563-4d3b-a0f7-897eb2876ea9 } + + - match: { hits.hits.5._id: cn4excfoxSs_KdA5AAABeRnR_mY } + - match: { hits.hits.5._source.@timestamp: 2021-04-28T18:51:03.142Z } + - match: { hits.hits.5._source.k8s.pod.uid: df3145b3-0563-4d3b-a0f7-897eb2876ea9 } + + - match: { hits.hits.6._id: cZZNs7B9sSWsyrL5AAABeRnSA5M } + - match: { hits.hits.6._source.@timestamp: 2021-04-28T18:51:04.467Z } + - match: { hits.hits.6._source.k8s.pod.uid: 947e4ced-1786-4e53-9e0c-5c447e959507 } + + - match: { hits.hits.7._id: cZZNs7B9sSWsyrL5AAABeRnS7fM } + - match: { hits.hits.7._source.@timestamp: 2021-04-28T18:52:04.467Z } + - match: { hits.hits.7._source.k8s.pod.uid: 947e4ced-1786-4e53-9e0c-5c447e959507 } --- index a new document on top of an old one: - do: diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java index d2ca7a24a78fd..ac7d10abc7121 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java @@ -43,7 +43,8 @@ public Set getFeatures() { SourceFieldMapper.SYNTHETIC_SOURCE_COPY_TO_FIX, FlattenedFieldMapper.IGNORE_ABOVE_SUPPORT, IndexSettings.IGNORE_ABOVE_INDEX_LEVEL_SETTING, - SourceFieldMapper.SYNTHETIC_SOURCE_COPY_TO_INSIDE_OBJECTS_FIX + SourceFieldMapper.SYNTHETIC_SOURCE_COPY_TO_INSIDE_OBJECTS_FIX, + TimeSeriesRoutingHashFieldMapper.TS_ROUTING_HASH_FIELD_PARSES_BYTES_REF ); } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TimeSeriesRoutingHashFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TimeSeriesRoutingHashFieldMapper.java index 3c4a0ae4e51f8..60f792068300b 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TimeSeriesRoutingHashFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TimeSeriesRoutingHashFieldMapper.java @@ -14,6 +14,7 @@ import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.util.ByteUtils; +import org.elasticsearch.features.NodeFeature; import org.elasticsearch.index.IndexMode; import org.elasticsearch.index.IndexVersions; import org.elasticsearch.index.fielddata.FieldData; @@ -45,6 +46,7 @@ public class TimeSeriesRoutingHashFieldMapper extends MetadataFieldMapper { public static final TimeSeriesRoutingHashFieldMapper INSTANCE = new TimeSeriesRoutingHashFieldMapper(); 
public static final TypeParser PARSER = new FixedTypeParser(c -> c.getIndexSettings().getMode().timeSeriesRoutingHashFieldMapper()); + static final NodeFeature TS_ROUTING_HASH_FIELD_PARSES_BYTES_REF = new NodeFeature("tsdb.ts_routing_hash_doc_value_parse_byte_ref"); static final class TimeSeriesRoutingHashFieldType extends MappedFieldType { @@ -64,6 +66,13 @@ public Object format(BytesRef value) { return Uid.decodeId(value.bytes, value.offset, value.length); } + @Override + public BytesRef parseBytesRef(Object value) { + if (value instanceof BytesRef valueAsBytesRef) { + return valueAsBytesRef; + } + return Uid.encodeId(value.toString()); + } }; private TimeSeriesRoutingHashFieldType() { diff --git a/server/src/test/java/org/elasticsearch/search/DocValueFormatTests.java b/server/src/test/java/org/elasticsearch/search/DocValueFormatTests.java index 0a830b598817d..6b42dbbb39c9f 100644 --- a/server/src/test/java/org/elasticsearch/search/DocValueFormatTests.java +++ b/server/src/test/java/org/elasticsearch/search/DocValueFormatTests.java @@ -20,6 +20,7 @@ import org.elasticsearch.common.time.DateFormatter; import org.elasticsearch.index.mapper.DateFieldMapper.Resolution; import org.elasticsearch.index.mapper.TimeSeriesIdFieldMapper.TimeSeriesIdBuilder; +import org.elasticsearch.index.mapper.TimeSeriesRoutingHashFieldMapper; import org.elasticsearch.test.ESTestCase; import java.io.IOException; @@ -33,6 +34,8 @@ import static org.elasticsearch.search.aggregations.bucket.geogrid.GeoTileUtils.longEncode; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.is; public class DocValueFormatTests extends ESTestCase { @@ -388,4 +391,14 @@ public void testParseTsid() throws IOException { Object tsidBase64 = Base64.getUrlEncoder().withoutPadding().encodeToString(expectedBytes); assertEquals(tsidFormat, tsidBase64); } + + public void testFormatAndParseTsRoutingHash() throws IOException { + BytesRef tsRoutingHashInput = new BytesRef("cn4exQ"); + DocValueFormat docValueFormat = TimeSeriesRoutingHashFieldMapper.INSTANCE.fieldType().docValueFormat(null, ZoneOffset.UTC); + Object formattedValue = docValueFormat.format(tsRoutingHashInput); + // the format method takes BytesRef as input and outputs a String + assertThat(formattedValue, instanceOf(String.class)); + // the parse method will output the BytesRef input + assertThat(docValueFormat.parseBytesRef(formattedValue), is(tsRoutingHashInput)); + } } From db00f0c106cdcaa7c15f0454678d07d37ecbab22 Mon Sep 17 00:00:00 2001 From: Pooya Salehi Date: Tue, 24 Sep 2024 12:12:43 +0200 Subject: [PATCH 23/58] Remove test logging from PrevalidateShardPathIT#testCheckShards (#113434) (#113440) Relates https://github.com/elastic/elasticsearch/pull/113107 Closes https://github.com/elastic/elasticsearch/issues/111134 --- .../elasticsearch/cluster/PrevalidateShardPathIT.java | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/cluster/PrevalidateShardPathIT.java b/server/src/internalClusterTest/java/org/elasticsearch/cluster/PrevalidateShardPathIT.java index 062f4adb27120..87943dedc708b 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/cluster/PrevalidateShardPathIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/cluster/PrevalidateShardPathIT.java @@ -21,7 +21,6 @@ import org.elasticsearch.common.settings.Settings; import 
org.elasticsearch.index.shard.ShardId; import org.elasticsearch.test.ESIntegTestCase; -import org.elasticsearch.test.junit.annotations.TestIssueLogging; import java.util.HashSet; import java.util.Set; @@ -41,15 +40,6 @@ @ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0) public class PrevalidateShardPathIT extends ESIntegTestCase { - @TestIssueLogging( - value = "org.elasticsearch.cluster.service.MasterService:DEBUG," - + "org.elasticsearch.indices.store.IndicesStore:TRACE," - + "org.elasticsearch.indices.cluster.IndicesClusterStateService:DEBUG," - + "org.elasticsearch.indices.IndicesService:TRACE," - + "org.elasticsearch.index.IndexService:TRACE," - + "org.elasticsearch.env.NodeEnvironment:TRACE", - issueUrl = "https://github.com/elastic/elasticsearch/issues/111134" - ) public void testCheckShards() throws Exception { internalCluster().startMasterOnlyNode(); String node1 = internalCluster().startDataOnlyNode(); From 9a21ca63d7f2f307739fd24885a931ae8b75aea5 Mon Sep 17 00:00:00 2001 From: Salvatore Campagna <93581129+salvatore-campagna@users.noreply.github.com> Date: Tue, 24 Sep 2024 13:47:09 +0200 Subject: [PATCH 24/58] LogsDB data migration integration testing (#112710) (#113448) Here we test reindexing logsdb indices and creating and restoring snapshots of them. Note that logsdb uses synthetic source, so restoring source-only snapshots fails due to the missing _source. (cherry picked from commit f7880ae85f0be9f9a8c89c5415ce406c293a09db) --- .../repository-source-only.asciidoc | 2 +- .../datastreams/LogsDataStreamRestIT.java | 293 +++++++++++++++++- 2 files changed, 280 insertions(+), 15 deletions(-) diff --git a/docs/reference/snapshot-restore/repository-source-only.asciidoc b/docs/reference/snapshot-restore/repository-source-only.asciidoc index 07ddedd197931..04e53c42aff9d 100644 --- a/docs/reference/snapshot-restore/repository-source-only.asciidoc +++ b/docs/reference/snapshot-restore/repository-source-only.asciidoc @@ -18,7 +18,7 @@ stream or index. ================================================== Source-only snapshots are only supported if the `_source` field is enabled and no source-filtering is applied. -When you restore a source-only snapshot: +As a result, indices adopting synthetic source cannot be restored. When you restore a source-only snapshot: * The restored index is read-only and can only serve `match_all` search or scroll requests to enable reindexing.
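As context for the snapshot tests below: source-only snapshots come from a repository of type `source` that wraps a concrete repository implementation. A hypothetical sketch in the style of the REST test that follows, assuming the `fs` delegate backing FsRepository; the repository name and location are illustrative, not taken from this patch:

    // Register a source-only repository delegating to an "fs" repository. Restoring a
    // logsdb index from a snapshot in it is expected to fail, because synthetic source
    // leaves no stored _source for the source-only snapshot to rebuild data from.
    final Request createRepoRequest = new Request("PUT", "/_snapshot/source_only_repo");
    createRepoRequest.setJsonEntity("""
        {
          "type": "source",
          "settings": {
            "delegate_type": "fs",
            "location": "source_only_backup"
          }
        }
        """);
    assertOK(client.performRequest(createRepoRequest));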
diff --git a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/LogsDataStreamRestIT.java b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/LogsDataStreamRestIT.java index f62fa83b4e111..f95815d1daff9 100644 --- a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/LogsDataStreamRestIT.java +++ b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/LogsDataStreamRestIT.java @@ -9,16 +9,23 @@ package org.elasticsearch.datastreams; +import org.apache.http.client.methods.HttpPost; +import org.apache.http.client.methods.HttpPut; import org.elasticsearch.client.Request; import org.elasticsearch.client.Response; import org.elasticsearch.client.ResponseException; import org.elasticsearch.client.RestClient; import org.elasticsearch.common.network.InetAddresses; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.time.DateFormatter; import org.elasticsearch.common.time.FormatNames; +import org.elasticsearch.index.IndexMode; +import org.elasticsearch.repositories.fs.FsRepository; +import org.elasticsearch.rest.RestStatus; import org.elasticsearch.test.cluster.ElasticsearchCluster; import org.elasticsearch.test.cluster.local.distribution.DistributionType; import org.elasticsearch.test.rest.ESRestTestCase; +import org.hamcrest.Matchers; import org.junit.Before; import org.junit.ClassRule; @@ -41,6 +48,7 @@ public class LogsDataStreamRestIT extends ESRestTestCase { public static ElasticsearchCluster cluster = ElasticsearchCluster.local() .distribution(DistributionType.DEFAULT) .setting("xpack.security.enabled", "false") + .setting("xpack.license.self_generated.type", "trial") .build(); @Override @@ -102,7 +110,7 @@ private static void waitForLogs(RestClient client) throws Exception { } }"""; - private static final String STANDARD_TEMPLATE = """ + private static final String LOGS_STANDARD_INDEX_MODE = """ { "index_patterns": [ "logs-*-*" ], "data_stream": {}, @@ -135,6 +143,39 @@ private static void waitForLogs(RestClient client) throws Exception { } }"""; + private static final String STANDARD_TEMPLATE = """ + { + "index_patterns": [ "standard-*-*" ], + "data_stream": {}, + "priority": 201, + "template": { + "settings": { + "index": { + "mode": "standard" + } + }, + "mappings": { + "properties": { + "@timestamp" : { + "type": "date" + }, + "host.name": { + "type": "keyword" + }, + "pid": { + "type": "long" + }, + "method": { + "type": "keyword" + }, + "ip_address": { + "type": "ip" + } + } + } + } + }"""; + private static final String TIME_SERIES_TEMPLATE = """ { "index_patterns": [ "logs-*-*" ], @@ -203,7 +244,7 @@ public void testLogsIndexing() throws IOException { randomLongBetween(1_000_000L, 2_000_000L) ) ); - assertDataStreamBackingIndexMode("logsdb", 0); + assertDataStreamBackingIndexMode("logsdb", 0, DATA_STREAM_NAME); rolloverDataStream(client, DATA_STREAM_NAME); indexDocument( client, @@ -218,7 +259,7 @@ public void testLogsIndexing() throws IOException { randomLongBetween(1_000_000L, 2_000_000L) ) ); - assertDataStreamBackingIndexMode("logsdb", 1); + assertDataStreamBackingIndexMode("logsdb", 1, DATA_STREAM_NAME); } public void testLogsStandardIndexModeSwitch() throws IOException { @@ -237,9 +278,9 @@ public void testLogsStandardIndexModeSwitch() throws IOException { randomLongBetween(1_000_000L, 2_000_000L) ) ); - assertDataStreamBackingIndexMode("logsdb", 0); + assertDataStreamBackingIndexMode("logsdb", 0, DATA_STREAM_NAME); - putTemplate(client, "custom-template", 
STANDARD_TEMPLATE); + putTemplate(client, "custom-template", LOGS_STANDARD_INDEX_MODE); rolloverDataStream(client, DATA_STREAM_NAME); indexDocument( client, @@ -254,7 +295,7 @@ public void testLogsStandardIndexModeSwitch() throws IOException { randomLongBetween(1_000_000L, 2_000_000L) ) ); - assertDataStreamBackingIndexMode("standard", 1); + assertDataStreamBackingIndexMode("standard", 1, DATA_STREAM_NAME); putTemplate(client, "custom-template", LOGS_TEMPLATE); rolloverDataStream(client, DATA_STREAM_NAME); @@ -271,7 +312,7 @@ public void testLogsStandardIndexModeSwitch() throws IOException { randomLongBetween(1_000_000L, 2_000_000L) ) ); - assertDataStreamBackingIndexMode("logsdb", 2); + assertDataStreamBackingIndexMode("logsdb", 2, DATA_STREAM_NAME); } public void testLogsTimeSeriesIndexModeSwitch() throws IOException { @@ -290,7 +331,7 @@ public void testLogsTimeSeriesIndexModeSwitch() throws IOException { randomLongBetween(1_000_000L, 2_000_000L) ) ); - assertDataStreamBackingIndexMode("logsdb", 0); + assertDataStreamBackingIndexMode("logsdb", 0, DATA_STREAM_NAME); putTemplate(client, "custom-template", TIME_SERIES_TEMPLATE); rolloverDataStream(client, DATA_STREAM_NAME); @@ -307,7 +348,7 @@ public void testLogsTimeSeriesIndexModeSwitch() throws IOException { randomLongBetween(1_000_000L, 2_000_000L) ) ); - assertDataStreamBackingIndexMode("time_series", 1); + assertDataStreamBackingIndexMode("time_series", 1, DATA_STREAM_NAME); putTemplate(client, "custom-template", LOGS_TEMPLATE); rolloverDataStream(client, DATA_STREAM_NAME); @@ -324,11 +365,193 @@ public void testLogsTimeSeriesIndexModeSwitch() throws IOException { randomLongBetween(1_000_000L, 2_000_000L) ) ); - assertDataStreamBackingIndexMode("logsdb", 2); + assertDataStreamBackingIndexMode("logsdb", 2, DATA_STREAM_NAME); + } + + public void testLogsDBToStandardReindex() throws IOException { + // LogsDB data stream + putTemplate(client, "logs-template", LOGS_TEMPLATE); + createDataStream(client, "logs-apache-kafka"); + + // Standard data stream + putTemplate(client, "standard-template", STANDARD_TEMPLATE); + createDataStream(client, "standard-apache-kafka"); + + // Index some documents in the LogsDB index + for (int i = 0; i < 10; i++) { + indexDocument( + client, + "logs-apache-kafka", + document( + Instant.now().plusSeconds(10), + randomAlphaOfLength(10), + randomNonNegativeLong(), + randomFrom("PUT", "POST", "GET"), + randomAlphaOfLength(64), + randomIp(randomBoolean()), + randomLongBetween(1_000_000L, 2_000_000L) + ) + ); + } + assertDataStreamBackingIndexMode("logsdb", 0, "logs-apache-kafka"); + assertDocCount(client, "logs-apache-kafka", 10); + + // Reindex a LogsDB data stream into a standard data stream + final Request reindexRequest = new Request("POST", "/_reindex?refresh=true"); + reindexRequest.setJsonEntity(""" + { + "source": { + "index": "logs-apache-kafka" + }, + "dest": { + "index": "standard-apache-kafka", + "op_type": "create" + } + } + """); + assertOK(client.performRequest(reindexRequest)); + assertDataStreamBackingIndexMode("standard", 0, "standard-apache-kafka"); + assertDocCount(client, "standard-apache-kafka", 10); + } + + public void testStandardToLogsDBReindex() throws IOException { + // LogsDB data stream + putTemplate(client, "logs-template", LOGS_TEMPLATE); + createDataStream(client, "logs-apache-kafka"); + + // Standard data stream + putTemplate(client, "standard-template", STANDARD_TEMPLATE); + createDataStream(client, "standard-apache-kafka"); + + // Index some documents in a standard index + for 
(int i = 0; i < 10; i++) { + indexDocument( + client, + "standard-apache-kafka", + document( + Instant.now().plusSeconds(10), + randomAlphaOfLength(10), + randomNonNegativeLong(), + randomFrom("PUT", "POST", "GET"), + randomAlphaOfLength(64), + randomIp(randomBoolean()), + randomLongBetween(1_000_000L, 2_000_000L) + ) + ); + } + assertDataStreamBackingIndexMode("standard", 0, "standard-apache-kafka"); + assertDocCount(client, "standard-apache-kafka", 10); + + // Reindex a standard data stream into a LogsDB data stream + final Request reindexRequest = new Request("POST", "/_reindex?refresh=true"); + reindexRequest.setJsonEntity(""" + { + "source": { + "index": "standard-apache-kafka" + }, + "dest": { + "index": "logs-apache-kafka", + "op_type": "create" + } + } + """); + assertOK(client.performRequest(reindexRequest)); + assertDataStreamBackingIndexMode("logsdb", 0, "logs-apache-kafka"); + assertDocCount(client, "logs-apache-kafka", 10); + } + + public void testLogsDBSnapshotCreateRestoreMount() throws IOException { + final String repository = randomAlphaOfLength(10).toLowerCase(Locale.ROOT); + registerRepository(repository, FsRepository.TYPE, Settings.builder().put("location", randomAlphaOfLength(6))); + + final String index = randomAlphaOfLength(12).toLowerCase(Locale.ROOT); + createIndex(client, index, Settings.builder().put("index.mode", IndexMode.LOGSDB.getName()).build()); + + for (int i = 0; i < 10; i++) { + indexDocument( + client, + index, + document( + Instant.now().plusSeconds(10), + randomAlphaOfLength(10), + randomNonNegativeLong(), + randomFrom("PUT", "POST", "GET"), + randomAlphaOfLength(64), + randomIp(randomBoolean()), + randomLongBetween(1_000_000L, 2_000_000L) + ) + ); + } + + final String snapshot = randomAlphaOfLength(8).toLowerCase(Locale.ROOT); + deleteSnapshot(repository, snapshot, true); + createSnapshot(client, repository, snapshot, true, index); + wipeDataStreams(); + wipeAllIndices(); + restoreSnapshot(client, repository, snapshot, true, index); + + final String restoreIndex = randomAlphaOfLength(7).toLowerCase(Locale.ROOT); + final Request mountRequest = new Request("POST", "/_snapshot/" + repository + '/' + snapshot + "/_mount"); + mountRequest.addParameter("wait_for_completion", "true"); + mountRequest.setJsonEntity("{\"index\": \"" + index + "\",\"renamed_index\": \"" + restoreIndex + "\"}"); + + assertOK(client.performRequest(mountRequest)); + assertDocCount(client, restoreIndex, 10); + assertThat(getSettings(client, restoreIndex).get("index.mode"), Matchers.equalTo(IndexMode.LOGSDB.getName())); + } + + // NOTE: this test will fail on snapshot creation after fixing + // https://github.com/elastic/elasticsearch/issues/112735 + public void testLogsDBSourceOnlySnapshotCreation() throws IOException { + final String repository = randomAlphaOfLength(10).toLowerCase(Locale.ROOT); + registerRepository(repository, FsRepository.TYPE, Settings.builder().put("location", randomAlphaOfLength(6))); + // A source-only repository delegates storage to another repository + final String sourceOnlyRepository = randomAlphaOfLength(10).toLowerCase(Locale.ROOT); + registerRepository( + sourceOnlyRepository, + "source", + Settings.builder().put("delegate_type", FsRepository.TYPE).put("location", repository) + ); + + final String index = randomAlphaOfLength(12).toLowerCase(Locale.ROOT); + createIndex(client, index, Settings.builder().put("index.mode", IndexMode.LOGSDB.getName()).build()); + + for (int i = 0; i < 10; i++) { + indexDocument( + client, + index, + document( + 
Instant.now().plusSeconds(10),
+                    randomAlphaOfLength(10),
+                    randomNonNegativeLong(),
+                    randomFrom("PUT", "POST", "GET"),
+                    randomAlphaOfLength(64),
+                    randomIp(randomBoolean()),
+                    randomLongBetween(1_000_000L, 2_000_000L)
+                )
+            );
+        }
+
+        final String snapshot = randomAlphaOfLength(8).toLowerCase(Locale.ROOT);
+        deleteSnapshot(sourceOnlyRepository, snapshot, true);
+        createSnapshot(client, sourceOnlyRepository, snapshot, true, index);
+        wipeDataStreams();
+        wipeAllIndices();
+        // Can't snapshot _source only on an index that has incomplete source, i.e. has _source disabled or filters the source
+        final ResponseException responseException = expectThrows(
+            ResponseException.class,
+            () -> restoreSnapshot(client, sourceOnlyRepository, snapshot, true, index)
+        );
+        assertThat(responseException.getMessage(), Matchers.containsString("wasn't fully snapshotted"));
+    }
+
+    private static void registerRepository(final String repository, final String type, final Settings.Builder settings) throws IOException {
+        registerRepository(repository, type, false, settings.build());
+    }
 
-    private void assertDataStreamBackingIndexMode(final String indexMode, int backingIndex) throws IOException {
-        assertThat(getSettings(client, getWriteBackingIndex(client, DATA_STREAM_NAME, backingIndex)).get("index.mode"), is(indexMode));
+    private void assertDataStreamBackingIndexMode(final String indexMode, int backingIndex, final String dataStreamName)
+        throws IOException {
+        assertThat(getSettings(client, getWriteBackingIndex(client, dataStreamName, backingIndex)).get("index.mode"), is(indexMode));
     }
 
     private String document(
@@ -364,8 +587,8 @@ private static void putTemplate(final RestClient client, final String templateNa
         assertOK(client.performRequest(request));
     }
 
-    private static void indexDocument(final RestClient client, String dataStreamName, String doc) throws IOException {
-        final Request request = new Request("POST", "/" + dataStreamName + "/_doc?refresh=true");
+    private static void indexDocument(final RestClient client, String indexOrDataStream, String doc) throws IOException {
+        final Request request = new Request("POST", "/" + indexOrDataStream + "/_doc?refresh=true");
         request.setJsonEntity(doc);
         final Response response = client.performRequest(request);
         assertOK(response);
@@ -393,4 +616,46 @@ private static Map<String, Object> getSettings(final RestClient client, final St
         final Request request = new Request("GET", "/" + indexName + "/_settings?flat_settings");
         return ((Map<String, Map<String, Object>>) entityAsMap(client.performRequest(request)).get(indexName)).get("settings");
     }
+
+    private static void createSnapshot(
+        RestClient restClient,
+        String repository,
+        String snapshot,
+        boolean waitForCompletion,
+        final String... indices
+    ) throws IOException {
+        final Request request = new Request(HttpPut.METHOD_NAME, "_snapshot/" + repository + '/' + snapshot);
+        request.addParameter("wait_for_completion", Boolean.toString(waitForCompletion));
+        // The snapshot API accepts the indices to include as a single comma-separated string
+        request.setJsonEntity("""
+            { "indices": "$indices" }
+            """.replace("$indices", String.join(",", indices)));
+
+        final Response response = restClient.performRequest(request);
+        assertThat(
+            "Failed to create snapshot [" + snapshot + "] in repository [" + repository + "]: " + response,
+            response.getStatusLine().getStatusCode(),
+            equalTo(RestStatus.OK.getStatus())
+        );
+    }
+
+    private static void restoreSnapshot(
+        final RestClient client,
+        final String repository,
+        String snapshot,
+        boolean waitForCompletion,
+        final String... indices
+    ) throws IOException {
+        final Request request = new Request(HttpPost.METHOD_NAME, "_snapshot/" + repository + '/' + snapshot + "/_restore");
+        request.addParameter("wait_for_completion", Boolean.toString(waitForCompletion));
+        // Restore takes the same comma-separated "indices" string as snapshot creation
+        request.setJsonEntity("""
+            { "indices": "$indices" }
+            """.replace("$indices", String.join(",", indices)));
+
+        final Response response = client.performRequest(request);
+        assertThat(
+            "Failed to restore snapshot [" + snapshot + "] from repository [" + repository + "]: " + response,
+            response.getStatusLine().getStatusCode(),
+            equalTo(RestStatus.OK.getStatus())
+        );
+    }
 }

From cb42fd45de3f61166f315c70e53d8519963209d2 Mon Sep 17 00:00:00 2001
From: Pat Whelan
Date: Tue, 24 Sep 2024 08:14:58 -0400
Subject: [PATCH 25/58] [ML] Stream Inference API (#113158) (#113423)

Create `POST _inference/{task_type}/{inference_id}/_stream` and
`POST _inference/{inference_id}/_stream` APIs. The REST streaming API will
reuse InferenceAction. For now, all services and task types will return an
HTTP 405 status code and error message.

Co-authored-by: Elastic Machine
---
 docs/changelog/113158.yaml                    |   5 +
 .../inference/InferenceService.java           |  17 ++
 .../inference/action/InferenceAction.java     |  18 +-
 .../action/InferenceActionRequestTests.java   |  63 ++++--
 .../AsyncInferenceResponseConsumer.java       |  68 ++++++
 .../inference/InferenceBaseRestTest.java      |  61 +++++-
 .../xpack/inference/InferenceCrudIT.java      |  58 +++++
 .../mock/TestInferenceServicePlugin.java      |   5 +
 ...stStreamingCompletionServiceExtension.java | 204 ++++++++++++++++++
 ...search.inference.InferenceServiceExtension |   1 +
 .../xpack/inference/InferencePlugin.java      |   2 +
 .../action/TransportInferenceAction.java      |  65 ++++--
 .../queries/SemanticQueryBuilder.java         |   3 +-
 ...ankFeaturePhaseRankCoordinatorContext.java |   3 +-
 .../inference/rest/BaseInferenceAction.java   |  55 +++++
 .../xpack/inference/rest/Paths.java           |   7 +
 .../inference/rest/RestInferenceAction.java   |  35 +--
 .../rest/RestStreamInferenceAction.java       |  43 ++++
 .../TextSimilarityRankTests.java              |   3 +-
 .../TextSimilarityTestPlugin.java             |   3 +-
 .../rest/BaseInferenceActionTests.java        | 107 +++++++++
 .../rest/RestInferenceActionTests.java        |  40 +---
 .../rest/RestStreamInferenceActionTests.java  |  50 +++++
 .../TransportCoordinatedInferenceAction.java  |   3 +-
 24 files changed, 798 insertions(+), 121 deletions(-)
 create mode 100644 docs/changelog/113158.yaml
 create mode 100644 x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/AsyncInferenceResponseConsumer.java
 create mode 100644 x-pack/plugin/inference/qa/test-service-plugin/src/main/java/org/elasticsearch/xpack/inference/mock/TestStreamingCompletionServiceExtension.java
 create mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/BaseInferenceAction.java
 create mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/RestStreamInferenceAction.java
 create mode 100644 x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rest/BaseInferenceActionTests.java
 create mode 100644 x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rest/RestStreamInferenceActionTests.java

diff --git a/docs/changelog/113158.yaml b/docs/changelog/113158.yaml
new file mode 100644
index 0000000000000..d097ea11b3a23
--- /dev/null
+++ b/docs/changelog/113158.yaml
@@ -0,0 +1,5 @@
+pr: 113158
+summary: Adds a new Inference API for streaming responses back to the user.
+area: Machine Learning +type: enhancement +issues: [] diff --git a/server/src/main/java/org/elasticsearch/inference/InferenceService.java b/server/src/main/java/org/elasticsearch/inference/InferenceService.java index a37fb3dd75673..9e9a4cf890379 100644 --- a/server/src/main/java/org/elasticsearch/inference/InferenceService.java +++ b/server/src/main/java/org/elasticsearch/inference/InferenceService.java @@ -188,4 +188,21 @@ default boolean isInClusterService() { * @return {@link TransportVersion} specifying the version */ TransportVersion getMinimalSupportedVersion(); + + /** + * The set of tasks where this service provider supports using the streaming API. + * @return set of supported task types. Defaults to empty. + */ + default Set supportedStreamingTasks() { + return Set.of(); + } + + /** + * Checks the task type against the set of supported streaming tasks returned by {@link #supportedStreamingTasks()}. + * @param taskType the task that supports streaming + * @return true if the taskType is supported + */ + default boolean canStream(TaskType taskType) { + return supportedStreamingTasks().contains(taskType); + } } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/action/InferenceAction.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/action/InferenceAction.java index d898f961651f1..a19edd5a08162 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/action/InferenceAction.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/action/InferenceAction.java @@ -92,6 +92,7 @@ public static Builder parseRequest(String inferenceEntityId, TaskType taskType, private final Map taskSettings; private final InputType inputType; private final TimeValue inferenceTimeout; + private final boolean stream; public Request( TaskType taskType, @@ -100,7 +101,8 @@ public Request( List input, Map taskSettings, InputType inputType, - TimeValue inferenceTimeout + TimeValue inferenceTimeout, + boolean stream ) { this.taskType = taskType; this.inferenceEntityId = inferenceEntityId; @@ -109,6 +111,7 @@ public Request( this.taskSettings = taskSettings; this.inputType = inputType; this.inferenceTimeout = inferenceTimeout; + this.stream = stream; } public Request(StreamInput in) throws IOException { @@ -134,6 +137,9 @@ public Request(StreamInput in) throws IOException { this.query = null; this.inferenceTimeout = DEFAULT_TIMEOUT; } + + // streaming is not supported yet for transport traffic + this.stream = false; } public TaskType getTaskType() { @@ -165,7 +171,7 @@ public TimeValue getInferenceTimeout() { } public boolean isStreaming() { - return false; + return stream; } @Override @@ -261,6 +267,7 @@ public static class Builder { private Map taskSettings = Map.of(); private String query; private TimeValue timeout = DEFAULT_TIMEOUT; + private boolean stream = false; private Builder() {} @@ -303,8 +310,13 @@ private Builder setInferenceTimeout(String inferenceTimeout) { return setInferenceTimeout(TimeValue.parseTimeValue(inferenceTimeout, TIMEOUT.getPreferredName())); } + public Builder setStream(boolean stream) { + this.stream = stream; + return this; + } + public Request build() { - return new Request(taskType, inferenceEntityId, query, input, taskSettings, inputType, timeout); + return new Request(taskType, inferenceEntityId, query, input, taskSettings, inputType, timeout, stream); } } diff --git 
a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/inference/action/InferenceActionRequestTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/inference/action/InferenceActionRequestTests.java index f41e117e75b9f..a9ca5e6da8720 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/inference/action/InferenceActionRequestTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/inference/action/InferenceActionRequestTests.java @@ -46,7 +46,8 @@ protected InferenceAction.Request createTestInstance() { randomList(1, 5, () -> randomAlphaOfLength(8)), randomMap(0, 3, () -> new Tuple<>(randomAlphaOfLength(4), randomAlphaOfLength(4))), randomFrom(InputType.values()), - TimeValue.timeValueMillis(randomLongBetween(1, 2048)) + TimeValue.timeValueMillis(randomLongBetween(1, 2048)), + false ); } @@ -80,7 +81,8 @@ public void testValidation_TextEmbedding() { List.of("input"), null, null, - null + null, + false ); ActionRequestValidationException e = request.validate(); assertNull(e); @@ -94,7 +96,8 @@ public void testValidation_Rerank() { List.of("input"), null, null, - null + null, + false ); ActionRequestValidationException e = request.validate(); assertNull(e); @@ -108,7 +111,8 @@ public void testValidation_TextEmbedding_Null() { null, null, null, - null + null, + false ); ActionRequestValidationException inputNullError = inputNullRequest.validate(); assertNotNull(inputNullError); @@ -123,7 +127,8 @@ public void testValidation_TextEmbedding_Empty() { List.of(), null, null, - null + null, + false ); ActionRequestValidationException inputEmptyError = inputEmptyRequest.validate(); assertNotNull(inputEmptyError); @@ -138,7 +143,8 @@ public void testValidation_Rerank_Null() { List.of("input"), null, null, - null + null, + false ); ActionRequestValidationException queryNullError = queryNullRequest.validate(); assertNotNull(queryNullError); @@ -153,7 +159,8 @@ public void testValidation_Rerank_Empty() { List.of("input"), null, null, - null + null, + false ); ActionRequestValidationException queryEmptyError = queryEmptyRequest.validate(); assertNotNull(queryEmptyError); @@ -185,7 +192,8 @@ protected InferenceAction.Request mutateInstance(InferenceAction.Request instanc instance.getInput(), instance.getTaskSettings(), instance.getInputType(), - instance.getInferenceTimeout() + instance.getInferenceTimeout(), + false ); } case 1 -> new InferenceAction.Request( @@ -195,7 +203,8 @@ protected InferenceAction.Request mutateInstance(InferenceAction.Request instanc instance.getInput(), instance.getTaskSettings(), instance.getInputType(), - instance.getInferenceTimeout() + instance.getInferenceTimeout(), + false ); case 2 -> { var changedInputs = new ArrayList(instance.getInput()); @@ -207,7 +216,8 @@ protected InferenceAction.Request mutateInstance(InferenceAction.Request instanc changedInputs, instance.getTaskSettings(), instance.getInputType(), - instance.getInferenceTimeout() + instance.getInferenceTimeout(), + false ); } case 3 -> { @@ -225,7 +235,8 @@ protected InferenceAction.Request mutateInstance(InferenceAction.Request instanc instance.getInput(), taskSettings, instance.getInputType(), - instance.getInferenceTimeout() + instance.getInferenceTimeout(), + false ); } case 4 -> { @@ -237,7 +248,8 @@ protected InferenceAction.Request mutateInstance(InferenceAction.Request instanc instance.getInput(), instance.getTaskSettings(), nextInputType, - instance.getInferenceTimeout() + instance.getInferenceTimeout(), + false ); } case 5 -> new 
InferenceAction.Request( @@ -247,7 +259,8 @@ protected InferenceAction.Request mutateInstance(InferenceAction.Request instanc instance.getInput(), instance.getTaskSettings(), instance.getInputType(), - instance.getInferenceTimeout() + instance.getInferenceTimeout(), + false ); case 6 -> { var newDuration = Duration.of( @@ -262,7 +275,8 @@ protected InferenceAction.Request mutateInstance(InferenceAction.Request instanc instance.getInput(), instance.getTaskSettings(), instance.getInputType(), - TimeValue.timeValueMillis(newDuration.plus(additionalTime).toMillis()) + TimeValue.timeValueMillis(newDuration.plus(additionalTime).toMillis()), + false ); } default -> throw new UnsupportedOperationException(); @@ -279,7 +293,8 @@ protected InferenceAction.Request mutateInstanceForVersion(InferenceAction.Reque instance.getInput().subList(0, 1), instance.getTaskSettings(), InputType.UNSPECIFIED, - InferenceAction.Request.DEFAULT_TIMEOUT + InferenceAction.Request.DEFAULT_TIMEOUT, + false ); } else if (version.before(TransportVersions.V_8_13_0)) { return new InferenceAction.Request( @@ -289,7 +304,8 @@ protected InferenceAction.Request mutateInstanceForVersion(InferenceAction.Reque instance.getInput(), instance.getTaskSettings(), InputType.UNSPECIFIED, - InferenceAction.Request.DEFAULT_TIMEOUT + InferenceAction.Request.DEFAULT_TIMEOUT, + false ); } else if (version.before(TransportVersions.V_8_13_0) && (instance.getInputType() == InputType.UNSPECIFIED @@ -302,7 +318,8 @@ protected InferenceAction.Request mutateInstanceForVersion(InferenceAction.Reque instance.getInput(), instance.getTaskSettings(), InputType.INGEST, - InferenceAction.Request.DEFAULT_TIMEOUT + InferenceAction.Request.DEFAULT_TIMEOUT, + false ); } else if (version.before(TransportVersions.V_8_13_0) && (instance.getInputType() == InputType.CLUSTERING || instance.getInputType() == InputType.CLASSIFICATION)) { @@ -313,7 +330,8 @@ protected InferenceAction.Request mutateInstanceForVersion(InferenceAction.Reque instance.getInput(), instance.getTaskSettings(), InputType.UNSPECIFIED, - InferenceAction.Request.DEFAULT_TIMEOUT + InferenceAction.Request.DEFAULT_TIMEOUT, + false ); } else if (version.before(TransportVersions.V_8_14_0)) { return new InferenceAction.Request( @@ -323,7 +341,8 @@ protected InferenceAction.Request mutateInstanceForVersion(InferenceAction.Reque instance.getInput(), instance.getTaskSettings(), instance.getInputType(), - InferenceAction.Request.DEFAULT_TIMEOUT + InferenceAction.Request.DEFAULT_TIMEOUT, + false ); } @@ -339,7 +358,8 @@ public void testWriteTo_WhenVersionIsOnAfterUnspecifiedAdded() throws IOExceptio List.of(), Map.of(), InputType.UNSPECIFIED, - InferenceAction.Request.DEFAULT_TIMEOUT + InferenceAction.Request.DEFAULT_TIMEOUT, + false ), TransportVersions.V_8_13_0 ); @@ -353,7 +373,8 @@ public void testWriteTo_WhenVersionIsBeforeInputTypeAdded_ShouldSetInputTypeToUn List.of(), Map.of(), InputType.INGEST, - InferenceAction.Request.DEFAULT_TIMEOUT + InferenceAction.Request.DEFAULT_TIMEOUT, + false ); InferenceAction.Request deserializedInstance = copyWriteable( diff --git a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/AsyncInferenceResponseConsumer.java b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/AsyncInferenceResponseConsumer.java new file mode 100644 index 0000000000000..eb5f3c75bab60 --- /dev/null +++ 
b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/AsyncInferenceResponseConsumer.java
@@ -0,0 +1,68 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.inference;
+
+import org.apache.http.HttpEntity;
+import org.apache.http.HttpResponse;
+import org.apache.http.entity.ContentType;
+import org.apache.http.nio.ContentDecoder;
+import org.apache.http.nio.IOControl;
+import org.apache.http.nio.protocol.AbstractAsyncResponseConsumer;
+import org.apache.http.nio.util.SimpleInputBuffer;
+import org.apache.http.protocol.HttpContext;
+import org.elasticsearch.xpack.inference.external.response.streaming.ServerSentEvent;
+import org.elasticsearch.xpack.inference.external.response.streaming.ServerSentEventParser;
+
+import java.io.IOException;
+import java.util.ArrayDeque;
+import java.util.Deque;
+import java.util.concurrent.atomic.AtomicReference;
+
+class AsyncInferenceResponseConsumer extends AbstractAsyncResponseConsumer<HttpResponse> {
+    private final AtomicReference<HttpResponse> httpResponse = new AtomicReference<>();
+    private final Deque<ServerSentEvent> collector = new ArrayDeque<>();
+    private final ServerSentEventParser sseParser = new ServerSentEventParser();
+    private final SimpleInputBuffer inputBuffer = new SimpleInputBuffer(4096);
+
+    @Override
+    protected void onResponseReceived(HttpResponse httpResponse) {
+        this.httpResponse.set(httpResponse);
+    }
+
+    @Override
+    protected void onContentReceived(ContentDecoder contentDecoder, IOControl ioControl) throws IOException {
+        inputBuffer.consumeContent(contentDecoder);
+    }
+
+    @Override
+    protected void onEntityEnclosed(HttpEntity httpEntity, ContentType contentType) {
+        httpResponse.updateAndGet(response -> {
+            response.setEntity(httpEntity);
+            return response;
+        });
+    }
+
+    @Override
+    protected HttpResponse buildResult(HttpContext httpContext) {
+        var allBytes = new byte[inputBuffer.length()];
+        try {
+            inputBuffer.read(allBytes);
+            sseParser.parse(allBytes).forEach(collector::offer);
+        } catch (IOException e) {
+            failed(e);
+        }
+        return httpResponse.get();
+    }
+
+    @Override
+    protected void releaseResources() {}
+
+    Deque<ServerSentEvent> events() {
+        return collector;
+    }
+}
diff --git a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceBaseRestTest.java b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceBaseRestTest.java
index f30f2e8fe201a..c19cd916055d3 100644
--- a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceBaseRestTest.java
+++ b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceBaseRestTest.java
@@ -9,7 +9,9 @@
 
 import org.apache.http.util.EntityUtils;
 import org.elasticsearch.client.Request;
+import org.elasticsearch.client.RequestOptions;
 import org.elasticsearch.client.Response;
+import org.elasticsearch.client.ResponseListener;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.settings.SecureString;
 import org.elasticsearch.common.settings.Settings;
@@ -19,11 +21,15 @@
 import org.elasticsearch.test.cluster.ElasticsearchCluster;
 import 
org.elasticsearch.test.cluster.local.distribution.DistributionType; import org.elasticsearch.test.rest.ESRestTestCase; +import org.elasticsearch.xpack.inference.external.response.streaming.ServerSentEvent; import org.junit.ClassRule; import java.io.IOException; +import java.util.Deque; import java.util.List; import java.util.Map; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; import static org.hamcrest.Matchers.anyOf; import static org.hamcrest.Matchers.equalTo; @@ -72,6 +78,23 @@ static String mockSparseServiceModelConfig(@Nullable TaskType taskTypeInBody) { """, taskType); } + static String mockCompletionServiceModelConfig(@Nullable TaskType taskTypeInBody) { + var taskType = taskTypeInBody == null ? "" : "\"task_type\": \"" + taskTypeInBody + "\","; + return Strings.format(""" + { + %s + "service": "streaming_completion_test_service", + "service_settings": { + "model": "my_model", + "api_key": "abc64" + }, + "task_settings": { + "temperature": 3 + } + } + """, taskType); + } + static String mockSparseServiceModelConfig(@Nullable TaskType taskTypeInBody, boolean shouldReturnHiddenField) { var taskType = taskTypeInBody == null ? "" : "\"task_type\": \"" + taskTypeInBody + "\","; return Strings.format(""" @@ -252,6 +275,32 @@ protected Map inferOnMockService(String modelId, List in return inferOnMockServiceInternal(endpoint, input); } + protected Deque streamInferOnMockService(String modelId, TaskType taskType, List input) throws Exception { + var endpoint = Strings.format("_inference/%s/%s/_stream", taskType, modelId); + return callAsync(endpoint, input); + } + + private Deque callAsync(String endpoint, List input) throws Exception { + var responseConsumer = new AsyncInferenceResponseConsumer(); + var request = new Request("POST", endpoint); + request.setJsonEntity(jsonBody(input)); + request.setOptions(RequestOptions.DEFAULT.toBuilder().setHttpAsyncResponseConsumerFactory(() -> responseConsumer).build()); + var latch = new CountDownLatch(1); + client().performRequestAsync(request, new ResponseListener() { + @Override + public void onSuccess(Response response) { + latch.countDown(); + } + + @Override + public void onFailure(Exception exception) { + latch.countDown(); + } + }); + assertTrue(latch.await(30, TimeUnit.SECONDS)); + return responseConsumer.events(); + } + protected Map inferOnMockService(String modelId, TaskType taskType, List input) throws IOException { var endpoint = Strings.format("_inference/%s/%s", taskType, modelId); return inferOnMockServiceInternal(endpoint, input); @@ -259,7 +308,13 @@ protected Map inferOnMockService(String modelId, TaskType taskTy private Map inferOnMockServiceInternal(String endpoint, List input) throws IOException { var request = new Request("POST", endpoint); + request.setJsonEntity(jsonBody(input)); + var response = client().performRequest(request); + assertOkOrCreated(response); + return entityAsMap(response); + } + private String jsonBody(List input) { var bodyBuilder = new StringBuilder("{\"input\": ["); for (var in : input) { bodyBuilder.append('"').append(in).append('"').append(','); @@ -267,11 +322,7 @@ private Map inferOnMockServiceInternal(String endpoint, List { + switch (event.name()) { + case EVENT -> assertThat(event.value(), equalToIgnoringCase("error")); + case DATA -> assertThat( + event.value(), + containsString( + "Streaming is not allowed for service [streaming_completion_test_service] and task [sparse_embedding]" + ) + ); + } + }); + } finally { + deleteModel(modelId); + } + } + + public 
void testSupportedStream() throws Exception { + String modelId = "streaming"; + putModel(modelId, mockCompletionServiceModelConfig(TaskType.COMPLETION)); + var singleModel = getModel(modelId); + assertEquals(modelId, singleModel.get("inference_id")); + assertEquals(TaskType.COMPLETION.toString(), singleModel.get("task_type")); + + var input = IntStream.range(0, randomInt(10)).mapToObj(i -> randomAlphaOfLength(10)).toList(); + + try { + var events = streamInferOnMockService(modelId, TaskType.COMPLETION, input); + + var expectedResponses = Stream.concat( + input.stream().map(String::toUpperCase).map(str -> "{\"completion\":[{\"delta\":\"" + str + "\"}]}"), + Stream.of("[DONE]") + ).iterator(); + assertThat(events.size(), equalTo((input.size() + 1) * 2)); + events.forEach(event -> { + switch (event.name()) { + case EVENT -> assertThat(event.value(), equalToIgnoringCase("message")); + case DATA -> assertThat(event.value(), equalTo(expectedResponses.next())); + } + }); + } finally { + deleteModel(modelId); + } + } } diff --git a/x-pack/plugin/inference/qa/test-service-plugin/src/main/java/org/elasticsearch/xpack/inference/mock/TestInferenceServicePlugin.java b/x-pack/plugin/inference/qa/test-service-plugin/src/main/java/org/elasticsearch/xpack/inference/mock/TestInferenceServicePlugin.java index 752472b90374b..eef0da909f529 100644 --- a/x-pack/plugin/inference/qa/test-service-plugin/src/main/java/org/elasticsearch/xpack/inference/mock/TestInferenceServicePlugin.java +++ b/x-pack/plugin/inference/qa/test-service-plugin/src/main/java/org/elasticsearch/xpack/inference/mock/TestInferenceServicePlugin.java @@ -44,6 +44,11 @@ public List getNamedWriteables() { ServiceSettings.class, TestRerankingServiceExtension.TestServiceSettings.NAME, TestRerankingServiceExtension.TestServiceSettings::new + ), + new NamedWriteableRegistry.Entry( + ServiceSettings.class, + TestStreamingCompletionServiceExtension.TestServiceSettings.NAME, + TestStreamingCompletionServiceExtension.TestServiceSettings::new ) ); } diff --git a/x-pack/plugin/inference/qa/test-service-plugin/src/main/java/org/elasticsearch/xpack/inference/mock/TestStreamingCompletionServiceExtension.java b/x-pack/plugin/inference/qa/test-service-plugin/src/main/java/org/elasticsearch/xpack/inference/mock/TestStreamingCompletionServiceExtension.java new file mode 100644 index 0000000000000..3d72b1f2729b0 --- /dev/null +++ b/x-pack/plugin/inference/qa/test-service-plugin/src/main/java/org/elasticsearch/xpack/inference/mock/TestStreamingCompletionServiceExtension.java @@ -0,0 +1,204 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.mock; + +import org.elasticsearch.ElasticsearchStatusException; +import org.elasticsearch.TransportVersion; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.common.ValidationException; +import org.elasticsearch.common.collect.Iterators; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.xcontent.ChunkedToXContent; +import org.elasticsearch.common.xcontent.ChunkedToXContentHelper; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.inference.ChunkedInferenceServiceResults; +import org.elasticsearch.inference.ChunkingOptions; +import org.elasticsearch.inference.InferenceServiceExtension; +import org.elasticsearch.inference.InferenceServiceResults; +import org.elasticsearch.inference.InputType; +import org.elasticsearch.inference.Model; +import org.elasticsearch.inference.ModelConfigurations; +import org.elasticsearch.inference.ServiceSettings; +import org.elasticsearch.inference.TaskType; +import org.elasticsearch.rest.RestStatus; +import org.elasticsearch.xcontent.ToXContentObject; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xpack.core.inference.results.StreamingChatCompletionResults; + +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.Flow; + +import static org.elasticsearch.xpack.core.inference.results.ChatCompletionResults.COMPLETION; + +public class TestStreamingCompletionServiceExtension implements InferenceServiceExtension { + @Override + public List getInferenceServiceFactories() { + return List.of(TestInferenceService::new); + } + + public static class TestInferenceService extends AbstractTestInferenceService { + private static final String NAME = "streaming_completion_test_service"; + private static final Set supportedStreamingTasks = Set.of(TaskType.COMPLETION); + + public TestInferenceService(InferenceServiceExtension.InferenceServiceFactoryContext context) {} + + @Override + public String name() { + return NAME; + } + + @Override + protected ServiceSettings getServiceSettingsFromMap(Map serviceSettingsMap) { + return TestServiceSettings.fromMap(serviceSettingsMap); + } + + @Override + @SuppressWarnings("unchecked") + public void parseRequestConfig( + String modelId, + TaskType taskType, + Map config, + Set platformArchitectures, + ActionListener parsedModelListener + ) { + var serviceSettingsMap = (Map) config.remove(ModelConfigurations.SERVICE_SETTINGS); + var serviceSettings = TestSparseInferenceServiceExtension.TestServiceSettings.fromMap(serviceSettingsMap); + var secretSettings = TestSecretSettings.fromMap(serviceSettingsMap); + + var taskSettingsMap = getTaskSettingsMap(config); + var taskSettings = TestTaskSettings.fromMap(taskSettingsMap); + + parsedModelListener.onResponse(new TestServiceModel(modelId, taskType, name(), serviceSettings, taskSettings, secretSettings)); + } + + @Override + public void infer( + Model model, + String query, + List input, + Map taskSettings, + InputType inputType, + TimeValue timeout, + ActionListener listener + ) { + switch (model.getConfigurations().getTaskType()) { + case COMPLETION -> listener.onResponse(makeResults(input)); + default -> listener.onFailure( + new ElasticsearchStatusException( + TaskType.unsupportedTaskTypeErrorMsg(model.getConfigurations().getTaskType(), name()), + RestStatus.BAD_REQUEST + ) + ); + } + } + + private 
StreamingChatCompletionResults makeResults(List input) { + var responseIter = input.stream().map(String::toUpperCase).iterator(); + return new StreamingChatCompletionResults(subscriber -> { + subscriber.onSubscribe(new Flow.Subscription() { + @Override + public void request(long n) { + if (responseIter.hasNext()) { + subscriber.onNext(completionChunk(responseIter.next())); + } else { + subscriber.onComplete(); + } + } + + @Override + public void cancel() {} + }); + }); + } + + private ChunkedToXContent completionChunk(String delta) { + return params -> Iterators.concat( + ChunkedToXContentHelper.startObject(), + ChunkedToXContentHelper.startArray(COMPLETION), + ChunkedToXContentHelper.startObject(), + ChunkedToXContentHelper.field("delta", delta), + ChunkedToXContentHelper.endObject(), + ChunkedToXContentHelper.endArray(), + ChunkedToXContentHelper.endObject() + ); + } + + @Override + public void chunkedInfer( + Model model, + String query, + List input, + Map taskSettings, + InputType inputType, + ChunkingOptions chunkingOptions, + TimeValue timeout, + ActionListener> listener + ) { + listener.onFailure( + new ElasticsearchStatusException( + TaskType.unsupportedTaskTypeErrorMsg(model.getConfigurations().getTaskType(), name()), + RestStatus.BAD_REQUEST + ) + ); + } + + @Override + public Set supportedStreamingTasks() { + return supportedStreamingTasks; + } + } + + public record TestServiceSettings(String modelId) implements ServiceSettings { + public static final String NAME = "streaming_completion_test_service_settings"; + + public TestServiceSettings(StreamInput in) throws IOException { + this(in.readString()); + } + + public static TestServiceSettings fromMap(Map map) { + var modelId = map.remove("model").toString(); + + if (modelId == null) { + ValidationException validationException = new ValidationException(); + validationException.addValidationError("missing model id"); + throw validationException; + } + + return new TestServiceSettings(modelId); + } + + @Override + public String getWriteableName() { + return NAME; + } + + @Override + public TransportVersion getMinimalSupportedVersion() { + return TransportVersion.current(); // fine for these tests but will not work for cluster upgrade tests + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeString(modelId()); + } + + @Override + public ToXContentObject getFilteredXContentObject() { + return this; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + return builder.startObject().field("model", modelId()).endObject(); + } + } +} diff --git a/x-pack/plugin/inference/qa/test-service-plugin/src/main/resources/META-INF/services/org.elasticsearch.inference.InferenceServiceExtension b/x-pack/plugin/inference/qa/test-service-plugin/src/main/resources/META-INF/services/org.elasticsearch.inference.InferenceServiceExtension index 690168b538fb9..c996a33d1e916 100644 --- a/x-pack/plugin/inference/qa/test-service-plugin/src/main/resources/META-INF/services/org.elasticsearch.inference.InferenceServiceExtension +++ b/x-pack/plugin/inference/qa/test-service-plugin/src/main/resources/META-INF/services/org.elasticsearch.inference.InferenceServiceExtension @@ -1,3 +1,4 @@ org.elasticsearch.xpack.inference.mock.TestSparseInferenceServiceExtension org.elasticsearch.xpack.inference.mock.TestDenseInferenceServiceExtension org.elasticsearch.xpack.inference.mock.TestRerankingServiceExtension 
+org.elasticsearch.xpack.inference.mock.TestStreamingCompletionServiceExtension diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java index 1cec996400a97..a6972ddc214fc 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java @@ -73,6 +73,7 @@ import org.elasticsearch.xpack.inference.rest.RestGetInferenceModelAction; import org.elasticsearch.xpack.inference.rest.RestInferenceAction; import org.elasticsearch.xpack.inference.rest.RestPutInferenceModelAction; +import org.elasticsearch.xpack.inference.rest.RestStreamInferenceAction; import org.elasticsearch.xpack.inference.services.ServiceComponents; import org.elasticsearch.xpack.inference.services.alibabacloudsearch.AlibabaCloudSearchService; import org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockService; @@ -167,6 +168,7 @@ public List getRestHandlers( ) { return List.of( new RestInferenceAction(), + new RestStreamInferenceAction(), new RestGetInferenceModelAction(), new RestPutInferenceModelAction(), new RestDeleteInferenceEndpointAction(), diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/TransportInferenceAction.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/TransportInferenceAction.java index bfdfca166ef3a..803e8f1e07612 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/TransportInferenceAction.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/TransportInferenceAction.java @@ -17,6 +17,7 @@ import org.elasticsearch.inference.InferenceServiceRegistry; import org.elasticsearch.inference.InferenceServiceResults; import org.elasticsearch.inference.Model; +import org.elasticsearch.inference.TaskType; import org.elasticsearch.injection.guice.Inject; import org.elasticsearch.rest.RestStatus; import org.elasticsearch.tasks.Task; @@ -26,10 +27,17 @@ import org.elasticsearch.xpack.inference.registry.ModelRegistry; import org.elasticsearch.xpack.inference.telemetry.InferenceStats; +import java.util.Set; +import java.util.stream.Collectors; + +import static org.elasticsearch.core.Strings.format; + public class TransportInferenceAction extends HandledTransportAction { private static final String STREAMING_INFERENCE_TASK_TYPE = "streaming_inference"; private static final String STREAMING_TASK_ACTION = "xpack/inference/streaming_inference[n]"; + private static final Set> supportsStreaming = Set.of(); + private final ModelRegistry modelRegistry; private final InferenceServiceRegistry serviceRegistry; private final InferenceStats inferenceStats; @@ -101,15 +109,40 @@ private void inferOnService( InferenceService service, ActionListener listener ) { - service.infer( - model, - request.getQuery(), - request.getInput(), - request.getTaskSettings(), - request.getInputType(), - request.getInferenceTimeout(), - createListener(request, listener) - ); + if (request.isStreaming() == false || service.canStream(request.getTaskType())) { + service.infer( + model, + request.getQuery(), + request.getInput(), + request.getTaskSettings(), + request.getInputType(), + request.getInferenceTimeout(), + createListener(request, listener) + ); + } else { + 
listener.onFailure(unsupportedStreamingTaskException(request, service)); + } + } + + private ElasticsearchStatusException unsupportedStreamingTaskException(InferenceAction.Request request, InferenceService service) { + var supportedTasks = service.supportedStreamingTasks(); + if (supportedTasks.isEmpty()) { + return new ElasticsearchStatusException( + format("Streaming is not allowed for service [%s].", service.name()), + RestStatus.METHOD_NOT_ALLOWED + ); + } else { + var validTasks = supportedTasks.stream().map(TaskType::toString).collect(Collectors.joining(",")); + return new ElasticsearchStatusException( + format( + "Streaming is not allowed for service [%s] and task [%s]. Supported tasks: [%s]", + service.name(), + request.getTaskType(), + validTasks + ), + RestStatus.METHOD_NOT_ALLOWED + ); + } } private ActionListener createListener( @@ -118,17 +151,9 @@ private ActionListener createListener( ) { if (request.isStreaming()) { return listener.delegateFailureAndWrap((l, inferenceResults) -> { - if (inferenceResults.isStreaming()) { - var taskProcessor = streamingTaskManager.create( - STREAMING_INFERENCE_TASK_TYPE, - STREAMING_TASK_ACTION - ); - inferenceResults.publisher().subscribe(taskProcessor); - l.onResponse(new InferenceAction.Response(inferenceResults, taskProcessor)); - } else { - // if we asked for streaming but the provider doesn't support it, for now we're going to get back the single response - l.onResponse(new InferenceAction.Response(inferenceResults)); - } + var taskProcessor = streamingTaskManager.create(STREAMING_INFERENCE_TASK_TYPE, STREAMING_TASK_ACTION); + inferenceResults.publisher().subscribe(taskProcessor); + l.onResponse(new InferenceAction.Response(inferenceResults, taskProcessor)); }); } return listener.delegateFailureAndWrap((l, inferenceResults) -> l.onResponse(new InferenceAction.Response(inferenceResults))); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java index 8f1e28d0d8ee4..7f21f94d33276 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java @@ -204,7 +204,8 @@ private SemanticQueryBuilder doRewriteGetInferenceResults(QueryRewriteContext qu List.of(query), Map.of(), InputType.SEARCH, - InferModelAction.Request.DEFAULT_TIMEOUT_FOR_API + InferModelAction.Request.DEFAULT_TIMEOUT_FOR_API, + false ); queryRewriteContext.registerAsyncAction( diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankFeaturePhaseRankCoordinatorContext.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankFeaturePhaseRankCoordinatorContext.java index cad11cbdc9d5b..0ff48bfd493ba 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankFeaturePhaseRankCoordinatorContext.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankFeaturePhaseRankCoordinatorContext.java @@ -144,7 +144,8 @@ protected InferenceAction.Request generateRequest(List docFeatures) { docFeatures, Map.of(), InputType.SEARCH, - InferenceAction.Request.DEFAULT_TIMEOUT + InferenceAction.Request.DEFAULT_TIMEOUT, + 
false ); } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/BaseInferenceAction.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/BaseInferenceAction.java new file mode 100644 index 0000000000000..e72e68052f648 --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/BaseInferenceAction.java @@ -0,0 +1,55 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.rest; + +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.client.internal.node.NodeClient; +import org.elasticsearch.inference.TaskType; +import org.elasticsearch.rest.BaseRestHandler; +import org.elasticsearch.rest.RestChannel; +import org.elasticsearch.rest.RestRequest; +import org.elasticsearch.xpack.core.inference.action.InferenceAction; + +import java.io.IOException; + +import static org.elasticsearch.xpack.inference.rest.Paths.INFERENCE_ID; +import static org.elasticsearch.xpack.inference.rest.Paths.TASK_TYPE_OR_INFERENCE_ID; + +abstract class BaseInferenceAction extends BaseRestHandler { + @Override + protected RestChannelConsumer prepareRequest(RestRequest restRequest, NodeClient client) throws IOException { + String inferenceEntityId; + TaskType taskType; + if (restRequest.hasParam(INFERENCE_ID)) { + inferenceEntityId = restRequest.param(INFERENCE_ID); + taskType = TaskType.fromStringOrStatusException(restRequest.param(TASK_TYPE_OR_INFERENCE_ID)); + } else { + inferenceEntityId = restRequest.param(TASK_TYPE_OR_INFERENCE_ID); + taskType = TaskType.ANY; + } + + InferenceAction.Request.Builder requestBuilder; + try (var parser = restRequest.contentParser()) { + requestBuilder = InferenceAction.Request.parseRequest(inferenceEntityId, taskType, parser); + } + + var inferTimeout = restRequest.paramAsTime( + InferenceAction.Request.TIMEOUT.getPreferredName(), + InferenceAction.Request.DEFAULT_TIMEOUT + ); + requestBuilder.setInferenceTimeout(inferTimeout); + var request = prepareInferenceRequest(requestBuilder); + return channel -> client.execute(InferenceAction.INSTANCE, request, listener(channel)); + } + + protected InferenceAction.Request prepareInferenceRequest(InferenceAction.Request.Builder builder) { + return builder.build(); + } + + protected abstract ActionListener listener(RestChannel channel); +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/Paths.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/Paths.java index e33931f3d2f8d..9f64b58e48b55 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/Paths.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/Paths.java @@ -15,6 +15,13 @@ public final class Paths { static final String TASK_TYPE_INFERENCE_ID_PATH = "_inference/{" + TASK_TYPE_OR_INFERENCE_ID + "}/{" + INFERENCE_ID + "}"; static final String INFERENCE_DIAGNOSTICS_PATH = "_inference/.diagnostics"; + static final String STREAM_INFERENCE_ID_PATH = "_inference/{" + TASK_TYPE_OR_INFERENCE_ID + "}/_stream"; + static final String STREAM_TASK_TYPE_INFERENCE_ID_PATH = "_inference/{" + + TASK_TYPE_OR_INFERENCE_ID + + "}/{" + + INFERENCE_ID + + "}/_stream"; + private Paths() { } diff --git 
a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/RestInferenceAction.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/RestInferenceAction.java index f5c30d0a94c54..0fbc2f8214cbb 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/RestInferenceAction.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/RestInferenceAction.java @@ -7,26 +7,21 @@ package org.elasticsearch.xpack.inference.rest; -import org.elasticsearch.client.internal.node.NodeClient; -import org.elasticsearch.inference.TaskType; -import org.elasticsearch.rest.BaseRestHandler; -import org.elasticsearch.rest.RestRequest; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.rest.RestChannel; import org.elasticsearch.rest.Scope; import org.elasticsearch.rest.ServerlessScope; import org.elasticsearch.rest.action.RestChunkedToXContentListener; import org.elasticsearch.xpack.core.inference.action.InferenceAction; -import java.io.IOException; import java.util.List; import static org.elasticsearch.rest.RestRequest.Method.POST; -import static org.elasticsearch.xpack.inference.rest.Paths.INFERENCE_ID; import static org.elasticsearch.xpack.inference.rest.Paths.INFERENCE_ID_PATH; import static org.elasticsearch.xpack.inference.rest.Paths.TASK_TYPE_INFERENCE_ID_PATH; -import static org.elasticsearch.xpack.inference.rest.Paths.TASK_TYPE_OR_INFERENCE_ID; @ServerlessScope(Scope.PUBLIC) -public class RestInferenceAction extends BaseRestHandler { +public class RestInferenceAction extends BaseInferenceAction { @Override public String getName() { return "inference_action"; @@ -38,27 +33,7 @@ public List routes() { } @Override - protected RestChannelConsumer prepareRequest(RestRequest restRequest, NodeClient client) throws IOException { - String inferenceEntityId; - TaskType taskType; - if (restRequest.hasParam(INFERENCE_ID)) { - inferenceEntityId = restRequest.param(INFERENCE_ID); - taskType = TaskType.fromStringOrStatusException(restRequest.param(TASK_TYPE_OR_INFERENCE_ID)); - } else { - inferenceEntityId = restRequest.param(TASK_TYPE_OR_INFERENCE_ID); - taskType = TaskType.ANY; - } - - InferenceAction.Request.Builder requestBuilder; - try (var parser = restRequest.contentParser()) { - requestBuilder = InferenceAction.Request.parseRequest(inferenceEntityId, taskType, parser); - } - - var inferTimeout = restRequest.paramAsTime( - InferenceAction.Request.TIMEOUT.getPreferredName(), - InferenceAction.Request.DEFAULT_TIMEOUT - ); - requestBuilder.setInferenceTimeout(inferTimeout); - return channel -> client.execute(InferenceAction.INSTANCE, requestBuilder.build(), new RestChunkedToXContentListener<>(channel)); + protected ActionListener listener(RestChannel channel) { + return new RestChunkedToXContentListener<>(channel); } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/RestStreamInferenceAction.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/RestStreamInferenceAction.java new file mode 100644 index 0000000000000..875c288da52bd --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/RestStreamInferenceAction.java @@ -0,0 +1,43 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.rest; + +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.rest.RestChannel; +import org.elasticsearch.rest.Scope; +import org.elasticsearch.rest.ServerlessScope; +import org.elasticsearch.xpack.core.inference.action.InferenceAction; + +import java.util.List; + +import static org.elasticsearch.rest.RestRequest.Method.POST; +import static org.elasticsearch.xpack.inference.rest.Paths.STREAM_INFERENCE_ID_PATH; +import static org.elasticsearch.xpack.inference.rest.Paths.STREAM_TASK_TYPE_INFERENCE_ID_PATH; + +@ServerlessScope(Scope.PUBLIC) +public class RestStreamInferenceAction extends BaseInferenceAction { + @Override + public String getName() { + return "stream_inference_action"; + } + + @Override + public List routes() { + return List.of(new Route(POST, STREAM_INFERENCE_ID_PATH), new Route(POST, STREAM_TASK_TYPE_INFERENCE_ID_PATH)); + } + + @Override + protected InferenceAction.Request prepareInferenceRequest(InferenceAction.Request.Builder builder) { + return builder.setStream(true).build(); + } + + @Override + protected ActionListener listener(RestChannel channel) { + return new ServerSentEventsRestActionListener(channel); + } +} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankTests.java index a26dc50097cf5..a042fca44fdb5 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankTests.java @@ -92,7 +92,8 @@ protected InferenceAction.Request generateRequest(List docFeatures) { docFeatures, Map.of("inferenceResultCount", inferenceResultCount), InputType.SEARCH, - InferenceAction.Request.DEFAULT_TIMEOUT + InferenceAction.Request.DEFAULT_TIMEOUT, + false ); } }; diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityTestPlugin.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityTestPlugin.java index 6d0c15d5c0bfe..120527f489549 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityTestPlugin.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityTestPlugin.java @@ -312,7 +312,8 @@ protected InferenceAction.Request generateRequest(List docFeatures) { docFeatures, Map.of("throwing", true), InputType.SEARCH, - InferenceAction.Request.DEFAULT_TIMEOUT + InferenceAction.Request.DEFAULT_TIMEOUT, + false ); } }; diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rest/BaseInferenceActionTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rest/BaseInferenceActionTests.java new file mode 100644 index 0000000000000..05a8d52be5df4 --- /dev/null +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rest/BaseInferenceActionTests.java @@ -0,0 +1,107 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.rest; + +import org.apache.lucene.util.SetOnce; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.common.bytes.BytesArray; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.rest.RestChannel; +import org.elasticsearch.rest.RestRequest; +import org.elasticsearch.rest.action.RestChunkedToXContentListener; +import org.elasticsearch.test.rest.FakeRestRequest; +import org.elasticsearch.test.rest.RestActionTestCase; +import org.elasticsearch.xcontent.XContentType; +import org.elasticsearch.xpack.core.inference.action.InferenceAction; +import org.elasticsearch.xpack.core.inference.results.InferenceTextEmbeddingByteResults; +import org.junit.Before; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static org.elasticsearch.rest.RestRequest.Method.POST; +import static org.hamcrest.CoreMatchers.is; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.instanceOf; + +public class BaseInferenceActionTests extends RestActionTestCase { + + @Before + public void setUpAction() { + controller().registerHandler(new BaseInferenceAction() { + @Override + protected ActionListener listener(RestChannel channel) { + return new RestChunkedToXContentListener<>(channel); + } + + @Override + public String getName() { + return "base_inference_action"; + } + + @Override + public List routes() { + return List.of(new Route(POST, route("{task_type_or_id}"))); + } + }); + } + + private static String route(String param) { + return "_route/" + param; + } + + public void testUsesDefaultTimeout() { + SetOnce executeCalled = new SetOnce<>(); + verifyingClient.setExecuteVerifier(((actionType, actionRequest) -> { + assertThat(actionRequest, instanceOf(InferenceAction.Request.class)); + + var request = (InferenceAction.Request) actionRequest; + assertThat(request.getInferenceTimeout(), is(InferenceAction.Request.DEFAULT_TIMEOUT)); + + executeCalled.set(true); + return createResponse(); + })); + + RestRequest inferenceRequest = new FakeRestRequest.Builder(xContentRegistry()).withMethod(RestRequest.Method.POST) + .withPath(route("test")) + .withContent(new BytesArray("{}"), XContentType.JSON) + .build(); + dispatchRequest(inferenceRequest); + assertThat(executeCalled.get(), equalTo(true)); + } + + public void testUses3SecondTimeoutFromParams() { + SetOnce executeCalled = new SetOnce<>(); + verifyingClient.setExecuteVerifier(((actionType, actionRequest) -> { + assertThat(actionRequest, instanceOf(InferenceAction.Request.class)); + + var request = (InferenceAction.Request) actionRequest; + assertThat(request.getInferenceTimeout(), is(TimeValue.timeValueSeconds(3))); + + executeCalled.set(true); + return createResponse(); + })); + + RestRequest inferenceRequest = new FakeRestRequest.Builder(xContentRegistry()).withMethod(RestRequest.Method.POST) + .withPath(route("test")) + .withParams(new HashMap<>(Map.of("timeout", "3s"))) + .withContent(new BytesArray("{}"), XContentType.JSON) + .build(); + dispatchRequest(inferenceRequest); + assertThat(executeCalled.get(), equalTo(true)); + } + + static InferenceAction.Response createResponse() { + return new InferenceAction.Response( + new InferenceTextEmbeddingByteResults( + List.of(new InferenceTextEmbeddingByteResults.InferenceByteEmbedding(new byte[] { (byte) -1 })) + ) + ); + } +} diff --git 
a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rest/RestInferenceActionTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rest/RestInferenceActionTests.java index 48e5d54a62733..1b0df1b4a20da 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rest/RestInferenceActionTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rest/RestInferenceActionTests.java @@ -9,19 +9,14 @@ import org.apache.lucene.util.SetOnce; import org.elasticsearch.common.bytes.BytesArray; -import org.elasticsearch.core.TimeValue; import org.elasticsearch.rest.RestRequest; import org.elasticsearch.test.rest.FakeRestRequest; import org.elasticsearch.test.rest.RestActionTestCase; import org.elasticsearch.xcontent.XContentType; import org.elasticsearch.xpack.core.inference.action.InferenceAction; -import org.elasticsearch.xpack.core.inference.results.InferenceTextEmbeddingByteResults; import org.junit.Before; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - +import static org.elasticsearch.xpack.inference.rest.BaseInferenceActionTests.createResponse; import static org.hamcrest.CoreMatchers.is; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.instanceOf; @@ -33,13 +28,13 @@ public void setUpAction() { controller().registerHandler(new RestInferenceAction()); } - public void testUsesDefaultTimeout() { + public void testStreamIsFalse() { SetOnce executeCalled = new SetOnce<>(); verifyingClient.setExecuteVerifier(((actionType, actionRequest) -> { assertThat(actionRequest, instanceOf(InferenceAction.Request.class)); var request = (InferenceAction.Request) actionRequest; - assertThat(request.getInferenceTimeout(), is(InferenceAction.Request.DEFAULT_TIMEOUT)); + assertThat(request.isStreaming(), is(false)); executeCalled.set(true); return createResponse(); @@ -52,33 +47,4 @@ public void testUsesDefaultTimeout() { dispatchRequest(inferenceRequest); assertThat(executeCalled.get(), equalTo(true)); } - - public void testUses3SecondTimeoutFromParams() { - SetOnce executeCalled = new SetOnce<>(); - verifyingClient.setExecuteVerifier(((actionType, actionRequest) -> { - assertThat(actionRequest, instanceOf(InferenceAction.Request.class)); - - var request = (InferenceAction.Request) actionRequest; - assertThat(request.getInferenceTimeout(), is(TimeValue.timeValueSeconds(3))); - - executeCalled.set(true); - return createResponse(); - })); - - RestRequest inferenceRequest = new FakeRestRequest.Builder(xContentRegistry()).withMethod(RestRequest.Method.POST) - .withPath("_inference/test") - .withParams(new HashMap<>(Map.of("timeout", "3s"))) - .withContent(new BytesArray("{}"), XContentType.JSON) - .build(); - dispatchRequest(inferenceRequest); - assertThat(executeCalled.get(), equalTo(true)); - } - - private static InferenceAction.Response createResponse() { - return new InferenceAction.Response( - new InferenceTextEmbeddingByteResults( - List.of(new InferenceTextEmbeddingByteResults.InferenceByteEmbedding(new byte[] { (byte) -1 })) - ) - ); - } } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rest/RestStreamInferenceActionTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rest/RestStreamInferenceActionTests.java new file mode 100644 index 0000000000000..b999e2c9b72f0 --- /dev/null +++ 
b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rest/RestStreamInferenceActionTests.java @@ -0,0 +1,50 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.rest; + +import org.apache.lucene.util.SetOnce; +import org.elasticsearch.common.bytes.BytesArray; +import org.elasticsearch.rest.RestRequest; +import org.elasticsearch.test.rest.FakeRestRequest; +import org.elasticsearch.test.rest.RestActionTestCase; +import org.elasticsearch.xcontent.XContentType; +import org.elasticsearch.xpack.core.inference.action.InferenceAction; +import org.junit.Before; + +import static org.elasticsearch.xpack.inference.rest.BaseInferenceActionTests.createResponse; +import static org.hamcrest.CoreMatchers.is; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.instanceOf; + +public class RestStreamInferenceActionTests extends RestActionTestCase { + + @Before + public void setUpAction() { + controller().registerHandler(new RestStreamInferenceAction()); + } + + public void testStreamIsTrue() { + SetOnce executeCalled = new SetOnce<>(); + verifyingClient.setExecuteVerifier(((actionType, actionRequest) -> { + assertThat(actionRequest, instanceOf(InferenceAction.Request.class)); + + var request = (InferenceAction.Request) actionRequest; + assertThat(request.isStreaming(), is(true)); + + executeCalled.set(true); + return createResponse(); + })); + + RestRequest inferenceRequest = new FakeRestRequest.Builder(xContentRegistry()).withMethod(RestRequest.Method.POST) + .withPath("_inference/test/_stream") + .withContent(new BytesArray("{}"), XContentType.JSON) + .build(); + dispatchRequest(inferenceRequest); + assertThat(executeCalled.get(), equalTo(true)); + } +} diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportCoordinatedInferenceAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportCoordinatedInferenceAction.java index fd13e3de4e6cd..ab5a9d43fd6d1 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportCoordinatedInferenceAction.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportCoordinatedInferenceAction.java @@ -126,7 +126,8 @@ private void doInferenceServiceModel(CoordinatedInferenceAction.Request request, request.getInputs(), request.getTaskSettings(), inputType, - request.getInferenceTimeout() + request.getInferenceTimeout(), + false ), listener.delegateFailureAndWrap((l, r) -> l.onResponse(translateInferenceServiceResponse(r.getResults()))) ); From f8dbda3f98f6a7d9dbc02762272329a61854509f Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Tue, 24 Sep 2024 09:28:53 -0400 Subject: [PATCH 26/58] ESQL: Document esql_worker threadpool (#113203) (#113459) Documents the thread pool we use to run ESQL operations. It's the same size and queue depth as the `search` thread pool. 
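As a rough illustration of the sizing rule documented in the diff below (this snippet is not part of the commit, and the class and method names here are hypothetical), the pool size reduces to simple integer arithmetic:

    // Hypothetical sketch: the documented esql_worker sizing rule,
    // a fixed pool of int((allocated processors * 3) / 2) + 1 threads.
    class EsqlWorkerPoolMath {
        static int poolSize(int allocatedProcessors) {
            return (allocatedProcessors * 3) / 2 + 1;
        }

        public static void main(String[] args) {
            // a node with 8 allocated processors gets a fixed pool of 13 threads
            System.out.println(poolSize(8));
        }
    }

The queue depth of `1000` matches the `search` pool.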
Closes #113130
---
 docs/reference/modules/threadpool.asciidoc | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/docs/reference/modules/threadpool.asciidoc b/docs/reference/modules/threadpool.asciidoc
index ed4becbfbb6d0..2d4110bdcb431 100644
--- a/docs/reference/modules/threadpool.asciidoc
+++ b/docs/reference/modules/threadpool.asciidoc
@@ -121,6 +121,11 @@ There are several thread pools, but the important ones include:
     `min(5 * (`<>`), 50)` and queue_size of `1000`.
 
+[[modules-threadpool-esql]]`esql_worker`::
+    Executes <> operations. Thread pool type is `fixed` with a
+    size of `int((`<>
+    `pass:[ * ]3) / 2) + 1`, and queue_size of `1000`.
+
 Thread pool settings are <> and can be changed by editing
 `elasticsearch.yml`. Changing a specific thread pool can be done by setting
 its type-specific parameters; for example, changing the number of

From 0631be599d9635f05d1144011e73fbfe903f01db Mon Sep 17 00:00:00 2001
From: Ignacio Vera
Date: Tue, 24 Sep 2024 15:55:53 +0200
Subject: [PATCH 27/58] Account for DelayedBucket before reduction (#113013)
 (#113458)

This commit moves the accounting for the DelayedBucket before reduction,
so in some adversarial cases we exit much sooner.
---
 docs/changelog/113013.yaml                    |  5 ++
 .../search/aggregations/DelayedBucket.java    |  9 ++--
 .../search/aggregations/TopBucketBuilder.java |  6 +++
 .../bucket/terms/AbstractInternalTerms.java   | 10 ++--
 .../aggregations/DelayedBucketTests.java      | 49 ++++++++++++++++++-
 5 files changed, 70 insertions(+), 9 deletions(-)
 create mode 100644 docs/changelog/113013.yaml

diff --git a/docs/changelog/113013.yaml b/docs/changelog/113013.yaml
new file mode 100644
index 0000000000000..1cec31074e806
--- /dev/null
+++ b/docs/changelog/113013.yaml
@@ -0,0 +1,5 @@
+pr: 113013
+summary: Account for `DelayedBucket` before reduction
+area: Aggregations
+type: enhancement
+issues: []

diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/DelayedBucket.java b/server/src/main/java/org/elasticsearch/search/aggregations/DelayedBucket.java
index fa8fe9e4628d7..017d87df52092 100644
--- a/server/src/main/java/org/elasticsearch/search/aggregations/DelayedBucket.java
+++ b/server/src/main/java/org/elasticsearch/search/aggregations/DelayedBucket.java
@@ -15,6 +15,10 @@
 /**
  * A wrapper around reducing buckets with the same key that can delay that reduction
  * as long as possible. It's stateful and not even close to thread safe.
+ * <p>
+ * It is the responsibility of the caller to account for buckets created using DelayedBucket.
+ * It should call {@link #nonCompetitive} to release any possible sub-bucket creation if
+ * a bucket is rejected from the final response.
  */
 public final class DelayedBucket<B extends InternalMultiBucketAggregation.InternalBucket> {
     /**
@@ -45,7 +49,6 @@ public DelayedBucket(List<B> toReduce) {
      */
     public B reduced(BiFunction<List<B>, AggregationReduceContext, B> reduce, AggregationReduceContext reduceContext) {
         if (reduced == null) {
-            reduceContext.consumeBucketsAndMaybeBreak(1);
             reduced = reduce.apply(toReduce, reduceContext);
             toReduce = null;
         }
@@ -95,8 +98,8 @@ public String toString() {
      */
     void nonCompetitive(AggregationReduceContext reduceContext) {
         if (reduced != null) {
-            // -1 for itself, -countInnerBucket for all the sub-buckets.
-            reduceContext.consumeBucketsAndMaybeBreak(-1 - InternalMultiBucketAggregation.countInnerBucket(reduced));
+            // -countInnerBucket for all the sub-buckets.
+            reduceContext.consumeBucketsAndMaybeBreak(-InternalMultiBucketAggregation.countInnerBucket(reduced));
         }
     }
 }

diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/TopBucketBuilder.java b/server/src/main/java/org/elasticsearch/search/aggregations/TopBucketBuilder.java
index a3d04ecc2074d..0389b7e105a58 100644
--- a/server/src/main/java/org/elasticsearch/search/aggregations/TopBucketBuilder.java
+++ b/server/src/main/java/org/elasticsearch/search/aggregations/TopBucketBuilder.java
@@ -132,7 +132,11 @@ public void add(DelayedBucket<B> bucket) {
         DelayedBucket<B> removed = queue.insertWithOverflow(bucket);
         if (removed != null) {
             nonCompetitive.accept(removed);
+            // release any created sub-buckets
             removed.nonCompetitive(reduceContext);
+        } else {
+            // add one bucket to the final result
+            reduceContext.consumeBucketsAndMaybeBreak(1);
         }
     }
 
@@ -183,6 +187,8 @@ public void add(DelayedBucket<B> bucket) {
                 next.add(bucket);
                 return;
             }
+            // add one bucket to the final result
+            reduceContext.consumeBucketsAndMaybeBreak(1);
             buffer.add(bucket);
             if (buffer.size() < size) {
                 return;

diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractInternalTerms.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractInternalTerms.java
index 71a06fb020344..5c422a9dd4e32 100644
--- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractInternalTerms.java
+++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractInternalTerms.java
@@ -290,6 +290,7 @@ public InternalAggregation get() {
             result = new ArrayList<>();
             thisReduceOrder = reduceBuckets(bucketsList, getThisReduceOrder(), bucket -> {
                 if (result.size() < getRequiredSize()) {
+                    reduceContext.consumeBucketsAndMaybeBreak(1);
                     result.add(bucket.reduced(AbstractInternalTerms.this::reduceBucket, reduceContext));
                 } else {
                     otherDocCount[0] += bucket.getDocCount();
@@ -311,11 +312,10 @@ public InternalAggregation get() {
             result = top.build();
         } else {
             result = new ArrayList<>();
-            thisReduceOrder = reduceBuckets(
-                bucketsList,
-                getThisReduceOrder(),
-                bucket -> result.add(bucket.reduced(AbstractInternalTerms.this::reduceBucket, reduceContext))
-            );
+            thisReduceOrder = reduceBuckets(bucketsList, getThisReduceOrder(), bucket -> {
+                reduceContext.consumeBucketsAndMaybeBreak(1);
+                result.add(bucket.reduced(AbstractInternalTerms.this::reduceBucket, reduceContext));
+            });
         }
         for (B r : result) {
             if (sumDocCountError == -1) {

diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/DelayedBucketTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/DelayedBucketTests.java
index b5a35098e0073..70d5692b6dcf7 100644
--- a/server/src/test/java/org/elasticsearch/search/aggregations/DelayedBucketTests.java
+++ b/server/src/test/java/org/elasticsearch/search/aggregations/DelayedBucketTests.java
@@ -25,6 +25,8 @@
 import static org.hamcrest.Matchers.greaterThan;
 import static org.hamcrest.Matchers.lessThan;
 import static org.hamcrest.Matchers.sameInstance;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
 
 public class DelayedBucketTests extends ESTestCase {
     public void testToString() {
@@ -40,6 +42,23 @@ public void testReduced() {
         assertThat(b.reduced(reduce, context), sameInstance(b.reduced(reduce, context)));
         assertThat(b.reduced(reduce, context).getKeyAsString(), equalTo("test"));
         assertThat(b.reduced(reduce, context).getDocCount(), equalTo(3L));
+        // it only accounts for sub-buckets
+        assertEquals(0, buckets.get());
+    }
+
+    public void testReducedSubAggregation() {
+        AtomicInteger buckets = new AtomicInteger();
+        AggregationReduceContext context = new AggregationReduceContext.ForFinal(null, null, () -> false, null, buckets::addAndGet);
+        BiFunction<List<InternalBucket>, AggregationReduceContext, InternalBucket> reduce = mockReduce(context);
+        DelayedBucket<InternalBucket> b = new DelayedBucket<>(
+            List.of(bucket("test", 1, mockMultiBucketAgg()), bucket("test", 2, mockMultiBucketAgg()))
+        );
+
+        assertThat(b.getDocCount(), equalTo(3L));
+        assertThat(b.reduced(reduce, context), sameInstance(b.reduced(reduce, context)));
+        assertThat(b.reduced(reduce, context).getKeyAsString(), equalTo("test"));
+        assertThat(b.reduced(reduce, context).getDocCount(), equalTo(3L));
+        // it only accounts for sub-buckets
         assertEquals(1, buckets.get());
     }
 
@@ -76,6 +95,19 @@ public void testNonCompetitiveReduced() {
         BiFunction<List<InternalBucket>, AggregationReduceContext, InternalBucket> reduce = mockReduce(context);
         DelayedBucket<InternalBucket> b = new DelayedBucket<>(List.of(bucket("test", 1)));
         b.reduced(reduce, context);
+        // only account for sub-aggregations
+        assertEquals(0, buckets.get());
+        b.nonCompetitive(context);
+        assertEquals(0, buckets.get());
+    }
+
+    public void testNonCompetitiveReducedSubAggregation() {
+        AtomicInteger buckets = new AtomicInteger();
+        AggregationReduceContext context = new AggregationReduceContext.ForFinal(null, null, () -> false, null, buckets::addAndGet);
+        BiFunction<List<InternalBucket>, AggregationReduceContext, InternalBucket> reduce = mockReduce(context);
+        DelayedBucket<InternalBucket> b = new DelayedBucket<>(List.of(bucket("test", 1, mockMultiBucketAgg())));
+        b.reduced(reduce, context);
+        // only account for sub-aggregations
         assertEquals(1, buckets.get());
         b.nonCompetitive(context);
         assertEquals(0, buckets.get());
     }
@@ -85,10 +117,25 @@ private static InternalBucket bucket(String key, long docCount) {
         return new StringTerms.Bucket(new BytesRef(key), docCount, InternalAggregations.EMPTY, false, 0, DocValueFormat.RAW);
     }
 
+    private static InternalBucket bucket(String key, long docCount, InternalAggregations subAggregations) {
+        return new StringTerms.Bucket(new BytesRef(key), docCount, subAggregations, false, 0, DocValueFormat.RAW);
+    }
+
     static BiFunction<List<InternalBucket>, AggregationReduceContext, InternalBucket> mockReduce(AggregationReduceContext context) {
         return (l, c) -> {
             assertThat(c, sameInstance(context));
-            return bucket(l.get(0).getKeyAsString(), l.stream().mapToLong(Bucket::getDocCount).sum());
+            context.consumeBucketsAndMaybeBreak(l.get(0).getAggregations().asList().size());
+            return bucket(l.get(0).getKeyAsString(), l.stream().mapToLong(Bucket::getDocCount).sum(), l.get(0).getAggregations());
         };
     }
+
+    @SuppressWarnings("unchecked")
+    private InternalAggregations mockMultiBucketAgg() {
+        List<InternalBucket> buckets = List.of(bucket("sub", 1));
+        InternalMultiBucketAggregation<?, InternalBucket> mock = (InternalMultiBucketAggregation<?, InternalBucket>) mock(
+            InternalMultiBucketAggregation.class
+        );
+        when(mock.getBuckets()).thenReturn(buckets);
+        return InternalAggregations.from(List.of(mock));
+    }
 }

From 6916dff5e43d06d1693e796ddc98875e8d77e064 Mon Sep 17 00:00:00 2001
From: Andrei Dan
Date: Tue, 24 Sep 2024 17:54:50 +0300
Subject: [PATCH 28/58] Register the _ts_routing_hash value format as named
 writeable (#113452) (#113463)

In #113373 we added a yaml test that can exercise the path of parsing
`_ts_routing_hash` values; however, the doc value format was not yet
registered in the `NamedWriteableRegistry`. This makes the
`TimeSeriesRoutingHashFieldType` doc value format `NamedWriteable`
available in the `NamedWriteableRegistry`.

(cherry picked from commit d07d1674309d898fcb1339e13fb3db3d9946830f)
Signed-off-by: Andrei Dan
---
 .../index/mapper/TimeSeriesRoutingHashFieldMapper.java | 4 +++-
 .../src/main/java/org/elasticsearch/search/SearchModule.java | 2 ++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TimeSeriesRoutingHashFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TimeSeriesRoutingHashFieldMapper.java
index 60f792068300b..24c0a9760893b 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/TimeSeriesRoutingHashFieldMapper.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/TimeSeriesRoutingHashFieldMapper.java
@@ -48,10 +48,12 @@ public class TimeSeriesRoutingHashFieldMapper extends MetadataFieldMapper {
 
     public static final TypeParser PARSER = new FixedTypeParser(c -> c.getIndexSettings().getMode().timeSeriesRoutingHashFieldMapper());
 
     static final NodeFeature TS_ROUTING_HASH_FIELD_PARSES_BYTES_REF = new NodeFeature("tsdb.ts_routing_hash_doc_value_parse_byte_ref");
 
+    public static DocValueFormat TS_ROUTING_HASH_DOC_VALUE_FORMAT = TimeSeriesRoutingHashFieldType.DOC_VALUE_FORMAT;
+
     static final class TimeSeriesRoutingHashFieldType extends MappedFieldType {
 
         private static final TimeSeriesRoutingHashFieldType INSTANCE = new TimeSeriesRoutingHashFieldType();
 
-        private static final DocValueFormat DOC_VALUE_FORMAT = new DocValueFormat() {
+        static final DocValueFormat DOC_VALUE_FORMAT = new DocValueFormat() {
 
             @Override
             public String getWriteableName() {

diff --git a/server/src/main/java/org/elasticsearch/search/SearchModule.java b/server/src/main/java/org/elasticsearch/search/SearchModule.java
index b703b05371cce..4afcc57b7b15a 100644
--- a/server/src/main/java/org/elasticsearch/search/SearchModule.java
+++ b/server/src/main/java/org/elasticsearch/search/SearchModule.java
@@ -276,6 +276,7 @@
 
 import static java.util.Collections.unmodifiableMap;
 import static java.util.Objects.requireNonNull;
+import static org.elasticsearch.index.mapper.TimeSeriesRoutingHashFieldMapper.TS_ROUTING_HASH_DOC_VALUE_FORMAT;
 
 /**
  * Sets up things that can be done at search time like queries, aggregations, and suggesters.
@@ -1023,6 +1024,7 @@ private void registerValueFormats() { registerValueFormat(DocValueFormat.BINARY.getWriteableName(), in -> DocValueFormat.BINARY); registerValueFormat(DocValueFormat.UNSIGNED_LONG_SHIFTED.getWriteableName(), in -> DocValueFormat.UNSIGNED_LONG_SHIFTED); registerValueFormat(DocValueFormat.TIME_SERIES_ID.getWriteableName(), in -> DocValueFormat.TIME_SERIES_ID); + registerValueFormat(TS_ROUTING_HASH_DOC_VALUE_FORMAT.getWriteableName(), in -> TS_ROUTING_HASH_DOC_VALUE_FORMAT); } /** From 24c15ab56b4741bb7d797ee4979795150503b4d8 Mon Sep 17 00:00:00 2001 From: David Turner Date: Tue, 24 Sep 2024 16:53:38 +0100 Subject: [PATCH 29/58] Backport new `MERGE_ON_RECOVERY_VERSION` (#113473) Most of #113462 only applies to `main`, but for now we must keep the index versions aligned in `8.x`. This commit backports the new `IndexVersion` constant. --- server/src/main/java/org/elasticsearch/index/IndexVersions.java | 1 + 1 file changed, 1 insertion(+) diff --git a/server/src/main/java/org/elasticsearch/index/IndexVersions.java b/server/src/main/java/org/elasticsearch/index/IndexVersions.java index 3d7db8f6db433..c8bc5604c7d8a 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexVersions.java +++ b/server/src/main/java/org/elasticsearch/index/IndexVersions.java @@ -115,6 +115,7 @@ private static IndexVersion def(int id, Version luceneVersion) { public static final IndexVersion INDEX_SORTING_ON_NESTED = def(8_512_00_0, Version.LUCENE_9_11_1); public static final IndexVersion LENIENT_UPDATEABLE_SYNONYMS = def(8_513_00_0, Version.LUCENE_9_11_1); public static final IndexVersion ENABLE_IGNORE_MALFORMED_LOGSDB = def(8_514_00_0, Version.LUCENE_9_11_1); + public static final IndexVersion MERGE_ON_RECOVERY_VERSION = def(8_515_00_0, Version.LUCENE_9_11_1); /* * STOP! READ THIS FIRST! No, really, From cc3191b9b0cef60cd31cf73165e8b4a4bdf56c8c Mon Sep 17 00:00:00 2001 From: Ignacio Vera Date: Tue, 24 Sep 2024 18:11:29 +0200 Subject: [PATCH 30/58] Add to SearchUsages queries generated by the vector tiles API (#113449) (#113475) Register the queries used via the vector tiles API in SearchUsages. 
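In outline (a condensed sketch of the wiring added below, not standalone code), the REST handler now threads a per-request SearchUsage through parsing and folds it into the node-wide holder:

    // Sketch: how a vector tile request is counted in the search usage stats.
    SearchUsage searchUsage = new SearchUsage();
    // queries parsed from the request body are tracked during parsing, and the
    // geo_shape query the API generates on the fly is tracked explicitly:
    searchUsage.trackQueryUsage(GeoShapeQueryBuilder.NAME);
    // finally, the per-request usage is merged into the node-level SearchUsageHolder:
    searchUsageHolder.updateUsage(searchUsage);

This is why the REST IT below asserts that both the `term` query from the request body and the implicit `geo_shape` query show up in `_cluster/stats`.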
--- .../xpack/vectortile/VectorTileRestIT.java | 14 ++++++++++++++ .../xpack/vectortile/VectorTilePlugin.java | 2 +- .../vectortile/rest/RestVectorTileAction.java | 9 +++++++-- .../xpack/vectortile/rest/VectorTileRequest.java | 15 +++++++++++---- .../vectortile/rest/VectorTileRequestTests.java | 14 +++++++------- 5 files changed, 40 insertions(+), 14 deletions(-) diff --git a/x-pack/plugin/vector-tile/src/javaRestTest/java/org/elasticsearch/xpack/vectortile/VectorTileRestIT.java b/x-pack/plugin/vector-tile/src/javaRestTest/java/org/elasticsearch/xpack/vectortile/VectorTileRestIT.java index 016bfaabec0ff..82b6a4382525a 100644 --- a/x-pack/plugin/vector-tile/src/javaRestTest/java/org/elasticsearch/xpack/vectortile/VectorTileRestIT.java +++ b/x-pack/plugin/vector-tile/src/javaRestTest/java/org/elasticsearch/xpack/vectortile/VectorTileRestIT.java @@ -25,8 +25,11 @@ import org.elasticsearch.geometry.Polygon; import org.elasticsearch.geometry.Rectangle; import org.elasticsearch.geometry.utils.WellKnownText; +import org.elasticsearch.index.query.GeoShapeQueryBuilder; +import org.elasticsearch.index.query.TermQueryBuilder; import org.elasticsearch.search.aggregations.bucket.geogrid.GeoTileUtils; import org.elasticsearch.test.rest.ESRestTestCase; +import org.elasticsearch.test.rest.ObjectPath; import org.hamcrest.Matchers; import org.junit.AfterClass; import org.junit.Before; @@ -790,7 +793,9 @@ public void testBasicQueryGet() throws Exception { } } }"""); + final int termsUsage = queryUsage(TermQueryBuilder.NAME); final VectorTile.Tile tile = execute(mvtRequest); + assertThat(queryUsage(TermQueryBuilder.NAME), Matchers.equalTo(termsUsage + 1)); assertThat(tile.getLayersCount(), Matchers.equalTo(3)); assertLayer(tile, HITS_LAYER, 4096, 1, 2); assertLayer(tile, AGGS_LAYER, 4096, 1, 2); @@ -1060,12 +1065,21 @@ private void assertBucketKeyTag(VectorTile.Tile.Layer layer, VectorTile.Tile.Fea } private VectorTile.Tile execute(Request mvtRequest) throws IOException { + final int geoShapeUsage = queryUsage(GeoShapeQueryBuilder.NAME); final Response response = client().performRequest(mvtRequest); + assertThat(queryUsage(GeoShapeQueryBuilder.NAME), Matchers.equalTo(geoShapeUsage + 1)); final InputStream inputStream = response.getEntity().getContent(); assertThat(response.getStatusLine().getStatusCode(), Matchers.equalTo(HttpStatus.SC_OK)); return VectorTile.Tile.parseFrom(inputStream); } + private int queryUsage(String queryName) throws IOException { + final Request request = new Request(HttpGet.METHOD_NAME, "/_cluster/stats?filter_path=indices.search.queries." + queryName); + ObjectPath objectPath = ObjectPath.createFromResponse(client().performRequest(request)); + Integer count = objectPath.evaluate("indices.search.queries." + queryName); + return count == null ? 
0 : count; + } + private VectorTile.Tile.Layer getLayer(VectorTile.Tile tile, String layerName) { for (int i = 0; i < tile.getLayersCount(); i++) { final VectorTile.Tile.Layer layer = tile.getLayers(i); diff --git a/x-pack/plugin/vector-tile/src/main/java/org/elasticsearch/xpack/vectortile/VectorTilePlugin.java b/x-pack/plugin/vector-tile/src/main/java/org/elasticsearch/xpack/vectortile/VectorTilePlugin.java index 590f0816d4aa7..f3cc660c40bd2 100644 --- a/x-pack/plugin/vector-tile/src/main/java/org/elasticsearch/xpack/vectortile/VectorTilePlugin.java +++ b/x-pack/plugin/vector-tile/src/main/java/org/elasticsearch/xpack/vectortile/VectorTilePlugin.java @@ -45,6 +45,6 @@ public List getRestHandlers( Supplier nodesInCluster, Predicate clusterSupportsFeature ) { - return List.of(new RestVectorTileAction()); + return List.of(new RestVectorTileAction(restController.getSearchUsageHolder())); } } diff --git a/x-pack/plugin/vector-tile/src/main/java/org/elasticsearch/xpack/vectortile/rest/RestVectorTileAction.java b/x-pack/plugin/vector-tile/src/main/java/org/elasticsearch/xpack/vectortile/rest/RestVectorTileAction.java index 66fbf2e892b56..7498a51321d55 100644 --- a/x-pack/plugin/vector-tile/src/main/java/org/elasticsearch/xpack/vectortile/rest/RestVectorTileAction.java +++ b/x-pack/plugin/vector-tile/src/main/java/org/elasticsearch/xpack/vectortile/rest/RestVectorTileAction.java @@ -47,6 +47,7 @@ import org.elasticsearch.search.fetch.subphase.FieldAndFormat; import org.elasticsearch.search.profile.SearchProfileResults; import org.elasticsearch.search.sort.SortBuilder; +import org.elasticsearch.usage.SearchUsageHolder; import org.elasticsearch.xpack.vectortile.feature.FeatureFactory; import java.io.IOException; @@ -87,7 +88,11 @@ public class RestVectorTileAction extends BaseRestHandler { // internal label position runtime field name static final String LABEL_POSITION_FIELD_NAME = INTERNAL_AGG_PREFIX + "label_position"; - public RestVectorTileAction() {} + private final SearchUsageHolder searchUsageHolder; + + public RestVectorTileAction(SearchUsageHolder searchUsageHolder) { + this.searchUsageHolder = searchUsageHolder; + } @Override public List routes() { @@ -103,7 +108,7 @@ public String getName() { protected RestChannelConsumer prepareRequest(RestRequest restRequest, NodeClient client) throws IOException { // This will allow to cancel the search request if the http channel is closed final RestCancellableNodeClient cancellableNodeClient = new RestCancellableNodeClient(client, restRequest.getHttpChannel()); - final VectorTileRequest request = VectorTileRequest.parseRestRequest(restRequest); + final VectorTileRequest request = VectorTileRequest.parseRestRequest(restRequest, searchUsageHolder::updateUsage); final SearchRequestBuilder searchRequestBuilder = searchRequestBuilder(cancellableNodeClient, request); return channel -> searchRequestBuilder.execute(new RestResponseListener<>(channel) { diff --git a/x-pack/plugin/vector-tile/src/main/java/org/elasticsearch/xpack/vectortile/rest/VectorTileRequest.java b/x-pack/plugin/vector-tile/src/main/java/org/elasticsearch/xpack/vectortile/rest/VectorTileRequest.java index 785f86e06e418..69f44d64d6b6d 100644 --- a/x-pack/plugin/vector-tile/src/main/java/org/elasticsearch/xpack/vectortile/rest/VectorTileRequest.java +++ b/x-pack/plugin/vector-tile/src/main/java/org/elasticsearch/xpack/vectortile/rest/VectorTileRequest.java @@ -12,6 +12,7 @@ import org.elasticsearch.core.Booleans; import org.elasticsearch.geometry.Rectangle; import 
org.elasticsearch.index.query.AbstractQueryBuilder;
+import org.elasticsearch.index.query.GeoShapeQueryBuilder;
 import org.elasticsearch.index.query.QueryBuilder;
 import org.elasticsearch.rest.RestRequest;
 import org.elasticsearch.script.Script;
@@ -25,6 +26,7 @@
 import org.elasticsearch.search.sort.ScriptSortBuilder;
 import org.elasticsearch.search.sort.SortBuilder;
 import org.elasticsearch.search.sort.SortOrder;
+import org.elasticsearch.usage.SearchUsage;
 import org.elasticsearch.xcontent.ObjectParser;
 import org.elasticsearch.xcontent.ParseField;
 import org.elasticsearch.xcontent.XContentParser;
@@ -33,6 +35,7 @@
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
+import java.util.function.Consumer;
 
 import static java.util.Collections.emptyList;
 import static java.util.Collections.emptyMap;
@@ -75,7 +78,7 @@ protected static class Defaults {
         public static final int TRACK_TOTAL_HITS_UP_TO = DEFAULT_TRACK_TOTAL_HITS_UP_TO;
     }
 
-    private static final ObjectParser<VectorTileRequest, RestRequest> PARSER;
+    private static final ObjectParser<VectorTileRequest, SearchUsage> PARSER;
 
     static {
         PARSER = new ObjectParser<>("vector-tile");
@@ -89,7 +92,7 @@ protected static class Defaults {
         }, SearchSourceBuilder.FETCH_FIELDS_FIELD, ObjectParser.ValueType.OBJECT_ARRAY);
         PARSER.declareField(
             VectorTileRequest::setQueryBuilder,
-            (p, c) -> AbstractQueryBuilder.parseTopLevelQuery(p),
+            (p, c) -> AbstractQueryBuilder.parseTopLevelQuery(p, c::trackQueryUsage),
             SearchSourceBuilder.QUERY_FIELD,
             ObjectParser.ValueType.OBJECT
         );
@@ -130,7 +133,7 @@ protected static class Defaults {
         }, SearchSourceBuilder.TRACK_TOTAL_HITS_FIELD, ObjectParser.ValueType.VALUE);
     }
 
-    static VectorTileRequest parseRestRequest(RestRequest restRequest) throws IOException {
+    static VectorTileRequest parseRestRequest(RestRequest restRequest, Consumer<SearchUsage> searchUsageConsumer) throws IOException {
         final VectorTileRequest request = new VectorTileRequest(
             Strings.splitStringByCommaToArray(restRequest.param(INDEX_PARAM)),
             restRequest.param(FIELD_PARAM),
@@ -138,11 +141,15 @@ static VectorTileRequest parseRestRequest(RestRequest restRequest) throws IOExce
             Integer.parseInt(restRequest.param(X_PARAM)),
             Integer.parseInt(restRequest.param(Y_PARAM))
         );
+        final SearchUsage searchUsage = new SearchUsage();
         if (restRequest.hasContentOrSourceParam()) {
             try (XContentParser contentParser = restRequest.contentOrSourceParamParser()) {
-                PARSER.parse(contentParser, request, restRequest);
+                PARSER.parse(contentParser, request, searchUsage);
             }
         }
+        // The API generates a query on the fly that we track here.
+        searchUsage.trackQueryUsage(GeoShapeQueryBuilder.NAME);
+        searchUsageConsumer.accept(searchUsage);
         // Following the same strategy as the _search API, some parameters can be defined in the body or as URL parameters.
         // URL parameters take precedence so we check them here.
if (restRequest.hasParam(SearchSourceBuilder.SIZE_FIELD.getPreferredName())) { diff --git a/x-pack/plugin/vector-tile/src/test/java/org/elasticsearch/xpack/vectortile/rest/VectorTileRequestTests.java b/x-pack/plugin/vector-tile/src/test/java/org/elasticsearch/xpack/vectortile/rest/VectorTileRequestTests.java index 36ba62767923c..9f903c9d2efd5 100644 --- a/x-pack/plugin/vector-tile/src/test/java/org/elasticsearch/xpack/vectortile/rest/VectorTileRequestTests.java +++ b/x-pack/plugin/vector-tile/src/test/java/org/elasticsearch/xpack/vectortile/rest/VectorTileRequestTests.java @@ -231,7 +231,7 @@ public void testWrongTile() { final FakeRestRequest request = getBasicRequestBuilder(index, field, z, x, y).build(); final IllegalArgumentException ex = expectThrows( IllegalArgumentException.class, - () -> VectorTileRequest.parseRestRequest(request) + () -> VectorTileRequest.parseRestRequest(request, s -> {}) ); assertThat(ex.getMessage(), Matchers.equalTo("Invalid geotile_grid precision of " + z + ". Must be between 0 and 29.")); } @@ -243,7 +243,7 @@ public void testWrongTile() { final FakeRestRequest request = getBasicRequestBuilder(index, field, z, x, y).build(); final IllegalArgumentException ex = expectThrows( IllegalArgumentException.class, - () -> VectorTileRequest.parseRestRequest(request) + () -> VectorTileRequest.parseRestRequest(request, s -> {}) ); assertThat(ex.getMessage(), Matchers.equalTo("Invalid geotile_grid precision of " + z + ". Must be between 0 and 29.")); } @@ -255,7 +255,7 @@ public void testWrongTile() { final FakeRestRequest request = getBasicRequestBuilder(index, field, z, x, y).build(); final IllegalArgumentException ex = expectThrows( IllegalArgumentException.class, - () -> VectorTileRequest.parseRestRequest(request) + () -> VectorTileRequest.parseRestRequest(request, s -> {}) ); assertThat(ex.getMessage(), Matchers.equalTo("Zoom/X/Y combination is not valid: " + z + "/" + x + "/" + y)); } @@ -267,7 +267,7 @@ public void testWrongTile() { final FakeRestRequest request = getBasicRequestBuilder(index, field, z, x, y).build(); final IllegalArgumentException ex = expectThrows( IllegalArgumentException.class, - () -> VectorTileRequest.parseRestRequest(request) + () -> VectorTileRequest.parseRestRequest(request, s -> {}) ); assertThat(ex.getMessage(), Matchers.equalTo("Zoom/X/Y combination is not valid: " + z + "/" + x + "/" + y)); } @@ -279,7 +279,7 @@ public void testWrongTile() { final FakeRestRequest request = getBasicRequestBuilder(index, field, z, x, y).build(); final IllegalArgumentException ex = expectThrows( IllegalArgumentException.class, - () -> VectorTileRequest.parseRestRequest(request) + () -> VectorTileRequest.parseRestRequest(request, s -> {}) ); assertThat(ex.getMessage(), Matchers.equalTo("Zoom/X/Y combination is not valid: " + z + "/" + x + "/" + y)); } @@ -291,7 +291,7 @@ public void testWrongTile() { final FakeRestRequest request = getBasicRequestBuilder(index, field, z, x, y).build(); final IllegalArgumentException ex = expectThrows( IllegalArgumentException.class, - () -> VectorTileRequest.parseRestRequest(request) + () -> VectorTileRequest.parseRestRequest(request, s -> {}) ); assertThat(ex.getMessage(), Matchers.equalTo("Zoom/X/Y combination is not valid: " + z + "/" + x + "/" + y)); } @@ -310,7 +310,7 @@ private void assertRestRequest(CheckedConsumer con consumer.accept(builder); builder.endObject(); final FakeRestRequest request = requestBuilder.withContent(BytesReference.bytes(builder), builder.contentType()).build(); - final VectorTileRequest 
vectorTileRequest = VectorTileRequest.parseRestRequest(request); + final VectorTileRequest vectorTileRequest = VectorTileRequest.parseRestRequest(request, s -> {}); assertThat(vectorTileRequest.getIndexes(), Matchers.equalTo(new String[] { index })); assertThat(vectorTileRequest.getField(), Matchers.equalTo(field)); assertThat(vectorTileRequest.getZ(), Matchers.equalTo(z)); From b74e5928dba7ae5f85ae5db84112c28964ba778b Mon Sep 17 00:00:00 2001 From: Pat Whelan Date: Tue, 24 Sep 2024 12:53:23 -0400 Subject: [PATCH 31/58] [ML] Always send at least 1 input (#113456) (#113481) Fix #113430 --- .../java/org/elasticsearch/xpack/inference/InferenceCrudIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceCrudIT.java b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceCrudIT.java index b9a5202a25013..893d52435ec1c 100644 --- a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceCrudIT.java +++ b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceCrudIT.java @@ -260,7 +260,7 @@ public void testSupportedStream() throws Exception { assertEquals(modelId, singleModel.get("inference_id")); assertEquals(TaskType.COMPLETION.toString(), singleModel.get("task_type")); - var input = IntStream.range(0, randomInt(10)).mapToObj(i -> randomAlphaOfLength(10)).toList(); + var input = IntStream.range(1, randomInt(10)).mapToObj(i -> randomAlphaOfLength(10)).toList(); try { var events = streamInferOnMockService(modelId, TaskType.COMPLETION, input); From d798c86bcc4dcab3544354da9294910642129d4f Mon Sep 17 00:00:00 2001 From: David Turner Date: Tue, 24 Sep 2024 18:13:54 +0100 Subject: [PATCH 32/58] [8.x] Remove timeouts from `CreateIndexClusterStateUpdateRequest` (#113366) (#113479) * Remove timeouts from `CreateIndexClusterStateUpdateRequest` (#113366) This class is mostly used to carry information during the process of computing a cluster state update which creates an index, for which the `masterNodeTimeout` and `ackTimeout` fields are not meaningful. Setting these fields in those contexts is pointless, but leaving them as `null` makes it harder to reason about null-propagation. This commit removes these fields and replaces them with method arguments in the few places where they actually make sense. 
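For illustration, the call-site shape changes roughly like this (condensed from the diffs below, not a compilable excerpt):

    // Before: timeouts rode along on the cluster state update request.
    createIndexService.createIndex(
        updateRequest.ackTimeout(request.ackTimeout()).masterNodeTimeout(request.masterNodeTimeout()),
        listener
    );
    // After: the update request no longer carries timeouts; callers pass the
    // master node timeout, ack timeout and wait-for-active-shards timeout explicitly.
    createIndexService.createIndex(
        request.masterNodeTimeout(),
        request.ackTimeout(),
        request.ackTimeout(),
        updateRequest,
        listener
    );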
* Fix compile --- .../indices/create/AutoCreateAction.java | 4 +-- .../CreateIndexClusterStateUpdateRequest.java | 3 +- .../create/TransportCreateIndexAction.java | 17 +++------- .../rollover/MetadataRolloverService.java | 6 +--- .../indices/shrink/TransportResizeAction.java | 5 +-- .../metadata/MetadataCreateIndexService.java | 32 +++++++++++++------ .../upgrades/SystemIndexMigrator.java | 2 +- .../TransportCreateIndexActionTests.java | 25 +++++++++++++-- 8 files changed, 57 insertions(+), 37 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/create/AutoCreateAction.java b/server/src/main/java/org/elasticsearch/action/admin/indices/create/AutoCreateAction.java index 2bbdc6b32d502..823bff904283b 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/create/AutoCreateAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/create/AutoCreateAction.java @@ -350,7 +350,7 @@ private CreateIndexClusterStateUpdateRequest buildUpdateRequest(String indexName request.cause(), indexName, request.index() - ).ackTimeout(request.ackTimeout()).performReroute(false).masterNodeTimeout(request.masterNodeTimeout()); + ).performReroute(false); logger.debug("Auto-creating index {}", indexName); return updateRequest; } @@ -367,7 +367,7 @@ private CreateIndexClusterStateUpdateRequest buildSystemIndexUpdateRequest(Strin request.cause(), concreteIndexName, request.index() - ).ackTimeout(request.ackTimeout()).masterNodeTimeout(request.masterNodeTimeout()).performReroute(false); + ).performReroute(false); updateRequest.waitForActiveShards(ActiveShardCount.ALL); diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/create/CreateIndexClusterStateUpdateRequest.java b/server/src/main/java/org/elasticsearch/action/admin/indices/create/CreateIndexClusterStateUpdateRequest.java index 9e41c6231bbc4..080ebb5951a7a 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/create/CreateIndexClusterStateUpdateRequest.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/create/CreateIndexClusterStateUpdateRequest.java @@ -12,7 +12,6 @@ import org.elasticsearch.action.admin.indices.alias.Alias; import org.elasticsearch.action.admin.indices.shrink.ResizeType; import org.elasticsearch.action.support.ActiveShardCount; -import org.elasticsearch.cluster.ack.ClusterStateUpdateRequest; import org.elasticsearch.cluster.metadata.ComposableIndexTemplate; import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.common.settings.Settings; @@ -25,7 +24,7 @@ /** * Cluster state update request that allows to create an index */ -public class CreateIndexClusterStateUpdateRequest extends ClusterStateUpdateRequest { +public class CreateIndexClusterStateUpdateRequest { private final String cause; private final String index; diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/create/TransportCreateIndexAction.java b/server/src/main/java/org/elasticsearch/action/admin/indices/create/TransportCreateIndexAction.java index 5d761bcb5ebb3..b43f2006061a5 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/create/TransportCreateIndexAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/create/TransportCreateIndexAction.java @@ -150,6 +150,9 @@ protected void masterOperation( } createIndexService.createIndex( + request.masterNodeTimeout(), + request.ackTimeout(), + request.ackTimeout(), updateRequest, listener.map(response -> new 
CreateIndexResponse(response.isAcknowledged(), response.isShardsAcknowledged(), indexName)) ); @@ -166,9 +169,7 @@ private CreateIndexClusterStateUpdateRequest buildUpdateRequest( alias.isHidden(true); } }).collect(Collectors.toSet()); - return new CreateIndexClusterStateUpdateRequest(cause, indexName, request.index()).ackTimeout(request.ackTimeout()) - .masterNodeTimeout(request.masterNodeTimeout()) - .settings(request.settings()) + return new CreateIndexClusterStateUpdateRequest(cause, indexName, request.index()).settings(request.settings()) .mappings(request.mappings()) .aliases(aliases) .nameResolvedInstant(nameResolvedAt) @@ -196,15 +197,7 @@ private static CreateIndexClusterStateUpdateRequest buildSystemIndexUpdateReques ); } - final CreateIndexClusterStateUpdateRequest updateRequest = new CreateIndexClusterStateUpdateRequest( - cause, - descriptor.getPrimaryIndex(), - request.index() - ); - - return updateRequest.ackTimeout(request.ackTimeout()) - .masterNodeTimeout(request.masterNodeTimeout()) - .aliases(aliases) + return new CreateIndexClusterStateUpdateRequest(cause, descriptor.getPrimaryIndex(), request.index()).aliases(aliases) .waitForActiveShards(ActiveShardCount.ALL) .mappings(descriptor.getMappings()) .settings(settings); diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/rollover/MetadataRolloverService.java b/server/src/main/java/org/elasticsearch/action/admin/indices/rollover/MetadataRolloverService.java index d8b2976547f34..cfc5b7802d989 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/rollover/MetadataRolloverService.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/rollover/MetadataRolloverService.java @@ -548,11 +548,7 @@ static CreateIndexClusterStateUpdateRequest prepareCreateIndexRequest( if (settings != null) { b.put(settings); } - return new CreateIndexClusterStateUpdateRequest(cause, targetIndexName, providedIndexName).ackTimeout( - createIndexRequest.ackTimeout() - ) - .masterNodeTimeout(createIndexRequest.masterNodeTimeout()) - .settings(b.build()) + return new CreateIndexClusterStateUpdateRequest(cause, targetIndexName, providedIndexName).settings(b.build()) .aliases(createIndexRequest.aliases()) .waitForActiveShards(ActiveShardCount.NONE) // not waiting for shards here, will wait on the alias switch operation .mappings(createIndexRequest.mappings()) diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/shrink/TransportResizeAction.java b/server/src/main/java/org/elasticsearch/action/admin/indices/shrink/TransportResizeAction.java index 28c2e838ddd12..4a6311e6400aa 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/shrink/TransportResizeAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/shrink/TransportResizeAction.java @@ -136,6 +136,9 @@ protected void masterOperation( return; } createIndexService.createIndex( + resizeRequest.masterNodeTimeout(), + resizeRequest.ackTimeout(), + resizeRequest.ackTimeout(), updateRequest, delegatedListener.map( response -> new CreateIndexResponse( @@ -234,8 +237,6 @@ static CreateIndexClusterStateUpdateRequest prepareCreateIndexRequest( // mappings are updated on the node when creating in the shards, this prevents race-conditions since all mapping must be // applied once we took the snapshot and if somebody messes things up and switches the index read/write and adds docs we // miss the mappings for everything is corrupted and hard to debug - .ackTimeout(targetIndex.ackTimeout()) - 
.masterNodeTimeout(targetIndex.masterNodeTimeout()) .settings(targetIndex.settings()) .aliases(targetIndex.aliases()) .waitForActiveShards(targetIndex.waitForActiveShards()) diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexService.java b/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexService.java index 061aa18dd464a..4cdf1508a7987 100644 --- a/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexService.java +++ b/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexService.java @@ -52,6 +52,7 @@ import org.elasticsearch.core.Nullable; import org.elasticsearch.core.PathUtils; import org.elasticsearch.core.SuppressForbidden; +import org.elasticsearch.core.TimeValue; import org.elasticsearch.env.Environment; import org.elasticsearch.index.Index; import org.elasticsearch.index.IndexMode; @@ -254,12 +255,23 @@ public static void validateIndexOrAliasName(String index, BiFunction listener) { + public void createIndex( + final TimeValue masterNodeTimeout, + final TimeValue ackTimeout, + @Nullable final TimeValue waitForActiveShardsTimeout, + final CreateIndexClusterStateUpdateRequest request, + final ActionListener listener + ) { logger.trace("createIndex[{}]", request); - onlyCreateIndex(request, listener.delegateFailureAndWrap((delegate, response) -> { + onlyCreateIndex(masterNodeTimeout, ackTimeout, request, listener.delegateFailureAndWrap((delegate, response) -> { if (response.isAcknowledged()) { logger.trace( "[{}] index creation acknowledged, waiting for active shards [{}]", @@ -270,7 +282,7 @@ public void createIndex(final CreateIndexClusterStateUpdateRequest request, fina clusterService, new String[] { request.index() }, request.waitForActiveShards(), - request.ackTimeout(), + waitForActiveShardsTimeout, delegate.map(shardsAcknowledged -> { if (shardsAcknowledged == false) { logger.debug( @@ -290,18 +302,18 @@ public void createIndex(final CreateIndexClusterStateUpdateRequest request, fina })); } - private void onlyCreateIndex(final CreateIndexClusterStateUpdateRequest request, final ActionListener listener) { + private void onlyCreateIndex( + final TimeValue masterNodeTimeout, + final TimeValue ackTimeout, + final CreateIndexClusterStateUpdateRequest request, + final ActionListener listener + ) { normalizeRequestSetting(request); var delegate = new AllocationActionListener<>(listener, threadPool.getThreadContext()); submitUnbatchedTask( "create-index [" + request.index() + "], cause [" + request.cause() + "]", - new AckedClusterStateUpdateTask( - Priority.URGENT, - request.masterNodeTimeout(), - request.ackTimeout(), - delegate.clusterStateUpdate() - ) { + new AckedClusterStateUpdateTask(Priority.URGENT, masterNodeTimeout, ackTimeout, delegate.clusterStateUpdate()) { @Override public ClusterState execute(ClusterState currentState) throws Exception { diff --git a/server/src/main/java/org/elasticsearch/upgrades/SystemIndexMigrator.java b/server/src/main/java/org/elasticsearch/upgrades/SystemIndexMigrator.java index 94b856f7a22fb..711ca08cd5df6 100644 --- a/server/src/main/java/org/elasticsearch/upgrades/SystemIndexMigrator.java +++ b/server/src/main/java/org/elasticsearch/upgrades/SystemIndexMigrator.java @@ -500,7 +500,7 @@ private void createIndex(SystemIndexMigrationInfo migrationInfo, ActionListener< createRequest.waitForActiveShards(ActiveShardCount.ALL) .mappings(migrationInfo.getMappings()) .settings(Objects.requireNonNullElse(settingsBuilder.build(), Settings.EMPTY)); - 
metadataCreateIndexService.createIndex(createRequest, listener); + metadataCreateIndexService.createIndex(TimeValue.MINUS_ONE, TimeValue.ZERO, null, createRequest, listener); } private CheckedBiConsumer, AcknowledgedResponse, Exception> setAliasAndRemoveOldIndex( diff --git a/server/src/test/java/org/elasticsearch/action/admin/indices/create/TransportCreateIndexActionTests.java b/server/src/test/java/org/elasticsearch/action/admin/indices/create/TransportCreateIndexActionTests.java index 160fc3ddb1f4c..7e590dc2cdeec 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/indices/create/TransportCreateIndexActionTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/indices/create/TransportCreateIndexActionTests.java @@ -27,6 +27,7 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.transport.TransportAddress; import org.elasticsearch.common.util.concurrent.ThreadContext; +import org.elasticsearch.core.TimeValue; import org.elasticsearch.indices.SystemIndexDescriptor; import org.elasticsearch.indices.SystemIndices; import org.elasticsearch.tasks.Task; @@ -147,7 +148,13 @@ public void testSystemIndicesCannotBeCreatedUnhidden() { ArgumentCaptor exceptionArgumentCaptor = ArgumentCaptor.forClass(Exception.class); verify(mockListener, times(0)).onResponse(any()); - verify(metadataCreateIndexService, times(0)).createIndex(any(), any()); + verify(metadataCreateIndexService, times(0)).createIndex( + any(TimeValue.class), + any(TimeValue.class), + any(TimeValue.class), + any(), + any() + ); verify(mockListener, times(1)).onFailure(exceptionArgumentCaptor.capture()); Exception e = exceptionArgumentCaptor.getValue(); @@ -167,7 +174,13 @@ public void testSystemIndicesCreatedHiddenByDefault() { CreateIndexClusterStateUpdateRequest.class ); verify(mockListener, times(0)).onFailure(any()); - verify(metadataCreateIndexService, times(1)).createIndex(createRequestArgumentCaptor.capture(), any()); + verify(metadataCreateIndexService, times(1)).createIndex( + any(TimeValue.class), + any(TimeValue.class), + any(TimeValue.class), + createRequestArgumentCaptor.capture(), + any() + ); CreateIndexClusterStateUpdateRequest processedRequest = createRequestArgumentCaptor.getValue(); assertTrue(processedRequest.settings().getAsBoolean(SETTING_INDEX_HIDDEN, false)); @@ -187,7 +200,13 @@ public void testSystemAliasCreatedHiddenByDefault() { CreateIndexClusterStateUpdateRequest.class ); verify(mockListener, times(0)).onFailure(any()); - verify(metadataCreateIndexService, times(1)).createIndex(createRequestArgumentCaptor.capture(), any()); + verify(metadataCreateIndexService, times(1)).createIndex( + any(TimeValue.class), + any(TimeValue.class), + any(TimeValue.class), + createRequestArgumentCaptor.capture(), + any() + ); CreateIndexClusterStateUpdateRequest processedRequest = createRequestArgumentCaptor.getValue(); assertTrue(processedRequest.aliases().contains(new Alias(SYSTEM_ALIAS_NAME).isHidden(true))); From cb41144cc2cf4c4dff41860472997ba211e877e1 Mon Sep 17 00:00:00 2001 From: Nikolaj Volgushev Date: Tue, 24 Sep 2024 19:21:48 +0200 Subject: [PATCH 33/58] More unsupported locales in Kerberos tests (#113354) (#113485) This PR adds two more locales to the unsupported set. I got a complete list this time by running through all locales. 
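As a sketch of how the set below is consumed (the guard shown here is hypothetical; the real plumbing lives in KerberosTestCase), tests are skipped whenever the JVM default locale uses one of the listed languages:

    // Hypothetical guard in a test setup method:
    assumeFalse(
        "Kerberos tests cannot run with locale language: " + Locale.getDefault().getLanguage(),
        UNSUPPORTED_LOCALE_LANGUAGES.contains(Locale.getDefault().getLanguage())
    );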
Relates: https://github.com/elastic/elasticsearch/pull/112582 Resolves: https://github.com/elastic/elasticsearch/issues/112631 Resolves: https://github.com/elastic/elasticsearch/issues/112632 Resolves: https://github.com/elastic/elasticsearch/issues/112639 --- muted-tests.yml | 9 --------- .../xpack/security/authc/kerberos/KerberosTestCase.java | 6 ++++-- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/muted-tests.yml b/muted-tests.yml index 20863a6f6349d..0a00fb931617d 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -166,15 +166,6 @@ tests: - class: org.elasticsearch.xpack.ml.integration.MlJobIT method: testPutJob_GivenFarequoteConfig issue: https://github.com/elastic/elasticsearch/issues/112382 -- class: org.elasticsearch.xpack.security.authc.kerberos.KerberosTicketValidatorTests - method: testWhenKeyTabWithInvalidContentFailsValidation - issue: https://github.com/elastic/elasticsearch/issues/112631 -- class: org.elasticsearch.xpack.security.authc.kerberos.KerberosTicketValidatorTests - method: testValidKebrerosTicket - issue: https://github.com/elastic/elasticsearch/issues/112632 -- class: org.elasticsearch.xpack.security.authc.kerberos.KerberosTicketValidatorTests - method: testKerbTicketGeneratedForDifferentServerFailsValidation - issue: https://github.com/elastic/elasticsearch/issues/112639 - class: org.elasticsearch.packaging.test.PackagesSecurityAutoConfigurationTests method: test20SecurityNotAutoConfiguredOnReInstallation issue: https://github.com/elastic/elasticsearch/issues/112635 diff --git a/x-pack/qa/evil-tests/src/test/java/org/elasticsearch/xpack/security/authc/kerberos/KerberosTestCase.java b/x-pack/qa/evil-tests/src/test/java/org/elasticsearch/xpack/security/authc/kerberos/KerberosTestCase.java index 229b6e2a8f92d..261bc567d5c91 100644 --- a/x-pack/qa/evil-tests/src/test/java/org/elasticsearch/xpack/security/authc/kerberos/KerberosTestCase.java +++ b/x-pack/qa/evil-tests/src/test/java/org/elasticsearch/xpack/security/authc/kerberos/KerberosTestCase.java @@ -62,7 +62,7 @@ public abstract class KerberosTestCase extends ESTestCase { * * Note: several unsupported locales were added in CLDR. #109670 included these below. 
*/ - private static Set UNSUPPORTED_LOCALE_LANGUAGES = Set.of( + private static final Set UNSUPPORTED_LOCALE_LANGUAGES = Set.of( "ar", "ja", "th", @@ -88,7 +88,9 @@ public abstract class KerberosTestCase extends ESTestCase { "sat", "sa", "bgc", - "raj" + "raj", + "nqo", + "bho" ); @BeforeClass From 32e017270d29611b4caaf08ccce40b327244dfa3 Mon Sep 17 00:00:00 2001 From: Keith Massey Date: Tue, 24 Sep 2024 12:35:43 -0500 Subject: [PATCH 34/58] Making TransportGetDatabaseConfigurationAction extend TransportNodesAction (#113141) (#113467) --- .../GetDatabaseConfigurationAction.java | 124 +++++++++++++----- .../RestGetDatabaseConfigurationAction.java | 8 +- ...ansportGetDatabaseConfigurationAction.java | 105 +++++++++++---- .../ingest/IngestGeoIpFeatures.java | 7 +- 4 files changed, 179 insertions(+), 65 deletions(-) diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/GetDatabaseConfigurationAction.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/GetDatabaseConfigurationAction.java index 72a72cfa204ee..0d1f1d2f9f660 100644 --- a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/GetDatabaseConfigurationAction.java +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/GetDatabaseConfigurationAction.java @@ -9,13 +9,16 @@ package org.elasticsearch.ingest.geoip.direct; -import org.elasticsearch.action.ActionResponse; import org.elasticsearch.action.ActionType; -import org.elasticsearch.action.support.master.AcknowledgedRequest; -import org.elasticsearch.common.Strings; +import org.elasticsearch.action.FailedNodeException; +import org.elasticsearch.action.support.nodes.BaseNodeResponse; +import org.elasticsearch.action.support.nodes.BaseNodesRequest; +import org.elasticsearch.action.support.nodes.BaseNodesResponse; +import org.elasticsearch.cluster.ClusterName; +import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; -import org.elasticsearch.core.TimeValue; +import org.elasticsearch.transport.TransportRequest; import org.elasticsearch.xcontent.ToXContentObject; import org.elasticsearch.xcontent.XContentBuilder; @@ -28,8 +31,9 @@ import static org.elasticsearch.ingest.geoip.direct.DatabaseConfigurationMetadata.MODIFIED_DATE; import static org.elasticsearch.ingest.geoip.direct.DatabaseConfigurationMetadata.MODIFIED_DATE_MILLIS; import static org.elasticsearch.ingest.geoip.direct.DatabaseConfigurationMetadata.VERSION; +import static org.elasticsearch.ingest.geoip.direct.GetDatabaseConfigurationAction.Response; -public class GetDatabaseConfigurationAction extends ActionType { +public class GetDatabaseConfigurationAction extends ActionType { public static final GetDatabaseConfigurationAction INSTANCE = new GetDatabaseConfigurationAction(); public static final String NAME = "cluster:admin/ingest/geoip/database/get"; @@ -37,28 +41,16 @@ protected GetDatabaseConfigurationAction() { super(NAME); } - public static class Request extends AcknowledgedRequest { - + public static class Request extends BaseNodesRequest { private final String[] databaseIds; - public Request(TimeValue masterNodeTimeout, TimeValue ackTimeout, String... 
databaseIds) { - super(masterNodeTimeout, ackTimeout); - this.databaseIds = Objects.requireNonNull(databaseIds, "ids may not be null"); - } - - public Request(StreamInput in) throws IOException { - super(in); - databaseIds = in.readStringArray(); + public Request(String... databaseIds) { + super((String[]) null); + this.databaseIds = databaseIds; } public String[] getDatabaseIds() { - return this.databaseIds; - } - - @Override - public void writeTo(StreamOutput out) throws IOException { - super.writeTo(out); - out.writeStringArray(databaseIds); + return databaseIds; } @Override @@ -77,27 +69,36 @@ public boolean equals(Object obj) { Request other = (Request) obj; return Arrays.equals(databaseIds, other.databaseIds); } + } - public static class Response extends ActionResponse implements ToXContentObject { + public static class Response extends BaseNodesResponse implements ToXContentObject { private final List databases; - public Response(List databases) { + public Response( + List databases, + ClusterName clusterName, + List nodes, + List failures + ) { + super(clusterName, nodes, failures); this.databases = List.copyOf(databases); // defensive copy } - public Response(StreamInput in) throws IOException { - this(in.readCollectionAsList(DatabaseConfigurationMetadata::new)); + protected Response(StreamInput in) throws IOException { + super(in); + this.databases = in.readCollectionAsList(DatabaseConfigurationMetadata::new); } - public List getDatabases() { - return this.databases; + @Override + protected List readNodesFrom(StreamInput in) throws IOException { + return in.readCollectionAsList(NodeResponse::new); } @Override - public String toString() { - return Strings.toString(this); + protected void writeNodesTo(StreamOutput out, List nodes) throws IOException { + out.writeCollection(nodes); } @Override @@ -117,6 +118,67 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.endObject(); return builder; } + } + + public static class NodeRequest extends TransportRequest { + + private final String[] databaseIds; + + public NodeRequest(String... 
databaseIds) { + super(); + this.databaseIds = Objects.requireNonNull(databaseIds, "ids may not be null"); + } + + public NodeRequest(StreamInput in) throws IOException { + super(in); + databaseIds = in.readStringArray(); + } + + public String[] getDatabaseIds() { + return this.databaseIds; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + out.writeStringArray(databaseIds); + } + + @Override + public int hashCode() { + return Arrays.hashCode(databaseIds); + } + + @Override + public boolean equals(Object obj) { + if (obj == null) { + return false; + } + if (obj.getClass() != getClass()) { + return false; + } + NodeRequest other = (NodeRequest) obj; + return Arrays.equals(databaseIds, other.databaseIds); + } + } + + public static class NodeResponse extends BaseNodeResponse { + + private final List databases; + + public NodeResponse(DiscoveryNode node, List databases) { + super(node); + this.databases = List.copyOf(databases); // defensive copy + } + + public NodeResponse(StreamInput in) throws IOException { + super(in); + this.databases = in.readCollectionAsList(DatabaseConfigurationMetadata::new); + } + + public List getDatabases() { + return this.databases; + } @Override public void writeTo(StreamOutput out) throws IOException { @@ -136,7 +198,7 @@ public boolean equals(Object obj) { if (obj.getClass() != getClass()) { return false; } - Response other = (Response) obj; + NodeResponse other = (NodeResponse) obj; return databases.equals(other.databases); } } diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/RestGetDatabaseConfigurationAction.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/RestGetDatabaseConfigurationAction.java index 7d4c8de3f411b..f34f388f22965 100644 --- a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/RestGetDatabaseConfigurationAction.java +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/RestGetDatabaseConfigurationAction.java @@ -20,8 +20,6 @@ import java.util.List; import static org.elasticsearch.rest.RestRequest.Method.GET; -import static org.elasticsearch.rest.RestUtils.getAckTimeout; -import static org.elasticsearch.rest.RestUtils.getMasterNodeTimeout; @ServerlessScope(Scope.INTERNAL) public class RestGetDatabaseConfigurationAction extends BaseRestHandler { @@ -38,11 +36,7 @@ public String getName() { @Override protected RestChannelConsumer prepareRequest(final RestRequest request, final NodeClient client) { - final var req = new GetDatabaseConfigurationAction.Request( - getMasterNodeTimeout(request), - getAckTimeout(request), - Strings.splitStringByCommaToArray(request.param("id")) - ); + final var req = new GetDatabaseConfigurationAction.Request(Strings.splitStringByCommaToArray(request.param("id"))); return channel -> client.execute(GetDatabaseConfigurationAction.INSTANCE, req, new RestToXContentListener<>(channel)); } } diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/TransportGetDatabaseConfigurationAction.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/TransportGetDatabaseConfigurationAction.java index 0606882258d6a..730ae6d8b8ae5 100644 --- a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/TransportGetDatabaseConfigurationAction.java +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/TransportGetDatabaseConfigurationAction.java @@ -11,21 +11,21 @@ import 
org.elasticsearch.ResourceNotFoundException; import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.FailedNodeException; import org.elasticsearch.action.support.ActionFilters; -import org.elasticsearch.action.support.master.TransportMasterNodeAction; -import org.elasticsearch.cluster.ClusterState; -import org.elasticsearch.cluster.block.ClusterBlockException; -import org.elasticsearch.cluster.block.ClusterBlockLevel; -import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; +import org.elasticsearch.action.support.nodes.TransportNodesAction; +import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.regex.Regex; -import org.elasticsearch.common.util.concurrent.EsExecutors; +import org.elasticsearch.features.FeatureService; import org.elasticsearch.ingest.geoip.IngestGeoIpMetadata; import org.elasticsearch.injection.guice.Inject; import org.elasticsearch.tasks.Task; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.transport.TransportService; +import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.LinkedHashSet; @@ -33,9 +33,16 @@ import java.util.Map; import java.util.Set; -public class TransportGetDatabaseConfigurationAction extends TransportMasterNodeAction< +import static org.elasticsearch.ingest.IngestGeoIpFeatures.GET_DATABASE_CONFIGURATION_ACTION_MULTI_NODE; + +public class TransportGetDatabaseConfigurationAction extends TransportNodesAction< GetDatabaseConfigurationAction.Request, - GetDatabaseConfigurationAction.Response> { + GetDatabaseConfigurationAction.Response, + GetDatabaseConfigurationAction.NodeRequest, + GetDatabaseConfigurationAction.NodeResponse, + List> { + + private final FeatureService featureService; @Inject public TransportGetDatabaseConfigurationAction( @@ -43,28 +50,39 @@ public TransportGetDatabaseConfigurationAction( ClusterService clusterService, ThreadPool threadPool, ActionFilters actionFilters, - IndexNameExpressionResolver indexNameExpressionResolver + FeatureService featureService ) { super( GetDatabaseConfigurationAction.NAME, - transportService, clusterService, - threadPool, + transportService, actionFilters, - GetDatabaseConfigurationAction.Request::new, - indexNameExpressionResolver, - GetDatabaseConfigurationAction.Response::new, - EsExecutors.DIRECT_EXECUTOR_SERVICE + GetDatabaseConfigurationAction.NodeRequest::new, + threadPool.executor(ThreadPool.Names.MANAGEMENT) ); + this.featureService = featureService; } @Override - protected void masterOperation( - final Task task, - final GetDatabaseConfigurationAction.Request request, - final ClusterState state, - final ActionListener listener + protected void doExecute( + Task task, + GetDatabaseConfigurationAction.Request request, + ActionListener listener ) { + if (featureService.clusterHasFeature(clusterService.state(), GET_DATABASE_CONFIGURATION_ACTION_MULTI_NODE) == false) { + /* + * TransportGetDatabaseConfigurationAction used to be a TransportMasterNodeAction, and not all nodes in the cluster have been + * updated. So we don't want to send node requests to the other nodes because they will blow up. Instead, we just return + * the information that we used to return from the master node (it doesn't make any difference that this might not be the master + * node, because we're only reading the cluster state). 
+ */ + newResponseAsync(task, request, createActionContext(task, request), List.of(), List.of(), listener); + } else { + super.doExecute(task, request, listener); + } + } + + protected List createActionContext(Task task, GetDatabaseConfigurationAction.Request request) { final Set ids; if (request.getDatabaseIds().length == 0) { // if we did not ask for a specific name, then return all databases @@ -79,7 +97,7 @@ protected void masterOperation( ); } - final IngestGeoIpMetadata geoIpMeta = state.metadata().custom(IngestGeoIpMetadata.TYPE, IngestGeoIpMetadata.EMPTY); + final IngestGeoIpMetadata geoIpMeta = clusterService.state().metadata().custom(IngestGeoIpMetadata.TYPE, IngestGeoIpMetadata.EMPTY); List results = new ArrayList<>(); for (String id : ids) { @@ -92,19 +110,54 @@ protected void masterOperation( } else { DatabaseConfigurationMetadata meta = geoIpMeta.getDatabases().get(id); if (meta == null) { - listener.onFailure(new ResourceNotFoundException("database configuration not found: {}", id)); - return; + throw new ResourceNotFoundException("database configuration not found: {}", id); } else { results.add(meta); } } } + return results; + } + + protected void newResponseAsync( + Task task, + GetDatabaseConfigurationAction.Request request, + List results, + List responses, + List failures, + ActionListener listener + ) { + ActionListener.run( + listener, + l -> ActionListener.respondAndRelease( + l, + new GetDatabaseConfigurationAction.Response(results, clusterService.getClusterName(), responses, failures) + ) + ); + } - listener.onResponse(new GetDatabaseConfigurationAction.Response(results)); + @Override + protected GetDatabaseConfigurationAction.Response newResponse( + GetDatabaseConfigurationAction.Request request, + List nodeResponses, + List failures + ) { + throw new UnsupportedOperationException("Use newResponseAsync instead"); } @Override - protected ClusterBlockException checkBlock(GetDatabaseConfigurationAction.Request request, ClusterState state) { - return state.blocks().globalBlockedException(ClusterBlockLevel.METADATA_READ); + protected GetDatabaseConfigurationAction.NodeRequest newNodeRequest(GetDatabaseConfigurationAction.Request request) { + return new GetDatabaseConfigurationAction.NodeRequest(request.getDatabaseIds()); } + + @Override + protected GetDatabaseConfigurationAction.NodeResponse newNodeResponse(StreamInput in, DiscoveryNode node) throws IOException { + return new GetDatabaseConfigurationAction.NodeResponse(in); + } + + @Override + protected GetDatabaseConfigurationAction.NodeResponse nodeOperation(GetDatabaseConfigurationAction.NodeRequest request, Task task) { + return new GetDatabaseConfigurationAction.NodeResponse(transportService.getLocalNode(), List.of()); + } + } diff --git a/server/src/main/java/org/elasticsearch/ingest/IngestGeoIpFeatures.java b/server/src/main/java/org/elasticsearch/ingest/IngestGeoIpFeatures.java index 6cadb515e1d2c..1933d285d7870 100644 --- a/server/src/main/java/org/elasticsearch/ingest/IngestGeoIpFeatures.java +++ b/server/src/main/java/org/elasticsearch/ingest/IngestGeoIpFeatures.java @@ -17,7 +17,12 @@ import static org.elasticsearch.ingest.EnterpriseGeoIpTask.GEOIP_DOWNLOADER_DATABASE_CONFIGURATION; public class IngestGeoIpFeatures implements FeatureSpecification { + + public static final NodeFeature GET_DATABASE_CONFIGURATION_ACTION_MULTI_NODE = new NodeFeature( + "get_database_configuration_action.multi_node" + ); + public Set getFeatures() { - return Set.of(GEOIP_DOWNLOADER_DATABASE_CONFIGURATION); + return 
Set.of(GEOIP_DOWNLOADER_DATABASE_CONFIGURATION, GET_DATABASE_CONFIGURATION_ACTION_MULTI_NODE); } } From 13a34b019fea31b0fbdc1c1a5d44867b3b290ad3 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Tue, 24 Sep 2024 13:57:46 -0400 Subject: [PATCH 35/58] ESQL: Speed up CASE for some parameters (#112295) (#113487) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This speeds up the `CASE` function when it has two or three arguments and both of the arguments are constants or fields. This works because `CASE` is lazy so it can avoid warnings in cases like
```
CASE(foo != 0, 2 / foo, 1)
```
And, in the case where the function is *very* slow, it can avoid the computations. But if the lhs and rhs of the `CASE` are constant then there isn't any work to avoid. The performance improvement is pretty substantial:
```
(operation)    Before   Error     After   Error   Units
case_1_lazy    97.422 ± 1.048   101.571 ± 0.737   ns/op
case_1_eager   79.312 ± 1.190     4.601 ± 0.049   ns/op
```
The top line is a `CASE` that has to be lazy - it shouldn't change. The 4 nanos change here is noise. The eager version improves by about 94%. --- .../compute/operator/EvalBenchmark.java | 48 ++++++ docs/changelog/112295.yaml | 5 + .../compute/operator/EvalOperator.java | 11 ++ .../xpack/esql/evaluator/EvalMapper.java | 10 ++ .../function/scalar/conditional/Case.java | 138 +++++++++++++++--- .../scalar/conditional/CaseTests.java | 37 ++--- 6 files changed, 210 insertions(+), 39 deletions(-) create mode 100644 docs/changelog/112295.yaml diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/compute/operator/EvalBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/compute/operator/EvalBenchmark.java index d785cbeaffc60..9aab4a3e3210f 100644 --- a/benchmarks/src/main/java/org/elasticsearch/benchmark/compute/operator/EvalBenchmark.java +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/compute/operator/EvalBenchmark.java @@ -25,6 +25,7 @@ import org.elasticsearch.compute.operator.EvalOperator; import org.elasticsearch.compute.operator.Operator; import org.elasticsearch.core.TimeValue; +import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.expression.Literal; import org.elasticsearch.xpack.esql.core.expression.predicate.regex.RLikePattern; @@ -32,6 +33,7 @@ import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.core.type.EsField; import org.elasticsearch.xpack.esql.evaluator.EvalMapper; +import org.elasticsearch.xpack.esql.expression.function.scalar.conditional.Case; import org.elasticsearch.xpack.esql.expression.function.scalar.date.DateTrunc; import org.elasticsearch.xpack.esql.expression.function.scalar.math.Abs; import org.elasticsearch.xpack.esql.expression.function.scalar.multivalue.MvMin; @@ -53,6 +55,7 @@ import java.time.Duration; import java.util.Arrays; +import java.util.List; import java.util.Map; import java.util.concurrent.TimeUnit; @@ -91,6 +94,8 @@ public class EvalBenchmark { "abs", "add", "add_double", + "case_1_eager", + "case_1_lazy", "date_trunc", "equal_to_const", "long_equal_to_long", @@ -125,6 +130,18 @@ private static EvalOperator.ExpressionEvaluator evaluator(String operation) { layout(doubleField) ).get(driverContext); } + case "case_1_eager", "case_1_lazy" -> { + FieldAttribute f1 = longField(); + FieldAttribute f2 = longField(); + Expression condition = new Equals(Source.EMPTY, f1, new 
Literal(Source.EMPTY, 1L, DataType.LONG)); + Expression lhs = f1; + Expression rhs = f2; + if (operation.endsWith("lazy")) { + lhs = new Add(Source.EMPTY, lhs, new Literal(Source.EMPTY, 1L, DataType.LONG)); + rhs = new Add(Source.EMPTY, rhs, new Literal(Source.EMPTY, 1L, DataType.LONG)); + } + yield EvalMapper.toEvaluator(new Case(Source.EMPTY, condition, List.of(lhs, rhs)), layout(f1, f2)).get(driverContext); + } case "date_trunc" -> { FieldAttribute timestamp = new FieldAttribute( Source.EMPTY, @@ -216,6 +233,28 @@ private static void checkExpected(String operation, Page actual) { } } } + case "case_1_eager" -> { + LongVector f1 = actual.getBlock(0).asVector(); + LongVector f2 = actual.getBlock(1).asVector(); + LongVector result = actual.getBlock(2).asVector(); + for (int i = 0; i < BLOCK_LENGTH; i++) { + long expected = f1.getLong(i) == 1 ? f1.getLong(i) : f2.getLong(i); + if (result.getLong(i) != expected) { + throw new AssertionError("[" + operation + "] expected [" + expected + "] but was [" + result.getLong(i) + "]"); + } + } + } + case "case_1_lazy" -> { + LongVector f1 = actual.getBlock(0).asVector(); + LongVector f2 = actual.getBlock(1).asVector(); + LongVector result = actual.getBlock(2).asVector(); + for (int i = 0; i < BLOCK_LENGTH; i++) { + long expected = 1 + (f1.getLong(i) == 1 ? f1.getLong(i) : f2.getLong(i)); + if (result.getLong(i) != expected) { + throw new AssertionError("[" + operation + "] expected [" + expected + "] but was [" + result.getLong(i) + "]"); + } + } + } case "date_trunc" -> { LongVector v = actual.getBlock(1).asVector(); long oneDay = TimeValue.timeValueHours(24).millis(); @@ -280,6 +319,15 @@ private static Page page(String operation) { } yield new Page(builder.build()); } + case "case_1_eager", "case_1_lazy" -> { + var f1 = blockFactory.newLongBlockBuilder(BLOCK_LENGTH); + var f2 = blockFactory.newLongBlockBuilder(BLOCK_LENGTH); + for (int i = 0; i < BLOCK_LENGTH; i++) { + f1.appendLong(i); + f2.appendLong(-i); + } + yield new Page(f1.build(), f2.build()); + } case "long_equal_to_long" -> { var lhs = blockFactory.newLongBlockBuilder(BLOCK_LENGTH); var rhs = blockFactory.newLongBlockBuilder(BLOCK_LENGTH); diff --git a/docs/changelog/112295.yaml b/docs/changelog/112295.yaml new file mode 100644 index 0000000000000..ecbd365d03918 --- /dev/null +++ b/docs/changelog/112295.yaml @@ -0,0 +1,5 @@ +pr: 112295 +summary: "ESQL: Speed up CASE for some parameters" +area: ES|QL +type: enhancement +issues: [] diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/EvalOperator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/EvalOperator.java index 10f23ed29094f..349ce7b00ff10 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/EvalOperator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/EvalOperator.java @@ -63,6 +63,17 @@ public interface ExpressionEvaluator extends Releasable { /** A Factory for creating ExpressionEvaluators. */ interface Factory { ExpressionEvaluator get(DriverContext context); + + /** + * {@code true} if it is safe and fast to evaluate this expression eagerly + * in {@link ExpressionEvaluator}s that need to be lazy, like {@code CASE}. + * This defaults to {@code false}, but expressions + * that evaluate quickly and can not produce warnings may override this to + * {@code true} to get a significant speed-up in {@code CASE}-like operations. 
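+ * For example, {@code CASE(f == 1, f, g)}, where {@code f} and {@code g} are
+ * plain field loads, is safe to evaluate eagerly, while
+ * {@code CASE(foo != 0, 2 / foo, 1)} must stay lazy so the division is never
+ * evaluated (and never warns) when {@code foo} is {@code 0}.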
+ */ + default boolean eagerEvalSafeInLazy() { + return false; + } } /** diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/EvalMapper.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/EvalMapper.java index d36ab3e18f336..9a2e9398f52fd 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/EvalMapper.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/evaluator/EvalMapper.java @@ -176,6 +176,11 @@ public ExpressionEvaluator get(DriverContext driverContext) { public String toString() { return "Attribute[channel=" + channel + "]"; } + + @Override + public boolean eagerEvalSafeInLazy() { + return true; + } } return new AttributeFactory(layout.get(attr.id()).channel()); } @@ -209,6 +214,11 @@ public ExpressionEvaluator get(DriverContext driverContext) { public String toString() { return "LiteralsEvaluator[lit=" + lit + "]"; } + + @Override + public boolean eagerEvalSafeInLazy() { + return true; + } } return new LiteralsEvaluatorFactory(lit); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/Case.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/Case.java index 979f681a7fbd0..6acb8ea974ed0 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/Case.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/Case.java @@ -15,6 +15,7 @@ import org.elasticsearch.compute.data.BooleanBlock; import org.elasticsearch.compute.data.ElementType; import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.data.ToMask; import org.elasticsearch.compute.operator.DriverContext; import org.elasticsearch.compute.operator.EvalOperator; import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator; @@ -311,25 +312,16 @@ private Expression finishPartialFold(List newChildren) { @Override public ExpressionEvaluator.Factory toEvaluator(Function toEvaluator) { - ElementType resultType = PlannerUtils.toElementType(dataType()); List conditionsFactories = conditions.stream().map(c -> c.toEvaluator(toEvaluator)).toList(); ExpressionEvaluator.Factory elseValueFactory = toEvaluator.apply(elseValue); - return new ExpressionEvaluator.Factory() { - @Override - public ExpressionEvaluator get(DriverContext context) { - return new CaseEvaluator( - context.blockFactory(), - resultType, - conditionsFactories.stream().map(x -> x.apply(context)).toList(), - elseValueFactory.get(context) - ); - } + ElementType resultType = PlannerUtils.toElementType(dataType()); - @Override - public String toString() { - return "CaseEvaluator[conditions=" + conditionsFactories + ", elseVal=" + elseValueFactory + ']'; - } - }; + if (conditionsFactories.size() == 1 + && conditionsFactories.get(0).value.eagerEvalSafeInLazy() + && elseValueFactory.eagerEvalSafeInLazy()) { + return new CaseEagerEvaluatorFactory(resultType, conditionsFactories.get(0), elseValueFactory); + } + return new CaseLazyEvaluatorFactory(resultType, conditionsFactories, elseValueFactory); } record ConditionEvaluatorSupplier(Source conditionSource, ExpressionEvaluator.Factory condition, ExpressionEvaluator.Factory value) @@ -375,9 +367,42 @@ public void close() { public String toString() { return "ConditionEvaluator[condition=" + condition + ", value=" + value + ']'; } + + public void registerMultivalue() { + 
conditionWarnings.registerException(new IllegalArgumentException("CASE expects a single-valued boolean")); + } } - private record CaseEvaluator( + private record CaseLazyEvaluatorFactory( + ElementType resultType, + List conditionsFactories, + ExpressionEvaluator.Factory elseValueFactory + ) implements ExpressionEvaluator.Factory { + @Override + public ExpressionEvaluator get(DriverContext context) { + List conditions = new ArrayList<>(conditionsFactories.size()); + ExpressionEvaluator elseValue = null; + try { + for (ConditionEvaluatorSupplier cond : conditionsFactories) { + conditions.add(cond.apply(context)); + } + elseValue = elseValueFactory.get(context); + ExpressionEvaluator result = new CaseLazyEvaluator(context.blockFactory(), resultType, conditions, elseValue); + conditions = null; + elseValue = null; + return result; + } finally { + Releasables.close(conditions == null ? () -> {} : Releasables.wrap(conditions), elseValue); + } + } + + @Override + public String toString() { + return "CaseLazyEvaluator[conditions=" + conditionsFactories + ", elseVal=" + elseValueFactory + ']'; + } + } + + private record CaseLazyEvaluator( BlockFactory blockFactory, ElementType resultType, List conditions, @@ -409,9 +434,7 @@ public Block eval(Page page) { continue; } if (b.getValueCount(0) > 1) { - condition.conditionWarnings.registerException( - new IllegalArgumentException("CASE expects a single-valued boolean") - ); + condition.registerMultivalue(); continue; } if (false == b.getBoolean(b.getFirstValueIndex(0))) { @@ -439,7 +462,80 @@ public void close() { @Override public String toString() { - return "CaseEvaluator[conditions=" + conditions + ", elseVal=" + elseVal + ']'; + return "CaseLazyEvaluator[conditions=" + conditions + ", elseVal=" + elseVal + ']'; + } + } + + private record CaseEagerEvaluatorFactory( + ElementType resultType, + ConditionEvaluatorSupplier conditionFactory, + ExpressionEvaluator.Factory elseValueFactory + ) implements ExpressionEvaluator.Factory { + @Override + public ExpressionEvaluator get(DriverContext context) { + ConditionEvaluator conditionEvaluator = conditionFactory.apply(context); + ExpressionEvaluator elseValue = null; + try { + elseValue = elseValueFactory.get(context); + ExpressionEvaluator result = new CaseEagerEvaluator(resultType, context.blockFactory(), conditionEvaluator, elseValue); + conditionEvaluator = null; + elseValue = null; + return result; + } finally { + Releasables.close(conditionEvaluator, elseValue); + } + } + + @Override + public String toString() { + return "CaseEagerEvaluator[conditions=[" + conditionFactory + "], elseVal=" + elseValueFactory + ']'; + } + } + + private record CaseEagerEvaluator( + ElementType resultType, + BlockFactory blockFactory, + ConditionEvaluator condition, + EvalOperator.ExpressionEvaluator elseVal + ) implements EvalOperator.ExpressionEvaluator { + @Override + public Block eval(Page page) { + try (BooleanBlock lhsOrRhsBlock = (BooleanBlock) condition.condition.eval(page); ToMask lhsOrRhs = lhsOrRhsBlock.toMask()) { + if (lhsOrRhs.hadMultivaluedFields()) { + condition.registerMultivalue(); + } + if (lhsOrRhs.mask().isConstant()) { + if (lhsOrRhs.mask().getBoolean(0)) { + return condition.value.eval(page); + } else { + return elseVal.eval(page); + } + } + try ( + Block lhs = condition.value.eval(page); + Block rhs = elseVal.eval(page); + Block.Builder builder = resultType.newBlockBuilder(lhs.getTotalValueCount(), blockFactory) + ) { + for (int p = 0; p < lhs.getPositionCount(); p++) { + if 
(lhsOrRhs.mask().getBoolean(p)) { + builder.copyFrom(lhs, p, p + 1); + } else { + builder.copyFrom(rhs, p, p + 1); + } + } + return builder.build(); + } + } + } + + @Override + public void close() { + Releasables.closeExpectNoException(condition, elseVal); + } + + @Override + public String toString() { + return "CaseEagerEvaluator[conditions=[" + condition + "], elseVal=" + elseVal + ']'; } } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/CaseTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/CaseTests.java index 616e70191ee22..9d0d9c3da30a8 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/CaseTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/CaseTests.java @@ -123,9 +123,7 @@ public static Iterable parameters() { ) ); } - return - - parameterSuppliersFromTypedData(suppliers); + return parameterSuppliersFromTypedData(suppliers); } private static void twoAndThreeArgs( @@ -191,7 +189,7 @@ private static void twoAndThreeArgs( type, typedData, lhs, - startsWith("CaseEvaluator[conditions=[ConditionEvaluator[condition=LiteralsEvaluator"), + startsWith("CaseEagerEvaluator[conditions=[ConditionEvaluator[condition=LiteralsEvaluator"), false, List.of(typedData.get(1)), addBuildEvaluatorWarnings(warnings) @@ -240,7 +238,7 @@ private static void twoAndThreeArgs( type, typedData, lhsOrRhs ? lhs : rhs, - startsWith("CaseEvaluator[conditions=[ConditionEvaluator[condition=LiteralsEvaluator"), + startsWith("CaseEagerEvaluator[conditions=[ConditionEvaluator[condition=LiteralsEvaluator"), false, List.of(typedData.get(lhsOrRhs ? 1 : 2)), addWarnings(warnings) @@ -262,7 +260,7 @@ private static void twoAndThreeArgs( type, typedData, lhsOrRhs ? lhs : null, - startsWith("CaseEvaluator[conditions=[ConditionEvaluator[condition="), + startsWith("CaseEagerEvaluator[conditions=[ConditionEvaluator[condition="), false, null, addWarnings(warnings) @@ -285,7 +283,7 @@ private static void twoAndThreeArgs( type, typedData, lhsOrRhs ? lhs : rhs, - startsWith("CaseEvaluator[conditions=[ConditionEvaluator[condition="), + startsWith("CaseEagerEvaluator[conditions=[ConditionEvaluator[condition="), false, null, addWarnings(warnings) @@ -296,7 +294,7 @@ private static void twoAndThreeArgs( } suppliers.add( new TestCaseSupplier( - TestCaseSupplier.nameFrom(Arrays.asList(DataType.BOOLEAN, DataType.NULL, type)), + TestCaseSupplier.nameFrom(Arrays.asList(cond, DataType.NULL, type)), List.of(DataType.BOOLEAN, DataType.NULL, type), () -> { Object rhs = randomLiteral(type).value(); @@ -309,7 +307,7 @@ private static void twoAndThreeArgs( type, typedData, lhsOrRhs ? null : rhs, - startsWith("CaseEvaluator[conditions=[ConditionEvaluator[condition="), + startsWith("CaseEagerEvaluator[conditions=[ConditionEvaluator[condition="), false, null, addWarnings(warnings) @@ -319,7 +317,7 @@ private static void twoAndThreeArgs( ); suppliers.add( new TestCaseSupplier( - TestCaseSupplier.nameFrom(Arrays.asList(DataType.BOOLEAN, type, DataType.NULL)), + TestCaseSupplier.nameFrom(Arrays.asList(cond, type, DataType.NULL)), List.of(DataType.BOOLEAN, type, DataType.NULL), () -> { Object lhs = randomLiteral(type).value(); @@ -332,7 +330,7 @@ private static void twoAndThreeArgs( type, typedData, lhsOrRhs ? 
lhs : null, - startsWith("CaseEvaluator[conditions=[ConditionEvaluator[condition="), + startsWith("CaseEagerEvaluator[conditions=[ConditionEvaluator[condition="), false, null, addWarnings(warnings) @@ -445,7 +443,7 @@ private static void fourAndFiveArgs( type, typedData, r1, - startsWith("CaseEvaluator[conditions=[ConditionEvaluator[condition=LiteralsEvaluator[lit="), + startsWith("CaseLazyEvaluator[conditions=[ConditionEvaluator[condition=LiteralsEvaluator[lit="), false, List.of(typedData.get(1)), addBuildEvaluatorWarnings(warnings) @@ -501,7 +499,7 @@ private static void fourAndFiveArgs( type, typedData, r2, - startsWith("CaseEvaluator[conditions=[ConditionEvaluator[condition=LiteralsEvaluator[lit="), + startsWith("CaseLazyEvaluator[conditions=[ConditionEvaluator[condition=LiteralsEvaluator[lit="), false, List.of(typedData.get(3)), addWarnings(warnings) @@ -526,7 +524,7 @@ private static void fourAndFiveArgs( type, typedData, r2, - startsWith("CaseEvaluator[conditions=[ConditionEvaluator[condition=LiteralsEvaluator[lit="), + startsWith("CaseLazyEvaluator[conditions=[ConditionEvaluator[condition=LiteralsEvaluator[lit="), false, List.of(typedData.get(3)), addWarnings(warnings) @@ -551,7 +549,7 @@ private static void fourAndFiveArgs( type, typedData, r2, - startsWith("CaseEvaluator[conditions=[ConditionEvaluator[condition=LiteralsEvaluator[lit="), + startsWith("CaseLazyEvaluator[conditions=[ConditionEvaluator[condition=LiteralsEvaluator[lit="), false, typedData.subList(2, 4), addWarnings(warnings) @@ -607,7 +605,7 @@ private static void fourAndFiveArgs( type, typedData, r3, - startsWith("CaseEvaluator[conditions=[ConditionEvaluator[condition=LiteralsEvaluator[lit="), + startsWith("CaseLazyEvaluator[conditions=[ConditionEvaluator[condition=LiteralsEvaluator[lit="), false, List.of(typedData.get(4)), addWarnings(warnings) @@ -634,7 +632,7 @@ private static void fourAndFiveArgs( type, typedData, r3, - startsWith("CaseEvaluator[conditions=[ConditionEvaluator[condition=LiteralsEvaluator[lit="), + startsWith("CaseLazyEvaluator[conditions=[ConditionEvaluator[condition=LiteralsEvaluator[lit="), false, typedData.subList(2, 5), addWarnings(warnings) @@ -648,7 +646,10 @@ private static void fourAndFiveArgs( } private static Matcher toStringMatcher(int conditions, boolean trailingNull) { - StringBuilder result = new StringBuilder("CaseEvaluator[conditions=["); + StringBuilder result = new StringBuilder(); + result.append("Case"); + result.append(conditions == 1 ? "Eager" : "Lazy"); + result.append("Evaluator[conditions=["); int channel = 0; for (int i = 0; i < conditions; i++) { if (i != 0) { From c8b720ed200408e62ae2bf4da84600071e6cce2e Mon Sep 17 00:00:00 2001 From: Stanislav Malyshev Date: Tue, 24 Sep 2024 14:24:45 -0600 Subject: [PATCH 36/58] Improve date expression/remote handling in index names (#112405) (#113490) * Improve date expression/remote handling The original code did not account for the possibility of the date expression being prefixed with -. 
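A minimal sketch of the failure mode and the fix (the index expression below is illustrative; the strip-the-dash check mirrors the change in this patch):

```java
// An excluded date math expression is prefixed with '-', e.g. "-<logs-{now/d}>".
// Strip the leading '-' before resolving date math, then look for the
// remote-cluster separator ':' in the resolved name.
String index = "-<logs-{now/d}>"; // hypothetical input
String toResolve = index.charAt(0) == '-' ? index.substring(1) : index;
// toResolve is "<logs-{now/d}>", which resolveDateMathExpression can evaluate;
// the resolved name contains no ':' and is therefore treated as local, not remote.
```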
--- docs/changelog/112405.yaml | 6 + .../reindex/ReindexValidator.java | 2 + .../search/ccs/CrossClusterSearchIT.java | 160 ++++++++++++++---- .../resolve/ResolveClusterActionRequest.java | 5 +- .../transport/RemoteClusterAware.java | 9 +- .../transport/RemoteClusterAwareTests.java | 146 ++++++++++++++++ 6 files changed, 292 insertions(+), 36 deletions(-) create mode 100644 docs/changelog/112405.yaml diff --git a/docs/changelog/112405.yaml b/docs/changelog/112405.yaml new file mode 100644 index 0000000000000..4e9f095fb80a8 --- /dev/null +++ b/docs/changelog/112405.yaml @@ -0,0 +1,6 @@ +pr: 112405 +summary: Improve date expression/remote handling in index names +area: Search +type: bug +issues: + - 112243 diff --git a/modules/reindex/src/main/java/org/elasticsearch/reindex/ReindexValidator.java b/modules/reindex/src/main/java/org/elasticsearch/reindex/ReindexValidator.java index 35fd8e98bfb50..4d18f00ab572d 100644 --- a/modules/reindex/src/main/java/org/elasticsearch/reindex/ReindexValidator.java +++ b/modules/reindex/src/main/java/org/elasticsearch/reindex/ReindexValidator.java @@ -168,6 +168,8 @@ private static boolean isRemoteExpression(String expression) { // to distinguish between those two, given `expression` is pre-evaluated using date-math resolver // after evaluation date-math `expression` should not contain ':' symbol // otherwise if `expression` is legit remote name, ':' symbol remains + // NOTE: index expressions can be prefixed with "-", which will not be parsed by resolveDateMathExpression, + // but in this particular case it doesn't seem to be relevant. return IndexNameExpressionResolver.resolveDateMathExpression(expression) .contains(String.valueOf(RemoteClusterAware.REMOTE_CLUSTER_INDEX_SEPARATOR)); } diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/ccs/CrossClusterSearchIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/ccs/CrossClusterSearchIT.java index 95cb4783e6b43..223ee81e84a92 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/ccs/CrossClusterSearchIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/ccs/CrossClusterSearchIT.java @@ -40,11 +40,13 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponse; +import static org.hamcrest.Matchers.anyOf; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; @@ -662,6 +664,97 @@ public void testRemoteClusterOnlyCCSWithFailuresOnAllShards() throws Exception { } } + public void testDateMathIndexes() throws ExecutionException, InterruptedException { + Map testClusterInfo = setupTwoClusters( + new String[] { "datemath-2001-01-01-14", "datemath-2001-01-01-15" }, + new String[] { "remotemath-2001-01-01-14", "remotemath-2001-01-01-15" } + ); + SearchRequest searchRequest = new SearchRequest( + REMOTE_CLUSTER + ":", + "" + ); + searchRequest.setCcsMinimizeRoundtrips(randomBoolean()); + searchRequest.allowPartialSearchResults(false); + searchRequest.source(new SearchSourceBuilder().query(new MatchAllQueryBuilder()).size(5000)); + assertResponse(client(LOCAL_CLUSTER).search(searchRequest), response -> { + assertNotNull(response); + Clusters clusters = 
response.getClusters(); + assertFalse("search cluster results should NOT be marked as partial", clusters.hasPartialResults()); + assertThat(clusters.getTotal(), equalTo(2)); + Cluster localClusterSearchInfo = clusters.getCluster(RemoteClusterAware.LOCAL_CLUSTER_GROUP_KEY); + assertNotNull(localClusterSearchInfo); + Cluster remoteClusterSearchInfo = clusters.getCluster(REMOTE_CLUSTER); + assertNotNull(remoteClusterSearchInfo); + assertThat(Objects.requireNonNull(response.getHits().getTotalHits()).value, greaterThan(2L)); + for (var hit : response.getHits()) { + assertThat(hit.getIndex(), anyOf(equalTo("datemath-2001-01-01-14"), equalTo("remotemath-2001-01-01-14"))); + } + }); + } + + /** + * Test for issue https://github.com/elastic/elasticsearch/issues/112243 + */ + public void testDateMathNegativeIndexesLocal() throws ExecutionException, InterruptedException { + Map testClusterInfo = setupTwoClusters( + new String[] { "datemath-2001-01-01-14", "datemath-2001-01-01-15" }, + new String[] { "datemath-2001-01-01-14", "datemath-2001-01-01-15" } + ); + SearchRequest searchRequest = new SearchRequest("da*", "-"); + searchRequest.allowPartialSearchResults(false); + searchRequest.source(new SearchSourceBuilder().query(new MatchAllQueryBuilder()).size(5000)); + assertResponse(client(LOCAL_CLUSTER).search(searchRequest), response -> { + assertNotNull(response); + Clusters clusters = response.getClusters(); + assertFalse("search cluster results should NOT be marked as partial", clusters.hasPartialResults()); + assertThat(clusters.getTotal(), equalTo(0)); + for (var hit : response.getHits()) { + assertThat(hit.getIndex(), equalTo("datemath-2001-01-01-15")); + } + }); + } + + /** + * Test for issue https://github.com/elastic/elasticsearch/issues/112243 + */ + public void testDateMathNegativeIndexesRemote() throws ExecutionException, InterruptedException { + Map testClusterInfo = setupTwoClusters( + new String[] { "datemath-2001-01-01-14", "datemath-2001-01-01-15" }, + new String[] { "datemath-2001-01-01-14", "datemath-2001-01-01-15" } + ); + SearchRequest searchRequest = new SearchRequest( + REMOTE_CLUSTER + ":*", + REMOTE_CLUSTER + ":-" + ); + searchRequest.setCcsMinimizeRoundtrips(randomBoolean()); + searchRequest.allowPartialSearchResults(false); + searchRequest.source(new SearchSourceBuilder().query(new MatchAllQueryBuilder()).size(5000)); + assertResponse(client(LOCAL_CLUSTER).search(searchRequest), response -> { + assertNotNull(response); + Clusters clusters = response.getClusters(); + assertFalse("search cluster results should NOT be marked as partial", clusters.hasPartialResults()); + assertThat(clusters.getTotal(), equalTo(1)); + Cluster localClusterSearchInfo = clusters.getCluster(RemoteClusterAware.LOCAL_CLUSTER_GROUP_KEY); + assertNull(localClusterSearchInfo); + Cluster remoteClusterSearchInfo = clusters.getCluster(REMOTE_CLUSTER); + assertNotNull(remoteClusterSearchInfo); + for (var hit : response.getHits()) { + assertThat(hit.getIndex(), equalTo("datemath-2001-01-01-15")); + } + }); + } + + public void testNegativeRemoteIndexNameThrows() { + SearchRequest searchRequest = new SearchRequest("*:*", "-" + REMOTE_CLUSTER + ":prod"); + searchRequest.setCcsMinimizeRoundtrips(true); + searchRequest.allowPartialSearchResults(false); + searchRequest.source(new SearchSourceBuilder().query(new MatchAllQueryBuilder()).size(5000)); + var queryFuture = client(LOCAL_CLUSTER).search(searchRequest); + // This should throw the wildcard error + ExecutionException ee = expectThrows(ExecutionException.class, 
queryFuture::get); + assertNotNull(ee.getCause()); + } + private static void assertOneFailedShard(Cluster cluster, int totalShards) { assertNotNull(cluster); assertThat(cluster.getStatus(), equalTo(Cluster.Status.PARTIAL)); @@ -675,40 +768,42 @@ private static void assertOneFailedShard(Cluster cluster, int totalShards) { assertTrue("should have 'index corrupted' in reason", remoteShardSearchFailure.reason().contains("index corrupted")); } - private Map setupTwoClusters() { - String localIndex = "demo"; + private Map setupTwoClusters(String[] localIndices, String[] remoteIndices) { int numShardsLocal = randomIntBetween(2, 10); Settings localSettings = indexSettings(numShardsLocal, randomIntBetween(0, 1)).build(); - assertAcked( - client(LOCAL_CLUSTER).admin() - .indices() - .prepareCreate(localIndex) - .setSettings(localSettings) - .setMapping("@timestamp", "type=date", "f", "type=text") - ); - indexDocs(client(LOCAL_CLUSTER), localIndex); + for (String localIndex : localIndices) { + assertAcked( + client(LOCAL_CLUSTER).admin() + .indices() + .prepareCreate(localIndex) + .setSettings(localSettings) + .setMapping("@timestamp", "type=date", "f", "type=text") + ); + indexDocs(client(LOCAL_CLUSTER), localIndex); + } - String remoteIndex = "prod"; int numShardsRemote = randomIntBetween(2, 10); final InternalTestCluster remoteCluster = cluster(REMOTE_CLUSTER); remoteCluster.ensureAtLeastNumDataNodes(randomIntBetween(1, 3)); - assertAcked( - client(REMOTE_CLUSTER).admin() - .indices() - .prepareCreate(remoteIndex) - .setSettings(indexSettings(numShardsRemote, randomIntBetween(0, 1))) - .setMapping("@timestamp", "type=date", "f", "type=text") - ); - assertFalse( - client(REMOTE_CLUSTER).admin() - .cluster() - .prepareHealth(TEST_REQUEST_TIMEOUT, remoteIndex) - .setWaitForYellowStatus() - .setTimeout(TimeValue.timeValueSeconds(10)) - .get() - .isTimedOut() - ); - indexDocs(client(REMOTE_CLUSTER), remoteIndex); + for (String remoteIndex : remoteIndices) { + assertAcked( + client(REMOTE_CLUSTER).admin() + .indices() + .prepareCreate(remoteIndex) + .setSettings(indexSettings(numShardsRemote, randomIntBetween(0, 1))) + .setMapping("@timestamp", "type=date", "f", "type=text") + ); + assertFalse( + client(REMOTE_CLUSTER).admin() + .cluster() + .prepareHealth(TEST_REQUEST_TIMEOUT, remoteIndex) + .setWaitForYellowStatus() + .setTimeout(TimeValue.timeValueSeconds(10)) + .get() + .isTimedOut() + ); + indexDocs(client(REMOTE_CLUSTER), remoteIndex); + } String skipUnavailableKey = Strings.format("cluster.remote.%s.skip_unavailable", REMOTE_CLUSTER); Setting skipUnavailableSetting = cluster(REMOTE_CLUSTER).clusterService().getClusterSettings().get(skipUnavailableKey); @@ -718,13 +813,18 @@ private Map setupTwoClusters() { Map clusterInfo = new HashMap<>(); clusterInfo.put("local.num_shards", numShardsLocal); - clusterInfo.put("local.index", localIndex); clusterInfo.put("remote.num_shards", numShardsRemote); - clusterInfo.put("remote.index", remoteIndex); clusterInfo.put("remote.skip_unavailable", skipUnavailable); return clusterInfo; } + private Map setupTwoClusters() { + var clusterInfo = setupTwoClusters(new String[] { "demo" }, new String[] { "prod" }); + clusterInfo.put("local.index", "demo"); + clusterInfo.put("remote.index", "prod"); + return clusterInfo; + } + private int indexDocs(Client client, String index) { int numDocs = between(500, 1200); for (int i = 0; i < numDocs; i++) { diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/resolve/ResolveClusterActionRequest.java 
b/server/src/main/java/org/elasticsearch/action/admin/indices/resolve/ResolveClusterActionRequest.java index e3db5146370ea..dbcece1eb4364 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/resolve/ResolveClusterActionRequest.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/resolve/ResolveClusterActionRequest.java @@ -168,7 +168,10 @@ boolean localIndicesPresent(String[] indices) { for (String index : indices) { // ensure that `index` is a remote name and not a date math expression which includes ':' symbol // since date math expression after evaluation should not contain ':' symbol - String indexExpression = IndexNameExpressionResolver.resolveDateMathExpression(index); + // NOTE: index expressions can be prefixed with "-" for index exclusion, which will not be parsed by resolveDateMathExpression + String indexExpression = IndexNameExpressionResolver.resolveDateMathExpression( + index.charAt(0) == '-' ? index.substring(1) : index + ); if (indexExpression.indexOf(RemoteClusterService.REMOTE_CLUSTER_INDEX_SEPARATOR) < 0) { return true; } diff --git a/server/src/main/java/org/elasticsearch/transport/RemoteClusterAware.java b/server/src/main/java/org/elasticsearch/transport/RemoteClusterAware.java index 335a045443cb5..76b93a2f802ec 100644 --- a/server/src/main/java/org/elasticsearch/transport/RemoteClusterAware.java +++ b/server/src/main/java/org/elasticsearch/transport/RemoteClusterAware.java @@ -10,7 +10,6 @@ package org.elasticsearch.transport; import org.elasticsearch.cluster.metadata.ClusterNameExpressionResolver; -import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.common.settings.ClusterSettings; import org.elasticsearch.common.settings.Setting; @@ -77,10 +76,10 @@ protected Map> groupClusterIndices(Set remoteCluste Set clustersToRemove = new HashSet<>(); for (String index : requestIndices) { // ensure that `index` is a remote name and not a datemath expression which includes ':' symbol - // since datemath expression after evaluation should not contain ':' symbol - String probe = IndexNameExpressionResolver.resolveDateMathExpression(index); - int i = probe.indexOf(RemoteClusterService.REMOTE_CLUSTER_INDEX_SEPARATOR); - if (i >= 0) { + // Remote names can not start with '<' so we are assuming that if the first character is '<' then it is a datemath expression. 
+ boolean isDateMathExpression = (index.charAt(0) == '<' || index.startsWith("-<")); + int i = index.indexOf(RemoteClusterService.REMOTE_CLUSTER_INDEX_SEPARATOR); + if (isDateMathExpression == false && i >= 0) { if (isRemoteClusterClientEnabled == false) { assert remoteClusterNames.isEmpty() : remoteClusterNames; throw new IllegalArgumentException("node [" + nodeName + "] does not have the remote cluster client role enabled"); diff --git a/server/src/test/java/org/elasticsearch/transport/RemoteClusterAwareTests.java b/server/src/test/java/org/elasticsearch/transport/RemoteClusterAwareTests.java index 11d99b7712a8c..169f6d8060020 100644 --- a/server/src/test/java/org/elasticsearch/transport/RemoteClusterAwareTests.java +++ b/server/src/test/java/org/elasticsearch/transport/RemoteClusterAwareTests.java @@ -9,8 +9,19 @@ package org.elasticsearch.transport; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.test.ESTestCase; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import static org.hamcrest.Matchers.containsInAnyOrder; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasKey; +import static org.hamcrest.Matchers.not; + public class RemoteClusterAwareTests extends ESTestCase { public void testBuildRemoteIndexName() { @@ -27,4 +38,139 @@ public void testBuildRemoteIndexName() { assertEquals(index, remoteIndexName); } } + + public void testGroupClusterIndices() { + RemoteClusterAwareTest remoteClusterAware = new RemoteClusterAwareTest(); + Set remoteClusterNames = Set.of("cluster1", "cluster2", "some-cluster3"); + String[] requestIndices = new String[] { + "index1", + "index2", + "*", + "-index3", + "cluster1:index2", + "cluster2:*", + "cluster*:index1", + "", + "cluster1:", + "-", + "cluster2:-", }; + + Map> groupedIndices = remoteClusterAware.groupClusterIndices(remoteClusterNames, requestIndices); + assertEquals(3, groupedIndices.size()); + assertThat(groupedIndices, hasKey(RemoteClusterAware.LOCAL_CLUSTER_GROUP_KEY)); + assertThat(groupedIndices, hasKey("cluster1")); + assertThat(groupedIndices, hasKey("cluster2")); + assertThat(groupedIndices, not(hasKey("some-cluster3"))); + + assertThat(groupedIndices.get(RemoteClusterAware.LOCAL_CLUSTER_GROUP_KEY).size(), equalTo(6)); + assertThat( + groupedIndices.get(RemoteClusterAware.LOCAL_CLUSTER_GROUP_KEY), + containsInAnyOrder( + "index1", + "index2", + "*", + "-index3", + "", + "-" + ) + ); + + assertThat(groupedIndices.get("cluster1").size(), equalTo(3)); + assertThat( + groupedIndices.get("cluster1"), + containsInAnyOrder("index2", "index1", "") + ); + + assertThat(groupedIndices.get("cluster2").size(), equalTo(3)); + assertThat( + groupedIndices.get("cluster2"), + containsInAnyOrder("*", "index1", "-") + ); + } + + public void testGroupClusterIndicesWildcard() { + RemoteClusterAwareTest remoteClusterAware = new RemoteClusterAwareTest(); + Set remoteClusterNames = Set.of("cluster1", "cluster2", "some-cluster3"); + String[] requestIndices = new String[] { + "*", + "*:*", + "cluster2*:index*", + "cluster*:index1", + "-some-*:*", + "-index*", + "cluster*:-noindex" }; + + Map> groupedIndices = remoteClusterAware.groupClusterIndices(remoteClusterNames, requestIndices); + assertEquals(3, groupedIndices.size()); + assertThat(groupedIndices, hasKey(RemoteClusterAware.LOCAL_CLUSTER_GROUP_KEY)); + assertThat(groupedIndices, hasKey("cluster1")); + assertThat(groupedIndices, hasKey("cluster2")); + 
assertThat(groupedIndices, not(hasKey("some-cluster3"))); + + assertThat(groupedIndices.get(RemoteClusterAware.LOCAL_CLUSTER_GROUP_KEY).size(), equalTo(2)); + assertThat(groupedIndices.get(RemoteClusterAware.LOCAL_CLUSTER_GROUP_KEY), containsInAnyOrder("*", "-index*")); + + assertThat(groupedIndices.get("cluster1").size(), equalTo(3)); + assertThat(groupedIndices.get("cluster1"), containsInAnyOrder("*", "index1", "-noindex")); + + assertThat(groupedIndices.get("cluster2").size(), equalTo(4)); + assertThat(groupedIndices.get("cluster2"), containsInAnyOrder("*", "index*", "index1", "-noindex")); + } + + private static void mustThrowException(String[] requestIndices, Class expectedType, String expectedMessage) { + RemoteClusterAwareTest remoteClusterAware = new RemoteClusterAwareTest(); + Set remoteClusterNames = Set.of("cluster1", "cluster2", "some-cluster3"); + assertThat( + expectThrows(expectedType, () -> remoteClusterAware.groupClusterIndices(remoteClusterNames, requestIndices)).getMessage(), + containsString(expectedMessage) + ); + } + + public void testGroupClusterIndicesFail() { + RemoteClusterAwareTest remoteClusterAware = new RemoteClusterAwareTest(); + Set remoteClusterNames = Set.of("cluster1", "cluster2", "some-cluster3"); + + mustThrowException(new String[] { ":foo" }, NoSuchRemoteClusterException.class, "no such remote cluster"); + mustThrowException(new String[] { "notacluster:foo" }, NoSuchRemoteClusterException.class, "no such remote cluster"); + // Cluster wildcard exclusion requires :* + mustThrowException( + new String[] { "*:*", "-cluster*:index1" }, + IllegalArgumentException.class, + "To exclude a cluster you must specify the '*' wildcard" + ); + mustThrowException( + new String[] { "*:*", "-cluster2:index1" }, + IllegalArgumentException.class, + "To exclude a cluster you must specify the '*' wildcard" + ); + // Excluding a cluster that we didn't previously include + mustThrowException( + new String[] { "cluster1:*", "-some*:*" }, + IllegalArgumentException.class, + "not included in the list of clusters to be included" + ); + // Excluded all clusters + mustThrowException( + new String[] { "*:index1", "-some*:*", "-cluster*:*" }, + IllegalArgumentException.class, + "The '-' exclusions in the index expression list excludes all indexes" + ); + + } + + private static class RemoteClusterAwareTest extends RemoteClusterAware { + RemoteClusterAwareTest() { + super(Settings.EMPTY); + } + + @Override + protected void updateRemoteCluster(String clusterAlias, Settings settings) { + + } + + @Override + public Map> groupClusterIndices(Set remoteClusterNames, String[] requestIndices) { + return super.groupClusterIndices(remoteClusterNames, requestIndices); + } + } } From ce0681225bd316e4e3c4cf45f72fd446eb635430 Mon Sep 17 00:00:00 2001 From: Sam Xiao Date: Tue, 24 Sep 2024 16:53:11 -0400 Subject: [PATCH 37/58] ILM: Add total_shards_per_node setting to searchable snapshot (#112972) (#113493) Allows setting index total_shards_per_node in the SearchableSnapshot action of ILM to remediate hot spots in shard allocation for searchable snapshot indices. 
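For example, a frozen phase can cap searchable snapshot shards per node like this (a sketch; the policy name, repository name, and min_age are illustrative):

```
PUT _ilm/policy/my-policy
{
  "policy": {
    "phases": {
      "frozen": {
        "min_age": "90d",
        "actions": {
          "searchable_snapshot": {
            "snapshot_repository": "my-repository",
            "total_shards_per_node": 1
          }
        }
      }
    }
  }
}
```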
Closes #112261 --- docs/changelog/112972.yaml | 6 ++ .../actions/ilm-searchable-snapshot.asciidoc | 5 +- .../org/elasticsearch/TransportVersions.java | 2 + .../xpack/core/ilm/MountSnapshotStep.java | 60 ++++++++++---- .../core/ilm/SearchableSnapshotAction.java | 42 ++++++++-- .../xpack/core/ilm/LifecyclePolicyTests.java | 6 +- .../core/ilm/MountSnapshotStepTests.java | 82 +++++++++++++++++-- .../ilm/SearchableSnapshotActionTests.java | 24 +++++- .../actions/SearchableSnapshotActionIT.java | 56 +++++++++++++ 9 files changed, 254 insertions(+), 29 deletions(-) create mode 100644 docs/changelog/112972.yaml diff --git a/docs/changelog/112972.yaml b/docs/changelog/112972.yaml new file mode 100644 index 0000000000000..5332ac13fd13f --- /dev/null +++ b/docs/changelog/112972.yaml @@ -0,0 +1,6 @@ +pr: 112972 +summary: "ILM: Add `total_shards_per_node` setting to searchable snapshot" +area: ILM+SLM +type: enhancement +issues: + - 112261 diff --git a/docs/reference/ilm/actions/ilm-searchable-snapshot.asciidoc b/docs/reference/ilm/actions/ilm-searchable-snapshot.asciidoc index 4ba4782174bef..73a77bef09bde 100644 --- a/docs/reference/ilm/actions/ilm-searchable-snapshot.asciidoc +++ b/docs/reference/ilm/actions/ilm-searchable-snapshot.asciidoc @@ -19,7 +19,7 @@ index>> prefixed with `partial-` to the frozen tier. In other phases, the action In the frozen tier, the action will ignore the setting <>, if it was present in the original index, -to account for the difference in the number of nodes between the frozen and the other tiers. +to account for the difference in the number of nodes between the frozen and the other tiers. To set <> for searchable snapshots, set the `total_shards_per_node` option in the frozen phase's `searchable_snapshot` action within the ILM policy. WARNING: Don't include the `searchable_snapshot` action in both the hot and cold @@ -74,6 +74,9 @@ will be performed on the hot nodes. If using a `searchable_snapshot` action in t force merge will be performed on whatever tier the index is *prior* to the `cold` phase (either `hot` or `warm`). +`total_shards_per_node`:: +The maximum number of shards (replicas and primaries) that will be allocated to a single node for the searchable snapshot index. Defaults to unbounded. + [[ilm-searchable-snapshot-ex]] ==== Examples //// diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java index f1079e0cc4974..49b1f58d723bb 100644 --- a/server/src/main/java/org/elasticsearch/TransportVersions.java +++ b/server/src/main/java/org/elasticsearch/TransportVersions.java @@ -221,6 +221,8 @@ static TransportVersion def(int id) { public static final TransportVersion BULK_INCREMENTAL_STATE = def(8_745_00_0); public static final TransportVersion FAILURE_STORE_STATUS_IN_INDEX_RESPONSE = def(8_746_00_0); public static final TransportVersion ESQL_AGGREGATION_OPERATOR_STATUS_FINISH_NANOS = def(8_747_00_0); + public static final TransportVersion ML_TELEMETRY_MEMORY_ADDED = def(8_748_00_0); + public static final TransportVersion ILM_ADD_SEARCHABLE_SNAPSHOT_TOTAL_SHARDS_PER_NODE = def(8_749_00_0); /* * STOP! READ THIS FIRST! 
No, really, diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/MountSnapshotStep.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/MountSnapshotStep.java index aac4d74144e95..7d045f2950e1b 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/MountSnapshotStep.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/MountSnapshotStep.java @@ -18,11 +18,13 @@ import org.elasticsearch.cluster.routing.allocation.decider.ShardsLimitAllocationDecider; import org.elasticsearch.common.Strings; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.core.Nullable; import org.elasticsearch.core.TimeValue; import org.elasticsearch.rest.RestStatus; import org.elasticsearch.xpack.core.searchablesnapshots.MountSearchableSnapshotAction; import org.elasticsearch.xpack.core.searchablesnapshots.MountSearchableSnapshotRequest; +import java.util.ArrayList; import java.util.Objects; import java.util.Optional; @@ -37,17 +39,34 @@ public class MountSnapshotStep extends AsyncRetryDuringSnapshotActionStep { private final String restoredIndexPrefix; private final MountSearchableSnapshotRequest.Storage storageType; + @Nullable + private final Integer totalShardsPerNode; public MountSnapshotStep( StepKey key, StepKey nextStepKey, Client client, String restoredIndexPrefix, - MountSearchableSnapshotRequest.Storage storageType + MountSearchableSnapshotRequest.Storage storageType, + @Nullable Integer totalShardsPerNode ) { super(key, nextStepKey, client); this.restoredIndexPrefix = restoredIndexPrefix; this.storageType = Objects.requireNonNull(storageType, "a storage type must be specified"); + if (totalShardsPerNode != null && totalShardsPerNode < 1) { + throw new IllegalArgumentException("[" + SearchableSnapshotAction.TOTAL_SHARDS_PER_NODE.getPreferredName() + "] must be >= 1"); + } + this.totalShardsPerNode = totalShardsPerNode; + } + + public MountSnapshotStep( + StepKey key, + StepKey nextStepKey, + Client client, + String restoredIndexPrefix, + MountSearchableSnapshotRequest.Storage storageType + ) { + this(key, nextStepKey, client, restoredIndexPrefix, storageType, null); } @Override @@ -63,6 +82,11 @@ public MountSearchableSnapshotRequest.Storage getStorage() { return storageType; } + @Nullable + public Integer getTotalShardsPerNode() { + return totalShardsPerNode; + } + @Override void performDuringNoSnapshot(IndexMetadata indexMetadata, ClusterState currentClusterState, ActionListener listener) { String indexName = indexMetadata.getIndex().getName(); @@ -140,6 +164,9 @@ void performDuringNoSnapshot(IndexMetadata indexMetadata, ClusterState currentCl final Settings.Builder settingsBuilder = Settings.builder(); overrideTierPreference(this.getKey().phase()).ifPresent(override -> settingsBuilder.put(DataTier.TIER_PREFERENCE, override)); + if (totalShardsPerNode != null) { + settingsBuilder.put(ShardsLimitAllocationDecider.INDEX_TOTAL_SHARDS_PER_NODE_SETTING.getKey(), totalShardsPerNode); + } final MountSearchableSnapshotRequest mountSearchableSnapshotRequest = new MountSearchableSnapshotRequest( TimeValue.MAX_VALUE, @@ -148,9 +175,9 @@ void performDuringNoSnapshot(IndexMetadata indexMetadata, ClusterState currentCl snapshotName, indexName, settingsBuilder.build(), - ignoredIndexSettings(this.getKey().phase()), + ignoredIndexSettings(), // we'll not wait for the snapshot to complete in this step as the async steps are executed from threads that shouldn't - // perform expensive operations (ie. 
clusterStateProcessed) + // perform expensive operations (i.e. clusterStateProcessed) false, storageType ); @@ -198,23 +225,27 @@ static Optional overrideTierPreference(String phase) { * setting, the restored index would be captured by the ILM runner and, depending on what ILM execution state was captured at snapshot * time, make it's way forward from _that_ step forward in the ILM policy. We'll re-set this setting on the restored index at a later * step once we restored a deterministic execution state - * - index.routing.allocation.total_shards_per_node: It is likely that frozen tier has fewer nodes than the hot tier. - * Keeping this setting runs the risk that we will not have enough nodes to allocate all the shards in the - * frozen tier and the user does not have any way of fixing this. For this reason, we ignore this setting when moving to frozen. + * - index.routing.allocation.total_shards_per_node: It is likely that frozen tier has fewer nodes than the hot tier. If this setting + * is not specifically set in the frozen tier, keeping this setting runs the risk that we will not have enough nodes to + * allocate all the shards in the frozen tier and the user does not have any way of fixing this. For this reason, we ignore this + * setting when moving to frozen. We do not ignore this setting if it is specifically set in the mount searchable snapshot step + * of frozen tier. */ - static String[] ignoredIndexSettings(String phase) { + String[] ignoredIndexSettings() { + ArrayList ignoredSettings = new ArrayList<>(); + ignoredSettings.add(LifecycleSettings.LIFECYCLE_NAME); // if we are mounting a searchable snapshot in the hot phase, then we should not change the total_shards_per_node setting - if (TimeseriesLifecycleType.FROZEN_PHASE.equals(phase)) { - return new String[] { - LifecycleSettings.LIFECYCLE_NAME, - ShardsLimitAllocationDecider.INDEX_TOTAL_SHARDS_PER_NODE_SETTING.getKey() }; + // if total_shards_per_node setting is specifically set for the frozen phase and not propagated from previous phase, + // then it should not be ignored + if (TimeseriesLifecycleType.FROZEN_PHASE.equals(this.getKey().phase()) && this.totalShardsPerNode == null) { + ignoredSettings.add(ShardsLimitAllocationDecider.INDEX_TOTAL_SHARDS_PER_NODE_SETTING.getKey()); } - return new String[] { LifecycleSettings.LIFECYCLE_NAME }; + return ignoredSettings.toArray(new String[0]); } @Override public int hashCode() { - return Objects.hash(super.hashCode(), restoredIndexPrefix, storageType); + return Objects.hash(super.hashCode(), restoredIndexPrefix, storageType, totalShardsPerNode); } @Override @@ -228,6 +259,7 @@ public boolean equals(Object obj) { MountSnapshotStep other = (MountSnapshotStep) obj; return super.equals(obj) && Objects.equals(restoredIndexPrefix, other.restoredIndexPrefix) - && Objects.equals(storageType, other.storageType); + && Objects.equals(storageType, other.storageType) + && Objects.equals(totalShardsPerNode, other.totalShardsPerNode); } } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/SearchableSnapshotAction.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/SearchableSnapshotAction.java index 5b9b559b4d957..c06dcc0f083d1 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/SearchableSnapshotAction.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/SearchableSnapshotAction.java @@ -32,6 +32,7 @@ import java.util.List; import java.util.Objects; +import static 
org.elasticsearch.TransportVersions.ILM_ADD_SEARCHABLE_SNAPSHOT_TOTAL_SHARDS_PER_NODE; import static org.elasticsearch.snapshots.SearchableSnapshotsSettings.SEARCHABLE_SNAPSHOTS_REPOSITORY_NAME_SETTING_KEY; import static org.elasticsearch.snapshots.SearchableSnapshotsSettings.SEARCHABLE_SNAPSHOTS_SNAPSHOT_NAME_SETTING_KEY; import static org.elasticsearch.snapshots.SearchableSnapshotsSettings.SEARCHABLE_SNAPSHOT_PARTIAL_SETTING_KEY; @@ -49,6 +50,7 @@ public class SearchableSnapshotAction implements LifecycleAction { public static final ParseField SNAPSHOT_REPOSITORY = new ParseField("snapshot_repository"); public static final ParseField FORCE_MERGE_INDEX = new ParseField("force_merge_index"); + public static final ParseField TOTAL_SHARDS_PER_NODE = new ParseField("total_shards_per_node"); public static final String CONDITIONAL_DATASTREAM_CHECK_KEY = BranchingStep.NAME + "-on-datastream-check"; public static final String CONDITIONAL_SKIP_ACTION_STEP = BranchingStep.NAME + "-check-prerequisites"; public static final String CONDITIONAL_SKIP_GENERATE_AND_CLEAN = BranchingStep.NAME + "-check-existing-snapshot"; @@ -58,12 +60,13 @@ public class SearchableSnapshotAction implements LifecycleAction { private static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( NAME, - a -> new SearchableSnapshotAction((String) a[0], a[1] == null || (boolean) a[1]) + a -> new SearchableSnapshotAction((String) a[0], a[1] == null || (boolean) a[1], (Integer) a[2]) ); static { PARSER.declareString(ConstructingObjectParser.constructorArg(), SNAPSHOT_REPOSITORY); PARSER.declareBoolean(ConstructingObjectParser.optionalConstructorArg(), FORCE_MERGE_INDEX); + PARSER.declareInt(ConstructingObjectParser.optionalConstructorArg(), TOTAL_SHARDS_PER_NODE); } public static SearchableSnapshotAction parse(XContentParser parser) { @@ -72,22 +75,36 @@ public static SearchableSnapshotAction parse(XContentParser parser) { private final String snapshotRepository; private final boolean forceMergeIndex; + @Nullable + private final Integer totalShardsPerNode; - public SearchableSnapshotAction(String snapshotRepository, boolean forceMergeIndex) { + public SearchableSnapshotAction(String snapshotRepository, boolean forceMergeIndex, @Nullable Integer totalShardsPerNode) { if (Strings.hasText(snapshotRepository) == false) { throw new IllegalArgumentException("the snapshot repository must be specified"); } this.snapshotRepository = snapshotRepository; this.forceMergeIndex = forceMergeIndex; + + if (totalShardsPerNode != null && totalShardsPerNode < 1) { + throw new IllegalArgumentException("[" + TOTAL_SHARDS_PER_NODE.getPreferredName() + "] must be >= 1"); + } + this.totalShardsPerNode = totalShardsPerNode; + } + + public SearchableSnapshotAction(String snapshotRepository, boolean forceMergeIndex) { + this(snapshotRepository, forceMergeIndex, null); } public SearchableSnapshotAction(String snapshotRepository) { - this(snapshotRepository, true); + this(snapshotRepository, true, null); } public SearchableSnapshotAction(StreamInput in) throws IOException { this.snapshotRepository = in.readString(); this.forceMergeIndex = in.readBoolean(); + this.totalShardsPerNode = in.getTransportVersion().onOrAfter(ILM_ADD_SEARCHABLE_SNAPSHOT_TOTAL_SHARDS_PER_NODE) + ? 
in.readOptionalInt() + : null; } boolean isForceMergeIndex() { @@ -98,6 +115,10 @@ public String getSnapshotRepository() { return snapshotRepository; } + public Integer getTotalShardsPerNode() { + return totalShardsPerNode; + } + @Override public List toSteps(Client client, String phase, StepKey nextStepKey) { assert false; @@ -298,7 +319,8 @@ public List toSteps(Client client, String phase, StepKey nextStepKey, XPac waitForGreenRestoredIndexKey, client, getRestoredIndexPrefix(mountSnapshotKey), - storageType + storageType, + totalShardsPerNode ); WaitForIndexColorStep waitForGreenIndexHealthStep = new WaitForIndexColorStep( waitForGreenRestoredIndexKey, @@ -402,6 +424,9 @@ public String getWriteableName() { public void writeTo(StreamOutput out) throws IOException { out.writeString(snapshotRepository); out.writeBoolean(forceMergeIndex); + if (out.getTransportVersion().onOrAfter(ILM_ADD_SEARCHABLE_SNAPSHOT_TOTAL_SHARDS_PER_NODE)) { + out.writeOptionalInt(totalShardsPerNode); + } } @Override @@ -409,6 +434,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.startObject(); builder.field(SNAPSHOT_REPOSITORY.getPreferredName(), snapshotRepository); builder.field(FORCE_MERGE_INDEX.getPreferredName(), forceMergeIndex); + if (totalShardsPerNode != null) { + builder.field(TOTAL_SHARDS_PER_NODE.getPreferredName(), totalShardsPerNode); + } builder.endObject(); return builder; } @@ -422,12 +450,14 @@ public boolean equals(Object o) { return false; } SearchableSnapshotAction that = (SearchableSnapshotAction) o; - return Objects.equals(snapshotRepository, that.snapshotRepository) && Objects.equals(forceMergeIndex, that.forceMergeIndex); + return Objects.equals(snapshotRepository, that.snapshotRepository) + && Objects.equals(forceMergeIndex, that.forceMergeIndex) + && Objects.equals(totalShardsPerNode, that.totalShardsPerNode); } @Override public int hashCode() { - return Objects.hash(snapshotRepository, forceMergeIndex); + return Objects.hash(snapshotRepository, forceMergeIndex, totalShardsPerNode); } @Nullable diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/LifecyclePolicyTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/LifecyclePolicyTests.java index 66aa9a24cbcd4..7963d04e0f666 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/LifecyclePolicyTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/LifecyclePolicyTests.java @@ -224,7 +224,11 @@ public static LifecyclePolicy randomTimeseriesLifecyclePolicy(@Nullable String l frozenTime, Collections.singletonMap( SearchableSnapshotAction.NAME, - new SearchableSnapshotAction(randomAlphaOfLength(10), randomBoolean()) + new SearchableSnapshotAction( + randomAlphaOfLength(10), + randomBoolean(), + (randomBoolean() ? 
null : randomIntBetween(1, 100)) + ) ) ) ); diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/MountSnapshotStepTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/MountSnapshotStepTests.java index 2b5a0535caa0e..8ca7a00ab0948 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/MountSnapshotStepTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/MountSnapshotStepTests.java @@ -41,7 +41,8 @@ public MountSnapshotStep createRandomInstance() { StepKey nextStepKey = randomStepKey(); String restoredIndexPrefix = randomAlphaOfLength(10); MountSearchableSnapshotRequest.Storage storage = randomStorageType(); - return new MountSnapshotStep(stepKey, nextStepKey, client, restoredIndexPrefix, storage); + Integer totalShardsPerNode = randomTotalShardsPerNode(true); + return new MountSnapshotStep(stepKey, nextStepKey, client, restoredIndexPrefix, storage, totalShardsPerNode); } public static MountSearchableSnapshotRequest.Storage randomStorageType() { @@ -59,7 +60,8 @@ protected MountSnapshotStep copyInstance(MountSnapshotStep instance) { instance.getNextStepKey(), instance.getClient(), instance.getRestoredIndexPrefix(), - instance.getStorage() + instance.getStorage(), + instance.getTotalShardsPerNode() ); } @@ -69,7 +71,8 @@ public MountSnapshotStep mutateInstance(MountSnapshotStep instance) { StepKey nextKey = instance.getNextStepKey(); String restoredIndexPrefix = instance.getRestoredIndexPrefix(); MountSearchableSnapshotRequest.Storage storage = instance.getStorage(); - switch (between(0, 3)) { + Integer totalShardsPerNode = instance.getTotalShardsPerNode(); + switch (between(0, 4)) { case 0: key = new StepKey(key.phase(), key.action(), key.name() + randomAlphaOfLength(5)); break; @@ -88,10 +91,30 @@ public MountSnapshotStep mutateInstance(MountSnapshotStep instance) { throw new AssertionError("unknown storage type: " + storage); } break; + case 4: + totalShardsPerNode = totalShardsPerNode == null ? 
1 : totalShardsPerNode + randomIntBetween(1, 100); + break; default: throw new AssertionError("Illegal randomisation branch"); } - return new MountSnapshotStep(key, nextKey, instance.getClient(), restoredIndexPrefix, storage); + return new MountSnapshotStep(key, nextKey, instance.getClient(), restoredIndexPrefix, storage, totalShardsPerNode); + } + + public void testCreateWithInvalidTotalShardsPerNode() throws Exception { + int invalidTotalShardsPerNode = randomIntBetween(-100, 0); + + IllegalArgumentException exception = expectThrows( + IllegalArgumentException.class, + () -> new MountSnapshotStep( + randomStepKey(), + randomStepKey(), + client, + RESTORED_INDEX_PREFIX, + randomStorageType(), + invalidTotalShardsPerNode + ) + ); + assertEquals("[total_shards_per_node] must be >= 1", exception.getMessage()); } public void testPerformActionFailure() { @@ -345,7 +368,50 @@ public void testIgnoreTotalShardsPerNodeInFrozenPhase() throws Exception { randomStepKey(), client, RESTORED_INDEX_PREFIX, - randomStorageType() + randomStorageType(), + null + ); + performActionAndWait(step, indexMetadata, clusterState, null); + } + } + + public void testDoNotIgnoreTotalShardsPerNodeIfSet() throws Exception { + String indexName = randomAlphaOfLength(10); + String policyName = "test-ilm-policy"; + Map ilmCustom = new HashMap<>(); + String snapshotName = indexName + "-" + policyName; + ilmCustom.put("snapshot_name", snapshotName); + String repository = "repository"; + ilmCustom.put("snapshot_repository", repository); + + IndexMetadata.Builder indexMetadataBuilder = IndexMetadata.builder(indexName) + .settings(settings(IndexVersion.current()).put(LifecycleSettings.LIFECYCLE_NAME, policyName)) + .putCustom(LifecycleExecutionState.ILM_CUSTOM_METADATA_KEY, ilmCustom) + .numberOfShards(randomIntBetween(1, 5)) + .numberOfReplicas(randomIntBetween(0, 5)); + IndexMetadata indexMetadata = indexMetadataBuilder.build(); + + ClusterState clusterState = ClusterState.builder(emptyClusterState()) + .metadata(Metadata.builder().put(indexMetadata, true).build()) + .build(); + + try (var threadPool = createThreadPool()) { + final var client = getRestoreSnapshotRequestAssertingClient( + threadPool, + repository, + snapshotName, + indexName, + RESTORED_INDEX_PREFIX, + indexName, + new String[] { LifecycleSettings.LIFECYCLE_NAME } + ); + MountSnapshotStep step = new MountSnapshotStep( + new StepKey(TimeseriesLifecycleType.FROZEN_PHASE, randomAlphaOfLength(10), randomAlphaOfLength(10)), + randomStepKey(), + client, + RESTORED_INDEX_PREFIX, + randomStorageType(), + randomTotalShardsPerNode(false) ); performActionAndWait(step, indexMetadata, clusterState, null); } @@ -401,4 +467,10 @@ protected void } }; } + + private Integer randomTotalShardsPerNode(boolean nullable) { + Integer randomInt = randomIntBetween(1, 100); + Integer randomIntNullable = (randomBoolean() ? null : randomInt); + return nullable ? 
randomIntNullable : randomInt; + } } diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/SearchableSnapshotActionTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/SearchableSnapshotActionTests.java index 193d9abeec91d..ca219fdde3d57 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/SearchableSnapshotActionTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/SearchableSnapshotActionTests.java @@ -16,6 +16,7 @@ import java.util.List; import static org.elasticsearch.xpack.core.ilm.SearchableSnapshotAction.NAME; +import static org.elasticsearch.xpack.core.ilm.SearchableSnapshotAction.TOTAL_SHARDS_PER_NODE; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.is; @@ -97,6 +98,16 @@ public void testPrefixAndStorageTypeDefaults() { ); } + public void testCreateWithInvalidTotalShardsPerNode() { + int invalidTotalShardsPerNode = randomIntBetween(-100, 0); + + IllegalArgumentException exception = expectThrows( + IllegalArgumentException.class, + () -> new SearchableSnapshotAction("test", true, invalidTotalShardsPerNode) + ); + assertEquals("[" + TOTAL_SHARDS_PER_NODE.getPreferredName() + "] must be >= 1", exception.getMessage()); + } + private List expectedStepKeysWithForceMerge(String phase) { return List.of( new StepKey(phase, NAME, SearchableSnapshotAction.CONDITIONAL_SKIP_ACTION_STEP), @@ -160,14 +171,23 @@ protected Writeable.Reader instanceReader() { @Override protected SearchableSnapshotAction mutateInstance(SearchableSnapshotAction instance) { - return switch (randomIntBetween(0, 1)) { + return switch (randomIntBetween(0, 2)) { case 0 -> new SearchableSnapshotAction(randomAlphaOfLengthBetween(5, 10), instance.isForceMergeIndex()); case 1 -> new SearchableSnapshotAction(instance.getSnapshotRepository(), instance.isForceMergeIndex() == false); + case 2 -> new SearchableSnapshotAction( + instance.getSnapshotRepository(), + instance.isForceMergeIndex(), + instance.getTotalShardsPerNode() == null ? 1 : instance.getTotalShardsPerNode() + randomIntBetween(1, 100) + ); default -> throw new IllegalArgumentException("Invalid mutation branch"); }; } static SearchableSnapshotAction randomInstance() { - return new SearchableSnapshotAction(randomAlphaOfLengthBetween(5, 10), randomBoolean()); + return new SearchableSnapshotAction( + randomAlphaOfLengthBetween(5, 10), + randomBoolean(), + (randomBoolean() ? 
null : randomIntBetween(1, 100)) + ); } } diff --git a/x-pack/plugin/ilm/qa/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/ilm/actions/SearchableSnapshotActionIT.java b/x-pack/plugin/ilm/qa/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/ilm/actions/SearchableSnapshotActionIT.java index 0e3d0f1b2ec40..fefeaa95319ed 100644 --- a/x-pack/plugin/ilm/qa/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/ilm/actions/SearchableSnapshotActionIT.java +++ b/x-pack/plugin/ilm/qa/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/ilm/actions/SearchableSnapshotActionIT.java @@ -48,6 +48,7 @@ import java.util.concurrent.TimeUnit; import static java.util.Collections.singletonMap; +import static org.elasticsearch.cluster.routing.allocation.decider.ShardsLimitAllocationDecider.INDEX_TOTAL_SHARDS_PER_NODE_SETTING; import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder; import static org.elasticsearch.xpack.TimeSeriesRestDriver.createComposableTemplate; import static org.elasticsearch.xpack.TimeSeriesRestDriver.createNewSingletonPolicy; @@ -921,6 +922,61 @@ public void testSearchableSnapshotInvokesAsyncActionOnNewIndex() throws Exceptio }, 30, TimeUnit.SECONDS); } + public void testSearchableSnapshotTotalShardsPerNode() throws Exception { + String index = "myindex-" + randomAlphaOfLength(4).toLowerCase(Locale.ROOT); + Integer totalShardsPerNode = 2; + createSnapshotRepo(client(), snapshotRepo, randomBoolean()); + createPolicy( + client(), + policy, + null, + null, + new Phase( + "cold", + TimeValue.ZERO, + singletonMap(SearchableSnapshotAction.NAME, new SearchableSnapshotAction(snapshotRepo, randomBoolean())) + ), + new Phase( + "frozen", + TimeValue.ZERO, + singletonMap(SearchableSnapshotAction.NAME, new SearchableSnapshotAction(snapshotRepo, randomBoolean(), totalShardsPerNode)) + ), + null + ); + + createIndex(index, Settings.EMPTY); + ensureGreen(index); + indexDocument(client(), index, true); + + // enable ILM after we indexed a document as otherwise ILM might sometimes run so fast the indexDocument call will fail with + // `index_not_found_exception` + updateIndexSettings(index, Settings.builder().put(LifecycleSettings.LIFECYCLE_NAME, policy)); + + // wait for snapshot successfully mounted and ILM execution completed + final String searchableSnapMountedIndexName = SearchableSnapshotAction.PARTIAL_RESTORED_INDEX_PREFIX + + SearchableSnapshotAction.FULL_RESTORED_INDEX_PREFIX + index; + assertBusy(() -> { + logger.info("--> waiting for [{}] to exist...", searchableSnapMountedIndexName); + assertTrue(indexExists(searchableSnapMountedIndexName)); + }, 30, TimeUnit.SECONDS); + assertBusy(() -> { + triggerStateChange(); + Step.StepKey stepKeyForIndex = getStepKeyForIndex(client(), searchableSnapMountedIndexName); + assertThat(stepKeyForIndex.phase(), is("frozen")); + assertThat(stepKeyForIndex.name(), is(PhaseCompleteStep.NAME)); + }, 30, TimeUnit.SECONDS); + + // validate total_shards_per_node setting + Map indexSettings = getIndexSettingsAsMap(searchableSnapMountedIndexName); + assertNotNull("expected total_shards_per_node to exist", indexSettings.get(INDEX_TOTAL_SHARDS_PER_NODE_SETTING.getKey())); + Integer snapshotTotalShardsPerNode = Integer.valueOf((String) indexSettings.get(INDEX_TOTAL_SHARDS_PER_NODE_SETTING.getKey())); + assertEquals( + "expected total_shards_per_node to be " + totalShardsPerNode + ", but got: " + snapshotTotalShardsPerNode, + snapshotTotalShardsPerNode, + totalShardsPerNode + ); + } + /** * Cause a bit of cluster activity using an empty 
reroute call in case the `wait-for-index-colour` ILM step missed the * notification that partial-index is now GREEN. From 5e15f842f13d5737a369769ecfbbcfb097fef18e Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Tue, 24 Sep 2024 18:31:06 -0400 Subject: [PATCH 38/58] Add Search Inference ID To Semantic Text Mapping (#113051) (#113494) Adds a search_inference_id parameter to the semantic_text mapping. This parameter defines the inference endpoint that is used to generate embeddings at query time. --- docs/changelog/113051.yaml | 5 + .../org/elasticsearch/TransportVersions.java | 1 + .../metadata/InferenceFieldMetadata.java | 37 ++- .../metadata/InferenceFieldMetadataTests.java | 10 +- .../xpack/inference/InferenceFeatures.java | 4 +- .../mapper/SemanticTextFieldMapper.java | 144 +++++++--- .../queries/SemanticQueryBuilder.java | 2 +- .../mapper/SemanticTextFieldMapperTests.java | 157 ++++++++--- .../queries/SemanticQueryBuilderTests.java | 13 +- .../test/inference/40_semantic_text_query.yml | 258 ++++++++++++++++++ ..._text_query_inference_endpoint_changes.yml | 11 +- 11 files changed, 562 insertions(+), 80 deletions(-) create mode 100644 docs/changelog/113051.yaml diff --git a/docs/changelog/113051.yaml b/docs/changelog/113051.yaml new file mode 100644 index 0000000000000..9be68f9f2b03e --- /dev/null +++ b/docs/changelog/113051.yaml @@ -0,0 +1,5 @@ +pr: 113051 +summary: Add Search Inference ID To Semantic Text Mapping +area: Mapping +type: enhancement +issues: [] diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java index 49b1f58d723bb..7119f44d36444 100644 --- a/server/src/main/java/org/elasticsearch/TransportVersions.java +++ b/server/src/main/java/org/elasticsearch/TransportVersions.java @@ -223,6 +223,7 @@ static TransportVersion def(int id) { public static final TransportVersion ESQL_AGGREGATION_OPERATOR_STATUS_FINISH_NANOS = def(8_747_00_0); public static final TransportVersion ML_TELEMETRY_MEMORY_ADDED = def(8_748_00_0); public static final TransportVersion ILM_ADD_SEARCHABLE_SNAPSHOT_TOTAL_SHARDS_PER_NODE = def(8_749_00_0); + public static final TransportVersion SEMANTIC_TEXT_SEARCH_INFERENCE_ID = def(8_750_00_0); /* * STOP! READ THIS FIRST! No, really, diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/InferenceFieldMetadata.java b/server/src/main/java/org/elasticsearch/cluster/metadata/InferenceFieldMetadata.java index be0943f8f3066..271c60e829a87 100644 --- a/server/src/main/java/org/elasticsearch/cluster/metadata/InferenceFieldMetadata.java +++ b/server/src/main/java/org/elasticsearch/cluster/metadata/InferenceFieldMetadata.java @@ -23,6 +23,8 @@ import java.util.List; import java.util.Objects; +import static org.elasticsearch.TransportVersions.SEMANTIC_TEXT_SEARCH_INFERENCE_ID; + /** * Contains inference field data for fields. 
* As inference is done in the coordinator node to avoid re-doing it at shard / replica level, the coordinator needs to check for the need @@ -32,21 +34,33 @@ */ public final class InferenceFieldMetadata implements SimpleDiffable, ToXContentFragment { private static final String INFERENCE_ID_FIELD = "inference_id"; + private static final String SEARCH_INFERENCE_ID_FIELD = "search_inference_id"; private static final String SOURCE_FIELDS_FIELD = "source_fields"; private final String name; private final String inferenceId; + private final String searchInferenceId; private final String[] sourceFields; public InferenceFieldMetadata(String name, String inferenceId, String[] sourceFields) { + this(name, inferenceId, inferenceId, sourceFields); + } + + public InferenceFieldMetadata(String name, String inferenceId, String searchInferenceId, String[] sourceFields) { this.name = Objects.requireNonNull(name); this.inferenceId = Objects.requireNonNull(inferenceId); + this.searchInferenceId = Objects.requireNonNull(searchInferenceId); this.sourceFields = Objects.requireNonNull(sourceFields); } public InferenceFieldMetadata(StreamInput input) throws IOException { this.name = input.readString(); this.inferenceId = input.readString(); + if (input.getTransportVersion().onOrAfter(SEMANTIC_TEXT_SEARCH_INFERENCE_ID)) { + this.searchInferenceId = input.readString(); + } else { + this.searchInferenceId = this.inferenceId; + } this.sourceFields = input.readStringArray(); } @@ -54,6 +68,9 @@ public InferenceFieldMetadata(StreamInput input) throws IOException { public void writeTo(StreamOutput out) throws IOException { out.writeString(name); out.writeString(inferenceId); + if (out.getTransportVersion().onOrAfter(SEMANTIC_TEXT_SEARCH_INFERENCE_ID)) { + out.writeString(searchInferenceId); + } out.writeStringArray(sourceFields); } @@ -64,12 +81,13 @@ public boolean equals(Object o) { InferenceFieldMetadata that = (InferenceFieldMetadata) o; return Objects.equals(name, that.name) && Objects.equals(inferenceId, that.inferenceId) + && Objects.equals(searchInferenceId, that.searchInferenceId) && Arrays.equals(sourceFields, that.sourceFields); } @Override public int hashCode() { - int result = Objects.hash(name, inferenceId); + int result = Objects.hash(name, inferenceId, searchInferenceId); result = 31 * result + Arrays.hashCode(sourceFields); return result; } @@ -82,6 +100,10 @@ public String getInferenceId() { return inferenceId; } + public String getSearchInferenceId() { + return searchInferenceId; + } + public String[] getSourceFields() { return sourceFields; } @@ -94,6 +116,9 @@ public static Diff readDiffFrom(StreamInput in) throws I public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(name); builder.field(INFERENCE_ID_FIELD, inferenceId); + if (searchInferenceId.equals(inferenceId) == false) { + builder.field(SEARCH_INFERENCE_ID_FIELD, searchInferenceId); + } builder.array(SOURCE_FIELDS_FIELD, sourceFields); return builder.endObject(); } @@ -106,6 +131,7 @@ public static InferenceFieldMetadata fromXContent(XContentParser parser) throws String currentFieldName = null; String inferenceId = null; + String searchInferenceId = null; List inputFields = new ArrayList<>(); while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { if (token == XContentParser.Token.FIELD_NAME) { @@ -113,6 +139,8 @@ public static InferenceFieldMetadata fromXContent(XContentParser parser) throws } else if (token == XContentParser.Token.VALUE_STRING) { if 
(INFERENCE_ID_FIELD.equals(currentFieldName)) { inferenceId = parser.text(); + } else if (SEARCH_INFERENCE_ID_FIELD.equals(currentFieldName)) { + searchInferenceId = parser.text(); } } else if (token == XContentParser.Token.START_ARRAY) { if (SOURCE_FIELDS_FIELD.equals(currentFieldName)) { @@ -128,6 +156,11 @@ public static InferenceFieldMetadata fromXContent(XContentParser parser) throws parser.skipChildren(); } } - return new InferenceFieldMetadata(name, inferenceId, inputFields.toArray(String[]::new)); + return new InferenceFieldMetadata( + name, + inferenceId, + searchInferenceId == null ? inferenceId : searchInferenceId, + inputFields.toArray(String[]::new) + ); } } diff --git a/server/src/test/java/org/elasticsearch/cluster/metadata/InferenceFieldMetadataTests.java b/server/src/test/java/org/elasticsearch/cluster/metadata/InferenceFieldMetadataTests.java index 6107246cf8ff1..2d5805696320d 100644 --- a/server/src/test/java/org/elasticsearch/cluster/metadata/InferenceFieldMetadataTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/metadata/InferenceFieldMetadataTests.java @@ -61,13 +61,15 @@ protected boolean supportsUnknownFields() { private static InferenceFieldMetadata createTestItem() { String name = randomAlphaOfLengthBetween(3, 10); String inferenceId = randomIdentifier(); + String searchInferenceId = randomIdentifier(); String[] inputFields = generateRandomStringArray(5, 10, false, false); - return new InferenceFieldMetadata(name, inferenceId, inputFields); + return new InferenceFieldMetadata(name, inferenceId, searchInferenceId, inputFields); } public void testNullCtorArgsThrowException() { - assertThrows(NullPointerException.class, () -> new InferenceFieldMetadata(null, "inferenceId", new String[0])); - assertThrows(NullPointerException.class, () -> new InferenceFieldMetadata("name", null, new String[0])); - assertThrows(NullPointerException.class, () -> new InferenceFieldMetadata("name", "inferenceId", null)); + assertThrows(NullPointerException.class, () -> new InferenceFieldMetadata(null, "inferenceId", "searchInferenceId", new String[0])); + assertThrows(NullPointerException.class, () -> new InferenceFieldMetadata("name", null, "searchInferenceId", new String[0])); + assertThrows(NullPointerException.class, () -> new InferenceFieldMetadata("name", "inferenceId", null, new String[0])); + assertThrows(NullPointerException.class, () -> new InferenceFieldMetadata("name", "inferenceId", "searchInferenceId", null)); } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java index 12a32ecdc6d4f..fd330a8cf6cc6 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java @@ -9,6 +9,7 @@ import org.elasticsearch.features.FeatureSpecification; import org.elasticsearch.features.NodeFeature; +import org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper; import org.elasticsearch.xpack.inference.rank.random.RandomRankRetrieverBuilder; import org.elasticsearch.xpack.inference.rank.textsimilarity.TextSimilarityRankRetrieverBuilder; @@ -23,7 +24,8 @@ public class InferenceFeatures implements FeatureSpecification { public Set getFeatures() { return Set.of( TextSimilarityRankRetrieverBuilder.TEXT_SIMILARITY_RERANKER_RETRIEVER_SUPPORTED, - 
RandomRankRetrieverBuilder.RANDOM_RERANKER_RETRIEVER_SUPPORTED + RandomRankRetrieverBuilder.RANDOM_RERANKER_RETRIEVER_SUPPORTED, + SemanticTextFieldMapper.SEMANTIC_TEXT_SEARCH_INFERENCE_ID ); } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java index 81dfba769136b..0483296cd2c6a 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java @@ -18,6 +18,7 @@ import org.elasticsearch.common.xcontent.support.XContentMapValues; import org.elasticsearch.core.Nullable; import org.elasticsearch.core.Tuple; +import org.elasticsearch.features.NodeFeature; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.fielddata.FieldDataContext; @@ -79,6 +80,8 @@ * A {@link FieldMapper} for semantic text fields. */ public class SemanticTextFieldMapper extends FieldMapper implements InferenceFieldMapper { + public static final NodeFeature SEMANTIC_TEXT_SEARCH_INFERENCE_ID = new NodeFeature("semantic_text.search_inference_id"); + public static final String CONTENT_TYPE = "semantic_text"; private final IndexSettings indexSettings; @@ -103,6 +106,13 @@ public static class Builder extends FieldMapper.Builder { } }); + private final Parameter searchInferenceId = Parameter.stringParam( + "search_inference_id", + true, + mapper -> ((SemanticTextFieldType) mapper.fieldType()).searchInferenceId, + null + ).acceptsNull(); + private final Parameter modelSettings = new Parameter<>( "model_settings", true, @@ -117,6 +127,17 @@ public static class Builder extends FieldMapper.Builder { private Function inferenceFieldBuilder; + public static Builder from(SemanticTextFieldMapper mapper) { + Builder builder = new Builder( + mapper.leafName(), + mapper.fieldType().indexVersionCreated, + mapper.fieldType().getChunksField().bitsetProducer(), + mapper.indexSettings + ); + builder.init(mapper); + return builder; + } + public Builder( String name, IndexVersion indexVersionCreated, @@ -140,6 +161,11 @@ public Builder setInferenceId(String id) { return this; } + public Builder setSearchInferenceId(String id) { + this.searchInferenceId.setValue(id); + return this; + } + public Builder setModelSettings(SemanticTextField.ModelSettings value) { this.modelSettings.setValue(value); return this; @@ -147,15 +173,17 @@ public Builder setModelSettings(SemanticTextField.ModelSettings value) { @Override protected Parameter[] getParameters() { - return new Parameter[] { inferenceId, modelSettings, meta }; + return new Parameter[] { inferenceId, searchInferenceId, modelSettings, meta }; } @Override protected void merge(FieldMapper mergeWith, Conflicts conflicts, MapperMergeContext mapperMergeContext) { - super.merge(mergeWith, conflicts, mapperMergeContext); + SemanticTextFieldMapper semanticMergeWith = (SemanticTextFieldMapper) mergeWith; + semanticMergeWith = copySettings(semanticMergeWith, mapperMergeContext); + + super.merge(semanticMergeWith, conflicts, mapperMergeContext); conflicts.check(); - var semanticMergeWith = (SemanticTextFieldMapper) mergeWith; - var context = mapperMergeContext.createChildContext(mergeWith.leafName(), ObjectMapper.Dynamic.FALSE); + var context = 
mapperMergeContext.createChildContext(semanticMergeWith.leafName(), ObjectMapper.Dynamic.FALSE); var inferenceField = inferenceFieldBuilder.apply(context.getMapperBuilderContext()); var mergedInferenceField = inferenceField.merge(semanticMergeWith.fieldType().getInferenceField(), context); inferenceFieldBuilder = c -> mergedInferenceField; @@ -181,6 +209,7 @@ public SemanticTextFieldMapper build(MapperBuilderContext context) { new SemanticTextFieldType( fullName, inferenceId.getValue(), + searchInferenceId.getValue(), modelSettings.getValue(), inferenceField, indexVersionCreated, @@ -190,6 +219,25 @@ public SemanticTextFieldMapper build(MapperBuilderContext context) { indexSettings ); } + + /** + * As necessary, copy settings from this builder to the passed-in mapper. + * Used to preserve {@link SemanticTextField.ModelSettings} when updating a semantic text mapping to one where the model settings + * are not specified. + * + * @param mapper The mapper + * @return A mapper with the copied settings applied + */ + private SemanticTextFieldMapper copySettings(SemanticTextFieldMapper mapper, MapperMergeContext mapperMergeContext) { + SemanticTextFieldMapper returnedMapper = mapper; + if (mapper.fieldType().getModelSettings() == null) { + Builder builder = from(mapper); + builder.setModelSettings(modelSettings.getValue()); + returnedMapper = builder.build(mapperMergeContext.getMapperBuilderContext()); + } + + return returnedMapper; + } } private SemanticTextFieldMapper( @@ -211,9 +259,7 @@ public Iterator iterator() { @Override public FieldMapper.Builder getMergeBuilder() { - return new Builder(leafName(), fieldType().indexVersionCreated, fieldType().getChunksField().bitsetProducer(), indexSettings).init( - this - ); + return Builder.from(this); } @Override @@ -267,7 +313,7 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio } } else { Conflicts conflicts = new Conflicts(fullFieldName); - canMergeModelSettings(field.inference().modelSettings(), fieldType().getModelSettings(), conflicts); + canMergeModelSettings(fieldType().getModelSettings(), field.inference().modelSettings(), conflicts); try { conflicts.check(); } catch (Exception exc) { @@ -316,7 +362,7 @@ public InferenceFieldMetadata getMetadata(Set sourcePaths) { String[] copyFields = sourcePaths.toArray(String[]::new); // ensure consistent order Arrays.sort(copyFields); - return new InferenceFieldMetadata(fullPath(), fieldType().inferenceId, copyFields); + return new InferenceFieldMetadata(fullPath(), fieldType().getInferenceId(), fieldType().getSearchInferenceId(), copyFields); } @Override @@ -335,6 +381,7 @@ public Object getOriginalValue(Map sourceAsMap) { public static class SemanticTextFieldType extends SimpleMappedFieldType { private final String inferenceId; + private final String searchInferenceId; private final SemanticTextField.ModelSettings modelSettings; private final ObjectMapper inferenceField; private final IndexVersion indexVersionCreated; @@ -342,6 +389,7 @@ public static class SemanticTextFieldType extends SimpleMappedFieldType { public SemanticTextFieldType( String name, String inferenceId, + String searchInferenceId, SemanticTextField.ModelSettings modelSettings, ObjectMapper inferenceField, IndexVersion indexVersionCreated, @@ -349,6 +397,7 @@ public SemanticTextFieldType( ) { super(name, true, false, false, TextSearchInfo.NONE, meta); this.inferenceId = inferenceId; + this.searchInferenceId = searchInferenceId; this.modelSettings = modelSettings; this.inferenceField = inferenceField; 
this.indexVersionCreated = indexVersionCreated; @@ -363,6 +412,10 @@ public String getInferenceId() { return inferenceId; } + public String getSearchInferenceId() { + return searchInferenceId == null ? inferenceId : searchInferenceId; + } + public SemanticTextField.ModelSettings getModelSettings() { return modelSettings; } @@ -428,14 +481,7 @@ public QueryBuilder semanticQuery(InferenceResults inferenceResults, float boost case SPARSE_EMBEDDING -> { if (inferenceResults instanceof TextExpansionResults == false) { throw new IllegalArgumentException( - "Field [" - + name() - + "] expected query inference results to be of type [" - + TextExpansionResults.NAME - + "]," - + " got [" - + inferenceResults.getWriteableName() - + "]. Has the inference endpoint configuration changed?" + generateQueryInferenceResultsTypeMismatchMessage(inferenceResults, TextExpansionResults.NAME) ); } @@ -454,14 +500,7 @@ public QueryBuilder semanticQuery(InferenceResults inferenceResults, float boost case TEXT_EMBEDDING -> { if (inferenceResults instanceof MlTextEmbeddingResults == false) { throw new IllegalArgumentException( - "Field [" - + name() - + "] expected query inference results to be of type [" - + MlTextEmbeddingResults.NAME - + "]," - + " got [" - + inferenceResults.getWriteableName() - + "]. Has the inference endpoint configuration changed?" + generateQueryInferenceResultsTypeMismatchMessage(inferenceResults, MlTextEmbeddingResults.NAME) ); } @@ -469,13 +508,7 @@ public QueryBuilder semanticQuery(InferenceResults inferenceResults, float boost float[] inference = textEmbeddingResults.getInferenceAsFloat(); if (inference.length != modelSettings.dimensions()) { throw new IllegalArgumentException( - "Field [" - + name() - + "] expected query inference results with " - + modelSettings.dimensions() - + " dimensions, got " - + inference.length - + " dimensions. Has the inference endpoint configuration changed?" + generateDimensionCountMismatchMessage(inference.length, modelSettings.dimensions()) ); } @@ -484,7 +517,7 @@ public QueryBuilder semanticQuery(InferenceResults inferenceResults, float boost default -> throw new IllegalStateException( "Field [" + name() - + "] configured to use an inference endpoint with an unsupported task type [" + + "] is configured to use an inference endpoint with an unsupported task type [" + modelSettings.taskType() + "]" ); @@ -493,6 +526,51 @@ public QueryBuilder semanticQuery(InferenceResults inferenceResults, float boost return new NestedQueryBuilder(nestedFieldPath, childQueryBuilder, ScoreMode.Max).boost(boost).queryName(queryName); } + + private String generateQueryInferenceResultsTypeMismatchMessage(InferenceResults inferenceResults, String expectedResultsType) { + StringBuilder sb = new StringBuilder( + "Field [" + + name() + + "] expected query inference results to be of type [" + + expectedResultsType + + "]," + + " got [" + + inferenceResults.getWriteableName() + + "]." + ); + + return generateInvalidQueryInferenceResultsMessage(sb); + } + + private String generateDimensionCountMismatchMessage(int inferenceDimCount, int expectedDimCount) { + StringBuilder sb = new StringBuilder( + "Field [" + + name() + + "] expected query inference results with " + + expectedDimCount + + " dimensions, got " + + inferenceDimCount + + " dimensions." 
+ ); + + return generateInvalidQueryInferenceResultsMessage(sb); + } + + private String generateInvalidQueryInferenceResultsMessage(StringBuilder baseMessageBuilder) { + if (searchInferenceId != null && searchInferenceId.equals(inferenceId) == false) { + baseMessageBuilder.append( + " Is the search inference endpoint [" + + searchInferenceId + + "] compatible with the inference endpoint [" + + inferenceId + + "]?" + ); + } else { + baseMessageBuilder.append(" Has the configuration for inference endpoint [" + inferenceId + "] changed?"); + } + + return baseMessageBuilder.toString(); + } } private static ObjectMapper createInferenceField( diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java index 7f21f94d33276..9f7fcb1ef407c 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java @@ -284,7 +284,7 @@ private static String getInferenceIdForForField(Collection indexM String inferenceId = null; for (IndexMetadata indexMetadata : indexMetadataCollection) { InferenceFieldMetadata inferenceFieldMetadata = indexMetadata.getInferenceFields().get(fieldName); - String indexInferenceId = inferenceFieldMetadata != null ? inferenceFieldMetadata.getInferenceId() : null; + String indexInferenceId = inferenceFieldMetadata != null ? inferenceFieldMetadata.getSearchInferenceId() : null; if (indexInferenceId != null) { if (inferenceId != null && inferenceId.equals(indexInferenceId) == false) { throw new IllegalArgumentException("Field [" + fieldName + "] has multiple inference IDs associated with it"); diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java index bb0691c691176..1697b33fedd92 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java @@ -23,6 +23,7 @@ import org.apache.lucene.search.join.QueryBitSetProducer; import org.apache.lucene.search.join.ScoreMode; import org.elasticsearch.action.admin.indices.mapping.put.PutMappingRequest; +import org.elasticsearch.common.CheckedBiFunction; import org.elasticsearch.common.Strings; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.compress.CompressedXContent; @@ -140,6 +141,7 @@ public MappedFieldType getMappedFieldType() { "fake-inference-id", null, null, + null, IndexVersion.current(), Map.of() ); @@ -210,13 +212,28 @@ public void testUpdatesToInferenceIdNotSupported() throws IOException { public void testDynamicUpdate() throws IOException { final String fieldName = "semantic"; final String inferenceId = "test_service"; + final String searchInferenceId = "search_test_service"; - MapperService mapperService = mapperServiceForFieldWithModelSettings( - fieldName, - inferenceId, - new SemanticTextField.ModelSettings(TaskType.SPARSE_EMBEDDING, null, null, null) - ); - assertSemanticTextField(mapperService, fieldName, true); + { + MapperService mapperService = mapperServiceForFieldWithModelSettings( + fieldName, + 
inferenceId, + new SemanticTextField.ModelSettings(TaskType.SPARSE_EMBEDDING, null, null, null) + ); + assertSemanticTextField(mapperService, fieldName, true); + assertSearchInferenceId(mapperService, fieldName, inferenceId); + } + + { + MapperService mapperService = mapperServiceForFieldWithModelSettings( + fieldName, + inferenceId, + searchInferenceId, + new SemanticTextField.ModelSettings(TaskType.SPARSE_EMBEDDING, null, null, null) + ); + assertSemanticTextField(mapperService, fieldName, true); + assertSearchInferenceId(mapperService, fieldName, searchInferenceId); + } } public void testUpdateModelSettings() throws IOException { @@ -260,19 +277,11 @@ public void testUpdateModelSettings() throws IOException { assertSemanticTextField(mapperService, fieldName, true); } { - Exception exc = expectThrows( - IllegalArgumentException.class, - () -> merge( - mapperService, - mapping( - b -> b.startObject(fieldName).field("type", "semantic_text").field("inference_id", "test_model").endObject() - ) - ) - ); - assertThat( - exc.getMessage(), - containsString("Cannot update parameter [model_settings] " + "from [task_type=sparse_embedding] to [null]") + merge( + mapperService, + mapping(b -> b.startObject(fieldName).field("type", "semantic_text").field("inference_id", "test_model").endObject()) ); + assertSemanticTextField(mapperService, fieldName, true); } { Exception exc = expectThrows( @@ -305,7 +314,60 @@ public void testUpdateModelSettings() throws IOException { } } - static void assertSemanticTextField(MapperService mapperService, String fieldName, boolean expectedModelSettings) { + public void testUpdateSearchInferenceId() throws IOException { + final String inferenceId = "test_inference_id"; + final String searchInferenceId1 = "test_search_inference_id_1"; + final String searchInferenceId2 = "test_search_inference_id_2"; + + CheckedBiFunction buildMapping = (f, sid) -> mapping(b -> { + b.startObject(f).field("type", "semantic_text").field("inference_id", inferenceId); + if (sid != null) { + b.field("search_inference_id", sid); + } + b.endObject(); + }); + + for (int depth = 1; depth < 5; depth++) { + String fieldName = randomFieldName(depth); + MapperService mapperService = createMapperService(buildMapping.apply(fieldName, null)); + assertSemanticTextField(mapperService, fieldName, false); + assertSearchInferenceId(mapperService, fieldName, inferenceId); + + merge(mapperService, buildMapping.apply(fieldName, searchInferenceId1)); + assertSemanticTextField(mapperService, fieldName, false); + assertSearchInferenceId(mapperService, fieldName, searchInferenceId1); + + merge(mapperService, buildMapping.apply(fieldName, searchInferenceId2)); + assertSemanticTextField(mapperService, fieldName, false); + assertSearchInferenceId(mapperService, fieldName, searchInferenceId2); + + merge(mapperService, buildMapping.apply(fieldName, null)); + assertSemanticTextField(mapperService, fieldName, false); + assertSearchInferenceId(mapperService, fieldName, inferenceId); + + mapperService = mapperServiceForFieldWithModelSettings( + fieldName, + inferenceId, + new SemanticTextField.ModelSettings(TaskType.SPARSE_EMBEDDING, null, null, null) + ); + assertSemanticTextField(mapperService, fieldName, true); + assertSearchInferenceId(mapperService, fieldName, inferenceId); + + merge(mapperService, buildMapping.apply(fieldName, searchInferenceId1)); + assertSemanticTextField(mapperService, fieldName, true); + assertSearchInferenceId(mapperService, fieldName, searchInferenceId1); + + merge(mapperService, 
buildMapping.apply(fieldName, searchInferenceId2)); + assertSemanticTextField(mapperService, fieldName, true); + assertSearchInferenceId(mapperService, fieldName, searchInferenceId2); + + merge(mapperService, buildMapping.apply(fieldName, null)); + assertSemanticTextField(mapperService, fieldName, true); + assertSearchInferenceId(mapperService, fieldName, inferenceId); + } + } + + private static void assertSemanticTextField(MapperService mapperService, String fieldName, boolean expectedModelSettings) { Mapper mapper = mapperService.mappingLookup().getMapper(fieldName); assertNotNull(mapper); assertThat(mapper, instanceOf(SemanticTextFieldMapper.class)); @@ -347,21 +409,34 @@ static void assertSemanticTextField(MapperService mapperService, String fieldNam } } + private static void assertSearchInferenceId(MapperService mapperService, String fieldName, String expectedSearchInferenceId) { + var fieldType = mapperService.fieldType(fieldName); + assertNotNull(fieldType); + assertThat(fieldType, instanceOf(SemanticTextFieldMapper.SemanticTextFieldType.class)); + SemanticTextFieldMapper.SemanticTextFieldType semanticTextFieldType = (SemanticTextFieldMapper.SemanticTextFieldType) fieldType; + assertEquals(expectedSearchInferenceId, semanticTextFieldType.getSearchInferenceId()); + } + public void testSuccessfulParse() throws IOException { for (int depth = 1; depth < 4; depth++) { final String fieldName1 = randomFieldName(depth); final String fieldName2 = randomFieldName(depth + 1); + final String searchInferenceId = randomAlphaOfLength(8); + final boolean setSearchInferenceId = randomBoolean(); Model model1 = TestModel.createRandomInstance(TaskType.SPARSE_EMBEDDING); Model model2 = TestModel.createRandomInstance(TaskType.SPARSE_EMBEDDING); XContentBuilder mapping = mapping(b -> { - addSemanticTextMapping(b, fieldName1, model1.getInferenceEntityId()); - addSemanticTextMapping(b, fieldName2, model2.getInferenceEntityId()); + addSemanticTextMapping(b, fieldName1, model1.getInferenceEntityId(), setSearchInferenceId ? searchInferenceId : null); + addSemanticTextMapping(b, fieldName2, model2.getInferenceEntityId(), setSearchInferenceId ? searchInferenceId : null); }); MapperService mapperService = createMapperService(mapping); - SemanticTextFieldMapperTests.assertSemanticTextField(mapperService, fieldName1, false); - SemanticTextFieldMapperTests.assertSemanticTextField(mapperService, fieldName2, false); + assertSemanticTextField(mapperService, fieldName1, false); + assertSearchInferenceId(mapperService, fieldName1, setSearchInferenceId ? searchInferenceId : model1.getInferenceEntityId()); + assertSemanticTextField(mapperService, fieldName2, false); + assertSearchInferenceId(mapperService, fieldName2, setSearchInferenceId ? 
searchInferenceId : model2.getInferenceEntityId()); + DocumentMapper documentMapper = mapperService.documentMapper(); ParsedDocument doc = documentMapper.parse( source( @@ -449,7 +524,7 @@ public void testSuccessfulParse() throws IOException { } public void testMissingInferenceId() throws IOException { - DocumentMapper documentMapper = createDocumentMapper(mapping(b -> addSemanticTextMapping(b, "field", "my_id"))); + DocumentMapper documentMapper = createDocumentMapper(mapping(b -> addSemanticTextMapping(b, "field", "my_id", null))); IllegalArgumentException ex = expectThrows( DocumentParsingException.class, IllegalArgumentException.class, @@ -468,7 +543,7 @@ public void testMissingInferenceId() throws IOException { } public void testMissingModelSettings() throws IOException { - DocumentMapper documentMapper = createDocumentMapper(mapping(b -> addSemanticTextMapping(b, "field", "my_id"))); + DocumentMapper documentMapper = createDocumentMapper(mapping(b -> addSemanticTextMapping(b, "field", "my_id", null))); IllegalArgumentException ex = expectThrows( DocumentParsingException.class, IllegalArgumentException.class, @@ -480,7 +555,7 @@ public void testMissingModelSettings() throws IOException { } public void testMissingTaskType() throws IOException { - DocumentMapper documentMapper = createDocumentMapper(mapping(b -> addSemanticTextMapping(b, "field", "my_id"))); + DocumentMapper documentMapper = createDocumentMapper(mapping(b -> addSemanticTextMapping(b, "field", "my_id", null))); IllegalArgumentException ex = expectThrows( DocumentParsingException.class, IllegalArgumentException.class, @@ -540,12 +615,24 @@ private MapperService mapperServiceForFieldWithModelSettings( String inferenceId, SemanticTextField.ModelSettings modelSettings ) throws IOException { + return mapperServiceForFieldWithModelSettings(fieldName, inferenceId, null, modelSettings); + } + + private MapperService mapperServiceForFieldWithModelSettings( + String fieldName, + String inferenceId, + String searchInferenceId, + SemanticTextField.ModelSettings modelSettings + ) throws IOException { + String mappingParams = "type=semantic_text,inference_id=" + inferenceId; + if (searchInferenceId != null) { + mappingParams += ",search_inference_id=" + searchInferenceId; + } + MapperService mapperService = createMapperService(mapping(b -> {})); mapperService.merge( "_doc", - new CompressedXContent( - Strings.toString(PutMappingRequest.simpleMapping(fieldName, "type=semantic_text,inference_id=" + inferenceId)) - ), + new CompressedXContent(Strings.toString(PutMappingRequest.simpleMapping(fieldName, mappingParams))), MapperService.MergeReason.MAPPING_UPDATE ); @@ -615,10 +702,18 @@ protected void assertExistsQuery(MappedFieldType fieldType, Query query, LuceneD assertThat(query, instanceOf(MatchNoDocsQuery.class)); } - private static void addSemanticTextMapping(XContentBuilder mappingBuilder, String fieldName, String modelId) throws IOException { + private static void addSemanticTextMapping( + XContentBuilder mappingBuilder, + String fieldName, + String inferenceId, + String searchInferenceId + ) throws IOException { mappingBuilder.startObject(fieldName); mappingBuilder.field("type", SemanticTextFieldMapper.CONTENT_TYPE); - mappingBuilder.field("inference_id", modelId); + mappingBuilder.field("inference_id", inferenceId); + if (searchInferenceId != null) { + mappingBuilder.field("search_inference_id", searchInferenceId); + } mappingBuilder.endObject(); } diff --git 
a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilderTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilderTests.java index c2b99923bae61..f54ce89183079 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilderTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilderTests.java @@ -79,9 +79,11 @@ public class SemanticQueryBuilderTests extends AbstractQueryTestCase randomFrom(DenseVectorFieldMapper.ElementType.values()) ); // TODO: Support bit elements once KNN bit vector queries are available + useSearchInferenceId = randomBoolean(); } @Override @@ -126,11 +129,14 @@ protected Settings createTestIndexSettings() { @Override protected void initializeAdditionalMappings(MapperService mapperService) throws IOException { + String mappingConfig = "type=semantic_text,inference_id=" + INFERENCE_ID; + if (useSearchInferenceId) { + mappingConfig += ",search_inference_id=" + SEARCH_INFERENCE_ID; + } + mapperService.merge( "_doc", - new CompressedXContent( - Strings.toString(PutMappingRequest.simpleMapping(SEMANTIC_TEXT_FIELD, "type=semantic_text,inference_id=" + INFERENCE_ID)) - ), + new CompressedXContent(Strings.toString(PutMappingRequest.simpleMapping(SEMANTIC_TEXT_FIELD, mappingConfig))), MapperService.MergeReason.MAPPING_UPDATE ); @@ -244,6 +250,7 @@ protected Object simulateMethod(Method method, Object[] args) { InferenceAction.Request request = (InferenceAction.Request) args[1]; assertThat(request.getTaskType(), equalTo(TaskType.ANY)); assertThat(request.getInputType(), equalTo(InputType.SEARCH)); + assertThat(request.getInferenceEntityId(), equalTo(useSearchInferenceId ? 
SEARCH_INFERENCE_ID : INFERENCE_ID)); List input = request.getInput(); assertThat(input.size(), equalTo(1)); diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/40_semantic_text_query.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/40_semantic_text_query.yml index 932ee4854f445..2070b3752791a 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/40_semantic_text_query.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/40_semantic_text_query.yml @@ -18,6 +18,21 @@ setup: } } + - do: + inference.put: + task_type: sparse_embedding + inference_id: sparse-inference-id-2 + body: > + { + "service": "test_service", + "service_settings": { + "model": "my_model", + "api_key": "abc64" + }, + "task_settings": { + } + } + - do: inference.put: task_type: text_embedding @@ -35,6 +50,23 @@ setup: } } + - do: + inference.put: + task_type: text_embedding + inference_id: dense-inference-id-2 + body: > + { + "service": "text_embedding_test_service", + "service_settings": { + "model": "my_model", + "dimensions": 10, + "api_key": "abc64", + "similarity": "COSINE" + }, + "task_settings": { + } + } + - do: indices.create: index: test-sparse-index @@ -142,6 +174,51 @@ setup: - match: { hits.hits.0._id: "doc_1" } - length: { hits.hits.0._source.inference_field.inference.chunks: 1 } +--- +"Query using a sparse embedding model via a search inference ID": + - requires: + cluster_features: "semantic_text.search_inference_id" + reason: search_inference_id introduced in 8.16.0 + + - skip: + features: [ "headers", "close_to" ] + + - do: + indices.put_mapping: + index: test-sparse-index + body: + properties: + inference_field: + type: semantic_text + inference_id: sparse-inference-id + search_inference_id: sparse-inference-id-2 + + - do: + index: + index: test-sparse-index + id: doc_1 + body: + inference_field: [ "inference test", "another inference test" ] + non_inference_field: "non inference test" + refresh: true + + - do: + headers: + # Force JSON content type so that we use a parser that interprets the floating-point score as a double + Content-Type: application/json + search: + index: test-sparse-index + body: + query: + semantic: + field: "inference_field" + query: "inference test" + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } + - close_to: { hits.hits.0._score: { value: 3.7837332e17, error: 1e10 } } + - length: { hits.hits.0._source.inference_field.inference.chunks: 2 } + --- "Query using a dense embedding model": - skip: @@ -286,6 +363,51 @@ setup: - close_to: { hits.hits.0._score: { value: 1.0, error: 0.0001 } } - length: { hits.hits.0._source.inference_field.inference.chunks: 2 } +--- +"Query using a dense embedding model via a search inference ID": + - requires: + cluster_features: "semantic_text.search_inference_id" + reason: search_inference_id introduced in 8.16.0 + + - skip: + features: [ "headers", "close_to" ] + + - do: + indices.put_mapping: + index: test-dense-index + body: + properties: + inference_field: + type: semantic_text + inference_id: dense-inference-id + search_inference_id: dense-inference-id-2 + + - do: + index: + index: test-dense-index + id: doc_1 + body: + inference_field: ["inference test", "another inference test"] + non_inference_field: "non inference test" + refresh: true + + - do: + headers: + # Force JSON content type so that we use a parser that interprets the floating-point score as a double + 
Content-Type: application/json + search: + index: test-dense-index + body: + query: + semantic: + field: "inference_field" + query: "inference test" + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } + - close_to: { hits.hits.0._score: { value: 1.0, error: 0.0001 } } + - length: { hits.hits.0._source.inference_field.inference.chunks: 2 } + --- "Apply boost and query name": - skip: @@ -581,3 +703,139 @@ setup: - match: { error.type: "resource_not_found_exception" } - match: { error.reason: "Inference endpoint not found [invalid-inference-id]" } + +--- +"Query a field with a search inference ID that uses the wrong task type": + - requires: + cluster_features: "semantic_text.search_inference_id" + reason: search_inference_id introduced in 8.16.0 + + - do: + indices.put_mapping: + index: test-sparse-index + body: + properties: + inference_field: + type: semantic_text + inference_id: sparse-inference-id + search_inference_id: dense-inference-id + + - do: + index: + index: test-sparse-index + id: doc_1 + body: + inference_field: [ "inference test", "another inference test" ] + non_inference_field: "non inference test" + refresh: true + + - do: + catch: bad_request + search: + index: test-sparse-index + body: + query: + semantic: + field: "inference_field" + query: "inference test" + + - match: { error.caused_by.type: "illegal_argument_exception" } + - match: { error.caused_by.reason: "Field [inference_field] expected query inference results to be of type + [text_expansion_result], got [text_embedding_result]. Is the search inference + endpoint [dense-inference-id] compatible with the inference endpoint + [sparse-inference-id]?" } + +--- +"Query a field with a search inference ID that uses the wrong dimension count": + - requires: + cluster_features: "semantic_text.search_inference_id" + reason: search_inference_id introduced in 8.16.0 + + - do: + inference.put: + task_type: text_embedding + inference_id: dense-inference-id-20-dims + body: > + { + "service": "text_embedding_test_service", + "service_settings": { + "model": "my_model", + "dimensions": 20, + "api_key": "abc64", + "similarity": "COSINE" + }, + "task_settings": { + } + } + + - do: + indices.put_mapping: + index: test-dense-index + body: + properties: + inference_field: + type: semantic_text + inference_id: dense-inference-id + search_inference_id: dense-inference-id-20-dims + + - do: + index: + index: test-dense-index + id: doc_1 + body: + inference_field: ["inference test", "another inference test"] + non_inference_field: "non inference test" + refresh: true + + - do: + catch: bad_request + search: + index: test-dense-index + body: + query: + semantic: + field: "inference_field" + query: "inference test" + + - match: { error.caused_by.type: "illegal_argument_exception" } + - match: { error.caused_by.reason: "Field [inference_field] expected query inference results with 10 dimensions, got + 20 dimensions. Is the search inference endpoint [dense-inference-id-20-dims] + compatible with the inference endpoint [dense-inference-id]?" 
} + +--- +"Query a field with an invalid search inference ID": + - requires: + cluster_features: "semantic_text.search_inference_id" + reason: search_inference_id introduced in 8.16.0 + + - do: + indices.put_mapping: + index: test-dense-index + body: + properties: + inference_field: + type: semantic_text + inference_id: dense-inference-id + search_inference_id: invalid-inference-id + + - do: + index: + index: test-dense-index + id: doc_1 + body: + inference_field: [ "inference test", "another inference test" ] + non_inference_field: "non inference test" + refresh: true + + - do: + catch: missing + search: + index: test-dense-index + body: + query: + semantic: + field: "inference_field" + query: "inference test" + + - match: { error.type: "resource_not_found_exception" } + - match: { error.reason: "Inference endpoint not found [invalid-inference-id]" } diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/50_semantic_text_query_inference_endpoint_changes.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/50_semantic_text_query_inference_endpoint_changes.yml index f6a7073914609..51595d40737a3 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/50_semantic_text_query_inference_endpoint_changes.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/50_semantic_text_query_inference_endpoint_changes.yml @@ -112,8 +112,8 @@ setup: - match: { error.caused_by.type: "illegal_argument_exception" } - match: { error.caused_by.reason: "Field [inference_field] expected query inference results to be of type - [text_expansion_result], got [text_embedding_result]. Has the inference endpoint - configuration changed?" } + [text_expansion_result], got [text_embedding_result]. Has the configuration for + inference endpoint [sparse-inference-id] changed?" } --- "text_embedding changed to sparse_embedding": @@ -149,8 +149,8 @@ setup: - match: { error.caused_by.type: "illegal_argument_exception" } - match: { error.caused_by.reason: "Field [inference_field] expected query inference results to be of type - [text_embedding_result], got [text_expansion_result]. Has the inference endpoint - configuration changed?" } + [text_embedding_result], got [text_expansion_result]. Has the configuration for + inference endpoint [dense-inference-id] changed?" } --- "text_embedding dimension count changed": @@ -188,4 +188,5 @@ setup: - match: { error.caused_by.type: "illegal_argument_exception" } - match: { error.caused_by.reason: "Field [inference_field] expected query inference results with 10 dimensions, got - 20 dimensions. Has the inference endpoint configuration changed?" } + 20 dimensions. Has the configuration for inference endpoint [dense-inference-id] + changed?" } From 5ef062cea05ad513ee55f1b5bc50ddff724fa986 Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 25 Sep 2024 09:27:47 +0100 Subject: [PATCH 39/58] Make `UpdateSettingsClusterStateUpdateRequest` a record (#113484) No need to extend `IndicesClusterStateUpdateRequest`, this thing can be completely immutable. 
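As a rough sketch of what call sites look like once the class is a record (the
timeout values, the settings, and the `indices` array below are illustrative
placeholders, not code from this patch):

    // All state is supplied up front; the request is immutable, so there are no setters.
    var request = new UpdateSettingsClusterStateUpdateRequest(
        TimeValue.timeValueSeconds(30),                                  // masterNodeTimeout
        TimeValue.ZERO,                                                  // ackTimeout
        Settings.builder().put("index.max_result_window", 1500).build(),
        UpdateSettingsClusterStateUpdateRequest.OnExisting.OVERWRITE,    // the API default
        UpdateSettingsClusterStateUpdateRequest.OnStaticSetting.REJECT,  // the API default
        indices
    );

As the record's compact constructor in the diff below shows, the timeouts, settings
and indices are null-checked up front, so a half-built request can no longer escape.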
Backport of #113450 to 8.x --- .../MetadataUpdateSettingsServiceIT.java | 133 ++++++++++-------- .../put/TransportUpdateSettingsAction.java | 31 ++-- ...dateSettingsClusterStateUpdateRequest.java | 86 +++++------ .../MetadataUpdateSettingsService.java | 4 +- .../upgrades/SystemIndexMigrator.java | 15 +- ...TransportUpdateSecuritySettingsAction.java | 16 ++- .../TransportUpdateWatcherSettingsAction.java | 12 +- 7 files changed, 165 insertions(+), 132 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/cluster/metadata/MetadataUpdateSettingsServiceIT.java b/server/src/internalClusterTest/java/org/elasticsearch/cluster/metadata/MetadataUpdateSettingsServiceIT.java index b3b7957801cd7..c1e68040e075b 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/cluster/metadata/MetadataUpdateSettingsServiceIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/cluster/metadata/MetadataUpdateSettingsServiceIT.java @@ -28,6 +28,7 @@ import java.util.List; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; +import java.util.function.Function; import static org.hamcrest.Matchers.equalTo; @@ -42,45 +43,58 @@ public void testThatNonDynamicSettingChangesTakeEffect() throws Exception { MetadataUpdateSettingsService metadataUpdateSettingsService = internalCluster().getCurrentMasterNodeInstance( MetadataUpdateSettingsService.class ); - UpdateSettingsClusterStateUpdateRequest request = new UpdateSettingsClusterStateUpdateRequest().ackTimeout(TimeValue.ZERO); - List indices = new ArrayList<>(); + List indicesList = new ArrayList<>(); for (IndicesService indicesService : internalCluster().getInstances(IndicesService.class)) { for (IndexService indexService : indicesService) { - indices.add(indexService.index()); + indicesList.add(indexService.index()); } } - request.indices(indices.toArray(Index.EMPTY_ARRAY)); - request.settings(Settings.builder().put("index.codec", "FastDecompressionCompressingStoredFieldsData").build()); + final var indices = indicesList.toArray(Index.EMPTY_ARRAY); + + final Function requestFactory = + onStaticSetting -> new UpdateSettingsClusterStateUpdateRequest( + TEST_REQUEST_TIMEOUT, + TimeValue.ZERO, + Settings.builder().put("index.codec", "FastDecompressionCompressingStoredFieldsData").build(), + UpdateSettingsClusterStateUpdateRequest.OnExisting.OVERWRITE, + onStaticSetting, + indices + ); // First make sure it fails if reopenShards is not set on the request: AtomicBoolean expectedFailureOccurred = new AtomicBoolean(false); - metadataUpdateSettingsService.updateSettings(request, new ActionListener<>() { - @Override - public void onResponse(AcknowledgedResponse acknowledgedResponse) { - fail("Should have failed updating a non-dynamic setting without reopenShards set to true"); - } + metadataUpdateSettingsService.updateSettings( + requestFactory.apply(UpdateSettingsClusterStateUpdateRequest.OnStaticSetting.REJECT), + new ActionListener<>() { + @Override + public void onResponse(AcknowledgedResponse acknowledgedResponse) { + fail("Should have failed updating a non-dynamic setting without reopenShards set to true"); + } - @Override - public void onFailure(Exception e) { - expectedFailureOccurred.set(true); + @Override + public void onFailure(Exception e) { + expectedFailureOccurred.set(true); + } } - }); + ); assertBusy(() -> assertThat(expectedFailureOccurred.get(), equalTo(true))); // Now we set reopenShards and expect it to work: - request.reopenShards(true); AtomicBoolean success = new 
AtomicBoolean(false); - metadataUpdateSettingsService.updateSettings(request, new ActionListener<>() { - @Override - public void onResponse(AcknowledgedResponse acknowledgedResponse) { - success.set(true); - } + metadataUpdateSettingsService.updateSettings( + requestFactory.apply(UpdateSettingsClusterStateUpdateRequest.OnStaticSetting.REOPEN_INDICES), + new ActionListener<>() { + @Override + public void onResponse(AcknowledgedResponse acknowledgedResponse) { + success.set(true); + } - @Override - public void onFailure(Exception e) { - fail(e); + @Override + public void onFailure(Exception e) { + fail(e); + } } - }); + ); assertBusy(() -> assertThat(success.get(), equalTo(true))); // Now we look into the IndexShard objects to make sure that the code was actually updated (vs just the setting): @@ -110,16 +124,23 @@ public void testThatNonDynamicSettingChangesDoNotUnncessesarilyCauseReopens() th MetadataUpdateSettingsService metadataUpdateSettingsService = internalCluster().getCurrentMasterNodeInstance( MetadataUpdateSettingsService.class ); - UpdateSettingsClusterStateUpdateRequest request = new UpdateSettingsClusterStateUpdateRequest().ackTimeout(TimeValue.ZERO); - List indices = new ArrayList<>(); + List indicesList = new ArrayList<>(); for (IndicesService indicesService : internalCluster().getInstances(IndicesService.class)) { for (IndexService indexService : indicesService) { - indices.add(indexService.index()); + indicesList.add(indexService.index()); } } - request.indices(indices.toArray(Index.EMPTY_ARRAY)); - request.settings(Settings.builder().put("index.codec", "FastDecompressionCompressingStoredFieldsData").build()); - request.reopenShards(true); + final var indices = indicesList.toArray(Index.EMPTY_ARRAY); + + final Function requestFactory = + settings -> new UpdateSettingsClusterStateUpdateRequest( + TEST_REQUEST_TIMEOUT, + TimeValue.ZERO, + settings.build(), + UpdateSettingsClusterStateUpdateRequest.OnExisting.OVERWRITE, + UpdateSettingsClusterStateUpdateRequest.OnStaticSetting.REOPEN_INDICES, + indices + ); ClusterService clusterService = internalCluster().getInstance(ClusterService.class); AtomicBoolean shardsUnassigned = new AtomicBoolean(false); @@ -142,47 +163,49 @@ public void testThatNonDynamicSettingChangesDoNotUnncessesarilyCauseReopens() th AtomicBoolean success = new AtomicBoolean(false); // Make the first request, just to set things up: - metadataUpdateSettingsService.updateSettings(request, new ActionListener<>() { - @Override - public void onResponse(AcknowledgedResponse acknowledgedResponse) { - success.set(true); - } + metadataUpdateSettingsService.updateSettings( + requestFactory.apply(Settings.builder().put("index.codec", "FastDecompressionCompressingStoredFieldsData")), + new ActionListener<>() { + @Override + public void onResponse(AcknowledgedResponse acknowledgedResponse) { + success.set(true); + } - @Override - public void onFailure(Exception e) { - fail(e); + @Override + public void onFailure(Exception e) { + fail(e); + } } - }); + ); assertBusy(() -> assertThat(success.get(), equalTo(true))); assertBusy(() -> assertThat(expectedSettingsChangeInClusterState.get(), equalTo(true))); assertThat(shardsUnassigned.get(), equalTo(true)); assertBusy(() -> assertThat(hasUnassignedShards(clusterService.state(), indexName), equalTo(false))); - // Same request, except now we'll also set the dynamic "index.max_result_window" setting: - request.settings( - Settings.builder() - .put("index.codec", "FastDecompressionCompressingStoredFieldsData") - 
.put("index.max_result_window", "1500") - .build() - ); success.set(false); expectedSettingsChangeInClusterState.set(false); shardsUnassigned.set(false); expectedSetting.set("index.max_result_window"); expectedSettingValue.set("1500"); // Making this request ought to add this new setting but not unassign the shards: - metadataUpdateSettingsService.updateSettings(request, new ActionListener<>() { - @Override - public void onResponse(AcknowledgedResponse acknowledgedResponse) { - success.set(true); - } + metadataUpdateSettingsService.updateSettings( + // Same request, except now we'll also set the dynamic "index.max_result_window" setting: + requestFactory.apply( + Settings.builder().put("index.codec", "FastDecompressionCompressingStoredFieldsData").put("index.max_result_window", "1500") + ), + new ActionListener<>() { + @Override + public void onResponse(AcknowledgedResponse acknowledgedResponse) { + success.set(true); + } - @Override - public void onFailure(Exception e) { - fail(e); + @Override + public void onFailure(Exception e) { + fail(e); + } } - }); + ); assertBusy(() -> assertThat(success.get(), equalTo(true))); assertBusy(() -> assertThat(expectedSettingsChangeInClusterState.get(), equalTo(true))); diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/settings/put/TransportUpdateSettingsAction.java b/server/src/main/java/org/elasticsearch/action/admin/indices/settings/put/TransportUpdateSettingsAction.java index 1d7c264065d6f..1e7f32641b86f 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/settings/put/TransportUpdateSettingsAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/settings/put/TransportUpdateSettingsAction.java @@ -124,19 +124,24 @@ protected void masterOperation( return; } - UpdateSettingsClusterStateUpdateRequest clusterStateUpdateRequest = new UpdateSettingsClusterStateUpdateRequest().indices( - concreteIndices - ) - .settings(requestSettings) - .setPreserveExisting(request.isPreserveExisting()) - .reopenShards(request.reopen()) - .ackTimeout(request.ackTimeout()) - .masterNodeTimeout(request.masterNodeTimeout()); - - updateSettingsService.updateSettings(clusterStateUpdateRequest, listener.delegateResponse((l, e) -> { - logger.debug(() -> "failed to update settings on indices [" + Arrays.toString(concreteIndices) + "]", e); - l.onFailure(e); - })); + updateSettingsService.updateSettings( + new UpdateSettingsClusterStateUpdateRequest( + request.masterNodeTimeout(), + request.ackTimeout(), + requestSettings, + request.isPreserveExisting() + ? UpdateSettingsClusterStateUpdateRequest.OnExisting.PRESERVE + : UpdateSettingsClusterStateUpdateRequest.OnExisting.OVERWRITE, + request.reopen() + ? 
UpdateSettingsClusterStateUpdateRequest.OnStaticSetting.REOPEN_INDICES + : UpdateSettingsClusterStateUpdateRequest.OnStaticSetting.REJECT, + concreteIndices + ), + listener.delegateResponse((l, e) -> { + logger.debug(() -> "failed to update settings on indices [" + Arrays.toString(concreteIndices) + "]", e); + l.onFailure(e); + }) + ); } /** diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/settings/put/UpdateSettingsClusterStateUpdateRequest.java b/server/src/main/java/org/elasticsearch/action/admin/indices/settings/put/UpdateSettingsClusterStateUpdateRequest.java index 42a904c704bf3..fe8573da5fb68 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/settings/put/UpdateSettingsClusterStateUpdateRequest.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/settings/put/UpdateSettingsClusterStateUpdateRequest.java @@ -9,70 +9,60 @@ package org.elasticsearch.action.admin.indices.settings.put; -import org.elasticsearch.cluster.ack.IndicesClusterStateUpdateRequest; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.index.Index; -import java.util.Arrays; +import java.util.Objects; /** * Cluster state update request that allows to update settings for some indices */ -public class UpdateSettingsClusterStateUpdateRequest extends IndicesClusterStateUpdateRequest { - - private Settings settings; - - private boolean preserveExisting = false; - - private boolean reopenShards = false; - - /** - * Returns true iff the settings update should only add but not update settings. If the setting already exists - * it should not be overwritten by this update. The default is false - */ - public boolean isPreserveExisting() { - return preserveExisting; - } +public record UpdateSettingsClusterStateUpdateRequest( + TimeValue masterNodeTimeout, + TimeValue ackTimeout, + Settings settings, + OnExisting onExisting, + OnStaticSetting onStaticSetting, + Index... indices +) { /** - * Returns true if non-dynamic setting updates should go through, by automatically unassigning shards in the same cluster - * state change as the setting update. The shards will be automatically reassigned after the cluster state update is made. The - * default is false. + * Specifies the behaviour of an update-settings action on existing settings. */ - public boolean reopenShards() { - return reopenShards; - } + public enum OnExisting { + /** + * Update all the specified settings, overwriting any settings which already exist. This is the API default. + */ + OVERWRITE, - public UpdateSettingsClusterStateUpdateRequest reopenShards(boolean reopenShards) { - this.reopenShards = reopenShards; - return this; + /** + * Only add new settings, preserving the values of any settings which are already set and ignoring the new values specified in the + * request. + */ + PRESERVE } /** - * Iff set to true this settings update will only add settings not already set on an index. Existing settings remain - * unchanged. + * Specifies the behaviour of an update-settings action which is trying to adjust a non-dynamic setting. */ - public UpdateSettingsClusterStateUpdateRequest setPreserveExisting(boolean preserveExisting) { - this.preserveExisting = preserveExisting; - return this; - } + public enum OnStaticSetting { + /** + * Reject attempts to update non-dynamic settings on open indices. This is the API default. 
+ */ + REJECT, - /** - * Returns the {@link Settings} to update - */ - public Settings settings() { - return settings; - } - - /** - * Sets the {@link Settings} to update - */ - public UpdateSettingsClusterStateUpdateRequest settings(Settings settings) { - this.settings = settings; - return this; + /** + * Automatically close and reopen the shards of any open indices when updating a non-dynamic setting, forcing the shard to + * reinitialize from scratch. + */ + REOPEN_INDICES } - @Override - public String toString() { - return Arrays.toString(indices()) + settings; + public UpdateSettingsClusterStateUpdateRequest { + Objects.requireNonNull(masterNodeTimeout); + Objects.requireNonNull(ackTimeout); + Objects.requireNonNull(settings); + Objects.requireNonNull(indices); } } diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataUpdateSettingsService.java b/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataUpdateSettingsService.java index cee3b4c0bdac1..4fcbd4165423b 100644 --- a/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataUpdateSettingsService.java +++ b/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataUpdateSettingsService.java @@ -176,7 +176,7 @@ ClusterState execute(ClusterState currentState) { } final Settings closedSettings = settingsForClosedIndices.build(); final Settings openSettings = settingsForOpenIndices.build(); - final boolean preserveExisting = request.isPreserveExisting(); + final boolean preserveExisting = request.onExisting() == UpdateSettingsClusterStateUpdateRequest.OnExisting.PRESERVE; RoutingTable.Builder routingTableBuilder = null; Metadata.Builder metadataBuilder = Metadata.builder(currentState.metadata()); @@ -199,7 +199,7 @@ ClusterState execute(ClusterState currentState) { } if (skippedSettings.isEmpty() == false && openIndices.isEmpty() == false) { - if (request.reopenShards()) { + if (request.onStaticSetting() == UpdateSettingsClusterStateUpdateRequest.OnStaticSetting.REOPEN_INDICES) { // We have non-dynamic settings and open indices. We will unassign all of the shards in these indices so that the new // changed settings are applied when the shards are re-assigned. 
routingTableBuilder = RoutingTable.builder( diff --git a/server/src/main/java/org/elasticsearch/upgrades/SystemIndexMigrator.java b/server/src/main/java/org/elasticsearch/upgrades/SystemIndexMigrator.java index 711ca08cd5df6..3e1d901a1f237 100644 --- a/server/src/main/java/org/elasticsearch/upgrades/SystemIndexMigrator.java +++ b/server/src/main/java/org/elasticsearch/upgrades/SystemIndexMigrator.java @@ -537,11 +537,18 @@ private CheckedBiConsumer, AcknowledgedResp */ private void setWriteBlock(Index index, boolean readOnlyValue, ActionListener listener) { final Settings readOnlySettings = Settings.builder().put(IndexMetadata.INDEX_BLOCKS_WRITE_SETTING.getKey(), readOnlyValue).build(); - UpdateSettingsClusterStateUpdateRequest updateSettingsRequest = new UpdateSettingsClusterStateUpdateRequest().indices( - new Index[] { index } - ).settings(readOnlySettings).setPreserveExisting(false).ackTimeout(TimeValue.ZERO); - metadataUpdateSettingsService.updateSettings(updateSettingsRequest, listener); + metadataUpdateSettingsService.updateSettings( + new UpdateSettingsClusterStateUpdateRequest( + TimeValue.MINUS_ONE, + TimeValue.ZERO, + readOnlySettings, + UpdateSettingsClusterStateUpdateRequest.OnExisting.OVERWRITE, + UpdateSettingsClusterStateUpdateRequest.OnStaticSetting.REJECT, + index + ), + listener + ); } private void reindex(SystemIndexMigrationInfo migrationInfo, ActionListener listener) { diff --git a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/action/settings/TransportUpdateSecuritySettingsAction.java b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/action/settings/TransportUpdateSecuritySettingsAction.java index 49f8846c36e1f..b924fe0d983bb 100644 --- a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/action/settings/TransportUpdateSecuritySettingsAction.java +++ b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/action/settings/TransportUpdateSecuritySettingsAction.java @@ -119,8 +119,8 @@ protected void masterOperation( private Optional createUpdateSettingsRequest( String indexName, Settings settingsToUpdate, - TimeValue timeout, - TimeValue masterTimeout, + TimeValue ackTimeout, + TimeValue masterNodeTimeout, ClusterState state ) { if (settingsToUpdate.isEmpty()) { @@ -136,10 +136,14 @@ private Optional createUpdateSettingsRe } return Optional.of( - new UpdateSettingsClusterStateUpdateRequest().indices(new Index[] { writeIndex }) - .settings(settingsToUpdate) - .ackTimeout(timeout) - .masterNodeTimeout(masterTimeout) + new UpdateSettingsClusterStateUpdateRequest( + masterNodeTimeout, + ackTimeout, + settingsToUpdate, + UpdateSettingsClusterStateUpdateRequest.OnExisting.OVERWRITE, + UpdateSettingsClusterStateUpdateRequest.OnStaticSetting.REJECT, + writeIndex + ) ); } diff --git a/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/transport/actions/TransportUpdateWatcherSettingsAction.java b/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/transport/actions/TransportUpdateWatcherSettingsAction.java index 378ee642cf105..0407c2db63ac6 100644 --- a/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/transport/actions/TransportUpdateWatcherSettingsAction.java +++ b/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/transport/actions/TransportUpdateWatcherSettingsAction.java @@ -24,7 +24,6 @@ import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.settings.Settings; import 
org.elasticsearch.common.util.concurrent.EsExecutors; -import org.elasticsearch.index.Index; import org.elasticsearch.injection.guice.Inject; import org.elasticsearch.logging.LogManager; import org.elasticsearch.logging.Logger; @@ -91,9 +90,14 @@ protected void masterOperation( return; } final Settings newSettings = Settings.builder().loadFromMap(request.settings()).build(); - final UpdateSettingsClusterStateUpdateRequest clusterStateUpdateRequest = new UpdateSettingsClusterStateUpdateRequest().indices( - new Index[] { watcherIndexMd.getIndex() } - ).settings(newSettings).ackTimeout(request.ackTimeout()).masterNodeTimeout(request.masterNodeTimeout()); + final UpdateSettingsClusterStateUpdateRequest clusterStateUpdateRequest = new UpdateSettingsClusterStateUpdateRequest( + request.masterNodeTimeout(), + request.ackTimeout(), + newSettings, + UpdateSettingsClusterStateUpdateRequest.OnExisting.OVERWRITE, + UpdateSettingsClusterStateUpdateRequest.OnStaticSetting.REJECT, + watcherIndexMd.getIndex() + ); updateSettingsService.updateSettings(clusterStateUpdateRequest, new ActionListener<>() { @Override From 74785f31f9efdcfc0a9af64aa60cf32c4ac8ebbe Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Wed, 25 Sep 2024 13:00:33 +0200 Subject: [PATCH 40/58] Add initial synthetic source fallback logic (#112994) (#113519) Add initial code required to fallback synthetic source mode to stored source mode using an index settings provider. Note that the final version relies on a new index setting that determines source mode, which is currently controlled by `mode` mapping attribute in `_source` meta field mapper. Additionally index modes should not enforce synthetic source mode. --- x-pack/plugin/logsdb/build.gradle | 32 ++++++++++ x-pack/plugin/logsdb/qa/build.gradle | 9 +++ .../plugin/logsdb/qa/with-basic/build.gradle | 21 ++++++ .../xpack/logsdb/LogsdbRestIT.java | 51 +++++++++++++++ .../xpack/logsdb/LogsdbRestIT.java | 57 +++++++++++++++++ .../xpack/logsdb/LogsDBPlugin.java | 53 +++++++++++++++ .../SyntheticSourceIndexSettingsProvider.java | 61 ++++++++++++++++++ .../logsdb/SyntheticSourceLicenseService.java | 64 +++++++++++++++++++ .../SyntheticSourceLicenseServiceTests.java | 48 ++++++++++++++ 9 files changed, 396 insertions(+) create mode 100644 x-pack/plugin/logsdb/build.gradle create mode 100644 x-pack/plugin/logsdb/qa/build.gradle create mode 100644 x-pack/plugin/logsdb/qa/with-basic/build.gradle create mode 100644 x-pack/plugin/logsdb/qa/with-basic/src/javaRestTest/java/org/elasticsearch/xpack/logsdb/LogsdbRestIT.java create mode 100644 x-pack/plugin/logsdb/src/javaRestTest/java/org/elasticsearch/xpack/logsdb/LogsdbRestIT.java create mode 100644 x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/LogsDBPlugin.java create mode 100644 x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/SyntheticSourceIndexSettingsProvider.java create mode 100644 x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/SyntheticSourceLicenseService.java create mode 100644 x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/SyntheticSourceLicenseServiceTests.java diff --git a/x-pack/plugin/logsdb/build.gradle b/x-pack/plugin/logsdb/build.gradle new file mode 100644 index 0000000000000..5b7e45a90149d --- /dev/null +++ b/x-pack/plugin/logsdb/build.gradle @@ -0,0 +1,32 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import org.elasticsearch.gradle.internal.info.BuildParams + +evaluationDependsOn(xpackModule('core')) + +apply plugin: 'elasticsearch.internal-es-plugin' +apply plugin: 'elasticsearch.internal-java-rest-test' + +esplugin { + name 'logsdb' + description 'A plugin for logsdb related functionality' + classname 'org.elasticsearch.xpack.logsdb.LogsDBPlugin' + extendedPlugins = ['x-pack-core'] +} +base { + archivesName = 'x-pack-logsdb' +} + +dependencies { + compileOnly project(path: xpackModule('core')) + testImplementation(testArtifact(project(xpackModule('core')))) +} + +tasks.named("javaRestTest").configure { + usesDefaultDistribution() +} diff --git a/x-pack/plugin/logsdb/qa/build.gradle b/x-pack/plugin/logsdb/qa/build.gradle new file mode 100644 index 0000000000000..0f98e90b4d52e --- /dev/null +++ b/x-pack/plugin/logsdb/qa/build.gradle @@ -0,0 +1,9 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + diff --git a/x-pack/plugin/logsdb/qa/with-basic/build.gradle b/x-pack/plugin/logsdb/qa/with-basic/build.gradle new file mode 100644 index 0000000000000..2fdeed338e1c1 --- /dev/null +++ b/x-pack/plugin/logsdb/qa/with-basic/build.gradle @@ -0,0 +1,21 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import org.elasticsearch.gradle.internal.info.BuildParams + +apply plugin: 'elasticsearch.internal-java-rest-test' + +dependencies { + javaRestTestImplementation(testArtifact(project(xpackModule('core')))) +} + +tasks.named("javaRestTest").configure { + // This test cluster is using a BASIC license and FIPS 140 mode is not supported in BASIC + BuildParams.withFipsEnabledOnly(it) + + usesDefaultDistribution() +} diff --git a/x-pack/plugin/logsdb/qa/with-basic/src/javaRestTest/java/org/elasticsearch/xpack/logsdb/LogsdbRestIT.java b/x-pack/plugin/logsdb/qa/with-basic/src/javaRestTest/java/org/elasticsearch/xpack/logsdb/LogsdbRestIT.java new file mode 100644 index 0000000000000..e7d267810424c --- /dev/null +++ b/x-pack/plugin/logsdb/qa/with-basic/src/javaRestTest/java/org/elasticsearch/xpack/logsdb/LogsdbRestIT.java @@ -0,0 +1,51 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */
+
+package org.elasticsearch.xpack.logsdb;
+
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.test.cluster.ElasticsearchCluster;
+import org.elasticsearch.test.cluster.local.distribution.DistributionType;
+import org.elasticsearch.test.rest.ESRestTestCase;
+import org.hamcrest.Matchers;
+import org.junit.ClassRule;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+
+public class LogsdbRestIT extends ESRestTestCase {
+
+    @ClassRule
+    public static ElasticsearchCluster cluster = ElasticsearchCluster.local()
+        .distribution(DistributionType.DEFAULT)
+        .setting("xpack.license.self_generated.type", "basic")
+        .setting("xpack.security.enabled", "false")
+        .build();
+
+    @Override
+    protected String getTestRestCluster() {
+        return cluster.getHttpAddresses();
+    }
+
+    public void testFeatureUsageWithLogsdbIndex() throws IOException {
+        {
+            var response = getAsMap("/_license/feature_usage");
+            @SuppressWarnings("unchecked")
+            List<Map<String, Object>> features = (List<Map<String, Object>>) response.get("features");
+            assertThat(features, Matchers.empty());
+        }
+        {
+            createIndex("test-index", Settings.builder().put("index.mode", "logsdb").build());
+            var response = getAsMap("/_license/feature_usage");
+            @SuppressWarnings("unchecked")
+            List<Map<String, Object>> features = (List<Map<String, Object>>) response.get("features");
+            assertThat(features, Matchers.empty());
+        }
+    }
+
+}
diff --git a/x-pack/plugin/logsdb/src/javaRestTest/java/org/elasticsearch/xpack/logsdb/LogsdbRestIT.java b/x-pack/plugin/logsdb/src/javaRestTest/java/org/elasticsearch/xpack/logsdb/LogsdbRestIT.java
new file mode 100644
index 0000000000000..efff6d0579838
--- /dev/null
+++ b/x-pack/plugin/logsdb/src/javaRestTest/java/org/elasticsearch/xpack/logsdb/LogsdbRestIT.java
@@ -0,0 +1,57 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.logsdb;
+
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.test.cluster.ElasticsearchCluster;
+import org.elasticsearch.test.cluster.local.distribution.DistributionType;
+import org.elasticsearch.test.rest.ESRestTestCase;
+import org.hamcrest.Matchers;
+import org.junit.ClassRule;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+
+import static org.hamcrest.Matchers.equalTo;
+
+public class LogsdbRestIT extends ESRestTestCase {
+
+    @ClassRule
+    public static ElasticsearchCluster cluster = ElasticsearchCluster.local()
+        .distribution(DistributionType.DEFAULT)
+        .setting("xpack.security.enabled", "false")
+        .setting("xpack.license.self_generated.type", "trial")
+        .build();
+
+    @Override
+    protected String getTestRestCluster() {
+        return cluster.getHttpAddresses();
+    }
+
+    public void testFeatureUsageWithLogsdbIndex() throws IOException {
+        {
+            var response = getAsMap("/_license/feature_usage");
+            @SuppressWarnings("unchecked")
+            List<Map<String, Object>> features = (List<Map<String, Object>>) response.get("features");
+            assertThat(features, Matchers.empty());
+        }
+        {
+            createIndex("test-index", Settings.builder().put("index.mode", "logsdb").build());
+            var response = getAsMap("/_license/feature_usage");
+            @SuppressWarnings("unchecked")
+            List<Map<String, Object>> features = (List<Map<String, Object>>) response.get("features");
+            logger.info("response's features: {}", features);
+            assertThat(features, Matchers.not(Matchers.empty()));
+            Map<String, Object> feature = features.stream().filter(map -> "mappings".equals(map.get("family"))).findFirst().get();
+            assertThat(feature.get("name"), equalTo("synthetic-source"));
+            assertThat(feature.get("license_level"), equalTo("enterprise"));
+        }
+    }
+
+}
diff --git a/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/LogsDBPlugin.java b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/LogsDBPlugin.java
new file mode 100644
index 0000000000000..e38f953be96a3
--- /dev/null
+++ b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/LogsDBPlugin.java
@@ -0,0 +1,53 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.logsdb;
+
+import org.elasticsearch.cluster.node.DiscoveryNode;
+import org.elasticsearch.common.settings.Setting;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.index.IndexSettingProvider;
+import org.elasticsearch.plugins.Plugin;
+import org.elasticsearch.xpack.core.XPackPlugin;
+
+import java.util.Collection;
+import java.util.List;
+
+import static org.elasticsearch.xpack.logsdb.SyntheticSourceLicenseService.FALLBACK_SETTING;
+
+public class LogsDBPlugin extends Plugin {
+
+    private final Settings settings;
+    private final SyntheticSourceLicenseService licenseService;
+
+    public LogsDBPlugin(Settings settings) {
+        this.settings = settings;
+        this.licenseService = new SyntheticSourceLicenseService(settings);
+    }
+
+    @Override
+    public Collection<?> createComponents(PluginServices services) {
+        licenseService.setLicenseState(XPackPlugin.getSharedLicenseState());
+        var clusterSettings = services.clusterService().getClusterSettings();
+        clusterSettings.addSettingsUpdateConsumer(FALLBACK_SETTING, licenseService::setSyntheticSourceFallback);
+        // Nothing to share here:
+        return super.createComponents(services);
+    }
+
+    @Override
+    public Collection<IndexSettingProvider> getAdditionalIndexSettingProviders(IndexSettingProvider.Parameters parameters) {
+        if (DiscoveryNode.isStateless(settings)) {
+            return List.of();
+        }
+        return List.of(new SyntheticSourceIndexSettingsProvider(licenseService));
+    }
+
+    @Override
+    public List<Setting<?>> getSettings() {
+        return List.of(FALLBACK_SETTING);
+    }
+}
diff --git a/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/SyntheticSourceIndexSettingsProvider.java b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/SyntheticSourceIndexSettingsProvider.java
new file mode 100644
index 0000000000000..5b7792de0622a
--- /dev/null
+++ b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/SyntheticSourceIndexSettingsProvider.java
@@ -0,0 +1,61 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.logsdb;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.elasticsearch.cluster.metadata.Metadata;
+import org.elasticsearch.common.compress.CompressedXContent;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.index.IndexMode;
+import org.elasticsearch.index.IndexSettingProvider;
+import org.elasticsearch.index.IndexSettings;
+
+import java.time.Instant;
+import java.util.List;
+import java.util.Locale;
+
+/**
+ * An index setting provider that overwrites the source mode from synthetic to stored if synthetic source isn't allowed to be used.
+ */
+public class SyntheticSourceIndexSettingsProvider implements IndexSettingProvider {
+
+    private static final Logger LOGGER = LogManager.getLogger(SyntheticSourceIndexSettingsProvider.class);
+
+    private final SyntheticSourceLicenseService syntheticSourceLicenseService;
+
+    public SyntheticSourceIndexSettingsProvider(SyntheticSourceLicenseService syntheticSourceLicenseService) {
+        this.syntheticSourceLicenseService = syntheticSourceLicenseService;
+    }
+
+    @Override
+    public Settings getAdditionalIndexSettings(
+        String indexName,
+        String dataStreamName,
+        boolean isTimeSeries,
+        Metadata metadata,
+        Instant resolvedAt,
+        Settings indexTemplateAndCreateRequestSettings,
+        List<CompressedXContent> combinedTemplateMappings
+    ) {
+        if (newIndexHasSyntheticSourceUsage(indexTemplateAndCreateRequestSettings)
+            && syntheticSourceLicenseService.fallbackToStoredSource()) {
+            LOGGER.debug("creation of index [{}] with synthetic source without it being allowed", indexName);
+            // TODO: handle falling back to stored source
+        }
+        return Settings.EMPTY;
+    }
+
+    boolean newIndexHasSyntheticSourceUsage(Settings indexTemplateAndCreateRequestSettings) {
+        // TODO: build tmp MapperService and check whether SourceFieldMapper#isSynthetic() to determine synthetic source usage.
+        // Not using IndexSettings.MODE.get() to avoid validation that may fail at this point.
+        var rawIndexMode = indexTemplateAndCreateRequestSettings.get(IndexSettings.MODE.getKey());
+        IndexMode indexMode = rawIndexMode != null ? Enum.valueOf(IndexMode.class, rawIndexMode.toUpperCase(Locale.ROOT)) : null;
+        return indexMode != null && indexMode.isSyntheticSourceEnabled();
+    }
+}
diff --git a/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/SyntheticSourceLicenseService.java b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/SyntheticSourceLicenseService.java
new file mode 100644
index 0000000000000..4e3e916762fab
--- /dev/null
+++ b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/SyntheticSourceLicenseService.java
@@ -0,0 +1,64 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.logsdb;
+
+import org.elasticsearch.common.settings.Setting;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.license.License;
+import org.elasticsearch.license.LicensedFeature;
+import org.elasticsearch.license.XPackLicenseState;
+
+/**
+ * Determines based on license and fallback setting whether synthetic source usage should fall back to stored source.
+ */
+public final class SyntheticSourceLicenseService {
+
+    private static final String MAPPINGS_FEATURE_FAMILY = "mappings";
+
+    /**
+     * A setting that determines whether source mode should always be stored source, regardless of license.
+     */
+    public static final Setting<Boolean> FALLBACK_SETTING = Setting.boolSetting(
+        "xpack.mapping.synthetic_source_fallback_to_stored_source",
+        false,
+        Setting.Property.NodeScope,
+        Setting.Property.Dynamic
+    );
+
+    private static final LicensedFeature.Momentary SYNTHETIC_SOURCE_FEATURE = LicensedFeature.momentary(
+        MAPPINGS_FEATURE_FAMILY,
+        "synthetic-source",
+        License.OperationMode.ENTERPRISE
+    );
+
+    private XPackLicenseState licenseState;
+    private volatile boolean syntheticSourceFallback;
+
+    public SyntheticSourceLicenseService(Settings settings) {
+        syntheticSourceFallback = FALLBACK_SETTING.get(settings);
+    }
+
+    /**
+     * @return whether synthetic source mode should fall back to stored source.
+     */
+    public boolean fallbackToStoredSource() {
+        if (syntheticSourceFallback) {
+            return true;
+        }
+
+        return SYNTHETIC_SOURCE_FEATURE.check(licenseState) == false;
+    }
+
+    void setSyntheticSourceFallback(boolean syntheticSourceFallback) {
+        this.syntheticSourceFallback = syntheticSourceFallback;
+    }
+
+    void setLicenseState(XPackLicenseState licenseState) {
+        this.licenseState = licenseState;
+    }
+}
diff --git a/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/SyntheticSourceLicenseServiceTests.java b/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/SyntheticSourceLicenseServiceTests.java
new file mode 100644
index 0000000000000..2ca3a8d57f2eb
--- /dev/null
+++ b/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/SyntheticSourceLicenseServiceTests.java
@@ -0,0 +1,48 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */ + +package org.elasticsearch.xpack.logsdb; + +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.license.MockLicenseState; +import org.elasticsearch.test.ESTestCase; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class SyntheticSourceLicenseServiceTests extends ESTestCase { + + public void testLicenseAllowsSyntheticSource() { + MockLicenseState licenseState = mock(MockLicenseState.class); + when(licenseState.isAllowed(any())).thenReturn(true); + var licenseService = new SyntheticSourceLicenseService(Settings.EMPTY); + licenseService.setLicenseState(licenseState); + assertFalse("synthetic source is allowed, so not fallback to stored source", licenseService.fallbackToStoredSource()); + } + + public void testDefaultDisallow() { + MockLicenseState licenseState = mock(MockLicenseState.class); + when(licenseState.isAllowed(any())).thenReturn(false); + var licenseService = new SyntheticSourceLicenseService(Settings.EMPTY); + licenseService.setLicenseState(licenseState); + assertTrue("synthetic source is not allowed, so fallback to stored source", licenseService.fallbackToStoredSource()); + } + + public void testFallback() { + MockLicenseState licenseState = mock(MockLicenseState.class); + when(licenseState.isAllowed(any())).thenReturn(true); + var licenseService = new SyntheticSourceLicenseService(Settings.EMPTY); + licenseService.setLicenseState(licenseState); + licenseService.setSyntheticSourceFallback(true); + assertTrue( + "synthetic source is allowed, but fallback has been enabled, so fallback to stored source", + licenseService.fallbackToStoredSource() + ); + } + +} From cd18cf976c276ea25916007e6dfe4fa4c126de8a Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 25 Sep 2024 12:23:36 +0100 Subject: [PATCH 41/58] Assert `NodeClient` in client wrapper (#113515) (#113521) Some `getClientWrapper()` implementations return a wrapper that only wraps `NodeClient` instances. In practice we _only_ wrap `NodeClient` instances so this check is redundant, and in a recent investigation it was confusing to readers. With this commit we assert that we're always wrapping a `NodeClient`. --- .../xpack/idp/saml/test/IdentityProviderIntegTestCase.java | 2 +- .../xpack/ml/integration/MlNativeIntegTestCase.java | 3 ++- .../java/org/elasticsearch/test/SecurityIntegTestCase.java | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/identity-provider/src/internalClusterTest/java/org/elasticsearch/xpack/idp/saml/test/IdentityProviderIntegTestCase.java b/x-pack/plugin/identity-provider/src/internalClusterTest/java/org/elasticsearch/xpack/idp/saml/test/IdentityProviderIntegTestCase.java index f02ccae7b8f29..60f95f2e56fd2 100644 --- a/x-pack/plugin/identity-provider/src/internalClusterTest/java/org/elasticsearch/xpack/idp/saml/test/IdentityProviderIntegTestCase.java +++ b/x-pack/plugin/identity-provider/src/internalClusterTest/java/org/elasticsearch/xpack/idp/saml/test/IdentityProviderIntegTestCase.java @@ -158,7 +158,7 @@ protected Function getClientWrapper() { // user. This is ok for internal n2n stuff but the test framework does other things like wiping indices, repositories, etc // that the system user cannot do. so we wrap the node client with a user that can do these things since the client() calls // return a node client - return client -> (client instanceof NodeClient) ? 
client.filterWithHeader(headers) : client;
+        return client -> asInstanceOf(NodeClient.class, client).filterWithHeader(headers);
     }
 
     @Override
diff --git a/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/MlNativeIntegTestCase.java b/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/MlNativeIntegTestCase.java
index 3b705e63a145f..d18b6b6cf9ab6 100644
--- a/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/MlNativeIntegTestCase.java
+++ b/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/MlNativeIntegTestCase.java
@@ -18,6 +18,7 @@
 import org.elasticsearch.action.support.broadcast.BroadcastResponse;
 import org.elasticsearch.action.support.master.AcknowledgedResponse;
 import org.elasticsearch.client.internal.Client;
+import org.elasticsearch.client.internal.node.NodeClient;
 import org.elasticsearch.cluster.ClusterModule;
 import org.elasticsearch.cluster.ClusterState;
 import org.elasticsearch.cluster.NamedDiff;
@@ -172,7 +173,7 @@ protected Function<Client, Client> getClientWrapper() {
         // user. This is ok for internal n2n stuff but the test framework does other things like wiping indices, repositories, etc
         // that the system user cannot do. so we wrap the node client with a user that can do these things since the client() calls
         // return a node client
-        return client -> client.filterWithHeader(headers);
+        return client -> asInstanceOf(NodeClient.class, client).filterWithHeader(headers);
     }
 
     private Settings externalClusterClientSettings() {
diff --git a/x-pack/plugin/security/src/test/java/org/elasticsearch/test/SecurityIntegTestCase.java b/x-pack/plugin/security/src/test/java/org/elasticsearch/test/SecurityIntegTestCase.java
index 3a39d54567726..29b8037de5a66 100644
--- a/x-pack/plugin/security/src/test/java/org/elasticsearch/test/SecurityIntegTestCase.java
+++ b/x-pack/plugin/security/src/test/java/org/elasticsearch/test/SecurityIntegTestCase.java
@@ -379,7 +379,7 @@ protected Function<Client, Client> getClientWrapper() {
         // user. This is ok for internal n2n stuff but the test framework does other things like wiping indices, repositories, etc
         // that the system user cannot do. so we wrap the node client with a user that can do these things since the client() calls
         // return a node client
-        return client -> (client instanceof NodeClient) ? client.filterWithHeader(headers) : client;
+        return client -> asInstanceOf(NodeClient.class, client).filterWithHeader(headers);
     }
 
     /**

From b80aed6cdae02da465cb29be5dd66933765d9c46 Mon Sep 17 00:00:00 2001
From: Mary Gouseti
Date: Wed, 25 Sep 2024 14:38:30 +0300
Subject: [PATCH 42/58] Add template builder (#113444) (#113512)

Since we are enriching the component templates with more entries such as the
data stream lifecycle and in the future the data stream options, we add a
template builder to help with the code, especially tests.

To highlight the value and prepare for the PRs that will add the data stream
options to the template, we replace calls to the constructor with all arguments
by the builder:

- when there are arguments with null values, or
- when we copy another template and change only a few fields.

This prepares the ground, so when we add data stream options, we will not need
to edit all these places.
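As a rough before/after sketch of the call-site change this enables (the variable
names are placeholders, and the trailing `build()` call is assumed wherever a fully
constructed `Template` is required rather than a `Template.Builder`):

    // Before: positional constructor, padding the unused slots with null.
    new Template(settings, null, aliases, null)

    // After: builder, naming only the parts that are actually set.
    Template.builder().settings(settings).aliases(aliases).build()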
(cherry picked from commit 3d7904bee3346be12412b02c194b959877e6b440) --- .../datastreams/DataStreamAutoshardingIT.java | 2 +- .../datastreams/DataStreamIT.java | 8 ++- .../ResolveClusterDataStreamIT.java | 2 +- .../CrudSystemDataStreamLifecycleIT.java | 10 ++-- .../DataStreamLifecycleServiceIT.java | 23 ++++---- .../ExplainDataStreamLifecycleIT.java | 7 ++- .../DataStreamLifecycleFixtures.java | 7 ++- .../TransportPutComponentTemplateAction.java | 2 +- .../metadata/ComposableIndexTemplate.java | 5 ++ .../MetadataIndexTemplateService.java | 19 ++---- .../cluster/metadata/Template.java | 58 +++++++++++++++++++ .../action/bulk/BulkOperationTests.java | 3 - .../metadata/ComponentTemplateTests.java | 30 ++-------- .../ComposableIndexTemplateTests.java | 27 ++++++--- ...amLifecycleWithRetentionWarningsTests.java | 34 +++++------ .../MetadataIndexTemplateServiceTests.java | 15 ++--- .../downsample/DataStreamLifecycleDriver.java | 2 +- .../xpack/ilm/TimeSeriesDataStreamsIT.java | 2 +- ...ataStreamAndIndexLifecycleMixingTests.java | 7 ++- ...adataMigrateToDataTiersRoutingService.java | 14 +---- ...StreamLifecycleDownsamplingSecurityIT.java | 4 +- ...reamLifecycleServiceRuntimeSecurityIT.java | 9 ++- 22 files changed, 171 insertions(+), 119 deletions(-) diff --git a/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/DataStreamAutoshardingIT.java b/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/DataStreamAutoshardingIT.java index dd6ed04f20378..ac73385a97d70 100644 --- a/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/DataStreamAutoshardingIT.java +++ b/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/DataStreamAutoshardingIT.java @@ -502,7 +502,7 @@ static void putComposableIndexTemplate(String id, List patterns, @Nullab request.indexTemplate( ComposableIndexTemplate.builder() .indexPatterns(patterns) - .template(new Template(settings, null, null, null)) + .template(Template.builder().settings(settings)) .dataStreamTemplate(new ComposableIndexTemplate.DataStreamTemplate()) .build() ); diff --git a/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/DataStreamIT.java b/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/DataStreamIT.java index d212bd336f413..ff3dd2737f408 100644 --- a/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/DataStreamIT.java +++ b/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/DataStreamIT.java @@ -2441,7 +2441,13 @@ static void putComposableIndexTemplate( request.indexTemplate( ComposableIndexTemplate.builder() .indexPatterns(patterns) - .template(new Template(settings, mappings == null ? null : CompressedXContent.fromJSON(mappings), aliases, lifecycle)) + .template( + Template.builder() + .settings(settings) + .mappings(mappings == null ? 
null : CompressedXContent.fromJSON(mappings)) + .aliases(aliases) + .lifecycle(lifecycle) + ) .metadata(metadata) .dataStreamTemplate(new ComposableIndexTemplate.DataStreamTemplate(false, false, withFailureStore)) .build() diff --git a/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/ResolveClusterDataStreamIT.java b/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/ResolveClusterDataStreamIT.java index 59a8991e28195..4c85958498da0 100644 --- a/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/ResolveClusterDataStreamIT.java +++ b/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/ResolveClusterDataStreamIT.java @@ -453,7 +453,7 @@ void putComposableIndexTemplate(Client client, String id, List patterns, request.indexTemplate( ComposableIndexTemplate.builder() .indexPatterns(patterns) - .template(new Template(null, null, aliases, null)) + .template(Template.builder().aliases(aliases)) .dataStreamTemplate(new ComposableIndexTemplate.DataStreamTemplate()) .build() ); diff --git a/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/lifecycle/CrudSystemDataStreamLifecycleIT.java b/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/lifecycle/CrudSystemDataStreamLifecycleIT.java index 3eb7ab7a55494..dd3f1e74d4f4e 100644 --- a/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/lifecycle/CrudSystemDataStreamLifecycleIT.java +++ b/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/lifecycle/CrudSystemDataStreamLifecycleIT.java @@ -201,12 +201,10 @@ public Collection getSystemDataStreamDescriptors() { ComposableIndexTemplate.builder() .indexPatterns(List.of(".test-data-stream")) .template( - new Template( - Settings.EMPTY, - mappings, - null, - DataStreamLifecycle.newBuilder().dataRetention(randomMillisUpToYear9999()).build() - ) + Template.builder() + .settings(Settings.EMPTY) + .mappings(mappings) + .lifecycle(DataStreamLifecycle.newBuilder().dataRetention(randomMillisUpToYear9999()).build()) ) .dataStreamTemplate(new DataStreamTemplate()) .build(), diff --git a/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/lifecycle/DataStreamLifecycleServiceIT.java b/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/lifecycle/DataStreamLifecycleServiceIT.java index 1168bbc904c40..89c440f5edf8b 100644 --- a/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/lifecycle/DataStreamLifecycleServiceIT.java +++ b/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/lifecycle/DataStreamLifecycleServiceIT.java @@ -345,7 +345,7 @@ public void testOriginationDate() throws Exception { request.indexTemplate( ComposableIndexTemplate.builder() .indexPatterns(List.of("index_*")) - .template(new Template(null, CompressedXContent.fromJSON(mapping), null, null)) + .template(Template.builder().mappings(CompressedXContent.fromJSON(mapping))) .build() ); client().execute(TransportPutComposableIndexTemplateAction.TYPE, request).actionGet(); @@ -1221,7 +1221,12 @@ static void putComposableIndexTemplate( request.indexTemplate( ComposableIndexTemplate.builder() .indexPatterns(patterns) - .template(new Template(settings, mappings == null ? null : CompressedXContent.fromJSON(mappings), null, lifecycle)) + .template( + Template.builder() + .settings(settings) + .mappings(mappings == null ? 
null : CompressedXContent.fromJSON(mappings)) + .lifecycle(lifecycle) + ) .metadata(metadata) .dataStreamTemplate(new ComposableIndexTemplate.DataStreamTemplate(false, false, withFailureStore)) .build() @@ -1268,14 +1273,12 @@ public Collection getSystemDataStreamDescriptors() { .dataStreamTemplate(new ComposableIndexTemplate.DataStreamTemplate()) .indexPatterns(List.of(DataStream.BACKING_INDEX_PREFIX + SYSTEM_DATA_STREAM_NAME + "*")) .template( - new Template( - Settings.EMPTY, - null, - null, - DataStreamLifecycle.newBuilder() - .dataRetention(TimeValue.timeValueDays(SYSTEM_DATA_STREAM_RETENTION_DAYS)) - .build() - ) + Template.builder() + .settings(Settings.EMPTY) + .lifecycle( + DataStreamLifecycle.newBuilder() + .dataRetention(TimeValue.timeValueDays(SYSTEM_DATA_STREAM_RETENTION_DAYS)) + ) ) .build(), Map.of(), diff --git a/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/lifecycle/ExplainDataStreamLifecycleIT.java b/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/lifecycle/ExplainDataStreamLifecycleIT.java index 48cb0321675a6..3c100d9cfe615 100644 --- a/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/lifecycle/ExplainDataStreamLifecycleIT.java +++ b/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/lifecycle/ExplainDataStreamLifecycleIT.java @@ -445,7 +445,12 @@ static void putComposableIndexTemplate( request.indexTemplate( ComposableIndexTemplate.builder() .indexPatterns(patterns) - .template(new Template(settings, mappings == null ? null : CompressedXContent.fromJSON(mappings), null, lifecycle)) + .template( + Template.builder() + .settings(settings) + .mappings(mappings == null ? null : CompressedXContent.fromJSON(mappings)) + .lifecycle(lifecycle) + ) .metadata(metadata) .dataStreamTemplate(new ComposableIndexTemplate.DataStreamTemplate()) .build() diff --git a/modules/data-streams/src/test/java/org/elasticsearch/datastreams/lifecycle/DataStreamLifecycleFixtures.java b/modules/data-streams/src/test/java/org/elasticsearch/datastreams/lifecycle/DataStreamLifecycleFixtures.java index e94385b2b6409..0ab105a467ab3 100644 --- a/modules/data-streams/src/test/java/org/elasticsearch/datastreams/lifecycle/DataStreamLifecycleFixtures.java +++ b/modules/data-streams/src/test/java/org/elasticsearch/datastreams/lifecycle/DataStreamLifecycleFixtures.java @@ -114,7 +114,12 @@ static void putComposableIndexTemplate( request.indexTemplate( ComposableIndexTemplate.builder() .indexPatterns(patterns) - .template(new Template(settings, mappings == null ? null : CompressedXContent.fromJSON(mappings), null, lifecycle)) + .template( + Template.builder() + .settings(settings) + .mappings(mappings == null ? 
null : CompressedXContent.fromJSON(mappings)) + .lifecycle(lifecycle) + ) .metadata(metadata) .dataStreamTemplate(new ComposableIndexTemplate.DataStreamTemplate()) .build() diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/template/put/TransportPutComponentTemplateAction.java b/server/src/main/java/org/elasticsearch/action/admin/indices/template/put/TransportPutComponentTemplateAction.java index 4a8a114aa7438..fb5267ba87b75 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/template/put/TransportPutComponentTemplateAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/template/put/TransportPutComponentTemplateAction.java @@ -78,7 +78,7 @@ public static ComponentTemplate normalizeComponentTemplate( Settings.Builder builder = Settings.builder().put(template.settings()).normalizePrefix(IndexMetadata.INDEX_SETTING_PREFIX); Settings settings = builder.build(); indexScopedSettings.validate(settings, true); - template = new Template(settings, template.mappings(), template.aliases(), template.lifecycle()); + template = Template.builder(template).settings(settings).build(); componentTemplate = new ComponentTemplate( template, componentTemplate.version(), diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/ComposableIndexTemplate.java b/server/src/main/java/org/elasticsearch/cluster/metadata/ComposableIndexTemplate.java index ac3e85777f8fb..6d1a874e1c72b 100644 --- a/server/src/main/java/org/elasticsearch/cluster/metadata/ComposableIndexTemplate.java +++ b/server/src/main/java/org/elasticsearch/cluster/metadata/ComposableIndexTemplate.java @@ -537,6 +537,11 @@ public Builder template(Template template) { return this; } + public Builder template(Template.Builder template) { + this.template = template.build(); + return this; + } + public Builder componentTemplates(List componentTemplates) { this.componentTemplates = componentTemplates; return this; diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataIndexTemplateService.java b/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataIndexTemplateService.java index 9888059af9686..1f9f6f636c1cf 100644 --- a/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataIndexTemplateService.java +++ b/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataIndexTemplateService.java @@ -693,7 +693,7 @@ public static Map> v2TemplateOverlaps( private void validateIndexTemplateV2(String name, ComposableIndexTemplate indexTemplate, ClusterState currentState) { // Workaround for the fact that start_time and end_time are injected by the MetadataCreateDataStreamService upon creation, // but when validating templates that create data streams the MetadataCreateDataStreamService isn't used. 
- var finalTemplate = Optional.ofNullable(indexTemplate.template()); + var finalTemplate = indexTemplate.template(); var finalSettings = Settings.builder(); final var now = Instant.now(); final var metadata = currentState.getMetadata(); @@ -717,18 +717,11 @@ private void validateIndexTemplateV2(String name, ComposableIndexTemplate indexT // Then apply setting from component templates: finalSettings.put(combinedSettings); // Then finally apply settings resolved from index template: - finalSettings.put(finalTemplate.map(Template::settings).orElse(Settings.EMPTY)); - - var templateToValidate = indexTemplate.toBuilder() - .template( - new Template( - finalSettings.build(), - finalTemplate.map(Template::mappings).orElse(null), - finalTemplate.map(Template::aliases).orElse(null), - finalTemplate.map(Template::lifecycle).orElse(null) - ) - ) - .build(); + if (finalTemplate != null && finalTemplate.settings() != null) { + finalSettings.put(finalTemplate.settings()); + } + + var templateToValidate = indexTemplate.toBuilder().template(Template.builder(finalTemplate).settings(finalSettings)).build(); validate(name, templateToValidate); validateDataStreamsStillReferenced(currentState, name, templateToValidate); diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/Template.java b/server/src/main/java/org/elasticsearch/cluster/metadata/Template.java index 3b8b89eb84a67..0a9e79284ced6 100644 --- a/server/src/main/java/org/elasticsearch/cluster/metadata/Template.java +++ b/server/src/main/java/org/elasticsearch/cluster/metadata/Template.java @@ -291,4 +291,62 @@ static boolean mappingsEquals(CompressedXContent m1, CompressedXContent m2) { ); return Maps.deepEquals(thisUncompressedMapping, otherUncompressedMapping); } + + public static Builder builder() { + return new Builder(); + } + + public static Builder builder(@Nullable Template template) { + return template == null ? 
new Builder() : new Builder(template); + } + + public static class Builder { + private Settings settings = null; + private CompressedXContent mappings = null; + private Map<String, AliasMetadata> aliases = null; + private DataStreamLifecycle lifecycle = null; + + private Builder() {} + + private Builder(Template template) { + settings = template.settings; + mappings = template.mappings; + aliases = template.aliases; + lifecycle = template.lifecycle; + } + + public Builder settings(Settings settings) { + this.settings = settings; + return this; + } + + public Builder settings(Settings.Builder settings) { + this.settings = settings.build(); + return this; + } + + public Builder mappings(CompressedXContent mappings) { + this.mappings = mappings; + return this; + } + + public Builder aliases(Map<String, AliasMetadata> aliases) { + this.aliases = aliases; + return this; + } + + public Builder lifecycle(DataStreamLifecycle lifecycle) { + this.lifecycle = lifecycle; + return this; + } + + public Builder lifecycle(DataStreamLifecycle.Builder lifecycle) { + this.lifecycle = lifecycle.build(); + return this; + } + + public Template build() { + return new Template(settings, mappings, aliases, lifecycle); + } + } } diff --git a/server/src/test/java/org/elasticsearch/action/bulk/BulkOperationTests.java b/server/src/test/java/org/elasticsearch/action/bulk/BulkOperationTests.java index 5a71473e9b0ed..3be942bcd291e 100644 --- a/server/src/test/java/org/elasticsearch/action/bulk/BulkOperationTests.java +++ b/server/src/test/java/org/elasticsearch/action/bulk/BulkOperationTests.java @@ -36,7 +36,6 @@ import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; import org.elasticsearch.cluster.metadata.Metadata; -import org.elasticsearch.cluster.metadata.Template; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.settings.Settings; @@ -147,13 +146,11 @@ public class BulkOperationTests extends ESTestCase { ComposableIndexTemplate.builder() .indexPatterns(List.of(dataStreamName)) .dataStreamTemplate(new ComposableIndexTemplate.DataStreamTemplate(false, false, false)) - .template(new Template(null, null, null, null)) .build(), "ds-template-with-failure-store", ComposableIndexTemplate.builder() .indexPatterns(List.of(fsDataStreamName, fsRolloverDataStreamName)) .dataStreamTemplate(new ComposableIndexTemplate.DataStreamTemplate(false, false, true)) - .template(new Template(null, null, null, null)) .build() ) ) diff --git a/server/src/test/java/org/elasticsearch/cluster/metadata/ComponentTemplateTests.java b/server/src/test/java/org/elasticsearch/cluster/metadata/ComponentTemplateTests.java index dd78b599bdb5a..e20788e341b7e 100644 --- a/server/src/test/java/org/elasticsearch/cluster/metadata/ComponentTemplateTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/metadata/ComponentTemplateTests.java @@ -154,45 +154,27 @@ public static ComponentTemplate mutateTemplate(ComponentTemplate orig) { Template ot = orig.template(); yield switch (randomIntBetween(0, 3)) { case 0 -> new ComponentTemplate( - new Template( - randomValueOtherThan(ot.settings(), ComponentTemplateTests::randomSettings), - ot.mappings(), - ot.aliases(), - ot.lifecycle() - ), + Template.builder(ot).settings(randomValueOtherThan(ot.settings(), ComponentTemplateTests::randomSettings)).build(), orig.version(), orig.metadata(), orig.deprecated() ); case 1 -> new ComponentTemplate( - new Template( - ot.settings(), -
randomValueOtherThan(ot.mappings(), ComponentTemplateTests::randomMappings), - ot.aliases(), - ot.lifecycle() - ), + Template.builder(ot).mappings(randomValueOtherThan(ot.mappings(), ComponentTemplateTests::randomMappings)).build(), orig.version(), orig.metadata(), orig.deprecated() ); case 2 -> new ComponentTemplate( - new Template( - ot.settings(), - ot.mappings(), - randomValueOtherThan(ot.aliases(), ComponentTemplateTests::randomAliases), - ot.lifecycle() - ), + Template.builder(ot).aliases(randomValueOtherThan(ot.aliases(), ComponentTemplateTests::randomAliases)).build(), orig.version(), orig.metadata(), orig.deprecated() ); case 3 -> new ComponentTemplate( - new Template( - ot.settings(), - ot.mappings(), - ot.aliases(), - randomValueOtherThan(ot.lifecycle(), DataStreamLifecycleTests::randomLifecycle) - ), + Template.builder(ot) + .lifecycle(randomValueOtherThan(ot.lifecycle(), DataStreamLifecycleTests::randomLifecycle)) + .build(), orig.version(), orig.metadata(), orig.deprecated() diff --git a/server/src/test/java/org/elasticsearch/cluster/metadata/ComposableIndexTemplateTests.java b/server/src/test/java/org/elasticsearch/cluster/metadata/ComposableIndexTemplateTests.java index 2cc5f509c3164..daa303440bcf4 100644 --- a/server/src/test/java/org/elasticsearch/cluster/metadata/ComposableIndexTemplateTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/metadata/ComposableIndexTemplateTests.java @@ -60,23 +60,23 @@ protected ComposableIndexTemplate createTestInstance() { } public static ComposableIndexTemplate randomInstance() { - Settings settings = null; - CompressedXContent mappings = null; - Map aliases = null; Template template = null; ComposableIndexTemplate.DataStreamTemplate dataStreamTemplate = randomDataStreamTemplate(); - + Template.Builder builder = Template.builder(); if (dataStreamTemplate != null || randomBoolean()) { if (randomBoolean()) { - settings = randomSettings(); + builder.settings(randomSettings()); } if (dataStreamTemplate != null || randomBoolean()) { - mappings = randomMappings(dataStreamTemplate); + builder.mappings(randomMappings(dataStreamTemplate)); } if (dataStreamTemplate == null && randomBoolean()) { - aliases = randomAliases(); + builder.aliases(randomAliases()); } - template = new Template(settings, mappings, aliases); + if (dataStreamTemplate != null && randomBoolean()) { + builder.lifecycle(DataStreamLifecycleTests.randomLifecycle()); + } + template = builder.build(); } Map meta = null; @@ -169,7 +169,12 @@ public static ComposableIndexTemplate mutateTemplate(ComposableIndexTemplate ori .template( randomValueOtherThan( orig.template(), - () -> new Template(randomSettings(), randomMappings(orig.getDataStreamTemplate()), randomAliases()) + () -> Template.builder() + .settings(randomSettings()) + .mappings(randomMappings(orig.getDataStreamTemplate())) + .aliases(randomAliases()) + .lifecycle(orig.getDataStreamTemplate() == null ? 
null : DataStreamLifecycleTests.randomLifecycle()) + .build() ) ) .build(); @@ -261,5 +266,9 @@ public void testXContentSerializationWithRolloverAndEffectiveRetention() throws public void testBuilderRoundtrip() { ComposableIndexTemplate template = randomInstance(); assertEquals(template, template.toBuilder().build()); + + if (template.template() != null) { + assertEquals(template.template(), Template.builder(template.template()).build()); + } } } diff --git a/server/src/test/java/org/elasticsearch/cluster/metadata/DataStreamLifecycleWithRetentionWarningsTests.java b/server/src/test/java/org/elasticsearch/cluster/metadata/DataStreamLifecycleWithRetentionWarningsTests.java index 8d31904a88079..d7f10f484165b 100644 --- a/server/src/test/java/org/elasticsearch/cluster/metadata/DataStreamLifecycleWithRetentionWarningsTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/metadata/DataStreamLifecycleWithRetentionWarningsTests.java @@ -171,7 +171,7 @@ public void testValidateLifecycleIndexTemplateWithWarning() { Metadata.builder().build(), randomAlphaOfLength(10), ComposableIndexTemplate.builder() - .template(new Template(null, null, null, DataStreamLifecycle.DEFAULT)) + .template(Template.builder().lifecycle(DataStreamLifecycle.DEFAULT)) .dataStreamTemplate(new ComposableIndexTemplate.DataStreamTemplate()) .indexPatterns(List.of(randomAlphaOfLength(10))) .build(), @@ -197,7 +197,7 @@ public void testValidateInternalDataStreamRetentionWithoutWarning() { Metadata.builder().build(), randomAlphaOfLength(10), ComposableIndexTemplate.builder() - .template(new Template(null, null, null, DataStreamLifecycle.DEFAULT)) + .template(Template.builder().lifecycle(DataStreamLifecycle.DEFAULT)) .dataStreamTemplate(new ComposableIndexTemplate.DataStreamTemplate()) .indexPatterns(List.of("." 
+ randomAlphaOfLength(10))) .build(), @@ -220,16 +220,15 @@ public void testValidateLifecycleComponentTemplateWithWarning() { Map.of( "component-template", new ComponentTemplate( - new Template( - null, - null, - null, - new DataStreamLifecycle( - new DataStreamLifecycle.Retention(randomTimeValue(2, 100, TimeUnit.DAYS)), - null, - null + Template.builder() + .lifecycle( + new DataStreamLifecycle( + new DataStreamLifecycle.Retention(randomTimeValue(2, 100, TimeUnit.DAYS)), + null, + null + ) ) - ), + .build(), null, null ) @@ -238,7 +237,7 @@ public void testValidateLifecycleComponentTemplateWithWarning() { .build(), randomAlphaOfLength(10), ComposableIndexTemplate.builder() - .template(new Template(null, null, null, DataStreamLifecycle.DEFAULT)) + .template(Template.builder().lifecycle(DataStreamLifecycle.DEFAULT)) .dataStreamTemplate(new ComposableIndexTemplate.DataStreamTemplate()) .indexPatterns(List.of(randomAlphaOfLength(10))) .componentTemplates(List.of("component-template")) @@ -291,12 +290,11 @@ public void testValidateLifecycleInComponentTemplate() throws Exception { ThreadContext threadContext = new ThreadContext(Settings.EMPTY); HeaderWarning.setThreadContext(threadContext); - Template template = new Template( - ComponentTemplateTests.randomSettings(), - null, - ComponentTemplateTests.randomAliases(), - DataStreamLifecycle.DEFAULT - ); + Template template = Template.builder() + .settings(ComponentTemplateTests.randomSettings()) + .aliases(ComponentTemplateTests.randomAliases()) + .lifecycle(DataStreamLifecycle.DEFAULT) + .build(); ComponentTemplate componentTemplate = new ComponentTemplate(template, 1L, new HashMap<>()); state = metadataIndexTemplateService.addComponentTemplate(state, false, "foo", componentTemplate); diff --git a/server/src/test/java/org/elasticsearch/cluster/metadata/MetadataIndexTemplateServiceTests.java b/server/src/test/java/org/elasticsearch/cluster/metadata/MetadataIndexTemplateServiceTests.java index 7a1d4b5b1ddf4..5fadd8f263f7c 100644 --- a/server/src/test/java/org/elasticsearch/cluster/metadata/MetadataIndexTemplateServiceTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/metadata/MetadataIndexTemplateServiceTests.java @@ -545,14 +545,7 @@ public void testUpdateIndexTemplateV2() throws Exception { List patterns = new ArrayList<>(template.indexPatterns()); patterns.add("new-pattern"); - template = ComposableIndexTemplate.builder() - .indexPatterns(patterns) - .template(template.template()) - .componentTemplates(template.composedOf()) - .priority(template.priority()) - .version(template.version()) - .metadata(template.metadata()) - .build(); + template = template.toBuilder().indexPatterns(patterns).build(); state = metadataIndexTemplateService.addIndexTemplateV2(state, false, "foo", template); assertNotNull(state.metadata().templatesV2().get("foo")); @@ -1621,7 +1614,7 @@ private ClusterState addComponentTemplate( String name, DataStreamLifecycle lifecycle ) throws Exception { - ComponentTemplate ct = new ComponentTemplate(new Template(null, null, null, lifecycle), null, null); + ComponentTemplate ct = new ComponentTemplate(Template.builder().lifecycle(lifecycle).build(), null, null); return service.addComponentTemplate(state, true, name, ct); } @@ -1634,7 +1627,7 @@ private void assertLifecycleResolution( ) throws Exception { ComposableIndexTemplate it = ComposableIndexTemplate.builder() .indexPatterns(List.of(randomAlphaOfLength(10) + "*")) - .template(new Template(null, null, null, lifecycleZ)) + 
.template(Template.builder().lifecycle(lifecycleZ)) .componentTemplates(composeOf) .priority(0L) .version(1L) @@ -1858,7 +1851,7 @@ public void testIndexTemplateFailsToAdd() throws Exception { ClusterState state = ClusterState.EMPTY_STATE; ComponentTemplate ct = new ComponentTemplate( - new Template(null, null, null, DataStreamLifecycle.newBuilder().dataRetention(randomMillisUpToYear9999()).build()), + Template.builder().lifecycle(DataStreamLifecycle.newBuilder().dataRetention(randomMillisUpToYear9999())).build(), null, null ); diff --git a/x-pack/plugin/downsample/src/internalClusterTest/java/org/elasticsearch/xpack/downsample/DataStreamLifecycleDriver.java b/x-pack/plugin/downsample/src/internalClusterTest/java/org/elasticsearch/xpack/downsample/DataStreamLifecycleDriver.java index aea4a06411e4e..64fb9e8f85b9b 100644 --- a/x-pack/plugin/downsample/src/internalClusterTest/java/org/elasticsearch/xpack/downsample/DataStreamLifecycleDriver.java +++ b/x-pack/plugin/downsample/src/internalClusterTest/java/org/elasticsearch/xpack/downsample/DataStreamLifecycleDriver.java @@ -144,7 +144,7 @@ private static void putComposableIndexTemplate( request.indexTemplate( ComposableIndexTemplate.builder() .indexPatterns(patterns) - .template(new Template(settings, mappings == null ? null : mappings, null, lifecycle)) + .template(Template.builder().settings(settings).mappings(mappings).lifecycle(lifecycle)) .metadata(metadata) .dataStreamTemplate(new ComposableIndexTemplate.DataStreamTemplate()) .build() diff --git a/x-pack/plugin/ilm/qa/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/ilm/TimeSeriesDataStreamsIT.java b/x-pack/plugin/ilm/qa/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/ilm/TimeSeriesDataStreamsIT.java index 68894baa8f3cb..28f97adec8814 100644 --- a/x-pack/plugin/ilm/qa/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/ilm/TimeSeriesDataStreamsIT.java +++ b/x-pack/plugin/ilm/qa/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/ilm/TimeSeriesDataStreamsIT.java @@ -311,7 +311,7 @@ public void testDeleteOnlyIndexInDataStreamDeletesDataStream() throws Exception @SuppressWarnings("unchecked") public void testDataStreamWithMultipleIndicesAndWriteIndexInDeletePhase() throws Exception { - createComposableTemplate(client(), template, dataStream + "*", new Template(null, null, null, null)); + createComposableTemplate(client(), template, dataStream + "*", Template.builder().build()); indexDocument(client(), dataStream, true); createNewSingletonPolicy(client(), policyName, "delete", DeleteAction.NO_SNAPSHOT_DELETE); diff --git a/x-pack/plugin/ilm/src/internalClusterTest/java/org/elasticsearch/xpack/ilm/DataStreamAndIndexLifecycleMixingTests.java b/x-pack/plugin/ilm/src/internalClusterTest/java/org/elasticsearch/xpack/ilm/DataStreamAndIndexLifecycleMixingTests.java index 4b59488e3707c..21924634ff6ab 100644 --- a/x-pack/plugin/ilm/src/internalClusterTest/java/org/elasticsearch/xpack/ilm/DataStreamAndIndexLifecycleMixingTests.java +++ b/x-pack/plugin/ilm/src/internalClusterTest/java/org/elasticsearch/xpack/ilm/DataStreamAndIndexLifecycleMixingTests.java @@ -1069,7 +1069,12 @@ static void putComposableIndexTemplate( request.indexTemplate( ComposableIndexTemplate.builder() .indexPatterns(patterns) - .template(new Template(settings, mappings == null ? null : CompressedXContent.fromJSON(mappings), null, lifecycle)) + .template( + Template.builder() + .settings(settings) + .mappings(mappings == null ? 
null : CompressedXContent.fromJSON(mappings)) + .lifecycle(lifecycle) + ) .metadata(metadata) .dataStreamTemplate(new ComposableIndexTemplate.DataStreamTemplate()) .build() diff --git a/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/cluster/metadata/MetadataMigrateToDataTiersRoutingService.java b/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/cluster/metadata/MetadataMigrateToDataTiersRoutingService.java index 283e48a328aa7..e06c7bc2708ca 100644 --- a/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/cluster/metadata/MetadataMigrateToDataTiersRoutingService.java +++ b/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/cluster/metadata/MetadataMigrateToDataTiersRoutingService.java @@ -695,12 +695,7 @@ static List migrateComposableTemplates(Metadata.Builder mb, ClusterState settingsBuilder.remove(requireRoutingSetting); settingsBuilder.remove(includeRoutingSetting); settingsBuilder.remove(excludeRoutingSetting); - Template migratedInnerTemplate = new Template( - settingsBuilder.build(), - currentInnerTemplate.mappings(), - currentInnerTemplate.aliases(), - currentInnerTemplate.lifecycle() - ); + Template migratedInnerTemplate = Template.builder(currentInnerTemplate).settings(settingsBuilder).build(); migratedComposableTemplateBuilder.indexPatterns(composableTemplate.indexPatterns()); migratedComposableTemplateBuilder.template(migratedInnerTemplate); @@ -741,12 +736,7 @@ static List migrateComponentTemplates(Metadata.Builder mb, ClusterState settingsBuilder.remove(requireRoutingSetting); settingsBuilder.remove(includeRoutingSetting); settingsBuilder.remove(excludeRoutingSetting); - Template migratedInnerTemplate = new Template( - settingsBuilder.build(), - currentInnerTemplate.mappings(), - currentInnerTemplate.aliases(), - currentInnerTemplate.lifecycle() - ); + Template migratedInnerTemplate = Template.builder(currentInnerTemplate).settings(settingsBuilder).build(); ComponentTemplate migratedComponentTemplate = new ComponentTemplate( migratedInnerTemplate, diff --git a/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/integration/DataStreamLifecycleDownsamplingSecurityIT.java b/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/integration/DataStreamLifecycleDownsamplingSecurityIT.java index 5f8744ace090d..458dee693c80a 100644 --- a/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/integration/DataStreamLifecycleDownsamplingSecurityIT.java +++ b/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/integration/DataStreamLifecycleDownsamplingSecurityIT.java @@ -342,7 +342,7 @@ private void putComposableIndexTemplate( request.indexTemplate( ComposableIndexTemplate.builder() .indexPatterns(patterns) - .template(new Template(settings, mappings, null, lifecycle)) + .template(Template.builder().settings(settings).mappings(mappings).lifecycle(lifecycle)) .metadata(metadata) .dataStreamTemplate(new ComposableIndexTemplate.DataStreamTemplate()) .build() @@ -442,7 +442,7 @@ public Collection getSystemDataStreamDescriptors() { SystemDataStreamDescriptor.Type.EXTERNAL, ComposableIndexTemplate.builder() .indexPatterns(List.of(SYSTEM_DATA_STREAM_NAME)) - .template(new Template(settings.build(), getTSDBMappings(), null, LIFECYCLE)) + .template(Template.builder().settings(settings).mappings(getTSDBMappings()).lifecycle(LIFECYCLE)) .dataStreamTemplate(new ComposableIndexTemplate.DataStreamTemplate()) .build(), Map.of(), diff --git 
a/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/integration/DataStreamLifecycleServiceRuntimeSecurityIT.java b/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/integration/DataStreamLifecycleServiceRuntimeSecurityIT.java index 2ab51bece41ea..2c98d2e686e46 100644 --- a/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/integration/DataStreamLifecycleServiceRuntimeSecurityIT.java +++ b/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/integration/DataStreamLifecycleServiceRuntimeSecurityIT.java @@ -227,7 +227,12 @@ private static void putComposableIndexTemplate( request.indexTemplate( ComposableIndexTemplate.builder() .indexPatterns(patterns) - .template(new Template(settings, mappings == null ? null : CompressedXContent.fromJSON(mappings), null, lifecycle)) + .template( + Template.builder() + .settings(settings) + .mappings(mappings == null ? null : CompressedXContent.fromJSON(mappings)) + .lifecycle(lifecycle) + ) .metadata(metadata) .dataStreamTemplate(new ComposableIndexTemplate.DataStreamTemplate()) .build() @@ -266,7 +271,7 @@ public Collection getSystemDataStreamDescriptors() { SystemDataStreamDescriptor.Type.EXTERNAL, ComposableIndexTemplate.builder() .indexPatterns(List.of(SYSTEM_DATA_STREAM_NAME)) - .template(new Template(Settings.EMPTY, null, null, DataStreamLifecycle.newBuilder().dataRetention(0).build())) + .template(Template.builder().lifecycle(DataStreamLifecycle.newBuilder().dataRetention(0))) .dataStreamTemplate(new ComposableIndexTemplate.DataStreamTemplate()) .build(), Map.of(), From cc3caa228def0974e5d8299e5321b0a6987f9e6e Mon Sep 17 00:00:00 2001 From: David Kyle Date: Wed, 25 Sep 2024 13:35:09 +0100 Subject: [PATCH 43/58] [ML] Add deployment threading details and memory usage to telemetry (#113099) (#113516) Adds deployment threading options and a new memory section reporting the memory usage for each of the ml features # Conflicts: # server/src/main/java/org/elasticsearch/TransportVersions.java --- docs/reference/rest-api/usage.asciidoc | 8 +- .../ml/MachineLearningFeatureSetUsage.java | 54 +++++++++++ .../MachineLearningFeatureSetUsageTests.java | 75 ++++++++++++++ .../xpack/ml/integration/PyTorchModelIT.java | 22 +++++ .../xpack/ml/integration/MlUsageIT.java | 35 +++++++ .../MachineLearningUsageTransportAction.java | 97 ++++++++++++++----- ...chineLearningInfoTransportActionTests.java | 76 ++++++++++++++- 7 files changed, 338 insertions(+), 29 deletions(-) create mode 100644 x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/MachineLearningFeatureSetUsageTests.java create mode 100644 x-pack/plugin/ml/qa/single-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/MlUsageIT.java diff --git a/docs/reference/rest-api/usage.asciidoc b/docs/reference/rest-api/usage.asciidoc index a54dbe21b46c6..4a8895807f2fa 100644 --- a/docs/reference/rest-api/usage.asciidoc +++ b/docs/reference/rest-api/usage.asciidoc @@ -195,7 +195,13 @@ GET /_xpack/usage } } }, - "node_count" : 1 + "node_count" : 1, + "memory": { + anomaly_detectors_memory_bytes: 0, + data_frame_analytics_memory_bytes: 0, + pytorch_inference_memory_bytes: 0, + total_used_memory_bytes: 0 + } }, "inference": { "available" : true, diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/MachineLearningFeatureSetUsage.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/MachineLearningFeatureSetUsage.java index 98c31dd9106d0..60484675ec90b 100644 --- 
a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/MachineLearningFeatureSetUsage.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/MachineLearningFeatureSetUsage.java @@ -31,11 +31,13 @@ public class MachineLearningFeatureSetUsage extends XPackFeatureSet.Usage { public static final String NODE_COUNT = "node_count"; public static final String DATA_FRAME_ANALYTICS_JOBS_FIELD = "data_frame_analytics_jobs"; public static final String INFERENCE_FIELD = "inference"; + public static final String MEMORY_FIELD = "memory"; private final Map jobsUsage; private final Map datafeedsUsage; private final Map analyticsUsage; private final Map inferenceUsage; + private final Map memoryUsage; private final int nodeCount; public MachineLearningFeatureSetUsage( @@ -45,6 +47,7 @@ public MachineLearningFeatureSetUsage( Map datafeedsUsage, Map analyticsUsage, Map inferenceUsage, + Map memoryUsage, int nodeCount ) { super(XPackField.MACHINE_LEARNING, available, enabled); @@ -52,6 +55,7 @@ public MachineLearningFeatureSetUsage( this.datafeedsUsage = Objects.requireNonNull(datafeedsUsage); this.analyticsUsage = Objects.requireNonNull(analyticsUsage); this.inferenceUsage = Objects.requireNonNull(inferenceUsage); + this.memoryUsage = Objects.requireNonNull(memoryUsage); this.nodeCount = nodeCount; } @@ -62,6 +66,11 @@ public MachineLearningFeatureSetUsage(StreamInput in) throws IOException { this.analyticsUsage = in.readGenericMap(); this.inferenceUsage = in.readGenericMap(); this.nodeCount = in.readInt(); + if (in.getTransportVersion().onOrAfter(TransportVersions.ML_TELEMETRY_MEMORY_ADDED)) { + this.memoryUsage = in.readGenericMap(); + } else { + this.memoryUsage = Map.of(); + } } @Override @@ -77,6 +86,9 @@ public void writeTo(StreamOutput out) throws IOException { out.writeGenericMap(analyticsUsage); out.writeGenericMap(inferenceUsage); out.writeInt(nodeCount); + if (out.getTransportVersion().onOrAfter(TransportVersions.ML_TELEMETRY_MEMORY_ADDED)) { + out.writeGenericMap(memoryUsage); + } } @Override @@ -86,9 +98,51 @@ protected void innerXContent(XContentBuilder builder, Params params) throws IOEx builder.field(DATAFEEDS_FIELD, datafeedsUsage); builder.field(DATA_FRAME_ANALYTICS_JOBS_FIELD, analyticsUsage); builder.field(INFERENCE_FIELD, inferenceUsage); + builder.field(MEMORY_FIELD, memoryUsage); if (nodeCount >= 0) { builder.field(NODE_COUNT, nodeCount); } } + public Map getJobsUsage() { + return jobsUsage; + } + + public Map getDatafeedsUsage() { + return datafeedsUsage; + } + + public Map getAnalyticsUsage() { + return analyticsUsage; + } + + public Map getInferenceUsage() { + return inferenceUsage; + } + + public Map getMemoryUsage() { + return memoryUsage; + } + + public int getNodeCount() { + return nodeCount; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + MachineLearningFeatureSetUsage that = (MachineLearningFeatureSetUsage) o; + return nodeCount == that.nodeCount + && Objects.equals(jobsUsage, that.jobsUsage) + && Objects.equals(datafeedsUsage, that.datafeedsUsage) + && Objects.equals(analyticsUsage, that.analyticsUsage) + && Objects.equals(inferenceUsage, that.inferenceUsage) + && Objects.equals(memoryUsage, that.memoryUsage); + } + + @Override + public int hashCode() { + return Objects.hash(jobsUsage, datafeedsUsage, analyticsUsage, inferenceUsage, memoryUsage, nodeCount); + } } diff --git 
a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/MachineLearningFeatureSetUsageTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/MachineLearningFeatureSetUsageTests.java new file mode 100644 index 0000000000000..87d658c6f983c --- /dev/null +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/MachineLearningFeatureSetUsageTests.java @@ -0,0 +1,75 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.core.ml; + +import org.elasticsearch.TransportVersion; +import org.elasticsearch.TransportVersions; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.core.Tuple; + +import java.io.IOException; +import java.util.Collections; + +public class MachineLearningFeatureSetUsageTests extends AbstractBWCWireSerializationTestCase<MachineLearningFeatureSetUsage> { + @Override + protected Writeable.Reader<MachineLearningFeatureSetUsage> instanceReader() { + return MachineLearningFeatureSetUsage::new; + } + + @Override + protected MachineLearningFeatureSetUsage createTestInstance() { + boolean enabled = randomBoolean(); + + if (enabled == false) { + return new MachineLearningFeatureSetUsage( + randomBoolean(), + enabled, + Collections.emptyMap(), + Collections.emptyMap(), + Collections.emptyMap(), + Collections.emptyMap(), + Collections.emptyMap(), + 0 + ); + } else { + return new MachineLearningFeatureSetUsage( + randomBoolean(), + enabled, + randomMap(0, 4, () -> new Tuple<>(randomAlphaOfLength(4), randomAlphaOfLength(4))), + randomMap(0, 4, () -> new Tuple<>(randomAlphaOfLength(4), randomAlphaOfLength(4))), + randomMap(0, 4, () -> new Tuple<>(randomAlphaOfLength(4), randomAlphaOfLength(4))), + randomMap(0, 4, () -> new Tuple<>(randomAlphaOfLength(4), randomAlphaOfLength(4))), + randomMap(0, 4, () -> new Tuple<>(randomAlphaOfLength(4), randomAlphaOfLength(4))), + randomIntBetween(1, 10) + ); + } + } + + @Override + protected MachineLearningFeatureSetUsage mutateInstance(MachineLearningFeatureSetUsage instance) throws IOException { + return null; + } + + @Override + protected MachineLearningFeatureSetUsage mutateInstanceForVersion(MachineLearningFeatureSetUsage instance, TransportVersion version) { + if (version.before(TransportVersions.ML_TELEMETRY_MEMORY_ADDED)) { + return new MachineLearningFeatureSetUsage( + instance.available(), + instance.enabled(), + instance.getJobsUsage(), + instance.getDatafeedsUsage(), + instance.getAnalyticsUsage(), + instance.getInferenceUsage(), + Collections.emptyMap(), + instance.getNodeCount() + ); + } + + return instance; + } +} diff --git a/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/PyTorchModelIT.java b/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/PyTorchModelIT.java index 34ef0baecccc5..4e92cad1026a3 100644 --- a/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/PyTorchModelIT.java +++ b/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/PyTorchModelIT.java @@ -1120,6 +1120,28 @@ public void testStartMultipleLowPriorityDeployments() throws Exception { } } + @SuppressWarnings("unchecked") + public void testDeploymentThreadsIncludedInUsage() throws IOException { + String modelId
= "deployment_threads_in_usage"; + createPassThroughModel(modelId); + putModelDefinition(modelId); + putVocabulary(List.of("these", "are", "my", "words"), modelId); + startDeployment(modelId); + + Request request = new Request("GET", "/_xpack/usage"); + var usage = entityAsMap(client().performRequest(request).getEntity()); + + var ml = (Map) usage.get("ml"); + assertNotNull(usage.toString(), ml); + var inference = (Map) ml.get("inference"); + var deployments = (Map) inference.get("deployments"); + var deploymentStats = (List>) deployments.get("stats_by_model"); + for (var stat : deploymentStats) { + assertThat(stat.toString(), (Integer) stat.get("num_threads"), greaterThanOrEqualTo(1)); + assertThat(stat.toString(), (Integer) stat.get("num_allocations"), greaterThanOrEqualTo(1)); + } + } + private void putModelDefinition(String modelId) throws IOException { putModelDefinition(modelId, BASE_64_ENCODED_MODEL, RAW_MODEL_SIZE); } diff --git a/x-pack/plugin/ml/qa/single-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/MlUsageIT.java b/x-pack/plugin/ml/qa/single-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/MlUsageIT.java new file mode 100644 index 0000000000000..05a307c2dfad3 --- /dev/null +++ b/x-pack/plugin/ml/qa/single-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/MlUsageIT.java @@ -0,0 +1,35 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.ml.integration; + +import org.elasticsearch.client.Request; +import org.elasticsearch.test.rest.ESRestTestCase; + +import java.io.IOException; +import java.util.Map; + +import static org.hamcrest.Matchers.greaterThanOrEqualTo; + +// Test the phone home/telemetry data +public class MlUsageIT extends ESRestTestCase { + + @SuppressWarnings("unchecked") + public void testMLUsage() throws IOException { + Request request = new Request("GET", "/_xpack/usage"); + var usage = entityAsMap(client().performRequest(request).getEntity()); + + var ml = (Map) usage.get("ml"); + assertNotNull(usage.toString(), ml); + var memoryUsage = (Map) ml.get("memory"); + assertNotNull(ml.toString(), memoryUsage); + assertThat(memoryUsage.toString(), (Integer) memoryUsage.get("anomaly_detectors_memory_bytes"), greaterThanOrEqualTo(0)); + assertThat(memoryUsage.toString(), (Integer) memoryUsage.get("data_frame_analytics_memory_bytes"), greaterThanOrEqualTo(0)); + assertThat(memoryUsage.toString(), (Integer) memoryUsage.get("pytorch_inference_memory_bytes"), greaterThanOrEqualTo(0)); + assertThat(memoryUsage.toString(), (Integer) memoryUsage.get("total_used_memory_bytes"), greaterThanOrEqualTo(0)); + } +} diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearningUsageTransportAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearningUsageTransportAction.java index 583965e76e542..40e3fbb661db1 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearningUsageTransportAction.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearningUsageTransportAction.java @@ -39,6 +39,7 @@ import org.elasticsearch.xpack.core.ml.action.GetJobsStatsAction; import org.elasticsearch.xpack.core.ml.action.GetTrainedModelsAction; import 
org.elasticsearch.xpack.core.ml.action.GetTrainedModelsStatsAction; +import org.elasticsearch.xpack.core.ml.action.MlMemoryAction; import org.elasticsearch.xpack.core.ml.datafeed.DatafeedState; import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsConfig; import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsState; @@ -65,6 +66,7 @@ import java.util.Map; import java.util.Objects; import java.util.TreeMap; +import java.util.concurrent.atomic.AtomicReference; import java.util.function.Function; import java.util.stream.Collectors; @@ -72,16 +74,20 @@ public class MachineLearningUsageTransportAction extends XPackUsageFeatureTransportAction { - private static class ModelStats { + private static class DeploymentStats { private final String modelId; private final String taskType; private final StatsAccumulator inferenceCounts = new StatsAccumulator(); private Instant lastAccess; + private final int numThreads; + private final int numAllocations; - ModelStats(String modelId, String taskType) { + DeploymentStats(String modelId, String taskType, int numThreads, int numAllocations) { this.modelId = modelId; this.taskType = taskType; + this.numThreads = numThreads; + this.numAllocations = numAllocations; } void update(AssignmentStats.NodeStats stats) { @@ -95,6 +101,8 @@ Map asMap() { Map result = new HashMap<>(); result.put("model_id", modelId); result.put("task_type", taskType); + result.put("num_allocations", numAllocations); + result.put("num_threads", numThreads); result.put("inference_counts", inferenceCounts.asMap()); if (lastAccess != null) { result.put("last_access", lastAccess.toString()); @@ -158,6 +166,7 @@ protected void masterOperation( Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(), + Collections.emptyMap(), 0 ); listener.onResponse(new XPackUsageFeatureResponse(usage)); @@ -167,11 +176,14 @@ protected void masterOperation( Map jobsUsage = new LinkedHashMap<>(); Map datafeedsUsage = new LinkedHashMap<>(); Map analyticsUsage = new LinkedHashMap<>(); + AtomicReference> inferenceUsage = new AtomicReference<>(Map.of()); + int nodeCount = mlNodeCount(state); - // Step 5. return final ML usage - ActionListener> inferenceUsageListener = ActionListener.wrap( - inferenceUsage -> listener.onResponse( + // Step 6. 
return final ML usage + ActionListener memoryUsageListener = ActionListener.wrap(memoryResponse -> { + var memoryUsage = extractMemoryUsage(memoryResponse); + listener.onResponse( new XPackUsageFeatureResponse( new MachineLearningFeatureSetUsage( MachineLearningField.ML_API_FEATURE.checkWithoutTracking(licenseState), @@ -179,28 +191,38 @@ protected void masterOperation( jobsUsage, datafeedsUsage, analyticsUsage, - inferenceUsage, + inferenceUsage.get(), + memoryUsage, nodeCount ) ) - ), - e -> { - logger.warn("Failed to get inference usage to include in ML usage", e); - listener.onResponse( - new XPackUsageFeatureResponse( - new MachineLearningFeatureSetUsage( - MachineLearningField.ML_API_FEATURE.checkWithoutTracking(licenseState), - enabled, - jobsUsage, - datafeedsUsage, - analyticsUsage, - Collections.emptyMap(), - nodeCount - ) + ); + }, e -> { + logger.warn("Failed to get memory usage to include in ML usage", e); + listener.onResponse( + new XPackUsageFeatureResponse( + new MachineLearningFeatureSetUsage( + MachineLearningField.ML_API_FEATURE.checkWithoutTracking(licenseState), + enabled, + jobsUsage, + datafeedsUsage, + analyticsUsage, + inferenceUsage.get(), + Collections.emptyMap(), + nodeCount ) - ); - } - ); + ) + ); + }); + + // Step 5. Get + ActionListener> inferenceUsageListener = ActionListener.wrap(inference -> { + inferenceUsage.set(inference); + client.execute(MlMemoryAction.INSTANCE, new MlMemoryAction.Request("_all"), memoryUsageListener); + }, e -> { + logger.warn("Failed to get inference usage to include in ML usage", e); + client.execute(MlMemoryAction.INSTANCE, new MlMemoryAction.Request("_all"), memoryUsageListener); + }); // Step 4. Extract usage from data frame analytics configs and then get inference usage ActionListener dataframeAnalyticsListener = ActionListener.wrap(response -> { @@ -464,7 +486,7 @@ private static void addDeploymentStats( int deploymentsCount = 0; double avgTimeSum = 0.0; StatsAccumulator nodeDistribution = new StatsAccumulator(); - Map statsByModel = new TreeMap<>(); + Map statsByModel = new TreeMap<>(); for (var stats : statsResponse.getResources().results()) { AssignmentStats deploymentStats = stats.getDeploymentStats(); if (deploymentStats == null) { @@ -478,7 +500,15 @@ private static void addDeploymentStats( String modelId = deploymentStats.getModelId(); String taskType = taskTypes.get(deploymentStats.getModelId()); String mapKey = modelId + ":" + taskType; - ModelStats modelStats = statsByModel.computeIfAbsent(mapKey, key -> new ModelStats(modelId, taskType)); + DeploymentStats modelStats = statsByModel.computeIfAbsent( + mapKey, + key -> new DeploymentStats( + modelId, + taskType, + deploymentStats.getThreadsPerAllocation(), + deploymentStats.getNumberOfAllocations() + ) + ); for (var nodeStats : deploymentStats.getNodeStats()) { long nodeInferenceCount = nodeStats.getInferenceCount().orElse(0L); avgTimeSum += nodeStats.getAvgInferenceTime().orElse(0.0) * nodeInferenceCount; @@ -499,7 +529,7 @@ private static void addDeploymentStats( "inference_counts", nodeDistribution.asMap(), "stats_by_model", - statsByModel.values().stream().map(ModelStats::asMap).collect(Collectors.toList()) + statsByModel.values().stream().map(DeploymentStats::asMap).collect(Collectors.toList()) ) ); } @@ -590,6 +620,21 @@ private static void addInferenceIngestUsage(GetTrainedModelsStatsAction.Response inferenceUsage.put("ingest_processors", Collections.singletonMap(MachineLearningFeatureSetUsage.ALL, ingestUsage)); } + private static Map 
extractMemoryUsage(MlMemoryAction.Response memoryResponse) { + var adMem = memoryResponse.getNodes().stream().mapToLong(mem -> mem.getMlAnomalyDetectors().getBytes()).sum(); + var dfaMem = memoryResponse.getNodes().stream().mapToLong(mem -> mem.getMlDataFrameAnalytics().getBytes()).sum(); + var pytorchMem = memoryResponse.getNodes().stream().mapToLong(mem -> mem.getMlNativeInference().getBytes()).sum(); + var nativeOverheadMem = memoryResponse.getNodes().stream().mapToLong(mem -> mem.getMlNativeCodeOverhead().getBytes()).sum(); + long totalUsedMem = adMem + dfaMem + pytorchMem + nativeOverheadMem; + + var memoryUsage = new LinkedHashMap(); + memoryUsage.put("anomaly_detectors_memory_bytes", adMem); + memoryUsage.put("data_frame_analytics_memory_bytes", dfaMem); + memoryUsage.put("pytorch_inference_memory_bytes", pytorchMem); + memoryUsage.put("total_used_memory_bytes", totalUsedMem); + return memoryUsage; + } + private static Map getMinMaxSumAsLongsFromStats(StatsAccumulator stats) { Map asMap = Maps.newMapWithExpectedSize(3); asMap.put("sum", Double.valueOf(stats.getTotal()).longValue()); diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningInfoTransportActionTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningInfoTransportActionTests.java index e5575abfeb020..4fdb7d2e5e46c 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningInfoTransportActionTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningInfoTransportActionTests.java @@ -10,9 +10,11 @@ import org.elasticsearch.action.support.ActionFilters; import org.elasticsearch.action.support.PlainActionFuture; import org.elasticsearch.client.internal.Client; +import org.elasticsearch.cluster.ClusterName; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; import org.elasticsearch.cluster.metadata.Metadata; +import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNodeRole; import org.elasticsearch.cluster.node.DiscoveryNodeUtils; import org.elasticsearch.cluster.node.DiscoveryNodes; @@ -46,6 +48,7 @@ import org.elasticsearch.xpack.core.ml.action.GetJobsStatsAction; import org.elasticsearch.xpack.core.ml.action.GetTrainedModelsAction; import org.elasticsearch.xpack.core.ml.action.GetTrainedModelsStatsAction; +import org.elasticsearch.xpack.core.ml.action.MlMemoryAction; import org.elasticsearch.xpack.core.ml.datafeed.DatafeedConfig; import org.elasticsearch.xpack.core.ml.datafeed.DatafeedState; import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsConfig; @@ -134,6 +137,27 @@ public void init() { new QueryPage<>(Collections.emptyList(), 0, GetTrainedModelsStatsAction.Response.RESULTS_FIELD) ) ); + givenMlMemory( + new MlMemoryAction.Response( + new ClusterName("cluster_foo"), + List.of( + new MlMemoryAction.Response.MlMemoryStats( + mock(DiscoveryNode.class), + ByteSizeValue.ofBytes(100L), + ByteSizeValue.ofBytes(1L), + ByteSizeValue.ofBytes(1L), + ByteSizeValue.ofBytes(1L), + ByteSizeValue.ofBytes(20L), + ByteSizeValue.ofBytes(30L), + ByteSizeValue.ofBytes(40L), + ByteSizeValue.ofBytes(1L), + ByteSizeValue.ofBytes(1L), + ByteSizeValue.ofBytes(1L) + ) + ), + List.of() + ) + ); } @After @@ -343,6 +367,8 @@ public void testUsage() throws Exception { assertThat(source.getValue("inference.deployments.inference_counts.avg"), equalTo(4.0)); 
assertThat(source.getValue("inference.deployments.stats_by_model.0.model_id"), equalTo("model_3")); assertThat(source.getValue("inference.deployments.stats_by_model.0.task_type"), equalTo("ner")); + assertThat(source.getValue("inference.deployments.stats_by_model.0.num_allocations"), equalTo(8)); + assertThat(source.getValue("inference.deployments.stats_by_model.0.num_threads"), equalTo(1)); assertThat(source.getValue("inference.deployments.stats_by_model.0.last_access"), equalTo(lastAccess(3).toString())); assertThat(source.getValue("inference.deployments.stats_by_model.0.inference_counts.total"), equalTo(3.0)); assertThat(source.getValue("inference.deployments.stats_by_model.0.inference_counts.min"), equalTo(3.0)); @@ -350,6 +376,8 @@ public void testUsage() throws Exception { assertThat(source.getValue("inference.deployments.stats_by_model.0.inference_counts.avg"), equalTo(3.0)); assertThat(source.getValue("inference.deployments.stats_by_model.1.model_id"), equalTo("model_4")); assertThat(source.getValue("inference.deployments.stats_by_model.1.task_type"), equalTo("text_expansion")); + assertThat(source.getValue("inference.deployments.stats_by_model.1.num_allocations"), equalTo(2)); + assertThat(source.getValue("inference.deployments.stats_by_model.1.num_threads"), equalTo(2)); assertThat(source.getValue("inference.deployments.stats_by_model.1.last_access"), equalTo(lastAccess(44).toString())); assertThat(source.getValue("inference.deployments.stats_by_model.1.inference_counts.total"), equalTo(9.0)); assertThat(source.getValue("inference.deployments.stats_by_model.1.inference_counts.min"), equalTo(4.0)); @@ -360,6 +388,11 @@ public void testUsage() throws Exception { assertThat(source.getValue("inference.deployments.model_sizes_bytes.max"), equalTo(1000.0)); assertThat(source.getValue("inference.deployments.model_sizes_bytes.avg"), equalTo(650.0)); assertThat(source.getValue("inference.deployments.time_ms.avg"), closeTo(44.0, 1e-10)); + + assertThat(source.getValue("memory.anomaly_detectors_memory_bytes"), equalTo(20)); + assertThat(source.getValue("memory.data_frame_analytics_memory_bytes"), equalTo(30)); + assertThat(source.getValue("memory.pytorch_inference_memory_bytes"), equalTo(40)); + assertThat(source.getValue("memory.total_used_memory_bytes"), equalTo(91)); } } @@ -566,6 +599,8 @@ public void testUsageWithOrphanedTask() throws Exception { Job closed1 = buildJob("closed1", Arrays.asList(buildMinDetector("foo"), buildMinDetector("bar"), buildMinDetector("foobar"))); GetJobsStatsAction.Response.JobStats closed1JobStats = buildJobStats("closed1", JobState.CLOSED, 300L, 0); givenJobs(Arrays.asList(opened1, closed1), Arrays.asList(opened1JobStats, opened2JobStats, closed1JobStats)); + MlMemoryAction.Response memory = new MlMemoryAction.Response(new ClusterName("foo"), List.of(), List.of()); + givenMlMemory(memory); var usageAction = newUsageAction(settings.build(), true, true, true); PlainActionFuture future = new PlainActionFuture<>(); @@ -590,6 +625,11 @@ public void testUsageWithOrphanedTask() throws Exception { assertThat(source.getValue("jobs._all.model_size.avg"), equalTo(200.0)); assertThat(source.getValue("jobs._all.created_by.a_cool_module"), equalTo(1)); assertThat(source.getValue("jobs._all.created_by.unknown"), equalTo(1)); + + assertThat(source.getValue("memory.anomaly_detectors_memory_bytes"), equalTo(0)); + assertThat(source.getValue("memory.data_frame_analytics_memory_bytes"), equalTo(0)); + assertThat(source.getValue("memory.pytorch_inference_memory_bytes"), 
equalTo(0)); + assertThat(source.getValue("memory.total_used_memory_bytes"), equalTo(0)); } public void testUsageDisabledML() throws Exception { @@ -802,6 +842,15 @@ private void givenTrainedModelStats(GetTrainedModelsStatsAction.Response trained }).when(client).execute(same(GetTrainedModelsStatsAction.INSTANCE), any(), any()); } + private void givenMlMemory(MlMemoryAction.Response memoryUsage) { + doAnswer(invocationOnMock -> { + @SuppressWarnings("unchecked") + ActionListener listener = (ActionListener) invocationOnMock.getArguments()[2]; + listener.onResponse(memoryUsage); + return Void.TYPE; + }).when(client).execute(same(MlMemoryAction.INSTANCE), any(), any()); + } + private static Detector buildMinDetector(String fieldName) { Detector.Builder detectorBuilder = new Detector.Builder(); detectorBuilder.setFunction("min"); @@ -1004,8 +1053,8 @@ private Map setupComplexMocks() { new AssignmentStats( "deployment_3", "model_3", - null, - null, + 1, + 8, null, null, null, @@ -1111,6 +1160,29 @@ private Map setupComplexMocks() { ) ) ); + + givenMlMemory( + new MlMemoryAction.Response( + new ClusterName("cluster_foo"), + List.of( + new MlMemoryAction.Response.MlMemoryStats( + mock(DiscoveryNode.class), + ByteSizeValue.ofBytes(100L), + ByteSizeValue.ofBytes(1L), + ByteSizeValue.ofBytes(1L), + ByteSizeValue.ofBytes(1L), + ByteSizeValue.ofBytes(20L), + ByteSizeValue.ofBytes(30L), + ByteSizeValue.ofBytes(40L), + ByteSizeValue.ofBytes(1L), + ByteSizeValue.ofBytes(1L), + ByteSizeValue.ofBytes(1L) + ) + ), + List.of() + ) + ); + return expectedDfaCountByAnalysis; } From 40f1e5057e2b6521c053b9e20245f358f46d06e5 Mon Sep 17 00:00:00 2001 From: Simon Cooper Date: Wed, 25 Sep 2024 14:24:05 +0100 Subject: [PATCH 44/58] Add blog links to locale deprecation warnings (#113474) --- .../test/ingest/30_date_processor.yml | 18 +++++++++--------- .../test/ingest/20_combine_processors.yml | 18 +++++++++--------- .../search/180_locale_dependent_mapping.yml | 6 +++--- .../elasticsearch/common/time/DateUtils.java | 6 ++++-- .../src/main/resources/date.csv-spec | 2 +- .../rest-api-spec/test/esql/70_locale.yml | 6 +++--- .../test/security/authz/13_index_datemath.yml | 14 +++++++------- 7 files changed, 36 insertions(+), 34 deletions(-) diff --git a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/30_date_processor.yml b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/30_date_processor.yml index b2710ff05aa00..a5caf7493340c 100644 --- a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/30_date_processor.yml +++ b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/30_date_processor.yml @@ -1,6 +1,6 @@ setup: - requires: - test_runner_features: allowed_warnings + test_runner_features: allowed_warnings_regex --- teardown: - do: @@ -100,8 +100,8 @@ teardown: "Test date processor with no timezone configured": - do: - allowed_warnings: - - 'Date format [dd/MMM/yyyy:H:m:s Z] contains textual field specifiers that could change in JDK 23' + allowed_warnings_regex: + - 'Date format \[dd/MMM/yyyy:H:m:s Z] contains textual field specifiers that could change in JDK 23.*' ingest.put_pipeline: id: "my_pipeline" # sample formats from beats, featuring mongodb, icinga, apache @@ -170,8 +170,8 @@ teardown: - match: { acknowledged: true } - do: - allowed_warnings: - - 'Date format [dd/MMM/yyyy:H:m:s Z] contains textual field specifiers that could change in JDK 23' + allowed_warnings_regex: + - 'Date format \[dd/MMM/yyyy:H:m:s Z] contains 
textual field specifiers that could change in JDK 23.*' index: index: test id: "1" @@ -211,8 +211,8 @@ teardown: --- "Test week based date parsing": - do: - allowed_warnings: - - 'Date format [YYYY-ww] contains week-date field specifiers that are changing in JDK 23' + allowed_warnings_regex: + - 'Date format \[YYYY-ww] contains week-date field specifiers that are changing in JDK 23.*' indices.create: index: test body: @@ -223,8 +223,8 @@ teardown: format: YYYY-ww - do: - allowed_warnings: - - 'Date format [YYYY-ww] contains week-date field specifiers that are changing in JDK 23' + allowed_warnings_regex: + - 'Date format \[YYYY-ww] contains week-date field specifiers that are changing in JDK 23.*' ingest.put_pipeline: id: "my_pipeline" body: > diff --git a/qa/smoke-test-ingest-with-all-dependencies/src/yamlRestTest/resources/rest-api-spec/test/ingest/20_combine_processors.yml b/qa/smoke-test-ingest-with-all-dependencies/src/yamlRestTest/resources/rest-api-spec/test/ingest/20_combine_processors.yml index 301ff636d72be..5b14efc7cce6a 100644 --- a/qa/smoke-test-ingest-with-all-dependencies/src/yamlRestTest/resources/rest-api-spec/test/ingest/20_combine_processors.yml +++ b/qa/smoke-test-ingest-with-all-dependencies/src/yamlRestTest/resources/rest-api-spec/test/ingest/20_combine_processors.yml @@ -1,11 +1,11 @@ setup: - requires: - test_runner_features: allowed_warnings + test_runner_features: allowed_warnings_regex --- "Test with date processor": - do: - allowed_warnings: - - 'Date format [dd/MMM/yyyy:HH:mm:ss xx] contains textual field specifiers that could change in JDK 23' + allowed_warnings_regex: + - 'Date format \[dd/MMM/yyyy:HH:mm:ss xx] contains textual field specifiers that could change in JDK 23.*' ingest.put_pipeline: id: "_id" body: > @@ -46,8 +46,8 @@ setup: - match: { acknowledged: true } - do: - allowed_warnings: - - 'Date format [dd/MMM/yyyy:HH:mm:ss xx] contains textual field specifiers that could change in JDK 23' + allowed_warnings_regex: + - 'Date format \[dd/MMM/yyyy:HH:mm:ss xx] contains textual field specifiers that could change in JDK 23.*' index: index: test id: "1" @@ -77,8 +77,8 @@ setup: --- "Test with date processor and ECS-v1": - do: - allowed_warnings: - - 'Date format [dd/MMM/yyyy:HH:mm:ss xx] contains textual field specifiers that could change in JDK 23' + allowed_warnings_regex: + - 'Date format \[dd/MMM/yyyy:HH:mm:ss xx] contains textual field specifiers that could change in JDK 23.*' ingest.put_pipeline: id: "_id" body: > @@ -108,8 +108,8 @@ setup: - match: { acknowledged: true } - do: - allowed_warnings: - - 'Date format [dd/MMM/yyyy:HH:mm:ss xx] contains textual field specifiers that could change in JDK 23' + allowed_warnings_regex: + - 'Date format \[dd/MMM/yyyy:HH:mm:ss xx] contains textual field specifiers that could change in JDK 23.*' index: index: test id: "1" diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/180_locale_dependent_mapping.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/180_locale_dependent_mapping.yml index 079b109d93044..47876d2820aac 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/180_locale_dependent_mapping.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/180_locale_dependent_mapping.yml @@ -1,11 +1,11 @@ setup: - requires: - test_runner_features: allowed_warnings + test_runner_features: allowed_warnings_regex --- "Test Index and Search locale dependent mappings / dates": - do: - allowed_warnings: - - 'Date format 
[E, d MMM yyyy HH:mm:ss Z] contains textual field specifiers that could change in JDK 23' + allowed_warnings_regex: + - 'Date format \[E, d MMM yyyy HH:mm:ss Z] contains textual field specifiers that could change in JDK 23.*' indices.create: index: test_index body: diff --git a/server/src/main/java/org/elasticsearch/common/time/DateUtils.java b/server/src/main/java/org/elasticsearch/common/time/DateUtils.java index 2bf4a07d85133..649275c10b665 100644 --- a/server/src/main/java/org/elasticsearch/common/time/DateUtils.java +++ b/server/src/main/java/org/elasticsearch/common/time/DateUtils.java @@ -405,7 +405,8 @@ static void checkTextualDateFormats(String format) { deprecationLogger.warn( DeprecationCategory.PARSING, "cldr_date_formats_" + format, - "Date format [{}] contains textual field specifiers that could change in JDK 23", + "Date format [{}] contains textual field specifiers that could change in JDK 23." + + " For more information, see https://ela.st/jdk-23-locales", format ); } @@ -413,7 +414,8 @@ static void checkTextualDateFormats(String format) { deprecationLogger.warn( DeprecationCategory.PARSING, "cldr_week_dates_" + format, - "Date format [{}] contains week-date field specifiers that are changing in JDK 23", + "Date format [{}] contains week-date field specifiers that are changing in JDK 23." + + " For more information, see https://ela.st/jdk-23-locales", format ); } diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/date.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/date.csv-spec index f539644bfedff..fd22a8c4b06f1 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/date.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/date.csv-spec @@ -555,7 +555,7 @@ dateFormatLocale from employees | where emp_no == 10049 or emp_no == 10050 | sort emp_no | eval birth_month = date_format("MMMM", birth_date) | keep emp_no, birth_date, birth_month; ignoreOrder:true -warningRegex:Date format \[MMMM] contains textual field specifiers that could change in JDK 23 +warningRegex:Date format \[MMMM] contains textual field specifiers that could change in JDK 23.* emp_no:integer | birth_date:datetime | birth_month:keyword 10049 | null | null diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/70_locale.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/70_locale.yml index 7c6ea434c12df..b867881f406e8 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/70_locale.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/70_locale.yml @@ -28,8 +28,8 @@ setup: "Date format with default locale": - do: allowed_warnings_regex: - - "No limit defined, adding default limit of \\[.*\\]" - - "Date format \\[MMMM] contains textual field specifiers that could change in JDK 23" + - 'No limit defined, adding default limit of \[.*]' + - 'Date format \[MMMM] contains textual field specifiers that could change in JDK 23.*' esql.query: body: query: 'FROM events | eval fixed_format = date_format("MMMM", @timestamp), variable_format = date_format(format, @timestamp) | sort @timestamp | keep @timestamp, fixed_format, variable_format' @@ -51,7 +51,7 @@ setup: - do: allowed_warnings_regex: - "No limit defined, adding default limit of \\[.*\\]" - - "Date format \\[MMMM] contains textual field specifiers that could change in JDK 23" + - "Date format \\[MMMM] contains textual field specifiers that could change in JDK 23.*" esql.query: body: query: 'FROM events | eval 
fixed_format = date_format("MMMM", @timestamp), variable_format = date_format(format, @timestamp) | sort @timestamp | keep @timestamp, fixed_format, variable_format' diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/security/authz/13_index_datemath.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/security/authz/13_index_datemath.yml index eee0e273974bd..99d2a2db25f23 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/security/authz/13_index_datemath.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/security/authz/13_index_datemath.yml @@ -1,7 +1,7 @@ --- setup: - requires: - test_runner_features: allowed_warnings + test_runner_features: allowed_warnings_regex - skip: features: headers @@ -56,8 +56,8 @@ teardown: } - do: - allowed_warnings: - - 'Date format [YYYY.MM] contains week-date field specifiers that are changing in JDK 23' + allowed_warnings_regex: + - 'Date format \[YYYY\.MM] contains week-date field specifiers that are changing in JDK 23.*' headers: { Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" } # test_user bulk: body: @@ -93,8 +93,8 @@ teardown: } - do: - allowed_warnings: - - 'Date format [YYYY.MM] contains week-date field specifiers that are changing in JDK 23' + allowed_warnings_regex: + - 'Date format \[YYYY\.MM] contains week-date field specifiers that are changing in JDK 23.*' headers: { Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" } # test_user bulk: body: @@ -121,8 +121,8 @@ teardown: --- "Test bulk indexing with datemath when only some are allowed": - do: - allowed_warnings: - - 'Date format [YYYY] contains week-date field specifiers that are changing in JDK 23' + allowed_warnings_regex: + - 'Date format \[YYYY] contains week-date field specifiers that are changing in JDK 23.*' headers: { Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" } # test_user bulk: body: From 54617cb2abb03527a6af2795567f41dc77296660 Mon Sep 17 00:00:00 2001 From: David Kyle Date: Wed, 25 Sep 2024 14:36:04 +0100 Subject: [PATCH 45/58] [ML] Remove the cluster state listener when the adaptive allocations service stops (#113524) (#113530) --- .../AdaptiveAllocationsScalerService.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/adaptiveallocations/AdaptiveAllocationsScalerService.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/adaptiveallocations/AdaptiveAllocationsScalerService.java index 6c59add730052..bbe90f769818b 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/adaptiveallocations/AdaptiveAllocationsScalerService.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/adaptiveallocations/AdaptiveAllocationsScalerService.java @@ -259,12 +259,17 @@ public synchronized void start() { } public synchronized void stop() { + clusterService.removeListener(this); stopScheduling(); metrics.close(); } @Override public void clusterChanged(ClusterChangedEvent event) { + if (event.metadataChanged() == false) { + return; + } + updateAutoscalers(event.state()); if (scalers.isEmpty() == false) { startScheduling(); From 2955518c87b903e62a4154e9791b9a15249b79a0 Mon Sep 17 00:00:00 2001 From: Panagiotis Bailis Date: Wed, 25 Sep 2024 16:46:56 +0300 Subject: [PATCH 46/58] Extending catch clause in rewriteAndFetch (#112829) (#113520) Co-authored-by: Elastic Machine --- 
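Note: the sketch below illustrates the pattern this change adopts, in which every failure raised during the rewrite, checked or unchecked, is funnelled to the callback rather than allowed to escape to the caller. It is illustrative only; the Listener interface and rewrite() method are hypothetical stand-ins, not the real ActionListener or Rewriteable API.

    // Illustrative sketch only (not part of the patch).
    public final class ListenerFunnelSketch {
        interface Listener<T> {
            void onResponse(T result);
            void onFailure(Exception e);
        }

        static void rewriteAndNotify(String query, Listener<String> listener) {
            try {
                listener.onResponse(rewrite(query));
            } catch (Exception e) { // broadened from a narrow multi-catch
                listener.onFailure(e);
            }
        }

        // A stand-in rewrite step that can fail with an exception type the old
        // multi-catch (IOException | IllegalArgumentException | ParsingException)
        // would not have caught.
        private static String rewrite(String query) {
            if (query.isBlank()) {
                throw new IllegalStateException("nothing to rewrite");
            }
            return query.trim();
        }

        public static void main(String[] args) {
            rewriteAndNotify(" match_all ", new Listener<String>() {
                @Override
                public void onResponse(String result) {
                    System.out.println("rewritten: " + result);
                }

                @Override
                public void onFailure(Exception e) {
                    System.out.println("failed: " + e.getMessage());
                }
            });
        }
    }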
.../main/java/org/elasticsearch/index/query/Rewriteable.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/query/Rewriteable.java b/server/src/main/java/org/elasticsearch/index/query/Rewriteable.java index b275603fff635..28a0fbc6b59f7 100644 --- a/server/src/main/java/org/elasticsearch/index/query/Rewriteable.java +++ b/server/src/main/java/org/elasticsearch/index/query/Rewriteable.java @@ -9,7 +9,6 @@ package org.elasticsearch.index.query; import org.elasticsearch.action.ActionListener; -import org.elasticsearch.common.ParsingException; import java.io.IOException; import java.util.ArrayList; @@ -108,7 +107,7 @@ static > void rewriteAndFetch( } } rewriteResponse.onResponse(builder); - } catch (IOException | IllegalArgumentException | ParsingException ex) { + } catch (Exception ex) { rewriteResponse.onFailure(ex); } } From a2188010b76932ed1a86974715987b78ee2027ef Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 25 Sep 2024 14:58:50 +0100 Subject: [PATCH 47/58] Fix up comment in `PersistentTasksNodeService` (#113526) (#113531) The formatting of this comment was destroyed by spotless. This commit fixes this. --- .../PersistentTasksNodeService.java | 54 ++++++++++--------- 1 file changed, 29 insertions(+), 25 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/persistent/PersistentTasksNodeService.java b/server/src/main/java/org/elasticsearch/persistent/PersistentTasksNodeService.java index 8bad8b5003bce..b86292be8e9ee 100644 --- a/server/src/main/java/org/elasticsearch/persistent/PersistentTasksNodeService.java +++ b/server/src/main/java/org/elasticsearch/persistent/PersistentTasksNodeService.java @@ -75,31 +75,35 @@ public void clusterChanged(ClusterChangedEvent event) { PersistentTasksCustomMetadata tasks = event.state().getMetadata().custom(PersistentTasksCustomMetadata.TYPE); PersistentTasksCustomMetadata previousTasks = event.previousState().getMetadata().custom(PersistentTasksCustomMetadata.TYPE); - // Cluster State Local State Local Action - // STARTED NULL Create as STARTED, Start - // STARTED STARTED Noop - running - // STARTED COMPLETED Noop - waiting for notification ack - // STARTED LOCAL_ABORTED Noop - waiting for notification ack - - // NULL NULL Noop - nothing to do - // NULL STARTED Remove locally, Mark as PENDING_CANCEL, Cancel - // NULL COMPLETED Remove locally - // NULL LOCAL_ABORTED Remove locally - - // Master states: - // NULL - doesn't exist in the cluster state - // STARTED - exist in the cluster state - - // Local state: - // NULL - we don't have task registered locally in runningTasks - // STARTED - registered in TaskManager, requires master notification when finishes - // PENDING_CANCEL - registered in TaskManager, doesn't require master notification when finishes - // COMPLETED - not registered in TaskManager, notified, waiting for master to remove it from CS so we can remove locally - // LOCAL_ABORTED - not registered in TaskManager, notified, waiting for master to adjust it in CS so we can remove locally - - // When task finishes if it is marked as STARTED or PENDING_CANCEL it is marked as COMPLETED and unregistered, - // If the task was STARTED, the master notification is also triggered (this is handled by unregisterTask() method, which is - // triggered by PersistentTaskListener + /* + * Master states: + * NULL - doesn't exist in the cluster state + * STARTED - exist in the cluster state + * + * Local states (see org.elasticsearch.persistent.AllocatedPersistentTask.State) + * 
NULL - we don't have task registered locally in runningTasks + * STARTED - registered in TaskManager, requires master notification when finishes + * PENDING_CANCEL - registered in TaskManager, doesn't require master notification when finishes + * COMPLETED - not registered in TaskManager, notified, waiting for master to remove it from CS so we can remove locally + * LOCAL_ABORTED - not registered in TaskManager, notified, waiting for master to adjust it in CS so we can remove locally + * + * Master state | Local state | Local action + * ---------------+----------------+----------------------------------------------- + * STARTED | NULL | Create as STARTED, Start + * STARTED | STARTED | Noop - running + * STARTED | PENDING_CANCEL | Impossible + * STARTED | COMPLETED | Noop - waiting for notification ack + * STARTED | LOCAL_ABORTED | Noop - waiting for notification ack + * NULL | NULL | Noop - nothing to do + * NULL | STARTED | Remove locally, Mark as PENDING_CANCEL, Cancel + * NULL | PENDING_CANCEL | Noop - will remove locally when complete + * NULL | COMPLETED | Remove locally + * NULL | LOCAL_ABORTED | Remove locally + * + * When a task finishes, if it is marked as STARTED or PENDING_CANCEL it is marked as COMPLETED and unregistered. + * If the task was STARTED, the master notification is also triggered (this is handled by the unregisterTask() method, which is + * triggered by PersistentTaskListener) + */ if (Objects.equals(tasks, previousTasks) == false || event.nodesChanged()) { // We have some changes let's check if they are related to our node From f7911fedf5e5c6e5d645c45805d0d18d1dae53c8 Mon Sep 17 00:00:00 2001 From: David Kyle Date: Wed, 25 Sep 2024 15:25:25 +0100 Subject: [PATCH 48/58] [ML] Limit in flight requests when indexing model download parts (#112992) (#113514) Restores the changes from #111684, which use multiple streams to improve the time taken to download and install the built-in ML models. The first iteration had a problem where the number of in-flight requests was not properly limited; that is fixed here. Additionally, there are now circuit breaker checks on allocating the buffer used to store the model definition.
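The range-splitting idea can be sketched roughly as follows (illustrative only; the real logic is the ModelLoaderUtils.split method added in this patch, which additionally reserves the final chunk for a range of its own, since indexing the last part triggers an index refresh and must happen after all other parts have been written):

    import java.util.ArrayList;
    import java.util.List;

    // Simplified sketch: divide a download of `size` bytes into byte ranges
    // aligned on `chunk`-sized boundaries, one HTTP Range header per stream.
    public final class RangeSplitSketch {
        record Range(long start, long end) { // inclusive bounds, as in HTTP Range
            String header() {
                return "bytes=" + start + "-" + end;
            }
        }

        static List<Range> split(long size, int streams, long chunk) {
            long totalChunks = (size + chunk - 1) / chunk;           // round up
            long chunksPerStream = Math.max(1, totalChunks / streams);
            List<Range> ranges = new ArrayList<>();
            for (long start = 0; start < size;) {
                long end = Math.min(size, start + chunksPerStream * chunk) - 1;
                ranges.add(new Range(start, end));
                start = end + 1;
            }
            return ranges;
        }

        public static void main(String[] args) {
            // e.g. a 10 MB model definition over 5 streams with 1 MB chunks
            split(10L * 1024 * 1024, 5, 1024 * 1024).forEach(r -> System.out.println(r.header()));
        }
    }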
--- docs/changelog/111684.yaml | 5 + .../xpack/inference/TextEmbeddingCrudIT.java | 14 +- .../MachineLearningPackageLoader.java | 28 +- .../packageloader/action/ModelImporter.java | 328 +++++++++++++---- .../action/ModelLoaderUtils.java | 150 +++++++- ...ortGetTrainedModelPackageConfigAction.java | 2 +- .../TransportLoadTrainedModelPackage.java | 91 ++--- .../MachineLearningPackageLoaderTests.java | 12 + .../action/ModelDownloadTaskTests.java | 20 +- .../action/ModelImporterTests.java | 334 ++++++++++++++++++ .../action/ModelLoaderUtilsTests.java | 40 ++- ...TransportLoadTrainedModelPackageTests.java | 45 +-- 12 files changed, 896 insertions(+), 173 deletions(-) create mode 100644 docs/changelog/111684.yaml create mode 100644 x-pack/plugin/ml-package-loader/src/test/java/org/elasticsearch/xpack/ml/packageloader/action/ModelImporterTests.java diff --git a/docs/changelog/111684.yaml b/docs/changelog/111684.yaml new file mode 100644 index 0000000000000..32edb5723cb0a --- /dev/null +++ b/docs/changelog/111684.yaml @@ -0,0 +1,5 @@ +pr: 111684 +summary: Write downloaded model parts async +area: Machine Learning +type: enhancement +issues: [] diff --git a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/TextEmbeddingCrudIT.java b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/TextEmbeddingCrudIT.java index 7fb47e901f703..6c15b42dc65d5 100644 --- a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/TextEmbeddingCrudIT.java +++ b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/TextEmbeddingCrudIT.java @@ -7,7 +7,6 @@ package org.elasticsearch.xpack.inference; -import org.apache.lucene.tests.util.LuceneTestCase; import org.elasticsearch.client.Request; import org.elasticsearch.common.Strings; import org.elasticsearch.inference.TaskType; @@ -19,11 +18,11 @@ import static org.hamcrest.Matchers.containsString; -// Tests disabled in CI due to the models being too large to download. Can be enabled (commented out) for local testing -@LuceneTestCase.AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/105198") +// This test was previously disabled in CI due to the models being too large +// See "https://github.com/elastic/elasticsearch/issues/105198". 
public class TextEmbeddingCrudIT extends InferenceBaseRestTest { - public void testPutE5Small_withNoModelVariant() throws IOException { + public void testPutE5Small_withNoModelVariant() { { String inferenceEntityId = randomAlphaOfLength(10).toLowerCase(); expectThrows( @@ -51,6 +50,7 @@ public void testPutE5Small_withPlatformAgnosticVariant() throws IOException { deleteTextEmbeddingModel(inferenceEntityId); } + @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/105198") public void testPutE5Small_withPlatformSpecificVariant() throws IOException { String inferenceEntityId = randomAlphaOfLength(10).toLowerCase(); if ("linux-x86_64".equals(Platforms.PLATFORM_NAME)) { @@ -124,7 +124,7 @@ private String noModelIdVariantJsonEntity() { private String platformAgnosticModelVariantJsonEntity() { return """ { - "service": "text_embedding", + "service": "elasticsearch", "service_settings": { "num_allocations": 1, "num_threads": 1, @@ -137,7 +137,7 @@ private String platformAgnosticModelVariantJsonEntity() { private String platformSpecificModelVariantJsonEntity() { return """ { - "service": "text_embedding", + "service": "elasticsearch", "service_settings": { "num_allocations": 1, "num_threads": 1, @@ -150,7 +150,7 @@ private String platformSpecificModelVariantJsonEntity() { private String fakeModelVariantJsonEntity() { return """ { - "service": "text_embedding", + "service": "elasticsearch", "service_settings": { "num_allocations": 1, "num_threads": 1, diff --git a/x-pack/plugin/ml-package-loader/src/main/java/org/elasticsearch/xpack/ml/packageloader/MachineLearningPackageLoader.java b/x-pack/plugin/ml-package-loader/src/main/java/org/elasticsearch/xpack/ml/packageloader/MachineLearningPackageLoader.java index e927c46e6bd29..a63d911e9d40d 100644 --- a/x-pack/plugin/ml-package-loader/src/main/java/org/elasticsearch/xpack/ml/packageloader/MachineLearningPackageLoader.java +++ b/x-pack/plugin/ml-package-loader/src/main/java/org/elasticsearch/xpack/ml/packageloader/MachineLearningPackageLoader.java @@ -15,12 +15,17 @@ import org.elasticsearch.common.Strings; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.settings.Setting; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.util.concurrent.EsExecutors; import org.elasticsearch.plugins.ActionPlugin; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.tasks.Task; +import org.elasticsearch.threadpool.ExecutorBuilder; +import org.elasticsearch.threadpool.FixedExecutorBuilder; import org.elasticsearch.xpack.core.ml.packageloader.action.GetTrainedModelPackageConfigAction; import org.elasticsearch.xpack.core.ml.packageloader.action.LoadTrainedModelPackageAction; import org.elasticsearch.xpack.ml.packageloader.action.ModelDownloadTask; +import org.elasticsearch.xpack.ml.packageloader.action.ModelImporter; import org.elasticsearch.xpack.ml.packageloader.action.TransportGetTrainedModelPackageConfigAction; import org.elasticsearch.xpack.ml.packageloader.action.TransportLoadTrainedModelPackage; @@ -44,9 +49,6 @@ public class MachineLearningPackageLoader extends Plugin implements ActionPlugin Setting.Property.Dynamic ); - // re-using thread pool setup by the ml plugin - public static final String UTILITY_THREAD_POOL_NAME = "ml_utility"; - // This link will be invalid for serverless, but serverless will never be // air-gapped, so this message should never be needed. 
private static final String MODEL_REPOSITORY_DOCUMENTATION_LINK = format( @@ -54,6 +56,8 @@ public class MachineLearningPackageLoader extends Plugin implements ActionPlugin Build.current().version().replaceFirst("^(\\d+\\.\\d+).*", "$1") ); + public static final String MODEL_DOWNLOAD_THREADPOOL_NAME = "model_download"; + public MachineLearningPackageLoader() {} @Override @@ -81,6 +85,24 @@ public List getNamedWriteables() { ); } + @Override + public List> getExecutorBuilders(Settings settings) { + return List.of(modelDownloadExecutor(settings)); + } + + public static FixedExecutorBuilder modelDownloadExecutor(Settings settings) { + // Threadpool with a fixed number of threads for + // downloading the model definition files + return new FixedExecutorBuilder( + settings, + MODEL_DOWNLOAD_THREADPOOL_NAME, + ModelImporter.NUMBER_OF_STREAMS, + -1, // unbounded queue size + "xpack.ml.model_download_thread_pool", + EsExecutors.TaskTrackingConfig.DO_NOT_TRACK + ); + } + @Override public List getBootstrapChecks() { return List.of(new BootstrapCheck() { diff --git a/x-pack/plugin/ml-package-loader/src/main/java/org/elasticsearch/xpack/ml/packageloader/action/ModelImporter.java b/x-pack/plugin/ml-package-loader/src/main/java/org/elasticsearch/xpack/ml/packageloader/action/ModelImporter.java index 33d5d5982d2b0..b155d6c73ccef 100644 --- a/x-pack/plugin/ml-package-loader/src/main/java/org/elasticsearch/xpack/ml/packageloader/action/ModelImporter.java +++ b/x-pack/plugin/ml-package-loader/src/main/java/org/elasticsearch/xpack/ml/packageloader/action/ModelImporter.java @@ -10,124 +10,265 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.elasticsearch.ElasticsearchStatusException; -import org.elasticsearch.action.ActionRequest; -import org.elasticsearch.action.ActionResponse; -import org.elasticsearch.action.ActionType; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.support.RefCountingListener; +import org.elasticsearch.action.support.master.AcknowledgedResponse; import org.elasticsearch.client.internal.Client; -import org.elasticsearch.common.Strings; +import org.elasticsearch.common.breaker.CircuitBreaker; import org.elasticsearch.common.bytes.BytesArray; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.indices.breaker.CircuitBreakerService; import org.elasticsearch.rest.RestStatus; import org.elasticsearch.tasks.TaskCancelledException; +import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.xpack.core.ml.action.PutTrainedModelDefinitionPartAction; import org.elasticsearch.xpack.core.ml.action.PutTrainedModelVocabularyAction; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.ModelPackageConfig; +import org.elasticsearch.xpack.ml.packageloader.MachineLearningPackageLoader; -import java.io.IOException; import java.io.InputStream; import java.net.URI; import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.List; import java.util.Objects; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.atomic.AtomicInteger; import static org.elasticsearch.core.Strings.format; /** - * A helper class for abstracting out the use of the ModelLoaderUtils to make dependency injection testing easier. + * For downloading the vocabulary and model definition files and + * indexing those files in Elasticsearch. + * Holding the large model definition file in memory would consume + * too much memory, so instead it is streamed in chunks and each chunk + * is written to the index in a non-blocking request. + * The model files may be installed from a local file or downloaded + * from a server. The server download uses {@link #NUMBER_OF_STREAMS} + * connections, each using the Range header to split the stream by byte + * range. There is a complication in that the final part of the model + * definition must be uploaded last, as writing this part causes an index + * refresh. + * When reading from a local file, a single thread is used to read the file + * stream, split it into chunks and index those chunks. */ -class ModelImporter { +public class ModelImporter { private static final int DEFAULT_CHUNK_SIZE = 1024 * 1024; // 1MB + public static final int NUMBER_OF_STREAMS = 5; private static final Logger logger = LogManager.getLogger(ModelImporter.class); private final Client client; private final String modelId; private final ModelPackageConfig config; private final ModelDownloadTask task; + private final ExecutorService executorService; + private final AtomicInteger progressCounter = new AtomicInteger(); + private final URI uri; + private final CircuitBreakerService breakerService; - ModelImporter(Client client, String modelId, ModelPackageConfig packageConfig, ModelDownloadTask task) { + ModelImporter( + Client client, + String modelId, + ModelPackageConfig packageConfig, + ModelDownloadTask task, + ThreadPool threadPool, + CircuitBreakerService cbs + ) throws URISyntaxException { this.client = client; this.modelId = Objects.requireNonNull(modelId); this.config = Objects.requireNonNull(packageConfig); this.task = Objects.requireNonNull(task); + this.executorService = threadPool.executor(MachineLearningPackageLoader.MODEL_DOWNLOAD_THREADPOOL_NAME); + this.uri = ModelLoaderUtils.resolvePackageLocation( + config.getModelRepository(), + config.getPackagedModelId() + ModelLoaderUtils.MODEL_FILE_EXTENSION + ); + this.breakerService = cbs; } - public void doImport() throws URISyntaxException, IOException, ElasticsearchStatusException { - long size = config.getSize(); + public void doImport(ActionListener listener) { + executorService.execute(() -> doImportInternal(listener)); + } - // Uploading other artefacts of the model first, that way the model is last and a simple search can be used to check if the - // download is complete - if (Strings.isNullOrEmpty(config.getVocabularyFile()) == false) { - uploadVocabulary(); + private void doImportInternal(ActionListener finalListener) { + assert ThreadPool.assertCurrentThreadPool(MachineLearningPackageLoader.MODEL_DOWNLOAD_THREADPOOL_NAME) + : format( + "Model download must execute from [%s] but thread is [%s]", + MachineLearningPackageLoader.MODEL_DOWNLOAD_THREADPOOL_NAME, + Thread.currentThread().getName() + ); - logger.debug(() -> format("[%s] imported model vocabulary [%s]", modelId, config.getVocabularyFile())); - } + ModelLoaderUtils.VocabularyParts vocabularyParts = null; + try { + if (config.getVocabularyFile() != null) { + vocabularyParts = ModelLoaderUtils.loadVocabulary( + ModelLoaderUtils.resolvePackageLocation(config.getModelRepository(), config.getVocabularyFile()) + ); + } - URI uri = ModelLoaderUtils.resolvePackageLocation( - config.getModelRepository(), - config.getPackagedModelId() + ModelLoaderUtils.MODEL_FILE_EXTENSION - ); + // simple round up + int totalParts = (int) ((config.getSize() + DEFAULT_CHUNK_SIZE - 1) / DEFAULT_CHUNK_SIZE); - InputStream modelInputStream =
ModelLoaderUtils.getInputStreamFromModelRepository(uri); + if (ModelLoaderUtils.uriIsFile(uri) == false) { + breakerService.getBreaker(CircuitBreaker.REQUEST) + .addEstimateBytesAndMaybeBreak(DEFAULT_CHUNK_SIZE * NUMBER_OF_STREAMS, "model importer"); + var breakerFreeingListener = ActionListener.runAfter( + finalListener, + () -> breakerService.getBreaker(CircuitBreaker.REQUEST).addWithoutBreaking(-(DEFAULT_CHUNK_SIZE * NUMBER_OF_STREAMS)) + ); - ModelLoaderUtils.InputStreamChunker chunkIterator = new ModelLoaderUtils.InputStreamChunker(modelInputStream, DEFAULT_CHUNK_SIZE); + var ranges = ModelLoaderUtils.split(config.getSize(), NUMBER_OF_STREAMS, DEFAULT_CHUNK_SIZE); + var downloaders = new ArrayList(ranges.size()); + for (var range : ranges) { + downloaders.add(new ModelLoaderUtils.HttpStreamChunker(uri, range, DEFAULT_CHUNK_SIZE)); + } + downloadModelDefinition(config.getSize(), totalParts, vocabularyParts, downloaders, breakerFreeingListener); + } else { + InputStream modelInputStream = ModelLoaderUtils.getFileInputStream(uri); + ModelLoaderUtils.InputStreamChunker chunkIterator = new ModelLoaderUtils.InputStreamChunker( + modelInputStream, + DEFAULT_CHUNK_SIZE + ); + readModelDefinitionFromFile(config.getSize(), totalParts, chunkIterator, vocabularyParts, finalListener); + } + } catch (Exception e) { + finalListener.onFailure(e); + } + } - // simple round up - int totalParts = (int) ((size + DEFAULT_CHUNK_SIZE - 1) / DEFAULT_CHUNK_SIZE); + void downloadModelDefinition( + long size, + int totalParts, + @Nullable ModelLoaderUtils.VocabularyParts vocabularyParts, + List downloaders, + ActionListener finalListener + ) { + try (var countingListener = new RefCountingListener(1, ActionListener.wrap(ignore -> executorService.execute(() -> { + var finalDownloader = downloaders.get(downloaders.size() - 1); + downloadFinalPart(size, totalParts, finalDownloader, finalListener.delegateFailureAndWrap((l, r) -> { + checkDownloadComplete(downloaders); + l.onResponse(AcknowledgedResponse.TRUE); + })); + }), finalListener::onFailure))) { + // Uploading other artefacts of the model first, that way the model is last and a simple search can be used to check if the + // download is complete + if (vocabularyParts != null) { + uploadVocabulary(vocabularyParts, countingListener); + } - for (int part = 0; part < totalParts - 1; ++part) { - task.setProgress(totalParts, part); - BytesArray definition = chunkIterator.next(); + // Download all but the final split. 
+ // The final split is a single chunk + for (int streamSplit = 0; streamSplit < downloaders.size() - 1; ++streamSplit) { + final var downloader = downloaders.get(streamSplit); + var rangeDownloadedListener = countingListener.acquire(); // acquire to keep the counting listener from closing + executorService.execute( + () -> downloadPartInRange(size, totalParts, downloader, executorService, countingListener, rangeDownloadedListener) + ); + } + } + } - PutTrainedModelDefinitionPartAction.Request modelPartRequest = new PutTrainedModelDefinitionPartAction.Request( - modelId, - definition, - part, - size, - totalParts, - true + private void downloadPartInRange( + long size, + int totalParts, + ModelLoaderUtils.HttpStreamChunker downloadChunker, + ExecutorService executorService, + RefCountingListener countingListener, + ActionListener rangeFullyDownloadedListener + ) { + assert ThreadPool.assertCurrentThreadPool(MachineLearningPackageLoader.MODEL_DOWNLOAD_THREADPOOL_NAME) + : format( + "Model download must execute from [%s] but thread is [%s]", + MachineLearningPackageLoader.MODEL_DOWNLOAD_THREADPOOL_NAME, + Thread.currentThread().getName() ); - executeRequestIfNotCancelled(PutTrainedModelDefinitionPartAction.INSTANCE, modelPartRequest); + if (countingListener.isFailing()) { + rangeFullyDownloadedListener.onResponse(null); // the error has already been reported elsewhere + return; } - // get the last part, this time verify the checksum and size - BytesArray definition = chunkIterator.next(); + try { + throwIfTaskCancelled(); + var bytesAndIndex = downloadChunker.next(); + task.setProgress(totalParts, progressCounter.getAndIncrement()); - if (config.getSha256().equals(chunkIterator.getSha256()) == false) { - String message = format( - "Model sha256 checksums do not match, expected [%s] but got [%s]", - config.getSha256(), - chunkIterator.getSha256() - ); + indexPart(bytesAndIndex.partIndex(), totalParts, size, bytesAndIndex.bytes()); + } catch (Exception e) { + rangeFullyDownloadedListener.onFailure(e); + return; + } - throw new ElasticsearchStatusException(message, RestStatus.INTERNAL_SERVER_ERROR); + if (downloadChunker.hasNext()) { + executorService.execute( + () -> downloadPartInRange( + size, + totalParts, + downloadChunker, + executorService, + countingListener, + rangeFullyDownloadedListener + ) + ); + } else { + rangeFullyDownloadedListener.onResponse(null); } + } - if (config.getSize() != chunkIterator.getTotalBytesRead()) { - String message = format( - "Model size does not match, expected [%d] but got [%d]", - config.getSize(), - chunkIterator.getTotalBytesRead() + private void downloadFinalPart( + long size, + int totalParts, + ModelLoaderUtils.HttpStreamChunker downloader, + ActionListener lastPartWrittenListener + ) { + assert ThreadPool.assertCurrentThreadPool(MachineLearningPackageLoader.MODEL_DOWNLOAD_THREADPOOL_NAME) + : format( + "Model download must execute from [%s] but thread is [%s]", + MachineLearningPackageLoader.MODEL_DOWNLOAD_THREADPOOL_NAME, + Thread.currentThread().getName() ); - throw new ElasticsearchStatusException(message, RestStatus.INTERNAL_SERVER_ERROR); + try { + var bytesAndIndex = downloader.next(); + task.setProgress(totalParts, progressCounter.getAndIncrement()); + + indexPart(bytesAndIndex.partIndex(), totalParts, size, bytesAndIndex.bytes()); + lastPartWrittenListener.onResponse(AcknowledgedResponse.TRUE); + } catch (Exception e) { + lastPartWrittenListener.onFailure(e); } + } - PutTrainedModelDefinitionPartAction.Request finalModelPartRequest = new 
PutTrainedModelDefinitionPartAction.Request( - modelId, - definition, - totalParts - 1, - size, - totalParts, - true - ); + void readModelDefinitionFromFile( + long size, + int totalParts, + ModelLoaderUtils.InputStreamChunker chunkIterator, + @Nullable ModelLoaderUtils.VocabularyParts vocabularyParts, + ActionListener finalListener + ) { + try (var countingListener = new RefCountingListener(1, ActionListener.wrap(ignore -> executorService.execute(() -> { + finalListener.onResponse(AcknowledgedResponse.TRUE); + }), finalListener::onFailure))) { + try { + if (vocabularyParts != null) { + uploadVocabulary(vocabularyParts, countingListener); + } - executeRequestIfNotCancelled(PutTrainedModelDefinitionPartAction.INSTANCE, finalModelPartRequest); - logger.debug(format("finished importing model [%s] using [%d] parts", modelId, totalParts)); - } + for (int part = 0; part < totalParts; ++part) { + throwIfTaskCancelled(); + task.setProgress(totalParts, part); + BytesArray definition = chunkIterator.next(); + indexPart(part, totalParts, size, definition); + } + task.setProgress(totalParts, totalParts); - private void uploadVocabulary() throws URISyntaxException { - ModelLoaderUtils.VocabularyParts vocabularyParts = ModelLoaderUtils.loadVocabulary( - ModelLoaderUtils.resolvePackageLocation(config.getModelRepository(), config.getVocabularyFile()) - ); + checkDownloadComplete(chunkIterator, totalParts); + } catch (Exception e) { + countingListener.acquire().onFailure(e); + } + } + } + private void uploadVocabulary(ModelLoaderUtils.VocabularyParts vocabularyParts, RefCountingListener countingListener) { PutTrainedModelVocabularyAction.Request request = new PutTrainedModelVocabularyAction.Request( modelId, vocabularyParts.vocab(), @@ -136,17 +277,58 @@ private void uploadVocabulary() throws URISyntaxException { true ); - executeRequestIfNotCancelled(PutTrainedModelVocabularyAction.INSTANCE, request); + client.execute(PutTrainedModelVocabularyAction.INSTANCE, request, countingListener.acquire(r -> { + logger.debug(() -> format("[%s] imported model vocabulary [%s]", modelId, config.getVocabularyFile())); + })); } - private void executeRequestIfNotCancelled( - ActionType action, - Request request - ) { - if (task.isCancelled()) { - throw new TaskCancelledException(format("task cancelled with reason [%s]", task.getReasonCancelled())); + private void indexPart(int partIndex, int totalParts, long totalSize, BytesArray bytes) { + PutTrainedModelDefinitionPartAction.Request modelPartRequest = new PutTrainedModelDefinitionPartAction.Request( + modelId, + bytes, + partIndex, + totalSize, + totalParts, + true + ); + + client.execute(PutTrainedModelDefinitionPartAction.INSTANCE, modelPartRequest).actionGet(); + } + + private void checkDownloadComplete(List downloaders) { + long totalBytesRead = downloaders.stream().mapToLong(ModelLoaderUtils.HttpStreamChunker::getTotalBytesRead).sum(); + int totalParts = downloaders.stream().mapToInt(ModelLoaderUtils.HttpStreamChunker::getCurrentPart).sum(); + checkSize(totalBytesRead); + logger.debug(format("finished importing model [%s] using [%d] parts", modelId, totalParts)); + } + + private void checkDownloadComplete(ModelLoaderUtils.InputStreamChunker fileInputStream, int totalParts) { + checkSha256(fileInputStream.getSha256()); + checkSize(fileInputStream.getTotalBytesRead()); + logger.debug(format("finished importing model [%s] using [%d] parts", modelId, totalParts)); + } + + private void checkSha256(String sha256) { + if (config.getSha256().equals(sha256) == false) { + 
String message = format("Model sha256 checksums do not match, expected [%s] but got [%s]", config.getSha256(), sha256); + + throw new ElasticsearchStatusException(message, RestStatus.INTERNAL_SERVER_ERROR); } + } - client.execute(action, request).actionGet(); + private void checkSize(long definitionSize) { + if (config.getSize() != definitionSize) { + String message = format("Model size does not match, expected [%d] but got [%d]", config.getSize(), definitionSize); + throw new ElasticsearchStatusException(message, RestStatus.INTERNAL_SERVER_ERROR); + } + } + + private void throwIfTaskCancelled() { + if (task.isCancelled()) { + logger.info("Model [{}] download task cancelled", modelId); + throw new TaskCancelledException( + format("Model [%s] download task cancelled with reason [%s]", modelId, task.getReasonCancelled()) + ); + } } } diff --git a/x-pack/plugin/ml-package-loader/src/main/java/org/elasticsearch/xpack/ml/packageloader/action/ModelLoaderUtils.java b/x-pack/plugin/ml-package-loader/src/main/java/org/elasticsearch/xpack/ml/packageloader/action/ModelLoaderUtils.java index 2f3f9cbf3f32c..e92aff74be463 100644 --- a/x-pack/plugin/ml-package-loader/src/main/java/org/elasticsearch/xpack/ml/packageloader/action/ModelLoaderUtils.java +++ b/x-pack/plugin/ml-package-loader/src/main/java/org/elasticsearch/xpack/ml/packageloader/action/ModelLoaderUtils.java @@ -17,6 +17,7 @@ import org.elasticsearch.common.io.Streams; import org.elasticsearch.common.unit.ByteSizeUnit; import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.core.Nullable; import org.elasticsearch.core.SuppressForbidden; import org.elasticsearch.rest.RestStatus; import org.elasticsearch.xcontent.XContentParser; @@ -34,16 +35,20 @@ import java.security.AccessController; import java.security.MessageDigest; import java.security.PrivilegedAction; +import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; import java.util.stream.Collectors; import static java.net.HttpURLConnection.HTTP_MOVED_PERM; import static java.net.HttpURLConnection.HTTP_MOVED_TEMP; import static java.net.HttpURLConnection.HTTP_NOT_FOUND; import static java.net.HttpURLConnection.HTTP_OK; +import static java.net.HttpURLConnection.HTTP_PARTIAL; import static java.net.HttpURLConnection.HTTP_SEE_OTHER; /** @@ -61,6 +66,75 @@ final class ModelLoaderUtils { record VocabularyParts(List vocab, List merges, List scores) {} + // Range in bytes + record RequestRange(long rangeStart, long rangeEnd, int startPart, int numParts) { + public String bytesRange() { + return "bytes=" + rangeStart + "-" + rangeEnd; + } + } + + static class HttpStreamChunker { + + record BytesAndPartIndex(BytesArray bytes, int partIndex) {} + + private final InputStream inputStream; + private final int chunkSize; + private final AtomicLong totalBytesRead = new AtomicLong(); + private final AtomicInteger currentPart; + private final int lastPartNumber; + private final byte[] buf; + + HttpStreamChunker(URI uri, RequestRange range, int chunkSize) { + var inputStream = getHttpOrHttpsInputStream(uri, range); + this.inputStream = inputStream; + this.chunkSize = chunkSize; + this.lastPartNumber = range.startPart() + range.numParts(); + this.currentPart = new AtomicInteger(range.startPart()); + this.buf = new byte[chunkSize]; + } + + // This ctor exists for testing purposes only. 
+ HttpStreamChunker(InputStream inputStream, RequestRange range, int chunkSize) { + this.inputStream = inputStream; + this.chunkSize = chunkSize; + this.lastPartNumber = range.startPart() + range.numParts(); + this.currentPart = new AtomicInteger(range.startPart()); + this.buf = new byte[chunkSize]; + } + + public boolean hasNext() { + return currentPart.get() < lastPartNumber; + } + + public BytesAndPartIndex next() throws IOException { + int bytesRead = 0; + + while (bytesRead < chunkSize) { + int read = inputStream.read(buf, bytesRead, chunkSize - bytesRead); + // EOF?? + if (read == -1) { + break; + } + bytesRead += read; + } + + if (bytesRead > 0) { + totalBytesRead.addAndGet(bytesRead); + return new BytesAndPartIndex(new BytesArray(buf, 0, bytesRead), currentPart.getAndIncrement()); + } else { + return new BytesAndPartIndex(BytesArray.EMPTY, currentPart.get()); + } + } + + public long getTotalBytesRead() { + return totalBytesRead.get(); + } + + public int getCurrentPart() { + return currentPart.get(); + } + } + static class InputStreamChunker { private final InputStream inputStream; @@ -101,14 +175,14 @@ public int getTotalBytesRead() { } } - static InputStream getInputStreamFromModelRepository(URI uri) throws IOException { + static InputStream getInputStreamFromModelRepository(URI uri) { String scheme = uri.getScheme().toLowerCase(Locale.ROOT); // if you add a scheme here, also add it to the bootstrap check in {@link MachineLearningPackageLoader#validateModelRepository} switch (scheme) { case "http": case "https": - return getHttpOrHttpsInputStream(uri); + return getHttpOrHttpsInputStream(uri, null); case "file": return getFileInputStream(uri); default: @@ -116,6 +190,11 @@ static InputStream getInputStreamFromModelRepository(URI uri) throws IOException } } + static boolean uriIsFile(URI uri) { + String scheme = uri.getScheme().toLowerCase(Locale.ROOT); + return "file".equals(scheme); + } + static VocabularyParts loadVocabulary(URI uri) { if (uri.getPath().endsWith(".json")) { try (InputStream vocabInputStream = getInputStreamFromModelRepository(uri)) { @@ -174,7 +253,7 @@ private ModelLoaderUtils() {} @SuppressWarnings("'java.lang.SecurityManager' is deprecated and marked for removal ") @SuppressForbidden(reason = "we need socket connection to download") - private static InputStream getHttpOrHttpsInputStream(URI uri) throws IOException { + private static InputStream getHttpOrHttpsInputStream(URI uri, @Nullable RequestRange range) { assert uri.getUserInfo() == null : "URI's with credentials are not supported"; @@ -186,18 +265,30 @@ private static InputStream getHttpOrHttpsInputStream(URI uri) throws IOException PrivilegedAction privilegedHttpReader = () -> { try { HttpURLConnection conn = (HttpURLConnection) uri.toURL().openConnection(); + if (range != null) { + conn.setRequestProperty("Range", range.bytesRange()); + } switch (conn.getResponseCode()) { case HTTP_OK: + case HTTP_PARTIAL: return conn.getInputStream(); + case HTTP_MOVED_PERM: case HTTP_MOVED_TEMP: case HTTP_SEE_OTHER: throw new IllegalStateException("redirects aren't supported yet"); case HTTP_NOT_FOUND: throw new ResourceNotFoundException("{} not found", uri); + case 416: // Range not satisfiable, for some reason not in the list of constants + throw new IllegalStateException("Invalid request range [" + range.bytesRange() + "]"); default: int responseCode = conn.getResponseCode(); - throw new ElasticsearchStatusException("error during downloading {}", RestStatus.fromCode(responseCode), uri); + throw new 
ElasticsearchStatusException( + "error during downloading {}. Got response code {}", + RestStatus.fromCode(responseCode), + uri, + responseCode + ); } } catch (IOException e) { throw new UncheckedIOException(e); @@ -209,7 +300,7 @@ private static InputStream getHttpOrHttpsInputStream(URI uri) throws IOException @SuppressWarnings("'java.lang.SecurityManager' is deprecated and marked for removal ") @SuppressForbidden(reason = "we need load model data from a file") - private static InputStream getFileInputStream(URI uri) { + static InputStream getFileInputStream(URI uri) { SecurityManager sm = System.getSecurityManager(); if (sm != null) { @@ -232,4 +323,53 @@ private static InputStream getFileInputStream(URI uri) { return AccessController.doPrivileged(privilegedFileReader); } + /** + * Split a stream of size {@code sizeInBytes} into {@code numberOfStreams} +1 + * ranges aligned on {@code chunkSizeBytes} boundaries. Each range contains a + * whole number of chunks. + * The first {@code numberOfStreams} ranges will be split evenly (in terms of + * number of chunks not the byte size), the final range split + * is for the single final chunk and will be no more than {@code chunkSizeBytes} + * in size. The separate range for the final chunk is because when streaming and + * uploading a large model definition, writing the last part has to handled + * as a special case. + * @param sizeInBytes The total size of the stream + * @param numberOfStreams Divide the bulk of the size into this many streams. + * @param chunkSizeBytes The size of each chunk + * @return List of {@code numberOfStreams} + 1 ranges. + */ + static List split(long sizeInBytes, int numberOfStreams, long chunkSizeBytes) { + int numberOfChunks = (int) ((sizeInBytes + chunkSizeBytes - 1) / chunkSizeBytes); + + var ranges = new ArrayList(); + + int baseChunksPerStream = numberOfChunks / numberOfStreams; + int remainder = numberOfChunks % numberOfStreams; + long startOffset = 0; + int startChunkIndex = 0; + + for (int i = 0; i < numberOfStreams - 1; i++) { + int numChunksInStream = (i < remainder) ? 
baseChunksPerStream + 1 : baseChunksPerStream; + long rangeEnd = startOffset + (numChunksInStream * chunkSizeBytes) - 1; // range index is 0 based + ranges.add(new RequestRange(startOffset, rangeEnd, startChunkIndex, numChunksInStream)); + startOffset = rangeEnd + 1; // range is inclusive start and end + startChunkIndex += numChunksInStream; + } + + // Want the final range request to be a single chunk + if (baseChunksPerStream > 1) { + int numChunksExcludingFinal = baseChunksPerStream - 1; + long rangeEnd = startOffset + (numChunksExcludingFinal * chunkSizeBytes) - 1; + ranges.add(new RequestRange(startOffset, rangeEnd, startChunkIndex, numChunksExcludingFinal)); + + startOffset = rangeEnd + 1; + startChunkIndex += numChunksExcludingFinal; + } + + // The final range is a single chunk the end of which should not exceed sizeInBytes + long rangeEnd = Math.min(sizeInBytes, startOffset + (baseChunksPerStream * chunkSizeBytes)) - 1; + ranges.add(new RequestRange(startOffset, rangeEnd, startChunkIndex, 1)); + + return ranges; + } } diff --git a/x-pack/plugin/ml-package-loader/src/main/java/org/elasticsearch/xpack/ml/packageloader/action/TransportGetTrainedModelPackageConfigAction.java b/x-pack/plugin/ml-package-loader/src/main/java/org/elasticsearch/xpack/ml/packageloader/action/TransportGetTrainedModelPackageConfigAction.java index ba50f2f6a6b74..68f869742d9e5 100644 --- a/x-pack/plugin/ml-package-loader/src/main/java/org/elasticsearch/xpack/ml/packageloader/action/TransportGetTrainedModelPackageConfigAction.java +++ b/x-pack/plugin/ml-package-loader/src/main/java/org/elasticsearch/xpack/ml/packageloader/action/TransportGetTrainedModelPackageConfigAction.java @@ -77,7 +77,7 @@ protected void masterOperation(Task task, Request request, ClusterState state, A String packagedModelId = request.getPackagedModelId(); logger.debug(() -> format("Fetch package manifest for [%s] from [%s]", packagedModelId, repository)); - threadPool.executor(MachineLearningPackageLoader.UTILITY_THREAD_POOL_NAME).execute(() -> { + threadPool.executor(MachineLearningPackageLoader.MODEL_DOWNLOAD_THREADPOOL_NAME).execute(() -> { try { URI uri = ModelLoaderUtils.resolvePackageLocation(repository, packagedModelId + ModelLoaderUtils.METADATA_FILE_EXTENSION); InputStream inputStream = ModelLoaderUtils.getInputStreamFromModelRepository(uri); diff --git a/x-pack/plugin/ml-package-loader/src/main/java/org/elasticsearch/xpack/ml/packageloader/action/TransportLoadTrainedModelPackage.java b/x-pack/plugin/ml-package-loader/src/main/java/org/elasticsearch/xpack/ml/packageloader/action/TransportLoadTrainedModelPackage.java index 70dcee165d3f6..76b7781b1cffe 100644 --- a/x-pack/plugin/ml-package-loader/src/main/java/org/elasticsearch/xpack/ml/packageloader/action/TransportLoadTrainedModelPackage.java +++ b/x-pack/plugin/ml-package-loader/src/main/java/org/elasticsearch/xpack/ml/packageloader/action/TransportLoadTrainedModelPackage.java @@ -23,6 +23,7 @@ import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.util.concurrent.EsExecutors; +import org.elasticsearch.indices.breaker.CircuitBreakerService; import org.elasticsearch.injection.guice.Inject; import org.elasticsearch.rest.RestStatus; import org.elasticsearch.tasks.Task; @@ -37,14 +38,12 @@ import org.elasticsearch.xpack.core.ml.action.NodeAcknowledgedResponse; import org.elasticsearch.xpack.core.ml.packageloader.action.LoadTrainedModelPackageAction; import 
org.elasticsearch.xpack.core.ml.packageloader.action.LoadTrainedModelPackageAction.Request; -import org.elasticsearch.xpack.ml.packageloader.MachineLearningPackageLoader; import java.io.IOException; import java.net.MalformedURLException; import java.net.URISyntaxException; import java.util.Map; import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicReference; import static org.elasticsearch.core.Strings.format; import static org.elasticsearch.xpack.core.ClientHelper.ML_ORIGIN; @@ -57,6 +56,7 @@ public class TransportLoadTrainedModelPackage extends TransportMasterNodeAction< private static final Logger logger = LogManager.getLogger(TransportLoadTrainedModelPackage.class); private final Client client; + private final CircuitBreakerService circuitBreakerService; @Inject public TransportLoadTrainedModelPackage( @@ -65,7 +65,8 @@ public TransportLoadTrainedModelPackage( ThreadPool threadPool, ActionFilters actionFilters, IndexNameExpressionResolver indexNameExpressionResolver, - Client client + Client client, + CircuitBreakerService circuitBreakerService ) { super( LoadTrainedModelPackageAction.NAME, @@ -79,6 +80,7 @@ public TransportLoadTrainedModelPackage( EsExecutors.DIRECT_EXECUTOR_SERVICE ); this.client = new OriginSettingClient(client, ML_ORIGIN); + this.circuitBreakerService = circuitBreakerService; } @Override @@ -98,11 +100,14 @@ protected void masterOperation(Task task, Request request, ClusterState state, A parentTaskAssigningClient, request.getModelId(), request.getModelPackageConfig(), - downloadTask + downloadTask, + threadPool, + circuitBreakerService ); - threadPool.executor(MachineLearningPackageLoader.UTILITY_THREAD_POOL_NAME) - .execute(() -> importModel(client, taskManager, request, modelImporter, listener, downloadTask)); + var downloadCompleteListener = request.isWaitForCompletion() ? 
listener : ActionListener.noop(); + + importModel(client, taskManager, request, modelImporter, downloadCompleteListener, downloadTask); } catch (Exception e) { taskManager.unregister(downloadTask); listener.onFailure(e); @@ -136,16 +141,12 @@ static void importModel( ActionListener listener, Task task ) { - String modelId = request.getModelId(); - final AtomicReference exceptionRef = new AtomicReference<>(); - - try { - final long relativeStartNanos = System.nanoTime(); + final String modelId = request.getModelId(); + final long relativeStartNanos = System.nanoTime(); - logAndWriteNotificationAtLevel(auditClient, modelId, "starting model import", Level.INFO); - - modelImporter.doImport(); + logAndWriteNotificationAtLevel(auditClient, modelId, "starting model import", Level.INFO); + var finishListener = ActionListener.wrap(success -> { final long totalRuntimeNanos = System.nanoTime() - relativeStartNanos; logAndWriteNotificationAtLevel( auditClient, @@ -153,29 +154,25 @@ static void importModel( format("finished model import after [%d] seconds", TimeUnit.NANOSECONDS.toSeconds(totalRuntimeNanos)), Level.INFO ); - } catch (TaskCancelledException e) { - recordError(auditClient, modelId, exceptionRef, e, Level.WARNING); - } catch (ElasticsearchException e) { - recordError(auditClient, modelId, exceptionRef, e, Level.ERROR); - } catch (MalformedURLException e) { - recordError(auditClient, modelId, "an invalid URL", exceptionRef, e, Level.ERROR, RestStatus.INTERNAL_SERVER_ERROR); - } catch (URISyntaxException e) { - recordError(auditClient, modelId, "an invalid URL syntax", exceptionRef, e, Level.ERROR, RestStatus.INTERNAL_SERVER_ERROR); - } catch (IOException e) { - recordError(auditClient, modelId, "an IOException", exceptionRef, e, Level.ERROR, RestStatus.SERVICE_UNAVAILABLE); - } catch (Exception e) { - recordError(auditClient, modelId, "an Exception", exceptionRef, e, Level.ERROR, RestStatus.INTERNAL_SERVER_ERROR); - } finally { - taskManager.unregister(task); - - if (request.isWaitForCompletion()) { - if (exceptionRef.get() != null) { - listener.onFailure(exceptionRef.get()); - } else { - listener.onResponse(AcknowledgedResponse.TRUE); - } + listener.onResponse(AcknowledgedResponse.TRUE); + }, exception -> listener.onFailure(processException(auditClient, modelId, exception))); + + modelImporter.doImport(ActionListener.runAfter(finishListener, () -> taskManager.unregister(task))); + } - } + static Exception processException(Client auditClient, String modelId, Exception e) { + if (e instanceof TaskCancelledException te) { + return recordError(auditClient, modelId, te, Level.WARNING); + } else if (e instanceof ElasticsearchException es) { + return recordError(auditClient, modelId, es, Level.ERROR); + } else if (e instanceof MalformedURLException) { + return recordError(auditClient, modelId, "an invalid URL", e, Level.ERROR, RestStatus.BAD_REQUEST); + } else if (e instanceof URISyntaxException) { + return recordError(auditClient, modelId, "an invalid URL syntax", e, Level.ERROR, RestStatus.BAD_REQUEST); + } else if (e instanceof IOException) { + return recordError(auditClient, modelId, "an IOException", e, Level.ERROR, RestStatus.SERVICE_UNAVAILABLE); + } else { + return recordError(auditClient, modelId, "an Exception", e, Level.ERROR, RestStatus.INTERNAL_SERVER_ERROR); } } @@ -213,30 +210,16 @@ public ModelDownloadTask createTask(long id, String type, String action, TaskId } } - private static void recordError( - Client client, - String modelId, - AtomicReference exceptionRef, - 
ElasticsearchException e, - Level level - ) { + private static Exception recordError(Client client, String modelId, ElasticsearchException e, Level level) { String message = format("Model importing failed due to [%s]", e.getDetailedMessage()); logAndWriteNotificationAtLevel(client, modelId, message, level); - exceptionRef.set(e); + return e; } - private static Exception recordError( - Client client, - String modelId, - String failureType, - AtomicReference exceptionRef, - Exception e, - Level level, - RestStatus status - ) { + private static Exception recordError(Client client, String modelId, String failureType, Exception e, Level level, RestStatus status) { String message = format("Model importing failed due to %s [%s]", failureType, e); logAndWriteNotificationAtLevel(client, modelId, message, level); - exceptionRef.set(new ElasticsearchStatusException(message, status, e)); + return new ElasticsearchStatusException(message, status, e); } private static void logAndWriteNotificationAtLevel(Client client, String modelId, String message, Level level) { diff --git a/x-pack/plugin/ml-package-loader/src/test/java/org/elasticsearch/xpack/ml/packageloader/MachineLearningPackageLoaderTests.java b/x-pack/plugin/ml-package-loader/src/test/java/org/elasticsearch/xpack/ml/packageloader/MachineLearningPackageLoaderTests.java index 967d1b4ba4b6a..2e487b6a9624c 100644 --- a/x-pack/plugin/ml-package-loader/src/test/java/org/elasticsearch/xpack/ml/packageloader/MachineLearningPackageLoaderTests.java +++ b/x-pack/plugin/ml-package-loader/src/test/java/org/elasticsearch/xpack/ml/packageloader/MachineLearningPackageLoaderTests.java @@ -7,9 +7,13 @@ package org.elasticsearch.xpack.ml.packageloader; +import org.elasticsearch.common.settings.Setting; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.PathUtils; import org.elasticsearch.test.ESTestCase; +import java.util.List; + import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.oneOf; @@ -80,4 +84,12 @@ public void testValidateModelRepository() { assertEquals("xpack.ml.model_repository does not support authentication", e.getMessage()); } + + public void testThreadPoolHasSingleThread() { + var fixedThreadPool = MachineLearningPackageLoader.modelDownloadExecutor(Settings.EMPTY); + List> settings = fixedThreadPool.getRegisteredSettings(); + var sizeSetting = settings.stream().filter(s -> s.getKey().startsWith("xpack.ml.model_download_thread_pool")).findFirst(); + assertTrue(sizeSetting.isPresent()); + assertEquals(5, sizeSetting.get().get(Settings.EMPTY)); + } } diff --git a/x-pack/plugin/ml-package-loader/src/test/java/org/elasticsearch/xpack/ml/packageloader/action/ModelDownloadTaskTests.java b/x-pack/plugin/ml-package-loader/src/test/java/org/elasticsearch/xpack/ml/packageloader/action/ModelDownloadTaskTests.java index 0afd08c70cf45..3a682fb6a5094 100644 --- a/x-pack/plugin/ml-package-loader/src/test/java/org/elasticsearch/xpack/ml/packageloader/action/ModelDownloadTaskTests.java +++ b/x-pack/plugin/ml-package-loader/src/test/java/org/elasticsearch/xpack/ml/packageloader/action/ModelDownloadTaskTests.java @@ -20,14 +20,7 @@ public class ModelDownloadTaskTests extends ESTestCase { public void testStatus() { - var task = new ModelDownloadTask( - 0L, - MODEL_IMPORT_TASK_TYPE, - MODEL_IMPORT_TASK_ACTION, - downloadModelTaskDescription("foo"), - TaskId.EMPTY_TASK_ID, - Map.of() - ); + var task = testTask(); task.setProgress(100, 0); var taskInfo = task.taskInfo("node", true); @@ -39,4 +32,15 @@ public void
testStatus() { status = Strings.toString(taskInfo.status()); assertThat(status, containsString("{\"total_parts\":100,\"downloaded_parts\":1}")); } + + public static ModelDownloadTask testTask() { + return new ModelDownloadTask( + 0L, + MODEL_IMPORT_TASK_TYPE, + MODEL_IMPORT_TASK_ACTION, + downloadModelTaskDescription("foo"), + TaskId.EMPTY_TASK_ID, + Map.of() + ); + } } diff --git a/x-pack/plugin/ml-package-loader/src/test/java/org/elasticsearch/xpack/ml/packageloader/action/ModelImporterTests.java b/x-pack/plugin/ml-package-loader/src/test/java/org/elasticsearch/xpack/ml/packageloader/action/ModelImporterTests.java new file mode 100644 index 0000000000000..cbcf74e69f588 --- /dev/null +++ b/x-pack/plugin/ml-package-loader/src/test/java/org/elasticsearch/xpack/ml/packageloader/action/ModelImporterTests.java @@ -0,0 +1,334 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.ml.packageloader.action; + +import org.elasticsearch.ElasticsearchStatusException; +import org.elasticsearch.action.ActionFuture; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.LatchedActionListener; +import org.elasticsearch.action.support.ActionTestUtils; +import org.elasticsearch.action.support.master.AcknowledgedResponse; +import org.elasticsearch.client.internal.Client; +import org.elasticsearch.common.breaker.CircuitBreaker; +import org.elasticsearch.common.hash.MessageDigests; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.indices.breaker.CircuitBreakerService; +import org.elasticsearch.rest.RestStatus; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.threadpool.TestThreadPool; +import org.elasticsearch.xpack.core.ml.action.PutTrainedModelDefinitionPartAction; +import org.elasticsearch.xpack.core.ml.action.PutTrainedModelVocabularyAction; +import org.elasticsearch.xpack.core.ml.inference.trainedmodel.ModelPackageConfig; +import org.elasticsearch.xpack.ml.packageloader.MachineLearningPackageLoader; +import org.junit.After; +import org.junit.Before; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.atomic.AtomicReference; + +import static org.hamcrest.Matchers.containsString; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +public class ModelImporterTests extends ESTestCase { + + private TestThreadPool threadPool; + + @Before + public void createThreadPool() { + threadPool = createThreadPool(MachineLearningPackageLoader.modelDownloadExecutor(Settings.EMPTY)); + } + + @After + public void closeThreadPool() { + threadPool.close(); + } + + public void testDownloadModelDefinition() throws InterruptedException, URISyntaxException { + var client = mockClient(false); + var task = ModelDownloadTaskTests.testTask(); + var config = mockConfigWithRepoLinks(); + var vocab = new ModelLoaderUtils.VocabularyParts(List.of(), 
List.of(), List.of()); + var cbs = mock(CircuitBreakerService.class); + when(cbs.getBreaker(eq(CircuitBreaker.REQUEST))).thenReturn(mock(CircuitBreaker.class)); + + int totalParts = 5; + int chunkSize = 10; + long size = totalParts * chunkSize; + var modelDef = modelDefinition(totalParts, chunkSize); + var streamers = mockHttpStreamChunkers(modelDef, chunkSize, 2); + + var digest = computeDigest(modelDef); + when(config.getSha256()).thenReturn(digest); + when(config.getSize()).thenReturn(size); + + var importer = new ModelImporter(client, "foo", config, task, threadPool, cbs); + + var latch = new CountDownLatch(1); + var latchedListener = new LatchedActionListener(ActionTestUtils.assertNoFailureListener(ignore -> {}), latch); + importer.downloadModelDefinition(size, totalParts, vocab, streamers, latchedListener); + + latch.await(); + verify(client, times(totalParts)).execute(eq(PutTrainedModelDefinitionPartAction.INSTANCE), any()); + assertEquals(totalParts - 1, task.getStatus().downloadProgress().downloadedParts()); + assertEquals(totalParts, task.getStatus().downloadProgress().totalParts()); + } + + public void testReadModelDefinitionFromFile() throws InterruptedException, URISyntaxException { + var client = mockClient(false); + var task = ModelDownloadTaskTests.testTask(); + var config = mockConfigWithRepoLinks(); + var vocab = new ModelLoaderUtils.VocabularyParts(List.of(), List.of(), List.of()); + var cbs = mock(CircuitBreakerService.class); + when(cbs.getBreaker(eq(CircuitBreaker.REQUEST))).thenReturn(mock(CircuitBreaker.class)); + + int totalParts = 3; + int chunkSize = 10; + long size = totalParts * chunkSize; + var modelDef = modelDefinition(totalParts, chunkSize); + + var digest = computeDigest(modelDef); + when(config.getSha256()).thenReturn(digest); + when(config.getSize()).thenReturn(size); + + var importer = new ModelImporter(client, "foo", config, task, threadPool, cbs); + var streamChunker = new ModelLoaderUtils.InputStreamChunker(new ByteArrayInputStream(modelDef), chunkSize); + + var latch = new CountDownLatch(1); + var latchedListener = new LatchedActionListener(ActionTestUtils.assertNoFailureListener(ignore -> {}), latch); + importer.readModelDefinitionFromFile(size, totalParts, streamChunker, vocab, latchedListener); + + latch.await(); + verify(client, times(totalParts)).execute(eq(PutTrainedModelDefinitionPartAction.INSTANCE), any()); + assertEquals(totalParts, task.getStatus().downloadProgress().downloadedParts()); + assertEquals(totalParts, task.getStatus().downloadProgress().totalParts()); + } + + public void testSizeMismatch() throws InterruptedException, URISyntaxException { + var client = mockClient(false); + var task = mock(ModelDownloadTask.class); + var config = mockConfigWithRepoLinks(); + var cbs = mock(CircuitBreakerService.class); + when(cbs.getBreaker(eq(CircuitBreaker.REQUEST))).thenReturn(mock(CircuitBreaker.class)); + + int totalParts = 5; + int chunkSize = 10; + long size = totalParts * chunkSize; + var modelDef = modelDefinition(totalParts, chunkSize); + var streamers = mockHttpStreamChunkers(modelDef, chunkSize, 2); + + var digest = computeDigest(modelDef); + when(config.getSha256()).thenReturn(digest); + when(config.getSize()).thenReturn(size - 1); // expected size and read size are different + + var exceptionHolder = new AtomicReference(); + + var latch = new CountDownLatch(1); + var latchedListener = new LatchedActionListener( + ActionTestUtils.assertNoSuccessListener(exceptionHolder::set), + latch + ); + + var importer = new 
ModelImporter(client, "foo", config, task, threadPool, cbs); + importer.downloadModelDefinition(size, totalParts, null, streamers, latchedListener); + + latch.await(); + assertThat(exceptionHolder.get().getMessage(), containsString("Model size does not match")); + verify(client, times(totalParts)).execute(eq(PutTrainedModelDefinitionPartAction.INSTANCE), any()); + } + + public void testDigestMismatch() throws InterruptedException, URISyntaxException { + var client = mockClient(false); + var task = mock(ModelDownloadTask.class); + var config = mockConfigWithRepoLinks(); + var cbs = mock(CircuitBreakerService.class); + when(cbs.getBreaker(eq(CircuitBreaker.REQUEST))).thenReturn(mock(CircuitBreaker.class)); + + int totalParts = 5; + int chunkSize = 10; + long size = totalParts * chunkSize; + var modelDef = modelDefinition(totalParts, chunkSize); + var streamers = mockHttpStreamChunkers(modelDef, chunkSize, 2); + + when(config.getSha256()).thenReturn("0x"); // digest is different + when(config.getSize()).thenReturn(size); + + var exceptionHolder = new AtomicReference(); + var latch = new CountDownLatch(1); + var latchedListener = new LatchedActionListener( + ActionTestUtils.assertNoSuccessListener(exceptionHolder::set), + latch + ); + + var importer = new ModelImporter(client, "foo", config, task, threadPool, cbs); + // Message digest can only be calculated for the file reader + var streamChunker = new ModelLoaderUtils.InputStreamChunker(new ByteArrayInputStream(modelDef), chunkSize); + importer.readModelDefinitionFromFile(size, totalParts, streamChunker, null, latchedListener); + + latch.await(); + assertThat(exceptionHolder.get().getMessage(), containsString("Model sha256 checksums do not match")); + verify(client, times(totalParts)).execute(eq(PutTrainedModelDefinitionPartAction.INSTANCE), any()); + } + + public void testPutFailure() throws InterruptedException, URISyntaxException { + var client = mockClient(true); // client will fail put + var task = mock(ModelDownloadTask.class); + var config = mockConfigWithRepoLinks(); + var cbs = mock(CircuitBreakerService.class); + when(cbs.getBreaker(eq(CircuitBreaker.REQUEST))).thenReturn(mock(CircuitBreaker.class)); + + int totalParts = 4; + int chunkSize = 10; + long size = totalParts * chunkSize; + var modelDef = modelDefinition(totalParts, chunkSize); + var streamers = mockHttpStreamChunkers(modelDef, chunkSize, 1); + + var exceptionHolder = new AtomicReference(); + var latch = new CountDownLatch(1); + var latchedListener = new LatchedActionListener( + ActionTestUtils.assertNoSuccessListener(exceptionHolder::set), + latch + ); + + var importer = new ModelImporter(client, "foo", config, task, threadPool, cbs); + importer.downloadModelDefinition(size, totalParts, null, streamers, latchedListener); + + latch.await(); + assertThat(exceptionHolder.get().getMessage(), containsString("put model part failed")); + verify(client, times(1)).execute(eq(PutTrainedModelDefinitionPartAction.INSTANCE), any()); + } + + public void testReadFailure() throws IOException, InterruptedException, URISyntaxException { + var client = mockClient(true); + var task = mock(ModelDownloadTask.class); + var config = mockConfigWithRepoLinks(); + var cbs = mock(CircuitBreakerService.class); + when(cbs.getBreaker(eq(CircuitBreaker.REQUEST))).thenReturn(mock(CircuitBreaker.class)); + + int totalParts = 4; + int chunkSize = 10; + long size = totalParts * chunkSize; + + var streamer = mock(ModelLoaderUtils.HttpStreamChunker.class); + when(streamer.hasNext()).thenReturn(true); + 
when(streamer.next()).thenThrow(new IOException("stream failed")); // fail the read + + var exceptionHolder = new AtomicReference(); + var latch = new CountDownLatch(1); + var latchedListener = new LatchedActionListener( + ActionTestUtils.assertNoSuccessListener(exceptionHolder::set), + latch + ); + + var importer = new ModelImporter(client, "foo", config, task, threadPool, cbs); + importer.downloadModelDefinition(size, totalParts, null, List.of(streamer), latchedListener); + + latch.await(); + assertThat(exceptionHolder.get().getMessage(), containsString("stream failed")); + } + + @SuppressWarnings("unchecked") + public void testUploadVocabFailure() throws InterruptedException, URISyntaxException { + var client = mock(Client.class); + doAnswer(invocation -> { + ActionListener listener = (ActionListener) invocation.getArguments()[2]; + listener.onFailure(new ElasticsearchStatusException("put vocab failed", RestStatus.BAD_REQUEST)); + return null; + }).when(client).execute(eq(PutTrainedModelVocabularyAction.INSTANCE), any(), any()); + var cbs = mock(CircuitBreakerService.class); + when(cbs.getBreaker(eq(CircuitBreaker.REQUEST))).thenReturn(mock(CircuitBreaker.class)); + + var task = mock(ModelDownloadTask.class); + var config = mockConfigWithRepoLinks(); + + var vocab = new ModelLoaderUtils.VocabularyParts(List.of(), List.of(), List.of()); + + var exceptionHolder = new AtomicReference(); + var latch = new CountDownLatch(1); + var latchedListener = new LatchedActionListener( + ActionTestUtils.assertNoSuccessListener(exceptionHolder::set), + latch + ); + + var importer = new ModelImporter(client, "foo", config, task, threadPool, cbs); + importer.downloadModelDefinition(100, 5, vocab, List.of(), latchedListener); + + latch.await(); + assertThat(exceptionHolder.get().getMessage(), containsString("put vocab failed")); + verify(client, times(1)).execute(eq(PutTrainedModelVocabularyAction.INSTANCE), any(), any()); + verify(client, never()).execute(eq(PutTrainedModelDefinitionPartAction.INSTANCE), any()); + } + + private List mockHttpStreamChunkers(byte[] modelDef, int chunkSize, int numStreams) { + var ranges = ModelLoaderUtils.split(modelDef.length, numStreams, chunkSize); + + var result = new ArrayList(ranges.size()); + for (var range : ranges) { + int len = range.numParts() * chunkSize; + var modelDefStream = new ByteArrayInputStream(modelDef, (int) range.rangeStart(), len); + result.add(new ModelLoaderUtils.HttpStreamChunker(modelDefStream, range, chunkSize)); + } + + return result; + } + + private byte[] modelDefinition(int totalParts, int chunkSize) { + var bytes = new byte[totalParts * chunkSize]; + for (int i = 0; i < totalParts; i++) { + System.arraycopy(randomByteArrayOfLength(chunkSize), 0, bytes, i * chunkSize, chunkSize); + } + return bytes; + } + + private String computeDigest(byte[] modelDef) { + var digest = MessageDigests.sha256(); + digest.update(modelDef); + return MessageDigests.toHexString(digest.digest()); + } + + @SuppressWarnings("unchecked") + private Client mockClient(boolean failPutPart) { + var client = mock(Client.class); + + if (failPutPart) { + when(client.execute(eq(PutTrainedModelDefinitionPartAction.INSTANCE), any())).thenThrow( + new IllegalStateException("put model part failed") + ); + } else { + ActionFuture future = mock(ActionFuture.class); + when(future.actionGet()).thenReturn(AcknowledgedResponse.TRUE); + when(client.execute(eq(PutTrainedModelDefinitionPartAction.INSTANCE), any())).thenReturn(future); + } + + doAnswer(invocation -> { + ActionListener 
listener = (ActionListener) invocation.getArguments()[2]; + listener.onResponse(AcknowledgedResponse.TRUE); + return null; + }).when(client).execute(eq(PutTrainedModelVocabularyAction.INSTANCE), any(), any()); + + return client; + } + + private ModelPackageConfig mockConfigWithRepoLinks() { + var config = mock(ModelPackageConfig.class); + when(config.getModelRepository()).thenReturn("https://models.models"); + when(config.getPackagedModelId()).thenReturn("my-model"); + return config; + } +} diff --git a/x-pack/plugin/ml-package-loader/src/test/java/org/elasticsearch/xpack/ml/packageloader/action/ModelLoaderUtilsTests.java b/x-pack/plugin/ml-package-loader/src/test/java/org/elasticsearch/xpack/ml/packageloader/action/ModelLoaderUtilsTests.java index 661cd12f99957..f421a7b44e7f1 100644 --- a/x-pack/plugin/ml-package-loader/src/test/java/org/elasticsearch/xpack/ml/packageloader/action/ModelLoaderUtilsTests.java +++ b/x-pack/plugin/ml-package-loader/src/test/java/org/elasticsearch/xpack/ml/packageloader/action/ModelLoaderUtilsTests.java @@ -17,6 +17,7 @@ import java.nio.charset.StandardCharsets; import static org.hamcrest.Matchers.contains; +import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.core.Is.is; public class ModelLoaderUtilsTests extends ESTestCase { @@ -80,14 +81,13 @@ public void testSha256AndSize() throws IOException { assertEquals(64, expectedDigest.length()); int chunkSize = randomIntBetween(100, 10_000); + int totalParts = (bytes.length + chunkSize - 1) / chunkSize; ModelLoaderUtils.InputStreamChunker inputStreamChunker = new ModelLoaderUtils.InputStreamChunker( new ByteArrayInputStream(bytes), chunkSize ); - int totalParts = (bytes.length + chunkSize - 1) / chunkSize; - for (int part = 0; part < totalParts - 1; ++part) { assertEquals(chunkSize, inputStreamChunker.next().length()); } @@ -112,4 +112,40 @@ public void testParseVocabulary() throws IOException { assertThat(parsedVocab.merges(), contains("mergefoo", "mergebar", "mergebaz")); assertThat(parsedVocab.scores(), contains(1.0, 2.0, 3.0)); } + + public void testSplitIntoRanges() { + long totalSize = randomLongBetween(10_000, 50_000_000); + int numStreams = randomIntBetween(1, 10); + int chunkSize = 1024; + var ranges = ModelLoaderUtils.split(totalSize, numStreams, chunkSize); + assertThat(ranges, hasSize(numStreams + 1)); + + int expectedNumChunks = (int) ((totalSize + chunkSize - 1) / chunkSize); + assertThat(ranges.stream().mapToInt(ModelLoaderUtils.RequestRange::numParts).sum(), is(expectedNumChunks)); + + long startBytes = 0; + int startPartIndex = 0; + for (int i = 0; i < ranges.size() - 1; i++) { + assertThat(ranges.get(i).rangeStart(), is(startBytes)); + long end = startBytes + ((long) ranges.get(i).numParts() * chunkSize) - 1; + assertThat(ranges.get(i).rangeEnd(), is(end)); + long expectedNumBytesInRange = (long) chunkSize * ranges.get(i).numParts() - 1; + assertThat(ranges.get(i).rangeEnd() - ranges.get(i).rangeStart(), is(expectedNumBytesInRange)); + assertThat(ranges.get(i).startPart(), is(startPartIndex)); + + startBytes = end + 1; + startPartIndex += ranges.get(i).numParts(); + } + + var finalRange = ranges.get(ranges.size() - 1); + assertThat(finalRange.rangeStart(), is(startBytes)); + assertThat(finalRange.rangeEnd(), is(totalSize - 1)); + assertThat(finalRange.numParts(), is(1)); + } + + public void testRangeRequestBytesRange() { + long start = randomLongBetween(0, 2 << 10); + long end = randomLongBetween(start + 1, 2 << 11); + assertEquals("bytes=" + start + "-" + end, new 
ModelLoaderUtils.RequestRange(start, end, 0, 1).bytesRange()); + } } diff --git a/x-pack/plugin/ml-package-loader/src/test/java/org/elasticsearch/xpack/ml/packageloader/action/TransportLoadTrainedModelPackageTests.java b/x-pack/plugin/ml-package-loader/src/test/java/org/elasticsearch/xpack/ml/packageloader/action/TransportLoadTrainedModelPackageTests.java index a3f59e13f2f5b..cbcfd5b760779 100644 --- a/x-pack/plugin/ml-package-loader/src/test/java/org/elasticsearch/xpack/ml/packageloader/action/TransportLoadTrainedModelPackageTests.java +++ b/x-pack/plugin/ml-package-loader/src/test/java/org/elasticsearch/xpack/ml/packageloader/action/TransportLoadTrainedModelPackageTests.java @@ -33,7 +33,7 @@ import static org.hamcrest.core.Is.is; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.eq; -import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; @@ -42,7 +42,7 @@ public class TransportLoadTrainedModelPackageTests extends ESTestCase { private static final String MODEL_IMPORT_FAILURE_MSG_FORMAT = "Model importing failed due to %s [%s]"; public void testSendsFinishedUploadNotification() { - var uploader = mock(ModelImporter.class); + var uploader = createUploader(null); var taskManager = mock(TaskManager.class); var task = mock(Task.class); var client = mock(Client.class); @@ -63,49 +63,49 @@ public void testSendsFinishedUploadNotification() { assertThat(notificationArg.getValue().getMessage(), CoreMatchers.containsString("finished model import after")); } - public void testSendsErrorNotificationForInternalError() throws URISyntaxException, IOException { + public void testSendsErrorNotificationForInternalError() throws Exception { ElasticsearchStatusException exception = new ElasticsearchStatusException("exception", RestStatus.INTERNAL_SERVER_ERROR); String message = format("Model importing failed due to [%s]", exception.toString()); assertUploadCallsOnFailure(exception, message, Level.ERROR); } - public void testSendsErrorNotificationForMalformedURL() throws URISyntaxException, IOException { + public void testSendsErrorNotificationForMalformedURL() throws Exception { MalformedURLException exception = new MalformedURLException("exception"); String message = format(MODEL_IMPORT_FAILURE_MSG_FORMAT, "an invalid URL", exception.toString()); - assertUploadCallsOnFailure(exception, message, RestStatus.INTERNAL_SERVER_ERROR, Level.ERROR); + assertUploadCallsOnFailure(exception, message, RestStatus.BAD_REQUEST, Level.ERROR); } - public void testSendsErrorNotificationForURISyntax() throws URISyntaxException, IOException { + public void testSendsErrorNotificationForURISyntax() throws Exception { URISyntaxException exception = mock(URISyntaxException.class); String message = format(MODEL_IMPORT_FAILURE_MSG_FORMAT, "an invalid URL syntax", exception.toString()); - assertUploadCallsOnFailure(exception, message, RestStatus.INTERNAL_SERVER_ERROR, Level.ERROR); + assertUploadCallsOnFailure(exception, message, RestStatus.BAD_REQUEST, Level.ERROR); } - public void testSendsErrorNotificationForIOException() throws URISyntaxException, IOException { + public void testSendsErrorNotificationForIOException() throws Exception { IOException exception = mock(IOException.class); String message = format(MODEL_IMPORT_FAILURE_MSG_FORMAT, "an IOException", exception.toString()); assertUploadCallsOnFailure(exception, message, 
RestStatus.SERVICE_UNAVAILABLE, Level.ERROR); } - public void testSendsErrorNotificationForException() throws URISyntaxException, IOException { + public void testSendsErrorNotificationForException() throws Exception { RuntimeException exception = mock(RuntimeException.class); String message = format(MODEL_IMPORT_FAILURE_MSG_FORMAT, "an Exception", exception.toString()); assertUploadCallsOnFailure(exception, message, RestStatus.INTERNAL_SERVER_ERROR, Level.ERROR); } - public void testSendsWarningNotificationForTaskCancelledException() throws URISyntaxException, IOException { + public void testSendsWarningNotificationForTaskCancelledException() throws Exception { TaskCancelledException exception = new TaskCancelledException("cancelled"); String message = format("Model importing failed due to [%s]", exception.toString()); assertUploadCallsOnFailure(exception, message, Level.WARNING); } - public void testCallsOnResponseWithAcknowledgedResponse() throws URISyntaxException, IOException { + public void testCallsOnResponseWithAcknowledgedResponse() throws Exception { var client = mock(Client.class); var taskManager = mock(TaskManager.class); var task = mock(Task.class); @@ -134,15 +134,13 @@ public void testDoesNotCallListenerWhenNotWaitingForCompletion() { ); } - private void assertUploadCallsOnFailure(Exception exception, String message, RestStatus status, Level level) throws URISyntaxException, - IOException { + private void assertUploadCallsOnFailure(Exception exception, String message, RestStatus status, Level level) throws Exception { var esStatusException = new ElasticsearchStatusException(message, status, exception); assertNotificationAndOnFailure(exception, esStatusException, message, level); } - private void assertUploadCallsOnFailure(ElasticsearchException exception, String message, Level level) throws URISyntaxException, - IOException { + private void assertUploadCallsOnFailure(ElasticsearchException exception, String message, Level level) throws Exception { assertNotificationAndOnFailure(exception, exception, message, level); } @@ -151,7 +149,7 @@ private void assertNotificationAndOnFailure( ElasticsearchException onFailureException, String message, Level level - ) throws URISyntaxException, IOException { + ) throws Exception { var client = mock(Client.class); var taskManager = mock(TaskManager.class); var task = mock(Task.class); @@ -179,11 +177,18 @@ private void assertNotificationAndOnFailure( verify(taskManager).unregister(task); } - private ModelImporter createUploader(Exception exception) throws URISyntaxException, IOException { + @SuppressWarnings("unchecked") + private ModelImporter createUploader(Exception exception) { ModelImporter uploader = mock(ModelImporter.class); - if (exception != null) { - doThrow(exception).when(uploader).doImport(); - } + doAnswer(invocation -> { + ActionListener listener = (ActionListener) invocation.getArguments()[0]; + if (exception != null) { + listener.onFailure(exception); + } else { + listener.onResponse(AcknowledgedResponse.TRUE); + } + return null; + }).when(uploader).doImport(any(ActionListener.class)); return uploader; } From fd775317ed6e26f3310234447921880f5139d233 Mon Sep 17 00:00:00 2001 From: Liam Thompson <32779855+leemthompo@users.noreply.github.com> Date: Wed, 25 Sep 2024 17:55:19 +0200 Subject: [PATCH 49/58] [DOCS] Create Elasticsearch basics section, refactor quickstarts section (#112436) (#113543) Co-authored-by: shainaraskas <58563081+shainaraskas@users.noreply.github.com> --- docs/reference/index.asciidoc | 4 +- 
docs/reference/intro.asciidoc | 368 ++++++++++++------ docs/reference/landing-page.asciidoc | 2 +- .../quickstart/getting-started.asciidoc | 87 +---- docs/reference/quickstart/index.asciidoc | 31 +- .../run-elasticsearch-locally.asciidoc | 34 +- docs/reference/setup.asciidoc | 2 + docs/reference/tab-widgets/api-call.asciidoc | 8 +- 8 files changed, 323 insertions(+), 213 deletions(-) rename docs/reference/{quickstart => }/run-elasticsearch-locally.asciidoc (68%) diff --git a/docs/reference/index.asciidoc b/docs/reference/index.asciidoc index 79b5f2b69f24d..24dbee8c2983b 100644 --- a/docs/reference/index.asciidoc +++ b/docs/reference/index.asciidoc @@ -6,10 +6,10 @@ include::links.asciidoc[] include::landing-page.asciidoc[] -include::intro.asciidoc[] - include::release-notes/highlights.asciidoc[] +include::intro.asciidoc[] + include::quickstart/index.asciidoc[] include::setup.asciidoc[] diff --git a/docs/reference/intro.asciidoc b/docs/reference/intro.asciidoc index f80856368af2b..831888103c5c1 100644 --- a/docs/reference/intro.asciidoc +++ b/docs/reference/intro.asciidoc @@ -1,68 +1,98 @@ [[elasticsearch-intro]] -== What is {es}? +== {es} basics + +This guide covers the core concepts you need to understand to get started with {es}. +If you'd prefer to start working with {es} right away, set up a <> and jump to <>. + +This guide covers the following topics: + +* <>: Learn about {es} and some of its main use cases. +* <>: Understand your options for deploying {es} in different environments, including a fast local development setup. +* <>: Understand {es}'s most important primitives and how it stores data. +* <>: Understand your options for ingesting data into {es}. +* <>: Understand your options for searching and analyzing data in {es}. +* <>: Understand the basic concepts required for moving your {es} deployment to production. + +[[elasticsearch-intro-what-is-es]] +=== What is {es}? {es-repo}[{es}] is a distributed search and analytics engine, scalable data store, and vector database built on Apache Lucene. It's optimized for speed and relevance on production-scale workloads. Use {es} to search, index, store, and analyze data of all shapes and sizes in near real time. +{es} is the heart of the {estc-welcome-current}/stack-components.html[Elastic Stack]. +Combined with https://www.elastic.co/kibana[{kib}], it powers the following Elastic solutions: + +* https://www.elastic.co/observability[Observability] +* https://www.elastic.co/enterprise-search[Search] +* https://www.elastic.co/security[Security] + [TIP] ==== {es} has a lot of features. Explore the full list on the https://www.elastic.co/elasticsearch/features[product webpage^]. ==== -{es} is the heart of the {estc-welcome-current}/stack-components.html[Elastic Stack] and powers the Elastic https://www.elastic.co/enterprise-search[Search], https://www.elastic.co/observability[Observability] and https://www.elastic.co/security[Security] solutions. - -{es} is used for a wide and growing range of use cases. Here are a few examples: - -* *Monitor log and event data*: Store logs, metrics, and event data for observability and security information and event management (SIEM). -* *Build search applications*: Add search capabilities to apps or websites, or build search engines over internal data. -* *Vector database*: Store and search vectorized data, and create vector embeddings with built-in and third-party natural language processing (NLP) models. 
-* *Retrieval augmented generation (RAG)*: Use {es} as a retrieval engine to augment generative AI models. -* *Application and security monitoring*: Monitor and analyze application performance and security data. -* *Machine learning*: Use {ml} to automatically model the behavior of your data in real-time. - -This is just a sample of search, observability, and security use cases enabled by {es}. -Refer to our https://www.elastic.co/customers/success-stories[customer success stories] for concrete examples across a range of industries. -// Link to demos, search labs chatbots - [discrete] [[elasticsearch-intro-elastic-stack]] .What is the Elastic Stack? ******************************* {es} is the core component of the Elastic Stack, a suite of products for collecting, storing, searching, and visualizing data. -https://www.elastic.co/guide/en/starting-with-the-elasticsearch-platform-and-its-solutions/current/stack-components.html[Learn more about the Elastic Stack]. +{estc-welcome-current}/stack-components.html[Learn more about the Elastic Stack]. ******************************* -// TODO: Remove once we've moved Stack Overview to a subpage? [discrete] +[[elasticsearch-intro-use-cases]] +==== Use cases + +{es} is used for a wide and growing range of use cases. Here are a few examples: + +**Observability** + +* *Logs, metrics, and traces*: Collect, store, and analyze logs, metrics, and traces from applications, systems, and services. +* *Application performance monitoring (APM)*: Monitor and analyze the performance of business-critical software applications. +* *Real user monitoring (RUM)*: Monitor, quantify, and analyze user interactions with web applications. +* *OpenTelemetry*: Reuse your existing instrumentation to send telemetry data to the Elastic Stack using the OpenTelemetry standard. + +**Search** + +* *Full-text search*: Build a fast, relevant full-text search solution using inverted indexes, tokenization, and text analysis. +* *Vector database*: Store and search vectorized data, and create vector embeddings with built-in and third-party natural language processing (NLP) models. +* *Semantic search*: Understand the intent and contextual meaning behind search queries using tools like synonyms, dense vector embeddings, and learned sparse query-document expansion. +* *Hybrid search*: Combine full-text search with vector search using state-of-the-art ranking algorithms. +* *Build search experiences*: Add hybrid search capabilities to apps or websites, or build enterprise search engines over your organization's internal data sources. +* *Retrieval augmented generation (RAG)*: Use {es} as a retrieval engine to supplement generative AI models with more relevant, up-to-date, or proprietary data for a range of use cases. +* *Geospatial search*: Search for locations and calculate spatial relationships using geospatial queries. + +**Security** + +* *Security information and event management (SIEM)*: Collect, store, and analyze security data from applications, systems, and services. +* *Endpoint security*: Monitor and analyze endpoint security data. +* *Threat hunting*: Search and analyze data to detect and respond to security threats. + +This is just a sample of search, observability, and security use cases enabled by {es}. +Refer to Elastic https://www.elastic.co/customers/success-stories[customer success stories] for concrete examples across a range of industries. + [[elasticsearch-intro-deploy]] -=== Deployment options +=== Run {es} To use {es}, you need a running instance of the {es} service. 
-You can deploy {es} in various ways: +You can deploy {es} in various ways. -* <>: Get started quickly with a minimal local Docker setup. -* {cloud}/ec-getting-started-trial.html[*Elastic Cloud*]: {es} is available as part of our hosted Elastic Stack offering, deployed in the cloud with your provider of choice. Sign up for a https://cloud.elastic.co/registration[14-day free trial]. +**Quick start option** + +* <>: Get started quickly with a minimal local Docker setup for development and testing. + +**Hosted options** + +* {cloud}/ec-getting-started-trial.html[*Elastic Cloud Hosted*]: {es} is available as part of the hosted Elastic Stack offering, deployed in the cloud with your provider of choice. Sign up for a https://cloud.elastic.co/registration[14-day free trial]. * {serverless-docs}/general/sign-up-trial[*Elastic Cloud Serverless* (technical preview)]: Create serverless projects for autoscaled and fully managed {es} deployments. Sign up for a https://cloud.elastic.co/serverless-registration[14-day free trial]. -**Advanced deployment options** +**Advanced options** * <>: Install, configure, and run {es} on your own premises. * {ece-ref}/Elastic-Cloud-Enterprise-overview.html[*Elastic Cloud Enterprise*]: Deploy Elastic Cloud on public or private clouds, virtual machines, or your own premises. * {eck-ref}/k8s-overview.html[*Elastic Cloud on Kubernetes*]: Deploy Elastic Cloud on Kubernetes. -[discrete] -[[elasticsearch-next-steps]] -=== Learn more - -Here are some resources to help you get started: - -* <>: A beginner's guide to deploying your first {es} instance, indexing data, and running queries. -* https://elastic.co/webinars/getting-started-elasticsearch[Webinar: Introduction to {es}]: Register for our live webinars to learn directly from {es} experts. -* https://www.elastic.co/search-labs[Elastic Search Labs]: Tutorials and blogs that explore AI-powered search using the latest {es} features. -** Follow our tutorial https://www.elastic.co/search-labs/tutorials/search-tutorial/welcome[to build a hybrid search solution in Python]. -** Check out the https://github.com/elastic/elasticsearch-labs?tab=readme-ov-file#elasticsearch-examples--apps[`elasticsearch-labs` repository] for a range of Python notebooks and apps for various use cases. - // new html page [[documents-indices]] === Indices, documents, and fields @@ -73,20 +103,16 @@ Here are some resources to help you get started: The index is the fundamental unit of storage in {es}, a logical namespace for storing data that share similar characteristics. After you have {es} <>, you'll get started by creating an index to store your data. +An index is a collection of documents uniquely identified by a name or an <>. +This unique name is important because it's used to target the index in search queries and other operations. + [TIP] ==== A closely related concept is a <>. -This index abstraction is optimized for append-only time-series data, and is made up of hidden, auto-generated backing indices. -If you're working with time-series data, we recommend the {observability-guide}[Elastic Observability] solution. +This index abstraction is optimized for append-only timestamped data, and is made up of hidden, auto-generated backing indices. +If you're working with timestamped data, we recommend the {observability-guide}[Elastic Observability] solution for additional tools and optimized content. 
==== -Some key facts about indices: - -* An index is a collection of documents -* An index has a unique name -* An index can also be referred to by an alias -* An index has a mapping that defines the schema of its documents - [discrete] [[elasticsearch-intro-documents-fields]] ==== Documents and fields @@ -126,14 +152,12 @@ A simple {es} document might look like this: [discrete] [[elasticsearch-intro-documents-fields-data-metadata]] -==== Data and metadata +==== Metadata fields -An indexed document contains data and metadata. +An indexed document contains data and metadata. <> are system fields that store information about the documents. In {es}, metadata fields are prefixed with an underscore. +For example, the following fields are metadata fields: -The most important metadata fields are: - -* `_source`: Contains the original JSON document. * `_index`: The name of the index where the document is stored. * `_id`: The document's ID. IDs must be unique per index. @@ -146,8 +170,8 @@ A mapping defines the <> for each field, how the field should be indexed, and how it should be stored. When adding documents to {es}, you have two options for mappings: -* <>: Let {es} automatically detect the data types and create the mappings for you. This is great for getting started quickly, but can lead to unexpected results for complex data. -* <>: Define the mappings up front by specifying data types for each field. Recommended for production use cases, because you have much more control over how your data is indexed. +* <>: Let {es} automatically detect the data types and create the mappings for you. Dynamic mapping helps you get started quickly, but might yield suboptimal results for your specific use case due to automatic field type inference. +* <>: Define the mappings up front by specifying data types for each field. Recommended for production use cases, because you have full control over how your data is indexed to suit your specific use case. [TIP] ==== You can use a combination of dynamic and explicit mapping on the same index. This is useful when you have a mix of known and unknown fields in your data. ==== +// New html page +[[es-ingestion-overview]] +=== Add data to {es} + +There are multiple ways to ingest data into {es}. +The option that you choose depends on whether you're working with timestamped data or non-timestamped data, where the data is coming from, its complexity, and more. + +[TIP] +==== +You can load {kibana-ref}/connect-to-elasticsearch.html#_add_sample_data[sample data] into your {es} cluster using {kib}, to get started quickly. +==== + +[discrete] +[[es-ingestion-overview-general-content]] +==== General content + +General content is data that does not have a timestamp. +This could be data like vector embeddings, website content, product catalogs, and more. +For general content, you have the following options for adding data to {es} indices: + +* <>: Use the {es} <> to index documents directly, using the Dev Tools {kibana-ref}/console-kibana.html[Console], or cURL. ++ +If you're building a website or app, then you can call Elasticsearch APIs using an https://www.elastic.co/guide/en/elasticsearch/client/index.html[{es} client] in the programming language of your choice. If you use the Python client, then check out the `elasticsearch-labs` repo for various https://github.com/elastic/elasticsearch-labs/tree/main/notebooks/search/python-examples[example notebooks].
+* {kibana-ref}/connect-to-elasticsearch.html#upload-data-kibana[File upload]: Use the {kib} file uploader to index single files for one-off testing and exploration. The GUI guides you through setting up your index and field mappings. +* https://github.com/elastic/crawler[Web crawler]: Extract and index web page content into {es} documents. +* {enterprise-search-ref}/connectors.html[Connectors]: Sync data from various third-party data sources to create searchable, read-only replicas in {es}. + +[discrete] +[[es-ingestion-overview-timestamped]] +==== Timestamped data + +Timestamped data in {es} refers to datasets that include a timestamp field. If you use the {ecs-ref}/ecs-reference.html[Elastic Common Schema (ECS)], this field is named `@timestamp`. +This could be data like logs, metrics, and traces. + +For timestamped data, you have the following options for adding data to {es} data streams: + +* {fleet-guide}/fleet-overview.html[Elastic Agent and Fleet]: The preferred way to index timestamped data. Each Elastic Agent based integration includes default ingestion rules, dashboards, and visualizations to start analyzing your data right away. +You can use the Fleet UI in {kib} to centrally manage Elastic Agents and their policies. +* {beats-ref}/beats-reference.html[Beats]: If your data source isn't supported by Elastic Agent, use Beats to collect and ship data to Elasticsearch. You install a separate Beat for each type of data to collect. +* {logstash-ref}/introduction.html[Logstash]: Logstash is an open source data collection engine with real-time pipelining capabilities that supports a wide variety of data sources. You might use this option because neither Elastic Agent nor Beats supports your data source. You can also use Logstash to persist incoming data, or if you need to send the data to multiple destinations. +* {cloud}/ec-ingest-guides.html[Language clients]: The linked tutorials demonstrate how to use {es} programming language clients to ingest data from an application. In these examples, {es} is running on Elastic Cloud, but the same principles apply to any {es} deployment. + +[TIP] +==== +If you're interested in data ingestion pipelines for timestamped data, use the decision tree in the {cloud}/ec-cloud-ingest-data.html#ec-data-ingest-pipeline[Elastic Cloud docs] to understand your options. +==== + // New html page [[search-analyze]] -=== Search and analyze +=== Search and analyze data -While you can use {es} as a document store and retrieve documents and their -metadata, the real power comes from being able to easily access the full suite -of search capabilities built on the Apache Lucene search engine library. +You can use {es} as a basic document store to retrieve documents and their +metadata. +However, the real power of {es} comes from its advanced search and analytics capabilities. -{es} provides a simple, coherent REST API for managing your cluster and indexing -and searching your data. For testing purposes, you can easily submit requests -directly from the command line or through the Developer Console in {kib}. From -your applications, you can use the -https://www.elastic.co/guide/en/elasticsearch/client/index.html[{es} client] -for your language of choice: Java, JavaScript, Go, .NET, PHP, Perl, Python -or Ruby. +You'll use a combination of an API endpoint and a query language to interact with your data. [discrete] -[[search-data]] -==== Searching your data - -The {es} REST APIs support structured queries, full text queries, and complex -queries that combine the two. 
Structured queries are -similar to the types of queries you can construct in SQL. For example, you -could search the `gender` and `age` fields in your `employee` index and sort the -matches by the `hire_date` field. Full-text queries find all documents that -match the query string and return them sorted by _relevance_—how good a -match they are for your search terms. - -In addition to searching for individual terms, you can perform phrase searches, -similarity searches, and prefix searches, and get autocomplete suggestions. - -Have geospatial or other numerical data that you want to search? {es} indexes -non-textual data in optimized data structures that support -high-performance geo and numerical queries. - -You can access all of these search capabilities using {es}'s -comprehensive JSON-style query language (<>). You can also -construct <> to search and aggregate data -natively inside {es}, and JDBC and ODBC drivers enable a broad range of -third-party applications to interact with {es} via SQL. +[discrete] +[[search-analyze-rest-api]] +==== REST API + +Use REST APIs to manage your {es} cluster, and to index +and search your data. +For testing purposes, you can submit requests +directly from the command line or through the Dev Tools {kibana-ref}/console-kibana.html[Console] in {kib}. +From your applications, you can use a +https://www.elastic.co/guide/en/elasticsearch/client/index.html[client] +in your programming language of choice. + +Refer to <> for a hands-on example of using the `_search` endpoint, adding data to {es}, and running basic searches in Query DSL syntax. [discrete] -[[analyze-data]] -==== Analyzing your data +[[search-analyze-query-languages]] +==== Query languages + +{es} provides a number of query languages for interacting with your data. + +*Query DSL* is the primary query language for {es} today. + +*{esql}* is a new piped query language and compute engine which was first added in version *8.11*. + +{esql} does not yet support all the features of Query DSL, like full-text search and semantic search. +Look forward to new {esql} features and functionalities in each release. + +Refer to <> for a full overview of the query languages available in {es}. + +[discrete] +[[search-analyze-query-dsl]] +===== Query DSL + +<> is a full-featured JSON-style query language that enables complex searching, filtering, and aggregations. +It is the original and most powerful query language for {es} today. + +The <> accepts queries written in Query DSL syntax. + +[discrete] +[[search-analyze-query-dsl-search-filter]] +====== Search and filter with Query DSL + +Query DSL supports a wide range of search techniques, including the following: + +* <>: Search text that has been analyzed and indexed to support phrase or proximity queries, fuzzy matches, and more. +* <>: Search for exact matches using `keyword` fields. +* <>: Search `semantic_text` fields using dense or sparse vector search on embeddings generated in your {es} cluster. +* <>: Search for similar dense vectors using the kNN algorithm for embeddings generated outside of {es}. +* <>: Search for locations and calculate spatial relationships using geospatial queries. -{es} aggregations enable you to build complex summaries of your data and gain -insight into key metrics, patterns, and trends. Instead of just finding the -proverbial “needle in a haystack”, aggregations enable you to answer questions -like: +Learn about the full range of queries supported by <>. -* How many needles are in the haystack? -* What is the average length of the needles?
-* What is the median length of the needles, broken down by manufacturer? -* How many needles were added to the haystack in each of the last six months? +You can also filter data using Query DSL. +Filters enable you to include or exclude documents by retrieving documents that match specific field-level criteria. +A query that uses the `filter` parameter indicates <>. -You can also use aggregations to answer more subtle questions, such as: +[discrete] +[[search-analyze-data-query-dsl]] +====== Analyze with Query DSL -* What are your most popular needle manufacturers? -* Are there any unusual or anomalous clumps of needles? +<> are the primary tool for analyzing {es} data using Query DSL. +Aggregations enable you to build complex summaries of your data and gain +insight into key metrics, patterns, and trends. -Because aggregations leverage the same data-structures used for search, they are +Because aggregations leverage the same data structures used for search, they are also very fast. This enables you to analyze and visualize your data in real time. -Your reports and dashboards update as your data changes so you can take action -based on the latest information. +You can search documents, filter results, and perform analytics at the same time, on the same +data, in a single request. +That means aggregations are calculated in the context of the search query. + +The following aggregation types are available: + +* <>: Calculate metrics, +such as a sum or average, from field values. +* <>: Group documents into buckets based on field values, ranges, +or other criteria. +* <>: Run aggregations on the results of other aggregations. + +Run aggregations by specifying the <>'s `aggs` parameter. +Learn more in <>. + +[discrete] +[[search-analyze-data-esql]] +===== {esql} -What’s more, aggregations operate alongside search requests. You can search -documents, filter results, and perform analytics at the same time, on the same -data, in a single request. And because aggregations are calculated in the -context of a particular search, you’re not just displaying a count of all -size 70 needles, you’re displaying a count of the size 70 needles -that match your users' search criteria--for example, all size 70 _non-stick -embroidery_ needles. +<> is a piped query language for filtering, transforming, and analyzing data. +{esql} is built on top of a new compute engine, where search, aggregation, and transformation functions are +directly executed within {es} itself. +{esql} syntax can also be used within various {kib} tools. + +The <> accepts queries written in {esql} syntax. + +Today, it supports a subset of the features available in Query DSL, like aggregations, filters, and transformations. +It does not yet support full-text search or semantic search. + +It comes with a comprehensive set of <> for working with data and has robust integration with {kib}'s Discover, dashboards and visualizations. + +Learn more in <>, or try https://www.elastic.co/training/introduction-to-esql[our training course]. + +[discrete] +[[search-analyze-data-query-languages-table]] +==== List of available query languages +The following table summarizes all available {es} query languages, to help you choose the right one for your use case. + +[cols="1,2,2,1", options="header"] +|=== +| Name | Description | Use cases | API endpoint + +| <> +| The primary query language for {es}. A powerful and flexible JSON-style language that enables complex queries.
| Full-text search, semantic search, keyword search, filtering, aggregations, and more. +| <> + + +| <> +| Introduced in *8.11*, the Elasticsearch Query Language ({esql}) is a piped query language for filtering, transforming, and analyzing data. +| Initially tailored towards working with time series data like logs and metrics. +Robust integration with {kib} for querying, visualizing, and analyzing data. +Does not yet support full-text search. +| <> + + +| <> +| Event Query Language (EQL) is a query language for event-based time series data. Data must contain the `@timestamp` field to use EQL. +| Designed for the threat hunting security use case. +| <> + +| <> +| Allows native, real-time SQL-like querying against {es} data. JDBC and ODBC drivers are available for integration with business intelligence (BI) tools. +| Enables users familiar with SQL to query {es} data using familiar syntax for BI and reporting. +| <> + +| {kibana-ref}/kuery-query.html[Kibana Query Language (KQL)] +| Kibana Query Language (KQL) is a text-based query language for filtering data when you access it through the {kib} UI. +| Use KQL to filter documents where a value for a field exists, matches a given value, or is within a given range. +| N/A + +|=== + +// New html page +// TODO: this page won't live here long term [[scalability]] -=== Scalability and resilience +=== Plan for production {es} is built to be always available and to scale with your needs. It does this by being distributed by nature. You can add servers (nodes) to a cluster to diff --git a/docs/reference/landing-page.asciidoc b/docs/reference/landing-page.asciidoc index e781dc0aff4e3..f1b5ce8210996 100644 --- a/docs/reference/landing-page.asciidoc +++ b/docs/reference/landing-page.asciidoc @@ -62,7 +62,7 @@ Elasticsearch is the search and analytics engine that powers the Elastic Stack.

- +

diff --git a/docs/reference/quickstart/getting-started.asciidoc b/docs/reference/quickstart/getting-started.asciidoc index 6b3095e07f9d4..e674dda147bcc 100644 --- a/docs/reference/quickstart/getting-started.asciidoc +++ b/docs/reference/quickstart/getting-started.asciidoc @@ -1,47 +1,20 @@ [[getting-started]] -== Quick start guide +== Quick start: Add data using Elasticsearch APIs +++++ +Basics: Add data using APIs +++++ -This guide helps you learn how to: +In this quick start guide, you'll learn how to do the following tasks: -* Run {es} and {kib} (using {ecloud} or in a local Docker dev environment), -* add simple (non-timestamped) dataset to {es}, -* run basic searches. - -[TIP] -==== -If you're interested in using {es} with Python, check out Elastic Search Labs. This is the best place to explore AI-powered search use cases, such as working with embeddings, vector search, and retrieval augmented generation (RAG). - -* https://www.elastic.co/search-labs/tutorials/search-tutorial/welcome[Tutorial]: this walks you through building a complete search solution with {es}, from the ground up. -* https://github.com/elastic/elasticsearch-labs[`elasticsearch-labs` repository]: it contains a range of Python https://github.com/elastic/elasticsearch-labs/tree/main/notebooks[notebooks] and https://github.com/elastic/elasticsearch-labs/tree/main/example-apps[example apps]. -==== - -[discrete] -[[run-elasticsearch]] -=== Run {es} - -The simplest way to set up {es} is to create a managed deployment with {ess} on -{ecloud}. If you prefer to manage your own test environment, install and -run {es} using Docker. - -include::{es-ref-dir}/tab-widgets/code.asciidoc[] -include::{es-ref-dir}/tab-widgets/quick-start-install-widget.asciidoc[] - -[discrete] -[[send-requests-to-elasticsearch]] -=== Send requests to {es} - -You send data and other requests to {es} using REST APIs. This lets you interact -with {es} using any client that sends HTTP requests, such as -https://curl.se[curl]. You can also use {kib}'s Console to send requests to -{es}. - -include::{es-ref-dir}/tab-widgets/api-call-widget.asciidoc[] +* Add a small, non-timestamped dataset to {es} using Elasticsearch REST APIs. +* Run basic searches. [discrete] [[add-data]] === Add data -You add data to {es} as JSON objects called documents. {es} stores these +You add data to {es} as JSON objects called documents. +{es} stores these documents in searchable indices. [discrete] @@ -58,6 +31,13 @@ The request automatically creates the index. PUT books ---- // TESTSETUP + +[source,console] +-------------------------------------------------- +DELETE books +-------------------------------------------------- +// TEARDOWN + //// [source,console] @@ -236,10 +216,11 @@ JSON object submitted during indexing. [[qs-match-query]] ==== `match` query -You can use the `match` query to search for documents that contain a specific value in a specific field. +You can use the <> to search for documents that contain a specific value in a specific field. This is the standard query for performing full-text search, including fuzzy matching and phrase searches. Run the following command to search the `books` index for documents containing `brave` in the `name` field: + [source,console] ---- GET books/_search @@ -251,34 +232,4 @@ GET books/_search } } ---- -// TEST[continued] - -[discrete] -[[whats-next]] -=== Next steps - -Now that {es} is up and running and you've learned the basics, you'll probably want to test out larger datasets, or index your own data. 
-
-[discrete]
-[[whats-next-search-learn-more]]
-==== Learn more about search queries
-
-* <>. Jump here to learn about exact value search, full-text search, vector search, and more, using the <>.
-
-[discrete]
-[[whats-next-more-data]]
-==== Add more data
-
-* Learn how to {kibana-ref}/sample-data.html[install sample data] using {kib}. This is a quick way to test out {es} on larger workloads.
-* Learn how to use the {kibana-ref}/connect-to-elasticsearch.html#upload-data-kibana[upload data UI] in {kib} to add your own CSV, TSV, or JSON files.
-* Use the https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-bulk.html[bulk API] to ingest your own datasets to {es}.
-
-[discrete]
-[[whats-next-client-libraries]]
-==== {es} programming language clients
-
-* Check out our https://www.elastic.co/guide/en/elasticsearch/client/index.html[client library] to work with your {es} instance in your preferred programming language.
-* If you're using Python, check out https://www.elastic.co/search-labs[Elastic Search Labs] for a range of examples that use the {es} Python client. This is the best place to explore AI-powered search use cases, such as working with embeddings, vector search, and retrieval augmented generation (RAG).
-** This extensive, hands-on https://www.elastic.co/search-labs/tutorials/search-tutorial/welcome[tutorial]
-walks you through building a complete search solution with {es}, from the ground up.
-** https://github.com/elastic/elasticsearch-labs[`elasticsearch-labs`] contains a range of executable Python https://github.com/elastic/elasticsearch-labs/tree/main/notebooks[notebooks] and https://github.com/elastic/elasticsearch-labs/tree/main/example-apps[example apps].
\ No newline at end of file
+// TEST[continued]
\ No newline at end of file
diff --git a/docs/reference/quickstart/index.asciidoc b/docs/reference/quickstart/index.asciidoc
index e517d039e620b..6bfed4c198c75 100644
--- a/docs/reference/quickstart/index.asciidoc
+++ b/docs/reference/quickstart/index.asciidoc
@@ -1,10 +1,29 @@
 [[quickstart]]
-= Quickstart
+= Quick starts
 
-Get started quickly with {es}.
+Use these quick starts to get hands-on experience with the {es} APIs.
+Unless otherwise noted, these examples will use queries written in <> syntax.
 
-* Learn how to run {es} (and {kib}) for <>.
-* Follow our <> to add data to {es} and query it.
+[discrete]
+[[quickstart-requirements]]
+== Requirements
 
-include::run-elasticsearch-locally.asciidoc[]
-include::getting-started.asciidoc[]
+You'll need a running {es} cluster, together with {kib} to use the Dev Tools API Console.
+Get started <>, or see our <>.
+
+[discrete]
+[[quickstart-list]]
+== Hands-on quick starts
+
+* <>. Learn how to add data to {es} and perform basic searches.
+
+[discrete]
+[[quickstart-python-links]]
+== Working in Python
+
+If you're interested in using {es} with Python, check out Elastic Search Labs:
+
+* https://github.com/elastic/elasticsearch-labs[`elasticsearch-labs` repository]: Contains a range of Python https://github.com/elastic/elasticsearch-labs/tree/main/notebooks[notebooks] and https://github.com/elastic/elasticsearch-labs/tree/main/example-apps[example apps].
+* https://www.elastic.co/search-labs/tutorials/search-tutorial/welcome[Tutorial]: This walks you through building a complete search solution with {es} from the ground up using Flask.
+ +include::getting-started.asciidoc[] \ No newline at end of file diff --git a/docs/reference/quickstart/run-elasticsearch-locally.asciidoc b/docs/reference/run-elasticsearch-locally.asciidoc similarity index 68% rename from docs/reference/quickstart/run-elasticsearch-locally.asciidoc rename to docs/reference/run-elasticsearch-locally.asciidoc index 24e0f3f22350e..64bcd3d066529 100644 --- a/docs/reference/quickstart/run-elasticsearch-locally.asciidoc +++ b/docs/reference/run-elasticsearch-locally.asciidoc @@ -1,7 +1,7 @@ [[run-elasticsearch-locally]] -== Run {es} locally in Docker (without security) +== Run {es} locally in Docker ++++ -Local dev setup (Docker) +Run {es} locally ++++ [WARNING] @@ -9,24 +9,13 @@ *DO NOT USE THESE INSTRUCTIONS FOR PRODUCTION DEPLOYMENTS* The instructions on this page are for *local development only*. Do not use these instructions for production deployments, because they are not secure. -While this approach is convenient for experimenting and learning, you should never run the service in this way in a production environment. +While this approach is convenient for experimenting and learning, you should never run Elasticsearch in this way in a production environment. ==== -The following commands help you very quickly spin up a single-node {es} cluster, together with {kib} in Docker. -Note that if you don't need the {kib} UI, you can skip those instructions. +Follow this tutorial if you want to quickly set up {es} in Docker for local development or testing. -[discrete] -[[local-dev-why]] -=== When would I use this setup? - -Use this setup if you want to quickly spin up {es} (and {kib}) for local development or testing. - -For example you might: - -* Want to run a quick test to see how a feature works. -* Follow a tutorial or guide that requires an {es} cluster, like our <>. -* Experiment with the {es} APIs using different tools, like the Dev Tools Console, cURL, or an Elastic programming language client. -* Quickly spin up an {es} cluster to test an executable https://github.com/elastic/elasticsearch-labs/tree/main/notebooks#readme[Python notebook] locally. +This tutorial also includes instructions for installing {kib}. + If you don't need access to the {kib} UI, then you can skip those instructions. [discrete] [[local-dev-prerequisites]] @@ -118,12 +107,12 @@ When you access {kib}, use `elastic` as the username and the password you set ea [NOTE] ==== -The service is started with a trial license. The trial license enables all features of Elasticsearch for a trial period of 30 days. After the trial period expires, the license is downgraded to a basic license, which is free forever. If you prefer to skip the trial and use the basic license, set the value of the `xpack.license.self_generated.type` variable to basic instead. For a detailed feature comparison between the different licenses, refer to our https://www.elastic.co/subscriptions[subscriptions page]. +The service is started with a trial license. The trial license enables all features of Elasticsearch for a trial period of 30 days. After the trial period expires, the license is downgraded to a basic license, which is free forever. ==== [discrete] [[local-dev-connecting-clients]] -== Connecting to {es} with language clients +=== Connect to {es} with language clients To connect to the {es} cluster from a language client, you can use basic authentication with the `elastic` username and the password you set in the environment variable. 
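As a companion to the `curl` example below, here is a minimal, illustrative Java sketch (not part of this patch) of the same basic-authentication connection using the low-level Java REST client. It assumes the tutorial's cluster listens on `http://localhost:9200` and that `ELASTIC_PASSWORD` holds the password you exported for the Docker run.

[source,java]
----
import org.apache.http.HttpHost;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.impl.client.BasicCredentialsProvider;
import org.elasticsearch.client.Request;
import org.elasticsearch.client.Response;
import org.elasticsearch.client.RestClient;

public class BasicAuthConnectExample {
    public static void main(String[] args) throws Exception {
        // Reuse the password exported before starting the container (assumption: ELASTIC_PASSWORD is set).
        var credentials = new BasicCredentialsProvider();
        credentials.setCredentials(
            AuthScope.ANY,
            new UsernamePasswordCredentials("elastic", System.getenv("ELASTIC_PASSWORD"))
        );
        try (
            RestClient client = RestClient.builder(new HttpHost("localhost", 9200, "http"))
                .setHttpClientConfigCallback(builder -> builder.setDefaultCredentialsProvider(credentials))
                .build()
        ) {
            // GET / returns the cluster name and version when the credentials are accepted.
            Response response = client.performRequest(new Request("GET", "/"));
            System.out.println(response.getStatusLine());
        }
    }
}
----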
@@ -172,12 +161,11 @@ curl -u elastic:$ELASTIC_PASSWORD \ [[local-dev-next-steps]] === Next steps -Use our <> to learn the basics of {es}: how to add data and query it. +Use our <> to learn the basics of {es}. [discrete] [[local-dev-production]] === Moving to production -This setup is not suitable for production use. For production deployments, we recommend using our managed service on Elastic Cloud. https://cloud.elastic.co/registration[Sign up for a free trial] (no credit card required). - -Otherwise, refer to https://www.elastic.co/guide/en/elasticsearch/reference/current/install-elasticsearch.html[Install {es}] to learn about the various options for installing {es} in a self-managed production environment, including using Docker. +This setup is not suitable for production use. +Refer to <> for more information. \ No newline at end of file diff --git a/docs/reference/setup.asciidoc b/docs/reference/setup.asciidoc index b346fddc5e5a1..a284e563917c3 100644 --- a/docs/reference/setup.asciidoc +++ b/docs/reference/setup.asciidoc @@ -27,6 +27,8 @@ the only resource-intensive application on the host or container. For example, you might run {metricbeat} alongside {es} for cluster statistics, but a resource-heavy {ls} deployment should be on its own host. +include::run-elasticsearch-locally.asciidoc[] + include::setup/install.asciidoc[] include::setup/configuration.asciidoc[] diff --git a/docs/reference/tab-widgets/api-call.asciidoc b/docs/reference/tab-widgets/api-call.asciidoc index bb6b89374075d..5e70d73684436 100644 --- a/docs/reference/tab-widgets/api-call.asciidoc +++ b/docs/reference/tab-widgets/api-call.asciidoc @@ -1,5 +1,5 @@ // tag::cloud[] -**Use {kib}** +**Option 1: Use {kib}** //tag::kibana-api-ex[] . Open {kib}'s main menu ("*☰*" near Elastic logo) and go to **Dev Tools > Console**. @@ -16,9 +16,9 @@ GET / //end::kibana-api-ex[] -**Use curl** +**Option 2: Use `curl`** -To communicate with {es} using curl or another client, you need your cluster's +To communicate with {es} using `curl` or another client, you need your cluster's endpoint. . Open {kib}'s main menu and click **Manage this deployment**. @@ -26,7 +26,7 @@ endpoint. . From your deployment menu, go to the **Elasticsearch** page. Click **Copy endpoint**. -. To submit an example API request, run the following curl command in a new +. To submit an example API request, run the following `curl` command in a new terminal session. Replace `` with the password for the `elastic` user. Replace `` with your endpoint. + From e4a174d6a814fd1d00e9115878b410ef6461154c Mon Sep 17 00:00:00 2001 From: Dan Rubinstein Date: Wed, 25 Sep 2024 14:23:50 -0400 Subject: [PATCH 50/58] Inference endpoint validation for OpenAIService (#113137) (#113546) * Adding service integration and model validators for inference services. 
* Adding ModelValidators to OpenAiService * Cleaning up tests * Cleaning up tests * Adding mock response for completion model OpenAIService integration tests --------- Co-authored-by: Elastic Machine --- .../inference/InferenceService.java | 11 ++ .../inference/ServiceSettings.java | 9 + .../qa/mixed/OpenAIServiceMixedIT.java | 2 + .../application/OpenAiServiceUpgradeIT.java | 2 + ...mazonBedrockEmbeddingsServiceSettings.java | 3 +- ...zureAiStudioEmbeddingsServiceSettings.java | 3 +- .../services/openai/OpenAiService.java | 61 +++---- .../OpenAiEmbeddingsServiceSettings.java | 1 + .../services/validation/ModelValidator.java | 16 ++ .../validation/ModelValidatorBuilder.java | 29 ++++ .../ServiceIntegrationValidator.java | 17 ++ .../validation/SimpleModelValidator.java | 30 ++++ .../SimpleServiceIntegrationValidator.java | 47 ++++++ .../TextEmbeddingModelValidator.java | 79 +++++++++ .../elastic/ElasticInferenceServiceTests.java | 9 +- .../services/openai/OpenAiServiceTests.java | 47 ++++++ .../ModelValidatorBuilderTests.java | 42 +++++ .../validation/SimpleModelValidatorTests.java | 82 +++++++++ ...impleServiceIntegrationValidatorTests.java | 128 ++++++++++++++ .../TextEmbeddingModelValidatorTests.java | 156 ++++++++++++++++++ 20 files changed, 733 insertions(+), 41 deletions(-) create mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/validation/ModelValidator.java create mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/validation/ModelValidatorBuilder.java create mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/validation/ServiceIntegrationValidator.java create mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/validation/SimpleModelValidator.java create mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/validation/SimpleServiceIntegrationValidator.java create mode 100644 x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/validation/TextEmbeddingModelValidator.java create mode 100644 x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/validation/ModelValidatorBuilderTests.java create mode 100644 x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/validation/SimpleModelValidatorTests.java create mode 100644 x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/validation/SimpleServiceIntegrationValidatorTests.java create mode 100644 x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/validation/TextEmbeddingModelValidatorTests.java diff --git a/server/src/main/java/org/elasticsearch/inference/InferenceService.java b/server/src/main/java/org/elasticsearch/inference/InferenceService.java index 9e9a4cf890379..f677f75dfb5ae 100644 --- a/server/src/main/java/org/elasticsearch/inference/InferenceService.java +++ b/server/src/main/java/org/elasticsearch/inference/InferenceService.java @@ -175,6 +175,17 @@ default void checkModelConfig(Model model, ActionListener listener) { listener.onResponse(model); }; + /** + * Update a text embedding model's dimensions based on a provided embedding + * size and set the default similarity if required. The default behaviour is to just return the model. 
+ * @param model The original model without updated embedding details + * @param embeddingSize The embedding size to update the model with + * @return The model with updated embedding details + */ + default Model updateModelWithEmbeddingDetails(Model model, int embeddingSize) { + return model; + } + /** * Return true if this model is hosted in the local Elasticsearch cluster * @return True if in cluster diff --git a/server/src/main/java/org/elasticsearch/inference/ServiceSettings.java b/server/src/main/java/org/elasticsearch/inference/ServiceSettings.java index 1707e04324587..fbc70b2358f91 100644 --- a/server/src/main/java/org/elasticsearch/inference/ServiceSettings.java +++ b/server/src/main/java/org/elasticsearch/inference/ServiceSettings.java @@ -34,6 +34,15 @@ default Integer dimensions() { return null; } + /** + * Boolean signifying whether the dimensions were set by the user + * + * @return boolean signifying whether the dimensions were set by the user + */ + default Boolean dimensionsSetByUser() { + return null; + } + /** * The data type for the embeddings this service works with. Defaults to null, * Text Embedding models should return a non-null value diff --git a/x-pack/plugin/inference/qa/mixed-cluster/src/javaRestTest/java/org/elasticsearch/xpack/inference/qa/mixed/OpenAIServiceMixedIT.java b/x-pack/plugin/inference/qa/mixed-cluster/src/javaRestTest/java/org/elasticsearch/xpack/inference/qa/mixed/OpenAIServiceMixedIT.java index 33cad6a179281..38c0768196142 100644 --- a/x-pack/plugin/inference/qa/mixed-cluster/src/javaRestTest/java/org/elasticsearch/xpack/inference/qa/mixed/OpenAIServiceMixedIT.java +++ b/x-pack/plugin/inference/qa/mixed-cluster/src/javaRestTest/java/org/elasticsearch/xpack/inference/qa/mixed/OpenAIServiceMixedIT.java @@ -95,6 +95,8 @@ public void testOpenAiCompletions() throws IOException { final String inferenceId = "mixed-cluster-completions"; final String upgradedClusterId = "upgraded-cluster-completions"; + // queue a response as PUT will call the service + openAiChatCompletionsServer.enqueue(new MockResponse().setResponseCode(200).setBody(chatCompletionsResponse())); put(inferenceId, chatCompletionsConfig(getUrl(openAiChatCompletionsServer)), TaskType.COMPLETION); var configsMap = get(TaskType.COMPLETION, inferenceId); diff --git a/x-pack/plugin/inference/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/xpack/application/OpenAiServiceUpgradeIT.java b/x-pack/plugin/inference/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/xpack/application/OpenAiServiceUpgradeIT.java index df995c6f5e620..a5c8910251467 100644 --- a/x-pack/plugin/inference/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/xpack/application/OpenAiServiceUpgradeIT.java +++ b/x-pack/plugin/inference/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/xpack/application/OpenAiServiceUpgradeIT.java @@ -128,6 +128,7 @@ public void testOpenAiCompletions() throws IOException { var testTaskType = TaskType.COMPLETION; if (isOldCluster()) { + openAiChatCompletionsServer.enqueue(new MockResponse().setResponseCode(200).setBody(chatCompletionsResponse())); put(oldClusterId, chatCompletionsConfig(getUrl(openAiChatCompletionsServer)), testTaskType); var configs = (List>) get(testTaskType, oldClusterId).get(old_cluster_endpoint_identifier); @@ -157,6 +158,7 @@ public void testOpenAiCompletions() throws IOException { assertCompletionInference(oldClusterId); + openAiChatCompletionsServer.enqueue(new MockResponse().setResponseCode(200).setBody(chatCompletionsResponse())); 
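+        // as in the mixed-cluster test above, the PUT below now triggers a validation inference call, answered by the queued mock response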
put(upgradedClusterId, chatCompletionsConfig(getUrl(openAiChatCompletionsServer)), testTaskType); configs = (List>) get(testTaskType, upgradedClusterId).get("endpoints"); assertThat(configs, hasSize(1)); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/embeddings/AmazonBedrockEmbeddingsServiceSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/embeddings/AmazonBedrockEmbeddingsServiceSettings.java index 4bf037558c618..55b9972168278 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/embeddings/AmazonBedrockEmbeddingsServiceSettings.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/embeddings/AmazonBedrockEmbeddingsServiceSettings.java @@ -183,7 +183,8 @@ public Integer dimensions() { return dimensions; } - public boolean dimensionsSetByUser() { + @Override + public Boolean dimensionsSetByUser() { return this.dimensionsSetByUser; } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/azureaistudio/embeddings/AzureAiStudioEmbeddingsServiceSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/azureaistudio/embeddings/AzureAiStudioEmbeddingsServiceSettings.java index d4a1fd938625e..8977ba8b12836 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/azureaistudio/embeddings/AzureAiStudioEmbeddingsServiceSettings.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/azureaistudio/embeddings/AzureAiStudioEmbeddingsServiceSettings.java @@ -146,7 +146,8 @@ public SimilarityMeasure similarity() { return similarity; } - public boolean dimensionsSetByUser() { + @Override + public Boolean dimensionsSetByUser() { return this.dimensionsSetByUser; } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/openai/OpenAiService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/openai/OpenAiService.java index d2264ce5cd881..7d2a4adbb27b2 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/openai/OpenAiService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/openai/OpenAiService.java @@ -11,8 +11,8 @@ import org.elasticsearch.TransportVersion; import org.elasticsearch.TransportVersions; import org.elasticsearch.action.ActionListener; -import org.elasticsearch.common.Strings; import org.elasticsearch.core.Nullable; +import org.elasticsearch.core.Strings; import org.elasticsearch.core.TimeValue; import org.elasticsearch.inference.ChunkedInferenceServiceResults; import org.elasticsearch.inference.ChunkingOptions; @@ -31,10 +31,10 @@ import org.elasticsearch.xpack.inference.services.ConfigurationParseContext; import org.elasticsearch.xpack.inference.services.SenderService; import org.elasticsearch.xpack.inference.services.ServiceComponents; -import org.elasticsearch.xpack.inference.services.ServiceUtils; import org.elasticsearch.xpack.inference.services.openai.completion.OpenAiChatCompletionModel; import org.elasticsearch.xpack.inference.services.openai.embeddings.OpenAiEmbeddingsModel; import org.elasticsearch.xpack.inference.services.openai.embeddings.OpenAiEmbeddingsServiceSettings; +import org.elasticsearch.xpack.inference.services.validation.ModelValidatorBuilder; 
import java.util.List; import java.util.Map; @@ -255,48 +255,35 @@ protected void doChunkedInfer( */ @Override public void checkModelConfig(Model model, ActionListener listener) { + // TODO: Remove this function once all services have been updated to use the new model validators + ModelValidatorBuilder.buildModelValidator(model.getTaskType()).validate(this, model, listener); + } + + @Override + public Model updateModelWithEmbeddingDetails(Model model, int embeddingSize) { if (model instanceof OpenAiEmbeddingsModel embeddingsModel) { - ServiceUtils.getEmbeddingSize( - model, - this, - listener.delegateFailureAndWrap((l, size) -> l.onResponse(updateModelWithEmbeddingDetails(embeddingsModel, size))) + var serviceSettings = embeddingsModel.getServiceSettings(); + var similarityFromModel = serviceSettings.similarity(); + var similarityToUse = similarityFromModel == null ? SimilarityMeasure.DOT_PRODUCT : similarityFromModel; + + var updatedServiceSettings = new OpenAiEmbeddingsServiceSettings( + serviceSettings.modelId(), + serviceSettings.uri(), + serviceSettings.organizationId(), + similarityToUse, + embeddingSize, + serviceSettings.maxInputTokens(), + serviceSettings.dimensionsSetByUser(), + serviceSettings.rateLimitSettings() ); - } else { - listener.onResponse(model); - } - } - private OpenAiEmbeddingsModel updateModelWithEmbeddingDetails(OpenAiEmbeddingsModel model, int embeddingSize) { - if (model.getServiceSettings().dimensionsSetByUser() - && model.getServiceSettings().dimensions() != null - && model.getServiceSettings().dimensions() != embeddingSize) { + return new OpenAiEmbeddingsModel(embeddingsModel, updatedServiceSettings); + } else { throw new ElasticsearchStatusException( - Strings.format( - "The retrieved embeddings size [%s] does not match the size specified in the settings [%s]. " - + "Please recreate the [%s] configuration with the correct dimensions", - embeddingSize, - model.getServiceSettings().dimensions(), - model.getConfigurations().getInferenceEntityId() - ), + Strings.format("Can't update embedding details for model with unexpected type %s", model.getClass()), RestStatus.BAD_REQUEST ); } - - var similarityFromModel = model.getServiceSettings().similarity(); - var similarityToUse = similarityFromModel == null ? 
SimilarityMeasure.DOT_PRODUCT : similarityFromModel;
-
-        OpenAiEmbeddingsServiceSettings serviceSettings = new OpenAiEmbeddingsServiceSettings(
-            model.getServiceSettings().modelId(),
-            model.getServiceSettings().uri(),
-            model.getServiceSettings().organizationId(),
-            similarityToUse,
-            embeddingSize,
-            model.getServiceSettings().maxInputTokens(),
-            model.getServiceSettings().dimensionsSetByUser(),
-            model.getServiceSettings().rateLimitSettings()
-        );
-
-        return new OpenAiEmbeddingsModel(model, serviceSettings);
     }
 
     @Override
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/openai/embeddings/OpenAiEmbeddingsServiceSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/openai/embeddings/OpenAiEmbeddingsServiceSettings.java
index 6ef1f6f0feefe..940f78bcd2ca1 100644
--- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/openai/embeddings/OpenAiEmbeddingsServiceSettings.java
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/openai/embeddings/OpenAiEmbeddingsServiceSettings.java
@@ -247,6 +247,7 @@ public Integer dimensions() {
         return dimensions;
     }
 
+    @Override
     public Boolean dimensionsSetByUser() {
         return dimensionsSetByUser;
     }
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/validation/ModelValidator.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/validation/ModelValidator.java
new file mode 100644
index 0000000000000..c435939a17568
--- /dev/null
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/validation/ModelValidator.java
@@ -0,0 +1,16 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.inference.services.validation;
+
+import org.elasticsearch.action.ActionListener;
+import org.elasticsearch.inference.InferenceService;
+import org.elasticsearch.inference.Model;
+
+public interface ModelValidator {
+    void validate(InferenceService service, Model model, ActionListener<Model> listener);
+}
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/validation/ModelValidatorBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/validation/ModelValidatorBuilder.java
new file mode 100644
index 0000000000000..0464e790ba79a
--- /dev/null
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/validation/ModelValidatorBuilder.java
@@ -0,0 +1,29 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.inference.services.validation;
+
+import org.elasticsearch.core.Strings;
+import org.elasticsearch.inference.TaskType;
+
+public class ModelValidatorBuilder {
+    public static ModelValidator buildModelValidator(TaskType taskType) {
+        if (taskType == null) {
+            throw new IllegalArgumentException("Task type can't be null");
+        }
+
+        switch (taskType) {
+            case TEXT_EMBEDDING -> {
+                return new TextEmbeddingModelValidator(new SimpleServiceIntegrationValidator());
+            }
+            case SPARSE_EMBEDDING, RERANK, COMPLETION, ANY -> {
+                return new SimpleModelValidator(new SimpleServiceIntegrationValidator());
+            }
+            default -> throw new IllegalArgumentException(Strings.format("Can't validate inference model for task type %s", taskType));
+        }
+    }
+}
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/validation/ServiceIntegrationValidator.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/validation/ServiceIntegrationValidator.java
new file mode 100644
index 0000000000000..09fb43f584cf0
--- /dev/null
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/validation/ServiceIntegrationValidator.java
@@ -0,0 +1,17 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.inference.services.validation;
+
+import org.elasticsearch.action.ActionListener;
+import org.elasticsearch.inference.InferenceService;
+import org.elasticsearch.inference.InferenceServiceResults;
+import org.elasticsearch.inference.Model;
+
+public interface ServiceIntegrationValidator {
+    void validate(InferenceService service, Model model, ActionListener<InferenceServiceResults> listener);
+}
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/validation/SimpleModelValidator.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/validation/SimpleModelValidator.java
new file mode 100644
index 0000000000000..f44cf61079369
--- /dev/null
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/validation/SimpleModelValidator.java
@@ -0,0 +1,30 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.inference.services.validation;
+
+import org.elasticsearch.action.ActionListener;
+import org.elasticsearch.inference.InferenceService;
+import org.elasticsearch.inference.Model;
+
+public class SimpleModelValidator implements ModelValidator {
+
+    private final ServiceIntegrationValidator serviceIntegrationValidator;
+
+    public SimpleModelValidator(ServiceIntegrationValidator serviceIntegrationValidator) {
+        this.serviceIntegrationValidator = serviceIntegrationValidator;
+    }
+
+    @Override
+    public void validate(InferenceService service, Model model, ActionListener<Model> listener) {
+        serviceIntegrationValidator.validate(
+            service,
+            model,
+            listener.delegateFailureAndWrap((delegate, r) -> { delegate.onResponse(model); })
+        );
+    }
+}
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/validation/SimpleServiceIntegrationValidator.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/validation/SimpleServiceIntegrationValidator.java
new file mode 100644
index 0000000000000..9fc5748746085
--- /dev/null
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/validation/SimpleServiceIntegrationValidator.java
@@ -0,0 +1,47 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.inference.services.validation;
+
+import org.elasticsearch.ElasticsearchStatusException;
+import org.elasticsearch.action.ActionListener;
+import org.elasticsearch.inference.InferenceService;
+import org.elasticsearch.inference.InferenceServiceResults;
+import org.elasticsearch.inference.InputType;
+import org.elasticsearch.inference.Model;
+import org.elasticsearch.inference.TaskType;
+import org.elasticsearch.rest.RestStatus;
+import org.elasticsearch.xpack.core.inference.action.InferenceAction;
+
+import java.util.List;
+import java.util.Map;
+
+public class SimpleServiceIntegrationValidator implements ServiceIntegrationValidator {
+    private static final List<String> TEST_INPUT = List.of("how big");
+    private static final String QUERY = "test query";
+
+    @Override
+    public void validate(InferenceService service, Model model, ActionListener<InferenceServiceResults> listener) {
+        service.infer(
+            model,
+            model.getTaskType().equals(TaskType.RERANK) ? QUERY : null,
+            TEST_INPUT,
+            Map.of(),
+            InputType.INGEST,
+            InferenceAction.Request.DEFAULT_TIMEOUT,
+            listener.delegateFailureAndWrap((delegate, r) -> {
+                if (r != null) {
+                    delegate.onResponse(r);
+                } else {
+                    delegate.onFailure(
+                        new ElasticsearchStatusException("Could not make a validation call to the selected service", RestStatus.BAD_REQUEST)
+                    );
+                }
+            })
+        );
+    }
+}
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/validation/TextEmbeddingModelValidator.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/validation/TextEmbeddingModelValidator.java
new file mode 100644
index 0000000000000..1fe5c684196fe
--- /dev/null
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/validation/TextEmbeddingModelValidator.java
@@ -0,0 +1,79 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements.
Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.inference.services.validation;
+
+import org.elasticsearch.ElasticsearchStatusException;
+import org.elasticsearch.action.ActionListener;
+import org.elasticsearch.common.Strings;
+import org.elasticsearch.inference.InferenceService;
+import org.elasticsearch.inference.InferenceServiceResults;
+import org.elasticsearch.inference.Model;
+import org.elasticsearch.rest.RestStatus;
+import org.elasticsearch.xpack.core.inference.results.InferenceTextEmbeddingFloatResults;
+import org.elasticsearch.xpack.core.inference.results.TextEmbedding;
+
+public class TextEmbeddingModelValidator implements ModelValidator {
+
+    private final ServiceIntegrationValidator serviceIntegrationValidator;
+
+    public TextEmbeddingModelValidator(ServiceIntegrationValidator serviceIntegrationValidator) {
+        this.serviceIntegrationValidator = serviceIntegrationValidator;
+    }
+
+    @Override
+    public void validate(InferenceService service, Model model, ActionListener<Model> listener) {
+        serviceIntegrationValidator.validate(service, model, listener.delegateFailureAndWrap((delegate, r) -> {
+            delegate.onResponse(postValidate(service, model, r));
+        }));
+    }
+
+    private Model postValidate(InferenceService service, Model model, InferenceServiceResults results) {
+        if (results instanceof TextEmbedding embeddingResults) {
+            var serviceSettings = model.getServiceSettings();
+            var dimensions = serviceSettings.dimensions();
+            int embeddingSize = getEmbeddingSize(embeddingResults);
+
+            if (Boolean.TRUE.equals(serviceSettings.dimensionsSetByUser())
+                && dimensions != null
+                && (dimensions.equals(embeddingSize) == false)) {
+                throw new ElasticsearchStatusException(
+                    Strings.format(
+                        "The retrieved embeddings size [%s] does not match the size specified in the settings [%s]. "
+                            + "Please recreate the [%s] configuration with the correct dimensions",
+                        embeddingResults.getFirstEmbeddingSize(),
+                        serviceSettings.dimensions(),
+                        model.getInferenceEntityId()
+                    ),
+                    RestStatus.BAD_REQUEST
+                );
+            }
+
+            return service.updateModelWithEmbeddingDetails(model, embeddingSize);
+        } else {
+            throw new ElasticsearchStatusException(
+                "Validation call did not return expected results type. "
+                    + "Expected a result of type ["
+                    + InferenceTextEmbeddingFloatResults.NAME
+                    + "] got ["
+                    + (results == null ?
"null" : results.getWriteableName()) + + "]", + RestStatus.BAD_REQUEST + ); + } + } + + private int getEmbeddingSize(TextEmbedding embeddingResults) { + int embeddingSize; + try { + embeddingSize = embeddingResults.getFirstEmbeddingSize(); + } catch (Exception e) { + throw new ElasticsearchStatusException("Could not determine embedding size", RestStatus.BAD_REQUEST, e); + } + return embeddingSize; + } +} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceTests.java index 62416f05800c6..f3bf7413d2553 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceTests.java @@ -37,7 +37,6 @@ import org.elasticsearch.xpack.inference.results.SparseEmbeddingResultsTests; import org.elasticsearch.xpack.inference.services.ServiceFields; import org.elasticsearch.xpack.inference.services.elser.ElserModels; -import org.elasticsearch.xpack.inference.services.openai.OpenAiService; import org.hamcrest.MatcherAssert; import org.hamcrest.Matchers; import org.junit.After; @@ -311,7 +310,13 @@ public void testParsePersistedConfigWithSecrets_DoesNotThrowWhenAnExtraKeyExists public void testCheckModelConfig_ReturnsNewModelReference() throws IOException { var senderFactory = HttpRequestSenderTests.createSenderFactory(threadPool, clientManager); - try (var service = new OpenAiService(senderFactory, createWithEmptySettings(threadPool))) { + try ( + var service = new ElasticInferenceService( + senderFactory, + createWithEmptySettings(threadPool), + new ElasticInferenceServiceComponents(getUrl(webServer)) + ) + ) { var model = ElasticInferenceServiceSparseEmbeddingsModelTests.createModel(getUrl(webServer)); PlainActionFuture listener = new PlainActionFuture<>(); service.checkModelConfig(model, listener); diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/openai/OpenAiServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/openai/OpenAiServiceTests.java index 9ff175ca9685e..dbc365f3d6919 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/openai/OpenAiServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/openai/OpenAiServiceTests.java @@ -63,6 +63,7 @@ import static org.elasticsearch.xpack.inference.external.request.openai.OpenAiUtils.ORGANIZATION_HEADER; import static org.elasticsearch.xpack.inference.results.TextEmbeddingResultsTests.buildExpectationFloat; import static org.elasticsearch.xpack.inference.services.ServiceComponentsTests.createWithEmptySettings; +import static org.elasticsearch.xpack.inference.services.openai.completion.OpenAiChatCompletionModelTests.createChatCompletionModel; import static org.elasticsearch.xpack.inference.services.openai.embeddings.OpenAiEmbeddingsServiceSettingsTests.getServiceSettingsMap; import static org.elasticsearch.xpack.inference.services.openai.embeddings.OpenAiEmbeddingsTaskSettingsTests.getTaskSettingsMap; import static org.elasticsearch.xpack.inference.services.settings.DefaultSecretSettingsTests.getSecretSettingsMap; @@ -1151,6 +1152,52 @@ public void 
testCheckModelConfig_ReturnsNewModelReference_DoesNotOverrideSimilar } } + public void testUpdateModelWithEmbeddingDetails_InvalidModelProvided() throws IOException { + try (var service = createOpenAiService()) { + var model = createChatCompletionModel( + randomAlphaOfLength(10), + randomAlphaOfLength(10), + randomAlphaOfLength(10), + randomAlphaOfLength(10), + randomAlphaOfLength(10) + ); + assertThrows( + ElasticsearchStatusException.class, + () -> { service.updateModelWithEmbeddingDetails(model, randomNonNegativeInt()); } + ); + } + } + + public void testUpdateModelWithEmbeddingDetails_NullSimilarityInOriginalModel() throws IOException { + testUpdateModelWithEmbeddingDetails_Successful(null); + } + + public void testUpdateModelWithEmbeddingDetails_NonNullSimilarityInOriginalModel() throws IOException { + testUpdateModelWithEmbeddingDetails_Successful(randomFrom(SimilarityMeasure.values())); + } + + private void testUpdateModelWithEmbeddingDetails_Successful(SimilarityMeasure similarityMeasure) throws IOException { + try (var service = createOpenAiService()) { + var embeddingSize = randomNonNegativeInt(); + var model = OpenAiEmbeddingsModelTests.createModel( + randomAlphaOfLength(10), + randomAlphaOfLength(10), + randomAlphaOfLength(10), + randomAlphaOfLength(10), + randomAlphaOfLength(10), + null, + randomNonNegativeInt(), + randomNonNegativeInt(), + randomBoolean() + ); + + Model updatedModel = service.updateModelWithEmbeddingDetails(model, embeddingSize); + + assertEquals(SimilarityMeasure.DOT_PRODUCT, updatedModel.getServiceSettings().similarity()); + assertEquals(embeddingSize, updatedModel.getServiceSettings().dimensions().intValue()); + } + } + public void testInfer_UnauthorisedResponse() throws IOException { var senderFactory = HttpRequestSenderTests.createSenderFactory(threadPool, clientManager); diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/validation/ModelValidatorBuilderTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/validation/ModelValidatorBuilderTests.java new file mode 100644 index 0000000000000..c534fea8aeb3e --- /dev/null +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/validation/ModelValidatorBuilderTests.java @@ -0,0 +1,42 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.services.validation; + +import org.elasticsearch.inference.TaskType; +import org.elasticsearch.test.ESTestCase; + +import java.util.Map; + +import static org.hamcrest.Matchers.isA; + +public class ModelValidatorBuilderTests extends ESTestCase { + public void testBuildModelValidator_NullTaskType() { + assertThrows(IllegalArgumentException.class, () -> { ModelValidatorBuilder.buildModelValidator(null); }); + } + + public void testBuildModelValidator_ValidTaskType() { + taskTypeToModelValidatorClassMap().forEach((taskType, modelValidatorClass) -> { + assertThat(ModelValidatorBuilder.buildModelValidator(taskType), isA(modelValidatorClass)); + }); + } + + private Map> taskTypeToModelValidatorClassMap() { + return Map.of( + TaskType.TEXT_EMBEDDING, + TextEmbeddingModelValidator.class, + TaskType.SPARSE_EMBEDDING, + SimpleModelValidator.class, + TaskType.RERANK, + SimpleModelValidator.class, + TaskType.COMPLETION, + SimpleModelValidator.class, + TaskType.ANY, + SimpleModelValidator.class + ); + } +} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/validation/SimpleModelValidatorTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/validation/SimpleModelValidatorTests.java new file mode 100644 index 0000000000000..b14a1f8f3cc77 --- /dev/null +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/validation/SimpleModelValidatorTests.java @@ -0,0 +1,82 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.services.validation; + +import org.elasticsearch.ElasticsearchStatusException; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.inference.InferenceService; +import org.elasticsearch.inference.InferenceServiceResults; +import org.elasticsearch.inference.Model; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.inference.results.SparseEmbeddingResultsTests; +import org.junit.Before; +import org.mockito.Mock; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.verifyNoMoreInteractions; +import static org.mockito.Mockito.when; +import static org.mockito.MockitoAnnotations.openMocks; + +public class SimpleModelValidatorTests extends ESTestCase { + @Mock + private ServiceIntegrationValidator mockServiceIntegrationValidator; + @Mock + private InferenceService mockInferenceService; + @Mock + private Model mockModel; + @Mock + private ActionListener mockActionListener; + + private SimpleModelValidator underTest; + + @Before + public void setup() { + openMocks(this); + + underTest = new SimpleModelValidator(mockServiceIntegrationValidator); + + when(mockActionListener.delegateFailureAndWrap(any())).thenCallRealMethod(); + } + + public void testValidate_ServiceIntegrationValidatorThrowsException() { + doThrow(ElasticsearchStatusException.class).when(mockServiceIntegrationValidator) + .validate(eq(mockInferenceService), eq(mockModel), any()); + + assertThrows( + ElasticsearchStatusException.class, + () -> { underTest.validate(mockInferenceService, mockModel, mockActionListener); } + ); + verifyInteractions(); + } + + public void testValidate_ServiceReturnsInferenceServiceResults() { + mockCallToServiceIntegrationValidator(SparseEmbeddingResultsTests.createRandomResults()); + verify(mockActionListener).onResponse(mockModel); + verifyInteractions(); + } + + private void mockCallToServiceIntegrationValidator(InferenceServiceResults results) { + doAnswer(ans -> { + ActionListener responseListener = ans.getArgument(2); + responseListener.onResponse(results); + return null; + }).when(mockServiceIntegrationValidator).validate(eq(mockInferenceService), eq(mockModel), any()); + + underTest.validate(mockInferenceService, mockModel, mockActionListener); + } + + private void verifyInteractions() { + verify(mockServiceIntegrationValidator).validate(eq(mockInferenceService), eq(mockModel), any()); + verify(mockActionListener).delegateFailureAndWrap(any()); + verifyNoMoreInteractions(mockServiceIntegrationValidator, mockInferenceService, mockModel, mockActionListener); + } +} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/validation/SimpleServiceIntegrationValidatorTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/validation/SimpleServiceIntegrationValidatorTests.java new file mode 100644 index 0000000000000..23000ce431e7b --- /dev/null +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/validation/SimpleServiceIntegrationValidatorTests.java @@ -0,0 +1,128 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.services.validation; + +import org.elasticsearch.ElasticsearchStatusException; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.inference.InferenceService; +import org.elasticsearch.inference.InferenceServiceResults; +import org.elasticsearch.inference.InputType; +import org.elasticsearch.inference.Model; +import org.elasticsearch.inference.TaskType; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.core.inference.action.InferenceAction; +import org.junit.Before; +import org.mockito.Mock; + +import java.util.List; +import java.util.Map; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.verifyNoMoreInteractions; +import static org.mockito.Mockito.when; +import static org.mockito.MockitoAnnotations.openMocks; + +public class SimpleServiceIntegrationValidatorTests extends ESTestCase { + + private static final List TEST_INPUT = List.of("how big"); + private static final String TEST_QUERY = "test query"; + + @Mock + private InferenceService mockInferenceService; + @Mock + private Model mockModel; + @Mock + private ActionListener mockActionListener; + @Mock + private InferenceServiceResults mockInferenceServiceResults; + + private SimpleServiceIntegrationValidator underTest; + + @Before + public void setup() { + openMocks(this); + + underTest = new SimpleServiceIntegrationValidator(); + + when(mockActionListener.delegateFailureAndWrap(any())).thenCallRealMethod(); + } + + public void testValidate_ServiceThrowsException() { + when(mockModel.getTaskType()).thenReturn(TaskType.TEXT_EMBEDDING); + + doThrow(ElasticsearchStatusException.class).when(mockInferenceService) + .infer( + eq(mockModel), + eq(null), + eq(TEST_INPUT), + eq(Map.of()), + eq(InputType.INGEST), + eq(InferenceAction.Request.DEFAULT_TIMEOUT), + any() + ); + + assertThrows(ElasticsearchStatusException.class, () -> { + underTest.validate(mockInferenceService, mockModel, mockActionListener);}); + + verifyCallToService(false); + } + + public void testValidate_SuccessfulCallToServiceForNonReRankTaskType() { + when(mockModel.getTaskType()).thenReturn(randomValueOtherThan(TaskType.RERANK, () -> randomFrom(TaskType.values()))); + + mockSuccessfulCallToService(null, mockInferenceServiceResults); + verify(mockActionListener).onResponse(mockInferenceServiceResults); + verifyCallToService(false); + } + + public void testValidate_SuccessfulCallToServiceForReRankTaskType() { + when(mockModel.getTaskType()).thenReturn(TaskType.RERANK); + + mockSuccessfulCallToService(TEST_QUERY, mockInferenceServiceResults); + verify(mockActionListener).onResponse(mockInferenceServiceResults); + verifyCallToService(true); + } + + private void mockSuccessfulCallToService(String query, InferenceServiceResults result) { + doAnswer(ans -> { + ActionListener responseListener = ans.getArgument(6); + responseListener.onResponse(result); + return null; + }).when(mockInferenceService) + .infer( + eq(mockModel), + eq(query), + eq(TEST_INPUT), + eq(Map.of()), + eq(InputType.INGEST), + eq(InferenceAction.Request.DEFAULT_TIMEOUT), + any() + ); + + underTest.validate(mockInferenceService, mockModel, mockActionListener); + } + + private void 
verifyCallToService(boolean withQuery) { + verify(mockModel).getTaskType(); + verify(mockInferenceService).infer( + eq(mockModel), + eq(withQuery ? TEST_QUERY : null), + eq(TEST_INPUT), + eq(Map.of()), + eq(InputType.INGEST), + eq(InferenceAction.Request.DEFAULT_TIMEOUT), + any() + ); + verify(mockActionListener).delegateFailureAndWrap(any()); + verifyNoMoreInteractions(mockInferenceService, mockModel, mockActionListener, mockInferenceServiceResults); + } +} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/validation/TextEmbeddingModelValidatorTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/validation/TextEmbeddingModelValidatorTests.java new file mode 100644 index 0000000000000..d608b42841305 --- /dev/null +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/validation/TextEmbeddingModelValidatorTests.java @@ -0,0 +1,156 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.services.validation; + +import org.elasticsearch.ElasticsearchStatusException; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.inference.InferenceService; +import org.elasticsearch.inference.InferenceServiceResults; +import org.elasticsearch.inference.Model; +import org.elasticsearch.inference.ServiceSettings; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.core.inference.results.InferenceTextEmbeddingByteResults; +import org.elasticsearch.xpack.core.inference.results.InferenceTextEmbeddingFloatResults; +import org.elasticsearch.xpack.inference.EmptyTaskSettingsTests; +import org.elasticsearch.xpack.inference.ModelConfigurationsTests; +import org.elasticsearch.xpack.inference.results.InferenceTextEmbeddingByteResultsTests; +import org.elasticsearch.xpack.inference.results.SparseEmbeddingResultsTests; +import org.junit.Before; +import org.mockito.Mock; + +import java.util.List; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.verifyNoMoreInteractions; +import static org.mockito.Mockito.when; +import static org.mockito.MockitoAnnotations.openMocks; + +public class TextEmbeddingModelValidatorTests extends ESTestCase { + @Mock + private ServiceIntegrationValidator mockServiceIntegrationValidator; + @Mock + private InferenceService mockInferenceService; + @Mock + private Model mockModel; + @Mock + private ActionListener mockActionListener; + @Mock + private ServiceSettings mockServiceSettings; + + private TextEmbeddingModelValidator underTest; + + @Before + public void setup() { + openMocks(this); + + underTest = new TextEmbeddingModelValidator(mockServiceIntegrationValidator); + + when(mockInferenceService.updateModelWithEmbeddingDetails(eq(mockModel), anyInt())).thenReturn(mockModel); + when(mockActionListener.delegateFailureAndWrap(any())).thenCallRealMethod(); + when(mockModel.getServiceSettings()).thenReturn(mockServiceSettings); + 
when(mockModel.getInferenceEntityId()).thenReturn(randomAlphaOfLength(10)); + } + + public void testValidate_ServiceIntegrationValidatorThrowsException() { + doThrow(ElasticsearchStatusException.class).when(mockServiceIntegrationValidator) + .validate(eq(mockInferenceService), eq(mockModel), any()); + + assertThrows( + ElasticsearchStatusException.class, + () -> { underTest.validate(mockInferenceService, mockModel, mockActionListener); } + ); + + verify(mockServiceIntegrationValidator).validate(eq(mockInferenceService), eq(mockModel), any()); + verify(mockActionListener).delegateFailureAndWrap(any()); + verifyNoMoreInteractions(mockServiceIntegrationValidator, mockInferenceService, mockModel, mockActionListener, mockServiceSettings); + } + + public void testValidate_ServiceReturnsNullResults() { + mockCallToServiceIntegrationValidator(null); + verify(mockActionListener).onFailure(any(ElasticsearchStatusException.class)); + verifyNoMoreInteractions(mockServiceIntegrationValidator, mockInferenceService, mockModel, mockActionListener, mockServiceSettings); + } + + public void testValidate_ServiceReturnsNonTextEmbeddingResults() { + mockCallToServiceIntegrationValidator(SparseEmbeddingResultsTests.createRandomResults()); + verify(mockActionListener).onFailure(any(ElasticsearchStatusException.class)); + verifyNoMoreInteractions(mockServiceIntegrationValidator, mockInferenceService, mockModel, mockActionListener, mockServiceSettings); + } + + public void testValidate_RetrievingEmbeddingSizeThrowsIllegalStateException() { + InferenceTextEmbeddingFloatResults results = new InferenceTextEmbeddingFloatResults(List.of()); + + when(mockServiceSettings.dimensionsSetByUser()).thenReturn(true); + when(mockServiceSettings.dimensions()).thenReturn(randomNonNegativeInt()); + + mockCallToServiceIntegrationValidator(results); + verify(mockActionListener).onFailure(any(ElasticsearchStatusException.class)); + verify(mockModel, times(1)).getServiceSettings(); + verify(mockServiceSettings).dimensions(); + verifyNoMoreInteractions(mockServiceIntegrationValidator, mockInferenceService, mockModel, mockActionListener, mockServiceSettings); + } + + public void testValidate_DimensionsSetByUserDoNotEqualEmbeddingSize() { + InferenceTextEmbeddingByteResults results = InferenceTextEmbeddingByteResultsTests.createRandomResults(); + var dimensions = randomValueOtherThan(results.getFirstEmbeddingSize(), ESTestCase::randomNonNegativeInt); + + when(mockServiceSettings.dimensionsSetByUser()).thenReturn(true); + when(mockServiceSettings.dimensions()).thenReturn(dimensions); + + mockCallToServiceIntegrationValidator(results); + verify(mockActionListener).onFailure(any(ElasticsearchStatusException.class)); + verify(mockModel).getServiceSettings(); + verify(mockModel).getInferenceEntityId(); + verify(mockServiceSettings).dimensionsSetByUser(); + verify(mockServiceSettings, times(2)).dimensions(); + verifyNoMoreInteractions(mockServiceIntegrationValidator, mockInferenceService, mockModel, mockActionListener, mockServiceSettings); + } + + public void testValidate_DimensionsSetByUserEqualEmbeddingSize() { + mockSuccessfulValidation(true); + } + + public void testValidate_DimensionsNotSetByUser() { + mockSuccessfulValidation(false); + } + + private void mockSuccessfulValidation(Boolean dimensionsSetByUser) { + InferenceTextEmbeddingByteResults results = InferenceTextEmbeddingByteResultsTests.createRandomResults(); + when(mockModel.getConfigurations()).thenReturn(ModelConfigurationsTests.createRandomInstance()); + 
when(mockModel.getTaskSettings()).thenReturn(EmptyTaskSettingsTests.createRandom());
+        when(mockServiceSettings.dimensionsSetByUser()).thenReturn(dimensionsSetByUser);
+        when(mockServiceSettings.dimensions()).thenReturn(dimensionsSetByUser ? results.getFirstEmbeddingSize() : null);
+
+        mockCallToServiceIntegrationValidator(results);
+        verify(mockActionListener).onResponse(mockModel);
+        verify(mockModel).getServiceSettings();
+        verify(mockServiceSettings).dimensionsSetByUser();
+        verify(mockServiceSettings).dimensions();
+        verify(mockInferenceService).updateModelWithEmbeddingDetails(mockModel, results.getFirstEmbeddingSize());
+        verifyNoMoreInteractions(mockServiceIntegrationValidator, mockInferenceService, mockModel, mockActionListener, mockServiceSettings);
+    }
+
+    private void mockCallToServiceIntegrationValidator(InferenceServiceResults results) {
+        doAnswer(ans -> {
+            ActionListener responseListener = ans.getArgument(2);
+            responseListener.onResponse(results);
+            return null;
+        }).when(mockServiceIntegrationValidator).validate(eq(mockInferenceService), eq(mockModel), any());
+
+        underTest.validate(mockInferenceService, mockModel, mockActionListener);
+
+        verify(mockServiceIntegrationValidator).validate(eq(mockInferenceService), eq(mockModel), any());
+        verify(mockActionListener).delegateFailureAndWrap(any());
+    }
+}

From 6ca4fb21c0948301f92cba2ab32dd8b824a8cc01 Mon Sep 17 00:00:00 2001
From: David Turner
Date: Wed, 25 Sep 2024 19:42:56 +0100
Subject: [PATCH 51/58] Replace `DeleteIndexClusterStateUpdateRequest` with private class (#113509)

No need to extend `IndicesClusterStateUpdateRequest` here, nor to attach
the listener to the request. Instead we can encapsulate the cluster
state update task directly.

Backport of #113288 to 8.x
---
 .../snapshots/ConcurrentSnapshotsIT.java      | 21 ++---
 .../DeleteIndexClusterStateUpdateRequest.java | 56 -------------
 .../delete/TransportDeleteIndexAction.java    | 15 ++--
 .../metadata/MetadataDeleteIndexService.java  | 81 +++++++++++++++----
 .../MetadataDeleteIndexServiceTests.java      |  7 +-
 5 files changed, 92 insertions(+), 88 deletions(-)
 delete mode 100644 server/src/main/java/org/elasticsearch/action/admin/indices/delete/DeleteIndexClusterStateUpdateRequest.java

diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/ConcurrentSnapshotsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/ConcurrentSnapshotsIT.java
index 61a1168b128b9..de62c0152817a 100644
--- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/ConcurrentSnapshotsIT.java
+++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/ConcurrentSnapshotsIT.java
@@ -18,7 +18,6 @@ import org.elasticsearch.action.admin.cluster.snapshots.restore.RestoreSnapshotResponse;
 import org.elasticsearch.action.admin.cluster.snapshots.status.SnapshotStatus;
 import org.elasticsearch.action.admin.cluster.snapshots.status.SnapshotsStatusResponse;
-import org.elasticsearch.action.admin.indices.delete.DeleteIndexClusterStateUpdateRequest;
 import org.elasticsearch.action.support.ActionTestUtils;
 import org.elasticsearch.action.support.GroupedActionListener;
 import org.elasticsearch.action.support.PlainActionFuture;
@@ -33,9 +32,7 @@ import org.elasticsearch.common.util.concurrent.ListenableFuture;
 import org.elasticsearch.common.util.concurrent.UncategorizedExecutionException;
 import org.elasticsearch.core.PathUtils;
-import org.elasticsearch.core.TimeValue;
 import org.elasticsearch.discovery.AbstractDisruptionTestCase;
-import
org.elasticsearch.index.Index; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.repositories.IndexId; import org.elasticsearch.repositories.RepositoryConflictException; @@ -59,6 +56,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; @@ -2214,12 +2212,17 @@ public void testDeleteIndexWithOutOfOrderFinalization() { .anyMatch(e -> e.snapshot().getSnapshotId().getName().equals("snapshot-with-index-1") && e.state().completed()) ) // execute the index deletion _directly on the master_ so it happens before the snapshot finalization executes - .andThen(l -> masterDeleteIndexService.deleteIndices(new DeleteIndexClusterStateUpdateRequest(l.map(r -> { - assertTrue(r.isAcknowledged()); - return null; - })).indices(new Index[] { internalCluster().clusterService().state().metadata().index(indexToDelete).getIndex() }) - .ackTimeout(TimeValue.timeValueSeconds(10)) - .masterNodeTimeout(TimeValue.timeValueSeconds(10)))) + .andThen( + l -> masterDeleteIndexService.deleteIndices( + TEST_REQUEST_TIMEOUT, + TEST_REQUEST_TIMEOUT, + Set.of(internalCluster().clusterService().state().metadata().index(indexToDelete).getIndex()), + l.map(r -> { + assertTrue(r.isAcknowledged()); + return null; + }) + ) + ) // ultimately create the index again so that taking a full snapshot will pick up any missing shard gen blob, and deleting that // full snapshot will clean up all dangling shard-level blobs .andThen((l, ignored) -> prepareCreate(indexToDelete, indexSettingsNoReplicas(1)).execute(l.map(r -> { diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/delete/DeleteIndexClusterStateUpdateRequest.java b/server/src/main/java/org/elasticsearch/action/admin/indices/delete/DeleteIndexClusterStateUpdateRequest.java deleted file mode 100644 index cc6971a0f584b..0000000000000 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/delete/DeleteIndexClusterStateUpdateRequest.java +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". 
- */ -package org.elasticsearch.action.admin.indices.delete; - -import org.elasticsearch.action.ActionListener; -import org.elasticsearch.action.support.master.AcknowledgedResponse; -import org.elasticsearch.cluster.ClusterStateAckListener; -import org.elasticsearch.cluster.ClusterStateTaskListener; -import org.elasticsearch.cluster.ack.IndicesClusterStateUpdateRequest; -import org.elasticsearch.cluster.node.DiscoveryNode; - -/** - * Cluster state update request that allows to close one or more indices - */ -public class DeleteIndexClusterStateUpdateRequest extends IndicesClusterStateUpdateRequest - implements - ClusterStateAckListener, - ClusterStateTaskListener { - - private final ActionListener listener; - - public DeleteIndexClusterStateUpdateRequest(ActionListener listener) { - this.listener = listener; - } - - @Override - public void onFailure(Exception e) { - listener.onFailure(e); - } - - @Override - public boolean mustAck(DiscoveryNode discoveryNode) { - return true; - } - - @Override - public void onAllNodesAcked() { - listener.onResponse(AcknowledgedResponse.TRUE); - } - - @Override - public void onAckFailure(Exception e) { - listener.onResponse(AcknowledgedResponse.FALSE); - } - - @Override - public void onAckTimeout() { - listener.onResponse(AcknowledgedResponse.FALSE); - } -} diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/delete/TransportDeleteIndexAction.java b/server/src/main/java/org/elasticsearch/action/admin/indices/delete/TransportDeleteIndexAction.java index 24c152c1a1947..fbf95699dd03a 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/delete/TransportDeleteIndexAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/delete/TransportDeleteIndexAction.java @@ -92,11 +92,14 @@ protected void masterOperation( return; } - DeleteIndexClusterStateUpdateRequest deleteRequest = new DeleteIndexClusterStateUpdateRequest(listener.delegateResponse((l, e) -> { - logger.debug(() -> "failed to delete indices [" + concreteIndices + "]", e); - listener.onFailure(e); - })).ackTimeout(request.ackTimeout()).masterNodeTimeout(request.masterNodeTimeout()).indices(concreteIndices.toArray(new Index[0])); - - deleteIndexService.deleteIndices(deleteRequest); + deleteIndexService.deleteIndices( + request.masterNodeTimeout(), + request.ackTimeout(), + concreteIndices, + listener.delegateResponse((l, e) -> { + logger.debug(() -> "failed to delete indices [" + concreteIndices + "]", e); + listener.onFailure(e); + }) + ); } } diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataDeleteIndexService.java b/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataDeleteIndexService.java index ced5d4e490478..5d1a037d6bc3e 100644 --- a/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataDeleteIndexService.java +++ b/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataDeleteIndexService.java @@ -11,13 +11,16 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import org.elasticsearch.action.admin.indices.delete.DeleteIndexClusterStateUpdateRequest; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.support.master.AcknowledgedResponse; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.ClusterStateAckListener; import org.elasticsearch.cluster.ClusterStateTaskExecutor; +import org.elasticsearch.cluster.ClusterStateTaskListener; import org.elasticsearch.cluster.RestoreInProgress; import 
org.elasticsearch.cluster.SimpleBatchedAckListenerTaskExecutor; import org.elasticsearch.cluster.block.ClusterBlocks; +import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.routing.RoutingTable; import org.elasticsearch.cluster.routing.allocation.AllocationService; import org.elasticsearch.cluster.service.ClusterService; @@ -25,7 +28,7 @@ import org.elasticsearch.common.Priority; import org.elasticsearch.common.collect.ImmutableOpenMap; import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.common.util.set.Sets; +import org.elasticsearch.core.TimeValue; import org.elasticsearch.core.Tuple; import org.elasticsearch.index.Index; import org.elasticsearch.injection.guice.Inject; @@ -33,10 +36,10 @@ import org.elasticsearch.snapshots.SnapshotInProgressException; import org.elasticsearch.snapshots.SnapshotsService; -import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.Map; +import java.util.Objects; import java.util.Set; import static org.elasticsearch.cluster.routing.allocation.allocator.AllocationActionListener.rerouteCompletionIsNotRequired; @@ -48,22 +51,19 @@ public class MetadataDeleteIndexService { private static final Logger logger = LogManager.getLogger(MetadataDeleteIndexService.class); - private final Settings settings; - // package private for tests - final ClusterStateTaskExecutor executor; - private final MasterServiceTaskQueue taskQueue; + final ClusterStateTaskExecutor executor; + private final MasterServiceTaskQueue taskQueue; @Inject public MetadataDeleteIndexService(Settings settings, ClusterService clusterService, AllocationService allocationService) { - this.settings = settings; executor = new SimpleBatchedAckListenerTaskExecutor<>() { @Override public Tuple executeTask( - DeleteIndexClusterStateUpdateRequest task, + DeleteIndicesClusterStateUpdateTask task, ClusterState clusterState ) { - return Tuple.tuple(MetadataDeleteIndexService.deleteIndices(clusterState, Sets.newHashSet(task.indices()), settings), task); + return Tuple.tuple(MetadataDeleteIndexService.deleteIndices(clusterState, task.indices, settings), task); } @Override @@ -81,11 +81,64 @@ public ClusterState afterBatchExecution(ClusterState clusterState, boolean clust taskQueue = clusterService.createTaskQueue("delete-index", Priority.URGENT, executor); } - public void deleteIndices(final DeleteIndexClusterStateUpdateRequest request) { - if (request.indices() == null || request.indices().length == 0) { - throw new IllegalArgumentException("Index name is required"); + public void deleteIndices( + TimeValue masterNodeTimeout, + TimeValue ackTimeout, + Set indices, + ActionListener listener + ) { + if (indices == null || indices.isEmpty()) { + throw new IllegalArgumentException("Indices are required"); + } + taskQueue.submitTask( + "delete-index " + indices, + new DeleteIndicesClusterStateUpdateTask(indices, ackTimeout, listener), + masterNodeTimeout + ); + } + + // package private for tests + static class DeleteIndicesClusterStateUpdateTask implements ClusterStateTaskListener, ClusterStateAckListener { + + private final Set indices; + private final TimeValue ackTimeout; + private final ActionListener listener; + + DeleteIndicesClusterStateUpdateTask(Set indices, TimeValue ackTimeout, ActionListener listener) { + this.indices = Objects.requireNonNull(indices); + this.ackTimeout = Objects.requireNonNull(ackTimeout); + this.listener = Objects.requireNonNull(listener); + } + + @Override + public boolean mustAck(DiscoveryNode 
discoveryNode) { + return true; + } + + @Override + public void onAllNodesAcked() { + listener.onResponse(AcknowledgedResponse.TRUE); + } + + @Override + public void onAckFailure(Exception e) { + listener.onResponse(AcknowledgedResponse.FALSE); + } + + @Override + public void onAckTimeout() { + listener.onResponse(AcknowledgedResponse.FALSE); + } + + @Override + public TimeValue ackTimeout() { + return ackTimeout; + } + + @Override + public void onFailure(Exception e) { + listener.onFailure(e); } - taskQueue.submitTask("delete-index " + Arrays.toString(request.indices()), request, request.masterNodeTimeout()); } /** diff --git a/server/src/test/java/org/elasticsearch/cluster/metadata/MetadataDeleteIndexServiceTests.java b/server/src/test/java/org/elasticsearch/cluster/metadata/MetadataDeleteIndexServiceTests.java index b0b229f19dd7d..0354b6f0bcea8 100644 --- a/server/src/test/java/org/elasticsearch/cluster/metadata/MetadataDeleteIndexServiceTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/metadata/MetadataDeleteIndexServiceTests.java @@ -9,7 +9,6 @@ package org.elasticsearch.cluster.metadata; import org.elasticsearch.action.ActionListener; -import org.elasticsearch.action.admin.indices.delete.DeleteIndexClusterStateUpdateRequest; import org.elasticsearch.cluster.ClusterName; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.SnapshotsInProgress; @@ -132,8 +131,10 @@ public void testDeleteUnassigned() throws Exception { before, service.executor, List.of( - new DeleteIndexClusterStateUpdateRequest(ActionListener.noop()).indices( - new Index[] { before.metadata().getIndices().get(index).getIndex() } + new MetadataDeleteIndexService.DeleteIndicesClusterStateUpdateTask( + Set.of(before.metadata().getIndices().get(index).getIndex()), + TEST_REQUEST_TIMEOUT, + ActionListener.noop() ) ) ); From 0e6bbb0bea4d0df1ba81fe7335d49ca804b3601e Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Wed, 25 Sep 2024 15:18:20 -0400 Subject: [PATCH 52/58] ESQL: TOP support for strings (#113183) (#113408) Adds support to the `TOP` aggregation for `keyword` and `text` field types. 
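For example, a query like the following now works, collecting the three
smallest keyword values per group (an illustrative sketch; the `employees`
index and its fields are hypothetical):

    FROM employees
    | STATS top_names = TOP(first_name, 3, "asc") BY country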
Closes #109849 --- docs/changelog/113183.yaml | 6 + .../esql/functions/kibana/definition/top.json | 48 +++ .../esql/functions/types/top.asciidoc | 2 + x-pack/plugin/esql/compute/build.gradle | 5 + .../aggregation/TopBytesRefAggregator.java | 146 +++++++ .../TopBytesRefAggregatorFunction.java | 174 ++++++++ ...TopBytesRefAggregatorFunctionSupplier.java | 45 ++ ...TopBytesRefGroupingAggregatorFunction.java | 221 ++++++++++ .../aggregation/X-TopAggregator.java.st | 15 +- .../compute/data/sort/BucketedSortCommon.java | 68 +++ .../data/sort/BytesRefBucketedSort.java | 386 ++++++++++++++++++ .../compute/data/sort/IpBucketedSort.java | 82 ++-- ...actTopBytesRefAggregatorFunctionTests.java | 37 ++ ...tesRefGroupingAggregatorFunctionTests.java | 49 +++ .../TopBytesRefAggregatorFunctionTests.java | 29 ++ ...tesRefGroupingAggregatorFunctionTests.java | 35 ++ .../TopIpAggregatorFunctionTests.java | 25 +- .../TopIpGroupingAggregatorFunctionTests.java | 43 +- .../data/sort/BytesRefBucketedSortTests.java | 79 ++++ .../esql/qa/mixed/MixedClusterEsqlSpecIT.java | 4 + .../xpack/esql/ccq/MultiClusterSpecIT.java | 4 + .../src/main/resources/meta.csv-spec | 30 +- .../src/main/resources/stats_top.csv-spec | 74 ++++ .../xpack/esql/action/EsqlCapabilities.java | 12 + .../expression/function/aggregate/Top.java | 10 +- .../xpack/esql/planner/AggregateMapper.java | 2 +- .../function/aggregate/TopTests.java | 4 +- 27 files changed, 1507 insertions(+), 128 deletions(-) create mode 100644 docs/changelog/113183.yaml create mode 100644 x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopBytesRefAggregator.java create mode 100644 x-pack/plugin/esql/compute/src/main/generated/org/elasticsearch/compute/aggregation/TopBytesRefAggregatorFunction.java create mode 100644 x-pack/plugin/esql/compute/src/main/generated/org/elasticsearch/compute/aggregation/TopBytesRefAggregatorFunctionSupplier.java create mode 100644 x-pack/plugin/esql/compute/src/main/generated/org/elasticsearch/compute/aggregation/TopBytesRefGroupingAggregatorFunction.java create mode 100644 x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/sort/BucketedSortCommon.java create mode 100644 x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/sort/BytesRefBucketedSort.java create mode 100644 x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/AbstractTopBytesRefAggregatorFunctionTests.java create mode 100644 x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/AbstractTopBytesRefGroupingAggregatorFunctionTests.java create mode 100644 x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/TopBytesRefAggregatorFunctionTests.java create mode 100644 x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/TopBytesRefGroupingAggregatorFunctionTests.java create mode 100644 x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/sort/BytesRefBucketedSortTests.java diff --git a/docs/changelog/113183.yaml b/docs/changelog/113183.yaml new file mode 100644 index 0000000000000..f30ce9831adb3 --- /dev/null +++ b/docs/changelog/113183.yaml @@ -0,0 +1,6 @@ +pr: 113183 +summary: "ESQL: TOP support for strings" +area: ES|QL +type: feature +issues: + - 109849 diff --git a/docs/reference/esql/functions/kibana/definition/top.json b/docs/reference/esql/functions/kibana/definition/top.json index c688bf5ea77c8..bb9ae752fe823 100644 --- a/docs/reference/esql/functions/kibana/definition/top.json 
+++ b/docs/reference/esql/functions/kibana/definition/top.json @@ -124,6 +124,30 @@ "variadic" : false, "returnType" : "ip" }, + { + "params" : [ + { + "name" : "field", + "type" : "keyword", + "optional" : false, + "description" : "The field to collect the top values for." + }, + { + "name" : "limit", + "type" : "integer", + "optional" : false, + "description" : "The maximum number of values to collect." + }, + { + "name" : "order", + "type" : "keyword", + "optional" : false, + "description" : "The order to calculate the top values. Either `asc` or `desc`." + } + ], + "variadic" : false, + "returnType" : "keyword" + }, { "params" : [ { @@ -147,6 +171,30 @@ ], "variadic" : false, "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "text", + "optional" : false, + "description" : "The field to collect the top values for." + }, + { + "name" : "limit", + "type" : "integer", + "optional" : false, + "description" : "The maximum number of values to collect." + }, + { + "name" : "order", + "type" : "keyword", + "optional" : false, + "description" : "The order to calculate the top values. Either `asc` or `desc`." + } + ], + "variadic" : false, + "returnType" : "text" } ], "examples" : [ diff --git a/docs/reference/esql/functions/types/top.asciidoc b/docs/reference/esql/functions/types/top.asciidoc index 0eb329c10b9ed..25d7962a27252 100644 --- a/docs/reference/esql/functions/types/top.asciidoc +++ b/docs/reference/esql/functions/types/top.asciidoc @@ -10,5 +10,7 @@ date | integer | keyword | date double | integer | keyword | double integer | integer | keyword | integer ip | integer | keyword | ip +keyword | integer | keyword | keyword long | integer | keyword | long +text | integer | keyword | text |=== diff --git a/x-pack/plugin/esql/compute/build.gradle b/x-pack/plugin/esql/compute/build.gradle index 81d1a6f5360ca..49e819b7cdc88 100644 --- a/x-pack/plugin/esql/compute/build.gradle +++ b/x-pack/plugin/esql/compute/build.gradle @@ -635,6 +635,11 @@ tasks.named('stringTemplates').configure { it.inputFile = topAggregatorInputFile it.outputFile = "org/elasticsearch/compute/aggregation/TopBooleanAggregator.java" } + template { + it.properties = bytesRefProperties + it.inputFile = topAggregatorInputFile + it.outputFile = "org/elasticsearch/compute/aggregation/TopBytesRefAggregator.java" + } template { it.properties = ipProperties it.inputFile = topAggregatorInputFile diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopBytesRefAggregator.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopBytesRefAggregator.java new file mode 100644 index 0000000000000..c9b0e679b3e64 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/TopBytesRefAggregator.java @@ -0,0 +1,146 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.compute.aggregation; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.breaker.CircuitBreaker; +import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.compute.ann.Aggregator; +import org.elasticsearch.compute.ann.GroupingAggregator; +import org.elasticsearch.compute.ann.IntermediateState; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BlockFactory; +import org.elasticsearch.compute.data.BytesRefBlock; +import org.elasticsearch.compute.data.IntVector; +import org.elasticsearch.compute.data.sort.BytesRefBucketedSort; +import org.elasticsearch.compute.operator.DriverContext; +import org.elasticsearch.core.Releasable; +import org.elasticsearch.core.Releasables; +import org.elasticsearch.search.sort.SortOrder; + +/** + * Aggregates the top N field values for BytesRef. + *
<p>
+ *     This class is generated. Edit `X-TopAggregator.java.st` to edit this file.
+ * </p>
+ */ +@Aggregator({ @IntermediateState(name = "top", type = "BYTES_REF_BLOCK") }) +@GroupingAggregator +class TopBytesRefAggregator { + public static SingleState initSingle(BigArrays bigArrays, int limit, boolean ascending) { + return new SingleState(bigArrays, limit, ascending); + } + + public static void combine(SingleState state, BytesRef v) { + state.add(v); + } + + public static void combineIntermediate(SingleState state, BytesRefBlock values) { + int start = values.getFirstValueIndex(0); + int end = start + values.getValueCount(0); + var scratch = new BytesRef(); + for (int i = start; i < end; i++) { + combine(state, values.getBytesRef(i, scratch)); + } + } + + public static Block evaluateFinal(SingleState state, DriverContext driverContext) { + return state.toBlock(driverContext.blockFactory()); + } + + public static GroupingState initGrouping(BigArrays bigArrays, int limit, boolean ascending) { + return new GroupingState(bigArrays, limit, ascending); + } + + public static void combine(GroupingState state, int groupId, BytesRef v) { + state.add(groupId, v); + } + + public static void combineIntermediate(GroupingState state, int groupId, BytesRefBlock values, int valuesPosition) { + int start = values.getFirstValueIndex(valuesPosition); + int end = start + values.getValueCount(valuesPosition); + var scratch = new BytesRef(); + for (int i = start; i < end; i++) { + combine(state, groupId, values.getBytesRef(i, scratch)); + } + } + + public static void combineStates(GroupingState current, int groupId, GroupingState state, int statePosition) { + current.merge(groupId, state, statePosition); + } + + public static Block evaluateFinal(GroupingState state, IntVector selected, DriverContext driverContext) { + return state.toBlock(driverContext.blockFactory(), selected); + } + + public static class GroupingState implements Releasable { + private final BytesRefBucketedSort sort; + + private GroupingState(BigArrays bigArrays, int limit, boolean ascending) { + // TODO pass the breaker in from the DriverContext + CircuitBreaker breaker = bigArrays.breakerService().getBreaker(CircuitBreaker.REQUEST); + this.sort = new BytesRefBucketedSort(breaker, "top", bigArrays, ascending ? 
SortOrder.ASC : SortOrder.DESC, limit); + } + + public void add(int groupId, BytesRef value) { + sort.collect(value, groupId); + } + + public void merge(int groupId, GroupingState other, int otherGroupId) { + sort.merge(groupId, other.sort, otherGroupId); + } + + void toIntermediate(Block[] blocks, int offset, IntVector selected, DriverContext driverContext) { + blocks[offset] = toBlock(driverContext.blockFactory(), selected); + } + + Block toBlock(BlockFactory blockFactory, IntVector selected) { + return sort.toBlock(blockFactory, selected); + } + + void enableGroupIdTracking(SeenGroupIds seen) { + // we figure out seen values from nulls on the values block + } + + @Override + public void close() { + Releasables.closeExpectNoException(sort); + } + } + + public static class SingleState implements Releasable { + private final GroupingState internalState; + + private SingleState(BigArrays bigArrays, int limit, boolean ascending) { + this.internalState = new GroupingState(bigArrays, limit, ascending); + } + + public void add(BytesRef value) { + internalState.add(0, value); + } + + public void merge(GroupingState other) { + internalState.merge(0, other, 0); + } + + void toIntermediate(Block[] blocks, int offset, DriverContext driverContext) { + blocks[offset] = toBlock(driverContext.blockFactory()); + } + + Block toBlock(BlockFactory blockFactory) { + try (var intValues = blockFactory.newConstantIntVector(0, 1)) { + return internalState.toBlock(blockFactory, intValues); + } + } + + @Override + public void close() { + Releasables.closeExpectNoException(internalState); + } + } +} diff --git a/x-pack/plugin/esql/compute/src/main/generated/org/elasticsearch/compute/aggregation/TopBytesRefAggregatorFunction.java b/x-pack/plugin/esql/compute/src/main/generated/org/elasticsearch/compute/aggregation/TopBytesRefAggregatorFunction.java new file mode 100644 index 0000000000000..17b3d84ab0028 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/generated/org/elasticsearch/compute/aggregation/TopBytesRefAggregatorFunction.java @@ -0,0 +1,174 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License +// 2.0; you may not use this file except in compliance with the Elastic License +// 2.0. +package org.elasticsearch.compute.aggregation; + +import java.lang.Integer; +import java.lang.Override; +import java.lang.String; +import java.lang.StringBuilder; +import java.util.List; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BooleanVector; +import org.elasticsearch.compute.data.BytesRefBlock; +import org.elasticsearch.compute.data.BytesRefVector; +import org.elasticsearch.compute.data.ElementType; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.operator.DriverContext; + +/** + * {@link AggregatorFunction} implementation for {@link TopBytesRefAggregator}. + * This class is generated. Do not edit it. 
+ */ +public final class TopBytesRefAggregatorFunction implements AggregatorFunction { + private static final List INTERMEDIATE_STATE_DESC = List.of( + new IntermediateStateDesc("top", ElementType.BYTES_REF) ); + + private final DriverContext driverContext; + + private final TopBytesRefAggregator.SingleState state; + + private final List channels; + + private final int limit; + + private final boolean ascending; + + public TopBytesRefAggregatorFunction(DriverContext driverContext, List channels, + TopBytesRefAggregator.SingleState state, int limit, boolean ascending) { + this.driverContext = driverContext; + this.channels = channels; + this.state = state; + this.limit = limit; + this.ascending = ascending; + } + + public static TopBytesRefAggregatorFunction create(DriverContext driverContext, + List channels, int limit, boolean ascending) { + return new TopBytesRefAggregatorFunction(driverContext, channels, TopBytesRefAggregator.initSingle(driverContext.bigArrays(), limit, ascending), limit, ascending); + } + + public static List intermediateStateDesc() { + return INTERMEDIATE_STATE_DESC; + } + + @Override + public int intermediateBlockCount() { + return INTERMEDIATE_STATE_DESC.size(); + } + + @Override + public void addRawInput(Page page, BooleanVector mask) { + if (mask.isConstant()) { + if (mask.getBoolean(0) == false) { + // Entire page masked away + return; + } + // No masking + BytesRefBlock block = page.getBlock(channels.get(0)); + BytesRefVector vector = block.asVector(); + if (vector != null) { + addRawVector(vector); + } else { + addRawBlock(block); + } + return; + } + // Some positions masked away, others kept + BytesRefBlock block = page.getBlock(channels.get(0)); + BytesRefVector vector = block.asVector(); + if (vector != null) { + addRawVector(vector, mask); + } else { + addRawBlock(block, mask); + } + } + + private void addRawVector(BytesRefVector vector) { + BytesRef scratch = new BytesRef(); + for (int i = 0; i < vector.getPositionCount(); i++) { + TopBytesRefAggregator.combine(state, vector.getBytesRef(i, scratch)); + } + } + + private void addRawVector(BytesRefVector vector, BooleanVector mask) { + BytesRef scratch = new BytesRef(); + for (int i = 0; i < vector.getPositionCount(); i++) { + if (mask.getBoolean(i) == false) { + continue; + } + TopBytesRefAggregator.combine(state, vector.getBytesRef(i, scratch)); + } + } + + private void addRawBlock(BytesRefBlock block) { + BytesRef scratch = new BytesRef(); + for (int p = 0; p < block.getPositionCount(); p++) { + if (block.isNull(p)) { + continue; + } + int start = block.getFirstValueIndex(p); + int end = start + block.getValueCount(p); + for (int i = start; i < end; i++) { + TopBytesRefAggregator.combine(state, block.getBytesRef(i, scratch)); + } + } + } + + private void addRawBlock(BytesRefBlock block, BooleanVector mask) { + BytesRef scratch = new BytesRef(); + for (int p = 0; p < block.getPositionCount(); p++) { + if (mask.getBoolean(p) == false) { + continue; + } + if (block.isNull(p)) { + continue; + } + int start = block.getFirstValueIndex(p); + int end = start + block.getValueCount(p); + for (int i = start; i < end; i++) { + TopBytesRefAggregator.combine(state, block.getBytesRef(i, scratch)); + } + } + } + + @Override + public void addIntermediateInput(Page page) { + assert channels.size() == intermediateBlockCount(); + assert page.getBlockCount() >= channels.get(0) + intermediateStateDesc().size(); + Block topUncast = page.getBlock(channels.get(0)); + if (topUncast.areAllValuesNull()) { + return; + } + 
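// The intermediate state is a single block position whose (possibly multi-valued) entry carries the current top N. +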
BytesRefBlock top = (BytesRefBlock) topUncast; + assert top.getPositionCount() == 1; + BytesRef scratch = new BytesRef(); + TopBytesRefAggregator.combineIntermediate(state, top); + } + + @Override + public void evaluateIntermediate(Block[] blocks, int offset, DriverContext driverContext) { + state.toIntermediate(blocks, offset, driverContext); + } + + @Override + public void evaluateFinal(Block[] blocks, int offset, DriverContext driverContext) { + blocks[offset] = TopBytesRefAggregator.evaluateFinal(state, driverContext); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append(getClass().getSimpleName()).append("["); + sb.append("channels=").append(channels); + sb.append("]"); + return sb.toString(); + } + + @Override + public void close() { + state.close(); + } +} diff --git a/x-pack/plugin/esql/compute/src/main/generated/org/elasticsearch/compute/aggregation/TopBytesRefAggregatorFunctionSupplier.java b/x-pack/plugin/esql/compute/src/main/generated/org/elasticsearch/compute/aggregation/TopBytesRefAggregatorFunctionSupplier.java new file mode 100644 index 0000000000000..8c77d2116bf69 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/generated/org/elasticsearch/compute/aggregation/TopBytesRefAggregatorFunctionSupplier.java @@ -0,0 +1,45 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License +// 2.0; you may not use this file except in compliance with the Elastic License +// 2.0. +package org.elasticsearch.compute.aggregation; + +import java.lang.Integer; +import java.lang.Override; +import java.lang.String; +import java.util.List; +import org.elasticsearch.compute.operator.DriverContext; + +/** + * {@link AggregatorFunctionSupplier} implementation for {@link TopBytesRefAggregator}. + * This class is generated. Do not edit it. + */ +public final class TopBytesRefAggregatorFunctionSupplier implements AggregatorFunctionSupplier { + private final List channels; + + private final int limit; + + private final boolean ascending; + + public TopBytesRefAggregatorFunctionSupplier(List channels, int limit, + boolean ascending) { + this.channels = channels; + this.limit = limit; + this.ascending = ascending; + } + + @Override + public TopBytesRefAggregatorFunction aggregator(DriverContext driverContext) { + return TopBytesRefAggregatorFunction.create(driverContext, channels, limit, ascending); + } + + @Override + public TopBytesRefGroupingAggregatorFunction groupingAggregator(DriverContext driverContext) { + return TopBytesRefGroupingAggregatorFunction.create(channels, driverContext, limit, ascending); + } + + @Override + public String describe() { + return "top of bytes"; + } +} diff --git a/x-pack/plugin/esql/compute/src/main/generated/org/elasticsearch/compute/aggregation/TopBytesRefGroupingAggregatorFunction.java b/x-pack/plugin/esql/compute/src/main/generated/org/elasticsearch/compute/aggregation/TopBytesRefGroupingAggregatorFunction.java new file mode 100644 index 0000000000000..aa2d6094c8c3f --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/generated/org/elasticsearch/compute/aggregation/TopBytesRefGroupingAggregatorFunction.java @@ -0,0 +1,221 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License +// 2.0; you may not use this file except in compliance with the Elastic License +// 2.0. 
+package org.elasticsearch.compute.aggregation; + +import java.lang.Integer; +import java.lang.Override; +import java.lang.String; +import java.lang.StringBuilder; +import java.util.List; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BytesRefBlock; +import org.elasticsearch.compute.data.BytesRefVector; +import org.elasticsearch.compute.data.ElementType; +import org.elasticsearch.compute.data.IntBlock; +import org.elasticsearch.compute.data.IntVector; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.operator.DriverContext; + +/** + * {@link GroupingAggregatorFunction} implementation for {@link TopBytesRefAggregator}. + * This class is generated. Do not edit it. + */ +public final class TopBytesRefGroupingAggregatorFunction implements GroupingAggregatorFunction { + private static final List INTERMEDIATE_STATE_DESC = List.of( + new IntermediateStateDesc("top", ElementType.BYTES_REF) ); + + private final TopBytesRefAggregator.GroupingState state; + + private final List channels; + + private final DriverContext driverContext; + + private final int limit; + + private final boolean ascending; + + public TopBytesRefGroupingAggregatorFunction(List channels, + TopBytesRefAggregator.GroupingState state, DriverContext driverContext, int limit, + boolean ascending) { + this.channels = channels; + this.state = state; + this.driverContext = driverContext; + this.limit = limit; + this.ascending = ascending; + } + + public static TopBytesRefGroupingAggregatorFunction create(List channels, + DriverContext driverContext, int limit, boolean ascending) { + return new TopBytesRefGroupingAggregatorFunction(channels, TopBytesRefAggregator.initGrouping(driverContext.bigArrays(), limit, ascending), driverContext, limit, ascending); + } + + public static List intermediateStateDesc() { + return INTERMEDIATE_STATE_DESC; + } + + @Override + public int intermediateBlockCount() { + return INTERMEDIATE_STATE_DESC.size(); + } + + @Override + public GroupingAggregatorFunction.AddInput prepareProcessPage(SeenGroupIds seenGroupIds, + Page page) { + BytesRefBlock valuesBlock = page.getBlock(channels.get(0)); + BytesRefVector valuesVector = valuesBlock.asVector(); + if (valuesVector == null) { + if (valuesBlock.mayHaveNulls()) { + state.enableGroupIdTracking(seenGroupIds); + } + return new GroupingAggregatorFunction.AddInput() { + @Override + public void add(int positionOffset, IntBlock groupIds) { + addRawInput(positionOffset, groupIds, valuesBlock); + } + + @Override + public void add(int positionOffset, IntVector groupIds) { + addRawInput(positionOffset, groupIds, valuesBlock); + } + + @Override + public void close() { + } + }; + } + return new GroupingAggregatorFunction.AddInput() { + @Override + public void add(int positionOffset, IntBlock groupIds) { + addRawInput(positionOffset, groupIds, valuesVector); + } + + @Override + public void add(int positionOffset, IntVector groupIds) { + addRawInput(positionOffset, groupIds, valuesVector); + } + + @Override + public void close() { + } + }; + } + + private void addRawInput(int positionOffset, IntVector groups, BytesRefBlock values) { + BytesRef scratch = new BytesRef(); + for (int groupPosition = 0; groupPosition < groups.getPositionCount(); groupPosition++) { + int groupId = groups.getInt(groupPosition); + if (values.isNull(groupPosition + positionOffset)) { + continue; + } + int valuesStart = values.getFirstValueIndex(groupPosition + positionOffset); + int valuesEnd = 
valuesStart + values.getValueCount(groupPosition + positionOffset); + for (int v = valuesStart; v < valuesEnd; v++) { + TopBytesRefAggregator.combine(state, groupId, values.getBytesRef(v, scratch)); + } + } + } + + private void addRawInput(int positionOffset, IntVector groups, BytesRefVector values) { + BytesRef scratch = new BytesRef(); + for (int groupPosition = 0; groupPosition < groups.getPositionCount(); groupPosition++) { + int groupId = groups.getInt(groupPosition); + TopBytesRefAggregator.combine(state, groupId, values.getBytesRef(groupPosition + positionOffset, scratch)); + } + } + + private void addRawInput(int positionOffset, IntBlock groups, BytesRefBlock values) { + BytesRef scratch = new BytesRef(); + for (int groupPosition = 0; groupPosition < groups.getPositionCount(); groupPosition++) { + if (groups.isNull(groupPosition)) { + continue; + } + int groupStart = groups.getFirstValueIndex(groupPosition); + int groupEnd = groupStart + groups.getValueCount(groupPosition); + for (int g = groupStart; g < groupEnd; g++) { + int groupId = groups.getInt(g); + if (values.isNull(groupPosition + positionOffset)) { + continue; + } + int valuesStart = values.getFirstValueIndex(groupPosition + positionOffset); + int valuesEnd = valuesStart + values.getValueCount(groupPosition + positionOffset); + for (int v = valuesStart; v < valuesEnd; v++) { + TopBytesRefAggregator.combine(state, groupId, values.getBytesRef(v, scratch)); + } + } + } + } + + private void addRawInput(int positionOffset, IntBlock groups, BytesRefVector values) { + BytesRef scratch = new BytesRef(); + for (int groupPosition = 0; groupPosition < groups.getPositionCount(); groupPosition++) { + if (groups.isNull(groupPosition)) { + continue; + } + int groupStart = groups.getFirstValueIndex(groupPosition); + int groupEnd = groupStart + groups.getValueCount(groupPosition); + for (int g = groupStart; g < groupEnd; g++) { + int groupId = groups.getInt(g); + TopBytesRefAggregator.combine(state, groupId, values.getBytesRef(groupPosition + positionOffset, scratch)); + } + } + } + + @Override + public void selectedMayContainUnseenGroups(SeenGroupIds seenGroupIds) { + state.enableGroupIdTracking(seenGroupIds); + } + + @Override + public void addIntermediateInput(int positionOffset, IntVector groups, Page page) { + state.enableGroupIdTracking(new SeenGroupIds.Empty()); + assert channels.size() == intermediateBlockCount(); + Block topUncast = page.getBlock(channels.get(0)); + if (topUncast.areAllValuesNull()) { + return; + } + BytesRefBlock top = (BytesRefBlock) topUncast; + BytesRef scratch = new BytesRef(); + for (int groupPosition = 0; groupPosition < groups.getPositionCount(); groupPosition++) { + int groupId = groups.getInt(groupPosition); + TopBytesRefAggregator.combineIntermediate(state, groupId, top, groupPosition + positionOffset); + } + } + + @Override + public void addIntermediateRowInput(int groupId, GroupingAggregatorFunction input, int position) { + if (input.getClass() != getClass()) { + throw new IllegalArgumentException("expected " + getClass() + "; got " + input.getClass()); + } + TopBytesRefAggregator.GroupingState inState = ((TopBytesRefGroupingAggregatorFunction) input).state; + state.enableGroupIdTracking(new SeenGroupIds.Empty()); + TopBytesRefAggregator.combineStates(state, groupId, inState, position); + } + + @Override + public void evaluateIntermediate(Block[] blocks, int offset, IntVector selected) { + state.toIntermediate(blocks, offset, selected, driverContext); + } + + @Override + public void 
evaluateFinal(Block[] blocks, int offset, IntVector selected, + DriverContext driverContext) { + blocks[offset] = TopBytesRefAggregator.evaluateFinal(state, selected, driverContext); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append(getClass().getSimpleName()).append("["); + sb.append("channels=").append(channels); + sb.append("]"); + return sb.toString(); + } + + @Override + public void close() { + state.close(); + } +} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/X-TopAggregator.java.st b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/X-TopAggregator.java.st index b97d26ee6147d..18d573eea4a4c 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/X-TopAggregator.java.st +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/X-TopAggregator.java.st @@ -7,9 +7,12 @@ package org.elasticsearch.compute.aggregation; -$if(Ip)$ +$if(BytesRef || Ip)$ import org.apache.lucene.util.BytesRef; $endif$ +$if(BytesRef)$ +import org.elasticsearch.common.breaker.CircuitBreaker; +$endif$ import org.elasticsearch.common.util.BigArrays; import org.elasticsearch.compute.ann.Aggregator; import org.elasticsearch.compute.ann.GroupingAggregator; @@ -49,7 +52,7 @@ class Top$Name$Aggregator { public static void combineIntermediate(SingleState state, $Type$Block values) { int start = values.getFirstValueIndex(0); int end = start + values.getValueCount(0); -$if(Ip)$ +$if(BytesRef || Ip)$ var scratch = new BytesRef(); for (int i = start; i < end; i++) { combine(state, values.get$Type$(i, scratch)); @@ -76,7 +79,7 @@ $endif$ public static void combineIntermediate(GroupingState state, int groupId, $Type$Block values, int valuesPosition) { int start = values.getFirstValueIndex(valuesPosition); int end = start + values.getValueCount(valuesPosition); -$if(Ip)$ +$if(BytesRef || Ip)$ var scratch = new BytesRef(); for (int i = start; i < end; i++) { combine(state, groupId, values.get$Type$(i, scratch)); @@ -100,7 +103,13 @@ $endif$ private final $Name$BucketedSort sort; private GroupingState(BigArrays bigArrays, int limit, boolean ascending) { +$if(BytesRef)$ + // TODO pass the breaker in from the DriverContext + CircuitBreaker breaker = bigArrays.breakerService().getBreaker(CircuitBreaker.REQUEST); + this.sort = new BytesRefBucketedSort(breaker, "top", bigArrays, ascending ? SortOrder.ASC : SortOrder.DESC, limit); +$else$ this.sort = new $Name$BucketedSort(bigArrays, ascending ? SortOrder.ASC : SortOrder.DESC, limit); +$endif$ } public void add(int groupId, $type$ value) { diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/sort/BucketedSortCommon.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/sort/BucketedSortCommon.java new file mode 100644 index 0000000000000..58306f2140a82 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/sort/BucketedSortCommon.java @@ -0,0 +1,68 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.compute.data.sort; + +import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.common.util.BitArray; +import org.elasticsearch.core.Releasable; +import org.elasticsearch.search.sort.SortOrder; + +/** + * Components common to BucketedSort implementations. + */ +class BucketedSortCommon implements Releasable { + final BigArrays bigArrays; + final SortOrder order; + final int bucketSize; + + /** + * {@code true} if the bucket is in heap mode, {@code false} if + * it is still gathering. + */ + private final BitArray heapMode; + + BucketedSortCommon(BigArrays bigArrays, SortOrder order, int bucketSize) { + this.bigArrays = bigArrays; + this.order = order; + this.bucketSize = bucketSize; + this.heapMode = new BitArray(0, bigArrays); + } + + /** + * The first index in a bucket. Note that this might not be used. + * See {@link } + */ + long rootIndex(int bucket) { + return (long) bucket * bucketSize; + } + + /** + * The last index in a bucket. + */ + long endIndex(long rootIndex) { + return rootIndex + bucketSize; + } + + boolean inHeapMode(int bucket) { + return heapMode.get(bucket); + } + + void enableHeapMode(int bucket) { + heapMode.set(bucket); + } + + void assertValidNextOffset(int next) { + assert 0 <= next && next < bucketSize + : "Expected next to be in the range of valid buckets [0 <= " + next + " < " + bucketSize + "]"; + } + + @Override + public void close() { + heapMode.close(); + } +} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/sort/BytesRefBucketedSort.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/sort/BytesRefBucketedSort.java new file mode 100644 index 0000000000000..9198de53b1e04 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/sort/BytesRefBucketedSort.java @@ -0,0 +1,386 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.data.sort; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.breaker.CircuitBreaker; +import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.common.util.ByteUtils; +import org.elasticsearch.common.util.ObjectArray; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BlockFactory; +import org.elasticsearch.compute.data.IntVector; +import org.elasticsearch.compute.operator.BreakingBytesRefBuilder; +import org.elasticsearch.core.Assertions; +import org.elasticsearch.core.Releasable; +import org.elasticsearch.core.Releasables; +import org.elasticsearch.search.sort.BucketedSort; +import org.elasticsearch.search.sort.SortOrder; + +import java.util.Arrays; +import java.util.stream.IntStream; +import java.util.stream.LongStream; + +/** + * Aggregates the top N variable length {@link BytesRef} values per bucket. + * See {@link BucketedSort} for more information. + */ +public class BytesRefBucketedSort implements Releasable { + private final BucketedSortCommon common; + private final CircuitBreaker breaker; + private final String label; + + /** + * An array containing all the values on all buckets. The structure is as follows: + *
<p>
+ *     For each bucket, there are {@link BucketedSortCommon#bucketSize} elements, based
+ *     on the bucket id (0, 1, 2...). Then, for each bucket, it can be in 2 states:
+ * </p>
+ * <ul>
+ *     <li>
+ *         Gather mode: All buckets start in gather mode, and remain here while they have
+ *         less than bucketSize elements. In gather mode, the elements are stored in the
+ *         array from the highest index to the lowest index. The lowest index contains
+ *         the offset to the next slot to be filled.
+ *         <p>
+ *             This allows us to insert elements in O(1) time.
+ *         </p>
+ *         <p>
+ *             When the bucketSize-th element is collected, the bucket transitions to heap
+ *             mode, by heapifying its contents.
+ *         </p>
+ *     </li>
+ *     <li>
+ *         Heap mode: The bucket slots are organized as a min heap structure.
+ *         <p>
+ *             The root of the heap is the minimum value in the bucket,
+ *             which allows us to quickly discard new values that are not in the top N.
+ *         </p>
+ *     </li>
+ * </ul>
+ */
+ private ObjectArray<BreakingBytesRefBuilder> values;
+
+ public BytesRefBucketedSort(CircuitBreaker breaker, String label, BigArrays bigArrays, SortOrder order, int bucketSize) {
+     this.breaker = breaker;
+     this.label = label;
+     common = new BucketedSortCommon(bigArrays, order, bucketSize);
+     boolean success = false;
+     try {
+         values = bigArrays.newObjectArray(0);
+         success = true;
+     } finally {
+         if (success == false) {
+             close();
+         }
+     }
+ }
+
+ private void checkInvariant(int bucket) {
+     if (Assertions.ENABLED == false) {
+         return;
+     }
+     long rootIndex = common.rootIndex(bucket);
+     long requiredSize = common.endIndex(rootIndex);
+     if (values.size() < requiredSize) {
+         throw new AssertionError("values too short " + values.size() + " < " + requiredSize);
+     }
+     if (values.get(rootIndex) == null) {
+         throw new AssertionError("new gather offset can't be null");
+     }
+     if (common.inHeapMode(bucket) == false) {
+         common.assertValidNextOffset(getNextGatherOffset(rootIndex));
+     } else {
+         for (long l = rootIndex; l < common.endIndex(rootIndex); l++) {
+             if (values.get(l) == null) {
+                 throw new AssertionError("values missing in heap mode");
+             }
+         }
+     }
+ }
+
+ /**
+ * Collects a {@code value} into a {@code bucket}.
+ * <p>
+ *     It may or may not be inserted in the heap, depending on if it is better than the current root.
+ * </p>
+ */ + public void collect(BytesRef value, int bucket) { + long rootIndex = common.rootIndex(bucket); + if (common.inHeapMode(bucket)) { + if (betterThan(value, values.get(rootIndex).bytesRefView())) { + clearedBytesAt(rootIndex).append(value); + downHeap(rootIndex, 0); + } + checkInvariant(bucket); + return; + } + // Gathering mode + long requiredSize = common.endIndex(rootIndex); + if (values.size() < requiredSize) { + grow(requiredSize); + } + int next = getNextGatherOffset(rootIndex); + common.assertValidNextOffset(next); + long index = next + rootIndex; + clearedBytesAt(index).append(value); + if (next == 0) { + common.enableHeapMode(bucket); + heapify(rootIndex); + } else { + ByteUtils.writeIntLE(next - 1, values.get(rootIndex).bytes(), 0); + } + checkInvariant(bucket); + } + + /** + * Merge the values from {@code other}'s {@code otherGroupId} into {@code groupId}. + */ + public void merge(int bucket, BytesRefBucketedSort other, int otherBucket) { + long otherRootIndex = other.common.rootIndex(otherBucket); + if (otherRootIndex >= other.values.size()) { + // The value was never collected. + return; + } + other.checkInvariant(bucket); + long otherStart = other.startIndex(otherBucket, otherRootIndex); + long otherEnd = other.common.endIndex(otherRootIndex); + // TODO: This can be improved for heapified buckets by making use of the heap structures + for (long i = otherStart; i < otherEnd; i++) { + collect(other.values.get(i).bytesRefView(), bucket); + } + } + + /** + * Creates a block with the values from the {@code selected} groups. + */ + public Block toBlock(BlockFactory blockFactory, IntVector selected) { + // Check if the selected groups are all empty, to avoid allocating extra memory + if (IntStream.range(0, selected.getPositionCount()).map(selected::getInt).noneMatch(bucket -> { + long rootIndex = common.rootIndex(bucket); + if (rootIndex >= values.size()) { + // Never collected + return false; + } + long start = startIndex(bucket, rootIndex); + long end = common.endIndex(rootIndex); + long size = end - start; + return size > 0; + })) { + return blockFactory.newConstantNullBlock(selected.getPositionCount()); + } + + // Used to sort the values in the bucket. 
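+ // The array is reused for every selected bucket; only the first "size" slots are meaningful on each iteration.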
+ BytesRef[] bucketValues = new BytesRef[common.bucketSize]; + + try (var builder = blockFactory.newBytesRefBlockBuilder(selected.getPositionCount())) { + for (int s = 0; s < selected.getPositionCount(); s++) { + int bucket = selected.getInt(s); + long rootIndex = common.rootIndex(bucket); + if (rootIndex >= values.size()) { + // Never collected + builder.appendNull(); + continue; + } + + long start = startIndex(bucket, rootIndex); + long end = common.endIndex(rootIndex); + long size = end - start; + + if (size == 0) { + builder.appendNull(); + continue; + } + + if (size == 1) { + try (BreakingBytesRefBuilder bytes = values.get(start)) { + builder.appendBytesRef(bytes.bytesRefView()); + } + values.set(start, null); + continue; + } + + for (int i = 0; i < size; i++) { + try (BreakingBytesRefBuilder bytes = values.get(start + i)) { + bucketValues[i] = bytes.bytesRefView(); + } + values.set(start + i, null); + } + + // TODO: Make use of heap structures to faster iterate in order instead of copying and sorting + Arrays.sort(bucketValues, 0, (int) size); + + builder.beginPositionEntry(); + if (common.order == SortOrder.ASC) { + for (int i = 0; i < size; i++) { + builder.appendBytesRef(bucketValues[i]); + } + } else { + for (int i = (int) size - 1; i >= 0; i--) { + builder.appendBytesRef(bucketValues[i]); + } + } + builder.endPositionEntry(); + } + return builder.build(); + } + } + + private long startIndex(int bucket, long rootIndex) { + if (common.inHeapMode(bucket)) { + return rootIndex; + } + return rootIndex + getNextGatherOffset(rootIndex) + 1; + } + + /** + * Get the next index that should be "gathered" for a bucket rooted + * at {@code rootIndex}. + *
<p>
+ *     Using the first 4 bytes of the element to store the next gather offset.
+ * </p>
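+ * <p>
+ *     The offset is written and read little-endian via {@link ByteUtils#writeIntLE} and {@link ByteUtils#readIntLE}.
+ * </p>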
+ */ + private int getNextGatherOffset(long rootIndex) { + BreakingBytesRefBuilder bytes = values.get(rootIndex); + assert bytes.length() == Integer.BYTES; + return ByteUtils.readIntLE(bytes.bytes(), 0); + } + + /** + * {@code true} if the entry at index {@code lhs} is "better" than + * the entry at {@code rhs}. "Better" in this means "lower" for + * {@link SortOrder#ASC} and "higher" for {@link SortOrder#DESC}. + */ + private boolean betterThan(BytesRef lhs, BytesRef rhs) { + return common.order.reverseMul() * lhs.compareTo(rhs) < 0; + } + + /** + * Swap the data at two indices. + */ + private void swap(long lhs, long rhs) { + BreakingBytesRefBuilder tmp = values.get(lhs); + values.set(lhs, values.get(rhs)); + values.set(rhs, tmp); + } + + /** + * Allocate storage for more buckets and store the "next gather offset" + * for those new buckets. + */ + private void grow(long requiredSize) { + long oldMax = values.size(); + values = common.bigArrays.grow(values, requiredSize); + // Set the next gather offsets for all newly allocated buckets. + fillGatherOffsets(oldMax - (oldMax % common.bucketSize)); + } + + /** + * Maintain the "next gather offsets" for newly allocated buckets. + */ + private void fillGatherOffsets(long startingAt) { + assert startingAt % common.bucketSize == 0; + int nextOffset = common.bucketSize - 1; + for (long bucketRoot = startingAt; bucketRoot < values.size(); bucketRoot += common.bucketSize) { + BreakingBytesRefBuilder bytes = values.get(bucketRoot); + if (bytes != null) { + continue; + } + bytes = new BreakingBytesRefBuilder(breaker, label); + values.set(bucketRoot, bytes); + bytes.grow(Integer.BYTES); + bytes.setLength(Integer.BYTES); + ByteUtils.writeIntLE(nextOffset, bytes.bytes(), 0); + } + } + + /** + * Heapify a bucket whose entries are in random order. + *
<p>
+ *     This works by validating the heap property on each node, iterating
+ *     "upwards", pushing any out of order parents "down". Check out the
+ *     Wikipedia entry on binary heaps for more about this.
+ * </p>
+ * <p>
+ *     While this *looks* like it could easily be {@code O(n * log n)}, it is
+ *     a fairly well studied algorithm attributed to Floyd. There's
+ *     been a bunch of work that puts this at {@code O(n)}, close to 1.88n worst
+ *     case.
+ * </p>
+ * + * @param rootIndex the index the start of the bucket + */ + private void heapify(long rootIndex) { + int maxParent = common.bucketSize / 2 - 1; + for (int parent = maxParent; parent >= 0; parent--) { + downHeap(rootIndex, parent); + } + } + + /** + * Correct the heap invariant of a parent and its children. This + * runs in {@code O(log n)} time. + * @param rootIndex index of the start of the bucket + * @param parent Index within the bucket of the parent to check. + * For example, 0 is the "root". + */ + private void downHeap(long rootIndex, int parent) { + while (true) { + long parentIndex = rootIndex + parent; + int worst = parent; + long worstIndex = parentIndex; + int leftChild = parent * 2 + 1; + long leftIndex = rootIndex + leftChild; + if (leftChild < common.bucketSize) { + if (betterThan(values.get(worstIndex).bytesRefView(), values.get(leftIndex).bytesRefView())) { + worst = leftChild; + worstIndex = leftIndex; + } + int rightChild = leftChild + 1; + long rightIndex = rootIndex + rightChild; + if (rightChild < common.bucketSize + && betterThan(values.get(worstIndex).bytesRefView(), values.get(rightIndex).bytesRefView())) { + + worst = rightChild; + worstIndex = rightIndex; + } + } + if (worst == parent) { + break; + } + swap(worstIndex, parentIndex); + parent = worst; + } + } + + private BreakingBytesRefBuilder clearedBytesAt(long index) { + BreakingBytesRefBuilder bytes = values.get(index); + if (bytes == null) { + bytes = new BreakingBytesRefBuilder(breaker, label); + values.set(index, bytes); + } else { + bytes.clear(); + } + return bytes; + } + + @Override + public final void close() { + Releasable allValues = values == null ? () -> {} : Releasables.wrap(LongStream.range(0, values.size()).mapToObj(i -> { + BreakingBytesRefBuilder bytes = values.get(i); + return bytes == null ? (Releasable) () -> {} : bytes; + }).toList().iterator()); + Releasables.close(allValues, values, common); + } +} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/sort/IpBucketedSort.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/sort/IpBucketedSort.java index 0fd38c18d7504..4eb31ea30db22 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/sort/IpBucketedSort.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/sort/IpBucketedSort.java @@ -9,7 +9,6 @@ import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.util.BigArrays; -import org.elasticsearch.common.util.BitArray; import org.elasticsearch.common.util.ByteArray; import org.elasticsearch.common.util.ByteUtils; import org.elasticsearch.compute.data.Block; @@ -29,7 +28,7 @@ * See {@link BucketedSort} for more information. */ public class IpBucketedSort implements Releasable { - private static final int IP_LENGTH = 16; + private static final int IP_LENGTH = 16; // Bytes. It's ipv6. // BytesRefs used in internal methods private final BytesRef scratch1 = new BytesRef(); @@ -39,18 +38,11 @@ public class IpBucketedSort implements Releasable { */ private final byte[] scratchBytes = new byte[IP_LENGTH]; - private final BigArrays bigArrays; - private final SortOrder order; - private final int bucketSize; - /** - * {@code true} if the bucket is in heap mode, {@code false} if - * it is still gathering. - */ - private final BitArray heapMode; + private final BucketedSortCommon common; /** * An array containing all the values on all buckets. The structure is as follows: *

- * For each bucket, there are bucketSize elements, based on the bucket id (0, 1, 2...).
+ * For each bucket, there are {@link BucketedSortCommon#bucketSize} elements, based on the bucket id (0, 1, 2...).
 * Then, for each bucket, it can be in one of 2 states (illustrated in the sketch after this hunk):
 *

*
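
To make the two collection states documented above concrete, here is a minimal, self-contained sketch of the same scheme. It is not part of this patch: it uses plain long values and a plain array in place of BytesRefs and BigArrays, assumes a descending TOP (keep the largest values, so the per-bucket min-heap evicts through its root), and all names in it are illustrative. It also shows the Floyd-style bottom-up heapify whose O(n) bound the BytesRefBucketedSort hunk above discusses.

// Illustrative only: a simplified, single-file model of the bucketed TOP
// structure described in the javadoc above. Names are hypothetical.
final class SimpleBucketedTop {
    private final int bucketSize;
    private final long[] values;      // bucketSize slots per bucket; bucket b starts at b * bucketSize
    private final boolean[] heapMode; // true once bucket b has collected bucketSize values

    SimpleBucketedTop(int buckets, int bucketSize) {
        this.bucketSize = bucketSize;
        this.values = new long[buckets * bucketSize];
        this.heapMode = new boolean[buckets];
        // Gathering state: the first slot of each bucket stores the next
        // insert offset, counting down from bucketSize - 1 to 0.
        for (int b = 0; b < buckets; b++) {
            values[b * bucketSize] = bucketSize - 1;
        }
    }

    void collect(long value, int bucket) {
        int root = bucket * bucketSize;
        if (heapMode[bucket]) {
            // Heap state: the bucket is a min-heap of the current top values,
            // so the root is the worst value kept. Replace it only if the new
            // value is better (larger, for a descending TOP), then restore
            // the heap invariant.
            if (value > values[root]) {
                values[root] = value;
                downHeap(root, 0);
            }
            return;
        }
        int next = (int) values[root];
        values[root + next] = value; // when next == 0 this overwrites the offset slot
        if (next == 0) {
            heapMode[bucket] = true;
            heapify(root); // Floyd's bottom-up O(n) build
        } else {
            values[root] = next - 1;
        }
    }

    // Sift down every internal node, starting from the last parent. Each
    // sift is O(log n), but most nodes sit near the leaves, which is why
    // the total work lands at O(n).
    private void heapify(int root) {
        for (int parent = bucketSize / 2 - 1; parent >= 0; parent--) {
            downHeap(root, parent);
        }
    }

    private void downHeap(int root, int parent) {
        while (true) {
            int worst = parent;
            int left = 2 * parent + 1;
            if (left < bucketSize && values[root + left] < values[root + worst]) {
                worst = left;
            }
            int right = left + 1;
            if (right < bucketSize && values[root + right] < values[root + worst]) {
                worst = right;
            }
            if (worst == parent) {
                return;
            }
            long tmp = values[root + parent];
            values[root + parent] = values[root + worst];
            values[root + worst] = tmp;
            parent = worst;
        }
    }
}

Storing the gather offset inside the bucket's first value slot is what lets the structure avoid a separate per-bucket counter; in the real classes the heap-mode flag likewise moves into a shared BitArray owned by BucketedSortCommon.
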
    @@ -77,10 +69,7 @@ public class IpBucketedSort implements Releasable { private ByteArray values; public IpBucketedSort(BigArrays bigArrays, SortOrder order, int bucketSize) { - this.bigArrays = bigArrays; - this.order = order; - this.bucketSize = bucketSize; - heapMode = new BitArray(0, bigArrays); + this.common = new BucketedSortCommon(bigArrays, order, bucketSize); boolean success = false; try { @@ -101,8 +90,8 @@ public IpBucketedSort(BigArrays bigArrays, SortOrder order, int bucketSize) { */ public void collect(BytesRef value, int bucket) { assert value.length == IP_LENGTH; - long rootIndex = (long) bucket * bucketSize; - if (inHeapMode(bucket)) { + long rootIndex = common.rootIndex(bucket); + if (common.inHeapMode(bucket)) { if (betterThan(value, get(rootIndex, scratch1))) { set(rootIndex, value); downHeap(rootIndex, 0); @@ -110,49 +99,34 @@ public void collect(BytesRef value, int bucket) { return; } // Gathering mode - long requiredSize = (rootIndex + bucketSize) * IP_LENGTH; + long requiredSize = common.endIndex(rootIndex) * IP_LENGTH; if (values.size() < requiredSize) { grow(requiredSize); } int next = getNextGatherOffset(rootIndex); - assert 0 <= next && next < bucketSize - : "Expected next to be in the range of valid buckets [0 <= " + next + " < " + bucketSize + "]"; + common.assertValidNextOffset(next); long index = next + rootIndex; set(index, value); if (next == 0) { - heapMode.set(bucket); + common.enableHeapMode(bucket); heapify(rootIndex); } else { setNextGatherOffset(rootIndex, next - 1); } } - /** - * The order of the sort. - */ - public SortOrder getOrder() { - return order; - } - - /** - * The number of values to store per bucket. - */ - public int getBucketSize() { - return bucketSize; - } - /** * Get the first and last indexes (inclusive, exclusive) of the values for a bucket. * Returns [0, 0] if the bucket has never been collected. */ private Tuple getBucketValuesIndexes(int bucket) { - long rootIndex = (long) bucket * bucketSize; + long rootIndex = common.rootIndex(bucket); if (rootIndex >= values.size() / IP_LENGTH) { // We've never seen this bucket. return Tuple.tuple(0L, 0L); } - long start = inHeapMode(bucket) ? rootIndex : (rootIndex + getNextGatherOffset(rootIndex) + 1); - long end = rootIndex + bucketSize; + long start = startIndex(bucket, rootIndex); + long end = common.endIndex(rootIndex); return Tuple.tuple(start, end); } @@ -184,7 +158,7 @@ public Block toBlock(BlockFactory blockFactory, IntVector selected) { } // Used to sort the values in the bucket. - var bucketValues = new BytesRef[bucketSize]; + var bucketValues = new BytesRef[common.bucketSize]; try (var builder = blockFactory.newBytesRefBlockBuilder(selected.getPositionCount())) { for (int s = 0; s < selected.getPositionCount(); s++) { @@ -211,7 +185,7 @@ public Block toBlock(BlockFactory blockFactory, IntVector selected) { Arrays.sort(bucketValues, 0, (int) size); builder.beginPositionEntry(); - if (order == SortOrder.ASC) { + if (common.order == SortOrder.ASC) { for (int i = 0; i < size; i++) { builder.appendBytesRef(bucketValues[i]); } @@ -226,11 +200,11 @@ public Block toBlock(BlockFactory blockFactory, IntVector selected) { } } - /** - * Is this bucket a min heap {@code true} or in gathering mode {@code false}? 
- */ - private boolean inHeapMode(int bucket) { - return heapMode.get(bucket); + private long startIndex(int bucket, long rootIndex) { + if (common.inHeapMode(bucket)) { + return rootIndex; + } + return rootIndex + getNextGatherOffset(rootIndex) + 1; } /** @@ -267,7 +241,7 @@ private void setNextGatherOffset(long rootIndex, int offset) { * {@link SortOrder#ASC} and "higher" for {@link SortOrder#DESC}. */ private boolean betterThan(BytesRef lhs, BytesRef rhs) { - return getOrder().reverseMul() * lhs.compareTo(rhs) < 0; + return common.order.reverseMul() * lhs.compareTo(rhs) < 0; } /** @@ -296,17 +270,17 @@ private void swap(long lhs, long rhs) { */ private void grow(long minSize) { long oldMax = values.size() / IP_LENGTH; - values = bigArrays.grow(values, minSize); + values = common.bigArrays.grow(values, minSize); // Set the next gather offsets for all newly allocated buckets. - setNextGatherOffsets(oldMax - (oldMax % bucketSize)); + setNextGatherOffsets(oldMax - (oldMax % common.bucketSize)); } /** * Maintain the "next gather offsets" for newly allocated buckets. */ private void setNextGatherOffsets(long startingAt) { - int nextOffset = bucketSize - 1; - for (long bucketRoot = startingAt; bucketRoot < values.size() / IP_LENGTH; bucketRoot += bucketSize) { + int nextOffset = common.bucketSize - 1; + for (long bucketRoot = startingAt; bucketRoot < values.size() / IP_LENGTH; bucketRoot += common.bucketSize) { setNextGatherOffset(bucketRoot, nextOffset); } } @@ -334,7 +308,7 @@ private void setNextGatherOffsets(long startingAt) { * @param rootIndex the index the start of the bucket */ private void heapify(long rootIndex) { - int maxParent = bucketSize / 2 - 1; + int maxParent = common.bucketSize / 2 - 1; for (int parent = maxParent; parent >= 0; parent--) { downHeap(rootIndex, parent); } @@ -354,14 +328,14 @@ private void downHeap(long rootIndex, int parent) { long worstIndex = parentIndex; int leftChild = parent * 2 + 1; long leftIndex = rootIndex + leftChild; - if (leftChild < bucketSize) { + if (leftChild < common.bucketSize) { if (betterThan(get(worstIndex, scratch1), get(leftIndex, scratch2))) { worst = leftChild; worstIndex = leftIndex; } int rightChild = leftChild + 1; long rightIndex = rootIndex + rightChild; - if (rightChild < bucketSize && betterThan(get(worstIndex, scratch1), get(rightIndex, scratch2))) { + if (rightChild < common.bucketSize && betterThan(get(worstIndex, scratch1), get(rightIndex, scratch2))) { worst = rightChild; worstIndex = rightIndex; } @@ -400,6 +374,6 @@ private void set(long index, BytesRef value) { @Override public final void close() { - Releasables.close(values, heapMode); + Releasables.close(values, common); } } diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/AbstractTopBytesRefAggregatorFunctionTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/AbstractTopBytesRefAggregatorFunctionTests.java new file mode 100644 index 0000000000000..2815dd70e8124 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/AbstractTopBytesRefAggregatorFunctionTests.java @@ -0,0 +1,37 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.compute.aggregation; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BlockFactory; +import org.elasticsearch.compute.data.BlockUtils; +import org.elasticsearch.compute.operator.SequenceBytesRefBlockSourceOperator; +import org.elasticsearch.compute.operator.SourceOperator; + +import java.util.List; +import java.util.stream.IntStream; + +import static org.hamcrest.Matchers.contains; + +abstract class AbstractTopBytesRefAggregatorFunctionTests extends AggregatorFunctionTestCase { + static final int LIMIT = 100; + + @Override + protected final SourceOperator simpleInput(BlockFactory blockFactory, int size) { + return new SequenceBytesRefBlockSourceOperator(blockFactory, IntStream.range(0, size).mapToObj(l -> randomValue())); + } + + protected abstract BytesRef randomValue(); + + @Override + public final void assertSimpleOutput(List input, Block result) { + Object[] values = input.stream().flatMap(AggregatorFunctionTestCase::allBytesRefs).sorted().limit(LIMIT).toArray(Object[]::new); + assertThat((List) BlockUtils.toJavaObject(result, 0), contains(values)); + } +} diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/AbstractTopBytesRefGroupingAggregatorFunctionTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/AbstractTopBytesRefGroupingAggregatorFunctionTests.java new file mode 100644 index 0000000000000..45c8a23dfc1c0 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/AbstractTopBytesRefGroupingAggregatorFunctionTests.java @@ -0,0 +1,49 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.compute.aggregation; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BlockFactory; +import org.elasticsearch.compute.data.BlockUtils; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.operator.LongBytesRefTupleBlockSourceOperator; +import org.elasticsearch.compute.operator.SourceOperator; +import org.elasticsearch.core.Tuple; + +import java.util.List; +import java.util.stream.IntStream; + +import static org.hamcrest.Matchers.contains; +import static org.hamcrest.Matchers.equalTo; + +public abstract class AbstractTopBytesRefGroupingAggregatorFunctionTests extends GroupingAggregatorFunctionTestCase { + static final int LIMIT = 100; + + @Override + protected final SourceOperator simpleInput(BlockFactory blockFactory, int size) { + return new LongBytesRefTupleBlockSourceOperator( + blockFactory, + IntStream.range(0, size).mapToObj(l -> Tuple.tuple(randomLongBetween(0, 4), randomValue())) + ); + } + + protected abstract BytesRef randomValue(); + + @Override + protected final void assertSimpleGroup(List input, Block result, int position, Long group) { + Object[] values = input.stream().flatMap(b -> allBytesRefs(b, group)).sorted().limit(LIMIT).toArray(Object[]::new); + if (values.length == 0) { + assertThat(result.isNull(position), equalTo(true)); + } else if (values.length == 1) { + assertThat(BlockUtils.toJavaObject(result, position), equalTo(values[0])); + } else { + assertThat((List) BlockUtils.toJavaObject(result, position), contains(values)); + } + } +} diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/TopBytesRefAggregatorFunctionTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/TopBytesRefAggregatorFunctionTests.java new file mode 100644 index 0000000000000..732229c98f9c7 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/TopBytesRefAggregatorFunctionTests.java @@ -0,0 +1,29 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.aggregation; + +import org.apache.lucene.util.BytesRef; + +import java.util.List; + +public class TopBytesRefAggregatorFunctionTests extends AbstractTopBytesRefAggregatorFunctionTests { + @Override + protected BytesRef randomValue() { + return new BytesRef(randomAlphaOfLength(10)); + } + + @Override + protected AggregatorFunctionSupplier aggregatorFunction(List inputChannels) { + return new TopBytesRefAggregatorFunctionSupplier(inputChannels, LIMIT, true); + } + + @Override + protected String expectedDescriptionOfAggregator() { + return "top of bytes"; + } +} diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/TopBytesRefGroupingAggregatorFunctionTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/TopBytesRefGroupingAggregatorFunctionTests.java new file mode 100644 index 0000000000000..4932e1abef46d --- /dev/null +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/TopBytesRefGroupingAggregatorFunctionTests.java @@ -0,0 +1,35 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. 
under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.aggregation; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.xpack.esql.core.type.DataType; + +import java.util.List; + +public class TopBytesRefGroupingAggregatorFunctionTests extends AbstractTopBytesRefGroupingAggregatorFunctionTests { + @Override + protected BytesRef randomValue() { + return new BytesRef(randomAlphaOfLength(6)); + } + + @Override + protected final AggregatorFunctionSupplier aggregatorFunction(List inputChannels) { + return new TopBytesRefAggregatorFunctionSupplier(inputChannels, LIMIT, true); + } + + @Override + protected DataType acceptedDataType() { + return DataType.KEYWORD; + } + + @Override + protected String expectedDescriptionOfAggregator() { + return "top of bytes"; + } +} diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/TopIpAggregatorFunctionTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/TopIpAggregatorFunctionTests.java index 1594f66ed9fe2..840e4cf9af961 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/TopIpAggregatorFunctionTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/TopIpAggregatorFunctionTests.java @@ -9,26 +9,13 @@ import org.apache.lucene.document.InetAddressPoint; import org.apache.lucene.util.BytesRef; -import org.elasticsearch.compute.data.Block; -import org.elasticsearch.compute.data.BlockFactory; -import org.elasticsearch.compute.data.BlockUtils; -import org.elasticsearch.compute.operator.SequenceBytesRefBlockSourceOperator; -import org.elasticsearch.compute.operator.SourceOperator; import java.util.List; -import java.util.stream.IntStream; - -import static org.hamcrest.Matchers.contains; - -public class TopIpAggregatorFunctionTests extends AggregatorFunctionTestCase { - private static final int LIMIT = 100; +public class TopIpAggregatorFunctionTests extends AbstractTopBytesRefAggregatorFunctionTests { @Override - protected SourceOperator simpleInput(BlockFactory blockFactory, int size) { - return new SequenceBytesRefBlockSourceOperator( - blockFactory, - IntStream.range(0, size).mapToObj(l -> new BytesRef(InetAddressPoint.encode(randomIp(randomBoolean())))) - ); + protected BytesRef randomValue() { + return new BytesRef(InetAddressPoint.encode(randomIp(randomBoolean()))); } @Override @@ -40,10 +27,4 @@ protected AggregatorFunctionSupplier aggregatorFunction(List inputChann protected String expectedDescriptionOfAggregator() { return "top of ips"; } - - @Override - public void assertSimpleOutput(List input, Block result) { - Object[] values = input.stream().flatMap(b -> allBytesRefs(b)).sorted().limit(LIMIT).toArray(Object[]::new); - assertThat((List) BlockUtils.toJavaObject(result, 0), contains(values)); - } } diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/TopIpGroupingAggregatorFunctionTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/TopIpGroupingAggregatorFunctionTests.java index da55ff2d7aab3..02bf6b667192b 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/TopIpGroupingAggregatorFunctionTests.java +++ 
b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/TopIpGroupingAggregatorFunctionTests.java @@ -9,36 +9,14 @@ import org.apache.lucene.document.InetAddressPoint; import org.apache.lucene.util.BytesRef; -import org.elasticsearch.compute.data.Block; -import org.elasticsearch.compute.data.BlockFactory; -import org.elasticsearch.compute.data.BlockUtils; -import org.elasticsearch.compute.data.Page; -import org.elasticsearch.compute.operator.LongBytesRefTupleBlockSourceOperator; -import org.elasticsearch.compute.operator.SourceOperator; -import org.elasticsearch.core.Tuple; import org.elasticsearch.xpack.esql.core.type.DataType; import java.util.List; -import java.util.stream.IntStream; - -import static org.hamcrest.Matchers.contains; -import static org.hamcrest.Matchers.equalTo; - -public class TopIpGroupingAggregatorFunctionTests extends GroupingAggregatorFunctionTestCase { - private static final int LIMIT = 100; +public class TopIpGroupingAggregatorFunctionTests extends AbstractTopBytesRefGroupingAggregatorFunctionTests { @Override - protected SourceOperator simpleInput(BlockFactory blockFactory, int size) { - return new LongBytesRefTupleBlockSourceOperator( - blockFactory, - IntStream.range(0, size) - .mapToObj(l -> Tuple.tuple(randomLongBetween(0, 4), new BytesRef(InetAddressPoint.encode(randomIp(randomBoolean()))))) - ); - } - - @Override - protected DataType acceptedDataType() { - return DataType.IP; + protected BytesRef randomValue() { + return new BytesRef(InetAddressPoint.encode(randomIp(randomBoolean()))); } @Override @@ -47,19 +25,12 @@ protected AggregatorFunctionSupplier aggregatorFunction(List inputChann } @Override - protected String expectedDescriptionOfAggregator() { - return "top of ips"; + protected DataType acceptedDataType() { + return DataType.IP; } @Override - protected void assertSimpleGroup(List input, Block result, int position, Long group) { - Object[] values = input.stream().flatMap(b -> allBytesRefs(b, group)).sorted().limit(LIMIT).toArray(Object[]::new); - if (values.length == 0) { - assertThat(result.isNull(position), equalTo(true)); - } else if (values.length == 1) { - assertThat(BlockUtils.toJavaObject(result, position), equalTo(values[0])); - } else { - assertThat((List) BlockUtils.toJavaObject(result, position), contains(values)); - } + protected String expectedDescriptionOfAggregator() { + return "top of ips"; } } diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/sort/BytesRefBucketedSortTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/sort/BytesRefBucketedSortTests.java new file mode 100644 index 0000000000000..7a4e6658cd646 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/sort/BytesRefBucketedSortTests.java @@ -0,0 +1,79 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.compute.data.sort; + +import org.apache.lucene.document.InetAddressPoint; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.breaker.CircuitBreaker; +import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BlockFactory; +import org.elasticsearch.compute.data.BytesRefBlock; +import org.elasticsearch.compute.data.ElementType; +import org.elasticsearch.compute.data.IntVector; +import org.elasticsearch.search.sort.SortOrder; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import static org.hamcrest.Matchers.equalTo; + +public class BytesRefBucketedSortTests extends BucketedSortTestCase { + @Override + protected BytesRefBucketedSort build(SortOrder sortOrder, int bucketSize) { + BigArrays bigArrays = bigArrays(); + return new BytesRefBucketedSort( + bigArrays.breakerService().getBreaker(CircuitBreaker.REQUEST), + "test", + bigArrays, + sortOrder, + bucketSize + ); + } + + @Override + protected BytesRef randomValue() { + return new BytesRef(InetAddressPoint.encode(randomIp(randomBoolean()))); + } + + @Override + protected List threeSortedValues() { + List values = new ArrayList<>(); + values.add(new BytesRef(randomAlphaOfLength(10))); + values.add(new BytesRef(randomAlphaOfLength(11))); + values.add(new BytesRef(randomAlphaOfLength(1))); + Collections.sort(values); + return values; + } + + @Override + protected void collect(BytesRefBucketedSort sort, BytesRef value, int bucket) { + sort.collect(value, bucket); + } + + @Override + protected void merge(BytesRefBucketedSort sort, int groupId, BytesRefBucketedSort other, int otherGroupId) { + sort.merge(groupId, other, otherGroupId); + } + + @Override + protected Block toBlock(BytesRefBucketedSort sort, BlockFactory blockFactory, IntVector selected) { + return sort.toBlock(blockFactory, selected); + } + + @Override + protected void assertBlockTypeAndValues(Block block, List values) { + assertThat(block.elementType(), equalTo(ElementType.BYTES_REF)); + var typedBlock = (BytesRefBlock) block; + var scratch = new BytesRef(); + for (int i = 0; i < values.size(); i++) { + assertThat("expected value on block position " + i, typedBlock.getBytesRef(i, scratch), equalTo(values.get(i))); + } + } +} diff --git a/x-pack/plugin/esql/qa/server/mixed-cluster/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/mixed/MixedClusterEsqlSpecIT.java b/x-pack/plugin/esql/qa/server/mixed-cluster/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/mixed/MixedClusterEsqlSpecIT.java index d0d6d5fa49c42..08b4794b740d6 100644 --- a/x-pack/plugin/esql/qa/server/mixed-cluster/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/mixed/MixedClusterEsqlSpecIT.java +++ b/x-pack/plugin/esql/qa/server/mixed-cluster/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/mixed/MixedClusterEsqlSpecIT.java @@ -72,6 +72,10 @@ public MixedClusterEsqlSpecIT( protected void shouldSkipTest(String testName) throws IOException { super.shouldSkipTest(testName); assumeTrue("Test " + testName + " is skipped on " + bwcVersion, isEnabled(testName, instructions, bwcVersion)); + assumeFalse( + "Skip META tests on mixed version clusters because we change it too quickly", + testCase.requiredCapabilities.contains("meta") + ); if (mode == ASYNC) { assumeTrue("Async is not supported on " + bwcVersion, supportsAsync()); } diff --git 
a/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java b/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java index 3e799730f7269..8d54dc63598f0 100644 --- a/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java +++ b/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java @@ -112,6 +112,10 @@ protected void shouldSkipTest(String testName) throws IOException { ); assumeFalse("INLINESTATS not yet supported in CCS", testCase.requiredCapabilities.contains("inlinestats")); assumeFalse("INLINESTATS not yet supported in CCS", testCase.requiredCapabilities.contains("inlinestats_v2")); + assumeFalse( + "Skip META tests on mixed version clusters because we change it too quickly", + testCase.requiredCapabilities.contains("meta") + ); } private TestFeatureService remoteFeaturesService() throws IOException { diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/meta.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/meta.csv-spec index 6909f0aeb42f5..2b3fa9dec797d 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/meta.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/meta.csv-spec @@ -1,5 +1,7 @@ -metaFunctionsSynopsis#[skip:-8.15.99] +metaFunctionsSynopsis required_capability: date_nanos_type +required_capability: meta + meta functions | keep synopsis; synopsis:keyword @@ -118,14 +120,16 @@ double tau() "keyword|text to_upper(str:keyword|text)" "version to_ver(field:keyword|text|version)" "version to_version(field:keyword|text|version)" -"boolean|double|integer|long|date|ip top(field:boolean|double|integer|long|date|ip, limit:integer, order:keyword)" +"boolean|double|integer|long|date|ip|keyword|text top(field:boolean|double|integer|long|date|ip|keyword|text, limit:integer, order:keyword)" "keyword|text trim(string:keyword|text)" "boolean|date|double|integer|ip|keyword|long|text|version values(field:boolean|date|double|integer|ip|keyword|long|text|version)" "double weighted_avg(number:double|integer|long, weight:double|integer|long)" ; -metaFunctionsArgs#[skip:-8.15.99] +metaFunctionsArgs +required_capability: meta required_capability: date_nanos_type + META functions | EVAL name = SUBSTRING(name, 0, 14) | KEEP name, argNames, argTypes, argDescriptions; @@ -246,13 +250,15 @@ to_unsigned_lo|field |"boolean|date|keyword|text|d to_upper |str |"keyword|text" |String expression. If `null`, the function returns `null`. to_ver |field |"keyword|text|version" |Input value. The input can be a single- or multi-valued column or an expression. to_version |field |"keyword|text|version" |Input value. The input can be a single- or multi-valued column or an expression. -top |[field, limit, order] |["boolean|double|integer|long|date|ip", integer, keyword] |[The field to collect the top values for.,The maximum number of values to collect.,The order to calculate the top values. Either `asc` or `desc`.] +top |[field, limit, order] |["boolean|double|integer|long|date|ip|keyword|text", integer, keyword] |[The field to collect the top values for.,The maximum number of values to collect.,The order to calculate the top values. Either `asc` or `desc`.] trim |string |"keyword|text" |String expression. If `null`, the function returns `null`. 
values |field |"boolean|date|double|integer|ip|keyword|long|text|version" |[""] weighted_avg |[number, weight] |["double|integer|long", "double|integer|long"] |[A numeric value., A numeric weight.] ; -metaFunctionsDescription#[skip:-8.15.99] +metaFunctionsDescription +required_capability: meta + META functions | EVAL name = SUBSTRING(name, 0, 14) | KEEP name, description @@ -380,8 +386,10 @@ values |Returns all values in a group as a multivalued field. The order o weighted_avg |The weighted average of a numeric expression. ; -metaFunctionsRemaining#[skip:-8.15.99] +metaFunctionsRemaining +required_capability: meta required_capability: date_nanos_type + META functions | EVAL name = SUBSTRING(name, 0, 14) | KEEP name, * @@ -504,13 +512,15 @@ to_unsigned_lo|unsigned_long to_upper |"keyword|text" |false |false |false to_ver |version |false |false |false to_version |version |false |false |false -top |"boolean|double|integer|long|date|ip" |[false, false, false] |false |true +top |"boolean|double|integer|long|date|ip|keyword|text" |[false, false, false] |false |true trim |"keyword|text" |false |false |false values |"boolean|date|double|integer|ip|keyword|long|text|version" |false |false |true weighted_avg |"double" |[false, false] |false |true ; -metaFunctionsFiltered#[skip:-8.15.99] +metaFunctionsFiltered +required_capability: meta + META FUNCTIONS | WHERE STARTS_WITH(name, "sin") ; @@ -520,7 +530,9 @@ sin |"double sin(angle:double|integer|long|unsigned_long)" |angle sinh |"double sinh(number:double|integer|long|unsigned_long)" |number |"double|integer|long|unsigned_long" | "Numeric expression. If `null`, the function returns `null`." | double | "Returns the {wikipedia}/Hyperbolic_functions[hyperbolic sine] of a number." | false | false | false ; -countFunctions#[skip:-8.15.99] +countFunctions +required_capability: meta + meta functions | stats a = count(*), b = count(*), c = count(*) | mv_expand c; a:long | b:long | c:long diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_top.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_top.csv-spec index 86f91adf506d1..80d11425c5bb6 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_top.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats_top.csv-spec @@ -224,3 +224,77 @@ a:ip | b:ip | c:ip | host:keyword [fe82::cae2:65ff:fece:fec0, fe81::cae2:65ff:fece:feb9] | [fe82::cae2:65ff:fece:fec0, fe81::cae2:65ff:fece:feb9] | [fe82::cae2:65ff:fece:fec0, fe81::cae2:65ff:fece:feb9] | epsilon [fe80::cae2:65ff:fece:feb9, fe80::cae2:65ff:fece:feb9] | [fe80::cae2:65ff:fece:feb9, fe80::cae2:65ff:fece:feb9] | [fe81::cae2:65ff:fece:feb9, 127.0.0.3] | gamma ; + +topKeywords +required_capability: agg_top +required_capability: agg_top_string_support + +FROM employees +| EVAL calc = SUBSTRING(last_name, 2) +| STATS + first_name = TOP(first_name, 3, "asc"), + last_name = TOP(calc, 3, "asc"), + evil = TOP(CASE(languages <= 2, first_name, last_name), 3, "desc"); + + first_name:keyword | last_name:keyword | evil:keyword +[Alejandro, Amabile, Anneke] | [acello, addadi, aek] | [Zschoche, Zielinski, Zhongwei] +; + +topKeywordsGrouping +required_capability: agg_top +required_capability: agg_top_string_support + +FROM employees +| EVAL calc = SUBSTRING(last_name, 2) +| STATS + first_name = TOP(first_name, 3, "asc"), + last_name = TOP(calc, 3, "asc"), + evil = TOP(CASE(languages <= 2, first_name, last_name), 3, "desc") + BY job_positions +| SORT job_positions +| LIMIT 3; + + first_name:keyword | 
last_name:keyword | evil:keyword | job_positions:keyword + [Arumugam, Bojan, Domenick] | [acello, aine, akrucki] | [Zhongwei, Yinghua, Valdiodio] | Accountant +[Alejandro, Charlene, Danel] | [andell, cAlpine, eistad] | [Stamatiou, Sluis, Sidou] | Architect + [Basil, Breannda, Hidefumi] | [aine, alabarba, ierman] | [Tramer, Syrzycki, Stamatiou] | Business Analyst +; + +topText +required_capability: agg_top +required_capability: agg_top_string_support +# we don't need MATCH, but the loader for books.csv is busted in CsvTests +required_capability: match_operator + +FROM books +| EVAL calc = TRIM(SUBSTRING(title, 2, 5)) +| STATS + title = TOP(title, 3, "desc"), + calc = TOP(calc, 3, "asc"), + evil = TOP(CASE(year < 1980, title, author), 3, "desc"); + +title:text | calc:keyword | evil:text +[Worlds of Exile and Illusion: Three Complete Novels of the Hainish Series in One Volume--Rocannon's World, Planet of Exile, City of Illusions, Woman-The Full Story: A Dynamic Celebration of Freedoms, Winter notes on summer impressions] | ["'Bria", "Gent", "HE UN"] | [William Faulkner, William Faulkner, William Faulkner] +; + +topTextGrouping +required_capability: agg_top +required_capability: agg_top_string_support +# we don't need MATCH, but the loader for books.csv is busted in CsvTests +required_capability: match_operator + +FROM books +| EVAL calc = TRIM(SUBSTRING(title, 2, 5)) +| STATS + title = TOP(title, 3, "desc"), + calc = TOP(calc, 3, "asc"), + evil = TOP(CASE(year < 1980, title, author), 3, "desc") + BY author +| SORT author +| LIMIT 3; + + title:text | calc:keyword | evil:text | author:text + A Tolkien Compass: Including J. R. R. Tolkien's Guide to the Names in The Lord of the Rings | Tolk | A Tolkien Compass: Including J. R. R. Tolkien's Guide to the Names in The Lord of the Rings | Agnes Perkins + The Lord of the Rings Poster Collection: Six Paintings by Alan Lee (No. 1) | he Lo | [J. R. R. Tolkien, Alan Lee] | Alan Lee +A Gentle Creature and Other Stories: White Nights, A Gentle Creature, and The Dream of a Ridiculous Man (The World's Classics) | Gent | [W. J. Leatherbarrow, Fyodor Dostoevsky, Alan Myers] | Alan Myers +; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 597c349273eb2..31a3096c13cd2 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -97,6 +97,11 @@ public enum Cap { */ AGG_TOP_IP_SUPPORT, + /** + * Support for {@code keyword} and {@code text} fields in {@code TOP} aggregation. + */ + AGG_TOP_STRING_SUPPORT, + /** * {@code CASE} properly handling multivalue conditions. */ @@ -251,6 +256,13 @@ public enum Cap { */ MATCH_OPERATOR(true), + /** + * Support for the {@code META} keyword. Tests with this tag are + * intentionally excluded from mixed version clusters because we + * continually add functions, so they constantly fail if we don't. + */ + META, + /** * Add CombineBinaryComparisons rule. 
*/ diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Top.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Top.java index 4927acc3e1cd9..cb1b0f0cad895 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Top.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Top.java @@ -13,6 +13,7 @@ import org.elasticsearch.common.lucene.BytesRefs; import org.elasticsearch.compute.aggregation.AggregatorFunctionSupplier; import org.elasticsearch.compute.aggregation.TopBooleanAggregatorFunctionSupplier; +import org.elasticsearch.compute.aggregation.TopBytesRefAggregatorFunctionSupplier; import org.elasticsearch.compute.aggregation.TopDoubleAggregatorFunctionSupplier; import org.elasticsearch.compute.aggregation.TopIntAggregatorFunctionSupplier; import org.elasticsearch.compute.aggregation.TopIpAggregatorFunctionSupplier; @@ -48,7 +49,7 @@ public class Top extends AggregateFunction implements ToAggregator, SurrogateExp private static final String ORDER_DESC = "DESC"; @FunctionInfo( - returnType = { "boolean", "double", "integer", "long", "date", "ip" }, + returnType = { "boolean", "double", "integer", "long", "date", "ip", "keyword", "text" }, description = "Collects the top values for a field. Includes repeated values.", isAggregation = true, examples = @Example(file = "stats_top", tag = "top") @@ -57,7 +58,7 @@ public Top( Source source, @Param( name = "field", - type = { "boolean", "double", "integer", "long", "date", "ip" }, + type = { "boolean", "double", "integer", "long", "date", "ip", "keyword", "text" }, description = "The field to collect the top values for." ) Expression field, @Param(name = "limit", type = { "integer" }, description = "The maximum number of values to collect.") Expression limit, @@ -125,12 +126,14 @@ protected TypeResolution resolveType() { dt -> dt == DataType.BOOLEAN || dt == DataType.DATETIME || dt == DataType.IP + || DataType.isString(dt) || (dt.isNumeric() && dt != DataType.UNSIGNED_LONG), sourceText(), FIRST, "boolean", "date", "ip", + "string", "numeric except unsigned_long or counter types" ).and(isNotNullAndFoldable(limitField(), sourceText(), SECOND)) .and(isType(limitField(), dt -> dt == DataType.INTEGER, sourceText(), SECOND, "integer")) @@ -190,6 +193,9 @@ public AggregatorFunctionSupplier supplier(List inputChannels) { if (type == DataType.IP) { return new TopIpAggregatorFunctionSupplier(inputChannels, limitValue(), orderValue()); } + if (DataType.isString(type)) { + return new TopBytesRefAggregatorFunctionSupplier(inputChannels, limitValue(), orderValue()); + } throw EsqlIllegalArgumentException.illegalDataType(type); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AggregateMapper.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AggregateMapper.java index 60bf4be1d2b03..13ce9ba77cc71 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AggregateMapper.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AggregateMapper.java @@ -170,7 +170,7 @@ private static Stream, Tuple>> typeAndNames(Class // TODO can't we figure this out from the function itself? 
types = List.of("Int", "Long", "Double", "Boolean", "BytesRef"); } else if (Top.class.isAssignableFrom(clazz)) { - types = List.of("Boolean", "Int", "Long", "Double", "Ip"); + types = List.of("Boolean", "Int", "Long", "Double", "Ip", "BytesRef"); } else if (Rate.class.isAssignableFrom(clazz)) { types = List.of("Int", "Long", "Double"); } else if (FromPartial.class.isAssignableFrom(clazz) || ToPartial.class.isAssignableFrom(clazz)) { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/TopTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/TopTests.java index f64d6a200a031..f7bf338caa099 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/TopTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/aggregate/TopTests.java @@ -46,7 +46,9 @@ public static Iterable parameters() { MultiRowTestCaseSupplier.doubleCases(1, 1000, -Double.MAX_VALUE, Double.MAX_VALUE, true), MultiRowTestCaseSupplier.dateCases(1, 1000), MultiRowTestCaseSupplier.booleanCases(1, 1000), - MultiRowTestCaseSupplier.ipCases(1, 1000) + MultiRowTestCaseSupplier.ipCases(1, 1000), + MultiRowTestCaseSupplier.stringCases(1, 1000, DataType.KEYWORD), + MultiRowTestCaseSupplier.stringCases(1, 1000, DataType.TEXT) ) .flatMap(List::stream) .map(fieldCaseSupplier -> TopTests.makeSupplier(fieldCaseSupplier, limitCaseSupplier, order)) From 37ebafdc11172e00c745c0194c70fd4289be8fd2 Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 25 Sep 2024 20:51:03 +0100 Subject: [PATCH 53/58] Remove `{Indices,}ClusterStateUpdateRequest` (#113483) (#113506) These abstract classes are now unused so this commit removes them. --- server/src/main/java/module-info.java | 1 - .../ack/ClusterStateUpdateRequest.java | 55 ------------------- .../ack/IndicesClusterStateUpdateRequest.java | 35 ------------ 3 files changed, 91 deletions(-) delete mode 100644 server/src/main/java/org/elasticsearch/cluster/ack/ClusterStateUpdateRequest.java delete mode 100644 server/src/main/java/org/elasticsearch/cluster/ack/IndicesClusterStateUpdateRequest.java diff --git a/server/src/main/java/module-info.java b/server/src/main/java/module-info.java index 696624a4a8f27..f695b0d20854a 100644 --- a/server/src/main/java/module-info.java +++ b/server/src/main/java/module-info.java @@ -160,7 +160,6 @@ exports org.elasticsearch.client.internal.support; exports org.elasticsearch.client.internal.transport; exports org.elasticsearch.cluster; - exports org.elasticsearch.cluster.ack; exports org.elasticsearch.cluster.action.index; exports org.elasticsearch.cluster.action.shard; exports org.elasticsearch.cluster.block; diff --git a/server/src/main/java/org/elasticsearch/cluster/ack/ClusterStateUpdateRequest.java b/server/src/main/java/org/elasticsearch/cluster/ack/ClusterStateUpdateRequest.java deleted file mode 100644 index 8841b315b0138..0000000000000 --- a/server/src/main/java/org/elasticsearch/cluster/ack/ClusterStateUpdateRequest.java +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. 
Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". - */ - -package org.elasticsearch.cluster.ack; - -import org.elasticsearch.core.TimeValue; - -/** - * Base class to be used when needing to update the cluster state - * Contains the basic fields that are always needed - */ -public abstract class ClusterStateUpdateRequest> { - - private TimeValue ackTimeout; - private TimeValue masterNodeTimeout; - - /** - * Returns the maximum time interval to wait for acknowledgements - */ - public TimeValue ackTimeout() { - return ackTimeout; - } - - /** - * Sets the acknowledgement timeout - */ - @SuppressWarnings("unchecked") - public T ackTimeout(TimeValue ackTimeout) { - this.ackTimeout = ackTimeout; - return (T) this; - } - - /** - * Returns the maximum time interval to wait for the request to - * be completed on the master node - */ - public TimeValue masterNodeTimeout() { - return masterNodeTimeout; - } - - /** - * Sets the master node timeout - */ - @SuppressWarnings("unchecked") - public T masterNodeTimeout(TimeValue masterNodeTimeout) { - this.masterNodeTimeout = masterNodeTimeout; - return (T) this; - } -} diff --git a/server/src/main/java/org/elasticsearch/cluster/ack/IndicesClusterStateUpdateRequest.java b/server/src/main/java/org/elasticsearch/cluster/ack/IndicesClusterStateUpdateRequest.java deleted file mode 100644 index b1a52d50fd544..0000000000000 --- a/server/src/main/java/org/elasticsearch/cluster/ack/IndicesClusterStateUpdateRequest.java +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". - */ -package org.elasticsearch.cluster.ack; - -import org.elasticsearch.index.Index; - -/** - * Base cluster state update request that allows to execute update against multiple indices - */ -public abstract class IndicesClusterStateUpdateRequest> extends ClusterStateUpdateRequest { - - private Index[] indices; - - /** - * Returns the indices the operation needs to be executed on - */ - public Index[] indices() { - return indices; - } - - /** - * Sets the indices the operation needs to be executed on - */ - @SuppressWarnings("unchecked") - public T indices(Index[] indices) { - this.indices = indices; - return (T) this; - } -} From c644dbb79787a6fe610401be773c2a8c8415a49f Mon Sep 17 00:00:00 2001 From: Pat Whelan Date: Wed, 25 Sep 2024 16:09:30 -0400 Subject: [PATCH 54/58] [ML] Move InferenceInputs up a level (#112726) (#113564) Refactor before streaming support is added - moving InferenceInputs up a level so that construction happens at the top level rather than each individual implementation. UnsupportedOperationException will now be thrown as an IllegalStateException later in the call chain, both would go through the listener's onFailure method anyway. 
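
As a reading aid for the diff below, a minimal sketch of the shape this refactor arrives at, with hypothetical, condensed names standing in for the real classes in org.elasticsearch.xpack.inference.external.http.sender:

import java.util.List;

// Hypothetical, condensed rendering of the refactor; not the real classes.
interface InferenceInputs {}

record DocumentsOnlyInput(List<String> docs) implements InferenceInputs {}

record QueryAndDocsInputs(String query, List<String> docs) implements InferenceInputs {}

abstract class SenderServiceSketch {
    /** Input construction now happens once, at the top level. */
    public final void infer(String query, List<String> input) {
        InferenceInputs inputs = query != null
            ? new QueryAndDocsInputs(query, input)
            : new DocumentsOnlyInput(input);
        doInfer(inputs);
    }

    /**
     * Implementations accept the supertype. A provider that needs a specific
     * shape downcasts, and a mismatch now surfaces as an IllegalStateException
     * rather than an UnsupportedOperationException thrown eagerly by a
     * per-provider override; both paths end up in the listener's onFailure.
     */
    protected abstract void doInfer(InferenceInputs inputs);
}

The point of the change is that the query/no-query decision is made exactly once in the base service, instead of each provider either re-implementing it or rejecting the query variant.
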
Backport of https://github.com/elastic/elasticsearch/commit/6c1aaa48e73fc63f587fe1efffdd9d5c35108cc5 --- .../inference/services/SenderService.java | 25 +++++------- .../AlibabaCloudSearchService.java | 38 +++++-------------- .../amazonbedrock/AmazonBedrockService.java | 23 +++-------- .../services/anthropic/AnthropicService.java | 21 ++-------- .../azureaistudio/AzureAiStudioService.java | 28 +++++--------- .../azureopenai/AzureOpenAiService.java | 28 +++++--------- .../services/cohere/CohereService.java | 33 +++------------- .../elastic/ElasticInferenceService.java | 29 ++++---------- .../googleaistudio/GoogleAiStudioService.java | 28 +++++--------- .../googlevertexai/GoogleVertexAiService.java | 38 +++++-------------- .../huggingface/HuggingFaceBaseService.java | 20 ++-------- .../huggingface/HuggingFaceService.java | 10 +++-- .../elser/HuggingFaceElserService.java | 14 +++---- .../ibmwatsonx/IbmWatsonxService.java | 28 +++++--------- .../services/mistral/MistralService.java | 23 +++-------- .../services/openai/OpenAiService.java | 28 +++++--------- .../services/SenderServiceTests.java | 21 ++-------- .../AlibabaCloudSearchServiceTests.java | 3 +- .../AzureAiStudioServiceTests.java | 32 ---------------- .../elastic/ElasticInferenceServiceTests.java | 30 --------------- .../services/mistral/MistralServiceTests.java | 27 ------------- 21 files changed, 121 insertions(+), 406 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/SenderService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/SenderService.java index 1c64f505402d8..ad0c44714041f 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/SenderService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/SenderService.java @@ -17,7 +17,10 @@ import org.elasticsearch.inference.InferenceServiceResults; import org.elasticsearch.inference.InputType; import org.elasticsearch.inference.Model; +import org.elasticsearch.xpack.inference.external.http.sender.DocumentsOnlyInput; import org.elasticsearch.xpack.inference.external.http.sender.HttpRequestSender; +import org.elasticsearch.xpack.inference.external.http.sender.InferenceInputs; +import org.elasticsearch.xpack.inference.external.http.sender.QueryAndDocsInputs; import org.elasticsearch.xpack.inference.external.http.sender.Sender; import java.io.IOException; @@ -55,9 +58,9 @@ public void infer( ) { init(); if (query != null) { - doInfer(model, query, input, taskSettings, inputType, timeout, listener); + doInfer(model, new QueryAndDocsInputs(query, input), taskSettings, inputType, timeout, listener); } else { - doInfer(model, input, taskSettings, inputType, timeout, listener); + doInfer(model, new DocumentsOnlyInput(input), taskSettings, inputType, timeout, listener); } } @@ -86,22 +89,13 @@ public void chunkedInfer( ActionListener> listener ) { init(); - doChunkedInfer(model, null, input, taskSettings, inputType, chunkingOptions, timeout, listener); + // a non-null query is not supported and is dropped by all providers + doChunkedInfer(model, new DocumentsOnlyInput(input), taskSettings, inputType, chunkingOptions, timeout, listener); } protected abstract void doInfer( Model model, - List input, - Map taskSettings, - InputType inputType, - TimeValue timeout, - ActionListener listener - ); - - protected abstract void doInfer( - Model model, - String query, - List input, + InferenceInputs inputs, Map taskSettings, 
InputType inputType, TimeValue timeout, @@ -110,8 +104,7 @@ protected abstract void doInfer( protected abstract void doChunkedInfer( Model model, - @Nullable String query, - List input, + DocumentsOnlyInput inputs, Map taskSettings, InputType inputType, ChunkingOptions chunkingOptions, diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/alibabacloudsearch/AlibabaCloudSearchService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/alibabacloudsearch/AlibabaCloudSearchService.java index 7bb0fb86effc2..8f0c9896c6642 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/alibabacloudsearch/AlibabaCloudSearchService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/alibabacloudsearch/AlibabaCloudSearchService.java @@ -28,7 +28,7 @@ import org.elasticsearch.xpack.inference.external.action.alibabacloudsearch.AlibabaCloudSearchActionCreator; import org.elasticsearch.xpack.inference.external.http.sender.DocumentsOnlyInput; import org.elasticsearch.xpack.inference.external.http.sender.HttpRequestSender; -import org.elasticsearch.xpack.inference.external.http.sender.QueryAndDocsInputs; +import org.elasticsearch.xpack.inference.external.http.sender.InferenceInputs; import org.elasticsearch.xpack.inference.external.request.alibabacloudsearch.AlibabaCloudSearchUtils; import org.elasticsearch.xpack.inference.services.ConfigurationParseContext; import org.elasticsearch.xpack.inference.services.SenderService; @@ -204,8 +204,7 @@ public AlibabaCloudSearchModel parsePersistedConfig(String inferenceEntityId, Ta @Override public void doInfer( Model model, - String query, - List input, + InferenceInputs inputs, Map taskSettings, InputType inputType, TimeValue timeout, @@ -220,35 +219,13 @@ public void doInfer( var actionCreator = new AlibabaCloudSearchActionCreator(getSender(), getServiceComponents()); var action = alibabaCloudSearchModel.accept(actionCreator, taskSettings, inputType); - action.execute(new QueryAndDocsInputs(query, input), timeout, listener); - } - - @Override - public void doInfer( - Model model, - List input, - Map taskSettings, - InputType inputType, - TimeValue timeout, - ActionListener listener - ) { - if (model instanceof AlibabaCloudSearchModel == false) { - listener.onFailure(createInvalidModelException(model)); - return; - } - - AlibabaCloudSearchModel alibabaCloudSearchModel = (AlibabaCloudSearchModel) model; - var actionCreator = new AlibabaCloudSearchActionCreator(getSender(), getServiceComponents()); - - var action = alibabaCloudSearchModel.accept(actionCreator, taskSettings, inputType); - action.execute(new DocumentsOnlyInput(input), timeout, listener); + action.execute(inputs, timeout, listener); } @Override protected void doChunkedInfer( Model model, - @Nullable String query, - List input, + DocumentsOnlyInput inputs, Map taskSettings, InputType inputType, ChunkingOptions chunkingOptions, @@ -263,8 +240,11 @@ protected void doChunkedInfer( AlibabaCloudSearchModel alibabaCloudSearchModel = (AlibabaCloudSearchModel) model; var actionCreator = new AlibabaCloudSearchActionCreator(getSender(), getServiceComponents()); - var batchedRequests = new EmbeddingRequestChunker(input, EMBEDDING_MAX_BATCH_SIZE, EmbeddingRequestChunker.EmbeddingType.FLOAT) - .batchRequestsWithListeners(listener); + var batchedRequests = new EmbeddingRequestChunker( + inputs.getInputs(), + EMBEDDING_MAX_BATCH_SIZE, + 
EmbeddingRequestChunker.EmbeddingType.FLOAT + ).batchRequestsWithListeners(listener); for (var request : batchedRequests) { var action = alibabaCloudSearchModel.accept(actionCreator, taskSettings, inputType); action.execute(new DocumentsOnlyInput(request.batch().inputs()), timeout, request.listener()); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockService.java index d12929eecb88e..c00932a169c24 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockService.java @@ -28,6 +28,7 @@ import org.elasticsearch.xpack.inference.external.amazonbedrock.AmazonBedrockRequestSender; import org.elasticsearch.xpack.inference.external.http.sender.DocumentsOnlyInput; import org.elasticsearch.xpack.inference.external.http.sender.HttpRequestSender; +import org.elasticsearch.xpack.inference.external.http.sender.InferenceInputs; import org.elasticsearch.xpack.inference.external.http.sender.Sender; import org.elasticsearch.xpack.inference.services.ConfigurationParseContext; import org.elasticsearch.xpack.inference.services.SenderService; @@ -71,7 +72,7 @@ public AmazonBedrockService( @Override protected void doInfer( Model model, - List input, + InferenceInputs inputs, Map taskSettings, InputType inputType, TimeValue timeout, @@ -80,30 +81,16 @@ protected void doInfer( var actionCreator = new AmazonBedrockActionCreator(amazonBedrockSender, this.getServiceComponents(), timeout); if (model instanceof AmazonBedrockModel baseAmazonBedrockModel) { var action = baseAmazonBedrockModel.accept(actionCreator, taskSettings); - action.execute(new DocumentsOnlyInput(input), timeout, listener); + action.execute(inputs, timeout, listener); } else { listener.onFailure(createInvalidModelException(model)); } } - @Override - protected void doInfer( - Model model, - String query, - List input, - Map taskSettings, - InputType inputType, - TimeValue timeout, - ActionListener listener - ) { - throw new UnsupportedOperationException("Amazon Bedrock service does not support inference with query input"); - } - @Override protected void doChunkedInfer( Model model, - String query, - List input, + DocumentsOnlyInput inputs, Map taskSettings, InputType inputType, ChunkingOptions chunkingOptions, @@ -113,7 +100,7 @@ protected void doChunkedInfer( var actionCreator = new AmazonBedrockActionCreator(amazonBedrockSender, this.getServiceComponents(), timeout); if (model instanceof AmazonBedrockModel baseAmazonBedrockModel) { var maxBatchSize = getEmbeddingsMaxBatchSize(baseAmazonBedrockModel.provider()); - var batchedRequests = new EmbeddingRequestChunker(input, maxBatchSize, EmbeddingRequestChunker.EmbeddingType.FLOAT) + var batchedRequests = new EmbeddingRequestChunker(inputs.getInputs(), maxBatchSize, EmbeddingRequestChunker.EmbeddingType.FLOAT) .batchRequestsWithListeners(listener); for (var request : batchedRequests) { var action = baseAmazonBedrockModel.accept(actionCreator, taskSettings); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/anthropic/AnthropicService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/anthropic/AnthropicService.java index 
d1db6f260351b..d7b945cd709fc 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/anthropic/AnthropicService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/anthropic/AnthropicService.java @@ -25,6 +25,7 @@ import org.elasticsearch.xpack.inference.external.action.anthropic.AnthropicActionCreator; import org.elasticsearch.xpack.inference.external.http.sender.DocumentsOnlyInput; import org.elasticsearch.xpack.inference.external.http.sender.HttpRequestSender; +import org.elasticsearch.xpack.inference.external.http.sender.InferenceInputs; import org.elasticsearch.xpack.inference.services.ConfigurationParseContext; import org.elasticsearch.xpack.inference.services.SenderService; import org.elasticsearch.xpack.inference.services.ServiceComponents; @@ -165,7 +166,7 @@ public AnthropicModel parsePersistedConfig(String inferenceEntityId, TaskType ta @Override public void doInfer( Model model, - List input, + InferenceInputs inputs, Map taskSettings, InputType inputType, TimeValue timeout, @@ -180,27 +181,13 @@ public void doInfer( var actionCreator = new AnthropicActionCreator(getSender(), getServiceComponents()); var action = anthropicModel.accept(actionCreator, taskSettings); - action.execute(new DocumentsOnlyInput(input), timeout, listener); - } - - @Override - protected void doInfer( - Model model, - String query, - List input, - Map taskSettings, - InputType inputType, - TimeValue timeout, - ActionListener listener - ) { - throw new UnsupportedOperationException("Anthropic service does not support inference with query input"); + action.execute(inputs, timeout, listener); } @Override protected void doChunkedInfer( Model model, - @Nullable String query, - List input, + DocumentsOnlyInput inputs, Map taskSettings, InputType inputType, ChunkingOptions chunkingOptions, diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/azureaistudio/AzureAiStudioService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/azureaistudio/AzureAiStudioService.java index c4ef5faf8e667..bd648250a509b 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/azureaistudio/AzureAiStudioService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/azureaistudio/AzureAiStudioService.java @@ -28,6 +28,7 @@ import org.elasticsearch.xpack.inference.external.action.azureaistudio.AzureAiStudioActionCreator; import org.elasticsearch.xpack.inference.external.http.sender.DocumentsOnlyInput; import org.elasticsearch.xpack.inference.external.http.sender.HttpRequestSender; +import org.elasticsearch.xpack.inference.external.http.sender.InferenceInputs; import org.elasticsearch.xpack.inference.services.ConfigurationParseContext; import org.elasticsearch.xpack.inference.services.SenderService; import org.elasticsearch.xpack.inference.services.ServiceComponents; @@ -62,7 +63,7 @@ public AzureAiStudioService(HttpRequestSender.Factory factory, ServiceComponents @Override protected void doInfer( Model model, - List input, + InferenceInputs inputs, Map taskSettings, InputType inputType, TimeValue timeout, @@ -72,30 +73,16 @@ protected void doInfer( if (model instanceof AzureAiStudioModel baseAzureAiStudioModel) { var action = baseAzureAiStudioModel.accept(actionCreator, taskSettings); - action.execute(new DocumentsOnlyInput(input), timeout, listener); + action.execute(inputs, timeout, listener); } else { 
listener.onFailure(createInvalidModelException(model)); } } - @Override - protected void doInfer( - Model model, - String query, - List input, - Map taskSettings, - InputType inputType, - TimeValue timeout, - ActionListener listener - ) { - throw new UnsupportedOperationException("Azure AI Studio service does not support inference with query input"); - } - @Override protected void doChunkedInfer( Model model, - String query, - List input, + DocumentsOnlyInput inputs, Map taskSettings, InputType inputType, ChunkingOptions chunkingOptions, @@ -104,8 +91,11 @@ protected void doChunkedInfer( ) { if (model instanceof AzureAiStudioModel baseAzureAiStudioModel) { var actionCreator = new AzureAiStudioActionCreator(getSender(), getServiceComponents()); - var batchedRequests = new EmbeddingRequestChunker(input, EMBEDDING_MAX_BATCH_SIZE, EmbeddingRequestChunker.EmbeddingType.FLOAT) - .batchRequestsWithListeners(listener); + var batchedRequests = new EmbeddingRequestChunker( + inputs.getInputs(), + EMBEDDING_MAX_BATCH_SIZE, + EmbeddingRequestChunker.EmbeddingType.FLOAT + ).batchRequestsWithListeners(listener); for (var request : batchedRequests) { var action = baseAzureAiStudioModel.accept(actionCreator, taskSettings); action.execute(new DocumentsOnlyInput(request.batch().inputs()), timeout, request.listener()); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/azureopenai/AzureOpenAiService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/azureopenai/AzureOpenAiService.java index 3c75243770f97..e22500cc9dad7 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/azureopenai/AzureOpenAiService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/azureopenai/AzureOpenAiService.java @@ -28,6 +28,7 @@ import org.elasticsearch.xpack.inference.external.action.azureopenai.AzureOpenAiActionCreator; import org.elasticsearch.xpack.inference.external.http.sender.DocumentsOnlyInput; import org.elasticsearch.xpack.inference.external.http.sender.HttpRequestSender; +import org.elasticsearch.xpack.inference.external.http.sender.InferenceInputs; import org.elasticsearch.xpack.inference.services.ConfigurationParseContext; import org.elasticsearch.xpack.inference.services.SenderService; import org.elasticsearch.xpack.inference.services.ServiceComponents; @@ -185,7 +186,7 @@ public AzureOpenAiModel parsePersistedConfig(String inferenceEntityId, TaskType @Override protected void doInfer( Model model, - List input, + InferenceInputs inputs, Map taskSettings, InputType inputType, TimeValue timeout, @@ -200,27 +201,13 @@ protected void doInfer( var actionCreator = new AzureOpenAiActionCreator(getSender(), getServiceComponents()); var action = azureOpenAiModel.accept(actionCreator, taskSettings); - action.execute(new DocumentsOnlyInput(input), timeout, listener); - } - - @Override - protected void doInfer( - Model model, - String query, - List input, - Map taskSettings, - InputType inputType, - TimeValue timeout, - ActionListener listener - ) { - throw new UnsupportedOperationException("Azure OpenAI service does not support inference with query input"); + action.execute(inputs, timeout, listener); } @Override protected void doChunkedInfer( Model model, - String query, - List input, + DocumentsOnlyInput inputs, Map taskSettings, InputType inputType, ChunkingOptions chunkingOptions, @@ -233,8 +220,11 @@ protected void doChunkedInfer( } AzureOpenAiModel azureOpenAiModel 
= (AzureOpenAiModel) model; var actionCreator = new AzureOpenAiActionCreator(getSender(), getServiceComponents()); - var batchedRequests = new EmbeddingRequestChunker(input, EMBEDDING_MAX_BATCH_SIZE, EmbeddingRequestChunker.EmbeddingType.FLOAT) - .batchRequestsWithListeners(listener); + var batchedRequests = new EmbeddingRequestChunker( + inputs.getInputs(), + EMBEDDING_MAX_BATCH_SIZE, + EmbeddingRequestChunker.EmbeddingType.FLOAT + ).batchRequestsWithListeners(listener); for (var request : batchedRequests) { var action = azureOpenAiModel.accept(actionCreator, taskSettings); action.execute(new DocumentsOnlyInput(request.batch().inputs()), timeout, request.listener()); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/cohere/CohereService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/cohere/CohereService.java index 2feb1428c4508..27f8fdf3a029a 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/cohere/CohereService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/cohere/CohereService.java @@ -27,7 +27,7 @@ import org.elasticsearch.xpack.inference.external.action.cohere.CohereActionCreator; import org.elasticsearch.xpack.inference.external.http.sender.DocumentsOnlyInput; import org.elasticsearch.xpack.inference.external.http.sender.HttpRequestSender; -import org.elasticsearch.xpack.inference.external.http.sender.QueryAndDocsInputs; +import org.elasticsearch.xpack.inference.external.http.sender.InferenceInputs; import org.elasticsearch.xpack.inference.services.ConfigurationParseContext; import org.elasticsearch.xpack.inference.services.SenderService; import org.elasticsearch.xpack.inference.services.ServiceComponents; @@ -188,8 +188,7 @@ public CohereModel parsePersistedConfig(String inferenceEntityId, TaskType taskT @Override public void doInfer( Model model, - String query, - List input, + InferenceInputs inputs, Map taskSettings, InputType inputType, TimeValue timeout, @@ -204,35 +203,13 @@ public void doInfer( var actionCreator = new CohereActionCreator(getSender(), getServiceComponents()); var action = cohereModel.accept(actionCreator, taskSettings, inputType); - action.execute(new QueryAndDocsInputs(query, input), timeout, listener); - } - - @Override - public void doInfer( - Model model, - List input, - Map taskSettings, - InputType inputType, - TimeValue timeout, - ActionListener listener - ) { - if (model instanceof CohereModel == false) { - listener.onFailure(createInvalidModelException(model)); - return; - } - - CohereModel cohereModel = (CohereModel) model; - var actionCreator = new CohereActionCreator(getSender(), getServiceComponents()); - - var action = cohereModel.accept(actionCreator, taskSettings, inputType); - action.execute(new DocumentsOnlyInput(input), timeout, listener); + action.execute(inputs, timeout, listener); } @Override protected void doChunkedInfer( Model model, - @Nullable String query, - List input, + DocumentsOnlyInput inputs, Map taskSettings, InputType inputType, ChunkingOptions chunkingOptions, @@ -248,7 +225,7 @@ protected void doChunkedInfer( var actionCreator = new CohereActionCreator(getSender(), getServiceComponents()); var batchedRequests = new EmbeddingRequestChunker( - input, + inputs.getInputs(), EMBEDDING_MAX_BATCH_SIZE, EmbeddingRequestChunker.EmbeddingType.fromDenseVectorElementType(model.getServiceSettings().elementType()) ).batchRequestsWithListeners(listener); diff --git 
a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceService.java index f77217f9c02f9..103ddd4c5c5ea 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceService.java @@ -30,6 +30,7 @@ import org.elasticsearch.xpack.inference.external.action.elastic.ElasticInferenceServiceActionCreator; import org.elasticsearch.xpack.inference.external.http.sender.DocumentsOnlyInput; import org.elasticsearch.xpack.inference.external.http.sender.HttpRequestSender; +import org.elasticsearch.xpack.inference.external.http.sender.InferenceInputs; import org.elasticsearch.xpack.inference.services.ConfigurationParseContext; import org.elasticsearch.xpack.inference.services.SenderService; import org.elasticsearch.xpack.inference.services.ServiceComponents; @@ -63,7 +64,7 @@ public ElasticInferenceService( @Override protected void doInfer( Model model, - List input, + InferenceInputs inputs, Map taskSettings, InputType inputType, TimeValue timeout, @@ -78,27 +79,13 @@ protected void doInfer( var actionCreator = new ElasticInferenceServiceActionCreator(getSender(), getServiceComponents()); var action = elasticInferenceServiceModel.accept(actionCreator, taskSettings); - action.execute(new DocumentsOnlyInput(input), timeout, listener); - } - - @Override - protected void doInfer( - Model model, - String query, - List input, - Map taskSettings, - InputType inputType, - TimeValue timeout, - ActionListener listener - ) { - throw new UnsupportedOperationException("Query input not supported for Elastic Inference Service"); + action.execute(inputs, timeout, listener); } @Override protected void doChunkedInfer( Model model, - String query, - List input, + DocumentsOnlyInput inputs, Map taskSettings, InputType inputType, ChunkingOptions chunkingOptions, @@ -107,10 +94,10 @@ protected void doChunkedInfer( ) { // Pass-through without actually performing chunking (result will have a single chunk per input) ActionListener inferListener = listener.delegateFailureAndWrap( - (delegate, response) -> delegate.onResponse(translateToChunkedResults(input, response)) + (delegate, response) -> delegate.onResponse(translateToChunkedResults(inputs, response)) ); - doInfer(model, input, taskSettings, inputType, timeout, inferListener); + doInfer(model, inputs, taskSettings, inputType, timeout, inferListener); } @Override @@ -247,11 +234,11 @@ public void checkModelConfig(Model model, ActionListener listener) { } private static List translateToChunkedResults( - List inputs, + InferenceInputs inputs, InferenceServiceResults inferenceResults ) { if (inferenceResults instanceof SparseEmbeddingResults sparseEmbeddingResults) { - return InferenceChunkedSparseEmbeddingResults.listOf(inputs, sparseEmbeddingResults); + return InferenceChunkedSparseEmbeddingResults.listOf(DocumentsOnlyInput.of(inputs).getInputs(), sparseEmbeddingResults); } else if (inferenceResults instanceof ErrorInferenceResults error) { return List.of(new ErrorChunkedInferenceResults(error.getException())); } else { diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/googleaistudio/GoogleAiStudioService.java 
b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/googleaistudio/GoogleAiStudioService.java index 911ccd33690d4..08eb67ca744a4 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/googleaistudio/GoogleAiStudioService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/googleaistudio/GoogleAiStudioService.java @@ -27,6 +27,7 @@ import org.elasticsearch.xpack.inference.external.action.googleaistudio.GoogleAiStudioActionCreator; import org.elasticsearch.xpack.inference.external.http.sender.DocumentsOnlyInput; import org.elasticsearch.xpack.inference.external.http.sender.HttpRequestSender; +import org.elasticsearch.xpack.inference.external.http.sender.InferenceInputs; import org.elasticsearch.xpack.inference.services.ConfigurationParseContext; import org.elasticsearch.xpack.inference.services.SenderService; import org.elasticsearch.xpack.inference.services.ServiceComponents; @@ -215,7 +216,7 @@ private GoogleAiStudioEmbeddingsModel updateModelWithEmbeddingDetails(GoogleAiSt @Override protected void doInfer( Model model, - List input, + InferenceInputs inputs, Map taskSettings, InputType inputType, TimeValue timeout, @@ -230,27 +231,13 @@ protected void doInfer( var actionCreator = new GoogleAiStudioActionCreator(getSender(), getServiceComponents()); var action = googleAiStudioModel.accept(actionCreator, taskSettings, inputType); - action.execute(new DocumentsOnlyInput(input), timeout, listener); - } - - @Override - protected void doInfer( - Model model, - String query, - List input, - Map taskSettings, - InputType inputType, - TimeValue timeout, - ActionListener listener - ) { - throw new UnsupportedOperationException("Query input not supported for Google AI Studio"); + action.execute(inputs, timeout, listener); } @Override protected void doChunkedInfer( Model model, - String query, - List input, + DocumentsOnlyInput inputs, Map taskSettings, InputType inputType, ChunkingOptions chunkingOptions, @@ -260,8 +247,11 @@ protected void doChunkedInfer( GoogleAiStudioModel googleAiStudioModel = (GoogleAiStudioModel) model; var actionCreator = new GoogleAiStudioActionCreator(getSender(), getServiceComponents()); - var batchedRequests = new EmbeddingRequestChunker(input, EMBEDDING_MAX_BATCH_SIZE, EmbeddingRequestChunker.EmbeddingType.FLOAT) - .batchRequestsWithListeners(listener); + var batchedRequests = new EmbeddingRequestChunker( + inputs.getInputs(), + EMBEDDING_MAX_BATCH_SIZE, + EmbeddingRequestChunker.EmbeddingType.FLOAT + ).batchRequestsWithListeners(listener); for (var request : batchedRequests) { var action = googleAiStudioModel.accept(actionCreator, taskSettings, inputType); action.execute(new DocumentsOnlyInput(request.batch().inputs()), timeout, request.listener()); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/googlevertexai/GoogleVertexAiService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/googlevertexai/GoogleVertexAiService.java index f7a8055a90abb..2bbf219438280 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/googlevertexai/GoogleVertexAiService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/googlevertexai/GoogleVertexAiService.java @@ -27,7 +27,7 @@ import org.elasticsearch.xpack.inference.external.action.googlevertexai.GoogleVertexAiActionCreator; import 
org.elasticsearch.xpack.inference.external.http.sender.DocumentsOnlyInput; import org.elasticsearch.xpack.inference.external.http.sender.HttpRequestSender; -import org.elasticsearch.xpack.inference.external.http.sender.QueryAndDocsInputs; +import org.elasticsearch.xpack.inference.external.http.sender.InferenceInputs; import org.elasticsearch.xpack.inference.services.ConfigurationParseContext; import org.elasticsearch.xpack.inference.services.SenderService; import org.elasticsearch.xpack.inference.services.ServiceComponents; @@ -149,7 +149,7 @@ public void checkModelConfig(Model model, ActionListener listener) { @Override protected void doInfer( Model model, - List input, + InferenceInputs inputs, Map taskSettings, InputType inputType, TimeValue timeout, @@ -165,36 +165,13 @@ protected void doInfer( var actionCreator = new GoogleVertexAiActionCreator(getSender(), getServiceComponents()); var action = googleVertexAiModel.accept(actionCreator, taskSettings); - action.execute(new DocumentsOnlyInput(input), timeout, listener); - } - - @Override - protected void doInfer( - Model model, - String query, - List input, - Map taskSettings, - InputType inputType, - TimeValue timeout, - ActionListener listener - ) { - if (model instanceof GoogleVertexAiModel == false) { - listener.onFailure(createInvalidModelException(model)); - return; - } - - GoogleVertexAiModel googleVertexAiModel = (GoogleVertexAiModel) model; - var actionCreator = new GoogleVertexAiActionCreator(getSender(), getServiceComponents()); - - var action = googleVertexAiModel.accept(actionCreator, taskSettings); - action.execute(new QueryAndDocsInputs(query, input), timeout, listener); + action.execute(inputs, timeout, listener); } @Override protected void doChunkedInfer( Model model, - String query, - List input, + DocumentsOnlyInput inputs, Map taskSettings, InputType inputType, ChunkingOptions chunkingOptions, @@ -204,8 +181,11 @@ protected void doChunkedInfer( GoogleVertexAiModel googleVertexAiModel = (GoogleVertexAiModel) model; var actionCreator = new GoogleVertexAiActionCreator(getSender(), getServiceComponents()); - var batchedRequests = new EmbeddingRequestChunker(input, EMBEDDING_MAX_BATCH_SIZE, EmbeddingRequestChunker.EmbeddingType.FLOAT) - .batchRequestsWithListeners(listener); + var batchedRequests = new EmbeddingRequestChunker( + inputs.getInputs(), + EMBEDDING_MAX_BATCH_SIZE, + EmbeddingRequestChunker.EmbeddingType.FLOAT + ).batchRequestsWithListeners(listener); for (var request : batchedRequests) { var action = googleVertexAiModel.accept(actionCreator, taskSettings); action.execute(new DocumentsOnlyInput(request.batch().inputs()), timeout, request.listener()); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/huggingface/HuggingFaceBaseService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/huggingface/HuggingFaceBaseService.java index 27947f499fa18..d129a0c44e835 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/huggingface/HuggingFaceBaseService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/huggingface/HuggingFaceBaseService.java @@ -16,13 +16,12 @@ import org.elasticsearch.inference.ModelSecrets; import org.elasticsearch.inference.TaskType; import org.elasticsearch.xpack.inference.external.action.huggingface.HuggingFaceActionCreator; -import org.elasticsearch.xpack.inference.external.http.sender.DocumentsOnlyInput; import 
org.elasticsearch.xpack.inference.external.http.sender.HttpRequestSender; +import org.elasticsearch.xpack.inference.external.http.sender.InferenceInputs; import org.elasticsearch.xpack.inference.services.ConfigurationParseContext; import org.elasticsearch.xpack.inference.services.SenderService; import org.elasticsearch.xpack.inference.services.ServiceComponents; -import java.util.List; import java.util.Map; import java.util.Set; @@ -119,7 +118,7 @@ protected abstract HuggingFaceModel createModel( @Override public void doInfer( Model model, - List input, + InferenceInputs inputs, Map taskSettings, InputType inputType, TimeValue timeout, @@ -134,19 +133,6 @@ public void doInfer( var actionCreator = new HuggingFaceActionCreator(getSender(), getServiceComponents()); var action = huggingFaceModel.accept(actionCreator); - action.execute(new DocumentsOnlyInput(input), timeout, listener); - } - - @Override - protected void doInfer( - Model model, - String query, - List input, - Map taskSettings, - InputType inputType, - TimeValue timeout, - ActionListener listener - ) { - throw new UnsupportedOperationException("Hugging Face service does not support inference with query input"); + action.execute(inputs, timeout, listener); } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/huggingface/HuggingFaceService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/huggingface/HuggingFaceService.java index 7a591f094982d..bdfa87e77b708 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/huggingface/HuggingFaceService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/huggingface/HuggingFaceService.java @@ -98,8 +98,7 @@ private static HuggingFaceEmbeddingsModel updateModelWithEmbeddingDetails(Huggin @Override protected void doChunkedInfer( Model model, - @Nullable String query, - List input, + DocumentsOnlyInput inputs, Map taskSettings, InputType inputType, ChunkingOptions chunkingOptions, @@ -114,8 +113,11 @@ protected void doChunkedInfer( var huggingFaceModel = (HuggingFaceModel) model; var actionCreator = new HuggingFaceActionCreator(getSender(), getServiceComponents()); - var batchedRequests = new EmbeddingRequestChunker(input, EMBEDDING_MAX_BATCH_SIZE, EmbeddingRequestChunker.EmbeddingType.FLOAT) - .batchRequestsWithListeners(listener); + var batchedRequests = new EmbeddingRequestChunker( + inputs.getInputs(), + EMBEDDING_MAX_BATCH_SIZE, + EmbeddingRequestChunker.EmbeddingType.FLOAT + ).batchRequestsWithListeners(listener); for (var request : batchedRequests) { var action = huggingFaceModel.accept(actionCreator); action.execute(new DocumentsOnlyInput(request.batch().inputs()), timeout, request.listener()); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/huggingface/elser/HuggingFaceElserService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/huggingface/elser/HuggingFaceElserService.java index ee35869c6a8d1..b9540cab17a9a 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/huggingface/elser/HuggingFaceElserService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/huggingface/elser/HuggingFaceElserService.java @@ -27,6 +27,7 @@ import org.elasticsearch.xpack.core.inference.results.InferenceTextEmbeddingFloatResults; import 
org.elasticsearch.xpack.core.inference.results.SparseEmbeddingResults; import org.elasticsearch.xpack.core.ml.inference.results.ErrorInferenceResults; +import org.elasticsearch.xpack.inference.external.http.sender.DocumentsOnlyInput; import org.elasticsearch.xpack.inference.external.http.sender.HttpRequestSender; import org.elasticsearch.xpack.inference.services.ConfigurationParseContext; import org.elasticsearch.xpack.inference.services.ServiceComponents; @@ -68,8 +69,7 @@ protected HuggingFaceModel createModel( @Override protected void doChunkedInfer( Model model, - @Nullable String query, - List input, + DocumentsOnlyInput inputs, Map taskSettings, InputType inputType, ChunkingOptions chunkingOptions, @@ -77,21 +77,21 @@ protected void doChunkedInfer( ActionListener> listener ) { ActionListener inferListener = listener.delegateFailureAndWrap( - (delegate, response) -> delegate.onResponse(translateToChunkedResults(input, response)) + (delegate, response) -> delegate.onResponse(translateToChunkedResults(inputs, response)) ); // TODO chunking sparse embeddings not implemented - doInfer(model, input, taskSettings, inputType, timeout, inferListener); + doInfer(model, inputs, taskSettings, inputType, timeout, inferListener); } private static List translateToChunkedResults( - List inputs, + DocumentsOnlyInput inputs, InferenceServiceResults inferenceResults ) { if (inferenceResults instanceof InferenceTextEmbeddingFloatResults textEmbeddingResults) { - return InferenceChunkedTextEmbeddingFloatResults.listOf(inputs, textEmbeddingResults); + return InferenceChunkedTextEmbeddingFloatResults.listOf(inputs.getInputs(), textEmbeddingResults); } else if (inferenceResults instanceof SparseEmbeddingResults sparseEmbeddingResults) { - return InferenceChunkedSparseEmbeddingResults.listOf(inputs, sparseEmbeddingResults); + return InferenceChunkedSparseEmbeddingResults.listOf(inputs.getInputs(), sparseEmbeddingResults); } else if (inferenceResults instanceof ErrorInferenceResults error) { return List.of(new ErrorChunkedInferenceResults(error.getException())); } else { diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxService.java index a9d8a319a56c2..895ebaa66c806 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxService.java @@ -27,6 +27,7 @@ import org.elasticsearch.xpack.inference.external.action.ibmwatsonx.IbmWatsonxActionCreator; import org.elasticsearch.xpack.inference.external.http.sender.DocumentsOnlyInput; import org.elasticsearch.xpack.inference.external.http.sender.HttpRequestSender; +import org.elasticsearch.xpack.inference.external.http.sender.InferenceInputs; import org.elasticsearch.xpack.inference.external.http.sender.Sender; import org.elasticsearch.xpack.inference.services.ConfigurationParseContext; import org.elasticsearch.xpack.inference.services.SenderService; @@ -209,7 +210,7 @@ private IbmWatsonxEmbeddingsModel updateModelWithEmbeddingDetails(IbmWatsonxEmbe @Override protected void doInfer( Model model, - List input, + InferenceInputs input, Map taskSettings, InputType inputType, TimeValue timeout, @@ -223,27 +224,13 @@ protected void doInfer( IbmWatsonxModel ibmWatsonxModel = (IbmWatsonxModel) model; var action = 
ibmWatsonxModel.accept(getActionCreator(getSender(), getServiceComponents()), taskSettings, inputType); - action.execute(new DocumentsOnlyInput(input), timeout, listener); - } - - @Override - protected void doInfer( - Model model, - String query, - List input, - Map taskSettings, - InputType inputType, - TimeValue timeout, - ActionListener listener - ) { - throw new UnsupportedOperationException("Query input not supported for IBM Watsonx"); + action.execute(input, timeout, listener); } @Override protected void doChunkedInfer( Model model, - String query, - List input, + DocumentsOnlyInput input, Map taskSettings, InputType inputType, ChunkingOptions chunkingOptions, @@ -252,8 +239,11 @@ protected void doChunkedInfer( ) { IbmWatsonxModel ibmWatsonxModel = (IbmWatsonxModel) model; - var batchedRequests = new EmbeddingRequestChunker(input, EMBEDDING_MAX_BATCH_SIZE, EmbeddingRequestChunker.EmbeddingType.FLOAT) - .batchRequestsWithListeners(listener); + var batchedRequests = new EmbeddingRequestChunker( + input.getInputs(), + EMBEDDING_MAX_BATCH_SIZE, + EmbeddingRequestChunker.EmbeddingType.FLOAT + ).batchRequestsWithListeners(listener); for (var request : batchedRequests) { var action = ibmWatsonxModel.accept(getActionCreator(getSender(), getServiceComponents()), taskSettings, inputType); action.execute(new DocumentsOnlyInput(request.batch().inputs()), timeout, request.listener()); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mistral/MistralService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mistral/MistralService.java index 18f2570946662..1acc13f50778b 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mistral/MistralService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mistral/MistralService.java @@ -26,6 +26,7 @@ import org.elasticsearch.xpack.inference.external.action.mistral.MistralActionCreator; import org.elasticsearch.xpack.inference.external.http.sender.DocumentsOnlyInput; import org.elasticsearch.xpack.inference.external.http.sender.HttpRequestSender; +import org.elasticsearch.xpack.inference.external.http.sender.InferenceInputs; import org.elasticsearch.xpack.inference.services.ConfigurationParseContext; import org.elasticsearch.xpack.inference.services.SenderService; import org.elasticsearch.xpack.inference.services.ServiceComponents; @@ -54,7 +55,7 @@ public MistralService(HttpRequestSender.Factory factory, ServiceComponents servi @Override protected void doInfer( Model model, - List input, + InferenceInputs inputs, Map taskSettings, InputType inputType, TimeValue timeout, @@ -64,30 +65,16 @@ protected void doInfer( if (model instanceof MistralEmbeddingsModel mistralEmbeddingsModel) { var action = mistralEmbeddingsModel.accept(actionCreator, taskSettings); - action.execute(new DocumentsOnlyInput(input), timeout, listener); + action.execute(inputs, timeout, listener); } else { listener.onFailure(createInvalidModelException(model)); } } - @Override - protected void doInfer( - Model model, - String query, - List input, - Map taskSettings, - InputType inputType, - TimeValue timeout, - ActionListener listener - ) { - throw new UnsupportedOperationException("Mistral service does not support inference with query input"); - } - @Override protected void doChunkedInfer( Model model, - String query, - List input, + DocumentsOnlyInput inputs, Map taskSettings, InputType inputType, ChunkingOptions chunkingOptions, 
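The hunks above and below all apply one refactoring: the per-service doInfer overloads, which differed only in whether a query accompanied the documents, collapse into a single hook taking an InferenceInputs, and doChunkedInfer now receives a DocumentsOnlyInput directly. A minimal sketch of the resulting shape follows; the stand-in types are simplified stubs written for illustration, not the upstream classes in org.elasticsearch.xpack.inference.external.http.sender.

    import java.util.List;

    // Simplified stand-ins for the patch's input hierarchy (stubs, not upstream code).
    interface InferenceInputs {}

    record DocumentsOnlyInput(List<String> inputs) implements InferenceInputs {}

    record QueryAndDocsInputs(String query, List<String> docs) implements InferenceInputs {}

    // Sketch of the base-class shape the refactor leaves behind: both public
    // call shapes converge on one polymorphic hook, so a service no longer
    // declares a second doInfer overload whose only job is to throw
    // UnsupportedOperationException for query input.
    abstract class SenderServiceSketch {

        public final void infer(List<String> input) {
            doInfer(new DocumentsOnlyInput(input));
        }

        public final void infer(String query, List<String> input) {
            doInfer(new QueryAndDocsInputs(query, input));
        }

        protected abstract void doInfer(InferenceInputs inputs);
    }

Services that only handle documents can convert defensively, as ElasticInferenceService does above with DocumentsOnlyInput.of(inputs).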
@@ -98,7 +85,7 @@ protected void doChunkedInfer( if (model instanceof MistralEmbeddingsModel mistralEmbeddingsModel) { var batchedRequests = new EmbeddingRequestChunker( - input, + inputs.getInputs(), MistralConstants.MAX_BATCH_SIZE, EmbeddingRequestChunker.EmbeddingType.FLOAT ).batchRequestsWithListeners(listener); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/openai/OpenAiService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/openai/OpenAiService.java index 7d2a4adbb27b2..cee3ccf676c4c 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/openai/OpenAiService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/openai/OpenAiService.java @@ -28,6 +28,7 @@ import org.elasticsearch.xpack.inference.external.action.openai.OpenAiActionCreator; import org.elasticsearch.xpack.inference.external.http.sender.DocumentsOnlyInput; import org.elasticsearch.xpack.inference.external.http.sender.HttpRequestSender; +import org.elasticsearch.xpack.inference.external.http.sender.InferenceInputs; import org.elasticsearch.xpack.inference.services.ConfigurationParseContext; import org.elasticsearch.xpack.inference.services.SenderService; import org.elasticsearch.xpack.inference.services.ServiceComponents; @@ -188,7 +189,7 @@ public OpenAiModel parsePersistedConfig(String inferenceEntityId, TaskType taskT @Override public void doInfer( Model model, - List input, + InferenceInputs inputs, Map taskSettings, InputType inputType, TimeValue timeout, @@ -203,27 +204,13 @@ public void doInfer( var actionCreator = new OpenAiActionCreator(getSender(), getServiceComponents()); var action = openAiModel.accept(actionCreator, taskSettings); - action.execute(new DocumentsOnlyInput(input), timeout, listener); - } - - @Override - protected void doInfer( - Model model, - String query, - List input, - Map taskSettings, - InputType inputType, - TimeValue timeout, - ActionListener listener - ) { - throw new UnsupportedOperationException("OpenAI service does not support inference with query input"); + action.execute(inputs, timeout, listener); } @Override protected void doChunkedInfer( Model model, - @Nullable String query, - List input, + DocumentsOnlyInput inputs, Map taskSettings, InputType inputType, ChunkingOptions chunkingOptions, @@ -238,8 +225,11 @@ protected void doChunkedInfer( OpenAiModel openAiModel = (OpenAiModel) model; var actionCreator = new OpenAiActionCreator(getSender(), getServiceComponents()); - var batchedRequests = new EmbeddingRequestChunker(input, EMBEDDING_MAX_BATCH_SIZE, EmbeddingRequestChunker.EmbeddingType.FLOAT) - .batchRequestsWithListeners(listener); + var batchedRequests = new EmbeddingRequestChunker( + inputs.getInputs(), + EMBEDDING_MAX_BATCH_SIZE, + EmbeddingRequestChunker.EmbeddingType.FLOAT + ).batchRequestsWithListeners(listener); for (var request : batchedRequests) { var action = openAiModel.accept(actionCreator, taskSettings); action.execute(new DocumentsOnlyInput(request.batch().inputs()), timeout, request.listener()); diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/SenderServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/SenderServiceTests.java index 974b31e73b499..6ad17424dbcaa 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/SenderServiceTests.java +++ 
b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/SenderServiceTests.java @@ -10,7 +10,6 @@ import org.elasticsearch.TransportVersion; import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.support.PlainActionFuture; -import org.elasticsearch.core.Nullable; import org.elasticsearch.core.TimeValue; import org.elasticsearch.inference.ChunkedInferenceServiceResults; import org.elasticsearch.inference.ChunkingOptions; @@ -20,7 +19,9 @@ import org.elasticsearch.inference.TaskType; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.xpack.inference.external.http.sender.DocumentsOnlyInput; import org.elasticsearch.xpack.inference.external.http.sender.HttpRequestSender; +import org.elasticsearch.xpack.inference.external.http.sender.InferenceInputs; import org.elasticsearch.xpack.inference.external.http.sender.Sender; import org.junit.After; import org.junit.Before; @@ -105,20 +106,7 @@ private static final class TestSenderService extends SenderService { @Override protected void doInfer( Model model, - List input, - Map taskSettings, - InputType inputType, - TimeValue timeout, - ActionListener listener - ) { - - } - - @Override - protected void doInfer( - Model model, - @Nullable String query, - List input, + InferenceInputs inputs, Map taskSettings, InputType inputType, TimeValue timeout, @@ -130,8 +118,7 @@ protected void doInfer( @Override protected void doChunkedInfer( Model model, - @Nullable String query, - List input, + DocumentsOnlyInput inputs, Map taskSettings, InputType inputType, ChunkingOptions chunkingOptions, diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/alibabacloudsearch/AlibabaCloudSearchServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/alibabacloudsearch/AlibabaCloudSearchServiceTests.java index a192aeb57e9f8..9d9dbfaf86c15 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/alibabacloudsearch/AlibabaCloudSearchServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/alibabacloudsearch/AlibabaCloudSearchServiceTests.java @@ -28,6 +28,7 @@ import org.elasticsearch.xpack.inference.external.http.HttpClientManager; import org.elasticsearch.xpack.inference.external.http.sender.HttpRequestSender; import org.elasticsearch.xpack.inference.external.http.sender.HttpRequestSenderTests; +import org.elasticsearch.xpack.inference.external.http.sender.InferenceInputs; import org.elasticsearch.xpack.inference.external.request.alibabacloudsearch.AlibabaCloudSearchUtils; import org.elasticsearch.xpack.inference.logging.ThrottlerManager; import org.elasticsearch.xpack.inference.services.ServiceFields; @@ -107,7 +108,7 @@ public void testCheckModelConfig() throws IOException { @Override public void doInfer( Model model, - List input, + InferenceInputs inputs, Map taskSettings, InputType inputType, TimeValue timeout, diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/azureaistudio/AzureAiStudioServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/azureaistudio/AzureAiStudioServiceTests.java index 4cc91249ad244..ee96ff5fef6e3 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/azureaistudio/AzureAiStudioServiceTests.java +++ 
b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/azureaistudio/AzureAiStudioServiceTests.java @@ -934,38 +934,6 @@ public void testChunkedInfer() throws IOException { } } - public void testInfer_ThrowsWhenQueryIsPresent() throws IOException { - var senderFactory = HttpRequestSenderTests.createSenderFactory(threadPool, clientManager); - - try (var service = new AzureAiStudioService(senderFactory, createWithEmptySettings(threadPool))) { - webServer.enqueue(new MockResponse().setResponseCode(200).setBody(testChatCompletionResultJson)); - - var model = AzureAiStudioChatCompletionModelTests.createModel( - "id", - getUrl(webServer), - AzureAiStudioProvider.OPENAI, - AzureAiStudioEndpointType.TOKEN, - "apikey" - ); - - PlainActionFuture listener = new PlainActionFuture<>(); - UnsupportedOperationException exception = expectThrows( - UnsupportedOperationException.class, - () -> service.infer( - model, - "should throw", - List.of("abc"), - new HashMap<>(), - InputType.INGEST, - InferenceAction.Request.DEFAULT_TIMEOUT, - listener - ) - ); - - assertThat(exception.getMessage(), is("Azure AI Studio service does not support inference with query input")); - } - } - public void testInfer_WithChatCompletionModel() throws IOException { var senderFactory = HttpRequestSenderTests.createSenderFactory(threadPool, clientManager); diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceTests.java index f3bf7413d2553..5e13d1ddd6fa7 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceTests.java @@ -367,36 +367,6 @@ public void testInfer_ThrowsErrorWhenModelIsNotAValidModel() throws IOException verifyNoMoreInteractions(sender); } - public void testInfer_ThrowsWhenQueryIsPresent() throws IOException { - var senderFactory = HttpRequestSenderTests.createSenderFactory(threadPool, clientManager); - - try ( - var service = new ElasticInferenceService( - senderFactory, - createWithEmptySettings(threadPool), - new ElasticInferenceServiceComponents(getUrl(webServer)) - ) - ) { - var model = ElasticInferenceServiceSparseEmbeddingsModelTests.createModel(getUrl(webServer)); - - PlainActionFuture listener = new PlainActionFuture<>(); - UnsupportedOperationException exception = expectThrows( - UnsupportedOperationException.class, - () -> service.infer( - model, - "should throw", - List.of("abc"), - new HashMap<>(), - InputType.INGEST, - InferenceAction.Request.DEFAULT_TIMEOUT, - listener - ) - ); - - assertThat(exception.getMessage(), is("Query input not supported for Elastic Inference Service")); - } - } - public void testInfer_SendsEmbeddingsRequest() throws IOException { var senderFactory = HttpRequestSenderTests.createSenderFactory(threadPool, clientManager); var eisGatewayUrl = getUrl(webServer); diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mistral/MistralServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mistral/MistralServiceTests.java index c1eb66ac848ab..e3bb2701aebd2 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mistral/MistralServiceTests.java +++ 
b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mistral/MistralServiceTests.java
@@ -501,33 +501,6 @@ public void testChunkedInfer_Embeddings_CallsInfer_ConvertsFloatResponse() throw
         }
     }
 
-    public void testInfer_ThrowsWhenQueryIsPresent() throws IOException {
-        var senderFactory = HttpRequestSenderTests.createSenderFactory(threadPool, clientManager);
-
-        try (var service = new MistralService(senderFactory, createWithEmptySettings(threadPool))) {
-            webServer.enqueue(new MockResponse().setResponseCode(200).setBody(testEmbeddingResultJson));
-
-            var model = MistralEmbeddingModelTests.createModel("id", "mistral-embed", "apikey", null, null, null, null);
-            model.setURI(getUrl(webServer));
-
-            PlainActionFuture listener = new PlainActionFuture<>();
-            UnsupportedOperationException exception = expectThrows(
-                UnsupportedOperationException.class,
-                () -> service.infer(
-                    model,
-                    "should throw",
-                    List.of("abc"),
-                    new HashMap<>(),
-                    InputType.INGEST,
-                    InferenceAction.Request.DEFAULT_TIMEOUT,
-                    listener
-                )
-            );
-
-            assertThat(exception.getMessage(), is("Mistral service does not support inference with query input"));
-        }
-    }
-
     public void testInfer_UnauthorisedResponse() throws IOException {
         var senderFactory = HttpRequestSenderTests.createSenderFactory(threadPool, clientManager);

From d06f7f28e0fab3ec4f769fa8337db1eeeccaa49f Mon Sep 17 00:00:00 2001
From: David Turner
Date: Wed, 25 Sep 2024 21:38:38 +0100
Subject: [PATCH 55/58] Avoid implicit ML/transform master node timeouts
 (#113536) (#113559)

Today in the ML and Transform plugins we use `null` for timeouts
related to persistent tasks, which means to use the implicit default
timeout of 30s. As per #107984 we want to eliminate all such uses of
the implicit default timeout.

This commit either moves to using the timeout from the associated
transport request, if available, or else makes it explicit that we're
using a hard-coded 30s timeout.

---
 .../xpack/ml/MachineLearning.java             |  8 ++
 ...rtCancelJobModelSnapshotUpgradeAction.java | 75 +++++++++--------
 .../ml/action/TransportCloseJobAction.java    | 80 ++++++++++---------
 .../action/TransportDeleteDatafeedAction.java | 31 ++++---
 .../ml/action/TransportDeleteJobAction.java   |  7 +-
 .../ml/action/TransportOpenJobAction.java     | 45 ++++++-----
 ...ransportStartDataFrameAnalyticsAction.java |  6 +-
 .../action/TransportStartDatafeedAction.java  | 46 ++++++-----
 ...TransportStopDataFrameAnalyticsAction.java | 44 +++++-----
 .../action/TransportStopDatafeedAction.java   | 52 ++++++------
 ...ransportUpgradeJobModelSnapshotAction.java |  5 +-
 .../xpack/transform/Transform.java            |  8 ++
 .../action/TransportStartTransformAction.java |  5 +-
 .../action/TransportStopTransformAction.java  | 23 +++---
 14 files changed, 249 insertions(+), 186 deletions(-)

diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java
index 5876836185ba3..1bc867a849090 100644
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java
@@ -498,6 +498,14 @@ public class MachineLearning extends Plugin
 
     public static final String TRAINED_MODEL_CIRCUIT_BREAKER_NAME = "model_inference";
 
+    /**
+     * Hard-coded timeout used for {@link org.elasticsearch.action.support.master.MasterNodeRequest#masterNodeTimeout()} for requests to
+     * the master node from ML code.
Wherever possible, prefer to use a user-controlled timeout instead of this. + * + * @see #107984 + */ + public static final TimeValue HARD_CODED_MACHINE_LEARNING_MASTER_NODE_TIMEOUT = TimeValue.THIRTY_SECONDS; + private static final long DEFAULT_MODEL_CIRCUIT_BREAKER_LIMIT = (long) ((0.50) * JvmInfo.jvmInfo().getMem().getHeapMax().getBytes()); private static final double DEFAULT_MODEL_CIRCUIT_BREAKER_OVERHEAD = 1.0D; diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportCancelJobModelSnapshotUpgradeAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportCancelJobModelSnapshotUpgradeAction.java index c4820112211b0..96ad0cf17e780 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportCancelJobModelSnapshotUpgradeAction.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportCancelJobModelSnapshotUpgradeAction.java @@ -30,6 +30,7 @@ import org.elasticsearch.xpack.core.ml.job.config.Job; import org.elasticsearch.xpack.core.ml.job.snapshot.upgrade.SnapshotUpgradeTaskParams; import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper; +import org.elasticsearch.xpack.ml.MachineLearning; import org.elasticsearch.xpack.ml.job.persistence.JobConfigProvider; import java.util.List; @@ -103,47 +104,51 @@ private void removePersistentTasks( final AtomicArray failures = new AtomicArray<>(numberOfTasks); for (PersistentTasksCustomMetadata.PersistentTask task : upgradeTasksToCancel) { - persistentTasksService.sendRemoveRequest(task.getId(), null, new ActionListener<>() { - @Override - public void onResponse(PersistentTasksCustomMetadata.PersistentTask task) { - if (counter.incrementAndGet() == numberOfTasks) { - sendResponseOrFailure(listener, failures); + persistentTasksService.sendRemoveRequest( + task.getId(), + MachineLearning.HARD_CODED_MACHINE_LEARNING_MASTER_NODE_TIMEOUT, + new ActionListener<>() { + @Override + public void onResponse(PersistentTasksCustomMetadata.PersistentTask task) { + if (counter.incrementAndGet() == numberOfTasks) { + sendResponseOrFailure(listener, failures); + } } - } - @Override - public void onFailure(Exception e) { - final int slot = counter.incrementAndGet(); - // Not found is not an error - it just means the upgrade completed before we could cancel it. - if (ExceptionsHelper.unwrapCause(e) instanceof ResourceNotFoundException == false) { - failures.set(slot - 1, e); - } - if (slot == numberOfTasks) { - sendResponseOrFailure(listener, failures); + @Override + public void onFailure(Exception e) { + final int slot = counter.incrementAndGet(); + // Not found is not an error - it just means the upgrade completed before we could cancel it. + if (ExceptionsHelper.unwrapCause(e) instanceof ResourceNotFoundException == false) { + failures.set(slot - 1, e); + } + if (slot == numberOfTasks) { + sendResponseOrFailure(listener, failures); + } } - } - private void sendResponseOrFailure(ActionListener listener, AtomicArray failures) { - List caughtExceptions = failures.asList(); - if (caughtExceptions.isEmpty()) { - listener.onResponse(new Response(true)); - return; + private void sendResponseOrFailure(ActionListener listener, AtomicArray failures) { + List caughtExceptions = failures.asList(); + if (caughtExceptions.isEmpty()) { + listener.onResponse(new Response(true)); + return; + } + + String msg = "Failed to cancel model snapshot upgrade for [" + + request.getSnapshotId() + + "] on job [" + + request.getJobId() + + "]. 
Total failures [" + + caughtExceptions.size() + + "], rethrowing first. All Exceptions: [" + + caughtExceptions.stream().map(Exception::getMessage).collect(Collectors.joining(", ")) + + "]"; + + ElasticsearchStatusException e = exceptionArrayToStatusException(failures, msg); + listener.onFailure(e); } - - String msg = "Failed to cancel model snapshot upgrade for [" - + request.getSnapshotId() - + "] on job [" - + request.getJobId() - + "]. Total failures [" - + caughtExceptions.size() - + "], rethrowing first. All Exceptions: [" - + caughtExceptions.stream().map(Exception::getMessage).collect(Collectors.joining(", ")) - + "]"; - - ElasticsearchStatusException e = exceptionArrayToStatusException(failures, msg); - listener.onFailure(e); } - }); + ); } } } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportCloseJobAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportCloseJobAction.java index 306098f38bc08..34f28fb57f9cb 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportCloseJobAction.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportCloseJobAction.java @@ -206,7 +206,7 @@ protected void doExecute(Task task, CloseJobAction.Request request, ActionListen // these persistent tasks to disappear. persistentTasksService.sendRemoveRequest( jobTask.getId(), - null, + MachineLearning.HARD_CODED_MACHINE_LEARNING_MASTER_NODE_TIMEOUT, ActionListener.wrap( r -> logger.trace( () -> format("[%s] removed task to close unassigned job", resolvedJobId) @@ -517,48 +517,52 @@ private void forceCloseJob( PersistentTasksCustomMetadata.PersistentTask jobTask = MlTasks.getJobTask(jobId, tasks); if (jobTask != null) { auditor.info(jobId, Messages.JOB_AUDIT_FORCE_CLOSING); - persistentTasksService.sendRemoveRequest(jobTask.getId(), null, new ActionListener<>() { - @Override - public void onResponse(PersistentTasksCustomMetadata.PersistentTask task) { - if (counter.incrementAndGet() == numberOfJobs) { - sendResponseOrFailure(request.getJobId(), listener, failures); + persistentTasksService.sendRemoveRequest( + jobTask.getId(), + MachineLearning.HARD_CODED_MACHINE_LEARNING_MASTER_NODE_TIMEOUT, + new ActionListener<>() { + @Override + public void onResponse(PersistentTasksCustomMetadata.PersistentTask task) { + if (counter.incrementAndGet() == numberOfJobs) { + sendResponseOrFailure(request.getJobId(), listener, failures); + } } - } - @Override - public void onFailure(Exception e) { - final int slot = counter.incrementAndGet(); - if (ExceptionsHelper.unwrapCause(e) instanceof ResourceNotFoundException == false) { - failures.set(slot - 1, e); + @Override + public void onFailure(Exception e) { + final int slot = counter.incrementAndGet(); + if (ExceptionsHelper.unwrapCause(e) instanceof ResourceNotFoundException == false) { + failures.set(slot - 1, e); + } + if (slot == numberOfJobs) { + sendResponseOrFailure(request.getJobId(), listener, failures); + } } - if (slot == numberOfJobs) { - sendResponseOrFailure(request.getJobId(), listener, failures); - } - } - private static void sendResponseOrFailure( - String jobId, - ActionListener listener, - AtomicArray failures - ) { - List caughtExceptions = failures.asList(); - if (caughtExceptions.isEmpty()) { - listener.onResponse(new CloseJobAction.Response(true)); - return; + private static void sendResponseOrFailure( + String jobId, + ActionListener listener, + AtomicArray failures + ) { + List caughtExceptions = failures.asList(); + if 
(caughtExceptions.isEmpty()) { + listener.onResponse(new CloseJobAction.Response(true)); + return; + } + + String msg = "Failed to force close job [" + + jobId + + "] with [" + + caughtExceptions.size() + + "] failures, rethrowing first. All Exceptions: [" + + caughtExceptions.stream().map(Exception::getMessage).collect(Collectors.joining(", ")) + + "]"; + + ElasticsearchStatusException e = exceptionArrayToStatusException(failures, msg); + listener.onFailure(e); } - - String msg = "Failed to force close job [" - + jobId - + "] with [" - + caughtExceptions.size() - + "] failures, rethrowing first. All Exceptions: [" - + caughtExceptions.stream().map(Exception::getMessage).collect(Collectors.joining(", ")) - + "]"; - - ElasticsearchStatusException e = exceptionArrayToStatusException(failures, msg); - listener.onFailure(e); } - }); + ); } } } @@ -588,7 +592,7 @@ private void normalCloseJob( PersistentTasksCustomMetadata.PersistentTask jobTask = MlTasks.getJobTask(jobId, tasks); persistentTasksService.sendRemoveRequest( jobTask.getId(), - null, + MachineLearning.HARD_CODED_MACHINE_LEARNING_MASTER_NODE_TIMEOUT, ActionListener.wrap(r -> logger.trace("[{}] removed persistent task for relocated job", jobId), e -> { if (ExceptionsHelper.unwrapCause(e) instanceof ResourceNotFoundException) { logger.debug("[{}] relocated job task already removed", jobId); diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportDeleteDatafeedAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportDeleteDatafeedAction.java index 4fe24bbf468e2..f3638a9199567 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportDeleteDatafeedAction.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportDeleteDatafeedAction.java @@ -28,6 +28,7 @@ import org.elasticsearch.xpack.core.ml.action.DeleteDatafeedAction; import org.elasticsearch.xpack.core.ml.action.IsolateDatafeedAction; import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper; +import org.elasticsearch.xpack.ml.MachineLearning; import org.elasticsearch.xpack.ml.datafeed.DatafeedManager; import static org.elasticsearch.xpack.core.ClientHelper.ML_ORIGIN; @@ -103,22 +104,26 @@ private void removeDatafeedTask(DeleteDatafeedAction.Request request, ClusterSta if (datafeedTask == null) { listener.onResponse(true); } else { - persistentTasksService.sendRemoveRequest(datafeedTask.getId(), null, new ActionListener<>() { - @Override - public void onResponse(PersistentTasksCustomMetadata.PersistentTask persistentTask) { - listener.onResponse(Boolean.TRUE); - } + persistentTasksService.sendRemoveRequest( + datafeedTask.getId(), + MachineLearning.HARD_CODED_MACHINE_LEARNING_MASTER_NODE_TIMEOUT, + new ActionListener<>() { + @Override + public void onResponse(PersistentTasksCustomMetadata.PersistentTask persistentTask) { + listener.onResponse(Boolean.TRUE); + } - @Override - public void onFailure(Exception e) { - if (ExceptionsHelper.unwrapCause(e) instanceof ResourceNotFoundException) { - // the task has been removed in between - listener.onResponse(true); - } else { - listener.onFailure(e); + @Override + public void onFailure(Exception e) { + if (ExceptionsHelper.unwrapCause(e) instanceof ResourceNotFoundException) { + // the task has been removed in between + listener.onResponse(true); + } else { + listener.onFailure(e); + } } } - }); + ); } } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportDeleteJobAction.java 
b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportDeleteJobAction.java index 31aaf157d66ad..b5a8808561843 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportDeleteJobAction.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportDeleteJobAction.java @@ -45,6 +45,7 @@ import org.elasticsearch.xpack.core.ml.job.config.JobTaskState; import org.elasticsearch.xpack.core.ml.job.messages.Messages; import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper; +import org.elasticsearch.xpack.ml.MachineLearning; import org.elasticsearch.xpack.ml.datafeed.persistence.DatafeedConfigProvider; import org.elasticsearch.xpack.ml.job.JobManager; import org.elasticsearch.xpack.ml.job.persistence.JobConfigProvider; @@ -291,7 +292,11 @@ private void removePersistentTask(String jobId, ClusterState currentState, Actio if (jobTask == null) { listener.onResponse(null); } else { - persistentTasksService.sendRemoveRequest(jobTask.getId(), null, listener.safeMap(task -> true)); + persistentTasksService.sendRemoveRequest( + jobTask.getId(), + MachineLearning.HARD_CODED_MACHINE_LEARNING_MASTER_NODE_TIMEOUT, + listener.safeMap(task -> true) + ); } } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportOpenJobAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportOpenJobAction.java index b220052baff0d..bd628c4e04ac6 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportOpenJobAction.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportOpenJobAction.java @@ -47,6 +47,7 @@ import org.elasticsearch.xpack.core.ml.job.config.JobUpdate; import org.elasticsearch.xpack.core.ml.job.persistence.ElasticsearchMappings; import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper; +import org.elasticsearch.xpack.ml.MachineLearning; import org.elasticsearch.xpack.ml.job.JobNodeSelector; import org.elasticsearch.xpack.ml.job.persistence.JobConfigProvider; import org.elasticsearch.xpack.ml.process.MlMemoryTracker; @@ -166,7 +167,7 @@ public void onFailure(Exception e) { MlTasks.jobTaskId(jobParams.getJobId()), MlTasks.JOB_TASK_NAME, jobParams, - null, + request.masterNodeTimeout(), waitForJobToStart ), listener::onFailure @@ -325,27 +326,31 @@ private void cancelJobStart( Exception exception, ActionListener listener ) { - persistentTasksService.sendRemoveRequest(persistentTask.getId(), null, new ActionListener<>() { - @Override - public void onResponse(PersistentTasksCustomMetadata.PersistentTask task) { - // We succeeded in cancelling the persistent task, but the - // problem that caused us to cancel it is the overall result - listener.onFailure(exception); - } + persistentTasksService.sendRemoveRequest( + persistentTask.getId(), + MachineLearning.HARD_CODED_MACHINE_LEARNING_MASTER_NODE_TIMEOUT, + new ActionListener<>() { + @Override + public void onResponse(PersistentTasksCustomMetadata.PersistentTask task) { + // We succeeded in cancelling the persistent task, but the + // problem that caused us to cancel it is the overall result + listener.onFailure(exception); + } - @Override - public void onFailure(Exception e) { - logger.error( - () -> format( - "[%s] Failed to cancel persistent task that could not be assigned due to [%s]", - persistentTask.getParams().getJobId(), - exception.getMessage() - ), - e - ); - listener.onFailure(exception); + @Override + public void onFailure(Exception e) { + logger.error( + () -> 
format( + "[%s] Failed to cancel persistent task that could not be assigned due to [%s]", + persistentTask.getParams().getJobId(), + exception.getMessage() + ), + e + ); + listener.onFailure(exception); + } } - }); + ); } /** diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStartDataFrameAnalyticsAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStartDataFrameAnalyticsAction.java index 9db8a72f0bb14..2ec460a08caf9 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStartDataFrameAnalyticsAction.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStartDataFrameAnalyticsAction.java @@ -211,7 +211,7 @@ public void onFailure(Exception e) { MlTasks.dataFrameAnalyticsTaskId(request.getId()), MlTasks.DATA_FRAME_ANALYTICS_TASK_NAME, taskParams, - null, + request.masterNodeTimeout(), waitForAnalyticsToStart ); }, listener::onFailure); @@ -603,8 +603,8 @@ private void cancelAnalyticsStart( ) { persistentTasksService.sendRemoveRequest( persistentTask.getId(), - null, - new ActionListener>() { + MachineLearning.HARD_CODED_MACHINE_LEARNING_MASTER_NODE_TIMEOUT, + new ActionListener<>() { @Override public void onResponse(PersistentTasksCustomMetadata.PersistentTask task) { // We succeeded in cancelling the persistent task, but the diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStartDatafeedAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStartDatafeedAction.java index 26d26d87e4cc7..37677aed35b3d 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStartDatafeedAction.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStartDatafeedAction.java @@ -345,7 +345,7 @@ private void createDataExtractor( MlTasks.datafeedTaskId(params.getDatafeedId()), MlTasks.DATAFEED_TASK_NAME, params, - null, + MachineLearning.HARD_CODED_MACHINE_LEARNING_MASTER_NODE_TIMEOUT, listener ), listener::onFailure @@ -408,28 +408,32 @@ private void cancelDatafeedStart( Exception exception, ActionListener listener ) { - persistentTasksService.sendRemoveRequest(persistentTask.getId(), null, new ActionListener<>() { - @Override - public void onResponse(PersistentTasksCustomMetadata.PersistentTask task) { - // We succeeded in cancelling the persistent task, but the - // problem that caused us to cancel it is the overall result - listener.onFailure(exception); - } + persistentTasksService.sendRemoveRequest( + persistentTask.getId(), + MachineLearning.HARD_CODED_MACHINE_LEARNING_MASTER_NODE_TIMEOUT, + new ActionListener<>() { + @Override + public void onResponse(PersistentTasksCustomMetadata.PersistentTask task) { + // We succeeded in cancelling the persistent task, but the + // problem that caused us to cancel it is the overall result + listener.onFailure(exception); + } - @Override - public void onFailure(Exception e) { - logger.error( - "[" - + persistentTask.getParams().getDatafeedId() - + "] Failed to cancel persistent task that could " - + "not be assigned due to [" - + exception.getMessage() - + "]", - e - ); - listener.onFailure(exception); + @Override + public void onFailure(Exception e) { + logger.error( + "[" + + persistentTask.getParams().getDatafeedId() + + "] Failed to cancel persistent task that could " + + "not be assigned due to [" + + exception.getMessage() + + "]", + e + ); + listener.onFailure(exception); + } } - }); + ); } private 
static ElasticsearchStatusException createUnlicensedError( diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStopDataFrameAnalyticsAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStopDataFrameAnalyticsAction.java index 7d39cd7f76e17..5ad0328547d91 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStopDataFrameAnalyticsAction.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStopDataFrameAnalyticsAction.java @@ -259,23 +259,27 @@ private void forceStop( for (String analyticsId : nonStoppedAnalytics) { PersistentTasksCustomMetadata.PersistentTask analyticsTask = MlTasks.getDataFrameAnalyticsTask(analyticsId, tasks); if (analyticsTask != null) { - persistentTasksService.sendRemoveRequest(analyticsTask.getId(), null, ActionListener.wrap(removedTask -> { - auditor.info(analyticsId, Messages.DATA_FRAME_ANALYTICS_AUDIT_FORCE_STOPPED); - if (counter.incrementAndGet() == nonStoppedAnalytics.size()) { - sendResponseOrFailure(request.getId(), listener, failures); - } - }, e -> { - final int slot = counter.incrementAndGet(); - // We validated that the analytics ids supplied in the request existed when we started processing the action. - // If the related tasks don't exist at this point then they must have been stopped by a simultaneous stop request. - // This is not an error. - if (ExceptionsHelper.unwrapCause(e) instanceof ResourceNotFoundException == false) { - failures.set(slot - 1, e); - } - if (slot == nonStoppedAnalytics.size()) { - sendResponseOrFailure(request.getId(), listener, failures); - } - })); + persistentTasksService.sendRemoveRequest( + analyticsTask.getId(), + MachineLearning.HARD_CODED_MACHINE_LEARNING_MASTER_NODE_TIMEOUT, + ActionListener.wrap(removedTask -> { + auditor.info(analyticsId, Messages.DATA_FRAME_ANALYTICS_AUDIT_FORCE_STOPPED); + if (counter.incrementAndGet() == nonStoppedAnalytics.size()) { + sendResponseOrFailure(request.getId(), listener, failures); + } + }, e -> { + final int slot = counter.incrementAndGet(); + // We validated that the analytics ids supplied in the request existed when we started processing the action. + // If the related tasks don't exist at this point then they must have been stopped by a simultaneous stop request. + // This is not an error. + if (ExceptionsHelper.unwrapCause(e) instanceof ResourceNotFoundException == false) { + failures.set(slot - 1, e); + } + if (slot == nonStoppedAnalytics.size()) { + sendResponseOrFailure(request.getId(), listener, failures); + } + }) + ); } else { // This should not happen, because nonStoppedAnalytics // were derived from the same tasks that were passed to this method @@ -328,7 +332,11 @@ private String[] findAllocatedNodesAndRemoveUnassignedTasks(List analyti // This means the task has not been assigned to a node yet so // we can stop it by removing its persistent task. // The listener is a no-op as we're already going to wait for the task to be removed. 
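            // The removal/addition pair below is this commit's pattern at its
            // smallest: the null timeout argument, which silently fell back to
            // the implicit 30-second default, becomes the explicitly named
            // hard-coded constant.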
- persistentTasksService.sendRemoveRequest(task.getId(), null, ActionListener.noop()); + persistentTasksService.sendRemoveRequest( + task.getId(), + MachineLearning.HARD_CODED_MACHINE_LEARNING_MASTER_NODE_TIMEOUT, + ActionListener.noop() + ); } } return nodes.toArray(new String[0]); diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStopDatafeedAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStopDatafeedAction.java index f998701dbd4e0..169399fafaaad 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStopDatafeedAction.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStopDatafeedAction.java @@ -251,7 +251,7 @@ private void normalStopDatafeed( // already waits for these persistent tasks to disappear. persistentTasksService.sendRemoveRequest( datafeedTask.getId(), - null, + MachineLearning.HARD_CODED_MACHINE_LEARNING_MASTER_NODE_TIMEOUT, ActionListener.wrap( r -> auditDatafeedStopped(datafeedTask), e -> logger.error("[" + datafeedId + "] failed to remove task to stop unassigned datafeed", e) @@ -278,7 +278,7 @@ private void normalStopDatafeed( PersistentTasksCustomMetadata.PersistentTask datafeedTask = MlTasks.getDatafeedTask(datafeedId, tasks); persistentTasksService.sendRemoveRequest( datafeedTask.getId(), - null, + MachineLearning.HARD_CODED_MACHINE_LEARNING_MASTER_NODE_TIMEOUT, ActionListener.wrap(r -> auditDatafeedStopped(datafeedTask), e -> { if (ExceptionsHelper.unwrapCause(e) instanceof ResourceNotFoundException) { logger.debug("[{}] relocated datafeed task already removed", datafeedId); @@ -382,28 +382,32 @@ private void forceStopDatafeed( for (String datafeedId : notStoppedDatafeeds) { PersistentTasksCustomMetadata.PersistentTask datafeedTask = MlTasks.getDatafeedTask(datafeedId, tasks); if (datafeedTask != null) { - persistentTasksService.sendRemoveRequest(datafeedTask.getId(), null, ActionListener.wrap(persistentTask -> { - // For force stop, only audit here if the datafeed was unassigned at the time of the stop, hence inactive. - // If the datafeed was active then it audits itself on being cancelled. - if (PersistentTasksClusterService.needsReassignment(datafeedTask.getAssignment(), nodes)) { - auditDatafeedStopped(datafeedTask); - } - if (counter.incrementAndGet() == notStoppedDatafeeds.size()) { - sendResponseOrFailure(request.getDatafeedId(), listener, failures); - } - }, e -> { - final int slot = counter.incrementAndGet(); - // We validated that the datafeed names supplied in the request existed when we started processing the action. - // If the related tasks don't exist at this point then they must have been stopped by a simultaneous stop - // request. - // This is not an error. - if (ExceptionsHelper.unwrapCause(e) instanceof ResourceNotFoundException == false) { - failures.set(slot - 1, e); - } - if (slot == notStoppedDatafeeds.size()) { - sendResponseOrFailure(request.getDatafeedId(), listener, failures); - } - })); + persistentTasksService.sendRemoveRequest( + datafeedTask.getId(), + MachineLearning.HARD_CODED_MACHINE_LEARNING_MASTER_NODE_TIMEOUT, + ActionListener.wrap(persistentTask -> { + // For force stop, only audit here if the datafeed was unassigned at the time of the stop, hence inactive. + // If the datafeed was active then it audits itself on being cancelled. 
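+                    // (Illustrative assumption: needsReassignment(assignment, nodes) is true when the task's
+                    // current assignment is no longer valid for the given nodes, i.e. the datafeed task is
+                    // effectively unassigned, which is the "inactive" case described above.)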
+ if (PersistentTasksClusterService.needsReassignment(datafeedTask.getAssignment(), nodes)) { + auditDatafeedStopped(datafeedTask); + } + if (counter.incrementAndGet() == notStoppedDatafeeds.size()) { + sendResponseOrFailure(request.getDatafeedId(), listener, failures); + } + }, e -> { + final int slot = counter.incrementAndGet(); + // We validated that the datafeed names supplied in the request existed when we started processing the action. + // If the related tasks don't exist at this point then they must have been stopped by a simultaneous stop + // request. + // This is not an error. + if (ExceptionsHelper.unwrapCause(e) instanceof ResourceNotFoundException == false) { + failures.set(slot - 1, e); + } + if (slot == notStoppedDatafeeds.size()) { + sendResponseOrFailure(request.getDatafeedId(), listener, failures); + } + }) + ); } else { // This should not happen, because startedDatafeeds and stoppingDatafeeds // were derived from the same tasks that were passed to this method diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportUpgradeJobModelSnapshotAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportUpgradeJobModelSnapshotAction.java index 43b2f22ae79f0..e14752bd08e08 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportUpgradeJobModelSnapshotAction.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportUpgradeJobModelSnapshotAction.java @@ -49,6 +49,7 @@ import org.elasticsearch.xpack.core.ml.job.snapshot.upgrade.SnapshotUpgradeTaskParams; import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper; import org.elasticsearch.xpack.core.ml.utils.TransportVersionUtils; +import org.elasticsearch.xpack.ml.MachineLearning; import org.elasticsearch.xpack.ml.job.persistence.JobConfigProvider; import org.elasticsearch.xpack.ml.job.persistence.JobResultsProvider; import org.elasticsearch.xpack.ml.job.snapshot.upgrader.SnapshotUpgradePredicate; @@ -164,7 +165,7 @@ protected void masterOperation(Task task, Request request, ClusterState state, A MlTasks.snapshotUpgradeTaskId(params.getJobId(), params.getSnapshotId()), MlTasks.JOB_SNAPSHOT_UPGRADE_TASK_NAME, params, - null, + request.masterNodeTimeout(), waitForJobToStart ); }, listener::onFailure); @@ -293,7 +294,7 @@ private void cancelJobStart( ) { persistentTasksService.sendRemoveRequest( persistentTask.getId(), - null, + MachineLearning.HARD_CODED_MACHINE_LEARNING_MASTER_NODE_TIMEOUT, ActionListener.wrap(t -> listener.onFailure(exception), e -> { logger.error( () -> format( diff --git a/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/Transform.java b/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/Transform.java index ab4652c562e22..732d64059d734 100644 --- a/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/Transform.java +++ b/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/Transform.java @@ -138,6 +138,14 @@ public class Transform extends Plugin implements SystemIndexPlugin, PersistentTa public static final Integer DEFAULT_INITIAL_MAX_PAGE_SEARCH_SIZE = Integer.valueOf(500); public static final TimeValue DEFAULT_TRANSFORM_FREQUENCY = TimeValue.timeValueSeconds(60); + /** + * Hard-coded timeout used for {@link org.elasticsearch.action.support.master.MasterNodeRequest#masterNodeTimeout()} for requests to + * the master node from transforms code. 
Wherever possible, prefer to use a user-controlled timeout instead of this. + * + * @see #107984 + */ + public static final TimeValue HARD_CODED_TRANSFORM_MASTER_NODE_TIMEOUT = TimeValue.THIRTY_SECONDS; + public static final int DEFAULT_FAILURE_RETRIES = 10; // How many times the transform task can retry on a non-critical failure. // This cluster-level setting is deprecated, the users should be using transform-level setting instead. diff --git a/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/action/TransportStartTransformAction.java b/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/action/TransportStartTransformAction.java index 4ad56bf3a661a..cd9b79d921548 100644 --- a/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/action/TransportStartTransformAction.java +++ b/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/action/TransportStartTransformAction.java @@ -45,6 +45,7 @@ import org.elasticsearch.xpack.core.transform.transforms.TransformState; import org.elasticsearch.xpack.core.transform.transforms.TransformTaskParams; import org.elasticsearch.xpack.core.transform.transforms.TransformTaskState; +import org.elasticsearch.xpack.transform.Transform; import org.elasticsearch.xpack.transform.TransformExtensionHolder; import org.elasticsearch.xpack.transform.TransformServices; import org.elasticsearch.xpack.transform.notifications.TransformAuditor; @@ -164,7 +165,7 @@ protected void masterOperation( transformTask.getId(), TransformTaskParams.NAME, transformTask, - null, + request.masterNodeTimeout(), newPersistentTaskActionListener ); } else { @@ -292,7 +293,7 @@ protected ClusterBlockException checkBlock(StartTransformAction.Request request, } private void cancelTransformTask(String taskId, String transformId, Exception exception, Consumer onFailure) { - persistentTasksService.sendRemoveRequest(taskId, null, new ActionListener<>() { + persistentTasksService.sendRemoveRequest(taskId, Transform.HARD_CODED_TRANSFORM_MASTER_NODE_TIMEOUT, new ActionListener<>() { @Override public void onResponse(PersistentTasksCustomMetadata.PersistentTask task) { // We succeeded in canceling the persistent task, but the diff --git a/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/action/TransportStopTransformAction.java b/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/action/TransportStopTransformAction.java index 34e89986b5bcd..fd24fd7c0faba 100644 --- a/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/action/TransportStopTransformAction.java +++ b/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/action/TransportStopTransformAction.java @@ -45,6 +45,7 @@ import org.elasticsearch.xpack.core.transform.action.StopTransformAction.Response; import org.elasticsearch.xpack.core.transform.transforms.TransformState; import org.elasticsearch.xpack.core.transform.transforms.TransformTaskState; +import org.elasticsearch.xpack.transform.Transform; import org.elasticsearch.xpack.transform.TransformServices; import org.elasticsearch.xpack.transform.persistence.TransformConfigManager; import org.elasticsearch.xpack.transform.transforms.TransformNodeAssignments; @@ -509,14 +510,18 @@ static ActionListener cancelTransformTasksListener( ); for (String taskId : transformTasks) { - persistentTasksService.sendRemoveRequest(taskId, null, ActionListener.wrap(groupedListener::onResponse, e -> { - // If we are about to remove a persistent task which 
does not exist, treat it as success. - if (e instanceof ResourceNotFoundException) { - groupedListener.onResponse(null); - return; - } - groupedListener.onFailure(e); - })); + persistentTasksService.sendRemoveRequest( + taskId, + Transform.HARD_CODED_TRANSFORM_MASTER_NODE_TIMEOUT, + ActionListener.wrap(groupedListener::onResponse, e -> { + // If we are about to remove a persistent task which does not exist, treat it as success. + if (e instanceof ResourceNotFoundException) { + groupedListener.onResponse(null); + return; + } + groupedListener.onFailure(e); + }) + ); } }, e -> { GroupedActionListener> groupedListener = new GroupedActionListener<>( @@ -525,7 +530,7 @@ static ActionListener cancelTransformTasksListener( ); for (String taskId : transformTasks) { - persistentTasksService.sendRemoveRequest(taskId, null, groupedListener); + persistentTasksService.sendRemoveRequest(taskId, Transform.HARD_CODED_TRANSFORM_MASTER_NODE_TIMEOUT, groupedListener); } }); } From 7870e2dbe25ce5f0f9e753147a3107662d3aa73b Mon Sep 17 00:00:00 2001 From: Keith Massey Date: Wed, 25 Sep 2024 16:32:13 -0500 Subject: [PATCH 56/58] Adding component template substitutions to the simulate ingest API (#113276) (#113567) --- docs/changelog/113276.yaml | 5 + .../indices/put-component-template.asciidoc | 7 +- .../ingest/apis/simulate-ingest.asciidoc | 117 ++++++- .../test/ingest/80_ingest_simulate.yml | 303 ++++++++++++++++++ .../action/bulk/BulkFeatures.java | 3 +- .../bulk/TransportSimulateBulkAction.java | 3 + 6 files changed, 434 insertions(+), 4 deletions(-) create mode 100644 docs/changelog/113276.yaml diff --git a/docs/changelog/113276.yaml b/docs/changelog/113276.yaml new file mode 100644 index 0000000000000..87241878b3ec4 --- /dev/null +++ b/docs/changelog/113276.yaml @@ -0,0 +1,5 @@ +pr: 113276 +summary: Adding component template substitutions to the simulate ingest API +area: Ingest Node +type: enhancement +issues: [] diff --git a/docs/reference/indices/put-component-template.asciidoc b/docs/reference/indices/put-component-template.asciidoc index 6fd54f81d1222..05579d46d676d 100644 --- a/docs/reference/indices/put-component-template.asciidoc +++ b/docs/reference/indices/put-component-template.asciidoc @@ -128,6 +128,8 @@ include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=master-timeout] [[put-component-template-api-request-body]] ==== {api-request-body-title} +// tag::template[] + `template`:: (Required, object) This is the template to be applied, may optionally include a `mappings`, @@ -135,7 +137,7 @@ This is the template to be applied, may optionally include a `mappings`, + .Properties of `template` [%collapsible%open] -==== +===== `aliases`:: (Optional, object of objects) Aliases to add. + @@ -146,7 +148,7 @@ include::{es-ref-dir}/indices/create-index.asciidoc[tag=aliases-props] include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=mappings] include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=settings] -==== +===== `version`:: (Optional, integer) @@ -173,6 +175,7 @@ This map is not automatically generated by {es}. Marks this component template as deprecated. When a deprecated component template is referenced when creating or updating a non-deprecated index template, {es} will emit a deprecation warning. 
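+
+A minimal sketch of the `deprecated` flag in use, assuming a hypothetical template name:
+
+[source,console]
+----
+PUT _component_template/my-deprecated-template
+{
+  "template": {
+    "settings": { "number_of_shards": 1 }
+  },
+  "deprecated": true
+}
+----
+
+Referencing this template from a non-deprecated index template would then emit the deprecation
+warning described above.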
+end::template[] [[put-component-template-api-example]] ==== {api-examples-title} diff --git a/docs/reference/ingest/apis/simulate-ingest.asciidoc b/docs/reference/ingest/apis/simulate-ingest.asciidoc index ee84a39ee6f65..ac6da515402bb 100644 --- a/docs/reference/ingest/apis/simulate-ingest.asciidoc +++ b/docs/reference/ingest/apis/simulate-ingest.asciidoc @@ -83,11 +83,32 @@ POST /_ingest/_simulate } ] } + }, + "component_template_substitutions": { <2> + "my-component-template": { + "template": { + "mappings": { + "dynamic": "true", + "properties": { + "field3": { + "type": "keyword" + } + } + }, + "settings": { + "index": { + "default_pipeline": "my-pipeline" + } + } + } + } } } ---- <1> This replaces the existing `my-pipeline` pipeline with the contents given here for the duration of this request. +<2> This replaces the existing `my-component-template` component template with the contents given here for the duration of this request. +These templates can be used to change the pipeline(s) used, or to modify the mapping that will be used to validate the result. [[simulate-ingest-api-request]] ==== {api-request-title} @@ -191,6 +212,19 @@ Map of pipeline IDs to substitute pipeline definition objects. include::put-pipeline.asciidoc[tag=pipeline-object] ==== +`component_template_substitutions`:: +(Optional, map of strings to objects) +Map of component template names to substitute component template definition objects. ++ +.Properties of component template definition objects +[%collapsible%open] + +==== + +include::{es-ref-dir}/indices/put-component-template.asciidoc[tag=template] + +==== + [[simulate-ingest-api-example]] ==== {api-examples-title} @@ -268,7 +302,7 @@ The API returns the following response: [[simulate-ingest-api-request-body-ex]] ===== Specify a pipeline substitution in the request body -In this example the index `index` has a default pipeline called `my-pipeline` and a final +In this example the index `my-index` has a default pipeline called `my-pipeline` and a final pipeline called `my-final-pipeline`. But a substitute definition of `my-pipeline` is provided in `pipeline_substitutions`. The substitute `my-pipeline` will be used in place of the `my-pipeline` that is in the system, and then the `my-final-pipeline` that is already @@ -348,6 +382,87 @@ The API returns the following response: } ---- +[[simulate-ingest-api-substitute-component-templates-ex]] +===== Specify a component template substitution in the request body +In this example, imagine that the index `my-index` has a strict mapping with only the `foo` +keyword field defined. Say that field mapping came from a component template named +`my-mappings-template`. We want to test adding a new field, `bar`. So a substitute definition of +`my-mappings-template` is provided in `component_template_substitutions`. The substitute +`my-mappings-template` will be used in place of the existing mapping for `my-index` and in place +of the `my-mappings-template` that is in the system. 
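+
+A minimal sketch of the index template this example assumes exists, wiring
+`my-mappings-template` into `my-index` via `composed_of` (hypothetical template name and
+pattern, shown only for orientation):
+
+[source,console]
+----
+PUT _index_template/my-index-template
+{
+  "index_patterns": ["my-index*"],
+  "composed_of": ["my-mappings-template"]
+}
+----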
+ +[source,console] +---- +POST /_ingest/_simulate +{ + "docs": [ + { + "_index": "my-index", + "_id": "123", + "_source": { + "foo": "foo" + } + }, + { + "_index": "my-index", + "_id": "456", + "_source": { + "bar": "rab" + } + } + ], + "component_template_substitutions": { + "my-mappings_template": { + "template": { + "mappings": { + "dynamic": "strict", + "properties": { + "foo": { + "type": "keyword" + }, + "bar": { + "type": "keyword" + } + } + } + } + } + } +} +---- + +The API returns the following response: + +[source,console-result] +---- +{ + "docs": [ + { + "doc": { + "_id": "123", + "_index": "my-index", + "_version": -3, + "_source": { + "foo": "foo" + }, + "executed_pipelines": [] + } + }, + { + "doc": { + "_id": "456", + "_index": "my-index", + "_version": -3, + "_source": { + "bar": "rab" + }, + "executed_pipelines": [] + } + } + ] +} +---- + //// [source,console] ---- diff --git a/qa/smoke-test-ingest-with-all-dependencies/src/yamlRestTest/resources/rest-api-spec/test/ingest/80_ingest_simulate.yml b/qa/smoke-test-ingest-with-all-dependencies/src/yamlRestTest/resources/rest-api-spec/test/ingest/80_ingest_simulate.yml index 35ec9979c3250..f3a977cd96f62 100644 --- a/qa/smoke-test-ingest-with-all-dependencies/src/yamlRestTest/resources/rest-api-spec/test/ingest/80_ingest_simulate.yml +++ b/qa/smoke-test-ingest-with-all-dependencies/src/yamlRestTest/resources/rest-api-spec/test/ingest/80_ingest_simulate.yml @@ -183,6 +183,7 @@ setup: body: settings: default_pipeline: "my-pipeline" + - match: { acknowledged: true } - do: headers: @@ -303,3 +304,305 @@ setup: - match: { docs.1.doc._index: "second-index" } - match: { docs.1.doc._source.bar: "foo" } - not_exists: docs.1.doc.error + +--- +"Test ingest simulate with template substitutions for component templates": + + - skip: + features: + - headers + - allowed_warnings + + - requires: + cluster_features: ["simulate.component.template.substitutions"] + reason: "ingest simulate component template substitutions added in 8.16" + + - do: + headers: + Content-Type: application/json + ingest.put_pipeline: + id: "foo-pipeline" + body: > + { + "processors": [ + { + "set": { + "field": "foo", + "value": true + } + } + ] + } + - match: { acknowledged: true } + + - do: + headers: + Content-Type: application/json + ingest.put_pipeline: + id: "bar-pipeline" + body: > + { + "processors": [ + { + "set": { + "field": "bar", + "value": true + } + } + ] + } + - match: { acknowledged: true } + + - do: + cluster.put_component_template: + name: mappings_template + body: + template: + mappings: + dynamic: strict + properties: + foo: + type: keyword + + - do: + cluster.put_component_template: + name: settings_template + body: + template: + settings: + index: + default_pipeline: "foo_pipeline" + + - do: + allowed_warnings: + - "index template [test-composable-1] has index patterns [tsdb_templated_*] matching patterns from existing older templates [global] with patterns (global => [*]); this template [test-composable-1] will take precedence during new index creation" + indices.put_index_template: + name: test-composable-1 + body: + index_patterns: + - foo* + composed_of: + - mappings_template + - settings_template + + - do: + headers: + Content-Type: application/json + simulate.ingest: + index: foo-1 + body: > + { + "docs": [ + { + "_id": "asdf", + "_source": { + "foo": "FOO", + "other": "other" + } + } + ], + "component_template_substitutions": { + "mappings_template": { + "template": { + "mappings": { + "dynamic": "true", + "properties": { + "foo": { + 
"type": "keyword" + } + } + } + } + }, + "settings_template": { + "template": { + "settings": { + "index": { + "default_pipeline": "bar-pipeline" + } + } + } + } + } + } + - length: { docs: 1 } + - match: { docs.0.doc._index: "foo-1" } + - match: { docs.0.doc._source.other: "other" } + - match: { docs.0.doc._source.bar: true } + - match: { docs.0.doc._source.foo: "FOO" } + - match: { docs.0.doc.executed_pipelines: ["bar-pipeline"] } + - not_exists: docs.0.doc.error + + - do: + indices.create: + index: foo-1 + - match: { acknowledged: true } + + - do: + headers: + Content-Type: application/json + simulate.ingest: + index: foo-1 + body: > + { + "docs": [ + { + "_id": "asdf", + "_source": { + "foo": "FOO", + "other": "other" + } + } + ], + "component_template_substitutions": { + "mappings_template": { + "template": { + "mappings": { + "dynamic": "true", + "properties": { + "foo": { + "type": "keyword" + } + } + } + } + }, + "settings_template": { + "template": { + "settings": { + "index": { + "default_pipeline": "bar-pipeline" + } + } + } + } + } + } + - length: { docs: 1 } + - match: { docs.0.doc._index: "foo-1" } + - match: { docs.0.doc._source.other: "other" } + - match: { docs.0.doc._source.bar: true } + - match: { docs.0.doc._source.foo: "FOO" } + - match: { docs.0.doc.executed_pipelines: ["bar-pipeline"] } + - not_exists: docs.0.doc.error + +--- +"Test ingest simulate with template substitutions for component templates removing pipelines": + + - skip: + features: + - headers + - allowed_warnings + + - requires: + cluster_features: ["simulate.component.template.substitutions"] + reason: "ingest simulate component template substitutions added in 8.16" + + - do: + headers: + Content-Type: application/json + ingest.put_pipeline: + id: "foo-pipeline" + body: > + { + "processors": [ + { + "set": { + "field": "foo", + "value": true + } + } + ] + } + - match: { acknowledged: true } + + - do: + cluster.put_component_template: + name: settings_template + body: + template: + settings: + index: + default_pipeline: "foo_pipeline" + + - do: + allowed_warnings: + - "index template [test-composable-1] has index patterns [tsdb_templated_*] matching patterns from existing older templates [global] with patterns (global => [*]); this template [test-composable-1] will take precedence during new index creation" + indices.put_index_template: + name: test-composable-1 + body: + index_patterns: + - foo* + composed_of: + - settings_template + + - do: + headers: + Content-Type: application/json + simulate.ingest: + index: foo-1 + body: > + { + "docs": [ + { + "_id": "asdf", + "_source": { + "foo": "FOO" + } + } + ], + "component_template_substitutions": { + "settings_template": { + "template": { + "settings": { + "index": { + "default_pipeline": null + } + } + } + } + } + } + - length: { docs: 1 } + - match: { docs.0.doc._index: "foo-1" } + - match: { docs.0.doc._source.foo: "FOO" } + - match: { docs.0.doc.executed_pipelines: [] } + - not_exists: docs.0.doc.error + + - do: + indices.create: + index: foo-1 + - match: { acknowledged: true } + + - do: + headers: + Content-Type: application/json + simulate.ingest: + index: foo-1 + body: > + { + "docs": [ + { + "_id": "asdf", + "_source": { + "foo": "FOO" + } + } + ], + "component_template_substitutions": { + "settings_template": { + "template": { + "settings": { + "index": { + "default_pipeline": null + } + } + } + } + } + } + - length: { docs: 1 } + - match: { docs.0.doc._index: "foo-1" } + - match: { docs.0.doc._source.foo: "FOO" } + - match: { 
docs.0.doc.executed_pipelines: [] } + - not_exists: docs.0.doc.error diff --git a/server/src/main/java/org/elasticsearch/action/bulk/BulkFeatures.java b/server/src/main/java/org/elasticsearch/action/bulk/BulkFeatures.java index 8299d53da17aa..af1782ac1ade3 100644 --- a/server/src/main/java/org/elasticsearch/action/bulk/BulkFeatures.java +++ b/server/src/main/java/org/elasticsearch/action/bulk/BulkFeatures.java @@ -14,11 +14,12 @@ import java.util.Set; +import static org.elasticsearch.action.bulk.TransportSimulateBulkAction.SIMULATE_COMPONENT_TEMPLATE_SUBSTITUTIONS; import static org.elasticsearch.action.bulk.TransportSimulateBulkAction.SIMULATE_MAPPING_VALIDATION; import static org.elasticsearch.action.bulk.TransportSimulateBulkAction.SIMULATE_MAPPING_VALIDATION_TEMPLATES; public class BulkFeatures implements FeatureSpecification { public Set getFeatures() { - return Set.of(SIMULATE_MAPPING_VALIDATION, SIMULATE_MAPPING_VALIDATION_TEMPLATES); + return Set.of(SIMULATE_MAPPING_VALIDATION, SIMULATE_MAPPING_VALIDATION_TEMPLATES, SIMULATE_COMPONENT_TEMPLATE_SUBSTITUTIONS); } } diff --git a/server/src/main/java/org/elasticsearch/action/bulk/TransportSimulateBulkAction.java b/server/src/main/java/org/elasticsearch/action/bulk/TransportSimulateBulkAction.java index 0ea763c215959..c860c49809cb5 100644 --- a/server/src/main/java/org/elasticsearch/action/bulk/TransportSimulateBulkAction.java +++ b/server/src/main/java/org/elasticsearch/action/bulk/TransportSimulateBulkAction.java @@ -69,6 +69,9 @@ public class TransportSimulateBulkAction extends TransportAbstractBulkAction { public static final NodeFeature SIMULATE_MAPPING_VALIDATION = new NodeFeature("simulate.mapping.validation"); public static final NodeFeature SIMULATE_MAPPING_VALIDATION_TEMPLATES = new NodeFeature("simulate.mapping.validation.templates"); + public static final NodeFeature SIMULATE_COMPONENT_TEMPLATE_SUBSTITUTIONS = new NodeFeature( + "simulate.component.template.substitutions" + ); private final IndicesService indicesService; private final NamedXContentRegistry xContentRegistry; private final Set indexSettingProviders; From 57915388f50be333060250a33a02ec2bf7642f6a Mon Sep 17 00:00:00 2001 From: Oleksandr Kolomiiets Date: Wed, 25 Sep 2024 16:59:33 -0700 Subject: [PATCH 57/58] Fix synthetic source for flattened field when used with ignore_above (#113499) (#113568) (cherry picked from commit 35fbbec46abb2c6e9d84ffce063a2775b02bdfcc) # Conflicts: # rest-api-spec/build.gradle --- docs/changelog/113499.yaml | 6 + .../test/get/100_synthetic_source.yml | 57 +++++++ .../540_ignore_above_synthetic_source.yml | 8 +- .../index/mapper/MapperFeatures.java | 3 +- .../flattened/FlattenedFieldMapper.java | 10 +- .../flattened/FlattenedFieldParser.java | 79 +++------- .../FlattenedFieldSyntheticWriterHelper.java | 17 ++- ...ortedSetDocValuesSyntheticFieldLoader.java | 142 ++++++++++++++++-- .../flattened/FlattenedFieldMapperTests.java | 131 +++++++++++++++- .../flattened/FlattenedFieldParserTests.java | 14 +- ...ttenedFieldSyntheticWriterHelperTests.java | 31 +++- 11 files changed, 396 insertions(+), 102 deletions(-) create mode 100644 docs/changelog/113499.yaml diff --git a/docs/changelog/113499.yaml b/docs/changelog/113499.yaml new file mode 100644 index 0000000000000..a4d7f28eb0de4 --- /dev/null +++ b/docs/changelog/113499.yaml @@ -0,0 +1,6 @@ +pr: 113499 +summary: Fix synthetic source for flattened field when used with `ignore_above` +area: Logs +type: bug +issues: + - 112044 diff --git 
a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/get/100_synthetic_source.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/get/100_synthetic_source.yml index f1e296ed8e304..a7600da575cd3 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/get/100_synthetic_source.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/get/100_synthetic_source.yml @@ -1050,6 +1050,63 @@ flattened field with ignore_above: - is_false: fields + +--- +flattened field with ignore_above and arrays: + - requires: + cluster_features: ["mapper.flattened.ignore_above_with_arrays_support"] + reason: requires support of ignore_above synthetic source with arrays + + - do: + indices.create: + index: test + body: + mappings: + _source: + mode: synthetic + properties: + field: + type: flattened + ignore_above: 10 + + - do: + index: + index: test + id: 1 + body: | + { + "field": [ + { "key1": { "key2": "key2", "key3": "key3_ignored" }, "key4": "key4_ignored", "key5": { "key6": "key6_ignored" }, "key7": "key7" }, + { "key1": { "key2": "key12", "key13": "key13_ignored" }, "key4": "key14_ignored", "key15": { "key16": "key16_ignored" }, "key17": [ "key17", "key18" ] } + ] + } + + - do: + get: + index: test + id: 1 + + - match: { _index: "test" } + - match: { _id: "1" } + - match: { _version: 1 } + - match: { found: true } + - match: + _source: + field: + key1: + key2: [ "key12", "key2" ] + key3: "key3_ignored" + key13: "key13_ignored" + key4: [ "key14_ignored", "key4_ignored" ] + key5: + key6: "key6_ignored" + key7: "key7" + key15: + key16: "key16_ignored" + key17: [ "key17", "key18" ] + + - is_false: fields + --- completion: - requires: diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/540_ignore_above_synthetic_source.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/540_ignore_above_synthetic_source.yml index defdc8467bf8d..11259d3e1bfd1 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/540_ignore_above_synthetic_source.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/540_ignore_above_synthetic_source.yml @@ -44,8 +44,8 @@ ignore_above mapping level setting: --- ignore_above mapping level setting on arrays: - requires: - cluster_features: [ "mapper.ignore_above_index_level_setting" ] - reason: introduce ignore_above index level setting + cluster_features: [ "mapper.flattened.ignore_above_with_arrays_support" ] + reason: requires support of ignore_above with arrays for flattened fields - do: indices.create: index: test @@ -80,9 +80,9 @@ ignore_above mapping level setting on arrays: match_all: {} - length: { hits.hits: 1 } - #TODO: synthetic source field reconstruction bug (TBD: add link to the issue here) + #TODO: synthetic source field reconstruction bug (TBD: add link to the issue here) #- match: { hits.hits.0._source.keyword: ["foo bar", "the quick brown fox"] } - - match: { hits.hits.0._source.flattened.value: ["the quick brown fox", "jumps over"] } + - match: { hits.hits.0._source.flattened.value: [ "jumps over", "the quick brown fox" ] } - match: { hits.hits.0.fields.keyword.0: "foo bar" } - match: { hits.hits.0.fields.flattened.0.value: "jumps over" } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java index ac7d10abc7121..2f665fd5d1e6a 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java +++ 
b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java @@ -44,7 +44,8 @@ public Set getFeatures() { FlattenedFieldMapper.IGNORE_ABOVE_SUPPORT, IndexSettings.IGNORE_ABOVE_INDEX_LEVEL_SETTING, SourceFieldMapper.SYNTHETIC_SOURCE_COPY_TO_INSIDE_OBJECTS_FIX, - TimeSeriesRoutingHashFieldMapper.TS_ROUTING_HASH_FIELD_PARSES_BYTES_REF + TimeSeriesRoutingHashFieldMapper.TS_ROUTING_HASH_FIELD_PARSES_BYTES_REF, + FlattenedFieldMapper.IGNORE_ABOVE_WITH_ARRAYS_SUPPORT ); } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldMapper.java index 9ea52752ec679..f4b9fb2971389 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldMapper.java @@ -112,9 +112,11 @@ public final class FlattenedFieldMapper extends FieldMapper { public static final NodeFeature IGNORE_ABOVE_SUPPORT = new NodeFeature("flattened.ignore_above_support"); + public static final NodeFeature IGNORE_ABOVE_WITH_ARRAYS_SUPPORT = new NodeFeature("mapper.flattened.ignore_above_with_arrays_support"); public static final String CONTENT_TYPE = "flattened"; public static final String KEYED_FIELD_SUFFIX = "._keyed"; + public static final String KEYED_IGNORED_VALUES_FIELD_SUFFIX = "._keyed._ignored"; public static final String TIME_SERIES_DIMENSIONS_ARRAY_PARAM = "time_series_dimensions"; private static class Defaults { @@ -835,6 +837,7 @@ private FlattenedFieldMapper( this.fieldParser = new FlattenedFieldParser( mappedFieldType.name(), mappedFieldType.name() + KEYED_FIELD_SUFFIX, + mappedFieldType.name() + KEYED_IGNORED_VALUES_FIELD_SUFFIX, mappedFieldType, builder.depthLimit.get(), builder.ignoreAbove.get(), @@ -903,7 +906,12 @@ public FieldMapper.Builder getMergeBuilder() { @Override protected SyntheticSourceSupport syntheticSourceSupport() { if (fieldType().hasDocValues()) { - var loader = new FlattenedSortedSetDocValuesSyntheticFieldLoader(fullPath(), fullPath() + "._keyed", leafName()); + var loader = new FlattenedSortedSetDocValuesSyntheticFieldLoader( + fullPath(), + fullPath() + KEYED_FIELD_SUFFIX, + ignoreAbove() < Integer.MAX_VALUE ? 
fullPath() + KEYED_IGNORED_VALUES_FIELD_SUFFIX : null, + leafName() + ); return new SyntheticSourceSupport.Native(loader); } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldParser.java b/server/src/main/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldParser.java index 2291d8266ed8b..351e3149da3df 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldParser.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldParser.java @@ -11,6 +11,7 @@ import org.apache.lucene.document.Field; import org.apache.lucene.document.SortedSetDocValuesField; +import org.apache.lucene.document.StoredField; import org.apache.lucene.document.StringField; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexableField; @@ -18,11 +19,7 @@ import org.elasticsearch.common.xcontent.XContentParserUtils; import org.elasticsearch.index.mapper.ContentPath; import org.elasticsearch.index.mapper.DocumentParserContext; -import org.elasticsearch.index.mapper.IgnoredSourceFieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; -import org.elasticsearch.index.mapper.XContentDataHelper; -import org.elasticsearch.xcontent.CopyingXContentParser; -import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentParser; import java.io.IOException; @@ -39,6 +36,7 @@ class FlattenedFieldParser { private final String rootFieldFullPath; private final String keyedFieldFullPath; + private final String keyedIgnoredValuesFieldFullPath; private final MappedFieldType fieldType; private final int depthLimit; @@ -48,6 +46,7 @@ class FlattenedFieldParser { FlattenedFieldParser( String rootFieldFullPath, String keyedFieldFullPath, + String keyedIgnoredValuesFieldFullPath, MappedFieldType fieldType, int depthLimit, int ignoreAbove, @@ -55,6 +54,7 @@ class FlattenedFieldParser { ) { this.rootFieldFullPath = rootFieldFullPath; this.keyedFieldFullPath = keyedFieldFullPath; + this.keyedIgnoredValuesFieldFullPath = keyedIgnoredValuesFieldFullPath; this.fieldType = fieldType; this.depthLimit = depthLimit; this.ignoreAbove = ignoreAbove; @@ -65,36 +65,18 @@ public List parse(final DocumentParserContext documentParserCont XContentParser parser = documentParserContext.parser(); XContentParserUtils.ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.currentToken(), parser); - XContentBuilder rawDataForSyntheticSource = null; - if (documentParserContext.canAddIgnoredField() && ignoreAbove < Integer.MAX_VALUE) { - var copyingParser = new CopyingXContentParser(parser); - rawDataForSyntheticSource = copyingParser.getBuilder(); - parser = copyingParser; - } - ContentPath path = new ContentPath(); List fields = new ArrayList<>(); var context = new Context(parser, documentParserContext); parseObject(context, path, fields); - if (rawDataForSyntheticSource != null && context.isIgnoredValueEncountered()) { - // One or more inner fields are ignored due to `ignore_above`. - // Because of that we will store whole object as is in order to generate synthetic source. 
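            // A sketch of the replacement strategy, simplified from the new addField further below:
            // rather than persisting the whole object, each value that trips ignore_above is written as
            // its own stored field under "<field>._keyed._ignored", e.g.
            //     fields.add(new StoredField(keyedIgnoredValuesFieldFullPath, bytesKeyedValue));
            // and is merged back into synthetic source, in sorted key order, at load time.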
- documentParserContext.addIgnoredField( - IgnoredSourceFieldMapper.NameValue.fromContext( - documentParserContext, - rootFieldFullPath, - XContentDataHelper.encodeXContentBuilder(rawDataForSyntheticSource) - ) - ); - } return fields; } private void parseObject(Context context, ContentPath path, List fields) throws IOException { String currentName = null; - XContentParser parser = context.getParser(); + XContentParser parser = context.parser(); while (true) { XContentParser.Token token = parser.nextToken(); if (token == XContentParser.Token.END_OBJECT) { @@ -111,7 +93,7 @@ private void parseObject(Context context, ContentPath path, List } private void parseArray(Context context, ContentPath path, String currentName, List fields) throws IOException { - XContentParser parser = context.getParser(); + XContentParser parser = context.parser(); while (true) { XContentParser.Token token = parser.nextToken(); if (token == XContentParser.Token.END_ARRAY) { @@ -128,7 +110,7 @@ private void parseFieldValue( String currentName, List fields ) throws IOException { - XContentParser parser = context.getParser(); + XContentParser parser = context.parser(); if (token == XContentParser.Token.START_OBJECT) { path.add(currentName); validateDepthLimit(path); @@ -151,19 +133,23 @@ private void parseFieldValue( } private void addField(Context context, ContentPath path, String currentName, String value, List fields) { - if (value.length() > ignoreAbove) { - context.onIgnoredValue(); - return; - } - String key = path.pathAsText(currentName); if (key.contains(SEPARATOR)) { throw new IllegalArgumentException( "Keys in [flattened] fields cannot contain the reserved character \\0. Offending key: [" + key + "]." ); } + String keyedValue = createKeyedValue(key, value); BytesRef bytesKeyedValue = new BytesRef(keyedValue); + + if (value.length() > ignoreAbove) { + if (context.documentParserContext().mappingLookup().isSourceSynthetic()) { + fields.add(new StoredField(keyedIgnoredValuesFieldFullPath, bytesKeyedValue)); + } + return; + } + // check the keyed value doesn't exceed the IndexWriter.MAX_TERM_LENGTH limit enforced by Lucene at index time // in that case we can already throw a more user friendly exception here which includes the offending fields key and value lengths if (bytesKeyedValue.length > IndexWriter.MAX_TERM_LENGTH) { @@ -198,10 +184,10 @@ private void addField(Context context, ContentPath path, String currentName, Str final String keyedFieldName = FlattenedFieldParser.extractKey(bytesKeyedValue).utf8ToString(); if (fieldType.isDimension() && fieldType.dimensions().contains(keyedFieldName)) { final BytesRef keyedFieldValue = FlattenedFieldParser.extractValue(bytesKeyedValue); - context.getDocumentParserContext() + context.documentParserContext() .getDimensions() .addString(rootFieldFullPath + "." 
+ keyedFieldName, keyedFieldValue) - .validate(context.getDocumentParserContext().indexSettings()); + .validate(context.documentParserContext().indexSettings()); } } } @@ -239,32 +225,5 @@ static BytesRef extractValue(BytesRef keyedValue) { return new BytesRef(keyedValue.bytes, valueStart, keyedValue.length - valueStart); } - private static class Context { - private final XContentParser parser; - private final DocumentParserContext documentParserContext; - - private boolean ignoredValueEncountered; - - private Context(XContentParser parser, DocumentParserContext documentParserContext) { - this.parser = parser; - this.documentParserContext = documentParserContext; - this.ignoredValueEncountered = false; - } - - public XContentParser getParser() { - return parser; - } - - public DocumentParserContext getDocumentParserContext() { - return documentParserContext; - } - - public void onIgnoredValue() { - this.ignoredValueEncountered = true; - } - - public boolean isIgnoredValueEncountered() { - return ignoredValueEncountered; - } - } + private record Context(XContentParser parser, DocumentParserContext documentParserContext) {} } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldSyntheticWriterHelper.java b/server/src/main/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldSyntheticWriterHelper.java index de578d724d98c..950fef95772fb 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldSyntheticWriterHelper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldSyntheticWriterHelper.java @@ -9,7 +9,6 @@ package org.elasticsearch.index.mapper.flattened; -import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.util.BytesRef; import org.elasticsearch.xcontent.XContentBuilder; @@ -226,19 +225,23 @@ public boolean equals(Object obj) { } } - private final SortedSetDocValues dv; + interface SortedKeyedValues { + BytesRef next() throws IOException; + } + + private final SortedKeyedValues sortedKeyedValues; - FlattenedFieldSyntheticWriterHelper(final SortedSetDocValues dv) { - this.dv = dv; + FlattenedFieldSyntheticWriterHelper(final SortedKeyedValues sortedKeyedValues) { + this.sortedKeyedValues = sortedKeyedValues; } void write(final XContentBuilder b) throws IOException { - KeyValue curr = new KeyValue(dv.lookupOrd(dv.nextOrd())); + KeyValue curr = new KeyValue(sortedKeyedValues.next()); KeyValue prev = KeyValue.EMPTY; final List values = new ArrayList<>(); values.add(curr.value()); - for (int i = 1; i < dv.docValueCount(); i++) { - KeyValue next = new KeyValue(dv.lookupOrd(dv.nextOrd())); + for (BytesRef nextValue = sortedKeyedValues.next(); nextValue != null; nextValue = sortedKeyedValues.next()) { + KeyValue next = new KeyValue(nextValue); writeObject(b, curr, next, curr.start(prev), curr.end(next), values); values.add(next.value()); prev = curr; diff --git a/server/src/main/java/org/elasticsearch/index/mapper/flattened/FlattenedSortedSetDocValuesSyntheticFieldLoader.java b/server/src/main/java/org/elasticsearch/index/mapper/flattened/FlattenedSortedSetDocValuesSyntheticFieldLoader.java index 482273d137621..f957d7ce01902 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/flattened/FlattenedSortedSetDocValuesSyntheticFieldLoader.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/flattened/FlattenedSortedSetDocValuesSyntheticFieldLoader.java @@ -13,27 +13,44 @@ import org.apache.lucene.index.LeafReader; import 
org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.util.BytesRef; +import org.elasticsearch.core.Nullable; import org.elasticsearch.index.mapper.SourceLoader; import org.elasticsearch.xcontent.XContentBuilder; import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.TreeSet; +import java.util.stream.Stream; -public class FlattenedSortedSetDocValuesSyntheticFieldLoader extends SourceLoader.DocValuesBasedSyntheticFieldLoader { - private DocValuesFieldValues docValues = NO_VALUES; +class FlattenedSortedSetDocValuesSyntheticFieldLoader implements SourceLoader.SyntheticFieldLoader { private final String fieldFullPath; private final String keyedFieldFullPath; + private final String keyedIgnoredValuesFieldFullPath; private final String leafName; + private DocValuesFieldValues docValues = NO_VALUES; + private List ignoredValues = List.of(); + /** * Build a loader for flattened fields from doc values. * - * @param fieldFullPath full path to the original field - * @param keyedFieldFullPath full path to the keyed field to load doc values from - * @param leafName the name of the leaf field to use in the rendered {@code _source} + * @param fieldFullPath full path to the original field + * @param keyedFieldFullPath full path to the keyed field to load doc values from + * @param keyedIgnoredValuesFieldFullPath full path to the keyed field that stores values that are not present in doc values + * due to ignore_above + * @param leafName the name of the leaf field to use in the rendered {@code _source} */ - public FlattenedSortedSetDocValuesSyntheticFieldLoader(String fieldFullPath, String keyedFieldFullPath, String leafName) { + FlattenedSortedSetDocValuesSyntheticFieldLoader( + String fieldFullPath, + String keyedFieldFullPath, + @Nullable String keyedIgnoredValuesFieldFullPath, + String leafName + ) { this.fieldFullPath = fieldFullPath; this.keyedFieldFullPath = keyedFieldFullPath; + this.keyedIgnoredValuesFieldFullPath = keyedIgnoredValuesFieldFullPath; this.leafName = leafName; } @@ -42,6 +59,18 @@ public String fieldName() { return fieldFullPath; } + @Override + public Stream> storedFieldLoaders() { + if (keyedIgnoredValuesFieldFullPath == null) { + return Stream.empty(); + } + + return Stream.of(Map.entry(keyedIgnoredValuesFieldFullPath, (values) -> { + ignoredValues = new ArrayList<>(); + ignoredValues.addAll(values); + })); + } + @Override public DocValuesLoader docValuesLoader(LeafReader reader, int[] docIdsInLeaf) throws IOException { final SortedSetDocValues dv = DocValues.getSortedSet(reader, keyedFieldFullPath); @@ -56,23 +85,40 @@ public DocValuesLoader docValuesLoader(LeafReader reader, int[] docIdsInLeaf) th @Override public boolean hasValue() { - return docValues.count() > 0; + return docValues.count() > 0 || ignoredValues.isEmpty() == false; } @Override public void write(XContentBuilder b) throws IOException { - if (docValues.count() == 0) { + if (docValues.count() == 0 && ignoredValues.isEmpty()) { return; } + + FlattenedFieldSyntheticWriterHelper.SortedKeyedValues sortedKeyedValues = new DocValuesSortedKeyedValues(docValues); + if (ignoredValues.isEmpty() == false) { + var ignoredValuesSet = new TreeSet(); + for (Object value : ignoredValues) { + ignoredValuesSet.add((BytesRef) value); + } + ignoredValues = List.of(); + sortedKeyedValues = new DocValuesWithIgnoredSortedKeyedValues(sortedKeyedValues, ignoredValuesSet); + } + var writer = new FlattenedFieldSyntheticWriterHelper(sortedKeyedValues); + 
b.startObject(leafName); - docValues.write(b); + writer.write(b); b.endObject(); } + @Override + public void reset() { + ignoredValues = List.of(); + } + private interface DocValuesFieldValues { int count(); - void write(XContentBuilder b) throws IOException; + SortedSetDocValues getValues(); } private static final DocValuesFieldValues NO_VALUES = new DocValuesFieldValues() { @@ -82,7 +128,9 @@ public int count() { } @Override - public void write(XContentBuilder b) {} + public SortedSetDocValues getValues() { + return null; + } }; /** @@ -92,11 +140,9 @@ public void write(XContentBuilder b) {} private static class FlattenedFieldDocValuesLoader implements DocValuesLoader, DocValuesFieldValues { private final SortedSetDocValues dv; private boolean hasValue; - private final FlattenedFieldSyntheticWriterHelper writer; FlattenedFieldDocValuesLoader(final SortedSetDocValues dv) { this.dv = dv; - this.writer = new FlattenedFieldSyntheticWriterHelper(dv); } @Override @@ -110,8 +156,74 @@ public int count() { } @Override - public void write(XContentBuilder b) throws IOException { - this.writer.write(b); + public SortedSetDocValues getValues() { + return dv; + } + } + + private static class DocValuesWithIgnoredSortedKeyedValues implements FlattenedFieldSyntheticWriterHelper.SortedKeyedValues { + private final FlattenedFieldSyntheticWriterHelper.SortedKeyedValues docValues; + private final TreeSet ignoredValues; + + private BytesRef currentFromDocValues; + + private DocValuesWithIgnoredSortedKeyedValues( + FlattenedFieldSyntheticWriterHelper.SortedKeyedValues docValues, + TreeSet ignoredValues + ) { + this.docValues = docValues; + this.ignoredValues = ignoredValues; + } + + /** + * Returns next keyed field value to be included in synthetic source. + * This function merges keyed values from doc values and ignored values (due to ignore_above) + * that are loaded from stored fields and provided as input. + * Sort order of keyed values is preserved during merge so the output is the same as if + * it was using only doc values. 
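+         * For example (illustrative values): if doc values yield {@code a\0v1} and {@code c\0v3}
+         * while the ignored set holds {@code b\0v2}, successive calls return {@code a\0v1},
+         * {@code b\0v2}, {@code c\0v3}, i.e. the same sequence a single doc-values source would
+         * have produced.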
+ * @return + * @throws IOException + */ + @Override + public BytesRef next() throws IOException { + if (currentFromDocValues == null) { + currentFromDocValues = docValues.next(); + } + + if (ignoredValues.isEmpty() == false) { + BytesRef ignoredCandidate = ignoredValues.first(); + if (currentFromDocValues == null || ignoredCandidate.compareTo(currentFromDocValues) <= 0) { + ignoredValues.pollFirst(); + return ignoredCandidate; + } + } + if (currentFromDocValues == null) { + return null; + } + + var toReturn = currentFromDocValues; + currentFromDocValues = null; + return toReturn; + } + } + + private static class DocValuesSortedKeyedValues implements FlattenedFieldSyntheticWriterHelper.SortedKeyedValues { + private final DocValuesFieldValues docValues; + private int seen = 0; + + private DocValuesSortedKeyedValues(DocValuesFieldValues docValues) { + this.docValues = docValues; + } + + @Override + public BytesRef next() throws IOException { + if (seen < docValues.count()) { + seen += 1; + var sortedSetDocValues = docValues.getValues(); + return sortedSetDocValues.lookupOrd(sortedSetDocValues.nextOrd()); + } + + return null; } } } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldMapperTests.java index 285431b881add..5aca2357092e4 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldMapperTests.java @@ -9,8 +9,11 @@ package org.elasticsearch.index.mapper.flattened; +import org.apache.lucene.document.Document; import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.StoredFields; import org.apache.lucene.util.BytesRef; import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.common.bytes.BytesArray; @@ -41,7 +44,10 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; import java.util.HashSet; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; @@ -712,7 +718,7 @@ protected IngestScriptSupport ingestScriptSupport() { throw new AssumptionViolatedException("not supported"); } - private static void randomMapExample(final TreeMap example, int depth, int maxDepth) { + private static void randomMapExample(final Map example, int depth, int maxDepth) { for (int i = 0; i < randomIntBetween(2, 5); i++) { int j = depth >= maxDepth ? randomIntBetween(1, 2) : randomIntBetween(1, 3); switch (j) { @@ -728,7 +734,7 @@ private static void randomMapExample(final TreeMap example, int example.put(randomAlphaOfLength(6), randomList); } case 3 -> { - final TreeMap nested = new TreeMap<>(); + final Map nested = new HashMap<>(); randomMapExample(nested, depth + 1, maxDepth); example.put(randomAlphaOfLength(10), nested); } @@ -742,11 +748,73 @@ private static class FlattenedFieldSyntheticSourceSupport implements SyntheticSo @Override public SyntheticSourceExample example(int maxValues) throws IOException { - // NOTE: values must be keywords and we use a TreeMap to preserve order (doc values are sorted and the result - // is created with keys and nested keys in sorted order). 
- final TreeMap map = new TreeMap<>(); - randomMapExample(map, 0, maxValues); - return new SyntheticSourceExample(map, map, this::mapping); + if (randomBoolean()) { + // Create a singleton value + var value = randomObject(); + return new SyntheticSourceExample(value, mergeIntoExpectedMap(List.of(value)), this::mapping); + } + + // Create an array of flattened field values + var values = new ArrayList>(); + for (int i = 0; i < maxValues; i++) { + values.add(randomObject()); + } + var merged = mergeIntoExpectedMap(values); + + return new SyntheticSourceExample(values, merged, this::mapping); + } + + private Map randomObject() { + var maxDepth = randomIntBetween(1, 3); + + final Map map = new HashMap<>(); + randomMapExample(map, 0, maxDepth); + + return map; + } + + // Since arrays are moved to leafs in synthetic source, the result is not an array of objects + // but one big object containing merged values from all input objects. + // This function performs that transformation. + private Map mergeIntoExpectedMap(List> inputValues) { + // Fields are sorted since they come (mostly) from doc_values. + var result = new TreeMap(); + doMerge(inputValues, result); + return result; + } + + @SuppressWarnings("unchecked") + private void doMerge(List> inputValues, TreeMap result) { + for (var iv : inputValues) { + for (var field : iv.entrySet()) { + if (field.getValue() instanceof Map inputNestedMap) { + var intermediateResultMap = result.get(field.getKey()); + if (intermediateResultMap == null) { + var map = new TreeMap(); + + result.put(field.getKey(), map); + doMerge(List.of((Map) inputNestedMap), map); + } else if (intermediateResultMap instanceof Map m) { + doMerge(List.of((Map) inputNestedMap), (TreeMap) m); + } else { + throw new IllegalStateException("Conflicting entries in merged map"); + } + } else { + var valueAtCurrentLevel = result.get(field.getKey()); + if (valueAtCurrentLevel == null) { + result.put(field.getKey(), field.getValue()); + } else if (valueAtCurrentLevel instanceof List) { + ((List) valueAtCurrentLevel).add(field.getValue()); + } else { + var list = new ArrayList<>(); + list.add(valueAtCurrentLevel); + list.add(field.getValue()); + + result.put(field.getKey(), list); + } + } + } + } } @Override @@ -762,8 +830,57 @@ private void mapping(XContentBuilder b) throws IOException { } } + public void testSyntheticSourceWithOnlyIgnoredValues() throws IOException { + DocumentMapper mapper = createDocumentMapper(syntheticSourceMapping(b -> { + b.startObject("field").field("type", "flattened").field("ignore_above", 1).endObject(); + })); + + var syntheticSource = syntheticSource(mapper, b -> { + b.startObject("field"); + { + b.field("key1", "val1"); + b.startObject("obj1"); + { + b.field("key2", "val2"); + b.field("key3", List.of("val3", "val4")); + } + b.endObject(); + } + b.endObject(); + }); + assertThat(syntheticSource, equalTo("{\"field\":{\"key1\":\"val1\",\"obj1\":{\"key2\":\"val2\",\"key3\":[\"val3\",\"val4\"]}}}")); + } + @Override protected boolean supportsCopyTo() { return false; } + + @Override + public void assertStoredFieldsEquals(String info, IndexReader leftReader, IndexReader rightReader) throws IOException { + assert leftReader.maxDoc() == rightReader.maxDoc(); + StoredFields leftStoredFields = leftReader.storedFields(); + StoredFields rightStoredFields = rightReader.storedFields(); + for (int i = 0; i < leftReader.maxDoc(); i++) { + Document leftDoc = leftStoredFields.document(i); + Document rightDoc = rightStoredFields.document(i); + + // Everything is from 
LuceneTestCase except this part. + // LuceneTestCase sorts by name of the field only which results in a difference + // between keyed ignored field values that have the same name. + Comparator comp = Comparator.comparing(IndexableField::name).thenComparing(IndexableField::binaryValue); + List leftFields = new ArrayList<>(leftDoc.getFields()); + List rightFields = new ArrayList<>(rightDoc.getFields()); + Collections.sort(leftFields, comp); + Collections.sort(rightFields, comp); + + Iterator leftIterator = leftFields.iterator(); + Iterator rightIterator = rightFields.iterator(); + while (leftIterator.hasNext()) { + assertTrue(info, rightIterator.hasNext()); + assertStoredFieldEquals(info, leftIterator.next(), rightIterator.next()); + } + assertFalse(info, rightIterator.hasNext()); + } + } } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldParserTests.java b/server/src/test/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldParserTests.java index 736c877eff6c5..68be241ca1885 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldParserTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldParserTests.java @@ -33,7 +33,15 @@ public class FlattenedFieldParserTests extends ESTestCase { @Before public void setUp() throws Exception { super.setUp(); - parser = new FlattenedFieldParser("field", "field._keyed", new FakeFieldType("field"), Integer.MAX_VALUE, Integer.MAX_VALUE, null); + parser = new FlattenedFieldParser( + "field", + "field._keyed", + "field._keyed._ignored", + new FakeFieldType("field"), + Integer.MAX_VALUE, + Integer.MAX_VALUE, + null + ); } public void testTextValues() throws Exception { @@ -283,6 +291,7 @@ public void testDepthLimit() throws Exception { FlattenedFieldParser configuredParser = new FlattenedFieldParser( "field", "field._keyed", + "field._keyed._ignored", new FakeFieldType("field"), 2, Integer.MAX_VALUE, @@ -306,6 +315,7 @@ public void testDepthLimitBoundary() throws Exception { FlattenedFieldParser configuredParser = new FlattenedFieldParser( "field", "field._keyed", + "field._keyed._ignored", new FakeFieldType("field"), 3, Integer.MAX_VALUE, @@ -323,6 +333,7 @@ public void testIgnoreAbove() throws Exception { FlattenedFieldParser configuredParser = new FlattenedFieldParser( "field", "field._keyed", + "field._keyed._ignored", new FakeFieldType("field"), Integer.MAX_VALUE, 10, @@ -345,6 +356,7 @@ public void testNullValues() throws Exception { FlattenedFieldParser configuredParser = new FlattenedFieldParser( "field", "field._keyed", + "field._keyed._ignored", fieldType, Integer.MAX_VALUE, Integer.MAX_VALUE, diff --git a/server/src/test/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldSyntheticWriterHelperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldSyntheticWriterHelperTests.java index c04766ad57112..71e31d1ff371d 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldSyntheticWriterHelperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldSyntheticWriterHelperTests.java @@ -35,7 +35,7 @@ public void testSingleField() throws IOException { byte[] bytes = ("test" + '\0' + "one").getBytes(StandardCharsets.UTF_8); when(dv.nextOrd()).thenReturn(0L); when(dv.lookupOrd(0L)).thenReturn(new BytesRef(bytes, 0, bytes.length)); - FlattenedFieldSyntheticWriterHelper writer = new FlattenedFieldSyntheticWriterHelper(dv); + 
diff --git a/server/src/test/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldSyntheticWriterHelperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldSyntheticWriterHelperTests.java
index c04766ad57112..71e31d1ff371d 100644
--- a/server/src/test/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldSyntheticWriterHelperTests.java
+++ b/server/src/test/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldSyntheticWriterHelperTests.java
@@ -35,7 +35,7 @@ public void testSingleField() throws IOException {
         byte[] bytes = ("test" + '\0' + "one").getBytes(StandardCharsets.UTF_8);
         when(dv.nextOrd()).thenReturn(0L);
         when(dv.lookupOrd(0L)).thenReturn(new BytesRef(bytes, 0, bytes.length));
-        FlattenedFieldSyntheticWriterHelper writer = new FlattenedFieldSyntheticWriterHelper(dv);
+        FlattenedFieldSyntheticWriterHelper writer = new FlattenedFieldSyntheticWriterHelper(new SortedSetSortedKeyedValues(dv));
         ByteArrayOutputStream baos = new ByteArrayOutputStream();
         XContentBuilder b = new XContentBuilder(XContentType.JSON.xContent(), baos);
 
@@ -52,7 +52,7 @@ public void testSingleField() throws IOException {
     public void testFlatObject() throws IOException {
         // GIVEN
         final SortedSetDocValues dv = mock(SortedSetDocValues.class);
-        final FlattenedFieldSyntheticWriterHelper writer = new FlattenedFieldSyntheticWriterHelper(dv);
+        final FlattenedFieldSyntheticWriterHelper writer = new FlattenedFieldSyntheticWriterHelper(new SortedSetSortedKeyedValues(dv));
         final ByteArrayOutputStream baos = new ByteArrayOutputStream();
         final XContentBuilder builder = new XContentBuilder(XContentType.JSON.xContent(), baos);
         final List<BytesRef> bytes = List.of("a" + '\0' + "value_a", "b" + '\0' + "value_b", "c" + '\0' + "value_c", "d" + '\0' + "value_d")
@@ -79,7 +79,7 @@ public void testFlatObject() throws IOException {
     public void testSingleObject() throws IOException {
         // GIVEN
         final SortedSetDocValues dv = mock(SortedSetDocValues.class);
-        final FlattenedFieldSyntheticWriterHelper writer = new FlattenedFieldSyntheticWriterHelper(dv);
+        final FlattenedFieldSyntheticWriterHelper writer = new FlattenedFieldSyntheticWriterHelper(new SortedSetSortedKeyedValues(dv));
         final ByteArrayOutputStream baos = new ByteArrayOutputStream();
         final XContentBuilder builder = new XContentBuilder(XContentType.JSON.xContent(), baos);
         final List<BytesRef> bytes = List.of(
@@ -111,7 +111,7 @@ public void testSingleObject() throws IOException {
     public void testMultipleObjects() throws IOException {
         // GIVEN
         final SortedSetDocValues dv = mock(SortedSetDocValues.class);
-        final FlattenedFieldSyntheticWriterHelper writer = new FlattenedFieldSyntheticWriterHelper(dv);
+        final FlattenedFieldSyntheticWriterHelper writer = new FlattenedFieldSyntheticWriterHelper(new SortedSetSortedKeyedValues(dv));
         final ByteArrayOutputStream baos = new ByteArrayOutputStream();
         final XContentBuilder builder = new XContentBuilder(XContentType.JSON.xContent(), baos);
         final List<BytesRef> bytes = List.of("a.x" + '\0' + "10", "a.y" + '\0' + "20", "b.a" + '\0' + "30", "b.c" + '\0' + "40")
@@ -138,7 +138,7 @@ public void testMultipleObjects() throws IOException {
     public void testSingleArray() throws IOException {
         // GIVEN
         final SortedSetDocValues dv = mock(SortedSetDocValues.class);
-        final FlattenedFieldSyntheticWriterHelper writer = new FlattenedFieldSyntheticWriterHelper(dv);
+        final FlattenedFieldSyntheticWriterHelper writer = new FlattenedFieldSyntheticWriterHelper(new SortedSetSortedKeyedValues(dv));
         final ByteArrayOutputStream baos = new ByteArrayOutputStream();
         final XContentBuilder builder = new XContentBuilder(XContentType.JSON.xContent(), baos);
         final List<BytesRef> bytes = List.of("a.x" + '\0' + "10", "a.x" + '\0' + "20", "a.x" + '\0' + "30", "a.x" + '\0' + "40")
@@ -165,7 +165,7 @@ public void testSingleArray() throws IOException {
     public void testMultipleArrays() throws IOException {
         // GIVEN
         final SortedSetDocValues dv = mock(SortedSetDocValues.class);
-        final FlattenedFieldSyntheticWriterHelper writer = new FlattenedFieldSyntheticWriterHelper(dv);
+        final FlattenedFieldSyntheticWriterHelper writer = new FlattenedFieldSyntheticWriterHelper(new SortedSetSortedKeyedValues(dv));
         final ByteArrayOutputStream baos = new ByteArrayOutputStream();
         final XContentBuilder builder = new XContentBuilder(XContentType.JSON.xContent(), baos);
         final List<BytesRef> bytes = List.of(
@@ -191,4 +191,23 @@ public void testMultipleArrays() throws IOException {
         // THEN
         assertEquals("{\"a\":{\"x\":[\"10\",\"20\"]},\"b\":{\"y\":[\"30\",\"40\",\"50\"]}}", baos.toString(StandardCharsets.UTF_8));
     }
+
+    private class SortedSetSortedKeyedValues implements FlattenedFieldSyntheticWriterHelper.SortedKeyedValues {
+        private final SortedSetDocValues dv;
+        private int seen = 0;
+
+        private SortedSetSortedKeyedValues(SortedSetDocValues dv) {
+            this.dv = dv;
+        }
+
+        @Override
+        public BytesRef next() throws IOException {
+            if (seen < dv.docValueCount()) {
+                seen += 1;
+                return dv.lookupOrd(dv.nextOrd());
+            }
+
+            return null;
+        }
+    }
 }
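The constructor change repeated through this file is a small dependency inversion: FlattenedFieldSyntheticWriterHelper no longer reads SortedSetDocValues directly but a one-method SortedKeyedValues source, which is why the tests can adapt a mock via the SortedSetSortedKeyedValues class above. A self-contained sketch of the same pattern, with stand-in names of our own rather than the Elasticsearch types:

// Sketch (names ours): the consumer depends on a minimal one-method source,
// so any supplier of sorted keyed values can be plugged in - doc values,
// a mock, or a plain list.
import java.util.Iterator;
import java.util.List;

public class KeyedValuesDemo {
    interface SortedKeyedValues {
        String next(); // returns null when exhausted, like the adapter above
    }

    static void writeAll(SortedKeyedValues values) {
        for (String v = values.next(); v != null; v = values.next()) {
            System.out.println(v);
        }
    }

    public static void main(String[] args) {
        Iterator<String> it = List.of("a" + '\0' + "value_a", "b" + '\0' + "value_b").iterator();
        writeAll(() -> it.hasNext() ? it.next() : null); // a lambda is enough to be a source
    }
}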
From 774a97fac794ba665cdff1a3485f0c09a0e9291d Mon Sep 17 00:00:00 2001
From: Stanislav Malyshev
Date: Wed, 25 Sep 2024 18:42:09 -0600
Subject: [PATCH 58/58] Improve DateTime error handling and add some bad date tests (#112723) (#113569)

* Improve DateTime error handling and add some bad date tests
---
 docs/changelog/112723.yaml                   |  6 +++
 .../common/time/JavaDateMathParser.java      |  4 +-
 .../index/mapper/DateFieldTypeTests.java     | 41 +++++++++++++++++++
 3 files changed, 49 insertions(+), 2 deletions(-)
 create mode 100644 docs/changelog/112723.yaml

diff --git a/docs/changelog/112723.yaml b/docs/changelog/112723.yaml
new file mode 100644
index 0000000000000..dbee3232d1c75
--- /dev/null
+++ b/docs/changelog/112723.yaml
@@ -0,0 +1,6 @@
+pr: 112723
+summary: Improve DateTime error handling and add some bad date tests
+area: Search
+type: bug
+issues:
+ - 112190
diff --git a/server/src/main/java/org/elasticsearch/common/time/JavaDateMathParser.java b/server/src/main/java/org/elasticsearch/common/time/JavaDateMathParser.java
index b5eb2efe2e06c..0ee0b34da3a5c 100644
--- a/server/src/main/java/org/elasticsearch/common/time/JavaDateMathParser.java
+++ b/server/src/main/java/org/elasticsearch/common/time/JavaDateMathParser.java
@@ -12,13 +12,13 @@
 import org.elasticsearch.ElasticsearchParseException;
 import org.elasticsearch.common.Strings;
 
+import java.time.DateTimeException;
 import java.time.DayOfWeek;
 import java.time.Instant;
 import java.time.LocalTime;
 import java.time.ZoneId;
 import java.time.ZoneOffset;
 import java.time.ZonedDateTime;
-import java.time.format.DateTimeParseException;
 import java.time.temporal.ChronoField;
 import java.time.temporal.TemporalAccessor;
 import java.time.temporal.TemporalAdjusters;
@@ -220,7 +220,7 @@ private Instant parseDateTime(String value, ZoneId timeZone, boolean roundUpIfNo
                 return DateFormatters.from(accessor).withZoneSameLocal(timeZone).toInstant();
             }
-        } catch (IllegalArgumentException | DateTimeParseException e) {
+        } catch (IllegalArgumentException | DateTimeException e) {
             throw new ElasticsearchParseException(
                 "failed to parse date field [{}] with format [{}]: [{}]",
                 e,
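The one-line catch change above is subtle: java.time's DateTimeParseException is a subclass of DateTimeException, but errors raised while building a date from already-parsed fields are reported with the broader DateTimeException, which the old catch let propagate instead of being wrapped in ElasticsearchParseException. A minimal sketch of the distinction (demo class ours, not from the patch):

// Sketch (ours): parse-time failures raise the subclass, while failures
// resolving already-parsed fields raise the broader DateTimeException.
import java.time.DateTimeException;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;

public class CatchScopeDemo {
    public static void main(String[] args) {
        try {
            // Failure during parsing itself: DateTimeParseException.
            DateTimeFormatter.ISO_LOCAL_DATE.parse("not-a-date");
        } catch (DateTimeParseException e) {
            System.out.println("parse-time: " + e.getClass().getSimpleName());
        }

        try {
            // Failure building a date from parsed fields: plain DateTimeException,
            // which a catch limited to the parse subclass would miss.
            LocalDate.of(2016, 4, 45); // day 45 is out of range
        } catch (DateTimeException e) {
            System.out.println("resolution-time: " + e.getClass().getSimpleName());
        }
    }
}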
diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DateFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DateFieldTypeTests.java
index f22681138378f..d925a9dd1d691 100644
--- a/server/src/test/java/org/elasticsearch/index/mapper/DateFieldTypeTests.java
+++ b/server/src/test/java/org/elasticsearch/index/mapper/DateFieldTypeTests.java
@@ -22,6 +22,7 @@
 import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.store.Directory;
+import org.elasticsearch.ElasticsearchParseException;
 import org.elasticsearch.cluster.metadata.IndexMetadata;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.time.DateFormatter;
@@ -143,6 +144,46 @@ private void doTestIsFieldWithinQuery(DateFieldType ft, DirectoryReader reader,
         assertEquals(Relation.INTERSECTS, ft.isFieldWithinQuery(reader, "2015-10-12", "2016-04-03", false, false, zone, null, context));
         assertEquals(Relation.INTERSECTS, ft.isFieldWithinQuery(reader, "2015-10-12", "2016-04-03", false, true, zone, null, context));
         assertEquals(Relation.INTERSECTS, ft.isFieldWithinQuery(reader, "2015-10-12", "2016-04-03", true, false, zone, null, context));
+        // Bad dates
+        assertThrows(
+            ElasticsearchParseException.class,
+            () -> ft.isFieldWithinQuery(reader, "2015-00-01", "2016-04-03", randomBoolean(), randomBoolean(), zone, null, context)
+        );
+        assertThrows(
+            ElasticsearchParseException.class,
+            () -> ft.isFieldWithinQuery(reader, "2015-01-01", "2016-04-00", randomBoolean(), randomBoolean(), zone, null, context)
+        );
+        assertThrows(
+            ElasticsearchParseException.class,
+            () -> ft.isFieldWithinQuery(reader, "2015-22-01", "2016-04-00", randomBoolean(), randomBoolean(), zone, null, context)
+        );
+        assertThrows(
+            ElasticsearchParseException.class,
+            () -> ft.isFieldWithinQuery(reader, "2015-01-01", "2016-04-45", randomBoolean(), randomBoolean(), zone, null, context)
+        );
+        assertThrows(
+            ElasticsearchParseException.class,
+            () -> ft.isFieldWithinQuery(reader, "2015-01-01", "2016-04-01T25:00:00", randomBoolean(), randomBoolean(), zone, null, context)
+        );
+        if (ft.resolution().equals(Resolution.NANOSECONDS)) {
+            assertThrows(
+                IllegalArgumentException.class,
+                () -> ft.isFieldWithinQuery(reader, "-2016-04-01", "2016-04-01", randomBoolean(), randomBoolean(), zone, null, context)
+            );
+            assertThrows(
+                IllegalArgumentException.class,
+                () -> ft.isFieldWithinQuery(
+                    reader,
+                    "9223372036854775807",
+                    "2016-04-01",
+                    randomBoolean(),
+                    randomBoolean(),
+                    zone,
+                    null,
+                    context
+                )
+            );
+        }
     }
 
     public void testValueFormat() {
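Note that the nanosecond-resolution cases at the end expect IllegalArgumentException rather than ElasticsearchParseException: those inputs parse as valid dates but fall outside the span a signed long of epoch nanoseconds can represent (roughly the years 1677 through 2262), so the failure happens after parsing succeeds. A rough sketch of that overflow, with a helper name of our own rather than Elasticsearch's actual implementation:

// Sketch (helper name ours): a parseable date can still be rejected at
// nanosecond resolution because epoch nanoseconds must fit in a signed long.
import java.time.Instant;

public class NanoRangeDemo {
    static long toEpochNanos(Instant instant) {
        try {
            long nanos = Math.multiplyExact(instant.getEpochSecond(), 1_000_000_000L);
            return Math.addExact(nanos, instant.getNano());
        } catch (ArithmeticException e) {
            // Not a parse error at all, hence the different exception type.
            throw new IllegalArgumentException("date [" + instant + "] is out of nanosecond range", e);
        }
    }

    public static void main(String[] args) {
        System.out.println(toEpochNanos(Instant.parse("2016-04-01T00:00:00Z"))); // fits comfortably
        try {
            toEpochNanos(Instant.parse("-2016-04-01T00:00:00Z")); // parses fine, overflows the long
        } catch (IllegalArgumentException e) {
            System.out.println(e.getMessage());
        }
    }
}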