Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/8.x' into backport/8.x/pr-114247
Browse files Browse the repository at this point in the history
  • Loading branch information
kkrik-es committed Oct 8, 2024
2 parents c861f69 + c1115d2 commit d9c1f07
Show file tree
Hide file tree
Showing 185 changed files with 6,676 additions and 2,997 deletions.
5 changes: 5 additions & 0 deletions docs/changelog/112933.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 112933
summary: "Allow incubating Panama Vector in simdvec, and add vectorized `ipByteBin`"
area: Search
type: enhancement
issues: []
5 changes: 5 additions & 0 deletions docs/changelog/113251.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 113251
summary: Span term query to convert to match no docs when unmapped field is targeted
area: Search
type: bug
issues: []
6 changes: 6 additions & 0 deletions docs/changelog/113623.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 113623
summary: "Adding chunking settings to `MistralService`, `GoogleAiStudioService`, and\
\ `HuggingFaceService`"
area: Machine Learning
type: enhancement
issues: []
5 changes: 5 additions & 0 deletions docs/changelog/113812.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 113812
summary: Add Streaming Inference spec
area: Machine Learning
type: enhancement
issues: []
5 changes: 5 additions & 0 deletions docs/changelog/113873.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 113873
summary: Default inference endpoint for ELSER
area: Machine Learning
type: enhancement
issues: []
5 changes: 5 additions & 0 deletions docs/changelog/114002.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 114002
summary: Add a `mustache.max_output_size_bytes` setting to limit the length of results from mustache scripts
area: Infra/Scripting
type: enhancement
issues: []
5 changes: 5 additions & 0 deletions docs/changelog/114080.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 114080
summary: Stream Cohere Completion
area: Machine Learning
type: enhancement
issues: []
89 changes: 89 additions & 0 deletions docs/reference/ml/trained-models/apis/infer-trained-model.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,17 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenizatio
(Optional, string)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
=======
`deberta_v2`::::
(Optional, object)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-deberta-v2]
+
.Properties of deberta_v2
[%collapsible%open]
=======
`truncate`::::
(Optional, string)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate-deberta-v2]
=======
`roberta`::::
(Optional, object)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta]
Expand Down Expand Up @@ -301,6 +312,17 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenizatio
(Optional, string)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
=======
`deberta_v2`::::
(Optional, object)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-deberta-v2]
+
.Properties of deberta_v2
[%collapsible%open]
=======
`truncate`::::
(Optional, string)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate-deberta-v2]
=======
`roberta`::::
(Optional, object)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta]
Expand Down Expand Up @@ -397,6 +419,21 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenizatio
(Optional, string)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
=======
`deberta_v2`::::
(Optional, object)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-deberta-v2]
+
.Properties of deberta_v2
[%collapsible%open]
=======
`span`::::
(Optional, integer)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-span]

`truncate`::::
(Optional, string)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate-deberta-v2]
=======
`roberta`::::
(Optional, object)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta]
Expand Down Expand Up @@ -517,6 +554,21 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenizatio
(Optional, string)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
=======
`deberta_v2`::::
(Optional, object)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-deberta-v2]
+
.Properties of deberta_v2
[%collapsible%open]
=======
`span`::::
(Optional, integer)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-span]

`truncate`::::
(Optional, string)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate-deberta-v2]
=======
`roberta`::::
(Optional, object)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta]
Expand Down Expand Up @@ -608,6 +660,17 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenizatio
(Optional, string)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
=======
`deberta_v2`::::
(Optional, object)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-deberta-v2]
+
.Properties of deberta_v2
[%collapsible%open]
=======
`truncate`::::
(Optional, string)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate-deberta-v2]
=======
`roberta`::::
(Optional, object)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta]
Expand Down Expand Up @@ -687,6 +750,21 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenizatio
(Optional, integer)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-span]

`with_special_tokens`::::
(Optional, boolean)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-bert-with-special-tokens]
=======
`deberta_v2`::::
(Optional, object)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-deberta-v2]
+
.Properties of deberta_v2
[%collapsible%open]
=======
`span`::::
(Optional, integer)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-span]

`with_special_tokens`::::
(Optional, boolean)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-bert-with-special-tokens]
Expand Down Expand Up @@ -790,6 +868,17 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenizatio
(Optional, string)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
=======
`deberta_v2`::::
(Optional, object)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-deberta-v2]
+
.Properties of deberta_v2
[%collapsible%open]
=======
`truncate`::::
(Optional, string)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate-deberta-v2]
=======
`roberta`::::
(Optional, object)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta]
Expand Down
9 changes: 5 additions & 4 deletions docs/reference/rest-api/common-parms.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -1298,10 +1298,11 @@ tag::wait_for_active_shards[]
`wait_for_active_shards`::
+
--
(Optional, string) The number of shard copies that must be active before
proceeding with the operation. Set to `all` or any positive integer up
to the total number of shards in the index (`number_of_replicas+1`).
Default: 1, the primary shard.
(Optional, string) The number of copies of each shard that must be active
before proceeding with the operation. Set to `all` or any non-negative integer
up to the total number of copies of each shard in the index
(`number_of_replicas+1`). Defaults to `1`, meaning to wait just for each
primary shard to be active.

See <<index-wait-for-active-shards>>.
--
Expand Down
7 changes: 6 additions & 1 deletion docs/reference/rest-api/usage.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,12 @@ GET /_xpack/usage
"inference": {
"available" : true,
"enabled" : true,
"models" : []
"models" : [{
"service": "elasticsearch",
"task_type": "SPARSE_EMBEDDING",
"count": 1
}
]
},
"logstash" : {
"available" : true,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ private static ScriptService getScriptService(final Settings settings, final Lon
PainlessScriptEngine.NAME,
new PainlessScriptEngine(settings, scriptContexts),
MustacheScriptEngine.NAME,
new MustacheScriptEngine()
new MustacheScriptEngine(settings)
);
return new ScriptService(settings, scriptEngines, ScriptModule.CORE_CONTEXTS, timeProvider);
}
Expand Down
15 changes: 15 additions & 0 deletions libs/simdvec/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
* License v3.0 only", or the "Server Side Public License, v 1".
*/

import org.elasticsearch.gradle.internal.info.BuildParams
import org.elasticsearch.gradle.internal.precommit.CheckForbiddenApisTask

apply plugin: 'elasticsearch.publish'
Expand All @@ -23,6 +24,20 @@ dependencies {
}
}

// compileMain21Java does not exist within idea (see MrJarPlugin), so we cannot reference it directly by name;
// matching on the task name configures it only when it is actually present.
tasks.matching { it.name == "compileMain21Java" }.configureEach {
// make the incubating vector module visible to the compiler
options.compilerArgs << '--add-modules=jdk.incubator.vector'
// We remove -Werror, since suppression of the incubating-module warning
// (-Xlint:-incubating) is only supported since JDK 22.
options.compilerArgs -= '-Werror'
}

// Pass the incubating vector module flag to the test JVM, but only when the
// runtime Java major version is 21 or newer.
// NOTE(review): presumably 21 is chosen to line up with the compileMain21Java sources — confirm.
tasks.named('test').configure {
if (BuildParams.getRuntimeJavaVersion().majorVersion.toInteger() >= 21) {
jvmArgs '--add-modules=jdk.incubator.vector'
}
}

// Applies to every CheckForbiddenApisTask in this project.
// NOTE(review): presumably this swaps the default forbidden-apis signature set for 'jdk-signatures' only — confirm against the task's definition.
tasks.withType(CheckForbiddenApisTask).configureEach {
replaceSignatureFiles 'jdk-signatures'
}
Expand Down
1 change: 1 addition & 0 deletions libs/simdvec/src/main/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
/**
 * Module descriptor for {@code org.elasticsearch.simdvec}.
 *
 * <p>Depends on the Elasticsearch native-access and logging modules and on Lucene core.
 * Its single exported package is visible to {@code org.elasticsearch.server} only.
 */
module org.elasticsearch.simdvec {
requires org.elasticsearch.nativeaccess;
requires org.apache.lucene.core;
requires org.elasticsearch.logging;

// qualified export: no module other than org.elasticsearch.server can read this package
exports org.elasticsearch.simdvec to org.elasticsearch.server;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.simdvec;

import org.elasticsearch.simdvec.internal.vectorization.ESVectorUtilSupport;
import org.elasticsearch.simdvec.internal.vectorization.ESVectorizationProvider;

import static org.elasticsearch.simdvec.internal.vectorization.ESVectorUtilSupport.B_QUERY;

/**
 * Static entry points for vector operations. Calls are forwarded to the
 * {@link ESVectorUtilSupport} instance supplied by {@link ESVectorizationProvider}.
 */
public class ESVectorUtil {

    /** Implementation resolved once, when this class is initialized. */
    private static final ESVectorUtilSupport IMPL = ESVectorizationProvider.getInstance().getVectorUtilSupport();

    /**
     * Checks that the operand lengths are compatible and then delegates to the
     * resolved {@link ESVectorUtilSupport#ipByteBinByte(byte[], byte[])}.
     *
     * @param q query bytes; must be exactly {@code B_QUERY} times as long as {@code d}
     * @param d document bytes
     * @return the value produced by the underlying implementation
     * @throws IllegalArgumentException if {@code q.length != d.length * B_QUERY}
     */
    public static long ipByteBinByte(byte[] q, byte[] d) {
        // q is dereferenced before d, as in the length comparison this replaces
        final int actualQueryLength = q.length;
        final int requiredQueryLength = d.length * B_QUERY;
        if (actualQueryLength == requiredQueryLength) {
            return IMPL.ipByteBinByte(q, d);
        }
        throw new IllegalArgumentException("vector dimensions incompatible: " + q.length + "!= " + B_QUERY + " x " + d.length);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.simdvec.internal.vectorization;

import org.apache.lucene.util.BitUtil;

/**
 * {@link ESVectorUtilSupport} implementation built on plain integer operations
 * ({@link Integer#bitCount(int)} over int-sized and byte-sized reads).
 */
final class DefaultESVectorUtilSupport implements ESVectorUtilSupport {

    DefaultESVectorUtilSupport() {}

    @Override
    public long ipByteBinByte(byte[] q, byte[] d) {
        return ipByteBinByteImpl(q, d);
    }

    /**
     * Treats {@code q} as {@code B_QUERY} consecutive slices of {@code d.length} bytes each.
     * For slice {@code i}, counts the bits that are set in both the slice and {@code d},
     * shifts that count left by {@code i}, and adds it to the result.
     *
     * <p>No length validation is performed here.
     *
     * @param q query bytes, read at offsets {@code i * d.length + r}
     * @param d document bytes
     * @return the sum of the shifted per-slice bit counts
     */
    public static long ipByteBinByteImpl(byte[] q, byte[] d) {
        final int dims = d.length;
        // largest multiple of Integer.BYTES that does not exceed dims
        final int intAlignedEnd = dims & -Integer.BYTES;
        long total = 0;
        for (int slice = 0; slice < B_QUERY; slice++) {
            final int sliceOffset = slice * dims;
            long sliceBits = 0;
            int pos = 0;
            // bulk part: compare four bytes at a time through the int view of the arrays
            while (pos < intAlignedEnd) {
                final int queryWord = (int) BitUtil.VH_NATIVE_INT.get(q, sliceOffset + pos);
                final int docWord = (int) BitUtil.VH_NATIVE_INT.get(d, pos);
                sliceBits += Integer.bitCount(queryWord & docWord);
                pos += Integer.BYTES;
            }
            // tail: remaining bytes, masked to 8 bits so sign extension adds no set bits
            while (pos < dims) {
                sliceBits += Integer.bitCount((q[sliceOffset + pos] & d[pos]) & 0xFF);
                pos++;
            }
            total += sliceBits << slice;
        }
        return total;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.simdvec.internal.vectorization;

/**
 * {@link ESVectorizationProvider} that always hands out a {@link DefaultESVectorUtilSupport}.
 */
final class DefaultESVectorizationProvider extends ESVectorizationProvider {

    /** Single support instance, created together with this provider. */
    private final ESVectorUtilSupport vectorUtilSupport = new DefaultESVectorUtilSupport();

    DefaultESVectorizationProvider() {}

    /** Returns the support instance held by this provider. */
    @Override
    public ESVectorUtilSupport getVectorUtilSupport() {
        return this.vectorUtilSupport;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.simdvec.internal.vectorization;

/**
 * Operations that a vector-utility implementation must provide.
 */
public interface ESVectorUtilSupport {

/**
 * Ratio between the lengths of the {@code q} and {@code d} arguments of
 * {@link #ipByteBinByte(byte[], byte[])}: {@code ESVectorUtil} rejects calls
 * where {@code q.length != d.length * B_QUERY}.
 */
short B_QUERY = 4;

/**
 * Combines the bytes of {@code q} and {@code d} into a single {@code long} result.
 *
 * @param q query bytes; callers are expected to pass {@code B_QUERY * d.length} bytes
 * @param d document bytes
 * @return the implementation's result for the two arrays
 */
long ipByteBinByte(byte[] q, byte[] d);
}
Loading

0 comments on commit d9c1f07

Please sign in to comment.