Skip to content

Commit

Permalink
Merge branch 'main' into many-infer
Browse files Browse the repository at this point in the history
  • Loading branch information
elasticmachine authored Nov 18, 2024
2 parents b923299 + fd0cdf0 commit 04a4f49
Show file tree
Hide file tree
Showing 133 changed files with 2,491 additions and 2,062 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ import org.elasticsearch.gradle.testclusters.TestDistribution
// Common config when running with a FIPS-140 runtime JVM
if (buildParams.inFipsJvm) {
allprojects {
String javaSecurityFilename = buildParams.runtimeJavaDetails.toLowerCase().contains('oracle') ? 'fips_java_oracle.security' : 'fips_java.security'
String javaSecurityFilename = buildParams.runtimeJavaDetails.get().toLowerCase().contains('oracle') ? 'fips_java_oracle.security' : 'fips_java.security'
File fipsResourcesDir = new File(project.buildDir, 'fips-resources')
File fipsSecurity = new File(fipsResourcesDir, javaSecurityFilename)
File fipsPolicy = new File(fipsResourcesDir, 'fips_java.policy')
Expand Down
6 changes: 6 additions & 0 deletions docs/changelog/115585.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 115459
summary: Adds access to flags no_sub_matches and no_overlapping_matches to hyphenation-decompounder-tokenfilter
area: Search
type: enhancement
issues:
- 97849
6 changes: 6 additions & 0 deletions docs/changelog/116277.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 116277
summary: Update Semantic Query To Handle Zero Size Responses
area: Vector Search
type: bug
issues:
- 116083
5 changes: 0 additions & 5 deletions docs/changelog/116339.yaml

This file was deleted.

5 changes: 5 additions & 0 deletions docs/changelog/116676.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 116676
summary: Fix handling of time exceeded exception in fetch phase
area: Search
type: bug
issues: []
5 changes: 5 additions & 0 deletions docs/changelog/116915.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 116915
summary: Improve message about insecure S3 settings
area: Snapshot/Restore
type: enhancement
issues: []
5 changes: 5 additions & 0 deletions docs/changelog/116918.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 116918
summary: Split searchable snapshot into multiple repo operations
area: Snapshot/Restore
type: enhancement
issues: []
5 changes: 5 additions & 0 deletions docs/changelog/116922.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 116922
summary: Always check if index mode is logsdb
area: Logs
type: bug
issues: []
5 changes: 5 additions & 0 deletions docs/changelog/116931.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 116931
summary: Enable built-in Inference Endpoints and default for Semantic Text
area: "Machine Learning"
type: enhancement
issues: []
5 changes: 5 additions & 0 deletions docs/changelog/116942.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 116942
summary: Fix handling of bulk requests with semantic text fields and delete ops
area: Relevance
type: bug
issues: []
11 changes: 11 additions & 0 deletions docs/changelog/116943.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
pr: 116943
summary: Remove support for deprecated `force_source` highlighting parameter
area: Highlighting
type: breaking
issues: []
breaking:
title: Remove support for deprecated `force_source` highlighting parameter
area: REST API
details: The deprecated highlighting `force_source` parameter is no longer supported.
impact: Users should remove usages of the `force_source` parameter from their search requests.
notable: false
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,18 @@ output. Defaults to `5`.
(Optional, Boolean)
If `true`, only include the longest matching subword. Defaults to `false`.

`no_sub_matches`::
(Optional, Boolean)
If `true`, do not match sub tokens in tokens that are in the word list.
Defaults to `false`.

`no_overlapping_matches`::
(Optional, Boolean)
If `true`, do not allow overlapping tokens.
Defaults to `false`.

Typically, users will only want to enable one of the three flags, because `no_overlapping_matches` is the most restrictive and `no_sub_matches` is more restrictive than `only_longest_match`. When a more restrictive option is enabled, the state of the less restrictive options has no effect.

[[analysis-hyp-decomp-tokenfilter-customize]]
==== Customize and add to an analyzer

Expand Down
2 changes: 1 addition & 1 deletion docs/reference/how-to/indexing-speed.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ different nodes so there's redundancy for any node failures. You can also use
insurance.

[discrete]
==== Local vs.remote storage
==== Local vs. remote storage

include::./remote-storage.asciidoc[]

Expand Down
2 changes: 0 additions & 2 deletions docs/reference/search/search-your-data/highlighting.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -176,8 +176,6 @@ fragmenter:: Specifies how text should be broken up in highlight
snippets: `simple` or `span`. Only valid for the `plain` highlighter.
Defaults to `span`.

force_source:: deprecated; this parameter has no effect

`simple`::: Breaks up text into same-sized fragments.
`span`::: Breaks up text into same-sized fragments, but tries to avoid
breaking up text between highlighted terms. This is helpful when you're
Expand Down
5 changes: 0 additions & 5 deletions docs/reference/security/authorization/built-in-roles.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,6 @@ roles have a fixed set of privileges and cannot be updated.
Grants access necessary for the APM system user to send system-level data
(such as monitoring) to {es}.

[[built-in-roles-apm-user]] `apm_user` ::
Grants the privileges required for APM users (such as `read` and
`view_index_metadata` privileges on the `apm-*` and `.ml-anomalies*` indices).
deprecated:[7.13.0,"See {kibana-ref}/apm-app-users.html[APM app users and privileges\] for alternatives."].

[[built-in-roles-beats-admin]] `beats_admin` ::
Grants access to the `.management-beats` index, which contains configuration
information for the Beats.
Expand Down
2 changes: 1 addition & 1 deletion gradle/build.versions.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[versions]
asm = "9.6"
asm = "9.7.1"
jackson = "2.15.0"
junit5 = "5.8.1"
spock = "2.1-groovy-3.0"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
*/
public class HyphenationCompoundWordTokenFilterFactory extends AbstractCompoundWordTokenFilterFactory {

private final boolean noSubMatches;
private final boolean noOverlappingMatches;
private final HyphenationTree hyphenationTree;

HyphenationCompoundWordTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
Expand All @@ -46,6 +48,9 @@ public class HyphenationCompoundWordTokenFilterFactory extends AbstractCompoundW
} catch (Exception e) {
throw new IllegalArgumentException("Exception while reading hyphenation_patterns_path.", e);
}

noSubMatches = settings.getAsBoolean("no_sub_matches", false);
noOverlappingMatches = settings.getAsBoolean("no_overlapping_matches", false);
}

@Override
Expand All @@ -57,7 +62,9 @@ public TokenStream create(TokenStream tokenStream) {
minWordSize,
minSubwordSize,
maxSubwordSize,
onlyLongestMatch
onlyLongestMatch,
noSubMatches,
noOverlappingMatches
);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@
import org.hamcrest.MatcherAssert;

import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
Expand All @@ -42,6 +45,7 @@
import static org.hamcrest.Matchers.instanceOf;

public class CompoundAnalysisTests extends ESTestCase {

public void testDefaultsCompoundAnalysis() throws Exception {
Settings settings = getJsonSettings();
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("test", settings);
Expand All @@ -63,6 +67,44 @@ public void testDictionaryDecompounder() throws Exception {
assertWarnings("Setting [version] on analysis component [custom7] has no effect and is deprecated");
}

/**
 * Runs the {@code hyphenationDecompoundingAnalyzerOnlyLongestMatch} analyzer over
 * "kaffeemaschine fussballpumpe" once with the JSON settings and once with the YAML settings,
 * and checks that the expected compound parts are among the emitted terms.
 */
public void testHyphenationDecompoundingAnalyzerOnlyLongestMatch() throws Exception {
    final String analyzerName = "hyphenationDecompoundingAnalyzerOnlyLongestMatch";
    final String input = "kaffeemaschine fussballpumpe";

    // Both settings variants are built before the first analysis runs.
    for (Settings candidate : List.of(getJsonSettings(), getYamlSettings())) {
        List<String> tokens = analyze(candidate, analyzerName, input);
        MatcherAssert.assertThat(
            tokens,
            hasItems("kaffeemaschine", "kaffee", "fee", "maschine", "fussballpumpe", "fussball", "ballpumpe", "pumpe")
        );
    }

    // The shared test settings still carry a deprecated [version] setting on custom7.
    assertWarnings("Setting [version] on analysis component [custom7] has no effect and is deprecated");
}

/**
 * Checks the {@code no_sub_matches} flag via the {@code hyphenationDecompoundingAnalyzerNoSubMatches}
 * analyzer, using both the JSON and the YAML settings.
 * <p>
 * For example, given a word list of ["kaffee", "fee", "maschine"], {@code no_sub_matches} should
 * prevent "fee" from being emitted as a token of "kaffeemaschine".
 */
public void testHyphenationDecompoundingAnalyzerNoSubMatches() throws Exception {
    Settings[] settingsArr = new Settings[] { getJsonSettings(), getYamlSettings() };
    for (Settings settings : settingsArr) {
        List<String> terms = analyze(settings, "hyphenationDecompoundingAnalyzerNoSubMatches", "kaffeemaschine fussballpumpe");
        MatcherAssert.assertThat(terms, hasItems("kaffeemaschine", "kaffee", "maschine", "fussballpumpe", "fussball", "ballpumpe"));
        // hasItems only proves presence; the flag under test is about a token being absent,
        // so check explicitly that the sub match was suppressed.
        if (terms.contains("fee")) {
            throw new AssertionError("no_sub_matches should suppress the sub token [fee] but analysis produced " + terms);
        }
    }
    assertWarnings("Setting [version] on analysis component [custom7] has no effect and is deprecated");
}

/**
 * Checks the {@code no_overlapping_matches} flag via the
 * {@code hyphenationDecompoundingAnalyzerNoOverlappingMatches} analyzer, using both the JSON and
 * the YAML settings.
 * <p>
 * For example, given a word list of ["fuss", "fussball", "ballpumpe", "ball", "pumpe"],
 * {@code no_overlapping_matches} should prevent "ballpumpe" from being emitted as a token of
 * "fussballpumpe".
 */
public void testHyphenationDecompoundingAnalyzerNoOverlappingMatches() throws Exception {
    Settings[] settingsArr = new Settings[] { getJsonSettings(), getYamlSettings() };
    for (Settings settings : settingsArr) {
        List<String> terms = analyze(settings, "hyphenationDecompoundingAnalyzerNoOverlappingMatches", "kaffeemaschine fussballpumpe");
        MatcherAssert.assertThat(terms, hasItems("kaffeemaschine", "kaffee", "maschine", "fussballpumpe", "fussball", "pumpe"));
        // hasItems only proves presence; the flag under test is about a token being absent,
        // so check explicitly that the overlapping match was suppressed.
        if (terms.contains("ballpumpe")) {
            throw new AssertionError(
                "no_overlapping_matches should suppress the overlapping token [ballpumpe] but analysis produced " + terms
            );
        }
    }
    assertWarnings("Setting [version] on analysis component [custom7] has no effect and is deprecated");
}

private List<String> analyze(Settings settings, String analyzerName, String text) throws IOException {
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("test", settings);
AnalysisModule analysisModule = createAnalysisModule(settings);
Expand Down Expand Up @@ -92,20 +134,25 @@ public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
}

/**
 * Returns the test settings loaded from the JSON resource {@code test1.json}.
 * <p>
 * Delegates to {@link #getSettings(String)} so the JSON and YAML variants are prepared the same way
 * (including the hyphenation rules file that helper copies into the temporary config directory).
 *
 * @throws IOException if the helper fails to prepare the directory or load the resource
 */
private Settings getJsonSettings() throws IOException {
    return getSettings("/org/elasticsearch/analysis/common/test1.json");
}

/**
 * Returns the test settings loaded from the YAML resource {@code test1.yml}.
 * <p>
 * Delegates to {@link #getSettings(String)} so the JSON and YAML variants are prepared the same way.
 *
 * @throws IOException if the helper fails to prepare the directory or load the resource
 */
private Settings getYamlSettings() throws IOException {
    return getSettings("/org/elasticsearch/analysis/common/test1.yml");
}

/**
 * Builds the settings for a test run from the given settings resource.
 * <p>
 * A fresh temporary home directory is created with a {@code config} sub-directory, and the
 * {@code de_DR.xml} hyphenation rules resource (looked up relative to this test class) is copied
 * into it. Presumably this lets the analyzers under test resolve their hyphenation patterns file
 * from the config directory; confirm against the test settings resources.
 *
 * @param filePath class-path location of the JSON or YAML settings resource to load
 * @return the loaded settings, with the index-created version set to the current index version and
 *         {@code path.home} pointing at the temporary home directory
 * @throws IOException if the hyphenation rules resource is missing, or if creating the directory,
 *                     copying the rules or loading the settings fails
 */
private Settings getSettings(String filePath) throws IOException {
    String hyphenationRulesFileName = "de_DR.xml";
    Path home = createTempDir();
    Path config = home.resolve("config");
    Files.createDirectory(config);

    // Close the resource stream once copied; a missing resource yields null, which would otherwise
    // surface as an unexplained NullPointerException inside Files.copy.
    try (InputStream hyphenationRules = getClass().getResourceAsStream(hyphenationRulesFileName)) {
        if (hyphenationRules == null) {
            throw new IOException("Test resource [" + hyphenationRulesFileName + "] not found for " + getClass().getName());
        }
        Files.copy(hyphenationRules, config.resolve(hyphenationRulesFileName));
    }

    return Settings.builder()
        .loadFromStream(filePath, getClass().getResourceAsStream(filePath), false)
        .put(IndexMetadata.SETTING_VERSION_CREATED, IndexVersion.current())
        .put(Environment.PATH_HOME_SETTING.getKey(), home.toString())
        .build();
}
}
Loading

0 comments on commit 04a4f49

Please sign in to comment.