Skip to content

Commit

Permalink
NodeInfo request can pull information about Node analysis components
Browse files Browse the repository at this point in the history
Adding a new option for NodeInfo request to include information about available
analysis components on individual nodes.

Closes opensearch-project#5481

Signed-off-by: Lukáš Vlček <lukas.vlcek@aiven.io>
  • Loading branch information
lukas-vlcek committed Jan 10, 2024
1 parent a1e4602 commit 568cb4f
Show file tree
Hide file tree
Showing 19 changed files with 636 additions and 22 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- GHA to verify checklist items completion in PR descriptions ([#10800](https://github.com/opensearch-project/OpenSearch/pull/10800))
- Allow to pass the list settings through environment variables (like [], ["a", "b", "c"], ...) ([#10625](https://github.com/opensearch-project/OpenSearch/pull/10625))
- [Admission Control] Integrate CPU AC with ResourceUsageCollector and add CPU AC stats to nodes/stats ([#10887](https://github.com/opensearch-project/OpenSearch/pull/10887))
- Add optional section of node analyzers into NodeInfo ([#10296](https://github.com/opensearch-project/OpenSearch/pull/10296))

### Dependencies
- Bump `log4j-core` from 2.18.0 to 2.19.0
Expand Down Expand Up @@ -58,6 +59,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Add task completion count in search backpressure stats API ([#10028](https://github.com/opensearch-project/OpenSearch/pull/10028/))
- Deprecate CamelCase `PathHierarchy` tokenizer name in favor of lowercase `path_hierarchy` ([#10894](https://github.com/opensearch-project/OpenSearch/pull/10894))
- Switched to more reliable OpenSearch Lucene snapshot location ([#11728](https://github.com/opensearch-project/OpenSearch/pull/11728))
- Breaking change: "search_pipelines" metric is not included in NodesInfoRequest by default ([#10296](https://github.com/opensearch-project/OpenSearch/pull/10296))

### Deprecated

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@
"transport",
"http",
"plugins",
"ingest"
"ingest",
"analyzers"
],
"description":"A comma-separated list of metrics you wish returned. Leave empty to return all."
}
Expand All @@ -69,7 +70,8 @@
"transport",
"http",
"plugins",
"ingest"
"ingest",
"analyzers"
],
"description":"A comma-separated list of metrics you wish returned. Leave empty to return all."
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
---
# REST test: requesting the "analyzers" metric from the nodes info API must
# return an "analysis_components" section for a node.
"node_info test analyzers":
- skip:
version: " - 2.99.99"
reason: "analyzers metric was introduced in 3.0.0"
features: [arbitrary_key]

# Ask the nodes info API for the analyzers metric only.
- do:
nodes.info:
metric: [ analyzers ]

# Note: It will only stash the first node_id in the api response.
- set:
nodes._arbitrary_key_: node_id

# Top-level response fields.
- is_true: nodes
- is_true: cluster_name

# Every section of analysis_components is checked on the stashed node.
- is_true: nodes.$node_id.analysis_components.analyzers
- is_true: nodes.$node_id.analysis_components.tokenizers
- is_true: nodes.$node_id.analysis_components.tokenFilters
- is_true: nodes.$node_id.analysis_components.charFilters
- is_true: nodes.$node_id.analysis_components.normalizers
- is_true: nodes.$node_id.analysis_components.plugins
Original file line number Diff line number Diff line change
@@ -0,0 +1,320 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.action.admin.cluster.node.info;

import org.opensearch.common.collect.Tuple;
import org.opensearch.core.common.io.stream.StreamInput;
import org.opensearch.core.common.io.stream.StreamOutput;
import org.opensearch.core.common.io.stream.Writeable;
import org.opensearch.core.service.ReportingService;
import org.opensearch.core.xcontent.XContentBuilder;
import org.opensearch.index.analysis.AnalysisRegistry;
import org.opensearch.plugins.AnalysisPlugin;
import org.opensearch.plugins.PluginInfo;
import org.opensearch.plugins.PluginsService;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.stream.Collectors;

import static java.util.Collections.unmodifiableList;
import static java.util.Collections.unmodifiableSortedSet;

/**
 * Information about node analysis components.
 *
 * Every node in the cluster contains several analysis components. Some are preconfigured, the rest come from
 * {@link AnalysisPlugin}s installed on this node (such as org.opensearch.analysis.common.CommonAnalysisModulePlugin).
 *
 * Instances are immutable: every id set is copied into an unmodifiable {@link SortedSet} (natural order) and the
 * list of plugin components is held as an unmodifiable list.
 *
 * @see org.opensearch.index.analysis.AnalysisRegistry
 * @see org.opensearch.indices.analysis.AnalysisModule
 *
 * @opensearch.internal
 */
public class NodeAnalyzers implements ReportingService.Info {

    private final SortedSet<String> analyzersIds;

    private final SortedSet<String> tokenizersIds;

    private final SortedSet<String> tokenFiltersIds;

    private final SortedSet<String> charFiltersIds;

    private final SortedSet<String> normalizersIds;

    private final List<NodeAnalyzers.AnalysisPluginComponents> nodeAnalysisPlugins;

    /** @return sorted, unmodifiable ids of the analyzers known to the node */
    public SortedSet<String> getAnalyzersIds() {
        return this.analyzersIds;
    }

    /** @return sorted, unmodifiable ids of the tokenizers known to the node */
    public SortedSet<String> getTokenizersIds() {
        return this.tokenizersIds;
    }

    /** @return sorted, unmodifiable ids of the token filters known to the node */
    public SortedSet<String> getTokenFiltersIds() {
        return this.tokenFiltersIds;
    }

    /** @return sorted, unmodifiable ids of the char filters known to the node */
    public SortedSet<String> getCharFiltersIds() {
        return this.charFiltersIds;
    }

    /** @return sorted, unmodifiable ids of the normalizers known to the node */
    public SortedSet<String> getNormalizersIds() {
        return this.normalizersIds;
    }

    /** @return unmodifiable list of the components contributed by each analysis plugin */
    public List<NodeAnalyzers.AnalysisPluginComponents> getNodeAnalysisPlugins() {
        return nodeAnalysisPlugins;
    }

    /**
     * Collects the analysis components from the node's registry and from every installed {@link AnalysisPlugin}.
     *
     * @param analysisRegistry registry providing the node-level component keys
     * @param pluginsService service used to look up the installed analysis plugins
     */
    public NodeAnalyzers(AnalysisRegistry analysisRegistry, PluginsService pluginsService) {
        // The plugins are queried before the registry, as in the original implementation.
        final List<NodeAnalyzers.AnalysisPluginComponents> pluginComponents = pluginsService.filterPluginsForPluginInfo(
            AnalysisPlugin.class
        ).stream().map(NodeAnalyzers::toPluginComponents).collect(Collectors.toList());
        this.analyzersIds = ensureSorted(analysisRegistry.getNodeAnalyzersKeys());
        this.tokenizersIds = ensureSorted(analysisRegistry.getNodeTokenizersKeys());
        this.tokenFiltersIds = ensureSorted(analysisRegistry.getNodeTokenFiltersKeys());
        this.charFiltersIds = ensureSorted(analysisRegistry.getNodeCharFiltersKeys());
        this.normalizersIds = ensureSorted(analysisRegistry.getNodeNormalizersKeys());
        // The list was created locally, so wrapping it (without another copy) is safe.
        this.nodeAnalysisPlugins = unmodifiableList(pluginComponents);
    }

    /**
     * Creates an instance from explicit values. Any argument may be {@code null}, in which case the corresponding
     * collection is empty. All collections are copied, so later changes made by the caller are not visible here.
     *
     * @param analyzersKeySet ids of analyzers
     * @param tokenizersKeySet ids of tokenizers
     * @param tokenFiltersKeySet ids of token filters
     * @param charFiltersKeySet ids of char filters
     * @param normalizersKeySet ids of normalizers
     * @param nodeAnalysisPlugins components contributed by analysis plugins
     */
    public NodeAnalyzers(
        final Set<String> analyzersKeySet,
        final Set<String> tokenizersKeySet,
        final Set<String> tokenFiltersKeySet,
        final Set<String> charFiltersKeySet,
        final Set<String> normalizersKeySet,
        final List<NodeAnalyzers.AnalysisPluginComponents> nodeAnalysisPlugins
    ) {
        this.analyzersIds = ensureSorted(analyzersKeySet);
        this.tokenizersIds = ensureSorted(tokenizersKeySet);
        this.tokenFiltersIds = ensureSorted(tokenFiltersKeySet);
        this.charFiltersIds = ensureSorted(charFiltersKeySet);
        this.normalizersIds = ensureSorted(normalizersKeySet);
        this.nodeAnalysisPlugins = ensureNonNull(nodeAnalysisPlugins);
    }

    /**
     * Reads an instance from the stream, in the same field order as {@link #writeTo(StreamOutput)} writes it.
     *
     * @param in stream to read from
     * @throws IOException if reading from the stream fails
     */
    public NodeAnalyzers(StreamInput in) throws IOException {
        this.analyzersIds = readSortedSet(in);
        this.tokenizersIds = readSortedSet(in);
        this.tokenFiltersIds = readSortedSet(in);
        this.charFiltersIds = readSortedSet(in);
        this.normalizersIds = readSortedSet(in);
        this.nodeAnalysisPlugins = unmodifiableList(in.readList(NodeAnalyzers.AnalysisPluginComponents::new));
    }

    /**
     * This class represents analysis components provided by {@link org.opensearch.plugins.AnalysisPlugin}.
     */
    public static class AnalysisPluginComponents implements Comparable<NodeAnalyzers.AnalysisPluginComponents>, Writeable {

        private final String pluginName;
        private final String className;
        private final SortedSet<String> analyzersIds;
        private final SortedSet<String> tokenizersIds;
        private final SortedSet<String> tokenFiltersIds;
        private final SortedSet<String> charFiltersIds;
        private final SortedSet<String> hunspellDictionaries;

        /**
         * Creates the description of a single plugin. Each id set is copied into a sorted, unmodifiable set;
         * a {@code null} set is treated as empty.
         *
         * @param pluginName plugin name
         * @param className plugin class name
         * @param analyzersIds ids of analyzers provided by the plugin
         * @param tokenizersIds ids of tokenizers provided by the plugin
         * @param tokenFiltersIds ids of token filters provided by the plugin
         * @param charFiltersIds ids of char filters provided by the plugin
         * @param hunspellDictionaries ids of hunspell dictionaries provided by the plugin
         */
        public AnalysisPluginComponents(
            final String pluginName,
            final String className,
            final Set<String> analyzersIds,
            final Set<String> tokenizersIds,
            final Set<String> tokenFiltersIds,
            final Set<String> charFiltersIds,
            final Set<String> hunspellDictionaries
        ) {
            this.pluginName = pluginName;
            this.className = className;
            this.analyzersIds = ensureSorted(analyzersIds);
            this.tokenizersIds = ensureSorted(tokenizersIds);
            this.tokenFiltersIds = ensureSorted(tokenFiltersIds);
            this.charFiltersIds = ensureSorted(charFiltersIds);
            this.hunspellDictionaries = ensureSorted(hunspellDictionaries);
        }

        /**
         * Reads an instance from the stream, in the same field order as {@link #writeTo(StreamOutput)} writes it.
         *
         * @param in stream to read from
         * @throws IOException if reading from the stream fails
         */
        public AnalysisPluginComponents(StreamInput in) throws IOException {
            this.pluginName = in.readString();
            this.className = in.readString();
            this.analyzersIds = readSortedSet(in);
            this.tokenizersIds = readSortedSet(in);
            this.tokenFiltersIds = readSortedSet(in);
            this.charFiltersIds = readSortedSet(in);
            this.hunspellDictionaries = readSortedSet(in);
        }

        /**
         * Writes the plugin name, the class name and then the five id sets to the stream.
         *
         * @param out stream to write to
         * @throws IOException if writing to the stream fails
         */
        @Override
        public void writeTo(StreamOutput out) throws IOException {
            out.writeString(this.pluginName);
            out.writeString(this.className);
            out.writeStringCollection(this.analyzersIds);
            out.writeStringCollection(this.tokenizersIds);
            out.writeStringCollection(this.tokenFiltersIds);
            out.writeStringCollection(this.charFiltersIds);
            out.writeStringCollection(this.hunspellDictionaries);
        }

        private static final Comparator<String> nullSafeStringComparator = Comparator.nullsFirst(String::compareTo);

        private static final Comparator<SortedSet<String>> sortedIdsComparator = AnalysisPluginComponents::compareSortedIds;

        /**
         * Compares two sorted id sets lexicographically, element by element; when one set is a prefix of the
         * other, the shorter set comes first.
         *
         * Comparing element-wise (instead of comparing the ids joined into a single string) guarantees that two
         * sets compare as equal only if they contain exactly the same ids, which keeps
         * {@link #compareTo(AnalysisPluginComponents)} consistent with {@link #equals(Object)}. For example the
         * sets {"a", "bc"} and {"ab", "c"} are different although their joined forms are identical.
         */
        private static int compareSortedIds(SortedSet<String> left, SortedSet<String> right) {
            final List<String> leftIds = new ArrayList<>(left);
            final List<String> rightIds = new ArrayList<>(right);
            final int shared = Math.min(leftIds.size(), rightIds.size());
            for (int i = 0; i < shared; i++) {
                final int order = leftIds.get(i).compareTo(rightIds.get(i));
                if (order != 0) {
                    return order;
                }
            }
            return Integer.compare(leftIds.size(), rightIds.size());
        }

        /**
         * This Comparator defines the comparison logic for sorting instances of AnalysisPluginComponents based on
         * their attributes in the following order:
         *
         * 1. Plugin name (as specified in the plugin descriptor)
         * 2. Class name
         * 3. Analyzers IDs
         * 4. Tokenizers IDs
         * 5. TokenFilters IDs
         * 6. CharFilters IDs
         * 7. Hunspell dictionary IDs
         *
         * Names are compared null-safely (null first); id sets are compared element-wise.
         */
        private static final Comparator<NodeAnalyzers.AnalysisPluginComponents> pluginComponentsComparator = Comparator.comparing(
            AnalysisPluginComponents::getPluginName,
            nullSafeStringComparator
        )
            .thenComparing(AnalysisPluginComponents::getClassName, nullSafeStringComparator)
            .thenComparing(AnalysisPluginComponents::getAnalyzersIds, sortedIdsComparator)
            .thenComparing(AnalysisPluginComponents::getTokenizersIds, sortedIdsComparator)
            .thenComparing(AnalysisPluginComponents::getTokenFiltersIds, sortedIdsComparator)
            .thenComparing(AnalysisPluginComponents::getCharFiltersIds, sortedIdsComparator)
            .thenComparing(AnalysisPluginComponents::getHunspellDictionaries, sortedIdsComparator);

        @Override
        public int compareTo(NodeAnalyzers.AnalysisPluginComponents o) {
            return pluginComponentsComparator.compare(this, o);
        }

        @Override
        public boolean equals(Object o) {
            if (this == o) {
                return true;
            }
            if (o == null || getClass() != o.getClass()) {
                return false;
            }
            AnalysisPluginComponents that = (AnalysisPluginComponents) o;
            return Objects.equals(pluginName, that.pluginName)
                && Objects.equals(className, that.className)
                && Objects.equals(analyzersIds, that.analyzersIds)
                && Objects.equals(tokenizersIds, that.tokenizersIds)
                && Objects.equals(tokenFiltersIds, that.tokenFiltersIds)
                && Objects.equals(charFiltersIds, that.charFiltersIds)
                && Objects.equals(hunspellDictionaries, that.hunspellDictionaries);
        }

        @Override
        public int hashCode() {
            return Objects.hash(pluginName, className, analyzersIds, tokenizersIds, tokenFiltersIds, charFiltersIds, hunspellDictionaries);
        }

        public String getPluginName() {
            return this.pluginName;
        }

        public String getClassName() {
            return this.className;
        }

        public SortedSet<String> getAnalyzersIds() {
            return this.analyzersIds;
        }

        public SortedSet<String> getTokenizersIds() {
            return this.tokenizersIds;
        }

        public SortedSet<String> getTokenFiltersIds() {
            return this.tokenFiltersIds;
        }

        public SortedSet<String> getCharFiltersIds() {
            return this.charFiltersIds;
        }

        public SortedSet<String> getHunspellDictionaries() {
            return this.hunspellDictionaries;
        }
    }

    /**
     * Writes the five id sets followed by the list of plugin components to the stream.
     *
     * @param out stream to write to
     * @throws IOException if writing to the stream fails
     */
    @Override
    public void writeTo(StreamOutput out) throws IOException {
        out.writeStringCollection(this.analyzersIds);
        out.writeStringCollection(this.tokenizersIds);
        out.writeStringCollection(this.tokenFiltersIds);
        out.writeStringCollection(this.charFiltersIds);
        out.writeStringCollection(this.normalizersIds);
        out.writeList(this.nodeAnalysisPlugins);
    }

    /**
     * Renders this info as an {@code analysis_components} object holding the node-level id sets and a
     * {@code plugins} array with one object per analysis plugin.
     *
     * @param builder builder to write into
     * @param params rendering parameters (not used by this implementation)
     * @return the given builder
     * @throws IOException if writing to the builder fails
     */
    @Override
    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
        builder.startObject("analysis_components");
        builder.field("analyzers").value(this.getAnalyzersIds());
        builder.field("tokenizers").value(this.getTokenizersIds());
        builder.field("tokenFilters").value(this.getTokenFiltersIds());
        builder.field("charFilters").value(this.getCharFiltersIds());
        builder.field("normalizers").value(this.getNormalizersIds());
        builder.startArray("plugins");
        for (NodeAnalyzers.AnalysisPluginComponents pluginComponents : this.getNodeAnalysisPlugins()) {
            builder.startObject();
            builder.field("name", pluginComponents.getPluginName());
            builder.field("classname", pluginComponents.getClassName());
            builder.field("analyzers").value(pluginComponents.getAnalyzersIds());
            builder.field("tokenizers").value(pluginComponents.getTokenizersIds());
            builder.field("tokenFilters").value(pluginComponents.getTokenFiltersIds());
            builder.field("charFilters").value(pluginComponents.getCharFiltersIds());
            builder.field("hunspellDictionaries").value(pluginComponents.getHunspellDictionaries());
            builder.endObject();
        }
        builder.endArray();
        builder.endObject();
        return builder;
    }

    /**
     * Two instances are equal when all their id sets and their plugin component lists are equal.
     * Defined together with {@link #hashCode()} so that both use the same fields.
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        NodeAnalyzers that = (NodeAnalyzers) o;
        return Objects.equals(analyzersIds, that.analyzersIds)
            && Objects.equals(tokenizersIds, that.tokenizersIds)
            && Objects.equals(tokenFiltersIds, that.tokenFiltersIds)
            && Objects.equals(charFiltersIds, that.charFiltersIds)
            && Objects.equals(normalizersIds, that.normalizersIds)
            && Objects.equals(nodeAnalysisPlugins, that.nodeAnalysisPlugins);
    }

    @Override
    public int hashCode() {
        return Objects.hash(analyzersIds, tokenizersIds, tokenFiltersIds, charFiltersIds, normalizersIds, nodeAnalysisPlugins);
    }

    /**
     * Builds the component description of a single analysis plugin from its descriptor and its instance.
     */
    private static NodeAnalyzers.AnalysisPluginComponents toPluginComponents(Tuple<PluginInfo, AnalysisPlugin> plugin) {
        final PluginInfo info = plugin.v1();
        final AnalysisPlugin analysisPlugin = plugin.v2();
        return new NodeAnalyzers.AnalysisPluginComponents(
            info.getName(),
            info.getClassname(),
            analysisPlugin.getAnalyzers().keySet(),
            analysisPlugin.getTokenizers().keySet(),
            analysisPlugin.getTokenFilters().keySet(),
            analysisPlugin.getCharFilters().keySet(),
            analysisPlugin.getHunspellDictionaries().keySet()
        );
    }

    /**
     * Reads a set of strings from the stream and returns it as a sorted, unmodifiable set.
     */
    private static SortedSet<String> readSortedSet(StreamInput in) throws IOException {
        return unmodifiableSortedSet(new TreeSet<>(in.readSet(StreamInput::readString)));
    }

    /**
     * Ensures that a given set of strings is sorted in "natural" order. The result is an unmodifiable copy;
     * a {@code null} input yields an empty set.
     *
     * See: {@link SortedSet}
     */
    private static SortedSet<String> ensureSorted(Set<String> stringSet) {
        return stringSet == null ? Collections.emptySortedSet() : unmodifiableSortedSet(new TreeSet<>(stringSet));
    }

    /**
     * Returns an unmodifiable copy of the given list, or an empty list when the input is {@code null}.
     * The copy prevents later changes to the caller's list from leaking into this object.
     */
    private static List<NodeAnalyzers.AnalysisPluginComponents> ensureNonNull(
        List<NodeAnalyzers.AnalysisPluginComponents> pluginComponents
    ) {
        return pluginComponents == null ? Collections.emptyList() : unmodifiableList(new ArrayList<>(pluginComponents));
    }
}
Loading

0 comments on commit 568cb4f

Please sign in to comment.