Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add text embedding processor #1007

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ This section is for maintaining a changelog for all breaking changes for the cli

### Added

- Added support for [text embedding processor](https://opensearch.org/docs/latest/ingest-pipelines/processors/text-embedding/) ([#1007](https://github.com/opensearch-project/opensearch-java/pull/1007))

### Dependencies

### Changed
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,8 @@ public enum Kind implements JsonEnum {

Inference("inference"),

TextEmbedding("text_embedding"),

;

private final String jsonValue;
Expand Down Expand Up @@ -735,6 +737,23 @@ public InferenceProcessor inference() {
return TaggedUnionUtils.get(this, Kind.Inference);
}

/**
* Is this variant instance of kind {@code text_embedding}?
*/
public boolean isTextEmbedding() {
return _kind == Kind.TextEmbedding;
}

/**
* Get the {@code text_embedding} variant value.
*
* @throws IllegalStateException
* if the current variant is not of the {@code text_embedding} kind.
*/
public TextEmbeddingProcessor textEmbedding() {
return TaggedUnionUtils.get(this, Kind.TextEmbedding);
}

@Override
@SuppressWarnings("unchecked")
public void serialize(JsonGenerator generator, JsonpMapper mapper) {
Expand Down Expand Up @@ -1086,6 +1105,16 @@ public ObjectBuilder<Processor> inference(Function<InferenceProcessor.Builder, O
return this.inference(fn.apply(new InferenceProcessor.Builder()).build());
}

public ObjectBuilder<Processor> textEmbedding(TextEmbeddingProcessor v) {
this._kind = Kind.TextEmbedding;
this._value = v;
return this;
}

public ObjectBuilder<Processor> textEmbedding(Function<TextEmbeddingProcessor.Builder, ObjectBuilder<TextEmbeddingProcessor>> fn) {
return this.textEmbedding(fn.apply(new TextEmbeddingProcessor.Builder()).build());
}

public Processor build() {
_checkSingleUse();
return new Processor(this);
Expand Down Expand Up @@ -1128,6 +1157,7 @@ protected static void setupProcessorDeserializer(ObjectDeserializer<Builder> op)
op.add(Builder::drop, DropProcessor._DESERIALIZER, "drop");
op.add(Builder::circle, CircleProcessor._DESERIALIZER, "circle");
op.add(Builder::inference, InferenceProcessor._DESERIALIZER, "inference");
op.add(Builder::textEmbedding, TextEmbeddingProcessor._DESERIALIZER, "text_embedding");

}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -301,4 +301,12 @@ public static InferenceProcessor.Builder inference() {
return new InferenceProcessor.Builder();
}

/**
* Creates a builder for the {@link TextEmbeddingProcessor text_embedding}
* {@code Processor} variant.
*/
public static TextEmbeddingProcessor.Builder textEmbedding() {
return new TextEmbeddingProcessor.Builder();
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

/*
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry was this class copied for Elasticsearch codebase? If not, we could only keep SPDX License

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, probably something I copied and pasted from another processor class, fixed at: 752df7b

* Modifications Copyright OpenSearch Contributors. See
* GitHub history for details.
*/

package org.opensearch.client.opensearch.ingest;

import jakarta.json.stream.JsonGenerator;
import java.util.Map;
import java.util.function.Function;
import javax.annotation.Nullable;
import org.opensearch.client.json.JsonpDeserializable;
import org.opensearch.client.json.JsonpDeserializer;
import org.opensearch.client.json.JsonpMapper;
import org.opensearch.client.json.ObjectBuilderDeserializer;
import org.opensearch.client.json.ObjectDeserializer;
import org.opensearch.client.util.ApiTypeHelper;
import org.opensearch.client.util.ObjectBuilder;

// typedef: ingest._types.TextEmbeddingProcessor

@JsonpDeserializable
public class TextEmbeddingProcessor extends ProcessorBase implements ProcessorVariant {
private final String modelId;

private final Map<String, String> fieldMap;

@Nullable
private final String description;

// ---------------------------------------------------------------------------------------------

private TextEmbeddingProcessor(Builder builder) {
super(builder);

this.modelId = ApiTypeHelper.requireNonNull(builder.modelId, this, "modelId");
this.fieldMap = ApiTypeHelper.unmodifiableRequired(builder.fieldMap, this, "fieldMap");
this.description = builder.description;

}

public static TextEmbeddingProcessor of(Function<Builder, ObjectBuilder<TextEmbeddingProcessor>> fn) {
return fn.apply(new Builder()).build();
}

/**
* Processor variant kind.
*/
@Override
public Processor.Kind _processorKind() {
return Processor.Kind.Inference;
}

/**
* Required - API name: {@code model_id}
*/
public final String modelId() {
return this.modelId;
}

/**
* API name: {@code field_map}
*/
public final Map<String, String> fieldMap() {
return this.fieldMap;
}

/**
* API name: {@code description}
*/
@Nullable
public final String description() {
return this.description;
}

protected void serializeInternal(JsonGenerator generator, JsonpMapper mapper) {

super.serializeInternal(generator, mapper);
generator.writeKey("model_id");
generator.write(this.modelId);

if (ApiTypeHelper.isDefined(this.fieldMap)) {
generator.writeKey("field_map");
generator.writeStartObject();
for (Map.Entry<String, String> item0 : this.fieldMap.entrySet()) {
generator.writeKey(item0.getKey());
generator.write(item0.getValue());
}
generator.writeEnd();

}
if (this.description != null) {
generator.writeKey("description");
generator.write(this.description);
}

}

// ---------------------------------------------------------------------------------------------

/**
* Builder for {@link TextEmbeddingProcessor}.
*/

public static class Builder extends ProcessorBase.AbstractBuilder<Builder> implements ObjectBuilder<TextEmbeddingProcessor> {
private String modelId;

@Nullable
private Map<String, String> fieldMap;

@Nullable
private String description;

/**
* Required - API name: {@code model_id}
*/
public final Builder modelId(String value) {
this.modelId = value;
return this;
}

/**
* API name: {@code field_map}
* <p>
* Adds all entries of <code>map</code> to <code>fieldMap</code>.
*/
public final Builder fieldMap(Map<String, String> map) {
this.fieldMap = _mapPutAll(this.fieldMap, map);
return this;
}

/**
* API name: {@code field_map}
* <p>
* Adds an entry to <code>fieldMap</code>.
*/
public final Builder fieldMap(String key, String value) {
this.fieldMap = _mapPut(this.fieldMap, key, value);
return this;
}

/**
* API name: {@code description}
*/
public final Builder description(@Nullable String value) {
this.description = value;
return this;
}

@Override
protected Builder self() {
return this;
}

/**
* Builds a {@link TextEmbeddingProcessor}.
*
* @throws NullPointerException
* if some of the required fields are null.
*/
public TextEmbeddingProcessor build() {
_checkSingleUse();

return new TextEmbeddingProcessor(this);
}
}

// ---------------------------------------------------------------------------------------------

/**
* Json deserializer for {@link TextEmbeddingProcessor}
*/
public static final JsonpDeserializer<TextEmbeddingProcessor> _DESERIALIZER = ObjectBuilderDeserializer.lazy(
Builder::new,
TextEmbeddingProcessor::setupTextEmbeddingProcessorDeserializer
);

protected static void setupTextEmbeddingProcessorDeserializer(ObjectDeserializer<TextEmbeddingProcessor.Builder> op) {
ProcessorBase.setupProcessorBaseDeserializer(op);
op.add(Builder::modelId, JsonpDeserializer.stringDeserializer(), "model_id");
op.add(Builder::fieldMap, JsonpDeserializer.stringMapDeserializer(JsonpDeserializer.stringDeserializer()), "field_map");
op.add(Builder::description, JsonpDeserializer.stringDeserializer(), "description");
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

/*
* Modifications Copyright OpenSearch Contributors. See
* GitHub history for details.
*/

package org.opensearch.client.opensearch.ingest;

import java.util.HashMap;
import java.util.Map;
import org.junit.Test;
import org.opensearch.client.opensearch.model.ModelTestCase;

public class TextEmbeddingProcessorTest extends ModelTestCase {

private static final Map<String, String> baseFieldMap = new HashMap<>();
static {
baseFieldMap.put("input_field", "vector_field");
}

private static TextEmbeddingProcessor.Builder baseTextEmbeddingProcessor() {
return new TextEmbeddingProcessor.Builder().modelId("modelId").fieldMap(baseFieldMap).tag("some-tag");
}

@Test
public void testJsonRoundtripWithDescription() {
Processor processor = new Processor.Builder().textEmbedding(
baseTextEmbeddingProcessor().description("processor-description").build()
).build();
String json =
"{\"text_embedding\":{\"tag\":\"some-tag\",\"model_id\":\"modelId\",\"field_map\":{\"input_field\":\"vector_field\"},\"description\":\"processor-description\"}}";
TextEmbeddingProcessor deserialized = checkJsonRoundtrip(processor, json).textEmbedding();

assertEquals("modelId", deserialized.modelId());
assertEquals(baseFieldMap, deserialized.fieldMap());
assertEquals("processor-description", deserialized.description());
assertEquals("some-tag", deserialized.tag());
}

@Test
public void testJsonRoundtripWithoutDescription() {
Processor processor = new Processor.Builder().textEmbedding(baseTextEmbeddingProcessor().build()).build();
String json =
"{\"text_embedding\":{\"tag\":\"some-tag\",\"model_id\":\"modelId\",\"field_map\":{\"input_field\":\"vector_field\"}}}";
TextEmbeddingProcessor deserialized = checkJsonRoundtrip(processor, json).textEmbedding();

assertEquals("modelId", deserialized.modelId());
assertEquals(baseFieldMap, deserialized.fieldMap());
assertNull(deserialized.description());
assertEquals("some-tag", deserialized.tag());
}
}
Loading