Skip to content

Commit

Permalink
add it
Browse files Browse the repository at this point in the history
Signed-off-by: zhichao-aws <zhichaog@amazon.com>
  • Loading branch information
zhichao-aws committed Nov 19, 2024
1 parent b8d8b7f commit c5e5a29
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import org.opensearch.neuralsearch.BaseNeuralSearchIT;

import com.google.common.collect.ImmutableList;
import org.opensearch.neuralsearch.query.NeuralSparseQueryBuilder;

public class SparseEncodingProcessIT extends BaseNeuralSearchIT {

Expand All @@ -39,6 +40,35 @@ public void testSparseEncodingProcessor() throws Exception {
createSparseEncodingIndex();
ingestDocument();
assertEquals(1, getDocCount(INDEX_NAME));

NeuralSparseQueryBuilder neuralSparseQueryBuilder = new NeuralSparseQueryBuilder();
neuralSparseQueryBuilder.fieldName("title_sparse");
neuralSparseQueryBuilder.queryTokensSupplier(() -> Map.of("good", 1.0f, "a", 2.0f));
Map<String, Object> searchResponse = search(INDEX_NAME, neuralSparseQueryBuilder, 2);
assertFalse(searchResponse.isEmpty());
double maxScore = (Double) ((Map) searchResponse.get("hits")).get("max_score");
assertEquals(4.4433594, maxScore, 1e-3);
} finally {
wipeOfTestResources(INDEX_NAME, PIPELINE_NAME, modelId, null);
}
}

public void testSparseEncodingProcessorWithPrune() throws Exception {
String modelId = null;
try {
modelId = prepareSparseEncodingModel();
createPipelineProcessor(modelId, PIPELINE_NAME, ProcessorType.SPARSE_ENCODING_PRUNE);
createSparseEncodingIndex();
ingestDocument();
assertEquals(1, getDocCount(INDEX_NAME));

NeuralSparseQueryBuilder neuralSparseQueryBuilder = new NeuralSparseQueryBuilder();
neuralSparseQueryBuilder.fieldName("title_sparse");
neuralSparseQueryBuilder.queryTokensSupplier(() -> Map.of("good", 1.0f, "a", 2.0f));
Map<String, Object> searchResponse = search(INDEX_NAME, neuralSparseQueryBuilder, 2);
assertFalse(searchResponse.isEmpty());
double maxScore = (Double) ((Map) searchResponse.get("hits")).get("max_score");
assertEquals(3.640625, maxScore, 1e-3);
} finally {
wipeOfTestResources(INDEX_NAME, PIPELINE_NAME, modelId, null);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{
"description": "An example sparse encoding pipeline with max_ratio pruning",
"processors" : [
{
"sparse_encoding": {
"model_id": "%s",
"batch_size": "%d",
"prune_type": "max_ratio",
"prune_ratio": 0.8,
"field_map": {
"title": "title_sparse",
"favor_list": "favor_list_sparse",
"favorites": {
"game": "game_sparse",
"movie": "movie_sparse"
}
}
}
}
]
}
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
{
"name": "tokenize-idf-0915",
"version": "1.0.0",
"function_name": "SPARSE_TOKENIZE",
"description": "test model",
"model_format": "TORCH_SCRIPT",
"name": "amazon/neural-sparse/opensearch-neural-sparse-tokenizer-v1",
"version": "1.0.1",
"model_group_id": "%s",
"model_content_hash_value": "b345e9e943b62c405a8dd227ef2c46c84c5ff0a0b71b584be9132b37bce91a9a",
"url": "https://github.com/opensearch-project/ml-commons/raw/main/ml-algorithms/src/test/resources/org/opensearch/ml/engine/algorithms/sparse_encoding/sparse_demo.zip"
"model_format": "TORCH_SCRIPT"
}
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,9 @@ public abstract class BaseNeuralSearchIT extends OpenSearchSecureRestTestCase {
ProcessorType.TEXT_IMAGE_EMBEDDING,
"processor/PipelineForTextImageEmbeddingProcessorConfiguration.json",
ProcessorType.TEXT_EMBEDDING_WITH_NESTED_FIELDS_MAPPING,
"processor/PipelineConfigurationWithNestedFieldsMapping.json"
"processor/PipelineConfigurationWithNestedFieldsMapping.json",
ProcessorType.SPARSE_ENCODING_PRUNE,
"processor/SparseEncodingPipelineConfigurationWithPrune.json"
);
private static final Set<RestStatus> SUCCESS_STATUSES = Set.of(RestStatus.CREATED, RestStatus.OK);
protected static final String CONCURRENT_SEGMENT_SEARCH_ENABLED = "search.concurrent_segment_search.enabled";
Expand Down Expand Up @@ -1439,6 +1441,7 @@ protected enum ProcessorType {
TEXT_EMBEDDING,
TEXT_EMBEDDING_WITH_NESTED_FIELDS_MAPPING,
TEXT_IMAGE_EMBEDDING,
SPARSE_ENCODING
SPARSE_ENCODING,
SPARSE_ENCODING_PRUNE
}
}

0 comments on commit c5e5a29

Please sign in to comment.