Commit
[FEATURE] support default model id in neural_sparse query (#614)
* feature: implement default model id for neural sparse
* add unit tests
* add unit and integration tests
* add changelog
* fix ingest pipeline in IT
* add ITs for bwc restart-upgrade
* fix undeploy with retry
* optimize IT code structure
* add IT for bwc rolling-upgrade
* tidy
* update index mapping in IT
* move version check to build script
* resolve modelId
* update init model id
* modify versions check logic in bwc test
* add comments
* updates for review comments

Signed-off-by: zhichao-aws <zhichaog@amazon.com>
1 parent d2d4cc6, commit e41fba7. Showing 15 changed files with 531 additions and 33 deletions.
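What the change means in practice: with a neural_query_enricher request processor configured as the index's default search pipeline (see the test and pipeline configuration below), a neural_sparse query no longer has to carry its own model_id; the processor supplies the pipeline's default_model_id. A minimal sketch of the two query shapes, reusing the field name and query text from the BWC test below (the "<model-id>" value is a placeholder, not from this commit):

import org.opensearch.neuralsearch.query.NeuralSparseQueryBuilder;

// Query that pins the model explicitly on the builder.
NeuralSparseQueryBuilder withExplicitModel = new NeuralSparseQueryBuilder()
    .fieldName("passage_embedding")
    .queryText("Hello world a b");
withExplicitModel.modelId("<model-id>"); // placeholder model id

// Query that omits model_id; the neural_query_enricher processor on the
// search pipeline is expected to fill in default_model_id at search time.
NeuralSparseQueryBuilder withDefaultModel = new NeuralSparseQueryBuilder()
    .fieldName("passage_embedding")
    .queryText("Hello world a b");

The new integration test asserts that both forms return the same hits once the default model id is in place.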
113 additions, 0 deletions: ...upgrade/src/test/java/org/opensearch/neuralsearch/bwc/NeuralQueryEnricherProcessorIT.java
@@ -0,0 +1,113 @@
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.bwc;

import static org.opensearch.neuralsearch.TestUtils.NODES_BWC_CLUSTER;
import static org.opensearch.neuralsearch.TestUtils.SPARSE_ENCODING_PROCESSOR;
import static org.opensearch.neuralsearch.TestUtils.TEXT_EMBEDDING_PROCESSOR;

import org.opensearch.common.settings.Settings;
import org.opensearch.neuralsearch.TestUtils;
import org.opensearch.neuralsearch.query.NeuralQueryBuilder;
import org.opensearch.neuralsearch.query.NeuralSparseQueryBuilder;

import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;

public class NeuralQueryEnricherProcessorIT extends AbstractRestartUpgradeRestTestCase {
    // Prefix the pipeline names to avoid conflicts with other IT classes, since resources are not wiped after the first round
    private static final String SPARSE_INGEST_PIPELINE_NAME = "nqep-nlp-ingest-pipeline-sparse";
    private static final String DENSE_INGEST_PIPELINE_NAME = "nqep-nlp-ingest-pipeline-dense";
    private static final String SPARSE_SEARCH_PIPELINE_NAME = "nqep-nlp-search-pipeline-sparse";
    private static final String DENSE_SEARCH_PIPELINE_NAME = "nqep-nlp-search-pipeline-dense";
    private static final String TEST_ENCODING_FIELD = "passage_embedding";
    private static final String TEST_TEXT_FIELD = "passage_text";
    private static final String TEXT_1 = "Hello world a b";

    // Test the neural_query_enricher processor with neural_sparse search in the restart-upgrade scenario
    public void testNeuralQueryEnricherProcessor_NeuralSparseSearch_E2EFlow() throws Exception {
        waitForClusterHealthGreen(NODES_BWC_CLUSTER);
        NeuralSparseQueryBuilder sparseEncodingQueryBuilderWithoutModelId = new NeuralSparseQueryBuilder().fieldName(TEST_ENCODING_FIELD)
            .queryText(TEXT_1);
        // will set the model_id after we obtain the id
        NeuralSparseQueryBuilder sparseEncodingQueryBuilderWithModelId = new NeuralSparseQueryBuilder().fieldName(TEST_ENCODING_FIELD)
            .queryText(TEXT_1);

        if (isRunningAgainstOldCluster()) {
            String modelId = uploadSparseEncodingModel();
            loadModel(modelId);
            sparseEncodingQueryBuilderWithModelId.modelId(modelId);
            createPipelineForSparseEncodingProcessor(modelId, SPARSE_INGEST_PIPELINE_NAME);
            createIndexWithConfiguration(
                getIndexNameForTest(),
                Files.readString(Path.of(classLoader.getResource("processor/SparseIndexMappings.json").toURI())),
                SPARSE_INGEST_PIPELINE_NAME
            );

            addSparseEncodingDoc(getIndexNameForTest(), "0", List.of(), List.of(), List.of(TEST_TEXT_FIELD), List.of(TEXT_1));

            createSearchRequestProcessor(modelId, SPARSE_SEARCH_PIPELINE_NAME);
            updateIndexSettings(
                getIndexNameForTest(),
                Settings.builder().put("index.search.default_pipeline", SPARSE_SEARCH_PIPELINE_NAME)
            );
        } else {
            String modelId = null;
            try {
                modelId = TestUtils.getModelId(getIngestionPipeline(SPARSE_INGEST_PIPELINE_NAME), SPARSE_ENCODING_PROCESSOR);
                loadModel(modelId);
                sparseEncodingQueryBuilderWithModelId.modelId(modelId);
                assertEquals(
                    search(getIndexNameForTest(), sparseEncodingQueryBuilderWithoutModelId, 1).get("hits"),
                    search(getIndexNameForTest(), sparseEncodingQueryBuilderWithModelId, 1).get("hits")
                );
            } finally {
                wipeOfTestResources(getIndexNameForTest(), SPARSE_INGEST_PIPELINE_NAME, modelId, SPARSE_SEARCH_PIPELINE_NAME);
            }
        }
    }

    public void testNeuralQueryEnricherProcessor_NeuralSearch_E2EFlow() throws Exception {
        waitForClusterHealthGreen(NODES_BWC_CLUSTER);
        NeuralQueryBuilder neuralQueryBuilderWithoutModelId = new NeuralQueryBuilder().fieldName(TEST_ENCODING_FIELD).queryText(TEXT_1);
        NeuralQueryBuilder neuralQueryBuilderWithModelId = new NeuralQueryBuilder().fieldName(TEST_ENCODING_FIELD).queryText(TEXT_1);

        if (isRunningAgainstOldCluster()) {
            String modelId = uploadTextEmbeddingModel();
            loadModel(modelId);
            neuralQueryBuilderWithModelId.modelId(modelId);
            createPipelineProcessor(modelId, DENSE_INGEST_PIPELINE_NAME);
            createIndexWithConfiguration(
                getIndexNameForTest(),
                Files.readString(Path.of(classLoader.getResource("processor/IndexMappingMultipleShard.json").toURI())),
                DENSE_INGEST_PIPELINE_NAME
            );

            addDocument(getIndexNameForTest(), "0", TEST_TEXT_FIELD, TEXT_1, null, null);

            createSearchRequestProcessor(modelId, DENSE_SEARCH_PIPELINE_NAME);
            updateIndexSettings(getIndexNameForTest(), Settings.builder().put("index.search.default_pipeline", DENSE_SEARCH_PIPELINE_NAME));
            assertEquals(
                search(getIndexNameForTest(), neuralQueryBuilderWithoutModelId, 1).get("hits"),
                search(getIndexNameForTest(), neuralQueryBuilderWithModelId, 1).get("hits")
            );
        } else {
            String modelId = null;
            try {
                modelId = TestUtils.getModelId(getIngestionPipeline(DENSE_INGEST_PIPELINE_NAME), TEXT_EMBEDDING_PROCESSOR);
                loadModel(modelId);
                neuralQueryBuilderWithModelId.modelId(modelId);

                assertEquals(
                    search(getIndexNameForTest(), neuralQueryBuilderWithoutModelId, 1).get("hits"),
                    search(getIndexNameForTest(), neuralQueryBuilderWithModelId, 1).get("hits")
                );
            } finally {
                wipeOfTestResources(getIndexNameForTest(), DENSE_INGEST_PIPELINE_NAME, modelId, DENSE_SEARCH_PIPELINE_NAME);
            }
        }
    }
}
11 additions, 0 deletions: qa/restart-upgrade/src/test/resources/processor/SearchRequestPipelineConfiguration.json
@@ -0,0 +1,11 @@
{
  "request_processors": [
    {
      "neural_query_enricher": {
        "tag": "tag1",
        "description": "This processor is going to restrict to publicly visible documents",
        "default_model_id": "%s"
      }
    }
  ]
}
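The %s placeholder in default_model_id is filled with the actual model id at test time; the helper that does this (createSearchRequestProcessor in the test base class) is not shown in this diff. As a rough, hypothetical sketch of how such a helper could render the template and register it through the Search Pipeline API (the method name registerEnricherPipeline and the use of the base-class client() are assumptions, not code from this commit):

import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Locale;
import org.opensearch.client.Request;
import org.opensearch.client.Response;

// Hypothetical helper: substitute the model id into the JSON template above and
// create the search pipeline, which the index then points at via
// index.search.default_pipeline.
private void registerEnricherPipeline(String modelId, String pipelineName) throws Exception {
    String template = Files.readString(
        Path.of(classLoader.getResource("processor/SearchRequestPipelineConfiguration.json").toURI())
    );
    Request request = new Request("PUT", "/_search/pipeline/" + pipelineName);
    request.setJsonEntity(String.format(Locale.ROOT, template, modelId)); // fills the %s default_model_id
    Response response = client().performRequest(request);
    assertEquals(200, response.getStatusLine().getStatusCode());
}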