-
Notifications
You must be signed in to change notification settings - Fork 72
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'main' into BugTestOther
Signed-off-by: Yuye Zhu <yuyezhu@amazon.com>
- Loading branch information
Showing
54 changed files
with
3,543 additions
and
480 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
# Runs the neural-search aggregation integration tests (`:integTest` with
# `-Dtest_aggs=true`, filtered to `org.opensearch.neuralsearch.query.aggregation.*IT`)
# on Linux (inside the OpenSearch CI container image) and on Windows, for every
# Java version in the matrix.
name: Run Additional Tests for Neural Search
on:
  schedule:
    - cron: '0 0 * * *'  # every night (00:00; GitHub evaluates cron schedules in UTC)
  push:
    branches:
      # "*" does not match branch names containing "/", so slash-separated
      # feature branches need their own "**" pattern.
      - "*"
      - "feature/**"
  pull_request:
    branches:
      - "*"
      - "feature/**"

jobs:
  # Reusable workflow from opensearch-build; its `ci-image-version-linux` output
  # is consumed by the Linux job below as the container image.
  Get-CI-Image-Tag:
    uses: opensearch-project/opensearch-build/.github/workflows/get-ci-image-tag.yml@main
    with:
      product: opensearch

  Check-neural-search-linux:
    needs: Get-CI-Image-Tag
    strategy:
      matrix:
        java: [11, 17, 21]
        os: [ubuntu-latest]

    name: Integ Tests Linux
    runs-on: ${{ matrix.os }}
    container:
      # using the same image which is used by opensearch-build team to build the OpenSearch Distribution
      # this image tag is subject to change as more dependencies and updates will arrive over time
      image: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-version-linux }}
      # need to switch to root so that github actions can install runner binary on container without permission issues.
      options: --user root

    steps:
      # NOTE(review): checkout@v1 / setup-java@v1 are old major versions; presumably
      # pinned for compatibility with the CI container image - confirm before upgrading.
      - name: Checkout neural-search
        uses: actions/checkout@v1

      - name: Setup Java ${{ matrix.java }}
        uses: actions/setup-java@v1
        with:
          java-version: ${{ matrix.java }}

      # The container runs as root (see `options` above), so the workspace is
      # handed to uid 1000 and Gradle is executed as that user instead of root.
      - name: Run tests
        run: |
          chown -R 1000:1000 `pwd`
          su `id -un 1000` -c "./gradlew ':integTest' -Dtest_aggs=true --tests \"org.opensearch.neuralsearch.query.aggregation.*IT\""

  # Same test selection as the Linux job, but directly on the hosted Windows
  # runner: no container, no CI image tag lookup and no user switch.
  Check-neural-search-windows:
    strategy:
      matrix:
        java: [11, 17, 21]
        os: [windows-latest]

    name: Integ Tests Windows
    runs-on: ${{ matrix.os }}

    steps:
      - name: Checkout neural-search
        uses: actions/checkout@v1

      - name: Setup Java ${{ matrix.java }}
        uses: actions/setup-java@v1
        with:
          java-version: ${{ matrix.java }}

      - name: Run tests
        run: |
          ./gradlew ':integTest' -Dtest_aggs=true --tests "org.opensearch.neuralsearch.query.aggregation.*IT"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
75 changes: 75 additions & 0 deletions
75
...estart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/TextChunkingProcessorIT.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
package org.opensearch.neuralsearch.bwc; | ||
|
||
import java.net.URL; | ||
import java.nio.file.Files; | ||
import java.nio.file.Path; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.Objects; | ||
|
||
import org.opensearch.index.query.MatchAllQueryBuilder; | ||
import static org.opensearch.neuralsearch.util.TestUtils.NODES_BWC_CLUSTER; | ||
|
||
public class TextChunkingProcessorIT extends AbstractRestartUpgradeRestTestCase { | ||
|
||
private static final String PIPELINE_NAME = "pipeline-text-chunking"; | ||
private static final String INPUT_FIELD = "body"; | ||
private static final String OUTPUT_FIELD = "body_chunk"; | ||
private static final String TEST_INDEX_SETTING_PATH = "processor/ChunkingIndexSettings.json"; | ||
private static final String TEST_INGEST_TEXT = | ||
"This is an example document to be chunked. The document contains a single paragraph, two sentences and 24 tokens by standard tokenizer in OpenSearch."; | ||
List<String> expectedPassages = List.of( | ||
"This is an example document to be chunked. The document ", | ||
"contains a single paragraph, two sentences and 24 tokens by ", | ||
"standard tokenizer in OpenSearch." | ||
); | ||
|
||
// Test rolling-upgrade text chunking processor | ||
// Create Text Chunking Processor, Ingestion Pipeline and add document | ||
// Validate process, pipeline and document count in restart-upgrade scenario | ||
public void testTextChunkingProcessor_E2EFlow() throws Exception { | ||
waitForClusterHealthGreen(NODES_BWC_CLUSTER); | ||
String indexName = getIndexNameForTest(); | ||
if (isRunningAgainstOldCluster()) { | ||
createPipelineForTextChunkingProcessor(PIPELINE_NAME); | ||
createChunkingIndex(indexName); | ||
addDocument(indexName, "0", INPUT_FIELD, TEST_INGEST_TEXT, null, null); | ||
validateTestIndex(indexName, OUTPUT_FIELD, 1, expectedPassages); | ||
} else { | ||
try { | ||
addDocument(indexName, "1", INPUT_FIELD, TEST_INGEST_TEXT, null, null); | ||
validateTestIndex(indexName, OUTPUT_FIELD, 2, expectedPassages); | ||
} finally { | ||
wipeOfTestResources(indexName, PIPELINE_NAME, null, null); | ||
} | ||
} | ||
} | ||
|
||
private void createChunkingIndex(String indexName) throws Exception { | ||
URL documentURLPath = classLoader.getResource(TEST_INDEX_SETTING_PATH); | ||
Objects.requireNonNull(documentURLPath); | ||
String indexSetting = Files.readString(Path.of(documentURLPath.toURI())); | ||
createIndexWithConfiguration(indexName, indexSetting, PIPELINE_NAME); | ||
} | ||
|
||
private void validateTestIndex(String indexName, String fieldName, int documentCount, Object expected) { | ||
int docCount = getDocCount(indexName); | ||
assertEquals(documentCount, docCount); | ||
MatchAllQueryBuilder query = new MatchAllQueryBuilder(); | ||
Map<String, Object> searchResults = search(indexName, query, 10); | ||
assertNotNull(searchResults); | ||
Map<String, Object> document = getFirstInnerHit(searchResults); | ||
assertNotNull(document); | ||
Object documentSource = document.get("_source"); | ||
assert (documentSource instanceof Map); | ||
@SuppressWarnings("unchecked") | ||
Map<String, Object> documentSourceMap = (Map<String, Object>) documentSource; | ||
assert (documentSourceMap).containsKey(fieldName); | ||
Object ingestOutputs = documentSourceMap.get(fieldName); | ||
assertEquals(expected, ingestOutputs); | ||
} | ||
} |
17 changes: 17 additions & 0 deletions
17
qa/restart-upgrade/src/test/resources/processor/ChunkingIndexSettings.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
{
  "settings": {
    "default_pipeline": "%s",
    "number_of_shards": 3,
    "number_of_replicas": 1
  },
  "mappings": {
    "properties": {
      "body": {
        "type": "text"
      },
      "body_chunk": {
        "type": "text"
      }
    }
  }
}
18 changes: 18 additions & 0 deletions
18
...t-upgrade/src/test/resources/processor/PipelineForTextChunkingProcessorConfiguration.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
{
  "description": "An example fixed token length chunker pipeline with standard tokenizer",
  "processors": [
    {
      "text_chunking": {
        "field_map": {
          "body": "body_chunk"
        },
        "algorithm": {
          "fixed_token_length": {
            "token_limit": 10,
            "tokenizer": "standard"
          }
        }
      }
    }
  ]
}
Oops, something went wrong.