forked from opensearch-project/OpenSearch
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add SplitResponseProcessor to Search Pipelines (opensearch-project#14800) (opensearch-project#14863)
* Add SplitResponseProcessor for search pipelines
* Register the split processor factory
* Address code review comments
* Avoid list copy by casting array
---------
(cherry picked from commit 45c5f8d)
Signed-off-by: Daniel Widdis <widdis@gmail.com>
Signed-off-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
- Loading branch information
1 parent
7755055
commit 3b0ea65
Showing
5 changed files
with
380 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
162 changes: 162 additions & 0 deletions
162
...ne-common/src/main/java/org/opensearch/search/pipeline/common/SplitResponseProcessor.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,162 @@ | ||
/* | ||
* SPDX-License-Identifier: Apache-2.0 | ||
* | ||
* The OpenSearch Contributors require contributions made to | ||
* this file be licensed under the Apache-2.0 license or a | ||
* compatible open source license. | ||
*/ | ||
|
||
package org.opensearch.search.pipeline.common; | ||
|
||
import org.opensearch.action.search.SearchRequest; | ||
import org.opensearch.action.search.SearchResponse; | ||
import org.opensearch.common.collect.Tuple; | ||
import org.opensearch.common.document.DocumentField; | ||
import org.opensearch.common.xcontent.XContentHelper; | ||
import org.opensearch.core.common.bytes.BytesReference; | ||
import org.opensearch.core.xcontent.MediaType; | ||
import org.opensearch.core.xcontent.XContentBuilder; | ||
import org.opensearch.ingest.ConfigurationUtils; | ||
import org.opensearch.search.SearchHit; | ||
import org.opensearch.search.pipeline.AbstractProcessor; | ||
import org.opensearch.search.pipeline.Processor; | ||
import org.opensearch.search.pipeline.SearchResponseProcessor; | ||
|
||
import java.util.Arrays; | ||
import java.util.Map; | ||
import java.util.Objects; | ||
|
||
/** | ||
* Processor that sorts an array of items. | ||
* Throws exception is the specified field is not an array. | ||
*/ | ||
public class SplitResponseProcessor extends AbstractProcessor implements SearchResponseProcessor { | ||
/** Key to reference this processor type from a search pipeline. */ | ||
public static final String TYPE = "split"; | ||
/** Key defining the string field to be split. */ | ||
public static final String SPLIT_FIELD = "field"; | ||
/** Key defining the delimiter used to split the string. This can be a regular expression pattern. */ | ||
public static final String SEPARATOR = "separator"; | ||
/** Optional key for handling empty trailing fields. */ | ||
public static final String PRESERVE_TRAILING = "preserve_trailing"; | ||
/** Optional key to put the split values in a different field. */ | ||
public static final String TARGET_FIELD = "target_field"; | ||
|
||
private final String splitField; | ||
private final String separator; | ||
private final boolean preserveTrailing; | ||
private final String targetField; | ||
|
||
SplitResponseProcessor( | ||
String tag, | ||
String description, | ||
boolean ignoreFailure, | ||
String splitField, | ||
String separator, | ||
boolean preserveTrailing, | ||
String targetField | ||
) { | ||
super(tag, description, ignoreFailure); | ||
this.splitField = Objects.requireNonNull(splitField); | ||
this.separator = Objects.requireNonNull(separator); | ||
this.preserveTrailing = preserveTrailing; | ||
this.targetField = targetField == null ? splitField : targetField; | ||
} | ||
|
||
/** | ||
* Getter function for splitField | ||
* @return sortField | ||
*/ | ||
public String getSplitField() { | ||
return splitField; | ||
} | ||
|
||
/** | ||
* Getter function for separator | ||
* @return separator | ||
*/ | ||
public String getSeparator() { | ||
return separator; | ||
} | ||
|
||
/** | ||
* Getter function for preserveTrailing | ||
* @return preserveTrailing; | ||
*/ | ||
public boolean isPreserveTrailing() { | ||
return preserveTrailing; | ||
} | ||
|
||
/** | ||
* Getter function for targetField | ||
* @return targetField | ||
*/ | ||
public String getTargetField() { | ||
return targetField; | ||
} | ||
|
||
@Override | ||
public String getType() { | ||
return TYPE; | ||
} | ||
|
||
@Override | ||
public SearchResponse processResponse(SearchRequest request, SearchResponse response) throws Exception { | ||
SearchHit[] hits = response.getHits().getHits(); | ||
for (SearchHit hit : hits) { | ||
Map<String, DocumentField> fields = hit.getFields(); | ||
if (fields.containsKey(splitField)) { | ||
DocumentField docField = hit.getFields().get(splitField); | ||
if (docField == null) { | ||
throw new IllegalArgumentException("field [" + splitField + "] is null, cannot split."); | ||
} | ||
Object val = docField.getValue(); | ||
if (val == null || !String.class.isAssignableFrom(val.getClass())) { | ||
throw new IllegalArgumentException("field [" + splitField + "] is not a string, cannot split"); | ||
} | ||
Object[] strings = ((String) val).split(separator, preserveTrailing ? -1 : 0); | ||
hit.setDocumentField(targetField, new DocumentField(targetField, Arrays.asList(strings))); | ||
} | ||
if (hit.hasSource()) { | ||
BytesReference sourceRef = hit.getSourceRef(); | ||
Tuple<? extends MediaType, Map<String, Object>> typeAndSourceMap = XContentHelper.convertToMap( | ||
sourceRef, | ||
false, | ||
(MediaType) null | ||
); | ||
|
||
Map<String, Object> sourceAsMap = typeAndSourceMap.v2(); | ||
if (sourceAsMap.containsKey(splitField)) { | ||
Object val = sourceAsMap.get(splitField); | ||
if (val instanceof String) { | ||
Object[] strings = ((String) val).split(separator, preserveTrailing ? -1 : 0); | ||
sourceAsMap.put(targetField, Arrays.asList(strings)); | ||
} | ||
XContentBuilder builder = XContentBuilder.builder(typeAndSourceMap.v1().xContent()); | ||
builder.map(sourceAsMap); | ||
hit.sourceRef(BytesReference.bytes(builder)); | ||
} | ||
} | ||
} | ||
return response; | ||
} | ||
|
||
static class Factory implements Processor.Factory<SearchResponseProcessor> { | ||
|
||
@Override | ||
public SplitResponseProcessor create( | ||
Map<String, Processor.Factory<SearchResponseProcessor>> processorFactories, | ||
String tag, | ||
String description, | ||
boolean ignoreFailure, | ||
Map<String, Object> config, | ||
PipelineContext pipelineContext | ||
) { | ||
String splitField = ConfigurationUtils.readStringProperty(TYPE, tag, config, SPLIT_FIELD); | ||
String separator = ConfigurationUtils.readStringProperty(TYPE, tag, config, SEPARATOR); | ||
boolean preserveTrailing = ConfigurationUtils.readBooleanProperty(TYPE, tag, config, PRESERVE_TRAILING, false); | ||
String targetField = ConfigurationUtils.readStringProperty(TYPE, tag, config, TARGET_FIELD, splitField); | ||
return new SplitResponseProcessor(tag, description, ignoreFailure, splitField, separator, preserveTrailing, targetField); | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.