Skip to content

Commit

Permalink
tweak keyword field to use binary DV
Browse files Browse the repository at this point in the history
  • Loading branch information
rishabhmaurya committed Dec 13, 2024
1 parent 05513df commit 49fe52a
Showing 1 changed file with 33 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.SortedSetDocValuesField;
Expand Down Expand Up @@ -61,6 +62,7 @@
import org.opensearch.index.analysis.NamedAnalyzer;
import org.opensearch.index.compositeindex.datacube.DimensionType;
import org.opensearch.index.fielddata.IndexFieldData;
import org.opensearch.index.fielddata.plain.BinaryIndexFieldData;
import org.opensearch.index.fielddata.plain.SortedSetOrdinalsIndexFieldData;
import org.opensearch.index.query.QueryShardContext;
import org.opensearch.index.similarity.SimilarityProvider;
Expand Down Expand Up @@ -167,6 +169,13 @@ public static class Builder extends ParametrizedFieldMapper.Builder {
false
);

/**
 * Mapping parameter {@code use_binary_doc_value} (default {@code false}).
 *
 * When enabled, the mapper writes doc values to a {@code BinaryDocValuesField} named
 * {@code <field>.binary} instead of a {@code SortedSetDocValuesField} named {@code <field>},
 * and field data is read back from that same {@code .binary} field.
 *
 * The parameter is declared non-updateable: the flag decides which doc-values field and
 * format are used on both the write and the read path, so flipping it on an existing
 * mapping would make already-indexed documents invisible to field data (their values live
 * under the other field name and doc-values type).
 */
private final Parameter<Boolean> useBinaryDV = Parameter.boolParam(
    "use_binary_doc_value",
    false, // not updateable: changing the doc-values layout requires a reindex
    m -> toType(m).useBinaryDocValue,
    false
);

private final Parameter<Map<String, String>> meta = Parameter.metaParam();
private final Parameter<Float> boost = Parameter.boostParam();

Expand Down Expand Up @@ -216,7 +225,8 @@ protected List<Parameter<?>> getParameters() {
normalizer,
splitQueriesOnWhitespace,
boost,
meta
meta,
useBinaryDV
);
}

Expand All @@ -238,7 +248,7 @@ protected KeywordFieldType buildFieldType(BuilderContext context, FieldType fiel
} else if (splitQueriesOnWhitespace.getValue()) {
searchAnalyzer = Lucene.WHITESPACE_ANALYZER;
}
return new KeywordFieldType(buildFullName(context), fieldType, normalizer, searchAnalyzer, this);
return new KeywordFieldType(buildFullName(context), fieldType, normalizer, searchAnalyzer, this, useBinaryDV.getValue());
}

@Override
Expand Down Expand Up @@ -274,8 +284,13 @@ public static class KeywordFieldType extends StringFieldType {

private final int ignoreAbove;
private final String nullValue;
private final boolean useBinaryDocValue;

/**
 * Creates a keyword field type from mapper builder settings with binary doc values disabled.
 *
 * Delegates to the six-argument constructor, passing {@code false} for
 * {@code useBinaryDocValue}.
 *
 * @param name           full name of the field
 * @param fieldType      Lucene field type the mapping was built with
 * @param normalizer     normalizer passed through to the six-argument constructor
 * @param searchAnalyzer search analyzer passed through to the six-argument constructor
 * @param builder        mapper builder supplying the remaining settings
 */
public KeywordFieldType(
    String name,
    FieldType fieldType,
    NamedAnalyzer normalizer,
    NamedAnalyzer searchAnalyzer,
    Builder builder
) {
    this(name, fieldType, normalizer, searchAnalyzer, builder, false);
}

public KeywordFieldType(String name, FieldType fieldType, NamedAnalyzer normalizer, NamedAnalyzer searchAnalyzer, Builder builder, boolean useBinaryDocValue) {
super(
name,
fieldType.indexOptions() != IndexOptions.NONE,
Expand All @@ -289,13 +304,15 @@ public KeywordFieldType(String name, FieldType fieldType, NamedAnalyzer normaliz
setBoost(builder.boost.getValue());
this.ignoreAbove = builder.ignoreAbove.getValue();
this.nullValue = builder.nullValue.getValue();
this.useBinaryDocValue = useBinaryDocValue;
}

/**
 * Creates a keyword field type without a mapper builder.
 *
 * The instance uses {@code Lucene.KEYWORD_ANALYZER} as its index analyzer,
 * {@code TextSearchInfo.SIMPLE_MATCH_ONLY} as its text search info, has no
 * {@code ignoreAbove} limit ({@code Integer.MAX_VALUE}), no null-value substitute, and
 * does not use binary doc values.
 *
 * @param name         name of the field
 * @param isSearchable forwarded to the superclass constructor
 * @param hasDocValues forwarded to the superclass constructor
 * @param meta         field metadata forwarded to the superclass constructor
 */
public KeywordFieldType(String name, boolean isSearchable, boolean hasDocValues, Map<String, String> meta) {
    super(
        name,
        isSearchable,
        false,
        hasDocValues,
        TextSearchInfo.SIMPLE_MATCH_ONLY,
        meta
    );
    setIndexAnalyzer(Lucene.KEYWORD_ANALYZER);
    // Builder-less defaults: sorted-set doc values, no null substitute, no length cap.
    this.useBinaryDocValue = false;
    this.nullValue = null;
    this.ignoreAbove = Integer.MAX_VALUE;
}

public KeywordFieldType(String name) {
Expand All @@ -313,12 +330,14 @@ public KeywordFieldType(String name, FieldType fieldType) {
);
this.ignoreAbove = Integer.MAX_VALUE;
this.nullValue = null;
this.useBinaryDocValue = false;
}

/**
 * Creates a keyword field type that uses the given analyzer for both analyzer slots of
 * its {@code TextSearchInfo}, built on {@code Defaults.FIELD_TYPE} with empty metadata.
 *
 * The instance has no {@code ignoreAbove} limit ({@code Integer.MAX_VALUE}), no
 * null-value substitute, and does not use binary doc values.
 *
 * @param name     name of the field
 * @param analyzer analyzer passed twice to {@code TextSearchInfo}
 */
public KeywordFieldType(String name, NamedAnalyzer analyzer) {
    super(
        name,
        true,
        false,
        true,
        new TextSearchInfo(Defaults.FIELD_TYPE, null, analyzer, analyzer),
        Collections.emptyMap()
    );
    // Same builder-less defaults as the other convenience constructors.
    this.useBinaryDocValue = false;
    this.nullValue = null;
    this.ignoreAbove = Integer.MAX_VALUE;
}

@Override
Expand All @@ -333,7 +352,11 @@ NamedAnalyzer normalizer() {
/**
 * Returns the field-data builder used to load this field's doc values.
 *
 * When {@code useBinaryDocValue} is set, a {@code BinaryIndexFieldData.Builder} is
 * returned for the field named {@code name() + ".binary"}; otherwise a
 * {@code SortedSetOrdinalsIndexFieldData.Builder} is returned for {@code name()}.
 * Both use {@code CoreValuesSourceType.BYTES}.
 *
 * Neither {@code fullyQualifiedIndexName} nor {@code searchLookup} is read here.
 */
@Override
public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName, Supplier<SearchLookup> searchLookup) {
// Presumably rejects the request when the field has no doc values; confirm in the superclass.
failIfNoDocValues();
// NOTE(review): this unconditional return appears to be the pre-change side of the diff
// hunk; if kept alongside the branch below, that branch would be unreachable.
return new SortedSetOrdinalsIndexFieldData.Builder(name(), CoreValuesSourceType.BYTES);
if (useBinaryDocValue) {
// The ".binary" suffix must match the field name the mapper uses when it adds the
// BinaryDocValuesField at index time.
return new BinaryIndexFieldData.Builder(name() + ".binary", CoreValuesSourceType.BYTES);
} else {
return new SortedSetOrdinalsIndexFieldData.Builder(name(), CoreValuesSourceType.BYTES);
}
}

@Override
Expand Down Expand Up @@ -661,6 +684,7 @@ public Query wildcardQuery(
private final SimilarityProvider similarity;
private final String normalizerName;
private final boolean splitQueriesOnWhitespace;
private final boolean useBinaryDocValue;

private final IndexAnalyzers indexAnalyzers;

Expand All @@ -684,7 +708,7 @@ protected KeywordFieldMapper(
this.similarity = builder.similarity.getValue();
this.normalizerName = builder.normalizer.getValue();
this.splitQueriesOnWhitespace = builder.splitQueriesOnWhitespace.getValue();

this.useBinaryDocValue = builder.useBinaryDV.getValue();
this.indexAnalyzers = builder.indexAnalyzers;
}

Expand Down Expand Up @@ -741,7 +765,11 @@ protected void parseCreateField(ParseContext context) throws IOException {
}

if (fieldType().hasDocValues()) {
context.doc().add(new SortedSetDocValuesField(fieldType().name(), binaryValue));
if (useBinaryDocValue) {
context.doc().add(new BinaryDocValuesField(fieldType().name() + ".binary", binaryValue));
} else {
context.doc().add(new SortedSetDocValuesField(fieldType().name(), binaryValue));
}
}
}

Expand Down

0 comments on commit 49fe52a

Please sign in to comment.