Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix and optimize "Add XContentFieldFilter (#81970)" #83152

Merged
merged 13 commits into from
Feb 2, 2022
Merged
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.common.xcontent;

import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.util.CollectionUtils;
import org.elasticsearch.common.xcontent.support.XContentMapValues;
import org.elasticsearch.core.CheckedFunction;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.core.Tuple;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentFactory;
import org.elasticsearch.xcontent.XContentParser;
import org.elasticsearch.xcontent.XContentParserConfiguration;
import org.elasticsearch.xcontent.XContentType;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;

/**
* A filter that filter fields away from source
*/
public interface XContentFieldFilter {
/**
* filter source in {@link BytesReference} format and in {@link XContentType} content type
* note that xContentType may be null in some case, we should guess xContentType from sourceBytes in such cases
*/
BytesReference apply(BytesReference sourceBytes, @Nullable XContentType xContentType) throws IOException;

/**
* Construct {@link XContentFieldFilter} using given includes and excludes
*
* @param includes fields to keep, wildcard supported
* @param excludes fields to remove, wildcard supported
* @return filter that filter {@link org.elasticsearch.xcontent.XContent} with given includes and excludes
*/
static XContentFieldFilter newFieldFilter(String[] includes, String[] excludes) {
final CheckedFunction<XContentType, BytesReference, IOException> emptyValueSupplier = xContentType -> {
BytesStreamOutput bStream = new BytesStreamOutput();
XContentBuilder builder = XContentFactory.contentBuilder(xContentType, bStream).map(Collections.emptyMap());
builder.close();
return bStream.bytes();
};
// Use the old map-based filtering mechanism if there are wildcards in the excludes.
// TODO: Remove this if block once: https://github.com/elastic/elasticsearch/pull/80160 is merged
if ((CollectionUtils.isEmpty(excludes) == false) && Arrays.stream(excludes).filter(field -> field.contains("*")).count() > 0) {
mushao999 marked this conversation as resolved.
Show resolved Hide resolved
return (originalSource, contentType) -> {
if (originalSource == null || originalSource.length() <= 0) {
if (contentType == null) {
throw new IllegalStateException("originalSource and contentType can not be null at the same time");
}
return emptyValueSupplier.apply(contentType);
}
Function<Map<String, ?>, Map<String, Object>> mapFilter = XContentMapValues.filter(includes, excludes);
Tuple<XContentType, Map<String, Object>> mapTuple = XContentHelper.convertToMap(originalSource, true, contentType);
Map<String, Object> filteredSource = mapFilter.apply(mapTuple.v2());
BytesStreamOutput bStream = new BytesStreamOutput();
XContentType actualContentType = mapTuple.v1();
XContentBuilder builder = XContentFactory.contentBuilder(actualContentType, bStream).map(filteredSource);
builder.close();
return bStream.bytes();
};
} else {
final XContentParserConfiguration parserConfig = XContentParserConfiguration.EMPTY.withFiltering(
Set.of(includes),
Set.of(excludes),
true
);
return (originalSource, contentType) -> {
if (originalSource == null || originalSource.length() <= 0) {
if (contentType == null) {
throw new IllegalStateException("originalSource and contentType can not be null at the same time");
}
return emptyValueSupplier.apply(contentType);
}
if (contentType == null) {
contentType = XContentHelper.xContentTypeMayCompressed(originalSource);
}
BytesStreamOutput streamOutput = new BytesStreamOutput(Math.min(1024, originalSource.length()));
XContentBuilder builder = new XContentBuilder(contentType.xContent(), streamOutput);
XContentParser parser = contentType.xContent().createParser(parserConfig, originalSource.streamInput());
if ((parser.currentToken() == null) && (parser.nextToken() == null)) {
return emptyValueSupplier.apply(contentType);
}
builder.copyCurrentStructure(parser);
return BytesReference.bytes(builder);
};
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -524,6 +524,32 @@ public static BytesReference toXContent(ToXContent toXContent, XContentType xCon
}
}

/**
* Guesses the content type based on the provided bytes which may be compressed.
*
* @deprecated the content type should not be guessed except for few cases where we effectively don't know the content type.
* The REST layer should move to reading the Content-Type header instead. There are other places where auto-detection may be needed.
* This method is deprecated to prevent usages of it from spreading further without specific reasons.
*/
@Deprecated
public static XContentType xContentTypeMayCompressed(BytesReference bytes) {
Compressor compressor = CompressorFactory.compressor(bytes);
if (compressor != null) {
try {
InputStream compressedStreamInput = compressor.threadLocalInputStream(bytes.streamInput());
if (compressedStreamInput.markSupported() == false) {
compressedStreamInput = new BufferedInputStream(compressedStreamInput);
}
return XContentFactory.xContentType(compressedStreamInput);
} catch (IOException e) {
assert false : "Should not happen, we're just reading bytes from memory";
throw new UncheckedIOException(e);
}
} else {
return XContentHelper.xContentType(bytes);
}
}

/**
* Guesses the content type based on the provided bytes.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,8 @@
import org.elasticsearch.common.metrics.CounterMetric;
import org.elasticsearch.common.metrics.MeanMetric;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.common.xcontent.support.XContentMapValues;
import org.elasticsearch.common.xcontent.XContentFieldFilter;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.core.Tuple;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.VersionType;
import org.elasticsearch.index.engine.Engine;
Expand All @@ -33,8 +31,6 @@
import org.elasticsearch.index.shard.AbstractIndexShardComponent;
import org.elasticsearch.index.shard.IndexShard;
import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
import org.elasticsearch.xcontent.XContentFactory;
import org.elasticsearch.xcontent.XContentType;

import java.io.IOException;
import java.util.HashMap;
Expand Down Expand Up @@ -253,15 +249,11 @@ private GetResult innerGetLoadFromStoredFields(
if (fetchSourceContext.fetchSource() == false) {
source = null;
} else if (fetchSourceContext.includes().length > 0 || fetchSourceContext.excludes().length > 0) {
Map<String, Object> sourceAsMap;
// TODO: The source might be parsed and available in the sourceLookup but that one uses unordered maps so different.
// Do we care?
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@nik9000 @romseygeek is this TODO still valid here? It seems like the code around it changed quite a bit.

Copy link
Contributor Author

@mushao999 mushao999 Apr 15, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SourceLookup in ShardGetSevice has been removed in #20158, so this todo is invalid now.
I open #85921 to remove another similar todo in this file, please help to reivew it.@javanna

Tuple<XContentType, Map<String, Object>> typeMapTuple = XContentHelper.convertToMap(source, true);
XContentType sourceContentType = typeMapTuple.v1();
sourceAsMap = typeMapTuple.v2();
sourceAsMap = XContentMapValues.filter(sourceAsMap, fetchSourceContext.includes(), fetchSourceContext.excludes());
try {
source = BytesReference.bytes(XContentFactory.contentBuilder(sourceContentType).map(sourceAsMap));
source = XContentFieldFilter.newFieldFilter(fetchSourceContext.includes(), fetchSourceContext.excludes())
.apply(source, null);
} catch (IOException e) {
throw new ElasticsearchException("Failed to get id [" + id + "] with includes/excludes set", e);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,32 +16,24 @@
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.util.CollectionUtils;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.common.xcontent.support.XContentMapValues;
import org.elasticsearch.common.xcontent.XContentFieldFilter;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.core.Tuple;
import org.elasticsearch.index.query.QueryShardException;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentFactory;
import org.elasticsearch.xcontent.XContentType;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.function.Function;

public class SourceFieldMapper extends MetadataFieldMapper {

public static final String NAME = "_source";
public static final String RECOVERY_SOURCE_NAME = "_recovery_source";

public static final String CONTENT_TYPE = "_source";
private final Function<Map<String, ?>, Map<String, Object>> filter;
private final XContentFieldFilter filter;

private static final SourceFieldMapper DEFAULT = new SourceFieldMapper(Defaults.ENABLED, Strings.EMPTY_ARRAY, Strings.EMPTY_ARRAY);

Expand Down Expand Up @@ -145,7 +137,9 @@ private SourceFieldMapper(boolean enabled, String[] includes, String[] excludes)
this.includes = includes;
this.excludes = excludes;
final boolean filtered = CollectionUtils.isEmpty(includes) == false || CollectionUtils.isEmpty(excludes) == false;
this.filter = enabled && filtered ? XContentMapValues.filter(includes, excludes) : null;
this.filter = enabled && filtered
? XContentFieldFilter.newFieldFilter(includes, excludes)
: (sourceBytes, contentType) -> sourceBytes;
this.complete = enabled && CollectionUtils.isEmpty(includes) && CollectionUtils.isEmpty(excludes);
}

Expand Down Expand Up @@ -180,18 +174,7 @@ public void preParse(DocumentParserContext context) throws IOException {
public BytesReference applyFilters(@Nullable BytesReference originalSource, @Nullable XContentType contentType) throws IOException {
if (enabled && originalSource != null) {
// Percolate and tv APIs may not set the source and that is ok, because these APIs will not index any data
if (filter != null) {
// we don't update the context source if we filter, we want to keep it as is...
Tuple<XContentType, Map<String, Object>> mapTuple = XContentHelper.convertToMap(originalSource, true, contentType);
Map<String, Object> filteredSource = filter.apply(mapTuple.v2());
BytesStreamOutput bStream = new BytesStreamOutput();
XContentType actualContentType = mapTuple.v1();
XContentBuilder builder = XContentFactory.contentBuilder(actualContentType, bStream).map(filteredSource);
builder.close();
return bStream.bytes();
} else {
return originalSource;
}
return filter.apply(originalSource, contentType);
} else {
return null;
}
Expand Down
Loading