Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Support unmapped fields in search 'fields' option #64651

Closed
wants to merge 12 commits into from
Closed
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -295,3 +295,111 @@ setup:
- is_true: hits.hits.0._id
- match: { hits.hits.0.fields.count: [2] }
- is_false: hits.hits.0.fields.count_without_dv
---
Test unmapped field:
- skip:
version: ' - 7.99.99'
reason: support isn't yet backported
- do:
indices.create:
index: test
body:
mappings:
dynamic: false
properties:
f1:
type: keyword
f2:
type: object
enabled: false
f3:
type: object
- do:
index:
index: test
id: 1
refresh: true
body:
f1: some text
f2:
a: foo
b: bar
f3:
c: baz
f4: some other text
- do:
search:
index: test
body:
fields:
- f1
- f4
- match:
hits.hits.0.fields.f1:
- some text
- match:
hits.hits.0.fields.f4:
- some other text
- do:
search:
index: test
body:
fields:
- f*
- match:
hits.hits.0.fields.f1:
- some text
- match:
hits.hits.0.fields.f2\.a:
- foo
- match:
hits.hits.0.fields.f2\.b:
- bar
- match:
hits.hits.0.fields.f3\.c:
- baz
- match:
hits.hits.0.fields.f4:
- some other text
---
Test unmapped fields inside disabled objects:
- skip:
version: ' - 7.99.99'
reason: support isn't yet backported
- do:
indices.create:
index: test
body:
mappings:
properties:
f1:
type: object
enabled: false
- do:
index:
index: test
id: 1
refresh: true
body:
f1:
- some text
- a: b
-
- 1
- 2
- 3
- do:
search:
index: test
body:
fields: ["*"]
- match:
hits.hits.0.fields.f1:
- some text
-
- 1
- 2
- 3
- match:
hits.hits.0.fields.f1\.a:
- b
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ protected void setupInnerHitsContext(QueryShardContext queryShardContext,
innerHitsContext.docValuesContext(docValuesContext);
}
if (innerHitBuilder.getFetchFields() != null) {
FetchFieldsContext fieldsContext = new FetchFieldsContext(innerHitBuilder.getFetchFields());
FetchFieldsContext fieldsContext = new FetchFieldsContext(innerHitBuilder.getFetchFields(), false);
innerHitsContext.fetchFieldsContext(fieldsContext);
}
if (innerHitBuilder.getScriptFields() != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -973,7 +973,8 @@ private void parseSource(DefaultSearchContext context, SearchSourceBuilder sourc
context.docValuesContext(docValuesContext);
}
if (source.fetchFields() != null) {
FetchFieldsContext fetchFieldsContext = new FetchFieldsContext(source.fetchFields());
// TODO make "includeUnmapped configurable?
FetchFieldsContext fetchFieldsContext = new FetchFieldsContext(source.fetchFields(), true);
context.fetchFieldsContext(fetchFieldsContext);
}
if (source.highlighter() != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ public Aggregator createInternal(SearchContext searchContext,
subSearchContext.docValuesContext(docValuesContext);
}
if (fetchFields != null) {
FetchFieldsContext fieldsContext = new FetchFieldsContext(fetchFields);
FetchFieldsContext fieldsContext = new FetchFieldsContext(fetchFields, true);
subSearchContext.fetchFieldsContext(fieldsContext);
}
for (ScriptFieldsContext.ScriptField field : scriptFields) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,18 @@
*/
public class FetchFieldsContext {
private final List<FieldAndFormat> fields;
private final boolean includeUnmapped;

public FetchFieldsContext(List<FieldAndFormat> fields) {
public FetchFieldsContext(List<FieldAndFormat> fields, boolean includeUnmapped) {
Copy link
Contributor

@jtibshirani jtibshirani Nov 5, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Something to think about: if we end up making this configurable through a flag like include_unmapped, we could introduce a top-level parameter as we do here, or instead support the flag alongside each field pattern (so it'd be part of FieldAndFormat).

this.fields = fields;
this.includeUnmapped = includeUnmapped;
}

public List<FieldAndFormat> fields() {
return fields;
}

public boolean includeUnmapped() {
return includeUnmapped;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,13 @@ public FetchSubPhaseProcessor getProcessor(FetchContext fetchContext) {
"in the mappings for index [" + fetchContext.getIndexName() + "]");
}

FieldFetcher fieldFetcher = FieldFetcher.create(fetchContext.getQueryShardContext(), searchLookup, fetchFieldsContext.fields());
FieldFetcher fieldFetcher = FieldFetcher.create(
fetchContext.getQueryShardContext(),
searchLookup,
fetchFieldsContext.fields(),
fetchFieldsContext.includeUnmapped()
);

return new FetchSubPhaseProcessor() {
@Override
public void setNextReader(LeafReaderContext readerContext) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@
package org.elasticsearch.search.fetch.subphase;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.elasticsearch.common.document.DocumentField;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.ValueFetcher;
import org.elasticsearch.index.query.QueryShardContext;
Expand All @@ -31,6 +33,7 @@
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
Expand All @@ -42,12 +45,18 @@
public class FieldFetcher {
public static FieldFetcher create(QueryShardContext context,
SearchLookup searchLookup,
Collection<FieldAndFormat> fieldAndFormats) {
Collection<FieldAndFormat> fieldAndFormats,
boolean includeUnmapped) {

List<FieldContext> fieldContexts = new ArrayList<>();
String[] originalPattern = new String[fieldAndFormats.size()];
Set<String> mappedToExclude = new HashSet<>();
int i = 0;

for (FieldAndFormat fieldAndFormat : fieldAndFormats) {
String fieldPattern = fieldAndFormat.field;
originalPattern[i] = fieldAndFormat.field;
i++;
String format = fieldAndFormat.format;

Collection<String> concreteFields = context.simpleMatchToIndexNames(fieldPattern);
Expand All @@ -57,17 +66,29 @@ public static FieldFetcher create(QueryShardContext context,
continue;
}
ValueFetcher valueFetcher = ft.valueFetcher(context, searchLookup, format);
mappedToExclude.add(field);
fieldContexts.add(new FieldContext(field, valueFetcher));
}
}

return new FieldFetcher(fieldContexts);
CharacterRunAutomaton pathAutomaton = new CharacterRunAutomaton(Regex.simpleMatchToAutomaton(originalPattern));
return new FieldFetcher(fieldContexts, includeUnmapped, pathAutomaton, mappedToExclude);
}

private final List<FieldContext> fieldContexts;

private FieldFetcher(List<FieldContext> fieldContexts) {
private final boolean includeUnmapped;
private final CharacterRunAutomaton pathAutomaton;
private final Set<String> mappedToExclude;

private FieldFetcher(
List<FieldContext> fieldContexts,
boolean includeUnmapped,
CharacterRunAutomaton pathAutomaton,
Set<String> mappedToExclude
) {
this.fieldContexts = fieldContexts;
this.includeUnmapped = includeUnmapped;
this.pathAutomaton = pathAutomaton;
this.mappedToExclude = mappedToExclude;
}

public Map<String, DocumentField> fetch(SourceLookup sourceLookup, Set<String> ignoredFields) throws IOException {
Expand All @@ -85,9 +106,70 @@ public Map<String, DocumentField> fetch(SourceLookup sourceLookup, Set<String> i
documentFields.put(field, new DocumentField(field, parsedValues));
}
}
if (includeUnmapped) {
collect(documentFields, sourceLookup.loadSourceIfNeeded(), "", 0);
}
return documentFields;
}

private void collect(Map<String, DocumentField> documentFields, Map<String, Object> source, String parentPath, int lastState) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This turned out to be a bit complex, I'm not sure my idea to do it all in one pass was a good one :) Maybe we could start by re-using the source filtering logic, optimizing later if we see a performance reason ?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After looking at what the source filtering code does I actually find this solution less complicated. We are doing less work here than in the source filtering code where whole sub-trees of the source can be filtered out an the exclusion logic more complicated. These two methods that are pulling out the data from the source map while keeping track of where they are in the source tree via the inclusion pattern automaton are not recursive but not that long, and they are implementational details that we don't need to expose. On the other hand, using the source filtering code that was written for another purpose initially on the other hand might lead to problems when that code needs to be altered in the future. Having our "own" logic here doesn't seem to much overhead to me tbh.

for (String key : source.keySet()) {
Object value = source.get(key);
String currentPath = parentPath + key;
int currentState = step(this.pathAutomaton, key, lastState);
if (currentState == -1) {
// path doesn't match any fields pattern
continue;
}
if (value instanceof Map) {
// one step deeper into source tree
collect(documentFields, (Map<String, Object>) value, currentPath + ".", step(this.pathAutomaton, ".", currentState));
} else if (value instanceof List) {
// iterate through list values
List<Object> list = collectList(documentFields, (List<?>) value, currentPath, currentState);
if (list.isEmpty() == false) {
documentFields.put(currentPath, new DocumentField(currentPath, list));
}
} else {
// we have a leaf value
if (this.pathAutomaton.isAccept(currentState) && this.mappedToExclude.contains(currentPath) == false) {
if (value != null) {
DocumentField currentEntry = documentFields.get(currentPath);
if (currentEntry == null) {
List<Object> list = new ArrayList<>();
list.add(value);
documentFields.put(currentPath, new DocumentField(currentPath, list));
} else {
currentEntry.getValues().add(value);
}
}
}
}
}
}

private List<Object> collectList(Map<String, DocumentField> documentFields, Iterable<?> iterable, String parentPath, int lastState) {
List<Object> list = new ArrayList<>();
for (Object value : iterable) {
if (value instanceof Map) {
collect(documentFields, (Map<String, Object>) value, parentPath + ".", step(this.pathAutomaton, ".", lastState));
} else if (value instanceof List) {
// weird case, but can happen for objects with "enabled" : "false"
list.add(collectList(documentFields, (List<?>) value, parentPath, lastState));
} else if (this.pathAutomaton.isAccept(lastState)) {
list.add(value);
}
}
return list;
}

private static int step(CharacterRunAutomaton automaton, String key, int state) {
for (int i = 0; state != -1 && i < key.length(); ++i) {
state = automaton.step(state, key.charAt(i));
}
return state;
}

public void setNextReader(LeafReaderContext readerContext) {
for (FieldContext field : fieldContexts) {
field.valueFetcher.setNextReader(readerContext);
Expand Down
Loading