Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Some queries for runtime fields #58940

Closed
Show file tree
Hide file tree
Changes from 32 commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
0cfdf33
Add runtime fields plugin under x-pack
javanna Jun 17, 2020
e474582
Hack together some script contexts
nik9000 Jun 17, 2020
76d8b94
Scripted fields: rework script contexts (#58342)
nik9000 Jun 18, 2020
818403b
Spotless and maybe tests?
nik9000 Jun 18, 2020
3669ec4
Fixup test case after merge
nik9000 Jun 26, 2020
3a390b5
Get runtime fields tests passing
nik9000 Jun 26, 2020
59b2a18
Add queries
nik9000 Jun 26, 2020
28944a4
Better way for queries
nik9000 Jun 29, 2020
7d43642
Fails
nik9000 Jun 29, 2020
89d6352
Shared iteration
nik9000 Jun 30, 2020
5b353ce
Long term query
nik9000 Jun 30, 2020
23af466
Common stuffs?
nik9000 Jun 30, 2020
acf64c8
Double queries
nik9000 Jun 30, 2020
7ee42c0
Rename
nik9000 Jun 30, 2020
216c02c
Range query
nik9000 Jun 30, 2020
09e6feb
Fuzzy
nik9000 Jun 30, 2020
4e66638
Wildcard
nik9000 Jun 30, 2020
6837172
Regexp query
nik9000 Jun 30, 2020
f5106b2
Exists and some sorting
nik9000 Jun 30, 2020
2e05521
Spotless
nik9000 Jun 30, 2020
bfe15f8
WIP
nik9000 Jun 30, 2020
7c18ddb
Fixup bool
nik9000 Jul 1, 2020
79d41c8
Remove some duplication
nik9000 Jul 2, 2020
e3850a7
Merge branch 'feature/runtime_fields' into runtime_field_string_term_…
nik9000 Jul 2, 2020
ab6834e
Javadoc
nik9000 Jul 2, 2020
4e83233
Javadoc
nik9000 Jul 2, 2020
810568b
Javadoc
nik9000 Jul 2, 2020
3b79704
Merge branch 'feature/runtime_fields' into runtime_field_string_term_…
nik9000 Jul 2, 2020
bc80a5e
Wire in doc values and term query
nik9000 Jul 2, 2020
93a031f
Integrate!
nik9000 Jul 2, 2020
3409621
Its over nine thousand!
nik9000 Jul 2, 2020
b5b02ed
In a bool
nik9000 Jul 2, 2020
e17fef8
itr
nik9000 Jul 6, 2020
edc571f
Drop shared iteration tests
nik9000 Jul 8, 2020
f90a0c7
Remove caching
nik9000 Jul 8, 2020
724aaba
Rename
nik9000 Jul 8, 2020
6645c09
plumb
nik9000 Jul 8, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@

import org.elasticsearch.search.lookup.SearchLookup;

import java.io.IOException;

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

unused import

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

//TODO this is a temporary interface only to avoid changing signature of MappedFieldType#fielddataBuilder
public interface SearchLookupAware {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,12 @@ public class DocLookup {

private final MapperService mapperService;
private final Function<MappedFieldType, IndexFieldData<?>> fieldDataLookup;
private final SearchLookup searchLookup;

public DocLookup(MapperService mapperService, Function<MappedFieldType, IndexFieldData<?>> fieldDataLookup) {
public DocLookup(MapperService mapperService, Function<MappedFieldType, IndexFieldData<?>> fieldDataLookup, SearchLookup searchLookup) {
this.mapperService = mapperService;
this.fieldDataLookup = fieldDataLookup;
this.searchLookup = searchLookup;
}

public MapperService mapperService() {
Expand All @@ -44,6 +46,6 @@ public IndexFieldData<?> getForField(MappedFieldType fieldType) {
}

public LeafDocLookup getLeafDocLookup(LeafReaderContext context) {
return new LeafDocLookup(mapperService, fieldDataLookup, context);
return new LeafDocLookup(mapperService, fieldDataLookup, searchLookup, context);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@
import org.apache.lucene.index.LeafReaderContext;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.LeafFieldData;
import org.elasticsearch.index.fielddata.ScriptDocValues;
import org.elasticsearch.index.fielddata.SearchLookupAware;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.MapperService;

Expand All @@ -41,14 +43,20 @@ public class LeafDocLookup implements Map<String, ScriptDocValues<?>> {
private final MapperService mapperService;
private final Function<MappedFieldType, IndexFieldData<?>> fieldDataLookup;

private final SearchLookup searchLookup;
private final LeafReaderContext reader;

private int docId = -1;

LeafDocLookup(MapperService mapperService, Function<MappedFieldType, IndexFieldData<?>> fieldDataLookup,
LeafReaderContext reader) {
LeafDocLookup(
MapperService mapperService,
Function<MappedFieldType, IndexFieldData<?>> fieldDataLookup,
SearchLookup searchLookup,
LeafReaderContext reader
) {
this.mapperService = mapperService;
this.fieldDataLookup = fieldDataLookup;
this.searchLookup = searchLookup;
this.reader = reader;
}

Expand All @@ -75,7 +83,12 @@ public ScriptDocValues<?> get(Object key) {
scriptValues = AccessController.doPrivileged(new PrivilegedAction<ScriptDocValues<?>>() {
@Override
public ScriptDocValues<?> run() {
return fieldDataLookup.apply(fieldType).load(reader).getScriptValues();
// TODO should this go through QueryShardContext?
IndexFieldData<?> ifd = fieldDataLookup.apply(fieldType);
if (ifd instanceof SearchLookupAware) {
((SearchLookupAware) ifd).setSearchLookup(searchLookup);
}
return ifd.load(reader).getScriptValues();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did not foresee the need for these changes but I see how they are caused by me not changing fielddataBuilder and rather hacking query shard context. Just to double check: this is to support runtime fields that refer to other runtime fields, otherwise they have no search lookup set?

To keep this contained and have the hack in a single place, would it work to rather modify QueryShardContext#lookup to do the following? It may have weird consequences but in our branch with a big TODO to revert it it may be ok? Also let's mention in a comment specifically why this is needed?

public SearchLookup lookup() {
    if (lookup == null) {
        lookup = new SearchLookup(getMapperService(), this::getForField);
    }
    return lookup;
}

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, that is what my TODO was about - maybe this should go through getForField. I can look into making that change in master. I think it is good regardless. And, if it has consequences that we didn't think of we may as well see them earlier rather than later...

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know if this makes sense on master and what difference it would make, but I think it does make sense in our feature branch to isolate the hack around augmenting the fielddata impl.

}
});
localCacheFieldData.put(fieldName, scriptValues);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ public class SearchLookup {
final FieldsLookup fieldsLookup;

public SearchLookup(MapperService mapperService, Function<MappedFieldType, IndexFieldData<?>> fieldDataLookup) {
docMap = new DocLookup(mapperService, fieldDataLookup);
docMap = new DocLookup(mapperService, fieldDataLookup, this);
sourceLookup = new SourceLookup();
fieldsLookup = new FieldsLookup(mapperService);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ public void setUp() throws Exception {

docLookup = new LeafDocLookup(mapperService,
ignored -> fieldData,
null);
null, null);
}

public void testBasicLookup() {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/

package org.elasticsearch.xpack.runtimefields;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Weight;

import java.io.IOException;
import java.util.Objects;
import java.util.function.IntConsumer;

/**
* Abstract base for implementing doc values and queries against values
* calculated at runtime. The tricky thing about this is that we'd like to
* calculate the values as few times as possible in case the calculation is
* expensive, <strong>but</strong> some of the APIs that we rely on to
* calculate the values like {@link SortedNumericDocValues#advanceExact(int)}
* are "forwards only".
* <p>
* We solve this in the same way that big cities handle public transportation:
* with a bus! In our case, the bus is subclasses of {@link SharedValues}.
* Queries and doc values are implemented calling {@link #unstarted()} to get
* the {@linkplain SharedValues} that has yet to start iterating. That way
* many queries can share the same underlying {@linkplain SharedValues}
* instance, only calculating the values for a document once. If other code
* needs to iterate the values after the first iteration has started then
* it'll get a new {@linkplain SharedValues} from {@linkplain #unstarted},
* this "leaving on a different bus".
*
* @param <SV> the subtype of {@link SharedValues} needed by the subclass
*/
public abstract class AbstractRuntimeValues<SV extends AbstractRuntimeValues<SV>.SharedValues> {
private SV unstarted;

protected final SV unstarted() {
if (unstarted == null) {
unstarted = newSharedValues();
}
return unstarted;
}

protected abstract SV newSharedValues();

protected abstract class SharedValues {
protected int count;
private boolean sort;

private int lastDocBase = -1;
private IntConsumer lastLeafCursor;
private int docId = -1;
private int maxDoc;

protected final IntConsumer leafCursor(LeafReaderContext ctx) throws IOException {
if (lastDocBase != ctx.docBase) {
if (lastDocBase == -1) {
// Now that we're started future iterations can't share these values.
unstarted = null;
}
lastDocBase = ctx.docBase;
IntConsumer leafLoader = newLeafLoader(ctx);
docId = -1;
maxDoc = ctx.reader().maxDoc();
lastLeafCursor = new IntConsumer() {
@Override
public void accept(int targetDocId) {
if (docId == targetDocId) {
return;
}
docId = targetDocId;
count = 0;
leafLoader.accept(targetDocId);
if (sort) {
sort();
}
}
};
}
return lastLeafCursor;
}

protected final void alwaysSortResults() {
sort = true;
}

protected final int docId() {
return docId;
}

protected final int maxDoc() {
return maxDoc;
}

protected abstract IntConsumer newLeafLoader(LeafReaderContext ctx) throws IOException;

protected abstract void sort();

protected abstract class AbstractRuntimeQuery extends Query {
protected final String fieldName;

protected AbstractRuntimeQuery(String fieldName) {
this.fieldName = fieldName;
}

@Override
public final Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
return new ConstantScoreWeight(this, boost) {
@Override
public boolean isCacheable(LeafReaderContext ctx) {
return false; // scripts aren't really cacheable at this point
}

@Override
public Scorer scorer(LeafReaderContext ctx) throws IOException {
IntConsumer leafCursor = leafCursor(ctx);
DocIdSetIterator approximation = DocIdSetIterator.all(ctx.reader().maxDoc());
TwoPhaseIterator twoPhase = new TwoPhaseIterator(approximation) {
@Override
public boolean matches() throws IOException {
leafCursor.accept(approximation.docID());
return AbstractRuntimeQuery.this.matches();
}

@Override
public float matchCost() {
// TODO we don't have a good way of estimating the complexity of the script so we just go with 9000
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would remove the constant, what value does it add? I think that the important part is that a script needs to be run for each document, which is the cost approximation. Then one script can be heavier than another, but I wonder if that is negligible at this stage, unless we can calculate the approximate cost of a script.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looking at the javadoc, I think it should actually just be some constant - matchCost says it is only for a single document.

If I just returned 1 here then I think Lucene'd reason about the query as though it were match_all, which is probably bad

return approximation().cost() * 9000f;
}
};
return new ConstantScoreScorer(this, score(), scoreMode, twoPhase);
}
};
}

protected abstract boolean matches();

@Override
public final String toString(String field) {
if (fieldName.contentEquals(field)) {
return bareToString();
}
return fieldName + ":" + bareToString();
}

protected abstract String bareToString();

@Override
public int hashCode() {
return Objects.hash(fieldName);
}

@Override
public boolean equals(Object obj) {
if (obj == null || getClass() != obj.getClass()) {
return false;
}
@SuppressWarnings("unchecked")
AbstractRuntimeQuery other = (AbstractRuntimeQuery) obj;
return fieldName.equals(other.fieldName);
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,18 +23,24 @@ public abstract class AbstractScriptFieldScript {
private final LeafSearchLookup leafSearchLookup;

public AbstractScriptFieldScript(Map<String, Object> params, SearchLookup searchLookup, LeafReaderContext ctx) {
this.leafSearchLookup = searchLookup.getLeafSearchLookup(ctx);
leafSearchLookup = searchLookup.getLeafSearchLookup(ctx);
// TODO how do other scripts get stored fields exposed? Through asMap? I don't see any getters for them.
this.params = params;
}

/**
* Set the document to run the script against.
*/
public final void setDocument(int docId) {
this.leafSearchLookup.setDocument(docId);
public final void setDocId(int docId) {
leafSearchLookup.setDocument(docId);
onSetDocId(docId);
}

/**
* Optional hook for the script to take extra actions when moving to a document.
*/
protected void onSetDocId(int docId) {}

/**
* Expose the {@code params} of the script to the script itself.
*/
Expand Down
Loading