-
Notifications
You must be signed in to change notification settings - Fork 24.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Some queries for runtime fields #58940
Changes from 32 commits
0cfdf33
e474582
76d8b94
818403b
3669ec4
3a390b5
59b2a18
28944a4
7d43642
89d6352
5b353ce
23af466
acf64c8
7ee42c0
216c02c
09e6feb
4e66638
6837172
f5106b2
2e05521
bfe15f8
7c18ddb
79d41c8
e3850a7
ab6834e
4e83233
810568b
3b79704
bc80a5e
93a031f
3409621
b5b02ed
e17fef8
edc571f
f90a0c7
724aaba
6645c09
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,7 +21,9 @@ | |
import org.apache.lucene.index.LeafReaderContext; | ||
import org.elasticsearch.ExceptionsHelper; | ||
import org.elasticsearch.index.fielddata.IndexFieldData; | ||
import org.elasticsearch.index.fielddata.LeafFieldData; | ||
import org.elasticsearch.index.fielddata.ScriptDocValues; | ||
import org.elasticsearch.index.fielddata.SearchLookupAware; | ||
import org.elasticsearch.index.mapper.MappedFieldType; | ||
import org.elasticsearch.index.mapper.MapperService; | ||
|
||
|
@@ -41,14 +43,20 @@ public class LeafDocLookup implements Map<String, ScriptDocValues<?>> { | |
private final MapperService mapperService; | ||
private final Function<MappedFieldType, IndexFieldData<?>> fieldDataLookup; | ||
|
||
private final SearchLookup searchLookup; | ||
private final LeafReaderContext reader; | ||
|
||
private int docId = -1; | ||
|
||
LeafDocLookup(MapperService mapperService, Function<MappedFieldType, IndexFieldData<?>> fieldDataLookup, | ||
LeafReaderContext reader) { | ||
LeafDocLookup( | ||
MapperService mapperService, | ||
Function<MappedFieldType, IndexFieldData<?>> fieldDataLookup, | ||
SearchLookup searchLookup, | ||
LeafReaderContext reader | ||
) { | ||
this.mapperService = mapperService; | ||
this.fieldDataLookup = fieldDataLookup; | ||
this.searchLookup = searchLookup; | ||
this.reader = reader; | ||
} | ||
|
||
|
@@ -75,7 +83,12 @@ public ScriptDocValues<?> get(Object key) { | |
scriptValues = AccessController.doPrivileged(new PrivilegedAction<ScriptDocValues<?>>() { | ||
@Override | ||
public ScriptDocValues<?> run() { | ||
return fieldDataLookup.apply(fieldType).load(reader).getScriptValues(); | ||
// TODO should this go through QueryShardContext? | ||
IndexFieldData<?> ifd = fieldDataLookup.apply(fieldType); | ||
if (ifd instanceof SearchLookupAware) { | ||
((SearchLookupAware) ifd).setSearchLookup(searchLookup); | ||
} | ||
return ifd.load(reader).getScriptValues(); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I did not foresee the need for these changes, but I see that they are caused by my not changing fielddataBuilder and instead hacking the query shard context. Just to double-check: is this to support runtime fields that refer to other runtime fields, which would otherwise have no search lookup set? To keep this contained and have the hack in a single place, would it work to modify QueryShardContext#lookup instead, to do the following? It may have weird consequences, but in our branch, with a big TODO to revert it, it may be OK. Also, let's mention in a comment specifically why this is needed.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, that is what my TODO was about - maybe this should go through QueryShardContext. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't know if this makes sense on master or what difference it would make, but I think it does make sense in our feature branch to isolate the hack around augmenting the fielddata impl. |
||
} | ||
}); | ||
localCacheFieldData.put(fieldName, scriptValues); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,175 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License; | ||
* you may not use this file except in compliance with the Elastic License. | ||
*/ | ||
|
||
package org.elasticsearch.xpack.runtimefields; | ||
|
||
import org.apache.lucene.index.LeafReaderContext; | ||
import org.apache.lucene.index.SortedNumericDocValues; | ||
import org.apache.lucene.search.ConstantScoreScorer; | ||
import org.apache.lucene.search.ConstantScoreWeight; | ||
import org.apache.lucene.search.DocIdSetIterator; | ||
import org.apache.lucene.search.IndexSearcher; | ||
import org.apache.lucene.search.Query; | ||
import org.apache.lucene.search.ScoreMode; | ||
import org.apache.lucene.search.Scorer; | ||
import org.apache.lucene.search.TwoPhaseIterator; | ||
import org.apache.lucene.search.Weight; | ||
|
||
import java.io.IOException; | ||
import java.util.Objects; | ||
import java.util.function.IntConsumer; | ||
|
||
/** | ||
* Abstract base for implementing doc values and queries against values | ||
* calculated at runtime. The tricky thing about this is that we'd like to | ||
* calculate the values as few times as possible in case the calculation is | ||
* expensive, <strong>but</strong> some of the APIs that we rely on to | ||
* calculate the values like {@link SortedNumericDocValues#advanceExact(int)} | ||
* are "forwards only". | ||
* <p> | ||
* We solve this in the same way that big cities handle public transportation: | ||
* with a bus! In our case, the bus is subclasses of {@link SharedValues}. | ||
* Queries and doc values are implemented calling {@link #unstarted()} to get | ||
* the {@linkplain SharedValues} that has yet to start iterating. That way | ||
* many queries can share the same underlying {@linkplain SharedValues} | ||
* instance, only calculating the values for a document once. If other code | ||
* needs to iterate the values after the first iteration has started then | ||
* it'll get a new {@linkplain SharedValues} from {@linkplain #unstarted}, | ||
* this "leaving on a different bus". | ||
* | ||
* @param <SV> the subtype of {@link SharedValues} needed by the subclass | ||
*/ | ||
public abstract class AbstractRuntimeValues<SV extends AbstractRuntimeValues<SV>.SharedValues> { | ||
private SV unstarted; | ||
|
||
protected final SV unstarted() { | ||
if (unstarted == null) { | ||
unstarted = newSharedValues(); | ||
} | ||
return unstarted; | ||
} | ||
|
||
protected abstract SV newSharedValues(); | ||
|
||
protected abstract class SharedValues { | ||
protected int count; | ||
private boolean sort; | ||
|
||
private int lastDocBase = -1; | ||
private IntConsumer lastLeafCursor; | ||
private int docId = -1; | ||
private int maxDoc; | ||
|
||
protected final IntConsumer leafCursor(LeafReaderContext ctx) throws IOException { | ||
if (lastDocBase != ctx.docBase) { | ||
if (lastDocBase == -1) { | ||
// Now that we're started future iterations can't share these values. | ||
unstarted = null; | ||
} | ||
lastDocBase = ctx.docBase; | ||
IntConsumer leafLoader = newLeafLoader(ctx); | ||
docId = -1; | ||
maxDoc = ctx.reader().maxDoc(); | ||
lastLeafCursor = new IntConsumer() { | ||
@Override | ||
public void accept(int targetDocId) { | ||
if (docId == targetDocId) { | ||
return; | ||
} | ||
docId = targetDocId; | ||
count = 0; | ||
leafLoader.accept(targetDocId); | ||
if (sort) { | ||
sort(); | ||
} | ||
} | ||
}; | ||
} | ||
return lastLeafCursor; | ||
} | ||
|
||
protected final void alwaysSortResults() { | ||
sort = true; | ||
} | ||
|
||
protected final int docId() { | ||
return docId; | ||
} | ||
|
||
protected final int maxDoc() { | ||
return maxDoc; | ||
} | ||
|
||
protected abstract IntConsumer newLeafLoader(LeafReaderContext ctx) throws IOException; | ||
|
||
protected abstract void sort(); | ||
|
||
protected abstract class AbstractRuntimeQuery extends Query { | ||
protected final String fieldName; | ||
|
||
protected AbstractRuntimeQuery(String fieldName) { | ||
this.fieldName = fieldName; | ||
} | ||
|
||
@Override | ||
public final Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { | ||
return new ConstantScoreWeight(this, boost) { | ||
@Override | ||
public boolean isCacheable(LeafReaderContext ctx) { | ||
return false; // scripts aren't really cacheable at this point | ||
} | ||
|
||
@Override | ||
public Scorer scorer(LeafReaderContext ctx) throws IOException { | ||
IntConsumer leafCursor = leafCursor(ctx); | ||
DocIdSetIterator approximation = DocIdSetIterator.all(ctx.reader().maxDoc()); | ||
TwoPhaseIterator twoPhase = new TwoPhaseIterator(approximation) { | ||
@Override | ||
public boolean matches() throws IOException { | ||
leafCursor.accept(approximation.docID()); | ||
return AbstractRuntimeQuery.this.matches(); | ||
} | ||
|
||
@Override | ||
public float matchCost() { | ||
// TODO we don't have a good way of estimating the complexity of the script so we just go with 9000 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would remove the constant, what value does it add? I think that the important part is that a script needs to be run for each document, which is the cost approximation. Then one script can be heavier than another, but I wonder if that is negligible at this stage, unless we can calculate the approximate cost of a script. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looking at the javadoc, I think it should actually just be some constant - If I just returned |
||
return approximation().cost() * 9000f; | ||
} | ||
}; | ||
return new ConstantScoreScorer(this, score(), scoreMode, twoPhase); | ||
} | ||
}; | ||
} | ||
|
||
protected abstract boolean matches(); | ||
|
||
@Override | ||
public final String toString(String field) { | ||
if (fieldName.contentEquals(field)) { | ||
return bareToString(); | ||
} | ||
return fieldName + ":" + bareToString(); | ||
} | ||
|
||
protected abstract String bareToString(); | ||
|
||
@Override | ||
public int hashCode() { | ||
return Objects.hash(fieldName); | ||
} | ||
|
||
@Override | ||
public boolean equals(Object obj) { | ||
if (obj == null || getClass() != obj.getClass()) { | ||
return false; | ||
} | ||
@SuppressWarnings("unchecked") | ||
AbstractRuntimeQuery other = (AbstractRuntimeQuery) obj; | ||
return fieldName.equals(other.fieldName); | ||
} | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
unused import
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
👍