From 2f268647892b9cf3f5880fbe9780ba073b309fc9 Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Tue, 15 May 2018 17:23:10 -0400 Subject: [PATCH 1/2] Improve explanation in rescore Currently in a rescore request if window_size is smaller than the top N documents returned (N=size), explanation of scores could be incorrect for documents that were a part of topN and not part of rescoring. This PR corrects this, but saving in RescoreContext docIDs of documents for which rescoring was applied, and adding rescoring explanation only for these docIDs. Closes #28725 --- .../test/search/210_rescore_explain.yml | 39 +++++++++++++++++++ .../search/rescore/QueryRescorer.java | 11 ++++-- .../search/rescore/RescoreContext.java | 11 ++++++ 3 files changed, 58 insertions(+), 3 deletions(-) create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/search/210_rescore_explain.yml diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/210_rescore_explain.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/210_rescore_explain.yml new file mode 100644 index 0000000000000..24920580c4552 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/210_rescore_explain.yml @@ -0,0 +1,39 @@ +--- +"Score should match explanation in rescore": + - skip: + version: " - 6.99.99" + reason: Explanation for rescoring was corrected after these versions + - do: + bulk: + refresh: true + body: + - '{"index": {"_index": "test_index", "_type": "_doc", "_id": "1"}}' + - '{"f1": "1"}' + - '{"index": {"_index": "test_index", "_type": "_doc", "_id": "2"}}' + - '{"f1": "2"}' + - '{"index": {"_index": "test_index", "_type": "_doc", "_id": "3"}}' + - '{"f1": "3"}' + + - do: + search: + index: test_index + body: + explain: true + query: + match_all: {} + rescore: + window_size: 2 + query: + rescore_query: + match_all: {} + query_weight: 5 + rescore_query_weight: 10 + + - match: { hits.hits.0._score: 15 } + - match: { hits.hits.0._explanation.value: 15 } + + - match: { hits.hits.1._score: 15 } + - match: { hits.hits.1._explanation.value: 15 } + + - match: { hits.hits.2._score: 5 } + - match: { hits.hits.2._explanation.value: 5 } diff --git a/server/src/main/java/org/elasticsearch/search/rescore/QueryRescorer.java b/server/src/main/java/org/elasticsearch/search/rescore/QueryRescorer.java index d4cf05d542560..93c220dd9ecb5 100644 --- a/server/src/main/java/org/elasticsearch/search/rescore/QueryRescorer.java +++ b/server/src/main/java/org/elasticsearch/search/rescore/QueryRescorer.java @@ -30,6 +30,8 @@ import java.util.Arrays; import java.util.Comparator; import java.util.Set; +import java.util.Collections; +import static java.util.stream.Collectors.toSet; public final class QueryRescorer implements Rescorer { @@ -61,6 +63,11 @@ protected float combine(float firstPassScore, boolean secondPassMatches, float s // First take top slice of incoming docs, to be rescored: TopDocs topNFirstPass = topN(topDocs, rescoreContext.getWindowSize()); + // Save doc IDs for which rescoring was applied to be used in score explanation + Set topNDocIDs = Collections.unmodifiableSet( + Arrays.stream(topNFirstPass.scoreDocs).map(scoreDoc -> scoreDoc.doc).collect(toSet())); + rescoreContext.setRescoredDocs(topNDocIDs); + // Rescore them: TopDocs rescored = rescorer.rescore(searcher, topNFirstPass, rescoreContext.getWindowSize()); @@ -76,8 +83,6 @@ public Explanation explain(int topLevelDocId, IndexSearcher searcher, RescoreCon // this should not happen but just in case return Explanation.noMatch("nothing matched"); } - // TODO: this isn't right? I.e., we are incorrectly pretending all first pass hits were rescored? If the requested docID was - // beyond the top rescoreContext.window() in the first pass hits, we don't rescore it now? Explanation rescoreExplain = searcher.explain(rescore.query(), topLevelDocId); float primaryWeight = rescore.queryWeight(); @@ -92,7 +97,7 @@ public Explanation explain(int topLevelDocId, IndexSearcher searcher, RescoreCon // NOTE: we don't use Lucene's Rescorer.explain because we want to insert our own description with which ScoreMode was used. Maybe // we should add QueryRescorer.explainCombine to Lucene? - if (rescoreExplain != null && rescoreExplain.isMatch()) { + if (rescoreContext.isRescored(topLevelDocId) && rescoreExplain != null && rescoreExplain.isMatch()) { float secondaryWeight = rescore.rescoreQueryWeight(); Explanation sec = Explanation.match( rescoreExplain.getValue() * secondaryWeight, diff --git a/server/src/main/java/org/elasticsearch/search/rescore/RescoreContext.java b/server/src/main/java/org/elasticsearch/search/rescore/RescoreContext.java index 75ce807a67f47..d070b5abd3f01 100644 --- a/server/src/main/java/org/elasticsearch/search/rescore/RescoreContext.java +++ b/server/src/main/java/org/elasticsearch/search/rescore/RescoreContext.java @@ -19,6 +19,8 @@ package org.elasticsearch.search.rescore; +import java.util.Set; + /** * Context available to the rescore while it is running. Rescore * implementations should extend this with any additional resources that @@ -27,6 +29,7 @@ public class RescoreContext { private final int windowSize; private final Rescorer rescorer; + private Set recroredDocs; //doc Ids for which rescoring was applied /** * Build the context. @@ -50,4 +53,12 @@ public Rescorer rescorer() { public int getWindowSize() { return windowSize; } + + public void setRescoredDocs(Set docIds) { + recroredDocs = docIds; + } + + public boolean isRescored(int docId) { + return recroredDocs.contains(docId); + } } From 515fabbf563a8e5cb67de8dc132eaa1d55062c07 Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Wed, 16 May 2018 10:05:21 -0400 Subject: [PATCH 2/2] Don't build rescore explanation if not needed --- .../search/rescore/QueryRescorer.java | 25 +++++++++---------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/rescore/QueryRescorer.java b/server/src/main/java/org/elasticsearch/search/rescore/QueryRescorer.java index 93c220dd9ecb5..4a9567a32c06a 100644 --- a/server/src/main/java/org/elasticsearch/search/rescore/QueryRescorer.java +++ b/server/src/main/java/org/elasticsearch/search/rescore/QueryRescorer.java @@ -78,14 +78,12 @@ protected float combine(float firstPassScore, boolean secondPassMatches, float s @Override public Explanation explain(int topLevelDocId, IndexSearcher searcher, RescoreContext rescoreContext, Explanation sourceExplanation) throws IOException { - QueryRescoreContext rescore = (QueryRescoreContext) rescoreContext; if (sourceExplanation == null) { // this should not happen but just in case return Explanation.noMatch("nothing matched"); } - Explanation rescoreExplain = searcher.explain(rescore.query(), topLevelDocId); + QueryRescoreContext rescore = (QueryRescoreContext) rescoreContext; float primaryWeight = rescore.queryWeight(); - Explanation prim; if (sourceExplanation.isMatch()) { prim = Explanation.match( @@ -94,23 +92,24 @@ public Explanation explain(int topLevelDocId, IndexSearcher searcher, RescoreCon } else { prim = Explanation.noMatch("First pass did not match", sourceExplanation); } - - // NOTE: we don't use Lucene's Rescorer.explain because we want to insert our own description with which ScoreMode was used. Maybe - // we should add QueryRescorer.explainCombine to Lucene? - if (rescoreContext.isRescored(topLevelDocId) && rescoreExplain != null && rescoreExplain.isMatch()) { - float secondaryWeight = rescore.rescoreQueryWeight(); - Explanation sec = Explanation.match( + if (rescoreContext.isRescored(topLevelDocId)){ + Explanation rescoreExplain = searcher.explain(rescore.query(), topLevelDocId); + // NOTE: we don't use Lucene's Rescorer.explain because we want to insert our own description with which ScoreMode was used. + // Maybe we should add QueryRescorer.explainCombine to Lucene? + if (rescoreExplain != null && rescoreExplain.isMatch()) { + float secondaryWeight = rescore.rescoreQueryWeight(); + Explanation sec = Explanation.match( rescoreExplain.getValue() * secondaryWeight, "product of:", rescoreExplain, Explanation.match(secondaryWeight, "secondaryWeight")); - QueryRescoreMode scoreMode = rescore.scoreMode(); - return Explanation.match( + QueryRescoreMode scoreMode = rescore.scoreMode(); + return Explanation.match( scoreMode.combine(prim.getValue(), sec.getValue()), scoreMode + " of:", prim, sec); - } else { - return prim; + } } + return prim; } private static final Comparator SCORE_DOC_COMPARATOR = new Comparator() {