Improve explanation in rescore (#30629)

Currently, in a rescore request, if window_size is smaller than the number of top documents returned (N = size), the score explanation can be incorrect for documents that are part of the top N but were not rescored. This PR corrects this by saving in RescoreContext the doc IDs of the documents to which rescoring was applied, and adding the rescoring explanation only for those doc IDs. Closes #28725
elastic · May 17, 2018 · af017d4 · af017d4
1 parent ec98d15
commit af017d4
Show file tree

Hide file tree

Showing 3 changed files with 77 additions and 15 deletions.
diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/210_rescore_explain.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/210_rescore_explain.yml
@@ -0,0 +1,47 @@
+---
+"Score should match explanation in rescore":  # every hit's _explanation.value must equal its _score, rescored or not
+  - skip:
+      version: " - 6.2.99"
+      reason: Explanation for rescoring was corrected after these versions
+  - do:
+      indices.create:
+        index: test_index
+        body:
+          settings:
+            index:
+              number_of_shards: 1
+              number_of_replicas: 0
+  - do:
+      bulk:
+        refresh: true  # make the three documents searchable before the search below
+        body:
+          - '{"index": {"_index": "test_index", "_type": "_doc", "_id": "1"}}'
+          - '{"f1": "1"}'
+          - '{"index": {"_index": "test_index", "_type": "_doc", "_id": "2"}}'
+          - '{"f1": "2"}'
+          - '{"index": {"_index": "test_index", "_type": "_doc", "_id": "3"}}'
+          - '{"f1": "3"}'
+
+  - do:
+      search:
+        index: test_index
+        body:
+          explain: true  # request a per-hit _explanation to compare against _score
+          query:
+            match_all: {}
+          rescore:
+            window_size: 2  # smaller than the 3 matching docs, so the last hit is not rescored
+            query:
+              rescore_query:
+                match_all: {}
+              query_weight: 5  # weight applied to the first-pass score
+              rescore_query_weight: 10  # weight applied to the rescore query's score
+
+  - match: { hits.hits.0._score: 15 }  # inside the window: 5 + 10 (assumes match_all scores 1 - confirm)
+  - match: { hits.hits.0._explanation.value: 15 }
+
+  - match: { hits.hits.1._score: 15 }  # second and last hit inside the window
+  - match: { hits.hits.1._explanation.value: 15 }
+
+  - match: { hits.hits.2._score: 5 }  # outside the window: first-pass weight only
+  - match: { hits.hits.2._explanation.value: 5 }  # explanation must not include a rescore contribution
diff --git a/server/src/main/java/org/elasticsearch/search/rescore/QueryRescorer.java b/server/src/main/java/org/elasticsearch/search/rescore/QueryRescorer.java
@@ -30,6 +30,8 @@
 import java.util.Arrays;
 import java.util.Comparator;
 import java.util.Set;
+import java.util.Collections;
+import static java.util.stream.Collectors.toSet;
 
 public final class QueryRescorer implements Rescorer {
 
@@ -61,6 +63,11 @@ protected float combine(float firstPassScore, boolean secondPassMatches, float s
         // First take top slice of incoming docs, to be rescored:
         TopDocs topNFirstPass = topN(topDocs, rescoreContext.getWindowSize());
 
+        // Save doc IDs for which rescoring was applied to be used in score explanation
+        Set<Integer> topNDocIDs = Collections.unmodifiableSet(
+            Arrays.stream(topNFirstPass.scoreDocs).map(scoreDoc -> scoreDoc.doc).collect(toSet()));
+        rescoreContext.setRescoredDocs(topNDocIDs);
+
         // Rescore them:
         TopDocs rescored = rescorer.rescore(searcher, topNFirstPass, rescoreContext.getWindowSize());
 
@@ -71,16 +78,12 @@ protected float combine(float firstPassScore, boolean secondPassMatches, float s
     @Override
     public Explanation explain(int topLevelDocId, IndexSearcher searcher, RescoreContext rescoreContext,
                                Explanation sourceExplanation) throws IOException {
-        QueryRescoreContext rescore = (QueryRescoreContext) rescoreContext;
         if (sourceExplanation == null) {
             // this should not happen but just in case
             return Explanation.noMatch("nothing matched");
         }
-        // TODO: this isn't right?  I.e., we are incorrectly pretending all first pass hits were rescored?  If the requested docID was
-        // beyond the top rescoreContext.window() in the first pass hits, we don't rescore it now?
-        Explanation rescoreExplain = searcher.explain(rescore.query(), topLevelDocId);
+        QueryRescoreContext rescore = (QueryRescoreContext) rescoreContext;
         float primaryWeight = rescore.queryWeight();
-
         Explanation prim;
         if (sourceExplanation.isMatch()) {
             prim = Explanation.match(
@@ -89,23 +92,24 @@ public Explanation explain(int topLevelDocId, IndexSearcher searcher, RescoreCon
         } else {
             prim = Explanation.noMatch("First pass did not match", sourceExplanation);
         }
-
-        // NOTE: we don't use Lucene's Rescorer.explain because we want to insert our own description with which ScoreMode was used.  Maybe
-        // we should add QueryRescorer.explainCombine to Lucene?
-        if (rescoreExplain != null && rescoreExplain.isMatch()) {
-            float secondaryWeight = rescore.rescoreQueryWeight();
-            Explanation sec = Explanation.match(
+        if (rescoreContext.isRescored(topLevelDocId)){
+            Explanation rescoreExplain = searcher.explain(rescore.query(), topLevelDocId);
+            // NOTE: we don't use Lucene's Rescorer.explain because we want to insert our own description with which ScoreMode was used.
+            //  Maybe we should add QueryRescorer.explainCombine to Lucene?
+            if (rescoreExplain != null && rescoreExplain.isMatch()) {
+                float secondaryWeight = rescore.rescoreQueryWeight();
+                Explanation sec = Explanation.match(
                     rescoreExplain.getValue() * secondaryWeight,
                     "product of:",
                     rescoreExplain, Explanation.match(secondaryWeight, "secondaryWeight"));
-            QueryRescoreMode scoreMode = rescore.scoreMode();
-            return Explanation.match(
+                QueryRescoreMode scoreMode = rescore.scoreMode();
+                return Explanation.match(
                     scoreMode.combine(prim.getValue(), sec.getValue()),
                     scoreMode + " of:",
                     prim, sec);
-        } else {
-            return prim;
+            }
         }
+        return prim;
     }
 
     private static final Comparator<ScoreDoc> SCORE_DOC_COMPARATOR = new Comparator<ScoreDoc>() {

diff --git a/server/src/main/java/org/elasticsearch/search/rescore/RescoreContext.java b/server/src/main/java/org/elasticsearch/search/rescore/RescoreContext.java
@@ -19,6 +19,8 @@
 
 package org.elasticsearch.search.rescore;
 
+import java.util.Set;
+
 /**
  * Context available to the rescore while it is running. Rescore
  * implementations should extend this with any additional resources that
@@ -27,6 +29,7 @@
 public class RescoreContext {
     private final int windowSize;
     private final Rescorer rescorer;
+    private Set<Integer> rescoredDocs; //doc Ids for which rescoring was applied
 
     /**
      * Build the context.
@@ -50,4 +53,12 @@ public Rescorer rescorer() {
     public int getWindowSize() {
         return windowSize;
     }
+
+    public void setRescoredDocs(Set<Integer> docIds) { // records the doc IDs to which rescoring was applied
+        this.rescoredDocs = docIds;
+    }
+
+    public boolean isRescored(int docId) { // true only if docId is in the set stored by setRescoredDocs
+        return rescoredDocs != null && rescoredDocs.contains(docId); // may still be null if setRescoredDocs was never called
+    }
 }