Skip to content

Commit

Permalink
Improve explanation in rescore (#30629)
Browse files Browse the repository at this point in the history
Currently, in a rescore request, if window_size is smaller than
the number of top documents returned (N = size), the score explanation could be
incorrect for documents that are in the top N but were not rescored.
This PR corrects this by saving in RescoreContext the doc IDs of the documents
to which rescoring was applied, and adding the rescoring explanation
only for those doc IDs.

Closes #28725
  • Loading branch information
mayya-sharipova committed May 17, 2018
1 parent ec98d15 commit af017d4
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 15 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
---
# Indexes three documents, then runs a match_all search with explain enabled and
# a rescore window of 2 (smaller than the number of hits). For every hit, the
# value of the explanation must equal the reported score: 15 is expected for the
# first two hits and 5 for the third.
"Score should match explanation in rescore":
  - skip:
      version: " - 6.2.99"
      reason: Explanation for rescoring was corrected after these versions
  - do:
      indices.create:
        index: test_index
        body:
          settings:
            index:
              number_of_shards: 1
              number_of_replicas: 0
  - do:
      bulk:
        refresh: true
        body:
          - '{"index": {"_index": "test_index", "_type": "_doc", "_id": "1"}}'
          - '{"f1": "1"}'
          - '{"index": {"_index": "test_index", "_type": "_doc", "_id": "2"}}'
          - '{"f1": "2"}'
          - '{"index": {"_index": "test_index", "_type": "_doc", "_id": "3"}}'
          - '{"f1": "3"}'

  - do:
      search:
        index: test_index
        body:
          explain: true
          query:
            match_all: {}
          rescore:
            # window_size is deliberately smaller than the three hits returned
            window_size: 2
            query:
              rescore_query:
                match_all: {}
              query_weight: 5
              rescore_query_weight: 10

  - match: { hits.hits.0._score: 15 }
  - match: { hits.hits.0._explanation.value: 15 }

  - match: { hits.hits.1._score: 15 }
  - match: { hits.hits.1._explanation.value: 15 }

  - match: { hits.hits.2._score: 5 }
  - match: { hits.hits.2._explanation.value: 5 }
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
import java.util.Arrays;
import java.util.Comparator;
import java.util.Set;
import java.util.Collections;
import static java.util.stream.Collectors.toSet;

public final class QueryRescorer implements Rescorer {

Expand Down Expand Up @@ -61,6 +63,11 @@ protected float combine(float firstPassScore, boolean secondPassMatches, float s
// First take top slice of incoming docs, to be rescored:
TopDocs topNFirstPass = topN(topDocs, rescoreContext.getWindowSize());

// Save doc IDs for which rescoring was applied to be used in score explanation
Set<Integer> topNDocIDs = Collections.unmodifiableSet(
Arrays.stream(topNFirstPass.scoreDocs).map(scoreDoc -> scoreDoc.doc).collect(toSet()));
rescoreContext.setRescoredDocs(topNDocIDs);

// Rescore them:
TopDocs rescored = rescorer.rescore(searcher, topNFirstPass, rescoreContext.getWindowSize());

Expand All @@ -71,16 +78,12 @@ protected float combine(float firstPassScore, boolean secondPassMatches, float s
@Override
public Explanation explain(int topLevelDocId, IndexSearcher searcher, RescoreContext rescoreContext,
Explanation sourceExplanation) throws IOException {
QueryRescoreContext rescore = (QueryRescoreContext) rescoreContext;
if (sourceExplanation == null) {
// this should not happen but just in case
return Explanation.noMatch("nothing matched");
}
// TODO: this isn't right? I.e., we are incorrectly pretending all first pass hits were rescored? If the requested docID was
// beyond the top rescoreContext.window() in the first pass hits, we don't rescore it now?
Explanation rescoreExplain = searcher.explain(rescore.query(), topLevelDocId);
QueryRescoreContext rescore = (QueryRescoreContext) rescoreContext;
float primaryWeight = rescore.queryWeight();

Explanation prim;
if (sourceExplanation.isMatch()) {
prim = Explanation.match(
Expand All @@ -89,23 +92,24 @@ public Explanation explain(int topLevelDocId, IndexSearcher searcher, RescoreCon
} else {
prim = Explanation.noMatch("First pass did not match", sourceExplanation);
}

// NOTE: we don't use Lucene's Rescorer.explain because we want to insert our own description with which ScoreMode was used. Maybe
// we should add QueryRescorer.explainCombine to Lucene?
if (rescoreExplain != null && rescoreExplain.isMatch()) {
float secondaryWeight = rescore.rescoreQueryWeight();
Explanation sec = Explanation.match(
if (rescoreContext.isRescored(topLevelDocId)){
Explanation rescoreExplain = searcher.explain(rescore.query(), topLevelDocId);
// NOTE: we don't use Lucene's Rescorer.explain because we want to insert our own description with which ScoreMode was used.
// Maybe we should add QueryRescorer.explainCombine to Lucene?
if (rescoreExplain != null && rescoreExplain.isMatch()) {
float secondaryWeight = rescore.rescoreQueryWeight();
Explanation sec = Explanation.match(
rescoreExplain.getValue() * secondaryWeight,
"product of:",
rescoreExplain, Explanation.match(secondaryWeight, "secondaryWeight"));
QueryRescoreMode scoreMode = rescore.scoreMode();
return Explanation.match(
QueryRescoreMode scoreMode = rescore.scoreMode();
return Explanation.match(
scoreMode.combine(prim.getValue(), sec.getValue()),
scoreMode + " of:",
prim, sec);
} else {
return prim;
}
}
return prim;
}

private static final Comparator<ScoreDoc> SCORE_DOC_COMPARATOR = new Comparator<ScoreDoc>() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@

package org.elasticsearch.search.rescore;

import java.util.Set;

/**
* Context available to the rescore while it is running. Rescore
* implementations should extend this with any additional resources that
Expand All @@ -27,6 +29,7 @@
public class RescoreContext {
private final int windowSize;
private final Rescorer rescorer;
private Set<Integer> rescoredDocs; //doc Ids for which rescoring was applied

/**
* Build the context.
Expand All @@ -50,4 +53,12 @@ public Rescorer rescorer() {
public int getWindowSize() {
    // windowSize is declared final on this class, so the value returned here is fixed once the context is built.
    return this.windowSize;
}

/**
 * Stores the IDs of the documents to which rescoring was applied.
 * The given set is kept by reference (no copy is made) and replaces any previously stored set.
 *
 * @param docIds doc IDs for which rescoring was applied
 */
public void setRescoredDocs(Set<Integer> docIds) {
    this.rescoredDocs = docIds;
}

/**
 * Reports whether rescoring was applied to the given document.
 *
 * @param docId doc ID to look up
 * @return {@code true} if {@code docId} is contained in the set stored via
 *         {@link #setRescoredDocs(Set)}; {@code false} otherwise, including when
 *         no set has been stored
 */
public boolean isRescored(int docId) {
    // The rescoredDocs field has no initializer and is assigned by setRescoredDocs, so it can
    // still be null here; treat that as "nothing was rescored" rather than throwing an NPE.
    return rescoredDocs != null && rescoredDocs.contains(docId);
}
}

0 comments on commit af017d4

Please sign in to comment.