From cc7a10666b7c151e9bff66dc50d1413579dac22a Mon Sep 17 00:00:00 2001 From: Kenneth Enevoldsen Date: Mon, 11 Nov 2024 09:17:46 +0100 Subject: [PATCH] fix: add logging for RetrievalEvaluator NaN values for similarity scores (#1398) Fixes #1389 --- .gitignore | 3 ++- mteb/evaluation/evaluators/RetrievalEvaluator.py | 7 ++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 3219560494..868f0f1745 100644 --- a/.gitignore +++ b/.gitignore @@ -143,4 +143,5 @@ sb.ipynb tests/create_meta/model_card.md # removed results from mteb repo they are now available at: https://github.com/embeddings-benchmark/results -results/ \ No newline at end of file +results/ +uv.lock diff --git a/mteb/evaluation/evaluators/RetrievalEvaluator.py b/mteb/evaluation/evaluators/RetrievalEvaluator.py index 54e2e0acd8..4b2596c4d5 100644 --- a/mteb/evaluation/evaluators/RetrievalEvaluator.py +++ b/mteb/evaluation/evaluators/RetrievalEvaluator.py @@ -188,7 +188,12 @@ def search( cos_scores = self.score_functions[score_function]( query_embeddings, sub_corpus_embeddings ) - cos_scores[torch.isnan(cos_scores)] = -1 + is_nan = torch.isnan(cos_scores) + if is_nan.sum() > 0: + logger.warning( + f"Found {is_nan.sum()} NaN values in the similarity scores. Replacing NaN values with -1." + ) + cos_scores[is_nan] = -1 # Get top-k values cos_scores_top_k_values, cos_scores_top_k_idx = torch.topk(