From 748033ebd094896b2d40a47b0e88ae13bb7fbef8 Mon Sep 17 00:00:00 2001
From: Kenneth Enevoldsen
Date: Sun, 22 Dec 2024 23:18:53 +0100
Subject: [PATCH] fix: GermanDPR Dataset Causes Cross-Encoder Failure Due to Unexpected dict (#1621)

Fixes #1609
---
 mteb/tasks/Retrieval/deu/GermanDPRRetrieval.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mteb/tasks/Retrieval/deu/GermanDPRRetrieval.py b/mteb/tasks/Retrieval/deu/GermanDPRRetrieval.py
index 5290ae6aa..73c2f5383 100644
--- a/mteb/tasks/Retrieval/deu/GermanDPRRetrieval.py
+++ b/mteb/tasks/Retrieval/deu/GermanDPRRetrieval.py
@@ -83,6 +83,7 @@ def load_data(self, **kwargs):
             )
             corpus.update(neg_docs)
             relevant_docs[q_id] = {k: 1 for k in pos_docs}
+        corpus = {doc["id"]: doc.get("title", "") + " " + doc["text"] for doc in corpus}
         self.queries = {self._EVAL_SPLIT: queries}
         self.corpus = {self._EVAL_SPLIT: corpus}
         self.relevant_docs = {self._EVAL_SPLIT: relevant_docs}