From 68bd8ac79b33e48942316b26f253db644b6763ad Mon Sep 17 00:00:00 2001 From: Daniel Buades Marcos Date: Sun, 8 Dec 2024 03:18:35 +0100 Subject: [PATCH] fix(publichealth-qa): ignore rows with `None` values in `question` or `answer` (#1565) --- mteb/tasks/Retrieval/multilingual/PublicHealthQARetrieval.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mteb/tasks/Retrieval/multilingual/PublicHealthQARetrieval.py b/mteb/tasks/Retrieval/multilingual/PublicHealthQARetrieval.py index c22d15afc..6f7d188b7 100644 --- a/mteb/tasks/Retrieval/multilingual/PublicHealthQARetrieval.py +++ b/mteb/tasks/Retrieval/multilingual/PublicHealthQARetrieval.py @@ -43,6 +43,9 @@ def _load_publichealthqa_data( answer_ids = {answer: _id for _id, answer in enumerate(set(data["answer"]))} for row in data: + if row["question"] is None or row["answer"] is None: + # There are some questions and answers that are None in the original dataset, specifically in the Arabic subset. + continue question = row["question"] answer = row["answer"] query_id = f"Q{question_ids[question]}"