Skip to content

Commit

Permalink
fix: Add namaa MrTydi reranking dataset (#1573)
Browse files Browse the repository at this point in the history
* Add dataset class and file requirements

* pass tests

* make lint changes

* adjust meta data and remove load_data

---------

Co-authored-by: Omar Elshehy <omarelshehy@Omars-MacBook-Pro.local>
  • Loading branch information
omarelshehy and Omar Elshehy authored Dec 11, 2024
1 parent 27f7d8c commit 7b9b3c9
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 0 deletions.
1 change: 1 addition & 0 deletions mteb/tasks/Reranking/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

from .ara.NamaaMrTydiReranking import *
from .eng.AskUbuntuDupQuestions import *
from .eng.MindSmallReranking import *
from .eng.SciDocsReranking import *
Expand Down
39 changes: 39 additions & 0 deletions mteb/tasks/Reranking/ara/NamaaMrTydiReranking.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from __future__ import annotations

from mteb.abstasks.TaskMetadata import TaskMetadata

from ....abstasks.AbsTaskReranking import AbsTaskReranking


class NamaaMrTydiReranking(AbsTaskReranking):
    """Reranking task over the Arabic test split of Mr. TyDi.

    The task is configured entirely through ``metadata``; no loading or
    evaluation logic is overridden here, so behaviour comes from
    ``AbsTaskReranking``.
    """

    metadata = TaskMetadata(
        name="NamaaMrTydiReranking",
        description="Mr. TyDi is a multi-lingual benchmark dataset built on TyDi, covering eleven typologically diverse languages. It is designed for monolingual retrieval, specifically to evaluate ranking with learned dense representations. This dataset adapts the arabic test split for Reranking evaluation purposes by the addition of multiple (Hard) Negatives to each query and positive",
        reference="https://huggingface.co/NAMAA-Space",
        dataset={
            "path": "NAMAA-Space/mteb-eval-mrtydi",
            # Pinned revision so evaluation results stay reproducible.
            "revision": "502637220a7ad0ecc5c39ff5518d7508d2624af8",
        },
        type="Reranking",
        category="s2s",
        modalities=["text"],
        eval_splits=["test"],
        eval_langs=["ara-Arab"],
        main_score="map",
        date=("2023-11-01", "2024-05-15"),
        domains=["Encyclopaedic", "Written"],
        task_subtypes=[],
        license="cc-by-sa-3.0",
        annotations_creators="human-annotated",
        dialect=[],
        sample_creation="found",
        # Raw string: the BibTeX accent macro in the author list contains
        # backslashes. In a normal string literal Python would drop the
        # backslash before the double quote and warn about the invalid
        # backslash-i escape, corrupting the stored citation.
        # NOTE(review): this entry cites the MTEB paper rather than Mr. TyDi
        # itself; confirm that is the intended citation.
        bibtex_citation=r"""@article{muennighoff2022mteb,
doi = {10.48550/ARXIV.2210.07316},
url = {https://arxiv.org/abs/2210.07316},
author = {Muennighoff, Niklas and Tazi, Nouamane and Magne, Lo{\"\i}c and Reimers, Nils},
title = {MTEB: Massive Text Embedding Benchmark},
publisher = {arXiv},
journal={arXiv preprint arXiv:2210.07316},
year = {2022}
}""",
    )
Empty file.

0 comments on commit 7b9b3c9

Please sign in to comment.