From a240ea099aac446702a3f7167fd0921f6eb4e259 Mon Sep 17 00:00:00 2001
From: Kenneth Enevoldsen
Date: Mon, 11 Nov 2024 10:06:52 +0100
Subject: [PATCH] fix: Add missing benchmarks in benchmarks.py (#1431)

Fixes #1423
---
Note: this patch only appends two module-level Benchmark definitions
(LONG_EMBED and BRIGHT) after the last existing entry in
mteb/benchmarks/benchmarks.py; no existing line is modified.

 mteb/benchmarks/benchmarks.py | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/mteb/benchmarks/benchmarks.py b/mteb/benchmarks/benchmarks.py
index c5181d0ab..743a5bde1 100644
--- a/mteb/benchmarks/benchmarks.py
+++ b/mteb/benchmarks/benchmarks.py
@@ -918,3 +918,40 @@ def load_results(
     reference=None,
     citation=None,
 )
+
+LONG_EMBED = Benchmark(
+    name="LongEmbed",
+    tasks=get_tasks(
+        tasks=[
+            "LEMBNarrativeQARetrieval",
+            "LEMBNeedleRetrieval",
+            "LEMBPasskeyRetrieval",
+            "LEMBQMSumRetrieval",
+            "LEMBSummScreenFDRetrieval",
+            "LEMBWikimQARetrieval",
+        ],
+    ),
+    description="The main benchmark for evaluating long document retrieval.",
+    reference="https://arxiv.org/abs/2404.12096v2",
+    citation="""@article{zhu2024longembed,
+  title={LongEmbed: Extending Embedding Models for Long Context Retrieval},
+  author={Zhu, Dawei and Wang, Liang and Yang, Nan and Song, Yifan and Wu, Wenhao and Wei, Furu and Li, Sujian},
+  journal={arXiv preprint arXiv:2404.12096},
+  year={2024}
+}""",
+)
+
+BRIGHT = Benchmark(
+    name="BRIGHT",
+    tasks=get_tasks(
+        tasks=["BrightRetrieval"],
+    ),
+    description="A Realistic and Challenging Benchmark for Reasoning-Intensive Retrieval.",
+    reference="https://brightbenchmark.github.io/",
+    citation="""@article{su2024bright,
+  title={Bright: A realistic and challenging benchmark for reasoning-intensive retrieval},
+  author={Su, Hongjin and Yen, Howard and Xia, Mengzhou and Shi, Weijia and Muennighoff, Niklas and Wang, Han-yu and Liu, Haisu and Shi, Quan and Siegel, Zachary S and Tang, Michael and others},
+  journal={arXiv preprint arXiv:2407.12883},
+  year={2024}
+}""",
+)