From cd4a012271463b89db7a8ec9ca298a975805988d Mon Sep 17 00:00:00 2001 From: Kenneth Enevoldsen Date: Sun, 24 Mar 2024 13:35:38 +0100 Subject: [PATCH] fix: Added sizes to the metadata (#276) * restructuring the readme * added mmteb * removed unnecessary method * Added docstring to metadata * Updated outdated examples * formatting documents * fix: Updated form to be parsed correctly * fix: Added sizes to the metadata this allows for automatic metadata generation * Updated based on feedback * Apply suggestions from code review Co-authored-by: Niklas Muennighoff * updated based on feedback * Added suggestion from review * added correction based on review * reformatted empty fields to None --------- Co-authored-by: Niklas Muennighoff --- docs/adding_a_dataset.md | 2 ++ mteb/abstasks/TaskMetadata.py | 5 +++++ .../BitextMining/da/BornholmskBitextMining.py | 2 ++ .../multilingual/BUCCBitextMining.py | 2 ++ .../multilingual/DiaBLaBitextMining.py | 2 ++ .../multilingual/FloresBitextMining.py | 2 ++ .../multilingual/NorwegianCourtsBitextMining.py | 2 ++ .../multilingual/TatoebaBitextMining.py | 2 ++ .../da/AngryTweetsClassification.py | 2 ++ .../Classification/da/DKHateClassification.py | 2 ++ .../Classification/da/DalajClassification.py | 2 ++ .../da/DanishPoliticalCommentsClassification.py | 2 ++ .../da/DdiscoCohesionClassification.py | 2 ++ .../da/LccSentimentClassification.py | 2 ++ .../en/AmazonPolarityClassification.py | 2 ++ .../Classification/en/Banking77Classification.py | 2 ++ .../Classification/en/EmotionClassification.py | 2 ++ .../Classification/en/ImdbClassification.py | 2 ++ .../en/ToxicConversationsClassification.py | 2 ++ .../en/TweetSentimentExtractionClassification.py | 2 ++ .../AmazonCounterfactualClassification.py | 2 ++ .../multilingual/AmazonReviewsClassification.py | 2 ++ .../multilingual/MTOPDomainClassification.py | 2 ++ .../multilingual/MTOPIntentClassification.py | 2 ++ .../multilingual/MasakhaNEWSClassification.py | 2 ++ 
.../multilingual/MassiveIntentClassification.py | 2 ++ .../MassiveScenarioClassification.py | 2 ++ .../multilingual/NordicLangClassification.py | 2 ++ .../multilingual/ScalaClassification.py | 8 ++++++++ .../Classification/nb/NoRecClassification.py | 2 ++ .../nb/NorwegianParliamentClassification.py | 2 ++ .../Classification/pl/PolishClassification.py | 10 ++++++++++ .../Classification/sv/SweRecClassification.py | 2 ++ .../Classification/zh/CMTEBClassification.py | 12 ++++++++++++ mteb/tasks/Clustering/de/BlurbsClusteringP2P.py | 2 ++ mteb/tasks/Clustering/de/BlurbsClusteringS2S.py | 2 ++ .../tasks/Clustering/de/TenKGnadClusteringP2P.py | 2 ++ .../tasks/Clustering/de/TenKGnadClusteringS2S.py | 2 ++ mteb/tasks/Clustering/en/ArxivClusteringP2P.py | 2 ++ mteb/tasks/Clustering/en/ArxivClusteringS2S.py | 2 ++ mteb/tasks/Clustering/en/BigPatentClustering.py | 2 ++ mteb/tasks/Clustering/en/BiorxivClusteringP2P.py | 2 ++ mteb/tasks/Clustering/en/BiorxivClusteringS2S.py | 2 ++ mteb/tasks/Clustering/en/MedrxivClusteringP2P.py | 2 ++ mteb/tasks/Clustering/en/MedrxivClusteringS2S.py | 2 ++ mteb/tasks/Clustering/en/RedditClustering.py | 2 ++ mteb/tasks/Clustering/en/RedditClusteringP2P.py | 2 ++ .../Clustering/en/StackExchangeClustering.py | 2 ++ .../Clustering/en/StackExchangeClusteringP2P.py | 2 ++ .../Clustering/en/TwentyNewsgroupsClustering.py | 2 ++ mteb/tasks/Clustering/en/WikiCitiesClustering.py | 2 ++ mteb/tasks/Clustering/es/FloresClusteringS2S.py | 2 ++ .../Clustering/es/SpanishNewsClusteringP2P.py | 2 ++ .../tasks/Clustering/fr/AlloProfClusteringP2P.py | 2 ++ .../tasks/Clustering/fr/AlloProfClusteringS2S.py | 2 ++ mteb/tasks/Clustering/fr/HALClusteringS2S.py | 2 ++ mteb/tasks/Clustering/fr/MLSUMClusteringP2P.py | 2 ++ mteb/tasks/Clustering/fr/MLSUMClusteringS2S.py | 2 ++ .../multilingual/MasakhaNEWSClusteringP2P.py | 2 ++ .../multilingual/MasakhaNEWSClusteringS2S.py | 2 ++ mteb/tasks/Clustering/pl/PolishClustering.py | 2 ++ mteb/tasks/Clustering/zh/CMTEBClustering.py 
| 8 ++++++++ .../en/SprintDuplicateQuestionsPC.py | 2 ++ .../en/TwitterSemEval2015PC.py | 2 ++ .../PairClassification/en/TwitterURLCorpusPC.py | 2 ++ .../multilingual/OpusparcusPC.py | 2 ++ .../PairClassification/multilingual/PawsX.py | 2 ++ mteb/tasks/PairClassification/pl/PolishPC.py | 8 ++++++++ .../zh/CMTEBPairClassification.py | 4 ++++ mteb/tasks/Reranking/en/AskUbuntuDupQuestions.py | 2 ++ mteb/tasks/Reranking/en/MindSmallReranking.py | 2 ++ mteb/tasks/Reranking/en/SciDocsReranking.py | 2 ++ .../Reranking/en/StackOverflowDupQuestions.py | 2 ++ mteb/tasks/Reranking/fr/AlloprofReranking.py | 2 ++ mteb/tasks/Reranking/fr/SyntecReranking.py | 2 ++ .../Reranking/multilingual/MIRACLReranking.py | 2 ++ mteb/tasks/Reranking/zh/CMTEBReranking.py | 8 ++++++++ mteb/tasks/Retrieval/de/GerDaLIRRetrieval.py | 2 ++ mteb/tasks/Retrieval/de/GermanDPRRetrieval.py | 2 ++ mteb/tasks/Retrieval/de/GermanQuADRetrieval.py | 2 ++ mteb/tasks/Retrieval/en/ArguAnaRetrieval.py | 2 ++ .../Retrieval/en/CQADupstackAndroidRetrieval.py | 2 ++ .../Retrieval/en/CQADupstackEnglishRetrieval.py | 2 ++ .../Retrieval/en/CQADupstackGamingRetrieval.py | 2 ++ .../Retrieval/en/CQADupstackGisRetrieval.py | 2 ++ .../en/CQADupstackMathematicaRetrieval.py | 2 ++ .../Retrieval/en/CQADupstackPhysicsRetrieval.py | 2 ++ .../en/CQADupstackProgrammersRetrieval.py | 2 ++ .../Retrieval/en/CQADupstackStatsRetrieval.py | 2 ++ .../Retrieval/en/CQADupstackTexRetrieval.py | 2 ++ .../Retrieval/en/CQADupstackUnixRetrieval.py | 2 ++ .../en/CQADupstackWebmastersRetrieval.py | 2 ++ .../en/CQADupstackWordpressRetrieval.py | 2 ++ mteb/tasks/Retrieval/en/ClimateFEVERRetrieval.py | 2 ++ mteb/tasks/Retrieval/en/DBPediaRetrieval.py | 2 ++ mteb/tasks/Retrieval/en/FEVERRetrieval.py | 2 ++ mteb/tasks/Retrieval/en/FiQA2018Retrieval.py | 2 ++ mteb/tasks/Retrieval/en/HagridRetrieval.py | 2 ++ mteb/tasks/Retrieval/en/HotpotQARetrieval.py | 2 ++ mteb/tasks/Retrieval/en/MSMARCORetrieval.py | 2 ++ 
mteb/tasks/Retrieval/en/MSMARCOv2Retrieval.py | 2 ++ mteb/tasks/Retrieval/en/NFCorpusRetrieval.py | 2 ++ mteb/tasks/Retrieval/en/NQRetrieval.py | 2 ++ mteb/tasks/Retrieval/en/NarrativeQARetrieval.py | 2 ++ mteb/tasks/Retrieval/en/QuoraRetrieval.py | 2 ++ mteb/tasks/Retrieval/en/SCIDOCSRetrieval.py | 2 ++ mteb/tasks/Retrieval/en/SciFactRetrieval.py | 2 ++ mteb/tasks/Retrieval/en/TRECCOVIDRetrieval.py | 2 ++ mteb/tasks/Retrieval/en/Touche2020Retrieval.py | 2 ++ .../Retrieval/es/SpanishPassageRetrievalS2P.py | 2 ++ .../Retrieval/es/SpanishPassageRetrievalS2S.py | 2 ++ mteb/tasks/Retrieval/fr/AlloprofRetrieval.py | 2 ++ mteb/tasks/Retrieval/fr/BSARDRetrieval.py | 2 ++ mteb/tasks/Retrieval/fr/SyntecRetrieval.py | 2 ++ mteb/tasks/Retrieval/ko/KoMiracl.py | 2 ++ mteb/tasks/Retrieval/ko/KoMrtydi.py | 2 ++ mteb/tasks/Retrieval/ko/KoStrategyQA.py | 2 ++ .../Retrieval/multilingual/MIRACLRetrieval.py | 2 ++ .../Retrieval/multilingual/MintakaRetrieval.py | 2 ++ .../multilingual/MultiLongDocRetrieval.py | 2 ++ .../Retrieval/multilingual/XMarketRetrieval.py | 2 ++ .../Retrieval/multilingual/XPQARetrieval.py | 2 ++ mteb/tasks/Retrieval/pl/ArguAnaPLRetrieval.py | 2 ++ mteb/tasks/Retrieval/pl/DBPediaPLRetrieval.py | 2 ++ mteb/tasks/Retrieval/pl/FiQAPLRetrieval.py | 2 ++ mteb/tasks/Retrieval/pl/HotpotQAPLRetrieval.py | 2 ++ mteb/tasks/Retrieval/pl/MSMARCOPLRetrieval.py | 2 ++ mteb/tasks/Retrieval/pl/NFCorpusPLRetrieval.py | 2 ++ mteb/tasks/Retrieval/pl/NQPLRetrieval.py | 2 ++ mteb/tasks/Retrieval/pl/QuoraPLRetrieval.py | 2 ++ mteb/tasks/Retrieval/pl/SCIDOCSPLRetrieval.py | 2 ++ mteb/tasks/Retrieval/pl/SciFactPLRetrieval.py | 2 ++ mteb/tasks/Retrieval/pl/TRECCOVIDPLRetrieval.py | 2 ++ mteb/tasks/Retrieval/zh/CMTEBRetrieval.py | 16 ++++++++++++++++ mteb/tasks/STS/de/GermanSTSBenchmarkSTS.py | 2 ++ mteb/tasks/STS/en/BiossesSTS.py | 2 ++ mteb/tasks/STS/en/STS12STS.py | 2 ++ mteb/tasks/STS/en/STS13STS.py | 2 ++ mteb/tasks/STS/en/STS14STS.py | 2 ++ mteb/tasks/STS/en/STS15STS.py | 2 ++ 
mteb/tasks/STS/en/STS16STS.py | 2 ++ mteb/tasks/STS/en/STSBenchmarkSTS.py | 2 ++ mteb/tasks/STS/en/SickrSTS.py | 2 ++ mteb/tasks/STS/es/STSES.py | 2 ++ mteb/tasks/STS/fr/SickFrSTS.py | 2 ++ .../STS/multilingual/STS17CrosslingualSTS.py | 2 ++ .../STS/multilingual/STS22CrosslingualSTS.py | 2 ++ .../multilingual/STSBenchmarkMultilingualSTS.py | 2 ++ mteb/tasks/STS/pl/PolishSTS.py | 4 ++++ mteb/tasks/STS/zh/CMTEBSTS.py | 14 ++++++++++++++ .../Summarization/en/SummEvalSummarization.py | 2 ++ .../Summarization/fr/SummEvalFrSummarization.py | 6 ++++-- 152 files changed, 381 insertions(+), 2 deletions(-) diff --git a/docs/adding_a_dataset.md b/docs/adding_a_dataset.md index 8086d971cc..67ee681037 100644 --- a/docs/adding_a_dataset.md +++ b/docs/adding_a_dataset.md @@ -45,6 +45,8 @@ class SciDocsReranking(AbsTaskReranking): dialect=None, text_creation="found", bibtex_citation= ... # removed for brevity + n_samples={"test": 19599}, + avg_character_length={"test": 69.0}, ) # testing the task with a model: diff --git a/mteb/abstasks/TaskMetadata.py b/mteb/abstasks/TaskMetadata.py index 5ff4412bdb..1babbe978c 100644 --- a/mteb/abstasks/TaskMetadata.py +++ b/mteb/abstasks/TaskMetadata.py @@ -117,6 +117,8 @@ class TaskMetadata(BaseModel): text_creation: The method of text creation. Includes "found", "created", "machine-translated", "machine-translated and verified", and "machine-translated and localized". bibtex_citation: The BibTeX citation for the dataset. + n_samples: The number of samples in the dataset. This should only be for the splits evaluated on. + avg_character_length: The average character length of the samples in the dataset. This should only be for the splits evaluated on. 
""" hf_hub_name: str @@ -144,3 +146,6 @@ class TaskMetadata(BaseModel): text_creation: TEXT_CREATION_METHOD | None bibtex_citation: str | None + + n_samples: dict[str, int] | None + avg_character_length: dict[str, float] | None diff --git a/mteb/tasks/BitextMining/da/BornholmskBitextMining.py b/mteb/tasks/BitextMining/da/BornholmskBitextMining.py index da483e3891..e702f499cf 100644 --- a/mteb/tasks/BitextMining/da/BornholmskBitextMining.py +++ b/mteb/tasks/BitextMining/da/BornholmskBitextMining.py @@ -28,6 +28,8 @@ class BornholmBitextMining(AbsTaskBitextMining): dialect=None, text_creation=None, bibtex_citation=None, + avg_character_length={"test": 89.7}, + n_samples={"test": 500}, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/BitextMining/multilingual/BUCCBitextMining.py b/mteb/tasks/BitextMining/multilingual/BUCCBitextMining.py index 11c1ea9952..f2c10a1d37 100644 --- a/mteb/tasks/BitextMining/multilingual/BUCCBitextMining.py +++ b/mteb/tasks/BitextMining/multilingual/BUCCBitextMining.py @@ -29,4 +29,6 @@ class BUCCBitextMining(AbsTaskBitextMining, CrosslingualTask): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 641684}, + avg_character_length={"test": 101.3}, ) diff --git a/mteb/tasks/BitextMining/multilingual/DiaBLaBitextMining.py b/mteb/tasks/BitextMining/multilingual/DiaBLaBitextMining.py index d9d90196ae..73d0e013ed 100644 --- a/mteb/tasks/BitextMining/multilingual/DiaBLaBitextMining.py +++ b/mteb/tasks/BitextMining/multilingual/DiaBLaBitextMining.py @@ -29,6 +29,8 @@ class DiaBLaBitextMining(AbsTaskBitextMining, CrosslingualTask): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/BitextMining/multilingual/FloresBitextMining.py b/mteb/tasks/BitextMining/multilingual/FloresBitextMining.py index a03e3220e5..0e42ddce2c 100644 --- a/mteb/tasks/BitextMining/multilingual/FloresBitextMining.py +++ 
b/mteb/tasks/BitextMining/multilingual/FloresBitextMining.py @@ -254,6 +254,8 @@ class FloresBitextMining(AbsTaskBitextMining, CrosslingualTask): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"dev": 997, "devtest": 1012}, + avg_character_length={}, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/BitextMining/multilingual/NorwegianCourtsBitextMining.py b/mteb/tasks/BitextMining/multilingual/NorwegianCourtsBitextMining.py index 01acd7f900..06c52e5a35 100644 --- a/mteb/tasks/BitextMining/multilingual/NorwegianCourtsBitextMining.py +++ b/mteb/tasks/BitextMining/multilingual/NorwegianCourtsBitextMining.py @@ -28,6 +28,8 @@ class NorwegianCourtsBitextMining(AbsTaskBitextMining): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 2050}, + avg_character_length={"test": 1884.0}, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/BitextMining/multilingual/TatoebaBitextMining.py b/mteb/tasks/BitextMining/multilingual/TatoebaBitextMining.py index 8983f442c9..c57672476f 100644 --- a/mteb/tasks/BitextMining/multilingual/TatoebaBitextMining.py +++ b/mteb/tasks/BitextMining/multilingual/TatoebaBitextMining.py @@ -142,4 +142,6 @@ class TatoebaBitextMining(AbsTaskBitextMining, CrosslingualTask): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 2000}, + avg_character_length={"test": 39.4}, ) diff --git a/mteb/tasks/Classification/da/AngryTweetsClassification.py b/mteb/tasks/Classification/da/AngryTweetsClassification.py index cba265f003..3e876500e2 100644 --- a/mteb/tasks/Classification/da/AngryTweetsClassification.py +++ b/mteb/tasks/Classification/da/AngryTweetsClassification.py @@ -26,6 +26,8 @@ class AngryTweetsClassification(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 1050}, + avg_character_length={"test": 156.1}, ) @property diff --git a/mteb/tasks/Classification/da/DKHateClassification.py 
b/mteb/tasks/Classification/da/DKHateClassification.py index 85582c4e07..aa9bdaeaa7 100644 --- a/mteb/tasks/Classification/da/DKHateClassification.py +++ b/mteb/tasks/Classification/da/DKHateClassification.py @@ -28,6 +28,8 @@ class DKHateClassification(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 329}, + avg_character_length={"test": 104.0}, ) @property diff --git a/mteb/tasks/Classification/da/DalajClassification.py b/mteb/tasks/Classification/da/DalajClassification.py index 20785d18b6..28e899cf42 100644 --- a/mteb/tasks/Classification/da/DalajClassification.py +++ b/mteb/tasks/Classification/da/DalajClassification.py @@ -29,6 +29,8 @@ class DalajClassification(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 444}, + avg_character_length={"test": 243.8}, ) @property diff --git a/mteb/tasks/Classification/da/DanishPoliticalCommentsClassification.py b/mteb/tasks/Classification/da/DanishPoliticalCommentsClassification.py index 6dfe9ff582..fc7cb03921 100644 --- a/mteb/tasks/Classification/da/DanishPoliticalCommentsClassification.py +++ b/mteb/tasks/Classification/da/DanishPoliticalCommentsClassification.py @@ -28,6 +28,8 @@ class DanishPoliticalCommentsClassification(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"train": 9010}, + avg_character_length={"train": 69.9}, ) @property diff --git a/mteb/tasks/Classification/da/DdiscoCohesionClassification.py b/mteb/tasks/Classification/da/DdiscoCohesionClassification.py index bfe5e303f5..6c1843868c 100644 --- a/mteb/tasks/Classification/da/DdiscoCohesionClassification.py +++ b/mteb/tasks/Classification/da/DdiscoCohesionClassification.py @@ -57,6 +57,8 @@ class DdiscoCohesionClassification(AbsTaskClassification): abstract = "To date, there has been no resource for studying discourse coherence on real-world Danish texts. 
Discourse coherence has mostly been approached with the assumption that incoherent texts can be represented by coherent texts in which sentences have been shuffled. However, incoherent real-world texts rarely resemble that. We thus present DDisCo, a dataset including text from the Danish Wikipedia and Reddit annotated for discourse coherence. We choose to annotate real-world texts instead of relying on artificially incoherent text for training and testing models. Then, we evaluate the performance of several methods, including neural networks, on the dataset.", } """, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Classification/da/LccSentimentClassification.py b/mteb/tasks/Classification/da/LccSentimentClassification.py index e1c000a446..9693f3561d 100644 --- a/mteb/tasks/Classification/da/LccSentimentClassification.py +++ b/mteb/tasks/Classification/da/LccSentimentClassification.py @@ -26,6 +26,8 @@ class LccSentimentClassification(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 150}, + avg_character_length={"test": 118.7}, ) @property diff --git a/mteb/tasks/Classification/en/AmazonPolarityClassification.py b/mteb/tasks/Classification/en/AmazonPolarityClassification.py index 18334d3d95..4404d9a2ef 100644 --- a/mteb/tasks/Classification/en/AmazonPolarityClassification.py +++ b/mteb/tasks/Classification/en/AmazonPolarityClassification.py @@ -27,4 +27,6 @@ class AmazonPolarityClassification(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 400000}, + avg_character_length={"test": 431.4}, ) diff --git a/mteb/tasks/Classification/en/Banking77Classification.py b/mteb/tasks/Classification/en/Banking77Classification.py index 935dc47a3c..11f4b34b71 100644 --- a/mteb/tasks/Classification/en/Banking77Classification.py +++ b/mteb/tasks/Classification/en/Banking77Classification.py @@ -27,4 +27,6 @@ class 
Banking77Classification(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 3080}, + avg_character_length={"test": 54.2}, ) diff --git a/mteb/tasks/Classification/en/EmotionClassification.py b/mteb/tasks/Classification/en/EmotionClassification.py index 1b3379b7d6..b8e0a016d7 100644 --- a/mteb/tasks/Classification/en/EmotionClassification.py +++ b/mteb/tasks/Classification/en/EmotionClassification.py @@ -27,6 +27,8 @@ class EmotionClassification(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"validation": 2000, "test": 2000}, + avg_character_length={"validation": 95.3, "test": 95.6}, ) @property diff --git a/mteb/tasks/Classification/en/ImdbClassification.py b/mteb/tasks/Classification/en/ImdbClassification.py index dfcae3d7a2..89fe76c256 100644 --- a/mteb/tasks/Classification/en/ImdbClassification.py +++ b/mteb/tasks/Classification/en/ImdbClassification.py @@ -27,4 +27,6 @@ class ImdbClassification(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 25000}, + avg_character_length={"test": 1293.8}, ) diff --git a/mteb/tasks/Classification/en/ToxicConversationsClassification.py b/mteb/tasks/Classification/en/ToxicConversationsClassification.py index 864c770495..f9a2ed3bf0 100644 --- a/mteb/tasks/Classification/en/ToxicConversationsClassification.py +++ b/mteb/tasks/Classification/en/ToxicConversationsClassification.py @@ -27,6 +27,8 @@ class ToxicConversationsClassification(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 50000}, + avg_character_length={"test": 296.6}, ) @property diff --git a/mteb/tasks/Classification/en/TweetSentimentExtractionClassification.py b/mteb/tasks/Classification/en/TweetSentimentExtractionClassification.py index e8288dd0d6..7dde91e1ac 100644 --- a/mteb/tasks/Classification/en/TweetSentimentExtractionClassification.py +++ 
b/mteb/tasks/Classification/en/TweetSentimentExtractionClassification.py @@ -27,6 +27,8 @@ class TweetSentimentExtractionClassification(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 3534}, + avg_character_length={"test": 67.8}, ) @property diff --git a/mteb/tasks/Classification/multilingual/AmazonCounterfactualClassification.py b/mteb/tasks/Classification/multilingual/AmazonCounterfactualClassification.py index 5e6f1b8eab..2796a3a048 100644 --- a/mteb/tasks/Classification/multilingual/AmazonCounterfactualClassification.py +++ b/mteb/tasks/Classification/multilingual/AmazonCounterfactualClassification.py @@ -31,6 +31,8 @@ class AmazonCounterfactualClassification(MultilingualTask, AbsTaskClassification dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"validation": 335, "test": 670}, + avg_character_length={"validation": 109.2, "test": 106.1}, ) @property diff --git a/mteb/tasks/Classification/multilingual/AmazonReviewsClassification.py b/mteb/tasks/Classification/multilingual/AmazonReviewsClassification.py index 3655db992b..78380a3ec0 100644 --- a/mteb/tasks/Classification/multilingual/AmazonReviewsClassification.py +++ b/mteb/tasks/Classification/multilingual/AmazonReviewsClassification.py @@ -29,4 +29,6 @@ class AmazonReviewsClassification(MultilingualTask, AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"validation": 30000, "test": 30000}, + avg_character_length={"validation": 159.2, "test": 160.4}, ) diff --git a/mteb/tasks/Classification/multilingual/MTOPDomainClassification.py b/mteb/tasks/Classification/multilingual/MTOPDomainClassification.py index f535c6ec62..b67da6b4d5 100644 --- a/mteb/tasks/Classification/multilingual/MTOPDomainClassification.py +++ b/mteb/tasks/Classification/multilingual/MTOPDomainClassification.py @@ -29,4 +29,6 @@ class MTOPDomainClassification(MultilingualTask, AbsTaskClassification): dialect=None, 
text_creation=None, bibtex_citation=None, + n_samples={"validation": 2235, "test": 4386}, + avg_character_length={"validation": 36.5, "test": 36.8}, ) diff --git a/mteb/tasks/Classification/multilingual/MTOPIntentClassification.py b/mteb/tasks/Classification/multilingual/MTOPIntentClassification.py index 0443ce8933..1dda37b2e4 100644 --- a/mteb/tasks/Classification/multilingual/MTOPIntentClassification.py +++ b/mteb/tasks/Classification/multilingual/MTOPIntentClassification.py @@ -29,4 +29,6 @@ class MTOPIntentClassification(MultilingualTask, AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"validation": 2235, "test": 4386}, + avg_character_length={"validation": 36.5, "test": 36.8}, ) diff --git a/mteb/tasks/Classification/multilingual/MasakhaNEWSClassification.py b/mteb/tasks/Classification/multilingual/MasakhaNEWSClassification.py index 93114197d5..16c26b8d6a 100644 --- a/mteb/tasks/Classification/multilingual/MasakhaNEWSClassification.py +++ b/mteb/tasks/Classification/multilingual/MasakhaNEWSClassification.py @@ -46,4 +46,6 @@ class MasakhaNEWSClassification(AbsTaskClassification, MultilingualTask): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 422}, + avg_character_length={"test": 5116.6}, ) diff --git a/mteb/tasks/Classification/multilingual/MassiveIntentClassification.py b/mteb/tasks/Classification/multilingual/MassiveIntentClassification.py index ba9fbafe63..b5736ff128 100644 --- a/mteb/tasks/Classification/multilingual/MassiveIntentClassification.py +++ b/mteb/tasks/Classification/multilingual/MassiveIntentClassification.py @@ -81,4 +81,6 @@ class MassiveIntentClassification(MultilingualTask, AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"validation": 2033, "test": 2974}, + avg_character_length={"validation": 34.8, "test": 34.6}, ) diff --git a/mteb/tasks/Classification/multilingual/MassiveScenarioClassification.py 
b/mteb/tasks/Classification/multilingual/MassiveScenarioClassification.py index 48113970e3..4202369fe8 100644 --- a/mteb/tasks/Classification/multilingual/MassiveScenarioClassification.py +++ b/mteb/tasks/Classification/multilingual/MassiveScenarioClassification.py @@ -81,4 +81,6 @@ class MassiveScenarioClassification(MultilingualTask, AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"validation": 2033, "test": 2974}, + avg_character_length={"validation": 34.8, "test": 34.6}, ) diff --git a/mteb/tasks/Classification/multilingual/NordicLangClassification.py b/mteb/tasks/Classification/multilingual/NordicLangClassification.py index b0b8cdd200..b8a48ba32b 100644 --- a/mteb/tasks/Classification/multilingual/NordicLangClassification.py +++ b/mteb/tasks/Classification/multilingual/NordicLangClassification.py @@ -28,6 +28,8 @@ class NordicLangClassification(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 3000}, + avg_character_length={"test": 78.2}, ) @property diff --git a/mteb/tasks/Classification/multilingual/ScalaClassification.py b/mteb/tasks/Classification/multilingual/ScalaClassification.py index 80b817eaaa..333c65d37a 100644 --- a/mteb/tasks/Classification/multilingual/ScalaClassification.py +++ b/mteb/tasks/Classification/multilingual/ScalaClassification.py @@ -28,6 +28,8 @@ class ScalaDaClassification(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 1024}, + avg_character_length={"test": 109.4}, ) @property @@ -82,6 +84,8 @@ class ScalaNbClassification(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 1024}, + avg_character_length={"test": 98.4}, ) @property @@ -136,6 +140,8 @@ class ScalaNnClassification(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 1024}, + avg_character_length={"test": 104.8}, ) @property @@ 
-190,6 +196,8 @@ class ScalaSvClassification(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 1024}, + avg_character_length={"test": 98.3}, ) @property diff --git a/mteb/tasks/Classification/nb/NoRecClassification.py b/mteb/tasks/Classification/nb/NoRecClassification.py index abe2db8729..2bace59a52 100644 --- a/mteb/tasks/Classification/nb/NoRecClassification.py +++ b/mteb/tasks/Classification/nb/NoRecClassification.py @@ -26,4 +26,6 @@ class NoRecClassification(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 2050}, + avg_character_length={"test": 82}, ) diff --git a/mteb/tasks/Classification/nb/NorwegianParliamentClassification.py b/mteb/tasks/Classification/nb/NorwegianParliamentClassification.py index 82befe0cac..a08da70ac2 100644 --- a/mteb/tasks/Classification/nb/NorwegianParliamentClassification.py +++ b/mteb/tasks/Classification/nb/NorwegianParliamentClassification.py @@ -26,4 +26,6 @@ class NorwegianParliamentClassification(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 1200, "validation": 1200}, + avg_character_length={"test": 1884.0, "validation": 1911.0}, ) diff --git a/mteb/tasks/Classification/pl/PolishClassification.py b/mteb/tasks/Classification/pl/PolishClassification.py index 30ef6e3478..221e0483ff 100644 --- a/mteb/tasks/Classification/pl/PolishClassification.py +++ b/mteb/tasks/Classification/pl/PolishClassification.py @@ -27,6 +27,8 @@ class CbdClassification(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 1000}, + avg_character_length={"test": 93.2}, ) @@ -53,6 +55,8 @@ class PolEmo2InClassification(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @@ -80,6 +84,8 @@ class PolEmo2OutClassification(AbsTaskClassification): dialect=None, text_creation=None, 
bibtex_citation=None, + n_samples={"test": 722}, + avg_character_length={"test": 756.2}, ) @@ -105,6 +111,8 @@ class AllegroReviewsClassification(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 1006}, + avg_character_length={"test": 477.2}, ) @@ -130,4 +138,6 @@ class PacClassification(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 3453}, + avg_character_length={"test": 185.3}, ) diff --git a/mteb/tasks/Classification/sv/SweRecClassification.py b/mteb/tasks/Classification/sv/SweRecClassification.py index 50a31003dd..8ccf32eec6 100644 --- a/mteb/tasks/Classification/sv/SweRecClassification.py +++ b/mteb/tasks/Classification/sv/SweRecClassification.py @@ -26,4 +26,6 @@ class SweRecClassification(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 1024}, + avg_character_length={"test": 318.8}, ) diff --git a/mteb/tasks/Classification/zh/CMTEBClassification.py b/mteb/tasks/Classification/zh/CMTEBClassification.py index 9fa0f20081..46cba769df 100644 --- a/mteb/tasks/Classification/zh/CMTEBClassification.py +++ b/mteb/tasks/Classification/zh/CMTEBClassification.py @@ -27,6 +27,8 @@ class TNews(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @property @@ -58,6 +60,8 @@ class IFlyTek(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @property @@ -90,6 +94,8 @@ class MultilingualSentiment(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @property @@ -121,6 +127,8 @@ class JDReview(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @property @@ -152,6 +160,8 @@ class OnlineShopping(AbsTaskClassification): 
dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @property @@ -183,6 +193,8 @@ class Waimai(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @property diff --git a/mteb/tasks/Clustering/de/BlurbsClusteringP2P.py b/mteb/tasks/Clustering/de/BlurbsClusteringP2P.py index 7b3b4f74e8..fe5e76eecc 100644 --- a/mteb/tasks/Clustering/de/BlurbsClusteringP2P.py +++ b/mteb/tasks/Clustering/de/BlurbsClusteringP2P.py @@ -26,4 +26,6 @@ class BlurbsClusteringP2P(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 174637}, + avg_character_length={"test": 664.09}, ) diff --git a/mteb/tasks/Clustering/de/BlurbsClusteringS2S.py b/mteb/tasks/Clustering/de/BlurbsClusteringS2S.py index 640882f72a..d8c95adc13 100644 --- a/mteb/tasks/Clustering/de/BlurbsClusteringS2S.py +++ b/mteb/tasks/Clustering/de/BlurbsClusteringS2S.py @@ -27,4 +27,6 @@ class BlurbsClusteringS2S(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 174637}, + avg_character_length={"test": 23.02}, ) diff --git a/mteb/tasks/Clustering/de/TenKGnadClusteringP2P.py b/mteb/tasks/Clustering/de/TenKGnadClusteringP2P.py index 86eb8af166..28ed56db18 100644 --- a/mteb/tasks/Clustering/de/TenKGnadClusteringP2P.py +++ b/mteb/tasks/Clustering/de/TenKGnadClusteringP2P.py @@ -27,4 +27,6 @@ class TenKGnadClusteringP2P(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 45914}, + avg_character_length={"test": 2641.03}, ) diff --git a/mteb/tasks/Clustering/de/TenKGnadClusteringS2S.py b/mteb/tasks/Clustering/de/TenKGnadClusteringS2S.py index 2788a6de4b..73c44ab343 100644 --- a/mteb/tasks/Clustering/de/TenKGnadClusteringS2S.py +++ b/mteb/tasks/Clustering/de/TenKGnadClusteringS2S.py @@ -27,4 +27,6 @@ class TenKGnadClusteringS2S(AbsTaskClustering): dialect=None, 
text_creation=None, bibtex_citation=None, + n_samples={"test": 45914}, + avg_character_length={"test": 50.96}, ) diff --git a/mteb/tasks/Clustering/en/ArxivClusteringP2P.py b/mteb/tasks/Clustering/en/ArxivClusteringP2P.py index cb80bcc2af..b8da09ba77 100644 --- a/mteb/tasks/Clustering/en/ArxivClusteringP2P.py +++ b/mteb/tasks/Clustering/en/ArxivClusteringP2P.py @@ -27,4 +27,6 @@ class ArxivClusteringP2P(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 732723}, + avg_character_length={"test": 1009.98}, ) diff --git a/mteb/tasks/Clustering/en/ArxivClusteringS2S.py b/mteb/tasks/Clustering/en/ArxivClusteringS2S.py index f43dd59c74..dc2cdf5b92 100644 --- a/mteb/tasks/Clustering/en/ArxivClusteringS2S.py +++ b/mteb/tasks/Clustering/en/ArxivClusteringS2S.py @@ -27,4 +27,6 @@ class ArxivClusteringS2S(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 732723}, + avg_character_length={"test": 74}, ) diff --git a/mteb/tasks/Clustering/en/BigPatentClustering.py b/mteb/tasks/Clustering/en/BigPatentClustering.py index 9fe3c45601..4cb9d717b6 100644 --- a/mteb/tasks/Clustering/en/BigPatentClustering.py +++ b/mteb/tasks/Clustering/en/BigPatentClustering.py @@ -27,4 +27,6 @@ class BigPatentClustering(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Clustering/en/BiorxivClusteringP2P.py b/mteb/tasks/Clustering/en/BiorxivClusteringP2P.py index 685073d7d1..ab1ae9806e 100644 --- a/mteb/tasks/Clustering/en/BiorxivClusteringP2P.py +++ b/mteb/tasks/Clustering/en/BiorxivClusteringP2P.py @@ -26,4 +26,6 @@ class BiorxivClusteringP2P(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 75000}, + avg_character_length={"test": 1666.2}, ) diff --git a/mteb/tasks/Clustering/en/BiorxivClusteringS2S.py b/mteb/tasks/Clustering/en/BiorxivClusteringS2S.py index 
5fb38c3bdd..e11cdd656b 100644 --- a/mteb/tasks/Clustering/en/BiorxivClusteringS2S.py +++ b/mteb/tasks/Clustering/en/BiorxivClusteringS2S.py @@ -26,4 +26,6 @@ class BiorxivClusteringS2S(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 75000}, + avg_character_length={"test": 101.6}, ) diff --git a/mteb/tasks/Clustering/en/MedrxivClusteringP2P.py b/mteb/tasks/Clustering/en/MedrxivClusteringP2P.py index 637f1ff3a0..9b7f28a301 100644 --- a/mteb/tasks/Clustering/en/MedrxivClusteringP2P.py +++ b/mteb/tasks/Clustering/en/MedrxivClusteringP2P.py @@ -27,4 +27,6 @@ class MedrxivClusteringP2P(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 375000}, + avg_character_length={"test": 1981.2}, ) diff --git a/mteb/tasks/Clustering/en/MedrxivClusteringS2S.py b/mteb/tasks/Clustering/en/MedrxivClusteringS2S.py index 46023eaf5b..be24d1c769 100644 --- a/mteb/tasks/Clustering/en/MedrxivClusteringS2S.py +++ b/mteb/tasks/Clustering/en/MedrxivClusteringS2S.py @@ -27,4 +27,6 @@ class MedrxivClusteringS2S(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 375000}, + avg_character_length={"test": 114.7}, ) diff --git a/mteb/tasks/Clustering/en/RedditClustering.py b/mteb/tasks/Clustering/en/RedditClustering.py index 2295ac323c..dcbeacf37d 100644 --- a/mteb/tasks/Clustering/en/RedditClustering.py +++ b/mteb/tasks/Clustering/en/RedditClustering.py @@ -27,4 +27,6 @@ class RedditClustering(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 420464}, + avg_character_length={"test": 64.7}, ) diff --git a/mteb/tasks/Clustering/en/RedditClusteringP2P.py b/mteb/tasks/Clustering/en/RedditClusteringP2P.py index d6003491d5..7f394fe366 100644 --- a/mteb/tasks/Clustering/en/RedditClusteringP2P.py +++ b/mteb/tasks/Clustering/en/RedditClusteringP2P.py @@ -27,4 +27,6 @@ class RedditClusteringP2P(AbsTaskClustering): 
dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 459399}, + avg_character_length={"test": 727.7}, ) diff --git a/mteb/tasks/Clustering/en/StackExchangeClustering.py b/mteb/tasks/Clustering/en/StackExchangeClustering.py index 5394097e2d..6a37776324 100644 --- a/mteb/tasks/Clustering/en/StackExchangeClustering.py +++ b/mteb/tasks/Clustering/en/StackExchangeClustering.py @@ -27,4 +27,6 @@ class StackExchangeClustering(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 373850}, + avg_character_length={"test": 57.0}, ) diff --git a/mteb/tasks/Clustering/en/StackExchangeClusteringP2P.py b/mteb/tasks/Clustering/en/StackExchangeClusteringP2P.py index 843b7de216..1643a34893 100644 --- a/mteb/tasks/Clustering/en/StackExchangeClusteringP2P.py +++ b/mteb/tasks/Clustering/en/StackExchangeClusteringP2P.py @@ -27,4 +27,6 @@ class StackExchangeClusteringP2P(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 75000}, + avg_character_length={"test": 1090.7}, ) diff --git a/mteb/tasks/Clustering/en/TwentyNewsgroupsClustering.py b/mteb/tasks/Clustering/en/TwentyNewsgroupsClustering.py index 128741efdf..1d8ff3d70c 100644 --- a/mteb/tasks/Clustering/en/TwentyNewsgroupsClustering.py +++ b/mteb/tasks/Clustering/en/TwentyNewsgroupsClustering.py @@ -27,4 +27,6 @@ class TwentyNewsgroupsClustering(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 59545}, + avg_character_length={"test": 32.0}, ) diff --git a/mteb/tasks/Clustering/en/WikiCitiesClustering.py b/mteb/tasks/Clustering/en/WikiCitiesClustering.py index 88d50f8214..65fac916a7 100644 --- a/mteb/tasks/Clustering/en/WikiCitiesClustering.py +++ b/mteb/tasks/Clustering/en/WikiCitiesClustering.py @@ -27,4 +27,6 @@ class WikiCitiesClustering(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git 
a/mteb/tasks/Clustering/es/FloresClusteringS2S.py b/mteb/tasks/Clustering/es/FloresClusteringS2S.py index 350328ee2c..f3afb3130c 100644 --- a/mteb/tasks/Clustering/es/FloresClusteringS2S.py +++ b/mteb/tasks/Clustering/es/FloresClusteringS2S.py @@ -27,4 +27,6 @@ class FloresClusteringS2S(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Clustering/es/SpanishNewsClusteringP2P.py b/mteb/tasks/Clustering/es/SpanishNewsClusteringP2P.py index fa7b369bd5..bfa1b16c91 100644 --- a/mteb/tasks/Clustering/es/SpanishNewsClusteringP2P.py +++ b/mteb/tasks/Clustering/es/SpanishNewsClusteringP2P.py @@ -27,4 +27,6 @@ class SpanishNewsClusteringP2P(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Clustering/fr/AlloProfClusteringP2P.py b/mteb/tasks/Clustering/fr/AlloProfClusteringP2P.py index 92f9c7db91..3e4e8a9509 100644 --- a/mteb/tasks/Clustering/fr/AlloProfClusteringP2P.py +++ b/mteb/tasks/Clustering/fr/AlloProfClusteringP2P.py @@ -30,6 +30,8 @@ class AlloProfClusteringP2P(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Clustering/fr/AlloProfClusteringS2S.py b/mteb/tasks/Clustering/fr/AlloProfClusteringS2S.py index 1b26d68f25..8fdf9b52ba 100644 --- a/mteb/tasks/Clustering/fr/AlloProfClusteringS2S.py +++ b/mteb/tasks/Clustering/fr/AlloProfClusteringS2S.py @@ -30,6 +30,8 @@ class AlloProfClusteringS2S(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Clustering/fr/HALClusteringS2S.py b/mteb/tasks/Clustering/fr/HALClusteringS2S.py index 8a88ab08aa..69c5f2fe41 100644 --- a/mteb/tasks/Clustering/fr/HALClusteringS2S.py +++ 
b/mteb/tasks/Clustering/fr/HALClusteringS2S.py @@ -30,6 +30,8 @@ class HALClusteringS2S(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Clustering/fr/MLSUMClusteringP2P.py b/mteb/tasks/Clustering/fr/MLSUMClusteringP2P.py index d584a069c5..32c0cd30a3 100644 --- a/mteb/tasks/Clustering/fr/MLSUMClusteringP2P.py +++ b/mteb/tasks/Clustering/fr/MLSUMClusteringP2P.py @@ -30,6 +30,8 @@ class MLSUMClusteringP2P(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Clustering/fr/MLSUMClusteringS2S.py b/mteb/tasks/Clustering/fr/MLSUMClusteringS2S.py index a2de3dc86b..fa3fd4630c 100644 --- a/mteb/tasks/Clustering/fr/MLSUMClusteringS2S.py +++ b/mteb/tasks/Clustering/fr/MLSUMClusteringS2S.py @@ -30,6 +30,8 @@ class MLSUMClusteringS2S(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Clustering/multilingual/MasakhaNEWSClusteringP2P.py b/mteb/tasks/Clustering/multilingual/MasakhaNEWSClusteringP2P.py index 3239d8ebf5..8c0ccd14d8 100644 --- a/mteb/tasks/Clustering/multilingual/MasakhaNEWSClusteringP2P.py +++ b/mteb/tasks/Clustering/multilingual/MasakhaNEWSClusteringP2P.py @@ -49,6 +49,8 @@ class MasakhaNEWSClusteringP2P(AbsTaskClustering, MultilingualTask): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Clustering/multilingual/MasakhaNEWSClusteringS2S.py b/mteb/tasks/Clustering/multilingual/MasakhaNEWSClusteringS2S.py index db8694f68a..36b5d3cf2a 100644 --- a/mteb/tasks/Clustering/multilingual/MasakhaNEWSClusteringS2S.py +++ b/mteb/tasks/Clustering/multilingual/MasakhaNEWSClusteringS2S.py @@ 
-51,6 +51,8 @@ class MasakhaNEWSClusteringS2S(AbsTaskClustering, MultilingualTask): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Clustering/pl/PolishClustering.py b/mteb/tasks/Clustering/pl/PolishClustering.py index 9b56098b32..5ebd8caaf6 100644 --- a/mteb/tasks/Clustering/pl/PolishClustering.py +++ b/mteb/tasks/Clustering/pl/PolishClustering.py @@ -28,4 +28,6 @@ class EightTagsClustering(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Clustering/zh/CMTEBClustering.py b/mteb/tasks/Clustering/zh/CMTEBClustering.py index 6a360ecbc6..46d601858b 100644 --- a/mteb/tasks/Clustering/zh/CMTEBClustering.py +++ b/mteb/tasks/Clustering/zh/CMTEBClustering.py @@ -27,6 +27,8 @@ class CLSClusteringS2S(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @@ -52,6 +54,8 @@ class CLSClusteringP2P(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @@ -77,6 +81,8 @@ class ThuNewsClusteringS2S(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @@ -102,4 +108,6 @@ class ThuNewsClusteringP2P(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/PairClassification/en/SprintDuplicateQuestionsPC.py b/mteb/tasks/PairClassification/en/SprintDuplicateQuestionsPC.py index ec5ca9ae68..f81d860f72 100644 --- a/mteb/tasks/PairClassification/en/SprintDuplicateQuestionsPC.py +++ b/mteb/tasks/PairClassification/en/SprintDuplicateQuestionsPC.py @@ -27,4 +27,6 @@ class SprintDuplicateQuestionsPC(AbsTaskPairClassification): dialect=None, text_creation=None, bibtex_citation=None, + 
n_samples={"validation": 101000, "test": 101000}, + avg_character_length={"validation": 65.2, "test": 67.9}, ) diff --git a/mteb/tasks/PairClassification/en/TwitterSemEval2015PC.py b/mteb/tasks/PairClassification/en/TwitterSemEval2015PC.py index 0bc8ef0c1a..e1a04745d0 100644 --- a/mteb/tasks/PairClassification/en/TwitterSemEval2015PC.py +++ b/mteb/tasks/PairClassification/en/TwitterSemEval2015PC.py @@ -27,4 +27,6 @@ class TwitterSemEval2015PC(AbsTaskPairClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 16777}, + avg_character_length={"test": 38.3}, ) diff --git a/mteb/tasks/PairClassification/en/TwitterURLCorpusPC.py b/mteb/tasks/PairClassification/en/TwitterURLCorpusPC.py index 98316f9626..8f70cf30c3 100644 --- a/mteb/tasks/PairClassification/en/TwitterURLCorpusPC.py +++ b/mteb/tasks/PairClassification/en/TwitterURLCorpusPC.py @@ -27,4 +27,6 @@ class TwitterURLCorpusPC(AbsTaskPairClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 51534}, + avg_character_length={"test": 79.5}, ) diff --git a/mteb/tasks/PairClassification/multilingual/OpusparcusPC.py b/mteb/tasks/PairClassification/multilingual/OpusparcusPC.py index c49bca69b7..9aebcab6a6 100644 --- a/mteb/tasks/PairClassification/multilingual/OpusparcusPC.py +++ b/mteb/tasks/PairClassification/multilingual/OpusparcusPC.py @@ -31,6 +31,8 @@ class OpusparcusPC(AbsTaskPairClassification, MultilingualTask): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/PairClassification/multilingual/PawsX.py b/mteb/tasks/PairClassification/multilingual/PawsX.py index 1bacc739b3..2d5f840d39 100644 --- a/mteb/tasks/PairClassification/multilingual/PawsX.py +++ b/mteb/tasks/PairClassification/multilingual/PawsX.py @@ -30,6 +30,8 @@ class PawsX(MultilingualTask, AbsTaskPairClassification): dialect=None, text_creation=None, 
bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/PairClassification/pl/PolishPC.py b/mteb/tasks/PairClassification/pl/PolishPC.py index c26fe307ab..ed383ccb4e 100644 --- a/mteb/tasks/PairClassification/pl/PolishPC.py +++ b/mteb/tasks/PairClassification/pl/PolishPC.py @@ -27,6 +27,8 @@ class SickePLPC(AbsTaskPairClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @@ -52,6 +54,8 @@ class PpcPC(AbsTaskPairClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @@ -77,6 +81,8 @@ class CdscePC(AbsTaskPairClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @@ -102,4 +108,6 @@ class PscPC(AbsTaskPairClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/PairClassification/zh/CMTEBPairClassification.py b/mteb/tasks/PairClassification/zh/CMTEBPairClassification.py index ca4b6a25fb..4869cc9ca6 100644 --- a/mteb/tasks/PairClassification/zh/CMTEBPairClassification.py +++ b/mteb/tasks/PairClassification/zh/CMTEBPairClassification.py @@ -26,6 +26,8 @@ class Ocnli(AbsTaskPairClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @@ -51,4 +53,6 @@ class Cmnli(AbsTaskPairClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Reranking/en/AskUbuntuDupQuestions.py b/mteb/tasks/Reranking/en/AskUbuntuDupQuestions.py index ab6fe217fe..ff7ad376d6 100644 --- a/mteb/tasks/Reranking/en/AskUbuntuDupQuestions.py +++ b/mteb/tasks/Reranking/en/AskUbuntuDupQuestions.py @@ -27,4 +27,6 @@ class AskUbuntuDupQuestions(AbsTaskReranking): dialect=None, text_creation=None, 
bibtex_citation=None, + n_samples={"test": 2255}, + avg_character_length={"test": 52.5}, ) diff --git a/mteb/tasks/Reranking/en/MindSmallReranking.py b/mteb/tasks/Reranking/en/MindSmallReranking.py index a458e694d0..75353ece11 100644 --- a/mteb/tasks/Reranking/en/MindSmallReranking.py +++ b/mteb/tasks/Reranking/en/MindSmallReranking.py @@ -27,4 +27,6 @@ class MindSmallReranking(AbsTaskReranking): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 107968}, + avg_character_length={"test": 70.9}, ) diff --git a/mteb/tasks/Reranking/en/SciDocsReranking.py b/mteb/tasks/Reranking/en/SciDocsReranking.py index 150aba8ef5..b58faaa0b3 100644 --- a/mteb/tasks/Reranking/en/SciDocsReranking.py +++ b/mteb/tasks/Reranking/en/SciDocsReranking.py @@ -49,4 +49,6 @@ class SciDocsReranking(AbsTaskReranking): abstract = "Representation learning is a critical ingredient for natural language processing systems. Recent Transformer language models like BERT learn powerful textual representations, but these models are targeted towards token- and sentence-level training objectives and do not leverage information on inter-document relatedness, which limits their document-level representation power. For applications on scientific documents, such as classification and recommendation, accurate embeddings of documents are a necessity. We propose SPECTER, a new method to generate document-level embedding of scientific papers based on pretraining a Transformer language model on a powerful signal of document-level relatedness: the citation graph. Unlike existing pretrained language models, Specter can be easily applied to downstream applications without task-specific fine-tuning. Additionally, to encourage further research on document-level models, we introduce SciDocs, a new evaluation benchmark consisting of seven document-level tasks ranging from citation prediction, to document classification and recommendation. 
We show that Specter outperforms a variety of competitive baselines on the benchmark.", } """, + n_samples={"test": 19599}, + avg_character_length={"test": 69.0}, ) diff --git a/mteb/tasks/Reranking/en/StackOverflowDupQuestions.py b/mteb/tasks/Reranking/en/StackOverflowDupQuestions.py index 6e61ce727a..eb354efbe5 100644 --- a/mteb/tasks/Reranking/en/StackOverflowDupQuestions.py +++ b/mteb/tasks/Reranking/en/StackOverflowDupQuestions.py @@ -27,4 +27,6 @@ class StackOverflowDupQuestions(AbsTaskReranking): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 3467}, + avg_character_length={"test": 49.8}, ) diff --git a/mteb/tasks/Reranking/fr/AlloprofReranking.py b/mteb/tasks/Reranking/fr/AlloprofReranking.py index c70f70e875..b349e9be9d 100644 --- a/mteb/tasks/Reranking/fr/AlloprofReranking.py +++ b/mteb/tasks/Reranking/fr/AlloprofReranking.py @@ -27,4 +27,6 @@ class AlloprofReranking(AbsTaskReranking): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Reranking/fr/SyntecReranking.py b/mteb/tasks/Reranking/fr/SyntecReranking.py index 7cc945c3ef..0d4b229c5d 100644 --- a/mteb/tasks/Reranking/fr/SyntecReranking.py +++ b/mteb/tasks/Reranking/fr/SyntecReranking.py @@ -27,4 +27,6 @@ class SyntecReranking(AbsTaskReranking): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Reranking/multilingual/MIRACLReranking.py b/mteb/tasks/Reranking/multilingual/MIRACLReranking.py index ca9c46ca3e..8f2861cf30 100644 --- a/mteb/tasks/Reranking/multilingual/MIRACLReranking.py +++ b/mteb/tasks/Reranking/multilingual/MIRACLReranking.py @@ -28,4 +28,6 @@ class MIRACLReranking(MultilingualTask, AbsTaskReranking): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Reranking/zh/CMTEBReranking.py b/mteb/tasks/Reranking/zh/CMTEBReranking.py 
index c0508288f6..2618bf6dde 100644 --- a/mteb/tasks/Reranking/zh/CMTEBReranking.py +++ b/mteb/tasks/Reranking/zh/CMTEBReranking.py @@ -26,6 +26,8 @@ class T2Reranking(AbsTaskReranking): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @@ -51,6 +53,8 @@ class MMarcoReranking(AbsTaskReranking): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @@ -76,6 +80,8 @@ class CMedQAv1(AbsTaskReranking): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @@ -101,4 +107,6 @@ class CMedQAv2(AbsTaskReranking): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/de/GerDaLIRRetrieval.py b/mteb/tasks/Retrieval/de/GerDaLIRRetrieval.py index 85f56c30c7..e8ad5500f7 100644 --- a/mteb/tasks/Retrieval/de/GerDaLIRRetrieval.py +++ b/mteb/tasks/Retrieval/de/GerDaLIRRetrieval.py @@ -30,6 +30,8 @@ class GerDaLIR(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/de/GermanDPRRetrieval.py b/mteb/tasks/Retrieval/de/GermanDPRRetrieval.py index 11b60a82d6..ff0fae54b4 100644 --- a/mteb/tasks/Retrieval/de/GermanDPRRetrieval.py +++ b/mteb/tasks/Retrieval/de/GermanDPRRetrieval.py @@ -32,6 +32,8 @@ class GermanDPR(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @staticmethod diff --git a/mteb/tasks/Retrieval/de/GermanQuADRetrieval.py b/mteb/tasks/Retrieval/de/GermanQuADRetrieval.py index 68cbea11d5..5adfd1d5ae 100644 --- a/mteb/tasks/Retrieval/de/GermanQuADRetrieval.py +++ b/mteb/tasks/Retrieval/de/GermanQuADRetrieval.py @@ -49,6 +49,8 @@ class GermanQuADRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + 
n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/en/ArguAnaRetrieval.py b/mteb/tasks/Retrieval/en/ArguAnaRetrieval.py index 6d55176bb3..51e72904aa 100644 --- a/mteb/tasks/Retrieval/en/ArguAnaRetrieval.py +++ b/mteb/tasks/Retrieval/en/ArguAnaRetrieval.py @@ -27,4 +27,6 @@ class ArguAna(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/en/CQADupstackAndroidRetrieval.py b/mteb/tasks/Retrieval/en/CQADupstackAndroidRetrieval.py index 6589f37eeb..916063bbdb 100644 --- a/mteb/tasks/Retrieval/en/CQADupstackAndroidRetrieval.py +++ b/mteb/tasks/Retrieval/en/CQADupstackAndroidRetrieval.py @@ -27,4 +27,6 @@ class CQADupstackAndroidRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/en/CQADupstackEnglishRetrieval.py b/mteb/tasks/Retrieval/en/CQADupstackEnglishRetrieval.py index 460d0a9edb..1e8f20bb88 100644 --- a/mteb/tasks/Retrieval/en/CQADupstackEnglishRetrieval.py +++ b/mteb/tasks/Retrieval/en/CQADupstackEnglishRetrieval.py @@ -27,4 +27,6 @@ class CQADupstackEnglishRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/en/CQADupstackGamingRetrieval.py b/mteb/tasks/Retrieval/en/CQADupstackGamingRetrieval.py index 35a599ba01..7d1dd09bff 100644 --- a/mteb/tasks/Retrieval/en/CQADupstackGamingRetrieval.py +++ b/mteb/tasks/Retrieval/en/CQADupstackGamingRetrieval.py @@ -27,4 +27,6 @@ class CQADupstackGamingRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/en/CQADupstackGisRetrieval.py b/mteb/tasks/Retrieval/en/CQADupstackGisRetrieval.py index 891554eda1..79a0bfc617 100644 
--- a/mteb/tasks/Retrieval/en/CQADupstackGisRetrieval.py +++ b/mteb/tasks/Retrieval/en/CQADupstackGisRetrieval.py @@ -27,4 +27,6 @@ class CQADupstackGisRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/en/CQADupstackMathematicaRetrieval.py b/mteb/tasks/Retrieval/en/CQADupstackMathematicaRetrieval.py index ebb99388ef..c38d47da37 100644 --- a/mteb/tasks/Retrieval/en/CQADupstackMathematicaRetrieval.py +++ b/mteb/tasks/Retrieval/en/CQADupstackMathematicaRetrieval.py @@ -27,4 +27,6 @@ class CQADupstackMathematicaRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/en/CQADupstackPhysicsRetrieval.py b/mteb/tasks/Retrieval/en/CQADupstackPhysicsRetrieval.py index 54af77e9e4..bfbc53a979 100644 --- a/mteb/tasks/Retrieval/en/CQADupstackPhysicsRetrieval.py +++ b/mteb/tasks/Retrieval/en/CQADupstackPhysicsRetrieval.py @@ -27,4 +27,6 @@ class CQADupstackPhysicsRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/en/CQADupstackProgrammersRetrieval.py b/mteb/tasks/Retrieval/en/CQADupstackProgrammersRetrieval.py index 57638d6dc9..ec561ffa0f 100644 --- a/mteb/tasks/Retrieval/en/CQADupstackProgrammersRetrieval.py +++ b/mteb/tasks/Retrieval/en/CQADupstackProgrammersRetrieval.py @@ -27,4 +27,6 @@ class CQADupstackProgrammersRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/en/CQADupstackStatsRetrieval.py b/mteb/tasks/Retrieval/en/CQADupstackStatsRetrieval.py index e4eb37de5b..0a4c3eb5f6 100644 --- a/mteb/tasks/Retrieval/en/CQADupstackStatsRetrieval.py +++ b/mteb/tasks/Retrieval/en/CQADupstackStatsRetrieval.py @@ -27,4 +27,6 @@ 
class CQADupstackStatsRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/en/CQADupstackTexRetrieval.py b/mteb/tasks/Retrieval/en/CQADupstackTexRetrieval.py index be36f64ff7..03df962a36 100644 --- a/mteb/tasks/Retrieval/en/CQADupstackTexRetrieval.py +++ b/mteb/tasks/Retrieval/en/CQADupstackTexRetrieval.py @@ -27,4 +27,6 @@ class CQADupstackTexRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/en/CQADupstackUnixRetrieval.py b/mteb/tasks/Retrieval/en/CQADupstackUnixRetrieval.py index 8df00a7a8c..75ff1c7df6 100644 --- a/mteb/tasks/Retrieval/en/CQADupstackUnixRetrieval.py +++ b/mteb/tasks/Retrieval/en/CQADupstackUnixRetrieval.py @@ -27,4 +27,6 @@ class CQADupstackUnixRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/en/CQADupstackWebmastersRetrieval.py b/mteb/tasks/Retrieval/en/CQADupstackWebmastersRetrieval.py index e863d8c36e..9137711fc7 100644 --- a/mteb/tasks/Retrieval/en/CQADupstackWebmastersRetrieval.py +++ b/mteb/tasks/Retrieval/en/CQADupstackWebmastersRetrieval.py @@ -27,4 +27,6 @@ class CQADupstackWebmastersRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/en/CQADupstackWordpressRetrieval.py b/mteb/tasks/Retrieval/en/CQADupstackWordpressRetrieval.py index 0be63da42b..52c56e5b68 100644 --- a/mteb/tasks/Retrieval/en/CQADupstackWordpressRetrieval.py +++ b/mteb/tasks/Retrieval/en/CQADupstackWordpressRetrieval.py @@ -27,4 +27,6 @@ class CQADupstackWordpressRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git 
a/mteb/tasks/Retrieval/en/ClimateFEVERRetrieval.py b/mteb/tasks/Retrieval/en/ClimateFEVERRetrieval.py index 67d677050e..c6200461de 100644 --- a/mteb/tasks/Retrieval/en/ClimateFEVERRetrieval.py +++ b/mteb/tasks/Retrieval/en/ClimateFEVERRetrieval.py @@ -27,4 +27,6 @@ class ClimateFEVER(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/en/DBPediaRetrieval.py b/mteb/tasks/Retrieval/en/DBPediaRetrieval.py index 7a82fc7e3c..c64b84a591 100644 --- a/mteb/tasks/Retrieval/en/DBPediaRetrieval.py +++ b/mteb/tasks/Retrieval/en/DBPediaRetrieval.py @@ -27,4 +27,6 @@ class DBPedia(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/en/FEVERRetrieval.py b/mteb/tasks/Retrieval/en/FEVERRetrieval.py index 9ef5670d27..6a6fa87096 100644 --- a/mteb/tasks/Retrieval/en/FEVERRetrieval.py +++ b/mteb/tasks/Retrieval/en/FEVERRetrieval.py @@ -31,4 +31,6 @@ class FEVER(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/en/FiQA2018Retrieval.py b/mteb/tasks/Retrieval/en/FiQA2018Retrieval.py index dd298d28e8..94a4c74845 100644 --- a/mteb/tasks/Retrieval/en/FiQA2018Retrieval.py +++ b/mteb/tasks/Retrieval/en/FiQA2018Retrieval.py @@ -27,4 +27,6 @@ class FiQA2018(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/en/HagridRetrieval.py b/mteb/tasks/Retrieval/en/HagridRetrieval.py index 0ea5acba84..2ef21cd21c 100644 --- a/mteb/tasks/Retrieval/en/HagridRetrieval.py +++ b/mteb/tasks/Retrieval/en/HagridRetrieval.py @@ -36,6 +36,8 @@ class HagridRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def 
load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/en/HotpotQARetrieval.py b/mteb/tasks/Retrieval/en/HotpotQARetrieval.py index 16edce1548..f222ff50d8 100644 --- a/mteb/tasks/Retrieval/en/HotpotQARetrieval.py +++ b/mteb/tasks/Retrieval/en/HotpotQARetrieval.py @@ -30,4 +30,6 @@ class HotpotQA(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/en/MSMARCORetrieval.py b/mteb/tasks/Retrieval/en/MSMARCORetrieval.py index 6eb2af7b9c..be8ce33416 100644 --- a/mteb/tasks/Retrieval/en/MSMARCORetrieval.py +++ b/mteb/tasks/Retrieval/en/MSMARCORetrieval.py @@ -27,4 +27,6 @@ class MSMARCO(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/en/MSMARCOv2Retrieval.py b/mteb/tasks/Retrieval/en/MSMARCOv2Retrieval.py index 80ed17db3d..bf62e0a4a7 100644 --- a/mteb/tasks/Retrieval/en/MSMARCOv2Retrieval.py +++ b/mteb/tasks/Retrieval/en/MSMARCOv2Retrieval.py @@ -27,4 +27,6 @@ class MSMARCOv2(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/en/NFCorpusRetrieval.py b/mteb/tasks/Retrieval/en/NFCorpusRetrieval.py index fbfa2f3bc7..8f1a4ffa38 100644 --- a/mteb/tasks/Retrieval/en/NFCorpusRetrieval.py +++ b/mteb/tasks/Retrieval/en/NFCorpusRetrieval.py @@ -27,4 +27,6 @@ class NFCorpus(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/en/NQRetrieval.py b/mteb/tasks/Retrieval/en/NQRetrieval.py index 9d2e5c9d80..dbcc34a37b 100644 --- a/mteb/tasks/Retrieval/en/NQRetrieval.py +++ b/mteb/tasks/Retrieval/en/NQRetrieval.py @@ -27,4 +27,6 @@ class NQ(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) 
diff --git a/mteb/tasks/Retrieval/en/NarrativeQARetrieval.py b/mteb/tasks/Retrieval/en/NarrativeQARetrieval.py index 6ef5b00b72..2f879d8926 100644 --- a/mteb/tasks/Retrieval/en/NarrativeQARetrieval.py +++ b/mteb/tasks/Retrieval/en/NarrativeQARetrieval.py @@ -34,6 +34,8 @@ class NarrativeQARetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/en/QuoraRetrieval.py b/mteb/tasks/Retrieval/en/QuoraRetrieval.py index 53eb15f5f2..4b65a612c5 100644 --- a/mteb/tasks/Retrieval/en/QuoraRetrieval.py +++ b/mteb/tasks/Retrieval/en/QuoraRetrieval.py @@ -30,4 +30,6 @@ class QuoraRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/en/SCIDOCSRetrieval.py b/mteb/tasks/Retrieval/en/SCIDOCSRetrieval.py index c5a412e402..1217fab3f9 100644 --- a/mteb/tasks/Retrieval/en/SCIDOCSRetrieval.py +++ b/mteb/tasks/Retrieval/en/SCIDOCSRetrieval.py @@ -30,4 +30,6 @@ class SCIDOCS(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/en/SciFactRetrieval.py b/mteb/tasks/Retrieval/en/SciFactRetrieval.py index 74cbf1369d..5e27bd8b63 100644 --- a/mteb/tasks/Retrieval/en/SciFactRetrieval.py +++ b/mteb/tasks/Retrieval/en/SciFactRetrieval.py @@ -27,4 +27,6 @@ class SciFact(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/en/TRECCOVIDRetrieval.py b/mteb/tasks/Retrieval/en/TRECCOVIDRetrieval.py index 8d6b55f3f8..5b0cfb3ae9 100644 --- a/mteb/tasks/Retrieval/en/TRECCOVIDRetrieval.py +++ b/mteb/tasks/Retrieval/en/TRECCOVIDRetrieval.py @@ -27,4 +27,6 @@ class TRECCOVID(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + 
n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/en/Touche2020Retrieval.py b/mteb/tasks/Retrieval/en/Touche2020Retrieval.py index eb408670eb..2ccbe45728 100644 --- a/mteb/tasks/Retrieval/en/Touche2020Retrieval.py +++ b/mteb/tasks/Retrieval/en/Touche2020Retrieval.py @@ -27,4 +27,6 @@ class Touche2020(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/es/SpanishPassageRetrievalS2P.py b/mteb/tasks/Retrieval/es/SpanishPassageRetrievalS2P.py index f870399eeb..066d472c01 100644 --- a/mteb/tasks/Retrieval/es/SpanishPassageRetrievalS2P.py +++ b/mteb/tasks/Retrieval/es/SpanishPassageRetrievalS2P.py @@ -29,6 +29,8 @@ class SpanishPassageRetrievalS2P(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/es/SpanishPassageRetrievalS2S.py b/mteb/tasks/Retrieval/es/SpanishPassageRetrievalS2S.py index a975fd654c..d072fb006a 100644 --- a/mteb/tasks/Retrieval/es/SpanishPassageRetrievalS2S.py +++ b/mteb/tasks/Retrieval/es/SpanishPassageRetrievalS2S.py @@ -29,6 +29,8 @@ class SpanishPassageRetrievalS2S(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/fr/AlloprofRetrieval.py b/mteb/tasks/Retrieval/fr/AlloprofRetrieval.py index 561eda1663..03a63b515d 100644 --- a/mteb/tasks/Retrieval/fr/AlloprofRetrieval.py +++ b/mteb/tasks/Retrieval/fr/AlloprofRetrieval.py @@ -29,6 +29,8 @@ class AlloprofRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/fr/BSARDRetrieval.py b/mteb/tasks/Retrieval/fr/BSARDRetrieval.py index 867a67da9e..a8ffb1f03d 
100644 --- a/mteb/tasks/Retrieval/fr/BSARDRetrieval.py +++ b/mteb/tasks/Retrieval/fr/BSARDRetrieval.py @@ -29,6 +29,8 @@ class BSARDRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/fr/SyntecRetrieval.py b/mteb/tasks/Retrieval/fr/SyntecRetrieval.py index 7e9007c971..5897c1a024 100644 --- a/mteb/tasks/Retrieval/fr/SyntecRetrieval.py +++ b/mteb/tasks/Retrieval/fr/SyntecRetrieval.py @@ -31,6 +31,8 @@ class SyntecRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/ko/KoMiracl.py b/mteb/tasks/Retrieval/ko/KoMiracl.py index 596cc2d474..5d85ae594b 100644 --- a/mteb/tasks/Retrieval/ko/KoMiracl.py +++ b/mteb/tasks/Retrieval/ko/KoMiracl.py @@ -27,4 +27,6 @@ class KoMiracl(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/ko/KoMrtydi.py b/mteb/tasks/Retrieval/ko/KoMrtydi.py index 0d9a8a774e..929ad97bf1 100644 --- a/mteb/tasks/Retrieval/ko/KoMrtydi.py +++ b/mteb/tasks/Retrieval/ko/KoMrtydi.py @@ -27,4 +27,6 @@ class KoMrtydi(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/ko/KoStrategyQA.py b/mteb/tasks/Retrieval/ko/KoStrategyQA.py index 8729c40227..8f49a61603 100644 --- a/mteb/tasks/Retrieval/ko/KoStrategyQA.py +++ b/mteb/tasks/Retrieval/ko/KoStrategyQA.py @@ -27,4 +27,6 @@ class KoStrategyQA(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/multilingual/MIRACLRetrieval.py b/mteb/tasks/Retrieval/multilingual/MIRACLRetrieval.py index fdc8a76eaf..f17ecd7f9e 100644 --- 
a/mteb/tasks/Retrieval/multilingual/MIRACLRetrieval.py +++ b/mteb/tasks/Retrieval/multilingual/MIRACLRetrieval.py @@ -81,6 +81,8 @@ class MIRACLRetrieval(MultilingualTask, AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/multilingual/MintakaRetrieval.py b/mteb/tasks/Retrieval/multilingual/MintakaRetrieval.py index 1223687b91..2fd18c7cb6 100644 --- a/mteb/tasks/Retrieval/multilingual/MintakaRetrieval.py +++ b/mteb/tasks/Retrieval/multilingual/MintakaRetrieval.py @@ -71,6 +71,8 @@ class MintakaRetrieval(MultilingualTask, AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/multilingual/MultiLongDocRetrieval.py b/mteb/tasks/Retrieval/multilingual/MultiLongDocRetrieval.py index a69adde8e1..b82b2bc47a 100644 --- a/mteb/tasks/Retrieval/multilingual/MultiLongDocRetrieval.py +++ b/mteb/tasks/Retrieval/multilingual/MultiLongDocRetrieval.py @@ -79,6 +79,8 @@ class MultiLongDocRetrieval(MultilingualTask, AbsTaskRetrieval): primaryClass={cs.CL} } """, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/multilingual/XMarketRetrieval.py b/mteb/tasks/Retrieval/multilingual/XMarketRetrieval.py index 6911212cc9..f055f19918 100644 --- a/mteb/tasks/Retrieval/multilingual/XMarketRetrieval.py +++ b/mteb/tasks/Retrieval/multilingual/XMarketRetrieval.py @@ -78,6 +78,8 @@ class XMarket(MultilingualTask, AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/multilingual/XPQARetrieval.py b/mteb/tasks/Retrieval/multilingual/XPQARetrieval.py index 923bddef29..16e4fb4dc3 100644 --- 
a/mteb/tasks/Retrieval/multilingual/XPQARetrieval.py +++ b/mteb/tasks/Retrieval/multilingual/XPQARetrieval.py @@ -71,6 +71,8 @@ class XPQARetrieval(MultilingualTask, AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/pl/ArguAnaPLRetrieval.py b/mteb/tasks/Retrieval/pl/ArguAnaPLRetrieval.py index ac35c36abc..6e04c10e05 100644 --- a/mteb/tasks/Retrieval/pl/ArguAnaPLRetrieval.py +++ b/mteb/tasks/Retrieval/pl/ArguAnaPLRetrieval.py @@ -27,4 +27,6 @@ class ArguAnaPL(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/pl/DBPediaPLRetrieval.py b/mteb/tasks/Retrieval/pl/DBPediaPLRetrieval.py index 31e1fde60d..9e50ade0f1 100644 --- a/mteb/tasks/Retrieval/pl/DBPediaPLRetrieval.py +++ b/mteb/tasks/Retrieval/pl/DBPediaPLRetrieval.py @@ -27,4 +27,6 @@ class DBPediaPL(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/pl/FiQAPLRetrieval.py b/mteb/tasks/Retrieval/pl/FiQAPLRetrieval.py index 8107ec4ed4..d7c9139ece 100644 --- a/mteb/tasks/Retrieval/pl/FiQAPLRetrieval.py +++ b/mteb/tasks/Retrieval/pl/FiQAPLRetrieval.py @@ -27,4 +27,6 @@ class FiQAPLRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/pl/HotpotQAPLRetrieval.py b/mteb/tasks/Retrieval/pl/HotpotQAPLRetrieval.py index 676ed15023..e3969d86a4 100644 --- a/mteb/tasks/Retrieval/pl/HotpotQAPLRetrieval.py +++ b/mteb/tasks/Retrieval/pl/HotpotQAPLRetrieval.py @@ -27,4 +27,6 @@ class HotpotQAPL(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/pl/MSMARCOPLRetrieval.py 
b/mteb/tasks/Retrieval/pl/MSMARCOPLRetrieval.py index a7a020d7c0..d16361287e 100644 --- a/mteb/tasks/Retrieval/pl/MSMARCOPLRetrieval.py +++ b/mteb/tasks/Retrieval/pl/MSMARCOPLRetrieval.py @@ -27,4 +27,6 @@ class MSMARCOPL(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/pl/NFCorpusPLRetrieval.py b/mteb/tasks/Retrieval/pl/NFCorpusPLRetrieval.py index 0bc9d3bf43..35bbe65329 100644 --- a/mteb/tasks/Retrieval/pl/NFCorpusPLRetrieval.py +++ b/mteb/tasks/Retrieval/pl/NFCorpusPLRetrieval.py @@ -27,4 +27,6 @@ class NFCorpusPL(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/pl/NQPLRetrieval.py b/mteb/tasks/Retrieval/pl/NQPLRetrieval.py index 3cc7784859..3a1bbf1662 100644 --- a/mteb/tasks/Retrieval/pl/NQPLRetrieval.py +++ b/mteb/tasks/Retrieval/pl/NQPLRetrieval.py @@ -27,4 +27,6 @@ class NQPL(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/pl/QuoraPLRetrieval.py b/mteb/tasks/Retrieval/pl/QuoraPLRetrieval.py index c2a332ae74..769e214bb1 100644 --- a/mteb/tasks/Retrieval/pl/QuoraPLRetrieval.py +++ b/mteb/tasks/Retrieval/pl/QuoraPLRetrieval.py @@ -27,4 +27,6 @@ class QuoraPLRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/pl/SCIDOCSPLRetrieval.py b/mteb/tasks/Retrieval/pl/SCIDOCSPLRetrieval.py index a5eb2362c5..fef578b28c 100644 --- a/mteb/tasks/Retrieval/pl/SCIDOCSPLRetrieval.py +++ b/mteb/tasks/Retrieval/pl/SCIDOCSPLRetrieval.py @@ -27,4 +27,6 @@ class SCIDOCSPL(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git 
a/mteb/tasks/Retrieval/pl/SciFactPLRetrieval.py b/mteb/tasks/Retrieval/pl/SciFactPLRetrieval.py index 6c8934d49e..680d2ca560 100644 --- a/mteb/tasks/Retrieval/pl/SciFactPLRetrieval.py +++ b/mteb/tasks/Retrieval/pl/SciFactPLRetrieval.py @@ -27,4 +27,6 @@ class SciFactPL(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/pl/TRECCOVIDPLRetrieval.py b/mteb/tasks/Retrieval/pl/TRECCOVIDPLRetrieval.py index 6b1b22e989..713aab120d 100644 --- a/mteb/tasks/Retrieval/pl/TRECCOVIDPLRetrieval.py +++ b/mteb/tasks/Retrieval/pl/TRECCOVIDPLRetrieval.py @@ -27,4 +27,6 @@ class TRECCOVIDPL(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/zh/CMTEBRetrieval.py b/mteb/tasks/Retrieval/zh/CMTEBRetrieval.py index 6adfa545b6..632fb06b72 100644 --- a/mteb/tasks/Retrieval/zh/CMTEBRetrieval.py +++ b/mteb/tasks/Retrieval/zh/CMTEBRetrieval.py @@ -48,6 +48,8 @@ class T2Retrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): @@ -82,6 +84,8 @@ class MMarcoRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): @@ -116,6 +120,8 @@ class DuRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): @@ -150,6 +156,8 @@ class CovidRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): @@ -184,6 +192,8 @@ class CmedqaRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def 
load_data(self, **kwargs): @@ -218,6 +228,8 @@ class EcomRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): @@ -252,6 +264,8 @@ class MedicalRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): @@ -286,6 +300,8 @@ class VideoRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/STS/de/GermanSTSBenchmarkSTS.py b/mteb/tasks/STS/de/GermanSTSBenchmarkSTS.py index 9f0ef64e08..7c2561c4f1 100644 --- a/mteb/tasks/STS/de/GermanSTSBenchmarkSTS.py +++ b/mteb/tasks/STS/de/GermanSTSBenchmarkSTS.py @@ -28,6 +28,8 @@ class GermanSTSBenchmarkSTS(AbsTaskSTS): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @property diff --git a/mteb/tasks/STS/en/BiossesSTS.py b/mteb/tasks/STS/en/BiossesSTS.py index c1ec9ccf31..8d57176e83 100644 --- a/mteb/tasks/STS/en/BiossesSTS.py +++ b/mteb/tasks/STS/en/BiossesSTS.py @@ -27,6 +27,8 @@ class BiossesSTS(AbsTaskSTS): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @property diff --git a/mteb/tasks/STS/en/STS12STS.py b/mteb/tasks/STS/en/STS12STS.py index 0be28a21b6..94ba603634 100644 --- a/mteb/tasks/STS/en/STS12STS.py +++ b/mteb/tasks/STS/en/STS12STS.py @@ -27,6 +27,8 @@ class STS12STS(AbsTaskSTS): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @property diff --git a/mteb/tasks/STS/en/STS13STS.py b/mteb/tasks/STS/en/STS13STS.py index 09b79eedc5..183960a05f 100644 --- a/mteb/tasks/STS/en/STS13STS.py +++ b/mteb/tasks/STS/en/STS13STS.py @@ -27,6 +27,8 @@ class STS13STS(AbsTaskSTS): dialect=None, text_creation=None, 
bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @property diff --git a/mteb/tasks/STS/en/STS14STS.py b/mteb/tasks/STS/en/STS14STS.py index b19f3c9ddc..9d7002e850 100644 --- a/mteb/tasks/STS/en/STS14STS.py +++ b/mteb/tasks/STS/en/STS14STS.py @@ -27,6 +27,8 @@ class STS14STS(AbsTaskSTS): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @property diff --git a/mteb/tasks/STS/en/STS15STS.py b/mteb/tasks/STS/en/STS15STS.py index 1982da86ff..87026c8646 100644 --- a/mteb/tasks/STS/en/STS15STS.py +++ b/mteb/tasks/STS/en/STS15STS.py @@ -27,6 +27,8 @@ class STS15STS(AbsTaskSTS): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @property diff --git a/mteb/tasks/STS/en/STS16STS.py b/mteb/tasks/STS/en/STS16STS.py index b9b0d3c9ef..f9b7c236a4 100644 --- a/mteb/tasks/STS/en/STS16STS.py +++ b/mteb/tasks/STS/en/STS16STS.py @@ -27,6 +27,8 @@ class STS16STS(AbsTaskSTS): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @property diff --git a/mteb/tasks/STS/en/STSBenchmarkSTS.py b/mteb/tasks/STS/en/STSBenchmarkSTS.py index 469ae8cdbf..6a85300341 100644 --- a/mteb/tasks/STS/en/STSBenchmarkSTS.py +++ b/mteb/tasks/STS/en/STSBenchmarkSTS.py @@ -27,6 +27,8 @@ class STSBenchmarkSTS(AbsTaskSTS): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @property diff --git a/mteb/tasks/STS/en/SickrSTS.py b/mteb/tasks/STS/en/SickrSTS.py index 501d70db39..eff86e1abb 100644 --- a/mteb/tasks/STS/en/SickrSTS.py +++ b/mteb/tasks/STS/en/SickrSTS.py @@ -27,6 +27,8 @@ class SickrSTS(AbsTaskSTS): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @property diff --git a/mteb/tasks/STS/es/STSES.py b/mteb/tasks/STS/es/STSES.py index 1fceff5044..f0c329f6bb 100644 --- a/mteb/tasks/STS/es/STSES.py +++ 
b/mteb/tasks/STS/es/STSES.py @@ -31,6 +31,8 @@ class STSES(AbsTaskSTS): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @property diff --git a/mteb/tasks/STS/fr/SickFrSTS.py b/mteb/tasks/STS/fr/SickFrSTS.py index acd6e95b5a..626c8bcc73 100644 --- a/mteb/tasks/STS/fr/SickFrSTS.py +++ b/mteb/tasks/STS/fr/SickFrSTS.py @@ -29,6 +29,8 @@ class SickFrSTS(AbsTaskSTS): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @property diff --git a/mteb/tasks/STS/multilingual/STS17CrosslingualSTS.py b/mteb/tasks/STS/multilingual/STS17CrosslingualSTS.py index b03bfc293c..cf8c95c66e 100644 --- a/mteb/tasks/STS/multilingual/STS17CrosslingualSTS.py +++ b/mteb/tasks/STS/multilingual/STS17CrosslingualSTS.py @@ -41,6 +41,8 @@ class STS17Crosslingual(AbsTaskSTS, CrosslingualTask): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 500}, + avg_character_length={"test": 43.3}, ) @property diff --git a/mteb/tasks/STS/multilingual/STS22CrosslingualSTS.py b/mteb/tasks/STS/multilingual/STS22CrosslingualSTS.py index afe8080df5..d1dc92dac0 100644 --- a/mteb/tasks/STS/multilingual/STS22CrosslingualSTS.py +++ b/mteb/tasks/STS/multilingual/STS22CrosslingualSTS.py @@ -48,6 +48,8 @@ class STS22CrosslingualSTS(AbsTaskSTS, CrosslingualTask): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 8060}, + avg_character_length={"test": 1992.8}, ) @property diff --git a/mteb/tasks/STS/multilingual/STSBenchmarkMultilingualSTS.py b/mteb/tasks/STS/multilingual/STSBenchmarkMultilingualSTS.py index 1be043b9c9..7984e5b65e 100644 --- a/mteb/tasks/STS/multilingual/STSBenchmarkMultilingualSTS.py +++ b/mteb/tasks/STS/multilingual/STSBenchmarkMultilingualSTS.py @@ -35,6 +35,8 @@ class STSBenchmarkMultilingualSTS(AbsTaskSTS, MultilingualTask): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @property
diff --git a/mteb/tasks/STS/pl/PolishSTS.py b/mteb/tasks/STS/pl/PolishSTS.py index 407c6ce1f4..23f0b2c72d 100644 --- a/mteb/tasks/STS/pl/PolishSTS.py +++ b/mteb/tasks/STS/pl/PolishSTS.py @@ -26,6 +26,8 @@ class SickrPLSTS(AbsTaskSTS): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 9812}, + avg_character_length={"test": 42.8}, ) @property @@ -58,6 +60,8 @@ class CdscrSTS(AbsTaskSTS): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @property diff --git a/mteb/tasks/STS/zh/CMTEBSTS.py b/mteb/tasks/STS/zh/CMTEBSTS.py index 4f4012a539..aecfb3895d 100644 --- a/mteb/tasks/STS/zh/CMTEBSTS.py +++ b/mteb/tasks/STS/zh/CMTEBSTS.py @@ -27,6 +27,8 @@ class ATEC(AbsTaskSTS): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @property @@ -59,6 +61,8 @@ class BQ(AbsTaskSTS): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @property @@ -91,6 +95,8 @@ class LCQMC(AbsTaskSTS): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @property @@ -123,6 +129,8 @@ class PAWSX(AbsTaskSTS): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @property @@ -155,6 +163,8 @@ class STSB(AbsTaskSTS): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @property @@ -187,6 +197,8 @@ class AFQMC(AbsTaskSTS): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @property @@ -219,4 +231,6 @@ class QBQTC(AbsTaskSTS): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Summarization/en/SummEvalSummarization.py b/mteb/tasks/Summarization/en/SummEvalSummarization.py index bb36333352..91dc2f5ff3 100644 --- 
a/mteb/tasks/Summarization/en/SummEvalSummarization.py +++ b/mteb/tasks/Summarization/en/SummEvalSummarization.py @@ -27,6 +27,8 @@ class SummEvalSummarization(AbsTaskSummarization): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 2800}, + avg_character_length={"test": 359.8}, ) @property diff --git a/mteb/tasks/Summarization/fr/SummEvalFrSummarization.py b/mteb/tasks/Summarization/fr/SummEvalFrSummarization.py index e1f5e341ee..80d2f51fd0 100644 --- a/mteb/tasks/Summarization/fr/SummEvalFrSummarization.py +++ b/mteb/tasks/Summarization/fr/SummEvalFrSummarization.py @@ -17,15 +17,17 @@ class SummEvalFrSummarization(AbsTaskSummarization): main_score="cosine_spearman", revision="b385812de6a9577b6f4d0f88c6a6e35395a94054", date=None, - form=None, + form=["written"], domains=None, task_subtypes=None, license=None, socioeconomic_status=None, annotations_creators=None, dialect=None, - text_creation=None, + text_creation="machine-translated", bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @property