From 13f759a62bff085e156e4d115f64604c2dc0f087 Mon Sep 17 00:00:00 2001 From: Martin Bernstorff Date: Thu, 21 Mar 2024 11:03:02 +0100 Subject: [PATCH 1/6] dev: run tests on pull request towards any branch --- .github/workflows/tests.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 67c71e53a3..7f30575912 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -8,7 +8,6 @@ on: push: branches: [main] pull_request: - branches: [main] jobs: pytest: From 845099d5b49b0757cc4cf23c08c6d7f65627538e Mon Sep 17 00:00:00 2001 From: Martin Bernstorff Date: Thu, 21 Mar 2024 11:19:35 +0100 Subject: [PATCH 2/6] dev: add isort (#271) * dev: add isort * dev: add isort --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e7cef2da3e..7be110688b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -61,7 +61,7 @@ exclude = ["tests", "results"] target-version = "py38" [tool.ruff.lint] -select = ["E4", "E7", "E9", "F"] +select = ["E4", "E7", "E9", "F", "I"] ignore = ["E203", "E501", "E741", "F403"] ignore-init-module-imports = true From b08913f8616c580f8bbb15bfa808549e2b74912a Mon Sep 17 00:00:00 2001 From: Martin Bernstorff Date: Thu, 21 Mar 2024 11:19:57 +0100 Subject: [PATCH 3/6] dev: add ruff as suggested extension (#274) --- .vscode/extensions.json | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .vscode/extensions.json diff --git a/.vscode/extensions.json b/.vscode/extensions.json new file mode 100644 index 0000000000..424b18fcec --- /dev/null +++ b/.vscode/extensions.json @@ -0,0 +1,5 @@ +{ + "recommendations": [ + "charliermarsh.ruff" + ] +} \ No newline at end of file From c0dc49a6b99f4d8136b7ec46c49563d7e1b866db Mon Sep 17 00:00:00 2001 From: Kenneth Enevoldsen Date: Sun, 24 Mar 2024 12:41:44 +0100 Subject: [PATCH 4/6] Added MMTEB (#275) * restructing the readme * added mmteb * removed unec. 
method * Added docstring to metadata * Updated outdated examples * formatting documents * fix: Updated form to be parsed correctly * Updated based on feedback * Apply suggestions from code review Co-authored-by: Niklas Muennighoff * updated based on feedback * Added suggestion from review * added correction based on review --------- Co-authored-by: Niklas Muennighoff --- .github/pull_request_template.md | 21 ++ Makefile | 5 + README.md | 52 ++-- docs/adding_a_dataset.md | 253 ++++++++++++++++++ docs/mmteb/points.md | 18 ++ docs/mmteb/readme.md | 64 +++++ mteb/abstasks/AbsTask.py | 15 +- mteb/abstasks/TaskMetadata.py | 32 ++- .../BitextMining/da/BornholmskBitextMining.py | 5 - .../multilingual/BUCCBitextMining.py | 4 - .../multilingual/DiaBLaBitextMining.py | 4 - .../multilingual/FloresBitextMining.py | 4 - .../NorwegianCourtsBitextMining.py | 4 - .../multilingual/TatoebaBitextMining.py | 4 - .../da/DdiscoCohesionClassification.py | 4 - .../en/AmazonPolarityClassification.py | 4 - .../en/Banking77Classification.py | 4 - .../Classification/en/ImdbClassification.py | 4 - .../AmazonReviewsClassification.py | 4 - .../multilingual/MTOPDomainClassification.py | 4 - .../multilingual/MTOPIntentClassification.py | 4 - .../multilingual/MasakhaNEWSClassification.py | 4 - .../MassiveIntentClassification.py | 4 - .../MassiveScenarioClassification.py | 4 - .../Classification/nb/NoRecClassification.py | 4 - .../nb/NorwegianParliamentClassification.py | 4 - .../Classification/pl/PolishClassification.py | 20 -- .../Classification/sv/SweRecClassification.py | 4 - .../Clustering/de/BlurbsClusteringP2P.py | 4 - .../Clustering/de/BlurbsClusteringS2S.py | 4 - .../Clustering/de/TenKGnadClusteringP2P.py | 4 - .../Clustering/de/TenKGnadClusteringS2S.py | 4 - .../tasks/Clustering/en/ArxivClusteringP2P.py | 4 - .../tasks/Clustering/en/ArxivClusteringS2S.py | 4 - .../Clustering/en/BigPatentClustering.py | 4 - .../Clustering/en/BiorxivClusteringP2P.py | 4 - 
.../Clustering/en/BiorxivClusteringS2S.py | 4 - .../Clustering/en/MedrxivClusteringP2P.py | 4 - .../Clustering/en/MedrxivClusteringS2S.py | 4 - mteb/tasks/Clustering/en/RedditClustering.py | 4 - .../Clustering/en/RedditClusteringP2P.py | 4 - .../Clustering/en/StackExchangeClustering.py | 19 -- .../en/StackExchangeClusteringP2P.py | 4 - .../en/TwentyNewsgroupsClustering.py | 4 - .../Clustering/en/WikiCitiesClustering.py | 4 - .../Clustering/es/FloresClusteringS2S.py | 4 - .../Clustering/es/SpanishNewsClusteringP2P.py | 16 -- .../Clustering/fr/AlloProfClusteringP2P.py | 4 - .../Clustering/fr/AlloProfClusteringS2S.py | 4 - mteb/tasks/Clustering/fr/HALClusteringS2S.py | 4 - .../tasks/Clustering/fr/MLSUMClusteringP2P.py | 4 - .../tasks/Clustering/fr/MLSUMClusteringS2S.py | 4 - .../multilingual/MasakhaNEWSClusteringP2P.py | 4 - .../multilingual/MasakhaNEWSClusteringS2S.py | 4 - mteb/tasks/Clustering/pl/PolishClustering.py | 4 - mteb/tasks/Clustering/zh/CMTEBClustering.py | 16 -- .../en/SprintDuplicateQuestionsPC.py | 4 - .../en/TwitterSemEval2015PC.py | 4 - .../en/TwitterURLCorpusPC.py | 4 - .../multilingual/OpusparcusPC.py | 4 - .../PairClassification/multilingual/PawsX.py | 4 - mteb/tasks/PairClassification/pl/PolishPC.py | 16 -- .../zh/CMTEBPairClassification.py | 8 - .../Reranking/en/AskUbuntuDupQuestions.py | 4 - mteb/tasks/Reranking/en/MindSmallReranking.py | 4 - mteb/tasks/Reranking/en/SciDocsReranking.py | 40 ++- .../Reranking/en/StackOverflowDupQuestions.py | 4 - mteb/tasks/Reranking/fr/AlloprofReranking.py | 4 - mteb/tasks/Reranking/fr/SyntecReranking.py | 4 - .../Reranking/multilingual/MIRACLReranking.py | 4 - mteb/tasks/Reranking/zh/CMTEBReranking.py | 28 -- mteb/tasks/Retrieval/de/GerDaLIRRetrieval.py | 4 - mteb/tasks/Retrieval/de/GermanDPRRetrieval.py | 4 - .../tasks/Retrieval/de/GermanQuADRetrieval.py | 4 - mteb/tasks/Retrieval/en/ArguAnaRetrieval.py | 4 - .../en/CQADupstackAndroidRetrieval.py | 16 -- .../en/CQADupstackEnglishRetrieval.py | 4 - 
.../en/CQADupstackGamingRetrieval.py | 4 - .../Retrieval/en/CQADupstackGisRetrieval.py | 4 - .../en/CQADupstackMathematicaRetrieval.py | 4 - .../en/CQADupstackPhysicsRetrieval.py | 4 - .../en/CQADupstackProgrammersRetrieval.py | 4 - .../Retrieval/en/CQADupstackStatsRetrieval.py | 4 - .../Retrieval/en/CQADupstackTexRetrieval.py | 4 - .../Retrieval/en/CQADupstackUnixRetrieval.py | 4 - .../en/CQADupstackWebmastersRetrieval.py | 4 - .../en/CQADupstackWordpressRetrieval.py | 4 - .../Retrieval/en/ClimateFEVERRetrieval.py | 4 - mteb/tasks/Retrieval/en/DBPediaRetrieval.py | 4 - mteb/tasks/Retrieval/en/FEVERRetrieval.py | 4 - mteb/tasks/Retrieval/en/FiQA2018Retrieval.py | 4 - mteb/tasks/Retrieval/en/HagridRetrieval.py | 5 - mteb/tasks/Retrieval/en/HotpotQARetrieval.py | 4 - mteb/tasks/Retrieval/en/MSMARCORetrieval.py | 4 - mteb/tasks/Retrieval/en/MSMARCOv2Retrieval.py | 4 - mteb/tasks/Retrieval/en/NFCorpusRetrieval.py | 4 - mteb/tasks/Retrieval/en/NQRetrieval.py | 4 - .../Retrieval/en/NarrativeQARetrieval.py | 5 - mteb/tasks/Retrieval/en/QuoraRetrieval.py | 4 - mteb/tasks/Retrieval/en/SCIDOCSRetrieval.py | 4 - mteb/tasks/Retrieval/en/SciFactRetrieval.py | 4 - mteb/tasks/Retrieval/en/TRECCOVIDRetrieval.py | 4 - .../tasks/Retrieval/en/Touche2020Retrieval.py | 4 - .../es/SpanishPassageRetrievalS2P.py | 4 - .../es/SpanishPassageRetrievalS2S.py | 4 - mteb/tasks/Retrieval/fr/AlloprofRetrieval.py | 4 - mteb/tasks/Retrieval/fr/BSARDRetrieval.py | 4 - mteb/tasks/Retrieval/fr/SyntecRetrieval.py | 4 - mteb/tasks/Retrieval/ko/KoMiracl.py | 4 - mteb/tasks/Retrieval/ko/KoMrtydi.py | 4 - mteb/tasks/Retrieval/ko/KoStrategyQA.py | 4 - .../Retrieval/multilingual/MIRACLRetrieval.py | 4 - .../multilingual/MintakaRetrieval.py | 4 - .../multilingual/MultiLongDocRetrieval.py | 4 - .../multilingual/XMarketRetrieval.py | 4 - .../Retrieval/multilingual/XPQARetrieval.py | 4 - mteb/tasks/Retrieval/pl/ArguAnaPLRetrieval.py | 4 - mteb/tasks/Retrieval/pl/DBPediaPLRetrieval.py | 4 - 
mteb/tasks/Retrieval/pl/FiQAPLRetrieval.py | 4 - .../tasks/Retrieval/pl/HotpotQAPLRetrieval.py | 4 - mteb/tasks/Retrieval/pl/MSMARCOPLRetrieval.py | 4 - .../tasks/Retrieval/pl/NFCorpusPLRetrieval.py | 4 - mteb/tasks/Retrieval/pl/NQPLRetrieval.py | 4 - mteb/tasks/Retrieval/pl/QuoraPLRetrieval.py | 4 - mteb/tasks/Retrieval/pl/SCIDOCSPLRetrieval.py | 4 - mteb/tasks/Retrieval/pl/SciFactPLRetrieval.py | 4 - .../Retrieval/pl/TRECCOVIDPLRetrieval.py | 4 - mteb/tasks/Retrieval/zh/CMTEBRetrieval.py | 32 --- mteb/tasks/STS/zh/CMTEBSTS.py | 4 - 129 files changed, 447 insertions(+), 671 deletions(-) create mode 100644 .github/pull_request_template.md create mode 100644 docs/adding_a_dataset.md create mode 100644 docs/mmteb/points.md create mode 100644 docs/mmteb/readme.md diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000000..4bd98af03c --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,21 @@ + + + + + + +## Checklist for adding MMTEB dataset + + +- [ ] I have tested that the dataset runs with the `mteb` package. +- [ ] I have run the following models on the task (adding the results to the pr). These can be run using the `mteb run -m {model_name} -t {task_name}` command. + - [ ] `sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2` + - [ ] `intfloat/multilingual-e5-small` +- [ ] I have checked that the performance is neither trivial (both models gain close to perfect scores) nor random (both models gain close to random scores). +- [ ] I have considered the size of the dataset and reduced it if it is too big (2048 examples is typically large enough for most tasks) +- [ ] Run tests locally to make sure nothing is broken using `make test`. +- [ ] Run the formatter to format the code using `make lint`. +- [ ] I have added points for my submission to the [POINTS.md](https://github.com/embeddings-benchmark/mteb/tree/main/docs/mmteb/POINTS.md) file. 
\ No newline at end of file diff --git a/Makefile b/Makefile index 134210f7ea..24d178e0a8 100644 --- a/Makefile +++ b/Makefile @@ -15,3 +15,8 @@ test-parallel: @echo "--- ๐Ÿงช Running tests ---" @echo "Note that parallel tests can sometimes cause issues with some tests." pytest -n auto --dist=loadfile -s -v + +pr: + @echo "--- ๐Ÿš€ Running requirements for a PR ---" + make lint + make test-parallel diff --git a/README.md b/README.md index 553794358c..20ba0145f9 100644 --- a/README.md +++ b/README.md @@ -46,6 +46,8 @@ from sentence_transformers import SentenceTransformer # Define the sentence-transformers model name model_name = "average_word_embeddings_komninos" +# or directly from huggingface: +# model_name = "sentence-transformers/all-MiniLM-L6-v2" model = SentenceTransformer(model_name) evaluation = MTEB(tasks=["Banking77Classification"]) @@ -131,15 +133,15 @@ Models should implement the following interface, implementing an `encode` functi ```python class MyModel(): - def encode(self, sentences, batch_size=32, **kwargs): + def encode(self, sentences: list[str], **kwargs) -> list[np.ndarray] | list[torch.Tensor]: """ Returns a list of embeddings for the given sentences. + Args: - sentences (`List[str]`): List of sentences to encode - batch_size (`int`): Batch size for the encoding + sentences: List of sentences to encode Returns: - `List[np.ndarray]` or `List[tensor]`: List of embeddings for the given sentences + List of embeddings for the given sentences """ pass @@ -152,35 +154,33 @@ If you'd like to use different encoding functions for query and corpus when eval ```python class MyModel(): - def encode_queries(self, queries, batch_size=32, **kwargs): + def encode_queries(self, queries: list[str], **kwargs) -> list[np.ndarray] | list[torch.Tensor]: """ Returns a list of embeddings for the given sentences. 
Args: - queries (`List[str]`): List of sentences to encode - batch_size (`int`): Batch size for the encoding + queries: List of sentences to encode Returns: - `List[np.ndarray]` or `List[tensor]`: List of embeddings for the given sentences + List of embeddings for the given sentences """ pass - def encode_corpus(self, corpus, batch_size=32, **kwargs): + def encode_corpus(self, corpus: list[str] | list[dict[str, str]], **kwargs) -> list[np.ndarray] | list[torch.Tensor]: """ Returns a list of embeddings for the given sentences. Args: - corpus (`List[str]` or `List[Dict[str, str]]`): List of sentences to encode + corpus: List of sentences to encode or list of dictionaries with keys "title" and "text" - batch_size (`int`): Batch size for the encoding Returns: - `List[np.ndarray]` or `List[tensor]`: List of embeddings for the given sentences + List of embeddings for the given sentences """ pass ``` -### Evaluating on a custom task +### Evaluating on a custom dataset -To add a new task, you need to implement a new class that inherits from the `AbsTask` associated with the task type (e.g. `AbsTaskReranking` for reranking tasks). You can find the supported task types in [here](https://github.com/embeddings-benchmark/mteb-draft/tree/main/mteb/abstasks). +To evaluate on a custom task, you can run the following code on your custom task. See [how to add a new task](docs/adding_a_dataset.md), for how to create a new task in MTEB. 
```python from mteb import MTEB @@ -188,28 +188,14 @@ from mteb.abstasks.AbsTaskReranking import AbsTaskReranking from sentence_transformers import SentenceTransformer -class MindSmallReranking(AbsTaskReranking): - @property - def description(self): - return { - "name": "MindSmallReranking", - "hf_hub_name": "mteb/mind_small", - "description": "Microsoft News Dataset: A Large-Scale English Dataset for News Recommendation Research", - "reference": "https://www.microsoft.com/en-us/research/uploads/prod/2019/03/nl4se18LinkSO.pdf", - "type": "Reranking", - "category": "s2s", - "eval_splits": ["validation"], - "eval_langs": ["en"], - "main_score": "map", - } +class MyCustomTask(AbsTaskReranking): + ... model = SentenceTransformer("average_word_embeddings_komninos") -evaluation = MTEB(tasks=[MindSmallReranking()]) +evaluation = MTEB(tasks=[MyCustomTask()]) evaluation.run(model) ``` -> **Note:** for multilingual tasks, make sure your class also inherits from the `MultilingualTask` class like in [this](https://github.com/embeddings-benchmark/mteb-draft/blob/main/mteb/tasks/Classification/MTOPIntentClassification.py) example. -
@@ -221,12 +207,16 @@ evaluation.run(model) | 📋 [Tasks] | Overview of available tasks | | 📈 [Leaderboard] | The interactive leaderboard of the benchmark | | 🤖 [Adding a model] | Information related to how to submit a model to the leaderboard | +| 👩‍💻 [Adding a dataset] | How to add a new task/dataset to MTEB | | 🤝 [Contributing] | How to contribute to MTEB and set it up for development | + [Tasks]: docs/tasks.md [Contributing]: docs/contributing.md [Adding a model]: docs/adding_a_model.md +[Adding a dataset]: docs/adding_a_dataset.md [Leaderboard]: https://huggingface.co/spaces/mteb/leaderboard +[MMTEB]: docs/mmteb/readme.md ## Citing diff --git a/docs/adding_a_dataset.md b/docs/adding_a_dataset.md new file mode 100644 index 0000000000..8086d971cc --- /dev/null +++ b/docs/adding_a_dataset.md @@ -0,0 +1,253 @@ + +# Adding a Dataset + +To add a new dataset to MTEB, you need to do three things: + +1) Implement a task with the desired dataset, by subclassing an abstract task +2) Add metadata to the task +3) Submit the edits to the [MTEB](https://github.com/embeddings-benchmark/mteb) repository + +If you have any questions regarding this process feel free to open a discussion [thread](https://github.com/embeddings-benchmark/mteb/discussions). + +> Note: When we mention adding a dataset we refer to a subclass of one of the abstasks. + +## 1) Creating a new subclass + +### A Simple Example + +To add a new task, you need to implement a new class that inherits from the `AbsTask` associated with the task type (e.g. `AbsTaskReranking` for reranking tasks). You can find the supported task types [here](https://github.com/embeddings-benchmark/mteb-draft/tree/main/mteb/abstasks).
+ +```python +from mteb import MTEB +from mteb.abstasks.AbsTaskReranking import AbsTaskReranking +from sentence_transformers import SentenceTransformer +from mteb.abstasks.TaskMetadata import TaskMetadata + +class SciDocsReranking(AbsTaskReranking): + metadata = TaskMetadata( + name="SciDocsRR", + description="Ranking of related scientific papers based on their title.", + reference="https://allenai.org/data/scidocs", + hf_hub_name="mteb/scidocs-reranking", + type="Reranking", + category="s2s", + eval_splits=["test"], + eval_langs=["en"], + main_score="map", + revision="d3c5e1fc0b855ab6097bf1cda04dd73947d7caab", + date=None, + form="written", + domains=["Academic", "Non-fiction"], + task_subtypes=["Scientific Reranking"], + license="cc-by-4.0", + socioeconomic_status="high", + annotations_creators=None, + dialect=None, + text_creation="found", + bibtex_citation= ... # removed for brevity +) + +# testing the task with a model: +model = SentenceTransformer("average_word_embeddings_komninos") +evaluation = MTEB(tasks=[SciDocsReranking()]) +evaluation.run(model) +``` + +> **Note:** for multilingual tasks, make sure your class also inherits from the `MultilingualTask` class like in [this](https://github.com/embeddings-benchmark/mteb-draft/blob/main/mteb/tasks/Classification/MTOPIntentClassification.py) example. +> For cross-lingual tasks, make sure your class also inherits from the `CrosslingualTask` class like in [this](https://github.com/embeddings-benchmark/mteb/blob/main/mteb/tasks/BitextMining/TatoebaBitextMining.py) example. + + + +### A Detailed Example +Often the dataset from HuggingFace is not in the format expected by MTEB. To resolve this you can either change the format on Hugging Face or add a `dataset_transform` method to your dataset to transform it into the right format on the fly.
Here is an example along with some design considerations: + +```python +class VGClustering(AbsTaskClustering): + metadata = TaskMetadata( + name="VGClustering", + description="Articles and their classes (e.g. sports) from VG news articles extracted from Norsk Aviskorpus.", + reference="https://huggingface.co/datasets/navjordj/VG_summarization", + hf_hub_name="navjordj/VG_summarization", + type="Clustering", + category="p2p", + eval_splits=["test"], + eval_langs=["nb"], + main_score="v_measure", + revision="d4c5a8ba10ae71224752c727094ac4c46947fa29", + date=("2012-01-01", "2020-01-01"), + form="written", + domains=["Academic", "Non-fiction"], + task_subtypes=["Scientific Reranking"], + license="cc-by-nc", + socioeconomic_status="high", + annotations_creators="derived", + dialect=None, + text_creation="found", + bibtex_citation= ... # removed for brevity +) + + def load_data(self, **kwargs: dict): # noqa: ARG002 + """ + Load dataset from HuggingFace hub + """ + if self.data_loaded: + return + + self.dataset: datasets.DatasetDict = datasets.load_dataset( + self.description["hf_hub_name"], + revision=self.description.get("revision"), + ) + + self.dataset_transform() + self.data_loaded = True + + def dataset_transform(self): + splits = self.description["eval_splits"] + + documents: list = [] + labels: list = [] + label_col = "classes" + + ds = {} + for split in splits: + ds_split = self.dataset[split] + + _label = self.normalize_labels(ds_split[label_col]) + documents.extend(ds_split["title"]) + labels.extend(_label) + + documents.extend(ds_split["ingress"]) + labels.extend(_label) + + documents.extend(ds_split["article"]) + labels.extend(_label) + + assert len(documents) == len(labels) + + rng = random.Random(1111) # local only seed + pairs = list(zip(documents, labels)) + rng.shuffle(pairs) + documents, labels = [list(collection) for collection in zip(*pairs)] + + # To get a more robust estimate we create batches of size 512, this decision can vary depending on dataset 
+ documents_batched = list(batched(documents, 512)) + labels_batched = list(batched(labels, 512)) + + # reduce the size of the dataset as we see that we obtain a consistent scores (if we change the seed) even + # with only 512x4 samples. + documents_batched = documents_batched[:4] + labels_batched = labels_batched[:4] + + + ds[split] = datasets.Dataset.from_dict( + { + "sentences": documents_batched, + "labels": labels_batched, + } + ) + + self.dataset = datasets.DatasetDict(ds) +``` + + + + +## 2) Creating the metadata object +Along with the task MTEB requires metadata regarding the task. If the metadata isn't available please provide your best guess or leave the field as `None`. + +To get an overview of the fields in the metadata object, you can look at the [TaskMetadata](https://github.com/embeddings-benchmark/mteb/blob/main/mteb/abstasks/TaskMetadata.py) class. + + +Note that these fields can be left blank if the information is not available and can be extended if necessary. We do not include any machine-translated (without verification) datasets in the benchmark. + +
+Domains +
+ +The domains follow the categories used in the [Universal Dependencies project](https://universaldependencies.org), though we updated them where deemed appropriate. These do not have to be mutually exclusive. + +| **Domain** | **Description** | +| ------------- | ---------------------------------------------------------------- | +| Academic | Academic writing | +| Religious | Religious text e.g. bibles | +| Blog | [Blogpost, weblog etc.](https://en.wikipedia.org/wiki/Blog) | +| Fiction | Works of [fiction](https://en.wikipedia.org/wiki/Fiction) | +| Government | Governmental communication, websites or similar | +| Legal | Legal documents, laws etc. | +| Medical | doctors notes, medical procedures or similar | +| News | News articles, tabloids etc. | +| Reviews | Reviews e.g. user reviews of products | +| Non-fiction | [non-fiction](https://en.wikipedia.org/wiki/Non-fiction) writing | +| Poetry | Poems, Epics etc. | +| Social | social media content | +| Spoken | Spoken dialogues | +| Encyclopaedic | E.g. Wikipedias | +| Web | Web content | + + +
+ + +
+
+Task Subtypes +
+ +These task subtypes were introduced in the [Scandinavian Embedding Benchmark](https://openreview.net/pdf/f5f1953a9c798ec61bb050e62bc7a94037fd4fab.pdf) and are intended to be extended as needed. + + + +| Formalization | Task | Description | +| ----------------------- | ------------------------ | --------------------------------------------------------------------------------------------------------------- | +| **Retrieval** | | Retrieval focuses on locating and providing relevant information or documents based on a query. | +| | Question answering | Finding answers to queries in a dataset, focusing on exact answers or relevant passages. | +| | Article retrieval | Identifying and retrieving full articles that are relevant to a given query. | +| **Bitext Mining** | | Bitext mining involves identifying parallel texts across languages or dialects for translation or analysis. | +| | Dialect pairing | Identifying pairs of text that are translations of each other across different dialects. | +| **Classification** | | Classification is the process of categorizing text into predefined groups or classes based on their content. | +| | Political | Categorizing text according to political orientation or content. | +| | Language Identification | Determining the language in which a given piece of text is written. | +| | Linguistic Acceptability | Assessing whether a sentence is grammatically correct according to linguistic norms. | +| | Sentiment/Hate Speech | Detecting the sentiment of text or identifying hate speech within the content. | +| | Dialog Systems | Creating or evaluating systems capable of conversing with humans in a natural manner. | +| **Clustering** | | Clustering involves grouping sets of texts together based on their similarity without pre-defined labels. | +| | Thematic Clustering | Grouping texts based on their thematic similarity without prior labeling.
| +| **Reranking** | | Reranking adjusts the order of items in a list to improve relevance or accuracy according to specific criteria. | +| **Pair Classification** | | Pair classification assesses relationships between pairs of items, such as texts, to classify their connection. | +| **STS** | | Semantic Textual Similarity measures the degree of semantic equivalence between two pieces of text. | + + +
+ + + +## 3) Submit a PR + +Once you are finished, create a PR to the [MTEB](https://github.com/embeddings-benchmark/mteb) repository. If you haven't created a PR before please refer to the [GitHub documentation](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/). + +The PR will be reviewed by one of the organizers or contributors who might ask you to change things. Once the PR is approved the dataset will be added into the main repository. + + +Here is a checklist you should consider completing before submitting: + +- [ ] I have tested that the dataset runs with the `mteb` package. + +An easy way to test it is using: +```python +from mteb import MTEB +from sentence_transformers import SentenceTransformer + +# Define the sentence-transformers model name +model_name = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2" + +model = SentenceTransformer(model_name) +evaluation = MTEB(tasks=[YourNewTask()]) +``` + +- [ ] I have run the following models on the task (adding the results to the PR). These can be run using the `mteb run -m {model_name} -t {task_name}` command. + - [ ] `sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2` + - [ ] `intfloat/multilingual-e5-small` +- [ ] I have checked that the performance is neither trivial (both models gain close to perfect scores) nor random (both models gain close to random scores). +- [ ] I have considered the size of the dataset and reduced it if it is too big (2048 examples is typically large enough for most tasks) +- [ ] Run tests locally to make sure nothing is broken using `make test`. +- [ ] Run the formatter to format the code using `make lint`.
\ No newline at end of file diff --git a/docs/mmteb/points.md b/docs/mmteb/points.md new file mode 100644 index 0000000000..b061d338da --- /dev/null +++ b/docs/mmteb/points.md @@ -0,0 +1,18 @@ +# Points + +| GitHub | Total points | New dataset | New task | Dataset annotations | (Bug)fixes | Running Models | Review PR | Paper Writing | Ideation | Coordination | +| ----------------- | ------------ | ----------- | -------- | ------------------- | ---------- | -------------- | -------- | -------------- | -------- | ------------- | +| KennethEnevoldsen | | 38 | | | | | | | | | +| x-tabdeveloping | | 2 | | | | | | | | | + +Note that coordination and ideation are not included in the total points, but are used to determine first and last authors. + +# Contributor Information + +First name and last name are as you want them to appear in a publication. + +| GitHub | First name | Last name | Email | User on openreview | +| ----------------- | ---------- | ---------- | ---------------------------- | -------------------- | +| KennethEnevoldsen | Kenneth | Enevoldsen | kennethcenevoldsen@gmail.com | ~Kenneth_Enevoldsen1 | +| x-tabdeveloping | Márton | Kardos | | ~Márton_Kardos1 | + diff --git a/docs/mmteb/readme.md b/docs/mmteb/readme.md new file mode 100644 index 0000000000..abdf7091df --- /dev/null +++ b/docs/mmteb/readme.md @@ -0,0 +1,64 @@ +# Welcome to MMTEB! 👋 + +The Massive Multilingual Text Embedding Benchmark (MMTEB) is a community-led extension of [MTEB](https://arxiv.org/abs/2210.07316) to cover embedding tasks for a massive number of languages. + +## Background + +The Massive Text Embedding Benchmark (MTEB) is intended to evaluate the quality of document embeddings. When it was initially introduced, the benchmark consisted of 8 embedding tasks and 58 different datasets.
Since then, MTEB has been subject to multiple community contributions as well as benchmark extensions over specific languages such as [SEB](https://openreview.net/pdf/f5f1953a9c798ec61bb050e62bc7a94037fd4fab.pdf), [C-MTEB](https://github.com/FlagOpen/FlagEmbedding/tree/master/C_MTEB) and [MTEB-French](https://github.com/Lyon-NLP/mteb-french). However, we want even wider coverage and thus announce the community-led extension of MTEB, where we seek to expand coverage of MTEB to as many languages as possible. + +## Contributing to MMTEB + +Everyone can join and contribute to this initiative from 1 April 2024 to 30 April 2024, win some SWAG, and become a co-author of our upcoming paper. We aim to publish the results of our findings at a top conference such as EMNLP, NeurIPS, etc. We have identified four ways to contribute: + +### 🗃️ 1: Contribute a new dataset + +For this segment, you open a PR in the MTEB repository where you create an implementation (subclass) of a task using a new language dataset uploaded to huggingface. Read more about how to add a task [here](../adding_a_dataset.md) and check out [one of the previous additions](https://github.com/embeddings-benchmark/mteb/pull/247) for an example. + +### 🖥️ 2: Contribute a new task + +MTEB currently consists of 8 embedding tasks including tasks such as STS, retrieval, reranking, and more. If you feel like there is a category of tasks that is not yet covered, we would welcome contributions of these as well. + +### 🔍 3: Contribute new scores + +Once we have the datasets, we want to evaluate models on them. We welcome evaluation scores for models, which will be added to the leaderboard. + +### 🔓 4: Review PRs + +We welcome reviews of PRs adding new datasets. If you wish to review PRs of a specific language feel free to contact members of the MTEB team.
+ +## Authorship + +We follow a similar approach as in the [SeaCrowd Project](https://github.com/SEACrowd#contributing-to-seacrowd) and use a point-based system to determine co-authorships. + +To be considered a co-author, at least 10 contribution points are required. The position of contributors in the author list is determined by the score they acquire; higher scores appear first. + +To monitor how many points you have obtained, the contribution point tracking is now live at [this sheet](points.md) and we recommend updating the score along with your PR. Past contributions also count. + +Everyone with sufficient points will also be added to the MTEB GitHub and Huggingface repository as a contributor. + +# Contribution point guideline +The contribution points are computed using the following table: + +> **Note**: The purpose of the point system is not to create barriers to collaboration, but to reward contributions. We might adjust the point requirement lower to accommodate more co-authorship if needed. + + +| Contribution type | Demand | Points | Description | +| ------------------- | ------------------- | ------- | ----------------------------------------------------------------------------------------------------------------- | +| New dataset | As many as possible | 2+bonus | The first dataset for a language x task gains 4 bonus points. | +| New task | If relevant | 10 | Task 2. | +| Dataset annotations | On demand | 1 | Adding missing dataset annotations to existing datasets. | +| (bug)fixes | On demand | 1-10 | Points depend on the effect of code changes. If you want to find issues related to the MMTEB you can find them [here](https://github.com/embeddings-benchmark/mteb/milestone/1). | +| Running Models | On demand | 1 | Task 3. | +| Review PR | On demand | 2 | Task 4. | + +For the purpose of counting points, a language is defined by its [ISO 639-3](https://en.wikipedia.org/wiki/ISO_639-3) code, however, we encourage dialects or written language variants.
All programming languages are considered one language. + +Team submissions are free to distribute points among the members as they like. + +## Communication Channels + +We will communicate via this GitHub repository. Please feel free to open issues or discussions and `Watch` the repository to be notified of any changes. + +# Acknowledgments + +We thank [Contextual AI](https://contextual.ai/) for sponsoring the compute for this project. diff --git a/mteb/abstasks/AbsTask.py b/mteb/abstasks/AbsTask.py index 75785652f8..b6a7da8418 100644 --- a/mteb/abstasks/AbsTask.py +++ b/mteb/abstasks/AbsTask.py @@ -7,8 +7,12 @@ import numpy as np import torch +from mteb.abstasks.TaskMetadata import TaskMetadata + class AbsTask(ABC): + metadata: TaskMetadata + def __init__(self, seed=42, **kwargs): self.dataset = None self.data_loaded = False @@ -37,17 +41,8 @@ def load_data(self, **kwargs): self.data_loaded = True @property - @abstractmethod def metadata_dict(self) -> dict[str, str]: - """ - Returns a description of the task. Should contain the following fields: - name: Name of the task (usually equal to the class name. Should be a valid name for a path on disc) - description: Longer description & references for the task - type: Of the set: [sts] - eval_splits: Splits used for evaluation as list, e.g. 
['dev', 'test'] - main_score: Main score value for task - """ - raise NotImplementedError + return dict(self.metadata) @abstractmethod def evaluate(self, model, split="test"): diff --git a/mteb/abstasks/TaskMetadata.py b/mteb/abstasks/TaskMetadata.py index 499151533a..5ff4412bdb 100644 --- a/mteb/abstasks/TaskMetadata.py +++ b/mteb/abstasks/TaskMetadata.py @@ -21,6 +21,7 @@ "Question answering", "Sentiment/Hate speech", "Thematic clustering", + "Scientific Reranking", ] TASK_DOMAIN = Literal[ @@ -54,6 +55,7 @@ "high", "medium", "low", + "mixed", ] TASK_TYPE = Literal[ @@ -87,7 +89,35 @@ class TaskMetadata(BaseModel): - # Meta: We can annotate the requirements here, and then link to it in the docs. This would move the documentation closer to the code, which I think is a good idea. + """ + Metadata for a task. + + Args: + hf_hub_name: The name of the dataset for the task on the Hugging Face Hub. + revision: The revision of the dataset for the task on the Hugging Face Hub. + name: The name of the task. + description: A description of the task. + type: The type of the task. These include "Classification", "Summarization", "STS", "Retrieval", "Reranking", "Clustering", + "PairClassification", "BitextMining". The type should match the abstask type. + category: The category of the task. E.g. includes "s2s", "s2p", "p2p" (s=sentence, p=paragraph). + reference: A URL to the documentation of the task. E.g. a published paper. + eval_splits: The splits of the dataset used for evaluation. + eval_langs: The languages of the dataset used for evaluation. + main_score: The main score used for evaluation. + date: The date when the data was collected. Specified as a tuple of two dates. + form: The form of the data. Either "spoken" or "written". + domains: The domains of the data. These include "Non-fiction", "Social", "Fiction", "News", "Academic", "Blog", "Encyclopaedic", + "Government", "Legal", "Medical", "Poetry", "Religious", "Reviews", "Web", "Spoken". 
A dataset can belong to multiple domains. + task_subtypes: The subtypes of the task. E.g. includes "Sentiment/Hate speech", "Thematic Clustering". Feel free to update the list as needed. + license: The license of the data. + socioeconomic_status: The socioeconomic status of the data. Includes "high", "medium", "low", "mixed". + annotations_creators: The type of the annotators. Includes "expert-annotated" (annotated by experts), "human-annotated" (annotated e.g. by + mturkers), "derived" (derived from structure in the data). + dialect: The dialect of the data, if applicable. Ideally specified as a BCP-47 language tag. + text_creation: The method of text creation. Includes "found", "created", "machine-translated", "machine-translated and verified", and + "machine-translated and localized". + bibtex_citation: The BibTeX citation for the dataset. + """ hf_hub_name: str revision: str diff --git a/mteb/tasks/BitextMining/da/BornholmskBitextMining.py b/mteb/tasks/BitextMining/da/BornholmskBitextMining.py index 1fe293f7ce..da483e3891 100644 --- a/mteb/tasks/BitextMining/da/BornholmskBitextMining.py +++ b/mteb/tasks/BitextMining/da/BornholmskBitextMining.py @@ -30,11 +30,6 @@ class BornholmBitextMining(AbsTaskBitextMining): bibtex_citation=None, ) - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - return - def load_data(self, **kwargs): """ Load dataset from HuggingFace hub and convert it to the standard format. 
diff --git a/mteb/tasks/BitextMining/multilingual/BUCCBitextMining.py b/mteb/tasks/BitextMining/multilingual/BUCCBitextMining.py index 82350a232f..11c1ea9952 100644 --- a/mteb/tasks/BitextMining/multilingual/BUCCBitextMining.py +++ b/mteb/tasks/BitextMining/multilingual/BUCCBitextMining.py @@ -30,7 +30,3 @@ class BUCCBitextMining(AbsTaskBitextMining, CrosslingualTask): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/BitextMining/multilingual/DiaBLaBitextMining.py b/mteb/tasks/BitextMining/multilingual/DiaBLaBitextMining.py index 4fd3546bc1..d9d90196ae 100644 --- a/mteb/tasks/BitextMining/multilingual/DiaBLaBitextMining.py +++ b/mteb/tasks/BitextMining/multilingual/DiaBLaBitextMining.py @@ -31,10 +31,6 @@ class DiaBLaBitextMining(AbsTaskBitextMining, CrosslingualTask): bibtex_citation=None, ) - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - def load_data(self, **kwargs): """ Load dataset from HuggingFace hub and convert it to the standard format. 
diff --git a/mteb/tasks/BitextMining/multilingual/FloresBitextMining.py b/mteb/tasks/BitextMining/multilingual/FloresBitextMining.py index 008d69a524..a03e3220e5 100644 --- a/mteb/tasks/BitextMining/multilingual/FloresBitextMining.py +++ b/mteb/tasks/BitextMining/multilingual/FloresBitextMining.py @@ -256,10 +256,6 @@ class FloresBitextMining(AbsTaskBitextMining, CrosslingualTask): bibtex_citation=None, ) - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - def load_data(self, **kwargs): """ Load dataset from HuggingFace hub diff --git a/mteb/tasks/BitextMining/multilingual/NorwegianCourtsBitextMining.py b/mteb/tasks/BitextMining/multilingual/NorwegianCourtsBitextMining.py index 0011ac934b..01acd7f900 100644 --- a/mteb/tasks/BitextMining/multilingual/NorwegianCourtsBitextMining.py +++ b/mteb/tasks/BitextMining/multilingual/NorwegianCourtsBitextMining.py @@ -30,10 +30,6 @@ class NorwegianCourtsBitextMining(AbsTaskBitextMining): bibtex_citation=None, ) - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - def load_data(self, **kwargs): """ Load dataset from HuggingFace hub and convert it to the standard format. 
diff --git a/mteb/tasks/BitextMining/multilingual/TatoebaBitextMining.py b/mteb/tasks/BitextMining/multilingual/TatoebaBitextMining.py index 7d2d137f86..8983f442c9 100644 --- a/mteb/tasks/BitextMining/multilingual/TatoebaBitextMining.py +++ b/mteb/tasks/BitextMining/multilingual/TatoebaBitextMining.py @@ -143,7 +143,3 @@ class TatoebaBitextMining(AbsTaskBitextMining, CrosslingualTask): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Classification/da/DdiscoCohesionClassification.py b/mteb/tasks/Classification/da/DdiscoCohesionClassification.py index fff065f500..bfe5e303f5 100644 --- a/mteb/tasks/Classification/da/DdiscoCohesionClassification.py +++ b/mteb/tasks/Classification/da/DdiscoCohesionClassification.py @@ -77,7 +77,3 @@ def dataset_transform(self): self.dataset = self.dataset.rename_columns({"rating": "label"}).remove_columns( ["domain"] ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Classification/en/AmazonPolarityClassification.py b/mteb/tasks/Classification/en/AmazonPolarityClassification.py index 591b02e052..18334d3d95 100644 --- a/mteb/tasks/Classification/en/AmazonPolarityClassification.py +++ b/mteb/tasks/Classification/en/AmazonPolarityClassification.py @@ -28,7 +28,3 @@ class AmazonPolarityClassification(AbsTaskClassification): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Classification/en/Banking77Classification.py b/mteb/tasks/Classification/en/Banking77Classification.py index 4c042cb818..935dc47a3c 100644 --- a/mteb/tasks/Classification/en/Banking77Classification.py +++ b/mteb/tasks/Classification/en/Banking77Classification.py @@ -28,7 +28,3 @@ class Banking77Classification(AbsTaskClassification): text_creation=None, bibtex_citation=None, ) - - @property - def 
metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Classification/en/ImdbClassification.py b/mteb/tasks/Classification/en/ImdbClassification.py index 57e9c9be33..dfcae3d7a2 100644 --- a/mteb/tasks/Classification/en/ImdbClassification.py +++ b/mteb/tasks/Classification/en/ImdbClassification.py @@ -28,7 +28,3 @@ class ImdbClassification(AbsTaskClassification): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Classification/multilingual/AmazonReviewsClassification.py b/mteb/tasks/Classification/multilingual/AmazonReviewsClassification.py index 59733396dd..3655db992b 100644 --- a/mteb/tasks/Classification/multilingual/AmazonReviewsClassification.py +++ b/mteb/tasks/Classification/multilingual/AmazonReviewsClassification.py @@ -30,7 +30,3 @@ class AmazonReviewsClassification(MultilingualTask, AbsTaskClassification): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Classification/multilingual/MTOPDomainClassification.py b/mteb/tasks/Classification/multilingual/MTOPDomainClassification.py index a95d5e4f18..f535c6ec62 100644 --- a/mteb/tasks/Classification/multilingual/MTOPDomainClassification.py +++ b/mteb/tasks/Classification/multilingual/MTOPDomainClassification.py @@ -30,7 +30,3 @@ class MTOPDomainClassification(MultilingualTask, AbsTaskClassification): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Classification/multilingual/MTOPIntentClassification.py b/mteb/tasks/Classification/multilingual/MTOPIntentClassification.py index 6a2c9ca589..0443ce8933 100644 --- a/mteb/tasks/Classification/multilingual/MTOPIntentClassification.py +++ b/mteb/tasks/Classification/multilingual/MTOPIntentClassification.py @@ -30,7 
+30,3 @@ class MTOPIntentClassification(MultilingualTask, AbsTaskClassification): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Classification/multilingual/MasakhaNEWSClassification.py b/mteb/tasks/Classification/multilingual/MasakhaNEWSClassification.py index a0105af9d3..93114197d5 100644 --- a/mteb/tasks/Classification/multilingual/MasakhaNEWSClassification.py +++ b/mteb/tasks/Classification/multilingual/MasakhaNEWSClassification.py @@ -47,7 +47,3 @@ class MasakhaNEWSClassification(AbsTaskClassification, MultilingualTask): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Classification/multilingual/MassiveIntentClassification.py b/mteb/tasks/Classification/multilingual/MassiveIntentClassification.py index 7ebc7b6509..ba9fbafe63 100644 --- a/mteb/tasks/Classification/multilingual/MassiveIntentClassification.py +++ b/mteb/tasks/Classification/multilingual/MassiveIntentClassification.py @@ -82,7 +82,3 @@ class MassiveIntentClassification(MultilingualTask, AbsTaskClassification): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Classification/multilingual/MassiveScenarioClassification.py b/mteb/tasks/Classification/multilingual/MassiveScenarioClassification.py index 8a8a4bab14..48113970e3 100644 --- a/mteb/tasks/Classification/multilingual/MassiveScenarioClassification.py +++ b/mteb/tasks/Classification/multilingual/MassiveScenarioClassification.py @@ -82,7 +82,3 @@ class MassiveScenarioClassification(MultilingualTask, AbsTaskClassification): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Classification/nb/NoRecClassification.py 
b/mteb/tasks/Classification/nb/NoRecClassification.py index 5281329604..abe2db8729 100644 --- a/mteb/tasks/Classification/nb/NoRecClassification.py +++ b/mteb/tasks/Classification/nb/NoRecClassification.py @@ -27,7 +27,3 @@ class NoRecClassification(AbsTaskClassification): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Classification/nb/NorwegianParliamentClassification.py b/mteb/tasks/Classification/nb/NorwegianParliamentClassification.py index b181cc7d36..82befe0cac 100644 --- a/mteb/tasks/Classification/nb/NorwegianParliamentClassification.py +++ b/mteb/tasks/Classification/nb/NorwegianParliamentClassification.py @@ -27,7 +27,3 @@ class NorwegianParliamentClassification(AbsTaskClassification): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Classification/pl/PolishClassification.py b/mteb/tasks/Classification/pl/PolishClassification.py index c8fb4eb158..30ef6e3478 100644 --- a/mteb/tasks/Classification/pl/PolishClassification.py +++ b/mteb/tasks/Classification/pl/PolishClassification.py @@ -29,10 +29,6 @@ class CbdClassification(AbsTaskClassification): bibtex_citation=None, ) - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - class PolEmo2InClassification(AbsTaskClassification): metadata = TaskMetadata( @@ -59,10 +55,6 @@ class PolEmo2InClassification(AbsTaskClassification): bibtex_citation=None, ) - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - class PolEmo2OutClassification(AbsTaskClassification): metadata = TaskMetadata( @@ -90,10 +82,6 @@ class PolEmo2OutClassification(AbsTaskClassification): bibtex_citation=None, ) - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - class AllegroReviewsClassification(AbsTaskClassification): metadata 
= TaskMetadata( @@ -119,10 +107,6 @@ class AllegroReviewsClassification(AbsTaskClassification): bibtex_citation=None, ) - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - class PacClassification(AbsTaskClassification): metadata = TaskMetadata( @@ -147,7 +131,3 @@ class PacClassification(AbsTaskClassification): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Classification/sv/SweRecClassification.py b/mteb/tasks/Classification/sv/SweRecClassification.py index ab3bef79d7..50a31003dd 100644 --- a/mteb/tasks/Classification/sv/SweRecClassification.py +++ b/mteb/tasks/Classification/sv/SweRecClassification.py @@ -27,7 +27,3 @@ class SweRecClassification(AbsTaskClassification): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Clustering/de/BlurbsClusteringP2P.py b/mteb/tasks/Clustering/de/BlurbsClusteringP2P.py index a5e6dad822..7b3b4f74e8 100644 --- a/mteb/tasks/Clustering/de/BlurbsClusteringP2P.py +++ b/mteb/tasks/Clustering/de/BlurbsClusteringP2P.py @@ -27,7 +27,3 @@ class BlurbsClusteringP2P(AbsTaskClustering): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Clustering/de/BlurbsClusteringS2S.py b/mteb/tasks/Clustering/de/BlurbsClusteringS2S.py index 93656a8596..640882f72a 100644 --- a/mteb/tasks/Clustering/de/BlurbsClusteringS2S.py +++ b/mteb/tasks/Clustering/de/BlurbsClusteringS2S.py @@ -28,7 +28,3 @@ class BlurbsClusteringS2S(AbsTaskClustering): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Clustering/de/TenKGnadClusteringP2P.py b/mteb/tasks/Clustering/de/TenKGnadClusteringP2P.py index 
8baf4815b3..86eb8af166 100644 --- a/mteb/tasks/Clustering/de/TenKGnadClusteringP2P.py +++ b/mteb/tasks/Clustering/de/TenKGnadClusteringP2P.py @@ -28,7 +28,3 @@ class TenKGnadClusteringP2P(AbsTaskClustering): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Clustering/de/TenKGnadClusteringS2S.py b/mteb/tasks/Clustering/de/TenKGnadClusteringS2S.py index 65e88237b8..2788a6de4b 100644 --- a/mteb/tasks/Clustering/de/TenKGnadClusteringS2S.py +++ b/mteb/tasks/Clustering/de/TenKGnadClusteringS2S.py @@ -28,7 +28,3 @@ class TenKGnadClusteringS2S(AbsTaskClustering): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Clustering/en/ArxivClusteringP2P.py b/mteb/tasks/Clustering/en/ArxivClusteringP2P.py index 2323974eb4..cb80bcc2af 100644 --- a/mteb/tasks/Clustering/en/ArxivClusteringP2P.py +++ b/mteb/tasks/Clustering/en/ArxivClusteringP2P.py @@ -28,7 +28,3 @@ class ArxivClusteringP2P(AbsTaskClustering): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Clustering/en/ArxivClusteringS2S.py b/mteb/tasks/Clustering/en/ArxivClusteringS2S.py index dd813577de..f43dd59c74 100644 --- a/mteb/tasks/Clustering/en/ArxivClusteringS2S.py +++ b/mteb/tasks/Clustering/en/ArxivClusteringS2S.py @@ -28,7 +28,3 @@ class ArxivClusteringS2S(AbsTaskClustering): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Clustering/en/BigPatentClustering.py b/mteb/tasks/Clustering/en/BigPatentClustering.py index d10c90339c..9fe3c45601 100644 --- a/mteb/tasks/Clustering/en/BigPatentClustering.py +++ b/mteb/tasks/Clustering/en/BigPatentClustering.py @@ -28,7 +28,3 @@ class 
BigPatentClustering(AbsTaskClustering): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Clustering/en/BiorxivClusteringP2P.py b/mteb/tasks/Clustering/en/BiorxivClusteringP2P.py index 9ba39ce85a..685073d7d1 100644 --- a/mteb/tasks/Clustering/en/BiorxivClusteringP2P.py +++ b/mteb/tasks/Clustering/en/BiorxivClusteringP2P.py @@ -27,7 +27,3 @@ class BiorxivClusteringP2P(AbsTaskClustering): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Clustering/en/BiorxivClusteringS2S.py b/mteb/tasks/Clustering/en/BiorxivClusteringS2S.py index 12a66e8553..5fb38c3bdd 100644 --- a/mteb/tasks/Clustering/en/BiorxivClusteringS2S.py +++ b/mteb/tasks/Clustering/en/BiorxivClusteringS2S.py @@ -27,7 +27,3 @@ class BiorxivClusteringS2S(AbsTaskClustering): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Clustering/en/MedrxivClusteringP2P.py b/mteb/tasks/Clustering/en/MedrxivClusteringP2P.py index 9a252d7c74..637f1ff3a0 100644 --- a/mteb/tasks/Clustering/en/MedrxivClusteringP2P.py +++ b/mteb/tasks/Clustering/en/MedrxivClusteringP2P.py @@ -28,7 +28,3 @@ class MedrxivClusteringP2P(AbsTaskClustering): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Clustering/en/MedrxivClusteringS2S.py b/mteb/tasks/Clustering/en/MedrxivClusteringS2S.py index bd569cb494..46023eaf5b 100644 --- a/mteb/tasks/Clustering/en/MedrxivClusteringS2S.py +++ b/mteb/tasks/Clustering/en/MedrxivClusteringS2S.py @@ -28,7 +28,3 @@ class MedrxivClusteringS2S(AbsTaskClustering): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) 
diff --git a/mteb/tasks/Clustering/en/RedditClustering.py b/mteb/tasks/Clustering/en/RedditClustering.py index c6a5131ea6..2295ac323c 100644 --- a/mteb/tasks/Clustering/en/RedditClustering.py +++ b/mteb/tasks/Clustering/en/RedditClustering.py @@ -28,7 +28,3 @@ class RedditClustering(AbsTaskClustering): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Clustering/en/RedditClusteringP2P.py b/mteb/tasks/Clustering/en/RedditClusteringP2P.py index 79871749a8..d6003491d5 100644 --- a/mteb/tasks/Clustering/en/RedditClusteringP2P.py +++ b/mteb/tasks/Clustering/en/RedditClusteringP2P.py @@ -28,7 +28,3 @@ class RedditClusteringP2P(AbsTaskClustering): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Clustering/en/StackExchangeClustering.py b/mteb/tasks/Clustering/en/StackExchangeClustering.py index 401400e8bc..5394097e2d 100644 --- a/mteb/tasks/Clustering/en/StackExchangeClustering.py +++ b/mteb/tasks/Clustering/en/StackExchangeClustering.py @@ -28,22 +28,3 @@ class StackExchangeClustering(AbsTaskClustering): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - return { - "name": "StackExchangeClustering", - "hf_hub_name": "mteb/stackexchange-clustering", - "description": ( - "Clustering of titles from 121 stackexchanges. Clustering of 25 sets, each with 10-50 classes, and" - " each class with 100 - 1000 sentences." 
- ), - "reference": "https://arxiv.org/abs/2104.07081", - "type": "Clustering", - "category": "s2s", - "eval_splits": ["validation", "test"], - "eval_langs": ["en"], - "main_score": "v_measure", - "revision": "6cbc1f7b2bc0622f2e39d2c77fa502909748c259", - } diff --git a/mteb/tasks/Clustering/en/StackExchangeClusteringP2P.py b/mteb/tasks/Clustering/en/StackExchangeClusteringP2P.py index 3ceb56e49f..843b7de216 100644 --- a/mteb/tasks/Clustering/en/StackExchangeClusteringP2P.py +++ b/mteb/tasks/Clustering/en/StackExchangeClusteringP2P.py @@ -28,7 +28,3 @@ class StackExchangeClusteringP2P(AbsTaskClustering): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Clustering/en/TwentyNewsgroupsClustering.py b/mteb/tasks/Clustering/en/TwentyNewsgroupsClustering.py index fa435b1bba..128741efdf 100644 --- a/mteb/tasks/Clustering/en/TwentyNewsgroupsClustering.py +++ b/mteb/tasks/Clustering/en/TwentyNewsgroupsClustering.py @@ -28,7 +28,3 @@ class TwentyNewsgroupsClustering(AbsTaskClustering): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Clustering/en/WikiCitiesClustering.py b/mteb/tasks/Clustering/en/WikiCitiesClustering.py index c8349d51d7..88d50f8214 100644 --- a/mteb/tasks/Clustering/en/WikiCitiesClustering.py +++ b/mteb/tasks/Clustering/en/WikiCitiesClustering.py @@ -28,7 +28,3 @@ class WikiCitiesClustering(AbsTaskClustering): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Clustering/es/FloresClusteringS2S.py b/mteb/tasks/Clustering/es/FloresClusteringS2S.py index f5c278d360..350328ee2c 100644 --- a/mteb/tasks/Clustering/es/FloresClusteringS2S.py +++ b/mteb/tasks/Clustering/es/FloresClusteringS2S.py @@ -28,7 +28,3 @@ class 
FloresClusteringS2S(AbsTaskClustering): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Clustering/es/SpanishNewsClusteringP2P.py b/mteb/tasks/Clustering/es/SpanishNewsClusteringP2P.py index fc2a0e3a96..fa7b369bd5 100644 --- a/mteb/tasks/Clustering/es/SpanishNewsClusteringP2P.py +++ b/mteb/tasks/Clustering/es/SpanishNewsClusteringP2P.py @@ -28,19 +28,3 @@ class SpanishNewsClusteringP2P(AbsTaskClustering): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - return { - "name": "SpanishNewsClusteringP2P", - "hf_hub_name": "jinaai/spanish_news_clustering", - "description": ("Clustering of news articles, 7 topics in total."), - "reference": "https://www.kaggle.com/datasets/kevinmorgado/spanish-news-classification", - "type": "Clustering", - "category": "p2p", - "eval_splits": ["test"], - "eval_langs": ["es"], - "main_score": "v_measure", - "revision": "b5edc3d3d7c12c7b9f883e9da50f6732f3624142", - } diff --git a/mteb/tasks/Clustering/fr/AlloProfClusteringP2P.py b/mteb/tasks/Clustering/fr/AlloProfClusteringP2P.py index f0b06a12a1..92f9c7db91 100644 --- a/mteb/tasks/Clustering/fr/AlloProfClusteringP2P.py +++ b/mteb/tasks/Clustering/fr/AlloProfClusteringP2P.py @@ -32,10 +32,6 @@ class AlloProfClusteringP2P(AbsTaskClustering): bibtex_citation=None, ) - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - def load_data(self, **kwargs): """ Load dataset from HuggingFace hub and convert it to the standard format. 
diff --git a/mteb/tasks/Clustering/fr/AlloProfClusteringS2S.py b/mteb/tasks/Clustering/fr/AlloProfClusteringS2S.py index a8d7b87fe8..1b26d68f25 100644 --- a/mteb/tasks/Clustering/fr/AlloProfClusteringS2S.py +++ b/mteb/tasks/Clustering/fr/AlloProfClusteringS2S.py @@ -32,10 +32,6 @@ class AlloProfClusteringS2S(AbsTaskClustering): bibtex_citation=None, ) - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - def load_data(self, **kwargs): """ Load dataset from HuggingFace hub and convert it to the standard format. diff --git a/mteb/tasks/Clustering/fr/HALClusteringS2S.py b/mteb/tasks/Clustering/fr/HALClusteringS2S.py index bcd177fb72..8a88ab08aa 100644 --- a/mteb/tasks/Clustering/fr/HALClusteringS2S.py +++ b/mteb/tasks/Clustering/fr/HALClusteringS2S.py @@ -32,10 +32,6 @@ class HALClusteringS2S(AbsTaskClustering): bibtex_citation=None, ) - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - def load_data(self, **kwargs): """ Load dataset from HuggingFace hub and convert it to the standard format. diff --git a/mteb/tasks/Clustering/fr/MLSUMClusteringP2P.py b/mteb/tasks/Clustering/fr/MLSUMClusteringP2P.py index 1adbf615f4..d584a069c5 100644 --- a/mteb/tasks/Clustering/fr/MLSUMClusteringP2P.py +++ b/mteb/tasks/Clustering/fr/MLSUMClusteringP2P.py @@ -32,10 +32,6 @@ class MLSUMClusteringP2P(AbsTaskClustering): bibtex_citation=None, ) - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - def load_data(self, **kwargs): """ Load dataset from HuggingFace hub and convert it to the standard format. 
diff --git a/mteb/tasks/Clustering/fr/MLSUMClusteringS2S.py b/mteb/tasks/Clustering/fr/MLSUMClusteringS2S.py index af522fbe8e..a2de3dc86b 100644 --- a/mteb/tasks/Clustering/fr/MLSUMClusteringS2S.py +++ b/mteb/tasks/Clustering/fr/MLSUMClusteringS2S.py @@ -32,10 +32,6 @@ class MLSUMClusteringS2S(AbsTaskClustering): bibtex_citation=None, ) - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - def load_data(self, **kwargs): """ Load dataset from HuggingFace hub and convert it to the standard format. diff --git a/mteb/tasks/Clustering/multilingual/MasakhaNEWSClusteringP2P.py b/mteb/tasks/Clustering/multilingual/MasakhaNEWSClusteringP2P.py index 7155b6e899..3239d8ebf5 100644 --- a/mteb/tasks/Clustering/multilingual/MasakhaNEWSClusteringP2P.py +++ b/mteb/tasks/Clustering/multilingual/MasakhaNEWSClusteringP2P.py @@ -51,10 +51,6 @@ class MasakhaNEWSClusteringP2P(AbsTaskClustering, MultilingualTask): bibtex_citation=None, ) - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - def load_data(self, **kwargs): """ Load dataset from HuggingFace hub and convert it to the standard format. diff --git a/mteb/tasks/Clustering/multilingual/MasakhaNEWSClusteringS2S.py b/mteb/tasks/Clustering/multilingual/MasakhaNEWSClusteringS2S.py index 39fa6966d5..db8694f68a 100644 --- a/mteb/tasks/Clustering/multilingual/MasakhaNEWSClusteringS2S.py +++ b/mteb/tasks/Clustering/multilingual/MasakhaNEWSClusteringS2S.py @@ -53,10 +53,6 @@ class MasakhaNEWSClusteringS2S(AbsTaskClustering, MultilingualTask): bibtex_citation=None, ) - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - def load_data(self, **kwargs): """ Load dataset from HuggingFace hub and convert it to the standard format. 
diff --git a/mteb/tasks/Clustering/pl/PolishClustering.py b/mteb/tasks/Clustering/pl/PolishClustering.py index 35e185da63..9b56098b32 100644 --- a/mteb/tasks/Clustering/pl/PolishClustering.py +++ b/mteb/tasks/Clustering/pl/PolishClustering.py @@ -29,7 +29,3 @@ class EightTagsClustering(AbsTaskClustering): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Clustering/zh/CMTEBClustering.py b/mteb/tasks/Clustering/zh/CMTEBClustering.py index 91504b871e..6a360ecbc6 100644 --- a/mteb/tasks/Clustering/zh/CMTEBClustering.py +++ b/mteb/tasks/Clustering/zh/CMTEBClustering.py @@ -29,10 +29,6 @@ class CLSClusteringS2S(AbsTaskClustering): bibtex_citation=None, ) - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - class CLSClusteringP2P(AbsTaskClustering): metadata = TaskMetadata( @@ -58,10 +54,6 @@ class CLSClusteringP2P(AbsTaskClustering): bibtex_citation=None, ) - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - class ThuNewsClusteringS2S(AbsTaskClustering): metadata = TaskMetadata( @@ -87,10 +79,6 @@ class ThuNewsClusteringS2S(AbsTaskClustering): bibtex_citation=None, ) - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - class ThuNewsClusteringP2P(AbsTaskClustering): metadata = TaskMetadata( @@ -115,7 +103,3 @@ class ThuNewsClusteringP2P(AbsTaskClustering): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/PairClassification/en/SprintDuplicateQuestionsPC.py b/mteb/tasks/PairClassification/en/SprintDuplicateQuestionsPC.py index a1d70c18f5..ec5ca9ae68 100644 --- a/mteb/tasks/PairClassification/en/SprintDuplicateQuestionsPC.py +++ b/mteb/tasks/PairClassification/en/SprintDuplicateQuestionsPC.py @@ -28,7 +28,3 @@ class 
SprintDuplicateQuestionsPC(AbsTaskPairClassification): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/PairClassification/en/TwitterSemEval2015PC.py b/mteb/tasks/PairClassification/en/TwitterSemEval2015PC.py index d57b4ca79e..0bc8ef0c1a 100644 --- a/mteb/tasks/PairClassification/en/TwitterSemEval2015PC.py +++ b/mteb/tasks/PairClassification/en/TwitterSemEval2015PC.py @@ -28,7 +28,3 @@ class TwitterSemEval2015PC(AbsTaskPairClassification): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/PairClassification/en/TwitterURLCorpusPC.py b/mteb/tasks/PairClassification/en/TwitterURLCorpusPC.py index e4b1425e55..98316f9626 100644 --- a/mteb/tasks/PairClassification/en/TwitterURLCorpusPC.py +++ b/mteb/tasks/PairClassification/en/TwitterURLCorpusPC.py @@ -28,7 +28,3 @@ class TwitterURLCorpusPC(AbsTaskPairClassification): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/PairClassification/multilingual/OpusparcusPC.py b/mteb/tasks/PairClassification/multilingual/OpusparcusPC.py index 35b878a363..c49bca69b7 100644 --- a/mteb/tasks/PairClassification/multilingual/OpusparcusPC.py +++ b/mteb/tasks/PairClassification/multilingual/OpusparcusPC.py @@ -33,10 +33,6 @@ class OpusparcusPC(AbsTaskPairClassification, MultilingualTask): bibtex_citation=None, ) - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - def load_data(self, **kwargs): """ Load dataset from HuggingFace hub diff --git a/mteb/tasks/PairClassification/multilingual/PawsX.py b/mteb/tasks/PairClassification/multilingual/PawsX.py index f7259d50e3..1bacc739b3 100644 --- a/mteb/tasks/PairClassification/multilingual/PawsX.py +++ 
b/mteb/tasks/PairClassification/multilingual/PawsX.py @@ -32,10 +32,6 @@ class PawsX(MultilingualTask, AbsTaskPairClassification): bibtex_citation=None, ) - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - def load_data(self, **kwargs): if self.data_loaded: return diff --git a/mteb/tasks/PairClassification/pl/PolishPC.py b/mteb/tasks/PairClassification/pl/PolishPC.py index 0a985aff9f..c26fe307ab 100644 --- a/mteb/tasks/PairClassification/pl/PolishPC.py +++ b/mteb/tasks/PairClassification/pl/PolishPC.py @@ -29,10 +29,6 @@ class SickePLPC(AbsTaskPairClassification): bibtex_citation=None, ) - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - class PpcPC(AbsTaskPairClassification): metadata = TaskMetadata( @@ -58,10 +54,6 @@ class PpcPC(AbsTaskPairClassification): bibtex_citation=None, ) - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - class CdscePC(AbsTaskPairClassification): metadata = TaskMetadata( @@ -87,10 +79,6 @@ class CdscePC(AbsTaskPairClassification): bibtex_citation=None, ) - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - class PscPC(AbsTaskPairClassification): metadata = TaskMetadata( @@ -115,7 +103,3 @@ class PscPC(AbsTaskPairClassification): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/PairClassification/zh/CMTEBPairClassification.py b/mteb/tasks/PairClassification/zh/CMTEBPairClassification.py index b6d7a6de2e..ca4b6a25fb 100644 --- a/mteb/tasks/PairClassification/zh/CMTEBPairClassification.py +++ b/mteb/tasks/PairClassification/zh/CMTEBPairClassification.py @@ -28,10 +28,6 @@ class Ocnli(AbsTaskPairClassification): bibtex_citation=None, ) - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - class Cmnli(AbsTaskPairClassification): metadata = TaskMetadata( @@ 
-56,7 +52,3 @@ class Cmnli(AbsTaskPairClassification): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Reranking/en/AskUbuntuDupQuestions.py b/mteb/tasks/Reranking/en/AskUbuntuDupQuestions.py index 70978a1028..ab6fe217fe 100644 --- a/mteb/tasks/Reranking/en/AskUbuntuDupQuestions.py +++ b/mteb/tasks/Reranking/en/AskUbuntuDupQuestions.py @@ -28,7 +28,3 @@ class AskUbuntuDupQuestions(AbsTaskReranking): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Reranking/en/MindSmallReranking.py b/mteb/tasks/Reranking/en/MindSmallReranking.py index 25476b6ffa..a458e694d0 100644 --- a/mteb/tasks/Reranking/en/MindSmallReranking.py +++ b/mteb/tasks/Reranking/en/MindSmallReranking.py @@ -28,7 +28,3 @@ class MindSmallReranking(AbsTaskReranking): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Reranking/en/SciDocsReranking.py b/mteb/tasks/Reranking/en/SciDocsReranking.py index e5e06e7d50..150aba8ef5 100644 --- a/mteb/tasks/Reranking/en/SciDocsReranking.py +++ b/mteb/tasks/Reranking/en/SciDocsReranking.py @@ -18,17 +18,35 @@ class SciDocsReranking(AbsTaskReranking): main_score="map", revision="d3c5e1fc0b855ab6097bf1cda04dd73947d7caab", date=None, - form=None, - domains=None, - task_subtypes=None, - license=None, - socioeconomic_status=None, + form=["written"], + domains=["Academic", "Non-fiction"], + task_subtypes=["Scientific Reranking"], + license="cc-by-4.0", + socioeconomic_status="high", annotations_creators=None, dialect=None, - text_creation=None, - bibtex_citation=None, + text_creation="found", + bibtex_citation=""" +@inproceedings{cohan-etal-2020-specter, + title = "{SPECTER}: Document-level Representation Learning using Citation-informed Transformers", + 
author = "Cohan, Arman and + Feldman, Sergey and + Beltagy, Iz and + Downey, Doug and + Weld, Daniel", + editor = "Jurafsky, Dan and + Chai, Joyce and + Schluter, Natalie and + Tetreault, Joel", + booktitle = "Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics", + month = jul, + year = "2020", + address = "Online", + publisher = "Association for Computational Linguistics", + url = "https://aclanthology.org/2020.acl-main.207", + doi = "10.18653/v1/2020.acl-main.207", + pages = "2270--2282", + abstract = "Representation learning is a critical ingredient for natural language processing systems. Recent Transformer language models like BERT learn powerful textual representations, but these models are targeted towards token- and sentence-level training objectives and do not leverage information on inter-document relatedness, which limits their document-level representation power. For applications on scientific documents, such as classification and recommendation, accurate embeddings of documents are a necessity. We propose SPECTER, a new method to generate document-level embedding of scientific papers based on pretraining a Transformer language model on a powerful signal of document-level relatedness: the citation graph. Unlike existing pretrained language models, Specter can be easily applied to downstream applications without task-specific fine-tuning. Additionally, to encourage further research on document-level models, we introduce SciDocs, a new evaluation benchmark consisting of seven document-level tasks ranging from citation prediction, to document classification and recommendation. 
We show that Specter outperforms a variety of competitive baselines on the benchmark.", +} +""", ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Reranking/en/StackOverflowDupQuestions.py b/mteb/tasks/Reranking/en/StackOverflowDupQuestions.py index 130f0600c5..6e61ce727a 100644 --- a/mteb/tasks/Reranking/en/StackOverflowDupQuestions.py +++ b/mteb/tasks/Reranking/en/StackOverflowDupQuestions.py @@ -28,7 +28,3 @@ class StackOverflowDupQuestions(AbsTaskReranking): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Reranking/fr/AlloprofReranking.py b/mteb/tasks/Reranking/fr/AlloprofReranking.py index 6093487c9f..c70f70e875 100644 --- a/mteb/tasks/Reranking/fr/AlloprofReranking.py +++ b/mteb/tasks/Reranking/fr/AlloprofReranking.py @@ -28,7 +28,3 @@ class AlloprofReranking(AbsTaskReranking): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Reranking/fr/SyntecReranking.py b/mteb/tasks/Reranking/fr/SyntecReranking.py index b4e25f90fe..7cc945c3ef 100644 --- a/mteb/tasks/Reranking/fr/SyntecReranking.py +++ b/mteb/tasks/Reranking/fr/SyntecReranking.py @@ -28,7 +28,3 @@ class SyntecReranking(AbsTaskReranking): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Reranking/multilingual/MIRACLReranking.py b/mteb/tasks/Reranking/multilingual/MIRACLReranking.py index fee0c71e45..ca9c46ca3e 100644 --- a/mteb/tasks/Reranking/multilingual/MIRACLReranking.py +++ b/mteb/tasks/Reranking/multilingual/MIRACLReranking.py @@ -29,7 +29,3 @@ class MIRACLReranking(MultilingualTask, AbsTaskReranking): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return 
dict(self.metadata) diff --git a/mteb/tasks/Reranking/zh/CMTEBReranking.py b/mteb/tasks/Reranking/zh/CMTEBReranking.py index e9bb6ae056..c0508288f6 100644 --- a/mteb/tasks/Reranking/zh/CMTEBReranking.py +++ b/mteb/tasks/Reranking/zh/CMTEBReranking.py @@ -28,22 +28,6 @@ class T2Reranking(AbsTaskReranking): bibtex_citation=None, ) - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - return { - "name": "T2Reranking", - "hf_hub_name": "C-MTEB/T2Reranking", - "description": "T2Ranking: A large-scale Chinese Benchmark for Passage Ranking", - "reference": "https://arxiv.org/abs/2304.03679", - "type": "Reranking", - "category": "s2p", - "eval_splits": ["dev"], - "eval_langs": ["zh"], - "main_score": "map", - "revision": "76631901a18387f85eaa53e5450019b87ad58ef9", - } - class MMarcoReranking(AbsTaskReranking): metadata = TaskMetadata( @@ -69,10 +53,6 @@ class MMarcoReranking(AbsTaskReranking): bibtex_citation=None, ) - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - class CMedQAv1(AbsTaskReranking): metadata = TaskMetadata( @@ -98,10 +78,6 @@ class CMedQAv1(AbsTaskReranking): bibtex_citation=None, ) - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - class CMedQAv2(AbsTaskReranking): metadata = TaskMetadata( @@ -126,7 +102,3 @@ class CMedQAv2(AbsTaskReranking): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Retrieval/de/GerDaLIRRetrieval.py b/mteb/tasks/Retrieval/de/GerDaLIRRetrieval.py index 30d51a416d..85f56c30c7 100644 --- a/mteb/tasks/Retrieval/de/GerDaLIRRetrieval.py +++ b/mteb/tasks/Retrieval/de/GerDaLIRRetrieval.py @@ -32,10 +32,6 @@ class GerDaLIR(AbsTaskRetrieval): bibtex_citation=None, ) - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - def load_data(self, **kwargs): if self.data_loaded: return diff --git 
a/mteb/tasks/Retrieval/de/GermanDPRRetrieval.py b/mteb/tasks/Retrieval/de/GermanDPRRetrieval.py index 24b1c02abb..11b60a82d6 100644 --- a/mteb/tasks/Retrieval/de/GermanDPRRetrieval.py +++ b/mteb/tasks/Retrieval/de/GermanDPRRetrieval.py @@ -34,10 +34,6 @@ class GermanDPR(AbsTaskRetrieval): bibtex_citation=None, ) - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - @staticmethod def _format_documents(docs, id_prefix="", existing_docs=None): if existing_docs is None: diff --git a/mteb/tasks/Retrieval/de/GermanQuADRetrieval.py b/mteb/tasks/Retrieval/de/GermanQuADRetrieval.py index c0d0fcb315..68cbea11d5 100644 --- a/mteb/tasks/Retrieval/de/GermanQuADRetrieval.py +++ b/mteb/tasks/Retrieval/de/GermanQuADRetrieval.py @@ -51,10 +51,6 @@ class GermanQuADRetrieval(AbsTaskRetrieval): bibtex_citation=None, ) - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - def load_data(self, **kwargs): if self.data_loaded: return diff --git a/mteb/tasks/Retrieval/en/ArguAnaRetrieval.py b/mteb/tasks/Retrieval/en/ArguAnaRetrieval.py index 45fab75d30..6d55176bb3 100644 --- a/mteb/tasks/Retrieval/en/ArguAnaRetrieval.py +++ b/mteb/tasks/Retrieval/en/ArguAnaRetrieval.py @@ -28,7 +28,3 @@ class ArguAna(AbsTaskRetrieval): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Retrieval/en/CQADupstackAndroidRetrieval.py b/mteb/tasks/Retrieval/en/CQADupstackAndroidRetrieval.py index 3df82c7000..6589f37eeb 100644 --- a/mteb/tasks/Retrieval/en/CQADupstackAndroidRetrieval.py +++ b/mteb/tasks/Retrieval/en/CQADupstackAndroidRetrieval.py @@ -28,19 +28,3 @@ class CQADupstackAndroidRetrieval(AbsTaskRetrieval): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - return { - "name": "CQADupstackAndroidRetrieval", - "hf_hub_name": "mteb/cqadupstack-android", - 
"description": "CQADupStack: A Benchmark Data Set for Community Question-Answering Research", - "reference": "http://nlp.cis.unimelb.edu.au/resources/cqadupstack/", - "type": "Retrieval", - "category": "s2p", - "eval_splits": ["test"], - "eval_langs": ["en"], - "main_score": "ndcg_at_10", - "revision": "f46a197baaae43b4f621051089b82a364682dfeb", - } diff --git a/mteb/tasks/Retrieval/en/CQADupstackEnglishRetrieval.py b/mteb/tasks/Retrieval/en/CQADupstackEnglishRetrieval.py index 34e7d885af..460d0a9edb 100644 --- a/mteb/tasks/Retrieval/en/CQADupstackEnglishRetrieval.py +++ b/mteb/tasks/Retrieval/en/CQADupstackEnglishRetrieval.py @@ -28,7 +28,3 @@ class CQADupstackEnglishRetrieval(AbsTaskRetrieval): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Retrieval/en/CQADupstackGamingRetrieval.py b/mteb/tasks/Retrieval/en/CQADupstackGamingRetrieval.py index c5e0086f11..35a599ba01 100644 --- a/mteb/tasks/Retrieval/en/CQADupstackGamingRetrieval.py +++ b/mteb/tasks/Retrieval/en/CQADupstackGamingRetrieval.py @@ -28,7 +28,3 @@ class CQADupstackGamingRetrieval(AbsTaskRetrieval): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Retrieval/en/CQADupstackGisRetrieval.py b/mteb/tasks/Retrieval/en/CQADupstackGisRetrieval.py index 5a689ba195..891554eda1 100644 --- a/mteb/tasks/Retrieval/en/CQADupstackGisRetrieval.py +++ b/mteb/tasks/Retrieval/en/CQADupstackGisRetrieval.py @@ -28,7 +28,3 @@ class CQADupstackGisRetrieval(AbsTaskRetrieval): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Retrieval/en/CQADupstackMathematicaRetrieval.py b/mteb/tasks/Retrieval/en/CQADupstackMathematicaRetrieval.py index 09fee8d394..ebb99388ef 100644 --- 
a/mteb/tasks/Retrieval/en/CQADupstackMathematicaRetrieval.py +++ b/mteb/tasks/Retrieval/en/CQADupstackMathematicaRetrieval.py @@ -28,7 +28,3 @@ class CQADupstackMathematicaRetrieval(AbsTaskRetrieval): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Retrieval/en/CQADupstackPhysicsRetrieval.py b/mteb/tasks/Retrieval/en/CQADupstackPhysicsRetrieval.py index d85ccef17d..54af77e9e4 100644 --- a/mteb/tasks/Retrieval/en/CQADupstackPhysicsRetrieval.py +++ b/mteb/tasks/Retrieval/en/CQADupstackPhysicsRetrieval.py @@ -28,7 +28,3 @@ class CQADupstackPhysicsRetrieval(AbsTaskRetrieval): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Retrieval/en/CQADupstackProgrammersRetrieval.py b/mteb/tasks/Retrieval/en/CQADupstackProgrammersRetrieval.py index 9a058bb18e..57638d6dc9 100644 --- a/mteb/tasks/Retrieval/en/CQADupstackProgrammersRetrieval.py +++ b/mteb/tasks/Retrieval/en/CQADupstackProgrammersRetrieval.py @@ -28,7 +28,3 @@ class CQADupstackProgrammersRetrieval(AbsTaskRetrieval): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Retrieval/en/CQADupstackStatsRetrieval.py b/mteb/tasks/Retrieval/en/CQADupstackStatsRetrieval.py index ef72e65345..e4eb37de5b 100644 --- a/mteb/tasks/Retrieval/en/CQADupstackStatsRetrieval.py +++ b/mteb/tasks/Retrieval/en/CQADupstackStatsRetrieval.py @@ -28,7 +28,3 @@ class CQADupstackStatsRetrieval(AbsTaskRetrieval): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Retrieval/en/CQADupstackTexRetrieval.py b/mteb/tasks/Retrieval/en/CQADupstackTexRetrieval.py index e3887ea231..be36f64ff7 100644 --- 
a/mteb/tasks/Retrieval/en/CQADupstackTexRetrieval.py +++ b/mteb/tasks/Retrieval/en/CQADupstackTexRetrieval.py @@ -28,7 +28,3 @@ class CQADupstackTexRetrieval(AbsTaskRetrieval): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Retrieval/en/CQADupstackUnixRetrieval.py b/mteb/tasks/Retrieval/en/CQADupstackUnixRetrieval.py index 9765909dc0..8df00a7a8c 100644 --- a/mteb/tasks/Retrieval/en/CQADupstackUnixRetrieval.py +++ b/mteb/tasks/Retrieval/en/CQADupstackUnixRetrieval.py @@ -28,7 +28,3 @@ class CQADupstackUnixRetrieval(AbsTaskRetrieval): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Retrieval/en/CQADupstackWebmastersRetrieval.py b/mteb/tasks/Retrieval/en/CQADupstackWebmastersRetrieval.py index c8966d06a5..e863d8c36e 100644 --- a/mteb/tasks/Retrieval/en/CQADupstackWebmastersRetrieval.py +++ b/mteb/tasks/Retrieval/en/CQADupstackWebmastersRetrieval.py @@ -28,7 +28,3 @@ class CQADupstackWebmastersRetrieval(AbsTaskRetrieval): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Retrieval/en/CQADupstackWordpressRetrieval.py b/mteb/tasks/Retrieval/en/CQADupstackWordpressRetrieval.py index 2db5d7abd7..0be63da42b 100644 --- a/mteb/tasks/Retrieval/en/CQADupstackWordpressRetrieval.py +++ b/mteb/tasks/Retrieval/en/CQADupstackWordpressRetrieval.py @@ -28,7 +28,3 @@ class CQADupstackWordpressRetrieval(AbsTaskRetrieval): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Retrieval/en/ClimateFEVERRetrieval.py b/mteb/tasks/Retrieval/en/ClimateFEVERRetrieval.py index e67e467c7e..67d677050e 100644 --- a/mteb/tasks/Retrieval/en/ClimateFEVERRetrieval.py +++ 
b/mteb/tasks/Retrieval/en/ClimateFEVERRetrieval.py @@ -28,7 +28,3 @@ class ClimateFEVER(AbsTaskRetrieval): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Retrieval/en/DBPediaRetrieval.py b/mteb/tasks/Retrieval/en/DBPediaRetrieval.py index ca4a39e34a..7a82fc7e3c 100644 --- a/mteb/tasks/Retrieval/en/DBPediaRetrieval.py +++ b/mteb/tasks/Retrieval/en/DBPediaRetrieval.py @@ -28,7 +28,3 @@ class DBPedia(AbsTaskRetrieval): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Retrieval/en/FEVERRetrieval.py b/mteb/tasks/Retrieval/en/FEVERRetrieval.py index 9157d8f26b..9ef5670d27 100644 --- a/mteb/tasks/Retrieval/en/FEVERRetrieval.py +++ b/mteb/tasks/Retrieval/en/FEVERRetrieval.py @@ -32,7 +32,3 @@ class FEVER(AbsTaskRetrieval): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Retrieval/en/FiQA2018Retrieval.py b/mteb/tasks/Retrieval/en/FiQA2018Retrieval.py index 570df54079..dd298d28e8 100644 --- a/mteb/tasks/Retrieval/en/FiQA2018Retrieval.py +++ b/mteb/tasks/Retrieval/en/FiQA2018Retrieval.py @@ -28,7 +28,3 @@ class FiQA2018(AbsTaskRetrieval): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Retrieval/en/HagridRetrieval.py b/mteb/tasks/Retrieval/en/HagridRetrieval.py index 87d7dd5894..0ea5acba84 100644 --- a/mteb/tasks/Retrieval/en/HagridRetrieval.py +++ b/mteb/tasks/Retrieval/en/HagridRetrieval.py @@ -38,11 +38,6 @@ class HagridRetrieval(AbsTaskRetrieval): bibtex_citation=None, ) - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - return {} - def load_data(self, **kwargs): """ Loads the different split of the dataset 
(queries/corpus/relevants) diff --git a/mteb/tasks/Retrieval/en/HotpotQARetrieval.py b/mteb/tasks/Retrieval/en/HotpotQARetrieval.py index 380c6f7dbf..16edce1548 100644 --- a/mteb/tasks/Retrieval/en/HotpotQARetrieval.py +++ b/mteb/tasks/Retrieval/en/HotpotQARetrieval.py @@ -31,7 +31,3 @@ class HotpotQA(AbsTaskRetrieval): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Retrieval/en/MSMARCORetrieval.py b/mteb/tasks/Retrieval/en/MSMARCORetrieval.py index 99b30bdfed..6eb2af7b9c 100644 --- a/mteb/tasks/Retrieval/en/MSMARCORetrieval.py +++ b/mteb/tasks/Retrieval/en/MSMARCORetrieval.py @@ -28,7 +28,3 @@ class MSMARCO(AbsTaskRetrieval): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Retrieval/en/MSMARCOv2Retrieval.py b/mteb/tasks/Retrieval/en/MSMARCOv2Retrieval.py index 205e94327b..80ed17db3d 100644 --- a/mteb/tasks/Retrieval/en/MSMARCOv2Retrieval.py +++ b/mteb/tasks/Retrieval/en/MSMARCOv2Retrieval.py @@ -28,7 +28,3 @@ class MSMARCOv2(AbsTaskRetrieval): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Retrieval/en/NFCorpusRetrieval.py b/mteb/tasks/Retrieval/en/NFCorpusRetrieval.py index 7f1dbe1a80..fbfa2f3bc7 100644 --- a/mteb/tasks/Retrieval/en/NFCorpusRetrieval.py +++ b/mteb/tasks/Retrieval/en/NFCorpusRetrieval.py @@ -28,7 +28,3 @@ class NFCorpus(AbsTaskRetrieval): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Retrieval/en/NQRetrieval.py b/mteb/tasks/Retrieval/en/NQRetrieval.py index 5336d0dfa6..9d2e5c9d80 100644 --- a/mteb/tasks/Retrieval/en/NQRetrieval.py +++ b/mteb/tasks/Retrieval/en/NQRetrieval.py @@ -28,7 +28,3 @@ class NQ(AbsTaskRetrieval): 
text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Retrieval/en/NarrativeQARetrieval.py b/mteb/tasks/Retrieval/en/NarrativeQARetrieval.py index 9a29847c83..6ef5b00b72 100644 --- a/mteb/tasks/Retrieval/en/NarrativeQARetrieval.py +++ b/mteb/tasks/Retrieval/en/NarrativeQARetrieval.py @@ -36,11 +36,6 @@ class NarrativeQARetrieval(AbsTaskRetrieval): bibtex_citation=None, ) - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - return {} - def load_data(self, **kwargs): if self.data_loaded: return diff --git a/mteb/tasks/Retrieval/en/QuoraRetrieval.py b/mteb/tasks/Retrieval/en/QuoraRetrieval.py index e1065f9646..53eb15f5f2 100644 --- a/mteb/tasks/Retrieval/en/QuoraRetrieval.py +++ b/mteb/tasks/Retrieval/en/QuoraRetrieval.py @@ -31,7 +31,3 @@ class QuoraRetrieval(AbsTaskRetrieval): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Retrieval/en/SCIDOCSRetrieval.py b/mteb/tasks/Retrieval/en/SCIDOCSRetrieval.py index 2babf0a088..c5a412e402 100644 --- a/mteb/tasks/Retrieval/en/SCIDOCSRetrieval.py +++ b/mteb/tasks/Retrieval/en/SCIDOCSRetrieval.py @@ -31,7 +31,3 @@ class SCIDOCS(AbsTaskRetrieval): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Retrieval/en/SciFactRetrieval.py b/mteb/tasks/Retrieval/en/SciFactRetrieval.py index be2778e269..74cbf1369d 100644 --- a/mteb/tasks/Retrieval/en/SciFactRetrieval.py +++ b/mteb/tasks/Retrieval/en/SciFactRetrieval.py @@ -28,7 +28,3 @@ class SciFact(AbsTaskRetrieval): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Retrieval/en/TRECCOVIDRetrieval.py 
b/mteb/tasks/Retrieval/en/TRECCOVIDRetrieval.py index 05e44217d2..8d6b55f3f8 100644 --- a/mteb/tasks/Retrieval/en/TRECCOVIDRetrieval.py +++ b/mteb/tasks/Retrieval/en/TRECCOVIDRetrieval.py @@ -28,7 +28,3 @@ class TRECCOVID(AbsTaskRetrieval): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Retrieval/en/Touche2020Retrieval.py b/mteb/tasks/Retrieval/en/Touche2020Retrieval.py index 43c9c11223..eb408670eb 100644 --- a/mteb/tasks/Retrieval/en/Touche2020Retrieval.py +++ b/mteb/tasks/Retrieval/en/Touche2020Retrieval.py @@ -28,7 +28,3 @@ class Touche2020(AbsTaskRetrieval): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Retrieval/es/SpanishPassageRetrievalS2P.py b/mteb/tasks/Retrieval/es/SpanishPassageRetrievalS2P.py index df13958c73..f870399eeb 100644 --- a/mteb/tasks/Retrieval/es/SpanishPassageRetrievalS2P.py +++ b/mteb/tasks/Retrieval/es/SpanishPassageRetrievalS2P.py @@ -31,10 +31,6 @@ class SpanishPassageRetrievalS2P(AbsTaskRetrieval): bibtex_citation=None, ) - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - def load_data(self, **kwargs): if self.data_loaded: return diff --git a/mteb/tasks/Retrieval/es/SpanishPassageRetrievalS2S.py b/mteb/tasks/Retrieval/es/SpanishPassageRetrievalS2S.py index 70fac5f6b5..a975fd654c 100644 --- a/mteb/tasks/Retrieval/es/SpanishPassageRetrievalS2S.py +++ b/mteb/tasks/Retrieval/es/SpanishPassageRetrievalS2S.py @@ -31,10 +31,6 @@ class SpanishPassageRetrievalS2S(AbsTaskRetrieval): bibtex_citation=None, ) - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - def load_data(self, **kwargs): if self.data_loaded: return diff --git a/mteb/tasks/Retrieval/fr/AlloprofRetrieval.py b/mteb/tasks/Retrieval/fr/AlloprofRetrieval.py index 027c5c39dd..561eda1663 100644 --- 
a/mteb/tasks/Retrieval/fr/AlloprofRetrieval.py +++ b/mteb/tasks/Retrieval/fr/AlloprofRetrieval.py @@ -31,10 +31,6 @@ class AlloprofRetrieval(AbsTaskRetrieval): bibtex_citation=None, ) - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - def load_data(self, **kwargs): if self.data_loaded: return diff --git a/mteb/tasks/Retrieval/fr/BSARDRetrieval.py b/mteb/tasks/Retrieval/fr/BSARDRetrieval.py index b6ccc411eb..867a67da9e 100644 --- a/mteb/tasks/Retrieval/fr/BSARDRetrieval.py +++ b/mteb/tasks/Retrieval/fr/BSARDRetrieval.py @@ -31,10 +31,6 @@ class BSARDRetrieval(AbsTaskRetrieval): bibtex_citation=None, ) - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - def load_data(self, **kwargs): if self.data_loaded: return diff --git a/mteb/tasks/Retrieval/fr/SyntecRetrieval.py b/mteb/tasks/Retrieval/fr/SyntecRetrieval.py index e41679185e..7e9007c971 100644 --- a/mteb/tasks/Retrieval/fr/SyntecRetrieval.py +++ b/mteb/tasks/Retrieval/fr/SyntecRetrieval.py @@ -33,10 +33,6 @@ class SyntecRetrieval(AbsTaskRetrieval): bibtex_citation=None, ) - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - def load_data(self, **kwargs): if self.data_loaded: return diff --git a/mteb/tasks/Retrieval/ko/KoMiracl.py b/mteb/tasks/Retrieval/ko/KoMiracl.py index 901af78d43..596cc2d474 100644 --- a/mteb/tasks/Retrieval/ko/KoMiracl.py +++ b/mteb/tasks/Retrieval/ko/KoMiracl.py @@ -28,7 +28,3 @@ class KoMiracl(AbsTaskRetrieval): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Retrieval/ko/KoMrtydi.py b/mteb/tasks/Retrieval/ko/KoMrtydi.py index 36d9a8d859..0d9a8a774e 100644 --- a/mteb/tasks/Retrieval/ko/KoMrtydi.py +++ b/mteb/tasks/Retrieval/ko/KoMrtydi.py @@ -28,7 +28,3 @@ class KoMrtydi(AbsTaskRetrieval): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> 
dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Retrieval/ko/KoStrategyQA.py b/mteb/tasks/Retrieval/ko/KoStrategyQA.py index 7d26281173..8729c40227 100644 --- a/mteb/tasks/Retrieval/ko/KoStrategyQA.py +++ b/mteb/tasks/Retrieval/ko/KoStrategyQA.py @@ -28,7 +28,3 @@ class KoStrategyQA(AbsTaskRetrieval): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Retrieval/multilingual/MIRACLRetrieval.py b/mteb/tasks/Retrieval/multilingual/MIRACLRetrieval.py index 9814dcd72c..fdc8a76eaf 100644 --- a/mteb/tasks/Retrieval/multilingual/MIRACLRetrieval.py +++ b/mteb/tasks/Retrieval/multilingual/MIRACLRetrieval.py @@ -83,10 +83,6 @@ class MIRACLRetrieval(MultilingualTask, AbsTaskRetrieval): bibtex_citation=None, ) - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - def load_data(self, **kwargs): if self.data_loaded: return diff --git a/mteb/tasks/Retrieval/multilingual/MintakaRetrieval.py b/mteb/tasks/Retrieval/multilingual/MintakaRetrieval.py index 9ae739ae09..1223687b91 100644 --- a/mteb/tasks/Retrieval/multilingual/MintakaRetrieval.py +++ b/mteb/tasks/Retrieval/multilingual/MintakaRetrieval.py @@ -73,10 +73,6 @@ class MintakaRetrieval(MultilingualTask, AbsTaskRetrieval): bibtex_citation=None, ) - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - def load_data(self, **kwargs): if self.data_loaded: return diff --git a/mteb/tasks/Retrieval/multilingual/MultiLongDocRetrieval.py b/mteb/tasks/Retrieval/multilingual/MultiLongDocRetrieval.py index fbdc04b664..a69adde8e1 100644 --- a/mteb/tasks/Retrieval/multilingual/MultiLongDocRetrieval.py +++ b/mteb/tasks/Retrieval/multilingual/MultiLongDocRetrieval.py @@ -81,10 +81,6 @@ class MultiLongDocRetrieval(MultilingualTask, AbsTaskRetrieval): """, ) - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - def 
load_data(self, **kwargs): if self.data_loaded: return diff --git a/mteb/tasks/Retrieval/multilingual/XMarketRetrieval.py b/mteb/tasks/Retrieval/multilingual/XMarketRetrieval.py index b0af6dd89e..6911212cc9 100644 --- a/mteb/tasks/Retrieval/multilingual/XMarketRetrieval.py +++ b/mteb/tasks/Retrieval/multilingual/XMarketRetrieval.py @@ -80,10 +80,6 @@ class XMarket(MultilingualTask, AbsTaskRetrieval): bibtex_citation=None, ) - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - def load_data(self, **kwargs): if self.data_loaded: return diff --git a/mteb/tasks/Retrieval/multilingual/XPQARetrieval.py b/mteb/tasks/Retrieval/multilingual/XPQARetrieval.py index eea0b179fd..923bddef29 100644 --- a/mteb/tasks/Retrieval/multilingual/XPQARetrieval.py +++ b/mteb/tasks/Retrieval/multilingual/XPQARetrieval.py @@ -73,10 +73,6 @@ class XPQARetrieval(MultilingualTask, AbsTaskRetrieval): bibtex_citation=None, ) - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - def load_data(self, **kwargs): if self.data_loaded: return diff --git a/mteb/tasks/Retrieval/pl/ArguAnaPLRetrieval.py b/mteb/tasks/Retrieval/pl/ArguAnaPLRetrieval.py index 1d45db9d88..ac35c36abc 100644 --- a/mteb/tasks/Retrieval/pl/ArguAnaPLRetrieval.py +++ b/mteb/tasks/Retrieval/pl/ArguAnaPLRetrieval.py @@ -28,7 +28,3 @@ class ArguAnaPL(AbsTaskRetrieval): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Retrieval/pl/DBPediaPLRetrieval.py b/mteb/tasks/Retrieval/pl/DBPediaPLRetrieval.py index cc2af21a1d..31e1fde60d 100644 --- a/mteb/tasks/Retrieval/pl/DBPediaPLRetrieval.py +++ b/mteb/tasks/Retrieval/pl/DBPediaPLRetrieval.py @@ -28,7 +28,3 @@ class DBPediaPL(AbsTaskRetrieval): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git 
a/mteb/tasks/Retrieval/pl/FiQAPLRetrieval.py b/mteb/tasks/Retrieval/pl/FiQAPLRetrieval.py index a3c44e2433..8107ec4ed4 100644 --- a/mteb/tasks/Retrieval/pl/FiQAPLRetrieval.py +++ b/mteb/tasks/Retrieval/pl/FiQAPLRetrieval.py @@ -28,7 +28,3 @@ class FiQAPLRetrieval(AbsTaskRetrieval): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Retrieval/pl/HotpotQAPLRetrieval.py b/mteb/tasks/Retrieval/pl/HotpotQAPLRetrieval.py index 33e7e82b98..676ed15023 100644 --- a/mteb/tasks/Retrieval/pl/HotpotQAPLRetrieval.py +++ b/mteb/tasks/Retrieval/pl/HotpotQAPLRetrieval.py @@ -28,7 +28,3 @@ class HotpotQAPL(AbsTaskRetrieval): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Retrieval/pl/MSMARCOPLRetrieval.py b/mteb/tasks/Retrieval/pl/MSMARCOPLRetrieval.py index d2f738dff4..a7a020d7c0 100644 --- a/mteb/tasks/Retrieval/pl/MSMARCOPLRetrieval.py +++ b/mteb/tasks/Retrieval/pl/MSMARCOPLRetrieval.py @@ -28,7 +28,3 @@ class MSMARCOPL(AbsTaskRetrieval): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Retrieval/pl/NFCorpusPLRetrieval.py b/mteb/tasks/Retrieval/pl/NFCorpusPLRetrieval.py index 7437f80a85..0bc9d3bf43 100644 --- a/mteb/tasks/Retrieval/pl/NFCorpusPLRetrieval.py +++ b/mteb/tasks/Retrieval/pl/NFCorpusPLRetrieval.py @@ -28,7 +28,3 @@ class NFCorpusPL(AbsTaskRetrieval): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Retrieval/pl/NQPLRetrieval.py b/mteb/tasks/Retrieval/pl/NQPLRetrieval.py index 12d34e7297..3cc7784859 100644 --- a/mteb/tasks/Retrieval/pl/NQPLRetrieval.py +++ b/mteb/tasks/Retrieval/pl/NQPLRetrieval.py @@ -28,7 +28,3 @@ class NQPL(AbsTaskRetrieval): 
text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Retrieval/pl/QuoraPLRetrieval.py b/mteb/tasks/Retrieval/pl/QuoraPLRetrieval.py index b1ef313287..c2a332ae74 100644 --- a/mteb/tasks/Retrieval/pl/QuoraPLRetrieval.py +++ b/mteb/tasks/Retrieval/pl/QuoraPLRetrieval.py @@ -28,7 +28,3 @@ class QuoraPLRetrieval(AbsTaskRetrieval): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Retrieval/pl/SCIDOCSPLRetrieval.py b/mteb/tasks/Retrieval/pl/SCIDOCSPLRetrieval.py index 6e91fcf2c6..a5eb2362c5 100644 --- a/mteb/tasks/Retrieval/pl/SCIDOCSPLRetrieval.py +++ b/mteb/tasks/Retrieval/pl/SCIDOCSPLRetrieval.py @@ -28,7 +28,3 @@ class SCIDOCSPL(AbsTaskRetrieval): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Retrieval/pl/SciFactPLRetrieval.py b/mteb/tasks/Retrieval/pl/SciFactPLRetrieval.py index cc9dd08efe..6c8934d49e 100644 --- a/mteb/tasks/Retrieval/pl/SciFactPLRetrieval.py +++ b/mteb/tasks/Retrieval/pl/SciFactPLRetrieval.py @@ -28,7 +28,3 @@ class SciFactPL(AbsTaskRetrieval): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Retrieval/pl/TRECCOVIDPLRetrieval.py b/mteb/tasks/Retrieval/pl/TRECCOVIDPLRetrieval.py index 9cf1a0062f..6b1b22e989 100644 --- a/mteb/tasks/Retrieval/pl/TRECCOVIDPLRetrieval.py +++ b/mteb/tasks/Retrieval/pl/TRECCOVIDPLRetrieval.py @@ -28,7 +28,3 @@ class TRECCOVIDPL(AbsTaskRetrieval): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) diff --git a/mteb/tasks/Retrieval/zh/CMTEBRetrieval.py b/mteb/tasks/Retrieval/zh/CMTEBRetrieval.py index 312aa61089..6adfa545b6 
100644 --- a/mteb/tasks/Retrieval/zh/CMTEBRetrieval.py +++ b/mteb/tasks/Retrieval/zh/CMTEBRetrieval.py @@ -50,10 +50,6 @@ class T2Retrieval(AbsTaskRetrieval): bibtex_citation=None, ) - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - def load_data(self, **kwargs): if self.data_loaded: return @@ -88,10 +84,6 @@ class MMarcoRetrieval(AbsTaskRetrieval): bibtex_citation=None, ) - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - def load_data(self, **kwargs): if self.data_loaded: return @@ -126,10 +118,6 @@ class DuRetrieval(AbsTaskRetrieval): bibtex_citation=None, ) - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - def load_data(self, **kwargs): if self.data_loaded: return @@ -164,10 +152,6 @@ class CovidRetrieval(AbsTaskRetrieval): bibtex_citation=None, ) - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - def load_data(self, **kwargs): if self.data_loaded: return @@ -202,10 +186,6 @@ class CmedqaRetrieval(AbsTaskRetrieval): bibtex_citation=None, ) - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - def load_data(self, **kwargs): if self.data_loaded: return @@ -240,10 +220,6 @@ class EcomRetrieval(AbsTaskRetrieval): bibtex_citation=None, ) - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - def load_data(self, **kwargs): if self.data_loaded: return @@ -278,10 +254,6 @@ class MedicalRetrieval(AbsTaskRetrieval): bibtex_citation=None, ) - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - def load_data(self, **kwargs): if self.data_loaded: return @@ -316,10 +288,6 @@ class VideoRetrieval(AbsTaskRetrieval): bibtex_citation=None, ) - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) - def load_data(self, **kwargs): if self.data_loaded: return diff --git a/mteb/tasks/STS/zh/CMTEBSTS.py 
b/mteb/tasks/STS/zh/CMTEBSTS.py index 787628ca27..4f4012a539 100644 --- a/mteb/tasks/STS/zh/CMTEBSTS.py +++ b/mteb/tasks/STS/zh/CMTEBSTS.py @@ -220,7 +220,3 @@ class QBQTC(AbsTaskSTS): text_creation=None, bibtex_citation=None, ) - - @property - def metadata_dict(self) -> dict[str, str]: - return dict(self.metadata) From cd4a012271463b89db7a8ec9ca298a975805988d Mon Sep 17 00:00:00 2001 From: Kenneth Enevoldsen Date: Sun, 24 Mar 2024 13:35:38 +0100 Subject: [PATCH 5/6] fix: Added sizes to the metadata (#276) * restructing the readme * added mmteb * removed unec. method * Added docstring to metadata * Updated outdated examples * formatting documents * fix: Updated form to be parsed correctly * fix: Added sizes to the metadata this allow for automatic metadata generations * Updated based on feedback * Apply suggestions from code review Co-authored-by: Niklas Muennighoff * updated based on feedback * Added suggestion from review * added correction based on review * reformatted empty fields to None --------- Co-authored-by: Niklas Muennighoff --- docs/adding_a_dataset.md | 2 ++ mteb/abstasks/TaskMetadata.py | 5 +++++ .../BitextMining/da/BornholmskBitextMining.py | 2 ++ .../multilingual/BUCCBitextMining.py | 2 ++ .../multilingual/DiaBLaBitextMining.py | 2 ++ .../multilingual/FloresBitextMining.py | 2 ++ .../multilingual/NorwegianCourtsBitextMining.py | 2 ++ .../multilingual/TatoebaBitextMining.py | 2 ++ .../da/AngryTweetsClassification.py | 2 ++ .../Classification/da/DKHateClassification.py | 2 ++ .../Classification/da/DalajClassification.py | 2 ++ .../da/DanishPoliticalCommentsClassification.py | 2 ++ .../da/DdiscoCohesionClassification.py | 2 ++ .../da/LccSentimentClassification.py | 2 ++ .../en/AmazonPolarityClassification.py | 2 ++ .../Classification/en/Banking77Classification.py | 2 ++ .../Classification/en/EmotionClassification.py | 2 ++ .../Classification/en/ImdbClassification.py | 2 ++ .../en/ToxicConversationsClassification.py | 2 ++ 
.../en/TweetSentimentExtractionClassification.py | 2 ++ .../AmazonCounterfactualClassification.py | 2 ++ .../multilingual/AmazonReviewsClassification.py | 2 ++ .../multilingual/MTOPDomainClassification.py | 2 ++ .../multilingual/MTOPIntentClassification.py | 2 ++ .../multilingual/MasakhaNEWSClassification.py | 2 ++ .../multilingual/MassiveIntentClassification.py | 2 ++ .../MassiveScenarioClassification.py | 2 ++ .../multilingual/NordicLangClassification.py | 2 ++ .../multilingual/ScalaClassification.py | 8 ++++++++ .../Classification/nb/NoRecClassification.py | 2 ++ .../nb/NorwegianParliamentClassification.py | 2 ++ .../Classification/pl/PolishClassification.py | 10 ++++++++++ .../Classification/sv/SweRecClassification.py | 2 ++ .../Classification/zh/CMTEBClassification.py | 12 ++++++++++++ mteb/tasks/Clustering/de/BlurbsClusteringP2P.py | 2 ++ mteb/tasks/Clustering/de/BlurbsClusteringS2S.py | 2 ++ .../tasks/Clustering/de/TenKGnadClusteringP2P.py | 2 ++ .../tasks/Clustering/de/TenKGnadClusteringS2S.py | 2 ++ mteb/tasks/Clustering/en/ArxivClusteringP2P.py | 2 ++ mteb/tasks/Clustering/en/ArxivClusteringS2S.py | 2 ++ mteb/tasks/Clustering/en/BigPatentClustering.py | 2 ++ mteb/tasks/Clustering/en/BiorxivClusteringP2P.py | 2 ++ mteb/tasks/Clustering/en/BiorxivClusteringS2S.py | 2 ++ mteb/tasks/Clustering/en/MedrxivClusteringP2P.py | 2 ++ mteb/tasks/Clustering/en/MedrxivClusteringS2S.py | 2 ++ mteb/tasks/Clustering/en/RedditClustering.py | 2 ++ mteb/tasks/Clustering/en/RedditClusteringP2P.py | 2 ++ .../Clustering/en/StackExchangeClustering.py | 2 ++ .../Clustering/en/StackExchangeClusteringP2P.py | 2 ++ .../Clustering/en/TwentyNewsgroupsClustering.py | 2 ++ mteb/tasks/Clustering/en/WikiCitiesClustering.py | 2 ++ mteb/tasks/Clustering/es/FloresClusteringS2S.py | 2 ++ .../Clustering/es/SpanishNewsClusteringP2P.py | 2 ++ .../tasks/Clustering/fr/AlloProfClusteringP2P.py | 2 ++ .../tasks/Clustering/fr/AlloProfClusteringS2S.py | 2 ++ 
mteb/tasks/Clustering/fr/HALClusteringS2S.py | 2 ++ mteb/tasks/Clustering/fr/MLSUMClusteringP2P.py | 2 ++ mteb/tasks/Clustering/fr/MLSUMClusteringS2S.py | 2 ++ .../multilingual/MasakhaNEWSClusteringP2P.py | 2 ++ .../multilingual/MasakhaNEWSClusteringS2S.py | 2 ++ mteb/tasks/Clustering/pl/PolishClustering.py | 2 ++ mteb/tasks/Clustering/zh/CMTEBClustering.py | 8 ++++++++ .../en/SprintDuplicateQuestionsPC.py | 2 ++ .../en/TwitterSemEval2015PC.py | 2 ++ .../PairClassification/en/TwitterURLCorpusPC.py | 2 ++ .../multilingual/OpusparcusPC.py | 2 ++ .../PairClassification/multilingual/PawsX.py | 2 ++ mteb/tasks/PairClassification/pl/PolishPC.py | 8 ++++++++ .../zh/CMTEBPairClassification.py | 4 ++++ mteb/tasks/Reranking/en/AskUbuntuDupQuestions.py | 2 ++ mteb/tasks/Reranking/en/MindSmallReranking.py | 2 ++ mteb/tasks/Reranking/en/SciDocsReranking.py | 2 ++ .../Reranking/en/StackOverflowDupQuestions.py | 2 ++ mteb/tasks/Reranking/fr/AlloprofReranking.py | 2 ++ mteb/tasks/Reranking/fr/SyntecReranking.py | 2 ++ .../Reranking/multilingual/MIRACLReranking.py | 2 ++ mteb/tasks/Reranking/zh/CMTEBReranking.py | 8 ++++++++ mteb/tasks/Retrieval/de/GerDaLIRRetrieval.py | 2 ++ mteb/tasks/Retrieval/de/GermanDPRRetrieval.py | 2 ++ mteb/tasks/Retrieval/de/GermanQuADRetrieval.py | 2 ++ mteb/tasks/Retrieval/en/ArguAnaRetrieval.py | 2 ++ .../Retrieval/en/CQADupstackAndroidRetrieval.py | 2 ++ .../Retrieval/en/CQADupstackEnglishRetrieval.py | 2 ++ .../Retrieval/en/CQADupstackGamingRetrieval.py | 2 ++ .../Retrieval/en/CQADupstackGisRetrieval.py | 2 ++ .../en/CQADupstackMathematicaRetrieval.py | 2 ++ .../Retrieval/en/CQADupstackPhysicsRetrieval.py | 2 ++ .../en/CQADupstackProgrammersRetrieval.py | 2 ++ .../Retrieval/en/CQADupstackStatsRetrieval.py | 2 ++ .../Retrieval/en/CQADupstackTexRetrieval.py | 2 ++ .../Retrieval/en/CQADupstackUnixRetrieval.py | 2 ++ .../en/CQADupstackWebmastersRetrieval.py | 2 ++ .../en/CQADupstackWordpressRetrieval.py | 2 ++ 
mteb/tasks/Retrieval/en/ClimateFEVERRetrieval.py | 2 ++ mteb/tasks/Retrieval/en/DBPediaRetrieval.py | 2 ++ mteb/tasks/Retrieval/en/FEVERRetrieval.py | 2 ++ mteb/tasks/Retrieval/en/FiQA2018Retrieval.py | 2 ++ mteb/tasks/Retrieval/en/HagridRetrieval.py | 2 ++ mteb/tasks/Retrieval/en/HotpotQARetrieval.py | 2 ++ mteb/tasks/Retrieval/en/MSMARCORetrieval.py | 2 ++ mteb/tasks/Retrieval/en/MSMARCOv2Retrieval.py | 2 ++ mteb/tasks/Retrieval/en/NFCorpusRetrieval.py | 2 ++ mteb/tasks/Retrieval/en/NQRetrieval.py | 2 ++ mteb/tasks/Retrieval/en/NarrativeQARetrieval.py | 2 ++ mteb/tasks/Retrieval/en/QuoraRetrieval.py | 2 ++ mteb/tasks/Retrieval/en/SCIDOCSRetrieval.py | 2 ++ mteb/tasks/Retrieval/en/SciFactRetrieval.py | 2 ++ mteb/tasks/Retrieval/en/TRECCOVIDRetrieval.py | 2 ++ mteb/tasks/Retrieval/en/Touche2020Retrieval.py | 2 ++ .../Retrieval/es/SpanishPassageRetrievalS2P.py | 2 ++ .../Retrieval/es/SpanishPassageRetrievalS2S.py | 2 ++ mteb/tasks/Retrieval/fr/AlloprofRetrieval.py | 2 ++ mteb/tasks/Retrieval/fr/BSARDRetrieval.py | 2 ++ mteb/tasks/Retrieval/fr/SyntecRetrieval.py | 2 ++ mteb/tasks/Retrieval/ko/KoMiracl.py | 2 ++ mteb/tasks/Retrieval/ko/KoMrtydi.py | 2 ++ mteb/tasks/Retrieval/ko/KoStrategyQA.py | 2 ++ .../Retrieval/multilingual/MIRACLRetrieval.py | 2 ++ .../Retrieval/multilingual/MintakaRetrieval.py | 2 ++ .../multilingual/MultiLongDocRetrieval.py | 2 ++ .../Retrieval/multilingual/XMarketRetrieval.py | 2 ++ .../Retrieval/multilingual/XPQARetrieval.py | 2 ++ mteb/tasks/Retrieval/pl/ArguAnaPLRetrieval.py | 2 ++ mteb/tasks/Retrieval/pl/DBPediaPLRetrieval.py | 2 ++ mteb/tasks/Retrieval/pl/FiQAPLRetrieval.py | 2 ++ mteb/tasks/Retrieval/pl/HotpotQAPLRetrieval.py | 2 ++ mteb/tasks/Retrieval/pl/MSMARCOPLRetrieval.py | 2 ++ mteb/tasks/Retrieval/pl/NFCorpusPLRetrieval.py | 2 ++ mteb/tasks/Retrieval/pl/NQPLRetrieval.py | 2 ++ mteb/tasks/Retrieval/pl/QuoraPLRetrieval.py | 2 ++ mteb/tasks/Retrieval/pl/SCIDOCSPLRetrieval.py | 2 ++ mteb/tasks/Retrieval/pl/SciFactPLRetrieval.py | 2 ++ 
mteb/tasks/Retrieval/pl/TRECCOVIDPLRetrieval.py | 2 ++ mteb/tasks/Retrieval/zh/CMTEBRetrieval.py | 16 ++++++++++++++++ mteb/tasks/STS/de/GermanSTSBenchmarkSTS.py | 2 ++ mteb/tasks/STS/en/BiossesSTS.py | 2 ++ mteb/tasks/STS/en/STS12STS.py | 2 ++ mteb/tasks/STS/en/STS13STS.py | 2 ++ mteb/tasks/STS/en/STS14STS.py | 2 ++ mteb/tasks/STS/en/STS15STS.py | 2 ++ mteb/tasks/STS/en/STS16STS.py | 2 ++ mteb/tasks/STS/en/STSBenchmarkSTS.py | 2 ++ mteb/tasks/STS/en/SickrSTS.py | 2 ++ mteb/tasks/STS/es/STSES.py | 2 ++ mteb/tasks/STS/fr/SickFrSTS.py | 2 ++ .../STS/multilingual/STS17CrosslingualSTS.py | 2 ++ .../STS/multilingual/STS22CrosslingualSTS.py | 2 ++ .../multilingual/STSBenchmarkMultilingualSTS.py | 2 ++ mteb/tasks/STS/pl/PolishSTS.py | 4 ++++ mteb/tasks/STS/zh/CMTEBSTS.py | 14 ++++++++++++++ .../Summarization/en/SummEvalSummarization.py | 2 ++ .../Summarization/fr/SummEvalFrSummarization.py | 6 ++++-- 152 files changed, 381 insertions(+), 2 deletions(-) diff --git a/docs/adding_a_dataset.md b/docs/adding_a_dataset.md index 8086d971cc..67ee681037 100644 --- a/docs/adding_a_dataset.md +++ b/docs/adding_a_dataset.md @@ -45,6 +45,8 @@ class SciDocsReranking(AbsTaskReranking): dialect=None, text_creation="found", bibtex_citation= ... # removed for brevity + n_samples={"test": 19599}, + avg_character_length={"test": 69.0}, ) # testing the task with a model: diff --git a/mteb/abstasks/TaskMetadata.py b/mteb/abstasks/TaskMetadata.py index 5ff4412bdb..1babbe978c 100644 --- a/mteb/abstasks/TaskMetadata.py +++ b/mteb/abstasks/TaskMetadata.py @@ -117,6 +117,8 @@ class TaskMetadata(BaseModel): text_creation: The method of text creation. Includes "found", "created", "machine-translated", "machine-translated and verified", and "machine-translated and localized". bibtex_citation: The BibTeX citation for the dataset. + n_samples: The number of samples in the dataset. This should only be for the splits evaluated on. 
+ avg_character_length: The average character length of the samples in the dataset. This should only be for the splits evaluated on. """ hf_hub_name: str @@ -144,3 +146,6 @@ class TaskMetadata(BaseModel): text_creation: TEXT_CREATION_METHOD | None bibtex_citation: str | None + + n_samples: dict[str, int] | None + avg_character_length: dict[str, float] | None diff --git a/mteb/tasks/BitextMining/da/BornholmskBitextMining.py b/mteb/tasks/BitextMining/da/BornholmskBitextMining.py index da483e3891..e702f499cf 100644 --- a/mteb/tasks/BitextMining/da/BornholmskBitextMining.py +++ b/mteb/tasks/BitextMining/da/BornholmskBitextMining.py @@ -28,6 +28,8 @@ class BornholmBitextMining(AbsTaskBitextMining): dialect=None, text_creation=None, bibtex_citation=None, + avg_character_length={"test": 89.7}, + n_samples={"test": 500}, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/BitextMining/multilingual/BUCCBitextMining.py b/mteb/tasks/BitextMining/multilingual/BUCCBitextMining.py index 11c1ea9952..f2c10a1d37 100644 --- a/mteb/tasks/BitextMining/multilingual/BUCCBitextMining.py +++ b/mteb/tasks/BitextMining/multilingual/BUCCBitextMining.py @@ -29,4 +29,6 @@ class BUCCBitextMining(AbsTaskBitextMining, CrosslingualTask): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 641684}, + avg_character_length={"test": 101.3}, ) diff --git a/mteb/tasks/BitextMining/multilingual/DiaBLaBitextMining.py b/mteb/tasks/BitextMining/multilingual/DiaBLaBitextMining.py index d9d90196ae..73d0e013ed 100644 --- a/mteb/tasks/BitextMining/multilingual/DiaBLaBitextMining.py +++ b/mteb/tasks/BitextMining/multilingual/DiaBLaBitextMining.py @@ -29,6 +29,8 @@ class DiaBLaBitextMining(AbsTaskBitextMining, CrosslingualTask): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/BitextMining/multilingual/FloresBitextMining.py 
b/mteb/tasks/BitextMining/multilingual/FloresBitextMining.py index a03e3220e5..0e42ddce2c 100644 --- a/mteb/tasks/BitextMining/multilingual/FloresBitextMining.py +++ b/mteb/tasks/BitextMining/multilingual/FloresBitextMining.py @@ -254,6 +254,8 @@ class FloresBitextMining(AbsTaskBitextMining, CrosslingualTask): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"dev": 997, "devtest": 1012}, + avg_character_length={}, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/BitextMining/multilingual/NorwegianCourtsBitextMining.py b/mteb/tasks/BitextMining/multilingual/NorwegianCourtsBitextMining.py index 01acd7f900..06c52e5a35 100644 --- a/mteb/tasks/BitextMining/multilingual/NorwegianCourtsBitextMining.py +++ b/mteb/tasks/BitextMining/multilingual/NorwegianCourtsBitextMining.py @@ -28,6 +28,8 @@ class NorwegianCourtsBitextMining(AbsTaskBitextMining): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 2050}, + avg_character_length={"test": 1884.0}, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/BitextMining/multilingual/TatoebaBitextMining.py b/mteb/tasks/BitextMining/multilingual/TatoebaBitextMining.py index 8983f442c9..c57672476f 100644 --- a/mteb/tasks/BitextMining/multilingual/TatoebaBitextMining.py +++ b/mteb/tasks/BitextMining/multilingual/TatoebaBitextMining.py @@ -142,4 +142,6 @@ class TatoebaBitextMining(AbsTaskBitextMining, CrosslingualTask): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 2000}, + avg_character_length={"test": 39.4}, ) diff --git a/mteb/tasks/Classification/da/AngryTweetsClassification.py b/mteb/tasks/Classification/da/AngryTweetsClassification.py index cba265f003..3e876500e2 100644 --- a/mteb/tasks/Classification/da/AngryTweetsClassification.py +++ b/mteb/tasks/Classification/da/AngryTweetsClassification.py @@ -26,6 +26,8 @@ class AngryTweetsClassification(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 
1050}, + avg_character_length={"test": 156.1}, ) @property diff --git a/mteb/tasks/Classification/da/DKHateClassification.py b/mteb/tasks/Classification/da/DKHateClassification.py index 85582c4e07..aa9bdaeaa7 100644 --- a/mteb/tasks/Classification/da/DKHateClassification.py +++ b/mteb/tasks/Classification/da/DKHateClassification.py @@ -28,6 +28,8 @@ class DKHateClassification(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 329}, + avg_character_length={"test": 104.0}, ) @property diff --git a/mteb/tasks/Classification/da/DalajClassification.py b/mteb/tasks/Classification/da/DalajClassification.py index 20785d18b6..28e899cf42 100644 --- a/mteb/tasks/Classification/da/DalajClassification.py +++ b/mteb/tasks/Classification/da/DalajClassification.py @@ -29,6 +29,8 @@ class DalajClassification(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 444}, + avg_character_length={"test": 243.8}, ) @property diff --git a/mteb/tasks/Classification/da/DanishPoliticalCommentsClassification.py b/mteb/tasks/Classification/da/DanishPoliticalCommentsClassification.py index 6dfe9ff582..fc7cb03921 100644 --- a/mteb/tasks/Classification/da/DanishPoliticalCommentsClassification.py +++ b/mteb/tasks/Classification/da/DanishPoliticalCommentsClassification.py @@ -28,6 +28,8 @@ class DanishPoliticalCommentsClassification(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"train": 9010}, + avg_character_length={"train": 69.9}, ) @property diff --git a/mteb/tasks/Classification/da/DdiscoCohesionClassification.py b/mteb/tasks/Classification/da/DdiscoCohesionClassification.py index bfe5e303f5..6c1843868c 100644 --- a/mteb/tasks/Classification/da/DdiscoCohesionClassification.py +++ b/mteb/tasks/Classification/da/DdiscoCohesionClassification.py @@ -57,6 +57,8 @@ class DdiscoCohesionClassification(AbsTaskClassification): abstract = "To date, there has been 
no resource for studying discourse coherence on real-world Danish texts. Discourse coherence has mostly been approached with the assumption that incoherent texts can be represented by coherent texts in which sentences have been shuffled. However, incoherent real-world texts rarely resemble that. We thus present DDisCo, a dataset including text from the Danish Wikipedia and Reddit annotated for discourse coherence. We choose to annotate real-world texts instead of relying on artificially incoherent text for training and testing models. Then, we evaluate the performance of several methods, including neural networks, on the dataset.", } """, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Classification/da/LccSentimentClassification.py b/mteb/tasks/Classification/da/LccSentimentClassification.py index e1c000a446..9693f3561d 100644 --- a/mteb/tasks/Classification/da/LccSentimentClassification.py +++ b/mteb/tasks/Classification/da/LccSentimentClassification.py @@ -26,6 +26,8 @@ class LccSentimentClassification(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 150}, + avg_character_length={"test": 118.7}, ) @property diff --git a/mteb/tasks/Classification/en/AmazonPolarityClassification.py b/mteb/tasks/Classification/en/AmazonPolarityClassification.py index 18334d3d95..4404d9a2ef 100644 --- a/mteb/tasks/Classification/en/AmazonPolarityClassification.py +++ b/mteb/tasks/Classification/en/AmazonPolarityClassification.py @@ -27,4 +27,6 @@ class AmazonPolarityClassification(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 400000}, + avg_character_length={"test": 431.4}, ) diff --git a/mteb/tasks/Classification/en/Banking77Classification.py b/mteb/tasks/Classification/en/Banking77Classification.py index 935dc47a3c..11f4b34b71 100644 --- a/mteb/tasks/Classification/en/Banking77Classification.py +++ 
b/mteb/tasks/Classification/en/Banking77Classification.py @@ -27,4 +27,6 @@ class Banking77Classification(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 3080}, + avg_character_length={"test": 54.2}, ) diff --git a/mteb/tasks/Classification/en/EmotionClassification.py b/mteb/tasks/Classification/en/EmotionClassification.py index 1b3379b7d6..b8e0a016d7 100644 --- a/mteb/tasks/Classification/en/EmotionClassification.py +++ b/mteb/tasks/Classification/en/EmotionClassification.py @@ -27,6 +27,8 @@ class EmotionClassification(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"validation": 2000, "test": 2000}, + avg_character_length={"validation": 95.3, "test": 95.6}, ) @property diff --git a/mteb/tasks/Classification/en/ImdbClassification.py b/mteb/tasks/Classification/en/ImdbClassification.py index dfcae3d7a2..89fe76c256 100644 --- a/mteb/tasks/Classification/en/ImdbClassification.py +++ b/mteb/tasks/Classification/en/ImdbClassification.py @@ -27,4 +27,6 @@ class ImdbClassification(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 25000}, + avg_character_length={"test": 1293.8}, ) diff --git a/mteb/tasks/Classification/en/ToxicConversationsClassification.py b/mteb/tasks/Classification/en/ToxicConversationsClassification.py index 864c770495..f9a2ed3bf0 100644 --- a/mteb/tasks/Classification/en/ToxicConversationsClassification.py +++ b/mteb/tasks/Classification/en/ToxicConversationsClassification.py @@ -27,6 +27,8 @@ class ToxicConversationsClassification(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 50000}, + avg_character_length={"test": 296.6}, ) @property diff --git a/mteb/tasks/Classification/en/TweetSentimentExtractionClassification.py b/mteb/tasks/Classification/en/TweetSentimentExtractionClassification.py index e8288dd0d6..7dde91e1ac 100644 --- 
a/mteb/tasks/Classification/en/TweetSentimentExtractionClassification.py +++ b/mteb/tasks/Classification/en/TweetSentimentExtractionClassification.py @@ -27,6 +27,8 @@ class TweetSentimentExtractionClassification(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 3534}, + avg_character_length={"test": 67.8}, ) @property diff --git a/mteb/tasks/Classification/multilingual/AmazonCounterfactualClassification.py b/mteb/tasks/Classification/multilingual/AmazonCounterfactualClassification.py index 5e6f1b8eab..2796a3a048 100644 --- a/mteb/tasks/Classification/multilingual/AmazonCounterfactualClassification.py +++ b/mteb/tasks/Classification/multilingual/AmazonCounterfactualClassification.py @@ -31,6 +31,8 @@ class AmazonCounterfactualClassification(MultilingualTask, AbsTaskClassification dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"validation": 335, "test": 670}, + avg_character_length={"validation": 109.2, "test": 106.1}, ) @property diff --git a/mteb/tasks/Classification/multilingual/AmazonReviewsClassification.py b/mteb/tasks/Classification/multilingual/AmazonReviewsClassification.py index 3655db992b..78380a3ec0 100644 --- a/mteb/tasks/Classification/multilingual/AmazonReviewsClassification.py +++ b/mteb/tasks/Classification/multilingual/AmazonReviewsClassification.py @@ -29,4 +29,6 @@ class AmazonReviewsClassification(MultilingualTask, AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"validation": 30000, "test": 30000}, + avg_character_length={"validation": 159.2, "test": 160.4}, ) diff --git a/mteb/tasks/Classification/multilingual/MTOPDomainClassification.py b/mteb/tasks/Classification/multilingual/MTOPDomainClassification.py index f535c6ec62..b67da6b4d5 100644 --- a/mteb/tasks/Classification/multilingual/MTOPDomainClassification.py +++ b/mteb/tasks/Classification/multilingual/MTOPDomainClassification.py @@ -29,4 +29,6 @@ class 
MTOPDomainClassification(MultilingualTask, AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"validation": 2235, "test": 4386}, + avg_character_length={"validation": 36.5, "test": 36.8}, ) diff --git a/mteb/tasks/Classification/multilingual/MTOPIntentClassification.py b/mteb/tasks/Classification/multilingual/MTOPIntentClassification.py index 0443ce8933..1dda37b2e4 100644 --- a/mteb/tasks/Classification/multilingual/MTOPIntentClassification.py +++ b/mteb/tasks/Classification/multilingual/MTOPIntentClassification.py @@ -29,4 +29,6 @@ class MTOPIntentClassification(MultilingualTask, AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"validation": 2235, "test": 4386}, + avg_character_length={"validation": 36.5, "test": 36.8}, ) diff --git a/mteb/tasks/Classification/multilingual/MasakhaNEWSClassification.py b/mteb/tasks/Classification/multilingual/MasakhaNEWSClassification.py index 93114197d5..16c26b8d6a 100644 --- a/mteb/tasks/Classification/multilingual/MasakhaNEWSClassification.py +++ b/mteb/tasks/Classification/multilingual/MasakhaNEWSClassification.py @@ -46,4 +46,6 @@ class MasakhaNEWSClassification(AbsTaskClassification, MultilingualTask): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 422}, + avg_character_length={"test": 5116.6}, ) diff --git a/mteb/tasks/Classification/multilingual/MassiveIntentClassification.py b/mteb/tasks/Classification/multilingual/MassiveIntentClassification.py index ba9fbafe63..b5736ff128 100644 --- a/mteb/tasks/Classification/multilingual/MassiveIntentClassification.py +++ b/mteb/tasks/Classification/multilingual/MassiveIntentClassification.py @@ -81,4 +81,6 @@ class MassiveIntentClassification(MultilingualTask, AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"validation": 2033, "test": 2974}, + avg_character_length={"validation": 34.8, "test": 34.6}, ) diff --git 
a/mteb/tasks/Classification/multilingual/MassiveScenarioClassification.py b/mteb/tasks/Classification/multilingual/MassiveScenarioClassification.py index 48113970e3..4202369fe8 100644 --- a/mteb/tasks/Classification/multilingual/MassiveScenarioClassification.py +++ b/mteb/tasks/Classification/multilingual/MassiveScenarioClassification.py @@ -81,4 +81,6 @@ class MassiveScenarioClassification(MultilingualTask, AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"validation": 2033, "test": 2974}, + avg_character_length={"validation": 34.8, "test": 34.6}, ) diff --git a/mteb/tasks/Classification/multilingual/NordicLangClassification.py b/mteb/tasks/Classification/multilingual/NordicLangClassification.py index b0b8cdd200..b8a48ba32b 100644 --- a/mteb/tasks/Classification/multilingual/NordicLangClassification.py +++ b/mteb/tasks/Classification/multilingual/NordicLangClassification.py @@ -28,6 +28,8 @@ class NordicLangClassification(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 3000}, + avg_character_length={"test": 78.2}, ) @property diff --git a/mteb/tasks/Classification/multilingual/ScalaClassification.py b/mteb/tasks/Classification/multilingual/ScalaClassification.py index 80b817eaaa..333c65d37a 100644 --- a/mteb/tasks/Classification/multilingual/ScalaClassification.py +++ b/mteb/tasks/Classification/multilingual/ScalaClassification.py @@ -28,6 +28,8 @@ class ScalaDaClassification(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 1024}, + avg_character_length={"test": 109.4}, ) @property @@ -82,6 +84,8 @@ class ScalaNbClassification(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 1024}, + avg_character_length={"test": 98.4}, ) @property @@ -136,6 +140,8 @@ class ScalaNnClassification(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + 
n_samples={"test": 1024}, + avg_character_length={"test": 104.8}, ) @property @@ -190,6 +196,8 @@ class ScalaSvClassification(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 1024}, + avg_character_length={"test": 98.3}, ) @property diff --git a/mteb/tasks/Classification/nb/NoRecClassification.py b/mteb/tasks/Classification/nb/NoRecClassification.py index abe2db8729..2bace59a52 100644 --- a/mteb/tasks/Classification/nb/NoRecClassification.py +++ b/mteb/tasks/Classification/nb/NoRecClassification.py @@ -26,4 +26,6 @@ class NoRecClassification(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 2050}, + avg_character_length={"test": 82}, ) diff --git a/mteb/tasks/Classification/nb/NorwegianParliamentClassification.py b/mteb/tasks/Classification/nb/NorwegianParliamentClassification.py index 82befe0cac..a08da70ac2 100644 --- a/mteb/tasks/Classification/nb/NorwegianParliamentClassification.py +++ b/mteb/tasks/Classification/nb/NorwegianParliamentClassification.py @@ -26,4 +26,6 @@ class NorwegianParliamentClassification(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 1200, "validation": 1200}, + avg_character_length={"test": 1884.0, "validation": 1911.0}, ) diff --git a/mteb/tasks/Classification/pl/PolishClassification.py b/mteb/tasks/Classification/pl/PolishClassification.py index 30ef6e3478..221e0483ff 100644 --- a/mteb/tasks/Classification/pl/PolishClassification.py +++ b/mteb/tasks/Classification/pl/PolishClassification.py @@ -27,6 +27,8 @@ class CbdClassification(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 1000}, + avg_character_length={"test": 93.2}, ) @@ -53,6 +55,8 @@ class PolEmo2InClassification(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @@ -80,6 +84,8 @@ class 
PolEmo2OutClassification(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 722}, + avg_character_length={"test": 756.2}, ) @@ -105,6 +111,8 @@ class AllegroReviewsClassification(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 1006}, + avg_character_length={"test": 477.2}, ) @@ -130,4 +138,6 @@ class PacClassification(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 3453}, + avg_character_length={"test": 185.3}, ) diff --git a/mteb/tasks/Classification/sv/SweRecClassification.py b/mteb/tasks/Classification/sv/SweRecClassification.py index 50a31003dd..8ccf32eec6 100644 --- a/mteb/tasks/Classification/sv/SweRecClassification.py +++ b/mteb/tasks/Classification/sv/SweRecClassification.py @@ -26,4 +26,6 @@ class SweRecClassification(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 1024}, + avg_character_length={"test": 318.8}, ) diff --git a/mteb/tasks/Classification/zh/CMTEBClassification.py b/mteb/tasks/Classification/zh/CMTEBClassification.py index 9fa0f20081..46cba769df 100644 --- a/mteb/tasks/Classification/zh/CMTEBClassification.py +++ b/mteb/tasks/Classification/zh/CMTEBClassification.py @@ -27,6 +27,8 @@ class TNews(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @property @@ -58,6 +60,8 @@ class IFlyTek(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @property @@ -90,6 +94,8 @@ class MultilingualSentiment(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @property @@ -121,6 +127,8 @@ class JDReview(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + 
avg_character_length=None, ) @property @@ -152,6 +160,8 @@ class OnlineShopping(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @property @@ -183,6 +193,8 @@ class Waimai(AbsTaskClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @property diff --git a/mteb/tasks/Clustering/de/BlurbsClusteringP2P.py b/mteb/tasks/Clustering/de/BlurbsClusteringP2P.py index 7b3b4f74e8..fe5e76eecc 100644 --- a/mteb/tasks/Clustering/de/BlurbsClusteringP2P.py +++ b/mteb/tasks/Clustering/de/BlurbsClusteringP2P.py @@ -26,4 +26,6 @@ class BlurbsClusteringP2P(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 174637}, + avg_character_length={"test": 664.09}, ) diff --git a/mteb/tasks/Clustering/de/BlurbsClusteringS2S.py b/mteb/tasks/Clustering/de/BlurbsClusteringS2S.py index 640882f72a..d8c95adc13 100644 --- a/mteb/tasks/Clustering/de/BlurbsClusteringS2S.py +++ b/mteb/tasks/Clustering/de/BlurbsClusteringS2S.py @@ -27,4 +27,6 @@ class BlurbsClusteringS2S(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 174637}, + avg_character_length={"test": 23.02}, ) diff --git a/mteb/tasks/Clustering/de/TenKGnadClusteringP2P.py b/mteb/tasks/Clustering/de/TenKGnadClusteringP2P.py index 86eb8af166..28ed56db18 100644 --- a/mteb/tasks/Clustering/de/TenKGnadClusteringP2P.py +++ b/mteb/tasks/Clustering/de/TenKGnadClusteringP2P.py @@ -27,4 +27,6 @@ class TenKGnadClusteringP2P(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 45914}, + avg_character_length={"test": 2641.03}, ) diff --git a/mteb/tasks/Clustering/de/TenKGnadClusteringS2S.py b/mteb/tasks/Clustering/de/TenKGnadClusteringS2S.py index 2788a6de4b..73c44ab343 100644 --- a/mteb/tasks/Clustering/de/TenKGnadClusteringS2S.py +++ 
b/mteb/tasks/Clustering/de/TenKGnadClusteringS2S.py @@ -27,4 +27,6 @@ class TenKGnadClusteringS2S(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 45914}, + avg_character_length={"test": 50.96}, ) diff --git a/mteb/tasks/Clustering/en/ArxivClusteringP2P.py b/mteb/tasks/Clustering/en/ArxivClusteringP2P.py index cb80bcc2af..b8da09ba77 100644 --- a/mteb/tasks/Clustering/en/ArxivClusteringP2P.py +++ b/mteb/tasks/Clustering/en/ArxivClusteringP2P.py @@ -27,4 +27,6 @@ class ArxivClusteringP2P(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 732723}, + avg_character_length={"test": 1009.98}, ) diff --git a/mteb/tasks/Clustering/en/ArxivClusteringS2S.py b/mteb/tasks/Clustering/en/ArxivClusteringS2S.py index f43dd59c74..dc2cdf5b92 100644 --- a/mteb/tasks/Clustering/en/ArxivClusteringS2S.py +++ b/mteb/tasks/Clustering/en/ArxivClusteringS2S.py @@ -27,4 +27,6 @@ class ArxivClusteringS2S(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 732723}, + avg_character_length={"test": 74}, ) diff --git a/mteb/tasks/Clustering/en/BigPatentClustering.py b/mteb/tasks/Clustering/en/BigPatentClustering.py index 9fe3c45601..4cb9d717b6 100644 --- a/mteb/tasks/Clustering/en/BigPatentClustering.py +++ b/mteb/tasks/Clustering/en/BigPatentClustering.py @@ -27,4 +27,6 @@ class BigPatentClustering(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Clustering/en/BiorxivClusteringP2P.py b/mteb/tasks/Clustering/en/BiorxivClusteringP2P.py index 685073d7d1..ab1ae9806e 100644 --- a/mteb/tasks/Clustering/en/BiorxivClusteringP2P.py +++ b/mteb/tasks/Clustering/en/BiorxivClusteringP2P.py @@ -26,4 +26,6 @@ class BiorxivClusteringP2P(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 75000}, + avg_character_length={"test": 
1666.2}, ) diff --git a/mteb/tasks/Clustering/en/BiorxivClusteringS2S.py b/mteb/tasks/Clustering/en/BiorxivClusteringS2S.py index 5fb38c3bdd..e11cdd656b 100644 --- a/mteb/tasks/Clustering/en/BiorxivClusteringS2S.py +++ b/mteb/tasks/Clustering/en/BiorxivClusteringS2S.py @@ -26,4 +26,6 @@ class BiorxivClusteringS2S(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 75000}, + avg_character_length={"test": 101.6}, ) diff --git a/mteb/tasks/Clustering/en/MedrxivClusteringP2P.py b/mteb/tasks/Clustering/en/MedrxivClusteringP2P.py index 637f1ff3a0..9b7f28a301 100644 --- a/mteb/tasks/Clustering/en/MedrxivClusteringP2P.py +++ b/mteb/tasks/Clustering/en/MedrxivClusteringP2P.py @@ -27,4 +27,6 @@ class MedrxivClusteringP2P(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 375000}, + avg_character_length={"test": 1981.2}, ) diff --git a/mteb/tasks/Clustering/en/MedrxivClusteringS2S.py b/mteb/tasks/Clustering/en/MedrxivClusteringS2S.py index 46023eaf5b..be24d1c769 100644 --- a/mteb/tasks/Clustering/en/MedrxivClusteringS2S.py +++ b/mteb/tasks/Clustering/en/MedrxivClusteringS2S.py @@ -27,4 +27,6 @@ class MedrxivClusteringS2S(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 375000}, + avg_character_length={"test": 114.7}, ) diff --git a/mteb/tasks/Clustering/en/RedditClustering.py b/mteb/tasks/Clustering/en/RedditClustering.py index 2295ac323c..dcbeacf37d 100644 --- a/mteb/tasks/Clustering/en/RedditClustering.py +++ b/mteb/tasks/Clustering/en/RedditClustering.py @@ -27,4 +27,6 @@ class RedditClustering(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 420464}, + avg_character_length={"test": 64.7}, ) diff --git a/mteb/tasks/Clustering/en/RedditClusteringP2P.py b/mteb/tasks/Clustering/en/RedditClusteringP2P.py index d6003491d5..7f394fe366 100644 --- a/mteb/tasks/Clustering/en/RedditClusteringP2P.py 
+++ b/mteb/tasks/Clustering/en/RedditClusteringP2P.py @@ -27,4 +27,6 @@ class RedditClusteringP2P(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 459399}, + avg_character_length={"test": 727.7}, ) diff --git a/mteb/tasks/Clustering/en/StackExchangeClustering.py b/mteb/tasks/Clustering/en/StackExchangeClustering.py index 5394097e2d..6a37776324 100644 --- a/mteb/tasks/Clustering/en/StackExchangeClustering.py +++ b/mteb/tasks/Clustering/en/StackExchangeClustering.py @@ -27,4 +27,6 @@ class StackExchangeClustering(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 373850}, + avg_character_length={"test": 57.0}, ) diff --git a/mteb/tasks/Clustering/en/StackExchangeClusteringP2P.py b/mteb/tasks/Clustering/en/StackExchangeClusteringP2P.py index 843b7de216..1643a34893 100644 --- a/mteb/tasks/Clustering/en/StackExchangeClusteringP2P.py +++ b/mteb/tasks/Clustering/en/StackExchangeClusteringP2P.py @@ -27,4 +27,6 @@ class StackExchangeClusteringP2P(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 75000}, + avg_character_length={"test": 1090.7}, ) diff --git a/mteb/tasks/Clustering/en/TwentyNewsgroupsClustering.py b/mteb/tasks/Clustering/en/TwentyNewsgroupsClustering.py index 128741efdf..1d8ff3d70c 100644 --- a/mteb/tasks/Clustering/en/TwentyNewsgroupsClustering.py +++ b/mteb/tasks/Clustering/en/TwentyNewsgroupsClustering.py @@ -27,4 +27,6 @@ class TwentyNewsgroupsClustering(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 59545}, + avg_character_length={"test": 32.0}, ) diff --git a/mteb/tasks/Clustering/en/WikiCitiesClustering.py b/mteb/tasks/Clustering/en/WikiCitiesClustering.py index 88d50f8214..65fac916a7 100644 --- a/mteb/tasks/Clustering/en/WikiCitiesClustering.py +++ b/mteb/tasks/Clustering/en/WikiCitiesClustering.py @@ -27,4 +27,6 @@ class WikiCitiesClustering(AbsTaskClustering): 
dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Clustering/es/FloresClusteringS2S.py b/mteb/tasks/Clustering/es/FloresClusteringS2S.py index 350328ee2c..f3afb3130c 100644 --- a/mteb/tasks/Clustering/es/FloresClusteringS2S.py +++ b/mteb/tasks/Clustering/es/FloresClusteringS2S.py @@ -27,4 +27,6 @@ class FloresClusteringS2S(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Clustering/es/SpanishNewsClusteringP2P.py b/mteb/tasks/Clustering/es/SpanishNewsClusteringP2P.py index fa7b369bd5..bfa1b16c91 100644 --- a/mteb/tasks/Clustering/es/SpanishNewsClusteringP2P.py +++ b/mteb/tasks/Clustering/es/SpanishNewsClusteringP2P.py @@ -27,4 +27,6 @@ class SpanishNewsClusteringP2P(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Clustering/fr/AlloProfClusteringP2P.py b/mteb/tasks/Clustering/fr/AlloProfClusteringP2P.py index 92f9c7db91..3e4e8a9509 100644 --- a/mteb/tasks/Clustering/fr/AlloProfClusteringP2P.py +++ b/mteb/tasks/Clustering/fr/AlloProfClusteringP2P.py @@ -30,6 +30,8 @@ class AlloProfClusteringP2P(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Clustering/fr/AlloProfClusteringS2S.py b/mteb/tasks/Clustering/fr/AlloProfClusteringS2S.py index 1b26d68f25..8fdf9b52ba 100644 --- a/mteb/tasks/Clustering/fr/AlloProfClusteringS2S.py +++ b/mteb/tasks/Clustering/fr/AlloProfClusteringS2S.py @@ -30,6 +30,8 @@ class AlloProfClusteringS2S(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Clustering/fr/HALClusteringS2S.py 
b/mteb/tasks/Clustering/fr/HALClusteringS2S.py index 8a88ab08aa..69c5f2fe41 100644 --- a/mteb/tasks/Clustering/fr/HALClusteringS2S.py +++ b/mteb/tasks/Clustering/fr/HALClusteringS2S.py @@ -30,6 +30,8 @@ class HALClusteringS2S(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Clustering/fr/MLSUMClusteringP2P.py b/mteb/tasks/Clustering/fr/MLSUMClusteringP2P.py index d584a069c5..32c0cd30a3 100644 --- a/mteb/tasks/Clustering/fr/MLSUMClusteringP2P.py +++ b/mteb/tasks/Clustering/fr/MLSUMClusteringP2P.py @@ -30,6 +30,8 @@ class MLSUMClusteringP2P(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Clustering/fr/MLSUMClusteringS2S.py b/mteb/tasks/Clustering/fr/MLSUMClusteringS2S.py index a2de3dc86b..fa3fd4630c 100644 --- a/mteb/tasks/Clustering/fr/MLSUMClusteringS2S.py +++ b/mteb/tasks/Clustering/fr/MLSUMClusteringS2S.py @@ -30,6 +30,8 @@ class MLSUMClusteringS2S(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Clustering/multilingual/MasakhaNEWSClusteringP2P.py b/mteb/tasks/Clustering/multilingual/MasakhaNEWSClusteringP2P.py index 3239d8ebf5..8c0ccd14d8 100644 --- a/mteb/tasks/Clustering/multilingual/MasakhaNEWSClusteringP2P.py +++ b/mteb/tasks/Clustering/multilingual/MasakhaNEWSClusteringP2P.py @@ -49,6 +49,8 @@ class MasakhaNEWSClusteringP2P(AbsTaskClustering, MultilingualTask): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Clustering/multilingual/MasakhaNEWSClusteringS2S.py b/mteb/tasks/Clustering/multilingual/MasakhaNEWSClusteringS2S.py index db8694f68a..36b5d3cf2a 100644 
--- a/mteb/tasks/Clustering/multilingual/MasakhaNEWSClusteringS2S.py +++ b/mteb/tasks/Clustering/multilingual/MasakhaNEWSClusteringS2S.py @@ -51,6 +51,8 @@ class MasakhaNEWSClusteringS2S(AbsTaskClustering, MultilingualTask): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Clustering/pl/PolishClustering.py b/mteb/tasks/Clustering/pl/PolishClustering.py index 9b56098b32..5ebd8caaf6 100644 --- a/mteb/tasks/Clustering/pl/PolishClustering.py +++ b/mteb/tasks/Clustering/pl/PolishClustering.py @@ -28,4 +28,6 @@ class EightTagsClustering(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Clustering/zh/CMTEBClustering.py b/mteb/tasks/Clustering/zh/CMTEBClustering.py index 6a360ecbc6..46d601858b 100644 --- a/mteb/tasks/Clustering/zh/CMTEBClustering.py +++ b/mteb/tasks/Clustering/zh/CMTEBClustering.py @@ -27,6 +27,8 @@ class CLSClusteringS2S(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @@ -52,6 +54,8 @@ class CLSClusteringP2P(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @@ -77,6 +81,8 @@ class ThuNewsClusteringS2S(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @@ -102,4 +108,6 @@ class ThuNewsClusteringP2P(AbsTaskClustering): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/PairClassification/en/SprintDuplicateQuestionsPC.py b/mteb/tasks/PairClassification/en/SprintDuplicateQuestionsPC.py index ec5ca9ae68..f81d860f72 100644 --- a/mteb/tasks/PairClassification/en/SprintDuplicateQuestionsPC.py +++ 
b/mteb/tasks/PairClassification/en/SprintDuplicateQuestionsPC.py @@ -27,4 +27,6 @@ class SprintDuplicateQuestionsPC(AbsTaskPairClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"validation": 101000, "test": 101000}, + avg_character_length={"validation": 65.2, "test": 67.9}, ) diff --git a/mteb/tasks/PairClassification/en/TwitterSemEval2015PC.py b/mteb/tasks/PairClassification/en/TwitterSemEval2015PC.py index 0bc8ef0c1a..e1a04745d0 100644 --- a/mteb/tasks/PairClassification/en/TwitterSemEval2015PC.py +++ b/mteb/tasks/PairClassification/en/TwitterSemEval2015PC.py @@ -27,4 +27,6 @@ class TwitterSemEval2015PC(AbsTaskPairClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 16777}, + avg_character_length={"test": 38.3}, ) diff --git a/mteb/tasks/PairClassification/en/TwitterURLCorpusPC.py b/mteb/tasks/PairClassification/en/TwitterURLCorpusPC.py index 98316f9626..8f70cf30c3 100644 --- a/mteb/tasks/PairClassification/en/TwitterURLCorpusPC.py +++ b/mteb/tasks/PairClassification/en/TwitterURLCorpusPC.py @@ -27,4 +27,6 @@ class TwitterURLCorpusPC(AbsTaskPairClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 51534}, + avg_character_length={"test": 79.5}, ) diff --git a/mteb/tasks/PairClassification/multilingual/OpusparcusPC.py b/mteb/tasks/PairClassification/multilingual/OpusparcusPC.py index c49bca69b7..9aebcab6a6 100644 --- a/mteb/tasks/PairClassification/multilingual/OpusparcusPC.py +++ b/mteb/tasks/PairClassification/multilingual/OpusparcusPC.py @@ -31,6 +31,8 @@ class OpusparcusPC(AbsTaskPairClassification, MultilingualTask): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/PairClassification/multilingual/PawsX.py b/mteb/tasks/PairClassification/multilingual/PawsX.py index 1bacc739b3..2d5f840d39 100644 --- 
a/mteb/tasks/PairClassification/multilingual/PawsX.py +++ b/mteb/tasks/PairClassification/multilingual/PawsX.py @@ -30,6 +30,8 @@ class PawsX(MultilingualTask, AbsTaskPairClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/PairClassification/pl/PolishPC.py b/mteb/tasks/PairClassification/pl/PolishPC.py index c26fe307ab..ed383ccb4e 100644 --- a/mteb/tasks/PairClassification/pl/PolishPC.py +++ b/mteb/tasks/PairClassification/pl/PolishPC.py @@ -27,6 +27,8 @@ class SickePLPC(AbsTaskPairClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @@ -52,6 +54,8 @@ class PpcPC(AbsTaskPairClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @@ -77,6 +81,8 @@ class CdscePC(AbsTaskPairClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @@ -102,4 +108,6 @@ class PscPC(AbsTaskPairClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/PairClassification/zh/CMTEBPairClassification.py b/mteb/tasks/PairClassification/zh/CMTEBPairClassification.py index ca4b6a25fb..4869cc9ca6 100644 --- a/mteb/tasks/PairClassification/zh/CMTEBPairClassification.py +++ b/mteb/tasks/PairClassification/zh/CMTEBPairClassification.py @@ -26,6 +26,8 @@ class Ocnli(AbsTaskPairClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @@ -51,4 +53,6 @@ class Cmnli(AbsTaskPairClassification): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Reranking/en/AskUbuntuDupQuestions.py b/mteb/tasks/Reranking/en/AskUbuntuDupQuestions.py index ab6fe217fe..ff7ad376d6 
100644 --- a/mteb/tasks/Reranking/en/AskUbuntuDupQuestions.py +++ b/mteb/tasks/Reranking/en/AskUbuntuDupQuestions.py @@ -27,4 +27,6 @@ class AskUbuntuDupQuestions(AbsTaskReranking): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 2255}, + avg_character_length={"test": 52.5}, ) diff --git a/mteb/tasks/Reranking/en/MindSmallReranking.py b/mteb/tasks/Reranking/en/MindSmallReranking.py index a458e694d0..75353ece11 100644 --- a/mteb/tasks/Reranking/en/MindSmallReranking.py +++ b/mteb/tasks/Reranking/en/MindSmallReranking.py @@ -27,4 +27,6 @@ class MindSmallReranking(AbsTaskReranking): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 107968}, + avg_character_length={"test": 70.9}, ) diff --git a/mteb/tasks/Reranking/en/SciDocsReranking.py b/mteb/tasks/Reranking/en/SciDocsReranking.py index 150aba8ef5..b58faaa0b3 100644 --- a/mteb/tasks/Reranking/en/SciDocsReranking.py +++ b/mteb/tasks/Reranking/en/SciDocsReranking.py @@ -49,4 +49,6 @@ class SciDocsReranking(AbsTaskReranking): abstract = "Representation learning is a critical ingredient for natural language processing systems. Recent Transformer language models like BERT learn powerful textual representations, but these models are targeted towards token- and sentence-level training objectives and do not leverage information on inter-document relatedness, which limits their document-level representation power. For applications on scientific documents, such as classification and recommendation, accurate embeddings of documents are a necessity. We propose SPECTER, a new method to generate document-level embedding of scientific papers based on pretraining a Transformer language model on a powerful signal of document-level relatedness: the citation graph. Unlike existing pretrained language models, Specter can be easily applied to downstream applications without task-specific fine-tuning. 
Additionally, to encourage further research on document-level models, we introduce SciDocs, a new evaluation benchmark consisting of seven document-level tasks ranging from citation prediction, to document classification and recommendation. We show that Specter outperforms a variety of competitive baselines on the benchmark.", } """, + n_samples={"test": 19599}, + avg_character_length={"test": 69.0}, ) diff --git a/mteb/tasks/Reranking/en/StackOverflowDupQuestions.py b/mteb/tasks/Reranking/en/StackOverflowDupQuestions.py index 6e61ce727a..eb354efbe5 100644 --- a/mteb/tasks/Reranking/en/StackOverflowDupQuestions.py +++ b/mteb/tasks/Reranking/en/StackOverflowDupQuestions.py @@ -27,4 +27,6 @@ class StackOverflowDupQuestions(AbsTaskReranking): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 3467}, + avg_character_length={"test": 49.8}, ) diff --git a/mteb/tasks/Reranking/fr/AlloprofReranking.py b/mteb/tasks/Reranking/fr/AlloprofReranking.py index c70f70e875..b349e9be9d 100644 --- a/mteb/tasks/Reranking/fr/AlloprofReranking.py +++ b/mteb/tasks/Reranking/fr/AlloprofReranking.py @@ -27,4 +27,6 @@ class AlloprofReranking(AbsTaskReranking): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Reranking/fr/SyntecReranking.py b/mteb/tasks/Reranking/fr/SyntecReranking.py index 7cc945c3ef..0d4b229c5d 100644 --- a/mteb/tasks/Reranking/fr/SyntecReranking.py +++ b/mteb/tasks/Reranking/fr/SyntecReranking.py @@ -27,4 +27,6 @@ class SyntecReranking(AbsTaskReranking): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Reranking/multilingual/MIRACLReranking.py b/mteb/tasks/Reranking/multilingual/MIRACLReranking.py index ca9c46ca3e..8f2861cf30 100644 --- a/mteb/tasks/Reranking/multilingual/MIRACLReranking.py +++ b/mteb/tasks/Reranking/multilingual/MIRACLReranking.py @@ -28,4 +28,6 @@ class 
MIRACLReranking(MultilingualTask, AbsTaskReranking): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Reranking/zh/CMTEBReranking.py b/mteb/tasks/Reranking/zh/CMTEBReranking.py index c0508288f6..2618bf6dde 100644 --- a/mteb/tasks/Reranking/zh/CMTEBReranking.py +++ b/mteb/tasks/Reranking/zh/CMTEBReranking.py @@ -26,6 +26,8 @@ class T2Reranking(AbsTaskReranking): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @@ -51,6 +53,8 @@ class MMarcoReranking(AbsTaskReranking): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @@ -76,6 +80,8 @@ class CMedQAv1(AbsTaskReranking): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @@ -101,4 +107,6 @@ class CMedQAv2(AbsTaskReranking): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/de/GerDaLIRRetrieval.py b/mteb/tasks/Retrieval/de/GerDaLIRRetrieval.py index 85f56c30c7..e8ad5500f7 100644 --- a/mteb/tasks/Retrieval/de/GerDaLIRRetrieval.py +++ b/mteb/tasks/Retrieval/de/GerDaLIRRetrieval.py @@ -30,6 +30,8 @@ class GerDaLIR(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/de/GermanDPRRetrieval.py b/mteb/tasks/Retrieval/de/GermanDPRRetrieval.py index 11b60a82d6..ff0fae54b4 100644 --- a/mteb/tasks/Retrieval/de/GermanDPRRetrieval.py +++ b/mteb/tasks/Retrieval/de/GermanDPRRetrieval.py @@ -32,6 +32,8 @@ class GermanDPR(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @staticmethod diff --git a/mteb/tasks/Retrieval/de/GermanQuADRetrieval.py b/mteb/tasks/Retrieval/de/GermanQuADRetrieval.py index 
68cbea11d5..5adfd1d5ae 100644 --- a/mteb/tasks/Retrieval/de/GermanQuADRetrieval.py +++ b/mteb/tasks/Retrieval/de/GermanQuADRetrieval.py @@ -49,6 +49,8 @@ class GermanQuADRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/en/ArguAnaRetrieval.py b/mteb/tasks/Retrieval/en/ArguAnaRetrieval.py index 6d55176bb3..51e72904aa 100644 --- a/mteb/tasks/Retrieval/en/ArguAnaRetrieval.py +++ b/mteb/tasks/Retrieval/en/ArguAnaRetrieval.py @@ -27,4 +27,6 @@ class ArguAna(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/en/CQADupstackAndroidRetrieval.py b/mteb/tasks/Retrieval/en/CQADupstackAndroidRetrieval.py index 6589f37eeb..916063bbdb 100644 --- a/mteb/tasks/Retrieval/en/CQADupstackAndroidRetrieval.py +++ b/mteb/tasks/Retrieval/en/CQADupstackAndroidRetrieval.py @@ -27,4 +27,6 @@ class CQADupstackAndroidRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/en/CQADupstackEnglishRetrieval.py b/mteb/tasks/Retrieval/en/CQADupstackEnglishRetrieval.py index 460d0a9edb..1e8f20bb88 100644 --- a/mteb/tasks/Retrieval/en/CQADupstackEnglishRetrieval.py +++ b/mteb/tasks/Retrieval/en/CQADupstackEnglishRetrieval.py @@ -27,4 +27,6 @@ class CQADupstackEnglishRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/en/CQADupstackGamingRetrieval.py b/mteb/tasks/Retrieval/en/CQADupstackGamingRetrieval.py index 35a599ba01..7d1dd09bff 100644 --- a/mteb/tasks/Retrieval/en/CQADupstackGamingRetrieval.py +++ b/mteb/tasks/Retrieval/en/CQADupstackGamingRetrieval.py @@ -27,4 +27,6 @@ class CQADupstackGamingRetrieval(AbsTaskRetrieval): 
dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/en/CQADupstackGisRetrieval.py b/mteb/tasks/Retrieval/en/CQADupstackGisRetrieval.py index 891554eda1..79a0bfc617 100644 --- a/mteb/tasks/Retrieval/en/CQADupstackGisRetrieval.py +++ b/mteb/tasks/Retrieval/en/CQADupstackGisRetrieval.py @@ -27,4 +27,6 @@ class CQADupstackGisRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/en/CQADupstackMathematicaRetrieval.py b/mteb/tasks/Retrieval/en/CQADupstackMathematicaRetrieval.py index ebb99388ef..c38d47da37 100644 --- a/mteb/tasks/Retrieval/en/CQADupstackMathematicaRetrieval.py +++ b/mteb/tasks/Retrieval/en/CQADupstackMathematicaRetrieval.py @@ -27,4 +27,6 @@ class CQADupstackMathematicaRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/en/CQADupstackPhysicsRetrieval.py b/mteb/tasks/Retrieval/en/CQADupstackPhysicsRetrieval.py index 54af77e9e4..bfbc53a979 100644 --- a/mteb/tasks/Retrieval/en/CQADupstackPhysicsRetrieval.py +++ b/mteb/tasks/Retrieval/en/CQADupstackPhysicsRetrieval.py @@ -27,4 +27,6 @@ class CQADupstackPhysicsRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/en/CQADupstackProgrammersRetrieval.py b/mteb/tasks/Retrieval/en/CQADupstackProgrammersRetrieval.py index 57638d6dc9..ec561ffa0f 100644 --- a/mteb/tasks/Retrieval/en/CQADupstackProgrammersRetrieval.py +++ b/mteb/tasks/Retrieval/en/CQADupstackProgrammersRetrieval.py @@ -27,4 +27,6 @@ class CQADupstackProgrammersRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git 
a/mteb/tasks/Retrieval/en/CQADupstackStatsRetrieval.py b/mteb/tasks/Retrieval/en/CQADupstackStatsRetrieval.py index e4eb37de5b..0a4c3eb5f6 100644 --- a/mteb/tasks/Retrieval/en/CQADupstackStatsRetrieval.py +++ b/mteb/tasks/Retrieval/en/CQADupstackStatsRetrieval.py @@ -27,4 +27,6 @@ class CQADupstackStatsRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/en/CQADupstackTexRetrieval.py b/mteb/tasks/Retrieval/en/CQADupstackTexRetrieval.py index be36f64ff7..03df962a36 100644 --- a/mteb/tasks/Retrieval/en/CQADupstackTexRetrieval.py +++ b/mteb/tasks/Retrieval/en/CQADupstackTexRetrieval.py @@ -27,4 +27,6 @@ class CQADupstackTexRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/en/CQADupstackUnixRetrieval.py b/mteb/tasks/Retrieval/en/CQADupstackUnixRetrieval.py index 8df00a7a8c..75ff1c7df6 100644 --- a/mteb/tasks/Retrieval/en/CQADupstackUnixRetrieval.py +++ b/mteb/tasks/Retrieval/en/CQADupstackUnixRetrieval.py @@ -27,4 +27,6 @@ class CQADupstackUnixRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/en/CQADupstackWebmastersRetrieval.py b/mteb/tasks/Retrieval/en/CQADupstackWebmastersRetrieval.py index e863d8c36e..9137711fc7 100644 --- a/mteb/tasks/Retrieval/en/CQADupstackWebmastersRetrieval.py +++ b/mteb/tasks/Retrieval/en/CQADupstackWebmastersRetrieval.py @@ -27,4 +27,6 @@ class CQADupstackWebmastersRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/en/CQADupstackWordpressRetrieval.py b/mteb/tasks/Retrieval/en/CQADupstackWordpressRetrieval.py index 0be63da42b..52c56e5b68 100644 --- 
a/mteb/tasks/Retrieval/en/CQADupstackWordpressRetrieval.py +++ b/mteb/tasks/Retrieval/en/CQADupstackWordpressRetrieval.py @@ -27,4 +27,6 @@ class CQADupstackWordpressRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/en/ClimateFEVERRetrieval.py b/mteb/tasks/Retrieval/en/ClimateFEVERRetrieval.py index 67d677050e..c6200461de 100644 --- a/mteb/tasks/Retrieval/en/ClimateFEVERRetrieval.py +++ b/mteb/tasks/Retrieval/en/ClimateFEVERRetrieval.py @@ -27,4 +27,6 @@ class ClimateFEVER(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/en/DBPediaRetrieval.py b/mteb/tasks/Retrieval/en/DBPediaRetrieval.py index 7a82fc7e3c..c64b84a591 100644 --- a/mteb/tasks/Retrieval/en/DBPediaRetrieval.py +++ b/mteb/tasks/Retrieval/en/DBPediaRetrieval.py @@ -27,4 +27,6 @@ class DBPedia(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/en/FEVERRetrieval.py b/mteb/tasks/Retrieval/en/FEVERRetrieval.py index 9ef5670d27..6a6fa87096 100644 --- a/mteb/tasks/Retrieval/en/FEVERRetrieval.py +++ b/mteb/tasks/Retrieval/en/FEVERRetrieval.py @@ -31,4 +31,6 @@ class FEVER(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/en/FiQA2018Retrieval.py b/mteb/tasks/Retrieval/en/FiQA2018Retrieval.py index dd298d28e8..94a4c74845 100644 --- a/mteb/tasks/Retrieval/en/FiQA2018Retrieval.py +++ b/mteb/tasks/Retrieval/en/FiQA2018Retrieval.py @@ -27,4 +27,6 @@ class FiQA2018(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/en/HagridRetrieval.py 
b/mteb/tasks/Retrieval/en/HagridRetrieval.py index 0ea5acba84..2ef21cd21c 100644 --- a/mteb/tasks/Retrieval/en/HagridRetrieval.py +++ b/mteb/tasks/Retrieval/en/HagridRetrieval.py @@ -36,6 +36,8 @@ class HagridRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/en/HotpotQARetrieval.py b/mteb/tasks/Retrieval/en/HotpotQARetrieval.py index 16edce1548..f222ff50d8 100644 --- a/mteb/tasks/Retrieval/en/HotpotQARetrieval.py +++ b/mteb/tasks/Retrieval/en/HotpotQARetrieval.py @@ -30,4 +30,6 @@ class HotpotQA(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/en/MSMARCORetrieval.py b/mteb/tasks/Retrieval/en/MSMARCORetrieval.py index 6eb2af7b9c..be8ce33416 100644 --- a/mteb/tasks/Retrieval/en/MSMARCORetrieval.py +++ b/mteb/tasks/Retrieval/en/MSMARCORetrieval.py @@ -27,4 +27,6 @@ class MSMARCO(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/en/MSMARCOv2Retrieval.py b/mteb/tasks/Retrieval/en/MSMARCOv2Retrieval.py index 80ed17db3d..bf62e0a4a7 100644 --- a/mteb/tasks/Retrieval/en/MSMARCOv2Retrieval.py +++ b/mteb/tasks/Retrieval/en/MSMARCOv2Retrieval.py @@ -27,4 +27,6 @@ class MSMARCOv2(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/en/NFCorpusRetrieval.py b/mteb/tasks/Retrieval/en/NFCorpusRetrieval.py index fbfa2f3bc7..8f1a4ffa38 100644 --- a/mteb/tasks/Retrieval/en/NFCorpusRetrieval.py +++ b/mteb/tasks/Retrieval/en/NFCorpusRetrieval.py @@ -27,4 +27,6 @@ class NFCorpus(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git 
a/mteb/tasks/Retrieval/en/NQRetrieval.py b/mteb/tasks/Retrieval/en/NQRetrieval.py index 9d2e5c9d80..dbcc34a37b 100644 --- a/mteb/tasks/Retrieval/en/NQRetrieval.py +++ b/mteb/tasks/Retrieval/en/NQRetrieval.py @@ -27,4 +27,6 @@ class NQ(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/en/NarrativeQARetrieval.py b/mteb/tasks/Retrieval/en/NarrativeQARetrieval.py index 6ef5b00b72..2f879d8926 100644 --- a/mteb/tasks/Retrieval/en/NarrativeQARetrieval.py +++ b/mteb/tasks/Retrieval/en/NarrativeQARetrieval.py @@ -34,6 +34,8 @@ class NarrativeQARetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/en/QuoraRetrieval.py b/mteb/tasks/Retrieval/en/QuoraRetrieval.py index 53eb15f5f2..4b65a612c5 100644 --- a/mteb/tasks/Retrieval/en/QuoraRetrieval.py +++ b/mteb/tasks/Retrieval/en/QuoraRetrieval.py @@ -30,4 +30,6 @@ class QuoraRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/en/SCIDOCSRetrieval.py b/mteb/tasks/Retrieval/en/SCIDOCSRetrieval.py index c5a412e402..1217fab3f9 100644 --- a/mteb/tasks/Retrieval/en/SCIDOCSRetrieval.py +++ b/mteb/tasks/Retrieval/en/SCIDOCSRetrieval.py @@ -30,4 +30,6 @@ class SCIDOCS(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/en/SciFactRetrieval.py b/mteb/tasks/Retrieval/en/SciFactRetrieval.py index 74cbf1369d..5e27bd8b63 100644 --- a/mteb/tasks/Retrieval/en/SciFactRetrieval.py +++ b/mteb/tasks/Retrieval/en/SciFactRetrieval.py @@ -27,4 +27,6 @@ class SciFact(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) 
diff --git a/mteb/tasks/Retrieval/en/TRECCOVIDRetrieval.py b/mteb/tasks/Retrieval/en/TRECCOVIDRetrieval.py index 8d6b55f3f8..5b0cfb3ae9 100644 --- a/mteb/tasks/Retrieval/en/TRECCOVIDRetrieval.py +++ b/mteb/tasks/Retrieval/en/TRECCOVIDRetrieval.py @@ -27,4 +27,6 @@ class TRECCOVID(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/en/Touche2020Retrieval.py b/mteb/tasks/Retrieval/en/Touche2020Retrieval.py index eb408670eb..2ccbe45728 100644 --- a/mteb/tasks/Retrieval/en/Touche2020Retrieval.py +++ b/mteb/tasks/Retrieval/en/Touche2020Retrieval.py @@ -27,4 +27,6 @@ class Touche2020(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/es/SpanishPassageRetrievalS2P.py b/mteb/tasks/Retrieval/es/SpanishPassageRetrievalS2P.py index f870399eeb..066d472c01 100644 --- a/mteb/tasks/Retrieval/es/SpanishPassageRetrievalS2P.py +++ b/mteb/tasks/Retrieval/es/SpanishPassageRetrievalS2P.py @@ -29,6 +29,8 @@ class SpanishPassageRetrievalS2P(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/es/SpanishPassageRetrievalS2S.py b/mteb/tasks/Retrieval/es/SpanishPassageRetrievalS2S.py index a975fd654c..d072fb006a 100644 --- a/mteb/tasks/Retrieval/es/SpanishPassageRetrievalS2S.py +++ b/mteb/tasks/Retrieval/es/SpanishPassageRetrievalS2S.py @@ -29,6 +29,8 @@ class SpanishPassageRetrievalS2S(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/fr/AlloprofRetrieval.py b/mteb/tasks/Retrieval/fr/AlloprofRetrieval.py index 561eda1663..03a63b515d 100644 --- a/mteb/tasks/Retrieval/fr/AlloprofRetrieval.py +++ 
b/mteb/tasks/Retrieval/fr/AlloprofRetrieval.py @@ -29,6 +29,8 @@ class AlloprofRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/fr/BSARDRetrieval.py b/mteb/tasks/Retrieval/fr/BSARDRetrieval.py index 867a67da9e..a8ffb1f03d 100644 --- a/mteb/tasks/Retrieval/fr/BSARDRetrieval.py +++ b/mteb/tasks/Retrieval/fr/BSARDRetrieval.py @@ -29,6 +29,8 @@ class BSARDRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/fr/SyntecRetrieval.py b/mteb/tasks/Retrieval/fr/SyntecRetrieval.py index 7e9007c971..5897c1a024 100644 --- a/mteb/tasks/Retrieval/fr/SyntecRetrieval.py +++ b/mteb/tasks/Retrieval/fr/SyntecRetrieval.py @@ -31,6 +31,8 @@ class SyntecRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/ko/KoMiracl.py b/mteb/tasks/Retrieval/ko/KoMiracl.py index 596cc2d474..5d85ae594b 100644 --- a/mteb/tasks/Retrieval/ko/KoMiracl.py +++ b/mteb/tasks/Retrieval/ko/KoMiracl.py @@ -27,4 +27,6 @@ class KoMiracl(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/ko/KoMrtydi.py b/mteb/tasks/Retrieval/ko/KoMrtydi.py index 0d9a8a774e..929ad97bf1 100644 --- a/mteb/tasks/Retrieval/ko/KoMrtydi.py +++ b/mteb/tasks/Retrieval/ko/KoMrtydi.py @@ -27,4 +27,6 @@ class KoMrtydi(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/ko/KoStrategyQA.py b/mteb/tasks/Retrieval/ko/KoStrategyQA.py index 8729c40227..8f49a61603 100644 --- 
a/mteb/tasks/Retrieval/ko/KoStrategyQA.py +++ b/mteb/tasks/Retrieval/ko/KoStrategyQA.py @@ -27,4 +27,6 @@ class KoStrategyQA(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/multilingual/MIRACLRetrieval.py b/mteb/tasks/Retrieval/multilingual/MIRACLRetrieval.py index fdc8a76eaf..f17ecd7f9e 100644 --- a/mteb/tasks/Retrieval/multilingual/MIRACLRetrieval.py +++ b/mteb/tasks/Retrieval/multilingual/MIRACLRetrieval.py @@ -81,6 +81,8 @@ class MIRACLRetrieval(MultilingualTask, AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/multilingual/MintakaRetrieval.py b/mteb/tasks/Retrieval/multilingual/MintakaRetrieval.py index 1223687b91..2fd18c7cb6 100644 --- a/mteb/tasks/Retrieval/multilingual/MintakaRetrieval.py +++ b/mteb/tasks/Retrieval/multilingual/MintakaRetrieval.py @@ -71,6 +71,8 @@ class MintakaRetrieval(MultilingualTask, AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/multilingual/MultiLongDocRetrieval.py b/mteb/tasks/Retrieval/multilingual/MultiLongDocRetrieval.py index a69adde8e1..b82b2bc47a 100644 --- a/mteb/tasks/Retrieval/multilingual/MultiLongDocRetrieval.py +++ b/mteb/tasks/Retrieval/multilingual/MultiLongDocRetrieval.py @@ -79,6 +79,8 @@ class MultiLongDocRetrieval(MultilingualTask, AbsTaskRetrieval): primaryClass={cs.CL} } """, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/multilingual/XMarketRetrieval.py b/mteb/tasks/Retrieval/multilingual/XMarketRetrieval.py index 6911212cc9..f055f19918 100644 --- a/mteb/tasks/Retrieval/multilingual/XMarketRetrieval.py +++ b/mteb/tasks/Retrieval/multilingual/XMarketRetrieval.py 
@@ -78,6 +78,8 @@ class XMarket(MultilingualTask, AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/multilingual/XPQARetrieval.py b/mteb/tasks/Retrieval/multilingual/XPQARetrieval.py index 923bddef29..16e4fb4dc3 100644 --- a/mteb/tasks/Retrieval/multilingual/XPQARetrieval.py +++ b/mteb/tasks/Retrieval/multilingual/XPQARetrieval.py @@ -71,6 +71,8 @@ class XPQARetrieval(MultilingualTask, AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/Retrieval/pl/ArguAnaPLRetrieval.py b/mteb/tasks/Retrieval/pl/ArguAnaPLRetrieval.py index ac35c36abc..6e04c10e05 100644 --- a/mteb/tasks/Retrieval/pl/ArguAnaPLRetrieval.py +++ b/mteb/tasks/Retrieval/pl/ArguAnaPLRetrieval.py @@ -27,4 +27,6 @@ class ArguAnaPL(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/pl/DBPediaPLRetrieval.py b/mteb/tasks/Retrieval/pl/DBPediaPLRetrieval.py index 31e1fde60d..9e50ade0f1 100644 --- a/mteb/tasks/Retrieval/pl/DBPediaPLRetrieval.py +++ b/mteb/tasks/Retrieval/pl/DBPediaPLRetrieval.py @@ -27,4 +27,6 @@ class DBPediaPL(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/pl/FiQAPLRetrieval.py b/mteb/tasks/Retrieval/pl/FiQAPLRetrieval.py index 8107ec4ed4..d7c9139ece 100644 --- a/mteb/tasks/Retrieval/pl/FiQAPLRetrieval.py +++ b/mteb/tasks/Retrieval/pl/FiQAPLRetrieval.py @@ -27,4 +27,6 @@ class FiQAPLRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/pl/HotpotQAPLRetrieval.py 
b/mteb/tasks/Retrieval/pl/HotpotQAPLRetrieval.py index 676ed15023..e3969d86a4 100644 --- a/mteb/tasks/Retrieval/pl/HotpotQAPLRetrieval.py +++ b/mteb/tasks/Retrieval/pl/HotpotQAPLRetrieval.py @@ -27,4 +27,6 @@ class HotpotQAPL(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/pl/MSMARCOPLRetrieval.py b/mteb/tasks/Retrieval/pl/MSMARCOPLRetrieval.py index a7a020d7c0..d16361287e 100644 --- a/mteb/tasks/Retrieval/pl/MSMARCOPLRetrieval.py +++ b/mteb/tasks/Retrieval/pl/MSMARCOPLRetrieval.py @@ -27,4 +27,6 @@ class MSMARCOPL(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/pl/NFCorpusPLRetrieval.py b/mteb/tasks/Retrieval/pl/NFCorpusPLRetrieval.py index 0bc9d3bf43..35bbe65329 100644 --- a/mteb/tasks/Retrieval/pl/NFCorpusPLRetrieval.py +++ b/mteb/tasks/Retrieval/pl/NFCorpusPLRetrieval.py @@ -27,4 +27,6 @@ class NFCorpusPL(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/pl/NQPLRetrieval.py b/mteb/tasks/Retrieval/pl/NQPLRetrieval.py index 3cc7784859..3a1bbf1662 100644 --- a/mteb/tasks/Retrieval/pl/NQPLRetrieval.py +++ b/mteb/tasks/Retrieval/pl/NQPLRetrieval.py @@ -27,4 +27,6 @@ class NQPL(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/pl/QuoraPLRetrieval.py b/mteb/tasks/Retrieval/pl/QuoraPLRetrieval.py index c2a332ae74..769e214bb1 100644 --- a/mteb/tasks/Retrieval/pl/QuoraPLRetrieval.py +++ b/mteb/tasks/Retrieval/pl/QuoraPLRetrieval.py @@ -27,4 +27,6 @@ class QuoraPLRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git 
a/mteb/tasks/Retrieval/pl/SCIDOCSPLRetrieval.py b/mteb/tasks/Retrieval/pl/SCIDOCSPLRetrieval.py index a5eb2362c5..fef578b28c 100644 --- a/mteb/tasks/Retrieval/pl/SCIDOCSPLRetrieval.py +++ b/mteb/tasks/Retrieval/pl/SCIDOCSPLRetrieval.py @@ -27,4 +27,6 @@ class SCIDOCSPL(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/pl/SciFactPLRetrieval.py b/mteb/tasks/Retrieval/pl/SciFactPLRetrieval.py index 6c8934d49e..680d2ca560 100644 --- a/mteb/tasks/Retrieval/pl/SciFactPLRetrieval.py +++ b/mteb/tasks/Retrieval/pl/SciFactPLRetrieval.py @@ -27,4 +27,6 @@ class SciFactPL(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/pl/TRECCOVIDPLRetrieval.py b/mteb/tasks/Retrieval/pl/TRECCOVIDPLRetrieval.py index 6b1b22e989..713aab120d 100644 --- a/mteb/tasks/Retrieval/pl/TRECCOVIDPLRetrieval.py +++ b/mteb/tasks/Retrieval/pl/TRECCOVIDPLRetrieval.py @@ -27,4 +27,6 @@ class TRECCOVIDPL(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Retrieval/zh/CMTEBRetrieval.py b/mteb/tasks/Retrieval/zh/CMTEBRetrieval.py index 6adfa545b6..632fb06b72 100644 --- a/mteb/tasks/Retrieval/zh/CMTEBRetrieval.py +++ b/mteb/tasks/Retrieval/zh/CMTEBRetrieval.py @@ -48,6 +48,8 @@ class T2Retrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): @@ -82,6 +84,8 @@ class MMarcoRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): @@ -116,6 +120,8 @@ class DuRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def 
load_data(self, **kwargs): @@ -150,6 +156,8 @@ class CovidRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): @@ -184,6 +192,8 @@ class CmedqaRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): @@ -218,6 +228,8 @@ class EcomRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): @@ -252,6 +264,8 @@ class MedicalRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): @@ -286,6 +300,8 @@ class VideoRetrieval(AbsTaskRetrieval): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) def load_data(self, **kwargs): diff --git a/mteb/tasks/STS/de/GermanSTSBenchmarkSTS.py b/mteb/tasks/STS/de/GermanSTSBenchmarkSTS.py index 9f0ef64e08..7c2561c4f1 100644 --- a/mteb/tasks/STS/de/GermanSTSBenchmarkSTS.py +++ b/mteb/tasks/STS/de/GermanSTSBenchmarkSTS.py @@ -28,6 +28,8 @@ class GermanSTSBenchmarkSTS(AbsTaskSTS): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @property diff --git a/mteb/tasks/STS/en/BiossesSTS.py b/mteb/tasks/STS/en/BiossesSTS.py index c1ec9ccf31..8d57176e83 100644 --- a/mteb/tasks/STS/en/BiossesSTS.py +++ b/mteb/tasks/STS/en/BiossesSTS.py @@ -27,6 +27,8 @@ class BiossesSTS(AbsTaskSTS): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @property diff --git a/mteb/tasks/STS/en/STS12STS.py b/mteb/tasks/STS/en/STS12STS.py index 0be28a21b6..94ba603634 100644 --- a/mteb/tasks/STS/en/STS12STS.py +++ b/mteb/tasks/STS/en/STS12STS.py @@ -27,6 +27,8 @@ class 
STS12STS(AbsTaskSTS): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @property diff --git a/mteb/tasks/STS/en/STS13STS.py b/mteb/tasks/STS/en/STS13STS.py index 09b79eedc5..183960a05f 100644 --- a/mteb/tasks/STS/en/STS13STS.py +++ b/mteb/tasks/STS/en/STS13STS.py @@ -27,6 +27,8 @@ class STS13STS(AbsTaskSTS): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @property diff --git a/mteb/tasks/STS/en/STS14STS.py b/mteb/tasks/STS/en/STS14STS.py index b19f3c9ddc..9d7002e850 100644 --- a/mteb/tasks/STS/en/STS14STS.py +++ b/mteb/tasks/STS/en/STS14STS.py @@ -27,6 +27,8 @@ class STS14STS(AbsTaskSTS): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @property diff --git a/mteb/tasks/STS/en/STS15STS.py b/mteb/tasks/STS/en/STS15STS.py index 1982da86ff..87026c8646 100644 --- a/mteb/tasks/STS/en/STS15STS.py +++ b/mteb/tasks/STS/en/STS15STS.py @@ -27,6 +27,8 @@ class STS15STS(AbsTaskSTS): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @property diff --git a/mteb/tasks/STS/en/STS16STS.py b/mteb/tasks/STS/en/STS16STS.py index b9b0d3c9ef..f9b7c236a4 100644 --- a/mteb/tasks/STS/en/STS16STS.py +++ b/mteb/tasks/STS/en/STS16STS.py @@ -27,6 +27,8 @@ class STS16STS(AbsTaskSTS): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @property diff --git a/mteb/tasks/STS/en/STSBenchmarkSTS.py b/mteb/tasks/STS/en/STSBenchmarkSTS.py index 469ae8cdbf..6a85300341 100644 --- a/mteb/tasks/STS/en/STSBenchmarkSTS.py +++ b/mteb/tasks/STS/en/STSBenchmarkSTS.py @@ -27,6 +27,8 @@ class STSBenchmarkSTS(AbsTaskSTS): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @property diff --git a/mteb/tasks/STS/en/SickrSTS.py b/mteb/tasks/STS/en/SickrSTS.py index 501d70db39..eff86e1abb 
100644 --- a/mteb/tasks/STS/en/SickrSTS.py +++ b/mteb/tasks/STS/en/SickrSTS.py @@ -27,6 +27,8 @@ class SickrSTS(AbsTaskSTS): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @property diff --git a/mteb/tasks/STS/es/STSES.py b/mteb/tasks/STS/es/STSES.py index 1fceff5044..f0c329f6bb 100644 --- a/mteb/tasks/STS/es/STSES.py +++ b/mteb/tasks/STS/es/STSES.py @@ -31,6 +31,8 @@ class STSES(AbsTaskSTS): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @property diff --git a/mteb/tasks/STS/fr/SickFrSTS.py b/mteb/tasks/STS/fr/SickFrSTS.py index acd6e95b5a..626c8bcc73 100644 --- a/mteb/tasks/STS/fr/SickFrSTS.py +++ b/mteb/tasks/STS/fr/SickFrSTS.py @@ -29,6 +29,8 @@ class SickFrSTS(AbsTaskSTS): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @property diff --git a/mteb/tasks/STS/multilingual/STS17CrosslingualSTS.py b/mteb/tasks/STS/multilingual/STS17CrosslingualSTS.py index b03bfc293c..cf8c95c66e 100644 --- a/mteb/tasks/STS/multilingual/STS17CrosslingualSTS.py +++ b/mteb/tasks/STS/multilingual/STS17CrosslingualSTS.py @@ -41,6 +41,8 @@ class STS17Crosslingual(AbsTaskSTS, CrosslingualTask): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 500}, + avg_character_length={"test": 43.3}, ) @property diff --git a/mteb/tasks/STS/multilingual/STS22CrosslingualSTS.py b/mteb/tasks/STS/multilingual/STS22CrosslingualSTS.py index afe8080df5..d1dc92dac0 100644 --- a/mteb/tasks/STS/multilingual/STS22CrosslingualSTS.py +++ b/mteb/tasks/STS/multilingual/STS22CrosslingualSTS.py @@ -48,6 +48,8 @@ class STS22CrosslingualSTS(AbsTaskSTS, CrosslingualTask): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 8060}, + avg_character_length={"train": 1992.8}, ) @property diff --git a/mteb/tasks/STS/multilingual/STSBenchmarkMultilingualSTS.py 
b/mteb/tasks/STS/multilingual/STSBenchmarkMultilingualSTS.py index 1be043b9c9..7984e5b65e 100644 --- a/mteb/tasks/STS/multilingual/STSBenchmarkMultilingualSTS.py +++ b/mteb/tasks/STS/multilingual/STSBenchmarkMultilingualSTS.py @@ -35,6 +35,8 @@ class STSBenchmarkMultilingualSTS(AbsTaskSTS, MultilingualTask): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @property diff --git a/mteb/tasks/STS/pl/PolishSTS.py b/mteb/tasks/STS/pl/PolishSTS.py index 407c6ce1f4..23f0b2c72d 100644 --- a/mteb/tasks/STS/pl/PolishSTS.py +++ b/mteb/tasks/STS/pl/PolishSTS.py @@ -26,6 +26,8 @@ class SickrPLSTS(AbsTaskSTS): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 9812}, + avg_character_length={"test": 42.8}, ) @property @@ -58,6 +60,8 @@ class CdscrSTS(AbsTaskSTS): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @property diff --git a/mteb/tasks/STS/zh/CMTEBSTS.py b/mteb/tasks/STS/zh/CMTEBSTS.py index 4f4012a539..aecfb3895d 100644 --- a/mteb/tasks/STS/zh/CMTEBSTS.py +++ b/mteb/tasks/STS/zh/CMTEBSTS.py @@ -27,6 +27,8 @@ class ATEC(AbsTaskSTS): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @property @@ -59,6 +61,8 @@ class BQ(AbsTaskSTS): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @property @@ -91,6 +95,8 @@ class LCQMC(AbsTaskSTS): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @property @@ -123,6 +129,8 @@ class PAWSX(AbsTaskSTS): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @property @@ -155,6 +163,8 @@ class STSB(AbsTaskSTS): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @property @@ -187,6 +197,8 @@ class AFQMC(AbsTaskSTS): dialect=None, 
text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @property @@ -219,4 +231,6 @@ class QBQTC(AbsTaskSTS): dialect=None, text_creation=None, bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) diff --git a/mteb/tasks/Summarization/en/SummEvalSummarization.py b/mteb/tasks/Summarization/en/SummEvalSummarization.py index bb36333352..91dc2f5ff3 100644 --- a/mteb/tasks/Summarization/en/SummEvalSummarization.py +++ b/mteb/tasks/Summarization/en/SummEvalSummarization.py @@ -27,6 +27,8 @@ class SummEvalSummarization(AbsTaskSummarization): dialect=None, text_creation=None, bibtex_citation=None, + n_samples={"test": 2800}, + avg_character_length={"test": 359.8}, ) @property diff --git a/mteb/tasks/Summarization/fr/SummEvalFrSummarization.py b/mteb/tasks/Summarization/fr/SummEvalFrSummarization.py index e1f5e341ee..80d2f51fd0 100644 --- a/mteb/tasks/Summarization/fr/SummEvalFrSummarization.py +++ b/mteb/tasks/Summarization/fr/SummEvalFrSummarization.py @@ -17,15 +17,17 @@ class SummEvalFrSummarization(AbsTaskSummarization): main_score="cosine_spearman", revision="b385812de6a9577b6f4d0f88c6a6e35395a94054", date=None, - form=None, + form=["written"], domains=None, task_subtypes=None, license=None, socioeconomic_status=None, annotations_creators=None, dialect=None, - text_creation=None, + text_creation="machine-translated", bibtex_citation=None, + n_samples=None, + avg_character_length=None, ) @property From a16eb07da1d1a6d8380683e9fa11df3244fae87b Mon Sep 17 00:00:00 2001 From: Kenneth Enevoldsen Date: Sun, 24 Mar 2024 13:48:05 +0100 Subject: [PATCH 6/6] Enable ruff ci (#279) * restructing the readme * added mmteb * removed unec. 
method * Added docstring to metadata * Updated outdated examples * formatting documents * fix: Updated form to be parsed correctly * fix: Added sizes to the metadata this allow for automatic metadata generations * Updated based on feedback * Apply suggestions from code review Co-authored-by: Niklas Muennighoff * updated based on feedback * Added suggestion from review * added correction based on review * reformatted empty fields to None * CI: Enable linter --------- Co-authored-by: Niklas Muennighoff --- .github/{disabled_workflows => workflows}/lint.yml | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .github/{disabled_workflows => workflows}/lint.yml (100%) diff --git a/.github/disabled_workflows/lint.yml b/.github/workflows/lint.yml similarity index 100% rename from .github/disabled_workflows/lint.yml rename to .github/workflows/lint.yml