Skip to content

Commit

Permalink
fix: Added sizes to the metadata (#276)
Browse files Browse the repository at this point in the history
* restructuring the readme

* added mmteb

* removed unnecessary method

* Added docstring to metadata

* Updated outdated examples

* formatting documents

* fix: Updated form to be parsed correctly

* fix: Added sizes to the metadata

this allows for automatic metadata generation

* Updated based on feedback

* Apply suggestions from code review

Co-authored-by: Niklas Muennighoff <n.muennighoff@gmail.com>

* updated based on feedback

* Added suggestion from review

* added correction based on review

* reformatted empty fields to None

---------

Co-authored-by: Niklas Muennighoff <n.muennighoff@gmail.com>
  • Loading branch information
KennethEnevoldsen and Muennighoff authored Mar 24, 2024
1 parent c0dc49a commit cd4a012
Show file tree
Hide file tree
Showing 152 changed files with 381 additions and 2 deletions.
2 changes: 2 additions & 0 deletions docs/adding_a_dataset.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ class SciDocsReranking(AbsTaskReranking):
dialect=None,
text_creation="found",
bibtex_citation= ... # removed for brevity
n_samples={"test": 19599},
avg_character_length={"test": 69.0},
)

# testing the task with a model:
Expand Down
5 changes: 5 additions & 0 deletions mteb/abstasks/TaskMetadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,8 @@ class TaskMetadata(BaseModel):
text_creation: The method of text creation. Includes "found", "created", "machine-translated", "machine-translated and verified", and
"machine-translated and localized".
bibtex_citation: The BibTeX citation for the dataset.
n_samples: The number of samples in the dataset, given as a mapping from split name to sample count. Only include the splits that are evaluated on.
avg_character_length: The average character length of the samples in the dataset, given as a mapping from split name to average length. Only include the splits that are evaluated on.
"""

hf_hub_name: str
Expand Down Expand Up @@ -144,3 +146,6 @@ class TaskMetadata(BaseModel):

text_creation: TEXT_CREATION_METHOD | None
bibtex_citation: str | None

n_samples: dict[str, int] | None
avg_character_length: dict[str, float] | None
2 changes: 2 additions & 0 deletions mteb/tasks/BitextMining/da/BornholmskBitextMining.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ class BornholmBitextMining(AbsTaskBitextMining):
dialect=None,
text_creation=None,
bibtex_citation=None,
avg_character_length={"test": 89.7},
n_samples={"test": 500},
)

def load_data(self, **kwargs):
Expand Down
2 changes: 2 additions & 0 deletions mteb/tasks/BitextMining/multilingual/BUCCBitextMining.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,6 @@ class BUCCBitextMining(AbsTaskBitextMining, CrosslingualTask):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 641684},
avg_character_length={"test": 101.3},
)
2 changes: 2 additions & 0 deletions mteb/tasks/BitextMining/multilingual/DiaBLaBitextMining.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ class DiaBLaBitextMining(AbsTaskBitextMining, CrosslingualTask):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples=None,
avg_character_length=None,
)

def load_data(self, **kwargs):
Expand Down
2 changes: 2 additions & 0 deletions mteb/tasks/BitextMining/multilingual/FloresBitextMining.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,8 @@ class FloresBitextMining(AbsTaskBitextMining, CrosslingualTask):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"dev": 997, "devtest": 1012},
avg_character_length={},
)

def load_data(self, **kwargs):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ class NorwegianCourtsBitextMining(AbsTaskBitextMining):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 2050},
avg_character_length={"test": 1884.0},
)

def load_data(self, **kwargs):
Expand Down
2 changes: 2 additions & 0 deletions mteb/tasks/BitextMining/multilingual/TatoebaBitextMining.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,4 +142,6 @@ class TatoebaBitextMining(AbsTaskBitextMining, CrosslingualTask):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 2000},
avg_character_length={"test": 39.4},
)
2 changes: 2 additions & 0 deletions mteb/tasks/Classification/da/AngryTweetsClassification.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ class AngryTweetsClassification(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 1050},
avg_character_length={"test": 156.1},
)

@property
Expand Down
2 changes: 2 additions & 0 deletions mteb/tasks/Classification/da/DKHateClassification.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ class DKHateClassification(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 329},
avg_character_length={"test": 104.0},
)

@property
Expand Down
2 changes: 2 additions & 0 deletions mteb/tasks/Classification/da/DalajClassification.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ class DalajClassification(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 444},
avg_character_length={"test": 243.8},
)

@property
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ class DanishPoliticalCommentsClassification(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"train": 9010},
avg_character_length={"train": 69.9},
)

@property
Expand Down
2 changes: 2 additions & 0 deletions mteb/tasks/Classification/da/DdiscoCohesionClassification.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ class DdiscoCohesionClassification(AbsTaskClassification):
abstract = "To date, there has been no resource for studying discourse coherence on real-world Danish texts. Discourse coherence has mostly been approached with the assumption that incoherent texts can be represented by coherent texts in which sentences have been shuffled. However, incoherent real-world texts rarely resemble that. We thus present DDisCo, a dataset including text from the Danish Wikipedia and Reddit annotated for discourse coherence. We choose to annotate real-world texts instead of relying on artificially incoherent text for training and testing models. Then, we evaluate the performance of several methods, including neural networks, on the dataset.",
}
""",
n_samples=None,
avg_character_length=None,
)

def load_data(self, **kwargs):
Expand Down
2 changes: 2 additions & 0 deletions mteb/tasks/Classification/da/LccSentimentClassification.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ class LccSentimentClassification(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 150},
avg_character_length={"test": 118.7},
)

@property
Expand Down
2 changes: 2 additions & 0 deletions mteb/tasks/Classification/en/AmazonPolarityClassification.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,6 @@ class AmazonPolarityClassification(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 400000},
avg_character_length={"test": 431.4},
)
2 changes: 2 additions & 0 deletions mteb/tasks/Classification/en/Banking77Classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,6 @@ class Banking77Classification(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 3080},
avg_character_length={"test": 54.2},
)
2 changes: 2 additions & 0 deletions mteb/tasks/Classification/en/EmotionClassification.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ class EmotionClassification(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"validation": 2000, "test": 2000},
avg_character_length={"validation": 95.3, "test": 95.6},
)

@property
Expand Down
2 changes: 2 additions & 0 deletions mteb/tasks/Classification/en/ImdbClassification.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,6 @@ class ImdbClassification(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 25000},
avg_character_length={"test": 1293.8},
)
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ class ToxicConversationsClassification(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 50000},
avg_character_length={"test": 296.6},
)

@property
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ class TweetSentimentExtractionClassification(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 3534},
avg_character_length={"test": 67.8},
)

@property
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ class AmazonCounterfactualClassification(MultilingualTask, AbsTaskClassification
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"validation": 335, "test": 670},
avg_character_length={"validation": 109.2, "test": 106.1},
)

@property
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,6 @@ class AmazonReviewsClassification(MultilingualTask, AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"validation": 30000, "test": 30000},
avg_character_length={"validation": 159.2, "test": 160.4},
)
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,6 @@ class MTOPDomainClassification(MultilingualTask, AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"validation": 2235, "test": 4386},
avg_character_length={"validation": 36.5, "test": 36.8},
)
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,6 @@ class MTOPIntentClassification(MultilingualTask, AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"validation": 2235, "test": 4386},
avg_character_length={"validation": 36.5, "test": 36.8},
)
Original file line number Diff line number Diff line change
Expand Up @@ -46,4 +46,6 @@ class MasakhaNEWSClassification(AbsTaskClassification, MultilingualTask):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 422},
avg_character_length={"test": 5116.6},
)
Original file line number Diff line number Diff line change
Expand Up @@ -81,4 +81,6 @@ class MassiveIntentClassification(MultilingualTask, AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"validation": 2033, "test": 2974},
avg_character_length={"validation": 34.8, "test": 34.6},
)
Original file line number Diff line number Diff line change
Expand Up @@ -81,4 +81,6 @@ class MassiveScenarioClassification(MultilingualTask, AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"validation": 2033, "test": 2974},
avg_character_length={"validation": 34.8, "test": 34.6},
)
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ class NordicLangClassification(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 3000},
avg_character_length={"test": 78.2},
)

@property
Expand Down
8 changes: 8 additions & 0 deletions mteb/tasks/Classification/multilingual/ScalaClassification.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ class ScalaDaClassification(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 1024},
avg_character_length={"test": 109.4},
)

@property
Expand Down Expand Up @@ -82,6 +84,8 @@ class ScalaNbClassification(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 1024},
avg_character_length={"test": 98.4},
)

@property
Expand Down Expand Up @@ -136,6 +140,8 @@ class ScalaNnClassification(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 1024},
avg_character_length={"test": 104.8},
)

@property
Expand Down Expand Up @@ -190,6 +196,8 @@ class ScalaSvClassification(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 1024},
avg_character_length={"test": 98.3},
)

@property
Expand Down
2 changes: 2 additions & 0 deletions mteb/tasks/Classification/nb/NoRecClassification.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,6 @@ class NoRecClassification(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 2050},
avg_character_length={"test": 82},
)
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,6 @@ class NorwegianParliamentClassification(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 1200, "validation": 1200},
avg_character_length={"test": 1884.0, "validation": 1911.0},
)
10 changes: 10 additions & 0 deletions mteb/tasks/Classification/pl/PolishClassification.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ class CbdClassification(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 1000},
avg_character_length={"test": 93.2},
)


Expand All @@ -53,6 +55,8 @@ class PolEmo2InClassification(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples=None,
avg_character_length=None,
)


Expand Down Expand Up @@ -80,6 +84,8 @@ class PolEmo2OutClassification(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 722},
avg_character_length={"test": 756.2},
)


Expand All @@ -105,6 +111,8 @@ class AllegroReviewsClassification(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 1006},
avg_character_length={"test": 477.2},
)


Expand All @@ -130,4 +138,6 @@ class PacClassification(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 3453},
avg_character_length={"test": 185.3},
)
2 changes: 2 additions & 0 deletions mteb/tasks/Classification/sv/SweRecClassification.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,6 @@ class SweRecClassification(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 1024},
avg_character_length={"test": 318.8},
)
12 changes: 12 additions & 0 deletions mteb/tasks/Classification/zh/CMTEBClassification.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ class TNews(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples=None,
avg_character_length=None,
)

@property
Expand Down Expand Up @@ -58,6 +60,8 @@ class IFlyTek(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples=None,
avg_character_length=None,
)

@property
Expand Down Expand Up @@ -90,6 +94,8 @@ class MultilingualSentiment(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples=None,
avg_character_length=None,
)

@property
Expand Down Expand Up @@ -121,6 +127,8 @@ class JDReview(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples=None,
avg_character_length=None,
)

@property
Expand Down Expand Up @@ -152,6 +160,8 @@ class OnlineShopping(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples=None,
avg_character_length=None,
)

@property
Expand Down Expand Up @@ -183,6 +193,8 @@ class Waimai(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples=None,
avg_character_length=None,
)

@property
Expand Down
2 changes: 2 additions & 0 deletions mteb/tasks/Clustering/de/BlurbsClusteringP2P.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,6 @@ class BlurbsClusteringP2P(AbsTaskClustering):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 174637},
avg_character_length={"test": 664.09},
)
Loading

0 comments on commit cd4a012

Please sign in to comment.