Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Added sizes to the metadata #276

Merged
merged 18 commits into from
Mar 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/adding_a_dataset.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ class SciDocsReranking(AbsTaskReranking):
dialect=None,
text_creation="found",
bibtex_citation= ... # removed for brevity
n_samples={"test": 19599},
avg_character_length={"test": 69.0},
)

# testing the task with a model:
Expand Down
5 changes: 5 additions & 0 deletions mteb/abstasks/TaskMetadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,8 @@ class TaskMetadata(BaseModel):
text_creation: The method of text creation. Includes "found", "created", "machine-translated", "machine-translated and verified", and
"machine-translated and localized".
bibtex_citation: The BibTeX citation for the dataset.
n_samples: The number of samples in the dataset, given as a dictionary mapping each split name to its sample count. Only include the splits that are evaluated on.
avg_character_length: The average character length of the samples in the dataset, given as a dictionary mapping each split name to its average. Only include the splits that are evaluated on.
"""

hf_hub_name: str
Expand Down Expand Up @@ -144,3 +146,6 @@ class TaskMetadata(BaseModel):

text_creation: TEXT_CREATION_METHOD | None
bibtex_citation: str | None

n_samples: dict[str, int] | None
avg_character_length: dict[str, float] | None
2 changes: 2 additions & 0 deletions mteb/tasks/BitextMining/da/BornholmskBitextMining.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ class BornholmBitextMining(AbsTaskBitextMining):
dialect=None,
text_creation=None,
bibtex_citation=None,
avg_character_length={"test": 89.7},
n_samples={"test": 500},
)

def load_data(self, **kwargs):
Expand Down
2 changes: 2 additions & 0 deletions mteb/tasks/BitextMining/multilingual/BUCCBitextMining.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,6 @@ class BUCCBitextMining(AbsTaskBitextMining, CrosslingualTask):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 641684},
avg_character_length={"test": 101.3},
)
2 changes: 2 additions & 0 deletions mteb/tasks/BitextMining/multilingual/DiaBLaBitextMining.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ class DiaBLaBitextMining(AbsTaskBitextMining, CrosslingualTask):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples=None,
avg_character_length=None,
)

def load_data(self, **kwargs):
Expand Down
2 changes: 2 additions & 0 deletions mteb/tasks/BitextMining/multilingual/FloresBitextMining.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,8 @@ class FloresBitextMining(AbsTaskBitextMining, CrosslingualTask):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"dev": 997, "devtest": 1012},
avg_character_length={},
)

def load_data(self, **kwargs):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ class NorwegianCourtsBitextMining(AbsTaskBitextMining):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 2050},
avg_character_length={"test": 1884.0},
)

def load_data(self, **kwargs):
Expand Down
2 changes: 2 additions & 0 deletions mteb/tasks/BitextMining/multilingual/TatoebaBitextMining.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,4 +142,6 @@ class TatoebaBitextMining(AbsTaskBitextMining, CrosslingualTask):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 2000},
avg_character_length={"test": 39.4},
)
2 changes: 2 additions & 0 deletions mteb/tasks/Classification/da/AngryTweetsClassification.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ class AngryTweetsClassification(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 1050},
avg_character_length={"test": 156.1},
)

@property
Expand Down
2 changes: 2 additions & 0 deletions mteb/tasks/Classification/da/DKHateClassification.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ class DKHateClassification(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 329},
avg_character_length={"test": 104.0},
)

@property
Expand Down
2 changes: 2 additions & 0 deletions mteb/tasks/Classification/da/DalajClassification.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ class DalajClassification(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 444},
avg_character_length={"test": 243.8},
)

@property
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ class DanishPoliticalCommentsClassification(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"train": 9010},
avg_character_length={"train": 69.9},
)

@property
Expand Down
2 changes: 2 additions & 0 deletions mteb/tasks/Classification/da/DdiscoCohesionClassification.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ class DdiscoCohesionClassification(AbsTaskClassification):
abstract = "To date, there has been no resource for studying discourse coherence on real-world Danish texts. Discourse coherence has mostly been approached with the assumption that incoherent texts can be represented by coherent texts in which sentences have been shuffled. However, incoherent real-world texts rarely resemble that. We thus present DDisCo, a dataset including text from the Danish Wikipedia and Reddit annotated for discourse coherence. We choose to annotate real-world texts instead of relying on artificially incoherent text for training and testing models. Then, we evaluate the performance of several methods, including neural networks, on the dataset.",
}
""",
n_samples=None,
avg_character_length=None,
)

def load_data(self, **kwargs):
Expand Down
2 changes: 2 additions & 0 deletions mteb/tasks/Classification/da/LccSentimentClassification.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ class LccSentimentClassification(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 150},
avg_character_length={"test": 118.7},
)

@property
Expand Down
2 changes: 2 additions & 0 deletions mteb/tasks/Classification/en/AmazonPolarityClassification.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,6 @@ class AmazonPolarityClassification(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 400000},
avg_character_length={"test": 431.4},
)
2 changes: 2 additions & 0 deletions mteb/tasks/Classification/en/Banking77Classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,6 @@ class Banking77Classification(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 3080},
avg_character_length={"test": 54.2},
)
2 changes: 2 additions & 0 deletions mteb/tasks/Classification/en/EmotionClassification.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ class EmotionClassification(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"validation": 2000, "test": 2000},
avg_character_length={"validation": 95.3, "test": 95.6},
)

@property
Expand Down
2 changes: 2 additions & 0 deletions mteb/tasks/Classification/en/ImdbClassification.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,6 @@ class ImdbClassification(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 25000},
avg_character_length={"test": 1293.8},
)
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ class ToxicConversationsClassification(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 50000},
avg_character_length={"test": 296.6},
)

@property
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ class TweetSentimentExtractionClassification(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 3534},
avg_character_length={"test": 67.8},
)

@property
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ class AmazonCounterfactualClassification(MultilingualTask, AbsTaskClassification
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"validation": 335, "test": 670},
avg_character_length={"validation": 109.2, "test": 106.1},
)

@property
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,6 @@ class AmazonReviewsClassification(MultilingualTask, AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"validation": 30000, "test": 30000},
avg_character_length={"validation": 159.2, "test": 160.4},
)
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,6 @@ class MTOPDomainClassification(MultilingualTask, AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"validation": 2235, "test": 4386},
avg_character_length={"validation": 36.5, "test": 36.8},
)
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,6 @@ class MTOPIntentClassification(MultilingualTask, AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"validation": 2235, "test": 4386},
avg_character_length={"validation": 36.5, "test": 36.8},
)
Original file line number Diff line number Diff line change
Expand Up @@ -46,4 +46,6 @@ class MasakhaNEWSClassification(AbsTaskClassification, MultilingualTask):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 422},
avg_character_length={"test": 5116.6},
)
Original file line number Diff line number Diff line change
Expand Up @@ -81,4 +81,6 @@ class MassiveIntentClassification(MultilingualTask, AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"validation": 2033, "test": 2974},
avg_character_length={"validation": 34.8, "test": 34.6},
)
Original file line number Diff line number Diff line change
Expand Up @@ -81,4 +81,6 @@ class MassiveScenarioClassification(MultilingualTask, AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"validation": 2033, "test": 2974},
avg_character_length={"validation": 34.8, "test": 34.6},
)
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ class NordicLangClassification(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 3000},
avg_character_length={"test": 78.2},
)

@property
Expand Down
8 changes: 8 additions & 0 deletions mteb/tasks/Classification/multilingual/ScalaClassification.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ class ScalaDaClassification(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 1024},
avg_character_length={"test": 109.4},
)

@property
Expand Down Expand Up @@ -82,6 +84,8 @@ class ScalaNbClassification(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 1024},
avg_character_length={"test": 98.4},
)

@property
Expand Down Expand Up @@ -136,6 +140,8 @@ class ScalaNnClassification(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 1024},
avg_character_length={"test": 104.8},
)

@property
Expand Down Expand Up @@ -190,6 +196,8 @@ class ScalaSvClassification(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 1024},
avg_character_length={"test": 98.3},
)

@property
Expand Down
2 changes: 2 additions & 0 deletions mteb/tasks/Classification/nb/NoRecClassification.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,6 @@ class NoRecClassification(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 2050},
avg_character_length={"test": 82},
)
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,6 @@ class NorwegianParliamentClassification(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 1200, "validation": 1200},
avg_character_length={"test": 1884.0, "validation": 1911.0},
)
10 changes: 10 additions & 0 deletions mteb/tasks/Classification/pl/PolishClassification.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ class CbdClassification(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 1000},
avg_character_length={"test": 93.2},
)


Expand All @@ -53,6 +55,8 @@ class PolEmo2InClassification(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples=None,
avg_character_length=None,
)


Expand Down Expand Up @@ -80,6 +84,8 @@ class PolEmo2OutClassification(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 722},
avg_character_length={"test": 756.2},
)


Expand All @@ -105,6 +111,8 @@ class AllegroReviewsClassification(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 1006},
avg_character_length={"test": 477.2},
)


Expand All @@ -130,4 +138,6 @@ class PacClassification(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 3453},
avg_character_length={"test": 185.3},
)
2 changes: 2 additions & 0 deletions mteb/tasks/Classification/sv/SweRecClassification.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,6 @@ class SweRecClassification(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 1024},
avg_character_length={"test": 318.8},
)
12 changes: 12 additions & 0 deletions mteb/tasks/Classification/zh/CMTEBClassification.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ class TNews(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples=None,
avg_character_length=None,
)

@property
Expand Down Expand Up @@ -58,6 +60,8 @@ class IFlyTek(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples=None,
avg_character_length=None,
)

@property
Expand Down Expand Up @@ -90,6 +94,8 @@ class MultilingualSentiment(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples=None,
avg_character_length=None,
)

@property
Expand Down Expand Up @@ -121,6 +127,8 @@ class JDReview(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples=None,
avg_character_length=None,
)

@property
Expand Down Expand Up @@ -152,6 +160,8 @@ class OnlineShopping(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples=None,
avg_character_length=None,
)

@property
Expand Down Expand Up @@ -183,6 +193,8 @@ class Waimai(AbsTaskClassification):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples=None,
avg_character_length=None,
)

@property
Expand Down
2 changes: 2 additions & 0 deletions mteb/tasks/Clustering/de/BlurbsClusteringP2P.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,6 @@ class BlurbsClusteringP2P(AbsTaskClustering):
dialect=None,
text_creation=None,
bibtex_citation=None,
n_samples={"test": 174637},
avg_character_length={"test": 664.09},
)
Loading
Loading