Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Polish tasks (PL-MTEB) #137

Merged
merged 3 commits into from
Aug 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions README.md

Large diffs are not rendered by default.

85 changes: 85 additions & 0 deletions mteb/tasks/Classification/PolishClassification.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
from ...abstasks import AbsTaskClassification


class CbdClassification(AbsTaskClassification):
    """Classification task definition for the CBD dataset (Polish tweets, cyberbullying detection)."""

    @property
    def description(self):
        """Return a freshly built metadata dictionary for the CBD task."""
        metadata = {
            "name": "CBD",
            "hf_hub_name": "PL-MTEB/cbd",
            "description": "Polish Tweets annotated for cyberbullying detection.",
            "reference": "http://2019.poleval.pl/files/poleval2019.pdf",
            "category": "s2s",
            "type": "Classification",
            # Evaluated on the Polish test split only.
            "eval_splits": ["test"],
            "eval_langs": ["pl"],
            "main_score": "accuracy",
        }
        return metadata


class PolEmo2InClassification(AbsTaskClassification):
    """Classification task definition for PolEmo2.0-IN (in-domain review sentiment)."""

    @property
    def description(self):
        """Return a freshly built metadata dictionary for the PolEmo2.0-IN task."""
        # Long description assembled from adjacent literals; the resulting
        # text is a single string.
        summary = (
            "A collection of Polish online reviews from four domains: medicine, hotels, products and "
            "school. The PolEmo2.0-IN task is to predict the sentiment of in-domain (medicine and hotels) "
            "reviews."
        )
        metadata = {
            "name": "PolEmo2.0-IN",
            "hf_hub_name": "PL-MTEB/polemo2_in",
            "description": summary,
            "reference": "https://aclanthology.org/K19-1092.pdf",
            "category": "s2s",
            "type": "Classification",
            "eval_splits": ["test"],
            "eval_langs": ["pl"],
            "main_score": "accuracy",
        }
        return metadata


class PolEmo2OutClassification(AbsTaskClassification):
    """Classification task definition for PolEmo2.0-OUT (out-of-domain review sentiment)."""

    @property
    def description(self):
        """Return a freshly built metadata dictionary for the PolEmo2.0-OUT task.

        The human-readable description states that models are *trained* on the
        medicine and hotels domains and evaluated on products and school
        reviews (the earlier wording "models train on" was a grammatical slip).
        """
        summary = (
            "A collection of Polish online reviews from four domains: medicine, hotels, products and "
            "school. The PolEmo2.0-OUT task is to predict the sentiment of out-of-domain (products and "
            "school) reviews using models trained on reviews from medicine and hotels domains."
        )
        return {
            "name": "PolEmo2.0-OUT",
            "hf_hub_name": "PL-MTEB/polemo2_out",
            "description": summary,
            "reference": "https://aclanthology.org/K19-1092.pdf",
            "category": "s2s",
            "type": "Classification",
            "eval_splits": ["test"],
            "eval_langs": ["pl"],
            "main_score": "accuracy",
        }


class AllegroReviewsClassification(AbsTaskClassification):
    """Classification task definition for AllegroReviews (e-commerce review sentiment)."""

    @property
    def description(self):
        """Return a freshly built metadata dictionary for the AllegroReviews task."""
        summary = (
            "A Polish dataset for sentiment classification on reviews from e-commerce marketplace Allegro."
        )
        metadata = {
            "name": "AllegroReviews",
            "hf_hub_name": "PL-MTEB/allegro-reviews",
            "description": summary,
            "reference": "https://aclanthology.org/2020.acl-main.111.pdf",
            "category": "s2s",
            "type": "Classification",
            "eval_splits": ["test"],
            "eval_langs": ["pl"],
            "main_score": "accuracy",
        }
        return metadata


class PacClassification(AbsTaskClassification):
    """Classification task definition for PAC (Polish Abusive Clauses Dataset)."""

    @property
    def description(self):
        """Return a freshly built metadata dictionary for the PAC task."""
        metadata = {
            "name": "PAC",
            # Unlike the sibling tasks, this dataset id is not under the PL-MTEB namespace.
            "hf_hub_name": "laugustyniak/abusive-clauses-pl",
            "description": "Polish Abusive Clauses Dataset",
            "reference": "https://arxiv.org/pdf/2211.13112.pdf",
            "category": "s2s",
            "type": "Classification",
            "eval_splits": ["test"],
            "eval_langs": ["pl"],
            "main_score": "accuracy",
        }
        return metadata
3 changes: 1 addition & 2 deletions mteb/tasks/Classification/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@


from .AmazonCounterfactualClassification import *
from .AmazonPolarityClassification import *
from .AmazonReviewsClassification import *
Expand All @@ -22,3 +20,4 @@
from .SweRecClassification import *
from .ToxicConversationsClassification import *
from .TweetSentimentExtractionClassification import *
from .PolishClassification import *
18 changes: 18 additions & 0 deletions mteb/tasks/Clustering/PolishClustering.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from ...abstasks.AbsTaskClustering import AbsTaskClustering


class EightTagsClustering(AbsTaskClustering):
    """Clustering task definition for 8TagsClustering (Polish social-media headlines, 8 categories)."""

    @property
    def description(self):
        """Return a freshly built metadata dictionary for the 8TagsClustering task."""
        summary = (
            "Clustering of headlines from social media posts in Polish belonging to 8 categories: film, history, "
            "food, medicine, motorization, work, sport and technology."
        )
        metadata = {
            "name": "8TagsClustering",
            "hf_hub_name": "PL-MTEB/8tags-clustering",
            "description": summary,
            "reference": "https://aclanthology.org/2020.lrec-1.207.pdf",
            "type": "Clustering",
            "category": "s2s",
            "eval_splits": ["test"],
            "eval_langs": ["pl"],
            "main_score": "v_measure",
        }
        return metadata
1 change: 1 addition & 0 deletions mteb/tasks/Clustering/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@
from .TenKGnadClusteringP2P import *
from .TenKGnadClusteringS2S import *
from .TwentyNewsgroupsClustering import *
from .PolishClustering import *
65 changes: 65 additions & 0 deletions mteb/tasks/PairClassification/PolishPC.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
from ...abstasks.AbsTaskPairClassification import AbsTaskPairClassification


class SickePLPC(AbsTaskPairClassification):
    """Pair-classification task definition for SICK-E-PL (textual entailment)."""

    @property
    def description(self):
        """Return a freshly built metadata dictionary for the SICK-E-PL task."""
        metadata = {
            "name": "SICK-E-PL",
            "hf_hub_name": "PL-MTEB/sicke-pl-pairclassification",
            "description": "Polish version of SICK dataset for textual entailment.",
            "reference": "https://aclanthology.org/2020.lrec-1.207.pdf",
            "category": "s2s",
            "type": "PairClassification",
            "eval_splits": ["test"],
            "eval_langs": ["pl"],
            # Main score key is "ap".
            "main_score": "ap",
        }
        return metadata


class PpcPC(AbsTaskPairClassification):
    """Pair-classification task definition for PPC (Polish Paraphrase Corpus)."""

    @property
    def description(self):
        """Return a freshly built metadata dictionary for the PPC task."""
        metadata = {
            "name": "PPC",
            "hf_hub_name": "PL-MTEB/ppc-pairclassification",
            "description": "Polish Paraphrase Corpus",
            "reference": "https://arxiv.org/pdf/2207.12759.pdf",
            "category": "s2s",
            "type": "PairClassification",
            "eval_splits": ["test"],
            "eval_langs": ["pl"],
            "main_score": "ap",
        }
        return metadata


class CdscePC(AbsTaskPairClassification):
    """Pair-classification task definition for CDSC-E (textual entailment)."""

    @property
    def description(self):
        """Return a freshly built metadata dictionary for the CDSC-E task."""
        metadata = {
            "name": "CDSC-E",
            "hf_hub_name": "PL-MTEB/cdsce-pairclassification",
            "description": "Compositional Distributional Semantics Corpus for textual entailment.",
            "reference": "https://aclanthology.org/P17-1073.pdf",
            "category": "s2s",
            "type": "PairClassification",
            "eval_splits": ["test"],
            "eval_langs": ["pl"],
            "main_score": "ap",
        }
        return metadata


class PscPC(AbsTaskPairClassification):
    """Pair-classification task definition for PSC (Polish Summaries Corpus)."""

    @property
    def description(self):
        """Return a freshly built metadata dictionary for the PSC task."""
        metadata = {
            "name": "PSC",
            "hf_hub_name": "PL-MTEB/psc-pairclassification",
            "description": "Polish Summaries Corpus",
            "reference": "http://www.lrec-conf.org/proceedings/lrec2014/pdf/1211_Paper.pdf",
            "category": "s2s",
            "type": "PairClassification",
            "eval_splits": ["test"],
            "eval_langs": ["pl"],
            "main_score": "ap",
        }
        return metadata
1 change: 1 addition & 0 deletions mteb/tasks/PairClassification/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from .SprintDuplicateQuestionsPC import *
from .TwitterSemEval2015PC import *
from .TwitterURLCorpusPC import *
from .PolishPC import *
38 changes: 38 additions & 0 deletions mteb/tasks/STS/PolishSTS.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from ...abstasks.AbsTaskSTS import AbsTaskSTS


class SickrPLSTS(AbsTaskSTS):
    """STS task definition for SICK-R-PL (textual relatedness)."""

    @property
    def description(self):
        """Return a freshly built metadata dictionary for the SICK-R-PL task."""
        metadata = {
            "name": "SICK-R-PL",
            "hf_hub_name": "PL-MTEB/sickr-pl-sts",
            "description": "Polish version of SICK dataset for textual relatedness.",
            "reference": "https://aclanthology.org/2020.lrec-1.207.pdf",
            "type": "STS",
            "category": "s2s",
            "eval_splits": ["test"],
            "eval_langs": ["pl"],
            "main_score": "cosine_spearman",
            # Score bounds declared for this task's gold labels.
            "min_score": 1,
            "max_score": 5,
        }
        return metadata


class CdscrSTS(AbsTaskSTS):
    """STS task definition for CDSC-R (textual relatedness)."""

    @property
    def description(self):
        """Return a freshly built metadata dictionary for the CDSC-R task."""
        metadata = {
            "name": "CDSC-R",
            "hf_hub_name": "PL-MTEB/cdscr-sts",
            "description": "Compositional Distributional Semantics Corpus for textual relatedness.",
            "reference": "https://aclanthology.org/P17-1073.pdf",
            "type": "STS",
            "category": "s2s",
            "eval_splits": ["test"],
            "eval_langs": ["pl"],
            "main_score": "cosine_spearman",
            # Score bounds declared for this task's gold labels.
            "min_score": 1,
            "max_score": 5,
        }
        return metadata

1 change: 1 addition & 0 deletions mteb/tasks/STS/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@
from .STS17CrosslingualSTS import *
from .STS22CrosslingualSTS import *
from .STSBenchmarkSTS import *
from .PolishSTS import *
47 changes: 47 additions & 0 deletions scripts/run_mteb_polish.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
"""Example script for benchmarking all datasets constituting the MTEB Polish leaderboard & average scores"""

import logging

from mteb import MTEB
from sentence_transformers import SentenceTransformer

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("main")

# Task names are grouped by task type; each name is passed verbatim to MTEB.
classification_tasks = [
    "CBD",
    "PolEmo2.0-IN",
    "PolEmo2.0-OUT",
    "AllegroReviews",
    "PAC",
    "MassiveIntentClassification",
    "MassiveScenarioClassification",
]

clustering_tasks = ["8TagsClustering"]

pair_classification_tasks = [
    "SICK-E-PL",
    "PPC",
    "CDSC-E",
    "PSC",
]

sts_tasks = [
    "SICK-R-PL",
    "CDSC-R",
    "STS22",
]

# Full benchmark: all groups concatenated in the order listed above.
tasks = [
    *classification_tasks,
    *clustering_tasks,
    *pair_classification_tasks,
    *sts_tasks,
]

model_name = "sdadas/st-polish-paraphrase-from-distilroberta"
model = SentenceTransformer(model_name)

evaluation = MTEB(tasks=tasks, task_langs=["pl"])
# Results go under results/pl/<last path segment of the model id>.
evaluation.run(model, output_folder="results/pl/" + model_name.rsplit("/", 1)[-1])