embeddings-benchmark · Muennighoff · Aug 26, 2023 · Aug 18, 2023 · Aug 25, 2023 · Aug 26, 2023
diff --git a/README.md b/README.md
diff --git a/mteb/tasks/Classification/PolishClassification.py b/mteb/tasks/Classification/PolishClassification.py
@@ -0,0 +1,85 @@
+from ...abstasks import AbsTaskClassification
+
+
+class CbdClassification(AbsTaskClassification):  # Classification task "CBD": cyberbullying detection on Polish tweets.
+    @property
+    def description(self):  # Returns this task's static metadata as a plain dict literal.
+        return {
+            "name": "CBD",  # task name; the same string is listed in scripts/run_mteb_polish.py
+            "hf_hub_name": "PL-MTEB/cbd",  # presumably the Hugging Face Hub dataset id - confirm against the loader
+            "description": "Polish Tweets annotated for cyberbullying detection.",
+            "reference": "http://2019.poleval.pl/files/poleval2019.pdf",
+            "category": "s2s",  # presumably "sentence-to-sentence" - TODO confirm
+            "type": "Classification",
+            "eval_splits": ["test"],  # only the "test" split is listed
+            "eval_langs": ["pl"],  # Polish only
+            "main_score": "accuracy"  # name of the primary metric for this task
+        }
+
+
+class PolEmo2InClassification(AbsTaskClassification):  # Classification task "PolEmo2.0-IN": sentiment of in-domain Polish reviews.
+    @property
+    def description(self):  # Returns this task's static metadata as a plain dict literal.
+        return {
+            "name": "PolEmo2.0-IN",  # task name; the same string is listed in scripts/run_mteb_polish.py
+            "hf_hub_name": "PL-MTEB/polemo2_in",  # presumably the Hugging Face Hub dataset id - confirm against the loader
+            "description": "A collection of Polish online reviews from four domains: medicine, hotels, products and "
+                           "school. The PolEmo2.0-IN task is to predict the sentiment of in-domain (medicine and hotels) "
+                           "reviews.",  # three adjacent literals, concatenated into one string
+            "reference": "https://aclanthology.org/K19-1092.pdf",
+            "category": "s2s",  # presumably "sentence-to-sentence" - TODO confirm
+            "type": "Classification",
+            "eval_splits": ["test"],  # only the "test" split is listed
+            "eval_langs": ["pl"],  # Polish only
+            "main_score": "accuracy"  # name of the primary metric for this task
+        }
+
+
+class PolEmo2OutClassification(AbsTaskClassification):  # Classification task "PolEmo2.0-OUT": sentiment of out-of-domain Polish reviews.
+    @property
+    def description(self):  # Returns this task's static metadata as a plain dict literal.
+        return {
+            "name": "PolEmo2.0-OUT",  # task name; the same string is listed in scripts/run_mteb_polish.py
+            "hf_hub_name": "PL-MTEB/polemo2_out",  # presumably the Hugging Face Hub dataset id - confirm against the loader
+            "description": "A collection of Polish online reviews from four domains: medicine, hotels, products and "
+                           "school. The PolEmo2.0-OUT task is to predict the sentiment of out-of-domain (products and "
+                           "school) reviews using models train on reviews from medicine and hotels domains.",  # NOTE(review): "train" likely should read "trained"
+            "reference": "https://aclanthology.org/K19-1092.pdf",
+            "category": "s2s",  # presumably "sentence-to-sentence" - TODO confirm
+            "type": "Classification",
+            "eval_splits": ["test"],  # only the "test" split is listed
+            "eval_langs": ["pl"],  # Polish only
+            "main_score": "accuracy"  # name of the primary metric for this task
+        }
+
+
+class AllegroReviewsClassification(AbsTaskClassification):  # Classification task "AllegroReviews": sentiment of Allegro marketplace reviews.
+    @property
+    def description(self):  # Returns this task's static metadata as a plain dict literal.
+        return {
+            "name": "AllegroReviews",  # task name; the same string is listed in scripts/run_mteb_polish.py
+            "hf_hub_name": "PL-MTEB/allegro-reviews",  # presumably the Hugging Face Hub dataset id - confirm against the loader
+            "description": "A Polish dataset for sentiment classification on reviews from e-commerce marketplace Allegro.",
+            "reference": "https://aclanthology.org/2020.acl-main.111.pdf",
+            "category": "s2s",  # presumably "sentence-to-sentence" - TODO confirm
+            "type": "Classification",
+            "eval_splits": ["test"],  # only the "test" split is listed
+            "eval_langs": ["pl"],  # Polish only
+            "main_score": "accuracy"  # name of the primary metric for this task
+        }
+
+
+class PacClassification(AbsTaskClassification):  # Classification task "PAC": Polish Abusive Clauses dataset.
+    @property
+    def description(self):  # Returns this task's static metadata as a plain dict literal.
+        return {
+            "name": "PAC",  # task name; the same string is listed in scripts/run_mteb_polish.py
+            "hf_hub_name": "laugustyniak/abusive-clauses-pl",  # NOTE(review): the only task in this diff not under the "PL-MTEB/" prefix
+            "description": "Polish Abusive Clauses Dataset",
+            "reference": "https://arxiv.org/pdf/2211.13112.pdf",
+            "category": "s2s",  # presumably "sentence-to-sentence" - TODO confirm
+            "type": "Classification",
+            "eval_splits": ["test"],  # only the "test" split is listed
+            "eval_langs": ["pl"],  # Polish only
+            "main_score": "accuracy"  # name of the primary metric for this task
+        }
diff --git a/mteb/tasks/Classification/__init__.py b/mteb/tasks/Classification/__init__.py
@@ -1,5 +1,3 @@
-
-
 from .AmazonCounterfactualClassification import *
 from .AmazonPolarityClassification import *
 from .AmazonReviewsClassification import *
@@ -22,3 +20,4 @@
 from .SweRecClassification import *
 from .ToxicConversationsClassification import *
 from .TweetSentimentExtractionClassification import *
+from .PolishClassification import *
diff --git a/mteb/tasks/Clustering/PolishClustering.py b/mteb/tasks/Clustering/PolishClustering.py
@@ -0,0 +1,18 @@
+from ...abstasks.AbsTaskClustering import AbsTaskClustering
+
+
+class EightTagsClustering(AbsTaskClustering):  # Clustering task "8TagsClustering": Polish social-media headlines in 8 categories.
+    @property
+    def description(self):  # Returns this task's static metadata as a plain dict literal.
+        return {
+            "name": "8TagsClustering",  # task name; the same string is listed in scripts/run_mteb_polish.py
+            "hf_hub_name": "PL-MTEB/8tags-clustering",  # presumably the Hugging Face Hub dataset id - confirm against the loader
+            "description": "Clustering of headlines from social media posts in Polish belonging to 8 categories: film, history, "
+                           "food, medicine, motorization, work, sport and technology.",  # two adjacent literals, concatenated into one string
+            "reference": "https://aclanthology.org/2020.lrec-1.207.pdf",
+            "type": "Clustering",
+            "category": "s2s",  # presumably "sentence-to-sentence" - TODO confirm
+            "eval_splits": ["test"],  # only the "test" split is listed
+            "eval_langs": ["pl"],  # Polish only
+            "main_score": "v_measure"  # name of the primary metric for this task
+        }
diff --git a/mteb/tasks/Clustering/__init__.py b/mteb/tasks/Clustering/__init__.py
@@ -13,3 +13,4 @@
 from .TenKGnadClusteringP2P import *
 from .TenKGnadClusteringS2S import *
 from .TwentyNewsgroupsClustering import *
+from .PolishClustering import *
diff --git a/mteb/tasks/PairClassification/PolishPC.py b/mteb/tasks/PairClassification/PolishPC.py
@@ -0,0 +1,65 @@
+from ...abstasks.AbsTaskPairClassification import AbsTaskPairClassification
+
+
+class SickePLPC(AbsTaskPairClassification):  # Pair-classification task "SICK-E-PL": textual entailment on the Polish SICK dataset.
+    @property
+    def description(self):  # Returns this task's static metadata as a plain dict literal.
+        return {
+            "name": "SICK-E-PL",  # task name; the same string is listed in scripts/run_mteb_polish.py
+            "hf_hub_name": "PL-MTEB/sicke-pl-pairclassification",  # presumably the Hugging Face Hub dataset id - confirm against the loader
+            "description": "Polish version of SICK dataset for textual entailment.",
+            "reference": "https://aclanthology.org/2020.lrec-1.207.pdf",
+            "category": "s2s",  # presumably "sentence-to-sentence" - TODO confirm
+            "type": "PairClassification",
+            "eval_splits": ["test"],  # only the "test" split is listed
+            "eval_langs": ["pl"],  # Polish only
+            "main_score": "ap",  # primary metric; presumably average precision - TODO confirm
+        }
+
+
+class PpcPC(AbsTaskPairClassification):  # Pair-classification task "PPC": Polish Paraphrase Corpus.
+    @property
+    def description(self):  # Returns this task's static metadata as a plain dict literal.
+        return {
+            "name": "PPC",  # task name; the same string is listed in scripts/run_mteb_polish.py
+            "hf_hub_name": "PL-MTEB/ppc-pairclassification",  # presumably the Hugging Face Hub dataset id - confirm against the loader
+            "description": "Polish Paraphrase Corpus",
+            "reference": "https://arxiv.org/pdf/2207.12759.pdf",
+            "category": "s2s",  # presumably "sentence-to-sentence" - TODO confirm
+            "type": "PairClassification",
+            "eval_splits": ["test"],  # only the "test" split is listed
+            "eval_langs": ["pl"],  # Polish only
+            "main_score": "ap"  # primary metric; presumably average precision - TODO confirm
+        }
+
+
+class CdscePC(AbsTaskPairClassification):  # Pair-classification task "CDSC-E": textual entailment on the CDS Corpus.
+    @property
+    def description(self):  # Returns this task's static metadata as a plain dict literal.
+        return {
+            "name": "CDSC-E",  # task name; the same string is listed in scripts/run_mteb_polish.py
+            "hf_hub_name": "PL-MTEB/cdsce-pairclassification",  # presumably the Hugging Face Hub dataset id - confirm against the loader
+            "description": "Compositional Distributional Semantics Corpus for textual entailment.",
+            "reference": "https://aclanthology.org/P17-1073.pdf",
+            "category": "s2s",  # presumably "sentence-to-sentence" - TODO confirm
+            "type": "PairClassification",
+            "eval_splits": ["test"],  # only the "test" split is listed
+            "eval_langs": ["pl"],  # Polish only
+            "main_score": "ap"  # primary metric; presumably average precision - TODO confirm
+        }
+
+
+class PscPC(AbsTaskPairClassification):  # Pair-classification task "PSC": Polish Summaries Corpus.
+    @property
+    def description(self):  # Returns this task's static metadata as a plain dict literal.
+        return {
+            "name": "PSC",  # task name; the same string is listed in scripts/run_mteb_polish.py
+            "hf_hub_name": "PL-MTEB/psc-pairclassification",  # presumably the Hugging Face Hub dataset id - confirm against the loader
+            "description": "Polish Summaries Corpus",
+            "reference": "http://www.lrec-conf.org/proceedings/lrec2014/pdf/1211_Paper.pdf",
+            "category": "s2s",  # presumably "sentence-to-sentence" - TODO confirm
+            "type": "PairClassification",
+            "eval_splits": ["test"],  # only the "test" split is listed
+            "eval_langs": ["pl"],  # Polish only
+            "main_score": "ap"  # primary metric; presumably average precision - TODO confirm
+        }
diff --git a/mteb/tasks/PairClassification/__init__.py b/mteb/tasks/PairClassification/__init__.py
@@ -1,3 +1,4 @@
 from .SprintDuplicateQuestionsPC import *
 from .TwitterSemEval2015PC import *
 from .TwitterURLCorpusPC import *
+from .PolishPC import *
diff --git a/mteb/tasks/STS/PolishSTS.py b/mteb/tasks/STS/PolishSTS.py
@@ -0,0 +1,38 @@
+from ...abstasks.AbsTaskSTS import AbsTaskSTS
+
+
+class SickrPLSTS(AbsTaskSTS):  # STS task "SICK-R-PL": textual relatedness on the Polish SICK dataset.
+    @property
+    def description(self):  # Returns this task's static metadata as a plain dict literal.
+        return {
+            "name": "SICK-R-PL",  # task name; the same string is listed in scripts/run_mteb_polish.py
+            "hf_hub_name": "PL-MTEB/sickr-pl-sts",  # presumably the Hugging Face Hub dataset id - confirm against the loader
+            "description": "Polish version of SICK dataset for textual relatedness.",
+            "reference": "https://aclanthology.org/2020.lrec-1.207.pdf",
+            "type": "STS",
+            "category": "s2s",  # presumably "sentence-to-sentence" - TODO confirm
+            "eval_splits": ["test"],  # only the "test" split is listed
+            "eval_langs": ["pl"],  # Polish only
+            "main_score": "cosine_spearman",  # primary metric; presumably Spearman correlation over cosine similarities - confirm
+            "min_score": 1,  # presumably the lower bound of the gold relatedness scale - confirm against the dataset
+            "max_score": 5  # presumably the upper bound of the gold relatedness scale - confirm against the dataset
+        }
+
+
+class CdscrSTS(AbsTaskSTS):  # STS task "CDSC-R": textual relatedness on the CDS Corpus.
+    @property
+    def description(self):  # Returns this task's static metadata as a plain dict literal.
+        return {
+            "name": "CDSC-R",  # task name; the same string is listed in scripts/run_mteb_polish.py
+            "hf_hub_name": "PL-MTEB/cdscr-sts",  # presumably the Hugging Face Hub dataset id - confirm against the loader
+            "description": "Compositional Distributional Semantics Corpus for textual relatedness.",
+            "reference": "https://aclanthology.org/P17-1073.pdf",
+            "type": "STS",
+            "category": "s2s",  # presumably "sentence-to-sentence" - TODO confirm
+            "eval_splits": ["test"],  # only the "test" split is listed
+            "eval_langs": ["pl"],  # Polish only
+            "main_score": "cosine_spearman",  # primary metric; presumably Spearman correlation over cosine similarities - confirm
+            "min_score": 1,  # presumably the lower bound of the gold relatedness scale - confirm against the dataset
+            "max_score": 5  # presumably the upper bound of the gold relatedness scale - confirm against the dataset
+        }
+
diff --git a/mteb/tasks/STS/__init__.py b/mteb/tasks/STS/__init__.py
@@ -8,3 +8,4 @@
 from .STS17CrosslingualSTS import *
 from .STS22CrosslingualSTS import *
 from .STSBenchmarkSTS import *
+from .PolishSTS import *
diff --git a/scripts/run_mteb_polish.py b/scripts/run_mteb_polish.py
@@ -0,0 +1,47 @@
+"""Example script for benchmarking all datasets constituting the MTEB Polish leaderboard & average scores"""
+
+import logging
+
+from mteb import MTEB
+from sentence_transformers import SentenceTransformer
+
+logging.basicConfig(level=logging.INFO)  # configure root logging at INFO level
+logger = logging.getLogger("main")  # NOTE(review): not referenced again in this script
+
+classification_tasks = [  # task names; the first five match "name" fields added in PolishClassification.py
+    "CBD",
+    "PolEmo2.0-IN",
+    "PolEmo2.0-OUT",
+    "AllegroReviews",
+    "PAC",
+    "MassiveIntentClassification",  # defined outside this diff
+    "MassiveScenarioClassification"  # defined outside this diff
+]
+
+clustering_tasks = [  # matches the "name" field added in PolishClustering.py
+    "8TagsClustering"
+]
+
+pair_classification_tasks = [  # match the "name" fields added in PolishPC.py
+    "SICK-E-PL",
+    "PPC",
+    "CDSC-E",
+    "PSC"
+]
+
+sts_tasks = [  # the first two match "name" fields added in PolishSTS.py
+    "SICK-R-PL",
+    "CDSC-R",
+    "STS22"  # defined outside this diff
+]
+
+tasks = classification_tasks \
+        + clustering_tasks \
+        + pair_classification_tasks \
+        + sts_tasks  # single flat list of every task name above, in category order
+
+model_name = "sdadas/st-polish-paraphrase-from-distilroberta"  # identifier handed to SentenceTransformer on the next line
+model = SentenceTransformer(model_name)
+
+evaluation = MTEB(tasks=tasks, task_langs=["pl"])  # presumably task_langs limits multilingual tasks to their Polish subset - confirm
+evaluation.run(model, output_folder=f"results/pl/{model_name.split('/')[-1]}")  # folder name is the part of model_name after the last "/"