fix: Added English news classification dataset (#323)

* Fix typos in readme.md
* Added news classification dataset.
* Added news classification dataset.
* Fixes on suggestions
* Update docs/mmteb/points.md

Co-authored-by: Kenneth Enevoldsen <kennethcenevoldsen@gmail.com>

---------

Co-authored-by: Kenneth Enevoldsen <kennethcenevoldsen@gmail.com>
embeddings-benchmark · Apr 8, 2024 · 4d21807 · 4d21807
1 parent d69bf94
commit 4d21807
Show file tree

Hide file tree

Showing 9 changed files with 73 additions and 10 deletions.
diff --git a/docs/adding_a_dataset.md b/docs/adding_a_dataset.md
@@ -235,7 +235,7 @@ model = SentenceTransformer(model_name)
 evaluation = MTEB(tasks=[YourNewTask()])
 ```
 
-- [ ] I have run the following models on the task (adding the results to the pr). These can be run using the `mteb run -m {model_name} -t {task_name}` command.
+- [ ] I have run the following models on the task (adding the results to the pr). These can be run using the `mteb -m {model_name} -t {task_name}` command.
   - [ ] `sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2`
   - [ ] `intfloat/multilingual-e5-small`
 - [ ] I have checked that the performance is neither trivial (both models gain close to perfect scores) nor random (both models gain close to random scores).

diff --git a/docs/mmteb/points.md b/docs/mmteb/points.md
@@ -1,14 +1,15 @@
 # Points
 
 | GitHub            | Total points | New dataset | New task | Dataset annotations | (Bug)fixes | Running Models | Review PR |  Paper Writing | Ideation | Coordination |
-| ----------------- | ------------ | ----------- | -------- | ------------------- | ---------- | -------------- |  -------- | -------------- | -------- | ------------- |
-| KennethEnevoldsen |              |   38+16     |          |                   1 |            |              8 |           |                |          |               |
+|-------------------| ------------ |-------------| -------- | ------------------- | ---------- | -------------- |  -------- | -------------- | -------- | ------------- |
+| KennethEnevoldsen |              | 38+16       |          |                   1 |            |              9 |           |                |          |               |
 | x-tabdeveloping   |              | 2+16        |          |                     |            |                |           |                |          |               |
-| imenelydiaker     |              |    88       |          |                     |            |                |     7     |                |          |               |
-| wissam-sib        |              |    88       |          |                     |            |                |     1     |                |          |               |
-| GabrielSequeira   |              |    88       |          |                     |            |                |           |                |          |               |
-| schmarion         |              |    88       |          |                     |            |                |           |                |          |               |
-| MathieuCiancone   |              |    88       |          |                     |            |                |           |                |          |               |
+| imenelydiaker     |              | 88          |          |                     |            |                |     7     |                |          |               |
+| wissam-sib        |              | 88          |          |                     |            |                |     1     |                |          |               |
+| GabrielSequeira   |              | 88          |          |                     |            |                |           |                |          |               |
+| schmarion         |              | 88          |          |                     |            |                |           |                |          |               |
+| MathieuCiancone   |              | 88          |          |                     |            |                |           |                |          |               |
+| Sakshamrzt        |              | 2           |          |                     |            |                |           |                |          |               |
 
 Note that coordination and ideation are not included in the total points, but are used to determine first and last authors. 
 

diff --git a/mteb/abstasks/TaskMetadata.py b/mteb/abstasks/TaskMetadata.py
@@ -25,6 +25,7 @@
     "Thematic clustering",
     "Scientific Reranking",
     "Claim verification",
+    "Topic classification",
 ]
 
 TASK_DOMAIN = Literal[

diff --git a/mteb/tasks/Classification/__init__.py b/mteb/tasks/Classification/__init__.py
@@ -9,6 +9,7 @@
 from .en.Banking77Classification import *
 from .en.EmotionClassification import *
 from .en.ImdbClassification import *
+from .en.NewsClassification import *
 from .en.ToxicConversationsClassification import *
 from .en.TweetSentimentExtractionClassification import *
 from .multilingual.AmazonCounterfactualClassification import *

diff --git a/mteb/tasks/Classification/en/NewsClassification.py b/mteb/tasks/Classification/en/NewsClassification.py
@@ -0,0 +1,34 @@
+from __future__ import annotations
+
+from mteb.abstasks.TaskMetadata import TaskMetadata
+
+from ....abstasks import AbsTaskClassification
+
+
+class NewsClassification(AbsTaskClassification):  # declares only a class-level `metadata`; task behavior comes from AbsTaskClassification (not shown here)
+    metadata = TaskMetadata(
+        name="NewsClassification",  # same name is recorded as `mteb_dataset_name` in the result JSON files of this commit
+        description="Large News Classification Dataset",
+        dataset={
+            "path": "ag_news",  # presumably a Hugging Face Hub dataset id -- TODO confirm
+            "revision": "eb185aade064a813bc0b7f42de02595523103ca4",  # pinned revision; equals `dataset_revision` in the committed result files
+        },
+        reference="https://arxiv.org/abs/1509.01626",
+        type="Classification",
+        category="s2s",
+        eval_splits=["test"],  # only split listed; `n_samples` and `avg_character_length` below are keyed by it
+        eval_langs=["en"],
+        main_score="accuracy",  # the committed result files report `main_score` equal to `accuracy`
+        date=None,  # NOTE(review): no date supplied for this dataset
+        form=["written"],
+        domains=["News"],
+        task_subtypes=["Topic classification"],  # this subtype string is added to TaskMetadata.py in the same commit
+        license="Apache 2.0",  # NOTE(review): verify against the dataset's published license
+        socioeconomic_status="medium",
+        annotations_creators="expert-annotated",  # NOTE(review): confirm how the labels were actually produced
+        dialect=["en-US", "en-GB", "en-AU"],
+        text_creation="found",
+        bibtex_citation=None,  # NOTE(review): left empty although `reference` points to a paper -- consider filling in
+        n_samples={"test": 7600},
+        avg_character_length={"test": 235.29},
+    )
diff --git a/results/intfloat__multilingual-e5-small/NewsClassification.json b/results/intfloat__multilingual-e5-small/NewsClassification.json
@@ -0,0 +1,13 @@
+{
+  "dataset_revision": "eb185aade064a813bc0b7f42de02595523103ca4",
+  "mteb_dataset_name": "NewsClassification",
+  "mteb_version": "1.5.2",
+  "test": {
+    "accuracy": 0.8145526315789475,
+    "accuracy_stderr": 0.02163049043233855,
+    "evaluation_time": 69.36,
+    "f1": 0.813099457864691,
+    "f1_stderr": 0.02206820251059848,
+    "main_score": 0.8145526315789475
+  }
+}
diff --git a/results/intfloat__multilingual-e5-small/model_meta.json b/results/intfloat__multilingual-e5-small/model_meta.json
@@ -1 +1 @@
-{"model_name": "intfloat/multilingual-e5-small", "time_of_run": "2024-03-25 11:44:13.724109", "versions": null}
+{"model_name": "intfloat/multilingual-e5-small", "time_of_run": "2024-04-07 09:46:52.759711", "versions": null}
diff --git a/results/sentence-transformers__paraphrase-multilingual-MiniLM-L12-v2/NewsClassification.json b/results/sentence-transformers__paraphrase-multilingual-MiniLM-L12-v2/NewsClassification.json
@@ -0,0 +1,13 @@
+{
+  "dataset_revision": "eb185aade064a813bc0b7f42de02595523103ca4",
+  "mteb_dataset_name": "NewsClassification",
+  "mteb_version": "1.5.2",
+  "test": {
+    "accuracy": 0.685592105263158,
+    "accuracy_stderr": 0.03763057828997146,
+    "evaluation_time": 64.81,
+    "f1": 0.6841960793010067,
+    "f1_stderr": 0.03769716129683354,
+    "main_score": 0.685592105263158
+  }
+}
diff --git a/results/sentence-transformers__paraphrase-multilingual-MiniLM-L12-v2/model_meta.json b/results/sentence-transformers__paraphrase-multilingual-MiniLM-L12-v2/model_meta.json
@@ -1 +1 @@
-{"model_name": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", "time_of_run": "2024-04-02 10:10:21.708612", "versions": {"sentence_transformers": "2.0.0", "transformers": "4.7.0", "pytorch": "1.9.0+cu102"}}
+{"model_name": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", "time_of_run": "2024-04-07 09:48:17.924041", "versions": {"sentence_transformers": "2.0.0", "transformers": "4.7.0", "pytorch": "1.9.0+cu102"}}
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		{"model_name": "intfloat/multilingual-e5-small", "time_of_run": "2024-03-25 11:44:13.724109", "versions": null}
		{"model_name": "intfloat/multilingual-e5-small", "time_of_run": "2024-04-07 09:46:52.759711", "versions": null}
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		{"model_name": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", "time_of_run": "2024-04-02 10:10:21.708612", "versions": {"sentence_transformers": "2.0.0", "transformers": "4.7.0", "pytorch": "1.9.0+cu102"}}
		{"model_name": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", "time_of_run": "2024-04-07 09:48:17.924041", "versions": {"sentence_transformers": "2.0.0", "transformers": "4.7.0", "pytorch": "1.9.0+cu102"}}