Skip to content

Commit

Permalink
use modalities instead and separate out constants
Browse files Browse the repository at this point in the history
  • Loading branch information
isaac-chung committed Dec 26, 2024
1 parent cd8595d commit 8bb96a4
Show file tree
Hide file tree
Showing 20 changed files with 67 additions and 65 deletions.
6 changes: 1 addition & 5 deletions mteb/abstasks/TaskMetadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
path_to_lang_codes,
path_to_lang_scripts,
)
from ..modalities import MODALITIES

TASK_SUBTYPE = Literal[
"Article retrieval",
Expand Down Expand Up @@ -124,11 +125,6 @@
"it2it",
]

MODALITIES = Literal[
"text",
"image",
]

ANNOTATOR_TYPE = Literal[
"expert-annotated",
"human-annotated",
Expand Down
8 changes: 3 additions & 5 deletions mteb/evaluation/MTEB.py
Original file line number Diff line number Diff line change
Expand Up @@ -373,12 +373,10 @@ def run(
)

# skip evaluation if the model does not support the task modalities.
task_modalities = sorted(
"".join([m for m in set(task.metadata.category) if m.isalpha()])
)
if sorted(meta.modalities) != task_modalities:
task_modalities = "".join(sorted(task.metadata.modalities))
if "".join(sorted(meta.modalities)) != task_modalities:
logger.info(
f"{meta.name} only supports {meta.modalities}, but the task category is {task.metadata.category}."
f"{meta.name} only supports {meta.modalities}, but the task modalities are {task.metadata.modalities}."
)
del self.tasks[0] # empty memory
continue
Expand Down
8 changes: 8 additions & 0 deletions mteb/modalities.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from __future__ import annotations

from typing_extensions import Literal

# Literal type enumerating the modality names accepted in annotations.
MODALITIES = Literal["text", "image"]
6 changes: 3 additions & 3 deletions mteb/model_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from mteb.encoder_interface import Encoder

from .languages import ISO_LANGUAGE_SCRIPT
from .modalities import MODALITIES

if TYPE_CHECKING:
from .models.sentence_transformer_wrapper import SentenceTransformerWrapper
Expand Down Expand Up @@ -74,8 +75,7 @@ class ModelMeta(BaseModel):
input such as "query: {document}" or "passage: {document}".
zero_shot_benchmarks: A list of benchmarks on which the model has been evaluated in a zero-shot setting. By default we assume that all models
are evaluated non-zero-shot unless specified otherwise.
modalities: A string representing the modalities the model supports. e.g. "t" means the model supports text-related tasks, "it" or "ti" means
the model support text and image-related tasks. Default is "t".
modalities: A list of strings representing the modalities the model supports. Default is ["text"].
"""

name: str | None
Expand All @@ -96,7 +96,7 @@ class ModelMeta(BaseModel):
similarity_fn_name: DISTANCE_METRICS | None = None
use_instuctions: bool | None = None
zero_shot_benchmarks: list[str] | None = None
modalities: str = "t"
modalities: list[MODALITIES] = ["text"]

def to_dict(self):
dict_repr = self.model_dump()
Expand Down
2 changes: 1 addition & 1 deletion mteb/models/align_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ def get_fused_embeddings(
open_source=True,
revision="e96a37facc7b1f59090ece82293226b817afd6ba",
release_date="2023-02-24",
modalities="it",
modalities=["image", "text"],
)

if __name__ == "__main__":
Expand Down
4 changes: 2 additions & 2 deletions mteb/models/blip2_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ def get_fused_embeddings(
open_source=True,
revision="51572668da0eb669e01a189dc22abe6088589a24",
release_date="2024-03-22",
modalities="it",
modalities=["image", "text"],
)

blip2_opt_6_7b_coco = ModelMeta(
Expand All @@ -238,7 +238,7 @@ def get_fused_embeddings(
open_source=True,
revision="0d580de59320a25a4d2c386387bcef310d5f286e",
release_date="2024-03-31",
modalities="it",
modalities=["image", "text"],
)


Expand Down
16 changes: 8 additions & 8 deletions mteb/models/blip_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ def get_fused_embeddings(
open_source=True,
revision="2227ac38c9f16105cb0412e7cab4759978a8fd90",
release_date="2023-12-07",
modalities="it",
modalities=["image", "text"],
)

blip_image_captioning_base = ModelMeta(
Expand All @@ -177,7 +177,7 @@ def get_fused_embeddings(
open_source=True,
revision="89b09ea1789f7addf2f6d6f0dfc4ce10ab58ef84",
release_date="2023-08-01",
modalities="it",
modalities=["image", "text"],
)


Expand All @@ -191,7 +191,7 @@ def get_fused_embeddings(
open_source=True,
revision="c7df8e7cd7aa2ee9af18f56e2b29e59a92651b64",
release_date="2023-12-07",
modalities="it",
modalities=["image", "text"],
)

blip_vqa_capfilt_large = ModelMeta(
Expand All @@ -204,7 +204,7 @@ def get_fused_embeddings(
open_source=True,
revision="e53f95265aeab69013fabb5380500ab984adbbb4",
release_date="2023-01-22",
modalities="it",
modalities=["image", "text"],
)

blip_itm_base_coco = ModelMeta(
Expand All @@ -217,7 +217,7 @@ def get_fused_embeddings(
open_source=True,
revision="7eaa90c11850c0b17fc38c6a11e7d88bd6ac231f",
release_date="2023-08-01",
modalities="it",
modalities=["image", "text"],
)

blip_itm_large_coco = ModelMeta(
Expand All @@ -230,7 +230,7 @@ def get_fused_embeddings(
open_source=True,
revision="fef05cafc05298067cbbca00b125749394a77a6f",
release_date="2023-08-01",
modalities="it",
modalities=["image", "text"],
)

blip_itm_base_flickr = ModelMeta(
Expand All @@ -243,7 +243,7 @@ def get_fused_embeddings(
open_source=True,
revision="1de29e660d91ae1786c1876212ea805a22eab251",
release_date="2023-08-01",
modalities="it",
modalities=["image", "text"],
)

blip_itm_large_flickr = ModelMeta(
Expand All @@ -256,7 +256,7 @@ def get_fused_embeddings(
open_source=True,
revision="bda12e6506758f54261b5ab174b2c55a3ba143fb",
release_date="2023-08-01",
modalities="it",
modalities=["image", "text"],
)


Expand Down
6 changes: 3 additions & 3 deletions mteb/models/clip_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ def get_fused_embeddings(
open_source=True,
revision="32bd64288804d66eefd0ccbe215aa642df71cc41",
release_date="2021-02-26",
modalities="it",
modalities=["image", "text"],
)

clip_vit_base_patch32 = ModelMeta(
Expand All @@ -160,7 +160,7 @@ def get_fused_embeddings(
open_source=True,
revision="3d74acf9a28c67741b2f4f2ea7635f0aaf6f0268",
release_date="2021-02-26",
modalities="it",
modalities=["image", "text"],
)

clip_vit_base_patch16 = ModelMeta(
Expand All @@ -173,7 +173,7 @@ def get_fused_embeddings(
open_source=True,
revision="57c216476eefef5ab752ec549e440a49ae4ae5f3",
release_date="2021-02-26",
modalities="it",
modalities=["image", "text"],
)

if __name__ == "__main__":
Expand Down
4 changes: 2 additions & 2 deletions mteb/models/cohere_v.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ def get_fused_embeddings(
license=None,
similarity_fn_name="cosine",
framework=[],
modalities="it",
modalities=["image", "text"],
)

cohere_eng_3 = ModelMeta(
Expand All @@ -212,7 +212,7 @@ def get_fused_embeddings(
license=None,
similarity_fn_name="cosine",
framework=[],
modalities="it",
modalities=["image", "text"],
)

if __name__ == "__main__":
Expand Down
8 changes: 4 additions & 4 deletions mteb/models/dino_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ def get_fused_embeddings(
open_source=True,
revision="ed25f3a31f01632728cabb09d1542f84ab7b0056",
release_date="2023-07-18",
modalities="i",
modalities=["image"],
)

dinov2_base = ModelMeta(
Expand All @@ -143,7 +143,7 @@ def get_fused_embeddings(
open_source=True,
revision="f9e44c814b77203eaa57a6bdbbd535f21ede1415",
release_date="2023-07-18",
modalities="i",
modalities=["image"],
)

dinov2_large = ModelMeta(
Expand All @@ -156,7 +156,7 @@ def get_fused_embeddings(
open_source=True,
revision="47b73eefe95e8d44ec3623f8890bd894b6ea2d6c",
release_date="2023-07-18",
modalities="i",
modalities=["image"],
)

dinov2_giant = ModelMeta(
Expand All @@ -169,7 +169,7 @@ def get_fused_embeddings(
open_source=True,
revision="611a9d42f2335e0f921f1e313ad3c1b7178d206d",
release_date="2023-07-18",
modalities="i",
modalities=["image"],
)

if __name__ == "__main__":
Expand Down
2 changes: 1 addition & 1 deletion mteb/models/e5_v.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ def get_fused_embeddings(
open_source=True,
revision="0c1f22679417b3ae925d779442221c40cd1861ab",
release_date="2024-07-17",
modalities="it",
modalities=["image", "text"],
)

if __name__ == "__main__":
Expand Down
8 changes: 4 additions & 4 deletions mteb/models/evaclip_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ def get_fused_embeddings(
open_source=True,
revision="11afd202f2ae80869d6cef18b1ec775e79bd8d12",
release_date="2023-04-26",
modalities="it",
modalities=["image", "text"],
)

EVA02_CLIP_L_14 = ModelMeta(
Expand All @@ -188,7 +188,7 @@ def get_fused_embeddings(
open_source=True,
revision="11afd202f2ae80869d6cef18b1ec775e79bd8d12",
release_date="2023-04-26",
modalities="it",
modalities=["image", "text"],
)

EVA02_CLIP_bigE_14 = ModelMeta(
Expand All @@ -201,7 +201,7 @@ def get_fused_embeddings(
open_source=True,
revision="11afd202f2ae80869d6cef18b1ec775e79bd8d12",
release_date="2023-04-26",
modalities="it",
modalities=["image", "text"],
)


Expand All @@ -215,5 +215,5 @@ def get_fused_embeddings(
open_source=True,
revision="11afd202f2ae80869d6cef18b1ec775e79bd8d12",
release_date="2023-04-26",
modalities="it",
modalities=["image", "text"],
)
2 changes: 1 addition & 1 deletion mteb/models/jina_clip.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ def encode( # type: ignore
open_source=True,
revision="06150c7c382d7a4faedc7d5a0d8cdb59308968f4",
release_date="2024-05-30",
modalities="it",
modalities=["image", "text"],
)


Expand Down
4 changes: 2 additions & 2 deletions mteb/models/moco_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ def get_fused_embeddings(
open_source=True,
revision="7d091cd70772c5c0ecf7f00b5f12ca609a99d69d",
release_date="2024-06-03",
modalities="i",
modalities=["image"],
)

mocov3_vit_large = ModelMeta(
Expand All @@ -161,5 +161,5 @@ def get_fused_embeddings(
open_source=True,
revision="7bf75358d616f39b9716148bf4e3425f3bd35b47",
release_date="2024-06-03",
modalities="i",
modalities=["image"],
)
2 changes: 1 addition & 1 deletion mteb/models/nomic_models_vision.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ def get_fused_embeddings(
open_source=True,
revision="af2246fffdab78d8458418480e4886a8e48b70a7",
release_date="2024-06-08",
modalities="it",
modalities=["image", "text"],
)

if __name__ == "__main__":
Expand Down
16 changes: 8 additions & 8 deletions mteb/models/openclip_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ def get_fused_embeddings(
open_source=True,
revision="84c9828e63dc9a9351d1fe637c346d4c1c4db341",
release_date="2023-04-26",
modalities="it",
modalities=["image", "text"],
)

CLIP_ViT_B_32_DataComp_XL_s13B_b90K = ModelMeta(
Expand All @@ -173,7 +173,7 @@ def get_fused_embeddings(
open_source=True,
revision="f0e2ffa09cbadab3db6a261ec1ec56407ce42912",
release_date="2023-04-26",
modalities="it",
modalities=["image", "text"],
)

CLIP_ViT_B_16_DataComp_XL_s13B_b90K = ModelMeta(
Expand All @@ -186,7 +186,7 @@ def get_fused_embeddings(
open_source=True,
revision="d110532e8d4ff91c574ee60a342323f28468b287",
release_date="2023-04-26",
modalities="it",
modalities=["image", "text"],
)

CLIP_ViT_bigG_14_laion2B_39B_b160k = ModelMeta(
Expand All @@ -199,7 +199,7 @@ def get_fused_embeddings(
open_source=True,
revision="bc7788f151930d91b58474715fdce5524ad9a189",
release_date="2023-01-23",
modalities="it",
modalities=["image", "text"],
)

CLIP_ViT_g_14_laion2B_s34B_b88K = ModelMeta(
Expand All @@ -212,7 +212,7 @@ def get_fused_embeddings(
open_source=True,
revision="15efd0f6ac0c40c0f9da7becca03c974d7012604",
release_date="2023-03-06",
modalities="it",
modalities=["image", "text"],
)

CLIP_ViT_H_14_laion2B_s32B_b79K = ModelMeta(
Expand All @@ -225,7 +225,7 @@ def get_fused_embeddings(
open_source=True,
revision="de081ac0a0ca8dc9d1533eed1ae884bb8ae1404b",
release_date="2022-09-15",
modalities="it",
modalities=["image", "text"],
)

CLIP_ViT_L_14_laion2B_s32B_b82K = ModelMeta(
Expand All @@ -238,7 +238,7 @@ def get_fused_embeddings(
open_source=True,
revision="1627032197142fbe2a7cfec626f4ced3ae60d07a",
release_date="2022-09-15",
modalities="it",
modalities=["image", "text"],
)

CLIP_ViT_B_32_laion2B_s34B_b79K = ModelMeta(
Expand All @@ -251,5 +251,5 @@ def get_fused_embeddings(
open_source=True,
revision="08f73555f1b2fb7c82058aebbd492887a94968ef",
release_date="2022-09-15",
modalities="it",
modalities=["image", "text"],
)
Loading

0 comments on commit 8bb96a4

Please sign in to comment.