Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

docs: Add Model Meta parameters and metadata #1536

Merged
merged 5 commits into from
Dec 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions mteb/model_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@ class ModelMeta(BaseModel):
input such as "query: {document}" or "passage: {document}".
zero_shot_benchmarks: A list of benchmarks on which the model has been evaluated in a zero-shot setting. By default we assume that all models
are evaluated non-zero-shot unless specified otherwise.
adapted_from: Name of the model from which this model is adapted. For quantizations, fine-tunes, long doc extensions, etc.
supersedes: Name of the model that this model supersedes, e.g. nvidia/NV-Embed-v2 supersedes v1.
"""

model_config = ConfigDict(extra="forbid")
Expand All @@ -96,6 +98,8 @@ class ModelMeta(BaseModel):
similarity_fn_name: DISTANCE_METRICS | None = None
use_instructions: bool | None = None
zero_shot_benchmarks: list[str] | None = None
adapted_from: str | None = None
supersedes: str | None = None
isaac-chung marked this conversation as resolved.
Show resolved Hide resolved

def to_dict(self):
dict_repr = self.model_dump()
Expand Down
40 changes: 39 additions & 1 deletion mteb/models/sentence_transformers_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@
memory_usage=None,
embed_dim=384,
license="apache-2.0",
max_tokens=512,
max_tokens=256,
reference="https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
similarity_fn_name="cosine",
framework=["Sentence Transformers", "PyTorch"],
Expand Down Expand Up @@ -127,3 +127,41 @@
framework=["Sentence Transformers", "PyTorch"],
use_instructions=False,
)

# Metadata entry for the multi-qa-MiniLM-L6-cos-v1 Sentence Transformers model.
multi_qa_MiniLM_L6_cos_v1 = ModelMeta(
    # The organisation prefix is "sentence-transformers" (plural), matching the
    # `reference` URL below and the other entries in this module.
    name="sentence-transformers/multi-qa-MiniLM-L6-cos-v1",
    languages=["eng-Latn"],
    open_weights=True,
    revision="b207367332321f8e44f96e224ef15bc607f4dbf0",  # can be any
    release_date="2021-08-30",
    n_parameters=22_700_000,
    memory_usage=None,
    embed_dim=384,
    license="apache-2.0",
    max_tokens=512,
    reference="https://huggingface.co/sentence-transformers/multi-qa-MiniLM-L6-cos-v1",
    similarity_fn_name="cosine",
    framework=["Sentence Transformers", "PyTorch"],
    use_instructions=False,
    # No superseded model and no base model are recorded for this entry.
    supersedes=None,
    adapted_from=None,
)

# Metadata entry for the all-mpnet-base-v2 Sentence Transformers model.
all_mpnet_base_v2 = ModelMeta(
    # --- identity and provenance ---
    name="sentence-transformers/all-mpnet-base-v2",
    reference="https://huggingface.co/sentence-transformers/all-mpnet-base-v2",
    revision="9a3225965996d404b775526de6dbfe85d3368642",  # can be any
    release_date="2021-08-30",
    license="apache-2.0",
    open_weights=True,
    # --- lineage ---
    supersedes="sentence-transformers/all-mpnet-base-v1",
    adapted_from=None,
    # --- size and limits ---
    n_parameters=109_000_000,
    memory_usage=None,
    embed_dim=768,
    max_tokens=384,
    # --- usage ---
    languages=["eng-Latn"],
    framework=["Sentence Transformers", "PyTorch"],
    similarity_fn_name="cosine",
    use_instructions=False,
)