Skip to content

Commit

Permalink
fix: Add training dataset to model meta
Browse files Browse the repository at this point in the history
Addresses #1556
  • Loading branch information
KennethEnevoldsen committed Dec 7, 2024
1 parent d713525 commit 365bb0a
Showing 1 changed file with 4 additions and 3 deletions.
7 changes: 4 additions & 3 deletions mteb/model_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,9 @@ class ModelMeta(BaseModel):
in the Latin script.
use_instructions: Whether the model uses instructions, e.g. for prompt-based models. This also includes models that require a specific format for
input such as "query: {document}" or "passage: {document}".
zero_shot_benchmarks: A list of benchmarks on which the model has been evaluated in a zero-shot setting. By default we assume that all models
are evaluated non-zero-shot unless specified otherwise.
training_datasets: A dictionary of datasets that the model was trained on. Names should be the names as they appear in `mteb`, for example
{"ArguAna": ["test"]} if the model is trained on the ArguAna test set. This field is used to determine if a model generalizes zero-shot to
a benchmark as well as to mark dataset contamination.
adapted_from: Name of the model from which this model is adapted. For quantizations, fine-tunes, long doc extensions, etc.
superseded_by: Name of the model that supersedes this model, e.g. nvidia/NV-Embed-v2 supersedes v1.
"""
Expand All @@ -97,7 +98,7 @@ class ModelMeta(BaseModel):
reference: STR_URL | None = None
similarity_fn_name: DISTANCE_METRICS | None = None
use_instructions: bool | None = None
zero_shot_benchmarks: list[str] | None = None
training_datasets: dict[str, list[str]] | None = None
adapted_from: str | None = None
superseded_by: str | None = None

Expand Down

0 comments on commit 365bb0a

Please sign in to comment.