Skip to content

Commit

Permalink
fix: Added arctic models (#1541)
Browse files Browse the repository at this point in the history
  • Loading branch information
KennethEnevoldsen authored Dec 4, 2024
1 parent 97ab272 commit df11c38
Show file tree
Hide file tree
Showing 2 changed files with 130 additions and 2 deletions.
130 changes: 128 additions & 2 deletions mteb/models/arctic_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,135 @@
n_parameters=109_000_000,
memory_usage=None,
max_tokens=512,
embed_dim=256,
embed_dim=768,
license="apache-2.0",
reference="https://huggingface.co/Snowflake/snowflake-arctic-embed-m-v1.5",
similarity_fn_name="cosine_similarity",
similarity_fn_name="cosine",
use_instructions=False,
adapted_from=None,
supersedes="Snowflake/snowflake-arctic-embed-m",
)


# Model metadata for Snowflake/snowflake-arctic-embed-xs.
arctic_embed_xs = ModelMeta(
    # --- identity -----------------------------------------------------
    name="Snowflake/snowflake-arctic-embed-xs",
    revision="742da4f66e1823b5b4dbe6c320a1375a1fd85f9e",
    reference="https://huggingface.co/Snowflake/snowflake-arctic-embed-xs",
    release_date="2024-07-08",  # date of the initial commit of the HF model
    license="apache-2.0",
    open_weights=True,
    languages=["eng_Latn"],
    # --- loading ------------------------------------------------------
    # The loader is bound to the same repo id and revision as listed above.
    loader=partial(
        sentence_transformers_loader,
        model_name="Snowflake/snowflake-arctic-embed-xs",
        revision="742da4f66e1823b5b4dbe6c320a1375a1fd85f9e",
    ),
    framework=["Sentence Transformers", "PyTorch"],
    # --- size and shape -----------------------------------------------
    n_parameters=22_600_000,
    memory_usage=None,  # not recorded
    max_tokens=512,
    embed_dim=384,
    # --- usage --------------------------------------------------------
    similarity_fn_name="cosine",
    use_instructions=False,
    # --- lineage ------------------------------------------------------
    adapted_from="sentence-transformers/all-MiniLM-L6-v2",
    supersedes=None,
)


# Model metadata for Snowflake/snowflake-arctic-embed-s.
arctic_embed_s = ModelMeta(
    # --- identity -----------------------------------------------------
    name="Snowflake/snowflake-arctic-embed-s",
    revision="d3c1d2d433dd0fdc8e9ca01331a5f225639e798f",
    reference="https://huggingface.co/Snowflake/snowflake-arctic-embed-s",
    release_date="2024-04-12",  # date of the initial commit of the HF model
    license="apache-2.0",
    open_weights=True,
    languages=["eng_Latn"],
    # --- loading ------------------------------------------------------
    # The loader is bound to the same repo id and revision as listed above.
    loader=partial(
        sentence_transformers_loader,
        model_name="Snowflake/snowflake-arctic-embed-s",
        revision="d3c1d2d433dd0fdc8e9ca01331a5f225639e798f",
    ),
    framework=["Sentence Transformers", "PyTorch"],
    # --- size and shape -----------------------------------------------
    n_parameters=32_200_000,
    memory_usage=None,  # not recorded
    max_tokens=512,
    embed_dim=384,
    # --- usage --------------------------------------------------------
    similarity_fn_name="cosine",
    use_instructions=False,
    # --- lineage ------------------------------------------------------
    adapted_from="intfloat/e5-small-unsupervised",
    supersedes=None,
)


# Model metadata for Snowflake/snowflake-arctic-embed-m.
arctic_embed_m = ModelMeta(
    # --- identity -----------------------------------------------------
    name="Snowflake/snowflake-arctic-embed-m",
    revision="cc17beacbac32366782584c8752220405a0f3f40",
    reference="https://huggingface.co/Snowflake/snowflake-arctic-embed-m",
    release_date="2024-04-12",  # date of the initial commit of the HF model
    license="apache-2.0",
    open_weights=True,
    languages=["eng_Latn"],
    # --- loading ------------------------------------------------------
    # The loader is bound to the same repo id and revision as listed above.
    loader=partial(
        sentence_transformers_loader,
        model_name="Snowflake/snowflake-arctic-embed-m",
        revision="cc17beacbac32366782584c8752220405a0f3f40",
    ),
    framework=["Sentence Transformers", "PyTorch"],
    # --- size and shape -----------------------------------------------
    n_parameters=109_000_000,
    memory_usage=None,  # not recorded
    max_tokens=512,
    embed_dim=768,
    # --- usage --------------------------------------------------------
    similarity_fn_name="cosine",
    use_instructions=False,
    # --- lineage ------------------------------------------------------
    adapted_from="intfloat/e5-base-unsupervised",
    supersedes=None,
)

# Model metadata for Snowflake/snowflake-arctic-embed-m-long.
arctic_embed_m_long = ModelMeta(
    # The loader is bound to the same repo id and revision as the
    # `name` / `revision` fields below.
    loader=partial(
        sentence_transformers_loader,
        model_name="Snowflake/snowflake-arctic-embed-m-long",
        revision="89d0f6ab196eead40b90cb6f9fefec01a908d2d1",
    ),
    name="Snowflake/snowflake-arctic-embed-m-long",
    revision="89d0f6ab196eead40b90cb6f9fefec01a908d2d1",
    release_date="2024-04-12",  # initial commit of hf model.
    languages=["eng_Latn"],
    open_weights=True,
    framework=["Sentence Transformers", "PyTorch"],
    # Per the Hugging Face model card this variant has ~137M parameters;
    # the previous 109M figure was carried over from snowflake-arctic-embed-m.
    n_parameters=137_000_000,
    memory_usage=None,  # not recorded
    max_tokens=2048,
    embed_dim=768,
    license="apache-2.0",
    reference="https://huggingface.co/Snowflake/snowflake-arctic-embed-m-long",
    similarity_fn_name="cosine",
    use_instructions=False,
    adapted_from="nomic-ai/nomic-embed-text-v1-unsupervised",
    supersedes=None,
)


# Model metadata for Snowflake/snowflake-arctic-embed-l.
arctic_embed_l = ModelMeta(
    # The loader is bound to the same repo id and revision as the
    # `name` / `revision` fields below.
    loader=partial(
        sentence_transformers_loader,
        model_name="Snowflake/snowflake-arctic-embed-l",
        revision="9a9e5834d2e89cdd8bb72b64111dde496e4fe78c",
    ),
    name="Snowflake/snowflake-arctic-embed-l",
    revision="9a9e5834d2e89cdd8bb72b64111dde496e4fe78c",
    release_date="2024-04-12",  # initial commit of hf model.
    languages=["eng_Latn"],
    open_weights=True,
    framework=["Sentence Transformers", "PyTorch"],
    # Size, embedding width and base model follow the Hugging Face model
    # card for the large variant; the previous values (109M / 768 /
    # e5-base-unsupervised) duplicated the snowflake-arctic-embed-m entry.
    n_parameters=335_000_000,
    memory_usage=None,  # not recorded
    max_tokens=512,
    embed_dim=1024,
    license="apache-2.0",
    reference="https://huggingface.co/Snowflake/snowflake-arctic-embed-l",
    similarity_fn_name="cosine",
    use_instructions=False,
    adapted_from="intfloat/e5-large-unsupervised",
    supersedes=None,
)
2 changes: 2 additions & 0 deletions mteb/models/overview.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from mteb.encoder_interface import Encoder
from mteb.model_meta import ModelMeta
from mteb.models import (
arctic_models,
bge_models,
bm25,
cohere_models,
Expand Down Expand Up @@ -39,6 +40,7 @@
logger = logging.getLogger(__name__)

model_modules = [
arctic_models,
bge_models,
bm25,
cohere_models,
Expand Down

0 comments on commit df11c38

Please sign in to comment.