Skip to content

Commit

Permalink
Add IBM Granite Embedding Models (#1613)
Browse files Browse the repository at this point in the history
* add IBM granite embedding models
* lint formatting
* add adapted_from and superseded_by to ModelMeta
  • Loading branch information
aashka-trivedi authored Dec 19, 2024
1 parent 48cb97d commit ad05983
Show file tree
Hide file tree
Showing 3 changed files with 120 additions and 2 deletions.
114 changes: 114 additions & 0 deletions mteb/models/ibm_granite_models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
from __future__ import annotations

from functools import partial

from mteb.model_meta import ModelMeta, sentence_transformers_loader

# Languages shared by the multilingual Granite embedding models, written as
# "<ISO 639-3 language>_<ISO 15924 script>".
# The script suffix must name the script the language is actually written in:
# Arabic -> Arab, Japanese -> Jpan, Korean -> Hang (these were previously
# mislabelled as Latn). Chinese is listed once per script variant.
GRANITE_LANGUAGES = [
    "ara_Arab",
    "ces_Latn",
    "deu_Latn",
    "eng_Latn",
    "spa_Latn",
    "fra_Latn",
    "ita_Latn",
    "jpn_Jpan",
    "kor_Hang",
    "nld_Latn",
    "por_Latn",
    "zho_Hant",
    "zho_Hans",
]


# Metadata entry for ibm-granite/granite-embedding-107m-multilingual.
granite_107m_multilingual = ModelMeta(
    # Identity and provenance.
    name="ibm-granite/granite-embedding-107m-multilingual",
    revision="47db56afe692f731540413c67dd818ff492277e7",
    reference="https://huggingface.co/ibm-granite/granite-embedding-107m-multilingual",
    release_date="2024-12-18",
    license="apache-2.0",
    open_weights=True,
    # The loader is bound to the same model name and revision as declared above.
    loader=partial(  # type: ignore
        sentence_transformers_loader,
        model_name="ibm-granite/granite-embedding-107m-multilingual",
        revision="47db56afe692f731540413c67dd818ff492277e7",
    ),
    # Language coverage is shared with the other multilingual Granite entry.
    languages=GRANITE_LANGUAGES,
    # Size figures; memory usage is not recorded for this entry.
    n_parameters=107_000_000,
    embed_dim=384,
    max_tokens=512,
    memory_usage=None,
    # How embeddings are compared and which libraries the model runs on.
    similarity_fn_name="cosine",
    framework=["Sentence Transformers", "PyTorch"],
    # No parent model or successor is recorded.
    adapted_from=None,
    superseded_by=None,
)

# Metadata entry for ibm-granite/granite-embedding-278m-multilingual.
granite_278m_multilingual = ModelMeta(
    # Identity and provenance.
    name="ibm-granite/granite-embedding-278m-multilingual",
    revision="84e3546b88b0cb69f8078608a1df558020bcbf1f",
    reference="https://huggingface.co/ibm-granite/granite-embedding-278m-multilingual",
    release_date="2024-12-18",
    license="apache-2.0",
    open_weights=True,
    # The loader is bound to the same model name and revision as declared above.
    loader=partial(  # type: ignore
        sentence_transformers_loader,
        model_name="ibm-granite/granite-embedding-278m-multilingual",
        revision="84e3546b88b0cb69f8078608a1df558020bcbf1f",
    ),
    # Language coverage is shared with the other multilingual Granite entry.
    languages=GRANITE_LANGUAGES,
    # Size figures; memory usage is not recorded for this entry.
    n_parameters=278_000_000,
    embed_dim=768,
    max_tokens=512,
    memory_usage=None,
    # How embeddings are compared and which libraries the model runs on.
    similarity_fn_name="cosine",
    framework=["Sentence Transformers", "PyTorch"],
    # No parent model or successor is recorded.
    adapted_from=None,
    superseded_by=None,
)

# Metadata entry for ibm-granite/granite-embedding-30m-english.
granite_30m_english = ModelMeta(
    # Identity and provenance.
    name="ibm-granite/granite-embedding-30m-english",
    revision="eddbb57470f896b5f8e2bfcb823d8f0e2d2024a5",
    reference="https://huggingface.co/ibm-granite/granite-embedding-30m-english",
    release_date="2024-12-18",
    license="apache-2.0",
    open_weights=True,
    # The loader is bound to the same model name and revision as declared above.
    loader=partial(  # type: ignore
        sentence_transformers_loader,
        model_name="ibm-granite/granite-embedding-30m-english",
        revision="eddbb57470f896b5f8e2bfcb823d8f0e2d2024a5",
    ),
    # English-only entry.
    languages=["eng_Latn"],
    # Size figures; memory usage is not recorded for this entry.
    n_parameters=30_000_000,
    embed_dim=384,
    max_tokens=512,
    memory_usage=None,
    # How embeddings are compared and which libraries the model runs on.
    similarity_fn_name="cosine",
    framework=["Sentence Transformers", "PyTorch"],
    # No parent model or successor is recorded.
    adapted_from=None,
    superseded_by=None,
)

# Metadata entry for ibm-granite/granite-embedding-125m-english.
granite_125m_english = ModelMeta(
    # Identity and provenance.
    name="ibm-granite/granite-embedding-125m-english",
    revision="e48d3a5b47eaa18e3fe07d4676e187fd80f32730",
    reference="https://huggingface.co/ibm-granite/granite-embedding-125m-english",
    release_date="2024-12-18",
    license="apache-2.0",
    open_weights=True,
    # The loader is bound to the same model name and revision as declared above.
    loader=partial(  # type: ignore
        sentence_transformers_loader,
        model_name="ibm-granite/granite-embedding-125m-english",
        revision="e48d3a5b47eaa18e3fe07d4676e187fd80f32730",
    ),
    # English-only entry.
    languages=["eng_Latn"],
    # Size figures; memory usage is not recorded for this entry.
    n_parameters=125_000_000,
    embed_dim=768,
    max_tokens=512,
    memory_usage=None,
    # How embeddings are compared and which libraries the model runs on.
    similarity_fn_name="cosine",
    framework=["Sentence Transformers", "PyTorch"],
    # No parent model or successor is recorded.
    adapted_from=None,
    superseded_by=None,
)
2 changes: 2 additions & 0 deletions mteb/models/overview.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
google_models,
gritlm_models,
gte_models,
ibm_granite_models,
jina_models,
linq_models,
llm2vec_models,
Expand Down Expand Up @@ -56,6 +57,7 @@
google_models,
gritlm_models,
gte_models,
ibm_granite_models,
jina_models,
linq_models,
llm2vec_models,
Expand Down
6 changes: 4 additions & 2 deletions mteb/models/rerankers_monot5_based.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,10 @@ def get_prediction_tokens(
token_true_id = tokenizer.get_vocab()[token_true]
return token_false_id, token_true_id
else:
raise Exception(f"We don't know the indexes for the non-relevant/relevant tokens for\
the checkpoint {model_name_or_path} and you did not provide any.")
raise Exception(
f"We don't know the indexes for the non-relevant/relevant tokens for\
the checkpoint {model_name_or_path} and you did not provide any."
)
else:
token_false_id = tokenizer.get_vocab()[token_false]
token_true_id = tokenizer.get_vocab()[token_true]
Expand Down

0 comments on commit ad05983

Please sign in to comment.