From 5c5489b9938221f2a0c032c53c0cfb9c560c3c59 Mon Sep 17 00:00:00 2001 From: Daniel Buades Marcos Date: Fri, 20 Dec 2024 15:55:37 +0100 Subject: [PATCH] feat: set `use_instructions` to True in models using prompts --- mteb/models/arctic_models.py | 16 ++++++++-------- mteb/models/bge_models.py | 6 +++--- mteb/models/cohere_models.py | 8 ++++---- mteb/models/jina_models.py | 2 +- mteb/models/ru_sentence_models.py | 5 +++-- mteb/models/uae_models.py | 2 +- mteb/models/voyage_models.py | 16 ++++++++-------- 7 files changed, 28 insertions(+), 27 deletions(-) diff --git a/mteb/models/arctic_models.py b/mteb/models/arctic_models.py index eadc4065f..9fae56110 100644 --- a/mteb/models/arctic_models.py +++ b/mteb/models/arctic_models.py @@ -100,7 +100,7 @@ license="apache-2.0", reference="https://huggingface.co/Snowflake/snowflake-arctic-embed-xs", similarity_fn_name="cosine", - use_instructions=False, + use_instructions=True, adapted_from="sentence-transformers/all-MiniLM-L6-v2", superseded_by=None, ) @@ -125,7 +125,7 @@ license="apache-2.0", reference="https://huggingface.co/Snowflake/snowflake-arctic-embed-s", similarity_fn_name="cosine", - use_instructions=False, + use_instructions=True, adapted_from="intfloat/e5-small-unsupervised", superseded_by=None, ) @@ -150,7 +150,7 @@ license="apache-2.0", reference="https://huggingface.co/Snowflake/snowflake-arctic-embed-m", similarity_fn_name="cosine", - use_instructions=False, + use_instructions=True, adapted_from="intfloat/e5-base-unsupervised", superseded_by="Snowflake/snowflake-arctic-embed-m-v1.5", ) @@ -174,7 +174,7 @@ license="apache-2.0", reference="https://huggingface.co/Snowflake/snowflake-arctic-embed-m-long", similarity_fn_name="cosine", - use_instructions=False, + use_instructions=True, adapted_from="nomic-ai/nomic-embed-text-v1-unsupervised", superseded_by="Snowflake/snowflake-arctic-embed-m-v2.0", ) @@ -198,7 +198,7 @@ license="apache-2.0", reference="https://huggingface.co/Snowflake/snowflake-arctic-embed-l", similarity_fn_name="cosine", - use_instructions=False, + use_instructions=True, adapted_from="intfloat/e5-base-unsupervised", superseded_by="Snowflake/snowflake-arctic-embed-l-v2.0", ) @@ -225,7 +225,7 @@ license="apache-2.0", reference="https://huggingface.co/Snowflake/snowflake-arctic-embed-m-v1.5", similarity_fn_name="cosine", - use_instructions=False, + use_instructions=True, adapted_from=None, superseded_by="Snowflake/snowflake-arctic-embed-m-v2.0", ) @@ -249,7 +249,7 @@ license="apache-2.0", reference="https://huggingface.co/Snowflake/snowflake-arctic-embed-m-v2.0", similarity_fn_name="cosine", - use_instructions=False, + use_instructions=True, adapted_from="Alibaba-NLP/gte-multilingual-base", superseded_by=None, ) @@ -273,7 +273,7 @@ license="apache-2.0", reference="https://huggingface.co/Snowflake/snowflake-arctic-embed-l-v2.0", similarity_fn_name="cosine", - use_instructions=False, + use_instructions=True, adapted_from="BAAI/bge-m3-retromae", superseded_by=None, ) diff --git a/mteb/models/bge_models.py b/mteb/models/bge_models.py index b643b4dfb..cc183374c 100644 --- a/mteb/models/bge_models.py +++ b/mteb/models/bge_models.py @@ -26,7 +26,7 @@ reference="https://huggingface.co/BAAI/bge-small-en-v1.5", similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], - use_instructions=False, + use_instructions=True, ) bge_base_en_v1_5 = ModelMeta( @@ -49,7 +49,7 @@ reference="https://huggingface.co/BAAI/bge-base-en-v1.5", similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], - use_instructions=False, + use_instructions=True, ) bge_large_en_v1_5 = ModelMeta( @@ -72,5 +72,5 @@ reference="https://huggingface.co/BAAI/bge-large-en-v1.5", similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], - use_instructions=False, + use_instructions=True, ) diff --git a/mteb/models/cohere_models.py b/mteb/models/cohere_models.py index 2a8aa1e3d..43a797342 100644 --- a/mteb/models/cohere_models.py +++ b/mteb/models/cohere_models.py @@ -234,7 +234,7 @@ def encode( license=None, similarity_fn_name="cosine", framework=["API"], - use_instructions=False, + use_instructions=True, ) cohere_eng_3 = ModelMeta( @@ -256,7 +256,7 @@ def encode( license=None, similarity_fn_name="cosine", framework=["API"], - use_instructions=False, + use_instructions=True, ) cohere_mult_light_3 = ModelMeta( @@ -278,7 +278,7 @@ def encode( license=None, similarity_fn_name="cosine", framework=["API"], - use_instructions=False, + use_instructions=True, ) cohere_eng_light_3 = ModelMeta( @@ -300,5 +300,5 @@ def encode( license=None, similarity_fn_name="cosine", framework=["API"], - use_instructions=False, + use_instructions=True, ) diff --git a/mteb/models/jina_models.py b/mteb/models/jina_models.py index 7b2c343a1..122f19065 100644 --- a/mteb/models/jina_models.py +++ b/mteb/models/jina_models.py @@ -220,6 +220,6 @@ def encode( license="cc-by-nc-4.0", similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], - use_instructions=False, + use_instructions=True, reference="https://huggingface.co/jinaai/jina-embeddings-v3", ) diff --git a/mteb/models/ru_sentence_models.py b/mteb/models/ru_sentence_models.py index 301892e07..f90111b95 100644 --- a/mteb/models/ru_sentence_models.py +++ b/mteb/models/ru_sentence_models.py @@ -79,7 +79,7 @@ sentence_transformers_loader, model_name="deepvk/USER-base", revision="436a489a2087d61aa670b3496a9915f84e46c861", - prompts={"query": "query: ", "passage": "passage: "}, + model_prompts={"query": "query: ", "passage": "passage: "}, ), name="deepvk/USER-base", languages=["rus_Cyrl"], @@ -94,7 +94,7 @@ reference="https://huggingface.co/ai-forever/sbert_large_mt_nlu_ru", similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], - use_instructions=False, + use_instructions=True, ) deberta_v1_ru = ModelMeta( @@ -234,4 +234,5 @@ open_weights=True, revision="89fb1651989adbb1cfcfdedafd7d102951ad0555", release_date="2024-07-29", + use_instructions=True, ) diff --git a/mteb/models/uae_models.py b/mteb/models/uae_models.py index 942d50894..5c47cba67 100644 --- a/mteb/models/uae_models.py +++ b/mteb/models/uae_models.py @@ -74,5 +74,5 @@ def encode( similarity_fn_name="cosine", framework=["Sentence Transformers", "PyTorch"], reference="https://huggingface.co/WhereIsAI/UAE-Large-V1", - use_instructions=False, + use_instructions=True, ) diff --git a/mteb/models/voyage_models.py b/mteb/models/voyage_models.py index 57453b076..0e6ef71d9 100644 --- a/mteb/models/voyage_models.py +++ b/mteb/models/voyage_models.py @@ -181,7 +181,7 @@ def _batched_encode( reference="https://blog.voyageai.com/2024/06/03/domain-specific-embeddings-finance-edition-voyage-finance-2/", similarity_fn_name="cosine", framework=["API"], - use_instructions=False, + use_instructions=True, ) voyage_law_2 = ModelMeta( @@ -203,7 +203,7 @@ def _batched_encode( reference="https://blog.voyageai.com/2024/04/15/domain-specific-embeddings-and-retrieval-legal-edition-voyage-law-2/", similarity_fn_name="cosine", framework=["API"], - use_instructions=False, + use_instructions=True, ) voyage_code_2 = ModelMeta( @@ -225,7 +225,7 @@ def _batched_encode( reference="https://blog.voyageai.com/2024/01/23/voyage-code-2-elevate-your-code-retrieval/", similarity_fn_name="cosine", framework=["API"], - use_instructions=False, + use_instructions=True, ) voyage_large_2 = ModelMeta( @@ -247,7 +247,7 @@ def _batched_encode( reference="https://blog.voyageai.com/2023/10/29/voyage-embeddings/", similarity_fn_name="cosine", framework=["API"], - use_instructions=False, + use_instructions=True, ) voyage_2 = ModelMeta( @@ -269,7 +269,7 @@ def _batched_encode( reference="https://blog.voyageai.com/2023/10/29/voyage-embeddings/", similarity_fn_name="cosine", framework=["API"], - use_instructions=False, + use_instructions=True, ) voyage_multilingual_2 = ModelMeta( name="voyageai/voyage-multilingual-2", @@ -290,7 +290,7 @@ def _batched_encode( reference="https://blog.voyageai.com/2024/06/10/voyage-multilingual-2-multilingual-embedding-model/", similarity_fn_name="cosine", framework=["API"], - use_instructions=False, + use_instructions=True, ) voyage_3 = ModelMeta( @@ -312,7 +312,7 @@ def _batched_encode( reference="https://blog.voyageai.com/2024/09/18/voyage-3/", similarity_fn_name="cosine", framework=["API"], - use_instructions=False, + use_instructions=True, ) voyage_3_lite = ModelMeta( @@ -334,5 +334,5 @@ def _batched_encode( reference="https://blog.voyageai.com/2024/09/18/voyage-3/", similarity_fn_name="cosine", framework=["API"], - use_instructions=False, + use_instructions=True, )