From 2ffd67bf0de11b483adff9965ff28e527d55581b Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Wed, 18 Sep 2024 10:44:25 +0100 Subject: [PATCH 1/2] Add `baseURL` and `dimensions` params to `text2vec-jinaai` --- .../classes/config_named_vectors.py | 8 ++++++ .../collections/classes/config_vectorizers.py | 26 +++++++++++++++++-- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/weaviate/collections/classes/config_named_vectors.py b/weaviate/collections/classes/config_named_vectors.py index fcb08fcac..c5b45fcd1 100644 --- a/weaviate/collections/classes/config_named_vectors.py +++ b/weaviate/collections/classes/config_named_vectors.py @@ -951,6 +951,8 @@ def text2vec_jinaai( source_properties: Optional[List[str]] = None, vector_index_config: Optional[_VectorIndexConfigCreate] = None, vectorize_collection_name: bool = True, + base_url: Optional[str] = None, + dimensions: Optional[int] = None, model: Optional[Union[JinaModel, str]] = None, ) -> _NamedVectorConfigCreate: """Create a named vector using the `text2vec-jinaai` model. @@ -967,6 +969,10 @@ def text2vec_jinaai( The configuration for Weaviate's vector index. Use wvc.config.Configure.VectorIndex to create a vector index configuration. None by default `vectorize_collection_name` Whether to vectorize the collection name. Defaults to `True`. + `base_url` + The base URL to send the vectorization requests to. Defaults to `None`, which uses the server-defined default. + `dimensions` + The number of dimensions for the generated embeddings. Defaults to `None`, which uses the server-defined default. `model` The model to use. Defaults to `None`, which uses the server-defined default. See the @@ -976,6 +982,8 @@ def text2vec_jinaai( name=name, source_properties=source_properties, vectorizer=_Text2VecJinaConfigCreate( + baseURL=base_url, + dimensions=dimensions, model=model, vectorizeClassName=vectorize_collection_name, ), diff --git a/weaviate/collections/classes/config_vectorizers.py b/weaviate/collections/classes/config_vectorizers.py index 3e8a1a018..b4e7c466d 100644 --- a/weaviate/collections/classes/config_vectorizers.py +++ b/weaviate/collections/classes/config_vectorizers.py @@ -24,7 +24,16 @@ OpenAIModel: TypeAlias = Literal[ "text-embedding-3-small", "text-embedding-3-large", "text-embedding-ada-002" ] -JinaModel: TypeAlias = Literal["jina-embeddings-v2-base-en", "jina-embeddings-v2-small-en"] +JinaModel: TypeAlias = Literal[ + "jina-embeddings-v2-base-en", + "jina-embeddings-v2-small-en", + "jina-embeddings-v2-base-zh", + "jina-embeddings-v2-base-es", + "jina-embeddings-v2-base-code", + "jina-colbert-v1-en", + "jina-colbert-v2", + "jina-embeddings-v3", +] VoyageModel: TypeAlias = Literal[ "voyage-large-2", "voyage-code-2", @@ -341,6 +350,8 @@ class _Text2VecJinaConfig(_ConfigCreateModel): vectorizer: Union[Vectorizers, _EnumLikeStr] = Field( default=Vectorizers.TEXT2VEC_JINAAI, frozen=True, exclude=True ) + baseURL: Optional[str] + dimensions: Optional[int] model: Optional[str] vectorizeClassName: bool @@ -1094,6 +1105,8 @@ def text2vec_transformers( def text2vec_jinaai( model: Optional[Union[JinaModel, str]] = None, vectorize_collection_name: bool = True, + base_url: Optional[str] = None, + dimensions: Optional[int] = None, ) -> _VectorizerConfigCreate: """Create a `_Text2VecJinaConfigCreate` object for use when vectorizing using the `text2vec-jinaai` model. @@ -1107,8 +1120,17 @@ def text2vec_jinaai( [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-jinaai#available-models) for more details. `vectorize_collection_name` Whether to vectorize the collection name. Defaults to `True`. + `base_url` + The base URL to send the vectorization requests to. Defaults to `None`, which uses the server-defined default. + `dimensions` + The number of dimensions for the generated embeddings. Defaults to `None`, which uses the server-defined default. """ - return _Text2VecJinaConfigCreate(model=model, vectorizeClassName=vectorize_collection_name) + return _Text2VecJinaConfigCreate( + model=model, + vectorizeClassName=vectorize_collection_name, + baseURL=base_url, + dimensions=dimensions, + ) @staticmethod def text2vec_voyageai( From 3a7e1826d6082b4041c57e49a0f0cdba2a3a660e Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Wed, 18 Sep 2024 10:49:06 +0100 Subject: [PATCH 2/2] Remove colbert from allowed JinaModels --- weaviate/collections/classes/config_vectorizers.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/weaviate/collections/classes/config_vectorizers.py b/weaviate/collections/classes/config_vectorizers.py index b4e7c466d..056c73d73 100644 --- a/weaviate/collections/classes/config_vectorizers.py +++ b/weaviate/collections/classes/config_vectorizers.py @@ -30,8 +30,6 @@ "jina-embeddings-v2-base-zh", "jina-embeddings-v2-base-es", "jina-embeddings-v2-base-code", - "jina-colbert-v1-en", - "jina-colbert-v2", "jina-embeddings-v3", ] VoyageModel: TypeAlias = Literal[