From 8be03c99372cfaf7a86f31464959338f6f9b900f Mon Sep 17 00:00:00 2001 From: Ephraim Anierobi Date: Fri, 1 Dec 2023 15:01:53 +0100 Subject: [PATCH] Add a cache for weaviate client (#35983) * Add a cache for weaviate client While working on another issue, I realized how often I had to call get_conn. So instead of deprecating this, we can use it as a cache within the code so we don't connect every time a method is called. * change cache to be on _conn --- airflow/providers/weaviate/hooks/weaviate.py | 23 +++++++++++++------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/airflow/providers/weaviate/hooks/weaviate.py b/airflow/providers/weaviate/hooks/weaviate.py index 151aaabea6f1f..66d820bbbebe4 100644 --- a/airflow/providers/weaviate/hooks/weaviate.py +++ b/airflow/providers/weaviate/hooks/weaviate.py @@ -18,6 +18,7 @@ from __future__ import annotations import warnings +from functools import cached_property from typing import Any from weaviate import Client as WeaviateClient @@ -94,18 +95,24 @@ def get_conn(self) -> WeaviateClient: url=url, auth_client_secret=auth_client_secret, additional_headers=additional_headers ) + @cached_property + def conn(self) -> WeaviateClient: + """Returns a Weaviate client.""" + return self.get_conn() + def get_client(self) -> WeaviateClient: + """Returns a Weaviate client.""" # Keeping this for backwards compatibility warnings.warn( "The `get_client` method has been renamed to `get_conn`", AirflowProviderDeprecationWarning, stacklevel=2, ) - return self.get_conn() + return self.conn def test_connection(self) -> tuple[bool, str]: try: - client = self.get_client() + client = self.conn client.schema.get() return True, "Connection established!" 
except Exception as e: @@ -114,7 +121,7 @@ def test_connection(self) -> tuple[bool, str]: def create_class(self, class_json: dict[str, Any]) -> None: """Create a new class.""" - client = self.get_client() + client = self.conn client.schema.create_class(class_json) def create_schema(self, schema_json: dict[str, Any]) -> None: @@ -125,13 +132,13 @@ def create_schema(self, schema_json: dict[str, Any]) -> None: :param schema_json: The schema to create """ - client = self.get_client() + client = self.conn client.schema.create(schema_json) def batch_data( self, class_name: str, data: list[dict[str, Any]], batch_config_params: dict[str, Any] | None = None ) -> None: - client = self.get_client() + client = self.conn if not batch_config_params: batch_config_params = {} client.batch.configure(**batch_config_params) @@ -147,7 +154,7 @@ def batch_data( def delete_class(self, class_name: str) -> None: """Delete an existing class.""" - client = self.get_client() + client = self.conn client.schema.delete_class(class_name) def query_with_vector( @@ -166,7 +173,7 @@ def query_with_vector( external vectorizer. Weaviate then converts this into a vector through the inference API (OpenAI in this particular example) and uses that vector as the basis for a vector search. """ - client = self.get_client() + client = self.conn results: dict[str, dict[Any, Any]] = ( client.query.get(class_name, properties[0]) .with_near_vector({"vector": embeddings, "certainty": certainty}) @@ -185,7 +192,7 @@ def query_without_vector( weaviate with a query search_text. Weaviate then converts this into a vector through the inference API (OpenAI in this particular example) and uses that vector as the basis for a vector search. """ - client = self.get_client() + client = self.conn results: dict[str, dict[Any, Any]] = ( client.query.get(class_name, properties[0]) .with_near_text({"concepts": [search_text]})