From a51ac218e3f88efc25601b2a112e6f091dbd19eb Mon Sep 17 00:00:00 2001 From: Ajit Date: Mon, 30 Sep 2024 10:22:51 +0530 Subject: [PATCH 1/6] reduced usage of numpy and substituted built-in libraries --- haystack/testing/test_utils.py | 3 --- haystack/utils/expit.py | 4 ++-- test/components/embedders/test_openai_document_embedder.py | 5 +++-- .../test_sentence_transformers_document_embedder.py | 6 ++++-- .../embedders/test_sentence_transformers_text_embedder.py | 6 ++++-- test/components/evaluators/test_faithfulness_evaluator.py | 1 - 6 files changed, 13 insertions(+), 12 deletions(-) diff --git a/haystack/testing/test_utils.py b/haystack/testing/test_utils.py index cbb3eb15c0..247bb2e3b6 100644 --- a/haystack/testing/test_utils.py +++ b/haystack/testing/test_utils.py @@ -5,8 +5,6 @@ import os import random -import numpy as np - from haystack import logging logger = logging.getLogger(__name__) @@ -23,7 +21,6 @@ def set_all_seeds(seed: int, deterministic_cudnn: bool = False) -> None: :param deterministic_cudnn: Enable for full reproducibility when using CUDA. Caution: might slow down training. """ random.seed(seed) - np.random.seed(seed) os.environ["PYTHONHASHSEED"] = str(seed) try: diff --git a/haystack/utils/expit.py b/haystack/utils/expit.py index ea84944c37..b61b60305f 100644 --- a/haystack/utils/expit.py +++ b/haystack/utils/expit.py @@ -2,9 +2,9 @@ # # SPDX-License-Identifier: Apache-2.0 -import numpy as np +from math import exp def expit(x: float) -> float: """Compute logistic sigmoid function. Maps input values to a range between 0 and 1""" - return 1 / (1 + np.exp(-x)) + return 1 / (1 + exp(-x)) diff --git a/test/components/embedders/test_openai_document_embedder.py b/test/components/embedders/test_openai_document_embedder.py index f64a3aca12..89ce62a929 100644 --- a/test/components/embedders/test_openai_document_embedder.py +++ b/test/components/embedders/test_openai_document_embedder.py @@ -5,7 +5,7 @@ from typing import List from haystack.utils.auth import Secret -import numpy as np +import random import pytest from haystack import Document @@ -16,7 +16,8 @@ def mock_openai_response(input: List[str], model: str = "text-embedding-ada-002" dict_response = { "object": "list", "data": [ - {"object": "embedding", "index": i, "embedding": np.random.rand(1536).tolist()} for i in range(len(input)) + {"object": "embedding", "index": i, "embedding": [random.random() for _ in range(1536)]} + for i in range(len(input)) ], "model": model, "usage": {"prompt_tokens": 4, "total_tokens": 4}, diff --git a/test/components/embedders/test_sentence_transformers_document_embedder.py b/test/components/embedders/test_sentence_transformers_document_embedder.py index a5e6af8278..6068419aae 100644 --- a/test/components/embedders/test_sentence_transformers_document_embedder.py +++ b/test/components/embedders/test_sentence_transformers_document_embedder.py @@ -3,7 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 from unittest.mock import MagicMock, patch -import numpy as np +import random import pytest import torch @@ -257,7 +257,9 @@ def test_warmup_doesnt_reload(self, mocked_factory): def test_run(self): embedder = SentenceTransformersDocumentEmbedder(model="model") embedder.embedding_backend = MagicMock() - embedder.embedding_backend.embed = lambda x, **kwargs: np.random.rand(len(x), 16).tolist() + embedder.embedding_backend.embed = lambda x, **kwargs: [ + [random.random() for _ in range(16)] for _ in range(len(x)) + ] documents = [Document(content=f"document number {i}") for i in range(5)] diff --git a/test/components/embedders/test_sentence_transformers_text_embedder.py b/test/components/embedders/test_sentence_transformers_text_embedder.py index 2f043de237..0fecd634f0 100644 --- a/test/components/embedders/test_sentence_transformers_text_embedder.py +++ b/test/components/embedders/test_sentence_transformers_text_embedder.py @@ -4,7 +4,7 @@ from unittest.mock import MagicMock, patch import torch -import numpy as np +import random import pytest from haystack.components.embedders.sentence_transformers_text_embedder import SentenceTransformersTextEmbedder @@ -233,7 +233,9 @@ def test_warmup_doesnt_reload(self, mocked_factory): def test_run(self): embedder = SentenceTransformersTextEmbedder(model="model") embedder.embedding_backend = MagicMock() - embedder.embedding_backend.embed = lambda x, **kwargs: np.random.rand(len(x), 16).tolist() + embedder.embedding_backend.embed = lambda x, **kwargs: [ + [random.random() for _ in range(16)] for _ in range(len(x)) + ] text = "a nice text to embed" diff --git a/test/components/evaluators/test_faithfulness_evaluator.py b/test/components/evaluators/test_faithfulness_evaluator.py index 1150099e02..de92388ece 100644 --- a/test/components/evaluators/test_faithfulness_evaluator.py +++ b/test/components/evaluators/test_faithfulness_evaluator.py @@ -5,7 +5,6 @@ import math from typing import List -import numpy as np import pytest from haystack import Pipeline From 9dbdaf2adc5640f41a0e1923e64c47b5937f830b Mon Sep 17 00:00:00 2001 From: Ajit Date: Mon, 30 Sep 2024 10:30:21 +0530 Subject: [PATCH 2/6] added release note --- .../enhance-numpy-dependency-reduced-f8a8afa08be098d3.yaml | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 releasenotes/notes/enhance-numpy-dependency-reduced-f8a8afa08be098d3.yaml diff --git a/releasenotes/notes/enhance-numpy-dependency-reduced-f8a8afa08be098d3.yaml b/releasenotes/notes/enhance-numpy-dependency-reduced-f8a8afa08be098d3.yaml new file mode 100644 index 0000000000..b9dc7020fc --- /dev/null +++ b/releasenotes/notes/enhance-numpy-dependency-reduced-f8a8afa08be098d3.yaml @@ -0,0 +1,4 @@ +--- +enhancements: + - | + Reduced numpy dependency by using built-in libraries like math and random From 7d980611a68c58fb15e542d5eb9ea2a23a7528f9 Mon Sep 17 00:00:00 2001 From: Ajit Date: Thu, 3 Oct 2024 10:37:33 +0530 Subject: [PATCH 3/6] edited expit function to support both float as well as list (this case was giving error CI) --- haystack/utils/expit.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/haystack/utils/expit.py b/haystack/utils/expit.py index b61b60305f..2c293de036 100644 --- a/haystack/utils/expit.py +++ b/haystack/utils/expit.py @@ -7,4 +7,5 @@ def expit(x: float) -> float: """Compute logistic sigmoid function. Maps input values to a range between 0 and 1""" - return 1 / (1 + exp(-x)) + sigmoid = lambda v: 1 / (1 + exp(-v)) + return list(map(sigmoid, x)) if isinstance(x, list) else sigmoid(x) From 92dbe67e6983ad9a9ae50dba56bce917be971826 Mon Sep 17 00:00:00 2001 From: Ajit Date: Thu, 3 Oct 2024 10:44:31 +0530 Subject: [PATCH 4/6] revert code , numpy can't be removed here --- haystack/utils/expit.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/haystack/utils/expit.py b/haystack/utils/expit.py index 2c293de036..ea84944c37 100644 --- a/haystack/utils/expit.py +++ b/haystack/utils/expit.py @@ -2,10 +2,9 @@ # # SPDX-License-Identifier: Apache-2.0 -from math import exp +import numpy as np def expit(x: float) -> float: """Compute logistic sigmoid function. Maps input values to a range between 0 and 1""" - sigmoid = lambda v: 1 / (1 + exp(-v)) - return list(map(sigmoid, x)) if isinstance(x, list) else sigmoid(x) + return 1 / (1 + np.exp(-x)) From decfe0316a99e1f7ce306809e7afe041808098e4 Mon Sep 17 00:00:00 2001 From: anakin87 Date: Fri, 18 Oct 2024 15:21:19 +0200 Subject: [PATCH 5/6] more cleaning --- .../backends/sentence_transformers_backend.py | 6 ++---- haystack/utils/expit.py | 12 ++++++++---- .../test_hugging_face_api_document_embedder.py | 4 ++-- .../embedders/test_hugging_face_api_text_embedder.py | 4 ++-- .../retrievers/test_in_memory_embedding_retriever.py | 5 ++--- 5 files changed, 16 insertions(+), 15 deletions(-) diff --git a/haystack/components/embedders/backends/sentence_transformers_backend.py b/haystack/components/embedders/backends/sentence_transformers_backend.py index cff9135c86..e3550183e9 100644 --- a/haystack/components/embedders/backends/sentence_transformers_backend.py +++ b/haystack/components/embedders/backends/sentence_transformers_backend.py @@ -2,9 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 -from typing import Any, Dict, List, Optional, cast - -import numpy as np +from typing import Any, Dict, List, Optional from haystack.lazy_imports import LazyImport from haystack.utils.auth import Secret @@ -78,5 +76,5 @@ def __init__( ) def embed(self, data: List[str], **kwargs) -> List[List[float]]: - embeddings = cast(np.ndarray, self.model.encode(data, **kwargs)).tolist() + embeddings = self.model.encode(data, **kwargs).tolist() return embeddings diff --git a/haystack/utils/expit.py b/haystack/utils/expit.py index ea84944c37..2f29ce99bd 100644 --- a/haystack/utils/expit.py +++ b/haystack/utils/expit.py @@ -2,9 +2,13 @@ # # SPDX-License-Identifier: Apache-2.0 -import numpy as np +from numpy import exp -def expit(x: float) -> float: - """Compute logistic sigmoid function. Maps input values to a range between 0 and 1""" - return 1 / (1 + np.exp(-x)) +def expit(x) -> float: + """ + Compute logistic sigmoid function. Maps input values to a range between 0 and 1 + + :param x: input value. Can be a scalar or a numpy array. + """ + return 1 / (1 + exp(-x)) diff --git a/test/components/embedders/test_hugging_face_api_document_embedder.py b/test/components/embedders/test_hugging_face_api_document_embedder.py index a2fd67c621..b9332d5363 100644 --- a/test/components/embedders/test_hugging_face_api_document_embedder.py +++ b/test/components/embedders/test_hugging_face_api_document_embedder.py @@ -4,9 +4,9 @@ import os from unittest.mock import MagicMock, patch +import random import pytest from huggingface_hub.utils import RepositoryNotFoundError -from numpy import array, random from haystack.components.embedders import HuggingFaceAPIDocumentEmbedder from haystack.dataclasses import Document @@ -24,7 +24,7 @@ def mock_check_valid_model(): def mock_embedding_generation(json, **kwargs): - response = str(array([random.rand(384) for i in range(len(json["inputs"]))]).tolist()).encode() + response = str([[random.random() for _ in range(384)] for _ in range(len(json["inputs"]))]).encode() return response diff --git a/test/components/embedders/test_hugging_face_api_text_embedder.py b/test/components/embedders/test_hugging_face_api_text_embedder.py index 0031b6746e..6e699fca25 100644 --- a/test/components/embedders/test_hugging_face_api_text_embedder.py +++ b/test/components/embedders/test_hugging_face_api_text_embedder.py @@ -4,9 +4,9 @@ import os from unittest.mock import MagicMock, patch +import random import pytest from huggingface_hub.utils import RepositoryNotFoundError -from numpy import array, random from haystack.components.embedders import HuggingFaceAPITextEmbedder from haystack.utils.auth import Secret @@ -22,7 +22,7 @@ def mock_check_valid_model(): def mock_embedding_generation(json, **kwargs): - response = str(array([random.rand(384) for i in range(len(json["inputs"]))]).tolist()).encode() + response = str([[random.random() for _ in range(384)] for _ in range(len(json["inputs"]))]).encode() return response diff --git a/test/components/retrievers/test_in_memory_embedding_retriever.py b/test/components/retrievers/test_in_memory_embedding_retriever.py index 7fe8387d68..366fd17b32 100644 --- a/test/components/retrievers/test_in_memory_embedding_retriever.py +++ b/test/components/retrievers/test_in_memory_embedding_retriever.py @@ -4,7 +4,6 @@ from typing import Dict, Any import pytest -import numpy as np from haystack import Pipeline, DeserializationError from haystack.document_stores.types import FilterPolicy @@ -135,7 +134,7 @@ def test_valid_run(self): assert "documents" in result assert len(result["documents"]) == top_k - assert np.array_equal(result["documents"][0].embedding, [1.0, 1.0, 1.0, 1.0]) + assert result["documents"][0].embedding == [1.0, 1.0, 1.0, 1.0] def test_invalid_run_wrong_store_type(self): SomeOtherDocumentStore = document_store_class("SomeOtherDocumentStore") @@ -165,4 +164,4 @@ def test_run_with_pipeline(self): results_docs = result["retriever"]["documents"] assert results_docs assert len(results_docs) == top_k - assert np.array_equal(results_docs[0].embedding, [1.0, 1.0, 1.0, 1.0]) + assert results_docs[0].embedding == [1.0, 1.0, 1.0, 1.0] From 574b6187c12c0293a4dd2c01562ef5cf8ec720ec Mon Sep 17 00:00:00 2001 From: anakin87 Date: Fri, 18 Oct 2024 15:25:05 +0200 Subject: [PATCH 6/6] fix relnote --- .../enhance-numpy-dependency-reduced-f8a8afa08be098d3.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/releasenotes/notes/enhance-numpy-dependency-reduced-f8a8afa08be098d3.yaml b/releasenotes/notes/enhance-numpy-dependency-reduced-f8a8afa08be098d3.yaml index b9dc7020fc..cc72e68db6 100644 --- a/releasenotes/notes/enhance-numpy-dependency-reduced-f8a8afa08be098d3.yaml +++ b/releasenotes/notes/enhance-numpy-dependency-reduced-f8a8afa08be098d3.yaml @@ -1,4 +1,4 @@ --- enhancements: - | - Reduced numpy dependency by using built-in libraries like math and random + Reduced numpy usage to speed up imports.