Skip to content

Commit

Permalink
Docs: Update docstrings lg (#6260)
Browse files Browse the repository at this point in the history
* Update docstrings lg

* Update test_in_memory_bm25_retriever.py

* Update test_in_memory_embedding_retriever.py

---------

Co-authored-by: Massimiliano Pippi <mpippi@gmail.com>
  • Loading branch information
agnieszka-m and masci authored Nov 9, 2023
1 parent 3be6ec7 commit 1046beb
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 22 deletions.
22 changes: 12 additions & 10 deletions haystack/preview/components/retrievers/in_memory_bm25_retriever.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
@component
class InMemoryBM25Retriever:
"""
A component for retrieving documents from a InMemoryDocumentStore using the BM25 algorithm.
Uses the BM25 algorithm to retrieve documents from the InMemoryDocumentStore.
Needs to be connected to a InMemoryDocumentStore to run.
Needs to be connected to the InMemoryDocumentStore to run.
"""

def __init__(
Expand All @@ -20,22 +20,23 @@ def __init__(
scale_score: bool = True,
):
"""
Create a InMemoryBM25Retriever component.
Create the InMemoryBM25Retriever component.
:param document_store: An instance of InMemoryDocumentStore.
:param filters: A dictionary with filters to narrow down the search space. Default is None.
:param top_k: The maximum number of documents to retrieve. Default is 10.
:param scale_score: Whether to scale the BM25 score or not. Default is True.
:param filters: A dictionary with filters to narrow down the search space. Defaults to `None`.
:param top_k: The maximum number of documents to retrieve. Defaults to `10`.
:param scale_score: Scales the BM25 score to a unit interval in the range of 0 to 1, where 1 means extremely relevant. If set to `False`, uses raw similarity scores.
Defaults to `True`.
:raises ValueError: If the specified top_k is not > 0.
:raises ValueError: If the specified `top_k` is not > 0.
"""
if not isinstance(document_store, InMemoryDocumentStore):
raise ValueError("document_store must be an instance of InMemoryDocumentStore")

self.document_store = document_store

if top_k <= 0:
raise ValueError(f"top_k must be > 0, but got {top_k}")
raise ValueError(f"top_k must be greater than 0. Currently, the top_k is {top_k}")

self.filters = filters
self.top_k = top_k
Expand Down Expand Up @@ -85,10 +86,11 @@ def run(
"""
Run the InMemoryBM25Retriever on the given input data.
:param query: The query string for the retriever.
:param query: The query string for the Retriever.
:param filters: A dictionary with filters to narrow down the search space.
:param top_k: The maximum number of documents to return.
:param scale_score: Whether to scale the BM25 scores or not.
:param scale_score: Scales the BM25 score to a unit interval in the range of 0 to 1, where 1 means extremely relevant. If set to `False`, uses raw similarity scores.
Defaults to `True`.
:return: The retrieved documents.
:raises ValueError: If the specified DocumentStore is not found or is not an InMemoryDocumentStore instance.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
@component
class InMemoryEmbeddingRetriever:
"""
A component for retrieving documents from a InMemoryDocumentStore using a vector similarity metric.
Uses a vector similarity metric to retrieve documents from the InMemoryDocumentStore.
Needs to be connected to a InMemoryDocumentStore to run.
Needs to be connected to the InMemoryDocumentStore to run.
"""

def __init__(
Expand All @@ -21,13 +21,14 @@ def __init__(
return_embedding: bool = False,
):
"""
Create a InMemoryEmbeddingRetriever component.
Create the InMemoryEmbeddingRetriever component.
:param document_store: An instance of InMemoryDocumentStore.
:param filters: A dictionary with filters to narrow down the search space. Default is None.
:param top_k: The maximum number of documents to retrieve. Default is 10.
:param scale_score: Whether to scale the scores of the retrieved documents or not. Default is True.
:param return_embedding: Whether to return the embedding of the retrieved Documents. Default is False.
:param filters: A dictionary with filters to narrow down the search space. Defaults to `None`.
:param top_k: The maximum number of documents to retrieve. Defaults to `10`.
:param scale_score: Scales the similarity score to a unit interval in the range of 0 to 1, where 1 means extremely relevant. If set to `False`, uses raw similarity scores.
Defaults to `True`.
:param return_embedding: Whether to return the embedding of the retrieved Documents. Defaults to `False`.
:raises ValueError: If the specified `top_k` is not > 0.
"""
Expand All @@ -37,7 +38,7 @@ def __init__(
self.document_store = document_store

if top_k <= 0:
raise ValueError(f"top_k must be > 0, but got {top_k}")
raise ValueError(f"top_k must be greater than 0. Currently, top_k is {top_k}")

self.filters = filters
self.top_k = top_k
Expand Down Expand Up @@ -97,11 +98,12 @@ def run(
:param query_embedding: Embedding of the query.
:param filters: A dictionary with filters to narrow down the search space.
:param top_k: The maximum number of documents to return.
:param scale_score: Whether to scale the scores of the retrieved documents or not.
:param scale_score: Scales the similarity score to a unit interval in the range of 0 to 1, where 1 means extremely relevant. If set to `False`, uses raw similarity scores.
Defaults to `True`.
:param return_embedding: Whether to return the embedding of the retrieved Documents.
:return: The retrieved documents.
:raises ValueError: If the specified DocumentStore is not found or is not a InMemoryDocumentStore instance.
:raises ValueError: If the specified DocumentStore is not found or is not an InMemoryDocumentStore instance.
"""
if filters is None:
filters = self.filters
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def test_init_with_parameters(self):

@pytest.mark.unit
def test_init_with_invalid_top_k_parameter(self):
with pytest.raises(ValueError, match="top_k must be > 0, but got -2"):
with pytest.raises(ValueError):
InMemoryBM25Retriever(InMemoryDocumentStore(), top_k=-2, scale_score=False)

@pytest.mark.unit
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def test_init_with_parameters(self):

@pytest.mark.unit
def test_init_with_invalid_top_k_parameter(self):
with pytest.raises(ValueError, match="top_k must be > 0, but got -2"):
with pytest.raises(ValueError):
InMemoryEmbeddingRetriever(InMemoryDocumentStore(), top_k=-2, scale_score=False)

@pytest.mark.unit
Expand Down

0 comments on commit 1046beb

Please sign in to comment.