From 8fcfe103d99f8565022f6cd0d6eb6b170048c992 Mon Sep 17 00:00:00 2001 From: Silvano Cerza Date: Mon, 20 Nov 2023 14:51:30 +0100 Subject: [PATCH 1/3] Rework write_documents tests --- haystack/preview/testing/document_store.py | 61 ++++++++++++++-------- 1 file changed, 38 insertions(+), 23 deletions(-) diff --git a/haystack/preview/testing/document_store.py b/haystack/preview/testing/document_store.py index a66c380944..dd9c652d21 100644 --- a/haystack/preview/testing/document_store.py +++ b/haystack/preview/testing/document_store.py @@ -48,6 +48,8 @@ class WriteDocumentsTest: Utility class to test a Document Store `write_documents` method. To use it create a custom test class and override the `document_store` fixture to return your Document Store. + The Document Store `filter_documents` method must be at least partly implemented to return all stored Documents + for this tests to work correctly. Example usage: ```python @@ -59,44 +61,57 @@ def document_store(self): """ @pytest.mark.unit - def test_write(self, document_store: DocumentStore): + def test_write_documents(self, document_store: DocumentStore): + """ + Test write_documents() normal behaviour. + """ doc = Document(content="test doc") - document_store.write_documents([doc]) - assert document_store.filter_documents(filters={"id": doc.id}) == [doc] + assert document_store.write_documents([doc]) == 1 + assert document_store.filter_documents() == [doc] @pytest.mark.unit - def test_write_duplicate_fail(self, document_store: DocumentStore): + def test_write_documents_duplicate_fail(self, document_store: DocumentStore): + """ + Test write_documents() fails when trying to write Document with same id + using DuplicatePolicy.FAIL. + """ doc = Document(content="test doc") - document_store.write_documents([doc]) - with pytest.raises(DuplicateDocumentError, match=f"ID '{doc.id}' already exists."): + assert document_store.write_documents([doc]) == 1 + with pytest.raises(DuplicateDocumentError): document_store.write_documents(documents=[doc], policy=DuplicatePolicy.FAIL) - assert document_store.filter_documents(filters={"id": doc.id}) == [doc] + assert document_store.filter_documents() == [doc] @pytest.mark.unit - def test_write_duplicate_skip(self, document_store: DocumentStore): + def test_write_documents_duplicate_skip(self, document_store: DocumentStore): + """ + Test write_documents() skips Document when trying to write one with same id + using DuplicatePolicy.SKIP. + """ doc = Document(content="test doc") - document_store.write_documents([doc]) - document_store.write_documents(documents=[doc], policy=DuplicatePolicy.SKIP) - assert document_store.filter_documents(filters={"id": doc.id}) == [doc] + assert document_store.write_documents([doc]) == 1 + assert document_store.write_documents(documents=[doc], policy=DuplicatePolicy.SKIP) == 0 @pytest.mark.unit - def test_write_duplicate_overwrite(self, document_store: DocumentStore): - doc1 = Document(content="test doc 1") - doc2 = Document(content="test doc 2") - object.__setattr__(doc2, "id", doc1.id) # Make two docs with different content but same ID + def test_write_documents_duplicate_overwrite(self, document_store: DocumentStore): + """ + Test write_documents() overwrites stored Document when trying to write one with same id + using DuplicatePolicy.OVERWRITE. + """ + doc1 = Document(id="1", content="test doc 1") + doc2 = Document(id="1", content="test doc 2") - document_store.write_documents([doc2]) - assert document_store.filter_documents(filters={"id": doc1.id}) == [doc2] - document_store.write_documents(documents=[doc1], policy=DuplicatePolicy.OVERWRITE) - assert document_store.filter_documents(filters={"id": doc1.id}) == [doc1] + assert document_store.write_documents([doc2]) == 1 + assert document_store.filter_documents() == [doc2] + assert document_store.write_documents(documents=[doc1], policy=DuplicatePolicy.OVERWRITE) == 1 + assert document_store.filter_documents() == [doc1] @pytest.mark.unit - def test_write_not_docs(self, document_store: DocumentStore): + def test_write_documents_invalid_input(self, document_store: DocumentStore): + """ + Test write_documents() fails when providing unexpected input. + """ with pytest.raises(ValueError): document_store.write_documents(["not a document for sure"]) # type: ignore - - @pytest.mark.unit - def test_write_not_list(self, document_store: DocumentStore): with pytest.raises(ValueError): document_store.write_documents("not a list actually") # type: ignore From 44381f5481a6824efb076612ce266e28e79c4a92 Mon Sep 17 00:00:00 2001 From: Silvano Cerza Date: Mon, 20 Nov 2023 16:45:10 +0100 Subject: [PATCH 2/3] Rework delete_documents tests --- .../in_memory/document_store.py | 4 +-- haystack/preview/testing/document_store.py | 35 ++++++++++++------- 2 files changed, 23 insertions(+), 16 deletions(-) diff --git a/haystack/preview/document_stores/in_memory/document_store.py b/haystack/preview/document_stores/in_memory/document_store.py index 29510dd9f8..ff910e76d7 100644 --- a/haystack/preview/document_stores/in_memory/document_store.py +++ b/haystack/preview/document_stores/in_memory/document_store.py @@ -198,13 +198,11 @@ def write_documents(self, documents: List[Document], policy: DuplicatePolicy = D def delete_documents(self, document_ids: List[str]) -> None: """ Deletes all documents with matching document_ids from the DocumentStore. - Fails with `MissingDocumentError` if no document with this id is present in the DocumentStore. - :param object_ids: The object_ids to delete. """ for doc_id in document_ids: if doc_id not in self.storage.keys(): - raise MissingDocumentError(f"ID '{doc_id}' not found, cannot delete it.") + continue del self.storage[doc_id] def bm25_retrieval( diff --git a/haystack/preview/testing/document_store.py b/haystack/preview/testing/document_store.py index dd9c652d21..65a173a0e5 100644 --- a/haystack/preview/testing/document_store.py +++ b/haystack/preview/testing/document_store.py @@ -8,7 +8,7 @@ from haystack.preview.dataclasses import Document from haystack.preview.document_stores import DocumentStore, DuplicatePolicy -from haystack.preview.document_stores.errors import MissingDocumentError, DuplicateDocumentError +from haystack.preview.document_stores.errors import DuplicateDocumentError from haystack.preview.errors import FilterError @@ -121,6 +121,7 @@ class DeleteDocumentsTest: Utility class to test a Document Store `delete_documents` method. To use it create a custom test class and override the `document_store` fixture to return your Document Store. + The Document Store `write_documents` and `count_documents` methods must be implemented for this tests to work correctly. Example usage: ```python @@ -132,29 +133,37 @@ def document_store(self): """ @pytest.mark.unit - def test_delete_empty(self, document_store: DocumentStore): - with pytest.raises(MissingDocumentError): - document_store.delete_documents(["test"]) - - @pytest.mark.unit - def test_delete_not_empty(self, document_store: DocumentStore): + def test_delete_documents(self, document_store: DocumentStore): + """ + Test delete_documents() normal behaviour. + """ doc = Document(content="test doc") document_store.write_documents([doc]) + assert document_store.count_documents() == 1 document_store.delete_documents([doc.id]) + assert document_store.count_documents() == 0 - with pytest.raises(Exception): - assert document_store.filter_documents(filters={"id": doc.id}) + @pytest.mark.unit + def test_delete_documents_empty_document_store(self, document_store: DocumentStore): + """ + Test delete_documents() doesn't fail when called using an empty Document Store. + """ + document_store.delete_documents(["non_existing_id"]) @pytest.mark.unit - def test_delete_not_empty_nonexisting(self, document_store: DocumentStore): + def test_delete_documents_non_existing_document(self, document_store: DocumentStore): + """ + Test delete_documents() doesn't delete any Document when called with non existing id. + """ doc = Document(content="test doc") document_store.write_documents([doc]) + assert document_store.count_documents() == 1 - with pytest.raises(MissingDocumentError): - document_store.delete_documents(["non_existing"]) + document_store.delete_documents(["non_existing_id"]) - assert document_store.filter_documents(filters={"id": doc.id}) == [doc] + # No Document has been deleted + assert document_store.count_documents() == 1 class FilterableDocsFixtureMixin: From ea84e695a884aa7ce36e0a58cbeb492dcad47149 Mon Sep 17 00:00:00 2001 From: Silvano Cerza Date: Mon, 20 Nov 2023 17:22:16 +0100 Subject: [PATCH 3/3] Fix linting --- haystack/preview/document_stores/in_memory/document_store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/haystack/preview/document_stores/in_memory/document_store.py b/haystack/preview/document_stores/in_memory/document_store.py index ff910e76d7..f52359760f 100644 --- a/haystack/preview/document_stores/in_memory/document_store.py +++ b/haystack/preview/document_stores/in_memory/document_store.py @@ -12,7 +12,7 @@ from haystack.preview.dataclasses import Document from haystack.preview.document_stores.protocols import DuplicatePolicy from haystack.preview.utils.filters import document_matches_filter -from haystack.preview.document_stores.errors import DuplicateDocumentError, MissingDocumentError, DocumentStoreError +from haystack.preview.document_stores.errors import DuplicateDocumentError, DocumentStoreError from haystack.preview.utils import expit logger = logging.getLogger(__name__)