Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move filter_documents tests with not equal filters from DocumentStoreBaseTests to separate class #6341

Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
258 changes: 208 additions & 50 deletions haystack/preview/testing/document_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,38 +290,63 @@ def test_eq_filter_embedding(self, docstore: DocumentStore, filterable_docs: Lis
assert result == [doc for doc in filterable_docs if embedding == doc.embedding]


class LegacyFilterDocumentsTest(LegacyFilterDocumentsInvalidFiltersTest, LegacyFilterDocumentsEqualTest):
class LegacyFilterDocumentsNotEqualTest(FilterableDocsFixtureMixin):
"""
Utility class to test a Document Store `filter_documents` method using different types of legacy filters
Utility class to test a Document Store `filter_documents` method using explicit '$ne' legacy filters

To use it create a custom test class and override the `docstore` fixture to return your Document Store.
Example usage:

```python
class MyDocumentStoreTest(LegacyFilterDocumentsTest):
class MyDocumentStoreTest(LegacyFilterDocumentsNotEqualTest):
@pytest.fixture
def docstore(self):
return MyDocumentStore()
```
"""

@pytest.mark.unit
def test_no_filter_empty(self, docstore: DocumentStore):
    """
    Check that `filter_documents` returns an empty list when this test has written nothing.

    Both call forms are exercised: no arguments at all, and an explicit empty filter dict.
    """
    without_filters = docstore.filter_documents()
    assert without_filters == []

    with_empty_filters = docstore.filter_documents(filters={})
    assert with_empty_filters == []
def test_ne_filter(self, docstore: DocumentStore, filterable_docs: List[Document]):
    """
    Check an explicit `$ne` filter on the `page` meta field.

    The expected result keeps every fixture document whose `page` is not "100";
    documents without a `page` key are kept too, because `meta.get` yields None for them.
    """
    docstore.write_documents(filterable_docs)
    ne_filter = {"page": {"$ne": "100"}}
    retrieved = docstore.filter_documents(filters=ne_filter)
    expected = [doc for doc in filterable_docs if doc.meta.get("page") != "100"]
    assert retrieved == expected

@pytest.mark.unit
def test_no_filter_not_empty(self, docstore: DocumentStore):
    """
    Check that `filter_documents` returns the written document when no filter is applied.

    Both call forms are exercised: no arguments at all, and an explicit empty filter dict.
    """
    written = [Document(content="test doc")]
    docstore.write_documents(written)

    without_filters = docstore.filter_documents()
    assert without_filters == written

    with_empty_filters = docstore.filter_documents(filters={})
    assert with_empty_filters == written
def test_ne_filter_table(self, docstore: DocumentStore, filterable_docs: List[Document]):
    """
    Check an explicit `$ne` filter on the `dataframe` field.

    The expected result drops only the fixture documents whose `dataframe` is a
    pandas DataFrame equal to `pd.DataFrame([1])`; every other document is kept.
    """
    docstore.write_documents(filterable_docs)
    retrieved = docstore.filter_documents(filters={"dataframe": {"$ne": pd.DataFrame([1])}})

    def holds_excluded_table(doc):
        # A document is excluded only if it carries a DataFrame that equals the reference table.
        return isinstance(doc.dataframe, pd.DataFrame) and doc.dataframe.equals(pd.DataFrame([1]))

    expected = [doc for doc in filterable_docs if not holds_excluded_table(doc)]
    assert retrieved == expected

@pytest.mark.unit
def test_ne_filter_embedding(self, docstore: DocumentStore, filterable_docs: List[Document]):
    """
    Check an explicit `$ne` filter on the `embedding` field.

    The expected result keeps every fixture document whose `embedding` is either not a
    numpy array or is an array that differs from the reference embedding.
    """
    docstore.write_documents(filterable_docs)
    embedding = np.zeros([768, 1]).astype(np.float32)
    result = docstore.filter_documents(filters={"embedding": {"$ne": embedding}})
    assert result == [
        doc
        for doc in filterable_docs
        # The guard must inspect `doc.embedding`, the field being compared. It previously inspected
        # `doc.dataframe`, which is not expected to hold an ndarray, so the guard presumably always passed.
        if not isinstance(doc.embedding, np.ndarray) or not np.array_equal(embedding, doc.embedding)  # type: ignore
    ]


class DocumentStoreBaseTests(CountDocumentsTest, WriteDocumentsTest, DeleteDocumentsTest, LegacyFilterDocumentsTest):
    """
    Aggregate test class combining the count, write, delete and legacy filter test mixins.

    The `docstore` fixture defined here only raises, so a concrete test class has to
    override it to supply a Document Store.
    """

    @pytest.fixture
    def docstore(self) -> DocumentStore:
        """Placeholder fixture; raises NotImplementedError until overridden."""
        raise NotImplementedError()
class LegacyFilterDocumentsInTest(FilterableDocsFixtureMixin):
"""
Utility class to test a Document Store `filter_documents` method using implicit and explicit '$in' legacy filters

To use it create a custom test class and override the `docstore` fixture to return your Document Store.
Example usage:

```python
class MyDocumentStoreTest(LegacyFilterDocumentsInTest):
@pytest.fixture
def docstore(self):
return MyDocumentStore()
```
"""

@pytest.mark.unit
def test_filter_simple_list_single_element(self, docstore: DocumentStore, filterable_docs: List[Document]):
Expand Down Expand Up @@ -386,32 +411,21 @@ def test_in_filter_embedding(self, docstore: DocumentStore, filterable_docs: Lis
doc for doc in filterable_docs if (embedding_zero == doc.embedding or embedding_one == doc.embedding)
]

@pytest.mark.unit
def test_ne_filter(self, docstore: DocumentStore, filterable_docs: List[Document]):
    """
    Check an explicit `$ne` filter on the `page` meta field.

    The expected result keeps every fixture document whose `page` is not "100";
    documents without a `page` key are kept too, because `meta.get` yields None for them.
    """
    docstore.write_documents(filterable_docs)
    ne_filter = {"page": {"$ne": "100"}}
    retrieved = docstore.filter_documents(filters=ne_filter)
    expected = [doc for doc in filterable_docs if doc.meta.get("page") != "100"]
    assert retrieved == expected

@pytest.mark.unit
def test_ne_filter_table(self, docstore: DocumentStore, filterable_docs: List[Document]):
    """
    Check an explicit `$ne` filter on the `dataframe` field.

    The expected result drops only the fixture documents whose `dataframe` is a
    pandas DataFrame equal to `pd.DataFrame([1])`; every other document is kept.
    """
    docstore.write_documents(filterable_docs)
    retrieved = docstore.filter_documents(filters={"dataframe": {"$ne": pd.DataFrame([1])}})

    def holds_excluded_table(doc):
        # A document is excluded only if it carries a DataFrame that equals the reference table.
        return isinstance(doc.dataframe, pd.DataFrame) and doc.dataframe.equals(pd.DataFrame([1]))

    expected = [doc for doc in filterable_docs if not holds_excluded_table(doc)]
    assert retrieved == expected
class LegacyFilterDocumentsNotInTest(FilterableDocsFixtureMixin):
"""
Utility class to test a Document Store `filter_documents` method using explicit '$nin' legacy filters

@pytest.mark.unit
def test_ne_filter_embedding(self, docstore: DocumentStore, filterable_docs: List[Document]):
    """
    Check an explicit `$ne` filter on the `embedding` field.

    The expected result keeps every fixture document whose `embedding` is either not a
    numpy array or is an array that differs from the reference embedding.
    """
    docstore.write_documents(filterable_docs)
    embedding = np.zeros([768, 1]).astype(np.float32)
    result = docstore.filter_documents(filters={"embedding": {"$ne": embedding}})
    assert result == [
        doc
        for doc in filterable_docs
        # The guard must inspect `doc.embedding`, the field being compared. It previously inspected
        # `doc.dataframe`, which is not expected to hold an ndarray, so the guard presumably always passed.
        if not isinstance(doc.embedding, np.ndarray) or not np.array_equal(embedding, doc.embedding)  # type: ignore
    ]
To use it create a custom test class and override the `docstore` fixture to return your Document Store.
Example usage:

```python
class MyDocumentStoreTest(LegacyFilterDocumentsNotInTest):
@pytest.fixture
def docstore(self):
return MyDocumentStore()
```
"""

@pytest.mark.unit
def test_nin_filter_table(self, docstore: DocumentStore, filterable_docs: List[Document]):
Expand Down Expand Up @@ -442,6 +456,22 @@ def test_nin_filter(self, docstore: DocumentStore, filterable_docs: List[Documen
result = docstore.filter_documents(filters={"page": {"$nin": ["100", "123", "n.a."]}})
assert result == [doc for doc in filterable_docs if doc.meta.get("page") not in ["100", "123"]]


class LegacyFilterDocumentsGreaterThanTest(FilterableDocsFixtureMixin):
"""
Utility class to test a Document Store `filter_documents` method using explicit '$gt' legacy filters

To use it create a custom test class and override the `docstore` fixture to return your Document Store.
Example usage:

```python
class MyDocumentStoreTest(LegacyFilterDocumentsGreaterThanTest):
@pytest.fixture
def docstore(self):
return MyDocumentStore()
```
"""

@pytest.mark.unit
def test_gt_filter(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
Expand All @@ -467,6 +497,22 @@ def test_gt_filter_embedding(self, docstore: DocumentStore, filterable_docs: Lis
with pytest.raises(FilterError):
docstore.filter_documents(filters={"embedding": {"$gt": embedding_zeros}})


class LegacyFilterDocumentsGreaterThanEqualTest(FilterableDocsFixtureMixin):
"""
Utility class to test a Document Store `filter_documents` method using explicit '$gte' legacy filters

To use it create a custom test class and override the `docstore` fixture to return your Document Store.
Example usage:

```python
class MyDocumentStoreTest(LegacyFilterDocumentsGreaterThanEqualTest):
@pytest.fixture
def docstore(self):
return MyDocumentStore()
```
"""

@pytest.mark.unit
def test_gte_filter(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
Expand All @@ -492,6 +538,22 @@ def test_gte_filter_embedding(self, docstore: DocumentStore, filterable_docs: Li
with pytest.raises(FilterError):
docstore.filter_documents(filters={"embedding": {"$gte": embedding_zeros}})


class LegacyFilterDocumentsLessThanTest(FilterableDocsFixtureMixin):
"""
Utility class to test a Document Store `filter_documents` method using explicit '$lt' legacy filters

To use it create a custom test class and override the `docstore` fixture to return your Document Store.
Example usage:

```python
class MyDocumentStoreTest(LegacyFilterDocumentsLessThanTest):
@pytest.fixture
def docstore(self):
return MyDocumentStore()
```
"""

@pytest.mark.unit
def test_lt_filter(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
Expand All @@ -517,6 +579,22 @@ def test_lt_filter_embedding(self, docstore: DocumentStore, filterable_docs: Lis
with pytest.raises(FilterError):
docstore.filter_documents(filters={"embedding": {"$lt": embedding_ones}})


class LegacyFilterDocumentsLessThanEqualTest(FilterableDocsFixtureMixin):
"""
Utility class to test a Document Store `filter_documents` method using explicit '$lte' legacy filters

To use it create a custom test class and override the `docstore` fixture to return your Document Store.
Example usage:

```python
class MyDocumentStoreTest(LegacyFilterDocumentsLessThanEqualTest):
@pytest.fixture
def docstore(self):
return MyDocumentStore()
```
"""

@pytest.mark.unit
def test_lte_filter(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
Expand All @@ -542,6 +620,33 @@ def test_lte_filter_embedding(self, docstore: DocumentStore, filterable_docs: Li
with pytest.raises(FilterError):
docstore.filter_documents(filters={"embedding": {"$lte": embedding_ones}})


class LegacyFilterDocumentsSimpleLogicalTest(FilterableDocsFixtureMixin):
"""
Utility class to test a Document Store `filter_documents` method using logical '$and', '$or' and '$not' legacy filters

To use it create a custom test class and override the `docstore` fixture to return your Document Store.
Example usage:

```python
class MyDocumentStoreTest(LegacyFilterDocumentsSimpleLogicalTest):
@pytest.fixture
def docstore(self):
return MyDocumentStore()
```
"""

@pytest.mark.unit
def test_filter_simple_or(self, docstore: DocumentStore, filterable_docs: List[Document]):
    """
    Check a single-level `$or` filter combining an `$in` clause on `name` with an `$lt` clause on `number`.

    The expected result keeps every fixture document that has a `number` below 1
    or whose `name` is "name_0" or "name_1".
    """
    docstore.write_documents(filterable_docs)
    filters = {"$or": {"name": {"$in": ["name_0", "name_1"]}, "number": {"$lt": 1.0}}}
    result = docstore.filter_documents(filters=filters)

    def satisfies_or(doc):
        # Check the numeric clause first, then fall back to the name clause.
        has_low_number = "number" in doc.meta and doc.meta["number"] < 1
        return has_low_number or doc.meta.get("name") in ["name_0", "name_1"]

    assert result == [doc for doc in filterable_docs if satisfies_or(doc)]

@pytest.mark.unit
def test_filter_simple_implicit_and_with_multi_key_dict(
self, docstore: DocumentStore, filterable_docs: List[Document]
Expand Down Expand Up @@ -582,6 +687,22 @@ def test_filter_simple_implicit_and(self, docstore: DocumentStore, filterable_do
if "number" in doc.meta and doc.meta["number"] <= 2.0 and doc.meta["number"] >= 0.0
]


class LegacyFilterDocumentsNestedLogicalTest(FilterableDocsFixtureMixin):
"""
Utility class to test a Document Store `filter_documents` method using multiple nested logical '$and', '$or' and '$not' legacy filters

To use it create a custom test class and override the `docstore` fixture to return your Document Store.
Example usage:

```python
class MyDocumentStoreTest(LegacyFilterDocumentsNestedLogicalTest):
@pytest.fixture
def docstore(self):
return MyDocumentStore()
```
"""

@pytest.mark.unit
def test_filter_nested_explicit_and(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
Expand Down Expand Up @@ -614,17 +735,6 @@ def test_filter_nested_implicit_and(self, docstore: DocumentStore, filterable_do
)
]

@pytest.mark.unit
def test_filter_simple_or(self, docstore: DocumentStore, filterable_docs: List[Document]):
    """
    Check a single-level `$or` filter combining an `$in` clause on `name` with an `$lt` clause on `number`.

    The expected result keeps every fixture document that has a `number` below 1
    or whose `name` is "name_0" or "name_1".
    """
    docstore.write_documents(filterable_docs)
    filters = {"$or": {"name": {"$in": ["name_0", "name_1"]}, "number": {"$lt": 1.0}}}
    result = docstore.filter_documents(filters=filters)

    def satisfies_or(doc):
        # Check the numeric clause first, then fall back to the name clause.
        has_low_number = "number" in doc.meta and doc.meta["number"] < 1
        return has_low_number or doc.meta.get("name") in ["name_0", "name_1"]

    assert result == [doc for doc in filterable_docs if satisfies_or(doc)]

@pytest.mark.unit
def test_filter_nested_or(self, docstore: DocumentStore, filterable_docs: List[Document]):
docstore.write_documents(filterable_docs)
Expand Down Expand Up @@ -711,3 +821,51 @@ def test_filter_nested_multiple_identical_operators_same_level(
or (doc.meta.get("chapter") in ["intro", "abstract"] and doc.meta.get("page") == "123")
)
]


class LegacyFilterDocumentsTest(  # pylint: disable=too-many-ancestors
    LegacyFilterDocumentsInvalidFiltersTest,
    LegacyFilterDocumentsEqualTest,
    LegacyFilterDocumentsNotEqualTest,
    LegacyFilterDocumentsInTest,
    LegacyFilterDocumentsNotInTest,
    LegacyFilterDocumentsGreaterThanTest,
    LegacyFilterDocumentsGreaterThanEqualTest,
    LegacyFilterDocumentsLessThanTest,
    LegacyFilterDocumentsLessThanEqualTest,
    LegacyFilterDocumentsSimpleLogicalTest,
    LegacyFilterDocumentsNestedLogicalTest,
):
    """
    Utility class bundling every legacy-filter test mixin for a Document Store `filter_documents` method

    It inherits the tests of all the listed mixins and adds two tests for calls without any filter.
    To use it create a custom test class and override the `docstore` fixture to return your Document Store.
    Example usage:

    ```python
    class MyDocumentStoreTest(LegacyFilterDocumentsTest):
        @pytest.fixture
        def docstore(self):
            return MyDocumentStore()
    ```
    """

    @pytest.mark.unit
    def test_no_filter_empty(self, docstore: DocumentStore):
        """Check that no documents come back, with or without an empty filter dict, when nothing was written here."""
        without_filters = docstore.filter_documents()
        assert without_filters == []

        with_empty_filters = docstore.filter_documents(filters={})
        assert with_empty_filters == []

    @pytest.mark.unit
    def test_no_filter_not_empty(self, docstore: DocumentStore):
        """Check that the written document comes back, with or without an empty filter dict."""
        written = [Document(content="test doc")]
        docstore.write_documents(written)

        without_filters = docstore.filter_documents()
        assert without_filters == written

        with_empty_filters = docstore.filter_documents(filters={})
        assert with_empty_filters == written


class DocumentStoreBaseTests(
    CountDocumentsTest, WriteDocumentsTest, DeleteDocumentsTest, LegacyFilterDocumentsTest
):  # pylint: disable=too-many-ancestors
    """
    Aggregate test class combining the count, write, delete and legacy filter test mixins.

    The `docstore` fixture defined here only raises, so a concrete test class has to
    override it to supply a Document Store.
    """

    @pytest.fixture
    def docstore(self) -> DocumentStore:
        """Placeholder fixture; raises NotImplementedError until overridden."""
        raise NotImplementedError()
Loading