diff --git a/ci/docker-compose-azure.yml b/ci/docker-compose-azure.yml index 48e5531fa..c42b05d3c 100644 --- a/ci/docker-compose-azure.yml +++ b/ci/docker-compose-azure.yml @@ -10,7 +10,7 @@ services: - --scheme - http - --write-timeout=600s - image: semitechnologies/weaviate:1.21.0 + image: semitechnologies/weaviate:1.21.1 ports: - 8081:8081 restart: on-failure:0 diff --git a/ci/docker-compose-cluster.yml b/ci/docker-compose-cluster.yml index 247b7d945..abc2cbe01 100644 --- a/ci/docker-compose-cluster.yml +++ b/ci/docker-compose-cluster.yml @@ -2,7 +2,7 @@ version: '3.4' services: weaviate-node-1: - image: semitechnologies/weaviate:1.21.0 + image: semitechnologies/weaviate:1.21.1 restart: on-failure:0 ports: - "8087:8080" @@ -25,7 +25,7 @@ services: - '8080' - --scheme - http - image: semitechnologies/weaviate:1.21.0 + image: semitechnologies/weaviate:1.21.1 ports: - 8088:8080 - 6061:6060 diff --git a/ci/docker-compose-okta-cc.yml b/ci/docker-compose-okta-cc.yml index 6277392db..15d56668e 100644 --- a/ci/docker-compose-okta-cc.yml +++ b/ci/docker-compose-okta-cc.yml @@ -10,7 +10,7 @@ services: - --scheme - http - --write-timeout=600s - image: semitechnologies/weaviate:1.21.0 + image: semitechnologies/weaviate:1.21.1 ports: - 8082:8082 restart: on-failure:0 diff --git a/ci/docker-compose-okta-users.yml b/ci/docker-compose-okta-users.yml index 17888634b..2e5c8a5fa 100644 --- a/ci/docker-compose-okta-users.yml +++ b/ci/docker-compose-okta-users.yml @@ -10,7 +10,7 @@ services: - --scheme - http - --write-timeout=600s - image: semitechnologies/weaviate:1.21.0 + image: semitechnologies/weaviate:1.21.1 ports: - 8083:8083 restart: on-failure:0 diff --git a/ci/docker-compose-openai.yml b/ci/docker-compose-openai.yml index 26a23a47f..6eb0dbf3e 100644 --- a/ci/docker-compose-openai.yml +++ b/ci/docker-compose-openai.yml @@ -9,7 +9,7 @@ services: - '8086' - --scheme - http - image: semitechnologies/weaviate:1.21.0 + image: semitechnologies/weaviate:1.21.1 ports: - 8086:8086 
restart: on-failure:0 diff --git a/ci/docker-compose-wcs.yml b/ci/docker-compose-wcs.yml index 1aa3034ee..efd1e57f5 100644 --- a/ci/docker-compose-wcs.yml +++ b/ci/docker-compose-wcs.yml @@ -10,7 +10,7 @@ services: - --scheme - http - --write-timeout=600s - image: semitechnologies/weaviate:1.21.0 + image: semitechnologies/weaviate:1.21.1 ports: - 8085:8085 restart: on-failure:0 diff --git a/ci/docker-compose.yml b/ci/docker-compose.yml index fd04b93de..504ddd0ab 100644 --- a/ci/docker-compose.yml +++ b/ci/docker-compose.yml @@ -10,7 +10,7 @@ services: - --scheme - http - --write-timeout=600s - image: semitechnologies/weaviate:1.21.0 + image: semitechnologies/weaviate:1.21.1 ports: - "8080:8080" - "50051:50051" diff --git a/integration/test_batch.py b/integration/test_batch.py index 07e457529..a135782a3 100644 --- a/integration/test_batch.py +++ b/integration/test_batch.py @@ -54,7 +54,10 @@ def client(): client.schema.create_class( { "class": "Test", - "properties": [{"name": "test", "dataType": ["Test"]}], + "properties": [ + {"name": "test", "dataType": ["Test"]}, + {"name": "name", "dataType": ["string"]}, + ], "vectorizer": "none", } ) @@ -79,6 +82,56 @@ def test_add_data_object(client: weaviate.Client, uuid: Optional[UUID], vector: assert has_batch_errors(response) is False, str(response) +def test_delete_objects(client: weaviate.Client): + with client.batch as batch: + batch.add_data_object(data_object={"name": "one"}, class_name="Test") + batch.add_data_object(data_object={"name": "two"}, class_name="Test") + batch.add_data_object(data_object={"name": "three"}, class_name="Test") + batch.add_data_object(data_object={"name": "four"}, class_name="Test") + batch.add_data_object(data_object={"name": "five"}, class_name="Test") + + with client.batch as batch: + batch.delete_objects( + "Test", + where={ + "path": ["name"], + "operator": "Equal", + "valueText": "one", + }, + ) + res = client.data_object.get() + names = [obj["properties"]["name"] for obj in 
res["objects"]] + assert "one" not in names + + with client.batch as batch: + batch.delete_objects( + "Test", + where={ + "path": ["name"], + "operator": "ContainsAny", + "valueTextArray": ["two", "three"], + }, + ) + res = client.data_object.get() + names = [obj["properties"]["name"] for obj in res["objects"]] + assert "two" not in names + assert "three" not in names + + with client.batch as batch: + batch.delete_objects( + "Test", + where={ + "path": ["name"], + "operator": "ContainsAll", + "valueTextArray": ["four", "five"], + }, + ) + res = client.data_object.get() + names = [obj["properties"]["name"] for obj in res["objects"]] + assert "four" in names + assert "five" in names + + @pytest.mark.parametrize("from_object_uuid", [uuid.uuid4(), str(uuid.uuid4()), uuid.uuid4().hex]) @pytest.mark.parametrize("to_object_uuid", [uuid.uuid4().hex, uuid.uuid4(), str(uuid.uuid4())]) @pytest.mark.parametrize("to_object_class_name", [None, "Test"]) diff --git a/integration/test_cluster.py b/integration/test_cluster.py index 3a51c7c2e..e60957948 100644 --- a/integration/test_cluster.py +++ b/integration/test_cluster.py @@ -4,8 +4,8 @@ import weaviate -GIT_HASH = "8172acb" -SERVER_VERSION = "1.21.0" +GIT_HASH = "5f2df4d" +SERVER_VERSION = "1.21.1" NODE_NAME = "node1" NUM_OBJECT = 10 diff --git a/integration/test_graphql.py b/integration/test_graphql.py index 8a51839fe..760e844ca 100644 --- a/integration/test_graphql.py +++ b/integration/test_graphql.py @@ -113,7 +113,7 @@ def test_get_data(client: weaviate.Client): def test_get_data_with_where_contains_any(client: weaviate.Client): """Test GraphQL's Get clause with where filter.""" - where_filter = {"path": ["size"], "operator": "ContainsAny", "valueIntList": [5]} + where_filter = {"path": ["size"], "operator": "ContainsAny", "valueIntArray": [5]} result = client.query.get("Ship", ["name", "size"]).with_where(where_filter).do() objects = get_objects_from_result(result) assert len(objects) == 1 and objects[0]["name"] == "HMS 
British Name" @@ -133,7 +133,7 @@ def test_get_data_with_where_contains_all( where_filter = { "path": ["description"], "operator": "ContainsAll", - "valueStringList": value_string_list, + "valueStringArray": value_string_list, } result = client.query.get("Ship", ["name"]).with_where(where_filter).do() objects = get_objects_from_result(result) diff --git a/test/data/test_crud_data.py b/test/data/test_crud_data.py index 75c8a444d..95a65cfba 100644 --- a/test/data/test_crud_data.py +++ b/test/data/test_crud_data.py @@ -455,11 +455,13 @@ def test_get_by_id(self): mock_get = Mock(return_value="Test") data_object.get = mock_get - data_object.get_by_id(uuid="UUID", additional_properties=["Test", "list"], with_vector=True) + data_object.get_by_id( + uuid="UUID", additional_properties=["Test", "Array"], with_vector=True + ) mock_get.assert_called_with( uuid="UUID", class_name=None, - additional_properties=["Test", "list"], + additional_properties=["Test", "Array"], with_vector=True, node_name=None, consistency_level=None, diff --git a/test/gql/test_filter.py b/test/gql/test_filter.py index 543328b9f..eb8c67be7 100644 --- a/test/gql/test_filter.py +++ b/test/gql/test_filter.py @@ -733,7 +733,7 @@ def test___str__(self): test_filter = { "path": ["name"], "operator": "ContainsAny", - "valueTextList": ["A", "B\n"], + "valueTextArray": ["A", "B\n"], } result = str(Where(test_filter)) self.assertEqual( @@ -743,7 +743,7 @@ def test___str__(self): test_filter = { "path": ["name"], "operator": "ContainsAll", - "valueStringList": ["A", '"B"'], + "valueStringArray": ["A", '"B"'], } result = str(Where(test_filter)) self.assertEqual( @@ -751,7 +751,7 @@ def test___str__(self): str(result), ) - test_filter = {"path": ["name"], "operator": "ContainsAny", "valueIntList": [1, 2]} + test_filter = {"path": ["name"], "operator": "ContainsAny", "valueIntArray": [1, 2]} result = str(Where(test_filter)) self.assertEqual( 'where: {path: ["name"] operator: ContainsAny valueInt: [1, 2]} ', 
str(result) @@ -760,20 +760,20 @@ def test___str__(self): test_filter = { "path": ["name"], "operator": "ContainsAny", - "valueStringList": "A", + "valueStringArray": "A", } with self.assertRaises(TypeError) as error: str(Where(test_filter)) - check_error_message(self, error, value_is_not_list_err("A", "valueStringList")) + check_error_message(self, error, value_is_not_list_err("A", "valueStringArray")) test_filter = { "path": ["name"], "operator": "ContainsAll", - "valueTextList": "A", + "valueTextArray": "A", } with self.assertRaises(TypeError) as error: str(Where(test_filter)) - check_error_message(self, error, value_is_not_list_err("A", "valueTextList")) + check_error_message(self, error, value_is_not_list_err("A", "valueTextArray")) test_filter = { "path": ["name"], diff --git a/weaviate/batch/crud_batch.py b/weaviate/batch/crud_batch.py index 8aa810359..448325848 100644 --- a/weaviate/batch/crud_batch.py +++ b/weaviate/batch/crud_batch.py @@ -18,6 +18,7 @@ from weaviate.connect import Connection from weaviate.data.replication import ConsistencyLevel +from weaviate.gql.filter import _find_value_type from weaviate.types import UUID from .requests import BatchRequest, ObjectsBatchRequest, ReferenceBatchRequest, BatchResponse from ..cluster import Cluster @@ -1308,7 +1309,7 @@ def delete_objects( payload = { "match": { "class": class_name, - "where": where, + "where": _clean_delete_objects_where(where), }, "output": output, "dryRun": dry_run, @@ -1795,3 +1796,53 @@ def _batch_create_error_handler(retry: int, max_retries: int, error: Exception) flush=True, ) time.sleep((retry + 1) * 2) + + +def _clean_delete_objects_where(where: dict) -> dict: + """Converts the Python-defined where filter type into the Weaviate-defined + where filter type used in the Batch REST request endpoint. + + Parameters + ---------- + where : dict + The Python-defined where filter. + + Returns + ------- + dict + The Weaviate-defined where filter. 
+ """ + py_value_type = _find_value_type(where) + weaviate_value_type = _convert_value_type(py_value_type) + where[weaviate_value_type] = where.pop(py_value_type) + return where + + +def _convert_value_type(_type: str) -> str: + """Converts the Python-defined where filter type into the Weaviate-defined + where filter type used in the Batch REST request endpoint. + + Parameters + ---------- + _type : str + The Python-defined where filter type. + + Returns + ------- + str + The Weaviate-defined where filter type. + """ + if _type == "valueTextList": + return "valueTextArray" + elif _type == "valueStringList": + return "valueStringArray" + elif _type == "valueIntList": + return "valueIntArray" + elif _type == "valueNumberList": + return "valueNumberArray" + elif _type == "valueBooleanList": + return "valueBooleanArray" + elif _type == "valueDateList": + return "valueDateArray" + else: + return _type diff --git a/weaviate/gql/filter.py b/weaviate/gql/filter.py index d2d9f3a93..5240303fa 100644 --- a/weaviate/gql/filter.py +++ b/weaviate/gql/filter.py @@ -17,6 +17,12 @@ from weaviate.util import get_vector, _sanitize_str VALUE_LIST_TYPES = { + "valueStringArray", + "valueTextArray", + "valueIntArray", + "valueNumberArray", + "valueBooleanArray", + "valueDateArray", "valueStringList", "valueTextList", "valueIntList", @@ -839,23 +845,23 @@ def __str__(self): if self.value_type in ["valueInt", "valueNumber"]: _check_is_not_list(self.value, self.value_type) gql += f"{self.value}}}" - elif self.value_type in ["valueIntList", "valueNumberList"]: + elif self.value_type in ["valueIntArray", "valueNumberArray"]: _check_is_list(self.value, self.value_type) gql += f"{self.value}}}" elif self.value_type in ["valueText", "valueString"]: _check_is_not_list(self.value, self.value_type) gql += f"{_sanitize_str(self.value)}}}" - elif self.value_type in ["valueTextList", "valueStringList"]: + elif self.value_type in ["valueTextArray", "valueStringArray"]: _check_is_list(self.value, 
self.value_type) val = [_sanitize_str(v) for v in self.value] gql += f"{_render_list(val)}}}" elif self.value_type == "valueBoolean": _check_is_not_list(self.value, self.value_type) gql += f"{_bool_to_str(self.value)}}}" - elif self.value_type == "valueBooleanList": + elif self.value_type == "valueBooleanArray": _check_is_list(self.value, self.value_type) gql += f"{_render_list(self.value)}}}" - elif self.value_type == "valueDateList": + elif self.value_type == "valueDateArray": _check_is_list(self.value, self.value_type) gql += f"{_render_list(self.value)}}}" elif self.value_type == "valueGeoRange": @@ -875,29 +881,31 @@ def __str__(self): def _convert_value_type(_type: str) -> str: - """Convert the value type to match `json` formatting required by Weaviate. + """Convert the value type to match `json` formatting required by the Weaviate-defined + GraphQL endpoints. NOTE: This is crucially different to the Batch REST endpoints wherein + the where filter is also used. Parameters ---------- _type : str - The type to be converted. + The Python-defined type to be converted. Returns ------- str - The string interpretation of the type in `json` format. + The string interpretation of the type in Weaviate-defined `json` format. """ - if _type == "valueTextList": + if _type == "valueTextArray" or _type == "valueTextList": return "valueText" - elif _type == "valueStringList": + elif _type == "valueStringArray" or _type == "valueStringList": return "valueString" - elif _type == "valueIntList": + elif _type == "valueIntArray" or _type == "valueIntList": return "valueInt" - elif _type == "valueNumberList": + elif _type == "valueNumberArray" or _type == "valueNumberList": return "valueNumber" - elif _type == "valueBooleanList": + elif _type == "valueBooleanArray" or _type == "valueBooleanList": return "valueBoolean" - elif _type == "valueDateList": + elif _type == "valueDateArray" or _type == "valueDateList": return "valueDate" else: return _type