diff --git a/integration/test_schema.py b/integration/test_schema.py index 12de6ae6f..560e20ebf 100644 --- a/integration/test_schema.py +++ b/integration/test_schema.py @@ -1,6 +1,7 @@ from typing import Optional import pytest +import requests import weaviate from weaviate import Tenant, TenantActivityStatus @@ -120,6 +121,19 @@ def test_class_tenants(client: weaviate.Client): assert len(tenants_get) == 1 +def test_update_schema_with_no_properties(client: weaviate.Client): + single_class = {"class": "NoProperties"} + + requests.post("http://localhost:8080/v1/schema", json=single_class) + assert client.schema.exists("NoProperties") + + client.schema.update_config("NoProperties", {"vectorIndexConfig": {"ef": 64}}) + assert client.schema.exists("NoProperties") + + client.schema.delete_class("NoProperties") + assert client.schema.exists("NoProperties") is False + + def test_class_tenants_activate_deactivate(client: weaviate.Client): class_name = "MultiTenancyActivateDeactivateSchemaTest" uncap_class_name = "multiTenancyActivateDeactivateSchemaTest" diff --git a/test/schema/properties/test_properties.py b/test/schema/properties/test_properties.py index 246cd2546..f742b0392 100644 --- a/test/schema/properties/test_properties.py +++ b/test/schema/properties/test_properties.py @@ -6,7 +6,6 @@ from test.util import mock_connection_func, check_error_message, check_startswith_error_message from weaviate.exceptions import ( UnexpectedStatusCodeException, - SchemaValidationException, ) from weaviate.schema.properties import Property @@ -21,18 +20,12 @@ def test_create(self): # invalid calls error_message = "Class name must be of type str but is " - check_property_error_message = 'Property does not contain "dataType"' requests_error_message = "Property was created properly." with self.assertRaises(TypeError) as error: prop.create(35, {}) check_error_message(self, error, error_message + str(int)) - # test if `check_property` is called in `create` - with self.assertRaises(SchemaValidationException) as error: - prop.create("Class", {}) - check_error_message(self, error, check_property_error_message) - prop = Property(mock_connection_func("post", side_effect=RequestsConnectionError("Test!"))) with self.assertRaises(RequestsConnectionError) as error: prop.create("Class", {"name": "test", "dataType": ["test_type"]}) diff --git a/test/schema/test_validate_schema.py b/test/schema/test_validate_schema.py deleted file mode 100644 index e753bf21c..000000000 --- a/test/schema/test_validate_schema.py +++ /dev/null @@ -1,352 +0,0 @@ -import unittest - -from test.util import check_error_message -from weaviate.exceptions import SchemaValidationException -from weaviate.schema.validate_schema import validate_schema, check_class, check_property - -valid_schema_with_all_properties = { - "classes": [ - { - "class": "Category", - "description": "Category an article is a type off", - "moduleConfig": {"text2vec-contextionary": {"vectorizeClassName": False}}, - "properties": [ - { - "dataType": ["string"], - "description": "category name", - "indexInverted": True, - "moduleConfig": {"text2vec-contextionary": {"vectorizePropertyName": False}}, - "name": "name", - } - ], - "vectorIndexType": "hnsw", - "vectorizer": "none", - "replicationConfig": { - "factor": 1, - }, - }, - { - "class": "Publication", - "description": "A publication with an online source", - "moduleConfig": {"text2vec-contextionary": {"vectorizeClassName": False}}, - "properties": [ - {"dataType": ["string"], "description": "Name of the publication", "name": "name"}, - { - "dataType": ["geoCoordinates"], - "description": "Geo location of the HQ", - "name": "headquartersGeoLocation", - }, - { - "dataType": ["Article"], - "description": "The articles this publication has", - "name": "hasArticles", - }, - { - "dataType": ["Article"], - "description": "Articles this author wrote", - "name": "wroteArticles", - }, - ], - "vectorIndexType": "hnsw", - "vectorizer": "none", - "replicationConfig": { - "factor": 1, - }, - }, - { - "class": "Author", - "description": "Normalised types", - "moduleConfig": {"text2vec-contextionary": {"vectorizeClassName": True}}, - "properties": [ - {"dataType": ["string"], "description": "Name of the author", "name": "name"}, - { - "dataType": ["Publication"], - "description": "The publication this author writes for", - "name": "writesFor", - }, - ], - "vectorIndexType": "hnsw", - "vectorizer": "none", - "replicationConfig": { - "factor": 1, - }, - }, - { - "class": "Article", - "description": "Normalised types", - "moduleConfig": {"text2vec-contextionary": {"vectorizeClassName": False}}, - "properties": [ - { - "dataType": ["string"], - "description": "title of the article", - "indexInverted": True, - "moduleConfig": {"text2vec-contextionary": {"vectorizePropertyName": False}}, - "name": "title", - }, - { - "dataType": ["string"], - "description": "url of the article", - "indexInverted": False, - "moduleConfig": {"text2vec-contextionary": {"vectorizePropertyName": False}}, - "name": "url", - }, - { - "dataType": ["text"], - "description": "summary of the article", - "indexInverted": True, - "moduleConfig": {"text2vec-contextionary": {"vectorizePropertyName": False}}, - "name": "summary", - }, - { - "dataType": ["date"], - "description": "date of publication of the article", - "name": "publicationDate", - }, - {"dataType": ["int"], "description": "Words in this article", "name": "wordCount"}, - { - "dataType": ["Author", "Publication"], - "description": "authors this article has", - "name": "hasAuthors", - }, - { - "dataType": ["Publication"], - "description": "publication this article is in", - "name": "inPublication", - }, - { - "dataType": ["Category"], - "description": "category this article is of", - "name": "ofCategory", - }, - { - "dataType": ["boolean"], - "description": "whether the article is currently accessible through the url", - "name": "isAccessible", - }, - ], - "vectorIndexType": "hnsw", - "vectorizer": "none", - "replicationConfig": { - "factor": 1, - }, - }, - ] -} - - -class TestSchemaValidation(unittest.TestCase): - def test_validate_schema(self): - """ - Test `validate_schema` function. - """ - - # invalid calls - classess_error_message = ( - 'Each schema has to have "classes" ' - "in the first level of the JSON format file/parameter/object" - ) - class_key_error_message = '"class" key is missing in class definition.' - - invalid_schema = {} - with self.assertRaises(SchemaValidationException) as error: - validate_schema(invalid_schema) - check_error_message(self, error, classess_error_message) - - invalid_schema = {"classes": "my_class"} - with self.assertRaises(SchemaValidationException) as error: - validate_schema(invalid_schema) - check_error_message(self, error, f'"classes" is type {str} but should be {list}.') - - invalid_schema = {"things": {"classes": []}} - with self.assertRaises(SchemaValidationException) as error: - validate_schema(invalid_schema) - check_error_message(self, error, classess_error_message) - - invalid_schema = {"classes": ["my_class"]} - with self.assertRaises(SchemaValidationException) as error: - validate_schema(invalid_schema) - check_error_message(self, error, f'"class" is type {str} but should be {dict}.') - - # test the call of the `check_class` function inside `validate_schema` - invalid_schema = {"classes": [{"my_class": []}]} - with self.assertRaises(SchemaValidationException) as error: - validate_schema(invalid_schema) - check_error_message(self, error, class_key_error_message) - - # valid calls - valid_schema = {"classes": []} - self.assertIsNone(validate_schema(valid_schema)) - valid_schema = {"classes": [], "author": "Unit Test"} - self.assertIsNone(validate_schema(valid_schema)) - self.assertIsNone(validate_schema(valid_schema_with_all_properties)) - - def test_check_class(self): - """ - Test `check_class` function. - """ - - # Valid maximal schema - max_valid = { - "class": "Boat", - "description": "boat swiming on the water", - "properties": [ - { - "dataType": ["text"], - "name": "tenant", - } - ], - "vectorIndexType": "hnsw", - "vectorIndexConfig": {}, - "moduleConfig": {}, - "vectorizer": "text2vec-contextionary", - "replicationConfig": { - "factor": 1, - }, - "multiTenancyConfig": {"enabled": True}, - } - check_class(max_valid) - # minimal must contain class key as string - check_class({"class": "Car"}) - - # invalid calls - class_key_error_message = '"class" key is missing in class definition.' - unknown_key_error_message = lambda key: f'"{key}" is not a known class definition key.' - key_type_error_messsage = lambda key, value, exp_type: ( - f'"{key}" is type {type(value)} ' f"but should be {exp_type}." - ) - - with self.assertRaises(SchemaValidationException) as error: - check_class({"invalid_key": "value"}) - check_error_message(self, error, class_key_error_message) - - with self.assertRaises(SchemaValidationException) as error: - check_class( - { - "class": [], - } - ) - check_error_message(self, error, key_type_error_messsage("class", [], str)) - - with self.assertRaises(SchemaValidationException) as error: - check_class({"class": "Tree", "invalid_key": []}) - check_error_message(self, error, unknown_key_error_message("invalid_key")) - - with self.assertRaises(SchemaValidationException) as error: - check_class({"class": "Tree", "description": []}) - check_error_message(self, error, key_type_error_messsage("description", [], str)) - - with self.assertRaises(SchemaValidationException) as error: - check_class({"class": "Tree", "properties": "References please"}) - check_error_message(self, error, key_type_error_messsage("properties", "", list)) - - with self.assertRaises(SchemaValidationException) as error: - check_class({"class": "Tree", "vectorIndexType": True}) - check_error_message(self, error, key_type_error_messsage("vectorIndexType", True, str)) - - with self.assertRaises(SchemaValidationException) as error: - check_class({"class": "Tree", "vectorIndexConfig": []}) - check_error_message(self, error, key_type_error_messsage("vectorIndexConfig", [], dict)) - - with self.assertRaises(SchemaValidationException) as error: - check_class({"class": "Tree", "replicationConfig": []}) - check_error_message(self, error, key_type_error_messsage("replicationConfig", [], dict)) - - with self.assertRaises(SchemaValidationException) as error: - check_class({"class": "Tree", "moduleConfig": []}) - check_error_message(self, error, key_type_error_messsage("moduleConfig", [], dict)) - - with self.assertRaises(SchemaValidationException) as error: - check_class({"class": "Tree", "vectorizer": 100.1}) - check_error_message(self, error, key_type_error_messsage("vectorizer", 100.1, str)) - - # check if `check_property` is called inside `check_class` - with self.assertRaises(SchemaValidationException) as error: - check_class( - { - "class": "Tree", - "properties": [ - { - "dataType": ["string"], - "description": "Test Property", - } - ], - } - ) - check_error_message(self, error, 'Property does not contain "name"') - - def test_check_property(self): - """ - Test `check_property` function. - """ - - # valid calls - valid_minimal = {"dataType": ["string"], "name": "string"} - - check_property(valid_minimal) - valid_max = { - "dataType": ["string"], - "name": "Rocket", - "moduleConfig": {}, - "description": "some description", - "indexInverted": True, - } - check_property(valid_max) - - # invalid calls - data_type_error_message = 'Property does not contain "dataType"' - name_error_message = 'Property does not contain "name"' - key_error_message = lambda key: f'Property "{key}" is not known.' - key_type_error_messsage = lambda key, value, exp_type: ( - f'"{key}" is type {type(value)} ' f"but should be {exp_type}." - ) - - properties = {"dataType": ["string"]} - with self.assertRaises(SchemaValidationException) as error: - check_property(properties) - check_error_message(self, error, name_error_message) - - properties = {"name": "string"} - with self.assertRaises(SchemaValidationException) as error: - check_property(properties) - check_error_message(self, error, data_type_error_message) - - properties = {"dataType": ["string"], "name": "string", "invalid_property": "value"} - with self.assertRaises(SchemaValidationException) as error: - check_property(properties) - check_error_message(self, error, key_error_message("invalid_property")) - - properties = { - "dataType": ["string"], - "name": "Rocket", - "moduleConfig": [], - } - with self.assertRaises(SchemaValidationException) as error: - check_property(properties) - check_error_message( - self, error, key_type_error_messsage("moduleConfig", properties["moduleConfig"], dict) - ) - - properties = { - "dataType": ["string"], - "name": "Rocket", - "description": ["some description"], - } - with self.assertRaises(SchemaValidationException) as error: - check_property(properties) - check_error_message( - self, error, key_type_error_messsage("description", properties["description"], str) - ) - - properties = {"dataType": ["string"], "name": "Rocket", "indexInverted": "True"} - with self.assertRaises(SchemaValidationException) as error: - check_property(properties) - check_error_message( - self, error, key_type_error_messsage("indexInverted", properties["indexInverted"], bool) - ) - - properties = {"dataType": ["string", 10], "name": "Rocket", "indexInverted": True} - with self.assertRaises(SchemaValidationException) as error: - check_property(properties) - check_error_message( - self, error, key_type_error_messsage("dataType object", properties["dataType"][1], str) - ) diff --git a/weaviate/schema/crud_schema.py b/weaviate/schema/crud_schema.py index 34d3760d5..612ca4df7 100644 --- a/weaviate/schema/crud_schema.py +++ b/weaviate/schema/crud_schema.py @@ -10,12 +10,6 @@ from weaviate.connect import Connection from weaviate.exceptions import UnexpectedStatusCodeException from weaviate.schema.properties import Property -from weaviate.schema.validate_schema import ( - validate_schema, - check_class, - CLASS_KEYS, - PROPERTY_KEYS, -) from weaviate.util import ( _get_dict_from_object, _is_sub_schema, @@ -24,6 +18,31 @@ _decode_json_response_list, ) +CLASS_KEYS = { + "class", + "vectorIndexType", + "vectorIndexConfig", + "moduleConfig", + "description", + "vectorizer", + "properties", + "invertedIndexConfig", + "shardingConfig", + "replicationConfig", + "multiTenancyConfig", +} + +PROPERTY_KEYS = { + "dataType", + "name", + "moduleConfig", + "description", + "indexInverted", + "tokenization", + "indexFilterable", + "indexSearchable", +} + _PRIMITIVE_WEAVIATE_TYPES_SET = { "string", "string[]", @@ -176,8 +195,6 @@ def create(self, schema: Union[dict, str]) -> None: """ loaded_schema = _get_dict_from_object(schema) - # validate the schema before loading - validate_schema(loaded_schema) self._create_classes_with_primitives(loaded_schema["classes"]) self._create_complex_properties_from_classes(loaded_schema["classes"]) @@ -230,8 +247,6 @@ def create_class(self, schema_class: Union[dict, str]) -> None: """ loaded_schema_class = _get_dict_from_object(schema_class) - # validate the class before loading - check_class(loaded_schema_class) self._create_class_with_primitives(loaded_schema_class) self._create_complex_properties_from_class(loaded_schema_class) @@ -437,7 +452,6 @@ def update_config(self, class_name: str, config: dict) -> None: class_name = _capitalize_first_letter(class_name) class_schema = self.get(class_name) new_class_schema = _update_nested_dict(class_schema, config) - check_class(new_class_schema) path = "/schema/" + class_name try: diff --git a/weaviate/schema/properties/crud_properties.py b/weaviate/schema/properties/crud_properties.py index 332d26eb4..75a46997b 100644 --- a/weaviate/schema/properties/crud_properties.py +++ b/weaviate/schema/properties/crud_properties.py @@ -5,7 +5,6 @@ from weaviate.connect import Connection from weaviate.exceptions import UnexpectedStatusCodeException -from weaviate.schema.validate_schema import check_property from weaviate.util import _get_dict_from_object, _capitalize_first_letter @@ -66,9 +65,6 @@ def create(self, schema_class_name: str, schema_property: dict) -> None: loaded_schema_property = _get_dict_from_object(schema_property) - # check if valid property - check_property(loaded_schema_property) - schema_class_name = _capitalize_first_letter(schema_class_name) path = f"/schema/{schema_class_name}/properties" diff --git a/weaviate/schema/validate_schema.py b/weaviate/schema/validate_schema.py deleted file mode 100644 index 9c4ea43b5..000000000 --- a/weaviate/schema/validate_schema.py +++ /dev/null @@ -1,170 +0,0 @@ -""" -Schema validation module. -""" -from typing import Any - -from weaviate.exceptions import SchemaValidationException - -CLASS_KEYS = { - "class", - "vectorIndexType", - "vectorIndexConfig", - "moduleConfig", - "description", - "vectorizer", - "properties", - "invertedIndexConfig", - "shardingConfig", - "replicationConfig", - "multiTenancyConfig", -} - -PROPERTY_KEYS = { - "dataType", - "name", - "moduleConfig", - "description", - "indexInverted", - "tokenization", - "indexFilterable", - "indexSearchable", -} - - -def validate_schema(schema: dict) -> None: - """ - Validate schema. - - Parameters - ---------- - schema : dict - Schema to be validated. - - Raises - ------ - weaviate.SchemaValidationException - If the schema could not be validated against the standard format. - """ - - # check if schema has required "classes" as keys - if "classes" not in schema: - raise SchemaValidationException( - 'Each schema has to have "classes" ' - "in the first level of the JSON format file/parameter/object" - ) - # check if "classes" is of type list - _check_key_type("classes", schema["classes"], list) - # check if each class in the "classes" is a valid class - for weaviate_class in schema["classes"]: - _check_key_type("class", weaviate_class, dict) - check_class(weaviate_class) - - -def check_class(class_definition: dict) -> None: - """ - Validate a class against the standard class format. - - Parameters - ---------- - class_definition : dict - The definition of the class to be validated. - - Raises - ------ - weaviate.SchemaValidationException - If the class could not be validated against the standard class format. - """ - - # check mandatory keys - if "class" not in class_definition: - raise SchemaValidationException('"class" key is missing in class definition.') - - # check optional keys - for key in class_definition.keys(): - # Check if key is known - if key not in CLASS_KEYS: - raise SchemaValidationException(f'"{key}" is not a known class definition key.') - # check if key is right type - if key in ["class", "vectorIndexType", "description", "vectorizer"]: - _check_key_type(key, class_definition[key], str) - if key in [ - "vectorIndexConfig", - "moduleConfig", - "invertedIndexConfig", - "shardingConfig", - "replicationConfig", - "multiTenancyConfig", - ]: - _check_key_type(key, class_definition[key], dict) - if key in ["properties"]: - _check_key_type(key, class_definition[key], list) - - if "properties" in class_definition: - for class_property in class_definition["properties"]: - check_property(class_property) - - -def check_property(class_property: dict) -> None: - """ - Validate a class property against the standard class property. - - Parameters - ---------- - class_property : dict - The class property to be validated. - - Raises - ------ - weaviate.SchemaValidationException - If the class property could not be validated against the standard class property format. - """ - - # mandatory fields - if "dataType" not in class_property: - raise SchemaValidationException('Property does not contain "dataType"') - if "name" not in class_property: - raise SchemaValidationException('Property does not contain "name"') - - for key in class_property: - # check for misspelled and/or non-existent properties - if key not in PROPERTY_KEYS: - raise SchemaValidationException(f'Property "{key}" is not known.') - - # Test types - if key in ["dataType"]: - _check_key_type(key, class_property[key], list) - if key in ["name", "description", "tokenization"]: - _check_key_type(key, class_property[key], str) - if key in ["indexInverted"]: - _check_key_type(key, class_property[key], bool) - if key in ["moduleConfig"]: - _check_key_type(key, class_property[key], dict) - - # Test dataType types - for data_type in class_property["dataType"]: - _check_key_type("dataType object", data_type, str) - - -def _check_key_type(key: str, value: Any, expected_type: Any) -> None: - """ - Check if value is of an expected type. - - Parameters - ---------- - key : str - The key for which to check data type. - value : Any - The value of the 'key' for which to check data type. - expected_type : Any - The expected data type of the 'value'. - - Raises - ------ - weaviate.SchemaValidationException - If the 'value' is of wrong data type. - """ - - if not isinstance(value, expected_type): - raise SchemaValidationException( - f'"{key}" is type {type(value)} ' f"but should be {expected_type}." - )