From b8fce36fa79052b55ad1bdd0be6402eefa151eb5 Mon Sep 17 00:00:00 2001 From: Wouter Remijn Date: Wed, 5 Jun 2024 11:01:19 +0200 Subject: [PATCH] combined valid and simple validators, fixed some RQ docs --- README.md | 10 ++-- geopackage_validator/validate.py | 3 +- geopackage_validator/validations/__init__.py | 8 +-- .../validations/geometry_simple_check.py | 45 -------------- .../validations/geometry_type_check.py | 2 +- .../validations/geometry_valid_check.py | 58 ++++++++++++++++--- .../validations/name_length_check.py | 4 +- geopackage_validator/validations/srs_check.py | 2 +- .../validations/table_definitions_check.py | 2 +- tests/test_validate.py | 1 - .../validations/test_geometry_simple_check.py | 18 ------ .../validations/test_geometry_valid_check.py | 34 +++++++++-- 12 files changed, 97 insertions(+), 90 deletions(-) delete mode 100644 geopackage_validator/validations/geometry_simple_check.py delete mode 100644 tests/validations/test_geometry_simple_check.py diff --git a/README.md b/README.md index 9cb2ca3..dc07b9b 100644 --- a/README.md +++ b/README.md @@ -92,7 +92,7 @@ The current checks are (see also the 'show-validations' command): | RQ2 | Layers must have at least one feature. | | RQ3 | _LEGACY:_ * Layer features should have an allowed geometry_type (one of POINT, LINESTRING, POLYGON, MULTIPOINT, MULTILINESTRING, or MULTIPOLYGON). | | RQ4 | The geopackage should have no views defined. | -| RQ5 | Geometry should be valid. | +| RQ5 | _LEGACY:_ * Geometry should be valid. | | RQ6 | Column names must start with a letter, and valid characters are lowercase a-z, numbers or underscores. | | RQ7 | Tables should have a feature id column with unique index. | | RQ8 | Geopackage must conform to given JSON or YAML definitions. | @@ -104,13 +104,13 @@ The current checks are (see also the 'show-validations' command): | RQ14 | The geometry_type_name from the gpkg_geometry_columns table must be one of POINT, LINESTRING, POLYGON, MULTIPOINT, MULTILINESTRING, or MULTIPOLYGON. | | RQ15 | All table geometries must match the geometry_type_name from the gpkg_geometry_columns table. | | RQ16 | _LEGACY:_ * All layer and column names shall not be longer than 53 characters. | +| RQ21 | All layer and column names shall not be longer than 57 characters. | +| RQ22 | Only the following EPSG spatial reference systems are allowed: 28992, 3034, 3035, 3040, 3041, 3042, 3043, 3044, 3045, 3046, 3047, 3048, 3049, 3857, 4258, 4326, 4936, 4937, 5730, 7409. | +| RQ23 | Geometry should be valid and simple. | | RC17 | It is recommended to name all GEOMETRY type columns 'geom'. | | RC18 | It is recommended to give all GEOMETRY type columns the same name. | | RC19 | It is recommended to only use multidimensional geometry coordinates (elevation and measurement) when necessary. | -| RC20 | It is recommended that all (MULTI)POLYGON geometries have a counter-clockwise orientation for their exterior ring, and a clockwise direction for all interior rings. | -| RQ21 | All layer and column names shall not be longer than 57 characters. | -| RQ22 | Only the following EPSG spatial reference systems are allowed: 28992, 3034, 3035, 3040, 3041, 3042, 3043, 3044, 3045, 3046, 3047, 3048, 3049, 3857, 4258, 4326, 4936, 4937, 5730, 7409. | -| RQ23 | Geometry should be simple. | +| RC20 | It is recommended that all (MULTI)POLYGON geometries have a counter-clockwise orientation for their exterior ring, and a clockwise direction for all interior rings. | | UNKNOWN_WARNINGS | It is recommended that the unexpected (GDAL) warnings are looked into. | \* Legacy requirements are only executed with the validate command when explicitly requested in the validation set. diff --git a/geopackage_validator/validate.py b/geopackage_validator/validate.py index 019cab0..e52f7e4 100644 --- a/geopackage_validator/validate.py +++ b/geopackage_validator/validate.py @@ -19,13 +19,14 @@ RQ0 = "RQ0" RQ3 = "RQ3" +RQ5 = "RQ5" RQ8 = "RQ8" RQ12 = "RQ12" RQ16 = "RQ16" # Drop legacy requirements -DROP_LEGACY_RQ_FROM_ALL = [RQ0, RQ3, RQ12, RQ16] +DROP_LEGACY_RQ_FROM_ALL = [RQ0, RQ3, RQ5, RQ12, RQ16] def validators_to_use( diff --git a/geopackage_validator/validations/__init__.py b/geopackage_validator/validations/__init__.py index cbbe0c3..d910f1d 100644 --- a/geopackage_validator/validations/__init__.py +++ b/geopackage_validator/validations/__init__.py @@ -6,9 +6,9 @@ GpkgGeometryTypeNameValidator, GeometryTypeEqualsGpkgDefinitionValidator, ) -from geopackage_validator.validations.geometry_valid_check import ValidGeometryValidator -from geopackage_validator.validations.geometry_simple_check import ( - SimpleGeometryValidator, +from geopackage_validator.validations.geometry_valid_check import ( + ValidGeometryValidator, + ValidGeometryValidatorV0, ) from geopackage_validator.validations.layerfeature_check import ( OGRIndexValidator, @@ -48,7 +48,7 @@ "FeatureIdValidator", "GeometryTypeValidator", "ValidGeometryValidator", - "SimpleGeometryValidator", + "ValidGeometryValidatorV0", "OGRIndexValidator", "NonEmptyLayerValidator", "LayerNameValidator", diff --git a/geopackage_validator/validations/geometry_simple_check.py b/geopackage_validator/validations/geometry_simple_check.py deleted file mode 100644 index 4073d70..0000000 --- a/geopackage_validator/validations/geometry_simple_check.py +++ /dev/null @@ -1,45 +0,0 @@ -from typing import Iterable, Tuple -from geopackage_validator.validations import validator -from geopackage_validator import utils - - -SQL_TEMPLATE = """SELECT -count(rowid) AS count, -cast(rowid AS INTEGER) AS row_id -FROM "{table_name}" WHERE ST_IsSimple("{column_name}") = 0""" - - -def query_geometry_simple(dataset) -> Iterable[Tuple[str, str, int]]: - columns = utils.dataset_geometry_tables(dataset) - - for table_name, column_name, _ in columns: - - validations = dataset.ExecuteSQL( - SQL_TEMPLATE.format(table_name=table_name, column_name=column_name) - ) - for count, row_id in validations: - if count > 0: - yield table_name, column_name, count, row_id - dataset.ReleaseResultSet(validations) - - -class SimpleGeometryValidator(validator.Validator): - """Geometries should be simple.""" - - code = 23 - level = validator.ValidationLevel.ERROR - message = "Found not simple geometry in table: {table_name}, column {column_name}, {count} {count_label}, example id {row_id}" - - def check(self) -> Iterable[str]: - result = query_geometry_simple(self.dataset) - - return [ - self.message.format( - table_name=table_name, - column_name=column_name, - count=count, - count_label=("time" if count == 1 else "times"), - row_id=row_id, - ) - for table_name, column_name, count, row_id in result - ] diff --git a/geopackage_validator/validations/geometry_type_check.py b/geopackage_validator/validations/geometry_type_check.py index fbefa81..ea0b638 100644 --- a/geopackage_validator/validations/geometry_type_check.py +++ b/geopackage_validator/validations/geometry_type_check.py @@ -84,7 +84,7 @@ def aggregate(results): class GeometryTypeValidator(validator.Validator): - """Layer features should have an allowed geometry_type (one of POINT, LINESTRING, POLYGON, MULTIPOINT, MULTILINESTRING, or MULTIPOLYGON).""" + """LEGACY: * Layer features should have an allowed geometry_type (one of POINT, LINESTRING, POLYGON, MULTIPOINT, MULTILINESTRING, or MULTIPOLYGON).""" code = 3 level = validator.ValidationLevel.ERROR diff --git a/geopackage_validator/validations/geometry_valid_check.py b/geopackage_validator/validations/geometry_valid_check.py index bec7336..2904e46 100644 --- a/geopackage_validator/validations/geometry_valid_check.py +++ b/geopackage_validator/validations/geometry_valid_check.py @@ -2,8 +2,7 @@ from geopackage_validator.validations import validator from geopackage_validator import utils - -SQL_TEMPLATE = """SELECT reason, count(reason) AS count, row_id +SQL_ONLY_VALID_TEMPLATE = """SELECT reason, count(reason) AS count, row_id FROM( SELECT CASE INSTR(ST_IsValidReason("{column_name}"), '[') @@ -16,28 +15,73 @@ ) GROUP BY reason;""" +SQL_VALID_TEMPLATE = """SELECT reason, count(reason) AS count, row_id +FROM( + SELECT + CASE ST_IsValid("{column_name}") + WHEN 0 + THEN + CASE INSTR(ST_IsValidReason("{column_name}"), '[') + WHEN 0 + THEN ST_IsValidReason("{column_name}") + ELSE substr(ST_IsValidReason("{column_name}"), 0, INSTR(ST_IsValidReason("{column_name}"), '[')) + END + ELSE + CASE ST_IsSimple("{column_name}") + WHEN 0 + THEN 'Not Simple' + END + END AS reason, + cast(rowid AS INTEGER) AS row_id + FROM "{table_name}" WHERE ST_IsValid("{column_name}") = 0 OR ST_IsSimple("{column_name}") = 0 +) +GROUP BY reason;""" -def query_geometry_valid(dataset) -> Iterable[Tuple[str, str, str, int]]: + +def query_geometry_valid(dataset, sql_template) -> Iterable[Tuple[str, str, str, int]]: columns = utils.dataset_geometry_tables(dataset) for table_name, column_name, _ in columns: validations = dataset.ExecuteSQL( - SQL_TEMPLATE.format(table_name=table_name, column_name=column_name) + sql_template.format(table_name=table_name, column_name=column_name) ) for reason, count, row_id in validations: yield table_name, column_name, reason, count, row_id dataset.ReleaseResultSet(validations) -class ValidGeometryValidator(validator.Validator): - """Geometries should be valid.""" +class ValidGeometryValidatorV0(validator.Validator): + """Legacy: * Geometries should be valid.""" code = 5 level = validator.ValidationLevel.ERROR message = "Found invalid geometry in table: {table_name}, column {column_name}, reason: {reason}, {count} {count_label}, example id {row_id}" def check(self) -> Iterable[str]: - result = query_geometry_valid(self.dataset) + result = query_geometry_valid(self.dataset, SQL_ONLY_VALID_TEMPLATE) + + return [ + self.message.format( + table_name=table_name, + column_name=column_name, + reason=reason, + count=count, + count_label=("time" if count == 1 else "times"), + row_id=row_id, + ) + for table_name, column_name, reason, count, row_id in result + ] + + +class ValidGeometryValidator(validator.Validator): + """Geometries should be valid and simple.""" + + code = 23 + level = validator.ValidationLevel.ERROR + message = "Found invalid geometry in table: {table_name}, column {column_name}, reason: {reason}, {count} {count_label}, example id {row_id}" + + def check(self) -> Iterable[str]: + result = query_geometry_valid(self.dataset, SQL_VALID_TEMPLATE) return [ self.message.format( diff --git a/geopackage_validator/validations/name_length_check.py b/geopackage_validator/validations/name_length_check.py index 6e8f174..7ee839f 100644 --- a/geopackage_validator/validations/name_length_check.py +++ b/geopackage_validator/validations/name_length_check.py @@ -21,7 +21,7 @@ def query_names(dataset) -> Iterable[Tuple[str, str, int]]: class NameLengthValidatorV0(validator.Validator): - f"""All names must be maximally {LEGACY_MAX_LENGTH} characters long.""" + """LEGACY: * All names must be maximally 53 characters long.""" code = 16 level = validator.ValidationLevel.ERROR @@ -42,7 +42,7 @@ def check_columns(cls, names: Iterable[Tuple[str, str, int]]) -> List[str]: class NameLengthValidator(validator.Validator): - f"""All names must be maximally {MAX_LENGTH} characters long.""" + """All names must be maximally 57 characters long.""" code = 21 level = validator.ValidationLevel.ERROR diff --git a/geopackage_validator/validations/srs_check.py b/geopackage_validator/validations/srs_check.py index 6ac0056..7a348ee 100644 --- a/geopackage_validator/validations/srs_check.py +++ b/geopackage_validator/validations/srs_check.py @@ -32,7 +32,7 @@ def srs_equal_check_query(dataset) -> Iterable[str]: class SrsValidatorV0(validator.Validator): - """Only the following EPSG spatial reference systems are allowed: 28992, 3034, 3035, 3038, 3039, 3040, 3041, 3042, 3043, 3044, 3045, 3046, 3047, 3048, 3049, 3050, 3051, 4258, 4936, 4937, 5730, 7409.""" + """LEGACY: * Only the following EPSG spatial reference systems are allowed: 28992, 3034, 3035, 3038, 3039, 3040, 3041, 3042, 3043, 3044, 3045, 3046, 3047, 3048, 3049, 3050, 3051, 4258, 4936, 4937, 5730, 7409.""" code = 12 level = validator.ValidationLevel.ERROR diff --git a/geopackage_validator/validations/table_definitions_check.py b/geopackage_validator/validations/table_definitions_check.py index 4715d23..f3d047b 100644 --- a/geopackage_validator/validations/table_definitions_check.py +++ b/geopackage_validator/validations/table_definitions_check.py @@ -116,7 +116,7 @@ def check_table_definitions(self, definitions_current: TableDefinition): class TableDefinitionValidatorV0(validator.Validator): - """Geopackage must conform to table names in the given JSON definitions.""" + """LEGACY: * Geopackage must conform to table names in the given JSON definitions.""" code = 0 level = validator.ValidationLevel.ERROR diff --git a/tests/test_validate.py b/tests/test_validate.py index c23a659..bdeff13 100644 --- a/tests/test_validate.py +++ b/tests/test_validate.py @@ -24,7 +24,6 @@ def test_determine_validations_to_use_none(): "RQ1", "RQ2", "RQ4", - "RQ5", "RQ6", "RQ7", "RQ9", diff --git a/tests/validations/test_geometry_simple_check.py b/tests/validations/test_geometry_simple_check.py deleted file mode 100644 index 0afd025..0000000 --- a/tests/validations/test_geometry_simple_check.py +++ /dev/null @@ -1,18 +0,0 @@ -from geopackage_validator.utils import open_dataset -from geopackage_validator.validations.geometry_simple_check import query_geometry_simple - - -def test_with_gpkg(): - dataset = open_dataset("tests/data/test_geometry_simple.gpkg") - checks = list(query_geometry_simple(dataset)) - assert len(checks) == 1 - assert checks[0][0] == "test_geometry_simple" - assert checks[0][1] == "geometry" - assert checks[0][2] == 1 - assert checks[0][3] == 1 - - -def test_with_gpkg_allcorrect(): - dataset = open_dataset("tests/data/test_allcorrect.gpkg") - checks = list(query_geometry_simple(dataset)) - assert len(checks) == 0 diff --git a/tests/validations/test_geometry_valid_check.py b/tests/validations/test_geometry_valid_check.py index a72e61c..8aba940 100644 --- a/tests/validations/test_geometry_valid_check.py +++ b/tests/validations/test_geometry_valid_check.py @@ -1,10 +1,36 @@ from geopackage_validator.utils import open_dataset -from geopackage_validator.validations.geometry_valid_check import query_geometry_valid +from geopackage_validator.validations.geometry_valid_check import ( + query_geometry_valid, + SQL_ONLY_VALID_TEMPLATE, + SQL_VALID_TEMPLATE, +) -def test_with_gpkg(): +def test_with_gpkg_valid(): dataset = open_dataset("tests/data/test_geometry_valid.gpkg") - checks = list(query_geometry_valid(dataset)) + checks = list(query_geometry_valid(dataset, SQL_ONLY_VALID_TEMPLATE)) + assert len(checks) == 1 + assert checks[0][0] == "test_geometry_valid" + assert checks[0][1] == "geometry" + assert checks[0][2] == "Self-intersection" + assert checks[0][3] == 1 + assert checks[0][4] == 1 + + +def test_with_gpkg_simple(): + dataset = open_dataset("tests/data/test_geometry_simple.gpkg") + checks = list(query_geometry_valid(dataset, SQL_VALID_TEMPLATE)) + assert len(checks) == 1 + assert checks[0][0] == "test_geometry_simple" + assert checks[0][1] == "geometry" + assert checks[0][2] == "Not Simple" + assert checks[0][3] == 1 + assert checks[0][4] == 1 + + +def test_with_gpkg_valid_simple(): + dataset = open_dataset("tests/data/test_geometry_valid.gpkg") + checks = list(query_geometry_valid(dataset, SQL_VALID_TEMPLATE)) assert len(checks) == 1 assert checks[0][0] == "test_geometry_valid" assert checks[0][1] == "geometry" @@ -15,5 +41,5 @@ def test_with_gpkg(): def test_with_gpkg_allcorrect(): dataset = open_dataset("tests/data/test_allcorrect.gpkg") - checks = list(query_geometry_valid(dataset)) + checks = list(query_geometry_valid(dataset, SQL_VALID_TEMPLATE)) assert len(checks) == 0