Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

check simple geometry #121

Merged
merged 3 commits into from
Jun 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ The current checks are (see also the 'show-validations' command):
| RQ2 | Layers must have at least one feature. |
| RQ3 | _LEGACY:_ * Layer features should have an allowed geometry_type (one of POINT, LINESTRING, POLYGON, MULTIPOINT, MULTILINESTRING, or MULTIPOLYGON). |
| RQ4 | The geopackage should have no views defined. |
| RQ5 | Geometry should be valid. |
| RQ5 | _LEGACY:_ * Geometry should be valid. |
| RQ6 | Column names must start with a letter, and valid characters are lowercase a-z, numbers or underscores. |
| RQ7 | Tables should have a feature id column with unique index. |
| RQ8 | Geopackage must conform to given JSON or YAML definitions. |
Expand All @@ -104,12 +104,13 @@ The current checks are (see also the 'show-validations' command):
| RQ14 | The geometry_type_name from the gpkg_geometry_columns table must be one of POINT, LINESTRING, POLYGON, MULTIPOINT, MULTILINESTRING, or MULTIPOLYGON. |
| RQ15 | All table geometries must match the geometry_type_name from the gpkg_geometry_columns table. |
| RQ16 | _LEGACY:_ * All layer and column names shall not be longer than 53 characters. |
| RQ21 | All layer and column names shall not be longer than 57 characters. |
| RQ22 | Only the following EPSG spatial reference systems are allowed: 28992, 3034, 3035, 3040, 3041, 3042, 3043, 3044, 3045, 3046, 3047, 3048, 3049, 3857, 4258, 4326, 4936, 4937, 5730, 7409. |
| RQ23 | Geometry should be valid and simple. |
| RC17 | It is recommended to name all GEOMETRY type columns 'geom'. |
| RC18 | It is recommended to give all GEOMETRY type columns the same name. |
| RC19 | It is recommended to only use multidimensional geometry coordinates (elevation and measurement) when necessary. |
| RC20 | It is recommended that all (MULTI)POLYGON geometries have a counter-clockwise orientation for their exterior ring, and a clockwise direction for all interior rings. |
| RQ21 | All layer and column names shall not be longer than 57 characters. |
| RQ22 | Only the following EPSG spatial reference systems are allowed: 28992, 3034, 3035, 3040, 3041, 3042, 3043, 3044, 3045, 3046, 3047, 3048, 3049, 3857, 4258, 4326, 4936, 4937, 5730, 7409. |
| RC20 | It is recommended that all (MULTI)POLYGON geometries have a counter-clockwise orientation for their exterior ring, and a clockwise direction for all interior rings. |
| UNKNOWN_WARNINGS | It is recommended that the unexpected (GDAL) warnings are looked into. |

\* Legacy requirements are only executed with the validate command when explicitly requested in the validation set.
Expand Down
11 changes: 9 additions & 2 deletions geopackage_validator/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,16 +420,23 @@ def geopackage_validator_command_generate_table_definitions(
name="show-validations",
help="Show all the possible validations that can be executed in the validate command.",
)
@click.option(
"--no-legacy",
required=False,
is_flag=True,
help="Output without Legacy checks",
)
@click.option(
"--yaml",
required=False,
is_flag=True,
help="Output yaml",
)
@click_log.simple_verbosity_option(logger)
def geopackage_validator_command_show_validations(yaml):
def geopackage_validator_command_show_validations(no_legacy, yaml):
try:
validation_codes = validate.get_validation_descriptions()
legacy = not no_legacy
validation_codes = validate.get_validation_descriptions(legacy)
output.print_output(validation_codes, yaml, yaml_indent=5)
except Exception:
logger.exception("Error while listing validations")
Expand Down
17 changes: 14 additions & 3 deletions geopackage_validator/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,14 @@

RQ0 = "RQ0"
RQ3 = "RQ3"
RQ5 = "RQ5"
RQ8 = "RQ8"
RQ12 = "RQ12"
RQ16 = "RQ16"


# Drop legacy requirements
DROP_LEGACY_RQ_FROM_ALL = [RQ0, RQ3, RQ12, RQ16]
DROP_LEGACY_RQ_FROM_ALL = [RQ0, RQ3, RQ5, RQ12, RQ16]


def validators_to_use(
Expand Down Expand Up @@ -193,10 +194,20 @@ def gdal_error_handler(err_class, err_num, error):
)


def get_validation_descriptions(legacy=True):
    """Map every validation code to the docstring of its validator class.

    Args:
        legacy: When true (the default, matching the former zero-argument
            behaviour) all validator classes are listed. When false, classes
            whose ``validation_code`` is in ``DROP_LEGACY_RQ_FROM_ALL`` are
            left out.

    Returns:
        An ``OrderedDict`` of ``validation_code`` -> class ``__doc__``, filled
        in the order in which ``get_validator_classes()`` returns the classes.
    """
    # Nothing is excluded when legacy checks are wanted.
    excluded_codes = () if legacy else DROP_LEGACY_RQ_FROM_ALL

    return OrderedDict(
        (klass.validation_code, klass.__doc__)
        for klass in get_validator_classes()
        if klass.validation_code not in excluded_codes
    )


Expand Down
6 changes: 5 additions & 1 deletion geopackage_validator/validations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@
GpkgGeometryTypeNameValidator,
GeometryTypeEqualsGpkgDefinitionValidator,
)
from geopackage_validator.validations.geometry_valid_check import ValidGeometryValidator
from geopackage_validator.validations.geometry_valid_check import (
ValidGeometryValidator,
ValidGeometryValidatorV0,
)
from geopackage_validator.validations.layerfeature_check import (
OGRIndexValidator,
NonEmptyLayerValidator,
Expand Down Expand Up @@ -45,6 +48,7 @@
"FeatureIdValidator",
"GeometryTypeValidator",
"ValidGeometryValidator",
"ValidGeometryValidatorV0",
"OGRIndexValidator",
"NonEmptyLayerValidator",
"LayerNameValidator",
Expand Down
2 changes: 1 addition & 1 deletion geopackage_validator/validations/geometry_type_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def aggregate(results):


class GeometryTypeValidator(validator.Validator):
"""Layer features should have an allowed geometry_type (one of POINT, LINESTRING, POLYGON, MULTIPOINT, MULTILINESTRING, or MULTIPOLYGON)."""
"""LEGACY: * Layer features should have an allowed geometry_type (one of POINT, LINESTRING, POLYGON, MULTIPOINT, MULTILINESTRING, or MULTIPOLYGON)."""

code = 3
level = validator.ValidationLevel.ERROR
Expand Down
58 changes: 51 additions & 7 deletions geopackage_validator/validations/geometry_valid_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@
from geopackage_validator.validations import validator
from geopackage_validator import utils


SQL_TEMPLATE = """SELECT reason, count(reason) AS count, row_id
SQL_ONLY_VALID_TEMPLATE = """SELECT reason, count(reason) AS count, row_id
FROM(
SELECT
CASE INSTR(ST_IsValidReason("{column_name}"), '[')
Expand All @@ -16,28 +15,73 @@
)
GROUP BY reason;"""

# Query template for the combined "valid and simple" geometry check.
# Placeholders {table_name} and {column_name} are filled in with str.format.
# Only rows where ST_IsValid or ST_IsSimple equals 0 are selected; they are
# grouped by reason:
#   - invalid geometry: the ST_IsValidReason text, cut off before the first
#     '[' when one is present;
#   - valid but not simple geometry: the literal 'Not Simple'.
# Each result row is (reason, count, row_id). row_id is a bare column in the
# GROUP BY query, so presumably it is the rowid of one member of the group.
SQL_VALID_TEMPLATE = """SELECT reason, count(reason) AS count, row_id
FROM(
SELECT
CASE ST_IsValid("{column_name}")
WHEN 0
THEN
CASE INSTR(ST_IsValidReason("{column_name}"), '[')
WHEN 0
THEN ST_IsValidReason("{column_name}")
ELSE substr(ST_IsValidReason("{column_name}"), 0, INSTR(ST_IsValidReason("{column_name}"), '['))
END
ELSE
CASE ST_IsSimple("{column_name}")
WHEN 0
THEN 'Not Simple'
END
END AS reason,
cast(rowid AS INTEGER) AS row_id
FROM "{table_name}" WHERE ST_IsValid("{column_name}") = 0 OR ST_IsSimple("{column_name}") = 0
)
GROUP BY reason;"""

def query_geometry_valid(
    dataset, sql_template
) -> Iterable[Tuple[str, str, str, int, int]]:
    """Run a geometry check query on every geometry table of a dataset.

    Args:
        dataset: Opened dataset object; it must provide ``ExecuteSQL`` and
            ``ReleaseResultSet``.
        sql_template: SQL text containing ``{table_name}`` and
            ``{column_name}`` placeholders.

    Yields:
        ``(table_name, column_name, reason, count, row_id)`` for each row
        returned by the query.
    """
    columns = utils.dataset_geometry_tables(dataset)

    for table_name, column_name, _ in columns:
        validations = dataset.ExecuteSQL(
            sql_template.format(table_name=table_name, column_name=column_name)
        )
        try:
            for reason, count, row_id in validations:
                yield table_name, column_name, reason, count, row_id
        finally:
            # Release the result set even when the consumer stops iterating
            # early or a row fails to unpack; skip when no result set exists.
            if validations is not None:
                dataset.ReleaseResultSet(validations)


class ValidGeometryValidatorV0(validator.Validator):
    """LEGACY: * Geometries should be valid."""

    # NOTE: keep the docstring a single descriptive sentence with the same
    # "LEGACY: *" prefix as the other legacy validators; the validation
    # listing appears to be built from validator class docstrings.
    code = 5
    level = validator.ValidationLevel.ERROR
    message = "Found invalid geometry in table: {table_name}, column {column_name}, reason: {reason}, {count} {count_label}, example id {row_id}"

    def check(self) -> Iterable[str]:
        """Return one formatted message per row of the validity query."""
        result = query_geometry_valid(self.dataset, SQL_ONLY_VALID_TEMPLATE)

        return [
            self.message.format(
                table_name=table_name,
                column_name=column_name,
                reason=reason,
                count=count,
                # Singular label only for exactly one occurrence.
                count_label=("time" if count == 1 else "times"),
                row_id=row_id,
            )
            for table_name, column_name, reason, count, row_id in result
        ]


class ValidGeometryValidator(validator.Validator):
"""Geometries should be valid and simple."""

code = 23
level = validator.ValidationLevel.ERROR
message = "Found invalid geometry in table: {table_name}, column {column_name}, reason: {reason}, {count} {count_label}, example id {row_id}"

def check(self) -> Iterable[str]:
result = query_geometry_valid(self.dataset, SQL_VALID_TEMPLATE)

return [
self.message.format(
Expand Down
4 changes: 2 additions & 2 deletions geopackage_validator/validations/name_length_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def query_names(dataset) -> Iterable[Tuple[str, str, int]]:


class NameLengthValidatorV0(validator.Validator):
f"""All names must be maximally {LEGACY_MAX_LENGTH} characters long."""
"""LEGACY: * All names must be maximally 53 characters long."""

code = 16
level = validator.ValidationLevel.ERROR
Expand All @@ -42,7 +42,7 @@ def check_columns(cls, names: Iterable[Tuple[str, str, int]]) -> List[str]:


class NameLengthValidator(validator.Validator):
f"""All names must be maximally {MAX_LENGTH} characters long."""
"""All names must be maximally 57 characters long."""

code = 21
level = validator.ValidationLevel.ERROR
Expand Down
2 changes: 1 addition & 1 deletion geopackage_validator/validations/srs_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def srs_equal_check_query(dataset) -> Iterable[str]:


class SrsValidatorV0(validator.Validator):
"""Only the following EPSG spatial reference systems are allowed: 28992, 3034, 3035, 3038, 3039, 3040, 3041, 3042, 3043, 3044, 3045, 3046, 3047, 3048, 3049, 3050, 3051, 4258, 4936, 4937, 5730, 7409."""
"""LEGACY: * Only the following EPSG spatial reference systems are allowed: 28992, 3034, 3035, 3038, 3039, 3040, 3041, 3042, 3043, 3044, 3045, 3046, 3047, 3048, 3049, 3050, 3051, 4258, 4936, 4937, 5730, 7409."""

code = 12
level = validator.ValidationLevel.ERROR
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ def check_table_definitions(self, definitions_current: TableDefinition):


class TableDefinitionValidatorV0(validator.Validator):
"""Geopackage must conform to table names in the given JSON definitions."""
"""LEGACY: * Geopackage must conform to table names in the given JSON definitions."""

code = 0
level = validator.ValidationLevel.ERROR
Expand Down
Binary file added tests/data/test_geometry_simple.gpkg
Binary file not shown.
2 changes: 1 addition & 1 deletion tests/test_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ def test_determine_validations_to_use_none():
"RQ1",
"RQ2",
"RQ4",
"RQ5",
"RQ6",
"RQ7",
"RQ9",
Expand All @@ -35,6 +34,7 @@ def test_determine_validations_to_use_none():
"RQ15",
"RQ21",
"RQ22",
"RQ23",
"RC17",
"RC18",
"RC19",
Expand Down
34 changes: 30 additions & 4 deletions tests/validations/test_geometry_valid_check.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,36 @@
from geopackage_validator.utils import open_dataset
from geopackage_validator.validations.geometry_valid_check import query_geometry_valid
from geopackage_validator.validations.geometry_valid_check import (
query_geometry_valid,
SQL_ONLY_VALID_TEMPLATE,
SQL_VALID_TEMPLATE,
)


def test_with_gpkg_valid():
    """The validity-only query reports one self-intersection finding."""
    dataset = open_dataset("tests/data/test_geometry_valid.gpkg")
    rows = list(query_geometry_valid(dataset, SQL_ONLY_VALID_TEMPLATE))

    assert len(rows) == 1
    table_name, column_name, reason, count, row_id = rows[0]
    assert table_name == "test_geometry_valid"
    assert column_name == "geometry"
    assert reason == "Self-intersection"
    assert count == 1
    assert row_id == 1


def test_with_gpkg_simple():
    """The valid-and-simple query reports one 'Not Simple' finding."""
    dataset = open_dataset("tests/data/test_geometry_simple.gpkg")
    rows = list(query_geometry_valid(dataset, SQL_VALID_TEMPLATE))

    # Expected row: (table_name, column_name, reason, count, row_id).
    expected_row = ("test_geometry_simple", "geometry", "Not Simple", 1, 1)
    assert len(rows) == 1
    for actual, expected in zip(rows[0], expected_row):
        assert actual == expected


def test_with_gpkg_valid_simple():
dataset = open_dataset("tests/data/test_geometry_valid.gpkg")
checks = list(query_geometry_valid(dataset, SQL_VALID_TEMPLATE))
assert len(checks) == 1
assert checks[0][0] == "test_geometry_valid"
assert checks[0][1] == "geometry"
Expand All @@ -15,5 +41,5 @@ def test_with_gpkg():

def test_with_gpkg_allcorrect():
    """The valid-and-simple query yields no findings for the all-correct file."""
    dataset = open_dataset("tests/data/test_allcorrect.gpkg")
    findings = list(query_geometry_valid(dataset, SQL_VALID_TEMPLATE))
    assert not findings
Loading