Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Check for null geometries #137

Merged
merged 9 commits into from
Dec 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ jobs:
if [ "${{ matrix.os }}" = "ubuntu-20.04" ]
then
echo adding ubuntugis unstable
sudo apt-add-repository ppa:ubuntugis/ubuntugis-unstable
sudo apt-add-repository ppa:ubuntugis/ubuntugis-unstable
else
echo adding ubuntugis stable
sudo apt-add-repository ppa:ubuntugis/ppa
Expand Down Expand Up @@ -70,4 +70,4 @@ jobs:
pytest -vv
- name: Test with black
run: |
black --check .
black --check --diff .
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ The current checks are (see also the 'show-validations' command):
| RQ21 | All layer and column names shall not be longer than 57 characters. |
| RQ22 | Only the following EPSG spatial reference systems are allowed: 28992, 3034, 3035, 3040, 3041, 3042, 3043, 3044, 3045, 3046, 3047, 3048, 3049, 3857, 4258, 4326, 4936, 4937, 5730, 7409. |
| RQ23 | Geometry should be valid and simple. |
| RQ24 | Geometry should not be empty (e.g. 'POINT EMPTY', represented as 'POINT(NaN NaN)'). |
| RQ24 | Geometry should not be null or empty (e.g. 'POINT EMPTY', WKT 'POINT(NaN NaN)'). |
| RC17 | It is recommended to name all GEOMETRY type columns 'geom'. |
| RC18 | It is recommended to give all GEOMETRY type columns the same name. |
| RC19 | It is recommended to only use multidimensional geometry coordinates (elevation and measurement) when necessary. |
Expand Down
28 changes: 19 additions & 9 deletions geopackage_validator/validations/geometry_empty_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,32 +2,41 @@
from geopackage_validator.validations import validator
from geopackage_validator import utils

SQL_EMPTY_TEMPLATE = """SELECT count(row_id) AS count, row_id
SQL_EMPTY_TEMPLATE = """SELECT type, count(type) AS count, row_id
FROM(
SELECT
CASE
WHEN ST_IsEmpty("{column_name}") = 1
THEN 'empty'
WHEN "{column_name}" IS NULL
THEN 'null'
END AS type,
cast(rowid AS INTEGER) AS row_id
FROM "{table_name}" WHERE ST_IsEmpty("{column_name}") = 1
);"""
FROM "{table_name}" WHERE ST_IsEmpty("{column_name}") = 1 OR "{column_name}" IS NULL
)
GROUP BY type;"""


def query_geometry_empty(dataset, sql_template) -> Iterable[Tuple[str, str, int, int]]:
def query_geometry_empty(
dataset, sql_template
) -> Iterable[Tuple[str, str, str, int, int]]:
columns = utils.dataset_geometry_tables(dataset)

for table_name, column_name, _ in columns:
validations = dataset.ExecuteSQL(
sql_template.format(table_name=table_name, column_name=column_name)
)
for count, row_id in validations:
yield table_name, column_name, count, row_id
for type, count, row_id in validations:
yield table_name, column_name, type, count, row_id
dataset.ReleaseResultSet(validations)


class EmptyGeometryValidator(validator.Validator):
"""Geometries should not be empty."""
"""Geometries should not be null or empty."""

code = 24
level = validator.ValidationLevel.ERROR
message = "Found empty geometry in table: {table_name}, column {column_name}, {count} {count_label}, example id {row_id}"
message = "Found {type} geometry in table: {table_name}, column {column_name}, {count} {count_label}, example id {row_id}"

def check(self) -> Iterable[str]:
result = query_geometry_empty(self.dataset, SQL_EMPTY_TEMPLATE)
Expand All @@ -36,10 +45,11 @@ def check(self) -> Iterable[str]:
self.message.format(
table_name=table_name,
column_name=column_name,
type=type,
count=count,
count_label=("time" if count == 1 else "times"),
row_id=row_id,
)
for table_name, column_name, count, row_id in result
for table_name, column_name, type, count, row_id in result
if count > 0
]
Binary file modified tests/data/test_geometry_empty.gpkg
Binary file not shown.
Binary file added tests/data/test_geometry_null.gpkg
Binary file not shown.
12 changes: 11 additions & 1 deletion tests/validations/test_geometry_empty_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,17 @@ def test_with_gpkg_empty():
assert len(result) == 1
assert (
result[0]
== "Found empty geometry in table: stations, column geom, 45 times, example id 129"
== "Found empty geometry in table: test_geometry_empty, column geom, 45 times, example id 129"
)


def test_with_gpkg_null():
dataset = open_dataset("tests/data/test_geometry_null.gpkg")
result = list(EmptyGeometryValidator(dataset).check())
assert len(result) == 1
assert (
result[0]
== "Found null geometry in table: test_geometry_null, column geometry, 2 times, example id 1"
)


Expand Down
7 changes: 7 additions & 0 deletions tests/validations/test_geometry_valid_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,13 @@ def test_with_gpkg_empty():
assert len(checks) == 0


def test_with_gpkg_null():
# geometries that are null slip through
dataset = open_dataset("tests/data/test_geometry_null.gpkg")
checks = list(query_geometry_valid(dataset, SQL_VALID_TEMPLATE))
assert len(checks) == 0


def test_with_gpkg_allcorrect():
dataset = open_dataset("tests/data/test_allcorrect.gpkg")
checks = list(query_geometry_valid(dataset, SQL_VALID_TEMPLATE))
Expand Down