Skip to content

Commit

Permalink
Merge pull request #137 from PDOK/PDOK/validate_null_geoms
Browse files Browse the repository at this point in the history
Check for null geometries
  • Loading branch information
kad-korpem authored Dec 4, 2024
2 parents 8baf13f + 64c03b1 commit 635781d
Show file tree
Hide file tree
Showing 7 changed files with 40 additions and 13 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ jobs:
if [ "${{ matrix.os }}" = "ubuntu-20.04" ]
then
echo adding ubuntugis unstable
sudo apt-add-repository ppa:ubuntugis/ubuntugis-unstable
sudo apt-add-repository ppa:ubuntugis/ubuntugis-unstable
else
echo adding ubuntugis stable
sudo apt-add-repository ppa:ubuntugis/ppa
Expand Down Expand Up @@ -70,4 +70,4 @@ jobs:
pytest -vv
- name: Test with black
run: |
black --check .
black --check --diff .
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ The current checks are (see also the 'show-validations' command):
| RQ21 | All layer and column names shall not be longer than 57 characters. |
| RQ22 | Only the following EPSG spatial reference systems are allowed: 28992, 3034, 3035, 3040, 3041, 3042, 3043, 3044, 3045, 3046, 3047, 3048, 3049, 3857, 4258, 4326, 4936, 4937, 5730, 7409. |
| RQ23 | Geometry should be valid and simple. |
| RQ24 | Geometry should not be empty (e.g. 'POINT EMPTY', represented as 'POINT(NaN NaN)'). |
| RQ24 | Geometry should not be null or empty (e.g. 'POINT EMPTY', WKT 'POINT(NaN NaN)'). |
| RC17 | It is recommended to name all GEOMETRY type columns 'geom'. |
| RC18 | It is recommended to give all GEOMETRY type columns the same name. |
| RC19 | It is recommended to only use multidimensional geometry coordinates (elevation and measurement) when necessary. |
Expand Down
28 changes: 19 additions & 9 deletions geopackage_validator/validations/geometry_empty_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,32 +2,41 @@
from geopackage_validator.validations import validator
from geopackage_validator import utils

SQL_EMPTY_TEMPLATE = """SELECT count(row_id) AS count, row_id
SQL_EMPTY_TEMPLATE = """SELECT type, count(type) AS count, row_id
FROM(
SELECT
CASE
WHEN ST_IsEmpty("{column_name}") = 1
THEN 'empty'
WHEN "{column_name}" IS NULL
THEN 'null'
END AS type,
cast(rowid AS INTEGER) AS row_id
FROM "{table_name}" WHERE ST_IsEmpty("{column_name}") = 1
);"""
FROM "{table_name}" WHERE ST_IsEmpty("{column_name}") = 1 OR "{column_name}" IS NULL
)
GROUP BY type;"""


def query_geometry_empty(dataset, sql_template) -> Iterable[Tuple[str, str, int, int]]:
def query_geometry_empty(
dataset, sql_template
) -> Iterable[Tuple[str, str, str, int, int]]:
columns = utils.dataset_geometry_tables(dataset)

for table_name, column_name, _ in columns:
validations = dataset.ExecuteSQL(
sql_template.format(table_name=table_name, column_name=column_name)
)
for count, row_id in validations:
yield table_name, column_name, count, row_id
for type, count, row_id in validations:
yield table_name, column_name, type, count, row_id
dataset.ReleaseResultSet(validations)


class EmptyGeometryValidator(validator.Validator):
"""Geometries should not be empty."""
"""Geometries should not be null or empty."""

code = 24
level = validator.ValidationLevel.ERROR
message = "Found empty geometry in table: {table_name}, column {column_name}, {count} {count_label}, example id {row_id}"
message = "Found {type} geometry in table: {table_name}, column {column_name}, {count} {count_label}, example id {row_id}"

def check(self) -> Iterable[str]:
result = query_geometry_empty(self.dataset, SQL_EMPTY_TEMPLATE)
Expand All @@ -36,10 +45,11 @@ def check(self) -> Iterable[str]:
self.message.format(
table_name=table_name,
column_name=column_name,
type=type,
count=count,
count_label=("time" if count == 1 else "times"),
row_id=row_id,
)
for table_name, column_name, count, row_id in result
for table_name, column_name, type, count, row_id in result
if count > 0
]
Binary file modified tests/data/test_geometry_empty.gpkg
Binary file not shown.
Binary file added tests/data/test_geometry_null.gpkg
Binary file not shown.
12 changes: 11 additions & 1 deletion tests/validations/test_geometry_empty_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,17 @@ def test_with_gpkg_empty():
assert len(result) == 1
assert (
result[0]
== "Found empty geometry in table: stations, column geom, 45 times, example id 129"
== "Found empty geometry in table: test_geometry_empty, column geom, 45 times, example id 129"
)


def test_with_gpkg_null():
dataset = open_dataset("tests/data/test_geometry_null.gpkg")
result = list(EmptyGeometryValidator(dataset).check())
assert len(result) == 1
assert (
result[0]
== "Found null geometry in table: test_geometry_null, column geometry, 2 times, example id 1"
)


Expand Down
7 changes: 7 additions & 0 deletions tests/validations/test_geometry_valid_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,13 @@ def test_with_gpkg_empty():
assert len(checks) == 0


def test_with_gpkg_null():
# geometries that are null slip through
dataset = open_dataset("tests/data/test_geometry_null.gpkg")
checks = list(query_geometry_valid(dataset, SQL_VALID_TEMPLATE))
assert len(checks) == 0


def test_with_gpkg_allcorrect():
dataset = open_dataset("tests/data/test_allcorrect.gpkg")
checks = list(query_geometry_valid(dataset, SQL_VALID_TEMPLATE))
Expand Down

0 comments on commit 635781d

Please sign in to comment.