Skip to content

Commit

Permalink
Merge pull request #121 from PDOK/simple-geometry-check
Browse files Browse the repository at this point in the history
check simple geometry
  • Loading branch information
Shalucik authored Jun 5, 2024
2 parents b82070a + 001d665 commit 9720e77
Show file tree
Hide file tree
Showing 12 changed files with 120 additions and 27 deletions.
9 changes: 5 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ The current checks are (see also the 'show-validations' command):
| RQ2 | Layers must have at least one feature. |
| RQ3 | _LEGACY:_ * Layer features should have an allowed geometry_type (one of POINT, LINESTRING, POLYGON, MULTIPOINT, MULTILINESTRING, or MULTIPOLYGON). |
| RQ4 | The geopackage should have no views defined. |
| RQ5 | Geometry should be valid. |
| RQ5 | _LEGACY:_ * Geometry should be valid. |
| RQ6 | Column names must start with a letter, and valid characters are lowercase a-z, numbers or underscores. |
| RQ7 | Tables should have a feature id column with unique index. |
| RQ8 | Geopackage must conform to given JSON or YAML definitions. |
Expand All @@ -104,12 +104,13 @@ The current checks are (see also the 'show-validations' command):
| RQ14 | The geometry_type_name from the gpkg_geometry_columns table must be one of POINT, LINESTRING, POLYGON, MULTIPOINT, MULTILINESTRING, or MULTIPOLYGON. |
| RQ15 | All table geometries must match the geometry_type_name from the gpkg_geometry_columns table. |
| RQ16 | _LEGACY:_ * All layer and column names shall not be longer than 53 characters. |
| RQ21 | All layer and column names shall not be longer than 57 characters. |
| RQ22 | Only the following EPSG spatial reference systems are allowed: 28992, 3034, 3035, 3040, 3041, 3042, 3043, 3044, 3045, 3046, 3047, 3048, 3049, 3857, 4258, 4326, 4936, 4937, 5730, 7409. |
| RQ23 | Geometry should be valid and simple. |
| RC17 | It is recommended to name all GEOMETRY type columns 'geom'. |
| RC18 | It is recommended to give all GEOMETRY type columns the same name. |
| RC19 | It is recommended to only use multidimensional geometry coordinates (elevation and measurement) when necessary. |
| RC20 | It is recommended that all (MULTI)POLYGON geometries have a counter-clockwise orientation for their exterior ring, and a clockwise direction for all interior rings. |
| RQ21 | All layer and column names shall not be longer than 57 characters. |
| RQ22 | Only the following EPSG spatial reference systems are allowed: 28992, 3034, 3035, 3040, 3041, 3042, 3043, 3044, 3045, 3046, 3047, 3048, 3049, 3857, 4258, 4326, 4936, 4937, 5730, 7409. |
| RC20 | It is recommended that all (MULTI)POLYGON geometries have a counter-clockwise orientation for their exterior ring, and a clockwise direction for all interior rings. |
| UNKNOWN_WARNINGS | It is recommended that the unexpected (GDAL) warnings are looked into. |

\* Legacy requirements are only executed with the validate command when explicitly requested in the validation set.
Expand Down
11 changes: 9 additions & 2 deletions geopackage_validator/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,16 +420,23 @@ def geopackage_validator_command_generate_table_definitions(
name="show-validations",
help="Show all the possible validations that can be executed in the validate command.",
)
@click.option(
"--no-legacy",
required=False,
is_flag=True,
help="Output without Legacy checks",
)
@click.option(
"--yaml",
required=False,
is_flag=True,
help="Output yaml",
)
@click_log.simple_verbosity_option(logger)
def geopackage_validator_command_show_validations(yaml):
def geopackage_validator_command_show_validations(no_legacy, yaml):
try:
validation_codes = validate.get_validation_descriptions()
legacy = not no_legacy
validation_codes = validate.get_validation_descriptions(legacy)
output.print_output(validation_codes, yaml, yaml_indent=5)
except Exception:
logger.exception("Error while listing validations")
Expand Down
17 changes: 14 additions & 3 deletions geopackage_validator/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,14 @@

RQ0 = "RQ0"
RQ3 = "RQ3"
RQ5 = "RQ5"
RQ8 = "RQ8"
RQ12 = "RQ12"
RQ16 = "RQ16"


# Drop legacy requirements
DROP_LEGACY_RQ_FROM_ALL = [RQ0, RQ3, RQ12, RQ16]
DROP_LEGACY_RQ_FROM_ALL = [RQ0, RQ3, RQ5, RQ12, RQ16]


def validators_to_use(
Expand Down Expand Up @@ -193,10 +194,20 @@ def gdal_error_handler(err_class, err_num, error):
)


def get_validation_descriptions():
def get_validation_descriptions(legacy):
validation_classes = get_validator_classes()

if legacy:
return OrderedDict(
(klass.validation_code, klass.__doc__) for klass in validation_classes
)

rq_drop_list = DROP_LEGACY_RQ_FROM_ALL

return OrderedDict(
(klass.validation_code, klass.__doc__) for klass in validation_classes
(klass.validation_code, klass.__doc__)
for klass in validation_classes
if klass.validation_code not in rq_drop_list
)


Expand Down
6 changes: 5 additions & 1 deletion geopackage_validator/validations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@
GpkgGeometryTypeNameValidator,
GeometryTypeEqualsGpkgDefinitionValidator,
)
from geopackage_validator.validations.geometry_valid_check import ValidGeometryValidator
from geopackage_validator.validations.geometry_valid_check import (
ValidGeometryValidator,
ValidGeometryValidatorV0,
)
from geopackage_validator.validations.layerfeature_check import (
OGRIndexValidator,
NonEmptyLayerValidator,
Expand Down Expand Up @@ -45,6 +48,7 @@
"FeatureIdValidator",
"GeometryTypeValidator",
"ValidGeometryValidator",
"ValidGeometryValidatorV0",
"OGRIndexValidator",
"NonEmptyLayerValidator",
"LayerNameValidator",
Expand Down
2 changes: 1 addition & 1 deletion geopackage_validator/validations/geometry_type_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def aggregate(results):


class GeometryTypeValidator(validator.Validator):
"""Layer features should have an allowed geometry_type (one of POINT, LINESTRING, POLYGON, MULTIPOINT, MULTILINESTRING, or MULTIPOLYGON)."""
"""LEGACY: * Layer features should have an allowed geometry_type (one of POINT, LINESTRING, POLYGON, MULTIPOINT, MULTILINESTRING, or MULTIPOLYGON)."""

code = 3
level = validator.ValidationLevel.ERROR
Expand Down
58 changes: 51 additions & 7 deletions geopackage_validator/validations/geometry_valid_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@
from geopackage_validator.validations import validator
from geopackage_validator import utils


SQL_TEMPLATE = """SELECT reason, count(reason) AS count, row_id
SQL_ONLY_VALID_TEMPLATE = """SELECT reason, count(reason) AS count, row_id
FROM(
SELECT
CASE INSTR(ST_IsValidReason("{column_name}"), '[')
Expand All @@ -16,28 +15,73 @@
)
GROUP BY reason;"""

# Query template for the combined "valid and simple" geometry check.
# Placeholders: {table_name} and {column_name} are filled in via str.format.
# The inner SELECT keeps only rows where ST_IsValid(...) = 0 or
# ST_IsSimple(...) = 0 and derives a `reason` per row:
#   * invalid geometry -> the ST_IsValidReason text, cut off before the first
#     '[' when the text contains one;
#   * otherwise, when ST_IsSimple(...) = 0 -> the literal 'Not Simple'.
# The outer SELECT groups those rows by reason and returns
# (reason, count, row_id) for each group.
# NOTE(review): row_id is not aggregated, so it is presumably one arbitrary
# example row per reason group -- confirm against the SQL engine in use.
SQL_VALID_TEMPLATE = """SELECT reason, count(reason) AS count, row_id
FROM(
SELECT
CASE ST_IsValid("{column_name}")
WHEN 0
THEN
CASE INSTR(ST_IsValidReason("{column_name}"), '[')
WHEN 0
THEN ST_IsValidReason("{column_name}")
ELSE substr(ST_IsValidReason("{column_name}"), 0, INSTR(ST_IsValidReason("{column_name}"), '['))
END
ELSE
CASE ST_IsSimple("{column_name}")
WHEN 0
THEN 'Not Simple'
END
END AS reason,
cast(rowid AS INTEGER) AS row_id
FROM "{table_name}" WHERE ST_IsValid("{column_name}") = 0 OR ST_IsSimple("{column_name}") = 0
)
GROUP BY reason;"""

def query_geometry_valid(dataset) -> Iterable[Tuple[str, str, str, int]]:

def query_geometry_valid(dataset, sql_template) -> Iterable[Tuple[str, str, str, int]]:
columns = utils.dataset_geometry_tables(dataset)

for table_name, column_name, _ in columns:
validations = dataset.ExecuteSQL(
SQL_TEMPLATE.format(table_name=table_name, column_name=column_name)
sql_template.format(table_name=table_name, column_name=column_name)
)
for reason, count, row_id in validations:
yield table_name, column_name, reason, count, row_id
dataset.ReleaseResultSet(validations)


class ValidGeometryValidator(validator.Validator):
"""Geometries should be valid."""
class ValidGeometryValidatorV0(validator.Validator):
"""Legacy: * Geometries should be valid."""

code = 5
level = validator.ValidationLevel.ERROR
message = "Found invalid geometry in table: {table_name}, column {column_name}, reason: {reason}, {count} {count_label}, example id {row_id}"

def check(self) -> Iterable[str]:
result = query_geometry_valid(self.dataset)
result = query_geometry_valid(self.dataset, SQL_ONLY_VALID_TEMPLATE)

return [
self.message.format(
table_name=table_name,
column_name=column_name,
reason=reason,
count=count,
count_label=("time" if count == 1 else "times"),
row_id=row_id,
)
for table_name, column_name, reason, count, row_id in result
]


class ValidGeometryValidator(validator.Validator):
"""Geometries should be valid and simple."""

code = 23
level = validator.ValidationLevel.ERROR
message = "Found invalid geometry in table: {table_name}, column {column_name}, reason: {reason}, {count} {count_label}, example id {row_id}"

def check(self) -> Iterable[str]:
result = query_geometry_valid(self.dataset, SQL_VALID_TEMPLATE)

return [
self.message.format(
Expand Down
4 changes: 2 additions & 2 deletions geopackage_validator/validations/name_length_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def query_names(dataset) -> Iterable[Tuple[str, str, int]]:


class NameLengthValidatorV0(validator.Validator):
f"""All names must be maximally {LEGACY_MAX_LENGTH} characters long."""
"""LEGACY: * All names must be maximally 53 characters long."""

code = 16
level = validator.ValidationLevel.ERROR
Expand All @@ -42,7 +42,7 @@ def check_columns(cls, names: Iterable[Tuple[str, str, int]]) -> List[str]:


class NameLengthValidator(validator.Validator):
f"""All names must be maximally {MAX_LENGTH} characters long."""
"""All names must be maximally 57 characters long."""

code = 21
level = validator.ValidationLevel.ERROR
Expand Down
2 changes: 1 addition & 1 deletion geopackage_validator/validations/srs_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def srs_equal_check_query(dataset) -> Iterable[str]:


class SrsValidatorV0(validator.Validator):
"""Only the following EPSG spatial reference systems are allowed: 28992, 3034, 3035, 3038, 3039, 3040, 3041, 3042, 3043, 3044, 3045, 3046, 3047, 3048, 3049, 3050, 3051, 4258, 4936, 4937, 5730, 7409."""
"""LEGACY: * Only the following EPSG spatial reference systems are allowed: 28992, 3034, 3035, 3038, 3039, 3040, 3041, 3042, 3043, 3044, 3045, 3046, 3047, 3048, 3049, 3050, 3051, 4258, 4936, 4937, 5730, 7409."""

code = 12
level = validator.ValidationLevel.ERROR
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ def check_table_definitions(self, definitions_current: TableDefinition):


class TableDefinitionValidatorV0(validator.Validator):
"""Geopackage must conform to table names in the given JSON definitions."""
"""LEGACY: * Geopackage must conform to table names in the given JSON definitions."""

code = 0
level = validator.ValidationLevel.ERROR
Expand Down
Binary file added tests/data/test_geometry_simple.gpkg
Binary file not shown.
2 changes: 1 addition & 1 deletion tests/test_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ def test_determine_validations_to_use_none():
"RQ1",
"RQ2",
"RQ4",
"RQ5",
"RQ6",
"RQ7",
"RQ9",
Expand All @@ -35,6 +34,7 @@ def test_determine_validations_to_use_none():
"RQ15",
"RQ21",
"RQ22",
"RQ23",
"RC17",
"RC18",
"RC19",
Expand Down
34 changes: 30 additions & 4 deletions tests/validations/test_geometry_valid_check.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,36 @@
from geopackage_validator.utils import open_dataset
from geopackage_validator.validations.geometry_valid_check import query_geometry_valid
from geopackage_validator.validations.geometry_valid_check import (
query_geometry_valid,
SQL_ONLY_VALID_TEMPLATE,
SQL_VALID_TEMPLATE,
)


def test_with_gpkg():
def test_with_gpkg_valid():
dataset = open_dataset("tests/data/test_geometry_valid.gpkg")
checks = list(query_geometry_valid(dataset))
checks = list(query_geometry_valid(dataset, SQL_ONLY_VALID_TEMPLATE))
assert len(checks) == 1
assert checks[0][0] == "test_geometry_valid"
assert checks[0][1] == "geometry"
assert checks[0][2] == "Self-intersection"
assert checks[0][3] == 1
assert checks[0][4] == 1


def test_with_gpkg_simple():
    """The simple-geometry fixture yields one finding: 'Not Simple', count 1, row id 1."""
    gpkg = open_dataset("tests/data/test_geometry_simple.gpkg")
    findings = list(query_geometry_valid(gpkg, SQL_VALID_TEMPLATE))
    assert len(findings) == 1

    # Field order: table name, geometry column, reason, count, example row id.
    expected = ("test_geometry_simple", "geometry", "Not Simple", 1, 1)
    for position, value in enumerate(expected):
        assert findings[0][position] == value


def test_with_gpkg_valid_simple():
dataset = open_dataset("tests/data/test_geometry_valid.gpkg")
checks = list(query_geometry_valid(dataset, SQL_VALID_TEMPLATE))
assert len(checks) == 1
assert checks[0][0] == "test_geometry_valid"
assert checks[0][1] == "geometry"
Expand All @@ -15,5 +41,5 @@ def test_with_gpkg():

def test_with_gpkg_allcorrect():
dataset = open_dataset("tests/data/test_allcorrect.gpkg")
checks = list(query_geometry_valid(dataset))
checks = list(query_geometry_valid(dataset, SQL_VALID_TEMPLATE))
assert len(checks) == 0

0 comments on commit 9720e77

Please sign in to comment.