From 197a936f3a047a2eee7168bc4db4e80b73c77c6b Mon Sep 17 00:00:00 2001 From: Roel Arents Date: Wed, 13 Nov 2024 13:46:29 +0100 Subject: [PATCH 1/5] add support for (validating and generating) (unique) indices and foreign keys in table definitions PDOK-16629 --- geopackage_validator/cli.py | 42 ++++-- geopackage_validator/generate.py | 115 +++++++++++++-- geopackage_validator/models.py | 98 ++++++++++--- geopackage_validator/output.py | 2 +- geopackage_validator/utils.py | 29 +++- .../validations/table_definitions_check.py | 106 ++++++++++++-- geopackage_validator/validations/validator.py | 4 +- pyproject.toml | 1 + .../test_allcorrect_with_indexes_and_fks.gpkg | Bin 0 -> 126976 bytes ...orrect_with_indexes_and_fks_definition.yml | 87 ++++++++++++ ...est_changed_indexes_and_fks_definition.yml | 87 ++++++++++++ tests/test_cli.py | 131 +++++++++++++++++- .../test_table_definitions_check.py | 41 ++++-- 13 files changed, 661 insertions(+), 82 deletions(-) create mode 100644 tests/data/test_allcorrect_with_indexes_and_fks.gpkg create mode 100644 tests/data/test_allcorrect_with_indexes_and_fks_definition.yml create mode 100644 tests/data/test_changed_indexes_and_fks_definition.yml diff --git a/geopackage_validator/cli.py b/geopackage_validator/cli.py index e0ae4c4..c32684f 100644 --- a/geopackage_validator/cli.py +++ b/geopackage_validator/cli.py @@ -2,10 +2,10 @@ """Main CLI entry for the Geopackage validator tool.""" # Setup logging before package imports. import logging -from datetime import datetime -from pathlib import Path import sys import time +from datetime import datetime +from pathlib import Path import click import click_log @@ -302,6 +302,13 @@ def geopackage_validator_command( is_flag=True, help="Output yaml", ) +@click.option( + "--with-indexes-and-fks", + default=False, + required=False, + is_flag=True, + help="Include indexes (and unique constraints) and foreign keys in the definitions", +) @click.option( "--s3-endpoint-no-protocol", envvar="S3_ENDPOINT_NO_PROTOCOL", @@ -367,17 +374,18 @@ def geopackage_validator_command( ) @click_log.simple_verbosity_option(logger) def geopackage_validator_command_generate_table_definitions( - gpkg_path, - yaml, - s3_endpoint_no_protocol, - s3_access_key, - s3_secret_key, - s3_bucket, - s3_key, - s3_secure, - s3_virtual_hosting, - s3_signing_region, - s3_no_sign_request, + gpkg_path: Path, + yaml: bool, + with_indexes_and_fks: bool, + s3_endpoint_no_protocol: str, + s3_access_key: str, + s3_secret_key: str, + s3_bucket: str, + s3_key: str, + s3_secure: bool, + s3_virtual_hosting: bool, + s3_signing_region: str, + s3_no_sign_request: bool, ): gpkg_path_not_exists = s3_endpoint_no_protocol is None and ( gpkg_path is None @@ -399,7 +407,9 @@ def geopackage_validator_command_generate_table_definitions( s3_signing_region=s3_signing_region, s3_no_sign_request=s3_no_sign_request, ) - definitionlist = generate.generate_definitions_for_path(gpkg_path) + definitionlist = generate.generate_definitions_for_path( + gpkg_path, with_indexes_and_fks + ) else: with s3.minio_resource( s3_endpoint_no_protocol, @@ -409,7 +419,9 @@ def geopackage_validator_command_generate_table_definitions( s3_key, s3_secure, ) as localfilename: - definitionlist = generate.generate_definitions_for_path(localfilename) + definitionlist = generate.generate_definitions_for_path( + localfilename, with_indexes_and_fks + ) output.print_output(definitionlist, yaml) except Exception: logger.exception("Error while generating table definitions") diff --git a/geopackage_validator/generate.py b/geopackage_validator/generate.py index 3fbd116..403a7e9 100644 --- a/geopackage_validator/generate.py +++ b/geopackage_validator/generate.py @@ -1,20 +1,24 @@ import logging -from typing import List +from typing import List, Optional, Dict -from osgeo.ogr import DataSource +from osgeo.ogr import DataSource, Layer from geopackage_validator import __version__ from geopackage_validator import utils from geopackage_validator.models import ( ColumnDefinition, + ColumnMapping, + ForeignKeyDefinition, + IndexDefinition, TableDefinition, TablesDefinition, ) +from geopackage_validator.utils import group_by logger = logging.getLogger(__name__) -def columns_definition(table, geometry_column) -> List[ColumnDefinition]: +def column_definitions(table, geometry_column) -> List[ColumnDefinition]: layer_definition = table.GetLayerDefn() assert layer_definition, f'Invalid Layer {"" if not table else table.GetName()}' @@ -40,8 +44,85 @@ def fid_column_definition(table) -> List[ColumnDefinition]: return [ColumnDefinition(name=name, type="INTEGER")] +def get_index_definitions( + dataset: DataSource, table_name: str +) -> List[IndexDefinition]: + index_definitions: List[IndexDefinition] = [] + index_list = dataset.ExecuteSQL( + f"select name, \"unique\", origin from pragma_index_list('{table_name}');" + ) + pk_in_index_list = False + for index_listing in index_list: + pk_in_index_list = pk_in_index_list or index_listing["origin"] == "pk" + index_definitions.append( + IndexDefinition( + columns=tuple(get_index_column_names(dataset, index_listing["name"])), + unique=bool(int(index_listing["unique"])), + ) + ) + dataset.ReleaseResultSet(index_list) + index_definitions = sorted(index_definitions, key=lambda d: d.columns) + + if not pk_in_index_list: + pk_index = get_pk_index(dataset, table_name) + if pk_index is not None: + index_definitions.insert(0, pk_index) + + return index_definitions + + +def get_pk_index(dataset: DataSource, table_name: str) -> Optional[IndexDefinition]: + pk_columns = dataset.ExecuteSQL( + f"select name from pragma_table_info('{table_name}') where pk;" + ) + column_names = tuple(r["name"] for r in pk_columns) + if len(column_names) == 0: + return None + return IndexDefinition(columns=column_names, unique=True) + + +def get_index_column_names(dataset: DataSource, index_name: str) -> List[str]: + index_info = dataset.ExecuteSQL( + f"select name from pragma_index_info('{index_name}');" + ) + column_names: List[str] = [r["name"] for r in index_info] + dataset.ReleaseResultSet(index_info) + return column_names -def generate_table_definitions(dataset: DataSource) -> TablesDefinition: + +def get_foreign_key_definitions(dataset, table_name) -> List[ForeignKeyDefinition]: + foreign_key_list = dataset.ExecuteSQL( + f'select id, seq, "table", "from", "to" from pragma_foreign_key_list(\'{table_name}\');' + ) + foreign_key_definitions: List[ForeignKeyDefinition] = [] + for foreign_key_listing in group_by(foreign_key_list, lambda r: r["id"]): + table: str = "" + columns: Dict[str, str] = {} + for column_reference in foreign_key_listing: + table = column_reference["table"] + to = column_reference["to"] + if to is None: + pk_index = get_pk_index(dataset, column_reference["table"]) + to = pk_index.columns[int(column_reference["seq"])] + columns[column_reference["from"]] = to + foreign_key_definitions.append( + ForeignKeyDefinition( + table=table, + columns=tuple( + ColumnMapping(src=c[0], dst=c[1]) for c in columns.items() + ), + ) + ) + foreign_key_definitions = sorted( + foreign_key_definitions, key=lambda fk: (fk.table, (c.src for c in fk.columns)) + ) + dataset.ReleaseResultSet(foreign_key_list) + return foreign_key_definitions + + +def generate_table_definitions( + dataset: DataSource, with_indexes_and_fks: bool = False +) -> TablesDefinition: projections = set() table_geometry_types = { table_name: geometry_type_name @@ -50,6 +131,7 @@ def generate_table_definitions(dataset: DataSource) -> TablesDefinition: table_list: List[TableDefinition] = [] for table in dataset: + table: Layer geo_column_name = table.GetGeometryColumn() if geo_column_name == "": continue @@ -59,11 +141,21 @@ def generate_table_definitions(dataset: DataSource) -> TablesDefinition: "name": geo_column_name, "type": table_geometry_types[table_name], } + columns = tuple(column_definitions(table, geometry_column)) + + indexes = None + foreign_keys = None + if with_indexes_and_fks: + indexes = tuple(get_index_definitions(dataset, table_name)) + foreign_keys = tuple(get_foreign_key_definitions(dataset, table_name)) + table_list.append( TableDefinition( name=table_name, geometry_column=geo_column_name, - columns=columns_definition(table, geometry_column), + columns=columns, + indexes=indexes, + foreign_keys=foreign_keys, ) ) @@ -74,16 +166,21 @@ def generate_table_definitions(dataset: DataSource) -> TablesDefinition: result = TablesDefinition( geopackage_validator_version=__version__, projection=int(projections.pop()), - tables=table_list, + tables=tuple(sorted(table_list, key=lambda t: t.name)), ) return result -def generate_definitions_for_path(gpkg_path: str) -> TablesDefinition: +def get_datasource_for_path(gpkg_path: str) -> DataSource: """Starts the geopackage validation.""" utils.check_gdal_version() + return utils.open_dataset(gpkg_path) - dataset = utils.open_dataset(gpkg_path) - return generate_table_definitions(dataset) +def generate_definitions_for_path( + gpkg_path: str, with_indexes_and_fks: bool = False +) -> TablesDefinition: + return generate_table_definitions( + get_datasource_for_path(gpkg_path), with_indexes_and_fks + ) diff --git a/geopackage_validator/models.py b/geopackage_validator/models.py index f3e3cbf..7d6058f 100644 --- a/geopackage_validator/models.py +++ b/geopackage_validator/models.py @@ -1,7 +1,8 @@ import copy -from typing import List, Optional +from typing import Optional, Tuple -from pydantic import BaseModel +from pydantic import BaseModel, Field, field_validator +from semver import Version class Named(BaseModel): @@ -9,39 +10,92 @@ class Named(BaseModel): class ColumnDefinition(Named): + class Config: + frozen = True + type: str +class IndexDefinition(BaseModel): + class Config: + frozen = True + + columns: Tuple[str, ...] = Field(min_length=1) + unique: bool = False + + +class ColumnMapping(BaseModel): + class Config: + frozen = True + + src: str + dst: str + + +class ForeignKeyDefinition(BaseModel): + class Config: + frozen = True + + @field_validator("columns") + @classmethod + def unique_src_columns( + cls, v: Tuple[ColumnMapping, ...] + ) -> Tuple[ColumnMapping, ...]: + src_columns = set() + for c in v: + if c.src in src_columns: + raise ValueError(f"Duplicate src column detected: {c.src}") + src_columns.add(c.src) + return v + + table: str = Field(min_length=1) + columns: Tuple[ColumnMapping, ...] = Field(min_length=1) + + class TableDefinition(Named): + class Config: + frozen = True + geometry_column: str = "geom" - columns: List[ColumnDefinition] = [] + columns: Tuple[ColumnDefinition, ...] = tuple() + """Ordered as in the table (left to right), but with FID and geometry columns always first. + (This order is not validated.)""" + indexes: Optional[Tuple[IndexDefinition, ...]] = None + """None means: don't validate. Empty list means: there should be no indexes.""" + foreign_keys: Optional[Tuple[ForeignKeyDefinition, ...]] = None + """None means: don't validate. Empty list means: there should be no foreign keys.""" class TablesDefinition(BaseModel): + class Config: + frozen = True + geopackage_validator_version: str = "0" projection: Optional[int] - tables: List[TableDefinition] + tables: Tuple[TableDefinition, ...] + """Ordered by table name""" + + def with_indexes_and_fks(self) -> bool: + for table in self.tables: + if table.indexes is not None or table.foreign_keys is not None: + return True + return False -def migrate_tables_definition(old: dict) -> dict: +def migrate_tables_definition(original: dict) -> dict: """Migrate a possibly old tables definition to new schema/model""" - version = old.get("geopackage_validator_version", "0") - # older versions where not versioned (?), so assuming "0" if there is no version - version_tuple = tuple(int(v) for v in version.split(".")) - if version_tuple == (0, 0, 0, "-dev") or version_tuple > ( - 0, - 5, - 8, - ): # no changes after 0.5.8 - return old - new = copy.deepcopy(old) - if version_tuple <= ( - 0, - 5, - 8, - ): # until 0.5.8, column's "type" property was named "data_type" - for t in new.get("tables", []): + # older versions were not versioned (?), so assuming "0.0.0" if there is no version + version = Version.parse(original.get("geopackage_validator_version", "0.0.0")) + if version == Version(0, 0, 0, "dev"): + return original + # nothing changed after v0.5.8 + if version > Version(0, 5, 8): + return original + migrated = copy.deepcopy(original) + # until and including 0.5.8, column's "type" property was named "data_type" + if version <= Version(0, 5, 8): + for t in migrated.get("tables", []): for c in t.get("columns", []): c["type"] = c["data_type"] del c["data_type"] - return new + return migrated diff --git a/geopackage_validator/output.py b/geopackage_validator/output.py index aa2a515..79ceb1b 100644 --- a/geopackage_validator/output.py +++ b/geopackage_validator/output.py @@ -68,7 +68,7 @@ def print_output(python_object, as_yaml, yaml_indent=2): def print_output_pydantic(model: BaseModel, as_yaml: bool, yaml_indent=2): - content = model.model_dump_json(indent=4) + content = model.model_dump_json(indent=4, exclude_none=True) if as_yaml: python_object = yaml.safe_load(content) content = yaml.dump(python_object, indent=yaml_indent, sort_keys=False) diff --git a/geopackage_validator/utils.py b/geopackage_validator/utils.py index 403b392..f745ed7 100644 --- a/geopackage_validator/utils.py +++ b/geopackage_validator/utils.py @@ -1,13 +1,14 @@ -import sys +import json import os +import sys import warnings from contextlib import contextmanager from functools import lru_cache - from pathlib import Path -import json +from typing import List, Tuple, Iterable, Callable import yaml +from osgeo.ogr import DataSource try: from osgeo import ogr, osr, gdal @@ -41,7 +42,7 @@ } -def open_dataset(filename=None, error_handler=None): +def open_dataset(filename=None, error_handler=None) -> DataSource: if error_handler is not None: gdal.UseExceptions() gdal.PushErrorHandler(error_handler) @@ -77,7 +78,7 @@ def check_gdal_version(): @lru_cache(None) -def dataset_geometry_tables(dataset): +def dataset_geometry_tables(dataset: ogr.DataSource) -> List[Tuple[str, str, str]]: """ Generate a list of geometry type names from the gpkg_geometry_columns table. """ @@ -110,3 +111,21 @@ def set_gdal_env(**kwargs): v = gdal_argument_mapping.get(v, v) if gdal_env_parameter not in os.environ: gdal.SetConfigOption(gdal_env_parameter, v) + + +def group_by( + iterable: Iterable[any], grouper: Callable[[any], any] +) -> Iterable[List[any]]: + first = True + current_id: any = None + group: List[any] = [] + for item in iterable: + group_id = grouper(item) + if not first and group_id != current_id: + yield group + group = [] + current_id = group_id + group.append(item) + first = False + if len(group) > 0: + yield group diff --git a/geopackage_validator/validations/table_definitions_check.py b/geopackage_validator/validations/table_definitions_check.py index 344a239..37e5dcd 100644 --- a/geopackage_validator/validations/table_definitions_check.py +++ b/geopackage_validator/validations/table_definitions_check.py @@ -1,5 +1,8 @@ from typing import Iterable, List, Dict, Set, Tuple +from osgeo.ogr import DataSource +from pydantic import BaseModel + from geopackage_validator.generate import generate_table_definitions from geopackage_validator.models import ( Named, @@ -11,7 +14,7 @@ def prepare_comparison( - new_: List[Named], old_: List[Named] + new_: Iterable[Named], old_: Iterable[Named] ) -> Tuple[Dict[str, Named], Dict[str, Named], str, str, Set[str]]: new_dict = {item.name: item for item in new_} old_dict = {item.name: item for item in old_} @@ -22,8 +25,8 @@ def prepare_comparison( def compare_column_definitions( - new_columns: List[ColumnDefinition], - old_columns: List[ColumnDefinition], + new_columns: Iterable[ColumnDefinition], + old_columns: Iterable[ColumnDefinition], table_name: str, ) -> List[str]: assert old_columns, f"table {table_name} in table definition misses columns" @@ -48,10 +51,37 @@ def compare_column_definitions( return result + wrong_types +def compare_object_lists( + current: Iterable[object], + expected: Iterable[object], + table_name: str, +) -> List[str]: + messages: List[str] = [] + added = set(current) + for o in expected: + if o in added: + added.remove(o) + else: + messages.append( + f"table {table_name} misses {o.__class__.__name__}: {o_repr_oneline(o)}" + ) + for o in added: + messages.append( + f"table {table_name} has extra {o.__class__.__name__}: {o_repr_oneline(o)}" + ) + return messages + + +def o_repr_oneline(o: object) -> str: + r = repr(o) if not isinstance(o, BaseModel) else o.model_dump_json(indent=0) + return r.replace("\n", " ") + + def compare_table_definitions( new_definition: TablesDefinition, old_definition: TablesDefinition, compare_columns=True, + compare_indexes_and_fks=True, ) -> List[str]: results = [] @@ -84,10 +114,39 @@ def compare_table_definitions( results += compare_column_definitions( new_table.columns, old_table.columns, table_name ) + if compare_indexes_and_fks: + if old_table.indexes is None: + results.append( + f"index checking is enabled but {table_name} misses the list" + ) + else: + results += compare_object_lists( + new_table.indexes, old_table.indexes, table_name + ) + if old_table.foreign_keys is None: + results.append( + f"foreign keys checking is enabled but {table_name} misses the list" + ) + else: + results += compare_object_lists( + new_table.foreign_keys, old_table.foreign_keys, table_name + ) return results +def check_foreign_keys(datasource: DataSource, table_name: str) -> List[str]: + messages: List[str] = [] + foreign_key_violations = datasource.ExecuteSQL( + f"select rowid, parent, fkid from pragma_foreign_key_check('{table_name}');" + ) + for violation in foreign_key_violations: + messages.append( + f"foreign key violation in {table_name} for fk {violation['fkid']} to {violation['parent']} on row {violation['rowid']}" + ) + return messages + + class TableDefinitionValidator(validator.Validator): """Geopackage must conform to given JSON definitions.""" @@ -96,19 +155,39 @@ class TableDefinitionValidator(validator.Validator): def __init__(self, dataset, **kwargs): super().__init__(dataset) - self.table_definitions = kwargs.get("table_definitions") + self.table_definitions: TablesDefinition = kwargs.get("table_definitions") def check(self) -> Iterable[str]: - current_definitions = generate_table_definitions(self.dataset) - return self.check_table_definitions(current_definitions) - - def check_table_definitions(self, definitions_current: TablesDefinition): - assert definitions_current is not None - if self.table_definitions is None: return ["Missing '--table-definitions-path' input"] + current_definitions = generate_table_definitions( + self.dataset, self.table_definitions.with_indexes_and_fks() + ) + return ( + self.check_table_definitions(current_definitions) + + self.check_foreign_keys() + ) + + def check_table_definitions( + self, definitions_current: TablesDefinition + ) -> List[str]: + assert definitions_current is not None + return compare_table_definitions( + definitions_current, + self.table_definitions, + compare_indexes_and_fks=self.table_definitions.with_indexes_and_fks(), + ) - return compare_table_definitions(definitions_current, self.table_definitions) + def check_foreign_keys(self) -> List[str]: + messages: List[str] = [] + if not self.table_definitions.with_indexes_and_fks(): + return messages + for table_definition in self.table_definitions.tables: + if table_definition.foreign_keys is None: + messages += f"foreign keys checking is enabled but {table_definition.name} misses the list" + elif len(table_definition.foreign_keys) > 0: + messages += check_foreign_keys(self.dataset, table_definition.name) + return messages class TableDefinitionValidatorV0(validator.Validator): @@ -132,5 +211,8 @@ def check_table_definitions(self, definitions_current: TablesDefinition): return ["Missing '--table-definitions-path' input"] return compare_table_definitions( - definitions_current, self.table_definitions, compare_columns=False + definitions_current, + self.table_definitions, + compare_columns=False, + compare_indexes_and_fks=False, ) diff --git a/geopackage_validator/validations/validator.py b/geopackage_validator/validations/validator.py index 854bea5..2df7c38 100644 --- a/geopackage_validator/validations/validator.py +++ b/geopackage_validator/validations/validator.py @@ -2,6 +2,8 @@ from abc import ABC, abstractmethod from enum import IntEnum +from osgeo.ogr import DataSource + class ValidationLevel(IntEnum): UNKNOWN_ERROR = 0 @@ -58,7 +60,7 @@ class Validator(ABC): message: str def __init__(self, dataset, **kwargs): - self.dataset = dataset + self.dataset: DataSource = dataset def validate(self) -> Dict[str, List[str]]: """Run validation at geopackage.""" diff --git a/pyproject.toml b/pyproject.toml index f81d430..364bb18 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ dependencies = [ "minio", "pydantic >= 2.9", "pyyaml", + "semver >= 3.0.2", ] requires-python = ">=3.6" diff --git a/tests/data/test_allcorrect_with_indexes_and_fks.gpkg b/tests/data/test_allcorrect_with_indexes_and_fks.gpkg new file mode 100644 index 0000000000000000000000000000000000000000..6899d96b26871fb01aeccd23dc584ff7fef3a620 GIT binary patch literal 126976 zcmeI5Z)_XqeaG)aN|am+`}Mxz)}vMA~w?YOpVonJ}TTidj2 z=6cz4&)t#7BlX9YYsK?+`91$Wzu)h9?s<;9XW`3b4Q1U62gt%^w=eaY4 za2)p*eI28(C+Lf(uivLHCw-X*4eNheJDHh(cE${}y}!i`z4S&i4Qz7%T#Ndg`2gr)aM3gPruRv;7xbm!tFTt`+(L2MB-w z2!H?xfB*=900?{r3B1-{{^6O{5ZBZ6n@72+yr>kjg@U4z<0L*!qT;$Rzc6$4%zQld z%(;at-Ub5Rz>E-^sg3jorl%5%b5?n8U}64(5RXNsuXxXj#iE!a!O78}H!vQWmMo zj3SEix|Ck=28J7mPlktsn)qTglDOi{h|5%*N)8PNCq~CYBjJ&;U^qM)3N=U?4vvg8 zuES`h4zXv1bBRbSdc_-iF(+LuiOB_3RK&uvl3H7l(q3B6srb~KkPzZmyt$MrsilmV zEM$`fMOv0*Z(wYMQWy`8509~9ye2_flvO2_OHx%y87ZZS4}~Tt#zT|F5v~z0rc*gF znN6kDf}*uCOpoD-TA6GrR}_g8a z1{?7j)=^Jgj!^Rv%p!!Tg#=Y33yfBBjmF}M3yt(k(S|kbzs|W%aXqca@{4jt%u2GD zA8^XL4v{Y+__2G&bJIdMAcM!;P(XJTx+{#oR{^V#=R2 z%-#G2!H?xfB*=900@8p2!H?xfWX5^V8VHz_i?WC zlvevDx}2dV3v?o^WW|(PQpDoO@W|+3FgO?tC4yroLK7#(LPO!v@bg#CoNyfQ)h5iB za;lWfu5OujbbM$e9HeQ(O$YjF)6%cli_)^ZMbhyTq0rFCWQZnR4kBZI?ZgQ4NX@Z^c$@QKj)P&hdD{LS09^OwK<#vk7N&L^Kf zseJg4H-B2{e)n|iFHe8(`zxVR_cyMdX{S(}?r(AKAG!b4{T}^*0|Y<-1V8`;KmY_l z00ck)1V8`;Kwv)+=yLS()#VK5L5EL2H5Vxy-Htvz#8`T0I^-B=v#lKH^Zy@k?*DXu z;QoR8NBgOPs0#>y00@8p2!H?xfB*=900@8p2!OzD2sj-a@6`S{v_FmJ|JwKedG|*g z{f`3#KmY_l00ck)1V8`;KmY_l00cnbGeF=F-}wGNPjCC<&i`jX!Ds>qfB*=900@8p z2!H?xfB*=900?LV8hrnsJ^#=B3g`Y8_qW_{(hoR500ck)1V8`;KmY_l00ck)1V8`; z?kR!CoUYy;Pg84a^;i6KON9JOHaAN5-Vo@jFQRRxUS_*>$?z`F)| zj+)t59~P*K%r4EuqUQ7e-qC*l|MzqS;GKKgKx762AOHd&00JNY0w4eaAOHd&00JOz zCj=gG_V!pF_NPtZorj%1Bh0!u;OKGo8Nrq%fhLc0pvV5~KP?wL|Nl^w0FWF?VUv_2mnp(%(kb?yR*$1c$fW+G9tAj}Hq5+ptqSrGcC&c@=20P*Sd zBn6qMsTXr1@zvGI=OZskg&b{7k)#W9QB_iste&W?L^>tQ1(m!glAKs95_Kgdlc3K} zE?p34&HJOmrJFVtyNm2 zpC-eGO+MYF!))FK%)7M6YPre%WslX0k-Nlqy0gWzFvxR~oDr`TujV9GOr}a|ft@Gq zzMBl%L%KH4b+|l(gZ%4Z)+gEH?dP4Q`%j&n6^K2S^s`Koaw;#9gm5`QqA~hfoSh9= zno4)25wsq;5?9k(Z56vBO3N!Msf%W#g5Enfj@v5Xnskl&=e1-}dRZjXu|=&xl`83x z8;v8+#i9!d+Oa*PFt;HF&eqJNUxWwPFN#PT~w2oYZY%1-j!$C zttp+{yDdf9T0AdIY->vur5F~G9BF71?UB|y&q^Od>?O374>!9!$B*;nD^wW;iN2hYadO@GxmY~uj4eOp*ly{XtF z^wFciwbT|pscPdwqut_qP*zRCg}4HddQT%cWe7iD+U} z!N$Cs)MS%nXO>K|QYM*Qt)$b^(s5r@L^(~T?>6ZgF{%<{&XrCT)2WOYV9^D2MO3u9 zvBSKYH7jV9VMalvn9I}miVV^EWx{rT)aAEtjaecaF0G9*wtOI~;s)tAr2<6%q?PWk zYb%Q*{Zb}Cm_WNf|CGxU?&HgCY&ODx82Qr6?hQH@qjMH(=utz7k?6E=SqE4ruY;m1 zC?f6L%v$ze3y=-}m;5fzDOy}#UB-IzN_F9%HwrW^1sasAwIr!0Y+N$agys$Y@=N_L zPhTIudAw??Mg^^vJyK(-))>P~nf~p{ub7WrpC+KST0>k@q_yuq(H<6m<@DM*&X_ zBJYm!-hR12hsQ#t36|+tKrd4h@RvVNT%Kp2<;w#*Ra6t-rdTcd@UAPVNpD+4ZOr$V zpE&07%+2v--F&twT`AD%?oRPNyRII~&fCx48WPm%FBkmfBTu+IQ&X+w(>u`Hx@tz9 zo{Pj2i&L{C!4?s=(XW>*E<~a;gyqP37JA`9eVtxoXn!SngMHrcpL)E-b8%qXD*&yl zk&ARREE-pM)qqFK>yNQPXS0trjSbh;Xxr%{HDqm(bysaGbbbE9SIacPm%&uMBVL;+G;xZgj2q(vKTmsq=v^`oMAwZz6SeL2ZIJyg?fn{2<< zqXej^|N4?gUt~VH)qMy_g1L~wnkc0iF~QZR_>P-I&q=b@@(uitiitSx4ms-o-XA7p*s*ydiTeK)|L<_W z$GQL6{hs?H_n&=+nu(@>00@8p2!H?xfB*=900@8p2!O!DO5jL`BizQmd~EqbdxrkT zKEKx0>geTJ6#GW^;ckbcPd_)lYut3G!!ck_tj+&<_wRD_KMoK80T2KI5C8!X009sH z0T2KI5CDPuO+e&Z26_gs_Sz=<+V64MZx{7nYqFn4uc+$UiQ~uD*Vl(=^ja#tnpzfz z=;H#87uUpeaAf%S7plJ>Wa-uw`oO8df}GpXzyJR}=YIcwHxT8400@8p2!H?xfB*=9 z00@8p2!H?x?2$l!GhIBe-|KI*cA($?zs0$4?Xg582LTWO0T2KI5C8!X009sH0T2KI z5ZI3d9K3^TWApz5gB<;j0|Y<-1V8`;KmY_l00cn5O5lxo-pPISNgBi(!Tint`f&B; z@BPVZe{=l1x4!sWe|Od@j?W+f0wA!T325_wu6sY%5Oo6q5C8!X009sH0T2KI5C8!X z009u#D*!5humdIxc1d(%-{PIf#yXJA4xw>FJZreaY| zRv(Ay)!Gv!vDxV%Z&u29N$)8V6)qVkQ_*RCr)IDAYZxzyEJPP)XGtth781!wQOK{U z8*C?MJ>Ks(5KjxULP8+tqOesUvM*1t22vn<}!`pne%rkhe_w5#5$kxz1 zh8Mcm@aVSJjGhKn+XDMuBRo~r;>n5|#7SJ3pPf10wpYe;Q?rZ0f}wED7y`74 zbCP`7s^WlF@43{q+CW`!sgYn!py3OnWsa$HbA7k!9NDWnN9^i6PYk1``}^+b6%WQK zYBWmwkj%g zeTYp`N2b-Nq|?$WVpdc{IV~3TwACVO#0oMgtz}ZGNOw#wrc)U)z@pg$leN0BgLW%5 z6|~C8sl2EOC4|cf?cHodfM~-BnUB%Ly8QO7F-v44t+g@6vzp1OxIy|&sQ}SGX{Gz? z+REZczmy3OCeZFLt6y>*jSaM3-`Ke`%7{5prT=QiP*bVqWy>~Rsi!x6$`~fM?tlup z%#hJbX(I+3KJ<&2J0D@~ok{A&-1W#SF3-RKel%BPeKrnOW_El6@&5%csCOt%GQ8+sP=vDu}WSX2);lx^{fN{)8(sn$z( zhiX!*^=M_-UBQs7+aYno$h(EMZjXio z(f>F=00ck)1V8`;KmY_l00ck)1V8`;c2A)E6#a(%_2=4V<~hfis~q>oZ+!c5{`T#g z|JQo@-R{y)Z~oJV%9Edd@|~OC>@I~?zW=?`bQ?Z>LlGB!|L?b%^vyQ|lW+X(x%W<+ ziH8@?e^d(p#m)ErV186~{GyVHndzUt^V^=cH{U(|_`mx+Z^=KeWd6;8k>s0`zpR|w z^VjG9KjYj#+kFEe2?8Jh0w4eaAOHd&00JNY0w4eaAi(Ya`G4DCC+FePDK))f>Wkk0 zKWKi&I|zUP2!H?xfB*=900@8p2!H?xfWVFf_W%4pf4lPi0{#B~$DI4)9oq!~5C8!X i009sH0T2KI5C8!X009sHfrp4dJMV1cJ%`!-|NjRtlFkSK literal 0 HcmV?d00001 diff --git a/tests/data/test_allcorrect_with_indexes_and_fks_definition.yml b/tests/data/test_allcorrect_with_indexes_and_fks_definition.yml new file mode 100644 index 0000000..0dac533 --- /dev/null +++ b/tests/data/test_allcorrect_with_indexes_and_fks_definition.yml @@ -0,0 +1,87 @@ +geopackage_validator_version: 0.0.0-dev +projection: 28992 +tables: +- name: test_allcorrect + geometry_column: geom + columns: + - name: fid + type: INTEGER + - name: geom + type: POLYGON + - name: foreign_id + type: INTEGER64 + indexes: + - columns: + - fid + unique: true + foreign_keys: + - table: test_foreign + columns: + - src: foreign_id + dst: id +- name: test_foreign + geometry_column: geom + columns: + - name: id + type: INTEGER + - name: geom + type: POINT + - name: name + type: STRING + - name: x + type: INTEGER64 + - name: y + type: INTEGER64 + indexes: + - columns: + - id + unique: true + - columns: + - name + unique: true + - columns: + - x + - y + unique: false + foreign_keys: [] +- name: test_multi_fk + geometry_column: geom + columns: + - name: geom + type: POINT + - name: allcorrect_id + type: INTEGER64 + - name: other_id + type: INTEGER64 + - name: other_name + type: STRING + indexes: [] + foreign_keys: + - table: test_allcorrect + columns: + - src: allcorrect_id + dst: fid + - table: test_other + columns: + - src: other_id + dst: id + - src: other_name + dst: name +- name: test_other + geometry_column: geom + columns: + - name: id + type: INTEGER + - name: geom + type: POINT + - name: name + type: STRING + indexes: + - columns: + - id + unique: true + - columns: + - id + - name + unique: true + foreign_keys: [] diff --git a/tests/data/test_changed_indexes_and_fks_definition.yml b/tests/data/test_changed_indexes_and_fks_definition.yml new file mode 100644 index 0000000..ac17ee7 --- /dev/null +++ b/tests/data/test_changed_indexes_and_fks_definition.yml @@ -0,0 +1,87 @@ +geopackage_validator_version: 0.0.0-dev +projection: 28992 +tables: # purposely shuffled. order is not validated +- name: test_allcorrect + geometry_column: geom + columns: # purposely shuffled. order is not validated + - name: geom + type: POLYGON + - name: foreign_id + type: INTEGER64 + - name: fid + type: INTEGER + indexes: + - columns: + - foo # fid + unique: true + foreign_keys: + - table: unexisting # test_foreign + columns: + - src: foreign_id + dst: id +- name: test_foreign + geometry_column: geom + columns: + - name: id + type: INTEGER + - name: geom + type: POINT + - name: name + type: STRING + - name: x + type: INTEGER64 + - name: y + type: INTEGER64 + indexes: + - columns: + - id + unique: true + - columns: + - name + unique: false # true + - columns: + - x + - y + unique: true # false + foreign_keys: [] +- name: test_other + geometry_column: geom + columns: + - name: id + type: INTEGER + - name: geom + type: POINT + - name: name + type: STRING + indexes: # purposely shuffled. order is not validated + - columns: + - id + - name + unique: true + - columns: + - id + unique: true + foreign_keys: [] +- name: test_multi_fk + geometry_column: geom + columns: + - name: geom + type: POINT + - name: allcorrect_id + type: INTEGER64 + - name: other_id + type: INTEGER64 + - name: other_name + type: STRING + indexes: [] + foreign_keys: +# - table: test_allcorrect +# columns: +# - src: allcorrect_id +# dst: fid + - table: test_other + columns: + - src: other_id + dst: id + - src: other_name + dst: name diff --git a/tests/test_cli.py b/tests/test_cli.py index 97cd58a..d176548 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,5 +1,6 @@ import json +import pytest from click.testing import CliRunner from geopackage_validator import __version__ @@ -58,6 +59,130 @@ def test_generate_definitions_with_gpkg(): assert json.loads(result.output) == expected +@pytest.mark.parametrize( + "gpkg_path, expected", + [ + ( + "tests/data/test_allcorrect.gpkg", + { + "geopackage_validator_version": __version__, + "projection": 28992, + "tables": [ + { + "name": "test_allcorrect", + "geometry_column": "geom", + "columns": [ + {"name": "fid", "type": "INTEGER"}, + {"name": "geom", "type": "POLYGON"}, + ], + "indexes": [{"columns": ["fid"], "unique": True}], + "foreign_keys": [], + } + ], + }, + ), + ( + "tests/data/test_allcorrect_with_indexes_and_fks.gpkg", + { + "geopackage_validator_version": "0.0.0-dev", + "projection": 28992, + "tables": [ + { + "name": "test_allcorrect", + "geometry_column": "geom", + "columns": [ + {"name": "fid", "type": "INTEGER"}, # fid + {"name": "geom", "type": "POLYGON"}, # geom + {"name": "foreign_id", "type": "INTEGER64"}, + ], + "indexes": [ + {"columns": ["fid"], "unique": True}, # PK + ], + "foreign_keys": [ + { + "table": "test_foreign", + "columns": [{"src": "foreign_id", "dst": "id"}], + } + ], + }, + { + "name": "test_foreign", + "geometry_column": "geom", + "columns": [ + {"name": "id", "type": "INTEGER"}, # fid + {"name": "geom", "type": "POINT"}, # geom + {"name": "name", "type": "STRING"}, + {"name": "x", "type": "INTEGER64"}, + {"name": "y", "type": "INTEGER64"}, + ], + "indexes": [ + {"columns": ["id"], "unique": True}, # PK + {"columns": ["name"], "unique": True}, # n comes before x + {"columns": ["x", "y"], "unique": False}, + ], + "foreign_keys": [], + }, + { + "name": "test_multi_fk", + "geometry_column": "geom", + "columns": [ + {"name": "geom", "type": "POINT"}, + {"name": "allcorrect_id", "type": "INTEGER64"}, + {"name": "other_id", "type": "INTEGER64"}, + {"name": "other_name", "type": "STRING"}, + ], + "indexes": [], + "foreign_keys": [ + { + "table": "test_allcorrect", + "columns": [{"src": "allcorrect_id", "dst": "fid"}], + }, + { + "table": "test_other", + "columns": [ + {"src": "other_id", "dst": "id"}, + {"src": "other_name", "dst": "name"}, + ], + }, + ], + }, + { + "name": "test_other", + "geometry_column": "geom", + "columns": [ + {"name": "id", "type": "INTEGER"}, # fid + {"name": "geom", "type": "POINT"}, # geom + {"name": "name", "type": "STRING"}, + ], + "foreign_keys": [], + "indexes": [ + {"columns": ["id"], "unique": True}, # PK + {"columns": ["id", "name"], "unique": True}, + ], + }, + ], + }, + ), + ], +) +def test_generate_definitions_with_indexes_and_fks(gpkg_path: str, expected: dict): + runner = CliRunner() + result = runner.invoke( + cli, + [ + "generate-definitions", + "--gpkg-path", + gpkg_path, + "--with-indexes-and-fks", + ], + ) + + if result.exit_code != 0: + print(result.output) + assert result.exit_code == 0 + assert json.loads(result.output) == expected + + def test_generate_definitions_with_ndimension_geometries(): runner = CliRunner() result = runner.invoke( @@ -84,7 +209,7 @@ def test_generate_definitions_with_ndimension_geometries(): ], }, { - "name": "test_dimensions4", + "name": "test_dimensions3_correct", "geometry_column": "geom", "columns": [ {"name": "fid", "type": "INTEGER"}, @@ -92,7 +217,7 @@ def test_generate_definitions_with_ndimension_geometries(): ], }, { - "name": "test_dimensions4_correct", + "name": "test_dimensions4", "geometry_column": "geom", "columns": [ {"name": "fid", "type": "INTEGER"}, @@ -100,7 +225,7 @@ def test_generate_definitions_with_ndimension_geometries(): ], }, { - "name": "test_dimensions3_correct", + "name": "test_dimensions4_correct", "geometry_column": "geom", "columns": [ {"name": "fid", "type": "INTEGER"}, diff --git a/tests/validations/test_table_definitions_check.py b/tests/validations/test_table_definitions_check.py index d5538bb..b10d0e2 100644 --- a/tests/validations/test_table_definitions_check.py +++ b/tests/validations/test_table_definitions_check.py @@ -1,5 +1,6 @@ from geopackage_validator.generate import ( generate_definitions_for_path, + get_datasource_for_path, ) from geopackage_validator.models import TablesDefinition from geopackage_validator.validate import load_table_definitions @@ -9,19 +10,6 @@ ) -def test_table_definitions_input_is_none(): - current_definitions = generate_definitions_for_path( - "tests/data/test_allcorrect.gpkg" - ) - - diff = TableDefinitionValidator( - None, table_definitions=None - ).check_table_definitions(current_definitions) - - assert len(diff) == 1 - assert diff[0] == "Missing '--table-definitions-path' input" - - def test_table_definitions_check_correct(): current_definitions = generate_definitions_for_path( "tests/data/test_allcorrect.gpkg" @@ -35,7 +23,7 @@ def test_table_definitions_check_correct(): None, table_definitions=table_definitions ).check_table_definitions(current_definitions) - assert len(diff) == 0 + assert diff == [] def test_table_definitions_check_incorrect_geometry(): @@ -178,3 +166,28 @@ def test_legacy_table_definitions_check_table_changed(): "missing table(s): test_allcorrect", "extra table(s): test_different_table", ] + + +def test_table_definitions_check_changed_indexes_and_fks(): + datasource = get_datasource_for_path( + "tests/data/test_allcorrect_with_indexes_and_fks.gpkg" + ) + table_definitions = load_table_definitions( + "tests/data/test_changed_indexes_and_fks_definition.yml" + ) + + diff = TableDefinitionValidator( + dataset=datasource, table_definitions=table_definitions + ).check() + + assert set(diff) == { + 'table test_allcorrect has extra ForeignKeyDefinition: { "table": "test_foreign", "columns": [ { "src": "foreign_id", "dst": "id" } ] }', + 'table test_allcorrect has extra IndexDefinition: { "columns": [ "fid" ], "unique": true }', + 'table test_allcorrect misses ForeignKeyDefinition: { "table": "unexisting", "columns": [ { "src": "foreign_id", "dst": "id" } ] }', + 'table test_allcorrect misses IndexDefinition: { "columns": [ "foo" ], "unique": true }', + 'table test_foreign has extra IndexDefinition: { "columns": [ "name" ], "unique": true }', + 'table test_foreign has extra IndexDefinition: { "columns": [ "x", "y" ], "unique": false }', + 'table test_foreign misses IndexDefinition: { "columns": [ "name" ], "unique": false }', + 'table test_foreign misses IndexDefinition: { "columns": [ "x", "y" ], "unique": true }', + 'table test_multi_fk has extra ForeignKeyDefinition: { "table": "test_allcorrect", "columns": [ { "src": "allcorrect_id", "dst": "fid" } ] }', + } From 6f573463177ca4402e57ca5ddf33b4d6e8fd2b83 Mon Sep 17 00:00:00 2001 From: Roel Arents Date: Mon, 18 Nov 2024 10:42:54 +0100 Subject: [PATCH 2/5] add test for foreign key violation PDOK-16629 --- tests/data/test_foreign_key_violation.gpkg | Bin 0 -> 126976 bytes .../test_table_definitions_check.py | 16 ++++++++++++++++ 2 files changed, 16 insertions(+) create mode 100644 tests/data/test_foreign_key_violation.gpkg diff --git a/tests/data/test_foreign_key_violation.gpkg b/tests/data/test_foreign_key_violation.gpkg new file mode 100644 index 0000000000000000000000000000000000000000..f8777f212efa0874e02ccec00b5bdcf05d9590e5 GIT binary patch literal 126976 zcmeI5ZEPDyddGK3N|a<tpW}GMi%Kz~WKq-`?f7ijTFXo%Ql_ZbiW{&Dxs(hkiK}Mco!?n+wtp1&S7E?{bIM1rGNm{cyMfZF+sVLoYy4AUU-6 z+DmrX>A+3@y*TwNXcbX86 zmO`# zI0XR^009sH0T2KI5C8!X009sH0TB3A5a@cE=IYtMkN$Oa{DSLr?t8m)nSQ_l0w4ea zAOHd&00JNY0-r(xulH4caJntbbvu6ZFgIBcl~S%)R8(?|B&JAATovZ#r>~x#OT?c& zJAc{NM8FrE7UI+Ok-p&6WO8BFD(?%<&z%<%@#xfL-x;w~67wW9F%t3x$HL>|;mG)4 zXmm6<9Es5P;fc`jL~wE;c|M+qCKoUJv^w~LzVV@<5#Nh1Hi;PgK=IxnVj z>5N)bv^GZQF*IH;lS}7IVoFSxR7qa)1*s;6hKA~L(IsSR7|Tp#WGu`?bbd8w{YCeKlgfKauq>5yLv1+cdcp`benSLqSh=%>wIrmAfyX|OULC%UfNfxtY zsmhhiC+1`Em{2z3svoRE8EV~GNlw#yy^5HlYLPP{DXo=Mv2ghl^KhTwy7{A5YNce- ziYk^UZL^S1s+m_mkebjYg{(jRN4fTX>TWlr%8`rH^JHQ~t6o-ArHp3hUH@UalO9=h zC3>0)MJ7h9j%12ZADRe-$Ep8J3=WTu4-bb&$C_F1_)vJLx%G~PhsU&-2k1de`7?(3 z`+}zO_28ns#N5IcG)$$THyF`toHr$pnVv*y^h-pNNmk5Ag>;_2YkpoLMVY9}A~D#P zR+dGjC}r9Fzw-mm^JUK^&lB_m4iEqV5C8!X009sH0T2KI5C8!Xcr*!&yY}=P;r5-> zYVV-S8EUFXC&EfjOsi!@EDaA0j|_xD1EFv-G_G;PGOr?);W{ffOPEy)`s9XlQl4-QX+Y0@kFp8g~K`%b#)q1QK^ z&u5B?B4$)GHETvKn6_bfU}$t8Jd_-oI35~09v&NvghpSub>~ju(l_7y!&~3__>-rU z5C8GjPs&~Io@)E$sqcJuIb81g+SSt?6pG994bJmJ&%b)!qaSd900@8p2!H?xfB*=9 z00@8p2!H?x>?Q)8&K|zDoZ;H<^y{bQB89Wd*{g>bOAn3%&i;1W%7H%r{~qW0PtOOQ z?|FW>n;M9^fB*=900@8p2!H?xfB*=900@8p2yBOd%gOOB?T=IY(`^2)egB{Le8kcJ zI6wddKmY_l00ck)1V8`;KmY_l00ce-1P<`c@Bj1kwm;&=?cI*_qBn@3+5ZKrGV{Sh;%XR+m-hb=- zYUita`Mo#Wm-qbDo-6Lx+K;w;t!33U;rNjM3KiMRCy(vRz5{bte^YIg!f(Vmm&yz&_BF$hr8YA<Og`Jdq3%zg|yL zl!=;tDK8R#L!JD7^0HLS)7F$orYM(GB`wM7@%l<+(z0As$x9;1i=`4#m(wx{`2*zQ zd4blvFD6_ZtgG}hBosJ7gxJ*esV*Js{%v8E3G%T~2BQ=>F0=;*;v{a#)RSwl#rA7J~ zGGy4~Go3ojmTkbiM~keLo7!FWSgRPhM|`LDwR-0VcutbD;bZtuVV|3-xMN%naA`99PAC(q0X#2!ofSSBesT@Xo9xRfNZIDIY5%mgh> zrMuDyT8~_ftLv>cid`0^rDc^gM6*#r@12{+ZIp0Dx<>u;TB;2{dWYQ_kmFi`JQkHi5Qce;TJwmtMU{)+; z6lq13=%j)TxMtJ+{$M^`Qd1c^J6NLQ8=cK0qqDUUj+ladbdr%%rGnV!?_C_|Ee!N# zlfCDU_s$;g&As3c`sL!PKM)97d-5Q$_Ta&SrQ@bimJZuCE;KrWqPc@Ma@JDPlG zm*a7+^Dp-;?%mh%=j~!!yZfuHf8F|8%MV&6U0-qDb~+sERN#4Mr$(*G!Dv?Yea7wW z@8>HQG@9!tjx)!5W0bLF`x}~@6x5v@t)uCyi}Q8#br5>gb}qCwR+~!8<$RurXkt{s z#=M)q*`8SBj}H-vxIDA2eRXi~1$l9ZmXdC5!@nl}V0FZa2< zy}kVUv6`(K6|`3NP@Sb(V+=E8`nM~;YCd*-nt)d74RJ-03Tb7H=}(^zW)!hNhY;N= zSgkeV(NC{w24^o)k6LRM)e*GVS)lT3J#KG2&R0&bhAXNfiVe=fuH07A)H&2G1-x~L zd|S%<`s5-V9*fl`Sf*n^y-ZCYQ28uzd!Kubuk>$KQB8c4Vzual+peT0y=fJ-F+Wgw z@~GQ8JIhye^Vy_yrAVi{Tg7*8yLv1;UmtsGN>Ho6TntnWJ?ZvNPPSD}Z9#A2su^{9 zKAK1_OwN!bTSVAIzdo`sAB{~DmLuy~=!FOMb$X4V{gvc3_IWLE@<^-qLjR^$09scg zm*{9%GOqAy0gqQ!pJ0Q|dM|4l8?I~7w$sPz$l4+suJRgYPQhx%%_d8h5x0K*q?9Q( zToYR6MoZM=G9qc5)6`9ff~3CiKw$1rt2Wv$vUVHmMjD z2~yF(^+m6~$b4d>1wHH9%Dt|SHp#=jLQ2=}tx^wdjkK;3g>s3l+%G9~;zF!5wOJ-rnhkeDjWzX$WvX?RZF=jZ`TbC%+O->^=MU&?z1w4ZkA_-T-CfqB*41qX zD7EkZ^PZn_^gj*|009sH0T2KI5C8!X009sH0T2LzPZ5DVF5b%@%%s)K@J$ve4rHviukw*W3HVk%QC%c_0Dblc+^%)OTr(dlU+L40?Um5)rGOVUKOJyCtO z+Q?pO|HH?o$=VJ>qGbElM^T|}SBotQE zHMWzp9v=u8h^K@ZAt{h^iTEtpbmwy&rj(^>BYo9M1Z!2$;jKSc;TgO)dbbNNY-{LU z!wcVUcy!xqMo)vPZGnBi5uU7R@kG@P5+ota%}kyZv`z2hn=9k_$(aRV-cYz^3_)7O zSxLTRRdG)7W$Q8yE}I5o z+?BeMqh>EOd1gj1tj-klb@X)*der*n6zdn0f;JY)MU}|qe4dGDVsz9hsY+UsRZ>@k zeTYp_N2b-Nrqj|YVop>4K;UC51~#?cHodkZ8jRnTylJy8Py?F-v44t+g@6vzo~jagFquQbD4B(n=3B zw3Wq?J}Da{OrRrBQNQ3m9Pe+tzP5E|loj)$O8?c3p{7#J%a%>NQcrLClrc4X^wektGnHJlv37ghStxeOk zcGxYWYFjIcR7fjpOl#?~T9o9BA{OW+m~IQqHuNm!;xmiW@t7WNDBI!{)g0~SQ>&Nm z4z;9K>(R=vyMiHE%~_La@B=MG*QDtg9f4PV!|grQ!&iFPL&g|8(;#)TE!!gY8$1_c z(F+UO@H{14(%rFHI^FYRi-5(3E6=9f-ku)*da9;JmPxG|N1AAliI|FGdfQNCcEEZS zSL(Z!E%eHIgNCkvHLTVhbOf%iyvE$|#?`IeQQxZGW~gRMx8aU@dea@vF=oT-fj(JW zZScqKUCx<#@5=FuPU4)+Cvm!!-nClCMu5C8!X009sH0T2KI5C8!X0D(uEfZN6M{GC}xD}8Tw-PXSU&wJ)M z`X2`ffB*=900@8p2!H?xfB*=900@A<_6by;rr)r?{(SrN9OpcJmE+!e^P86nckbN! zzqV8Fc9nl}>z_VUp8DkDZ{7NOS2?`=-S3>D+wkcdin!_ff4|M7Z`}+|y!p3h-#cX{ z9-2S*Q91G#x8D1Mxe?jI?i^I{wIc_>`zAP2 Date: Mon, 18 Nov 2024 10:47:31 +0100 Subject: [PATCH 3/5] fix deprecated pydantic model Config PDOK-16629 --- geopackage_validator/models.py | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/geopackage_validator/models.py b/geopackage_validator/models.py index 7d6058f..07d6edd 100644 --- a/geopackage_validator/models.py +++ b/geopackage_validator/models.py @@ -1,7 +1,7 @@ import copy from typing import Optional, Tuple -from pydantic import BaseModel, Field, field_validator +from pydantic import BaseModel, Field, field_validator, ConfigDict from semver import Version @@ -10,31 +10,27 @@ class Named(BaseModel): class ColumnDefinition(Named): - class Config: - frozen = True + model_config = ConfigDict(frozen=True) type: str class IndexDefinition(BaseModel): - class Config: - frozen = True + model_config = ConfigDict(frozen=True) columns: Tuple[str, ...] = Field(min_length=1) unique: bool = False class ColumnMapping(BaseModel): - class Config: - frozen = True + model_config = ConfigDict(frozen=True) src: str dst: str class ForeignKeyDefinition(BaseModel): - class Config: - frozen = True + model_config = ConfigDict(frozen=True) @field_validator("columns") @classmethod @@ -53,8 +49,7 @@ def unique_src_columns( class TableDefinition(Named): - class Config: - frozen = True + model_config = ConfigDict(frozen=True) geometry_column: str = "geom" columns: Tuple[ColumnDefinition, ...] = tuple() @@ -67,8 +62,7 @@ class Config: class TablesDefinition(BaseModel): - class Config: - frozen = True + model_config = ConfigDict(frozen=True) geopackage_validator_version: str = "0" projection: Optional[int] From eb2194952774f909a9d369fa9aa0b5f08bcc391d Mon Sep 17 00:00:00 2001 From: Roel Arents Date: Mon, 18 Nov 2024 13:57:58 +0100 Subject: [PATCH 4/5] turn on gdal exceptions in some test_table_definitions_check PDOK-16629 --- geopackage_validator/generate.py | 4 ++-- geopackage_validator/utils.py | 2 +- tests/validations/test_table_definitions_check.py | 11 +++++++++-- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/geopackage_validator/generate.py b/geopackage_validator/generate.py index 403a7e9..2fcce9b 100644 --- a/geopackage_validator/generate.py +++ b/geopackage_validator/generate.py @@ -172,10 +172,10 @@ def generate_table_definitions( return result -def get_datasource_for_path(gpkg_path: str) -> DataSource: +def get_datasource_for_path(gpkg_path: str, error_handler=None) -> DataSource: """Starts the geopackage validation.""" utils.check_gdal_version() - return utils.open_dataset(gpkg_path) + return utils.open_dataset(gpkg_path, error_handler) def generate_definitions_for_path( diff --git a/geopackage_validator/utils.py b/geopackage_validator/utils.py index f745ed7..b64132c 100644 --- a/geopackage_validator/utils.py +++ b/geopackage_validator/utils.py @@ -42,7 +42,7 @@ } -def open_dataset(filename=None, error_handler=None) -> DataSource: +def open_dataset(filename: str = None, error_handler: Callable = None) -> DataSource: if error_handler is not None: gdal.UseExceptions() gdal.PushErrorHandler(error_handler) diff --git a/tests/validations/test_table_definitions_check.py b/tests/validations/test_table_definitions_check.py index 86f33be..0368658 100644 --- a/tests/validations/test_table_definitions_check.py +++ b/tests/validations/test_table_definitions_check.py @@ -168,9 +168,14 @@ def test_legacy_table_definitions_check_table_changed(): ] +def gdal_error_handler_print_err(err_level, err_no, err_msg): + print(f"GDAL error: err_level: {err_level}\nerr_no: {err_no}\nerr_msg: {err_msg}") + + def test_table_definitions_check_changed_indexes_and_fks(): datasource = get_datasource_for_path( - "tests/data/test_allcorrect_with_indexes_and_fks.gpkg" + "tests/data/test_allcorrect_with_indexes_and_fks.gpkg", + gdal_error_handler_print_err, ) table_definitions = load_table_definitions( "tests/data/test_changed_indexes_and_fks_definition.yml" @@ -194,7 +199,9 @@ def test_table_definitions_check_changed_indexes_and_fks(): def test_table_definitions_check_fk_violation(): - datasource = get_datasource_for_path("tests/data/test_foreign_key_violation.gpkg") + datasource = get_datasource_for_path( + "tests/data/test_foreign_key_violation.gpkg", gdal_error_handler_print_err + ) table_definitions = load_table_definitions( "tests/data/test_allcorrect_with_indexes_and_fks_definition.yml" ) From 7789d7c1e722ed70886d521b63827b6bbc4a9659 Mon Sep 17 00:00:00 2001 From: Roel Arents Date: Mon, 18 Nov 2024 14:24:52 +0100 Subject: [PATCH 5/5] perform foreign key violations check on whole gpkg at once PDOK-16629 --- .../validations/table_definitions_check.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/geopackage_validator/validations/table_definitions_check.py b/geopackage_validator/validations/table_definitions_check.py index 37e5dcd..99aa0bd 100644 --- a/geopackage_validator/validations/table_definitions_check.py +++ b/geopackage_validator/validations/table_definitions_check.py @@ -135,14 +135,18 @@ def compare_table_definitions( return results -def check_foreign_keys(datasource: DataSource, table_name: str) -> List[str]: +def get_foreign_key_violations(datasource: DataSource) -> List[str]: + # This used to be a per-table operation. But it's not due to + # a bug in sqlite: https://sqlite.org/forum/info/30cd7db3d0b2f12e + # used in github ubuntu 20-04: + # https://github.com/actions/runner-images/blob/main/images/ubuntu/Ubuntu2004-Readme.md#installed-apt-packages messages: List[str] = [] foreign_key_violations = datasource.ExecuteSQL( - f"select rowid, parent, fkid from pragma_foreign_key_check('{table_name}');" + f'select "table", rowid, parent, fkid from pragma_foreign_key_check();' ) - for violation in foreign_key_violations: + for v in foreign_key_violations: messages.append( - f"foreign key violation in {table_name} for fk {violation['fkid']} to {violation['parent']} on row {violation['rowid']}" + f"foreign key violation in {v['table']} for fk {v['fkid']} to {v['parent']} on row {v['rowid']}" ) return messages @@ -185,8 +189,7 @@ def check_foreign_keys(self) -> List[str]: for table_definition in self.table_definitions.tables: if table_definition.foreign_keys is None: messages += f"foreign keys checking is enabled but {table_definition.name} misses the list" - elif len(table_definition.foreign_keys) > 0: - messages += check_foreign_keys(self.dataset, table_definition.name) + messages += get_foreign_key_violations(self.dataset) return messages