From 7c013ef1ef94f5bd335354a14a8963fd9b457543 Mon Sep 17 00:00:00 2001 From: Michiel Korpel Date: Mon, 2 Dec 2024 18:08:26 +0100 Subject: [PATCH 01/10] Add RQ24 (geoms should not be empty) --- README.md | 52 ++++++++++++++++++++++++++++------------------------ 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index 4006591..16f1954 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ Geopackages are a data format that have a deliberately broad application, so many of the requirements are dependend on your use. -The PDOK geopackage validator is used by [PDOK](https://www.pdok.nl/). PDOK is part of the Dutch government. This geopackage validator is used to validate a [set of requirements](#what-does-it-do) to make sure geopackages adhere to our standardized ETL pipeline. It is possible to use this for your own purposes as described [here](https://github.com/PDOK/geopackage-validator/issues/115#issuecomment-1529488733). The validations will not change (except for bugfixes); **new validations are always added to the list**. In case you are looking for a more generic validator. These do exist and can be found: +The PDOK geopackage validator is used by [PDOK](https://www.pdok.nl/). PDOK is part of the Dutch government. This geopackage validator is used to validate a [set of requirements](#what-does-it-do) to make sure geopackages adhere to our standardized ETL pipeline. It is possible to use this for your own purposes as described [here](https://github.com/PDOK/geopackage-validator/issues/115#issuecomment-1529488733). The validations will not change (except for bugfixes); **new validations are always added to the list**. In case you are looking for a more generic validator. These do exist and can be found: - [teamengine](https://cite.opengeospatial.org/teamengine) (official OGC, Java) - [teamengine Github](https://github.com/opengeospatial/teamengine) @@ -13,18 +13,18 @@ The PDOK geopackage validator is used by [PDOK](https://www.pdok.nl/). PDOK is p ## Table of Contents -- [geopackage-validator](#geopackage-validator) +- [geopackage-validator](#pdok-geopackage-validator) - [Table of Contents](#table-of-contents) - [What does it do](#what-does-it-do) - [Geopackage versions](#geopackage-versions) - [Installation](#installation) - [Docker](#docker-installation) - [Usage](#usage) - - [RQ8 Validation](#local-rq8-validation) - - [Show validations](#local-show-validations) - - [Generate table definitions](#local-generate-table-definitions) + - [RQ8 Validation](#rq8-validation) + - [Show validations](#show-validations) + - [Generate table definitions](#generate-table-definitions) - [Local development](#local-development) - - [Usage](#usage-1) + - [Docker run](#docker-run) - [Python console](#python-console) - [Code style](#code-style) - [Tests](#tests) @@ -32,14 +32,15 @@ The PDOK geopackage validator is used by [PDOK](https://www.pdok.nl/). PDOK is p ## TL;DR Commands -Either run through [docker](#docker) or [locally](#local). +Either run through [docker](#docker) or [locally](#local). ### Docker + Validate a GeoPackage with the default set of validation rules: ```sh gpkg_path=relative/path/to/the.gpkg -docker run -v "$(pwd)":/gpkg --rm pdok/geopackage-validator validate --gpkg-path "/gpkg/${gpkg_path}" +docker run -v "$(pwd)":/gpkg --rm pdok/geopackage-validator validate --gpkg-path "/gpkg/${gpkg_path}" ``` Validate a GeoPackage with the default set of validation rules including a schema: @@ -47,15 +48,15 @@ Validate a GeoPackage with the default set of validation rules including a schem ```sh schema_path=relative/path/to/the/schema.json gpkg_path=relative/path/to/the.gpkg -docker run -v "$(pwd)":/gpkg --rm pdok/geopackage-validator validate --gpkg-path "/gpkg/${gpkg_path}" --table-definitions-path "/gpkg/${schema_path}" +docker run -v "$(pwd)":/gpkg --rm pdok/geopackage-validator validate --gpkg-path "/gpkg/${gpkg_path}" --table-definitions-path "/gpkg/${schema_path}" ``` -Generate a schema: +Generate a schema: ```sh schema_path=relative/path/to/the/schema.json gpkg_path=relative/path/to/the.gpkg -docker run -v "$(pwd)":/gpkg --rm pdok/geopackage-validator generate-definitions --gpkg-path "/gpkg/${gpkg_path}" > "$schema_path" +docker run -v "$(pwd)":/gpkg --rm pdok/geopackage-validator generate-definitions --gpkg-path "/gpkg/${gpkg_path}" > "$schema_path" ``` ### Local @@ -64,7 +65,7 @@ For a local setup we require/tested against python > 3.6 and gdal = 3.4. ```sh gpkg_path=relative/path/to/the.gpkg -geopackage-validator validate --gpkg-path "/gpkg/${gpkg_path}" +geopackage-validator validate --gpkg-path "/gpkg/${gpkg_path}" ``` Validate a GeoPackage with the default set of validation rules including a schema: @@ -72,15 +73,15 @@ Validate a GeoPackage with the default set of validation rules including a schem ```sh schema_path=relative/path/to/the/schema.json gpkg_path=relative/path/to/the.gpkg -geopackage-validator validate --gpkg-path "/gpkg/${gpkg_path}" --table-definitions-path "/gpkg/${schema_path}" +geopackage-validator validate --gpkg-path "/gpkg/${gpkg_path}" --table-definitions-path "/gpkg/${schema_path}" ``` -Generate a schema: +Generate a schema: ```sh schema_path=relative/path/to/the/schema.json gpkg_path=relative/path/to/the.gpkg -geopackage-validator generate-definitions --gpkg-path "/gpkg/${gpkg_path}" > "$schema_path" +geopackage-validator generate-definitions --gpkg-path "/gpkg/${gpkg_path}" > "$schema_path" ``` ## What does it do @@ -111,20 +112,22 @@ The current checks are (see also the 'show-validations' command): | RQ21 | All layer and column names shall not be longer than 57 characters. | | RQ22 | Only the following EPSG spatial reference systems are allowed: 28992, 3034, 3035, 3040, 3041, 3042, 3043, 3044, 3045, 3046, 3047, 3048, 3049, 3857, 4258, 4326, 4936, 4937, 5730, 7409. | | RQ23 | Geometry should be valid and simple. | +| RQ24 | Geometry should not be empty. | | RC17 | It is recommended to name all GEOMETRY type columns 'geom'. | | RC18 | It is recommended to give all GEOMETRY type columns the same name. | | RC19 | It is recommended to only use multidimensional geometry coordinates (elevation and measurement) when necessary. | -| RC20 | It is recommended that all (MULTI)POLYGON geometries have a counter-clockwise orientation for their exterior ring, and a clockwise direction for all interior rings. | +| RC20 | It is recommended that all (MULTI)POLYGON geometries have a counter-clockwise orientation for their exterior ring, and a clockwise direction for all interior rings. | | UNKNOWN_WARNINGS | It is recommended that the unexpected (GDAL) warnings are looked into. | \* Legacy requirements are only executed with the validate command when explicitly requested in the validation set. -\** Since version 0.8.0 the recommendations are part of the same sequence as the requirements. From now on a check will always maintain the integer part of the code. Even if at a later time the validation type can shift between requirement and recommendation. +\** Since version 0.8.0 the recommendations are part of the same sequence as the requirements. From now on a check will always maintain the integer part of the code. Even if at a later time the validation type can shift between requirement and recommendation. An explanation in Dutch with a reason for each rule can be found [here](https://www.pdok.nl/voor-data-aanbieders#:~:text=Regels%20in%20detail). ## Geopackage versions The Geopackage validator support the following Geopackage versions: + - 1.4 - 1.3.1 - 1.3 @@ -133,11 +136,12 @@ The Geopackage validator support the following Geopackage versions: ## Installation This package requires: + - [GDAL](https://gdal.org/) version >= 3.2.1. - [Spatialite](https://www.gaia-gis.it/fossil/libspatialite/index) version >= 5.0.0 - And python >= 3.8 to run. -We recommend using the docker image. When above requirements are met the package can be installed using pip (`pip install pdok-geopackage-validator`). +We recommend using the docker image. When above requirements are met the package can be installed using pip (`pip install pdok-geopackage-validator`). ### Docker Installation @@ -167,7 +171,7 @@ To validate RQ8 you have to generate definitions first. ```bash docker run -v ${PWD}:/gpkg --rm pdok/geopackage-validator geopackage-validator generate-definitions --gpkg-path /path/to/file.gpkg -```` +``` ### Validate @@ -402,14 +406,14 @@ Options: ## Local development -We advise using docker-compose for local development. This allows live editing and testing code with the correct gdal/ogr version with spatialite 5.0.0. -First build the local image with your machines user id and group id: +We advise using docker-compose for local development. This allows live editing and testing code with the correct gdal/ogr version with spatialite 5.0.0. +First build the local image with your machines user id and group id: ```bash docker-compose build --build-arg USER_ID=`id -u` --build-arg GROUP_ID=`id -g` ``` -### Usage +### Docker run There will be a script you can run like this: @@ -422,7 +426,7 @@ to point the docker-compose to other files, you can add or edit the volumes in t ### Python console -Ipython is available in the docker: +Ipython is available in the docker: ```bash docker-compose run --rm validator ipython @@ -435,7 +439,7 @@ work on it, run the following command periodically: ```bash docker-compose run --rm validator black . -``` +``` ### Tests From 697d16a90aab6a23dccafcdd735783928b8aaf0f Mon Sep 17 00:00:00 2001 From: Michiel Korpel Date: Mon, 2 Dec 2024 18:08:51 +0100 Subject: [PATCH 02/10] Add validator for empty geoms --- .../validations/geometry_empty_check.py | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 geopackage_validator/validations/geometry_empty_check.py diff --git a/geopackage_validator/validations/geometry_empty_check.py b/geopackage_validator/validations/geometry_empty_check.py new file mode 100644 index 0000000..447c898 --- /dev/null +++ b/geopackage_validator/validations/geometry_empty_check.py @@ -0,0 +1,44 @@ +from typing import Iterable, Tuple +from geopackage_validator.validations import validator +from geopackage_validator import utils + +SQL_EMPTY_TEMPLATE = """SELECT count(row_id) AS count, row_id +FROM( + SELECT + cast(rowid AS INTEGER) AS row_id + FROM "{table_name}" WHERE ST_IsEmpty("{column_name}") = 1 +);""" + + +def query_geometry_empty(dataset, sql_template) -> Iterable[Tuple[str, str, str, int]]: + columns = utils.dataset_geometry_tables(dataset) + + for table_name, column_name, _ in columns: + validations = dataset.ExecuteSQL( + sql_template.format(table_name=table_name, column_name=column_name) + ) + for count, row_id in validations: + yield table_name, column_name, count, row_id + dataset.ReleaseResultSet(validations) + + +class ValidGeometryValidator(validator.Validator): + """Geometries should not be empty.""" + + code = 24 + level = validator.ValidationLevel.ERROR + message = "Found empty geometry in table: {table_name}, column {column_name}, {count} {count_label}, example id {row_id}" + + def check(self) -> Iterable[str]: + result = query_geometry_empty(self.dataset, SQL_EMPTY_TEMPLATE) + + return [ + self.message.format( + table_name=table_name, + column_name=column_name, + count=count, + count_label=("time" if count == 1 else "times"), + row_id=row_id, + ) + for table_name, column_name, count, row_id in result + ] From 324dd5979e6a6ed36116ed53fa4320aaf2284418 Mon Sep 17 00:00:00 2001 From: Michiel Korpel Date: Mon, 2 Dec 2024 19:14:10 +0100 Subject: [PATCH 03/10] Add EmptyGeometryValidator --- geopackage_validator/validations/__init__.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/geopackage_validator/validations/__init__.py b/geopackage_validator/validations/__init__.py index d910f1d..83e530a 100644 --- a/geopackage_validator/validations/__init__.py +++ b/geopackage_validator/validations/__init__.py @@ -10,6 +10,9 @@ ValidGeometryValidator, ValidGeometryValidatorV0, ) +from geopackage_validator.validations.geometry_empty_check import ( + EmptyGeometryValidator, +) from geopackage_validator.validations.layerfeature_check import ( OGRIndexValidator, NonEmptyLayerValidator, @@ -62,6 +65,7 @@ "GpkgGeometryTypeNameValidator", "GeometryTypeEqualsGpkgDefinitionValidator", "PolygonWindingOrderValidator", + "EmptyGeometryValidator", # Recommendations "GeomColumnNameValidator", "GeomColumnNameEqualValidator", From 7b7b3db2ac211f75fa27d74079616b31b2d5add6 Mon Sep 17 00:00:00 2001 From: Michiel Korpel Date: Mon, 2 Dec 2024 19:15:26 +0100 Subject: [PATCH 04/10] Rename class, fix return value --- geopackage_validator/validations/geometry_empty_check.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/geopackage_validator/validations/geometry_empty_check.py b/geopackage_validator/validations/geometry_empty_check.py index 447c898..3b43ba0 100644 --- a/geopackage_validator/validations/geometry_empty_check.py +++ b/geopackage_validator/validations/geometry_empty_check.py @@ -22,7 +22,7 @@ def query_geometry_empty(dataset, sql_template) -> Iterable[Tuple[str, str, str, dataset.ReleaseResultSet(validations) -class ValidGeometryValidator(validator.Validator): +class EmptyGeometryValidator(validator.Validator): """Geometries should not be empty.""" code = 24 @@ -41,4 +41,5 @@ def check(self) -> Iterable[str]: row_id=row_id, ) for table_name, column_name, count, row_id in result + if count > 0 ] From 56f0ceb05787ed362c4aeb6cb4eba15e48ebec15 Mon Sep 17 00:00:00 2001 From: Michiel Korpel Date: Mon, 2 Dec 2024 19:41:10 +0100 Subject: [PATCH 05/10] Add tests for RQ24 --- tests/data/test_geometry_empty.gpkg | Bin 0 -> 106496 bytes tests/test_validate.py | 1 + .../validations/test_geometry_empty_check.py | 25 ++++++++++++++++++ 3 files changed, 26 insertions(+) create mode 100755 tests/data/test_geometry_empty.gpkg create mode 100644 tests/validations/test_geometry_empty_check.py diff --git a/tests/data/test_geometry_empty.gpkg b/tests/data/test_geometry_empty.gpkg new file mode 100755 index 0000000000000000000000000000000000000000..46a35b252bbd7a8566adf9b3c9d56c70a9cd71ab GIT binary patch literal 106496 zcmeI5TWlNId4NgD5~(Ytb!_ggSLlD_t(=v#_Dr9hI0Am~dT(k_A^NFUPw%#Aa{ zp*WI^thM;Zq{TUN=FES-|3CkkbD1-_dnsPz$Y#A!Es7-W8gaSZt`7)txm*MAeH*^o z7J(no{CcDaH-rTfckcb5&}#-9VOYwLGDwOo(ZCbW zH~KF3?0IgUoA3Iy`v=Ao@pqHJ;nIlbQeo2ja#U>aTU%UXYv;)pEAkbN1*J9keeKU4 zUlzAnz9w)D(Hdtl%Ovs)$!C+x%S@I8t;g-s2$EaO?M#*-$#jm%=1C^qdR~w~48kN9 zS4cLq4ks)p(6;|ICd0V?`$GaaQ13{5TC8 zIMN(w3?mM+Nf@Dpg@(ZLC9Yw~Gs*t#Qq|*b=M4D^rF=&vM$3 zJz6_8HLT>fRlCAYQRIsiw!v+(!ht|0)QF+wfnNWk8OIt1@~v8?`WSra@_77nbMEh4 zZWj10u3qKD#sOQZS9Ys4K~}#t(A%nOYn1lN4l60!c?LZZ@nDCui_p$Co|Aksy+KQ% zUTmwaK?Y4jVWk(NxfG1nXB0aO^*v=E3t}p>yKr{NXeEVo@?HTd`erFM#fLIZ5#oY!6SJ|yKYnLVFcA}6{s;*F?x7S~oIue)h zx@x&I26XlO@d2<{e(EGyB^uv}wnSq}!om7*ZBCAh8Kf(jDu8S}~2W)(@ah!b#kyeaZkZX!6o zF*#nH953g`SFVq*ULW6l7z_t%^}S#y6gGPDFfn@YaMj?r;gG>$Tk3_@!qA|_fz})+ z@vgdaT{ze4Ukx6qVNDBaLC|)47P_6Esu;d@Ds91fY1`_s+b!3ATyrys0kGj|qqj{-Ik0^wyQvur*X2t#|VuxgAS z2G&Ubj6!I&XY%;&` zC_rThgadPt$aLVt4{eUj#^x935!w&Rz-5;k99P@pOWT2P#3uUuLL^F~LmM>zC{X6M zAUedn7KzSH&%|R3v6<+?!gM@tbLv7g7PFUOT9+Yn2gZDone?MT=5d98y34USk!x_! zA}{W2^Q8dDnaCzqnLLwy6o5eATt3xsE4Kw&mI8<~-}SJbTs*nDtZJzgB7u=VhG_O)BHef6cXlobwqn^0_1BO7fNJm`c#h3nmv+pVt zx0k#$w!WMr^V3wkGAHsSYUd;WtraKbpk9<tw>zaVoqr# z33^bWOF&k!K=>Ldb0xl7tiWGd&INgxaGN74dtqmrYt;ENZU4J{|LTGdZb$$LAOR$R z1dsp{Kmter2_OL^fCP}h>q_9JdthvAZ0IJHuS*c+i35Tgc{a`}GMCNc&i0VIF~kN^@u0!RP}AOR$R1dsp{Kmuoiz}s%m*ad%= z*Q<^C)0cp*W#ARA+WLRLaru68CbEJ;A^{|T1dsp{Kmter2_OL^fCP{L5nb01`j~ zNB{{S0VIF~kN^@u0!RP}AOWWY==eXj|DDppct`*VAOR$R1dsp{Kmter2_OL^fCSDa z0s8(wZ2zB4-J#S-00|%gB!C2v01`j~NB{{S0VIF~oD#tCf2X7{9uhzTNB{{S0VIF~ zkN^@u0!RP}Ac3<>U}*R=*QjgNHT;LcpAY|d`0Io2!B6_O2Yxj0#Pf~5%RT?nvv+R3 z>(}ldK$OngVSdzesW9n%85JA+))v>;+Ih0YihPA*t3?Rf*Z%DBWpSI`-6Qq*Sj7Vx!2{ z#Or1uO2t~OE|SL_sc?co#O-2@M1vu+zQTay6KQ7snn~yzBpP~;FzKbkr4a?|s3WX= zP2d`$72EN!Drx9glj$6j&7TsZlIG`#@s7`P=^j|;m3A1P*Xvcb!ad_EC$-I%6m*2D zW!5)IGtMc2p4=`CBG_4F#B7y@*f&T-waFX93d|k{Fi&Wa&FiwSmpwKGBPYc7{!p(! zH|ciqwKBIaJgx8|#};?Ry1dU?%Pt$Wh77-09Q61nC*9v(kbRQ%xOIO>EB=XFDTY{& zk_kBxwpOfiB+op^lXM2Yg;XkR$SS-qhM^u=KW6e)?T&79{MNQeS`N##g5sU+kF`5- zhu??txz7sxbB-)!3N%ByNXo$j`-6*_bS@9BJ)I{zPgqf`lC^AdHIdyQcbN?%gRr5D zhQfx^HsQ2tl{L_!wN7$5ZA~FWvdnF8>*+-%r{IQaLF8(pU_c!Ty?D3ZJaM{`lO3J z9{<#oTYA(q%=%UXw59kfSpZ`|Z>vEX4;%DygyY%disEjAgRAV;nw7&6b30K;DK$?h z(c9}UOdW~KcwM#J8G}48-s$rAgF*Mld(FI=@}=!`x8_MZWHhuRG6XG|B1?;sIZl`_ zgVX0Xd9I-xP^>px=7ds%-w}CeRLB*smi1sTTqz17TY}cX7SwOhn#m_un>8HKB2GXf zV^id-+(dAEV{*JYIbP0>uUsEry*|GAFc=Qj>U+UZC~WlPVPf>);i|!L!y$viw$ux) zg`q)<1Fbnu;$3y;x^S-7zZyJJ!{jB?2UDK|g_K)}dm-iQHbN@fozh~!4Y@uD5S9=4LX8TDE)h-d8W=rR; z+C}hrnW{-x+nyl%?p;;vX8-QW8wy*$Gqxu*RFk-`nMXCrb_$d(m;3(Nxl=hSlK$0$ zH$DEbG56ng%eTtfYoxWmqo!NpfEFAe6C|+7mjinLlCirjQOLs^sNu|2#@LlW0_z=gNF- zi>*E@2A~HZk%F{Z;}tjQZM!MbV`!;u^G_ZZX=~Lk#qC|b%&Q60rW_>P<{UM3n-iXK zjlE)NTVy{f78~rdVvVgs&|{7^1*z1!{CXZcouOPyj}8fN0^I3AV!}=Z2BK)HkVdt> z#no(*KRUjX#6|w2veSg+xRylQFw{w~aRrvFP)FRIpU}L>%yBQ`{DkI3qLJfX#qNw9 zj;}y3#4Z$`W8z?+@xpGg!VA#qN zR(gNbd$s429`Cs)-9PER*7ZqOkDGP<+v`((Nf#vXvg>yI()(Vo&*yTDj$XLn^NqrG z#p`x^>34a}HTuC*$gEoz`}f~ee*XTOnH)(cnZo*721fkp7*Qgh;)^gg*I4GLyDW!E zrCy@N1BS03b5FQhITD$Nf3ZkxhJKYjH6%V2nVyP8*I<(Z(iRAo)C4+SN=ZzsW0Oll*SPb`t!Ddv7=Et6V;F}T&_(h`&IEMs&w0-vUo5xT2{s8YHcO_Zb*q zPAqknC>oiC54C*yrPi;9s4{)@ku)UDfka={aG6|(Gl>|$wqi-0NA|T4-X++7w%&$CnhKGfu1C6N?&4a$?+jy%mNOT%+ zwUk+6))U~aCeu(2IhRSdp%xE%Uiu2iWbszVvguoencR)+Kd(l)W@3*ZD z&r6qD1w38>FbWuRAfW2mE&ZMdR3b?H-UZ9d5}i01WzCht(8|YSqHVwbVR_hKlIh<4BpOSttw3ACVJ)a=sfnsZYlBh`2o%z)n-d8H9vx zyMzV$rR87UR}npU00qnq^1P+Va}w1>=R3=xsjEpw3yI0HK->4cB+~W&+`j*H!3Q@a zfCP{L59+axc^N|JyK>?`sVIa`+yO|Ks?- zK7HbtL+v>JkK_N!$Ulz%cbYn+54<$H(zO`@YCkcK|I?Wf^egwFD^smhHZZLU$N!rX z(ArbN@qc-uo<7pA4I1P4f5*vv=EAkPL-w)nn)vu3$Kezw4Ef&FX`>G+uVp9JheNsW!*$HH!GinRThfZ$rJI<*c zJiH3hoMv|BeI^B~)85L$GXb6HsPgE2CM~l?2M$9ZPw*6)x}QkhWzy6GyaI799tJ?1 z8BH8h0dw{cP`;2%FJ-c8^7K_&{5wnKs1ZAGNFS9r4X9k2VeXRSJb%-i%i1y_+IL9j z)LX=%e%tXApgDMWKatKmdh$&*8`Uw=J*y$Xbny#^Z-PkrL#ZdwU2btDxdu-RPA6Q|k2))dTGBOK%_2DgFz#M&)esxF#4Z)16pi$XX)npzP`MkDIdD zO(5?+90BR3E?rWd0f1NJ4c?HR zgG~DK2=ZMMJm;)FK@9gz9q*dt>Hh2RbOe)K$rKz+v{y@{R`vE|Ar44h{X2 z%h$E%^8MrB-q2HDaNyU&-ya_6|GU1gdf(`+^%TxY-Cu{pxFLa8N#L+61WR6xd0%Kt z2&yYzv1P8p!CL61^8=)aq+93ON(IL>qq2C zd3(e3aWbWzqUB0k56@)Da;|i7$d#LfzF0(AhYLC*lyyDT94B*hjei2KLCNLWq`*{n z!~=QVPvv+hbbJ)>4wVW0>fz?K6^nmyWn2dHakV3u`0>HSUl|xM1(^&5M$O`t;W1f? zRtU4OAlX_fvB=Pc(lTv~4O*22c$a{p9=fVp7=&8oYY&VH3sXr~i~Ht4CE|u%L{p&Z zl}5j!81HIF9mS5XqnK4kx3v}D%nsRN#(!cfHmQlefv53h#cq8anM9AzB>L)^9M6DX zQ$t)0$Th>uTqmB4bx4DhUK8k5NvHki9#cUq#ok&-@NIT#@fn6!{6#G z4~=%DH1y35A?e(^9)CRUm7?wBun|GM`2A!yUr3}#zHPAykU|a?%ph_a6FjUo2QO;a zm*2@i|L1E5^6!Jt+FQM})NIH?sIQF@xTm{Zt;AV&-!Ns}a&Uq!a~g&-L(232pSpaX z8Y}^SA^{|T1dsp{Kmter2_OL^fCP{L53XKGi z01`j~NB{{S0VIF~kN^@u0!RP}I3ys?{~wri!3Q@afCP{L558%*{ A!vFvP literal 0 HcmV?d00001 diff --git a/tests/test_validate.py b/tests/test_validate.py index 8d37b8a..df11237 100644 --- a/tests/test_validate.py +++ b/tests/test_validate.py @@ -35,6 +35,7 @@ def test_determine_validations_to_use_none(): "RQ21", "RQ22", "RQ23", + "RQ24", "RC17", "RC18", "RC19", diff --git a/tests/validations/test_geometry_empty_check.py b/tests/validations/test_geometry_empty_check.py new file mode 100644 index 0000000..00e4649 --- /dev/null +++ b/tests/validations/test_geometry_empty_check.py @@ -0,0 +1,25 @@ +from geopackage_validator.utils import open_dataset +from geopackage_validator.validations.geometry_empty_check import ( + query_geometry_empty, + SQL_EMPTY_TEMPLATE, +) + + +def test_with_gpkg_empty(): + dataset = open_dataset('tests/data/test_geometry_empty.gpkg') + checks = list(query_geometry_empty(dataset, SQL_EMPTY_TEMPLATE)) + assert len(checks) == 1 + assert checks[0][0] == 'stations' + assert checks[0][1] == 'geom' + assert checks[0][2] == 45 + assert checks[0][3] == 129 + + +def test_with_gpkg_allcorrect(): + dataset = open_dataset('tests/data/test_allcorrect.gpkg') + checks = list(query_geometry_empty(dataset, SQL_EMPTY_TEMPLATE)) + assert len(checks) == 1 + assert checks[0][0] == 'test_allcorrect' + assert checks[0][1] == 'geom' + assert checks[0][2] == 0 + assert checks[0][3] is None From 04854cdf9bb3c5c5f2841d3be229b1cbf1161725 Mon Sep 17 00:00:00 2001 From: Michiel Korpel Date: Mon, 2 Dec 2024 19:42:14 +0100 Subject: [PATCH 06/10] Correct return type hint --- geopackage_validator/validations/geometry_empty_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/geopackage_validator/validations/geometry_empty_check.py b/geopackage_validator/validations/geometry_empty_check.py index 3b43ba0..8457730 100644 --- a/geopackage_validator/validations/geometry_empty_check.py +++ b/geopackage_validator/validations/geometry_empty_check.py @@ -10,7 +10,7 @@ );""" -def query_geometry_empty(dataset, sql_template) -> Iterable[Tuple[str, str, str, int]]: +def query_geometry_empty(dataset, sql_template) -> Iterable[Tuple[str, str, int, int]]: columns = utils.dataset_geometry_tables(dataset) for table_name, column_name, _ in columns: From 25dd228216f94cf78a0e338e585190ffc1699aaf Mon Sep 17 00:00:00 2001 From: Michiel Korpel Date: Mon, 2 Dec 2024 19:51:33 +0100 Subject: [PATCH 07/10] Add tests for RQ24 --- tests/validations/test_geometry_empty_check.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/validations/test_geometry_empty_check.py b/tests/validations/test_geometry_empty_check.py index 00e4649..011f9fe 100644 --- a/tests/validations/test_geometry_empty_check.py +++ b/tests/validations/test_geometry_empty_check.py @@ -6,20 +6,20 @@ def test_with_gpkg_empty(): - dataset = open_dataset('tests/data/test_geometry_empty.gpkg') + dataset = open_dataset("tests/data/test_geometry_empty.gpkg") checks = list(query_geometry_empty(dataset, SQL_EMPTY_TEMPLATE)) assert len(checks) == 1 - assert checks[0][0] == 'stations' - assert checks[0][1] == 'geom' + assert checks[0][0] == "stations" + assert checks[0][1] == "geom" assert checks[0][2] == 45 assert checks[0][3] == 129 def test_with_gpkg_allcorrect(): - dataset = open_dataset('tests/data/test_allcorrect.gpkg') + dataset = open_dataset("tests/data/test_allcorrect.gpkg") checks = list(query_geometry_empty(dataset, SQL_EMPTY_TEMPLATE)) assert len(checks) == 1 - assert checks[0][0] == 'test_allcorrect' - assert checks[0][1] == 'geom' + assert checks[0][0] == "test_allcorrect" + assert checks[0][1] == "geom" assert checks[0][2] == 0 assert checks[0][3] is None From 818b0cbb45e239c267252369b1267f9a821faf84 Mon Sep 17 00:00:00 2001 From: Michiel Korpel Date: Tue, 3 Dec 2024 10:08:26 +0100 Subject: [PATCH 08/10] Clarify RQ24 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 16f1954..d111626 100644 --- a/README.md +++ b/README.md @@ -112,7 +112,7 @@ The current checks are (see also the 'show-validations' command): | RQ21 | All layer and column names shall not be longer than 57 characters. | | RQ22 | Only the following EPSG spatial reference systems are allowed: 28992, 3034, 3035, 3040, 3041, 3042, 3043, 3044, 3045, 3046, 3047, 3048, 3049, 3857, 4258, 4326, 4936, 4937, 5730, 7409. | | RQ23 | Geometry should be valid and simple. | -| RQ24 | Geometry should not be empty. | +| RQ24 | Geometry should not be empty (e.g. 'POINT EMPTY', represented as 'POINT(NaN NaN)'). | | RC17 | It is recommended to name all GEOMETRY type columns 'geom'. | | RC18 | It is recommended to give all GEOMETRY type columns the same name. | | RC19 | It is recommended to only use multidimensional geometry coordinates (elevation and measurement) when necessary. | From 39401aba8252872308a67d56f4d1cd1b02e7aed6 Mon Sep 17 00:00:00 2001 From: Michiel Korpel Date: Tue, 3 Dec 2024 10:10:27 +0100 Subject: [PATCH 09/10] Refactor geometry_empty tests --- .../validations/test_geometry_empty_check.py | 23 ++++++++----------- .../validations/test_geometry_valid_check.py | 6 +++++ 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/tests/validations/test_geometry_empty_check.py b/tests/validations/test_geometry_empty_check.py index 011f9fe..0cd0ef5 100644 --- a/tests/validations/test_geometry_empty_check.py +++ b/tests/validations/test_geometry_empty_check.py @@ -1,25 +1,20 @@ from geopackage_validator.utils import open_dataset from geopackage_validator.validations.geometry_empty_check import ( - query_geometry_empty, - SQL_EMPTY_TEMPLATE, + EmptyGeometryValidator ) def test_with_gpkg_empty(): dataset = open_dataset("tests/data/test_geometry_empty.gpkg") - checks = list(query_geometry_empty(dataset, SQL_EMPTY_TEMPLATE)) - assert len(checks) == 1 - assert checks[0][0] == "stations" - assert checks[0][1] == "geom" - assert checks[0][2] == 45 - assert checks[0][3] == 129 + result = list(EmptyGeometryValidator(dataset).check()) + assert len(result) == 1 + assert ( + result[0] + == "Found empty geometry in table: stations, column geom, 45 times, example id 129" + ) def test_with_gpkg_allcorrect(): dataset = open_dataset("tests/data/test_allcorrect.gpkg") - checks = list(query_geometry_empty(dataset, SQL_EMPTY_TEMPLATE)) - assert len(checks) == 1 - assert checks[0][0] == "test_allcorrect" - assert checks[0][1] == "geom" - assert checks[0][2] == 0 - assert checks[0][3] is None + result = list(EmptyGeometryValidator(dataset).check()) + assert len(result) == 0 diff --git a/tests/validations/test_geometry_valid_check.py b/tests/validations/test_geometry_valid_check.py index 8aba940..d8d2259 100644 --- a/tests/validations/test_geometry_valid_check.py +++ b/tests/validations/test_geometry_valid_check.py @@ -38,6 +38,12 @@ def test_with_gpkg_valid_simple(): assert checks[0][3] == 1 assert checks[0][4] == 1 +def test_with_gpkg_empty(): + # geometries that are empty are still considered valid + dataset = open_dataset("tests/data/test_geometry_empty.gpkg") + checks = list(query_geometry_valid(dataset, SQL_VALID_TEMPLATE)) + assert len(checks) == 0 + def test_with_gpkg_allcorrect(): dataset = open_dataset("tests/data/test_allcorrect.gpkg") From a9661c2909f6e557017ff7ac67c03d48e3e4ff69 Mon Sep 17 00:00:00 2001 From: Michiel Korpel Date: Tue, 3 Dec 2024 10:16:10 +0100 Subject: [PATCH 10/10] Refactor geometry_empty tests --- tests/validations/test_geometry_empty_check.py | 2 +- tests/validations/test_geometry_valid_check.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/validations/test_geometry_empty_check.py b/tests/validations/test_geometry_empty_check.py index 0cd0ef5..8dae640 100644 --- a/tests/validations/test_geometry_empty_check.py +++ b/tests/validations/test_geometry_empty_check.py @@ -1,6 +1,6 @@ from geopackage_validator.utils import open_dataset from geopackage_validator.validations.geometry_empty_check import ( - EmptyGeometryValidator + EmptyGeometryValidator, ) diff --git a/tests/validations/test_geometry_valid_check.py b/tests/validations/test_geometry_valid_check.py index d8d2259..30299f1 100644 --- a/tests/validations/test_geometry_valid_check.py +++ b/tests/validations/test_geometry_valid_check.py @@ -38,6 +38,7 @@ def test_with_gpkg_valid_simple(): assert checks[0][3] == 1 assert checks[0][4] == 1 + def test_with_gpkg_empty(): # geometries that are empty are still considered valid dataset = open_dataset("tests/data/test_geometry_empty.gpkg")