Skip to content

Commit

Permalink
Merge pull request #404 from DOV-Vlaanderen/grondmonster-observaties
Browse files Browse the repository at this point in the history
Update grondmonster XML based on XSD schema updates
  • Loading branch information
Roel authored Feb 20, 2025
2 parents 43a2b60 + 52733ab commit b9a1d6b
Show file tree
Hide file tree
Showing 118 changed files with 3,806 additions and 2,446 deletions.
6 changes: 3 additions & 3 deletions docs/select_datasets.rst
Original file line number Diff line number Diff line change
Expand Up @@ -295,9 +295,9 @@ Default dataframe output
korrelvolumemassa,Grondmonster,10,float,NaN
volumemassa,Grondmonster,10,float,NaN
watergehalte,Grondmonster,10,float,NaN
diameter,Korrelverdeling,10,float,10
fractie,Korrelverdeling,10,float,0
methode,Korrelverdeling,10,string,ZEEFPROEF
methode,Korrelverdeling,10,string,Korrelverdeling d.m.v. hydrometer/areometer
diameter,Korrelverdeling,10,float,0.001575
fractie,Korrelverdeling,10,float,68.4

CPT measurements (Sonderingen)
------------------------------
Expand Down
84 changes: 75 additions & 9 deletions pydov/types/abstract.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,8 @@ class AbstractDovSubType(AbstractTypeCommon):

rootpath = None

subtypes = []

_UNRESOLVED = "{UNRESOLVED}"

def __init__(self):
Expand All @@ -133,6 +135,11 @@ def __init__(self):
[AbstractDovSubType._UNRESOLVED] * len(self.get_field_names()))
)

# Start with one independent, empty result list per declared subtype.
# A comprehension is required here: multiplying an empty list
# (``[] * n``) always yields ``[]``, which would leave this dict empty.
self.subdata = {st.get_name(): [] for st in self.subtypes}

@classmethod
def from_xml(cls, xml_data):
"""Build instances of this subtype from XML data.
Expand All @@ -151,7 +158,7 @@ def from_xml(cls, xml_data):
"""
try:
tree = parse_dov_xml(xml_data)
for element in tree.findall(cls.rootpath):
for element in tree.xpath(cls.rootpath):
yield cls.from_xml_element(element)
except XmlParseError:
# Ignore XmlParseError here in subtypes, assuming it will be
Expand Down Expand Up @@ -185,11 +192,12 @@ def from_xml_element(cls, element):
returntype=field.get('type', None)
)

instance._parse_subtypes(etree.tostring(element))
return instance

@classmethod
def get_field_names(cls):
"""Return the names of the fields available for this type.
"""Return the names of the fields available for this subtype.
Returns
-------
Expand All @@ -198,11 +206,16 @@ def get_field_names(cls):
the names of the columns in the output dataframe for this type.
"""
return [f['name'] for f in cls.fields]
field_names = [f['name'] for f in cls.fields]

for st in cls.subtypes:
field_names.extend(st.get_field_names())

return field_names

@classmethod
def get_fields(cls):
"""Return the metadata of the fields available for this type.
"""Return the metadata of the fields available for this subtype.
Returns
-------
Expand Down Expand Up @@ -233,10 +246,18 @@ def get_fields(cls):
Whether the field is mandatory (True) or can be null (False).
"""
return OrderedDict(
fields = OrderedDict(
zip([f['name'] for f in cls.fields],
[f for f in cls.fields]))

for st in cls.subtypes:
fields.update(OrderedDict(
zip([f['name'] for f in st.fields],
[f for f in st.fields])
))

return fields

@classmethod
def get_name(cls):
"""Return the name associated with this subtype.
Expand All @@ -249,6 +270,50 @@ def get_name(cls):
"""
return cls.__name__

def _parse_subtypes(self, xml):
    """Parse the subtypes with the given XML data.

    For every class in ``self.subtypes``, the instances produced by its
    ``from_xml`` are appended to ``self.subdata`` under the name returned
    by the subtype's ``get_name``.

    Parameters
    ----------
    xml : bytes
        The raw XML data of the DOV object as bytes.

    """
    for subtype in self.subtypes:
        # setdefault creates the list on first use, so this also works
        # when no entry for the subtype exists yet.
        self.subdata.setdefault(subtype.get_name(), []).extend(
            subtype.from_xml(xml))

def get_data_dicts(self):
    """Return the data dictionaries for this instance, including subtypes,
    for inclusion in the output dataframe.

    Without any subtype data the result is just this instance's own data.
    Otherwise one dictionary is produced per data dictionary of every
    subtype instance, each combining this instance's data with the
    subtype's data (subtype values win on equal keys). A subtype that has
    no instances contributes this instance's own data once.

    Returns
    -------
    list(dict)
        list of data dictionaries for inclusion in the output dataframe

    """
    if not self.subdata:
        return [self.data]

    datadicts = []
    for subitems in self.subdata.values():
        if not subitems:
            datadicts.append(self.data)
            continue

        for subitem in subitems:
            # Subtype instances may themselves have nested subtypes, so
            # ask them for their (already flattened) dictionaries.
            for subdata_dict in subitem.get_data_dicts():
                datadict = dict(self.data)
                datadict.update(subdata_dict)
                datadicts.append(datadict)

    return datadicts


class AbstractDovType(AbstractTypeCommon):
"""Abstract DOV type grouping fields and methods common to all DOV
Expand Down Expand Up @@ -742,10 +807,11 @@ def get_df_array(self, return_fields=None, session=None):
datadicts.append(self.data)
else:
for subdata in self.subdata[subtype]:
datadict = {}
datadict.update(self.data)
datadict.update(subdata.data)
datadicts.append(datadict)
for subdata_dict in subdata.get_data_dicts():
datadict = {}
datadict.update(self.data)
datadict.update(subdata_dict)
datadicts.append(datadict)

for d in datadicts:
datarecords.append([d.get(field, np.nan) for field in fields])
Expand Down
78 changes: 42 additions & 36 deletions pydov/types/grondmonster.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,43 @@
# -*- coding: utf-8 -*-
"""Module containing the DOV data type for grondmonster, including
subtypes."""
from pydov.types.fields import WfsField, XmlField, XsdType
from pydov.types.fields import WfsField, XmlField

from .abstract import AbstractDovSubType, AbstractDovType


class Korrelverdeling(AbstractDovSubType):
class KorrelverdelingMeetreeks(AbstractDovSubType):
"""Class representing the values of a Korrelverdeling."""

rootpath = './/grondmonster/observatieReeksData/' \
'korrelverdeling_reeks/korrelverdeling'
rootpath = './/waarde_meetreeks/meetreekswaarde'

fields = [
XmlField(name='diameter',
source_xpath='/diameter',
definition='.',
source_xpath='/meetpunt_numeriek',
definition='Diameter van de korrels',
datatype='float',
notnull=False),
XmlField(name='fractie',
source_xpath='/fractie',
definition='.',
source_xpath='/meetwaarde_numeriek',
definition='Fractie met grotere diameter',
datatype='float',
notnull=False),
]


class Korrelverdeling(AbstractDovSubType):
"""Class representing the Korrelverdelingen."""

rootpath = (".//observatie[starts-with(parametergroep, "
"'Onderkenningsproeven-korrelverdeling')]")

subtypes = [KorrelverdelingMeetreeks]

fields = [
XmlField(name='methode',
source_xpath='/methode',
definition='.',
source_xpath='/parameter',
definition=('Gebruikte methode om de korrelverdeling'
' te bepalen'),
datatype='string',
notnull=False)
]
Expand All @@ -35,12 +48,6 @@ class Grondmonster(AbstractDovType):

subtypes = [Korrelverdeling]

__grondmonsterDataCodesEnumType = XsdType(
xsd_schema='https://www.dov.vlaanderen.be/xdov/schema/latest/'
'xsd/kern/grondmonster/GrondmonsterDataCodes.xsd',
typename='MonsterEnumType'
)

fields = [
WfsField(name='pkey_grondmonster',
source_field='grondmonsterfiche',
Expand Down Expand Up @@ -82,53 +89,52 @@ class Grondmonster(AbstractDovType):
source_field='monstertype',
datatype='string'),
XmlField(name='astm_naam',
source_xpath='/grondmonster/observatieData/observatie['
'parameter="ASTM_NAAM"]/waarde_text',
source_xpath='/observatie[parameter="ASTM_naam"]/waarde_text',
definition='ASTM_naam',
datatype='string'),
XmlField(name='grondsoort_bggg',
source_xpath='/grondmonster/observatieData/observatie['
'parameter="BGGG"]/waarde_text',
source_xpath=('/observatie[parameter="Grondsoort '
'BGGG"]/waarde_text'),
definition='Grondsoort BGGG',
datatype='string'),
XmlField(name='humusgehalte',
source_xpath='/grondmonster/observatieData/observatie['
'parameter="HUMUSGEHALTE"]/waarde_numeriek',
source_xpath=('/observatie[parameter="Gehalte Organische '
'stoffen"]/waarde_numeriek'),
definition='Humusgehalte',
datatype='float'),
XmlField(name='kalkgehalte',
source_xpath='/grondmonster/observatieData/observatie['
'parameter="KALKGEHALTE"]/waarde_numeriek',
source_xpath=('/observatie[parameter="Gehalte Kalkachtige '
'stoffen"]/waarde_numeriek'),
definition='Kalkgehalte',
datatype='float'),
XmlField(name='uitrolgrens',
source_xpath='/grondmonster/observatieData/observatie['
'parameter="UITROLGRENS"]/waarde_numeriek',
source_xpath=('/observatie[parameter="Consistentiegrenzen - '
'Uitrolgrens"]/waarde_numeriek'),
definition='Uitrolgrens',
datatype='float'),
XmlField(name='vloeigrens',
source_xpath='/grondmonster/observatieData/observatie['
'parameter="VLOEIGRENS"]/waarde_numeriek',
source_xpath=('/observatie[parameter="Consistentiegrenzen - '
'Vloeigrens"]/waarde_numeriek'),
definition='Vloeigrens',
datatype='float'),
XmlField(name='glauconiet_totaal',
source_xpath='/grondmonster/observatieData/observatie['
'parameter="GLAUCONIET_TOTAAL"]/waarde_numeriek',
source_xpath=('/observatie[parameter="Glauconiet totaal"]/'
'waarde_numeriek'),
definition='Glauconiet totaal in percent',
datatype='float'),
XmlField(name='korrelvolumemassa',
source_xpath='/grondmonster/observatieData/observatie['
'parameter="KORRELVOLUMEMASSA"]/waarde_numeriek',
source_xpath=('observatie[parameter="korrelvolumemassa"]/'
'waarde_numeriek'),
definition='',
datatype='float'),
XmlField(name='volumemassa',
source_xpath='/grondmonster/observatieData/observatie['
'parameter="VOLUMEMASSA"]/waarde_numeriek',
source_xpath=('/observatie[parameter="volumemassa nat"]/'
'waarde_numeriek'),
definition='',
datatype='float'),
XmlField(name='watergehalte',
source_xpath='/grondmonster/observatieData/observatie['
'parameter="WATERGEHALTE"]/waarde_numeriek',
source_xpath=('/observatie[parameter="watergehalte"]/'
'waarde_numeriek'),
definition='',
datatype='float')
]
Expand Down
23 changes: 23 additions & 0 deletions tests/abstract.py
Original file line number Diff line number Diff line change
Expand Up @@ -1000,3 +1000,26 @@ def test_missing_pkey(self):
"""
with pytest.raises(ValueError):
self.datatype_class(None)

def test_nested_subtype_from_xml_element(self, dov_xml):
    """Test initialising the subtype(s) from the XML document.

    Starting at the datatype class, follow the chain of first subtypes
    and check at every level that parsing the XML yields an instance of
    that subtype.

    Parameters
    ----------
    dov_xml : pytest.fixture returning bytes
        Fixture providing DOV XML data.

    """
    def instance_from_xml(clz, xml):
        # Nothing to check for classes without subtypes or without XML.
        if not clz.subtypes or xml is None:
            return

        subtype = clz.subtypes[0]

        # Use the XML handed to this helper rather than reaching for the
        # enclosing fixture value, so the helper honours its own argument.
        st_instance = next(subtype.from_xml(xml))
        assert isinstance(st_instance, subtype)

        instance_from_xml(subtype, xml)

    instance_from_xml(self.datatype_class, dov_xml)
38 changes: 30 additions & 8 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,35 @@ def __get_remote_wfs_feature(*args, **kwargs):


@pytest.fixture
def mp_dov_xml(monkeypatch, request):
def dov_xml(request):
    """Fixture providing the DOV XML data.

    This fixture requires a module variable ``location_dov_xml``
    with the path to the dov_xml file on disk.

    Parameters
    ----------
    request : pytest.fixture
        PyTest fixture providing request context.

    Returns
    -------
    bytes or None
        The UTF-8 encoded content of the file, or None when the module
        does not define ``location_dov_xml``, the value is None, or the
        path is not an existing file.

    """
    file_path = getattr(request.module, "location_dov_xml", None)

    if file_path is None or not os.path.isfile(file_path):
        return None

    # Read as text and re-encode, so the result is always UTF-8 bytes.
    with open(file_path, 'r', encoding="utf-8") as f:
        return f.read().encode('utf-8')


@pytest.fixture
def mp_dov_xml(monkeypatch, dov_xml):
"""Monkeypatch the call to get the remote XML data.
This monkeypatch requires a module variable ``location_dov_xml``
Expand All @@ -328,14 +356,8 @@ def mp_dov_xml(monkeypatch, request):
PyTest fixture providing request context.
"""

def _get_xml_data(*args, **kwargs):
file_path = getattr(request.module, "location_dov_xml")
with open(file_path, 'r', encoding="utf-8") as f:
data = f.read()
if not isinstance(data, bytes):
data = data.encode('utf-8')
return data
return dov_xml

monkeypatch.setattr(pydov.types.abstract.AbstractDovType,
'_get_xml_data', _get_xml_data)
Expand Down
Loading

0 comments on commit b9a1d6b

Please sign in to comment.