Skip to content

Commit

Permalink
34 add function for latest measurements (#35)
Browse files Browse the repository at this point in the history
* added function to collect latest measurements

* added testcase

* updated history.rst
  • Loading branch information
veenstrajelmer authored Mar 1, 2024
1 parent e722c6d commit 1806fcd
Show file tree
Hide file tree
Showing 4 changed files with 97 additions and 57 deletions.
2 changes: 1 addition & 1 deletion HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ UNRELEASED
------------------
* improved nan filtering of measurements in https://github.com/openearth/ddlpy/pull/30
* add early return when no data in entire requested period in https://github.com/openearth/ddlpy/pull/33

* add `ddlpy.measurements_latest()` to retrieve latest measurements in https://github.com/openearth/ddlpy/pull/35

0.1.0 (2019-01-03)
------------------
Expand Down
2 changes: 1 addition & 1 deletion ddlpy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,6 @@
__version__ = '0.1.0'

from ddlpy.ddlpy import locations
from ddlpy.ddlpy import measurements
from ddlpy.ddlpy import measurements, measurements_latest

# Public API of the package; every name imported above for users is listed
# so that `from ddlpy import *` exposes measurements_latest as well.
__all__ = ['locations', 'measurements', 'measurements_latest']
145 changes: 90 additions & 55 deletions ddlpy/ddlpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,37 +125,9 @@ def _measurements_available(location, start_date, end_date):
return False


def _measurements_slice(location, start_date, end_date):
"""get measurements for location, for the period start_date, end_date, use measurements instead"""
endpoint = ENDPOINTS["collect_observations"]

start_date_str = pytz.UTC.localize(start_date).isoformat(timespec="milliseconds")
end_date_str = pytz.UTC.localize(end_date).isoformat(timespec="milliseconds")

request_dicts = _get_request_dicts(location)

request = {
"AquoPlusWaarnemingMetadata": {
"AquoMetadata": request_dicts["AquoMetadata"]
},
"Locatie": request_dicts["Locatie"],
"Periode": {"Begindatumtijd": start_date_str,
"Einddatumtijd": end_date_str},
}

try:
logger.debug("requesting: {}".format(request))
resp = requests.post(endpoint["url"], json=request)
result = resp.json()
if not result["Succesvol"]:
logger.debug("Got invalid response: {}".format(result))
raise NoDataException(result.get("Foutmelding", "No error returned"))
except NoDataException as e:
logger.debug("No data availble for {} {}".format(start_date, end_date))
raise e

def _combine_waarnemingenlijst(result, location):
assert "WaarnemingenLijst" in result

# assert len(result['WaarnemingenLijst']) == 1
# flatten the datastructure
rows = []
Expand Down Expand Up @@ -190,6 +162,94 @@ def _measurements_slice(location, start_date, end_date):
rows.append(new_row)
# normalize and return
df = pd.json_normalize(rows)

# add other info
df["locatie_code"] = location.get("Code", location.name)

for name in [
"Coordinatenstelsel",
"Naam",
"X",
"Y",
"Parameter_Wat_Omschrijving",
]:
df[name] = location[name]

# set NA value
if "WaarnemingMetadata.KwaliteitswaardecodeLijst" in df.columns:
bool_nan = df["WaarnemingMetadata.KwaliteitswaardecodeLijst"] == "99"
if "Meetwaarde.Waarde_Numeriek" in df.columns:
df.loc[bool_nan,"Meetwaarde.Waarde_Numeriek"] = np.nan

try:
df["t"] = pd.to_datetime(df["Tijdstip"])
except KeyError:
logger.exception(
"Cannot add time variable t because variable Tijdstip is not found"
)

return df


def measurements_latest(location):
    """Return the latest measurements for a location.

    Posts a request for ``location`` to the ``collect_latest_observations``
    endpoint and flattens the response with ``_combine_waarnemingenlijst``.

    Parameters
    ----------
    location
        Location record, forwarded to ``_get_request_dicts`` and
        ``_combine_waarnemingenlijst``.
        NOTE(review): presumably one row of the ``locations()`` result --
        confirm against callers.

    Returns
    -------
    pandas.DataFrame
        The dataframe built by ``_combine_waarnemingenlijst``.

    Raises
    ------
    NoDataException
        If the response reports ``Succesvol`` as false; the message is the
        ``Foutmelding`` of the response when present.
    """
    endpoint = ENDPOINTS['collect_latest_observations']

    request_dicts = _get_request_dicts(location)

    # this endpoint takes lists of metadata/locations; a single entry of
    # each is sent
    request = {
        "AquoPlusWaarnemingMetadataLijst": [
            {"AquoMetadata": request_dicts["AquoMetadata"]}
        ],
        "LocatieLijst": [request_dicts["Locatie"]],
    }

    logger.debug('requesting: {}'.format(request))
    resp = requests.post(endpoint['url'], json=request, timeout=5)
    result = resp.json()

    if not result['Succesvol']:
        logger.debug('Got invalid response: {}'.format(result))
        error = NoDataException(result.get('Foutmelding', 'No error returned'))
        logger.debug('No data availble')
        raise error

    # the request succeeded, so there is always a dataframe to return
    return _combine_waarnemingenlijst(result, location)


def _measurements_slice(location, start_date, end_date):
    """Fetch the observations of one location for a single period.

    Internal helper: use ``measurements`` instead. The request is posted to
    the ``collect_observations`` endpoint and a successful response is
    flattened by ``_combine_waarnemingenlijst``; a response that is not
    ``Succesvol`` raises ``NoDataException``.
    """
    endpoint = ENDPOINTS["collect_observations"]

    # label both bounds as UTC and serialise them with millisecond precision
    period = {
        "Begindatumtijd": pytz.UTC.localize(start_date).isoformat(timespec="milliseconds"),
        "Einddatumtijd": pytz.UTC.localize(end_date).isoformat(timespec="milliseconds"),
    }

    parts = _get_request_dicts(location)
    payload = {
        "AquoPlusWaarnemingMetadata": {"AquoMetadata": parts["AquoMetadata"]},
        "Locatie": parts["Locatie"],
        "Periode": period,
    }

    logger.debug("requesting: {}".format(payload))
    response = requests.post(endpoint["url"], json=payload)
    result = response.json()

    if not result["Succesvol"]:
        logger.debug("Got invalid response: {}".format(result))
        error = NoDataException(result.get("Foutmelding", "No error returned"))
        logger.debug("No data availble for {} {}".format(start_date, end_date))
        raise error

    return _combine_waarnemingenlijst(result, location)


Expand Down Expand Up @@ -224,32 +284,7 @@ def measurements(location, start_date, end_date):
if len(measurements) > 0:
measurements = pd.concat(measurements)

# set NA value
if "WaarnemingMetadata.KwaliteitswaardecodeLijst" in measurements.columns:
bool_nan = measurements["WaarnemingMetadata.KwaliteitswaardecodeLijst"] == "99"
if "Meetwaarde.Waarde_Numeriek" in measurements.columns:
measurements.loc[bool_nan,"Meetwaarde.Waarde_Numeriek"] = np.nan

try:
measurements["t"] = pd.to_datetime(measurements["Tijdstip"])
except KeyError:
logger.exception(
"Cannot add time variable t because variable Tijdstip is not found"
)

# drop duplicate rows (preserves e.g. different Grootheden/Groeperingen at same timestep)
measurements = measurements.drop_duplicates()

# add other info
measurements["locatie_code"] = location.get("Code", location.name)

for name in [
"Coordinatenstelsel",
"Naam",
"X",
"Y",
"Parameter_Wat_Omschrijving",
]:
measurements[name] = location[name]

return measurements
5 changes: 5 additions & 0 deletions tests/test_ddlpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@ def test_measurements(location):
measurements = ddlpy.measurements(location, start_date=start_date, end_date=end_date)
assert measurements.shape[0] > 1

def test_measurements_latest(location):
    """The latest measurements of a location contain more than one row."""
    n_rows = ddlpy.measurements_latest(location).shape[0]
    assert n_rows > 1

def test_measurements_long(location):
"""measurements for a location """
start_date = datetime.datetime(1951, 11, 1)
Expand Down

0 comments on commit 1806fcd

Please sign in to comment.