From c140aeaef17c2079a53aedfbccc471f22e9c6711 Mon Sep 17 00:00:00 2001 From: sebastianzwickl <64072650+sebastianzwickl@users.noreply.github.com> Date: Wed, 25 Nov 2020 12:20:48 +0100 Subject: [PATCH] Add unit validation to `validate()` function (#84) --- nomenclature/__init__.py | 30 ++++++++++++++++++- .../definitions/variable/economy/economy.yaml | 2 +- nomenclature/tests/test_validate.py | 4 +++ 3 files changed, 34 insertions(+), 2 deletions(-) diff --git a/nomenclature/__init__.py b/nomenclature/__init__.py index 40c58e98..7e7aef11 100644 --- a/nomenclature/__init__.py +++ b/nomenclature/__init__.py @@ -179,7 +179,24 @@ def validate(df): # validate all (other) columns for col, codelist, ext in cols: - invalid = [c for c in df.data[col].unique() if c not in codelist] + invalid = [] + + # check variables for name and unit + if col == 'variable': + for c in df.data[col].unique(): + # check if name is in codelist + # and unit in the .yaml file description + if (c not in codelist) or not( + all(_s in variables[c][ + 'unit'] for _s in df.data.loc[ + df.data['variable'] == c]['unit'].values)): + invalid.append(c) + success = False + # check if only unit is not valid + invalid = _validate_unit(invalid) + else: + invalid = [c for c in df.data[col].unique() if c not in codelist] + # check if entries in the invalid list are related to directional data if col == 'region' and invalid: @@ -265,3 +282,14 @@ def _validate_directional(x): """Utility function to check whether region-to-region code is valid""" x = x.split('>') return len(x) == 2 and all([i in regions for i in x]) + + +def _validate_unit(x): + # sub function to filter out variables with valid name + for i in reversed(x): # iterate list reversely due to 'remove' method + if i in variables.keys(): + _valid_units = variables[i]['unit'] + logger.warning( + f'Unit for variable {i} is not in {_valid_units}!') + x.remove(i) + return x diff --git a/nomenclature/definitions/variable/economy/economy.yaml b/nomenclature/definitions/variable/economy/economy.yaml index 032cd481..9a11b94a 100644 --- a/nomenclature/definitions/variable/economy/economy.yaml +++ b/nomenclature/definitions/variable/economy/economy.yaml @@ -184,7 +184,7 @@ Price|Final Energy|Residential|Electricity: Prices should include the effect of carbon prices Mean price should reflect the variability of different prices that are accessible to end-users (including regulated prices, prices proposed by different competiting retailers...) - unit: US$2010/GJ + unit: [euro/kWh, US$2010/GJ] Price|Final Energy|Residential|Gases|Natural Gas: description: Mean Natural gas price at the final level in the residential sector. diff --git a/nomenclature/tests/test_validate.py b/nomenclature/tests/test_validate.py index 97b5b0c5..85210e36 100644 --- a/nomenclature/tests/test_validate.py +++ b/nomenclature/tests/test_validate.py @@ -56,3 +56,7 @@ def test_validate_time_entry(): replace([2005, 2010], value=['2005-06-17 00:00+01:00', '2010-07-21 12:00+01:00']) assert validate(IamDataFrame(df_sub)) + + +def test_validate_unit_entry(): + assert not (validate(df.rename(unit={'EJ/yr': 'MWh'})))