From b37dd992cce54a359f1563ce74a00471c835f6a8 Mon Sep 17 00:00:00 2001 From: Janis Gailis Date: Wed, 27 Sep 2017 13:21:41 +0200 Subject: [PATCH 1/6] Tests for time_bnds and single slice Added tests for testing temporal attribute introspection in case time_bnds exists, as well as for testing the single slice case. Refactored opimpl to prepare for time_bnds implementation --- cate/util/opimpl.py | 31 ++++++++++-- test/ops/test_normalize.py | 98 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 125 insertions(+), 4 deletions(-) diff --git a/cate/util/opimpl.py b/cate/util/opimpl.py index c42a8f09e..e8cd6817c 100644 --- a/cate/util/opimpl.py +++ b/cate/util/opimpl.py @@ -219,14 +219,37 @@ def adjust_temporal_attrs_impl(ds: xr.Dataset) -> xr.Dataset: """ ds = ds.copy() - ds.attrs['time_coverage_start'] = str(ds.time.values[0]) - ds.attrs['time_coverage_end'] = str(ds.time.values[-1]) - ds.attrs['time_coverage_resolution'] = _get_temporal_res(ds.time.values) - ds.attrs['time_coverage_duration'] = _get_duration(ds.time.values) + tempattrs = _get_temporal_props(ds) + + for key in tempattrs: + if tempattrs[key] is not None: + ds.attrs[key] = tempattrs[key] + else: + ds.attrs.pop(key, None) return ds +def _get_temporal_props(ds: xr.Dataset) -> dict: + """ + Get temporal boundaries, resolution and duration of the given dataset. If + the 'bounds' are explicitly defined, these will be used for calculation, + otherwise it will rest on information gathered from the 'time' dimension + itself. + + :param ds: Dataset + :return: A dictionary {'attr_name': attr_value} + """ + ret = dict() + + ret['time_coverage_start'] = str(ds.time.values[0]) + ret['time_coverage_end'] = str(ds.time.values[-1]) + ret['time_coverage_resolution'] = _get_temporal_res(ds.time.values) + ret['time_coverage_duration'] = _get_duration(ds.time.values) + + return ret + + def _get_spatial_props(ds: xr.Dataset, dim: str) -> dict: """ Get spatial boundaries, resolution and units of the given dimension of the given diff --git a/test/ops/test_normalize.py b/test/ops/test_normalize.py index f3024ffdc..2cb5e1369 100644 --- a/test/ops/test_normalize.py +++ b/test/ops/test_normalize.py @@ -8,6 +8,7 @@ from jdcal import gcal2jd import numpy as np from datetime import datetime +import calendar from cate.ops.normalize import normalize, adjust_spatial_attrs, adjust_temporal_attrs from cate.core.op import OP_REGISTRY @@ -298,3 +299,100 @@ def test_nominal(self): 'P1M') self.assertEqual(ds2.attrs['time_coverage_duration'], 'P92D') + + def test_bnds(self): + """Test a case when time_bnds is available""" + ds = xr.Dataset({ + 'first': (['lat', 'lon', 'time'], np.zeros([45, 90, 12])), + 'second': (['lat', 'lon', 'time'], np.zeros([45, 90, 12])), + 'lat': np.linspace(-88, 88, 45), + 'lon': np.linspace(-178, 178, 90), + 'time': [datetime(2000, x, 1) for x in range(1, 13)]}) + + month_ends = list() + for x in ds.time.values: + year = int(str(x)[0:4]) + month = int(str(x)[5:7]) + day = calendar.monthrange(year, month)[1] + month_ends.append(datetime(year, month, day)) + + time_bnds = np.empty([len(ds.time), 2]) + time_bnds[:, 0] = ds.time.values + time_bnds[:, 1] = month_ends + + ds['nv'] = [0, 1] + ds['time_bnds'] = (['time', 'nv'], time_bnds) + ds.time.attrs['bounds'] = 'time_bnds' + + ds1 = adjust_temporal_attrs(ds) + + # Make sure original dataset is not altered + with self.assertRaises(KeyError): + ds.attrs['time_coverage_start'] + + # Make sure expected values are in the new dataset + self.assertEqual(ds1.attrs['time_coverage_start'], + '2000-01-01T00:00:00.000000000') + self.assertEqual(ds1.attrs['time_coverage_end'], + '2000-12-31:00:00.000000000') + self.assertEqual(ds1.attrs['time_coverage_resolution'], + 'P1M') + self.assertEqual(ds1.attrs['time_coverage_duration'], + 'P365D') + + def test_single_slice(self): + """Test a case when the dataset is a single time slice""" + # With bnds + ds = xr.Dataset({ + 'first': (['lat', 'lon', 'time'], np.zeros([45, 90, 12])), + 'second': (['lat', 'lon', 'time'], np.zeros([45, 90, 12])), + 'lat': np.linspace(-88, 88, 45), + 'lon': np.linspace(-178, 178, 90), + 'time': [datetime(2000, 1, 1)]}) + + year = int(str(ds.time.values[0])[0:4]) + month = int(str(ds.time.values[0])[5:7]) + day = calendar.monthrange(year, month)[1] + + time_bnds = np.empty([1, 2]) + time_bnds[0, 0] = ds.time.values[0] + time_bnds[0, 1] = datetime(year, month, day) + + ds['nv'] = [0, 1] + ds['time_bnds'] = (['time', 'nv'], time_bnds) + ds.time.attrs['bounds'] = 'time_bnds' + + ds1 = adjust_temporal_attrs(ds) + + # Make sure original dataset is not altered + with self.assertRaises(KeyError): + ds.attrs['time_coverage_start'] + + # Make sure expected values are in the new dataset + self.assertEqual(ds1.attrs['time_coverage_start'], + '2000-01-01T00:00:00.000000000') + self.assertEqual(ds1.attrs['time_coverage_end'], + '2000-01-31T00:00:00.000000000') + self.assertEqual(ds1.attrs['time_coverage_resolution'], + 'P1M') + self.assertEqual(ds1.attrs['time_coverage_duration'], + 'P31D') + + # Without bnds + ds = xr.Dataset({ + 'first': (['lat', 'lon', 'time'], np.zeros([45, 90, 12])), + 'second': (['lat', 'lon', 'time'], np.zeros([45, 90, 12])), + 'lat': np.linspace(-88, 88, 45), + 'lon': np.linspace(-178, 178, 90), + 'time': [datetime(2000, 1, 1)]}) + + ds1 = adjust_temporal_attrs(ds) + + self.assertEqual(ds1.attrs['time_coverage_start'], + '2000-01-01T00:00:00.000000000') + self.assertEqual(ds1.attrs['time_coverage_end'], + '2000-01-01:00:00.000000000') + with self.assertRaises(KeyError): + ds.attrs['time_coverage_resolution'] + with self.assertRaises(KeyError): + ds.attrs['time_coverage_duration'] From 06e43574e8204197382005da492652e4cc72dce4 Mon Sep 17 00:00:00 2001 From: Janis Gailis Date: Wed, 27 Sep 2017 13:50:41 +0200 Subject: [PATCH 2/6] Fix tests Fix tests such that they work when the code works. They still fail, but not because of errors in test implementations. --- test/ops/test_normalize.py | 33 +++++++++++---------------------- 1 file changed, 11 insertions(+), 22 deletions(-) diff --git a/test/ops/test_normalize.py b/test/ops/test_normalize.py index 2cb5e1369..eec33d09b 100644 --- a/test/ops/test_normalize.py +++ b/test/ops/test_normalize.py @@ -302,12 +302,14 @@ def test_nominal(self): def test_bnds(self): """Test a case when time_bnds is available""" + time = [datetime(2000, x, 1) for x in range(1, 13)] ds = xr.Dataset({ 'first': (['lat', 'lon', 'time'], np.zeros([45, 90, 12])), 'second': (['lat', 'lon', 'time'], np.zeros([45, 90, 12])), 'lat': np.linspace(-88, 88, 45), 'lon': np.linspace(-178, 178, 90), - 'time': [datetime(2000, x, 1) for x in range(1, 13)]}) + 'nv': [0, 1], + 'time': time}) month_ends = list() for x in ds.time.values: @@ -316,12 +318,7 @@ def test_bnds(self): day = calendar.monthrange(year, month)[1] month_ends.append(datetime(year, month, day)) - time_bnds = np.empty([len(ds.time), 2]) - time_bnds[:, 0] = ds.time.values - time_bnds[:, 1] = month_ends - - ds['nv'] = [0, 1] - ds['time_bnds'] = (['time', 'nv'], time_bnds) + ds['time_bnds'] = (['time', 'nv'], list(zip(time, month_ends))) ds.time.attrs['bounds'] = 'time_bnds' ds1 = adjust_temporal_attrs(ds) @@ -344,23 +341,15 @@ def test_single_slice(self): """Test a case when the dataset is a single time slice""" # With bnds ds = xr.Dataset({ - 'first': (['lat', 'lon', 'time'], np.zeros([45, 90, 12])), - 'second': (['lat', 'lon', 'time'], np.zeros([45, 90, 12])), + 'first': (['lat', 'lon', 'time'], np.zeros([45, 90, 1])), + 'second': (['lat', 'lon', 'time'], np.zeros([45, 90, 1])), 'lat': np.linspace(-88, 88, 45), 'lon': np.linspace(-178, 178, 90), + 'nv': [0, 1], 'time': [datetime(2000, 1, 1)]}) - - year = int(str(ds.time.values[0])[0:4]) - month = int(str(ds.time.values[0])[5:7]) - day = calendar.monthrange(year, month)[1] - - time_bnds = np.empty([1, 2]) - time_bnds[0, 0] = ds.time.values[0] - time_bnds[0, 1] = datetime(year, month, day) - - ds['nv'] = [0, 1] - ds['time_bnds'] = (['time', 'nv'], time_bnds) ds.time.attrs['bounds'] = 'time_bnds' + ds['time_bnds'] = (['time', 'nv'], + [(datetime(2000, 1, 1), datetime(2000, 1, 31))]) ds1 = adjust_temporal_attrs(ds) @@ -380,8 +369,8 @@ def test_single_slice(self): # Without bnds ds = xr.Dataset({ - 'first': (['lat', 'lon', 'time'], np.zeros([45, 90, 12])), - 'second': (['lat', 'lon', 'time'], np.zeros([45, 90, 12])), + 'first': (['lat', 'lon', 'time'], np.zeros([45, 90, 1])), + 'second': (['lat', 'lon', 'time'], np.zeros([45, 90, 1])), 'lat': np.linspace(-88, 88, 45), 'lon': np.linspace(-178, 178, 90), 'time': [datetime(2000, 1, 1)]}) From d9a338efc12ea4c6e4afa084a36637047a89fab3 Mon Sep 17 00:00:00 2001 From: Janis Gailis Date: Wed, 27 Sep 2017 22:42:55 +0200 Subject: [PATCH 3/6] Updated temporal attribute normalization Temporal attribute determination now uses time_bnds variable if it is present and defined and can also handle datasets containing single time slices. --- cate/util/opimpl.py | 38 +++++++++++++++++++++++++++++--------- test/ops/test_normalize.py | 15 ++++++++------- 2 files changed, 37 insertions(+), 16 deletions(-) diff --git a/cate/util/opimpl.py b/cate/util/opimpl.py index e8cd6817c..4b291345d 100644 --- a/cate/util/opimpl.py +++ b/cate/util/opimpl.py @@ -242,10 +242,28 @@ def _get_temporal_props(ds: xr.Dataset) -> dict: """ ret = dict() - ret['time_coverage_start'] = str(ds.time.values[0]) - ret['time_coverage_end'] = str(ds.time.values[-1]) - ret['time_coverage_resolution'] = _get_temporal_res(ds.time.values) - ret['time_coverage_duration'] = _get_duration(ds.time.values) + try: + # According to CF conventions, the 'bounds' variable name should be in + # the attributes of the coordinate variable + bnds = ds['time'].attrs['bounds'] + time_min = ds[bnds].values[0][0] + time_max = ds[bnds].values[-1][1] + except KeyError: + time_min = ds['time'].values[0] + time_max = ds['time'].values[-1] + + if time_min != time_max: + ret['time_coverage_duration'] = _get_duration(time_min, time_max) + else: + ret['time_coverage_duration'] = None + + if ds['time'].values[0] == ds['time'].values[-1]: + ret['time_coverage_resolution'] = None + else: + ret['time_coverage_resolution'] = _get_temporal_res(ds.time.values) + + ret['time_coverage_start'] = str(time_min) + ret['time_coverage_end'] = str(time_max) return ret @@ -315,17 +333,19 @@ def _get_temporal_res(time: np.ndarray) -> str: return 'P{}D'.format(int(days)) -def _get_duration(time: np.ndarray) -> str: +def _get_duration(tmin: np.datetime64, tmax: np.datetime64) -> str: """ - Determine the duration of the given datetimes array. + Determine the duration of the given datetimes. See also: `ISO 8601 Durations `_ - :param time: A numpy array containing np.datetime64 objects + :param tmin: Time minimum + :param tmax: Time maximum :return: Temporal resolution formatted as an ISO 8601:2004 duration string """ - delta = time[-1] - time[0] - days = delta.astype('timedelta64[D]') / np.timedelta64(1, 'D') + delta = tmax - tmin + day = np.timedelta64(1, 'D') + days = (delta.astype('timedelta64[D]') / day) + 1 return 'P{}D'.format(int(days)) diff --git a/test/ops/test_normalize.py b/test/ops/test_normalize.py index eec33d09b..350dacbe0 100644 --- a/test/ops/test_normalize.py +++ b/test/ops/test_normalize.py @@ -284,7 +284,7 @@ def test_nominal(self): self.assertEqual(ds1.attrs['time_coverage_resolution'], 'P1M') self.assertEqual(ds1.attrs['time_coverage_duration'], - 'P335D') + 'P336D') # Test existing attributes update indexers = {'time': slice(datetime(2000, 2, 15), datetime(2000, 6, 15))} @@ -298,7 +298,7 @@ def test_nominal(self): self.assertEqual(ds2.attrs['time_coverage_resolution'], 'P1M') self.assertEqual(ds2.attrs['time_coverage_duration'], - 'P92D') + 'P93D') def test_bnds(self): """Test a case when time_bnds is available""" @@ -331,11 +331,11 @@ def test_bnds(self): self.assertEqual(ds1.attrs['time_coverage_start'], '2000-01-01T00:00:00.000000000') self.assertEqual(ds1.attrs['time_coverage_end'], - '2000-12-31:00:00.000000000') + '2000-12-31T00:00:00.000000000') self.assertEqual(ds1.attrs['time_coverage_resolution'], 'P1M') self.assertEqual(ds1.attrs['time_coverage_duration'], - 'P365D') + 'P366D') def test_single_slice(self): """Test a case when the dataset is a single time slice""" @@ -362,10 +362,11 @@ def test_single_slice(self): '2000-01-01T00:00:00.000000000') self.assertEqual(ds1.attrs['time_coverage_end'], '2000-01-31T00:00:00.000000000') - self.assertEqual(ds1.attrs['time_coverage_resolution'], - 'P1M') self.assertEqual(ds1.attrs['time_coverage_duration'], 'P31D') + with self.assertRaises(KeyError): + # Resolution is not defined for a single slice + ds.attrs['time_coverage_resolution'] # Without bnds ds = xr.Dataset({ @@ -380,7 +381,7 @@ def test_single_slice(self): self.assertEqual(ds1.attrs['time_coverage_start'], '2000-01-01T00:00:00.000000000') self.assertEqual(ds1.attrs['time_coverage_end'], - '2000-01-01:00:00.000000000') + '2000-01-01T00:00:00.000000000') with self.assertRaises(KeyError): ds.attrs['time_coverage_resolution'] with self.assertRaises(KeyError): From 85213b61b71c8360ad897084a52183562cc1b6f8 Mon Sep 17 00:00:00 2001 From: Marco Zuehlke Date: Thu, 28 Sep 2017 16:20:08 +0200 Subject: [PATCH 4/6] only allow valid python identifiers as resource names See #436 --- cate/core/workspace.py | 11 +++++++++++ test/core/test_workspace.py | 19 ++++++++++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/cate/core/workspace.py b/cate/core/workspace.py index 56858b8be..44ec9ecad 100644 --- a/cate/core/workspace.py +++ b/cate/core/workspace.py @@ -486,6 +486,7 @@ def delete_resource(self, res_name: str): del self._resource_cache[res_name] def rename_resource(self, res_name: str, new_res_name: str) -> None: + Workspace._validate_res_name(new_res_name) with self._lock: res_step = self.workflow.find_node(res_name) if res_step is None: @@ -533,6 +534,7 @@ def set_resource(self, default_res_pattern = conf.get_default_res_pattern() res_pattern = op.op_meta_info.header.get('res_pattern', default_res_pattern) res_name = self._new_resource_name(res_pattern) + Workspace._validate_res_name(res_name) new_step = OpStep(op, node_id=res_name) @@ -656,6 +658,15 @@ def _assert_open(self): def _new_resource_name(self, res_pattern): return new_indexed_name({step.id for step in self.workflow.steps}, res_pattern) + @staticmethod + def _validate_res_name(res_name: str): + if not res_name.isidentifier(): + raise WorkspaceError( + "Resource name '%s' is not valid. " + "The name must only contain the uppercase and lowercase letters A through Z, the underscore _ and, " + "except for the first character, the digits 0 through 9." % res_name) + + # noinspection PyArgumentList class WorkspaceError(Exception): diff --git a/test/core/test_workspace.py b/test/core/test_workspace.py index 4cdb449dd..9767951c7 100644 --- a/test/core/test_workspace.py +++ b/test/core/test_workspace.py @@ -8,7 +8,7 @@ import xarray as xr from cate.core.workflow import Workflow, OpStep -from cate.core.workspace import Workspace, mk_op_arg, mk_op_args, mk_op_kwargs +from cate.core.workspace import Workspace, WorkspaceError, mk_op_arg, mk_op_args, mk_op_kwargs from cate.util import UNDEFINED from cate.util.opmetainf import OpMetaInfo @@ -304,6 +304,23 @@ def set_resource_and_execute(): expected_res_names = {'res_%s' % (i + 1) for i in range(num_res)} self.assertEqual(actual_res_names, expected_res_names) + def test_validate_res_name(self): + Workspace._validate_res_name("a") + Workspace._validate_res_name("A") + Workspace._validate_res_name("abc_42") + Workspace._validate_res_name("abc42") + Workspace._validate_res_name("_abc42") + # with self.assertRaises(WorkspaceError): + # Workspace._validate_res_name("0") + with self.assertRaises(WorkspaceError): + Workspace._validate_res_name("a-b") + with self.assertRaises(WorkspaceError): + Workspace._validate_res_name("a+b") + with self.assertRaises(WorkspaceError): + Workspace._validate_res_name("a.b") + with self.assertRaises(WorkspaceError): + Workspace._validate_res_name("file://path") + def test_example(self): expected_json_text = """{ "schema_version": 1, From 9d632478b8f38b4a6db16cd02d4c715f4481fc75 Mon Sep 17 00:00:00 2001 From: Marco Zuehlke Date: Thu, 28 Sep 2017 16:21:08 +0200 Subject: [PATCH 5/6] flake8 --- cate/core/workspace.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cate/core/workspace.py b/cate/core/workspace.py index 44ec9ecad..038a5f2e1 100644 --- a/cate/core/workspace.py +++ b/cate/core/workspace.py @@ -667,7 +667,6 @@ def _validate_res_name(res_name: str): "except for the first character, the digits 0 through 9." % res_name) - # noinspection PyArgumentList class WorkspaceError(Exception): def __init__(self, cause, *args, **kwargs): From a8928fa5cc92bbf405aaabb1ab4a66f9a9571ecd Mon Sep 17 00:00:00 2001 From: Marco Zuehlke Date: Thu, 28 Sep 2017 16:22:45 +0200 Subject: [PATCH 6/6] only allow valid python identifiers as resource names (CHANGES.md) See #436 --- CHANGES.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index 17f10fb99..86b0c0ede 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -15,7 +15,9 @@ [#386](https://github.com/CCI-Tools/cate/issues/386) * Use global temporal attributes to determine temporal resolution in aggregation operations [#340](https://github.com/CCI-Tools/cate/issues/340) - +* Only allow valid python identifiers as resource names + [#436](https://github.com/CCI-Tools/cate/issues/436) + ## Changes in version 0.9.0.dev7 ### Improvements and new Features