diff --git a/CHANGES.md b/CHANGES.md index 17f10fb99..86b0c0ede 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -15,7 +15,9 @@ [#386](https://github.com/CCI-Tools/cate/issues/386) * Use global temporal attributes to determine temporal resolution in aggregation operations [#340](https://github.com/CCI-Tools/cate/issues/340) - +* Only allow valid python identifiers as resource names + [#436](https://github.com/CCI-Tools/cate/issues/436) + ## Changes in version 0.9.0.dev7 ### Improvements and new Features diff --git a/cate/core/workspace.py b/cate/core/workspace.py index 56858b8be..038a5f2e1 100644 --- a/cate/core/workspace.py +++ b/cate/core/workspace.py @@ -486,6 +486,7 @@ def delete_resource(self, res_name: str): del self._resource_cache[res_name] def rename_resource(self, res_name: str, new_res_name: str) -> None: + Workspace._validate_res_name(new_res_name) with self._lock: res_step = self.workflow.find_node(res_name) if res_step is None: @@ -533,6 +534,7 @@ def set_resource(self, default_res_pattern = conf.get_default_res_pattern() res_pattern = op.op_meta_info.header.get('res_pattern', default_res_pattern) res_name = self._new_resource_name(res_pattern) + Workspace._validate_res_name(res_name) new_step = OpStep(op, node_id=res_name) @@ -656,6 +658,14 @@ def _assert_open(self): def _new_resource_name(self, res_pattern): return new_indexed_name({step.id for step in self.workflow.steps}, res_pattern) + @staticmethod + def _validate_res_name(res_name: str): + if not res_name.isidentifier(): + raise WorkspaceError( + "Resource name '%s' is not valid. " + "The name must only contain the uppercase and lowercase letters A through Z, the underscore _ and, " + "except for the first character, the digits 0 through 9." % res_name) + # noinspection PyArgumentList class WorkspaceError(Exception): diff --git a/cate/util/opimpl.py b/cate/util/opimpl.py index c42a8f09e..4b291345d 100644 --- a/cate/util/opimpl.py +++ b/cate/util/opimpl.py @@ -219,14 +219,55 @@ def adjust_temporal_attrs_impl(ds: xr.Dataset) -> xr.Dataset: """ ds = ds.copy() - ds.attrs['time_coverage_start'] = str(ds.time.values[0]) - ds.attrs['time_coverage_end'] = str(ds.time.values[-1]) - ds.attrs['time_coverage_resolution'] = _get_temporal_res(ds.time.values) - ds.attrs['time_coverage_duration'] = _get_duration(ds.time.values) + tempattrs = _get_temporal_props(ds) + + for key in tempattrs: + if tempattrs[key] is not None: + ds.attrs[key] = tempattrs[key] + else: + ds.attrs.pop(key, None) return ds +def _get_temporal_props(ds: xr.Dataset) -> dict: + """ + Get temporal boundaries, resolution and duration of the given dataset. If + the 'bounds' are explicitly defined, these will be used for calculation, + otherwise it will rest on information gathered from the 'time' dimension + itself. + + :param ds: Dataset + :return: A dictionary {'attr_name': attr_value} + """ + ret = dict() + + try: + # According to CF conventions, the 'bounds' variable name should be in + # the attributes of the coordinate variable + bnds = ds['time'].attrs['bounds'] + time_min = ds[bnds].values[0][0] + time_max = ds[bnds].values[-1][1] + except KeyError: + time_min = ds['time'].values[0] + time_max = ds['time'].values[-1] + + if time_min != time_max: + ret['time_coverage_duration'] = _get_duration(time_min, time_max) + else: + ret['time_coverage_duration'] = None + + if ds['time'].values[0] == ds['time'].values[-1]: + ret['time_coverage_resolution'] = None + else: + ret['time_coverage_resolution'] = _get_temporal_res(ds.time.values) + + ret['time_coverage_start'] = str(time_min) + ret['time_coverage_end'] = str(time_max) + + return ret + + def _get_spatial_props(ds: xr.Dataset, dim: str) -> dict: """ Get spatial boundaries, resolution and units of the given dimension of the given @@ -292,17 +333,19 @@ def _get_temporal_res(time: np.ndarray) -> str: return 'P{}D'.format(int(days)) -def _get_duration(time: np.ndarray) -> str: +def _get_duration(tmin: np.datetime64, tmax: np.datetime64) -> str: """ - Determine the duration of the given datetimes array. + Determine the duration of the given datetimes. See also: `ISO 8601 Durations `_ - :param time: A numpy array containing np.datetime64 objects + :param tmin: Time minimum + :param tmax: Time maximum :return: Temporal resolution formatted as an ISO 8601:2004 duration string """ - delta = time[-1] - time[0] - days = delta.astype('timedelta64[D]') / np.timedelta64(1, 'D') + delta = tmax - tmin + day = np.timedelta64(1, 'D') + days = (delta.astype('timedelta64[D]') / day) + 1 return 'P{}D'.format(int(days)) diff --git a/test/core/test_workspace.py b/test/core/test_workspace.py index 4cdb449dd..9767951c7 100644 --- a/test/core/test_workspace.py +++ b/test/core/test_workspace.py @@ -8,7 +8,7 @@ import xarray as xr from cate.core.workflow import Workflow, OpStep -from cate.core.workspace import Workspace, mk_op_arg, mk_op_args, mk_op_kwargs +from cate.core.workspace import Workspace, WorkspaceError, mk_op_arg, mk_op_args, mk_op_kwargs from cate.util import UNDEFINED from cate.util.opmetainf import OpMetaInfo @@ -304,6 +304,23 @@ def set_resource_and_execute(): expected_res_names = {'res_%s' % (i + 1) for i in range(num_res)} self.assertEqual(actual_res_names, expected_res_names) + def test_validate_res_name(self): + Workspace._validate_res_name("a") + Workspace._validate_res_name("A") + Workspace._validate_res_name("abc_42") + Workspace._validate_res_name("abc42") + Workspace._validate_res_name("_abc42") + # with self.assertRaises(WorkspaceError): + # Workspace._validate_res_name("0") + with self.assertRaises(WorkspaceError): + Workspace._validate_res_name("a-b") + with self.assertRaises(WorkspaceError): + Workspace._validate_res_name("a+b") + with self.assertRaises(WorkspaceError): + Workspace._validate_res_name("a.b") + with self.assertRaises(WorkspaceError): + Workspace._validate_res_name("file://path") + def test_example(self): expected_json_text = """{ "schema_version": 1, diff --git a/test/ops/test_normalize.py b/test/ops/test_normalize.py index f3024ffdc..350dacbe0 100644 --- a/test/ops/test_normalize.py +++ b/test/ops/test_normalize.py @@ -8,6 +8,7 @@ from jdcal import gcal2jd import numpy as np from datetime import datetime +import calendar from cate.ops.normalize import normalize, adjust_spatial_attrs, adjust_temporal_attrs from cate.core.op import OP_REGISTRY @@ -283,7 +284,7 @@ def test_nominal(self): self.assertEqual(ds1.attrs['time_coverage_resolution'], 'P1M') self.assertEqual(ds1.attrs['time_coverage_duration'], - 'P335D') + 'P336D') # Test existing attributes update indexers = {'time': slice(datetime(2000, 2, 15), datetime(2000, 6, 15))} @@ -297,4 +298,91 @@ def test_nominal(self): self.assertEqual(ds2.attrs['time_coverage_resolution'], 'P1M') self.assertEqual(ds2.attrs['time_coverage_duration'], - 'P92D') + 'P93D') + + def test_bnds(self): + """Test a case when time_bnds is available""" + time = [datetime(2000, x, 1) for x in range(1, 13)] + ds = xr.Dataset({ + 'first': (['lat', 'lon', 'time'], np.zeros([45, 90, 12])), + 'second': (['lat', 'lon', 'time'], np.zeros([45, 90, 12])), + 'lat': np.linspace(-88, 88, 45), + 'lon': np.linspace(-178, 178, 90), + 'nv': [0, 1], + 'time': time}) + + month_ends = list() + for x in ds.time.values: + year = int(str(x)[0:4]) + month = int(str(x)[5:7]) + day = calendar.monthrange(year, month)[1] + month_ends.append(datetime(year, month, day)) + + ds['time_bnds'] = (['time', 'nv'], list(zip(time, month_ends))) + ds.time.attrs['bounds'] = 'time_bnds' + + ds1 = adjust_temporal_attrs(ds) + + # Make sure original dataset is not altered + with self.assertRaises(KeyError): + ds.attrs['time_coverage_start'] + + # Make sure expected values are in the new dataset + self.assertEqual(ds1.attrs['time_coverage_start'], + '2000-01-01T00:00:00.000000000') + self.assertEqual(ds1.attrs['time_coverage_end'], + '2000-12-31T00:00:00.000000000') + self.assertEqual(ds1.attrs['time_coverage_resolution'], + 'P1M') + self.assertEqual(ds1.attrs['time_coverage_duration'], + 'P366D') + + def test_single_slice(self): + """Test a case when the dataset is a single time slice""" + # With bnds + ds = xr.Dataset({ + 'first': (['lat', 'lon', 'time'], np.zeros([45, 90, 1])), + 'second': (['lat', 'lon', 'time'], np.zeros([45, 90, 1])), + 'lat': np.linspace(-88, 88, 45), + 'lon': np.linspace(-178, 178, 90), + 'nv': [0, 1], + 'time': [datetime(2000, 1, 1)]}) + ds.time.attrs['bounds'] = 'time_bnds' + ds['time_bnds'] = (['time', 'nv'], + [(datetime(2000, 1, 1), datetime(2000, 1, 31))]) + + ds1 = adjust_temporal_attrs(ds) + + # Make sure original dataset is not altered + with self.assertRaises(KeyError): + ds.attrs['time_coverage_start'] + + # Make sure expected values are in the new dataset + self.assertEqual(ds1.attrs['time_coverage_start'], + '2000-01-01T00:00:00.000000000') + self.assertEqual(ds1.attrs['time_coverage_end'], + '2000-01-31T00:00:00.000000000') + self.assertEqual(ds1.attrs['time_coverage_duration'], + 'P31D') + with self.assertRaises(KeyError): + # Resolution is not defined for a single slice + ds.attrs['time_coverage_resolution'] + + # Without bnds + ds = xr.Dataset({ + 'first': (['lat', 'lon', 'time'], np.zeros([45, 90, 1])), + 'second': (['lat', 'lon', 'time'], np.zeros([45, 90, 1])), + 'lat': np.linspace(-88, 88, 45), + 'lon': np.linspace(-178, 178, 90), + 'time': [datetime(2000, 1, 1)]}) + + ds1 = adjust_temporal_attrs(ds) + + self.assertEqual(ds1.attrs['time_coverage_start'], + '2000-01-01T00:00:00.000000000') + self.assertEqual(ds1.attrs['time_coverage_end'], + '2000-01-01T00:00:00.000000000') + with self.assertRaises(KeyError): + ds.attrs['time_coverage_resolution'] + with self.assertRaises(KeyError): + ds.attrs['time_coverage_duration']