From b37dd992cce54a359f1563ce74a00471c835f6a8 Mon Sep 17 00:00:00 2001
From: Janis Gailis <gailis@stcorp.no>
Date: Wed, 27 Sep 2017 13:21:41 +0200
Subject: [PATCH 1/6] Tests for time_bnds and single slice

Added tests for testing temporal attribute introspection in case
time_bnds exists, as well as for testing the single slice case.

Refactored opimpl to prepare for time_bnds implementation
---
 cate/util/opimpl.py        | 31 ++++++++++--
 test/ops/test_normalize.py | 98 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 125 insertions(+), 4 deletions(-)

diff --git a/cate/util/opimpl.py b/cate/util/opimpl.py
index c42a8f09e..e8cd6817c 100644
--- a/cate/util/opimpl.py
+++ b/cate/util/opimpl.py
@@ -219,14 +219,37 @@ def adjust_temporal_attrs_impl(ds: xr.Dataset) -> xr.Dataset:
     """
     ds = ds.copy()
 
-    ds.attrs['time_coverage_start'] = str(ds.time.values[0])
-    ds.attrs['time_coverage_end'] = str(ds.time.values[-1])
-    ds.attrs['time_coverage_resolution'] = _get_temporal_res(ds.time.values)
-    ds.attrs['time_coverage_duration'] = _get_duration(ds.time.values)
+    tempattrs = _get_temporal_props(ds)
+
+    for key in tempattrs:
+        if tempattrs[key] is not None:
+            ds.attrs[key] = tempattrs[key]
+        else:
+            ds.attrs.pop(key, None)
 
     return ds
 
 
+def _get_temporal_props(ds: xr.Dataset) -> dict:
+    """
+    Get temporal boundaries, resolution and duration of the given dataset. If
+    the 'bounds' are explicitly defined, these will be used for calculation,
+    otherwise it will rest on information gathered from the 'time' dimension
+    itself.
+
+    :param ds: Dataset
+    :return: A dictionary {'attr_name': attr_value}
+    """
+    ret = dict()
+
+    ret['time_coverage_start'] = str(ds.time.values[0])
+    ret['time_coverage_end'] = str(ds.time.values[-1])
+    ret['time_coverage_resolution'] = _get_temporal_res(ds.time.values)
+    ret['time_coverage_duration'] = _get_duration(ds.time.values)
+
+    return ret
+
+
 def _get_spatial_props(ds: xr.Dataset, dim: str) -> dict:
     """
     Get spatial boundaries, resolution and units of the given dimension of the given
diff --git a/test/ops/test_normalize.py b/test/ops/test_normalize.py
index f3024ffdc..2cb5e1369 100644
--- a/test/ops/test_normalize.py
+++ b/test/ops/test_normalize.py
@@ -8,6 +8,7 @@
 from jdcal import gcal2jd
 import numpy as np
 from datetime import datetime
+import calendar
 
 from cate.ops.normalize import normalize, adjust_spatial_attrs, adjust_temporal_attrs
 from cate.core.op import OP_REGISTRY
@@ -298,3 +299,100 @@ def test_nominal(self):
                          'P1M')
         self.assertEqual(ds2.attrs['time_coverage_duration'],
                          'P92D')
+
+    def test_bnds(self):
+        """Test a case when time_bnds is available"""
+        ds = xr.Dataset({
+            'first': (['lat', 'lon', 'time'], np.zeros([45, 90, 12])),
+            'second': (['lat', 'lon', 'time'], np.zeros([45, 90, 12])),
+            'lat': np.linspace(-88, 88, 45),
+            'lon': np.linspace(-178, 178, 90),
+            'time': [datetime(2000, x, 1) for x in range(1, 13)]})
+
+        month_ends = list()
+        for x in ds.time.values:
+            year = int(str(x)[0:4])
+            month = int(str(x)[5:7])
+            day = calendar.monthrange(year, month)[1]
+            month_ends.append(datetime(year, month, day))
+
+        time_bnds = np.empty([len(ds.time), 2])
+        time_bnds[:, 0] = ds.time.values
+        time_bnds[:, 1] = month_ends
+
+        ds['nv'] = [0, 1]
+        ds['time_bnds'] = (['time', 'nv'], time_bnds)
+        ds.time.attrs['bounds'] = 'time_bnds'
+
+        ds1 = adjust_temporal_attrs(ds)
+
+        # Make sure original dataset is not altered
+        with self.assertRaises(KeyError):
+            ds.attrs['time_coverage_start']
+
+        # Make sure expected values are in the new dataset
+        self.assertEqual(ds1.attrs['time_coverage_start'],
+                         '2000-01-01T00:00:00.000000000')
+        self.assertEqual(ds1.attrs['time_coverage_end'],
+                         '2000-12-31:00:00.000000000')
+        self.assertEqual(ds1.attrs['time_coverage_resolution'],
+                         'P1M')
+        self.assertEqual(ds1.attrs['time_coverage_duration'],
+                         'P365D')
+
+    def test_single_slice(self):
+        """Test a case when the dataset is a single time slice"""
+        # With bnds
+        ds = xr.Dataset({
+            'first': (['lat', 'lon', 'time'], np.zeros([45, 90, 12])),
+            'second': (['lat', 'lon', 'time'], np.zeros([45, 90, 12])),
+            'lat': np.linspace(-88, 88, 45),
+            'lon': np.linspace(-178, 178, 90),
+            'time': [datetime(2000, 1, 1)]})
+
+        year = int(str(ds.time.values[0])[0:4])
+        month = int(str(ds.time.values[0])[5:7])
+        day = calendar.monthrange(year, month)[1]
+
+        time_bnds = np.empty([1, 2])
+        time_bnds[0, 0] = ds.time.values[0]
+        time_bnds[0, 1] = datetime(year, month, day)
+
+        ds['nv'] = [0, 1]
+        ds['time_bnds'] = (['time', 'nv'], time_bnds)
+        ds.time.attrs['bounds'] = 'time_bnds'
+
+        ds1 = adjust_temporal_attrs(ds)
+
+        # Make sure original dataset is not altered
+        with self.assertRaises(KeyError):
+            ds.attrs['time_coverage_start']
+
+        # Make sure expected values are in the new dataset
+        self.assertEqual(ds1.attrs['time_coverage_start'],
+                         '2000-01-01T00:00:00.000000000')
+        self.assertEqual(ds1.attrs['time_coverage_end'],
+                         '2000-01-31T00:00:00.000000000')
+        self.assertEqual(ds1.attrs['time_coverage_resolution'],
+                         'P1M')
+        self.assertEqual(ds1.attrs['time_coverage_duration'],
+                         'P31D')
+
+        # Without bnds
+        ds = xr.Dataset({
+            'first': (['lat', 'lon', 'time'], np.zeros([45, 90, 12])),
+            'second': (['lat', 'lon', 'time'], np.zeros([45, 90, 12])),
+            'lat': np.linspace(-88, 88, 45),
+            'lon': np.linspace(-178, 178, 90),
+            'time': [datetime(2000, 1, 1)]})
+
+        ds1 = adjust_temporal_attrs(ds)
+
+        self.assertEqual(ds1.attrs['time_coverage_start'],
+                         '2000-01-01T00:00:00.000000000')
+        self.assertEqual(ds1.attrs['time_coverage_end'],
+                         '2000-01-01:00:00.000000000')
+        with self.assertRaises(KeyError):
+            ds.attrs['time_coverage_resolution']
+        with self.assertRaises(KeyError):
+            ds.attrs['time_coverage_duration']

From 06e43574e8204197382005da492652e4cc72dce4 Mon Sep 17 00:00:00 2001
From: Janis Gailis <gailis@stcorp.no>
Date: Wed, 27 Sep 2017 13:50:41 +0200
Subject: [PATCH 2/6] Fix tests

Fix tests such that they work when the code works. They still fail, but
not because of errors in test implementations.
---
 test/ops/test_normalize.py | 33 +++++++++++----------------------
 1 file changed, 11 insertions(+), 22 deletions(-)

diff --git a/test/ops/test_normalize.py b/test/ops/test_normalize.py
index 2cb5e1369..eec33d09b 100644
--- a/test/ops/test_normalize.py
+++ b/test/ops/test_normalize.py
@@ -302,12 +302,14 @@ def test_nominal(self):
 
     def test_bnds(self):
         """Test a case when time_bnds is available"""
+        time = [datetime(2000, x, 1) for x in range(1, 13)]
         ds = xr.Dataset({
             'first': (['lat', 'lon', 'time'], np.zeros([45, 90, 12])),
             'second': (['lat', 'lon', 'time'], np.zeros([45, 90, 12])),
             'lat': np.linspace(-88, 88, 45),
             'lon': np.linspace(-178, 178, 90),
-            'time': [datetime(2000, x, 1) for x in range(1, 13)]})
+            'nv': [0, 1],
+            'time': time})
 
         month_ends = list()
         for x in ds.time.values:
@@ -316,12 +318,7 @@ def test_bnds(self):
             day = calendar.monthrange(year, month)[1]
             month_ends.append(datetime(year, month, day))
 
-        time_bnds = np.empty([len(ds.time), 2])
-        time_bnds[:, 0] = ds.time.values
-        time_bnds[:, 1] = month_ends
-
-        ds['nv'] = [0, 1]
-        ds['time_bnds'] = (['time', 'nv'], time_bnds)
+        ds['time_bnds'] = (['time', 'nv'], list(zip(time, month_ends)))
         ds.time.attrs['bounds'] = 'time_bnds'
 
         ds1 = adjust_temporal_attrs(ds)
@@ -344,23 +341,15 @@ def test_single_slice(self):
         """Test a case when the dataset is a single time slice"""
         # With bnds
         ds = xr.Dataset({
-            'first': (['lat', 'lon', 'time'], np.zeros([45, 90, 12])),
-            'second': (['lat', 'lon', 'time'], np.zeros([45, 90, 12])),
+            'first': (['lat', 'lon', 'time'], np.zeros([45, 90, 1])),
+            'second': (['lat', 'lon', 'time'], np.zeros([45, 90, 1])),
             'lat': np.linspace(-88, 88, 45),
             'lon': np.linspace(-178, 178, 90),
+            'nv': [0, 1],
             'time': [datetime(2000, 1, 1)]})
-
-        year = int(str(ds.time.values[0])[0:4])
-        month = int(str(ds.time.values[0])[5:7])
-        day = calendar.monthrange(year, month)[1]
-
-        time_bnds = np.empty([1, 2])
-        time_bnds[0, 0] = ds.time.values[0]
-        time_bnds[0, 1] = datetime(year, month, day)
-
-        ds['nv'] = [0, 1]
-        ds['time_bnds'] = (['time', 'nv'], time_bnds)
         ds.time.attrs['bounds'] = 'time_bnds'
+        ds['time_bnds'] = (['time', 'nv'],
+                           [(datetime(2000, 1, 1), datetime(2000, 1, 31))])
 
         ds1 = adjust_temporal_attrs(ds)
 
@@ -380,8 +369,8 @@ def test_single_slice(self):
 
         # Without bnds
         ds = xr.Dataset({
-            'first': (['lat', 'lon', 'time'], np.zeros([45, 90, 12])),
-            'second': (['lat', 'lon', 'time'], np.zeros([45, 90, 12])),
+            'first': (['lat', 'lon', 'time'], np.zeros([45, 90, 1])),
+            'second': (['lat', 'lon', 'time'], np.zeros([45, 90, 1])),
             'lat': np.linspace(-88, 88, 45),
             'lon': np.linspace(-178, 178, 90),
             'time': [datetime(2000, 1, 1)]})

From d9a338efc12ea4c6e4afa084a36637047a89fab3 Mon Sep 17 00:00:00 2001
From: Janis Gailis <gailis@stcorp.no>
Date: Wed, 27 Sep 2017 22:42:55 +0200
Subject: [PATCH 3/6] Updated temporal attribute normalization

Temporal attribute determination now uses time_bnds variable if it is
present and defined and can also handle datasets containing single time
slices.
---
 cate/util/opimpl.py        | 38 +++++++++++++++++++++++++++++---------
 test/ops/test_normalize.py | 15 ++++++++-------
 2 files changed, 37 insertions(+), 16 deletions(-)

diff --git a/cate/util/opimpl.py b/cate/util/opimpl.py
index e8cd6817c..4b291345d 100644
--- a/cate/util/opimpl.py
+++ b/cate/util/opimpl.py
@@ -242,10 +242,28 @@ def _get_temporal_props(ds: xr.Dataset) -> dict:
     """
     ret = dict()
 
-    ret['time_coverage_start'] = str(ds.time.values[0])
-    ret['time_coverage_end'] = str(ds.time.values[-1])
-    ret['time_coverage_resolution'] = _get_temporal_res(ds.time.values)
-    ret['time_coverage_duration'] = _get_duration(ds.time.values)
+    try:
+        # According to CF conventions, the 'bounds' variable name should be in
+        # the attributes of the coordinate variable
+        bnds = ds['time'].attrs['bounds']
+        time_min = ds[bnds].values[0][0]
+        time_max = ds[bnds].values[-1][1]
+    except KeyError:
+        time_min = ds['time'].values[0]
+        time_max = ds['time'].values[-1]
+
+    if time_min != time_max:
+        ret['time_coverage_duration'] = _get_duration(time_min, time_max)
+    else:
+        ret['time_coverage_duration'] = None
+
+    if ds['time'].values[0] == ds['time'].values[-1]:
+        ret['time_coverage_resolution'] = None
+    else:
+        ret['time_coverage_resolution'] = _get_temporal_res(ds.time.values)
+
+    ret['time_coverage_start'] = str(time_min)
+    ret['time_coverage_end'] = str(time_max)
 
     return ret
 
@@ -315,17 +333,19 @@ def _get_temporal_res(time: np.ndarray) -> str:
         return 'P{}D'.format(int(days))
 
 
-def _get_duration(time: np.ndarray) -> str:
+def _get_duration(tmin: np.datetime64, tmax: np.datetime64) -> str:
     """
-    Determine the duration of the given datetimes array.
+    Determine the duration of the given datetimes.
 
     See also: `ISO 8601 Durations <https://en.wikipedia.org/wiki/ISO_8601#Durations>`_
 
-    :param time: A numpy array containing np.datetime64 objects
+    :param tmin: Time minimum
+    :param tmax: Time maximum
     :return: Temporal resolution formatted as an ISO 8601:2004 duration string
     """
-    delta = time[-1] - time[0]
-    days = delta.astype('timedelta64[D]') / np.timedelta64(1, 'D')
+    delta = tmax - tmin
+    day = np.timedelta64(1, 'D')
+    days = (delta.astype('timedelta64[D]') / day) + 1
     return 'P{}D'.format(int(days))
 
 
diff --git a/test/ops/test_normalize.py b/test/ops/test_normalize.py
index eec33d09b..350dacbe0 100644
--- a/test/ops/test_normalize.py
+++ b/test/ops/test_normalize.py
@@ -284,7 +284,7 @@ def test_nominal(self):
         self.assertEqual(ds1.attrs['time_coverage_resolution'],
                          'P1M')
         self.assertEqual(ds1.attrs['time_coverage_duration'],
-                         'P335D')
+                         'P336D')
 
         # Test existing attributes update
         indexers = {'time': slice(datetime(2000, 2, 15), datetime(2000, 6, 15))}
@@ -298,7 +298,7 @@ def test_nominal(self):
         self.assertEqual(ds2.attrs['time_coverage_resolution'],
                          'P1M')
         self.assertEqual(ds2.attrs['time_coverage_duration'],
-                         'P92D')
+                         'P93D')
 
     def test_bnds(self):
         """Test a case when time_bnds is available"""
@@ -331,11 +331,11 @@ def test_bnds(self):
         self.assertEqual(ds1.attrs['time_coverage_start'],
                          '2000-01-01T00:00:00.000000000')
         self.assertEqual(ds1.attrs['time_coverage_end'],
-                         '2000-12-31:00:00.000000000')
+                         '2000-12-31T00:00:00.000000000')
         self.assertEqual(ds1.attrs['time_coverage_resolution'],
                          'P1M')
         self.assertEqual(ds1.attrs['time_coverage_duration'],
-                         'P365D')
+                         'P366D')
 
     def test_single_slice(self):
         """Test a case when the dataset is a single time slice"""
@@ -362,10 +362,11 @@ def test_single_slice(self):
                          '2000-01-01T00:00:00.000000000')
         self.assertEqual(ds1.attrs['time_coverage_end'],
                          '2000-01-31T00:00:00.000000000')
-        self.assertEqual(ds1.attrs['time_coverage_resolution'],
-                         'P1M')
         self.assertEqual(ds1.attrs['time_coverage_duration'],
                          'P31D')
+        with self.assertRaises(KeyError):
+            # Resolution is not defined for a single slice
+            ds.attrs['time_coverage_resolution']
 
         # Without bnds
         ds = xr.Dataset({
@@ -380,7 +381,7 @@ def test_single_slice(self):
         self.assertEqual(ds1.attrs['time_coverage_start'],
                          '2000-01-01T00:00:00.000000000')
         self.assertEqual(ds1.attrs['time_coverage_end'],
-                         '2000-01-01:00:00.000000000')
+                         '2000-01-01T00:00:00.000000000')
         with self.assertRaises(KeyError):
             ds.attrs['time_coverage_resolution']
         with self.assertRaises(KeyError):

From 85213b61b71c8360ad897084a52183562cc1b6f8 Mon Sep 17 00:00:00 2001
From: Marco Zuehlke <Marco.Zuehlke@brockmann-consult.de>
Date: Thu, 28 Sep 2017 16:20:08 +0200
Subject: [PATCH 4/6] only allow valid python identifiers as resource names

See #436
---
 cate/core/workspace.py      | 11 +++++++++++
 test/core/test_workspace.py | 19 ++++++++++++++++++-
 2 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/cate/core/workspace.py b/cate/core/workspace.py
index 56858b8be..44ec9ecad 100644
--- a/cate/core/workspace.py
+++ b/cate/core/workspace.py
@@ -486,6 +486,7 @@ def delete_resource(self, res_name: str):
                 del self._resource_cache[res_name]
 
     def rename_resource(self, res_name: str, new_res_name: str) -> None:
+        Workspace._validate_res_name(new_res_name)
         with self._lock:
             res_step = self.workflow.find_node(res_name)
             if res_step is None:
@@ -533,6 +534,7 @@ def set_resource(self,
                 default_res_pattern = conf.get_default_res_pattern()
                 res_pattern = op.op_meta_info.header.get('res_pattern', default_res_pattern)
                 res_name = self._new_resource_name(res_pattern)
+            Workspace._validate_res_name(res_name)
 
             new_step = OpStep(op, node_id=res_name)
 
@@ -656,6 +658,15 @@ def _assert_open(self):
     def _new_resource_name(self, res_pattern):
         return new_indexed_name({step.id for step in self.workflow.steps}, res_pattern)
 
+    @staticmethod
+    def _validate_res_name(res_name: str):
+        if not res_name.isidentifier():
+            raise WorkspaceError(
+                "Resource name '%s' is not valid. "
+                "The name must only contain the uppercase and lowercase letters A through Z, the underscore _ and, "
+                "except for the first character, the digits 0 through 9." % res_name)
+
+
 
 # noinspection PyArgumentList
 class WorkspaceError(Exception):
diff --git a/test/core/test_workspace.py b/test/core/test_workspace.py
index 4cdb449dd..9767951c7 100644
--- a/test/core/test_workspace.py
+++ b/test/core/test_workspace.py
@@ -8,7 +8,7 @@
 import xarray as xr
 
 from cate.core.workflow import Workflow, OpStep
-from cate.core.workspace import Workspace, mk_op_arg, mk_op_args, mk_op_kwargs
+from cate.core.workspace import Workspace, WorkspaceError, mk_op_arg, mk_op_args, mk_op_kwargs
 from cate.util import UNDEFINED
 from cate.util.opmetainf import OpMetaInfo
 
@@ -304,6 +304,23 @@ def set_resource_and_execute():
         expected_res_names = {'res_%s' % (i + 1) for i in range(num_res)}
         self.assertEqual(actual_res_names, expected_res_names)
 
+    def test_validate_res_name(self):
+        Workspace._validate_res_name("a")
+        Workspace._validate_res_name("A")
+        Workspace._validate_res_name("abc_42")
+        Workspace._validate_res_name("abc42")
+        Workspace._validate_res_name("_abc42")
+        # with self.assertRaises(WorkspaceError):
+        #     Workspace._validate_res_name("0")
+        with self.assertRaises(WorkspaceError):
+            Workspace._validate_res_name("a-b")
+        with self.assertRaises(WorkspaceError):
+            Workspace._validate_res_name("a+b")
+        with self.assertRaises(WorkspaceError):
+            Workspace._validate_res_name("a.b")
+        with self.assertRaises(WorkspaceError):
+            Workspace._validate_res_name("file://path")
+
     def test_example(self):
         expected_json_text = """{
             "schema_version": 1,

From 9d632478b8f38b4a6db16cd02d4c715f4481fc75 Mon Sep 17 00:00:00 2001
From: Marco Zuehlke <Marco.Zuehlke@brockmann-consult.de>
Date: Thu, 28 Sep 2017 16:21:08 +0200
Subject: [PATCH 5/6] flake8

---
 cate/core/workspace.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/cate/core/workspace.py b/cate/core/workspace.py
index 44ec9ecad..038a5f2e1 100644
--- a/cate/core/workspace.py
+++ b/cate/core/workspace.py
@@ -667,7 +667,6 @@ def _validate_res_name(res_name: str):
                 "except for the first character, the digits 0 through 9." % res_name)
 
 
-
 # noinspection PyArgumentList
 class WorkspaceError(Exception):
     def __init__(self, cause, *args, **kwargs):

From a8928fa5cc92bbf405aaabb1ab4a66f9a9571ecd Mon Sep 17 00:00:00 2001
From: Marco Zuehlke <Marco.Zuehlke@brockmann-consult.de>
Date: Thu, 28 Sep 2017 16:22:45 +0200
Subject: [PATCH 6/6] only allow valid python identifiers as resource names
 (CHANGES.md)

See #436
---
 CHANGES.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/CHANGES.md b/CHANGES.md
index 17f10fb99..86b0c0ede 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -15,7 +15,9 @@
   [#386](https://github.com/CCI-Tools/cate/issues/386)
 * Use global temporal attributes to determine temporal resolution in aggregation operations
   [#340](https://github.com/CCI-Tools/cate/issues/340)
-  
+* Only allow valid python identifiers as resource names
+  [#436](https://github.com/CCI-Tools/cate/issues/436)
+
 ## Changes in version 0.9.0.dev7
 
 ### Improvements and new Features