Skip to content
This repository has been archived by the owner on Aug 29, 2023. It is now read-only.

Commit

Permalink
Merge branch 'master' into 337-kb-datasource-completion-status
Browse files Browse the repository at this point in the history
  • Loading branch information
Krzysztof (Chris) Bernat committed Sep 28, 2017
2 parents e8c0450 + a8928fa commit d3b0ee2
Show file tree
Hide file tree
Showing 5 changed files with 173 additions and 13 deletions.
4 changes: 3 additions & 1 deletion CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@
[#386](https://github.com/CCI-Tools/cate/issues/386)
* Use global temporal attributes to determine temporal resolution in aggregation operations
[#340](https://github.com/CCI-Tools/cate/issues/340)

* Only allow valid python identifiers as resource names
[#436](https://github.com/CCI-Tools/cate/issues/436)

## Changes in version 0.9.0.dev7

### Improvements and new Features
Expand Down
10 changes: 10 additions & 0 deletions cate/core/workspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -486,6 +486,7 @@ def delete_resource(self, res_name: str):
del self._resource_cache[res_name]

def rename_resource(self, res_name: str, new_res_name: str) -> None:
Workspace._validate_res_name(new_res_name)
with self._lock:
res_step = self.workflow.find_node(res_name)
if res_step is None:
Expand Down Expand Up @@ -533,6 +534,7 @@ def set_resource(self,
default_res_pattern = conf.get_default_res_pattern()
res_pattern = op.op_meta_info.header.get('res_pattern', default_res_pattern)
res_name = self._new_resource_name(res_pattern)
Workspace._validate_res_name(res_name)

new_step = OpStep(op, node_id=res_name)

Expand Down Expand Up @@ -656,6 +658,14 @@ def _assert_open(self):
def _new_resource_name(self, res_pattern):
return new_indexed_name({step.id for step in self.workflow.steps}, res_pattern)

@staticmethod
def _validate_res_name(res_name: str):
if not res_name.isidentifier():
raise WorkspaceError(
"Resource name '%s' is not valid. "
"The name must only contain the uppercase and lowercase letters A through Z, the underscore _ and, "
"except for the first character, the digits 0 through 9." % res_name)


# noinspection PyArgumentList
class WorkspaceError(Exception):
Expand Down
61 changes: 52 additions & 9 deletions cate/util/opimpl.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,14 +219,55 @@ def adjust_temporal_attrs_impl(ds: xr.Dataset) -> xr.Dataset:
"""
ds = ds.copy()

ds.attrs['time_coverage_start'] = str(ds.time.values[0])
ds.attrs['time_coverage_end'] = str(ds.time.values[-1])
ds.attrs['time_coverage_resolution'] = _get_temporal_res(ds.time.values)
ds.attrs['time_coverage_duration'] = _get_duration(ds.time.values)
tempattrs = _get_temporal_props(ds)

for key in tempattrs:
if tempattrs[key] is not None:
ds.attrs[key] = tempattrs[key]
else:
ds.attrs.pop(key, None)

return ds


def _get_temporal_props(ds: xr.Dataset) -> dict:
"""
Get temporal boundaries, resolution and duration of the given dataset. If
the 'bounds' are explicitly defined, these will be used for calculation,
otherwise it will rest on information gathered from the 'time' dimension
itself.
:param ds: Dataset
:return: A dictionary {'attr_name': attr_value}
"""
ret = dict()

try:
# According to CF conventions, the 'bounds' variable name should be in
# the attributes of the coordinate variable
bnds = ds['time'].attrs['bounds']
time_min = ds[bnds].values[0][0]
time_max = ds[bnds].values[-1][1]
except KeyError:
time_min = ds['time'].values[0]
time_max = ds['time'].values[-1]

if time_min != time_max:
ret['time_coverage_duration'] = _get_duration(time_min, time_max)
else:
ret['time_coverage_duration'] = None

if ds['time'].values[0] == ds['time'].values[-1]:
ret['time_coverage_resolution'] = None
else:
ret['time_coverage_resolution'] = _get_temporal_res(ds.time.values)

ret['time_coverage_start'] = str(time_min)
ret['time_coverage_end'] = str(time_max)

return ret


def _get_spatial_props(ds: xr.Dataset, dim: str) -> dict:
"""
Get spatial boundaries, resolution and units of the given dimension of the given
Expand Down Expand Up @@ -292,17 +333,19 @@ def _get_temporal_res(time: np.ndarray) -> str:
return 'P{}D'.format(int(days))


def _get_duration(time: np.ndarray) -> str:
def _get_duration(tmin: np.datetime64, tmax: np.datetime64) -> str:
"""
Determine the duration of the given datetimes array.
Determine the duration of the given datetimes.
See also: `ISO 8601 Durations <https://en.wikipedia.org/wiki/ISO_8601#Durations>`_
:param time: A numpy array containing np.datetime64 objects
:param tmin: Time minimum
:param tmax: Time maximum
:return: Temporal resolution formatted as an ISO 8601:2004 duration string
"""
delta = time[-1] - time[0]
days = delta.astype('timedelta64[D]') / np.timedelta64(1, 'D')
delta = tmax - tmin
day = np.timedelta64(1, 'D')
days = (delta.astype('timedelta64[D]') / day) + 1
return 'P{}D'.format(int(days))


Expand Down
19 changes: 18 additions & 1 deletion test/core/test_workspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import xarray as xr

from cate.core.workflow import Workflow, OpStep
from cate.core.workspace import Workspace, mk_op_arg, mk_op_args, mk_op_kwargs
from cate.core.workspace import Workspace, WorkspaceError, mk_op_arg, mk_op_args, mk_op_kwargs
from cate.util import UNDEFINED
from cate.util.opmetainf import OpMetaInfo

Expand Down Expand Up @@ -304,6 +304,23 @@ def set_resource_and_execute():
expected_res_names = {'res_%s' % (i + 1) for i in range(num_res)}
self.assertEqual(actual_res_names, expected_res_names)

def test_validate_res_name(self):
Workspace._validate_res_name("a")
Workspace._validate_res_name("A")
Workspace._validate_res_name("abc_42")
Workspace._validate_res_name("abc42")
Workspace._validate_res_name("_abc42")
# with self.assertRaises(WorkspaceError):
# Workspace._validate_res_name("0")
with self.assertRaises(WorkspaceError):
Workspace._validate_res_name("a-b")
with self.assertRaises(WorkspaceError):
Workspace._validate_res_name("a+b")
with self.assertRaises(WorkspaceError):
Workspace._validate_res_name("a.b")
with self.assertRaises(WorkspaceError):
Workspace._validate_res_name("file://path")

def test_example(self):
expected_json_text = """{
"schema_version": 1,
Expand Down
92 changes: 90 additions & 2 deletions test/ops/test_normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from jdcal import gcal2jd
import numpy as np
from datetime import datetime
import calendar

from cate.ops.normalize import normalize, adjust_spatial_attrs, adjust_temporal_attrs
from cate.core.op import OP_REGISTRY
Expand Down Expand Up @@ -283,7 +284,7 @@ def test_nominal(self):
self.assertEqual(ds1.attrs['time_coverage_resolution'],
'P1M')
self.assertEqual(ds1.attrs['time_coverage_duration'],
'P335D')
'P336D')

# Test existing attributes update
indexers = {'time': slice(datetime(2000, 2, 15), datetime(2000, 6, 15))}
Expand All @@ -297,4 +298,91 @@ def test_nominal(self):
self.assertEqual(ds2.attrs['time_coverage_resolution'],
'P1M')
self.assertEqual(ds2.attrs['time_coverage_duration'],
'P92D')
'P93D')

def test_bnds(self):
"""Test a case when time_bnds is available"""
time = [datetime(2000, x, 1) for x in range(1, 13)]
ds = xr.Dataset({
'first': (['lat', 'lon', 'time'], np.zeros([45, 90, 12])),
'second': (['lat', 'lon', 'time'], np.zeros([45, 90, 12])),
'lat': np.linspace(-88, 88, 45),
'lon': np.linspace(-178, 178, 90),
'nv': [0, 1],
'time': time})

month_ends = list()
for x in ds.time.values:
year = int(str(x)[0:4])
month = int(str(x)[5:7])
day = calendar.monthrange(year, month)[1]
month_ends.append(datetime(year, month, day))

ds['time_bnds'] = (['time', 'nv'], list(zip(time, month_ends)))
ds.time.attrs['bounds'] = 'time_bnds'

ds1 = adjust_temporal_attrs(ds)

# Make sure original dataset is not altered
with self.assertRaises(KeyError):
ds.attrs['time_coverage_start']

# Make sure expected values are in the new dataset
self.assertEqual(ds1.attrs['time_coverage_start'],
'2000-01-01T00:00:00.000000000')
self.assertEqual(ds1.attrs['time_coverage_end'],
'2000-12-31T00:00:00.000000000')
self.assertEqual(ds1.attrs['time_coverage_resolution'],
'P1M')
self.assertEqual(ds1.attrs['time_coverage_duration'],
'P366D')

def test_single_slice(self):
"""Test a case when the dataset is a single time slice"""
# With bnds
ds = xr.Dataset({
'first': (['lat', 'lon', 'time'], np.zeros([45, 90, 1])),
'second': (['lat', 'lon', 'time'], np.zeros([45, 90, 1])),
'lat': np.linspace(-88, 88, 45),
'lon': np.linspace(-178, 178, 90),
'nv': [0, 1],
'time': [datetime(2000, 1, 1)]})
ds.time.attrs['bounds'] = 'time_bnds'
ds['time_bnds'] = (['time', 'nv'],
[(datetime(2000, 1, 1), datetime(2000, 1, 31))])

ds1 = adjust_temporal_attrs(ds)

# Make sure original dataset is not altered
with self.assertRaises(KeyError):
ds.attrs['time_coverage_start']

# Make sure expected values are in the new dataset
self.assertEqual(ds1.attrs['time_coverage_start'],
'2000-01-01T00:00:00.000000000')
self.assertEqual(ds1.attrs['time_coverage_end'],
'2000-01-31T00:00:00.000000000')
self.assertEqual(ds1.attrs['time_coverage_duration'],
'P31D')
with self.assertRaises(KeyError):
# Resolution is not defined for a single slice
ds.attrs['time_coverage_resolution']

# Without bnds
ds = xr.Dataset({
'first': (['lat', 'lon', 'time'], np.zeros([45, 90, 1])),
'second': (['lat', 'lon', 'time'], np.zeros([45, 90, 1])),
'lat': np.linspace(-88, 88, 45),
'lon': np.linspace(-178, 178, 90),
'time': [datetime(2000, 1, 1)]})

ds1 = adjust_temporal_attrs(ds)

self.assertEqual(ds1.attrs['time_coverage_start'],
'2000-01-01T00:00:00.000000000')
self.assertEqual(ds1.attrs['time_coverage_end'],
'2000-01-01T00:00:00.000000000')
with self.assertRaises(KeyError):
ds.attrs['time_coverage_resolution']
with self.assertRaises(KeyError):
ds.attrs['time_coverage_duration']

0 comments on commit d3b0ee2

Please sign in to comment.