Skip to content
This repository was archived by the owner on Aug 29, 2023. It is now read-only.

Commit

Permalink
#277 unique id for local data sources and human-readable title
Browse files Browse the repository at this point in the history
  • Loading branch information
Krzysztof (Chris) Bernat committed Sep 7, 2017
1 parent 502a779 commit a738058
Show file tree
Hide file tree
Showing 5 changed files with 162 additions and 52 deletions.
2 changes: 1 addition & 1 deletion cate/core/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ def convert(cls, value: Any) -> Optional[VarNames]:
raise ValueError('Variable name pattern can only be a string'
' or a list of strings.')

return value
return value.copy()

@classmethod
def format(cls, value: Optional[VarNames]) -> str:
Expand Down
13 changes: 6 additions & 7 deletions cate/ds/esa_cci_odp.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@
_ESGF_CEDA_URL = "https://esgf-index1.ceda.ac.uk/esg-search/search/"

# _CSW_CEDA_URL = "http://csw1.cems.rl.ac.uk/geonetwork-CEDA/srv/eng/csw-CEDA-CCI"
_CSW_CEDA_URL = "https://csw.ceda.ac.uk/geonetwork/srv/eng/csw-CEDA-CCI"
_CSW_CEDA_URL = "https://csw.ceda.ac.uk/geonetwork/srv/eng/csw"

_TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S"

Expand Down Expand Up @@ -947,10 +947,8 @@ def make_local(self,
region: PolygonLike.TYPE = None,
var_names: VarNamesLike.TYPE = None,
monitor: Monitor = Monitor.NONE) -> Optional[DataSource]:
if not local_name:
raise ValueError('local_name is required')
elif len(local_name) == 0:
raise ValueError('local_name cannot be empty')
if not local_name or len(local_name) == 0:
local_name = self.title

local_store = DATA_STORE_REGISTRY.get_data_store('local')
if not local_store:
Expand All @@ -964,8 +962,9 @@ def make_local(self,
del local_meta_info['uuid']
local_meta_info['ref_uuid'] = self.meta_info['uuid']

local_ds = local_store.create_data_source(local_name, region, _REFERENCE_DATA_SOURCE_TYPE, self.id,
time_range, var_names, meta_info=local_meta_info, lock_file=True)
local_ds = local_store.create_data_source(self.id, region, _REFERENCE_DATA_SOURCE_TYPE, local_name,
time_range=time_range, var_names=var_names,
meta_info=local_meta_info, lock_file=True)
self._make_local(local_ds, time_range, region, var_names, monitor=monitor)
if local_ds.is_empty:
local_store.remove_data_source(local_ds)
Expand Down
63 changes: 55 additions & 8 deletions cate/ds/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
import os
import psutil
import shutil
import uuid
import xarray as xr
from collections import OrderedDict
from datetime import datetime
Expand All @@ -63,6 +64,8 @@

_REFERENCE_DATA_SOURCE_TYPE = "FILE_PATTERN"

_NAMESPACE = uuid.UUID(bytes=b"1234567890123456", version=3)


def get_data_store_path():
return os.environ.get('CATE_LOCAL_DATA_STORE_PATH',
Expand Down Expand Up @@ -337,10 +340,8 @@ def make_local(self,
region: PolygonLike.TYPE = None,
var_names: VarNamesLike.TYPE = None,
monitor: Monitor = Monitor.NONE) -> Optional[DataSource]:
if not local_name:
raise ValueError('local_name is required')
elif len(local_name) == 0:
raise ValueError('local_name cannot be empty')
if not local_name or len(local_name) == 0:
local_name = self.title

local_store = DATA_STORE_REGISTRY.get_data_store('local')
if not local_store:
Expand All @@ -349,7 +350,10 @@ def make_local(self,
if not local_store:
raise ValueError('Cannot initialize `local` DataStore')

local_ds = local_store.create_data_source(local_name, region, _REFERENCE_DATA_SOURCE_TYPE, self.id,
self.meta_info.copy()

local_ds = local_store.create_data_source(self.id, region, _REFERENCE_DATA_SOURCE_TYPE, local_name,
time_range=time_range, var_names=var_names,
meta_info=self.meta_info)
self._make_local(local_ds, time_range, region, var_names, monitor)
if local_ds.is_empty:
Expand Down Expand Up @@ -593,13 +597,55 @@ def remove_data_source(self, data_source: Union[str, DataSource], remove_files:
shutil.rmtree(os.path.join(self._store_dir, data_source.id), ignore_errors=True)
self._data_sources.remove(data_source)

def create_data_source(self, data_source_id: str, region: PolygonLike.TYPE = None,
@classmethod
def generate_uuid(cls, ref_id: str,
                  time_range: TimeRangeLike.TYPE = None,
                  region: PolygonLike.TYPE = None,
                  var_names: VarNamesLike.TYPE = None) -> uuid.UUID:
    """
    Generate a deterministic, name-based UUID for a local data source.

    The UUID is derived via ``uuid.uuid3`` from the reference data source id
    concatenated with the formatted string of each supplied constraint, so the
    same reference/constraint combination always produces the same id.

    :param ref_id: Id of the reference (remote) data source.
    :param time_range: Optional temporal constraint.
    :param region: Optional spatial (polygon) constraint.
    :param var_names: Optional variable-names constraint.
    :return: A version-3 UUID unique to this reference/constraint combination.
    """
    # NOTE: was `ref_id=str`, which made the builtin `str` type the default
    # value instead of annotating the parameter; fixed to a proper annotation.
    constraints_str = ref_id
    if time_range:
        constraints_str += TimeRangeLike.format(time_range)
    if region:
        constraints_str += PolygonLike.format(region)
    if var_names:
        constraints_str += VarNamesLike.format(var_names)

    return uuid.uuid3(_NAMESPACE, constraints_str)

@classmethod
def generate_title(cls, ref_id: str,
                   time_range: TimeRangeLike.TYPE = None,
                   region: PolygonLike.TYPE = None,
                   var_names: VarNamesLike.TYPE = None) -> str:
    """
    Build a human-readable title for a local data source.

    The title starts with the reference data source id and appends a bracketed
    tag for each constraint that was supplied, e.g.
    ``"<ref_id> [TimeRange:...] [Region:...] [Variables:...]"``.

    :param ref_id: Id of the reference (remote) data source.
    :param time_range: Optional temporal constraint.
    :param region: Optional spatial (polygon) constraint.
    :param var_names: Optional variable-names constraint.
    :return: The assembled title string.
    """
    # NOTE: return annotation was `-> uuid.UUID` although a str is returned;
    # `ref_id=str` was also a type-object default instead of an annotation.
    title = ref_id
    if time_range:
        title += " [TimeRange:{}]".format(TimeRangeLike.format(time_range))
    if region:
        title += " [Region:{}]".format(PolygonLike.format(region))
    if var_names:
        title += " [Variables:{}]".format(VarNamesLike.format(var_names))

    return title

def create_data_source(self, ref_id: str, region: PolygonLike.TYPE = None,
reference_type: str = None, reference_name: str = None,
time_range: TimeRangeLike.TYPE = None, var_names: VarNamesLike.TYPE = None,
meta_info: OrderedDict = None, lock_file: bool = False):
self._init_data_sources()
if not data_source_id.startswith('%s.' % self.id):
data_source_id = '%s.%s' % (self.id, data_source_id)

if reference_name:
reference_name = LocalDataStore.generate_title(reference_name, time_range, region, var_names)
else:
reference_name = LocalDataStore.generate_title(ref_id, time_range, region, var_names)

if meta_info:
meta_info['title'] = reference_name

data_source_id = LocalDataStore.generate_uuid(ref_id, time_range, region, var_names)
data_source_id = '%s.%s' % (self.id, data_source_id)

lock_filename = '{}.lock'.format(data_source_id)
lock_filepath = os.path.join(self._store_dir, lock_filename)
existing_ds = None
Expand All @@ -622,6 +668,7 @@ def create_data_source(self, data_source_id: str, region: PolygonLike.TYPE = Non
data_source = existing_ds
else:
data_source = LocalDataSource(data_source_id, files=[], data_store=self, spatial_coverage=region,
variables=var_names, temporal_coverage=time_range,
reference_type=reference_type, reference_name=reference_name,
meta_info=meta_info)
if lock_file:
Expand Down
75 changes: 57 additions & 18 deletions test/ds/test_esa_cci_odp.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import shutil

from cate.core.ds import DATA_STORE_REGISTRY
from cate.core.types import PolygonLike, TimeRangeLike
from cate.core.types import PolygonLike, TimeRangeLike, VarNamesLike
from cate.ds.esa_cci_odp import EsaCciOdpDataStore, find_datetime_format
from cate.ds.local import LocalDataStore

Expand Down Expand Up @@ -124,18 +124,21 @@ def build_file_item(item_name: str, date_from: datetime, date_to: datetime, size

with unittest.mock.patch('cate.ds.esa_cci_odp.EsaCciOdpDataSource._find_files', find_files_mock):
with unittest.mock.patch.object(EsaCciOdpDataStore, 'query', return_value=[]):

new_ds_title = 'local_ds_test'
new_ds_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0),
datetime.datetime(1978, 11, 16, 23, 59)))
try:
new_ds = self.first_oc_data_source.make_local('local_ds_test', None,
(datetime.datetime(1978, 11, 14, 0, 0),
datetime.datetime(1978, 11, 15, 23, 59)))
new_ds = self.first_oc_data_source.make_local(new_ds_title, time_range=new_ds_time_range)
except:
raise ValueError(reference_path, os.listdir(reference_path))
self.assertIsNotNone(new_ds)

self.assertEqual(new_ds.id, 'local.local_ds_test')
self.assertEqual(new_ds.temporal_coverage(),
(datetime.datetime(1978, 11, 14, 0, 0),
datetime.datetime(1978, 11, 15, 23, 59)))
test_uuid = LocalDataStore.generate_uuid(self.first_oc_data_source.id, new_ds_time_range)
test_ds_id = "local." + str(test_uuid)

self.assertEqual(new_ds.id, test_ds_id)
self.assertEqual(new_ds.temporal_coverage(), new_ds_time_range)

self.first_oc_data_source.update_local(new_ds.id, (datetime.datetime(1978, 11, 15, 00, 00),
datetime.datetime(1978, 11, 16, 23, 59)))
Expand All @@ -154,23 +157,59 @@ def build_file_item(item_name: str, date_from: datetime, date_to: datetime, size
datetime.datetime(1978, 11, 16, 23, 59)))
self.assertTrue("Couldn't find local DataSource", context.exception.args[0])

new_ds_w_one_variable_title = 'local_ds_test'
new_ds_w_one_variable_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0),
datetime.datetime(1978, 11, 16, 23, 59)))
new_ds_w_one_variable_var_names = VarNamesLike.convert(['sm'])

new_ds_w_one_variable = self.first_oc_data_source.make_local(
'local_ds_test_2', None, (datetime.datetime(1978, 11, 14, 0, 0),
datetime.datetime(1978, 11, 15, 23, 59)), None, ['sm'])
new_ds_w_one_variable_title,
time_range=new_ds_w_one_variable_time_range,
var_names=new_ds_w_one_variable_var_names
)
self.assertIsNotNone(new_ds_w_one_variable)
self.assertEqual(new_ds_w_one_variable.id, 'local.local_ds_test_2')

new_ds_w_one_uuid = LocalDataStore.generate_uuid(self.first_oc_data_source.id,
time_range=new_ds_w_one_variable_time_range,
var_names=new_ds_w_one_variable_var_names)
new_ds_w_one_ds_id = "local." + str(new_ds_w_one_uuid)

self.assertEqual(new_ds_w_one_variable.id, new_ds_w_one_ds_id)
ds = new_ds_w_one_variable.open_dataset()
self.assertSetEqual(set(ds.variables), {'sm', 'lat', 'lon', 'time'})

new_ds_w_one_variable_var_names.extend(['lat', 'lon', 'time'])

self.assertSetEqual(set(ds.variables),
set(new_ds_w_one_variable_var_names))

new_ds_w_region_title = 'from_local_to_local_region'
new_ds_w_region_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0),
datetime.datetime(1978, 11, 16, 23, 59)))
new_ds_w_region_var_names = VarNamesLike.convert(['sm'])
new_ds_w_region_spatial_coverage = PolygonLike.convert("10,10,20,20")

new_ds_w_region = self.first_oc_data_source.make_local(
'from_local_to_local_region', None, (datetime.datetime(1978, 11, 14, 0, 0),
datetime.datetime(1978, 11, 15, 23, 59)),
"10,10,20,20", ['sm']) # type: LocalDataSource
new_ds_w_region_title,
time_range=new_ds_w_region_time_range,
var_names=new_ds_w_region_var_names,
region=new_ds_w_region_spatial_coverage) # type: LocalDataSource

self.assertIsNotNone(new_ds_w_region)
self.assertEqual(new_ds_w_region.id, 'local.from_local_to_local_region')
self.assertEqual(new_ds_w_region.spatial_coverage(), PolygonLike.convert("10,10,20,20"))

new_ds_w_region_uuid = LocalDataStore.generate_uuid(self.first_oc_data_source.id,
time_range=new_ds_w_region_time_range,
var_names=new_ds_w_region_var_names,
region=new_ds_w_region_spatial_coverage)
new_ds_w_region_ds_id = "local." + str(new_ds_w_region_uuid)

self.assertEqual(new_ds_w_region.id, new_ds_w_region_ds_id)

self.assertEqual(new_ds_w_region.spatial_coverage(), new_ds_w_region_spatial_coverage)
data_set = new_ds_w_region.open_dataset()
self.assertSetEqual(set(data_set.variables), {'sm', 'lat', 'lon', 'time'})

new_ds_w_region_var_names.extend(['lat', 'lon', 'time'])

self.assertSetEqual(set(data_set.variables), set(new_ds_w_region_var_names))

no_data = self.first_oc_data_source.make_local(
'empty_ds', None, (datetime.datetime(2017, 12, 1, 0, 0),
Expand Down
61 changes: 43 additions & 18 deletions test/ds/test_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import datetime
import shutil
from cate.core.ds import DATA_STORE_REGISTRY
from cate.core.types import PolygonLike, TimeRangeLike
from cate.core.types import PolygonLike, TimeRangeLike, VarNamesLike
from cate.ds.local import LocalDataStore, LocalDataSource
from cate.ds.esa_cci_odp import EsaCciOdpDataStore
from collections import OrderedDict
Expand Down Expand Up @@ -34,23 +34,26 @@ def test_add_pattern(self):
self.assertIsNotNone(data_sources)
self.assertEqual(len(data_sources), 2)

test_uuid = LocalDataStore.generate_uuid('a_name')
test_ds_id = "test." + str(test_uuid)

new_ds = self.data_store.add_pattern("a_name", "a_pat")
self.assertEqual('test.a_name', new_ds.id)
self.assertEqual(test_ds_id, new_ds.id)

data_sources = self.data_store.query()
self.assertEqual(len(data_sources), 3)

with self.assertRaises(ValueError) as cm:
self.data_store.add_pattern("a_name", "a_pat2")
self.assertEqual("Local data store 'test' already contains a data source named 'test.a_name'",
self.assertEqual("Local data store 'test' already contains a data source named '{}'".format(test_ds_id),
str(cm.exception))

data_sources = self.data_store.query()
self.assertEqual(len(data_sources), 3)

def test__repr_html(self):
html = self.data_store._repr_html_()
self.assertEqual(524, len(html))
self.assertEqual(584, len(html), html)

def test_init(self):
data_store2 = LocalDataStore('test', self.tmp_dir)
Expand Down Expand Up @@ -246,11 +249,16 @@ def test_make_local(self):
data_source = self._local_data_store.query('local_w_temporal')[0]

with unittest.mock.patch.object(EsaCciOdpDataStore, 'query', return_value=[]):
new_ds = data_source.make_local('from_local_to_local', None,
(datetime.datetime(1978, 11, 14, 0, 0),
datetime.datetime(1978, 11, 15, 23, 59)))
new_ds_title = 'from_local_to_local'
new_ds_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0),
datetime.datetime(1978, 11, 15, 23, 59)))
new_ds = data_source.make_local(new_ds_title, time_range=new_ds_time_range)
self.assertIsNotNone(new_ds)
self.assertEqual(new_ds.id, 'local.from_local_to_local')

test_uuid = LocalDataStore.generate_uuid(data_source.id, new_ds_time_range)
test_ds_id = "local." + str(test_uuid)

self.assertEqual(new_ds.id, test_ds_id)
self.assertEqual(new_ds.temporal_coverage(), TimeRangeLike.convert(
(datetime.datetime(1978, 11, 14, 0, 0),
datetime.datetime(1978, 11, 15, 23, 59))))
Expand All @@ -266,21 +274,38 @@ def test_make_local(self):
datetime.datetime(1978, 11, 16, 23, 59)))
self.assertTrue("Couldn't find local DataSource", context.exception.args[0])

new_ds_w_one_variable = data_source.make_local('from_local_to_local_var', None,
(datetime.datetime(1978, 11, 14, 0, 0),
datetime.datetime(1978, 11, 15, 23, 59)),
None, ['sm'])
new_ds_2_title = 'from_local_to_local_var'
new_ds_2_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0),
datetime.datetime(1978, 11, 15, 23, 59)))
new_ds_2_vars = VarNamesLike.convert(['sm'])

test_2_uuid = LocalDataStore.generate_uuid(data_source.id, new_ds_2_time_range, var_names=new_ds_2_vars)
test_ds_2_id = "local." + str(test_2_uuid)

new_ds_w_one_variable = data_source.make_local(new_ds_2_title,
time_range=new_ds_2_time_range,
var_names=new_ds_2_vars)
self.assertIsNotNone(new_ds_w_one_variable)
self.assertEqual(new_ds_w_one_variable.id, 'local.from_local_to_local_var')
self.assertEqual(new_ds_w_one_variable.id, test_ds_2_id)
data_set = new_ds_w_one_variable.open_dataset()
self.assertSetEqual(set(data_set.variables), {'sm', 'lat', 'lon', 'time'})

new_ds_w_region = data_source.make_local('from_local_to_local_region', None,
(datetime.datetime(1978, 11, 14, 0, 0),
datetime.datetime(1978, 11, 15, 23, 59)),
"10,10,20,20", ['sm']) # type: LocalDataSource
new_ds_3_title = 'from_local_to_local_var'
new_ds_3_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0),
datetime.datetime(1978, 11, 15, 23, 59)))
new_ds_3_vars = VarNamesLike.convert(['sm'])
new_ds_3_region = PolygonLike.convert("10,10,20,20")

test_3_uuid = LocalDataStore.generate_uuid(data_source.id, new_ds_3_time_range, var_names=new_ds_3_vars,
region=new_ds_3_region)
test_ds_3_id = "local." + str(test_3_uuid)

new_ds_w_region = data_source.make_local(new_ds_3_title,
time_range=new_ds_3_time_range,
var_names=new_ds_3_vars,
region=new_ds_3_region) # type: LocalDataSource
self.assertIsNotNone(new_ds_w_region)
self.assertEqual(new_ds_w_region.id, 'local.from_local_to_local_region')
self.assertEqual(new_ds_w_region.id, test_ds_3_id)
self.assertEqual(new_ds_w_region.spatial_coverage(), PolygonLike.convert("10,10,20,20"))
data_set = new_ds_w_region.open_dataset()
self.assertSetEqual(set(data_set.variables), {'sm', 'lat', 'lon', 'time'})
Expand Down

0 comments on commit a738058

Please sign in to comment.