From 2c30ba90b73937c25891bd8e7fbcb95645447b31 Mon Sep 17 00:00:00 2001 From: "Krzysztof (Chris) Bernat" Date: Tue, 5 Sep 2017 20:51:36 +0100 Subject: [PATCH 1/5] #277 unique id for local data sources and human-readable title --- cate/core/types.py | 2 +- cate/ds/esa_cci_odp.py | 13 +++---- cate/ds/local.py | 63 +++++++++++++++++++++++++++---- test/ds/test_esa_cci_odp.py | 75 ++++++++++++++++++++++++++++--------- test/ds/test_local.py | 61 +++++++++++++++++++++--------- 5 files changed, 162 insertions(+), 52 deletions(-) diff --git a/cate/core/types.py b/cate/core/types.py index 59f02c21d..cd4c19798 100644 --- a/cate/core/types.py +++ b/cate/core/types.py @@ -221,7 +221,7 @@ def convert(cls, value: Any) -> Optional[VarNames]: raise ValueError('Variable name pattern can only be a string' ' or a list of strings.') - return value + return value.copy() @classmethod def format(cls, value: Optional[VarNames]) -> str: diff --git a/cate/ds/esa_cci_odp.py b/cate/ds/esa_cci_odp.py index 642e51bcf..a29fd912b 100644 --- a/cate/ds/esa_cci_odp.py +++ b/cate/ds/esa_cci_odp.py @@ -68,7 +68,7 @@ _ESGF_CEDA_URL = "https://esgf-index1.ceda.ac.uk/esg-search/search/" # _CSW_CEDA_URL = "http://csw1.cems.rl.ac.uk/geonetwork-CEDA/srv/eng/csw-CEDA-CCI" -_CSW_CEDA_URL = "https://csw.ceda.ac.uk/geonetwork/srv/eng/csw-CEDA-CCI" +_CSW_CEDA_URL = "https://csw.ceda.ac.uk/geonetwork/srv/eng/csw" _TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S" @@ -878,10 +878,8 @@ def make_local(self, region: PolygonLike.TYPE = None, var_names: VarNamesLike.TYPE = None, monitor: Monitor = Monitor.NONE) -> Optional[DataSource]: - if not local_name: - raise ValueError('local_name is required') - elif len(local_name) == 0: - raise ValueError('local_name cannot be empty') + if not local_name or len(local_name) == 0: + local_name = self.title local_store = DATA_STORE_REGISTRY.get_data_store('local') if not local_store: @@ -895,8 +893,9 @@ def make_local(self, del local_meta_info['uuid'] local_meta_info['ref_uuid'] = self.meta_info['uuid'] - local_ds = local_store.create_data_source(local_name, region, _REFERENCE_DATA_SOURCE_TYPE, self.id, - time_range, var_names, meta_info=local_meta_info, lock_file=True) + local_ds = local_store.create_data_source(self.id, region, _REFERENCE_DATA_SOURCE_TYPE, local_name, + time_range=time_range, var_names=var_names, + meta_info=local_meta_info, lock_file=True) self._make_local(local_ds, time_range, region, var_names, monitor=monitor) if local_ds.is_empty: local_store.remove_data_source(local_ds) diff --git a/cate/ds/local.py b/cate/ds/local.py index d730b1092..b6d0f15c3 100644 --- a/cate/ds/local.py +++ b/cate/ds/local.py @@ -41,6 +41,7 @@ import os import psutil import shutil +import uuid import xarray as xr from collections import OrderedDict from datetime import datetime @@ -63,6 +64,8 @@ _REFERENCE_DATA_SOURCE_TYPE = "FILE_PATTERN" +_NAMESPACE = uuid.UUID(bytes=b"1234567890123456", version=3) + def get_data_store_path(): return os.environ.get('CATE_LOCAL_DATA_STORE_PATH', @@ -337,10 +340,8 @@ def make_local(self, region: PolygonLike.TYPE = None, var_names: VarNamesLike.TYPE = None, monitor: Monitor = Monitor.NONE) -> Optional[DataSource]: - if not local_name: - raise ValueError('local_name is required') - elif len(local_name) == 0: - raise ValueError('local_name cannot be empty') + if not local_name or len(local_name) == 0: + local_name = self.title local_store = DATA_STORE_REGISTRY.get_data_store('local') if not local_store: @@ -349,7 +350,10 @@ def make_local(self, if not local_store: raise ValueError('Cannot initialize `local` DataStore') - local_ds = local_store.create_data_source(local_name, region, _REFERENCE_DATA_SOURCE_TYPE, self.id, + self.meta_info.copy() + + local_ds = local_store.create_data_source(self.id, region, _REFERENCE_DATA_SOURCE_TYPE, local_name, + time_range=time_range, var_names=var_names, meta_info=self.meta_info) self._make_local(local_ds, time_range, region, var_names, monitor) if local_ds.is_empty: @@ -593,13 +597,55 @@ def remove_data_source(self, data_source: Union[str, DataSource], remove_files: shutil.rmtree(os.path.join(self._store_dir, data_source.id), ignore_errors=True) self._data_sources.remove(data_source) - def create_data_source(self, data_source_id: str, region: PolygonLike.TYPE = None, + @classmethod + def generate_uuid(cls, ref_id=str, + time_range: TimeRangeLike.TYPE = None, + region: PolygonLike.TYPE = None, + var_names: VarNamesLike.TYPE = None) -> uuid.UUID: + + constrains_str = ref_id + if time_range: + constrains_str += TimeRangeLike.format(time_range) + if region: + constrains_str += PolygonLike.format(region) + if var_names: + constrains_str += VarNamesLike.format(var_names) + + return uuid.uuid3(_NAMESPACE, constrains_str) + + @classmethod + def generate_title(cls, ref_id=str, + time_range: TimeRangeLike.TYPE = None, + region: PolygonLike.TYPE = None, + var_names: VarNamesLike.TYPE = None) -> uuid.UUID: + + title = ref_id + if time_range: + title += " [TimeRange:{}]".format(TimeRangeLike.format(time_range)) + if region: + title += " [Region:{}]".format(PolygonLike.format(region)) + if var_names: + title += " [Variables:{}]".format(VarNamesLike.format(var_names)) + + return title + + def create_data_source(self, ref_id: str, region: PolygonLike.TYPE = None, reference_type: str = None, reference_name: str = None, time_range: TimeRangeLike.TYPE = None, var_names: VarNamesLike.TYPE = None, meta_info: OrderedDict = None, lock_file: bool = False): self._init_data_sources() - if not data_source_id.startswith('%s.' % self.id): - data_source_id = '%s.%s' % (self.id, data_source_id) + + if reference_name: + reference_name = LocalDataStore.generate_title(reference_name, time_range, region, var_names) + else: + reference_name = LocalDataStore.generate_title(ref_id, time_range, region, var_names) + + if meta_info: + meta_info['title'] = reference_name + + data_source_id = LocalDataStore.generate_uuid(ref_id, time_range, region, var_names) + data_source_id = '%s.%s' % (self.id, data_source_id) + lock_filename = '{}.lock'.format(data_source_id) lock_filepath = os.path.join(self._store_dir, lock_filename) existing_ds = None @@ -622,6 +668,7 @@ def create_data_source(self, data_source_id: str, region: PolygonLike.TYPE = Non data_source = existing_ds else: data_source = LocalDataSource(data_source_id, files=[], data_store=self, spatial_coverage=region, + variables=var_names, temporal_coverage=time_range, reference_type=reference_type, reference_name=reference_name, meta_info=meta_info) if lock_file: diff --git a/test/ds/test_esa_cci_odp.py b/test/ds/test_esa_cci_odp.py index 7afdb3250..d68ad2213 100644 --- a/test/ds/test_esa_cci_odp.py +++ b/test/ds/test_esa_cci_odp.py @@ -9,7 +9,7 @@ import shutil from cate.core.ds import DATA_STORE_REGISTRY -from cate.core.types import PolygonLike, TimeRangeLike +from cate.core.types import PolygonLike, TimeRangeLike, VarNamesLike from cate.ds.esa_cci_odp import EsaCciOdpDataStore, find_datetime_format from cate.ds.local import LocalDataStore @@ -124,18 +124,21 @@ def build_file_item(item_name: str, date_from: datetime, date_to: datetime, size with unittest.mock.patch('cate.ds.esa_cci_odp.EsaCciOdpDataSource._find_files', find_files_mock): with unittest.mock.patch.object(EsaCciOdpDataStore, 'query', return_value=[]): + + new_ds_title = 'local_ds_test' + new_ds_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0), + datetime.datetime(1978, 11, 16, 23, 59))) try: - new_ds = self.first_oc_data_source.make_local('local_ds_test', None, - (datetime.datetime(1978, 11, 14, 0, 0), - datetime.datetime(1978, 11, 15, 23, 59))) + new_ds = self.first_oc_data_source.make_local(new_ds_title, time_range=new_ds_time_range) except: raise ValueError(reference_path, os.listdir(reference_path)) self.assertIsNotNone(new_ds) - self.assertEqual(new_ds.id, 'local.local_ds_test') - self.assertEqual(new_ds.temporal_coverage(), - (datetime.datetime(1978, 11, 14, 0, 0), - datetime.datetime(1978, 11, 15, 23, 59))) + test_uuid = LocalDataStore.generate_uuid(self.first_oc_data_source.id, new_ds_time_range) + test_ds_id = "local." + str(test_uuid) + + self.assertEqual(new_ds.id, test_ds_id) + self.assertEqual(new_ds.temporal_coverage(), new_ds_time_range) self.first_oc_data_source.update_local(new_ds.id, (datetime.datetime(1978, 11, 15, 00, 00), datetime.datetime(1978, 11, 16, 23, 59))) @@ -154,23 +157,59 @@ def build_file_item(item_name: str, date_from: datetime, date_to: datetime, size datetime.datetime(1978, 11, 16, 23, 59))) self.assertTrue("Couldn't find local DataSource", context.exception.args[0]) + new_ds_w_one_variable_title = 'local_ds_test' + new_ds_w_one_variable_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0), + datetime.datetime(1978, 11, 16, 23, 59))) + new_ds_w_one_variable_var_names = VarNamesLike.convert(['sm']) + new_ds_w_one_variable = self.first_oc_data_source.make_local( - 'local_ds_test_2', None, (datetime.datetime(1978, 11, 14, 0, 0), - datetime.datetime(1978, 11, 15, 23, 59)), None, ['sm']) + new_ds_w_one_variable_title, + time_range=new_ds_w_one_variable_time_range, + var_names=new_ds_w_one_variable_var_names + ) self.assertIsNotNone(new_ds_w_one_variable) - self.assertEqual(new_ds_w_one_variable.id, 'local.local_ds_test_2') + + new_ds_w_one_uuid = LocalDataStore.generate_uuid(self.first_oc_data_source.id, + time_range=new_ds_w_one_variable_time_range, + var_names=new_ds_w_one_variable_var_names) + new_ds_w_one_ds_id = "local." + str(new_ds_w_one_uuid) + + self.assertEqual(new_ds_w_one_variable.id, new_ds_w_one_ds_id) ds = new_ds_w_one_variable.open_dataset() - self.assertSetEqual(set(ds.variables), {'sm', 'lat', 'lon', 'time'}) + + new_ds_w_one_variable_var_names.extend(['lat', 'lon', 'time']) + + self.assertSetEqual(set(ds.variables), + set(new_ds_w_one_variable_var_names)) + + new_ds_w_region_title = 'from_local_to_local_region' + new_ds_w_region_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0), + datetime.datetime(1978, 11, 16, 23, 59))) + new_ds_w_region_var_names = VarNamesLike.convert(['sm']) + new_ds_w_region_spatial_coverage = PolygonLike.convert("10,10,20,20") new_ds_w_region = self.first_oc_data_source.make_local( - 'from_local_to_local_region', None, (datetime.datetime(1978, 11, 14, 0, 0), - datetime.datetime(1978, 11, 15, 23, 59)), - "10,10,20,20", ['sm']) # type: LocalDataSource + new_ds_w_region_title, + time_range=new_ds_w_region_time_range, + var_names=new_ds_w_region_var_names, + region=new_ds_w_region_spatial_coverage) # type: LocalDataSource + self.assertIsNotNone(new_ds_w_region) - self.assertEqual(new_ds_w_region.id, 'local.from_local_to_local_region') - self.assertEqual(new_ds_w_region.spatial_coverage(), PolygonLike.convert("10,10,20,20")) + + new_ds_w_region_uuid = LocalDataStore.generate_uuid(self.first_oc_data_source.id, + time_range=new_ds_w_region_time_range, + var_names=new_ds_w_region_var_names, + region=new_ds_w_region_spatial_coverage) + new_ds_w_region_ds_id = "local." + str(new_ds_w_region_uuid) + + self.assertEqual(new_ds_w_region.id, new_ds_w_region_ds_id) + + self.assertEqual(new_ds_w_region.spatial_coverage(), new_ds_w_region_spatial_coverage) data_set = new_ds_w_region.open_dataset() - self.assertSetEqual(set(data_set.variables), {'sm', 'lat', 'lon', 'time'}) + + new_ds_w_region_var_names.extend(['lat', 'lon', 'time']) + + self.assertSetEqual(set(data_set.variables), set(new_ds_w_region_var_names)) no_data = self.first_oc_data_source.make_local( 'empty_ds', None, (datetime.datetime(2017, 12, 1, 0, 0), diff --git a/test/ds/test_local.py b/test/ds/test_local.py index 8138d54b2..3fc21797a 100644 --- a/test/ds/test_local.py +++ b/test/ds/test_local.py @@ -6,7 +6,7 @@ import datetime import shutil from cate.core.ds import DATA_STORE_REGISTRY -from cate.core.types import PolygonLike, TimeRangeLike +from cate.core.types import PolygonLike, TimeRangeLike, VarNamesLike from cate.ds.local import LocalDataStore, LocalDataSource from cate.ds.esa_cci_odp import EsaCciOdpDataStore from collections import OrderedDict @@ -34,15 +34,18 @@ def test_add_pattern(self): self.assertIsNotNone(data_sources) self.assertEqual(len(data_sources), 2) + test_uuid = LocalDataStore.generate_uuid('a_name') + test_ds_id = "test." + str(test_uuid) + new_ds = self.data_store.add_pattern("a_name", "a_pat") - self.assertEqual('test.a_name', new_ds.id) + self.assertEqual(test_ds_id, new_ds.id) data_sources = self.data_store.query() self.assertEqual(len(data_sources), 3) with self.assertRaises(ValueError) as cm: self.data_store.add_pattern("a_name", "a_pat2") - self.assertEqual("Local data store 'test' already contains a data source named 'test.a_name'", + self.assertEqual("Local data store 'test' already contains a data source named '{}'".format(test_ds_id), str(cm.exception)) data_sources = self.data_store.query() @@ -50,7 +53,7 @@ def test_add_pattern(self): def test__repr_html(self): html = self.data_store._repr_html_() - self.assertEqual(524, len(html)) + self.assertEqual(584, len(html), html) def test_init(self): data_store2 = LocalDataStore('test', self.tmp_dir) @@ -246,11 +249,16 @@ def test_make_local(self): data_source = self._local_data_store.query('local_w_temporal')[0] with unittest.mock.patch.object(EsaCciOdpDataStore, 'query', return_value=[]): - new_ds = data_source.make_local('from_local_to_local', None, - (datetime.datetime(1978, 11, 14, 0, 0), - datetime.datetime(1978, 11, 15, 23, 59))) + new_ds_title = 'from_local_to_local' + new_ds_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0), + datetime.datetime(1978, 11, 15, 23, 59))) + new_ds = data_source.make_local(new_ds_title, time_range=new_ds_time_range) self.assertIsNotNone(new_ds) - self.assertEqual(new_ds.id, 'local.from_local_to_local') + + test_uuid = LocalDataStore.generate_uuid(data_source.id, new_ds_time_range) + test_ds_id = "local." + str(test_uuid) + + self.assertEqual(new_ds.id, test_ds_id) self.assertEqual(new_ds.temporal_coverage(), TimeRangeLike.convert( (datetime.datetime(1978, 11, 14, 0, 0), datetime.datetime(1978, 11, 15, 23, 59)))) @@ -266,21 +274,38 @@ def test_make_local(self): datetime.datetime(1978, 11, 16, 23, 59))) self.assertTrue("Couldn't find local DataSource", context.exception.args[0]) - new_ds_w_one_variable = data_source.make_local('from_local_to_local_var', None, - (datetime.datetime(1978, 11, 14, 0, 0), - datetime.datetime(1978, 11, 15, 23, 59)), - None, ['sm']) + new_ds_2_title = 'from_local_to_local_var' + new_ds_2_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0), + datetime.datetime(1978, 11, 15, 23, 59))) + new_ds_2_vars = VarNamesLike.convert(['sm']) + + test_2_uuid = LocalDataStore.generate_uuid(data_source.id, new_ds_2_time_range, var_names=new_ds_2_vars) + test_ds_2_id = "local." + str(test_2_uuid) + + new_ds_w_one_variable = data_source.make_local(new_ds_2_title, + time_range=new_ds_2_time_range, + var_names=new_ds_2_vars) self.assertIsNotNone(new_ds_w_one_variable) - self.assertEqual(new_ds_w_one_variable.id, 'local.from_local_to_local_var') + self.assertEqual(new_ds_w_one_variable.id, test_ds_2_id) data_set = new_ds_w_one_variable.open_dataset() self.assertSetEqual(set(data_set.variables), {'sm', 'lat', 'lon', 'time'}) - new_ds_w_region = data_source.make_local('from_local_to_local_region', None, - (datetime.datetime(1978, 11, 14, 0, 0), - datetime.datetime(1978, 11, 15, 23, 59)), - "10,10,20,20", ['sm']) # type: LocalDataSource + new_ds_3_title = 'from_local_to_local_var' + new_ds_3_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0), + datetime.datetime(1978, 11, 15, 23, 59))) + new_ds_3_vars = VarNamesLike.convert(['sm']) + new_ds_3_region = PolygonLike.convert("10,10,20,20") + + test_3_uuid = LocalDataStore.generate_uuid(data_source.id, new_ds_3_time_range, var_names=new_ds_3_vars, + region=new_ds_3_region) + test_ds_3_id = "local." + str(test_3_uuid) + + new_ds_w_region = data_source.make_local(new_ds_3_title, + time_range=new_ds_3_time_range, + var_names=new_ds_3_vars, + region=new_ds_3_region) # type: LocalDataSource self.assertIsNotNone(new_ds_w_region) - self.assertEqual(new_ds_w_region.id, 'local.from_local_to_local_region') + self.assertEqual(new_ds_w_region.id, test_ds_3_id) self.assertEqual(new_ds_w_region.spatial_coverage(), PolygonLike.convert("10,10,20,20")) data_set = new_ds_w_region.open_dataset() self.assertSetEqual(set(data_set.variables), {'sm', 'lat', 'lon', 'time'}) From fa244147859fcad8987d1886991dc2872703f18c Mon Sep 17 00:00:00 2001 From: "Krzysztof (Chris) Bernat" Date: Wed, 6 Sep 2017 18:17:02 +0100 Subject: [PATCH 2/5] 277 unique id for local ds, mixing remote and unique local id --- cate/cli/main.py | 12 ++-- cate/ds/esa_cci_odp.py | 35 +++++++---- cate/ds/local.py | 118 ++++++++++++++++++++++-------------- test/ds/test_esa_cci_odp.py | 29 +++------ test/ds/test_local.py | 35 ++++------- 5 files changed, 120 insertions(+), 109 deletions(-) diff --git a/cate/cli/main.py b/cate/cli/main.py index 47208f7ad..a7c63fdf8 100644 --- a/cate/cli/main.py +++ b/cate/cli/main.py @@ -104,7 +104,7 @@ from typing import Tuple, Union, List, Dict, Any, Optional from cate.conf.defaults import WEBAPI_INFO_FILE, WEBAPI_ON_INACTIVITY_AUTO_STOP_AFTER -from cate.core.types import Like, TimeRangeLike +from cate.core.types import Like, TimeRangeLike, PolygonLike, VarNamesLike from cate.core.ds import DATA_STORE_REGISTRY, find_data_sources from cate.core.objectio import OBJECT_IO_REGISTRY, find_writer, read_object from cate.core.op import OP_REGISTRY @@ -1230,13 +1230,13 @@ def _execute_copy(cls, command_args): if data_source is None: raise RuntimeError('internal error: no local data source found: %s' % ds_name) - local_name = command_args.name if command_args.name else ds_name + local_name = command_args.name if command_args.name else None - time_range = command_args.time - region = command_args.region - var_names = command_args.vars + time_range = TimeRangeLike.convert(command_args.time) + region = PolygonLike.convert(command_args.region) + var_names = VarNamesLike.convert(command_args.vars) - ds = data_source.make_local(local_name, None, time_range=time_range, region=region, var_names=var_names, + ds = data_source.make_local(local_name, time_range=time_range, region=region, var_names=var_names, monitor=cls.new_monitor()) if ds: print("Local data source with name '%s' has been created." % ds.id) diff --git a/cate/ds/esa_cci_odp.py b/cate/ds/esa_cci_odp.py index a29fd912b..845f3e2a7 100644 --- a/cate/ds/esa_cci_odp.py +++ b/cate/ds/esa_cci_odp.py @@ -58,7 +58,7 @@ from cate.core.ds import DATA_STORE_REGISTRY, DataStore, DataSource, Schema, \ open_xarray_dataset, get_data_stores_path, find_data_sources from cate.core.types import PolygonLike, TimeRange, TimeRangeLike, VarNamesLike -from cate.ds.local import add_to_data_store_registry, LocalDataSource +from cate.ds.local import add_to_data_store_registry, LocalDataSource, LocalDataStore from cate.util.monitor import Monitor __author__ = "Norman Fomferra (Brockmann Consult GmbH), " \ @@ -878,8 +878,6 @@ def make_local(self, region: PolygonLike.TYPE = None, var_names: VarNamesLike.TYPE = None, monitor: Monitor = Monitor.NONE) -> Optional[DataSource]: - if not local_name or len(local_name) == 0: - local_name = self.title local_store = DATA_STORE_REGISTRY.get_data_store('local') if not local_store: @@ -889,18 +887,29 @@ def make_local(self, raise ValueError('Cannot initialize `local` DataStore') local_meta_info = self.meta_info.copy() - if local_meta_info.get('uuid'): - del local_meta_info['uuid'] - local_meta_info['ref_uuid'] = self.meta_info['uuid'] - local_ds = local_store.create_data_source(self.id, region, _REFERENCE_DATA_SOURCE_TYPE, local_name, - time_range=time_range, var_names=var_names, + if not local_name or len(local_name) == 0: + local_name = "local.{}.{}".format(self.id, LocalDataStore.generate_uuid(ref_id=self.id, + time_range=time_range, + region=region, + var_names=var_names)) + existing_ds_list = local_store.query(local_name) + if len(existing_ds_list) == 1: + return existing_ds_list[0] + + local_ds = local_store.create_data_source(local_name, + time_range=time_range, region=region, var_names=var_names, meta_info=local_meta_info, lock_file=True) - self._make_local(local_ds, time_range, region, var_names, monitor=monitor) - if local_ds.is_empty: - local_store.remove_data_source(local_ds) - return None - return local_ds + if local_ds: + if not local_ds.is_complete: + self._make_local(local_ds, time_range, region, var_names, monitor=monitor) + + if local_ds.is_empty: + local_store.remove_data_source(local_ds) + return None + + local_store.register_ds(local_ds) + return local_ds def _init_file_list(self, monitor: Monitor = Monitor.NONE): if self._file_list: diff --git a/cate/ds/local.py b/cate/ds/local.py index b6d0f15c3..96313a52a 100644 --- a/cate/ds/local.py +++ b/cate/ds/local.py @@ -55,7 +55,7 @@ from cate.conf.defaults import NETCDF_COMPRESSION_LEVEL from cate.core.ds import DATA_STORE_REGISTRY, DataStore, DataSource, open_xarray_dataset, find_data_sources from cate.core.ds import get_data_stores_path -from cate.core.types import PolygonLike, TimeRange, TimeRangeLike, VarNamesLike +from cate.core.types import Polygon, PolygonLike, TimeRange, TimeRangeLike, VarNames, VarNamesLike from cate.util.monitor import Monitor __author__ = "Norman Fomferra (Brockmann Consult GmbH), " \ @@ -130,6 +130,7 @@ def __init__(self, self._reference_name = reference_name self._meta_info = meta_info if meta_info else OrderedDict() + self._is_complete = True def _resolve_file_path(self, path) -> Sequence: return glob(os.path.join(self._data_store.data_store_path, path)) @@ -350,16 +351,28 @@ def make_local(self, if not local_store: raise ValueError('Cannot initialize `local` DataStore') - self.meta_info.copy() - - local_ds = local_store.create_data_source(self.id, region, _REFERENCE_DATA_SOURCE_TYPE, local_name, + if not local_name or len(local_name) == 0: + local_name = "local.{}.{}".format(self.id, LocalDataStore.generate_uuid(ref_id=self.id, + time_range=time_range, + region=region, + var_names=var_names)) + existing_ds_list = local_store.query(local_name) + if len(existing_ds_list) == 1: + return existing_ds_list[0] + + local_ds = local_store.create_data_source(local_name, region, _REFERENCE_DATA_SOURCE_TYPE, local_name, time_range=time_range, var_names=var_names, - meta_info=self.meta_info) - self._make_local(local_ds, time_range, region, var_names, monitor) - if local_ds.is_empty: - local_store.remove_data_source(local_ds) - return None - return local_ds + meta_info=self.meta_info.copy()) + if local_ds: + if not local_ds.is_complete: + self._make_local(local_ds, time_range, region, var_names, monitor=monitor) + + if local_ds.is_empty: + local_store.remove_data_source(local_ds) + return None + + local_store.register_ds(local_ds) + return local_ds def update_local(self, local_id: str, @@ -492,6 +505,14 @@ def variables_info(self): def info_string(self): return 'Files: %s' % (' '.join(self._files)) + @property + def is_complete(self) -> bool: + """ + Return a DataSource creation state + :return: + """ + return self._is_complete + @property def is_empty(self) -> bool: """ @@ -500,6 +521,14 @@ def is_empty(self) -> bool: """ return not self._files or len(self._files) == 0 + def set_completed(self, state: bool): + """ + Sets state of DataSource creation/completion + :param state: Is DataSource completed + :return: + """ + self._is_complete = state + def _repr_html_(self): import html return '\n' \ @@ -583,6 +612,7 @@ def add_pattern(self, data_source_id: str, files: Union[str, Sequence[str]] = No is_first_file = False else: data_source.add_dataset(file) + self.register_ds(data_source) return data_source def remove_data_source(self, data_source: Union[str, DataSource], remove_files: bool = True): @@ -595,31 +625,34 @@ def remove_data_source(self, data_source: Union[str, DataSource], remove_files: os.remove(file_name) if remove_files: shutil.rmtree(os.path.join(self._store_dir, data_source.id), ignore_errors=True) - self._data_sources.remove(data_source) + if data_source in self._data_sources: + self._data_sources.remove(data_source) + + def register_ds(self, data_source: DataSource): + data_source.set_completed(True) + self._data_sources.append(data_source) @classmethod - def generate_uuid(cls, ref_id=str, - time_range: TimeRangeLike.TYPE = None, - region: PolygonLike.TYPE = None, - var_names: VarNamesLike.TYPE = None) -> uuid.UUID: + def generate_uuid(cls, ref_id: str, + time_range: Optional[TimeRange] = None, + region: Optional[Polygon] = None, + var_names: Optional[VarNames] = None) -> uuid.UUID: - constrains_str = ref_id if time_range: - constrains_str += TimeRangeLike.format(time_range) + ref_id += TimeRangeLike.format(time_range) if region: - constrains_str += PolygonLike.format(region) + ref_id += PolygonLike.format(region) if var_names: - constrains_str += VarNamesLike.format(var_names) + ref_id += VarNamesLike.format(var_names) - return uuid.uuid3(_NAMESPACE, constrains_str) + return str(uuid.uuid3(_NAMESPACE, ref_id)) @classmethod - def generate_title(cls, ref_id=str, - time_range: TimeRangeLike.TYPE = None, - region: PolygonLike.TYPE = None, - var_names: VarNamesLike.TYPE = None) -> uuid.UUID: + def generate_title(cls, title: str, + time_range: Optional[TimeRange] = None, + region: Optional[Polygon] = None, + var_names: Optional[VarNames] = None) -> uuid.UUID: - title = ref_id if time_range: title += " [TimeRange:{}]".format(TimeRangeLike.format(time_range)) if region: @@ -629,26 +662,21 @@ def generate_title(cls, ref_id=str, return title - def create_data_source(self, ref_id: str, region: PolygonLike.TYPE = None, - reference_type: str = None, reference_name: str = None, + def create_data_source(self, data_source_id: str, region: PolygonLike.TYPE = None, + reference_type: str = None, title: str = None, time_range: TimeRangeLike.TYPE = None, var_names: VarNamesLike.TYPE = None, meta_info: OrderedDict = None, lock_file: bool = False): self._init_data_sources() - if reference_name: - reference_name = LocalDataStore.generate_title(reference_name, time_range, region, var_names) - else: - reference_name = LocalDataStore.generate_title(ref_id, time_range, region, var_names) - if meta_info: - meta_info['title'] = reference_name + meta_info['title'] = title - data_source_id = LocalDataStore.generate_uuid(ref_id, time_range, region, var_names) - data_source_id = '%s.%s' % (self.id, data_source_id) + if meta_info.get('uuid'): + meta_info['ref_uuid'] = meta_info['uuid'] + del meta_info['uuid'] - lock_filename = '{}.lock'.format(data_source_id) - lock_filepath = os.path.join(self._store_dir, lock_filename) - existing_ds = None + lock_filepath = os.path.join(self._store_dir, '{}.lock'.format(data_source_id)) + data_source = None for ds in self._data_sources: if ds.id == data_source_id: if lock_file and os.path.isfile(lock_filepath): @@ -660,24 +688,24 @@ def create_data_source(self, ref_id: str, region: PolygonLike.TYPE = None, # ds.temporal_coverage() == time_range and if ds.spatial_coverage() == region \ and ds.variables_info == var_names: - existing_ds = ds + data_source = ds + data_source.set_completed(False) break raise ValueError("Local data store '{}' already contains a data source named '{}'" .format(self.id, data_source_id)) - if existing_ds: - data_source = existing_ds - else: + if not data_source: data_source = LocalDataSource(data_source_id, files=[], data_store=self, spatial_coverage=region, variables=var_names, temporal_coverage=time_range, - reference_type=reference_type, reference_name=reference_name, + reference_type=reference_type, reference_name=title, meta_info=meta_info) + data_source.set_completed(False) + self._save_data_source(data_source) + if lock_file: pid = os.getpid() with open(lock_filepath, 'w') as lock_file: lock_file.write(str(pid)) - self._save_data_source(data_source) - self._data_sources.append(data_source) return data_source @property diff --git a/test/ds/test_esa_cci_odp.py b/test/ds/test_esa_cci_odp.py index d68ad2213..58c00b902 100644 --- a/test/ds/test_esa_cci_odp.py +++ b/test/ds/test_esa_cci_odp.py @@ -134,10 +134,7 @@ def build_file_item(item_name: str, date_from: datetime, date_to: datetime, size raise ValueError(reference_path, os.listdir(reference_path)) self.assertIsNotNone(new_ds) - test_uuid = LocalDataStore.generate_uuid(self.first_oc_data_source.id, new_ds_time_range) - test_ds_id = "local." + str(test_uuid) - - self.assertEqual(new_ds.id, test_ds_id) + self.assertEqual(new_ds.id, new_ds_title) self.assertEqual(new_ds.temporal_coverage(), new_ds_time_range) self.first_oc_data_source.update_local(new_ds.id, (datetime.datetime(1978, 11, 15, 00, 00), @@ -157,7 +154,7 @@ def build_file_item(item_name: str, date_from: datetime, date_to: datetime, size datetime.datetime(1978, 11, 16, 23, 59))) self.assertTrue("Couldn't find local DataSource", context.exception.args[0]) - new_ds_w_one_variable_title = 'local_ds_test' + new_ds_w_one_variable_title = 'local_ds_test_var' new_ds_w_one_variable_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0), datetime.datetime(1978, 11, 16, 23, 59))) new_ds_w_one_variable_var_names = VarNamesLike.convert(['sm']) @@ -169,12 +166,7 @@ def build_file_item(item_name: str, date_from: datetime, date_to: datetime, size ) self.assertIsNotNone(new_ds_w_one_variable) - new_ds_w_one_uuid = LocalDataStore.generate_uuid(self.first_oc_data_source.id, - time_range=new_ds_w_one_variable_time_range, - var_names=new_ds_w_one_variable_var_names) - new_ds_w_one_ds_id = "local." + str(new_ds_w_one_uuid) - - self.assertEqual(new_ds_w_one_variable.id, new_ds_w_one_ds_id) + self.assertEqual(new_ds_w_one_variable.id, new_ds_w_one_variable_title) ds = new_ds_w_one_variable.open_dataset() new_ds_w_one_variable_var_names.extend(['lat', 'lon', 'time']) @@ -196,13 +188,7 @@ def build_file_item(item_name: str, date_from: datetime, date_to: datetime, size self.assertIsNotNone(new_ds_w_region) - new_ds_w_region_uuid = LocalDataStore.generate_uuid(self.first_oc_data_source.id, - time_range=new_ds_w_region_time_range, - var_names=new_ds_w_region_var_names, - region=new_ds_w_region_spatial_coverage) - new_ds_w_region_ds_id = "local." + str(new_ds_w_region_uuid) - - self.assertEqual(new_ds_w_region.id, new_ds_w_region_ds_id) + self.assertEqual(new_ds_w_region.id, new_ds_w_region_title) self.assertEqual(new_ds_w_region.spatial_coverage(), new_ds_w_region_spatial_coverage) data_set = new_ds_w_region.open_dataset() @@ -211,10 +197,9 @@ def build_file_item(item_name: str, date_from: datetime, date_to: datetime, size self.assertSetEqual(set(data_set.variables), set(new_ds_w_region_var_names)) - no_data = self.first_oc_data_source.make_local( - 'empty_ds', None, (datetime.datetime(2017, 12, 1, 0, 0), - datetime.datetime(2017, 12, 31, 23, 59)), - ) + no_data = self.first_oc_data_source.make_local('empty_ds', + time_range=(datetime.datetime(2017, 12, 1, 0, 0), + datetime.datetime(2017, 12, 31, 23, 59))) self.assertIsNone(no_data) def test_data_store(self): diff --git a/test/ds/test_local.py b/test/ds/test_local.py index 3fc21797a..99379189e 100644 --- a/test/ds/test_local.py +++ b/test/ds/test_local.py @@ -34,18 +34,17 @@ def test_add_pattern(self): self.assertIsNotNone(data_sources) self.assertEqual(len(data_sources), 2) - test_uuid = LocalDataStore.generate_uuid('a_name') - test_ds_id = "test." + str(test_uuid) + new_ds_name = 'a_name' + new_ds = self.data_store.add_pattern(new_ds_name, "a_pat") - new_ds = self.data_store.add_pattern("a_name", "a_pat") - self.assertEqual(test_ds_id, new_ds.id) + self.assertEqual(new_ds_name, new_ds.id) data_sources = self.data_store.query() self.assertEqual(len(data_sources), 3) with self.assertRaises(ValueError) as cm: self.data_store.add_pattern("a_name", "a_pat2") - self.assertEqual("Local data store 'test' already contains a data source named '{}'".format(test_ds_id), + self.assertEqual("Local data store 'test' already contains a data source named '{}'".format(new_ds_name), str(cm.exception)) data_sources = self.data_store.query() @@ -53,7 +52,7 @@ def test_add_pattern(self): def test__repr_html(self): html = self.data_store._repr_html_() - self.assertEqual(584, len(html), html) + self.assertEqual(514, len(html), html) def test_init(self): data_store2 = LocalDataStore('test', self.tmp_dir) @@ -255,10 +254,7 @@ def test_make_local(self): new_ds = data_source.make_local(new_ds_title, time_range=new_ds_time_range) self.assertIsNotNone(new_ds) - test_uuid = LocalDataStore.generate_uuid(data_source.id, new_ds_time_range) - test_ds_id = "local." + str(test_uuid) - - self.assertEqual(new_ds.id, test_ds_id) + self.assertEqual(new_ds.id, new_ds_title) self.assertEqual(new_ds.temporal_coverage(), TimeRangeLike.convert( (datetime.datetime(1978, 11, 14, 0, 0), datetime.datetime(1978, 11, 15, 23, 59)))) @@ -279,40 +275,33 @@ def test_make_local(self): datetime.datetime(1978, 11, 15, 23, 59))) new_ds_2_vars = VarNamesLike.convert(['sm']) - test_2_uuid = LocalDataStore.generate_uuid(data_source.id, new_ds_2_time_range, var_names=new_ds_2_vars) - test_ds_2_id = "local." + str(test_2_uuid) - new_ds_w_one_variable = data_source.make_local(new_ds_2_title, time_range=new_ds_2_time_range, var_names=new_ds_2_vars) self.assertIsNotNone(new_ds_w_one_variable) - self.assertEqual(new_ds_w_one_variable.id, test_ds_2_id) + self.assertEqual(new_ds_w_one_variable.id, new_ds_2_title) data_set = new_ds_w_one_variable.open_dataset() self.assertSetEqual(set(data_set.variables), {'sm', 'lat', 'lon', 'time'}) - new_ds_3_title = 'from_local_to_local_var' + new_ds_3_title = 'from_local_to_local_range' new_ds_3_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0), datetime.datetime(1978, 11, 15, 23, 59))) new_ds_3_vars = VarNamesLike.convert(['sm']) new_ds_3_region = PolygonLike.convert("10,10,20,20") - test_3_uuid = LocalDataStore.generate_uuid(data_source.id, new_ds_3_time_range, var_names=new_ds_3_vars, - region=new_ds_3_region) - test_ds_3_id = "local." + str(test_3_uuid) - new_ds_w_region = data_source.make_local(new_ds_3_title, time_range=new_ds_3_time_range, var_names=new_ds_3_vars, region=new_ds_3_region) # type: LocalDataSource self.assertIsNotNone(new_ds_w_region) - self.assertEqual(new_ds_w_region.id, test_ds_3_id) + self.assertEqual(new_ds_w_region.id, new_ds_3_title) self.assertEqual(new_ds_w_region.spatial_coverage(), PolygonLike.convert("10,10,20,20")) data_set = new_ds_w_region.open_dataset() self.assertSetEqual(set(data_set.variables), {'sm', 'lat', 'lon', 'time'}) - no_data = data_source.make_local('no_data', None, - (datetime.datetime(2020, 11, 14, 0, 0), - datetime.datetime(2020, 11, 15, 23, 59))) + no_data = data_source.make_local('no_data', + time_range=(datetime.datetime(2020, 11, 14, 0, 0), + datetime.datetime(2020, 11, 15, 23, 59))) self.assertIsNone(no_data) def test_remove_data_source_by_id(self): From a7380580648ea69dfd81d2e777cda06b77272204 Mon Sep 17 00:00:00 2001 From: "Krzysztof (Chris) Bernat" Date: Tue, 5 Sep 2017 20:51:36 +0100 Subject: [PATCH 3/5] #277 unique id for local data sources and human-readable title --- cate/core/types.py | 2 +- cate/ds/esa_cci_odp.py | 13 +++---- cate/ds/local.py | 63 +++++++++++++++++++++++++++---- test/ds/test_esa_cci_odp.py | 75 ++++++++++++++++++++++++++++--------- test/ds/test_local.py | 61 +++++++++++++++++++++--------- 5 files changed, 162 insertions(+), 52 deletions(-) diff --git a/cate/core/types.py b/cate/core/types.py index 59f02c21d..cd4c19798 100644 --- a/cate/core/types.py +++ b/cate/core/types.py @@ -221,7 +221,7 @@ def convert(cls, value: Any) -> Optional[VarNames]: raise ValueError('Variable name pattern can only be a string' ' or a list of strings.') - return value + return value.copy() @classmethod def format(cls, value: Optional[VarNames]) -> str: diff --git a/cate/ds/esa_cci_odp.py b/cate/ds/esa_cci_odp.py index 49a97890a..86133722a 100644 --- a/cate/ds/esa_cci_odp.py +++ b/cate/ds/esa_cci_odp.py @@ -70,7 +70,7 @@ _ESGF_CEDA_URL = "https://esgf-index1.ceda.ac.uk/esg-search/search/" # _CSW_CEDA_URL = "http://csw1.cems.rl.ac.uk/geonetwork-CEDA/srv/eng/csw-CEDA-CCI" -_CSW_CEDA_URL = "https://csw.ceda.ac.uk/geonetwork/srv/eng/csw-CEDA-CCI" +_CSW_CEDA_URL = "https://csw.ceda.ac.uk/geonetwork/srv/eng/csw" _TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S" @@ -947,10 +947,8 @@ def make_local(self, region: PolygonLike.TYPE = None, var_names: VarNamesLike.TYPE = None, monitor: Monitor = Monitor.NONE) -> Optional[DataSource]: - if not local_name: - raise ValueError('local_name is required') - elif len(local_name) == 0: - raise ValueError('local_name cannot be empty') + if not local_name or len(local_name) == 0: + local_name = self.title local_store = DATA_STORE_REGISTRY.get_data_store('local') if not local_store: @@ -964,8 +962,9 @@ def make_local(self, del local_meta_info['uuid'] local_meta_info['ref_uuid'] = self.meta_info['uuid'] - local_ds = local_store.create_data_source(local_name, region, _REFERENCE_DATA_SOURCE_TYPE, self.id, - time_range, var_names, meta_info=local_meta_info, lock_file=True) + local_ds = local_store.create_data_source(self.id, region, _REFERENCE_DATA_SOURCE_TYPE, local_name, + time_range=time_range, var_names=var_names, + meta_info=local_meta_info, lock_file=True) self._make_local(local_ds, time_range, region, var_names, monitor=monitor) if local_ds.is_empty: local_store.remove_data_source(local_ds) diff --git a/cate/ds/local.py b/cate/ds/local.py index d730b1092..b6d0f15c3 100644 --- a/cate/ds/local.py +++ b/cate/ds/local.py @@ -41,6 +41,7 @@ import os import psutil import shutil +import uuid import xarray as xr from collections import OrderedDict from datetime import datetime @@ -63,6 +64,8 @@ _REFERENCE_DATA_SOURCE_TYPE = "FILE_PATTERN" +_NAMESPACE = uuid.UUID(bytes=b"1234567890123456", version=3) + def get_data_store_path(): return os.environ.get('CATE_LOCAL_DATA_STORE_PATH', @@ -337,10 +340,8 @@ def make_local(self, region: PolygonLike.TYPE = None, var_names: VarNamesLike.TYPE = None, monitor: Monitor = Monitor.NONE) -> Optional[DataSource]: - if not local_name: - raise ValueError('local_name is required') - elif len(local_name) == 0: - raise ValueError('local_name cannot be empty') + if not local_name or len(local_name) == 0: + local_name = self.title local_store = DATA_STORE_REGISTRY.get_data_store('local') if not local_store: @@ -349,7 +350,10 @@ def make_local(self, if not local_store: raise ValueError('Cannot initialize `local` DataStore') - local_ds = local_store.create_data_source(local_name, region, _REFERENCE_DATA_SOURCE_TYPE, self.id, + self.meta_info.copy() + + local_ds = local_store.create_data_source(self.id, region, _REFERENCE_DATA_SOURCE_TYPE, local_name, + time_range=time_range, var_names=var_names, meta_info=self.meta_info) self._make_local(local_ds, time_range, region, var_names, monitor) if local_ds.is_empty: @@ -593,13 +597,55 @@ def remove_data_source(self, data_source: Union[str, DataSource], remove_files: shutil.rmtree(os.path.join(self._store_dir, data_source.id), ignore_errors=True) self._data_sources.remove(data_source) - def create_data_source(self, data_source_id: str, region: PolygonLike.TYPE = None, + @classmethod + def generate_uuid(cls, ref_id=str, + time_range: TimeRangeLike.TYPE = None, + region: PolygonLike.TYPE = None, + var_names: VarNamesLike.TYPE = None) -> uuid.UUID: + + constrains_str = ref_id + if time_range: + constrains_str += TimeRangeLike.format(time_range) + if region: + constrains_str += PolygonLike.format(region) + if var_names: + constrains_str += VarNamesLike.format(var_names) + + return uuid.uuid3(_NAMESPACE, constrains_str) + + @classmethod + def generate_title(cls, ref_id=str, + time_range: TimeRangeLike.TYPE = None, + region: PolygonLike.TYPE = None, + var_names: VarNamesLike.TYPE = None) -> uuid.UUID: + + title = ref_id + if time_range: + title += " [TimeRange:{}]".format(TimeRangeLike.format(time_range)) + if region: + title += " [Region:{}]".format(PolygonLike.format(region)) + if var_names: + title += " [Variables:{}]".format(VarNamesLike.format(var_names)) + + return title + + def create_data_source(self, ref_id: str, region: PolygonLike.TYPE = None, reference_type: str = None, reference_name: str = None, time_range: TimeRangeLike.TYPE = None, var_names: VarNamesLike.TYPE = None, meta_info: OrderedDict = None, lock_file: bool = False): self._init_data_sources() - if not data_source_id.startswith('%s.' % self.id): - data_source_id = '%s.%s' % (self.id, data_source_id) + + if reference_name: + reference_name = LocalDataStore.generate_title(reference_name, time_range, region, var_names) + else: + reference_name = LocalDataStore.generate_title(ref_id, time_range, region, var_names) + + if meta_info: + meta_info['title'] = reference_name + + data_source_id = LocalDataStore.generate_uuid(ref_id, time_range, region, var_names) + data_source_id = '%s.%s' % (self.id, data_source_id) + lock_filename = '{}.lock'.format(data_source_id) lock_filepath = os.path.join(self._store_dir, lock_filename) existing_ds = None @@ -622,6 +668,7 @@ def create_data_source(self, data_source_id: str, region: PolygonLike.TYPE = Non data_source = existing_ds else: data_source = LocalDataSource(data_source_id, files=[], data_store=self, spatial_coverage=region, + variables=var_names, temporal_coverage=time_range, reference_type=reference_type, reference_name=reference_name, meta_info=meta_info) if lock_file: diff --git a/test/ds/test_esa_cci_odp.py b/test/ds/test_esa_cci_odp.py index 7afdb3250..d68ad2213 100644 --- a/test/ds/test_esa_cci_odp.py +++ b/test/ds/test_esa_cci_odp.py @@ -9,7 +9,7 @@ import shutil from cate.core.ds import DATA_STORE_REGISTRY -from cate.core.types import PolygonLike, TimeRangeLike +from cate.core.types import PolygonLike, TimeRangeLike, VarNamesLike from cate.ds.esa_cci_odp import EsaCciOdpDataStore, find_datetime_format from cate.ds.local import LocalDataStore @@ -124,18 +124,21 @@ def build_file_item(item_name: str, date_from: datetime, date_to: datetime, size with unittest.mock.patch('cate.ds.esa_cci_odp.EsaCciOdpDataSource._find_files', find_files_mock): with unittest.mock.patch.object(EsaCciOdpDataStore, 'query', return_value=[]): + + new_ds_title = 'local_ds_test' + new_ds_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0), + datetime.datetime(1978, 11, 16, 23, 59))) try: - new_ds = self.first_oc_data_source.make_local('local_ds_test', None, - (datetime.datetime(1978, 11, 14, 0, 0), - datetime.datetime(1978, 11, 15, 23, 59))) + new_ds = self.first_oc_data_source.make_local(new_ds_title, time_range=new_ds_time_range) except: raise ValueError(reference_path, os.listdir(reference_path)) self.assertIsNotNone(new_ds) - self.assertEqual(new_ds.id, 'local.local_ds_test') - self.assertEqual(new_ds.temporal_coverage(), - (datetime.datetime(1978, 11, 14, 0, 0), - datetime.datetime(1978, 11, 15, 23, 59))) + test_uuid = LocalDataStore.generate_uuid(self.first_oc_data_source.id, new_ds_time_range) + test_ds_id = "local." + str(test_uuid) + + self.assertEqual(new_ds.id, test_ds_id) + self.assertEqual(new_ds.temporal_coverage(), new_ds_time_range) self.first_oc_data_source.update_local(new_ds.id, (datetime.datetime(1978, 11, 15, 00, 00), datetime.datetime(1978, 11, 16, 23, 59))) @@ -154,23 +157,59 @@ def build_file_item(item_name: str, date_from: datetime, date_to: datetime, size datetime.datetime(1978, 11, 16, 23, 59))) self.assertTrue("Couldn't find local DataSource", context.exception.args[0]) + new_ds_w_one_variable_title = 'local_ds_test' + new_ds_w_one_variable_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0), + datetime.datetime(1978, 11, 16, 23, 59))) + new_ds_w_one_variable_var_names = VarNamesLike.convert(['sm']) + new_ds_w_one_variable = self.first_oc_data_source.make_local( - 'local_ds_test_2', None, (datetime.datetime(1978, 11, 14, 0, 0), - datetime.datetime(1978, 11, 15, 23, 59)), None, ['sm']) + new_ds_w_one_variable_title, + time_range=new_ds_w_one_variable_time_range, + var_names=new_ds_w_one_variable_var_names + ) self.assertIsNotNone(new_ds_w_one_variable) - self.assertEqual(new_ds_w_one_variable.id, 'local.local_ds_test_2') + + new_ds_w_one_uuid = LocalDataStore.generate_uuid(self.first_oc_data_source.id, + time_range=new_ds_w_one_variable_time_range, + var_names=new_ds_w_one_variable_var_names) + new_ds_w_one_ds_id = "local." + str(new_ds_w_one_uuid) + + self.assertEqual(new_ds_w_one_variable.id, new_ds_w_one_ds_id) ds = new_ds_w_one_variable.open_dataset() - self.assertSetEqual(set(ds.variables), {'sm', 'lat', 'lon', 'time'}) + + new_ds_w_one_variable_var_names.extend(['lat', 'lon', 'time']) + + self.assertSetEqual(set(ds.variables), + set(new_ds_w_one_variable_var_names)) + + new_ds_w_region_title = 'from_local_to_local_region' + new_ds_w_region_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0), + datetime.datetime(1978, 11, 16, 23, 59))) + new_ds_w_region_var_names = VarNamesLike.convert(['sm']) + new_ds_w_region_spatial_coverage = PolygonLike.convert("10,10,20,20") new_ds_w_region = self.first_oc_data_source.make_local( - 'from_local_to_local_region', None, (datetime.datetime(1978, 11, 14, 0, 0), - datetime.datetime(1978, 11, 15, 23, 59)), - "10,10,20,20", ['sm']) # type: LocalDataSource + new_ds_w_region_title, + time_range=new_ds_w_region_time_range, + var_names=new_ds_w_region_var_names, + region=new_ds_w_region_spatial_coverage) # type: LocalDataSource + self.assertIsNotNone(new_ds_w_region) - self.assertEqual(new_ds_w_region.id, 'local.from_local_to_local_region') - self.assertEqual(new_ds_w_region.spatial_coverage(), PolygonLike.convert("10,10,20,20")) + + new_ds_w_region_uuid = LocalDataStore.generate_uuid(self.first_oc_data_source.id, + time_range=new_ds_w_region_time_range, + var_names=new_ds_w_region_var_names, + region=new_ds_w_region_spatial_coverage) + new_ds_w_region_ds_id = "local." + str(new_ds_w_region_uuid) + + self.assertEqual(new_ds_w_region.id, new_ds_w_region_ds_id) + + self.assertEqual(new_ds_w_region.spatial_coverage(), new_ds_w_region_spatial_coverage) data_set = new_ds_w_region.open_dataset() - self.assertSetEqual(set(data_set.variables), {'sm', 'lat', 'lon', 'time'}) + + new_ds_w_region_var_names.extend(['lat', 'lon', 'time']) + + self.assertSetEqual(set(data_set.variables), set(new_ds_w_region_var_names)) no_data = self.first_oc_data_source.make_local( 'empty_ds', None, (datetime.datetime(2017, 12, 1, 0, 0), diff --git a/test/ds/test_local.py b/test/ds/test_local.py index 8138d54b2..3fc21797a 100644 --- a/test/ds/test_local.py +++ b/test/ds/test_local.py @@ -6,7 +6,7 @@ import datetime import shutil from cate.core.ds import DATA_STORE_REGISTRY -from cate.core.types import PolygonLike, TimeRangeLike +from cate.core.types import PolygonLike, TimeRangeLike, VarNamesLike from cate.ds.local import LocalDataStore, LocalDataSource from cate.ds.esa_cci_odp import EsaCciOdpDataStore from collections import OrderedDict @@ -34,15 +34,18 @@ def test_add_pattern(self): self.assertIsNotNone(data_sources) self.assertEqual(len(data_sources), 2) + test_uuid = LocalDataStore.generate_uuid('a_name') + test_ds_id = "test." + str(test_uuid) + new_ds = self.data_store.add_pattern("a_name", "a_pat") - self.assertEqual('test.a_name', new_ds.id) + self.assertEqual(test_ds_id, new_ds.id) data_sources = self.data_store.query() self.assertEqual(len(data_sources), 3) with self.assertRaises(ValueError) as cm: self.data_store.add_pattern("a_name", "a_pat2") - self.assertEqual("Local data store 'test' already contains a data source named 'test.a_name'", + self.assertEqual("Local data store 'test' already contains a data source named '{}'".format(test_ds_id), str(cm.exception)) data_sources = self.data_store.query() @@ -50,7 +53,7 @@ def test_add_pattern(self): def test__repr_html(self): html = self.data_store._repr_html_() - self.assertEqual(524, len(html)) + self.assertEqual(584, len(html), html) def test_init(self): data_store2 = LocalDataStore('test', self.tmp_dir) @@ -246,11 +249,16 @@ def test_make_local(self): data_source = self._local_data_store.query('local_w_temporal')[0] with unittest.mock.patch.object(EsaCciOdpDataStore, 'query', return_value=[]): - new_ds = data_source.make_local('from_local_to_local', None, - (datetime.datetime(1978, 11, 14, 0, 0), - datetime.datetime(1978, 11, 15, 23, 59))) + new_ds_title = 'from_local_to_local' + new_ds_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0), + datetime.datetime(1978, 11, 15, 23, 59))) + new_ds = data_source.make_local(new_ds_title, time_range=new_ds_time_range) self.assertIsNotNone(new_ds) - self.assertEqual(new_ds.id, 'local.from_local_to_local') + + test_uuid = LocalDataStore.generate_uuid(data_source.id, new_ds_time_range) + test_ds_id = "local." + str(test_uuid) + + self.assertEqual(new_ds.id, test_ds_id) self.assertEqual(new_ds.temporal_coverage(), TimeRangeLike.convert( (datetime.datetime(1978, 11, 14, 0, 0), datetime.datetime(1978, 11, 15, 23, 59)))) @@ -266,21 +274,38 @@ def test_make_local(self): datetime.datetime(1978, 11, 16, 23, 59))) self.assertTrue("Couldn't find local DataSource", context.exception.args[0]) - new_ds_w_one_variable = data_source.make_local('from_local_to_local_var', None, - (datetime.datetime(1978, 11, 14, 0, 0), - datetime.datetime(1978, 11, 15, 23, 59)), - None, ['sm']) + new_ds_2_title = 'from_local_to_local_var' + new_ds_2_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0), + datetime.datetime(1978, 11, 15, 23, 59))) + new_ds_2_vars = VarNamesLike.convert(['sm']) + + test_2_uuid = LocalDataStore.generate_uuid(data_source.id, new_ds_2_time_range, var_names=new_ds_2_vars) + test_ds_2_id = "local." + str(test_2_uuid) + + new_ds_w_one_variable = data_source.make_local(new_ds_2_title, + time_range=new_ds_2_time_range, + var_names=new_ds_2_vars) self.assertIsNotNone(new_ds_w_one_variable) - self.assertEqual(new_ds_w_one_variable.id, 'local.from_local_to_local_var') + self.assertEqual(new_ds_w_one_variable.id, test_ds_2_id) data_set = new_ds_w_one_variable.open_dataset() self.assertSetEqual(set(data_set.variables), {'sm', 'lat', 'lon', 'time'}) - new_ds_w_region = data_source.make_local('from_local_to_local_region', None, - (datetime.datetime(1978, 11, 14, 0, 0), - datetime.datetime(1978, 11, 15, 23, 59)), - "10,10,20,20", ['sm']) # type: LocalDataSource + new_ds_3_title = 'from_local_to_local_var' + new_ds_3_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0), + datetime.datetime(1978, 11, 15, 23, 59))) + new_ds_3_vars = VarNamesLike.convert(['sm']) + new_ds_3_region = PolygonLike.convert("10,10,20,20") + + test_3_uuid = LocalDataStore.generate_uuid(data_source.id, new_ds_3_time_range, var_names=new_ds_3_vars, + region=new_ds_3_region) + test_ds_3_id = "local." + str(test_3_uuid) + + new_ds_w_region = data_source.make_local(new_ds_3_title, + time_range=new_ds_3_time_range, + var_names=new_ds_3_vars, + region=new_ds_3_region) # type: LocalDataSource self.assertIsNotNone(new_ds_w_region) - self.assertEqual(new_ds_w_region.id, 'local.from_local_to_local_region') + self.assertEqual(new_ds_w_region.id, test_ds_3_id) self.assertEqual(new_ds_w_region.spatial_coverage(), PolygonLike.convert("10,10,20,20")) data_set = new_ds_w_region.open_dataset() self.assertSetEqual(set(data_set.variables), {'sm', 'lat', 'lon', 'time'}) From 208927b90809d205b411f93ea698aae86f88e699 Mon Sep 17 00:00:00 2001 From: "Krzysztof (Chris) Bernat" Date: Wed, 6 Sep 2017 18:17:02 +0100 Subject: [PATCH 4/5] 277 unique id for local ds, mixing remote and unique local id --- cate/cli/main.py | 12 ++-- cate/ds/esa_cci_odp.py | 35 +++++++---- cate/ds/local.py | 118 ++++++++++++++++++++++-------------- test/ds/test_esa_cci_odp.py | 29 +++------ test/ds/test_local.py | 35 ++++------- 5 files changed, 121 insertions(+), 108 deletions(-) diff --git a/cate/cli/main.py b/cate/cli/main.py index 47208f7ad..a7c63fdf8 100644 --- a/cate/cli/main.py +++ b/cate/cli/main.py @@ -104,7 +104,7 @@ from typing import Tuple, Union, List, Dict, Any, Optional from cate.conf.defaults import WEBAPI_INFO_FILE, WEBAPI_ON_INACTIVITY_AUTO_STOP_AFTER -from cate.core.types import Like, TimeRangeLike +from cate.core.types import Like, TimeRangeLike, PolygonLike, VarNamesLike from cate.core.ds import DATA_STORE_REGISTRY, find_data_sources from cate.core.objectio import OBJECT_IO_REGISTRY, find_writer, read_object from cate.core.op import OP_REGISTRY @@ -1230,13 +1230,13 @@ def _execute_copy(cls, command_args): if data_source is None: raise RuntimeError('internal error: no local data source found: %s' % ds_name) - local_name = command_args.name if command_args.name else ds_name + local_name = command_args.name if command_args.name else None - time_range = command_args.time - region = command_args.region - var_names = command_args.vars + time_range = TimeRangeLike.convert(command_args.time) + region = PolygonLike.convert(command_args.region) + var_names = VarNamesLike.convert(command_args.vars) - ds = data_source.make_local(local_name, None, time_range=time_range, region=region, var_names=var_names, + ds = data_source.make_local(local_name, time_range=time_range, region=region, var_names=var_names, monitor=cls.new_monitor()) if ds: print("Local data source with name '%s' has been created." % ds.id) diff --git a/cate/ds/esa_cci_odp.py b/cate/ds/esa_cci_odp.py index 86133722a..3d40caa7f 100644 --- a/cate/ds/esa_cci_odp.py +++ b/cate/ds/esa_cci_odp.py @@ -60,7 +60,7 @@ from cate.core.ds import DATA_STORE_REGISTRY, DataStore, DataSource, Schema, \ open_xarray_dataset, get_data_stores_path, find_data_sources from cate.core.types import PolygonLike, TimeRange, TimeRangeLike, VarNamesLike, VarNames -from cate.ds.local import add_to_data_store_registry, LocalDataSource +from cate.ds.local import add_to_data_store_registry, LocalDataSource, LocalDataStore from cate.util.monitor import Monitor __author__ = "Norman Fomferra (Brockmann Consult GmbH), " \ @@ -947,8 +947,6 @@ def make_local(self, region: PolygonLike.TYPE = None, var_names: VarNamesLike.TYPE = None, monitor: Monitor = Monitor.NONE) -> Optional[DataSource]: - if not local_name or len(local_name) == 0: - local_name = self.title local_store = DATA_STORE_REGISTRY.get_data_store('local') if not local_store: @@ -958,18 +956,31 @@ def make_local(self, raise ValueError('Cannot initialize `local` DataStore') local_meta_info = self.meta_info.copy() - if local_meta_info.get('uuid'): - del local_meta_info['uuid'] - local_meta_info['ref_uuid'] = self.meta_info['uuid'] - local_ds = local_store.create_data_source(self.id, region, _REFERENCE_DATA_SOURCE_TYPE, local_name, - time_range=time_range, var_names=var_names, + if not local_name or len(local_name) == 0: + local_name = "local.{}.{}".format(self.id, LocalDataStore.generate_uuid(ref_id=self.id, + time_range=time_range, + region=region, + var_names=var_names)) + existing_ds_list = local_store.query(local_name) + if len(existing_ds_list) == 1: + return existing_ds_list[0] + + local_ds = local_store.create_data_source(local_name, + time_range=time_range, region=region, var_names=var_names, meta_info=local_meta_info, lock_file=True) - self._make_local(local_ds, time_range, region, var_names, monitor=monitor) - if local_ds.is_empty: - local_store.remove_data_source(local_ds) + if local_ds: + if not local_ds.is_complete: + self._make_local(local_ds, time_range, region, var_names, monitor=monitor) + + if local_ds.is_empty: + local_store.remove_data_source(local_ds) + return None + + local_store.register_ds(local_ds) + return local_ds + else: return None - return local_ds def _init_file_list(self, monitor: Monitor = Monitor.NONE): if self._file_list: diff --git a/cate/ds/local.py b/cate/ds/local.py index b6d0f15c3..96313a52a 100644 --- a/cate/ds/local.py +++ b/cate/ds/local.py @@ -55,7 +55,7 @@ from cate.conf.defaults import NETCDF_COMPRESSION_LEVEL from cate.core.ds import DATA_STORE_REGISTRY, DataStore, DataSource, open_xarray_dataset, find_data_sources from cate.core.ds import get_data_stores_path -from cate.core.types import PolygonLike, TimeRange, TimeRangeLike, VarNamesLike +from cate.core.types import Polygon, PolygonLike, TimeRange, TimeRangeLike, VarNames, VarNamesLike from cate.util.monitor import Monitor __author__ = "Norman Fomferra (Brockmann Consult GmbH), " \ @@ -130,6 +130,7 @@ def __init__(self, self._reference_name = reference_name self._meta_info = meta_info if meta_info else OrderedDict() + self._is_complete = True def _resolve_file_path(self, path) -> Sequence: return glob(os.path.join(self._data_store.data_store_path, path)) @@ -350,16 +351,28 @@ def make_local(self, if not local_store: raise ValueError('Cannot initialize `local` DataStore') - self.meta_info.copy() - - local_ds = local_store.create_data_source(self.id, region, _REFERENCE_DATA_SOURCE_TYPE, local_name, + if not local_name or len(local_name) == 0: + local_name = "local.{}.{}".format(self.id, LocalDataStore.generate_uuid(ref_id=self.id, + time_range=time_range, + region=region, + var_names=var_names)) + existing_ds_list = local_store.query(local_name) + if len(existing_ds_list) == 1: + return existing_ds_list[0] + + local_ds = local_store.create_data_source(local_name, region, _REFERENCE_DATA_SOURCE_TYPE, local_name, time_range=time_range, var_names=var_names, - meta_info=self.meta_info) - self._make_local(local_ds, time_range, region, var_names, monitor) - if local_ds.is_empty: - local_store.remove_data_source(local_ds) - return None - return local_ds + meta_info=self.meta_info.copy()) + if local_ds: + if not local_ds.is_complete: + self._make_local(local_ds, time_range, region, var_names, monitor=monitor) + + if local_ds.is_empty: + local_store.remove_data_source(local_ds) + return None + + local_store.register_ds(local_ds) + return local_ds def update_local(self, local_id: str, @@ -492,6 +505,14 @@ def variables_info(self): def info_string(self): return 'Files: %s' % (' '.join(self._files)) + @property + def is_complete(self) -> bool: + """ + Return a DataSource creation state + :return: + """ + return self._is_complete + @property def is_empty(self) -> bool: """ @@ -500,6 +521,14 @@ def is_empty(self) -> bool: """ return not self._files or len(self._files) == 0 + def set_completed(self, state: bool): + """ + Sets state of DataSource creation/completion + :param state: Is DataSource completed + :return: + """ + self._is_complete = state + def _repr_html_(self): import html return '
\n' \ @@ -583,6 +612,7 @@ def add_pattern(self, data_source_id: str, files: Union[str, Sequence[str]] = No is_first_file = False else: data_source.add_dataset(file) + self.register_ds(data_source) return data_source def remove_data_source(self, data_source: Union[str, DataSource], remove_files: bool = True): @@ -595,31 +625,34 @@ def remove_data_source(self, data_source: Union[str, DataSource], remove_files: os.remove(file_name) if remove_files: shutil.rmtree(os.path.join(self._store_dir, data_source.id), ignore_errors=True) - self._data_sources.remove(data_source) + if data_source in self._data_sources: + self._data_sources.remove(data_source) + + def register_ds(self, data_source: DataSource): + data_source.set_completed(True) + self._data_sources.append(data_source) @classmethod - def generate_uuid(cls, ref_id=str, - time_range: TimeRangeLike.TYPE = None, - region: PolygonLike.TYPE = None, - var_names: VarNamesLike.TYPE = None) -> uuid.UUID: + def generate_uuid(cls, ref_id: str, + time_range: Optional[TimeRange] = None, + region: Optional[Polygon] = None, + var_names: Optional[VarNames] = None) -> uuid.UUID: - constrains_str = ref_id if time_range: - constrains_str += TimeRangeLike.format(time_range) + ref_id += TimeRangeLike.format(time_range) if region: - constrains_str += PolygonLike.format(region) + ref_id += PolygonLike.format(region) if var_names: - constrains_str += VarNamesLike.format(var_names) + ref_id += VarNamesLike.format(var_names) - return uuid.uuid3(_NAMESPACE, constrains_str) + return str(uuid.uuid3(_NAMESPACE, ref_id)) @classmethod - def generate_title(cls, ref_id=str, - time_range: TimeRangeLike.TYPE = None, - region: PolygonLike.TYPE = None, - var_names: VarNamesLike.TYPE = None) -> uuid.UUID: + def generate_title(cls, title: str, + time_range: Optional[TimeRange] = None, + region: Optional[Polygon] = None, + var_names: Optional[VarNames] = None) -> uuid.UUID: - title = ref_id if time_range: title += " [TimeRange:{}]".format(TimeRangeLike.format(time_range)) if region: @@ -629,26 +662,21 @@ def generate_title(cls, ref_id=str, return title - def create_data_source(self, ref_id: str, region: PolygonLike.TYPE = None, - reference_type: str = None, reference_name: str = None, + def create_data_source(self, data_source_id: str, region: PolygonLike.TYPE = None, + reference_type: str = None, title: str = None, time_range: TimeRangeLike.TYPE = None, var_names: VarNamesLike.TYPE = None, meta_info: OrderedDict = None, lock_file: bool = False): self._init_data_sources() - if reference_name: - reference_name = LocalDataStore.generate_title(reference_name, time_range, region, var_names) - else: - reference_name = LocalDataStore.generate_title(ref_id, time_range, region, var_names) - if meta_info: - meta_info['title'] = reference_name + meta_info['title'] = title - data_source_id = LocalDataStore.generate_uuid(ref_id, time_range, region, var_names) - data_source_id = '%s.%s' % (self.id, data_source_id) + if meta_info.get('uuid'): + meta_info['ref_uuid'] = meta_info['uuid'] + del meta_info['uuid'] - lock_filename = '{}.lock'.format(data_source_id) - lock_filepath = os.path.join(self._store_dir, lock_filename) - existing_ds = None + lock_filepath = os.path.join(self._store_dir, '{}.lock'.format(data_source_id)) + data_source = None for ds in self._data_sources: if ds.id == data_source_id: if lock_file and os.path.isfile(lock_filepath): @@ -660,24 +688,24 @@ def create_data_source(self, ref_id: str, region: PolygonLike.TYPE = None, # ds.temporal_coverage() == time_range and if ds.spatial_coverage() == region \ and ds.variables_info == var_names: - existing_ds = ds + data_source = ds + data_source.set_completed(False) break raise ValueError("Local data store '{}' already contains a data source named '{}'" .format(self.id, data_source_id)) - if existing_ds: - data_source = existing_ds - else: + if not data_source: data_source = LocalDataSource(data_source_id, files=[], data_store=self, spatial_coverage=region, variables=var_names, temporal_coverage=time_range, - reference_type=reference_type, reference_name=reference_name, + reference_type=reference_type, reference_name=title, meta_info=meta_info) + data_source.set_completed(False) + self._save_data_source(data_source) + if lock_file: pid = os.getpid() with open(lock_filepath, 'w') as lock_file: lock_file.write(str(pid)) - self._save_data_source(data_source) - self._data_sources.append(data_source) return data_source @property diff --git a/test/ds/test_esa_cci_odp.py b/test/ds/test_esa_cci_odp.py index d68ad2213..58c00b902 100644 --- a/test/ds/test_esa_cci_odp.py +++ b/test/ds/test_esa_cci_odp.py @@ -134,10 +134,7 @@ def build_file_item(item_name: str, date_from: datetime, date_to: datetime, size raise ValueError(reference_path, os.listdir(reference_path)) self.assertIsNotNone(new_ds) - test_uuid = LocalDataStore.generate_uuid(self.first_oc_data_source.id, new_ds_time_range) - test_ds_id = "local." + str(test_uuid) - - self.assertEqual(new_ds.id, test_ds_id) + self.assertEqual(new_ds.id, new_ds_title) self.assertEqual(new_ds.temporal_coverage(), new_ds_time_range) self.first_oc_data_source.update_local(new_ds.id, (datetime.datetime(1978, 11, 15, 00, 00), @@ -157,7 +154,7 @@ def build_file_item(item_name: str, date_from: datetime, date_to: datetime, size datetime.datetime(1978, 11, 16, 23, 59))) self.assertTrue("Couldn't find local DataSource", context.exception.args[0]) - new_ds_w_one_variable_title = 'local_ds_test' + new_ds_w_one_variable_title = 'local_ds_test_var' new_ds_w_one_variable_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0), datetime.datetime(1978, 11, 16, 23, 59))) new_ds_w_one_variable_var_names = VarNamesLike.convert(['sm']) @@ -169,12 +166,7 @@ def build_file_item(item_name: str, date_from: datetime, date_to: datetime, size ) self.assertIsNotNone(new_ds_w_one_variable) - new_ds_w_one_uuid = LocalDataStore.generate_uuid(self.first_oc_data_source.id, - time_range=new_ds_w_one_variable_time_range, - var_names=new_ds_w_one_variable_var_names) - new_ds_w_one_ds_id = "local." + str(new_ds_w_one_uuid) - - self.assertEqual(new_ds_w_one_variable.id, new_ds_w_one_ds_id) + self.assertEqual(new_ds_w_one_variable.id, new_ds_w_one_variable_title) ds = new_ds_w_one_variable.open_dataset() new_ds_w_one_variable_var_names.extend(['lat', 'lon', 'time']) @@ -196,13 +188,7 @@ def build_file_item(item_name: str, date_from: datetime, date_to: datetime, size self.assertIsNotNone(new_ds_w_region) - new_ds_w_region_uuid = LocalDataStore.generate_uuid(self.first_oc_data_source.id, - time_range=new_ds_w_region_time_range, - var_names=new_ds_w_region_var_names, - region=new_ds_w_region_spatial_coverage) - new_ds_w_region_ds_id = "local." + str(new_ds_w_region_uuid) - - self.assertEqual(new_ds_w_region.id, new_ds_w_region_ds_id) + self.assertEqual(new_ds_w_region.id, new_ds_w_region_title) self.assertEqual(new_ds_w_region.spatial_coverage(), new_ds_w_region_spatial_coverage) data_set = new_ds_w_region.open_dataset() @@ -211,10 +197,9 @@ def build_file_item(item_name: str, date_from: datetime, date_to: datetime, size self.assertSetEqual(set(data_set.variables), set(new_ds_w_region_var_names)) - no_data = self.first_oc_data_source.make_local( - 'empty_ds', None, (datetime.datetime(2017, 12, 1, 0, 0), - datetime.datetime(2017, 12, 31, 23, 59)), - ) + no_data = self.first_oc_data_source.make_local('empty_ds', + time_range=(datetime.datetime(2017, 12, 1, 0, 0), + datetime.datetime(2017, 12, 31, 23, 59))) self.assertIsNone(no_data) def test_data_store(self): diff --git a/test/ds/test_local.py b/test/ds/test_local.py index 3fc21797a..99379189e 100644 --- a/test/ds/test_local.py +++ b/test/ds/test_local.py @@ -34,18 +34,17 @@ def test_add_pattern(self): self.assertIsNotNone(data_sources) self.assertEqual(len(data_sources), 2) - test_uuid = LocalDataStore.generate_uuid('a_name') - test_ds_id = "test." + str(test_uuid) + new_ds_name = 'a_name' + new_ds = self.data_store.add_pattern(new_ds_name, "a_pat") - new_ds = self.data_store.add_pattern("a_name", "a_pat") - self.assertEqual(test_ds_id, new_ds.id) + self.assertEqual(new_ds_name, new_ds.id) data_sources = self.data_store.query() self.assertEqual(len(data_sources), 3) with self.assertRaises(ValueError) as cm: self.data_store.add_pattern("a_name", "a_pat2") - self.assertEqual("Local data store 'test' already contains a data source named '{}'".format(test_ds_id), + self.assertEqual("Local data store 'test' already contains a data source named '{}'".format(new_ds_name), str(cm.exception)) data_sources = self.data_store.query() @@ -53,7 +52,7 @@ def test_add_pattern(self): def test__repr_html(self): html = self.data_store._repr_html_() - self.assertEqual(584, len(html), html) + self.assertEqual(514, len(html), html) def test_init(self): data_store2 = LocalDataStore('test', self.tmp_dir) @@ -255,10 +254,7 @@ def test_make_local(self): new_ds = data_source.make_local(new_ds_title, time_range=new_ds_time_range) self.assertIsNotNone(new_ds) - test_uuid = LocalDataStore.generate_uuid(data_source.id, new_ds_time_range) - test_ds_id = "local." + str(test_uuid) - - self.assertEqual(new_ds.id, test_ds_id) + self.assertEqual(new_ds.id, new_ds_title) self.assertEqual(new_ds.temporal_coverage(), TimeRangeLike.convert( (datetime.datetime(1978, 11, 14, 0, 0), datetime.datetime(1978, 11, 15, 23, 59)))) @@ -279,40 +275,33 @@ def test_make_local(self): datetime.datetime(1978, 11, 15, 23, 59))) new_ds_2_vars = VarNamesLike.convert(['sm']) - test_2_uuid = LocalDataStore.generate_uuid(data_source.id, new_ds_2_time_range, var_names=new_ds_2_vars) - test_ds_2_id = "local." + str(test_2_uuid) - new_ds_w_one_variable = data_source.make_local(new_ds_2_title, time_range=new_ds_2_time_range, var_names=new_ds_2_vars) self.assertIsNotNone(new_ds_w_one_variable) - self.assertEqual(new_ds_w_one_variable.id, test_ds_2_id) + self.assertEqual(new_ds_w_one_variable.id, new_ds_2_title) data_set = new_ds_w_one_variable.open_dataset() self.assertSetEqual(set(data_set.variables), {'sm', 'lat', 'lon', 'time'}) - new_ds_3_title = 'from_local_to_local_var' + new_ds_3_title = 'from_local_to_local_range' new_ds_3_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0), datetime.datetime(1978, 11, 15, 23, 59))) new_ds_3_vars = VarNamesLike.convert(['sm']) new_ds_3_region = PolygonLike.convert("10,10,20,20") - test_3_uuid = LocalDataStore.generate_uuid(data_source.id, new_ds_3_time_range, var_names=new_ds_3_vars, - region=new_ds_3_region) - test_ds_3_id = "local." + str(test_3_uuid) - new_ds_w_region = data_source.make_local(new_ds_3_title, time_range=new_ds_3_time_range, var_names=new_ds_3_vars, region=new_ds_3_region) # type: LocalDataSource self.assertIsNotNone(new_ds_w_region) - self.assertEqual(new_ds_w_region.id, test_ds_3_id) + self.assertEqual(new_ds_w_region.id, new_ds_3_title) self.assertEqual(new_ds_w_region.spatial_coverage(), PolygonLike.convert("10,10,20,20")) data_set = new_ds_w_region.open_dataset() self.assertSetEqual(set(data_set.variables), {'sm', 'lat', 'lon', 'time'}) - no_data = data_source.make_local('no_data', None, - (datetime.datetime(2020, 11, 14, 0, 0), - datetime.datetime(2020, 11, 15, 23, 59))) + no_data = data_source.make_local('no_data', + time_range=(datetime.datetime(2020, 11, 14, 0, 0), + datetime.datetime(2020, 11, 15, 23, 59))) self.assertIsNone(no_data) def test_remove_data_source_by_id(self): From 5e6cae306240e7d8edac26a88b3ddd9d861328d6 Mon Sep 17 00:00:00 2001 From: "Krzysztof (Chris) Bernat" Date: Thu, 7 Sep 2017 16:29:43 +0100 Subject: [PATCH 5/5] #277 - changed back ceda csw url --- cate/ds/esa_cci_odp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cate/ds/esa_cci_odp.py b/cate/ds/esa_cci_odp.py index 8876ca6e6..1014b3b46 100644 --- a/cate/ds/esa_cci_odp.py +++ b/cate/ds/esa_cci_odp.py @@ -70,7 +70,7 @@ _ESGF_CEDA_URL = "https://esgf-index1.ceda.ac.uk/esg-search/search/" # _CSW_CEDA_URL = "http://csw1.cems.rl.ac.uk/geonetwork-CEDA/srv/eng/csw-CEDA-CCI" -_CSW_CEDA_URL = "https://csw.ceda.ac.uk/geonetwork/srv/eng/csw" +_CSW_CEDA_URL = "https://csw.ceda.ac.uk/geonetwork/srv/eng/csw-CEDA-CCI" _TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S"