diff --git a/CHANGES.md b/CHANGES.md index efa32a085..df211e32f 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -44,6 +44,16 @@ ### Issues Fixed/Resolved +* Fixed [#299](https://github.com/CCI-Tools/cate-core/issues/299) + * renamed property `cate.core.ds.DataSource.name` to `id` + * renamed property `cate.core.ds.DataStore.name` to `id` + * renamed and changed signature of function `cate.core.ds.DataStore.query_data_sources(..., name=None)` + to `find_data_sources(..., id=None, query_expr=None)` + * changed signature of method `cate.core.ds.DataStore.query(name, ...)` to `query(id=None, query_expr=None, ...)` + * renamed and changed signature of method `cate.core.ds.DataSource.matches_filter(name)` to `matches(id=None, query_expr=None)` + * added `title` property to `cate.core.ds.DataStore` and `cate.core.ds.DataSource` + * made use of the new `id` and `title` properties of both `DataStore` and `DataSource` in their + JSON representations. * Fixed [#294](https://github.com/CCI-Tools/cate-core/issues/294) * Fixed [#286](https://github.com/CCI-Tools/cate-core/issues/286) * Fixed [#285](https://github.com/CCI-Tools/cate-core/issues/285) diff --git a/cate/cli/main.py b/cate/cli/main.py index d108d0e6f..0d64f7425 100644 --- a/cate/cli/main.py +++ b/cate/cli/main.py @@ -19,9 +19,6 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -__author__ = "Norman Fomferra (Brockmann Consult GmbH), " \ - "Marco Zühlke (Brockmann Consult GmbH)" - """ Description =========== @@ -108,7 +105,7 @@ from cate.conf.defaults import WEBAPI_INFO_FILE, WEBAPI_ON_INACTIVITY_AUTO_STOP_AFTER from cate.core.types import Like, TimeRangeLike -from cate.core.ds import DATA_STORE_REGISTRY, query_data_sources +from cate.core.ds import DATA_STORE_REGISTRY, find_data_sources from cate.core.objectio import OBJECT_IO_REGISTRY, find_writer, read_object from cate.core.op import OP_REGISTRY from cate.core.plugin import PLUGIN_REGISTRY @@ -123,6 +120,8 @@ from cate.webapi.wsmanag import WebAPIWorkspaceManager from cate.version import __version__ +__author__ = "Norman Fomferra (Brockmann Consult GmbH), " \ + "Marco Zühlke (Brockmann Consult GmbH)" #: Name of the Cate CLI executable (= ``cate``). 
CLI_NAME = 'cate' @@ -287,18 +286,23 @@ def _parse_op_args(raw_args: List[str], else: # For any non-None value and any data type we perform basic type validation: if value is not None and data_type: + # noinspection PyTypeChecker if issubclass(data_type, Like): - # For XXXLike-types call accepts() + # noinspection PyUnresolvedReferences compatible = data_type.accepts(value) else: + # noinspection PyTypeChecker compatible = isinstance(value, data_type) if not compatible: + # noinspection PyTypeChecker if issubclass(data_type, float): # Allow assigning bool and int to a float compatible = isinstance(value, bool) or isinstance(value, int) + # noinspection PyTypeChecker elif issubclass(data_type, int): # Allow assigning bool and float to an int compatible = isinstance(value, bool) or isinstance(value, float) + # noinspection PyTypeChecker elif issubclass(data_type, bool): # Allow assigning anything to a bool compatible = True @@ -396,7 +400,8 @@ def _get_op_info_str(op_meta_info: OpMetaInfo): op_info_str += '\n' op_info_str += _get_op_io_info_str(op_meta_info.inputs, 'Input', 'Inputs', 'Operation does not have any inputs.') - op_info_str += _get_op_io_info_str(op_meta_info.outputs, 'Output', 'Outputs', 'Operation does not have any outputs.') + op_info_str += _get_op_io_info_str(op_meta_info.outputs, 'Output', 'Outputs', + 'Operation does not have any outputs.') return op_info_str @@ -558,12 +563,12 @@ def execute(self, command_args): OP_ARGS_RES_HELP = 'Operation arguments given as KEY=VALUE. KEY is any supported input by OP. VALUE ' \ - 'depends on the expected data type of an OP input. It can be either a value or ' \ - 'a reference an existing resource prefixed by the add character "@". ' \ - 'The latter connects to operation steps with each other. To provide a (constant)' \ - 'value you can use boolean literals True and False, strings, or numeric values. ' \ - 'Type "cate op info OP" to print information about the supported OP ' \ - 'input names to be used as KEY and their data types to be used as VALUE. ' + 'depends on the expected data type of an OP input. It can be either a value or ' \ + 'a reference an existing resource prefixed by the add character "@". ' \ + 'The latter connects to operation steps with each other. To provide a (constant)' \ + 'value you can use boolean literals True and False, strings, or numeric values. ' \ + 'Type "cate op info OP" to print information about the supported OP ' \ + 'input names to be used as KEY and their data types to be used as VALUE. 
' class WorkspaceCommand(SubCommandCommand): @@ -920,10 +925,13 @@ def _execute_open(cls, command_args): workspace_manager = _new_workspace_manager() op_args = dict(ds_name=command_args.ds_name) if command_args.var_names: + # noinspection PyArgumentList op_args.update(var_names=command_args.var_names) if command_args.region: + # noinspection PyArgumentList op_args.update(region=command_args.region) if command_args.start_date or command_args.end_date: + # noinspection PyArgumentList op_args.update(time_range="%s,%s" % (command_args.start_date or '', command_args.end_date or '')) workspace_manager.set_workspace_resource(_base_dir(command_args.base_dir), @@ -937,6 +945,7 @@ def _execute_read(cls, command_args): workspace_manager = _new_workspace_manager() op_args = dict(file=command_args.file_path) if command_args.format_name: + # noinspection PyArgumentList op_args.update(format=command_args.format_name) workspace_manager.set_workspace_resource(_base_dir(command_args.base_dir), command_args.res_name, @@ -1154,23 +1163,23 @@ def configure_parser_and_subparsers(cls, parser, subparsers): def _execute_list(cls, command_args): ds_name = command_args.name if command_args.coverage: - ds_names = OrderedDict(sorted(((ds.name, TimeRangeLike.format(ds.temporal_coverage()) + ds_names = OrderedDict(sorted(((ds.id, TimeRangeLike.format(ds.temporal_coverage()) if ds.temporal_coverage() else None) - for ds in query_data_sources()), + for ds in find_data_sources()), key=lambda item: item[0])) else: - ds_names = sorted(data_source.name for data_source in query_data_sources()) + ds_names = sorted(data_source.id for data_source in find_data_sources()) _list_items('data source', 'data sources', ds_names, ds_name) @classmethod def _execute_info(cls, command_args): ds_name = command_args.ds_name - data_sources = [data_source for data_source in query_data_sources(name=ds_name) if data_source.name == ds_name] + data_sources = [data_source for data_source in find_data_sources(id=ds_name) if data_source.id == ds_name] if not data_sources: raise CommandError('data source "%s" not found' % ds_name) data_source = data_sources[0] - title = 'Data source %s' % data_source.name + title = 'Data source %s' % data_source.id print() print(title) print('=' * len(title)) @@ -1197,7 +1206,7 @@ def _execute_add(cls, command_args): ds_name = command_args.ds_name files = command_args.file ds = local_store.add_pattern(ds_name, files) - print("Local data source with name '%s' added." % ds.name) + print("Local data source with name '%s' added." % ds.id) @classmethod def _execute_del(cls, command_args): @@ -1213,7 +1222,7 @@ def _execute_del(cls, command_args): if not answer or answer.lower() == 'y': keep_files = command_args.keep_files ds = local_store.remove_data_source(ds_name, not keep_files) - print("Local data source with name '%s' removed." % ds.name) + print("Local data source with name '%s' removed." 
% ds.id) @classmethod def _execute_copy(cls, command_args): @@ -1222,7 +1231,7 @@ def _execute_copy(cls, command_args): raise RuntimeError('internal error: no local data store found') ds_name = command_args.ref_ds - data_source = next(iter(query_data_sources(None, ds_name)), None) + data_source = next(iter(find_data_sources(None, id=ds_name)), None) if data_source is None: raise RuntimeError('internal error: no local data source found: %s' % ds_name) @@ -1234,7 +1243,7 @@ def _execute_copy(cls, command_args): ds = data_source.make_local(local_name, None, time_range=time_range, region=region, var_names=var_names, monitor=cls.new_monitor()) - print("Local data source with name '%s' has been created." % ds.name) + print("Local data source with name '%s' has been created." % ds.id) class PluginCommand(SubCommandCommand): @@ -1295,11 +1304,13 @@ def _trim_error_message(message: str) -> str: # use by 'sphinxarg' to generate the documentation def _make_cate_parser(): from cate.util.cli import _make_parser + # noinspection PyTypeChecker return _make_parser(CLI_NAME, CLI_DESCRIPTION, __version__, COMMAND_REGISTRY, license_text=_LICENSE, docs_url=_DOCS_URL) def main(args=None) -> int: + # noinspection PyTypeChecker return run_main(CLI_NAME, CLI_DESCRIPTION, __version__, diff --git a/cate/core/__init__.py b/cate/core/__init__.py index 1f64dee98..9a122fe8f 100644 --- a/cate/core/__init__.py +++ b/cate/core/__init__.py @@ -24,7 +24,7 @@ """ # noinspection PyUnresolvedReferences -from .ds import DataStore, DataSource, open_dataset, query_data_sources, DATA_STORE_REGISTRY +from .ds import DataStore, DataSource, open_dataset, find_data_sources, DATA_STORE_REGISTRY # noinspection PyUnresolvedReferences from .op import op, op_input, op_output, op_return, Operation, OP_REGISTRY, \ diff --git a/cate/core/ds.py b/cate/core/ds.py index c20c162e0..a651aa07f 100644 --- a/cate/core/ds.py +++ b/cate/core/ds.py @@ -92,7 +92,6 @@ from .types import PolygonLike, TimeRange, TimeRangeLike, VarNamesLike from ..util import Monitor - __author__ = "Norman Fomferra (Brockmann Consult GmbH), " \ "Marco Zühlke (Brockmann Consult GmbH), " \ "Chris Bernat (Telespazio VEGA UK Ltd)" @@ -116,8 +115,8 @@ class DataSource(metaclass=ABCMeta): @property @abstractmethod - def name(self) -> str: - """Human-readable data source name.""" + def id(self) -> str: + """Data source identifier.""" @property def schema(self) -> Optional[Schema]: @@ -146,9 +145,18 @@ def protocols(self) -> []: def data_store(self) -> 'DataStore': """The data store to which this data source belongs.""" - def matches_filter(self, name=None) -> bool: - """Test if this data source matches the given *constraints*.""" - if name and name.lower() not in self.name.lower(): + def matches(self, id: str = None, query_expr: str = None) -> bool: + """ + Test if this data source matches the given *id* or *query_expr*. + If neither *id* nor *query_expr* are given, the method returns True. + + :param id: A data source identifier. + :param query_expr: A query expression. Currently, only simple search strings are supported. + :return: True, if this data sources matches the given *id* or *query_expr*. + """ + if query_expr: + raise NotImplementedError('query_expr not yet supported') + if id and id.lower() not in self.id.lower(): return False return True @@ -258,7 +266,16 @@ def delete_local(self, return 0 @property - def meta_info(self) -> Union[dict, None]: + def title(self) -> Optional[str]: + """ + Human-readable data source title. 
+ The default implementation tries to retrieve the title from ``meta_info['title']``. + """ + meta_info = self.meta_info + return meta_info and meta_info.get('title') + + @property + def meta_info(self) -> Optional[dict]: """ Return meta-information about this data source. The returned dict, if any, is JSON-serializable. @@ -266,7 +283,7 @@ def meta_info(self) -> Union[dict, None]: return None @property - def cache_info(self) -> Union[dict, None]: + def cache_info(self) -> Optional[dict]: """ Return information about cached, locally available data sets. The returned dict, if any, is JSON-serializable. @@ -274,7 +291,7 @@ def cache_info(self) -> Union[dict, None]: return None @property - def variables_info(self) -> Union[dict, None]: + def variables_info(self) -> Optional[dict]: """ Return meta-information about the variables contained in this data source. The returned dict, if any, is JSON-serializable. @@ -282,7 +299,7 @@ def variables_info(self) -> Union[dict, None]: return None @property - def info_string(self): + def info_string(self) -> str: """ Return a textual representation of the meta-information about this data source. Useful for CLI / REPL applications. @@ -303,8 +320,9 @@ def info_string(self): return '\n'.join(info_lines) + # TODO (forman): No overrides! Remove from DataSource interface, turn into utility function instead @property - def variables_info_string(self): + def variables_info_string(self) -> str: """ Return some textual information about the variables contained in this data source. Useful for CLI / REPL applications. @@ -323,8 +341,9 @@ def variables_info_string(self): return '\n'.join(info_lines) + # TODO (forman): No overrides! Remove from DataSource interface, turn into utility function instead @property - def cached_datasets_coverage_string(self): + def cached_datasets_coverage_string(self) -> str: """ Return a textual representation of information about cached, locally available data sets. Useful for CLI / REPL applications. @@ -350,17 +369,30 @@ def _repr_html_(self): class DataStore(metaclass=ABCMeta): - """Represents a data store of data sources.""" + """ + Represents a data store of data sources. + + :param id: Unique data store identifier. + :param title: A human-readable tile. + """ + + def __init__(self, id: str, title: str = None): + self._id = id + self._title = title or id - def __init__(self, name: str): - self._name = name + @property + def id(self) -> str: + """ + Return the unique identifier for this data store. + """ + return self._id @property - def name(self) -> str: + def title(self) -> str: """ - Return the name of this data store. + Return a human-readable tile for this data store. """ - return self._name + return self._title @property def data_store_path(self) -> Optional[str]: @@ -370,11 +402,12 @@ def data_store_path(self) -> Optional[str]: return None @abstractmethod - def query(self, name=None, monitor: Monitor = Monitor.NONE) -> Sequence[DataSource]: + def query(self, id: str = None, query_expr: str = None, monitor: Monitor = Monitor.NONE) -> Sequence[DataSource]: """ Retrieve data sources in this data store using the given constraints. - :param name: Name of the data source. + :param id: Data source identifier. + :param query_expr: Query expression which may be used if *ìd* is unknown. :param monitor: A progress monitor. :return: Sequence of data sources. 
""" @@ -403,17 +436,17 @@ class DataStoreRegistry: def __init__(self): self._data_stores = dict() - def get_data_store(self, name: str) -> Optional[DataStore]: - return self._data_stores.get(name, None) + def get_data_store(self, id: str) -> Optional[DataStore]: + return self._data_stores.get(id) def get_data_stores(self) -> Sequence[DataStore]: return list(self._data_stores.values()) def add_data_store(self, data_store: DataStore): - self._data_stores[data_store.name] = data_store + self._data_stores[data_store.id] = data_store - def remove_data_store(self, name: str): - del self._data_stores[name] + def remove_data_store(self, id: str): + del self._data_stores[id] def __len__(self): return len(self._data_stores) @@ -427,8 +460,8 @@ def __repr__(self): def _repr_html_(self): rows = [] - for name, data_store in self._data_stores.items(): - rows.append('%s%s' % (name, repr(data_store))) + for id, data_store in self._data_stores.items(): + rows.append('%s%s' % (id, repr(data_store))) return '%s
' % '\n'.join(rows) @@ -437,13 +470,17 @@ def _repr_html_(self): DATA_STORE_REGISTRY = DataStoreRegistry() -def query_data_sources(data_stores: Union[DataStore, Sequence[DataStore]] = None, name=None) -> Sequence[DataSource]: - """Query the data store(s) for data sources matching the given constrains. +def find_data_sources(data_stores: Union[DataStore, Sequence[DataStore]] = None, + id: str = None, + query_expr: str = None) -> Sequence[DataSource]: + """ + Find data sources in the given data store(s) matching the given *id* or *query_expr*. See also :py:func:`open_dataset`. :param data_stores: If given these data stores will be queried. Otherwise all registered data stores will be used. - :param name: The name of a data source. + :param id: A data source identifier. + :param query_expr: A query expression. :return: All data sources matching the given constrains. """ results = [] @@ -455,21 +492,21 @@ def query_data_sources(data_stores: Union[DataStore, Sequence[DataStore]] = None primary_data_store = data_stores else: data_store_list = data_stores - if not primary_data_store and name and name.count('.') > 0: + if not primary_data_store and id and id.count('.') > 0: primary_data_store_index = -1 - primary_data_store_name, data_source_name = name.split('.', 1) + primary_data_store_id, data_source_name = id.split('.', 1) for idx, data_store in enumerate(data_store_list): - if data_store.name == primary_data_store_name: + if data_store.id == primary_data_store_id: primary_data_store_index = idx if primary_data_store_index >= 0: primary_data_store = data_store_list.pop(primary_data_store_index) if primary_data_store: - results.extend(primary_data_store.query(name)) + results.extend(primary_data_store.query(id=id, query_expr=query_expr)) if not results: # noinspection PyTypeChecker for data_store in data_store_list: - results.extend(data_store.query(name)) + results.extend(data_store.query(id=id, query_expr=query_expr)) return results @@ -494,7 +531,7 @@ def open_dataset(data_source: Union[DataSource, str], if isinstance(data_source, str): data_store_list = list(DATA_STORE_REGISTRY.get_data_stores()) - data_sources = query_data_sources(data_store_list, name=data_source) + data_sources = find_data_sources(data_store_list, id=data_source) if len(data_sources) == 0: raise ValueError("No data_source found for the given query term", data_source) elif len(data_sources) > 1: diff --git a/cate/ds/esa_cci_ftp.py b/cate/ds/esa_cci_ftp.py index cfd3ba0f5..139459e5f 100644 --- a/cate/ds/esa_cci_ftp.py +++ b/cate/ds/esa_cci_ftp.py @@ -20,10 +20,6 @@ # SOFTWARE. 
-__author__ = "Norman Fomferra (Brockmann Consult GmbH), " \ - "Marco Zühlke (Brockmann Consult GmbH), " \ - "Chris Bernat (Telespacio VEGA UK Inc.)" - """ Description =========== @@ -61,6 +57,10 @@ from cate.core.types import PolygonLike, TimeRangeLike, VarNamesLike from cate.util import to_datetime, Monitor, Cancellation +__author__ = "Norman Fomferra (Brockmann Consult GmbH), " \ + "Marco Zühlke (Brockmann Consult GmbH), " \ + "Chris Bernat (Telespacio VEGA UK Inc.)" + Time = Union[str, datetime] TimeRange = Tuple[Time, Time] @@ -85,7 +85,7 @@ class FileSetDataSource(DataSource): Parameters ---------- - name : str + id : str The name of the file set base_dir : str The base directory @@ -101,19 +101,19 @@ class FileSetDataSource(DataSource): def __init__(self, file_set_data_store: 'FileSetDataStore', - name: str, + id: str, base_dir: str, file_pattern: str, fileset_info: 'FileSetInfo' = None): self._file_set_data_store = file_set_data_store - self._name = name + self._id = id self._base_dir = base_dir self._file_pattern = file_pattern self._fileset_info = fileset_info @property - def name(self): - return self._name + def id(self): + return self._id @property def schema(self) -> Schema: @@ -153,7 +153,7 @@ def to_json_dict(self): :return: A JSON-serializable dictionary """ fsds_dict = OrderedDict() - fsds_dict['name'] = self.name + fsds_dict['name'] = self.id fsds_dict['base_dir'] = self._base_dir fsds_dict['file_pattern'] = self._file_pattern if self._fileset_info: @@ -234,7 +234,7 @@ def sync(self, num_of_synchronised_files = 0 num_of_expected_remote_files = len(list(chain.from_iterable(list(expected_remote_files.values())))) - with monitor.starting('Sync %s' % self._name, num_of_expected_remote_files): + with monitor.starting('Sync %s' % self._id, num_of_expected_remote_files): try: with ftplib.FTP(ftp_host_name) as ftp: ftp.login() @@ -318,7 +318,7 @@ def _get_expected_remote_files(self, time_range: TimeRange = (None, None)) -> Ma return expected_remote_files def __repr__(self): - return "FileSetDataSource(%s, %s, %s)" % (repr(self._name), repr(self._base_dir), repr(self._file_pattern)) + return "FileSetDataSource(%s, %s, %s)" % (repr(self._id), repr(self._base_dir), repr(self._file_pattern)) @property def info_string(self): @@ -338,7 +338,7 @@ def _repr_html_(self): return '%s
' % rows def get_table_data(self): - return OrderedDict([('Name', self._name), + return OrderedDict([('Name', self._id), ('Base directory', self._base_dir), ('File pattern', self._file_pattern)]) @@ -524,8 +524,8 @@ class FileSetDataStore(DataStore): :param remote_url: Optional URL of the data store's remote service. """ - def __init__(self, name: str, root_dir: str, remote_url: str = None): - super().__init__(name) + def __init__(self, id: str, root_dir: str, remote_url: str = None): + super().__init__(id) self._root_dir = root_dir self._remote_url = remote_url self._data_sources = [] @@ -544,8 +544,10 @@ def remote_url(self) -> str: """Optional URL of the data store's remote service.""" return self._remote_url - def query(self, name=None, monitor: Monitor = Monitor.NONE) -> Sequence[DataSource]: - return [ds for ds in self._data_sources if ds.matches_filter(name)] + def query(self, id: str = None, query_expr: str = None, monitor: Monitor = Monitor.NONE) -> Sequence[DataSource]: + if id or query_expr: + return [ds for ds in self._data_sources if ds.matches(id=id, query_expr=query_expr)] + return self._data_sources def load_from_json(self, json_fp_or_str: Union[str, IOBase]): if isinstance(json_fp_or_str, str): diff --git a/cate/ds/esa_cci_odp.py b/cate/ds/esa_cci_odp.py index cf793430e..da0645163 100644 --- a/cate/ds/esa_cci_odp.py +++ b/cate/ds/esa_cci_odp.py @@ -18,9 +18,6 @@ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -__author__ = "Norman Fomferra (Brockmann Consult GmbH), " \ - "Marco Zühlke (Brockmann Consult GmbH), " \ - "Chris Bernat (Telespazio VEGA UK Ltd)" """ Description @@ -59,11 +56,15 @@ from cate.conf import get_config_value from cate.conf.defaults import NETCDF_COMPRESSION_LEVEL from cate.core.ds import DATA_STORE_REGISTRY, DataStore, DataSource, Schema, \ - open_xarray_dataset, get_data_stores_path, query_data_sources + open_xarray_dataset, get_data_stores_path, find_data_sources from cate.core.types import PolygonLike, TimeRange, TimeRangeLike, VarNamesLike from cate.ds.local import add_to_data_store_registry, LocalDataSource from cate.util.monitor import Monitor +__author__ = "Norman Fomferra (Brockmann Consult GmbH), " \ + "Marco Zühlke (Brockmann Consult GmbH), " \ + "Chris Bernat (Telespazio VEGA UK Ltd)" + _ESGF_CEDA_URL = "https://esgf-index1.ceda.ac.uk/esg-search/search/" _CSW_CEDA_URL = "http://csw1.cems.rl.ac.uk/geonetwork-CEDA/srv/eng/csw-CEDA-CCI" @@ -141,6 +142,7 @@ def _fetch_solr_json(base_url, query_args, offset=0, limit=3500, timeout=10, mon while True: monitor.progress(work=1) paging_query_args = dict(query_args or {}) + # noinspection PyArgumentList paging_query_args.update(offset=offset, limit=limit, format='application/solr+json') url = base_url + '?' 
+ urllib.parse.urlencode(paging_query_args) with urllib.request.urlopen(url, timeout=timeout) as response: @@ -205,6 +207,7 @@ def _load_or_fetch_json(fetch_json_function, if json_obj is None: # noinspection PyArgumentList try: + # noinspection PyArgumentList json_obj = fetch_json_function(*(fetch_json_args or []), **(fetch_json_kwargs or {})) if cache_used: os.makedirs(cache_dir, exist_ok=True) @@ -277,11 +280,12 @@ def pick_start_time(file_info_rec): class EsaCciOdpDataStore(DataStore): def __init__(self, - name: str = 'esa_cci_odp', + id: str = 'esa_cci_odp', + title: str = 'ESA CCI Open Data Portal', index_cache_used: bool = True, index_cache_expiration_days: float = 1.0, index_cache_json_dict: dict = None): - super().__init__(name) + super().__init__(id, title=title) self._index_cache_used = index_cache_used self._index_cache_expiration_days = index_cache_expiration_days self._index_json_dict = index_cache_json_dict @@ -313,13 +317,11 @@ def update_indices(self, update_file_lists: bool = False, monitor: Monitor = Mon data_source.update_file_list() child_monitor.progress(work=1) - def query(self, name: str = None, monitor: Monitor = Monitor.NONE) -> Sequence['DataSource']: + def query(self, id: str = None, query_expr: str = None, monitor: Monitor = Monitor.NONE) -> Sequence['DataSource']: self._init_data_sources() - if name: - result = [data_source for data_source in self._data_sources if data_source.matches_filter(name)] - else: - result = self._data_sources - return result + if id or query_expr: + return [ds for ds in self._data_sources if ds.matches(id=id, query_expr=query_expr)] + return self._data_sources def _repr_html_(self) -> str: self._init_data_sources() @@ -440,7 +442,7 @@ def __init__(self, self._meta_info = None @property - def name(self) -> str: + def id(self) -> str: return self._master_id @property @@ -456,7 +458,6 @@ def spatial_coverage(self) -> Optional[PolygonLike]: if self._catalogue_data \ and self._catalogue_data.get('bbox_minx', None) and self._catalogue_data.get('bbox_miny', None) \ and self._catalogue_data.get('bbox_maxx', None) and self._catalogue_data.get('bbox_maxy', None): - return PolygonLike.convert([ self._catalogue_data.get('bbox_minx'), self._catalogue_data.get('bbox_miny'), @@ -574,9 +575,9 @@ def update_local(self, time_range: TimeRangeLike.TYPE, monitor: Monitor = Monitor.NONE) -> bool: - data_sources = query_data_sources(None, local_id) # type: Sequence['DataSource'] + data_sources = find_data_sources(None, id=local_id) # type: Sequence['DataSource'] data_source = next((ds for ds in data_sources if isinstance(ds, LocalDataSource) and - ds.name == local_id), None) # type: LocalDataSource + ds.id == local_id), None) # type: LocalDataSource if not data_source: raise ValueError("Couldn't find local DataSource", (local_id, data_sources)) @@ -600,11 +601,14 @@ def update_local(self, if to_remove: for time_range_to_remove in to_remove: data_source.reduce_temporal_coverage(time_range_to_remove) - if to_add: + if to_add: for time_range_to_add in to_add: self._make_local(data_source, time_range_to_add, None, data_source.variables_info, monitor) + # TODO (chris): forman added False (?) 
to make signature happy + return False + def delete_local(self, time_range: TimeRangeLike.TYPE) -> int: if time_range[0] >= self._temporal_coverage[0] \ @@ -652,7 +656,7 @@ def open_dataset(self, selected_file_list = self._find_files(time_range) if not selected_file_list: - msg = 'Data source \'{}\' does not seem to have any data files'.format(self.name) + msg = 'Data source \'{}\' does not seem to have any data files'.format(self.id) if time_range is not None: msg += ' in given time range {}'.format(TimeRangeLike.format(time_range)) raise IOError(msg) @@ -694,8 +698,7 @@ def _make_local(self, var_names: VarNamesLike.TYPE = None, monitor: Monitor = Monitor.NONE): - # local_name = local_ds.name - local_id = local_ds.name + local_id = local_ds.id time_range = TimeRangeLike.convert(time_range) if time_range else None region = PolygonLike.convert(region) if region else None @@ -721,7 +724,7 @@ def _make_local(self, if protocol == _ODP_PROTOCOL_OPENDAP: files = self._get_urls_list(selected_file_list, protocol) - monitor.start('Sync ' + self.name, total_work=len(files)) + monitor.start('Sync ' + self.id, total_work=len(files)) for idx, dataset_uri in enumerate(files): child_monitor = monitor.child(work=1) @@ -753,8 +756,8 @@ def _make_local(self, geo_lon_res = self._get_harmonized_coordinate_value(remote_dataset.attrs, 'geospatial_lat_resolution') if not (isnan(geo_lat_min) or isnan(geo_lat_max) or - isnan(geo_lon_min) or isnan(geo_lon_max) or - isnan(geo_lat_res) or isnan(geo_lon_res)): + isnan(geo_lon_min) or isnan(geo_lon_max) or + isnan(geo_lat_res) or isnan(geo_lon_res)): process_region = True [lon_min, lat_min, lon_max, lat_max] = region.bounds @@ -845,7 +848,7 @@ def _make_local(self, outdated_file_list.append(file_rec) if outdated_file_list: - with monitor.starting('Sync ' + self.name, len(outdated_file_list)): + with monitor.starting('Sync ' + self.id, len(outdated_file_list)): bytes_to_download = sum([file_rec[3] for file_rec in outdated_file_list]) dl_stat = _DownloadStatistics(bytes_to_download) @@ -891,7 +894,7 @@ def make_local(self, del local_meta_info['uuid'] local_meta_info['ref_uuid'] = self.meta_info['uuid'] - local_ds = local_store.create_data_source(local_name, region, _REFERENCE_DATA_SOURCE_TYPE, self.name, + local_ds = local_store.create_data_source(local_name, region, _REFERENCE_DATA_SOURCE_TYPE, self.id, time_range, var_names, meta_info=local_meta_info, lock_file=True) self._make_local(local_ds, time_range, region, var_names, monitor=monitor) return local_ds @@ -938,10 +941,10 @@ def __str__(self): return self.info_string def _repr_html_(self): - return self.name + return self.id def __repr__(self): - return self.name + return self.id class _DownloadStatistics: @@ -954,7 +957,7 @@ def handle_chunk(self, chunk): self.bytes_done += chunk @staticmethod - def _to_mibs(bytes_count): + def _to_mibs(bytes_count: int) -> float: return bytes_count / (1024 * 1024) def __str__(self): @@ -968,7 +971,6 @@ def __str__(self): class EsaCciCatalogueService: - def __init__(self, catalogue_url: str): self._catalogue_url = catalogue_url @@ -1023,20 +1025,20 @@ def _build_catalogue(self, monitor: Monitor = Monitor.NONE): self._catalogue = { record.identification.uricode[0]: { - 'abstract': record.identification.abstract, - 'bbox_minx': record.identification.bbox.minx if record.identification.bbox else None, - 'bbox_miny': record.identification.bbox.miny if record.identification.bbox else None, - 'bbox_maxx': record.identification.bbox.maxx if record.identification.bbox else None, - 
'bbox_maxy': record.identification.bbox.maxy if record.identification.bbox else None, - 'creation_date': + 'abstract': record.identification.abstract, + 'bbox_minx': record.identification.bbox.minx if record.identification.bbox else None, + 'bbox_miny': record.identification.bbox.miny if record.identification.bbox else None, + 'bbox_maxx': record.identification.bbox.maxx if record.identification.bbox else None, + 'bbox_maxy': record.identification.bbox.maxy if record.identification.bbox else None, + 'creation_date': next(iter(e.date for e in record.identification.date if e and e.type == 'creation'), None), - 'publication_date': + 'publication_date': next(iter(e.date for e in record.identification.date if e and e.type == 'publication'), None), - 'title': record.identification.title, - 'data_sources': record.identification.uricode[1:], - 'licences': record.identification.uselimitation, - 'temporal_coverage_start': record.identification.temporalextent_start, - 'temporal_coverage_end': record.identification.temporalextent_end + 'title': record.identification.title, + 'data_sources': record.identification.uricode[1:], + 'licences': record.identification.uselimitation, + 'temporal_coverage_start': record.identification.temporalextent_start, + 'temporal_coverage_end': record.identification.temporalextent_end } for record in catalogue_metadata.values() if record.identification and len(record.identification.uricode) > 0 diff --git a/cate/ds/local.py b/cate/ds/local.py index f798b29ea..a9c918e6d 100644 --- a/cate/ds/local.py +++ b/cate/ds/local.py @@ -18,9 +18,6 @@ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -__author__ = "Norman Fomferra (Brockmann Consult GmbH), " \ - "Marco Zühlke (Brockmann Consult GmbH), " \ - "Chris Bernat (Telespazio VEGA UK Ltd)" """ Description @@ -55,11 +52,15 @@ from cate.conf import get_config_value from cate.conf.defaults import NETCDF_COMPRESSION_LEVEL -from cate.core.ds import DATA_STORE_REGISTRY, DataStore, DataSource, open_xarray_dataset, query_data_sources +from cate.core.ds import DATA_STORE_REGISTRY, DataStore, DataSource, open_xarray_dataset, find_data_sources from cate.core.ds import get_data_stores_path from cate.core.types import PolygonLike, TimeRange, TimeRangeLike, VarNamesLike from cate.util.monitor import Monitor +__author__ = "Norman Fomferra (Brockmann Consult GmbH), " \ + "Marco Zühlke (Brockmann Consult GmbH), " \ + "Chris Bernat (Telespazio VEGA UK Ltd)" + _REFERENCE_DATA_SOURCE_TYPE = "FILE_PATTERN" @@ -72,13 +73,32 @@ def add_to_data_store_registry(): data_store = LocalDataStore('local', get_data_store_path()) DATA_STORE_REGISTRY.add_data_store(data_store) - +# TODO (kbernat): document this class class LocalDataSource(DataSource): - def __init__(self, name: str, files: Union[Sequence[str], OrderedDict], data_store: 'LocalDataStore', - temporal_coverage: TimeRangeLike.TYPE = None, spatial_coverage: PolygonLike.TYPE = None, - variables: VarNamesLike.TYPE = None, reference_type: str = None, reference_name: str = None, + """ + + :param id: + :param files: + :param data_store: + :param temporal_coverage: + :param spatial_coverage: + :param variables: + :param reference_type: + :param reference_name: + :param meta_info: + """ + + def __init__(self, + id: str, + files: Union[Sequence[str], OrderedDict], + data_store: 'LocalDataStore', + temporal_coverage: TimeRangeLike.TYPE = None, + spatial_coverage: PolygonLike.TYPE = None, + 
variables: VarNamesLike.TYPE = None, + reference_type: str = None, + reference_name: str = None, meta_info: dict = None): - self._name = name + self._id = id if isinstance(files, Sequence): self._files = OrderedDict.fromkeys(files) else: @@ -127,11 +147,10 @@ def open_dataset(self, for i in range(len(time_series)): if time_series[i]: if isinstance(time_series[i], Tuple) and \ - time_series[i][0] >= time_range[0] and \ - time_series[i][1] <= time_range[1]: + time_series[i][0] >= time_range[0] and time_series[i][1] <= time_range[1]: paths.extend(self._resolve_file_path(file_paths[i])) elif isinstance(time_series[i], datetime) and \ - time_range[0] <= time_series[i] < time_range[1]: + time_range[0] <= time_series[i] < time_range[1]: paths.extend(self._resolve_file_path(file_paths[i])) else: for file in self._files.items(): @@ -165,8 +184,7 @@ def _make_local(self, var_names: VarNamesLike.TYPE = None, monitor: Monitor = Monitor.NONE): - # local_name = local_ds.name - local_id = local_ds.name + local_id = local_ds.id time_range = TimeRangeLike.convert(time_range) if time_range else None region = PolygonLike.convert(region) if region else None @@ -184,7 +202,7 @@ def _make_local(self, if not os.path.exists(local_path): os.makedirs(local_path) - monitor.start("Sync " + self.name, total_work=len(self._files.items())) + monitor.start("Sync " + self.id, total_work=len(self._files.items())) for remote_relative_filepath, coverage in self._files.items(): child_monitor = monitor.child(work=1) @@ -227,7 +245,7 @@ def _make_local(self, geo_lon_res = self._get_harmonized_coordinate_value(remote_dataset.attrs, 'geospatial_lat_resolution') if not (isnan(geo_lat_min) or isnan(geo_lat_max) or isnan(geo_lon_min) or - isnan(geo_lon_max) or isnan(geo_lat_res) or isnan(geo_lon_res)): + isnan(geo_lon_max) or isnan(geo_lat_res) or isnan(geo_lon_res)): process_region = True [lon_min, lat_min, lon_max, lat_max] = region.bounds @@ -330,7 +348,7 @@ def make_local(self, if not local_store: raise ValueError('Cannot initialize `local` DataStore') - local_ds = local_store.create_data_source(local_name, region, _REFERENCE_DATA_SOURCE_TYPE, self.name, + local_ds = local_store.create_data_source(local_name, region, _REFERENCE_DATA_SOURCE_TYPE, self.id, meta_info=self.meta_info) self._make_local(local_ds, time_range, region, var_names, monitor) return local_ds @@ -340,9 +358,9 @@ def update_local(self, time_range: TimeRangeLike.TYPE, monitor: Monitor = Monitor.NONE) -> bool: - data_sources = query_data_sources(None, local_id) # type: Sequence['DataSource'] + data_sources = find_data_sources(None, id=local_id) # type: Sequence['DataSource'] data_source = next((ds for ds in data_sources if isinstance(ds, LocalDataSource) and - ds.name == local_id), None) # type: LocalDataSource + ds.id == local_id), None) # type: LocalDataSource if not data_source: raise ValueError("Couldn't find local DataSource", (local_id, data_sources)) @@ -446,6 +464,18 @@ def temporal_coverage(self, monitor: Monitor = Monitor.NONE) -> Optional[TimeRan def spatial_coverage(self): return self._spatial_coverage + @property + def data_store(self) -> DataStore: + return self._data_store + + @property + def id(self) -> str: + return self._id + + @property + def meta_info(self) -> OrderedDict: + return self._meta_info + @property def variables_info(self): return self._variables @@ -459,19 +489,7 @@ def _repr_html_(self): return '\n' \ '\n' \ '\n' \ - '
Name%s
Files%s
\n' % (html.escape(self._name), html.escape(' '.join(self._files))) - - @property - def data_store(self) -> DataStore: - return self._data_store - - @property - def name(self) -> str: - return self._name - - @property - def meta_info(self) -> OrderedDict: - return self._meta_info + '\n' % (html.escape(self._id), html.escape(' '.join(self._files))) def to_json_dict(self): """ @@ -480,7 +498,7 @@ def to_json_dict(self): :return: A JSON-serializable dictionary """ config = OrderedDict({ - 'name': self._name, + 'name': self._id, 'meta_data': { 'deprecated': 'to be merged with meta_info in the future', 'temporal_covrage': TimeRangeLike.format(self._temporal_coverage) if self._temporal_coverage else None, @@ -521,8 +539,8 @@ def from_json_dict(cls, json_dicts: dict, data_store: 'LocalDataStore') -> Optio file_details_length = len(files[0]) if file_details_length > 2: files_dict = OrderedDict((item[0], (parser.parse(item[1]).replace(microsecond=0), - parser.parse(item[2]).replace(microsecond=0)) - if item[1] and item[2] else None) for item in files) + parser.parse(item[2]).replace(microsecond=0)) + if item[1] and item[2] else None) for item in files) elif file_details_length > 0: files_dict = OrderedDict((item[0], parser.parse(item[1]).replace(microsecond=0)) if len(item) > 1 else (item[0], None) for item in files) @@ -533,13 +551,13 @@ def from_json_dict(cls, json_dicts: dict, data_store: 'LocalDataStore') -> Optio class LocalDataStore(DataStore): - def __init__(self, name: str, store_dir: str): - super().__init__(name) + def __init__(self, id: str, store_dir: str): + super().__init__(id, title='Local Data Sources') self._store_dir = store_dir self._data_sources = None - def add_pattern(self, name: str, files: Union[str, Sequence[str]] = None) -> 'DataSource': - data_source = self.create_data_source(name) + def add_pattern(self, data_source_id: str, files: Union[str, Sequence[str]] = None) -> 'DataSource': + data_source = self.create_data_source(data_source_id) if isinstance(files, str): files = [files] is_first_file = True @@ -551,46 +569,46 @@ def add_pattern(self, name: str, files: Union[str, Sequence[str]] = None) -> 'Da data_source.add_dataset(file) return data_source - def remove_data_source(self, name: str, remove_files: bool = True): - data_sources = self.query(name) + def remove_data_source(self, data_source_id: str, remove_files: bool = True): + data_sources = self.query(id=data_source_id) if not data_sources or len(data_sources) != 1: return data_source = data_sources[0] - file_name = os.path.join(self._store_dir, data_source.name + '.json') + file_name = os.path.join(self._store_dir, data_source.id + '.json') os.remove(file_name) if remove_files: - shutil.rmtree(os.path.join(self._store_dir, data_source.name)) + shutil.rmtree(os.path.join(self._store_dir, data_source.id)) self._data_sources.remove(data_source) - def create_data_source(self, name: str, region: PolygonLike.TYPE = None, + def create_data_source(self, data_source_id: str, region: PolygonLike.TYPE = None, reference_type: str = None, reference_name: str = None, time_range: TimeRangeLike.TYPE = None, var_names: VarNamesLike.TYPE = None, meta_info: OrderedDict = None, lock_file: bool = False): self._init_data_sources() - if not name.startswith('%s.' % self.name): - name = '%s.%s' % (self.name, name) - lock_filename = '{}.lock'.format(name) + if not data_source_id.startswith('%s.' 
% self.id): + data_source_id = '%s.%s' % (self.id, data_source_id) + lock_filename = '{}.lock'.format(data_source_id) lock_filepath = os.path.join(self._store_dir, lock_filename) existing_ds = None for ds in self._data_sources: - if ds.name == name: + if ds.id == data_source_id: if lock_file and os.path.isfile(lock_filepath): with open(lock_filepath, 'r') as lock_file: writer_pid = lock_file.readline() if psutil.pid_exists(int(writer_pid)): raise ValueError("Cannot access data source {}, another process is using it (pid:{}" - .format(ds.name, writer_pid)) + .format(ds.id, writer_pid)) # ds.temporal_coverage() == time_range and if ds.spatial_coverage() == region \ and ds.variables_info == var_names: existing_ds = ds break raise ValueError("Local data store '{}' already contains a data source named '{}'" - .format(self.name, name)) + .format(self.id, data_source_id)) if existing_ds: data_source = existing_ds else: - data_source = LocalDataSource(name, files=[], data_store=self, spatial_coverage=region, + data_source = LocalDataSource(data_source_id, files=[], data_store=self, spatial_coverage=region, reference_type=reference_type, reference_name=reference_name, meta_info=meta_info) if lock_file: @@ -606,16 +624,15 @@ def create_data_source(self, name: str, region: PolygonLike.TYPE = None, def data_store_path(self): return self._store_dir - def query(self, name=None, monitor: Monitor = Monitor.NONE) -> Sequence[LocalDataSource]: + def query(self, id: str = None, query_expr: str = None, monitor: Monitor = Monitor.NONE) -> Sequence[ + LocalDataSource]: self._init_data_sources() - if name: - result = [ds for ds in self._data_sources if ds.matches_filter(name)] - else: - result = self._data_sources - return result + if id or query_expr: + return [ds for ds in self._data_sources if ds.matches(id=id, query_expr=query_expr)] + return self._data_sources def __repr__(self): - return "LocalFilePatternDataStore(%s)" % repr(self.name) + return "LocalFilePatternDataStore(%s)" % repr(self.id) def _repr_html_(self): self._init_data_sources() @@ -625,7 +642,7 @@ def _repr_html_(self): row_count += 1 # noinspection PyProtectedMember rows.append('%s%s' % (row_count, data_source._repr_html_())) - return '

<p>Contents of LocalFilePatternDataStore "%s"</p><table>%s</table>' % (self.name, '\n'.join(rows))
+        return '<p>Contents of LocalFilePatternDataStore "%s"</p><table>%s</table>
' % (self.id, '\n'.join(rows)) def _init_data_sources(self): if self._data_sources: @@ -643,14 +660,14 @@ def save_data_source(self, data_source, unlock: bool = False): self._save_data_source(data_source) if unlock: try: - os.remove(os.path.join(self._store_dir, '{}.lock'.format(data_source.name))) + os.remove(os.path.join(self._store_dir, '{}.lock'.format(data_source.id))) except FileNotFoundError: pass def _save_data_source(self, data_source): json_dict = data_source.to_json_dict() dump_kwargs = dict(indent=' ', default=self._json_default_serializer) - file_name = os.path.join(self._store_dir, data_source.name + '.json') + file_name = os.path.join(self._store_dir, data_source.id + '.json') with open(file_name, 'w') as fp: json.dump(json_dict, fp, **dump_kwargs) diff --git a/cate/webapi/websocket.py b/cate/webapi/websocket.py index 265baf17d..dbac6261c 100644 --- a/cate/webapi/websocket.py +++ b/cate/webapi/websocket.py @@ -29,7 +29,7 @@ from cate.conf import conf from cate.conf.defaults import GLOBAL_CONF_FILE, WEBAPI_USE_WORKSPACE_IMAGERY_CACHE -from cate.core.ds import DATA_STORE_REGISTRY, get_data_stores_path, query_data_sources +from cate.core.ds import DATA_STORE_REGISTRY, get_data_stores_path, find_data_sources from cate.core.op import OP_REGISTRY from cate.core.wsmanag import WorkspaceManager from cate.core.workspace import OpKwArgs @@ -108,9 +108,7 @@ def get_data_stores(self) -> list: data_stores = DATA_STORE_REGISTRY.get_data_stores() data_store_list = [] for data_store in data_stores: - data_store_list.append(dict(id=data_store.name, - name=data_store.name, - description='')) + data_store_list.append(dict(id=data_store.id, title=data_store.title)) return sorted(data_store_list, key=lambda ds: ds['name']) @@ -129,8 +127,8 @@ def get_data_sources(self, data_store_id: str, monitor: Monitor) -> list: data_sources = data_store.query(monitor=monitor) data_source_list = [] for data_source in data_sources: - data_source_list.append(dict(id=data_source.name, - name=data_source.name, + data_source_list.append(dict(id=data_source.id, + title=data_source.title, meta_info=data_source.meta_info)) return sorted(data_source_list, key=lambda ds: ds['name']) @@ -147,7 +145,7 @@ def get_ds_temporal_coverage(self, data_store_id: str, data_source_id: str, moni data_store = DATA_STORE_REGISTRY.get_data_store(data_store_id) if data_store is None: raise ValueError('Unknown data store: "%s"' % data_store_id) - data_sources = data_store.query(name=data_source_id) + data_sources = data_store.query(id=data_source_id) if not data_sources: raise ValueError('data source "%s" not found' % data_source_id) data_source = data_sources[0] @@ -176,7 +174,7 @@ def make_ds_local(self, :return: JSON-serializable list of 'local' data sources, sorted by name. 
""" with monitor.starting('Making data source local', 100): - data_sources = query_data_sources(name=data_source_name) + data_sources = find_data_sources(id=data_source_name) if not data_sources: raise ValueError('data source "%s" not found' % data_source_name) if len(data_sources) > 1: @@ -214,7 +212,7 @@ def add_local_datasource(self, data_source_name: str, file_path_pattern: str, mo raise ValueError('Unknown data store: "%s"' % 'local') with monitor.starting('Making data source local', 100): # TODO use monitor, while extracting metadata - data_store.add_pattern(name=data_source_name, files=file_path_pattern) + data_store.add_pattern(data_source_id=data_source_name, files=file_path_pattern) return self.get_data_sources('local', monitor=monitor.child(100)) def remove_local_datasource(self, data_source_name: str, remove_files: bool) -> list: diff --git a/doc/source/_static/uml/cdm_schema.png b/doc/source/_static/uml/cdm_schema.png index 84b00151a..53f85587e 100644 Binary files a/doc/source/_static/uml/cdm_schema.png and b/doc/source/_static/uml/cdm_schema.png differ diff --git a/doc/source/_static/uml/cdm_schema.svg b/doc/source/_static/uml/cdm_schema.svg index 46fc5e735..8843569f7 100644 --- a/doc/source/_static/uml/cdm_schema.svg +++ b/doc/source/_static/uml/cdm_schema.svg @@ -1,25 +1 @@ - - -Schemaname: strDimensionname: strAttributename: strdata_type: typevalue: objectVariablename: strdata_type: typeASchemacorresponds to specifcDatasettype.Both aDataSourceand aDatasetknow their schema.We will use schemas mostly to check operation applicabilityand do input validation. Therefore we will focus on thevariables and their dimensions expected in a dataset.For this reason we resign theGroupschema here, whichis defined as part of the netCDF 4 CDM.However, original grouping is retained by usingpath namesfor Variables and attributes.Note that a variable's value is not aproperty ofVariablebecause thisis aschemaof a variable.In the netCDF CDM, the only property of aDimensionislength. We makelengthan optional attribute. Remember that the "features"read from a Shapefile may have two dimensions(e.g. lat / lon) but no actuallength. However, adimension might have other attributes in this case,such asunits=degreesandcrs=WGS84.Attributes are meta-data. Since this is aschema,the attribute'svalueused here represents aninvariant across a given number of datasets instances.Ifvalueis not known, this still means that allthese datasets share an attribute with the givenname. \ No newline at end of file +Dot Executable: nullCannot find Graphviz. 
You should try@startumltestdot@endumlorjava -jar plantuml.jar -testdot \ No newline at end of file diff --git a/doc/source/_static/uml/cli.png b/doc/source/_static/uml/cli.png index 07855e3fa..c548a327a 100644 Binary files a/doc/source/_static/uml/cli.png and b/doc/source/_static/uml/cli.png differ diff --git a/doc/source/_static/uml/cli.svg b/doc/source/_static/uml/cli.svg index c7afc62d5..8843569f7 100644 --- a/doc/source/_static/uml/cli.svg +++ b/doc/source/_static/uml/cli.svg @@ -1,25 +1 @@ - - -argparseArgumentParserNamespaceCommandname_and_parser_kwargs(): tupleconfigure_parser(parser: argparse.ArgumentParser)execute(self, command_args: argparse.Namespace)ListCommandRunCommandDataStoreCommandWorkspaceCommandWorkspaceResourceCommandCommand patternglobalsCOMMAND_REGISTRY: listmain()COMMAND_REGISTRY is a mutable list.Cate-plugins can extend the CLI byadding new commands to it.<configure><execute with>3..N \ No newline at end of file +Dot Executable: nullCannot find Graphviz. You should try@startumltestdot@endumlorjava -jar plantuml.jar -testdot \ No newline at end of file diff --git a/doc/source/_static/uml/ds.png b/doc/source/_static/uml/ds.png index d554ffbbb..021e9d9e8 100644 Binary files a/doc/source/_static/uml/ds.png and b/doc/source/_static/uml/ds.png differ diff --git a/doc/source/_static/uml/ds.svg b/doc/source/_static/uml/ds.svg index 876f24da9..8843569f7 100644 --- a/doc/source/_static/uml/ds.svg +++ b/doc/source/_static/uml/ds.svg @@ -1,25 +1 @@ - - -globalsDATA_STORE_REGISTRY: DataStoreRegistryquery_data_sources(data_stores, constraints)open_dataset(data_source, name, time_range)Plugins can extendDATA_STORE_REGISTRYby adding new data storesDataStoreRegistryget_data_stores(): List[DataStore]get_data_store(name): DataStoreadd_data_store(DataStore): voidremove_data_store(DataStore): voidDataStorename: strquery(query_expr): DataSourceDataSourcename: strget_dataset_schema(): cdm.Schemaopen_dataset(name, time_range): xarray.Datasetsync(time_range)N<provide> \ No newline at end of file +Dot Executable: nullCannot find Graphviz. You should try@startumltestdot@endumlorjava -jar plantuml.jar -testdot \ No newline at end of file diff --git a/doc/source/_static/uml/modules.png b/doc/source/_static/uml/modules.png index 6aad0a970..9142d2fa8 100644 Binary files a/doc/source/_static/uml/modules.png and b/doc/source/_static/uml/modules.png differ diff --git a/doc/source/_static/uml/modules.svg b/doc/source/_static/uml/modules.svg index 1ffab8ac9..8843569f7 100644 --- a/doc/source/_static/uml/modules.svg +++ b/doc/source/_static/uml/modules.svg @@ -1,25 +1 @@ - - -CCI_Toolbox_Corecate.dscate.opscate.corecate.clicate.webapiCCI_Toolbox_GUIcate.desktopCCI_Open_Data_Portalesa_cci_odpesa_cci_ftplocalmore data store modules...coregistrationcorrelationnormalizeioplotresamplingselectsubsettimeseriesmore operation modules...dsmonitorobjectiooppluginworkflowworkspacecli_mainwebapi_maindesktop_mainFTPHTTPOPeNDAPmore services... \ No newline at end of file +Dot Executable: nullCannot find Graphviz. 
You should try@startumltestdot@endumlorjava -jar plantuml.jar -testdot \ No newline at end of file diff --git a/doc/source/_static/uml/monitor.png b/doc/source/_static/uml/monitor.png index 1c2f35902..ea72073d4 100644 Binary files a/doc/source/_static/uml/monitor.png and b/doc/source/_static/uml/monitor.png differ diff --git a/doc/source/_static/uml/monitor.svg b/doc/source/_static/uml/monitor.svg index 9180f4871..8843569f7 100644 --- a/doc/source/_static/uml/monitor.svg +++ b/doc/source/_static/uml/monitor.svg @@ -1,25 +1 @@ - - -MonitorNULL: Monitorstart(self, label: str, total_work: float)progress(self, work: float = None, msg: str)done(self)child(self, work: float): Monitorcancel()is_cancelled(): boolConsoleMonitorChildMonitorparent_monitor1 \ No newline at end of file +Dot Executable: nullCannot find Graphviz. You should try@startumltestdot@endumlorjava -jar plantuml.jar -testdot \ No newline at end of file diff --git a/doc/source/_static/uml/objectio.png b/doc/source/_static/uml/objectio.png index 21133613a..4ff124604 100644 Binary files a/doc/source/_static/uml/objectio.png and b/doc/source/_static/uml/objectio.png differ diff --git a/doc/source/_static/uml/objectio.svg b/doc/source/_static/uml/objectio.svg index 6a9467693..8843569f7 100644 --- a/doc/source/_static/uml/objectio.svg +++ b/doc/source/_static/uml/objectio.svg @@ -1,25 +1 @@ - - -cate.ops.ioTextObjectIOJsonObjectIONetcdf4ObjectIONetcdf3ObjectIOObjectIORegistryobject_io_list: Listfind_reader(file_obj, format, ext): ObjectIOfind_writer(obj, format, ext): ObjectIOObjectIOdescription: strformat_name: strfilename_ext: strread_fitness(file_obj): intread(file_obj)write_fitness(obj): intwrite(obj, file_path)globalsfind_reader(file_obj, format_name):find_writer(obj, file_path, format_name)read_object(file_obj, format_name)writer_object(obj, file_path, format_name)OBJECT_IO_REGISTRY: WriterRegistryOBJECT_IO_REGISTRY contains all knowndata reader and writer instances.N \ No newline at end of file +Dot Executable: nullCannot find Graphviz. You should try@startumltestdot@endumlorjava -jar plantuml.jar -testdot \ No newline at end of file diff --git a/doc/source/_static/uml/op.png b/doc/source/_static/uml/op.png index 68a10282c..726a62b44 100644 Binary files a/doc/source/_static/uml/op.png and b/doc/source/_static/uml/op.png differ diff --git a/doc/source/_static/uml/op.svg b/doc/source/_static/uml/op.svg index ba45595e9..8843569f7 100644 --- a/doc/source/_static/uml/op.svg +++ b/doc/source/_static/uml/op.svg @@ -1,25 +1 @@ - - -globalsOP_REGISTRY: OpRegistry@op(**properties)@op_input(name, **properties)@op_output(name, **properties)Plugins can extend OP_REGISTRYby adding new operations to it.To register a Python function orclass, the @op, @op_input,@op_output decorators are used.OpRegistryget_op(name): Operationadd_op(operation): Operationremove_op(operation): OperationOperationoperation: object__call__(*args, **kwargs): objectOpMetaInfoqualified_name : strheader : OrderedDictinput : OrderedDictoutput : OrderedDictNop_meta_info \ No newline at end of file +Dot Executable: nullCannot find Graphviz. 
You should try@startumltestdot@endumlorjava -jar plantuml.jar -testdot \ No newline at end of file diff --git a/doc/source/_static/uml/plugin.png b/doc/source/_static/uml/plugin.png index 034df09ab..10122d262 100644 Binary files a/doc/source/_static/uml/plugin.png and b/doc/source/_static/uml/plugin.png differ diff --git a/doc/source/_static/uml/plugin.svg b/doc/source/_static/uml/plugin.svg index 51a3adeac..8843569f7 100644 --- a/doc/source/_static/uml/plugin.svg +++ b/doc/source/_static/uml/plugin.svg @@ -1,25 +1 @@ - - -globalsPLUGIN_REGISTRY: Dict[str, callable]_load_plugins()When the plugin module is imported,_load_plugins() is invoked andPLUGIN_REGISTRY contains all loadedplugins.PLUGIN_REGISTRY is a mappingof entry point names to somecallable Python object. \ No newline at end of file +Dot Executable: nullCannot find Graphviz. You should try@startumltestdot@endumlorjava -jar plantuml.jar -testdot \ No newline at end of file diff --git a/doc/source/_static/uml/webapi_seq.png b/doc/source/_static/uml/webapi_seq.png index 1337229f9..2eab17737 100644 Binary files a/doc/source/_static/uml/webapi_seq.png and b/doc/source/_static/uml/webapi_seq.png differ diff --git a/doc/source/_static/uml/webapi_seq.svg b/doc/source/_static/uml/webapi_seq.svg index cd61ebf79..c79c66c23 100644 --- a/doc/source/_static/uml/webapi_seq.svg +++ b/doc/source/_static/uml/webapi_seq.svg @@ -1,25 +1 @@ - - -Cate Frontend/Backend communication using the "React" state transition modelFrontendBackendUserUserFrontendViewFrontendViewFrontendControllerFrontendControllerFrontendListenerFrontendListenerFrontendStoreFrontendStoreBackendControllerBackendControllerBackendStoreBackendStore* click! *dispatch(action)dispatch(action)dispatch(action)store_action(action)get_new_actions(timestamp): timestamp, actionsGetting new actionsis done asynchronouslyfrom a background threadthat polls e.g. all 50msfor all actionsdispatch(action)notify(new_state)render() \ No newline at end of file +Cate Frontend/Backend communication using the "React" state transition modelFrontendBackendUserUserFrontendViewFrontendViewFrontendControllerFrontendControllerFrontendListenerFrontendListenerFrontendStoreFrontendStoreBackendControllerBackendControllerBackendStoreBackendStore* click! *dispatch(action)dispatch(action)dispatch(action)store_action(action)get_new_actions(timestamp): timestamp, actionsGetting new actionsis done asynchronouslyfrom a background threadthat polls e.g. 
all 50msfor all actionsdispatch(action)notify(new_state)render() \ No newline at end of file diff --git a/doc/source/_static/uml/workflow.png b/doc/source/_static/uml/workflow.png index 14698845b..6a1a86b32 100644 Binary files a/doc/source/_static/uml/workflow.png and b/doc/source/_static/uml/workflow.png differ diff --git a/doc/source/_static/uml/workflow.svg b/doc/source/_static/uml/workflow.svg index c9e8c65a4..8843569f7 100644 --- a/doc/source/_static/uml/workflow.svg +++ b/doc/source/_static/uml/workflow.svg @@ -1,25 +1 @@ - - -cate.core.opOpMetaInfoqualified_name : strheader : dictinput : dictoutput : dictOperationNodeid : strinput : Dict[str, NodePort]output : Dict[str, NodePort]from_json(json_dict)to_json_dict() : dictinvoke(monitor, **args)Workflowfrom_json(json_dict)to_json_dict() : dictinvoke(monitor, **args)StepWorkflowStepresource : strOpStepop_registration : OperationExprStepexpression : strSubProcessSteparguments : List[str]NoOpStepCollection pattern:a Workflow is a Node,and aggregates Steps,which are Nodes as well.parentworkflowop_meta_infosteps0..N \ No newline at end of file +Dot Executable: nullCannot find Graphviz. You should try@startumltestdot@endumlorjava -jar plantuml.jar -testdot \ No newline at end of file diff --git a/doc/source/_static/uml/workflow_node_port.png b/doc/source/_static/uml/workflow_node_port.png index 039353afe..fcf3f84b6 100644 Binary files a/doc/source/_static/uml/workflow_node_port.png and b/doc/source/_static/uml/workflow_node_port.png differ diff --git a/doc/source/_static/uml/workflow_node_port.svg b/doc/source/_static/uml/workflow_node_port.svg index 353f5add2..8843569f7 100644 --- a/doc/source/_static/uml/workflow_node_port.svg +++ b/doc/source/_static/uml/workflow_node_port.svg @@ -1,25 +1 @@ - - -Nodeid : strNodePortnode: Nodename: strsource: NodePortvalue: objectinput0..Noutput0..N \ No newline at end of file +Dot Executable: nullCannot find Graphviz. You should try@startumltestdot@endumlorjava -jar plantuml.jar -testdot \ No newline at end of file diff --git a/doc/source/_static/uml/workflow_seq.png b/doc/source/_static/uml/workflow_seq.png index 7117af1af..76c623ffd 100644 Binary files a/doc/source/_static/uml/workflow_seq.png and b/doc/source/_static/uml/workflow_seq.png differ diff --git a/doc/source/_static/uml/workflow_seq.svg b/doc/source/_static/uml/workflow_seq.svg index f08f96a2b..bcb183868 100644 --- a/doc/source/_static/uml/workflow_seq.svg +++ b/doc/source/_static/uml/workflow_seq.svg @@ -1,25 +1 @@ - - -UserUserWorkflowWorkflowOpStepOpStepoperationoperationinvoke()for all stepsinvoke()_call_() \ No newline at end of file +UserUserWorkflowWorkflowOpStepOpStepoperationoperationinvoke()for all stepsinvoke()_call_() \ No newline at end of file diff --git a/doc/source/_static/uml/workspace.png b/doc/source/_static/uml/workspace.png index 82f173bff..cf7d7612f 100644 Binary files a/doc/source/_static/uml/workspace.png and b/doc/source/_static/uml/workspace.png differ diff --git a/doc/source/_static/uml/workspace.svg b/doc/source/_static/uml/workspace.svg index c84922056..8843569f7 100644 --- a/doc/source/_static/uml/workspace.svg +++ b/doc/source/_static/uml/workspace.svg @@ -1,25 +1 @@ - - -cate.corecate.webapiWorkspaceManagerinit_workspace()set_workspace_resource()write_workspace_resource()FSWorkspaceManagerWorkspaceWorkflowOpStepWebAPIWorkspaceManagerWebService<communicate><uses>workspace_cache0..N110..N \ No newline at end of file +Dot Executable: nullCannot find Graphviz. 
You should try@startumltestdot@endumlorjava -jar plantuml.jar -testdot \ No newline at end of file diff --git a/doc/source/about.rst b/doc/source/about.rst index 7ada63916..420ca4641 100644 --- a/doc/source/about.rst +++ b/doc/source/about.rst @@ -72,9 +72,9 @@ to get an overview of the supported sub-comands. For example, use::: $ cate ds list -to list and query available data sources. +to list available data sources. -In the GUI, the panel **DATA SOURCES** lets you query and open all available data sources. +In the GUI, the panel **DATA SOURCES** lets you query and open available data sources. Note that all remote CCI data source identifiers are prefixed by "esacci.", for example ``esacci.SST.day.L4.SSTdepth.multi-sensor.multi-platform.OSTIA.1-0.r1``. Local data source identifiers are diff --git a/doc/source/api_reference.rst b/doc/source/api_reference.rst index ca12a9ab9..ea92d2167 100644 --- a/doc/source/api_reference.rst +++ b/doc/source/api_reference.rst @@ -5,7 +5,7 @@ API Reference Datasets ======== -.. autofunction:: cate.core.query_data_sources +.. autofunction:: cate.core.find_data_sources .. autofunction:: cate.core.open_dataset diff --git a/doc/source/uml/ds.puml b/doc/source/uml/ds.puml index c336ef429..a5f58cd17 100644 --- a/doc/source/uml/ds.puml +++ b/doc/source/uml/ds.puml @@ -2,7 +2,7 @@ object globals { DATA_STORE_REGISTRY: DataStoreRegistry - query_data_sources(data_stores, constraints) + find_data_sources(data_stores, id, query_expr) open_dataset(data_source, name, time_range) } @@ -20,14 +20,14 @@ class DataStoreRegistry { } interface DataStore { - name: str - {abstract} query(query_expr): DataSource + id: str + {abstract} query(id, query_expr): DataSource } interface DataSource { - name: str + id: str {abstract} get_dataset_schema(): cdm.Schema - {abstract} open_dataset(name, time_range): xarray.Dataset + {abstract} open_dataset(id, time_range): xarray.Dataset {abstract} sync(time_range) } diff --git a/doc/source/user_manual/um_cli.rst b/doc/source/user_manual/um_cli.rst index ab31f5f0c..08f0bb629 100644 --- a/doc/source/user_manual/um_cli.rst +++ b/doc/source/user_manual/um_cli.rst @@ -73,19 +73,19 @@ The following examples shall help you understand the basic concepts behind the v Manage datasets --------------- -To query all available datasets, type:: +To list all available data sources, type:: cate ds list -To query all datasets that have ``ozone`` in their name, type:: +To query all data sources that have ``ozone`` in their name, type:: cate ds list -n ozone -To get more detailed information on a specific dataset, e.g. ``esacci.OZONE.mon.L3...``, type:: +To get more detailed information on a specific data source, e.g. ``esacci.OZONE.mon.L3...``, type:: cate ds info esacci.OZONE.mon.L3.NP.multi-sensor.multi-platform.MERGED.fv0002.r1 -To add a local Dataset from all netCDF files in e.g. ``data/sst_v3`` and name it e.g. ``SSTV3``, type:: +To add a local data source from all NetCDF files in e.g. ``data/sst_v3`` and name it e.g. ``SSTV3``, type:: cate ds def SSTV3 data/sst_v3/*.nc @@ -93,12 +93,12 @@ Make sure it is there:: cate ds list -n SSTV3 -To make a temporal subset ECV dataset locally available, i.e. avoid remote data access during its usage:: +To make a temporal subset ECV data source locally available, i.e. 
avoid remote data access during its usage:: cate ds copy esacci.OZONE.mon.L3.NP.multi-sensor.multi-platform.MERGED.fv0002.r1 -t 2006-01-01,2007-12-31 -The section :doc:`um_config` describes, how to configure the directory where ``cate`` stores such synchronised -data. +The section :doc:`um_config` describes, how to configure the location of directory in which +Cate stores such synchronised data. Inspect available operations ---------------------------- diff --git a/notebooks/cate-uc06.ipynb b/notebooks/cate-uc06.ipynb index 91259006f..70c52c685 100644 --- a/notebooks/cate-uc06.ipynb +++ b/notebooks/cate-uc06.ipynb @@ -530,7 +530,7 @@ "language_info": { "codemirror_mode": { "name": "ipython", - "version": 3 + "version": 3.0 }, "file_extension": ".py", "mimetype": "text/x-python", @@ -541,5 +541,5 @@ } }, "nbformat": 4, - "nbformat_minor": 2 -} + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/notebooks/cate-uc09.ipynb b/notebooks/cate-uc09.ipynb index 108a7b2db..1da458b1b 100644 --- a/notebooks/cate-uc09.ipynb +++ b/notebooks/cate-uc09.ipynb @@ -524,7 +524,9 @@ "collapsed": true }, "outputs": [], - "source": [] + "source": [ + "" + ] } ], "metadata": { @@ -536,7 +538,7 @@ "language_info": { "codemirror_mode": { "name": "ipython", - "version": 3 + "version": 3.0 }, "file_extension": ".py", "mimetype": "text/x-python", @@ -547,5 +549,5 @@ } }, "nbformat": 4, - "nbformat_minor": 1 -} + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/test/core/test_ds.py b/test/core/test_ds.py index 928d33cfb..c5de81fb4 100644 --- a/test/core/test_ds.py +++ b/test/core/test_ds.py @@ -1,4 +1,4 @@ -from typing import Sequence, Any +from typing import Sequence, Any, Optional from unittest import TestCase, skipIf import os.path as op import os @@ -14,33 +14,38 @@ class SimpleDataStore(ds.DataStore): - def __init__(self, name: str, data_sources: Sequence[ds.DataSource]): - super().__init__(name) + def __init__(self, id: str, data_sources: Sequence[ds.DataSource]): + super().__init__(id, title='Simple Test Store') self._data_sources = list(data_sources) - def query(self, name=None, monitor: Monitor = Monitor.NONE) -> Sequence[ds.DataSource]: - return [ds for ds in self._data_sources if ds.matches_filter(name)] + def query(self, id: str = None, query_expr: str = None, monitor: Monitor = Monitor.NONE) -> Sequence[ds.DataSource]: + return [ds for ds in self._data_sources if ds.matches(id=id)] def _repr_html_(self): return '' class SimpleDataSource(ds.DataSource): - def __init__(self, name: str): - self._name = name + def __init__(self, id: str, meta_info: dict = None): + self._id = id self._data_store = None + self._meta_info = meta_info @property def data_store(self) -> ds.DataStore: return self.data_store @property - def schema(self) -> ds.Schema: + def schema(self) -> Optional[ds.Schema]: return None @property - def name(self) -> str: - return self._name + def id(self) -> str: + return self._id + + @property + def meta_info(self) -> Optional[dict]: + return self._meta_info def open_dataset(self, time_range: TimeRangeLike.TYPE = None, @@ -59,10 +64,10 @@ def make_local(self, return None def __repr__(self): - return "SimpleDataSource(%s)" % repr(self._name) + return "SimpleDataSource(%s)" % repr(self._id) def _repr_html_(self): - return self._name + return self._id class InMemoryDataSource(SimpleDataSource): @@ -88,14 +93,22 @@ def _repr_html_(self): class IOTest(TestCase): def setUp(self): self.DS_AEROSOL = SimpleDataSource('aerosol') - self.DS_OZONE = SimpleDataSource('ozone') + 
self.DS_OZONE = SimpleDataSource('ozone', meta_info=dict(title='This is pure Ozone')) self.TEST_DATA_STORE = SimpleDataStore('test_aero_ozone', [self.DS_AEROSOL, self.DS_OZONE]) self.DS_AEROSOL._data_store = self.TEST_DATA_STORE self.DS_OZONE._data_store = self.TEST_DATA_STORE self.DS_SST = SimpleDataSource('sst') self.TEST_DATA_STORE_SST = SimpleDataStore('test_sst', [self.DS_SST]) - def test_query_data_sources_default_data_store(self): + def test_title(self): + self.assertEqual(self.DS_AEROSOL.title, None) + self.assertEqual(self.DS_OZONE.title, 'This is pure Ozone') + + def test_meta_info(self): + self.assertEqual(self.DS_AEROSOL.meta_info, None) + self.assertEqual(self.DS_OZONE.meta_info, dict(title='This is pure Ozone')) + + def test_find_data_sources_default_data_store(self): size_before = len(ds.DATA_STORE_REGISTRY) orig_stores = list(ds.DATA_STORE_REGISTRY.get_data_stores()) try: @@ -106,16 +119,16 @@ def test_query_data_sources_default_data_store(self): set_default_data_store_ftp() self.assertEqual(1, len(ds.DATA_STORE_REGISTRY)) - data_sources = ds.query_data_sources() + data_sources = ds.find_data_sources() self.assertIsNotNone(data_sources) self.assertEqual(len(data_sources), 98) - self.assertEqual(data_sources[0].name, "AEROSOL_ATSR2_SU_L3_V4.2_DAILY") + self.assertEqual(data_sources[0].id, "AEROSOL_ATSR2_SU_L3_V4.2_DAILY") - data_sources = ds.query_data_sources(name="AEROSOL_ATSR2_SU_L3_V4.2_DAILY") + data_sources = ds.find_data_sources(id="AEROSOL_ATSR2_SU_L3_V4.2_DAILY") self.assertIsNotNone(data_sources) self.assertEqual(len(data_sources), 1) - data_sources = ds.query_data_sources(name="ZZ") + data_sources = ds.find_data_sources(id="ZZ") self.assertIsNotNone(data_sources) self.assertEqual(len(data_sources), 0) finally: @@ -124,34 +137,34 @@ def test_query_data_sources_default_data_store(self): ds.DATA_STORE_REGISTRY.add_data_store(data_store) self.assertEqual(size_before, len(ds.DATA_STORE_REGISTRY)) - def test_query_data_sources_with_data_store_value(self): - data_sources = ds.query_data_sources(data_stores=self.TEST_DATA_STORE) + def test_find_data_sources_with_data_store_value(self): + data_sources = ds.find_data_sources(data_stores=self.TEST_DATA_STORE) self.assertIsNotNone(data_sources) self.assertEqual(len(data_sources), 2) - self.assertEqual(data_sources[0].name, "aerosol") - self.assertEqual(data_sources[1].name, "ozone") + self.assertEqual(data_sources[0].id, "aerosol") + self.assertEqual(data_sources[1].id, "ozone") - def test_query_data_sources_with_data_store_list(self): + def test_find_data_sources_with_data_store_list(self): data_stores = [self.TEST_DATA_STORE, self.TEST_DATA_STORE_SST] - data_sources = ds.query_data_sources(data_stores=data_stores) + data_sources = ds.find_data_sources(data_stores=data_stores) self.assertIsNotNone(data_sources) self.assertEqual(len(data_sources), 3) - self.assertEqual(data_sources[0].name, "aerosol") - self.assertEqual(data_sources[1].name, "ozone") - self.assertEqual(data_sources[2].name, "sst") + self.assertEqual(data_sources[0].id, "aerosol") + self.assertEqual(data_sources[1].id, "ozone") + self.assertEqual(data_sources[2].id, "sst") - def test_query_data_sources_with_constrains(self): - data_sources = ds.query_data_sources(data_stores=self.TEST_DATA_STORE, name="aerosol") + def test_find_data_sources_with_constrains(self): + data_sources = ds.find_data_sources(data_stores=self.TEST_DATA_STORE, id="aerosol") self.assertIsNotNone(data_sources) self.assertEqual(len(data_sources), 1) - self.assertEqual(data_sources[0].name, 
"aerosol") + self.assertEqual(data_sources[0].id, "aerosol") - data_sources = ds.query_data_sources(data_stores=self.TEST_DATA_STORE, name="ozone") + data_sources = ds.find_data_sources(data_stores=self.TEST_DATA_STORE, id="ozone") self.assertIsNotNone(data_sources) self.assertEqual(len(data_sources), 1) - self.assertEqual(data_sources[0].name, "ozone") + self.assertEqual(data_sources[0].id, "ozone") - data_sources = ds.query_data_sources(data_stores=self.TEST_DATA_STORE, name="Z") + data_sources = ds.find_data_sources(data_stores=self.TEST_DATA_STORE, id="Z") self.assertIsNotNone(data_sources) self.assertEqual(len(data_sources), 1) diff --git a/test/ds/test_esa_cci_ftp.py b/test/ds/test_esa_cci_ftp.py index 472317205..888bc194c 100644 --- a/test/ds/test_esa_cci_ftp.py +++ b/test/ds/test_esa_cci_ftp.py @@ -41,12 +41,12 @@ def setUp(self): data_store = FileSetDataStore.from_json('test', 'TEST_ROOT_DIR', FileSetDataSourceTest.JSON) self.assertIsNotNone(data_store) self.assertEqual(2, len(data_store._data_sources)) - self.assertEqual('test', data_store.name) + self.assertEqual('test', data_store.id) self.ds0 = data_store._data_sources[0] self.ds1 = data_store._data_sources[1] def test_from_json(self): - self.assertEqual('AEROSOL_ATSR2_SU_L3_V4.2_DAILY', self.ds0.name) + self.assertEqual('AEROSOL_ATSR2_SU_L3_V4.2_DAILY', self.ds0.id) json_dict = self.ds0.to_json_dict() self.assertEqual('aerosol/data/ATSR2_SU/L3/v4.2/DAILY', json_dict['base_dir']) @@ -59,7 +59,7 @@ def test_from_json(self): self.assertEqual(2631, fileset_info['num_files']) self.assertEqual(42338, fileset_info['size_in_mb']) - self.assertEqual('AEROSOL_ATSR2_SU_L3_V4.21_MONTHLY', self.ds1.name) + self.assertEqual('AEROSOL_ATSR2_SU_L3_V4.21_MONTHLY', self.ds1.id) json_dict = self.ds1.to_json_dict() self.assertEqual('aerosol/data/ATSR2_SU/L3/v4.21/MONTHLY', json_dict['base_dir']) diff --git a/test/ds/test_esa_cci_odp.py b/test/ds/test_esa_cci_odp.py index 5206dc2d7..1ac9cf47f 100644 --- a/test/ds/test_esa_cci_odp.py +++ b/test/ds/test_esa_cci_odp.py @@ -40,6 +40,10 @@ class EsaCciOdpDataStoreTest(unittest.TestCase): def setUp(self): self.data_store = _create_test_data_store() + def test_id_and_title(self): + self.assertEqual(self.data_store.id, 'test-odp') + self.assertEqual(self.data_store.title, 'ESA CCI Open Data Portal') + def test_query(self): data_sources = self.data_store.query() self.assertIsNotNone(data_sources) @@ -127,18 +131,18 @@ def build_file_item(item_name: str, date_from: datetime, date_to: datetime, size except: raise ValueError(reference_path, os.listdir(reference_path)) - self.assertEqual(new_ds.name, 'local.local_ds_test') + self.assertEqual(new_ds.id, 'local.local_ds_test') self.assertEqual(new_ds.temporal_coverage(), (datetime.datetime(1978, 11, 14, 0, 0), datetime.datetime(1978, 11, 15, 23, 59))) - self.data_source.update_local(new_ds.name, (datetime.datetime(1978, 11, 15, 00, 00), + self.data_source.update_local(new_ds.id, (datetime.datetime(1978, 11, 15, 00, 00), datetime.datetime(1978, 11, 16, 23, 59))) self.assertEqual(new_ds.temporal_coverage(), TimeRangeLike.convert( (datetime.datetime(1978, 11, 15, 0, 0), datetime.datetime(1978, 11, 16, 23, 59)))) - self.data_source.update_local(new_ds.name, (datetime.datetime(1978, 11, 14, 00, 00), + self.data_source.update_local(new_ds.id, (datetime.datetime(1978, 11, 14, 00, 00), datetime.datetime(1978, 11, 15, 23, 59))) self.assertEqual(new_ds.temporal_coverage(), TimeRangeLike.convert( (datetime.datetime(1978, 11, 14, 0, 0), @@ -152,7 +156,7 @@ def 
build_file_item(item_name: str, date_from: datetime, date_to: datetime, size new_ds_w_one_variable = self.data_source.make_local( 'local_ds_test_2', None, (datetime.datetime(1978, 11, 14, 0, 0), datetime.datetime(1978, 11, 15, 23, 59)), None, ['sm']) - self.assertEqual(new_ds_w_one_variable.name, 'local.local_ds_test_2') + self.assertEqual(new_ds_w_one_variable.id, 'local.local_ds_test_2') ds = new_ds_w_one_variable.open_dataset() self.assertSetEqual(set(ds.variables), {'sm', 'lat', 'lon', 'time'}) @@ -160,7 +164,7 @@ def build_file_item(item_name: str, date_from: datetime, date_to: datetime, size 'from_local_to_local_region', None, (datetime.datetime(1978, 11, 14, 0, 0), datetime.datetime(1978, 11, 15, 23, 59)), "10,10,20,20", ['sm']) # type: LocalDataSource - self.assertEqual(new_ds_w_region.name, 'local.from_local_to_local_region') + self.assertEqual(new_ds_w_region.id, 'local.from_local_to_local_region') self.assertEqual(new_ds_w_region.spatial_coverage(), PolygonLike.convert("10,10,20,20")) data_set = new_ds_w_region.open_dataset() self.assertSetEqual(set(data_set.variables), {'sm', 'lat', 'lon', 'time'}) @@ -170,7 +174,7 @@ def test_data_store(self): self.data_store) def test_id(self): - self.assertEqual(self.data_source.name, + self.assertEqual(self.data_source.id, 'esacci.OC.day.L3S.K_490.multi-sensor.multi-platform.MERGED.1-0.r2') def test_schema(self): diff --git a/test/ds/test_local.py b/test/ds/test_local.py index bb8599c9f..d70839de9 100644 --- a/test/ds/test_local.py +++ b/test/ds/test_local.py @@ -35,7 +35,7 @@ def test_add_pattern(self): self.assertEqual(len(data_sources), 2) new_ds = self.data_store.add_pattern("a_name", "a_pat") - self.assertEqual('test.a_name', new_ds.name) + self.assertEqual('test.a_name', new_ds.id) data_sources = self.data_store.query() self.assertEqual(len(data_sources), 3) @@ -144,11 +144,11 @@ def test_data_store(self): self.assertIs(self.ds4.data_store, self._dummy_store) def test_id(self): - self.assertEqual(self.ds1.name, 'ozone') - self.assertEqual(self.ds2.name, 'aerosol') - self.assertEqual(self.empty_ds.name, 'empty') - self.assertEqual(self.ds3.name, 'w_temporal_1') - self.assertEqual(self.ds4.name, 'w_temporal_2') + self.assertEqual(self.ds1.id, 'ozone') + self.assertEqual(self.ds2.id, 'aerosol') + self.assertEqual(self.empty_ds.id, 'empty') + self.assertEqual(self.ds3.id, 'w_temporal_1') + self.assertEqual(self.ds4.id, 'w_temporal_2') def test_schema(self): self.assertEqual(self.ds1.schema, None) @@ -249,12 +249,12 @@ def test_make_local(self): new_ds = data_source.make_local('from_local_to_local', None, (datetime.datetime(1978, 11, 14, 0, 0), datetime.datetime(1978, 11, 15, 23, 59))) - self.assertEqual(new_ds.name, 'local.from_local_to_local') + self.assertEqual(new_ds.id, 'local.from_local_to_local') self.assertEqual(new_ds.temporal_coverage(), TimeRangeLike.convert( (datetime.datetime(1978, 11, 14, 0, 0), datetime.datetime(1978, 11, 15, 23, 59)))) - data_source.update_local(new_ds.name, (datetime.datetime(1978, 11, 15, 00, 00), + data_source.update_local(new_ds.id, (datetime.datetime(1978, 11, 15, 00, 00), datetime.datetime(1978, 11, 16, 23, 59))) self.assertEqual(new_ds.temporal_coverage(), TimeRangeLike.convert( (datetime.datetime(1978, 11, 15, 0, 0), @@ -269,7 +269,7 @@ def test_make_local(self): (datetime.datetime(1978, 11, 14, 0, 0), datetime.datetime(1978, 11, 15, 23, 59)), None, ['sm']) - self.assertEqual(new_ds_w_one_variable.name, 'local.from_local_to_local_var') + self.assertEqual(new_ds_w_one_variable.id, 
'local.from_local_to_local_var') data_set = new_ds_w_one_variable.open_dataset() self.assertSetEqual(set(data_set.variables), {'sm', 'lat', 'lon', 'time'}) @@ -277,7 +277,7 @@ def test_make_local(self): (datetime.datetime(1978, 11, 14, 0, 0), datetime.datetime(1978, 11, 15, 23, 59)), "10,10,20,20", ['sm']) # type: LocalDataSource - self.assertEqual(new_ds_w_region.name, 'local.from_local_to_local_region') + self.assertEqual(new_ds_w_region.id, 'local.from_local_to_local_region') self.assertEqual(new_ds_w_region.spatial_coverage(), PolygonLike.convert("10,10,20,20")) data_set = new_ds_w_region.open_dataset() self.assertSetEqual(set(data_set.variables), {'sm', 'lat', 'lon', 'time'})
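
For orientation, the renamed data-source API exercised by the tests above can be called roughly as follows. This is a minimal sketch against `cate.core.ds` as it appears in these diffs, not part of the change set itself; the OZONE identifier is taken from the documentation examples above, the time range is an illustrative placeholder, and a populated default data store registry is assumed::

    import datetime

    from cate.core.ds import find_data_sources

    # Look up data sources by identifier (previously query_data_sources(name=...)).
    data_sources = find_data_sources(
        id='esacci.OZONE.mon.L3.NP.multi-sensor.multi-platform.MERGED.fv0002.r1')

    for data_source in data_sources:
        # 'name' has been renamed to 'id'; 'title' is newly added and may be None.
        print(data_source.id, data_source.title)

    if data_sources:
        # Open the first match for a sub-period; DataSource.open_dataset takes an
        # optional time_range, as in the test stubs above.
        time_range = (datetime.datetime(2007, 1, 1), datetime.datetime(2007, 12, 31))
        dataset = data_sources[0].open_dataset(time_range=time_range)

From the command line, the corresponding listing is available via ``cate ds list``, as shown in the updated user manual excerpt above.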