From b014051d8ae9a1001796111eb7a51d99e85cfbe1 Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Thu, 11 Apr 2019 12:33:52 +0200 Subject: [PATCH 01/20] pyDKB: introduce pyDKB.storages submodule. Sometimes we have to interact with the same storage in different scripts. When it happens we have to implement the same functionality multiple times: read configuration, check that Python module is available, create client, ... This submodule is a place to implement this once and then reuse whenever it is needed. Initial module structure: ``` pyDKB |---> storages |---> exceptions | |---> StorageException | |---> NotFound | |---> client |---> Client ``` --- Utils/Dataflow/pyDKB/storages/__init__.py | 6 +++ .../Dataflow/pyDKB/storages/client/Client.py | 37 +++++++++++++++++++ .../pyDKB/storages/client/__init__.py | 5 +++ Utils/Dataflow/pyDKB/storages/exceptions.py | 25 +++++++++++++ 4 files changed, 73 insertions(+) create mode 100644 Utils/Dataflow/pyDKB/storages/__init__.py create mode 100644 Utils/Dataflow/pyDKB/storages/client/Client.py create mode 100644 Utils/Dataflow/pyDKB/storages/client/__init__.py create mode 100644 Utils/Dataflow/pyDKB/storages/exceptions.py diff --git a/Utils/Dataflow/pyDKB/storages/__init__.py b/Utils/Dataflow/pyDKB/storages/__init__.py new file mode 100644 index 000000000..33787547a --- /dev/null +++ b/Utils/Dataflow/pyDKB/storages/__init__.py @@ -0,0 +1,6 @@ +""" +pyDKB.storages +""" + +import exceptions +import client diff --git a/Utils/Dataflow/pyDKB/storages/client/Client.py b/Utils/Dataflow/pyDKB/storages/client/Client.py new file mode 100644 index 000000000..f3ec64a85 --- /dev/null +++ b/Utils/Dataflow/pyDKB/storages/client/Client.py @@ -0,0 +1,37 @@ +""" +pyDKB.storages.client.Client +""" + +from pyDKB.common import LoggableObject + + +class Client(LoggableObject): + """ Interface class for external and internal DKB storage clients. """ + + # Storage client + c = None + + def __init__(self): + """ Initialize Storage object. 
""" + raise NotImplementedError + + def configure(self, cfg): + """ Apply storage configuration (initialize client). + + :param cfg: configuration parameters + :type cfg: dict + """ + raise NotImplementedError + + def get(self, id, **kwargs): + """ Get object / record from storage by ID. + + Raise ``NotFound`` exception if object / record not found. + + :param id: object / record identfier + :type id: str, int + + :return: record with given ID + :rtype: dict + """ + raise NotImplementedError diff --git a/Utils/Dataflow/pyDKB/storages/client/__init__.py b/Utils/Dataflow/pyDKB/storages/client/__init__.py new file mode 100644 index 000000000..6fdcd4c5b --- /dev/null +++ b/Utils/Dataflow/pyDKB/storages/client/__init__.py @@ -0,0 +1,5 @@ +""" +pyDKB.storages.client +""" + +from Client import Client diff --git a/Utils/Dataflow/pyDKB/storages/exceptions.py b/Utils/Dataflow/pyDKB/storages/exceptions.py new file mode 100644 index 000000000..8eb7492b9 --- /dev/null +++ b/Utils/Dataflow/pyDKB/storages/exceptions.py @@ -0,0 +1,25 @@ +""" +pyDKB.storages.exceptions +""" + + +class StorageException(Exception): + """ Base exception for all storage-related exceptions. """ + pass + + +class NotFound(StorageException): + """ Exeption indicating that record with given ID not found. """ + + def __init__(self, **kwargs): + """ Initialize exception. + + :param kwargs: record primary key parameters + :type kwargs: dict + """ + message = "Record not found" + if kwargs: + params = [': '.join((key, '%r' % kwargs[key])) for key in kwargs] + params = ', '.join(params) + message = message + ' (%s)' % params + super(NotFound, self).__init__(message) From fcce139ea2c17834a1964dcaa0dec552944ac5e0 Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Wed, 26 Jun 2019 12:49:57 +0200 Subject: [PATCH 02/20] pyDKB/storages: add mechanics to get clients for known storages. 
The suggested way to use `pyDKB.storages` is: ``` from pyDKB import storages def processA(...): rucio_client = storages.getClient('rucio', 'atlas') rucio_client.get_metadata(...) ... def processB(...): rucio_client = storages.getClient('rucio', 'atlas') rucio_client.get_metadata(...) ... ``` The client used in `processA` and `processB` (and at every call of a function) will be the same instance, initialized at first call. In other words, client classes for individual storages are "singleton" classes (as long as used via `getClient()` methods, not directly). "Scope" abstraction is introduced to keep project-specific functionality localised in a single submodule, preventing it from leaking into other, more general modules. Each "scope" is nothing but a submodule of `pyDKB.storages`. It must contain pre-configured clients for the storages used in a given project and support a general access method to these clients (`getClient()` function). --- Utils/Dataflow/pyDKB/storages/__init__.py | 52 +++++++++++++++++++ .../Dataflow/pyDKB/storages/atlas/__init__.py | 3 ++ Utils/Dataflow/pyDKB/storages/atlas/rucio.py | 25 +++++++++ 3 files changed, 80 insertions(+) create mode 100644 Utils/Dataflow/pyDKB/storages/atlas/__init__.py create mode 100644 Utils/Dataflow/pyDKB/storages/atlas/rucio.py diff --git a/Utils/Dataflow/pyDKB/storages/__init__.py b/Utils/Dataflow/pyDKB/storages/__init__.py index 33787547a..4990d5644 100644 --- a/Utils/Dataflow/pyDKB/storages/__init__.py +++ b/Utils/Dataflow/pyDKB/storages/__init__.py @@ -2,5 +2,57 @@ pyDKB.storages """ +import importlib + + import exceptions import client + + +def getScope(scope): + """ Initialize storages scope for further usage. + + :param scope: scope name + :type scope: str + """ + try: + full_name = __name__ + "." 
+ scope + scope = importlib.import_module(full_name) + except ImportError: + raise exceptions.StorageException("Scope not defined: '%s'" % scope) + return scope + + +def getClient(name, scope=None): + """ Get client for a given storage. + + Raise ``StorageException`` if failed to get client by given name and scope. + + :param name: storage name + :type name: str + :param scope: scope name. If not specified, default value set with + `setScope()` is used + :type scope: str, NoneType + + :return: storage client + :rtype: client.Client + """ + if scope: + scope = getScope(scope) + else: + raise exceptions.StorageException("Storages scope not specified") + cur_scope = scope + for n in name.split('.'): + try: + new_scope = getattr(cur_scope, n, None) + if new_scope is None: + new_scope_name = cur_scope.__name__ + "." + n + new_scope = importlib.import_module(new_scope_name) + cur_scope = new_scope + except ImportError: + raise exceptions.StorageException("Storage not defined in scope " + "'%s': '%s'" + % (scope.__name__.split('.')[-1], + name)) + client = cur_scope.getClient() + return client diff --git a/Utils/Dataflow/pyDKB/storages/atlas/__init__.py b/Utils/Dataflow/pyDKB/storages/atlas/__init__.py new file mode 100644 index 000000000..298900e21 --- /dev/null +++ b/Utils/Dataflow/pyDKB/storages/atlas/__init__.py @@ -0,0 +1,3 @@ +""" +pyDKB.storages.atlas +""" diff --git a/Utils/Dataflow/pyDKB/storages/atlas/rucio.py b/Utils/Dataflow/pyDKB/storages/atlas/rucio.py new file mode 100644 index 000000000..95bdbac21 --- /dev/null +++ b/Utils/Dataflow/pyDKB/storages/atlas/rucio.py @@ -0,0 +1,25 @@ +""" +pyDKB.storages.atlas.rucio +""" + +from ..client import Client + + +_client = None + + +def _initClient(): + """ Initialize client. """ + global _client + _client = RucioClient() + + +def getClient(): + """ Get Rucio client. 
""" + if not _client: + _initClient() + return _client + + +class RucioClient(Client): + pass From d36a7065347816a46b4bfba6bfef132b2fb01890 Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Wed, 26 Jun 2019 13:14:18 +0200 Subject: [PATCH 03/20] pyDKB/storages: add possibility to set default scope. It allows getting clients simply by name, not specifying the scope every now and again: ``` from pyDKB import storages storages.setScope('atlas') def processA(...): s = storages.getClient('rucio') s.get_metadata(...) ``` --- Utils/Dataflow/pyDKB/storages/__init__.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/Utils/Dataflow/pyDKB/storages/__init__.py b/Utils/Dataflow/pyDKB/storages/__init__.py index 4990d5644..14af6662c 100644 --- a/Utils/Dataflow/pyDKB/storages/__init__.py +++ b/Utils/Dataflow/pyDKB/storages/__init__.py @@ -9,6 +9,19 @@ import client +_scope = None + + +def setScope(scope): + """ Set default scope to look for storages. + + :param scope: scope name + :type scope: str + """ + global _scope + _scope = getScope(scope) + + def getScope(scope): """ Initialize storages scope for further usage. @@ -40,6 +53,8 @@ def getClient(name, scope=None): if scope: scope = getScope(scope) else: + scope = _scope + if scope is None: raise exceptions.StorageException("Storages scope not specified") cur_scope = scope for n in name.split('.'): From c48b02bf8dbace97a947b67bf620ccd724021ac7 Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Wed, 10 Jul 2019 14:58:34 +0200 Subject: [PATCH 04/20] pyDKB/storages: remove reference to client object from the interface class. The idea of the interface is to provide a common set of methods to interact with different storages, not to (re-)implement all possible methods of interaction with storages. But different storages may have specific methods, and calling them as `my_client.c.client_method_A()` will look a bit weird: `my_client.client_method_A()` looks more natural. 
To avoid re-implementation of all useful methods in this way: ``` def client_methodA(self, *args, **kwargs): self.c.client_method_A(*args, **kwargs) ``` multiple inheritance will be used: ``` class MyClient(pyDKB.storages.client.Client, ParentClientClass): ... ``` In case of `ParentClientClass` having same methods as Client, by default `Client` (interface) method will be used, raising `NotImplementedError`. If `ParentClientClass` method should be used, it is to be specified explicitly. --- Utils/Dataflow/pyDKB/storages/client/Client.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/Utils/Dataflow/pyDKB/storages/client/Client.py b/Utils/Dataflow/pyDKB/storages/client/Client.py index f3ec64a85..fbe19b439 100644 --- a/Utils/Dataflow/pyDKB/storages/client/Client.py +++ b/Utils/Dataflow/pyDKB/storages/client/Client.py @@ -8,9 +8,6 @@ class Client(LoggableObject): """ Interface class for external and internal DKB storage clients. """ - # Storage client - c = None - def __init__(self): """ Initialize Storage object. """ raise NotImplementedError From 1e073dd549ab013dbf56ad94de92e0ac35f7f5cc Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Wed, 10 Jul 2019 14:55:52 +0200 Subject: [PATCH 05/20] pyDKB/storages: add "bare" wrapper for `atlas.rucio` client. 
--- Utils/Dataflow/pyDKB/common/misc.py | 23 +++++++++++++++ Utils/Dataflow/pyDKB/storages/atlas/rucio.py | 31 ++++++++++++++++++-- 2 files changed, 52 insertions(+), 2 deletions(-) diff --git a/Utils/Dataflow/pyDKB/common/misc.py b/Utils/Dataflow/pyDKB/common/misc.py index 08a7e8963..bebd54174 100644 --- a/Utils/Dataflow/pyDKB/common/misc.py +++ b/Utils/Dataflow/pyDKB/common/misc.py @@ -7,6 +7,7 @@ import sys import inspect from datetime import datetime +import importlib from types import logLevel @@ -56,3 +57,25 @@ def log(message, level=logLevel.INFO, *args): out_message += "\n(==) %s" % l out_message += "\n" sys.stderr.write(out_message) + + +def try_to_import(modname, attrname=None): + """ Try to import specified module or attribute from a module. + + If module/attribute can not be imported, catch the exception and output log + message. + """ + result = False + try: + result = importlib.import_module(modname) + if attrname: + result = getattr(result, attrname) + except ImportError, err: + log("Failed to import '%s'.\nDetails: %s" % (modname, err), + logLevel.ERROR) + except AttributeError: + if attrname: + log("Failed to import '%s' from '%s'" % (attrname, modname)) + except Exception, err: + log(str(err), logLevel.ERROR) + return result diff --git a/Utils/Dataflow/pyDKB/storages/atlas/rucio.py b/Utils/Dataflow/pyDKB/storages/atlas/rucio.py index 95bdbac21..58da402fc 100644 --- a/Utils/Dataflow/pyDKB/storages/atlas/rucio.py +++ b/Utils/Dataflow/pyDKB/storages/atlas/rucio.py @@ -2,7 +2,24 @@ pyDKB.storages.atlas.rucio """ +import os + from ..client import Client +from ..exceptions import StorageException +from pyDKB.common.misc import (log, logLevel) +from pyDKB.common.misc import try_to_import + + +if not os.environ.get("VIRTUAL_ENV", None): + user_rucio_dir = os.path.expanduser("~/.rucio") + if os.path.exists(user_rucio_dir): + os.environ["VIRTUAL_ENV"] = os.path.join(user_rucio_dir) + else: + os.environ["VIRTUAL_ENV"] = os.path.join(base_dir, ".rucio") + 
log("Set VIRTUAL_ENV: %s" % os.environ["VIRTUAL_ENV"], logLevel.INFO) + +_RucioClient = try_to_import('rucio.client', 'Client') +RucioException = try_to_import('rucio.common.exception', 'RucioException') _client = None @@ -11,6 +28,9 @@ def _initClient(): """ Initialize client. """ global _client + if not _RucioClient: + raise StorageException("Failed to initialize Rucio client: required " + "module(s) not loaded.") _client = RucioClient() @@ -21,5 +41,12 @@ def getClient(): return _client -class RucioClient(Client): - pass +ParentClientClass = _RucioClient if _RucioClient else object + + +class RucioClient(Client, ParentClientClass): + """ Implement common interface for Rucio client. """ + + def __init__(self, *args, **kwargs): + """ Initialize parent client class. """ + ParentClientClass.__init__(self, *args, **kwargs) From 9dd2f5e6fdff8093c6183fd75e74609c9f40e623 Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Wed, 10 Jul 2019 16:38:34 +0200 Subject: [PATCH 06/20] pyDKB/storages: codestyle fix (reserved word). "id" is a reserved word in Python, so it should not be used as variable or parameter name. --- Utils/Dataflow/pyDKB/storages/client/Client.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Utils/Dataflow/pyDKB/storages/client/Client.py b/Utils/Dataflow/pyDKB/storages/client/Client.py index fbe19b439..9d2dd754f 100644 --- a/Utils/Dataflow/pyDKB/storages/client/Client.py +++ b/Utils/Dataflow/pyDKB/storages/client/Client.py @@ -20,13 +20,13 @@ def configure(self, cfg): """ raise NotImplementedError - def get(self, id, **kwargs): + def get(self, oid, **kwargs): """ Get object / record from storage by ID. Raise ``NotFound`` exception if object / record not found. 
- :param id: object / record identfier - :type id: str, int + :param oid: object / record identfier + :type oid: str, int :return: record with given ID :rtype: dict From 560285734512757b5ae23028a0ad481cb42e84a1 Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Wed, 10 Jul 2019 16:31:17 +0200 Subject: [PATCH 07/20] pyDKB/storages: implement `RucioClient.get()` method. Original Rucio `Client` does not have `get()` method. --- Utils/Dataflow/pyDKB/storages/atlas/rucio.py | 60 +++++++++++++++++++- 1 file changed, 59 insertions(+), 1 deletion(-) diff --git a/Utils/Dataflow/pyDKB/storages/atlas/rucio.py b/Utils/Dataflow/pyDKB/storages/atlas/rucio.py index 58da402fc..ab7632ab0 100644 --- a/Utils/Dataflow/pyDKB/storages/atlas/rucio.py +++ b/Utils/Dataflow/pyDKB/storages/atlas/rucio.py @@ -5,7 +5,7 @@ import os from ..client import Client -from ..exceptions import StorageException +from ..exceptions import (StorageException, NotFound) from pyDKB.common.misc import (log, logLevel) from pyDKB.common.misc import try_to_import @@ -50,3 +50,61 @@ class RucioClient(Client, ParentClientClass): def __init__(self, *args, **kwargs): """ Initialize parent client class. """ ParentClientClass.__init__(self, *args, **kwargs) + + def get(self, oid, **kwargs): + """ Get dataset metadata. + + Implementation of interface method `Clent.get()`. 
+ + :param oid: dataset name + :type oid: str + :param fields: list of requested metadata fields + (None = all metadata) + :type fields: list + + :return: dataset metadata + :rtype: dict + """ + scope, name = self._scope_and_name(oid) + try: + result = self.get_metadata(scope=scope, name=name) + except ValueError, err: + raise StorageException("Failed to get metadata from Rucio: %s" + % err) + except RucioException, err: + if 'Data identifier not found' in str(err): + raise NotFound(scope=scope, name=name) + raise StorageException("Failed to get metadata from Rucio: %s" + % err) + if kwargs.get('fields') is not None: + result = {f: result.get(f, None) for f in kwargs['fields']} + return result + + def _scope_and_name(self, dsn): + """ Construct normalized scope and dataset name. + + As input accepts dataset names in two forms: + * dot-separated string: ".[.<...>]"; + * dot-separated string with prefix: ":.[.<...>]". + + In first case ID is taken as a canonical dataset name and scope is set + to its first field (or two first fields, if the ID starts with 'user' + or 'group'). + In second case prefix is taken as scope, and removed from ID to get the + canonical dataset name. + + :param dsn: dataset name + :type dsn: str + + :return: scope, datasetname + :rtype: tuple + """ + result = dsn.split(':') + if len(result) < 2: + splitted = dsn.split('.') + if dsn.startswith('user') or dsn.startswith('group'): + scope = '.'.join(splitted[0:2]) + else: + scope = splitted[0] + result = (scope, dsn) + return result From b8ddd92ba464c08ced67fec398d7998cd634d179 Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Thu, 11 Jul 2019 14:19:57 +0200 Subject: [PATCH 08/20] pyDKB/storages: improve docstrings. 
--- Utils/Dataflow/pyDKB/storages/atlas/rucio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Utils/Dataflow/pyDKB/storages/atlas/rucio.py b/Utils/Dataflow/pyDKB/storages/atlas/rucio.py index ab7632ab0..ab77a65b9 100644 --- a/Utils/Dataflow/pyDKB/storages/atlas/rucio.py +++ b/Utils/Dataflow/pyDKB/storages/atlas/rucio.py @@ -48,7 +48,7 @@ class RucioClient(Client, ParentClientClass): """ Implement common interface for Rucio client. """ def __init__(self, *args, **kwargs): - """ Initialize parent client class. """ + """ Initialize instance as parent client class object. """ ParentClientClass.__init__(self, *args, **kwargs) def get(self, oid, **kwargs): From ea6f358fe600d9b01c78fd9554a426ad8d2524df Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Thu, 11 Jul 2019 15:34:20 +0200 Subject: [PATCH 09/20] pyDKB/storages: add "bare" interface for ES client. Currently it is nothing but a wrapper around standard `elasticsearch.Elasticsearch` class, except that its `get()` method is overridden with `Client.get()` that raises `NotImplementedError`. --- Utils/Dataflow/pyDKB/storages/client/es.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 Utils/Dataflow/pyDKB/storages/client/es.py diff --git a/Utils/Dataflow/pyDKB/storages/client/es.py b/Utils/Dataflow/pyDKB/storages/client/es.py new file mode 100644 index 000000000..7d0438495 --- /dev/null +++ b/Utils/Dataflow/pyDKB/storages/client/es.py @@ -0,0 +1,21 @@ +""" +pyDKB.storages.client.es +""" + +from Client import Client +from pyDKB.common.misc import try_to_import + + +_ESClient = try_to_import('elasticsearch', 'Elasticsearch') + +ParentClientClass = _ESClient if _ESClient else object + + +class ESClient(Client, ParentClientClass): + """ Implement common interface for ES client. """ + + index = None + + def __init__(self, *args, **kwargs): + """ Initialize instance as parent client class object. 
""" + ParentClientClass.__init__(self, *args, **kwargs) From 04a2721db9eb07f95ee263857740f5da3ec96499 Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Thu, 11 Jul 2019 15:48:43 +0200 Subject: [PATCH 10/20] pyDKB/storages: add implementation of `ESClient.configure()` method. It allows to pass configuration to the client as hash. `__init__()` accepts parameters in the form consistent with the `elasticsearch.Elasticsearch()` parameters, so it is not possible to pass default index name when the object is created. --- Utils/Dataflow/pyDKB/storages/client/es.py | 54 ++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/Utils/Dataflow/pyDKB/storages/client/es.py b/Utils/Dataflow/pyDKB/storages/client/es.py index 7d0438495..d27448d91 100644 --- a/Utils/Dataflow/pyDKB/storages/client/es.py +++ b/Utils/Dataflow/pyDKB/storages/client/es.py @@ -4,6 +4,7 @@ from Client import Client from pyDKB.common.misc import try_to_import +from pyDKB.common.types import logLevel _ESClient = try_to_import('elasticsearch', 'Elasticsearch') @@ -19,3 +20,56 @@ class ESClient(Client, ParentClientClass): def __init__(self, *args, **kwargs): """ Initialize instance as parent client class object. """ ParentClientClass.__init__(self, *args, **kwargs) + + def configure(self, cfg): + """ Apply configuration. 
+ + Configuration parameters: + hosts (str) -- comma separated list of 'host:port' records + host (str) -- host name or IP (single) (ignored if hosts defined) + port (str) -- host port (ignored if hosts defined) + index (str) -- default index name + user (str) + passwd (str) + + :param cfg: configuration parameters + :type cfg: dict + """ + kwargs = {} + + hosts = None + host = {} + if cfg.get('hosts'): + hosts = [h.strip() for h in cfg['hosts'].split(',')] + if cfg.get('host'): + if cfg.get('hosts'): + self.log("Configuration parameter ignored: 'host' ('hosts' " + "specified)") + else: + host['host'] = cfg['host'] + if cfg.get('port'): + if cfg.get('hosts'): + self.log("Configuration parameter ignored: 'port' ('hosts' " + "specified)") + else: + host['port'] = cfg['port'] + if hosts or host: + kwargs['hosts'] = hosts if hosts else [host] + + if cfg.get('user'): + auth = (cfg['user'], ) + if cfg.get('passwd'): + auth += (cfg['passwd'], ) + else: + self.log("Configuration parameter missed: 'passwd' ('user' " + "specified)", logLevel.WARN) + kwargs['http_auth'] = auth + elif cfg.get('passwd'): + self.log("Configuration parameter ignored: 'passwd' ('user' " + "not specified)") + + if cfg.get('index'): + self.index = cfg['index'] + + # Re-initialize self as parent client class instance + ParentClientClass.__init__(self, **kwargs) From b7df5cd98b92f3cbedc9272168a2b4ce984e5714 Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Tue, 6 Aug 2019 11:19:12 +0200 Subject: [PATCH 11/20] pyDKB/storages: bug fix (undefined variable). 
--- Utils/Dataflow/pyDKB/storages/atlas/rucio.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Utils/Dataflow/pyDKB/storages/atlas/rucio.py b/Utils/Dataflow/pyDKB/storages/atlas/rucio.py index ab77a65b9..f5092d4db 100644 --- a/Utils/Dataflow/pyDKB/storages/atlas/rucio.py +++ b/Utils/Dataflow/pyDKB/storages/atlas/rucio.py @@ -11,6 +11,7 @@ if not os.environ.get("VIRTUAL_ENV", None): + base_dir = os.path.abspath(os.path.dirname(__file__)) user_rucio_dir = os.path.expanduser("~/.rucio") if os.path.exists(user_rucio_dir): os.environ["VIRTUAL_ENV"] = os.path.join(user_rucio_dir) From 2f6bb07fea2fee194419a7e3056ec77c9b587746 Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Tue, 6 Aug 2019 11:21:33 +0200 Subject: [PATCH 12/20] pyDKB/storages: fix docstrings. --- Utils/Dataflow/pyDKB/storages/exceptions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Utils/Dataflow/pyDKB/storages/exceptions.py b/Utils/Dataflow/pyDKB/storages/exceptions.py index 8eb7492b9..a0312c9e4 100644 --- a/Utils/Dataflow/pyDKB/storages/exceptions.py +++ b/Utils/Dataflow/pyDKB/storages/exceptions.py @@ -9,7 +9,7 @@ class StorageException(Exception): class NotFound(StorageException): - """ Exeption indicating that record with given ID not found. """ + """ Exception indicating that record with given ID not found. """ def __init__(self, **kwargs): """ Initialize exception. From 5c32433518495a31af9e11d9e2c21f5b2fec5e62 Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Tue, 13 Aug 2019 11:49:15 +0200 Subject: [PATCH 13/20] pyDKB/storages: typo fix. 
--- Utils/Dataflow/pyDKB/storages/client/Client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Utils/Dataflow/pyDKB/storages/client/Client.py b/Utils/Dataflow/pyDKB/storages/client/Client.py index 9d2dd754f..bb9a5d8b8 100644 --- a/Utils/Dataflow/pyDKB/storages/client/Client.py +++ b/Utils/Dataflow/pyDKB/storages/client/Client.py @@ -25,7 +25,7 @@ def get(self, oid, **kwargs): Raise ``NotFound`` exception if object / record not found. - :param oid: object / record identfier + :param oid: object / record identifier :type oid: str, int :return: record with given ID From 1d3c5cdb99c45c23d29e6a2e1f225e86176e2ff2 Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Mon, 2 Sep 2019 12:26:16 +0200 Subject: [PATCH 14/20] pyDKB/storages: update exception description. --- Utils/Dataflow/pyDKB/storages/exceptions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Utils/Dataflow/pyDKB/storages/exceptions.py b/Utils/Dataflow/pyDKB/storages/exceptions.py index a0312c9e4..44392d26a 100644 --- a/Utils/Dataflow/pyDKB/storages/exceptions.py +++ b/Utils/Dataflow/pyDKB/storages/exceptions.py @@ -9,7 +9,7 @@ class StorageException(Exception): class NotFound(StorageException): - """ Exception indicating that record with given ID not found. """ + """ Exception indicating that record with given ID is not found. """ def __init__(self, **kwargs): """ Initialize exception. From 9111b75db2113bf202d8e4fc0328490176c768a4 Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Mon, 2 Sep 2019 12:29:53 +0200 Subject: [PATCH 15/20] pyDKB/storages: update docs to use Sphinx :raise XXX: markdown. 
--- Utils/Dataflow/pyDKB/storages/__init__.py | 2 +- Utils/Dataflow/pyDKB/storages/client/Client.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Utils/Dataflow/pyDKB/storages/__init__.py b/Utils/Dataflow/pyDKB/storages/__init__.py index 14af6662c..33385497e 100644 --- a/Utils/Dataflow/pyDKB/storages/__init__.py +++ b/Utils/Dataflow/pyDKB/storages/__init__.py @@ -39,7 +39,7 @@ def getScope(scope): def getClient(name, scope=None): """ Get client for a given storage. - Raise ``StorageException`` if failed to get client by given name and scope. + :raise StorageException: failed to get client by given name and scope :param name: storage name :type name: str diff --git a/Utils/Dataflow/pyDKB/storages/client/Client.py b/Utils/Dataflow/pyDKB/storages/client/Client.py index bb9a5d8b8..fd472919c 100644 --- a/Utils/Dataflow/pyDKB/storages/client/Client.py +++ b/Utils/Dataflow/pyDKB/storages/client/Client.py @@ -23,7 +23,7 @@ def configure(self, cfg): def get(self, oid, **kwargs): """ Get object / record from storage by ID. - Raise ``NotFound`` exception if object / record not found. + :raise NotFound: object / record is not found :param oid: object / record identifier :type oid: str, int From 37de23183d84cacde82abae3eb2936f3619b5c19 Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Mon, 16 Sep 2019 14:01:45 +0300 Subject: [PATCH 16/20] pyDKB/storages: typo fix. Co-Authored-By: Evildoor --- Utils/Dataflow/pyDKB/storages/atlas/rucio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Utils/Dataflow/pyDKB/storages/atlas/rucio.py b/Utils/Dataflow/pyDKB/storages/atlas/rucio.py index f5092d4db..97dffb820 100644 --- a/Utils/Dataflow/pyDKB/storages/atlas/rucio.py +++ b/Utils/Dataflow/pyDKB/storages/atlas/rucio.py @@ -55,7 +55,7 @@ def __init__(self, *args, **kwargs): def get(self, oid, **kwargs): """ Get dataset metadata. - Implementation of interface method `Clent.get()`. + Implementation of interface method `Client.get()`. 
:param oid: dataset name :type oid: str From 6360f5d485387729122505ac4b8785e86653e10e Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Thu, 9 Jan 2020 14:47:08 +0100 Subject: [PATCH 17/20] pyDKB/misc: fix missed attribute handling in `try_to_import()`. --- Utils/Dataflow/pyDKB/common/misc.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Utils/Dataflow/pyDKB/common/misc.py b/Utils/Dataflow/pyDKB/common/misc.py index bebd54174..074fd542d 100644 --- a/Utils/Dataflow/pyDKB/common/misc.py +++ b/Utils/Dataflow/pyDKB/common/misc.py @@ -75,7 +75,9 @@ def try_to_import(modname, attrname=None): logLevel.ERROR) except AttributeError: if attrname: - log("Failed to import '%s' from '%s'" % (attrname, modname)) + log("Failed to import '%s' from '%s'" % (attrname, modname), + logLevel.ERROR) + result = False except Exception, err: log(str(err), logLevel.ERROR) return result From 5891f413452b37a43b9f7b84a2d5bfe3c9d917f4 Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Fri, 10 Jan 2020 11:05:05 +0100 Subject: [PATCH 18/20] pyDKB/misc: improve error handling in `try_to_import()`. There's only two possibilities: an exception was or was not thrown. Whatever exception it is, it indicates that we failed to import something we wanted to import => so we must return a value that indicates "import has failed" -- `False`. --- Utils/Dataflow/pyDKB/common/misc.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/Utils/Dataflow/pyDKB/common/misc.py b/Utils/Dataflow/pyDKB/common/misc.py index 074fd542d..5d6903784 100644 --- a/Utils/Dataflow/pyDKB/common/misc.py +++ b/Utils/Dataflow/pyDKB/common/misc.py @@ -65,19 +65,18 @@ def try_to_import(modname, attrname=None): If module/attribute can not be imported, catch the exception and output log message. 
""" - result = False + if attrname: + err_msg = "Failed to import '%s' from '%s'.\nDetails: " \ + % (attrname, modname) + else: + err_msg = "Failed to import module '%s'.\nDetails: " % (modname) + try: result = importlib.import_module(modname) if attrname: result = getattr(result, attrname) - except ImportError, err: - log("Failed to import '%s'.\nDetails: %s" % (modname, err), - logLevel.ERROR) - except AttributeError: - if attrname: - log("Failed to import '%s' from '%s'" % (attrname, modname), - logLevel.ERROR) - result = False except Exception, err: - log(str(err), logLevel.ERROR) + log(err_msg + str(err), logLevel.ERROR) + result = False + return result From 2f06b83e237c0bd19751ab1117395eac08d1de15 Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Fri, 10 Jan 2020 11:08:40 +0100 Subject: [PATCH 19/20] pyDKB/misc: add params and return value description to `try_to_import()`. --- Utils/Dataflow/pyDKB/common/misc.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Utils/Dataflow/pyDKB/common/misc.py b/Utils/Dataflow/pyDKB/common/misc.py index 5d6903784..44a1c0d6b 100644 --- a/Utils/Dataflow/pyDKB/common/misc.py +++ b/Utils/Dataflow/pyDKB/common/misc.py @@ -64,6 +64,15 @@ def try_to_import(modname, attrname=None): If module/attribute can not be imported, catch the exception and output log message. + + :param modname: module name + :type modname: str + :param attrname: attribute name (optional) + :type attrname: str + + :return: imported module, attribute (or submodule); + `False` in case of failure. + :rtype: object """ if attrname: err_msg = "Failed to import '%s' from '%s'.\nDetails: " \ From 10ac41a40ba73a5c4467b375ae32bb27c2d714fa Mon Sep 17 00:00:00 2001 From: Marina Golosova Date: Fri, 10 Jan 2020 11:17:37 +0100 Subject: [PATCH 20/20] pyDKB/misc: introduce special return value for `try_to_import()`. Who knows what value this or that attribute one wants to import has, right? And `False` looks like a pretty possible one... 
while string "NOT IMPORTED VALUE" should be less expected and I believe it won't conflict with real attributes values (except itself, but hopefully no one will use this function to import something added as service variable for this function itself). --- Utils/Dataflow/pyDKB/common/misc.py | 7 +++++-- Utils/Dataflow/pyDKB/storages/atlas/rucio.py | 4 ++-- Utils/Dataflow/pyDKB/storages/client/es.py | 4 ++-- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/Utils/Dataflow/pyDKB/common/misc.py b/Utils/Dataflow/pyDKB/common/misc.py index 44a1c0d6b..407241712 100644 --- a/Utils/Dataflow/pyDKB/common/misc.py +++ b/Utils/Dataflow/pyDKB/common/misc.py @@ -14,6 +14,9 @@ # Datetime format for log messages DTFORMAT = '%Y-%m-%d %H:%M:%S' +# Special value for `try_to_import()` to indicate failure +NOT_IMPORTED = 'NOT IMPORTED VALUE' + def log(message, level=logLevel.INFO, *args): """ Output log message with given log level. @@ -71,7 +74,7 @@ def try_to_import(modname, attrname=None): :type attrname: str :return: imported module, attribute (or submodule); - `False` in case of failure. + ``NOT_IMPORTED`` in case of failure. :rtype: object """ if attrname: @@ -86,6 +89,6 @@ def try_to_import(modname, attrname=None): result = getattr(result, attrname) except Exception, err: log(err_msg + str(err), logLevel.ERROR) - result = False + result = NOT_IMPORTED return result diff --git a/Utils/Dataflow/pyDKB/storages/atlas/rucio.py b/Utils/Dataflow/pyDKB/storages/atlas/rucio.py index 97dffb820..df450820f 100644 --- a/Utils/Dataflow/pyDKB/storages/atlas/rucio.py +++ b/Utils/Dataflow/pyDKB/storages/atlas/rucio.py @@ -7,7 +7,7 @@ from ..client import Client from ..exceptions import (StorageException, NotFound) from pyDKB.common.misc import (log, logLevel) -from pyDKB.common.misc import try_to_import +from pyDKB.common.misc import (try_to_import, NOT_IMPORTED) if not os.environ.get("VIRTUAL_ENV", None): @@ -29,7 +29,7 @@ def _initClient(): """ Initialize client. 
""" global _client - if not _RucioClient: + if _RucioClient is NOT_IMPORTED: raise StorageException("Failed to initialize Rucio client: required " "module(s) not loaded.") _client = RucioClient() diff --git a/Utils/Dataflow/pyDKB/storages/client/es.py b/Utils/Dataflow/pyDKB/storages/client/es.py index d27448d91..44f63e23a 100644 --- a/Utils/Dataflow/pyDKB/storages/client/es.py +++ b/Utils/Dataflow/pyDKB/storages/client/es.py @@ -3,13 +3,13 @@ """ from Client import Client -from pyDKB.common.misc import try_to_import +from pyDKB.common.misc import (try_to_import, NOT_IMPORTED) from pyDKB.common.types import logLevel _ESClient = try_to_import('elasticsearch', 'Elasticsearch') -ParentClientClass = _ESClient if _ESClient else object +ParentClientClass = _ESClient if _ESClient is not NOT_IMPORTED else object class ESClient(Client, ParentClientClass):