Skip to content

Commit

Permalink
ADD: Repository methods for repo CLI and other features (#5156)
Browse files Browse the repository at this point in the history
Methods added and modifications:

* Added key_format property, necessary to check compatibility when
 operating between different repositories.
* Added has_objects as a new abstract method and adapted has_object to use it
* Added delete_objects as a new abstract method and adapted delete_object to use it
* Added list_objects method
* While modifying the subclasses I removed some of their docstrings,
 since these are inherited from the parent class anyway and were
 just adding visual noise and maintenance cost.
  • Loading branch information
ramirezfranciscof authored Oct 4, 2021
1 parent 2c5cc6f commit 2a5d776
Show file tree
Hide file tree
Showing 7 changed files with 197 additions and 74 deletions.
34 changes: 14 additions & 20 deletions aiida/backends/general/migrations/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import os
import pathlib
import re
import typing
from typing import Dict, Iterable, List, Optional, Union

from disk_objectstore import Container
from disk_objectstore.utils import LazyOpener
Expand All @@ -42,8 +42,8 @@ def __init__(
self,
name: str = '',
file_type: FileType = FileType.DIRECTORY,
key: typing.Union[str, None, LazyOpener] = None,
objects: typing.Dict[str, 'File'] = None
key: Union[str, None, LazyOpener] = None,
objects: Dict[str, 'File'] = None
):
# pylint: disable=super-init-not-called
if not isinstance(name, str):
Expand Down Expand Up @@ -86,43 +86,37 @@ class NoopRepositoryBackend(AbstractRepositoryBackend):
"""

@property
def uuid(self) -> typing.Optional[str]:
def uuid(self) -> Optional[str]:
"""Return the unique identifier of the repository.
.. note:: A sandbox folder does not have the concept of a unique identifier and so always returns ``None``.
"""
return None

def initialise(self, **kwargs) -> None:
"""Initialise the repository if it hasn't already been initialised.
@property
def key_format(self) -> Optional[str]:
return None

:param kwargs: parameters for the initialisation.
"""
def initialise(self, **kwargs) -> None:
raise NotImplementedError()

@property
def is_initialised(self) -> bool:
"""Return whether the repository has been initialised."""
return True

def erase(self):
raise NotImplementedError()

def _put_object_from_filelike(self, handle: io.BufferedIOBase) -> str:
"""Store the byte contents of a file in the repository.
:param handle: filelike object with the byte content to be stored.
:return: the generated fully qualified identifier for the object within the repository.
:raises TypeError: if the handle is not a byte stream.
"""
return LazyOpener(handle.name)

def has_object(self, key: str) -> bool:
"""Return whether the repository has an object with the given key.
def has_objects(self, keys: List[str]) -> List[bool]:
raise NotImplementedError()

:param key: fully qualified identifier for the object within the repository.
:return: True if the object exists, False otherwise.
"""
def delete_objects(self, keys: List[str]) -> None:
raise NotImplementedError()

def list_objects(self) -> Iterable[str]:
raise NotImplementedError()


Expand Down
61 changes: 52 additions & 9 deletions aiida/repository/backend/abstract.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import hashlib
import io
import pathlib
import typing
from typing import BinaryIO, Iterable, Iterator, List, Optional, Union

from aiida.common.hashing import chunked_file_hash

Expand All @@ -30,9 +30,19 @@ class AbstractRepositoryBackend(metaclass=abc.ABCMeta):

@property
@abc.abstractmethod
def uuid(self) -> typing.Optional[str]:
def uuid(self) -> Optional[str]:
"""Return the unique identifier of the repository."""

@property
@abc.abstractmethod
def key_format(self) -> Optional[str]:
    """Return the format for the keys of the repository.

    This matters when migrating between backends (e.g. archive -> main): if the key formats of the two
    repositories are not equal, all the `Node.repository_metadata` have to be re-computed before importing,
    otherwise they will not match with the repository.
    """

@abc.abstractmethod
def initialise(self, **kwargs) -> None:
"""Initialise the repository if it hasn't already been initialised.
Expand All @@ -58,7 +68,7 @@ def erase(self) -> None:
def is_readable_byte_stream(handle) -> bool:
return hasattr(handle, 'read') and hasattr(handle, 'mode') and 'b' in handle.mode

def put_object_from_filelike(self, handle: typing.BinaryIO) -> str:
def put_object_from_filelike(self, handle: BinaryIO) -> str:
"""Store the byte contents of a file in the repository.
:param handle: filelike object with the byte content to be stored.
Expand All @@ -70,10 +80,10 @@ def put_object_from_filelike(self, handle: typing.BinaryIO) -> str:
return self._put_object_from_filelike(handle)

@abc.abstractmethod
def _put_object_from_filelike(self, handle: typing.BinaryIO) -> str:
def _put_object_from_filelike(self, handle: BinaryIO) -> str:
pass

def put_object_from_file(self, filepath: typing.Union[str, pathlib.Path]) -> str:
def put_object_from_file(self, filepath: Union[str, pathlib.Path]) -> str:
"""Store a new object with contents of the file located at `filepath` on this file system.
:param filepath: absolute path of file whose contents to copy to the repository.
Expand All @@ -84,15 +94,33 @@ def put_object_from_file(self, filepath: typing.Union[str, pathlib.Path]) -> str
return self.put_object_from_filelike(handle)

@abc.abstractmethod
def has_objects(self, keys: List[str]) -> List[bool]:
    """Return whether the repository has an object for each of the given keys.

    :param keys:
        list of fully qualified identifiers for objects within the repository.
    :return:
        list of booleans, in the same order as the keys provided, with value True if the respective
        object exists and False otherwise.
    """

def has_object(self, key: str) -> bool:
    """Return whether the repository has an object with the given key.

    :param key: fully qualified identifier for the object within the repository.
    :return: True if the object exists, False otherwise.
    """
    # Delegate to the batch method with a single-element list and unwrap its first (only) result.
    return self.has_objects([key])[0]

@abc.abstractmethod
def list_objects(self) -> Iterable[str]:
    """Return an iterable that yields the keys of all objects available in the repository.

    :return: an iterable over the keys of all the available objects.
    """

@contextlib.contextmanager
def open(self, key: str) -> typing.Iterator[typing.BinaryIO]:
def open(self, key: str) -> Iterator[BinaryIO]:
"""Open a file handle to an object stored under the given key.
.. note:: this should only be used to open a handle to read an existing file. To write a new file use the method
Expand Down Expand Up @@ -130,12 +158,27 @@ def get_object_hash(self, key: str) -> str:
with self.open(key) as handle: # pylint: disable=not-context-manager
return chunked_file_hash(handle, hashlib.sha256)

def delete_object(self, key: str):
@abc.abstractmethod
def delete_objects(self, keys: List[str]) -> None:
    """Delete the objects from the repository.

    This base implementation only validates that every key corresponds to an existing object; it does not
    delete anything itself. It is meant to be invoked through ``super()`` by the concrete implementation
    before that performs the actual deletion.

    :param keys: list of fully qualified identifiers for the objects within the repository.
    :raise FileNotFoundError: if any of the files does not exist.
    :raise OSError: if any of the files could not be deleted.
    """
    # ``has_objects`` returns its results in the same order as ``keys``, so the two can be paired directly.
    missing = [key for key, exists in zip(keys, self.has_objects(keys)) if not exists]
    if missing:
        # Report every missing key at once, one line per key, instead of stopping at the first one.
        details = ''.join(f' > object with key `{key}` does not exist.\n' for key in missing)
        raise FileNotFoundError(
            f'some of the keys provided do not correspond to any object in the repository:\n{details}'
        )

def delete_object(self, key: str) -> None:
"""Delete the object from the repository.
:param key: fully qualified identifier for the object within the repository.
:raise FileNotFoundError: if the file does not exist.
:raise OSError: if the file could not be deleted.
"""
if not self.has_object(key):
raise FileNotFoundError(f'object with key `{key}` does not exist.')
return self.delete_objects([key])
36 changes: 16 additions & 20 deletions aiida/repository/backend/disk_object_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"""Implementation of the ``AbstractRepositoryBackend`` using the ``disk-objectstore`` as the backend."""
import contextlib
import shutil
import typing
from typing import BinaryIO, Iterable, Iterator, List, Optional

from disk_objectstore import Container

Expand All @@ -27,12 +27,16 @@ def __str__(self) -> str:
return 'DiskObjectStoreRepository: <uninitialised>'

@property
def uuid(self) -> typing.Optional[str]:
def uuid(self) -> Optional[str]:
"""Return the unique identifier of the repository."""
if not self.is_initialised:
return None
return self.container.container_id

@property
def key_format(self) -> Optional[str]:
    """Return the format for the keys of the repository, which is the hash type of the container."""
    return self.container.hash_type

def initialise(self, **kwargs) -> None:
"""Initialise the repository if it hasn't already been initialised.
Expand All @@ -56,25 +60,20 @@ def erase(self):
except FileNotFoundError:
pass

def _put_object_from_filelike(self, handle: typing.BinaryIO) -> str:
def _put_object_from_filelike(self, handle: BinaryIO) -> str:
"""Store the byte contents of a file in the repository.
:param handle: filelike object with the byte content to be stored.
:return: the generated fully qualified identifier for the object within the repository.
:raises TypeError: if the handle is not a byte stream.
"""
return self.container.add_object(handle.read())
return self.container.add_streamed_object(handle)

def has_object(self, key: str) -> bool:
"""Return whether the repository has an object with the given key.
:param key: fully qualified identifier for the object within the repository.
:return: True if the object exists, False otherwise.
"""
return self.container.has_object(key)
def has_objects(self, keys: List[str]) -> List[bool]:
return self.container.has_objects(keys)

@contextlib.contextmanager
def open(self, key: str) -> typing.Iterator[typing.BinaryIO]:
def open(self, key: str) -> Iterator[BinaryIO]:
"""Open a file handle to an object stored under the given key.
.. note:: this should only be used to open a handle to read an existing file. To write a new file use the method
Expand All @@ -90,15 +89,12 @@ def open(self, key: str) -> typing.Iterator[typing.BinaryIO]:
with self.container.get_object_stream(key) as handle:
yield handle # type: ignore[misc]

def delete_object(self, key: str):
"""Delete the object from the repository.
def delete_objects(self, keys: List[str]) -> None:
    """Delete the objects with the given keys from the container.

    :param keys: list of fully qualified identifiers for the objects within the repository.
    """
    # The parent implementation raises ``FileNotFoundError`` if any key is missing, before anything is deleted.
    super().delete_objects(keys)
    self.container.delete_objects(keys)

:param key: fully qualified identifier for the object within the repository.
:raise FileNotFoundError: if the file does not exist.
:raise OSError: if the file could not be deleted.
"""
super().delete_object(key)
self.container.delete_objects([key])
def list_objects(self) -> Iterable[str]:
    """Return the iterable of object keys, as produced by the container's ``list_all_objects``."""
    return self.container.list_all_objects()

def get_object_hash(self, key: str) -> str:
"""Return the SHA-256 hash of an object stored under the given key.
Expand Down
41 changes: 21 additions & 20 deletions aiida/repository/backend/sandbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import contextlib
import os
import shutil
import typing
from typing import BinaryIO, Iterable, Iterator, List, Optional
import uuid

from .abstract import AbstractRepositoryBackend
Expand All @@ -16,7 +16,7 @@ class SandboxRepositoryBackend(AbstractRepositoryBackend):

def __init__(self):
from aiida.common.folders import SandboxFolder
self._sandbox: typing.Optional[SandboxFolder] = None
self._sandbox: Optional[SandboxFolder] = None

def __str__(self) -> str:
"""Return the string representation of this repository."""
Expand All @@ -29,13 +29,17 @@ def __del__(self):
self.erase()

@property
def uuid(self) -> typing.Optional[str]:
def uuid(self) -> Optional[str]:
"""Return the unique identifier of the repository.
.. note:: A sandbox folder does not have the concept of a unique identifier and so always returns ``None``.
"""
return None

@property
def key_format(self) -> Optional[str]:
    """Return the format for the keys of the repository, which this backend reports as ``'uuid4'``."""
    return 'uuid4'

def initialise(self, **kwargs) -> None:
"""Initialise the repository if it hasn't already been initialised.
Expand Down Expand Up @@ -70,7 +74,7 @@ def erase(self):
finally:
self._sandbox = None

def _put_object_from_filelike(self, handle: typing.BinaryIO) -> str:
def _put_object_from_filelike(self, handle: BinaryIO) -> str:
"""Store the byte contents of a file in the repository.
:param handle: filelike object with the byte content to be stored.
Expand All @@ -85,16 +89,15 @@ def _put_object_from_filelike(self, handle: typing.BinaryIO) -> str:

return key

def has_object(self, key: str) -> bool:
"""Return whether the repository has an object with the given key.
:param key: fully qualified identifier for the object within the repository.
:return: True if the object exists, False otherwise.
"""
return key in os.listdir(self.sandbox.abspath)
def has_objects(self, keys: List[str]) -> List[bool]:
    """Return whether the sandbox has an object for each of the given keys.

    :param keys: list of fully qualified identifiers for objects within the repository.
    :return: list of booleans, in the same order as ``keys``, with value True if the respective object
        exists and False otherwise.
    """
    # List the sandbox directory only once and keep the names in a set, so each membership test is O(1).
    existing = set(os.listdir(self.sandbox.abspath))
    return [key in existing for key in keys]

@contextlib.contextmanager
def open(self, key: str) -> typing.Iterator[typing.BinaryIO]:
def open(self, key: str) -> Iterator[BinaryIO]:
"""Open a file handle to an object stored under the given key.
.. note:: this should only be used to open a handle to read an existing file. To write a new file use the method
Expand All @@ -110,12 +113,10 @@ def open(self, key: str) -> typing.Iterator[typing.BinaryIO]:
with self.sandbox.open(key, mode='rb') as handle:
yield handle

def delete_object(self, key: str):
"""Delete the object from the repository.
def delete_objects(self, keys: List[str]) -> None:
    """Delete the objects with the given keys from the sandbox folder.

    :param keys: list of fully qualified identifiers for the objects within the repository.
    """
    # The parent implementation raises ``FileNotFoundError`` if any key is missing, before any file is removed.
    super().delete_objects(keys)
    for key in keys:
        # The path removed for each object is the sandbox directory joined with the object's key.
        os.remove(os.path.join(self.sandbox.abspath, key))

:param key: fully qualified identifier for the object within the repository.
:raise FileNotFoundError: if the file does not exist.
:raise OSError: if the file could not be deleted.
"""
super().delete_object(key)
os.remove(os.path.join(self.sandbox.abspath, key))
def list_objects(self) -> Iterable[str]:
    """Return the content list of the sandbox folder as the iterable of object keys."""
    return self.sandbox.get_content_list()
Loading

0 comments on commit 2a5d776

Please sign in to comment.