add can_read method to HDMFIO and HDF5IO #875

Merged · 10 commits · Jul 10, 2023
Changes from all commits
4 changes: 2 additions & 2 deletions CHANGELOG.md
@@ -1,17 +1,17 @@
# HDMF Changelog

## HDMF 3.7.0 (Upcoming)

### New features and minor improvements
- Updated `ExternalResources` to have an `EntityKeyTable`, with updated tests/documentation and a minor bug fix to `ObjectKeyTable`. @mavaylon1 [#872](https://github.com/hdmf-dev/hdmf/pull/872)
- Added abstract static method `HDMFIO.can_read()` and concrete static method `HDF5IO.can_read()`. @bendichter [#875](https://github.com/hdmf-dev/hdmf/pull/875)
- Added warning for `DynamicTableRegion` links that are not added to the same parent as the original container object. @mavaylon1 [#891](https://github.com/hdmf-dev/hdmf/pull/891)
- Added the `TermSet` class along with integrated validation methods for any child of `AbstractContainer`, e.g., `VectorData`, `Data`, `DynamicTable`. @mavaylon1 [#880](https://github.com/hdmf-dev/hdmf/pull/880)
- Allow for `datetime.date` to be used instead of `datetime.datetime`. @bendichter [#874](https://github.com/hdmf-dev/hdmf/pull/874)
- Updated `HDMFIO` and `HDF5IO` to support `ExternalResources`. @mavaylon1 [#895](https://github.com/hdmf-dev/hdmf/pull/895)
- Dropped Python 3.7 support. @rly [#897](https://github.com/hdmf-dev/hdmf/pull/897)

### Documentation and tutorial enhancements:

- Added tutorial for the new `TermSet` class. @mavaylon1 [#880](https://github.com/hdmf-dev/hdmf/pull/880)

## Bug fixes
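The `can_read()` API added by this PR can be exercised without instantiating the backend. A minimal usage sketch (not part of this diff; `"data/example.h5"` is a hypothetical path):

```python
from hdmf.backends.hdf5 import HDF5IO

path = "data/example.h5"  # hypothetical file path

# can_read() is a static method, so no HDF5IO instance (and no open
# file handle) is needed to check whether this backend can handle the path.
if HDF5IO.can_read(path):
    with HDF5IO(path, mode="r") as io:
        container = io.read()
```
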
29 changes: 20 additions & 9 deletions src/hdmf/backends/hdf5/h5tools.py
@@ -36,6 +36,17 @@ class HDF5IO(HDMFIO):

__ns_spec_path = 'namespace' # path to the namespace dataset within a namespace group

@staticmethod
def can_read(path):
"""Determines whether a given path is readable by the HDF5IO class"""
if not os.path.isfile(path):
return False
try:
with h5py.File(path, "r"):
return True
except IOError:
return False

@docval({'name': 'path', 'type': (str, Path), 'doc': 'the path to the HDF5 file', 'default': None},
{'name': 'mode', 'type': str,
'doc': ('the mode to open the HDF5 file with, one of ("w", "r", "r+", "a", "w-", "x"). '
@@ -82,8 +93,8 @@ def __init__(self, **kwargs):
self.__file = file_obj
super().__init__(manager, source=path, external_resources_path=external_resources_path)
# NOTE: source is not set if path is None and file_obj is passed
        self.__built = dict()  # keep track of each builder for each dataset/group/link for each file
        self.__read = dict()  # keep track of which files have been read. Key is the filename; value is the builder
self.__ref_queue = deque() # a queue of the references that need to be added
self.__dci_queue = HDF5IODataChunkIteratorQueue() # a queue of DataChunkIterators that need to be exhausted
ObjectMapper.no_convert(Dataset)
@@ -603,7 +614,7 @@ def __read_group(self, h5obj, name=None, ignore=set()):
builder = self.__read_dataset(target_obj, builder_name)
else:
builder = self.__read_group(target_obj, builder_name, ignore=ignore)
                self.__set_built(sub_h5obj.file.filename, target_obj.id, builder)
link_builder = LinkBuilder(builder=builder, name=k, source=os.path.abspath(h5obj.file.filename))
link_builder.location = h5obj.name
self.__set_written(link_builder)
@@ -648,7 +659,7 @@ def __read_dataset(self, h5obj, name=None):
name = str(os.path.basename(h5obj.name))
kwargs['source'] = os.path.abspath(h5obj.file.filename)
ndims = len(h5obj.shape)
        if ndims == 0:  # read scalar
scalar = h5obj[()]
if isinstance(scalar, bytes):
scalar = scalar.decode('UTF-8')
@@ -678,7 +689,7 @@ def __read_dataset(self, h5obj, name=None):
elif isinstance(elem1, Reference):
d = BuilderH5ReferenceDataset(h5obj, self)
kwargs['dtype'] = d.dtype
            elif h5obj.dtype.kind == 'V':  # table / compound data type
cpd_dt = h5obj.dtype
ref_cols = [check_dtype(ref=cpd_dt[i]) or check_dtype(vlen=cpd_dt[i]) for i in range(len(cpd_dt))]
d = BuilderH5TableDataset(h5obj, self, ref_cols)
@@ -708,7 +719,7 @@ def __compound_dtype_to_list(cls, h5obj_dtype, dset_dtype):
def __read_attrs(self, h5obj):
ret = dict()
for k, v in h5obj.attrs.items():
            if k == SPEC_LOC_ATTR:  # ignore cached spec
continue
if isinstance(v, RegionReference):
raise ValueError("cannot read region reference attributes yet")
@@ -925,14 +936,14 @@ def set_attributes(self, **kwargs):
self.logger.debug("Setting %s '%s' attribute '%s' to %s"
% (obj.__class__.__name__, obj.name, key, value.__class__.__name__))
obj.attrs[key] = value
                elif isinstance(value, (Container, Builder, ReferenceBuilder)):  # a reference
self.__queue_ref(self._make_attr_ref_filler(obj, key, value))
else:
self.logger.debug("Setting %s '%s' attribute '%s' to %s"
% (obj.__class__.__name__, obj.name, key, value.__class__.__name__))
if isinstance(value, np.ndarray) and value.dtype.kind == 'U':
value = np.array(value, dtype=H5_TEXT)
                    obj.attrs[key] = value  # a regular scalar
except Exception as e:
msg = "unable to write attribute '%s' on object '%s'" % (key, obj.name)
raise RuntimeError(msg) from e
@@ -1079,7 +1090,7 @@ def write_dataset(self, **kwargs): # noqa: C901
name = builder.name
data = builder.data
dataio = None
        options = dict()  # dict with additional
if isinstance(data, H5DataIO):
options['io_settings'] = data.io_settings
dataio = data
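A short self-contained demo of the three outcomes `HDF5IO.can_read()` distinguishes (an editorial sketch, not part of the PR; file names are illustrative):

```python
import h5py
import numpy as np
from hdmf.backends.hdf5 import HDF5IO

# A real HDF5 file: h5py can open it, so can_read() returns True.
with h5py.File("demo.h5", "w") as f:
    f.create_dataset("x", data=np.arange(3))
print(HDF5IO.can_read("demo.h5"))       # True

# A path that is not a file: the os.path.isfile() guard returns False.
print(HDF5IO.can_read("no_such_file"))  # False

# An existing file that is not HDF5: h5py raises OSError (caught as IOError).
with open("notes.txt", "w") as f:
    f.write("plain text")
print(HDF5IO.can_read("notes.txt"))     # False
```
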
7 changes: 7 additions & 0 deletions src/hdmf/backends/io.py
@@ -10,6 +10,13 @@


class HDMFIO(metaclass=ABCMeta):

@staticmethod
@abstractmethod
def can_read(path):
"""Determines whether a given path is readable by this HDMFIO class"""
pass

@docval({'name': 'manager', 'type': BuildManager,
'doc': 'the BuildManager to use for I/O', 'default': None},
{"name": "source", "type": (str, Path),
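With `can_read()` now abstract on `HDMFIO`, every concrete backend must override it. A hypothetical subclass sketch (the class name and the directory-based check are illustrative, not from this PR):

```python
import os

from hdmf.backends.io import HDMFIO

class DirectoryStoreIO(HDMFIO):
    """Illustrative backend whose storage format is a directory tree."""

    @staticmethod
    def can_read(path):
        # Mirror HDF5IO's contract: return False rather than raising
        # when the path cannot be handled by this backend.
        return os.path.isdir(path)

    # The remaining HDMFIO abstract methods still need real implementations.
    def read_builder(self): ...
    def write_builder(self, builder): ...
    def open(self): ...
    def close(self): ...
```
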
14 changes: 14 additions & 0 deletions tests/unit/test_io_hdf5_h5tools.py
@@ -3228,6 +3228,10 @@ def test_non_manager_container(self):

class OtherIO(HDMFIO):

@staticmethod
def can_read(path):
pass

def read_builder(self):
pass

@@ -3257,6 +3261,10 @@ def test_non_HDF5_src_link_data_true(self):

class OtherIO(HDMFIO):

@staticmethod
def can_read(path):
pass

def __init__(self, manager):
super().__init__(manager=manager)

@@ -3570,3 +3578,9 @@ def test_dataio_shape_then_data(self):
dataio = H5DataIO(shape=(10, 10), dtype=int)
with self.assertRaisesRegex(ValueError, "Setting data when dtype and shape are not None is not supported"):
dataio.data = list()


def test_hdf5io_can_read():
assert not HDF5IO.can_read("not_a_file")
assert HDF5IO.can_read("tests/unit/back_compat_tests/1.0.5.h5")
assert not HDF5IO.can_read(__file__) # this file is not an HDF5 file
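
One portability note on the new test (editorial observation): `HDF5IO.can_read("tests/unit/back_compat_tests/1.0.5.h5")` resolves relative to the current working directory, so it assumes pytest is run from the repository root. A sketch of a location-independent variant using the same fixture:

```python
from pathlib import Path

def test_hdf5io_can_read_robust():
    # Resolve the fixture relative to this test module rather than the CWD,
    # so the assertion holds no matter where pytest is invoked from.
    fixture = Path(__file__).parent / "back_compat_tests" / "1.0.5.h5"
    assert HDF5IO.can_read(str(fixture))
```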