Skip to content

Commit

Permalink
add can_read method to HDMFIO and HDF5IO (#875)
Browse files Browse the repository at this point in the history
Co-authored-by: Ryan Ly <[email protected]>
  • Loading branch information
bendichter and rly authored Jul 10, 2023
1 parent 6b1a55f commit 1c7895f
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 11 deletions.
4 changes: 2 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
# HDMF Changelog

## HMDF 3.7.0 (Upcoming)
## HDMF 3.7.0 (Upcoming)

### New features and minor improvements
- Updated `ExternalResources` to have EntityKeyTable with updated tests/documentation and minor bug fix to ObjectKeyTable. @mavaylon1 [#872](https://github.com/hdmf-dev/hdmf/pull/872)
- Added abstract static method `HDMFIO.can_read()` and concrete static method `HDF5IO.can_read()`. @bendichter [#875](https://github.com/hdmf-dev/hdmf/pull/875)
- Added warning for `DynamicTableRegion` links that are not added to the same parent as the original container object. @mavaylon1 [#891](https://github.com/hdmf-dev/hdmf/pull/891)
- Added the `TermSet` class along with integrated validation methods for any child of `AbstractContainer`, e.g., `VectorData`, `Data`, `DynamicTable`. @mavaylon1 [#880](https://github.com/hdmf-dev/hdmf/pull/880)
- Allow for `datetime.date` to be used instead of `datetime.datetime`. @bendichter [#874](https://github.com/hdmf-dev/hdmf/pull/874)
- Updated `HDMFIO` and `HDF5IO` to support `ExternalResources`. @mavaylon1 [#895](https://github.com/hdmf-dev/hdmf/pull/895)
- Dropped Python 3.7 support. @rly [#897](https://github.com/hdmf-dev/hdmf/pull/897)

### Documentation and tutorial enhancements:

- Added tutorial for the new `TermSet` class @mavaylon1 [#880](https://github.com/hdmf-dev/hdmf/pull/880)

### Bug fixes
Expand Down
29 changes: 20 additions & 9 deletions src/hdmf/backends/hdf5/h5tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,17 @@ class HDF5IO(HDMFIO):

__ns_spec_path = 'namespace' # path to the namespace dataset within a namespace group

@staticmethod
def can_read(path):
    """Return True if ``path`` is a file that h5py can open for reading."""
    # A path that is missing, or that points at a directory, cannot be
    # a readable HDF5 file.
    if not os.path.isfile(path):
        return False
    # Probe the file by opening it read-only; h5py raises IOError/OSError
    # for anything that is not a valid HDF5 container.
    try:
        handle = h5py.File(path, "r")
    except IOError:
        return False
    handle.close()
    return True

@docval({'name': 'path', 'type': (str, Path), 'doc': 'the path to the HDF5 file', 'default': None},
{'name': 'mode', 'type': str,
'doc': ('the mode to open the HDF5 file with, one of ("w", "r", "r+", "a", "w-", "x"). '
Expand Down Expand Up @@ -82,8 +93,8 @@ def __init__(self, **kwargs):
self.__file = file_obj
super().__init__(manager, source=path, external_resources_path=external_resources_path)
# NOTE: source is not set if path is None and file_obj is passed
self.__built = dict() # keep track of each builder for each dataset/group/link for each file
self.__read = dict() # keep track of which files have been read. Key is the filename value is the builder
self.__built = dict() # keep track of each builder for each dataset/group/link for each file
self.__read = dict() # keep track of which files have been read. Key is the filename value is the builder
self.__ref_queue = deque() # a queue of the references that need to be added
self.__dci_queue = HDF5IODataChunkIteratorQueue() # a queue of DataChunkIterators that need to be exhausted
ObjectMapper.no_convert(Dataset)
Expand Down Expand Up @@ -603,7 +614,7 @@ def __read_group(self, h5obj, name=None, ignore=set()):
builder = self.__read_dataset(target_obj, builder_name)
else:
builder = self.__read_group(target_obj, builder_name, ignore=ignore)
self.__set_built(sub_h5obj.file.filename, target_obj.id, builder)
self.__set_built(sub_h5obj.file.filename, target_obj.id, builder)
link_builder = LinkBuilder(builder=builder, name=k, source=os.path.abspath(h5obj.file.filename))
link_builder.location = h5obj.name
self.__set_written(link_builder)
Expand Down Expand Up @@ -648,7 +659,7 @@ def __read_dataset(self, h5obj, name=None):
name = str(os.path.basename(h5obj.name))
kwargs['source'] = os.path.abspath(h5obj.file.filename)
ndims = len(h5obj.shape)
if ndims == 0: # read scalar
if ndims == 0: # read scalar
scalar = h5obj[()]
if isinstance(scalar, bytes):
scalar = scalar.decode('UTF-8')
Expand Down Expand Up @@ -678,7 +689,7 @@ def __read_dataset(self, h5obj, name=None):
elif isinstance(elem1, Reference):
d = BuilderH5ReferenceDataset(h5obj, self)
kwargs['dtype'] = d.dtype
elif h5obj.dtype.kind == 'V': # table / compound data type
elif h5obj.dtype.kind == 'V': # table / compound data type
cpd_dt = h5obj.dtype
ref_cols = [check_dtype(ref=cpd_dt[i]) or check_dtype(vlen=cpd_dt[i]) for i in range(len(cpd_dt))]
d = BuilderH5TableDataset(h5obj, self, ref_cols)
Expand Down Expand Up @@ -708,7 +719,7 @@ def __compound_dtype_to_list(cls, h5obj_dtype, dset_dtype):
def __read_attrs(self, h5obj):
ret = dict()
for k, v in h5obj.attrs.items():
if k == SPEC_LOC_ATTR: # ignore cached spec
if k == SPEC_LOC_ATTR: # ignore cached spec
continue
if isinstance(v, RegionReference):
raise ValueError("cannot read region reference attributes yet")
Expand Down Expand Up @@ -925,14 +936,14 @@ def set_attributes(self, **kwargs):
self.logger.debug("Setting %s '%s' attribute '%s' to %s"
% (obj.__class__.__name__, obj.name, key, value.__class__.__name__))
obj.attrs[key] = value
elif isinstance(value, (Container, Builder, ReferenceBuilder)): # a reference
elif isinstance(value, (Container, Builder, ReferenceBuilder)): # a reference
self.__queue_ref(self._make_attr_ref_filler(obj, key, value))
else:
self.logger.debug("Setting %s '%s' attribute '%s' to %s"
% (obj.__class__.__name__, obj.name, key, value.__class__.__name__))
if isinstance(value, np.ndarray) and value.dtype.kind == 'U':
value = np.array(value, dtype=H5_TEXT)
obj.attrs[key] = value # a regular scalar
obj.attrs[key] = value # a regular scalar
except Exception as e:
msg = "unable to write attribute '%s' on object '%s'" % (key, obj.name)
raise RuntimeError(msg) from e
Expand Down Expand Up @@ -1079,7 +1090,7 @@ def write_dataset(self, **kwargs): # noqa: C901
name = builder.name
data = builder.data
dataio = None
options = dict() # dict with additional
options = dict() # dict with additional
if isinstance(data, H5DataIO):
options['io_settings'] = data.io_settings
dataio = data
Expand Down
7 changes: 7 additions & 0 deletions src/hdmf/backends/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,13 @@


class HDMFIO(metaclass=ABCMeta):

@staticmethod
@abstractmethod
def can_read(path):
    """Whether the file at ``path`` is readable by this HDMFIO backend.

    Concrete subclasses must provide an implementation.
    """

@docval({'name': 'manager', 'type': BuildManager,
'doc': 'the BuildManager to use for I/O', 'default': None},
{"name": "source", "type": (str, Path),
Expand Down
14 changes: 14 additions & 0 deletions tests/unit/test_io_hdf5_h5tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -3228,6 +3228,10 @@ def test_non_manager_container(self):

class OtherIO(HDMFIO):

@staticmethod
def can_read(path):
    # Dummy test backend; readability reporting is intentionally a no-op.
    return None

def read_builder(self):
pass

Expand Down Expand Up @@ -3257,6 +3261,10 @@ def test_non_HDF5_src_link_data_true(self):

class OtherIO(HDMFIO):

@staticmethod
def can_read(path):
    # Dummy test backend; readability reporting is intentionally a no-op.
    return None

def __init__(self, manager):
super().__init__(manager=manager)

Expand Down Expand Up @@ -3570,3 +3578,9 @@ def test_dataio_shape_then_data(self):
dataio = H5DataIO(shape=(10, 10), dtype=int)
with self.assertRaisesRegex(ValueError, "Setting data when dtype and shape are not None is not supported"):
dataio.data = list()


def test_hdf5io_can_read():
    """Exercise HDF5IO.can_read on a missing path, an HDF5 file, and a non-HDF5 file."""
    from pathlib import Path

    # A path that does not exist is not readable.
    assert not HDF5IO.can_read("not_a_file")
    # Resolve the back-compat fixture relative to this test module so the
    # test does not depend on the working directory pytest was launched from.
    fixture = Path(__file__).parent / "back_compat_tests" / "1.0.5.h5"
    assert HDF5IO.can_read(str(fixture))
    # This Python source file exists but is not an HDF5 file.
    assert not HDF5IO.can_read(__file__)

0 comments on commit 1c7895f

Please sign in to comment.