From 74e716141c9e89afdcd5b11dd1a9106cfb1edcbb Mon Sep 17 00:00:00 2001 From: bendichter Date: Wed, 30 Nov 2022 09:41:13 -0800 Subject: [PATCH 1/3] create a custom get_data_shape that does not return maxshape write test that has an unbounded dataset --- src/nwbinspector/checks/ecephys.py | 2 +- src/nwbinspector/checks/ophys.py | 2 +- src/nwbinspector/checks/tables.py | 2 +- src/nwbinspector/checks/time_series.py | 3 +- src/nwbinspector/utils.py | 39 ++++++++++++++++++++++++++ tests/unit_tests/test_time_series.py | 21 ++++++++++++++ 6 files changed, 64 insertions(+), 5 deletions(-) diff --git a/src/nwbinspector/checks/ecephys.py b/src/nwbinspector/checks/ecephys.py index 761dcf271..5044bd8d1 100644 --- a/src/nwbinspector/checks/ecephys.py +++ b/src/nwbinspector/checks/ecephys.py @@ -4,7 +4,7 @@ from pynwb.misc import Units from pynwb.ecephys import ElectricalSeries -from hdmf.utils import get_data_shape +from ..utils import get_data_shape from ..register_checks import register_check, Importance, InspectorMessage diff --git a/src/nwbinspector/checks/ophys.py b/src/nwbinspector/checks/ophys.py index 3342db51b..e7ef139b5 100644 --- a/src/nwbinspector/checks/ophys.py +++ b/src/nwbinspector/checks/ophys.py @@ -6,7 +6,7 @@ ImagingPlane, ) -from hdmf.utils import get_data_shape +from ..utils import get_data_shape from ..register_checks import register_check, Importance, InspectorMessage diff --git a/src/nwbinspector/checks/tables.py b/src/nwbinspector/checks/tables.py index c67a11025..5309247ba 100644 --- a/src/nwbinspector/checks/tables.py +++ b/src/nwbinspector/checks/tables.py @@ -4,7 +4,6 @@ import numpy as np from hdmf.common import DynamicTable, DynamicTableRegion, VectorIndex -from hdmf.utils import get_data_shape from pynwb.file import TimeIntervals, Units from ..register_checks import register_check, InspectorMessage, Importance @@ -14,6 +13,7 @@ is_ascending_series, is_dict_in_string, is_string_json_loadable, + get_data_shape, ) diff --git a/src/nwbinspector/checks/time_series.py b/src/nwbinspector/checks/time_series.py index 06c9ee862..43beb6968 100644 --- a/src/nwbinspector/checks/time_series.py +++ b/src/nwbinspector/checks/time_series.py @@ -2,10 +2,9 @@ import numpy as np from pynwb import TimeSeries -from hdmf.utils import get_data_shape from ..register_checks import register_check, Importance, Severity, InspectorMessage -from ..utils import is_regular_series, is_ascending_series +from ..utils import is_regular_series, is_ascending_series, get_data_shape @register_check(importance=Importance.BEST_PRACTICE_VIOLATION, neurodata_type=TimeSeries) diff --git a/src/nwbinspector/utils.py b/src/nwbinspector/utils.py index 4e2693171..dfd614436 100644 --- a/src/nwbinspector/utils.py +++ b/src/nwbinspector/utils.py @@ -192,3 +192,42 @@ def calculate_number_of_cpu(requested_cpu: int = 1) -> int: return requested_cpu else: return total_cpu + requested_cpu + + +def get_data_shape(data, strict_no_data_load=False): + """ + modified from hdmf.utils.get_data_shape to return shape instead of maxshape + Helper function used to determine the shape of the given array. + + In order to determine the shape of nested tuples, lists, and sets, this function + recursively inspects elements along the dimensions, assuming that the data has a regular, + rectangular shape. In the case of out-of-core iterators, this means that the first item + along each dimension would potentially be loaded into memory. Set strict_no_data_load=True + to enforce that this does not happen, at the cost that we may not be able to determine + the shape of the array. + + :param data: Array for which we should determine the shape. + :type data: List, numpy.ndarray, DataChunkIterator, any object that support __len__ or .shape. + :param strict_no_data_load: If True and data is an out-of-core iterator, None may be returned. If False (default), + the first element of data may be loaded into memory. + :return: Tuple of ints indicating the size of known dimensions. Dimensions for which the size is unknown + will be set to None. + """ + + def __get_shape_helper(local_data): + shape = list() + if hasattr(local_data, '__len__'): + shape.append(len(local_data)) + if len(local_data): + el = next(iter(local_data)) + if not isinstance(el, (str, bytes)): + shape.extend(__get_shape_helper(el)) + return tuple(shape) + + if hasattr(data, "shape") and data.shape is not None: + return data.shape + if isinstance(data, dict): + return + if hasattr(data, "__len__") and not isinstance(data, (str, bytes)): + if not strict_no_data_load or isinstance(data, (list, tuple, set)): + return __get_shape_helper(data) diff --git a/tests/unit_tests/test_time_series.py b/tests/unit_tests/test_time_series.py index ea5dacd39..06d37cbd7 100644 --- a/tests/unit_tests/test_time_series.py +++ b/tests/unit_tests/test_time_series.py @@ -3,6 +3,8 @@ import pytest from packaging import version +import h5py + from nwbinspector import ( InspectorMessage, Importance, @@ -77,6 +79,25 @@ def test_check_data_orientation(): ) +def test_check_data_orientation_unbounded_maxshape(tmp_path): + filepath = tmp_path / "test.nwb" + with h5py.File(filepath, "w") as file: + data = file.create_dataset( + "data", + data=np.ones((10, 3)), + maxshape=(None,3), + ) + + time_series = pynwb.TimeSeries( + name="test_time_series", + unit="test_units", + data=data, + rate=1.0, + ) + + assert check_data_orientation(time_series) is None + + def test_check_timestamps(): assert check_timestamps_match_first_dimension( time_series=pynwb.TimeSeries( From 4f65e355fcef9003426f99b2d87c95925afec555 Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Wed, 30 Nov 2022 12:43:39 -0500 Subject: [PATCH 2/3] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0af1270b5..436abbf86 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ### Fixes * Fix `check_subject_proper_age_range` to parse years. [PR #314](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/314) +* Write a custom `get_data_shape` method that does not return `maxshape`, which fixes errors in parsing shape. [PR #315](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/315) # v0.4.20 From 3c81420c963f7ff9cba7c888e0f5899e619110ed Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 30 Nov 2022 17:46:20 +0000 Subject: [PATCH 3/3] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/nwbinspector/utils.py | 2 +- tests/unit_tests/test_time_series.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/nwbinspector/utils.py b/src/nwbinspector/utils.py index dfd614436..945c93225 100644 --- a/src/nwbinspector/utils.py +++ b/src/nwbinspector/utils.py @@ -216,7 +216,7 @@ def get_data_shape(data, strict_no_data_load=False): def __get_shape_helper(local_data): shape = list() - if hasattr(local_data, '__len__'): + if hasattr(local_data, "__len__"): shape.append(len(local_data)) if len(local_data): el = next(iter(local_data)) diff --git a/tests/unit_tests/test_time_series.py b/tests/unit_tests/test_time_series.py index 06d37cbd7..bb03e7376 100644 --- a/tests/unit_tests/test_time_series.py +++ b/tests/unit_tests/test_time_series.py @@ -85,7 +85,7 @@ def test_check_data_orientation_unbounded_maxshape(tmp_path): data = file.create_dataset( "data", data=np.ones((10, 3)), - maxshape=(None,3), + maxshape=(None, 3), ) time_series = pynwb.TimeSeries(