Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix get_data_shape for unbounded dataset #315

Merged
merged 3 commits into from
Nov 30, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

### Fixes
* Fix `check_subject_proper_age_range` to parse years. [PR #314](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/314)
* Write a custom `get_data_shape` method that does not return `maxshape`, which fixes errors in parsing shape. [PR #315](https://github.com/NeurodataWithoutBorders/nwbinspector/pull/315)


# v0.4.20
Expand Down
2 changes: 1 addition & 1 deletion src/nwbinspector/checks/ecephys.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from pynwb.misc import Units
from pynwb.ecephys import ElectricalSeries

from hdmf.utils import get_data_shape
from ..utils import get_data_shape

from ..register_checks import register_check, Importance, InspectorMessage

Expand Down
2 changes: 1 addition & 1 deletion src/nwbinspector/checks/ophys.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
ImagingPlane,
)

from hdmf.utils import get_data_shape
from ..utils import get_data_shape

from ..register_checks import register_check, Importance, InspectorMessage

Expand Down
2 changes: 1 addition & 1 deletion src/nwbinspector/checks/tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

import numpy as np
from hdmf.common import DynamicTable, DynamicTableRegion, VectorIndex
from hdmf.utils import get_data_shape
from pynwb.file import TimeIntervals, Units

from ..register_checks import register_check, InspectorMessage, Importance
Expand All @@ -14,6 +13,7 @@
is_ascending_series,
is_dict_in_string,
is_string_json_loadable,
get_data_shape,
)


Expand Down
3 changes: 1 addition & 2 deletions src/nwbinspector/checks/time_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,9 @@
import numpy as np

from pynwb import TimeSeries
from hdmf.utils import get_data_shape

from ..register_checks import register_check, Importance, Severity, InspectorMessage
from ..utils import is_regular_series, is_ascending_series
from ..utils import is_regular_series, is_ascending_series, get_data_shape


@register_check(importance=Importance.BEST_PRACTICE_VIOLATION, neurodata_type=TimeSeries)
Expand Down
39 changes: 39 additions & 0 deletions src/nwbinspector/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,3 +192,42 @@ def calculate_number_of_cpu(requested_cpu: int = 1) -> int:
return requested_cpu
else:
return total_cpu + requested_cpu


def get_data_shape(data, strict_no_data_load=False):
    """
    Determine the current shape of the given array-like object.

    Modified from ``hdmf.utils.get_data_shape`` to return ``shape`` instead of ``maxshape``.

    In order to determine the shape of nested tuples, lists, and sets, this function
    recursively inspects the first element along each dimension, assuming that the data
    has a regular, rectangular shape. In the case of out-of-core iterators, this means that
    the first item along each dimension would potentially be loaded into memory. Set
    strict_no_data_load=True to enforce that this does not happen, at the cost that we may
    not be able to determine the shape of the array.

    :param data: Array for which we should determine the shape.
    :type data: List, numpy.ndarray, DataChunkIterator, any object that support __len__ or .shape.
    :param strict_no_data_load: If True and data is an out-of-core iterator, None may be returned. If False (default),
                                the first element of data may be loaded into memory.
    :return: ``data.shape`` if that attribute exists and is not None. Otherwise a tuple of ints inferred from the
             nested lengths of ``data``. None if the shape cannot be determined: ``data`` is a dict, a str/bytes,
             has no ``__len__``, or is not a list/tuple/set while ``strict_no_data_load`` is True.
    """

    def _nested_shape(local_data):
        # Walk down the first element of each level until something without a length
        # (or a string-like leaf) is reached, collecting the length at every level.
        dims = []
        if hasattr(local_data, "__len__"):
            length = len(local_data)
            dims.append(length)
            if length:
                first = next(iter(local_data))
                # str/bytes have a length but are leaves, not a further dimension.
                if not isinstance(first, (str, bytes)):
                    dims.extend(_nested_shape(first))
        return tuple(dims)

    # Prefer an explicit ``shape`` attribute; ``maxshape`` is deliberately never consulted.
    shape = getattr(data, "shape", None)
    if shape is not None:
        return shape

    # The length of a mapping is not an array dimension.
    if isinstance(data, dict):
        return None

    if hasattr(data, "__len__") and not isinstance(data, (str, bytes)):
        # In strict mode only in-memory builtin containers may be inspected element-wise.
        if not strict_no_data_load or isinstance(data, (list, tuple, set)):
            return _nested_shape(data)

    return None
21 changes: 21 additions & 0 deletions tests/unit_tests/test_time_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import pytest
from packaging import version

import h5py

from nwbinspector import (
InspectorMessage,
Importance,
Expand Down Expand Up @@ -77,6 +79,25 @@ def test_check_data_orientation():
)


def test_check_data_orientation_unbounded_maxshape(tmp_path):
    """Check that data backed by an HDF5 dataset with an unbounded first ``maxshape`` entry yields no message."""
    nwbfile_path = tmp_path / "test.nwb"
    with h5py.File(nwbfile_path, "w") as h5_file:
        # 10 x 3 array whose first dimension is resizable (maxshape entry is None).
        dataset = h5_file.create_dataset("data", data=np.ones((10, 3)), maxshape=(None, 3))

        series_kwargs = dict(name="test_time_series", unit="test_units", data=dataset, rate=1.0)
        time_series = pynwb.TimeSeries(**series_kwargs)

        # Evaluated while the file is still open so the dataset handle remains valid.
        assert check_data_orientation(time_series) is None


def test_check_timestamps():
assert check_timestamps_match_first_dimension(
time_series=pynwb.TimeSeries(
Expand Down