From bd62e71f4647c89fe00fdf6006e03b73730c07af Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Thu, 17 Feb 2022 14:56:05 -0800 Subject: [PATCH] MR: Implement Chunked Slicing --- .../data_reader/io_reader/utilities.py | 88 +++++++++++-------- setup.py | 4 +- 2 files changed, 52 insertions(+), 40 deletions(-) diff --git a/openpmd_viewer/openpmd_timeseries/data_reader/io_reader/utilities.py b/openpmd_viewer/openpmd_timeseries/data_reader/io_reader/utilities.py index 19a727a9..14d34f1a 100644 --- a/openpmd_viewer/openpmd_timeseries/data_reader/io_reader/utilities.py +++ b/openpmd_viewer/openpmd_timeseries/data_reader/io_reader/utilities.py @@ -13,13 +13,14 @@ def chunk_to_slice(chunk): """ - Convert an openPMD_api.ChunkInfo to np.s_ + Convert an openPMD_api.ChunkInfo to slice """ stops = [a + b for a, b in zip(chunk.offset, chunk.extent)] indices_per_dim = zip(chunk.offset, stops) index_tuple = map(lambda s: slice(s[0], s[1], None), indices_per_dim) return tuple(index_tuple) + def get_data(series, record_component, i_slice=None, pos_slice=None, output_type=np.float64): """ @@ -67,46 +68,57 @@ def get_data(series, record_component, i_slice=None, pos_slice=None, series.flush() data[chunk_slice] = x else: - # Get largest element of pos_slice - max_pos = max(pos_slice) - # Create list of indices list_index of type - # [:, :, :, ...] where Ellipsis starts at max_pos + 1 - list_index = [np.s_[:]] * (max_pos + 2) - list_index[max_pos + 1] = np.s_[...] - # Fill list_index with elements of i_slice - for count, dir_index in enumerate(pos_slice): - list_index[dir_index] = i_slice[count] - # Convert list_index into a tuple - tuple_index = tuple(list_index) - print("tuple_index={}".format(tuple_index)) - - # potentially a better approach as below, since we only slice - # out hyperplanes, planes & lines: - # - allocate zero array for result, which is a hyperplane/plane/line - # - iterate over slices in tuple_index - # - reduce selected read range to "valid" range - - # initial experiment: - # full_indices can be HUGE, avoid!! - full_indices = np.indices(record_component.shape)[0] - #full_shape = full_indices.shape - #print("full_shape.shape={}".format(full_shape)) - #print("full_shape={}".format(full_shape)) - - # prepare sliced data according to tuple_index - slice_indices = full_indices[tuple_index] - slice_shape = slice_indices.shape + full_shape = record_component.shape + for dir_index, index in zip(pos_slice, i_slice): + slice_shape = list(full_shape) # copy + # future note: this needs to be converted to list and back to + # tuple once we fix + # https://github.com/openPMD/openPMD-api/issues/808 + del slice_shape[dir_index] + + # mask invalid regions with zero data = np.zeros(slice_shape, dtype=output_type) - # write now in index space between intersection of slice_indices and chunk indices + + # build requested ND slice with respect to full data + s = [] + for d in range(len(full_shape)): + if d in pos_slice: + s.append(index) # one index in such directions + else: # all indices in other direction + s.append(slice(None, None, None)) + s = tuple(s) + + # now we check which chunks contribute to the slice for chunk in chunks: + skip_this_chunk = False + s_valid = list(s) # same as s but reduced to valid regions in chunk + s_target = [] # starts and stops in sliced array chunk_slice = chunk_to_slice(chunk) - chunk_indices = full_indices[chunk_slice] - intersect_indices = np.intersect1d(chunk_indices, slice_indices) - print(intersect_indices) - data[slice_indices] = record_component[intersect_indices] - #data = np.zeros_like(record_component)[tuple_index] # just avoid invalid reads for now - - series.flush() + # read only valid region + for d, slice_d in enumerate(s): + start = chunk_slice[d].start + stop = chunk_slice[d].stop + if isinstance(slice_d, int): + # Nothing to do for s_target (dimension sliced out) + # Nothing to do for s_valid (dimension index is set) + if slice_d < start or slice_d > stop: + # chunk not in slice line/plane + skip_this_chunk = True + else: + if slice_d.start is None or slide_dir.start < start: + s_valid[d] = slice(start, s_valid[d].stop) + if slice_d.start is None or slide_dir.start > stop: + s_valid[d] = slice(s_valid[d].start, stop) + s_target.append(slice(start, stop)) + + s_valid = tuple(s_valid) + s_target = tuple(s_target) + + # read + if not skip_this_chunk: + x = record_component[s_valid] + series.flush() + data[s_target] = x # Convert to the right type if data.dtype != output_type: diff --git a/setup.py b/setup.py index 2f96dbaa..1da55e9d 100644 --- a/setup.py +++ b/setup.py @@ -38,12 +38,12 @@ def run_tests(self): tests_require=['pytest', 'jupyter'], install_requires=install_requires, extras_require = { - 'all': ["ipympl", "ipywidgets", "matplotlib", "numba", "openpmd-api~=0.13.3,~=0.14.0", "wget"], + 'all': ["ipympl", "ipywidgets", "matplotlib", "numba", "openpmd-api~=0.14.0", "wget"], 'GUI': ["ipywidgets", "ipympl", "matplotlib"], 'plot': ["matplotlib"], 'tutorials': ["ipywidgets", "ipympl", "matplotlib", "wget"], 'numba': ["numba"], - 'openpmd-api': ["openpmd-api~=0.13.3,~=0.14.0"] + 'openpmd-api': ["openpmd-api~=0.14.0"] }, cmdclass={'test': PyTest}, platforms='any',