[Draft] Fix MR Reads

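Start fixing mesh-refinement (MR) reads on higher refinement levels, where a record component can contain regions that are declared but were never written; reads must therefore be restricted to the valid boxes (the available chunks), and the remaining regions are masked with zeros.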
openPMD · Feb 17, 2022 · b8b5a48 · b8b5a48
1 parent 5a452d0
commit b8b5a48
Show file tree

Hide file tree

Showing 2 changed files with 48 additions and 5 deletions.
diff --git a/openpmd_viewer/openpmd_timeseries/data_reader/io_reader/utilities.py b/openpmd_viewer/openpmd_timeseries/data_reader/io_reader/utilities.py
@@ -11,6 +11,15 @@
 import numpy as np
 
 
+def chunk_to_slice(chunk):
+    """
+    Convert an openPMD_api.ChunkInfo to a tuple of slices (np.s_-style index)
+
+    `chunk.offset` and `chunk.extent` are iterated in lockstep, one entry
+    per dimension. For each dimension, the resulting slice starts at the
+    offset and stops (exclusively) at offset + extent, with no step.
+
+    Returns a tuple holding one such slice per dimension.
+    """
+    # exclusive upper bound of the chunk in each dimension
+    stops = [a + b for a, b in zip(chunk.offset, chunk.extent)]
+    indices_per_dim = zip(chunk.offset, stops)
+    index_tuple = map(lambda s: slice(s[0], s[1], None), indices_per_dim)
+    return tuple(index_tuple)
+
 def get_data(series, record_component, i_slice=None, pos_slice=None,
              output_type=np.float64):
     """
@@ -46,8 +55,17 @@ def get_data(series, record_component, i_slice=None, pos_slice=None,
     if i_slice is not None and not isinstance(i_slice, list):
         i_slice = [i_slice]
 
+    chunks = record_component.available_chunks()
+
     if pos_slice is None:
-        data = record_component[()]
+        # mask invalid regions with zero
+        data = np.zeros_like(record_component)
+        for chunk in chunks:
+            chunk_slice = chunk_to_slice(chunk)
+            # read only valid region
+            x = record_component[chunk_slice]
+            series.flush()
+            data[chunk_slice] = x
     else:
         # Get largest element of pos_slice
         max_pos = max(pos_slice)
@@ -60,8 +78,33 @@ def get_data(series, record_component, i_slice=None, pos_slice=None,
             list_index[dir_index] = i_slice[count]
         # Convert list_index into a tuple
         tuple_index = tuple(list_index)
-        # Slice dset according to tuple_index
-        data = record_component[tuple_index]
+        print("tuple_index={}".format(tuple_index))
+
+        # potentially a better approach as below, since we only slice
+        # out hyperplanes, planes & lines:
+        # - allocate zero array for result, which is a hyperplane/plane/line
+        # - iterate over slices in tuple_index
+        # - reduce selected read range to "valid" range
+
+        # initial experiment:
+        # full_indices can be HUGE, avoid!!
+        full_indices = np.indices(record_component.shape)[0]
+        #full_shape = full_indices.shape
+        #print("full_shape.shape={}".format(full_shape))
+        #print("full_shape={}".format(full_shape))
+
+        # prepare sliced data according to tuple_index
+        slice_indices = full_indices[tuple_index]
+        slice_shape = slice_indices.shape
+        data = np.zeros(slice_shape, dtype=output_type)
+        # write now in index space between intersection of slice_indices and chunk indices
+        for chunk in chunks:
+            chunk_slice = chunk_to_slice(chunk)
+            chunk_indices = full_indices[chunk_slice]
+            intersect_indices = np.intersect1d(chunk_indices, slice_indices)
+            print(intersect_indices)
+            data[slice_indices] = record_component[intersect_indices]
+        #data = np.zeros_like(record_component)[tuple_index]  # just avoid invalid reads for now
 
     series.flush()
 

diff --git a/setup.py b/setup.py
@@ -38,12 +38,12 @@ def run_tests(self):
       tests_require=['pytest', 'jupyter'],
       install_requires=install_requires,
       extras_require = {
-        'all': ["ipympl", "ipywidgets", "matplotlib", "numba", "openpmd-api", "wget"],
+        'all': ["ipympl", "ipywidgets", "matplotlib", "numba", "openpmd-api~=0.13.3,~=0.14.0", "wget"],
         'GUI':  ["ipywidgets", "ipympl", "matplotlib"],
         'plot': ["matplotlib"],
         'tutorials': ["ipywidgets", "ipympl", "matplotlib", "wget"],
         'numba': ["numba"],
-        'openpmd-api': ["openpmd-api"]
+        'openpmd-api': ["openpmd-api~=0.13.3,~=0.14.0"]
         },
       cmdclass={'test': PyTest},
       platforms='any',