Merge pull request #845 from bp/grid_surface_wrapper_packing

handling pre-packed arrays in property collection
bp · Oct 29, 2024 · f63314c
2 parents f7bfd31 + b5c68d5
commit f63314c
Show file tree

Hide file tree

Showing 5 changed files with 71 additions and 54 deletions.
diff --git a/resqpy/multi_processing/wrappers/grid_surface_mp.py b/resqpy/multi_processing/wrappers/grid_surface_mp.py
@@ -18,32 +18,32 @@
 import resqpy.olio.uuid as bu
 
 
-def find_faces_to_represent_surface_regular_wrapper(
-        index: int,
-        parent_tmp_dir: str,
-        use_index_as_realisation: bool,
-        grid_epc: str,
-        grid_uuid: Union[UUID, str],
-        surface_epc: str,
-        surface_uuid: Union[UUID, str],
-        name: str,
-        title: Optional[str] = None,
-        agitate: bool = False,
-        random_agitation: bool = False,
-        feature_type: str = 'fault',
-        trimmed: bool = False,
-        is_curtain = False,
-        extend_fault_representation: bool = False,
-        flange_inner_ring = False,
-        saucer_parameter = None,
-        retriangulate: bool = False,
-        related_uuid = None,
-        progress_fn: Optional[Callable] = None,
-        extra_metadata = None,
-        return_properties: Optional[List[str]] = None,
-        raw_bisector: bool = False,
-        use_pack: bool = False,
-        flange_radius = None) -> Tuple[int, bool, str, List[Union[UUID, str]]]:
+def find_faces_to_represent_surface_regular_wrapper(index: int,
+                                                    parent_tmp_dir: str,
+                                                    use_index_as_realisation: bool,
+                                                    grid_epc: str,
+                                                    grid_uuid: Union[UUID, str],
+                                                    surface_epc: str,
+                                                    surface_uuid: Union[UUID, str],
+                                                    name: str,
+                                                    title: Optional[str] = None,
+                                                    agitate: bool = False,
+                                                    random_agitation: bool = False,
+                                                    feature_type: str = 'fault',
+                                                    trimmed: bool = False,
+                                                    is_curtain = False,
+                                                    extend_fault_representation: bool = False,
+                                                    flange_inner_ring = False,
+                                                    saucer_parameter = None,
+                                                    retriangulate: bool = False,
+                                                    related_uuid = None,
+                                                    progress_fn: Optional[Callable] = None,
+                                                    extra_metadata = None,
+                                                    return_properties: Optional[List[str]] = None,
+                                                    raw_bisector: bool = False,
+                                                    use_pack: bool = False,
+                                                    flange_radius = None,
+                                                    n_threads = 20) -> Tuple[int, bool, str, List[Union[UUID, str]]]:
     """Multiprocessing wrapper function of find_faces_to_represent_surface_regular_optimised.
 
     arguments:
@@ -92,10 +92,11 @@ def find_faces_to_represent_surface_regular_wrapper(
            the returned dictionary has the passed strings as keys and numpy arrays as values
         raw_bisector (bool, default False): if True and grid bisector is requested then it is left in a raw
            form without assessing which side is shallower (True values indicate same side as origin cell)
-        use_pack (bool, default False): if True, boolean properties will be stored in numpy packed format,
-           which will only be readable by resqpy based applications
+        use_pack (bool, default False): if True, boolean properties will be generated and stored in numpy
+           packed format, which will only be readable by resqpy based applications
         flange_radius (float, optional): the radial distance to use for outer flange extension points; if None,
            a large value will be calculated from the grid size; units are xy units of grid crs
+        n_threads (int, default 20): the number of parallel threads to use in the numba points-in-triangles function
 
     returns:
         Tuple containing:
@@ -250,7 +251,9 @@ def find_faces_to_represent_surface_regular_wrapper(
                                                                      is_curtain,
                                                                      progress_fn,
                                                                      return_properties,
-                                                                     raw_bisector = raw_bisector)
+                                                                     raw_bisector = raw_bisector,
+                                                                     n_batches = n_threads,
+                                                                     packed_bisectors = use_pack)
 
     success = False
 
@@ -340,17 +343,17 @@ def find_faces_to_represent_surface_regular_wrapper(
                 if grid_pc is None:
                     grid_pc = rqp.PropertyCollection()
                     grid_pc.set_support(support = grid)
-                grid_pc.add_cached_array_to_imported_list(
-                    array,
-                    f"from find_faces function for {surface.title}",
-                    f'{surface.title} {p_name}',
-                    discrete = True,
-                    property_kind = "grid bisector",
-                    facet_type = 'direction',
-                    facet = 'raw' if raw_bisector else ('vertical' if is_curtain else 'sloping'),
-                    realization = realisation,
-                    indexable_element = "columns" if is_curtain else "cells",
-                )
+                grid_pc.add_cached_array_to_imported_list(array,
+                                                          f"from find_faces function for {surface.title}",
+                                                          f'{surface.title} {p_name}',
+                                                          discrete = True,
+                                                          property_kind = "grid bisector",
+                                                          facet_type = 'direction',
+                                                          facet = 'raw' if raw_bisector else
+                                                          ('vertical' if is_curtain else 'sloping'),
+                                                          realization = realisation,
+                                                          indexable_element = "columns" if is_curtain else "cells",
+                                                          pre_packed = use_pack)
             elif p_name == 'grid shadow':
                 if grid_pc is None:
                     grid_pc = rqp.PropertyCollection()

diff --git a/resqpy/olio/write_hdf5.py b/resqpy/olio/write_hdf5.py
@@ -101,7 +101,7 @@ def register_dataset(self,
         assert chunks is None or isinstance(chunks, str) or isinstance(chunks, tuple)
         assert compression is None or (isinstance(compression, str) and compression in ['gzip', 'lzf', 'none'])
         if str(dtype) == 'pack':
-            a = np.packbits(a, axis = -1)  # todo: check this returns uint8 array
+            a = np.packbits(a, axis = -1)
             dtype = 'uint8'
         elif dtype is not None:
             a = a.astype(dtype, copy = copy)

diff --git a/resqpy/property/_collection_add_part.py b/resqpy/property/_collection_add_part.py
@@ -199,7 +199,7 @@ def _process_imported_property(collection, attributes, property_kind_uuid, strin
                                extra_metadata, expand_const_arrays):
     (p_uuid, p_file_name, p_keyword, p_cached_name, p_discrete, p_uom, p_time_index, p_null_value, p_min_value,
      p_max_value, property_kind, facet_type, facet, realization, indexable_element, count, local_property_kind_uuid,
-     const_value, points, p_time_series_uuid, p_string_lookup_uuid) = attributes
+     const_value, points, p_time_series_uuid, p_string_lookup_uuid, pre_packed) = attributes
 
     log.debug('processing imported property ' + str(p_keyword))
     assert not points or not p_discrete
@@ -214,7 +214,7 @@ def _process_imported_property(collection, attributes, property_kind_uuid, strin
                                                                  p_keyword, p_discrete, string_lookup_uuid, points)
 
     p_array = _process_imported_property_get_p_array(collection, p_cached_name)
-    p_array_bool = isinstance(const_value, bool) if p_array is None else p_array.dtype in [bool, np.int8]
+    p_array_bool = isinstance(const_value, bool) if p_array is None else p_array.dtype in [bool, np.int8, np.uint8]
 
     add_min_max = pcga._process_imported_property_get_add_min_max(points, property_kind, string_lookup_uuid,
                                                                   local_property_kind_uuid, p_array_bool)
@@ -251,7 +251,8 @@ def _process_imported_property(collection, attributes, property_kind_uuid, strin
         find_local_property_kinds = find_local_property_kinds,
         extra_metadata = extra_metadata,
         const_value = const_value,
-        expand_const_arrays = expand_const_arrays)
+        expand_const_arrays = expand_const_arrays,
+        pre_packed = pre_packed)
     if p_node is not None:
         return p_node
     else:

diff --git a/resqpy/property/_collection_create_xml.py b/resqpy/property/_collection_create_xml.py
@@ -246,8 +246,10 @@ def _create_xml_facet_node(facet_type, facet, p_node):
         facet_value_node.text = facet
 
 
-def _check_shape_list(collection, indexable_element, direction, property_array, points, count):
+def _check_shape_list(collection, indexable_element, direction, property_array, points, count, pre_packed):
     shape_list = collection.supporting_shape(indexable_element = indexable_element, direction = direction)
+    if pre_packed:
+        shape_list[-1] = (shape_list[-1] - 1) // 8 + 1
     if shape_list is not None:
         if count > 1:
             shape_list.append(count)

diff --git a/resqpy/property/property_collection.py b/resqpy/property/property_collection.py
@@ -88,7 +88,7 @@ def __init__(self, support = None, property_set_root = None, realization = None)
         # above is list of (uuid, source, keyword, cached_name, discrete, uom, time_index, null_value,
         #                   min_value, max_value, property_kind, facet_type, facet, realization,
         #                   indexable_element, count, local_property_kind_uuid, const_value, points,
-        #                   time_series_uuid, string_lookup_uuid)
+        #                   time_series_uuid, string_lookup_uuid, pre_packed)
         self.guess_warning = False
         if support is not None:
             self.model = support.model
@@ -404,7 +404,8 @@ def add_to_imported_list_sampling_other_collection(self, other, flattened_indice
           call this method once for each group of differently sized properties; for very large collections
           it might also be necessary to divide the work into smaller groups to reduce memory usage;
           this method does not write to hdf5 nor create xml – use the usual methods for further processing
-          of the imported list
+          of the imported list;
+          does not currently support packed arrays
         """
 
         source = 'sampled'
@@ -2219,7 +2220,8 @@ def add_cached_array_to_imported_list(self,
                                           const_value = None,
                                           points = False,
                                           time_series_uuid = None,
-                                          string_lookup_uuid = None):
+                                          string_lookup_uuid = None,
+                                          pre_packed = False):
         """Caches array and adds to the list of imported properties (but not to the collection dict).
 
         arguments:
@@ -2250,6 +2252,7 @@ def add_cached_array_to_imported_list(self,
               be provided when writing hdf5 and creating xml for the imported list
            string_lookup_uuid (UUID, optional): should be provided for categorical properties, though can alternatively
               be specified when creating xml
+           pre_packed (bool, default False): set to True if the property is boolean and the array is already packed
 
         returns:
            uuid of nascent property object
@@ -2271,6 +2274,7 @@ def add_cached_array_to_imported_list(self,
         assert (cached_array is not None and const_value is None) or (cached_array is None and const_value is not None)
         assert not points or not discrete
         assert count > 0
+        assert (not pre_packed) or ((cached_array is not None) and (cached_array.dtype == np.uint8))
         rqp_c.check_and_warn_property_kind(property_kind, 'adding property to imported list')
 
         if self.imported_list is None:
@@ -2288,7 +2292,7 @@ def add_cached_array_to_imported_list(self,
         self.imported_list.append(
             (uuid, source_info, keyword, cached_name, discrete, uom, time_index, null_value, min_value, max_value,
              property_kind, facet_type, facet, realization, indexable_element, count, local_property_kind_uuid,
-             const_value, points, time_series_uuid, string_lookup_uuid))
+             const_value, points, time_series_uuid, string_lookup_uuid, pre_packed))
         return uuid
 
     def add_similar_to_imported_list(self,
@@ -2311,6 +2315,7 @@ def add_similar_to_imported_list(self,
                                      points = None,
                                      time_series_uuid = None,
                                      string_lookup_uuid = None,
+                                     pre_packed = False,
                                      similar_model = None,
                                      title = None):
         """Caches array and adds to the list of imported properties using default metadata from a similar property.
@@ -2342,6 +2347,7 @@ def add_similar_to_imported_list(self,
               be provided when writing hdf5 and creating xml for the imported list
            string_lookup_uuid (UUID, optional): should be provided for categorical properties, though can alternatively
               be specified when creating xml
+           pre_packed (bool, default False): set to True if the property is boolean and the cached array is packed
            similar_model (Model, optional): the model where the similar property resides, if not the same as this
               property collection
            title (str, optional): synonym for keyword argument
@@ -2398,6 +2404,7 @@ def get_arg(supplied, similarly):
         args['string_lookup_uuid'] = get_arg(time_series_uuid, similar.string_lookup_uuid())
         em = similar.extra_metadata if hasattr(similar, 'extra_metadata') else {}
         args['source_info'] = get_arg(source_info, em.get('source'))
+        args['pre_packed'] = pre_packed
 
         return self.add_cached_array_to_imported_list(cached_array, **args)
 
@@ -2445,7 +2452,8 @@ def write_hdf5_for_imported_list(self,
               as 32 bit; if None, the system default is to write as 32 bit; if True, 32 bit is used; if
               False, 64 bit data is written; ignored if dtype is not None
            use_pack (bool, default False): if True, bool arrays will be packed along their last axis; this
-              will generally result in hdf5 data that is not readable by non-resqpy applications
+              will generally result in hdf5 data that is not readable by non-resqpy applications; leave
+              as False for already packed arrays
            chunks (str, optional): if not None, one of 'auto', 'all', or 'slice', controlling hdf5 chunks
            compression (str, optional): if not None, one of 'gzip' or 'lzf' being the hdf5 compression
               algorithm to be used; gzip gives better compression ratio but is slower
@@ -2473,8 +2481,8 @@ def write_hdf5_for_imported_list(self,
                 uuid = entry[0]
                 cached_name = entry[3]
             tail = 'points_patch0' if entry[18] else 'values_patch0'
-            if use_pack and (str(dtype).startswith('bool') or
-                             (dtype is None and str(self.__dict__[cached_name].dtype) == 'bool')):
+            if use_pack and ('bool' in str(dtype) or
+                             (dtype is None and 'bool' in str(self.__dict__[cached_name].dtype))):
                 dtype = 'pack'
             h5_reg.register_dataset(uuid, tail, self.__dict__[cached_name], dtype = dtype)
         h5_reg.write(file = file_name, mode = mode, use_int32 = use_int32)
@@ -2601,7 +2609,8 @@ def create_xml(self,
                    points = False,
                    extra_metadata = {},
                    const_value = None,
-                   expand_const_arrays = False):
+                   expand_const_arrays = False,
+                   pre_packed = False):
         """Create a property xml node for a single property related to a given supporting representation node.
 
         arguments:
@@ -2661,6 +2670,8 @@ def create_xml(self,
            const_value (float, int or bool, optional): if present, create xml for a constant array filled with this value
            expand_const_arrays (boolean, default False): if True, the hdf5 write must also have been called with the
               same argument and the xml will treat a constant array as a normal array
+           pre_packed (boolean, default False): if True, the property is a boolean property and the array has already
+              been packed into bits
 
         returns:
            the newly created property xml node
@@ -2688,7 +2699,7 @@ def create_xml(self,
         direction = None if facet_type is None or facet_type != 'direction' else facet
 
         if self.support is not None:
-            pcxml._check_shape_list(self, indexable_element, direction, property_array, points, count)
+            pcxml._check_shape_list(self, indexable_element, direction, property_array, points, count, pre_packed)
 
         # todo: assertions:
         #    numpy data type matches discrete flag (and assumptions about precision)