Skip to content

Commit

Permalink
Merge pull request #845 from bp/grid_surface_wrapper_packing
Browse files (browse the repository at this point in the history)
handling pre-packed arrays in property collection
  • Loading branch information
andy-beer authored Oct 29, 2024
2 parents f7bfd31 + b5c68d5 commit f63314c
Show file tree
Hide file tree
Showing 5 changed files with 71 additions and 54 deletions.
83 changes: 43 additions & 40 deletions resqpy/multi_processing/wrappers/grid_surface_mp.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,32 +18,32 @@
import resqpy.olio.uuid as bu


def find_faces_to_represent_surface_regular_wrapper(
index: int,
parent_tmp_dir: str,
use_index_as_realisation: bool,
grid_epc: str,
grid_uuid: Union[UUID, str],
surface_epc: str,
surface_uuid: Union[UUID, str],
name: str,
title: Optional[str] = None,
agitate: bool = False,
random_agitation: bool = False,
feature_type: str = 'fault',
trimmed: bool = False,
is_curtain = False,
extend_fault_representation: bool = False,
flange_inner_ring = False,
saucer_parameter = None,
retriangulate: bool = False,
related_uuid = None,
progress_fn: Optional[Callable] = None,
extra_metadata = None,
return_properties: Optional[List[str]] = None,
raw_bisector: bool = False,
use_pack: bool = False,
flange_radius = None) -> Tuple[int, bool, str, List[Union[UUID, str]]]:
def find_faces_to_represent_surface_regular_wrapper(index: int,
parent_tmp_dir: str,
use_index_as_realisation: bool,
grid_epc: str,
grid_uuid: Union[UUID, str],
surface_epc: str,
surface_uuid: Union[UUID, str],
name: str,
title: Optional[str] = None,
agitate: bool = False,
random_agitation: bool = False,
feature_type: str = 'fault',
trimmed: bool = False,
is_curtain = False,
extend_fault_representation: bool = False,
flange_inner_ring = False,
saucer_parameter = None,
retriangulate: bool = False,
related_uuid = None,
progress_fn: Optional[Callable] = None,
extra_metadata = None,
return_properties: Optional[List[str]] = None,
raw_bisector: bool = False,
use_pack: bool = False,
flange_radius = None,
n_threads = 20) -> Tuple[int, bool, str, List[Union[UUID, str]]]:
"""Multiprocessing wrapper function of find_faces_to_represent_surface_regular_optimised.
arguments:
Expand Down Expand Up @@ -92,10 +92,11 @@ def find_faces_to_represent_surface_regular_wrapper(
the returned dictionary has the passed strings as keys and numpy arrays as values
raw_bisector (bool, default False): if True and grid bisector is requested then it is left in a raw
form without assessing which side is shallower (True values indicate same side as origin cell)
use_pack (bool, default False): if True, boolean properties will be stored in numpy packed format,
which will only be readable by resqpy based applications
use_pack (bool, default False): if True, boolean properties will be generated and stored in numpy
packed format, which will only be readable by resqpy based applications
flange_radius (float, optional): the radial distance to use for outer flange extension points; if None,
a large value will be calculated from the grid size; units are xy units of grid crs
n_threads (int, default 20): the number of parallel threads to use in numba points in triangles function
returns:
Tuple containing:
Expand Down Expand Up @@ -250,7 +251,9 @@ def find_faces_to_represent_surface_regular_wrapper(
is_curtain,
progress_fn,
return_properties,
raw_bisector = raw_bisector)
raw_bisector = raw_bisector,
n_batches = n_threads,
packed_bisectors = use_pack)

success = False

Expand Down Expand Up @@ -340,17 +343,17 @@ def find_faces_to_represent_surface_regular_wrapper(
if grid_pc is None:
grid_pc = rqp.PropertyCollection()
grid_pc.set_support(support = grid)
grid_pc.add_cached_array_to_imported_list(
array,
f"from find_faces function for {surface.title}",
f'{surface.title} {p_name}',
discrete = True,
property_kind = "grid bisector",
facet_type = 'direction',
facet = 'raw' if raw_bisector else ('vertical' if is_curtain else 'sloping'),
realization = realisation,
indexable_element = "columns" if is_curtain else "cells",
)
grid_pc.add_cached_array_to_imported_list(array,
f"from find_faces function for {surface.title}",
f'{surface.title} {p_name}',
discrete = True,
property_kind = "grid bisector",
facet_type = 'direction',
facet = 'raw' if raw_bisector else
('vertical' if is_curtain else 'sloping'),
realization = realisation,
indexable_element = "columns" if is_curtain else "cells",
pre_packed = use_pack)
elif p_name == 'grid shadow':
if grid_pc is None:
grid_pc = rqp.PropertyCollection()
Expand Down
2 changes: 1 addition & 1 deletion resqpy/olio/write_hdf5.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ def register_dataset(self,
assert chunks is None or isinstance(chunks, str) or isinstance(chunks, tuple)
assert compression is None or (isinstance(compression, str) and compression in ['gzip', 'lzf', 'none'])
if str(dtype) == 'pack':
a = np.packbits(a, axis = -1) # todo: check this returns uint8 array
a = np.packbits(a, axis = -1)
dtype = 'uint8'
elif dtype is not None:
a = a.astype(dtype, copy = copy)
Expand Down
7 changes: 4 additions & 3 deletions resqpy/property/_collection_add_part.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ def _process_imported_property(collection, attributes, property_kind_uuid, strin
extra_metadata, expand_const_arrays):
(p_uuid, p_file_name, p_keyword, p_cached_name, p_discrete, p_uom, p_time_index, p_null_value, p_min_value,
p_max_value, property_kind, facet_type, facet, realization, indexable_element, count, local_property_kind_uuid,
const_value, points, p_time_series_uuid, p_string_lookup_uuid) = attributes
const_value, points, p_time_series_uuid, p_string_lookup_uuid, pre_packed) = attributes

log.debug('processing imported property ' + str(p_keyword))
assert not points or not p_discrete
Expand All @@ -214,7 +214,7 @@ def _process_imported_property(collection, attributes, property_kind_uuid, strin
p_keyword, p_discrete, string_lookup_uuid, points)

p_array = _process_imported_property_get_p_array(collection, p_cached_name)
p_array_bool = isinstance(const_value, bool) if p_array is None else p_array.dtype in [bool, np.int8]
p_array_bool = isinstance(const_value, bool) if p_array is None else p_array.dtype in [bool, np.int8, np.uint8]

add_min_max = pcga._process_imported_property_get_add_min_max(points, property_kind, string_lookup_uuid,
local_property_kind_uuid, p_array_bool)
Expand Down Expand Up @@ -251,7 +251,8 @@ def _process_imported_property(collection, attributes, property_kind_uuid, strin
find_local_property_kinds = find_local_property_kinds,
extra_metadata = extra_metadata,
const_value = const_value,
expand_const_arrays = expand_const_arrays)
expand_const_arrays = expand_const_arrays,
pre_packed = pre_packed)
if p_node is not None:
return p_node
else:
Expand Down
4 changes: 3 additions & 1 deletion resqpy/property/_collection_create_xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,8 +246,10 @@ def _create_xml_facet_node(facet_type, facet, p_node):
facet_value_node.text = facet


def _check_shape_list(collection, indexable_element, direction, property_array, points, count):
def _check_shape_list(collection, indexable_element, direction, property_array, points, count, pre_packed):
shape_list = collection.supporting_shape(indexable_element = indexable_element, direction = direction)
if pre_packed:
shape_list[-1] = (shape_list[-1] - 1) // 8 + 1
if shape_list is not None:
if count > 1:
shape_list.append(count)
Expand Down
29 changes: 20 additions & 9 deletions resqpy/property/property_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def __init__(self, support = None, property_set_root = None, realization = None)
# above is list of (uuid, source, keyword, cached_name, discrete, uom, time_index, null_value,
# min_value, max_value, property_kind, facet_type, facet, realization,
# indexable_element, count, local_property_kind_uuid, const_value, points,
# time_series_uuid, string_lookup_uuid)
# time_series_uuid, string_lookup_uuid, pre_packed)
self.guess_warning = False
if support is not None:
self.model = support.model
Expand Down Expand Up @@ -404,7 +404,8 @@ def add_to_imported_list_sampling_other_collection(self, other, flattened_indice
call this method once for each group of differently sized properties; for very large collections
it might also be necessary to divide the work into smaller groups to reduce memory usage;
this method does not write to hdf5 nor create xml – use the usual methods for further processing
of the imported list
of the imported list;
does not currently support packed arrays
"""

source = 'sampled'
Expand Down Expand Up @@ -2219,7 +2220,8 @@ def add_cached_array_to_imported_list(self,
const_value = None,
points = False,
time_series_uuid = None,
string_lookup_uuid = None):
string_lookup_uuid = None,
pre_packed = False):
"""Caches array and adds to the list of imported properties (but not to the collection dict).
arguments:
Expand Down Expand Up @@ -2250,6 +2252,7 @@ def add_cached_array_to_imported_list(self,
be provided when writing hdf5 and creating xml for the imported list
string_lookup_uuid (UUID, optional): should be provided for categorical properties, though can alternatively
be specified when creating xml
pre_packed (bool, default False): set to True if the property is boolean and the array is already packed
returns:
uuid of nascent property object
Expand All @@ -2271,6 +2274,7 @@ def add_cached_array_to_imported_list(self,
assert (cached_array is not None and const_value is None) or (cached_array is None and const_value is not None)
assert not points or not discrete
assert count > 0
assert (not pre_packed) or ((cached_array is not None) and (cached_array.dtype == np.uint8))
rqp_c.check_and_warn_property_kind(property_kind, 'adding property to imported list')

if self.imported_list is None:
Expand All @@ -2288,7 +2292,7 @@ def add_cached_array_to_imported_list(self,
self.imported_list.append(
(uuid, source_info, keyword, cached_name, discrete, uom, time_index, null_value, min_value, max_value,
property_kind, facet_type, facet, realization, indexable_element, count, local_property_kind_uuid,
const_value, points, time_series_uuid, string_lookup_uuid))
const_value, points, time_series_uuid, string_lookup_uuid, pre_packed))
return uuid

def add_similar_to_imported_list(self,
Expand All @@ -2311,6 +2315,7 @@ def add_similar_to_imported_list(self,
points = None,
time_series_uuid = None,
string_lookup_uuid = None,
pre_packed = False,
similar_model = None,
title = None):
"""Caches array and adds to the list of imported properties using default metadata from a similar property.
Expand Down Expand Up @@ -2342,6 +2347,7 @@ def add_similar_to_imported_list(self,
be provided when writing hdf5 and creating xml for the imported list
string_lookup_uuid (UUID, optional): should be provided for categorical properties, though can alternatively
be specified when creating xml
pre_packed (bool, default False): set to True if the property is boolean and the cached array is packed
similar_model (Model, optional): the model where the similar property resides, if not the same as this
property collection
title (str, optional): synonym for keyword argument
Expand Down Expand Up @@ -2398,6 +2404,7 @@ def get_arg(supplied, similarly):
args['string_lookup_uuid'] = get_arg(time_series_uuid, similar.string_lookup_uuid())
em = similar.extra_metadata if hasattr(similar, 'extra_metadata') else {}
args['source_info'] = get_arg(source_info, em.get('source'))
args['pre_packed'] = pre_packed

return self.add_cached_array_to_imported_list(cached_array, **args)

Expand Down Expand Up @@ -2445,7 +2452,8 @@ def write_hdf5_for_imported_list(self,
as 32 bit; if None, the system default is to write as 32 bit; if True, 32 bit is used; if
False, 64 bit data is written; ignored if dtype is not None
use_pack (bool, default False): if True, bool arrays will be packed along their last axis; this
will generally result in hdf5 data that is not readable by non-resqpy applications
will generally result in hdf5 data that is not readable by non-resqpy applications; leave
as False for already packed arrays
chunks (str, optional): if not None, one of 'auto', 'all', or 'slice', controlling hdf5 chunks
compression (str, optional): if not None, one of 'gzip' or 'lzf' being the hdf5 compression
algorithm to be used; gzip gives better compression ratio but is slower
Expand Down Expand Up @@ -2473,8 +2481,8 @@ def write_hdf5_for_imported_list(self,
uuid = entry[0]
cached_name = entry[3]
tail = 'points_patch0' if entry[18] else 'values_patch0'
if use_pack and (str(dtype).startswith('bool') or
(dtype is None and str(self.__dict__[cached_name].dtype) == 'bool')):
if use_pack and ('bool' in str(dtype) or
(dtype is None and 'bool' in str(self.__dict__[cached_name].dtype))):
dtype = 'pack'
h5_reg.register_dataset(uuid, tail, self.__dict__[cached_name], dtype = dtype)
h5_reg.write(file = file_name, mode = mode, use_int32 = use_int32)
Expand Down Expand Up @@ -2601,7 +2609,8 @@ def create_xml(self,
points = False,
extra_metadata = {},
const_value = None,
expand_const_arrays = False):
expand_const_arrays = False,
pre_packed = False):
"""Create a property xml node for a single property related to a given supporting representation node.
arguments:
Expand Down Expand Up @@ -2661,6 +2670,8 @@ def create_xml(self,
const_value (float, int or bool, optional): if present, create xml for a constant array filled with this value
expand_const_arrays (boolean, default False): if True, the hdf5 write must also have been called with the
same argument and the xml will treat a constant array as a normal array
pre_packed (boolean, default False): if True, the property is a boolean property and the array has already
been packed into bits
returns:
the newly created property xml node
Expand Down Expand Up @@ -2688,7 +2699,7 @@ def create_xml(self,
direction = None if facet_type is None or facet_type != 'direction' else facet

if self.support is not None:
pcxml._check_shape_list(self, indexable_element, direction, property_array, points, count)
pcxml._check_shape_list(self, indexable_element, direction, property_array, points, count, pre_packed)

# todo: assertions:
# numpy data type matches discrete flag (and assumptions about precision)
Expand Down

0 comments on commit f63314c

Please sign in to comment.