Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added hierarchical masking #55

Merged
merged 30 commits into from
Jan 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
fbd1c9c
func for identifying hadronisation vertices #50
jacanchaplais Dec 9, 2022
31ab98d
added unbounded option to StatusArray.in_range #50
jacanchaplais Dec 9, 2022
6b1786d
patched typo
jacanchaplais Dec 9, 2022
95b8f83
Merge branch 'main' into feature/tag-50
jacanchaplais Dec 13, 2022
bfa119f
created algorithm for nested hierarchy masks #50
jacanchaplais Dec 16, 2022
65161db
sphinx docstring formatting #50
jacanchaplais Dec 16, 2022
df03980
added str displaying tree struc of MaskGroup #50
jacanchaplais Dec 16, 2022
5433ded
changed list to tuple #50
jacanchaplais Dec 16, 2022
c5806e0
added flatten method to MaskGroup #50
jacanchaplais Dec 16, 2022
190f6ef
documented methods and attributes of MaskGroup
jacanchaplais Dec 16, 2022
89c302a
nested structure preserving subscript for MaskGroup #50
jacanchaplais Dec 16, 2022
62d10d6
documented hierarchy #50
jacanchaplais Dec 16, 2022
f390039
added rich to build dependencies #50
jacanchaplais Dec 16, 2022
86b528c
pinned rich version in sphinx autobuild #50
jacanchaplais Dec 16, 2022
2f015f3
docstring formatting #50
jacanchaplais Dec 16, 2022
571c92c
sphinx formatting #50
jacanchaplais Dec 16, 2022
b1e4d94
more heterogeneous composite example #50
jacanchaplais Dec 16, 2022
b6db47b
split hierarchy func into more cohesive chunks #50
jacanchaplais Jan 3, 2023
10d998c
removed doc decorator interfering with type annotations
jacanchaplais Jan 5, 2023
1396cc1
import base for array types
jacanchaplais Jan 5, 2023
17f1020
Merge branch 'main' into feature/tag-50
jacanchaplais Jan 5, 2023
a3b5766
Merge branch 'main' into feature/tag-50
jacanchaplais Jan 5, 2023
d27f981
export new funcs #50
jacanchaplais Jan 5, 2023
e41aadc
Merge branch 'main' into feature/tag-50
jacanchaplais Jan 9, 2023
4629f99
Merge branch 'main' into feature/tag-50
jacanchaplais Jan 9, 2023
e93395e
fn to calc dist betw parton and hadron at hadron vtx #50
jacanchaplais Jan 19, 2023
c3a0fab
partition nested hierarchy masks by closest hard parton #50
jacanchaplais Jan 19, 2023
10c8364
Merge branch 'main' into feature/tag-50
jacanchaplais Jan 19, 2023
afd8c1a
improved cohesion of _make_tree, handled edge case of two hard partons
jacanchaplais Jan 19, 2023
554b61a
moved hard desc partons from parent latent into their own mask #50
jacanchaplais Jan 20, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ dependencies:
- numpy=1.23.3
- scipy=1.9.3
- networkx=2.8.8
- rich=12.6.0
- pip:
- mcpid ==0.0.4
- typicle ==0.1.4
Expand Down
160 changes: 121 additions & 39 deletions graphicle/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
from enum import Enum
from functools import cached_property
import warnings
from collections import abc
from collections import abc, OrderedDict
from typing import (
Tuple,
List,
Expand All @@ -64,6 +64,8 @@
from numpy.lib import recfunctions as rfn
from typicle import Types
from typicle.convert import cast_array
from rich.tree import Tree
from rich.console import Console

from . import base

Expand Down Expand Up @@ -284,14 +286,14 @@ def _mask_neq(mask1: base.MaskLike, mask2: base.MaskLike) -> MaskArray:
return MaskArray(np.not_equal(mask1, mask2))


_IN_MASK_DICT = Dict[str, Union[MaskArray, base.BoolVector]]
_MASK_DICT = Dict[str, MaskArray]
_IN_MASK_DICT = OrderedDict[str, Union[MaskArray, base.BoolVector]]
_MASK_DICT = OrderedDict[str, MaskArray]


def _mask_dict_convert(masks: _IN_MASK_DICT) -> _MASK_DICT:
out_masks = dict()
out_masks = OrderedDict()
for key, val in masks.items():
if isinstance(val, MaskArray):
if isinstance(val, MaskArray) or isinstance(val, MaskGroup):
mask = val
else:
mask = MaskArray(val)
Expand All @@ -308,7 +310,7 @@ class MaskGroup(base.MaskBase, abc.MutableMapping[str, base.MaskBase]):

Parameters
----------
mask_arrays : dict of MaskArrays or array-like objects
_mask_arrays : dict of MaskArrays or array-like objects
Dictionary of MaskArray objects to be composed.
agg_op : str or MaskAggOp
Defines the aggregation operation when accessing the `data`
Expand All @@ -319,6 +321,25 @@ class MaskGroup(base.MaskBase, abc.MutableMapping[str, base.MaskBase]):
----------
data : np.ndarray
Combination of all masks in group via bitwise AND reduction.
names : list[str]
Provides the string values of the keys to the top-level nested
``MaskBase`` objects as a list. Will be deprecated in future.
``MaskGroup.keys()`` is preferred.
bitwise_or : np.ndarray[bool_]
Bitwise ``OR`` reduction over the nested masks.
bitwise_or : np.ndarray[bool_]
Bitwise ``AND`` reduction over the nested masks.
dict : dict[base.MaskBase]
Masks nested in a dictionary instead of a ``MaskGroup``.

Methods
-------
from_numpy_structured()
Converts a structured boolean array into a ``MaskGroup``.
flatten()
Removes any nesting of ``MaskGroup``s within ``MaskGroup``s.
copy()
Copies the ``MaskGroup`` instance.
"""

_mask_arrays: _MASK_DICT = field(
Expand All @@ -328,27 +349,66 @@ class MaskGroup(base.MaskBase, abc.MutableMapping[str, base.MaskBase]):

@classmethod
def from_numpy_structured(cls, arr: np.ndarray) -> "MaskGroup":
return cls(dict(map(lambda name: (name, arr[name]), arr.dtype.names)))
return cls(
dict( # type: ignore
map(lambda name: (name, arr[name]), arr.dtype.names)
)
)

def __repr__(self) -> str:
keys = ", ".join(map(lambda name: '"' + name + '"', self.names))
return f"MaskGroup(mask_arrays=[{keys}], agg_op={self.agg_op.name})"
return f"MaskGroup(masks=[{keys}], agg_op={self.agg_op.name})"

def __rich__(self) -> Tree:
name = self.__class__.__name__
agg_op = self.agg_op.name
tree = Tree(f"{name}(agg_op=[yellow]{agg_op}[default])")

def make_tree(mask: "MaskGroup", branch: Tree) -> Tree:
branch_copy = deepcopy(branch)
for key, val in mask.items():
sub_branch = Tree(f"{key}")
if isinstance(val, MaskGroup):
sub_branch = sub_branch.add(make_tree(val, sub_branch))
branch_copy.add(sub_branch)
return branch_copy

return make_tree(self, tree)

def __str__(self) -> str:
console = Console(color_system=None)
with console.capture() as capture:
console.print(self)
return capture.get()

def __iter__(self) -> Iterator[str]:
return iter(self._mask_arrays)

def __getitem__(self, key) -> Union[MaskArray, "MaskGroup"]:
if not isinstance(key, str):
"""Subscripting for ``MaskGroup`` object.

Parameters
----------
key : str, list[str], slice, np.ndarray[bool_], base.MaskBase
If string, will return the ``MaskBase`` object associated
with a key of the same name. If list of strings, will return
a new ``MaskGroup`` with only the keys included in the list.
Otherwise will be treated as an array-like slice, returning
the ``MaskGroup`` whose components each individually have
the passed slice applied.
"""
agg = self.agg_op
if isinstance(key, list):
return self.__class__(
dict(
map(
lambda name_arr: (name_arr[0], name_arr[1][key]),
self._mask_arrays.items(),
)
)
OrderedDict({k: self._mask_arrays[k] for k in key}), agg
)

return self._mask_arrays[key]
elif not isinstance(key, str):
masked_data = OrderedDict()
for dict_key, val in self._mask_arrays.items():
masked_data[dict_key] = val[key]
return self.__class__(masked_data, agg)
else:
return self._mask_arrays[key]

def __setitem__(self, key, mask) -> None:
"""Add a new MaskArray to the group, with given key."""
Expand Down Expand Up @@ -446,6 +506,30 @@ def data(self) -> base.BoolVector:
def dict(self) -> Dict[str, base.BoolVector]:
return {key: val.data for key, val in self._mask_arrays.items()}

def flatten(self) -> "MaskGroup":
"""Removes nesting such that the ``MaskGroup`` contains only
``MaskArray``s, and no other ``MaskGroup``s.

Returns
-------
flat_masks : MaskGroup
``MaskGroup`` in which all sub-``MaskGroup``s are aggregated
and placed at the top level of the outer ``MaskGroup``,
along with the ``MaskArray``s from the innermost levels.
"""

def leaves(mask_group: "MaskGroup"):
for key, val in mask_group.items():
if key == "latent":
continue
if isinstance(val, type(self)):
yield key, val.data
yield from leaves(val)
else:
yield key, val

return self.__class__(dict(leaves(self)), "or") # type: ignore


############################
# PDG STORAGE AND QUERYING #
Expand Down Expand Up @@ -832,7 +916,7 @@ class HelicityArray(base.ArrayBase):
data: base.HalfIntVector = array_field("helicity")

def copy(self) -> "HelicityArray":
"""Returns a new StatusArray instance with same data."""
"""Returns a new HelicityArray instance with same data."""
return deepcopy(self)

def __getitem__(self, key) -> "HelicityArray":
Expand Down Expand Up @@ -897,20 +981,24 @@ def __bool__(self) -> bool:
return _truthy(self)

def in_range(
self, min_status: int, max_status: int, sign_sensitive: bool = False
self,
min_status: int = 0,
max_status: Optional[int] = None,
sign_sensitive: bool = False,
) -> MaskArray:
"""Returns a boolean mask over particles with status codes
sitting within passed (inclusive) range.

Parameters
----------
min_status : int
Minimum value for status codes.
max_status : int
Maximum value for status codes.
Minimum value for status codes. Default is 0.
max_status : int, optional
Maximum value for status codes. Passing ``None`` results in
unbounded upper range. Default is ``None``.
sign_sensitive : bool
Whether or not to take signs into account during the
comparison. (Default is False.)
comparison. Default is False.

Returns
-------
Expand All @@ -921,10 +1009,10 @@ def in_range(
array = self.data
if sign_sensitive is False:
array = np.abs(array)
elif sign_sensitive is not True:
raise ValueError("sign_sensitive must be boolean valued.")
more_than = array >= min_status # type: ignore
less_than = array <= max_status # type: ignore
more_than = array >= min_status
if max_status is None:
return MaskArray(more_than)
less_than = array <= max_status
return MaskArray(np.bitwise_and(more_than, less_than))

@property
Expand Down Expand Up @@ -1445,37 +1533,31 @@ def from_numpy(
adj_list = AdjacencyList()
return cls(particles=particles, adj=adj_list)

@property # type: ignore
@_attach_doc(PdgArray)
@property
def pdg(self) -> PdgArray:
return self.particles.pdg

@property # type: ignore
@_attach_doc(MomentumArray)
@property
def pmu(self) -> MomentumArray:
return self.particles.pmu

@property # type: ignore
@_attach_doc(ColorArray)
@property
def color(self) -> ColorArray:
return self.particles.color

@property # type: ignore
@_attach_doc(HelicityArray)
@property
def helicity(self) -> HelicityArray:
return self.particles.helicity

@property # type: ignore
@_attach_doc(StatusArray)
@property
def status(self) -> StatusArray:
return self.particles.status

@property
def hard_mask(self) -> MaskGroup:
return self.particles.status.hard_mask

@property # type: ignore
@_attach_doc(base.MaskBase)
@property
def final(self) -> base.MaskBase:
return self.particles.final

Expand Down
10 changes: 10 additions & 0 deletions graphicle/matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,3 +193,13 @@ def delta_R_aff(pmu: gcl.MomentumArray) -> base.DoubleVector:
parallel to the beam axis, __ie.__ with infinite pseudorapidity.
"""
return pmu.delta_R(pmu)


def parton_hadron_distance(
parton_pmu: gcl.MomentumArray,
hadron_pmu: gcl.MomentumArray,
pt_exp: float = -0.1,
) -> base.DoubleVector:
dR = parton_pmu.delta_R(hadron_pmu)
pt_weight = np.power(parton_pmu.pt, pt_exp)
return pt_weight[:, np.newaxis] * dR
Loading