Merge pull request #100 from jacanchaplais/feature/adj-leaves-99

Added leaves property to AdjacencyList #99
jacanchaplais · Feb 23, 2023 · 69c8eed · 69c8eed
2 parents 0d60ca4 + 474f2e0
commit 69c8eed
Show file tree

Hide file tree

Showing 2 changed files with 115 additions and 50 deletions.
diff --git a/graphicle/data.py b/graphicle/data.py
@@ -1753,6 +1753,19 @@ class AdjacencyList(base.AdjacencyBase):
         Scalar value embedded on each edge.
     matrix : ndarray
         Adjacency matrix representation.
+    leaves : MaskArray
+        Provides a mask for selecting the leaves of a DAG / tree.
+
+        .. versionadded:: 0.2.4
+    data : ndarray
+        Underlying array data. Identical to ``edges`` attribute,
+        included for consistency with ``base.ArrayBase`` numpy
+        interfaces.
+
+        .. versionadded:: 0.2.4
+
+    Methods
+    -------
     """
 
     _data: base.AnyVector = _array_field("<i4", 2)
@@ -1798,7 +1811,77 @@ def __getitem__(self, key) -> "AdjacencyList":
         return self.__class__(self._data[key])
 
     def copy(self) -> "AdjacencyList":
-        return deepcopy(self)
+        """Returns an independent copy of this adjacency list.
+
+        Both the edge array and the weights are duplicated explicitly,
+        so the copy keeps its weights, and its independence from the
+        original does not rely on the constructor copying its inputs.
+
+        Returns
+        -------
+        AdjacencyList
+            New instance built from copies of the edges and weights.
+        """
+        return self.__class__(
+            self._data.copy(), weights=self.weights.copy()
+        )
+
+    @fn.cached_property
+    def _edge_relabel(self) -> base.IntVector:
+        """Edge list with every node label swapped for its position in
+        the sorted array of unique labels, given as two columns.
+        """
+        inverse_idxs = np.unique(self._data, return_inverse=True)[1]
+        return np.reshape(inverse_idxs, (-1, 2))
+
+    @fn.cached_property
+    def _sparse_signed(self) -> coo_array:
+        """Cached result of calling ``to_sparse()`` with no arguments."""
+        signed_arr = self.to_sparse()
+        return signed_arr
+
+    @fn.cached_property
+    def _sparse_unsigned(self) -> coo_array:
+        """Cached copy of ``_sparse_signed`` in which every stored
+        entry has been overwritten with ``True``.
+        """
+        unsigned = self._sparse_signed.copy()
+        unsigned.data.fill(True)
+        return unsigned
+
+    @property
+    def _sparse_weighted(self) -> coo_array:
+        """Copy of ``_sparse_signed`` with its stored entries replaced
+        by the ``weights`` attribute. Rebuilt on every access.
+        """
+        weighted = self._sparse_signed.copy()
+        weighted.data = self.weights
+        return weighted
+
+    @property
+    def data(self) -> base.VoidVector:
+        """Underlying edge array, viewed with ``dtype`` and flattened
+        to one dimension.
+        """
+        structured = self._data.view(self.dtype)
+        return structured.reshape(-1)
+
+    @property
+    def edges(self) -> base.VoidVector:
+        """Alias of the ``data`` attribute."""
+        edge_arr = self.data
+        return edge_arr
+
+    @property
+    def nodes(self) -> base.IntVector:
+        """Unique node labels found in the edge list, ordered by
+        ascending absolute value, irrespective of sign.
+        Positive sign conventionally means final state particle.
+        """
+        unique_labels = np.unique(self._data)
+        magnitude_order = np.abs(unique_labels).argsort()
+        return unique_labels[magnitude_order]  # type: ignore
+
+    @property
+    def leaves(self) -> MaskArray:
+        """A mask to select the leaves of the graph.
+
+        An entry is flagged when the vertex it points to (its column in
+        the sparse adjacency) has no outgoing edges, ie. its row of the
+        adjacency sums to zero.
+
+        Returns
+        -------
+        MaskArray
+            Boolean mask with one element per stored entry of the
+            sparse adjacency.
+        """
+        out_degree = self._sparse_unsigned.sum(axis=1)
+        zero_idxs = np.flatnonzero(out_degree == 0)
+        # np.isin supersedes np.in1d, which is deprecated in NumPy 2.0
+        leaf_mask = np.isin(self._sparse_unsigned.col, zero_idxs)
+        return MaskArray(leaf_mask)
+
+    @property
+    def matrix(self) -> ty.Union[base.DoubleVector, base.IntVector]:
+        """Dense matrix form of the adjacency.
+
+        Notes
+        -----
+        When the ``weights`` attribute is populated, the nonzero
+        elements of the output take the values of the weights. If not,
+        the nonzero elements are integers, with ``1`` marking a single
+        edge between two vertices.
+
+        In either case, multiple edges joining the same pair of
+        vertices have their entries summed in the dense output, which
+        may cause loss of information.
+        """
+        has_weights = self.weights.size > 0
+        if not has_weights:
+            sparse_adj = self._sparse_unsigned.copy()
+            sparse_adj.data = sparse_adj.data.astype("<i4")
+        else:
+            sparse_adj = self._sparse_weighted
+        sparse_adj.sum_duplicates()
+        return sparse_adj.todense(order="C")
 
     @classmethod
     def from_matrix(
@@ -1833,48 +1916,6 @@ def from_matrix(
             kwargs["weights"] = sps_adj.data
         return cls(**kwargs)
 
-    @property
-    def matrix(self) -> ty.Union[base.DoubleVector, base.IntVector]:
-        size = len(self.nodes)
-        if len(self.weights) > 0:
-            weights = self.weights
-            dtype = self.weights.dtype
-        else:
-            weights = np.array(1)
-            dtype = "<i4"
-        adj = np.zeros((size, size), dtype=dtype)
-        abs_edges = self._edge_relabel
-        adj[abs_edges[:, 0], abs_edges[:, 1]] = weights
-        return adj
-
-    @fn.cached_property
-    def _edge_relabel(self) -> base.IntVector:
-        _, inv = np.unique(self._data, return_inverse=True)
-        return inv.reshape(-1, 2)
-
-    @property
-    def data(self) -> base.VoidVector:
-        return self._data.view(self.dtype).reshape(-1)
-
-    @property
-    def edges(self) -> base.VoidVector:
-        return self.data
-
-    @property
-    def nodes(self) -> base.IntVector:
-        """Nodes are extracted from the edge list, and put in
-        ascending order of magnitude, regardless of sign.
-        Positive sign conventionally means final state particle.
-        """
-        # extract nodes from edge list
-        unsort_nodes = np.unique(self._data)
-        sort_idxs = np.argsort(np.abs(unsort_nodes))
-        return unsort_nodes[sort_idxs]  # type: ignore
-
-    @fn.cached_property
-    def _sparse(self) -> coo_array:
-        return self.to_sparse()
-
     def to_sparse(self, data: ty.Optional[base.AnyVector] = None) -> coo_array:
         """Converts the graph structure to a ``scipy.sparse.coo_array``
         instance.

diff --git a/graphicle/select.py b/graphicle/select.py
@@ -30,6 +30,7 @@
     "partition_descendants",
     "hadron_vertices",
     "fastjet_clusters",
+    "leaf_masks",
 ]
 
 
@@ -51,6 +52,9 @@ def fastjet_clusters(
 
     :group: select
 
+    .. versionadded:: 0.2.3
+       Migrated from ``graphicle.calculate.cluster_pmu()``.
+
     Parameters
     ----------
     pmu: gcl.MomentumArray
@@ -87,9 +91,6 @@ def fastjet_clusters(
 
     ``p_val`` set to ``-1`` gives anti-kT, ``0`` gives Cambridge-Aachen,
     and ``1`` gives kT clusterings.
-
-    .. versionadded:: 0.2.3
-    Migrated from ``graphicle.calculate.cluster_pmu()``.
     """
     pmu_pyjet = pmu.data[["e", "x", "y", "z"]]
     pmu_pyjet.dtype.names = "E", "px", "py", "pz"
@@ -126,6 +127,8 @@ def find_vertex(
 
     :group: select
 
+    .. versionadded:: 0.1.0
+
     Parameters
     ----------
     graph : Graphicle
@@ -200,6 +203,11 @@ def vertex_descendants(adj: gcl.AdjacencyList, vertex: int) -> gcl.MaskArray:
 
     :group: select
 
+    .. versionadded:: 0.1.0
+
+    .. versionchanged:: 0.1.11
+       Performance enhancements.
+
     Parameters
     ----------
     adj : AdjacencyList
@@ -213,10 +221,11 @@ def vertex_descendants(adj: gcl.AdjacencyList, vertex: int) -> gcl.MaskArray:
         Boolean mask over the graphicle objects associated with the
         passed AdjacencyList.
     """
-    bft = breadth_first_tree(adj._sparse, abs(vertex))
-    desc_nodes = (2 * bft.data.astype("<i4") - 1) * bft.indices
-    mask = np.isin(adj.edges["in"], desc_nodes)
-    mask[adj.edges["in"] == vertex] = True
+    sparse = adj._sparse_signed
+    vertex = sparse.row[vertex == adj.edges["in"]][0]
+    bft = breadth_first_tree(sparse, vertex)
+    mask = np.isin(sparse.row, bft.indices)
+    mask[sparse.row == vertex] = True  # include parent vertex
     return gcl.MaskArray(mask)
 
 
@@ -228,6 +237,8 @@ def hadron_vertices(
 
     :group: select
 
+    .. versionadded:: 0.1.11
+
     Parameters
     ----------
     adj : AdjacencyList
@@ -312,6 +323,8 @@ def partition_descendants(
 
     :group: select
 
+    .. versionadded:: 0.1.11
+
     Parameters
     ----------
     graph : Graphicle
@@ -430,6 +443,11 @@ def hard_descendants(
 
     :group: select
 
+    .. versionadded:: 0.1.0
+
+    .. versionchanged:: 0.1.11
+       Target parameter now optional.
+
     Parameters
     ----------
     graph : Graphicle
@@ -479,6 +497,8 @@ def hierarchy(
 
     :group: select
 
+    .. versionadded:: 0.1.11
+
     Parameters
     ----------
     graph : Graphicle
@@ -652,6 +672,8 @@ def leaf_masks(mask_tree: gcl.MaskGroup) -> gcl.MaskGroup:
 
     :group: select
 
+    .. versionadded:: 0.1.11
+
     Parameters
     ----------
     mask_tree : MaskGroup
@@ -675,6 +697,8 @@ def any_overlap(masks: gcl.MaskGroup) -> bool:
 
     :group: select
 
+    .. versionadded:: 0.1.0
+
     Parameters
     ----------
     masks : MaskGroup