From 6f4c35f1433a9da2d941a844ea5b6dceed9a4598 Mon Sep 17 00:00:00 2001 From: Maxime Lucas Date: Wed, 22 Mar 2023 18:07:47 +0100 Subject: [PATCH 1/6] perf: cleaner and faster incidence_matrix --- xgi/linalg/matrix.py | 40 +++++++++++++++------------------------- 1 file changed, 15 insertions(+), 25 deletions(-) diff --git a/xgi/linalg/matrix.py b/xgi/linalg/matrix.py index cea0a2694..ff54dcfd7 100644 --- a/xgi/linalg/matrix.py +++ b/xgi/linalg/matrix.py @@ -110,34 +110,24 @@ def incidence_matrix( rowdict = {v: k for k, v in node_dict.items()} coldict = {v: k for k, v in edge_dict.items()} + # Compute the non-zero values, row and column indices for the given order + rows = [] + cols = [] + data = [] + for edge in edge_ids: + members = H._edge[edge] + for node in members: + rows.append(node_dict[node]) + cols.append(edge_dict[edge]) + data.append(weight(node, edge, H)) + + # Create the incidence matrix as a CSR matrix if sparse: - # Create csr sparse matrix - rows = [] - cols = [] - data = [] - for node in node_ids: - memberships = H.nodes.memberships(node) - # keep only those with right order - memberships = [i for i in memberships if i in edge_ids] - if len(memberships) > 0: - for edge in memberships: - data.append(weight(node, edge, H)) - rows.append(node_dict[node]) - cols.append(edge_dict[edge]) - else: # include disconnected nodes - for edge in edge_ids: - data.append(0) - rows.append(node_dict[node]) - cols.append(edge_dict[edge]) - I = csr_array((data, (rows, cols))) + I = csr_array((data, (rows, cols)), shape=(num_nodes, num_edges), dtype=int) else: - # Create an np.matrix I = np.zeros((num_nodes, num_edges), dtype=int) - for edge in edge_ids: - members = H.edges.members(edge) - for node in members: - I[node_dict[node], edge_dict[edge]] = weight(node, edge, H) - + I[rows, cols] = data + return (I, rowdict, coldict) if index else I From 893299effcf7816e9530dbf2a117a2de7685db69 Mon Sep 17 00:00:00 2001 From: Maxime Lucas Date: Wed, 22 Mar 2023 18:46:57 +0100 Subject: [PATCH 2/6] fix: return sparse (if asked) for empty H (sparse and non-sparse now have dim (0,0)). fixed corresponding tests. --- tests/linalg/test_matrix.py | 6 +++--- xgi/linalg/matrix.py | 20 +++++++++++++------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/tests/linalg/test_matrix.py b/tests/linalg/test_matrix.py index 96b772980..95b7a0d17 100644 --- a/tests/linalg/test_matrix.py +++ b/tests/linalg/test_matrix.py @@ -579,13 +579,13 @@ def test_empty_order(edgelist6): H = xgi.Hypergraph(edgelist6) I, _, _ = xgi.incidence_matrix(H, order=1, index=True) A, _ = xgi.adjacency_matrix(H, order=1, index=True) - assert I.shape == (0,) + assert I.shape == (0, 0) assert A.shape == (5, 5) def test_empty(): H = xgi.Hypergraph([]) - assert xgi.incidence_matrix(H).shape == (0,) + assert xgi.incidence_matrix(H).shape == (0, 0) assert xgi.adjacency_matrix(H).shape == (0, 0) assert xgi.laplacian(H).shape == (0, 0) assert xgi.clique_motif_matrix(H).shape == (0, 0) @@ -593,7 +593,7 @@ def test_empty(): # with indices data = xgi.incidence_matrix(H, index=True) assert len(data) == 3 - assert data[0].shape == (0,) + assert data[0].shape == (0, 0) assert type(data[1]) == dict and type(data[2]) == dict data = xgi.adjacency_matrix(H, index=True) diff --git a/xgi/linalg/matrix.py b/xgi/linalg/matrix.py index ff54dcfd7..992e8bcac 100644 --- a/xgi/linalg/matrix.py +++ b/xgi/linalg/matrix.py @@ -98,7 +98,11 @@ def incidence_matrix( if order is not None: edge_ids = H.edges.filterby("order", order) if not edge_ids or not node_ids: - return (np.array([]), {}, {}) if index else np.array([]) + if sparse: + I = csr_array((0, 0), dtype=int) + else: + I = np.empty((0, 0), dtype=int) + return (I, {}, {}) if index else I num_edges = len(edge_ids) num_nodes = len(node_ids) @@ -159,11 +163,12 @@ def adjacency_matrix(H, order=None, sparse=True, s=1, weighted=False, index=Fals """ I, rowdict, coldict = incidence_matrix(H, order=order, sparse=sparse, index=True) - if I.shape == (0,): + if I.shape == (0, 0): if not rowdict: - A = np.array([]) + A = csr_array((0,0)) if sparse else np.empty((0,0)) if not coldict: - A = np.zeros((H.num_nodes, H.num_nodes)) + shape = (H.num_nodes, H.num_nodes) + A = csr_array(shape, dtype=int) if sparse else np.zeros(shape, dtype=int) return (A, {}) if index else A A = I.dot(I.T) @@ -236,7 +241,7 @@ def degree_matrix(H, order=None, index=False): """ I, rowdict, _ = incidence_matrix(H, order=order, index=True) - if I.shape == (0,): + if I.shape == (0, 0): K = np.zeros(H.num_nodes) else: K = np.ravel(np.sum(I, axis=1)) # flatten @@ -281,8 +286,9 @@ def laplacian(H, order=1, sparse=False, rescale_per_node=False, index=False): H, order=order, sparse=sparse, weighted=True, index=True ) - if A.shape == (0,): - return (np.array([]), {}) if index else np.array([]) + if A.shape == (0, 0): + L = csr_array((0,0)) if sparse else np.empty((0,0)) + return (L, {}) if index else L if sparse: K = csr_array(diags(degree_matrix(H, order=order))) From 3ac3a94f057b2430f85e238fe265b334ee7214e4 Mon Sep 17 00:00:00 2001 From: Maxime Lucas Date: Wed, 22 Mar 2023 19:20:29 +0100 Subject: [PATCH 3/6] tests: more to check sparse equals non sparse --- tests/linalg/test_matrix.py | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/tests/linalg/test_matrix.py b/tests/linalg/test_matrix.py index 95b7a0d17..9de61c062 100644 --- a/tests/linalg/test_matrix.py +++ b/tests/linalg/test_matrix.py @@ -242,11 +242,17 @@ def test_adjacency_matrix(edgelist1, edgelist4): assert A4[node_dict4[4], node_dict4[6]] == 0 A5 = xgi.adjacency_matrix(H1, sparse=False) + A5_sp = xgi.adjacency_matrix(H1, sparse=True) assert isinstance(A5, np.ndarray) - assert np.all(A5 == A1.todense()) + assert np.all(A5 == A5_sp.todense()) A6 = xgi.adjacency_matrix(H1, order=1, sparse=False) - assert np.all(A6 == A3.todense()) + A6_sp = xgi.adjacency_matrix(H1, order=1, sparse=True) + assert np.all(A6 == A6_sp.todense()) + + A7 = xgi.adjacency_matrix(H1, order=2, sparse=False) + A7_sp = xgi.adjacency_matrix(H1, order=2, sparse=True) + assert np.all(A7 == A7_sp.todense()) def test_laplacian(edgelist2, edgelist6): @@ -319,6 +325,19 @@ def test_laplacian(edgelist2, edgelist6): assert isinstance(L4, csr_array) assert np.all(L1 == L4.todense()) + L5 = xgi.laplacian(H1, sparse=False) + L5_sp = xgi.laplacian(H1, sparse=True) + assert isinstance(L5, np.ndarray) + assert np.all(L5 == L5_sp.todense()) + + L6 = xgi.laplacian(H1, order=1, sparse=False) + L6_sp = xgi.laplacian(H1, order=1, sparse=True) + assert np.all(L6 == L6_sp.todense()) + + L7 = xgi.laplacian(H1, order=2, sparse=False) + L7_sp = xgi.laplacian(H1, order=2, sparse=True) + assert np.all(L7 == L7_sp.todense()) + def test_multiorder_laplacian(edgelist2, edgelist6): el1 = edgelist6 @@ -610,3 +629,14 @@ def test_empty(): assert len(data) == 2 assert data[0].shape == (0, 0) assert type(data[1]) == dict + + # sparse + assert xgi.incidence_matrix(H, sparse=True).shape == (0, 0) + assert xgi.incidence_matrix(H, sparse=False).shape == (0, 0) + + assert xgi.adjacency_matrix(H, sparse=True).shape == (0, 0) + assert xgi.adjacency_matrix(H, sparse=False).shape == (0, 0) + + assert xgi.laplacian(H, sparse=True).shape == (0, 0) + assert xgi.laplacian(H, sparse=False).shape == (0, 0) + From 39c729945d27ccf0d8f054348243c1a8d16fbf94 Mon Sep 17 00:00:00 2001 From: Maxime Lucas Date: Wed, 22 Mar 2023 19:23:12 +0100 Subject: [PATCH 4/6] tests: more for sparse --- tests/linalg/test_matrix.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/tests/linalg/test_matrix.py b/tests/linalg/test_matrix.py index 9de61c062..0ecc6c51e 100644 --- a/tests/linalg/test_matrix.py +++ b/tests/linalg/test_matrix.py @@ -596,10 +596,20 @@ def test_boundary_matrix(edgelist4): def test_empty_order(edgelist6): H = xgi.Hypergraph(edgelist6) - I, _, _ = xgi.incidence_matrix(H, order=1, index=True) - A, _ = xgi.adjacency_matrix(H, order=1, index=True) + I, _, _ = xgi.incidence_matrix(H, order=1, sparse=False, index=True) + A, _ = xgi.adjacency_matrix(H, order=1, sparse=False, index=True) + L, _ = xgi.laplacian(H, order=1, sparse=False, index=True) assert I.shape == (0, 0) assert A.shape == (5, 5) + assert L.shape == (5, 5) + + # sparse + I_sp, _, _ = xgi.incidence_matrix(H, order=1, sparse=True, index=True) + A_sp, _ = xgi.adjacency_matrix(H, order=1, sparse=True, index=True) + L_sp, _ = xgi.laplacian(H, order=1, sparse=True, index=True) + assert I_sp.shape == (0, 0) + assert A_sp.shape == (5, 5) + assert L_sp.shape == (5, 5) def test_empty(): From fe3db5fe6ec67a79236dc8011ffd23081c61a10f Mon Sep 17 00:00:00 2001 From: Maxime Lucas Date: Wed, 22 Mar 2023 20:37:20 +0100 Subject: [PATCH 5/6] perf: made multiorder_laplacian truly sparse and faster #301 --- xgi/linalg/matrix.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/xgi/linalg/matrix.py b/xgi/linalg/matrix.py index 992e8bcac..47e90566b 100644 --- a/xgi/linalg/matrix.py +++ b/xgi/linalg/matrix.py @@ -345,12 +345,15 @@ def multiorder_laplacian( raise ValueError("orders and weights must have the same length.") Ls = [ - laplacian(H, order=i, sparse=False, rescale_per_node=rescale_per_node) - for i in orders + laplacian(H, order=d, sparse=sparse, rescale_per_node=rescale_per_node) + for d in orders ] - Ks = [degree_matrix(H, order=i) for i in orders] + Ks = [degree_matrix(H, order=d) for d in orders] - L_multi = np.zeros((H.num_nodes, H.num_nodes)) + if sparse: + L_multi = csr_array((H.num_nodes, H.num_nodes)) + else: + L_multi = np.zeros((H.num_nodes, H.num_nodes)) for L, K, w, d in zip(Ls, Ks, weights, orders): if np.all(K == 0): @@ -362,10 +365,8 @@ def multiorder_laplacian( else: L_multi += L * w / np.mean(K) - if sparse: - L_multi = csr_array(L_multi) + rowdict = {i: v for i, v in enumerate(H.nodes)} - rowdict = dict(zip(range(H.num_nodes), H.nodes)) return (L_multi, rowdict) if index else L_multi From 53e481858a50638cb245f1b0e0992fefecb79cf2 Mon Sep 17 00:00:00 2001 From: Maxime Lucas Date: Wed, 22 Mar 2023 22:11:37 +0100 Subject: [PATCH 6/6] style: ran isort and black --- tests/drawing/test_layout.py | 4 ++-- tests/generators/test_nonuniform.py | 2 +- tests/linalg/test_matrix.py | 3 +-- xgi/algorithms/centrality.py | 2 +- xgi/convert.py | 28 ++++++++-------------------- xgi/linalg/matrix.py | 12 ++++++------ 6 files changed, 19 insertions(+), 32 deletions(-) diff --git a/tests/drawing/test_layout.py b/tests/drawing/test_layout.py index 0929b3bb2..1e97e4e95 100644 --- a/tests/drawing/test_layout.py +++ b/tests/drawing/test_layout.py @@ -1,6 +1,5 @@ -import pytest - import numpy as np +import pytest import xgi from xgi.exception import XGIError @@ -82,6 +81,7 @@ def test_barycenter_spring_layout(hypergraph1): pos = xgi.barycenter_spring_layout(H) assert len(pos) == H.num_nodes + def test_weighted_barycenter_spring_layout(hypergraph1): H = xgi.random_hypergraph(10, [0.2], seed=1) diff --git a/tests/generators/test_nonuniform.py b/tests/generators/test_nonuniform.py index d2350db57..e7246d6fe 100644 --- a/tests/generators/test_nonuniform.py +++ b/tests/generators/test_nonuniform.py @@ -1,7 +1,7 @@ -import pytest import random import numpy as np +import pytest import xgi diff --git a/tests/linalg/test_matrix.py b/tests/linalg/test_matrix.py index 0ecc6c51e..f9a593c33 100644 --- a/tests/linalg/test_matrix.py +++ b/tests/linalg/test_matrix.py @@ -640,7 +640,7 @@ def test_empty(): assert data[0].shape == (0, 0) assert type(data[1]) == dict - # sparse + # sparse assert xgi.incidence_matrix(H, sparse=True).shape == (0, 0) assert xgi.incidence_matrix(H, sparse=False).shape == (0, 0) @@ -649,4 +649,3 @@ def test_empty(): assert xgi.laplacian(H, sparse=True).shape == (0, 0) assert xgi.laplacian(H, sparse=False).shape == (0, 0) - diff --git a/xgi/algorithms/centrality.py b/xgi/algorithms/centrality.py index baa62cdb0..6618a1219 100644 --- a/xgi/algorithms/centrality.py +++ b/xgi/algorithms/centrality.py @@ -281,7 +281,7 @@ def line_vector_centrality(H): vc = {node: [] for node in H.nodes} edge_label_dict = {tuple(edge): index for index, edge in H._edge.items()} - + hyperedge_dims = {tuple(edge): len(edge) for edge in H.edges.members()} D = H.edges.size.max() diff --git a/xgi/convert.py b/xgi/convert.py index 481359e73..1a5a55328 100644 --- a/xgi/convert.py +++ b/xgi/convert.py @@ -5,23 +5,11 @@ import pandas as pd from networkx.algorithms import bipartite from numpy import matrix, ndarray -from scipy.sparse import ( - coo_array, - coo_matrix, - csc_array, - csc_matrix, - csr_array, - csr_matrix, - lil_array, - lil_matrix, -) - -from .classes import ( - Hypergraph, - SimplicialComplex, - maximal_simplices, - set_edge_attributes, -) +from scipy.sparse import (coo_array, coo_matrix, csc_array, csc_matrix, + csr_array, csr_matrix, lil_array, lil_matrix) + +from .classes import (Hypergraph, SimplicialComplex, maximal_simplices, + set_edge_attributes) from .exception import XGIError from .generators import empty_hypergraph, empty_simplicial_complex from .linalg import adjacency_matrix, incidence_matrix @@ -136,7 +124,7 @@ def convert_to_graph(H): def convert_to_line_graph(H): """Line graph of the hypergraph. - + The line graph of the hypergraph `H` is the graph whose nodes correspond to each hyperedge in `H`, linked together if they share at least one vertex. @@ -155,7 +143,7 @@ def convert_to_line_graph(H): LG = nx.Graph() edge_label_dict = {tuple(edge): index for index, edge in H._edge.items()} - + nodes = sorted(set(edge_label_dict.values())) LG.add_nodes_from(nodes) @@ -492,7 +480,7 @@ def from_simplicial_complex_to_hypergraph(SC): max_simplices = maximal_simplices(SC) H = Hypergraph() - H.add_nodes_from(SC.nodes) # to keep node order and isolated nodes + H.add_nodes_from(SC.nodes) # to keep node order and isolated nodes H.add_edges_from([list(SC.edges.members(e)) for e in max_simplices]) return H diff --git a/xgi/linalg/matrix.py b/xgi/linalg/matrix.py index 47e90566b..b4db2fbb0 100644 --- a/xgi/linalg/matrix.py +++ b/xgi/linalg/matrix.py @@ -98,9 +98,9 @@ def incidence_matrix( if order is not None: edge_ids = H.edges.filterby("order", order) if not edge_ids or not node_ids: - if sparse: + if sparse: I = csr_array((0, 0), dtype=int) - else: + else: I = np.empty((0, 0), dtype=int) return (I, {}, {}) if index else I @@ -124,14 +124,14 @@ def incidence_matrix( rows.append(node_dict[node]) cols.append(edge_dict[edge]) data.append(weight(node, edge, H)) - + # Create the incidence matrix as a CSR matrix if sparse: I = csr_array((data, (rows, cols)), shape=(num_nodes, num_edges), dtype=int) else: I = np.zeros((num_nodes, num_edges), dtype=int) I[rows, cols] = data - + return (I, rowdict, coldict) if index else I @@ -165,7 +165,7 @@ def adjacency_matrix(H, order=None, sparse=True, s=1, weighted=False, index=Fals if I.shape == (0, 0): if not rowdict: - A = csr_array((0,0)) if sparse else np.empty((0,0)) + A = csr_array((0, 0)) if sparse else np.empty((0, 0)) if not coldict: shape = (H.num_nodes, H.num_nodes) A = csr_array(shape, dtype=int) if sparse else np.zeros(shape, dtype=int) @@ -287,7 +287,7 @@ def laplacian(H, order=1, sparse=False, rescale_per_node=False, index=False): ) if A.shape == (0, 0): - L = csr_array((0,0)) if sparse else np.empty((0,0)) + L = csr_array((0, 0)) if sparse else np.empty((0, 0)) return (L, {}) if index else L if sparse: