Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Speed and memory improvements in linalg #303

Merged
merged 6 commits on Mar 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions tests/drawing/test_layout.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import pytest

import numpy as np
import pytest

import xgi
from xgi.exception import XGIError
Expand Down Expand Up @@ -82,6 +81,7 @@ def test_barycenter_spring_layout(hypergraph1):
pos = xgi.barycenter_spring_layout(H)
assert len(pos) == H.num_nodes


def test_weighted_barycenter_spring_layout(hypergraph1):

H = xgi.random_hypergraph(10, [0.2], seed=1)
Expand Down
2 changes: 1 addition & 1 deletion tests/generators/test_nonuniform.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pytest
import random

import numpy as np
import pytest

import xgi

Expand Down
53 changes: 46 additions & 7 deletions tests/linalg/test_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,11 +242,17 @@ def test_adjacency_matrix(edgelist1, edgelist4):
assert A4[node_dict4[4], node_dict4[6]] == 0

A5 = xgi.adjacency_matrix(H1, sparse=False)
A5_sp = xgi.adjacency_matrix(H1, sparse=True)
assert isinstance(A5, np.ndarray)
assert np.all(A5 == A1.todense())
assert np.all(A5 == A5_sp.todense())

A6 = xgi.adjacency_matrix(H1, order=1, sparse=False)
assert np.all(A6 == A3.todense())
A6_sp = xgi.adjacency_matrix(H1, order=1, sparse=True)
assert np.all(A6 == A6_sp.todense())

A7 = xgi.adjacency_matrix(H1, order=2, sparse=False)
A7_sp = xgi.adjacency_matrix(H1, order=2, sparse=True)
assert np.all(A7 == A7_sp.todense())


def test_laplacian(edgelist2, edgelist6):
Expand Down Expand Up @@ -319,6 +325,19 @@ def test_laplacian(edgelist2, edgelist6):
assert isinstance(L4, csr_array)
assert np.all(L1 == L4.todense())

L5 = xgi.laplacian(H1, sparse=False)
L5_sp = xgi.laplacian(H1, sparse=True)
assert isinstance(L5, np.ndarray)
assert np.all(L5 == L5_sp.todense())

L6 = xgi.laplacian(H1, order=1, sparse=False)
L6_sp = xgi.laplacian(H1, order=1, sparse=True)
assert np.all(L6 == L6_sp.todense())

L7 = xgi.laplacian(H1, order=2, sparse=False)
L7_sp = xgi.laplacian(H1, order=2, sparse=True)
assert np.all(L7 == L7_sp.todense())


def test_multiorder_laplacian(edgelist2, edgelist6):
el1 = edgelist6
Expand Down Expand Up @@ -577,23 +596,33 @@ def test_boundary_matrix(edgelist4):

def test_empty_order(edgelist6):
H = xgi.Hypergraph(edgelist6)
I, _, _ = xgi.incidence_matrix(H, order=1, index=True)
A, _ = xgi.adjacency_matrix(H, order=1, index=True)
assert I.shape == (0,)
I, _, _ = xgi.incidence_matrix(H, order=1, sparse=False, index=True)
A, _ = xgi.adjacency_matrix(H, order=1, sparse=False, index=True)
L, _ = xgi.laplacian(H, order=1, sparse=False, index=True)
assert I.shape == (0, 0)
assert A.shape == (5, 5)
assert L.shape == (5, 5)

# sparse
I_sp, _, _ = xgi.incidence_matrix(H, order=1, sparse=True, index=True)
A_sp, _ = xgi.adjacency_matrix(H, order=1, sparse=True, index=True)
L_sp, _ = xgi.laplacian(H, order=1, sparse=True, index=True)
assert I_sp.shape == (0, 0)
assert A_sp.shape == (5, 5)
assert L_sp.shape == (5, 5)


def test_empty():
H = xgi.Hypergraph([])
assert xgi.incidence_matrix(H).shape == (0,)
assert xgi.incidence_matrix(H).shape == (0, 0)
assert xgi.adjacency_matrix(H).shape == (0, 0)
assert xgi.laplacian(H).shape == (0, 0)
assert xgi.clique_motif_matrix(H).shape == (0, 0)

# with indices
data = xgi.incidence_matrix(H, index=True)
assert len(data) == 3
assert data[0].shape == (0,)
assert data[0].shape == (0, 0)
assert type(data[1]) == dict and type(data[2]) == dict

data = xgi.adjacency_matrix(H, index=True)
Expand All @@ -610,3 +639,13 @@ def test_empty():
assert len(data) == 2
assert data[0].shape == (0, 0)
assert type(data[1]) == dict

# sparse
assert xgi.incidence_matrix(H, sparse=True).shape == (0, 0)
assert xgi.incidence_matrix(H, sparse=False).shape == (0, 0)

assert xgi.adjacency_matrix(H, sparse=True).shape == (0, 0)
assert xgi.adjacency_matrix(H, sparse=False).shape == (0, 0)

assert xgi.laplacian(H, sparse=True).shape == (0, 0)
assert xgi.laplacian(H, sparse=False).shape == (0, 0)
2 changes: 1 addition & 1 deletion xgi/algorithms/centrality.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ def line_vector_centrality(H):
vc = {node: [] for node in H.nodes}

edge_label_dict = {tuple(edge): index for index, edge in H._edge.items()}

hyperedge_dims = {tuple(edge): len(edge) for edge in H.edges.members()}

D = H.edges.size.max()
Expand Down
28 changes: 8 additions & 20 deletions xgi/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,11 @@
import pandas as pd
from networkx.algorithms import bipartite
from numpy import matrix, ndarray
from scipy.sparse import (
coo_array,
coo_matrix,
csc_array,
csc_matrix,
csr_array,
csr_matrix,
lil_array,
lil_matrix,
)

from .classes import (
Hypergraph,
SimplicialComplex,
maximal_simplices,
set_edge_attributes,
)
from scipy.sparse import (coo_array, coo_matrix, csc_array, csc_matrix,
csr_array, csr_matrix, lil_array, lil_matrix)

from .classes import (Hypergraph, SimplicialComplex, maximal_simplices,
set_edge_attributes)
from .exception import XGIError
from .generators import empty_hypergraph, empty_simplicial_complex
from .linalg import adjacency_matrix, incidence_matrix
Expand Down Expand Up @@ -136,7 +124,7 @@ def convert_to_graph(H):

def convert_to_line_graph(H):
"""Line graph of the hypergraph.

The line graph of the hypergraph `H` is the graph whose
nodes correspond to each hyperedge in `H`, linked together
if they share at least one vertex.
Expand All @@ -155,7 +143,7 @@ def convert_to_line_graph(H):
LG = nx.Graph()

edge_label_dict = {tuple(edge): index for index, edge in H._edge.items()}

nodes = sorted(set(edge_label_dict.values()))
LG.add_nodes_from(nodes)

Expand Down Expand Up @@ -492,7 +480,7 @@ def from_simplicial_complex_to_hypergraph(SC):

max_simplices = maximal_simplices(SC)
H = Hypergraph()
H.add_nodes_from(SC.nodes) # to keep node order and isolated nodes
H.add_nodes_from(SC.nodes) # to keep node order and isolated nodes
H.add_edges_from([list(SC.edges.members(e)) for e in max_simplices])
return H

Expand Down
73 changes: 35 additions & 38 deletions xgi/linalg/matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,11 @@ def incidence_matrix(
if order is not None:
edge_ids = H.edges.filterby("order", order)
if not edge_ids or not node_ids:
return (np.array([]), {}, {}) if index else np.array([])
if sparse:
I = csr_array((0, 0), dtype=int)
else:
I = np.empty((0, 0), dtype=int)
return (I, {}, {}) if index else I

num_edges = len(edge_ids)
num_nodes = len(node_ids)
Expand All @@ -110,33 +114,23 @@ def incidence_matrix(
rowdict = {v: k for k, v in node_dict.items()}
coldict = {v: k for k, v in edge_dict.items()}

# Compute the non-zero values, row and column indices for the given order
rows = []
cols = []
data = []
for edge in edge_ids:
members = H._edge[edge]
for node in members:
rows.append(node_dict[node])
cols.append(edge_dict[edge])
data.append(weight(node, edge, H))

# Create the incidence matrix as a CSR matrix
if sparse:
# Create csr sparse matrix
rows = []
cols = []
data = []
for node in node_ids:
memberships = H.nodes.memberships(node)
# keep only those with right order
memberships = [i for i in memberships if i in edge_ids]
if len(memberships) > 0:
for edge in memberships:
data.append(weight(node, edge, H))
rows.append(node_dict[node])
cols.append(edge_dict[edge])
else: # include disconnected nodes
for edge in edge_ids:
data.append(0)
rows.append(node_dict[node])
cols.append(edge_dict[edge])
I = csr_array((data, (rows, cols)))
I = csr_array((data, (rows, cols)), shape=(num_nodes, num_edges), dtype=int)
else:
# Create an np.matrix
I = np.zeros((num_nodes, num_edges), dtype=int)
for edge in edge_ids:
members = H.edges.members(edge)
for node in members:
I[node_dict[node], edge_dict[edge]] = weight(node, edge, H)
I[rows, cols] = data

return (I, rowdict, coldict) if index else I

Expand Down Expand Up @@ -169,11 +163,12 @@ def adjacency_matrix(H, order=None, sparse=True, s=1, weighted=False, index=Fals
"""
I, rowdict, coldict = incidence_matrix(H, order=order, sparse=sparse, index=True)

if I.shape == (0,):
if I.shape == (0, 0):
if not rowdict:
A = np.array([])
A = csr_array((0, 0)) if sparse else np.empty((0, 0))
if not coldict:
A = np.zeros((H.num_nodes, H.num_nodes))
shape = (H.num_nodes, H.num_nodes)
A = csr_array(shape, dtype=int) if sparse else np.zeros(shape, dtype=int)
return (A, {}) if index else A

A = I.dot(I.T)
Expand Down Expand Up @@ -246,7 +241,7 @@ def degree_matrix(H, order=None, index=False):
"""
I, rowdict, _ = incidence_matrix(H, order=order, index=True)

if I.shape == (0,):
if I.shape == (0, 0):
K = np.zeros(H.num_nodes)
else:
K = np.ravel(np.sum(I, axis=1)) # flatten
Expand Down Expand Up @@ -291,8 +286,9 @@ def laplacian(H, order=1, sparse=False, rescale_per_node=False, index=False):
H, order=order, sparse=sparse, weighted=True, index=True
)

if A.shape == (0,):
return (np.array([]), {}) if index else np.array([])
if A.shape == (0, 0):
L = csr_array((0, 0)) if sparse else np.empty((0, 0))
return (L, {}) if index else L

if sparse:
K = csr_array(diags(degree_matrix(H, order=order)))
Expand Down Expand Up @@ -349,12 +345,15 @@ def multiorder_laplacian(
raise ValueError("orders and weights must have the same length.")

Ls = [
laplacian(H, order=i, sparse=False, rescale_per_node=rescale_per_node)
for i in orders
laplacian(H, order=d, sparse=sparse, rescale_per_node=rescale_per_node)
for d in orders
]
Ks = [degree_matrix(H, order=i) for i in orders]
Ks = [degree_matrix(H, order=d) for d in orders]

L_multi = np.zeros((H.num_nodes, H.num_nodes))
if sparse:
L_multi = csr_array((H.num_nodes, H.num_nodes))
else:
L_multi = np.zeros((H.num_nodes, H.num_nodes))

for L, K, w, d in zip(Ls, Ks, weights, orders):
if np.all(K == 0):
Expand All @@ -366,10 +365,8 @@ def multiorder_laplacian(
else:
L_multi += L * w / np.mean(K)

if sparse:
L_multi = csr_array(L_multi)
rowdict = {i: v for i, v in enumerate(H.nodes)}

rowdict = dict(zip(range(H.num_nodes), H.nodes))
return (L_multi, rowdict) if index else L_multi


Expand Down