From 6f4c35f1433a9da2d941a844ea5b6dceed9a4598 Mon Sep 17 00:00:00 2001
From: Maxime Lucas <ml.maximelucas@gmail.com>
Date: Wed, 22 Mar 2023 18:07:47 +0100
Subject: [PATCH 1/6] perf: cleaner and faster incidence_matrix

---
 xgi/linalg/matrix.py | 40 +++++++++++++++-------------------------
 1 file changed, 15 insertions(+), 25 deletions(-)

diff --git a/xgi/linalg/matrix.py b/xgi/linalg/matrix.py
index cea0a2694..ff54dcfd7 100644
--- a/xgi/linalg/matrix.py
+++ b/xgi/linalg/matrix.py
@@ -110,34 +110,24 @@ def incidence_matrix(
         rowdict = {v: k for k, v in node_dict.items()}
         coldict = {v: k for k, v in edge_dict.items()}
 
+    # Compute the non-zero values, row and column indices for the given order
+    rows = []
+    cols = []
+    data = []
+    for edge in edge_ids:
+        members = H._edge[edge]
+        for node in members:
+            rows.append(node_dict[node])
+            cols.append(edge_dict[edge])
+            data.append(weight(node, edge, H))
+    
+    # Create the incidence matrix as a CSR matrix
     if sparse:
-        # Create csr sparse matrix
-        rows = []
-        cols = []
-        data = []
-        for node in node_ids:
-            memberships = H.nodes.memberships(node)
-            # keep only those with right order
-            memberships = [i for i in memberships if i in edge_ids]
-            if len(memberships) > 0:
-                for edge in memberships:
-                    data.append(weight(node, edge, H))
-                    rows.append(node_dict[node])
-                    cols.append(edge_dict[edge])
-            else:  # include disconnected nodes
-                for edge in edge_ids:
-                    data.append(0)
-                    rows.append(node_dict[node])
-                    cols.append(edge_dict[edge])
-        I = csr_array((data, (rows, cols)))
+        I = csr_array((data, (rows, cols)), shape=(num_nodes, num_edges), dtype=int)
     else:
-        # Create an np.matrix
         I = np.zeros((num_nodes, num_edges), dtype=int)
-        for edge in edge_ids:
-            members = H.edges.members(edge)
-            for node in members:
-                I[node_dict[node], edge_dict[edge]] = weight(node, edge, H)
-
+        I[rows, cols] = data
+    
     return (I, rowdict, coldict) if index else I
 
 

From 893299effcf7816e9530dbf2a117a2de7685db69 Mon Sep 17 00:00:00 2001
From: Maxime Lucas <ml.maximelucas@gmail.com>
Date: Wed, 22 Mar 2023 18:46:57 +0100
Subject: [PATCH 2/6] fix: return a sparse matrix (when requested) for empty H;
 sparse and dense results now both have shape (0, 0). Update matching tests.

---
 tests/linalg/test_matrix.py |  6 +++---
 xgi/linalg/matrix.py        | 20 +++++++++++++-------
 2 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/tests/linalg/test_matrix.py b/tests/linalg/test_matrix.py
index 96b772980..95b7a0d17 100644
--- a/tests/linalg/test_matrix.py
+++ b/tests/linalg/test_matrix.py
@@ -579,13 +579,13 @@ def test_empty_order(edgelist6):
     H = xgi.Hypergraph(edgelist6)
     I, _, _ = xgi.incidence_matrix(H, order=1, index=True)
     A, _ = xgi.adjacency_matrix(H, order=1, index=True)
-    assert I.shape == (0,)
+    assert I.shape == (0, 0)
     assert A.shape == (5, 5)
 
 
 def test_empty():
     H = xgi.Hypergraph([])
-    assert xgi.incidence_matrix(H).shape == (0,)
+    assert xgi.incidence_matrix(H).shape == (0, 0)
     assert xgi.adjacency_matrix(H).shape == (0, 0)
     assert xgi.laplacian(H).shape == (0, 0)
     assert xgi.clique_motif_matrix(H).shape == (0, 0)
@@ -593,7 +593,7 @@ def test_empty():
     # with indices
     data = xgi.incidence_matrix(H, index=True)
     assert len(data) == 3
-    assert data[0].shape == (0,)
+    assert data[0].shape == (0, 0)
     assert type(data[1]) == dict and type(data[2]) == dict
 
     data = xgi.adjacency_matrix(H, index=True)
diff --git a/xgi/linalg/matrix.py b/xgi/linalg/matrix.py
index ff54dcfd7..992e8bcac 100644
--- a/xgi/linalg/matrix.py
+++ b/xgi/linalg/matrix.py
@@ -98,7 +98,11 @@ def incidence_matrix(
     if order is not None:
         edge_ids = H.edges.filterby("order", order)
     if not edge_ids or not node_ids:
-        return (np.array([]), {}, {}) if index else np.array([])
+        if sparse: 
+            I = csr_array((0, 0), dtype=int)
+        else: 
+            I = np.empty((0, 0), dtype=int)
+        return (I, {}, {}) if index else I
 
     num_edges = len(edge_ids)
     num_nodes = len(node_ids)
@@ -159,11 +163,12 @@ def adjacency_matrix(H, order=None, sparse=True, s=1, weighted=False, index=Fals
     """
     I, rowdict, coldict = incidence_matrix(H, order=order, sparse=sparse, index=True)
 
-    if I.shape == (0,):
+    if I.shape == (0, 0):
         if not rowdict:
-            A = np.array([])
+            A = csr_array((0,0)) if sparse else np.empty((0,0))
         if not coldict:
-            A = np.zeros((H.num_nodes, H.num_nodes))
+            shape = (H.num_nodes, H.num_nodes)
+            A = csr_array(shape, dtype=int) if sparse else np.zeros(shape, dtype=int)
         return (A, {}) if index else A
 
     A = I.dot(I.T)
@@ -236,7 +241,7 @@ def degree_matrix(H, order=None, index=False):
     """
     I, rowdict, _ = incidence_matrix(H, order=order, index=True)
 
-    if I.shape == (0,):
+    if I.shape == (0, 0):
         K = np.zeros(H.num_nodes)
     else:
         K = np.ravel(np.sum(I, axis=1))  # flatten
@@ -281,8 +286,9 @@ def laplacian(H, order=1, sparse=False, rescale_per_node=False, index=False):
         H, order=order, sparse=sparse, weighted=True, index=True
     )
 
-    if A.shape == (0,):
-        return (np.array([]), {}) if index else np.array([])
+    if A.shape == (0, 0):
+        L = csr_array((0,0)) if sparse else np.empty((0,0))
+        return (L, {}) if index else L
 
     if sparse:
         K = csr_array(diags(degree_matrix(H, order=order)))

From 3ac3a94f057b2430f85e238fe265b334ee7214e4 Mon Sep 17 00:00:00 2001
From: Maxime Lucas <ml.maximelucas@gmail.com>
Date: Wed, 22 Mar 2023 19:20:29 +0100
Subject: [PATCH 3/6] tests: check that sparse and dense outputs agree

---
 tests/linalg/test_matrix.py | 34 ++++++++++++++++++++++++++++++++--
 1 file changed, 32 insertions(+), 2 deletions(-)

diff --git a/tests/linalg/test_matrix.py b/tests/linalg/test_matrix.py
index 95b7a0d17..9de61c062 100644
--- a/tests/linalg/test_matrix.py
+++ b/tests/linalg/test_matrix.py
@@ -242,11 +242,17 @@ def test_adjacency_matrix(edgelist1, edgelist4):
     assert A4[node_dict4[4], node_dict4[6]] == 0
 
     A5 = xgi.adjacency_matrix(H1, sparse=False)
+    A5_sp = xgi.adjacency_matrix(H1, sparse=True)
     assert isinstance(A5, np.ndarray)
-    assert np.all(A5 == A1.todense())
+    assert np.all(A5 == A5_sp.todense())
 
     A6 = xgi.adjacency_matrix(H1, order=1, sparse=False)
-    assert np.all(A6 == A3.todense())
+    A6_sp = xgi.adjacency_matrix(H1, order=1, sparse=True)
+    assert np.all(A6 == A6_sp.todense())
+
+    A7 = xgi.adjacency_matrix(H1, order=2, sparse=False)
+    A7_sp = xgi.adjacency_matrix(H1, order=2, sparse=True)
+    assert np.all(A7 == A7_sp.todense())
 
 
 def test_laplacian(edgelist2, edgelist6):
@@ -319,6 +325,19 @@ def test_laplacian(edgelist2, edgelist6):
     assert isinstance(L4, csr_array)
     assert np.all(L1 == L4.todense())
 
+    L5 = xgi.laplacian(H1, sparse=False)
+    L5_sp = xgi.laplacian(H1, sparse=True)
+    assert isinstance(L5, np.ndarray)
+    assert np.all(L5 == L5_sp.todense())
+
+    L6 = xgi.laplacian(H1, order=1, sparse=False)
+    L6_sp = xgi.laplacian(H1, order=1, sparse=True)
+    assert np.all(L6 == L6_sp.todense())
+
+    L7 = xgi.laplacian(H1, order=2, sparse=False)
+    L7_sp = xgi.laplacian(H1, order=2, sparse=True)
+    assert np.all(L7 == L7_sp.todense())
+
 
 def test_multiorder_laplacian(edgelist2, edgelist6):
     el1 = edgelist6
@@ -610,3 +629,14 @@ def test_empty():
     assert len(data) == 2
     assert data[0].shape == (0, 0)
     assert type(data[1]) == dict
+
+    # sparse 
+    assert xgi.incidence_matrix(H, sparse=True).shape == (0, 0)
+    assert xgi.incidence_matrix(H, sparse=False).shape == (0, 0)
+
+    assert xgi.adjacency_matrix(H, sparse=True).shape == (0, 0)
+    assert xgi.adjacency_matrix(H, sparse=False).shape == (0, 0)
+
+    assert xgi.laplacian(H, sparse=True).shape == (0, 0)
+    assert xgi.laplacian(H, sparse=False).shape == (0, 0)
+

From 39c729945d27ccf0d8f054348243c1a8d16fbf94 Mon Sep 17 00:00:00 2001
From: Maxime Lucas <ml.maximelucas@gmail.com>
Date: Wed, 22 Mar 2023 19:23:12 +0100
Subject: [PATCH 4/6] tests: check sparse and laplacian in test_empty_order

---
 tests/linalg/test_matrix.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/tests/linalg/test_matrix.py b/tests/linalg/test_matrix.py
index 9de61c062..0ecc6c51e 100644
--- a/tests/linalg/test_matrix.py
+++ b/tests/linalg/test_matrix.py
@@ -596,10 +596,20 @@ def test_boundary_matrix(edgelist4):
 
 def test_empty_order(edgelist6):
     H = xgi.Hypergraph(edgelist6)
-    I, _, _ = xgi.incidence_matrix(H, order=1, index=True)
-    A, _ = xgi.adjacency_matrix(H, order=1, index=True)
+    I, _, _ = xgi.incidence_matrix(H, order=1, sparse=False, index=True)
+    A, _ = xgi.adjacency_matrix(H, order=1, sparse=False, index=True)
+    L, _ = xgi.laplacian(H, order=1, sparse=False, index=True)
     assert I.shape == (0, 0)
     assert A.shape == (5, 5)
+    assert L.shape == (5, 5)
+
+    # sparse
+    I_sp, _, _ = xgi.incidence_matrix(H, order=1, sparse=True, index=True)
+    A_sp, _ = xgi.adjacency_matrix(H, order=1, sparse=True, index=True)
+    L_sp, _ = xgi.laplacian(H, order=1, sparse=True, index=True)
+    assert I_sp.shape == (0, 0)
+    assert A_sp.shape == (5, 5)
+    assert L_sp.shape == (5, 5)
 
 
 def test_empty():

From fe3db5fe6ec67a79236dc8011ffd23081c61a10f Mon Sep 17 00:00:00 2001
From: Maxime Lucas <ml.maximelucas@gmail.com>
Date: Wed, 22 Mar 2023 20:37:20 +0100
Subject: [PATCH 5/6] perf: made multiorder_laplacian truly sparse and faster
 #301

---
 xgi/linalg/matrix.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/xgi/linalg/matrix.py b/xgi/linalg/matrix.py
index 992e8bcac..47e90566b 100644
--- a/xgi/linalg/matrix.py
+++ b/xgi/linalg/matrix.py
@@ -345,12 +345,15 @@ def multiorder_laplacian(
         raise ValueError("orders and weights must have the same length.")
 
     Ls = [
-        laplacian(H, order=i, sparse=False, rescale_per_node=rescale_per_node)
-        for i in orders
+        laplacian(H, order=d, sparse=sparse, rescale_per_node=rescale_per_node)
+        for d in orders
     ]
-    Ks = [degree_matrix(H, order=i) for i in orders]
+    Ks = [degree_matrix(H, order=d) for d in orders]
 
-    L_multi = np.zeros((H.num_nodes, H.num_nodes))
+    if sparse:
+        L_multi = csr_array((H.num_nodes, H.num_nodes))
+    else:
+        L_multi = np.zeros((H.num_nodes, H.num_nodes))
 
     for L, K, w, d in zip(Ls, Ks, weights, orders):
         if np.all(K == 0):
@@ -362,10 +365,8 @@ def multiorder_laplacian(
         else:
             L_multi += L * w / np.mean(K)
 
-    if sparse:
-        L_multi = csr_array(L_multi)
+    rowdict = {i: v for i, v in enumerate(H.nodes)}
 
-    rowdict = dict(zip(range(H.num_nodes), H.nodes))
     return (L_multi, rowdict) if index else L_multi
 
 

From 53e481858a50638cb245f1b0e0992fefecb79cf2 Mon Sep 17 00:00:00 2001
From: Maxime Lucas <ml.maximelucas@gmail.com>
Date: Wed, 22 Mar 2023 22:11:37 +0100
Subject: [PATCH 6/6] style: ran isort and black

---
 tests/drawing/test_layout.py        |  4 ++--
 tests/generators/test_nonuniform.py |  2 +-
 tests/linalg/test_matrix.py         |  3 +--
 xgi/algorithms/centrality.py        |  2 +-
 xgi/convert.py                      | 28 ++++++++--------------------
 xgi/linalg/matrix.py                | 12 ++++++------
 6 files changed, 19 insertions(+), 32 deletions(-)

diff --git a/tests/drawing/test_layout.py b/tests/drawing/test_layout.py
index 0929b3bb2..1e97e4e95 100644
--- a/tests/drawing/test_layout.py
+++ b/tests/drawing/test_layout.py
@@ -1,6 +1,5 @@
-import pytest
-
 import numpy as np
+import pytest
 
 import xgi
 from xgi.exception import XGIError
@@ -82,6 +81,7 @@ def test_barycenter_spring_layout(hypergraph1):
     pos = xgi.barycenter_spring_layout(H)
     assert len(pos) == H.num_nodes
 
+
 def test_weighted_barycenter_spring_layout(hypergraph1):
 
     H = xgi.random_hypergraph(10, [0.2], seed=1)
diff --git a/tests/generators/test_nonuniform.py b/tests/generators/test_nonuniform.py
index d2350db57..e7246d6fe 100644
--- a/tests/generators/test_nonuniform.py
+++ b/tests/generators/test_nonuniform.py
@@ -1,7 +1,7 @@
-import pytest
 import random
 
 import numpy as np
+import pytest
 
 import xgi
 
diff --git a/tests/linalg/test_matrix.py b/tests/linalg/test_matrix.py
index 0ecc6c51e..f9a593c33 100644
--- a/tests/linalg/test_matrix.py
+++ b/tests/linalg/test_matrix.py
@@ -640,7 +640,7 @@ def test_empty():
     assert data[0].shape == (0, 0)
     assert type(data[1]) == dict
 
-    # sparse 
+    # sparse
     assert xgi.incidence_matrix(H, sparse=True).shape == (0, 0)
     assert xgi.incidence_matrix(H, sparse=False).shape == (0, 0)
 
@@ -649,4 +649,3 @@ def test_empty():
 
     assert xgi.laplacian(H, sparse=True).shape == (0, 0)
     assert xgi.laplacian(H, sparse=False).shape == (0, 0)
-
diff --git a/xgi/algorithms/centrality.py b/xgi/algorithms/centrality.py
index baa62cdb0..6618a1219 100644
--- a/xgi/algorithms/centrality.py
+++ b/xgi/algorithms/centrality.py
@@ -281,7 +281,7 @@ def line_vector_centrality(H):
     vc = {node: [] for node in H.nodes}
 
     edge_label_dict = {tuple(edge): index for index, edge in H._edge.items()}
-    
+
     hyperedge_dims = {tuple(edge): len(edge) for edge in H.edges.members()}
 
     D = H.edges.size.max()
diff --git a/xgi/convert.py b/xgi/convert.py
index 481359e73..1a5a55328 100644
--- a/xgi/convert.py
+++ b/xgi/convert.py
@@ -5,23 +5,11 @@
 import pandas as pd
 from networkx.algorithms import bipartite
 from numpy import matrix, ndarray
-from scipy.sparse import (
-    coo_array,
-    coo_matrix,
-    csc_array,
-    csc_matrix,
-    csr_array,
-    csr_matrix,
-    lil_array,
-    lil_matrix,
-)
-
-from .classes import (
-    Hypergraph,
-    SimplicialComplex,
-    maximal_simplices,
-    set_edge_attributes,
-)
+from scipy.sparse import (coo_array, coo_matrix, csc_array, csc_matrix,
+                          csr_array, csr_matrix, lil_array, lil_matrix)
+
+from .classes import (Hypergraph, SimplicialComplex, maximal_simplices,
+                      set_edge_attributes)
 from .exception import XGIError
 from .generators import empty_hypergraph, empty_simplicial_complex
 from .linalg import adjacency_matrix, incidence_matrix
@@ -136,7 +124,7 @@ def convert_to_graph(H):
 
 def convert_to_line_graph(H):
     """Line graph of the hypergraph.
-    
+
     The line graph of the hypergraph `H` is the graph whose
     nodes correspond to each hyperedge in `H`, linked together
     if they share at least one vertex.
@@ -155,7 +143,7 @@ def convert_to_line_graph(H):
     LG = nx.Graph()
 
     edge_label_dict = {tuple(edge): index for index, edge in H._edge.items()}
-    
+
     nodes = sorted(set(edge_label_dict.values()))
     LG.add_nodes_from(nodes)
 
@@ -492,7 +480,7 @@ def from_simplicial_complex_to_hypergraph(SC):
 
     max_simplices = maximal_simplices(SC)
     H = Hypergraph()
-    H.add_nodes_from(SC.nodes) # to keep node order and isolated nodes
+    H.add_nodes_from(SC.nodes)  # to keep node order and isolated nodes
     H.add_edges_from([list(SC.edges.members(e)) for e in max_simplices])
     return H
 
diff --git a/xgi/linalg/matrix.py b/xgi/linalg/matrix.py
index 47e90566b..b4db2fbb0 100644
--- a/xgi/linalg/matrix.py
+++ b/xgi/linalg/matrix.py
@@ -98,9 +98,9 @@ def incidence_matrix(
     if order is not None:
         edge_ids = H.edges.filterby("order", order)
     if not edge_ids or not node_ids:
-        if sparse: 
+        if sparse:
             I = csr_array((0, 0), dtype=int)
-        else: 
+        else:
             I = np.empty((0, 0), dtype=int)
         return (I, {}, {}) if index else I
 
@@ -124,14 +124,14 @@ def incidence_matrix(
             rows.append(node_dict[node])
             cols.append(edge_dict[edge])
             data.append(weight(node, edge, H))
-    
+
     # Create the incidence matrix as a CSR matrix
     if sparse:
         I = csr_array((data, (rows, cols)), shape=(num_nodes, num_edges), dtype=int)
     else:
         I = np.zeros((num_nodes, num_edges), dtype=int)
         I[rows, cols] = data
-    
+
     return (I, rowdict, coldict) if index else I
 
 
@@ -165,7 +165,7 @@ def adjacency_matrix(H, order=None, sparse=True, s=1, weighted=False, index=Fals
 
     if I.shape == (0, 0):
         if not rowdict:
-            A = csr_array((0,0)) if sparse else np.empty((0,0))
+            A = csr_array((0, 0)) if sparse else np.empty((0, 0))
         if not coldict:
             shape = (H.num_nodes, H.num_nodes)
             A = csr_array(shape, dtype=int) if sparse else np.zeros(shape, dtype=int)
@@ -287,7 +287,7 @@ def laplacian(H, order=1, sparse=False, rescale_per_node=False, index=False):
     )
 
     if A.shape == (0, 0):
-        L = csr_array((0,0)) if sparse else np.empty((0,0))
+        L = csr_array((0, 0)) if sparse else np.empty((0, 0))
         return (L, {}) if index else L
 
     if sparse: