From 7ee8aa21aafbfb167d8016c9cd9b796c75e862d9 Mon Sep 17 00:00:00 2001 From: Nicholas Landry Date: Wed, 5 Apr 2023 13:54:58 -0400 Subject: [PATCH] Add clustering coefficient definitions (#316) * moved modules * add clustering code * added clustering functions * add tests * fixed tests * Update xgi/algorithms/clustering.py Co-authored-by: Maxime Lucas * updated centrality names * added more info on clustering * updated all names * fixed name * update docstrings * added tests * Update clustering.py * changed NaN to nan https://stackoverflow.com/questions/53436339/difference-between-np-nan-and-np-nan * response to review * change to isnan * fixed more tests --------- Co-authored-by: Maxime Lucas Co-authored-by: Leo Torres --- docs/source/api/algorithms.rst | 1 + .../algorithms/xgi.algorithms.centrality.rst | 4 +- .../algorithms/xgi.algorithms.clustering.rst | 12 + docs/source/api/linalg.rst | 2 +- .../{classes => linalg}/xgi.linalg.matrix.rst | 0 docs/source/api/stats/xgi.stats.nodestats.rst | 10 +- docs/source/api/utils.rst | 2 +- .../xgi.utils.utilities.rst | 0 tests/algorithms/test_centrality.py | 62 +++-- tests/algorithms/test_clustering.py | 125 +++++++++ tests/drawing/test_draw.py | 4 - tests/stats/test_nodestats.py | 66 ++++- tutorials/Tutorial 6 - Statistics.ipynb | 16 +- tutorials/quickstart.ipynb | 6 +- xgi/algorithms/__init__.py | 3 +- xgi/algorithms/centrality.py | 26 +- xgi/algorithms/clustering.py | 258 ++++++++++++++++++ xgi/stats/__init__.py | 52 ++-- xgi/stats/nodestats.py | 136 +++++++-- 19 files changed, 671 insertions(+), 114 deletions(-) create mode 100644 docs/source/api/algorithms/xgi.algorithms.clustering.rst rename docs/source/api/{classes => linalg}/xgi.linalg.matrix.rst (100%) rename docs/source/api/{classes => utils}/xgi.utils.utilities.rst (100%) create mode 100644 tests/algorithms/test_clustering.py create mode 100644 xgi/algorithms/clustering.py diff --git a/docs/source/api/algorithms.rst b/docs/source/api/algorithms.rst index 
4c9b884a4..272894060 100644 --- a/docs/source/api/algorithms.rst +++ b/docs/source/api/algorithms.rst @@ -9,4 +9,5 @@ algorithms package ~xgi.algorithms.assortativity ~xgi.algorithms.centrality + ~xgi.algorithms.clustering ~xgi.algorithms.connected \ No newline at end of file diff --git a/docs/source/api/algorithms/xgi.algorithms.centrality.rst b/docs/source/api/algorithms/xgi.algorithms.centrality.rst index fff166299..87d120855 100644 --- a/docs/source/api/algorithms/xgi.algorithms.centrality.rst +++ b/docs/source/api/algorithms/xgi.algorithms.centrality.rst @@ -7,7 +7,7 @@ xgi.algorithms.centrality .. rubric:: Functions - .. autofunction:: CEC_centrality - .. autofunction:: HEC_centrality + .. autofunction:: clique_eigenvector_centrality + .. autofunction:: h_eigenvector_centrality .. autofunction:: node_edge_centrality .. autofunction:: line_vector_centrality diff --git a/docs/source/api/algorithms/xgi.algorithms.clustering.rst b/docs/source/api/algorithms/xgi.algorithms.clustering.rst new file mode 100644 index 000000000..f2e822ec0 --- /dev/null +++ b/docs/source/api/algorithms/xgi.algorithms.clustering.rst @@ -0,0 +1,12 @@ +xgi.algorithms.clustering +========================= + +.. currentmodule:: xgi.algorithms.clustering + +.. automodule:: xgi.algorithms.clustering + + .. rubric:: Functions + + .. autofunction:: clustering_coefficient + .. autofunction:: local_clustering_coefficient + .. autofunction:: two_node_clustering_coefficient \ No newline at end of file diff --git a/docs/source/api/linalg.rst b/docs/source/api/linalg.rst index 16a0562d6..c3e835ded 100644 --- a/docs/source/api/linalg.rst +++ b/docs/source/api/linalg.rst @@ -5,6 +5,6 @@ linalg package .. rubric:: Modules .. 
autosummary:: - :toctree: classes + :toctree: linalg ~xgi.linalg.matrix \ No newline at end of file diff --git a/docs/source/api/classes/xgi.linalg.matrix.rst b/docs/source/api/linalg/xgi.linalg.matrix.rst similarity index 100% rename from docs/source/api/classes/xgi.linalg.matrix.rst rename to docs/source/api/linalg/xgi.linalg.matrix.rst diff --git a/docs/source/api/stats/xgi.stats.nodestats.rst b/docs/source/api/stats/xgi.stats.nodestats.rst index 9e5052674..23f0a66e4 100644 --- a/docs/source/api/stats/xgi.stats.nodestats.rst +++ b/docs/source/api/stats/xgi.stats.nodestats.rst @@ -9,8 +9,10 @@ .. autofunction:: attrs .. autofunction:: average_neighbor_degree - .. autofunction:: cec_centrality - .. autofunction:: clustering + .. autofunction:: clique_eigenvector_centrality + .. autofunction:: clustering_coefficient .. autofunction:: degree - .. autofunction:: hec_centrality - .. autofunction:: node_edge_centrality \ No newline at end of file + .. autofunction:: h_eigenvector_centrality + .. autofunction:: local_clustering_coefficient + .. autofunction:: node_edge_centrality + .. autofunction:: two_node_clustering_coefficient \ No newline at end of file diff --git a/docs/source/api/utils.rst b/docs/source/api/utils.rst index 2a1161eaa..34bb4d8dc 100644 --- a/docs/source/api/utils.rst +++ b/docs/source/api/utils.rst @@ -5,6 +5,6 @@ utils package .. rubric:: Modules .. 
autosummary:: - :toctree: classes + :toctree: utils ~xgi.utils.utilities \ No newline at end of file diff --git a/docs/source/api/classes/xgi.utils.utilities.rst b/docs/source/api/utils/xgi.utils.utilities.rst similarity index 100% rename from docs/source/api/classes/xgi.utils.utilities.rst rename to docs/source/api/utils/xgi.utils.utilities.rst diff --git a/tests/algorithms/test_centrality.py b/tests/algorithms/test_centrality.py index 31c0bbd90..71d023fc3 100644 --- a/tests/algorithms/test_centrality.py +++ b/tests/algorithms/test_centrality.py @@ -6,71 +6,93 @@ from xgi.exception import XGIError -def test_cec_centrality(): +def test_clique_eigenvector_centrality(): # test empty hypergraph H = xgi.Hypergraph() - assert xgi.CEC_centrality(H) == dict() + assert xgi.clique_eigenvector_centrality(H) == dict() + # Test no edges H.add_nodes_from([0, 1, 2]) - assert xgi.CEC_centrality(H) == {0: np.NaN, 1: np.NaN, 2: np.NaN} + cec = xgi.clique_eigenvector_centrality(H) + assert set(cec) == {0, 1, 2} + for i in cec: + assert np.isnan(cec[i]) + # test disconnected H.add_edge([0, 1]) - assert xgi.CEC_centrality(H) == {0: np.NaN, 1: np.NaN, 2: np.NaN} + cec = xgi.clique_eigenvector_centrality(H) + assert set(cec) == {0, 1, 2} + for i in cec: + assert np.isnan(cec[i]) H = xgi.sunflower(3, 1, 3) - c = H.nodes.cec_centrality.asnumpy() + c = H.nodes.clique_eigenvector_centrality.asnumpy() assert norm(c[1:] - c[1]) < 1e-4 assert abs(c[0] / c[1] - ratio(3, 3, kind="CEC")) < 1e-4 H = xgi.sunflower(5, 1, 7) - c = H.nodes.cec_centrality.asnumpy() + c = H.nodes.clique_eigenvector_centrality.asnumpy() assert norm(c[1:] - c[1]) < 1e-4 assert abs(c[0] / c[1] - ratio(5, 7, kind="CEC")) < 1e-4 @pytest.mark.slow -def test_hec_centrality(): +def test_h_eigenvector_centrality(): # test empty hypergraph H = xgi.Hypergraph() - c = xgi.HEC_centrality(H) + c = xgi.h_eigenvector_centrality(H) assert c == dict() + # Test no edges H.add_nodes_from([0, 1, 2]) - c = xgi.HEC_centrality(H) - assert c == 
{0: np.NaN, 1: np.NaN, 2: np.NaN} + hec = xgi.h_eigenvector_centrality(H) + for i in hec: + assert np.isnan(hec[i]) + # test disconnected H.add_edge([0, 1]) - c = xgi.HEC_centrality(H) - assert c == {0: np.NaN, 1: np.NaN, 2: np.NaN} + hec = xgi.h_eigenvector_centrality(H) + assert set(hec) == {0, 1, 2} + for i in hec: + assert np.isnan(hec[i]) H = xgi.sunflower(3, 1, 5) - c = H.nodes.hec_centrality(max_iter=1000).asnumpy() + c = H.nodes.h_eigenvector_centrality(max_iter=1000).asnumpy() assert norm(c[1:] - c[1]) < 1e-4 assert abs(c[0] / c[1] - ratio(3, 5, kind="HEC")) < 1e-4 H = xgi.sunflower(5, 1, 7) - c = H.nodes.hec_centrality(max_iter=1000).asnumpy() + c = H.nodes.h_eigenvector_centrality(max_iter=1000).asnumpy() assert norm(c[1:] - c[1]) < 1e-4 assert abs(c[0] / c[1] - ratio(5, 7, kind="HEC")) < 1e-4 with pytest.raises(XGIError): H = xgi.Hypergraph([[1, 2], [2, 3, 4]]) - H.nodes.hec_centrality.asnumpy() + H.nodes.h_eigenvector_centrality.asnumpy() def test_node_edge_centrality(): # test empty hypergraph H = xgi.Hypergraph() assert xgi.node_edge_centrality(H) == (dict(), dict()) + # Test no edges H.add_nodes_from([0, 1, 2]) - assert xgi.node_edge_centrality(H) == ({0: np.NaN, 1: np.NaN, 2: np.NaN}, dict()) + nc, ec = xgi.node_edge_centrality(H) + assert set(nc) == {0, 1, 2} + for i in nc: + assert np.isnan(nc[i]) + assert ec == dict() + # test disconnected H.add_edge([0, 1]) - assert xgi.node_edge_centrality(H) == ( - {0: np.NaN, 1: np.NaN, 2: np.NaN}, - {0: np.NaN}, - ) + nc, ec = xgi.node_edge_centrality(H) + assert set(nc) == {0, 1, 2} + for i in nc: + assert np.isnan(nc[i]) + assert set(ec) == {0} + for i in ec: + assert np.isnan(ec[i]) H = xgi.Hypergraph([[0, 1, 2, 3, 4]]) c = H.nodes.node_edge_centrality.asnumpy()
diff --git a/tests/algorithms/test_clustering.py b/tests/algorithms/test_clustering.py
new file mode 100644
index 000000000..f74c30bcb
--- /dev/null
+++ b/tests/algorithms/test_clustering.py
@@ -0,0 +1,125 @@
+import numpy as np
+import pytest
+
+import xgi
+from xgi.exception import XGIError
+
+
+def test_local_clustering_coefficient(edgelist8):
+    H = xgi.random_hypergraph(3, [1])
+
+    cc = xgi.local_clustering_coefficient(H)
+    true_cc = {0: 1.0, 1: 1.0, 2: 1.0}
+    assert cc == true_cc
+
+    H = xgi.random_hypergraph(3, [1, 1])
+    cc = xgi.local_clustering_coefficient(H)
+    true_cc = {0: 1.0, 1: 1.0, 2: 1.0}
+    assert cc == true_cc
+
+    H = xgi.random_hypergraph(3, [0, 1])
+    cc = xgi.local_clustering_coefficient(H)
+    true_cc = {0: 0.0, 1: 0.0, 2: 0.0}
+    assert cc == true_cc
+
+    H = xgi.Hypergraph()
+    cc = xgi.local_clustering_coefficient(H)
+    assert cc == {}
+
+    H = xgi.Hypergraph()
+    H.add_nodes_from(range(3))
+    cc = xgi.local_clustering_coefficient(H)
+    assert cc == {0: 0, 1: 0, 2: 0}
+
+    H = xgi.Hypergraph(edgelist8)
+    cc = xgi.local_clustering_coefficient(H)
+    true_cc = {
+        0: 0.6777777777777778,
+        1: 0.575,
+        2: 0.3333333333333333,
+        3: 0.3333333333333333,
+        4: 0.6666666666666666,
+        5: 0.0,
+        6: 0.0,
+    }
+    assert cc == true_cc
+
+
+def test_clustering_coefficient(edgelist1):
+    H = xgi.random_hypergraph(3, [1])
+
+    cc = xgi.clustering_coefficient(H)
+    true_cc = {0: 1.0, 1: 1.0, 2: 1.0}
+    assert cc == true_cc
+
+    H = xgi.random_hypergraph(3, [1, 1])
+    cc = xgi.clustering_coefficient(H)
+    true_cc = {0: 1.0, 1: 1.0, 2: 1.0}
+    assert cc == true_cc
+
+    H = xgi.random_hypergraph(3, [0, 1])
+    cc = xgi.clustering_coefficient(H)
+    true_cc = {0: 1.0, 1: 1.0, 2: 1.0}
+    assert cc == true_cc
+
+    H = xgi.Hypergraph()
+    cc = xgi.clustering_coefficient(H)
+    assert cc == {}
+
+    H = xgi.Hypergraph()
+    H.add_nodes_from(range(3))
+    cc = xgi.clustering_coefficient(H)
+    assert cc == {0: 0, 1: 0, 2: 0}
+
+    H = xgi.Hypergraph(edgelist1)
+    cc = xgi.clustering_coefficient(H)
+    true_cc = {1: 1.0, 2: 1.0, 3: 1.0, 4: 0, 5: 0, 6: 1 / 3, 8: 1.0, 7: 1.0}
+    assert cc == true_cc
+
+
+def test_two_node_clustering_coefficient(edgelist1, edgelist8):
+    H = xgi.random_hypergraph(3, [1])
+
+    cc = xgi.two_node_clustering_coefficient(H)
+    true_cc = {0: 1 / 3, 1: 1 / 3, 2: 1 / 3}
+    assert cc == true_cc
+
+    # check default keyword
+    cc1 = xgi.two_node_clustering_coefficient(H, kind="union")
+    assert cc == cc1
+
+    H = xgi.random_hypergraph(3, [1, 1])
+    cc = xgi.two_node_clustering_coefficient(H)
+    true_cc = {0: 0.5, 1: 0.5, 2: 0.5}
+    assert cc == true_cc
+
+    H = xgi.Hypergraph(edgelist1)
+    cc1 = xgi.two_node_clustering_coefficient(H, kind="union")
+    cc2 = xgi.two_node_clustering_coefficient(H, kind="min")
+    cc3 = xgi.two_node_clustering_coefficient(H, kind="max")
+
+    true_cc1 = {1: 1.0, 2: 1.0, 3: 1.0, 4: 0, 5: 0.5, 6: 0.5, 8: 0.75, 7: 0.75}
+    true_cc2 = {1: 1.0, 2: 1.0, 3: 1.0, 4: 0, 5: 1.0, 6: 1.0, 8: 1.0, 7: 1.0}
+    true_cc3 = {1: 1.0, 2: 1.0, 3: 1.0, 4: 0, 5: 0.5, 6: 0.5, 8: 0.75, 7: 0.75}
+
+    assert cc1 == true_cc1
+    assert cc2 == true_cc2
+    assert cc3 == true_cc3
+
+    with pytest.raises(XGIError):
+        xgi.two_node_clustering_coefficient(H, kind="test")
+
+    H = xgi.Hypergraph(edgelist8)
+    H.add_node(10)
+    cc = xgi.two_node_clustering_coefficient(H, kind="min")
+    true_cc = {
+        0: 0.6533333333333333,
+        1: 0.4888888888888888,
+        2: 0.5833333333333333,
+        3: 0.5833333333333333,
+        4: 0.5666666666666667,
+        5: 0.5,
+        6: 0.5,
+        10: 0,
+    }
+    assert cc == true_cc
diff --git a/tests/drawing/test_draw.py b/tests/drawing/test_draw.py index cdb20d361..5b6b4fd8e 100644 --- a/tests/drawing/test_draw.py +++ b/tests/drawing/test_draw.py @@ -90,7 +90,6 @@ def test_color_arg_to_dict(edgelist4): def test_draw(edgelist8): - H = xgi.Hypergraph(edgelist8) ax = xgi.draw(H) @@ -111,7 +110,6 @@ def test_draw(edgelist8): def test_draw_nodes(edgelist8): - H = xgi.Hypergraph(edgelist8) ax = xgi.draw_nodes(H) @@ -128,7 +126,6 @@ def test_draw_nodes(edgelist8): def test_draw_hyperedges(edgelist8): - H = xgi.Hypergraph(edgelist8) ax = xgi.draw_hyperedges(H) @@ -148,7 +145,6 @@ def test_draw_hyperedges(edgelist8): def test_draw_simplices(edgelist8): - with pytest.raises(XGIError): H = xgi.Hypergraph(edgelist8) ax = xgi.draw_simplices(H) diff --git
a/tests/stats/test_nodestats.py b/tests/stats/test_nodestats.py index 49dba8718..f4fcd8108 100644 --- a/tests/stats/test_nodestats.py +++ b/tests/stats/test_nodestats.py @@ -162,24 +162,76 @@ def test_average_neighbor_degree(edgelist1, edgelist8): assert H.nodes.average_neighbor_degree().aslist() == list(vals.values()) -def test_clustering(): +def test_clustering_coefficient(): + # no nodes + H = xgi.Hypergraph() + + assert H.clustering_coefficient() == dict() + assert H.nodes.clustering_coefficient().aslist() == [] + assert H.nodes.clustering_coefficient().asdict() == dict() + + # no edges + H.add_nodes_from(range(3)) + assert H.nodes.clustering_coefficient().aslist() == [0, 0, 0] + assert H.nodes.clustering_coefficient().asdict() == {0: 0, 1: 0, 2: 0} + + # edges + edges = [[1, 2, 3], [2, 3, 4, 5], [3, 4, 5]] + H = xgi.Hypergraph(edges) + assert np.allclose( + H.nodes.clustering_coefficient.aslist(), np.array([1, 2 / 3, 2 / 3, 1, 1]) + ) + + +def test_local_clustering_coefficient(): + # no nodes + H = xgi.Hypergraph() + assert H.local_clustering_coefficient() == dict() + assert H.nodes.local_clustering_coefficient().aslist() == [] + assert H.nodes.local_clustering_coefficient().asdict() == dict() + + # no edges + H.add_nodes_from(range(3)) + assert H.nodes.local_clustering_coefficient().aslist() == [0, 0, 0] + assert H.nodes.local_clustering_coefficient().asdict() == {0: 0, 1: 0, 2: 0} + + # edges + edges = [[1, 2, 3], [2, 3, 4, 5], [3, 4, 5]] + H = xgi.Hypergraph(edges) + assert np.allclose( + H.nodes.local_clustering_coefficient.aslist(), np.array([0, 0, 0.25, 0, 0]) + ) + + +def test_two_node_clustering_coefficient(): # no nodes H = xgi.Hypergraph() - assert H.clustering() == dict() - assert H.nodes.clustering().aslist() == [] - assert H.nodes.clustering().asdict() == dict() + assert H.two_node_clustering_coefficient() == dict() + assert H.nodes.two_node_clustering_coefficient().aslist() == [] + assert H.nodes.two_node_clustering_coefficient().asdict() == 
dict() # no edges H.add_nodes_from(range(3)) - assert H.nodes.clustering().aslist() == [0.0, 0.0, 0.0] - assert H.nodes.clustering().asdict() == {0: 0.0, 1: 0.0, 2: 0.0} + assert H.nodes.two_node_clustering_coefficient().aslist() == [0, 0, 0] + assert H.nodes.two_node_clustering_coefficient().asdict() == {0: 0, 1: 0, 2: 0} # edges edges = [[1, 2, 3], [2, 3, 4, 5], [3, 4, 5]] H = xgi.Hypergraph(edges) - assert np.allclose(H.nodes.clustering.aslist(), [0.0, 4.0, 1.33333333, 3.0, 3.0]) + assert np.allclose( + H.nodes.two_node_clustering_coefficient(kind="union").aslist(), + np.array( + [ + 0.41666666666666663, + 0.45833333333333326, + 0.5833333333333333, + 0.6666666666666666, + 0.6666666666666666, + ] + ), + ) def test_aggregates(edgelist1, edgelist2, edgelist8): diff --git a/tutorials/Tutorial 6 - Statistics.ipynb b/tutorials/Tutorial 6 - Statistics.ipynb index 3c78b5ebb..f941af9c7 100644 --- a/tutorials/Tutorial 6 - Statistics.ipynb +++ b/tutorials/Tutorial 6 - Statistics.ipynb @@ -710,7 +710,11 @@ } ], "source": [ - "(H.nodes.filterby(\"degree\", 2).filterby_attr(\"color\", \"blue\").clustering.asdict())" + "(\n", + " H.nodes.filterby(\"degree\", 2)\n", + " .filterby_attr(\"color\", \"blue\")\n", + " .clustering_coefficient.asdict()\n", + ")" ] }, { @@ -848,7 +852,7 @@ } ], "source": [ - "H.nodes.multi([\"degree\", \"clustering\"])" + "H.nodes.multi([\"degree\", \"clustering_coefficient\"])" ] }, { @@ -883,7 +887,7 @@ } ], "source": [ - "H.nodes.multi([\"degree\", \"clustering\"]).asdict()" + "H.nodes.multi([\"degree\", \"clustering_coefficient\"]).asdict()" ] }, { @@ -934,7 +938,7 @@ } ], "source": [ - "ms = H.nodes.multi([\"degree\", \"clustering\"])\n", + "ms = H.nodes.multi([\"degree\", \"clustering_coefficient\"])\n", "\n", "from pprint import pprint\n", "\n", @@ -1049,7 +1053,7 @@ } ], "source": [ - "df = H.nodes.multi([\"degree\", \"clustering\"]).aspandas()\n", + "df = H.nodes.multi([\"degree\", \"clustering_coefficient\"]).aspandas()\n", "df" ] }, @@ 
-1157,7 +1161,7 @@ } ], "source": [ - "H.nodes.multi([\"degree\", \"clustering\"]).aspandas().plot();" + "H.nodes.multi([\"degree\", \"clustering_coefficient\"]).aspandas().plot();" ] }, { diff --git a/tutorials/quickstart.ipynb b/tutorials/quickstart.ipynb index f390c04dc..b539225b4 100644 --- a/tutorials/quickstart.ipynb +++ b/tutorials/quickstart.ipynb @@ -605,7 +605,9 @@ "metadata": {}, "outputs": [], "source": [ - "H_stats.nodes.multi([\"degree\", \"clustering\"]).aspandas().groupby(\"degree\").agg(\"mean\")" + "H_stats.nodes.multi([\"degree\", \"clustering_coefficient\"]).aspandas().groupby(\n", + " \"degree\"\n", + ").agg(\"mean\")" ] }, { @@ -615,7 +617,7 @@ "outputs": [], "source": [ "(\n", - " H_stats.nodes.multi([\"degree\", \"clustering\", \"average_neighbor_degree\"])\n", + " H_stats.nodes.multi([\"degree\", \"clustering_coefficient\", \"average_neighbor_degree\"])\n", " .aspandas()\n", " .groupby(\"degree\")\n", " .agg(\"mean\")\n", diff --git a/xgi/algorithms/__init__.py b/xgi/algorithms/__init__.py index 81f3af525..e1f6d76a8 100644 --- a/xgi/algorithms/__init__.py +++ b/xgi/algorithms/__init__.py @@ -1,4 +1,5 @@ -from . import assortativity, centrality, connected +from . import assortativity, centrality, clustering, connected from .assortativity import * from .centrality import * +from .clustering import * from .connected import * diff --git a/xgi/algorithms/centrality.py b/xgi/algorithms/centrality.py index 9eed7e592..e1e345fcf 100644 --- a/xgi/algorithms/centrality.py +++ b/xgi/algorithms/centrality.py @@ -12,15 +12,15 @@ from ..linalg import clique_motif_matrix, incidence_matrix __all__ = [ - "CEC_centrality", - "HEC_centrality", + "clique_eigenvector_centrality", + "h_eigenvector_centrality", "node_edge_centrality", "line_vector_centrality", ] -def CEC_centrality(H, tol=1e-6): - """Compute the CEC centrality of a hypergraph. +def clique_eigenvector_centrality(H, tol=1e-6): + """Compute the clique motif eigenvector centrality of a hypergraph. 
Parameters ---------- @@ -47,9 +47,9 @@ def CEC_centrality(H, tol=1e-6): if H.num_nodes == 0: return dict() # if the hypergraph is not connected, - # this metric doesn't make sense and should return NaN. + # this metric doesn't make sense and should return nan. if not is_connected(H): - return {n: np.NaN for n in H.nodes} + return {n: np.nan for n in H.nodes} W, node_dict = clique_motif_matrix(H, index=True) _, v = eigsh(W.asfptype(), k=1, which="LM", tol=tol) @@ -58,8 +58,8 @@ def CEC_centrality(H, tol=1e-6): return {node_dict[n]: v[n].item() for n in node_dict} -def HEC_centrality(H, max_iter=100, tol=1e-6): - """Compute the HEC centrality of a uniform hypergraph. +def h_eigenvector_centrality(H, max_iter=100, tol=1e-6): + """Compute the H-eigenvector centrality of a uniform hypergraph. Parameters ---------- @@ -93,9 +93,9 @@ def HEC_centrality(H, max_iter=100, tol=1e-6): if H.num_nodes == 0: return dict() # if the hypergraph is not connected, - # this metric doesn't make sense and should return NaN. + # this metric doesn't make sense and should return nan. if not is_connected(H): - return {n: np.NaN for n in H.nodes} + return {n: np.nan for n in H.nodes} m = is_uniform(H) if not m: @@ -205,11 +205,11 @@ def node_edge_centrality( # if there aren't any nodes or edges, return an empty dict if H.num_nodes == 0 or H.num_edges == 0 or not is_connected(H): - return {n: np.NaN for n in H.nodes}, {e: np.NaN for e in H.edges} + return {n: np.nan for n in H.nodes}, {e: np.nan for e in H.edges} # if the hypergraph is not connected, - # this metric doesn't make sense and should return NaN. + # this metric doesn't make sense and should return nan. 
# if not is_connected(H): - # return {n: np.NaN for n in H.nodes}, {e: np.NaN for e in H.edges} + # return {n: np.nan for n in H.nodes}, {e: np.nan for e in H.edges} n = H.num_nodes m = H.num_edges
diff --git a/xgi/algorithms/clustering.py b/xgi/algorithms/clustering.py
new file mode 100644
index 000000000..8c594560f
--- /dev/null
+++ b/xgi/algorithms/clustering.py
@@ -0,0 +1,258 @@
+import numpy as np
+
+from ..exception import XGIError
+from ..linalg import adjacency_matrix
+
+__all__ = [
+    "local_clustering_coefficient",
+    "clustering_coefficient",
+    "two_node_clustering_coefficient",
+]
+
+
+def clustering_coefficient(H):
+    """Return the clustering coefficients for
+    each node in a Hypergraph.
+
+    This clustering coefficient is defined as the
+    clustering coefficient of the unweighted pairwise
+    projection of the hypergraph, i.e., `num / denom`,
+    where `num` equals `A^3[n, n]` and `denom` equals
+    `k*(k-1)/2`. Here `A` is the adjacency matrix
+    of the network and `k` is the pairwise
+    degree of `n`.
+
+    Parameters
+    ----------
+    H : Hypergraph
+        Hypergraph
+
+    Returns
+    -------
+    dict
+        nodes are keys, clustering coefficients are values.
+
+    Notes
+    -----
+    The clustering coefficient is undefined when the number of
+    neighbors is 0 or 1, but we set the clustering coefficient
+    to 0 in these cases. For more discussion, see
+    https://arxiv.org/abs/0802.2512
+
+    References
+    ----------
+    "Clustering Coefficients in Protein Interaction Hypernetworks"
+    by Suzanne Gallagher and Debra Goldberg.
+    DOI: 10.1145/2506583.2506635
+
+    Example
+    -------
+    >>> import xgi
+    >>> H = xgi.random_hypergraph(3, [1, 1])
+    >>> cc = xgi.clustering_coefficient(H)
+    >>> cc
+    {0: 1.0, 1: 1.0, 2: 1.0}
+    """
+    adj, index = adjacency_matrix(H, index=True)
+    ndict = {n: i for i, n in index.items()}
+
+    k = adj.sum(axis=1)
+    denom = k * (k - 1) / 2
+    mat = adj.dot(adj).dot(adj)
+
+    with np.errstate(divide="ignore", invalid="ignore"):
+        result = np.nan_to_num(0.5 * mat.diagonal() / denom)
+
+    return {n: result[ndict[n]] if n in ndict else 0 for n in H.nodes}
+
+
+def local_clustering_coefficient(H):
+    """Compute the local clustering coefficient.
+
+    This clustering coefficient is based on the
+    overlap of the edges connected to a given node,
+    normalized by the size of the node's neighborhood.
+
+    Parameters
+    ----------
+    H : Hypergraph
+        Hypergraph
+
+    Returns
+    -------
+    dict
+        keys are node IDs and values are the
+        clustering coefficients.
+
+    Notes
+    -----
+    The clustering coefficient is undefined when the number of
+    neighbors is 0 or 1, but we set the clustering coefficient
+    to 0 in these cases. For more discussion, see
+    https://arxiv.org/abs/0802.2512
+
+    References
+    ----------
+    "Properties of metabolic graphs: biological organization or representation artifacts?"
+    by Wanding Zhou and Luay Nakhleh.
+    https://doi.org/10.1186/1471-2105-12-132
+
+    "Hypergraphs for predicting essential genes using multiprotein complex data"
+    by Florian Klimm, Charlotte M. Deane, and Gesine Reinert.
+    https://doi.org/10.1093/comnet/cnaa028
+
+    Example
+    -------
+    >>> import xgi
+    >>> H = xgi.random_hypergraph(3, [1, 1])
+    >>> cc = xgi.local_clustering_coefficient(H)
+    >>> cc
+    {0: 0.3333333333333333, 1: 0.3333333333333333, 2: 0.3333333333333333}
+    """
+    result = {}
+
+    memberships = H.nodes.memberships()
+    members = H.edges.members(dtype=dict)
+
+    for n in H.nodes:
+        ev = list(memberships[n])
+        dv = len(ev)
+        if dv <= 1:
+            result[n] = 0
+        else:
+            total_eo = 0
+            # go over all pairs of edges that contain n (looked up by edge ID)
+            for e1 in range(dv):
+                edge1 = members[ev[e1]]
+                for e2 in range(e1):
+                    edge2 = members[ev[e2]]
+                    # set differences for the hyperedges
+                    D1 = set(edge1) - set(edge2)
+                    D2 = set(edge2) - set(edge1)
+                    # if edges are the same by definition the extra overlap is zero
+                    if len(D1.union(D2)) == 0:
+                        eo = 0
+                    else:
+                        # otherwise we have to look at their neighbours
+                        # the neighbours of D1 and D2, respectively.
+                        neighD1 = {i for d in D1 for i in H.nodes.neighbors(d)}
+                        neighD2 = {i for d in D2 for i in H.nodes.neighbors(d)}
+                        # compute the extra overlap [len() is used for cardinality of edges]
+                        eo = (
+                            len(neighD1.intersection(D2))
+                            + len(neighD2.intersection(D1))
+                        ) / len(
+                            D1.union(D2)
+                        )  # add it up
+                    # add it up
+                    total_eo = total_eo + eo
+
+            # include normalisation by degree k*(k-1)/2
+            result[n] = 2 * total_eo / (dv * (dv - 1))
+    return result
+
+
+def two_node_clustering_coefficient(H, kind="union"):
+    """Return the clustering coefficients for
+    each node in a Hypergraph.
+
+    This definition averages over all of the
+    two-node clustering coefficients involving the node.
+
+    Parameters
+    ----------
+    H : Hypergraph
+        Hypergraph
+
+    Returns
+    -------
+    dict
+        nodes are keys, clustering coefficients are values.
+
+    Notes
+    -----
+    The clustering coefficient is undefined when the number of
+    neighbors is 0 or 1, but we set the clustering coefficient
+    to 0 in these cases. For more discussion, see
+    https://arxiv.org/abs/0802.2512
+
+    References
+    ----------
+    "Clustering Coefficients in Protein Interaction Hypernetworks"
+    by Suzanne Gallagher and Debra Goldberg.
+    DOI: 10.1145/2506583.2506635
+
+    Example
+    -------
+    >>> import xgi
+    >>> H = xgi.random_hypergraph(3, [1, 1])
+    >>> cc = xgi.two_node_clustering_coefficient(H, kind="union")
+    >>> cc
+    {0: 0.5, 1: 0.5, 2: 0.5}
+    """
+    result = {}
+    memberships = H.nodes.memberships()
+    for n in H.nodes:
+        neighbors = H.nodes.neighbors(n)
+        result[n] = 0.0
+        for v in neighbors:
+            result[n] += _uv_cc(n, v, memberships, kind=kind) / len(neighbors)
+    return result
+
+
+def _uv_cc(u, v, memberships, kind="union"):
+    """Helper function to compute the two-node
+    clustering coefficient.
+
+    Parameters
+    ----------
+    u : hashable
+        First node
+    v : hashable
+        Second node
+    memberships : dict
+        node IDs are keys, edge IDs to which they belong
+        are values.
+    kind : str, optional
+        Type of clustering coefficient to compute, by default "union".
+        Options:
+
+        - "union"
+        - "max"
+        - "min"
+
+    Returns
+    -------
+    float
+        The clustering coefficient
+
+    Raises
+    ------
+    XGIError
+        If an invalid clustering coefficient kind
+        is specified.
+
+    References
+    ----------
+    "Clustering Coefficients in Protein Interaction Hypernetworks"
+    by Suzanne Gallagher and Debra Goldberg.
+    DOI: 10.1145/2506583.2506635
+    """
+    m_u = memberships[u]
+    m_v = memberships[v]
+
+    num = len(m_u.intersection(m_v))
+
+    if kind == "union":
+        denom = len(m_u.union(m_v))
+    elif kind == "min":
+        denom = min(len(m_u), len(m_v))
+    elif kind == "max":
+        denom = max(len(m_u), len(m_v))
+    else:
+        raise XGIError("Invalid kind of clustering.")
+
+    if denom == 0:
+        return np.nan
+
+    return num / denom
diff --git a/xgi/stats/__init__.py b/xgi/stats/__init__.py index ed984051c..d6d41015b 100644 --- a/xgi/stats/__init__.py +++ b/xgi/stats/__init__.py @@ -283,17 +283,20 @@ def asdict(self, inner=dict, transpose=False): -------- >>> import xgi >>> H = xgi.Hypergraph([[1, 2, 3], [2, 3, 4, 5], [3, 4, 5]]) - >>> m = H.nodes.multi(['degree', 'clustering']) + >>> m = H.nodes.multi(['degree', 'clustering_coefficient']) >>> m.asdict() # doctest: +NORMALIZE_WHITESPACE - {1: {'degree': 1, 'clustering': 0.0}, - 2: {'degree': 2, 'clustering': 4.0}, - 3: {'degree': 3, 'clustering': 1.3333333333333333}, - 4: {'degree': 2, 'clustering': 3.0}, - 5: {'degree': 2, 'clustering': 3.0}} + {1: {'degree': 1, 'clustering_coefficient': 1.0}, + 2: {'degree': 2, 'clustering_coefficient': 0.6666666666666666}, + 3: {'degree': 3, 'clustering_coefficient': 0.6666666666666666}, + 4: {'degree': 2, 'clustering_coefficient': 1.0}, + 5: {'degree': 2, 'clustering_coefficient': 1.0}} >>> m.asdict(transpose=True) # doctest: +NORMALIZE_WHITESPACE {'degree': {1: 1, 2: 2, 3: 3, 4: 2, 5: 2}, - 'clustering': {1: 0.0, 2: 4.0, 3: 1.3333333333333333, 4: 3.0, 5: 3.0}} - + 'clustering_coefficient': {1: 1.0, + 2: 0.6666666666666666, + 3: 0.6666666666666666, + 4: 1.0, + 5: 1.0}} """ val = self._val if inner is dict: @@ -324,12 +327,11 @@ def aslist(self, inner=list, transpose=False): -------- >>> import xgi >>> H = xgi.Hypergraph([[1, 2, 3], [2, 3, 4, 5], [3, 4, 5]]) - >>> m = H.nodes.multi(['degree', 'clustering']) + >>> m = H.nodes.multi(['degree', 'clustering_coefficient']) >>> m.aslist() # doctest: - [[1, 0.0], [2,
4.0], [3, 1.3333333333333333], [2, 3.0], [2, 3.0]] + [[1, 1.0], [2, 0.6666666666666666], [3, 0.6666666666666666], [2, 1.0], [2, 1.0]] >>> m.aslist(transpose=True) - [[1, 2, 3, 2, 2], [0.0, 4.0, 1.3333333333333333, 3.0, 3.0]] - + [[1, 2, 3, 2, 2], [1.0, 0.6666666666666666, 0.6666666666666666, 1.0, 1.0]] """ val = self._val if inner is list: @@ -353,12 +355,12 @@ def asnumpy(self): -------- >>> import xgi >>> H = xgi.Hypergraph([[1, 2, 3], [2, 3, 4, 5], [3, 4, 5]]) - >>> H.nodes.multi(['degree', 'clustering']).asnumpy() # doctest: +NORMALIZE_WHITESPACE - array([[1. , 0. ], - [2. , 4. ], - [3. , 1.33333333], - [2. , 3. ], - [2. , 3. ]]) + >>> H.nodes.multi(['degree', 'clustering_coefficient']).asnumpy() # doctest: +NORMALIZE_WHITESPACE + array([[1. , 1. ], + [2. , 0.66666667], + [3. , 0.66666667], + [2. , 1. ], + [2. , 1. ]]) """ return np.array(self.aslist(inner=list)) @@ -370,13 +372,13 @@ def aspandas(self): -------- >>> import xgi >>> H = xgi.Hypergraph([[1, 2, 3], [2, 3, 4, 5], [3, 4, 5]]) - >>> H.nodes.multi(['degree', 'clustering']).aspandas() # doctest: +NORMALIZE_WHITESPACE - degree clustering - 1 1 0.000000 - 2 2 4.000000 - 3 3 1.333333 - 4 2 3.000000 - 5 2 3.000000 + >>> H.nodes.multi(['degree', 'clustering_coefficient']).aspandas() # doctest: +NORMALIZE_WHITESPACE + degree clustering_coefficient + 1 1 1.000000 + 2 2 0.666667 + 3 3 0.666667 + 4 2 1.000000 + 5 2 1.000000 """ result = {s.name: s._val for s in self.stats} diff --git a/xgi/stats/nodestats.py b/xgi/stats/nodestats.py index 138f8a010..91f0e3d59 100644 --- a/xgi/stats/nodestats.py +++ b/xgi/stats/nodestats.py @@ -26,9 +26,11 @@ "attrs", "degree", "average_neighbor_degree", - "clustering", - "cec_centrality", - "hec_centrality", + "local_clustering_coefficient", + "clustering_coefficient", + "two_node_clustering_coefficient", + "clique_eigenvector_centrality", + "h_eigenvector_centrality", "node_edge_centrality", ] @@ -188,12 +190,16 @@ def average_neighbor_degree(net, bunch): return result -def 
clustering(net, bunch): +def clustering_coefficient(net, bunch): """Local clustering coefficient. - The clustering coefficient of a node `n` is defined as `num / denom`, where `num` - equals `A^3[n, n]` and `denom` equals `d*(d-1)/2`. Here `A` is the adjacency matrix - of the network and `d` is the degree of `n`. + This clustering coefficient is defined as the + clustering coefficient of the unweighted pairwise + projection of the hypergraph, i.e., `num / denom`, + where `num` equals `A^3[n, n]` and `denom` equals + `nu*(nu-1)/2`. Here `A` is the adjacency matrix + of the network and `nu` is the number of pairwise + neighbors of `n`. Parameters ---------- @@ -215,27 +221,101 @@ def clustering(net, bunch): -------- >>> import xgi, numpy as np >>> H = xgi.Hypergraph([[1, 2, 3], [2, 3, 4, 5], [3, 4, 5]]) - >>> np.round(H.nodes.clustering.asnumpy(), 3) - array([0. , 4. , 1.333, 3. , 3. ]) + >>> H.nodes.two_node_clustering_coefficient.asnumpy() + array([0.41666667, 0.45833333, 0.58333333, 0.66666667, 0.66666667]) """ - adj, index = xgi.adjacency_matrix(net, index=True) - node_to_index = {n: i for i, n in index.items()} - mat = adj.dot(adj).dot(adj) - result = {} - for n in bunch: - deg = len(net.nodes.memberships(n)) - denom = deg * (deg - 1) / 2 - if denom <= 0: - result[n] = 0.0 - else: - i = node_to_index[n] - result[n] = mat[i, i] / denom / 2 - return result + cc = xgi.clustering_coefficient(net) + return {n: cc[n] for n in cc if n in bunch} + + +def local_clustering_coefficient(net, bunch): + """Compute the local clustering coefficient. + + This clustering coefficient is based on the + overlap of the edges connected to a given node, + normalized by the size of the node's neighborhood. + + Parameters + ---------- + net : xgi.Hypergraph + The network. + bunch : Iterable + Nodes in `net`. + + Returns + ------- + dict + keys are node IDs and values are the + clustering coefficients. 
+ + References + ---------- + "Properties of metabolic graphs: biological organization or representation artifacts?" + by Wanding Zhou and Luay Nakhleh. + https://doi.org/10.1186/1471-2105-12-132 + + "Hypergraphs for predicting essential genes using multiprotein complex data" + by Florian Klimm, Charlotte M. Deane, and Gesine Reinert. + https://doi.org/10.1093/comnet/cnaa028 + + Example + ------- + >>> import xgi + >>> H = xgi.random_hypergraph(3, [1, 1]) + >>> H.nodes.local_clustering_coefficient.asdict() + {0: 1.0, 1: 1.0, 2: 1.0} + """ + cc = xgi.local_clustering_coefficient(net) + return {n: cc[n] for n in cc if n in bunch} + + +def two_node_clustering_coefficient(net, bunch, kind="union"): + """Return the clustering coefficients for + each node in a Hypergraph. + + This definition averages over all of the + two-node clustering coefficients involving the node. + + Parameters + ---------- + net : xgi.Hypergraph + The network. + bunch : Iterable + Nodes in `net`. + kind : str + The type of two-node clustering coefficient. + Types are: + + - "union" + - "min" + - "max" + by default, "union". + + Returns + ------- + dict + nodes are keys, clustering coefficients are values. + + References + ---------- + "Clustering Coefficients in Protein Interaction Hypernetworks" + by Suzanne Gallagher and Debra Goldberg. + DOI: 10.1145/2506583.2506635 + + Example + ------- + >>> import xgi + >>> H = xgi.random_hypergraph(3, [1, 1]) + >>> H.nodes.two_node_clustering_coefficient.asdict() + {0: 0.5, 1: 0.5, 2: 0.5} + """ + cc = xgi.two_node_clustering_coefficient(net, kind=kind) + return {n: cc[n] for n in cc if n in bunch} -def cec_centrality(net, bunch, tol=1e-6): - """Compute the CEC centrality of a hypergraph. +def clique_eigenvector_centrality(net, bunch, tol=1e-6): + """Compute the clique motif eigenvector centrality of a hypergraph. Parameters ---------- @@ -257,12 +337,12 @@ def cec_centrality(net, bunch, tol=1e-6): Austin R. 
Benson, https://doi.org/10.1137/18M1203031 """ - c = xgi.CEC_centrality(net, tol) + c = xgi.clique_eigenvector_centrality(net, tol) return {n: c[n] for n in c if n in bunch} -def hec_centrality(net, bunch, max_iter=10, tol=1e-6): - """Compute the HEC centrality of a hypergraph. +def h_eigenvector_centrality(net, bunch, max_iter=10, tol=1e-6): + """Compute the H-eigenvector centrality of a hypergraph. Parameters ---------- @@ -286,7 +366,7 @@ def hec_centrality(net, bunch, max_iter=10, tol=1e-6): Austin R. Benson, https://doi.org/10.1137/18M1203031 """ - c = xgi.HEC_centrality(net, max_iter, tol) + c = xgi.h_eigenvector_centrality(net, max_iter, tol) return {n: c[n] for n in c if n in bunch}