From 3b68fc4b60f901928729d9a4294de4d96a2966e3 Mon Sep 17 00:00:00 2001 From: Matthew Treinish Date: Fri, 3 Jun 2022 16:30:03 -0400 Subject: [PATCH] Add Eigenvector centrality function This commit adds a new function eigenvector_centrality() to compute the eigenvector centrality of a graph. It uses the same power function approach that the NetworkX function eigenvector_centrality() [1] function uses. This is for two reasons, the first is that a more traditional eigenvector linear algebra/BLAS function would either require us to link against BLAS at build time (which is a big change in the build system and a large requirement) or to call out to numpy via python both of which seemed less than ideal. The second reason was to make handling holes in node indices bit easier. Using this approach also enabled us to put the implementation in retworkx-core so it can be reused with any petgraph graph. Part of #441 --- ...genvector-centrality-e8ca30e31738a666.yaml | 9 + retworkx-core/src/centrality.rs | 201 ++++++++++++++++++ retworkx/__init__.py | 57 +++++ src/centrality.rs | 129 ++++++++++- src/lib.rs | 5 + tests/digraph/test_centrality.py | 22 ++ tests/graph/test_centrality.py | 22 ++ 7 files changed, 444 insertions(+), 1 deletion(-) create mode 100644 releasenotes/notes/add-eigenvector-centrality-e8ca30e31738a666.yaml diff --git a/releasenotes/notes/add-eigenvector-centrality-e8ca30e31738a666.yaml b/releasenotes/notes/add-eigenvector-centrality-e8ca30e31738a666.yaml new file mode 100644 index 000000000..48f6fdc4b --- /dev/null +++ b/releasenotes/notes/add-eigenvector-centrality-e8ca30e31738a666.yaml @@ -0,0 +1,9 @@ +--- +features: + - | + Added a new function, :func:`~.eigenvector_centrality()` which is used to + compute the eigenvector centrality for all nodes in a given graph. + - | + Added a new function to retworkx-core ``eigenvector_centrality`` which is + used to compute the eigenvector centrality for all nodes in a given graph. 
+ diff --git a/retworkx-core/src/centrality.rs b/retworkx-core/src/centrality.rs index 3474be99a..dac5c287c 100644 --- a/retworkx-core/src/centrality.rs +++ b/retworkx-core/src/centrality.rs @@ -16,8 +16,11 @@ use std::sync::RwLock; use hashbrown::HashMap; use petgraph::graph::NodeIndex; use petgraph::visit::{ + EdgeRef, GraphBase, GraphProp, // allows is_directed + IntoEdges, + IntoNeighbors, IntoNeighborsDirected, IntoNodeIdentifiers, NodeCount, @@ -297,3 +300,201 @@ where sigma, } } + +/// Compute the eigenvector centrality of a graph +/// +/// For details on the eigenvector centrality refer to: +/// +/// Phillip Bonacich. “Power and Centrality: A Family of Measures.” +/// American Journal of Sociology 92(5):1170–1182, 1986 +/// +/// +/// This function uses a power iteration method to compute the eigenvector +/// and convergence is not guaranteed. The function will stop when `max_iter` +/// iterations is reached or when the computed vector between two iterations +/// is smaller than the error tolerance multiplied by the number of nodes. +/// The implementation of this algorithm is based on the NetworkX +/// [`eigenvector_centrality()`](https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.centrality.eigenvector_centrality.html) +/// function. +/// +/// In the case of multigraphs the weights of any parallel edges will be +/// summed when computing the eigenvector centrality. +/// +/// Arguments: +/// +/// * `graph` - The graph object to run the algorithm on +/// * `weight_fn` - An input callable that will be passed the `EdgeRef` for +/// an edge in the graph and is expected to return a `Result` of +/// the weight of that edge. +/// * `max_iter` - The maximum number of iterations in the power method. If +/// set to `None` a default value of 100 is used. +/// * `tol` - The error tolerance used when checking for convergence in the +/// power method. If set to `None` a default value of 1e-6 is used. 
+/// +/// # Example +/// ```rust +/// use hashbrown::HashMap; +/// +/// use retworkx_core::Result; +/// use retworkx_core::petgraph; +/// use retworkx_core::petgraph::visit::{IntoEdges, IntoNodeIdentifiers}; +/// use retworkx_core::centrality::eigenvector_centrality; +/// +/// let g = petgraph::graph::UnGraph::<i32, ()>::from_edges(&[ +/// (0, 1), (1, 2) +/// ]); +/// // Calculate the eigenvector centrality +/// let output: Result<Option<HashMap<petgraph::graph::NodeIndex, f64>>> = +/// eigenvector_centrality(&g, |_| {Ok(1.)}, None, None); +/// ``` +pub fn eigenvector_centrality<G, F, E>( + graph: G, + mut weight_fn: F, + max_iter: Option<usize>, + tol: Option<f64>, +) -> Result<Option<HashMap<NodeIndex, f64>>, E> +where + G: NodeIndexable + + IntoNodeIdentifiers + + IntoNeighbors + + IntoEdges + + NodeCount + + GraphProp + + GraphBase<NodeId = NodeIndex>, + F: FnMut(G::EdgeRef) -> Result<f64, E>, +{ + let tol: f64 = tol.unwrap_or(1e-6); + let max_iter = max_iter.unwrap_or(100); + let n_start: HashMap<NodeIndex, f64> = graph.node_identifiers().map(|n| (n, 1.)).collect(); + let n_start_sum: f64 = n_start.len() as f64; + let mut x: HashMap<NodeIndex, f64> = + n_start.iter().map(|(k, v)| (*k, v / n_start_sum)).collect(); + let node_count = graph.node_count(); + for _ in 0..max_iter { + let x_last = x.clone(); + for node in x_last.keys() { + for neighbor in graph.neighbors(*node) { + let w_vec: Vec<G::EdgeRef> = graph + .edges(*node) + .filter(|edge| edge.target() == neighbor) + .collect(); + let mut w = 0.; + for edge in w_vec { + w += weight_fn(edge)?; + } + *x.get_mut(&neighbor).unwrap() += x_last[node] * w; + } + } + let mut norm: f64 = x.values().map(|val| val.powi(2)).sum::<f64>().sqrt(); + if norm == 0. { + norm = 1.; + } + x = x.iter().map(|(k, v)| (*k, v / norm)).collect(); + if x.keys() + .map(|node| (x[node] - x_last[node]).abs()) + .sum::<f64>() + < node_count as f64 * tol + { + return Ok(Some(x)); + } + } + Ok(None) +} + +#[cfg(test)] +mod test_eigenvector_centrality { + + use hashbrown::HashMap; + + use crate::centrality::eigenvector_centrality; + use crate::petgraph; + use crate::Result; + + macro_rules! 
assert_almost_equal { + ($x:expr, $y:expr, $d:expr) => { + if !($x - $y < $d && $y - $x < $d) { + panic!("{} != {} within delta of {}", $x, $y, $d); + } + }; + } + #[test] + fn test_no_convergence() { + let g = petgraph::graph::UnGraph::<i32, ()>::from_edges(&[(0, 1), (1, 2)]); + let output: Result<Option<HashMap<petgraph::graph::NodeIndex, f64>>> = + eigenvector_centrality(&g, |_| Ok(1.), Some(0), None); + let result = output.unwrap(); + assert_eq!(None, result); + } + + #[test] + fn test_undirected_complete_graph() { + let g = petgraph::graph::UnGraph::<i32, ()>::from_edges([ + (0, 1), + (0, 2), + (0, 3), + (0, 4), + (1, 2), + (1, 3), + (1, 4), + (2, 3), + (2, 4), + (3, 4), + ]); + let output: Result<Option<HashMap<petgraph::graph::NodeIndex, f64>>> = + eigenvector_centrality(&g, |_| Ok(1.), None, None); + let result = output.unwrap().unwrap(); + let expected_value: f64 = (1_f64 / 5_f64).sqrt(); + let expected_values: Vec<f64> = vec![expected_value; 5]; + for i in 0..5 { + let index = petgraph::graph::NodeIndex::new(i); + assert_almost_equal!(expected_values[i], result[&index], 1e-4); + } + } + + #[test] + fn test_undirected_path_graph() { + let g = petgraph::graph::UnGraph::<i32, ()>::from_edges(&[(0, 1), (1, 2)]); + let output: Result<Option<HashMap<petgraph::graph::NodeIndex, f64>>> = + eigenvector_centrality(&g, |_| Ok(1.), None, None); + let result = output.unwrap().unwrap(); + let expected_values: Vec<f64> = vec![0.5, 0.7071, 0.5]; + for i in 0..3 { + let index = petgraph::graph::NodeIndex::new(i); + assert_almost_equal!(expected_values[i], result[&index], 1e-4); + } + } + + #[test] + fn test_directed_graph() { + let g = petgraph::graph::DiGraph::<i32, ()>::from_edges([ + (0, 1), + (0, 2), + (1, 3), + (2, 1), + (2, 4), + (3, 1), + (3, 4), + (3, 5), + (4, 5), + (4, 6), + (4, 7), + (5, 7), + (6, 0), + (6, 4), + (6, 7), + (7, 5), + (7, 6), + ]); + let output: Result<Option<HashMap<petgraph::graph::NodeIndex, f64>>> = + eigenvector_centrality(&g, |_| Ok(2.), None, None); + let result = output.unwrap().unwrap(); + let expected_values: Vec<f64> = vec![ + 0.25368793, 0.19576478, 0.32817092, 0.40430835, 0.48199885, 0.15724483, 0.51346196, + 0.32475403, + ]; + for i in 0..8 { + let index = 
petgraph::graph::NodeIndex::new(i); + assert_almost_equal!(expected_values[i], result[&index], 1e-4); + } + } +} diff --git a/retworkx/__init__.py b/retworkx/__init__.py index 209fafbea..b76a0247c 100644 --- a/retworkx/__init__.py +++ b/retworkx/__init__.py @@ -1592,6 +1592,63 @@ def _graph_betweenness_centrality(graph, normalized=True, endpoints=False, paral ) +@functools.singledispatch +def eigenvector_centrality(graph, weight_fn=None, default_weight=1.0, max_iter=100, tol=1e-6): + """Compute the eigenvector centrality of a :class:`~PyGraph`. + + For details on the eigenvector centrality refer to: + + Phillip Bonacich. “Power and Centrality: A Family of Measures.” + American Journal of Sociology 92(5):1170–1182, 1986 + + + This function uses a power iteration method to compute the eigenvector + and convergence is not guaranteed. The function will stop when `max_iter` + iterations is reached or when the computed vector between two iterations + is smaller than the error tolerance multiplied by the number of nodes. + The implementation of this algorithm is based on the NetworkX + `eigenvector_centrality() <https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.centrality.eigenvector_centrality.html>`__ + function. + + In the case of multigraphs the weights of any parallel edges will be + summed when computing the eigenvector centrality. + + :param PyDigraph graph: The graph object to run the algorithm on + :param weight_fn: An optional input callable that will be passed the edge's + payload object and is expected to return a `float` weight for that edge. + If this is not specified ``default_weight`` will be used as the weight + for every edge in ``graph`` + :param float default_weight: If ``weight_fn`` is not set the default weight + value to use for the weight of all edges + :param int max_iter: The maximum number of iterations in the power method. If + not specified a default value of 100 is used. + :param float tol: The error tolerance used when checking for convergence in the + power method. If this is not specified default value of 1e-6 is used. 
+ + :returns: a read-only dict-like object whose keys are the node indices and values are the + centrality score for that node. + :rtype: CentralityMapping + """ + + +@eigenvector_centrality.register(PyDiGraph) +def _digraph_eigenvector_centrality( + graph, weight_fn=None, default_weight=1.0, max_iter=100, tol=1e-6 +): + return digraph_eigenvector_centrality( + graph, weight_fn=weight_fn, default_weight=default_weight, max_iter=max_iter, tol=tol + ) + + +@eigenvector_centrality.register(PyGraph) +def _graph_eigenvector_centrality( + graph, weight_fn=None, default_weight=1.0, max_iter=100, tol=1e-6 +): + return graph_eigenvector_centrality( + graph, weight_fn=weight_fn, default_weight=default_weight, max_iter=max_iter, tol=tol + ) + + @functools.singledispatch def vf2_mapping( first, diff --git a/src/centrality.rs b/src/centrality.rs index df87c7f8a..494247d7e 100644 --- a/src/centrality.rs +++ b/src/centrality.rs @@ -10,10 +10,13 @@ // License for the specific language governing permissions and limitations // under the License. -use crate::iterators::CentralityMapping; +use std::convert::TryFrom; use crate::digraph; use crate::graph; +use crate::iterators::CentralityMapping; +use crate::CostFn; +use crate::FailedToConverge; use pyo3::prelude::*; @@ -132,3 +135,127 @@ pub fn digraph_betweenness_centrality( .collect(), } } + +/// Compute the eigenvector centrality of a :class:`~PyGraph`. +/// +/// For details on the eigenvector centrality refer to: +/// +/// Phillip Bonacich. “Power and Centrality: A Family of Measures.” +/// American Journal of Sociology 92(5):1170–1182, 1986 +/// +/// +/// This function uses a power iteration method to compute the eigenvector +/// and convergence is not guaranteed. The function will stop when `max_iter` +/// iterations is reached or when the computed vector between two iterations +/// is smaller than the error tolerance multiplied by the number of nodes. 
+/// The implementation of this algorithm is based on the NetworkX +/// `eigenvector_centrality() <https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.centrality.eigenvector_centrality.html>`__ +/// function. +/// +/// In the case of multigraphs the weights of any parallel edges will be +/// summed when computing the eigenvector centrality. +/// +/// :param PyGraph graph: The graph object to run the algorithm on +/// :param weight_fn: An optional input callable that will be passed the edge's +/// payload object and is expected to return a `float` weight for that edge. +/// If this is not specified ``default_weight`` will be used as the weight +/// for every edge in ``graph`` +/// :param float default_weight: If ``weight_fn`` is not set the default weight +/// value to use for the weight of all edges +/// :param int max_iter: The maximum number of iterations in the power method. If +/// not specified a default value of 100 is used. +/// :param float tol: The error tolerance used when checking for convergence in the +/// power method. If this is not specified default value of 1e-6 is used. +/// +/// :returns: a read-only dict-like object whose keys are the node indices and values are the +/// centrality score for that node. 
+/// :rtype: CentralityMapping +#[pyfunction(default_weight = "1.0", max_iter = "100", tol = "1e-6")] +#[pyo3(text_signature = "(graph, /, weight_fn=None, default_weight=1.0, max_iter=100, tol=1e-6)")] +pub fn graph_eigenvector_centrality( + py: Python, + graph: &graph::PyGraph, + weight_fn: Option<PyObject>, + default_weight: f64, + max_iter: usize, + tol: f64, +) -> PyResult<CentralityMapping> { + let cost_fn = CostFn::try_from((weight_fn, default_weight))?; + let ev_centrality = centrality::eigenvector_centrality( + &graph.graph, + |e| cost_fn.call(py, e.weight()), + Some(max_iter), + Some(tol), + )?; + match ev_centrality { + Some(centrality) => Ok(CentralityMapping { + centralities: centrality.iter().map(|(k, v)| (k.index(), *v)).collect(), + }), + None => Err(FailedToConverge::new_err(format!( + "Function failed to converge on a solution in {} iterations", + max_iter + ))), + } +} + +/// Compute the eigenvector centrality of a :class:`~PyDiGraph`. +/// +/// For details on the eigenvector centrality refer to: +/// +/// Phillip Bonacich. “Power and Centrality: A Family of Measures.” +/// American Journal of Sociology 92(5):1170–1182, 1986 +/// +/// +/// This function uses a power iteration method to compute the eigenvector +/// and convergence is not guaranteed. The function will stop when `max_iter` +/// iterations is reached or when the computed vector between two iterations +/// is smaller than the error tolerance multiplied by the number of nodes. +/// The implementation of this algorithm is based on the NetworkX +/// `eigenvector_centrality() <https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.centrality.eigenvector_centrality.html>`__ +/// function. +/// +/// In the case of multigraphs the weights of any parallel edges will be +/// summed when computing the eigenvector centrality. +/// +/// :param PyDiGraph graph: The graph object to run the algorithm on +/// :param weight_fn: An optional input callable that will be passed the edge's +/// payload object and is expected to return a `float` weight for that edge. 
+/// If this is not specified ``default_weight`` will be used as the weight +/// for every edge in ``graph`` +/// :param float default_weight: If ``weight_fn`` is not set the default weight +/// value to use for the weight of all edges +/// :param int max_iter: The maximum number of iterations in the power method. If +/// not specified a default value of 100 is used. +/// :param float tol: The error tolerance used when checking for convergence in the +/// power method. If this is not specified default value of 1e-6 is used. +/// +/// :returns: a read-only dict-like object whose keys are the node indices and values are the +/// centrality score for that node. +/// :rtype: CentralityMapping +#[pyfunction(default_weight = "1.0", max_iter = "100", tol = "1e-6")] +#[pyo3(text_signature = "(graph, /, weight_fn=None, default_weight=1.0, max_iter=100, tol=1e-6)")] +pub fn digraph_eigenvector_centrality( + py: Python, + graph: &digraph::PyDiGraph, + weight_fn: Option<PyObject>, + default_weight: f64, + max_iter: usize, + tol: f64, +) -> PyResult<CentralityMapping> { + let cost_fn = CostFn::try_from((weight_fn, default_weight))?; + let ev_centrality = centrality::eigenvector_centrality( + &graph.graph, + |e| cost_fn.call(py, e.weight()), + Some(max_iter), + Some(tol), + )?; + match ev_centrality { + Some(centrality) => Ok(CentralityMapping { + centralities: centrality.iter().map(|(k, v)| (k.index(), *v)).collect(), + }), + None => Err(FailedToConverge::new_err(format!( + "Function failed to converge on a solution in {} iterations", + max_iter + ))), + } +} diff --git a/src/lib.rs b/src/lib.rs index 94586bc45..de051ed7d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -316,6 +316,8 @@ import_exception!(retworkx.visit, PruneSearch); import_exception!(retworkx.visit, StopSearch); // Negative Cycle found on shortest-path algorithm create_exception!(retworkx, NegativeCycle, PyException); +// Failed to Converge on a solution +create_exception!(retworkx, FailedToConverge, PyException); #[pymodule] fn retworkx(py: 
Python<'_>, m: &PyModule) -> PyResult<()> { @@ -328,6 +330,7 @@ fn retworkx(py: Python<'_>, m: &PyModule) -> PyResult<()> { m.add("NoPathFound", py.get_type::())?; m.add("NullGraph", py.get_type::())?; m.add("NegativeCycle", py.get_type::())?; + m.add("FailedToConverge", py.get_type::())?; m.add_wrapped(wrap_pyfunction!(bfs_successors))?; m.add_wrapped(wrap_pyfunction!(graph_bfs_search))?; m.add_wrapped(wrap_pyfunction!(digraph_bfs_search))?; @@ -400,6 +403,8 @@ fn retworkx(py: Python<'_>, m: &PyModule) -> PyResult<()> { ))?; m.add_wrapped(wrap_pyfunction!(graph_betweenness_centrality))?; m.add_wrapped(wrap_pyfunction!(digraph_betweenness_centrality))?; + m.add_wrapped(wrap_pyfunction!(graph_eigenvector_centrality))?; + m.add_wrapped(wrap_pyfunction!(digraph_eigenvector_centrality))?; m.add_wrapped(wrap_pyfunction!(graph_astar_shortest_path))?; m.add_wrapped(wrap_pyfunction!(digraph_astar_shortest_path))?; m.add_wrapped(wrap_pyfunction!(graph_greedy_color))?; diff --git a/tests/digraph/test_centrality.py b/tests/digraph/test_centrality.py index da0540e0d..674571273 100644 --- a/tests/digraph/test_centrality.py +++ b/tests/digraph/test_centrality.py @@ -10,6 +10,7 @@ # License for the specific language governing permissions and limitations # under the License. 
+import math import unittest import retworkx @@ -128,3 +129,24 @@ def test_betweenness_centrality_unnormalized(self): ) expected = {0: 0.0, 1: 2.0, 2: 2.0, 4: 0.0} self.assertEqual(expected, betweenness) + + +class TestEigenvectorCentrality(unittest.TestCase): + def test_complete_graph(self): + graph = retworkx.generators.directed_mesh_graph(5) + centrality = retworkx.eigenvector_centrality(graph) + expected_value = math.sqrt(1.0 / 5.0) + for value in centrality.values(): + self.assertAlmostEqual(value, expected_value) + + def test_path_graph(self): + graph = retworkx.generators.directed_path_graph(3, bidirectional=True) + centrality = retworkx.eigenvector_centrality(graph) + expected = [0.5, 0.7071, 0.5] + for k, v in centrality.items(): + self.assertAlmostEqual(v, expected[k], 4) + + def test_no_convergence(self): + graph = retworkx.PyDiGraph() + with self.assertRaises(retworkx.FailedToConverge): + retworkx.eigenvector_centrality(graph, max_iter=0) diff --git a/tests/graph/test_centrality.py b/tests/graph/test_centrality.py index 5c382361e..d7a51e9dd 100644 --- a/tests/graph/test_centrality.py +++ b/tests/graph/test_centrality.py @@ -10,6 +10,7 @@ # License for the specific language governing permissions and limitations # under the License. 
+import math import unittest import retworkx @@ -99,3 +100,24 @@ def test_betweenness_centrality_unnormalized(self): ) expected = {0: 0.0, 1: 2.0, 2: 2.0, 4: 0.0} self.assertEqual(expected, betweenness) + + +class TestEigenvectorCentrality(unittest.TestCase): + def test_complete_graph(self): + graph = retworkx.generators.mesh_graph(5) + centrality = retworkx.eigenvector_centrality(graph) + expected_value = math.sqrt(1.0 / 5.0) + for value in centrality.values(): + self.assertAlmostEqual(value, expected_value) + + def test_path_graph(self): + graph = retworkx.generators.path_graph(3) + centrality = retworkx.eigenvector_centrality(graph) + expected = [0.5, 0.7071, 0.5] + for k, v in centrality.items(): + self.assertAlmostEqual(v, expected[k], 4) + + def test_no_convergence(self): + graph = retworkx.PyGraph() + with self.assertRaises(retworkx.FailedToConverge): + retworkx.eigenvector_centrality(graph, max_iter=0)