Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge edges #210

Merged
merged 13 commits into from
Nov 16, 2022
36 changes: 36 additions & 0 deletions tests/classes/test_hypergraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -462,6 +462,42 @@ def test_clear_edges(edgelist1):
assert len(H.edges) == 0


def test_merge_duplicate_edges(hyperwithdupsandattrs):
    """Check each rename scheme and merge rule of ``merge_duplicate_edges``.

    Every scenario starts from a fresh copy of the fixture so that the
    in-place merges do not interfere with one another.
    """
    # Defaults: merged edge keeps the ID and attributes of the first duplicate.
    merged_default = hyperwithdupsandattrs.copy()
    merged_default.merge_duplicate_edges()
    assert merged_default.num_edges == 2
    assert set(merged_default.edges) == {0, 3}
    assert merged_default.edges[0] == {"color": "blue"}
    assert merged_default.edges[3] == {"color": "purple"}

    # rename="tuple": merged edge ID is the tuple of the duplicate IDs.
    merged_tuple = hyperwithdupsandattrs.copy()
    merged_tuple.merge_duplicate_edges(rename="tuple")
    assert set(merged_tuple.edges) == {(0, 1, 2), (3, 4)}
    assert merged_tuple.edges.members((0, 1, 2)) == {1, 2}
    assert merged_tuple.edges.members((3, 4)) == {3, 4, 5}

    # rename="new": merged edges receive freshly generated IDs.
    merged_new = hyperwithdupsandattrs.copy()
    merged_new.merge_duplicate_edges(rename="new")
    assert set(merged_new.edges) == {5, 6}
    assert merged_new.edges.members(5) == {1, 2}
    assert merged_new.edges.members(6) == {3, 4, 5}

    # merge_rule="union": each attribute becomes the set of values seen
    # across the duplicates; the multiplicity is stored under "mult".
    merged_union = hyperwithdupsandattrs.copy()
    merged_union.merge_duplicate_edges(merge_rule="union", multiplicity="mult")
    expected_union_0 = {
        "color": {"blue", "red", "yellow"},
        "weight": {2, None},
        "mult": 3,
    }
    expected_union_3 = {"color": {"purple"}, "name": {"test", None}, "mult": 2}
    assert merged_union.edges[0] == expected_union_0
    assert merged_union.edges[3] == expected_union_3

    # merge_rule="intersection": an attribute survives only if every
    # duplicate agrees on its value; otherwise it is None.
    merged_inter = hyperwithdupsandattrs.copy()
    merged_inter.merge_duplicate_edges(
        merge_rule="intersection", multiplicity="multiplicity"
    )
    assert merged_inter.edges[0] == {
        "color": None,
        "weight": None,
        "multiplicity": 3,
    }
    assert merged_inter.edges[3] == {
        "color": "purple",
        "name": None,
        "multiplicity": 2,
    }
    assert merged_inter.edges.attrs("multiplicity").asdict() == {0: 3, 3: 2}


def test_issue_198(edgelist1):
H = xgi.Hypergraph(edgelist1)
H.clear_edges()
Expand Down
23 changes: 23 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,29 @@ def attr5():
return {"color": "blue", "name": "fish", "age": 2}


@pytest.fixture
def hyperwithdupsandattrs(attr1, attr2, attr3, attr4, attr5):
    """Hypergraph with duplicated, attributed edges and attributed nodes.

    The edge {1, 2} appears three times and {3, 4, 5} twice, each copy
    carrying its own attribute dict. Nodes 1 through 5 are given the
    ``attr1`` ... ``attr5`` fixtures as their attributes, in that order.
    """
    H = xgi.Hypergraph()
    H.add_edges_from(
        [
            ({1, 2}, {"color": "blue"}),
            ({1, 2}, {"color": "red", "weight": 2}),
            ({1, 2}, {"color": "yellow"}),
            ({3, 4, 5}, {"color": "purple"}),
            ({3, 4, 5}, {"color": "purple", "name": "test"}),
        ]
    )

    # Pair node IDs 1..5 with their attribute dicts.
    node_attrs = (attr1, attr2, attr3, attr4, attr5)
    H.add_nodes_from(list(enumerate(node_attrs, start=1)))
    return H


@pytest.fixture
def hyperwithattrs(edgelist4, attr1, attr2, attr3, attr4, attr5):
H = xgi.Hypergraph(edgelist4)
Expand Down
5 changes: 3 additions & 2 deletions xgi/classes/function.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
"""Functional interface to hypergraph methods and assorted utilities."""

from scipy.special import comb
from collections import Counter
from copy import deepcopy
from warnings import warn

from scipy.special import comb

from ..exception import IDNotFound, XGIError
from .hypergraph import Hypergraph

Expand Down Expand Up @@ -750,7 +751,7 @@ def order_filter(val, mode):

if order is None and max_order is None:
numer = H.num_edges
denom = 2 ** n - 1
denom = 2**n - 1
if ignore_singletons:
numer -= len(order_filter(0, mode="eq"))
denom -= n
Expand Down
168 changes: 166 additions & 2 deletions xgi/classes/hypergraph.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Base class for undirected hypergraphs."""
from collections import defaultdict
from collections.abc import Hashable, Iterable
from copy import deepcopy
from itertools import count
Expand Down Expand Up @@ -935,6 +936,169 @@ def clear_edges(self):
self._edge.clear()
self._edge_attr.clear()

def merge_duplicate_edges(
    self, rename="first", merge_rule="first", multiplicity=None
):
    """Merges edges which have the same members.

    The hypergraph is modified in place: every group of edges sharing the
    same member set is removed and replaced by a single merged edge.

    Parameters
    ----------
    rename : str, optional
        Either "first", "tuple", or "new", by default "first"
        If "first", the new edge ID is the first of the sorted
        duplicate edge IDs. If "tuple", the new edge ID is a
        tuple of the sorted duplicate edge IDs. If "new", a
        new ID will be selected automatically.
    merge_rule : str, optional
        Either "first", "union", or "intersection", by default "first"
        If "first", takes the attributes of the first duplicate.
        If "union", takes the set of attributes of all the duplicates.
        If "intersection", keeps an attribute value only when it is
        identical across all the duplicates and sets it to None otherwise.
        For "union" and "intersection" the attribute values are collected
        into sets, so they must be hashable.
    multiplicity : str, optional
        The attribute in which to store the multiplicity of the hyperedge,
        by default None

    Raises
    ------
    XGIError
        If invalid rename or merge_rule specified. The arguments are
        checked before any edge is inspected, so the error is raised
        even when the hypergraph contains no duplicate edges.

    Warns
    -----
    If the user chooses merge_rule="union". Tells the
    user that they can no longer draw based on this stat.

    Examples
    --------

    >>> import xgi
    >>> edges = [{1, 2}, {1, 2}, {1, 2}, {3, 4, 5}, {3, 4, 5}]
    >>> edge_attrs = dict()
    >>> edge_attrs[0] = {"color": "blue"}
    >>> edge_attrs[1] = {"color": "red", "weight": 2}
    >>> edge_attrs[2] = {"color": "yellow"}
    >>> edge_attrs[3] = {"color": "purple"}
    >>> edge_attrs[4] = {"color": "purple", "name": "test"}
    >>> H = xgi.Hypergraph(edges)
    >>> xgi.set_edge_attributes(H, edge_attrs)
    >>> H.edges
    EdgeView((0, 1, 2, 3, 4))

    There are several ways to rename the duplicate edges after merging:

    1. The merged edge ID is the first duplicate edge ID.

    >>> H1 = H.copy()
    >>> H1.merge_duplicate_edges()
    >>> H1.edges
    EdgeView((0, 3))

    2. The merged edge ID is a tuple of all the duplicate edge IDs.

    >>> H2 = H.copy()
    >>> H2.merge_duplicate_edges(rename="tuple")
    >>> H2.edges
    EdgeView(((0, 1, 2), (3, 4)))

    3. The merged edge ID is assigned a new edge ID.

    >>> H3 = H.copy()
    >>> H3.merge_duplicate_edges(rename="new")
    >>> H3.edges
    EdgeView((5, 6))

    We can also specify how we would like to combine the attributes
    of the merged edges:

    1. The attributes are the attributes of the first merged edge.

    >>> H4 = H.copy()
    >>> H4.merge_duplicate_edges()
    >>> H4.edges[0]
    {'color': 'blue'}

    2. The attributes are the union of every attribute that each merged
    edge has. If a duplicate edge doesn't have that attribute, it is set
    to None.

    >>> H5 = H.copy()
    >>> H5.merge_duplicate_edges(merge_rule="union")
    >>> H5.edges[0] == {'color': {'blue', 'red', 'yellow'}, 'weight':{2, None}}
    True

    3. We can also set the attributes to the intersection, i.e.,
    if a particular attribute is the same across the duplicate
    edges, we use this attribute, otherwise, we set it to None.

    >>> H6 = H.copy()
    >>> H6.merge_duplicate_edges(merge_rule="intersection")
    >>> H6.edges[0] == {'color': None, 'weight': None}
    True
    >>> H6.edges[3] == {'color': 'purple', 'name': None}
    True

    We can also choose to store the multiplicity of the edge
    as an attribute. The user simply provides the string of
    the attribute which stores it. Note that this will not prevent
    other attributes from being over written (e.g., weight), so
    be careful that the attribute is not already in use.

    >>> H7 = H.copy()
    >>> H7.merge_duplicate_edges(multiplicity="mult")
    >>> H7.edges[0]['mult'] == 3
    True
    """
    # Validate before touching any state: otherwise a bad argument goes
    # unnoticed when there are no duplicates, and rename="new" would burn
    # an edge UID before an invalid merge rule is reported.
    if rename not in ("first", "tuple", "new"):
        raise XGIError("Invalid ID renaming scheme!")
    if merge_rule not in ("first", "union", "intersection"):
        raise XGIError("Invalid merge rule!")

    # Group edge IDs by their member set; frozenset makes the members
    # hashable and independent of ordering.
    groups = defaultdict(list)
    for idx, members in self._edge.items():
        groups[frozenset(members)].append(idx)

    dups = []
    new_edges = []
    for members, dup_ids in groups.items():
        if len(dup_ids) < 2:
            # Unique edges are left untouched.
            continue
        dups.extend(dup_ids)
        first_id = min(dup_ids)

        if rename == "first":
            new_id = first_id
        elif rename == "tuple":
            new_id = tuple(sorted(dup_ids))
        else:  # rename == "new"
            new_id = next(self._edge_uid)

        if merge_rule == "first":
            # Deep copy so the merged edge does not alias the attribute
            # dict of the edge that is about to be removed.
            new_attrs = deepcopy(self._edge_attr[first_id])
        else:
            # For every attribute present on any duplicate, collect the
            # set of values across all duplicates (None when missing).
            fields = {
                field for edge_id in dup_ids for field in self._edge_attr[edge_id]
            }
            values = {
                field: {self._edge_attr[edge_id].get(field) for edge_id in dup_ids}
                for field in fields
            }
            if merge_rule == "union":
                new_attrs = values
            else:  # merge_rule == "intersection"
                new_attrs = {
                    field: (next(iter(vals)) if len(vals) == 1 else None)
                    for field, vals in values.items()
                }

        if multiplicity is not None:
            new_attrs[multiplicity] = len(dup_ids)
        new_edges.append((members, new_id, new_attrs))

    # Remove all duplicates first so that a merged edge may reuse one of
    # the removed IDs (rename="first").
    self.remove_edges_from(dups)
    self.add_edges_from(new_edges)

    if merge_rule == "union":
        warn(
            "You will not be able to color/draw by merged attributes with xgi.draw()!"
        )

def copy(self):
"""A deep copy of the hypergraph.

Expand Down Expand Up @@ -1038,7 +1202,7 @@ def cleanup(
"""
if in_place:
if not multiedges:
self.remove_edges_from(self.edges.duplicates())
self.merge_duplicate_edges()
if not singletons:
self.remove_edges_from(self.edges.singletons())
if not isolates:
Expand All @@ -1063,7 +1227,7 @@ def cleanup(
if not multiedges:
H.remove_edges_from(H.edges.duplicates())
if not singletons:
H.remove_edges_from(H.edges.singletons())
H.merge_duplicate_edges()
if not isolates:
H.remove_nodes_from(H.nodes.isolates())
if relabel:
Expand Down
2 changes: 1 addition & 1 deletion xgi/classes/reportviews.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from collections.abc import Mapping, Set

from ..exception import IDNotFound, XGIError
from ..stats import IDStat, dispatch_stat, dispatch_many_stats
from ..stats import IDStat, dispatch_many_stats, dispatch_stat

__all__ = [
"NodeView",
Expand Down
10 changes: 1 addition & 9 deletions xgi/stats/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ def moment(self, order=2, center=False):

"""
arr = self.asnumpy()
return spmoment(arr, moment=order) if center else np.mean(arr ** order)
return spmoment(arr, moment=order) if center else np.mean(arr**order)

def dist(self):
return np.histogram(self.asnumpy(), density=True)
Expand Down Expand Up @@ -480,14 +480,6 @@ def nodestat_func(func):
NodeStat('degree')

However, `my_degree` is not recognized as a node statistic.

>>> H.my_degree()
Traceback (most recent call last):
AttributeError: stat "my_degree" not among available node or edge stats
>>> H.nodes.my_degree
Traceback (most recent call last):
AttributeError: Stat 'my_degree' not defined

leotrs marked this conversation as resolved.
Show resolved Hide resolved
Use the `nodestat_func` decorator to turn `my_degree` into a valid stat.

>>> original_my_degree = my_degree
Expand Down