Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Load bigg data #384

Merged
merged 12 commits into from
Jun 8, 2023
27 changes: 27 additions & 0 deletions tests/readwrite/test_bigg_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import pytest

from xgi import load_bigg_data
from xgi.exception import XGIError


@pytest.mark.webtest
@pytest.mark.slow
@pytest.mark.webtest
@pytest.mark.slow
def test_load_bigg_data(capfd):
    """Exercise load_bigg_data: web fetch, caching, dataset listing, errors."""
    # Fetch the model fresh from the web, bypassing the cache.
    fresh = load_bigg_data("iAF1260", cache=False)
    assert fresh.num_nodes == 1668
    assert fresh.num_edges == 2382
    assert fresh["name"] == "iAF1260"
    expected_attrs = {"name": "2-Acyl-sn-glycero-3-phosphoglycerol (n-C16:1)"}
    assert fresh.nodes["2agpg161_c"] == expected_attrs

    # A cached fetch must produce an identical hypergraph.
    cached = load_bigg_data("iAF1260", cache=True)
    assert fresh.nodes == cached.nodes
    assert fresh.edges == cached.edges

    # Calling with no dataset prints the list of available model ids.
    load_bigg_data()
    captured, _ = capfd.readouterr()
    assert "Available datasets are the following:" in captured
    assert "iAF1260" in captured

    # An unknown dataset name raises an XGIError.
    with pytest.raises(XGIError):
        load_bigg_data("test")
8 changes: 7 additions & 1 deletion tests/readwrite/test_xgi_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

@pytest.mark.webtest
@pytest.mark.slow
def test_load_xgi_data():
def test_load_xgi_data(capfd):
# test loading the online data
H1 = load_xgi_data("email-enron", cache=False)
assert H1.num_nodes == 148
Expand Down Expand Up @@ -42,6 +42,12 @@ def test_load_xgi_data():
H4 = load_xgi_data("email-enron", read=True, path=dir)
assert H1.edges.members() == H4.edges.members()

load_xgi_data()
out, _ = capfd.readouterr()
assert "Available datasets are the following:" in out
assert "email-enron" in out
assert "congress-bills" in out


def test_download_xgi_data():
dir = tempfile.mkdtemp()
Expand Down
6 changes: 3 additions & 3 deletions xgi/algorithms/centrality.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,11 +353,11 @@ def katz_centrality(H, index=False, cutoff=100):

.. math::
c = [(I - \alpha A^{t})^{-1} - I]{\bf 1},

where :math:`A` is the adjacency matrix of the (hyper)graph.
Since :math:`A^{t} = A` for undirected graphs (our case), we have:


.. math::
&[I + A + \alpha A^2 + \alpha^2 A^3 + \dots](I - \alpha A^{t})

Expand All @@ -368,7 +368,7 @@ def katz_centrality(H, index=False, cutoff=100):
& - \alpha^2 A^3 - \alpha^3 A^4 - \dots

& = I

And :math:`(I - \alpha A^{t})^{-1} = I + A + \alpha A^2 + \alpha^2 A^3 + \dots`
Thus we can use the power series to compute the Katz-centrality.
[2] The Katz-centrality of isolated nodes (no hyperedges contains them) is
Expand Down
3 changes: 2 additions & 1 deletion xgi/readwrite/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from . import bipartite, edgelist, incidence, json, xgi_data
from . import bigg_data, bipartite, edgelist, incidence, json, xgi_data
from .bipartite import *
from .edgelist import *
from .incidence import *
from .json import *
from .xgi_data import *
from .bigg_data import *
113 changes: 113 additions & 0 deletions xgi/readwrite/bigg_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
"""Load a metabolic network from the BiGG models database."""

from ..utils import request_json_from_url, request_json_from_url_cached

__all__ = ["load_bigg_data"]


def load_bigg_data(
    dataset=None,
    cache=True,
):
    """Load a metabolic network from the BiGG models database.

    The Biochemical, Genetic and Genomic (BiGG) knowledge base is
    hosted at http://bigg.ucsd.edu/ and contains genome-scale
    metabolic reaction networks.

    Metabolites are represented as nodes, and each metabolic reaction
    becomes a directed edge: reactants form the tail of the edge and
    products form the head.

    Parameters
    ----------
    dataset : str, optional
        Dataset name. Valid options are the "bigg_id" tags in
        http://bigg.ucsd.edu/api/v2/models. If None (default), prints
        the list of available datasets instead of loading one.
    cache : bool, optional
        Whether to cache the downloaded model data, by default True.

    Returns
    -------
    DiHypergraph
        The loaded dihypergraph, or None when ``dataset`` is None.

    Raises
    ------
    XGIError
        The specified dataset does not exist.

    References
    ----------
    Zachary A. King, Justin Lu, Andreas Dräger,
    Philip Miller, Stephen Federowicz, Joshua A. Lerman,
    Ali Ebrahim, Bernhard O. Palsson, Nathan E. Lewis
    Nucleic Acids Research, Volume 44, Issue D1,
    4 January 2016, Pages D515–D522,
    https://doi.org/10.1093/nar/gkv1049
    """

    index_url = "http://bigg.ucsd.edu/api/v2/models"
    base_url = "http://bigg.ucsd.edu/static/models/"

    index_data = request_json_from_url(index_url)

    if dataset is None:
        # No dataset requested: list every available model id and return.
        ids = [entry["bigg_id"] for entry in index_data["results"]]
        print("Available datasets are the following:")
        print(*ids, sep="\n")
        return

    # Pick the cached or uncached fetcher, then download the model JSON.
    fetch = request_json_from_url_cached if cache else request_json_from_url
    model_data = fetch(base_url + dataset + ".json")

    return _bigg_to_dihypergraph(index_data, model_data)


def _bigg_to_dihypergraph(d_index, d_model):
    """Convert a BiGG-formatted dict to a dihypergraph.

    Parameters
    ----------
    d_index : dict
        The BiGG model index (the parsed response of
        http://bigg.ucsd.edu/api/v2/models); used to look up the
        organism name of the model.
    d_model : dict
        A single BiGG model as a dict, with "id", "metabolites",
        and "reactions" entries.

    Returns
    -------
    DiHypergraph
        The dihypergraph of the selected BiGG model: metabolites are
        nodes and each reaction is a directed edge (reactants in the
        tail, products in the head).
    """
    # Imported locally to avoid a circular import at module load time.
    from .. import DiHypergraph

    DH = DiHypergraph()

    # `model_id` avoids shadowing the builtin `id`.
    model_id = d_model["id"]

    DH["name"] = model_id

    # Attach the organism name from the index entry matching this model.
    for entry in d_index["results"]:
        if entry["bigg_id"] == model_id:
            DH["organism"] = entry["organism"]
            break

    for m in d_model["metabolites"]:
        DH.add_node(m["id"], name=m["name"])

    for r in d_model["reactions"]:
        head = set()  # products: positive stoichiometric coefficient
        tail = set()  # reactants: non-positive coefficient
        for metabolite, coeff in r["metabolites"].items():
            if coeff > 0:
                head.add(metabolite)
            else:
                tail.add(metabolite)

        DH.add_edge((tail, head), id=r["id"], name=r["name"])

    return DH
103 changes: 28 additions & 75 deletions xgi/readwrite/xgi_data.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
"""Load a data set from the xgi-data repository or a local file."""
import json
import os
from functools import lru_cache
from warnings import warn

import requests

from .. import convert
from ..exception import XGIError
from ..utils import request_json_from_url, request_json_from_url_cached

__all__ = ["load_xgi_data", "download_xgi_data"]

Expand All @@ -25,23 +23,24 @@ def load_xgi_data(

Parameters
----------
dataset : str, default: None
dataset : str, optional
Dataset name. Valid options are the top-level tags of the
index.json file in the xgi-data repository. If None, prints
index.json file in the xgi-data repository. If None (default), prints
the list of available datasets.
cache : bool, optional
Whether to cache the input data
Whether to cache the input data, by default True.
read : bool, optional
If read==True, search for a local copy of the data set. Use the local
copy if it exists, otherwise use the xgi-data repository.
copy if it exists, otherwise use the xgi-data repository.
By default, False.
path : str, optional
Path to a local copy of the data set
nodetype : type, optional
Type to cast the node ID to
Type to cast the node ID to, by default None.
edgetype : type, optional
Type to cast the edge ID to
Type to cast the edge ID to, by default None.
max_order: int, optional
Maximum order of edges to add to the hypergraph
Maximum order of edges to add to the hypergraph, by default None.

Returns
-------
Expand All @@ -53,11 +52,15 @@ def load_xgi_data(
XGIError
The specified dataset does not exist.
"""
index_url = (
"https://gitlab.com/complexgroupinteractions/"
"xgi-data/-/raw/main/index.json?inline=false"
)

# If no dataset is specified, print a list of the available datasets.
if dataset is None:
index_url = "https://gitlab.com/complexgroupinteractions/xgi-data/-/raw/main/index.json?inline=false"
index_data = _request_json_from_url(index_url)

index_data = request_json_from_url(index_url)
print("Available datasets are the following:")
print(*index_data, sep="\n")
return
Expand All @@ -76,10 +79,7 @@ def load_xgi_data(
"from the xgi-data repository instead. To download a local "
"copy, use `download_xgi_data`."
)
if cache:
data = _request_from_xgi_data_cached(dataset)
else:
data = _request_from_xgi_data(dataset)
data = _request_from_xgi_data(dataset, cache=cache)

return convert.dict_to_hypergraph(
data, nodetype=nodetype, edgetype=edgetype, max_order=max_order
Expand All @@ -106,15 +106,17 @@ def download_xgi_data(dataset, path=""):
jsonfile.close()


def _request_from_xgi_data(dataset=None):
def _request_from_xgi_data(dataset=None, cache=True):
"""Request a dataset from xgi-data.

Parameters
----------
dataset : str, default: None
dataset : str, optional
Dataset name. Valid options are the top-level tags of the
index.json file in the xgi-data repository. If None, prints
the list of available datasets.
cache : bool, optional
Whether or not to cache the output

Returns
-------
Expand All @@ -130,69 +132,20 @@ def _request_from_xgi_data(dataset=None):
---------
load_xgi_data
"""
index_url = (
"https://gitlab.com/complexgroupinteractions/"
"xgi-data/-/raw/main/index.json?inline=false"
)

index_url = "https://gitlab.com/complexgroupinteractions/xgi-data/-/raw/main/index.json?inline=false"
index_data = _request_json_from_url(index_url)
index_data = request_json_from_url(index_url)

key = dataset.lower()
if key not in index_data:
print("Valid dataset names:")
print(*index_data, sep="\n")
raise XGIError("Must choose a valid dataset name!")

return _request_json_from_url(index_data[key]["url"])


@lru_cache(maxsize=None)
def _request_from_xgi_data_cached(dataset):
"""Request a dataset from xgi-data and cache the result.

Wraps `_request_from_xgi_data` in an lru_cache decorator.

Parameters
----------
dataset : str
Dataset name. Valid options are the top-level tags of the
index.json file in the xgi-data repository.

Returns
-------
Data
The requested data loaded from a json file.

See also
---------
load_xgi_data
"""

return _request_from_xgi_data(dataset)


def _request_json_from_url(url):
"""HTTP request json file and return as dict.

Parameters
----------
url : str
The url where the json file is located.

Returns
-------
dict
A dictionary of the JSON requested.

Raises
------
XGIError
If the connection fails or if there is a bad HTTP request.
"""

try:
r = requests.get(url)
except requests.ConnectionError:
raise XGIError("Connection Error!")

if r.ok:
return r.json()
if cache:
return request_json_from_url_cached(index_data[key]["url"])
else:
raise XGIError(f"Error: HTTP response {r.status_code}")
return request_json_from_url(index_data[key]["url"])
4 changes: 0 additions & 4 deletions xgi/stats/dinodestats.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,6 @@

"""

import numpy as np

import xgi

__all__ = [
"attrs",
"degree",
Expand Down
Loading