Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Load bigg data #384

Merged
merged 12 commits into from
Jun 8, 2023
27 changes: 27 additions & 0 deletions tests/readwrite/test_bigg_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import pytest

from xgi import load_bigg_data
from xgi.exception import XGIError


@pytest.mark.webtest
@pytest.mark.slow
@pytest.mark.webtest
@pytest.mark.slow
def test_load_bigg_data(capfd):
    """Exercise load_bigg_data: web fetch, caching, dataset listing, errors."""
    # Fetch the model fresh from the web, bypassing the cache.
    fresh = load_bigg_data("iAF1260", cache=False)
    assert fresh.num_nodes == 1668
    assert fresh.num_edges == 2382
    assert fresh["name"] == "iAF1260"
    expected_attrs = {"name": "2-Acyl-sn-glycero-3-phosphoglycerol (n-C16:1)"}
    assert fresh.nodes["2agpg161_c"] == expected_attrs

    # A cached fetch must produce an identical hypergraph.
    cached = load_bigg_data("iAF1260", cache=True)
    assert fresh.nodes == cached.nodes
    assert fresh.edges == cached.edges

    # Calling with no dataset prints the list of available model ids.
    load_bigg_data()
    captured, _ = capfd.readouterr()
    assert "Available datasets are the following:" in captured
    assert "iAF1260" in captured

    # An unknown dataset name raises an XGIError.
    with pytest.raises(XGIError):
        load_bigg_data("test")
8 changes: 7 additions & 1 deletion tests/readwrite/test_xgi_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

@pytest.mark.webtest
@pytest.mark.slow
def test_load_xgi_data():
def test_load_xgi_data(capfd):
# test loading the online data
H1 = load_xgi_data("email-enron", cache=False)
assert H1.num_nodes == 148
Expand Down Expand Up @@ -42,6 +42,12 @@ def test_load_xgi_data():
H4 = load_xgi_data("email-enron", read=True, path=dir)
assert H1.edges.members() == H4.edges.members()

load_xgi_data()
out, _ = capfd.readouterr()
assert "Available datasets are the following:" in out
assert "email-enron" in out
assert "congress-bills" in out


def test_download_xgi_data():
dir = tempfile.mkdtemp()
Expand Down
6 changes: 3 additions & 3 deletions xgi/algorithms/centrality.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,11 +353,11 @@ def katz_centrality(H, index=False, cutoff=100):

.. math::
c = [(I - \alpha A^{t})^{-1} - I]{\bf 1},

where :math:`A` is the adjacency matrix of the (hyper)graph.
Since :math:`A^{t} = A` for undirected graphs (our case), we have:


.. math::
&[I + A + \alpha A^2 + \alpha^2 A^3 + \dots](I - \alpha A^{t})

Expand All @@ -368,7 +368,7 @@ def katz_centrality(H, index=False, cutoff=100):
& - \alpha^2 A^3 - \alpha^3 A^4 - \dots

& = I

And :math:`(I - \alpha A^{t})^{-1} = I + A + \alpha A^2 + \alpha^2 A^3 + \dots`
Thus we can use the power series to compute the Katz-centrality.
[2] The Katz-centrality of isolated nodes (no hyperedges contains them) is
Expand Down
3 changes: 2 additions & 1 deletion xgi/readwrite/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from . import bipartite, edgelist, incidence, json, xgi_data
from . import bigg_data, bipartite, edgelist, incidence, json, xgi_data
from .bipartite import *
from .edgelist import *
from .incidence import *
from .json import *
from .xgi_data import *
from .bigg_data import *
113 changes: 113 additions & 0 deletions xgi/readwrite/bigg_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
"""Load a metabolic network from the BiGG models database."""

from ..utils import request_json_from_url, request_json_from_url_cached

__all__ = ["load_bigg_data"]


def load_bigg_data(
    dataset=None,
    cache=True,
):
    """Load a metabolic network from the BiGG models database.

    The Biochemical, Genetic and Genomic (BiGG) knowledge base is
    hosted at http://bigg.ucsd.edu/ and contains genome-scale
    metabolic reaction networks.

    Metabolites are represented as nodes, and each metabolic reaction
    becomes a directed edge: reactants form the tail of the edge and
    products form the head.

    Parameters
    ----------
    dataset : str, optional
        Dataset name. Valid options are the "bigg_id" tags in
        http://bigg.ucsd.edu/api/v2/models. If None (default), prints
        the list of available datasets instead of loading one.
    cache : bool, optional
        Whether to cache the downloaded model data, by default True.

    Returns
    -------
    DiHypergraph
        The loaded dihypergraph, or None when ``dataset`` is None.

    Raises
    ------
    XGIError
        The specified dataset does not exist.

    References
    ----------
    Zachary A. King, Justin Lu, Andreas Dräger,
    Philip Miller, Stephen Federowicz, Joshua A. Lerman,
    Ali Ebrahim, Bernhard O. Palsson, Nathan E. Lewis
    Nucleic Acids Research, Volume 44, Issue D1,
    4 January 2016, Pages D515–D522,
    https://doi.org/10.1093/nar/gkv1049
    """

    index_url = "http://bigg.ucsd.edu/api/v2/models"
    base_url = "http://bigg.ucsd.edu/static/models/"

    index_data = request_json_from_url(index_url)

    if dataset is None:
        # No dataset requested: list every available model id and return.
        ids = [entry["bigg_id"] for entry in index_data["results"]]
        print("Available datasets are the following:")
        print(*ids, sep="\n")
        return

    # Pick the cached or uncached fetcher, then download the model JSON.
    fetch = request_json_from_url_cached if cache else request_json_from_url
    model_data = fetch(base_url + dataset + ".json")

    return _bigg_to_dihypergraph(index_data, model_data)


def _bigg_to_dihypergraph(d_index, d_model):
    """Convert a BiGG-formatted dict to a dihypergraph.

    Parameters
    ----------
    d_index : dict
        The BiGG model index (the parsed response of
        http://bigg.ucsd.edu/api/v2/models); used to look up the
        organism name of the model.
    d_model : dict
        A single BiGG model as a dict, with "id", "metabolites",
        and "reactions" entries.

    Returns
    -------
    DiHypergraph
        The dihypergraph of the selected BiGG model: metabolites are
        nodes and each reaction is a directed edge (reactants in the
        tail, products in the head).
    """
    # Imported locally to avoid a circular import at module load time.
    from .. import DiHypergraph

    DH = DiHypergraph()

    # `model_id` avoids shadowing the builtin `id`.
    model_id = d_model["id"]

    DH["name"] = model_id

    # Attach the organism name from the index entry matching this model.
    for entry in d_index["results"]:
        if entry["bigg_id"] == model_id:
            DH["organism"] = entry["organism"]
            break

    for m in d_model["metabolites"]:
        DH.add_node(m["id"], name=m["name"])

    for r in d_model["reactions"]:
        head = set()  # products: positive stoichiometric coefficient
        tail = set()  # reactants: non-positive coefficient
        for metabolite, coeff in r["metabolites"].items():
            if coeff > 0:
                head.add(metabolite)
            else:
                tail.add(metabolite)

        DH.add_edge((tail, head), id=r["id"], name=r["name"])

    return DH
103 changes: 28 additions & 75 deletions xgi/readwrite/xgi_data.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
"""Load a data set from the xgi-data repository or a local file."""
import json
import os
from functools import lru_cache
from warnings import warn

import requests

from .. import convert
from ..exception import XGIError
from ..utils import request_json_from_url, request_json_from_url_cached

__all__ = ["load_xgi_data", "download_xgi_data"]

Expand All @@ -25,23 +23,24 @@ def load_xgi_data(

Parameters
----------
dataset : str, default: None
dataset : str, optional
Dataset name. Valid options are the top-level tags of the
index.json file in the xgi-data repository. If None, prints
index.json file in the xgi-data repository. If None (default), prints
the list of available datasets.
cache : bool, optional
Whether to cache the input data
Whether to cache the input data, by default True.
read : bool, optional
If read==True, search for a local copy of the data set. Use the local
copy if it exists, otherwise use the xgi-data repository.
copy if it exists, otherwise use the xgi-data repository.
By default, False.
path : str, optional
Path to a local copy of the data set
nodetype : type, optional
Type to cast the node ID to
Type to cast the node ID to, by default None.
edgetype : type, optional
Type to cast the edge ID to
Type to cast the edge ID to, by default None.
max_order: int, optional
Maximum order of edges to add to the hypergraph
Maximum order of edges to add to the hypergraph, by default None.

Returns
-------
Expand All @@ -53,11 +52,15 @@ def load_xgi_data(
XGIError
The specified dataset does not exist.
"""
index_url = (
"https://gitlab.com/complexgroupinteractions/"
"xgi-data/-/raw/main/index.json?inline=false"
)

# If no dataset is specified, print a list of the available datasets.
if dataset is None:
index_url = "https://gitlab.com/complexgroupinteractions/xgi-data/-/raw/main/index.json?inline=false"
index_data = _request_json_from_url(index_url)

index_data = request_json_from_url(index_url)
print("Available datasets are the following:")
print(*index_data, sep="\n")
return
Expand All @@ -76,10 +79,7 @@ def load_xgi_data(
"from the xgi-data repository instead. To download a local "
"copy, use `download_xgi_data`."
)
if cache:
data = _request_from_xgi_data_cached(dataset)
else:
data = _request_from_xgi_data(dataset)
data = _request_from_xgi_data(dataset, cache=cache)

return convert.dict_to_hypergraph(
data, nodetype=nodetype, edgetype=edgetype, max_order=max_order
Expand All @@ -106,15 +106,17 @@ def download_xgi_data(dataset, path=""):
jsonfile.close()


def _request_from_xgi_data(dataset=None):
def _request_from_xgi_data(dataset=None, cache=True):
"""Request a dataset from xgi-data.

Parameters
----------
dataset : str, default: None
dataset : str, optional
Dataset name. Valid options are the top-level tags of the
index.json file in the xgi-data repository. If None, prints
the list of available datasets.
cache : bool, optional
Whether or not to cache the output

Returns
-------
Expand All @@ -130,69 +132,20 @@ def _request_from_xgi_data(dataset=None):
---------
load_xgi_data
"""
index_url = (
"https://gitlab.com/complexgroupinteractions/"
"xgi-data/-/raw/main/index.json?inline=false"
)

index_url = "https://gitlab.com/complexgroupinteractions/xgi-data/-/raw/main/index.json?inline=false"
index_data = _request_json_from_url(index_url)
index_data = request_json_from_url(index_url)

key = dataset.lower()
if key not in index_data:
print("Valid dataset names:")
print(*index_data, sep="\n")
raise XGIError("Must choose a valid dataset name!")

return _request_json_from_url(index_data[key]["url"])


@lru_cache(maxsize=None)
def _request_from_xgi_data_cached(dataset):
"""Request a dataset from xgi-data and cache the result.

Wraps `_request_from_xgi_data` in an lru_cache decorator.

Parameters
----------
dataset : str
Dataset name. Valid options are the top-level tags of the
index.json file in the xgi-data repository.

Returns
-------
Data
The requested data loaded from a json file.

See also
---------
load_xgi_data
"""

return _request_from_xgi_data(dataset)


def _request_json_from_url(url):
"""HTTP request json file and return as dict.

Parameters
----------
url : str
The url where the json file is located.

Returns
-------
dict
A dictionary of the JSON requested.

Raises
------
XGIError
If the connection fails or if there is a bad HTTP request.
"""

try:
r = requests.get(url)
except requests.ConnectionError:
raise XGIError("Connection Error!")

if r.ok:
return r.json()
if cache:
return request_json_from_url_cached(index_data[key]["url"])
else:
raise XGIError(f"Error: HTTP response {r.status_code}")
return request_json_from_url(index_data[key]["url"])
4 changes: 0 additions & 4 deletions xgi/stats/dinodestats.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,6 @@

"""

import numpy as np

import xgi

__all__ = [
"attrs",
"degree",
Expand Down
Loading