Skip to content

Commit

Permalink
simplify hdf5 loading for adjacency as well
Browse files Browse the repository at this point in the history
  • Loading branch information
ejolly committed Apr 18, 2024
1 parent b6c4b02 commit 68446c7
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 27 deletions.
54 changes: 28 additions & 26 deletions nltools/data/adjacency.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@


class Adjacency(object):

"""
Adjacency is a class to represent Adjacency matrices as a vector rather
than a 2-dimensional matrix. This makes it easier to perform data
Expand Down Expand Up @@ -84,8 +83,7 @@ def __init__(self, data=None, Y=None, matrix_type=None, labels=None, **kwargs):
"'similarity_flat','directed_flat']"
)

# Flag to support hdf5 files saved using nltools <= 0.4.8
legacy_h5 = kwargs.pop("legacy_h5", False)
verbose = kwargs.pop("verbose", False)

# Setup data
if data is None:
Expand Down Expand Up @@ -137,7 +135,33 @@ def __init__(self, data=None, Y=None, matrix_type=None, labels=None, **kwargs):

# HDF5
if (".h5" in to_load) or (".hdf5" in to_load):
if legacy_h5:
try:
# Load the Y attribute
with pd.HDFStore(to_load, "r") as f:
self.Y = f["Y"]

# Load other attributes
with h5File(to_load, "r") as f:
self.data = np.array(f["data"])
self.matrix_type = f["matrix_type"][()].decode()
self.is_single_matrix = f["is_single_matrix"][()]
self.issymmetric = f["issymmetric"][()]
# Deepdish saved empty label lists as np arrays of length 1
if len(f["labels"]) == 1:
self.labels = list(f["labels"])
elif len(f["labels"]) > 1:
self.labels = list(f["labels"].asstr())
else:
self.labels = []

# Done initializing
return
except Exception as e:
if verbose:
warnings.warn(
f"Falling back to legacy h5 loading due to error: {e}"
)

with tables.open_file(to_load, mode="r") as f:
# Setup data
self.data = np.array(f.root["data"])
Expand Down Expand Up @@ -184,28 +208,6 @@ def __init__(self, data=None, Y=None, matrix_type=None, labels=None, **kwargs):

return

else:
# Load the Y attribute
with pd.HDFStore(to_load, "r") as f:
self.Y = f["Y"]

# Load other attributes
with h5File(to_load, "r") as f:
self.data = np.array(f["data"])
self.matrix_type = f["matrix_type"][()].decode()
self.is_single_matrix = f["is_single_matrix"][()]
self.issymmetric = f["issymmetric"][()]
# Deepdish saved empty label lists as np arrays of length 1
if len(f["labels"]) == 1:
self.labels = list(f["labels"])
elif len(f["labels"]) > 1:
self.labels = list(f["labels"].asstr())
else:
self.labels = []

# Done initializing
return

# CSV or array/dataframe
else:
(
Expand Down
5 changes: 4 additions & 1 deletion nltools/tests/test_adjacency.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
import pytest
import numpy as np
import pandas as pd
from nltools.data import Adjacency, Design_Matrix
Expand Down Expand Up @@ -448,7 +449,9 @@ def test_cluster_summary():
def test_load_legacy_h5(
old_h5_adj_single, new_h5_adj_single, old_h5_adj_double, new_h5_adj_double, tmpdir
):
b_old = Adjacency(old_h5_adj_single, legacy_h5=True)
with pytest.warns(UserWarning):
# With verbosity on we should see a warning about the old h5 file format
b_old = Adjacency(old_h5_adj_single, verbose=True)
b_new = Adjacency(new_h5_adj_single)
assert b_old.shape() == b_new.shape()
assert np.allclose(b_old.data, b_new.data)
Expand Down

0 comments on commit 68446c7

Please sign in to comment.