Skip to content

Commit

Permalink
Fix Anndata set_meta for updated h5py
Browse files Browse the repository at this point in the history
  • Loading branch information
mvdbeek authored and mr-c committed Nov 2, 2023
1 parent 4dafda6 commit 707b31b
Showing 1 changed file with 24 additions and 24 deletions.
48 changes: 24 additions & 24 deletions lib/galaxy/datatypes/binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
Optional,
Tuple,
TYPE_CHECKING,
Union,
)

import h5py
Expand Down Expand Up @@ -1482,37 +1483,40 @@ def set_meta(self, dataset: DatasetProtocol, overwrite: bool = True, **kwd) -> N
dataset.metadata.layers_count = len(anndata_file)
dataset.metadata.layers_names = list(anndata_file.keys())

def get_index_value(tmp: Union[h5py.Dataset, h5py.Datatype, h5py.Group]):
    """Return the index entry of an AnnData ``obs``/``var`` node, or ``None``.

    Two layouts are handled:

    * ``tmp`` is a ``h5py.Dataset``/``h5py.Datatype``: the index is the
      ``"index"`` (preferred) or ``"_index"`` field of its dtype, and is
      fetched with ``tmp[<field>]``.
    * otherwise (a group): the ``"index"`` (preferred) or ``"_index"``
      attribute holds the *name* of the member storing the index, which is
      fetched with ``tmp[<name>]``.

    ``None`` is returned when neither candidate is present.
    """
    candidates = ("index", "_index")

    if isinstance(tmp, (h5py.Dataset, h5py.Datatype)):
        # ``dtype.names`` is ``None`` when the dtype has no fields; treat
        # that as "no fields" rather than failing the membership test below.
        field_names = tmp.dtype.names or ()
        for field in candidates:
            if field in field_names:
                return tmp[field]
        return None

    for attr_name in candidates:
        # The attribute value is the key under which the index is stored.
        index_var = tmp.attrs.get(attr_name)
        if index_var is not None:
            return tmp[index_var]
    return None

def _layercountsize(tmp, lennames=0):
"From TMP and LENNAMES, return layers, their number, and the length of one of the layers (all equal)."
if hasattr(tmp, "dtype"):
layers = list(tmp.dtype.names)
count = len(tmp.dtype)
size = int(tmp.size)
else:
layers = list(tmp.keys())
layers = list(tmp.attrs)
count = len(layers)
size = lennames
return (layers, count, size)

if "obs" in dataset.metadata.layers_names:
tmp = anndata_file["obs"]
obs_index = None
if "index" in tmp:
obs_index = "index"
elif "_index" in tmp:
obs_index = "_index"
obs = get_index_value(tmp)
# Determine cell labels
if obs_index:
dataset.metadata.obs_names = list(tmp[obs_index])
elif hasattr(tmp, "dtype"):
if "index" in tmp.dtype.names:
# Yes, we call tmp["index"], and not tmp.dtype["index"]
# here, despite the above tests.
dataset.metadata.obs_names = list(tmp["index"])
elif "_index" in tmp.dtype.names:
dataset.metadata.obs_names = list(tmp["_index"])
else:
log.warning("Could not determine cell labels for %s", self)
if obs is not None:
dataset.metadata.obs_names = [n.decode() for n in obs]
else:
log.warning("Could not determine observation index for %s", self)

Expand All @@ -1536,15 +1540,11 @@ def _layercountsize(tmp, lennames=0):

if "var" in dataset.metadata.layers_names:
tmp = anndata_file["var"]
var_index = None
if "index" in tmp:
var_index = "index"
elif "_index" in tmp:
var_index = "_index"
index = get_index_value(tmp)
# We never use var_names
# dataset.metadata.var_names = tmp[var_index]
if var_index:
x, y, z = _layercountsize(tmp, len(tmp[var_index]))
if index is not None:
x, y, z = _layercountsize(tmp, len(index))
else:
# failing to detect a var_index is not an indicator
# that the dataset is empty
Expand Down

0 comments on commit 707b31b

Please sign in to comment.