Skip to content

Commit

Permalink
switch from rhdf5 to hdf5r (#169)
Browse files Browse the repository at this point in the history
* Update write_h5ad_categorical

* fix styling

* Update write_h5ad_categorical

* Adjust H5AD categorical write test

* Add write_h5ad_attributes function

Replace repeated code in individual writers

* ignore cyclomatic complexity warning for `write_h5ad_element`

* formatting changes

* in write_h5ad_attributes, allow file to be an open hdf5 file

* wip

* wip

* substitute mentions of rhdf5 with hdf5r

* strip obs_names and var_names from framework

* update

* fix tests and finalize

* remove mentions of obs_names and var_names in the constructor

* make sure filenames are always unique

* add mode to various functions

* manually close anndatas in tests (where needed)

* only close when pointer is valid

* move match

* use $close() instead of $close_all()

* switch to different branch

* simplify test

* gc after closing the adata in write_h5ad

* guess the dtype and the space

* update docs

* use hhoeflin's remote

* bugfix in hdf5r has been released

* update: nevermind, the fix wasn't included in the release yet

* minor fixes

* bump version number

* remove remotes

* remove references to rhdf5

* fix attributes

* style

* fix write h5ad helpers

* fix unit tests

* fix linting issues

* move hdf5 helpers

* reuse existing functionality

* add test (this seems to have been fixed at some point)

* improve guessing of dtype when storing a logical vector

* fix styling

* reenable more tests

---------

Co-authored-by: Luke Zappia <[email protected]>
  • Loading branch information
rcannood and lazappi authored Jul 18, 2024
1 parent 220f977 commit 1194af2
Show file tree
Hide file tree
Showing 26 changed files with 955 additions and 864 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ Suggests:
BiocStyle,
knitr,
reticulate,
rhdf5,
hdf5r (>= 1.3.11),
rmarkdown,
S4Vectors,
SeuratObject,
Expand Down
22 changes: 19 additions & 3 deletions R/AbstractAnnData.R
Original file line number Diff line number Diff line change
Expand Up @@ -167,9 +167,25 @@ AbstractAnnData <- R6::R6Class("AbstractAnnData", # nolint
to_InMemoryAnnData(self)
},
#' @description Convert to an HDF5 Backed AnnData
#' @param path The path to the HDF5 file
to_HDF5AnnData = function(path) {
to_HDF5AnnData(self, path)
#' @param file The path to the HDF5 file
#' @param compression The compression algorithm to use when writing the
#' HDF5 file. Can be one of `"none"`, `"gzip"` or `"lzf"`. Defaults to
#' `"none"`.
#' @param mode The mode to open the HDF5 file.
#'
#' * `a` creates a new file or opens an existing one for read/write.
#' * `r` opens an existing file for reading.
#' * `r+` opens an existing file for read/write.
#' * `w` creates a file, truncating any existing ones.
#' * `w-`/`x` are synonyms, creating a file and failing if it already exists.
#' @return An HDF5AnnData object
to_HDF5AnnData = function(file,
compression = c("none", "gzip", "lzf"),
mode = c("w-", "r", "r+", "a", "w", "x")) {
to_HDF5AnnData(
adata = self,
file = file,
compression = compression,
mode = mode
)
}
),
private = list(
Expand Down
101 changes: 81 additions & 20 deletions R/HDF5AnnData.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,19 @@
#' Implementation of an HDF5-backed AnnData object.
HDF5AnnData <- R6::R6Class("HDF5AnnData", # nolint
inherit = AbstractAnnData,
cloneable = FALSE,
private = list(
.h5obj = NULL,
.close_on_finalize = FALSE,
.compression = NULL
),
active = list(
#' @field X The X slot
X = function(value) {
if (!private$.h5obj$is_valid) stop("HDF5 file is closed")
if (missing(value)) {
# trackstatus: class=HDF5AnnData, feature=get_X, status=done
read_h5ad_element(private$.h5obj, "/X")
read_h5ad_element(private$.h5obj, "X")
} else {
# trackstatus: class=HDF5AnnData, feature=set_X, status=done
value <- private$.validate_aligned_array(
Expand All @@ -23,13 +26,14 @@ HDF5AnnData <- R6::R6Class("HDF5AnnData", # nolint
expected_rownames = rownames(self),
expected_colnames = colnames(self)
)
write_h5ad_element(value, private$.h5obj, "/X", private$.compression)
write_h5ad_element(value, private$.h5obj, "X", private$.compression)
}
},
#' @field layers The layers slot. Must be `NULL` or a named list
#' with all elements having dimensions consistent with
#' `obs` and `var`.
layers = function(value) {
if (!private$.h5obj$is_valid) stop("HDF5 file is closed")
if (missing(value)) {
# trackstatus: class=HDF5AnnData, feature=get_layers, status=done
read_h5ad_element(private$.h5obj, "layers")
Expand All @@ -42,12 +46,13 @@ HDF5AnnData <- R6::R6Class("HDF5AnnData", # nolint
expected_rownames = rownames(self),
expected_colnames = colnames(self)
)
write_h5ad_element(value, private$.h5obj, "/layers", private$.compression)
write_h5ad_element(value, private$.h5obj, "layers", private$.compression)
}
},
#' @field obsm The obsm slot. Must be `NULL` or a named list
#' with all elements having the same number of rows as `obs`.
obsm = function(value) {
if (!private$.h5obj$is_valid) stop("HDF5 file is closed")
if (missing(value)) {
# trackstatus: class=HDF5AnnData, feature=get_obsm, status=done
read_h5ad_element(private$.h5obj, "obsm")
Expand All @@ -59,12 +64,13 @@ HDF5AnnData <- R6::R6Class("HDF5AnnData", # nolint
c(self$n_obs()),
expected_rownames = rownames(self)
)
write_h5ad_element(value, private$.h5obj, "/obsm")
write_h5ad_element(value, private$.h5obj, "obsm")
}
},
#' @field varm The varm slot. Must be `NULL` or a named list
#' with all elements having the same number of rows as `var`.
varm = function(value) {
if (!private$.h5obj$is_valid) stop("HDF5 file is closed")
if (missing(value)) {
# trackstatus: class=HDF5AnnData, feature=get_varm, status=done
read_h5ad_element(private$.h5obj, "varm")
Expand All @@ -76,12 +82,13 @@ HDF5AnnData <- R6::R6Class("HDF5AnnData", # nolint
c(self$n_vars()),
expected_rownames = colnames(self)
)
write_h5ad_element(value, private$.h5obj, "/varm")
write_h5ad_element(value, private$.h5obj, "varm")
}
},
#' @field obsp The obsp slot. Must be `NULL` or a named list
#' with all elements having the same number of rows and columns as `obs`.
obsp = function(value) {
if (!private$.h5obj$is_valid) stop("HDF5 file is closed")
if (missing(value)) {
# trackstatus: class=HDF5AnnData, feature=get_obsp, status=done
read_h5ad_element(private$.h5obj, "obsp")
Expand All @@ -94,12 +101,13 @@ HDF5AnnData <- R6::R6Class("HDF5AnnData", # nolint
expected_rownames = rownames(self),
expected_colnames = rownames(self)
)
write_h5ad_element(value, private$.h5obj, "/obsp")
write_h5ad_element(value, private$.h5obj, "obsp")
}
},
#' @field varp The varp slot. Must be `NULL` or a named list
#' with all elements having the same number of rows and columns as `var`.
varp = function(value) {
if (!private$.h5obj$is_valid) stop("HDF5 file is closed")
if (missing(value)) {
# trackstatus: class=HDF5AnnData, feature=get_varp, status=done
read_h5ad_element(private$.h5obj, "varp")
Expand All @@ -112,43 +120,46 @@ HDF5AnnData <- R6::R6Class("HDF5AnnData", # nolint
expected_rownames = colnames(self),
expected_colnames = colnames(self)
)
write_h5ad_element(value, private$.h5obj, "/varp")
write_h5ad_element(value, private$.h5obj, "varp")
}
},

#' @field obs The obs slot
obs = function(value) {
if (!private$.h5obj$is_valid) stop("HDF5 file is closed")
if (missing(value)) {
# trackstatus: class=HDF5AnnData, feature=get_obs, status=done
read_h5ad_element(private$.h5obj, "/obs")
read_h5ad_element(private$.h5obj, "obs")
} else {
# trackstatus: class=HDF5AnnData, feature=set_obs, status=done
value <- private$.validate_obsvar_dataframe(value, "obs")
write_h5ad_element(
value,
private$.h5obj,
"/obs",
"obs",
private$.compression
)
}
},
#' @field var The var slot
var = function(value) {
if (!private$.h5obj$is_valid) stop("HDF5 file is closed")
if (missing(value)) {
# trackstatus: class=HDF5AnnData, feature=get_var, status=done
read_h5ad_element(private$.h5obj, "/var")
read_h5ad_element(private$.h5obj, "var")
} else {
# trackstatus: class=HDF5AnnData, feature=set_var, status=done
value <- private$.validate_obsvar_dataframe(value, "var")
write_h5ad_element(
value,
private$.h5obj,
"/var"
"var"
)
}
},
#' @field obs_names Names of observations
obs_names = function(value) {
if (!private$.h5obj$is_valid) stop("HDF5 file is closed")
if (missing(value)) {
# trackstatus: class=HDF5AnnData, feature=get_obs_names, status=done
rownames(self$obs)
Expand All @@ -169,13 +180,14 @@ HDF5AnnData <- R6::R6Class("HDF5AnnData", # nolint
},
#' @field uns The uns slot. Must be `NULL` or a named list.
uns = function(value) {
if (!private$.h5obj$is_valid) stop("HDF5 file is closed")
if (missing(value)) {
# trackstatus: class=HDF5AnnData, feature=get_uns, status=done
read_h5ad_element(private$.h5obj, "uns")
} else {
# trackstatus: class=HDF5AnnData, feature=set_uns, status=done
value <- private$.validate_named_list(value, "uns")
write_h5ad_element(value, private$.h5obj, "/uns")
write_h5ad_element(value, private$.h5obj, "uns")
}
}
),
Expand Down Expand Up @@ -214,6 +226,13 @@ HDF5AnnData <- R6::R6Class("HDF5AnnData", # nolint
#' @param compression The compression algorithm to use when writing the
#' HDF5 file. Can be one of `"none"`, `"gzip"` or `"lzf"`. Defaults to
#' `"none"`.
#' @param mode The mode to open the HDF5 file.
#'
#' * `a` creates a new file or opens an existing one for read/write.
#' * `r` opens an existing file for reading.
#' * `r+` opens an existing file for read/write.
#' * `w` creates a file, truncating any existing ones.
#' * `w-`/`x` are synonyms, creating a file and failing if it already exists.
#'
#' @details
#' The constructor creates a new HDF5 AnnData interface object. This can
Expand All @@ -233,17 +252,23 @@ HDF5AnnData <- R6::R6Class("HDF5AnnData", # nolint
varp = NULL,
uns = NULL,
shape = NULL,
mode = c("r", "r+", "a", "w", "w-", "x"),
compression = c("none", "gzip", "lzf")) {
if (!requireNamespace("rhdf5", quietly = TRUE)) {
stop("The HDF5 interface requires the 'rhdf5' package to be installed")
if (!requireNamespace("hdf5r", quietly = TRUE)) {
stop("The HDF5 interface requires the 'hdf5r' package to be installed")
}

# check arguments
compression <- match.arg(compression)
mode <- match.arg(mode)

# store compression for later use
private$.compression <- compression

if (!file.exists(file)) {
# Store filename
private$.h5obj <- file
if (is.character(file) && !file.exists(file)) {
# store private values
private$.h5obj <- hdf5r::H5File$new(file, mode = "w-")
private$.close_on_finalize <- TRUE

# Determine initial obs and var
shape <- get_shape(obs, var, X, shape)
Expand Down Expand Up @@ -276,8 +301,17 @@ HDF5AnnData <- R6::R6Class("HDF5AnnData", # nolint
self$uns <- uns
}
} else {
open_hdf5_file <- is.character(file)
if (open_hdf5_file) {
file <- hdf5r::H5File$new(file, mode = mode)
}

if (!inherits(file, "H5File")) {
stop("file must be a character string or an H5File object")
}

# Check the file is a valid H5AD
attrs <- rhdf5::h5readAttributes(file, "/")
attrs <- hdf5r::h5attributes(file)

if (!all(c("encoding-type", "encoding-version") %in% names(attrs))) {
stop(
Expand All @@ -288,7 +322,9 @@ HDF5AnnData <- R6::R6Class("HDF5AnnData", # nolint

# Set the file path
private$.h5obj <- file
private$.close_on_finalize <- open_hdf5_file

# assert other arguments are NULL
if (!is.null(obs)) {
stop("obs must be NULL when loading an existing .h5ad file")
}
Expand Down Expand Up @@ -319,6 +355,21 @@ HDF5AnnData <- R6::R6Class("HDF5AnnData", # nolint
}
},

#' @description Close the HDF5 file when the object is garbage collected
finalize = function() {
if (private$.close_on_finalize) {
self$close()
}
return(invisible(self))
},

#' @description Close the HDF5 file
close = function() {
if (private$.h5obj$is_valid) {
private$.h5obj$close()
}
},

#' @description Number of observations in the AnnData object
n_obs = function() {
nrow(self$obs)
Expand All @@ -341,6 +392,13 @@ HDF5AnnData <- R6::R6Class("HDF5AnnData", # nolint
#' @param compression The compression algorithm to use when writing the
#' HDF5 file. Can be one of `"none"`, `"gzip"` or `"lzf"`. Defaults to
#' `"none"`.
#' @param mode The mode to open the HDF5 file.
#'
#' * `a` creates a new file or opens an existing one for read/write.
#' * `r` opens an existing file for reading.
#' * `r+` opens an existing file for read/write.
#' * `w` creates a file, truncating any existing ones.
#' * `w-`/`x` are synonyms, creating a file and failing if it already exists.
#'
#' @return An HDF5AnnData object with the same data as the input AnnData
#' object.
Expand All @@ -365,10 +423,12 @@ to_HDF5AnnData <- function(
# nolint end: object_name_linter
adata,
file,
compression = c("none", "gzip", "lzf")) {
compression = c("none", "gzip", "lzf"),
mode = c("w-", "r", "r+", "a", "w", "x")) {
stopifnot(
inherits(adata, "AbstractAnnData")
)
mode <- match.arg(mode)
HDF5AnnData$new(
file = file,
X = adata$X,
Expand All @@ -380,7 +440,8 @@ to_HDF5AnnData <- function(
obsp = adata$obsp,
varp = adata$varp,
uns = adata$uns,
compression = compression,
shape = adata$shape(),
compression = compression
mode = mode
)
}
8 changes: 3 additions & 5 deletions R/InMemoryAnnData.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,10 @@
#' ad
#'
#' ## minimum example
#' # -> using `AnnData()` is synonymous to `InMemoryAnnData$new()`
#' ad <- AnnData(
#' AnnData(
#' obs = data.frame(row.names = letters[1:10]),
#' var = data.frame(row.names = LETTERS[1:5])
#' )
#' ad
#' @export
InMemoryAnnData <- R6::R6Class("InMemoryAnnData", # nolint
inherit = AbstractAnnData,
Expand Down Expand Up @@ -301,8 +299,8 @@ InMemoryAnnData <- R6::R6Class("InMemoryAnnData", # nolint
#' A = matrix(5:1, 3L, 5L),
#' B = matrix(letters[1:5], 3L, 5L)
#' ),
#' obs = data.frame(cell = 1:3, row.names = LETTERS[1:3]),
#' var = data.frame(gene = 1:5, row.names = letters[1:5])
#' obs = data.frame(row.names = LETTERS[1:3], cell = 1:3),
#' var = data.frame(row.names = letters[1:5], gene = 1:5)
#' )
#' to_InMemoryAnnData(ad)
to_InMemoryAnnData <- function(adata) { # nolint
Expand Down
1 change: 0 additions & 1 deletion R/Seurat.R
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,6 @@ from_Seurat <- function(seurat_obj, output_class = c("InMemoryAnnData", "HDF5Ann
}
}


# get obs
# trackstatus: class=Seurat, feature=set_obs_names, status=done
# trackstatus: class=Seurat, feature=set_obs, status=done
Expand Down
12 changes: 11 additions & 1 deletion R/read_h5ad.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,14 @@
#' @param path Path to the H5AD file to read
#' @param to The type of object to return. Must be one of: "InMemoryAnnData",
#' "HDF5AnnData", "SingleCellExperiment", "Seurat"
#' @param mode The mode to open the HDF5 file.
#'
#' * `a` creates a new file or opens an existing one for read/write.
#' * `r` opens an existing file for reading.
#' * `r+` opens an existing file for read/write.
#' * `w` creates a file, truncating any existing ones.
#' * `w-`/`x` are synonyms, creating a file and failing if it already exists.
#'
#' @param ... Extra arguments provided to [to_SingleCellExperiment()] or
#' [to_Seurat()]
#'
Expand All @@ -26,10 +34,12 @@
read_h5ad <- function(
path,
to = c("InMemoryAnnData", "HDF5AnnData", "SingleCellExperiment", "Seurat"),
mode = c("r", "r+", "a", "w", "w-", "x"),
...) {
to <- match.arg(to)
mode <- match.arg(mode)

adata <- HDF5AnnData$new(path)
adata <- HDF5AnnData$new(path, mode = mode)

fun <- switch(to,
"SingleCellExperiment" = to_SingleCellExperiment,
Expand Down
Loading

0 comments on commit 1194af2

Please sign in to comment.