From bde7980ccb16734437e0ef1d51cac08cfca9992a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Herv=C3=A9=20Pag=C3=A8s?= Date: Fri, 16 Feb 2024 19:04:57 -0800 Subject: [PATCH 1/8] Address integer overflow in H5Screate_simple() --- R/H5S.R | 11 ++++++----- src/H5S.c | 6 ++++-- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/R/H5S.R b/R/H5S.R index aba74ed..486e0cb 100644 --- a/R/H5S.R +++ b/R/H5S.R @@ -29,9 +29,9 @@ H5Screate <- function( type = h5default("H5S"), native = FALSE ) { #' Create a simple dataspace #' -#' @param dims An integer vector defining the initial dimensions of the dataspace. +#' @param dims A numeric vector defining the initial dimensions of the dataspace. #' The length of `dims` determines the rank of the dataspace. -#' @param maxdims An integer vector with the same length length as `dims`. Specifies the +#' @param maxdims A numeric vector with the same length length as `dims`. Specifies the #' upper limit on the size of the dataspace dimensions. Only needs to be specified #' if this is different from the values given to `dims`. #' @param native An object of class `logical`. If `TRUE`, array-like @@ -47,11 +47,12 @@ H5Screate <- function( type = h5default("H5S"), native = FALSE ) { #' #' @export H5Screate_simple <- function( dims, maxdims, native = FALSE ) { + dims <- as.numeric(dims) if (missing(maxdims)) { - maxdims = dims + maxdims <- dims + } else { + maxdims <- as.numeric(maxdims) } - dims <- as.integer(dims) - maxdims <- as.integer(maxdims) if (!native) { dims <- rev(dims) maxdims <- rev(maxdims) diff --git a/src/H5S.c b/src/H5S.c index 1b83257..dfb981e 100644 --- a/src/H5S.c +++ b/src/H5S.c @@ -45,7 +45,7 @@ SEXP _H5Screate_simple( SEXP _dims, SEXP _maxdims ) { int rank = length(_dims); hsize_t dims[rank]; for (int i=0; i Date: Fri, 16 Feb 2024 19:21:46 -0800 Subject: [PATCH 2/8] roxygenize --- DESCRIPTION | 2 +- man/H5Screate_simple.Rd | 4 ++-- man/rhdf5.Rd | 25 +++++++++++++++++++++++++ 3 files changed, 28 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 0d789cc..c5f9b28 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -33,4 +33,4 @@ SystemRequirements: GNU make biocViews: Infrastructure, DataImport Encoding: UTF-8 Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 diff --git a/man/H5Screate_simple.Rd b/man/H5Screate_simple.Rd index 02fa242..4bbbc61 100644 --- a/man/H5Screate_simple.Rd +++ b/man/H5Screate_simple.Rd @@ -7,10 +7,10 @@ H5Screate_simple(dims, maxdims, native = FALSE) } \arguments{ -\item{dims}{An integer vector defining the initial dimensions of the dataspace. +\item{dims}{A numeric vector defining the initial dimensions of the dataspace. The length of \code{dims} determines the rank of the dataspace.} -\item{maxdims}{An integer vector with the same length length as \code{dims}. Specifies the +\item{maxdims}{A numeric vector with the same length length as \code{dims}. Specifies the upper limit on the size of the dataspace dimensions. Only needs to be specified if this is different from the values given to \code{dims}.} diff --git a/man/rhdf5.Rd b/man/rhdf5.Rd index a19ea37..7a650ee 100644 --- a/man/rhdf5.Rd +++ b/man/rhdf5.Rd @@ -2,6 +2,7 @@ % Please edit documentation in R/Rhdf5.R \docType{package} \name{rhdf5} +\alias{rhdf5-package} \alias{rhdf5} \title{rhdf5: An interface between HDF5 and R} \description{ @@ -11,3 +12,27 @@ The rhdf5 package provides two categories of functions: \item \code{H5} functions mirror much of the the HDF5 C API } } +\seealso{ +Useful links: +\itemize{ + \item \url{https://github.com/grimbough/rhdf5} + \item Report bugs at \url{https://github.com/grimbough/rhdf5/issues} +} + +} +\author{ +\strong{Maintainer}: Mike Smith \email{mike.smith@embl.de} (\href{https://orcid.org/0000-0002-7800-3848}{ORCID}) + +Authors: +\itemize{ + \item Bernd Fischer + \item Gregoire Pau +} + +Other contributors: +\itemize{ + \item Martin Morgan [contributor] + \item Daniel van Twisk [contributor] +} + +} From baa728f1105c5f6e5d3696a3245f1535a319cb64 Mon Sep 17 00:00:00 2001 From: Mike Smith Date: Wed, 28 Feb 2024 23:33:51 +0100 Subject: [PATCH 3/8] Tweaks to how H5S_UNLIMITED is handled --- R/H5S.R | 9 ++++++--- src/H5S.c | 30 +++++++++++++----------------- tests/testthat/test_H5S.R | 14 ++++++++++++++ 3 files changed, 33 insertions(+), 20 deletions(-) diff --git a/R/H5S.R b/R/H5S.R index 486e0cb..595a112 100644 --- a/R/H5S.R +++ b/R/H5S.R @@ -150,11 +150,14 @@ H5Sget_simple_extent_dims <- function( h5space ) { #' @export H5Sset_extent_simple <- function( h5space, dims, maxdims) { h5checktype(h5space, "dataspace") + + dims <- as.numeric(dims) if (missing(maxdims)) { - maxdims = dims + maxdims <- dims + } else { + maxdims <- as.numeric(maxdims) } - dims <- as.integer(dims) - maxdims <- as.integer(maxdims) + if (!h5space@native){ dims <- rev(dims) maxdims <- rev(maxdims) diff --git a/src/H5S.c b/src/H5S.c index dfb981e..9e01b97 100644 --- a/src/H5S.c +++ b/src/H5S.c @@ -56,14 +56,11 @@ SEXP _H5Screate_simple( SEXP _dims, SEXP _maxdims ) { hid = -1; } else { hsize_t maxdims[rank]; + double maxdim; for (int i=0; i R_LEN_T_MAX; - maxsize_is_numeric += maxsize[i] > R_LEN_T_MAX; + maxsize_is_numeric += (maxsize[i] > R_LEN_T_MAX) & (maxsize[i] != H5S_UNLIMITED); } Rsize = PROTECT(allocVector(REALSXP, rank)); Rmaxsize = PROTECT(allocVector(REALSXP, rank)); for (int i=0; i < rank; i++) { - REAL(Rsize)[i] = size[i]; - REAL(Rmaxsize)[i] = maxsize[i]; + REAL(Rsize)[i] = (double) size[i]; + REAL(Rmaxsize)[i] = (maxsize[i] == H5S_UNLIMITED) ? -1 : (double) maxsize[i]; } - SET_VECTOR_ELT(Rval,1,Rsize); - SET_VECTOR_ELT(Rval,2,Rmaxsize); + SET_VECTOR_ELT(Rval,1, Rsize); + SET_VECTOR_ELT(Rval,2, Rmaxsize); UNPROTECT(2); } @@ -145,7 +142,7 @@ SEXP _H5Sset_extent_simple( SEXP _space_id, SEXP _current_size, SEXP _maximum_si int rank = length(_current_size); hsize_t current_size[rank]; for (int i=0; i Date: Wed, 28 Feb 2024 23:34:21 +0100 Subject: [PATCH 4/8] Small refector of COMPOUND vs COMPLEX functions --- src/H5D.c | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/src/H5D.c b/src/H5D.c index f899b7e..f6e6683 100644 --- a/src/H5D.c +++ b/src/H5D.c @@ -682,9 +682,8 @@ int is_complex(hid_t dtype_id) { char *field1 = H5Tget_member_name(dtype_id, 0); char *field2 = H5Tget_member_name(dtype_id, 1); - if((strcmp(field1, "r") == 0) && (strcmp(field2, "i") == 0)) { + if((strcmp(field1, "r") == 0) && (strcmp(field2, "i") == 0)) res = 1; - } free(field1); free(field2); @@ -702,6 +701,7 @@ SEXP H5Dread_helper_COMPLEX(hid_t dataset_id, hid_t file_space_id, hid_t mem_spa herr_t herr = H5Dread(dataset_id, dtype_id, mem_space_id, file_space_id, H5P_DEFAULT, buf ); if(herr < 0) { + UNPROTECT(1); error("Unable to read dataset"); } @@ -718,11 +718,6 @@ SEXP H5Dread_helper_COMPOUND(hid_t dataset_id, hid_t file_space_id, hid_t mem_sp int bit64conversion, int native ) { SEXP Rval; - - if(is_complex(dtype_id)) { - Rval = H5Dread_helper_COMPLEX(dataset_id, file_space_id, mem_space_id, n, Rdim, dtype_id, native); - return(Rval); - } if ((LENGTH(Rdim) > 1) && compoundAsDataFrame) { compoundAsDataFrame = 0; @@ -784,6 +779,23 @@ SEXP H5Dread_helper_COMPOUND(hid_t dataset_id, hid_t file_space_id, hid_t mem_sp return(Rval); } +SEXP H5Dread_helper_COMPOUND_OR_COMPLEX( + hid_t dataset_id, hid_t file_space_id, hid_t mem_space_id, hsize_t n, SEXP Rdim, SEXP _buf, + hid_t dtype_id, hid_t cpdType, int cpdNField, char ** cpdField, int compoundAsDataFrame, + int bit64conversion, int native ) { + + SEXP Rval; + + if(is_complex(dtype_id)) { + Rval = H5Dread_helper_COMPLEX(dataset_id, file_space_id, mem_space_id, n, Rdim, dtype_id, native); + } else { + Rval = H5Dread_helper_COMPOUND(dataset_id, file_space_id, mem_space_id, n, Rdim, _buf, + dtype_id, cpdType, cpdNField, cpdField, compoundAsDataFrame, + bit64conversion, native); + } + + return(Rval); +} //SEXP H5Dread_helper_REFERENCE(hid_t attr_id, hsize_t n, SEXP Rdim, SEXP _buf, hid_t dtype_id) { SEXP H5Dread_helper_REFERENCE(hid_t dataset_id, hid_t file_space_id, hid_t mem_space_id, hsize_t n, SEXP Rdim, SEXP _buf, @@ -848,7 +860,7 @@ SEXP H5Dread_helper(hid_t dataset_id, hid_t file_space_id, hid_t mem_space_id, h dtype_id, cpdType, cpdNField, cpdField, compoundAsDataFrame, native ); } break; case H5T_COMPOUND: { - Rval = H5Dread_helper_COMPOUND(dataset_id, file_space_id, mem_space_id, n, Rdim, _buf, + Rval = H5Dread_helper_COMPOUND_OR_COMPLEX(dataset_id, file_space_id, mem_space_id, n, Rdim, _buf, dtype_id, cpdType, cpdNField, cpdField, compoundAsDataFrame, bit64conversion, native ); } break; From 518c8b497273915437df5072d3765318d601297b Mon Sep 17 00:00:00 2001 From: Mike Smith Date: Fri, 1 Mar 2024 18:16:25 +0100 Subject: [PATCH 5/8] Update man page --- R/H5S.R | 10 ++++++---- man/H5Sset_extent_simple.Rd | 12 +++++++----- man/rhdf5.Rd | 25 ------------------------- 3 files changed, 13 insertions(+), 34 deletions(-) diff --git a/R/H5S.R b/R/H5S.R index 595a112..5854c4b 100644 --- a/R/H5S.R +++ b/R/H5S.R @@ -140,13 +140,15 @@ H5Sget_simple_extent_dims <- function( h5space ) { #' #' @param h5space [H5IdComponent-class] object representing a dataspace. #' @param dims Dimension of the dataspace. This argument is similar to the dim -#' attribute of an array. When viewing the HDF5 dataset with an C-program -#' (e.g. HDFView), the dimensions appear in inverted order, because the -#' fastest changing dimension in R is the first one, and in C its the last -#' one. +#' attribute of an array. #' @param maxdims Maximum extension of the dimension of the dataset in the #' file. If not provided, it is set to `dims`. #' +#' When viewing the HDF5 dataset with other software +#' (e.g. HDFView), the dimensions appear in inverted order, because the +#' fastest changing dimension in R is the first one, and in C it's the last +#' one. +#' #' @export H5Sset_extent_simple <- function( h5space, dims, maxdims) { h5checktype(h5space, "dataspace") diff --git a/man/H5Sset_extent_simple.Rd b/man/H5Sset_extent_simple.Rd index 21b3928..c84885a 100644 --- a/man/H5Sset_extent_simple.Rd +++ b/man/H5Sset_extent_simple.Rd @@ -10,13 +10,15 @@ H5Sset_extent_simple(h5space, dims, maxdims) \item{h5space}{\linkS4class{H5IdComponent} object representing a dataspace.} \item{dims}{Dimension of the dataspace. This argument is similar to the dim -attribute of an array. When viewing the HDF5 dataset with an C-program -(e.g. HDFView), the dimensions appear in inverted order, because the -fastest changing dimension in R is the first one, and in C its the last -one.} +attribute of an array.} \item{maxdims}{Maximum extension of the dimension of the dataset in the -file. If not provided, it is set to \code{dims}.} +file. If not provided, it is set to \code{dims}. + +When viewing the HDF5 dataset with other software +(e.g. HDFView), the dimensions appear in inverted order, because the +fastest changing dimension in R is the first one, and in C it's the last +one.} } \description{ Set the size of a dataspace diff --git a/man/rhdf5.Rd b/man/rhdf5.Rd index 7a650ee..a19ea37 100644 --- a/man/rhdf5.Rd +++ b/man/rhdf5.Rd @@ -2,7 +2,6 @@ % Please edit documentation in R/Rhdf5.R \docType{package} \name{rhdf5} -\alias{rhdf5-package} \alias{rhdf5} \title{rhdf5: An interface between HDF5 and R} \description{ @@ -12,27 +11,3 @@ The rhdf5 package provides two categories of functions: \item \code{H5} functions mirror much of the the HDF5 C API } } -\seealso{ -Useful links: -\itemize{ - \item \url{https://github.com/grimbough/rhdf5} - \item Report bugs at \url{https://github.com/grimbough/rhdf5/issues} -} - -} -\author{ -\strong{Maintainer}: Mike Smith \email{mike.smith@embl.de} (\href{https://orcid.org/0000-0002-7800-3848}{ORCID}) - -Authors: -\itemize{ - \item Bernd Fischer - \item Gregoire Pau -} - -Other contributors: -\itemize{ - \item Martin Morgan [contributor] - \item Daniel van Twisk [contributor] -} - -} From 725d69d756ec057e697f4376dcf33525aad0c9a5 Mon Sep 17 00:00:00 2001 From: Mike Smith Date: Fri, 1 Mar 2024 18:25:41 +0100 Subject: [PATCH 6/8] Update NEWS --- inst/NEWS.Rd | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/inst/NEWS.Rd b/inst/NEWS.Rd index 3837fab..f004684 100644 --- a/inst/NEWS.Rd +++ b/inst/NEWS.Rd @@ -8,6 +8,12 @@ \item R complex types can now be written to HDF5. These will be stored as a compound datatype with two elements (r & i) representing the real and imaginary parts. + \item Functions \code{H5Screate_simple} and \code{H5Sset_extent_simple} + now accept numeric values to the \code{dim} and \code{maxdim} arguments, + allowing the creation of HDF5 dataspaces larger than R's maximum + integer value. + (Thanks to @hpages for reporting this and providing a patch + https://github.com/grimbough/rhdf5/pull/140). } } From 2faea145356a0a2f301be47fd421b4b99d484f82 Mon Sep 17 00:00:00 2001 From: Mike Smith Date: Fri, 1 Mar 2024 18:27:17 +0100 Subject: [PATCH 7/8] Version bump --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index c5f9b28..c05903f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: rhdf5 Type: Package Title: R Interface to HDF5 -Version: 2.47.4 +Version: 2.47.5 Authors@R: c(person("Bernd", "Fischer", role = c("aut")), person("Mike", "Smith", role=c("aut", "cre"), From e22b821aefeacbf12458c975a0e7e7711fa1d9bf Mon Sep 17 00:00:00 2001 From: Mike Smith Date: Fri, 1 Mar 2024 19:20:01 +0100 Subject: [PATCH 8/8] Add some more unit tests for H5S functions --- tests/testthat/test_H5S.R | 55 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/tests/testthat/test_H5S.R b/tests/testthat/test_H5S.R index 4e12059..25c8417 100644 --- a/tests/testthat/test_H5S.R +++ b/tests/testthat/test_H5S.R @@ -52,6 +52,11 @@ test_that("We can create a simple dataspace", { expect_equal(dspace_dims$size, dspace_dims$maxsize) expect_equal(dspace_dims$rank, 2) + ## if maxdims isn't supplied, it just matches dims + expect_silent(H5Sset_extent_simple(sid, dims = c(1,1,1))) + dspace_dims <- H5Sget_simple_extent_dims(sid) + expect_equal(dspace_dims$size, dspace_dims$maxsize) + expect_silent(H5Sset_extent_simple(sid, dims = c(1,2,3), maxdims = c(10,20,30))) expect_is(dspace_dims <- H5Sget_simple_extent_dims(sid), "list") expect_false( identical(dspace_dims$size, dspace_dims$maxsize) ) @@ -121,6 +126,56 @@ test_that("Selecting using an index", { expect_silent(H5Sclose(sid)) }) +test_that("Other selection functions", { + + dims <- c(10,20,30) + expect_silent(sid <- H5Screate_simple(dims = dims)) + + expect_silent( H5Sselect_all(sid) ) + expect_identical( H5Sget_select_npoints(sid), prod(dims) ) + + expect_silent( H5Sselect_none(sid) ) + expect_identical( H5Sget_select_npoints(sid), 0 ) + + expect_silent(H5Sclose(sid)) +}) + +test_that("Combining selections", { + + sid_1 <- H5Screate_simple(dims = 20) + sid_2 <- H5Screate_simple(dims = 10) + + ## select a single block of 5 points in sid_1 + ## this is equivalent to [11:16] in R syntax + H5Sselect_hyperslab(sid_1, start = 11, stride = 1, + block = 5, count = 1) + + ## select 2 blocks of 1 point from sid_2 + ## equivalent to [c(3,5)] in R syntax + H5Sselect_hyperslab(sid_2, start = 3, stride = 2, + block = 1, count = 2) + + ## confirm we have select 5 and 2 points resepectively + expect_equal(H5Sget_select_npoints(sid_1), 5) + expect_equal(H5Sget_select_npoints(sid_2), 2) + + ## combine the two dataset selections keeping points that + ## are in one or both of the selections + sid_3 <- H5Scombine_select(sid_1, "H5S_SELECT_OR", sid_2) + + ## extent of the new dataset is the same as sid_1 + sid_3 + ## confirm the selection contains 7 points + expect_equal(H5Sget_select_npoints(sid_3), 7) + + ## tidy up + H5Sclose(sid_1) + H5Sclose(sid_2) + H5Sclose(sid_3) + + +}) + ############################################################ context("H5S cleanup") ##########################################################