From d2ccb79505e2b401851a7f17c27382079d965287 Mon Sep 17 00:00:00 2001 From: LTLA Date: Tue, 30 Apr 2024 16:38:20 -0700 Subject: [PATCH] Got the tests to pass. --- js/index.js | 1 + js/initializeSparseMatrix.js | 118 ------------------------ js/initializeSparseMatrixFromHdf5.js | 128 +++++++++++++++++++++++++++ src/read_hdf5_matrix.cpp | 21 ++--- 4 files changed, 138 insertions(+), 130 deletions(-) create mode 100644 js/initializeSparseMatrixFromHdf5.js diff --git a/js/index.js b/js/index.js index 0f3afc8e..91b1d824 100644 --- a/js/index.js +++ b/js/index.js @@ -2,6 +2,7 @@ export { initialize, terminate, wasmArraySpace, heapSize, maximumThreads } from export { createUint8WasmArray, createInt32WasmArray, createFloat64WasmArray, free } from "./utils.js"; export * from "./initializeSparseMatrix.js"; +export * from "./initializeSparseMatrixFromHdf5.js"; export * from "./rds.js"; export * from "./file.js"; diff --git a/js/initializeSparseMatrix.js b/js/initializeSparseMatrix.js index 22d90b95..de3898e3 100644 --- a/js/initializeSparseMatrix.js +++ b/js/initializeSparseMatrix.js @@ -216,124 +216,6 @@ export function extractMatrixMarketDimensions(x, { compressed = null } = {}) { return output; } -/** - * Initialize a layered sparse matrix from a HDF5 file. - * - * @param {string} file Path to the HDF5 file. - * For browsers, the file should have been saved to the virtual filesystem. - * @param {string} name Name of the dataset inside the file. - * This can be a HDF5 Dataset for dense matrices or a HDF5 Group for sparse matrices. - * For the latter, both H5AD and 10X-style sparse formats are supported. - * @param {object} [options={}] - Optional parameters. - * @param {boolean} [options.forceInteger=true] - Whether to coerce all elements to integers via truncation. - * @param {boolean} [options.layered=true] - Whether to create a layered sparse matrix, see [**tatami_layered**](https://github.com/tatami-inc/tatami_layered) for more details. - * Only used if the relevant HDF5 dataset contains an integer type and/or `forceInteger = true`. - * Setting to `true` assumes that the matrix contains only non-negative integers. - * @param {?(Array|TypedArray|Int32WasmArray)} [options.subsetRow=null] - Row indices to extract. - * All indices must be non-negative integers less than the number of rows in the sparse matrix. - * @param {?(Array|TypedArray|Int32WasmArray)} [options.subsetColumn=null] - Column indices to extract. - * All indices must be non-negative integers less than the number of columns in the sparse matrix. - * - * @return {ScranMatrix} Matrix containing sparse data. - */ -export function initializeSparseMatrixFromHdf5(file, name, { forceInteger = true, layered = true, subsetRow = null, subsetColumn = null } = {}) { - const details = extractHdf5MatrixDetails(file, name); - if (details.format == "dense") { - // Setting transposed = true as all known dense matrices store the cells in the first dimension and the genes in the last dimension. - return initializeSparseMatrixFromHdf5DenseArray(file, name, { transposed: true, forceInteger, layered, subsetRow, subsetColumn }); - } else { - return initializeSparseMatrixFromHdf5SparseMatrix(file, name, details.rows, details.columns, details.format == "csc", { forceInteger, layered, subsetRow, subsetColumn }); - } -} - -function prepare_hdf5_matrix_subset(subsetRow, subsetColumn, fun) { - var output; - let wasm_row, wasm_col; - - try { - let use_row_subset = (subsetRow !== null); - let row_offset = 0, row_length = 0; - if (use_row_subset) { - wasm_row = utils.wasmifyArray(subsetRow, "Int32WasmArray"); - row_offset = wasm_row.offset; - row_length = wasm_row.length; - } - - let use_col_subset = (subsetColumn !== null); - let col_offset = 0, col_length = 0; - if (use_col_subset) { - wasm_col = utils.wasmifyArray(subsetColumn, "Int32WasmArray"); - col_offset = wasm_col.offset; - col_length = wasm_col.length; - } - - output = fun(use_row_subset, row_offset, row_length, use_col_subset, col_offset, col_length); - - } finally { - utils.free(wasm_row); - utils.free(wasm_col); - } - - return output; -} - -export function initializeSparseMatrixFromHdf5DenseArray(file, name, { transposed = false, forceInteger = true, layered = true, subsetRow = null, subsetColumn = null } = {}) { - return prepare_hdf5_matrix_subset(subsetRow, subsetColumn, (use_row_subset, row_offset, row_length, use_col_subset, col_offset, col_length) => { - return gc.call(module => module.read_sparse_matrix_from_hdf5_dense_array( - file, name, transposed, forceInteger, layered, - use_row_subset, row_offset, row_length, use_col_subset, col_offset, col_length - )); - }); -} - -export function initializeSparseMatrixFromHdf5SparseMatrix(file, name, numberOfRows, numberOfColumns, byColumn, { forceInteger = true, layered = true, subsetRow = null, subsetColumn = null } = {}) { - return prepare_hdf5_matrix_subset(subsetRow, subsetColumn, (use_row_subset, row_offset, row_length, use_col_subset, col_offset, col_length) => { - return gc.call(module => module.read_sparse_matrix_from_hdf5_sparse_matrix( - file, name, numberOfRows, numberOfColumns, byColumn, forceInteger, layered, - use_row_subset, row_offset, row_length, use_col_subset, col_offset, col_length - )); - }); -} - -/** - * Extract the format and dimensions of a HDF5 matrix. - * - * @param {string} file Path to the HDF5 file. - * For browsers, the file should have been saved to the virtual filesystem. - * @param {string} name Name of the dataset inside the file. - * This can be a HDF5 Dataset for dense matrices or a HDF5 Group for sparse matrices. - * For the latter, both H5AD and 10X-style sparse formats are supported. - * - * @return {object} An object containing: - * - `rows`, the number of rows in the matrix. - * - `columns`, the number of columns. - * - `format`, whether the matrix is dense, CSR or CSC. - * - `integer`, whether the matrix data is stored as integers or doubles. - */ -export function extractHdf5MatrixDetails(file, name) { - let output = {}; - let arr = utils.createInt32WasmArray(5); - try { - wasm.call(module => module.extract_hdf5_matrix_details(file, name, arr.offset)); - - let vals = arr.array(); - if (vals[0] > 0) { - output.format = "dense"; - } else if (vals[1] > 0) { - output.format = "csc"; - } else { - output.format = "csr"; - } - - output.rows = vals[2]; - output.columns = vals[3]; - output.integer = vals[4] > 0; - } finally { - arr.free(); - } - return output; -} - /** * Initialize a dense matrix from a column-major array. * diff --git a/js/initializeSparseMatrixFromHdf5.js b/js/initializeSparseMatrixFromHdf5.js new file mode 100644 index 00000000..093f38ef --- /dev/null +++ b/js/initializeSparseMatrixFromHdf5.js @@ -0,0 +1,128 @@ +import * as gc from "./gc.js"; +import * as wasm from "./wasm.js"; +import * as utils from "./utils.js"; +import { ScranMatrix } from "./ScranMatrix.js"; + +/** + * Initialize a layered sparse matrix from a HDF5 file. + * + * @param {string} file Path to the HDF5 file. + * For browsers, the file should have been saved to the virtual filesystem. + * @param {string} name Name of the dataset inside the file. + * This can be a HDF5 Dataset for dense matrices or a HDF5 Group for sparse matrices. + * For the latter, both H5AD and 10X-style sparse formats are supported. + * @param {object} [options={}] - Optional parameters. + * @param {boolean} [options.forceInteger=true] - Whether to coerce all elements to integers via truncation. + * @param {boolean} [options.layered=true] - Whether to create a layered sparse matrix, see [**tatami_layered**](https://github.com/tatami-inc/tatami_layered) for more details. + * Only used if the relevant HDF5 dataset contains an integer type and/or `forceInteger = true`. + * Setting to `true` assumes that the matrix contains only non-negative integers. + * @param {?(Array|TypedArray|Int32WasmArray)} [options.subsetRow=null] - Row indices to extract. + * All indices must be non-negative integers less than the number of rows in the sparse matrix. + * @param {?(Array|TypedArray|Int32WasmArray)} [options.subsetColumn=null] - Column indices to extract. + * All indices must be non-negative integers less than the number of columns in the sparse matrix. + * + * @return {ScranMatrix} Matrix containing sparse data. + */ +export function initializeSparseMatrixFromHdf5(file, name, { forceInteger = true, layered = true, subsetRow = null, subsetColumn = null } = {}) { + const details = extractHdf5MatrixDetails(file, name); + if (details.format == "dense") { + // Setting transposed = true as all known dense matrices store the cells in the first dimension and the genes in the last dimension. + return initializeSparseMatrixFromHdf5DenseArray(file, name, { transposed: true, forceInteger, layered, subsetRow, subsetColumn }); + } else { + return initializeSparseMatrixFromHdf5SparseMatrix(file, name, details.rows, details.columns, details.format == "csc", { forceInteger, layered, subsetRow, subsetColumn }); + } +} + +function prepare_hdf5_matrix_subset(subsetRow, subsetColumn, fun) { + var output; + let wasm_row, wasm_col; + + try { + let use_row_subset = (subsetRow !== null); + let row_offset = 0, row_length = 0; + if (use_row_subset) { + wasm_row = utils.wasmifyArray(subsetRow, "Int32WasmArray"); + row_offset = wasm_row.offset; + row_length = wasm_row.length; + } + + let use_col_subset = (subsetColumn !== null); + let col_offset = 0, col_length = 0; + if (use_col_subset) { + wasm_col = utils.wasmifyArray(subsetColumn, "Int32WasmArray"); + col_offset = wasm_col.offset; + col_length = wasm_col.length; + } + + output = fun(use_row_subset, row_offset, row_length, use_col_subset, col_offset, col_length); + + } finally { + utils.free(wasm_row); + utils.free(wasm_col); + } + + return output; +} + +export function initializeSparseMatrixFromHdf5DenseArray(file, name, { transposed = false, forceInteger = false, layered = true, subsetRow = null, subsetColumn = null } = {}) { + return prepare_hdf5_matrix_subset(subsetRow, subsetColumn, (use_row_subset, row_offset, row_length, use_col_subset, col_offset, col_length) => { + return gc.call( + module => module.read_sparse_matrix_from_hdf5_dense_array( + file, name, transposed, forceInteger, layered, + use_row_subset, row_offset, row_length, use_col_subset, col_offset, col_length + ), + ScranMatrix + ); + }); +} + +export function initializeSparseMatrixFromHdf5SparseMatrix(file, name, numberOfRows, numberOfColumns, byColumn, { forceInteger = false, layered = true, subsetRow = null, subsetColumn = null } = {}) { + return prepare_hdf5_matrix_subset(subsetRow, subsetColumn, (use_row_subset, row_offset, row_length, use_col_subset, col_offset, col_length) => { + return gc.call( + module => module.read_sparse_matrix_from_hdf5_sparse_matrix( + file, name, numberOfRows, numberOfColumns, byColumn, forceInteger, layered, + use_row_subset, row_offset, row_length, use_col_subset, col_offset, col_length + ), + ScranMatrix + ); + }); +} + +/** + * Extract the format and dimensions of a HDF5 matrix. + * + * @param {string} file Path to the HDF5 file. + * For browsers, the file should have been saved to the virtual filesystem. + * @param {string} name Name of the dataset inside the file. + * This can be a HDF5 Dataset for dense matrices or a HDF5 Group for sparse matrices. + * For the latter, both H5AD and 10X-style sparse formats are supported. + * + * @return {object} An object containing: + * - `rows`, the number of rows in the matrix. + * - `columns`, the number of columns. + * - `format`, whether the matrix is dense, CSR or CSC. + * - `integer`, whether the matrix data is stored as integers or doubles. + */ +export function extractHdf5MatrixDetails(file, name) { + let output = {}; + let arr = utils.createInt32WasmArray(5); + try { + wasm.call(module => module.extract_hdf5_matrix_details(file, name, arr.offset)); + + let vals = arr.array(); + if (vals[0] > 0) { + output.format = "dense"; + } else if (vals[1] > 0) { + output.format = "csc"; + } else { + output.format = "csr"; + } + + output.rows = vals[2]; + output.columns = vals[3]; + output.integer = vals[4] > 0; + } finally { + arr.free(); + } + return output; +} diff --git a/src/read_hdf5_matrix.cpp b/src/read_hdf5_matrix.cpp index d2369b49..e54d637d 100644 --- a/src/read_hdf5_matrix.cpp +++ b/src/read_hdf5_matrix.cpp @@ -204,7 +204,7 @@ NumericMatrix read_sparse_matrix_from_hdf5_dense_array( if (as_integer) { return read_sparse_matrix_from_hdf5_dense_array_internal(path, name, trans, layered, row_subset, row_offset, row_length, col_subset, col_offset, col_length); } else { - return read_sparse_matrix_from_hdf5_dense_array_internal(path, name, trans, layered, row_subset, row_offset, row_length, col_subset, col_offset, col_length); + return read_sparse_matrix_from_hdf5_dense_array_internal(path, name, trans, false, row_subset, row_offset, row_length, col_subset, col_offset, col_length); } } @@ -223,19 +223,16 @@ NumericMatrix read_sparse_matrix_from_hdf5_sparse_matrix_internal( uintptr_t col_offset, int col_length) { - if (!layered && !row_subset && !col_subset) { + if (!layered && !csc && !row_subset && !col_subset) { std::shared_ptr > mat; + // Don't do the same with CSC matrices; there is an implicit + // expectation that all instances of this function prefer row matrices, + // and if we did it with CSC, we'd get a column-major matrix instead. try { - if (!csc) { - mat.reset(new tatami::CompressedSparseRowMatrix >( - tatami_hdf5::load_hdf5_compressed_sparse_matrix >(nr, nc, path, name + "/data", name + "/indices", name + "/indptr") - )); - } else { - mat.reset(new tatami::CompressedSparseColumnMatrix >( - tatami_hdf5::load_hdf5_compressed_sparse_matrix >(nr, nc, path, name + "/data", name + "/indices", name + "/indptr") - )); - } + mat.reset(new tatami::CompressedSparseRowMatrix >( + tatami_hdf5::load_hdf5_compressed_sparse_matrix >(nr, nc, path, name + "/data", name + "/indices", name + "/indptr") + )); } catch (H5::Exception& e) { throw std::runtime_error(e.getCDetailMsg()); } @@ -297,7 +294,7 @@ NumericMatrix read_sparse_matrix_from_hdf5_sparse_matrix( if (as_integer) { return read_sparse_matrix_from_hdf5_sparse_matrix_internal(path, name, nr, nc, csc, layered, row_subset, row_offset, row_length, col_subset, col_offset, col_length); } else { - return read_sparse_matrix_from_hdf5_sparse_matrix_internal(path, name, nr, nc, csc, layered, row_subset, row_offset, row_length, col_subset, col_offset, col_length); + return read_sparse_matrix_from_hdf5_sparse_matrix_internal(path, name, nr, nc, csc, false, row_subset, row_offset, row_length, col_subset, col_offset, col_length); } }