-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Expose internal functions for reading sparse matrices from HDF5. (#86)
For dense matrices, the user is responsible for specifying whether the transposition was performed; for sparse matrices, the user specifies the dimension extents and CSC/CSR layouts. This allows creation of a ScranMatrix from HDF5 files that are not in the typical formats (i.e., H5AD/10X).
- Loading branch information
Showing
5 changed files
with
351 additions
and
139 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,171 @@ | ||
import * as gc from "./gc.js"; | ||
import * as wasm from "./wasm.js"; | ||
import * as utils from "./utils.js"; | ||
import { ScranMatrix } from "./ScranMatrix.js"; | ||
|
||
/** | ||
* Initialize a (potentially layered) sparse matrix from a HDF5 file, | ||
* either from a dense array (via {@linkcode initializeSparseMatrixFromHdf5DenseArray}) | ||
* or a group containing a compressed sparse matrix (via {@linkcode initializeSparseMatrixFromHdf5SparseMatrix}). | ||
* | ||
* @param {string} file Path to the HDF5 file. | ||
* For browsers, the file should have been saved to the virtual filesystem. | ||
* @param {string} name Name of the matrix inside the file. | ||
* This can be a HDF5 Dataset for dense matrices or a HDF5 Group for sparse matrices. | ||
* For the latter, both H5AD and 10X-style sparse formats are supported. | ||
* @param {object} [options={}] - Optional parameters. | ||
* @param {boolean} [options.forceInteger=true] - Whether to coerce all elements to integers via truncation. | ||
* @param {boolean} [options.layered=true] - Whether to create a layered sparse matrix, see [**tatami_layered**](https://github.com/tatami-inc/tatami_layered) for more details. | ||
* Only used if the relevant HDF5 dataset contains an integer type and/or `forceInteger = true`. | ||
* Setting to `true` assumes that the matrix contains only non-negative integers. | ||
* @param {?(Array|TypedArray|Int32WasmArray)} [options.subsetRow=null] - Row indices to extract. | ||
* All indices must be non-negative integers less than the number of rows in the sparse matrix. | ||
* @param {?(Array|TypedArray|Int32WasmArray)} [options.subsetColumn=null] - Column indices to extract. | ||
* All indices must be non-negative integers less than the number of columns in the sparse matrix. | ||
* | ||
* @return {ScranMatrix} Matrix containing sparse data. | ||
*/ | ||
export function initializeSparseMatrixFromHdf5(file, name, { forceInteger = true, layered = true, subsetRow = null, subsetColumn = null } = {}) { | ||
const details = extractHdf5MatrixDetails(file, name); | ||
if (details.format == "dense") { | ||
// Setting transposed = true as all known dense matrices store the cells in the first dimension and the genes in the last dimension. | ||
return initializeSparseMatrixFromHdf5DenseArray(file, name, { transposed: true, forceInteger, layered, subsetRow, subsetColumn }); | ||
} else { | ||
return initializeSparseMatrixFromHdf5SparseMatrix(file, name, details.rows, details.columns, details.format == "csc", { forceInteger, layered, subsetRow, subsetColumn }); | ||
} | ||
} | ||
|
||
function prepare_hdf5_matrix_subset(subsetRow, subsetColumn, fun) { | ||
var output; | ||
let wasm_row, wasm_col; | ||
|
||
try { | ||
let use_row_subset = (subsetRow !== null); | ||
let row_offset = 0, row_length = 0; | ||
if (use_row_subset) { | ||
wasm_row = utils.wasmifyArray(subsetRow, "Int32WasmArray"); | ||
row_offset = wasm_row.offset; | ||
row_length = wasm_row.length; | ||
} | ||
|
||
let use_col_subset = (subsetColumn !== null); | ||
let col_offset = 0, col_length = 0; | ||
if (use_col_subset) { | ||
wasm_col = utils.wasmifyArray(subsetColumn, "Int32WasmArray"); | ||
col_offset = wasm_col.offset; | ||
col_length = wasm_col.length; | ||
} | ||
|
||
output = fun(use_row_subset, row_offset, row_length, use_col_subset, col_offset, col_length); | ||
|
||
} finally { | ||
utils.free(wasm_row); | ||
utils.free(wasm_col); | ||
} | ||
|
||
return output; | ||
} | ||
|
||
/** | ||
* Initialize a (potentially layered) sparse matrix from a two-dimensional HDF5 dataset. | ||
* | ||
* @param {string} file Path to the HDF5 file. | ||
* For browsers, the file should have been saved to the virtual filesystem. | ||
* @param {string} name Name of the dataset inside the file. | ||
* @param {object} [options={}] - Optional parameters. | ||
* @param {boolean} [options.transposed=false] - Whether the matrix is transposed inside the file, i.e., the last dimension represents the rows. | ||
* @param {boolean} [options.forceInteger=false] - Whether to coerce all elements to integers via truncation. | ||
* @param {boolean} [options.layered=true] - Whether to create a layered sparse matrix, see [**tatami_layered**](https://github.com/tatami-inc/tatami_layered) for more details. | ||
* Only used if the relevant HDF5 dataset contains an integer type and/or `forceInteger = true`. | ||
* Setting to `true` assumes that the matrix contains only non-negative integers. | ||
* @param {?(Array|TypedArray|Int32WasmArray)} [options.subsetRow=null] - Row indices to extract. | ||
* All indices must be non-negative integers less than the number of rows in the sparse matrix. | ||
* @param {?(Array|TypedArray|Int32WasmArray)} [options.subsetColumn=null] - Column indices to extract. | ||
* All indices must be non-negative integers less than the number of columns in the sparse matrix. | ||
* | ||
* @return {ScranMatrix} Matrix containing sparse data. | ||
*/ | ||
export function initializeSparseMatrixFromHdf5DenseArray(file, name, { transposed = false, forceInteger = false, layered = true, subsetRow = null, subsetColumn = null } = {}) { | ||
return prepare_hdf5_matrix_subset(subsetRow, subsetColumn, (use_row_subset, row_offset, row_length, use_col_subset, col_offset, col_length) => { | ||
return gc.call( | ||
module => module.read_sparse_matrix_from_hdf5_dense_array( | ||
file, name, transposed, forceInteger, layered, | ||
use_row_subset, row_offset, row_length, use_col_subset, col_offset, col_length | ||
), | ||
ScranMatrix | ||
); | ||
}); | ||
} | ||
|
||
/** | ||
* Initialize a (potentially layered) sparse matrix from a HDF5 group containing the usual `data`, `indices`, and `indptr` components of a compressed sparse matrix. | ||
* | ||
* @param {string} file - Path to the HDF5 file. | ||
* For browsers, the file should have been saved to the virtual filesystem. | ||
* @param {string} name - Name of the dataset inside the file. | ||
* @param {number} numberOfRows - Number of rows in the matrix. | ||
* @param {number} numberOfColumns - Number of columns in the matrix. | ||
* @param {boolean} byColumn - Whether the matrix is in the compressed sparse column (CSC) format. | ||
* If false, the format is assumed to be compressed sparse row (CSR) instead. | ||
* @param {object} [options={}] - Optional parameters. | ||
* @param {boolean} [options.forceInteger=false] - Whether to coerce all elements to integers via truncation. | ||
* @param {boolean} [options.layered=true] - Whether to create a layered sparse matrix, see [**tatami_layered**](https://github.com/tatami-inc/tatami_layered) for more details. | ||
* Only used if the relevant HDF5 dataset contains an integer type and/or `forceInteger = true`. | ||
* Setting to `true` assumes that the matrix contains only non-negative integers. | ||
* @param {?(Array|TypedArray|Int32WasmArray)} [options.subsetRow=null] - Row indices to extract. | ||
* All indices must be non-negative integers less than the number of rows in the sparse matrix. | ||
* @param {?(Array|TypedArray|Int32WasmArray)} [options.subsetColumn=null] - Column indices to extract. | ||
* All indices must be non-negative integers less than the number of columns in the sparse matrix. | ||
* | ||
* @return {ScranMatrix} Matrix containing sparse data. | ||
*/ | ||
export function initializeSparseMatrixFromHdf5SparseMatrix(file, name, numberOfRows, numberOfColumns, byColumn, { forceInteger = false, layered = true, subsetRow = null, subsetColumn = null } = {}) { | ||
return prepare_hdf5_matrix_subset(subsetRow, subsetColumn, (use_row_subset, row_offset, row_length, use_col_subset, col_offset, col_length) => { | ||
return gc.call( | ||
module => module.read_sparse_matrix_from_hdf5_sparse_matrix( | ||
file, name, numberOfRows, numberOfColumns, byColumn, forceInteger, layered, | ||
use_row_subset, row_offset, row_length, use_col_subset, col_offset, col_length | ||
), | ||
ScranMatrix | ||
); | ||
}); | ||
} | ||
|
||
/** | ||
* Extract the format and dimensions of a HDF5 matrix. | ||
* | ||
* @param {string} file Path to the HDF5 file. | ||
* For browsers, the file should have been saved to the virtual filesystem. | ||
* @param {string} name Name of the dataset inside the file. | ||
* This can be a HDF5 Dataset for dense matrices or a HDF5 Group for sparse matrices. | ||
* For the latter, both H5AD and 10X-style sparse formats are supported. | ||
* | ||
* @return {object} An object containing: | ||
* - `rows`, the number of rows in the matrix. | ||
* - `columns`, the number of columns. | ||
* - `format`, whether the matrix is dense, CSR or CSC. | ||
* - `integer`, whether the matrix data is stored as integers or doubles. | ||
*/ | ||
export function extractHdf5MatrixDetails(file, name) { | ||
let output = {}; | ||
let arr = utils.createInt32WasmArray(5); | ||
try { | ||
wasm.call(module => module.extract_hdf5_matrix_details(file, name, arr.offset)); | ||
|
||
let vals = arr.array(); | ||
if (vals[0] > 0) { | ||
output.format = "dense"; | ||
} else if (vals[1] > 0) { | ||
output.format = "csc"; | ||
} else { | ||
output.format = "csr"; | ||
} | ||
|
||
output.rows = vals[2]; | ||
output.columns = vals[3]; | ||
output.integer = vals[4] > 0; | ||
} finally { | ||
arr.free(); | ||
} | ||
return output; | ||
} |
Oops, something went wrong.