From 75e4bc83215d22174dfe8a22dcaa8a230ccc9b0c Mon Sep 17 00:00:00 2001 From: Eric Siegel Date: Tue, 12 Sep 2023 12:59:51 -0700 Subject: [PATCH] loupeR version 1.0.0 --- .github/workflows/check.yml | 45 +++++ .github/workflows/release.yml | 87 +++++++++ .gitignore | 5 + DESCRIPTION | 22 +++ LICENSE | 38 ++++ NAMESPACE | 24 +++ R/cmd.R | 64 ++++++ R/err.R | 142 ++++++++++++++ R/eula.R | 51 +++++ R/hdf5.R | 251 ++++++++++++++++++++++++ R/lib.R | 148 ++++++++++++++ R/setup.R | 199 +++++++++++++++++++ R/util.R | 290 ++++++++++++++++++++++++++++ R/validate.R | 167 ++++++++++++++++ R/zzz.R | 6 + README.md | 92 +++++++++ loupe10x.Rproj | 22 +++ man/create_bugreport.Rd | 32 +++ man/create_bugreport_from_seurat.Rd | 15 ++ man/create_loupe.Rd | 40 ++++ man/create_loupe_from_seurat.Rd | 35 ++++ man/select_assay.Rd | 18 ++ man/select_clusters.Rd | 19 ++ man/select_projections.Rd | 17 ++ man/setup.Rd | 14 ++ man/validate_clusters.Rd | 23 +++ man/validate_count_mat.Rd | 21 ++ man/validate_projections.Rd | 23 +++ tests/testthat.R | 12 ++ tests/testthat/helper.R | 25 +++ tests/testthat/test-hdf5.R | 49 +++++ tests/testthat/test-util.R | 120 ++++++++++++ tests/testthat/test-validate.R | 159 +++++++++++++++ tools/doc/logo.svg | 49 +++++ 34 files changed, 2324 insertions(+) create mode 100644 .github/workflows/check.yml create mode 100644 .github/workflows/release.yml create mode 100644 .gitignore create mode 100644 DESCRIPTION create mode 100644 LICENSE create mode 100644 NAMESPACE create mode 100644 R/cmd.R create mode 100644 R/err.R create mode 100644 R/eula.R create mode 100644 R/hdf5.R create mode 100644 R/lib.R create mode 100644 R/setup.R create mode 100644 R/util.R create mode 100644 R/validate.R create mode 100644 R/zzz.R create mode 100644 README.md create mode 100644 loupe10x.Rproj create mode 100644 man/create_bugreport.Rd create mode 100644 man/create_bugreport_from_seurat.Rd create mode 100644 man/create_loupe.Rd create mode 100644 man/create_loupe_from_seurat.Rd create mode 100644 man/select_assay.Rd create mode 100644 man/select_clusters.Rd create mode 100644 man/select_projections.Rd create mode 100644 man/setup.Rd create mode 100644 man/validate_clusters.Rd create mode 100644 man/validate_count_mat.Rd create mode 100644 man/validate_projections.Rd create mode 100644 tests/testthat.R create mode 100644 tests/testthat/helper.R create mode 100644 tests/testthat/test-hdf5.R create mode 100644 tests/testthat/test-util.R create mode 100644 tests/testthat/test-validate.R create mode 100644 tools/doc/logo.svg diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml new file mode 100644 index 0000000..5477215 --- /dev/null +++ b/.github/workflows/check.yml @@ -0,0 +1,45 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + pull_request: + branches: [main] + +name: R-CMD-check + +jobs: + R-CMD-check: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + fail-fast: false + matrix: + config: + - {os: macos-latest, r: 'release'} + - {os: windows-latest, r: 'release'} + - {os: ubuntu-latest, r: 'release'} + + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: yes + + steps: + - uses: actions/checkout@v3 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + http-user-agent: ${{ matrix.config.http-user-agent }} + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::rcmdcheck + needs: check + + - uses: r-lib/actions/check-r-package@v2 + with: + upload-snapshots: false diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..94058c6 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,87 @@ +on: + workflow_dispatch: + inputs: + tag: + description: 'Git Tag/Release that we are uploading to' + required: true + +name: release + +permissions: + contents: write + +jobs: + R-CMD-check: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + matrix: + config: + - {os: macos-latest, r: 'release'} + - {os: windows-latest, r: 'release'} + - {os: ubuntu-latest, r: 'release'} + + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: yes + + steps: + - uses: actions/checkout@v3 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + http-user-agent: ${{ matrix.config.http-user-agent }} + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::rcmdcheck + needs: check + + - uses: r-lib/actions/check-r-package@v2 + with: + upload-snapshots: false + + - name: Configure gh for auth + env: + GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} + run: gh auth setup-git + + - name: Create Exec directory + shell: bash + run: mkdir exec + + # Copy Louper binaries to the `exec` directory + - name: Copy Louper binary to Exec directory (Windows) + if: ${{ contains(matrix.config.os, 'windows') }} + env: + GH_TOKEN: ${{secrets.GITHUB_TOKEN}} + run: gh release download ${{ github.event.inputs.tag }} -p 'louper-windows*' -O exec/louper.exe + - name: Copy Louper binary to Exec directory (Linux) + if: ${{ contains(matrix.config.os, 'ubuntu') }} + env: + GH_TOKEN: ${{secrets.GITHUB_TOKEN}} + run: gh release download ${{ github.event.inputs.tag }} -p 'louper-linux*' -O exec/louper + - name: Copy Louper binary to Exec directory (MacOS) + if: ${{ contains(matrix.config.os, 'macos') }} + env: + GH_TOKEN: ${{secrets.GITHUB_TOKEN}} + run: gh release download ${{ github.event.inputs.tag }} -p 'louper-macos*' -O exec/louper + + - name: Build R source build + shell: bash + run: R CMD build . + + - name: Rename source build + shell: bash + run: mv loupeR*tar.gz loupeR_${{ runner.os }}.tar.gz + + - name: Upload build to release + run: gh release upload ${{ github.event.inputs.tag }} loupeR_${{ runner.os }}.tar.gz + env: + GH_TOKEN: ${{secrets.GITHUB_TOKEN}} diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..565f2b6 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +.Rproj.user +.Rhistory +.Rdata +.httr-oauth +.DS_Store diff --git a/DESCRIPTION b/DESCRIPTION new file mode 100644 index 0000000..89ac32b --- /dev/null +++ b/DESCRIPTION @@ -0,0 +1,22 @@ +Package: loupeR +Title: Converts Seurat objects to 10x Genomics Loupe files +Version: 1.0.0 +Authors@R: + person(given = "Eric", + family = "Siegel", + email = "eric.siegel@10xgenomics.com", + role = c("aut", "cre")) +Description: Converts Seurat objects to 10x Genomics Loupe files. + This is a second line to make the package checker not complain. +License: file LICENSE +Encoding: UTF-8 +Roxygen: list(markdown = TRUE) +RoxygenNote: 7.2.3.9000 +Suggests: + testthat (>= 3.0.0), + Matrix +Config/testthat/edition: 3 +Imports: + methods, + Seurat, + hdf5r diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..98e3682 --- /dev/null +++ b/LICENSE @@ -0,0 +1,38 @@ +Copyright (c) 2023 10x Genomics + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and +associated documentation files (the "Software"), to use, copy, modify, and/or merge copies of the +Software, in both source code and object code format, solely for your internal use, subject to the +following conditions: + +1. The above copyright notice and this permission notice shall be included in all copies or +substantial portions of the Software. + +2. The above rights granted in the Software may be exercised only in connection with a 10x Genomics +Product (defined below), rightfully purchased from 10x Genomics or an authorized reseller, or data +generated using such a 10x Genomics Product. A “10X Genomics Product” means, collectively, 10x +Genomics branded instruments, reagents, consumables, kits, and labware used in accordance with the +10X Genomics Product Terms and Conditions of Sale or, if applicable, any written contract between +you and 10x Genomics. The rights granted may also be exercised in connection with other products +when doing so is an integral part of an experiment where the data is generated primarily using a 10x +Genomics Product. + +3. You agree not to redistribute or sublicense the Software, either in source code or object code +format. + +4. All derivative works, including any modifications of the Software, shall be subject to all of the +restrictions set forth herein. + +5. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT +NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES +OR OTHER LIABILITY, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES, WHETHER IN +AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE, +THE USE OR INABILITY TO USE, OR OTHER DEALINGS IN THE SOFTWARE (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF +THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED +OF THE POSSIBILITY OF SUCH DAMAGES. + +You may not use, propagate or modify the Software, or any derivatives thereof, except as expressly +provided herein. Any attempt otherwise to use, propagate or modify it is void, and will +automatically terminate your rights under this license. diff --git a/NAMESPACE b/NAMESPACE new file mode 100644 index 0000000..6f29c5a --- /dev/null +++ b/NAMESPACE @@ -0,0 +1,24 @@ +# Generated by roxygen2: do not edit by hand + +export(create_bugreport) +export(create_bugreport_from_seurat) +export(create_loupe) +export(create_loupe_from_seurat) +export(select_assay) +export(select_clusters) +export(select_projections) +export(setup) +export(validate_clusters) +export(validate_count_mat) +export(validate_projections) +importFrom(Seurat,Assays) +importFrom(Seurat,GetAssay) +importFrom(Seurat,Idents) +importFrom(Seurat,Reductions) +importFrom(hdf5r,H5File) +importFrom(hdf5r,H5T_STRING) +importFrom(methods,is) +importFrom(utils,download.file) +importFrom(utils,packageVersion) +importFrom(utils,sessionInfo) +importFrom(utils,strcapture) diff --git a/R/cmd.R b/R/cmd.R new file mode 100644 index 0000000..9c7e49d --- /dev/null +++ b/R/cmd.R @@ -0,0 +1,64 @@ +#' Create a Loupe file by invoking the external Louper executable +#' +#' @param h5path path to the h5 interchange file. +#' @param output_dir optional directory where the Loupe file will be written +#' @param output_name optional name of the Loupe file with the extensions not included. +#' @param executable_path optional path to the louper executable. +#' @param force optional logical as to whether we should overwrite an already existing file +#' +#' @return TRUE on success, FALSE on error +#' +#' @noRd +louper_create_cloupe <- function( + h5path, + output_dir = NULL, + output_name = NULL, + executable_path = NULL, + force = FALSE +) { + # default loupe name to `converted.cloupe` + if (is.null(output_name)) { + output_name <- "converted" + } + if (is.null(output_dir)) { + loupe_path <- sprintf("%s.cloupe", file.path(output_name)) + } else { + loupe_path <- sprintf("%s.cloupe", file.path(output_dir, output_name)) + } + + h5path <- normalizePath(path.expand(h5path)) + loupe_path <- suppressWarnings(normalizePath(path.expand(loupe_path))) + + input_flag <- sprintf("--input=%s", h5path) + output_flag <- sprintf("--output=%s", loupe_path) + args <- c("create", input_flag, output_flag) + + if (file.exists(loupe_path) && !force) { + return(err("Loupe file already exists. Set `force = TRUE` to overwrite")) + } + + if (force) { + args <- c(args, "--force") + } + + if (is.null(executable_path)) { + executable_path <- find_executable() + if (is.null(executable_path)) { + return(err("Could not find a valid louper executable. Please run `setup()`.")) + } + } + + executable_path <- normalizePath(path.expand(executable_path)) + + if (!file.exists(executable_path)) { + return(err(sprintf("cannot find Louper executable at path: '%s'", executable_path))) + } + + status <- system2(command=executable_path, args=args) + + if (status == 0) { + return(SUCCESS) + } else { + return(err(sprintf("Louper executable failed: status code %d", status))) + } +} diff --git a/R/err.R b/R/err.R new file mode 100644 index 0000000..dd9b50f --- /dev/null +++ b/R/err.R @@ -0,0 +1,142 @@ +#' Generate an error with the given msg +#' +#' @param msg is a string +#' +#' @noRd +err <- function(msg) { + list(success = FALSE, msg = msg) +} + +#' The non-error variant of above +#' @noRd +SUCCESS <- list(success = TRUE, msg = NULL) + +#' validation error with link to 10x support +#' @noRd +validation_err <- function(msg, name) { + sprintf("\nIt looks like the formatting of your %s does not match the required formatting for LoupeR. For further information, please see the documentation: 10xgen.com/louper\n\n%s", name, msg) +} + +#' general error with link to 10x support +#' @noRd +general_err <- function(msg, name) { + sprintf("\nIt looks like there was an issue with %s. For further information, please see the documentation: 10xgen.com/louper\n\n%s", name, msg) +} + +#' Create a Bugreport from a Seurat Object +#' +#' @description +#' This bugreport can then be included when reaching out to 10xGenomics Support or when filing +#' a Github ticket. This information should be included along with any other output when creating a Loupe file. +#' +#' @param obj A Seurat Object +#' +#' @importFrom methods is +#' +#' @export +create_bugreport_from_seurat <- function(obj) { + # metadata + cat("\nMetadata:\n\n") + if (is(obj, "Seurat")) { + obj_version <- as.character(obj@version) + metadata <- create_metadata(obj_version) + print_metadata(metadata) + } else { + metadata <- create_metadata() + print_metadata(metadata) + print_lines(sprintf("Object is not a Seurat Object it is a: %s", class(obj))) + return(invisible()) + } + + # overview + namedAssay <- select_assay(obj) + if (is.null(namedAssay)) { + cat("\nSeurat:\n\n") + cat("No assay found\n") + return(invisible()) + } + + assay_name <- names(namedAssay) + assay <- namedAssay[[1]] + clusters <- select_clusters(obj) + projections <- select_projections(obj) + + create_bugreport( + assay@counts, + clusters, + projections, + assay_name = assay_name, + seurat_obj_version = obj_version, + skip_metadata = TRUE + ) +} + +#' Create a Bugreport from a count matrix, projections, and clusters +#' +#' @description +#' This bugreport can then be included when reaching out to 10xGenomics Support or when filing +#' a Github ticket. This information should be included along with any other output when creating a Loupe file. +#' +#' @param count_mat A sparse dgCMatrix +#' @param clusters list of factors that hold information for each barcode +#' @param projections list of matrices, all with dimensions (barcodeCount x 2) +#' @param assay_name optional string that holds the Seurat Object assay name. +#' @param seurat_obj_version optional string that holds the Seurat Object version. It is useful for debugging compatibility issues. +#' @param skip_metadata optional logical which skips printing metadata +#' +#' @importFrom methods is +#' +#' @export +create_bugreport <- function( + count_mat, + clusters, + projections, + assay_name = NULL, + seurat_obj_version = NULL, + skip_metadata = FALSE +) { + # metadata + if (!skip_metadata) { + cat("\nMetadata:\n\n") + metadata <- create_metadata(seurat_obj_version) + print_metadata(metadata) + } + + # selections + cat("\nSelections:\n\n") + if (!is.null(assay_name)) { + cat("selected assay:\n") + print_lines(assay_name, " ") + } + cat("selected clusters:\n") + print_lines(names(clusters), " ") + cat("selected projections:\n") + print_lines(names(projections), " ") + + # matrix + cat("\nMatrix Sampling:\n\n") + all_features <- rownames(count_mat) + all_barcodes <- colnames(count_mat) + features <- sample(rownames(count_mat), size=min(10, length(all_features))) + barcodes <- sample(colnames(count_mat), size=min(10, length(all_barcodes))) + cat(sprintf("feature count: %d\n", length(all_features))) + cat(sprintf("barcode count: %d\n", length(all_barcodes))) + cat(sprintf("feature sampling:\n")) + print_lines(features, " ") + cat(sprintf("barcode sampling:\n")) + print_lines(barcodes, " ") + + print_validation <- function(name, res) { + if (res$success) { + cat(sprintf(" %s (VALID)\n", name)) + } else { + cat(sprintf(" %s (INVALID) %s\n", name, res$msg)) + } + } + + # validation + cat("\nValidation:\n\n") + print_validation("count matrix:", validate_count_mat(count_mat)) + print_validation("clusters: ", validate_clusters(clusters, length(all_barcodes))) + print_validation("projections: ", validate_projections(projections, length(all_barcodes))) +} diff --git a/R/eula.R b/R/eula.R new file mode 100644 index 0000000..a2145c2 --- /dev/null +++ b/R/eula.R @@ -0,0 +1,51 @@ +#' Present EULA and Record agreement +#' +#' @return TRUE on success, FALSE on error +#' +#' @noRd +eula <- function() { + if (eula_have_agreed()) { + return(invisible(TRUE)) + } + + resp <- "" + while(!(resp %in% c("y", "yes", "n", "no"))) { + resp <- readline(prompt="The LoupeR executable is subject to the 10x End User Software License, available at:\nhttps://10xgen.com/EULA \n\nDo you accept the End-User License Agreement\n(y/yes or n/no): ") + resp <- tolower(resp) + } + + if(resp %in% c("n", "no")) { + return(FALSE) + } + + dir.create(eula_data_dir(), showWarnings=FALSE, recursive = TRUE) + file.create(eula_lock_file()) + + invisible(TRUE) +} + +#' Reset Eula by removing lock file +#' @noRd +eula_reset <- function() { + if (eula_have_agreed()) { + file.remove(eula_lock_file()) + } +} + +#' Check if user has agreed to EULA +#' @noRd +eula_have_agreed <- function() { + file.exists(eula_lock_file()) +} + +#' Path to directory that holds EULA agreement lock file +#' @noRd +eula_data_dir <- function() { + tools::R_user_dir("loupeR", "data") +} + +#' Path to EULA agreement lock file +#' @noRd +eula_lock_file <- function() { + file.path(eula_data_dir(), "eula_agreement") +} diff --git a/R/hdf5.R b/R/hdf5.R new file mode 100644 index 0000000..4ce8c4b --- /dev/null +++ b/R/hdf5.R @@ -0,0 +1,251 @@ +#' Create an hdf5 interchange file +#' +#' @param count_mat A sparse dgCMatrix +#' @param clusters list of factors that hold information for each barcode +#' @param projections list of matrices, all with dimensions (barcodeCount x 2) +#' @param h5path path to h5 file +#' @param seurat_obj_version optional string that holds the Seurat Object version. It is useful for debugging compatibility issues. +#' +#' @importFrom hdf5r H5File +#' +#' @return TRUE on success, FALSE on error +#' +#' @noRd +create_hdf5 <- function( + count_mat, + clusters, + projections, + h5path, + seurat_obj_version +) { + if (file.exists(h5path)) { + return(err(sprintf("cannot create h5 file %s", h5path))) + } + + # create hdf5 file and matrix groups + f <- hdf5r::H5File$new(h5path, mode="w") + + write_mat(f, count_mat) + write_clusters(f, clusters) + write_projections(f, projections) + write_metadata(f, seurat_obj_version) + + f$close() + + SUCCESS +} + +#' Writes the matrix to the H5 file +#' +#' @param f An open H5File +#' @param count_mat A sparse dgCMatrix +#' +#' @noRd +write_mat <- function(f, count_mat) { + features <- rownames(count_mat) + barcodes_unmodified <- colnames(count_mat) + barcodes_formatted <- sanitize_barcodes(barcodes_unmodified) + feature_count <- length(features) + barcode_count <- length(barcodes_formatted ) + + # create groups + matrix_group <- f$create_group("matrix") + features_group <- matrix_group$create_group("features") + + create_str_dataset(matrix_group, "barcodes", barcodes_formatted ) + create_str_dataset(matrix_group, "barcodes_unmodified", barcodes_unmodified) + create_dataset(matrix_group, "data", as.integer(count_mat@x)) + create_dataset(matrix_group, "indices", as.integer(count_mat@i)) + create_dataset(matrix_group, "indptr", as.integer(count_mat@p)) + create_dataset(matrix_group, "shape", as.integer(c(feature_count, barcode_count))) + matrix_group$close() + + feature_ids <- lapply(1:length(features), function(x) {return(sprintf("feature_%d", x))}) + + create_str_dataset(features_group, "name", features) + create_str_dataset(features_group, "id", as.character(feature_ids)) + create_str_dataset(features_group, "feature_type", rep("Gene Expression", length(features))) + create_str_dataset(features_group, "_all_tag_keys", as.character()) # required features + + features_group$close() +} + +#' Prints the metadata list to stdout. +#' +#' @param metadata The metadata list +#' @param prefix What to prefix each line +#' +#' @noRd +print_metadata <- function(metadata, prefix="") { + for (name in names(metadata)) { + val <- metadata[[name]] + + if (is.list(val)) { + cat(sprintf("%s%s:\n", prefix, name)) + print_metadata(val, paste(prefix, " ")) + } else { + cat(sprintf("%s%s: %s\n", prefix, name, val)) + } + } +} + +#' Writes the clusters to the H5 file +#' +#' @param f An open H5File +#' @param clusters list of factors that hold information for each barcode +#' +#' @noRd +write_clusters <- function(f, clusters) { + clusters_group <- f$create_group("clusters") + + for (i in 1:length(clusters)) { + name <- names(clusters[i]) + cluster <- clusters[[i]] + + group <- clusters_group$create_group(name) + create_str_dataset(group, "name", name) + create_str_dataset(group, "group_names", levels(cluster)) + create_dataset(group, "assignments", as.integer(cluster@.Data - 1)) # zero index, so subtract 1 + create_dataset(group, "score", 0.0) + create_str_dataset(group, "clustering_type", "unknown") + group$close() + } + + clusters_group$close() +} + +#' Writes the projections to the H5 file +#' +#' @param f An open H5File +#' @param projections list of matrices, all with dimensions (barcodeCount x 2) +#' +#' @noRd +write_projections <- function(f, projections) { + projections_group <- f$create_group("projections") + + for (i in 1:length(projections)) { + name <- names(projections[i]) + projection <- projections[[i]] + + is_umap <- grepl("umap", name, ignore.case = TRUE) + is_tsne <- grepl("tsne", name, ignore.case = TRUE) + is_tsne_dash <- grepl("t-sne", name, ignore.case = TRUE) + if (is_umap) { + method <- "UMAP" + } else if (is_tsne || is_tsne_dash) { + method <- "t-SNE" + } else { + method <- name + } + + group <- projections_group$create_group(name) + create_str_dataset(group, "name", name) + create_str_dataset(group, "method", method) + create_dataset(group, "data", projection) + group$close() + } + + projections_group$close() +} + +#' Create the metadata list +#' +#' @param seurat_obj_version optional string that holds the Seurat Object version. +#' +#' @importFrom utils sessionInfo packageVersion +#' +#' @noRd +create_metadata <- function(seurat_obj_version = NULL) { + sinfo <- utils::sessionInfo() + rversion <- sinfo$R.version + + # Create version string where: + # major is a single digit, ie "4" + # minor is multiple digits with a period, ie "2.3" + # status is empty for release builds, but can be alpha, devel, etc + language_version <- sprintf("%s.%s", rversion$major, rversion$minor) + if (rversion$status != "") { + language_version <- paste0(language_version, "-", rversion$status) + } + + meta <- list() + meta["tool"] <- "loupeR" + meta["tool_version"] <- as.character(utils::packageVersion("loupeR")) + meta["os"] <- sinfo$running + meta["system"] <- sinfo$platform + meta["language"] <- rversion$language + meta["language_version"] <- language_version + + extra = list() + extra["loupeR_seurat_version"] <- as.character(utils::packageVersion("Seurat")) + extra["loupeR_seurat_object_version"] <- ifelse(is.null(seurat_obj_version), "n/a", seurat_obj_version) + extra["loupeR_hdf5r_version"] <- as.character(utils::packageVersion("hdf5r")) + extra["loupeR_hdf5_version"] <- hdf5r::h5version(FALSE) + meta[["extra"]] <- extra + + meta +} + +#' Writes the metadata +#' +#' @param f An open H5File +#' @param seurat_obj_version optional string that holds the Seurat Object version. It is useful for debugging compatibility issues. +#' +#' @noRd +write_metadata <- function(f, seurat_obj_version) { + metadata <- create_metadata(seurat_obj_version) + + create_datasets <- function(parent_group, data, groupname) { + group <- parent_group$create_group(groupname) + + for (name in names(data)) { + val <- data[[name]] + + if (is.list(val)) { + create_datasets(group, val, name) + } else { + create_str_dataset(group, name, val) + } + } + + group$close() + } + + create_datasets(f, metadata, "metadata") +} + +#' Create a dataset, but also closes the handle to reclaim memory +#' +#' @param obj A hdf5r File or Group +#' @param key A string name +#' @param value A vector of data +#' @param ... Additional params that will be passed to hdf5r$create_dataset +#' +#' @noRd +create_dataset <- function(obj, key, value, ...) { + d <- obj$create_dataset(key, value, ...) + d$close() +} + +#' Create a fixed length string dataset and closes the handle to reclaim memory +#' +#' @param obj A hdf5r File or Group +#' @param key A string name +#' @param strs A vector of character data. All Ascii characters +#' @param ... Additional params that will be passed to create_dataset +#' +#' @importFrom hdf5r H5T_STRING +#' +#' @noRd +create_str_dataset <- function(obj, key, strs, ...) { + if (length(strs) == 0) { + max_len <- 1 + } else { + max_len <- max(as.numeric(lapply(strs, nchar))) + } + + dtype <- hdf5r::H5T_STRING$new(size=max_len) + + create_dataset(obj, key, strs, dtype=dtype, ...) +} + diff --git a/R/lib.R b/R/lib.R new file mode 100644 index 0000000..ee16b57 --- /dev/null +++ b/R/lib.R @@ -0,0 +1,148 @@ +#' Create a Loupe file from a Seurat Object +#' +#' @description +#' `create_loupe_from_seurat()` passes the active counts matrix, +#' reductions, and factors found in `meta.data` to create a Loupe file. +#' +#' @param obj A Seurat Object +#' @param output_dir optional directory where the Loupe file will be written +#' @param output_name optional name of the Loupe file with the extensions not included. +#' @param dedup_clusters optional logical that will try to deduplicate all clusters that are numerically the same +#' @param executable_path optional path to the louper executable. +#' @param force optional logical as to whether we should overwrite an already existing file +#' +#' @return TRUE on success, FALSE on error +#' +#' @importFrom methods is +#' +#' @export +create_loupe_from_seurat <- function( + obj, + output_dir = NULL, + output_name = NULL, + dedup_clusters = FALSE, + executable_path = NULL, + force = FALSE +) { + v <- needs_setup(executable_path) + if (!v$success) { + stop(v$msg) + } + + if (!is(obj, "Seurat")) { + stop(validation_err("input object was not a Seurat object", "Seurat Object")) + } + + logMsg("extracting matrix, clusters, and projctions") + + namedAssay <- select_assay(obj) + if (is.null(namedAssay)) { + stop(validation_err("could not find a usable count matrix", "Seurat Object")) + } + + assay_name <- names(namedAssay) + assay <- namedAssay[[1]] + + clusters <- select_clusters(obj, dedup=dedup_clusters) + projections <- select_projections(obj) + + logMsg("selected assay:", assay_name) + logMsg("selected clusters:", names(clusters)) + logMsg("selected projections:", names(projections)) + + seurat_obj_version <- NULL + if (!is.null(obj@version)) { + seurat_obj_version <- as.character(obj@version) + } + + success <- create_loupe( + assay@counts, + clusters=clusters, + projections=projections, + output_dir=output_dir, + output_name=output_name, + executable_path=executable_path, + force=force, + seurat_obj_version=seurat_obj_version + ) + + invisible(success) +} + +#' Create a Loupe file +#' +#' @param count_mat A sparse dgCMatrix +#' @param clusters list of factors that hold information for each barcode +#' @param projections list of matrices, all with dimensions (barcodeCount x 2) +#' @param output_dir optional directory where the Loupe file will be written +#' @param output_name optional name of the Loupe file with the extensions not included. +#' @param executable_path optional path to the louper executable. +#' @param force optional logical as to whether we should overwrite an already existing file +#' @param seurat_obj_version optional string that holds the Seurat Object version. It is useful for debugging compatibility issues. +#' +#' @return TRUE on success, FALSE on error +#' +#' @importFrom methods is +#' +#' @export +create_loupe <- function( + count_mat, + clusters = list(), + projections = list(), + output_dir = NULL, + output_name = NULL, + executable_path = NULL, + force = FALSE, + seurat_obj_version = NULL +) { + v <- needs_setup(executable_path) + if (!v$success) { + stop(v$msg) + } + + logMsg("validating count matrix") + ok <- validate_count_mat(count_mat) + if (!ok$success) { + stop(validation_err(ok$msg, "count matrix")) + } + + barcodes <- colnames(count_mat) + barcode_count <- length(barcodes) + + logMsg("validating clusters") + ok <- validate_clusters(clusters, barcode_count) + if (!ok$success) { + stop(validation_err(ok$msg, "clusters")) + } + + logMsg("validating projections") + ok <- validate_projections(projections, barcode_count) + if (!ok$success) { + stop(validation_err(ok$msg, "projections")) + } + + h5path <- sprintf("%s.h5", tempfile()) + ok <- create_hdf5( + count_mat, + clusters, + projections, + h5path, + seurat_obj_version + ) + if (!ok$success) { + stop(general_err(ok$msg, "creating the temporary hdf5 file")) + } + + ok <- louper_create_cloupe( + h5path, + output_dir=output_dir, + output_name=output_name, + executable_path=executable_path, + force=force + ) + if (!ok$success) { + stop(general_err(ok$msg, "creating the loupe file")) + } + + invisible(TRUE) +} diff --git a/R/setup.R b/R/setup.R new file mode 100644 index 0000000..8f2564a --- /dev/null +++ b/R/setup.R @@ -0,0 +1,199 @@ +#' Setup eula and download executable +#' +#' @param executable_path optional string to a non default executable path +#' +#' @export +setup <- function(executable_path = NULL) { + if (is.null(executable_path)) { + executable_path <- find_executable() + if (is.null(executable_path)) { + cat("\nInstalling Executable\n\n") + install_executable() + } + } + + if (!eula_have_agreed()) { + cat("\nEULA\n\n") + eula() + } +} + +needs_setup <- function(executable_path = NULL) { + needs_eula <- !eula_have_agreed() + needs_executable <- is.null(executable_path) && is.null(find_executable()) + + if (needs_eula && needs_executable) { + return(err("Please call `setup()` to install the Louper executable and to agree to the EULA before continuing")) + } + if (needs_eula) { + return(err("Please call `setup()` to agree to the EULA before continuing")) + } + if (needs_executable) { + return(err("Please call `setup()` to install the Louper executable")) + } + + SUCCESS +} + +#' Downloads and installs the Louper executable. +#' +#' @description +#' This is automatically called when the `louperR` library is loaded. +#' +#' @param force optional logical as to whether we should overwrite an already installed executable +#' +#' @importFrom utils download.file +#' @importFrom Seurat GetAssay +#' +#' @noRd +install_executable <- function(force = FALSE) { + logMsg("Downloading executable") + + artifact <- get_artifact() + + destfile <- tempfile() + + # required until the package is made public and we no longer github auth + # for now users MUST set one of these enviroment variables. + headers <- list() + for (envname in c('GITHUB_PAT', 'GITHUB_TOKEN')) { + token <- Sys.getenv(x=envname) + if (token != "") { + headers["Authorization"] = paste0("token ", token) + headers["Accept"] = "application/octet-stream" + break + } + } + + ok <- utils::download.file(artifact$url, destfile, headers = headers, mode = "wb") + if (ok != 0) { + logMsg("Download failed") + return(invisible(FALSE)) + } + + executable_path <- default_executable_path() + + if (!dir.exists(dirname(executable_path))) { + ok <- dir.create(dirname(executable_path), showWarnings = FALSE, recursive = TRUE) + if (!ok) { + logMsg("Failed to create installation directory") + return(invisible(FALSE)) + } + } + + ok <- file.copy(destfile, executable_path, overwrite = TRUE) + if (!ok) { + logMsg("Failed to copy executable to final installation directory") + return(invisible(FALSE)) + } + + ok <- Sys.chmod(executable_path, mode = "0755", use_umask = TRUE) + if (!ok) { + logMsg("Failed to update executable file permissions") + return(invisible(FALSE)) + } + + v <- verify_executable(executable_path) + if (!v$success) { + logMsg("Verification of executable failed:", v$msg) + return(invisible(FALSE)) + } + + invisible(v$success) +} + +#' Finds the first valid executable_path +#' +#' @return string path on success, NULL if not found +#' +#' @noRd +find_executable <- function() { + for (p in c(default_executable_path(), bundled_executable_path())) { + res <- verify_executable(p) + if (res$success) { + return(p) + } + } + + NULL +} + +#' Verify the Louper executable is installed +#' +#' @noRd +verify_executable <- function(executable_path) { + if (!file.exists(executable_path)) { + return(err("executable not found")) + } + + artifact <- get_artifact() + digest <- tools::md5sum(executable_path) + if (digest != artifact$md5) { + return(err("executable digest does not match")) + } + + SUCCESS +} + +#' This is executable path that will be installed via the likes of `devtools::install` +#' +#' @description +#' We use the R_user_dir instead of writing to the package install directory as the user might not have +#' permissions to write to that directory. +#' +#' @noRd +default_executable_path <- function() { + basedir <- tools::R_user_dir("loupeR", "data") + normalizePath(path.expand(file.path(basedir, executable_basename())), mustWork = FALSE) +} + +#' This is executable path that when installing from a prebundled source tar.gz +#' +#' @description +#' Users who don't have `devtools` installed and want a simple way to install loupeR will +#' download the `tar.gz` R source package which includes in its `exec` directory the louper binary. +#' +#' @noRd +bundled_executable_path <- function() { + basedir <- system.file("", package = "loupeR") + normalizePath(path.expand(file.path(basedir, "exec", executable_basename())), mustWork = FALSE) +} + +executable_basename <- function() { + if (get_system_os() == "windows") { + return("louper.exe") + } else { + return("louper") + } +} + +#' Louper Executable Artifacts +#' @noRd +artifacts = list( + linux = list( + url = "https://github.com/10XGenomics/loupeR/releases/download/v1.0.0/louper-linux-x64", + md5 = "e3425631323e43cf9247b61a026cec09" + ), + mac = list( + url = "https://github.com/10XGenomics/loupeR/releases/download/v1.0.0/louper-macos-x64", + md5 = "fba8a41d02a00edde8671c0c44886589" + ), + windows = list( + url = "https://github.com/10XGenomics/loupeR/releases/download/v1.0.0/louper-windows-x64.exe", + md5 = "300e26db7fe5b31e196f838097afcfb9" + ) +) + +#' Gets the artifact information for current OS +#' @noRd +get_artifact <- function() { + os <- get_system_os() + + if (os == "windows") { + return(artifacts$windows) + } else if (os == "mac") { + return(artifacts$mac) + } else { + return(artifacts$linux) + } +} diff --git a/R/util.R b/R/util.R new file mode 100644 index 0000000..49653e7 --- /dev/null +++ b/R/util.R @@ -0,0 +1,290 @@ +#' Log a message +#' +#' @param ... a variable number of character message parts +#' +#' @noRd +logMsg <- function(...) { + l <- list(...) + if (length(l) == 0) { + return() + } + + now <- format(Sys.time(), format = "%Y/%m/%d %H:%M:%S", usetz=FALSE) + + msg_vector <- now + for (part in l) { + msg_vector <- c(msg_vector, part) + } + + msg <- do.call(paste, as.list(msg_vector)) + + message(msg) +} + +#' Select the "best" assay for conversion +#' +#' @description +#' Prioritizes the active assay, then RNA, and then the rest +#' Usable assays must have a non empty count matrix +#' +#' @param obj A Seurat Object +#' +#' @return A list with the named Seurat Assay or NULL if not found +#' +#' @importFrom Seurat Assays +#' @importFrom Seurat GetAssay +#' +#' @export +select_assay <- function(obj) { + # Search the assays for the best match for the count matrix + # Prioritize active assay, RNA, and then the rest + assay_priority <- c() + for (name in Seurat::Assays(obj)) { + if (identical(name, obj@active.assay)) { + priority <- 1 + } else if (grepl("rna", name, ignore.case=TRUE)) { + priority <- 2 + } else { + priority <- 3 + } + + assay_priority[name] <- priority + } + assay_priority <- sort(assay_priority) + + assay <- NULL + for (i in 1:length(assay_priority)) { + name <- names(assay_priority[i]) + assay <- Seurat::GetAssay(obj, assay=name) + + if (length(assay@counts) > 0) { + result = list() + result[[name]] = assay + return(result) + } + } + + NULL +} + +#' Select clusters from the assay +#' +#' @param obj A Seurat Object +#' @param dedup logical to dedupicate clusters. Default TRUE. +#' +#' @return A list of factors +#' +#' @importFrom Seurat Idents +#' +#' @export +select_clusters <- function(obj, dedup=FALSE) { + # Use the active.ident as a cluster + clusters <- list(active_cluster = Seurat::Idents(obj)) + + # Use all factors and character vectors from meta.data + for (name in names(obj@meta.data)) { + data <- obj@meta.data[[name]] + + if (is.factor(data)) { + clusters[[name]] = data + } else if (is.character(data)) { + clusters[[name]] = factor(data) + } + } + + if (dedup) deduplicate_clusters(clusters) else clusters +} + +#' Select projections from the assay +#' +#' @param obj A Seurat Object +#' +#' @return A list of matrices, all with dimensions (barcodeCount x 2) +#' +#' @importFrom Seurat Reductions +#' +#' @export +select_projections <- function(obj) { + projections = list() + for (name in Seurat::Reductions(obj)) { + reduction <- obj[[name]] + if (dim(reduction@cell.embeddings)[[2]] == 2) { + projections[[name]] = reduction@cell.embeddings + } + } + + projections +} + +#' Check Clusters are identical numerically +#' +#' @param x factor that hold information for each barcode +#' @param y factor that hold information for each barcode +#' +#' @return TRUE if identical FALSE otherwise +#' +#' @noRd +clusters_identical <- function(x, y) { + # relevel so that factors are in order of appearance, then convert to numeric + x <- as.numeric(factor(x, levels = as.character(unique(x)))) + y <- as.numeric(factor(y, levels = as.character(unique(y)))) + all(x == y) +} + +#' Deduplicate clusters +#' +#' @param clusters list of factors that hold information for each barcode +#' +#' @return list of clusters with duplicated removed +#' +#' @noRd +deduplicate_clusters <- function(clusters) { + if (length(clusters) <= 1) { + return(clusters) + } + + # grouping of the cluster names with identical data + groups <- list() + for (clusterIdx in seq_along(clusters)) { + cluster_with_name <- clusters[clusterIdx] + cluster <- clusters[[clusterIdx]] + + # find a matching group and potentially add this cluster to it. + found = FALSE + for (groupIdx in seq_along(groups)) { + group <- groups[[groupIdx]] + groupCluster <- group[[1]] + if (clusters_identical(cluster, groupCluster)) { + group <- c(group, cluster_with_name) + groups[[groupIdx]] <- group + found <- TRUE + break + } + } + + # if no matching group, create a new group with this cluster + if (!found) { + groups <- c(groups, list(cluster_with_name)) + } + } + + final_clusters = list() + + # choose one cluster from each group, prioritizing those with named factors + for (group in groups) { + named_cluster_idx <- Find(function(i) { cluster_levels_word_like(group[[i]]) }, 1:length(group)) + + if (!(is.null(named_cluster_idx))) { + final_clusters <- c(final_clusters, group[named_cluster_idx]) + } else { + final_clusters <- c(final_clusters, group[1]) + } + } + + final_clusters +} + +#' Checks if any of the cluster levels are word-like. +#' +#' @description +#' It is quite common for Seurat clusters to have values that are integers. +#' The are stored as characters, but can be easily parsed into numbers. +#' We want to prioritize clusters that have things like cell type names added. +#' +#' @param cluster A factor +#' +#' @return boolean if factor has names +#' +#' @noRd +cluster_levels_word_like <- function(cluster) { + lvls <- levels(cluster) + suppressWarnings({ + any(is.na(as.numeric(lvls))) + }) +} + +#' Sanitize barcodes into expected format +#' +#' @param barcodes character vector of barcodes names +#' +#' @importFrom utils strcapture +#' +#' @return character vector of sanitized barcode names +#' +#' @noRd +sanitize_barcodes <- function(barcodes) { + if (are_barcodes_valid(barcodes)) { + return(barcodes) + } + + # Some examples that we have seen + # + # Seurat Integrate will add a prefix to the barcode "12U_ACTGACTGACTG-1" + # Other users tend to add a prefix "SOMEPREFIX:ACTGACTGACTG" + pattern <-"^(.*?)(_|-|:)?([ACTG]{6,})(-\\d+)?(_|-|:)?(.*?)$" + + # only santize barcodes if all match the pattern + if (length(grep(pattern, barcodes)) != length(barcodes)) { + return(barcodes) + } + + # capture subgroups of pattern (prefix, barcode, suffix) + # NOTE: need to use perl regexs to support non-greedy matching + groups <- strcapture( + pattern=pattern, + x=barcodes, + perl=TRUE, + proto=list(prefix = character(), + sep1 = character(), + barcode = character(), + barcodeDashNum = character(), + sep2 = character(), + suffix = character())) + + # rewrite barcodes "BARCODE-PREFIX-SUFFIX" + updated_barcodes <- character(length(barcodes)) + for (i in 1:nrow(groups)) { + row <- groups[i,] + + prefix <- "" + if (nchar(row$prefix) > 0) { + prefix <- sprintf("-%s", row$prefix) + } + + suffix <- "" + if (nchar(row$suffix) > 0) { + suffix <- sprintf("-%s", row$suffix) + } + + updated_barcodes[[i]] = sprintf("%s%s%s%s", row$barcode, row$barcodeDashNum, prefix, suffix) + } + + updated_barcodes +} + +#' Gets the systems OS. +#' +#' @return "windows", "mac", "unix" +#' +#' @noRd +get_system_os <- function() { + if (.Platform$OS.type == "windows") { + os <- "windows" + } else { + platform <- R.Version()$platform + + if (grepl("apple", platform, ignore.case = TRUE) || grepl("darwin", platform, ignore.case = TRUE)) { + os <- "mac" + } else { + os <- "unix" + } + } + + os +} + +print_lines <- function(strs, prefix="") { + for (s in strs) { + cat(sprintf("%s%s\n", prefix, s)) + } +} diff --git a/R/validate.R b/R/validate.R new file mode 100644 index 0000000..9e20583 --- /dev/null +++ b/R/validate.R @@ -0,0 +1,167 @@ +#' Validate the seurat count matrix +#' +#' @param count_mat A sparse dgCMatrix +#' +#' @return A list with two elements: +#' \itemize{ +#' \item success: a logical value indicating success (TRUE) or failure (FALSE) +#' \item msg: an optional error message (NULL if success is TRUE) +#' } +#' +#' @importFrom methods is +#' +#' @export +validate_count_mat <- function(count_mat) { + if (!is(count_mat, "dgCMatrix")) { + return(err("count_mat must be a dgCMatrix")) + } + + features <- rownames(count_mat) + barcodes <- colnames(count_mat) + + if (is.null(features)) { + return(err("must supply feature dimnames on count_mat")) + } + if (is.null(barcodes)) { + return(err("must supply barcodes dimnames on count_mat")) + } + if (length(features) == 0) { + return(err("count_mat must have at least one feature")) + } + if (length(barcodes) == 0) { + return(err("count_mat must have at least one barcode")) + } + if (any(is.nan(count_mat@x))) { + return(err("matrix values must not be NaN")) + } + if (any(is.infinite(count_mat@x))) { + return(err("matrix values must not be infinite")) + } + + barcodes <- sanitize_barcodes(barcodes) + + if (!are_barcodes_valid(barcodes)) { + barcode_msg <- paste( + 'There is an issue with the formatting of your barcodes.', + 'Barcodes should begin with base pairs and end with an optional hyphen and suffix.', + 'For further information, please see the documentation: 10xgen.com/louper' + ) + + return(err(barcode_msg)) + } + + if (length(unique(barcodes)) != length(barcodes)) { + return(err("all barcodes should be unique")) + } + + SUCCESS +} + +#' Validate the format of the barcodes +#' +#' @param barcodes a character vector +#' +#' @return A boolean true or false +#' +#' @noRd +are_barcodes_valid <- function(barcodes) { + pattern <-"^([ACTG]{6,})(-.*?)?$" + return(all(grepl(pattern, barcodes))) +} + +#' Validate the seurat clusters +#' +#' @param clusters list of factors that hold information for each barcode +#' @param barcode_count number of barcodes +#' +#' @return A list with two elements: +#' \itemize{ +#' \item success: a logical value indicating success (TRUE) or failure (FALSE) +#' \item msg: an optional error message (NULL if success is TRUE) +#' } +#' +#' @importFrom methods is +#' +#' @export +validate_clusters <- function(clusters, barcode_count) { + cluster_names <- names(clusters) + + if (!is.list(clusters)) { + return(err("clusters must be in a list")) + } + if (length(clusters) == 0) { + return(err("clusters must have at least one cluster")) + } + if (!all(sapply(clusters, is.factor))) { + return(err("clusters must all be factors")) + } + if (is.null(cluster_names)) { + return(err("clusters must supply names")) + } + if (!all(sapply(cluster_names, nzchar))) { + return(err("cluster names cannot be the empty string")) + } + if (any(sapply(clusters, length) != barcode_count)) { + return(err("cluster must have the same length as the number of barcodes")) + } + + SUCCESS +} + +#' Validate the seurat projections +#' +#' @param projections list of matrices, all with dimensions (barcodeCount x 2) +#' @param barcode_count number of barcodes +#' +#' @return A list with two elements: +#' \itemize{ +#' \item success: a logical value indicating success (TRUE) or failure (FALSE) +#' \item msg: an optional error message (NULL if success is TRUE) +#' } +#' +#' @importFrom methods is +#' +#' @export +validate_projections <- function(projections, barcode_count) { + is.projection <- function(p) { return(is.matrix(p)) } + + # should have dimensions barcodeCount x 2 + projection_dims_good <- function(p) { + pdims <- dim(p) + return(pdims[[1]] == barcode_count && pdims[[2]] == 2) + } + + # no values should be NaN or Infinite + projection_values_good <- function(p) { + return( + !any(is.nan(p)) && + !any(is.infinite(p)) + ) + } + + proj_names <- names(projections) + + if (!is.list(projections)) { + return(err("projections must be in a list")) + } + if (length(projections) == 0) { + return(err("projections must have at least one projection")) + } + if (!all(sapply(projections, is.projection))) { + return(err("projections must all be matrices")) + } + if (is.null(proj_names)) { + return(err("projections must supply names")) + } + if (!all(sapply(proj_names, nzchar))) { + return(err("projection names cannot be the empty string")) + } + if (!all(sapply(projections, projection_dims_good))) { + return(err("projections must all have dimensions (BARCODE_COUNT, 2)")) + } + if (!all(sapply(projections, projection_values_good))) { + return(err("projections must not contain NaN or infinite values")) + } + + SUCCESS +} diff --git a/R/zzz.R b/R/zzz.R new file mode 100644 index 0000000..e66a0e4 --- /dev/null +++ b/R/zzz.R @@ -0,0 +1,6 @@ +.onLoad <- function(libname, pkgname) { + v <- needs_setup() + if (!v$success) { + warning(v$msg) + } +} diff --git a/README.md b/README.md new file mode 100644 index 0000000..20ca62c --- /dev/null +++ b/README.md @@ -0,0 +1,92 @@ +

+ loupeR - convert Seurat objects to Loupe files
+ Build Status + Build Status + EULA +
+ Convert Seurat objects to 10x Genomics Loupe files. +

+ +

+ How To Use • + Installation • + Troubleshooting +

+ +## How to Use + +Converting a Seurat object to a Loupe file is as simple as the following: + +```R +# import the library +library("loupeR") + +# convert the SeuratObject named `seurat_obj` to a Loupe file +create_loupe_from_seurat(seurat_obj) +``` + +Use the function `create_loupe` if you need more control in the clusters and projetions that included in the Loupe file. + +```R +# import the library +library("loupeR") + +# Gene Expression RNA assay +assay <- seurat_obj[["RNA"]] + +# convert the count matrix, clusters, and projections into a Loupe file +create_loupe( + assay@counts, + clusters = select_clusters(seurat_obj), + projections = select_projections(seurat_obj) +) +``` + +## Installation + +### HDF5 + +Before using `loupeR`, make sure that your system has installed [HDF5](https://www.hdfgroup.org/downloads/hdf5). The HDF5 organization requires registration before being able to download the installer. Below are some other more convenient methods for installing HDF5 if you happen to have these package managers installed. + +- macOS with [Homebrew](https://brew.sh/) - `brew install hdf5`
+- windows with [vcpkg](https://vcpkg.io/en/index.html) - `.\vcpkg install hdf5` + +### Installing loupeR from prebuilt bundle + +Go to the github [releases page](https://github.com/10XGenomics/loupeR/releases), find the version that you want, and download the platform specific `loupeR_PLATFORM.tar.gz`. For example, on macOS, the filename would be `loupeR_macOS.tar.gz`. + +Now in RStudio, or your R shell, run the following to install this package. + +```r +install.packages(PATH_TO_TAR_GZ, repos = NULL, type ='source') +``` + +If this fails, complaining about missing dependencies, you must manually install those as well first by running the following. + +``` r +install.packages("hdf5r") +install.packages("Seurat") +``` + +### Installing loupeR using the `remotes` package + +Another installation option is to use the `remotes` package to directly install `loupeR` and its dependencies. The installed package won't include the prebundled louper executable, so you must invoke the `loupeR::setup()` function which will go and download it. + +``` r +remotes::install_github("10XGenomics/loupeR") +loupeR::setup() +``` + +## Troubleshooting + +For more in depth documentation and support please head to our [support page](https://10xgen.com/louper) or send an email to [support@10xgenomics.com](mailto:support@10xgenomics.com) + +Additionally, we have provided utility functions to help gather useful information when contacting support or creating a Github issue. + +```R +# import the library +library("loupeR") + +# print extra debug information +create_bugreport_from_seurat(seurat_obj) +``` diff --git a/loupe10x.Rproj b/loupe10x.Rproj new file mode 100644 index 0000000..69fafd4 --- /dev/null +++ b/loupe10x.Rproj @@ -0,0 +1,22 @@ +Version: 1.0 + +RestoreWorkspace: No +SaveWorkspace: No +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX + +AutoAppendNewline: Yes +StripTrailingWhitespace: Yes +LineEndingConversion: Posix + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source +PackageRoxygenize: rd,collate,namespace diff --git a/man/create_bugreport.Rd b/man/create_bugreport.Rd new file mode 100644 index 0000000..94e38ba --- /dev/null +++ b/man/create_bugreport.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/err.R +\name{create_bugreport} +\alias{create_bugreport} +\title{Create a Bugreport from a count matrix, projections, and clusters} +\usage{ +create_bugreport( + count_mat, + clusters, + projections, + assay_name = NULL, + seurat_obj_version = NULL, + skip_metadata = FALSE +) +} +\arguments{ +\item{count_mat}{A sparse dgCMatrix} + +\item{clusters}{list of factors that hold information for each barcode} + +\item{projections}{list of matrices, all with dimensions (barcodeCount x 2)} + +\item{assay_name}{optional string that holds the Seurat Object assay name.} + +\item{seurat_obj_version}{optional string that holds the Seurat Object version. It is useful for debugging compatibility issues.} + +\item{skip_metadata}{optional logical which skips printing metadata} +} +\description{ +This bugreport can then be included when reaching out to 10xGenomics Support or when filing +a Github ticket. This information should be included along with any other output when creating a Loupe file. +} diff --git a/man/create_bugreport_from_seurat.Rd b/man/create_bugreport_from_seurat.Rd new file mode 100644 index 0000000..b3f5610 --- /dev/null +++ b/man/create_bugreport_from_seurat.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/err.R +\name{create_bugreport_from_seurat} +\alias{create_bugreport_from_seurat} +\title{Create a Bugreport from a Seurat Object} +\usage{ +create_bugreport_from_seurat(obj) +} +\arguments{ +\item{obj}{A Seurat Object} +} +\description{ +This bugreport can then be included when reaching out to 10xGenomics Support or when filing +a Github ticket. This information should be included along with any other output when creating a Loupe file. +} diff --git a/man/create_loupe.Rd b/man/create_loupe.Rd new file mode 100644 index 0000000..73781ee --- /dev/null +++ b/man/create_loupe.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lib.R +\name{create_loupe} +\alias{create_loupe} +\title{Create a Loupe file} +\usage{ +create_loupe( + count_mat, + clusters = list(), + projections = list(), + output_dir = NULL, + output_name = NULL, + executable_path = NULL, + force = FALSE, + seurat_obj_version = NULL +) +} +\arguments{ +\item{count_mat}{A sparse dgCMatrix} + +\item{clusters}{list of factors that hold information for each barcode} + +\item{projections}{list of matrices, all with dimensions (barcodeCount x 2)} + +\item{output_dir}{optional directory where the Loupe file will be written} + +\item{output_name}{optional name of the Loupe file with the extensions not included.} + +\item{executable_path}{optional path to the louper executable.} + +\item{force}{optional logical as to whether we should overwrite an already existing file} + +\item{seurat_obj_version}{optional string that holds the Seurat Object version. It is useful for debugging compatibility issues.} +} +\value{ +TRUE on success, FALSE on error +} +\description{ +Create a Loupe file +} diff --git a/man/create_loupe_from_seurat.Rd b/man/create_loupe_from_seurat.Rd new file mode 100644 index 0000000..31f35e9 --- /dev/null +++ b/man/create_loupe_from_seurat.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lib.R +\name{create_loupe_from_seurat} +\alias{create_loupe_from_seurat} +\title{Create a Loupe file from a Seurat Object} +\usage{ +create_loupe_from_seurat( + obj, + output_dir = NULL, + output_name = NULL, + dedup_clusters = FALSE, + executable_path = NULL, + force = FALSE +) +} +\arguments{ +\item{obj}{A Seurat Object} + +\item{output_dir}{optional directory where the Loupe file will be written} + +\item{output_name}{optional name of the Loupe file with the extensions not included.} + +\item{dedup_clusters}{optional logical that will try to deduplicate all clusters that are numerically the same} + +\item{executable_path}{optional path to the louper executable.} + +\item{force}{optional logical as to whether we should overwrite an already existing file} +} +\value{ +TRUE on success, FALSE on error +} +\description{ +\code{create_loupe_from_seurat()} passes the active counts matrix, +reductions, and factors found in \code{meta.data} to create a Loupe file. +} diff --git a/man/select_assay.Rd b/man/select_assay.Rd new file mode 100644 index 0000000..1f849f4 --- /dev/null +++ b/man/select_assay.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/util.R +\name{select_assay} +\alias{select_assay} +\title{Select the "best" assay for conversion} +\usage{ +select_assay(obj) +} +\arguments{ +\item{obj}{A Seurat Object} +} +\value{ +A list with the named Seurat Assay or NULL if not found +} +\description{ +Prioritizes the active assay, then RNA, and then the rest +Usable assays must have a non empty count matrix +} diff --git a/man/select_clusters.Rd b/man/select_clusters.Rd new file mode 100644 index 0000000..b0b3815 --- /dev/null +++ b/man/select_clusters.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/util.R +\name{select_clusters} +\alias{select_clusters} +\title{Select clusters from the assay} +\usage{ +select_clusters(obj, dedup = FALSE) +} +\arguments{ +\item{obj}{A Seurat Object} + +\item{dedup}{logical to dedupicate clusters. Default TRUE.} +} +\value{ +A list of factors +} +\description{ +Select clusters from the assay +} diff --git a/man/select_projections.Rd b/man/select_projections.Rd new file mode 100644 index 0000000..5dc05fd --- /dev/null +++ b/man/select_projections.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/util.R +\name{select_projections} +\alias{select_projections} +\title{Select projections from the assay} +\usage{ +select_projections(obj) +} +\arguments{ +\item{obj}{A Seurat Object} +} +\value{ +A list of matrices, all with dimensions (barcodeCount x 2) +} +\description{ +Select projections from the assay +} diff --git a/man/setup.Rd b/man/setup.Rd new file mode 100644 index 0000000..1329a86 --- /dev/null +++ b/man/setup.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/setup.R +\name{setup} +\alias{setup} +\title{Setup eula and download executable} +\usage{ +setup(executable_path = NULL) +} +\arguments{ +\item{executable_path}{optional string to a non default executable path} +} +\description{ +Setup eula and download executable +} diff --git a/man/validate_clusters.Rd b/man/validate_clusters.Rd new file mode 100644 index 0000000..1d2f8af --- /dev/null +++ b/man/validate_clusters.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/validate.R +\name{validate_clusters} +\alias{validate_clusters} +\title{Validate the seurat clusters} +\usage{ +validate_clusters(clusters, barcode_count) +} +\arguments{ +\item{clusters}{list of factors that hold information for each barcode} + +\item{barcode_count}{number of barcodes} +} +\value{ +A list with two elements: +\itemize{ +\item success: a logical value indicating success (TRUE) or failure (FALSE) +\item msg: an optional error message (NULL if success is TRUE) +} +} +\description{ +Validate the seurat clusters +} diff --git a/man/validate_count_mat.Rd b/man/validate_count_mat.Rd new file mode 100644 index 0000000..9fa5d7e --- /dev/null +++ b/man/validate_count_mat.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/validate.R +\name{validate_count_mat} +\alias{validate_count_mat} +\title{Validate the seurat count matrix} +\usage{ +validate_count_mat(count_mat) +} +\arguments{ +\item{count_mat}{A sparse dgCMatrix} +} +\value{ +A list with two elements: +\itemize{ +\item success: a logical value indicating success (TRUE) or failure (FALSE) +\item msg: an optional error message (NULL if success is TRUE) +} +} +\description{ +Validate the seurat count matrix +} diff --git a/man/validate_projections.Rd b/man/validate_projections.Rd new file mode 100644 index 0000000..69458e8 --- /dev/null +++ b/man/validate_projections.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/validate.R +\name{validate_projections} +\alias{validate_projections} +\title{Validate the seurat projections} +\usage{ +validate_projections(projections, barcode_count) +} +\arguments{ +\item{projections}{list of matrices, all with dimensions (barcodeCount x 2)} + +\item{barcode_count}{number of barcodes} +} +\value{ +A list with two elements: +\itemize{ +\item success: a logical value indicating success (TRUE) or failure (FALSE) +\item msg: an optional error message (NULL if success is TRUE) +} +} +\description{ +Validate the seurat projections +} diff --git a/tests/testthat.R b/tests/testthat.R new file mode 100644 index 0000000..7dc547a --- /dev/null +++ b/tests/testthat.R @@ -0,0 +1,12 @@ +# This file is part of the standard setup for testthat. +# It is recommended that you do not modify it. +# +# Where should you do additional test configuration? +# Learn more about the roles of various files in: +# * https://r-pkgs.org/tests.html +# * https://testthat.r-lib.org/reference/test_package.html#special-files + +library(testthat) +library(loupeR) + +test_check("loupeR") diff --git a/tests/testthat/helper.R b/tests/testthat/helper.R new file mode 100644 index 0000000..8b892a2 --- /dev/null +++ b/tests/testthat/helper.R @@ -0,0 +1,25 @@ +#' Create a sparse count_mat +#' +#' @importFrom Matrix rsparsematrix +create_count_mat <- function(rows, cols) { + mat <- Matrix::rsparsematrix(rows, cols, 0.5, rand.x = function(n) as.integer(100*runif(n))) + + rownames <- as.character() + if (rows > 0) { + rownames <- paste0("row", 1:rows) + } + + colnames <- as.character() + if (cols > 0) { + colnames <- paste0("col", 1:cols) + } + + dimnames(mat) <- list(rownames, colnames) + mat +} + +#' Create a dense matrix +create_dense_mat <- function(rows, cols) { + count <- rows * cols + matrix(runif(count), nrow=rows) +} diff --git a/tests/testthat/test-hdf5.R b/tests/testthat/test-hdf5.R new file mode 100644 index 0000000..e4e7dcc --- /dev/null +++ b/tests/testthat/test-hdf5.R @@ -0,0 +1,49 @@ +test_that("can create hdf5", { + barcode_count <- 5 + proj <- create_dense_mat(barcode_count, 2) + count_mat <- create_count_mat(100, barcode_count) + clusters <- list("f1" = factor(c("a", "c", "b", "a", "b"), levels=c("a", "b", "c"), ordered=TRUE)) + projections <- list("p1" = proj) + h5path <- sprintf("%s.h5", tempfile()) + + seurat_obj_version <- "1.2.3" + create_hdf5(count_mat, clusters, projections, h5path, seurat_obj_version) + + f <- hdf5r::h5file(h5path) + + # spot check matrix + matrix_group <- hdf5r::openGroup(f, "matrix") + barcodes <- hdf5r::openLocation(matrix_group, "barcodes") + data <- hdf5r::openLocation(matrix_group, "data") + expect_equal(hdf5r::readDataSet(barcodes), paste0("col", 1:barcode_count)) + expect_equal(hdf5r::readDataSet(data), count_mat@x) + + features_group <- hdf5r::openGroup(matrix_group, "features") + feature_names <- hdf5r::openGroup(features_group, "name") + expect_equal(hdf5r::readDataSet(feature_names), paste0("row", 1:100)) + + # spot check projections + projs_group <- hdf5r::openGroup(f, "projections") + proj_group <- hdf5r::openGroup(projs_group, "p1") + proj_data <- hdf5r::openLocation(proj_group, "data") + expect_equal(proj, hdf5r::readDataSet(proj_data)) + + # spot check clusters + clusters_group <- hdf5r::openGroup(f, "clusters") + cluster_group <- hdf5r::openGroup(clusters_group, "f1") + assignments <- hdf5r::openLocation(cluster_group, "assignments") + group_names <- hdf5r::openLocation(cluster_group, "group_names") + expect_equal(hdf5r::readDataSet(assignments), clusters[[1]]@.Data - 1) + expect_equal(hdf5r::readDataSet(group_names), levels(clusters[[1]])) + + # spot check metadata + metadata <- hdf5r::openGroup(f, "metadata") + + tool <- hdf5r::openLocation(metadata, "tool") + expect_equal(hdf5r::readDataSet(tool), "loupeR") + + extra <- hdf5r::openGroup(metadata, "extra") + loupeR_seurat_version <- hdf5r::openLocation(extra, "loupeR_seurat_version") + val <- hdf5r::readDataSet(loupeR_seurat_version) + expect(!is.null(hdf5r::readDataSet(loupeR_seurat_version)), "extra field is missing") +}) diff --git a/tests/testthat/test-util.R b/tests/testthat/test-util.R new file mode 100644 index 0000000..ad255aa --- /dev/null +++ b/tests/testthat/test-util.R @@ -0,0 +1,120 @@ +test_that("select_assay selects active assay", { + rna1 <- create_count_mat(5, 5) + rna2 <- create_count_mat(5, 5) + rna3 <- create_count_mat(5, 5) + + obj <- Seurat::CreateSeuratObject(rna1, assay="rna1") + obj[["rna2_"]] = Seurat::CreateAssayObject(rna2, key="rna2_") + obj[["rna3_"]] = Seurat::CreateAssayObject(rna3, key="rna3_") + + expect_equal(Seurat::DefaultAssay(object = obj), "rna1") + Seurat::DefaultAssay(object = obj) <- "rna2_" + expect_equal(Seurat::DefaultAssay(object = obj), "rna2_") + + assay <- select_assay(obj) + expect_equal(names(assay), "rna2_") +}) + +test_that("select_clusters selects Idents", { + rna <- create_count_mat(4, 4) + obj <- Seurat::CreateSeuratObject(rna, assay="rna") + + # adds a new ident, old is saved at `orig.ident` + Seurat::Idents(obj, cells = 1:4) <- c('a', 'b', 'c', 'd') + + clusters <- select_clusters(obj) + expect_length(clusters, 2) + expect_equal(levels(clusters[[1]]), c('a', 'b', 'c', 'd')) +}) + +test_that("select_clusters selects meta.data factors", { + rna <- create_count_mat(4, 4) + obj <- Seurat::CreateSeuratObject(rna, assay="rna") + + cell_types <- factor(c("c1", "c3", "c2", "c2"), levels=c("c1", "c2", "c3")) + obj@meta.data['cell_types'] = cell_types + + clusters <- select_clusters(obj) + expect_length(clusters, 3) + expect_equal(clusters[[3]], cell_types) +}) + +test_that("select_clusters selects meta.data factors with deduplication", { + rna <- create_count_mat(4, 4) + obj <- Seurat::CreateSeuratObject(rna, assay="rna") + + cell_types <- factor(c("c1", "c3", "c2", "c2"), levels=c("c1", "c2", "c3")) + obj@meta.data['cell_types'] = cell_types + + clusters <- select_clusters(obj, dedup=TRUE) + expect_length(clusters, 2) + expect_equal(clusters[[2]], cell_types) +}) + +test_that("select_projections select reductions", { + rna <- create_count_mat(1000, 100) + obj <- Seurat::CreateSeuratObject(rna, assay="rna") + obj <- suppressWarnings(Seurat::FindVariableFeatures(obj, verbose=FALSE)) + obj <- Seurat::ScaleData(obj, verbose=FALSE) + obj <- suppressWarnings(Seurat::RunPCA(obj, verbose=FALSE)) + obj <- Seurat::RunTSNE(obj, verbose=FALSE) + + projs <- select_projections(obj) + expect_length(projs, 1) + expect_equal(names(projs), c("tsne")) +}) + +test_that("deduplicate_clusters removes duplicates", { + cell_types <- factor(c("c1", "c3", "c2", "c2"), levels=c("c1", "c2", "c3")) + clusters <- deduplicate_clusters(list(cell_types=cell_types, clusters=cell_types)) + + expect_length(clusters, 1) + expect_equal(clusters[[1]], cell_types) +}) + +test_that("deduplicate_clusters prefers named factors", { + cell_types <- factor(c("c1", "c3", "c2", "c2"), levels=c("c1", "c2", "c3")) + cell_types_numeric_levels <- factor(c("1", "3", "2", "2"), levels=c("1", "2", "3")) + clusters <- deduplicate_clusters(list(cell_types=cell_types, cell_types_numeric_levels=cell_types_numeric_levels)) + + expect_length(clusters, 1) + expect_equal(clusters[[1]], cell_types) +}) + +test_that("sanitize_barcodes corrects barcodes", { + # no change + expect_equal(sanitize_barcodes("ACTGAA"), "ACTGAA") + + # no change + lane numbers + expect_equal(sanitize_barcodes("ACTGAA-1"), "ACTGAA-1") + + # prefix + expect_equal(sanitize_barcodes("prefix_ACTGAA"), "ACTGAA-prefix") + expect_equal(sanitize_barcodes("prefix-ACTGAA"), "ACTGAA-prefix") + expect_equal(sanitize_barcodes("prefix:ACTGAA"), "ACTGAA-prefix") + + # barcodes with lane numbers + prefix + expect_equal(sanitize_barcodes("prefix_ACTGAA-1"), "ACTGAA-1-prefix") + expect_equal(sanitize_barcodes("prefix-ACTGAA-1"), "ACTGAA-1-prefix") + expect_equal(sanitize_barcodes("prefix:ACTGAA-1"), "ACTGAA-1-prefix") + + # barcodes + prefix_with_underscore + expect_equal(sanitize_barcodes("pre_fix_ACTGAA"), "ACTGAA-pre_fix") + expect_equal(sanitize_barcodes("pre_fix-ACTGAA"), "ACTGAA-pre_fix") + expect_equal(sanitize_barcodes("pre_fix:ACTGAA"), "ACTGAA-pre_fix") + + # barcodes with lane numbers + prefix_with_underscore + expect_equal(sanitize_barcodes("pre_fix_ACTGAA-1"), "ACTGAA-1-pre_fix") + expect_equal(sanitize_barcodes("pre_fix-ACTGAA-1"), "ACTGAA-1-pre_fix") + expect_equal(sanitize_barcodes("pre_fix:ACTGAA-1"), "ACTGAA-1-pre_fix") + + # barcodes with prefix_with_underscore and suffix_with_underscore + expect_equal(sanitize_barcodes("pre_fix_ACTGAA-suf_fix"), "ACTGAA-pre_fix-suf_fix") + expect_equal(sanitize_barcodes("pre_fix-ACTGAA-suf_fix"), "ACTGAA-pre_fix-suf_fix") + expect_equal(sanitize_barcodes("pre_fix:ACTGAA-suf_fix"), "ACTGAA-pre_fix-suf_fix") + + # barcodes with lane lane numbers with prefix_with_underscore and suffix_with_underscore + expect_equal(sanitize_barcodes("pre_fix_ACTGAA-1-suf_fix"), "ACTGAA-1-pre_fix-suf_fix") + expect_equal(sanitize_barcodes("pre_fix-ACTGAA-1-suf_fix"), "ACTGAA-1-pre_fix-suf_fix") + expect_equal(sanitize_barcodes("pre_fix:ACTGAA-1-suf_fix"), "ACTGAA-1-pre_fix-suf_fix") +}) diff --git a/tests/testthat/test-validate.R b/tests/testthat/test-validate.R new file mode 100644 index 0000000..6c4c154 --- /dev/null +++ b/tests/testthat/test-validate.R @@ -0,0 +1,159 @@ +test_that("validate count matrix", { + # wrong type + resp <- validate_count_mat("not_a_valid_matrix") + expect_false(resp$success) + expect_match(resp$msg, "dgCMatrix") + + # missing feature names + mat <- create_count_mat(10, 10) + rownames(mat) <- NULL + resp <- validate_count_mat(mat) + expect_false(resp$success) + expect_match(resp$msg, "feature dimnames") + + # missing barcode names + mat <- create_count_mat(10, 10) + colnames(mat) <- NULL + resp <- validate_count_mat(mat) + expect_false(resp$success) + expect_match(resp$msg, "barcodes dimnames") + + # NaN values + mat <- create_count_mat(10, 10) + mat@x[[1]] <- NaN # set first value to NaN + resp <- validate_count_mat(mat) + expect_false(resp$success) + expect_match(resp$msg, "not be NaN") + + # infinite + mat <- create_count_mat(10, 10) + mat@x[[1]] <- Inf + resp <- validate_count_mat(mat) + expect_false(resp$success) + expect_match(resp$msg, "not be infinite") + + # bad barcodes + mat <- create_count_mat(10, 10) + resp <- validate_count_mat(mat) + expect_false(resp$success) + expect_match(resp$msg, "There is an issue with the formatting of your barcodes") + + # non unique barcodes + mat <- create_count_mat(3, 3) + colnames(mat) <- c("ACTGAA-1", "ACTGAA-1", "ACTGAA-1") + resp <- validate_count_mat(mat) + expect_false(resp$success) + expect_match(resp$msg, "should be unique") + + # good + mat <- create_count_mat(3, 3) + colnames(mat) <- c("ACTGAA-1", "ACTAAA-1", "CTAGAA-1") + resp <- validate_count_mat(mat) + expect_true(resp$success) + + # bad, but fixable barcodes + barcodes <- c("prefix-ACTGAA-1", "prefix-ACTAAA-1", "prefix-CTAGAA-1") + mat <- create_count_mat(3, 3) + colnames(mat) <- barcodes + resp <- validate_count_mat(mat) + expect_true(resp$success) +}) + +test_that("validate clusters", { + # wrong type + resp <- validate_clusters("not_a_valid_cluster_list", 3) + expect_false(resp$success) + expect_match(resp$msg, "must be in a list") + + # empty list + resp <- validate_clusters(list(), 3) + expect_false(resp$success) + expect_match(resp$msg, "at least one") + + # clusters should be factors + resp <- validate_clusters(list("not a factor"), 3) + expect_false(resp$success) + expect_match(resp$msg, "must all be factors") + + # clusters supply names + resp <- validate_clusters(list(factor(c("one", "two", "three"))), 3) + expect_false(resp$success) + expect_match(resp$msg, "must supply names") + + # empty name + factors <- list() + f <- factor(c("one", "two", "three")) + factors[[""]] = f + resp <- validate_clusters(factors, 3) + expect_false(resp$success) + expect_match(resp$msg, "cannot be the empty string") + + # wrong barcode count + factors <- list("f1" = factor(c("one", "two", "three"))) + resp <- validate_clusters(factors, 42) + expect_false(resp$success) + expect_match(resp$msg, "same length as the number of barcodes") + + # good + factors <- list("f1" = factor(c("one", "two", "three"))) + resp <- validate_clusters(factors, 3) + expect_true(resp$success) +}) + +test_that("validate projections", { + barcode_count <- 3 + + # wrong type + resp <- validate_projections("not_a_valid_projection_list", 3) + expect_false(resp$success) + expect_match(resp$msg, "must be in a list") + + # empty list + resp <- validate_projections(list(), 3) + expect_false(resp$success) + expect_match(resp$msg, "at least one") + + # projections should be matrix + resp <- validate_projections(list("not a matrix"), 3) + expect_false(resp$success) + expect_match(resp$msg, "must all be matrices") + + # projections supply names + proj <- create_dense_mat(barcode_count, 2) + resp <- validate_projections(list(proj), barcode_count) + expect_false(resp$success) + expect_match(resp$msg, "must supply names") + + # empty name + projs <- list() + proj <- create_dense_mat(barcode_count, 2) + projs[[""]] = proj + resp <- validate_projections(projs, barcode_count) + expect_false(resp$success) + expect_match(resp$msg, "cannot be the empty string") + + # mixed dimensions + proj <- create_dense_mat(2, barcode_count) + resp <- validate_projections(list("p1" = proj), barcode_count) + expect_false(resp$success) + expect_match(resp$msg, "must all have dimensions") + + # no NaN + proj <- create_dense_mat(barcode_count, 2) + proj[1,1] <- NaN + resp <- validate_projections(list("p1" = proj), barcode_count) + expect_false(resp$success) + expect_match(resp$msg, "must not contain NaN") + + # no infinite + proj <- create_dense_mat(barcode_count, 2) + proj[1,1] <- Inf + resp <- validate_projections(list("p1" = proj), barcode_count) + expect_false(resp$success) + expect_match(resp$msg, "infinite values") + + # good + proj <- create_dense_mat(barcode_count, 2) + resp <- validate_projections(list("p1" = proj), barcode_count) + expect_true(resp$success) +}) diff --git a/tools/doc/logo.svg b/tools/doc/logo.svg new file mode 100644 index 0000000..27d3e5c --- /dev/null +++ b/tools/doc/logo.svg @@ -0,0 +1,49 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +