From fd9b5116bb63e4c1eebcdfbc05ee26e995ba23a4 Mon Sep 17 00:00:00 2001 From: Michael Milton Date: Thu, 16 Feb 2023 19:50:29 +1100 Subject: [PATCH 1/5] Update URLs --- R/query.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/query.R b/R/query.R index c9ef553..94a0a91 100644 --- a/R/query.R +++ b/R/query.R @@ -18,7 +18,7 @@ aside <- function(x, ...) { x } -REMOTE_URL <- "https://swift.rc.nectar.org.au/v1/AUTH_06d6e008e3e642da99d806ba3ea629c5/harmonised-human-atlas" +REMOTE_URL <- "https://object-store.rc.nectar.org.au/v1/AUTH_06d6e008e3e642da99d806ba3ea629c5/harmonised-human-atlas" #' Given a data frame of HCA metadata, returns a SingleCellExperiment object #' corresponding to the samples in that data frame @@ -374,7 +374,7 @@ get_seurat <- function(...) { #' @importFrom cli cli_alert_info #' @importFrom utils untar get_metadata <- function( - remote_url = "https://object-store.rc.nectar.org.au/v1/AUTH_06d6e008e3e642da99d806ba3ea629c5/metadata-sqlite/metadata.parquet", + remote_url = "https://object-store.rc.nectar.org.au/v1/AUTH_06d6e008e3e642da99d806ba3ea629c5/metadata/metadata.0.2.2.parquet", cache_directory = get_default_cache_dir() ) { db_path <- file.path(cache_directory, "metadata.parquet") From 7fde5a668520a84db3c930badd1ab13e36f71bd1 Mon Sep 17 00:00:00 2001 From: Michael Milton Date: Thu, 16 Feb 2023 20:14:06 +1100 Subject: [PATCH 2/5] Use a versioned counts directory --- R/query.R | 8 +++++--- tests/testthat/test-query.R | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/R/query.R b/R/query.R index f00b489..773a722 100644 --- a/R/query.R +++ b/R/query.R @@ -11,6 +11,7 @@ assay_map <- c( ) REMOTE_URL <- "https://swift.rc.nectar.org.au/v1/AUTH_06d6e008e3e642da99d806ba3ea629c5/harmonised-human-atlas" +COUNTS_VERSION <- "0.2" #' Given a data frame of HCA metadata, returns a SingleCellExperiment object #' corresponding to the samples in that data frame @@ -81,7 +82,8 @@ get_SingleCellExperiment <- function( inherits(raw_data, "tbl") |> assert_that() has_name(raw_data, c("_cell", "file_id_db")) |> assert_that() - cache_directory |> dir.create(showWarnings = FALSE) + versioned_cache_directory = file.path(cache_directory, COUNTS_VERSION) + versioned_cache_directory |> dir.create(showWarnings = FALSE, recursive = TRUE) subdirs <- assay_map[assays] @@ -100,7 +102,7 @@ get_SingleCellExperiment <- function( as.character() |> sync_assay_files( url = parsed_repo, - cache_dir = cache_directory, + cache_dir = versioned_cache_directory, files = _, subdirs = subdirs ) @@ -111,7 +113,7 @@ get_SingleCellExperiment <- function( imap(function(current_subdir, current_assay) { # Build up an SCE for each assay dir_prefix <- file.path( - cache_directory, + versioned_cache_directory, current_subdir ) diff --git a/tests/testthat/test-query.R b/tests/testthat/test-query.R index 92d491a..6ff3c02 100755 --- a/tests/testthat/test-query.R +++ b/tests/testthat/test-query.R @@ -2,7 +2,7 @@ library(CuratedAtlasQueryR) test_that("get_SingleCellExperiment() correctly handles duplicate cell IDs", { meta <- get_metadata() |> - dplyr::filter(.cell == "868417_1") |> + dplyr::filter(`_cell` == "868417_1") |> dplyr::collect() sce <- get_SingleCellExperiment(meta) # This query should return multiple cells, despite querying only 1 cell ID From d9423064cb84de649edb219d46934d012fdff1c6 Mon Sep 17 00:00:00 2001 From: Stefano Mangiola Date: Fri, 17 Feb 2023 08:16:36 +1100 Subject: [PATCH 3/5] Update DESCRIPTION --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index a2deb87..6d41984 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: CuratedAtlasQueryR Title: Queries the Human Cell Atlas -Version: 0.3.1 +Version: 0.4.1 Authors@R: c( person( "Stefano", From e56c95a9dc61370cac390ed87f905c3223c409ee Mon Sep 17 00:00:00 2001 From: Stefano Mangiola Date: Fri, 17 Feb 2023 08:16:59 +1100 Subject: [PATCH 4/5] Update DESCRIPTION --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 6d41984..192e0a5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: CuratedAtlasQueryR Title: Queries the Human Cell Atlas -Version: 0.4.1 +Version: 0.4.0 Authors@R: c( person( "Stefano", From dde7b20cf49ba568a2eeeaf619bec03eab0c6f3a Mon Sep 17 00:00:00 2001 From: Michael Milton Date: Fri, 17 Feb 2023 10:52:07 +1100 Subject: [PATCH 5/5] R CMD check --- DESCRIPTION | 1 + R/query.R | 17 +++++++++++------ man/get_SingleCellExperiment.Rd | 18 +++++++++--------- man/get_metadata.Rd | 21 +++++++++------------ man/get_seurat.Rd | 10 +++++----- 5 files changed, 35 insertions(+), 32 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 192e0a5..f3e425e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -126,3 +126,4 @@ LazyDataCompression: xz URL: https://github.com/stemangiola/CuratedAtlasQueryR BugReports: https://github.com/stemangiola/CuratedAtlasQueryR/issues VignetteBuilder: knitr +Roxygen: list(markdown = TRUE) diff --git a/R/query.R b/R/query.R index 773a722..129c86f 100644 --- a/R/query.R +++ b/R/query.R @@ -13,14 +13,17 @@ assay_map <- c( REMOTE_URL <- "https://swift.rc.nectar.org.au/v1/AUTH_06d6e008e3e642da99d806ba3ea629c5/harmonised-human-atlas" COUNTS_VERSION <- "0.2" -#' Given a data frame of HCA metadata, returns a SingleCellExperiment object -#' corresponding to the samples in that data frame +#' Gets a SingleCellExperiment from curated metadata +#' +#' Given a data frame of Curated Atlas metadata obtained from [get_metadata()], +#' returns a [`SingleCellExperiment::SingleCellExperiment-class`] object corresponding to the samples in that +#' data frame #' #' @param data A data frame containing, at minimum, a `.sample` column, which #' corresponds to a single cell sample ID. This can be obtained from the #' [get_metadata()] function. -#' @param assays A character vector whose elements must be either "counts" and/or -#' "cpm", representing the corresponding assay(s) you want to request. +#' @param assays A character vector whose elements must be either "counts" +#' and/or "cpm", representing the corresponding assay(s) you want to request. #' @param repository A character vector of length one. If provided, it should be #' an HTTP URL pointing to the location where the single cell data is stored. #' @param cache_directory An optional character vector of length one. If @@ -52,7 +55,7 @@ COUNTS_VERSION <- "0.2" #' #' @export #' -#' +#' get_SingleCellExperiment <- function( data, assays = c("counts", "cpm"), @@ -335,10 +338,12 @@ get_seurat <- function(...) { get_SingleCellExperiment(...) |> as.Seurat(data = NULL) } +#' Gets the Curated Atlas metadata as a data frame. +#' #' Downloads a parquet database of the Human Cell Atlas metadata to a local #' cache, and then opens it as a data frame. It can then be filtered and #' passed into [get_SingleCellExperiment()] -#' to obtain a [`SingleCellExperiment`](SingleCellExperiment::SingleCellExperiment-class) +#' to obtain a [`SingleCellExperiment::SingleCellExperiment-class`] #' #' @param remote_url Optional character vector of length 1. An HTTP URL pointing #' to the location of the parquet database. diff --git a/man/get_SingleCellExperiment.Rd b/man/get_SingleCellExperiment.Rd index 6bf9aac..8c5bc59 100644 --- a/man/get_SingleCellExperiment.Rd +++ b/man/get_SingleCellExperiment.Rd @@ -2,8 +2,7 @@ % Please edit documentation in R/query.R \name{get_SingleCellExperiment} \alias{get_SingleCellExperiment} -\title{Given a data frame of HCA metadata, returns a SingleCellExperiment object -corresponding to the samples in that data frame} +\title{Gets a SingleCellExperiment from curated metadata} \usage{ get_SingleCellExperiment( data, @@ -14,12 +13,12 @@ get_SingleCellExperiment( ) } \arguments{ -\item{data}{A data frame containing, at minimum, a `.sample` column, which +\item{data}{A data frame containing, at minimum, a \code{.sample} column, which corresponds to a single cell sample ID. This can be obtained from the -[get_metadata()] function.} +\code{\link[=get_metadata]{get_metadata()}} function.} -\item{assays}{A character vector whose elements must be either "counts" and/or -"cpm", representing the corresponding assay(s) you want to request.} +\item{assays}{A character vector whose elements must be either "counts" +and/or "cpm", representing the corresponding assay(s) you want to request.} \item{cache_directory}{An optional character vector of length one. If provided, it should indicate a local file path where any remotely accessed @@ -33,11 +32,12 @@ the counts for. By default counts for all features will be returned.} } \value{ A SingleCellExperiment object, with one assay for each value in the - assays argument +assays argument } \description{ -Given a data frame of HCA metadata, returns a SingleCellExperiment object -corresponding to the samples in that data frame +Given a data frame of Curated Atlas metadata obtained from \code{\link[=get_metadata]{get_metadata()}}, +returns a \code{\link[SingleCellExperiment:SingleCellExperiment]{SingleCellExperiment::SingleCellExperiment}} object corresponding to the samples in that +data frame } \examples{ meta <- get_metadata() |> head(2) diff --git a/man/get_metadata.Rd b/man/get_metadata.Rd index f82560f..76dcf9d 100644 --- a/man/get_metadata.Rd +++ b/man/get_metadata.Rd @@ -2,14 +2,11 @@ % Please edit documentation in R/query.R \name{get_metadata} \alias{get_metadata} -\title{Downloads a parquet database of the Human Cell Atlas metadata to a local -cache, and then opens it as a data frame. It can then be filtered and -passed into [get_SingleCellExperiment()] -to obtain a [`SingleCellExperiment`](SingleCellExperiment::SingleCellExperiment-class)} +\title{Gets the Curated Atlas metadata as a data frame.} \usage{ get_metadata( remote_url = - "https://object-store.rc.nectar.org.au/v1/AUTH_06d6e008e3e642da99d806ba3ea629c5/metadata-sqlite/metadata.parquet", + "https://object-store.rc.nectar.org.au/v1/AUTH_06d6e008e3e642da99d806ba3ea629c5/metadata/metadata.0.2.2.parquet", cache_directory = get_default_cache_dir() ) } @@ -23,15 +20,15 @@ metadata.parquet} } \value{ A lazy data.frame subclass containing the metadata. You can interact - with this object using most standard dplyr functions. For string matching, - it is recommended that you use `stringr::str_like` to filter character - columns, as `stringr::str_match` will not work. +with this object using most standard dplyr functions. For string matching, +it is recommended that you use \code{stringr::str_like} to filter character +columns, as \code{stringr::str_match} will not work. } \description{ -Downloads a parquet database of the Human Cell Atlas metadata to a local -cache, and then opens it as a data frame. It can then be filtered and -passed into [get_SingleCellExperiment()] -to obtain a [`SingleCellExperiment`](SingleCellExperiment::SingleCellExperiment-class) +Downloads a parquet database of the Human Cell Atlas metadata to a local +cache, and then opens it as a data frame. It can then be filtered and +passed into \code{\link[=get_SingleCellExperiment]{get_SingleCellExperiment()}} +to obtain a \code{\link[SingleCellExperiment:SingleCellExperiment]{SingleCellExperiment::SingleCellExperiment}} } \examples{ library(dplyr) diff --git a/man/get_seurat.Rd b/man/get_seurat.Rd index 2e2a491..fb5189b 100644 --- a/man/get_seurat.Rd +++ b/man/get_seurat.Rd @@ -11,11 +11,11 @@ get_seurat(...) \item{...}{ Arguments passed on to \code{\link[=get_SingleCellExperiment]{get_SingleCellExperiment}} \describe{ - \item{\code{data}}{A data frame containing, at minimum, a `.sample` column, which + \item{\code{data}}{A data frame containing, at minimum, a \code{.sample} column, which corresponds to a single cell sample ID. This can be obtained from the -[get_metadata()] function.} - \item{\code{assays}}{A character vector whose elements must be either "counts" and/or -"cpm", representing the corresponding assay(s) you want to request.} +\code{\link[=get_metadata]{get_metadata()}} function.} + \item{\code{assays}}{A character vector whose elements must be either "counts" +and/or "cpm", representing the corresponding assay(s) you want to request.} \item{\code{repository}}{A character vector of length one. If provided, it should be an HTTP URL pointing to the location where the single cell data is stored.} \item{\code{cache_directory}}{An optional character vector of length one. If @@ -27,7 +27,7 @@ the counts for. By default counts for all features will be returned.} } \value{ A Seurat object containing the same data as a call to - get_SingleCellExperiment. +get_SingleCellExperiment. } \description{ Given a data frame of HCA metadata, returns a Seurat object corresponding to