diff --git a/cache/unnamed-chunk-12_fa7397dc612d368396c916ef2c024026.RData b/cache/unnamed-chunk-12_fa7397dc612d368396c916ef2c024026.RData new file mode 100644 index 0000000..1eec31e Binary files /dev/null and b/cache/unnamed-chunk-12_fa7397dc612d368396c916ef2c024026.RData differ diff --git a/cache/unnamed-chunk-12_ff65ff8596c60eb7d0bdd5aeedfe9718.rdb b/cache/unnamed-chunk-12_fa7397dc612d368396c916ef2c024026.rdb similarity index 100% rename from cache/unnamed-chunk-12_ff65ff8596c60eb7d0bdd5aeedfe9718.rdb rename to cache/unnamed-chunk-12_fa7397dc612d368396c916ef2c024026.rdb diff --git a/cache/unnamed-chunk-12_ff65ff8596c60eb7d0bdd5aeedfe9718.rdx b/cache/unnamed-chunk-12_fa7397dc612d368396c916ef2c024026.rdx similarity index 100% rename from cache/unnamed-chunk-12_ff65ff8596c60eb7d0bdd5aeedfe9718.rdx rename to cache/unnamed-chunk-12_fa7397dc612d368396c916ef2c024026.rdx diff --git a/cache/unnamed-chunk-12_ff65ff8596c60eb7d0bdd5aeedfe9718.RData b/cache/unnamed-chunk-12_ff65ff8596c60eb7d0bdd5aeedfe9718.RData deleted file mode 100644 index ef1f0c5..0000000 Binary files a/cache/unnamed-chunk-12_ff65ff8596c60eb7d0bdd5aeedfe9718.RData and /dev/null differ diff --git a/cell_type_annotation.md b/cell_type_annotation.md index 197a45e..5f8d986 100644 --- a/cell_type_annotation.md +++ b/cell_type_annotation.md @@ -1978,7 +1978,7 @@ Use `BiocParallel` and the `BPPARAM` argument! This example will set it to use f ``` r library(BiocParallel) -my_bpparam = MulticoreParam(workers = 4) +my_bpparam <- MulticoreParam(workers = 4) res2 <- SingleR(test = sce.mat, ref = ref.mat, diff --git a/eda_qc.md b/eda_qc.md index 622aeb4..3c13970 100644 --- a/eda_qc.md +++ b/eda_qc.md @@ -85,8 +85,8 @@ bcrank <- barcodeRanks(counts(sce)) # Only showing unique points for plotting speed. uniq <- !duplicated(bcrank$rank) -line_df = data.frame(cutoff = names(metadata(bcrank)), - value = unlist(metadata(bcrank))) +line_df <- data.frame(cutoff = names(metadata(bcrank)), + value = unlist(metadata(bcrank))) ggplot(bcrank[uniq,], aes(rank, total)) + geom_point() + @@ -107,6 +107,8 @@ A simple approach would be to apply a threshold on the total count to only retai ::: callout Depending on your data source, identifying and discarding empty droplets may not be necessary. Some academic institutions have research cores dedicated to single cell work that perform the sample preparation and sequencing. Many of these cores will also perform empty droplet filtering and other initial QC steps. If the sequencing outputs were provided to you by someone else, make sure to communicate with them about what pre-processing steps have been performed, if any. + + ::: :::: challenge @@ -973,7 +975,7 @@ e.out <- emptyDrops(counts(sce)) sce <- sce[,which(e.out$FDR <= 0.001)] # Thankfully the data come with gene symbols, which we can use to identify mitochondrial genes: -is.mito = grepl("^MT-", rowData(sce)$Symbol) +is.mito <- grepl("^MT-", rowData(sce)$Symbol) # QC metrics ---- df <- perCellQCMetrics(sce, subsets = list(Mito = is.mito)) diff --git a/hca.md b/hca.md index 34b63e9..7f91560 100644 --- a/hca.md +++ b/hca.md @@ -250,7 +250,7 @@ For the sake of demonstration, we'll focus this small subset of samples: ``` r -sample_subset = metadata |> +sample_subset <- metadata |> filter( ethnicity == "African" & grepl("10x", assay) & diff --git a/intro-sce.md b/intro-sce.md index 6ec043e..a6f524d 100644 --- a/intro-sce.md +++ b/intro-sce.md @@ -318,9 +318,9 @@ The `SingleCellExperiment` constructor function can be used to create a new `Sin ``` r -mat = matrix(runif(30), ncol = 5) +mat <- matrix(runif(30), ncol = 5) -my_sce = SingleCellExperiment(assays = list(logcounts = mat)) +my_sce <- SingleCellExperiment(assays = list(logcounts = mat)) my_sce$my_col_info = runif(5) @@ -359,7 +359,7 @@ sce <- WTChimeraData(samples = 5) sce6 <- WTChimeraData(samples = 6) -combined_sce = cbind(sce, sce6) +combined_sce <- cbind(sce, sce6) combined_sce ``` diff --git a/large_data.md b/large_data.md index 6d0abc1..b59e334 100644 --- a/large_data.md +++ b/large_data.md @@ -438,22 +438,21 @@ table(exact = colLabels(sce), approx = clusters) ``` output approx -exact 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 - 1 90 0 0 0 4 0 0 0 1 0 0 0 0 0 0 - 2 0 143 0 0 0 0 0 0 0 0 0 0 0 0 1 - 3 0 0 77 0 0 0 0 0 0 0 0 0 0 0 0 - 4 0 0 0 341 0 0 0 0 0 0 0 0 0 0 0 - 5 0 0 0 0 388 0 0 0 0 1 0 1 0 0 0 - 6 0 0 0 0 0 208 1 0 0 0 1 0 0 0 0 - 7 0 0 0 0 0 1 244 0 0 1 0 0 0 0 0 - 8 0 0 0 0 1 0 0 91 0 0 0 0 0 0 0 - 9 1 0 0 0 1 0 0 0 106 0 0 0 0 0 0 - 10 0 0 0 0 0 0 0 0 0 113 0 0 0 0 0 - 11 0 0 0 0 0 0 0 0 0 0 153 0 0 0 0 - 12 0 0 0 0 2 0 0 0 0 0 0 218 0 0 0 - 13 0 0 0 0 0 0 0 0 0 0 0 0 146 0 0 - 14 0 0 0 0 0 0 0 0 0 0 0 0 0 20 0 - 15 0 0 0 0 0 0 0 0 0 0 0 0 0 0 56 +exact 1 2 3 4 5 6 7 8 9 10 11 12 13 14 + 1 90 0 0 0 0 0 0 0 1 0 0 0 0 0 + 2 0 143 0 1 0 0 0 0 0 0 0 0 0 0 + 3 0 0 77 0 0 0 0 0 0 0 0 0 0 0 + 4 0 0 0 397 0 0 0 0 0 0 0 0 0 0 + 5 0 0 0 0 393 0 0 2 0 0 0 5 0 0 + 6 0 0 0 0 0 204 6 0 0 0 1 0 0 0 + 7 0 0 0 0 0 0 245 0 0 1 0 0 0 0 + 8 0 0 0 0 1 0 0 93 0 0 0 0 0 0 + 9 1 0 0 0 1 0 0 0 106 0 0 0 0 0 + 10 0 0 0 0 0 0 0 0 0 116 2 1 0 0 + 11 0 0 0 0 0 0 0 0 0 2 139 0 6 0 + 12 0 0 0 0 1 0 0 0 0 0 0 210 0 0 + 13 0 0 0 0 0 0 0 0 0 0 0 0 146 0 + 14 0 0 0 0 0 0 0 0 0 0 0 0 0 20 ``` The similarity of the two clusterings can be quantified by calculating the pairwise Rand index: @@ -633,8 +632,8 @@ From there we can visualize the error with a histogram: ``` r -error = reducedDim(r.out, "PCA")[,"PC1"] - - reducedDim(e.out, "PCA")[,"PC1"] +error <- reducedDim(r.out, "PCA")[,"PC1"] - + reducedDim(e.out, "PCA")[,"PC1"] data.frame(approx_error = error) |> ggplot(aes(approx_error)) + @@ -962,47 +961,50 @@ function for writing to HDF5 from the *[HDF5Array](https://bioconductor.org/pack ``` r -wt_out = tempfile(fileext = ".h5") +wt_out <- tempfile(fileext = ".h5") -wt_counts = counts(WTChimeraData()) +wt_counts <- counts(WTChimeraData()) +``` + +``` error +Error in h(simpleError(msg, call)): error in evaluating the argument 'object' in selecting a method for function 'counts': failed to load resource + name: EH2973 + title: WT chimera processed counts (sample 9) + reason: 1 resources failed to download +``` +``` r writeHDF5Array(wt_counts, name = "wt_counts", file = wt_out) ``` -``` output -<29453 x 30703> sparse HDF5Matrix object of type "double": - cell_1 cell_2 cell_3 ... cell_30702 cell_30703 -ENSMUSG00000051951 0 0 0 . 0 0 -ENSMUSG00000089699 0 0 0 . 0 0 -ENSMUSG00000102343 0 0 0 . 0 0 -ENSMUSG00000025900 0 0 0 . 0 0 -ENSMUSG00000025902 0 0 0 . 0 0 - ... . . . . . . -ENSMUSG00000095041 0 1 2 . 0 0 -ENSMUSG00000063897 0 0 0 . 0 0 -ENSMUSG00000096730 0 0 0 . 0 0 -ENSMUSG00000095742 0 0 0 . 0 0 - tomato-td 1 0 1 . 0 0 +``` error +Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'is_sparse': object 'wt_counts' not found ``` ``` r -oom_wt = HDF5Array(wt_out, "wt_counts") +oom_wt <- HDF5Array(wt_out, "wt_counts") +``` +``` error +Error in file_path_as_absolute(path): file '/tmp/Rtmp6Q9gmL/file1e892f562866.h5' does not exist +``` + +``` r object.size(wt_counts) ``` -``` output -1520366960 bytes +``` error +Error in eval(expr, envir, enclos): object 'wt_counts' not found ``` ``` r object.size(oom_wt) ``` -``` output -2488 bytes +``` error +Error in eval(expr, envir, enclos): object 'oom_wt' not found ``` ::::::::::::::::::::::: @@ -1028,7 +1030,7 @@ Use the function `system.time` to obtain the runtime of each job. ``` r -sce.brain = logNormCounts(sce.brain) +sce.brain <- logNormCounts(sce.brain) system.time({i.out <- runPCA(sce.brain, ncomponents = 20, diff --git a/md5sum.txt b/md5sum.txt index 7d518c8..ad41738 100644 --- a/md5sum.txt +++ b/md5sum.txt @@ -4,12 +4,12 @@ "config.yaml" "b0d664d3d6abdd0e98b16282e1c03107" "site/built/config.yaml" "2024-09-24" "index.md" "495939ddd3f110be3bbcd49b60f4a7ce" "site/built/index.md" "2024-09-24" "links.md" "8184cf4149eafbf03ce8da8ff0778c14" "site/built/links.md" "2024-09-24" -"episodes/intro-sce.Rmd" "2e2c4be36a8f7c2d803ca58200ee1e6d" "site/built/intro-sce.md" "2024-09-24" -"episodes/eda_qc.Rmd" "17151682c663ca6f41832a562e5cdc6d" "site/built/eda_qc.md" "2024-09-24" -"episodes/cell_type_annotation.Rmd" "dc23fda097f772bec1b7172277298221" "site/built/cell_type_annotation.md" "2024-09-30" +"episodes/intro-sce.Rmd" "709fc538c9872b9494fa37f1059ea4a0" "site/built/intro-sce.md" "2024-10-02" +"episodes/eda_qc.Rmd" "b4800ddfe2d5deb5047311658f254e6d" "site/built/eda_qc.md" "2024-10-02" +"episodes/cell_type_annotation.Rmd" "5bd585c6e4c6fc09a7443ce4da35899f" "site/built/cell_type_annotation.md" "2024-10-02" "episodes/multi-sample.Rmd" "4711a38fd8b29961424215dd17fb7528" "site/built/multi-sample.md" "2024-09-30" -"episodes/large_data.Rmd" "b9710492c6792ea435778c4e42f27e02" "site/built/large_data.md" "2024-09-24" -"episodes/hca.Rmd" "20f753a47fcae8ed5d0631fbc582f549" "site/built/hca.md" "2024-09-30" +"episodes/large_data.Rmd" "f19fa53e9e63d4cb8fe0f6ab61c8fc3a" "site/built/large_data.md" "2024-10-02" +"episodes/hca.Rmd" "3f2af9dc9e53fd617512a37db87f20a7" "site/built/hca.md" "2024-10-02" "instructors/instructor-notes.md" "205339793f625a1844a768dea8e4a9c8" "site/built/instructor-notes.md" "2024-09-24" "learners/reference.md" "40fc1d0be2412d2d9d434a5bc84e4de8" "site/built/reference.md" "2024-09-24" "learners/setup.md" "25772142a26fe3c0cebbe650f5683269" "site/built/setup.md" "2024-09-24"