markdown source builds

Auto-generated via {sandpaper} Source : ce47918 Branch : main Author : Andrew Ghazi <[email protected]> Time : 2024-09-09 03:05:01 +0000 Message : Merge pull request #27 from ccb-hms/add_exercises Add exercises
carpentries-incubator · Sep 9, 2024 · 6e0421b · 6e0421b
1 parent 976e4c9
commit 6e0421b
Show file tree

Hide file tree

Showing 39 changed files with 64 additions and 33 deletions.
diff --git a/cache/unnamed-chunk-12_a5e616dee08d04f21f12d3c94ad38ffc.RData b/cache/unnamed-chunk-12_a5e616dee08d04f21f12d3c94ad38ffc.RData
diff --git a/cache/unnamed-chunk-12_f33932eecda61abc5403607ab86d8040.RData b/cache/unnamed-chunk-12_f33932eecda61abc5403607ab86d8040.RData
diff --git a/...k-13_6da850b073bf362ae75b35f9051c4104.rdb → ...k-12_f33932eecda61abc5403607ab86d8040.rdb b/...k-13_6da850b073bf362ae75b35f9051c4104.rdb → ...k-12_f33932eecda61abc5403607ab86d8040.rdb
diff --git a/...k-13_6da850b073bf362ae75b35f9051c4104.rdx → ...k-12_f33932eecda61abc5403607ab86d8040.rdx b/...k-13_6da850b073bf362ae75b35f9051c4104.rdx → ...k-12_f33932eecda61abc5403607ab86d8040.rdx
diff --git a/cache/unnamed-chunk-13_6488c1139812e6ce4caf6256e3be49a4.RData b/cache/unnamed-chunk-13_6488c1139812e6ce4caf6256e3be49a4.RData
diff --git a/...k-14_34e672abd1b8e6e536c25eae9907537c.rdb → ...k-13_6488c1139812e6ce4caf6256e3be49a4.rdb b/...k-14_34e672abd1b8e6e536c25eae9907537c.rdb → ...k-13_6488c1139812e6ce4caf6256e3be49a4.rdb
diff --git a/...k-14_34e672abd1b8e6e536c25eae9907537c.rdx → ...k-13_6488c1139812e6ce4caf6256e3be49a4.rdx b/...k-14_34e672abd1b8e6e536c25eae9907537c.rdx → ...k-13_6488c1139812e6ce4caf6256e3be49a4.rdx
diff --git a/cache/unnamed-chunk-13_6da850b073bf362ae75b35f9051c4104.RData b/cache/unnamed-chunk-13_6da850b073bf362ae75b35f9051c4104.RData
diff --git a/cache/unnamed-chunk-14_34e672abd1b8e6e536c25eae9907537c.RData b/cache/unnamed-chunk-14_34e672abd1b8e6e536c25eae9907537c.RData
diff --git a/cache/unnamed-chunk-14_d113332b087de7cf5ad8ab9d2147a003.RData b/cache/unnamed-chunk-14_d113332b087de7cf5ad8ab9d2147a003.RData
diff --git a/...k-12_a5e616dee08d04f21f12d3c94ad38ffc.rdb → ...k-14_d113332b087de7cf5ad8ab9d2147a003.rdb b/...k-12_a5e616dee08d04f21f12d3c94ad38ffc.rdb → ...k-14_d113332b087de7cf5ad8ab9d2147a003.rdb
diff --git a/...k-12_a5e616dee08d04f21f12d3c94ad38ffc.rdx → ...k-14_d113332b087de7cf5ad8ab9d2147a003.rdx b/...k-12_a5e616dee08d04f21f12d3c94ad38ffc.rdx → ...k-14_d113332b087de7cf5ad8ab9d2147a003.rdx
diff --git a/cache/unnamed-chunk-15_98ce553190603201eaebedf5c122745b.RData b/cache/unnamed-chunk-15_98ce553190603201eaebedf5c122745b.RData
diff --git a/cache/unnamed-chunk-15_d2d665658821106f8c0bd9438def01f0.RData b/cache/unnamed-chunk-15_d2d665658821106f8c0bd9438def01f0.RData
diff --git a/...k-16_ff85c572ec1739f9049843e423c6aa1f.rdb → ...k-15_d2d665658821106f8c0bd9438def01f0.rdb b/...k-16_ff85c572ec1739f9049843e423c6aa1f.rdb → ...k-15_d2d665658821106f8c0bd9438def01f0.rdb
diff --git a/...k-16_ff85c572ec1739f9049843e423c6aa1f.rdx → ...k-15_d2d665658821106f8c0bd9438def01f0.rdx b/...k-16_ff85c572ec1739f9049843e423c6aa1f.rdx → ...k-15_d2d665658821106f8c0bd9438def01f0.rdx
diff --git a/...17_4b7cfdfb37db6c0bbc316dca1014516c.RData → ...16_c738c7b1db1a92b92244602dcc38be24.RData b/...17_4b7cfdfb37db6c0bbc316dca1014516c.RData → ...16_c738c7b1db1a92b92244602dcc38be24.RData
diff --git a/...k-15_98ce553190603201eaebedf5c122745b.rdb → ...k-16_c738c7b1db1a92b92244602dcc38be24.rdb b/...k-15_98ce553190603201eaebedf5c122745b.rdb → ...k-16_c738c7b1db1a92b92244602dcc38be24.rdb
diff --git a/...k-15_98ce553190603201eaebedf5c122745b.rdx → ...k-16_c738c7b1db1a92b92244602dcc38be24.rdx b/...k-15_98ce553190603201eaebedf5c122745b.rdx → ...k-16_c738c7b1db1a92b92244602dcc38be24.rdx
diff --git a/cache/unnamed-chunk-16_ff85c572ec1739f9049843e423c6aa1f.RData b/cache/unnamed-chunk-16_ff85c572ec1739f9049843e423c6aa1f.RData
diff --git a/...18_f495c4ecb065091935596fe793ef7faf.RData → ...17_f1bff20b028f4668b0caea3b6c78005c.RData b/...18_f495c4ecb065091935596fe793ef7faf.RData → ...17_f1bff20b028f4668b0caea3b6c78005c.RData
diff --git a/...k-17_4b7cfdfb37db6c0bbc316dca1014516c.rdb → ...k-17_f1bff20b028f4668b0caea3b6c78005c.rdb b/...k-17_4b7cfdfb37db6c0bbc316dca1014516c.rdb → ...k-17_f1bff20b028f4668b0caea3b6c78005c.rdb
diff --git a/...k-17_4b7cfdfb37db6c0bbc316dca1014516c.rdx → ...k-17_f1bff20b028f4668b0caea3b6c78005c.rdx b/...k-17_4b7cfdfb37db6c0bbc316dca1014516c.rdx → ...k-17_f1bff20b028f4668b0caea3b6c78005c.rdx
diff --git a/...19_824a3a515c391049151ecca054c7dd10.RData → ...18_336b2662515cabcd0960a12b8649cc3a.RData b/...19_824a3a515c391049151ecca054c7dd10.RData → ...18_336b2662515cabcd0960a12b8649cc3a.RData
diff --git a/...k-18_f495c4ecb065091935596fe793ef7faf.rdb → ...k-18_336b2662515cabcd0960a12b8649cc3a.rdb b/...k-18_f495c4ecb065091935596fe793ef7faf.rdb → ...k-18_336b2662515cabcd0960a12b8649cc3a.rdb
diff --git a/...k-18_f495c4ecb065091935596fe793ef7faf.rdx → ...k-18_336b2662515cabcd0960a12b8649cc3a.rdx b/...k-18_f495c4ecb065091935596fe793ef7faf.rdx → ...k-18_336b2662515cabcd0960a12b8649cc3a.rdx
diff --git a/...20_0a7523b916b4425a4c7a589e509759e7.RData → ...19_7c3ec703dee5d30ad3a69d7268a9ea30.RData b/...20_0a7523b916b4425a4c7a589e509759e7.RData → ...19_7c3ec703dee5d30ad3a69d7268a9ea30.RData
diff --git a/...k-19_824a3a515c391049151ecca054c7dd10.rdb → ...k-19_7c3ec703dee5d30ad3a69d7268a9ea30.rdb b/...k-19_824a3a515c391049151ecca054c7dd10.rdb → ...k-19_7c3ec703dee5d30ad3a69d7268a9ea30.rdb
diff --git a/...k-19_824a3a515c391049151ecca054c7dd10.rdx → ...k-19_7c3ec703dee5d30ad3a69d7268a9ea30.rdx b/...k-19_824a3a515c391049151ecca054c7dd10.rdx → ...k-19_7c3ec703dee5d30ad3a69d7268a9ea30.rdx
diff --git a/cache/unnamed-chunk-20_3f31c4868497cd575b1fea7a7ec7caca.RData b/cache/unnamed-chunk-20_3f31c4868497cd575b1fea7a7ec7caca.RData
diff --git a/...k-20_0a7523b916b4425a4c7a589e509759e7.rdb → ...k-20_3f31c4868497cd575b1fea7a7ec7caca.rdb b/...k-20_0a7523b916b4425a4c7a589e509759e7.rdb → ...k-20_3f31c4868497cd575b1fea7a7ec7caca.rdb
diff --git a/...k-20_0a7523b916b4425a4c7a589e509759e7.rdx → ...k-20_3f31c4868497cd575b1fea7a7ec7caca.rdx b/...k-20_0a7523b916b4425a4c7a589e509759e7.rdx → ...k-20_3f31c4868497cd575b1fea7a7ec7caca.rdx
diff --git a/cache/unnamed-chunk-21_8ad7b5653727d8b378d7320fd23d921e.RData b/cache/unnamed-chunk-21_8ad7b5653727d8b378d7320fd23d921e.RData
diff --git a/cache/unnamed-chunk-21_e3e49bf8cb680c8be603eec8724ca55b.RData b/cache/unnamed-chunk-21_e3e49bf8cb680c8be603eec8724ca55b.RData
diff --git a/...k-21_8ad7b5653727d8b378d7320fd23d921e.rdb → ...k-21_e3e49bf8cb680c8be603eec8724ca55b.rdb b/...k-21_8ad7b5653727d8b378d7320fd23d921e.rdb → ...k-21_e3e49bf8cb680c8be603eec8724ca55b.rdb
diff --git a/...k-21_8ad7b5653727d8b378d7320fd23d921e.rdx → ...k-21_e3e49bf8cb680c8be603eec8724ca55b.rdx b/...k-21_8ad7b5653727d8b378d7320fd23d921e.rdx → ...k-21_e3e49bf8cb680c8be603eec8724ca55b.rdx
diff --git a/hca.md b/hca.md
@@ -239,22 +239,6 @@ metadata |>
 12 sci-RNA-seq                        1
 ```
 
-## Available organisms
-
-
-``` r
-metadata |>
-    distinct(organism, dataset_id) |>
-    count(organism)
-```
-
-``` output
-# A tibble: 1 × 2
-  organism         n
-  <chr>        <int>
-1 Homo sapiens    63
-```
-
 ### Download single-cell RNA sequencing counts 
 
 The data can be provided as either "counts" or counts per million "cpm" as given

diff --git a/large_data.md b/large_data.md
@@ -64,7 +64,9 @@ set, as provided by the
 
 ``` r
 library(TENxBrainData)
+
 sce.brain <- TENxBrainData20k() 
+
 sce.brain
 ```
 
@@ -150,7 +152,9 @@ new file at every operation, which would unnecessarily require time-consuming di
 
 ``` r
 tmp <- counts(sce.brain)
+
 tmp <- log2(tmp + 1)
+
 tmp
 ```
 
@@ -183,8 +187,11 @@ function that we used in the other workflows.
 
 ``` r
 library(scater)
+
 is.mito <- grepl("^mt-", rowData(sce.brain)$Symbol)
+
 qcstats <- perCellQCMetrics(sce.brain, subsets = list(Mt = is.mito))
+
 qcstats
 ```
 
@@ -253,9 +260,10 @@ by indicating the `BPPARAM` argument in `bplapply`.
 
 ``` r
 param <- MulticoreParam(workers = 1)
+
 bplapply(
     X = c(4, 9, 16, 25),
-    FUN = function(x) { sqrt(x) },
+    FUN = sqrt,
     BPPARAM = param
 )
 ```
@@ -286,10 +294,15 @@ calculations on a Unix system:
 
 ``` r
 library(MouseGastrulationData)
+
 library(scran)
+
 sce <- WTChimeraData(samples = 5, type = "processed")
+
 sce <- logNormCounts(sce)
+
 dec.mc <- modelGeneVar(sce, BPPARAM = MulticoreParam(2))
+
 dec.mc
 ```
 
@@ -342,6 +355,7 @@ details).
 ``` r
 # 2 hours, 8 GB, 1 CPU per task, for 10 tasks.
 rs <- list(walltime = 7200, memory = 8000, ncpus = 1)
+
 bpp <- BatchtoolsParam(10, cluster = "slurm", resources = rs)
 ```
 
@@ -393,7 +407,9 @@ graph-based clustering using the Louvain algorithm for community detection:
 
 ``` r
 library(bluster)
+
 sce <- runPCA(sce)
+
 colLabels(sce) <- clusterCells(sce, use.dimred = "PCA",
                                BLUSPARAM = NNGraphParam(cluster.fun = "louvain"))
 ```
@@ -410,37 +426,41 @@ approximation can be largely ignored.
 
 ``` r
 library(scran)
+
 library(BiocNeighbors)
+
 clusters <- clusterCells(sce, use.dimred = "PCA",
                          BLUSPARAM = NNGraphParam(cluster.fun = "louvain",
                                                   BNPARAM = AnnoyParam()))
+
 table(exact = colLabels(sce), approx = clusters)
 ```
 
 ``` output
      approx
 exact   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15
    1   90   0   0   0   0   0   0   0   1   0   0   0   0   0   0
-   2    0 143   0   1   0   0   0   0   0   0   0   0   0   0   0
+   2    0 143   0   0   0   0   0   0   0   0   0   0   0   0   1
    3    0   0  75   0   0   0   0   0   0   0   0   0   0   0   0
-   4    0   0   0 253   0   0   0   0   0   0   0   0 144   0   0
-   5    0   0   2   0 391   1   0   0   0   1   0   3   0   0   0
-   6    0   0   0   0   0 206  51   0   0   0   1   0   0   0   0
-   7    0   0   0   0   0   3 194   0   0   1   0   0   0   0   0
-   8    0   0   0   0   2   0   0  91   0   0   0   2   0   0   0
+   4    0   0   0 342   0   0   0   0   0   0   0   0   0   0  55
+   5    0   0   0   0  74   0   0   0   0   0   0 198   0   0   0
+   6    0   0   0   0   0 210   0   0   0   0   0   0   0   0   0
+   7    0   0   0   0   0   0 245   0   0   1   0   0   0   0   0
+   8    0   0   0   0   1   0   0  95   0   0   0   0   0   0   0
    9    1   0   0   0   1   0   0   0 106   0   0   0   0   0   0
-   10   0   0   0   0   0   0   0   0   0 113   8   0   0   0   0
-   11   0   0   0   0   0   0   0   0   0   0 144   0   0   0   0
-   12   0   0   0   0   2   0   0   0   0  15   0 199   0   0   0
-   13   0   0   0   0   0   0   0   0   0   0   0   0   0 146   0
-   14   0   0   0   0   0   0   0   0   0   0   0   0   0   0  20
+   10   0   0   0   0   0   0   0   0   0 113   0  16   0   0   0
+   11   0   0   0   0   0   0   0   0   0   0 153   0   0   0   0
+   12   0   0   2   0 321   0   0   0   0   1   0   0   0   0   0
+   13   0   0   0   0   0   0   0   0   0   0   0   0 146   0   0
+   14   0   0   0   0   0   0   0   0   0   0   0   0   0  20   0
 ```
 
 The similarity of the two clusterings can be quantified by calculating the pairwise Rand index: 
 
 
 ``` r
 rand <- pairwiseRand(colLabels(sce), clusters, mode = "index")
+
 stopifnot(rand > 0.8)
 ```
 
@@ -455,11 +475,17 @@ the biological conclusions.
 
 ``` r
 set.seed(1000)
+
 y1 <- matrix(rnorm(50000), nrow = 1000)
+
 y2 <- matrix(rnorm(50000), nrow = 1000)
+
 Y <- rbind(y1, y2)
+
 exact <- findKNN(Y, k = 20)
+
 approx <- findKNN(Y, k = 20, BNPARAM = AnnoyParam())
+
 mean(exact$index != approx$index)
 ```
 
@@ -487,7 +513,9 @@ library(BiocSingular)
 
 # As the name suggests, it is random, so we need to set the seed.
 set.seed(101000)
+
 r.out <- runPCA(sce, ncomponents = 20, BSPARAM = RandomParam())
+
 str(reducedDim(r.out, "PCA"))
 ```
 
@@ -506,7 +534,9 @@ str(reducedDim(r.out, "PCA"))
 
 ``` r
 set.seed(101001)
+
 i.out <- runPCA(sce, ncomponents = 20, BSPARAM = IrlbaParam())
+
 str(reducedDim(i.out, "PCA"))
 ```
 
@@ -546,7 +576,9 @@ This code block calculates the exact PCA coordinates. Another thing to note: PC
 
 ``` r
 set.seed(123)
+
 e.out <- runPCA(sce, ncomponents = 20, BSPARAM = ExactParam())
+
 str(reducedDim(e.out, "PCA"))
 ```
 
@@ -661,6 +693,7 @@ We then proceed by loading all required packages and installing the PBMC dataset
 
 ``` r
 library(SeuratData)
+
 InstallData("pbmc3k")
 ```
 
@@ -671,9 +704,13 @@ We then load the dataset as a `SeuratObject` and convert it to a
 ``` r
 # Use PBMC3K from SeuratData
 pbmc <- LoadData(ds = "pbmc3k", type = "pbmc3k.final")
+
 pbmc <- UpdateSeuratObject(pbmc)
+
 pbmc
+
 pbmc.sce <- as.SingleCellExperiment(pbmc)
+
 pbmc.sce
 ```
 
@@ -683,8 +720,11 @@ we demonstrate this here on the wild-type chimera mouse gastrulation dataset.
 
 ``` r
 sce <- WTChimeraData(samples = 5, type = "processed")
+
 assay(sce) <- as.matrix(assay(sce))
+
 sce <- logNormCounts(sce)
+
 sce
 ```
 
@@ -694,7 +734,9 @@ the `as.Seurat` function.
 
 ``` r
 sobj <- as.Seurat(sce)
+
 Idents(sobj) <- "celltype.mapped"
+
 sobj
 ```
 
@@ -734,6 +776,7 @@ package.
 ``` r
 example_h5ad <- system.file("extdata", "krumsiek11.h5ad",
                             package = "zellkonverter")
+
 readH5AD(example_h5ad)
 ```
 
@@ -758,6 +801,7 @@ chimera mouse gastrulation dataset.
 
 ``` r
 out.file <- tempfile(fileext = ".h5ad")
+
 writeH5AD(sce, file = out.file)
 ```
 
@@ -985,15 +1029,18 @@ Use the function `system.time` to obtain the runtime of each job.
 ``` r
 sce.brain = logNormCounts(sce.brain)
 
-system.time({i.out <- runPCA(sce.brain, ncomponents = 20, 
+system.time({i.out <- runPCA(sce.brain, 
+                             ncomponents = 20, 
                              BSPARAM = ExactParam(),
                              BPPARAM = SerialParam())})
 
-system.time({i.out <- runPCA(sce.brain, ncomponents = 20, 
+system.time({i.out <- runPCA(sce.brain, 
+                             ncomponents = 20, 
                              BSPARAM = ExactParam(),
                              BPPARAM = MulticoreParam(workers = 2))})
 
-system.time({i.out <- runPCA(sce.brain, ncomponents = 20, 
+system.time({i.out <- runPCA(sce.brain, 
+                             ncomponents = 20, 
                              BSPARAM = ExactParam(),
                              BPPARAM = MulticoreParam(workers = 3))})
 ```

diff --git a/md5sum.txt b/md5sum.txt
@@ -8,8 +8,8 @@
 "episodes/eda_qc.Rmd" "1e88f395d30778f4526532deea43eb03" "site/built/eda_qc.md" "2024-09-06"
 "episodes/cell_type_annotation.Rmd" "66af56b730aaa88e937bc1743afb471a" "site/built/cell_type_annotation.md" "2024-09-08"
 "episodes/multi-sample.Rmd" "2d38d9903358ea8a8067abd82a1f1f54" "site/built/multi-sample.md" "2024-09-08"
-"episodes/large_data.Rmd" "bbe443f474a0823122658effa2beb57e" "site/built/large_data.md" "2024-09-06"
-"episodes/hca.Rmd" "6db220495ae4ae56d33e4ca5b5f9b8ae" "site/built/hca.md" "2024-09-06"
+"episodes/large_data.Rmd" "b9710492c6792ea435778c4e42f27e02" "site/built/large_data.md" "2024-09-09"
+"episodes/hca.Rmd" "e01d3fd1e07f158bed08b72d657ae1d1" "site/built/hca.md" "2024-09-09"
 "instructors/instructor-notes.md" "cae72b6712578d74a49fea7513099f8c" "site/built/instructor-notes.md" "2024-09-06"
 "learners/reference.md" "40fc1d0be2412d2d9d434a5bc84e4de8" "site/built/reference.md" "2024-09-06"
 "learners/setup.md" "25772142a26fe3c0cebbe650f5683269" "site/built/setup.md" "2024-09-06"