From f667320225cce98775c757d489149b8cfb618afd Mon Sep 17 00:00:00 2001
From: csmagnano <chrismagnano@gmail.com>
Date: Fri, 3 May 2024 09:40:06 -0400
Subject: [PATCH] Updated further reading and heading levels in all lessons

---
 episodes/cell_type_annotation.Rmd | 33 +++++++++++++-----------
 episodes/hca.Rmd                  | 42 +++++++++++++++----------------
 episodes/intro-sce.Rmd            | 24 +++++++++---------
 episodes/large_data.Rmd           | 38 +++++++++++++++-------------
 episodes/multi-sample.Rmd         | 34 +++++++++++++------------
 5 files changed, 91 insertions(+), 80 deletions(-)

diff --git a/episodes/cell_type_annotation.Rmd b/episodes/cell_type_annotation.Rmd
index 538e684..7be6dc3 100644
--- a/episodes/cell_type_annotation.Rmd
+++ b/episodes/cell_type_annotation.Rmd
@@ -23,7 +23,7 @@ exercises: 15 # Minutes of exercises in the lesson
 ::::::::::::::::::::::::::::::::::::::::::::::::
 
 
-# Setup
+## Setup
 
 ```{r setup, message = FALSE}
 library(BiocStyle)
@@ -35,7 +35,7 @@ library(scater)
 library(scran)
 ```
 
-# Data retrieval
+## Data retrieval
 
 ```{r data, message = FALSE}
 sce <- WTChimeraData(samples = 5, type = "processed")
@@ -50,14 +50,14 @@ ind <- sample(ncol(sce), 1000)
 sce <- sce[,ind]
 ```
 
-# Preprocessing
+## Preprocessing
 
 ```{r preproc, warning = FALSE}
 sce <- logNormCounts(sce)
 sce <- runPCA(sce)
 ```
 
-# Clustering
+## Clustering
 
 Clustering is an unsupervised learning procedure that is used to empirically 
 define groups of cells with similar expression profiles. 
@@ -104,7 +104,7 @@ sce <- runUMAP(sce, dimred = "PCA")
 plotReducedDim(sce, "UMAP", color_by = "label")
 ```
 
-# Marker gene detection
+## Marker gene detection
 
 To interpret clustering results as obtained in the previous section, we identify
 the genes that drive separation between clusters. These marker genes allow us to 
@@ -156,7 +156,7 @@ top.markers <- head(rownames(markers[[1]]))
 plotExpression(sce, features = top.markers, x = "label", color_by = "label")
 ```
 
-# Cell type annotation
+## Cell type annotation
 
 The most challenging task in scRNA-seq data analysis is arguably the
 interpretation of the results.
@@ -182,7 +182,7 @@ reference datasets where each sample or cell has already been annotated with its
 putative biological state by domain experts.
 Here, we will demonstrate both approaches on the wild-type chimera dataset.
 
-## Assigning cell labels from reference data
+### Assigning cell labels from reference data
 
 A conceptually straightforward annotation approach is to compare the single-cell
 expression profiles with previously annotated reference datasets.
@@ -303,7 +303,7 @@ tab <- table(res$pruned.labels, sce$celltype.mapped)
 pheatmap(log2(tab + 10), color = colorRampPalette(c("white", "blue"))(101))
 ```
 
-## Assigning cell labels from gene sets
+### Assigning cell labels from gene sets
 
 A related strategy is to explicitly identify sets of marker genes that are highly
 expressed in each individual cell.
@@ -397,19 +397,15 @@ a fitted three-component mixture, and the grey curve represents a fitted normal
 distribution. Vertical lines represent threshold estimates corresponding to each
 estimate of the distribution. 
 
-# Session Info
+## Session Info
 
 ```{r sessionInfo}
 sessionInfo()
 ```
 
-# Further Reading
 
-* OSCA book, [Chapters 5-7](https://bioconductor.org/books/release/OSCA.basic/clustering.html)
-* Assigning cell types with SingleR ([the book](https://bioconductor.org/books/release/SingleRBook/)).
-* The [AUCell](https://bioconductor.org/packages/AUCell) package vignette.
 
-# Exercises
+## Exercises
 
 :::::::::::::::::::::::::::::::::: challenge
 
@@ -484,6 +480,15 @@ TODO
 
 :::::::::::::::::::::::::::::::::::::::::::::
 
+:::::::::::::: checklist
+## Further Reading
+
+* OSCA book, [Chapters 5-7](https://bioconductor.org/books/release/OSCA.basic/clustering.html)
+* Assigning cell types with SingleR ([the book](https://bioconductor.org/books/release/SingleRBook/)).
+* The [AUCell](https://bioconductor.org/packages/AUCell) package vignette.
+
+::::::::::::::
+
 ::::::::::::::::::::::::::::::::::::: keypoints 
 
 - TODO
diff --git a/episodes/hca.Rmd b/episodes/hca.Rmd
index 65473e5..3d621be 100644
--- a/episodes/hca.Rmd
+++ b/episodes/hca.Rmd
@@ -18,7 +18,7 @@ exercises: 10 # Minutes of exercises in the lesson
 
 ::::::::::::::::::::::::::::::::::::::::::::::::
 
-# HCA Project
+## HCA Project
 
 The Human Cell Atlas (HCA) is a large project that aims to learn from and map
 every cell type in the human body. The project extracts spatial and molecular
@@ -27,7 +27,7 @@ international collaborative that charts healthy cells in the human body at all
 ages. There are about 37.2 trillion cells in the human body. To read more about
 the project, head over to their website at https://www.humancellatlas.org.
 
-# CELLxGENE
+## CELLxGENE
 
 CELLxGENE is a database and a suite of tools that help scientists to find,
 download, explore, analyze, annotate, and publish single cell data. It includes
@@ -35,7 +35,7 @@ several analytic and visualization tools to help you to discover single cell
 data patterns. To see the list of tools, browse to
 https://cellxgene.cziscience.com/.
 
-# CELLxGENE | Census
+## CELLxGENE | Census
 
 The Census provides efficient computational tooling to access, query, and
 analyze all single-cell RNA data from CZ CELLxGENE Discover. Using a new access
@@ -44,7 +44,7 @@ through TileDB-SOMA, or get slices in AnnData or Seurat objects, thus
 accelerating your research by significantly minimizing data harmonization at
 https://chanzuckerberg.github.io/cellxgene-census/.
 
-# The CuratedAtlasQueryR Project
+## The CuratedAtlasQueryR Project
 
 To systematically characterize the immune system across tissues, demographics
 and multiple studies, single cell transcriptomics data was harmonized from the
@@ -71,7 +71,7 @@ accessing atlas-level datasets programmatically and reproducibly.
 
 ![](figures/curatedAtlasQuery.png)
 
-# Data Sources in R / Bioconductor
+## Data Sources in R / Bioconductor
 
 There are a few options to access single cell data with R / Bioconductor.
 
@@ -81,7 +81,7 @@ There are a few options to access single cell data with R / Bioconductor.
 | [cellxgenedp](https://bioconductor.org/packages/cellxgenedp) | [CellxGene](https://cellxgene.cziscience.com/) | Human and mouse SC data including HCA |
 | [CuratedAtlasQueryR](https://stemangiola.github.io/CuratedAtlasQueryR/) | [CellxGene](https://cellxgene.cziscience.com/) | fine-grained query capable CELLxGENE data including HCA |
 
-# Installation
+## Installation
 
 ```{r, eval=FALSE}
 if (!requireNamespace("BiocManager", quietly = TRUE))
@@ -90,14 +90,14 @@ if (!requireNamespace("BiocManager", quietly = TRUE))
 BiocManager::install("CuratedAtlasQueryR")
 ```
 
-# Package load 
+## Package load 
 
 ```{r, include = TRUE, results = "hide", message = FALSE, warning = FALSE}
 library(CuratedAtlasQueryR)
 library(dplyr)
 ```
 
-# HCA Metadata
+## HCA Metadata
 
 The metadata allows the user to get a lay of the land of what is available
 via the package. In this example, we are using the sample database URL which
@@ -115,7 +115,7 @@ metadata |>
   glimpse()
 ```
 
-# A note on the piping operator
+## A note on the piping operator
 
 The vignette materials provided by `CuratedAtlasQueryR` show the use of the
 'native' R pipe (implemented after R version `4.1.0`). For those not familiar
@@ -136,7 +136,7 @@ iris |>
   aggregate(. ~ Species, data = _, mean)
 ```
 
-# Summarizing the metadata
+## Summarizing the metadata
 
 For each distinct tissue and dataset combination, count the number of datasets
 by tissue type. 
@@ -147,13 +147,13 @@ metadata |>
   count(tissue)
 ```
 
-# Columns available in the metadata
+## Columns available in the metadata
 
 ```{r, message = FALSE}
 head(names(metadata), 10)
 ```
 
-# Available assays
+## Available assays
 
 ```{r}
 metadata |>
@@ -161,7 +161,7 @@ metadata |>
     count(assay)
 ```
 
-# Available organisms
+## Available organisms
 
 ```{r}
 metadata |>
@@ -169,14 +169,14 @@ metadata |>
     count(organism)
 ```
 
-## Download single-cell RNA sequencing counts 
+### Download single-cell RNA sequencing counts 
 
 The data can be provided as either "counts" or counts per million "cpm" as given
 by the `assays` argument in the `get_single_cell_experiment()` function. By
 default, the `SingleCellExperiment` provided will contain only the 'counts'
 data.
 
-### Query raw counts
+#### Query raw counts
 
 ```{r, message = FALSE}
 single_cell_counts <- 
@@ -192,7 +192,7 @@ single_cell_counts <-
 single_cell_counts
 ```
 
-### Query counts scaled per million
+#### Query counts scaled per million
 
 This is helpful if just few genes are of interest, as they can be compared
 across samples.
@@ -208,7 +208,7 @@ metadata |>
   get_single_cell_experiment(assays = "cpm")
 ```
 
-### Extract only a subset of genes
+#### Extract only a subset of genes
 
 ```{r, message = FALSE}
 single_cell_counts <-
@@ -224,7 +224,7 @@ single_cell_counts <-
 single_cell_counts
 ```
 
-### Extracting counts as a Seurat object
+#### Extracting counts as a Seurat object
 
 If needed, the H5 `SingleCellExperiment` can be converted into a Seurat object.
 Note that it may take a long time and use a lot of memory depending on how many
@@ -244,9 +244,9 @@ single_cell_counts <-
 single_cell_counts
 ```
 
-## Save your `SingleCellExperiment`
+### Save your `SingleCellExperiment`
 
-### Saving as HDF5 
+#### Saving as HDF5 
 
 The recommended way of saving these `SingleCellExperiment` objects, if
 necessary, is to use `saveHDF5SummarizedExperiment` from the `HDF5Array`
@@ -256,7 +256,7 @@ package.
 single_cell_counts |> saveHDF5SummarizedExperiment("single_cell_counts")
 ```
 
-# Exercises
+## Exercises
 
 :::::::::::::::::::::::::::::::::: challenge
 
diff --git a/episodes/intro-sce.Rmd b/episodes/intro-sce.Rmd
index 5f8587a..bf756cd 100644
--- a/episodes/intro-sce.Rmd
+++ b/episodes/intro-sce.Rmd
@@ -20,7 +20,7 @@ exercises: 10 # Minutes of exercises in the lesson
 
 ::::::::::::::::::::::::::::::::::::::::::::::::
 
-# Setup
+## Setup
 
 ```{r setup, message = FALSE, warning=FALSE}
 library(SummarizedExperiment)
@@ -29,9 +29,9 @@ library(MouseGastrulationData)
 library(BiocStyle)
 ```
 
-# Bioconductor
+## Bioconductor
 
-## Overview 
+### Overview 
 
 Within the R ecosystem, the Bioconductor project provides tools for the analysis and comprehension of high-throughput genomics data.
 The scope of the project covers microarray data, various forms of sequencing (RNA-seq, ChIP-seq, bisulfite, genotyping, etc.), proteomics, flow cytometry and more.
@@ -39,7 +39,7 @@ One of Bioconductor's main selling points is the use of common data structures t
 allowing code written by different people (from different organizations, in different countries) to work together seamlessly in complex analyses. 
 By extending R to genomics, Bioconductor serves as a powerful addition to the computational biologist's toolkit.
 
-## Installing Bioconductor Packages
+### Installing Bioconductor Packages
 
 The default repository for R packages is the [Comprehensive R Archive Network](https://cran.r-project.org/mirrors.html) (CRAN), which is home to over 13,000 different R packages. 
 We can easily install packages from CRAN - say, the popular `r CRANpkg("ggplot2")` package for data visualization - by opening up R and typing in:
@@ -78,7 +78,7 @@ BiocManager::install("scater")
 Packages only need to be installed once, and then they are available for all subsequent uses of a particular R installation.
 There is no need to repeat the installation every time we start R.
 
-## Finding relevant packages
+### Finding relevant packages
 
 To find relevant Bioconductor packages, one useful resource is the [BiocViews](https://bioconductor.org/packages/release/BiocViews.html) page.
 This provides a hierarchically organized view of annotations associated with each Bioconductor package.
@@ -87,7 +87,7 @@ This gives us a listing of all Bioconductor packages that might be useful for ou
 CRAN uses the similar concept of ["Task views"](https://cran.r-project.org/web/views/), though this is understandably more general than genomics.
 For example, the [Cluster task view page](https://cran.r-project.org/web/views/Cluster.html) lists an assortment of packages that are relevant to cluster analyses.
 
-## Staying up to date
+### Staying up to date
 
 Updating all R/Bioconductor packages is as simple as running `BiocManager::install()` without any arguments.
 This will check for more recent versions of each package (within a Bioconductor release) and prompt the user to update if any are available.
@@ -96,7 +96,7 @@ This will check for more recent versions of each package (within a Bioconductor
 BiocManager::install()
 ```
 
-# The `SingleCellExperiment` class
+## The `SingleCellExperiment` class
 
 One of the main strengths of the Bioconductor project lies in the use of a common data infrastructure that powers interoperability across packages. 
 
@@ -110,7 +110,7 @@ knitr::include_graphics("http://bioconductor.org/books/3.17/OSCA.intro/images/Si
 
 Let's start with an example dataset.
 
-```{r, message = FALSE}
+```{r, message = FALSE, warning=FALSE}
 sce <- WTChimeraData(samples=5)
 sce
 ```
@@ -121,7 +121,7 @@ The _getter_ methods are used to extract information from the slots and the _set
 
 Depending on the object, slots can contain different types of data (e.g., numeric matrices, lists, etc.). We will here review the main slots of the SingleCellExperiment class as well as their getter/setter methods.
 
-## The `assays`
+### The `assays`
 
 This is arguably the most fundamental part of the object that contains the count matrix, and potentially other matrices with transformed data. We can access the _list_ of matrices with the `assays` function and individual matrices with the `assay` function. If one of these matrices is called "counts", we can use the special `counts` getter (and the analogous `logcounts`).
 
@@ -132,7 +132,7 @@ counts(sce)[1:3, 1:3]
 
 You will notice that in this case we have a sparse matrix of class "dgTMatrix" inside the object. More generally, any "matrix-like" object can be used, e.g., dense matrices or HDF5-backed matrices (see "Working with large data").
 
-## The `colData` and `rowData`
+### The `colData` and `rowData`
 
 Conceptually, these are two data frames that annotate the columns and the rows of your assay, respectively.
 
@@ -151,7 +151,7 @@ sce$my_sum <- colSums(counts(sce))
 colData(sce)
 ```
 
-## The `reducedDims`
+### The `reducedDims`
 
 Everything that we have described so far (except for the `counts` getter) is part of the `SummarizedExperiment` class that SingleCellExperiment extends. You can find a complete lesson on the `SummarizedExperiment` class [here](https://carpentries-incubator.github.io/bioc-intro/60-next-steps.html).
 
@@ -196,7 +196,7 @@ Combining two objects: The `MouseGastrulationData` package contains several data
 
 :::::::::::::: checklist
 
-# Further Reading
+## Further Reading
 
 * OSCA book, [Introduction](https://bioconductor.org/books/release/OSCA.intro)
 
diff --git a/episodes/large_data.Rmd b/episodes/large_data.Rmd
index 8311c02..1e973b6 100644
--- a/episodes/large_data.Rmd
+++ b/episodes/large_data.Rmd
@@ -25,7 +25,7 @@ exercises: 2 # Minutes of exercises in the lesson
 library(BiocStyle)
 ```
 
-# Motivation 
+## Motivation 
 
 Advances in scRNA-seq technologies have increased the number of cells that can 
 be assayed in routine experiments.
@@ -39,7 +39,7 @@ increasing size of scRNA-seq data sets.
 This section discusses how we can use various aspects of the Bioconductor 
 ecosystem to tune our analysis pipelines for greater speed and efficiency.
 
-# Out of memory representations
+## Out of memory representations
 
 The count matrix is the central structure around which our analyses are based.
 In most of the previous chapters, this has been held fully in memory as a dense 
@@ -126,7 +126,7 @@ in-memory representations on HPC systems with plentiful memory, and then
 distributing file-backed counterparts to individual users for exploration and 
 visualization on their personal machines.
 
-# Parallelization
+## Parallelization
 
 Parallelization of calculations across genes or cells is an obvious strategy for
 speeding up scRNA-seq analysis workflows.
@@ -136,7 +136,7 @@ computing throughout the Bioconductor ecosystem, manifesting as a `BPPARAM`
 argument in compatible functions. We can also use `BiocParallel` with more
 expressive functions directly through the package's interface.
 
-### Basic use
+### Basic use
 
 ```{r,include=TRUE,results="hide",message=FALSE,warning=FALSE}
 library(BiocParallel)
@@ -206,9 +206,9 @@ parallelization backends involve (i) setting up one or more separate R sessions,
 session. Depending on the nature and size of the task, this overhead may
 outweigh any benefit from parallel computing.
 
-# Fast approximations
+## Fast approximations
 
-## Nearest neighbor searching
+### Nearest neighbor searching
 
 Identification of neighbouring cells in PC or expression space is a common procedure
 that is used in many functions, e.g., `buildSNNGraph()`, `doubletCells()`.
@@ -273,7 +273,7 @@ approx <- findKNN(Y, k = 20, BNPARAM = AnnoyParam())
 mean(exact$index != approx$index)
 ```
 
-## Singular value decomposition 
+### Singular value decomposition 
 
 The singular value decomposition (SVD) underlies the PCA used throughout our
 analyses, e.g., in `denoisePCA()`, `fastMNN()`, `doubletCells()`.
@@ -313,9 +313,9 @@ of power iterations (`q=`).
 We tend to prefer IRLBA as its default behavior is more accurate, though RSVD is
 much faster for file-backed matrices.
 
-# Interoperability with popular single-cell analysis ecosytems
+## Interoperability with popular single-cell analysis ecosystems
 
-## Seurat
+### Seurat
 
 [Seurat](https://satijalab.org/seurat) is an R package designed for QC, analysis,
 and exploration of single-cell RNA-seq data. Seurat can be used to identify and
@@ -390,7 +390,7 @@ Idents(sobj) <- "celltype.mapped"
 sobj
 ```
 
-## Scanpy
+### Scanpy
 
 [Scanpy](https://scanpy.readthedocs.io) is a scalable toolkit for analyzing
 single-cell gene expression data built jointly with
@@ -440,18 +440,14 @@ The resulting H5AD file can then be read into Python using scanpy's
 [read_h5ad](https://scanpy.readthedocs.io/en/stable/generated/scanpy.read_h5ad.html)
 function and then directly used in compatible Python-based analysis frameworks.
 
-# Session Info
+## Session Info
 
 ```{r sessionInfo}
 sessionInfo()
 ```
 
-# Further Reading
 
-* OSCA book, [Chapter 14](https://bioconductor.org/books/release/OSCA.advanced/dealing-with-big-data.html): Dealing with big data 
-* The `BiocParallel` `r Biocpkg("BiocParallel", vignette = "Introduction_To_BiocParallel.html", label = "intro vignette")`. 
-
-# Exercises
+## Exercises
 
 :::::::::::::::::::::::::::::::::: challenge
 
@@ -524,6 +520,14 @@ Use Seurat's `DimPlot` function.
 
 :::::::::::::::::::::::
 
+:::::::::::::: checklist
+## Further Reading
+
+* OSCA book, [Chapter 14](https://bioconductor.org/books/release/OSCA.advanced/dealing-with-big-data.html): Dealing with big data 
+* The `BiocParallel` `r Biocpkg("BiocParallel", vignette = "Introduction_To_BiocParallel.html", label = "intro vignette")`. 
+
+::::::::::::::
+
 ::::::::::::::::::::::::::::::::::::: keypoints 
 
 - Out-of-memory representations can be used to work with single-cell datasets that are too large to fit in memory
@@ -533,4 +537,4 @@ Use Seurat's `DimPlot` function.
 
 ::::::::::::::::::::::::::::::::::::::::::::::::
 
-# References
+## References
diff --git a/episodes/multi-sample.Rmd b/episodes/multi-sample.Rmd
index db1c618..f185a5b 100644
--- a/episodes/multi-sample.Rmd
+++ b/episodes/multi-sample.Rmd
@@ -21,7 +21,7 @@ exercises: 15 # Minutes of exercises in the lesson
 ::::::::::::::::::::::::::::::::::::::::::::::::
 
 
-# Setup and data exploration
+## Setup and data exploration
 
 As said, we will use the the wild-type data from the Tal1 chimera experiment:
 
@@ -89,7 +89,7 @@ There are evident sample effects. Depending on the analysis that you want to per
 
 For now, let's assume that we want to remove this effect.
 
-# Correcting batch effects
+## Correcting batch effects
 
 We correct the effect of samples by aid of the `correctExperiment` function
 in the `batchelor` package and using the `sample` `colData` column as batch.
@@ -119,7 +119,7 @@ Once we removed the sample batch effect, we can proceed with the Differential
 Expression Analysis.
 
 
-# Differential Expression
+## Differential Expression
 
 In order to perform a Differential Expression Analysis, we need to identify 
 groups of cells across samples/conditions (depending on the experimental 
@@ -131,7 +131,7 @@ In our case we will focus on this second aspect to group cells according to the
 already annotated cell types to proceed with the computation of the 
 pseudo-bulk samples.
 
-## Pseudo-bulk samples
+### Pseudo-bulk samples
 
 To compute differences between groups of cells, a possible way is to 
 compute pseudo-bulk samples, where we mediate the gene signal of all the cells
@@ -155,7 +155,7 @@ summed
 
 ```
 
-## Differential Expression Analysis
+### Differential Expression Analysis
 
 The main advantage of using pseudo-bulk samples is the possibility to use 
 well-tested methods for differential analysis like `edgeR` and `DESeq2`, we will
@@ -307,7 +307,7 @@ cur.results[order(cur.results$PValue),]
 ```
 
 
-# Differential Abundance
+## Differential Abundance
 
 With DA we test for differences between clusters across conditions, to investigate
 which clusters change accordingly to the treatment (the tomato injection in our case).
@@ -333,7 +333,7 @@ y.ab <- estimateDisp(y.ab, design, trend="none")
 fit.ab <- glmQLFit(y.ab, design, robust=TRUE, abundance.trend=FALSE)
 ```
 
-## Background on compositional effect
+### Background on compositional effect
 
 As mentioned before, in DA we don't normalize our data with `calcNormFactors` 
 function, because this approach considers that most of the input features do not vary between conditions.
@@ -355,7 +355,7 @@ consider this aspect.
 
 We now look at different approaches for handling the compositional effect.
 
-## Assuming most labels do not change
+### Assuming most labels do not change
 
 We can use a similar approach used during the DEGs analysis, assuming that most
 labels are not changing, in particular if we think about the low number of DEGs 
@@ -379,7 +379,7 @@ summary(decideTests(res2))
 topTags(res2, n=10)
 ```
 
-##  Testing against a log-fold change threshold
+### Testing against a log-fold change threshold
 
 This other approach assumes that the composition bias introduces a spurious log2-fold change of no more than a \tau quantity for a non-DA label. 
 In other words, we interpret this as the maximum log-fold change in the total number of cells given by DA in other labels.
@@ -394,18 +394,13 @@ topTags(res.lfc)
 
 Addionally, the choice of \tau can be guided by other external experimental data, like a previous or a pilot experiment.
 
-# Session Info
+## Session Info
 
 ```{r, tidy=TRUE}
 sessionInfo()
 ```
 
-
-# Further Reading
-
-* OSCA book, Multi-sample analysis, [Chapters 1, 4, and 6](https://bioconductor.org/books/release/OSCA.multisample)
-
-# Exercises
+## Exercises
 
 :::::::::::::::::::::::::::::::::: challenge
 
@@ -444,6 +439,13 @@ TODO
 
 :::::::::::::::::::::::::::::::::::::::::::::
 
+:::::::::::::: checklist
+## Further Reading
+
+* OSCA book, Multi-sample analysis, [Chapters 1, 4, and 6](https://bioconductor.org/books/release/OSCA.multisample)
+
+::::::::::::::
+
 ::::::::::::::::::::::::::::::::::::: keypoints 
 
 - TODO