From 4fb85398b6b9c2c7f14a4b1416ceb84bcf42e442 Mon Sep 17 00:00:00 2001 From: Andrew Ghazi <6763470+andrewGhazi@users.noreply.github.com> Date: Tue, 12 Nov 2024 12:17:00 -0500 Subject: [PATCH] post sprint 2 --- episodes/multi-sample.Rmd | 37 ++++++++++++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 5 deletions(-) diff --git a/episodes/multi-sample.Rmd b/episodes/multi-sample.Rmd index d049f17..26cd1b4 100644 --- a/episodes/multi-sample.Rmd +++ b/episodes/multi-sample.Rmd @@ -34,7 +34,7 @@ As before, we will use the the wild-type data from the Tal1 chimera experiment: Note that this is a paired design in which for each biological replicate (pool 3, 4, and 5), we have both host and injected cells. -We start by loading the data and doing a quick exploratory analysis, essentially applying the normalization and visualization techniques that we have seen in the previous lectures to all samples. Note that this time we're selecting samples 5 to 10, not just 5 by itself. +We start by loading the data and doing a quick exploratory analysis, essentially applying the normalization and visualization techniques that we have seen in the previous lectures to all samples. Note that this time we're selecting samples 5 to 10, not just 5 by itself. Also note the `type = "processed"` argument: we are explicitly selecting the version of the data that has already been QC processed. ```{r chunk-opts, include=FALSE} knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE) @@ -58,7 +58,7 @@ sce colData(sce) ``` -To speed up computations, after removing doublets, we randomly select 50% cells per sample. +To make these examples run faster, we drop some problematic cell types (stripped nuclei and doublets) and also randomly select 50% of the cells per sample. 
```{r} drop <- sce$celltype.mapped %in% c("stripped", "Doublet") @@ -75,7 +75,7 @@ idx <- unlist(tapply(colnames(sce), sce$sample, function(x) { sce <- sce[,idx] ``` -We now normalize the data, run some dimensionality reduction steps, and visualize them in a tSNE plot. +We now normalize the data, run some dimensionality reduction steps, and visualize them in a tSNE plot. In this case we happen to have a ton of cell types to visualize, so we define a custom palette with a lot of visually distinct colors (adapted from the `polychrome` palette in the [`pals` package](https://cran.r-project.org/web/packages/pals/vignettes/pals_examples.html)). ```{r} sce <- logNormCounts(sce) @@ -92,8 +92,15 @@ sce$sample <- as.factor(sce$sample) plotTSNE(sce, colour_by = "sample") +color_vec <- c("#5A5156", "#E4E1E3", "#F6222E", "#FE00FA", "#16FF32", "#3283FE", + "#FEAF16", "#B00068", "#1CFFCE", "#90AD1C", "#2ED9FF", "#DEA0FD", + "#AA0DFE", "#F8A19F", "#325A9B", "#C4451C", "#1C8356", "#85660D", + "#B10DA1", "#3B00FB", "#1CBE4F", "#FA0087", "#333333", "#F7E1A0", + "#C075A6", "#782AB6", "#AAF400", "#BDCDFF", "#822E1C", "#B5EFB5", + "#7ED7D1", "#1C7F93", "#D85FF7", "#683B79", "#66B0FF", "#FBE426") + plotTSNE(sce, colour_by = "celltype.mapped") + - scale_color_discrete() + + scale_color_manual(values = color_vec) + theme(legend.position = "bottom") ``` @@ -140,6 +147,15 @@ plotTSNE(merged, colour_by = "batch") ``` +We can also see that when coloring by cell type, the cell types are now nicely confined to their own clusters for the most part: + +```{r} +plotTSNE(merged, colour_by = "celltype.mapped") + + scale_color_manual(values = color_vec) + + theme(legend.position = "bottom") +``` + + Once we removed the sample batch effect, we can proceed with the Differential Expression Analysis. @@ -494,7 +510,7 @@ Use the `pheatmap` package to create a heatmap of the abundances table. Does it :::::::::::::: hint -You can simply hand `pheatmap()` a matrix as its only argument. 
`pheatmap()` has a million options you can tweak, but the defaults are usually pretty good. +You can simply hand `pheatmap()` a matrix as its only argument. `pheatmap()` has a million options you can adjust, but the defaults are usually pretty good. Try to overlay sample-level information with the `annotation_col` argument for an extra challenge. ::::::::::::::::::::::: @@ -502,6 +518,17 @@ You can simply hand `pheatmap()` a matrix as its only argument. `pheatmap()` has ```{r} pheatmap(y.ab$counts) + +anno_df <- y.ab$samples[, c("tomato", "pool")] + +anno_df$pool <- as.character(anno_df$pool) + +anno_df$tomato <- ifelse(anno_df$tomato, + "tomato+", + "tomato-") + +pheatmap(y.ab$counts, + annotation_col = anno_df) ``` The top DA result was a decrease in ExE ectoderm in the tomato condition, which you can sort of see, especially if you `log1p()` the counts or discard rows that show much higher values. ExE ectoderm counts were much higher in samples 8 and 10 compared to 5, 7, and 9.