Skip to content

Commit

Permalink
qc and de with new loc of test data
Browse files Browse the repository at this point in the history
  • Loading branch information
abartlett004 committed Apr 23, 2024
1 parent 3a7dee4 commit 8c9b26b
Show file tree
Hide file tree
Showing 7 changed files with 83 additions and 68 deletions.
8 changes: 4 additions & 4 deletions inst/rmarkdown/templates/rnaseq/skeleton/params_de.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@ aim = 'short description'
analyst = 'person in the core'

# project params
root = "../"
root = "../.."
date = "YYYYMMDD"
column = "sample_type"
subset_column = NA
metadata_fn = "/n/data1/cores/bcbio/platform/test-data/rnaseq/GSE251845_colon_n3_tumor_normal/final/2024-04-18_samples/coldata.csv"
counts_fn = '/n/data1/cores/bcbio/platform/test-data/rnaseq/GSE251845_colon_n3_tumor_normal/final/2024-04-18_samples/counts/tximport-counts.csv'
basedir <- 'reports'
metadata_fn = "/n/data1/cores/bcbio/platform/bcbioR-test-data/rnaseq/bcbio/coldata.csv"
counts_fn = '/n/data1/cores/bcbio/platform/bcbioR-test-data/rnaseq/bcbio/tximport-counts.csv'
basedir <- 'results'
15 changes: 7 additions & 8 deletions inst/rmarkdown/templates/rnaseq/skeleton/params_qc.R
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
# info params
project = "Interon_rnaseq_hbc04916"
PI = 'Donggi Paik, Ph.D.'
experiment = '8 human cell lines from multiple origins. Treatment are 9 conditions including untreated.'
aim = 'Determine treatment effect within cell line and in combination with TNF'
analyst = 'Lorena Pantano, PhD'
project = "name_hbcXXXXX"
PI = 'person name'
experiment = 'short description'
aim = 'short description'
analyst = 'person in the core'


metadata_fn="../meta/hbc04916_metadata_bcbio.csv"
se_object="../counts/bcbio-se.rds"
cols_to_show=c("cell","treatment")
metadata_fn="/n/data1/cores/bcbio/platform/bcbioR-test-data/rnaseq/bcbio/coldata.csv"
se_object="/n/data1/cores/bcbio/platform/bcbioR-test-data/rnaseq/bcbio/bcbio-se.rds"
53 changes: 29 additions & 24 deletions inst/rmarkdown/templates/rnaseq/skeleton/reports/DEG/DEG.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,13 @@ sanitize_datatable = function(df, ...) {
- Comparison: `r paste0(params$subset_value, ': ', params$numerator, ' vs. ', params$denominator)`

```{r create_filenames}
filenames = str_interp("${params$subset_value}_${params$numerator}_vs_${params$denominator}")
if (!is.na(subset_value)){
filenames = str_interp("${params$subset_value}_${params$numerator}_vs_${params$denominator}")
} else {
filenames = str_interp("${params$numerator}_vs_${params$denominator}")
}
contrasts = c(column,params$numerator,params$denominator)
name_expression_fn=file.path(root,
Expand Down Expand Up @@ -182,7 +188,7 @@ resLFCS <- lfcShrink(de, coef=resultsNames(de)[ncol(vars)+2], type="apeglm")
res <- as.data.frame(resLFCS) %>%
rownames_to_column('gene_id') %>% left_join(rdata, by = 'gene_id') %>%
relocate(gene_name) %>% rename(lfc = log2FoldChange) %>%
relocate(gene_name) %>% dplyr::rename(lfc = log2FoldChange) %>%
mutate(pi = abs(lfc) * -log10(padj)) %>% arrange(-pi)
res_sig <- res %>% filter(padj < 0.05) %>% arrange(padj)
Expand Down Expand Up @@ -228,15 +234,6 @@ universe=res %>%
filter(!is.na(padj)) %>% pull(gene_id)
mapping = AnnotationDbi::select(org.Hs.eg.db, universe, 'ENTREZID', 'ENSEMBL')
ranks_df <- res %>%
filter(padj < 0.01, !is.na(padj)) %>% # only if enough DE genes
arrange((lfc)) %>%
left_join(mapping, by = c("gene_id"="ENSEMBL")) %>%
rename(entrez_id=ENTREZID) %>%
filter(!is.na(entrez_id) & !is.na(padj))
ranks <- ranks_df$lfc
names(ranks) <- ranks_df$entrez_id
all_in_life=list(
msigdbr(species = "human", category = "H") %>% mutate(gs_subcat="Hallmark"),
msigdbr(species = "human", category = "C2", subcategory = "CP:REACTOME"),
Expand Down Expand Up @@ -286,19 +283,27 @@ pathways_ora_all %>% sanitize_datatable()

```{r write-files}
counts_norm=ruvset$normalizedCounts %>% as.data.frame() %>%
rownames_to_column("gene_id")
write_csv(counts_norm %>%
mutate(subset = params$subset_value,
comparison = str_interp("${params$numerator}_vs_${params$denominator}")),
name_expression_fn)
write_csv(res %>%
mutate(subset = params$subset_value,
comparison = str_interp("${params$numerator}_vs_${params$denominator}")),
name_deg_fn)
write_csv(pathways_long %>%
mutate(subset = params$subset_value,
comparison = str_interp("${params$numerator}_vs_${params$denominator}")),
name_pathways_fn)
rownames_to_column("gene_id") %>%
mutate(comparison = str_interp("${params$numerator}_vs_${params$denominator}"))
res_for_writing <- res %>%
mutate(comparison = str_interp("${params$numerator}_vs_${params$denominator}"))
pathways_for_writing <- pathways_long %>%
mutate(comparison = str_interp("${params$numerator}_vs_${params$denominator}"))
if (!is.na(subset_value)){
counts_norm <- counts_norm %>%
mutate(subset = params$subset_value)
res_for_writing <- res_for_writing %>%
mutate(subset = params$subset_value)
pathways_for_writing <- pathways_for_writing %>%
mutate(subset = params$subset_value)
}
write_csv(counts_norm, name_expression_fn)
write_csv(res_for_writing, name_deg_fn)
write_csv(pathways_for_writing, name_pathways_fn)
```
# R session

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ render_de <- function(numerator, denominator, subset_value = NA,
params_file = '../../params_de.R'){

rmarkdown::render(input = "./inst/rmarkdown/templates/rnaseq/skeleton/reports/DEG/DEG.Rmd",
output_dir = "./inst/rmarkdown/templates/rnaseq/skeleton/reports/",
output_dir = "./inst/rmarkdown/templates/rnaseq/skeleton/reports/DEG/",
output_format = "html_document",
output_file = ifelse(!is.na(subset_value),
paste0('DE_', subset_value, '_', numerator, '_vs_', denominator, '.html'),
Expand Down
64 changes: 33 additions & 31 deletions inst/rmarkdown/templates/rnaseq/skeleton/reports/QC/QC.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ output:
editor_options:
chunk_output_type: console
params:
params_file: code/params_qc.R
params_file: ./inst/rmarkdown/templates/rnaseq/skeleton/params_qc.R
---


Expand All @@ -34,6 +34,7 @@ library(ggrepel)
library(pheatmap)
library(RColorBrewer)
library(DT)
library(pheatmap)
ggplot2::theme_set(theme_light(base_size = 14))
opts_chunk[["set"]](
cache = FALSE,
Expand Down Expand Up @@ -113,9 +114,10 @@ sanitize_datatable = function(df, ...) {
# Samples and metadata

```{r load_metadata}
meta_df=read_csv(metadata_fn)
meta_df=read_csv(metadata_fn) %>% mutate(sample = tolower(description)) %>%
dplyr::select(-description)
ggplot(meta_df, aes(cell, fill=treatment)) +
ggplot(meta_df, aes(sample_type, fill = sample_type)) +
geom_bar() + ylab("") + xlab("") +
scale_fill_brewer(palette = "Set1")
```
Expand All @@ -126,13 +128,13 @@ se <- readRDS(se_object) #local
metrics <- metadata(se)$metrics %>%
full_join(meta_df, by = c("sample" = "sample"))
full_join(meta_df , by = c("sample" = "sample"))
meta_sm <- meta_df %>%
as.data.frame() %>%
column_to_rownames("sample")
meta_sm[,cols_to_show] %>% sanitize_datatable()
meta_sm %>% sanitize_datatable()
```

Expand All @@ -144,9 +146,9 @@ Here, we want to see consistency and a minimum of 20 million reads.

```{r plot_total_reads}
metrics %>%
ggplot(aes(x = cell,
y = total_reads,
color = treatment )) +
ggplot(aes(x = sample_type,
y = total_reads,
color = sample_type)) +
geom_point(alpha=0.5) +
coord_flip() +
scale_y_continuous(name = "million reads") +
Expand All @@ -168,8 +170,9 @@ The genomic mapping rate represents the percentage of reads mapping to the refer
```{r plot_mapping_rate}
metrics$mapped_reads_pct <- round(metrics$mapped_reads/metrics$total_reads*100,1)
metrics %>%
ggplot(aes(x = cell,
y = mapped_reads_pct, color = treatment)) +
ggplot(aes(x = sample_type,
y = mapped_reads_pct,
color = sample_type)) +
geom_point() +
coord_flip() +
scale_color_brewer(palette = "Set1") +
Expand All @@ -193,8 +196,8 @@ genes_detected <- summarise(genes_detected,
metrics <- metrics %>%
left_join(genes_detected, by = c("sample" = "name"))
ggplot(metrics,aes(x = cell,
y = n_genes, color = treatment)) +
ggplot(metrics,aes(x = sample_type,
y = n_genes, color = sample_type)) +
geom_point() +
coord_flip() +
scale_color_brewer(palette = "Set1") +
Expand All @@ -213,7 +216,7 @@ This plot shows how complex the samples are. We expect samples with more reads t
metrics %>%
ggplot(aes(x = total_reads,
y = n_genes,
color = treatment)) +
color = sample_type)) +
geom_point()+
scale_x_log10() +
scale_color_brewer(palette = "Set1") +
Expand All @@ -227,9 +230,9 @@ Here we are looking for consistency, and exonic mapping rates around 70% or 75%

```{r plot_exonic_mapping_rate}
metrics %>%
ggplot(aes(x = cell,
ggplot(aes(x = sample_type,
y = exonic_rate * 100,
color = treatment)) +
color = sample_type)) +
geom_point() +
ylab("Exonic rate %") +
ggtitle("Exonic mapping rate") +
Expand All @@ -247,9 +250,9 @@ Here, we expect a low intronic mapping rate (≤ 15% - 20%)

```{r plot_intronic_mapping_rate}
metrics %>%
ggplot(aes(x = cell,
ggplot(aes(x = sample_type,
y = intronic_rate * 100,
color = treatment)) +
color = sample_type)) +
geom_point() +
ylab("Intronic rate %") +
ggtitle("Intronic mapping rate") +
Expand All @@ -267,9 +270,9 @@ Here, we expect a low intergenic mapping rate, which is true for all samples.

```{r plot_intergenic_mapping_rate}
metrics %>%
ggplot(aes(x = cell,
ggplot(aes(x = sample_type,
y = intergenic_rate * 100,
color = treatment)) +
color = sample_type)) +
geom_point() +
ylab("Intergenic rate %") +
ggtitle("Intergenic mapping rate") +
Expand All @@ -286,9 +289,9 @@ Samples should have a ribosomal RNA (rRNA) "contamination" rate below 10%
# for some bad samples it could be > 50%
rrna_ylim <- max(round(metrics$r_rna_rate*100, 2)) + 10
metrics %>%
ggplot(aes(x = cell,
ggplot(aes(x = sample_type,
y = r_rna_rate * 100,
color = treatment)) +
color = sample_type)) +
geom_point() +
ylab("rRNA rate, %")+
ylim(0, rrna_ylim) +
Expand All @@ -303,9 +306,9 @@ There should be little bias, i.e. the values should be close to 1, or at least c

```{r plot_53_bias}
metrics %>%
ggplot(aes(x = cell,
ggplot(aes(x = sample_type,
y = x5_3_bias,
color = treatment)) +
color = sample_type)) +
geom_point() +
ggtitle("5'-3' bias") +
coord_flip() +
Expand All @@ -319,7 +322,7 @@ metrics %>%
We expect consistency in the box plots here between the samples, i.e. the distribution of counts across the genes is similar

```{r plot_counts_per_gene}
metrics_small <- metrics %>% dplyr::select(sample, treatment)
metrics_small <- metrics %>% dplyr::select(sample, sample_type)
metrics_small <- left_join(sample_names, metrics_small)
counts <-
Expand All @@ -330,9 +333,9 @@ counts <-
counts <- left_join(counts, metrics, by = c("name" = "sample"))
ggplot(counts, aes(cell,
ggplot(counts, aes(sample_type,
log2(counts+1),
fill = treatment)) +
fill = sample_type)) +
geom_boxplot() +
scale_fill_brewer(palette = "Set1") +
ggtitle("Counts per gene, all non-zero genes") +
Expand All @@ -355,17 +358,16 @@ raw_counts <- assays(se)[["raw"]] %>%
filter(rowSums(.)!=0) %>%
as.matrix()
colnames(raw_counts) <- str_replace(colnames(raw_counts), "^x", "")
vst <- vst(raw_counts)
#fix samples names
coldat_for_pca <- as.data.frame(metrics)
rownames(coldat_for_pca) <- coldat_for_pca$sample
coldat_for_pca <- coldat_for_pca[colnames(raw_counts),]
pca1 <- degPCA(vst, coldat_for_pca,
condition = "treatment", data = T)[["plot"]]
condition = "sample_type", data = T)[["plot"]]
pca2 <- degPCA(vst, coldat_for_pca,
condition = "cell", data = T)[["plot"]]
condition = "sample_type", data = T, pc1="PC3", pc2="PC4")[["plot"]]
pca1 + scale_color_brewer(palette = "Set1")
pca2 + scale_color_brewer(palette = "Set1")
Expand All @@ -377,13 +379,13 @@ variables=degCovariates(vst, coldat_for_pca)
```


```{r clustering fig, fig.width = 10, fig.asp = .62, eval=FALSE}
```{r clustering fig, fig.width = 10, fig.asp = .62}
## Hierarchical clustering
vst_cor <- cor(vst)
p <- pheatmap(vst_cor, annotation =coldat_for_pca[,c("cell", "treatment"), drop=F], show_rownames = T, show_colnames = T)
p <- pheatmap(vst_cor, annotation =coldat_for_pca[,c("sample_type"), drop=F], show_rownames = T, show_colnames = T)
p
```

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
library(rmarkdown)

rmarkdown::render("./inst/rmarkdown/templates/rnaseq/skeleton/reports/QC/QC.Rmd",
output_dir = "./inst/rmarkdown/templates/rnaseq/skeleton/reports/QC/",
clean = TRUE,
output_format = "html_document",
params = list(
params_file = '../../params_qc.R')
)
Empty file.

0 comments on commit 8c9b26b

Please sign in to comment.