.Rhistory

# See https://stats.stackexchange.com/questions/81483/warning-in-r-chi-squared-approximation-may-be-incorrect
# for more info on the `simulate.p.value` parameter
x2 <- chisq.test(x = mutData, simulate.p.value = T)
} else {
x2 <- chisq.test(x = mutData)
}
results <- data.frame(Mutation = x,
X2 = round(x2$statistic, 4),
p.val = round(x2$p.value, 6),
Simulated.p.val = ifelse(any(mutData < 5), "Yes", "No"))
return(results)
}) %>%
set_names(rownames(freqTable)) %>%
bind_rows()
View(chisq_results)
knitr::opts_chunk$set(echo = TRUE)
library(ComplexHeatmap)
library(circlize)
library(edgeR)
library(here)
library(patchwork)
library(pheatmap)
library(RColorBrewer)
library(tidyverse)
source("../../../../../../scripts/aleonti/FilteringNormalizing_Counts.R")
source("../../../../../../scripts/aleonti/DE_limma_voom.R")
source("../../../../../../scripts/aleonti/Survival_Analysis.R")
here()
# CEBPA mutation allelic status for patients from TARGET
# Recoding the mutation categorical data to match the terms used in the manuscript
cebpa_mutStatus <- readxl::read_excel(here("Mutation_data", "CEBPA_mutation_allelic_status_RR.xlsx"), na = "NA") %>%
filter(!is.na(USI)) %>%
mutate(`CEBPA Allelic Status (Recoded)` = case_when(`CEBPA Allelic Status` == "Double Mutant" ~ "CEBPA-dm",
`CEBPA Allelic Status` == "Single Mutant" ~ "CEBPA-bZip")) %>%
as.data.frame()
rownames(cebpa_mutStatus) <- cebpa_mutStatus$USI
oncoprint_data <- readxl::read_excel("Mutation_data/CEBPA_oncoprint_input_KT.xlsx")
table(oncoprint_data$Sample)
# Ribodepleted expression data from TARGET 1031 & 0531
tpms <- readRDS("/Volumes/homes/Personal_Working_Copies/Ribodepleted_Counts_1031_0531/TARGET_AAML1031_0531_Ribodepleted_TPMs_filtered_for_dupGenes_no_MPNs_05.20.2019.RDS")
# Ribodepleted expression data for the "Relapse" seq batch
load("/Volumes/homes/Personal_Working_Copies/Ribodepleted_Counts_1031_0531/TARGET_AML1031_Ribodepleted_Relapse_Counts_filteredForDupGenes_07.17.2019.RData")
# Only retaining Dx timepoint samples & removing extra identifier info from the colnames
counts_relapse$TPMs <- counts_relapse$TPMs[,grep("09A|03A", colnames(counts_relapse$TPMs))]
colnames(counts_relapse$TPMs) <- str_match(colnames(counts_relapse$TPMs), pattern = "P[A-Z]{5}")
# Removing batch replicates (these were also sequenced in batch 2)
counts_relapse$TPMs <- counts_relapse$TPMs[,!colnames(counts_relapse$TPMs) %in% colnames(tpms)]
intersect(colnames(tpms), colnames(counts_relapse$TPMs)) # Making sure no duplicates remain
# Adding the remaining Dx patients
tpms <- cbind(counts_relapse$TPMs, tpms)
tpms_cebpaOnly <- tpms[, intersect(cebpa_mutStatus$USI, colnames(tpms))]
cebpa_mutStatus %>%
filter(USI %in% colnames(tpms)) %>%
count(`CEBPA Allelic Status (Recoded)`)
# Dx timepoint fractional counts
fracCounts <- readRDS("/Volumes/homes/Personal_Working_Copies/Ribodepleted_Counts_1031_0531/TARGET_AAML1031_0531_Ribodepleted_fracCounts_filtered_for_dupGenes_no_MPNs_05.28.2019.RDS")
dim(fracCounts)
# Only retaining Dx timepoint samples & removing extra identifier info from the colnames
counts_relapse$fracCounts <- counts_relapse$fracCounts[,grep("09A|03A", colnames(counts_relapse$fracCounts))]
colnames(counts_relapse$fracCounts) <- str_match(colnames(counts_relapse$fracCounts), pattern = "P[A-Z]{5}")
# Removing batch replicates (these were also sequenced in batch 2)
counts_relapse$fracCounts <- counts_relapse$fracCounts[,!colnames(counts_relapse$fracCounts) %in% colnames(fracCounts)]
intersect(colnames(fracCounts), colnames(counts_relapse$fracCounts)) # Making sure no duplicates remain
# Adding the remaining Dx patients
fracCounts <- cbind(counts_relapse$fracCounts, fracCounts)
fracCounts_cebpaOnly <- fracCounts[, intersect(cebpa_mutStatus$USI, colnames(fracCounts))]
dim(tpms)
dim(fracCounts)
dim(tpms_cebpaOnly)
dim(fracCounts_cebpaOnly)
setdiff(colnames(tpms), colnames(fracCounts)) %in% cebpa_mutStatus$USI
# Gene list from Jenny to use for clustering
UMAP_geneList <- read.csv("../2019.12.31_UMAP_Clustering/Results/Dx_AMLonly_forCEBPA_PCAselect_sg6816/TARGET_AML_sg6816_Genelist_forCEBPA_Analysis_3.10.20.csv", stringsAsFactors = F)
table(UMAP_geneList$x %in% rownames(tpms)) # All of these genes are in the TPMs dataframe I'm using
rownames(tpms)[1:5] # Checking to see if the TPMs dataframe already has the USIs as rownames
tpms.hm1 <- tpms[as.character(UMAP_geneList$x),] # Creating a new TPMs data object to use for generating heatmaps
length(grep("^BM|^RO", colnames(tpms.hm1))) # Checking to see if any NBMs are still in the dataset (yes)
any(is.na(tpms.hm1))
# Making sure none of the genes have a variance of 0 - it makes it impossible to scale + cluster them
var <- matrixStats::rowVars(as.matrix(tpms.hm1))
range(var) # No constant variance genes - looking good!
tpms.hm1 <- log2(tpms.hm1 + 1) # Log2 transforming the counts to help with heatmap visualization
tpms.hm1 <- t(scale(t(tpms.hm1), center = T, scale = T)) # Need to perform scaling by row, not by column
tpms.hm1[1:5,1:5]
range(tpms.hm1)
any(is.nan(tpms.hm1))
dim(tpms.hm1)
head(rownames(tpms.hm1)) # Making sure rownames weren't dropped
anno_data <- data.frame(USI = colnames(tpms.hm1))
anno_data$`Allelic Status` <- cebpa_mutStatus$`CEBPA Allelic Status (Recoded)`[match(anno_data$USI, cebpa_mutStatus$USI)]
# Recoding this column to include other AMLs and NBM samples
anno_data$`Allelic Status` <- case_when(grepl("^RO|^BM", anno_data$USI) ~ "NBM",
is.na(anno_data$`Allelic Status`) ~ "CEBPA-WT",
TRUE ~ anno_data$`Allelic Status`)
# Setting factor level order for the figures
anno_data$`Allelic Status` <- factor(anno_data$`Allelic Status`, levels = c("CEBPA-bZip", "CEBPA-dm", "CEBPA-WT", "NBM"))
# Reading in the CDEs for 1031 & 0531 so I can add mutation data for CSF3R and GATA2 to the heatmap (recommended by Soheil)
cdes <- readRDS("../../../../Clinical/CDE/Merged/TARGET_AML_0531_1031_merged_CDEs_2.12.20.RDS") %>%
dplyr::select(Reg., USI, CSF3R, GATA2, FLT3.ITD.positive., OS.ID, Event.ID, OS.time..days., EFS.time..days.)
anno_data <- left_join(anno_data, cdes[,c("USI", "CSF3R", "GATA2")], by = "USI")
table(anno_data$CSF3R, useNA = "always")
# Recoding CSF3R into a mutation-status column with the levels Yes / No / Unknown / NBM.
# Patients with no CSF3R value after the join are recoded to "Unknown". They used to be
# labelled "Untested", which is not one of the factor levels set below, so factor()
# silently converted them straight back to NA.
anno_data$CSF3R <- case_when(anno_data$`Allelic Status` == "NBM" ~ "NBM",
                             is.na(anno_data$CSF3R) ~ "Unknown",
                             anno_data$CSF3R %in% c("T618I", "T640N", "Q754*", "c.2252insC/50") ~ "Yes",
                             TRUE ~ anno_data$CSF3R)
anno_data$CSF3R <- factor(anno_data$CSF3R, levels = c("Yes", "No", "Unknown", "NBM"))
table(anno_data$GATA2, useNA = "always")
# Recoding GATA2 into Yes / Unknown / NBM.
# Missing values are handled explicitly: `NA == "Unknown"` evaluates to NA, so without the
# is.na() check a patient with no GATA2 entry fell through to the catch-all and was
# labelled "Yes".
# NOTE(review): every other non-missing value is still labelled "Yes" - confirm against
# table(anno_data$GATA2) that the column never holds a negative result such as "No".
anno_data$GATA2 <- case_when(anno_data$`Allelic Status` == "NBM" ~ "NBM",
                             is.na(anno_data$GATA2) | anno_data$GATA2 == "Unknown" ~ "Unknown",
                             TRUE ~ "Yes")
anno_data$GATA2 <- factor(anno_data$GATA2, levels = c("Yes", "Unknown", "NBM"))
lapply(anno_data[-1], table, useNA = "always")
rownames(anno_data) <- anno_data$USI
# Creating the top annotation bars for the heatmap
anno_colors <- list(`Allelic Status` = c("magenta", "green", "grey90", "black"),
CSF3R = c("orangered", "dimgrey", "grey80", "black"),
GATA2 = c("gold", "grey80", "black"))
names(anno_colors$`Allelic Status`) <- levels(anno_data$`Allelic Status`)
names(anno_colors$CSF3R) <- levels(anno_data$CSF3R)
names(anno_colors$GATA2) <- levels(anno_data$GATA2)
table(anno_data$`Allelic Status`)
tpms[1:5,1:5] # Checking to see if the TPMs dataframe already has the USIs as rownames
tpms.hm2 <- tpms[UMAP_geneList$x, intersect(cebpa_mutStatus$USI, colnames(tpms))] # Creating a new TPMs data object to use for generating heatmaps
dim(tpms.hm2)
identical(rownames(tpms.hm2), rownames(tpms.hm1))
# So a lot of these have a variance of 0 within the CEBPA patients
# Row variances are computed once and reused for the checks and the filter below.
gene_vars_hm2 <- matrixStats::rowVars(as.matrix(tpms.hm2))
any(gene_vars_hm2 == 0) # Shoot
sum(gene_vars_hm2 == 0) # Will need to remove these genes from the dataset
# Filtering with a logical mask instead of -which(): if no gene had zero variance,
# -which(...) would be an empty index and would drop every row instead of none.
# `%in% 0` never returns NA, so rows with an NA variance are kept, as before.
tpms.hm2 <- tpms.hm2[!(gene_vars_hm2 %in% 0), ]
any(matrixStats::rowVars(as.matrix(tpms.hm2)) == 0)
length(grep("^BM|^RO", colnames(tpms.hm2))) # Checking to see if any NBMs are still in the dataset (no)
tpms.hm2 <- log2(tpms.hm2 + 1) # Log2 transforming the counts to help with heatmap visualization
tpms.hm2 <- t(scale(t(tpms.hm2), center = T, scale = T)) # Need to perform scaling by row, not by column
any(is.nan(tpms.hm2))
range(tpms.hm2)
# The wrong way to do it
# tpms.hm2 <- tpms.hm1[,intersect(cebpa_mutStatus$USI, colnames(tpms))]
# Annotation rows for the CEBPA-only heatmap, taken in the same order as the columns of
# tpms.hm2. subset() kept the row order of anno_data (the column order of the full TPM
# matrix), whereas `regs` below follows the column order of tpms.hm2, so assigning `regs`
# as row names could attach the wrong registration number to an annotation row.
anno_data2 <- anno_data[match(colnames(tpms.hm2), anno_data$USI), ]
table(colnames(tpms.hm2) %in% cdes$USI)
# New reg. numbers to replace the colnames of the heatmap exp dataframe with
regs <- cdes$Reg.[match(colnames(tpms.hm2), cdes$USI)]
# Row names must be complete and unique; fail before renaming anything if they are not
stopifnot(!anyNA(regs), !anyDuplicated(regs))
colnames(tpms.hm2) <- regs
rownames(anno_data2) <- regs
tpms.hm2[1:5, 1:5]
plyr::count(oncoprint_data$Gene)
#----- CSF3R --------#
nrow(anno_data %>% filter(`Allelic Status` %in% c("CEBPA-dm", "CEBPA-bZip") & CSF3R == "Yes")) # Patients that are both CEBPA+ and CSF3R+
table(anno_data$CSF3R)
#----- GATA2 -------#
nrow(anno_data %>% filter(`Allelic Status` %in% c("CEBPA-dm", "CEBPA-bZip") & GATA2 == "Yes")) # Patients that are both CEBPA+ & GATA2+
table(anno_data$GATA2)
# Creating subsets of the original oncoprint data to refer to which patients are CEBPA bZip/dm
bzip <- subset(oncoprint_data, Gene == "CEBPA_bZip")
bzip$CEBPA.bZip <- "CEBPA_bZip"
dm <- subset(oncoprint_data, Gene == "CEBPA_dm")
dm$CEBPA.dm <- "CEBPA_dm"
chisq_data <- oncoprint_data %>%
left_join(., dm[c("Sample", "CEBPA.dm")], by = "Sample") %>%
left_join(., bzip[c("Sample", "CEBPA.bZip")], by = "Sample") %>%
filter(!Gene %in% c("CEBPA_bZip", "CEBPA_dm")) # Removes patients with CEBPA muts & no co-occuring mutations
table(chisq_data$Gene)
# Would have liked to do this with lapply for both conditions, but dplyr::count
# doesn't like being passed variables that represent colnames.... ugh
freqTable_bzip <- chisq_data %>% # Making a frequency table of co-occuring mutations in bZip patients
group_by(Gene) %>%
count(CEBPA.bZip) %>%
spread(CEBPA.bZip, n) %>%
mutate_at(vars(CEBPA_bZip), ~ifelse(is.na(.), 0, .))
# Making a frequency table of co-occuring mutations in dm patients
freqTable_dm <- chisq_data %>%
group_by(Gene) %>%
count(CEBPA.dm) %>%
spread(CEBPA.dm, n) %>%
mutate_at(vars(CEBPA_dm), ~ifelse(is.na(.), 0, .))
freqTable <- left_join(freqTable_bzip[,1:2], freqTable_dm[,1:2], by = "Gene") %>%
tibble::column_to_rownames("Gene")
ids <- filter(oncoprint_data, Gene == "CSF3R") %>% pull(Sample)
filter(oncoprint_data, Sample %in% ids)
chisq_data$Mutation <- coalesce(chisq_data$CEBPA.bZip, chisq_data$CEBPA.dm)
chisq_data <- chisq_data %>%
mutate_at(vars(Gene), ~gsub("\\_", " ", .)) %>%
mutate_at(vars(Mutation), ~gsub("\\_", "-", .)) %>%
mutate_at(vars(Gene), ~fct_relevel(., "Normal Karyotype", after = Inf))
table(chisq_data$Gene, chisq_data$Mutation)
freqPlot <- ggplot(chisq_data, aes(x = Gene, fill = Mutation)) +
theme_classic() +
geom_bar() +
labs(y = "Frequency of Mutation\n", x = "Mutated Gene", fill = "Co-occuring \nCEBPA Mutation") +
theme(axis.text.x = element_text(angle = 25, hjust = 1),
axis.title = element_text(face = "bold")) +
scale_fill_manual(values = c("magenta", "green"))
freqPlot
# Performs a chi-squared test on every fusion group to determine if they significantly co-occur with CEBPA muts
# NOTE(review): the body is empty, so calling this returns NULL and `mutation` is never used.
# The per-mutation test is instead written inline in the lapply() call that builds `chisq_results`.
perform_freqSig_tests <- function(mutation) {
}
# Saving the co-occurring mutation barplot. The first argument of ggsave() is `filename`,
# so the plot is passed by name; passing it first put the plot object where the path belongs.
# NOTE(review): the "3030" in the file name looks like a typo for 2020 - left unchanged here.
ggsave(filename = "Figures/TARGET_AML_CEBPA_Cooccuring_Muts_Barplot_03.24.3030.png", plot = freqPlot, width = 4, height = 3)
# Stacked barplot of how often each gene is mutated, filled by the co-occurring CEBPA mutation type
freqPlot <- ggplot(chisq_data, aes(x = Gene, fill = Mutation)) +
  theme_classic() +
  geom_bar() +
  labs(y = "Frequency\n", x = "Mutated Gene", fill = "Co-occurring \nCEBPA Mutation") +
  theme(axis.text.x = element_text(angle = 25, hjust = 1),
        axis.title = element_text(face = "bold")) +
  # Colours are named so they stay attached to the right group even if the level order
  # of `Mutation` changes (same magenta/green pairing as the heatmap annotation colours)
  scale_fill_manual(values = c("CEBPA-bZip" = "magenta", "CEBPA-dm" = "green"))
# Written once at the final size; the earlier 4x3 and 6x3 saves went to the same file
# and were overwritten by this one.
# NOTE(review): the "3030" in the file name looks like a typo for 2020 - left unchanged here.
ggsave(filename = "Figures/TARGET_AML_CEBPA_Cooccuring_Muts_Barplot_03.24.3030.png", plot = freqPlot, width = 6, height = 4)
x <- "NRAS"
mutData <- freqTable[x,]
if (any(mutData < 5)) {
# See https://stats.stackexchange.com/questions/81483/warning-in-r-chi-squared-approximation-may-be-incorrect
# for more info on the `simulate.p.value` parameter
x2 <- chisq.test(x = mutData, simulate.p.value = T)
} else {
x2 <- chisq.test(x = mutData)
}
results <- data.frame(Mutation = x,
X2 = round(x2$statistic, 4),
p.val = round(x2$p.value, 6),
Simulated.p.val = ifelse(any(mutData < 5), "Yes", "No"))
# Runs chisq.test() on each row of freqTable (one co-occurring mutation at a time) and
# collects the statistic and p-value into a single data frame, one row per mutation.
# NOTE(review): this assumes freqTable holds only the two count columns (CEBPA_bZip, CEBPA_dm).
chisq_results <- lapply(rownames(freqTable), function(x) {
  mutData <- freqTable[x, ]
  # When any count is below 5, the p-value is simulated instead.
  # See https://stats.stackexchange.com/questions/81483/warning-in-r-chi-squared-approximation-may-be-incorrect
  # for more info on the `simulate.p.value` parameter
  use_simulation <- any(mutData < 5)
  x2 <- chisq.test(x = mutData, simulate.p.value = use_simulation)
  data.frame(Mutation = x,
             # unname(): otherwise the "X-squared" name of the statistic becomes the row name
             X2 = round(unname(x2$statistic), 4),
             p.val = round(x2$p.value, 6),
             Simulated.p.val = if (use_simulation) "Yes" else "No",
             stringsAsFactors = FALSE)
}) %>%
  set_names(rownames(freqTable)) %>%
  bind_rows()
View(chisq_results)
View(freqTable)
View(chisq_results)
freqTable$Mutation <- rownames(freqTable)
freqTable <- left_join(freqTable, chisq_results, by = "Mutation")
View(freqTable)
freqTable <- freqTable %>%
tibble::rownames_to_column("Mutation") %>%
left_join(freqTable, chisq_results, by = "Mutation") %>%
dplyr::select(Mutation, everything())
# Creating subsets of the original oncoprint data to refer to which patients are CEBPA bZip/dm
bzip <- subset(oncoprint_data, Gene == "CEBPA_bZip")
bzip$CEBPA.bZip <- "CEBPA_bZip"
dm <- subset(oncoprint_data, Gene == "CEBPA_dm")
dm$CEBPA.dm <- "CEBPA_dm"
chisq_data <- oncoprint_data %>%
left_join(., dm[c("Sample", "CEBPA.dm")], by = "Sample") %>%
left_join(., bzip[c("Sample", "CEBPA.bZip")], by = "Sample") %>%
filter(!Gene %in% c("CEBPA_bZip", "CEBPA_dm")) # Removes patients with CEBPA muts & no co-occuring mutations
table(chisq_data$Gene)
# Would liked to have done this with lapply for both conditions, but dplyr::count
# doesn't like being passed variables that represent colnames.... ugh
freqTable_bzip <- chisq_data %>% # Making a frequency table of co-occuring mutations in bZip patients
group_by(Gene) %>%
count(CEBPA.bZip) %>%
spread(CEBPA.bZip, n) %>%
mutate_at(vars(CEBPA_bZip), ~ifelse(is.na(.), 0, .))
# Making a frequency table of co-occuring mutations in dm patients
freqTable_dm <- chisq_data %>%
group_by(Gene) %>%
count(CEBPA.dm) %>%
spread(CEBPA.dm, n) %>%
mutate_at(vars(CEBPA_dm), ~ifelse(is.na(.), 0, .))
freqTable <- left_join(freqTable_bzip[,1:2], freqTable_dm[,1:2], by = "Gene") %>%
tibble::column_to_rownames("Gene")
ids <- filter(oncoprint_data, Gene == "CSF3R") %>% pull(Sample)
filter(oncoprint_data, Sample %in% ids)
freqTable <- freqTable %>%
tibble::rownames_to_column("Mutation") %>%
left_join(freqTable, chisq_results, by = "Mutation") %>%
dplyr::select(Mutation, everything())
freqTable <- left_join(freqTable_bzip[,1:2], freqTable_dm[,1:2], by = "Gene") %>%
tibble::column_to_rownames("Gene")
View(freqTable)
# Attaching the chi-squared results to the frequency table, with the mutation name as the
# first column. The piped data frame is already the first argument of left_join(), so only
# the table to join is supplied; passing `freqTable` again, or leaving empty arguments
# (`left_join(,, ...)`), makes the call fail.
freqTable2 <- freqTable %>%
  tibble::rownames_to_column(var = "Mutation") %>%
  left_join(chisq_results, by = "Mutation") %>%
  dplyr::select(Mutation, everything())
View(freqTable_bzip)
View(freqTable2)
# Exporting the first five columns of the frequency + chi-squared table to Excel.
# The writexl function is write_xlsx() (writexl has no write.xlsx), and the file is written
# once: the earlier calls targeted the same path and were overwritten by this one.
writexl::write_xlsx(freqTable2[, 1:5], "TARGET_AML_CEBPA_dm_bZip_CoOccuringMut_Frequency_03.24.2020.xlsx")
shiny::runApp('/Volumes/homes/meshinchi_s/workingDir/For_Soheil/Shiny_App/target-data-viz')
install.packages("shinydashboard")
runApp('/Volumes/homes/meshinchi_s/workingDir/For_Soheil/Shiny_App/target-data-viz')
install.packages("shinythemes")
runApp('/Volumes/homes/meshinchi_s/workingDir/For_Soheil/Shiny_App/target-data-viz')
knitr::opts_chunk$set(echo = TRUE)
adc <- readxl::read_excel("../ADC_and_Targets_Database_with_clintrialsGov_v3.5.xlsx")
cart <- readxl::read_excel("../CARTcell_and_Targets_Database_v1.xlsx")
adc <- readxl::read_excel("../ADC_and_Targets_Database_with_clintrialsGov_v3.5.xlsx")
cart <- readxl::read_excel("../CARTcell_and_Targets_Database_v1.xlsx")
all_adcs_carts <- bind_rows(adcs, carts)
all_adcs_carts <- bind_rows(adc, cart)
write.csv(all_adcs_carts, file = "../ADC_and_CARTcell_Targets_Database_ADCReview_clinicaltrialsGov.xlsx")
write.csv(all_adcs_carts, file = "../ADC_and_CARTcell_Targets_Database_ADCReview_clinicaltrialsGov.csv")
```{r}
write.csv(all_adcs_carts, file = "../ADC_and_CARTcell_Targets_Database_ADCReview_clinicaltrialsGov.csv")
all_targets <- read.csv("../ADC_and_CARTcell_Targets_Database_ADCReview_clinicaltrialsGov.xlsx")
adc <- readxl::read_excel("../ADC_Targets_Database_with_clintrialsGov_v3.5.xlsx")
cart <- readxl::read_excel("../CARTcell_Targets_Database_v1.xlsx")
all_adcs_carts <- bind_rows(adc, cart)
all_targets <- read.csv("../ADC_and_CARTcell_Targets_Database_ADCReview_clinicaltrialsGov.csv")
table(all_targets$X)
write.csv(all_adcs_carts, file = "../ADC_and_CARTcell_Targets_Database_ADCReview_clinicaltrialsGov.csv", row.names = F)
all_targets <- read.csv("../ADC_and_CARTcell_Targets_Database_ADCReview_clinicaltrialsGov.csv")
table(all_targets$Treatment.type)
runApp('/Volumes/homes/meshinchi_s/workingDir/For_Soheil/Shiny_App/target-data-viz')
runApp('/Volumes/homes/meshinchi_s/workingDir/For_Soheil/Shiny_App/target-data-viz')
?infobox
infoBox
?infoBox
runApp('/Volumes/homes/meshinchi_s/workingDir/For_Soheil/Shiny_App/target-data-viz')
runApp('/Volumes/homes/meshinchi_s/workingDir/For_Soheil/Shiny_App/target-data-viz')
runApp('/Volumes/homes/meshinchi_s/workingDir/For_Soheil/Shiny_App/target-data-viz')
adc_cart_targetData <- read.csv("data/ADC_and_CARTcell_Targets_Database_ADCReview_clinicaltrialsGov.csv", stringsAsFactors = F)
setwd("/Volumes/homes/meshinchi_s/workingDir/For_Soheil/Shiny_App/target-data-viz")
adc_cart_targetData <- read.csv("data/ADC_and_CARTcell_Targets_Database_ADCReview_clinicaltrialsGov.csv", stringsAsFactors = F)
runApp()
?infoBoxOutput
runApp()
runApp()
adc_cart_targetData$Gene.symbol.of.target..Final.
runApp()
runApp()
runApp()
knitr::opts_chunk$set(echo = TRUE)
all_targets <- read.csv("../ADC_and_CARTcell_Targets_Database_ADCReview_clinicaltrialsGov.csv")
library(tidyverse)
knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
all_targets %>%
separate_rows(If.currently.in.clinical.trials..drug.trial.ID.number, sep = ",")
adc <- readxl::read_excel("../ADC_Targets_Database_with_clintrialsGov_v3.5.xlsx")
cart <- readxl::read_excel("../CARTcell_Targets_Database_v1.xlsx")
all_adcs_carts <- bind_rows(adc, cart)
all_targets %>%
separate_rows(If.currently.in.clinical.trials..drug.trial.ID.number, sep = ",")
all_targets %>%
separate_rows(If.currently.in.clinical.trials..drug.trial.ID.number, sep = ",") %>%
dplyr::select(If.currently.in.clinical.trials..drug.trial.ID.number)
all_targets %>%
separate_rows(If.currently.in.clinical.trials..drug.trial.ID.number, sep = ";") %>%
dplyr::select(If.currently.in.clinical.trials..drug.trial.ID.number)
all_targets %>%
separate_rows(If.currently.in.clinical.trials..drug.trial.ID.number, sep = ";") %>%
separate_rows(If.currently.in.clinical.trials..drug.trial.ID.number, sep = ",") %>%
dplyr::select(If.currently.in.clinical.trials..drug.trial.ID.number)
all_targets %>%
separate_rows(If.currently.in.clinical.trials..drug.trial.ID.number, sep = ";") %>%
View(separate_rows(If.currently.in.clinical.trials..drug.trial.ID.number, sep = ","))
View(all_targets %>%
separate_rows(If.currently.in.clinical.trials..drug.trial.ID.number, sep = ";") %>%
separate_rows(If.currently.in.clinical.trials..drug.trial.ID.number, sep = ","))
View(unique(all_targets %>%
separate_rows(If.currently.in.clinical.trials..drug.trial.ID.number, sep = ";") %>%
separate_rows(If.currently.in.clinical.trials..drug.trial.ID.number, sep = ",")))
test <- all_targets %>%
separate_rows(If.currently.in.clinical.trials..drug.trial.ID.number, sep = ";") %>%
separate_rows(If.currently.in.clinical.trials..drug.trial.ID.number, sep = ","))
any(duplicated(all_adcs_carts))
which(duplicated(all_adcs_carts)))
which(duplicated(all_adcs_carts))
all_adcs_carts[which(duplicated(all_adcs_carts)),]
View(all_adcs_carts[which(duplicated(all_adcs_carts)),])
View(all_adcs_carts[which(duplicated(all_adcs_carts$`If currently in clinical trials, drug/trial ID number`)),])
adc <- readxl::read_excel("../ADC_Targets_Database_with_clintrialsGov_v3.5.xlsx")
cart <- readxl::read_excel("../CARTcell_Targets_Database_v1.xlsx")
all_adcs_carts <- bind_rows(adc, cart)
View(all_adcs_carts[which(duplicated(all_adcs_carts$`If currently in clinical trials, drug/trial ID number`)),])
all_targets <- all_targets %>%
separate_rows(If.currently.in.clinical.trials..drug.trial.ID.number, sep = ";") %>%
separate_rows(If.currently.in.clinical.trials..drug.trial.ID.number, sep = ",")
# Saving the merged ADC + CAR T-cell table as both CSV and Excel.
# write_xlsx() takes the output location as `path`; it has no `file` or `row.names`
# arguments, and a trailing comma leaves an empty argument in the call.
write.csv(all_adcs_carts, file = "../ADC_and_CARTcell_Targets_Database_ADCReview_clinicaltrialsGov.csv", row.names = FALSE)
writexl::write_xlsx(all_adcs_carts, path = "../ADC_and_CARTcell_Targets_Database_ADCReview_clinicaltrialsGov.xlsx")
shiny::runApp('/Volumes/homes/meshinchi_s/workingDir/For_Soheil/Shiny_App/target-data-viz')
runApp('/Volumes/homes/meshinchi_s/workingDir/For_Soheil/Shiny_App/target-data-viz')
?datatable
?renderDataTable
runApp('/Volumes/homes/meshinchi_s/workingDir/For_Soheil/Shiny_App/target-data-viz')
?datatable
runApp('/Volumes/homes/meshinchi_s/workingDir/For_Soheil/Shiny_App/target-data-viz')
runApp('/Volumes/homes/meshinchi_s/workingDir/For_Soheil/Shiny_App/target-data-viz')
runApp('/Volumes/homes/meshinchi_s/workingDir/For_Soheil/Shiny_App/target-data-viz')
runApp('/Volumes/homes/meshinchi_s/workingDir/For_Soheil/Shiny_App/target-data-viz')
runApp('/Volumes/homes/meshinchi_s/workingDir/For_Soheil/Shiny_App/target-data-viz')
runApp('/Volumes/homes/meshinchi_s/workingDir/For_Soheil/Shiny_App/target-data-viz')
adc_cart_targetData <<- readxl::read_excel("data/ADC_and_CARTcell_Targets_Database_ADCReview_clinicaltrialsGov.xlsx", stringsAsFactors = F)
adc_cart_targetData <<- readxl::read_excel("data/ADC_and_CARTcell_Targets_Database_ADCReview_clinicaltrialsGov.xlsx")
setwd("/Volumes/homes/meshinchi_s/workingDir/For_Soheil/Shiny_App/target-data-viz")
adc_cart_targetData <<- readxl::read_excel("data/ADC_and_CARTcell_Targets_Database_ADCReview_clinicaltrialsGov.xlsx")
adc_cart_targetData <<- readxl::read_excel("data/ADC_and_CARTcell_Targets_Database_ADCReview_clinicaltrialsGov.xlsx")
View(adc_cart_targetData)
# Turning clinical trial identifier into a URL to use for hyperlinking: https://clinicaltrials.gov/ct2/about-site/link-to
# The column names must match the spreadsheet exactly: "Associated cancer(s)" (with a space)
# and "drug/trial ID number" (with a slash). The earlier spellings do not match the column
# name this session uses everywhere else.
adc_cart_targetData <- adc_cart_targetData %>%
  dplyr::select(`Treatment type`, `Gene symbol of target (Final)`, `Associated cancer(s)`,
                `If currently in clinical trials, drug/trial ID number`, `Development sponsor`, `Development status`) %>%
  # Rows without a trial ID stay NA rather than becoming a link to ".../show/NA"
  mutate_at(vars(`If currently in clinical trials, drug/trial ID number`),
            ~ifelse(is.na(.), NA_character_,
                    paste0("<a href='", "https://clinicaltrials.gov/show/", ., "'>", ., "</a>")))
adc_cart_targetData$`If currently in clinical trials, drug/trial ID number`
runApp()
runApp()
runApp()
runApp()
runApp()
?box
runApp()
runApp()
library(shiny)
library(shiny)
library(shinydashboard)
# Minimal shinydashboard layout: a sidebar menu (input id "tabs") with two entries, and one
# body panel per entry. The first panel holds the 'switchtab' button.
ui <- dashboardPage(
  dashboardHeader(title = "Simple tabs"),
  dashboardSidebar(
    sidebarMenu(id = "tabs",
      menuItem("Menu Item 1", tabName = "one", icon = icon("dashboard")),
      # The second entry gets its own label; both entries were titled "Menu Item 1" before
      menuItem("Menu Item 2", tabName = "two", icon = icon("th"))
    )
  ),
  dashboardBody(
    tabItems(
      tabItem(tabName = "one", h2("Dashboard tab content"), actionButton('switchtab', 'Switch tab')),
      tabItem(tabName = "two", h2("Widgets tab content"))
    )
  )
)
# Server logic for the tab demo: each press of the 'switchtab' button selects whichever of
# the two sidebar tabs ("one" / "two") is not the one currently reported by input$tabs.
server <- function(input, output, session) {
  observeEvent(input$switchtab, {
    other_tab <- switch(input$tabs, "two" = "one", "one" = "two")
    updateTabItems(session, inputId = "tabs", selected = other_tab)
  })
}
shinyApp(ui, server)
shiny::runApp('/Volumes/homes/meshinchi_s/workingDir/For_Soheil/Shiny_App/target-data-viz')
runApp('/Volumes/homes/meshinchi_s/workingDir/For_Soheil/Shiny_App/target-data-viz')
runApp('/Volumes/homes/meshinchi_s/workingDir/For_Soheil/Shiny_App/target-data-viz')
runApp('/Volumes/homes/meshinchi_s/workingDir/For_Soheil/Shiny_App/target-data-viz')
runApp('/Volumes/homes/meshinchi_s/workingDir/For_Soheil/Shiny_App/target-data-viz')
runApp('/Volumes/homes/meshinchi_s/workingDir/For_Soheil/Shiny_App/target-data-viz')
library("tidyverse")
library("biomaRt")
p1 <- readRDS("data/TARGET_AAML1031_0531_RBD_Dx_Relapse_TPMs_filt4dupGenes_with_cellLines_CD34posNBM_forShinyApp_PART1_12.27.2019.RDS")
library("tidyverse")
library("biomaRt")
# Reading in the counts data used by the Shiny app for plotting
p1 <- readRDS("TARGET_AAML1031_0531_RBD_Dx_Relapse_TPMs_filt4dupGenes_with_cellLines_CD34posNBM_forShinyApp_PART1_12.27.2019.RDS")
setwd("/Volumes/homes/meshinchi_s/workingDir/For_Soheil/Shiny_App/target-data-viz/data")
# Reading in the counts data used by the Shiny app for plotting
p1 <- readRDS("TARGET_AAML1031_0531_RBD_Dx_Relapse_TPMs_filt4dupGenes_with_cellLines_CD34posNBM_forShinyApp_PART1_12.27.2019.RDS")
p2 <- readRDS("TARGET_AAML1031_0531_RBD_Dx_Relapse_TPMs_filt4dupGenes_with_cellLines_CD34posNBM_forShinyApp_PART2_12.27.2019.RDS")
p3 <- readRDS("TARGET_AAML1031_0531_RBD_Dx_Relapse_TPMs_filt4dupGenes_with_cellLines_CD34posNBM_forShinyApp_PART3_12.27.2019.RDS")
p4 <- readRDS("TARGET_AAML1031_0531_RBD_Dx_Relapse_TPMs_filt4dupGenes_with_cellLines_CD34posNBM_forShinyApp_PART4_12.27.2019.RDS")
countsData <<- rbind(p1, p2, p3, p4)
################# Changing gene symbols to gene stable IDs to feed into biomaRt, otherwise it can return multiple entries from different assemblies for each gene
ids2symbols_conversions <- read.csv("/Volumes/homes/meshinchi_s/workingDir/TARGET/AML_TARGET/RNA/mRNAseq/analysis/0000.00.02_Reference_GeneInfo/GeneSymbol_Ensembl_ID_Conversion_GRCh37.69_FromBCCA.csv", header = TRUE, stringsAsFactors = F)
View(ids2symbols_conversions)
View(ids2symbols_conversions)
################# Changing gene symbols to gene stable IDs to feed into biomaRt, otherwise it can return multiple entries from different assemblies for each gene
ids2symbols_conversions <- read.csv("/Volumes/homes/meshinchi_s/workingDir/TARGET/AML_TARGET/RNA/mRNAseq/analysis/0000.00.02_Reference_GeneInfo/GeneSymbol_Ensembl_ID_Conversion_GRCh37.69_FromBCCA.csv", header = TRUE, stringsAsFactors = F) %>%
filter(geneSymbol %in% colnames(countsData))
View(ids2symbols_conversions)
################# Changing gene symbols to gene stable IDs to feed into biomaRt, otherwise it can return multiple entries from different assemblies for each gene
# Only the symbols present as row names of countsData are kept (rownames(), not the
# misspelled `rowames()` of the earlier attempt).
ids2symbols_conversions <- read.csv("/Volumes/homes/meshinchi_s/workingDir/TARGET/AML_TARGET/RNA/mRNAseq/analysis/0000.00.02_Reference_GeneInfo/GeneSymbol_Ensembl_ID_Conversion_GRCh37.69_FromBCCA.csv", header = TRUE, stringsAsFactors = FALSE) %>%
  filter(geneSymbol %in% rownames(countsData))
View(ids2symbols_conversions)
rownames(countsData)
table(ids2symbols_conversions$geneSymbol %in% rownames(countsData))
# Uses vector of gene stable IDs to query Ensembl for further info about the gene & associated transcripts, proteins etc.
mart <- useMart("ensembl", dataset = "hsapiens_gene_ensembl", host = "uswest.ensembl.org")
# Querying Ensembl for the gene name, gene ID and "tmhmm" attribute of every gene ID in
# ids2symbols_conversions. The earlier call used `gene_vector`, which is never defined in
# this session. `filters` is spelled out in full rather than relying on partial matching
# of `filter`.
results <- getBM(values = ids2symbols_conversions$gene_id, filters = "ensembl_gene_id",
                 attributes = c("external_gene_name", "ensembl_gene_id", "tmhmm"),
                 mart = mart)
View(results)
View(results)
write.csv(results, "TMhelix_Prediction_Results_TMHMM_Ensembl_biomaRt_04.02.2020.csv", row.names = F)