Skip to content

Commit

Permalink
Merge pull request #86 from UMCUGenetics/release/DIMSv2.7.0
Browse files Browse the repository at this point in the history
Release/DIMSv2.7.0
  • Loading branch information
ALuesink authored Feb 29, 2024
2 parents 5cc5583 + ec8cc9e commit 78a5235
Show file tree
Hide file tree
Showing 7 changed files with 148 additions and 23 deletions.
35 changes: 31 additions & 4 deletions pipeline/scripts/15-dIEM_violin.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,14 @@
# corresponding Z-scores.
# 2. All files from github: https://github.com/UMCUGenetics/dIEM

library(dplyr) # tidytable is for other_isobaric.R (left_join)

suppressPackageStartupMessages(library("dplyr")) # tidytable is for other_isobaric.R (left_join)
library(reshape2) # used in prepare_data.R
library(openxlsx) # for opening Excel file
library(ggplot2) # for plotting
library(gridExtra) # for table top highest/lowest
suppressPackageStartupMessages(library("gridExtra")) # for table top highest/lowest
library(stringr) # for Helix output


# load functions
source("/hpc/dbg_mz/production/DIMS/pipeline/scripts/AddOnFunctions/check_same_samplename.R")
Expand All @@ -20,6 +23,11 @@ source("/hpc/dbg_mz/production/DIMS/pipeline/scripts/AddOnFunctions/prepare_data
source("/hpc/dbg_mz/production/DIMS/pipeline/scripts/AddOnFunctions/prepare_toplist.R")
source("/hpc/dbg_mz/production/DIMS/pipeline/scripts/AddOnFunctions/create_violin_plots.R")
source("/hpc/dbg_mz/production/DIMS/pipeline/scripts/AddOnFunctions/prepare_alarmvalues.R")
source("/hpc/dbg_mz/production/DIMS/pipeline/scripts/AddOnFunctions/output_helix.R")
source("/hpc/dbg_mz/production/DIMS/pipeline/scripts/AddOnFunctions/get_patient_data_to_helix.R")
source("/hpc/dbg_mz/production/DIMS/pipeline/scripts/AddOnFunctions/add_lab_id_and_onderzoeksnummer.R")
source("/hpc/dbg_mz/production/DIMS/pipeline/scripts/AddOnFunctions/is_diagnostic_patient.R")


# define parameters - check after addition to run.sh
cmd_args <- commandArgs(trailingOnly = TRUE)
Expand Down Expand Up @@ -50,6 +58,7 @@ header_row <- 1
col_start <- "B"
zscore_cutoff <- 5
xaxis_cutoff <- 20
protocol_name <- "DIMS_PL_DIAG"

# path to DIMS excel file
path_DIMSfile <- paste0(outdir, "/", run_name, ".xlsx") # ${outdir} in run.sh
Expand All @@ -72,6 +81,7 @@ file_explanation <- "/hpc/dbg_mz/tools/Explanation_violin_plots.txt"
# copy list of isomers to project folder.
file.copy("/hpc/dbg_mz/tools/isomers.txt", outdir)


#### STEP 1: Preparation ####
# in: run_name, path_DIMSfile, header_row ||| out: output_dir, DIMS

Expand Down Expand Up @@ -318,7 +328,8 @@ if (algorithm == 1) {
}

#### STEP 5: Make violin plots #####
# in: algorithm / zscore_patients, violin, nr_contr, nr_pat, Data, path_textfiles, zscore_cutoff, xaxis_cutoff, top_diseases, top_metab, output_dir ||| out: pdf file
# in: algorithm / zscore_patients, violin, nr_contr, nr_pat, Data, path_textfiles, zscore_cutoff, xaxis_cutoff, top_diseases, top_metab, output_dir |||
# out: pdf file, Helix csv file

if (violin == 1) { # make violin plots

Expand Down Expand Up @@ -381,13 +392,29 @@ if (violin == 1) { # make violin plots
metab_interest_controls <- prepare_data(metab_list_all, zscore_controls)
metab_perpage <- prepare_data_perpage(metab_interest_sorted, metab_interest_controls, nr_plots_perpage, nr_pat, nr_contr)

# for Diagnostics metabolites to be saved in Helix
if(grepl("Diagnost", pdf_dir)) {
# get table that combines DIMS results with stofgroepen/Helix table
dims_helix_table <- get_patient_data_to_helix(metab_interest_sorted, metab_list_all)

# check if run contains Diagnostics patients (e.g. "P2024M"), not for research runs
if(any(is_diagnostic_patient(dims_helix_table$Patient))){
# get output file for Helix
output_helix <- output_for_helix(protocol_name, dims_helix_table)
# write output to file
path_helixfile <- paste0(outdir, "/output_Helix_", run_name,".csv")
write.csv(output_helix, path_helixfile, quote = F, row.names = F)
}
}


# make violin plots per patient
for (pt_nr in 1:length(patient_list)) {
pt_name <- patient_list[pt_nr]
# for category Diagnostics, make list of metabolites that exceed alarm values for this patient
# for category Other, make list of top highest and lowest Z-scores for this patient
if (grepl("Diagnost", pdf_dir)) {
top_metab_pt <- prepare_alarmvalues(pt_name, metab_interest_sorted)
top_metab_pt <- prepare_alarmvalues(pt_name, dims_helix_table)
# save(top_metab_pt, file=paste0(outdir, "/start_15_prepare_alarmvalues.RData"))
} else {
top_metab_pt <- prepare_toplist(pt_name, zscore_patients)
Expand Down
10 changes: 10 additions & 0 deletions pipeline/scripts/AddOnFunctions/add_lab_id_and_onderzoeksnummer.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
add_lab_id_and_onderzoeksnummer <- function(df_metabs_helix) {
# Split patient number into labnummer and Onderzoeksnummer
for (row in 1:nrow(df_metabs_helix)) {
df_metabs_helix[row,"labnummer"] <- gsub("^P|\\.[0-9]*", "", df_metabs_helix[row,"Patient"])
labnummer_split <- strsplit(as.character(df_metabs_helix[row, "labnummer"]), "M")[[1]]
df_metabs_helix[row, "Onderzoeksnummer"] <- paste0("MB", labnummer_split[1], "/", labnummer_split[2])
}

return(df_metabs_helix)
}
2 changes: 1 addition & 1 deletion pipeline/scripts/AddOnFunctions/create_violin_plots.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ create_violin_plots <- function(pdf_dir, pt_name, metab_perpage, top_metab_pt=NU
# patient plots, create the PDF device
pt_name_sub <- pt_name
suffix <- ""
if (grepl("Diagnostics", pdf_dir) & grepl("^P[0-9]{4}M", pt_name)) {
if (grepl("Diagnostics", pdf_dir) & is_diagnostic_patient(pt_name)) {
prefix <- "MB"
suffix <- "_DIMS_PL_DIAG"
# substitute P and M in P2020M00001 into right format for Helix
Expand Down
31 changes: 31 additions & 0 deletions pipeline/scripts/AddOnFunctions/get_patient_data_to_helix.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
get_patient_data_to_helix <- function(metab_interest_sorted, metab_list_all){
# Combine Z-scores of metab groups together
df_all_metabs_zscores <- bind_rows(metab_interest_sorted)
# Change columnnames
colnames(df_all_metabs_zscores) <- c("HMDB_name", "Patient", "Z_score")
# Change Patient column to character instead of factor
df_all_metabs_zscores$Patient <- as.character(df_all_metabs_zscores$Patient)

# Delete whitespaces HMDB_name
df_all_metabs_zscores$HMDB_name <- str_trim(df_all_metabs_zscores$HMDB_name, "right")

# Split HMDB_name column on "nitine;" for match dims_helix_table
df_all_metabs_zscores$HMDB_name_split <- str_split_fixed(df_all_metabs_zscores$HMDB_name, "nitine;", 2)[,1]

# Combine stofgroepen
dims_helix_table <- bind_rows(metab_list_all)
# Filter table for metabolites for Helix
dims_helix_table <- dims_helix_table %>% filter(Helix == "ja")
# Split HMDB_name column on "nitine;" for match df_all_metabs_zscores
dims_helix_table$HMDB_name_split <- str_split_fixed(dims_helix_table$HMDB_name, "nitine;", 2)[,1]
dims_helix_table <- dims_helix_table %>% select(HMDB_name_split, Helix_naam, high_zscore, low_zscore)

# Filter DIMS results for metabolites for Helix
df_metabs_helix <- df_all_metabs_zscores %>% filter(HMDB_name_split %in% dims_helix_table$HMDB_name_split)
# Combine dims_helix_table and df_metabs_helix, adding Helix codes etc.
df_metabs_helix <- df_metabs_helix %>% left_join(dims_helix_table, by = join_by(HMDB_name_split))

df_metabs_helix <- df_metabs_helix %>% select(HMDB_name, Patient, Z_score, Helix_naam, high_zscore, low_zscore)

return(df_metabs_helix)
}
6 changes: 6 additions & 0 deletions pipeline/scripts/AddOnFunctions/is_diagnostic_patient.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
is_diagnostic_patient <- function(patient_column){
# Check for Diagnostics patients with correct patientnumber (e.g. starting with "P2024M")
diagnostic_patients <- grepl("^P[0-9]{4}M",patient_column)

return(diagnostic_patients)
}
31 changes: 31 additions & 0 deletions pipeline/scripts/AddOnFunctions/output_helix.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
output_for_helix <- function(protocol_name, df_metabs_helix){

# Remove positive controls
df_metabs_helix <- df_metabs_helix %>% filter(is_diagnostic_patient(Patient))

# Add 'Vial' column, each patient has unique ID
df_metabs_helix <- df_metabs_helix %>%
group_by(Patient) %>%
mutate(Vial = cur_group_id()) %>%
ungroup()

# Split patient number into labnummer and Onderzoeksnummer
df_metabs_helix <- add_lab_id_and_onderzoeksnummer(df_metabs_helix)

# Add column with protocol name
df_metabs_helix$Protocol <- protocol_name

# Change name Z_score and Helix_naam columns to Amount and Name
change_columns <- c(Amount = "Z_score", Name = "Helix_naam")
df_metabs_helix <- df_metabs_helix %>% rename(all_of(change_columns))

# Select only necessary columns and set them in correct order
df_metabs_helix <- df_metabs_helix %>%
select(c(Vial, labnummer, Onderzoeksnummer, Protocol, Name, Amount))

# Remove duplicate patient-metabolite combinations ("leucine + isoleucine + allo-isoleucin_Z-score" is added 3 times)
df_metabs_helix <- df_metabs_helix %>%
group_by(Onderzoeksnummer, Name) %>% distinct() %>% ungroup()

return(df_metabs_helix)
}
56 changes: 38 additions & 18 deletions pipeline/scripts/AddOnFunctions/prepare_alarmvalues.R
Original file line number Diff line number Diff line change
@@ -1,23 +1,43 @@
prepare_alarmvalues <- function(pt_name, metab_interest_sorted) {
# set parameters for table
high_zscore_cutoff <- 5
low_zscore_cutoff <- -3
prepare_alarmvalues <- function(pt_name, dims_helix_table) {
# extract data for patient of interest (pt_name)
pt_metabs_helix <- dims_helix_table %>% filter(Patient == pt_name)
pt_metabs_helix$Z_score <- round(pt_metabs_helix$Z_score, 2)

# Make empty dataframes for metabolites above or below alarmvalues
pt_list_high <- data.frame(HMDB_name = character(), Z_score = numeric())
pt_list_low <- data.frame(HMDB_name = character(), Z_score = numeric())

# make table of all metabolites
all_metab <- c()
for (page_nr in 1:length(metab_interest_sorted)) {
all_metab <- rbind(all_metab, metab_interest_sorted[[page_nr]])
# Loop over individual metabolites
for (metab in unique(pt_metabs_helix$HMDB_name)){
# Get data for individual metabolite
pt_metab <- pt_metabs_helix %>% filter(HMDB_name == metab)
# print(pt_metab)

# Check if zscore is positive of negative
if(pt_metab$Z_score > 0) {
# Get specific alarmvalue for metabolite
high_zscore_cutoff_metab <- pt_metabs_helix %>% filter(HMDB_name == metab) %>% pull(high_zscore)

# If zscore is above the alarmvalue, add to pt_list_high table
if(pt_metab$Z_score > high_zscore_cutoff_metab) {
pt_metab_high <- pt_metab %>% select(HMDB_name, Z_score)
pt_list_high <- rbind(pt_list_high, pt_metab_high)
}
} else {
# Get specific alarmvalue for metabolite
low_zscore_cutoff_metab <- pt_metabs_helix %>% filter(HMDB_name == metab) %>% pull(low_zscore)

# If zscore is below the alarmvalue, add to pt_list_low table
if(pt_metab$Z_score < low_zscore_cutoff_metab) {
pt_metab_low <- pt_metab %>% select(HMDB_name, Z_score)
pt_list_low <- rbind(pt_list_low, pt_metab_low)
}
}
}
# extract data for patient of interest (pt_name)
pt_list <- all_metab[which(all_metab$variable==pt_name), ]
# remove column with patient name
pt_list <- pt_list[ , -2]
# round off Z-scores
pt_list$value <- round(as.numeric(pt_list$value), 2)

# determine alarms for this patient: Z-score above 5 or below -3
pt_list_high <- pt_list[pt_list$value > high_zscore_cutoff, ]
pt_list_low <- pt_list[pt_list$value < low_zscore_cutoff, ]
# sort tables on zscore
pt_list_high <- pt_list_high %>% arrange(desc(Z_score))
pt_list_low <- pt_list_low %>% arrange(Z_score)
# add lines for increased, decreased
extra_line1 <- c("Increased", "")
extra_line2 <- c("Decreased", "")
Expand All @@ -27,6 +47,6 @@ prepare_alarmvalues <- function(pt_name, metab_interest_sorted) {
rownames(top_metab_pt) <- NULL
# change column names for display
colnames(top_metab_pt) <- c("Metabolite", "Z-score")

return(top_metab_pt)
}

0 comments on commit 78a5235

Please sign in to comment.