Skip to content

Commit

Permalink
Merge pull request #17 from christopher-mohr/development
Browse files Browse the repository at this point in the history
Add functionality for parsing raw CNV data
  • Loading branch information
apeltzer authored Nov 13, 2023
2 parents 3a3f970 + 754dc23 commit 62003c8
Show file tree
Hide file tree
Showing 9 changed files with 205 additions and 1 deletion.
8 changes: 8 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ export(add_amplification_data)
export(add_annotation_data)
export(add_common_theme_elements)
export(add_tmb_variant_data)
export(cnv)
export(cvo)
export(filter_consequences)
export(filter_depth)
Expand Down Expand Up @@ -53,6 +54,7 @@ export(process_and_filter_small_variant_data)
export(qualitymetrics)
export(read_analysis_status)
export(read_annotation_data)
export(read_cnv_data)
export(read_cvo_data)
export(read_dna_expanded_metrics)
export(read_dna_qc_metrics)
Expand All @@ -70,7 +72,13 @@ export(read_splice_variants)
export(read_tmb_details_data)
export(read_tmb_details_data_csv)
export(read_tmb_trace_data)
export(summarize_cnv_data)
export(tmb)
export(write_multiqc_data)
export(write_rdata_file)
export(write_workbook)
importFrom(dplyr,left_join)
importFrom(dplyr,mutate)
importFrom(stringr,str_split_i)
importFrom(vcfR,read.vcfR)
importFrom(vcfR,vcfR2tidy)
75 changes: 75 additions & 0 deletions R/cnv.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#' Read in a *CopyNumberVariants.vcf file and store as an object
#'
#' @description Read in a *CopyNumberVariants.vcf file. This is a thin,
#'   exported wrapper around the internal constructor `new_cnv_output()`.
#'
#' @param cnv_file_path a file path to a *CopyNumberVariants.vcf file
#' @param local_app specifies whether quality metrics are coming from local
#'   app; passed through unchanged to `new_cnv_output()`
#'
#' @return A combined.cnv.output object
#'
#' @export
cnv <- function(cnv_file_path, local_app = FALSE) {
  # Forward local_app explicitly so the caller's setting is not silently
  # replaced by the constructor's own default.
  new_cnv_output(cnv_file_path, local_app = local_app)
}

#' Constructor function for combined.cnv.output objects
#'
#' Not to be called directly. Parses each given VCF file with
#' `parse_vcf_to_df()`, stacks the parsed rows, and adds a leading `sample_id`
#' column derived from the file name.
#'
#' @param cnv_file_path a file path to a *CopyNumberVariants.vcf file
#' @param local_app specifies whether quality metrics are coming from local
#'   app (accepted for interface consistency; not used by this constructor)
#'
#' @return A combined.cnv.output object
new_cnv_output <- function(cnv_file_path, local_app = FALSE) {
  cnv_data <- tibble(file = cnv_file_path) %>%
    # one parsed data frame per input file, stored as a list column
    mutate(data = lapply(file, parse_vcf_to_df)) %>%
    unnest(data) %>%
    # the dot is escaped so that only a literal ".vcf" suffix is stripped
    mutate(
      sample_id = str_replace(basename(file), "_CopyNumberVariants\\.vcf", "")
    ) %>%
    select(-file) %>%
    relocate(sample_id)

  # NOTE(review): structure(class = ...) replaces the whole class vector, so
  # the result no longer carries the tibble/data.frame classes -- confirm that
  # downstream code expects this.
  structure(cnv_data, class = "combined.cnv.output")
}

#' Read in a batch of *CopyNumberVariants.vcf files into a list of CNV objects
#'
#' @param cnv_directory a file path to a directory containing one or more
#'   *CopyNumberVariants.vcf files
#' @param local_app specifies whether quality metrics are coming from local
#'   app; passed on to `cnv()` for every file
#'
#' @return A named list of combined.cnv.output objects, one per file found.
#'   Names are the file base names with a trailing `.vcf` removed.
#'
#' @export
read_cnv_data <- function(cnv_directory, local_app = FALSE) {
  # list.files() interprets `pattern` as a regular expression, not a glob:
  # no leading "*" is needed and the dot must be escaped to match literally.
  cnv_files <- list.files(
    path = cnv_directory,
    pattern = "CopyNumberVariants\\.vcf",
    full.names = TRUE
  )

  cnv_data <- map(cnv_files, cnv, local_app) %>%
    set_names(str_remove(basename(cnv_files), "\\.vcf$"))

  cnv_data
}

#' Read in a batch of *CopyNumberVariants.vcf files into one dataframe
#'
#' @param cnv_directory a file path to a directory containing one or more
#'   *CopyNumberVariants.vcf (or *cnv.vcf) files
#'
#' @return A dataframe with the read CNV data: the rows parsed from every
#'   matching file, with a leading `sample_id` column derived from the file
#'   name.
#'
#' @export
summarize_cnv_data <- function(cnv_directory) {
  # list.files() interprets `pattern` as a regular expression, not a glob:
  # no "*" prefixes are needed and the dots must be escaped.
  cnv_files <- list.files(
    path = cnv_directory,
    pattern = "(cnv|CopyNumberVariants)\\.vcf",
    full.names = TRUE
  )

  cnv_data <- tibble(file = cnv_files) %>%
    # one parsed data frame per input file, stored as a list column
    mutate(data = lapply(file, parse_vcf_to_df)) %>%
    unnest(data) %>%
    # Strip the suffix for both accepted file namings so sample_id is the
    # bare sample name either way.
    # NOTE(review): assumes *cnv.vcf files are named "<sample>_cnv.vcf" --
    # confirm against real data.
    mutate(
      sample_id = str_replace(
        basename(file),
        "_(cnv|CopyNumberVariants)\\.vcf",
        ""
      )
    ) %>%
    select(-file) %>%
    relocate(sample_id)

  cnv_data
}
3 changes: 2 additions & 1 deletion R/cvo.R
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,8 @@ parse_cvo_record <- function(record_string){
stringr::str_split("\n") %>%
unlist() %>%
stringr::str_remove("\\t$") %>%
stringr::str_split("\\t")
stringr::str_split("\\t") %>%
rapply(., function(x) ifelse(x=="NA",NA,x), how = "replace") # replace all string NAs with NA to avoid warnings from as.numeric

if(stringr::str_detect(record_string, "TMB|MSI")){
record <- purrr::map(intermediate, ~ as.numeric(.x[2]))
Expand Down
28 changes: 28 additions & 0 deletions R/wrangle.R
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,34 @@ read_rna_expanded_metrics <- function(qmo_list){
return(rna_expanded_metrics)
}

#' Parse VCF files for a provided path and construct data frame.
#'
#' Reads the file with `vcfR::read.vcfR()`, converts it to tidy form, and
#' joins the fixed-field table with the genotype table.
#'
#' @param path path to VCF file in `*.vcf` or `*.vcf.gz` format
#' @return {tibble} new data frame with all variants (fixed field and genotype information)
#' @importFrom dplyr mutate left_join
#' @importFrom vcfR read.vcfR vcfR2tidy
#' @importFrom stringr str_split_i
parse_vcf_to_df <- function(path) {
  # parse VCF file
  vcf_content <- read.vcfR(path)

  # Convert once and reuse the result; both tables come from the same
  # conversion, so there is no need to run it twice.
  tidy_vcf <- vcfR2tidy(vcf_content)

  # fixed field content to data frame
  fixed_df <- tidy_vcf$fix

  # GT content to data frame
  gt_df <- tidy_vcf$gt

  # Join on ChromKey and POS only.
  # NOTE(review): if several records share the same ChromKey/POS, this join
  # pairs every fixed row with every genotype row at that position. An
  # ALT-based key was previously sketched here to disambiguate such records
  # but was never enabled -- confirm whether it is needed for CNV VCFs.
  joined_vcf_df <- fixed_df %>%
    dplyr::left_join(gt_df, by = c("ChromKey", "POS"))

  as_tibble(joined_vcf_df)
}

#' Process and filter small variant data-frame to requirements
#'
#' @description Processes small-variant data to comply with requirements for
Expand Down
17 changes: 17 additions & 0 deletions man/cnv.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 21 additions & 0 deletions man/new_cnv_output.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 17 additions & 0 deletions man/parse_vcf_to_df.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions man/read_cnv_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 18 additions & 0 deletions man/summarize_cnv_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 62003c8

Please sign in to comment.