Skip to content

Commit

Permalink
Fix reading CNV report
Browse files Browse the repository at this point in the history
grst committed Jan 26, 2024
1 parent b085b74 commit 8853dd7
Showing 1 changed file with 17 additions and 8 deletions.
25 changes: 17 additions & 8 deletions R/personalis.R
Original file line number Diff line number Diff line change
@@ -149,7 +149,7 @@ read_personalis_small_variant_reports <- function(sample_paths, modality, sample
row_data <- all_variants |>
select(
mut_id,
Sequence,
Chromosome,
POS,
`Variant Type`,
`Genomic Variant`
@@ -225,14 +225,17 @@ read_personalis_small_variant_report_sample <- function(sample_folder, modality,
guess_max = GUESS_MAX
) |>
mutate(sample = sample_name) |>
mutate(mut_id = sprintf("%s_%s_%s", Sequence, `Genomic Variant`, `Variant Type`))
# in older versions, the "Chromosome" column is called "Sequence"
rename_with(\(x) if_else(x == "Sequence", "Chromosome", x)) |>
mutate(mut_id = sprintf("%s_%s_%s", Chromosome, `Genomic Variant`, `Variant Type`))

variant_table
}

#'
#' @importFrom tidyr pivot_longer
#' @importFrom dplyr bind_rows
#' @importFrom purrr keep
#' @keywords internal
read_personalis_somatic_variants_summary_statistics <- function(sample_folder, modality, sample_type) {
stopifnot("`modality` must be one of 'DNA' or 'RNA'." = modality %in% c("DNA", "RNA"))
@@ -252,7 +255,9 @@ read_personalis_somatic_variants_summary_statistics <- function(sample_folder, m
html_elements("#somatic_variant_annotation") |>
html_elements("table") |>
html_table(na.strings = "N/A")
tables[2:3] |>
tables |>
# some reports contain two such tables, some only one
keep(\(x) colnames(x)[1] == "Summary Small Variants") |>
lapply(function(df) {
colnames(df) <- make.names(colnames(df))
colnames(df)[1] <- "metric"
@@ -296,7 +301,7 @@ read_personalis_cnv_reports <- function(sample_paths) {

all_cnv <- bind_rows(map(cnv_list, "cnv_report"))
row_data <- all_cnv |>
select(cnv_id, `Gene Symbol`, `Sequence`, `Segment Start`, `Segment End`) |>
select(cnv_id, `Gene Symbol`, `Chromosome`, `Segment Start`, `Segment End`) |>
distinct()
stopifnot("cnv_id is not a unique identifier" = !any(duplicated(row_data$cnv_id)))

@@ -339,11 +344,11 @@ read_personalis_cnv_report_sample <- function(sample_folder) {
"Gene Symbol" = as.character,
"CNA Type" = as.character,
"AbsoluteCN" = as.numeric,
"Sequence" = as.character,
"Chromosome" = as.character,
"Segment Start" = as.numeric,
"Segment End" = as.numeric,
"Estimated Sample purity" = as.numeric,
"Estimated Sample Ploidy" = as.numeric,
# "Estimated Sample purity" = as.numeric,
# "Estimated Sample Ploidy" = as.numeric,
"Percent of Gene in Event" = \(x) as.numeric(sub("%", "", x))
)
suppressWarnings({
@@ -353,9 +358,13 @@ read_personalis_cnv_report_sample <- function(sample_folder) {
# we also can't specify the columns at import time, because in some personalis versions, some columns
# are omitted.
amp = read_excel(cnv_file, sheet = "AMP", col_types = NULL) |>
# In older reports, the "Chromosome" column is called "Sequence"
rename_with(\(x) if_else(x == "Sequence", "Chromosome", x)) |>
select(-any_of(c("log posterior probability", "B-allele Frequency", "Allelotype", "Mean_log2Ratio"))) |>
mutate(across(names(COL_TYPES), \(x) COL_TYPES[[cur_column()]](x))),
del = read_excel(cnv_file, sheet = "DEL", col_types = NULL) |>
# In older reports, the "Chromosome" column is called "Sequence"
rename_with(\(x) if_else(x == "Sequence", "Chromosome", x)) |>
select(-any_of(c("Wilcoxon pvalue", "KS pvalue"))) |>
mutate(across(names(COL_TYPES), \(x) COL_TYPES[[cur_column()]](x)))
)
@@ -368,7 +377,7 @@ read_personalis_cnv_report_sample <- function(sample_folder) {
cnv_table <- cnv_table |>
mutate(sample = sample_name) |>
# if a segment spans multiple genes, there will be multiple rows per segment (one per gene), so the gene symbol is part of the ID
mutate(cnv_id = sprintf("%s_%i_%i_%s", Sequence, `Segment Start`, `Segment End`, `Gene Symbol`))
mutate(cnv_id = sprintf("%s_%i_%i_%s", Chromosome, `Segment Start`, `Segment End`, `Gene Symbol`))
}

cnv_table

0 comments on commit 8853dd7

Please sign in to comment.