Skip to content

Commit

Permalink
Fix incompatibilities with more recent outputs (#3)
Browse files Browse the repository at this point in the history
* Fix reading CNV report

* Roxygenize

* Fix parse copy number report

---------

Co-authored-by: grst <[email protected]>
  • Loading branch information
grst and grst authored Mar 7, 2024
1 parent b085b74 commit 980e27b
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 23 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Authors@R:
Description: This package provides convenience functions for reading real-world evidence data provided by Personalis into Bioconductor MultiAssayExperiment objects.
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.0
RoxygenNote: 7.3.1
Depends:
SummarizedExperiment,
readxl,
Expand Down
61 changes: 39 additions & 22 deletions R/personalis.R
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ read_personalis_small_variant_reports <- function(sample_paths, modality, sample
row_data <- all_variants |>
select(
mut_id,
Sequence,
Chromosome,
POS,
`Variant Type`,
`Genomic Variant`
Expand Down Expand Up @@ -225,14 +225,17 @@ read_personalis_small_variant_report_sample <- function(sample_folder, modality,
guess_max = GUESS_MAX
) |>
mutate(sample = sample_name) |>
mutate(mut_id = sprintf("%s_%s_%s", Sequence, `Genomic Variant`, `Variant Type`))
# in older versions, the "Chromosome" column is called "Sequence"
rename_with(\(x) if_else(x == "Sequence", "Chromosome", x)) |>
mutate(mut_id = sprintf("%s_%s_%s", Chromosome, `Genomic Variant`, `Variant Type`))

variant_table
}

#'
#' @importFrom tidyr pivot_longer
#' @importFrom dplyr bind_rows
#' @importFrom purrr keep
#' @keywords internal
read_personalis_somatic_variants_summary_statistics <- function(sample_folder, modality, sample_type) {
stopifnot("`modality` must be one of 'DNA' or 'RNA'." = modality %in% c("DNA", "RNA"))
Expand All @@ -252,7 +255,9 @@ read_personalis_somatic_variants_summary_statistics <- function(sample_folder, m
html_elements("#somatic_variant_annotation") |>
html_elements("table") |>
html_table(na.strings = "N/A")
tables[2:3] |>
tables |>
# some reports contain two such tables, some only one
keep(\(x) "SNVs" %in% colnames(x)) |>
lapply(function(df) {
colnames(df) <- make.names(colnames(df))
colnames(df)[1] <- "metric"
Expand Down Expand Up @@ -291,12 +296,14 @@ read_personalis_cnv_reports <- function(sample_paths) {
return(NULL)
}

col_data <- bind_rows(map(cnv_list, "summary_stats")) |>
tibble::column_to_rownames("sample")
col_data <- bind_rows(map(cnv_list, "summary_stats"))
if (nrow(col_data)) {
col_data <- col_data |> tibble::column_to_rownames("sample")
}

all_cnv <- bind_rows(map(cnv_list, "cnv_report"))
row_data <- all_cnv |>
select(cnv_id, `Gene Symbol`, `Sequence`, `Segment Start`, `Segment End`) |>
select(cnv_id, `Gene Symbol`, `Chromosome`, `Segment Start`, `Segment End`) |>
distinct()
stopifnot("cnv_id is not a unique identifier" = !any(duplicated(row_data$cnv_id)))

Expand Down Expand Up @@ -339,11 +346,11 @@ read_personalis_cnv_report_sample <- function(sample_folder) {
"Gene Symbol" = as.character,
"CNA Type" = as.character,
"AbsoluteCN" = as.numeric,
"Sequence" = as.character,
"Chromosome" = as.character,
"Segment Start" = as.numeric,
"Segment End" = as.numeric,
"Estimated Sample purity" = as.numeric,
"Estimated Sample Ploidy" = as.numeric,
# "Estimated Sample purity" = as.numeric,
# "Estimated Sample Ploidy" = as.numeric,
"Percent of Gene in Event" = \(x) as.numeric(sub("%", "", x))
)
suppressWarnings({
Expand All @@ -353,9 +360,13 @@ read_personalis_cnv_report_sample <- function(sample_folder) {
# we also can't specify the columns at import time, because in some personalis versions, some columns
# are omitted.
amp = read_excel(cnv_file, sheet = "AMP", col_types = NULL) |>
# In older reports the "Chromosome" column is called sequence
rename_with(\(x) if_else(x == "Sequence", "Chromosome", x)) |>
select(-any_of(c("log posterior probability", "B-allele Frequency", "Allelotype", "Mean_log2Ratio"))) |>
mutate(across(names(COL_TYPES), \(x) COL_TYPES[[cur_column()]](x))),
del = read_excel(cnv_file, sheet = "DEL", col_types = NULL) |>
# In older reports the "Chromosome" column is called sequence
rename_with(\(x) if_else(x == "Sequence", "Chromosome", x)) |>
select(-any_of(c("Wilcoxon pvalue", "KS pvalue"))) |>
mutate(across(names(COL_TYPES), \(x) COL_TYPES[[cur_column()]](x)))
)
Expand All @@ -368,7 +379,7 @@ read_personalis_cnv_report_sample <- function(sample_folder) {
cnv_table <- cnv_table |>
mutate(sample = sample_name) |>
# if a segment spans multiple genes, there will be multiple rows per gene
mutate(cnv_id = sprintf("%s_%i_%i_%s", Sequence, `Segment Start`, `Segment End`, `Gene Symbol`))
mutate(cnv_id = sprintf("%s_%i_%i_%s", Chromosome, `Segment Start`, `Segment End`, `Gene Symbol`))
}

cnv_table
Expand All @@ -386,18 +397,24 @@ read_personalis_cnv_summary_statistics <- function(sample_folder) {
sprintf("DNA_%s_dna_statistics.html", sample_name)
)
# unfortunately, this is not a table, but a div of divs that looks like a table
table <- (read_html(html_file) |> html_elements("#copy_number"))[[1]]
titles <- table |>
html_nodes(".title") |>
html_text()
values <- table |>
html_nodes(".value") |>
html_text()

cnv_metrics <- tibble(metric = titles[1:5], value = values[1:5]) |>
mutate(sample = sample_name) |>
pivot_wider(id_cols = sample, names_from = metric, values_from = value)
cnv_metrics
table <- (read_html(html_file) |> html_elements("#copy_number"))
# unfortunately, it seems missing in newer versions of the report
if (!length(table)) {
return(tibble())
} else {
table <- table[[1]]
titles <- table |>
html_nodes(".title") |>
html_text()
values <- table |>
html_nodes(".value") |>
html_text()

cnv_metrics <- tibble(metric = titles[1:5], value = values[1:5]) |>
mutate(sample = sample_name) |>
pivot_wider(id_cols = sample, names_from = metric, values_from = value)
cnv_metrics
}
}


Expand Down

0 comments on commit 980e27b

Please sign in to comment.