Skip to content

Commit

Permalink
Merge pull request #24 from ccb-hms/heatmap_add
Browse files Browse the repository at this point in the history
merge heatmap_add
  • Loading branch information
tram-nguyen-n authored Jan 22, 2025
2 parents a5a4014 + 5c6a220 commit d018b4f
Show file tree
Hide file tree
Showing 4 changed files with 129 additions and 69 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: ProteinGymR
Title: Programmatic access to ProteinGym datasets in R/Bioconductor
Version: 1.1.2
Version: 1.1.3
Authors@R:
c(person("Tram", "Nguyen",
role = c("aut", "cre"),
Expand Down Expand Up @@ -41,6 +41,7 @@ Imports:
tidyr,
tidyselect,
ComplexHeatmap,
circlize,
stringr
Suggests:
AnnotationHub,
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ export(dms_substitutions)
export(plot_dms_heatmap)
export(zeroshot_DMS_metrics)
importFrom(ComplexHeatmap,Heatmap)
importFrom(ComplexHeatmap,anno_text)
importFrom(ComplexHeatmap,columnAnnotation)
importFrom(ExperimentHub,ExperimentHub)
importFrom(circlize,colorRamp2)
Expand Down Expand Up @@ -43,6 +44,7 @@ importFrom(ggplot2,theme)
importFrom(ggplot2,theme_classic)
importFrom(ggplot2,xlab)
importFrom(ggplot2,ylab)
importFrom(grid,gpar)
importFrom(purrr,keep)
importFrom(queryup,query_uniprot)
importFrom(spdl,info)
Expand Down
171 changes: 105 additions & 66 deletions R/plot_dms_heatmap.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,23 @@ filter_by_pos <-
stop("The 'pos' column must be an integer vector.")
}

## If start_pos is NULL, default to the minimum value in "pos"
if (is.null(start_pos)) {
start_pos <- min(df$pos, na.rm = TRUE)
## Grab minimum and maximum values of the pos column
min_pos <- min(df$pos, na.rm = TRUE)
max_pos <- max(df$pos, na.rm = TRUE)

## Check if user-provided start_pos or end_pos is within the range
if (!is.null(start_pos) && (start_pos > max_pos)) {
stop(sprintf("start_pos (%d) is outside the assay range (%d to %d)",
start_pos, min_pos, max_pos))
}

## If end_pos is NULL, default to the maximum value in "pos"
if (is.null(end_pos)) {
end_pos <- max(df$pos, na.rm = TRUE)
if (!is.null(end_pos) && (end_pos < min_pos)) {
stop(sprintf("end_pos (%d) is outside the assay range (%d to %d)",
end_pos, min_pos, max_pos))
}

## If start or end is NULL, default to min or max "pos"
if (is.null(start_pos)) start_pos <- min_pos
if (is.null(end_pos)) end_pos <- max_pos

## Filter the dataframe based on the specified positions
filtered_df <- df |>
Expand All @@ -34,6 +42,51 @@ filter_by_pos <-
return(filtered_df)
}

#' @rdname plot_dms_heatmap
#'
#' @noRd
#'

filter_exact_coord <-
function(assay_pos, start_pos = NULL, end_pos = NULL, exact_coord = NULL)
{
if (missing(exact_coord)) {

message(paste(
"'exact_coord' not provided,",
"using only positions available in assay."
))

assay_pos

} else if (exact_coord == FALSE) {

assay_pos

} else if (exact_coord == TRUE) {

if (is.null(start_pos)) start_pos <- min(assay_pos$pos)
if (is.null(end_pos)) end_pos <- max(assay_pos$pos)

# Create a sequence of consecutive positions
all_pos <- seq(start_pos, end_pos)

# Merge with full sequence and fill missing values with NA
assay_pos <- merge(
data.frame(pos = all_pos),
assay_pos,
by = "pos",
all.x = TRUE
)

assay_pos

} else {

assay_pos
}
}

#' @rdname plot_dms_heatmap
#'
#' @title Visualize DMS Scores Along a Protein
Expand Down Expand Up @@ -91,14 +144,25 @@ filter_by_pos <-
#' dms_data = dms_data,
#' start_pos = 10,
#' end_pos = 80)
#'
#'
#' plot_dms_heatmap(assay_name = "A0A192B1T2_9HIV1_Haddox_2018",
#' dms_data = dms_data,
#' start_pos = 10,
#' end_pos = 80, exact_coord = TRUE)
#'
#' plot_dms_heatmap(assay_name = "A0A192B1T2_9HIV1_Haddox_2018",
#' dms_data = dms_data,
#' start_pos = 50,
#' end_pos = 100, cluster_rows = TRUE)
#'
#' @importFrom dplyr filter pull as_tibble rename_with mutate
#' arrange select
#'
#' @importFrom tidyr pivot_wider
#'
#' @importFrom ComplexHeatmap Heatmap columnAnnotation
#' @importFrom ComplexHeatmap Heatmap columnAnnotation anno_text
#'
#' @importFrom grid gpar
#'
#' @importFrom circlize colorRamp2
#'
Expand All @@ -109,11 +173,14 @@ plot_dms_heatmap <-
function(
assay_name,
dms_data,
start_pos,
end_pos,
start_pos = NULL,
end_pos = NULL,
exact_coord,
cluster_rows = FALSE,
cluster_columns = FALSE,
...)
{

## Extract the specified assay
assay_df <- dms_data[[assay_name]]

Expand Down Expand Up @@ -148,26 +215,19 @@ plot_dms_heatmap <-
arrange(pos)

## Subset to start_pos and end_pos, or default to first and last sites.
if (missing(start_pos)) {
if (is.null(start_pos)) {
message(paste(
"'start_pos' not provided,",
"using the first position in the protein."
))

if (missing(end_pos)) {
}

if (is.null(end_pos)) {
message(paste(
"'end_pos' not provided,",
"using the last data point in the protein."
"using the last position in the protein."
))
}

assay_pos <- filter_by_pos(
df = assay_wide)

ref_df <- filter_by_pos(
df = assay_df)

} else {
}

assay_pos <- filter_by_pos(
df = assay_wide,
Expand All @@ -180,50 +240,30 @@ plot_dms_heatmap <-
start_pos = start_pos,
end_pos = end_pos)

}

## exact_coord
if (missing(exact_coord)) {

message(paste(
"'exact_coord' not provided,",
"using only positions available in assay."
))

assay_pos

} else if (exact_coord == FALSE) {

assay_pos

} else if (exact_coord == TRUE) {

assay_pos

# Create a sequence of consecutive positions
all_pos <- seq(start_pos, end_pos)

# Merge with full sequence and fill missing values with NA
assay_pos <- merge(
data.frame(pos = all_pos),
assay_pos,
by = "pos",
all.x = TRUE
)

assay_pos

} else {

assay_pos

}
assay_pos <- filter_exact_coord(
assay_pos,
start_pos = start_pos,
end_pos = end_pos,
exact_coord = exact_coord
)

# Define a text annotation for the columns
column_annotation <- assay_pos |>
select(ref, pos) |>
unique()

## cluster_columns with NA check
if (sum(is.na(column_annotation)) > 0 & cluster_columns == TRUE){
stop("Protein range includes missing values, preventing ",
"clustering of columns. Try setting exact_coord argument ",
"to FALSE."
)
}

column_annotation[is.na(column_annotation)] <- " "


## Convert to matrix
pos <- assay_pos$pos
alt <- colnames(assay_pos)
Expand All @@ -250,7 +290,7 @@ plot_dms_heatmap <-
# Define a text annotation for the columns
column_annotation <- columnAnnotation(
text = anno_text(column_annotation$ref,
rot = 90, just = "right", gp = gpar(fontsize = 10))
rot = 0, just = "right", gp = gpar(fontsize = 10))
)

## Create the heatmap
Expand All @@ -262,12 +302,11 @@ plot_dms_heatmap <-

ComplexHeatmap::Heatmap(reordered_matrix,
name = "DMS Score",
cluster_rows = FALSE,
cluster_columns = FALSE,
show_row_names = TRUE,
cluster_rows = cluster_rows,
cluster_columns = cluster_columns,
col = col_fun,
na_col = "grey",
show_column_names = TRUE, top_annotation = column_annotation,
top_annotation = column_annotation,
...)

}
22 changes: 20 additions & 2 deletions man/plot_dms_heatmap.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit d018b4f

Please sign in to comment.