diff --git a/R/data.R b/R/data.R index fc00cfa5..bdd96e81 100644 --- a/R/data.R +++ b/R/data.R @@ -1,4 +1,3 @@ - #' Unbalanced treatment and time sample list #' #' A sample list with 4 columns SampleName, Well, Time and Treatment @@ -86,3 +85,26 @@ #' \item{Sex}{Female (F) or Male (M) - a constraint which kind of animal may receive the respective treatment} #' } "invivo_study_treatments" + +#' Example dataset with a plate effect +#' +#' Here top and bottom row were both used as controls (in dilutions). The top +#' row however was affected differently than the bottom one. This makes +#' normalization virtually impossible. +#' +#' @docType data +#' @keywords datasets +#' @name plate_effect_example +#' @usage data(plate_effect_example) +#' @author Balazs Banfai +#' +#' @format An object of class \code{"tibble"} +#' \describe{ +#' \item{row}{Plate row} +#' \item{column}{Plate column} +#' \item{conc}{Sample concentration} +#' \item{log_conc}{Logarithm of sample concentration} +#' \item{treatment}{Sample treatment} +#' \item{readout}{Readout from experiment} +#' } +"plate_effect_example" diff --git a/data/plate_effect_example.rda b/data/plate_effect_example.rda new file mode 100644 index 00000000..7ae80008 Binary files /dev/null and b/data/plate_effect_example.rda differ diff --git a/man/plate_effect_example.Rd b/man/plate_effect_example.Rd new file mode 100644 index 00000000..0420eecf --- /dev/null +++ b/man/plate_effect_example.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{plate_effect_example} +\alias{plate_effect_example} +\title{Example dataset with a plate effect} +\format{ +An object of class \code{"tibble"} +\describe{ +\item{row}{Plate row} +\item{column}{Plate column} +\item{conc}{Sample concentration} +\item{log_conc}{Logarithm of sample concentration} +\item{treatment}{Sample treatment} +\item{readout}{Readout from experiment} +} +} +\usage{ +data(plate_effect_example) +} +\description{ +Here top and bottom row were both used as controls (in dilutions). The top +row however was affected differently than the bottom one. This makes +normalization virtually impossible. +} +\author{ +Balazs Banfai +} +\keyword{datasets} diff --git a/vignettes/NCS22_talk.Rmd b/vignettes/NCS22_talk.Rmd index adf608d9..373df995 100644 --- a/vignettes/NCS22_talk.Rmd +++ b/vignettes/NCS22_talk.Rmd @@ -26,25 +26,81 @@ library(tidyverse) # Introduction -Examples in this vignette are used in a talk at NCS22. +Examples in this vignette are used were used in our presentation. It uses a subset of the `longitudinal_subject_samples` dataset. ```{r get_data, include = TRUE} data("longitudinal_subject_samples") -dat <- longitudinal_subject_samples %>% - filter(Group %in% 1:5, Week %in% c(1,4)) %>% +dat <- longitudinal_subject_samples |> + filter(Group %in% 1:5, Week %in% c(1, 4)) |> select(SampleID, SubjectID, Group, Sex, Week) -# for simplicity: remove two subjects that don't have both visits -dat <- dat %>% - filter(SubjectID %in% (dat %>% count(SubjectID) %>% filter(n == 2) %>% .$SubjectID)) +# for simplicity: remove two subjects that don't have both visits +dat <- dat |> + filter(SubjectID %in% + (dat |> count(SubjectID) |> filter(n == 2) |> pull(SubjectID))) -subject_data <- dat %>% select(SubjectID, Group, Sex) %>% unique() +subject_data <- dat |> + select(SubjectID, Group, Sex) |> + unique() ``` +## Batch effects matter +Here's an example of plate effect. Here both top and bottom rows of the +plate are used as controls. + +This is the experiment design: + +```{r, fig.width= 4, fig.height=3, echo = FALSE} +data("plate_effect_example") +plate_effect_example |> + ggplot() + + aes(x = column, y = row, fill = treatment, alpha = log_conc) + + geom_tile() + + theme( + panel.grid.major = element_blank(), + panel.grid.minor = element_blank() + ) + + scale_y_discrete(limits = rev) + + scale_fill_brewer(palette = "Set1") + + # make transparency more visible + scale_alpha_continuous(range = c(0.2, 1)) + + ggtitle("Design") +``` + +These are the readouts: + +```{r, fig.width= 4, fig.height=5, echo = FALSE} +p1 <- plate_effect_example |> + ggplot() + + aes(x = column, y = row, fill = readout) + + geom_tile() + + theme( + panel.grid.major = element_blank(), + panel.grid.minor = element_blank() + ) + + scale_y_discrete(limits = rev) + + ggtitle("Readout") + +p2 <- plate_effect_example |> + filter(treatment == "control") |> + mutate(column = as.numeric(column)) |> + ggplot() + + aes(x = column, y = readout, color = row) + + geom_point() + + geom_line() + + scale_color_brewer(palette = "Set1") + + ggtitle("Control") + +cowplot::plot_grid(p1, p2, nrow = 2) +``` + +Due to the plate effect, the control rows are affected differently. It is +virtually impossible to normalize readouts in a meaningful way. + ## Go fully random? * Could it be sufficient to randomly distribute samples across batches? @@ -56,8 +112,8 @@ subject_data <- dat %>% select(SubjectID, Group, Sex) %>% unique() set.seed(17) # gives `bad` random assignment bc <- BatchContainer$new( - dimensions = list("batch" = 3, "location" = 11), -) %>% + dimensions = list("batch" = 3, "location" = 11) +) |> assign_random(subject_data) ``` @@ -65,8 +121,9 @@ Gone wrong: Random distribution of 31 grouped subjects into 3 batches turns out unbalanced: ```{r, fig.width= 3, fig.height=3, echo = FALSE} -bc$get_samples() %>% ggplot(aes(x = batch, fill = Group)) + - geom_bar() + +bc$get_samples() |> + ggplot(aes(x = batch, fill = Group)) + + geom_bar() + labs(y = "subject count") ``` @@ -110,8 +167,8 @@ set.seed(17) # gives `bad` random assignment ``` ```{r} bc <- BatchContainer$new( - dimensions = list("batch" = 3, "location" = 11), -) %>% + dimensions = list("batch" = 3, "location" = 11) +) |> assign_random(subject_data) ``` @@ -121,10 +178,12 @@ bc <- BatchContainer$new( ```{r, fig.width= 5.5, fig.height=3, echo = FALSE} cowplot::plot_grid( plotlist = list( - bc$get_samples() %>% ggplot(aes(x = batch, fill = Group)) + + bc$get_samples() |> + ggplot(aes(x = batch, fill = Group)) + geom_bar() + labs(y = "subject count"), - bc$get_samples() %>% ggplot(aes(x = batch, fill = Sex)) + + bc$get_samples() |> + ggplot(aes(x = batch, fill = Sex)) + geom_bar() + labs(y = "subject count") ), @@ -137,16 +196,23 @@ bc$get_samples() ``` ```{r, echo=FALSE} -bind_rows(head(bc$get_samples(), 3) %>% - mutate(across(everything(), as.character)), - tibble(batch = "...", - location = " ...", - SubjectID = "...", - Group = "...", Sex = "..."), - tail(bc$get_samples(), 3) %>% - mutate(across(everything(), as.character))) %>% - gt::gt() %>% gt::tab_options(table.font.size = 11, - data_row.padding = 0.1) +bind_rows( + head(bc$get_samples(), 3) |> + mutate(across(everything(), as.character)), + tibble( + batch = "...", + location = " ...", + SubjectID = "...", + Group = "...", Sex = "..." + ), + tail(bc$get_samples(), 3) |> + mutate(across(everything(), as.character)) +) |> + gt::gt() |> + gt::tab_options( + table.font.size = 11, + data_row.padding = 0.1 + ) ``` @@ -183,10 +249,12 @@ bc <- optimize_design( ```{r, fig.width= 8, fig.height=3, echo = FALSE} cowplot::plot_grid( plotlist = list( - bc$get_samples() %>% ggplot(aes(x = batch, fill = Group)) + + bc$get_samples() |> + ggplot(aes(x = batch, fill = Group)) + geom_bar() + labs(y = "subject count"), - bc$get_samples() %>% ggplot(aes(x = batch, fill = Sex)) + + bc$get_samples() |> + ggplot(aes(x = batch, fill = Sex)) + geom_bar() + labs(y = "subject count"), bc$plot_trace(include_aggregated = TRUE) @@ -197,16 +265,23 @@ cowplot::plot_grid( ```{r, echo=FALSE} -bind_rows(head(bc$get_samples(), 3) %>% - mutate(across(everything(), as.character)), - tibble(batch = "...", - location = " ...", - SubjectID = "...", - Group = "...", Sex = "..."), - tail(bc$get_samples(), 3) %>% - mutate(across(everything(), as.character))) %>% - gt::gt() %>% gt::tab_options(table.font.size = 11, - data_row.padding = 0.1) +bind_rows( + head(bc$get_samples(), 3) |> + mutate(across(everything(), as.character)), + tibble( + batch = "...", + location = " ...", + SubjectID = "...", + Group = "...", Sex = "..." + ), + tail(bc$get_samples(), 3) |> + mutate(across(everything(), as.character)) +) |> + gt::gt() |> + gt::tab_options( + table.font.size = 11, + data_row.padding = 0.1 + ) ``` @@ -235,41 +310,117 @@ of interest evenly across the plate and adjust for the effect computationally. * sex (lower priority) ```{r} -set.seed(1) #1 #2 +set.seed(4) bc <- BatchContainer$new( - dimensions = list("plate" = 3, "row" = 4, "col" = 6), -) %>% - assign_random(dat) -# assign_in_order(dat) + dimensions = list("plate" = 3, "row" = 4, "col" = 6) +) |> + assign_in_order(dat) ``` ```{r, fig.width= 5, fig.height=4.5, eval=FALSE} -plot_plate(bc, plate = plate, row = row, column = col, - .color = Group, title = "Initial layout by Group") -plot_plate(bc, plate = plate, row = row, column = col, - .color = Sex, title = "Initial layout by Sex") +plot_plate(bc, + plate = plate, row = row, column = col, + .color = Group, title = "Initial layout by Group" +) +plot_plate(bc, + plate = plate, row = row, column = col, + .color = Sex, title = "Initial layout by Sex" +) ``` ```{r, fig.width= 5, fig.height=4.5, echo=FALSE} cowplot::plot_grid( plotlist = list( - plot_plate(bc, plate = plate, row = row, column = col, - .color = Group, title = "Initial layout by Group"), - plot_plate(bc, plate = plate, row = row, column = col, - .color = Sex, title = "Initial layout by Sex") + plot_plate(bc, + plate = plate, row = row, column = col, + .color = Group, title = "Initial layout by Group" + ), + plot_plate(bc, + plate = plate, row = row, column = col, + .color = Sex, title = "Initial layout by Sex" + ) ), nrow = 2 ) ``` -**2-step optimization** `multi_plate_layout()` +## 2-step optimization -* Across plate optimization using osat score as before -* Within plate optimization using distance based sample scoring function +### Across plate optimization using osat score as before +```{r, warning=FALSE} +bc1 <- optimize_design( + bc, + scoring = list( + group = osat_score_generator( + batch_vars = "plate", + feature_vars = "Group" + ), + sex = osat_score_generator( + batch_vars = "plate", + feature_vars = "Sex" + ) + ), + n_shuffle = 1, + acceptance_func = + ~ accept_leftmost_improvement(..., tolerance = 0.01), + max_iter = 150, + quiet = TRUE +) +``` +```{r, fig.width= 5, fig.height=4.5, echo=FALSE} +cowplot::plot_grid( + plotlist = list( + plot_plate(bc1, + plate = plate, row = row, column = col, + .color = Group, title = "Layout after the first step, Group" + ), + plot_plate(bc1, + plate = plate, row = row, column = col, + .color = Sex, title = "Layout after the first step, Sex" + ) + ), + nrow = 2 +) +``` -## Spatial arrangement +### Within plate optimization using distance based sample scoring function +```{r, warning=FALSE} +bc2 <- optimize_design( + bc1, + scoring = mk_plate_scoring_functions( + bc1, + plate = "plate", row = "row", column = "col", + group = "Group" + ), + shuffle_proposal_func = shuffle_with_constraints(dst = plate == .src$plate), + max_iter = 150, + quiet = TRUE +) +``` + +```{r, fig.width= 5, fig.height=4.5, echo=FALSE} +cowplot::plot_grid( + plotlist = list( + plot_plate(bc2, + plate = plate, row = row, column = col, + .color = Group, title = "Layout after the second step, Group" + ), + plot_plate(bc2, + plate = plate, row = row, column = col, + .color = Sex, title = "Layout after the second step, Sex" + ) + ), + nrow = 2 +) +``` + + +## 2-step optimization `multi_plate_layout()` + +We are performing the same optimization as before, but using the +`multi_plate_layout()` function to combine the two steps. ```{r, warning=FALSE, message=FALSE} bc <- optimize_multi_plate_design( @@ -278,7 +429,7 @@ bc <- optimize_multi_plate_design( within_plate_variables = c("Group"), plate = "plate", row = "row", column = "col", n_shuffle = 2, - max_iter = 500 #2000 + max_iter = 500 # 2000 ) ``` @@ -286,10 +437,14 @@ bc <- optimize_multi_plate_design( ```{r, fig.width= 5, fig.height=4.5, echo=FALSE} cowplot::plot_grid( plotlist = list( - plot_plate(bc, plate = plate, row = row, column = col, - .color = Group, title = "Initial layout by Group"), - plot_plate(bc, plate = plate, row = row, column = col, - .color = Sex, title = "Initial layout by Sex") + plot_plate(bc, + plate = plate, row = row, column = col, + .color = Group, title = "After optimization, Group" + ), + plot_plate(bc, + plate = plate, row = row, column = col, + .color = Sex, title = "After optimization, Sex" + ) ), nrow = 2 ) @@ -340,13 +495,14 @@ see vignette `invivo_study_design` for the full story. ```{r, fig.width=4.0, fig.hight = 5.0, echo = FALSE} -layout <- crossing(row = 1:9, column = 1:12) %>% - mutate(Questions = "no") -layout$Questions[c(16, 17, 18, 19, 20, 21, - 27, 28, 33, 34, - 45, 46, - 55, 56, 66, 67, 90, 91)] <- "yes" +layout <- crossing(row = 1:9, column = 1:12) |> + mutate(Questions = "no") +layout$Questions[c( + 16, 17, 18, 19, 20, 21, + 27, 28, 33, 34, + 45, 46, + 55, 56, 66, 67, 90, 91 +)] <- "yes" plot_plate(layout, .color = Questions, title = "Thank you") - ```