diff --git a/DESCRIPTION b/DESCRIPTION index b38c59f7..c0cca1e0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -67,16 +67,13 @@ Suggests: tidyverse, printr, devtools (>= 2.0.0), - gridpattern, ggpattern, cowplot, - bestNormalize + bestNormalize, + here Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.1 +RoxygenNote: 7.2.3 VignetteBuilder: knitr biocViews: -Remotes: - github::trevorld/gridpattern, - github::coolbutuseless/ggpattern diff --git a/NAMESPACE b/NAMESPACE index 2cc2cf16..e8a9d03c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,7 +6,6 @@ export(BatchContainer) export(BatchContainerDimension) export(L1_norm) export(L2s_norm) -export(OptimizationTrace) export(accept_leftmost_improvement) export(as_label) export(as_name) diff --git a/R/all_equal_df.R b/R/all_equal_df.R new file mode 100644 index 00000000..c52e2044 --- /dev/null +++ b/R/all_equal_df.R @@ -0,0 +1,45 @@ +#' Compare two data.frames. +#' +#' This will convert factors to characters and disregard +#' row and column order +#' +#' @param df1 first [data.frame()] to compare +#' @param df2 second `data.frame()` to compare +#' @return `TRUE` or `FALSE` in case differences are present +#' @keywords internal +all_equal_df <- function(df1, df2) { + if (!is.data.frame(df1) || !is.data.frame(df2)) { + return(FALSE) + } + + if (nrow(df1) != nrow(df2) || ncol(df1) != ncol(df2)) { + return(FALSE) + } + + assertthat::assert_that( + !any(duplicated(colnames(df1))), + !any(duplicated(colnames(df2))), + msg = "duplicated colnames" + ) + + df2 <- df2[colnames(df1)] + + # convert factors to characters + df1 <- df1 |> + dplyr::mutate(dplyr::across(dplyr::where(is.factor), as.character)) + df2 <- df2 |> + dplyr::mutate(dplyr::across(dplyr::where(is.factor), as.character)) + + # order by all columns + df1 <- df1[do.call(order, df1),] + df2 <- df2[do.call(order, df2),] + + # remove row names + rownames(df1) <- NULL + rownames(df2) <- NULL + + assertthat::are_equal( + all.equal(df1, df2, check.attributes = FALSE), + TRUE + ) +} diff --git a/R/assignment.R b/R/assignment.R index f52f0572..c9a44f54 100644 --- a/R/assignment.R +++ b/R/assignment.R @@ -4,16 +4,16 @@ #' @param samples data.frame with samples. #' @param batch_container Instance of BatchContainer class #' -#' @return Returns `BatchContainer`, invisibly. +#' @return Returns a new `BatchContainer`. #' @example man/examples/assignment.R assign_random <- function(batch_container, samples = NULL) { - assign_in_order(batch_container, samples) + batch_container <- assign_in_order(batch_container, samples) batch_container$move_samples( location_assignment = sample(batch_container$assignment) ) - invisible(batch_container) + batch_container } #' Distributes samples in order. @@ -25,9 +25,10 @@ assign_random <- function(batch_container, samples = NULL) { #' @param samples data.frame with samples. #' @param batch_container Instance of BatchContainer class #' -#' @return Returns `BatchContainer`, invisibly. +#' @return Returns a new `BatchContainer`. #' @example man/examples/assignment.R assign_in_order <- function(batch_container, samples = NULL) { + batch_container <- batch_container$copy() if (is.null(samples)) { assertthat::assert_that(batch_container$has_samples, msg = "batch-container is empty and no samples provided" @@ -46,7 +47,7 @@ assign_in_order <- function(batch_container, samples = NULL) { rep(NA_integer_, n_locations - n_samples) )) - invisible(batch_container) + batch_container } #' Shuffling proposal function with constraints. @@ -113,7 +114,7 @@ shuffle_with_constraints <- function(src = TRUE, dst = TRUE) { #' the function will check if samples in `batch_container` are identical to the ones in the #' `samples` argument. #' -#' @return Returns `BatchContainer`, invisibly. +#' @return Returns a new `BatchContainer`. #' #' @examples #' bc <- BatchContainer$new( @@ -133,11 +134,12 @@ shuffle_with_constraints <- function(src = TRUE, dst = TRUE) { #' 2, "a", 3, 5, "TRT", #' ) #' # assign samples from the sample sheet -#' assign_from_table(bc, sample_sheet) +#' bc <- assign_from_table(bc, sample_sheet) #' #' bc$get_samples(remove_empty_locations = TRUE) #' assign_from_table <- function(batch_container, samples) { + batch_container <- batch_container$copy() # sample sheet has all the batch variable assertthat::assert_that(is.data.frame(samples) && nrow(samples) > 0, msg = "samples should be non-empty data.frame" @@ -156,11 +158,9 @@ assign_from_table <- function(batch_container, samples) { if (is.null(batch_container$samples)) { batch_container$samples <- only_samples } else { - assertthat::assert_that(dplyr::all_equal(only_samples, - batch_container$get_samples(assignment = FALSE), - ignore_col_order = TRUE, - ignore_row_order = TRUE, - convert = TRUE + assertthat::assert_that(all_equal_df( + only_samples, + batch_container$get_samples(assignment = FALSE) ), msg = "sample sheet should be compatible with samples inside the batch container" ) @@ -177,5 +177,5 @@ assign_from_table <- function(batch_container, samples) { batch_container$move_samples(location_assignment = samples_with_id$.sample_id) - invisible(batch_container) + batch_container } diff --git a/R/batch_container.R b/R/batch_container.R index 19c0b763..f2983c03 100644 --- a/R/batch_container.R +++ b/R/batch_container.R @@ -328,23 +328,67 @@ BatchContainer <- R6::R6Class("BatchContainer", #' @description #' Score current sample assignment, - #' @return Returns a vector of all scoring functions values. - score = function() { - assertthat::assert_that(!is.null(private$scoring_funcs), - msg = "Scoring function needs to be assigned" + #' @param scoring a function or a names list of scoring functions. + #' Each function should return a numeric vector. + #' @return Returns a named vector of all scoring functions values. + score = function(scoring) { + assertthat::assert_that( + !missing(scoring), + !is.null(scoring), + msg = "Scoring function needs to be provided" ) - assertthat::assert_that(is.list(private$scoring_funcs), - length(private$scoring_funcs) >= 1, - msg = "Scroring function should be a non-empty list" + if (is.function(scoring)) { + scoring <- list(scoring) + } else { + assertthat::assert_that(is.list(scoring), length(scoring) >= 1) + assertthat::assert_that( + all(purrr::map_lgl(scoring, is.function)), + msg = "All elements of scoring should be functions" + ) + } + if (is.null(names(scoring))) { + names(scoring) <- stringr::str_c("score_", seq_along(scoring)) + } + assertthat::assert_that( + !any(names(scoring) == ""), + msg = "scoring cannot be a partially named list" ) - assertthat::assert_that(!is.null(private$samples_table), + assertthat::assert_that(is.list(scoring), + length(scoring) >= 1, + msg = "Scoring function should be a non-empty list" + ) + assertthat::assert_that(!is.null(names(scoring)), + msg = "scoring should be a named list" + ) + assertthat::assert_that(self$has_samples, msg = "No samples in the batch container, cannot compute score" ) - - res <- purrr::map_dbl(private$scoring_funcs, ~ .x(self)) - assertthat::assert_that(length(res) == length(private$scoring_funcs)) - - assertthat::assert_that(is.numeric(res), msg = "Scoring function should return a number") + res <- purrr::imap( + scoring, + \(f, i) { + v <- f(self) + assertthat::assert_that( + is.numeric(v), + length(v) >= 1, + msg = "scoring function should return a numeric vector of positive length" + ) + if (length(v) > 1) { + if (is.null(names(v))) { + names(v) <- seq_along(v) + } + names(v) <- stringr::str_c(i, names(v)) + } else { + names(v) <- i + } + v + } + ) |> + purrr::flatten_dbl() + assertthat::assert_that(length(res) >= length(scoring)) + assertthat::assert_that( + !any(names(res) == "step"), + msg = "score name cannot be 'step'" + ) return(res) }, @@ -368,7 +412,7 @@ BatchContainer <- R6::R6Class("BatchContainer", bc$samples_attr <- private$samples_attributes } - bc$scoring_f <- self$scoring_f + bc$trace <- self$trace bc }, @@ -398,6 +442,100 @@ BatchContainer <- R6::R6Class("BatchContainer", cat() cat("\n") invisible(self) + }, + + #' @field trace Optimization trace, a [tibble::tibble()] + trace = tibble::tibble( + optimization_index = numeric(), + call = list(), + start_assignment_vec = list(), + end_assignment_vec = list(), + scores = list(), + aggregated_scores = list(), + seed = list(), + elapsed = as.difftime(character(0)) + ), + + #' @description + #' Return a table with scores from an optimization. + #' + #' @param index optimization index, all by default + #' @param include_aggregated include aggregated scores + #' @return a [tibble::tibble()] with scores + scores_table = function(index = NULL, include_aggregated = FALSE) { + assertthat::assert_that( + tibble::is_tibble(self$trace), + nrow(self$trace) >= 1, + msg = "trace should be available" + ) + assertthat::assert_that(assertthat::is.flag(include_aggregated)) + if (is.null(index)) { + index <- self$trace$optimization_index + } + assertthat::assert_that( + rlang::is_integerish(index), + msg = "index should be an integer" + ) + d <- self$trace %>% + dplyr::filter(.data$optimization_index %in% index) %>% + dplyr::select(.data$optimization_index, .data$scores) %>% + tidyr::unnest(.data$scores) %>% + tidyr::pivot_longer(c(-.data$optimization_index, -.data$step), + names_to = "score", + values_to = "value") %>% + dplyr::mutate(aggregated = FALSE) + if (include_aggregated) { + d_agg <- self$trace %>% + dplyr::filter(.data$optimization_index %in% index) %>% + dplyr::select(.data$optimization_index, .data$aggregated_scores) %>% + tidyr::unnest(.data$aggregated_scores) + + if ("step" %in% colnames(d_agg)) { + # if no aggregated scores are provided (aggregated_scores=NULL), + # there will be no step column after unnesting + d_agg <- d_agg %>% + tidyr::pivot_longer(c(-.data$optimization_index, -.data$step), + names_to = "score", + values_to = "value") %>% + dplyr::mutate( + aggregated = TRUE, + score = paste0("agg.", .data$score) + ) + d <- dplyr::bind_rows( + d, + d_agg + ) + } + } + d + }, + + #' @description + #' Plot trace + #' @param index optimization index, all by default + #' @param include_aggregated include aggregated scores + #' @param ... not used. + #' @return a [ggplot2::ggplot()] object + plot_trace = function(index = NULL, include_aggregated = FALSE, ...) { + d <- self$scores_table(index, include_aggregated) %>% + dplyr::mutate( + agg_title = dplyr::if_else(.data$aggregated, "aggregated", "score") + ) + p <- ggplot2::ggplot(d) + + ggplot2::aes(.data$step, .data$value, group = .data$score, color = .data$score) + + ggplot2::geom_line() + + ggplot2::geom_point() + if (length(unique(d$optimization_index)) > 1) { + p <- p + + ggplot2::facet_wrap(~ optimization_index, scales = "free") + } else if (include_aggregated && any(d$aggregated)) { + p <- p + + ggplot2::facet_wrap(~ agg_title, scales = "free_y", ncol = 1) + } else { + p <- p + + ggplot2::facet_wrap(~ score, scales = "free_y", ncol = 1) + } + p } ), private = list( @@ -445,22 +583,7 @@ BatchContainer <- R6::R6Class("BatchContainer", #' Upon assignment a single function will be automatically converted to a list #' In the later case each function is called. scoring_f = function(value) { - if (missing(value)) { - private$scoring_funcs - } else { - if (is.null(value)) { - private$scoring_funcs <- NULL - } else if (is.function(value)) { - private$scoring_funcs <- list(value) - } else { - assertthat::assert_that(is.list(value), length(value) >= 1) - assertthat::assert_that( - all(purrr::map_lgl(self$scoring_f, is.function)), - msg = "All elements of scoring_f should be functions" - ) - private$scoring_funcs <- value - } - } + stop("scoring_f is deprecated, pass it to optimize_design() directly instead") }, #' @field has_samples diff --git a/R/optimize.R b/R/optimize.R index 2db24aa5..2ae7b8eb 100644 --- a/R/optimize.R +++ b/R/optimize.R @@ -123,8 +123,9 @@ update_batchcontainer <- function(bc, shuffle_params) { #' @param batch_container An instance of `BatchContainer`. #' @param samples A `data.frame` with sample information. #' Should be `NULL` if the `BatchContainer` already has samples in it. +#' @param scoring Scoring function or a named [list()] of scoring functions. #' @param n_shuffle Vector of length 1 or larger, defining how many random sample -#' swaps should be performed in each iteration. If length(n_shuffle)==1, +#' swaps should be performed in each iteration. If `length(n_shuffle)==1`, #' this sets no limit to the number of iterations. Otherwise, the optimization #' stops if the swapping protocol is exhausted. #' @param shuffle_proposal_func A user defined function to propose the next shuffling of samples. @@ -170,10 +171,14 @@ update_batchcontainer <- function(bc, shuffle_params) { #' bc <- BatchContainer$new( #' dimensions = c("plate" = 2, "column" = 5, "row" = 6) #' ) -#' bc$scoring_f <- osat_score_generator("plate", "Sex") -#' optimize_design(bc, invivo_study_samples, max_iter = 100) +#' bc <- optimize_design(bc, invivo_study_samples, +#' scoring = osat_score_generator("plate", "Sex"), +#' max_iter = 100 +#' ) #' plot_plate(bc$get_samples(), .col = Sex) -optimize_design <- function(batch_container, samples = NULL, n_shuffle = NULL, +optimize_design <- function(batch_container, samples = NULL, + scoring = NULL, + n_shuffle = NULL, shuffle_proposal_func = NULL, acceptance_func = accept_strict_improvement, aggregate_scores_func = identity, @@ -182,10 +187,19 @@ optimize_design <- function(batch_container, samples = NULL, n_shuffle = NULL, sample_attributes_fixed = FALSE, max_iter = 1e4, min_delta = NA, quiet = FALSE) { start_time <- Sys.time() + cl <- match.call() + + # create a copy, so that we do not modify the BatchContainer + batch_container <- batch_container$copy() + trace <- tibble::tibble( + optimization_index = max(batch_container$trace$optimization_index, 0) + 1, + call = list(cl), + start_assignment_vec = list(batch_container$assignment) + ) # based on https://stat.ethz.ch/pipermail/r-help/2007-September/141717.html if (!exists(".Random.seed")) stats::runif(1) - save_random_seed <- .Random.seed + trace$seed <- list(.Random.seed) if (is.null(samples)) { assertthat::assert_that(batch_container$has_samples, @@ -193,15 +207,14 @@ optimize_design <- function(batch_container, samples = NULL, n_shuffle = NULL, ) } else { assertthat::assert_that(nrow(samples) > 0) - assign_in_order(batch_container, samples) + batch_container <- assign_in_order(batch_container, samples) } - # Check presence of scoring function and that it's a list of functions - assertthat::assert_that(!is.null(batch_container$scoring_f), msg = "no scoring function set for BatchContainer") - assertthat::assert_that(is.list(batch_container$scoring_f), msg = "scoring function is expected to be a list") - assertthat::assert_that(all(purrr::map_lgl(batch_container$scoring_f, is.function)), msg = "All scoring functions have to be function definitions") - + assertthat::assert_that( + !is.null(scoring), + msg = "Scoring should be provided when calling optimize_design()" + ) # Get assigned samples and locations from the batch container samp <- batch_container$get_samples(include_id = TRUE, assignment = TRUE, remove_empty_locations = FALSE) @@ -257,13 +270,14 @@ optimize_design <- function(batch_container, samples = NULL, n_shuffle = NULL, best_shuffle <- list(src = NULL, dst = NULL, location_assignment = batch_container$assignment, samples_attr = NULL) } - initial_score <- batch_container$score() # Evaluate this just once in order not to break current tests + # Evaluate this just once in order not to break current tests + initial_score <- batch_container$score(scoring) score_dim <- length(initial_score) # Check score variances (should be all >0) if (check_score_variance) { bc_copy <- batch_container$copy() - score_vars <- random_score_variances(batch_container$copy(), random_perm = 100, sample_attributes_fixed) + score_vars <- random_score_variances(batch_container$copy(), scoring = scoring, random_perm = 100, sample_attributes_fixed) low_var_scores <- score_vars < 1e-10 if (!quiet) { message( @@ -298,6 +312,7 @@ optimize_design <- function(batch_container, samples = NULL, n_shuffle = NULL, ) } autoscale_func <- mk_autoscale_function(batch_container$copy(), + scoring = scoring, random_perm = autoscaling_permutations, use_boxcox = autoscale_useboxcox, sample_attributes_fixed @@ -312,20 +327,23 @@ optimize_design <- function(batch_container, samples = NULL, n_shuffle = NULL, prev_agg <- NULL - trace <- OptimizationTrace$new( - max_iter + 1, # + 1 to accommodate initial score - length(batch_container$scoring_f), - names(batch_container$scoring_f) + scores_mat <- matrix( + nrow = max_iter + 1, # + 1 to accommodate initial score + ncol = length(best_score), + dimnames = list(NULL, names(best_score)) ) + scores_mat[1,] <- best_score if (identical(aggregate_scores_func, identity)) { - # Do not store aggregated scores if unnecessary - trace$set_scores(1, best_score, NULL) + aggregated_scores_mat <- NULL } else { - trace$set_scores(1, best_score, best_agg) + aggregated_scores_mat <- matrix( + nrow = max_iter + 1, # + 1 to accommodate initial score + ncol = length(best_agg), + dimnames = list(NULL, names(best_agg)) + ) + aggregated_scores_mat[1,] <- best_agg } - # to do: make work with >1-dim agg, line should read as - # trace$set_scores(1, best_score, best_agg) if (!quiet) report_scores(best_score, best_agg, iteration = 0) @@ -338,7 +356,7 @@ optimize_design <- function(batch_container, samples = NULL, n_shuffle = NULL, using_attributes <- TRUE } - new_score <- autoscale_func(batch_container$score()) + new_score <- autoscale_func(batch_container$score(scoring)) assertthat::assert_that(!any(is.na(new_score)), msg = stringr::str_c("NA apprearing during scoring in iteration ", iteration)) new_agg <- aggregate_scores_func(new_score) @@ -358,11 +376,9 @@ optimize_design <- function(batch_container, samples = NULL, n_shuffle = NULL, } iteration <- iteration + 1 - if (identical(aggregate_scores_func, identity)) { - # Do not store aggregated scores if unnecessary - trace$set_scores(iteration, best_score, NULL) - } else { - trace$set_scores(iteration, best_score, best_agg) + scores_mat[iteration,] <- best_score + if (!is.null(aggregated_scores_mat)) { + aggregated_scores_mat[iteration,] <- best_agg } # Test stopping criteria @@ -384,8 +400,14 @@ optimize_design <- function(batch_container, samples = NULL, n_shuffle = NULL, # In the end, always make sure that final state of bc is the one with the best score update_batchcontainer(batch_container, best_shuffle) - trace$shrink(iteration) - trace$seed <- save_random_seed + # shrink + trace$scores <- shrink_mat(scores_mat, iteration) + trace$aggregated_scores <- shrink_mat(aggregated_scores_mat, iteration) trace$elapsed <- Sys.time() - start_time - trace + trace$end_assignment_vec = list(batch_container$assignment) + batch_container$trace <- dplyr::bind_rows( + batch_container$trace, + trace + ) + batch_container } diff --git a/R/plot.R b/R/plot.R index 6f880a24..d1a9c4bc 100644 --- a/R/plot.R +++ b/R/plot.R @@ -78,7 +78,7 @@ plot_design <- function(.tbl, ..., .color, .alpha = NULL) { #' ) #' #' # assign samples from the sample sheet -#' assign_random(bc, samples = sample_sheet) +#' bc <- assign_random(bc, samples = sample_sheet) #' #' plot_plate(bc$get_samples(), #' plate = plate, column = column, row = row, diff --git a/R/score_autoscaling.R b/R/score_autoscaling.R index bdcf1dfc..8104d48c 100644 --- a/R/score_autoscaling.R +++ b/R/score_autoscaling.R @@ -1,20 +1,22 @@ #' Sample scores from a number of completely random sample permutations #' #' @param batch_container An instance of [BatchContainer]. +#' @param scoring A named [list()] of scoring function. Each function should +#' return a vector of non-zero length. #' @param random_perm Number of random sample permutations to be done. #' @param sample_attributes_fixed Logical; if `FALSE`, simulate a shuffle function that alters sample attributes at each iteration. #' #' @return A score matrix with n (# of permutations) rows and m (dimensionality of score) columns. #' #' @keywords internal -sample_random_scores <- function(batch_container, random_perm, sample_attributes_fixed) { - random_scores <- matrix(NA_real_, nrow = random_perm, ncol = length(batch_container$score())) +sample_random_scores <- function(batch_container, scoring, random_perm, sample_attributes_fixed) { + random_scores <- matrix(NA_real_, nrow = random_perm, ncol = length(batch_container$score(scoring))) for (i in seq_len(random_perm)) { batch_container$move_samples(location_assignment = complete_random_shuffling(batch_container)) if (!sample_attributes_fixed && batch_container$has_samples_attr) { batch_container$samples_attr <- batch_container$samples_attr[sample(nrow(batch_container$samples_attr)), ] } - random_scores[i, ] <- batch_container$score() + random_scores[i, ] <- batch_container$score(scoring) } random_scores @@ -24,6 +26,8 @@ sample_random_scores <- function(batch_container, random_perm, sample_attributes #' Create a function that transforms a current (multi-dimensional) score into a boxcox normalized one #' #' @param batch_container An instance of [BatchContainer]. +#' @param scoring A named [list()] of scoring function. Each function should +#' return a vector of non-zero length. #' @param random_perm Number of random sample permutations for the estimation of the scaling params. #' @param use_boxcox Logical; if TRUE and the `bestNormalize` package is available, boxcox transformations will be used to #' normalize individual scores. If not possible, scores will just be transformed to a zero mean and unit standard deviation. @@ -31,9 +35,9 @@ sample_random_scores <- function(batch_container, random_perm, sample_attributes #' #' @return The transformation function for a new score vector #' @keywords internal -mk_autoscale_function <- function(batch_container, random_perm, use_boxcox = TRUE, sample_attributes_fixed = FALSE) { - random_scores <- sample_random_scores(batch_container, random_perm, sample_attributes_fixed) - score_dim <- length(batch_container$score()) +mk_autoscale_function <- function(batch_container, scoring, random_perm, use_boxcox = TRUE, sample_attributes_fixed = FALSE) { + random_scores <- sample_random_scores(batch_container, scoring, random_perm, sample_attributes_fixed) + score_dim <- length(batch_container$score(scoring)) # Return function using boxcox transform if bestNormalize package is available if (use_boxcox && requireNamespace("bestNormalize", quietly = T)) { @@ -69,13 +73,15 @@ mk_autoscale_function <- function(batch_container, random_perm, use_boxcox = TRU #' Estimate the variance of individual scores by a series of completely random sample permutations #' #' @param batch_container An instance of `BatchContainer`. +#' @param scoring A named [list()] of scoring function. Each function should +#' return a vector of non-zero length. #' @param random_perm Number of random sample permutations to be done. #' @param sample_attributes_fixed Logical; if FALSE, simulate a shuffle function that alters sample attributes at each iteration. #' #' @return Vector of length m (=dimensionality of score) with estimated variances of each subscore #' #' @keywords internal -random_score_variances <- function(batch_container, random_perm, sample_attributes_fixed) { - random_scores <- sample_random_scores(batch_container, random_perm, sample_attributes_fixed) +random_score_variances <- function(batch_container, scoring, random_perm, sample_attributes_fixed) { + random_scores <- sample_random_scores(batch_container, scoring, random_perm, sample_attributes_fixed) purrr::map_dbl(asplit(random_scores, 2), stats::var, na.rm = T) } diff --git a/R/score_plates.R b/R/score_plates.R index 7cf1c50c..addea7e4 100644 --- a/R/score_plates.R +++ b/R/score_plates.R @@ -69,12 +69,12 @@ mk_dist_matrix <- function(plate_x = 12, plate_y = 8, dist = "minkowski", p = 2, #' bc <- BatchContainer$new( #' dimensions = c("column" = 6, "row" = 10) #' ) -#' assign_random(bc, invivo_study_samples) -#' bc$scoring_f <- mk_plate_scoring_functions( +#' bc <- assign_random(bc, invivo_study_samples) +#' scoring_f <- mk_plate_scoring_functions( #' bc, #' row = "row", column = "column", group = "Sex" #' ) -#' optimize_design(bc, max_iter = 100) +#' bc <- optimize_design(bc, scoring = scoring_f, max_iter = 100) #' plot_plate(bc$get_samples(), .col = Sex) #' mk_plate_scoring_functions <- function(batch_container, plate = NULL, row, column, group, p = 2, penalize_lines = "soft") { @@ -229,8 +229,6 @@ optimize_multi_plate_design <- function(batch_container, across_plates_variables msg = "All columns in 'within_plate_variable' argument have to be found in batch container samples." ) - traces <- list() - skip_osat <- is.null(across_plates_variables) || is.null(plate) || dplyr::n_distinct(bc$get_locations()[[plate]]) < 2 if (skip_osat && !quiet) message("\nNo balancing of variables across plates required...") @@ -239,25 +237,22 @@ optimize_multi_plate_design <- function(batch_container, across_plates_variables scoring_funcs <- purrr::map(across_plates_variables, ~ osat_score_generator(batch_vars = plate, feature_vars = .x)) %>% unlist() names(scoring_funcs) <- across_plates_variables - bc$scoring_f <- scoring_funcs if (!quiet) message("\nAssigning samples to plates...") - trace1 <- optimize_design(bc, + bc <- optimize_design(bc, + scoring = scoring_funcs, max_iter = max_iter, n_shuffle = n_shuffle, acceptance_func = accept_leftmost_improvement, quiet = TRUE ) - traces <- c(traces, osat_across_plates = trace1) } if (!is.null(within_plate_variables)) { - within_traces <- list() plate_levels <- unique(bc$get_locations()[[plate]]) scoring_funcs <- purrr::map(within_plate_variables, ~ mk_plate_scoring_functions(bc, plate = plate, row = row, column = column, group = .x)) %>% unlist() names(scoring_funcs) <- paste(rep(within_plate_variables, each = length(plate_levels)), names(scoring_funcs)) - bc$scoring_f <- scoring_funcs if (!quiet) { @@ -270,7 +265,8 @@ optimize_multi_plate_design <- function(batch_container, across_plates_variables for (curr_plate in plate_levels) { if (!quiet && length(plate_levels) > 1) cat(curr_plate, "... ") - trace2 <- optimize_design(bc, + bc <- optimize_design(bc, + scoring = scoring_funcs, max_iter = max_iter, quiet = TRUE, shuffle_proposal_func = mk_subgroup_shuffling_function( @@ -279,16 +275,13 @@ optimize_multi_plate_design <- function(batch_container, across_plates_variables ), acceptance_func = accept_leftmost_improvement ) - within_traces <- c(within_traces, trace2) } if (!quiet) cat("\n") - names(within_traces) <- paste0("within_plate_", plate_levels) - traces <- c(traces, within_traces) } if (skip_osat && is.null(within_plate_variables) && !quiet) { message("\nBoth across plates and within plate optimization skipped ('within_plate_variables' is empty).\nBatch container unchanged.\n") } - invisible(traces) + bc } diff --git a/R/shuffle_samples.R b/R/shuffle_samples.R index ef56deb4..ad94b7ef 100644 --- a/R/shuffle_samples.R +++ b/R/shuffle_samples.R @@ -86,9 +86,9 @@ mk_constant_swapping_function <- function(n_swaps, quiet = FALSE) { #' bc <- BatchContainer$new( #' dimensions = c("plate" = 2, "column" = 5, "row" = 6) #' ) -#' bc$scoring_f <- osat_score_generator("plate", "Sex") -#' optimize_design( -#' bc, invivo_study_samples, +#' scoring_f <- osat_score_generator("plate", "Sex") +#' bc <- optimize_design( +#' bc, scoring = scoring_f, invivo_study_samples, #' max_iter = 100, #' shuffle_proposal_func = complete_random_shuffling #' ) @@ -114,9 +114,9 @@ complete_random_shuffling <- function(batch_container, ...) { #' bc <- BatchContainer$new( #' dimensions = c("plate" = 2, "column" = 5, "row" = 6) #' ) -#' bc$scoring_f <- osat_score_generator("plate", "Sex") +#' scoring_f <- osat_score_generator("plate", "Sex") #' optimize_design( -#' bc, invivo_study_samples, +#' bc, scoring = scoring_f, invivo_study_samples, #' max_iter = 100, #' shuffle_proposal_func = mk_swapping_function(1) #' ) diff --git a/R/trace.R b/R/trace.R index b4a88dad..72fa6358 100644 --- a/R/trace.R +++ b/R/trace.R @@ -1,240 +1,3 @@ -#' @title -#' OptimizationTrace represents optimization trace. -#' -#' @description -#' Usually it is created by [optimize_design()]. -#' -#' @export -OptimizationTrace <- R6::R6Class("OptimizationTrace", - public = list( - #' @field scores - #' Contains a matrix of scores. The matrix size is usually - #' `c(iterations + 1, length(bc$scoring_f))` - scores = NULL, - - #' @field aggregated_scores - #' Contains a matrix of scores after aggregation. - #' The matrix size is usually `c(iterations + 1, length(aggregated))`, - #' where `length(aggregated)` is the length of aggregated scores vector. - #' Can be `NULL` if aggregated scores are not used. - aggregated_scores = NULL, - - #' @field seed - #' Saved value of [.Random.seed]. - seed = NULL, - - #' @field elapsed - #' Running time of the optimization. - elapsed = NULL, - - #' @field last_step - #' Last iteration step for which the score was set. - last_step = 0, - - #' @description - #' Create a new `OptimizationTrace` object. - #' - #' @param n_steps - #' Number of values to save. Usually `n_steps == iterations + 1`. - #' @param n_scores - #' Number of scoring functions. - #' @param score_names - #' Names of scoring functions. - #' - #' @examples - #' tr <- OptimizationTrace$new(10, 2, c("score1", "score2")) - initialize = function(n_steps, n_scores, score_names) { - self$scores <- matrix(NA_real_, nrow = n_steps, ncol = n_scores) - if (!is.null(score_names)) { - dimnames(self$scores) <- list(NULL, score_names) - } - }, - - #' @description - #' Set scores for i-th step. - #' - #' @param i - #' Step number. - #' @param scores - #' Scores, a vector or a value if no auxiliary functions are used. - #' @param aggregated_scores - #' Vector of aggregated scores. Can be NULL. - #' - #' @return `OptimizationTrace` invisibly. - #' - #' @examples - #' tr$set_scores(1, c(0.5, 0.5), NULL) - #' tr$set_scores(2, c(0.5, 0.5), NULL) - set_scores = function(i, scores, aggregated_scores) { - assertthat::assert_that( - assertthat::is.count(i), - is.vector(scores), - is.null(aggregated_scores) || is.vector(aggregated_scores) - ) - # initialize aggregated scores, in case they're empty - self$scores[i, ] <- scores - if (!is.null(aggregated_scores)) { - if (is.null(self$aggregated_scores)) { - self$aggregated_scores <- matrix( - NA_real_, - nrow = nrow(self$scores), - ncol = length(aggregated_scores) - ) - } - assertthat::assert_that( - length(aggregated_scores) == ncol(self$aggregated_scores) - ) - self$aggregated_scores[i, ] <- aggregated_scores - } - self$last_step <- i - invisible(self) - }, - - #' @description - #' Shrink scores by keeping only first `last_step` scores. - #' - #' @param last_step - #' Last step to keep. - #' - #' @return `OptimizationTrace` invisibly. - #' @examples - #' tr$shrink(2) - shrink = function(last_step = self$last_step) { - self$scores <- head(self$scores, last_step) - if (!is.null(self$aggregated_scores)) { - self$aggregated_scores <- head(self$aggregated_scores, last_step) - } - invisible(self) - }, - - #' @description - #' Return individual (not aggregated!) scores by keeping only first `last_step` scores. - #' - #' @param last_step - #' Last step to keep. - #' - #' @return `OptimizationTrace` invisibly. - #' @examples - #' tr$get_scores() - get_scores = function(last_step = self$last_step) { - head(self$scores, last_step) - }, - - #' @description - #' Print `OptimizationTrace`. - #' - #' @param ... - #' Unused. - #' - #' @return `OptimizationTrace` invisibly. - #' @examples - #' print(tr) - print = function(...) { - start_score <- self$scores[1, ] %>% - round(3) %>% - stringr::str_c(collapse = ",") - final_score <- tail(self$scores, 1) %>% - round(3) %>% - stringr::str_c(collapse = ",") - cat(stringr::str_glue("Optimization trace ({self$n_steps} score values, elapsed {format(self$elapsed)}).\n\n")) - cat(" Starting score: ", start_score, "\n", sep = "") - cat(" Final score : ", final_score, "\n", sep = "") - invisible(self) - }, - - #' @description - #' Convert to a [data.frame]. - #' - #' @param include_aggregated Include aggregated scores. Otherwise only - #' raw scores are exported. - #' - #' @return [data.frame] - #' @examples - #' tr$as_tibble() - as_tibble = function(include_aggregated = TRUE) { - scores <- make_colnames(self$scores, "score") %>% - tibble::as_tibble() %>% - dplyr::mutate(step = dplyr::row_number()) %>% - tidyr::pivot_longer( - c(-step), - names_to = "score", - values_to = "value" - ) %>% - dplyr::mutate(score = factor(score)) - if (include_aggregated) { - agg_scores <- self$aggregated_scores - } else { - agg_scores <- NULL - } - if (!is.null(agg_scores) && include_aggregated) { - colnames(agg_scores) <- stringr::str_c( - "agg.", seq_len(ncol(agg_scores)) - ) - agg_scores <- agg_scores %>% - tibble::as_tibble() %>% - dplyr::mutate(step = dplyr::row_number()) %>% - tidyr::pivot_longer( - c(-step), - names_to = "score", - values_to = "value" - ) %>% - dplyr::mutate(score = factor(score)) - } - dplyr::bind_rows( - score = scores, - aggregated = agg_scores, - .id = "type" - ) %>% - dplyr::mutate(type = factor(type, levels = c("score", "aggregated"))) - }, - - #' @description - #' Plot `OptimizationTrace`. Only the main score at the moment. - #' - #' @param include_aggregated Include aggregated scores. Otherwise only - #' raw scores are plotted. - #' @param ... - #' Not used. - #' - #' @examples - #' tr <- OptimizationTrace$new(10, 3, letters[1:3]) - #' for (i in seq_len(10)) { - #' tr$set_scores(i, rnorm(3)*(1:3), rnorm(3)*(1:3)) - #' } - #' - #' # plot only the main scores - #' plot(tr) - #' # plot main and aggregated scores - #' plot(tr, include_aggregated=TRUE) - plot = function(include_aggregated = FALSE, ...) { - p <- self$as_tibble(include_aggregated = include_aggregated) %>% - ggplot2::ggplot() + - ggplot2::aes(x = step, y = value, group = score, color = score) + - ggplot2::geom_point() + - ggplot2::geom_line() - - if (include_aggregated) { - p + - ggplot2::facet_wrap(~type, scales = "free_y", ncol = 1) - } else { - p + - ggplot2::facet_wrap(~score, scales = "free_y", ncol = 1) - } - } - ), - active = list( - #' @field n_steps - #' Returns number of steps in the `OptimizationTrace`. - n_steps = function(value) { - if (missing(value)) { - nrow(self$scores) - } else { - stop("Cannot set n_steps (read-only).") - } - } - ) -) - #' Make [matrix] column names unique. #' #' @param prefix Prefix to add if column names are empty. @@ -251,3 +14,20 @@ make_colnames <- function(m, prefix = "X") { colnames(m) <- make.names(colnames(m), unique = TRUE) m } + +#' Shrinks a matrix with scores and adds an iteration column. +#' +#' @param m input matrix +#' @param last_iteration last iteration +#' +#' @return a [tibble::tibble()] wrapped in a list +#' @keywords internal +shrink_mat <- function(m, last_iteration) { + if (is.null(m)) + return(m) + dplyr::bind_cols( + tibble::tibble(step=seq_len(last_iteration)), + as.data.frame(utils::head(m, last_iteration)) + ) %>% + list() +} diff --git a/README.Rmd b/README.Rmd index e5a3507c..5df79833 100644 --- a/README.Rmd +++ b/README.Rmd @@ -69,7 +69,7 @@ bc <- BatchContainer$new( # assign samples randomly set.seed(17) -assign_random(bc, subject_data) +bc <- assign_random(bc, subject_data) bc$get_samples() %>% ggplot() + @@ -82,7 +82,7 @@ Random assignmet of samples to batches produced an uneven distribution. ### Optimizing the assignemnt ```{r optimized_assignment, warning=FALSE} # set scoring functions -bc$scoring_f <- list( +scoring_f <- list( # first priority, groups are evenly distributed group = osat_score_generator(batch_vars = "batch", feature_vars = "Group"), @@ -91,7 +91,9 @@ bc$scoring_f <- list( feature_vars = "Sex") ) -trace <- optimize_design(bc, max_iter = 150, quiet = TRUE) +bc <- optimize_design( + bc, scoring = scoring_f, max_iter = 150, quiet = TRUE +) bc$get_samples() %>% ggplot() + @@ -99,7 +101,7 @@ bc$get_samples() %>% geom_bar() # show optimization trace -plot(trace) +bc$plot_trace() ``` ## Examples diff --git a/README.md b/README.md index bcfa081b..8be1fe49 100644 --- a/README.md +++ b/README.md @@ -69,7 +69,7 @@ bc <- BatchContainer$new( # assign samples randomly set.seed(17) -assign_random(bc, subject_data) +bc <- assign_random(bc, subject_data) bc$get_samples() %>% ggplot() + @@ -85,7 +85,7 @@ Random assignmet of samples to batches produced an uneven distribution. ``` r # set scoring functions -bc$scoring_f <- list( +scoring_f <- list( # first priority, groups are evenly distributed group = osat_score_generator(batch_vars = "batch", feature_vars = "Group"), @@ -94,7 +94,9 @@ bc$scoring_f <- list( feature_vars = "Sex") ) -trace <- optimize_design(bc, max_iter = 150, quiet = TRUE) +bc <- optimize_design( + bc, scoring = scoring_f, max_iter = 150, quiet = TRUE +) bc$get_samples() %>% ggplot() + @@ -107,7 +109,7 @@ bc$get_samples() %>% ``` r # show optimization trace -plot(trace) +bc$plot_trace() ``` ![](man/figures/README-optimized_assignment-2.png) diff --git a/man/BatchContainer.Rd b/man/BatchContainer.Rd index db4a1e6b..e5c32ac8 100644 --- a/man/BatchContainer.Rd +++ b/man/BatchContainer.Rd @@ -27,6 +27,13 @@ bc <- BatchContainer$new( bc } +\section{Public fields}{ +\if{html}{\out{
Females are exclusively used for treatment 2, as was specified in the treatment list.
- +Red diamonds mark the mean values for a specific sex within each treatment group.
- +The following plots show the organization of the cage rack, individual cages colored by different variables each time.
- +Finally, an overview plot illustrates the placement of animals in the cages. Notice the distinct earmarks within each cage, a ‘soft’ design constraint that could be achieved with the given solution.
- +
-set.seed(41)
+set.seed(41)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+ example1,
+ scoring = scoring_f,
max_iter = 300, # this is set to shorten vignette run-time based on known random seed, normally we don't know.
n_shuffle = 2,
acceptance_func = mk_simanneal_acceptance_func(mk_simanneal_temp_func(T0 = 1000, alpha = 0.5)),
@@ -121,35 +120,33 @@ Example 1: An expensive way to construct a 4x4 latin square (one
#> Aggregated: 16
#> Achieved score: c(0, 0) at iteration 284
#> Aggregated: 0
-trace
-#> Optimization trace (301 score values, elapsed 21.10947 secs).
-#> Starting score: 48,0
-#> Final score : 0,0
+bc$trace$elapsed
+#> Time difference of 5.483629 secs
plot_plate(bc,
plate = plate, row = row, column = col, .color = Group,
title = "Ex1: Using OSAT scores for plate design\n(not the recommended way!)"
)
-
+
Now using a dedicated scoring for the group distances on a plate.
This should reliably lead to a nice symmetry-bearing latin square
design with only a one-dimensional score to look at.
-bc <- example1$copy()
-
-bc$scoring_f <- mk_plate_scoring_functions(bc, row = "row", column = "col", group = "Group")
+scoring_f <- mk_plate_scoring_functions(bc, row = "row", column = "col", group = "Group")
set.seed(42)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+ example1,
+ scoring = scoring_f,
max_iter = 1000, # this is set to shorten vignette run-time based on random seed, normally we don't know.
n_shuffle = 2,
acceptance_func = mk_simanneal_acceptance_func(mk_simanneal_temp_func(T0 = 1000, alpha = 0.5)),
quiet = TRUE
)
-trace$elapsed
-#> Time difference of 11.43023 secs
+bc$trace$elapsed
+#> Time difference of 6.182886 secs
plot_plate(bc,
plate = plate, row = row, column = col, .color = Group,
title = "Ex1: Using a dedicated plate scoring function:\nThis should show a latin square!"
)
-
+
diff --git a/vignettes/cached/_plate_scoring_ex2.Rmd b/vignettes/cached/_plate_scoring_ex2.Rmd
index cc47b1e0..b3cba4bc 100644
--- a/vignettes/cached/_plate_scoring_ex2.Rmd
+++ b/vignettes/cached/_plate_scoring_ex2.Rmd
@@ -1,7 +1,7 @@
---
title: "Plate scoring example 2"
output: html_fragment
-knit: (\(input, ...) rmarkdown::render(input, output_dir = "vignettes/cached"))
+knit: (\(input, ...) rmarkdown::render(input, output_dir = here::here("vignettes/cached")))
---
```{r, include = FALSE}
@@ -40,28 +40,28 @@ example2 <- BatchContainer$new(
)
# Add samples to container
-assign_in_order(example2, samples = tibble::tibble(
+example2 <- assign_in_order(example2, samples = tibble::tibble(
Group = c(rep(c("Grp 1", "Grp 2", "Grp 3", "Grp 4"), each = 8)),
ID = 1:32
))
-bc <- example2$copy()
-
-bc$scoring_f <- c(mk_plate_scoring_functions(bc, plate = "plate", row = "row", column = "col", group = "Group"),
+scoring_f <- c(mk_plate_scoring_functions(example2, plate = "plate", row = "row", column = "col", group = "Group"),
osat_plate = osat_score_generator(batch_vars = c("plate"), feature_vars = c("Group"))
)
-plot_plate(bc,
+plot_plate(example2,
plate = plate, row = row, column = col, .color = Group,
title = "Ex2: Initial sample arrangement"
)
-bc$score()
+example2$score(scoring_f)
```
```{r}
set.seed(41)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+ example2,
+ scoring = scoring_f,
n_shuffle = c(rep(10, 10), rep(3, 90), rep(2, 100), rep(1, 1400)),
acceptance_func = mk_simanneal_acceptance_func(mk_simanneal_temp_func(T0 = 10000, alpha = 0.5)),
aggregate_scores_func = worst_score,
@@ -70,9 +70,9 @@ trace <- optimize_design(bc,
```
```{r}
-trace$elapsed
+bc$trace$elapsed
-bc$score()
+bc$score(scoring_f)
plot_plate(bc,
plate = plate, row = row, column = col, .color = Group,
@@ -92,33 +92,34 @@ permutation function which takes the plate structure into account and only
shuffles samples around within one plate.
```{r}
-# Setting up the batch container
-
-bc <- example2$copy()
-
-bc$scoring_f <- osat_score_generator(batch_vars = c("plate"), feature_vars = c("Group"))
+scoring_f <- osat_score_generator(batch_vars = c("plate"), feature_vars = c("Group"))
set.seed(42)
-optimize_design(bc,
+bc <- optimize_design(
+ example2,
+ scoring = scoring_f,
quiet = TRUE,
max_iter = 200, # this is set to shorten vignette run-time, normally we don't know.
n_shuffle = 2,
acceptance_func = mk_simanneal_acceptance_func(mk_simanneal_temp_func(T0 = 10000, alpha = 0.5)),
)
+bc$trace$elapsed
plot_plate(bc,
plate = plate, row = row, column = col, .color = Group,
title = "Ex2: 'Plate wise' design\nStep 1: after allocating samples to plates"
)
-bc$scoring_f <- mk_plate_scoring_functions(bc, plate = "plate", row = "row", column = "col", group = "Group")
+scoring_f <- mk_plate_scoring_functions(bc, plate = "plate", row = "row", column = "col", group = "Group")
-bc$score()
+bc$score(scoring_f)
```
```{r}
set.seed(42)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+ bc,
+ scoring = scoring_f,
max_iter = 400,
shuffle_proposal_func = mk_subgroup_shuffling_function(subgroup_vars = c("plate")),
acceptance_func = mk_simanneal_acceptance_func(mk_simanneal_temp_func(T0 = 1000, alpha = 0.5)),
@@ -128,9 +129,9 @@ trace <- optimize_design(bc,
```
```{r}
-trace
+bc$trace$elapsed
-bc$score()
+bc$score(scoring_f)
plot_plate(bc,
plate = plate, row = row, column = col, .color = Group,
@@ -153,16 +154,14 @@ happen first within plate 1, then within plate 2, so that the two scores can be
optimized in succeeding runs.
```{r}
-# Setting up the batch container
-
-bc <- example2$copy()
-
-bc$scoring_f <- osat_score_generator(batch_vars = c("plate"), feature_vars = c("Group"))
+scoring_f <- osat_score_generator(batch_vars = c("plate"), feature_vars = c("Group"))
```
```{r}
set.seed(42)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+ example2,
+ scoring = scoring_f,
quiet = TRUE,
max_iter = 150, # this is set to shorten vignette run-time, normally we don't know.
n_shuffle = 2,
@@ -171,21 +170,23 @@ trace <- optimize_design(bc,
```
```{r}
-trace
+bc$trace$elapsed
plot_plate(bc,
plate = plate, row = row, column = col, .color = Group,
title = "Ex2: 'Serial plate' design\nStep 1: after allocating samples to plates"
)
-bc$scoring_f <- mk_plate_scoring_functions(bc, plate = "plate", row = "row", column = "col", group = "Group")
+scoring_f <- mk_plate_scoring_functions(bc, plate = "plate", row = "row", column = "col", group = "Group")
-bc$score()
+bc$score(scoring_f)
```
```{r}
set.seed(42)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+ bc,
+ scoring = scoring_f,
max_iter = 150,
quiet = TRUE,
shuffle_proposal_func = mk_subgroup_shuffling_function(subgroup_vars = c("plate"), restrain_on_subgroup_levels = c(1)),
@@ -195,14 +196,16 @@ trace <- optimize_design(bc,
```
```{r}
-trace
+bc$trace$elapsed
-bc$score()
+bc$score(scoring_f)
```
```{r}
set.seed(42)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+ bc,
+ scoring = scoring_f,
max_iter = 550,
quiet = TRUE,
shuffle_proposal_func = mk_subgroup_shuffling_function(subgroup_vars = c("plate"), restrain_on_subgroup_levels = c(2)),
@@ -212,9 +215,9 @@ trace <- optimize_design(bc,
```
```{r}
-trace
+bc$trace$elapsed
-bc$score()
+bc$score(scoring_f)
plot_plate(bc,
plate = plate, row = row, column = col, .color = Group,
diff --git a/vignettes/cached/_plate_scoring_ex2.html b/vignettes/cached/_plate_scoring_ex2.html
index a5299f2d..efcb9fbf 100644
--- a/vignettes/cached/_plate_scoring_ex2.html
+++ b/vignettes/cached/_plate_scoring_ex2.html
@@ -4,6 +4,14 @@
library(designit)
library(ggplot2)
library(dplyr)
+#>
+#> Attaching package: 'dplyr'
+#> The following objects are masked from 'package:stats':
+#>
+#> filter, lag
+#> The following objects are masked from 'package:base':
+#>
+#> intersect, setdiff, setequal, union
library(tidyr)
Example 2: Scoring two plates at once
@@ -26,37 +34,37 @@ Example 2: Scoring two plates at once
)
# Add samples to container
-assign_in_order(example2, samples = tibble::tibble(
+example2 <- assign_in_order(example2, samples = tibble::tibble(
Group = c(rep(c("Grp 1", "Grp 2", "Grp 3", "Grp 4"), each = 8)),
ID = 1:32
))
-bc <- example2$copy()
-
-bc$scoring_f <- c(mk_plate_scoring_functions(bc, plate = "plate", row = "row", column = "col", group = "Group"),
+scoring_f <- c(mk_plate_scoring_functions(example2, plate = "plate", row = "row", column = "col", group = "Group"),
osat_plate = osat_score_generator(batch_vars = c("plate"), feature_vars = c("Group"))
)
-plot_plate(bc,
+plot_plate(example2,
plate = plate, row = row, column = col, .color = Group,
title = "Ex2: Initial sample arrangement"
)
-
+
-bc$score()
+example2$score(scoring_f)
#> Plate 1 Plate 2 osat_plate
#> 23.89265 23.89265 128.00000
set.seed(41)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+ example2,
+ scoring = scoring_f,
n_shuffle = c(rep(10, 10), rep(3, 90), rep(2, 100), rep(1, 1400)),
acceptance_func = mk_simanneal_acceptance_func(mk_simanneal_temp_func(T0 = 10000, alpha = 0.5)),
aggregate_scores_func = worst_score,
quiet = TRUE
)
-trace$elapsed
-#> Time difference of 25.73155 secs
+bc$trace$elapsed
+#> Time difference of 16.73602 secs
-bc$score()
+bc$score(scoring_f)
#> Plate 1 Plate 2 osat_plate
#> 6.127258 6.094080 0.000000
@@ -64,7 +72,7 @@ Example 2: Scoring two plates at once
plate = plate, row = row, column = col, .color = Group,
title = "Ex2: Design created by swapping samples 'globally' across the plates"
)
-
+
While this ‘global’ optimization is possible, it does probably not
converge to an (almost) ideal solution in an acceptable time if there
are more samples involved. This is due to a lot of unproductive sample
@@ -75,48 +83,46 @@
Example 2: Scoring two plates at once
the use of a dedicated sample permutation function which takes the plate
structure into account and only shuffles samples around within one
plate.
-# Setting up the batch container
-
-bc <- example2$copy()
-
-bc$scoring_f <- osat_score_generator(batch_vars = c("plate"), feature_vars = c("Group"))
+scoring_f <- osat_score_generator(batch_vars = c("plate"), feature_vars = c("Group"))
set.seed(42)
-optimize_design(bc,
+bc <- optimize_design(
+ example2,
+ scoring = scoring_f,
quiet = TRUE,
max_iter = 200, # this is set to shorten vignette run-time, normally we don't know.
n_shuffle = 2,
acceptance_func = mk_simanneal_acceptance_func(mk_simanneal_temp_func(T0 = 10000, alpha = 0.5)),
)
-#> Optimization trace (201 score values, elapsed 4.254329 secs).
-#> Starting score: 128
-#> Final score : 0
+bc$trace$elapsed
+#> Time difference of 2.140794 secs
plot_plate(bc,
plate = plate, row = row, column = col, .color = Group,
title = "Ex2: 'Plate wise' design\nStep 1: after allocating samples to plates"
)
-
+
-bc$scoring_f <- mk_plate_scoring_functions(bc, plate = "plate", row = "row", column = "col", group = "Group")
+scoring_f <- mk_plate_scoring_functions(bc, plate = "plate", row = "row", column = "col", group = "Group")
-bc$score()
+bc$score(scoring_f)
#> Plate 1 Plate 2
#> 12.77527 13.63704
set.seed(42)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+ bc,
+ scoring = scoring_f,
max_iter = 400,
shuffle_proposal_func = mk_subgroup_shuffling_function(subgroup_vars = c("plate")),
acceptance_func = mk_simanneal_acceptance_func(mk_simanneal_temp_func(T0 = 1000, alpha = 0.5)),
aggregate_scores_func = L2s_norm,
quiet = TRUE
)
-trace
-#> Optimization trace (401 score values, elapsed 3.579697 secs).
-#> Starting score: 12.775,13.637
-#> Final score : 6.855,6.309
+bc$trace$elapsed
+#> Time differences in secs
+#> [1] 2.140794 3.270934
-bc$score()
+bc$score(scoring_f)
#> Plate 1 Plate 2
#> 6.854748 6.309297
@@ -124,7 +130,7 @@ Example 2: Scoring two plates at once
plate = plate, row = row, column = col, .color = Group,
title = "Ex2: 'Plate wise' design\nStep 2: after arranging samples within plates"
)
-
+
In this case, the shuffling function exchanges 1 pair of sample
assignments every time (the default). However, any number of constant
swaps or a swapping protocol (formally a vector of integers) can be
@@ -137,64 +143,62 @@
Example 2: Scoring two plates at once
generates the permutations. It enforces permutation only to happen first
within plate 1, then within plate 2, so that the two scores can be
optimized in succeeding runs.
-# Setting up the batch container
-
-bc <- example2$copy()
-
-bc$scoring_f <- osat_score_generator(batch_vars = c("plate"), feature_vars = c("Group"))
+scoring_f <- osat_score_generator(batch_vars = c("plate"), feature_vars = c("Group"))
set.seed(42)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+ example2,
+ scoring = scoring_f,
quiet = TRUE,
max_iter = 150, # this is set to shorten vignette run-time, normally we don't know.
n_shuffle = 2,
acceptance_func = mk_simanneal_acceptance_func(mk_simanneal_temp_func(T0 = 10000, alpha = 0.5)),
)
-trace
-#> Optimization trace (151 score values, elapsed 4.056035 secs).
-#> Starting score: 128
-#> Final score : 0
+bc$trace$elapsed
+#> Time difference of 1.791624 secs
plot_plate(bc,
plate = plate, row = row, column = col, .color = Group,
title = "Ex2: 'Serial plate' design\nStep 1: after allocating samples to plates"
)
-
+
-bc$scoring_f <- mk_plate_scoring_functions(bc, plate = "plate", row = "row", column = "col", group = "Group")
+scoring_f <- mk_plate_scoring_functions(bc, plate = "plate", row = "row", column = "col", group = "Group")
-bc$score()
+bc$score(scoring_f)
#> Plate 1 Plate 2
#> 10.57482 26.16613
set.seed(42)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+ bc,
+ scoring = scoring_f,
max_iter = 150,
quiet = TRUE,
shuffle_proposal_func = mk_subgroup_shuffling_function(subgroup_vars = c("plate"), restrain_on_subgroup_levels = c(1)),
acceptance_func = mk_simanneal_acceptance_func(mk_simanneal_temp_func(T0 = 1000, alpha = 0.5)),
aggregate_scores_func = L2s_norm
)
-trace
-#> Optimization trace (151 score values, elapsed 2.528134 secs).
-#> Starting score: 10.575,26.166
-#> Final score : 6.416,26.166
+bc$trace$elapsed
+#> Time differences in secs
+#> [1] 1.791624 1.591002
-bc$score()
+bc$score(scoring_f)
#> Plate 1 Plate 2
#> 6.416193 26.166134
set.seed(42)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+ bc,
+ scoring = scoring_f,
max_iter = 550,
quiet = TRUE,
shuffle_proposal_func = mk_subgroup_shuffling_function(subgroup_vars = c("plate"), restrain_on_subgroup_levels = c(2)),
acceptance_func = mk_simanneal_acceptance_func(mk_simanneal_temp_func(T0 = 1000, alpha = 0.5)),
aggregate_scores_func = L2s_norm
)
-trace
-#> Optimization trace (551 score values, elapsed 2.93787 secs).
-#> Starting score: 6.416,26.166
-#> Final score : 6.416,6.582
+bc$trace$elapsed
+#> Time differences in secs
+#> [1] 1.791624 1.591002 4.226352
-bc$score()
+bc$score(scoring_f)
#> Plate 1 Plate 2
#> 6.416193 6.581966
@@ -202,5 +206,5 @@ Example 2: Scoring two plates at once
plate = plate, row = row, column = col, .color = Group,
title = "Ex2: 'Serial plate' design\nStep 2: after optimizing each plate in turn"
)
-
+
diff --git a/vignettes/cached/_plate_scoring_ex3.Rmd b/vignettes/cached/_plate_scoring_ex3.Rmd
index 8f5fdca6..872e9d9d 100644
--- a/vignettes/cached/_plate_scoring_ex3.Rmd
+++ b/vignettes/cached/_plate_scoring_ex3.Rmd
@@ -1,7 +1,7 @@
---
title: "Plate scoring example 3"
output: html_fragment
-knit: (\(input, ...) rmarkdown::render(input, output_dir = "vignettes/cached"))
+knit: (\(input, ...) rmarkdown::render(input, output_dir = here::here("vignettes/cached")))
---
```{r, include = FALSE}
@@ -51,7 +51,7 @@ example3 <- BatchContainer$new(
# Assign samples randomly to start from a better initial state
-assign_random(example3,
+example3 <- assign_random(example3,
samples = tibble::tibble(
Group = rep.int(c("Grp 1", "Grp 2", "Grp3"),
times = c(69, 30, 69)
@@ -60,14 +60,14 @@ assign_random(example3,
)
)
-bc <- example3$copy()
-
-bc$scoring_f <- osat_score_generator(batch_vars = c("plate"), feature_vars = c("Group"))
+scoring_f <- osat_score_generator(batch_vars = c("plate"), feature_vars = c("Group"))
```
```{r}
set.seed(42)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+ example3,
+ scoring = scoring_f,
quiet = TRUE,
max_iter = 150,
n_shuffle = 2,
@@ -76,7 +76,7 @@ trace <- optimize_design(bc,
```
```{r}
-trace
+bc$trace$elapsed
```
```{r, fig.width=7, fig.height=3.5}
@@ -87,17 +87,19 @@ plot_plate(bc,
```
```{r}
-bc$scoring_f <- mk_plate_scoring_functions(bc,
+scoring_f <- mk_plate_scoring_functions(bc,
plate = "plate", row = "row",
column = "col", group = "Group"
)
-bc$score()
+bc$score(scoring_f)
```
```{r}
set.seed(42)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+ bc,
+ scoring = scoring_f,
max_iter = 300,
shuffle_proposal_func = mk_subgroup_shuffling_function(
subgroup_vars = c("plate"),
@@ -110,9 +112,9 @@ trace <- optimize_design(bc,
```
```{r}
-trace$elapsed
+bc$trace$elapsed
-bc$score()
+bc$score(scoring_f)
```
```{r, fig.width=7, fig.height=3.5}
diff --git a/vignettes/cached/_plate_scoring_ex3.html b/vignettes/cached/_plate_scoring_ex3.html
index 35a11141..40ed3f06 100644
--- a/vignettes/cached/_plate_scoring_ex3.html
+++ b/vignettes/cached/_plate_scoring_ex3.html
@@ -4,6 +4,14 @@
library(designit)
library(ggplot2)
library(dplyr)
+#>
+#> Attaching package: 'dplyr'
+#> The following objects are masked from 'package:stats':
+#>
+#> filter, lag
+#> The following objects are masked from 'package:base':
+#>
+#> intersect, setdiff, setequal, union
library(tidyr)
Example 3: 3 plates with different dimension and different sample
@@ -39,7 +47,7 @@ Example 3: 3 plates with different dimension and different sample
# Assign samples randomly to start from a better initial state
-assign_random(example3,
+example3 <- assign_random(example3,
samples = tibble::tibble(
Group = rep.int(c("Grp 1", "Grp 2", "Grp3"),
times = c(69, 30, 69)
@@ -48,35 +56,35 @@ Example 3: 3 plates with different dimension and different sample
)
)
-bc <- example3$copy()
-
-bc$scoring_f <- osat_score_generator(batch_vars = c("plate"), feature_vars = c("Group"))
+scoring_f <- osat_score_generator(batch_vars = c("plate"), feature_vars = c("Group"))
set.seed(42)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+ example3,
+ scoring = scoring_f,
quiet = TRUE,
max_iter = 150,
n_shuffle = 2,
acceptance_func = mk_simanneal_acceptance_func(mk_simanneal_temp_func(T0 = 1000, alpha = 0.5)),
)
-trace
-#> Optimization trace (151 score values, elapsed 3.348524 secs).
-#> Starting score: 17.714
-#> Final score : 1.429
+bc$trace$elapsed
+#> Time difference of 2.03003 secs
plot_plate(bc,
plate = plate, row = row, column = col, .color = Group,
title = "Ex3: Dealing with plates of different size\nStep 1: after distributing groups across plates"
)
-
-bc$scoring_f <- mk_plate_scoring_functions(bc,
+
+scoring_f <- mk_plate_scoring_functions(bc,
plate = "plate", row = "row",
column = "col", group = "Group"
)
-bc$score()
+bc$score(scoring_f)
#> Plate 1 Plate 2 Plate 3
-#> 9.706637 9.585770 10.419567
+#> 9.387071 10.302690 9.826243
set.seed(42)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+ bc,
+ scoring = scoring_f,
max_iter = 300,
shuffle_proposal_func = mk_subgroup_shuffling_function(
subgroup_vars = c("plate"),
@@ -86,15 +94,16 @@ Example 3: 3 plates with different dimension and different sample
aggregate_scores_func = L2s_norm,
quiet = TRUE
)
-trace$elapsed
-#> Time difference of 40.35472 secs
+bc$trace$elapsed
+#> Time differences in secs
+#> [1] 2.030030 4.778121
-bc$score()
+bc$score(scoring_f)
#> Plate 1 Plate 2 Plate 3
-#> 8.974408 8.253074 7.980756
+#> 8.809205 8.553802 8.185525
plot_plate(bc,
plate = plate, row = row, column = col, .color = Group,
title = "Ex3: Dealing with plates of different size\nStep 2: after swapping samples within plates"
)
-
+
diff --git a/vignettes/cached/_plate_scoring_ex4.Rmd b/vignettes/cached/_plate_scoring_ex4.Rmd
index c12c29bc..6ce2e1c5 100644
--- a/vignettes/cached/_plate_scoring_ex4.Rmd
+++ b/vignettes/cached/_plate_scoring_ex4.Rmd
@@ -1,7 +1,7 @@
---
title: "Plate scoring example 4"
output: html_fragment
-knit: (\(input, ...) rmarkdown::render(input, output_dir = "vignettes/cached"))
+knit: (\(input, ...) rmarkdown::render(input, output_dir = here::here("vignettes/cached")))
---
```{r, include = FALSE}
@@ -38,31 +38,31 @@ example4 <- BatchContainer$new(
# Assign samples randomly to start from lower score (avoid Inf values even since plate 3 will miss 2 groups initially :)
-assign_in_order(example4, samples = tibble::tibble(
+example4 <- assign_in_order(example4, samples = tibble::tibble(
Group = rep.int(c("Treatment 1", "Treatment 2"), times = c(10, 10)),
Sex = c(rep(c("M", "F", "F", "M"), times = 4), "M", NA, NA, "F"), ID = 1:20
))
-bc <- example4$copy()
-
cowplot::plot_grid(
- plot_plate(bc, plate = plate, row = row, column = col, .color = Group, title = "Initial layout by Group"),
- plot_plate(bc, plate = plate, row = row, column = col, .color = Sex, title = "Initial layout by Sex"),
+ plot_plate(example4, plate = plate, row = row, column = col, .color = Group, title = "Initial layout by Group"),
+ plot_plate(example4, plate = plate, row = row, column = col, .color = Sex, title = "Initial layout by Sex"),
ncol = 2
)
-bc$scoring_f <- c(
- Group = mk_plate_scoring_functions(bc, row = "row", column = "col", group = "Group"),
- Sex = mk_plate_scoring_functions(bc, row = "row", column = "col", group = "Sex")
+scoring_f <- c(
+ Group = mk_plate_scoring_functions(example4, row = "row", column = "col", group = "Group"),
+ Sex = mk_plate_scoring_functions(example4, row = "row", column = "col", group = "Sex")
)
-bc$score()
+example4$score(scoring_f)
```
```{r}
set.seed(42)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+ example4,
+ scoring = scoring_f,
max_iter = 750,
n_shuffle = 1,
acceptance_func = mk_simanneal_acceptance_func(mk_simanneal_temp_func(T0 = 10000, alpha = 1)),
@@ -74,9 +74,9 @@ trace <- optimize_design(bc,
```
```{r, fig.width=7, fig.height=3.5}
-trace$elapsed
+bc$trace$elapsed
-bc$score()
+bc$score(scoring_f)
cowplot::plot_grid(
plot_plate(bc, plate = plate, row = row, column = col, .color = Group, title = "Final layout by Group"),
@@ -90,7 +90,9 @@ reference!
```{r}
set.seed(42)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+ bc,
+ scoring = scoring_f,
max_iter = 500,
n_shuffle = 1,
acceptance_func = mk_simanneal_acceptance_func(mk_simanneal_temp_func(T0 = 10000, alpha = 1)),
@@ -103,7 +105,7 @@ trace <- optimize_design(bc,
```
```{r, fig.width=7, fig.height=3.5}
-bc$score()
+bc$score(scoring_f)
cowplot::plot_grid(
plot_plate(bc, plate = plate, row = row, column = col, .color = Group, title = "Final layout by Group"),
@@ -118,17 +120,17 @@ default acceptance function. We are strictly prioritizing the leftmost score in
addition to reflect relevance for the design.
```{r}
-bc <- example4$copy()
-
-bc$scoring_f <- c(
- Group = mk_plate_scoring_functions(bc, row = "row", column = "col", group = "Group"),
- Sex = mk_plate_scoring_functions(bc, row = "row", column = "col", group = "Sex")
+scoring_f <- c(
+ Group = mk_plate_scoring_functions(example4, row = "row", column = "col", group = "Group"),
+ Sex = mk_plate_scoring_functions(example4, row = "row", column = "col", group = "Sex")
)
-bc$score()
+example4$score(scoring_f)
set.seed(42)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+ example4,
+ scoring = scoring_f,
max_iter = 5000,
n_shuffle = 1,
acceptance_func = accept_leftmost_improvement,
@@ -138,7 +140,7 @@ trace <- optimize_design(bc,
```
```{r, fig.width=7, fig.height=3.5}
-bc$score()
+bc$score(scoring_f)
cowplot::plot_grid(
plot_plate(bc, plate = plate, row = row, column = col, .color = Group, title = "Final layout by Group"),
@@ -151,17 +153,16 @@ Using a tolerance value to accept slightly worse solutions in the leftmost
relevant score if overcompensated by other scores:
```{r}
-bc <- example4$copy()
-
-
-bc$scoring_f <- c(
- Group = mk_plate_scoring_functions(bc, row = "row", column = "col", group = "Group"),
- Sex = mk_plate_scoring_functions(bc, row = "row", column = "col", group = "Sex")
+scoring_f <- c(
+ Group = mk_plate_scoring_functions(example4, row = "row", column = "col", group = "Group"),
+ Sex = mk_plate_scoring_functions(example4, row = "row", column = "col", group = "Sex")
)
set.seed(42)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+ example4,
+ scoring = scoring_f,
max_iter = 5000,
n_shuffle = 1,
acceptance_func = ~ accept_leftmost_improvement(..., tolerance = 0.1),
@@ -169,12 +170,10 @@ trace <- optimize_design(bc,
quiet = TRUE
)
-bc$score()
+bc$score(scoring_f)
```
```{r, fig.width=7, fig.height=3.5}
-bc$score()
-
cowplot::plot_grid(
plot_plate(bc, plate = plate, row = row, column = col, .color = Group, title = "Final layout by Group"),
plot_plate(bc, plate = plate, row = row, column = col, .color = Sex, title = "Final layout by Sex"),
@@ -188,17 +187,17 @@ $\kappa^p$, $0 < \kappa < 1$ We choose a $\kappa$ of 0.5, i.e. the second
score's improvement counts half of that of the first one.
```{r}
-bc <- example4$copy()
-
-bc$scoring_f <- c(
- Group = mk_plate_scoring_functions(bc, row = "row", column = "col", group = "Group"),
- Sex = mk_plate_scoring_functions(bc, row = "row", column = "col", group = "Sex")
+scoring_f <- c(
+ Group = mk_plate_scoring_functions(example4, row = "row", column = "col", group = "Group"),
+ Sex = mk_plate_scoring_functions(example4, row = "row", column = "col", group = "Sex")
)
-bc$score()
+bc$score(scoring_f)
set.seed(42)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+ example4,
+ scoring = scoring_f,
max_iter = 1000,
n_shuffle = 1,
acceptance_func = mk_exponentially_weighted_acceptance_func(kappa = 0.5, simulated_annealing = T),
@@ -208,7 +207,7 @@ trace <- optimize_design(bc,
```
```{r, fig.width=7, fig.height=3.5}
-bc$score()
+bc$score(scoring_f)
cowplot::plot_grid(
plot_plate(bc, plate = plate, row = row, column = col, .color = Group, title = "Final layout by Group"),
diff --git a/vignettes/cached/_plate_scoring_ex4.html b/vignettes/cached/_plate_scoring_ex4.html
index 61806644..d3c1472d 100644
--- a/vignettes/cached/_plate_scoring_ex4.html
+++ b/vignettes/cached/_plate_scoring_ex4.html
@@ -4,6 +4,14 @@
library(designit)
library(ggplot2)
library(dplyr)
+#>
+#> Attaching package: 'dplyr'
+#> The following objects are masked from 'package:stats':
+#>
+#> filter, lag
+#> The following objects are masked from 'package:base':
+#>
+#> intersect, setdiff, setequal, union
library(tidyr)
Example 4: More than one group factor to balance and empty plate
@@ -26,31 +34,31 @@ Example 4: More than one group factor to balance and empty plate
# Assign samples randomly to start from lower score (avoid Inf values even since plate 3 will miss 2 groups initially :)
-assign_in_order(example4, samples = tibble::tibble(
+example4 <- assign_in_order(example4, samples = tibble::tibble(
Group = rep.int(c("Treatment 1", "Treatment 2"), times = c(10, 10)),
Sex = c(rep(c("M", "F", "F", "M"), times = 4), "M", NA, NA, "F"), ID = 1:20
))
-bc <- example4$copy()
-
cowplot::plot_grid(
- plot_plate(bc, plate = plate, row = row, column = col, .color = Group, title = "Initial layout by Group"),
- plot_plate(bc, plate = plate, row = row, column = col, .color = Sex, title = "Initial layout by Sex"),
+ plot_plate(example4, plate = plate, row = row, column = col, .color = Group, title = "Initial layout by Group"),
+ plot_plate(example4, plate = plate, row = row, column = col, .color = Sex, title = "Initial layout by Sex"),
ncol = 2
)
-
+
-bc$scoring_f <- c(
- Group = mk_plate_scoring_functions(bc, row = "row", column = "col", group = "Group"),
- Sex = mk_plate_scoring_functions(bc, row = "row", column = "col", group = "Sex")
+scoring_f <- c(
+ Group = mk_plate_scoring_functions(example4, row = "row", column = "col", group = "Group"),
+ Sex = mk_plate_scoring_functions(example4, row = "row", column = "col", group = "Sex")
)
-bc$score()
+example4$score(scoring_f)
#> Group.Plate Sex.Plate
#> 83.63858 239.20748
set.seed(42)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+ example4,
+ scoring = scoring_f,
max_iter = 750,
n_shuffle = 1,
acceptance_func = mk_simanneal_acceptance_func(mk_simanneal_temp_func(T0 = 10000, alpha = 1)),
@@ -59,10 +67,10 @@ Example 4: More than one group factor to balance and empty plate
},
quiet = TRUE
)
-trace$elapsed
-#> Time difference of 7.062086 secs
+bc$trace$elapsed
+#> Time difference of 6.251454 secs
-bc$score()
+bc$score(scoring_f)
#> Group.Plate Sex.Plate
#> 8.019656 7.608810
@@ -71,11 +79,13 @@ Example 4: More than one group factor to balance and empty plate
plot_plate(bc, plate = plate, row = row, column = col, .color = Sex, title = "Final layout by Sex"),
ncol = 2
)
-
+
We do the same example with auto-scaling, weighted scoring and SA to
have a reference!
set.seed(42)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+ bc,
+ scoring = scoring_f,
max_iter = 500,
n_shuffle = 1,
acceptance_func = mk_simanneal_acceptance_func(mk_simanneal_temp_func(T0 = 10000, alpha = 1)),
@@ -86,7 +96,7 @@ Example 4: More than one group factor to balance and empty plate
quiet = TRUE
)
#> ... Performing simple mean/stddev adjustment.
-bc$score()
+bc$score(scoring_f)
#> Group.Plate Sex.Plate
#> 8.080860 7.458345
@@ -95,25 +105,25 @@ Example 4: More than one group factor to balance and empty plate
plot_plate(bc, plate = plate, row = row, column = col, .color = Sex, title = "Final layout by Sex"),
ncol = 2
)
-
+
We do the same example with auto-scaling and position-dependent
scoring now, not aggregating the score vector! This is more effective
even when using the default acceptance function. We are strictly
prioritizing the leftmost score in addition to reflect relevance for the
design.
-bc <- example4$copy()
-
-bc$scoring_f <- c(
- Group = mk_plate_scoring_functions(bc, row = "row", column = "col", group = "Group"),
- Sex = mk_plate_scoring_functions(bc, row = "row", column = "col", group = "Sex")
+scoring_f <- c(
+ Group = mk_plate_scoring_functions(example4, row = "row", column = "col", group = "Group"),
+ Sex = mk_plate_scoring_functions(example4, row = "row", column = "col", group = "Sex")
)
-bc$score()
+example4$score(scoring_f)
#> Group.Plate Sex.Plate
#> 83.63858 239.20748
set.seed(42)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+ example4,
+ scoring = scoring_f,
max_iter = 5000,
n_shuffle = 1,
acceptance_func = accept_leftmost_improvement,
@@ -121,7 +131,7 @@ Example 4: More than one group factor to balance and empty plate
quiet = TRUE
)
#> ... Performing simple mean/stddev adjustment.
-bc$score()
+bc$score(scoring_f)
#> Group.Plate Sex.Plate
#> 7.619846 7.473524
@@ -130,20 +140,19 @@ Example 4: More than one group factor to balance and empty plate
plot_plate(bc, plate = plate, row = row, column = col, .color = Sex, title = "Final layout by Sex"),
ncol = 2
)
-
+
Using a tolerance value to accept slightly worse solutions in the
leftmost relevant score if overcompensated by other scores:
-bc <- example4$copy()
-
-
-bc$scoring_f <- c(
- Group = mk_plate_scoring_functions(bc, row = "row", column = "col", group = "Group"),
- Sex = mk_plate_scoring_functions(bc, row = "row", column = "col", group = "Sex")
+scoring_f <- c(
+ Group = mk_plate_scoring_functions(example4, row = "row", column = "col", group = "Group"),
+ Sex = mk_plate_scoring_functions(example4, row = "row", column = "col", group = "Sex")
)
set.seed(42)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+ example4,
+ scoring = scoring_f,
max_iter = 5000,
n_shuffle = 1,
acceptance_func = ~ accept_leftmost_improvement(..., tolerance = 0.1),
@@ -152,37 +161,33 @@ Example 4: More than one group factor to balance and empty plate
)
#> ... Performing simple mean/stddev adjustment.
-bc$score()
+bc$score(scoring_f)
#> Group.Plate Sex.Plate
#> 7.366667 7.323324
-bc$score()
-#> Group.Plate Sex.Plate
-#> 7.366667 7.323324
-
-cowplot::plot_grid(
+cowplot::plot_grid(
plot_plate(bc, plate = plate, row = row, column = col, .color = Group, title = "Final layout by Group"),
plot_plate(bc, plate = plate, row = row, column = col, .color = Sex, title = "Final layout by Sex"),
ncol = 2
)
-
+
Testing an alternative left-to-right weighing of scores, based on
exponential down-weighing of the respective score differences at
position \(p\) with factor \(\kappa^p\), \(0
< \kappa < 1\) We choose a \(\kappa\) of 0.5, i.e. the second score’s
improvement counts half of that of the first one.
-bc <- example4$copy()
-
-bc$scoring_f <- c(
- Group = mk_plate_scoring_functions(bc, row = "row", column = "col", group = "Group"),
- Sex = mk_plate_scoring_functions(bc, row = "row", column = "col", group = "Sex")
+scoring_f <- c(
+ Group = mk_plate_scoring_functions(example4, row = "row", column = "col", group = "Group"),
+ Sex = mk_plate_scoring_functions(example4, row = "row", column = "col", group = "Sex")
)
-bc$score()
+bc$score(scoring_f)
#> Group.Plate Sex.Plate
-#> 83.63858 239.20748
+#> 7.366667 7.323324
set.seed(42)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+ example4,
+ scoring = scoring_f,
max_iter = 1000,
n_shuffle = 1,
acceptance_func = mk_exponentially_weighted_acceptance_func(kappa = 0.5, simulated_annealing = T),
@@ -190,7 +195,7 @@ Example 4: More than one group factor to balance and empty plate
quiet = TRUE
)
#> ... Performing simple mean/stddev adjustment.
-bc$score()
+bc$score(scoring_f)
#> Group.Plate Sex.Plate
#> 7.630367 7.616179
@@ -199,5 +204,5 @@ Example 4: More than one group factor to balance and empty plate
plot_plate(bc, plate = plate, row = row, column = col, .color = Sex, title = "Final layout by Sex"),
ncol = 2
)
-
+
diff --git a/vignettes/cached/_plate_scoring_ex5.Rmd b/vignettes/cached/_plate_scoring_ex5.Rmd
index be4af9be..064d0571 100644
--- a/vignettes/cached/_plate_scoring_ex5.Rmd
+++ b/vignettes/cached/_plate_scoring_ex5.Rmd
@@ -1,7 +1,7 @@
---
title: "Plate scoring example 5"
output: html_fragment
-knit: (\(input, ...) rmarkdown::render(input, output_dir = "vignettes/cached"))
+knit: (\(input, ...) rmarkdown::render(input, output_dir = here::here("vignettes/cached")))
---
```{r, include = FALSE}
@@ -33,25 +33,25 @@ example5 <- BatchContainer$new(
)
# Assign samples randomly to start from lower score (avoid `Inf` values when doing the 'hard' penalization)
-assign_random(example5, samples = tibble::tibble(
+example5 <- assign_random(example5, samples = tibble::tibble(
Group = rep.int(paste("Group", 1:5), times = c(8, 8, 8, 8, 64)),
ID = 1:96
))
penalize_lines <- "hard"
-bc <- example5$copy()
-
-bc$scoring_f <- c(
- Group = mk_plate_scoring_functions(bc, row = "row", column = "col", group = "Group", p = 2, penalize_lines = penalize_lines)
+scoring_f <- c(
+ Group = mk_plate_scoring_functions(example5, row = "row", column = "col", group = "Group", p = 2, penalize_lines = penalize_lines)
)
-bc$score()
+example5$score(scoring_f)
```
```{r}
set.seed(42)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+ example5,
+ scoring = scoring_f,
max_iter = 5000,
n_shuffle = 1,
acceptance_func = mk_simanneal_acceptance_func(mk_simanneal_temp_func(T0 = 500, alpha = 0.1)),
@@ -60,9 +60,9 @@ trace <- optimize_design(bc,
```
```{r, fig.width=7, fig.height=3.5}
-trace$elapsed
+bc$trace$elapsed
-bc$score()
+bc$score(scoring_f)
plot_plate(bc, plate = plate, row = row, column = col, .color = Group, title = stringr::str_c("Line penalization: ", penalize_lines))
```
diff --git a/vignettes/cached/_plate_scoring_ex5.html b/vignettes/cached/_plate_scoring_ex5.html
index 35874c9f..368fc058 100644
--- a/vignettes/cached/_plate_scoring_ex5.html
+++ b/vignettes/cached/_plate_scoring_ex5.html
@@ -4,6 +4,14 @@
library(designit)
library(ggplot2)
library(dplyr)
+#>
+#> Attaching package: 'dplyr'
+#> The following objects are masked from 'package:stats':
+#>
+#> filter, lag
+#> The following objects are masked from 'package:base':
+#>
+#> intersect, setdiff, setequal, union
library(tidyr)
Example 5: Avoiding ‘regular patterns’ in plate layout
@@ -21,36 +29,36 @@ Example 5: Avoiding ‘regular patterns’ in plate layout
)
# Assign samples randomly to start from lower score (avoid `Inf` values when doing the 'hard' penalization)
-assign_random(example5, samples = tibble::tibble(
+example5 <- assign_random(example5, samples = tibble::tibble(
Group = rep.int(paste("Group", 1:5), times = c(8, 8, 8, 8, 64)),
ID = 1:96
))
penalize_lines <- "hard"
-bc <- example5$copy()
-
-bc$scoring_f <- c(
- Group = mk_plate_scoring_functions(bc, row = "row", column = "col", group = "Group", p = 2, penalize_lines = penalize_lines)
+scoring_f <- c(
+ Group = mk_plate_scoring_functions(example5, row = "row", column = "col", group = "Group", p = 2, penalize_lines = penalize_lines)
)
-bc$score()
+example5$score(scoring_f)
#> Group.Plate
-#> 9.960584
+#> 11.08608
set.seed(42)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+ example5,
+ scoring = scoring_f,
max_iter = 5000,
n_shuffle = 1,
acceptance_func = mk_simanneal_acceptance_func(mk_simanneal_temp_func(T0 = 500, alpha = 0.1)),
quiet = T
)
-trace$elapsed
-#> Time difference of 28.7293 secs
+bc$trace$elapsed
+#> Time difference of 29.47413 secs
-bc$score()
+bc$score(scoring_f)
#> Group.Plate
-#> 8.819968
+#> 8.785693
plot_plate(bc, plate = plate, row = row, column = col, .color = Group, title = stringr::str_c("Line penalization: ", penalize_lines))
-
+
diff --git a/vignettes/custom_shuffle.Rmd b/vignettes/custom_shuffle.Rmd
index 6d835657..1dec234f 100644
--- a/vignettes/custom_shuffle.Rmd
+++ b/vignettes/custom_shuffle.Rmd
@@ -55,8 +55,8 @@ We start by assigning samples randomly.
set.seed(42)
bc <- BatchContainer$new(
dimensions = c("cage" = 11, "position" = 5)
-)
-assign_random(bc, samples)
+) %>%
+ assign_random(samples)
bc
```
@@ -96,15 +96,14 @@ and `sex` interactions are considered in the scoring function. We only use 10 it
shuffling is limited to locations with males and enforces change of cage on every iteration.
```{r}
-bc$scoring_f <- osat_score_generator(
- "cage",
- "sex"
-)
-
set.seed(10)
-res <- optimize_design(
+bc <- optimize_design(
bc,
+ scoring = osat_score_generator(
+ "cage",
+ "sex"
+ ),
shuffle_proposal_func = shuffle_with_constraints(
sex == "M",
cage != .src$cage
@@ -112,7 +111,7 @@ res <- optimize_design(
max_iter = 10
)
-plot(res)
+bc$plot_trace()
```
We expect the distribution of males become even, while other variables are not significantly
affected.
@@ -129,7 +128,7 @@ locations. We also ensure that on every iteration the cage number is changed; we
`position` dimension does affect actual animal allocation.
```{r}
-bc$scoring_f <- function(bc) {
+scoring_f <- function(bc) {
samples <- bc$get_samples(include_id = TRUE, as_tibble = FALSE)
avg_w <- samples[, mean(weight, na.rm = TRUE)]
avg_w_per_cage <- samples[!is.na(weight), mean(weight), by = cage]$V1
@@ -141,7 +140,8 @@ bc$scoring_f <- function(bc) {
}
set.seed(12)
-res <- optimize_design(bc,
+bc <- optimize_design(bc,
+ scoring = scoring_f,
shuffle_proposal = shuffle_with_constraints(
sex == "F",
cage != .src$cage & (is.na(sex) | sex != "M")
@@ -149,8 +149,8 @@ res <- optimize_design(bc,
n_shuffle = c(rep(10, 20), rep(5, 20), rep(3, 20), rep(1, 140)),
max_iter = 200
)
-plot(res)
-bc$score()
+bc$plot_trace()
+scoring_f(bc)
```
Now we have a much more even distribution of weights and treatment/control balance.
diff --git a/vignettes/invivo_study_design.Rmd b/vignettes/invivo_study_design.Rmd
index d09375ef..fec8ac1f 100644
--- a/vignettes/invivo_study_design.Rmd
+++ b/vignettes/invivo_study_design.Rmd
@@ -215,7 +215,7 @@ InVivo_assignTreatments <- function(animal_list, treatments,
exclude = exclude_table
)
- assign_in_order(bc_treatment, ani_bclevels)
+ bc_treatment <- assign_in_order(bc_treatment, ani_bclevels)
if (!quiet_process) message("Constructing scoring functions:")
@@ -275,20 +275,20 @@ InVivo_assignTreatments <- function(animal_list, treatments,
}
assertthat::assert_that(length(scoring_functions) > 0, msg = "No variables for scoring found or all have only one level. Nothing to do.")
- bc_treatment$scoring_f <- scoring_functions
- bc_treatment$score()
+ bc_treatment$score(scoring_functions)
- trace <- optimize_design(
+ bc_treatment <- optimize_design(
bc_treatment,
+ scoring = scoring_functions,
n_shuffle = n_shuffle,
acceptance_func = ~ accept_leftmost_improvement(..., tolerance = 0.1),
quiet = quiet_optimize
)
# Check if user given constraints (if provided) could be satisfied
- if ("trt_constraints" %in% names(bc_treatment$score())) {
- if (bc_treatment$score()[["trt_constraints"]] > 0) {
- message("CAUTION: User defined constraints could not be fully met (remaining score ", bc_treatment$score()[["trt_constraints"]], ")")
+ if ("trt_constraints" %in% names(bc_treatment$score(scoring_functions))) {
+ if (bc_treatment$score(scoring_functions)[["trt_constraints"]] > 0) {
+ message("CAUTION: User defined constraints could not be fully met (remaining score ", bc_treatment$score(scoring_functions)[["trt_constraints"]], ")")
} else {
if (!quiet_process) message("Success. User provided constraints could be fully met.")
}
@@ -325,7 +325,7 @@ Invivo_assignCages <- function(design_trt,
bc_cage <- BatchContainer$new(
dimensions = c("Dummy" = 1, ID = nrow(design_trt))
)
- assign_in_order(bc_cage, design_trt)
+ bc_cage <- assign_in_order(bc_cage, design_trt)
shuffle_proposal <- shuffle_grouped_data(bc_cage,
allocate_var = "Dummy",
@@ -372,15 +372,14 @@ Invivo_assignCages <- function(design_trt,
scoring_functions <- c(scoring_functions, sf)
}
- if (length(scoring_functions) > 0) {
- bc_cage$scoring_f <- scoring_functions
- } else {
+ if (length(scoring_functions) == 0) {
if (!quiet_process) message(" ... just a dummy score as there are no user provided balancing variables")
- bc_cage$scoring_f <- osat_score_generator(batch_vars = "Dummy", feature_vars = c("Treatment"))
+ scoring_functions <- osat_score_generator(batch_vars = "Dummy", feature_vars = c("Treatment"))
}
- trace <- optimize_design(
+ bc_cage <- optimize_design(
bc_cage,
+ scoring = scoring_functions,
shuffle_proposal_func = shuffle_proposal,
acceptance_func = accept_leftmost_improvement,
max_iter = maxiter,
@@ -436,7 +435,7 @@ Invivo_arrangeCages <- function(design_cage,
dimensions = c(Rack = nr_racks, CageRow = n_cage_x, CageCol = n_cage_y)
)
- assign_random(bc_rack, design_rack)
+ bc_rack <- assign_random(bc_rack, design_rack)
# Firstly, distribute variables across racks if necessary
if (nr_racks > 1) {
@@ -447,16 +446,18 @@ Invivo_arrangeCages <- function(design_cage,
)
}
- bc_rack$scoring_f <- list(across_rack = osat_score_generator(batch_vars = c("Rack"), feature_vars = distribute_cagerack_vars))
+ scoring_functions <- list(across_rack = osat_score_generator(batch_vars = c("Rack"), feature_vars = distribute_cagerack_vars))
- optimize_design(bc_rack,
+ bc_rack <- optimize_design(
+ bc_rack,
+ scoring = scoring_functions,
quiet = quiet_optimize,
min_score = 0, max_iter = 1e3,
n_shuffle = 2,
acceptance_func = mk_simanneal_acceptance_func(mk_simanneal_temp_func(T0 = 10000, alpha = 0.5))
)
- if (!quiet_process) message(" ... final score: ", bc_rack$score())
+ if (!quiet_process) message(" ... final score: ", bc_rack$score(scoring_f))
}
if (!quiet_process) {
@@ -475,13 +476,12 @@ Invivo_arrangeCages <- function(design_cage,
}
names(scoring_functions) <- stringr::str_c(names(scoring_functions), rep(distribute_cagerack_vars, each = nr_racks), sep = "_")
- bc_rack$scoring_f <- scoring_functions
-
for (i in 1:nr_racks) {
if (!quiet_process) message(" ... Rack ", i)
- trace <- optimize_design(
+ bc_rack <- optimize_design(
bc_rack,
+ scoring = scoring_functions,
shuffle_proposal_func = mk_subgroup_shuffling_function(
subgroup_vars = "Rack",
restrain_on_subgroup_levels = c(i),
@@ -495,7 +495,7 @@ Invivo_arrangeCages <- function(design_cage,
)
}
- if (!quiet_process) message(" ... final scores: ", paste(names(bc_rack$score()), round(bc_rack$score(), 2), sep = ": ", collapse = ", "))
+ if (!quiet_process) message(" ... final scores: ", paste(names(bc_rack$score(scoring_functions)), round(bc_rack$score(scoring_functions), 2), sep = ": ", collapse = ", "))
# Translate Rack numbers to some text output and assign CageNr
design_rack <- bc_rack$get_samples() %>%
diff --git a/vignettes/nested_dimensions_examples.Rmd b/vignettes/nested_dimensions_examples.Rmd
index ffa9226f..575c3965 100644
--- a/vignettes/nested_dimensions_examples.Rmd
+++ b/vignettes/nested_dimensions_examples.Rmd
@@ -54,13 +54,8 @@ bc <- BatchContainer$new(
)
)
-# Add samples to container
-bc$samples <- multi_trt_day_samples
# Initial random assignment
-assign_in_order(bc)
-# Set scoring function
-bc$scoring_f <- osat_score_generator(c("batch"), c("Treatment", "Time"))
-
+bc <- assign_in_order(bc, multi_trt_day_samples)
bc
```
@@ -73,10 +68,10 @@ n_iterations <- length(n_shuffle)
set.seed(42) # should we have conventions for this?
-# initial score
-# bc$score()
-trace <- optimize_design(
+scoring_f <- osat_score_generator(c("batch"), c("Treatment", "Time"))
+bc <- optimize_design(
bc,
+ scoring = scoring_f,
n_shuffle = n_shuffle,
max_iter = n_iterations
) # default is 10000
@@ -89,8 +84,10 @@ I practice you will have to run for a much higher number of iterations.
```{r, fig.width=5, fig.height= 4}
qplot(
- x = 1:trace$n_steps, y = trace$scores, color = factor(c(32, n_shuffle)),
- main = str_glue("Final score={bc$score()}"), geom = "point"
+ x = bc$trace$scores[[1]]$step,
+ y = bc$trace$scores[[1]]$score_1,
+ color = factor(c(32, n_shuffle)),
+ main = str_glue("Final score={bc$score(scoring_f)}"), geom = "point"
)
```
@@ -105,18 +102,16 @@ bc$get_samples(assignment = TRUE) %>%
# Repeat but use shuffle with contraints
```{r, fig.width=6, fig.height= 4}
-
# copy batch container for second optimization
-bc2 <- bc$copy()
-# Initial random assignment
-assign_in_order(bc2)
+bc2 <- assign_in_order(bc)
n_iterations <- 200
set.seed(42) # should we have conventions for this?
-trace2 <- optimize_design(
+bc2 <- optimize_design(
bc2,
+ scoring = scoring_f,
shuffle_proposal = shuffle_with_constraints(
src = TRUE,
# batch needs to change for shuffle to be accepted
@@ -126,8 +121,9 @@ trace2 <- optimize_design(
)
qplot(
- x = 1:trace$n_steps, y = trace2$scores, # color = factor(n_shuffle),
- main = str_glue("Final score={bc2$score()}"), geom = "point"
+ x = bc2$trace$scores[[1]]$step,
+ y = bc2$trace$scores[[1]]$score_1,
+ main = str_glue("Final score={bc2$score(scoring_f)}"), geom = "point"
)
bc2$get_samples(assignment = TRUE) %>%
@@ -147,16 +143,17 @@ For this we keep the optimized `batch` and now only optimize `run` with constrai
```{r}
n_iterations <- 100
-# assign new optimization function
-bc$scoring_f <- osat_score_generator(c("run"), c("Treatment", "Time"))
+# new optimization function
+scoring_f <- osat_score_generator(c("run"), c("Treatment", "Time"))
# like this the optimization score is wrong because it tries to optimize across Batches.
# Possible ways to go:
# - we'd need something like c("batch", batch/run") for optimize by batch and run within batch.
# - or we add "batch/run" to the constraints somehow.
-bc$score()
+bc$score(scoring_f)
-trace_run <- optimize_design(
+bc <- optimize_design(
bc,
+ scoring = scoring_f,
shuffle_proposal = shuffle_with_constraints(
src = TRUE,
# batch remains the same and run needs to change
@@ -168,8 +165,10 @@ trace_run <- optimize_design(
```{r, fig.width=6, fig.height= 4}
qplot(
- x = 1:trace_run$n_steps, y = trace_run$scores, color = factor(n_iterations),
- main = str_glue("Final score={bc$score()}"), geom = "point"
+ x = bc$trace$scores[[1]]$step,
+ y = bc$trace$scores[[1]]$score_1,
+ color = factor(n_iterations),
+ main = str_glue("Final score={bc$score(scoring_f)}"), geom = "point"
)
```
@@ -178,7 +177,6 @@ qplot(
This is not giving the expected mix of treatments across runs.
```{r, fig.width=6, fig.height= 4}
-
bc$get_samples() %>%
mutate(run = factor(run)) %>%
ggplot(aes(x = run, fill = Treatment, alpha = factor(Time))) +
diff --git a/vignettes/optimizer_examples.Rmd b/vignettes/optimizer_examples.Rmd
index c707c416..be3bf7de 100644
--- a/vignettes/optimizer_examples.Rmd
+++ b/vignettes/optimizer_examples.Rmd
@@ -17,6 +17,7 @@ knitr::opts_chunk$set(
```{r setup}
library(designit)
+library(magrittr)
```
@@ -60,12 +61,9 @@ bc <- BatchContainer$new(
run = 2, position = 5
),
exclude = tibble::tibble(batch = 4, run = c(1, 2), position = c(5, 5))
-)
-
-# Add samples to container
-assign_in_order(bc, samples = multi_trt_day_samples)
-# Set scoring function
-bc$scoring_f <- osat_score_generator(c("batch"), c("Treatment", "Time"))
+) %>%
+ # Add samples to container
+ assign_in_order(samples = multi_trt_day_samples)
bc
```
@@ -85,15 +83,15 @@ Optimization finishes after the list of permutations is exhausted.
```{r}
n_shuffle <- rep(c(32, 10, 5, 2, 1), c(20, 40, 40, 50, 50))
+scoring_f <- osat_score_generator(c("batch"), c("Treatment", "Time"))
-bc1 <- bc$copy()
-
-trace1 <- optimize_design(
- bc1,
+bc1 <- optimize_design(
+ bc,
+ scoring = scoring_f,
n_shuffle = n_shuffle # will implicitly generate a shuffling function according to the provided schedule
)
-trace1$elapsed
+bc1$trace$elapsed
```
## Optimization trace
@@ -101,14 +99,27 @@ trace1$elapsed
Custom plot with some colours:
```{r, fig.width=5, fig.height= 4}
-ggplot2::qplot(x = seq_along(trace1$scores), y = trace1$scores, color = factor(n_shuffle)[1:length(trace1$scores)], geom = "point") +
- ggplot2::labs(title = "Score 1 tracing", subtitle = stringr::str_glue("Final score = {bc1$score()}"), x = "Iteration", y = "Score", color = "n_shuffle")
+bc1$scores_table() %>%
+ dplyr::mutate(
+ n_shuffle = c(NA, n_shuffle)
+ ) %>%
+ ggplot2::ggplot(
+ ggplot2::aes(step, value, color = factor(n_shuffle))
+ ) +
+ ggplot2::geom_point() +
+ ggplot2::labs(
+ title = "Score 1 tracing",
+ subtitle = stringr::str_glue("Final score = {bc1$score(scoring_f)}"),
+ x = "Iteration",
+ y = "Score",
+ color = "n_shuffle"
+ )
```
Using the internal method...
```{r, fig.width=5, fig.height= 4}
-trace1$plot()
+bc1$plot_trace()
```
We may safely apply the batch container methods get_samples() and score() also
@@ -116,7 +127,7 @@ after using the new optimization code.
## Final batch layout
```{r, fig.width=6, fig.height=5}
-bc1$score()
+bc1$score(scoring_f)
bc1$get_samples(assignment = TRUE) %>%
dplyr::filter(!is.na(Treatment)) %>%
@@ -136,8 +147,9 @@ immediately on the same batch container.
```{r}
n_shuffle <- rep(c(5, 2, 1), c(30, 30, 30))
-optimize_design(
+bc1 <- optimize_design(
bc1,
+ scoring = scoring_f,
n_shuffle = n_shuffle
)
```
@@ -154,10 +166,9 @@ reaching a specific minimum delta threshold (score improvement from one selected
solution to the next).
```{r}
-bc2 <- bc$copy()
-
-optimize_design(
- bc2,
+bc2 <- optimize_design(
+ bc,
+ scoring = scoring_f,
n_shuffle = 3, # will implicitly generate a shuffling function that will do 3 swaps at each iteration
max_iter = 2000,
min_delta = 0.1
@@ -168,7 +179,7 @@ optimize_design(
# Optimization with multi-variate scoring function
Instead of passing a single scoring function, a list of multiple scoring
-functions can be assigned to a batch container, each of which to return a scalar
+functions can be passed to the optimizer, each of which to return a scalar
value on evaluation.
By default, a strict improvement rule is applied for classifying a potential
@@ -182,16 +193,15 @@ The second scoring function used here is by the way rather redundant and just
serves for illustration.
```{r}
-bc3 <- bc$copy()
-
-bc3$scoring_f <- list(
+multi_scoring_f <- list(
osat_score_generator(c("batch"), c("Treatment", "Time")),
osat_score_generator(c("batch"), c("Treatment"))
)
-trace <- optimize_design(
- bc3,
+bc3 <- optimize_design(
+ bc,
+ scoring = multi_scoring_f,
n_shuffle = 3,
max_iter = 200,
min_delta = 0.1
@@ -216,16 +226,9 @@ We may also want to decrease the delta_min parameter to match the new numerical
range.
```{r}
-bc3_as <- bc$copy()
-
-bc3_as$scoring_f <- list(
- osat_score_generator(c("batch"), c("Treatment", "Time")),
- osat_score_generator(c("batch"), c("Treatment"))
-)
-
-
-trace <- optimize_design(
- bc3_as,
+bc3_as <- optimize_design(
+ bc,
+ scoring = multi_scoring_f,
n_shuffle = 3,
max_iter = 200,
min_delta = 0.01,
@@ -245,16 +248,9 @@ simply set the aggregated score to whichever of the individual scores is larger
```{r}
-bc4 <- bc$copy()
-
-bc4$scoring_f <- list(
- osat_score_generator(c("batch"), c("Treatment", "Time")),
- osat_score_generator(c("batch"), c("Treatment"))
-)
-
-
-optimize_design(
- bc4,
+bc4 <- optimize_design(
+ bc,
+ scoring = multi_scoring_f,
n_shuffle = 3,
aggregate_scores_func = worst_score,
max_iter = 200,
@@ -271,15 +267,9 @@ For illustration, we omit the `n_shuffle` parameter here, which will lead by
default to pairwise sample swaps being done on each iteration.
```{r, eval = FALSE}
-bc5 <- bc$copy()
-
-bc5$scoring_f <- list(
- osat_score_generator(c("batch"), c("Treatment", "Time")),
- osat_score_generator(c("batch"), c("Treatment"))
-)
-
-optimize_design(
- bc5,
+bc5 <- optimize_design(
+ bc,
+ scoring = multi_scoring_f,
aggregate_scores_func = sum_scores,
max_iter = 200,
autoscale_scores = TRUE,
@@ -295,15 +285,9 @@ Note that we don't use the auto-scaling in this case as the L2-norm based optimi
not the minimal (negative) value that would be desired in that case.
```{r, eval = FALSE}
-bc5_2 <- bc$copy()
-
-bc5_2$scoring_f <- list(
- osat_score_generator(c("batch"), c("Treatment", "Time")),
- osat_score_generator(c("batch"), c("Treatment"))
-)
-
-optimize_design(
- bc5_2,
+bc5_2 <- optimize_design(
+ bc,
+ scoring = multi_scoring_f,
aggregate_scores_func = L2s_norm,
max_iter = 200,
)
@@ -322,10 +306,9 @@ across all available positions in the batch container. Note that this is usually
not a good strategy for converging to a solution.
```{r}
-bc6 <- bc$copy()
-
-optimize_design(
- bc6,
+bc6 <- optimize_design(
+ bc,
+ scoring = scoring_f,
shuffle_proposal_func = complete_random_shuffling,
max_iter = 200
)
@@ -349,10 +332,9 @@ to be optimized. Choose an appropriate aggregation function if you happen to
have multiple scores initially.
```{r}
-bc7 <- bc$copy()
-
-trace7 <- optimize_design(
- bc7,
+bc7 <- optimize_design(
+ bc,
+ scoring = scoring_f,
n_shuffle = 1,
acceptance_func = mk_simanneal_acceptance_func(),
max_iter = 200
@@ -362,23 +344,22 @@ The trace may show a non strictly monotonic behavior now, reflecting the SA
protocol at work.
```{r, fig.width=5, fig.height= 4}
-trace7$plot()
+bc7$plot_trace()
```
Better results and quicker convergence may be achieved by playing with the
starting temperature (T0) and cooling speed (alpha) in a specific case.
```{r}
-bc8 <- bc$copy()
-
-trace8 <- optimize_design(
- bc8,
+bc8 <- optimize_design(
+ bc,
+ scoring = scoring_f,
n_shuffle = 1,
acceptance_func = mk_simanneal_acceptance_func(mk_simanneal_temp_func(T0 = 100, alpha = 2)),
max_iter = 150
)
-trace8$plot()
+bc8$plot_trace()
```
# Full blown example
@@ -387,16 +368,15 @@ The following example puts together all possible options to illustrate the
flexibility of the optimization.
```{r}
-bc$scoring_f <- list(
- osat_score_generator(c("batch"), c("Treatment", "Time")),
- osat_score_generator(c("batch"), c("Treatment")),
- osat_score_generator(c("batch"), c("Time"))
-)
-
n_shuffle <- rep(c(3, 2, 1), c(20, 20, 200))
-trace <- optimize_design(
+bc9 <- optimize_design(
bc,
+ scoring = list(
+ osat_score_generator(c("batch"), c("Treatment", "Time")),
+ osat_score_generator(c("batch"), c("Treatment")),
+ osat_score_generator(c("batch"), c("Time"))
+ ),
n_shuffle = n_shuffle,
aggregate_scores_func = sum_scores,
acceptance_func = mk_simanneal_acceptance_func(mk_simanneal_temp_func(T0 = 500, alpha = 1)),
@@ -405,9 +385,9 @@ trace <- optimize_design(
autoscale_scores = T
)
-trace$plot()
+bc9$plot_trace()
-bc$get_samples(assignment = TRUE) %>%
+bc9$get_samples(assignment = TRUE) %>%
dplyr::mutate(batch = factor(batch)) %>%
ggplot2::ggplot(ggplot2::aes(x = batch, fill = Treatment, alpha = factor(Time))) +
ggplot2::geom_bar()
diff --git a/vignettes/osat.Rmd b/vignettes/osat.Rmd
index 1b9c38c7..17549095 100644
--- a/vignettes/osat.Rmd
+++ b/vignettes/osat.Rmd
@@ -124,7 +124,7 @@ bc$n_locations
Assign samples and get initial setup.
```{r}
-bc$samples <- samples
+bc <- assign_in_order(bc, samples)
starting_assignment <- bc$get_locations() %>%
left_join(g_setup_start) %>%
@@ -139,22 +139,23 @@ bc$get_samples(remove_empty_locations = TRUE) %>%
## Using designit OSAT score implementation
```{r}
-bc$scoring_f <- osat_score_generator("plates", c("SampleType", "Race", "AgeGrp"))
+scoring_f <- osat_score_generator("plates", c("SampleType", "Race", "AgeGrp"))
-bc$score()
+bc$score(scoring_f)
g_setup@metadata$optValue %>% head(1)
# should be identical
bench::system_time({
set.seed(123)
- trace_reference <- optimize_design(bc, max_iter = params$iterations)
+ bc_reference <- optimize_design(bc, scoring = scoring_f, max_iter = params$iterations)
})
```
```{r}
# final score
-bc$score()
-plot(trace_reference, main = str_glue("Final score={bc$score()}"))
+bc_reference$score(scoring_f)
+bc_reference$plot_trace() +
+ ggtitle(str_glue("Final score={bc$score(scoring_f)}"))
bc$get_samples(remove_empty_locations = TRUE) %>%
plot_batch()
```
@@ -163,9 +164,8 @@ bc$get_samples(remove_empty_locations = TRUE) %>%
Instead of relying on `BatchContainer`, here we have a manual optimization process using `data.table`.
```{r}
-bc$move_samples(location_assignment = starting_assignment)
-
fast_osat_optimize <- function(bc, batch_vars, feature_vars, iterations) {
+ bc <- bc$copy()
ldf <- data.table::data.table(bc$get_locations())[, c("plates")][, ".sample_id" := bc$assignment]
fcols <- c(".sample_id", feature_vars)
smp <- data.table::data.table(bc$samples)[, ..fcols]
@@ -202,27 +202,27 @@ fast_osat_optimize <- function(bc, batch_vars, feature_vars, iterations) {
bc$assignment <- df$.sample_id
- scores
+ list(bc=bc, scores=scores)
}
bench::system_time({
set.seed(123)
- trace <- fast_osat_optimize(bc, "plates", c("SampleType", "Race", "AgeGrp"), iterations = params$iterations)
+ opt_res <- fast_osat_optimize(bc, "plates", c("SampleType", "Race", "AgeGrp"), iterations = params$iterations)
})
```
# Shuffle optimization with burn-in
```{r}
-bc$move_samples(location_assignment = starting_assignment)
-
-bc$scoring_f <- osat_score_generator("plates", c("SampleType", "Race", "AgeGrp"))
+scoring_f <- osat_score_generator("plates", c("SampleType", "Race", "AgeGrp"))
burn_in_it <- floor(params$iterations * 0.1)
burn_in_it
bench::system_time({
set.seed(123)
- trace_burn_in <- optimize_design(bc,
+ bc_burn_in <- optimize_design(
+ bc,
+ scoring = scoring_f,
n_shuffle = c(
rep(20, burn_in_it),
rep(
@@ -237,9 +237,9 @@ bench::system_time({
```{r}
tibble(
- i = seq_len(trace_burn_in$n_steps),
- normal = trace_reference$scores,
- burnin = trace_burn_in$scores
+ i = bc_burn_in$trace$scores[[1]]$step,
+ normal = bc_reference$trace$scores[[1]]$score_1,
+ burnin = bc_burn_in$trace$scores[[1]]$score_1
) %>%
pivot_longer(-i, names_to = "method", values_to = "score") %>%
ggplot(aes(i, score, col = method)) +
@@ -249,9 +249,7 @@ tibble(
# Score demonstration
```{r}
-bc$score()
-bc$scoring_f <- function(...) rnorm(1)
-bc$score()
+bc$score(scoring_f)
```
```{r}
@@ -260,13 +258,11 @@ assign_random(bc)
bc$get_samples()
bc$get_samples(remove_empty_locations = TRUE)
-bc$score()
-
-bc$scoring_f <- list(
+scoring_f <- list(
fc0 = function(samples) rnorm(1) + 2 * rexp(1),
fc1 = function(samples) rnorm(1, 100),
fc2 = function(samples) -7
)
-bc$score()
+bc$score(scoring_f)
```
diff --git a/vignettes/plate_layouts.Rmd b/vignettes/plate_layouts.Rmd
index fe1af67b..b2c81095 100644
--- a/vignettes/plate_layouts.Rmd
+++ b/vignettes/plate_layouts.Rmd
@@ -68,7 +68,7 @@ bc <- BatchContainer$new(
dimensions = list("plate" = 3, "row" = 4, "col" = 6),
)
-assign_in_order(bc, dat)
+bc <- assign_in_order(bc, dat)
head(bc$get_samples()) %>% gt::gt()
```
@@ -99,7 +99,7 @@ The order of the factors indicate their relative importance.
In this case we prioritize Group over Sex.
```{r}
-traces <- optimize_multi_plate_design(bc,
+bc <- optimize_multi_plate_design(bc,
across_plates_variables = c("Group", "Sex"),
within_plate_variables = c("Group"),
plate = "plate",
@@ -130,8 +130,12 @@ cowplot::plot_grid(
We can look at the trace objects for each internal `optimize_design` run,
returned from the wrapper function.
-```{r fig.width=3, fig.height=3}
-purrr::imap(traces, ~ .x$plot(include_aggregated = TRUE) + labs(title = .y))
+```{r fig.width=6, fig.height=3}
+bc$scores_table() |>
+ ggplot(aes(step, value, color = score)) +
+ geom_line() +
+ geom_point() +
+ facet_wrap(~ optimization_index, scales = "free_y")
```
## Plate scoring
@@ -140,7 +144,7 @@ Note that internally the wrapper function sets up plate specific scoring functio
that could manually be set up in the following way.
```{r, eval = FALSE}
-bc$scoring_f <- c(
+scoring_f <- c(
Group = mk_plate_scoring_functions(bc,
plate = "plate", row = "row", column = "col",
group = "Group", penalize_lines = "hard"
@@ -184,12 +188,12 @@ bc <- BatchContainer$new(
dimensions = list("batch" = 3, "location" = 11)
)
-bc$scoring_f <- list(
+scoring_f <- list(
group = osat_score_generator(batch_vars = "batch", feature_vars = "Group"),
sex = osat_score_generator(batch_vars = "batch", feature_vars = "Sex")
)
-assign_random(
+bc <- assign_random(
bc,
dat %>% select(SubjectID, Group, Sex) %>% distinct()
)
@@ -217,8 +221,9 @@ cowplot::plot_grid(
Optimizing the layout with `optimize_design()`
```{r}
-trace <- optimize_design(
+bc <- optimize_design(
bc,
+ scoring = scoring_f,
n_shuffle = 1,
acceptance_func = ~ accept_leftmost_improvement(..., tolerance = 0.01),
max_iter = 150,
@@ -238,7 +243,7 @@ cowplot::plot_grid(
bc$get_samples() %>% ggplot(aes(x = batch, fill = Sex)) +
geom_bar() +
labs(y = "subject count"),
- trace$plot(include_aggregated = TRUE)
+ bc$plot_trace(include_aggregated = TRUE)
),
ncol = 3
)
@@ -268,7 +273,7 @@ bc <- BatchContainer$new(
)
# initial assignment such that the original plate assigned stays the same
-assign_in_order(
+bc <- assign_in_order(
bc,
dat %>% arrange(batch)
)
@@ -295,7 +300,7 @@ For distributing samples within each plate, we use variables Group and Sex again
The order of the factors indicate their relative importance.
```{r}
-traces <- optimize_multi_plate_design(bc,
+bc <- optimize_multi_plate_design(bc,
within_plate_variables = c("Group", "Sex"),
plate = "plate",
row = "row",
@@ -322,8 +327,12 @@ cowplot::plot_grid(
)
```
-```{r fig.width=3, fig.height=3}
-purrr::imap(traces, ~ .x$plot(include_aggregated = TRUE) + labs(title = .y))
+```{r fig.width=9, fig.height=3}
+bc$scores_table() |>
+ ggplot(aes(step, value, color = score)) +
+ geom_line() +
+ geom_point() +
+ facet_wrap(~ optimization_index)
```
# Full dataset
@@ -381,12 +390,12 @@ bc <- BatchContainer$new(
dimensions = list("plate" = 3, "locations" = 11)
)
-bc$scoring_f <- list(
+scoring_f <- list(
group = osat_score_generator(batch_vars = "plate", feature_vars = c("Group")),
sex = osat_score_generator(batch_vars = "plate", feature_vars = "Sex")
)
-assign_random(bc, subjects)
+bc <- assign_random(bc, subjects)
```
```{r, fig.width= 8, fig.height=3}
@@ -409,8 +418,9 @@ cowplot::plot_grid(
Optimizing the layout with `optimize_design()`
```{r}
-trace <- optimize_design(
+bc <- optimize_design(
bc,
+ scoring = scoring_f,
n_shuffle = 1,
acceptance_func = ~ accept_leftmost_improvement(..., tolerance = 0.1),
max_iter = 150,
@@ -433,7 +443,7 @@ cowplot::plot_grid(
bc$get_samples() %>% ggplot(aes(x = factor(plate), y = Age)) +
geom_boxplot() +
geom_point(),
- trace$plot(include_aggregated = TRUE)
+ bc$plot_trace(include_aggregated = TRUE)
),
nrow = 2
)
@@ -472,7 +482,7 @@ bc <- BatchContainer$new(
)
# assign samples in order of plate
-assign_in_order(
+bc <- assign_in_order(
bc,
samples_with_plate %>%
arrange(plate) %>%
@@ -512,7 +522,7 @@ For distributing samples within each plate, we use variables Group and Sex again
The order of the factors indicate their relative importance.
```{r}
-traces <- optimize_multi_plate_design(bc,
+bc <- optimize_multi_plate_design(bc,
within_plate_variables = c("Group", "SubjectID", "Sex"),
plate = "plate",
row = "row",
@@ -545,6 +555,10 @@ cowplot::plot_grid(
```
-```{r fig.width=5, fig.height=5}
-purrr::imap(traces, ~ .x$plot(include_aggregated = TRUE) + labs(title = .y))
+```{r fig.width=6, fig.height=5}
+bc$scores_table() |>
+ ggplot(aes(step, value, color = score)) +
+ geom_line() +
+ geom_point() +
+ facet_wrap(~ optimization_index)
```
diff --git a/vignettes/shuffling_with_constraints.Rmd b/vignettes/shuffling_with_constraints.Rmd
index 9fe748af..4ebf87de 100644
--- a/vignettes/shuffling_with_constraints.Rmd
+++ b/vignettes/shuffling_with_constraints.Rmd
@@ -68,9 +68,9 @@ table(treatments)
bc <- BatchContainer$new(locations_table = data.frame(Treatment = treatments, Position = seq_along(treatments)))
-assign_in_order(bc, invivo_study_samples)
+bc <- assign_in_order(bc, invivo_study_samples)
-bc$scoring_f <- osat_score_generator(batch_vars = "Treatment", feature_vars = c("Strain", "Sex"))
+scoring_f <- osat_score_generator(batch_vars = "Treatment", feature_vars = c("Strain", "Sex"))
bc
```
@@ -100,11 +100,10 @@ defined above at the same time. It can be passed to the optimizer together with
such as the scoring or acceptance functions.
```{r}
-bc2 <- bc$copy()
-
-optimize_design(
- bc2,
- shuffle_proposal_func = shuffle_grouped_data(bc2,
+bc2 <- optimize_design(
+ bc,
+ scoring = scoring_f,
+ shuffle_proposal_func = shuffle_grouped_data(bc,
allocate_var = "Treatment",
keep_together_vars = c("Strain", "Sex"),
keep_separate_vars = c("Earmark"),
@@ -238,10 +237,9 @@ the beginning!)
We can finally use the customized shuffling function in the optimization.
```{r echo=TRUE}
-bc3 <- bc$copy()
-
-trace <- optimize_design(
- bc3,
+bc3 <- optimize_design(
+ bc,
+ scoring = scoring_f,
shuffle_proposal_func = shuffle_proposal,
max_iter = 300
)