diff --git a/DESCRIPTION b/DESCRIPTION index b38c59f7..c0cca1e0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -67,16 +67,13 @@ Suggests: tidyverse, printr, devtools (>= 2.0.0), - gridpattern, ggpattern, cowplot, - bestNormalize + bestNormalize, + here Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.1 +RoxygenNote: 7.2.3 VignetteBuilder: knitr biocViews: -Remotes: - github::trevorld/gridpattern, - github::coolbutuseless/ggpattern diff --git a/NAMESPACE b/NAMESPACE index 2cc2cf16..e8a9d03c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,7 +6,6 @@ export(BatchContainer) export(BatchContainerDimension) export(L1_norm) export(L2s_norm) -export(OptimizationTrace) export(accept_leftmost_improvement) export(as_label) export(as_name) diff --git a/R/all_equal_df.R b/R/all_equal_df.R new file mode 100644 index 00000000..c52e2044 --- /dev/null +++ b/R/all_equal_df.R @@ -0,0 +1,45 @@ +#' Compare two data.frames. +#' +#' This will convert factors to characters and disregard +#' row and column order +#' +#' @param df1 first [data.frame()] to compare +#' @param df2 second `data.frame()` to compare +#' @return `TRUE` or `FALSE` in case differences are present +#' @keywords internal +all_equal_df <- function(df1, df2) { + if (!is.data.frame(df1) || !is.data.frame(df2)) { + return(FALSE) + } + + if (nrow(df1) != nrow(df2) || ncol(df1) != ncol(df2)) { + return(FALSE) + } + + assertthat::assert_that( + !any(duplicated(colnames(df1))), + !any(duplicated(colnames(df2))), + msg = "duplicated colnames" + ) + + df2 <- df2[colnames(df1)] + + # convert factors to characters + df1 <- df1 |> + dplyr::mutate(dplyr::across(dplyr::where(is.factor), as.character)) + df2 <- df2 |> + dplyr::mutate(dplyr::across(dplyr::where(is.factor), as.character)) + + # order by all columns + df1 <- df1[do.call(order, df1),] + df2 <- df2[do.call(order, df2),] + + # remove row names + rownames(df1) <- NULL + rownames(df2) <- NULL + + assertthat::are_equal( + all.equal(df1, df2, check.attributes = FALSE), + TRUE + ) +} diff --git a/R/assignment.R b/R/assignment.R index f52f0572..c9a44f54 100644 --- a/R/assignment.R +++ b/R/assignment.R @@ -4,16 +4,16 @@ #' @param samples data.frame with samples. #' @param batch_container Instance of BatchContainer class #' -#' @return Returns `BatchContainer`, invisibly. +#' @return Returns a new `BatchContainer`. #' @example man/examples/assignment.R assign_random <- function(batch_container, samples = NULL) { - assign_in_order(batch_container, samples) + batch_container <- assign_in_order(batch_container, samples) batch_container$move_samples( location_assignment = sample(batch_container$assignment) ) - invisible(batch_container) + batch_container } #' Distributes samples in order. @@ -25,9 +25,10 @@ assign_random <- function(batch_container, samples = NULL) { #' @param samples data.frame with samples. #' @param batch_container Instance of BatchContainer class #' -#' @return Returns `BatchContainer`, invisibly. +#' @return Returns a new `BatchContainer`. #' @example man/examples/assignment.R assign_in_order <- function(batch_container, samples = NULL) { + batch_container <- batch_container$copy() if (is.null(samples)) { assertthat::assert_that(batch_container$has_samples, msg = "batch-container is empty and no samples provided" @@ -46,7 +47,7 @@ assign_in_order <- function(batch_container, samples = NULL) { rep(NA_integer_, n_locations - n_samples) )) - invisible(batch_container) + batch_container } #' Shuffling proposal function with constraints. 
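The hunks above change the contract of the `assign_*` family: instead of mutating the input `BatchContainer` in place and returning it invisibly, each function now works on a copy (via `batch_container$copy()`) and returns the modified container, so callers must capture the result. A minimal sketch of the new calling convention, assuming `samples` is any sample `data.frame` with at most as many rows as the container has locations:

``` r
bc <- BatchContainer$new(dimensions = c("plate" = 2, "position" = 10))

# old style (relied on in-place modification): assign_in_order(bc, samples)
bc <- assign_in_order(bc, samples) # returns a modified copy
bc <- assign_random(bc)            # reshuffles that copy; the input container is untouched
```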
@@ -113,7 +114,7 @@ shuffle_with_constraints <- function(src = TRUE, dst = TRUE) { #' the function will check if samples in `batch_container` are identical to the ones in the #' `samples` argument. #' -#' @return Returns `BatchContainer`, invisibly. +#' @return Returns a new `BatchContainer`. #' #' @examples #' bc <- BatchContainer$new( @@ -133,11 +134,12 @@ shuffle_with_constraints <- function(src = TRUE, dst = TRUE) { #' 2, "a", 3, 5, "TRT", #' ) #' # assign samples from the sample sheet -#' assign_from_table(bc, sample_sheet) +#' bc <- assign_from_table(bc, sample_sheet) #' #' bc$get_samples(remove_empty_locations = TRUE) #' assign_from_table <- function(batch_container, samples) { + batch_container <- batch_container$copy() # sample sheet has all the batch variable assertthat::assert_that(is.data.frame(samples) && nrow(samples) > 0, msg = "samples should be non-empty data.frame" @@ -156,11 +158,9 @@ assign_from_table <- function(batch_container, samples) { if (is.null(batch_container$samples)) { batch_container$samples <- only_samples } else { - assertthat::assert_that(dplyr::all_equal(only_samples, - batch_container$get_samples(assignment = FALSE), - ignore_col_order = TRUE, - ignore_row_order = TRUE, - convert = TRUE + assertthat::assert_that(all_equal_df( + only_samples, + batch_container$get_samples(assignment = FALSE) ), msg = "sample sheet should be compatible with samples inside the batch container" ) @@ -177,5 +177,5 @@ assign_from_table <- function(batch_container, samples) { batch_container$move_samples(location_assignment = samples_with_id$.sample_id) - invisible(batch_container) + batch_container } diff --git a/R/batch_container.R b/R/batch_container.R index 19c0b763..f2983c03 100644 --- a/R/batch_container.R +++ b/R/batch_container.R @@ -328,23 +328,67 @@ BatchContainer <- R6::R6Class("BatchContainer", #' @description #' Score current sample assignment, - #' @return Returns a vector of all scoring functions values. - score = function() { - assertthat::assert_that(!is.null(private$scoring_funcs), - msg = "Scoring function needs to be assigned" + #' @param scoring a function or a names list of scoring functions. + #' Each function should return a numeric vector. + #' @return Returns a named vector of all scoring functions values. 
+ score = function(scoring) { + assertthat::assert_that( + !missing(scoring), + !is.null(scoring), + msg = "Scoring function needs to be provided" ) - assertthat::assert_that(is.list(private$scoring_funcs), - length(private$scoring_funcs) >= 1, - msg = "Scroring function should be a non-empty list" + if (is.function(scoring)) { + scoring <- list(scoring) + } else { + assertthat::assert_that(is.list(scoring), length(scoring) >= 1) + assertthat::assert_that( + all(purrr::map_lgl(scoring, is.function)), + msg = "All elements of scoring should be functions" + ) + } + if (is.null(names(scoring))) { + names(scoring) <- stringr::str_c("score_", seq_along(scoring)) + } + assertthat::assert_that( + !any(names(scoring) == ""), + msg = "scoring cannot be a partially named list" ) - assertthat::assert_that(!is.null(private$samples_table), + assertthat::assert_that(is.list(scoring), + length(scoring) >= 1, + msg = "Scoring function should be a non-empty list" + ) + assertthat::assert_that(!is.null(names(scoring)), + msg = "scoring should be a named list" + ) + assertthat::assert_that(self$has_samples, msg = "No samples in the batch container, cannot compute score" ) - - res <- purrr::map_dbl(private$scoring_funcs, ~ .x(self)) - assertthat::assert_that(length(res) == length(private$scoring_funcs)) - - assertthat::assert_that(is.numeric(res), msg = "Scoring function should return a number") + res <- purrr::imap( + scoring, + \(f, i) { + v <- f(self) + assertthat::assert_that( + is.numeric(v), + length(v) >= 1, + msg = "scoring function should return a numeric vector of positive length" + ) + if (length(v) > 1) { + if (is.null(names(v))) { + names(v) <- seq_along(v) + } + names(v) <- stringr::str_c(i, names(v)) + } else { + names(v) <- i + } + v + } + ) |> + purrr::flatten_dbl() + assertthat::assert_that(length(res) >= length(scoring)) + assertthat::assert_that( + !any(names(res) == "step"), + msg = "score name cannot be 'step'" + ) return(res) }, @@ -368,7 +412,7 @@ BatchContainer <- R6::R6Class("BatchContainer", bc$samples_attr <- private$samples_attributes } - bc$scoring_f <- self$scoring_f + bc$trace <- self$trace bc }, @@ -398,6 +442,100 @@ BatchContainer <- R6::R6Class("BatchContainer", cat() cat("\n") invisible(self) + }, + + #' @field trace Optimization trace, a [tibble::tibble()] + trace = tibble::tibble( + optimization_index = numeric(), + call = list(), + start_assignment_vec = list(), + end_assignment_vec = list(), + scores = list(), + aggregated_scores = list(), + seed = list(), + elapsed = as.difftime(character(0)) + ), + + #' @description + #' Return a table with scores from an optimization. 
+ #' + #' @param index optimization index, all by default + #' @param include_aggregated include aggregated scores + #' @return a [tibble::tibble()] with scores + scores_table = function(index = NULL, include_aggregated = FALSE) { + assertthat::assert_that( + tibble::is_tibble(self$trace), + nrow(self$trace) >= 1, + msg = "trace should be available" + ) + assertthat::assert_that(assertthat::is.flag(include_aggregated)) + if (is.null(index)) { + index <- self$trace$optimization_index + } + assertthat::assert_that( + rlang::is_integerish(index), + msg = "index should be an integer" + ) + d <- self$trace %>% + dplyr::filter(.data$optimization_index %in% index) %>% + dplyr::select(.data$optimization_index, .data$scores) %>% + tidyr::unnest(.data$scores) %>% + tidyr::pivot_longer(c(-.data$optimization_index, -.data$step), + names_to = "score", + values_to = "value") %>% + dplyr::mutate(aggregated = FALSE) + if (include_aggregated) { + d_agg <- self$trace %>% + dplyr::filter(.data$optimization_index %in% index) %>% + dplyr::select(.data$optimization_index, .data$aggregated_scores) %>% + tidyr::unnest(.data$aggregated_scores) + + if ("step" %in% colnames(d_agg)) { + # if no aggregated scores are provided (aggregated_scores=NULL), + # there will be no step column after unnesting + d_agg <- d_agg %>% + tidyr::pivot_longer(c(-.data$optimization_index, -.data$step), + names_to = "score", + values_to = "value") %>% + dplyr::mutate( + aggregated = TRUE, + score = paste0("agg.", .data$score) + ) + d <- dplyr::bind_rows( + d, + d_agg + ) + } + } + d + }, + + #' @description + #' Plot trace + #' @param index optimization index, all by default + #' @param include_aggregated include aggregated scores + #' @param ... not used. + #' @return a [ggplot2::ggplot()] object + plot_trace = function(index = NULL, include_aggregated = FALSE, ...) { + d <- self$scores_table(index, include_aggregated) %>% + dplyr::mutate( + agg_title = dplyr::if_else(.data$aggregated, "aggregated", "score") + ) + p <- ggplot2::ggplot(d) + + ggplot2::aes(.data$step, .data$value, group = .data$score, color = .data$score) + + ggplot2::geom_line() + + ggplot2::geom_point() + if (length(unique(d$optimization_index)) > 1) { + p <- p + + ggplot2::facet_wrap(~ optimization_index, scales = "free") + } else if (include_aggregated && any(d$aggregated)) { + p <- p + + ggplot2::facet_wrap(~ agg_title, scales = "free_y", ncol = 1) + } else { + p <- p + + ggplot2::facet_wrap(~ score, scales = "free_y", ncol = 1) + } + p } ), private = list( @@ -445,22 +583,7 @@ BatchContainer <- R6::R6Class("BatchContainer", #' Upon assignment a single function will be automatically converted to a list #' In the later case each function is called. scoring_f = function(value) { - if (missing(value)) { - private$scoring_funcs - } else { - if (is.null(value)) { - private$scoring_funcs <- NULL - } else if (is.function(value)) { - private$scoring_funcs <- list(value) - } else { - assertthat::assert_that(is.list(value), length(value) >= 1) - assertthat::assert_that( - all(purrr::map_lgl(self$scoring_f, is.function)), - msg = "All elements of scoring_f should be functions" - ) - private$scoring_funcs <- value - } - } + stop("scoring_f is deprecated, pass it to optimize_design() directly instead") }, #' @field has_samples diff --git a/R/optimize.R b/R/optimize.R index 2db24aa5..2ae7b8eb 100644 --- a/R/optimize.R +++ b/R/optimize.R @@ -123,8 +123,9 @@ update_batchcontainer <- function(bc, shuffle_params) { #' @param batch_container An instance of `BatchContainer`. 
#' @param samples A `data.frame` with sample information. #' Should be `NULL` if the `BatchContainer` already has samples in it. +#' @param scoring Scoring function or a named [list()] of scoring functions. #' @param n_shuffle Vector of length 1 or larger, defining how many random sample -#' swaps should be performed in each iteration. If length(n_shuffle)==1, +#' swaps should be performed in each iteration. If `length(n_shuffle)==1`, #' this sets no limit to the number of iterations. Otherwise, the optimization #' stops if the swapping protocol is exhausted. #' @param shuffle_proposal_func A user defined function to propose the next shuffling of samples. @@ -170,10 +171,14 @@ update_batchcontainer <- function(bc, shuffle_params) { #' bc <- BatchContainer$new( #' dimensions = c("plate" = 2, "column" = 5, "row" = 6) #' ) -#' bc$scoring_f <- osat_score_generator("plate", "Sex") -#' optimize_design(bc, invivo_study_samples, max_iter = 100) +#' bc <- optimize_design(bc, invivo_study_samples, +#' scoring = osat_score_generator("plate", "Sex"), +#' max_iter = 100 +#' ) #' plot_plate(bc$get_samples(), .col = Sex) -optimize_design <- function(batch_container, samples = NULL, n_shuffle = NULL, +optimize_design <- function(batch_container, samples = NULL, + scoring = NULL, + n_shuffle = NULL, shuffle_proposal_func = NULL, acceptance_func = accept_strict_improvement, aggregate_scores_func = identity, @@ -182,10 +187,19 @@ optimize_design <- function(batch_container, samples = NULL, n_shuffle = NULL, sample_attributes_fixed = FALSE, max_iter = 1e4, min_delta = NA, quiet = FALSE) { start_time <- Sys.time() + cl <- match.call() + + # create a copy, so that we do not modify the BatchContainer + batch_container <- batch_container$copy() + trace <- tibble::tibble( + optimization_index = max(batch_container$trace$optimization_index, 0) + 1, + call = list(cl), + start_assignment_vec = list(batch_container$assignment) + ) # based on https://stat.ethz.ch/pipermail/r-help/2007-September/141717.html if (!exists(".Random.seed")) stats::runif(1) - save_random_seed <- .Random.seed + trace$seed <- list(.Random.seed) if (is.null(samples)) { assertthat::assert_that(batch_container$has_samples, @@ -193,15 +207,14 @@ optimize_design <- function(batch_container, samples = NULL, n_shuffle = NULL, ) } else { assertthat::assert_that(nrow(samples) > 0) - assign_in_order(batch_container, samples) + batch_container <- assign_in_order(batch_container, samples) } - # Check presence of scoring function and that it's a list of functions - assertthat::assert_that(!is.null(batch_container$scoring_f), msg = "no scoring function set for BatchContainer") - assertthat::assert_that(is.list(batch_container$scoring_f), msg = "scoring function is expected to be a list") - assertthat::assert_that(all(purrr::map_lgl(batch_container$scoring_f, is.function)), msg = "All scoring functions have to be function definitions") - + assertthat::assert_that( + !is.null(scoring), + msg = "Scoring should be provided when calling optimize_design()" + ) # Get assigned samples and locations from the batch container samp <- batch_container$get_samples(include_id = TRUE, assignment = TRUE, remove_empty_locations = FALSE) @@ -257,13 +270,14 @@ optimize_design <- function(batch_container, samples = NULL, n_shuffle = NULL, best_shuffle <- list(src = NULL, dst = NULL, location_assignment = batch_container$assignment, samples_attr = NULL) } - initial_score <- batch_container$score() # Evaluate this just once in order not to break current tests + # Evaluate this 
just once in order not to break current tests + initial_score <- batch_container$score(scoring) score_dim <- length(initial_score) # Check score variances (should be all >0) if (check_score_variance) { bc_copy <- batch_container$copy() - score_vars <- random_score_variances(batch_container$copy(), random_perm = 100, sample_attributes_fixed) + score_vars <- random_score_variances(batch_container$copy(), scoring = scoring, random_perm = 100, sample_attributes_fixed) low_var_scores <- score_vars < 1e-10 if (!quiet) { message( @@ -298,6 +312,7 @@ optimize_design <- function(batch_container, samples = NULL, n_shuffle = NULL, ) } autoscale_func <- mk_autoscale_function(batch_container$copy(), + scoring = scoring, random_perm = autoscaling_permutations, use_boxcox = autoscale_useboxcox, sample_attributes_fixed @@ -312,20 +327,23 @@ optimize_design <- function(batch_container, samples = NULL, n_shuffle = NULL, prev_agg <- NULL - trace <- OptimizationTrace$new( - max_iter + 1, # + 1 to accommodate initial score - length(batch_container$scoring_f), - names(batch_container$scoring_f) + scores_mat <- matrix( + nrow = max_iter + 1, # + 1 to accommodate initial score + ncol = length(best_score), + dimnames = list(NULL, names(best_score)) ) + scores_mat[1,] <- best_score if (identical(aggregate_scores_func, identity)) { - # Do not store aggregated scores if unnecessary - trace$set_scores(1, best_score, NULL) + aggregated_scores_mat <- NULL } else { - trace$set_scores(1, best_score, best_agg) + aggregated_scores_mat <- matrix( + nrow = max_iter + 1, # + 1 to accommodate initial score + ncol = length(best_agg), + dimnames = list(NULL, names(best_agg)) + ) + aggregated_scores_mat[1,] <- best_agg } - # to do: make work with >1-dim agg, line should read as - # trace$set_scores(1, best_score, best_agg) if (!quiet) report_scores(best_score, best_agg, iteration = 0) @@ -338,7 +356,7 @@ optimize_design <- function(batch_container, samples = NULL, n_shuffle = NULL, using_attributes <- TRUE } - new_score <- autoscale_func(batch_container$score()) + new_score <- autoscale_func(batch_container$score(scoring)) assertthat::assert_that(!any(is.na(new_score)), msg = stringr::str_c("NA apprearing during scoring in iteration ", iteration)) new_agg <- aggregate_scores_func(new_score) @@ -358,11 +376,9 @@ optimize_design <- function(batch_container, samples = NULL, n_shuffle = NULL, } iteration <- iteration + 1 - if (identical(aggregate_scores_func, identity)) { - # Do not store aggregated scores if unnecessary - trace$set_scores(iteration, best_score, NULL) - } else { - trace$set_scores(iteration, best_score, best_agg) + scores_mat[iteration,] <- best_score + if (!is.null(aggregated_scores_mat)) { + aggregated_scores_mat[iteration,] <- best_agg } # Test stopping criteria @@ -384,8 +400,14 @@ optimize_design <- function(batch_container, samples = NULL, n_shuffle = NULL, # In the end, always make sure that final state of bc is the one with the best score update_batchcontainer(batch_container, best_shuffle) - trace$shrink(iteration) - trace$seed <- save_random_seed + # shrink + trace$scores <- shrink_mat(scores_mat, iteration) + trace$aggregated_scores <- shrink_mat(aggregated_scores_mat, iteration) trace$elapsed <- Sys.time() - start_time - trace + trace$end_assignment_vec = list(batch_container$assignment) + batch_container$trace <- dplyr::bind_rows( + batch_container$trace, + trace + ) + batch_container } diff --git a/R/plot.R b/R/plot.R index 6f880a24..d1a9c4bc 100644 --- a/R/plot.R +++ b/R/plot.R @@ -78,7 +78,7 @@ 
plot_design <- function(.tbl, ..., .color, .alpha = NULL) {
#' )
#'
#' # assign samples from the sample sheet
-#' assign_random(bc, samples = sample_sheet)
+#' bc <- assign_random(bc, samples = sample_sheet)
#'
#' plot_plate(bc$get_samples(),
#'   plate = plate, column = column, row = row,
diff --git a/R/score_autoscaling.R b/R/score_autoscaling.R
index bdcf1dfc..8104d48c 100644
--- a/R/score_autoscaling.R
+++ b/R/score_autoscaling.R
@@ -1,20 +1,22 @@
#' Sample scores from a number of completely random sample permutations
#'
#' @param batch_container An instance of [BatchContainer].
+#' @param scoring A named [list()] of scoring functions. Each function should
+#' return a vector of non-zero length.
#' @param random_perm Number of random sample permutations to be done.
#' @param sample_attributes_fixed Logical; if `FALSE`, simulate a shuffle function that alters sample attributes at each iteration.
#'
#' @return A score matrix with n (# of permutations) rows and m (dimensionality of score) columns.
#'
#' @keywords internal
-sample_random_scores <- function(batch_container, random_perm, sample_attributes_fixed) {
-  random_scores <- matrix(NA_real_, nrow = random_perm, ncol = length(batch_container$score()))
+sample_random_scores <- function(batch_container, scoring, random_perm, sample_attributes_fixed) {
+  random_scores <- matrix(NA_real_, nrow = random_perm, ncol = length(batch_container$score(scoring)))
for (i in seq_len(random_perm)) {
batch_container$move_samples(location_assignment = complete_random_shuffling(batch_container))
if (!sample_attributes_fixed && batch_container$has_samples_attr) {
batch_container$samples_attr <- batch_container$samples_attr[sample(nrow(batch_container$samples_attr)), ]
}
-    random_scores[i, ] <- batch_container$score()
+    random_scores[i, ] <- batch_container$score(scoring)
}
random_scores
@@ -24,6 +26,8 @@ sample_random_scores
#' Create a function that transforms a current (multi-dimensional) score into a boxcox normalized one
#'
#' @param batch_container An instance of [BatchContainer].
+#' @param scoring A named [list()] of scoring functions. Each function should
+#' return a vector of non-zero length.
#' @param random_perm Number of random sample permutations for the estimation of the scaling params.
#' @param use_boxcox Logical; if TRUE and the `bestNormalize` package is available, boxcox transformations will be used to
#' normalize individual scores. If not possible, scores will just be transformed to a zero mean and unit standard deviation. 
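With `scoring_f` removed from the container, the autoscaling helpers above now thread an explicit `scoring` argument through to `batch_container$score(scoring)`. A minimal sketch of the resulting workflow, reusing `osat_score_generator()` from this package and assuming `bc` already holds samples:

``` r
scoring <- list(plate_balance = osat_score_generator("plate", "Sex"))

bc$score(scoring) # named numeric vector of raw scores
bc <- optimize_design(bc, scoring = scoring, max_iter = 100)
bc$plot_trace()   # the optimization trace is now stored on the container
```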
@@ -31,9 +35,9 @@ sample_random_scores <- function(batch_container, random_perm, sample_attributes #' #' @return The transformation function for a new score vector #' @keywords internal -mk_autoscale_function <- function(batch_container, random_perm, use_boxcox = TRUE, sample_attributes_fixed = FALSE) { - random_scores <- sample_random_scores(batch_container, random_perm, sample_attributes_fixed) - score_dim <- length(batch_container$score()) +mk_autoscale_function <- function(batch_container, scoring, random_perm, use_boxcox = TRUE, sample_attributes_fixed = FALSE) { + random_scores <- sample_random_scores(batch_container, scoring, random_perm, sample_attributes_fixed) + score_dim <- length(batch_container$score(scoring)) # Return function using boxcox transform if bestNormalize package is available if (use_boxcox && requireNamespace("bestNormalize", quietly = T)) { @@ -69,13 +73,15 @@ mk_autoscale_function <- function(batch_container, random_perm, use_boxcox = TRU #' Estimate the variance of individual scores by a series of completely random sample permutations #' #' @param batch_container An instance of `BatchContainer`. +#' @param scoring A named [list()] of scoring function. Each function should +#' return a vector of non-zero length. #' @param random_perm Number of random sample permutations to be done. #' @param sample_attributes_fixed Logical; if FALSE, simulate a shuffle function that alters sample attributes at each iteration. #' #' @return Vector of length m (=dimensionality of score) with estimated variances of each subscore #' #' @keywords internal -random_score_variances <- function(batch_container, random_perm, sample_attributes_fixed) { - random_scores <- sample_random_scores(batch_container, random_perm, sample_attributes_fixed) +random_score_variances <- function(batch_container, scoring, random_perm, sample_attributes_fixed) { + random_scores <- sample_random_scores(batch_container, scoring, random_perm, sample_attributes_fixed) purrr::map_dbl(asplit(random_scores, 2), stats::var, na.rm = T) } diff --git a/R/score_plates.R b/R/score_plates.R index 7cf1c50c..addea7e4 100644 --- a/R/score_plates.R +++ b/R/score_plates.R @@ -69,12 +69,12 @@ mk_dist_matrix <- function(plate_x = 12, plate_y = 8, dist = "minkowski", p = 2, #' bc <- BatchContainer$new( #' dimensions = c("column" = 6, "row" = 10) #' ) -#' assign_random(bc, invivo_study_samples) -#' bc$scoring_f <- mk_plate_scoring_functions( +#' bc <- assign_random(bc, invivo_study_samples) +#' scoring_f <- mk_plate_scoring_functions( #' bc, #' row = "row", column = "column", group = "Sex" #' ) -#' optimize_design(bc, max_iter = 100) +#' bc <- optimize_design(bc, scoring = scoring_f, max_iter = 100) #' plot_plate(bc$get_samples(), .col = Sex) #' mk_plate_scoring_functions <- function(batch_container, plate = NULL, row, column, group, p = 2, penalize_lines = "soft") { @@ -229,8 +229,6 @@ optimize_multi_plate_design <- function(batch_container, across_plates_variables msg = "All columns in 'within_plate_variable' argument have to be found in batch container samples." 
) - traces <- list() - skip_osat <- is.null(across_plates_variables) || is.null(plate) || dplyr::n_distinct(bc$get_locations()[[plate]]) < 2 if (skip_osat && !quiet) message("\nNo balancing of variables across plates required...") @@ -239,25 +237,22 @@ optimize_multi_plate_design <- function(batch_container, across_plates_variables scoring_funcs <- purrr::map(across_plates_variables, ~ osat_score_generator(batch_vars = plate, feature_vars = .x)) %>% unlist() names(scoring_funcs) <- across_plates_variables - bc$scoring_f <- scoring_funcs if (!quiet) message("\nAssigning samples to plates...") - trace1 <- optimize_design(bc, + bc <- optimize_design(bc, + scoring = scoring_funcs, max_iter = max_iter, n_shuffle = n_shuffle, acceptance_func = accept_leftmost_improvement, quiet = TRUE ) - traces <- c(traces, osat_across_plates = trace1) } if (!is.null(within_plate_variables)) { - within_traces <- list() plate_levels <- unique(bc$get_locations()[[plate]]) scoring_funcs <- purrr::map(within_plate_variables, ~ mk_plate_scoring_functions(bc, plate = plate, row = row, column = column, group = .x)) %>% unlist() names(scoring_funcs) <- paste(rep(within_plate_variables, each = length(plate_levels)), names(scoring_funcs)) - bc$scoring_f <- scoring_funcs if (!quiet) { @@ -270,7 +265,8 @@ optimize_multi_plate_design <- function(batch_container, across_plates_variables for (curr_plate in plate_levels) { if (!quiet && length(plate_levels) > 1) cat(curr_plate, "... ") - trace2 <- optimize_design(bc, + bc <- optimize_design(bc, + scoring = scoring_funcs, max_iter = max_iter, quiet = TRUE, shuffle_proposal_func = mk_subgroup_shuffling_function( @@ -279,16 +275,13 @@ optimize_multi_plate_design <- function(batch_container, across_plates_variables ), acceptance_func = accept_leftmost_improvement ) - within_traces <- c(within_traces, trace2) } if (!quiet) cat("\n") - names(within_traces) <- paste0("within_plate_", plate_levels) - traces <- c(traces, within_traces) } if (skip_osat && is.null(within_plate_variables) && !quiet) { message("\nBoth across plates and within plate optimization skipped ('within_plate_variables' is empty).\nBatch container unchanged.\n") } - invisible(traces) + bc } diff --git a/R/shuffle_samples.R b/R/shuffle_samples.R index ef56deb4..ad94b7ef 100644 --- a/R/shuffle_samples.R +++ b/R/shuffle_samples.R @@ -86,9 +86,9 @@ mk_constant_swapping_function <- function(n_swaps, quiet = FALSE) { #' bc <- BatchContainer$new( #' dimensions = c("plate" = 2, "column" = 5, "row" = 6) #' ) -#' bc$scoring_f <- osat_score_generator("plate", "Sex") -#' optimize_design( -#' bc, invivo_study_samples, +#' scoring_f <- osat_score_generator("plate", "Sex") +#' bc <- optimize_design( +#' bc, scoring = scoring_f, invivo_study_samples, #' max_iter = 100, #' shuffle_proposal_func = complete_random_shuffling #' ) @@ -114,9 +114,9 @@ complete_random_shuffling <- function(batch_container, ...) { #' bc <- BatchContainer$new( #' dimensions = c("plate" = 2, "column" = 5, "row" = 6) #' ) -#' bc$scoring_f <- osat_score_generator("plate", "Sex") +#' scoring_f <- osat_score_generator("plate", "Sex") #' optimize_design( -#' bc, invivo_study_samples, +#' bc, scoring = scoring_f, invivo_study_samples, #' max_iter = 100, #' shuffle_proposal_func = mk_swapping_function(1) #' ) diff --git a/R/trace.R b/R/trace.R index b4a88dad..72fa6358 100644 --- a/R/trace.R +++ b/R/trace.R @@ -1,240 +1,3 @@ -#' @title -#' OptimizationTrace represents optimization trace. -#' -#' @description -#' Usually it is created by [optimize_design()]. 
-#' -#' @export -OptimizationTrace <- R6::R6Class("OptimizationTrace", - public = list( - #' @field scores - #' Contains a matrix of scores. The matrix size is usually - #' `c(iterations + 1, length(bc$scoring_f))` - scores = NULL, - - #' @field aggregated_scores - #' Contains a matrix of scores after aggregation. - #' The matrix size is usually `c(iterations + 1, length(aggregated))`, - #' where `length(aggregated)` is the length of aggregated scores vector. - #' Can be `NULL` if aggregated scores are not used. - aggregated_scores = NULL, - - #' @field seed - #' Saved value of [.Random.seed]. - seed = NULL, - - #' @field elapsed - #' Running time of the optimization. - elapsed = NULL, - - #' @field last_step - #' Last iteration step for which the score was set. - last_step = 0, - - #' @description - #' Create a new `OptimizationTrace` object. - #' - #' @param n_steps - #' Number of values to save. Usually `n_steps == iterations + 1`. - #' @param n_scores - #' Number of scoring functions. - #' @param score_names - #' Names of scoring functions. - #' - #' @examples - #' tr <- OptimizationTrace$new(10, 2, c("score1", "score2")) - initialize = function(n_steps, n_scores, score_names) { - self$scores <- matrix(NA_real_, nrow = n_steps, ncol = n_scores) - if (!is.null(score_names)) { - dimnames(self$scores) <- list(NULL, score_names) - } - }, - - #' @description - #' Set scores for i-th step. - #' - #' @param i - #' Step number. - #' @param scores - #' Scores, a vector or a value if no auxiliary functions are used. - #' @param aggregated_scores - #' Vector of aggregated scores. Can be NULL. - #' - #' @return `OptimizationTrace` invisibly. - #' - #' @examples - #' tr$set_scores(1, c(0.5, 0.5), NULL) - #' tr$set_scores(2, c(0.5, 0.5), NULL) - set_scores = function(i, scores, aggregated_scores) { - assertthat::assert_that( - assertthat::is.count(i), - is.vector(scores), - is.null(aggregated_scores) || is.vector(aggregated_scores) - ) - # initialize aggregated scores, in case they're empty - self$scores[i, ] <- scores - if (!is.null(aggregated_scores)) { - if (is.null(self$aggregated_scores)) { - self$aggregated_scores <- matrix( - NA_real_, - nrow = nrow(self$scores), - ncol = length(aggregated_scores) - ) - } - assertthat::assert_that( - length(aggregated_scores) == ncol(self$aggregated_scores) - ) - self$aggregated_scores[i, ] <- aggregated_scores - } - self$last_step <- i - invisible(self) - }, - - #' @description - #' Shrink scores by keeping only first `last_step` scores. - #' - #' @param last_step - #' Last step to keep. - #' - #' @return `OptimizationTrace` invisibly. - #' @examples - #' tr$shrink(2) - shrink = function(last_step = self$last_step) { - self$scores <- head(self$scores, last_step) - if (!is.null(self$aggregated_scores)) { - self$aggregated_scores <- head(self$aggregated_scores, last_step) - } - invisible(self) - }, - - #' @description - #' Return individual (not aggregated!) scores by keeping only first `last_step` scores. - #' - #' @param last_step - #' Last step to keep. - #' - #' @return `OptimizationTrace` invisibly. - #' @examples - #' tr$get_scores() - get_scores = function(last_step = self$last_step) { - head(self$scores, last_step) - }, - - #' @description - #' Print `OptimizationTrace`. - #' - #' @param ... - #' Unused. - #' - #' @return `OptimizationTrace` invisibly. - #' @examples - #' print(tr) - print = function(...) 
{ - start_score <- self$scores[1, ] %>% - round(3) %>% - stringr::str_c(collapse = ",") - final_score <- tail(self$scores, 1) %>% - round(3) %>% - stringr::str_c(collapse = ",") - cat(stringr::str_glue("Optimization trace ({self$n_steps} score values, elapsed {format(self$elapsed)}).\n\n")) - cat(" Starting score: ", start_score, "\n", sep = "") - cat(" Final score : ", final_score, "\n", sep = "") - invisible(self) - }, - - #' @description - #' Convert to a [data.frame]. - #' - #' @param include_aggregated Include aggregated scores. Otherwise only - #' raw scores are exported. - #' - #' @return [data.frame] - #' @examples - #' tr$as_tibble() - as_tibble = function(include_aggregated = TRUE) { - scores <- make_colnames(self$scores, "score") %>% - tibble::as_tibble() %>% - dplyr::mutate(step = dplyr::row_number()) %>% - tidyr::pivot_longer( - c(-step), - names_to = "score", - values_to = "value" - ) %>% - dplyr::mutate(score = factor(score)) - if (include_aggregated) { - agg_scores <- self$aggregated_scores - } else { - agg_scores <- NULL - } - if (!is.null(agg_scores) && include_aggregated) { - colnames(agg_scores) <- stringr::str_c( - "agg.", seq_len(ncol(agg_scores)) - ) - agg_scores <- agg_scores %>% - tibble::as_tibble() %>% - dplyr::mutate(step = dplyr::row_number()) %>% - tidyr::pivot_longer( - c(-step), - names_to = "score", - values_to = "value" - ) %>% - dplyr::mutate(score = factor(score)) - } - dplyr::bind_rows( - score = scores, - aggregated = agg_scores, - .id = "type" - ) %>% - dplyr::mutate(type = factor(type, levels = c("score", "aggregated"))) - }, - - #' @description - #' Plot `OptimizationTrace`. Only the main score at the moment. - #' - #' @param include_aggregated Include aggregated scores. Otherwise only - #' raw scores are plotted. - #' @param ... - #' Not used. - #' - #' @examples - #' tr <- OptimizationTrace$new(10, 3, letters[1:3]) - #' for (i in seq_len(10)) { - #' tr$set_scores(i, rnorm(3)*(1:3), rnorm(3)*(1:3)) - #' } - #' - #' # plot only the main scores - #' plot(tr) - #' # plot main and aggregated scores - #' plot(tr, include_aggregated=TRUE) - plot = function(include_aggregated = FALSE, ...) { - p <- self$as_tibble(include_aggregated = include_aggregated) %>% - ggplot2::ggplot() + - ggplot2::aes(x = step, y = value, group = score, color = score) + - ggplot2::geom_point() + - ggplot2::geom_line() - - if (include_aggregated) { - p + - ggplot2::facet_wrap(~type, scales = "free_y", ncol = 1) - } else { - p + - ggplot2::facet_wrap(~score, scales = "free_y", ncol = 1) - } - } - ), - active = list( - #' @field n_steps - #' Returns number of steps in the `OptimizationTrace`. - n_steps = function(value) { - if (missing(value)) { - nrow(self$scores) - } else { - stop("Cannot set n_steps (read-only).") - } - } - ) -) - #' Make [matrix] column names unique. #' #' @param prefix Prefix to add if column names are empty. @@ -251,3 +14,20 @@ make_colnames <- function(m, prefix = "X") { colnames(m) <- make.names(colnames(m), unique = TRUE) m } + +#' Shrinks a matrix with scores and adds an iteration column. 
+#' +#' @param m input matrix +#' @param last_iteration last iteration +#' +#' @return a [tibble::tibble()] wrapped in a list +#' @keywords internal +shrink_mat <- function(m, last_iteration) { + if (is.null(m)) + return(m) + dplyr::bind_cols( + tibble::tibble(step=seq_len(last_iteration)), + as.data.frame(utils::head(m, last_iteration)) + ) %>% + list() +} diff --git a/README.Rmd b/README.Rmd index e5a3507c..5df79833 100644 --- a/README.Rmd +++ b/README.Rmd @@ -69,7 +69,7 @@ bc <- BatchContainer$new( # assign samples randomly set.seed(17) -assign_random(bc, subject_data) +bc <- assign_random(bc, subject_data) bc$get_samples() %>% ggplot() + @@ -82,7 +82,7 @@ Random assignmet of samples to batches produced an uneven distribution. ### Optimizing the assignemnt ```{r optimized_assignment, warning=FALSE} # set scoring functions -bc$scoring_f <- list( +scoring_f <- list( # first priority, groups are evenly distributed group = osat_score_generator(batch_vars = "batch", feature_vars = "Group"), @@ -91,7 +91,9 @@ bc$scoring_f <- list( feature_vars = "Sex") ) -trace <- optimize_design(bc, max_iter = 150, quiet = TRUE) +bc <- optimize_design( + bc, scoring = scoring_f, max_iter = 150, quiet = TRUE +) bc$get_samples() %>% ggplot() + @@ -99,7 +101,7 @@ bc$get_samples() %>% geom_bar() # show optimization trace -plot(trace) +bc$plot_trace() ``` ## Examples diff --git a/README.md b/README.md index bcfa081b..8be1fe49 100644 --- a/README.md +++ b/README.md @@ -69,7 +69,7 @@ bc <- BatchContainer$new( # assign samples randomly set.seed(17) -assign_random(bc, subject_data) +bc <- assign_random(bc, subject_data) bc$get_samples() %>% ggplot() + @@ -85,7 +85,7 @@ Random assignmet of samples to batches produced an uneven distribution. ``` r # set scoring functions -bc$scoring_f <- list( +scoring_f <- list( # first priority, groups are evenly distributed group = osat_score_generator(batch_vars = "batch", feature_vars = "Group"), @@ -94,7 +94,9 @@ bc$scoring_f <- list( feature_vars = "Sex") ) -trace <- optimize_design(bc, max_iter = 150, quiet = TRUE) +bc <- optimize_design( + bc, scoring = scoring_f, max_iter = 150, quiet = TRUE +) bc$get_samples() %>% ggplot() + @@ -107,7 +109,7 @@ bc$get_samples() %>% ``` r # show optimization trace -plot(trace) +bc$plot_trace() ``` ![](man/figures/README-optimized_assignment-2.png) diff --git a/man/BatchContainer.Rd b/man/BatchContainer.Rd index db4a1e6b..e5c32ac8 100644 --- a/man/BatchContainer.Rd +++ b/man/BatchContainer.Rd @@ -27,6 +27,13 @@ bc <- BatchContainer$new( bc } +\section{Public fields}{ +\if{html}{\out{
<div class="r6-fields">}}
+\describe{
+\item{\code{trace}}{Optimization trace, a \code{\link[tibble:tibble]{tibble::tibble()}}}
+}
+\if{html}{\out{
</div>}}
+}
\section{Active bindings}{
\if{html}{\out{
<div class="r6-active-bindings">}}
\describe{
@@ -71,6 +78,8 @@ Assigning this field is deprecated, please use \verb{$move_samples()} instead.}
\item \href{#method-BatchContainer-score}{\code{BatchContainer$score()}}
\item \href{#method-BatchContainer-copy}{\code{BatchContainer$copy()}}
\item \href{#method-BatchContainer-print}{\code{BatchContainer$print()}}
+\item \href{#method-BatchContainer-scores_table}{\code{BatchContainer$scores_table()}}
+\item \href{#method-BatchContainer-plot_trace}{\code{BatchContainer$plot_trace()}}
}
}
\if{html}{\out{
<hr>}}
@@ -200,11 +209,19 @@ The length of the vector should match the number of locations,
\subsection{Method \code{score()}}{
Score current sample assignment,
\subsection{Usage}{
-\if{html}{\out{
<div class="r">}}\preformatted{BatchContainer$score()}\if{html}{\out{
</div>}}
+\if{html}{\out{
<div class="r">}}\preformatted{BatchContainer$score(scoring)}\if{html}{\out{
</div>}}
}
+\subsection{Arguments}{
+\if{html}{\out{
<div class="arguments">}}
+\describe{
+\item{\code{scoring}}{a function or a named list of scoring functions.
Each function should return a numeric vector.}
}
+\if{html}{\out{
</div>}}
+}
\subsection{Returns}{
-Returns a vector of all scoring functions values.
+Returns a named vector of all scoring functions values.
}
}
\if{html}{\out{
<hr>}}
@@ -232,16 +249,62 @@ Prints information about \code{BatchContainer}.
\subsection{Arguments}{
\if{html}{\out{
<div class="arguments">}}
\describe{
-\item{\code{...}}{not used.
+\item{\code{...}}{not used.}
+}
+\if{html}{\out{
</div>}}
+}
+}
+\if{html}{\out{
<hr>}}
+\if{html}{\out{<a id="method-BatchContainer-scores_table"></a>}}
+\if{latex}{\out{\hypertarget{method-BatchContainer-scores_table}{}}}
+\subsection{Method \code{scores_table()}}{
+Return a table with scores from an optimization.
+\subsection{Usage}{
+\if{html}{\out{
<div class="r">}}\preformatted{BatchContainer$scores_table(index = NULL, include_aggregated = FALSE)}\if{html}{\out{
</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{
<div class="arguments">}}
+\describe{
+\item{\code{index}}{optimization index, all by default}
+
+\item{\code{include_aggregated}}{include aggregated scores}
+}
+\if{html}{\out{
</div>}}
+}
+\subsection{Returns}{
+a \code{\link[tibble:tibble]{tibble::tibble()}} with scores
+}
+}
+\if{html}{\out{
<hr>}}
+\if{html}{\out{<a id="method-BatchContainer-plot_trace"></a>}}
+\if{latex}{\out{\hypertarget{method-BatchContainer-plot_trace}{}}}
+\subsection{Method \code{plot_trace()}}{
+Plot trace
+\subsection{Usage}{
+\if{html}{\out{
<div class="r">}}\preformatted{BatchContainer$plot_trace(index = NULL, include_aggregated = FALSE, ...)}\if{html}{\out{
</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{
<div class="arguments">}}
+\describe{
+\item{\code{index}}{optimization index, all by default}
+
+\item{\code{include_aggregated}}{include aggregated scores}
+
+\item{\code{...}}{not used.}
+}
+\if{html}{\out{
</div>}}
+}
+\subsection{Returns}{
+a \code{\link[ggplot2:ggplot]{ggplot2::ggplot()}} object
List of scoring functions.
Tibble with batch container locations.
Tibble with sample information and sample ids.
Sample attributes, a data.table.
Vector with assignment of sample ids to locations.
Cached data.table with samples assignment.
-Validate sample assignment.}
-}
-\if{html}{\out{
}} +Validate sample assignment. } } } diff --git a/man/OptimizationTrace.Rd b/man/OptimizationTrace.Rd deleted file mode 100644 index 0ee9731a..00000000 --- a/man/OptimizationTrace.Rd +++ /dev/null @@ -1,329 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/trace.R -\name{OptimizationTrace} -\alias{OptimizationTrace} -\title{OptimizationTrace represents optimization trace.} -\description{ -Usually it is created by \code{\link[=optimize_design]{optimize_design()}}. -} -\examples{ - -## ------------------------------------------------ -## Method `OptimizationTrace$new` -## ------------------------------------------------ - -tr <- OptimizationTrace$new(10, 2, c("score1", "score2")) - -## ------------------------------------------------ -## Method `OptimizationTrace$set_scores` -## ------------------------------------------------ - -tr$set_scores(1, c(0.5, 0.5), NULL) -tr$set_scores(2, c(0.5, 0.5), NULL) - -## ------------------------------------------------ -## Method `OptimizationTrace$shrink` -## ------------------------------------------------ - -tr$shrink(2) - -## ------------------------------------------------ -## Method `OptimizationTrace$get_scores` -## ------------------------------------------------ - -tr$get_scores() - -## ------------------------------------------------ -## Method `OptimizationTrace$print` -## ------------------------------------------------ - -print(tr) - -## ------------------------------------------------ -## Method `OptimizationTrace$as_tibble` -## ------------------------------------------------ - -tr$as_tibble() - -## ------------------------------------------------ -## Method `OptimizationTrace$plot` -## ------------------------------------------------ - -tr <- OptimizationTrace$new(10, 3, letters[1:3]) -for (i in seq_len(10)) { - tr$set_scores(i, rnorm(3)*(1:3), rnorm(3)*(1:3)) -} - -# plot only the main scores -plot(tr) -# plot main and aggregated scores -plot(tr, include_aggregated=TRUE) -} -\section{Public fields}{ -\if{html}{\out{
<div class="r6-fields">}}
-\describe{
-\item{\code{scores}}{Contains a matrix of scores. The matrix size is usually
-\code{c(iterations + 1, length(bc$scoring_f))}}
-
-\item{\code{aggregated_scores}}{Contains a matrix of scores after aggregation.
-The matrix size is usually \code{c(iterations + 1, length(aggregated))},
-where \code{length(aggregated)} is the length of aggregated scores vector.
-Can be \code{NULL} if aggregated scores are not used.}
-
-\item{\code{seed}}{Saved value of \link{.Random.seed}.}
-
-\item{\code{elapsed}}{Running time of the optimization.}
-
-\item{\code{last_step}}{Last iteration step for which the score was set.}
-}
-\if{html}{\out{
</div>}}
-}
-\section{Active bindings}{
-\if{html}{\out{
<div class="r6-active-bindings">}}
-\describe{
-\item{\code{n_steps}}{Returns number of steps in the \code{OptimizationTrace}.}
-}
-\if{html}{\out{
</div>}}
-}
-\section{Methods}{
-\subsection{Public methods}{
-\itemize{
-\item \href{#method-OptimizationTrace-new}{\code{OptimizationTrace$new()}}
-\item \href{#method-OptimizationTrace-set_scores}{\code{OptimizationTrace$set_scores()}}
-\item \href{#method-OptimizationTrace-shrink}{\code{OptimizationTrace$shrink()}}
-\item \href{#method-OptimizationTrace-get_scores}{\code{OptimizationTrace$get_scores()}}
-\item \href{#method-OptimizationTrace-print}{\code{OptimizationTrace$print()}}
-\item \href{#method-OptimizationTrace-as_tibble}{\code{OptimizationTrace$as_tibble()}}
-\item \href{#method-OptimizationTrace-plot}{\code{OptimizationTrace$plot()}}
-\item \href{#method-OptimizationTrace-clone}{\code{OptimizationTrace$clone()}}
-}
-}
-\if{html}{\out{
<hr>}}
-\if{html}{\out{<a id="method-OptimizationTrace-new"></a>}}
-\if{latex}{\out{\hypertarget{method-OptimizationTrace-new}{}}}
-\subsection{Method \code{new()}}{
-Create a new \code{OptimizationTrace} object.
-\subsection{Usage}{
-\if{html}{\out{
<div class="r">}}\preformatted{OptimizationTrace$new(n_steps, n_scores, score_names)}\if{html}{\out{
</div>}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{
<div class="arguments">}}
-\describe{
-\item{\code{n_steps}}{Number of values to save. Usually \code{n_steps == iterations + 1}.}
-
-\item{\code{n_scores}}{Number of scoring functions.}
-
-\item{\code{score_names}}{Names of scoring functions.}
-}
-\if{html}{\out{
</div>}}
-}
-\subsection{Examples}{
-\if{html}{\out{
<div class="r example copy-to-clipboard">}}
-\preformatted{tr <- OptimizationTrace$new(10, 2, c("score1", "score2"))
-}
-\if{html}{\out{
</div>}}
-
-}
-
-}
-\if{html}{\out{
<hr>}}
-\if{html}{\out{<a id="method-OptimizationTrace-set_scores"></a>}}
-\if{latex}{\out{\hypertarget{method-OptimizationTrace-set_scores}{}}}
-\subsection{Method \code{set_scores()}}{
-Set scores for i-th step.
-\subsection{Usage}{
-\if{html}{\out{
<div class="r">}}\preformatted{OptimizationTrace$set_scores(i, scores, aggregated_scores)}\if{html}{\out{
</div>}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{
<div class="arguments">}}
-\describe{
-\item{\code{i}}{Step number.}
-
-\item{\code{scores}}{Scores, a vector or a value if no auxiliary functions are used.}
-
-\item{\code{aggregated_scores}}{Vector of aggregated scores. Can be NULL.}
-}
-\if{html}{\out{
</div>}}
-}
-\subsection{Returns}{
-\code{OptimizationTrace} invisibly.
-}
-\subsection{Examples}{
-\if{html}{\out{
<div class="r example copy-to-clipboard">}}
-\preformatted{tr$set_scores(1, c(0.5, 0.5), NULL)
-tr$set_scores(2, c(0.5, 0.5), NULL)
-}
-\if{html}{\out{
</div>}}
-
-}
-
-}
-\if{html}{\out{
<hr>}}
-\if{html}{\out{<a id="method-OptimizationTrace-shrink"></a>}}
-\if{latex}{\out{\hypertarget{method-OptimizationTrace-shrink}{}}}
-\subsection{Method \code{shrink()}}{
-Shrink scores by keeping only first \code{last_step} scores.
-\subsection{Usage}{
-\if{html}{\out{
<div class="r">}}\preformatted{OptimizationTrace$shrink(last_step = self$last_step)}\if{html}{\out{
</div>}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{
<div class="arguments">}}
-\describe{
-\item{\code{last_step}}{Last step to keep.}
-}
-\if{html}{\out{
</div>}}
-}
-\subsection{Returns}{
-\code{OptimizationTrace} invisibly.
-}
-\subsection{Examples}{
-\if{html}{\out{
<div class="r example copy-to-clipboard">}}
-\preformatted{tr$shrink(2)
-}
-\if{html}{\out{
</div>}}
-
-}
-
-}
-\if{html}{\out{
<hr>}}
-\if{html}{\out{<a id="method-OptimizationTrace-get_scores"></a>}}
-\if{latex}{\out{\hypertarget{method-OptimizationTrace-get_scores}{}}}
-\subsection{Method \code{get_scores()}}{
-Return individual (not aggregated!) scores by keeping only first \code{last_step} scores.
-\subsection{Usage}{
-\if{html}{\out{
<div class="r">}}\preformatted{OptimizationTrace$get_scores(last_step = self$last_step)}\if{html}{\out{
</div>}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{
<div class="arguments">}}
-\describe{
-\item{\code{last_step}}{Last step to keep.}
-}
-\if{html}{\out{
</div>}}
-}
-\subsection{Returns}{
-\code{OptimizationTrace} invisibly.
-}
-\subsection{Examples}{
-\if{html}{\out{
<div class="r example copy-to-clipboard">}}
-\preformatted{tr$get_scores()
-}
-\if{html}{\out{
</div>}}
-
-}
-
-}
-\if{html}{\out{
<hr>}}
-\if{html}{\out{<a id="method-OptimizationTrace-print"></a>}}
-\if{latex}{\out{\hypertarget{method-OptimizationTrace-print}{}}}
-\subsection{Method \code{print()}}{
-Print \code{OptimizationTrace}.
-\subsection{Usage}{
-\if{html}{\out{
<div class="r">}}\preformatted{OptimizationTrace$print(...)}\if{html}{\out{
</div>}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{
<div class="arguments">}}
-\describe{
-\item{\code{...}}{Unused.}
-}
-\if{html}{\out{
</div>}}
-}
-\subsection{Returns}{
-\code{OptimizationTrace} invisibly.
-}
-\subsection{Examples}{
-\if{html}{\out{
<div class="r example copy-to-clipboard">}}
-\preformatted{print(tr)
-}
-\if{html}{\out{
</div>}}
-
-}
-
-}
-\if{html}{\out{
<hr>}}
-\if{html}{\out{<a id="method-OptimizationTrace-as_tibble"></a>}}
-\if{latex}{\out{\hypertarget{method-OptimizationTrace-as_tibble}{}}}
-\subsection{Method \code{as_tibble()}}{
-Convert to a \link{data.frame}.
-\subsection{Usage}{
-\if{html}{\out{
<div class="r">}}\preformatted{OptimizationTrace$as_tibble(include_aggregated = TRUE)}\if{html}{\out{
</div>}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{
<div class="arguments">}}
-\describe{
-\item{\code{include_aggregated}}{Include aggregated scores. Otherwise only
-raw scores are exported.}
-}
-\if{html}{\out{
</div>}}
-}
-\subsection{Returns}{
-\link{data.frame}
-}
-\subsection{Examples}{
-\if{html}{\out{
<div class="r example copy-to-clipboard">}}
-\preformatted{tr$as_tibble()
-}
-\if{html}{\out{
</div>}}
-
-}
-
-}
-\if{html}{\out{
<hr>}}
-\if{html}{\out{<a id="method-OptimizationTrace-plot"></a>}}
-\if{latex}{\out{\hypertarget{method-OptimizationTrace-plot}{}}}
-\subsection{Method \code{plot()}}{
-Plot \code{OptimizationTrace}. Only the main score at the moment.
-\subsection{Usage}{
-\if{html}{\out{
<div class="r">}}\preformatted{OptimizationTrace$plot(include_aggregated = FALSE, ...)}\if{html}{\out{
</div>}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{
<div class="arguments">}}
-\describe{
-\item{\code{include_aggregated}}{Include aggregated scores. Otherwise only
-raw scores are plotted.}
-
-\item{\code{...}}{Not used.}
-}
-\if{html}{\out{
</div>}}
-}
-\subsection{Examples}{
-\if{html}{\out{
<div class="r example copy-to-clipboard">}}
-\preformatted{tr <- OptimizationTrace$new(10, 3, letters[1:3])
-for (i in seq_len(10)) {
-  tr$set_scores(i, rnorm(3)*(1:3), rnorm(3)*(1:3))
-}
-
-# plot only the main scores
-plot(tr)
-# plot main and aggregated scores
-plot(tr, include_aggregated=TRUE)
-}
-\if{html}{\out{
</div>}}
-
-}
-
-}
-\if{html}{\out{
<hr>}}
-\if{html}{\out{<a id="method-OptimizationTrace-clone"></a>}}
-\if{latex}{\out{\hypertarget{method-OptimizationTrace-clone}{}}}
-\subsection{Method \code{clone()}}{
-The objects of this class are cloneable with this method.
-\subsection{Usage}{
-\if{html}{\out{
<div class="r">}}\preformatted{OptimizationTrace$clone(deep = FALSE)}\if{html}{\out{
</div>}}
-}
-
-\subsection{Arguments}{
-\if{html}{\out{
<div class="arguments">}}
-\describe{
-\item{\code{deep}}{Whether to make a deep clone.}
-}
-\if{html}{\out{
}} -} -} -} diff --git a/man/all_equal_df.Rd b/man/all_equal_df.Rd new file mode 100644 index 00000000..d44ce8fe --- /dev/null +++ b/man/all_equal_df.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/all_equal_df.R +\name{all_equal_df} +\alias{all_equal_df} +\title{Compare two data.frames.} +\usage{ +all_equal_df(df1, df2) +} +\arguments{ +\item{df1}{first \code{\link[=data.frame]{data.frame()}} to compare} + +\item{df2}{second \code{data.frame()} to compare} +} +\value{ +\code{TRUE} or \code{FALSE} in case differences are present +} +\description{ +This will convert factors to characters and disregard +row and column order +} +\keyword{internal} diff --git a/man/assign_from_table.Rd b/man/assign_from_table.Rd index 8f802078..770d3cd8 100644 --- a/man/assign_from_table.Rd +++ b/man/assign_from_table.Rd @@ -16,7 +16,7 @@ the function will check if samples in \code{batch_container} are identical to th \code{samples} argument.} } \value{ -Returns \code{BatchContainer}, invisibly. +Returns a new \code{BatchContainer}. } \description{ Distributes samples based on a sample sheet. @@ -39,7 +39,7 @@ sample_sheet <- tibble::tribble( 2, "a", 3, 5, "TRT", ) # assign samples from the sample sheet -assign_from_table(bc, sample_sheet) +bc <- assign_from_table(bc, sample_sheet) bc$get_samples(remove_empty_locations = TRUE) diff --git a/man/assign_in_order.Rd b/man/assign_in_order.Rd index 2263f586..4fa43a7c 100644 --- a/man/assign_in_order.Rd +++ b/man/assign_in_order.Rd @@ -12,7 +12,7 @@ assign_in_order(batch_container, samples = NULL) \item{samples}{data.frame with samples.} } \value{ -Returns \code{BatchContainer}, invisibly. +Returns a new \code{BatchContainer}. } \description{ First sample is assigned to the first location, second @@ -27,10 +27,10 @@ bc set.seed(42) # assigns samples randomly -assign_random(bc, samples) +bc <- assign_random(bc, samples) bc$get_samples() # assigns samples in order -assign_in_order(bc) +bc <- assign_in_order(bc) bc$get_samples() } diff --git a/man/assign_random.Rd b/man/assign_random.Rd index 50afcbb3..5742e59a 100644 --- a/man/assign_random.Rd +++ b/man/assign_random.Rd @@ -12,7 +12,7 @@ assign_random(batch_container, samples = NULL) \item{samples}{data.frame with samples.} } \value{ -Returns \code{BatchContainer}, invisibly. +Returns a new \code{BatchContainer}. } \description{ Assignment function which distributes samples randomly. 
@@ -26,10 +26,10 @@ bc set.seed(42) # assigns samples randomly -assign_random(bc, samples) +bc <- assign_random(bc, samples) bc$get_samples() # assigns samples in order -assign_in_order(bc) +bc <- assign_in_order(bc) bc$get_samples() } diff --git a/man/complete_random_shuffling.Rd b/man/complete_random_shuffling.Rd index 75611c6c..e8b849f2 100644 --- a/man/complete_random_shuffling.Rd +++ b/man/complete_random_shuffling.Rd @@ -23,9 +23,9 @@ data("invivo_study_samples") bc <- BatchContainer$new( dimensions = c("plate" = 2, "column" = 5, "row" = 6) ) -bc$scoring_f <- osat_score_generator("plate", "Sex") -optimize_design( - bc, invivo_study_samples, +scoring_f <- osat_score_generator("plate", "Sex") +bc <- optimize_design( + bc, scoring = scoring_f, invivo_study_samples, max_iter = 100, shuffle_proposal_func = complete_random_shuffling ) diff --git a/man/examples/assignment.R b/man/examples/assignment.R index e6604410..bc1b2a9f 100644 --- a/man/examples/assignment.R +++ b/man/examples/assignment.R @@ -6,9 +6,9 @@ bc set.seed(42) # assigns samples randomly -assign_random(bc, samples) +bc <- assign_random(bc, samples) bc$get_samples() # assigns samples in order -assign_in_order(bc) +bc <- assign_in_order(bc) bc$get_samples() diff --git a/man/examples/shuffle_with_constraints.R b/man/examples/shuffle_with_constraints.R index a8168bbe..fe704c5e 100644 --- a/man/examples/shuffle_with_constraints.R +++ b/man/examples/shuffle_with_constraints.R @@ -10,7 +10,7 @@ bc <- BatchContainer$new( dimensions = c("plate" = 5, "position" = 25) ) -bc$scoring_f <- function(samples) { +scoring_f <- function(samples) { osat_score( samples, "plate", @@ -21,8 +21,9 @@ bc$scoring_f <- function(samples) { # in this example we treat all the positions in the plate as equal. # when shuffling we enforce that source location is non-empty, # and destination location has a different plate number -optimize_design( +bc <- optimize_design( bc, + scoring = scoring_f, samples, shuffle_proposal = shuffle_with_constraints( # source is non-empty location diff --git a/man/examples/two_step_optimization.R b/man/examples/two_step_optimization.R index 7ca0f39c..94635413 100644 --- a/man/examples/two_step_optimization.R +++ b/man/examples/two_step_optimization.R @@ -7,7 +7,7 @@ bc <- BatchContainer$new( ) ) -assign_in_order(bc, samples = tibble::tibble( +bc <- assign_in_order(bc, samples = tibble::tibble( Group = c(rep(c("Grp 1", "Grp 2", "Grp 3", "Grp 4"), each = 8)), ID = 1:32 )) @@ -22,24 +22,29 @@ plot_plate(bc, ) # Step 1, assign samples to plates -bc$scoring_f <- osat_score_generator( +scoring_f <- osat_score_generator( batch_vars = c("plate"), feature_vars = c("Group") ) -optimize_design(bc, +bc <- optimize_design( + bc, + scoring = scoring_f, max_iter = 10, # the real number of iterations should be bigger n_shuffle = 2, quiet = TRUE ) -plot_plate(bc, +plot_plate( + bc, plate = plate, row = row, column = col, .color = Group ) # Step 2, distribute samples within plates -bc$scoring_f <- mk_plate_scoring_functions( +scoring_f <- mk_plate_scoring_functions( bc, plate = "plate", row = "row", column = "col", group = "Group" ) -optimize_design(bc, +bc <- optimize_design( + bc, + scoring = scoring_f, max_iter = 50, shuffle_proposal_func = mk_subgroup_shuffling_function(subgroup_vars = c("plate")), aggregate_scores_func = L2s_norm, diff --git a/man/figures/README-optimized_assignment-1.png b/man/figures/README-optimized_assignment-1.png index d2f931ee..cc5cd780 100644 Binary files a/man/figures/README-optimized_assignment-1.png and 
b/man/figures/README-optimized_assignment-1.png differ
diff --git a/man/figures/README-optimized_assignment-2.png b/man/figures/README-optimized_assignment-2.png
index 0b61a4b1..78ac8243 100644
Binary files a/man/figures/README-optimized_assignment-2.png and b/man/figures/README-optimized_assignment-2.png differ
diff --git a/man/figures/README-random_assignment-1.png b/man/figures/README-random_assignment-1.png
index af7c6479..9ad8551c 100644
Binary files a/man/figures/README-random_assignment-1.png and b/man/figures/README-random_assignment-1.png differ
diff --git a/man/mk_autoscale_function.Rd b/man/mk_autoscale_function.Rd
index 3da74fab..c885e84d 100644
--- a/man/mk_autoscale_function.Rd
+++ b/man/mk_autoscale_function.Rd
@@ -6,6 +6,7 @@
 \usage{
 mk_autoscale_function(
   batch_container,
+  scoring,
   random_perm,
   use_boxcox = TRUE,
   sample_attributes_fixed = FALSE
@@ -14,6 +15,9 @@ mk_autoscale_function(
 \arguments{
 \item{batch_container}{An instance of \link{BatchContainer}.}
 
+\item{scoring}{A named \code{\link[=list]{list()}} of scoring functions. Each function should
+return a vector of non-zero length.}
+
 \item{random_perm}{Number of random sample permutations for the estimation of the scaling params.}
 
 \item{use_boxcox}{Logical; if TRUE and the \code{bestNormalize} package is available, boxcox transformations will be used to
diff --git a/man/mk_plate_scoring_functions.Rd b/man/mk_plate_scoring_functions.Rd
index 560b9a41..722ac793 100644
--- a/man/mk_plate_scoring_functions.Rd
+++ b/man/mk_plate_scoring_functions.Rd
@@ -42,12 +42,12 @@ data("invivo_study_samples")
 bc <- BatchContainer$new(
   dimensions = c("column" = 6, "row" = 10)
 )
-assign_random(bc, invivo_study_samples)
-bc$scoring_f <- mk_plate_scoring_functions(
+bc <- assign_random(bc, invivo_study_samples)
+scoring_f <- mk_plate_scoring_functions(
   bc,
   row = "row", column = "column", group = "Sex"
 )
-optimize_design(bc, max_iter = 100)
+bc <- optimize_design(bc, scoring = scoring_f, max_iter = 100)
 plot_plate(bc$get_samples(), .col = Sex)
 }
diff --git a/man/mk_subgroup_shuffling_function.Rd b/man/mk_subgroup_shuffling_function.Rd
index dd0728e0..dd265dd4 100644
--- a/man/mk_subgroup_shuffling_function.Rd
+++ b/man/mk_subgroup_shuffling_function.Rd
@@ -34,7 +34,7 @@ bc <- BatchContainer$new(
   )
 )
 
-assign_in_order(bc, samples = tibble::tibble(
+bc <- assign_in_order(bc, samples = tibble::tibble(
   Group = c(rep(c("Grp 1", "Grp 2", "Grp 3", "Grp 4"), each = 8)),
   ID = 1:32
 ))
@@ -49,24 +49,29 @@ plot_plate(bc,
 )
 
 # Step 1, assign samples to plates
-bc$scoring_f <- osat_score_generator(
+scoring_f <- osat_score_generator(
   batch_vars = c("plate"), feature_vars = c("Group")
 )
-optimize_design(bc,
+bc <- optimize_design(
+  bc,
+  scoring = scoring_f,
   max_iter = 10, # the real number of iterations should be bigger
   n_shuffle = 2,
   quiet = TRUE
 )
-plot_plate(bc,
+plot_plate(
+  bc,
   plate = plate, row = row, column = col,
   .color = Group
 )
 
 # Step 2, distribute samples within plates
-bc$scoring_f <- mk_plate_scoring_functions(
+scoring_f <- mk_plate_scoring_functions(
   bc,
   plate = "plate", row = "row", column = "col", group = "Group"
 )
-optimize_design(bc,
+bc <- optimize_design(
+  bc,
+  scoring = scoring_f,
   max_iter = 50,
   shuffle_proposal_func = mk_subgroup_shuffling_function(subgroup_vars = c("plate")),
   aggregate_scores_func = L2s_norm,
diff --git a/man/mk_swapping_function.Rd b/man/mk_swapping_function.Rd
index 1fdde7c5..65bffa4e 100644
--- a/man/mk_swapping_function.Rd
+++ b/man/mk_swapping_function.Rd
@@ -22,9 +22,9 @@ data("invivo_study_samples")
 bc <- BatchContainer$new(
   dimensions = c("plate" = 2, "column" = 5, "row" = 6)
 )
-bc$scoring_f <- osat_score_generator("plate", "Sex")
+scoring_f <- osat_score_generator("plate", "Sex")
 optimize_design(
-  bc, invivo_study_samples,
+  bc, scoring = scoring_f, invivo_study_samples,
   max_iter = 100,
   shuffle_proposal_func = mk_swapping_function(1)
 )
diff --git a/man/optimize_design.Rd b/man/optimize_design.Rd
index f0f8ef6e..5ce33e36 100644
--- a/man/optimize_design.Rd
+++ b/man/optimize_design.Rd
@@ -7,6 +7,7 @@
 optimize_design(
   batch_container,
   samples = NULL,
+  scoring = NULL,
   n_shuffle = NULL,
   shuffle_proposal_func = NULL,
   acceptance_func = accept_strict_improvement,
@@ -27,8 +28,10 @@
 \item{samples}{A \code{data.frame} with sample information.
 Should be \code{NULL} if the \code{BatchContainer} already has samples in it.}
 
+\item{scoring}{Scoring function or a named \code{\link[=list]{list()}} of scoring functions.}
+
 \item{n_shuffle}{Vector of length 1 or larger, defining how many random sample
-swaps should be performed in each iteration. If length(n_shuffle)==1,
+swaps should be performed in each iteration. If \code{length(n_shuffle)==1},
 this sets no limit to the number of iterations.
 Otherwise, the optimization stops if the swapping protocol is exhausted.}
 
@@ -87,7 +90,9 @@ data("invivo_study_samples")
 bc <- BatchContainer$new(
   dimensions = c("plate" = 2, "column" = 5, "row" = 6)
 )
-bc$scoring_f <- osat_score_generator("plate", "Sex")
-optimize_design(bc, invivo_study_samples, max_iter = 100)
+bc <- optimize_design(bc, invivo_study_samples,
+  scoring = osat_score_generator("plate", "Sex"),
+  max_iter = 100
+)
 plot_plate(bc$get_samples(), .col = Sex)
 }
diff --git a/man/plot_plate.Rd b/man/plot_plate.Rd
index 276f48ed..24a53ce5 100644
--- a/man/plot_plate.Rd
+++ b/man/plot_plate.Rd
@@ -70,7 +70,7 @@ sample_sheet <- tibble::tibble(
 )
 
 # assign samples from the sample sheet
-assign_random(bc, samples = sample_sheet)
+bc <- assign_random(bc, samples = sample_sheet)
 
 plot_plate(bc$get_samples(),
   plate = plate, column = column, row = row,
diff --git a/man/random_score_variances.Rd b/man/random_score_variances.Rd
index a8451066..e539b28c 100644
--- a/man/random_score_variances.Rd
+++ b/man/random_score_variances.Rd
@@ -4,11 +4,19 @@
 \alias{random_score_variances}
 \title{Estimate the variance of individual scores by a series of completely random sample permutations}
 \usage{
-random_score_variances(batch_container, random_perm, sample_attributes_fixed)
+random_score_variances(
+  batch_container,
+  scoring,
+  random_perm,
+  sample_attributes_fixed
+)
 }
 \arguments{
 \item{batch_container}{An instance of \code{BatchContainer}.}
 
+\item{scoring}{A named \code{\link[=list]{list()}} of scoring functions. Each function should
+return a vector of non-zero length.}
+
 \item{random_perm}{Number of random sample permutations to be done.}
 
 \item{sample_attributes_fixed}{Logical; if FALSE, simulate a shuffle function that alters sample attributes at each iteration.}
diff --git a/man/sample_random_scores.Rd b/man/sample_random_scores.Rd
index d50f275e..f8546c6f 100644
--- a/man/sample_random_scores.Rd
+++ b/man/sample_random_scores.Rd
@@ -4,11 +4,19 @@
 \alias{sample_random_scores}
 \title{Sample scores from a number of completely random sample permutations}
 \usage{
-sample_random_scores(batch_container, random_perm, sample_attributes_fixed)
+sample_random_scores(
+  batch_container,
+  scoring,
+  random_perm,
+  sample_attributes_fixed
+)
 }
 \arguments{
 \item{batch_container}{An instance of \link{BatchContainer}.}
 
+\item{scoring}{A named \code{\link[=list]{list()}} of scoring functions. Each function should
+return a vector of non-zero length.}
+
 \item{random_perm}{Number of random sample permutations to be done.}
 
 \item{sample_attributes_fixed}{Logical; if \code{FALSE}, simulate a shuffle function that alters sample attributes at each iteration.}
diff --git a/man/shrink_mat.Rd b/man/shrink_mat.Rd
new file mode 100644
index 00000000..d3ffc2ec
--- /dev/null
+++ b/man/shrink_mat.Rd
@@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/trace.R
+\name{shrink_mat}
+\alias{shrink_mat}
+\title{Shrinks a matrix with scores and adds an iteration column.}
+\usage{
+shrink_mat(m, last_iteration)
+}
+\arguments{
+\item{m}{input matrix}
+
+\item{last_iteration}{last iteration}
+}
+\value{
+a \code{\link[tibble:tibble]{tibble::tibble()}} wrapped in a list
+}
+\description{
+Shrinks a matrix with scores and adds an iteration column.
+}
+\keyword{internal}
diff --git a/man/shuffle_with_constraints.Rd b/man/shuffle_with_constraints.Rd
index 23880a09..48134daf 100644
--- a/man/shuffle_with_constraints.Rd
+++ b/man/shuffle_with_constraints.Rd
@@ -40,7 +40,7 @@ bc <- BatchContainer$new(
   dimensions = c("plate" = 5, "position" = 25)
 )
 
-bc$scoring_f <- function(samples) {
+scoring_f <- function(samples) {
   osat_score(
     samples,
     "plate",
@@ -51,8 +51,9 @@
 # in this example we treat all the positions in the plate as equal.
 # when shuffling we enforce that source location is non-empty,
 # and destination location has a different plate number
-optimize_design(
+bc <- optimize_design(
   bc,
+  scoring = scoring_f,
   samples,
   shuffle_proposal = shuffle_with_constraints(
     # source is non-empty location
diff --git a/tests/testthat/test-all-equal.R b/tests/testthat/test-all-equal.R
new file mode 100644
index 00000000..24673d8c
--- /dev/null
+++ b/tests/testthat/test-all-equal.R
@@ -0,0 +1,68 @@
+test_that("basic all_equal_df behavior", {
+  expect_true(all_equal_df(iris, iris))
+
+  x <- iris
+  expect_true(all_equal_df(x, iris))
+  # modify a single value
+  x[1, 2] <- 20
+  expect_false(all_equal_df(x, iris))
+
+  # add some NAs
+  x <- iris
+  x[1, 1] <- NA
+  expect_false(all_equal_df(x, iris))
+})
+
+test_that("all_equal_df on empty dfs", {
+  expect_true(all_equal_df(data.frame(), data.frame()))
+  expect_true(all_equal_df(
+    data.frame(a=character(0), b=integer(0)),
+    data.frame(a=character(0), b=integer(0))
+  ))
+})
+
+test_that("all_equal_df on NA dfs", {
+  expect_true(all_equal_df(
+    data.frame(a=rep(NA_character_,10), b=rep(NA_integer_, 10)),
+    data.frame(a=rep(NA_character_,10), b=rep(NA_integer_, 10))
+  ))
+  x <- data.frame(a=rep(NA_character_,10), b=rep(NA_integer_, 10))
+  y <- x
+  x[3, "b"] = 2
+  expect_false(all_equal_df(x, y))
+})
+
+
+test_that("all_equal_df on reorder", {
+  for (i in 1:10) {
+    row_reorder <- iris[sample(seq_len(nrow(iris))),]
+    expect_true(all_equal_df(iris, row_reorder))
+  }
+
+  for (i in 1:10) {
+    col_reorder <- iris[sample(colnames(iris))]
+    expect_true(all_equal_df(iris, col_reorder))
+  }
+
+  for (i in 1:10) {
+    row_col_reorder <- iris[sample(seq_len(nrow(iris))),]
+    row_col_reorder <- row_col_reorder[sample(seq_len(nrow(row_col_reorder))),]
+    expect_true(all_equal_df(iris, row_col_reorder))
+  }
+})
+
+test_that("all_equal_df on reorder with NAs", {
+  x <- data.frame(a=rep(NA_character_,10), b=rep(NA_integer_, 10))
+  x[3, "b"] = 2
+
+  for (i in 1:10) {
+    row_col_reorder <- x[sample(seq_len(nrow(x))),]
+    row_col_reorder <- row_col_reorder[sample(seq_len(nrow(row_col_reorder))),]
+    expect_true(all_equal_df(x, row_col_reorder))
+  }
+})
+
+test_that("compare tibble and a data.frame with all_equal_df", {
+  x <- tibble::tibble(iris)
+  expect_true(all_equal_df(x, iris))
+})
diff --git a/tests/testthat/test-compute-score-vector.R b/tests/testthat/test-compute-score-vector.R
new file mode 100644
index 00000000..205cc8ac
--- /dev/null
+++ b/tests/testthat/test-compute-score-vector.R
@@ -0,0 +1,21 @@
+test_that("bc$score() produces correct vector names", {
+  bc <- BatchContainer$new(
+    dimensions = c(row = 3, column = 3)
+  )
+  samp <- data.frame(sid = 1)
+  bc <- assign_in_order(bc, samp)
+  expect_equal(
+    bc$score(
+      list(
+        a = function(...) c(1, 2),
+        b = function(...) c(1),
+        c = function(...) c(x=1, y=2),
+        d = function(...)
c(1) + ) + ), + setNames( + c(1, 2, 1, 1, 2, 1), + c("a1", "a2", "b", "cx", "cy", "d") + ) + ) +}) diff --git a/tests/testthat/test-sample_attr.R b/tests/testthat/test-sample_attr.R index f96fb2e7..d5f5572f 100644 --- a/tests/testthat/test-sample_attr.R +++ b/tests/testthat/test-sample_attr.R @@ -2,7 +2,7 @@ bc <- BatchContainer$new( dimensions = c(row = 3, column = 3) ) samp <- data.frame(i = 1:8) -assign_in_order(bc, samp) +bc <- assign_in_order(bc, samp) test_that("cannot assign has_samples_attr", { bc1 <- bc$copy() diff --git a/tests/testthat/test-scoring-names.R b/tests/testthat/test-scoring-names.R new file mode 100644 index 00000000..b4f4931e --- /dev/null +++ b/tests/testthat/test-scoring-names.R @@ -0,0 +1,46 @@ +samples <- data.frame(i = 1:384, x = rnorm(384)) + +bc <- BatchContainer$new( + dimensions = c(row = 16, column = 24) +) |> + assign_in_order(samples) + + +test_that("scoring functions can be an unnamed list", { + tmp <- optimize_design(bc, + scoring = list( + \(...) rnorm(1), + \(...) rnorm(1) + ), + max_iter = 10 + ) + expect_equal( + colnames(tmp$trace$scores[[1]]), + c("step", "score_1", "score_2") + ) +}) + +test_that("all scoring functions should be named (or unnamed)", { + expect_error( + optimize_design(bc, + scoring = list( + \(...) rnorm(1), + f = \(...) rnorm(1) + ), + max_iter = 10 + ), + "scoring cannot be a partially named list" + ) +}) + +test_that("scoring function cannot have a name 'step'", { + expect_error( + optimize_design(bc, + scoring = list( + step = \(...) rnorm(1) + ), + max_iter = 10 + ), + "score name cannot be 'step'" + ) +}) diff --git a/tests/testthat/test_assign_from_table.R b/tests/testthat/test_assign_from_table.R index 57b0d7ad..4206681a 100644 --- a/tests/testthat/test_assign_from_table.R +++ b/tests/testthat/test_assign_from_table.R @@ -40,17 +40,17 @@ sample_sheet4 <- rbind( test_that("assign samples from sample sheet", { bc <- bc1$copy() expect_null(bc$samples) - assign_from_table(bc, sample_sheet1) + bc <- assign_from_table(bc, sample_sheet1) expect_equal(nrow(sample_sheet1), nrow(bc$samples)) bc <- bc1$copy() expect_null(bc$samples) - assign_from_table(bc, sample_sheet2) + bc <- assign_from_table(bc, sample_sheet2) expect_equal(nrow(sample_sheet2), nrow(bc$samples)) bc <- bc2$copy() expect_null(bc$samples) - assign_from_table(bc, sample_sheet2) + bc <- assign_from_table(bc, sample_sheet2) expect_equal(nrow(sample_sheet2), nrow(bc$samples)) }) @@ -58,20 +58,20 @@ test_that("assign samples from sample sheet works when samples are added/assigne bc <- bc1$copy() expect_null(bc$samples) bc$samples <- subset(sample_sheet1, select = sampleID) - assign_from_table(bc, sample_sheet1) + bc <- assign_from_table(bc, sample_sheet1) bc <- bc2$copy() expect_null(bc$samples) - assign_from_table(bc, sample_sheet2) - assign_random(bc) - assign_from_table(bc, sample_sheet2) + bc <- assign_from_table(bc, sample_sheet2) + bc <- assign_random(bc) + bc <- assign_from_table(bc, sample_sheet2) }) test_that("assign samples from sample sheet works only when samples match what's in the container", { bc <- bc1$copy() expect_null(bc$samples) - assign_from_table(bc, sample_sheet1) + bc <- assign_from_table(bc, sample_sheet1) expect_error(assign_from_table(bc, sample_sheet2)) }) diff --git a/tests/testthat/test_assign_random.R b/tests/testthat/test_assign_random.R index 4d6378db..fc4c0bca 100644 --- a/tests/testthat/test_assign_random.R +++ b/tests/testthat/test_assign_random.R @@ -6,12 +6,12 @@ bc <- BatchContainer$new( samples <- data.frame(i = 1:384, x = 
rnorm(384))
 
 test_that("assign_random shuffles elements", {
-  assign_random(bc, samples)
+  bc <- assign_random(bc, samples)
   # this can fail by chance, but the probability is extremely low
   # we set the random seed to make sure this is reproducible
   expect_true(any(bc$get_samples()$i != 1:384))
-  assign_in_order(bc)
+  bc <- assign_in_order(bc)
   expect_equal(bc$get_samples()$i, 1:384)
-  assign_random(bc)
+  bc <- assign_random(bc)
   expect_true(any(bc$get_samples()$i != 1:384))
 })
diff --git a/tests/testthat/test_batch_container.R b/tests/testthat/test_batch_container.R
index 0931293b..5f2e7276 100644
--- a/tests/testthat/test_batch_container.R
+++ b/tests/testthat/test_batch_container.R
@@ -47,14 +47,14 @@ test_that("Test adding samples and then assigning them", {
   bc1_copy <- bc1$copy()
   bc1_copy$samples <- samples
   expect_null(bc1_copy$assignment)
-  assign_in_order(bc1_copy)
+  bc1_copy <- assign_in_order(bc1_copy)
   expect_equal(
     bc1_copy$assignment,
     c(seq_len(nrow(samples)), rep(NA_integer_, bc1_copy$n_locations - nrow(samples)))
   )
 
   bc1_copy <- bc1$copy()
-  assign_in_order(bc1_copy, samples)
+  bc1_copy <- assign_in_order(bc1_copy, samples)
   expect_equal(
     bc1_copy$assignment,
     c(seq_len(nrow(samples)), rep(NA_integer_, bc1_copy$n_locations - nrow(samples)))
@@ -66,7 +66,7 @@ test_that("Test assigning samples randomly", {
   bc3_copy <- bc3_excl$copy()
   expect_null(bc3_copy$assignment)
   expect_false(any(!is.na(bc3_copy$assignment)))
-  assign_random(bc3_copy, samples)
+  bc3_copy <- assign_random(bc3_copy, samples)
   expect_true(any(!is.na(bc3_copy$assignment)))
 })
diff --git a/tests/testthat/test_batch_container_from_table.R b/tests/testthat/test_batch_container_from_table.R
index 7b0aa918..68c7723d 100644
--- a/tests/testthat/test_batch_container_from_table.R
+++ b/tests/testthat/test_batch_container_from_table.R
@@ -31,7 +31,7 @@ test_that(
       sample_id = 1:9
     )
     bc <- batch_container_from_table(tab, c("row", "column"))
-    expect_true(dplyr::all_equal(bc$get_samples(), tab))
+    expect_true(all_equal_df(bc$get_samples(), tab))
   }
 )
 
@@ -44,7 +44,7 @@ test_that(
       sample_id = c(1, 2, 3, NA, 5, 6, 7, NA, 9)
     )
     bc <- batch_container_from_table(tab, c("row", "column"))
-    expect_true(dplyr::all_equal(bc$get_samples(), tab))
+    expect_true(all_equal_df(bc$get_samples(), tab))
   }
 )
 
@@ -61,7 +61,7 @@ test_that(
     expect_warning({
      bc <- batch_container_from_table(tab, c("row", "column"))
     })
-    expect_true(dplyr::all_equal(bc$get_samples(), tab))
+    expect_true(all_equal_df(bc$get_samples(), tab))
   }
 )
diff --git a/tests/testthat/test_copy_samples_cache.R b/tests/testthat/test_copy_samples_cache.R
index 807767a8..826d3822 100644
--- a/tests/testthat/test_copy_samples_cache.R
+++ b/tests/testthat/test_copy_samples_cache.R
@@ -3,7 +3,7 @@ test_that("$copy() does not preserve samples table cache", {
     dimensions = c(row = 3, column = 3)
   )
   samp <- data.frame(i = 1:9)
-  assign_in_order(bc, samp)
+  bc <- assign_in_order(bc, samp)
   # creates cache
   bc$get_samples()
   bc_clone <- bc$copy()
@@ -18,14 +18,14 @@ test_that("Cloning preserves dimensions, samples, scores & assignment", {
     exclude = data.frame(row = 3, column = 1:3)
   )
   samp <- data.frame(i = 1:6)
-  assign_in_order(bc, samp)
-  bc$scoring_f <- function(...) 42L
+  bc <- assign_in_order(bc, samp)
+  scoring_f <- function(...) 42L
 
   bc_clone <- bc$copy()
   expect_equal(bc$samples, bc_clone$samples)
   expect_equal(bc$get_locations(), bc_clone$get_locations())
   expect_equal(bc$n_locations, bc_clone$n_locations)
-  expect_equal(bc$score(), bc_clone$score())
+  expect_equal(bc$score(scoring_f), bc_clone$score(scoring_f))
   expect_equal(bc$n_dimensions, bc_clone$n_dimensions)
   expect_equal(bc$dimension_names, bc_clone$dimension_names)
   expect_equal(bc$get_samples(include_id = TRUE), bc_clone$get_samples(include_id = TRUE))
diff --git a/tests/testthat/test_get_samples.R b/tests/testthat/test_get_samples.R
index 908acfa7..df3f3a82 100644
--- a/tests/testthat/test_get_samples.R
+++ b/tests/testthat/test_get_samples.R
@@ -5,7 +5,7 @@ test_that("$get_samples(as_tibble=TRUE) returns correct columns & expected number
   samp <- data.frame(i = 1:8)
   expect_error(bc$get_samples())
   bc$samples <- samp
-  assign_in_order(bc)
+  bc <- assign_in_order(bc)
   stab <- bc$get_samples()
   expect_true(tibble::is_tibble(stab))
   expect_named(stab, c("row", "column", "i"))
@@ -18,7 +18,7 @@
   expect_true(tibble::is_tibble(stab))
   expect_named(stab, c("row", "column", ".sample_id", "i"))
   expect_equal(nrow(stab), 8)
-  assign_random(bc)
+  bc <- assign_random(bc)
   stab <- bc$get_samples(include_id = TRUE, assignment = FALSE)
   expect_true(tibble::is_tibble(stab))
   expect_named(stab, c(".sample_id", "i"))
@@ -33,7 +33,7 @@ test_that("$get_samples(as_tibble=FALSE) returns correct columns & expected numbe
   samp <- data.frame(i = 1:8)
   expect_error(bc$get_samples())
   bc$samples <- samp
-  assign_in_order(bc)
+  bc <- assign_in_order(bc)
   stab <- bc$get_samples(as_tibble = FALSE)
   expect_true(data.table::is.data.table(stab))
   expect_named(stab, c("row", "column", "i"))
@@ -46,7 +46,7 @@
   expect_true(data.table::is.data.table(stab))
   expect_named(stab, c("row", "column", ".sample_id", "i"))
   expect_equal(nrow(stab), 8)
-  assign_random(bc)
+  bc <- assign_random(bc)
   stab <- bc$get_samples(include_id = TRUE, assignment = FALSE, as_tibble = FALSE)
   expect_true(data.table::is.data.table(stab))
   expect_named(stab, c(".sample_id", "i"))
diff --git a/tests/testthat/test_mk_shuffle_function.R b/tests/testthat/test_mk_shuffle_function.R
index 2db08caa..bc6c4ec9 100644
--- a/tests/testthat/test_mk_shuffle_function.R
+++ b/tests/testthat/test_mk_shuffle_function.R
@@ -6,7 +6,7 @@ samples <- data.frame(
   sampleId = seq_len(bc$n_locations)
 )
 
-assign_in_order(bc, samples)
+bc <- assign_in_order(bc, samples)
 
 test_that("mk_swapping_function returns an error if iteration number is too large", {
   f <- mk_swapping_function(c(1, 2, 3))
diff --git a/tests/testthat/test_move_samples.R b/tests/testthat/test_move_samples.R
index 690cf341..b6b4a85a 100644
--- a/tests/testthat/test_move_samples.R
+++ b/tests/testthat/test_move_samples.R
@@ -2,7 +2,7 @@ bc <- BatchContainer$new(
   dimensions = c(row = 3, column = 3)
 )
 samp <- data.frame(i = 1:9)
-assign_in_order(bc, samp)
+bc <- assign_in_order(bc, samp)
 
 test_that("assigning $assignment generates a warning", {
   expect_silent(bc$assignment)
@@ -23,7 +23,7 @@ bc <- BatchContainer$new(
   dimensions = c(row = 3, column = 3)
 )
 samp <- data.frame(i = 1:8)
-assign_in_order(bc, samp)
+bc <- assign_in_order(bc, samp)
 
 test_that("$move_samples() works as expected with src & dst (without $get_samples())", {
   bc$move_samples(src = c(1L, 2L), dst = c(2L, 1L))
@@ -52,7 +52,7 @@ test_that("$move_samples() does not accept non-integers",
{ }) -assign_in_order(bc) +bc <- assign_in_order(bc) test_that("$move_samples() works as expected with src & dst (after $get_samples())", { bc$get_samples() @@ -82,7 +82,7 @@ bc <- BatchContainer$new( dimensions = c(row = 3, column = 3) ) samp <- data.frame(i = 1:8) -assign_in_order(bc, samp) +bc <- assign_in_order(bc, samp) test_that("$move_samples() works as expected with location_assignment (without $get_samples())", { a <- as.integer(c(2, 3, NA, 4, 5, 6, 7, 8, 1)) diff --git a/tests/testthat/test_no_shuff_proposed.R b/tests/testthat/test_no_shuff_proposed.R index 62b0452c..27d23121 100644 --- a/tests/testthat/test_no_shuff_proposed.R +++ b/tests/testthat/test_no_shuff_proposed.R @@ -7,12 +7,12 @@ samples <- data.frame( sampleId = seq_len(bc$n_locations) ) -bc$scoring_f <- function(...) rnorm(1) +scoring_f <- function(...) rnorm(1) test_that("No shuffling proposed error is generated", { set.seed(6) expect_error( - optimize_design(bc, samples = samples, max_iter = 30, n_shuffle = 2), + optimize_design(bc, scoring = scoring_f, samples = samples, max_iter = 30, n_shuffle = 2), NA ) }) diff --git a/tests/testthat/test_optimization_trace.R b/tests/testthat/test_optimization_trace.R deleted file mode 100644 index 21696f69..00000000 --- a/tests/testthat/test_optimization_trace.R +++ /dev/null @@ -1,61 +0,0 @@ -test_that("n_steps cannot be negative", { - expect_error(OptimizationTrace$new(-10, 10)) -}) - -test_that("n_scores cannot be negative", { - expect_error(OptimizationTrace$new(10, -10)) -}) - -test_that("OptimiztionTrace returns a single-column score matrix", { - ot <- OptimizationTrace$new(10, 1, "") - ot$set_scores(1, 2, NULL) - ot$set_scores(2, 3, NULL) - ot$shrink() - expect_true(all(ot$scores == matrix(c(2, 3), ncol = 1))) - expect_null(ot$aggregated_scores) -}) - -test_that("OptimiztionTrace fails on incorrect number of aggregated scores", { - ot <- OptimizationTrace$new(10, 1, "") - ot$set_scores(1, 2, c(2, 4)) - expect_error(ot$set_scores(2, 3, 3)) -}) - -test_that("OptimiztionTrace returns a correct matrix of aggregated scores", { - ot <- OptimizationTrace$new(10, 1, "a") - ot$set_scores(1, 2, c(2, 4)) - ot$set_scores(2, 2, c(4, 2)) - ot$shrink() - expect_true(all(ot$aggregated_scores == matrix(c(2, 4, 4, 2), ncol = 2))) -}) - -test_that("OptimiztionTrace returns a tibble", { - ot <- OptimizationTrace$new(10, 1, "a") - ot$set_scores(1, 2, c(2, 4)) - ot$set_scores(2, 2, c(4, 2)) - ot$shrink() - tb <- ot$as_tibble() - expect_true(tibble::is_tibble(tb)) - expect_named(tb, c("type", "step", "score", "value")) - expect_true(all(tb$step %in% c(1, 2))) - expect_true(is.factor(tb$type)) - expect_equal(levels(tb$type), c("score", "aggregated")) - expect_true(all(tb$score %in% c("a", "agg.1", "agg.2"))) - expect_equal(nrow(tb), 6) -}) - -test_that("OptimiztionTrace returns a tibble (include_aggregated = FALSE)", { - ot <- OptimizationTrace$new(10, 1, "a") - ot$set_scores(1, 2, c(2, 4)) - ot$set_scores(2, 2, c(4, 2)) - ot$shrink() - tb <- ot$as_tibble(FALSE) - expect_true(tibble::is_tibble(tb)) - expect_named(tb, c("type", "step", "score", "value")) - expect_true(all(tb$step %in% c(1, 2))) - expect_true(is.factor(tb$type)) - expect_equal(levels(tb$type), c("score", "aggregated")) - expect_true(all(tb$type == "score")) - expect_true(all(tb$score == "a")) - expect_equal(nrow(tb), 2) -}) diff --git a/tests/testthat/test_optimize_design.R b/tests/testthat/test_optimize_design.R index a43d7a11..c710f856 100644 --- a/tests/testthat/test_optimize_design.R +++ 
b/tests/testthat/test_optimize_design.R @@ -6,9 +6,9 @@ samples <- data.frame( sampleId = seq_len(bc$n_locations) ) -assign_in_order(bc, samples) +bc <- assign_in_order(bc, samples) -bc$scoring_f <- function(...) rnorm(1) +scoring_f <- function(...) rnorm(1) my_shuffle_proposal <- function(...) { v <- sample(20, 2) @@ -16,72 +16,72 @@ my_shuffle_proposal <- function(...) { } test_that("n_shuffle could be >=2 for simple shuffling", { - expect_error(optimize_design(bc, max_iter = 10, n_shuffle = 2), NA) - expect_error(optimize_design(bc, max_iter = 10, n_shuffle = rep(c(3, 2), c(5, 5))), NA) - expect_error(optimize_design(bc, max_iter = 10, n_shuffle = 10), NA) + expect_error(optimize_design(bc, scoring = scoring_f, max_iter = 10, n_shuffle = 2), NA) + expect_error(optimize_design(bc, scoring = scoring_f, max_iter = 10, n_shuffle = rep(c(3, 2), c(5, 5))), NA) + expect_error(optimize_design(bc, scoring = scoring_f, max_iter = 10, n_shuffle = 10), NA) # n_shuffle should be >= 1 - expect_error(optimize_design(bc, max_iter = 10, n_shuffle = 0)) - expect_error(optimize_design(bc, max_iter = 10, n_shuffle = c(rep(9, 9), 0))) + expect_error(optimize_design(bc, scoring = scoring_f, max_iter = 10, n_shuffle = 0)) + expect_error(optimize_design(bc, scoring = scoring_f, max_iter = 10, n_shuffle = c(rep(9, 9), 0))) }) test_that("error when shuffle proposal function returns not a list", { expect_error( - optimize_design(bc, max_iter = 10, n_shuffle = 2, shuffle_proposal = function(...) 0:1), + optimize_design(bc, scoring = scoring_f, max_iter = 10, n_shuffle = 2, shuffle_proposal = function(...) 0:1), "sample assignment length doesn't match the number of available locations" ) }) test_that("n_shuffle could be >=1 for shuffling with proposal function", { expect_error( - optimize_design(bc, max_iter = 10, n_shuffle = 1, shuffle_proposal = my_shuffle_proposal), + optimize_design(bc, scoring = scoring_f, max_iter = 10, n_shuffle = 1, shuffle_proposal = my_shuffle_proposal), NA ) expect_error( - optimize_design(bc, max_iter = 10, n_shuffle = rep(c(3, 1), c(5, 5)), shuffle_proposal = my_shuffle_proposal), + optimize_design(bc, scoring = scoring_f, max_iter = 10, n_shuffle = rep(c(3, 1), c(5, 5)), shuffle_proposal = my_shuffle_proposal), NA ) expect_error( - optimize_design(bc, max_iter = 10, n_shuffle = 10, shuffle_proposal = my_shuffle_proposal), + optimize_design(bc, scoring = scoring_f, max_iter = 10, n_shuffle = 10, shuffle_proposal = my_shuffle_proposal), NA ) # n_shuffle should be >= 0 - expect_error(optimize_design(bc, max_iter = 10, n_shuffle = 0, shuffle_proposal = my_shuffle_proposal)) - expect_error(optimize_design(bc, max_iter = 10, n_shuffle = c(rep(9, 9), 0), shuffle_proposal = my_shuffle_proposal)) + expect_error(optimize_design(bc, scoring = scoring_f, max_iter = 10, n_shuffle = 0, shuffle_proposal = my_shuffle_proposal)) + expect_error(optimize_design(bc, scoring = scoring_f, max_iter = 10, n_shuffle = c(rep(9, 9), 0), shuffle_proposal = my_shuffle_proposal)) }) test_that("mismatch between number of iterations and n_shuffle", { expect_error( - trace <- optimize_design(bc, max_iter = 5, n_shuffle = 2), + bc <- optimize_design(bc, scoring = scoring_f, max_iter = 5, n_shuffle = 2), NA ) - expect_equal(trace$n_steps, 5 + 1) + expect_equal(nrow(bc$trace$scores[[nrow(bc$trace)]]), 5 + 1) expect_error( - trace <- optimize_design(bc, max_iter = 10, n_shuffle = c(2, 2)), + bc <- optimize_design(bc, scoring = scoring_f, max_iter = 10, n_shuffle = c(2, 2)), NA ) - expect_equal(trace$n_steps, 2 + 1) + 
expect_equal(nrow(bc$trace$scores[[nrow(bc$trace)]]), 2 + 1) expect_error( - trace <- optimize_design(bc, max_iter = 10, n_shuffle = rep(2, 200)), + bc <- optimize_design(bc, scoring = scoring_f, max_iter = 10, n_shuffle = rep(2, 200)), NA ) - expect_equal(trace$n_steps, 10 + 1) + expect_equal(nrow(bc$trace$scores[[nrow(bc$trace)]]), 10 + 1) }) test_that("inferring iterations from n_shuffle", { expect_error( - trace <- optimize_design(bc, n_shuffle = rep(2, 5)), + bc <- optimize_design(bc, scoring = scoring_f, n_shuffle = rep(2, 5)), NA ) - expect_equal(trace$n_steps, 5 + 1) + expect_equal(nrow(bc$trace$scores[[nrow(bc$trace)]]), 5 + 1) }) test_that("default n_shuffle", { expect_error( - optimize_design(bc, max_iter = 10), + optimize_design(bc, scoring = scoring_f, max_iter = 10), NA ) expect_error( - optimize_design(bc, max_iter = 10, shuffle_proposal = my_shuffle_proposal), + optimize_design(bc, scoring = scoring_f, max_iter = 10, shuffle_proposal = my_shuffle_proposal), NA ) }) diff --git a/tests/testthat/test_optimize_design_complete_random_shuffle.R b/tests/testthat/test_optimize_design_complete_random_shuffle.R index ee3c0f2d..d680e7a2 100644 --- a/tests/testthat/test_optimize_design_complete_random_shuffle.R +++ b/tests/testthat/test_optimize_design_complete_random_shuffle.R @@ -6,10 +6,10 @@ samples <- data.frame( sampleId = seq_len(98) ) -assign_in_order(bc, samples) +bc <- assign_in_order(bc, samples) # decreasing score -bc$scoring_f <- (function() { +scoring_f <- (function() { score <- 1 function(...) { score <<- score - 1 @@ -21,8 +21,9 @@ set.seed(42) start_state <- ifelse(is.na(bc$assignment), -1, bc$assignment) test_that("complete_random_shuffling shuffles most of the elements", { - optimize_design( + bc <- optimize_design( bc, + scoring = scoring_f, max_iter = 1, shuffle_proposal_func = complete_random_shuffling ) diff --git a/tests/testthat/test_optimize_design_simple_shuffle.R b/tests/testthat/test_optimize_design_simple_shuffle.R index e6e073bc..e037ca12 100644 --- a/tests/testthat/test_optimize_design_simple_shuffle.R +++ b/tests/testthat/test_optimize_design_simple_shuffle.R @@ -15,9 +15,9 @@ n_elements_changed <- function(bc) { Inf } -assign_in_order(bc, samples) +bc <- assign_in_order(bc, samples) -bc$scoring_f <- n_elements_changed +scoring_f <- n_elements_changed set.seed(42) @@ -27,6 +27,7 @@ n_changed <- numeric(0) test_that("correct number of shuffles = 1", { optimize_design( bc, + scoring = scoring_f, max_iter = 10, check_score_variance = F, autoscale_scores = F @@ -38,6 +39,7 @@ n_changed <- numeric(0) test_that("correct number of shuffles = 2", { optimize_design( bc, + scoring = scoring_f, max_iter = 10, n_shuffle = 2, check_score_variance = F, @@ -50,6 +52,7 @@ n_changed <- numeric(0) test_that("correct number of shuffles = 5", { optimize_design( bc, + scoring = scoring_f, max_iter = 10, n_shuffle = 5, check_score_variance = FALSE, @@ -62,6 +65,7 @@ n_changed <- numeric(0) test_that("specify too many shuffles", { optimize_design( bc, + scoring = scoring_f, max_iter = 10, n_shuffle = 40, check_score_variance = FALSE, @@ -74,6 +78,7 @@ n_changed <- numeric(0) test_that("complex shuffling schedule", { optimize_design( bc, + scoring = scoring_f, max_iter = 10, n_shuffle = c(2, 2, 5, 2, 2, 10, 20, 40, 40), check_score_variance = F, diff --git a/tests/testthat/test_optimize_design_uses_all_locations.R b/tests/testthat/test_optimize_design_uses_all_locations.R index 9dd777c1..4ac764b5 100644 --- a/tests/testthat/test_optimize_design_uses_all_locations.R +++ 
b/tests/testthat/test_optimize_design_uses_all_locations.R
@@ -6,9 +6,7 @@ samples <- data.frame(
   sampleId = seq_len(bc$n_locations - 5)
 )
 
-assign_in_order(bc, samples)
-
-bc$scoring_f <- function(...) rnorm(1)
+bc <- assign_in_order(bc, samples)
 
 set.seed(42)
 
@@ -17,9 +15,9 @@ test_that("empty locations are used by optimize_design", {
   assignments <- purrr::map_dfr(
     seq_len(20),
     function(...) {
-      bc1 <- bc$copy()
-      optimize_design(
-        bc1,
+      bc1 <- optimize_design(
+        bc,
+        scoring = function(...) rnorm(1),
         max_iter = 5
       )
       bc1$get_samples()
diff --git a/tests/testthat/test_samples_attr.R b/tests/testthat/test_samples_attr.R
index 57503ee0..26c58565 100644
--- a/tests/testthat/test_samples_attr.R
+++ b/tests/testthat/test_samples_attr.R
@@ -10,7 +10,7 @@ test_that("add attributes before assigning samples", {
   expect_equal(bc$get_samples(assignment = FALSE)$attr1, rev(1:8))
 })
 
-assign_in_order(bc)
+bc <- assign_in_order(bc)
 
 test_that("add attributes after assigning samples", {
   bc$samples_attr <- NULL
@@ -23,7 +23,7 @@ test_that("add attributes after assigning samples", {
 
 test_that("shuffling samples keeps attributes order", {
   set.seed(42)
-  assign_random(bc)
+  bc <- assign_random(bc)
   bc$get_samples(include_id = TRUE, as_tibble = FALSE)
   bc$samples_attr <- data.frame(attr1 = rev(1:8))
   expect_equal(bc$get_samples(assignment = FALSE)$attr1, rev(1:8))
 })
diff --git a/tests/testthat/test_scores.R b/tests/testthat/test_scores.R
index b97f6e5a..b3202f16 100644
--- a/tests/testthat/test_scores.R
+++ b/tests/testthat/test_scores.R
@@ -6,53 +6,49 @@ samples <- data.frame(
   sampleId = seq_len(bc$n_locations)
 )
 
-assign_random(bc, samples)
+bc <- assign_random(bc, samples)
 
 test_that("Score length is correct", {
-  bc$scoring_f <- list(
+  scoring_f <- list(
     function(...) rnorm(1)
   )
-  expect_length(bc$scoring_f, 1)
-  expect_length(bc$score(), 1)
-  # single value is automatically converted to a list
-  expect_type(bc$scoring_f, "list")
+  expect_length(bc$score(scoring_f), 1)
 
-  bc$scoring_f <- function(...) rnorm(1)
-  expect_length(bc$score(), 1)
-
-  bc$scoring_f <- list(
+  scoring_f <- list(
     function(...) rnorm(1),
     function(...) rnorm(1)
   )
-  expect_length(bc$score(), 2)
+  expect_length(bc$score(scoring_f), 2)
 })
 
 test_that("Score names are correct", {
-  bc$scoring_f <- list(
+  scoring_f <- list(
     a = function(...) rnorm(1),
     b = function(...) rnorm(1)
   )
-  expect_named(bc$score(), c("a", "b"))
+  expect_named(bc$score(scoring_f), c("a", "b"))
 })
 
 test_that("Can optimize a single score", {
   set.seed(6)
-  bc$scoring_f <- function(...) rnorm(1)
+  scoring_f <- function(...) rnorm(1)
+  bc <- optimize_design(bc, scoring = scoring_f, max_iter = 30, n_shuffle = 2)
   expect_equal(
-    optimize_design(bc, max_iter = 30, n_shuffle = 2)$n_steps,
+    nrow(bc$trace$scores[[1]]),
     31
   )
 })
 
 test_that("Can optimize multiple scores", {
   set.seed(6)
-  bc$scoring_f <- list(
+  scoring_f <- list(
     a = function(...) rnorm(1),
     b = function(...)
rnorm(1) ) + bc <- optimize_design(bc, scoring = scoring_f, max_iter = 30, n_shuffle = 2, aggregate_scores_func = first_score_only) expect_equal( - optimize_design(bc, max_iter = 30, n_shuffle = 2, aggregate_scores_func = first_score_only)$n_steps, + nrow(bc$trace$scores[[1]]), 31 ) }) diff --git a/vignettes/NCS22_talk.Rmd b/vignettes/NCS22_talk.Rmd index 07746f40..adf608d9 100644 --- a/vignettes/NCS22_talk.Rmd +++ b/vignettes/NCS22_talk.Rmd @@ -4,7 +4,7 @@ author: "Juliane Siebourg-Polster, Iakov Davydov, Guido Steiner, Balazs Banfai" output: rmarkdown::html_vignette: vignette: > - %\VignetteIndexEntry{NCS Talk} + %\VignetteIndexEntry{designit: a flexible engine to generate experiment layouts} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} editor_options: @@ -57,16 +57,8 @@ set.seed(17) # gives `bad` random assignment bc <- BatchContainer$new( dimensions = list("batch" = 3, "location" = 11), -) - -bc$scoring_f <- list( - group = osat_score_generator(batch_vars = "batch", - feature_vars = "Group"), - sex = osat_score_generator(batch_vars = "batch", - feature_vars = "Sex") -) - -assign_random(bc, subject_data) +) %>% + assign_random(subject_data) ``` Gone wrong: Random distribution of 31 grouped subjects into 3 batches @@ -119,14 +111,8 @@ set.seed(17) # gives `bad` random assignment ```{r} bc <- BatchContainer$new( dimensions = list("batch" = 3, "location" = 11), -) -bc$scoring_f <- list( - group = osat_score_generator(batch_vars = "batch", - feature_vars = "Group"), - sex = osat_score_generator(batch_vars = "batch", - feature_vars = "Sex") -) -assign_random(bc, subject_data) +) %>% + assign_random(subject_data) ``` @@ -152,13 +138,13 @@ bc$get_samples() ```{r, echo=FALSE} bind_rows(head(bc$get_samples(), 3) %>% - mutate(across(, as.character)), + mutate(across(everything(), as.character)), tibble(batch = "...", location = " ...", SubjectID = "...", Group = "...", Sex = "..."), tail(bc$get_samples(), 3) %>% - mutate(across(, as.character))) %>% + mutate(across(everything(), as.character))) %>% gt::gt() %>% gt::tab_options(table.font.size = 11, data_row.padding = 0.1) ``` @@ -172,10 +158,20 @@ bind_rows(head(bc$get_samples(), 3) %>% * sex (lower priority) ```{r, warning=FALSE} -trace <- optimize_design( +bc <- optimize_design( bc, + scoring = list( + group = osat_score_generator( + batch_vars = "batch", + feature_vars = "Group" + ), + sex = osat_score_generator( + batch_vars = "batch", + feature_vars = "Sex" + ) + ), n_shuffle = 1, - acceptance_func = + acceptance_func = ~ accept_leftmost_improvement(..., tolerance = 0.01), max_iter = 150, quiet = TRUE @@ -193,7 +189,7 @@ cowplot::plot_grid( bc$get_samples() %>% ggplot(aes(x = batch, fill = Sex)) + geom_bar() + labs(y = "subject count"), - trace$plot(include_aggregated = TRUE) + bc$plot_trace(include_aggregated = TRUE) ), ncol = 3 ) @@ -202,13 +198,13 @@ cowplot::plot_grid( ```{r, echo=FALSE} bind_rows(head(bc$get_samples(), 3) %>% - mutate(across(, as.character)), + mutate(across(everything(), as.character)), tibble(batch = "...", location = " ...", SubjectID = "...", Group = "...", Sex = "..."), tail(bc$get_samples(), 3) %>% - mutate(across(, as.character))) %>% + mutate(across(everything(), as.character))) %>% gt::gt() %>% gt::tab_options(table.font.size = 11, data_row.padding = 0.1) ``` @@ -243,9 +239,9 @@ set.seed(1) #1 #2 bc <- BatchContainer$new( dimensions = list("plate" = 3, "row" = 4, "col" = 6), -) -assign_random(bc, dat) -#assign_in_order(bc, dat) +) %>% + assign_random(dat) +# assign_in_order(dat) ``` ```{r, 
fig.width= 5, fig.height=4.5, eval=FALSE}
@@ -276,7 +272,7 @@ cowplot::plot_grid(
 
 ## Spatial arrangement
 
 ```{r, warning=FALSE, message=FALSE}
-traces <- optimize_multi_plate_design(
+bc <- optimize_multi_plate_design(
   bc,
   across_plates_variables = c("Group", "Sex"),
   within_plate_variables = c("Group"),
@@ -299,8 +295,8 @@ cowplot::plot_grid(
 )
 ```
 
-```{r fig.width=3, fig.height=3, echo=FALSE}
-purrr::imap(traces, ~ .x$plot(include_aggregated=TRUE) + labs(title = .y))[1:2]
+```{r fig.width=5, fig.height=4, echo=FALSE}
+bc$plot_trace()
 ```
diff --git a/vignettes/basic_examples.Rmd b/vignettes/basic_examples.Rmd
index 0f53c09f..66144d0f 100644
--- a/vignettes/basic_examples.Rmd
+++ b/vignettes/basic_examples.Rmd
@@ -102,7 +102,7 @@ bc$get_locations() %>% head()
 
 Use the random assignment function to place samples into plate locations
 
 ```{r}
-assign_random(bc, samples)
+bc <- assign_random(bc, samples)
 
 bc$get_samples()
 bc$get_samples(remove_empty_locations = TRUE)
@@ -128,7 +128,9 @@ plot_plate(bc$get_samples(remove_empty_locations = TRUE),
 
 To move individual samples or to manually assign all locations, we can use the `batchContainer$move_samples()` method.
 
-To swap two or more samples use
+To swap two or more samples use:
+
+**Warning**: This will change your BatchContainer in-place.
 
 ```{r, fig.width=6, fig.height=3.5}
 bc$move_samples(src = c(1L, 2L), dst = c(2L, 1L))
@@ -140,6 +142,9 @@ plot_plate(bc$get_samples(remove_empty_locations = TRUE),
 ```
 
 To assign all samples in one go, use the option `location_assignment`.
+
+**Warning**: This will change your BatchContainer in-place.
+
 The example below orders samples by ID and adds the empty locations afterwards
 ```{r, fig.width=6, fig.height=3.5}
 bc$move_samples(
@@ -155,29 +160,23 @@ plot_plate(bc$get_samples(remove_empty_locations = TRUE, include_id = TRUE),
 )
 ```
 
-## Scoring a layout
-
-To evaluate how good a layout is, we need a scoring function.
-This we also assign to the batch container.
-
-This function will assess how well treatment
-and dose are balanced across the two plates.
-
-```{r}
-bc$scoring_f <- osat_score_generator(
-  batch_vars = "plate",
-  feature_vars = c("treatment", "dose")
-)
-```
-
 ## Run an optimization
 
 The optimization procedure is invoked with e.g. `optimize_design`.
 Here we use a simple shuffling schedule:
 swap 10 samples 200 times, then swap 2 samples 400 times.
 
+To evaluate how good a layout is, we need a scoring function.
+
+This function will assess how well treatment
+and dose are balanced across the two plates.
+
 ```{r}
-trace <- optimize_design(bc,
+bc <- optimize_design(bc,
+  scoring = osat_score_generator(
+    batch_vars = "plate",
+    feature_vars = c("treatment", "dose")
+  ),
   # shuffling schedule
   n_shuffle = c(rep(10, 200), rep(2, 400))
 )
 ```
 
 The development of the score can be viewed with
 
 ```{r, fig.width=3.5, fig.height=3}
-trace$plot()
+bc$plot_trace()
 ```
 
 The layout after plate batching looks as follows
diff --git a/vignettes/cached/README.md b/vignettes/cached/README.md
index 3758311b..63f2916a 100644
--- a/vignettes/cached/README.md
+++ b/vignettes/cached/README.md
@@ -10,7 +10,7 @@
     ---
     title: "Title of vignette child"
     output: html_fragment
-    knit: (\(input, ...) rmarkdown::render(input, output_dir = "vignettes/cached"))
+    knit: (\(input, ...)
rmarkdown::render(input, output_dir = here::here("vignettes/cached"))) --- ```` * knit child to HTML fragment (either in RStudio or with `rmarkdown::render()`) diff --git a/vignettes/cached/_invivo_computed.Rmd b/vignettes/cached/_invivo_computed.Rmd index 596bb980..719042bd 100644 --- a/vignettes/cached/_invivo_computed.Rmd +++ b/vignettes/cached/_invivo_computed.Rmd @@ -1,7 +1,7 @@ --- title: "In vivo examples computed" output: html_fragment -knit: (\(input, ...) rmarkdown::render(input, output_dir = "vignettes/cached")) +knit: (\(input, ...) rmarkdown::render(input, output_dir = here::here("vignettes/cached"))) --- ```{r, include = FALSE} diff --git a/vignettes/cached/_invivo_computed.html b/vignettes/cached/_invivo_computed.html index cdf00f45..548ed2ba 100644 --- a/vignettes/cached/_invivo_computed.html +++ b/vignettes/cached/_invivo_computed.html @@ -25,7 +25,7 @@

Calculating the design

 #> ... user specified treatment allocation constraint (Treatment-Strain-Sex)
 #> ... facilitating homogeneity of treatment in cages (CageGroup)
 #> ... ANOVA -logP for numerical variables balanced across treatment (ArrivalWeight, AgeGroup)
-#> CAUTION: User defined constraints could not be fully met (remaining score 4)
+#> Success. User provided constraints could be fully met.
 
 # Form cages with reasonable animal numbers and compliant with all constraints
 design_cage <- Invivo_assignCages(design_trt,
@@ -39,17 +39,15 @@ Calculating the design
 )
 #> Setting up batch container.
 #>
-#> Formed 19 homogeneous groups using 59 samples.
+#> Formed 22 homogeneous groups using 59 samples.
 #> 27 subgroups needed to satisfy size constraints.
 #>
 #> Finding possible ways to allocate variable of interest with 1 levels ...
 #>
 #> Finished with 27 recursive calls.
 #> 1 allocations found.
-#> No permutations fulfilling the 'keep_separate' constraints in 1000 iters!
-#> Increasing number of tolerated violations to 1
 #>
-#> Expecting 27 cages to be created and 3 single-housed animals.
+#> Expecting 27 cages to be created and 4 single-housed animals.
 #> Constructing scoring functions:
 #> ... ANOVA -logP for numerical variables balanced across cages (ArrivalWeight, AgeGroup)
 #> Adding 4 attributes to samples.
@@ -70,7 +68,7 @@ Calculating the design
 #> Distributing target variables (Treatment, Strain, Sex) within rack
 #> ... Rack 1
 #> ... Performing simple mean/stddev adjustment.
-#> ... final scores: Plate_Treatment: 5.2, Plate_Strain: 5.46, Plate_Sex: 5.67
+#> ... final scores: Plate_Treatment: 5.12, Plate_Strain: 5.48, Plate_Sex: 5.72

Visualization of the study design

@@ -82,7 +80,7 @@
 Cage composition
 treatment, strain and sex.
 Females are exclusively used for treatment 2, as was specified in the treatment list.
-[plot image]
+[plot image regenerated]
 
 Body weights
@@ -95,19 +93,19 @@ Body weights
 compensate, achieving better cross-treatment balance of this factor.
 Red diamonds mark the mean values for a specific sex within each treatment group.
-[plot image]
+[plot image regenerated]
 
 Cage arrangement in rack
 
 The following plots show the organization of the cage rack, individual cages colored by different variables each time.
-[plot image]
+[plot image regenerated]
 
 Individual animals in cages
 
 Finally, an overview plot illustrates the placement of animals in the cages. Notice the distinct earmarks within each cage, a ‘soft’ design constraint that could be achieved with the given solution.
-[plot image]
+[plot image regenerated]
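The changes above all follow the same migration: scoring functions move out of the container (formerly `bc$scoring_f`) into an explicit `scoring` argument, and assignment/optimization functions now return a new `BatchContainer` instead of modifying one invisibly in place. For reference, a minimal sketch of the resulting workflow, using only functions and the `invivo_study_samples` data set that appear elsewhere in this diff (arguments abbreviated; not a verbatim excerpt):

```r
library(designit)

data("invivo_study_samples")
bc <- BatchContainer$new(
  dimensions = c("plate" = 2, "column" = 5, "row" = 6)
)
# assignment functions return a modified copy; reassign the result
bc <- assign_random(bc, invivo_study_samples)

# scoring is passed explicitly instead of being stored on the container
scoring_f <- osat_score_generator(batch_vars = "plate", feature_vars = "Sex")
bc$score(scoring_f) # named numeric vector with the current score(s)

bc <- optimize_design(bc, scoring = scoring_f, max_iter = 100)
bc$plot_trace() # the optimization trace travels with the container
```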

diff --git a/vignettes/cached/_plate_scoring_ex1.Rmd b/vignettes/cached/_plate_scoring_ex1.Rmd
index 6a534e32..7a0ada3f 100644
--- a/vignettes/cached/_plate_scoring_ex1.Rmd
+++ b/vignettes/cached/_plate_scoring_ex1.Rmd
@@ -1,7 +1,7 @@
 ---
 title: "Plate scoring example 1"
 output: html_fragment
-knit: (\(input, ...) rmarkdown::render(input, output_dir = "vignettes/cached"))
+knit: (\(input, ...) rmarkdown::render(input, output_dir = here::here("vignettes/cached")))
 ---
 
 ```{r, include = FALSE}
@@ -35,7 +35,7 @@ example1 <- BatchContainer$new(
 
 # Add samples to container
 # Need unique Sample ID. Can we drop this constraint?
-assign_in_order(example1,
+example1 <- assign_in_order(example1,
   samples = tibble::tibble(
     Group = rep(c("Grp 1", "Grp 2", "Grp 3", "Grp 4"), each = 4),
     ID = 1:16
@@ -43,26 +43,25 @@ assign_in_order(example1,
 )
 
 # The following does not work (and gives a constant score of 144!)
-# example1$scoring_f <- osat_score_generator(batch_vars = c("row","col"), feature_vars = c("Group"))
+# scoring_f <- osat_score_generator(batch_vars = c("row","col"), feature_vars = c("Group"))
 # First analysis of problem indicates that osat_score generates a full row*col vector of 'ideal scores'
 # which are in fact the same value, implying an identical overall result as each position can be either
 # allocated by 1 sample or 0 samples, the sum of 1's being the sample count.
 # --> don't use osat_score if there's a lack of samples as compared to possible positioning
 
-bc <- example1$copy()
-
 # # Set scoring function
-bc$scoring_f <- list(
+scoring_f <- list(
   Row.Score = osat_score_generator(batch_vars = c("row"), feature_vars = c("Group")),
   Column.Score = osat_score_generator(batch_vars = c("col"), feature_vars = c("Group"))
 )
 ```
 
 ```{r}
- set.seed(41)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+  example1,
+  scoring = scoring_f,
   max_iter = 300, # this is set to shorten vignette run-time based on known random seed, normally we don't know.
   n_shuffle = 2,
   acceptance_func = mk_simanneal_acceptance_func(mk_simanneal_temp_func(T0 = 1000, alpha = 0.5)),
@@ -71,7 +70,7 @@
 ```
 
 ```{r}
-trace
+bc$trace$elapsed
 
 plot_plate(bc,
   plate = plate, row = row, column = col, .color = Group,
@@ -85,14 +84,14 @@
 This should reliably lead to a nice symmetry-bearing latin square design
 with only a one-dimensional score to look at.
 
 ```{r}
-bc <- example1$copy()
-
-bc$scoring_f <- mk_plate_scoring_functions(bc, row = "row", column = "col", group = "Group")
+scoring_f <- mk_plate_scoring_functions(bc, row = "row", column = "col", group = "Group")
 ```
 
 ```{r}
 set.seed(42)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+  example1,
+  scoring = scoring_f,
   max_iter = 1000, # this is set to shorten vignette run-time based on random seed, normally we don't know.
   n_shuffle = 2,
   acceptance_func = mk_simanneal_acceptance_func(mk_simanneal_temp_func(T0 = 1000, alpha = 0.5)),
   quiet = TRUE
 )
 ```
 
 ```{r}
-trace$elapsed
+bc$trace$elapsed
 
 plot_plate(bc,
   plate = plate, row = row, column = col, .color = Group,
diff --git a/vignettes/cached/_plate_scoring_ex1.html b/vignettes/cached/_plate_scoring_ex1.html
index 6f97caa9..332c6764 100644
--- a/vignettes/cached/_plate_scoring_ex1.html
+++ b/vignettes/cached/_plate_scoring_ex1.html
@@ -31,7 +31,7 @@ Example 1: An expensive way to construct a 4x4 latin square (one

 # Add samples to container
 # Need unique Sample ID. Can we drop this constraint?
-assign_in_order(example1,
+example1 <- assign_in_order(example1,
   samples = tibble::tibble(
     Group = rep(c("Grp 1", "Grp 2", "Grp 3", "Grp 4"), each = 4),
     ID = 1:16
@@ -39,23 +39,22 @@ Example 1: An expensive way to construct a 4x4 latin square (one
 )
 
 # The following does not work (and gives a constant score of 144!)
-# example1$scoring_f <- osat_score_generator(batch_vars = c("row","col"), feature_vars = c("Group"))
+# scoring_f <- osat_score_generator(batch_vars = c("row","col"), feature_vars = c("Group"))
 # First analysis of problem indicates that osat_score generates a full row*col vector of 'ideal scores'
 # which are in fact the same value, implying an identical overall result as each position can be either
 # allocated by 1 sample or 0 samples, the sum of 1's being the sample count.
 # --> don't use osat_score if there's a lack of samples as compared to possible positioning
 
-bc <- example1$copy()
-
 # # Set scoring function
-bc$scoring_f <- list(
+scoring_f <- list(
   Row.Score = osat_score_generator(batch_vars = c("row"), feature_vars = c("Group")),
   Column.Score = osat_score_generator(batch_vars = c("col"), feature_vars = c("Group"))
 )

-set.seed(41)
+set.seed(41)
 
-trace <- optimize_design(bc,
+bc <- optimize_design(
+  example1,
+  scoring = scoring_f,
   max_iter = 300, # this is set to shorten vignette run-time based on known random seed, normally we don't know.
   n_shuffle = 2,
   acceptance_func = mk_simanneal_acceptance_func(mk_simanneal_temp_func(T0 = 1000, alpha = 0.5)),
@@ -121,35 +120,33 @@ Example 1: An expensive way to construct a 4x4 latin square (one
 #> Aggregated: 16
 #> Achieved score: c(0, 0) at iteration 284
 #> Aggregated: 0

-trace
-#> Optimization trace (301 score values, elapsed 21.10947 secs).
-#>   Starting score: 48,0
-#>   Final score   : 0,0
+bc$trace$elapsed
+#> Time difference of 5.483629 secs
 
 plot_plate(bc,
   plate = plate, row = row, column = col, .color = Group,
   title = "Ex1: Using OSAT scores for plate design\n(not the recommended way!)"
 )
-[plot image]
+[plot image regenerated]
Now using a dedicated scoring for the group distances on a plate.

This should reliably lead to a nice symmetry-bearing latin square design with only a one-dimensional score to look at.

-bc <- example1$copy()
-
-bc$scoring_f <- mk_plate_scoring_functions(bc, row = "row", column = "col", group = "Group")
+scoring_f <- mk_plate_scoring_functions(bc, row = "row", column = "col", group = "Group")
set.seed(42)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+  example1,
+  scoring = scoring_f,
   max_iter = 1000, # this is set to shorten vignette run-time based on random seed, normally we don't know.
   n_shuffle = 2,
   acceptance_func = mk_simanneal_acceptance_func(mk_simanneal_temp_func(T0 = 1000, alpha = 0.5)),
   quiet = TRUE
 )
-trace$elapsed
-#> Time difference of 11.43023 secs
+bc$trace$elapsed
+#> Time difference of 6.182886 secs
 
 plot_plate(bc,
   plate = plate, row = row, column = col, .color = Group,
   title = "Ex1: Using a dedicated plate scoring function:\nThis should show a latin square!"
 )
-[plot image]
+[plot image regenerated]
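Example 1 above drives the optimizer with `n_shuffle = 2` plus an explicit `max_iter`. For reference, a sketch of the `n_shuffle` contract as documented in `optimize_design.Rd` and pinned down in `test_optimize_design.R`; the `bc` and `scoring_f` objects stand in for any container and scoring setup:

```r
# length(n_shuffle) == 1 sets no iteration limit by itself; max_iter governs,
# and the trace records the initial score plus one row per iteration
bc <- optimize_design(bc, scoring = scoring_f, max_iter = 10, n_shuffle = 2)
nrow(bc$trace$scores[[nrow(bc$trace)]]) # 10 + 1

# a longer n_shuffle vector is a swapping protocol; the optimization stops
# once the protocol is exhausted, so max_iter can be inferred from it
bc <- optimize_design(bc, scoring = scoring_f, n_shuffle = rep(2, 5))
nrow(bc$trace$scores[[nrow(bc$trace)]]) # 5 + 1
```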

diff --git a/vignettes/cached/_plate_scoring_ex2.Rmd b/vignettes/cached/_plate_scoring_ex2.Rmd index cc47b1e0..b3cba4bc 100644 --- a/vignettes/cached/_plate_scoring_ex2.Rmd +++ b/vignettes/cached/_plate_scoring_ex2.Rmd @@ -1,7 +1,7 @@ --- title: "Plate scoring example 2" output: html_fragment -knit: (\(input, ...) rmarkdown::render(input, output_dir = "vignettes/cached")) +knit: (\(input, ...) rmarkdown::render(input, output_dir = here::here("vignettes/cached"))) --- ```{r, include = FALSE} @@ -40,28 +40,28 @@ example2 <- BatchContainer$new( ) # Add samples to container -assign_in_order(example2, samples = tibble::tibble( +example2 <- assign_in_order(example2, samples = tibble::tibble( Group = c(rep(c("Grp 1", "Grp 2", "Grp 3", "Grp 4"), each = 8)), ID = 1:32 )) -bc <- example2$copy() - -bc$scoring_f <- c(mk_plate_scoring_functions(bc, plate = "plate", row = "row", column = "col", group = "Group"), +scoring_f <- c(mk_plate_scoring_functions(example2, plate = "plate", row = "row", column = "col", group = "Group"), osat_plate = osat_score_generator(batch_vars = c("plate"), feature_vars = c("Group")) ) -plot_plate(bc, +plot_plate(example2, plate = plate, row = row, column = col, .color = Group, title = "Ex2: Initial sample arrangement" ) -bc$score() +example2$score(scoring_f) ``` ```{r} set.seed(41) -trace <- optimize_design(bc, +bc <- optimize_design( + example2, + scoring = scoring_f, n_shuffle = c(rep(10, 10), rep(3, 90), rep(2, 100), rep(1, 1400)), acceptance_func = mk_simanneal_acceptance_func(mk_simanneal_temp_func(T0 = 10000, alpha = 0.5)), aggregate_scores_func = worst_score, @@ -70,9 +70,9 @@ trace <- optimize_design(bc, ``` ```{r} -trace$elapsed +bc$trace$elapsed -bc$score() +bc$score(scoring_f) plot_plate(bc, plate = plate, row = row, column = col, .color = Group, @@ -92,33 +92,34 @@ permutation function which takes the plate structure into account and only shuffles samples around within one plate. ```{r} -# Setting up the batch container - -bc <- example2$copy() - -bc$scoring_f <- osat_score_generator(batch_vars = c("plate"), feature_vars = c("Group")) +scoring_f <- osat_score_generator(batch_vars = c("plate"), feature_vars = c("Group")) set.seed(42) -optimize_design(bc, +bc <- optimize_design( + example2, + scoring = scoring_f, quiet = TRUE, max_iter = 200, # this is set to shorten vignette run-time, normally we don't know. n_shuffle = 2, acceptance_func = mk_simanneal_acceptance_func(mk_simanneal_temp_func(T0 = 10000, alpha = 0.5)), ) +bc$trace$elapsed plot_plate(bc, plate = plate, row = row, column = col, .color = Group, title = "Ex2: 'Plate wise' design\nStep 1: after allocating samples to plates" ) -bc$scoring_f <- mk_plate_scoring_functions(bc, plate = "plate", row = "row", column = "col", group = "Group") +scoring_f <- mk_plate_scoring_functions(bc, plate = "plate", row = "row", column = "col", group = "Group") -bc$score() +bc$score(scoring_f) ``` ```{r} set.seed(42) -trace <- optimize_design(bc, +bc <- optimize_design( + bc, + scoring = scoring_f, max_iter = 400, shuffle_proposal_func = mk_subgroup_shuffling_function(subgroup_vars = c("plate")), acceptance_func = mk_simanneal_acceptance_func(mk_simanneal_temp_func(T0 = 1000, alpha = 0.5)), @@ -128,9 +129,9 @@ trace <- optimize_design(bc, ``` ```{r} -trace +bc$trace$elapsed -bc$score() +bc$score(scoring_f) plot_plate(bc, plate = plate, row = row, column = col, .color = Group, @@ -153,16 +154,14 @@ happen first within plate 1, then within plate 2, so that the two scores can be optimized in succeeding runs. 
```{r} -# Setting up the batch container - -bc <- example2$copy() - -bc$scoring_f <- osat_score_generator(batch_vars = c("plate"), feature_vars = c("Group")) +scoring_f <- osat_score_generator(batch_vars = c("plate"), feature_vars = c("Group")) ``` ```{r} set.seed(42) -trace <- optimize_design(bc, +bc <- optimize_design( + example2, + scoring = scoring_f, quiet = TRUE, max_iter = 150, # this is set to shorten vignette run-time, normally we don't know. n_shuffle = 2, @@ -171,21 +170,23 @@ trace <- optimize_design(bc, ``` ```{r} -trace +bc$trace$elapsed plot_plate(bc, plate = plate, row = row, column = col, .color = Group, title = "Ex2: 'Serial plate' design\nStep 1: after allocating samples to plates" ) -bc$scoring_f <- mk_plate_scoring_functions(bc, plate = "plate", row = "row", column = "col", group = "Group") +scoring_f <- mk_plate_scoring_functions(bc, plate = "plate", row = "row", column = "col", group = "Group") -bc$score() +bc$score(scoring_f) ``` ```{r} set.seed(42) -trace <- optimize_design(bc, +bc <- optimize_design( + bc, + scoring = scoring_f, max_iter = 150, quiet = TRUE, shuffle_proposal_func = mk_subgroup_shuffling_function(subgroup_vars = c("plate"), restrain_on_subgroup_levels = c(1)), @@ -195,14 +196,16 @@ trace <- optimize_design(bc, ``` ```{r} -trace +bc$trace$elapsed -bc$score() +bc$score(scoring_f) ``` ```{r} set.seed(42) -trace <- optimize_design(bc, +bc <- optimize_design( + bc, + scoring = scoring_f, max_iter = 550, quiet = TRUE, shuffle_proposal_func = mk_subgroup_shuffling_function(subgroup_vars = c("plate"), restrain_on_subgroup_levels = c(2)), @@ -212,9 +215,9 @@ trace <- optimize_design(bc, ``` ```{r} -trace +bc$trace$elapsed -bc$score() +bc$score(scoring_f) plot_plate(bc, plate = plate, row = row, column = col, .color = Group, diff --git a/vignettes/cached/_plate_scoring_ex2.html b/vignettes/cached/_plate_scoring_ex2.html index a5299f2d..efcb9fbf 100644 --- a/vignettes/cached/_plate_scoring_ex2.html +++ b/vignettes/cached/_plate_scoring_ex2.html @@ -4,6 +4,14 @@
library(designit)
 library(ggplot2)
 library(dplyr)
+#> 
+#> Attaching package: 'dplyr'
+#> The following objects are masked from 'package:stats':
+#> 
+#>     filter, lag
+#> The following objects are masked from 'package:base':
+#> 
+#>     intersect, setdiff, setequal, union
 library(tidyr)
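Throughout this example, `bc$score(scoring_f)` returns a named vector such as the `Plate 1 / Plate 2 / osat_plate` output below. The naming rules for multi-valued scoring functions are the ones fixed in `tests/testthat/test-compute-score-vector.R`; a small sketch with hypothetical scoring functions:

```r
bc$score(list(
  a = function(...) c(1, 2),         # unnamed length-2 result -> "a1", "a2"
  b = function(...) 1,               # scalar result           -> "b"
  c = function(...) c(x = 1, y = 2)  # named result            -> "cx", "cy"
))
```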

@@ -26,37 +34,37 @@ Example 2: Scoring two plates at once

 )
 
 # Add samples to container
-assign_in_order(example2, samples = tibble::tibble(
+example2 <- assign_in_order(example2, samples = tibble::tibble(
   Group = c(rep(c("Grp 1", "Grp 2", "Grp 3", "Grp 4"), each = 8)),
   ID = 1:32
 ))
 
-bc <- example2$copy()
-
-bc$scoring_f <- c(mk_plate_scoring_functions(bc, plate = "plate", row = "row", column = "col", group = "Group"),
+scoring_f <- c(mk_plate_scoring_functions(example2, plate = "plate", row = "row", column = "col", group = "Group"),
   osat_plate = osat_score_generator(batch_vars = c("plate"), feature_vars = c("Group"))
 )
 
-plot_plate(bc,
+plot_plate(example2,
   plate = plate, row = row, column = col, .color = Group,
   title = "Ex2: Initial sample arrangement"
 )
-[plot image]
+[plot image regenerated]

-bc$score()
+example2$score(scoring_f)
 #>    Plate 1    Plate 2 osat_plate 
 #>   23.89265   23.89265  128.00000
set.seed(41)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+  example2,
+  scoring = scoring_f,
   n_shuffle = c(rep(10, 10), rep(3, 90), rep(2, 100), rep(1, 1400)),
   acceptance_func = mk_simanneal_acceptance_func(mk_simanneal_temp_func(T0 = 10000, alpha = 0.5)),
   aggregate_scores_func = worst_score,
   quiet = TRUE
 )
-trace$elapsed
-#> Time difference of 25.73155 secs
+bc$trace$elapsed
+#> Time difference of 16.73602 secs
 
-bc$score()
+bc$score(scoring_f)
 #>    Plate 1    Plate 2 osat_plate 
 #>   6.127258   6.094080   0.000000
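The successive optimization runs in this example also illustrate where the trace went after `OptimizationTrace` was removed: it accumulates on the container, one row per `optimize_design()` call, which is why `bc$trace$elapsed` further below eventually prints two time differences. A sketch of the accessors, as they are used in the tests above:

```r
bc$trace                          # tibble with one row per optimization run
bc$trace$elapsed                  # per-run elapsed times
bc$trace$scores[[nrow(bc$trace)]] # score table of the most recent run
bc$plot_trace()                   # plot all recorded runs
```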
 
@@ -64,7 +72,7 @@ Example 2: Scoring two plates at once
   plate = plate, row = row, column = col, .color = Group,
   title = "Ex2: Design created by swapping samples 'globally' across the plates"
 )
-[plot image]
+[plot image regenerated]
While this ‘global’ optimization is possible, it probably does not converge to an (almost) ideal solution in an acceptable time if there are more samples involved. This is due to a lot of unproductive sample
@@ -75,48 +83,46 @@ Example 2: Scoring two plates at once
the use of a dedicated sample permutation function which takes the plate structure into account and only shuffles samples around within one plate.

-# Setting up the batch container
-
-bc <- example2$copy()
-
-bc$scoring_f <- osat_score_generator(batch_vars = c("plate"), feature_vars = c("Group"))
+scoring_f <- osat_score_generator(batch_vars = c("plate"), feature_vars = c("Group"))
 
 set.seed(42)
-optimize_design(bc,
+bc <- optimize_design(
+  example2,
+  scoring = scoring_f,
   quiet = TRUE,
   max_iter = 200, # this is set to shorten vignette run-time, normally we don't know.
   n_shuffle = 2,
   acceptance_func = mk_simanneal_acceptance_func(mk_simanneal_temp_func(T0 = 10000, alpha = 0.5)),
 )
-#> Optimization trace (201 score values, elapsed 4.254329 secs).
-#>   Starting score: 128
-#>   Final score   : 0
+bc$trace$elapsed
+#> Time difference of 2.140794 secs
 
 plot_plate(bc,
   plate = plate, row = row, column = col, .color = Group,
   title = "Ex2: 'Plate wise' design\nStep 1: after allocating samples to plates"
 )
-

+


-bc$scoring_f <- mk_plate_scoring_functions(bc, plate = "plate", row = "row", column = "col", group = "Group")
+scoring_f <- mk_plate_scoring_functions(bc, plate = "plate", row = "row", column = "col", group = "Group")
 
-bc$score()
+bc$score(scoring_f)
 #>  Plate 1  Plate 2 
 #> 12.77527 13.63704
set.seed(42)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+  bc,
+  scoring = scoring_f,
   max_iter = 400,
   shuffle_proposal_func = mk_subgroup_shuffling_function(subgroup_vars = c("plate")),
   acceptance_func = mk_simanneal_acceptance_func(mk_simanneal_temp_func(T0 = 1000, alpha = 0.5)),
   aggregate_scores_func = L2s_norm,
   quiet = TRUE
 )
-trace
-#> Optimization trace (401 score values, elapsed 3.579697 secs).
-#>   Starting score: 12.775,13.637
-#>   Final score   : 6.855,6.309
+bc$trace$elapsed
+#> Time differences in secs
+#> [1] 2.140794 3.270934
 
-bc$score()
+bc$score(scoring_f)
 #>  Plate 1  Plate 2 
 #> 6.854748 6.309297
 
@@ -124,7 +130,7 @@ 

Example 2: Scoring two plates at once

plate = plate, row = row, column = col, .color = Group, title = "Ex2: 'Plate wise' design\nStep 2: after arranging samples within plates" )
-

+

In this case, the shuffling function exchanges one pair of sample assignments at each iteration (the default). However, any number of constant swaps or a swapping protocol (formally a vector of integers) can be
@@ -137,64 +143,62 @@

Example 2: Scoring two plates at once

generates the permutations. It enforces permutation only to happen first within plate 1, then within plate 2, so that the two scores can be optimized in succeeding runs.
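A sketch of the two knobs discussed here, assuming the objects from this example: a per-plate restriction via `restrain_on_subgroup_levels`, and a swapping protocol given as an integer vector with one swap count per iteration (optimization stops when the schedule is exhausted):

```r
# Sketch: optimize plate 1 first, then plate 2, reusing the container.
for (plate_level in c(1, 2)) {
  bc <- optimize_design(
    bc,
    scoring = scoring_f,
    shuffle_proposal_func = mk_subgroup_shuffling_function(
      subgroup_vars = c("plate"),
      restrain_on_subgroup_levels = c(plate_level)
    ),
    max_iter = 150,
    quiet = TRUE
  )
}

# Sketch: a swapping protocol; the schedule length implies max_iter.
bc <- optimize_design(
  bc,
  scoring = scoring_f,
  n_shuffle = c(rep(10, 10), rep(2, 90)),
  quiet = TRUE
)
```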

-# Setting up the batch container
-
-bc <- example2$copy()
-
-bc$scoring_f <- osat_score_generator(batch_vars = c("plate"), feature_vars = c("Group"))
+scoring_f <- osat_score_generator(batch_vars = c("plate"), feature_vars = c("Group"))
set.seed(42)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+  example2,
+  scoring = scoring_f,
   quiet = TRUE,
   max_iter = 150, # this is set to shorten vignette run-time, normally we don't know.
   n_shuffle = 2,
   acceptance_func = mk_simanneal_acceptance_func(mk_simanneal_temp_func(T0 = 10000, alpha = 0.5)),
 )
-trace
-#> Optimization trace (151 score values, elapsed 4.056035 secs).
-#>   Starting score: 128
-#>   Final score   : 0
+bc$trace$elapsed
+#> Time difference of 1.791624 secs
 
 plot_plate(bc,
   plate = plate, row = row, column = col, .color = Group,
   title = "Ex2: 'Serial plate' design\nStep 1: after allocating samples to plates"
 )
-

+


-bc$scoring_f <- mk_plate_scoring_functions(bc, plate = "plate", row = "row", column = "col", group = "Group")
+scoring_f <- mk_plate_scoring_functions(bc, plate = "plate", row = "row", column = "col", group = "Group")
 
-bc$score()
+bc$score(scoring_f)
 #>  Plate 1  Plate 2 
 #> 10.57482 26.16613
set.seed(42)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+  bc,
+  scoring = scoring_f,
   max_iter = 150,
   quiet = TRUE,
   shuffle_proposal_func = mk_subgroup_shuffling_function(subgroup_vars = c("plate"), restrain_on_subgroup_levels = c(1)),
   acceptance_func = mk_simanneal_acceptance_func(mk_simanneal_temp_func(T0 = 1000, alpha = 0.5)),
   aggregate_scores_func = L2s_norm
 )
-trace
-#> Optimization trace (151 score values, elapsed 2.528134 secs).
-#>   Starting score: 10.575,26.166
-#>   Final score   : 6.416,26.166
+bc$trace$elapsed
+#> Time differences in secs
+#> [1] 1.791624 1.591002
 
-bc$score()
+bc$score(scoring_f)
 #>   Plate 1   Plate 2 
 #>  6.416193 26.166134
set.seed(42)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+  bc,
+  scoring = scoring_f,
   max_iter = 550,
   quiet = TRUE,
   shuffle_proposal_func = mk_subgroup_shuffling_function(subgroup_vars = c("plate"), restrain_on_subgroup_levels = c(2)),
   acceptance_func = mk_simanneal_acceptance_func(mk_simanneal_temp_func(T0 = 1000, alpha = 0.5)),
   aggregate_scores_func = L2s_norm
 )
-trace
-#> Optimization trace (551 score values, elapsed 2.93787 secs).
-#>   Starting score: 6.416,26.166
-#>   Final score   : 6.416,6.582
+bc$trace$elapsed
+#> Time differences in secs
+#> [1] 1.791624 1.591002 4.226352
 
-bc$score()
+bc$score(scoring_f)
 #>  Plate 1  Plate 2 
 #> 6.416193 6.581966
 
@@ -202,5 +206,5 @@ 

Example 2: Scoring two plates at once

plate = plate, row = row, column = col, .color = Group, title = "Ex2: 'Serial plate' design\nStep 2: after optimizing each plate in turn" )
-

+

diff --git a/vignettes/cached/_plate_scoring_ex3.Rmd b/vignettes/cached/_plate_scoring_ex3.Rmd
index 8f5fdca6..872e9d9d 100644
--- a/vignettes/cached/_plate_scoring_ex3.Rmd
+++ b/vignettes/cached/_plate_scoring_ex3.Rmd
@@ -1,7 +1,7 @@
 ---
 title: "Plate scoring example 3"
 output: html_fragment
-knit: (\(input, ...) rmarkdown::render(input, output_dir = "vignettes/cached"))
+knit: (\(input, ...) rmarkdown::render(input, output_dir = here::here("vignettes/cached")))
 ---
 
 ```{r, include = FALSE}
@@ -51,7 +51,7 @@ example3 <- BatchContainer$new(
 
 # Assign samples randomly to start from a better initial state
-assign_random(example3,
+example3 <- assign_random(example3,
   samples = tibble::tibble(
     Group = rep.int(c("Grp 1", "Grp 2", "Grp3"),
       times = c(69, 30, 69)
@@ -60,14 +60,14 @@ assign_random(example3,
   )
 )
 
-bc <- example3$copy()
-
-bc$scoring_f <- osat_score_generator(batch_vars = c("plate"), feature_vars = c("Group"))
+scoring_f <- osat_score_generator(batch_vars = c("plate"), feature_vars = c("Group"))
 ```
 
 ```{r}
 set.seed(42)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+  example3,
+  scoring = scoring_f,
   quiet = TRUE,
   max_iter = 150,
   n_shuffle = 2,
@@ -76,7 +76,7 @@ trace <- optimize_design(bc,
 ```
 
 ```{r}
-trace
+bc$trace$elapsed
 ```
 
 ```{r, fig.width=7, fig.height=3.5}
@@ -87,17 +87,19 @@ plot_plate(bc,
 ```
 
 ```{r}
-bc$scoring_f <- mk_plate_scoring_functions(bc,
+scoring_f <- mk_plate_scoring_functions(bc,
   plate = "plate", row = "row",
   column = "col", group = "Group"
 )
 
-bc$score()
+bc$score(scoring_f)
 ```
 
 ```{r}
 set.seed(42)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+  bc,
+  scoring = scoring_f,
   max_iter = 300,
   shuffle_proposal_func = mk_subgroup_shuffling_function(
     subgroup_vars = c("plate"),
@@ -110,9 +112,9 @@ trace <- optimize_design(bc,
 ```
 
 ```{r}
-trace$elapsed
+bc$trace$elapsed
 
-bc$score()
+bc$score(scoring_f)
 ```
 
 ```{r, fig.width=7, fig.height=3.5}
diff --git a/vignettes/cached/_plate_scoring_ex3.html b/vignettes/cached/_plate_scoring_ex3.html
index 35a11141..40ed3f06 100644
--- a/vignettes/cached/_plate_scoring_ex3.html
+++ b/vignettes/cached/_plate_scoring_ex3.html
@@ -4,6 +4,14 @@
library(designit)
 library(ggplot2)
 library(dplyr)
+#> 
+#> Attaching package: 'dplyr'
+#> The following objects are masked from 'package:stats':
+#> 
+#>     filter, lag
+#> The following objects are masked from 'package:base':
+#> 
+#>     intersect, setdiff, setequal, union
 library(tidyr)

Example 3: 3 plates with different dimension and different sample
 
@@ -39,7 +47,7 @@

Example 3: 3 plates with different dimension and different sample
 
 # Assign samples randomly to start from a better initial state
-assign_random(example3,
+example3 <- assign_random(example3,
   samples = tibble::tibble(
     Group = rep.int(c("Grp 1", "Grp 2", "Grp3"),
       times = c(69, 30, 69)
@@ -48,35 +56,35 @@

Example 3: 3 plates with different dimension and different sample
 
   )
 )
 
-bc <- example3$copy()
-
-bc$scoring_f <- osat_score_generator(batch_vars = c("plate"), feature_vars = c("Group"))

+scoring_f <- osat_score_generator(batch_vars = c("plate"), feature_vars = c("Group"))
set.seed(42)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+  example3,
+  scoring = scoring_f,
   quiet = TRUE,
   max_iter = 150,
   n_shuffle = 2,
   acceptance_func = mk_simanneal_acceptance_func(mk_simanneal_temp_func(T0 = 1000, alpha = 0.5)),
 )
-trace
-#> Optimization trace (151 score values, elapsed 3.348524 secs).
-#>   Starting score: 17.714
-#>   Final score   : 1.429
+bc$trace$elapsed
+#> Time difference of 2.03003 secs
plot_plate(bc,
   plate = plate, row = row, column = col, .color = Group,
   title = "Ex3: Dealing with plates of different size\nStep 1: after distributing groups across plates"
 )
-
 
-bc$scoring_f <- mk_plate_scoring_functions(bc,
+
 
+scoring_f <- mk_plate_scoring_functions(bc,
   plate = "plate", row = "row",
   column = "col", group = "Group"
 )
 
-bc$score()
+bc$score(scoring_f)
 #>   Plate 1   Plate 2   Plate 3 
-#>  9.706637  9.585770 10.419567
+#> 9.387071 10.302690 9.826243
set.seed(42)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+  bc,
+  scoring = scoring_f,
   max_iter = 300,
   shuffle_proposal_func = mk_subgroup_shuffling_function(
     subgroup_vars = c("plate"),
@@ -86,15 +94,16 @@ 

Example 3: 3 plates with different dimension and different sample
 
   aggregate_scores_func = L2s_norm,
   quiet = TRUE
 )

-trace$elapsed
-#> Time difference of 40.35472 secs
+bc$trace$elapsed
+#> Time differences in secs
+#> [1] 2.030030 4.778121
 
-bc$score()
+bc$score(scoring_f)
 #>  Plate 1  Plate 2  Plate 3 
-#> 8.974408 8.253074 7.980756
+#> 8.809205 8.553802 8.185525
plot_plate(bc,
   plate = plate, row = row, column = col, .color = Group,
   title = "Ex3: Dealing with plates of different size\nStep 2: after swapping samples within plates"
 )
-

+

diff --git a/vignettes/cached/_plate_scoring_ex4.Rmd b/vignettes/cached/_plate_scoring_ex4.Rmd
index c12c29bc..6ce2e1c5 100644
--- a/vignettes/cached/_plate_scoring_ex4.Rmd
+++ b/vignettes/cached/_plate_scoring_ex4.Rmd
@@ -1,7 +1,7 @@
 ---
 title: "Plate scoring example 4"
 output: html_fragment
-knit: (\(input, ...) rmarkdown::render(input, output_dir = "vignettes/cached"))
+knit: (\(input, ...) rmarkdown::render(input, output_dir = here::here("vignettes/cached")))
 ---
 
 ```{r, include = FALSE}
@@ -38,31 +38,31 @@ example4 <- BatchContainer$new(
 
 # Assign samples randomly to start from lower score (avoid Inf values even since plate 3 will miss 2 groups initially :)
-assign_in_order(example4, samples = tibble::tibble(
+example4 <- assign_in_order(example4, samples = tibble::tibble(
   Group = rep.int(c("Treatment 1", "Treatment 2"), times = c(10, 10)),
   Sex = c(rep(c("M", "F", "F", "M"), times = 4), "M", NA, NA, "F"),
   ID = 1:20
 ))
 
-bc <- example4$copy()
-
 cowplot::plot_grid(
-  plot_plate(bc, plate = plate, row = row, column = col, .color = Group, title = "Initial layout by Group"),
-  plot_plate(bc, plate = plate, row = row, column = col, .color = Sex, title = "Initial layout by Sex"),
+  plot_plate(example4, plate = plate, row = row, column = col, .color = Group, title = "Initial layout by Group"),
+  plot_plate(example4, plate = plate, row = row, column = col, .color = Sex, title = "Initial layout by Sex"),
   ncol = 2
 )
 
-bc$scoring_f <- c(
-  Group = mk_plate_scoring_functions(bc, row = "row", column = "col", group = "Group"),
-  Sex = mk_plate_scoring_functions(bc, row = "row", column = "col", group = "Sex")
+scoring_f <- c(
+  Group = mk_plate_scoring_functions(example4, row = "row", column = "col", group = "Group"),
+  Sex = mk_plate_scoring_functions(example4, row = "row", column = "col", group = "Sex")
 )
 
-bc$score()
+example4$score(scoring_f)
 ```
 
 ```{r}
 set.seed(42)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+  example4,
+  scoring = scoring_f,
   max_iter = 750,
   n_shuffle = 1,
   acceptance_func = mk_simanneal_acceptance_func(mk_simanneal_temp_func(T0 = 10000, alpha = 1)),
@@ -74,9 +74,9 @@ trace <- optimize_design(bc,
 ```
 
 ```{r, fig.width=7, fig.height=3.5}
-trace$elapsed
+bc$trace$elapsed
 
-bc$score()
+bc$score(scoring_f)
 
 cowplot::plot_grid(
   plot_plate(bc, plate = plate, row = row, column = col, .color = Group, title = "Final layout by Group"),
@@ -90,7 +90,9 @@ reference!
 
 ```{r}
 set.seed(42)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+  bc,
+  scoring = scoring_f,
   max_iter = 500,
   n_shuffle = 1,
   acceptance_func = mk_simanneal_acceptance_func(mk_simanneal_temp_func(T0 = 10000, alpha = 1)),
@@ -103,7 +105,7 @@ trace <- optimize_design(bc,
 ```
 
 ```{r, fig.width=7, fig.height=3.5}
-bc$score()
+bc$score(scoring_f)
 
 cowplot::plot_grid(
   plot_plate(bc, plate = plate, row = row, column = col, .color = Group, title = "Final layout by Group"),
@@ -118,17 +120,17 @@ default acceptance function. We are strictly prioritizing the leftmost
 score, which additionally reflects its relevance for the design.
 ```{r}
-bc <- example4$copy()
-
-bc$scoring_f <- c(
-  Group = mk_plate_scoring_functions(bc, row = "row", column = "col", group = "Group"),
-  Sex = mk_plate_scoring_functions(bc, row = "row", column = "col", group = "Sex")
+scoring_f <- c(
+  Group = mk_plate_scoring_functions(example4, row = "row", column = "col", group = "Group"),
+  Sex = mk_plate_scoring_functions(example4, row = "row", column = "col", group = "Sex")
 )
 
-bc$score()
+example4$score(scoring_f)
 
 set.seed(42)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+  example4,
+  scoring = scoring_f,
   max_iter = 5000,
   n_shuffle = 1,
   acceptance_func = accept_leftmost_improvement,
@@ -138,7 +140,7 @@ trace <- optimize_design(bc,
 ```
 
 ```{r, fig.width=7, fig.height=3.5}
-bc$score()
+bc$score(scoring_f)
 
 cowplot::plot_grid(
   plot_plate(bc, plate = plate, row = row, column = col, .color = Group, title = "Final layout by Group"),
@@ -151,17 +153,16 @@ Using a tolerance value to accept slightly worse solutions in the
 leftmost relevant score if overcompensated by other scores:
 
 ```{r}
-bc <- example4$copy()
-
-
-bc$scoring_f <- c(
-  Group = mk_plate_scoring_functions(bc, row = "row", column = "col", group = "Group"),
-  Sex = mk_plate_scoring_functions(bc, row = "row", column = "col", group = "Sex")
+scoring_f <- c(
+  Group = mk_plate_scoring_functions(example4, row = "row", column = "col", group = "Group"),
+  Sex = mk_plate_scoring_functions(example4, row = "row", column = "col", group = "Sex")
 )
 
 
 set.seed(42)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+  example4,
+  scoring = scoring_f,
   max_iter = 5000,
   n_shuffle = 1,
   acceptance_func = ~ accept_leftmost_improvement(..., tolerance = 0.1),
@@ -169,12 +170,10 @@ trace <- optimize_design(bc,
   quiet = TRUE
 )
 
-bc$score()
+bc$score(scoring_f)
 ```
 
 ```{r, fig.width=7, fig.height=3.5}
-bc$score()
-
 cowplot::plot_grid(
   plot_plate(bc, plate = plate, row = row, column = col, .color = Group, title = "Final layout by Group"),
   plot_plate(bc, plate = plate, row = row, column = col, .color = Sex, title = "Final layout by Sex"),
@@ -188,17 +187,17 @@ $\kappa^p$, $0 < \kappa < 1$
 We choose a $\kappa$ of 0.5, i.e. the second score's improvement counts
 half of that of the first one.
 
 ```{r}
-bc <- example4$copy()
-
-bc$scoring_f <- c(
-  Group = mk_plate_scoring_functions(bc, row = "row", column = "col", group = "Group"),
-  Sex = mk_plate_scoring_functions(bc, row = "row", column = "col", group = "Sex")
+scoring_f <- c(
+  Group = mk_plate_scoring_functions(example4, row = "row", column = "col", group = "Group"),
+  Sex = mk_plate_scoring_functions(example4, row = "row", column = "col", group = "Sex")
 )
 
-bc$score()
+bc$score(scoring_f)
 
 set.seed(42)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+  example4,
+  scoring = scoring_f,
   max_iter = 1000,
   n_shuffle = 1,
   acceptance_func = mk_exponentially_weighted_acceptance_func(kappa = 0.5, simulated_annealing = T),
@@ -208,7 +207,7 @@ trace <- optimize_design(bc,
 ```
 
 ```{r, fig.width=7, fig.height=3.5}
-bc$score()
+bc$score(scoring_f)
 
 cowplot::plot_grid(
   plot_plate(bc, plate = plate, row = row, column = col, .color = Group, title = "Final layout by Group"),
diff --git a/vignettes/cached/_plate_scoring_ex4.html b/vignettes/cached/_plate_scoring_ex4.html
index 61806644..d3c1472d 100644
--- a/vignettes/cached/_plate_scoring_ex4.html
+++ b/vignettes/cached/_plate_scoring_ex4.html
@@ -4,6 +4,14 @@
library(designit)
 library(ggplot2)
 library(dplyr)
+#> 
+#> Attaching package: 'dplyr'
+#> The following objects are masked from 'package:stats':
+#> 
+#>     filter, lag
+#> The following objects are masked from 'package:base':
+#> 
+#>     intersect, setdiff, setequal, union
 library(tidyr)

Example 4: More than one group factor to balance and empty plate
 
@@ -26,31 +34,31 @@

Example 4: More than one group factor to balance and empty plate
 
 # Assign samples randomly to start from lower score (avoid Inf values even since plate 3 will miss 2 groups initially :)
-assign_in_order(example4, samples = tibble::tibble(
+example4 <- assign_in_order(example4, samples = tibble::tibble(
   Group = rep.int(c("Treatment 1", "Treatment 2"), times = c(10, 10)),
   Sex = c(rep(c("M", "F", "F", "M"), times = 4), "M", NA, NA, "F"),
   ID = 1:20
 ))
 
-bc <- example4$copy()
-
 cowplot::plot_grid(
-  plot_plate(bc, plate = plate, row = row, column = col, .color = Group, title = "Initial layout by Group"),
-  plot_plate(bc, plate = plate, row = row, column = col, .color = Sex, title = "Initial layout by Sex"),
+  plot_plate(example4, plate = plate, row = row, column = col, .color = Group, title = "Initial layout by Group"),
+  plot_plate(example4, plate = plate, row = row, column = col, .color = Sex, title = "Initial layout by Sex"),
   ncol = 2
 )

-

+


 
-bc$scoring_f <- c(
-  Group = mk_plate_scoring_functions(bc, row = "row", column = "col", group = "Group"),
-  Sex = mk_plate_scoring_functions(bc, row = "row", column = "col", group = "Sex")
+scoring_f <- c(
+  Group = mk_plate_scoring_functions(example4, row = "row", column = "col", group = "Group"),
+  Sex = mk_plate_scoring_functions(example4, row = "row", column = "col", group = "Sex")
 )
 
-bc$score()
+example4$score(scoring_f)
 #> Group.Plate   Sex.Plate 
 #>    83.63858   239.20748
set.seed(42)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+  example4,
+  scoring = scoring_f,
   max_iter = 750,
   n_shuffle = 1,
   acceptance_func = mk_simanneal_acceptance_func(mk_simanneal_temp_func(T0 = 10000, alpha = 1)),
@@ -59,10 +67,10 @@ 

Example 4: More than one group factor to balance and empty plate
 
   },
   quiet = TRUE
 )

-trace$elapsed
-#> Time difference of 7.062086 secs
+bc$trace$elapsed
+#> Time difference of 6.251454 secs
 
-bc$score()
+bc$score(scoring_f)
 #> Group.Plate   Sex.Plate 
 #>    8.019656    7.608810
 
@@ -71,11 +79,13 @@ 

Example 4: More than one group factor to balance and empty plate
 
   plot_plate(bc, plate = plate, row = row, column = col, .color = Sex, title = "Final layout by Sex"),
   ncol = 2
 )

-

+

We do the same example with auto-scaling, weighted scoring and SA to have a reference!

set.seed(42)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+  bc,
+  scoring = scoring_f,
   max_iter = 500,
   n_shuffle = 1,
   acceptance_func = mk_simanneal_acceptance_func(mk_simanneal_temp_func(T0 = 10000, alpha = 1)),
@@ -86,7 +96,7 @@ 

Example 4: More than one group factor to balance and empty plate
 
   quiet = TRUE
 )
 #> ... Performing simple mean/stddev adjustment.

-bc$score()
+bc$score(scoring_f)
 #> Group.Plate   Sex.Plate 
 #>    8.080860    7.458345
 
@@ -95,25 +105,25 @@ 

Example 4: More than one group factor to balance and empty plate
 
   plot_plate(bc, plate = plate, row = row, column = col, .color = Sex, title = "Final layout by Sex"),
   ncol = 2
 )

-

+

We do the same example with auto-scaling and position-dependent scoring now, not aggregating the score vector! This is more effective even when using the default acceptance function. We are strictly prioritizing the leftmost score, which additionally reflects its relevance for the design.
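To make the strict left-to-right rule concrete, here is a toy sketch of the comparison performed (illustrative only, not the package implementation; lower scores are better, as in the examples above):

```r
# Toy sketch of strict leftmost-improvement acceptance.
leftmost_improvement <- function(new, old) {
  d <- new - old
  i <- which(d != 0)[1] # leftmost score that changed
  !is.na(i) && d[i] < 0 # accept only if that score improved
}
leftmost_improvement(c(7.5, 9.9), c(7.6, 2.0)) # TRUE: first score improved
leftmost_improvement(c(7.7, 1.0), c(7.6, 9.9)) # FALSE: first score got worse
```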

-bc <- example4$copy()
-
-bc$scoring_f <- c(
-  Group = mk_plate_scoring_functions(bc, row = "row", column = "col", group = "Group"),
-  Sex = mk_plate_scoring_functions(bc, row = "row", column = "col", group = "Sex")
+scoring_f <- c(
+  Group = mk_plate_scoring_functions(example4, row = "row", column = "col", group = "Group"),
+  Sex = mk_plate_scoring_functions(example4, row = "row", column = "col", group = "Sex")
 )
 
-bc$score()
+example4$score(scoring_f)
 #> Group.Plate   Sex.Plate 
 #>    83.63858   239.20748
 
 set.seed(42)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+  example4,
+  scoring = scoring_f,
   max_iter = 5000,
   n_shuffle = 1,
   acceptance_func = accept_leftmost_improvement,
@@ -121,7 +131,7 @@ 

Example 4: More than one group factor to balance and empty plate
 
   quiet = TRUE
 )
 #> ... Performing simple mean/stddev adjustment.

-bc$score()
+bc$score(scoring_f)
 #> Group.Plate   Sex.Plate 
 #>    7.619846    7.473524
 
@@ -130,20 +140,19 @@ 

Example 4: More than one group factor to balance and empty plate
 
   plot_plate(bc, plate = plate, row = row, column = col, .color = Sex, title = "Final layout by Sex"),
   ncol = 2
 )

-

+

Using a tolerance value, slightly worse solutions in the leftmost relevant score can be accepted if the loss is overcompensated by improvements in the other scores:
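Numerically, `tolerance = 0.1` permits the leftmost score to worsen by up to 0.1, provided some score further right improves. A toy sketch with illustrative numbers (again, not the package implementation):

```r
# Toy sketch of tolerance-based leftmost acceptance.
tolerant_improvement <- function(new, old, tolerance = 0.1) {
  if (new[1] - old[1] > tolerance) return(FALSE) # leftmost worsened too much
  any(new < old) # some score must still improve
}
tolerant_improvement(c(7.45, 7.10), c(7.40, 7.60)) # TRUE: +0.05 is within 0.1
```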

-bc <- example4$copy()
-
-
-bc$scoring_f <- c(
-  Group = mk_plate_scoring_functions(bc, row = "row", column = "col", group = "Group"),
-  Sex = mk_plate_scoring_functions(bc, row = "row", column = "col", group = "Sex")
+scoring_f <- c(
+  Group = mk_plate_scoring_functions(example4, row = "row", column = "col", group = "Group"),
+  Sex = mk_plate_scoring_functions(example4, row = "row", column = "col", group = "Sex")
 )
 
 
 set.seed(42)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+  example4,
+  scoring = scoring_f,
   max_iter = 5000,
   n_shuffle = 1,
   acceptance_func = ~ accept_leftmost_improvement(..., tolerance = 0.1),
@@ -152,37 +161,33 @@ 

Example 4: More than one group factor to balance and empty plate
 
 )
 #> ... Performing simple mean/stddev adjustment.
 
-bc$score()
+bc$score(scoring_f)
 #> Group.Plate   Sex.Plate 
 #>    7.366667    7.323324

-bc$score()
-#> Group.Plate   Sex.Plate 
-#>    7.366667    7.323324
-
-cowplot::plot_grid(
+cowplot::plot_grid(
   plot_plate(bc, plate = plate, row = row, column = col, .color = Group, title = "Final layout by Group"),
   plot_plate(bc, plate = plate, row = row, column = col, .color = Sex, title = "Final layout by Sex"),
   ncol = 2
 )
-

+

Testing an alternative left-to-right weighting of scores, based on exponential down-weighting of the respective score differences at position \(p\) with factor \(\kappa^p\), \(0 < \kappa < 1\). We choose a \(\kappa\) of 0.5, i.e. the second score’s improvement counts half as much as that of the first one.
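As a worked example under these settings (lower scores are better, as elsewhere in this vignette), the aggregated change of a proposal is the weighted sum \(\sum_p \kappa^p \Delta_p\) of the per-score deltas; a small sketch with illustrative numbers:

```r
# Sketch: exponentially down-weighted score deltas with kappa = 0.5.
kappa <- 0.5
delta <- c(+0.05, -0.30) # per-score change, new minus old
sum(kappa^(seq_along(delta) - 1) * delta)
#> [1] -0.1
# Net improvement: the second score's gain outweighs the small loss
# in the first, but only counts at half weight.
```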

-bc <- example4$copy()
-
-bc$scoring_f <- c(
-  Group = mk_plate_scoring_functions(bc, row = "row", column = "col", group = "Group"),
-  Sex = mk_plate_scoring_functions(bc, row = "row", column = "col", group = "Sex")
+scoring_f <- c(
+  Group = mk_plate_scoring_functions(example4, row = "row", column = "col", group = "Group"),
+  Sex = mk_plate_scoring_functions(example4, row = "row", column = "col", group = "Sex")
 )
 
-bc$score()
+bc$score(scoring_f)
 #> Group.Plate   Sex.Plate 
-#>    83.63858   239.20748
+#>    7.366667    7.323324
 
 set.seed(42)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+  example4,
+  scoring = scoring_f,
   max_iter = 1000,
   n_shuffle = 1,
   acceptance_func = mk_exponentially_weighted_acceptance_func(kappa = 0.5, simulated_annealing = T),
@@ -190,7 +195,7 @@ 

Example 4: More than one group factor to balance and empty plate
 
   quiet = TRUE
 )
 #> ... Performing simple mean/stddev adjustment.

-bc$score()
+bc$score(scoring_f)
 #> Group.Plate   Sex.Plate 
 #>    7.630367    7.616179
 
@@ -199,5 +204,5 @@ 

Example 4: More than one group factor to balance and empty plate
 
   plot_plate(bc, plate = plate, row = row, column = col, .color = Sex, title = "Final layout by Sex"),
   ncol = 2
 )

-

+

diff --git a/vignettes/cached/_plate_scoring_ex5.Rmd b/vignettes/cached/_plate_scoring_ex5.Rmd
index be4af9be..064d0571 100644
--- a/vignettes/cached/_plate_scoring_ex5.Rmd
+++ b/vignettes/cached/_plate_scoring_ex5.Rmd
@@ -1,7 +1,7 @@
 ---
 title: "Plate scoring example 5"
 output: html_fragment
-knit: (\(input, ...) rmarkdown::render(input, output_dir = "vignettes/cached"))
+knit: (\(input, ...) rmarkdown::render(input, output_dir = here::here("vignettes/cached")))
 ---
 
 ```{r, include = FALSE}
@@ -33,25 +33,25 @@ example5 <- BatchContainer$new(
 )
 
 # Assign samples randomly to start from lower score (avoid `Inf` values when doing the 'hard' penalization)
-assign_random(example5, samples = tibble::tibble(
+example5 <- assign_random(example5, samples = tibble::tibble(
   Group = rep.int(paste("Group", 1:5), times = c(8, 8, 8, 8, 64)),
   ID = 1:96
 ))
 
 penalize_lines <- "hard"
 
-bc <- example5$copy()
-
-bc$scoring_f <- c(
-  Group = mk_plate_scoring_functions(bc, row = "row", column = "col", group = "Group", p = 2, penalize_lines = penalize_lines)
+scoring_f <- c(
+  Group = mk_plate_scoring_functions(example5, row = "row", column = "col", group = "Group", p = 2, penalize_lines = penalize_lines)
 )
 
-bc$score()
+example5$score(scoring_f)
 ```
 
 ```{r}
 set.seed(42)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+  example5,
+  scoring = scoring_f,
   max_iter = 5000,
   n_shuffle = 1,
   acceptance_func = mk_simanneal_acceptance_func(mk_simanneal_temp_func(T0 = 500, alpha = 0.1)),
@@ -60,9 +60,9 @@ trace <- optimize_design(bc,
 ```
 
 ```{r, fig.width=7, fig.height=3.5}
-trace$elapsed
+bc$trace$elapsed
 
-bc$score()
+bc$score(scoring_f)
 
 plot_plate(bc, plate = plate, row = row, column = col, .color = Group, title = stringr::str_c("Line penalization: ", penalize_lines))
 ```
diff --git a/vignettes/cached/_plate_scoring_ex5.html b/vignettes/cached/_plate_scoring_ex5.html
index 35874c9f..368fc058 100644
--- a/vignettes/cached/_plate_scoring_ex5.html
+++ b/vignettes/cached/_plate_scoring_ex5.html
@@ -4,6 +4,14 @@
library(designit)
 library(ggplot2)
 library(dplyr)
+#> 
+#> Attaching package: 'dplyr'
+#> The following objects are masked from 'package:stats':
+#> 
+#>     filter, lag
+#> The following objects are masked from 'package:base':
+#> 
+#>     intersect, setdiff, setequal, union
 library(tidyr)

Example 5: Avoiding ‘regular patterns’ in plate layout

@@ -21,36 +29,36 @@

Example 5: Avoiding ‘regular patterns’ in plate layout

 )
 
 # Assign samples randomly to start from lower score (avoid `Inf` values when doing the 'hard' penalization)
-assign_random(example5, samples = tibble::tibble(
+example5 <- assign_random(example5, samples = tibble::tibble(
   Group = rep.int(paste("Group", 1:5), times = c(8, 8, 8, 8, 64)),
   ID = 1:96
 ))
 
 penalize_lines <- "hard"
 
-bc <- example5$copy()
-
-bc$scoring_f <- c(
-  Group = mk_plate_scoring_functions(bc, row = "row", column = "col", group = "Group", p = 2, penalize_lines = penalize_lines)
+scoring_f <- c(
+  Group = mk_plate_scoring_functions(example5, row = "row", column = "col", group = "Group", p = 2, penalize_lines = penalize_lines)
 )
 
-bc$score()
+example5$score(scoring_f)
 #> Group.Plate 
-#> 9.960584
+#> 11.08608
set.seed(42)
-trace <- optimize_design(bc,
+bc <- optimize_design(
+  example5,
+  scoring = scoring_f,
   max_iter = 5000,
   n_shuffle = 1,
   acceptance_func = mk_simanneal_acceptance_func(mk_simanneal_temp_func(T0 = 500, alpha = 0.1)),
   quiet = T
 )
-trace$elapsed
-#> Time difference of 28.7293 secs
+bc$trace$elapsed
+#> Time difference of 29.47413 secs
 
-bc$score()
+bc$score(scoring_f)
 #> Group.Plate 
-#>    8.819968
+#>    8.785693
 
 plot_plate(bc, plate = plate, row = row, column = col, .color = Group, title = stringr::str_c("Line penalization: ", penalize_lines))
-

+

diff --git a/vignettes/custom_shuffle.Rmd b/vignettes/custom_shuffle.Rmd
index 6d835657..1dec234f 100644
--- a/vignettes/custom_shuffle.Rmd
+++ b/vignettes/custom_shuffle.Rmd
@@ -55,8 +55,8 @@ We start by assigning samples randomly.
 set.seed(42)
 bc <- BatchContainer$new(
   dimensions = c("cage" = 11, "position" = 5)
-)
-assign_random(bc, samples)
+) %>%
+  assign_random(samples)
 
 bc
 ```
@@ -96,15 +96,14 @@ and `sex` interactions are considered in the scoring function. We only use 10 iterations,
 shuffling is limited to locations with males and enforces change of cage on every iteration.
 
 ```{r}
-bc$scoring_f <- osat_score_generator(
-  "cage",
-  "sex"
-)
-
 set.seed(10)
-res <- optimize_design(
+bc <- optimize_design(
   bc,
+  scoring = osat_score_generator(
+    "cage",
+    "sex"
+  ),
   shuffle_proposal_func = shuffle_with_constraints(
     sex == "M",
     cage != .src$cage
@@ -112,7 +111,7 @@ res <- optimize_design(
   max_iter = 10
 )
 
-plot(res)
+bc$plot_trace()
 ```
 
 We expect the distribution of males to become even, while other variables are not significantly affected.
@@ -129,7 +128,7 @@ locations. We also ensure that on every iteration the cage number is changed; we
 `position` dimension does affect actual animal allocation.
 
 ```{r}
-bc$scoring_f <- function(bc) {
+scoring_f <- function(bc) {
   samples <- bc$get_samples(include_id = TRUE, as_tibble = FALSE)
   avg_w <- samples[, mean(weight, na.rm = TRUE)]
   avg_w_per_cage <- samples[!is.na(weight), mean(weight), by = cage]$V1
@@ -141,7 +140,8 @@ bc$scoring_f <- function(bc) {
 }
 
 set.seed(12)
-res <- optimize_design(bc,
+bc <- optimize_design(bc,
+  scoring = scoring_f,
   shuffle_proposal = shuffle_with_constraints(
     sex == "F",
     cage != .src$cage & (is.na(sex) | sex != "M")
@@ -149,8 +149,8 @@ res <- optimize_design(
   n_shuffle = c(rep(10, 20), rep(5, 20), rep(3, 20), rep(1, 140)),
   max_iter = 200
 )
-plot(res)
-bc$score()
+bc$plot_trace()
+scoring_f(bc)
 ```
 
 Now we have a much more even distribution of weights and treatment/control balance.
diff --git a/vignettes/invivo_study_design.Rmd b/vignettes/invivo_study_design.Rmd
index d09375ef..fec8ac1f 100644
--- a/vignettes/invivo_study_design.Rmd
+++ b/vignettes/invivo_study_design.Rmd
@@ -215,7 +215,7 @@ InVivo_assignTreatments <- function(animal_list, treatments,
     exclude = exclude_table
   )
 
-  assign_in_order(bc_treatment, ani_bclevels)
+  bc_treatment <- assign_in_order(bc_treatment, ani_bclevels)
 
   if (!quiet_process) message("Constructing scoring functions:")
 
@@ -275,20 +275,20 @@ InVivo_assignTreatments <- function(animal_list, treatments,
   }
   assertthat::assert_that(length(scoring_functions) > 0, msg = "No variables for scoring found or all have only one level.
Nothing to do.") - bc_treatment$scoring_f <- scoring_functions - bc_treatment$score() + bc_treatment$score(scoring_functions) - trace <- optimize_design( + bc_treatment <- optimize_design( bc_treatment, + scoring = scoring_functions, n_shuffle = n_shuffle, acceptance_func = ~ accept_leftmost_improvement(..., tolerance = 0.1), quiet = quiet_optimize ) # Check if user given constraints (if provided) could be satisfied - if ("trt_constraints" %in% names(bc_treatment$score())) { - if (bc_treatment$score()[["trt_constraints"]] > 0) { - message("CAUTION: User defined constraints could not be fully met (remaining score ", bc_treatment$score()[["trt_constraints"]], ")") + if ("trt_constraints" %in% names(bc_treatment$score(scoring_functions))) { + if (bc_treatment$score(scoring_functions)[["trt_constraints"]] > 0) { + message("CAUTION: User defined constraints could not be fully met (remaining score ", bc_treatment$score(scoring_functions)[["trt_constraints"]], ")") } else { if (!quiet_process) message("Success. User provided constraints could be fully met.") } @@ -325,7 +325,7 @@ Invivo_assignCages <- function(design_trt, bc_cage <- BatchContainer$new( dimensions = c("Dummy" = 1, ID = nrow(design_trt)) ) - assign_in_order(bc_cage, design_trt) + bc_cage <- assign_in_order(bc_cage, design_trt) shuffle_proposal <- shuffle_grouped_data(bc_cage, allocate_var = "Dummy", @@ -372,15 +372,14 @@ Invivo_assignCages <- function(design_trt, scoring_functions <- c(scoring_functions, sf) } - if (length(scoring_functions) > 0) { - bc_cage$scoring_f <- scoring_functions - } else { + if (length(scoring_functions) == 0) { if (!quiet_process) message(" ... just a dummy score as there are no user provided balancing variables") - bc_cage$scoring_f <- osat_score_generator(batch_vars = "Dummy", feature_vars = c("Treatment")) + scoring_functions <- osat_score_generator(batch_vars = "Dummy", feature_vars = c("Treatment")) } - trace <- optimize_design( + bc_cage <- optimize_design( bc_cage, + scoring = scoring_functions, shuffle_proposal_func = shuffle_proposal, acceptance_func = accept_leftmost_improvement, max_iter = maxiter, @@ -436,7 +435,7 @@ Invivo_arrangeCages <- function(design_cage, dimensions = c(Rack = nr_racks, CageRow = n_cage_x, CageCol = n_cage_y) ) - assign_random(bc_rack, design_rack) + bc_rack <- assign_random(bc_rack, design_rack) # Firstly, distribute variables across racks if necessary if (nr_racks > 1) { @@ -447,16 +446,18 @@ Invivo_arrangeCages <- function(design_cage, ) } - bc_rack$scoring_f <- list(across_rack = osat_score_generator(batch_vars = c("Rack"), feature_vars = distribute_cagerack_vars)) + scoring_functions <- list(across_rack = osat_score_generator(batch_vars = c("Rack"), feature_vars = distribute_cagerack_vars)) - optimize_design(bc_rack, + bc_rack <- optimize_design( + bc_rack, + scoring = scoring_functions, quiet = quiet_optimize, min_score = 0, max_iter = 1e3, n_shuffle = 2, acceptance_func = mk_simanneal_acceptance_func(mk_simanneal_temp_func(T0 = 10000, alpha = 0.5)) ) - if (!quiet_process) message(" ... final score: ", bc_rack$score()) + if (!quiet_process) message(" ... final score: ", bc_rack$score(scoring_f)) } if (!quiet_process) { @@ -475,13 +476,12 @@ Invivo_arrangeCages <- function(design_cage, } names(scoring_functions) <- stringr::str_c(names(scoring_functions), rep(distribute_cagerack_vars, each = nr_racks), sep = "_") - bc_rack$scoring_f <- scoring_functions - for (i in 1:nr_racks) { if (!quiet_process) message(" ... 
Rack ", i) - trace <- optimize_design( + bc_rack <- optimize_design( bc_rack, + scoring = scoring_functions, shuffle_proposal_func = mk_subgroup_shuffling_function( subgroup_vars = "Rack", restrain_on_subgroup_levels = c(i), @@ -495,7 +495,7 @@ Invivo_arrangeCages <- function(design_cage, ) } - if (!quiet_process) message(" ... final scores: ", paste(names(bc_rack$score()), round(bc_rack$score(), 2), sep = ": ", collapse = ", ")) + if (!quiet_process) message(" ... final scores: ", paste(names(bc_rack$score(scoring_functions)), round(bc_rack$score(scoring_functions), 2), sep = ": ", collapse = ", ")) # Translate Rack numbers to some text output and assign CageNr design_rack <- bc_rack$get_samples() %>% diff --git a/vignettes/nested_dimensions_examples.Rmd b/vignettes/nested_dimensions_examples.Rmd index ffa9226f..575c3965 100644 --- a/vignettes/nested_dimensions_examples.Rmd +++ b/vignettes/nested_dimensions_examples.Rmd @@ -54,13 +54,8 @@ bc <- BatchContainer$new( ) ) -# Add samples to container -bc$samples <- multi_trt_day_samples # Initial random assignment -assign_in_order(bc) -# Set scoring function -bc$scoring_f <- osat_score_generator(c("batch"), c("Treatment", "Time")) - +bc <- assign_in_order(bc, multi_trt_day_samples) bc ``` @@ -73,10 +68,10 @@ n_iterations <- length(n_shuffle) set.seed(42) # should we have conventions for this? -# initial score -# bc$score() -trace <- optimize_design( +scoring_f <- osat_score_generator(c("batch"), c("Treatment", "Time")) +bc <- optimize_design( bc, + scoring = scoring_f, n_shuffle = n_shuffle, max_iter = n_iterations ) # default is 10000 @@ -89,8 +84,10 @@ I practice you will have to run for a much higher number of iterations. ```{r, fig.width=5, fig.height= 4} qplot( - x = 1:trace$n_steps, y = trace$scores, color = factor(c(32, n_shuffle)), - main = str_glue("Final score={bc$score()}"), geom = "point" + x = bc$trace$scores[[1]]$step, + y = bc$trace$scores[[1]]$score_1, + color = factor(c(32, n_shuffle)), + main = str_glue("Final score={bc$score(scoring_f)}"), geom = "point" ) ``` @@ -105,18 +102,16 @@ bc$get_samples(assignment = TRUE) %>% # Repeat but use shuffle with contraints ```{r, fig.width=6, fig.height= 4} - # copy batch container for second optimization -bc2 <- bc$copy() -# Initial random assignment -assign_in_order(bc2) +bc2 <- assign_in_order(bc) n_iterations <- 200 set.seed(42) # should we have conventions for this? -trace2 <- optimize_design( +bc2 <- optimize_design( bc2, + scoring = scoring_f, shuffle_proposal = shuffle_with_constraints( src = TRUE, # batch needs to change for shuffle to be accepted @@ -126,8 +121,9 @@ trace2 <- optimize_design( ) qplot( - x = 1:trace$n_steps, y = trace2$scores, # color = factor(n_shuffle), - main = str_glue("Final score={bc2$score()}"), geom = "point" + x = bc2$trace$scores[[1]]$step, + y = bc2$trace$scores[[1]]$score_1, + main = str_glue("Final score={bc2$score(scoring_f)}"), geom = "point" ) bc2$get_samples(assignment = TRUE) %>% @@ -147,16 +143,17 @@ For this we keep the optimized `batch` and now only optimize `run` with constrai ```{r} n_iterations <- 100 -# assign new optimization function -bc$scoring_f <- osat_score_generator(c("run"), c("Treatment", "Time")) +# new optimization function +scoring_f <- osat_score_generator(c("run"), c("Treatment", "Time")) # like this the optimization score is wrong because it tries to optimize across Batches. # Possible ways to go: # - we'd need something like c("batch", batch/run") for optimize by batch and run within batch. 
 # - or we add "batch/run" to the constraints somehow.
-bc$score()
+bc$score(scoring_f)
 
-trace_run <- optimize_design(
+bc <- optimize_design(
   bc,
+  scoring = scoring_f,
   shuffle_proposal = shuffle_with_constraints(
     src = TRUE,
     # batch remains the same and run needs to change
@@ -168,8 +165,10 @@ trace_run <- optimize_design(
 ```
 
 ```{r, fig.width=6, fig.height= 4}
 qplot(
-  x = 1:trace_run$n_steps, y = trace_run$scores, color = factor(n_iterations),
-  main = str_glue("Final score={bc$score()}"), geom = "point"
+  x = bc$trace$scores[[1]]$step,
+  y = bc$trace$scores[[1]]$score_1,
+  color = factor(n_iterations),
+  main = str_glue("Final score={bc$score(scoring_f)}"), geom = "point"
 )
 ```
 
@@ -178,7 +177,6 @@ qplot(
 This is not giving the expected mix of treatments across runs.
 
 ```{r, fig.width=6, fig.height= 4}
-
 bc$get_samples() %>%
   mutate(run = factor(run)) %>%
   ggplot(aes(x = run, fill = Treatment, alpha = factor(Time))) +
diff --git a/vignettes/optimizer_examples.Rmd b/vignettes/optimizer_examples.Rmd
index c707c416..be3bf7de 100644
--- a/vignettes/optimizer_examples.Rmd
+++ b/vignettes/optimizer_examples.Rmd
@@ -17,6 +17,7 @@ knitr::opts_chunk$set(
 
 ```{r setup}
 library(designit)
+library(magrittr)
 ```
 
@@ -60,12 +61,9 @@ bc <- BatchContainer$new(
     run = 2, position = 5
   ),
   exclude = tibble::tibble(batch = 4, run = c(1, 2), position = c(5, 5))
-)
-
-# Add samples to container
-assign_in_order(bc, samples = multi_trt_day_samples)
-# Set scoring function
-bc$scoring_f <- osat_score_generator(c("batch"), c("Treatment", "Time"))
+) %>%
+  # Add samples to container
+  assign_in_order(samples = multi_trt_day_samples)
 
 bc
 ```
 
@@ -85,15 +83,15 @@ Optimization finishes after the list of permutations is exhausted.
 
 ```{r}
 n_shuffle <- rep(c(32, 10, 5, 2, 1), c(20, 40, 40, 50, 50))
+scoring_f <- osat_score_generator(c("batch"), c("Treatment", "Time"))
 
-bc1 <- bc$copy()
-
-trace1 <- optimize_design(
-  bc1,
+bc1 <- optimize_design(
+  bc,
+  scoring = scoring_f,
   n_shuffle = n_shuffle # will implicitly generate a shuffling function according to the provided schedule
 )
 
-trace1$elapsed
+bc1$trace$elapsed
 ```
 
 ## Optimization trace
 
@@ -101,14 +99,27 @@ trace1$elapsed
 Custom plot with some colours:
 
 ```{r, fig.width=5, fig.height= 4}
-ggplot2::qplot(x = seq_along(trace1$scores), y = trace1$scores, color = factor(n_shuffle)[1:length(trace1$scores)], geom = "point") +
-  ggplot2::labs(title = "Score 1 tracing", subtitle = stringr::str_glue("Final score = {bc1$score()}"), x = "Iteration", y = "Score", color = "n_shuffle")
+bc1$scores_table() %>%
+  dplyr::mutate(
+    n_shuffle = c(NA, n_shuffle)
+  ) %>%
+  ggplot2::ggplot(
+    ggplot2::aes(step, value, color = factor(n_shuffle))
+  ) +
+  ggplot2::geom_point() +
+  ggplot2::labs(
+    title = "Score 1 tracing",
+    subtitle = stringr::str_glue("Final score = {bc1$score(scoring_f)}"),
+    x = "Iteration",
+    y = "Score",
+    color = "n_shuffle"
+  )
 ```
 
 Using the internal method...
 
 ```{r, fig.width=5, fig.height= 4}
-trace1$plot()
+bc1$plot_trace()
 ```
 
 We may safely apply the batch container methods get_samples() and score() also
@@ -116,7 +127,7 @@ after using the new optimization code.
 
 ## Final batch layout
 
 ```{r, fig.width=6, fig.height=5}
-bc1$score()
+bc1$score(scoring_f)
 
 bc1$get_samples(assignment = TRUE) %>%
   dplyr::filter(!is.na(Treatment)) %>%
@@ -136,8 +147,9 @@ immediately on the same batch container.
 ```{r}
 n_shuffle <- rep(c(5, 2, 1), c(30, 30, 30))
 
-optimize_design(
+bc1 <- optimize_design(
   bc1,
+  scoring = scoring_f,
   n_shuffle = n_shuffle
 )
 ```
 
@@ -154,10 +166,9 @@ reaching a specific minimum delta threshold (score improvement from one
 selected solution to the next).
 
 ```{r}
-bc2 <- bc$copy()
-
-optimize_design(
-  bc2,
+bc2 <- optimize_design(
+  bc,
+  scoring = scoring_f,
   n_shuffle = 3, # will implicitly generate a shuffling function that will do 3 swaps at each iteration
   max_iter = 2000,
   min_delta = 0.1
 )
 ```
 
 # Optimization with multi-variate scoring function
 
 Instead of passing a single scoring function, a list of multiple scoring
-functions can be assigned to a batch container, each of which to return a scalar
+functions can be passed to the optimizer, each of which returns a scalar
 value on evaluation.
 
 By default, a strict improvement rule is applied for classifying a potential
@@ -182,16 +193,15 @@ The second scoring function used here is by the way rather redundant and
 just serves for illustration.
 
 ```{r}
-bc3 <- bc$copy()
-
-bc3$scoring_f <- list(
+multi_scoring_f <- list(
   osat_score_generator(c("batch"), c("Treatment", "Time")),
   osat_score_generator(c("batch"), c("Treatment"))
 )
 
-trace <- optimize_design(
-  bc3,
+bc3 <- optimize_design(
+  bc,
+  scoring = multi_scoring_f,
   n_shuffle = 3,
   max_iter = 200,
   min_delta = 0.1
@@ -216,16 +226,9 @@ We may also want to decrease the min_delta parameter to match the new numerical range.
 
 ```{r}
-bc3_as <- bc$copy()
-
-bc3_as$scoring_f <- list(
-  osat_score_generator(c("batch"), c("Treatment", "Time")),
-  osat_score_generator(c("batch"), c("Treatment"))
-)
-
-
-trace <- optimize_design(
-  bc3_as,
+bc3_as <- optimize_design(
+  bc,
+  scoring = multi_scoring_f,
   n_shuffle = 3,
   max_iter = 200,
   min_delta = 0.01,
@@ -245,16 +248,9 @@ simply set the aggregated score to whichever of the individual scores is larger
 
 ```{r}
-bc4 <- bc$copy()
-
-bc4$scoring_f <- list(
-  osat_score_generator(c("batch"), c("Treatment", "Time")),
-  osat_score_generator(c("batch"), c("Treatment"))
-)
-
-
-optimize_design(
-  bc4,
+bc4 <- optimize_design(
+  bc,
+  scoring = multi_scoring_f,
   n_shuffle = 3,
   aggregate_scores_func = worst_score,
   max_iter = 200,
@@ -271,15 +267,9 @@ For illustration, we omit the `n_shuffle` parameter here, which will lead by
 default to pairwise sample swaps being done on each iteration.
 
 ```{r, eval = FALSE}
-bc5 <- bc$copy()
-
-bc5$scoring_f <- list(
-  osat_score_generator(c("batch"), c("Treatment", "Time")),
-  osat_score_generator(c("batch"), c("Treatment"))
-)
-
-optimize_design(
-  bc5,
+bc5 <- optimize_design(
+  bc,
+  scoring = multi_scoring_f,
   aggregate_scores_func = sum_scores,
   max_iter = 200,
   autoscale_scores = TRUE,
@@ -295,15 +285,9 @@ Note that we don't use the auto-scaling in this case as the L2-norm based optimi
 not the minimal (negative) value that would be desired in that case.
 
 ```{r, eval = FALSE}
-bc5_2 <- bc$copy()
-
-bc5_2$scoring_f <- list(
-  osat_score_generator(c("batch"), c("Treatment", "Time")),
-  osat_score_generator(c("batch"), c("Treatment"))
-)
-
-optimize_design(
-  bc5_2,
+bc5_2 <- optimize_design(
+  bc,
+  scoring = multi_scoring_f,
   aggregate_scores_func = L2s_norm,
   max_iter = 200,
 )
@@ -322,10 +306,9 @@ across all available positions in the batch container.
 
 Note that this is usually not a good strategy for converging to a solution.
 ```{r}
-bc6 <- bc$copy()
-
-optimize_design(
-  bc6,
+bc6 <- optimize_design(
+  bc,
+  scoring = scoring_f,
   shuffle_proposal_func = complete_random_shuffling,
   max_iter = 200
 )
 ```
 
@@ -349,10 +332,9 @@ to be optimized. Choose an appropriate aggregation function if you happen
 to have multiple scores initially.
 
 ```{r}
-bc7 <- bc$copy()
-
-trace7 <- optimize_design(
-  bc7,
+bc7 <- optimize_design(
+  bc,
+  scoring = scoring_f,
   n_shuffle = 1,
   acceptance_func = mk_simanneal_acceptance_func(),
   max_iter = 200
@@ -362,23 +344,22 @@ The trace may show a non strictly monotonic behavior now, reflecting the
 SA protocol at work.
 
 ```{r, fig.width=5, fig.height= 4}
-trace7$plot()
+bc7$plot_trace()
 ```
 
 Better results and quicker convergence may be achieved by playing with the
 starting temperature (T0) and cooling speed (alpha) in a specific case.
 
 ```{r}
-bc8 <- bc$copy()
-
-trace8 <- optimize_design(
-  bc8,
+bc8 <- optimize_design(
+  bc,
+  scoring = scoring_f,
   n_shuffle = 1,
   acceptance_func = mk_simanneal_acceptance_func(mk_simanneal_temp_func(T0 = 100, alpha = 2)),
   max_iter = 150
 )
 
-trace8$plot()
+bc8$plot_trace()
 ```
 
 # Full blown example
 
 The following example puts together all possible options to illustrate the
 flexibility of the optimization.
 
 ```{r}
-bc$scoring_f <- list(
-  osat_score_generator(c("batch"), c("Treatment", "Time")),
-  osat_score_generator(c("batch"), c("Treatment")),
-  osat_score_generator(c("batch"), c("Time"))
-)
-
 n_shuffle <- rep(c(3, 2, 1), c(20, 20, 200))
 
-trace <- optimize_design(
+bc9 <- optimize_design(
   bc,
+  scoring = list(
+    osat_score_generator(c("batch"), c("Treatment", "Time")),
+    osat_score_generator(c("batch"), c("Treatment")),
+    osat_score_generator(c("batch"), c("Time"))
+  ),
   n_shuffle = n_shuffle,
   aggregate_scores_func = sum_scores,
   acceptance_func = mk_simanneal_acceptance_func(mk_simanneal_temp_func(T0 = 500, alpha = 1)),
@@ -405,9 +385,9 @@ trace <- optimize_design(
   autoscale_scores = T
 )
 
-trace$plot()
+bc9$plot_trace()
 
-bc$get_samples(assignment = TRUE) %>%
+bc9$get_samples(assignment = TRUE) %>%
   dplyr::mutate(batch = factor(batch)) %>%
   ggplot2::ggplot(ggplot2::aes(x = batch, fill = Treatment, alpha = factor(Time))) +
   ggplot2::geom_bar()
diff --git a/vignettes/osat.Rmd b/vignettes/osat.Rmd
index 1b9c38c7..17549095 100644
--- a/vignettes/osat.Rmd
+++ b/vignettes/osat.Rmd
@@ -124,7 +124,7 @@ bc$n_locations
 
 Assign samples and get initial setup.
 ```{r}
-bc$samples <- samples
+bc <- assign_in_order(bc, samples)
 
 starting_assignment <- bc$get_locations() %>%
   left_join(g_setup_start) %>%
@@ -139,22 +139,23 @@ bc$get_samples(remove_empty_locations = TRUE) %>%
 
 ## Using designit OSAT score implementation
 
 ```{r}
-bc$scoring_f <- osat_score_generator("plates", c("SampleType", "Race", "AgeGrp"))
+scoring_f <- osat_score_generator("plates", c("SampleType", "Race", "AgeGrp"))
 
-bc$score()
+bc$score(scoring_f)
 g_setup@metadata$optValue %>% head(1)
 # should be identical
 
 bench::system_time({
   set.seed(123)
-  trace_reference <- optimize_design(bc, max_iter = params$iterations)
+  bc_reference <- optimize_design(bc, scoring = scoring_f, max_iter = params$iterations)
 })
 ```
 
 ```{r}
 # final score
-bc$score()
-plot(trace_reference, main = str_glue("Final score={bc$score()}"))
+bc_reference$score(scoring_f)
+bc_reference$plot_trace() +
+  ggtitle(str_glue("Final score={bc_reference$score(scoring_f)}"))
 
 bc$get_samples(remove_empty_locations = TRUE) %>%
   plot_batch()
 ```
@@ -163,9 +164,8 @@ Instead of relying on `BatchContainer`, here we have a manual optimization
 process using `data.table`.
 
 ```{r}
-bc$move_samples(location_assignment = starting_assignment)
-
 fast_osat_optimize <- function(bc, batch_vars, feature_vars, iterations) {
+  bc <- bc$copy()
   ldf <- data.table::data.table(bc$get_locations())[, c("plates")][, ".sample_id" := bc$assignment]
   fcols <- c(".sample_id", feature_vars)
   smp <- data.table::data.table(bc$samples)[, ..fcols]
@@ -202,27 +202,27 @@ fast_osat_optimize <- function(bc, batch_vars, feature_vars, iterations) {
 
   bc$assignment <- df$.sample_id
 
-  scores
+  list(bc = bc, scores = scores)
 }
 
 bench::system_time({
   set.seed(123)
-  trace <- fast_osat_optimize(bc, "plates", c("SampleType", "Race", "AgeGrp"), iterations = params$iterations)
+  opt_res <- fast_osat_optimize(bc, "plates", c("SampleType", "Race", "AgeGrp"), iterations = params$iterations)
 })
 ```
 
 # Shuffle optimization with burn-in
 
 ```{r}
-bc$move_samples(location_assignment = starting_assignment)
-
-bc$scoring_f <- osat_score_generator("plates", c("SampleType", "Race", "AgeGrp"))
+scoring_f <- osat_score_generator("plates", c("SampleType", "Race", "AgeGrp"))
 
 burn_in_it <- floor(params$iterations * 0.1)
 burn_in_it
 
 bench::system_time({
   set.seed(123)
-  trace_burn_in <- optimize_design(bc,
+  bc_burn_in <- optimize_design(
+    bc,
+    scoring = scoring_f,
     n_shuffle = c(
       rep(20, burn_in_it),
       rep(
@@ -237,9 +237,9 @@ bench::system_time({
 
 ```{r}
 tibble(
-  i = seq_len(trace_burn_in$n_steps),
-  normal = trace_reference$scores,
-  burnin = trace_burn_in$scores
+  i = bc_burn_in$trace$scores[[1]]$step,
+  normal = bc_reference$trace$scores[[1]]$score_1,
+  burnin = bc_burn_in$trace$scores[[1]]$score_1
 ) %>%
   pivot_longer(-i, names_to = "method", values_to = "score") %>%
   ggplot(aes(i, score, col = method)) +
@@ -249,9 +249,7 @@ tibble(
 
 # Score demonstration
 
 ```{r}
-bc$score()
-bc$scoring_f <- function(...)
-  rnorm(1)
-bc$score()
+bc$score(scoring_f)
 ```
 
 ```{r}
@@ -260,13 +258,11 @@ assign_random(bc)
 
 bc$get_samples()
 bc$get_samples(remove_empty_locations = TRUE)
 
-bc$score()
-
-bc$scoring_f <- list(
+scoring_f <- list(
   fc0 = function(samples) rnorm(1) + 2 * rexp(1),
   fc1 = function(samples) rnorm(1, 100),
   fc2 = function(samples) -7
 )
-bc$score()
+bc$score(scoring_f)
 ```
diff --git a/vignettes/plate_layouts.Rmd b/vignettes/plate_layouts.Rmd
index fe1af67b..b2c81095 100644
--- a/vignettes/plate_layouts.Rmd
+++ b/vignettes/plate_layouts.Rmd
@@ -68,7 +68,7 @@ bc <- BatchContainer$new(
   dimensions = list("plate" = 3, "row" = 4, "col" = 6),
 )
 
-assign_in_order(bc, dat)
+bc <- assign_in_order(bc, dat)
 
 head(bc$get_samples()) %>% gt::gt()
 ```
@@ -99,7 +99,7 @@ The order of the factors indicates their relative importance.
 In this case we prioritize Group over Sex.
 
 ```{r}
-traces <- optimize_multi_plate_design(bc,
+bc <- optimize_multi_plate_design(bc,
   across_plates_variables = c("Group", "Sex"),
   within_plate_variables = c("Group"),
   plate = "plate",
@@ -130,8 +130,12 @@ cowplot::plot_grid(
 
 We can look at the trace objects for each internal `optimize_design` run,
 returned from the wrapper function.
 
-```{r fig.width=3, fig.height=3}
-purrr::imap(traces, ~ .x$plot(include_aggregated = TRUE) + labs(title = .y))
+```{r fig.width=6, fig.height=3}
+bc$scores_table() |>
+  ggplot(aes(step, value, color = score)) +
+  geom_line() +
+  geom_point() +
+  facet_wrap(~ optimization_index, scales = "free_y")
 ```
 
 ## Plate scoring
 
 Note that internally the wrapper function sets up plate specific scoring functions
 that could manually be set up in the following way.
 
 ```{r, eval = FALSE}
-bc$scoring_f <- c(
+scoring_f <- c(
   Group = mk_plate_scoring_functions(bc,
     plate = "plate", row = "row", column = "col",
     group = "Group", penalize_lines = "hard"
@@ -184,12 +188,12 @@ bc <- BatchContainer$new(
   dimensions = list("batch" = 3, "location" = 11)
 )
 
-bc$scoring_f <- list(
+scoring_f <- list(
   group = osat_score_generator(batch_vars = "batch", feature_vars = "Group"),
   sex = osat_score_generator(batch_vars = "batch", feature_vars = "Sex")
 )
 
-assign_random(
+bc <- assign_random(
   bc,
   dat %>% select(SubjectID, Group, Sex) %>% distinct()
 )
@@ -217,8 +221,9 @@ cowplot::plot_grid(
 
 Optimizing the layout with `optimize_design()`
 
 ```{r}
-trace <- optimize_design(
+bc <- optimize_design(
   bc,
+  scoring = scoring_f,
   n_shuffle = 1,
   acceptance_func = ~ accept_leftmost_improvement(..., tolerance = 0.01),
   max_iter = 150,
@@ -238,7 +243,7 @@ cowplot::plot_grid(
     bc$get_samples() %>% ggplot(aes(x = batch, fill = Sex)) +
       geom_bar() + labs(y = "subject count"),
-    trace$plot(include_aggregated = TRUE)
+    bc$plot_trace(include_aggregated = TRUE)
   ),
   ncol = 3
 )
@@ -268,7 +273,7 @@ bc <- BatchContainer$new(
 )
 
 # initial assignment such that the original plate assigned stays the same
-assign_in_order(
+bc <- assign_in_order(
   bc,
   dat %>% arrange(batch)
 )
@@ -295,7 +300,7 @@ For distributing samples within each plate, we use variables Group and Sex again
 The order of the factors indicates their relative importance.
 ```{r}
-traces <- optimize_multi_plate_design(bc,
+bc <- optimize_multi_plate_design(bc,
   within_plate_variables = c("Group", "Sex"),
   plate = "plate",
   row = "row",
@@ -322,8 +327,12 @@ cowplot::plot_grid(
 )
 ```
 
-```{r fig.width=3, fig.height=3}
-purrr::imap(traces, ~ .x$plot(include_aggregated = TRUE) + labs(title = .y))
+```{r fig.width=9, fig.height=3}
+bc$scores_table() |>
+  ggplot(aes(step, value, color = score)) +
+  geom_line() +
+  geom_point() +
+  facet_wrap(~ optimization_index)
 ```
 
 # Full dataset
 
@@ -381,12 +390,12 @@ bc <- BatchContainer$new(
   dimensions = list("plate" = 3, "locations" = 11)
 )
 
-bc$scoring_f <- list(
+scoring_f <- list(
   group = osat_score_generator(batch_vars = "plate", feature_vars = c("Group")),
   sex = osat_score_generator(batch_vars = "plate", feature_vars = "Sex")
 )
 
-assign_random(bc, subjects)
+bc <- assign_random(bc, subjects)
 ```
 
 ```{r, fig.width= 8, fig.height=3}
@@ -409,8 +418,9 @@ cowplot::plot_grid(
 
 Optimizing the layout with `optimize_design()`
 
 ```{r}
-trace <- optimize_design(
+bc <- optimize_design(
   bc,
+  scoring = scoring_f,
   n_shuffle = 1,
   acceptance_func = ~ accept_leftmost_improvement(..., tolerance = 0.1),
   max_iter = 150,
@@ -433,7 +443,7 @@ cowplot::plot_grid(
     bc$get_samples() %>% ggplot(aes(x = factor(plate), y = Age)) +
       geom_boxplot() + geom_point(),
-    trace$plot(include_aggregated = TRUE)
+    bc$plot_trace(include_aggregated = TRUE)
   ),
   nrow = 2
 )
@@ -472,7 +482,7 @@ bc <- BatchContainer$new(
 )
 
 # assign samples in order of plate
-assign_in_order(
+bc <- assign_in_order(
   bc,
   samples_with_plate %>%
     arrange(plate) %>%
@@ -512,7 +522,7 @@ For distributing samples within each plate, we use variables Group and Sex again
 The order of the factors indicates their relative importance.
 
 ```{r}
-traces <- optimize_multi_plate_design(bc,
+bc <- optimize_multi_plate_design(bc,
   within_plate_variables = c("Group", "SubjectID", "Sex"),
   plate = "plate",
   row = "row",
@@ -545,6 +555,10 @@ cowplot::plot_grid(
 ```
 
-```{r fig.width=5, fig.height=5}
-purrr::imap(traces, ~ .x$plot(include_aggregated = TRUE) + labs(title = .y))
+```{r fig.width=6, fig.height=5}
+bc$scores_table() |>
+  ggplot(aes(step, value, color = score)) +
+  geom_line() +
+  geom_point() +
+  facet_wrap(~ optimization_index)
 ```
diff --git a/vignettes/shuffling_with_constraints.Rmd b/vignettes/shuffling_with_constraints.Rmd
index 9fe748af..4ebf87de 100644
--- a/vignettes/shuffling_with_constraints.Rmd
+++ b/vignettes/shuffling_with_constraints.Rmd
@@ -68,9 +68,9 @@ table(treatments)
 
 bc <- BatchContainer$new(locations_table = data.frame(Treatment = treatments, Position = seq_along(treatments)))
 
-assign_in_order(bc, invivo_study_samples)
+bc <- assign_in_order(bc, invivo_study_samples)
 
-bc$scoring_f <- osat_score_generator(batch_vars = "Treatment", feature_vars = c("Strain", "Sex"))
+scoring_f <- osat_score_generator(batch_vars = "Treatment", feature_vars = c("Strain", "Sex"))
 
 bc
 ```
@@ -100,11 +100,10 @@ defined above at the same time. It can be passed to the optimizer together with
 such as the scoring or acceptance functions.
 
 ```{r}
-bc2 <- bc$copy()
-
-optimize_design(
-  bc2,
-  shuffle_proposal_func = shuffle_grouped_data(bc2,
+bc2 <- optimize_design(
+  bc,
+  scoring = scoring_f,
+  shuffle_proposal_func = shuffle_grouped_data(bc,
     allocate_var = "Treatment",
     keep_together_vars = c("Strain", "Sex"),
     keep_separate_vars = c("Earmark"),
@@ -238,10 +237,9 @@ the beginning!)
 
 We can finally use the customized shuffling function in the optimization.
 ```{r echo=TRUE}
-bc3 <- bc$copy()
-
-trace <- optimize_design(
-  bc3,
+bc3 <- optimize_design(
+  bc,
+  scoring = scoring_f,
   shuffle_proposal_func = shuffle_proposal,
   max_iter = 300
 )
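The pattern of this changeset is the same throughout: scoring moves from a field on the container to a `scoring` argument, assignment and optimization functions return a new `BatchContainer` instead of mutating their input, and the optimization trace lives on the container. A condensed before/after sketch (placeholder objects):

```r
# Before this changeset (old API):
# bc$scoring_f <- scoring_f
# trace <- optimize_design(bc, max_iter = 100) # bc mutated in place
# trace$plot()

# After this changeset (new API):
bc <- assign_in_order(bc, samples) # returns a new container
bc <- optimize_design(bc, scoring = scoring_f, max_iter = 100)
bc$score(scoring_f) # named vector of score values
bc$plot_trace() # trace is stored on the container
```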