ctsit · ChemiKyle · Oct 5, 2023 · Aug 18, 2023 · Aug 21, 2023 · Aug 21, 2023
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -59,13 +59,15 @@ Imports:
 Suggests:
     RSQLite,
     digest,
+    dotenv,
     duckdb,
     fs,
     knitr (>= 1.18),
     rmarkdown (>= 2.0),
-    testthat (>= 3.0.0)
+    testthat (>= 3.0.0),
+    tidyselect
 VignetteBuilder: knitr
 Config/testthat/edition: 3
-RoxygenNote: 7.2.1
+RoxygenNote: 7.2.2
 Depends: 
     R (>= 2.10)
diff --git a/NAMESPACE b/NAMESPACE
@@ -9,6 +9,7 @@ export(create_allocation_rows)
 export(create_randomization_row)
 export(create_test_table)
 export(create_test_tables)
+export(dataframe_to_redcap_dictionary)
 export(dataset_diff)
 export(disable_non_interactive_quit)
 export(enable_randomization_on_a_preconfigured_project_in_production)
@@ -34,6 +35,7 @@ export(is_on_ci)
 export(log_job_debug)
 export(log_job_failure)
 export(log_job_success)
+export(mirror_data_to_redcap_project)
 export(mutate_columns_to_posixct)
 export(quit_non_interactive_run)
 export(scrape_user_api_tokens)

diff --git a/R/dataframe_to_redcap_dictionary.R b/R/dataframe_to_redcap_dictionary.R
@@ -0,0 +1,93 @@
+#' Create a REDCap data dictionary from a dataframe
+#'
+#' @param df the dataframe to generate the data dictionary for
+#' @param record_id_col a column in the dataframe that uniquely  identifies each record
+#' @param form_name  the form name to display in REDCap
+#' @param write_to_csv If TRUE will write the data dictionary to a csv.
+#' @param filename A string specifying the filename for the CSV.
+#'
+#' @return A redcap data dictionary
+#' @export
+#'
+#' @examples
+#' \dontrun{
+#'
+#' df <- data.frame(
+#'   pk_col = c("a1", "a2", "a3"),
+#'   integer_col = c(1, 2, 3),
+#'   numeric_col = 5.9,
+#'   character_col = c("a", "b", "c"),
+#'   date_col = as.Date("2011-03-07"),
+#'   date_time_col = as.POSIXct(strptime("2011-03-27 01:30:00", "%Y-%m-%d %H:%M:%S")),
+#'   email_col = c("[email protected]", "[email protected]", "[email protected]")
+#' )
+#'
+#' redcap_data_dictionary <- dataframe_to_redcap_dictionary(df, "pk_col", "test_form")
+#' redcap_data_dictionary <- dataframe_to_redcap_dictionary(
+#' df, "pk_col", "test_form"
+#' TRUE, "<output_path>.csv"
+#' )
+#' }
+dataframe_to_redcap_dictionary <- function(df,
+                                           record_id_col,
+                                           form_name,
+                                           write_to_csv = FALSE,
+                                           filename = NULL) {
+  contains_emails <- function(col) {
+    email_pattern <- "^[A-Za-z0-9._%-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,4}$"
+    return(any(grepl(email_pattern, col)))
+  }
+
+  get_validation_type <- function(col) {
+    if (contains_emails(col)) {
+      return("email")
+    }
+
+    col_type <- class(col)[1]
+    switch(
+      col_type,
+      "numeric" = "number",
+      "integer" = "number",
+      "Date" = "datetime_ymd",
+      "POSIXct" = "datetime_seconds_ymd",
+      as.character(NA)
+    )
+  }
+
+  if (!record_id_col %in% names(df)) {
+    stop("The provided record_id_col does not exist in the input dataframe.")
+  }
+
+  # create the record_id and make it the first column in df
+  df_with_record_id <- df |>
+    dplyr::rename(record_id = tidyselect::all_of(record_id_col)) |>
+    dplyr::select("record_id", dplyr::everything())
+
+  df_with_ordered_cols <- df |>
+    dplyr::select(tidyselect::all_of(record_id_col), dplyr::everything())
+
+  redcap_data_dictionary <- data.frame(
+    field_name = names(df_with_record_id),
+    form_name = form_name,
+    section_header = as.character(NA),
+    field_type = "text",
+    field_label = names(df_with_ordered_cols),
+    select_choices_or_calculations = as.character(NA),
+    field_note = as.character(NA),
+    text_validation_type_or_show_slider_number = sapply(df_with_record_id, get_validation_type)
+  )
+
+  rownames(redcap_data_dictionary) <- NULL
+
+  if (write_to_csv) {
+    if (is.null(filename)) {
+      stop("Please provide a filename if you want to write to CSV.")
+    }
+    utils::write.csv(redcap_data_dictionary,
+                     filename,
+                     na = "",
+                     row.names = FALSE)
+  }
+
+  return(redcap_data_dictionary)
+}
diff --git a/R/mirror_data_to_redcap_project.R b/R/mirror_data_to_redcap_project.R
@@ -0,0 +1,91 @@
+#' Mirror a dataframe to a REDCap project
+#'
+#' @param df_to_mirror Dataframe to be imported into the REDCap project.
+#' @param record_id_col Column in the dataframe that uniquely identifies each record.
+#' @param project_name REDCap project name.
+#' @param path_to_env_file Path to the ".env" credentials file.
+#' @param ... Additional parameters passed to \code{\link[REDCapR]{redcap_write}}.
+#'
+#' @return Same values as returned by \code{\link[REDCapR]{redcap_write}}.
+#' @export
+#'
+#' @seealso \code{\link{dataframe_to_redcap_dictionary}} to understand the REDCap data dictionary requirements.
+#' The dictionary must be created and uploaded to REDCap for \code{mirror_data_to_redcap_project} to function properly.
+#'
+#' @examples
+#' \dontrun{
+#' df <- data.frame(
+#'   pk_col = c("a1", "a2", "a3"),
+#'   integer_col = c(1, 2, 3),
+#'   numeric_col = 5.9,
+#'   character_col = c("a", "b", "c"),
+#'   date_col = as.Date("2011-03-07"),
+#'   date_time_col = as.POSIXct(strptime("2011-03-27 01:30:00", "%Y-%m-%d %H:%M:%S")),
+#'   email_col = c("[email protected]", "[email protected]", "[email protected]")
+#' )
+#' mirrored_data <- mirror_data_to_redcap_project(df, "pk_col", "test_project", "<env_path>.env")
+#' }
+#'
+mirror_data_to_redcap_project <- function(df_to_mirror,
+                                          record_id_col,
+                                          project_name,
+                                          path_to_env_file,
+                                          ...) {
+  if (!file.exists(path_to_env_file)) {
+    stop(sprintf("The .env file was not found at %s", path_to_env_file))
+  } else {
+    dotenv::load_dot_env(path_to_env_file)
+  }
+
+  # retrieve the PID value from environment the env file
+  project_id <- paste0(toupper(project_name), "_PID") |>
+    Sys.getenv()
+
+  credentials_db_path <- Sys.getenv("CREDENTIALS_DB")
+
+  # check if the PID was found
+  if (project_id == "") {
+    stop(sprintf(
+      "Environment variable %s not found in %s.",
+      paste0(toupper(project_name), "_PID"),
+      path_to_env_file
+    ))
+  } else if (credentials_db_path == "") {
+    stop(sprintf(
+      "Environment variable CREDENTIALS_DB not found in %s.",
+      path_to_env_file
+    ))
+  }
+
+  credentials_db <- DBI::dbConnect(RSQLite::SQLite(), credentials_db_path)
+
+  # get the REDCap API credentials from the credentials database
+  redcap_credentials <- dplyr::tbl(credentials_db, "credentials") |>
+    dplyr::filter(project_id == local(project_id)) |>
+    dplyr::collect()
+
+  # check if credentials were found
+  if (nrow(redcap_credentials) == 0) {
+    stop(sprintf("No REDCap API credentials found for PID %s.", project_id))
+  }
+
+  if (!record_id_col %in% names(df_to_mirror)) {
+    stop("The provided record_id_col does not exist in the input dataframe.")
+  }
+
+  # create the record_id and make it the first column in df
+  df_to_mirror_with_record_id <- df_to_mirror |>
+    dplyr::rename(record_id = tidyselect::all_of(record_id_col)) |>
+    dplyr::select("record_id", dplyr::everything())
+
+  rownames(df_to_mirror_with_record_id) <- NULL
+
+  write_result <- REDCapR::redcap_write(
+    redcap_uri = redcap_credentials$redcap_uri,
+    token = redcap_credentials$token,
+    ds_to_write = df_to_mirror_with_record_id,
+    ...
+  )
+
+  return(write_result)
+}
diff --git a/man/dataframe_to_redcap_dictionary.Rd b/man/dataframe_to_redcap_dictionary.Rd
diff --git a/man/mirror_data_to_redcap_project.Rd b/man/mirror_data_to_redcap_project.Rd