Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create functions to mirror dfs to redcap #136

Merged
6 changes: 4 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,15 @@ Imports:
Suggests:
RSQLite,
digest,
dotenv,
duckdb,
fs,
knitr (>= 1.18),
rmarkdown (>= 2.0),
testthat (>= 3.0.0)
testthat (>= 3.0.0),
tidyselect
VignetteBuilder: knitr
Config/testthat/edition: 3
RoxygenNote: 7.2.1
RoxygenNote: 7.2.2
Depends:
R (>= 2.10)
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ export(create_allocation_rows)
export(create_randomization_row)
export(create_test_table)
export(create_test_tables)
export(dataframe_to_redcap_dictionary)
export(dataset_diff)
export(disable_non_interactive_quit)
export(enable_randomization_on_a_preconfigured_project_in_production)
Expand All @@ -34,6 +35,7 @@ export(is_on_ci)
export(log_job_debug)
export(log_job_failure)
export(log_job_success)
export(mirror_data_to_redcap_project)
export(mutate_columns_to_posixct)
export(quit_non_interactive_run)
export(scrape_user_api_tokens)
Expand Down
93 changes: 93 additions & 0 deletions R/dataframe_to_redcap_dictionary.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
#' Create a REDCap data dictionary from a dataframe
#'
#' @param df the dataframe to generate the data dictionary for
#' @param record_id_col a column in the dataframe that uniquely identifies each record
#' @param form_name the form name to display in REDCap
#' @param write_to_csv If TRUE will write the data dictionary to a csv.
#' @param filename A string specifying the filename for the CSV.
#'
#' @return A redcap data dictionary
#' @export
#'
#' @examples
#' \dontrun{
#'
#' df <- data.frame(
#' pk_col = c("a1", "a2", "a3"),
#' integer_col = c(1, 2, 3),
#' numeric_col = 5.9,
#' character_col = c("a", "b", "c"),
#' date_col = as.Date("2011-03-07"),
#' date_time_col = as.POSIXct(strptime("2011-03-27 01:30:00", "%Y-%m-%d %H:%M:%S")),
#' email_col = c("[email protected]", "[email protected]", "[email protected]")
#' )
#'
#' redcap_data_dictionary <- dataframe_to_redcap_dictionary(df, "pk_col", "test_form")
#' redcap_data_dictionary <- dataframe_to_redcap_dictionary(
#' df, "pk_col", "test_form"
#' TRUE, "<output_path>.csv"
#' )
#' }
dataframe_to_redcap_dictionary <- function(df,
record_id_col,
form_name,
write_to_csv = FALSE,
filename = NULL) {
contains_emails <- function(col) {
email_pattern <- "^[A-Za-z0-9._%-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,4}$"
return(any(grepl(email_pattern, col)))
}

get_validation_type <- function(col) {
if (contains_emails(col)) {
return("email")
}

col_type <- class(col)[1]
switch(
col_type,
"numeric" = "number",
"integer" = "number",
"Date" = "datetime_ymd",
"POSIXct" = "datetime_seconds_ymd",
as.character(NA)
)
}

if (!record_id_col %in% names(df)) {
stop("The provided record_id_col does not exist in the input dataframe.")
}

# create the record_id and make it the first column in df
df_with_record_id <- df |>
dplyr::rename(record_id = tidyselect::all_of(record_id_col)) |>
dplyr::select("record_id", dplyr::everything())
ljwoodley marked this conversation as resolved.
Show resolved Hide resolved

df_with_ordered_cols <- df |>
dplyr::select(tidyselect::all_of(record_id_col), dplyr::everything())

redcap_data_dictionary <- data.frame(
field_name = names(df_with_record_id),
form_name = form_name,
section_header = as.character(NA),
field_type = "text",
field_label = names(df_with_ordered_cols),
select_choices_or_calculations = as.character(NA),
field_note = as.character(NA),
text_validation_type_or_show_slider_number = sapply(df_with_record_id, get_validation_type)
)

rownames(redcap_data_dictionary) <- NULL

if (write_to_csv) {
if (is.null(filename)) {
stop("Please provide a filename if you want to write to CSV.")
}
utils::write.csv(redcap_data_dictionary,
filename,
na = "",
row.names = FALSE)
}
ljwoodley marked this conversation as resolved.
Show resolved Hide resolved

return(redcap_data_dictionary)
}
91 changes: 91 additions & 0 deletions R/mirror_data_to_redcap_project.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
#' Mirror a dataframe to a REDCap project
#'
#' @param df_to_mirror Dataframe to be imported into the REDCap project.
#' @param record_id_col Column in the dataframe that uniquely identifies each record.
#' @param project_name REDCap project name.
#' @param path_to_env_file Path to the ".env" credentials file.
#' @param ... Additional parameters passed to \code{\link[REDCapR]{redcap_write}}.
#'
#' @return Same values as returned by \code{\link[REDCapR]{redcap_write}}.
#' @export
#'
#' @seealso \code{\link{dataframe_to_redcap_dictionary}} to understand the REDCap data dictionary requirements.
#' The dictionary must be created and uploaded to REDCap for \code{mirror_data_to_redcap_project} to function properly.
#'
#' @examples
#' \dontrun{
#' df <- data.frame(
#' pk_col = c("a1", "a2", "a3"),
#' integer_col = c(1, 2, 3),
#' numeric_col = 5.9,
#' character_col = c("a", "b", "c"),
#' date_col = as.Date("2011-03-07"),
#' date_time_col = as.POSIXct(strptime("2011-03-27 01:30:00", "%Y-%m-%d %H:%M:%S")),
#' email_col = c("[email protected]", "[email protected]", "[email protected]")
#' )
#' mirrored_data <- mirror_data_to_redcap_project(df, "pk_col", "test_project", "<env_path>.env")
#' }
#'
mirror_data_to_redcap_project <- function(df_to_mirror,
record_id_col,
project_name,
path_to_env_file,
...) {
if (!file.exists(path_to_env_file)) {
stop(sprintf("The .env file was not found at %s", path_to_env_file))
} else {
dotenv::load_dot_env(path_to_env_file)
}
ljwoodley marked this conversation as resolved.
Show resolved Hide resolved

# retrieve the PID value from environment the env file
project_id <- paste0(toupper(project_name), "_PID") |>
Sys.getenv()

credentials_db_path <- Sys.getenv("CREDENTIALS_DB")

# check if the PID was found
if (project_id == "") {
stop(sprintf(
"Environment variable %s not found in %s.",
paste0(toupper(project_name), "_PID"),
path_to_env_file
))
} else if (credentials_db_path == "") {
stop(sprintf(
"Environment variable CREDENTIALS_DB not found in %s.",
path_to_env_file
))
}

credentials_db <- DBI::dbConnect(RSQLite::SQLite(), credentials_db_path)

# get the REDCap API credentials from the credentials database
redcap_credentials <- dplyr::tbl(credentials_db, "credentials") |>
dplyr::filter(project_id == local(project_id)) |>
dplyr::collect()

# check if credentials were found
if (nrow(redcap_credentials) == 0) {
stop(sprintf("No REDCap API credentials found for PID %s.", project_id))
}
ljwoodley marked this conversation as resolved.
Show resolved Hide resolved

if (!record_id_col %in% names(df_to_mirror)) {
stop("The provided record_id_col does not exist in the input dataframe.")
}

# create the record_id and make it the first column in df
df_to_mirror_with_record_id <- df_to_mirror |>
dplyr::rename(record_id = tidyselect::all_of(record_id_col)) |>
dplyr::select("record_id", dplyr::everything())
ljwoodley marked this conversation as resolved.
Show resolved Hide resolved

rownames(df_to_mirror_with_record_id) <- NULL

write_result <- REDCapR::redcap_write(
redcap_uri = redcap_credentials$redcap_uri,
token = redcap_credentials$token,
ds_to_write = df_to_mirror_with_record_id,
...
)

return(write_result)
}
51 changes: 51 additions & 0 deletions man/dataframe_to_redcap_dictionary.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

50 changes: 50 additions & 0 deletions man/mirror_data_to_redcap_project.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.