Skip to content

Commit

Permalink
Move create_data_list() logic to get_file_paths()
Browse files Browse the repository at this point in the history
  • Loading branch information
mingstat committed Nov 19, 2024
1 parent b971bb9 commit 6f65e5c
Show file tree
Hide file tree
Showing 6 changed files with 77 additions and 48 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Generated by roxygen2: do not edit by hand

export(get_cre_path)
export(get_file_paths)
export(get_nfs_path)
export(load_data)
export(load_data_files)
10 changes: 4 additions & 6 deletions R/dvloader.R
Original file line number Diff line number Diff line change
Expand Up @@ -76,16 +76,14 @@ load_data <- function(

dir_path <- if (is.null(sub_dir)) base_dir else file.path(base_dir, sub_dir)

file_paths <- get_file_paths(dir_path = dir_path, file_names = file_names, prefer_sas = prefer_sas)

if (isTRUE(print_file_paths)) {
cat("Loading data from", dir_path, "\n")
cat("Loading data file(s):", file_names, "\n")
cat("Loading data file(s):", basename(file_paths), "\n")
}

data_list <- create_data_list(
dir_path = dir_path,
file_names = file_names,
prefer_sas = prefer_sas
)
data_list <- load_data_files(file_paths)

return(data_list)
}
49 changes: 31 additions & 18 deletions R/utils.R
Original file line number Diff line number Diff line change
@@ -1,21 +1,38 @@
#' Create a List of Data Frames with Metadata
#' Get File Paths
#'
#' For each file name provided, this function reads the first matching file and its metadata/attributes.
#' By default, RDS files are preferred over SAS files for faster loading.
#' The function performs case-insensitive matching of file names.
#' This function constructs file paths for given file names, handling both RDS and SAS7BDAT files.
#' It can prioritize SAS files over RDS files based on the `prefer_sas` parameter.
#'
#' @param dir_path [character(1)] Directory path where the files are located
#' @param file_names [character(1+)] Vector of file names
#' @param prefer_sas [logical(1)] If TRUE, SAS (.sas7bdat) files are preferred over RDS (.rds) files
#' @param dir_path [character(1)] The directory path where the files are located.
#' @param file_names [character(1+)] A vector of file names to process.
#' @param prefer_sas [logical(1)] Whether to prefer SAS files over RDS files. Default is FALSE.
#'
#' @return [list] A named list of data frames, where each name is the basename of the corresponding file path.
create_data_list <- function(dir_path, file_names, prefer_sas = FALSE) {
#' @return [character] A vector of normalized file paths.
#'
#' @examples
#' \dontrun{
#' temp_dir <- tempfile("dvloader_example_")
#' dir.create(temp_dir)
#'
#' file_names <- c("adsl", "adae")
#'
#' file.create(file.path(temp_dir, paste0(file_names, ".rds")))
#' file.create(file.path(temp_dir, paste0(file_names, ".sas7bdat")))
#'
#' list.files(temp_dir)
#'
#' get_file_paths(dir_path = temp_dir, file_names = file_names)
#' get_file_paths(dir_path = temp_dir, file_names = file_names, prefer_sas = TRUE)
#'
#' unlink(temp_dir, recursive = TRUE)
#' }
#'
#' @export
get_file_paths <- function(dir_path, file_names, prefer_sas = FALSE) {
checkmate::assert_character(dir_path, len = 1)
checkmate::assert_character(file_names, min.len = 1)
checkmate::assert_logical(prefer_sas, len = 1)
checkmate::assert_directory_exists(dir_path)

data_list <- lapply(file_names, function(x) {
file_paths <- lapply(file_names, function(x) {
extensions <- c("", ".rds", ".sas7bdat")
if (prefer_sas) {
extensions <- c("", ".sas7bdat", ".rds")
Expand Down Expand Up @@ -46,18 +63,14 @@ create_data_list <- function(dir_path, file_names, prefer_sas = FALSE) {
stop(paste("create_data_list(): No RDS or SAS files found for", dir_path, x))
}

# Load a single data file and get the first element of the list
output <- load_data_files(file.path(dir_path, file_name_to_load))[[1]]

return(output)
return(file.path(dir_path, file_name_to_load))
})

names(data_list) <- file_names

return(data_list)
return(normalizePath(unlist(file_paths)))
}



#' Load Data Files
#'
#' This function reads data from multiple file paths and returns a list of data frames.
Expand Down
23 changes: 0 additions & 23 deletions man/create_data_list.Rd

This file was deleted.

40 changes: 40 additions & 0 deletions man/get_file_paths.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion tests/testthat/tests.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ test_that(
{
actual <- load_data(
sub_dir = local_test_path,
file_names = c("dummyads1.RDS", "dummyads1.sas7bdat"),
file_names = c("dummyads1.RDS", "dummyads2.sas7bdat"),
use_wd = TRUE
)
actual <- c(
Expand Down

0 comments on commit 6f65e5c

Please sign in to comment.