Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New field_types_advanced() function to allow specification of only a subset of fields #20

Merged
merged 7 commits into from
Apr 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: daiquiri
Type: Package
Title: Data Quality Reporting for Temporal Datasets
Version: 1.1.1
Version: 1.1.1.9000
Authors@R: c(
person(c("T.", "Phuong"), "Quan", email = "[email protected]",
role = c("aut", "cre"), comment = c(ORCID = "0000-0001-8566-1817")),
Expand Down Expand Up @@ -36,7 +36,7 @@ Imports:
utils,
stats,
xfun (>= 0.15)
RoxygenNote: 7.2.0
RoxygenNote: 7.3.1
Suggests:
covr,
knitr,
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ export(close_log)
export(daiquiri_report)
export(export_aggregated_data)
export(field_types)
export(field_types_advanced)
export(ft_categorical)
export(ft_datetime)
export(ft_freetext)
Expand Down
8 changes: 8 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
# daiquiri (development version)

## New features

* New `field_types_advanced()` function. Allows just a subset of the columns in the source df to be named explicitly in the specification, with the remaining columns set to the `.default_field_type` parameter. (#16)


# daiquiri 1.1.1 (2023-07-18)

## New features
Expand All @@ -20,6 +27,7 @@

* Hex logo now appears on reports, adding dependency to `xfun`


# daiquiri 1.0.3 (2022-12-06)

## Bug fixes and minor improvements
Expand Down
272 changes: 191 additions & 81 deletions R/field_types.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,95 +34,65 @@
field_types <- function(...) {
fts <- list(...)

# validate - collect all errors together and return only once
err_validation <- character()
is_field_type <- vapply(fts, is_field_type, logical(1))
if (any(!is_field_type)) {
err_validation <-
append(
err_validation,
paste(
"Unrecognised field_type(s) in positions: [",
paste(which(!is_field_type), collapse = ", "),
"]",
"names: [",
paste(names(fts)[which(!is_field_type)], collapse = ", "),
"]"
)
)
}
is_timepoint <- vapply(fts, is_ft_timepoint, logical(1))
if (sum(is_timepoint) != 1) {
err_validation <-
append(
err_validation,
paste(
"Must specify one and only one timepoint field. Timepoints currently in positions: [",
paste(which(is_timepoint), collapse = ", "),
"]",
"names: [",
paste(names(fts)[which(is_timepoint)], collapse = ", "),
"]"
)
)
}
is_strata <- vapply(fts, is_ft_strata, logical(1))
if (sum(is_strata) > 1) {
err_validation <- field_types_problems(fts)

# additional validation for .default_field_type reserved name
if (".default_field_type" %in% names(fts)) {
err_validation <-
append(
err_validation,
paste(
"Only one strata field allowed. Strata fields currently specified in positions: [",
paste(which(is_strata), collapse = ", "),
"]",
"names: [",
paste(names(fts)[which(is_strata)], collapse = ", "),
"]"
)
".default_field_type is a reserved name and cannot be one of the field names in the data.
Did you mean to use the field_types_advanced() function instead?"
)
}
is_aggregate_by_each_category <- is_field_type
is_aggregate_by_each_category[is_field_type] <-
vapply(fts[is_field_type],
FUN = field_type_has_option,
FUN.VALUE = logical(1),
option = "aggregate_by_each_category")
if (any(is_strata) && any(is_aggregate_by_each_category)) {
err_validation <-
append(
err_validation,
paste(
"Cannot use aggregate_by_each_category option when there is a strata field. Option currently specified in positions: [",
paste(which(is_aggregate_by_each_category), collapse = ", "),
"]",
"names: [",
paste(names(fts)[which(is_aggregate_by_each_category)], collapse = ", "),
"]"
)
)
}
if (anyDuplicated(names(fts)) > 0) {
err_validation <-
append(
err_validation,
paste(
"Duplicate column names not allowed: [",
paste(names(fts)[duplicated(names(fts))], collapse = ", "),
"]"
)

if (length(err_validation) > 0) {
stop_custom(
.subclass = "invalid_field_types",
message = paste0(
"Invalid `field_types' specification.\n",
paste(err_validation, collapse = "\n")
)
)
}
# check for reserved names
if (any(names(fts) %in% c("[DUPLICATES]", "[ALL_FIELDS_COMBINED]"))) {

structure(fts, class = "daiquiri_field_types")
}


# -----------------------------------------------------------------------------
#' Create field_types_advanced specification
#'
#' Specify only a subset of the names and types of fields in the source data frame. The remaining
#' fields will be given the same 'default' type.
#'
#' @param ... names and types of fields (columns) in source data.
#' @param .default_field_type `field_type` to use for any remaining fields (columns) in source
#' data. Note, this means there can not be a field in the data named `.default_field_type`
#' @return A `field_types` object
#' @examples
#' fts <- field_types_advanced(
#' PrescriptionDate = ft_timepoint(),
#' PatientID = ft_ignore(),
#' .default_field_type = ft_simple()
#' )
#'
#' fts
#' @seealso [field_types()], [field_types_available()], [template_field_types()]
#' @export
field_types_advanced <- function(..., .default_field_type = ft_simple()) {

fts <- list(..., ".default_field_type" = .default_field_type)

err_validation <- field_types_problems(fts)

# additional validation for .default_field_type
if (is_ft_timepoint(.default_field_type) || is_ft_strata(.default_field_type)) {
err_validation <-
append(
err_validation,
paste(
"'[DUPLICATES]' and '[ALL_FIELDS_COMBINED]' are names reserved for calculated columns.
Please rename these columns in your data."
)
)
append(err_validation,
".default_field_type cannot be a timepoint nor strata field_type")
}

if (length(err_validation) > 0) {
stop_custom(
.subclass = "invalid_field_types",
Expand All @@ -133,7 +103,7 @@
)
}

structure(fts, class = "daiquiri_field_types")
structure(fts, class = c("daiquiri_field_types", "daiquiri_field_types_advanced"))
}


Expand Down Expand Up @@ -493,6 +463,15 @@
is_field_types <- function(x) inherits(x, "daiquiri_field_types")


# -----------------------------------------------------------------------------
#' Test if object is a field_types_advanced object
#'
#' @param x object to test
#' @return Logical
#' @noRd
is_field_types_advanced <- function(x) inherits(x, "daiquiri_field_types_advanced")


# -----------------------------------------------------------------------------
#' Constructor for individual field_type object
#'
Expand Down Expand Up @@ -701,6 +680,7 @@
strata_field_name
}

# -----------------------------------------------------------------------------
#' Test if field_type has a particular option set
#'
#' @param ft field_type to test
Expand All @@ -710,3 +690,133 @@
field_type_has_option <- function(ft, option){
option %in% ft$options
}


# -----------------------------------------------------------------------------
#' Validate list of (standard) field_types
#'
#' @param fts list of individual `field_type`s
#' @return A character vector of error messages (if any)
#'
#' @noRd
field_types_problems <- function(fts) {
# validate - collect all errors together and return only once
err_validation <- character()
is_field_type <- vapply(fts, is_field_type, logical(1))
if (any(!is_field_type)) {
err_validation <-
append(
err_validation,
paste(
"Unrecognised field_type(s) in positions: [",
paste(which(!is_field_type), collapse = ", "),
"]",
"names: [",
paste(names(fts)[which(!is_field_type)], collapse = ", "),
"]"
)
)
}
is_timepoint <- vapply(fts, is_ft_timepoint, logical(1))
if (sum(is_timepoint) != 1) {
err_validation <-
append(
err_validation,
paste(
"Must specify one and only one timepoint field. Timepoints currently in positions: [",
paste(which(is_timepoint), collapse = ", "),
"]",
"names: [",
paste(names(fts)[which(is_timepoint)], collapse = ", "),
"]"
)
)
}
is_strata <- vapply(fts, is_ft_strata, logical(1))
if (sum(is_strata) > 1) {
err_validation <-
append(
err_validation,
paste(
"Only one strata field allowed. Strata fields currently specified in positions: [",
paste(which(is_strata), collapse = ", "),
"]",
"names: [",
paste(names(fts)[which(is_strata)], collapse = ", "),
"]"
)
)
}
is_aggregate_by_each_category <- is_field_type
is_aggregate_by_each_category[is_field_type] <-
vapply(fts[is_field_type],
FUN = field_type_has_option,
FUN.VALUE = logical(1),
option = "aggregate_by_each_category")
if (any(is_strata) && any(is_aggregate_by_each_category)) {
err_validation <-
append(
err_validation,
paste(
"Cannot use aggregate_by_each_category option when there is a strata field. Option currently specified in positions: [",
paste(which(is_aggregate_by_each_category), collapse = ", "),
"]",
"names: [",
paste(names(fts)[which(is_aggregate_by_each_category)], collapse = ", "),
"]"
)
)
}
if (anyDuplicated(names(fts)) > 0) {
err_validation <-
append(
err_validation,
paste(
"Duplicate column names not allowed: [",
paste(names(fts)[duplicated(names(fts))], collapse = ", "),
"]"
)
)
}
# check for reserved names
if (any(names(fts) %in% c("[DUPLICATES]", "[ALL_FIELDS_COMBINED]"))) {
err_validation <-
append(
err_validation,
paste(
"'[DUPLICATES]' and '[ALL_FIELDS_COMBINED]' are names reserved for calculated columns.
Please rename these columns in your data."
)
)
}

err_validation
}


# -----------------------------------------------------------------------------
#' Fill in default field_types (if any) to create a fully-named specification
#'
#' @param df_names field names in the supplied df
#' @param field_types field_types object with or without .default_field_type specified
#' @return A `field_types` object
#' @noRd
complete_field_types <- function(df_names, field_types){

if (!is_field_types_advanced(field_types)) {
fts <- field_types

Check warning on line 807 in R/field_types.R

View check run for this annotation

Codecov / codecov/patch

R/field_types.R#L807

Added line #L807 was not covered by tests
} else{
fts <- list()
for (i in seq_along(df_names)) {
if (df_names[i] %in% names(field_types)) {
fts[[df_names[i]]] <- field_types[[df_names[i]]]
} else{
fts[[df_names[i]]] <-
field_types[[".default_field_type"]]
}
}
}

structure(fts, class = "daiquiri_field_types")
}

Loading
Loading