Skip to content

Commit

Permalink
Merge pull request #19 from gongcastro/test
Browse files Browse the repository at this point in the history
Replace `group_by()`/`ungroup()` for experimental argument `.by` in d…
  • Loading branch information
gongcastro authored May 11, 2023
2 parents 5280fb6 + c061ed7 commit be17390
Show file tree
Hide file tree
Showing 7 changed files with 116 additions and 144 deletions.
1 change: 1 addition & 0 deletions .covrignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ R/globals.R
R/import.R
R/participants.R
R/connect.R
R/responses.R
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
* Add examples and tests for `get_time_stamp()` and `get_doe()`
* Add documentation for new version of `bvq_norms()` in its vignette
* Use .covrignore
* Replace `group_by()`/`ungroup()` for experimental argument `.by` in dplyr functions


# bvq 0.3.0

Expand Down
114 changes: 56 additions & 58 deletions R/logs.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#' many times a participant has been sent the questionnaire, independently of
#' whether a response was obtained from them later.
#'
#' @import dplyr
#' @import dplyr
#' @importFrom lubridate as_date
#' @importFrom lubridate today
#' @importFrom lubridate as_datetime
Expand Down Expand Up @@ -57,63 +57,61 @@ bvq_logs <- function(participants = NULL,
other_threshold = 0.10,
...) {

suppressMessages({

# get participant information
if (is.null(participants)) participants <- bvq_participants()
if (is.null(responses)) responses <- bvq_responses(participants)

# get n items answered by participants (depends on the questionnaire version)
total_items <- studies %>%
distinct(version, language, n) %>%
summarise(total_items = sum(n),
.by = version)

grouping_vars <- c("id", "date_birth", "time",
"edu_parent1", "edu_parent2",
"date_birth", "date_started", "date_finished",
"doe_spanish", "doe_catalan",
"doe_others", "date_birth", "code", "study",
"version")

vars <- c("code", "time", "study", "version", "age",
"date_birth", "date_started", "date_finished",
"duration", "dominance", "lp",
"edu_parent1", "edu_parent2",
"doe_spanish", "doe_catalan", "doe_others", "completed")

# generate logs
logs <- responses %>%
# total items to fill by each participant (varies across versions)
summarise(complete_items = sum(!is.na(response)),
.by = one_of(grouping_vars)) %>%
left_join(total_items,
by = join_by(version)) %>%
left_join(select(participants, -c(date_birth, version)),
by = join_by(id, time, code, study)) %>%
drop_na(id) %>%
mutate(
# define language profiles based on thresholds
lp = case_when(doe_catalan >= bilingual_threshold ~ "Monolingual",
doe_spanish >= bilingual_threshold ~ "Monolingual",
doe_others > other_threshold ~ "Other",
.default = "Bilingual"),
# define language dominance
dominance = case_when(doe_catalan > doe_spanish ~ "Catalan",
doe_spanish > doe_catalan ~ "Spanish",
doe_catalan == doe_spanish ~ sample(c("Catalan", "Spanish"), 1)),
age = diff_in_time(date_finished, date_birth, "months"),
duration = diff_in_time(date_finished, date_started, "days")
) %>%
# compute participant's progress through the questionnaire
rowwise() %>%
mutate(progress = complete_items / total_items,
completed = progress >= 0.95) %>%
ungroup() %>%
# select relevant columns and reorder them
select(id, one_of(vars)) %>%
arrange(desc(date_finished))
})
# get participant information
if (is.null(participants)) participants <- bvq_participants()
if (is.null(responses)) responses <- bvq_responses(participants)

# get n items answered by participants (depends on the questionnaire version)
total_items <- studies %>%
distinct(version, language, n) %>%
summarise(total_items = sum(n),
.by = version)

grouping_vars <- c("id", "date_birth", "time",
"edu_parent1", "edu_parent2",
"date_birth", "date_started", "date_finished",
"doe_spanish", "doe_catalan",
"doe_others", "date_birth", "code", "study",
"version")

vars <- c("code", "time", "study", "version", "age",
"date_birth", "date_started", "date_finished",
"duration", "dominance", "lp",
"edu_parent1", "edu_parent2",
"doe_spanish", "doe_catalan", "doe_others", "completed")

# generate logs
logs <- responses %>%
# total items to fill by each participant (varies across versions)
summarise(complete_items = sum(!is.na(response)),
.by = one_of(grouping_vars)) %>%
left_join(total_items,
by = join_by(version)) %>%
left_join(select(participants, -c(date_birth, version)),
by = join_by(id, time, code, study)) %>%
drop_na(id) %>%
mutate(
# define language profiles based on thresholds
lp = case_when(
doe_catalan >= bilingual_threshold ~ "Monolingual",
doe_spanish >= bilingual_threshold ~ "Monolingual",
doe_others > other_threshold ~ "Other",
.default = "Bilingual"
),
# define language dominance
dominance = case_when(
doe_catalan > doe_spanish ~ "Catalan",
doe_spanish > doe_catalan ~ "Spanish",
doe_catalan == doe_spanish ~ sample(c("Catalan", "Spanish"), 1)),
age = diff_in_time(date_finished, date_birth, "months"),
duration = diff_in_time(date_finished, date_started, "days")
) %>%
# compute participant's progress through the questionnaire
mutate(progress = complete_items / total_items,
completed = progress >= 0.95) %>%
# select relevant columns and reorder them
select(id, one_of(vars)) %>%
arrange(desc(date_finished))

return(logs)
}
63 changes: 22 additions & 41 deletions R/responses.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,52 +51,33 @@ bvq_responses <- function(participants = NULL,
longitudinal = "all",
...) {

bvq_connect() # get credentials to Google and formr

# get participant information
if (is.null(participants)) participants <- bvq_participants()

# retrieve data from formr
formr2 <- import_formr2() # formr2
formr_lockdown <- import_formr_lockdown() # formr-lockdown
formr_short <- import_formr_short() # formr-lockdown

# merge data
suppressMessages({

cbc_studies <- c("CBC", "Signs", "Negation", "Inhibition")

responses <- list(formr1, formr2, formr_short, formr_lockdown) %>%
bind_rows() %>%
distinct(id, code, item, .keep_all = TRUE) %>%
mutate(across(c(starts_with("date_"), time_stamp), as_date),
date_finished = coalesce(time_stamp, date_finished),
version = case_when(
study %in% "DevLex" ~ "DevLex",
study %in% cbc_studies ~ "CBC",
.default = version
),
time = ifelse(is.na(time), 1, time),
version = trimws(version, whitespace = "[\\h\\v]"),
) %>%
fix_item() %>%
fix_doe() %>%
mutate(across(starts_with("doe_"),
function(x) x / 100)) %>%
fix_sex() %>%
mutate(study = ifelse(is.na(study), "BiLexicon", study)) %>%
fix_id_exp() %>%
drop_na(date_finished) %>%
get_longitudinal(longitudinal = longitudinal) %>%
arrange(desc(date_finished)) %>%
select(id, time, code, study,
version, randomisation,
starts_with("date_"),
item, response, sex,
starts_with("doe_"),
starts_with("edu_"))
})

responses <- list(formr1, formr2, formr_short, formr_lockdown) %>%
bind_rows() %>%
distinct(id, code, item, .keep_all = TRUE) %>%
mutate(across(c(starts_with("date_"), time_stamp), as_date),
date_finished = coalesce(time_stamp, date_finished),
time = ifelse(is.na(time), 1, time),
version = trimws(version, whitespace = "[\\h\\v]")) %>%
fix_item() %>%
fix_doe() %>%
mutate(across(starts_with("doe_"), function(x) x / 100)) %>%
fix_sex() %>%
mutate(study = ifelse(is.na(study), "BiLexicon", study)) %>%
fix_id_exp() %>%
drop_na(date_finished) %>%
get_longitudinal(longitudinal = longitudinal) %>%
arrange(desc(date_finished)) %>%
select(id, time, code, study,
version, randomisation,
starts_with("date_"),
item, response, sex,
starts_with("doe_"),
starts_with("edu_"))

return(responses)
}
59 changes: 20 additions & 39 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -88,12 +88,19 @@ get_doe <- function(...) {
#'
#' @param x Vector of `code` whose values should be fixed.
#' @author Gonzalo Garcia-Castro
#'
#' @examples
#' code_vctr <- c("BL0123", "bl0123", "Bl0123", "BLBL0123",
#' "bi0123", "blo123", "B0123", "BI0123",
#' " BL0123")
#'
#' fix_code(code_vctr)
fix_code <- function(x) {
x <- toupper(trimws(x))
x <- gsub(".*BL", "", x)
x <- gsub("O", "0", x)
x <- gsub("I", "L", x)
x <- ifelse(!grepl("BL", x), paste0("BL", x), x)
x <- gsub("[^\\d]+", "", x, perl=TRUE)
x <- paste0("BL", x)

return(x)
}
Expand Down Expand Up @@ -163,24 +170,11 @@ fix_sex <- function(x) { # nocov start
"Female",
x$sex)

x$sex <- ifelse(x$id_bvq %in% c("bilexicon_1447"), "Male", x$sex)
x$sex <- ifelse(x$id_bvq=="bilexicon_1447", "Male", x$sex)

return(x)
} # nocov end

#' Fix postcode
#'
#' @param x Vector of `postcode` whose values should be fixed
#' @author Gonzalo Garcia-Castro
fix_postcode <- function(x) {

pcd <- x$postcode
pcd <- ifelse(nchar(pcd) < 5, paste0("0", pcd), pcd)
pcd <- ifelse(nchar(pcd) < 5, NA_character_, pcd)
x$postcode <- pcd

return(x)
}

#' Fix item
#'
Expand Down Expand Up @@ -232,49 +226,36 @@ fix_id_exp <- function(x) { # nocov start
#' * `"first"` returns the first response of each participant (participants with only one appearance are
#' included).
#' * `"last"` returns the last response from each participant (participants with only one response are included).
#' @importFrom dplyr group_by
#' @importFrom dplyr distinct
#' @importFrom dplyr n
#' @importFrom dplyr filter
#' @importFrom dplyr ungroup
#' @returns A subset of the data frame `x` with only the selected cases,
#' according to `longitudinal`.
#' @author Gonzalo Garcia-Castro
#' @examples
#' id <- c(1, 1, 1, 2, 2, 3, 4, 4, 4, 4, 5, 6, 7, 7, 8, 9, 10, 10)
#' sums <- rle(sort(id))[["lengths"]]
#' dat <- data.frame(id, time = unlist(sapply(sums, function(x) seq(1, x))))
#'
#' (dat)
#'
#' get_longitudinal(dat, "first")
#' get_longitudinal(dat, "only")
get_longitudinal <- function(x, longitudinal = "all") {

longitudinal_opts <- c("all", "no", "first", "last", "only")

if (!(longitudinal %in% longitudinal_opts) && interactive()) {
cli_abort(paste0("longitudinal must be one of: ",
paste0(longitudinal_opts, collapse = ", ")))
}

repeated <- distinct(x, id, time) %>%
group_by(id) %>%
filter(n() > 1) %>%
ungroup()

if (longitudinal == "no") x <- x[!(x$id %in% repeated$id), ]

if (longitudinal == "first") {
x <- group_by(x, id) %>%
filter(time == min(time, na.rm = TRUE)) %>%
ungroup()
long_colapsed <- paste0(longitudinal_opts, collapse = ", ")
cli_abort(paste0("longitudinal must be one of: ", long_colapsed))
}

if (longitudinal == "last") {
x <- group_by(x, id) %>%
filter(time == max(time, na.rm = TRUE)) %>%
ungroup()
}
repeated <- filter(distinct(x, id, time), n() > 1, .by = id)

if (longitudinal == "only") x <- x[x$id %in% repeated$id, ]
if (longitudinal == "no") x <- filter(x, !(id %in% repeated$id))
if (longitudinal == "first") x <- filter(x, time==min(time), .by = id)
if (longitudinal == "last") x <- filter(x, time==max(time), .by = id)
if (longitudinal == "only") x <- filter(x, id %in% repeated$id)

return(x)
}
Expand Down
7 changes: 3 additions & 4 deletions R/vocabulary.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,7 @@
#' `"prop"`. If `"prop"` (default), vocabulary metrics are calculated as
#' proportions. If `"count"`, vocabulary metrics are reported as counts
#' (number of words).
#' @param ... Extra arguments that will be passed to the `bvq_responses`.
#' function.
#' @param ... Extra arguments that will be passed to [bvq::bvq_responses()]`.
#' @returns A dataset (actually, a [tibble::tibble] with each participant's
#' comprehensive and/or vocabulary size in each language. This data frame
#' contains the following variables:
Expand Down Expand Up @@ -170,8 +169,8 @@ bvq_vocabulary <- function(participants,
by = join_by(id, time, type, !!!.by)) %>%
mutate(across(matches("concept|te"),
function(x) ifelse(is.na(x), as.integer(0), x))) %>%
select(any_of(c("id", "time", "age", "type", .by)), matches(.scale)) %>%
select(-ends_with(which_col_not))
select(any_of(c("id", "time", "age", "type", .by)),
matches(.scale), (-ends_with(which_col_not)))

return(vocabulary)

Expand Down
14 changes: 12 additions & 2 deletions tests/testthat/test-utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,15 +41,15 @@ test_that("get_doe works", {
doe_other = 1-get_doe(matches("cat|spa")),
doe_cat = get_doe(doe_cat_1, doe_cat_2),
doe_spa = get_doe(matches("spa")))

expect_true(all(vapply(y, class, "character")=="numeric"))
expect_true(all(sapply(y, \(x) (x>=0) & (x <= 1))))
expect_equal(y$doe_other, 1-rowSums(x))
expect_equal(y$doe_cat, rowSums(x[, c("doe_cat_1", "doe_cat_2")]))
expect_equal(y$doe_spa, rowSums(x[, c("doe_spa_1", "doe_spa_2")]))
})

test_that("get_longitudinal works correctly", {
test_that("get_longitudinal works", {
id <- c(1, 1, 1, 2, 2, 3, 4, 4, 4, 4, 5, 6, 7, 7, 8, 9, 10, 10)
sums <- rle(sort(id))[["lengths"]]
dat <- data.frame(id, time = unlist(sapply(sums, function(x) seq(1, x))))
Expand All @@ -62,3 +62,13 @@ test_that("get_longitudinal works correctly", {
expect_identical(get_longitudinal(dat, "last")$id, unique(dat$id))

})


test_that("fix_code works", {
code_vctr <- c("BL0123", "bl0123", "Bl0123", "BLBL0123",
"bi0123", "blo123", "B0123", "BI0123",
" BL0123")

expect_equal(unique(fix_code(code_vctr)), "BL0123")
})

0 comments on commit be17390

Please sign in to comment.