Merge pull request #9 from gongcastro/simple-id

Simple
gongcastro · Apr 25, 2023 · 7ab2278 · 7ab2278
2 parents 351450a + 1a5a327
commit 7ab2278
Show file tree

Hide file tree

Showing 26 changed files with 80 additions and 72 deletions.
diff --git a/.Rprofile b/.Rprofile
@@ -34,4 +34,25 @@ if(interactive()) {
         )
     )      
     cli::cli_text("")
+
+    load_all()
+
+    make_fixtures <- function() {
+        participants <- bvq_participants()
+        responses <- bvq_responses(participants = participants)
+        logs <- bvq_logs(participants, responses)
+        vocabulary <- bvq_vocabulary(participants, responses)
+        norms <- bvq_norms(participants, responses)
+
+        obj_lst <- lst(participants, responses, logs, vocabulary, norms, pool)
+
+        invisible({
+            map2(obj_lst, names(obj_lst),
+                 function(x, y = names(x)) {
+                     paths <- paste0(testthat::test_path("fixtures", y), ".rds")
+                     saveRDS(x, paths)
+                 })
+        })
+    } 
+
 }
diff --git a/.github/SUPPORT.md b/.github/SUPPORT.md
@@ -1,6 +1,6 @@
-# Getting help with bvqdev
+# Getting help with bvq
 
-Thanks for using bvqdev!
+Thanks for using bvq!
 Before filing an issue, there are a few places to explore and pieces to put together to make the process as smooth as possible.
 
 ## Make a reprex
@@ -16,12 +16,12 @@ Armed with your reprex, the next step is to figure out [where to ask](https://ww
 
 *   If it's a question: start with [community.rstudio.com](https://community.rstudio.com/), and/or StackOverflow. There are more people there to answer questions.  
 
-*   If it's a bug: you're in the right place, [file an issue](https://github.com/gongcastro/bvqdev/issues/new).  
+*   If it's a bug: you're in the right place, [file an issue](https://github.com/gongcastro/bvq/issues/new).  
 
 *   If you're not sure: let the community help you figure it out! 
     If your problem _is_ a bug or a feature request, you can easily return here and report it. 
 
-Before opening a new issue, be sure to [search issues and pull requests](https://github.com/gongcastro/bvqdev/issues) to make sure the bug hasn't been reported and/or already fixed in the development version. 
+Before opening a new issue, be sure to [search issues and pull requests](https://github.com/gongcastro/bvq/issues) to make sure the bug hasn't been reported and/or already fixed in the development version. 
 By default, the search will be pre-populated with `is:issue is:open`. 
 You can [edit the qualifiers](https://help.github.com/articles/searching-issues-and-pull-requests/)  (e.g. `is:pr`, `is:closed`) as needed. 
 For example, you'd simply remove `is:open` to search _all_ issues in the repo, open or closed.

diff --git a/.github/workflows/check-standard.yaml b/.github/workflows/check-standard.yaml
@@ -2,9 +2,9 @@
 # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 on:
   push:
-    branches: [main, master, test]
+    branches: [main, master, test, simple-id]
   pull_request:
-    branches: [main, master, test]
+    branches: [main, master, test, simple-id]
 
 name: R-CMD-check
 

diff --git a/LICENSE.md b/LICENSE.md
@@ -1,6 +1,6 @@
 # MIT License
 
-Copyright (c) 2023 bvqdev authors
+Copyright (c) 2023 bvq authors
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

diff --git a/R/globals.R b/R/globals.R
@@ -1,4 +1,4 @@
-globalVariables(unique(c(
+utils::globalVariables(unique(c(
     # bvq_logs: 
     "code",
     "date_started",

diff --git a/R/import.R b/R/import.R
@@ -127,15 +127,13 @@ process_survey <- function(raw, participants_tmp, survey_name)
 #' 
 #' @md
 import_formr_lockdown <- function(
-
         surveys = c("bilexicon_lockdown_01_log",
                     "bilexicon_lockdown_02_welcome",
                     "bilexicon_lockdown_03_consent",
                     "bilexicon_lockdown_04_demo",
                     "bilexicon_lockdown_05_language",
                     "bilexicon_lockdown_06_words_catalan",
-                    "bilexicon_lockdown_06_words_spanish"),
-        ...)
+                    "bilexicon_lockdown_06_words_spanish"), ...)
 {
 
     participants_tmp <- get("participants", parent.frame()) %>% 

diff --git a/R/logs.R b/R/logs.R
@@ -29,9 +29,7 @@
 #'   participant-level information. Each row corresponds to a questionnaire
 #'   response and each column represents a variable. The output includes the
 #'   following variables:
-#' * id: a character string indicating a participant's identifier. This value is always the same for each participant, so that different responses from the same participant share the same `id`.
-#' * id_exp: a character string indicating a participant's identifier in the context of the particular study in which the participant was tested and invited to fill in the questionnaire. This value is always the same for each participant within the same study, so that different responses from the same participant in the same study share `id_exp` The same participant may have different `id_exp` across different studies.
-#' * id_db: a character string with five digits indicating a participant's identifier in the database from the [Laboratori de Recerca en Infància](https://www.upf.edu/web/cbclab) at Universitat Pompeu Fabra. This value is always the same for each participant, so that different responses from the same participant share the same `id_db`.
+#' * id: a character string with five digits indicating a participant's identifier in the database from the [Laboratori de Recerca en Infància](https://www.upf.edu/web/cbclab) at Universitat Pompeu Fabra. This value is always the same for each participant, so that different responses from the same participant share the same `id`.
 #' * code: a character string identifying a single response to the questionnaire. This value is always unique for each response to the questionnaire, even for responses from the same participant.
 #' * time: a numeric value indicating how many times a given participant has been sent the questionnaire, regardless of whether they completed it or not.
 #' * study: a character string indicating the study in which the participant was invited to fill in the questionnaire. Frequently, participants that filled in the questionnaire came to the lab to participate in a study, and were then invited to fill in the questionnaire later. This value indicates what study each participant was tested in before being sent the questionnaire.
@@ -49,7 +47,6 @@
 #' * doe_spanish: a numeric value ranging from 0 to 1 indicating participants' daily exposure to Spanish, as estimated by parents/caregivers. This value aggregates participants' exposure to any variant of Spanish (e.g., European and American Spanish).
 #' * doe_catalan: a numeric value ranging from 0 to 1 indicating participants' daily exposure to Catalan, as estimated by parents/caregivers. This value aggregates participants' exposure to any variant of Catalan (e.g., Catalan from Mallorca or Barcelona).
 #' * doe_others: a numeric value ranging from 0 to 1 indicating participants' daily exposure to languages other than Spanish or Catalan, as estimated by parents/caregivers, aggregating participants' exposure to all those other languages (e.g., Norwegian, Arabic, Swahili).
-#' * progress: a numeric value ranging from 0 to 1 indicating participants' progress filling the questionnaire. A value of `0` indicates that the participant has not filled in any item yet. A value of `0.5` indicates that the participant is halfway through the questionnaire. A value of `1` indicates that the participant has completed all items.
 #' * completed: a logical value that returns `TRUE` if the participant's progress filling in the questionnaire (a proportion ranging from 0 to 1) is at least 0.95, and `FALSE` otherwise.
 #' 
 #' @author Gonzalo Garcia-Castro
@@ -68,7 +65,7 @@ bvq_logs <- function(participants,
             summarise(total_items = sum(n), 
                       .by = version)
 
-        grouping_vars <- c("id_db", "date_birth", "time",
+        grouping_vars <- c("id", "date_birth", "time",
                            "edu_parent1", "edu_parent2",
                            "date_birth", "date_started", "date_finished", 
                            "doe_spanish", "doe_catalan",
@@ -79,8 +76,7 @@ bvq_logs <- function(participants,
                   "date_birth", "date_started", "date_finished", 
                   "duration", "dominance", "lp", 
                   "edu_parent1", "edu_parent2",
-                  "doe_spanish", "doe_catalan", "doe_others",
-                  "progress", "completed")
+                  "doe_spanish", "doe_catalan", "doe_others", "completed")
 
         # generate logs
         logs <- responses %>%
@@ -90,7 +86,7 @@ bvq_logs <- function(participants,
             left_join(total_items,
                       by = join_by(version)) %>%
             left_join(select(participants, -c(date_birth, version)),
-                      by = join_by(id_db, time, code, study)) %>%
+                      by = join_by(id, time, code, study)) %>%
             drop_na(id) %>%
             mutate(
                 # define language profiles based on thresholds
@@ -111,7 +107,7 @@ bvq_logs <- function(participants,
                    completed = progress >= 0.95) %>%
             ungroup() %>%
             # select relevant columns and reorder them
-            select(starts_with("id"), one_of(vars), ) %>%
+            select(id, one_of(vars)) %>%
             arrange(desc(date_finished))
     })
 

diff --git a/R/participants.R b/R/participants.R
@@ -17,9 +17,9 @@
 #'   that have participated or are candidates to participate in any of the
 #'   versions of BVQ Each row corresponds to a questionnaire response and each
 #'   column represents a variable. The output includes the following variables:
-#' * id: a character string indicating a participant's identifier. This value is always the same for each participant, so that different responses from the same participant share the same `id`.
+#' * id: a character string with five digits indicating a participant's identifier in the database from the [Laboratori de Recerca en Infància](https://www.upf.edu/web/cbclab) at Universitat Pompeu Fabra. This value is always the same for each participant, so that different responses from the same participant share the same `id`.
+#' * id_bvq: a character string indicating a participant's identifier. This value is always the same for each participant, so that different responses from the same participant share the same `id_bvq`.
 #' * id_exp: a character string indicating a participant's identifier in the context of the particular study in which the participant was tested and invited to fill in the questionnaire. This value is always the same for each participant within the same study, so that different responses from the same participant in the same study share `id_exp`. The same participant may have different `id_exp` across different studies.
-#' * id_db: a character string with five digits indicating a participant's identifier in the database from the [Laboratori de Recerca en Infància (https://www.upf.edu/web/cbclab) at Universitat Pompeu Fabra. This value is always the same for each participant, so that different responses from the same participant share the same `id_db`.
 #' * code: a character string identifying a single response to the questionnaire. This value is always unique for each response to the questionnaire, even for responses from the same participant.
 #' * time: a numeric value indicating how many times a given participant has been sent the questionnaire, regardless of whether they completed it or not.
 #' * date_birth: a date value (see lubridate package) in `yyyy/mm/dd` format indicating participants' birth date.
@@ -43,7 +43,6 @@
 #'   the participant was sent the questionnaire, and has been already reminded
 #'   of it), or `stop` (participant has not completed the questionnaire after
 #'   two weeks since they were sent the questionnaire).
-#' * comments: a character string indicating useful information for database management.
 #'
 #' @author Gonzalo Garcia-Castro
 #' @md
@@ -55,10 +54,13 @@ bvq_participants <- function(...) {
         ss <- "164DMKLRO0Xju0gdfkCS3evAq9ihTgEgFiuJopmqt7mo"
         participants <- read_sheet(ss, sheet = "Participants") %>% 
             drop_na(code) %>%
-            mutate(across(c(date_birth, date_test, date_sent), as_date)) %>% 
-            select(-link) %>%
+            mutate(across(c(date_birth, date_test, date_sent), as_date),
+                   across(include, as.logical)) %>% 
+            filter(include) %>% 
+            select(-c(link, comments, include)) %>%
             arrange(desc(as.numeric(gsub("BL", "", code))))
     })
+
     # make sure no columns are lists (probably due to inconsistent cell types)
     if (any(map_lgl(participants, is.list))) {
         col <- names(which(map_lgl(participants, is.list)))

diff --git a/R/responses.R b/R/responses.R
@@ -28,9 +28,7 @@
 #' @returns A data frame (actually, a [tibble::tibble]) containing participants'
 #'   responses to each item, along with some session-specific metadata. The
 #'   output includes the following variables:
-#' * id: a character string indicating a participant's identifier. This value is always the same for each participant, so that different responses from the same participant share the same `id`.
-#' * id_exp: a character string indicating a participant's identifier in the context of the particular study in which the participant was tested and invited to fill in the questionnaire. This value is always the same for each participant within the same study, so that different responses from the same participant in the same study share `id_exp`. The same participant may have different `id_exp` across different studies.
-#' * id_db: a character string with five digits indicating a participant's identifier in the database from the [Laboratori de Recerca en Infància](https://www.upf.edu/web/cbclab) at Universitat Pompeu Fabra. This value is always the same for each participant, so that different responses from the same participant share the same `id_db`.
+#' * id: a character string with five digits indicating a participant's identifier in the database from the [Laboratori de Recerca en Infància](https://www.upf.edu/web/cbclab) at Universitat Pompeu Fabra. This value is always the same for each participant, so that different responses from the same participant share the same `id`.
 #' * time: a numeric value indicating how many times a given participant has been sent the questionnaire, regardless of whether they completed it or not.
 #' * code: a character string identifying a single response to the questionnaire. This value is always unique for each response to the questionnaire, even for responses from the same participant.
 #' * study: a character string indicating the study in which the participant was invited to fill in the questionnaire. Frequently, participants that filled in the questionnaire came to the lab to participate in a study, and were then invited to fill in the questionnaire later. This value indicates what study each participant was tested in before being sent the questionnaire.
@@ -66,14 +64,17 @@ bvq_responses <- function(participants = NULL,
 
     # merge data
     suppressMessages({
+
+        cbc_studies <- c("CBC", "Signs", "Negation", "Inhibition")
+
         responses <- list(formr1, formr2, formr_short, formr_lockdown) %>%
             bind_rows() %>%
             distinct(id, code, item, .keep_all = TRUE) %>%  
             mutate(across(c(starts_with("date_"), time_stamp), as_date),
                    date_finished = coalesce(time_stamp, date_finished),
                    version = case_when(
                        study %in% "DevLex" ~ "DevLex",
-                       study %in% c("CBC", "Signs", "Negation", "Inhibition") ~ "CBC",
+                       study %in% cbc_studies ~ "CBC",
                        .default = version
                    ),
                    time = ifelse(is.na(time), 1, time),
@@ -88,7 +89,7 @@ bvq_responses <- function(participants = NULL,
             drop_na(date_finished) %>%
             get_longitudinal(longitudinal = longitudinal) %>%
             arrange(desc(date_finished)) %>% 
-            select(starts_with("id"), time, code, study,
+            select(id, time, code, study,
                    version, randomisation,
                    starts_with("date_"),
                    item, response, sex, starts_with("doe_"),

diff --git a/R/sysdata.rda b/R/sysdata.rda
diff --git a/R/utils.R b/R/utils.R
@@ -79,14 +79,14 @@ fix_code <- function(x) {
               "BLBL" = "BL")
         )
     x <- ifelse(!grepl("BL", x), paste0("BL", x), x)
+
     return(x)
 }
 
 
 #' Fix raw codes
 #'
-#' @param x Vector of `code` whose values should be fixed, based on
-#'   `session`ç
+#' @param x Vector of `code` whose values should be fixed, based on `session`.
 #' @author Gonzalo Garcia-Castro
 fix_code_raw <- function(x) {
     x[x$session == "-OYU0wA9FPQ9-ugKUpyrz1A0usJZIuM5hb-cbV2yMgGBal5S9q3ReRgphBDDxFEY", "code"] <- "BL1674"
@@ -111,16 +111,16 @@ fix_doe <- function(x) {
     x %>%
         mutate(
             doe_catalan = case_when(
-                id_db == "54469" & time == 2 ~ 0,
-                id_db == "57157" & time == 1 ~ 80,
-                id_db == "57046" & time == 1 ~ 50,
+                id == "54469" & time == 2 ~ 0,
+                id == "57157" & time == 1 ~ 80,
+                id == "57046" & time == 1 ~ 50,
                 code == "BL1582" ~ 30,
                 code == "BL1295" ~ 10,
                 code == "BL1252" ~ 90,
                 .default = doe_catalan
             ),
             doe_spanish = case_when(
-                id_db == "57046" & time == 1 ~ 50,
+                id == "57046" & time == 1 ~ 50,
                 code == "BL896" ~ 75,
                 .default = doe_spanish
             ),
@@ -141,14 +141,14 @@ fix_doe <- function(x) {
 #' @author Gonzalo Garcia-Castro
 fix_sex <- function(x) {
 
-    x$sex <- ifelse(x$id %in% c("bilexicon_1097", 
-                                "bilexicon_1441", 
-                                "bilexicon_1124",
-                                "bilexicon_1448"),
+    x$sex <- ifelse(x$id_bvq %in% c("bilexicon_1097", 
+                                    "bilexicon_1441", 
+                                    "bilexicon_1124",
+                                    "bilexicon_1448"),
                     "Female",
                     x$sex)
 
-    x$sex <- ifelse(x$id %in% c("bilexicon_1447"), "Male", x$sex)
+    x$sex <- ifelse(x$id_bvq %in% c("bilexicon_1447"), "Male", x$sex)
 
     return(x)
 }
@@ -158,10 +158,12 @@ fix_sex <- function(x) {
 #' @param x Vector of `postcode` whose values should be fixed
 #' @author Gonzalo Garcia-Castro
 fix_postcode <- function(x) {
+
     pcd <- x$postcode
     pcd <- ifelse(nchar(pcd) < 5, paste0("0", pcd), pcd)
     pcd <- ifelse(nchar(pcd) < 5, NA_character_, pcd)
     x$postcode <- pcd
+
     return(x)
 }
 
@@ -269,3 +271,5 @@ get_longitudinal <- function(x, longitudinal = "all") {
 
     return(x)
 }
+
+