diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md deleted file mode 100644 index 73c6bd1..0000000 --- a/CODE_OF_CONDUCT.md +++ /dev/null @@ -1,126 +0,0 @@ -# Contributor Covenant Code of Conduct - -## Our Pledge - -We as members, contributors, and leaders pledge to make participation in our -community a harassment-free experience for everyone, regardless of age, body -size, visible or invisible disability, ethnicity, sex characteristics, gender -identity and expression, level of experience, education, socio-economic status, -nationality, personal appearance, race, caste, color, religion, or sexual -identity and orientation. - -We pledge to act and interact in ways that contribute to an open, welcoming, -diverse, inclusive, and healthy community. - -## Our Standards - -Examples of behavior that contributes to a positive environment for our -community include: - -* Demonstrating empathy and kindness toward other people -* Being respectful of differing opinions, viewpoints, and experiences -* Giving and gracefully accepting constructive feedback -* Accepting responsibility and apologizing to those affected by our mistakes, - and learning from the experience -* Focusing on what is best not just for us as individuals, but for the overall - community - -Examples of unacceptable behavior include: - -* The use of sexualized language or imagery, and sexual attention or advances of - any kind -* Trolling, insulting or derogatory comments, and personal or political attacks -* Public or private harassment -* Publishing others' private information, such as a physical or email address, - without their explicit permission -* Other conduct which could reasonably be considered inappropriate in a - professional setting - -## Enforcement Responsibilities - -Community leaders are responsible for clarifying and enforcing our standards of -acceptable behavior and will take appropriate and fair corrective action in -response to any behavior that they deem inappropriate, threatening, offensive, -or harmful. - -Community leaders have the right and responsibility to remove, edit, or reject -comments, commits, code, wiki edits, issues, and other contributions that are -not aligned to this Code of Conduct, and will communicate reasons for moderation -decisions when appropriate. - -## Scope - -This Code of Conduct applies within all community spaces, and also applies when -an individual is officially representing the community in public spaces. -Examples of representing our community include using an official e-mail address, -posting via an official social media account, or acting as an appointed -representative at an online or offline event. - -## Enforcement - -Instances of abusive, harassing, or otherwise unacceptable behavior may be -reported to the community leaders responsible for enforcement at gonzalo.garciadecastro@upf.edu. -All complaints will be reviewed and investigated promptly and fairly. - -All community leaders are obligated to respect the privacy and security of the -reporter of any incident. - -## Enforcement Guidelines - -Community leaders will follow these Community Impact Guidelines in determining -the consequences for any action they deem in violation of this Code of Conduct: - -### 1. Correction - -**Community Impact**: Use of inappropriate language or other behavior deemed -unprofessional or unwelcome in the community. - -**Consequence**: A private, written warning from community leaders, providing -clarity around the nature of the violation and an explanation of why the -behavior was inappropriate. A public apology may be requested. - -### 2. Warning - -**Community Impact**: A violation through a single incident or series of -actions. - -**Consequence**: A warning with consequences for continued behavior. No -interaction with the people involved, including unsolicited interaction with -those enforcing the Code of Conduct, for a specified period of time. This -includes avoiding interactions in community spaces as well as external channels -like social media. Violating these terms may lead to a temporary or permanent -ban. - -### 3. Temporary Ban - -**Community Impact**: A serious violation of community standards, including -sustained inappropriate behavior. - -**Consequence**: A temporary ban from any sort of interaction or public -communication with the community for a specified period of time. No public or -private interaction with the people involved, including unsolicited interaction -with those enforcing the Code of Conduct, is allowed during this period. -Violating these terms may lead to a permanent ban. - -### 4. Permanent Ban - -**Community Impact**: Demonstrating a pattern of violation of community -standards, including sustained inappropriate behavior, harassment of an -individual, or aggression toward or disparagement of classes of individuals. - -**Consequence**: A permanent ban from any sort of public interaction within the -community. - -## Attribution - -This Code of Conduct is adapted from the [Contributor Covenant][homepage], -version 2.1, available at -. - -Community Impact Guidelines were inspired by -[Mozilla's code of conduct enforcement ladder][https://github.com/mozilla/inclusion]. - -For answers to common questions about this code of conduct, see the FAQ at -. Translations are available at . - -[homepage]: https://www.contributor-covenant.org diff --git a/DESCRIPTION b/DESCRIPTION index 09f4077..694026c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,60 +1,60 @@ -Package: bvq -Title: Barcelona Vocabulary Questionnaire Database and Helper Functions -Version: 0.3.2 -Authors@R: c( - person(given = "Gonzalo", family = "Garcia-Castro", - email = "gongarciacastro@gmail.com", - role = c("cre", "aut"), - comment = c(ORCID = "0000-0002-8553-4209")), - person(given = "Daniela S. ", family = "Ávila-Varela", - email = "avila.varela.daniela@gmail.com", - role = "aut", - comment = c(ORCID = "0000-0002-3518-8117")), - person(given = "Nuria", - family = "Sebastian-Galles", - email = "nuria.sebastian@upf.edu", - role = "ctb", - comment = c(ORCID = "0000-0001-6938-2498"))) -Maintainer: Gonzalo Garcia-Castro -Description: Download, clean, and process the Barcelona Vocabulary - Questionnaire (BVQ) data. BVQ is a vocabulary inventory developed for - assesing the vocabulary of Catalan-Spanish bilinguals infants from the - Metropolitan Area of Barcelona (Spain). This - package includes functions to download the data from formr servers, - and return the processed data in multiple formats. -License: MIT + file LICENSE -URL: https://gongcastro.github.io/bvq/, - https://github.com/gongcastro/bvq/ -BugReports: https://github.com/gongcastro/bvq/issues -Depends: - R (>= 3.5.0), -Imports: - cli (>= 3.6.1), - dplyr (>= 1.1.0), - formr (>= 0.9.1), - googlesheets4 (>= 1.0.0), - janitor (>= 2.2.0), - lifecycle (>= 1.0.3), - lubridate (>= 1.8.0), - magrittr (>= 2.0.3), - rlang (>= 1.1.1), - tibble (>= 3.2.1), - tidyr (>= 1.2.0) -Suggests: - knitr, - readxl, - rmarkdown, - roxygen2, - testthat (>= 3.0.0), - ggplot2 (>= 3.4.2), - withr -VignetteBuilder: - knitr -Remotes: - github::rubenarslan/formr -Config/testthat/edition: 3 -Encoding: UTF-8 -Language: en-US -LazyData: true -Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 +Package: bvq +Title: Barcelona Vocabulary Questionnaire Database and Helper Functions +Version: 0.3.2 +Authors@R: c( + person(given = "Gonzalo", family = "Garcia-Castro", + email = "gongarciacastro@gmail.com", + role = c("cre", "aut"), + comment = c(ORCID = "0000-0002-8553-4209")), + person(given = "Daniela S. ", family = "Ávila-Varela", + email = "avila.varela.daniela@gmail.com", + role = "aut", + comment = c(ORCID = "0000-0002-3518-8117")), + person(given = "Nuria", + family = "Sebastian-Galles", + email = "nuria.sebastian@upf.edu", + role = "ctb", + comment = c(ORCID = "0000-0001-6938-2498"))) +Maintainer: Gonzalo Garcia-Castro +Description: Download, clean, and process the Barcelona Vocabulary + Questionnaire (BVQ) data. BVQ is a vocabulary inventory developed for + assesing the vocabulary of Catalan-Spanish bilinguals infants from the + Metropolitan Area of Barcelona (Spain). This + package includes functions to download the data from formr servers, + and return the processed data in multiple formats. +License: MIT + file LICENSE +URL: https://gongcastro.github.io/bvq/, + https://github.com/gongcastro/bvq/ +BugReports: https://github.com/gongcastro/bvq/issues +Depends: + R (>= 3.5.0), +Imports: + cli (>= 3.6.1), + dplyr (>= 1.1.0), + formr (>= 0.9.1), + googlesheets4 (>= 1.0.0), + janitor (>= 2.2.0), + lifecycle (>= 1.0.3), + lubridate (>= 1.8.0), + magrittr (>= 2.0.3), + rlang (>= 1.1.1), + tibble (>= 3.2.1), + tidyr (>= 1.2.0) +Suggests: + knitr, + readxl, + rmarkdown, + roxygen2, + testthat (>= 3.0.0), + ggplot2 (>= 3.4.2), + withr +VignetteBuilder: + knitr +Remotes: + github::rubenarslan/formr +Config/testthat/edition: 3 +Encoding: UTF-8 +Language: en-US +LazyData: true +Roxygen: list(markdown = TRUE) +RoxygenNote: 7.2.3 diff --git a/NAMESPACE b/NAMESPACE index e5e5258..4525107 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -13,12 +13,9 @@ export(bvq_responses) export(bvq_vocabulary) export(enquo) export(enquos) -export(flatten_xsampa) export(get_doe) export(get_longitudinal) export(prop_adj) -export(syllabify_xsampa) -export(syllable_str_xsampa) import(dplyr) import(rlang) importFrom(cli,cli_abort) @@ -54,7 +51,6 @@ importFrom(lubridate,today) importFrom(magrittr,"%>%") importFrom(rlang,":=") importFrom(rlang,.data) -importFrom(rlang,.env) importFrom(rlang,as_label) importFrom(rlang,as_name) importFrom(rlang,enquo) diff --git a/NEWS.md b/NEWS.md index 531e778..7b37549 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,9 @@ +# bvq 0.4.0 + +* Switch most functions to base R. I made this decision in order to learn R base a bit better. Performance differences are negligible, and the user interface remains the same. +* Phonology functions and datasets have been removed, and will be included in a different package. +* `bvq_vocabulary()` now has a better naming system. + # bvq 0.3.2 * Refactor `bvq_vocabulary()` and `bvq_norms()` to use rlang and the dynamic dots (`...`) (#20) operator diff --git a/R/connect.R b/R/connect.R index d1143eb..18afdb8 100644 --- a/R/connect.R +++ b/R/connect.R @@ -25,60 +25,62 @@ #' bvq_connect <- function(google_email = NULL, password = NULL) { - formr_email <- "gonzalo.garciadecastro@upf.edu" - - # ask for email in console is everything is NULL - if (is.null(google_email)) google_email <- formr_email - - if (is.null(password)) { - password <- Sys.getenv("FORMR_PWD", unset = NA) - if (is.na(password)) { - cli_abort("Please, provide a password") + formr_email <- "gonzalo.garciadecastro@upf.edu" + + # ask for email in console is everything is NULL + if (is.null(google_email)) google_email <- formr_email + + if (is.null(password)) { + password <- Sys.getenv("FORMR_PWD", unset = NA) + if (is.na(password)) { + cli_abort("Please, provide a password") + } } - } - - # if key exists, use it to log in - tryCatch( - suppressWarnings( - formr_connect( - email = formr_email, - password = password, - host = "https://formr.org/" - ) - ), - error = function(e) { - cli_abort( - strwrap( - prefix = " ", - initial = "", - "Could not connect to {.url https://formr.org/}. \ + + # if key exists, use it to log in + tryCatch( + suppressWarnings( + formr::formr_connect( + email = formr_email, + password = password, + host = "https://formr.org/" + ) + ), + error = function(e) { + cli_abort( + strwrap( + prefix = " ", + initial = "", + "Could not connect to {.url https://formr.org/}. \ Please check your internet connection or \ make sure you have set the right formr password." - ) - ) - } - ) - - # check if Google credentials exists, ask for them if not - if (!gs4_has_token()) { - tryCatch( - suppressWarnings(gs4_auth( - email = google_email, - token = Sys.getenv("GOOGLE_TOKEN", unset = NA) - )), - error = function(e) { - cli_abort( - strwrap( - prefix = " ", - initial = "", - "Could not connect to Google.\ + ) + ) + } + ) + + # check if Google credentials exists, ask for them if not + if (!gs4_has_token()) { + tryCatch( + suppressWarnings({ + googlesheets4::gs4_auth( + email = google_email, + token = Sys.getenv("GOOGLE_TOKEN", unset = NA) + ) + }), + error = function(e) { + cli_abort( + strwrap( + prefix = " ", + initial = "", + "Could not connect to Google.\ Please check your internet connection or \ grant the necessary permissions." - ) + ) + ) + } ) - } - ) - } - - invisible(gs4_has_token()) + } + + invisible(googlesheets4::gs4_has_token()) } diff --git a/R/consonants.R b/R/consonants.R deleted file mode 100644 index 6ab2b7b..0000000 --- a/R/consonants.R +++ /dev/null @@ -1,28 +0,0 @@ -#' Inventory and classification of consonants in X-SAMPA format. -#' -#' A dataset containing most consonant phonemes identified by the [International -#' Phonetic Association](https://en.wikipedia.org/wiki/International_Phonetic_Association) -#' (IPA). Phonemes are classified across three dimensions: place of articulation, -#' manner of articulation, and voicing. Each phoneme is assigned a symbol in -#' X-SAMPA format. -#' @source https://en.wikipedia.org/wiki/X-SAMPA -#' @format A data frame with 65 rows and 5 variables: -#' * xsampa: phoneme symbol in [X-SAMPA](https://en.wikipedia.org/wiki/X-SAMPA) -#' format -#' * place: place of articulation (broad classification): `"Coronal"`, `"Dorsal"`, -#' `"Labial"`, or `"Pharyngeal"`. "Location along the vocal tract where its -#' production occurs. -#' * place_2: place of articulation (fine classification): `"Nasal"`, `"Plosive"`, -#' `"Fricative"`, `"Approximant"`, `"Trill"`, `"Flap"`, `"Lateral Fricative"`, -#' `"Lateral Approximant"`, or `"Lateral Flat"` -#' * manner: manner of articulation: `"Bilabial"`, `"Labio-dental"`, `"Dental"`, -#' `"Alveolar"`, `"Post-alveolar"`, `"Retroflex"`, `"Palatal"`, `"Velar"`, -#' `"Uvular"`, `"Epiglotal"`, `"Glotal"`. Configuration and interaction of the -#' articulators (speech organs such as the tongue, lips, and palate) when making -#' a speech sound. -#' * voicing: `"Voiced"`, `"Voiceless"`. "Classification of speech sounds that -#' tend to be associated with vocal cord vibration but may not actually be voiced -#' at the articulatory level. -"consonants" - - diff --git a/R/import.R b/R/import.R index 2650b01..857ea48 100644 --- a/R/import.R +++ b/R/import.R @@ -4,6 +4,7 @@ #' @importFrom cli cli_progress_step #' @importFrom cli cli_progress_update #' @importFrom cli qty +#' @importFrom formr formr_raw_results #' #' @param surveys Name of the surveys in the formr run. #' @param ... Unused. @@ -25,7 +26,7 @@ download_surveys <- function(surveys, ...) { } for (i in seq_along(surveys)) { - raw[[i]] <- formr_raw_results(surveys[i]) + raw[[i]] <- formr::formr_raw_results(surveys[i]) if (interactive()) cli_progress_update() } @@ -62,10 +63,10 @@ process_survey <- function(raw, participants_tmp, survey_name) { mutate( code = fix_code(code), survey_name = .env$survey_name, - version = ifelse(survey_name == "BL-Long-2", + version = ifelse(survey_name=="long", survey_name, paste(survey_name, - trimws(version, whitespace = "[\\h\\v]"), + trimws(tolower(version), whitespace = "[\\h\\v]"), sep = "-" ) ) @@ -87,23 +88,11 @@ process_survey <- function(raw, participants_tmp, survey_name) { filter(code %in% participants_tmp$code, !if_any(matches("created_|ended_"), is.na)) %>% mutate( - across( - c(matches("created_|ended_"), date_birth), - as_datetime - ), - across( - starts_with("language_doe"), - function(x) ifelse(is.na(x), 0, x) - ), + across(c(matches("created_|ended_"), date_birth), as.POSIXct), + across(starts_with("language_doe"), function(x) ifelse(is.na(x), 0, x)), survey_name = .env$survey_name, - date_started = get_time_stamp(ended_cat, - ended_spa, - which = "first" - ), - date_finished = get_time_stamp(ended_cat, - ended_spa, - which = "last" - ), + date_started = get_time_stamp(ended_cat, ended_spa, which = "first"), + date_finished = get_time_stamp(ended_cat, ended_spa, which = "last"), language_doe_catalan = get_doe(matches("catalan")), language_doe_spanish = get_doe(matches("spanish")), language_doe_others = 100 - rowSums(across(c( @@ -143,131 +132,53 @@ process_survey <- function(raw, participants_tmp, survey_name) { return(processed) } -#' Import lockdown data +#' Collect survey data #' -#' @import dplyr -#' @importFrom formr formr_raw_results -#' @importFrom lubridate as_datetime -#' @importFrom lubridate time_length -#' @importFrom tidyr pivot_longer -#' @importFrom janitor clean_names -#' @importFrom rlang .env #' @importFrom cli cli_alert_success #' -#' @param surveys Name of formr surveys from the `bilexicon_lockdown` run. +#' @param version Character string indicating the name fo the formr run (must be one of "lockdown", "long", or "short") +#' @param participants Participants dataset, as returned by [bvq::bvq_participants()] #' @param ... Unused. #' #' @author Gonzalo Garcia-Castro #' #' @noRd #' @keywords internal -#' +#' #' @md -import_formr_lockdown <- function(participants, - surveys = c( - "bilexicon_lockdown_01_log", - "bilexicon_lockdown_02_welcome", - "bilexicon_lockdown_03_consent", - "bilexicon_lockdown_04_demo", - "bilexicon_lockdown_05_language", - "bilexicon_lockdown_06_words_catalan", - "bilexicon_lockdown_06_words_spanish" -), ...) -{ - version <- "BL-Lockdown" - - if (missing(participants)) participants <- bvq_participants() - - participants_tmp <- select(participants, -version) +collect_survey <- function(version, participants, ...) { - # fetch responses - raw <- download_surveys(surveys) - raw[[1]] <- raw[[1]] %>% - rename(code = bl_code) %>% - mutate( - code = fix_code(na_if(code, "")), # fix codes known to be wrong - created = as_datetime(created) - ) %>% - filter(!is.na(code), !is.na(ended)) %>% - fix_code_raw() %>% # fix codes known to be wrong - filter(code %in% participants_tmp$code) %>% # remove codes not included in participants - arrange(desc(created)) %>% - distinct(code, .keep_all = TRUE) # get only last response of each code + # validate version name + survey_options <- c("long", "short", "lockdown") + if (!(version %in% survey_options)) { + cli_abort("survey must be one of {survey_options}") + } - processed <- process_survey(raw, participants_tmp, version) + # get survey names + survey_names <- c("_log", "_welcome", "_consent", "_demo", "_language", + "_words_catalan", "_words_spanish") - if (interactive()) { - n_responses <- nrow(distinct(processed, code)) - msg <- "{version} updated: {n_responses} response{?s} retrieved" - cli_alert_success(msg) + # get survey names + surveys <- paste0("bilexicon_", version, "_0", c(1:6, 6), survey_names) + if (version=="long") { + surveys <- gsub("_long", "", surveys) + surveys <- gsub("catalan", "cat", surveys) + surveys <- gsub("spanish", "spa", surveys) } - return(processed) -} - - -#' Import short -#' -#' @import dplyr -#' @importFrom formr formr_raw_results -#' @importFrom lubridate as_datetime -#' @importFrom tidyr pivot_longer -#' @importFrom janitor clean_names -#' @importFrom rlang .env -#' @importFrom cli cli_alert_success -#' -#' @param surveys Name of formr surveys from the `bilexicon_short` run -#' @param ... Unused. -#' -#' @author Gonzalo Garcia-Castro -#' -#' @noRd -#' @keywords internal -#' -#' @md -import_formr_short <- function(participants, - surveys = c( - "bilexicon_short_01_log", - "bilexicon_short_02_welcome", - "bilexicon_short_03_consent", - "bilexicon_short_04_demo", - "bilexicon_short_05_language", - "bilexicon_short_06_words_catalan", - "bilexicon_short_06_words_spanish" -), -...) -{ - version <- "BL-Short" - + # process participant info if (missing(participants)) participants <- bvq_participants() + participants_tmp <- participants[participants$version %in% version, ] + if (version=="long") { + participants_tmp <- participants_tmp[participants_tmp$randomisation=="2", ] + } + participants_tmp <- participants_tmp[, colnames(participants_tmp)!="version"] - participants_tmp <- participants %>% - filter(version %in% .env$version) %>% - select(-version) - - # fetch responses - raw <- download_surveys(surveys) - - # edit Spanish inventory - raw[[7]] <- rename_all(raw[[7]], ~ gsub("cat_", "spa_", .)) - - # edit logs dataset - raw[[1]] <- raw[[1]] %>% - # fix codes known to be wrong - mutate( - code = fix_code(na_if(code, "")), - created = as_datetime(created) - ) %>% - # remove codes not inlcuded in participants - filter(code %in% participants_tmp$code) %>% - # get only last response of each code - arrange(desc(created)) %>% - distinct(code, .keep_all = TRUE) %>% - # remove responses with no code - filter(!is.na(code), !is.na(ended)) %>% - # fix codes known to be wrong - fix_code_raw() - + # download and process survey data + raw <- download_surveys(surveys) # fetch responses + # fix Spanish dataframe colnames + colnames(raw[[7]]) <- gsub("cat_", "spa_", colnames(raw[[7]])) + raw[[1]] <- fix_logs_df(raw, participants_tmp) # fix logs dataframe processed <- process_survey(raw, participants_tmp, version) if (interactive()) { @@ -279,78 +190,35 @@ import_formr_short <- function(participants, return(processed) } -#' Import formr 2 -#' -#' @import dplyr -#' @importFrom formr formr_raw_results -#' @importFrom lubridate as_datetime -#' @importFrom tidyr pivot_longer -#' @importFrom janitor clean_names -#' @importFrom rlang .env -#' @importFrom cli cli_alert_success + +#' Fix logs dataframe #' -#' @param surveys Name of formr surveys from the bilexicon_long2 run. -#' @param ... Unused. +#' @param raw Named list with the contents of the surveys, as returned by [bvq::download_surveys()]. #' #' @author Gonzalo Garcia-Castro #' #' @noRd #' @keywords internal -#' +#' #' @md -import_formr2 <- function(participants, - surveys = c( - "bilexicon_01_log", - "bilexicon_02_welcome", - "bilexicon_03_consent", - "bilexicon_04_demo", - "bilexicon_05_language", - "bilexicon_06_words_cat", - "bilexicon_06_words_spa" -), -...) { - - version <- "BL-Long" - - if (missing(participants)) participants <- bvq_participants() +fix_logs_df <- function(raw, participants_tmp) { + + # fix logs dataframe + logs <- raw[[1]] + names(logs)[names(logs) == "bl_code"] <- "code" + # variables to correct types + logs[c("created", "ended")] <- lapply(logs[c("created", "ended")], as.POSIXct) + # remove if missing any critical variable + logs$code <- fix_code(ifelse(logs$code=="", NA_character_, logs$code)) + logs <- logs[!is.na(logs$code) & !is.na(logs$ended) & !is.na(logs$session), ] + # fix codes known to be wrong + logs <- fix_code_raw(logs) + # remove codes not included in participants + logs <- logs[logs$code %in% participants_tmp$code, ] + # get only last response of each code + logs <- logs[order(logs$created, decreasing = TRUE), , drop = FALSE] + logs <- logs[!duplicated(logs$code), , drop = FALSE] + + return(logs) - participants_tmp <- participants %>% - filter( - version %in% .env$version, - randomisation %in% "2" - ) %>% - select(-version) - - # fetch responses - raw <- download_surveys(surveys) - raw[[7]] <- rename_with( - raw[[7]], - function(x) gsub("cat_", "spa_", x), everything() - ) - raw[[1]] <- raw[[1]] %>% - # fix codes known to be wrong - mutate( - code = fix_code(na_if(code, "")), - created = as_datetime(created) - ) %>% - # remove codes not included in participants - filter(code %in% participants_tmp$code) %>% - # get only last response of each code - arrange(desc(created)) %>% - distinct(code, .keep_all = TRUE) %>% - # remove responses with no code - filter(!is.na(code), !is.na(ended)) %>% - # fix codes known to be wrong - fix_code_raw() - - # process data - processed <- process_survey(raw, participants_tmp, version) - - if (interactive()) { - n_responses <- nrow(distinct(processed, code)) - msg <- "{version} updated: {n_responses} response{?s} retrieved" - cli_alert_success(msg) - } - - return(processed) } diff --git a/R/logs.R b/R/logs.R index 6f14cc3..785e89e 100644 --- a/R/logs.R +++ b/R/logs.R @@ -49,14 +49,14 @@ #' * completed: a logical value that returns `TRUE` if `progress` is 1, and `FALSE` otherwise. #' #' @author Gonzalo Garcia-Castro -#' +#' #' @examples #' \dontrun{ #' responses <- bvq_responses() -#' +#' #' logs <- bvq_logs(responses = responses) #' } -#' +#' #' @md bvq_logs <- function(participants = NULL, responses = NULL, @@ -64,7 +64,7 @@ bvq_logs <- function(participants = NULL, other_threshold = 0.10) { if (is.null(participants)) participants <- bvq_participants() if (is.null(responses)) responses <- bvq_responses(participants) - + # get n items answered by participants (depends on the questionnaire version) total_items <- studies %>% distinct(version, language, n) %>% @@ -72,7 +72,7 @@ bvq_logs <- function(participants = NULL, total_items = sum(n), .by = version ) - + grouping_vars <- c( "id", "date_birth", "time", "edu_parent1", "edu_parent2", @@ -81,7 +81,7 @@ bvq_logs <- function(participants = NULL, "doe_others", "date_birth", "code", "study", "version" ) - + vars <- c( "code", "time", "study", "version", "age", "date_birth", "date_started", "date_finished", @@ -89,7 +89,7 @@ bvq_logs <- function(participants = NULL, "edu_parent1", "edu_parent2", "doe_spanish", "doe_catalan", "doe_others", "completed" ) - + # generate logs logs <- responses %>% # total items to fill by each participant (varies across versions) @@ -98,10 +98,12 @@ bvq_logs <- function(participants = NULL, .by = one_of(grouping_vars) ) %>% left_join(total_items, - by = join_by(version)) %>% + by = join_by(version) + ) %>% left_join(select(participants, -c(date_birth, version)), - by = join_by(id, time, code, study)) %>% - filter(!is.na(id)) %>% + by = join_by(id, time, code, study) + ) %>% + filter(!is.na(id)) %>% mutate( # define language profiles based on thresholds lp = case_when( @@ -127,6 +129,6 @@ bvq_logs <- function(participants = NULL, # select relevant columns and reorder them select(id, one_of(vars)) %>% arrange(desc(date_finished)) - + return(logs) } diff --git a/R/norms.R b/R/norms.R index 1badcd1..34a9a8e 100644 --- a/R/norms.R +++ b/R/norms.R @@ -82,27 +82,26 @@ bvq_norms <- function(participants = NULL, ..., te = NULL, item = NULL, - age = NULL) { + age = c(0, 100)) { + if (is.null(participants)) participants <- bvq_participants() if (is.null(responses)) responses <- bvq_responses(participants) # collect ... into a character vector for `any_of` dots_vctr <- as.character(match.call(expand.dots = FALSE)$`...`) - group_vars <- c( - "te", "item", "label", "age", "type", - "item_dominance", dots_vctr - ) + group_vars <- c("te", "item", "label", "age", "type", "item_dominance", dots_vctr) # retrieve participants and logs ------------------------------------------- - logs_tmp <- bvq_logs( - participants = participants, - responses = responses - ) %>% - select(id, time, dominance, any_of(group_vars)) %>% - mutate(age = floor(age)) - pool_tmp <- select(bvq::pool, language, any_of(group_vars)) + logs_tmp <- bvq_logs(participants = participants, responses = responses) + cols.keep <- colnames(logs_tmp) %in% c("id", "time", "dominance", group_vars) + logs_tmp <- logs_tmp[, cols.keep] + logs_tmp$age <- floor(logs_tmp$age) + logs_tmp <- logs_tmp[logs_tmp$age >= min(age) & logs_tmp$age <= max(age), ] + + cols.keep <- colnames(bvq::pool) %in% c("language", group_vars) + pool_tmp <- bvq::pool[, cols.keep] # check arguments ---------------------------------------------------------- @@ -126,35 +125,32 @@ bvq_norms <- function(participants = NULL, # compute norms ------------------------------------------------------------ - norms <- responses %>% - left_join(logs_tmp, by = join_by(id, time), multiple = "all") %>% - filter( - item %in% .env$item, - !is.na(response), - age >= min(.env$age), - age <= max(.env$age) - ) %>% - mutate( - understands = response > 1, - produces = response == 3 - ) %>% - select(-response) %>% - pivot_longer(c(understands, produces), - names_to = "type", - values_to = "response" - ) %>% - left_join(pool_tmp, - relationship = "many-to-many", - by = join_by(item) - ) %>% - mutate(item_dominance = ifelse(language == dominance, "L1", "L2")) %>% - summarise( - .sum = sum(response, na.rm = TRUE), - .n = n(), - .by = any_of(group_vars) - ) %>% - mutate(.prop = prop_adj(.sum, .n)) %>% - arrange(te, item, item_dominance, type, age, .sum, .n, .prop) + responses_tmp <- responses[responses$item %in% item & + !is.na(responses$response), ] + responses_tmp$understands <- responses_tmp$response > 1 + responses_tmp$produces <- responses_tmp$response > 2 + responses_tmp <- responses_tmp[, colnames(responses_tmp)!="response"] + responses_tmp <- tidyr::pivot_longer(responses_tmp, + c(understands, produces), + names_to = "type", + values_to = "response") + + + norms <- merge(responses_tmp, logs_tmp) + norms <- merge(norms, pool_tmp, all.x = TRUE) + + norms$item_dominance <- ifelse(norms$language==norms$dominance, "L1", "L2") + norms <- summarise(norms, + .sum = sum(response, na.rm = TRUE), + .n = n(), + .by = any_of(group_vars)) + norms$.prop <- prop_adj(norms$.sum, norms$.n) + norms <- norms[order(norms$te, + norms$item, + norms$type, + norms$age, + decreasing = TRUE), , drop = FALSE] + norms <- tibble::as_tibble(norms) return(norms) } diff --git a/R/participants.R b/R/participants.R index fa00ddf..20ecb78 100644 --- a/R/participants.R +++ b/R/participants.R @@ -4,8 +4,6 @@ #' that have participated or are candidates to participate in any of the #' versions of BVQ. #' -#' @import dplyr -#' @importFrom lubridate as_date #' @importFrom cli cli_abort #' @importFrom googlesheets4 read_sheet #' @@ -22,7 +20,7 @@ #' * id_exp: a character string indicating a participant's identifier in the context of the particular study in which the participant was tested and invited to fill in the questionnaire. This value is always the same for each participant within the same study, so that different responses from the same participant in the same study share `id_exp`. The same participant may have different `id_exp` across different studies. #' * code: a character string identifying a single response to the questionnaire. This value is always unique for each response to the questionnaire, even for responses from the same participant. #' * time: a numeric value indicating how many times a given participant has been sent the questionnaire, regardless of whether they completed it or not. -#' * date_birth: a date value (see lubridate package) in `yyyy/mm/dd` format indicating participants birth date. +#' * date_birth: a date value in `yyyy/mm/dd` format indicating participants birth date. #' * age_now: a numeric value indicating the number of months elapsed since participants' birth date until the present day, as indicated by [lubridate::now()]. #' * study: a character string indicating the study in which the participant was invited to fill in the questionnaire. Frequently, participants that filled in the questionnaire came to the lab to participant in a study, and were then invited to fill in the questionnaire later. This value indicates what study each participant was tested in before being sent the questionnaire. #' * version: a character string indicating what version of the questionnaire a given participant filled in. Different versions may contain a different subset of items, and the administration instructions might vary slightly (see formr questionnaire templates in the [GitHub repository](https://github.com/gongcastro/multilex). Also, different versions were designed, implemented, and administrated at different time points (e.g., before/during/after the COVID-related lockdown). @@ -55,25 +53,36 @@ #' #' @md bvq_participants <- function(...) { + bvq_connect() # get credentials to Google and formr + + # download Sheets suppressMessages({ - bvq_connect() # get credentials to Google and formr - ss <- "164DMKLRO0Xju0gdfkCS3evAq9ihTgEgFiuJopmqt7mo" - participants <- read_sheet(ss, sheet = "Participants") %>% - mutate( - across(c(date_birth, date_test, date_sent), as_date), - across(include, as.logical) - ) %>% - filter(!is.na(code), include) %>% - select(-c(link, comments, include)) %>% - arrange(desc(as.numeric(gsub("BL", "", code)))) + x <- googlesheets4::read_sheet(ss, sheet = "Participants") }) - # make sure no columns are lists (probably due to inconsistent cell types) - is_col_list <- vapply(participants, is.list, logical(1)) + # change classes + cols.dates <- grepl("date_", names(x)) + x[, cols.dates] <- lapply(x[, cols.dates], as.Date) + x[, "include"] <- lapply(x[, "include"], as.logical) + + # filter rows + cols.keep <- !(names(x) %in% c("link", "comments", "include")) + x <- subset(x, !is.na(x$code) & x$include, cols.keep) + + # reorder rows + code.sorted <- as.numeric(gsub("BL", "", x$code)) + x <- x[order(code.sorted, decreasing = TRUE), , drop = FALSE] + + # fix version values + x$version <- gsub("bl-", "", tolower(x$version)) + + # make sure no columns are lists + # (probably due to inconsistent cell types) + is_col_list <- vapply(x, is.list, logical(1)) if (any(is_col_list)) { col <- names(which(is_col_list)) cli_abort("{col} {?has/have} class {.cls list}") } - return(participants) + return(x) } diff --git a/R/phonology.R b/R/phonology.R deleted file mode 100644 index 4c01022..0000000 --- a/R/phonology.R +++ /dev/null @@ -1,85 +0,0 @@ -#' Remove punctuation from X-SAMPA transcriptions -#' -#' @details Note that this function will effectively remove information about -#' syllabification and stress from the phonological representations. -#' -#' @export flatten_xsampa -#' -#' @param x A character string with a phonological transcription in X-SAMPA format. -#' -#' @return A character string containing a phonological transcription in X-SAMPA format in which punctuation characters -#' have been removed. -#' -#' @author Gonzalo Garcia-Castro -#' -#' @examples -#' \dontrun{ -#' vct <- pool$xsampa[1:10] -#' -#' flatten_xsampa(vct) -#' } -#' -#' @md -flatten_xsampa <- function(x) { - str_rm <- c("\\.", "\\\\", ",", "/", "?", "'", '"') - str <- gsub(paste0(str_rm, collapse = "|"), "", x) - str <- gsub("\\{", "\\\\{", str) - return(str) -} - -#' Syllabify phonological transcriptions in X-SAMPA formats -#' -#' @export syllabify_xsampa -#' -#' @param x A character string with a phonological transcription in X-SAMPA. -#' @param .sep A vector of character strings indicating the characters that will be used to separate syllables. Takes `"\\."` and `"\\\""` by default. -#' -#' @return A vector of characters in which each element is a syllable. -#' -#' @author Gonzalo Garcia-Castro -#' -#' @examples -#' \dontrun{ -#' vct <- pool$xsampa[1:10] -#' -#' syllabify_xsampa(vct) -#' } -#' -#' @md -syllabify_xsampa <- function(x, .sep = c("\\.", "\\\"")) { - syll <- strsplit(x, split = paste0(.sep, collapse = "|")) - syll <- lapply(syll, function(x) x[x != ""]) - return(syll) -} - -#' Get syllable structure from X-SAMPA phonological transcription -#' -#' @export syllable_str_xsampa -#' -#' @param x A character string with a phonological transcription in X-SAMPA format. -#' @param .sep Character separating syllables in the input transcriptions. -#' -#' @return A vector of characters in which each element is a syllable, in which vowels have been replaced with `"V"` and each consonants has been replaced with `"C"`. -#' -#' @author Gonzalo Garcia-Castro -#' -#' @examples -#' \dontrun{ -#' vct <- pool$xsampa[1:10] -#' -#' syllable_str_xsampa(vct) -#' } -#' -#' @md -syllable_str_xsampa <- function(x, .sep = c("\\.", "\\\"")) { - syll <- syllabify_xsampa(x) - - syll <- lapply(syll, function(x) { - phon <- strsplit(x, split = "") - vapply(phon, function(x) { - type <- ifelse(x %in% vowels$xsampa, "V", "C") - paste0(unlist(type), collapse = "") - }, character(1)) - }) - return(syll) -} diff --git a/R/responses.R b/R/responses.R index ac896ee..d05e26b 100644 --- a/R/responses.R +++ b/R/responses.R @@ -5,9 +5,6 @@ #' (the output of [bvq::bvq_participants()]) and `runs` (a character vector that can #' take zero, one, or multiple of the following values: `"formr2"`, #' `"formr-short"`, `"formr-lockdown"`) as arguments. -#' @import dplyr -#' @importFrom lubridate as_date -#' @importFrom formr formr_connect #' @importFrom stats time #' #' @export bvq_responses @@ -50,34 +47,48 @@ bvq_responses <- function(participants = NULL) if (is.null(participants)) participants <- bvq_participants() # retrieve data from formr - formr2 <- import_formr2(participants) # formr2 - formr_lockdown <- import_formr_lockdown(participants) # formr-lockdown - formr_short <- import_formr_short(participants) # formr-lockdown + formr.long <- collect_survey("long", participants) # formr2 + formr.lockdown <- collect_survey("lockdown", participants) # formr-lockdown + formr.short <- collect_survey("short", participants) # formr-lockdown - responses <- list(formr1, formr2, formr_short, formr_lockdown) %>% - bind_rows() %>% - distinct(id, code, item, .keep_all = TRUE) %>% - mutate(across(c(starts_with("date_"), time_stamp), as_date), - date_finished = coalesce(time_stamp, date_finished), - time = ifelse(is.na(time), 1, time), - version = trimws(version, whitespace = "[\\h\\v]") - ) %>% - fix_item() %>% - fix_doe() %>% - mutate(across(starts_with("doe_"), function(x) x / 100)) %>% - fix_sex() %>% - mutate(study = ifelse(is.na(study), "BiLexicon", study)) %>% - fix_id_exp() %>% - filter(!is.na(date_finished)) %>% - arrange(desc(date_finished)) %>% - select( - id, time, code, study, - version, randomisation, - starts_with("date_"), - item, response, sex, - starts_with("doe_"), - starts_with("edu_") - ) + # merge dataframes + formr.list <- list(formr1, formr.long, formr.short, formr.lockdown) + responses <- do.call(rbind, formr.list) + + # remove duplicated combinations of id, code, and item + responses <- responses[!duplicated(responses[c("id", "code", "item")]), , drop = FALSE] + + # fix date columns + date.cols <- grepl("^date_", colnames(responses)) + responses[, date.cols] <- lapply(responses[, date.cols], as.Date, origin = c("1970-01-01")) + + # fix other variables + responses$time <- ifelse(is.na(responses$time), 1, responses$time) + responses$version <- trimws(responses$version, whitespace = "[\\h\\v]") + responses <- fix_item(responses) + responses <- fix_doe(responses) + responses <- fix_sex(responses) + responses$study <- ifelse(is.na(responses$study), "BiLexicon", responses$study) + responses <- fix_id_exp(responses) + + # fix degree of exposure columns + doe.cols <- grepl("^doe_", colnames(responses)) + responses[, doe.cols] <- lapply(responses[, doe.cols], function(x) x / 100) + + # remove rows with missing date_finished and reorder + responses <- responses[!is.na(responses$date_finished), ] + + # reorder dataframe and select columns + responses <- responses[order(responses$date_finished, decreasing = TRUE), , drop = FALSE] + cols.keep <- c( + "id", "time", "code", "study", + "version", "randomisation", + colnames(responses)[grepl("^date_", colnames(responses))], + "item", "response", "sex", + colnames(responses)[grepl("^doe_", colnames(responses))], + colnames(responses)[grepl("^edu_", colnames(responses))] + ) + responses <- responses[, cols.keep] return(responses) } diff --git a/R/stats.R b/R/stats.R deleted file mode 100644 index 6001e99..0000000 --- a/R/stats.R +++ /dev/null @@ -1,21 +0,0 @@ -#' Proportion, adjusted for zero- and one-inflation -#' -#' @details -#' It is very common that a large proportion o the participants know or do not know some word. -#' Vocabulary sizes and word prevalence norms in package are calculated using an estimate that -#' adjusts for zero- and one-inflation so that, at the population level such etimates are more -#' likely to be accurate. -#' -#' -#' @export prop_adj -#' -#' @param x Number of successes -#' @param n Number of tries -#' -#' @returns A numeric scalar. -#' -#' @examples prop_adj(4, 60) -#' -prop_adj <- function(x, n) { - (x + 2) / (n + 4) -} diff --git a/R/sysdata.rda b/R/sysdata.rda index 1778739..8fc0760 100644 Binary files a/R/sysdata.rda and b/R/sysdata.rda differ diff --git a/R/utils.R b/R/utils.R index 443a838..e5f652c 100644 --- a/R/utils.R +++ b/R/utils.R @@ -18,10 +18,10 @@ #' @examples #' diff_in_time(as.Date("2023-02-01"), as.Date("2022-02-01")) diff_in_time <- function(x, y, units = "months") { - diff <- difftime(x, y) - diff <- abs(time_length(diff, units)) - diff <- ifelse(diff %in% c(-Inf, Inf), NA_real_, diff) - return(diff) + diff <- difftime(x, y) + diff <- abs(time_length(diff, units)) + diff <- ifelse(diff %in% c(-Inf, Inf), NA_real_, diff) + return(diff) } #' Get timestamps @@ -56,14 +56,14 @@ diff_in_time <- function(x, y, units = "months") { #' (y) #' get_time_stamp <- function(..., which = "first") { - if (!(which == "first" || which == "last")) { - cli_abort("`which` must be 'first' or 'last'") - } - - if (which == "first") fun <- min else fun <- max - x <- as_datetime(apply(across(c(...)), 1, fun, na.rm = TRUE)) - - return(x) + if (!(which == "first" || which == "last")) { + cli_abort("`which` must be 'first' or 'last'") + } + + if (which == "first") fun <- min else fun <- max + x <- lubridate::as_datetime(apply(across(c(...)), 1, fun, na.rm = TRUE)) + + return(x) } #' Summarise language profile @@ -96,7 +96,7 @@ get_time_stamp <- function(..., which = "first") { #' #' (y) get_doe <- function(...) { - rowSums(across(c(...)), dims = 1, na.rm = TRUE) + rowSums(across(c(...)), dims = 1, na.rm = TRUE) } #' Fix codes @@ -109,13 +109,13 @@ get_doe <- function(...) { #' @keywords internal #' fix_code <- function(x) { # nocov start - x <- toupper(trimws(x)) - x <- gsub("O", "0", x) - x <- gsub("I", "L", x) - x <- gsub("[^\\d]+", "", x, perl = TRUE) - x <- paste0("BL", x) - - return(x) + x <- toupper(trimws(x)) + x <- gsub("O", "0", x) + x <- gsub("I", "L", x) + x <- gsub("[^\\d]+", "", x, perl = TRUE) + x <- paste0("BL", x) + + return(x) } # nocov end @@ -129,15 +129,15 @@ fix_code <- function(x) { # nocov start #' @keywords internal #' fix_code_raw <- function(x) { # nocov start - x[x$session == "-OYU0wA9FPQ9-ugKUpyrz1A0usJZIuM5hb-cbV2yMgGBal5S9q3ReRgphBDDxFEY", "code"] <- "BL1674" - x[x$session == "ZZiRT3JN4AdKnXMxjEMtU3CzRkniH0hOSZzS-0kzquRt_Ls9PJzmKsY3qm8tQ7Z2", "code"] <- "BL1671" - x[x$session == "TW8vSEn7YTtbZoe9BaEtRgwNvryWTwSv49dLKb5W0_6bFL306Eiw0Ehg72Q9nqLx", "code"] <- "BL1672" - x[x$session == "DDjiYrPl-WD951rocaSKH9grkn2T4ZJKjhdCAPDzqNBWyhc8E8wwjOY0CcruNn1m", "code"] <- "BL1673" - x[x$session == "c9fPw4Lbm5WS0AbBRppv4NVHh4eETxvEitH8lUC1pkt2ulxxHCvXgAYopCGRQSa_", "code"] <- "BL1569" - x[x$session == "I8ldNrILmQd7Vhtwqk99Y9YaKWrZzlExKeXsRv9_toIEi43BtlfuLI-PmdU4iY7G", "code"] <- "BL1788" - x[x$session == "dU5CZLLkvmY7SDpe8d0jFQO3xzpmeS0lCOFF_ErjETC1tyyYbv3ZhyaDmlfdJwHc", "code"] <- "BL1876" - x[x$session == "L4F1jd13H4wyFf6QYGy8hfSURneFr-zfzMn1YFFeBTbTZWWjxYPRbC-rPY6U1qdr", "code"] <- "remove" - return(x) + x[x$session == "-OYU0wA9FPQ9-ugKUpyrz1A0usJZIuM5hb-cbV2yMgGBal5S9q3ReRgphBDDxFEY", "code"] <- "BL1674" + x[x$session == "ZZiRT3JN4AdKnXMxjEMtU3CzRkniH0hOSZzS-0kzquRt_Ls9PJzmKsY3qm8tQ7Z2", "code"] <- "BL1671" + x[x$session == "TW8vSEn7YTtbZoe9BaEtRgwNvryWTwSv49dLKb5W0_6bFL306Eiw0Ehg72Q9nqLx", "code"] <- "BL1672" + x[x$session == "DDjiYrPl-WD951rocaSKH9grkn2T4ZJKjhdCAPDzqNBWyhc8E8wwjOY0CcruNn1m", "code"] <- "BL1673" + x[x$session == "c9fPw4Lbm5WS0AbBRppv4NVHh4eETxvEitH8lUC1pkt2ulxxHCvXgAYopCGRQSa_", "code"] <- "BL1569" + x[x$session == "I8ldNrILmQd7Vhtwqk99Y9YaKWrZzlExKeXsRv9_toIEi43BtlfuLI-PmdU4iY7G", "code"] <- "BL1788" + x[x$session == "dU5CZLLkvmY7SDpe8d0jFQO3xzpmeS0lCOFF_ErjETC1tyyYbv3ZhyaDmlfdJwHc", "code"] <- "BL1876" + x[x$session == "L4F1jd13H4wyFf6QYGy8hfSURneFr-zfzMn1YFFeBTbTZWWjxYPRbC-rPY6U1qdr", "code"] <- "remove" + return(x) } # nocov end #' Fix DOEs @@ -153,32 +153,23 @@ fix_code_raw <- function(x) { # nocov start #' @keywords internal #' fix_doe <- function(x) { # nocov start - - x %>% - mutate( - doe_catalan = case_when( - id == "54469" & time == 2 ~ 0, - id == "57157" & time == 1 ~ 80, - id == "57046" & time == 1 ~ 50, - code == "BL1582" ~ 30, - code == "BL1295" ~ 10, - code == "BL1252" ~ 90, - .default = doe_catalan - ), - doe_spanish = case_when( - id == "57046" & time == 1 ~ 50, - code == "BL896" ~ 75, - .default = doe_spanish - ), - doe_others = case_when( - code == "BL1252" ~ 0, - code == "BL1208" ~ 0, - code == "BL896" ~ 0, - code == "BL1582" ~ 0, - code == "BL1295" ~ 0, - .default = doe_others - ) - ) + + x$doe_catalan[x$id=="54469" & x$time == 2] <- 0 + x$doe_catalan[x$id=="57157" & x$time == 1] <- 80 + x$doe_catalan[x$id=="57046" & x$time == 1] <- 50 + x$doe_catalan[x$code=="54469" & x$time == 2] <- 30 + x$doe_catalan[x$code=="BL1295" & x$time == 2] <- 10 + x$doe_catalan[x$code=="BL1252" & x$time == 2] <- 90 + x$doe_spanish[x$id=="57046" & x$time == 2] <- 50 + x$doe_spanish[x$code=="BL896"] <- 75 + x$doe_others[x$code=="BL1252"] <- 0 + x$doe_others[x$code=="BL1208"] <- 0 + x$doe_others[x$code=="BL896"] <- 0 + x$doe_others[x$code=="BL1582"] <- 0 + x$doe_others[x$code=="BL1295"] <- 0 + + return(x) + } # nocov end #' Fix sex (missing in first responses to BL-Lockdown) @@ -191,20 +182,15 @@ fix_doe <- function(x) { # nocov start #' @keywords internal #' fix_sex <- function(x) { # nocov start - - x$sex <- ifelse(x$id_bvq %in% c( - "bilexicon_1097", - "bilexicon_1441", - "bilexicon_1124", - "bilexicon_1448" - ), - "Female", - x$sex - ) - - x$sex <- ifelse(x$id_bvq == "bilexicon_1447", "Male", x$sex) - - return(x) + + id_bvq_female <- c("bilexicon_1097", + "bilexicon_1441", + "bilexicon_1124", + "bilexicon_1448") + x$sex[x$id_bvq %in% id_bvq_female] <- "Female" + x$sex[x$id_bvq %in% "bilexicon_1447"] <- "Male" + + return(x) } # nocov end @@ -218,22 +204,22 @@ fix_sex <- function(x) { # nocov start #' @keywords internal #' fix_item <- function(x) { # nocov start - - x$item[x$item == "cat_parc"] <- "cat_parc1" - x$item[x$item == "cat_eciam"] <- "cat_enciam" - x$item[x$item == "cat_voler3"] <- "cat_voler2" - x$item[x$item == "cat_voler"] <- "cat_voler1" - x$item[x$item == "cat_despres1"] <- "cat_despres" - x$item[x$item == "cat_peix"] <- "cat_peix1" - x$item[x$item == "cat_estar"] <- "cat_estar1" - x$item[x$item == "cat_querer"] <- "cat_querer1" - x$item[x$item == "cat_estiguestequiet"] <- "cat_estiguesquiet" - x$item[x$item == "spa_nibla"] <- "spa_niebla" - x$item[x$item == "spa_ir"] <- "spa_ir1" - x$item[x$item == "spa_querer"] <- "spa_querer1" - x$item[x$item == "cat_anar"] <- "cat_anar1" - - return(x) + + x$item[x$item == "cat_parc"] <- "cat_parc1" + x$item[x$item == "cat_eciam"] <- "cat_enciam" + x$item[x$item == "cat_voler3"] <- "cat_voler2" + x$item[x$item == "cat_voler"] <- "cat_voler1" + x$item[x$item == "cat_despres1"] <- "cat_despres" + x$item[x$item == "cat_peix"] <- "cat_peix1" + x$item[x$item == "cat_estar"] <- "cat_estar1" + x$item[x$item == "cat_querer"] <- "cat_querer1" + x$item[x$item == "cat_estiguestequiet"] <- "cat_estiguesquiet" + x$item[x$item == "spa_nibla"] <- "spa_niebla" + x$item[x$item == "spa_ir"] <- "spa_ir1" + x$item[x$item == "spa_querer"] <- "spa_querer1" + x$item[x$item == "cat_anar"] <- "cat_anar1" + + return(x) } # nocov end @@ -247,8 +233,8 @@ fix_item <- function(x) { # nocov start #' @keywords internal #' fix_id_exp <- function(x) { # nocov start - x$id_exp <- ifelse(x$code %in% "BL547", "bilexicon_189", x$id_exp) - return(x) + x$id_exp[x$code %in% "BL547"] <- "bilexicon_189" + return(x) } # nocov end #' Deal with repeated measures @@ -289,19 +275,42 @@ fix_id_exp <- function(x) { # nocov start #' get_longitudinal(dat, "first") #' get_longitudinal(dat, "only") get_longitudinal <- function(x, longitudinal = "all") { - longitudinal_opts <- c("all", "no", "first", "last", "only") - - if (!(longitudinal %in% longitudinal_opts) && interactive()) { - long_colapsed <- paste0(longitudinal_opts, collapse = ", ") - cli_abort(paste0("longitudinal must be one of: ", long_colapsed)) - } - - repeated <- filter(distinct(x, id, time), n() > 1, .by = id) - - if (longitudinal == "no") x <- filter(x, !(id %in% repeated$id)) - if (longitudinal == "first") x <- filter(x, time == min(time), .by = id) - if (longitudinal == "last") x <- filter(x, time == max(time), .by = id) - if (longitudinal == "only") x <- filter(x, id %in% repeated$id) + longitudinal_opts <- c("all", "no", "first", "last", "only") + + if (!(longitudinal %in% longitudinal_opts) && interactive()) { + long_colapsed <- paste0(longitudinal_opts, collapse = ", ") + cli_abort(paste0("longitudinal must be one of: ", long_colapsed)) + } + + repeated <- filter(distinct(x, id, time), n() > 1, .by = id) + + if (longitudinal == "no") x <- filter(x, !(id %in% repeated$id)) + if (longitudinal == "first") x <- filter(x, time == min(time), .by = id) + if (longitudinal == "last") x <- filter(x, time == max(time), .by = id) + if (longitudinal == "only") x <- filter(x, id %in% repeated$id) + + return(x) +} - return(x) +#' Proportion, adjusted for zero- and one-inflation +#' +#' @details +#' It is very common that a large proportion of the participants know or do not know some word. +#' Vocabulary sizes and word prevalence norms in package are calculated using an estimate that +#' adjusts for zero- and one-inflation so that, at the population level such estimates are more +#' likely to be accurate. +#' +#' +#' @export prop_adj +#' +#' @param x Number of successes +#' @param n Number of tries +#' +#' @returns A numeric scalar. +#' +#' @examples prop_adj(4, 60) +#' +prop_adj <- function(x, n) { + (x + 2) / (n + 4) } + diff --git a/R/vocabulary.R b/R/vocabulary.R index ad53910..f7de8a7 100644 --- a/R/vocabulary.R +++ b/R/vocabulary.R @@ -60,8 +60,8 @@ bvq_vocabulary <- function(participants, if (missing(responses)) responses <- bvq_responses(participants) # get logs - logs <- bvq_logs(participants, responses) %>% - filter(id %in% unique(responses$id)) + logs <- bvq_logs(participants, responses) + logs <- logs[logs$id %in% unique(responses$id), ] # collect ... into a character vector for `any_of` dots_vctr <- as.character(match.call(expand.dots = FALSE)$`...`) @@ -75,53 +75,61 @@ bvq_vocabulary <- function(participants, } # get main dataset - base <- responses %>% - mutate( - understands = response > 1, - produces = response > 2 - ) %>% - select(-response) %>% - pivot_longer(c(understands, produces), - names_to = "type", - values_to = "response" - ) %>% - filter(!is.na(response)) %>% - inner_join(select(bvq::pool, item, te, language, any_of(dots_vctr)), - multiple = "all", - by = join_by(item), - relationship = "many-to-many" - ) %>% - inner_join(select(logs, id, time, dominance, any_of(dots_vctr)), - multiple = "all", - by = join_by(id, time), - relationship = "many-to-many" - ) %>% - mutate(item_dominance = ifelse(language == dominance, "L1", "L2")) %>% - select( - id, time, dominance, item_dominance, type, - te, item, any_of(dots_vctr), response - ) + base <- responses + base$understands <- base$response > 1 + base$produces <- base$response > 2 + base <- base[, names(base) != "response"] + base <- pivot_longer(base, + c(understands, produces), + names_to = "type", + values_to = "response") + base <- base[!is.na(base$response), ] - base_n <- base %>% - distinct(pick(c(id, time, te, ...))) %>% - count(pick(c(id, time, ...)), - name = "n_total" - ) %>% - select(id, time, ..., n_total) + # join TE-level properties + cols.keep <- names(bvq::pool) %in% c("item", "te", "language", dots_vctr) + pool_tmp <- bvq::pool[, cols.keep] + base <- dplyr::inner_join(base, + pool_tmp, + multiple = "all", + by = join_by(item), + relationship = "many-to-many") - base_te <- base %>% - filter(response) %>% - left_join(base_n, - multiple = "all", - by = join_by(id, time, ...) - ) %>% - pivot_wider( - names_from = item_dominance, - values_from = response, - values_fn = sum, - values_fill = 0, - id_cols = c(id, time, type, n_total, te, ...) - ) + # join participant-level properties + cols.keep <- names(logs) %in% c("id", "time", "dominance", dots_vctr) + logs_tmp <- logs[, cols.keep] + base <- dplyr::inner_join(base, logs_tmp, + multiple = "all", + by = join_by(id, time), + relationship = "many-to-many") + + # define TE-by-participant properties + base$item_dominance <- ifelse(base$language==base$dominance, "L1", "L2") + cols.keep <- c("id", "time", "dominance", "item_dominance", "type", + "te", "item", dots_vctr, "response") + base <- base[, cols.keep] + + # compute total denominator + cols.distinct <- names(base) %in% c("id", "time", "te", dots_vctr) + base_n <- base[!duplicated(base[, cols.distinct]), , drop = FALSE] + base_n <- dplyr::count(base_n, + dplyr::pick(id, time, type, ...), + name = "n_total") + cols.keep <- names(base_n) %in% c("id", "time", dots_vctr, "n_total") + base_n <- base_n[, cols.keep] + + # compute TE-wise denominator + base_te <- base[base$response, ] + base_te <- dplyr::left_join(base_te, base_n, + by = join_by(id, time, ...)) + + base_te <- tidyr::pivot_wider( + base_te, + names_from = item_dominance, + values_from = response, + values_fn = sum, + values_fill = 0, + id_cols = c(id, time, type, n_total, te, ...) + ) # total vocabulary total <- vocab_total(base, dots_vctr) @@ -132,21 +140,31 @@ bvq_vocabulary <- function(participants, # merge all datasets which_col_not <- c("count", "prop")[which(!(c("count", "prop") %in% .scale))] - vocabulary <- list(total, dominance, concept, te) %>% - reduce(left_join, - multiple = "all", - by = join_by(id, time, type, ...) - ) %>% - mutate(across( - matches("concept|te"), - function(x) ifelse(is.na(x), as.integer(0), x) - )) %>% - select(id, time, type, any_of(dots_vctr), matches(.scale)) %>% - select(-ends_with(which_col_not)) + vocabulary <- list(total, dominance, concept, te) + vocabulary <- reduce(vocabulary, + dplyr::left_join, + multiple = "all", + by = join_by(id, time, type, ...)) + cols.integer <- names(vocabulary)[grepl("concept|te", names(vocabulary))] + vocabulary[, cols.integer] <- lapply(vocabulary[, cols.integer], + function(x) { + ifelse(is.na(x), as.integer(0), x) + }) + cols.scale <- names(vocabulary)[grepl(paste(.scale, collapse = "|"), + names(vocabulary))] + cols.keep <- c("id", "time", "type", + dots_vctr, + cols.scale[cols.scale %in% cols.scale[grepl("prop$", cols.scale)]], + cols.scale[cols.scale %in% cols.scale[grepl("count$", cols.scale)]]) + vocabulary <- vocabulary[, cols.keep] + vocabulary <- vocabulary[, !(names(vocabulary) %in% which_col_not)] + vocabulary <- tibble::as_tibble(vocabulary) return(vocabulary) } + + #' Check argument `...` in the [bvq::bvq_vocabulary()] function #' #' @param x Variable names passed with `...` diff --git a/R/vowels.R b/R/vowels.R deleted file mode 100644 index 4aa1c22..0000000 --- a/R/vowels.R +++ /dev/null @@ -1,9 +0,0 @@ -#' Inventory and classification of vowels in X-SAMPA format. -#' -#' A dataset containing most vowel phonemes identified by the [International Phonetic Association](https://en.wikipedia.org/wiki/International_Phonetic_Association) (IPA). Phonemes are classified across three dimensions: place of articulation, manner of articulation, and voicing. Each phoneme is assigned a symbol in X-SAMPA format. -#' @source https://en.wikipedia.org/wiki/X-SAMPA -#' @format A data frame with 34 rows and 4 variables: -#' * xsampa: phoneme symbol in [X-SAMPA](https://en.wikipedia.org/wiki/X-SAMPA) format -#' * openness: `"Close"`, `"Near-close"`, `"Near-mid"`, `"Close-mid"`, `"Mid"`, `"Open-mid"`, `"Near-open"`, or `"Open"`. "An open vowel is a vowel sound in which the tongue is positioned as far as possible from the roof of the mouth. Open vowels are sometimes also called low vowels (in U.S. terminology) in reference to the low position of the tongue. -#' * frontness: `"Back`, `"Central`, or `"Front`. "A front vowel is a class of vowel sounds used in some spoken languages, its defining characteristic being that the highest point of the tongue is positioned as far forward as possible in the mouth without creating a constriction that would otherwise make it a consonant. Front vowels are sometimes also called bright vowels because they are perceived as sounding brighter than the back vowels. -"vowels" diff --git a/_pkgdown.yml b/_pkgdown.yml index f982781..701ecbc 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -42,20 +42,12 @@ reference: - bvq_participants - bvq_responses - pool - - vowels - - consonants - title: "Computing vocabulary norms" desc: "Functions for computing participant information, vocabulary size, and word acquisition norms." contents: - bvq_logs - bvq_vocabulary - bvq_norms -- title: "Phonology" - desc: "Functions to manipulate phonological transcriptions in IPA and X-SAMPA formats." - contents: - - flatten_xsampa - - syllabify_xsampa - - syllable_str_xsampa - title: "Helper functions" desc: "Additional functions to access and process the data" contents: diff --git a/data-raw/consonants.R b/data-raw/consonants.R deleted file mode 100644 index 173e7d2..0000000 --- a/data-raw/consonants.R +++ /dev/null @@ -1,23 +0,0 @@ -## code to prepare `phonemes` dataset goes here -library(dplyr) -library(readxl) - -# consonants -consonants <- read_xlsx(system.file("extdata/consonants.xlsx", - package = "bvq" -)) %>% - rename(xsampa = phoneme) %>% - mutate( - across(-xsampa, as.factor), - across( - c(place, place_2, manner, voicing), - function(x) strsplit(as.character(x), ", ") - ) - ) %>% - relocate(xsampa, place, place_2, manner, voicing) - -# export for future testing -saveRDS(consonants, test_path("fixtures", "consonants.rds")) - -# export database -usethis::use_data(consonants, overwrite = TRUE, internal = FALSE) diff --git a/data-raw/vowels.R b/data-raw/vowels.R deleted file mode 100644 index 08470ea..0000000 --- a/data-raw/vowels.R +++ /dev/null @@ -1,49 +0,0 @@ -## code to prepare `vowels` dataset goes here -library(dplyr) -library(readxl) - -# vowels -openness_levels <- c( - "Close" = 0, - "Near close" = 1, - "Close-mid" = 2, - "Mid" = 3, - "Open-mid" = 4, - "Near-open" = 5, - "Open" = 6 -) - -frontness_levels <- c( - "Back" = 0, - "Central" = 1, - "Front" = 2 -) - -vowels <- read_xlsx(system.file("extdata/vowels.xlsx", - package = "bvq" -)) %>% - mutate( - openness = factor(openness, - levels = openness_levels, - labels = names(openness_levels) - ), - frontness = factor(frontness, - levels = frontness_levels, - labels = names(frontness_levels) - ) - ) %>% - rename(xsampa = phoneme) %>% - mutate( - across(-xsampa, as.factor), - across( - c(openness, frontness), - function(x) strsplit(as.character(x), ", ") - ) - ) %>% - relocate(xsampa, openness, frontness) - -# export for future testing -saveRDS(vowels, test_path("fixtures", "vowels.rds")) - -# export database -usethis::use_data(vowels, overwrite = TRUE, internal = FALSE) diff --git a/data/consonants.rda b/data/consonants.rda deleted file mode 100644 index 5eea2a5..0000000 Binary files a/data/consonants.rda and /dev/null differ diff --git a/data/vowels.rda b/data/vowels.rda deleted file mode 100644 index 0f7f803..0000000 Binary files a/data/vowels.rda and /dev/null differ diff --git a/inst/extdata/consonants.xlsx b/inst/extdata/consonants.xlsx deleted file mode 100644 index 37dfdfb..0000000 Binary files a/inst/extdata/consonants.xlsx and /dev/null differ diff --git a/inst/extdata/vowels.xlsx b/inst/extdata/vowels.xlsx deleted file mode 100644 index 847e1d6..0000000 Binary files a/inst/extdata/vowels.xlsx and /dev/null differ diff --git a/inst/figures/logo.png b/inst/figures/logo.png index 5e51fd7..b7e210e 100644 Binary files a/inst/figures/logo.png and b/inst/figures/logo.png differ diff --git a/inst/fixtures/consonants.rds b/inst/fixtures/consonants.rds deleted file mode 100644 index 16205b2..0000000 Binary files a/inst/fixtures/consonants.rds and /dev/null differ diff --git a/inst/fixtures/vowels.rds b/inst/fixtures/vowels.rds deleted file mode 100644 index 0abb339..0000000 Binary files a/inst/fixtures/vowels.rds and /dev/null differ diff --git a/man/bvq-package.Rd b/man/bvq-package.Rd index bb4e755..62690f9 100644 --- a/man/bvq-package.Rd +++ b/man/bvq-package.Rd @@ -6,8 +6,6 @@ \alias{bvq-package} \title{bvq: Barcelona Vocabulary Questionnaire Database and Helper Functions} \description{ -\if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}} - Download, clean, and process the Barcelona Vocabulary Questionnaire (BVQ) data. BVQ is a vocabulary inventory developed for assesing the vocabulary of Catalan-Spanish bilinguals infants from the Metropolitan Area of Barcelona (Spain). This package includes functions to download the data from formr servers, and return the processed data in multiple formats. } \seealso{ diff --git a/man/bvq_norms.Rd b/man/bvq_norms.Rd index d6cde9b..f93bab8 100644 --- a/man/bvq_norms.Rd +++ b/man/bvq_norms.Rd @@ -10,7 +10,7 @@ bvq_norms( ..., te = NULL, item = NULL, - age = NULL + age = c(0, 100) ) } \arguments{ diff --git a/man/bvq_participants.Rd b/man/bvq_participants.Rd index 16a5059..825621c 100644 --- a/man/bvq_participants.Rd +++ b/man/bvq_participants.Rd @@ -20,7 +20,7 @@ column represents a variable. The output includes the following variables: \item id_exp: a character string indicating a participant's identifier in the context of the particular study in which the participant was tested and invited to fill in the questionnaire. This value is always the same for each participant within the same study, so that different responses from the same participant in the same study share \code{id_exp}. The same participant may have different \code{id_exp} across different studies. \item code: a character string identifying a single response to the questionnaire. This value is always unique for each response to the questionnaire, even for responses from the same participant. \item time: a numeric value indicating how many times a given participant has been sent the questionnaire, regardless of whether they completed it or not. -\item date_birth: a date value (see lubridate package) in \code{yyyy/mm/dd} format indicating participants birth date. +\item date_birth: a date value in \code{yyyy/mm/dd} format indicating participants birth date. \item age_now: a numeric value indicating the number of months elapsed since participants' birth date until the present day, as indicated by \code{\link[lubridate:now]{lubridate::now()}}. \item study: a character string indicating the study in which the participant was invited to fill in the questionnaire. Frequently, participants that filled in the questionnaire came to the lab to participant in a study, and were then invited to fill in the questionnaire later. This value indicates what study each participant was tested in before being sent the questionnaire. \item version: a character string indicating what version of the questionnaire a given participant filled in. Different versions may contain a different subset of items, and the administration instructions might vary slightly (see formr questionnaire templates in the \href{https://github.com/gongcastro/multilex}{GitHub repository}. Also, different versions were designed, implemented, and administrated at different time points (e.g., before/during/after the COVID-related lockdown). diff --git a/man/consonants.Rd b/man/consonants.Rd deleted file mode 100644 index 7fbd8d5..0000000 --- a/man/consonants.Rd +++ /dev/null @@ -1,40 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/consonants.R -\docType{data} -\name{consonants} -\alias{consonants} -\title{Inventory and classification of consonants in X-SAMPA format.} -\format{ -A data frame with 65 rows and 5 variables: -\itemize{ -\item xsampa: phoneme symbol in \href{https://en.wikipedia.org/wiki/X-SAMPA}{X-SAMPA} -format -\item place: place of articulation (broad classification): \code{"Coronal"}, \code{"Dorsal"}, -\code{"Labial"}, or \code{"Pharyngeal"}. "Location along the vocal tract where its -production occurs. -\item place_2: place of articulation (fine classification): \code{"Nasal"}, \code{"Plosive"}, -\code{"Fricative"}, \code{"Approximant"}, \code{"Trill"}, \code{"Flap"}, \code{"Lateral Fricative"}, -\code{"Lateral Approximant"}, or \code{"Lateral Flat"} -\item manner: manner of articulation: \code{"Bilabial"}, \code{"Labio-dental"}, \code{"Dental"}, -\code{"Alveolar"}, \code{"Post-alveolar"}, \code{"Retroflex"}, \code{"Palatal"}, \code{"Velar"}, -\code{"Uvular"}, \code{"Epiglotal"}, \code{"Glotal"}. Configuration and interaction of the -articulators (speech organs such as the tongue, lips, and palate) when making -a speech sound. -\item voicing: \code{"Voiced"}, \code{"Voiceless"}. "Classification of speech sounds that -tend to be associated with vocal cord vibration but may not actually be voiced -at the articulatory level. -} -} -\source{ -https://en.wikipedia.org/wiki/X-SAMPA -} -\usage{ -consonants -} -\description{ -A dataset containing most consonant phonemes identified by the \href{https://en.wikipedia.org/wiki/International_Phonetic_Association}{International Phonetic Association} -(IPA). Phonemes are classified across three dimensions: place of articulation, -manner of articulation, and voicing. Each phoneme is assigned a symbol in -X-SAMPA format. -} -\keyword{datasets} diff --git a/man/figures/logo.png b/man/figures/logo.png index e0c7764..b7e210e 100644 Binary files a/man/figures/logo.png and b/man/figures/logo.png differ diff --git a/man/flatten_xsampa.Rd b/man/flatten_xsampa.Rd deleted file mode 100644 index 00e7db1..0000000 --- a/man/flatten_xsampa.Rd +++ /dev/null @@ -1,33 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/phonology.R -\name{flatten_xsampa} -\alias{flatten_xsampa} -\title{Remove punctuation from X-SAMPA transcriptions} -\usage{ -flatten_xsampa(x) -} -\arguments{ -\item{x}{A character string with a phonological transcription in X-SAMPA format.} -} -\value{ -A character string containing a phonological transcription in X-SAMPA format in which punctuation characters -have been removed. -} -\description{ -Remove punctuation from X-SAMPA transcriptions -} -\details{ -Note that this function will effectively remove information about -syllabification and stress from the phonological representations. -} -\examples{ -\dontrun{ -vct <- pool$xsampa[1:10] - -flatten_xsampa(vct) -} - -} -\author{ -Gonzalo Garcia-Castro -} diff --git a/man/prop_adj.Rd b/man/prop_adj.Rd index d95ea6a..e7e79db 100644 --- a/man/prop_adj.Rd +++ b/man/prop_adj.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/stats.R +% Please edit documentation in R/utils.R \name{prop_adj} \alias{prop_adj} \title{Proportion, adjusted for zero- and one-inflation} @@ -18,9 +18,9 @@ A numeric scalar. Proportion, adjusted for zero- and one-inflation } \details{ -It is very common that a large proportion o the participants know or do not know some word. +It is very common that a large proportion of the participants know or do not know some word. Vocabulary sizes and word prevalence norms in package are calculated using an estimate that -adjusts for zero- and one-inflation so that, at the population level such etimates are more +adjusts for zero- and one-inflation so that, at the population level such estimates are more likely to be accurate. } \examples{ diff --git a/man/syllabify_xsampa.Rd b/man/syllabify_xsampa.Rd deleted file mode 100644 index 24626f0..0000000 --- a/man/syllabify_xsampa.Rd +++ /dev/null @@ -1,30 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/phonology.R -\name{syllabify_xsampa} -\alias{syllabify_xsampa} -\title{Syllabify phonological transcriptions in X-SAMPA formats} -\usage{ -syllabify_xsampa(x, .sep = c("\\\\.", "\\\\\\"")) -} -\arguments{ -\item{x}{A character string with a phonological transcription in X-SAMPA.} - -\item{.sep}{A vector of character strings indicating the characters that will be used to separate syllables. Takes \code{"\\\\."} and \verb{"\\\\\\""} by default.} -} -\value{ -A vector of characters in which each element is a syllable. -} -\description{ -Syllabify phonological transcriptions in X-SAMPA formats -} -\examples{ -\dontrun{ -vct <- pool$xsampa[1:10] - -syllabify_xsampa(vct) -} - -} -\author{ -Gonzalo Garcia-Castro -} diff --git a/man/syllable_str_xsampa.Rd b/man/syllable_str_xsampa.Rd deleted file mode 100644 index c0312f1..0000000 --- a/man/syllable_str_xsampa.Rd +++ /dev/null @@ -1,30 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/phonology.R -\name{syllable_str_xsampa} -\alias{syllable_str_xsampa} -\title{Get syllable structure from X-SAMPA phonological transcription} -\usage{ -syllable_str_xsampa(x, .sep = c("\\\\.", "\\\\\\"")) -} -\arguments{ -\item{x}{A character string with a phonological transcription in X-SAMPA format.} - -\item{.sep}{Character separating syllables in the input transcriptions.} -} -\value{ -A vector of characters in which each element is a syllable, in which vowels have been replaced with \code{"V"} and each consonants has been replaced with \code{"C"}. -} -\description{ -Get syllable structure from X-SAMPA phonological transcription -} -\examples{ -\dontrun{ -vct <- pool$xsampa[1:10] - -syllable_str_xsampa(vct) -} - -} -\author{ -Gonzalo Garcia-Castro -} diff --git a/man/vowels.Rd b/man/vowels.Rd deleted file mode 100644 index e5f7ac5..0000000 --- a/man/vowels.Rd +++ /dev/null @@ -1,24 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/vowels.R -\docType{data} -\name{vowels} -\alias{vowels} -\title{Inventory and classification of vowels in X-SAMPA format.} -\format{ -A data frame with 34 rows and 4 variables: -\itemize{ -\item xsampa: phoneme symbol in \href{https://en.wikipedia.org/wiki/X-SAMPA}{X-SAMPA} format -\item openness: \code{"Close"}, \code{"Near-close"}, \code{"Near-mid"}, \code{"Close-mid"}, \code{"Mid"}, \code{"Open-mid"}, \code{"Near-open"}, or \code{"Open"}. "An open vowel is a vowel sound in which the tongue is positioned as far as possible from the roof of the mouth. Open vowels are sometimes also called low vowels (in U.S. terminology) in reference to the low position of the tongue. -\item frontness: \verb{"Back}, \verb{"Central}, or \verb{"Front}. "A front vowel is a class of vowel sounds used in some spoken languages, its defining characteristic being that the highest point of the tongue is positioned as far forward as possible in the mouth without creating a constriction that would otherwise make it a consonant. Front vowels are sometimes also called bright vowels because they are perceived as sounding brighter than the back vowels. -} -} -\source{ -https://en.wikipedia.org/wiki/X-SAMPA -} -\usage{ -vowels -} -\description{ -A dataset containing most vowel phonemes identified by the \href{https://en.wikipedia.org/wiki/International_Phonetic_Association}{International Phonetic Association} (IPA). Phonemes are classified across three dimensions: place of articulation, manner of articulation, and voicing. Each phoneme is assigned a symbol in X-SAMPA format. -} -\keyword{datasets} diff --git a/tests/testthat/test-consonants.R b/tests/testthat/test-consonants.R deleted file mode 100644 index 6fe26b3..0000000 --- a/tests/testthat/test-consonants.R +++ /dev/null @@ -1,50 +0,0 @@ -consonants <- readRDS(system.file("fixtures/consonants.rds", - mustWork = TRUE, - package = "bvq")) - -test_that("consonants columns are the right classes", { - - expect_type(consonants$xsampa, "character") - expect_type(consonants$place, "list") - expect_type(consonants$place_2, "list") - expect_type(consonants$manner, "list") - expect_type(consonants$voicing, "list") -}) - -test_that("consonants column values are right", { - consonants_vct <- c( - "m", "F", "n", "n`", "J", "N", "N\\", "p", "b", "p_d", - "b_d", "t", "d", "t`", "d`", "c", "J\\", "k", "g", "q", - "G\\", ">\\", "?", "p\\", "B", "f", "v", "T", "D", "s", - "z", "S", "Z", "s`", "z`", "C", "j\\", "x", "G", "X", - "R", "X\\", "?\\", "H\\", "<\\", "h", "h\\", "B_o", - "v\\", "r\\", "r\\`", "j", "M\\", "B\\", "r", "R\\", "4", - "r`", "K", "K\\", "l", "l`", "L", "L\\", "l\\" - ) - - place_vct <- c("Labial", "Coronal", "Dorsal", "Laryngeal") - - place_2_vct <- c( - "Nasal", "Plosive", "Fricative", - "Approximant", "Trill", "Flap", - "Lateral Fricative", "Lateral Approximant", "Lateral Flap" - ) - - manner_vct <- c( - "Bilabial", "Labio-dental", "Dental", "Alveolar", - "Post-alveolar", "Retroflex", "Palatal", "Velar", - "Uvular", "Epiglotal", "Glotal", "Pharyngeal" - ) - - voicing_vct <- c("Voiced", "Voiceless", NA) - - expect_in(unique(unlist(consonants$xsampa)), consonants_vct) - expect_in(unique(unlist(consonants$place)), place_vct) - expect_in(unique(unlist(consonants$place_2)), place_2_vct) - expect_in(unique(unlist(consonants$manner)), manner_vct) - expect_in(unique(unlist(consonants$voicing)),voicing_vct) -}) - -test_that("consonants are not duplicated", { - expect_false(any(duplicated(consonants$xsampa))) -}) diff --git a/tests/testthat/test-norms.R b/tests/testthat/test-norms.R index eb9d00b..b9aae11 100644 --- a/tests/testthat/test-norms.R +++ b/tests/testthat/test-norms.R @@ -38,12 +38,13 @@ test_that("test item argument with single item", { ".prop" ) ) - expect_equal(unique(norms$item), "cat_gat") + expect_setequal(unique(norms$item), "cat_gat") }) test_that("test item argument with multiple items", { - norms <- - bvq_norms(participants, responses, item = c("cat_gat", "spa_perro")) + norms <- bvq_norms(participants, + responses, + item = c("cat_gat", "spa_perro")) expect_equal( colnames(norms), c( @@ -58,14 +59,17 @@ test_that("test item argument with multiple items", { ".prop" ) ) - expect_equal(unique(norms$item), c("cat_gat", "spa_perro")) + expect_setequal(unique(norms$item), c("cat_gat", "spa_perro")) }) test_that("test te = TRUE argument with single item", { norms <- - bvq_norms(participants, responses, item = "cat_gat", te = TRUE) + bvq_norms(participants, + responses, + item = "cat_gat", + te = TRUE) - expect_equal( + expect_setequal( colnames(norms), c( "te", @@ -79,7 +83,7 @@ test_that("test te = TRUE argument with single item", { ".prop" ) ) - expect_equal(unique(norms$item), c("cat_gat", "spa_gato")) + expect_in(unique(norms$item), c("cat_gat", "spa_gato")) }) test_that("test te = TRUE argument with multiple items", { @@ -101,14 +105,16 @@ test_that("test te = TRUE argument with multiple items", { ".prop" ) ) - expect_equal(unique(norms$item), - c("cat_gat", "spa_gato", - "cat_gos", "spa_perro")) + expect_setequal(unique(norms$item), + c("cat_gat", "spa_gato", + "cat_gos", "spa_perro")) }) test_that("test te = FALSE argument", { - norms <- - bvq_norms(participants, responses, item = "cat_gat", te = FALSE) + norms <- bvq_norms(participants, + responses, + item = "cat_gat", + te = FALSE) expect_equal( colnames(norms), c( @@ -123,14 +129,16 @@ test_that("test te = FALSE argument", { ".prop" ) ) - expect_equal(unique(norms$item), c("cat_gat")) + expect_setequal(unique(norms$item), c("cat_gat")) }) test_that("test te = 175 argument", { - norms <- - bvq_norms(participants, responses, item = "cat_cuc", te = 175) + norms <- bvq_norms(participants, + responses, + item = "cat_cuc", + te = 175) - expect_equal( + expect_setequal( colnames(norms), c( "te", @@ -144,7 +152,7 @@ test_that("test te = 175 argument", { ".prop" ) ) - expect_equal(unique(norms$item), c("cat_cuc", "spa_gusano")) + expect_setequal(unique(norms$item), c("cat_cuc", "spa_gusano")) }) test_that("test te = 9999 throws an error", { @@ -157,33 +165,29 @@ test_that("test item = 'XXXXXX' throws an error", { test_that("test if items not in `te` are excluded with a warning", { suppressMessages({ - expect_message(bvq_norms( - participants, - responses, - te = 175, - item = c("spa_gusano", "cat_gat") - )) + expect_message( + bvq_norms(participants, + responses, + te = 175, + item = c("spa_gusano", "cat_gat")) + ) - norms <- bvq_norms( - participants, - responses, - te = 175, - item = c("spa_gusano", "cat_gat") - ) + norms <- bvq_norms(participants, + responses, + te = 175, + item = c("spa_gusano", "cat_gat")) - expect_equal(unique(norms$item), c("cat_cuc", "spa_gusano")) + expect_setequal(unique(norms$item), c("cat_cuc", "spa_gusano")) }) }) test_that("test that the ... argument works", { - norms <- bvq_norms( - participants, - responses, - lp, - semantic_category, - item = "cat_gat", - age = c(10, 12) - ) + norms <- bvq_norms(participants, + responses, + lp, + semantic_category, + item = "cat_gat", + age = c(10, 12)) expect_in(c("lp", "semantic_category"), colnames(norms)) }) diff --git a/tests/testthat/test-phonology.R b/tests/testthat/test-phonology.R deleted file mode 100644 index b6a608c..0000000 --- a/tests/testthat/test-phonology.R +++ /dev/null @@ -1,53 +0,0 @@ -test_that("flatten_xsampa works", { - vct <- pool$xsampa[1:10] - vct_flat <- flatten_xsampa(vct) - vct_corr <- c( - "p@siGOL@s", "@B4@sa", "uB4i", "@k@Ba", "L@nsa", "@p@Ga", - "@p4End4@", "@zg@r@pa", "@ZuDa", "b@La" - ) - expect_length(vct_flat, length(vct)) - expect_type(vct_corr, "character") - expect_equal(vct_flat, vct_corr) -}) - -test_that("syllabify_xsampa works", { - vct <- pool$xsampa[1:10] - syll <- syllabify_xsampa(vct) - corr <- list( - c("p@", "si", "GO", "L@s"), - c("@", "B4@", "sa"), - c("u", "B4i"), - c("@", "k@", "Ba"), - c("L@n", "sa"), - c("@", "p@", "Ga"), - c("@", "p4En", "d4@"), - c("@z", "g@", "r@", "pa"), - c("@", "Zu", "Da"), - c("b@", "La") - ) - expect_length(syll, length(vct)) - expect_type(syll, "list") - expect_type(syll[[1]], "character") - expect_equal(syll, corr) -}) - -test_that("syllable_str_xsampa works", { - vct <- pool$xsampa[1:10] - syll <- syllable_str_xsampa(vct) - corr <- list( - c("CV", "CV", "CV", "CVC"), - c("V", "CCV", "CV"), - c("V", "CCV"), - c("V", "CV", "CV"), - c("CVC", "CV"), - c("V", "CV", "CV"), - c("V", "CCVC", "CCV"), - c("VC", "CV", "CV", "CV"), - c("V", "CV", "CV"), - c("CV", "CV") - ) - expect_length(syll, length(vct)) - expect_type(syll, "list") - expect_type(syll[[1]], "character") - expect_equal(syll, corr) -}) diff --git a/tests/testthat/test-vowels.R b/tests/testthat/test-vowels.R deleted file mode 100644 index 8cb9877..0000000 --- a/tests/testthat/test-vowels.R +++ /dev/null @@ -1,32 +0,0 @@ -vowels <- readRDS(system.file("fixtures/vowels.rds", - package = "bvq")) - -test_that("vowels columns are the right classes", { - expect_type(vowels$xsampa, "character") - expect_type(vowels$openness, "list") - expect_type(vowels$frontness, "list") -}) - -test_that("vowels column values are right", { - vowels_vct <- c( - "i", "y", "1", "}", "M", "u", "I", "Y", "I\\", "U\\", - "U", "e", "2", "\\@", "8", "7", "o", "e_o", "2_o", "@", - "o_o", "E", "9", "3", "3\\", "V", "O", "\\{", "6", "a", - "&", "a_\"", "A", "Q" - ) - - openness_vct <- c( - "Close", "Near close", "Close-mid", "Mid", "Open-mid", - "Near-open", "Open" - ) - - frontness_vct <- c("Front", "Central", "Back") - - expect_true(all(unique(unlist(vowels$xsampa)) %in% vowels_vct)) - expect_true(all(unique(unlist(vowels$openness)) %in% openness_vct)) - expect_true(all(unique(unlist(vowels$frontness)) %in% frontness_vct)) -}) - -test_that("vowels are not duplicated", { - expect_false(any(duplicated(vowels$xsampa))) -})