Merge pull request #6 from gongcastro/test

Test
gongcastro · Apr 23, 2023 · 351450a · 351450a
2 parents 566cf8f + 5b58011
commit 351450a
Show file tree

Hide file tree

Showing 40 changed files with 454 additions and 643 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,48 +1,3 @@
-<<<<<<< HEAD
-# History files
-.Rhistory
-.Rapp.history
-
-# Session Data files
-.RData
-
-# User-specific files
-.Ruserdata
-
-# Example code in package build process
-*-Ex.R
-
-# Output files from R CMD build
-/*.tar.gz
-
-# Output files from R CMD check
-/*.Rcheck/
-
-# RStudio files
-.Rproj.user/
-
-# produced vignettes
-vignettes/*.html
-vignettes/*.pdf
-
-# OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
-.httr-oauth
-
-# knitr and R markdown default cache directories
-*_cache/
-/cache/
-
-# Temporary files created by R markdown
-*.utf8.md
-*.knit.md
-
-# R Environment Variables
-.Renviron
-docs
-
-# datasets
-inst/*.rds
-=======
 # History files
 .Rhistory
 .Rapp.history
@@ -86,4 +41,6 @@ docs
 
 # executables
 .exe
->>>>>>> dplyr-1.0.0
+
+# datasets
+inst/*.rds
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,49 +1,48 @@
-Package: bvq
-Title: Barcelona Vocabulary Questionnaire Database and Helper Functions
-Version: 0.1.0
-Authors@R: 
-    person("Gonzalo", "Garcia-Castro", , "[email protected]", role = "cre",
-           comment = c(ORCID = "0000-0002-8553-4209"))
-Maintainer: Gonzalo Garcia-Castro <[email protected]>
-Description: Download, clean, and process the Barcelona Vocabulary
-    Questionnaire data. BVQ is a vocabulary inventory developed for
-    assesing the vocabulary of Catalan-Spanish bilinguals infants from the
-    Metropolitan Area of Barcelona (Spain), aged 10 to 40 months. This
-    package includes functions to download the data from formr servers,
-    and return the processed data in multiple formats.
-License: MIT + file LICENSE
-URL: https://gongcastro.github.io/bvq/,
-    https://github.com/gongcastro/bvq/
-BugReports: https://github.com/gongcastro/bvq/issues
-Depends: 
-    R (>= 3.5.0),
-Imports: 
-    cli,
-    dplyr (>= 1.0.0),
-    formr (>= 0.9.1),
-    googlesheets4,
-    ipa,
-    janitor,
-    lifecycle,
-    lubridate,
-    magrittr,
-    rlang (>= 0.4.11),
-    stringr,
-    tibble,
-    tidyr
-Suggests: 
-    knitr,
-    readxl,
-    roxygen2,
-    testthat (>= 3.0.0),
-    withr
-VignetteBuilder: 
-    knitr
-Remotes: 
-    github::rubenarslan/formr
-Config/testthat/edition: 3
-Encoding: UTF-8
-Language: en-US
-LazyData: true
-Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.2.3
+Package: bvq
+Title: Barcelona Vocabulary Questionnaire Database and Helper Functions
+Version: 0.2.0
+Authors@R: 
+    person("Gonzalo", "Garcia-Castro", , "[email protected]", role = "cre",
+           comment = c(ORCID = "0000-0002-8553-4209"))
+Maintainer: Gonzalo Garcia-Castro <[email protected]>
+Description: Download, clean, and process the Barcelona Vocabulary
+    Questionnaire (BVQ) data. BVQ is a vocabulary inventory developed for
+    assessing the vocabulary of Catalan-Spanish bilingual infants from the
+    Metropolitan Area of Barcelona (Spain), aged 10 to 40 months. This
+    package includes functions to download the data from formr servers,
+    and return the processed data in multiple formats.
+License: MIT + file LICENSE
+URL: https://gongcastro.github.io/bvq/,
+    https://github.com/gongcastro/bvq/
+BugReports: https://github.com/gongcastro/bvq/issues
+Depends: 
+    R (>= 3.5.0),
+Imports: 
+    cli,
+    dplyr (>= 1.0.0),
+    formr (>= 0.9.1),
+    googlesheets4,
+    janitor,
+    lifecycle,
+    lubridate,
+    magrittr,
+    rlang,
+    stringr,
+    tibble,
+    tidyr
+Suggests: 
+    knitr,
+    readxl,
+    roxygen2,
+    testthat (>= 3.0.0),
+    withr
+VignetteBuilder: 
+    knitr
+Remotes: 
+    github::rubenarslan/formr
+Config/testthat/edition: 3
+Encoding: UTF-8
+Language: en-US
+LazyData: true
+Roxygen: list(markdown = TRUE)
+RoxygenNote: 7.2.3
diff --git a/LICENSE b/LICENSE
@@ -1,2 +1,2 @@
 YEAR: 2023
-COPYRIGHT HOLDER: bvqdev authors
+COPYRIGHT HOLDER: bvq authors
diff --git a/NAMESPACE b/NAMESPACE
@@ -11,20 +11,15 @@ export(bvq_norms)
 export(bvq_participants)
 export(bvq_responses)
 export(bvq_vocabulary)
-export(check_xsampa)
 export(diff_in_time)
 export(enquo)
 export(enquos)
-export(flatten_ipa)
 export(flatten_xsampa)
 export(get_longitudinal)
 export(prop_adj)
-export(syllabify_ipa)
 export(syllabify_xsampa)
-export(syllable_str_ipa)
 export(syllable_str_xsampa)
 import(dplyr)
-import(ipa)
 import(rlang)
 importFrom(cli,cli_abort)
 importFrom(cli,cli_alert_success)

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,14 @@
-# bvqdev 0.0.0.9000
-
-* Added a `NEWS.md` file to track changes to the package.
+# bvq 0.2.0
+
+* Package name changes from {bvqdev} to {bvq}
+* New phonology functions
+* Remove {ipa} dependency
+* Tests for the `vowels` and `consonants` datasets
+* Now X-SAMPA is used exclusively instead of IPA
+* `bvq_vocabulary()` has been refactored (major speed improvement), now column names are shorter, and only "proportion" is set as default `.scale` argument (`scale` in previous versions)
+* Removed unused argument `runs` in `import_*()` functions
+* Some adjustments for integration in the [r-universe](https://gongcastro.r-universe.dev/bvq)
+
+# bvqdev 0.0.0.9000
+
+* Added a `NEWS.md` file to track changes to the package.
diff --git a/R/bvq-package.R b/R/bvq-package.R
@@ -1,9 +1,9 @@
-#' @keywords internal
-"_PACKAGE"
-
-## usethis namespace: start
-#' @import rlang
-#' @importFrom lifecycle deprecated
-#' @importFrom tibble tibble
-## usethis namespace: end
-NULL
+#' @keywords internal
+"_PACKAGE"
+
+## usethis namespace: start
+#' @importFrom lifecycle deprecated
+#' @importFrom tibble tibble
+#' @import rlang
+## usethis namespace: end
+NULL
diff --git a/R/consonants.R b/R/consonants.R
@@ -1,10 +1,9 @@
-#' Inventory and classification of consonants in X-SAMPA and IPA formats.
+#' Inventory and classification of consonants in X-SAMPA format.
 #'
-#' A dataset containing most consonant phonemes identified by the [International Phonetic Association](https://en.wikipedia.org/wiki/International_Phonetic_Association) (IPA). Phonemes are classified across three dimensions: place of articulation, manner of articulation, and voicing. Each phoneme is assigned a symbol in X-SAMPA and IPA format.
+#' A dataset containing most consonant phonemes identified by the [International Phonetic Association](https://en.wikipedia.org/wiki/International_Phonetic_Association) (IPA). Phonemes are classified across three dimensions: place of articulation, manner of articulation, and voicing. Each phoneme is assigned a symbol in X-SAMPA format.
 #' @source https://en.wikipedia.org/wiki/X-SAMPA
 #' @format A data frame with 65 rows and 5 variables:
 #' * xsampa: phoneme symbol in [X-SAMPA](https://en.wikipedia.org/wiki/X-SAMPA) format
-#' * ipa: phoneme symbol in [IPA](https://en.wikipedia.org/wiki/International_Phonetic_Alphabet) format
 #' * place: place of articulation (broad classification): `"Coronal"`, `"Dorsal"`, `"Labial"`, or `"Pharyngeal"`. Location along the vocal tract where its production occurs.
 #' * place_2: place of articulation (fine classification): `"Bilabial"`, `"Labio-dental"`, `"Dental"`, `"Alveolar"`, `"Post-alveolar"`, `"Retroflex"`, `"Palatal"`, `"Velar"`, `"Uvular"`, `"Epiglotal"`, `"Glotal"`.
 #' * manner: manner of articulation: `"Nasal"`, `"Plosive"`, `"Fricative"`, `"Approximant"`, `"Trill"`, `"Flap"`, `"Lateral Fricative"`, `"Lateral Approximant"`, or `"Lateral Flat"`. Configuration and interaction of the articulators (speech organs such as the tongue, lips, and palate) when making a speech sound.

diff --git a/R/globals.R b/R/globals.R
@@ -145,8 +145,6 @@ globalVariables(unique(c(
     # import_pool: 
     "cognate", 
     "include",
-    "ipa", 
-    "ipa_flat", 
     "te",
     # vowels
     "vowels"

diff --git a/R/import.R b/R/import.R
@@ -36,7 +36,7 @@ download_surveys <- function(surveys, ...) {
 #' Process survey contents
 #' 
 #' @importFrom stringr str_trim
-#' @param raw Raw survey data, as generated by [bvq::download_survey()]
+#' @param raw Raw survey data, as generated by [bvq::download_surveys()]
 #' @param participants_tmp Participants dataset, inherited from inside the `import_*()` function environment.
 #' @param survey_name character string indicating the name of the survey being processed (must be `"BL-Lockdown"`, `"BL-Short"`, or `"BL-Long-2"`).
 #' @author Gonzalo Garcia-Castro

diff --git a/R/phonology.R b/R/phonology.R
@@ -1,21 +1,4 @@
-#' Remove punctuation and fix non-ASCII characters from IPA transcriptions
-#'
-#' @details Note that this function will effectively remove information about
-#'   syllabification and stress from the phonological representations.
-#' @export flatten_ipa
-#' @param x A character string with a phonological transcription in IPA format.
-#' @return A character string containing a phonological transcription in IPA format in which punctuation characters
-#'   have been removed.
-#' @author Gonzalo Garcia-Castro
-flatten_ipa <- function(x) {
-    unique_phonemes <- unique(unlist(strsplit(paste(x, collapse = ""), "")))
-    shortlisted_phonemes <- paste0("\\", 
-                                   unique_phonemes[c(3, 6, 37, 39, 44, 50)] ,
-                                   collapse = "|")
-    gsub(shortlisted_phonemes, "", x)
-}
-
-#' Remove punctuation from SAMPA transcriptions
+#' Remove punctuation from X-SAMPA transcriptions
 #'
 #' @details Note that this function will effectively remove information about
 #'   syllabification and stress from the phonological representations.
@@ -26,7 +9,7 @@ flatten_ipa <- function(x) {
 #' @author Gonzalo Garcia-Castro
 #' @md
 flatten_xsampa <- function(x) {
-    str_rm <- c("\\.", "\\\\", ",", "/", "?", "¿", "'", '"', "ˈ")
+    str_rm <- c("\\.", "\\\\", ",", "/", "?", "'", '"')
     str <- gsub(paste0(str_rm, collapse = "|"), "", x)
     str <- gsub("\\{", "\\\\{", str)
     return(str)
@@ -46,20 +29,6 @@ syllabify_xsampa <- function(x, .sep = c("\\.", "\\\"")) {
     return(syll)
 }
 
-#' Syllabify phonological transcriptions in IPA or X-SAMPA formats
-#'
-#' @export syllabify_ipa
-#' @param x A character string with a phonological transcription in IPA format.
-#' @param .sep A vector of character strings indicating the characters that will be used to separate syllables. Takes `"\\."`, `"'"` and `"ˈ"` by default.
-#' @return A vector of characters in which each element is a syllable.
-#' @author Gonzalo Garcia-Castro
-#' @md
-syllabify_ipa <- function(x, .sep = c("\\.", "'", "ˈ")) {
-    syll <- strsplit(x, split = paste0(.sep, collapse = "|"))
-    syll <- lapply(syll, function(x) x[x != ""]) 
-    return(syll)
-}
-
 #' Get syllable structure from X-SAMPA phonological transcription
 #'
 #' @export syllable_str_xsampa
@@ -71,6 +40,7 @@ syllabify_ipa <- function(x, .sep = c("\\.", "'", "ˈ")) {
 syllable_str_xsampa <- function(x, .sep = c("\\.", "\\\""))
 {
     syll <- syllabify_xsampa(x)
+
     syll <- map(syll, function(x) {
         phon <- strsplit(x, split = "")
         map_chr(phon, function(x) {
@@ -81,54 +51,4 @@ syllable_str_xsampa <- function(x, .sep = c("\\.", "\\\""))
     return(syll)
 }
 
-#' Get syllable structure from IPA phonological transcription
-#'
-#' @export syllable_str_ipa
-#' @param x A character string with a phonological transcription in X-SAMPA format.
-#' @param .sep Character separating syllables in the input transcriptions.
-#' @return A vector of characters in which each element is a syllable, in which vowels have been replaced with `"V"` and each consonants has been replaced with `"C"`. 
-#' @author Gonzalo Garcia-Castro
-#' @md
-syllable_str_ipa <- function(x, .sep = c("\\.", "\\\""))
-{
-    syll <- syllabify_ipa(x)
-    syll <- map(syll, function(x) {
-        phon <- strsplit(x, split = "")
-        map_chr(phon, function(x) {
-            type <- ifelse(x %in% vowels$ipa, "V", "C")
-            paste0(unlist(type), collapse = "")
-        })
-    })
-    return(syll)
-}
-
-#' Check that all characters included in X-SAMPA phonological transcriptions are part of the X-SAMPA alphabet
-#'
-#' @export check_xsampa
-#' @importFrom cli cli_abort
-#' @import ipa
-#' @param x A vector of character strings with at least one element that contains
-#'   phonological transcriptions in X-SAMPA format.
-#' @return A logical (invisible) value indicating whether all symbols in `x` are part of the X-SAMPA alphabet.
-#' @author Gonzalo Garcia-Castro
-#' @md
-check_xsampa <- function(x) {
-
-    str <- flatten_xsampa(x)
-    str <- unique(unlist(strsplit(str, "")))
-
-    phonemes_df <- get("phonemes",
-                       envir = asNamespace("ipa"),
-                       inherits = FALSE)
-
-    is_xsampa <- str %in% phonemes_df$xsampa
-    if (!all(is_xsampa)) {
-        which_not <- str[which(!is_xsampa)]
-        cli_abort("Character{?s} {which_not} {?is/are} not a X-SAMPA symbol{?s}")
-    } else {
-        return(invisible(TRUE))
-    }
-
-}
-
 
diff --git a/R/pool.R b/R/pool.R
@@ -10,7 +10,6 @@
 #' * language: language the item belongs to.
 #' * te: index associated to translation equivalents across languages.
 #' * label: item label, as presented to participants in the front-end of the questionnaire, some labels are not unique within or across questionnaires.
-#' * ipa: phonological transcription in IPA format, extracted from [Wiktionary](https://www.wiktionary.org/) or manually coded if not available.
 #' * xsampa: phonological transcription in X-SAMPA format, transcribed from `ipa` using the [ipa::ipa] function.
 #' * n_lemmas: an integer indicating the number of different lemmas shown to participants in the item label. For instance, the Spanish item `"spa_hierba"` was shown in the questionnaire as `"hierba / césped"`. Lemmas with similar roots were considered as one, such as the Spanish item `"spa_tonto"`, presented as `"tonto / tonta"` in the questionnaire.
 #' * is_multiword: a logical indicating whether the item included a multi-word phrase as presented in the questionnaire. For instance, the Spanish item `"spa_cepillodientes"` was shown as `"cepillo de dientes"` in the questionnaire, which includes three words.