From 86219f962442313529ff0bf5b99efcdd50953996 Mon Sep 17 00:00:00 2001 From: Lisa Steinmann <37840007+lsteinmann@users.noreply.github.com> Date: Wed, 15 Nov 2023 14:17:52 +0100 Subject: [PATCH] documentation and gp fixes --- DESCRIPTION | 27 +++++++++++---------------- NEWS.md | 2 +- R/data.R | 3 +++ R/datplot_utility.R | 12 +++++------- R/datsteps.R | 20 +++++++++++++------- R/get_histogramscale.R | 8 ++++---- R/scaleweight.R | 8 +++++++- README.md | 14 +++++--------- data-raw/Inscr_Bithynia.R | 3 +-- inst/literatur.bib | 8 +++++--- man/datplot-package.Rd | 2 +- man/datsteps.Rd | 7 ++----- man/get.histogramscale.Rd | 5 ++--- man/get.step.sequence.Rd | 2 -- man/scaleweight.Rd | 6 ++++++ tests/create_testing_df.R | 2 +- tests/testthat/test-datplot_utility.R | 4 ++-- tests/testthat/test-datsteps.R | 9 +++++++++ vignettes/data_preparation.Rmd | 8 ++++---- vignettes/how-to.Rmd | 4 ++-- 20 files changed, 84 insertions(+), 70 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 079c809..fba009e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,23 +1,18 @@ Type: Package Package: datplot -Title: Preparation of Object Dating Ranges for Density Plots - (Aoristic Analysis) +Title: Preparation of Object Dating Ranges for Density Plots (Aoristic + Analysis) Version: 1.1.0 -Authors@R: - c(person(given = "Lisa", - family = "Steinmann", - role = c("aut", "cre"), - email = "lisa.steinmann@rub.de", - comment = c(ORCID = "0000-0002-2215-1243")), - person(given = "Barbora", - family = "Weissova", - role = "ctb", - email = "barbora.weissova@rub.de", - comment = c(ORCID = "0000-0002-3297-6855"))) +Authors@R: c( + person("Lisa", "Steinmann", , "lisa.steinmann@rub.de", role = c("aut", "cre", "cph"), + comment = c(ORCID = "0000-0002-2215-1243")), + person("Barbora", "Weissova", , "barbora.weissova@rub.de", role = "ctb", + comment = c(ORCID = "0000-0002-3297-6855")) + ) Maintainer: Lisa Steinmann -Description: Converting date ranges into dating 'steps' eases - the visualization of 
changes in e.g. pottery consumption, style and - other variables over time. This package provides tools to process and +Description: Converting date ranges into dating 'steps' eases the + visualization of changes in e.g. pottery consumption, style and other + variables over time. This package provides tools to process and prepare data for visualization and employs the concept of aoristic analysis. License: GPL (>= 3) diff --git a/NEWS.md b/NEWS.md index 282f91c..4469976 100644 --- a/NEWS.md +++ b/NEWS.md @@ -7,7 +7,7 @@ argument `calc = "weight"` or `calc = "probability"` probability calculation instead of the original (weights) calculation. * Change and improve error-handling of `scaleweight()`. * Remove UTF-8 characters to comply with CRAN. -* Update documentation and add a pkgdown-site. +* Update documentation and add a [pkgdown-site](https://lsteinmann.github.io/datplot/). # datplot 1.0.1 diff --git a/R/data.R b/R/data.R index 866024c..ab7ac89 100644 --- a/R/data.R +++ b/R/data.R @@ -79,4 +79,7 @@ NULL #' Quantitative Analysis.” Dissertation, Berlin: Freie Universität Berlin. 
#' \url{https://refubium.fu-berlin.de/handle/fub188/23730}, #' partially after \url{https://inscriptions.packhum.org/} + "Inscr_Bithynia" +NULL + diff --git a/R/datplot_utility.R b/R/datplot_utility.R index 5077ea5..197e024 100644 --- a/R/datplot_utility.R +++ b/R/datplot_utility.R @@ -148,7 +148,6 @@ get.probability <- function(DAT_min, DAT_max) { #' @export get.step.sequence #' #' @examples -#' \dontrun{ #' min_year <- -494 #' max_year <- -334 #' sequence <- get.step.sequence(datmin = min_year, datmax = max_year, stepsize = 25) @@ -158,7 +157,6 @@ get.probability <- function(DAT_min, DAT_max) { #' max_year <- 100 #' sequence <- get.step.sequence(datmin = min_year, datmax = max_year, stepsize = 25) #' print(sequence) -#' } get.step.sequence <- function(datmin = 0, datmax = 100, stepsize = 25) { stopifnot(is.numeric(datmin)) @@ -245,10 +243,10 @@ create.sub.objects <- function(DAT_list, if (any(diffs < stepsize)) { - warning(paste("stepsize is larger than the range of the closest dated object at Index = ", - paste(which(diffs < stepsize), collapse = ", "), "). ", - "For information see documentation of get.step.sequence().", - sep = "")) + warning(paste0("stepsize is larger than the range of the ", + "closest dated object (at Index = ", + paste(which(diffs < stepsize), collapse = ", "), "). 
", "For information see documentation of get.step.sequence().")) } DAT_list <- lapply(DAT_list, function(object) { @@ -332,7 +330,7 @@ check.structure <- function(DAT_df) { } if (any(dat_df_structure[c("is.minDAT", "is.maxDAT")] == FALSE)) { result <- FALSE - stop("The 3rd or 4th columns of your data.frame are not numbers.") + stop("The 3rd and 4th columns of your data.frame have to be numeric.") } else { result <- TRUE } diff --git a/R/datsteps.R b/R/datsteps.R index 83e742c..6a687b9 100644 --- a/R/datsteps.R +++ b/R/datsteps.R @@ -40,15 +40,13 @@ #' Added columns contain the value of each step, the 'weight' or 'probability'- #' value for each step, and (if chosen) the cumulative probability. #' +#' @export datsteps +#' #' @examples -#' \dontrun{ -#' data(DAT_df) +#' data("Inscr_Bithynia") +#' DAT_df <- Inscr_Bithynia[, c("ID", "Location", "DAT_min", "DAT_max")] #' DAT_df_steps <- datsteps(DAT_df, stepsize = 25) #' plot(density(DAT_df_steps$DAT_step)) -#' } -#' -#' -#' @export datsteps datsteps <- function(DAT_df, stepsize = 1, calc = "weight", @@ -76,6 +74,14 @@ datsteps <- function(DAT_df, weight = message("Using 'weight'-calculation (see https://doi.org/10.1017/aap.2021.8)."), probability = message("Using step-wise probability calculation.")) + if (any(is.na(DAT_df[, 3:4]))) { # only the dating columns (3 and 4) are checked + NA_rows <- c(which(is.na(DAT_df[, 3])), + which(is.na(DAT_df[, 4]))) + NA_rows <- unique(NA_rows) + DAT_df <- DAT_df[-NA_rows, ] + warning(paste0(length(NA_rows), " rows with NA-values in the ", + "dating columns will be omitted.")) + } DAT_df <- as.data.frame(DAT_df) # Checking the overall structure @@ -88,7 +94,7 @@ datsteps <- function(DAT_df, # Prepare the Matrix to be used instead of the df for faster processing DAT_mat <- matrix(ncol = 5, nrow = nrow(DAT_df)) - DAT_mat[, 1] <- 1:nrow(DAT_df) + DAT_mat[, 1] <- seq_len(nrow(DAT_df)) DAT_mat[, 2] <- DAT_df[, 3] DAT_mat[, 3] <- DAT_df[, 4] diff --git a/R/get_histogramscale.R b/R/get_histogramscale.R index e524c31..115e368 100644 --- 
a/R/get_histogramscale.R +++ b/R/get_histogramscale.R @@ -14,16 +14,16 @@ #' @return the value with which to scale the density curve to a histogram #' plot so that both will be visible #' +#' @export get.histogramscale +#' #' @examples -#' \dontrun{ +#' data("Inscr_Bithynia") +#' DAT_df <- Inscr_Bithynia[, c("ID", "Location", "DAT_min", "DAT_max")] #' DAT_df_steps <- datsteps(DAT_df, stepsize = 25) #' get.histogramscale(DAT_df_steps) #' #' get.histogramscale(DAT_df_steps$DAT_step, binwidth = 20) #' get.histogramscale(500, binwidth = 20) -#' } -#' -#' @export get.histogramscale get.histogramscale <- function(DAT_df_steps, binwidth = "stepsize") { if (check.number(DAT_df_steps) & length(DAT_df_steps) == 1) { nrow <- DAT_df_steps diff --git a/R/scaleweight.R b/R/scaleweight.R index 29f953c..f9cf737 100644 --- a/R/scaleweight.R +++ b/R/scaleweight.R @@ -11,6 +11,12 @@ #' @return the same data.frame, with the scaled values in the specified column #' #' @export scaleweight +#' +#' @examples +#' data("Inscr_Bithynia") +#' DAT_df <- Inscr_Bithynia[, c("ID", "Location", "DAT_min", "DAT_max")] +#' DAT_df_steps <- datsteps(DAT_df, stepsize = 25) +#' DAT_df_scaled <- scaleweight(DAT_df_steps, var = 2, val = 5) scaleweight <- function(DAT_df, var = c("all", 2), val = 5) { @@ -46,7 +52,7 @@ scaleweight <- function(DAT_df, var = c("all", 2), val = 5) { "(scaled to sum of all objects)") } else { uvar <- unique(DAT_df[, var]) - for (row in 1:length(uvar)) { + for (row in seq_along(uvar)) { index <- which(DAT_df[, var] == uvar[row]) DAT_df[index, val] <- DAT_df[index, val] / sum(DAT_df[index, val]) } diff --git a/README.md b/README.md index ee32f66..a7668b2 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,8 @@ -[![CRAN status](https://www.r-pkg.org/badges/version/datplot)](https://CRAN.R-project.org/package=datplot) -[![R-CMD-check](https://github.com/lsteinmann/datplot/workflows/R-CMD-check/badge.svg)](https://github.com/lsteinmann/datplot/actions) 
-[![codecov](https://app.codecov.io/gh/lsteinmann/datplot/branch/main/graph/badge.svg?token=CVNCAL9U4W)](https://app.codecov.io/gh/lsteinmann/datplot) - -[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.4285912.svg)](https://doi.org/10.5281/zenodo.4285912) +[![R-CMD-check](https://github.com/lsteinmann/datplot/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/lsteinmann/datplot/actions/workflows/R-CMD-check.yaml) +[![codecov](https://codecov.io/gh/lsteinmann/datplot/branch/main/graph/badge.svg)](https://app.codecov.io/gh/lsteinmann/datplot) [![DOI](https://img.shields.io/badge/Publication-10.1017/aap.2021.8-green.svg)](https://doi.org/10.1017/aap.2021.8) +[![CRAN status](https://www.r-pkg.org/badges/version/datplot)](https://CRAN.R-project.org/package=datplot) @@ -19,9 +17,7 @@ This package proposes implements the concepts of aoristic analysis to prepare ar browseVignettes("datplot") -after installing the package, or on GitHub in the /vignettes/ directory. Density plots are easy to understand and are usually aesthetically pleasing. They do omit a some information, such as individual counts, that bar histograms can communicate better. On the other hand, ranges can be incorporated into the visualization as well to regard the variety of timespans archaeological objects may be dated to. - -**Note:** Please note that the weight calculation has changed with version 1.0.1 to reflect true probabilities for each object when a stepsize of 1 is used. The change does not affect the visualization, but makes the weight-values usable as dating probability for steps of 1 year exactly. +after installing the package, or [on the pkgdown-site](https://lsteinmann.github.io/datplot/articles/how-to.html), or on GitHub in the /vignettes/ directory. Density plots are easy to understand and are usually aesthetically pleasing. They do omit some information, such as individual counts, that bar histograms can communicate better. 
On the other hand, ranges can be incorporated into the visualization as well to account for the variety of timespans archaeological objects may be dated to. ![Attic Pottery from BAPD by Date](man/figures/demo_readme.png "Attic Pottery from BAPD by Date") @@ -31,7 +27,7 @@ The package at version 1.0.0 has been published along with a case study on inscr Recommendation ------- -People interested in employing this method should also consider taking a look at [ISAAKiel's package aoristAAR](https://github.com/ISAAKiel/aoristAAR/), or at [archSeries](https://github.com/davidcorton/archSeries), [tabula](https://github.com/tesselle/tabula), [rtefact](https://github.com/ahb108/rtfact) and [aoristic-analysis (LimesLimits)](https://github.com/LimesLimits/aoristic-analysis). +People interested in employing this method should also consider taking a look at [ISAAKiel's package aoristAAR](https://github.com/ISAAKiel/aoristAAR/), or at [archSeries](https://github.com/davidcorton/archSeries), [tabula](https://github.com/tesselle/tabula), [rtfact](https://github.com/ahb108/rtfact), [aoristic-analysis (LimesLimits)](https://github.com/LimesLimits/aoristic-analysis) and (in the future) [baorista](https://github.com/ercrema/baorista). 
Installation diff --git a/data-raw/Inscr_Bithynia.R b/data-raw/Inscr_Bithynia.R index 744e313..bb3789a 100644 --- a/data-raw/Inscr_Bithynia.R +++ b/data-raw/Inscr_Bithynia.R @@ -29,8 +29,7 @@ inscriptions$URL[repl] <- paste("https://epigraphy.packhum.org/text/", gsub("PH", "", inscriptions$ikey[repl]), sep = "") - -inscriptions$ID <- paste("I_", 1:nrow(inscriptions), sep = "") +inscriptions$ID <- paste("I_", seq_len(nrow(inscriptions)), sep = "") inscriptions <- inscriptions %>% rename(Dating = Chronological.Frame) %>% mutate(Language = replace(Language, Language == "Gr/Lat", "Greek/Latin"), diff --git a/inst/literatur.bib b/inst/literatur.bib index 552b200..d7ab2c3 100644 --- a/inst/literatur.bib +++ b/inst/literatur.bib @@ -10,10 +10,12 @@ @Www{BAPD @article{datplotarticle, title = {datplot: {{A}} new r-package for the visualization of date ranges in archaeology}, author = {Weissova, Barbora and Steinmann, Lisa}, - date = {forthcoming}, + date = {2021}, journaltitle = {Advances in Archaeological Practice}, - volume = {tba}, - pages = {tba}, + volume = {9}, + number = {7}, + pages = {288-298}, + doi = {10.1017/aap.2021.8} } diff --git a/man/datplot-package.Rd b/man/datplot-package.Rd index dadbff6..74724ec 100644 --- a/man/datplot-package.Rd +++ b/man/datplot-package.Rd @@ -18,7 +18,7 @@ Useful links: } \author{ -\strong{Maintainer}: Lisa Steinmann \email{lisa.steinmann@rub.de} (\href{https://orcid.org/0000-0002-2215-1243}{ORCID}) +\strong{Maintainer}: Lisa Steinmann \email{lisa.steinmann@rub.de} (\href{https://orcid.org/0000-0002-2215-1243}{ORCID}) [copyright holder] Other contributors: \itemize{ diff --git a/man/datsteps.Rd b/man/datsteps.Rd index 3e3119a..5a15ff3 100644 --- a/man/datsteps.Rd +++ b/man/datsteps.Rd @@ -51,11 +51,8 @@ The function along with a guide on how to use it and a case study is published in [Steinmann -- Weissova 2021](https://doi.org/10.1017/aap.2021.8). 
} \examples{ -\dontrun{ -data(DAT_df) +data("Inscr_Bithynia") +DAT_df <- Inscr_Bithynia[, c("ID", "Location", "DAT_min", "DAT_max")] DAT_df_steps <- datsteps(DAT_df, stepsize = 25) plot(density(DAT_df_steps$DAT_step)) } - - -} diff --git a/man/get.histogramscale.Rd b/man/get.histogramscale.Rd index 5800ae6..87ce1a6 100644 --- a/man/get.histogramscale.Rd +++ b/man/get.histogramscale.Rd @@ -25,12 +25,11 @@ DAT_df_steps. Calculates the value with which the y-axis of a density graph should be multiplied by in order to be visible in the corresponding histogram. } \examples{ -\dontrun{ +data("Inscr_Bithynia") +DAT_df <- Inscr_Bithynia[, c("ID", "Location", "DAT_min", "DAT_max")] DAT_df_steps <- datsteps(DAT_df, stepsize = 25) get.histogramscale(DAT_df_steps) get.histogramscale(DAT_df_steps$DAT_step, binwidth = 20) get.histogramscale(500, binwidth = 20) } - -} diff --git a/man/get.step.sequence.Rd b/man/get.step.sequence.Rd index f4d9681..e3dea04 100644 --- a/man/get.step.sequence.Rd +++ b/man/get.step.sequence.Rd @@ -29,7 +29,6 @@ If there is a residual, the stepsize is modified depending on how large the residual is. } \examples{ -\dontrun{ min_year <- -494 max_year <- -334 sequence <- get.step.sequence(datmin = min_year, datmax = max_year, stepsize = 25) @@ -40,7 +39,6 @@ max_year <- 100 sequence <- get.step.sequence(datmin = min_year, datmax = max_year, stepsize = 25) print(sequence) } -} \seealso{ [datsteps()], [create.sub.objects()] } diff --git a/man/scaleweight.Rd b/man/scaleweight.Rd index d8ca3de..018a2d7 100644 --- a/man/scaleweight.Rd +++ b/man/scaleweight.Rd @@ -21,3 +21,9 @@ the same data.frame, with the scaled values in the specified column \description{ Requires a data.frame with one variable and one value column. 
} +\examples{ +data("Inscr_Bithynia") +DAT_df <- Inscr_Bithynia[, c("ID", "Location", "DAT_min", "DAT_max")] +DAT_df_steps <- datsteps(DAT_df, stepsize = 25) +DAT_df_scaled <- scaleweight(DAT_df_steps, var = 2, val = 5) +} diff --git a/tests/create_testing_df.R b/tests/create_testing_df.R index d8dd4ea..5d4794d 100644 --- a/tests/create_testing_df.R +++ b/tests/create_testing_df.R @@ -34,6 +34,6 @@ create.testing.df <- function(k = 100, distmean = 150, distsd = 25) { rm(test_df_two) test_df <- test_df[sample(nrow(test_df)), ] - test_df$id <- paste("ID_", 1:nrow(test_df), sep = "") + test_df$id <- paste("ID_", seq_len(nrow(test_df)), sep = "") return(test_df) } diff --git a/tests/testthat/test-datplot_utility.R b/tests/testthat/test-datplot_utility.R index 107f244..1c758d0 100644 --- a/tests/testthat/test-datplot_utility.R +++ b/tests/testthat/test-datplot_utility.R @@ -31,7 +31,7 @@ test_that("switch.dating issues warning", { testdf <- create.testing.df() fristlast <- matrix(nrow = nrow(testdf), ncol = 2) -for (r in 1:nrow(testdf)) { +for (r in seq_len(nrow(testdf))) { seq <- get.step.sequence(datmin = testdf[r, 3], datmax = testdf[r, 4], stepsize = 25) @@ -89,7 +89,7 @@ testdf[, 4] <- sample(1:200, nrow(testdf)) testdf[1, 3:4] <- c(4, 4) DAT_mat <- matrix(ncol = 5, nrow = nrow(testdf)) -DAT_mat[, 1] <- 1:nrow(testdf) +DAT_mat[, 1] <- seq_len(nrow(testdf)) DAT_mat[, 2] <- testdf[, 3] DAT_mat[, 3] <- testdf[, 4] colnames(DAT_mat) <- c("index", "datmin", "datmax", "weight", "step") diff --git a/tests/testthat/test-datsteps.R b/tests/testthat/test-datsteps.R index 1c05f8a..e4e24a3 100644 --- a/tests/testthat/test-datsteps.R +++ b/tests/testthat/test-datsteps.R @@ -11,6 +11,15 @@ test_that("warning for wrong column types", { }) +data("Inscr_Bithynia") + +test_that("removes NA with warning", { + expect_warning(datsteps(Inscr_Bithynia[, c("ID", "Location", "DAT_min", "DAT_max")], + stepsize = 100), regexp = "NA") +}) + + + data("DAT_df") test_that("warning for 
problematic value of stepsize", { diff --git a/vignettes/data_preparation.Rmd b/vignettes/data_preparation.Rmd index f09eac3..31218ab 100644 --- a/vignettes/data_preparation.Rmd +++ b/vignettes/data_preparation.Rmd @@ -55,7 +55,7 @@ The original file consists of five columns which each row representing a single The data set is not yet suited for analysis, as some variables, especially the chronological frame, have many inconsistencies. For further processing, we should also be sure to include an identifier-column. As `r nrow(inscriptions) - length(unique(inscriptions$ikey))` inscriptions do not have an ikey-Value, which might have otherwise been a good candidate for identification, we chose to create a new automatically generated ID, so that every inscription can be individually identifiable. ```{r } -inscriptions$ID <- paste("I_", 1:nrow(inscriptions), sep = "") +inscriptions$ID <- paste("I_", seq_len(nrow(inscriptions)), sep = "") ``` Two of the variables of this data set are almost ready for further use, i.e. Location and Language. A look at their unique values reveals only small inconsistencies that can be easily fixed: @@ -154,7 +154,7 @@ As a demonstration, this is the resulting table (`num_dating`) of up to this poi ```{r echo = FALSE} require(knitr) -knitr::kable(na.omit(na.omit(num_dating)[sample(1:nrow(na.omit(num_dating)), +knitr::kable(na.omit(na.omit(num_dating)[sample(seq_len(nrow(na.omit(num_dating))), 10), ])) ``` @@ -214,7 +214,7 @@ for (r in sel) { Another look at the data set can help us to check for possible errors. 
```{r echo = FALSE} -knitr::kable(na.omit(na.omit(num_dating)[sample(1:nrow(na.omit(num_dating)), +knitr::kable(na.omit(na.omit(num_dating)[sample(seq_len(nrow(na.omit(num_dating))), 10), ])) ``` @@ -248,7 +248,7 @@ for (r in sel) { ``` ```{r echo = FALSE} -knitr::kable(na.omit(na.omit(num_dating)[sample(1:nrow(na.omit(num_dating)), +knitr::kable(na.omit(na.omit(num_dating)[sample(seq_len(nrow(na.omit(num_dating))), 10), ])) ``` diff --git a/vignettes/how-to.Rmd b/vignettes/how-to.Rmd index 0deb23d..2a8a43c 100644 --- a/vignettes/how-to.Rmd +++ b/vignettes/how-to.Rmd @@ -46,7 +46,7 @@ library(datplot) data(Beazley) ``` ```{r preptable, echo = FALSE} -knitr::kable(Beazley[sample(1:nrow(Beazley), 10, replace = FALSE), ]) +knitr::kable(Beazley[sample(seq_len(nrow(Beazley)), 10, replace = FALSE), ]) ``` ## How to Display a Range? @@ -206,7 +206,7 @@ This now opens the possibility to calculate the cumulative probability for each ```{r cumulative demo, fig.height = 10} data("Inscr_Bithynia") Inscr_Bithynia <- na.omit(Inscr_Bithynia[, c(1, 3, 8, 9)]) -Inscr_Bithynia <- Inscr_Bithynia[sample(1:nrow(Inscr_Bithynia), 5), ] +Inscr_Bithynia <- Inscr_Bithynia[sample(seq_len(nrow(Inscr_Bithynia)), 5), ] Inscr_Bithynia_steps <- datsteps(Inscr_Bithynia, stepsize = 1, calc = "probability",