diff --git a/R/density_area.R b/R/density_area.R index 376379a..86bc367 100644 --- a/R/density_area.R +++ b/R/density_area.R @@ -76,16 +76,16 @@ sf_polygon_safely <- function(...){ isolines_to_df <- function(isolines, probs, nameswap){ isolines |> dplyr::mutate( - line_id = .data$line |> + level_id = .data$line |> as.numeric() |> dplyr::desc() |> as.factor() |> - as.numeric(), - prob = sort(probs)[.data$line_id], + as.integer(), + prob = sort(probs)[.data$level_id], order = dplyr::row_number() ) |> dplyr::select(-"line") |> - dplyr::select("line_id", + dplyr::select("level_id", "id", "prob", "x", @@ -106,7 +106,7 @@ iso_df_to_sf <- function(iso_poly_df, xname, yname){ iso_poly_df |> dplyr::mutate( - polygon_id = paste(.data$line_id, .data$id, sep = "-") + polygon_id = paste(.data$level_id, .data$id, sep = "-") ) -> df_prepared df_prepared |> @@ -128,7 +128,7 @@ iso_df_to_sf <- function(iso_poly_df, xname, yname){ iso_poly_pieces_sf |> dplyr::group_by( - .data$line_id, .data$prob + .data$level_id, .data$prob ) |> dplyr::summarise() -> iso_poly_sf @@ -145,15 +145,15 @@ iso_df_to_sf <- function(iso_poly_df, xname, yname){ #' densities. #' #' @details -#' When using `density_polygons()` together with tidyverse verbs, like -#' [dplyr::summarise()], `as_list` should be `TRUE`. +#' When using `density_polygons()` together with [dplyr::summarise()], `as_list` +#' should be `TRUE`. #' #' #' @param x,y Numeric data dimensions #' @param probs Probabilities to compute density polygons for #' @param as_sf Should the returned values be [sf::sf]? Defaults to `FALSE`. -#' @param as_list Should the returned value be a list? Defaults to `TRUE` to -#' work well with tidyverse list columns +#' @param as_list Should the returned value be a list? Defaults to `FALSE` to +#' work with [dplyr::reframe()] #' @param range_mult A multiplier to the range of `x` and `y` across which the #' probability density will be estimated. #' @param rangex,rangey Custom ranges across `x` and `y` ranges across which the @@ -161,7 +161,32 @@ iso_df_to_sf <- function(iso_poly_df, xname, yname){ #' @param ... Additional arguments to be passed to [ggdensity::get_hdr()] #' #' @returns A list of data frames, if `as_list=TRUE`, or just a data frame, -#' if `as_list=FALSE` +#' if `as_list=FALSE`. +#' +#' ## Data frame output +#' +#' If `as_sf=FALSE`, the data frame has the following columns: +#' \describe{ +#' \item{level_id}{An integer id for each probability level} +#' \item{id}{An integer id for each sub-polygon within a probabilty level} +#' \item{prob}{The probability level (originally passed to `probs`)} +#' \item{x, y}{The values along the original `x` and `y` dimensions defining +#' the density polygon. These will be renamed to the original input variable +#' names.} +#' \item{order}{The original plotting order of the polygon points, for +#' convenience.} +#' } +#' +#' ## sf output +#' If `as_sf=TRUE`, the data frame has the following columns: +#' \describe{ +#' \item{level_id}{An integer id for each probability level} +#' \item{prob}{The probability level (originally passed to `probs`)} +#' \item{geometry}{A column of [sf::st_polygon()]s.} +#' } +#' +#' This output will need to be passed to [sf::st_sf()] to utilize many of the +#' features of [sf]. #' #' @details #' If both `rangex` and `rangey` are defined, `range_mult` will be disregarded. @@ -180,7 +205,7 @@ density_polygons <- function(x, y, probs = 0.5, as_sf = FALSE, - as_list = TRUE, + as_list = FALSE, range_mult = 0.25, rangex = NULL, rangey = NULL, @@ -273,8 +298,8 @@ density_polygons <- function(x, density_area <- function(x, y, probs = 0.5, - as_sf = F, - as_list = T, + as_sf = FALSE, + as_list = FALSE, range_mult = 0.25, rangex = NULL, rangey = NULL, diff --git a/inst/examples/density_area_example.R b/inst/examples/density_area_example.R index 3dc6bc9..0dadc54 100644 --- a/inst/examples/density_area_example.R +++ b/inst/examples/density_area_example.R @@ -12,25 +12,16 @@ y <- rnorm(100) density_area(x, y, - probs = ppoints(50), - as_list = FALSE) -> + probs = ppoints(50)) -> poly_areas_df head(poly_areas_df) # Plotting the relationship between probability level and area ggplot(poly_areas_df, - aes(prob, area))+ + aes(prob, area)) + geom_line() -# Assuming distribution is circular, the radius would be `sqrt(area/pi).` -poly_areas_df |> - mutate( - radius = sqrt(area/pi) - ) |> - ggplot(aes(prob, radius)) + - geom_line() - # Tidyverse usage data(s01) @@ -39,41 +30,34 @@ data(s01) s01 |> mutate(log_F2 = -log(F2), - log_F1 = -log(F1))-> + log_F1 = -log(F1)) -> s01 +### Data frame output + s01 |> group_by(name) |> - summarise( - area_df = density_area(log_F2, - log_F1, - probs = ppoints(10), - as_sf = FALSE, - n = 200), - area_sf = density_area(log_F2, - log_F1, - probs = ppoints(10), - as_sf = TRUE, - n = 200) - ) -> - s01_areas + reframe(density_area(log_F2, + log_F1, + probs = ppoints(10))) -> + s01_areas_df -s01_areas |> - unnest(area_df) |> - ggplot( - aes(prob, area) - )+ - geom_line() +s01_areas_df |> + ggplot(aes(prob, area)) + + geom_line() -# If the shape were an equilateral triangle, each side would be -# equal to `sqrt(area * (4/sqrt(3)))` +### Including sf output -s01_areas |> - unnest( - area_sf - ) |> - st_sf() |> +s01 |> + group_by(name) |> + reframe(density_area(log_F2, + log_F1, + probs = ppoints(10), + as_sf = TRUE)) |> + st_sf() -> + s01_areas_sf + +s01_areas_sf |> arrange(desc(prob)) |> - mutate(side = sqrt(area * (4/sqrt(3)))) |> - ggplot()+ - geom_sf(aes(fill = side)) + ggplot() + + geom_sf(aes(fill = area)) diff --git a/inst/examples/density_polygon_example.R b/inst/examples/density_polygon_example.R index 3cc3309..5e6ad02 100644 --- a/inst/examples/density_polygon_example.R +++ b/inst/examples/density_polygon_example.R @@ -12,26 +12,22 @@ y <- c(rnorm(100)) # ordinary data frame output poly_df <- density_polygons(x, y, - probs = ppoints(5), - as_list = FALSE) + probs = ppoints(5)) head(poly_df) -# It's necessary to specify a grouping factor that combines `line_id` and `id` +# It's necessary to specify a grouping factor that combines `level_id` and `id` # for cases of multimodal density distributions ggplot(poly_df, aes(x, y)) + - geom_path( - aes(group = paste0(line_id, id), - color = prob) - ) + geom_path(aes(group = paste0(level_id, id), + color = prob)) # sf output poly_sf <- density_polygons(x, y, probs = ppoints(5), - as_sf = TRUE, - as_list = FALSE) + as_sf = TRUE) head(poly_sf) @@ -39,7 +35,7 @@ head(poly_sf) poly_sf |> arrange(desc(prob)) |> ggplot() + - geom_sf(aes(fill=prob)) + geom_sf(aes(fill = prob)) # Tidyverse usage @@ -48,120 +44,62 @@ data(s01) # Data transformation s01 <- s01 |> - mutate( - log_F1 = -log(F1), - log_F2 = -log(F2) - ) + mutate(log_F1 = -log(F1), + log_F2 = -log(F2)) -# This data has only one group for `name`, but this is the general -# approach to take for multiple groups +## Basic usage with `dplyr::reframe()` +### Data frame output s01 |> group_by(name) |> - summarise( - poly_df = density_polygons(log_F2, - log_F1, - probs = ppoints(5), - as_sf = FALSE, - n = 200), - poly_sf = density_polygons(log_F2, - log_F1, - probs = ppoints(5), - as_sf = TRUE, - n = 200) - )-> - speaker_polys - -# The list columns will need to be unnested for additional analysis/plotting -speaker_polys - -# plotting the data frame output -speaker_polys |> - unnest(poly_df) |> - ggplot( - aes(log_F2, log_F1) - )+ - geom_path( - aes(group = paste0(line_id, id), - color = prob) - )+ - coord_fixed() - -# plotting the sf output -speaker_polys |> - unnest(poly_sf) |> - st_sf() |> - arrange(desc(prob)) |> - ggplot()+ - geom_sf(aes(fill = prob)) + reframe(density_polygons(log_F2, + log_F1, + probs = ppoints(5))) -> + speaker_poly_df + +speaker_poly_df |> + ggplot(aes(log_F2, log_F1)) + + geom_path(aes(group = paste0(level_id, id), + color = prob)) + + coord_fixed() + +### sf output +s01 |> + group_by(name) |> + reframe(density_polygons(log_F2, + log_F1, + probs = ppoints(5), + as_sf = TRUE)) |> + st_sf() -> + speaker_poly_sf + +speaker_poly_sf |> + ggplot() + + geom_sf(aes(color = prob), + fill = NA) -# Using additional sf capabilities to explore category overlap +## basic usage with dplyr::summarise() +### data frame output -## focusing on two categories s01 |> - filter(plt_vclass %in% c("o", "oh"))-> - s01_lowback - -## raw data -s01_lowback |> - ggplot( - aes(log_F2, - log_F1) - )+ - geom_point( - aes(color = plt_vclass) - )+ - coord_fixed() - -## Getting the sf polygons of the categories at 80% -s01_lowback |> - group_by(plt_vclass) |> - summarise( - poly_sf = density_polygons(log_F2, - log_F1, - probs = 0.8, - as_sf = TRUE) - ) |> - unnest(poly_sf) |> + group_by(name) |> + summarise(poly = density_polygons(log_F2, + log_F1, + probs = ppoints(5), + as_list = TRUE)) |> + unnest(poly) -> + speaker_poly_df + +### sf output + +s01 |> + group_by(name) |> + summarise(poly = density_polygons( + log_F2, + log_F1, + probs = ppoints(5), + as_list = TRUE, + as_sf = TRUE + )) |> + unnest(poly) |> st_sf() -> - lowback_sf - -## The basic polygons -lowback_sf |> - ggplot()+ - geom_sf( - aes(fill = plt_vclass), - alpha = 0.6 - ) - -## `sf::st_intersection()` will generate unique polygons for intersections -lowback_sf |> - st_intersection() |> - # recoding overlapping areas - mutate( - plt_vclass = case_when( - n.overlaps > 1 ~ "o~oh", - .default = plt_vclass - ) - ) -> - lowback_overlap - -## plotting the overlap -lowback_overlap |> - ggplot() + - geom_sf( - aes(fill = plt_vclass) - ) - -## plotting overlaping area proportions -lowback_overlap |> - mutate( - area = st_area(geometry), - pop_area = area/sum(area) - ) |> - ggplot( - aes(plt_vclass, pop_area) - )+ - geom_col( - aes(fill = plt_vclass) - )+ - ylim(0,0.5) + speaker_poly_sf diff --git a/man/density_area.Rd b/man/density_area.Rd index 5f0b427..2331a2a 100644 --- a/man/density_area.Rd +++ b/man/density_area.Rd @@ -8,8 +8,8 @@ density_area( x, y, probs = 0.5, - as_sf = F, - as_list = T, + as_sf = FALSE, + as_list = FALSE, range_mult = 0.25, rangex = NULL, rangey = NULL, @@ -57,25 +57,16 @@ y <- rnorm(100) density_area(x, y, - probs = ppoints(50), - as_list = FALSE) -> + probs = ppoints(50)) -> poly_areas_df head(poly_areas_df) # Plotting the relationship between probability level and area ggplot(poly_areas_df, - aes(prob, area))+ + aes(prob, area)) + geom_line() -# Assuming distribution is circular, the radius would be `sqrt(area/pi).` -poly_areas_df |> - mutate( - radius = sqrt(area/pi) - ) |> - ggplot(aes(prob, radius)) + - geom_line() - # Tidyverse usage data(s01) @@ -84,42 +75,35 @@ data(s01) s01 |> mutate(log_F2 = -log(F2), - log_F1 = -log(F1))-> + log_F1 = -log(F1)) -> s01 +### Data frame output + +s01 |> + group_by(name) |> + reframe(density_area(log_F2, + log_F1, + probs = ppoints(10))) -> + s01_areas_df + +s01_areas_df |> + ggplot(aes(prob, area)) + + geom_line() + +### Including sf output + s01 |> group_by(name) |> - summarise( - area_df = density_area(log_F2, - log_F1, - probs = ppoints(10), - as_sf = FALSE, - n = 200), - area_sf = density_area(log_F2, - log_F1, - probs = ppoints(10), - as_sf = TRUE, - n = 200) - ) -> - s01_areas - -s01_areas |> - unnest(area_df) |> - ggplot( - aes(prob, area) - )+ - geom_line() - -# If the shape were an equilateral triangle, each side would be -# equal to `sqrt(area * (4/sqrt(3)))` - -s01_areas |> - unnest( - area_sf - ) |> - st_sf() |> + reframe(density_area(log_F2, + log_F1, + probs = ppoints(10), + as_sf = TRUE)) |> + st_sf() -> + s01_areas_sf + +s01_areas_sf |> arrange(desc(prob)) |> - mutate(side = sqrt(area * (4/sqrt(3)))) |> - ggplot()+ - geom_sf(aes(fill = side)) + ggplot() + + geom_sf(aes(fill = area)) } diff --git a/man/density_polygons.Rd b/man/density_polygons.Rd index 14f13ff..27b9fe3 100644 --- a/man/density_polygons.Rd +++ b/man/density_polygons.Rd @@ -9,7 +9,7 @@ density_polygons( y, probs = 0.5, as_sf = FALSE, - as_list = TRUE, + as_list = FALSE, range_mult = 0.25, rangex = NULL, rangey = NULL, @@ -23,8 +23,8 @@ density_polygons( \item{as_sf}{Should the returned values be \link[sf:sf]{sf::sf}? Defaults to \code{FALSE}.} -\item{as_list}{Should the returned value be a list? Defaults to \code{TRUE} to -work well with tidyverse list columns} +\item{as_list}{Should the returned value be a list? Defaults to \code{FALSE} to +work with \code{\link[dplyr:reframe]{dplyr::reframe()}}} \item{range_mult}{A multiplier to the range of \code{x} and \code{y} across which the probability density will be estimated.} @@ -36,7 +36,34 @@ probability density will be estimated.} } \value{ A list of data frames, if \code{as_list=TRUE}, or just a data frame, -if \code{as_list=FALSE} +if \code{as_list=FALSE}. +\subsection{Data frame output}{ + +If \code{as_sf=FALSE}, the data frame has the following columns: +\describe{ +\item{level_id}{An integer id for each probability level} +\item{id}{An integer id for each sub-polygon within a probabilty level} +\item{prob}{The probability level (originally passed to \code{probs})} +\item{x, y}{The values along the original \code{x} and \code{y} dimensions defining +the density polygon. These will be renamed to the original input variable +names.} +\item{order}{The original plotting order of the polygon points, for +convenience.} +} +} + +\subsection{sf output}{ + +If \code{as_sf=TRUE}, the data frame has the following columns: +\describe{ +\item{level_id}{An integer id for each probability level} +\item{prob}{The probability level (originally passed to \code{probs})} +\item{geometry}{A column of \code{\link[sf:st]{sf::st_polygon()}}s.} +} + +This output will need to be passed to \code{\link[sf:sf]{sf::st_sf()}} to utilize many of the +features of \link{sf}. +} } \description{ Given numeric vectors \code{x} and \code{y}, \code{density_polygons()} will return @@ -44,8 +71,8 @@ a data frame, or list of a data frames, of the polygon defining 2d kernel densities. } \details{ -When using \code{density_polygons()} together with tidyverse verbs, like -\code{\link[dplyr:summarise]{dplyr::summarise()}}, \code{as_list} should be \code{TRUE}. +When using \code{density_polygons()} together with \code{\link[dplyr:summarise]{dplyr::summarise()}}, \code{as_list} +should be \code{TRUE}. If both \code{rangex} and \code{rangey} are defined, \code{range_mult} will be disregarded. If only one or the other of \code{rangex} and \code{rangey} are defined, \code{range_mult} @@ -66,26 +93,22 @@ y <- c(rnorm(100)) # ordinary data frame output poly_df <- density_polygons(x, y, - probs = ppoints(5), - as_list = FALSE) + probs = ppoints(5)) head(poly_df) -# It's necessary to specify a grouping factor that combines `line_id` and `id` +# It's necessary to specify a grouping factor that combines `level_id` and `id` # for cases of multimodal density distributions ggplot(poly_df, aes(x, y)) + - geom_path( - aes(group = paste0(line_id, id), - color = prob) - ) + geom_path(aes(group = paste0(level_id, id), + color = prob)) # sf output poly_sf <- density_polygons(x, y, probs = ppoints(5), - as_sf = TRUE, - as_list = FALSE) + as_sf = TRUE) head(poly_sf) @@ -93,7 +116,7 @@ head(poly_sf) poly_sf |> arrange(desc(prob)) |> ggplot() + - geom_sf(aes(fill=prob)) + geom_sf(aes(fill = prob)) # Tidyverse usage @@ -102,121 +125,63 @@ data(s01) # Data transformation s01 <- s01 |> - mutate( - log_F1 = -log(F1), - log_F2 = -log(F2) - ) + mutate(log_F1 = -log(F1), + log_F2 = -log(F2)) -# This data has only one group for `name`, but this is the general -# approach to take for multiple groups +## Basic usage with `dplyr::reframe()` +### Data frame output s01 |> group_by(name) |> - summarise( - poly_df = density_polygons(log_F2, - log_F1, - probs = ppoints(5), - as_sf = FALSE, - n = 200), - poly_sf = density_polygons(log_F2, - log_F1, - probs = ppoints(5), - as_sf = TRUE, - n = 200) - )-> - speaker_polys - -# The list columns will need to be unnested for additional analysis/plotting -speaker_polys - -# plotting the data frame output -speaker_polys |> - unnest(poly_df) |> - ggplot( - aes(log_F2, log_F1) - )+ - geom_path( - aes(group = paste0(line_id, id), - color = prob) - )+ - coord_fixed() - -# plotting the sf output -speaker_polys |> - unnest(poly_sf) |> - st_sf() |> - arrange(desc(prob)) |> - ggplot()+ - geom_sf(aes(fill = prob)) + reframe(density_polygons(log_F2, + log_F1, + probs = ppoints(5))) -> + speaker_poly_df + +speaker_poly_df |> + ggplot(aes(log_F2, log_F1)) + + geom_path(aes(group = paste0(level_id, id), + color = prob)) + + coord_fixed() + +### sf output +s01 |> + group_by(name) |> + reframe(density_polygons(log_F2, + log_F1, + probs = ppoints(5), + as_sf = TRUE)) |> + st_sf() -> + speaker_poly_sf + +speaker_poly_sf |> + ggplot() + + geom_sf(aes(color = prob), + fill = NA) + +## basic usage with dplyr::summarise() +### data frame output -# Using additional sf capabilities to explore category overlap +s01 |> + group_by(name) |> + summarise(poly = density_polygons(log_F2, + log_F1, + probs = ppoints(5), + as_list = TRUE)) |> + unnest(poly) -> + speaker_poly_df + +### sf output -## focusing on two categories s01 |> - filter(plt_vclass \%in\% c("o", "oh"))-> - s01_lowback - -## raw data -s01_lowback |> - ggplot( - aes(log_F2, - log_F1) - )+ - geom_point( - aes(color = plt_vclass) - )+ - coord_fixed() - -## Getting the sf polygons of the categories at 80\% -s01_lowback |> - group_by(plt_vclass) |> - summarise( - poly_sf = density_polygons(log_F2, - log_F1, - probs = 0.8, - as_sf = TRUE) - ) |> - unnest(poly_sf) |> + group_by(name) |> + summarise(poly = density_polygons( + log_F2, + log_F1, + probs = ppoints(5), + as_list = TRUE, + as_sf = TRUE + )) |> + unnest(poly) |> st_sf() -> - lowback_sf - -## The basic polygons -lowback_sf |> - ggplot()+ - geom_sf( - aes(fill = plt_vclass), - alpha = 0.6 - ) - -## `sf::st_intersection()` will generate unique polygons for intersections -lowback_sf |> - st_intersection() |> - # recoding overlapping areas - mutate( - plt_vclass = case_when( - n.overlaps > 1 ~ "o~oh", - .default = plt_vclass - ) - ) -> - lowback_overlap - -## plotting the overlap -lowback_overlap |> - ggplot() + - geom_sf( - aes(fill = plt_vclass) - ) - -## plotting overlaping area proportions -lowback_overlap |> - mutate( - area = st_area(geometry), - pop_area = area/sum(area) - ) |> - ggplot( - aes(plt_vclass, pop_area) - )+ - geom_col( - aes(fill = plt_vclass) - )+ - ylim(0,0.5) + speaker_poly_sf } diff --git a/tests/testthat/test-list_return.R b/tests/testthat/test-list_return.R index 498c0aa..bcea711 100644 --- a/tests/testthat/test-list_return.R +++ b/tests/testthat/test-list_return.R @@ -1,39 +1,39 @@ -test_that("`density_polygons()` returns list by default", { +test_that("`density_polygons()` returns a list data frame when asked", { set.seed(10) x <- rnorm(100) y <- rnorm(100) - output <- densityarea::density_polygons(x = x, y = y) + output <- densityarea::density_polygons(x = x, y = y, as_list = T) expect_type(output,"list") expect_s3_class(output, NA) expect_s3_class(output[[1]], "data.frame") }) -test_that("`density_polygon()` returns dataframe when asked", { +test_that("`density_polygon()` returns data frame by default", { set.seed(10) x <- rnorm(100) y <- rnorm(100) - output <- densityarea::density_polygons(x = x, y = y, as_list = F) + output <- densityarea::density_polygons(x = x, y = y) expect_s3_class(output,"data.frame") }) -test_that("`density_area()` returns list by default", { +test_that("`density_area()` returns list by when asked", { set.seed(10) x <- rnorm(100) y <- rnorm(100) - output <- densityarea::density_area(x = x, y = y) + output <- densityarea::density_area(x = x, y = y, as_list = T) expect_type(output,"list") expect_s3_class(output, NA) expect_s3_class(output[[1]], "data.frame") }) -test_that("`density_area()` returns a dataframe when asked", { +test_that("`density_area()` returns a data frame by default", { set.seed(10) x <- rnorm(100) y <- rnorm(100) - output <- densityarea::density_area(x = x, y = y, as_list = F) + output <- densityarea::density_area(x = x, y = y) expect_s3_class(output, "data.frame") })