diff --git a/DESCRIPTION b/DESCRIPTION
index 008a81e..d4f290c 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
Package: oceandatr
Type: Package
Title: Ocean Data Access
-Version: 0.2.1.1
+Version: 0.2.1.2
Authors@R: person(given = "Jason", family = "Flower", email = "jflower@ucsb.edu", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-6731-8182"))
Description: For retrieving and gridding ocean related data.
License: GPL (>= 3) + file LICENSE
diff --git a/NAMESPACE b/NAMESPACE
index 23092e0..b7c9392 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -7,7 +7,7 @@ export(get_coral_habitat)
export(get_data_in_grid)
export(get_dist)
export(get_ecoregion)
-export(get_enviro_regions)
+export(get_enviro_zones)
export(get_features)
export(get_geomorphology)
export(get_gfw)
diff --git a/R/get_enviro_regions.R b/R/get_enviro_zones.R
similarity index 71%
rename from R/get_enviro_regions.R
rename to R/get_enviro_zones.R
index 326e238..1a887a1 100644
--- a/R/get_enviro_regions.R
+++ b/R/get_enviro_zones.R
@@ -1,10 +1,10 @@
-#' Create environmental regions for area of interest
+#' Create environmental zones for area of interest
#'
#' @description This function gets [Bio-Oracle](https://bio-oracle.org/)
#' environmental data for the spatial grid and can then create environmental
-#' regions using k-means clustering. The idea for the clustering comes from
+#' zones, using k-means clustering. The idea for the clustering comes from
#' Magris et al. [2020](https://doi.org/10.1111/ddi.13183). The number of
-#' environmental regions can be specified directly, using `num_clusters`, but
+#' environmental zones can be specified directly, using `num_clusters`, but
#' the function can also find the 'optimal' number of clusters using the
#' `NbClust()` from the `NbClust` package.
#'
@@ -42,34 +42,32 @@
#' @inheritParams get_bathymetry
#' @param raw `logical` if TRUE, `spatial_grid` should be an `sf` polygon, and
#' the raw Bio-Oracle environmental data in that polygon(s) will be returned,
-#' unless `enviro_regions = TRUE`, in which case the raw data will be
-#' classified into environmental regions
-#' @param enviro_regions `logical` if TRUE, environmental regions will be
+#' unless `enviro_zones = TRUE`, in which case the raw data will be
+#' classified into environmental zones
+#' @param enviro_zones `logical` if TRUE, environmental zones will be
#' created. If FALSE the gridded Bio-Oracle data will be returned
#' @param show_plots `logical`; whether to show boxplots for each environmental
-#' variable in each environmental region (default is FALSE)
-#' @param num_clusters `numeric`; the number of environmental regions to cluster
+#' variable in each environmental zone (default is FALSE)
+#' @param num_clusters `numeric`; the number of environmental zones to cluster
#' the data into - to be used when a clustering algorithm is not necessary
#' (default is NULL)
#' @param max_num_clusters `numeric`; the maximum number of environmental
-#' regions to try when using the clustering algorithm (default is 6)
+#' zones to try when using the clustering algorithm (default is 6)
#' @param sample_size `numeric`; default is 5000. Larger sample sizes will
#' quickly consume memory (>10GB) so should be used with caution.
#' @param num_samples `numeric`; default is 5, which resulted in good consensus
#' on the optimal number of clusters in testing.
#' @param num_cores `numeric`; default 1. Multi-core sampling is supported if
-#' the package `parallel` is installed, but be aware than increasing the
+#' the package `parallel` is installed, but be aware that increasing the
#' number of cores will also increase the memory required.
-#' @param custom_seed `numeric`; default `1234`, but a custom seed can be
-#' supplied if desired.
#'
-#' @return If `enviro_regions = FALSE`, Bio-Oracle data in the `spatial_grid`
-#' supplied, or in the original raster file resolution if `raw = TRUE`. If
-#' `enviro_regions = TRUE` a multi-layer raster or an `sf` object with one
-#' environmental region in each column/ layer is returned, depending on the
-#' `spatial_grid` format. If `enviro_regions = TRUE` and `raw = TRUE` (in
-#' which case `spatial_grid` should be an `sf` polygon), the raw Bio-Oracle
-#' data is classified into environmental zones.
+#' @return If `enviro_zones = FALSE`, Bio-Oracle data in the `spatial_grid`
+#' supplied, or the original Bio-Oracle data cropped and masked to the grid if
+#' `raw = TRUE`. If `enviro_zones = TRUE` a multi-layer raster or an `sf`
+#' object with one environmental zone in each column/ layer is returned,
+#' depending on the `spatial_grid` format. If `enviro_zones = TRUE` and `raw
+#' = TRUE` (in which case `spatial_grid` should be an `sf` polygon), the raw
+#' Bio-Oracle data is classified into environmental zones.
#'
#' @export
#'
@@ -77,22 +75,27 @@
#' # Get EEZ data first
#' bermuda_eez <- get_boundary(name = "Bermuda")
#' # Get raw Bio-Oracle environmental data for Bermuda
-#' enviro_data <- get_enviro_regions(spatial_grid = bermuda_eez, raw = TRUE, enviro_regions = FALSE)
+#' enviro_data <- get_enviro_zones(spatial_grid = bermuda_eez, raw = TRUE, enviro_zones = FALSE)
#' terra::plot(enviro_data)
#' # Get gridded Bio-Oracle data for Bermuda:
#' bermuda_grid <- get_grid(boundary = bermuda_eez, crs = '+proj=laea +lon_0=-64.8108333 +lat_0=32.3571917 +datum=WGS84 +units=m +no_defs', resolution = 20000)
-#' enviro_data_gridded <- get_enviro_regions(spatial_grid = bermuda_grid, raw = FALSE, enviro_regions = FALSE)
+#'
+#' enviro_data_gridded <- get_enviro_zones(spatial_grid = bermuda_grid, raw = FALSE, enviro_zones = FALSE)
#' terra::plot(enviro_data_gridded)
-#' # Get 3 environmental regions for Bermuda
-#' bermuda_enviro_regions <- get_enviro_regions(spatial_grid = bermuda_grid, raw = FALSE, enviro_regions = TRUE, num_clusters = 3)
-#' terra::plot(bermuda_enviro_regions)
-#' # Can also create environmental regions from the raw Bio-Oracle data using setting raw = TRUE and enviro_regions = TRUE. In this case, the `spatial_grid` should be a polygon of the area you want the data for
-#' bermuda_enviro_regions2 <- get_enviro_regions(spatial_grid = bermuda_eez, raw = TRUE, enviro_regions = TRUE, num_clusters = 3)
-#' terra::plot(bermuda_enviro_regions2)
+#'
+#' # Get 3 environmental zones for Bermuda
+#'
+#' #set seed so the k-means clustering is reproducible; num_clusters is supplied, so the sampling to find the optimal number of clusters is skipped
+#' set.seed(500)
+#' bermuda_enviro_zones <- get_enviro_zones(spatial_grid = bermuda_grid, raw = FALSE, enviro_zones = TRUE, num_clusters = 3)
+#' terra::plot(bermuda_enviro_zones)
+#' # Can also create environmental zones from the raw Bio-Oracle data by setting raw = TRUE and enviro_zones = TRUE. In this case, the `spatial_grid` should be a polygon of the area you want the data for
+#' bermuda_enviro_zones2 <- get_enviro_zones(spatial_grid = bermuda_eez, raw = TRUE, enviro_zones = TRUE, num_clusters = 3)
+#' terra::plot(bermuda_enviro_zones2)
-get_enviro_regions <- function(spatial_grid = NULL, raw = FALSE, enviro_regions = TRUE, show_plots = FALSE, num_clusters = NULL, max_num_clusters = 6, antimeridian = NULL, sample_size = 5000, num_samples = 5, num_cores = 1, custom_seed = 1234){
+get_enviro_zones <- function(spatial_grid = NULL, raw = FALSE, enviro_zones = TRUE, show_plots = FALSE, num_clusters = NULL, max_num_clusters = 6, antimeridian = NULL, sample_size = 5000, num_samples = 5, num_cores = 1){
- rlang::check_installed("biooracler", reason = "to get Bio-Oracle data using `get_enviro_regions()`", action = \(pkg, ...) remotes::install_github("bio-oracle/biooracler"))
+ rlang::check_installed("biooracler", reason = "to get Bio-Oracle data using `get_enviro_zones()`", action = \(pkg, ...) remotes::install_github("bio-oracle/biooracler"))
check_grid(spatial_grid)
@@ -125,7 +128,7 @@ get_enviro_regions <- function(spatial_grid = NULL, raw = FALSE, enviro_regions
enviro_data <- get_enviro_data(spatial_grid = spatial_grid) %>%
get_data_in_grid(spatial_grid = spatial_grid, dat = ., raw = raw, meth = meth)
- if(!enviro_regions){
+ if(!enviro_zones){
return(enviro_data)
}else{
@@ -136,7 +139,6 @@ get_enviro_regions <- function(spatial_grid = NULL, raw = FALSE, enviro_regions
if(is.null(num_clusters)){
message("This could take several minutes")
#setting index = "all" results in large memory usage and long runtime (I haven't run to completion after >1hr), for the moment, setting the index to "hartigan" which is the same algorithm (Hartigan-Wong) used by the kmeans() function used below
- set.seed(custom_seed)
n_df_rows <- nrow(df_for_clustering)
@@ -166,33 +168,33 @@ get_enviro_regions <- function(spatial_grid = NULL, raw = FALSE, enviro_regions
clust_partition <- clust_result$cluster
if(show_plots) {
- enviro_regions_boxplot(clust_partition, df_for_clustering)
- enviro_regions_pca(clust_partition, df_for_clustering)
+ enviro_zones_boxplot(clust_partition, df_for_clustering)
+ enviro_zones_pca(clust_partition, df_for_clustering)
}
if(check_sf(enviro_data)){
- enviro_region_cols <- stats::model.matrix(~ as.factor(clust_partition) - 1) %>%
+ enviro_zone_cols <- stats::model.matrix(~ as.factor(clust_partition) - 1) %>%
as.data.frame() %>%
- stats::setNames(paste0("enviro_region_", 1:ncol(.))) %>%
+ stats::setNames(paste0("enviro_zone_", 1:ncol(.))) %>%
dplyr::mutate(row_id = as.numeric(names(clust_partition)))
sf::st_geometry(enviro_data) %>%
sf::st_sf() %>%
dplyr::mutate(row_id = 1:nrow(.)) %>%
- dplyr::left_join(enviro_region_cols, by = dplyr::join_by(row_id)) %>%
+ dplyr::left_join(enviro_zone_cols, by = dplyr::join_by(row_id)) %>%
dplyr::select(-row_id) %>%
{if(grid_has_extra_cols) cbind(., extra_cols) %>% dplyr::relocate(colnames(extra_cols), .before = 1) else .}
}else{
- #create environmental regions raster, filled with NAs to start with
- enviro_regions <- terra::rast(enviro_data, nlyrs=1, vals = NA, names = "enviro_region")
+ #create environmental zones raster, filled with NAs to start with
+ enviro_zones <- terra::rast(enviro_data, nlyrs=1, vals = NA, names = "enviro_zone")
#set cluster ids in raster - subset for only raster values that are non-NA
- enviro_regions[as.numeric(names(clust_partition))] <- clust_partition
+ enviro_zones[as.numeric(names(clust_partition))] <- clust_partition
- enviro_regions %>%
+ enviro_zones %>%
terra::segregate() %>%
- stats::setNames(paste0("enviro_region_", names(.)))
+ stats::setNames(paste0("enviro_zone_", names(.)))
}
}
}
@@ -259,24 +261,24 @@ get_enviro_data <- function(spatial_grid = NULL){
stats::setNames(c("Chlorophyll", "Dissolved_oxygen", "Nitrate", "Minimum_temp", "Mean_temp", "Max_temp", "pH", "Phosphate", "Salinity", "Silicate", "Phytoplankton"))
}
-enviro_regions_boxplot <- function(enviro_region, enviro_data){
- #compare values in each environmental region
- enviro_regions_df <- cbind(enviro_region, enviro_data)
+enviro_zones_boxplot <- function(enviro_zone, enviro_data){
+ #compare values in each environmental zone
+ enviro_zones_df <- cbind(enviro_zone, enviro_data)
graphics::par(mfrow = c(3,4))
- for (i in 2:ncol(enviro_regions_df)) {
- eval(parse(text = paste0("boxplot(`", colnames(enviro_regions_df[i]), "` ~ enviro_region, data = enviro_regions_df, col = palette.colors(n = ", max(enviro_region), ", palette = 'Dark2'))")))
+ for (i in 2:ncol(enviro_zones_df)) {
+ eval(parse(text = paste0("boxplot(`", colnames(enviro_zones_df[i]), "` ~ enviro_zone, data = enviro_zones_df, col = palette.colors(n = ", max(enviro_zone), ", palette = 'Dark2'))")))
}
graphics::par(mfrow = c(1,1))
}
-enviro_regions_pca <- function(enviro_region, enviro_data){
+enviro_zones_pca <- function(enviro_zone, enviro_data){
pca_df <- stats::prcomp(enviro_data, scale. = TRUE, center = TRUE) %>%
.[["x"]] %>%
as.data.frame()
- pca_df$enviro_region <- enviro_region
+ pca_df$enviro_zone <- enviro_zone
- plot(x = pca_df$PC1, y = pca_df$PC2, col = pca_df$enviro_region, xlab = "PC1", ylab = "PC2", pch = 4, cex = 0.6)
- graphics::legend("bottomright", legend = unique(pca_df$enviro_region), col = unique(pca_df$enviro_region), pch = 4, cex = 1, title = "Enviro region")
+ plot(x = pca_df$PC1, y = pca_df$PC2, col = pca_df$enviro_zone, xlab = "PC1", ylab = "PC2", pch = 4, cex = 0.6)
+ graphics::legend("bottomright", legend = unique(pca_df$enviro_zone), col = unique(pca_df$enviro_zone), pch = 4, cex = 1, title = "Enviro zone")
}
diff --git a/R/get_features.R b/R/get_features.R
index a73d65f..e3a1c8d 100644
--- a/R/get_features.R
+++ b/R/get_features.R
@@ -2,7 +2,7 @@
#'
#' @description This is a wrapper of `get_bathymetry()`,
#' `get_seamounts_buffered()`, `get_knolls()`, `get_geomorphology()`,
-#' `get_coral_habitat()`, and `get_enviro_regions()`. See the individual
+#' `get_coral_habitat()`, and `get_enviro_zones()`. See the individual
#' functions for details.
#'
#' @inheritParams get_bathymetry
@@ -10,7 +10,7 @@
#' the raw feature data in that polygon(s) will be returned. Note that this
#' will be a list object, since raster and `sf` data may be returned.
#' @param features a vector of feature names, can include: "bathymetry",
-#' "seamounts", "knolls", "geomorphology", "corals", "enviro_regions"
+#' "seamounts", "knolls", "geomorphology", "corals", "enviro_zones"
#' @param bathy_resolution `numeric`; the resolution (in minutes) of data to
#' pull from the ETOPO 2022 Global Relief model. Values less than 1 can only
#' be 0.5 (30 arc seconds) and 0.25 (15 arc seconds)
@@ -25,11 +25,11 @@
#' @param octocoral_threshold `numeric` between 0 and 7; the threshold value for
#' how many species (of 7) should be predicted present in an area for
#' octocorals to be considered present (default is 2)
-#' @param enviro_clusters `numeric`; the number of environmental regions to
+#' @param enviro_clusters `numeric`; the number of environmental zones to
#' cluster the data into - to be used when a clustering algorithm is not
#' necessary (default is NULL)
#' @param max_enviro_clusters `numeric`; the maximum number of environmental
-#' regions to try when using the clustering algorithm (default is 8)
+#' zones to try when using the clustering algorithm (default is 6)
#'
#' @return If `raw = TRUE`, a list of feature data is returned (mixed raster and
#' `sf` objects). If a `spatial_grid` is supplied, a multi-layer raster or
@@ -44,10 +44,12 @@
#' raw_data <- get_features(spatial_grid = bermuda_eez, raw = TRUE)
#' # Get feature data in a spatial grid
#' bermuda_grid <- get_grid(boundary = bermuda_eez, crs = '+proj=laea +lon_0=-64.8108333 +lat_0=32.3571917 +datum=WGS84 +units=m +no_defs', resolution = 20000)
+#' #set seed for reproducibility in the get_enviro_zones() function
+#' set.seed(500)
#' features_gridded <- get_features(spatial_grid = bermuda_grid)
#' terra::plot(features_gridded)
-get_features <- function(spatial_grid = NULL, raw = FALSE, features = c("bathymetry", "seamounts", "knolls", "geomorphology", "corals", "enviro_regions"), bathy_resolution = 1, seamount_buffer = 30000, antipatharia_threshold = 22, octocoral_threshold = 2, enviro_clusters = NULL, max_enviro_clusters = 6, antimeridian = NULL){
+get_features <- function(spatial_grid = NULL, raw = FALSE, features = c("bathymetry", "seamounts", "knolls", "geomorphology", "corals", "enviro_zones"), bathy_resolution = 1, seamount_buffer = 30000, antipatharia_threshold = 22, octocoral_threshold = 2, enviro_clusters = NULL, max_enviro_clusters = 6, antimeridian = NULL){
#set extra columns aside - only need this is it a spatial grid, so added
#nrow() check to remove the need for this step if only raw data is required
@@ -92,10 +94,10 @@ get_features <- function(spatial_grid = NULL, raw = FALSE, features = c("bathyme
})
}
- if("enviro_regions" %in% features) {
- message("Getting environmental regions data... This could take several minutes")
+ if("enviro_zones" %in% features) {
+ message("Getting environmental zones data... This could take several minutes")
suppressMessages({
- enviro_regions <- get_enviro_regions(spatial_grid = spatial_grid, raw = raw, show_plots = FALSE, num_clusters = enviro_clusters, max_num_clusters = max_enviro_clusters, antimeridian = antimeridian)
+ enviro_zones <- get_enviro_zones(spatial_grid = spatial_grid, raw = raw, show_plots = FALSE, num_clusters = enviro_clusters, max_num_clusters = max_enviro_clusters, antimeridian = antimeridian)
})
}
diff --git a/README.Rmd b/README.Rmd
index a97dc65..3c109d8 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -180,9 +180,9 @@ coral_habitat <- get_coral_habitat(spatial_grid = bermuda_grid)
terra::plot(coral_habitat, col = c("grey60", "coral"), axes = FALSE, fun = function()terra::lines(terra::as.polygons(seamounts, dissolve = TRUE), col = "orangered4"))
```
-## Environmental Regions
+## Environmental Zones
-Bioregions are often included in spatial planning, but available bioregional classifications are either too coarse or too detailed to be useful for planning at the EEZ level. Borrowing methods from [Magris et al. 2020](https://doi.org/10.1111/ddi.13183), we use spatial clustering of biophysical environmental data from [Bio-Oracle](https://bio-oracle.org/), to create 'environmental regions'. Biophysical conditions within a environmental region are more similar than areas outside that region, though the differences may be small. Diagnostic boxplots and a PCA will be shown if `show_plots = TRUE`. All the biophysical data are ocean surface data for the period 2010 - 2020:
+Bioregions are often included in spatial planning, but available bioregional classifications are either too coarse or too detailed to be useful for planning at the EEZ level. Borrowing methods from [Magris et al. 2020](https://doi.org/10.1111/ddi.13183), we use spatial clustering of biophysical environmental data from [Bio-Oracle](https://bio-oracle.org/), to create 'environmental zones'. Biophysical conditions within an environmental zone are more similar than areas outside that zone, though the differences may be small. Diagnostic boxplots and a PCA will be shown if `show_plots = TRUE`. All the biophysical data are ocean surface data for the period 2010 - 2020:
* Chlorophyll concentration (mean, mg/ m3)
* Dissolved oxygen concentration (mean)
@@ -196,14 +196,13 @@ Bioregions are often included in spatial planning, but available bioregional cla
* Sea surface temperature (min, degree C)
* Silicate concentration (mean, mmol/ m3)
-```{r environmental_regions, warning=FALSE, message=FALSE}
-
+```{r environmental_zones, warning=FALSE, message=FALSE}
#set number of clusters to 3 to reduce runtime and memory usage
-enviro_regions <- get_enviro_regions(spatial_grid = bermuda_grid, show_plots = TRUE, num_clusters = 3)
+enviro_zones <- get_enviro_zones(spatial_grid = bermuda_grid, show_plots = TRUE, num_clusters = 3)
```
-```{r enviro_regions_maps, warning=FALSE, message=FALSE}
-#value of 1 indicates that environmental region is present
-terra::plot(enviro_regions, col = c("grey60", "forestgreen"), axes = FALSE, fun = function(){terra::lines(terra::vect(bermuda_eez_projected))})
+```{r enviro_zones_maps, warning=FALSE, message=FALSE}
+#value of 1 indicates that environmental zone is present
+terra::plot(enviro_zones, col = c("grey60", "forestgreen"), axes = FALSE, fun = function(){terra::lines(terra::vect(bermuda_eez_projected))})
```
diff --git a/README.md b/README.md
index 7558626..f2daf19 100644
--- a/README.md
+++ b/README.md
@@ -241,19 +241,18 @@ terra::plot(coral_habitat, col = c("grey60", "coral"), axes = FALSE, fun = funct
-## Environmental Regions
+## Environmental Zones
Bioregions are often included in spatial planning, but available
bioregional classifications are either too coarse or too detailed to be
useful for planning at the EEZ level. Borrowing methods from [Magris et
al. 2020](https://doi.org/10.1111/ddi.13183), we use spatial clustering
of biophysical environmental data from
-[Bio-Oracle](https://bio-oracle.org/), to create ‘environmental
-regions’. Biophysical conditions within a environmental region are more
-similar than areas outside that region, though the differences may be
-small. Diagnostic boxplots and a PCA will be shown if
-`show_plots = TRUE`. All the biophysical data are ocean surface data for
-the period 2010 - 2020:
+[Bio-Oracle](https://bio-oracle.org/), to create ‘environmental zones’.
+Biophysical conditions within an environmental zone are more similar than
+areas outside that zone, though the differences may be small. Diagnostic
+boxplots and a PCA will be shown if `show_plots = TRUE`. All the
+biophysical data are ocean surface data for the period 2010 - 2020:
- Chlorophyll concentration (mean, mg/ m3)
- Dissolved oxygen concentration (mean)
@@ -268,16 +267,15 @@ the period 2010 - 2020:
- Silicate concentration (mean, mmol/ m3)
``` r
-
#set number of clusters to 3 to reduce runtime and memory usage
-enviro_regions <- get_enviro_regions(spatial_grid = bermuda_grid, show_plots = TRUE, num_clusters = 3)
+enviro_zones <- get_enviro_zones(spatial_grid = bermuda_grid, show_plots = TRUE, num_clusters = 3)
```
-![](man/figures/README-environmental_regions-1.png)
+![](man/figures/README-environmental_zones-1.png)
``` r
-#value of 1 indicates that environmental region is present
-terra::plot(enviro_regions, col = c("grey60", "forestgreen"), axes = FALSE, fun = function(){terra::lines(terra::vect(bermuda_eez_projected))})
+#value of 1 indicates that environmental zone is present
+terra::plot(enviro_zones, col = c("grey60", "forestgreen"), axes = FALSE, fun = function(){terra::lines(terra::vect(bermuda_eez_projected))})
```
-
+
diff --git a/man/figures/README-area_of_interest-1.png b/man/figures/README-area_of_interest-1.png
index 4a43751..f25fec4 100644
Binary files a/man/figures/README-area_of_interest-1.png and b/man/figures/README-area_of_interest-1.png differ
diff --git a/man/figures/README-bathymetry-1.png b/man/figures/README-bathymetry-1.png
index d84da16..9598e7c 100644
Binary files a/man/figures/README-bathymetry-1.png and b/man/figures/README-bathymetry-1.png differ
diff --git a/man/figures/README-bermuda-grid-1.png b/man/figures/README-bermuda-grid-1.png
index 6b52d0b..b5a8bf0 100644
Binary files a/man/figures/README-bermuda-grid-1.png and b/man/figures/README-bermuda-grid-1.png differ
diff --git a/man/figures/README-coral_habitat-1.png b/man/figures/README-coral_habitat-1.png
index 3d8f288..58a4e1c 100644
Binary files a/man/figures/README-coral_habitat-1.png and b/man/figures/README-coral_habitat-1.png differ
diff --git a/man/figures/README-depth_classification-1.png b/man/figures/README-depth_classification-1.png
index c10c736..4eef9c9 100644
Binary files a/man/figures/README-depth_classification-1.png and b/man/figures/README-depth_classification-1.png differ
diff --git a/man/figures/README-enviro_regions_maps-1.png b/man/figures/README-enviro_regions_maps-1.png
deleted file mode 100644
index ff5c61e..0000000
Binary files a/man/figures/README-enviro_regions_maps-1.png and /dev/null differ
diff --git a/man/figures/README-enviro_zones_maps-1.png b/man/figures/README-enviro_zones_maps-1.png
new file mode 100644
index 0000000..12c6aef
Binary files /dev/null and b/man/figures/README-enviro_zones_maps-1.png differ
diff --git a/man/figures/README-environmental_regions-1.png b/man/figures/README-environmental_regions-1.png
deleted file mode 100644
index 643379c..0000000
Binary files a/man/figures/README-environmental_regions-1.png and /dev/null differ
diff --git a/man/figures/README-environmental_regions-2.png b/man/figures/README-environmental_regions-2.png
deleted file mode 100644
index 2e98ff1..0000000
Binary files a/man/figures/README-environmental_regions-2.png and /dev/null differ
diff --git a/man/figures/README-environmental_zones-1.png b/man/figures/README-environmental_zones-1.png
new file mode 100644
index 0000000..0edf9a5
Binary files /dev/null and b/man/figures/README-environmental_zones-1.png differ
diff --git a/man/figures/README-environmental_zones-2.png b/man/figures/README-environmental_zones-2.png
new file mode 100644
index 0000000..4a3836c
Binary files /dev/null and b/man/figures/README-environmental_zones-2.png differ
diff --git a/man/figures/README-geomorphology-1.png b/man/figures/README-geomorphology-1.png
index 0c48837..d98f060 100644
Binary files a/man/figures/README-geomorphology-1.png and b/man/figures/README-geomorphology-1.png differ
diff --git a/man/figures/README-grid_cells-1.png b/man/figures/README-grid_cells-1.png
index 7df525d..c204888 100644
Binary files a/man/figures/README-grid_cells-1.png and b/man/figures/README-grid_cells-1.png differ
diff --git a/man/figures/README-knolls-1.png b/man/figures/README-knolls-1.png
index 25d8035..9ebe414 100644
Binary files a/man/figures/README-knolls-1.png and b/man/figures/README-knolls-1.png differ
diff --git a/man/figures/README-seamounts-1.png b/man/figures/README-seamounts-1.png
index 3fd8a1d..41b0e7b 100644
Binary files a/man/figures/README-seamounts-1.png and b/man/figures/README-seamounts-1.png differ
diff --git a/man/get_enviro_regions.Rd b/man/get_enviro_zones.Rd
similarity index 64%
rename from man/get_enviro_regions.Rd
rename to man/get_enviro_zones.Rd
index ce4069e..590ad65 100644
--- a/man/get_enviro_regions.Rd
+++ b/man/get_enviro_zones.Rd
@@ -1,21 +1,20 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/get_enviro_regions.R
-\name{get_enviro_regions}
-\alias{get_enviro_regions}
-\title{Create environmental regions for area of interest}
+% Please edit documentation in R/get_enviro_zones.R
+\name{get_enviro_zones}
+\alias{get_enviro_zones}
+\title{Create environmental zones for area of interest}
\usage{
-get_enviro_regions(
+get_enviro_zones(
spatial_grid = NULL,
raw = FALSE,
- enviro_regions = TRUE,
+ enviro_zones = TRUE,
show_plots = FALSE,
num_clusters = NULL,
max_num_clusters = 6,
antimeridian = NULL,
sample_size = 5000,
num_samples = 5,
- num_cores = 1,
- custom_seed = 1234
+ num_cores = 1
)
}
\arguments{
@@ -25,21 +24,21 @@ be provided, e.g. created using \code{get_boundary()}, and set \code{raw = TRUE}
\item{raw}{\code{logical} if TRUE, \code{spatial_grid} should be an \code{sf} polygon, and
the raw Bio-Oracle environmental data in that polygon(s) will be returned,
-unless \code{enviro_regions = TRUE}, in which case the raw data will be
-classified into environmental regions}
+unless \code{enviro_zones = TRUE}, in which case the raw data will be
+classified into environmental zones}
-\item{enviro_regions}{\code{logical} if TRUE, environmental regions will be
+\item{enviro_zones}{\code{logical} if TRUE, environmental zones will be
created. If FALSE the gridded Bio-Oracle data will be returned}
\item{show_plots}{\code{logical}; whether to show boxplots for each environmental
-variable in each environmental region (default is FALSE)}
+variable in each environmental zone (default is FALSE)}
-\item{num_clusters}{\code{numeric}; the number of environmental regions to cluster
+\item{num_clusters}{\code{numeric}; the number of environmental zones to cluster
the data into - to be used when a clustering algorithm is not necessary
(default is NULL)}
\item{max_num_clusters}{\code{numeric}; the maximum number of environmental
-regions to try when using the clustering algorithm (default is 6)}
+zones to try when using the clustering algorithm (default is 6)}
\item{antimeridian}{Does \code{spatial_grid} span the antimeridian? If so, this
should be set to \code{TRUE}, otherwise set to \code{FALSE}. If set to \code{NULL}
@@ -53,27 +52,23 @@ quickly consume memory (>10GB) so should be used with caution.}
on the optimal number of clusters in testing.}
\item{num_cores}{\code{numeric}; default 1. Multi-core sampling is supported if
-the package \code{parallel} is installed, but be aware than increasing the
+the package \code{parallel} is installed, but be aware that increasing the
number of cores will also increase the memory required.}
-
-\item{custom_seed}{\code{numeric}; default \code{1234}, but a custom seed can be
-supplied if desired.}
}
\value{
-If \code{enviro_regions = FALSE}, Bio-Oracle data in the \code{spatial_grid}
-supplied, or in the original raster file resolution if \code{raw = TRUE}. If
-\code{enviro_regions = TRUE} a multi-layer raster or an \code{sf} object with one
-environmental region in each column/ layer is returned, depending on the
-\code{spatial_grid} format. If \code{enviro_regions = TRUE} and \code{raw = TRUE} (in
-which case \code{spatial_grid} should be an \code{sf} polygon), the raw Bio-Oracle
-data is classified into environmental zones.
+If \code{enviro_zones = FALSE}, Bio-Oracle data in the \code{spatial_grid}
+supplied, or the original Bio-Oracle data cropped and masked to the grid if
+\code{raw = TRUE}. If \code{enviro_zones = TRUE} a multi-layer raster or an \code{sf}
+object with one environmental zone in each column/ layer is returned,
+depending on the \code{spatial_grid} format. If \code{enviro_zones = TRUE} and \code{raw = TRUE} (in which case \code{spatial_grid} should be an \code{sf} polygon), the raw
+Bio-Oracle data is classified into environmental zones.
}
\description{
This function gets \href{https://bio-oracle.org/}{Bio-Oracle}
environmental data for the spatial grid and can then create environmental
-regions using k-means clustering. The idea for the clustering comes from
+zones, using k-means clustering. The idea for the clustering comes from
Magris et al. \href{https://doi.org/10.1111/ddi.13183}{2020}. The number of
-environmental regions can be specified directly, using \code{num_clusters}, but
+environmental zones can be specified directly, using \code{num_clusters}, but
the function can also find the 'optimal' number of clusters using the
\code{NbClust()} from the \code{NbClust} package.
}
@@ -112,16 +107,21 @@ installations). To find the number of available cores on your systems run
# Get EEZ data first
bermuda_eez <- get_boundary(name = "Bermuda")
# Get raw Bio-Oracle environmental data for Bermuda
-enviro_data <- get_enviro_regions(spatial_grid = bermuda_eez, raw = TRUE, enviro_regions = FALSE)
+enviro_data <- get_enviro_zones(spatial_grid = bermuda_eez, raw = TRUE, enviro_zones = FALSE)
terra::plot(enviro_data)
# Get gridded Bio-Oracle data for Bermuda:
bermuda_grid <- get_grid(boundary = bermuda_eez, crs = '+proj=laea +lon_0=-64.8108333 +lat_0=32.3571917 +datum=WGS84 +units=m +no_defs', resolution = 20000)
-enviro_data_gridded <- get_enviro_regions(spatial_grid = bermuda_grid, raw = FALSE, enviro_regions = FALSE)
+
+enviro_data_gridded <- get_enviro_zones(spatial_grid = bermuda_grid, raw = FALSE, enviro_zones = FALSE)
terra::plot(enviro_data_gridded)
-# Get 3 environmental regions for Bermuda
-bermuda_enviro_regions <- get_enviro_regions(spatial_grid = bermuda_grid, raw = FALSE, enviro_regions = TRUE, num_clusters = 3)
-terra::plot(bermuda_enviro_regions)
-# Can also create environmental regions from the raw Bio-Oracle data using setting raw = TRUE and enviro_regions = TRUE. In this case, the `spatial_grid` should be a polygon of the area you want the data for
-bermuda_enviro_regions2 <- get_enviro_regions(spatial_grid = bermuda_eez, raw = TRUE, enviro_regions = TRUE, num_clusters = 3)
-terra::plot(bermuda_enviro_regions2)
+
+# Get 3 environmental zones for Bermuda
+
+#set seed so the k-means clustering is reproducible; num_clusters is supplied, so the sampling to find the optimal number of clusters is skipped
+set.seed(500)
+bermuda_enviro_zones <- get_enviro_zones(spatial_grid = bermuda_grid, raw = FALSE, enviro_zones = TRUE, num_clusters = 3)
+terra::plot(bermuda_enviro_zones)
+# Can also create environmental zones from the raw Bio-Oracle data by setting raw = TRUE and enviro_zones = TRUE. In this case, the `spatial_grid` should be a polygon of the area you want the data for
+bermuda_enviro_zones2 <- get_enviro_zones(spatial_grid = bermuda_eez, raw = TRUE, enviro_zones = TRUE, num_clusters = 3)
+terra::plot(bermuda_enviro_zones2)
}
diff --git a/man/get_features.Rd b/man/get_features.Rd
index b8d4db2..be8c9c9 100644
--- a/man/get_features.Rd
+++ b/man/get_features.Rd
@@ -8,7 +8,7 @@ get_features(
spatial_grid = NULL,
raw = FALSE,
features = c("bathymetry", "seamounts", "knolls", "geomorphology", "corals",
- "enviro_regions"),
+ "enviro_zones"),
bathy_resolution = 1,
seamount_buffer = 30000,
antipatharia_threshold = 22,
@@ -28,7 +28,7 @@ the raw feature data in that polygon(s) will be returned. Note that this
will be a list object, since raster and \code{sf} data may be returned.}
\item{features}{a vector of feature names, can include: "bathymetry",
-"seamounts", "knolls", "geomorphology", "corals", "enviro_regions"}
+"seamounts", "knolls", "geomorphology", "corals", "enviro_zones"}
\item{bathy_resolution}{\code{numeric}; the resolution (in minutes) of data to
pull from the ETOPO 2022 Global Relief model. Values less than 1 can only
@@ -47,12 +47,12 @@ present (default is 22, as defined in Yesson et al., 2017)}
how many species (of 7) should be predicted present in an area for
octocorals to be considered present (default is 2)}
-\item{enviro_clusters}{\code{numeric}; the number of environmental regions to
+\item{enviro_clusters}{\code{numeric}; the number of environmental zones to
cluster the data into - to be used when a clustering algorithm is not
necessary (default is NULL)}
\item{max_enviro_clusters}{\code{numeric}; the maximum number of environmental
-regions to try when using the clustering algorithm (default is 8)}
+zones to try when using the clustering algorithm (default is 8)}
\item{antimeridian}{Does \code{spatial_grid} span the antimeridian? If so, this
should be set to \code{TRUE}, otherwise set to \code{FALSE}. If set to \code{NULL}
@@ -68,7 +68,7 @@ format.
\description{
This is a wrapper of \code{get_bathymetry()},
\code{get_seamounts_buffered()}, \code{get_knolls()}, \code{get_geomorphology()},
-\code{get_coral_habitat()}, and \code{get_enviro_regions()}. See the individual
+\code{get_coral_habitat()}, and \code{get_enviro_zones()}. See the individual
functions for details.
}
\examples{
@@ -78,6 +78,8 @@ bermuda_eez <- get_boundary(name = "Bermuda")
raw_data <- get_features(spatial_grid = bermuda_eez, raw = TRUE)
# Get feature data in a spatial grid
bermuda_grid <- get_grid(boundary = bermuda_eez, crs = '+proj=laea +lon_0=-64.8108333 +lat_0=32.3571917 +datum=WGS84 +units=m +no_defs', resolution = 20000)
+#set seed for reproducibility in the get_enviro_zones() function
+set.seed(500)
features_gridded <- get_features(spatial_grid = bermuda_grid)
terra::plot(features_gridded)
}
diff --git a/tests/testthat/test-get_enviro_regions.R b/tests/testthat/test-get_enviro_zones.R
similarity index 62%
rename from tests/testthat/test-get_enviro_regions.R
rename to tests/testthat/test-get_enviro_zones.R
index 1a7820a..0ba498d 100644
--- a/tests/testthat/test-get_enviro_regions.R
+++ b/tests/testthat/test-get_enviro_zones.R
@@ -1,57 +1,60 @@
test_that("returns raw Bio-Oracle data - 11 layer raster", {
- expect_equal(terra::nlyr(get_enviro_regions(get_boundary(name = "Bermuda"), raw = TRUE, enviro_regions = FALSE)), 11)
+ expect_equal(terra::nlyr(get_enviro_zones(get_boundary(name = "Bermuda"), raw = TRUE, enviro_zones = FALSE)), 11)
})
test_that("returns gridded Bermuda Bio-Oracle data - raster", {
+ set.seed(500)
expect_s4_class(get_boundary(name = "Bermuda") |>
get_grid(crs = '+proj=laea +lon_0=-64.8108333 +lat_0=32.3571917 +datum=WGS84 +units=m +no_defs',
resolution = 20000)|>
- get_enviro_regions(raw = FALSE, enviro_regions = FALSE), class = "SpatRaster")
+ get_enviro_zones(raw = FALSE, enviro_zones = FALSE), class = "SpatRaster")
})
test_that("returns gridded Bermuda Bio-Oracle data - sf", {
+ set.seed(500)
expect_s3_class(get_boundary(name = "Bermuda") |>
get_grid(crs = '+proj=laea +lon_0=-64.8108333 +lat_0=32.3571917 +datum=WGS84 +units=m +no_defs',
resolution = 20000,
output = "sf_square")|>
- get_enviro_regions(raw = FALSE, enviro_regions = FALSE), class = "sf")
+ get_enviro_zones(raw = FALSE, enviro_zones = FALSE), class = "sf")
})
-test_that("returns gridded Bermuda enviroregions - raster", {
+test_that("returns gridded Bermuda envirozones - raster", {
expect_s4_class(get_boundary(name = "Bermuda") |>
get_grid(crs = '+proj=laea +lon_0=-64.8108333 +lat_0=32.3571917 +datum=WGS84 +units=m +no_defs',
resolution = 20000,
output = "raster")|>
- get_enviro_regions(raw = FALSE, enviro_regions = TRUE, num_clusters = 3), class = "SpatRaster")
+ get_enviro_zones(raw = FALSE, enviro_zones = TRUE, num_clusters = 3), class = "SpatRaster")
})
-test_that("returns gridded Kiribati enviroregions - sf", {
+test_that("returns gridded Kiribati envirozones - sf", {
expect_s3_class(get_boundary(name = "Kiribati", country_type = "sovereign") |>
get_grid(crs = '+proj=laea +lon_0=-159.609375 +lat_0=0 +datum=WGS84 +units=m +no_defs',
resolution = 50000,
output = "sf_square") |>
- get_enviro_regions(raw = FALSE, enviro_regions = TRUE, num_clusters = 3), class = "sf")
+ get_enviro_zones(raw = FALSE, enviro_zones = TRUE, num_clusters = 3), class = "sf")
})
-test_that("returns gridded Bermuda enviroregions data with extra input columns - sf", {
+test_that("returns gridded Bermuda envirozones data with extra input columns - sf", {
+ set.seed(500)
expect_equal(get_boundary(name = "Bermuda") |>
get_grid(crs = '+proj=laea +lon_0=-64.8108333 +lat_0=32.3571917 +datum=WGS84 +units=m +no_defs',
resolution = 20000,
output = "sf_square") |>
dplyr::mutate(extracol1 = 1, extracol2 = 2, .before = 1) |>
- get_enviro_regions(raw = FALSE, enviro_regions = TRUE, show_plots = TRUE) |>
+ get_enviro_zones(raw = FALSE, enviro_zones = TRUE, show_plots = TRUE) |>
ncol(), 6)
})
test_that("returns error because num_clusters is not a positive integer", {
- expect_error(get_enviro_regions(get_boundary(name = "Bermuda"), num_clusters = 0))
+ expect_error(get_enviro_zones(get_boundary(name = "Bermuda"), num_clusters = 0))
})
test_that("returns error because num_clusters is not an integer", {
- expect_error(get_enviro_regions(get_boundary(name = "Bermuda"), num_clusters = 1.5))
+ expect_error(get_enviro_zones(get_boundary(name = "Bermuda"), num_clusters = 1.5))
})
test_that("returns error because max_num_clusters is not an integer", {
- expect_error(get_enviro_regions(get_boundary(name = "Bermuda"), max_num_clusters = 10.5))
+ expect_error(get_enviro_zones(get_boundary(name = "Bermuda"), max_num_clusters = 10.5))
})
diff --git a/tests/testthat/test-get_features.R b/tests/testthat/test-get_features.R
index 9da5803..e0cc640 100644
--- a/tests/testthat/test-get_features.R
+++ b/tests/testthat/test-get_features.R
@@ -4,6 +4,7 @@ test_that("returns raw Bermuda data as list", {
})
test_that("returns gridded Bermuda features - raster", {
+ set.seed(500)
expect_s4_class(suppressWarnings(get_features(spatial_grid = get_grid(boundary = get_boundary(name = "Bermuda"),
crs = '+proj=laea +lon_0=-64.8108333 +lat_0=32.3571917 +datum=WGS84 +units=m +no_defs',
resolution = 20000))),
@@ -11,6 +12,7 @@ test_that("returns gridded Bermuda features - raster", {
})
test_that("returns gridded Kiribati features - sf with extra cols", {
+ set.seed(1234)
expect_equal(suppressWarnings(get_boundary(name = "Kiribati", country_type = "sovereign") |>
get_grid(crs = '+proj=laea +lon_0=-159.609375 +lat_0=0 +datum=WGS84 +units=m +no_defs',
resolution = 50000,
diff --git a/vignettes/figure/features-1.png b/vignettes/figure/features-1.png
index cd9d2a0..f0f022c 100644
Binary files a/vignettes/figure/features-1.png and b/vignettes/figure/features-1.png differ
diff --git a/vignettes/figure/fishing-effort-1.png b/vignettes/figure/fishing-effort-1.png
index 7f86dc2..5991064 100644
Binary files a/vignettes/figure/fishing-effort-1.png and b/vignettes/figure/fishing-effort-1.png differ
diff --git a/vignettes/figure/high_seas_area-1.png b/vignettes/figure/high_seas_area-1.png
index e7f897a..4e30620 100644
Binary files a/vignettes/figure/high_seas_area-1.png and b/vignettes/figure/high_seas_area-1.png differ
diff --git a/vignettes/figure/pacific_high_seas-1.png b/vignettes/figure/pacific_high_seas-1.png
index 3fd90eb..a488862 100644
Binary files a/vignettes/figure/pacific_high_seas-1.png and b/vignettes/figure/pacific_high_seas-1.png differ
diff --git a/vignettes/figure/planning_grid-1.png b/vignettes/figure/planning_grid-1.png
index 0f1fdbb..c8f8c70 100644
Binary files a/vignettes/figure/planning_grid-1.png and b/vignettes/figure/planning_grid-1.png differ
diff --git a/vignettes/figure/prioritization-1.png b/vignettes/figure/prioritization-1.png
index 697ffc7..26d2fe1 100644
Binary files a/vignettes/figure/prioritization-1.png and b/vignettes/figure/prioritization-1.png differ
diff --git a/vignettes/figure/prioritization_patches-1.png b/vignettes/figure/prioritization_patches-1.png
index 052642c..84e75fe 100644
Binary files a/vignettes/figure/prioritization_patches-1.png and b/vignettes/figure/prioritization_patches-1.png differ
diff --git a/vignettes/pacific_high_seas_example.Rmd b/vignettes/pacific_high_seas_example.Rmd
index 71767df..52c2909 100644
--- a/vignettes/pacific_high_seas_example.Rmd
+++ b/vignettes/pacific_high_seas_example.Rmd
@@ -117,6 +117,9 @@ Now we have a planning grid, we can get data on conservation features (e.g. habi
``` r
+#set seed for reproducibility in the get_enviro_zones() sampling to find optimal cluster number
+set.seed(500)
+
feature_set <- get_features(spatial_grid = pacific_hs_planning_grid, seamount_buffer = 30000) %>%
oceandatr:::remove_empty_layers() #use this to remove raster layers that are empty
diff --git a/vignettes/pacific_high_seas_example.Rmd.orig b/vignettes/pacific_high_seas_example.Rmd.orig
index b6bdeaf..eade94f 100644
--- a/vignettes/pacific_high_seas_example.Rmd.orig
+++ b/vignettes/pacific_high_seas_example.Rmd.orig
@@ -166,6 +166,9 @@ Now we have a planning grid, we can get data on conservation features (e.g. habi
```{r features, fig.cap="Conservation features for the Pacific High Seas planning area"}
+#set seed for reproducibility in the get_enviro_zones() sampling to find optimal cluster number
+set.seed(500)
+
feature_set <- get_features(spatial_grid = pacific_hs_planning_grid, seamount_buffer = 30000) %>%
oceandatr:::remove_empty_layers() #use this to remove raster layers that are empty