Skip to content

Commit

Permalink
Merge pull request #283 from ropensci/v5.0.0
Browse files Browse the repository at this point in the history
V5.0.0
  • Loading branch information
cboettig authored Aug 12, 2024
2 parents ef3e913 + 4cf94ac commit 0fdb957
Show file tree
Hide file tree
Showing 88 changed files with 744 additions and 2,012 deletions.
27 changes: 8 additions & 19 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Description: A programmatic interface to 'FishBase', re-written
supports experimental access to 'SeaLifeBase' data, which contains
nearly 200,000 species records for all types of aquatic life not covered by
'FishBase.'
Version: 4.1.1
Version: 5.0.0
Encoding: UTF-8
License: CC0
Authors@R: c(person("Carl", "Boettiger",
Expand All @@ -26,31 +26,20 @@ LazyData: true
Depends:
R (>= 4.0)
Imports:
methods,
utils,
tools,
purrr,
progress,
memoise,
rlang,
magrittr,
readr (>= 2.0.0),
glue,
stringr,
purrr,
jsonlite,
DBI,
dplyr,
dbplyr,
duckdb,
contentid (>= 0.0.15),
rstudioapi,
fs,
glue,
tibble
duckdbfs,
rlang,
magrittr,
memoise
Suggests:
testthat,
rmarkdown,
knitr,
covr,
spelling
RoxygenNote: 7.2.3
RoxygenNote: 7.3.1
Language: en-US
27 changes: 0 additions & 27 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -9,22 +9,15 @@ export(common_to_sci)
export(country)
export(countrysub)
export(countrysubref)
export(db_dir)
export(db_disconnect)
export(diet)
export(diet_items)
export(distribution)
export(docs)
export(ecology)
export(ecosystem)
export(estimate)
export(faoareas)
export(fb_conn)
export(fb_import)
export(fb_tables)
export(fb_tbl)
export(fecundity)
export(fishbase_pane)
export(fooditems)
export(genetics)
export(introductions)
Expand All @@ -36,7 +29,6 @@ export(load_taxa)
export(maturity)
export(morphology)
export(morphometrics)
export(occurrence)
export(oxygen)
export(popchar)
export(popgrowth)
Expand All @@ -52,17 +44,13 @@ export(sci_to_common)
export(spawning)
export(species)
export(species_by_ecosystem)
export(species_list)
export(species_names)
export(speed)
export(stocks)
export(swimming)
export(synonyms)
export(validate_names)
importFrom(dbplyr,translate_sql)
importFrom(dplyr,collect)
importFrom(dplyr,distinct)
importFrom(dplyr,enquo)
importFrom(dplyr,filter)
importFrom(dplyr,left_join)
importFrom(dplyr,mutate)
Expand All @@ -71,24 +59,9 @@ importFrom(dplyr,rename)
importFrom(dplyr,right_join)
importFrom(dplyr,select)
importFrom(dplyr,sym)
importFrom(fs,dir_create)
importFrom(fs,dir_ls)
importFrom(fs,path)
importFrom(magrittr,`%>%`)
importFrom(methods,as)
importFrom(methods,is)
importFrom(progress,progress_bar)
importFrom(purrr,map_chr)
importFrom(purrr,map_dfr)
importFrom(readr,read_csv)
importFrom(rlang,"!!")
importFrom(rlang,.data)
importFrom(stats,na.omit)
importFrom(stringr,str_extract)
importFrom(stringr,str_to_lower)
importFrom(tools,R_user_dir)
importFrom(utils,data)
importFrom(utils,globalVariables)
importFrom(utils,lsf.str)
importFrom(utils,packageVersion)
importFrom(utils,read.csv)
27 changes: 26 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,35 @@ And constructed with the following guidelines:

For more information on SemVer, please visit http://semver.org/.

v 5.0.0
-------

Another streamlined re-design following new abilities for data hosting and access.
This release relies on HuggingFace datasets to host both the data and the metadata,
in parquet and schema.org.

Data access is simplified to use the simple HuggingFace datasets API instead
of the previous contentid-based resolution. This allows metadata to be defined
directly alongside the data on the hosting platform, independent of the R package.

A simplified access protocol relies on `duckdbfs` for direct reads of tables.
Several functions previously used only to manage connections are now deprecated
or removed, along with a significant number of dependencies.

Core use still centers around the same package API using the `fb_tbl()` function.
Legacy helper functions for common tables, like `species()`, are still accessible and
can still optionally filter by species name where appropriate. As before, loading the
full tables and sub-setting manually is still recommended.

Historic helper functions like `load_taxa()` (combining the taxonomic classification from Species,
Genus, Family and Order tables), `validate_names()`, and `common_to_sci()` and
`sci_to_common()` should be in working order, all using table-based outputs.


v 4.1.1
-------

* hotfix for bug in 4.1.0 on Windows -- duckdb httpfs on windows creates sigfault
* hotfix for bug in 4.1.0 on Windows -- `duckdb` `httpfs` on Windows caused a segfault

v 4.1.0
--------
Expand Down
85 changes: 25 additions & 60 deletions R/00-endpoint.R
Original file line number Diff line number Diff line change
@@ -1,90 +1,48 @@

utils::globalVariables("sci_name", package="rfishbase")


## Allows us to define functions for each endpoint using closures
#' @importFrom dplyr left_join rename sym
#' @importFrom rlang !! .data
endpoint <- function(endpt, join = NULL, by = NULL){

function(species_list = NULL,
fields = NULL,
server = getOption("FISHBASE_API", "fishbase"),
version = get_latest_release(),
db = default_db(server, version),
server = c("fishbase", "sealifebase"),
version = "latest",
db = NULL,
...){

out <- fb_tbl(endpt, server, version, db) %>% fix_ids()
out <- fb_tbl(endpt, server, version) %>% fix_ids()

if(!is.null(species_list)){
species <-
dplyr::select(fb_tbl("species", server, version, db),
"SpecCode", "Genus", "Species") %>%
fb_tbl("species", server, version) %>%
dplyr::select("SpecCode", "Genus", "Species") %>%
dplyr::mutate(sci_name = paste(Genus, Species)) %>%
dplyr::filter(sci_name %in% species_list) %>%
dplyr::select(Species=sci_name, "SpecCode")

out <- dplyr::inner_join(species, out) %>% dplyr::distinct()
}


if(!is.null(fields)){
out <- select(out, !!fields) %>% dplyr::distinct()
out <- dplyr::select(out, !!fields) %>% dplyr::distinct()
}

if(!is.null(join))
out <- left_join(out, join, by = by)

dplyr::collect(out)
}

}


species_subset <- function(species_list,
full_data,
server = getOption("FISHBASE_API", "fishbase"),
version = get_latest_release(),
db = default_db()){


species <-
dplyr::select(dplyr::tbl(db, "species"),
"SpecCode", "Genus", "Species") %>%
dplyr::mutate(sci_name = paste(Genus, Species)) %>%
dplyr::select("SpecCode", Species=sci_name) %>%
collect()



## "Species" in many tables is just the epithet, we want full species name so drop that.
if("Species" %in% colnames(full_data)){
sp <- dplyr::sym("Species")
full_data <- dplyr::select(full_data, - !!sp)
}

if(is.null(species_list)){
return(dplyr::left_join(species, full_data, by = "SpecCode"))
if(!is.null(join)) {
out <- dplyr::left_join(out, join, by = by)
}
out
}

speccodes(species_list, table = species, db = db) %>%
dplyr::left_join(full_data, by = "SpecCode")
}


## handle ids or species names, returning remote table for joining
speccodes <- function(species_list, table, db){
if(is.integer(species_list)){
df <- dplyr::tibble(SpecCode = species_list)
} else {
df <- dplyr::tibble(Species = species_list)
}

## Manually copy. we want a left_join since right_join isn't in RSQLite
## but left_join(copy=TRUE) would copy the larger table instead
#tmp <- tmp_tablename()
#dplyr::copy_to(db, df = df, name = tmp, overwrite=TRUE, temporary=TRUE)
#df <- dplyr::tbl(db, tmp)


suppressMessages({
dplyr::left_join(df, table) %>%
select("SpecCode", "Species")
Expand All @@ -93,8 +51,6 @@ speccodes <- function(species_list, table, db){





#' @importFrom dplyr sym
fix_ids <- function(full_data){
if("Speccode" %in% colnames(full_data)){
Expand All @@ -111,13 +67,22 @@ fix_ids <- function(full_data){
## Define function that maps sci names to SpecCode, subsets table by requested sci name or spec code
#' @importFrom dplyr mutate select
fb_species <- function(server = getOption("FISHBASE_API", "fishbase"),
version = get_latest_release(),
db = default_db(server, version),
version = "latest",
db = NULL,
...){
load_taxa(server, version, db, collect = FALSE, ...) %>% dplyr::select("SpecCode", "Species")
load_taxa(server, version, db=NULL, collect = FALSE, ...) %>%
dplyr::select("SpecCode", "Species")
}



## Build a throwaway table name: the literal prefix "tmp_" followed by
## `n` lowercase letters drawn at random (with replacement) from `letters`.
## Returns a single character string.
tmp_tablename <- function(n=10) {
  random_letters <- sample(letters, n, replace = TRUE)
  suffix <- paste0(random_letters, collapse = "")
  paste0("tmp_", suffix)
}


utils::globalVariables("sci_name", package="rfishbase")


#' @importFrom magrittr `%>%`
#' @export
magrittr::`%>%`
49 changes: 0 additions & 49 deletions R/available_releases.R

This file was deleted.

Loading

0 comments on commit 0fdb957

Please sign in to comment.