Skip to content

Commit

Permalink
feat tox provide all data and tests improved [skipci]
Browse files Browse the repository at this point in the history
  • Loading branch information
c1au6i0 committed Dec 31, 2024
1 parent f544b1d commit 050c765
Show file tree
Hide file tree
Showing 32 changed files with 3,832 additions and 340 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@ export(extr_tetramer)
export(extr_tox)
export(with_extr_sandbox)
export(write_dataframes_to_excel)
importFrom(cli,cli_warn)
1 change: 0 additions & 1 deletion R/cache.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
#' The function will overwrite any existing file with the same name.
#' @noRd
save_to_cache <- function(obj, file_name, verbose = FALSE) {

# Sys.getenv("R_USER_CACHE_DIR")

cache_dir <- tools::R_user_dir("extractox", which = "cache")
Expand Down
5 changes: 2 additions & 3 deletions R/checks.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
' Check for NA values in a specific column of a dataframe
#'
" Check for NA values in a specific column of a dataframe
#"
#' Checks for NA values in a specified column of a dataframe and optionally warns if any are found.
#'
#' @param dat A dataframe that contains the data.
Expand All @@ -9,7 +9,6 @@
#' @keywords internal
#' @noRd
check_na_warn <- function(dat, col_to_check, verbose = TRUE) {

ids_not_found <- dat$query[is.na(dat[[col_to_check]])]

if (all(isTRUE(verbose), length(ids_not_found) != 0)) {
Expand Down
2 changes: 1 addition & 1 deletion R/extr_comptox.R
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ extr_comptox <- function(ids,
sheet_names <- readxl::excel_sheets(xlsx_file)

suppressMessages(
dat_list <- lapply(sheet_names, readxl::read_excel, path = xlsx_file)
dat_list <- lapply(sheet_names, readxl::read_excel, path = xlsx_file)
)

names(dat_list) <- paste0("comptox_", gsub(" ", "_", tolower(sheet_names)))
Expand Down
16 changes: 6 additions & 10 deletions R/extr_ctd.R
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,10 @@ extr_ctd <- function(
check_internet(verbose = verbose)


col_names <- c("chemical_name", "chemical_id", "casrn", "gene_symbol",
"gene_id", "organism", "organism_id", "pubmed_ids", "query")
col_names <- c(
"chemical_name", "chemical_id", "casrn", "gene_symbol",
"gene_id", "organism", "organism_id", "pubmed_ids", "query"
)

params <- list(
inputType = category,
Expand Down Expand Up @@ -145,11 +147,10 @@ extr_ctd <- function(
# Lets clean up
out$query <- gsub(" \\[Object not found\\]", "", out$query)

check_na_warn(dat = out,col_to_check = "gene_id", verbose = verbose)
check_na_warn(dat = out, col_to_check = "gene_id", verbose = verbose)

unlink(csv_file)
out

}

#' Extract Tetramer Data from the CTD API
Expand Down Expand Up @@ -198,8 +199,6 @@ extr_tetramer <- function(
verify_ssl = FALSE,
verbose = TRUE,
...) {


if (missing(chem)) {
cli::cli_abort("The argument {.field {chem}} is required.")
}
Expand Down Expand Up @@ -234,7 +233,7 @@ extr_tetramer <- function(
)
}

check_na_warn(dat = out,col_to_check = "gene_id", verbose = verbose)
check_na_warn(dat = out, col_to_check = "gene_id", verbose = verbose)

out
}
Expand All @@ -254,7 +253,6 @@ extr_tetramer_ <- function(
verify_ssl = FALSE,
verbose = verbose,
...) {

# Define the base URL
base_url <- "https://ctdbase.org/query.go"

Expand Down Expand Up @@ -330,5 +328,3 @@ extr_tetramer_ <- function(

out
}


26 changes: 15 additions & 11 deletions R/extr_ice.R
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,13 @@ extr_ice <- function(casrn,
check_status_code(resp, verbose = verbose)

# This is used in case no results are retrieved in next chunk
col_names <- c("assay", "endpoint", "substance_type", "casrn", "qsar_ready_id",
"value", "unit", "species", "receptor_species", "route", "sex",
"strain", "life_stage", "tissue", "lesion", "location",
"assay_source", "in_vitro_assay_format", "reference",
"reference_url", "dtxsid", "substance_name", "pubmed_id")
col_names <- c(
"assay", "endpoint", "substance_type", "casrn", "qsar_ready_id",
"value", "unit", "species", "receptor_species", "route", "sex",
"strain", "life_stage", "tissue", "lesion", "location",
"assay_source", "in_vitro_assay_format", "reference",
"reference_url", "dtxsid", "substance_name", "pubmed_id"
)

out <- stats::setNames(as.data.frame(matrix(ncol = length(col_names), nrow = 0)), col_names)

Expand All @@ -78,7 +80,7 @@ extr_ice <- function(casrn,
},
error = function(e) {
if (grepl("Unexpected content type \"text/plain\"", e$message)) {
if(isTRUE(verbose)){
if (isTRUE(verbose)) {
cli::cli_warn("It seems that the ids were not found in ICE:
{conditionMessage(e)}")
}
Expand Down Expand Up @@ -112,14 +114,16 @@ extr_ice <- function(casrn,
ids_founds <- casrn[casrn %in% out$casrn]

if (nrow(out) > 0) {

out[, "query"] <- paste(ids_founds, collapse = ", ")
to_add <- stats::setNames(data.frame(matrix(ncol = ncol(out),
nrow = length(ids_not_found))),
names(out))
to_add <- stats::setNames(
data.frame(matrix(
ncol = ncol(out),
nrow = length(ids_not_found)
)),
names(out)
)
to_add$query <- ids_not_found
out <- rbind(to_add, out)

} else {
out[1:length(casrn), "query"] <- casrn
}
Expand Down
9 changes: 4 additions & 5 deletions R/extr_iris.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,11 @@
#' extr_iris(c("1332-21-4", "50-00-0"))
#' }
extr_iris <- function(casrn = NULL, verbose = TRUE) {
cancer_types <- c("non_cancer", "cancer")

cancer_types <- c("non_cancer", "cancer")

if (missing(casrn)) {
cli::cli_abort("The argument {.field {casrn}} is required.")
}
if (missing(casrn)) {
cli::cli_abort("The argument {.field {casrn}} is required.")
}

# Check if online
check_internet(verbose = verbose)
Expand Down
39 changes: 20 additions & 19 deletions R/extr_monograph.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,19 @@
#'
#' @param search_type A character string specifying the type of search to
#' perform. Valid options are "cas_rn" (CAS Registry Number) and "name"
#'. (name of the chemical). If `search_type` is "casrn", the function filters
#'. by the CAS Registry Number.
#' . (name of the chemical). If `search_type` is "casrn", the function filters
#' . by the CAS Registry Number.
#' If `search_type` is "name", the function performs a partial match search
#' for the chemical name.
#' @param ids A character vector of IDs to search for.
#' @param verbose A logical value indicating whether to print detailed messages.
#'. Default is TRUE.
#' . Default is TRUE.
#' @param get_all Logical. If TRUE ignore all the other ignore `ids`,
#' `search_type`, set `force = TRUE` and get the all dataset.
#' This is was introduced for debugging purposes.
#' @return A data frame containing the relevant information from the WHO IARC,
#'. including Monograph `volume`, `volume_publication_year`, `evaluation_year`,
#'. and `additional_information` where the chemical was described.
#' . including Monograph `volume`, `volume_publication_year`, `evaluation_year`,
#' . and `additional_information` where the chemical was described.
#' @seealso \url{https://monographs.iarc.who.int/list-of-classifications/}
#' @export
#' @examples
Expand All @@ -26,17 +26,17 @@
#' str(dat)
#'
#' # Example usage for name search
#' dat2 <- extr_monograph(search_type = "name",
#' ids = c("Aloe", "Schistosoma",
#'. "Styrene")
#'. )
#' dat2 <- extr_monograph(
#' search_type = "name",
#' ids = c("Aloe", "Schistosoma", "Styrene")
#' )
#' str(dat2)
#' }
extr_monograph <- function(ids,
search_type ="casrn",
search_type = "casrn",
verbose = TRUE,
get_all = FALSE) {
if(isTRUE(get_all)) {
if (isTRUE(get_all)) {
return(who_iarc_monographs)
}

Expand All @@ -54,16 +54,17 @@ extr_monograph <- function(ids,
cli::cli_alert_info("Extracting WHO IARC monographs...\nLast updated: 2024-11-29 5:08pm (CET)")
}

col_names <- c(names(who_iarc_monographs), "query")
col_names <- c(names(who_iarc_monographs), "query")

out <- search_and_match(dat = who_iarc_monographs,
ids = ids,
search_type = search_type,
col_names = col_names,
chemical_col = "agent"
)
out <- search_and_match(
dat = who_iarc_monographs,
ids = ids,
search_type = search_type,
col_names = col_names,
chemical_col = "agent"
)

check_na_warn(dat = out, col_to_check = "agent", verbose = verbose)
check_na_warn(dat = out, col_to_check = "agent", verbose = verbose)

out
}
54 changes: 30 additions & 24 deletions R/extr_pprtv.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,10 @@
#' extr_pprtv(ids = "107-02-8", search_type = "casrn", verbose = TRUE)
#'
#' # Extract data for a chemical name
#' extr_pprtv(ids = "Acrolein", search_type = "name", verbose = TRUE,
#' force = FALSE)
#' extr_pprtv(
#' ids = "Acrolein", search_type = "name", verbose = TRUE,
#' force = FALSE
#' )
#'
#' # Extract data for multiple identifiers
#' extr_pprtv(
Expand All @@ -45,23 +47,24 @@
#' }
extr_pprtv <- function(ids, search_type = "casrn", verbose = TRUE, force = TRUE,
get_all = FALSE) {


if (all(missing(ids), !isTRUE(get_all))) {
cli::cli_abort("The argument {.field ids} is required.")
}

if (all(!search_type %in% c("casrn", "name"),!isTRUE(get_all))) {
if (all(!search_type %in% c("casrn", "name"), !isTRUE(get_all))) {
cli::cli_abort("The argument {.field search_type} needs to be either `casrn`
or `name`.")
}

file_name <- "epa_pprtvs.rds" # Filename for caching

# Check if path is present otherwise it download it again
full_path_cache_file <- fs::path(tools::R_user_dir("extractox",
which = "cache"),
file_name)
full_path_cache_file <- fs::path(
tools::R_user_dir("extractox",
which = "cache"
),
file_name
)

cache_present <- fs::file_exists(full_path_cache_file)

Expand All @@ -75,12 +78,14 @@ extr_pprtv <- function(ids, search_type = "casrn", verbose = TRUE, force = TRUE,
url = "https://cfpub.epa.gov/ncea/pprtv/atoz.cfm",
url_query_param = list(excel = "yes"),
file_name = file_name,
verbose = verbose)

# get eveything
if (get_all == TRUE) return(dat)
path_cache <- save_to_cache(dat, file_name, verbose = verbose)
verbose = verbose
)

# get eveything
if (get_all == TRUE) {
return(dat)
}
path_cache <- save_to_cache(dat, file_name, verbose = verbose)
} else {
dat <- read_from_cache(file_name = file_name, verbose = verbose)

Expand All @@ -95,20 +100,21 @@ extr_pprtv <- function(ids, search_type = "casrn", verbose = TRUE, force = TRUE,
cli::cli_alert_info("Extracting EPA PPRTVs.")
}

col_names <- c("pprtv_substance_id", "chemical", "casrn", "last_revision",
"pprtv_assessment", "iris_link", "rf_c_value", "rf_d_value",
"woe", "date_downloaded", "query")
col_names <- c(
"pprtv_substance_id", "chemical", "casrn", "last_revision",
"pprtv_assessment", "iris_link", "rf_c_value", "rf_d_value",
"woe", "date_downloaded", "query"
)

out <- search_and_match(
dat = dat,
ids = ids,
search_type = search_type,
col_names = col_names,
chemical_col = "chemical"
)
dat = dat,
ids = ids,
search_type = search_type,
col_names = col_names,
chemical_col = "chemical"
)

check_na_warn(dat = out, col_to_check = "chemical", verbose = verbose)
check_na_warn(dat = out, col_to_check = "chemical", verbose = verbose)

out

}
Loading

0 comments on commit 050c765

Please sign in to comment.