You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
#' Estimate Generalized Beta Parameters
#'
#' @family Parameter Estimation
#' @family Generalized Beta
#'
#' @details This function will attempt to estimate the generalized Beta
#' shape1, shape2, shape3, and rate parameters given some vector of values.
#' The estimates are found by minimizing the negative log-likelihood of
#' `actuar::dgenbeta()` with `stats::optim()`, starting from a value of 1 for
#' every parameter.
#'
#' @description The function will return a list output by default, and if the
#' parameter `.auto_gen_empirical` is set to `TRUE` then the empirical data
#' given to the parameter `.x` will be run through the `tidy_empirical()`
#' function and combined with the estimated generalized Beta data.
#'
#' @param .x The vector of data to be passed to the function. It must not be
#' a factor and, after coercion with `as.numeric()`, must contain at least two
#' distinct values, no missing values, and only positive values.
#' @param .auto_gen_empirical This is a boolean value of TRUE/FALSE with
#' default set to TRUE. This will automatically create the `tidy_empirical()`
#' output for the `.x` parameter and use the `tidy_combine_distributions()`.
#' The user can then plot out the data using `$combined_data_tbl` from the
#' function output.
#'
#' @examples
#' library(dplyr)
#' library(ggplot2)
#'
#' set.seed(123)
#' x <- tidy_generalized_beta(100, .shape1 = 2, .shape2 = 3,
#'                            .shape3 = 4, .rate = 5)[["y"]]
#' output <- util_generalized_beta_param_estimate(x)
#'
#' output$parameter_tbl
#'
#' output$combined_data_tbl %>%
#'   tidy_combined_autoplot()
#'
#' @return
#' A list. It always contains `parameter_tbl`, a one-row tibble with the
#' sample summary and the estimated parameters. When `.auto_gen_empirical` is
#' `TRUE` it also contains `combined_data_tbl` as its first element.
#'
#' @author Steven P. Sanderson II, MPH
#'
#' @export
#'
util_generalized_beta_param_estimate <- function(.x,
                                                 .auto_gen_empirical = TRUE) {

  # Checks ----
  # Factors have to be rejected before coercion: as.numeric() on a factor
  # returns the integer level codes, not the values the labels represent.
  if (is.factor(.x)) {
    rlang::abort(
      message = "'.x' must be a numeric vector.",
      use_cli_format = TRUE
    )
  }

  # Tidyeval ----
  x_term <- as.numeric(.x)
  n <- length(x_term)

  # Enforce everything the error message promises. Missing or non-positive
  # values make the log-likelihood NA or infinite, and a constant sample
  # carries no information about four parameters.
  has_invalid_values <- n < 2 ||
    anyNA(x_term) ||
    any(x_term <= 0) ||
    length(unique(x_term)) < 2

  if (has_invalid_values) {
    rlang::abort(
      message = "'.x' must contain at least two non-missing distinct values. All values of '.x' must be positive.",
      use_cli_format = TRUE
    )
  }

  # Negative log-likelihood function for generalized Beta distribution
  genbeta_lik <- function(params, data) {
    -sum(actuar::dgenbeta(
      data,
      shape1 = params[1],
      shape2 = params[2],
      shape3 = params[3],
      rate = params[4],
      log = TRUE
    ))
  }

  # Initial parameter guesses
  initial_params <- c(shape1 = 1, shape2 = 1, shape3 = 1, rate = 1)

  # Optimize to minimize the negative log-likelihood
  opt_result <- stats::optim(
    par = initial_params,
    fn = genbeta_lik,
    data = x_term
  )

  shape1 <- opt_result$par[["shape1"]]
  shape2 <- opt_result$par[["shape2"]]
  shape3 <- opt_result$par[["shape3"]]
  rate <- opt_result$par[["rate"]]

  # Return Tibble ----
  ret <- dplyr::tibble(
    dist_type = "Generalized Beta",
    samp_size = n,
    min = min(x_term),
    max = max(x_term),
    mean = mean(x_term),
    shape1 = shape1,
    shape2 = shape2,
    shape3 = shape3,
    rate = rate
  )

  attr(ret, "tibble_type") <- "parameter_estimation"
  attr(ret, "family") <- "generalized_beta"
  attr(ret, "x_term") <- .x
  attr(ret, "n") <- n

  # Return ----
  output <- list(parameter_tbl = ret)

  if (.auto_gen_empirical) {
    te <- tidy_empirical(.x = x_term)
    # The simulated comparison data uses the estimates rounded to 3 decimals.
    td <- tidy_generalized_beta(
      .n = n,
      .shape1 = round(shape1, 3),
      .shape2 = round(shape2, 3),
      .shape3 = round(shape3, 3),
      .rate = round(rate, 3)
    )
    combined_tbl <- tidy_combine_distributions(te, td)
    # Keep the combined data as the first element of the returned list.
    output <- c(list(combined_data_tbl = combined_tbl), output)
  }

  return(output)
}
#' Distribution Statistics
#'
#' @family Generalized Beta
#' @family Distribution Statistics
#'
#' @details This function will take in a tibble and return the statistics
#' of the given type of `tidy_` distribution. It is required that data be
#' passed from a `tidy_` distribution function.
#'
#' The theoretical statistics are derived from the raw moments of the
#' generalized Beta distribution in the parameterisation used by
#' `actuar::dgenbeta()`:
#' `E[X^k] = scale^k * B(shape1 + k / shape3, shape2) / B(shape1, shape2)`,
#' with `scale = 1 / rate`. The reported kurtosis is the non-excess kurtosis.
#' The mode is reported only when the density has an interior maximum
#' (`shape1 * shape3 > 1` and `shape2 > 1`); otherwise it is `"undefined"`.
#'
#' @description Returns distribution statistics in a tibble.
#'
#' @param .data The data being passed from a `tidy_` distribution function.
#'
#' @examples
#' library(dplyr)
#'
#' set.seed(123)
#' tidy_generalized_beta() |>
#'   util_generalized_beta_stats_tbl() |>
#'   glimpse()
#'
#' @return
#' A tibble
#'
#' @author Steven P. Sanderson II, MPH
#'
#' @export
#' @rdname util_generalized_beta_stats_tbl
util_generalized_beta_stats_tbl <- function(.data) {

  # Immediate check for tidy_ distribution function
  if (!"tibble_type" %in% names(attributes(.data))) {
    rlang::abort(
      message = "You must pass data from the 'tidy_dist' function.",
      use_cli_format = TRUE
    )
  }

  if (attributes(.data)$tibble_type != "tidy_generalized_beta") {
    rlang::abort(
      message = "You must use 'tidy_generalized_beta()'",
      use_cli_format = TRUE
    )
  }

  # Data
  data_tbl <- dplyr::as_tibble(.data)

  atb <- attributes(data_tbl)
  # NOTE(review): the parameters are assumed to be length-one numerics stored
  # as attributes by tidy_generalized_beta() -- confirm against that function.
  shape1 <- atb$.shape1
  shape2 <- atb$.shape2
  shape3 <- atb$.shape3
  rate <- atb$.rate
  scale <- 1 / rate

  # k-th raw moment of the generalized Beta distribution. lbeta() is used
  # instead of beta() so that large shape values do not under/overflow.
  raw_moment <- function(k) {
    scale^k *
      exp(lbeta(shape1 + k / shape3, shape2) - lbeta(shape1, shape2))
  }

  m1 <- raw_moment(1)
  m2 <- raw_moment(2)
  m3 <- raw_moment(3)
  m4 <- raw_moment(4)

  # Generalized Beta statistics calculation (central moments from raw ones)
  stat_mean <- m1
  stat_var <- m2 - m1^2
  stat_sd <- sqrt(stat_var)
  stat_skewness <- (m3 - 3 * m1 * m2 + 2 * m1^3) / stat_sd^3
  stat_kurtosis <- (m4 - 4 * m1 * m3 + 6 * m1^2 * m2 - 3 * m1^4) / stat_var^2

  # The density is proportional to x^(shape1 * shape3 - 1) *
  # (1 - (x / scale)^shape3)^(shape2 - 1); setting the derivative of its log
  # to zero gives the interior maximum below.
  if (shape1 * shape3 > 1 && shape2 > 1) {
    mode_u <- (shape1 * shape3 - 1) /
      (shape1 * shape3 + shape2 * shape3 - shape3 - 1)
    stat_mode <- scale * mode_u^(1 / shape3)
  } else {
    stat_mode <- "undefined"
  }

  # Data Tibble
  ret <- dplyr::tibble(
    tidy_function = atb$tibble_type,
    function_call = atb$dist_with_params,
    distribution = dist_type_extractor(atb$tibble_type),
    distribution_type = atb$distribution_family_type,
    points = atb$.n,
    simulations = atb$.num_sims,
    mean = stat_mean,
    mode = stat_mode,
    # The support of the distribution is bounded above by the scale.
    range = paste0("0 to ", scale),
    std_dv = stat_sd,
    coeff_var = stat_sd / stat_mean,
    skewness = stat_skewness,
    kurtosis = stat_kurtosis,
    computed_std_skew = tidy_skewness_vec(data_tbl$y),
    computed_std_kurt = tidy_kurtosis_vec(data_tbl$y),
    ci_lo = ci_lo(data_tbl$y),
    ci_hi = ci_hi(data_tbl$y)
  )

  # Return
  return(ret)
}
#' Calculate Akaike Information Criterion (AIC) for Generalized Beta Distribution
#'
#' @family Utility
#'
#' @author Steven P. Sanderson II, MPH
#'
#' @description
#' This function estimates the shape1, shape2, shape3, and rate parameters of
#' a generalized Beta distribution from the provided data using maximum
#' likelihood estimation, and then calculates the AIC value based on the
#' fitted distribution.
#'
#' @param .x A numeric vector containing the data to be fitted to a
#' generalized Beta distribution.
#'
#' @details
#' This function fits a generalized Beta distribution to the provided data
#' using maximum likelihood estimation. It estimates the shape1, shape2,
#' shape3, and rate parameters of the generalized Beta distribution. Then, it
#' calculates the AIC value as `2 * k - 2 * logLik` with `k = 4` parameters.
#'
#' Initial parameter estimates: The starting values for the optimization are
#' the estimates returned by `util_generalized_beta_param_estimate()`.
#'
#' Optimization method: The function uses the optim function for optimization.
#' You might explore different optimization methods within optim for
#' potentially better performance.
#'
#' Goodness-of-fit: While AIC is a useful metric for model comparison, it's
#' recommended to also assess the goodness-of-fit of the chosen model using
#' visualization and other statistical tests.
#'
#' @examples
#' # Example 1: Calculate AIC for a sample dataset
#' set.seed(123)
#' x <- tidy_generalized_beta(100, .shape1 = 2, .shape2 = 3,
#'                            .shape3 = 4, .rate = 5)[["y"]]
#' util_generalized_beta_aic(x)
#'
#' @return
#' The AIC value calculated based on the fitted generalized Beta distribution
#' to the provided data.
#'
#' @name util_generalized_beta_aic
NULL

#' @export
#' @rdname util_generalized_beta_aic
util_generalized_beta_aic <- function(.x) {
  # Tidyeval
  x <- as.numeric(.x)

  # Negative log-likelihood function for generalized Beta distribution
  neg_log_lik_genbeta <- function(par, data) {
    -sum(actuar::dgenbeta(
      data,
      shape1 = par[1],
      shape2 = par[2],
      shape3 = par[3],
      rate = par[4],
      log = TRUE
    ))
  }

  # Initial parameter estimates
  # Only the parameter table is needed, so skip building the empirical and
  # simulated comparison data (which would also draw random numbers).
  pe <- TidyDensity::util_generalized_beta_param_estimate(
    x,
    .auto_gen_empirical = FALSE
  )$parameter_tbl

  initial_params <- c(
    shape1 = pe$shape1,
    shape2 = pe$shape2,
    shape3 = pe$shape3,
    rate = pe$rate
  )

  # Fit generalized Beta distribution using optim
  fit_genbeta <- stats::optim(
    par = initial_params,
    fn = neg_log_lik_genbeta,
    data = x
  )

  # Extract log-likelihood and number of parameters
  # optim() minimized the negative log-likelihood, so flip the sign back.
  logLik_genbeta <- -fit_genbeta$value
  # Number of parameters (shape1, shape2, shape3, and rate)
  k_genbeta <- 4

  # Calculate AIC
  AIC_genbeta <- 2 * k_genbeta - 2 * logLik_genbeta

  # Return AIC
  return(AIC_genbeta)
}
Param Estimate
Function:
Example:
Stats Tibble
Function:
Example:
AIC
Function:
Example:
The text was updated successfully, but these errors were encountered: