-
Notifications
You must be signed in to change notification settings - Fork 1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add function util_chisquare_param_estimate()
#414
Comments
This might work well too, although still finding a good ncp start point might prove somewha elusive: # Load required package
library(bbmle)
# Sample data (replace with your actual data)
data <- rchisq(100, df = 5, ncp = 20)
# Define negative log-likelihood function
negLogLik <- function(df, ncp) {
-sum(dchisq(data, df = df, ncp = ncp, log = TRUE))
}
# Initial values for optimization (crucial for good convergence)
start_vals <- list(df = 3, ncp = 20)
# Maximum likelihood estimation
mle_fit <- mle2(negLogLik, start = start_vals)
# Extract estimated parameters
df_est <- coef(mle_fit)[1]
ncp_est <- coef(mle_fit)[2]
# Print the results
cat("Estimated df:", df_est, "\n")
cat("Estimated ncp:", ncp_est) Output: > # Load required package
> library(bbmle)
>
> # Sample data (replace with your actual data)
> data <- rchisq(100, df = 5, ncp = 20)
>
> # Define negative log-likelihood function
> negLogLik <- function(df, ncp) {
+ -sum(dchisq(data, df = df, ncp = ncp, log = TRUE))
+ }
>
> # Initial values for optimization (crucial for good convergence)
> start_vals <- list(df = 3, ncp = 20)
>
> # Maximum likelihood estimation
> mle_fit <- mle2(negLogLik, start = start_vals)
>
> # Extract estimated parameters
> df_est <- coef(mle_fit)[1]
> ncp_est <- coef(mle_fit)[2]
>
> # Print the results
> cat("Estimated df:", df_est, "\n")
Estimated df: 10.84161
> cat("Estimated ncp:", ncp_est)
Estimated ncp: 14.05248
nd <- rchisq(100, df = df_est, ncp = ncp_est)
hist(data, col = "lightblue", main = "Histogram of data")
hist(nd, col = "lightgreen", add = TRUE) |
Another: > estimate_chisq_params <- function(data) {
+ # Negative log-likelihood function
+ negLogLik <- function(df, ncp) {
+ -sum(dchisq(data, df = df, ncp = ncp, log = TRUE))
+ }
+
+ # Initial values (adjust based on your data if necessary)
+ start_vals <- list(df = trunc(var(data)/2), ncp = trunc(mean(data)))
+
+ # MLE using bbmle
+ mle_fit <- bbmle::mle2(negLogLik, start = start_vals)
+
+ # Return estimated parameters as a named vector
+ c(df = coef(mle_fit)[1], ncp = coef(mle_fit)[2])
+ }
>
> library(purrr)
>
> # List of data vectors (replace with your actual data)
> data_list <- list(rchisq(100, df = 5, ncp = 2),
+ rchisq(80, df = 3, ncp = 1),
+ rchisq(120, df = 7, ncp = 4))
>
> # Apply the estimation function to each data vector
> param_estimates <- map(data_list, estimate_chisq_params)
Warning messages:
1: In dchisq(data, df = df, ncp = ncp, log = TRUE) : NaNs produced
2: In dchisq(data, df = df, ncp = ncp, log = TRUE) : NaNs produced
3: In dchisq(data, df = df, ncp = ncp, log = TRUE) : NaNs produced
4: In dchisq(data, df = df, ncp = ncp, log = TRUE) : NaNs produced
5: In dchisq(data, df = df, ncp = ncp, log = TRUE) : NaNs produced
6: In dchisq(data, df = df, ncp = ncp, log = TRUE) : NaNs produced
>
> # Print results
> print(param_estimates)
[[1]]
df.df ncp.ncp
5.3677154 0.9875087
[[2]]
df.df ncp.ncp
3.2668169 0.3253299
[[3]]
df.df ncp.ncp
6.559403 4.540669 |
Mega Script test# Lib Load ----
library(tidyverse)
library(bbmle)
# Data ----
# Make parameters and grid
df <- 1:10
ncp <- 1:10
n <- runif(10, 250, 500) |> trunc()
param_grid <- expand_grid(n = n, df = df, ncp = ncp)
# Functions ----
# functions to estimate the parameters of a chisq distribution
# dof
mean_x <- function(x) mean(x)
mean_minus_1 <- function(x) mean(x) - 1
var_div_2 <- function(x) var(x) / 2
length_minus_1 <- function(x) length(x) - 1
# ncp
mean_minus_mean_minus_1 <- function(x) mean(x) - (mean(x) - 1)
ie_mean_minus_var_div_2 <- function(x) ifelse((mean(x) - (var(x) / 2)) < 0, 0, mean(x) - var(x)/2)
ie_optim <- function(x) optim(par = 0,
fn = function(ncp) {
-sum(dchisq(x, df = var(x)/2, ncp = ncp, log = TRUE))
},
method = "Brent",
lower = 0,
upper = 10 * var(x)/2)$par
# both
estimate_chisq_params <- function(data) {
# Negative log-likelihood function
negLogLik <- function(df, ncp) {
-sum(dchisq(data, df = df, ncp = ncp, log = TRUE))
}
# Initial values (adjust based on your data if necessary)
start_vals <- list(df = trunc(var(data)/2), ncp = trunc(mean(data)))
# MLE using bbmle
mle_fit <- bbmle::mle2(negLogLik, start = start_vals)
# Return estimated parameters as a named vector
df <- dplyr::tibble(
est_df = coef(mle_fit)[1],
est_ncp = coef(mle_fit)[2]
)
return(df)
}
safe_estimates <- {
purrr::possibly(
estimate_chisq_params,
otherwise = NA_real_,
quiet = TRUE
)
}
# Simulate data ----
set.seed(123)
dff <- param_grid |>
mutate(x = pmap(pick(everything()), match.fun("rchisq"))) |>
mutate(
safe_est_parms = map(x, safe_estimates),
dfa = map_dbl(x, mean_minus_1),
dfb = map_dbl(x, var_div_2),
dfc = map_dbl(x, length_minus_1),
ncpa = map_dbl(x, mean_minus_mean_minus_1),
ncpb = map_dbl(x, ie_mean_minus_var_div_2),
ncpc = map_dbl(x, ie_optim)
) |>
select(-x) |>
filter(map_lgl(safe_est_parms, ~ any(is.na(.x))) == FALSE) |>
unnest(cols = safe_est_parms) |>
mutate(
dfa_resid = dfa - df,
dfb_resid = dfb - df,
dfc_resid = dfc - df,
dfd_resid = est_df - df,
ncpa_resid = ncpa - ncp,
ncpb_resid = ncpb - ncp,
ncpc_resid = ncpc - ncp,
ncpd_resid = est_ncp - ncp
)
# Visuals ----
boxplot(dff$dfa ~ dff$df, main = "mean(x) -1 ~ df")
boxplot(dff$dfb ~ dff$df, main = "var(x) / 2 ~ df")
boxplot(dff$dfc ~ dff$df, main = "length(x) - 1 ~ df")
boxplot(dff$est_df ~ dff$df, main = "negloglik ~ df - Looks Good")
boxplot(dff$ncpa ~ dff$ncp, main = "mean(x) - (mean(x) - 1) ~ ncp")
boxplot(dff$ncpb ~ dff$ncp, main = "mean(x) - var(x)/2 ~ nc")
boxplot(dff$ncpc ~ dff$ncp, main = "optim ~ ncp")
boxplot(dff$est_ncp ~ dff$ncp, main = "negloglik ~ ncp - Looks Good")
boxplot(dff$dfa_resid ~ dff$df, main = "mean(x) -1 ~ df Residuals")
boxplot(dff$dfb_resid ~ dff$df, main = "var(x) / 2 ~ df Residuals")
boxplot(dff$dfc_resid ~ dff$df, main = "length(x) - 1 ~ df Residuals")
boxplot(dff$dfd_resid ~ dff$df, main = "negloglik ~ df Residuals")
boxplot(dff$ncpa_resid ~ dff$ncp, main = "mean(x) - (mean(x) - 1) ~ ncp Residuals")
boxplot(dff$ncpb_resid ~ dff$ncp, main = "mean(x) - var(x)/2 ~ ncp Residuals")
boxplot(dff$ncpc_resid ~ dff$ncp, main = "optim ~ ncp Residuals")
boxplot(dff$ncpd_resid ~ dff$ncp, main = "negloglik ~ ncp Residuals") |
spsanderson
added a commit
that referenced
this issue
Apr 15, 2024
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add a function for
util_chisquare_param_estimate()
Function:
Example:
The text was updated successfully, but these errors were encountered: