diff --git a/DESCRIPTION b/DESCRIPTION index 26193e43..1e23b739 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: ingredients Title: Effects and Importances of Model Ingredients -Version: 0.5.2 +Version: 1.0 Authors@R: c(person("Przemyslaw", "Biecek", email = "przemyslaw.biecek@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-8423-1823")), @@ -11,9 +11,9 @@ Description: Collection of tools for assessment of feature importance and featur Key functions are: feature_importance() for assessment of global level feature importance, ceteris_paribus() for calculation of the what-if plots, - partial_dependency() for partial dependency plots, - conditional_dependency() for conditional dependency plots, - accumulated_dependency() for accumulated local effects plots, + partial_dependence() for partial dependence plots, + conditional_dependence() for conditional dependence plots, + accumulated_dependence() for accumulated local effects plots, aggregate_profiles() and cluster_profiles() for aggregation of ceteris paribus profiles, generic print() and plot() for better usability of selected explainers, generic plotD3() for interactive, D3 based explanations, and diff --git a/NAMESPACE b/NAMESPACE index b140e5f8..c2153306 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,21 +1,22 @@ # Generated by roxygen2: do not edit by hand -S3method(accumulated_dependency,ceteris_paribus_explainer) -S3method(accumulated_dependency,default) -S3method(accumulated_dependency,explainer) +S3method(accumulated_dependence,ceteris_paribus_explainer) +S3method(accumulated_dependence,default) +S3method(accumulated_dependence,explainer) S3method(ceteris_paribus,default) S3method(ceteris_paribus,explainer) -S3method(conditional_dependency,ceteris_paribus_explainer) -S3method(conditional_dependency,default) -S3method(conditional_dependency,explainer) +S3method(conditional_dependence,ceteris_paribus_explainer) +S3method(conditional_dependence,default) 
+S3method(conditional_dependence,explainer) S3method(describe,ceteris_paribus_explainer) S3method(describe,feature_importance_explainer) +S3method(describe,partial_dependence_explainer) S3method(describe,partial_dependency_explainer) S3method(feature_importance,default) S3method(feature_importance,explainer) -S3method(partial_dependency,ceteris_paribus_explainer) -S3method(partial_dependency,default) -S3method(partial_dependency,explainer) +S3method(partial_dependence,ceteris_paribus_explainer) +S3method(partial_dependence,default) +S3method(partial_dependence,explainer) S3method(plot,aggregated_profiles_explainer) S3method(plot,ceteris_paribus_2d_explainer) S3method(plot,ceteris_paribus_explainer) @@ -29,16 +30,19 @@ S3method(print,ceteris_paribus_explainer) S3method(print,feature_importance_explainer) S3method(select_neighbours,default) S3method(select_sample,default) +export(accumulated_dependence) export(accumulated_dependency) export(aggregate_profiles) export(calculate_oscillations) export(ceteris_paribus) export(ceteris_paribus_2d) export(cluster_profiles) +export(conditional_dependence) export(conditional_dependency) export(describe) export(feature_importance) export(local_dependency) +export(partial_dependence) export(partial_dependency) export(plotD3) export(select_neighbours) diff --git a/NEWS.md b/NEWS.md index 2fa1e82b..05f577fc 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +ingredients 1.0 +--------------------------------------------------------------- +* change `dependency` to `dependence` [#103](https://github.com/ModelOriented/ingredients/issues/103) + ingredients 0.5.2 --------------------------------------------------------------- * `ceteris_paribus` profiles are now working for categorical variables diff --git a/R/accumulated_dependency.R b/R/accumulated_dependence.R similarity index 86% rename from R/accumulated_dependency.R rename to R/accumulated_dependence.R index 06bcbc0d..435a16cb 100644 --- a/R/accumulated_dependency.R +++ 
b/R/accumulated_dependence.R @@ -1,9 +1,9 @@ #' Accumulated Local Effects Profiles aka ALEPlots #' #' Accumulated Local Effects Profiles accumulate local changes in Ceteris Paribus Profiles. -#' Function \code{\link{accumulated_dependency}} calls \code{\link{ceteris_paribus}} and then \code{\link{aggregate_profiles}}. +#' Function \code{\link{accumulated_dependence}} calls \code{\link{ceteris_paribus}} and then \code{\link{aggregate_profiles}}. #' -#' Find more detailes in the \href{https://pbiecek.github.io/ema/accumulatedLocalProfiles.html}{Accumulated Local Dependency Chapter}. +#' Find more detailes in the \href{https://pbiecek.github.io/ema/accumulatedLocalProfiles.html}{Accumulated Local Dependence Chapter}. #' #' @param x an explainer created with function \code{DALEX::explain()}, an object of the class \code{ceteris_paribus_explainer} #' or a model to be explained. @@ -13,7 +13,7 @@ #' @param variables names of variables for which profiles shall be calculated. #' Will be passed to \code{\link{calculate_variable_split}}. #' If \code{NULL} then all variables from the validation data will be used. -#' @param N number of observations used for calculation of partial dependency profiles. +#' @param N number of observations used for calculation of partial dependence profiles. #' By default, 500 observations will be chosen randomly. #' @param ... other parameters #' @param variable_splits named list of splits for variables, in most cases created with \code{\link{calculate_variable_split}}. 
@@ -39,7 +39,7 @@ #' y = titanic_imputed[,8], #' verbose = FALSE) #' -#' adp_glm <- accumulated_dependency(explain_titanic_glm, +#' adp_glm <- accumulated_dependence(explain_titanic_glm, #' N = 150, variables = c("age", "fare")) #' head(adp_glm) #' plot(adp_glm) @@ -54,21 +54,21 @@ #' y = titanic_imputed[,8], #' verbose = FALSE) #' -#' adp_rf <- accumulated_dependency(explain_titanic_rf, N = 200, variable_type = "numerical") +#' adp_rf <- accumulated_dependence(explain_titanic_rf, N = 200, variable_type = "numerical") #' plot(adp_rf) #' -#' adp_rf <- accumulated_dependency(explain_titanic_rf, N = 200, variable_type = "categorical") +#' adp_rf <- accumulated_dependence(explain_titanic_rf, N = 200, variable_type = "categorical") #' plotD3(adp_rf, label_margin = 80, scale_plot = TRUE) #' } #' #' @export -#' @rdname accumulated_dependency -accumulated_dependency <- function(x, ...) - UseMethod("accumulated_dependency") +#' @rdname accumulated_dependence +accumulated_dependence <- function(x, ...) 
+ UseMethod("accumulated_dependence") #' @export -#' @rdname accumulated_dependency -accumulated_dependency.explainer <- function(x, +#' @rdname accumulated_dependence +accumulated_dependence.explainer <- function(x, variables = NULL, N = 500, variable_splits = NULL, @@ -81,7 +81,7 @@ accumulated_dependency.explainer <- function(x, predict_function <- x$predict_function label <- x$label - accumulated_dependency.default(x = model, + accumulated_dependence.default(x = model, data = data, predict_function = predict_function, label = label, @@ -94,8 +94,8 @@ accumulated_dependency.explainer <- function(x, #' @export -#' @rdname accumulated_dependency -accumulated_dependency.default <- function(x, +#' @rdname accumulated_dependence +accumulated_dependence.default <- function(x, data, predict_function = predict, label = class(x)[1], @@ -127,10 +127,13 @@ accumulated_dependency.default <- function(x, #' @export -#' @rdname accumulated_dependency -accumulated_dependency.ceteris_paribus_explainer <- function(x, ..., +#' @rdname accumulated_dependence +accumulated_dependence.ceteris_paribus_explainer <- function(x, ..., variables = NULL) { aggregate_profiles(x, ..., type = "accumulated", variables = variables) } +#' @export +#' @rdname accumulated_dependence +accumulated_dependency <- accumulated_dependence diff --git a/R/aggregate_profiles.R b/R/aggregate_profiles.R index e8eb2baf..6a019af0 100644 --- a/R/aggregate_profiles.R +++ b/R/aggregate_profiles.R @@ -1,9 +1,9 @@ #' Aggregates Ceteris Paribus Profiles #' #' The function \code{aggregate_profiles()} calculates an aggregate of ceteris paribus profiles. -#' It can be: Partial Dependency Profile (average across Ceteris Paribus Profiles), -#' Conditional Dependency Profile (local weighted average across Ceteris Paribus Profiles) or -#' Accumulated Local Dependency Profile (cummulated average local changes in Ceteris Paribus Profiles). 
+#' It can be: Partial Dependence Profile (average across Ceteris Paribus Profiles), +#' Conditional Dependence Profile (local weighted average across Ceteris Paribus Profiles) or +#' Accumulated Local Dependence Profile (cummulated average local changes in Ceteris Paribus Profiles). #' #' @param x a ceteris paribus explainer produced with function \code{ceteris_paribus()} #' @param ... other explainers that shall be calculated together @@ -170,17 +170,17 @@ aggregate_profiles <- function(x, ..., if (type == "partial") { aggregated_profiles <- aggregated_profiles_partial(all_profiles, groups) class(aggregated_profiles) <- c("aggregated_profiles_explainer", - "partial_dependency_explainer", "data.frame") + "partial_dependence_explainer", "data.frame") } if (type == "conditional") { aggregated_profiles <- aggregated_profiles_conditional(all_profiles, groups, span = span) class(aggregated_profiles) <- c("aggregated_profiles_explainer", - "conditional_dependency_explainer", "data.frame") + "conditional_dependence_explainer", "data.frame") } if (type == "accumulated") { aggregated_profiles <- aggregated_profiles_accumulated(all_profiles, groups, span = span, center = center) class(aggregated_profiles) <- c("aggregated_profiles_explainer", - "accumulated_dependency_explainer", "data.frame") + "accumulated_dependence_explainer", "data.frame") } # calculate mean(all observation's _yhat_), mean of prediction diff --git a/R/conditional_dependency.R b/R/conditional_dependence.R similarity index 82% rename from R/conditional_dependency.R rename to R/conditional_dependence.R index 8edc1f38..7a4101ff 100644 --- a/R/conditional_dependency.R +++ b/R/conditional_dependence.R @@ -1,9 +1,9 @@ -#' Conditional Dependency Profiles +#' Conditional Dependence Profiles #' -#' Conditional Dependency Profiles (aka Local Profiles) average localy Ceteris Paribus Profiles. -#' Function 'conditional_dependency' calls 'ceteris_paribus' and then 'aggregate_profiles'. 
+#' Conditional Dependence Profiles (aka Local Profiles) average localy Ceteris Paribus Profiles. +#' Function 'conditional_dependence' calls 'ceteris_paribus' and then 'aggregate_profiles'. #' -#' Find more detailes in the \href{https://pbiecek.github.io/ema/accumulatedLocalProfiles.html}{Accumulated Local Dependency Chapter}. +#' Find more detailes in the \href{https://pbiecek.github.io/ema/accumulatedLocalProfiles.html}{Accumulated Local Dependence Chapter}. #' #' @param x an explainer created with function \code{DALEX::explain()}, an object of the class \code{ceteris_paribus_explainer} #' or a model to be explained. @@ -12,7 +12,7 @@ #' @param predict_function predict function, will be extracted from \code{x} if it's an explainer #' @param variables names of variables for which profiles shall be calculated. #' Will be passed to \code{\link{calculate_variable_split}}. If \code{NULL} then all variables from the validation data will be used. -#' @param N number of observations used for calculation of partial dependency profiles. By default 500. +#' @param N number of observations used for calculation of partial dependence profiles. By default 500. #' @param ... other parameters #' @param variable_splits named list of splits for variables, in most cases created with \code{\link{calculate_variable_split}}. #' If \code{NULL} then it will be calculated based on validation data avaliable in the \code{explainer}. 
@@ -36,7 +36,7 @@ #' y = titanic_imputed[,8], #' verbose = FALSE) #' -#' cdp_glm <- conditional_dependency(explain_titanic_glm, +#' cdp_glm <- conditional_dependence(explain_titanic_glm, #' N = 150, variables = c("age", "fare")) #' head(cdp_glm) #' plot(cdp_glm) @@ -51,21 +51,21 @@ #' y = titanic_imputed[,8], #' verbose = FALSE) #' -#' cdp_rf <- conditional_dependency(explain_titanic_rf, N = 200, variable_type = "numerical") +#' cdp_rf <- conditional_dependence(explain_titanic_rf, N = 200, variable_type = "numerical") #' plot(cdp_rf) #' -#' cdp_rf <- conditional_dependency(explain_titanic_rf, N = 200, variable_type = "categorical") +#' cdp_rf <- conditional_dependence(explain_titanic_rf, N = 200, variable_type = "categorical") #' plotD3(cdp_rf, label_margin = 80, scale_plot = TRUE) #' } #' #' @export -#' @rdname conditional_dependency -conditional_dependency <- function(x, ...) - UseMethod("conditional_dependency") +#' @rdname conditional_dependence +conditional_dependence <- function(x, ...) + UseMethod("conditional_dependence") #' @export -#' @rdname conditional_dependency -conditional_dependency.explainer <- function(x, +#' @rdname conditional_dependence +conditional_dependence.explainer <- function(x, variables = NULL, N = 500, variable_splits = NULL, @@ -78,7 +78,7 @@ conditional_dependency.explainer <- function(x, predict_function <- x$predict_function label <- x$label - conditional_dependency.default(x = model, + conditional_dependence.default(x = model, data = data, predict_function = predict_function, label = label, @@ -91,8 +91,8 @@ conditional_dependency.explainer <- function(x, #' @export -#' @rdname conditional_dependency -conditional_dependency.default <- function(x, +#' @rdname conditional_dependence +conditional_dependence.default <- function(x, data, predict_function = predict, label = class(x)[1], @@ -119,18 +119,22 @@ conditional_dependency.default <- function(x, variable_splits = variable_splits, label = label, ...) 
- conditional_dependency.ceteris_paribus_explainer(cp, variables = variables, variable_type = variable_type, ...) + conditional_dependence.ceteris_paribus_explainer(cp, variables = variables, variable_type = variable_type, ...) } #' @export -#' @rdname conditional_dependency -conditional_dependency.ceteris_paribus_explainer <- function(x, ..., +#' @rdname conditional_dependence +conditional_dependence.ceteris_paribus_explainer <- function(x, ..., variables = NULL) { aggregate_profiles(x, ..., type = "conditional", variables = variables) } #' @export -#' @rdname conditional_dependency -local_dependency <- conditional_dependency +#' @rdname conditional_dependence +local_dependency <- conditional_dependence + +#' @export +#' @rdname conditional_dependence +conditional_dependency <- conditional_dependence diff --git a/R/describe_aggregated_profiles.R b/R/describe_aggregated_profiles.R index 2514adad..3d266355 100644 --- a/R/describe_aggregated_profiles.R +++ b/R/describe_aggregated_profiles.R @@ -32,7 +32,7 @@ #' #' @export #' @rdname describe -describe.partial_dependency_explainer <- function(x, +describe.partial_dependence_explainer <- function(x, nonsignificance_treshold = 0.15, ..., display_values = FALSE, @@ -290,3 +290,6 @@ specify_df_aggregated <- function(x, variables, nonsignificance_treshold) { list("df" = df, "treshold" = treshold) } + +#' @export +describe.partial_dependency_explainer <- describe.partial_dependence_explainer diff --git a/R/partial_dependency.R b/R/partial_dependence.R similarity index 85% rename from R/partial_dependency.R rename to R/partial_dependence.R index ba9b5ae9..1ae0baf2 100644 --- a/R/partial_dependency.R +++ b/R/partial_dependence.R @@ -1,7 +1,7 @@ -#' Partial Dependency Profiles +#' Partial Dependence Profiles #' -#' Partial Dependency Profiles are averages from Ceteris Paribus Profiles. -#' Function \code{partial_dependency} calls \code{ceteris_paribus} and then \code{aggregate_profiles}. 
+#' Partial Dependence Profiles are averages from Ceteris Paribus Profiles. +#' Function \code{partial_dependence} calls \code{ceteris_paribus} and then \code{aggregate_profiles}. #' #' Find more detailes in the \href{https://pbiecek.github.io/ema/partialDependenceProfiles.html}{Partial Dependence Profiles Chapter}. #' @@ -13,7 +13,7 @@ #' @param variables names of variables for which profiles shall be calculated. #' Will be passed to \code{\link{calculate_variable_split}}. #' If \code{NULL} then all variables from the validation data will be used. -#' @param N number of observations used for calculation of partial dependency profiles. By default 500. +#' @param N number of observations used for calculation of partial dependence profiles. By default 500. #' @param ... other parameters #' @param variable_splits named list of splits for variables, in most cases created with \code{\link{calculate_variable_split}}. #' If \code{NULL} then it will be calculated based on validation data avaliable in the \code{explainer}. @@ -37,7 +37,7 @@ #' y = titanic_imputed[,8], #' verbose = FALSE) #' -#' pdp_glm <- partial_dependency(explain_titanic_glm, +#' pdp_glm <- partial_dependence(explain_titanic_glm, #' N = 50, variables = c("age", "fare")) #' head(pdp_glm) #' plot(pdp_glm) @@ -52,21 +52,21 @@ #' y = titanic_imputed[,8], #' verbose = FALSE) #' -#' pdp_rf <- partial_dependency(explain_titanic_rf, variable_type = "numerical") +#' pdp_rf <- partial_dependence(explain_titanic_rf, variable_type = "numerical") #' plot(pdp_rf) #' -#' pdp_rf <- partial_dependency(explain_titanic_rf, variable_type = "categorical") +#' pdp_rf <- partial_dependence(explain_titanic_rf, variable_type = "categorical") #' plotD3(pdp_rf, label_margin = 80, scale_plot = TRUE) #' } #' #' @export -#' @rdname partial_dependency -partial_dependency <- function(x, ...) - UseMethod("partial_dependency") +#' @rdname partial_dependence +partial_dependence <- function(x, ...) 
+ UseMethod("partial_dependence") #' @export -#' @rdname partial_dependency -partial_dependency.explainer <- function(x, +#' @rdname partial_dependence +partial_dependence.explainer <- function(x, variables = NULL, N = 500, variable_splits = NULL, @@ -79,7 +79,7 @@ partial_dependency.explainer <- function(x, predict_function <- x$predict_function label <- x$label - partial_dependency.default(x = model, + partial_dependence.default(x = model, data = data, predict_function = predict_function, label = label, @@ -93,8 +93,8 @@ partial_dependency.explainer <- function(x, #' @export -#' @rdname partial_dependency -partial_dependency.default <- function(x, +#' @rdname partial_dependence +partial_dependence.default <- function(x, data, predict_function = predict, label = class(x)[1], @@ -127,9 +127,12 @@ partial_dependency.default <- function(x, #' @export -#' @rdname partial_dependency -partial_dependency.ceteris_paribus_explainer <- function(x, ..., +#' @rdname partial_dependence +partial_dependence.ceteris_paribus_explainer <- function(x, ..., variables = NULL) { aggregate_profiles(x, ..., type = "partial", variables = variables) } +#' @export +#' @rdname partial_dependence +partial_dependency <- partial_dependence diff --git a/R/plotD3_aggregated_profiles.R b/R/plotD3_aggregated_profiles.R index 71a1600c..a38ec525 100644 --- a/R/plotD3_aggregated_profiles.R +++ b/R/plotD3_aggregated_profiles.R @@ -121,7 +121,7 @@ plotD3.aggregated_profiles_explainer <- function(x, ..., size = 2, alpha = 1, }) - ymean <- ifelse("partial_dependency_explainer" %in% class(x), round(attr(x, "mean_prediction"), 3), 0) + ymean <- ifelse("partial_dependence_explainer" %in% class(x), round(attr(x, "mean_prediction"), 3), 0) } options <- list(variableNames = as.list(all_variables), diff --git a/R/plot_aggregated_profiles.R b/R/plot_aggregated_profiles.R index 172a4820..1ac9a09c 100644 --- a/R/plot_aggregated_profiles.R +++ b/R/plot_aggregated_profiles.R @@ -1,6 +1,6 @@ #' Plots Aggregated 
Profiles #' -#' Function \code{plot.aggregated_profiles_explainer} plots partial dependency plot or accumulated effect plot. +#' Function \code{plot.aggregated_profiles_explainer} plots partial dependence plot or accumulated effect plot. #' It works in a similar way to \code{plot.ceteris_paribus}, but instead of individual profiles #' show average profiles for each variable listed in the \code{variables} vector. #' @@ -27,11 +27,11 @@ #' y = titanic_imputed[,8], #' verbose = FALSE) #' -#' pdp_rf_p <- partial_dependency(explain_titanic_glm, N = 50) +#' pdp_rf_p <- partial_dependence(explain_titanic_glm, N = 50) #' pdp_rf_p$`_label_` <- "RF_partial" -#' pdp_rf_l <- conditional_dependency(explain_titanic_glm, N = 50) +#' pdp_rf_l <- conditional_dependence(explain_titanic_glm, N = 50) #' pdp_rf_l$`_label_` <- "RF_local" -#' pdp_rf_a<- accumulated_dependency(explain_titanic_glm, N = 50) +#' pdp_rf_a<- accumulated_dependence(explain_titanic_glm, N = 50) #' pdp_rf_a$`_label_` <- "RF_accumulated" #' head(pdp_rf_p) #' plot(pdp_rf_p, pdp_rf_l, pdp_rf_a, color = "_label_") diff --git a/README.md b/README.md index 70f65bca..277920ca 100644 --- a/README.md +++ b/README.md @@ -14,9 +14,9 @@ Key functions: * `feature_importance()` for assessment of global level feature importance, * `ceteris_paribus()` for calculation of the Ceteris Paribus / What-If Profiles (read more at https://pbiecek.github.io/ema/ceterisParibus.html), -* `partial_dependency()` for Partial Dependency Plots, -* `conditional_dependency()` for Conditional Dependency Plots also called M Plots, -* `accumulated_dependency()` for Accumulated Local Effects Plots, +* `partial_dependence()` for Partial Dependence Plots, +* `conditional_dependence()` for Conditional Dependence Plots also called M Plots, +* `accumulated_dependence()` for Accumulated Local Effects Plots, * `aggregate_profiles()` and `cluster_profiles()` for aggregation of Ceteris Paribus Profiles, * `calculate_oscillations()` for calculation of the 
Ceteris Paribus Oscillations (read more at https://pbiecek.github.io/ema/ceterisParibusOscillations.html), * `ceteris_paribus_2d()` for Ceteris Paribus 2D Profiles (read more at https://pbiecek.github.io/ema/ceterisParibus2d.html), diff --git a/docs/404.html b/docs/404.html index 6f5a07b0..834499a8 100644 --- a/docs/404.html +++ b/docs/404.html @@ -36,12 +36,12 @@ + - @@ -80,7 +80,7 @@
diff --git a/docs/articles/index.html b/docs/articles/index.html index 374c8477..f4d928e9 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -36,12 +36,12 @@ + - @@ -80,7 +80,7 @@ diff --git a/docs/articles/vignette_describe.html b/docs/articles/vignette_describe.html index 0c75cf30..84e010b4 100644 --- a/docs/articles/vignette_describe.html +++ b/docs/articles/vignette_describe.html @@ -42,7 +42,7 @@ @@ -100,7 +100,7 @@vignettes/vignette_describe.Rmd
vignette_describe.Rmd
The ingredients
package allows for generating prediction validation and prediction perturbation explanations. They allow for both global and local model explanation.
Generic function describe()
generates a natural language description for explanations generated with feature_importance()
, ceteris_paribus()
functions.
To demonstrate how automatic descriptions are generated, we first load the data set and build a random forest model that classifies which of the passengers survived the sinking of the Titanic. Then, using the DALEX
package, we generate an explainer of the model. Lastly, we select a random passenger whose prediction should be explained.
library("DALEX")
-library("ingredients")
-library("randomForest")
-titanic <- na.omit(titanic)
-
-model_titanic_rf <- randomForest(survived == "yes" ~ .,
- data = titanic)
-
-explain_titanic_rf <- explain(model_titanic_rf,
- data = titanic[,-9],
- y = titanic$survived == "yes",
- label = "Random Forest")
library("DALEX")
+library("ingredients")
+library("randomForest")
+titanic <- na.omit(titanic)
+
+model_titanic_rf <- randomForest(survived == "yes" ~ .,
+ data = titanic)
+
+explain_titanic_rf <- explain(model_titanic_rf,
+ data = titanic[,-9],
+ y = titanic$survived == "yes",
+ label = "Random Forest")
#> Preparation of a new explainer is initiated
#> -> model label : Random Forest
#> -> data : 2099 rows 8 cols
#> -> target variable : 2099 values
#> -> model_info : package randomForest , ver. 4.6.14 , task regression ( [33m default [39m )
#> -> predict function : yhat.randomForest will be used ( [33m default [39m )
-#> -> predicted values : numerical, min = 0.007074326 , mean = 0.323572 , max = 0.9909881
+#> -> predicted values : numerical, min = 0.007831174 , mean = 0.3243102 , max = 0.9920739
#> -> residual function : difference between y and yhat ( [33m default [39m )
-#> -> residuals : numerical, min = -0.8119382 , mean = 0.0008682545 , max = 0.8993734
+#> -> residuals : numerical, min = -0.8199771 , mean = 0.0001300024 , max = 0.9015269
#> [32m A new explainer has been created! [39m
-
-#> gender age class embarked country fare sibsp parch
-#> 959 female 22 1st Cherbourg Spain 108.18 1 0
+
+#> gender age class embarked country fare sibsp parch
+#> 1284 male 50 1st Southampton United States 211.1 1 1
Now we are ready to generate various explanations and then describe them with the describe()
function.
Feature importance explanation shows the importance of all the model’s variables. As it is a global explanation technique, no passenger needs to be specified.
- +Function describe()
easily describes which variables are the most important. Argument nonsignificance_treshold
as always sets the level above which variables become significant. For a higher threshold, fewer variables will be described as significant.
#> The number of important variables for Random Forest's prediction is 65 out of 108.
-#> Variables _baseline_, _baseline_, _baseline_ have the highest importantance.
+
+#> The number of important variables for Random Forest's prediction is 5 out of 8.
+#> Variables gender, class, age have the highest importantance.
Ceteris Paribus profiles show how the model’s prediction changes with the change of a specified variable.
-perturbed_variable <- "class"
-cp_rf <- ceteris_paribus(explain_titanic_rf,
- passanger,
- variables = perturbed_variable)
-plot(cp_rf, variable_type = "categorical")
perturbed_variable <- "class"
+cp_rf <- ceteris_paribus(explain_titanic_rf,
+ passanger,
+ variables = perturbed_variable)
+plot(cp_rf, variable_type = "categorical")
For a user with no experience, interpreting the above plot may not be straightforward. Thus we generate a natural language description in order to make it easier.
- -#> For the selected instance, prediction estimated by Random Forest is equal to 0.95.
+
+#> For the selected instance, prediction estimated by Random Forest is equal to 0.239.
#>
-#> Model's prediction would decrease substantially if the value of class variable would change to "engineering crew", "3rd", "restaurant staff", "victualling crew", "deck crew".
-#> The largest change would be marked if class variable would change to "engineering crew".
+#> Model's prediction would increase substantially if the value of class variable would change to "deck crew". On the other hand, Random Forest's prediction would decrease substantially if the value of class variable would change to "2nd", "restaurant staff", "3rd", "engineering crew", "victualling crew". The largest change would be marked if class variable would change to "2nd".
#>
-#> Other variables are with less importance and they do not change prediction by more than 0.04%.
+#> All the variables were displayed.
Natural language descriptions should be flexible in order to provide the desired level of complexity and specificity. Thus various parameters can modify the description being generated.
-describe(cp_rf,
- display_numbers = TRUE,
- label = "the probability that the passanger will survive")
#> Random Forest predicts that for the selected instance, the probability that the passanger will survive is equal to 0.95
+describe(cp_rf,
+ display_numbers = TRUE,
+ label = "the probability that the passanger will survive")
+#> Random Forest predicts that for the selected instance, the probability that the passanger will survive is equal to 0.239
#>
-#> The most important change in Random Forest's prediction would occur for class = "engineering crew". It decreases the prediction by 0.277.
-#> The second most important change in the prediction would occur for class = "3rd". It decreases the prediction by 0.276.
-#> The third most important change in the prediction would occur for class = "restaurant staff". It decreases the prediction by 0.215.
+#> The most important change in Random Forest's prediction would occur for class = "2nd". It decreases the prediction by 0.057.
+#> The second most important change in the prediction would occur for class = "restaurant staff". It decreases the prediction by 0.055.
+#> The third most important change in the prediction would occur for class = "3rd". It decreases the prediction by 0.053.
#>
-#> Other variable values are with less importance. They do not change the the probability that the passanger will survive by more than 0.2.
+#> Other variable values are with less importance. They do not change the the probability that the passanger will survive by more than 0.042.
Please note, that describe()
can handle only one variable at a time, so it is recommended to specify which variable should be described.
describe(cp_rf,
- display_numbers = TRUE,
- label = "the probability that the passanger will survive",
- variables = perturbed_variable)
#> Random Forest predicts that for the selected instance, the probability that the passanger will survive is equal to 0.95
+describe(cp_rf,
+ display_numbers = TRUE,
+ label = "the probability that the passanger will survive",
+ variables = perturbed_variable)
+#> Random Forest predicts that for the selected instance, the probability that the passanger will survive is equal to 0.239
#>
-#> The most important change in Random Forest's prediction would occur for class = "engineering crew". It decreases the prediction by 0.277.
-#> The second most important change in the prediction would occur for class = "3rd". It decreases the prediction by 0.276.
-#> The third most important change in the prediction would occur for class = "restaurant staff". It decreases the prediction by 0.215.
+#> The most important change in Random Forest's prediction would occur for class = "2nd". It decreases the prediction by 0.057.
+#> The second most important change in the prediction would occur for class = "restaurant staff". It decreases the prediction by 0.055.
+#> The third most important change in the prediction would occur for class = "3rd". It decreases the prediction by 0.053.
#>
-#> Other variable values are with less importance. They do not change the the probability that the passanger will survive by more than 0.2.
+#> Other variable values are with less importance. They do not change the the probability that the passanger will survive by more than 0.042.
Continuous variables are described as well.
-perturbed_variable_continuous <- "age"
-cp_rf <- ceteris_paribus(explain_titanic_rf,
- passanger)
-plot(cp_rf, variables = perturbed_variable_continuous)
perturbed_variable_continuous <- "age"
+cp_rf <- ceteris_paribus(explain_titanic_rf,
+ passanger)
+plot(cp_rf, variables = perturbed_variable_continuous)
#> Random Forest predicts that for the selected instance, prediction is equal to 0.95
+
+#> Random Forest predicts that for the selected instance, prediction is equal to 0.239
#>
-#> The highest prediction occurs for (age = 39), while the lowest for (age = 74).
-#> Breakpoint is identified at (age = 60).
+#> The highest prediction occurs for (age = 2), while the lowest for (age = 74).
+#> Breakpoint is identified at (age = 13).
#>
-#> Average model responses are *lower* for variable values *higher* than breakpoint (= 60).
-Ceteris Paribus profiles are described only for a single observation. If we want to access the influence of more than one observation, we need to describe dependency profiles.
+#> Average model responses are *higher* for variable values *lower* than breakpoint (= 13).
+Ceteris Paribus profiles are described only for a single observation. If we want to assess the influence of more than one observation, we need to describe dependence profiles.
#> Random Forest's mean prediction is equal to 0.95.
+
+#> Random Forest's mean prediction is equal to 0.239.
#>
-#> The highest prediction occurs for (fare = 80), while the lowest for (fare = 0).
+#> The highest prediction occurs for (fare = 55.081), while the lowest for (fare = 221.669696).
#> Breakpoint is identified at (fare = 80).
#>
-#> Average model responses are *higher* for variable values *higher* than breakpoint (= 80).
-
+#> Average model responses are *lower* for variable values *higher* than breakpoint (= 80).
+pdp <- aggregate_profiles(cp_rf, type = "partial", variable_type = "categorical")
+plot(pdp, variables = perturbed_variable)
#> Random Forest's mean prediction is equal to 0.95.
+
+#> Random Forest's mean prediction is equal to 0.239.
#>
-#> Model's prediction would increase substantially if the value of class variable would change to "3rd". On the other hand, Random Forest's prediction would decrease substantially if the value of class variable would change to "engineering crew". The largest change would be marked if class variable would change to "2nd".
+#> Model's prediction would increase substantially if the value of class variable would change to "2nd".
+#> The largest change would be marked if class variable would change to "1st".
#>
-#> Other variables are with less importance and they do not change prediction by more than 0.04%.
+#> Other variables are with less importance and they do not change prediction by more than 0.01%.
vignettes/vignette_simulated.Rmd
vignette_simulated.Rmd
But \(x_1\) and \(x_2\) are correlated. How do XAI methods work for such a model?
-# predict function for the model
-the_model_predict <- function(m, x) {
- x$x1 * x$x2 + x$x2
-}
-
-# correlated variables
-N <- 50
-set.seed(1)
-x1 <- runif(N, -5, 5)
-x2 <- x1 + runif(N)/100
-df <- data.frame(x1, x2)
# predict function for the model
+the_model_predict <- function(m, x) {
+ x$x1 * x$x2 + x$x2
+}
+
+# correlated variables
+N <- 50
+set.seed(1)
+x1 <- runif(N, -5, 5)
+x2 <- x1 + runif(N)/100
+df <- data.frame(x1, x2)
In fact this model is defined by the predict function the_model_predict
. So it does not matter what is in the first argument of the explain
function.
library("DALEX")
-explain_the_model <- explain(1,
- data = df,
- predict_function = the_model_predict)
library("DALEX")
+explain_the_model <- explain(1,
+ data = df,
+ predict_function = the_model_predict)
#> Preparation of a new explainer is initiated
#> -> model label : numeric ( [33m default [39m )
#> -> data : 50 rows 2 cols
@@ -151,109 +151,109 @@
Ceteris paribus
Use the ceteris_paribus()
function to see Ceteris Paribus profiles. Clearly it’s not an additive model, as the effect of \(x_1\) depends on \(x_2\).
-library("ingredients")
-library("ggplot2")
-
-sample_rows <- data.frame(x1 = -5:5,
- x2 = -5:5)
-
-cp_model <- ceteris_paribus(explain_the_model, sample_rows)
-plot(cp_model) +
- show_observations(cp_model) +
- ggtitle("Ceteris Paribus profiles")
+library("ingredients")
+library("ggplot2")
+
+sample_rows <- data.frame(x1 = -5:5,
+ x2 = -5:5)
+
+cp_model <- ceteris_paribus(explain_the_model, sample_rows)
+plot(cp_model) +
+ show_observations(cp_model) +
+ ggtitle("Ceteris Paribus profiles")
![](vignette_simulated_files/figure-html/unnamed-chunk-3-1.png)
Lets try Partial Dependency profiles, Conditional Dependency profiles and Accumulated Local profiles. For the last two we can try different smoothing factors
-pd_model <- partial_dependency(explain_the_model, variables = c("x1", "x2"))
-pd_model$`_label_` = "PDP"
-
-cd_model <- conditional_dependency(explain_the_model, variables = c("x1", "x2"))
-cd_model$`_label_` = "CDP 0.25"
-
-ad_model <- accumulated_dependency(explain_the_model, variables = c("x1", "x2"))
-ad_model$`_label_` = "ALE 0.25"
-
-plot(ad_model, cd_model, pd_model) +
- ggtitle("Feature effects - PDP, CDP, ALE")
Let’s try Partial Dependence profiles, Conditional Dependence profiles and Accumulated Local profiles. For the last two we can try different smoothing factors
+pd_model <- partial_dependence(explain_the_model, variables = c("x1", "x2"))
+pd_model$`_label_` = "PDP"
+
+cd_model <- conditional_dependence(explain_the_model, variables = c("x1", "x2"))
+cd_model$`_label_` = "CDP 0.25"
+
+ad_model <- accumulated_dependence(explain_the_model, variables = c("x1", "x2"))
+ad_model$`_label_` = "ALE 0.25"
+
+plot(ad_model, cd_model, pd_model) +
+ ggtitle("Feature effects - PDP, CDP, ALE")
cd_model_1 <- conditional_dependency(explain_the_model, variables = c("x1", "x2"), span = 0.1)
-cd_model_1$`_label_` = "CDP 0.1"
-
-cd_model_5 <- conditional_dependency(explain_the_model, variables = c("x1", "x2"), span = 0.5)
-cd_model_5$`_label_` = "CDP 0.5"
-
-ad_model_1 <- accumulated_dependency(explain_the_model, variables = c("x1", "x2"), span = 0.5)
-ad_model_1$`_label_` = "ALE 0.1"
-
-ad_model_5 <- accumulated_dependency(explain_the_model, variables = c("x1", "x2"), span = 0.5)
-ad_model_5$`_label_` = "ALE 0.5"
-
-plot(ad_model, cd_model, pd_model, cd_model_1, cd_model_5, ad_model_1, ad_model_5) +
- ggtitle("Feature effects - PDP, CDP, ALE")
cd_model_1 <- conditional_dependence(explain_the_model, variables = c("x1", "x2"), span = 0.1)
+cd_model_1$`_label_` = "CDP 0.1"
+
+cd_model_5 <- conditional_dependence(explain_the_model, variables = c("x1", "x2"), span = 0.5)
+cd_model_5$`_label_` = "CDP 0.5"
+
+ad_model_1 <- accumulated_dependence(explain_the_model, variables = c("x1", "x2"), span = 0.5)
+ad_model_1$`_label_` = "ALE 0.1"
+
+ad_model_5 <- accumulated_dependence(explain_the_model, variables = c("x1", "x2"), span = 0.5)
+ad_model_5$`_label_` = "ALE 0.5"
+
+plot(ad_model, cd_model, pd_model, cd_model_1, cd_model_5, ad_model_1, ad_model_5) +
+ ggtitle("Feature effects - PDP, CDP, ALE")
And now, let’s see how the grouping factor works
-# add grouping variable
-df$x3 <- factor(sign(df$x2))
-# update the data argument
-explain_the_model$data = df
-
-# PDP in groups
-pd_model_groups <- partial_dependency(explain_the_model,
- variables = c("x1", "x2"),
- groups = "x3")
-plot(pd_model_groups) +
- ggtitle("Partial Dependency")
# add grouping variable
+df$x3 <- factor(sign(df$x2))
+# update the data argument
+explain_the_model$data = df
+
+# PDP in groups
+pd_model_groups <- partial_dependence(explain_the_model,
+ variables = c("x1", "x2"),
+ groups = "x3")
+plot(pd_model_groups) +
+ ggtitle("Partial Dependence")
# ALE in groups
-ad_model_groups <- accumulated_dependency(explain_the_model,
- variables = c("x1", "x2"),
- groups = "x3")
-plot(ad_model_groups) +
- ggtitle("Accumulated Local")
# ALE in groups
+ad_model_groups <- accumulated_dependence(explain_the_model,
+ variables = c("x1", "x2"),
+ groups = "x3")
+plot(ad_model_groups) +
+ ggtitle("Accumulated Local")
# CDP in groups
-cd_model_groups <- conditional_dependency(explain_the_model,
- variables = c("x1", "x2"),
- groups = "x3")
-plot(cd_model_groups) +
- ggtitle("Conditional Dependency")
# CDP in groups
+cd_model_groups <- conditional_dependence(explain_the_model,
+ variables = c("x1", "x2"),
+ groups = "x3")
+plot(cd_model_groups) +
+ ggtitle("Conditional Dependence")
#> R version 3.6.1 (2019-07-05)
-#> Platform: x86_64-apple-darwin15.6.0 (64-bit)
-#> Running under: macOS Mojave 10.14.4
+
+#> R version 3.6.0 (2019-04-26)
+#> Platform: x86_64-w64-mingw32/x64 (64-bit)
+#> Running under: Windows 10 x64 (build 17763)
#>
#> Matrix products: default
-#> BLAS: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
-#> LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
#>
#> locale:
-#> [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
+#> [1] LC_COLLATE=Polish_Poland.1250 LC_CTYPE=Polish_Poland.1250
+#> [3] LC_MONETARY=Polish_Poland.1250 LC_NUMERIC=C
+#> [5] LC_TIME=Polish_Poland.1250
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
-#> [1] ggplot2_3.2.1 ingredients_0.5.2 DALEX_0.9.3
+#> [1] ggplot2_3.2.1 ingredients_1.0 DALEX_0.9.4
#>
#> loaded via a namespace (and not attached):
-#> [1] Rcpp_1.0.3 compiler_3.6.1 pillar_1.4.3 tools_3.6.1
-#> [5] digest_0.6.23 evaluate_0.14 memoise_1.1.0 lifecycle_0.1.0
-#> [9] tibble_2.1.3 gtable_0.3.0 pkgconfig_2.0.3 rlang_0.4.2
-#> [13] rstudioapi_0.10 yaml_2.2.0 pkgdown_1.4.1 xfun_0.11
-#> [17] withr_2.1.2 stringr_1.4.0 dplyr_0.8.3 knitr_1.26
-#> [21] desc_1.2.0 fs_1.3.1 rprojroot_1.3-2 grid_3.6.1
+#> [1] Rcpp_1.0.3 compiler_3.6.0 pillar_1.4.3 tools_3.6.0
+#> [5] digest_0.6.24 evaluate_0.14 memoise_1.1.0 lifecycle_0.1.0
+#> [9] tibble_2.1.3 gtable_0.3.0 pkgconfig_2.0.3 rlang_0.4.4
+#> [13] rstudioapi_0.10 yaml_2.2.0 pkgdown_1.4.1 xfun_0.6
+#> [17] withr_2.1.2 stringr_1.4.0 dplyr_0.8.3 knitr_1.22
+#> [21] desc_1.2.0 fs_1.3.1 rprojroot_1.3-2 grid_3.6.0
#> [25] tidyselect_0.2.5 glue_1.3.1 R6_2.4.1 rmarkdown_1.16
#> [29] farver_2.0.3 purrr_0.3.3 magrittr_1.5 backports_1.1.5
#> [33] scales_1.1.0 htmltools_0.4.0 MASS_7.3-51.4 assertthat_0.2.1
@@ -263,7 +263,6 @@
vignettes/vignette_titanic.Rmd
vignette_titanic.Rmd
Let’s see an example for DALEX
package for classification models for the survival problem for Titanic dataset. Here we are using a dataset titanic
available in the DALEX
package. Note that this data was copied from the stablelearner
package.
#> gender age class embarked country fare sibsp parch survived
#> 1 male 42 3rd Southampton United States 7.11 0 0 no
#> 2 male 13 3rd Southampton United States 20.05 0 2 no
@@ -127,12 +127,12 @@
Model for Titanic survival
Ok, now it’s time to create a model. Let’s use the Random Forest model.
-# prepare model
-library("randomForest")
-titanic <- na.omit(titanic)
-model_titanic_rf <- randomForest(survived == "yes" ~ gender + age + class + embarked +
- fare + sibsp + parch, data = titanic)
-model_titanic_rf
+# prepare model
+library("randomForest")
+titanic <- na.omit(titanic)
+model_titanic_rf <- randomForest(survived == "yes" ~ gender + age + class + embarked +
+ fare + sibsp + parch, data = titanic)
+model_titanic_rf
#>
#> Call:
#> randomForest(formula = survived == "yes" ~ gender + age + class + embarked + fare + sibsp + parch, data = titanic)
@@ -140,84 +140,84 @@
#> Number of trees: 500
#> No. of variables tried at each split: 2
#>
-#> Mean of squared residuals: 0.1429994
-#> % Var explained: 34.76
+#> Mean of squared residuals: 0.1428618
+#> % Var explained: 34.82
The third step (it’s optional but useful) is to create a DALEX
explainer for random forest model.
library("DALEX")
-explain_titanic_rf <- explain(model_titanic_rf,
- data = titanic[,-9],
- y = titanic$survived == "yes",
- label = "Random Forest v7")
library("DALEX")
+explain_titanic_rf <- explain(model_titanic_rf,
+ data = titanic[,-9],
+ y = titanic$survived == "yes",
+ label = "Random Forest v7")
#> Preparation of a new explainer is initiated
#> -> model label : Random Forest v7
#> -> data : 2099 rows 8 cols
#> -> target variable : 2099 values
#> -> model_info : package randomForest , ver. 4.6.14 , task regression ( [33m default [39m )
#> -> predict function : yhat.randomForest will be used ( [33m default [39m )
-#> -> predicted values : numerical, min = 0.009726169 , mean = 0.3249803 , max = 0.9917946
+#> -> predicted values : numerical, min = 0.0133887 , mean = 0.3248256 , max = 0.9924437
#> -> residual function : difference between y and yhat ( [33m default [39m )
-#> -> residuals : numerical, min = -0.8000352 , mean = -0.0005401156 , max = 0.8904472
+#> -> residuals : numerical, min = -0.7953965 , mean = -0.0003854325 , max = 0.9009842
#> [32m A new explainer has been created! [39m
Use the feature_importance()
explainer to present importance of particular features. Note that type = "difference"
normalizes dropouts, and now they all start in 0.
#> variable mean_dropout_loss label
-#> 1 _full_model_ 0.3337988 Random Forest v7
-#> 2 country 0.3337988 Random Forest v7
-#> 3 parch 0.3446496 Random Forest v7
-#> 4 sibsp 0.3461183 Random Forest v7
-#> 5 embarked 0.3505868 Random Forest v7
-#> 6 fare 0.3742650 Random Forest v7
-
+#> 1 _full_model_ 0.3329421 Random Forest v7
+#> 2 country 0.3329421 Random Forest v7
+#> 3 parch 0.3435576 Random Forest v7
+#> 4 sibsp 0.3447126 Random Forest v7
+#> 5 embarked 0.3498490 Random Forest v7
+#> 6 fare 0.3729980 Random Forest v7
+
As we see the most important feature is gender
. Next three important features are class
, age
and fare
. Let’s see the link between model response and these features.
Such univariate relation can be calculated with partial_dependency()
.
Such univariate relation can be calculated with partial_dependence()
.
Kids 5 years old and younger have much higher survival probability.
-#> Top profiles :
#> _vname_ _label_ _x_ _yhat_ _ids_
-#> 1 fare Random Forest v7 0.0000000 0.3491995 0
-#> 2 age Random Forest v7 0.1666667 0.5393965 0
-#> 3 age Random Forest v7 2.0000000 0.5598879 0
-#> 4 age Random Forest v7 4.0000000 0.5647907 0
-#> 5 fare Random Forest v7 6.1904000 0.3325981 0
-#> 6 age Random Forest v7 7.0000000 0.5278368 0
-
+#> 1 fare Random Forest v7 0.0000000 0.3159050 0
+#> 2 age Random Forest v7 0.1666667 0.5384993 0
+#> 3 age Random Forest v7 2.0000000 0.5608622 0
+#> 4 age Random Forest v7 4.0000000 0.5678473 0
+#> 5 fare Random Forest v7 6.1904000 0.3005658 0
+#> 6 age Random Forest v7 7.0000000 0.5320725 0
+
Let’s see break down explanation for model predictions for 8 years old male from 1st class that embarked from port C.
First Ceteris Paribus Profiles for numerical variables
-new_passanger <- data.frame(
- class = factor("1st", levels = c("1st", "2nd", "3rd", "deck crew", "engineering crew", "restaurant staff", "victualling crew")),
- gender = factor("male", levels = c("female", "male")),
- age = 8,
- sibsp = 0,
- parch = 0,
- fare = 72,
- embarked = factor("Southampton", levels = c("Belfast", "Cherbourg", "Queenstown", "Southampton"))
-)
-
-sp_rf <- ceteris_paribus(explain_titanic_rf, new_passanger)
-plot(sp_rf) +
- show_observations(sp_rf)
new_passanger <- data.frame(
+ class = factor("1st", levels = c("1st", "2nd", "3rd", "deck crew", "engineering crew", "restaurant staff", "victualling crew")),
+ gender = factor("male", levels = c("female", "male")),
+ age = 8,
+ sibsp = 0,
+ parch = 0,
+ fare = 72,
+ embarked = factor("Southampton", levels = c("Belfast", "Cherbourg", "Queenstown", "Southampton"))
+)
+
+sp_rf <- ceteris_paribus(explain_titanic_rf, new_passanger)
+plot(sp_rf) +
+ show_observations(sp_rf)
And for selected categorical variables. Note, that sibsp is numerical but here is presented as a categorical variable.
-plot(sp_rf,
- variables = c("class", "embarked", "gender", "sibsp"),
- variable_type = "categorical")
plot(sp_rf,
+ variables = c("class", "embarked", "gender", "sibsp"),
+ variable_type = "categorical")
It looks like the most important feature for this passenger is age
and sex
. After all his odds for survival are higher than for the average passenger. Mainly because of the young age and despite being a male.
passangers <- select_sample(titanic, n = 100)
-
-sp_rf <- ceteris_paribus(explain_titanic_rf, passangers)
-clust_rf <- cluster_profiles(sp_rf, k = 3)
-head(clust_rf)
passangers <- select_sample(titanic, n = 100)
+
+sp_rf <- ceteris_paribus(explain_titanic_rf, passangers)
+clust_rf <- cluster_profiles(sp_rf, k = 3)
+head(clust_rf)
#> Top profiles :
#> _vname_ _label_ _x_ _cluster_ _yhat_ _ids_
-#> 1 fare Random Forest v7_1 0.0000000 1 0.1989530 0
-#> 2 sibsp Random Forest v7_1 0.0000000 1 0.1715849 0
-#> 3 parch Random Forest v7_1 0.0000000 1 0.1739948 0
-#> 4 age Random Forest v7_1 0.1666667 1 0.4734090 0
-#> 5 parch Random Forest v7_1 0.2800000 1 0.1739649 0
-#> 6 sibsp Random Forest v7_1 1.0000000 1 0.1681367 0
-
+#> 1 fare Random Forest v7_1 0.0000000 1 0.1957328 0
+#> 2 sibsp Random Forest v7_1 0.0000000 1 0.1707538 0
+#> 3 parch Random Forest v7_1 0.0000000 1 0.1744207 0
+#> 4 age Random Forest v7_1 0.1666667 1 0.4916774 0
+#> 5 parch Random Forest v7_1 0.2800000 1 0.1744207 0
+#> 6 sibsp Random Forest v7_1 1.0000000 1 0.1646322 0
+
#> R version 3.6.1 (2019-07-05)
-#> Platform: x86_64-apple-darwin15.6.0 (64-bit)
-#> Running under: macOS Mojave 10.14.4
+
+#> R version 3.6.0 (2019-04-26)
+#> Platform: x86_64-w64-mingw32/x64 (64-bit)
+#> Running under: Windows 10 x64 (build 17763)
#>
#> Matrix products: default
-#> BLAS: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
-#> LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
#>
#> locale:
-#> [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
+#> [1] LC_COLLATE=Polish_Poland.1250 LC_CTYPE=Polish_Poland.1250
+#> [3] LC_MONETARY=Polish_Poland.1250 LC_NUMERIC=C
+#> [5] LC_TIME=Polish_Poland.1250
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
-#> [1] ingredients_0.5.2 randomForest_4.6-14 DALEX_0.9.3
+#> [1] ingredients_1.0 randomForest_4.6-14 DALEX_0.9.4
#>
#> loaded via a namespace (and not attached):
-#> [1] Rcpp_1.0.3 compiler_3.6.1 pillar_1.4.3 tools_3.6.1
-#> [5] digest_0.6.23 evaluate_0.14 memoise_1.1.0 lifecycle_0.1.0
-#> [9] tibble_2.1.3 gtable_0.3.0 pkgconfig_2.0.3 rlang_0.4.2
-#> [13] rstudioapi_0.10 yaml_2.2.0 pkgdown_1.4.1 xfun_0.11
-#> [17] stringr_1.4.0 dplyr_0.8.3 knitr_1.26 desc_1.2.0
-#> [21] fs_1.3.1 rprojroot_1.3-2 grid_3.6.1 tidyselect_0.2.5
+#> [1] Rcpp_1.0.3 compiler_3.6.0 pillar_1.4.3 tools_3.6.0
+#> [5] digest_0.6.24 evaluate_0.14 memoise_1.1.0 lifecycle_0.1.0
+#> [9] tibble_2.1.3 gtable_0.3.0 pkgconfig_2.0.3 rlang_0.4.4
+#> [13] rstudioapi_0.10 yaml_2.2.0 pkgdown_1.4.1 xfun_0.6
+#> [17] stringr_1.4.0 dplyr_0.8.3 knitr_1.22 desc_1.2.0
+#> [21] fs_1.3.1 rprojroot_1.3-2 grid_3.6.0 tidyselect_0.2.5
#> [25] glue_1.3.1 R6_2.4.1 rmarkdown_1.16 farver_2.0.3
#> [29] ggplot2_3.2.1 purrr_0.3.3 magrittr_1.5 backports_1.1.5
#> [33] scales_1.1.0 htmltools_0.4.0 MASS_7.3-51.4 assertthat_0.2.1
@@ -305,7 +305,6 @@
ceteris_paribus()
for calculation of the Ceteris Paribus / What-If Profiles (read more at https://pbiecek.github.io/ema/ceterisParibus.html),partial_dependency()
for Partial Dependency Plots,partial_dependence()
for Partial Dependence Plots,
conditional_dependency()
for Conditional Dependency Plots also called M Plots,conditional_dependence()
for Conditional Dependence Plots also called M Plots,
accumulated_dependency()
for Accumulated Local Effects Plots,accumulated_dependence()
for Accumulated Local Effects Plots,
aggregate_profiles()
and cluster_profiles()
for aggregation of Ceteris Paribus Profiles,# the easiest way to get ingredients is to install it from CRAN:
+# the easiest way to get ingredients is to install it from CRAN:
install.packages("ingredients")
# Or the development version from GitHub:
# install.packages("devtools")
-devtools::install_github("ModelOriented/ingredients")
+devtools::install_github("ModelOriented/ingredients")
NEWS.md
dependency
to dependence
#103
+ceteris_paribus
profiles are now working for categorical variablesshow_profiles
, show_observations
, show_residuals
are now working for categorical variables
desc_sorting
in plot.variable_importance_explainer
#94
feature_importance
now does 15
permutations on each variable by default. Use the B
argument to change this numberaggregate_profiles
use now gaussian kernel smoothing. Use the span
argument for fine control over this parameter (#79)plot.aspect_importance
- it can plot more than single figuretriplot
, plot.aspect_importance
and plot_group_variables
to add more clarity in plots and allow some parameterizationtriplot
function that illustrates hierarchical aspect_importance()
groupingsaspect_importance()
functionsonly_numerical
parameter to variable_type
in functions aggregated_profiles(), cluster_profiles(), plot() and others, as requested in #15describe()
function for ceteris_paribus()
, feature_importance()
and aggregate_profiles()
explanations.aggregated_profiles_conditional
and aggregated_profiles_accumulated
are rewritten with some code fixeslime
is implemented in the lime()
/aspect_importance()
function.B
that replicates permutations B
times and calculates average from drop loss.plotD3
now supports Ceteris Paribus Profiles.show_profiles
and show_residuals
functions extend Ceteris Paribus Plots.describe()
and print.ceteris_paribus_descriptions()
for text based descriptions of Ceteris Paribus explainersplot.ceteris_paribus_explainer
works now also for categorical variables. Use the only_numerical = FALSE
to force barsceteris_paribus_2d
extends classical ceteris paribus profilescluster_profiles
helps to identify interactionsaggregate_profiles
calculates partial dependency plots and much more
model_feature_importance
and model_feature_response
from DALEX
to ingredients
R/accumulated_dependence.R
+ accumulated_dependence.Rd
Accumulated Local Effects Profiles accumulate local changes in Ceteris Paribus Profiles.
+Function accumulated_dependence
calls ceteris_paribus
and then aggregate_profiles
.
accumulated_dependence(x, ...) + +# S3 method for explainer +accumulated_dependence( + x, + variables = NULL, + N = 500, + variable_splits = NULL, + grid_points = 101, + ..., + variable_type = "numerical" +) + +# S3 method for default +accumulated_dependence( + x, + data, + predict_function = predict, + label = class(x)[1], + variables = NULL, + N = 500, + variable_splits = NULL, + grid_points = 101, + ..., + variable_type = "numerical" +) + +# S3 method for ceteris_paribus_explainer +accumulated_dependence(x, ..., variables = NULL) + +accumulated_dependency(x, ...)+ +
x | +an explainer created with function |
+
---|---|
... | +other parameters |
+
variables | +names of variables for which profiles shall be calculated.
+Will be passed to |
+
N | +number of observations used for calculation of partial dependence profiles. +By default, 500 observations will be chosen randomly. |
+
variable_splits | +named list of splits for variables, in most cases created with |
+
grid_points | +number of points for profile. Will be passed to |
+
variable_type | +a character. If "numerical" then only numerical variables will be calculated. +If "categorical" then only categorical variables will be calculated. |
+
data | +validation dataset Will be extracted from |
+
predict_function | +predict function Will be extracted from |
+
label | +name of the model. By default it's extracted from the |
+
an object of the class aggregated_profiles_explainer
Find more details in the Accumulated Local Dependence Chapter.
+ +ALEPlot: Accumulated Local Effects (ALE) Plots and Partial Dependence (PD) Plots https://cran.r-project.org/package=ALEPlot, +Explanatory Model Analysis. Explore, Explain and Examine Predictive Models. https://pbiecek.github.io/ema
+ + ++library("DALEX")#>+#>#> +#>#>+#> +#>+model_titanic_glm <- glm(survived ~ gender + age + fare, + data = titanic_imputed, family = "binomial") + +explain_titanic_glm <- explain(model_titanic_glm, + data = titanic_imputed[,-8], + y = titanic_imputed[,8], + verbose = FALSE) + +adp_glm <- accumulated_dependence(explain_titanic_glm, + N = 150, variables = c("age", "fare")) +head(adp_glm)#> Top profiles : +#> _vname_ _label_ _x_ _yhat_ _ids_ +#> 1 age lm 0.1666667 0.000000000 0 +#> 2 age lm 2.0000000 -0.002498427 0 +#> 3 age lm 4.0000000 -0.005212249 0 +#> 4 age lm 7.0000000 -0.009258781 0 +#> 5 age lm 9.0000000 -0.011939391 0 +#> 6 age lm 13.0000000 -0.017257909 0plot(adp_glm)#>#>#> Warning: The response has five or fewer unique values. Are you sure you want to do regression?+explain_titanic_rf <- explain(model_titanic_rf, + data = titanic_imputed[,-8], + y = titanic_imputed[,8], + verbose = FALSE) + +adp_rf <- accumulated_dependence(explain_titanic_rf, N = 200, variable_type = "numerical") +plot(adp_rf)+adp_rf <- accumulated_dependence(explain_titanic_rf, N = 200, variable_type = "categorical") +plotD3(adp_rf, label_margin = 80, scale_plot = TRUE) +# }
The function aggregate_profiles()
calculates an aggregate of ceteris paribus profiles.
-It can be: Partial Dependency Profile (average across Ceteris Paribus Profiles),
-Conditional Dependency Profile (local weighted average across Ceteris Paribus Profiles) or
-Accumulated Local Dependency Profile (cummulated average local changes in Ceteris Paribus Profiles).
aggregate_profiles( @@ -158,9 +161,10 @@- +Aggregates Ceteris Paribus Profiles
groups = NULL, type = "partial", variables = NULL, - span = 0.25 + span = 0.25, + center = FALSE )
an object of the class aggregated_profiles_explainer
Explanatory Model Analysis. Explore, Explain and Examine Predictive Models. https://pbiecek.github.io/ema
+library("DALEX") @@ -290,8 +300,8 @@Examp pdp_rf_a$`_label_` <- "RF_accumulated" plot(pdp_rf_p, pdp_rf_c, pdp_rf_a, color = "_label_")
#> -#>#> library(ggplot2)+#> Warning: pakiet 'ggplot2' został zbudowany w wersji R 3.6.1#> +#>#>#> #>pdp_rf <- aggregate_profiles(cp_rf, variables = "class", variable_type = "categorical", @@ -313,8 +323,11 @@Examp
Contents
diff --git a/docs/reference/calculate_oscillations.html b/docs/reference/calculate_oscillations.html index f2aff99d..bdc0f431 100644 --- a/docs/reference/calculate_oscillations.html +++ b/docs/reference/calculate_oscillations.html @@ -36,14 +36,15 @@ + + - @@ -82,7 +83,7 @@
Oscillations are proxies for local feature importance at the instance level. Find more details in Ceteris Paribus Oscillations Chapter.
+calculate_oscillations(x, sort = TRUE, ...)- +
other arguments |
an object of the class ceteris_paribus_oscillations
Explanatory Model Analysis. Explore, Explain and Examine Predictive Models. https://pbiecek.github.io/ema
+library("DALEX") @@ -188,7 +193,7 @@Examp #> -> model label : lm ( default ) #> -> data : 500 rows 7 cols #> -> target variable : 500 values -#> -> model_info : package stats , ver. 3.6.1 , task regression ( default ) +#> -> model_info : package stats , ver. 3.6.0 , task regression ( default ) #> -> predict function : yhat.glm will be used ( default ) #> -> predicted values : numerical, min = 0.0795294 , mean = 0.302 , max = 0.9859411 #> -> residual function : difference between y and yhat ( default ) @@ -239,8 +244,11 @@
Examp
Contents
diff --git a/docs/reference/calculate_variable_profile.html b/docs/reference/calculate_variable_profile.html index 6188ac04..0e506924 100644 --- a/docs/reference/calculate_variable_profile.html +++ b/docs/reference/calculate_variable_profile.html @@ -36,13 +36,14 @@ + + - @@ -81,7 +82,7 @@
This function calculates individual variable profiles (ceteris paribus profiles), i.e. series of predictions from a model calculated for observations with altered single coordinate.
+calculate_variable_profile( @@ -161,7 +164,7 @@- +Internal Function for Individual Variable Profiles
predict_function = predict, ... )
other parameters that will be passed to the |
a data frame with profiles for selected variables and selected observations
+Note that calculate_variable_profile
function is S3 generic.
If you want to work on non standard data sources (like H2O ddf, external databases)
you should overload it.
Explanatory Model Analysis. Explore, Explain and Examine Predictive Models. https://pbiecek.github.io/ema
This function calculates candidate splits for each selected variable. For numerical variables splits are calculated as percentiles (in general uniform quantiles of the length grid_points). For all other variables splits are calculated as unique values.
+calculate_variable_split(data, variables = colnames(data), grid_points = 101) # S3 method for default calculate_variable_split(data, variables = colnames(data), grid_points = 101)- +
number of points used for response path |
A named list with splits for selected variables
+Note that calculate_variable_split
function is S3 generic.
If you want to work on non standard data sources (like H2O ddf, external databases)
you should overload it.
This explainer works for individual observations. For each observation it calculates Ceteris Paribus Profiles for selected variables. Such profiles can be used to hypothesize about model results if selected variable is changed. For this reason it is also called 'What-If Profiles'.
+ceteris_paribus(x, ...) @@ -177,7 +180,7 @@- +Ceteris Paribus Profiles aka Individual Variable Profiles
label = class(x)[1], ... )
name of the model. By default it's extracted from the |
an object of the class ceteris_paribus_explainer
.
Find more details in Ceteris Paribus Chapter.
+Explanatory Model Analysis. Explore, Explain and Examine Predictive Models. https://pbiecek.github.io/ema
+library("DALEX") @@ -331,9 +337,13 @@Examp
Contents
diff --git a/docs/reference/ceteris_paribus_2d-1.png b/docs/reference/ceteris_paribus_2d-1.png index 447a636b..f0d0f959 100644 Binary files a/docs/reference/ceteris_paribus_2d-1.png and b/docs/reference/ceteris_paribus_2d-1.png differ diff --git a/docs/reference/ceteris_paribus_2d-2.png b/docs/reference/ceteris_paribus_2d-2.png index 74dfd72a..55ad8149 100644 Binary files a/docs/reference/ceteris_paribus_2d-2.png and b/docs/reference/ceteris_paribus_2d-2.png differ diff --git a/docs/reference/ceteris_paribus_2d.html b/docs/reference/ceteris_paribus_2d.html index 295a407b..f71117ae 100644 --- a/docs/reference/ceteris_paribus_2d.html +++ b/docs/reference/ceteris_paribus_2d.html @@ -36,14 +36,15 @@ + + - @@ -82,7 +83,7 @@
This function calculates ceteris paribus profiles for grid of values spanned by two variables. It may be useful to identify or present interactions between two variables.
+ceteris_paribus_2d(explainer, observation, grid_points = 101, variables = NULL)- +
if specified, then only these variables will be explained |
an object of the class ceteris_paribus_2d_explainer
.
library("DALEX") @@ -187,11 +191,11 @@Examp #> -> model label : lm ( default ) #> -> data : 2207 rows 7 cols #> -> target variable : 2207 values -#> -> model_info : package stats , ver. 3.6.1 , task regression ( default ) +#> -> model_info : package stats , ver. 3.6.0 , task regression ( default ) #> -> predict function : yhat.glm will be used ( default ) #> -> predicted values : numerical, min = 0.1707237 , mean = 0.3221568 , max = 0.9983551 #> -> residual function : difference between y and yhat ( default ) -#> -> residuals : numerical, min = -0.9519492 , mean = 4.78827e-11 , max = 0.8167072 +#> -> residuals : numerical, min = -0.9519492 , mean = 4.788274e-11 , max = 0.8167072 #> A new explainer has been created!
cp_rf <- ceteris_paribus_2d(explain_titanic_glm, titanic_imputed[1,], variables = c("age", "fare", "sibsp")) @@ -239,7 +243,9 @@Examp
Contents
diff --git a/docs/reference/cluster_profiles-1.png b/docs/reference/cluster_profiles-1.png index 0f2be5fc..85e40f23 100644 Binary files a/docs/reference/cluster_profiles-1.png and b/docs/reference/cluster_profiles-1.png differ diff --git a/docs/reference/cluster_profiles-2.png b/docs/reference/cluster_profiles-2.png index 1b2ac122..8352e90b 100644 Binary files a/docs/reference/cluster_profiles-2.png and b/docs/reference/cluster_profiles-2.png differ diff --git a/docs/reference/cluster_profiles-3.png b/docs/reference/cluster_profiles-3.png index b9d8e104..af034aa2 100644 Binary files a/docs/reference/cluster_profiles-3.png and b/docs/reference/cluster_profiles-3.png differ diff --git a/docs/reference/cluster_profiles.html b/docs/reference/cluster_profiles.html index 728717c7..5afe50eb 100644 --- a/docs/reference/cluster_profiles.html +++ b/docs/reference/cluster_profiles.html @@ -36,14 +36,15 @@ + + - @@ -82,7 +83,7 @@
This function calculates aggregates of ceteris paribus profiles based on hierarchical clustering.
+cluster_profiles( @@ -156,7 +159,7 @@- +Cluster Ceteris Paribus Profiles
k = 3, variables = NULL )
if not |
an object of the class aggregated_profiles_explainer
Find more details in the Clustering Profiles Chapter.
+Explanatory Model Analysis. Explore, Explain and Examine Predictive Models. https://pbiecek.github.io/ema
+library("DALEX") @@ -214,11 +220,11 @@Examp #> -> model label : lm ( default ) #> -> data : 2207 rows 7 cols #> -> target variable : 2207 values -#> -> model_info : package stats , ver. 3.6.1 , task regression ( default ) +#> -> model_info : package stats , ver. 3.6.0 , task regression ( default ) #> -> predict function : yhat.glm will be used ( default ) #> -> predicted values : numerical, min = 0.1490412 , mean = 0.3221568 , max = 0.9878987 #> -> residual function : difference between y and yhat ( default ) -#> -> residuals : numerical, min = -0.8898433 , mean = 4.198546e-13 , max = 0.8448637 +#> -> residuals : numerical, min = -0.8898433 , mean = 4.198219e-13 , max = 0.8448637 #> A new explainer has been created!
cp_rf <- ceteris_paribus(explain_titanic_glm, selected_passangers) clust_rf <- cluster_profiles(cp_rf, k = 3, variables = "age") @@ -316,9 +322,13 @@Examp
Contents
diff --git a/docs/reference/conditional_dependence-1.png b/docs/reference/conditional_dependence-1.png new file mode 100644 index 00000000..8945e837 Binary files /dev/null and b/docs/reference/conditional_dependence-1.png differ diff --git a/docs/reference/conditional_dependence-2.png b/docs/reference/conditional_dependence-2.png new file mode 100644 index 00000000..18e89b1e Binary files /dev/null and b/docs/reference/conditional_dependence-2.png differ diff --git a/docs/reference/conditional_dependence.html b/docs/reference/conditional_dependence.html new file mode 100644 index 00000000..4525d8b4 --- /dev/null +++ b/docs/reference/conditional_dependence.html @@ -0,0 +1,321 @@ + + + + + + + + +Conditional Dependence Profiles — conditional_dependence • ingredients + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +++ + + + + + + + diff --git a/docs/reference/describe-1.png b/docs/reference/describe-1.png index 749feeb6..1f7db316 100644 Binary files a/docs/reference/describe-1.png and b/docs/reference/describe-1.png differ diff --git a/docs/reference/describe-2.png b/docs/reference/describe-2.png index c68297c2..f3276ca9 100644 Binary files a/docs/reference/describe-2.png and b/docs/reference/describe-2.png differ diff --git a/docs/reference/describe.html b/docs/reference/describe.html index ab718bc6..3a74b10c 100644 --- a/docs/reference/describe.html +++ b/docs/reference/describe.html @@ -6,7 +6,7 @@ -+ + + + + + +++ + + +++ +++ +Conditional Dependence Profiles
+ Source:R/conditional_dependence.R
++conditional_dependence.Rd
+ ++ +Conditional Dependence Profiles (aka Local Profiles) average locally Ceteris Paribus Profiles. +Function 'conditional_dependence' calls 'ceteris_paribus' and then 'aggregate_profiles'.
+ +conditional_dependence(x, ...) + +# S3 method for explainer +conditional_dependence( + x, + variables = NULL, + N = 500, + variable_splits = NULL, + grid_points = 101, + ..., + variable_type = "numerical" +) + +# S3 method for default +conditional_dependence( + x, + data, + predict_function = predict, + label = class(x)[1], + variables = NULL, + N = 500, + variable_splits = NULL, + grid_points = 101, + ..., + variable_type = "numerical" +) + +# S3 method for ceteris_paribus_explainer +conditional_dependence(x, ..., variables = NULL) + +local_dependency(x, ...) + +conditional_dependency(x, ...)+ +Arguments
++
+ ++ + +x ++ an explainer created with function
DALEX::explain()
, an object of the classceteris_paribus_explainer
+or a model to be explained.+ +... ++ other parameters
+ +variables ++ names of variables for which profiles shall be calculated. +Will be passed to
calculate_variable_split
. IfNULL
then all variables from the validation data will be used.+ +N ++ number of observations used for calculation of partial dependence profiles. By default 500.
+ +variable_splits ++ named list of splits for variables, in most cases created with
calculate_variable_split
. +IfNULL
then it will be calculated based on validation data available in the explainer
.+ +grid_points ++ number of points for profile. Will be passed to
calculate_variable_split
.+ +variable_type ++ a character. If
numerical
then only numerical variables will be calculated. +Ifcategorical
then only categorical variables will be calculated.+ +data ++ validation dataset, will be extracted from
x
if it's an explainer +NOTE: It is best when target variable is not present in thedata
+ +predict_function ++ predict function, will be extracted from
x
if it's an explainer+ +label ++ name of the model. By default it's extracted from the
class
attribute of the modelValue
+ +an object of the class
+ +aggregated_profiles_explainer
Details
+ +Find more details in the Accumulated Local Dependence Chapter.
+ +References
+ +Explanatory Model Analysis. Explore, Explain and Examine Predictive Models. https://pbiecek.github.io/ema
+ + +Examples
++library("DALEX") + +model_titanic_glm <- glm(survived ~ gender + age + fare, + data = titanic_imputed, family = "binomial") + +explain_titanic_glm <- explain(model_titanic_glm, + data = titanic_imputed[,-8], + y = titanic_imputed[,8], + verbose = FALSE) + +cdp_glm <- conditional_dependence(explain_titanic_glm, + N = 150, variables = c("age", "fare")) +head(cdp_glm)#> Top profiles : +#> _vname_ _label_ _x_ _yhat_ _ids_ +#> age.lm.0.1666666667 age lm 0.1666667 0.3488203 0 +#> age.lm.2 age lm 2.0000000 0.3452057 0 +#> age.lm.4 age lm 4.0000000 0.3412918 0 +#> age.lm.7 age lm 7.0000000 0.3354948 0 +#> age.lm.9 age lm 9.0000000 0.3316884 0 +#> age.lm.13 age lm 13.0000000 0.3242332 0plot(cdp_glm)+# \donttest{ +library("randomForest") + +model_titanic_rf <- randomForest(survived ~., data = titanic_imputed)#> Warning: The response has five or fewer unique values. Are you sure you want to do regression?+explain_titanic_rf <- explain(model_titanic_rf, + data = titanic_imputed[,-8], + y = titanic_imputed[,8], + verbose = FALSE) + +cdp_rf <- conditional_dependence(explain_titanic_rf, N = 200, variable_type = "numerical") +plot(cdp_rf)+cdp_rf <- conditional_dependence(explain_titanic_rf, N = 200, variable_type = "categorical") +plotD3(cdp_rf, label_margin = 80, scale_plot = TRUE) +# }Natural language description of feature importance explainer — describe.partial_dependency_explainer • ingredients +Natural language description of feature importance explainer — describe.partial_dependence_explainer • ingredients @@ -35,16 +35,17 @@ - + + + - @@ -83,7 +84,7 @@
Generic function describe
generates a natural language
description of ceteris_paribus()
, aggregated_profiles()
and
feature_importance()
explanations which enhances their interpretability.
# S3 method for partial_dependency_explainer +# S3 method for partial_dependence_explainer describe( x, nonsignificance_treshold = 0.15, @@ -175,7 +178,7 @@- +Natural language description of feature importance explainer
# S3 method for feature_importance_explainer describe(x, nonsignificance_treshold = 0.15, ...)Arguments
label for model's prediction |
Function describe.ceteris_paribus()
generates a natural language description of
@@ -230,6 +233,7 @@
nonsignificance_treshold
.
The description prints the three most important variables for the model's prediction.
The current design of DALEX explainer does not allow for displaying variable values.
+
library("DALEX") @@ -294,7 +298,7 @@Examp #> -> model label : lm ( default ) #> -> data : 1000 rows 5 cols #> -> target variable : 1000 values -#> -> model_info : package stats , ver. 3.6.1 , task regression ( default ) +#> -> model_info : package stats , ver. 3.6.0 , task regression ( default ) #> -> predict function : yhat.lm will be used ( default ) #> -> predicted values : numerical, min = 1781.848 , mean = 3487.019 , max = 6176.032 #> -> residual function : difference between y and yhat ( default ) @@ -302,15 +306,17 @@
Examp #>
A new explainer has been created!describe(fi_lm)#> The number of important variables for lm's prediction is 43 out of 75. -#> Variables _baseline_, _baseline_, _baseline_ have the highest importantance.+plot(fi_lm)describe(fi_lm)#> The number of important variables for lm's prediction is 3 out of 5. +#> Variables district, surface, floor have the highest importantance.
This function calculates permutation based feature importance. For this reason it is also called the Variable Dropout Plot.
+feature_importance(x, ...) @@ -177,7 +180,7 @@- +Feature Importance
variables = NULL, variable_groups = NULL )
predict function, will be extracted from |
an object of the class feature_importance
Find more details in the Feature Importance Chapter.
+Explanatory Model Analysis. Explore, Explain and Examine Predictive Models. https://pbiecek.github.io/ema
+@@ -162,21 +162,21 @@library("DALEX") @@ -259,11 +265,11 @@Examp #> -> model label : lm ( default ) #> -> data : 2207 rows 7 cols #> -> target variable : 2207 values -#> -> model_info : package stats , ver. 3.6.1 , task regression ( default ) +#> -> model_info : package stats , ver. 3.6.0 , task regression ( default ) #> -> predict function : yhat.glm will be used ( default ) #> -> predicted values : numerical, min = 0.1490412 , mean = 0.3221568 , max = 0.9878987 #> -> residual function : difference between y and yhat ( default ) -#> -> residuals : numerical, min = -0.8898433 , mean = 4.198546e-13 , max = 0.8448637 +#> -> residuals : numerical, min = -0.8898433 , mean = 4.198219e-13 , max = 0.8448637 #> A new explainer has been created!
@@ -334,11 +340,11 @@Examp #> -> model label : lm ( default ) #> -> data : 7847 rows 6 cols #> -> target variable : 7847 values -#> -> model_info : package stats , ver. 3.6.1 , task regression ( default ) +#> -> model_info : package stats , ver. 3.6.0 , task regression ( default ) #> -> predict function : yhat.glm will be used ( default ) #> -> predicted values : numerical, min = 0.00861694 , mean = 0.3638333 , max = 0.7822214 #> -> residual function : difference between y and yhat ( default ) -#> -> residuals : numerical, min = -0.7755901 , mean = -1.293796e-13 , max = 0.9820537 +#> -> residuals : numerical, min = -0.7755901 , mean = -1.294707e-13 , max = 0.9820537 #> A new explainer has been created!
fi_glm <- feature_importance(explainer_glm, type = "raw", loss_function = loss_root_mean_square) head(fi_glm)#> variable mean_dropout_loss label @@ -364,25 +370,29 @@Examp #> -> predict function : yhat.default will be used ( default ) #> -> predicted values : numerical, min = 1.687903e-06 , mean = 0.363713 , max = 0.9996712 #> -> residual function : difference between y and yhat ( default ) -#> -> residuals : numerical, min = -0.9885727 , mean = 0.0001203494 , max = 0.9970635 +#> -> residuals : numerical, min = -0.9885727 , mean = 0.0001203497 , max = 0.9970635 #> A new explainer has been created!
#> variable mean_dropout_loss label -#> 1 _full_model_ 98.58735 xgboost -#> 2 gendermale 98.58735 xgboost -#> 3 evaluation 116.50755 xgboost -#> 4 genderfemale 149.72728 xgboost -#> 5 age 159.28469 xgboost -#> 6 salary 167.93442 xgboost# } +#> 1 _full_model_ 98.39138 xgboost +#> 2 gendermale 98.39138 xgboost +#> 3 evaluation 115.96322 xgboost +#> 4 genderfemale 149.93447 xgboost +#> 5 age 159.41428 xgboost +#> 6 salary 168.88640 xgboost# }
partial_dependence()
partial_dependency()
Partial Dependency Profiles
Partial Dependence Profiles
Accumulated Local Effects Profiles aka ALEPlots
conditional_dependency()
local_dependency()
conditional_dependence()
local_dependency()
conditional_dependency()
Conditional Dependency Profiles
Conditional Dependence Profiles
Partial Dependence Profiles are averages from Ceteris Paribus Profiles.
+Function partial_dependence
calls ceteris_paribus
and then aggregate_profiles
.
partial_dependence(x, ...) + +# S3 method for explainer +partial_dependence( + x, + variables = NULL, + N = 500, + variable_splits = NULL, + grid_points = 101, + ..., + variable_type = "numerical" +) + +# S3 method for default +partial_dependence( + x, + data, + predict_function = predict, + label = class(x)[1], + variables = NULL, + grid_points = 101, + variable_splits = NULL, + N = 500, + ..., + variable_type = "numerical" +) + +# S3 method for ceteris_paribus_explainer +partial_dependence(x, ..., variables = NULL) + +partial_dependency(x, ...)+ +
x | +an explainer created with function |
+
---|---|
... | +other parameters |
+
variables | +names of variables for which profiles shall be calculated.
+Will be passed to |
+
N | +number of observations used for calculation of partial dependence profiles. By default 500. |
+
variable_splits | +named list of splits for variables, in most cases created with |
+
grid_points | +number of points for profile. Will be passed to |
+
variable_type | +a character. If |
+
data | +validation dataset, will be extracted from |
+
predict_function | +predict function, will be extracted from |
+
label | +name of the model. By default it's extracted from the |
+
an object of the class aggregated_profiles_explainer
Find more details in the Partial Dependence Profiles Chapter.
+ +Explanatory Model Analysis. Explore, Explain and Examine Predictive Models. https://pbiecek.github.io/ema
+ + ++library("DALEX") + +model_titanic_glm <- glm(survived ~ gender + age + fare, + data = titanic_imputed, family = "binomial") + +explain_titanic_glm <- explain(model_titanic_glm, + data = titanic_imputed[,-8], + y = titanic_imputed[,8], + verbose = FALSE) + +pdp_glm <- partial_dependence(explain_titanic_glm, + N = 50, variables = c("age", "fare")) +head(pdp_glm)#> Top profiles : +#> _vname_ _label_ _x_ _yhat_ _ids_ +#> 1 fare lm 0.0000000 0.2848034 0 +#> 2 age lm 0.1666667 0.3506775 0 +#> 3 age lm 2.0000000 0.3481410 0 +#> 4 age lm 4.0000000 0.3453864 0 +#> 5 fare lm 6.1793080 0.2922441 0 +#> 6 age lm 7.0000000 0.3412793 0plot(pdp_glm)+# \donttest{ +library("randomForest") + +model_titanic_rf <- randomForest(survived ~., data = titanic_imputed)#> Warning: The response has five or fewer unique values. Are you sure you want to do regression?+explain_titanic_rf <- explain(model_titanic_rf, + data = titanic_imputed[,-8], + y = titanic_imputed[,8], + verbose = FALSE) + +pdp_rf <- partial_dependence(explain_titanic_rf, variable_type = "numerical") +plot(pdp_rf)+pdp_rf <- partial_dependence(explain_titanic_rf, variable_type = "categorical") +plotD3(pdp_rf, label_margin = 80, scale_plot = TRUE) +# }
Function plot.aggregated_profiles_explainer
plots partial dependency plot or accumulated effect plot.
+
+
Function plot.aggregated_profiles_explainer
plots partial dependence plot or accumulated effect plot.
It works in a similar way to plot.ceteris_paribus
, but instead of individual profiles
show average profiles for each variable listed in the variables
vector.
# S3 method for aggregated_profiles_explainer @@ -159,7 +162,7 @@- +Plots Aggregated Profiles
facet_ncol = NULL, variables = NULL )
if not |
a ggplot2
object
Explanatory Model Analysis. Explore, Explain and Examine Predictive Models. https://pbiecek.github.io/ema
+library("DALEX") @@ -211,11 +216,11 @@Examp y = titanic_imputed[,8], verbose = FALSE) -pdp_rf_p <- partial_dependency(explain_titanic_glm, N = 50) +pdp_rf_p <- partial_dependence(explain_titanic_glm, N = 50) pdp_rf_p$`_label_` <- "RF_partial" -pdp_rf_l <- conditional_dependency(explain_titanic_glm, N = 50) +pdp_rf_l <- conditional_dependence(explain_titanic_glm, N = 50) pdp_rf_l$`_label_` <- "RF_local" -pdp_rf_a<- accumulated_dependency(explain_titanic_glm, N = 50) +pdp_rf_a<- accumulated_dependence(explain_titanic_glm, N = 50) pdp_rf_a$`_label_` <- "RF_accumulated" head(pdp_rf_p)
#> Top profiles : #> _vname_ _label_ _x_ _yhat_ _ids_ @@ -296,8 +301,11 @@@@ -142,7 +143,9 @@Examp
Contents
diff --git a/docs/reference/plot.ceteris_paribus_2d_explainer-1.png b/docs/reference/plot.ceteris_paribus_2d_explainer-1.png index f23c9fe7..316e98c5 100644 Binary files a/docs/reference/plot.ceteris_paribus_2d_explainer-1.png and b/docs/reference/plot.ceteris_paribus_2d_explainer-1.png differ diff --git a/docs/reference/plot.ceteris_paribus_2d_explainer-2.png b/docs/reference/plot.ceteris_paribus_2d_explainer-2.png index 90045644..7ec1d1ea 100644 Binary files a/docs/reference/plot.ceteris_paribus_2d_explainer-2.png and b/docs/reference/plot.ceteris_paribus_2d_explainer-2.png differ diff --git a/docs/reference/plot.ceteris_paribus_2d_explainer-3.png b/docs/reference/plot.ceteris_paribus_2d_explainer-3.png index 2576f8f4..811494e3 100644 Binary files a/docs/reference/plot.ceteris_paribus_2d_explainer-3.png and b/docs/reference/plot.ceteris_paribus_2d_explainer-3.png differ diff --git a/docs/reference/plot.ceteris_paribus_2d_explainer-4.png b/docs/reference/plot.ceteris_paribus_2d_explainer-4.png index 809d87e8..77b09461 100644 Binary files a/docs/reference/plot.ceteris_paribus_2d_explainer-4.png and b/docs/reference/plot.ceteris_paribus_2d_explainer-4.png differ diff --git a/docs/reference/plot.ceteris_paribus_2d_explainer-5.png b/docs/reference/plot.ceteris_paribus_2d_explainer-5.png index 7c5d702c..5f2ae7fa 100644 Binary files a/docs/reference/plot.ceteris_paribus_2d_explainer-5.png and b/docs/reference/plot.ceteris_paribus_2d_explainer-5.png differ diff --git a/docs/reference/plot.ceteris_paribus_2d_explainer.html b/docs/reference/plot.ceteris_paribus_2d_explainer.html index f54281b9..9e0396c4 100644 --- a/docs/reference/plot.ceteris_paribus_2d_explainer.html +++ b/docs/reference/plot.ceteris_paribus_2d_explainer.html @@ -36,13 +36,14 @@ + + - @@ -81,7 +82,7 @@Plot Ceteris Paribus 2D Explanations
+This function plots What-If Plots for a single prediction / observation.
+# S3 method for ceteris_paribus_2d_explainer @@ -157,7 +160,7 @@- +Plot Ceteris Paribus 2D Explanations
pch = "+", size = 6 )Arguments
numeric, size of individual datapoints |
a ggplot2
object
Explanatory Model Analysis. Explore, Explain and Examine Predictive Models. https://pbiecek.github.io/ema
+library("DALEX") @@ -267,8 +272,11 @@@@ -144,9 +145,11 @@Examp
Contents
diff --git a/docs/reference/plot.ceteris_paribus_explainer-1.png b/docs/reference/plot.ceteris_paribus_explainer-1.png index 4604ee7f..f52f0054 100644 Binary files a/docs/reference/plot.ceteris_paribus_explainer-1.png and b/docs/reference/plot.ceteris_paribus_explainer-1.png differ diff --git a/docs/reference/plot.ceteris_paribus_explainer-2.png b/docs/reference/plot.ceteris_paribus_explainer-2.png index ac574ef4..2de9d8a5 100644 Binary files a/docs/reference/plot.ceteris_paribus_explainer-2.png and b/docs/reference/plot.ceteris_paribus_explainer-2.png differ diff --git a/docs/reference/plot.ceteris_paribus_explainer-3.png b/docs/reference/plot.ceteris_paribus_explainer-3.png index e6317636..dcc058cf 100644 Binary files a/docs/reference/plot.ceteris_paribus_explainer-3.png and b/docs/reference/plot.ceteris_paribus_explainer-3.png differ diff --git a/docs/reference/plot.ceteris_paribus_explainer-4.png b/docs/reference/plot.ceteris_paribus_explainer-4.png index e55a6968..74d30858 100644 Binary files a/docs/reference/plot.ceteris_paribus_explainer-4.png and b/docs/reference/plot.ceteris_paribus_explainer-4.png differ diff --git a/docs/reference/plot.ceteris_paribus_explainer-5.png b/docs/reference/plot.ceteris_paribus_explainer-5.png index 59cc69d3..3043e691 100644 Binary files a/docs/reference/plot.ceteris_paribus_explainer-5.png and b/docs/reference/plot.ceteris_paribus_explainer-5.png differ diff --git a/docs/reference/plot.ceteris_paribus_explainer-6.png b/docs/reference/plot.ceteris_paribus_explainer-6.png index 194c2b71..b102442c 100644 Binary files a/docs/reference/plot.ceteris_paribus_explainer-6.png and b/docs/reference/plot.ceteris_paribus_explainer-6.png differ diff --git a/docs/reference/plot.ceteris_paribus_explainer.html b/docs/reference/plot.ceteris_paribus_explainer.html index f8041b5a..f302c8e2 100644 --- a/docs/reference/plot.ceteris_paribus_explainer.html +++ b/docs/reference/plot.ceteris_paribus_explainer.html @@ -36,15 +36,16 @@ + + - @@ -83,7 
+84,7 @@Plots Ceteris Paribus Profiles
+Function
plot.ceteris_paribus_explainer
plots Individual Variable Profiles for selected observations. Various parameters help to decide what should be plotted, profiles, aggregated profiles, points or rugs. Find more details in Ceteris Paribus Chapter.
+# S3 method for ceteris_paribus_explainer @@ -160,7 +163,7 @@- +Plots Ceteris Paribus Profiles
facet_ncol = NULL, variables = NULL )Arguments
if not |
a ggplot2
object
Explanatory Model Analysis. Explore, Explain and Examine Predictive Models. https://pbiecek.github.io/ema
+library("DALEX") @@ -305,8 +310,11 @@@@ -143,13 +144,15 @@Examp
Contents
diff --git a/docs/reference/plot.ceteris_paribus_oscillations-1.png b/docs/reference/plot.ceteris_paribus_oscillations-1.png index 51348513..47fd4280 100644 Binary files a/docs/reference/plot.ceteris_paribus_oscillations-1.png and b/docs/reference/plot.ceteris_paribus_oscillations-1.png differ diff --git a/docs/reference/plot.ceteris_paribus_oscillations-2.png b/docs/reference/plot.ceteris_paribus_oscillations-2.png index d87bcabb..4290b067 100644 Binary files a/docs/reference/plot.ceteris_paribus_oscillations-2.png and b/docs/reference/plot.ceteris_paribus_oscillations-2.png differ diff --git a/docs/reference/plot.ceteris_paribus_oscillations.html b/docs/reference/plot.ceteris_paribus_oscillations.html index c867d1ab..1fd2b1b7 100644 --- a/docs/reference/plot.ceteris_paribus_oscillations.html +++ b/docs/reference/plot.ceteris_paribus_oscillations.html @@ -36,14 +36,15 @@ + + - @@ -82,7 +83,7 @@Plot Ceteris Paribus Oscillations
+This function plots local variable importance plots calculated as oscillations in the Ceteris Paribus Profiles.
+# S3 method for ceteris_paribus_oscillations plot(x, ..., bar_width = 10)- +Arguments
width of bars. By default 10 |
a ggplot2
object
Explanatory Model Analysis. Explore, Explain and Examine Predictive Models. https://pbiecek.github.io/ema
+library("DALEX") @@ -217,8 +222,11 @@@@ -148,6 +149,7 @@Examp
Contents
diff --git a/docs/reference/plot.feature_importance_explainer-1.png b/docs/reference/plot.feature_importance_explainer-1.png index f4745bbb..527ade42 100644 Binary files a/docs/reference/plot.feature_importance_explainer-1.png and b/docs/reference/plot.feature_importance_explainer-1.png differ diff --git a/docs/reference/plot.feature_importance_explainer-2.png b/docs/reference/plot.feature_importance_explainer-2.png index de0ab9d6..db1f7a66 100644 Binary files a/docs/reference/plot.feature_importance_explainer-2.png and b/docs/reference/plot.feature_importance_explainer-2.png differ diff --git a/docs/reference/plot.feature_importance_explainer-3.png b/docs/reference/plot.feature_importance_explainer-3.png index daee5ccd..ceb93fab 100644 Binary files a/docs/reference/plot.feature_importance_explainer-3.png and b/docs/reference/plot.feature_importance_explainer-3.png differ diff --git a/docs/reference/plot.feature_importance_explainer-4.png b/docs/reference/plot.feature_importance_explainer-4.png index b747dd15..e8c795c2 100644 Binary files a/docs/reference/plot.feature_importance_explainer-4.png and b/docs/reference/plot.feature_importance_explainer-4.png differ diff --git a/docs/reference/plot.feature_importance_explainer-5.png b/docs/reference/plot.feature_importance_explainer-5.png index bb7d5788..cd6f56eb 100644 Binary files a/docs/reference/plot.feature_importance_explainer-5.png and b/docs/reference/plot.feature_importance_explainer-5.png differ diff --git a/docs/reference/plot.feature_importance_explainer.html b/docs/reference/plot.feature_importance_explainer.html index 4d80c927..aaef5b78 100644 --- a/docs/reference/plot.feature_importance_explainer.html +++ b/docs/reference/plot.feature_importance_explainer.html @@ -36,6 +36,7 @@ + + - @@ -87,7 +88,7 @@Plots Feature Importance
+This function plots variable importance calculated as changes in the loss function after variable drops. It uses output from
feature_importance
function that corresponds to permutation based measure of variable importance. @@ -155,6 +157,7 @@Plots Feature Importance
The order depends on the average drop out loss. In different panels variable contributions may not look like sorted if variable importance is different in different in different models. +# S3 method for feature_importance_explainer @@ -166,7 +169,7 @@- +Plots Feature Importance
bar_width = 10, desc_sorting = TRUE )Arguments
logical. Should the bars be sorted descending? By default TRUE |
a ggplot2
object
Find more details in the Feature Importance Chapter.
+Explanatory Model Analysis. Explore, Explain and Examine Predictive Models. https://pbiecek.github.io/ema
+@@ -316,9 +322,13 @@library("DALEX") @@ -219,11 +225,11 @@Examp #> -> model label : lm ( default ) #> -> data : 2207 rows 7 cols #> -> target variable : 2207 values -#> -> model_info : package stats , ver. 3.6.1 , task regression ( default ) +#> -> model_info : package stats , ver. 3.6.0 , task regression ( default ) #> -> predict function : yhat.glm will be used ( default ) #> -> predicted values : numerical, min = 0.1490412 , mean = 0.3221568 , max = 0.9878987 #> -> residual function : difference between y and yhat ( default ) -#> -> residuals : numerical, min = -0.8898433 , mean = 4.198546e-13 , max = 0.8448637 +#> -> residuals : numerical, min = -0.8898433 , mean = 4.198219e-13 , max = 0.8448637 #> A new explainer has been created!
@@ -264,11 +270,11 @@Examp #> -> model label : lm ( default ) #> -> data : 7847 rows 6 cols #> -> target variable : 7847 values -#> -> model_info : package stats , ver. 3.6.1 , task regression ( default ) +#> -> model_info : package stats , ver. 3.6.0 , task regression ( default ) #> -> predict function : yhat.glm will be used ( default ) #> -> predicted values : numerical, min = 0.00861694 , mean = 0.3638333 , max = 0.7822214 #> -> residual function : difference between y and yhat ( default ) -#> -> residuals : numerical, min = -0.7755901 , mean = -1.293796e-13 , max = 0.9820537 +#> -> residuals : numerical, min = -0.7755901 , mean = -1.294707e-13 , max = 0.9820537 #> A new explainer has been created!
fi_glm <- feature_importance(explainer_glm, type = "raw", loss_function = loss_root_mean_square) @@ -298,17 +304,17 @@Examp #> -> predict function : yhat.default will be used ( default ) #> -> predicted values : numerical, min = 1.687903e-06 , mean = 0.363713 , max = 0.9996712 #> -> residual function : difference between y and yhat ( default ) -#> -> residuals : numerical, min = -0.9885727 , mean = 0.0001203494 , max = 0.9970635 +#> -> residuals : numerical, min = -0.9885727 , mean = 0.0001203497 , max = 0.9970635 #> A new explainer has been created!
#> variable mean_dropout_loss label -#> 1 _full_model_ 98.22955 xgboost -#> 2 gendermale 98.22955 xgboost -#> 3 evaluation 114.79861 xgboost -#> 4 genderfemale 153.09930 xgboost -#> 5 age 158.15857 xgboost -#> 6 salary 171.38984 xgboost# } +#> 1 _full_model_ 98.46621 xgboost +#> 2 gendermale 98.46621 xgboost +#> 3 evaluation 114.56402 xgboost +#> 4 genderfemale 155.01631 xgboost +#> 5 age 156.90054 xgboost +#> 6 salary 170.13976 xgboost# }
Function plotD3.aggregated_profiles_explainer
plots an aggregate of ceteris paribus profiles.
It works in a similar way to plotD3.ceteris_paribus_explainer
but, instead of individual profiles,
show average profiles for each variable listed in the variables
vector.
Find more details in Ceteris Paribus Chapter.
+# S3 method for aggregated_profiles_explainer @@ -164,7 +167,7 @@- +Plots Aggregated Ceteris Paribus Profiles in D3 with r2d3 Package.
chart_title = "Aggregated Profiles", label_margin = 60 )
a numeric. Set width of label margins in |
a r2d3
object.
Explanatory Model Analysis. Explore, Explain and Examine Predictive Models. https://pbiecek.github.io/ema
+library("DALEX") @@ -260,8 +265,11 @@@@ -145,10 +146,12 @@Examp
Contents
diff --git a/docs/reference/plotD3_ceteris_paribus.html b/docs/reference/plotD3_ceteris_paribus.html index 7335d4ed..e8a434f2 100644 --- a/docs/reference/plotD3_ceteris_paribus.html +++ b/docs/reference/plotD3_ceteris_paribus.html @@ -36,16 +36,17 @@ + + - @@ -84,7 +85,7 @@Plots Ceteris Paribus Profiles in D3 with r2d3 Package.
+Function
plotD3.ceteris_paribus_explainer
plots Individual Variable Profiles for selected observations. It uses output fromceteris_paribus
function. Various parameters help to decide what should be plotted, profiles, aggregated profiles, points or rugs. Find more details in Ceteris Paribus Chapter.
+plotD3(x, ...) @@ -169,7 +172,7 @@- +Plots Ceteris Paribus Profiles in D3 with r2d3 Package.
show_observations = TRUE, show_rugs = TRUE )Arguments
a logical. Adds rugs layer to a plot. By default it's |
a r2d3
object.
library("DALEX") @@ -266,7 +270,9 @@@@ -145,10 +146,12 @@Examp
Contents
diff --git a/docs/reference/plotD3_feature_importance.html b/docs/reference/plotD3_feature_importance.html index 6026b5b8..c3fe42c6 100644 --- a/docs/reference/plotD3_feature_importance.html +++ b/docs/reference/plotD3_feature_importance.html @@ -36,16 +36,17 @@ + + - @@ -84,7 +85,7 @@Plot Feature Importance Objects in D3 with r2d3 Package.
+Function
+plotD3.feature_importance_explainer
plots dropouts for variables used in the model. It uses output fromfeature_importance
function that corresponds to permutation based measure of feature importance. Variables are sorted in the same order in all panels. The order depends on the average drop out loss. In different panels variable contributions may not look like sorted if variable importance is different in different models.# S3 method for feature_importance_explainer @@ -163,7 +166,7 @@- +Plot Feature Importance Objects in D3 with r2d3 Package.
margin = 0.15, chart_title = "Feature importance" )Arguments
a character. Set custom title |
a r2d3
object.
+} @@ -142,12 +143,14 @@plotD3(fi_lm) -# \dontrun{ +if (FALSE) { library("randomForest") rf_model <- randomForest(m2.price~., data = apartments) @@ -242,26 +246,23 @@Examp fi_rf <- feature_importance(explainer_rf, loss_function = loss_root_mean_square) -head(fi_rf)
#> variable mean_dropout_loss label -#> 1 _full_model_ 198.5890 rf -#> 2 construction.year 366.4297 rf -#> 3 no.rooms 371.6181 rf -#> 4 floor 412.9942 rf -#> 5 surface 456.1096 rf -#> 6 district 830.3744 rf
Prints Aggregated Profiles
+# S3 method for aggregated_profiles_explainer print(x, ...)- +
other arguments that will be passed to |
library("DALEX") @@ -174,11 +177,11 @@Examp #> -> model label : lm ( default ) #> -> data : 2207 rows 7 cols #> -> target variable : 2207 values -#> -> model_info : package stats , ver. 3.6.1 , task regression ( default ) +#> -> model_info : package stats , ver. 3.6.0 , task regression ( default ) #> -> predict function : yhat.glm will be used ( default ) #> -> predicted values : numerical, min = 0.1490412 , mean = 0.3221568 , max = 0.9878987 #> -> residual function : difference between y and yhat ( default ) -#> -> residuals : numerical, min = -0.8898433 , mean = 4.198546e-13 , max = 0.8448637 +#> -> residuals : numerical, min = -0.8898433 , mean = 4.198219e-13 , max = 0.8448637 #> A new explainer has been created!
selected_passangers <- select_sample(titanic_imputed, n = 100) cp_rf <- ceteris_paribus(explain_titanic_glm, selected_passangers) @@ -282,6 +285,7 @@@@ -142,12 +143,14 @@Examp
Contents
diff --git a/docs/reference/print.ceteris_paribus_explainer.html b/docs/reference/print.ceteris_paribus_explainer.html index b8874138..d8dcf381 100644 --- a/docs/reference/print.ceteris_paribus_explainer.html +++ b/docs/reference/print.ceteris_paribus_explainer.html @@ -36,13 +36,14 @@ + + - @@ -81,7 +82,7 @@Prints Individual Variable Explainer Summary
+Prints Individual Variable Explainer Summary
+# S3 method for ceteris_paribus_explainer print(x, ...)- +Arguments
other arguments that will be passed to |
library("DALEX") @@ -221,6 +224,7 @@@@ -142,12 +143,14 @@Examp
Contents
diff --git a/docs/reference/print.feature_importance_explainer.html b/docs/reference/print.feature_importance_explainer.html index 227c3c4c..e82baa9f 100644 --- a/docs/reference/print.feature_importance_explainer.html +++ b/docs/reference/print.feature_importance_explainer.html @@ -36,13 +36,14 @@ + + - @@ -81,7 +82,7 @@Print Generic for Feature Importance Object
+Print Generic for Feature Importance Object
+# S3 method for feature_importance_explainer print(x, ...)- +Arguments
other parameters. |
a data frame.
+Explanatory Model Analysis. Explore, Explain and Examine Predictive Models. https://pbiecek.github.io/ema
+library("DALEX") @@ -181,11 +186,11 @@Examp #> -> model label : lm ( default ) #> -> data : 2207 rows 7 cols #> -> target variable : 2207 values -#> -> model_info : package stats , ver. 3.6.1 , task regression ( default ) +#> -> model_info : package stats , ver. 3.6.0 , task regression ( default ) #> -> predict function : yhat.glm will be used ( default ) #> -> predicted values : numerical, min = 0.1490412 , mean = 0.3221568 , max = 0.9878987 #> -> residual function : difference between y and yhat ( default ) -#> -> residuals : numerical, min = -0.8898433 , mean = 4.198546e-13 , max = 0.8448637 +#> -> residuals : numerical, min = -0.8898433 , mean = 4.198219e-13 , max = 0.8448637 #> A new explainer has been created!
fi_glm <- feature_importance(explain_titanic_glm) @@ -206,8 +211,11 @@@@ -143,8 +144,10 @@Examp
Contents
diff --git a/docs/reference/select_neighbours.html b/docs/reference/select_neighbours.html index 11e4dca2..e613a9ef 100644 --- a/docs/reference/select_neighbours.html +++ b/docs/reference/select_neighbours.html @@ -36,14 +36,15 @@ + + - @@ -82,7 +83,7 @@Select Subset of Rows Closest to a Specified Observation
+Function
+select_neighbours
selects subset of rows from data set. This is useful if data is large and we need just a sample to calculate profiles.select_neighbours( @@ -155,7 +158,7 @@- +Select Subset of Rows Closest to a Specified Observation
n = 20, frac = NULL )Arguments
a data frame with selected rows
+Note that select_neighbours()
function is S3 generic.
If you want to work on non standard data sources (like H2O ddf, external databases)
you should overload it.
library("DALEX") @@ -220,8 +225,11 @@@@ -143,12 +144,14 @@Examp
Contents
diff --git a/docs/reference/select_sample.html b/docs/reference/select_sample.html index 62f25836..8599c1ec 100644 --- a/docs/reference/select_sample.html +++ b/docs/reference/select_sample.html @@ -36,14 +36,15 @@ + + - @@ -82,7 +83,7 @@Select Subset of Rows
+Function
+select_sample
selects subset of rows from data set. This is useful if data is large and we need just a sample to calculate profiles.select_sample(data, n = 100, seed = 1313)- +Arguments
seed for random number generator. |
a data frame with selected rows
+Note that select_sample()
function is S3 generic.
If you want to work on non standard data sources (like H2O ddf, external databases)
you should overload it.
library("DALEX") @@ -192,8 +197,11 @@@@ -143,8 +144,10 @@Examp
Contents
diff --git a/docs/reference/show_aggregated_profiles-1.png b/docs/reference/show_aggregated_profiles-1.png index 84ab7f19..a7461620 100644 Binary files a/docs/reference/show_aggregated_profiles-1.png and b/docs/reference/show_aggregated_profiles-1.png differ diff --git a/docs/reference/show_aggregated_profiles-2.png b/docs/reference/show_aggregated_profiles-2.png index cfd96a7a..cf2c7ea3 100644 Binary files a/docs/reference/show_aggregated_profiles-2.png and b/docs/reference/show_aggregated_profiles-2.png differ diff --git a/docs/reference/show_aggregated_profiles.html b/docs/reference/show_aggregated_profiles.html index ebe2320a..98e1110f 100644 --- a/docs/reference/show_aggregated_profiles.html +++ b/docs/reference/show_aggregated_profiles.html @@ -36,14 +36,15 @@ + + - @@ -82,7 +83,7 @@Adds a Layer with Aggregated Profiles
+Function
+show_aggregated_profiles
adds a layer to a plot created with plot.ceteris_paribus_explainer
.show_aggregated_profiles( @@ -155,7 +158,7 @@- +Adds a Layer with Aggregated Profiles
color = "#371ea3", variables = NULL )Arguments
if not |
a ggplot2
layer
library("DALEX") @@ -203,11 +207,11 @@Examp #> -> model label : lm ( default ) #> -> data : 2207 rows 7 cols #> -> target variable : 2207 values -#> -> model_info : package stats , ver. 3.6.1 , task regression ( default ) +#> -> model_info : package stats , ver. 3.6.0 , task regression ( default ) #> -> predict function : yhat.glm will be used ( default ) #> -> predicted values : numerical, min = 0.1490412 , mean = 0.3221568 , max = 0.9878987 #> -> residual function : difference between y and yhat ( default ) -#> -> residuals : numerical, min = -0.8898433 , mean = 4.198546e-13 , max = 0.8448637 +#> -> residuals : numerical, min = -0.8898433 , mean = 4.198219e-13 , max = 0.8448637 #> A new explainer has been created!
cp_rf <- ceteris_paribus(explain_titanic_glm, selected_passangers) @@ -278,7 +282,9 @@@@ -144,9 +145,11 @@Examp
Contents
diff --git a/docs/reference/show_observations-1.png b/docs/reference/show_observations-1.png index 749545fb..e532ab79 100644 Binary files a/docs/reference/show_observations-1.png and b/docs/reference/show_observations-1.png differ diff --git a/docs/reference/show_observations.html b/docs/reference/show_observations.html index 8ba7209b..1c58180d 100644 --- a/docs/reference/show_observations.html +++ b/docs/reference/show_observations.html @@ -36,15 +36,16 @@ + + - @@ -83,7 +84,7 @@Adds a Layer with Observations to a Profile Plot
+Function
+show_observations
adds a layer to a plot created with plot.ceteris_paribus_explainer
for selected observations. Various parameters help to decide what should be plotted, profiles, aggregated profiles, points or rugs.show_observations( @@ -158,7 +161,7 @@- +Adds a Layer with Observations to a Profile Plot
variable_type = "numerical", variables = NULL )Arguments
if not |
a ggplot2
layer
library("DALEX") @@ -252,7 +256,9 @@@@ -143,8 +144,10 @@Examp
Contents
diff --git a/docs/reference/show_profiles-1.png b/docs/reference/show_profiles-1.png index 68f2dffb..f340fe2e 100644 Binary files a/docs/reference/show_profiles-1.png and b/docs/reference/show_profiles-1.png differ diff --git a/docs/reference/show_profiles-2.png b/docs/reference/show_profiles-2.png index 7a09e189..a2e8b187 100644 Binary files a/docs/reference/show_profiles-2.png and b/docs/reference/show_profiles-2.png differ diff --git a/docs/reference/show_profiles.html b/docs/reference/show_profiles.html index 9b73639c..719ef6fd 100644 --- a/docs/reference/show_profiles.html +++ b/docs/reference/show_profiles.html @@ -36,14 +36,15 @@ + + - @@ -82,7 +83,7 @@Adds a Layer with Profiles
+Function
+show_profiles
adds a layer to a plot created with plot.ceteris_paribus_explainer
.show_profiles( @@ -155,7 +158,7 @@- +Adds a Layer with Profiles
color = "#371ea3", variables = NULL )Arguments
if not |
a ggplot2
layer
library("DALEX") @@ -271,7 +275,9 @@@@ -144,9 +145,11 @@Examp
Contents
diff --git a/docs/reference/show_residuals-1.png b/docs/reference/show_residuals-1.png index 57c76f00..1f47745c 100644 Binary files a/docs/reference/show_residuals-1.png and b/docs/reference/show_residuals-1.png differ diff --git a/docs/reference/show_residuals-2.png b/docs/reference/show_residuals-2.png index 8df717a7..bcf95537 100644 Binary files a/docs/reference/show_residuals-2.png and b/docs/reference/show_residuals-2.png differ diff --git a/docs/reference/show_residuals.html b/docs/reference/show_residuals.html index ab408d89..f0d64c3b 100644 --- a/docs/reference/show_residuals.html +++ b/docs/reference/show_residuals.html @@ -36,15 +36,16 @@ + + - @@ -83,7 +84,7 @@Adds a Layer with Residuals to a Profile Plot
+Function
+show_residuals
adds a layer to a plot created with plot.ceteris_paribus_explainer
for selected observations. Note that the y
argument has to be specified in the ceteris_paribus
function.show_residuals( @@ -154,10 +157,10 @@- +Adds a Layer with Residuals to a Profile Plot
..., size = 0.75, alpha = 1, - color = c(`TRUE` = "#371ea3", `FALSE` = "#f05a71"), + color = c(`TRUE` = "#8bdcbe", `FALSE` = "#f05a71"), variables = NULL )Arguments
if not |
a ggplot2
layer
library("DALEX") @@ -250,7 +254,9 @@@@ -144,9 +145,11 @@Examp
Contents
diff --git a/docs/reference/show_rugs-1.png b/docs/reference/show_rugs-1.png index 4154de15..f922d944 100644 Binary files a/docs/reference/show_rugs-1.png and b/docs/reference/show_rugs-1.png differ diff --git a/docs/reference/show_rugs.html b/docs/reference/show_rugs.html index 0970fa7f..cecfecf0 100644 --- a/docs/reference/show_rugs.html +++ b/docs/reference/show_rugs.html @@ -36,15 +36,16 @@ + + - @@ -83,7 +84,7 @@Adds a Layer with Rugs to a Profile Plot
+Function
+show_rugs
adds a layer to a plot created with plot.ceteris_paribus_explainer
for selected observations. Various parameters help to decide what should be plotted, profiles, aggregated profiles, points or rugs.show_rugs( @@ -159,7 +162,7 @@- +Adds a Layer with Rugs to a Profile Plot
sides = "b", variables = NULL )Arguments
if not |
a ggplot2
layer
library("DALEX") @@ -254,7 +258,9 @@Examp
Contents
diff --git a/man/accumulated_dependency.Rd b/man/accumulated_dependence.Rd similarity index 81% rename from man/accumulated_dependency.Rd rename to man/accumulated_dependence.Rd index 6c2a3ef6..857c7d04 100644 --- a/man/accumulated_dependency.Rd +++ b/man/accumulated_dependence.Rd @@ -1,15 +1,16 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/accumulated_dependency.R -\name{accumulated_dependency} +% Please edit documentation in R/accumulated_dependence.R +\name{accumulated_dependence} +\alias{accumulated_dependence} +\alias{accumulated_dependence.explainer} +\alias{accumulated_dependence.default} +\alias{accumulated_dependence.ceteris_paribus_explainer} \alias{accumulated_dependency} -\alias{accumulated_dependency.explainer} -\alias{accumulated_dependency.default} -\alias{accumulated_dependency.ceteris_paribus_explainer} \title{Accumulated Local Effects Profiles aka ALEPlots} \usage{ -accumulated_dependency(x, ...) +accumulated_dependence(x, ...) -\method{accumulated_dependency}{explainer}( +\method{accumulated_dependence}{explainer}( x, variables = NULL, N = 500, @@ -19,7 +20,7 @@ accumulated_dependency(x, ...) variable_type = "numerical" ) -\method{accumulated_dependency}{default}( +\method{accumulated_dependence}{default}( x, data, predict_function = predict, @@ -32,7 +33,9 @@ accumulated_dependency(x, ...) variable_type = "numerical" ) -\method{accumulated_dependency}{ceteris_paribus_explainer}(x, ..., variables = NULL) +\method{accumulated_dependence}{ceteris_paribus_explainer}(x, ..., variables = NULL) + +accumulated_dependency(x, ...) } \arguments{ \item{x}{an explainer created with function \code{DALEX::explain()}, an object of the class \code{ceteris_paribus_explainer} @@ -44,7 +47,7 @@ or a model to be explained.} Will be passed to \code{\link{calculate_variable_split}}. 
If \code{NULL} then all variables from the validation data will be used.} -\item{N}{number of observations used for calculation of partial dependency profiles. +\item{N}{number of observations used for calculation of partial dependence profiles. By default, 500 observations will be chosen randomly.} \item{variable_splits}{named list of splits for variables, in most cases created with \code{\link{calculate_variable_split}}. @@ -67,10 +70,10 @@ an object of the class \code{aggregated_profiles_explainer} } \description{ Accumulated Local Effects Profiles accumulate local changes in Ceteris Paribus Profiles. -Function \code{\link{accumulated_dependency}} calls \code{\link{ceteris_paribus}} and then \code{\link{aggregate_profiles}}. +Function \code{\link{accumulated_dependence}} calls \code{\link{ceteris_paribus}} and then \code{\link{aggregate_profiles}}. } \details{ -Find more detailes in the \href{https://pbiecek.github.io/ema/accumulatedLocalProfiles.html}{Accumulated Local Dependency Chapter}. +Find more detailes in the \href{https://pbiecek.github.io/ema/accumulatedLocalProfiles.html}{Accumulated Local Dependence Chapter}. 
} \examples{ library("DALEX") @@ -83,7 +86,7 @@ explain_titanic_glm <- explain(model_titanic_glm, y = titanic_imputed[,8], verbose = FALSE) -adp_glm <- accumulated_dependency(explain_titanic_glm, +adp_glm <- accumulated_dependence(explain_titanic_glm, N = 150, variables = c("age", "fare")) head(adp_glm) plot(adp_glm) @@ -98,10 +101,10 @@ explain_titanic_rf <- explain(model_titanic_rf, y = titanic_imputed[,8], verbose = FALSE) -adp_rf <- accumulated_dependency(explain_titanic_rf, N = 200, variable_type = "numerical") +adp_rf <- accumulated_dependence(explain_titanic_rf, N = 200, variable_type = "numerical") plot(adp_rf) -adp_rf <- accumulated_dependency(explain_titanic_rf, N = 200, variable_type = "categorical") +adp_rf <- accumulated_dependence(explain_titanic_rf, N = 200, variable_type = "categorical") plotD3(adp_rf, label_margin = 80, scale_plot = TRUE) } diff --git a/man/aggregate_profiles.Rd b/man/aggregate_profiles.Rd index b9b96862..becb59de 100644 --- a/man/aggregate_profiles.Rd +++ b/man/aggregate_profiles.Rd @@ -39,9 +39,9 @@ an object of the class \code{aggregated_profiles_explainer} } \description{ The function \code{aggregate_profiles()} calculates an aggregate of ceteris paribus profiles. -It can be: Partial Dependency Profile (average across Ceteris Paribus Profiles), -Conditional Dependency Profile (local weighted average across Ceteris Paribus Profiles) or -Accumulated Local Dependency Profile (cummulated average local changes in Ceteris Paribus Profiles). +It can be: Partial Dependence Profile (average across Ceteris Paribus Profiles), +Conditional Dependence Profile (local weighted average across Ceteris Paribus Profiles) or +Accumulated Local Dependence Profile (cummulated average local changes in Ceteris Paribus Profiles). 
} \examples{ library("DALEX") diff --git a/man/conditional_dependency.Rd b/man/conditional_dependence.Rd similarity index 77% rename from man/conditional_dependency.Rd rename to man/conditional_dependence.Rd index 6d577ed5..913938ef 100644 --- a/man/conditional_dependency.Rd +++ b/man/conditional_dependence.Rd @@ -1,16 +1,17 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/conditional_dependency.R -\name{conditional_dependency} -\alias{conditional_dependency} -\alias{conditional_dependency.explainer} -\alias{conditional_dependency.default} -\alias{conditional_dependency.ceteris_paribus_explainer} +% Please edit documentation in R/conditional_dependence.R +\name{conditional_dependence} +\alias{conditional_dependence} +\alias{conditional_dependence.explainer} +\alias{conditional_dependence.default} +\alias{conditional_dependence.ceteris_paribus_explainer} \alias{local_dependency} -\title{Conditional Dependency Profiles} +\alias{conditional_dependency} +\title{Conditional Dependence Profiles} \usage{ -conditional_dependency(x, ...) +conditional_dependence(x, ...) -\method{conditional_dependency}{explainer}( +\method{conditional_dependence}{explainer}( x, variables = NULL, N = 500, @@ -20,7 +21,7 @@ conditional_dependency(x, ...) variable_type = "numerical" ) -\method{conditional_dependency}{default}( +\method{conditional_dependence}{default}( x, data, predict_function = predict, @@ -33,9 +34,11 @@ conditional_dependency(x, ...) variable_type = "numerical" ) -\method{conditional_dependency}{ceteris_paribus_explainer}(x, ..., variables = NULL) +\method{conditional_dependence}{ceteris_paribus_explainer}(x, ..., variables = NULL) local_dependency(x, ...) + +conditional_dependency(x, ...) 
} \arguments{ \item{x}{an explainer created with function \code{DALEX::explain()}, an object of the class \code{ceteris_paribus_explainer} @@ -46,7 +49,7 @@ or a model to be explained.} \item{variables}{names of variables for which profiles shall be calculated. Will be passed to \code{\link{calculate_variable_split}}. If \code{NULL} then all variables from the validation data will be used.} -\item{N}{number of observations used for calculation of partial dependency profiles. By default 500.} +\item{N}{number of observations used for calculation of partial dependence profiles. By default 500.} \item{variable_splits}{named list of splits for variables, in most cases created with \code{\link{calculate_variable_split}}. If \code{NULL} then it will be calculated based on validation data avaliable in the \code{explainer}.} @@ -67,11 +70,11 @@ NOTE: It is best when target variable is not present in the \code{data}} an object of the class \code{aggregated_profile_explainer} } \description{ -Conditional Dependency Profiles (aka Local Profiles) average localy Ceteris Paribus Profiles. -Function 'conditional_dependency' calls 'ceteris_paribus' and then 'aggregate_profiles'. +Conditional Dependence Profiles (aka Local Profiles) average localy Ceteris Paribus Profiles. +Function 'conditional_dependence' calls 'ceteris_paribus' and then 'aggregate_profiles'. } \details{ -Find more detailes in the \href{https://pbiecek.github.io/ema/accumulatedLocalProfiles.html}{Accumulated Local Dependency Chapter}. +Find more detailes in the \href{https://pbiecek.github.io/ema/accumulatedLocalProfiles.html}{Accumulated Local Dependence Chapter}. 
} \examples{ library("DALEX") @@ -84,7 +87,7 @@ explain_titanic_glm <- explain(model_titanic_glm, y = titanic_imputed[,8], verbose = FALSE) -cdp_glm <- conditional_dependency(explain_titanic_glm, +cdp_glm <- conditional_dependence(explain_titanic_glm, N = 150, variables = c("age", "fare")) head(cdp_glm) plot(cdp_glm) @@ -99,10 +102,10 @@ explain_titanic_rf <- explain(model_titanic_rf, y = titanic_imputed[,8], verbose = FALSE) -cdp_rf <- conditional_dependency(explain_titanic_rf, N = 200, variable_type = "numerical") +cdp_rf <- conditional_dependence(explain_titanic_rf, N = 200, variable_type = "numerical") plot(cdp_rf) -cdp_rf <- conditional_dependency(explain_titanic_rf, N = 200, variable_type = "categorical") +cdp_rf <- conditional_dependence(explain_titanic_rf, N = 200, variable_type = "categorical") plotD3(cdp_rf, label_margin = 80, scale_plot = TRUE) } diff --git a/man/describe.Rd b/man/describe.Rd index bc400835..5abf0141 100644 --- a/man/describe.Rd +++ b/man/describe.Rd @@ -1,14 +1,14 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/describe_aggregated_profiles.R, % R/describe_ceteris_paribus.R, R/describe_feature_importance.R -\name{describe.partial_dependency_explainer} -\alias{describe.partial_dependency_explainer} +\name{describe.partial_dependence_explainer} +\alias{describe.partial_dependence_explainer} \alias{describe} \alias{describe.ceteris_paribus_explainer} \alias{describe.feature_importance_explainer} \title{Natural language description of feature importance explainer} \usage{ -\method{describe}{partial_dependency_explainer}( +\method{describe}{partial_dependence_explainer}( x, nonsignificance_treshold = 0.15, ..., diff --git a/man/partial_dependency.Rd b/man/partial_dependence.Rd similarity index 79% rename from man/partial_dependency.Rd rename to man/partial_dependence.Rd index df6359aa..f1d96a4f 100644 --- a/man/partial_dependency.Rd +++ b/man/partial_dependence.Rd @@ -1,15 +1,16 @@ % Generated by roxygen2: do 
not edit by hand -% Please edit documentation in R/partial_dependency.R -\name{partial_dependency} +% Please edit documentation in R/partial_dependence.R +\name{partial_dependence} +\alias{partial_dependence} +\alias{partial_dependence.explainer} +\alias{partial_dependence.default} +\alias{partial_dependence.ceteris_paribus_explainer} \alias{partial_dependency} -\alias{partial_dependency.explainer} -\alias{partial_dependency.default} -\alias{partial_dependency.ceteris_paribus_explainer} -\title{Partial Dependency Profiles} +\title{Partial Dependence Profiles} \usage{ -partial_dependency(x, ...) +partial_dependence(x, ...) -\method{partial_dependency}{explainer}( +\method{partial_dependence}{explainer}( x, variables = NULL, N = 500, @@ -19,7 +20,7 @@ partial_dependency(x, ...) variable_type = "numerical" ) -\method{partial_dependency}{default}( +\method{partial_dependence}{default}( x, data, predict_function = predict, @@ -32,7 +33,9 @@ partial_dependency(x, ...) variable_type = "numerical" ) -\method{partial_dependency}{ceteris_paribus_explainer}(x, ..., variables = NULL) +\method{partial_dependence}{ceteris_paribus_explainer}(x, ..., variables = NULL) + +partial_dependency(x, ...) } \arguments{ \item{x}{an explainer created with function \code{DALEX::explain()}, an object of the class \code{ceteris_paribus_explainer} or @@ -44,7 +47,7 @@ or a model to be explained.} Will be passed to \code{\link{calculate_variable_split}}. If \code{NULL} then all variables from the validation data will be used.} -\item{N}{number of observations used for calculation of partial dependency profiles. By default 500.} +\item{N}{number of observations used for calculation of partial dependence profiles. By default 500.} \item{variable_splits}{named list of splits for variables, in most cases created with \code{\link{calculate_variable_split}}. 
If \code{NULL} then it will be calculated based on validation data avaliable in the \code{explainer}.} @@ -65,8 +68,8 @@ NOTE: It is best when target variable is not present in the \code{data}} an object of the class \code{aggregated_profiles_explainer} } \description{ -Partial Dependency Profiles are averages from Ceteris Paribus Profiles. -Function \code{partial_dependency} calls \code{ceteris_paribus} and then \code{aggregate_profiles}. +Partial Dependence Profiles are averages from Ceteris Paribus Profiles. +Function \code{partial_dependence} calls \code{ceteris_paribus} and then \code{aggregate_profiles}. } \details{ Find more detailes in the \href{https://pbiecek.github.io/ema/partialDependenceProfiles.html}{Partial Dependence Profiles Chapter}. @@ -82,7 +85,7 @@ explain_titanic_glm <- explain(model_titanic_glm, y = titanic_imputed[,8], verbose = FALSE) -pdp_glm <- partial_dependency(explain_titanic_glm, +pdp_glm <- partial_dependence(explain_titanic_glm, N = 50, variables = c("age", "fare")) head(pdp_glm) plot(pdp_glm) @@ -97,10 +100,10 @@ explain_titanic_rf <- explain(model_titanic_rf, y = titanic_imputed[,8], verbose = FALSE) -pdp_rf <- partial_dependency(explain_titanic_rf, variable_type = "numerical") +pdp_rf <- partial_dependence(explain_titanic_rf, variable_type = "numerical") plot(pdp_rf) -pdp_rf <- partial_dependency(explain_titanic_rf, variable_type = "categorical") +pdp_rf <- partial_dependence(explain_titanic_rf, variable_type = "categorical") plotD3(pdp_rf, label_margin = 80, scale_plot = TRUE) } diff --git a/man/plot.aggregated_profiles_explainer.Rd b/man/plot.aggregated_profiles_explainer.Rd index ae828e3b..b3618e21 100644 --- a/man/plot.aggregated_profiles_explainer.Rd +++ b/man/plot.aggregated_profiles_explainer.Rd @@ -33,7 +33,7 @@ a \code{ggplot2} object } \description{ -Function \code{plot.aggregated_profiles_explainer} plots partial dependency plot or accumulated effect plot. 
+Function \code{plot.aggregated_profiles_explainer} plots partial dependence plot or accumulated effect plot. It works in a similar way to \code{plot.ceteris_paribus}, but instead of individual profiles show average profiles for each variable listed in the \code{variables} vector. } @@ -48,11 +48,11 @@ explain_titanic_glm <- explain(model_titanic_glm, y = titanic_imputed[,8], verbose = FALSE) -pdp_rf_p <- partial_dependency(explain_titanic_glm, N = 50) +pdp_rf_p <- partial_dependence(explain_titanic_glm, N = 50) pdp_rf_p$`_label_` <- "RF_partial" -pdp_rf_l <- conditional_dependency(explain_titanic_glm, N = 50) +pdp_rf_l <- conditional_dependence(explain_titanic_glm, N = 50) pdp_rf_l$`_label_` <- "RF_local" -pdp_rf_a<- accumulated_dependency(explain_titanic_glm, N = 50) +pdp_rf_a<- accumulated_dependence(explain_titanic_glm, N = 50) pdp_rf_a$`_label_` <- "RF_accumulated" head(pdp_rf_p) plot(pdp_rf_p, pdp_rf_l, pdp_rf_a, color = "_label_") diff --git a/tests/testthat/test_aggregated_profiles.R b/tests/testthat/test_aggregated_profiles.R index d29c8214..5ed5f5be 100644 --- a/tests/testthat/test_aggregated_profiles.R +++ b/tests/testthat/test_aggregated_profiles.R @@ -28,11 +28,11 @@ test_that("plot aggregate_profiles",{ expect_true("gg" %in% class(pl1)) - pdp_rf_p <- partial_dependency(explainer_rf, variables = "age") + pdp_rf_p <- partial_dependence(explainer_rf, variables = "age") pdp_rf_p$`_label_` <- "RF_partial" - pdp_rf_c <- conditional_dependency(explainer_rf, variables = "age") + pdp_rf_c <- conditional_dependence(explainer_rf, variables = "age") pdp_rf_c$`_label_` <- "RF_conditional" - pdp_rf_a <- accumulated_dependency(explainer_rf, variables = "age") + pdp_rf_a <- accumulated_dependence(explainer_rf, variables = "age") pdp_rf_a$`_label_` <- "RF_accumulated" pl2 <- plot(pdp_rf_p, pdp_rf_c, pdp_rf_a, color = "_label_") @@ -42,7 +42,7 @@ test_that("plot aggregate_profiles",{ }) -test_that("plot partial_dependency",{ +test_that("plot partial_dependence",{ 
library("DALEX") library("randomForest") titanic <- na.omit(titanic) @@ -56,7 +56,7 @@ test_that("plot partial_dependency",{ selected_passangers <- select_sample(titanic, n = 100) cp_rf <- ceteris_paribus(explain_titanic_rf, selected_passangers) - res <- partial_dependency(explain_titanic_rf, N=50, variables = "gender", variable_type = "categorical") + res <- partial_dependence(explain_titanic_rf, N=50, variables = "gender", variable_type = "categorical") expect_true("aggregated_profiles_explainer" %in% class(res)) }) diff --git a/vignettes/vignette_describe.Rmd b/vignettes/vignette_describe.Rmd index d1c85373..d614cf0a 100644 --- a/vignettes/vignette_describe.Rmd +++ b/vignettes/vignette_describe.Rmd @@ -114,9 +114,9 @@ plot(cp_rf, variables = perturbed_variable_continuous) describe(cp_rf, variables = perturbed_variable_continuous) ``` -Ceteris Paribus profiles are described only for a single observation. If we want to access the influence of more than one observation, we need to describe dependency profiles. +Ceteris Paribus profiles are described only for a single observation. If we want to access the influence of more than one observation, we need to describe dependence profiles. -## Partial Dependency Profiles +## Partial Dependence Profiles ```{r} pdp <- aggregate_profiles(cp_rf, type = "partial") diff --git a/vignettes/vignette_simulated.Rmd b/vignettes/vignette_simulated.Rmd index f8e81c2d..a757416e 100644 --- a/vignettes/vignette_simulated.Rmd +++ b/vignettes/vignette_simulated.Rmd @@ -74,33 +74,33 @@ plot(cp_model) + ``` -# Dependency profiles +# Dependence profiles -Lets try Partial Dependency profiles, Conditional Dependency profiles and Accumulated Local profiles. For the last two we can try different smoothing factors +Lets try Partial Dependence profiles, Conditional Dependence profiles and Accumulated Local profiles. 
For the last two we can try different smoothing factors ```{r} -pd_model <- partial_dependency(explain_the_model, variables = c("x1", "x2")) +pd_model <- partial_dependence(explain_the_model, variables = c("x1", "x2")) pd_model$`_label_` = "PDP" -cd_model <- conditional_dependency(explain_the_model, variables = c("x1", "x2")) +cd_model <- conditional_dependence(explain_the_model, variables = c("x1", "x2")) cd_model$`_label_` = "CDP 0.25" -ad_model <- accumulated_dependency(explain_the_model, variables = c("x1", "x2")) +ad_model <- accumulated_dependence(explain_the_model, variables = c("x1", "x2")) ad_model$`_label_` = "ALE 0.25" plot(ad_model, cd_model, pd_model) + ggtitle("Feature effects - PDP, CDP, ALE") -cd_model_1 <- conditional_dependency(explain_the_model, variables = c("x1", "x2"), span = 0.1) +cd_model_1 <- conditional_dependence(explain_the_model, variables = c("x1", "x2"), span = 0.1) cd_model_1$`_label_` = "CDP 0.1" -cd_model_5 <- conditional_dependency(explain_the_model, variables = c("x1", "x2"), span = 0.5) +cd_model_5 <- conditional_dependence(explain_the_model, variables = c("x1", "x2"), span = 0.5) cd_model_5$`_label_` = "CDP 0.5" -ad_model_1 <- accumulated_dependency(explain_the_model, variables = c("x1", "x2"), span = 0.5) +ad_model_1 <- accumulated_dependence(explain_the_model, variables = c("x1", "x2"), span = 0.5) ad_model_1$`_label_` = "ALE 0.1" -ad_model_5 <- accumulated_dependency(explain_the_model, variables = c("x1", "x2"), span = 0.5) +ad_model_5 <- accumulated_dependence(explain_the_model, variables = c("x1", "x2"), span = 0.5) ad_model_5$`_label_` = "ALE 0.5" plot(ad_model, cd_model, pd_model, cd_model_1, cd_model_5, ad_model_1, ad_model_5) + @@ -108,7 +108,7 @@ plot(ad_model, cd_model, pd_model, cd_model_1, cd_model_5, ad_model_1, ad_model_ ``` -# Dependency profiles in groups +# Dependence profiles in groups And now, let's see how the grouping factor works @@ -119,14 +119,14 @@ df$x3 <- factor(sign(df$x2)) explain_the_model$data = 
df # PDP in groups -pd_model_groups <- partial_dependency(explain_the_model, +pd_model_groups <- partial_dependence(explain_the_model, variables = c("x1", "x2"), groups = "x3") plot(pd_model_groups) + - ggtitle("Partial Dependency") + ggtitle("Partial Dependence") # ALE in groups -ad_model_groups <- accumulated_dependency(explain_the_model, +ad_model_groups <- accumulated_dependence(explain_the_model, variables = c("x1", "x2"), groups = "x3") plot(ad_model_groups) + @@ -134,11 +134,11 @@ plot(ad_model_groups) + # CDP in groups -cd_model_groups <- conditional_dependency(explain_the_model, +cd_model_groups <- conditional_dependence(explain_the_model, variables = c("x1", "x2"), groups = "x3") plot(cd_model_groups) + - ggtitle("Conditional Dependency") + ggtitle("Conditional Dependence") ``` diff --git a/vignettes/vignette_titanic.Rmd b/vignettes/vignette_titanic.Rmd index 4c5b1c68..4aa57169 100644 --- a/vignettes/vignette_titanic.Rmd +++ b/vignettes/vignette_titanic.Rmd @@ -69,31 +69,31 @@ plot(fi_rf) As we see the most important feature is `gender`. Next three importnat features are `class`, `age` and `fare`. Let's see the link between model response and these features. -Such univariate relation can be calculated with `partial_dependency()`. +Such univariate relation can be calculated with `partial_dependence()`. ## age Kids 5 years old and younger have much higher survival probability. 
-### Partial Dependency Profiles +### Partial Dependence Profiles ```{r} -pp_age <- partial_dependency(explain_titanic_rf, variables = c("age", "fare")) +pp_age <- partial_dependence(explain_titanic_rf, variables = c("age", "fare")) head(pp_age) plot(pp_age) ``` -### Conditional Dependency Profiles +### Conditional Dependence Profiles ```{r} -cp_age <- conditional_dependency(explain_titanic_rf, variables = c("age", "fare")) +cp_age <- conditional_dependence(explain_titanic_rf, variables = c("age", "fare")) plot(cp_age) ``` ### Accumulated Local Effect Profiles ```{r} -ap_age <- accumulated_dependency(explain_titanic_rf, variables = c("age", "fare")) +ap_age <- accumulated_dependence(explain_titanic_rf, variables = c("age", "fare")) plot(ap_age) ```