diff --git a/DESCRIPTION b/DESCRIPTION index 26193e43..1e23b739 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: ingredients Title: Effects and Importances of Model Ingredients -Version: 0.5.2 +Version: 1.0 Authors@R: c(person("Przemyslaw", "Biecek", email = "przemyslaw.biecek@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-8423-1823")), @@ -11,9 +11,9 @@ Description: Collection of tools for assessment of feature importance and featur Key functions are: feature_importance() for assessment of global level feature importance, ceteris_paribus() for calculation of the what-if plots, - partial_dependency() for partial dependency plots, - conditional_dependency() for conditional dependency plots, - accumulated_dependency() for accumulated local effects plots, + partial_dependence() for partial dependence plots, + conditional_dependence() for conditional dependence plots, + accumulated_dependence() for accumulated local effects plots, aggregate_profiles() and cluster_profiles() for aggregation of ceteris paribus profiles, generic print() and plot() for better usability of selected explainers, generic plotD3() for interactive, D3 based explanations, and diff --git a/NAMESPACE b/NAMESPACE index b140e5f8..c2153306 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,21 +1,22 @@ # Generated by roxygen2: do not edit by hand -S3method(accumulated_dependency,ceteris_paribus_explainer) -S3method(accumulated_dependency,default) -S3method(accumulated_dependency,explainer) +S3method(accumulated_dependence,ceteris_paribus_explainer) +S3method(accumulated_dependence,default) +S3method(accumulated_dependence,explainer) S3method(ceteris_paribus,default) S3method(ceteris_paribus,explainer) -S3method(conditional_dependency,ceteris_paribus_explainer) -S3method(conditional_dependency,default) -S3method(conditional_dependency,explainer) +S3method(conditional_dependence,ceteris_paribus_explainer) +S3method(conditional_dependence,default) 
+S3method(conditional_dependence,explainer) S3method(describe,ceteris_paribus_explainer) S3method(describe,feature_importance_explainer) +S3method(describe,partial_dependence_explainer) S3method(describe,partial_dependency_explainer) S3method(feature_importance,default) S3method(feature_importance,explainer) -S3method(partial_dependency,ceteris_paribus_explainer) -S3method(partial_dependency,default) -S3method(partial_dependency,explainer) +S3method(partial_dependence,ceteris_paribus_explainer) +S3method(partial_dependence,default) +S3method(partial_dependence,explainer) S3method(plot,aggregated_profiles_explainer) S3method(plot,ceteris_paribus_2d_explainer) S3method(plot,ceteris_paribus_explainer) @@ -29,16 +30,19 @@ S3method(print,ceteris_paribus_explainer) S3method(print,feature_importance_explainer) S3method(select_neighbours,default) S3method(select_sample,default) +export(accumulated_dependence) export(accumulated_dependency) export(aggregate_profiles) export(calculate_oscillations) export(ceteris_paribus) export(ceteris_paribus_2d) export(cluster_profiles) +export(conditional_dependence) export(conditional_dependency) export(describe) export(feature_importance) export(local_dependency) +export(partial_dependence) export(partial_dependency) export(plotD3) export(select_neighbours) diff --git a/NEWS.md b/NEWS.md index 2fa1e82b..05f577fc 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +ingredients 1.0 +--------------------------------------------------------------- +* change `dependency` to `dependence` [#103](https://github.com/ModelOriented/ingredients/issues/103) + ingredients 0.5.2 --------------------------------------------------------------- * `ceteris_paribus` profiles are now working for categorical variables diff --git a/R/accumulated_dependency.R b/R/accumulated_dependence.R similarity index 86% rename from R/accumulated_dependency.R rename to R/accumulated_dependence.R index 06bcbc0d..435a16cb 100644 --- a/R/accumulated_dependency.R +++ 
b/R/accumulated_dependence.R @@ -1,9 +1,9 @@ #' Accumulated Local Effects Profiles aka ALEPlots #' #' Accumulated Local Effects Profiles accumulate local changes in Ceteris Paribus Profiles. -#' Function \code{\link{accumulated_dependency}} calls \code{\link{ceteris_paribus}} and then \code{\link{aggregate_profiles}}. +#' Function \code{\link{accumulated_dependence}} calls \code{\link{ceteris_paribus}} and then \code{\link{aggregate_profiles}}. #' -#' Find more detailes in the \href{https://pbiecek.github.io/ema/accumulatedLocalProfiles.html}{Accumulated Local Dependency Chapter}. +#' Find more detailes in the \href{https://pbiecek.github.io/ema/accumulatedLocalProfiles.html}{Accumulated Local Dependence Chapter}. #' #' @param x an explainer created with function \code{DALEX::explain()}, an object of the class \code{ceteris_paribus_explainer} #' or a model to be explained. @@ -13,7 +13,7 @@ #' @param variables names of variables for which profiles shall be calculated. #' Will be passed to \code{\link{calculate_variable_split}}. #' If \code{NULL} then all variables from the validation data will be used. -#' @param N number of observations used for calculation of partial dependency profiles. +#' @param N number of observations used for calculation of partial dependence profiles. #' By default, 500 observations will be chosen randomly. #' @param ... other parameters #' @param variable_splits named list of splits for variables, in most cases created with \code{\link{calculate_variable_split}}. 
@@ -39,7 +39,7 @@ #' y = titanic_imputed[,8], #' verbose = FALSE) #' -#' adp_glm <- accumulated_dependency(explain_titanic_glm, +#' adp_glm <- accumulated_dependence(explain_titanic_glm, #' N = 150, variables = c("age", "fare")) #' head(adp_glm) #' plot(adp_glm) @@ -54,21 +54,21 @@ #' y = titanic_imputed[,8], #' verbose = FALSE) #' -#' adp_rf <- accumulated_dependency(explain_titanic_rf, N = 200, variable_type = "numerical") +#' adp_rf <- accumulated_dependence(explain_titanic_rf, N = 200, variable_type = "numerical") #' plot(adp_rf) #' -#' adp_rf <- accumulated_dependency(explain_titanic_rf, N = 200, variable_type = "categorical") +#' adp_rf <- accumulated_dependence(explain_titanic_rf, N = 200, variable_type = "categorical") #' plotD3(adp_rf, label_margin = 80, scale_plot = TRUE) #' } #' #' @export -#' @rdname accumulated_dependency -accumulated_dependency <- function(x, ...) - UseMethod("accumulated_dependency") +#' @rdname accumulated_dependence +accumulated_dependence <- function(x, ...) 
+ UseMethod("accumulated_dependence") #' @export -#' @rdname accumulated_dependency -accumulated_dependency.explainer <- function(x, +#' @rdname accumulated_dependence +accumulated_dependence.explainer <- function(x, variables = NULL, N = 500, variable_splits = NULL, @@ -81,7 +81,7 @@ accumulated_dependency.explainer <- function(x, predict_function <- x$predict_function label <- x$label - accumulated_dependency.default(x = model, + accumulated_dependence.default(x = model, data = data, predict_function = predict_function, label = label, @@ -94,8 +94,8 @@ accumulated_dependency.explainer <- function(x, #' @export -#' @rdname accumulated_dependency -accumulated_dependency.default <- function(x, +#' @rdname accumulated_dependence +accumulated_dependence.default <- function(x, data, predict_function = predict, label = class(x)[1], @@ -127,10 +127,13 @@ accumulated_dependency.default <- function(x, #' @export -#' @rdname accumulated_dependency -accumulated_dependency.ceteris_paribus_explainer <- function(x, ..., +#' @rdname accumulated_dependence +accumulated_dependence.ceteris_paribus_explainer <- function(x, ..., variables = NULL) { aggregate_profiles(x, ..., type = "accumulated", variables = variables) } +#' @export +#' @rdname accumulated_dependence +accumulated_dependency <- accumulated_dependence diff --git a/R/aggregate_profiles.R b/R/aggregate_profiles.R index e8eb2baf..6a019af0 100644 --- a/R/aggregate_profiles.R +++ b/R/aggregate_profiles.R @@ -1,9 +1,9 @@ #' Aggregates Ceteris Paribus Profiles #' #' The function \code{aggregate_profiles()} calculates an aggregate of ceteris paribus profiles. -#' It can be: Partial Dependency Profile (average across Ceteris Paribus Profiles), -#' Conditional Dependency Profile (local weighted average across Ceteris Paribus Profiles) or -#' Accumulated Local Dependency Profile (cummulated average local changes in Ceteris Paribus Profiles). 
+#' It can be: Partial Dependence Profile (average across Ceteris Paribus Profiles), +#' Conditional Dependence Profile (local weighted average across Ceteris Paribus Profiles) or +#' Accumulated Local Dependence Profile (cummulated average local changes in Ceteris Paribus Profiles). #' #' @param x a ceteris paribus explainer produced with function \code{ceteris_paribus()} #' @param ... other explainers that shall be calculated together @@ -170,17 +170,17 @@ aggregate_profiles <- function(x, ..., if (type == "partial") { aggregated_profiles <- aggregated_profiles_partial(all_profiles, groups) class(aggregated_profiles) <- c("aggregated_profiles_explainer", - "partial_dependency_explainer", "data.frame") + "partial_dependence_explainer", "data.frame") } if (type == "conditional") { aggregated_profiles <- aggregated_profiles_conditional(all_profiles, groups, span = span) class(aggregated_profiles) <- c("aggregated_profiles_explainer", - "conditional_dependency_explainer", "data.frame") + "conditional_dependence_explainer", "data.frame") } if (type == "accumulated") { aggregated_profiles <- aggregated_profiles_accumulated(all_profiles, groups, span = span, center = center) class(aggregated_profiles) <- c("aggregated_profiles_explainer", - "accumulated_dependency_explainer", "data.frame") + "accumulated_dependence_explainer", "data.frame") } # calculate mean(all observation's _yhat_), mean of prediction diff --git a/R/conditional_dependency.R b/R/conditional_dependence.R similarity index 82% rename from R/conditional_dependency.R rename to R/conditional_dependence.R index 8edc1f38..7a4101ff 100644 --- a/R/conditional_dependency.R +++ b/R/conditional_dependence.R @@ -1,9 +1,9 @@ -#' Conditional Dependency Profiles +#' Conditional Dependence Profiles #' -#' Conditional Dependency Profiles (aka Local Profiles) average localy Ceteris Paribus Profiles. -#' Function 'conditional_dependency' calls 'ceteris_paribus' and then 'aggregate_profiles'. 
+#' Conditional Dependence Profiles (aka Local Profiles) average localy Ceteris Paribus Profiles. +#' Function 'conditional_dependence' calls 'ceteris_paribus' and then 'aggregate_profiles'. #' -#' Find more detailes in the \href{https://pbiecek.github.io/ema/accumulatedLocalProfiles.html}{Accumulated Local Dependency Chapter}. +#' Find more detailes in the \href{https://pbiecek.github.io/ema/accumulatedLocalProfiles.html}{Accumulated Local Dependence Chapter}. #' #' @param x an explainer created with function \code{DALEX::explain()}, an object of the class \code{ceteris_paribus_explainer} #' or a model to be explained. @@ -12,7 +12,7 @@ #' @param predict_function predict function, will be extracted from \code{x} if it's an explainer #' @param variables names of variables for which profiles shall be calculated. #' Will be passed to \code{\link{calculate_variable_split}}. If \code{NULL} then all variables from the validation data will be used. -#' @param N number of observations used for calculation of partial dependency profiles. By default 500. +#' @param N number of observations used for calculation of partial dependence profiles. By default 500. #' @param ... other parameters #' @param variable_splits named list of splits for variables, in most cases created with \code{\link{calculate_variable_split}}. #' If \code{NULL} then it will be calculated based on validation data avaliable in the \code{explainer}. 
@@ -36,7 +36,7 @@ #' y = titanic_imputed[,8], #' verbose = FALSE) #' -#' cdp_glm <- conditional_dependency(explain_titanic_glm, +#' cdp_glm <- conditional_dependence(explain_titanic_glm, #' N = 150, variables = c("age", "fare")) #' head(cdp_glm) #' plot(cdp_glm) @@ -51,21 +51,21 @@ #' y = titanic_imputed[,8], #' verbose = FALSE) #' -#' cdp_rf <- conditional_dependency(explain_titanic_rf, N = 200, variable_type = "numerical") +#' cdp_rf <- conditional_dependence(explain_titanic_rf, N = 200, variable_type = "numerical") #' plot(cdp_rf) #' -#' cdp_rf <- conditional_dependency(explain_titanic_rf, N = 200, variable_type = "categorical") +#' cdp_rf <- conditional_dependence(explain_titanic_rf, N = 200, variable_type = "categorical") #' plotD3(cdp_rf, label_margin = 80, scale_plot = TRUE) #' } #' #' @export -#' @rdname conditional_dependency -conditional_dependency <- function(x, ...) - UseMethod("conditional_dependency") +#' @rdname conditional_dependence +conditional_dependence <- function(x, ...) + UseMethod("conditional_dependence") #' @export -#' @rdname conditional_dependency -conditional_dependency.explainer <- function(x, +#' @rdname conditional_dependence +conditional_dependence.explainer <- function(x, variables = NULL, N = 500, variable_splits = NULL, @@ -78,7 +78,7 @@ conditional_dependency.explainer <- function(x, predict_function <- x$predict_function label <- x$label - conditional_dependency.default(x = model, + conditional_dependence.default(x = model, data = data, predict_function = predict_function, label = label, @@ -91,8 +91,8 @@ conditional_dependency.explainer <- function(x, #' @export -#' @rdname conditional_dependency -conditional_dependency.default <- function(x, +#' @rdname conditional_dependence +conditional_dependence.default <- function(x, data, predict_function = predict, label = class(x)[1], @@ -119,18 +119,22 @@ conditional_dependency.default <- function(x, variable_splits = variable_splits, label = label, ...) 
- conditional_dependency.ceteris_paribus_explainer(cp, variables = variables, variable_type = variable_type, ...) + conditional_dependence.ceteris_paribus_explainer(cp, variables = variables, variable_type = variable_type, ...) } #' @export -#' @rdname conditional_dependency -conditional_dependency.ceteris_paribus_explainer <- function(x, ..., +#' @rdname conditional_dependence +conditional_dependence.ceteris_paribus_explainer <- function(x, ..., variables = NULL) { aggregate_profiles(x, ..., type = "conditional", variables = variables) } #' @export -#' @rdname conditional_dependency -local_dependency <- conditional_dependency +#' @rdname conditional_dependence +local_dependency <- conditional_dependence + +#' @export +#' @rdname conditional_dependence +conditional_dependency <- conditional_dependence diff --git a/R/describe_aggregated_profiles.R b/R/describe_aggregated_profiles.R index 2514adad..3d266355 100644 --- a/R/describe_aggregated_profiles.R +++ b/R/describe_aggregated_profiles.R @@ -32,7 +32,7 @@ #' #' @export #' @rdname describe -describe.partial_dependency_explainer <- function(x, +describe.partial_dependence_explainer <- function(x, nonsignificance_treshold = 0.15, ..., display_values = FALSE, @@ -290,3 +290,6 @@ specify_df_aggregated <- function(x, variables, nonsignificance_treshold) { list("df" = df, "treshold" = treshold) } + +#' @export +describe.partial_dependency_explainer <- describe.partial_dependence_explainer diff --git a/R/partial_dependency.R b/R/partial_dependence.R similarity index 85% rename from R/partial_dependency.R rename to R/partial_dependence.R index ba9b5ae9..1ae0baf2 100644 --- a/R/partial_dependency.R +++ b/R/partial_dependence.R @@ -1,7 +1,7 @@ -#' Partial Dependency Profiles +#' Partial Dependence Profiles #' -#' Partial Dependency Profiles are averages from Ceteris Paribus Profiles. -#' Function \code{partial_dependency} calls \code{ceteris_paribus} and then \code{aggregate_profiles}. 
+#' Partial Dependence Profiles are averages from Ceteris Paribus Profiles. +#' Function \code{partial_dependence} calls \code{ceteris_paribus} and then \code{aggregate_profiles}. #' #' Find more detailes in the \href{https://pbiecek.github.io/ema/partialDependenceProfiles.html}{Partial Dependence Profiles Chapter}. #' @@ -13,7 +13,7 @@ #' @param variables names of variables for which profiles shall be calculated. #' Will be passed to \code{\link{calculate_variable_split}}. #' If \code{NULL} then all variables from the validation data will be used. -#' @param N number of observations used for calculation of partial dependency profiles. By default 500. +#' @param N number of observations used for calculation of partial dependence profiles. By default 500. #' @param ... other parameters #' @param variable_splits named list of splits for variables, in most cases created with \code{\link{calculate_variable_split}}. #' If \code{NULL} then it will be calculated based on validation data avaliable in the \code{explainer}. @@ -37,7 +37,7 @@ #' y = titanic_imputed[,8], #' verbose = FALSE) #' -#' pdp_glm <- partial_dependency(explain_titanic_glm, +#' pdp_glm <- partial_dependence(explain_titanic_glm, #' N = 50, variables = c("age", "fare")) #' head(pdp_glm) #' plot(pdp_glm) @@ -52,21 +52,21 @@ #' y = titanic_imputed[,8], #' verbose = FALSE) #' -#' pdp_rf <- partial_dependency(explain_titanic_rf, variable_type = "numerical") +#' pdp_rf <- partial_dependence(explain_titanic_rf, variable_type = "numerical") #' plot(pdp_rf) #' -#' pdp_rf <- partial_dependency(explain_titanic_rf, variable_type = "categorical") +#' pdp_rf <- partial_dependence(explain_titanic_rf, variable_type = "categorical") #' plotD3(pdp_rf, label_margin = 80, scale_plot = TRUE) #' } #' #' @export -#' @rdname partial_dependency -partial_dependency <- function(x, ...) - UseMethod("partial_dependency") +#' @rdname partial_dependence +partial_dependence <- function(x, ...) 
+ UseMethod("partial_dependence") #' @export -#' @rdname partial_dependency -partial_dependency.explainer <- function(x, +#' @rdname partial_dependence +partial_dependence.explainer <- function(x, variables = NULL, N = 500, variable_splits = NULL, @@ -79,7 +79,7 @@ partial_dependency.explainer <- function(x, predict_function <- x$predict_function label <- x$label - partial_dependency.default(x = model, + partial_dependence.default(x = model, data = data, predict_function = predict_function, label = label, @@ -93,8 +93,8 @@ partial_dependency.explainer <- function(x, #' @export -#' @rdname partial_dependency -partial_dependency.default <- function(x, +#' @rdname partial_dependence +partial_dependence.default <- function(x, data, predict_function = predict, label = class(x)[1], @@ -127,9 +127,12 @@ partial_dependency.default <- function(x, #' @export -#' @rdname partial_dependency -partial_dependency.ceteris_paribus_explainer <- function(x, ..., +#' @rdname partial_dependence +partial_dependence.ceteris_paribus_explainer <- function(x, ..., variables = NULL) { aggregate_profiles(x, ..., type = "partial", variables = variables) } +#' @export +#' @rdname partial_dependence +partial_dependency <- partial_dependence diff --git a/R/plotD3_aggregated_profiles.R b/R/plotD3_aggregated_profiles.R index 71a1600c..a38ec525 100644 --- a/R/plotD3_aggregated_profiles.R +++ b/R/plotD3_aggregated_profiles.R @@ -121,7 +121,7 @@ plotD3.aggregated_profiles_explainer <- function(x, ..., size = 2, alpha = 1, }) - ymean <- ifelse("partial_dependency_explainer" %in% class(x), round(attr(x, "mean_prediction"), 3), 0) + ymean <- ifelse("partial_dependence_explainer" %in% class(x), round(attr(x, "mean_prediction"), 3), 0) } options <- list(variableNames = as.list(all_variables), diff --git a/R/plot_aggregated_profiles.R b/R/plot_aggregated_profiles.R index 172a4820..1ac9a09c 100644 --- a/R/plot_aggregated_profiles.R +++ b/R/plot_aggregated_profiles.R @@ -1,6 +1,6 @@ #' Plots Aggregated 
Profiles #' -#' Function \code{plot.aggregated_profiles_explainer} plots partial dependency plot or accumulated effect plot. +#' Function \code{plot.aggregated_profiles_explainer} plots partial dependence plot or accumulated effect plot. #' It works in a similar way to \code{plot.ceteris_paribus}, but instead of individual profiles #' show average profiles for each variable listed in the \code{variables} vector. #' @@ -27,11 +27,11 @@ #' y = titanic_imputed[,8], #' verbose = FALSE) #' -#' pdp_rf_p <- partial_dependency(explain_titanic_glm, N = 50) +#' pdp_rf_p <- partial_dependence(explain_titanic_glm, N = 50) #' pdp_rf_p$`_label_` <- "RF_partial" -#' pdp_rf_l <- conditional_dependency(explain_titanic_glm, N = 50) +#' pdp_rf_l <- conditional_dependence(explain_titanic_glm, N = 50) #' pdp_rf_l$`_label_` <- "RF_local" -#' pdp_rf_a<- accumulated_dependency(explain_titanic_glm, N = 50) +#' pdp_rf_a<- accumulated_dependence(explain_titanic_glm, N = 50) #' pdp_rf_a$`_label_` <- "RF_accumulated" #' head(pdp_rf_p) #' plot(pdp_rf_p, pdp_rf_l, pdp_rf_a, color = "_label_") diff --git a/README.md b/README.md index 70f65bca..277920ca 100644 --- a/README.md +++ b/README.md @@ -14,9 +14,9 @@ Key functions: * `feature_importance()` for assessment of global level feature importance, * `ceteris_paribus()` for calculation of the Ceteris Paribus / What-If Profiles (read more at https://pbiecek.github.io/ema/ceterisParibus.html), -* `partial_dependency()` for Partial Dependency Plots, -* `conditional_dependency()` for Conditional Dependency Plots also called M Plots, -* `accumulated_dependency()` for Accumulated Local Effects Plots, +* `partial_dependence()` for Partial Dependence Plots, +* `conditional_dependence()` for Conditional Dependence Plots also called M Plots, +* `accumulated_dependence()` for Accumulated Local Effects Plots, * `aggregate_profiles()` and `cluster_profiles()` for aggregation of Ceteris Paribus Profiles, * `calculate_oscillations()` for calculation of the 
Ceteris Paribus Oscillations (read more at https://pbiecek.github.io/ema/ceterisParibusOscillations.html), * `ceteris_paribus_2d()` for Ceteris Paribus 2D Profiles (read more at https://pbiecek.github.io/ema/ceterisParibus2d.html), diff --git a/docs/404.html b/docs/404.html index 6f5a07b0..834499a8 100644 --- a/docs/404.html +++ b/docs/404.html @@ -36,12 +36,12 @@ + - @@ -80,7 +80,7 @@
part of the DrWhy.AI developed by the MI^2 DataLab - 0.5.2 + 1.0
diff --git a/docs/articles/index.html b/docs/articles/index.html index 374c8477..f4d928e9 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -36,12 +36,12 @@ + - @@ -80,7 +80,7 @@
part of the DrWhy.AI developed by the MI^2 DataLab - 0.5.2 + 1.0
diff --git a/docs/articles/vignette_describe.html b/docs/articles/vignette_describe.html index 0c75cf30..84e010b4 100644 --- a/docs/articles/vignette_describe.html +++ b/docs/articles/vignette_describe.html @@ -42,7 +42,7 @@
part of the DrWhy.AI developed by the MI^2 DataLab - 0.5.2 + 1.0
@@ -100,7 +100,7 @@

Explanations in natural language

Adam Izdebski

-

2020-01-26

+

2020-02-17

Source: vignettes/vignette_describe.Rmd @@ -120,129 +120,128 @@

The ingredients package allows for generating prediction validation and prediction perturbation explanations. They allow for both global and local model explanation.

Generic function describe() generates a natural language description for explanations generated with the feature_importance() and ceteris_paribus() functions.

To demonstrate automatic descriptions, we first load the data set and build a random forest model that classifies which of the passengers survived the sinking of the Titanic. Then, using the DALEX package, we generate an explainer of the model. Lastly, we select a random passenger whose prediction should be explained.

-
library("DALEX")
-library("ingredients")
-library("randomForest")
-titanic <- na.omit(titanic)
-
-model_titanic_rf <- randomForest(survived == "yes" ~ .,
-                                 data = titanic)
-
-explain_titanic_rf <- explain(model_titanic_rf,
-                            data = titanic[,-9],
-                            y = titanic$survived == "yes",
-                            label = "Random Forest")
+
library("DALEX")
+library("ingredients")
+library("randomForest")
+titanic <- na.omit(titanic)
+
+model_titanic_rf <- randomForest(survived == "yes" ~ .,
+                                 data = titanic)
+
+explain_titanic_rf <- explain(model_titanic_rf,
+                            data = titanic[,-9],
+                            y = titanic$survived == "yes",
+                            label = "Random Forest")
#> Preparation of a new explainer is initiated
 #>   -> model label       :  Random Forest 
 #>   -> data              :  2099  rows  8  cols 
 #>   -> target variable   :  2099  values 
 #>   -> model_info        :  package randomForest , ver. 4.6.14 , task regression ( [33m default [39m ) 
 #>   -> predict function  :  yhat.randomForest  will be used ( [33m default [39m )
-#>   -> predicted values  :  numerical, min =  0.007074326 , mean =  0.323572 , max =  0.9909881  
+#>   -> predicted values  :  numerical, min =  0.007831174 , mean =  0.3243102 , max =  0.9920739  
 #>   -> residual function :  difference between y and yhat ( [33m default [39m )
-#>   -> residuals         :  numerical, min =  -0.8119382 , mean =  0.0008682545 , max =  0.8993734  
+#>   -> residuals         :  numerical, min =  -0.8199771 , mean =  0.0001300024 , max =  0.9015269  
 #>  [32m A new explainer has been created! [39m
-
passanger <- titanic[sample(nrow(titanic), 1) ,-9]
-passanger
-
#>     gender age class  embarked country   fare sibsp parch
-#> 959 female  22   1st Cherbourg   Spain 108.18     1     0
+
passanger <- titanic[sample(nrow(titanic), 1) ,-9]
+passanger
+
#>      gender age class    embarked       country  fare sibsp parch
+#> 1284   male  50   1st Southampton United States 211.1     1     1

Now we are ready to generate various explanations and then describe them with the describe() function.

Feature Importance

Feature importance explanation shows the importance of all the model’s variables. As it is a global explanation technique, no passenger needs to be specified.

-
importance_rf <- feature_importance(explain_titanic_rf)
-plot(importance_rf)
+
importance_rf <- feature_importance(explain_titanic_rf)
+plot(importance_rf)

Function describe() easily describes which variables are the most important. Argument nonsignificance_treshold, as always, sets the level above which variables become significant. For a higher threshold, fewer variables will be described as significant.

-
describe(importance_rf)
-
#> The number of important variables for Random Forest's prediction is 65 out of 108. 
-#>  Variables _baseline_, _baseline_, _baseline_ have the highest importantance.
+
describe(importance_rf)
+
#> The number of important variables for Random Forest's prediction is 5 out of 8. 
+#>  Variables gender, class, age have the highest importantance.

Ceteris Paribus Profiles

Ceteris Paribus profiles show how the model’s prediction changes with the change of a specified variable.

- +

For a user with no experience, interpreting the above plot may not be straightforward. Thus we generate a natural language description in order to make it easier.

-
describe(cp_rf)
-
#> For the selected instance, prediction estimated by Random Forest is equal to 0.95.
+
describe(cp_rf)
+
#> For the selected instance, prediction estimated by Random Forest is equal to 0.239.
 #> 
-#> Model's prediction would decrease substantially if the value of class variable would change to "engineering crew", "3rd", "restaurant staff", "victualling crew", "deck crew".
-#> The largest change would be marked if class variable would change to "engineering crew".
+#> Model's prediction would increase substantially if the value of class variable would change to "deck crew". On the other hand, Random Forest's prediction would decrease substantially if the value of class variable would change to "2nd", "restaurant staff", "3rd", "engineering crew", "victualling crew". The largest change would be marked if class variable would change to "2nd".
 #> 
-#> Other variables are with less importance and they do not change prediction by more than 0.04%.
+#> All the variables were displayed.

Natural language descriptions should be flexible in order to provide the desired level of complexity and specificity. Thus various parameters can modify the description being generated.

- -
#> Random Forest predicts that for the selected instance, the probability that the passanger will survive is equal to 0.95
+
+
#> Random Forest predicts that for the selected instance, the probability that the passanger will survive is equal to 0.239
 #> 
-#> The most important change in Random Forest's prediction would occur for class = "engineering crew". It decreases the prediction by 0.277. 
-#> The second most important change in the prediction would occur for class = "3rd". It decreases the prediction by 0.276. 
-#> The third most important change in the prediction would occur for class = "restaurant staff". It decreases the prediction by 0.215.
+#> The most important change in Random Forest's prediction would occur for class = "2nd". It decreases the prediction by 0.057. 
+#> The second most important change in the prediction would occur for class = "restaurant staff". It decreases the prediction by 0.055. 
+#> The third most important change in the prediction would occur for class = "3rd". It decreases the prediction by 0.053.
 #> 
-#> Other variable values are with less importance. They do not change the the probability that the passanger will survive by more than 0.2.
+#> Other variable values are with less importance. They do not change the the probability that the passanger will survive by more than 0.042.

Please note that describe() can handle only one variable at a time, so it is recommended to specify which variable should be described.

- -
#> Random Forest predicts that for the selected instance, the probability that the passanger will survive is equal to 0.95
+
+
#> Random Forest predicts that for the selected instance, the probability that the passanger will survive is equal to 0.239
 #> 
-#> The most important change in Random Forest's prediction would occur for class = "engineering crew". It decreases the prediction by 0.277. 
-#> The second most important change in the prediction would occur for class = "3rd". It decreases the prediction by 0.276. 
-#> The third most important change in the prediction would occur for class = "restaurant staff". It decreases the prediction by 0.215.
+#> The most important change in Random Forest's prediction would occur for class = "2nd". It decreases the prediction by 0.057. 
+#> The second most important change in the prediction would occur for class = "restaurant staff". It decreases the prediction by 0.055. 
+#> The third most important change in the prediction would occur for class = "3rd". It decreases the prediction by 0.053.
 #> 
-#> Other variable values are with less importance. They do not change the the probability that the passanger will survive by more than 0.2.
+#> Other variable values are with less importance. They do not change the the probability that the passanger will survive by more than 0.042.

Continuous variables are described as well.

-
perturbed_variable_continuous <- "age"
-cp_rf <- ceteris_paribus(explain_titanic_rf,
-                         passanger)
-plot(cp_rf, variables = perturbed_variable_continuous)
+
perturbed_variable_continuous <- "age"
+cp_rf <- ceteris_paribus(explain_titanic_rf,
+                         passanger)
+plot(cp_rf, variables = perturbed_variable_continuous)

-
describe(cp_rf, variables = perturbed_variable_continuous)
-
#> Random Forest predicts that for the selected instance, prediction is equal to 0.95
+
describe(cp_rf, variables = perturbed_variable_continuous)
+
#> Random Forest predicts that for the selected instance, prediction is equal to 0.239
 #> 
-#> The highest prediction occurs for (age = 39), while the lowest for (age = 74).
-#> Breakpoint is identified at (age = 60).
+#> The highest prediction occurs for (age = 2), while the lowest for (age = 74).
+#> Breakpoint is identified at (age = 13).
 #> 
-#> Average model responses are *lower* for variable values *higher* than breakpoint (= 60).
-

Ceteris Paribus profiles are described only for a single observation. If we want to access the influence of more than one observation, we need to describe dependency profiles.

+#> Average model responses are *higher* for variable values *lower* than breakpoint (= 13).
+

Ceteris Paribus profiles are described only for a single observation. If we want to assess the influence of more than one observation, we need to describe dependence profiles.

-
+

-Partial Dependency Profiles

-
pdp <- aggregate_profiles(cp_rf, type = "partial")
-plot(pdp, variables = "fare")
+Partial Dependence Profiles

+
pdp <- aggregate_profiles(cp_rf, type = "partial")
+plot(pdp, variables = "fare")

-
describe(pdp, variables = "fare")
-
#> Random Forest's mean prediction is equal to 0.95.
+
describe(pdp, variables = "fare")
+
#> Random Forest's mean prediction is equal to 0.239.
 #> 
-#> The highest prediction occurs for (fare = 80), while the lowest for (fare = 0).
+#> The highest prediction occurs for (fare = 55.081), while the lowest for (fare = 221.669696).
 #> Breakpoint is identified at (fare = 80).
 #> 
-#> Average model responses are *higher* for variable values *higher* than breakpoint (= 80).
-
pdp <- aggregate_profiles(cp_rf, type = "partial", variable_type = "categorical")
-plot(pdp, variables = perturbed_variable)
+#> Average model responses are *lower* for variable values *higher* than breakpoint (= 80).
+
pdp <- aggregate_profiles(cp_rf, type = "partial", variable_type = "categorical")
+plot(pdp, variables = perturbed_variable)

-
describe(pdp, variables = perturbed_variable)
-
#> Random Forest's mean prediction is equal to 0.95.
+
describe(pdp, variables = perturbed_variable)
+
#> Random Forest's mean prediction is equal to 0.239.
 #> 
-#> Model's prediction would increase substantially if the value of class variable would change to "3rd". On the other hand, Random Forest's prediction would decrease substantially if the value of class variable would change to "engineering crew". The largest change would be marked if class variable would change to "2nd".
+#> Model's prediction would increase substantially if the value of class variable would change to "2nd".
+#> The largest change would be marked if class variable would change to "1st".
 #> 
-#> Other variables are with less importance and they do not change prediction by more than 0.04%.
+#> Other variables are with less importance and they do not change prediction by more than 0.01%.
@@ -100,7 +100,7 @@

Simulated data, real problem

Przemyslaw Biecek

-

2020-01-26

+

2020-02-17

Source: vignettes/vignette_simulated.Rmd @@ -117,26 +117,26 @@

y = x_1 * x_2 + x_2 \]

But \(x_1\) and \(x_2\) are correlated. How do XAI methods work for such a model?

-
# predict function for the model
-the_model_predict <- function(m, x) {
- x$x1 * x$x2 + x$x2
-}
-
-# correlated variables 
-N <- 50
-set.seed(1)
-x1 <- runif(N, -5, 5)
-x2 <- x1 + runif(N)/100
-df <- data.frame(x1, x2)
+
# predict function for the model
+the_model_predict <- function(m, x) {
+ x$x1 * x$x2 + x$x2
+}
+
+# correlated variables 
+N <- 50
+set.seed(1)
+x1 <- runif(N, -5, 5)
+x2 <- x1 + runif(N)/100
+df <- data.frame(x1, x2)

Explainer for the models

In fact this model is defined by the predict function the_model_predict. So it does not matter what is in the first argument of the explain function.

- +
#> Preparation of a new explainer is initiated
 #>   -> model label       :  numeric  ( [33m default [39m )
 #>   -> data              :  50  rows  2  cols 
@@ -151,109 +151,109 @@ 

Ceteris paribus

Use the ceteris_paribus() function to see Ceteris Paribus profiles. Clearly it’s not an additive model, as the effect of \(x_1\) depends on \(x_2\).

-
library("ingredients")
-library("ggplot2")
-
-sample_rows <- data.frame(x1 = -5:5,
-                          x2 = -5:5)
-
-cp_model <- ceteris_paribus(explain_the_model, sample_rows)
-plot(cp_model) +
-  show_observations(cp_model) +
-  ggtitle("Ceteris Paribus profiles")
+
library("ingredients")
+library("ggplot2")
+
+sample_rows <- data.frame(x1 = -5:5,
+                          x2 = -5:5)
+
+cp_model <- ceteris_paribus(explain_the_model, sample_rows)
+plot(cp_model) +
+  show_observations(cp_model) +
+  ggtitle("Ceteris Paribus profiles")

-
+

-Dependency profiles

-

Lets try Partial Dependency profiles, Conditional Dependency profiles and Accumulated Local profiles. For the last two we can try different smoothing factors

-
pd_model <- partial_dependency(explain_the_model, variables = c("x1", "x2"))
-pd_model$`_label_` = "PDP"
-
-cd_model <- conditional_dependency(explain_the_model, variables = c("x1", "x2"))
-cd_model$`_label_` = "CDP 0.25"
-
-ad_model <- accumulated_dependency(explain_the_model, variables = c("x1", "x2"))
-ad_model$`_label_` = "ALE 0.25"
-
-plot(ad_model, cd_model, pd_model) +
-  ggtitle("Feature effects - PDP, CDP, ALE")
+Dependence profiles

+

Let's try Partial Dependence profiles, Conditional Dependence profiles and Accumulated Local profiles. For the last two we can try different smoothing factors.

+
pd_model <- partial_dependence(explain_the_model, variables = c("x1", "x2"))
+pd_model$`_label_` = "PDP"
+
+cd_model <- conditional_dependence(explain_the_model, variables = c("x1", "x2"))
+cd_model$`_label_` = "CDP 0.25"
+
+ad_model <- accumulated_dependence(explain_the_model, variables = c("x1", "x2"))
+ad_model$`_label_` = "ALE 0.25"
+
+plot(ad_model, cd_model, pd_model) +
+  ggtitle("Feature effects - PDP, CDP, ALE")

-
cd_model_1 <- conditional_dependency(explain_the_model, variables = c("x1", "x2"), span = 0.1)
-cd_model_1$`_label_` = "CDP 0.1"
-
-cd_model_5 <- conditional_dependency(explain_the_model, variables = c("x1", "x2"), span = 0.5)
-cd_model_5$`_label_` = "CDP 0.5"
-
-ad_model_1 <- accumulated_dependency(explain_the_model, variables = c("x1", "x2"), span = 0.5)
-ad_model_1$`_label_` = "ALE 0.1"
-
-ad_model_5 <- accumulated_dependency(explain_the_model, variables = c("x1", "x2"), span = 0.5)
-ad_model_5$`_label_` = "ALE 0.5"
-
-plot(ad_model, cd_model, pd_model, cd_model_1, cd_model_5, ad_model_1, ad_model_5) +
-  ggtitle("Feature effects - PDP, CDP, ALE")
+
cd_model_1 <- conditional_dependence(explain_the_model, variables = c("x1", "x2"), span = 0.1)
+cd_model_1$`_label_` = "CDP 0.1"
+
+cd_model_5 <- conditional_dependence(explain_the_model, variables = c("x1", "x2"), span = 0.5)
+cd_model_5$`_label_` = "CDP 0.5"
+
+ad_model_1 <- accumulated_dependence(explain_the_model, variables = c("x1", "x2"), span = 0.5)
+ad_model_1$`_label_` = "ALE 0.1"
+
+ad_model_5 <- accumulated_dependence(explain_the_model, variables = c("x1", "x2"), span = 0.5)
+ad_model_5$`_label_` = "ALE 0.5"
+
+plot(ad_model, cd_model, pd_model, cd_model_1, cd_model_5, ad_model_1, ad_model_5) +
+  ggtitle("Feature effects - PDP, CDP, ALE")

-
+

-Dependency profiles in groups

+Dependence profiles in groups

And now, let’s see how the grouping factor works

- +

-
# ALE in groups
-ad_model_groups <- accumulated_dependency(explain_the_model, 
-                                      variables = c("x1", "x2"), 
-                                      groups = "x3")
-plot(ad_model_groups) +
-  ggtitle("Accumulated Local")
+

-
# CDP in groups
-cd_model_groups <- conditional_dependency(explain_the_model, 
-                                      variables = c("x1", "x2"), 
-                                      groups = "x3")
-plot(cd_model_groups) +
-  ggtitle("Conditional Dependency")
+

Session info

- -
#> R version 3.6.1 (2019-07-05)
-#> Platform: x86_64-apple-darwin15.6.0 (64-bit)
-#> Running under: macOS Mojave 10.14.4
+
+
#> R version 3.6.0 (2019-04-26)
+#> Platform: x86_64-w64-mingw32/x64 (64-bit)
+#> Running under: Windows 10 x64 (build 17763)
 #> 
 #> Matrix products: default
-#> BLAS:   /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
-#> LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
 #> 
 #> locale:
-#> [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
+#> [1] LC_COLLATE=Polish_Poland.1250  LC_CTYPE=Polish_Poland.1250   
+#> [3] LC_MONETARY=Polish_Poland.1250 LC_NUMERIC=C                  
+#> [5] LC_TIME=Polish_Poland.1250    
 #> 
 #> attached base packages:
 #> [1] stats     graphics  grDevices utils     datasets  methods   base     
 #> 
 #> other attached packages:
-#> [1] ggplot2_3.2.1     ingredients_0.5.2 DALEX_0.9.3      
+#> [1] ggplot2_3.2.1   ingredients_1.0 DALEX_0.9.4    
 #> 
 #> loaded via a namespace (and not attached):
-#>  [1] Rcpp_1.0.3       compiler_3.6.1   pillar_1.4.3     tools_3.6.1     
-#>  [5] digest_0.6.23    evaluate_0.14    memoise_1.1.0    lifecycle_0.1.0 
-#>  [9] tibble_2.1.3     gtable_0.3.0     pkgconfig_2.0.3  rlang_0.4.2     
-#> [13] rstudioapi_0.10  yaml_2.2.0       pkgdown_1.4.1    xfun_0.11       
-#> [17] withr_2.1.2      stringr_1.4.0    dplyr_0.8.3      knitr_1.26      
-#> [21] desc_1.2.0       fs_1.3.1         rprojroot_1.3-2  grid_3.6.1      
+#>  [1] Rcpp_1.0.3       compiler_3.6.0   pillar_1.4.3     tools_3.6.0     
+#>  [5] digest_0.6.24    evaluate_0.14    memoise_1.1.0    lifecycle_0.1.0 
+#>  [9] tibble_2.1.3     gtable_0.3.0     pkgconfig_2.0.3  rlang_0.4.4     
+#> [13] rstudioapi_0.10  yaml_2.2.0       pkgdown_1.4.1    xfun_0.6        
+#> [17] withr_2.1.2      stringr_1.4.0    dplyr_0.8.3      knitr_1.22      
+#> [21] desc_1.2.0       fs_1.3.1         rprojroot_1.3-2  grid_3.6.0      
 #> [25] tidyselect_0.2.5 glue_1.3.1       R6_2.4.1         rmarkdown_1.16  
 #> [29] farver_2.0.3     purrr_0.3.3      magrittr_1.5     backports_1.1.5 
 #> [33] scales_1.1.0     htmltools_0.4.0  MASS_7.3-51.4    assertthat_0.2.1
@@ -263,7 +263,6 @@ 

@@ -100,7 +100,7 @@

General introduction: Survival on the RMS Titanic

Przemyslaw Biecek

-

2020-01-26

+

2020-02-17

Source: vignettes/vignette_titanic.Rmd @@ -113,8 +113,8 @@

2020-01-26

Data for Titanic survival

Let’s see an example for the DALEX package for classification models for the survival problem for the Titanic dataset. Here we are using the dataset titanic available in the DALEX package. Note that this data was copied from the stablelearner package.

-
library("DALEX")
-head(titanic)
+
library("DALEX")
+head(titanic)
#>   gender age class    embarked       country  fare sibsp parch survived
 #> 1   male  42   3rd Southampton United States  7.11     0     0       no
 #> 2   male  13   3rd Southampton United States 20.05     0     2       no
@@ -127,12 +127,12 @@ 

Model for Titanic survival

Ok, now it’s time to create a model. Let’s use the Random Forest model.

-
# prepare model
-library("randomForest")
-titanic <- na.omit(titanic)
-model_titanic_rf <- randomForest(survived == "yes" ~ gender + age + class + embarked +
-                                   fare + sibsp + parch,  data = titanic)
-model_titanic_rf
+
#> 
 #> Call:
 #>  randomForest(formula = survived == "yes" ~ gender + age + class +      embarked + fare + sibsp + parch, data = titanic) 
@@ -140,84 +140,84 @@ 

#> Number of trees: 500 #> No. of variables tried at each split: 2 #> -#> Mean of squared residuals: 0.1429994 -#> % Var explained: 34.76

+#> Mean of squared residuals: 0.1428618 +#> % Var explained: 34.82

Explainer for Titanic survival

The third step (it’s optional but useful) is to create a DALEX explainer for random forest model.

- +
#> Preparation of a new explainer is initiated
 #>   -> model label       :  Random Forest v7 
 #>   -> data              :  2099  rows  8  cols 
 #>   -> target variable   :  2099  values 
 #>   -> model_info        :  package randomForest , ver. 4.6.14 , task regression ( [33m default [39m ) 
 #>   -> predict function  :  yhat.randomForest  will be used ( [33m default [39m )
-#>   -> predicted values  :  numerical, min =  0.009726169 , mean =  0.3249803 , max =  0.9917946  
+#>   -> predicted values  :  numerical, min =  0.0133887 , mean =  0.3248256 , max =  0.9924437  
 #>   -> residual function :  difference between y and yhat ( [33m default [39m )
-#>   -> residuals         :  numerical, min =  -0.8000352 , mean =  -0.0005401156 , max =  0.8904472  
+#>   -> residuals         :  numerical, min =  -0.7953965 , mean =  -0.0003854325 , max =  0.9009842  
 #>  [32m A new explainer has been created! [39m

Model Level Feature Importance

Use the feature_importance() explainer to present importance of particular features. Note that type = "difference" normalizes dropouts, and now they all start in 0.

-
library("ingredients")
-
-fi_rf <- feature_importance(explain_titanic_rf)
-head(fi_rf)
+
library("ingredients")
+
+fi_rf <- feature_importance(explain_titanic_rf)
+head(fi_rf)
#>       variable mean_dropout_loss            label
-#> 1 _full_model_         0.3337988 Random Forest v7
-#> 2      country         0.3337988 Random Forest v7
-#> 3        parch         0.3446496 Random Forest v7
-#> 4        sibsp         0.3461183 Random Forest v7
-#> 5     embarked         0.3505868 Random Forest v7
-#> 6         fare         0.3742650 Random Forest v7
-
plot(fi_rf)
+#> 1 _full_model_ 0.3329421 Random Forest v7 +#> 2 country 0.3329421 Random Forest v7 +#> 3 parch 0.3435576 Random Forest v7 +#> 4 sibsp 0.3447126 Random Forest v7 +#> 5 embarked 0.3498490 Random Forest v7 +#> 6 fare 0.3729980 Random Forest v7
+
plot(fi_rf)

Feature effects

As we see, the most important feature is gender. The next three important features are class, age and fare. Let’s see the link between model response and these features.

-

Such univariate relation can be calculated with partial_dependency().

+

Such univariate relation can be calculated with partial_dependence().

age

Kids 5 years old and younger have much higher survival probability.

-
+

-Partial Dependency Profiles

-
pp_age  <- partial_dependency(explain_titanic_rf, variables =  c("age", "fare"))
-head(pp_age)
+Partial Dependence Profiles +
pp_age  <- partial_dependence(explain_titanic_rf, variables =  c("age", "fare"))
+head(pp_age)
#> Top profiles    : 
 #>   _vname_          _label_       _x_    _yhat_ _ids_
-#> 1    fare Random Forest v7 0.0000000 0.3491995     0
-#> 2     age Random Forest v7 0.1666667 0.5393965     0
-#> 3     age Random Forest v7 2.0000000 0.5598879     0
-#> 4     age Random Forest v7 4.0000000 0.5647907     0
-#> 5    fare Random Forest v7 6.1904000 0.3325981     0
-#> 6     age Random Forest v7 7.0000000 0.5278368     0
-
plot(pp_age)
+#> 1 fare Random Forest v7 0.0000000 0.3159050 0 +#> 2 age Random Forest v7 0.1666667 0.5384993 0 +#> 3 age Random Forest v7 2.0000000 0.5608622 0 +#> 4 age Random Forest v7 4.0000000 0.5678473 0 +#> 5 fare Random Forest v7 6.1904000 0.3005658 0 +#> 6 age Random Forest v7 7.0000000 0.5320725 0 +
plot(pp_age)

-
+

-Conditional Dependency Profiles

-
cp_age  <- conditional_dependency(explain_titanic_rf, variables =  c("age", "fare"))
-plot(cp_age)
+Conditional Dependence Profiles +
cp_age  <- conditional_dependence(explain_titanic_rf, variables =  c("age", "fare"))
+plot(cp_age)

Accumulated Local Effect Profiles

-
ap_age  <- accumulated_dependency(explain_titanic_rf, variables =  c("age", "fare"))
-plot(ap_age)
+
ap_age  <- accumulated_dependence(explain_titanic_rf, variables =  c("age", "fare"))
+plot(ap_age)

@@ -227,75 +227,75 @@

Instance level explanations

Let’s see break down explanation for model predictions for 8 years old male from 1st class that embarked from port C.

First Ceteris Paribus Profiles for numerical variables

-
new_passanger <- data.frame(
-  class = factor("1st", levels = c("1st", "2nd", "3rd", "deck crew", "engineering crew", "restaurant staff", "victualling crew")),
-  gender = factor("male", levels = c("female", "male")),
-  age = 8,
-  sibsp = 0,
-  parch = 0,
-  fare = 72,
-  embarked = factor("Southampton", levels = c("Belfast", "Cherbourg", "Queenstown", "Southampton"))
-)
-
-sp_rf <- ceteris_paribus(explain_titanic_rf, new_passanger)
-plot(sp_rf) +
-  show_observations(sp_rf)
+
new_passanger <- data.frame(
+  class = factor("1st", levels = c("1st", "2nd", "3rd", "deck crew", "engineering crew", "restaurant staff", "victualling crew")),
+  gender = factor("male", levels = c("female", "male")),
+  age = 8,
+  sibsp = 0,
+  parch = 0,
+  fare = 72,
+  embarked = factor("Southampton", levels = c("Belfast", "Cherbourg", "Queenstown", "Southampton"))
+)
+
+sp_rf <- ceteris_paribus(explain_titanic_rf, new_passanger)
+plot(sp_rf) +
+  show_observations(sp_rf)

And for selected categorical variables. Note, that sibsp is numerical but here is presented as a categorical variable.

-
plot(sp_rf,
-     variables = c("class", "embarked", "gender", "sibsp"),
-     variable_type = "categorical")
+
plot(sp_rf,
+     variables = c("class", "embarked", "gender", "sibsp"),
+     variable_type = "categorical")

It looks like the most important features for this passenger are age and sex. After all, his odds of survival are higher than for the average passenger, mainly because of his young age and despite being male.

Profile clustering

-
passangers <- select_sample(titanic, n = 100)
-
-sp_rf <- ceteris_paribus(explain_titanic_rf, passangers)
-clust_rf <- cluster_profiles(sp_rf, k = 3)
-head(clust_rf)
+
passangers <- select_sample(titanic, n = 100)
+
+sp_rf <- ceteris_paribus(explain_titanic_rf, passangers)
+clust_rf <- cluster_profiles(sp_rf, k = 3)
+head(clust_rf)
#> Top profiles    : 
 #>   _vname_            _label_       _x_ _cluster_    _yhat_ _ids_
-#> 1    fare Random Forest v7_1 0.0000000         1 0.1989530     0
-#> 2   sibsp Random Forest v7_1 0.0000000         1 0.1715849     0
-#> 3   parch Random Forest v7_1 0.0000000         1 0.1739948     0
-#> 4     age Random Forest v7_1 0.1666667         1 0.4734090     0
-#> 5   parch Random Forest v7_1 0.2800000         1 0.1739649     0
-#> 6   sibsp Random Forest v7_1 1.0000000         1 0.1681367     0
-
plot(sp_rf, alpha = 0.1) +
-  show_aggregated_profiles(clust_rf, color = "_label_", size = 2)
+#> 1 fare Random Forest v7_1 0.0000000 1 0.1957328 0 +#> 2 sibsp Random Forest v7_1 0.0000000 1 0.1707538 0 +#> 3 parch Random Forest v7_1 0.0000000 1 0.1744207 0 +#> 4 age Random Forest v7_1 0.1666667 1 0.4916774 0 +#> 5 parch Random Forest v7_1 0.2800000 1 0.1744207 0 +#> 6 sibsp Random Forest v7_1 1.0000000 1 0.1646322 0 +
plot(sp_rf, alpha = 0.1) +
+  show_aggregated_profiles(clust_rf, color = "_label_", size = 2)

Session info

- -
#> R version 3.6.1 (2019-07-05)
-#> Platform: x86_64-apple-darwin15.6.0 (64-bit)
-#> Running under: macOS Mojave 10.14.4
+
+
#> R version 3.6.0 (2019-04-26)
+#> Platform: x86_64-w64-mingw32/x64 (64-bit)
+#> Running under: Windows 10 x64 (build 17763)
 #> 
 #> Matrix products: default
-#> BLAS:   /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
-#> LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
 #> 
 #> locale:
-#> [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
+#> [1] LC_COLLATE=Polish_Poland.1250  LC_CTYPE=Polish_Poland.1250   
+#> [3] LC_MONETARY=Polish_Poland.1250 LC_NUMERIC=C                  
+#> [5] LC_TIME=Polish_Poland.1250    
 #> 
 #> attached base packages:
 #> [1] stats     graphics  grDevices utils     datasets  methods   base     
 #> 
 #> other attached packages:
-#> [1] ingredients_0.5.2   randomForest_4.6-14 DALEX_0.9.3        
+#> [1] ingredients_1.0     randomForest_4.6-14 DALEX_0.9.4        
 #> 
 #> loaded via a namespace (and not attached):
-#>  [1] Rcpp_1.0.3       compiler_3.6.1   pillar_1.4.3     tools_3.6.1     
-#>  [5] digest_0.6.23    evaluate_0.14    memoise_1.1.0    lifecycle_0.1.0 
-#>  [9] tibble_2.1.3     gtable_0.3.0     pkgconfig_2.0.3  rlang_0.4.2     
-#> [13] rstudioapi_0.10  yaml_2.2.0       pkgdown_1.4.1    xfun_0.11       
-#> [17] stringr_1.4.0    dplyr_0.8.3      knitr_1.26       desc_1.2.0      
-#> [21] fs_1.3.1         rprojroot_1.3-2  grid_3.6.1       tidyselect_0.2.5
+#>  [1] Rcpp_1.0.3       compiler_3.6.0   pillar_1.4.3     tools_3.6.0     
+#>  [5] digest_0.6.24    evaluate_0.14    memoise_1.1.0    lifecycle_0.1.0 
+#>  [9] tibble_2.1.3     gtable_0.3.0     pkgconfig_2.0.3  rlang_0.4.4     
+#> [13] rstudioapi_0.10  yaml_2.2.0       pkgdown_1.4.1    xfun_0.6        
+#> [17] stringr_1.4.0    dplyr_0.8.3      knitr_1.22       desc_1.2.0      
+#> [21] fs_1.3.1         rprojroot_1.3-2  grid_3.6.0       tidyselect_0.2.5
 #> [25] glue_1.3.1       R6_2.4.1         rmarkdown_1.16   farver_2.0.3    
 #> [29] ggplot2_3.2.1    purrr_0.3.3      magrittr_1.5     backports_1.1.5 
 #> [33] scales_1.1.0     htmltools_0.4.0  MASS_7.3-51.4    assertthat_0.2.1
@@ -305,7 +305,6 @@ 

diff --git a/docs/index.html b/docs/index.html index 2d5abc6d..1bc2700c 100644 --- a/docs/index.html +++ b/docs/index.html @@ -15,9 +15,9 @@ Key functions are: feature_importance() for assessment of global level feature importance, ceteris_paribus() for calculation of the what-if plots, - partial_dependency() for partial dependency plots, - conditional_dependency() for conditional dependency plots, - accumulated_dependency() for accumulated local effects plots, + partial_dependence() for partial dependence plots, + conditional_dependence() for conditional dependence plots, + accumulated_dependence() for accumulated local effects plots, aggregate_profiles() and cluster_profiles() for aggregation of ceteris paribus profiles, generic print() and plot() for better usability of selected explainers, generic plotD3() for interactive, D3 based explanations, and @@ -53,7 +53,7 @@
part of the DrWhy.AI developed by the MI^2 DataLab - 0.5.2 + 1.0
@@ -107,9 +107,9 @@
-
+
@@ -123,11 +123,11 @@

  • ceteris_paribus() for calculation of the Ceteris Paribus / What-If Profiles (read more at https://pbiecek.github.io/ema/ceterisParibus.html),
  • -partial_dependency() for Partial Dependency Plots,
  • +partial_dependence() for Partial Dependence Plots,
  • -conditional_dependency() for Conditional Dependency Plots also called M Plots,
  • +conditional_dependence() for Conditional Dependence Plots also called M Plots,
  • -accumulated_dependency() for Accumulated Local Effects Plots,
  • +accumulated_dependence() for Accumulated Local Effects Plots,
  • aggregate_profiles() and cluster_profiles() for aggregation of Ceteris Paribus Profiles,
  • @@ -143,12 +143,12 @@

    diff --git a/docs/news/index.html b/docs/news/index.html index 2301f045..731561ff 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -36,12 +36,12 @@ + - @@ -80,7 +80,7 @@
    part of the DrWhy.AI developed by the MI^2 DataLab - 0.5.2 + 1.0

  • @@ -139,9 +139,17 @@

    Changelog

    Source: NEWS.md
    -
    +
    +

    +ingredients 1.0

    +
      +
    • change dependency to dependence #103 +
    • +
    +
    +

    -ingredients 0.5.2

    +ingredients 0.5.2
    • ceteris_paribus profiles are now working for categorical variables
    • @@ -149,18 +157,18 @@

      show_profiles, show_observations, show_residuals are now working for categorical variables

    -
    +

    -ingredients 0.5.1

    +ingredients 0.5.1
    • synchronisation with changes in DALEX 0.5
    • new argument desc_sorting in plot.variable_importance_explainer #94
    -
    +

    -ingredients 0.5.0

    +ingredients 0.5.0
    • feature_importance now does 15 permutations on each variable by default. Use the B argument to change this number
    • @@ -169,9 +177,9 @@

    -
    +

    -ingredients 0.4.2

    +ingredients 0.4.2
    • aggregate_profiles use now gaussian kernel smoothing. Use the span argument for fine control over this parameter (#79)
    • @@ -182,41 +190,41 @@

    • added Travis-CI for OSX
    -
    +

    -ingredients 0.4.1

    +ingredients 0.4.1
    • fixed rounding problem in the describe function (#76)
    -
    +

    -ingredients 0.4

    +ingredients 0.4
    • CRAN release
    -
    +

    -ingredients 0.3.12

    +ingredients 0.3.12
    • aspect_importance is moved to DALEXtra (#66)
    • examples are updated in order to reflect changes in titanic_imputed from DALEX (#65)
    -
    +

    -ingredients 0.3.11

    +ingredients 0.3.11
    • modified plot.aspect_importance - it can plot more than single figure
    • modified triplot, plot.aspect_importance and plot_group_variables to add more clarity in plots and allow some parameterization
    -
    +

    -ingredients 0.3.10

    +ingredients 0.3.10
    • added triplot function that illustrates hierarchical aspect_importance() groupings
    • changes in aspect_importance() functions
    • @@ -224,46 +232,46 @@

    -
    +

    -ingredients 0.3.9

    +ingredients 0.3.9
    • change only_numerical parameter to variable_type in functions aggregated_profiles(), cluster_profiles(), plot() and others, as requested in #15
    -
    +

    -ingredients 0.3.8

    +ingredients 0.3.8
    -
    +

    -ingredients 0.3.7

    +ingredients 0.3.7
    • aggregated_profiles_conditional and aggregated_profiles_accumulated are rewritten with some code fixes
    -
    +

    -ingredients 0.3.6

    +ingredients 0.3.6
    • a new version of lime is implemented in the lime()/aspect_importance() function.
    • Kasia Pekala and Huber Baniecki are added as contributors.
    -
    +

    -ingredients 0.3.5

    +ingredients 0.3.5
    • new feature #29. Feature importance now takes an argument B that replicates permutations B times and calculates average from drop loss.
    -
    +

    -ingredients 0.3.4

    +ingredients 0.3.4
    • plotD3 now supports Ceteris Paribus Profiles.
    • @@ -273,9 +281,9 @@

    • fix #27 for multiple rows
    -
    +

    -ingredients 0.3.3

    +ingredients 0.3.3
    • show_profiles and show_residuals functions extend Ceteris Paribus Plots.
    • @@ -285,18 +293,18 @@

    • centering of ggplot2 title
    -
    +

    -ingredients 0.3.2

    +ingredients 0.3.2
    • added new functions describe() and print.ceteris_paribus_descriptions() for text based descriptions of Ceteris Paribus explainers
    • plot.ceteris_paribus_explainer works now also for categorical variables. Use the only_numerical = FALSE to force bars
    -
    +

    -ingredients 0.3.1

    +ingredients 0.3.1
    • added references to PM VEE
    • @@ -304,9 +312,9 @@

    • major changes in function names and file names
    -
    +

    -ingredients 0.3

    +ingredients 0.3
    • ceteris_paribus_2d extends classical ceteris paribus profiles
    • @@ -315,9 +323,9 @@

    • fixed examples and file names
    -
    +

    -ingredients 0.2

    +ingredients 0.2
    • cluster_profiles helps to identify interactions
    • @@ -327,9 +335,9 @@

      aggregate_profiles calculates partial dependency plots and much more

    -
    +

    -ingredients 0.1

    +ingredients 0.1
    diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index 1b5c8f70..f0c79253 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -1,4 +1,4 @@ -pandoc: 2.3.1 +pandoc: 2.7.1 pkgdown: 1.4.1 pkgdown_sha: ~ articles: diff --git a/docs/reference/accumulated_dependence-1.png b/docs/reference/accumulated_dependence-1.png new file mode 100644 index 00000000..c9ad33bb Binary files /dev/null and b/docs/reference/accumulated_dependence-1.png differ diff --git a/docs/reference/accumulated_dependence-2.png b/docs/reference/accumulated_dependence-2.png new file mode 100644 index 00000000..d83a9471 Binary files /dev/null and b/docs/reference/accumulated_dependence-2.png differ diff --git a/docs/reference/accumulated_dependence.html b/docs/reference/accumulated_dependence.html new file mode 100644 index 00000000..b8c139cd --- /dev/null +++ b/docs/reference/accumulated_dependence.html @@ -0,0 +1,324 @@ + + + + + + + + +Accumulated Local Effects Profiles aka ALEPlots — accumulated_dependence • ingredients + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + + +
    + +
    +
    + + +
    + +

    Accumulated Local Effects Profiles accumulate local changes in Ceteris Paribus Profiles. +Function accumulated_dependence calls ceteris_paribus and then aggregate_profiles.

    + +
    + +
    accumulated_dependence(x, ...)
    +
    +# S3 method for explainer
    +accumulated_dependence(
    +  x,
    +  variables = NULL,
    +  N = 500,
    +  variable_splits = NULL,
    +  grid_points = 101,
    +  ...,
    +  variable_type = "numerical"
    +)
    +
    +# S3 method for default
    +accumulated_dependence(
    +  x,
    +  data,
    +  predict_function = predict,
    +  label = class(x)[1],
    +  variables = NULL,
    +  N = 500,
    +  variable_splits = NULL,
    +  grid_points = 101,
    +  ...,
    +  variable_type = "numerical"
    +)
    +
    +# S3 method for ceteris_paribus_explainer
    +accumulated_dependence(x, ..., variables = NULL)
    +
    +accumulated_dependency(x, ...)
    + +

    Arguments

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    x

    an explainer created with function DALEX::explain(), an object of the class ceteris_paribus_explainer +or a model to be explained.

    ...

    other parameters

    variables

    names of variables for which profiles shall be calculated. +Will be passed to calculate_variable_split. +If NULL then all variables from the validation data will be used.

    N

    number of observations used for calculation of partial dependence profiles. +By default, 500 observations will be chosen randomly.

    variable_splits

    named list of splits for variables, in most cases created with calculate_variable_split. +If NULL then it will be calculated based on validation data available in the explainer.

    grid_points

    number of points for profile. Will be passed to calculate_variable_split.

    variable_type

    a character. If "numerical" then only numerical variables will be calculated. +If "categorical" then only categorical variables will be calculated.

    data

    validation dataset Will be extracted from x if it's an explainer +NOTE: It is best when target variable is not present in the data

    predict_function

    predict function Will be extracted from x if it's an explainer

    label

    name of the model. By default it's extracted from the class attribute of the model

    + +

    Value

    + +

    an object of the class aggregated_profiles_explainer

    + +

    Details

    + +

    Find more details in the Accumulated Local Dependence Chapter.

    + +

    References

    + +

    ALEPlot: Accumulated Local Effects (ALE) Plots and Partial Dependence (PD) Plots https://cran.r-project.org/package=ALEPlot, +Explanatory Model Analysis. Explore, Explain and Examine Predictive Models. https://pbiecek.github.io/ema

    + + +

    Examples

    +
    library("DALEX")
    #> Welcome to DALEX (version: 0.9.4). +#> Find examples and detailed introduction at: https://pbiecek.github.io/ema/
    #> +#> Dołączanie pakietu: 'DALEX'
    #> Następujący obiekt został zakryty z 'package:ingredients': +#> +#> feature_importance
    +model_titanic_glm <- glm(survived ~ gender + age + fare, + data = titanic_imputed, family = "binomial") + +explain_titanic_glm <- explain(model_titanic_glm, + data = titanic_imputed[,-8], + y = titanic_imputed[,8], + verbose = FALSE) + +adp_glm <- accumulated_dependence(explain_titanic_glm, + N = 150, variables = c("age", "fare")) +head(adp_glm)
    #> Top profiles : +#> _vname_ _label_ _x_ _yhat_ _ids_ +#> 1 age lm 0.1666667 0.000000000 0 +#> 2 age lm 2.0000000 -0.002498427 0 +#> 3 age lm 4.0000000 -0.005212249 0 +#> 4 age lm 7.0000000 -0.009258781 0 +#> 5 age lm 9.0000000 -0.011939391 0 +#> 6 age lm 13.0000000 -0.017257909 0
    plot(adp_glm)
    +# \donttest{ +library("randomForest")
    #> randomForest 4.6-14
    #> Type rfNews() to see new features/changes/bug fixes.
    +model_titanic_rf <- randomForest(survived ~., data = titanic_imputed)
    #> Warning: The response has five or fewer unique values. Are you sure you want to do regression?
    +explain_titanic_rf <- explain(model_titanic_rf, + data = titanic_imputed[,-8], + y = titanic_imputed[,8], + verbose = FALSE) + +adp_rf <- accumulated_dependence(explain_titanic_rf, N = 200, variable_type = "numerical") +plot(adp_rf)
    +adp_rf <- accumulated_dependence(explain_titanic_rf, N = 200, variable_type = "categorical") +plotD3(adp_rf, label_margin = 80, scale_plot = TRUE) +# }
    +
    + +
    + + +
    +
    +

    + Developed by Przemyslaw Biecek, Hubert Baniecki, Adam Izdebski. + Site built by pkgdown. +

    +
    + +
    +
    + + + + + + + + diff --git a/docs/reference/aggregate_profiles-1.png b/docs/reference/aggregate_profiles-1.png index 7dfcb324..929c3960 100644 Binary files a/docs/reference/aggregate_profiles-1.png and b/docs/reference/aggregate_profiles-1.png differ diff --git a/docs/reference/aggregate_profiles-2.png b/docs/reference/aggregate_profiles-2.png index 72e638f0..1ab0a1d0 100644 Binary files a/docs/reference/aggregate_profiles-2.png and b/docs/reference/aggregate_profiles-2.png differ diff --git a/docs/reference/aggregate_profiles-3.png b/docs/reference/aggregate_profiles-3.png index 39abdd4f..ea62d485 100644 Binary files a/docs/reference/aggregate_profiles-3.png and b/docs/reference/aggregate_profiles-3.png differ diff --git a/docs/reference/aggregate_profiles-4.png b/docs/reference/aggregate_profiles-4.png index 053cab4f..8508e18f 100644 Binary files a/docs/reference/aggregate_profiles-4.png and b/docs/reference/aggregate_profiles-4.png differ diff --git a/docs/reference/aggregate_profiles-5.png b/docs/reference/aggregate_profiles-5.png index 60262889..50c94120 100644 Binary files a/docs/reference/aggregate_profiles-5.png and b/docs/reference/aggregate_profiles-5.png differ diff --git a/docs/reference/aggregate_profiles-6.png b/docs/reference/aggregate_profiles-6.png index 70f5bc67..9c84fe9c 100644 Binary files a/docs/reference/aggregate_profiles-6.png and b/docs/reference/aggregate_profiles-6.png differ diff --git a/docs/reference/aggregate_profiles.html b/docs/reference/aggregate_profiles.html index 63817db8..ab781a09 100644 --- a/docs/reference/aggregate_profiles.html +++ b/docs/reference/aggregate_profiles.html @@ -36,16 +36,17 @@ + +It can be: Partial Dependence Profile (average across Ceteris Paribus Profiles), +Conditional Dependence Profile (local weighted average across Ceteris Paribus Profiles) or +Accumulated Local Dependence Profile (cummulated average local changes in Ceteris Paribus Profiles)." /> + - @@ -84,7 +85,7 @@
    part of the DrWhy.AI developed by the MI^2 DataLab - 0.5.2 + 1.0
    @@ -145,10 +146,12 @@

    Aggregates Ceteris Paribus Profiles

    +

    The function aggregate_profiles() calculates an aggregate of ceteris paribus profiles. -It can be: Partial Dependency Profile (average across Ceteris Paribus Profiles), -Conditional Dependency Profile (local weighted average across Ceteris Paribus Profiles) or -Accumulated Local Dependency Profile (cummulated average local changes in Ceteris Paribus Profiles).

    +It can be: Partial Dependence Profile (average across Ceteris Paribus Profiles), +Conditional Dependence Profile (local weighted average across Ceteris Paribus Profiles) or +Accumulated Local Dependence Profile (cumulated average local changes in Ceteris Paribus Profiles).

    +
    aggregate_profiles(
    @@ -158,9 +161,10 @@ 

    Aggregates Ceteris Paribus Profiles

    groups = NULL, type = "partial", variables = NULL, - span = 0.25 + span = 0.25, + center = FALSE )
    - +

    Arguments

    @@ -194,14 +198,20 @@

    Arg

    + + + +
    span

    smoothing coefficient, by default 0.25. It's the sd for gaussian kernel

    center

    By default accumulated profiles start at 0. If center=TRUE then they are centered around the average response

    - +

    Value

    an object of the class aggregated_profiles_explainer

    +

    References

    Explanatory Model Analysis. Explore, Explain and Examine Predictive Models. https://pbiecek.github.io/ema

    +

    Examples

    library("DALEX") @@ -290,8 +300,8 @@

    Examp pdp_rf_a$`_label_` <- "RF_accumulated" plot(pdp_rf_p, pdp_rf_c, pdp_rf_a, color = "_label_")

    # or maybe flipped? -library(ggplot2)
    #> -#> Attaching package: ‘ggplot2’
    #> The following object is masked from ‘package:randomForest’: +library(ggplot2)
    #> Warning: pakiet 'ggplot2' został zbudowany w wersji R 3.6.1
    #> +#> Dołączanie pakietu: 'ggplot2'
    #> Następujący obiekt został zakryty z 'package:randomForest': #> #> margin
    plot(pdp_rf_p, pdp_rf_c, pdp_rf_a, color = "_label_") + coord_flip()
    pdp_rf <- aggregate_profiles(cp_rf, variables = "class", variable_type = "categorical", @@ -313,8 +323,11 @@

    Examp

    Contents

    diff --git a/docs/reference/calculate_oscillations.html b/docs/reference/calculate_oscillations.html index f2aff99d..bdc0f431 100644 --- a/docs/reference/calculate_oscillations.html +++ b/docs/reference/calculate_oscillations.html @@ -36,14 +36,15 @@ + + - @@ -82,7 +83,7 @@
    part of the DrWhy.AI developed by the MI^2 DataLab - 0.5.2 + 1.0
    @@ -143,12 +144,14 @@

    Calculate Oscillations for Ceteris Paribus Explainer

    +

    Oscillations are proxies for local feature importance at the instance level. Find more details in the Ceteris Paribus Oscillations Chapter.

    +
    calculate_oscillations(x, sort = TRUE, ...)
    - +

    Arguments

    @@ -165,13 +168,15 @@

    Arg

    other arguments

    - +

    Value

    an object of the class ceteris_paribus_oscillations

    +

    References

    Explanatory Model Analysis. Explore, Explain and Examine Predictive Models. https://pbiecek.github.io/ema

    +

    Examples

    library("DALEX") @@ -188,7 +193,7 @@

    Examp #> -> model label : lm ( default ) #> -> data : 500 rows 7 cols #> -> target variable : 500 values -#> -> model_info : package stats , ver. 3.6.1 , task regression ( default ) +#> -> model_info : package stats , ver. 3.6.0 , task regression ( default ) #> -> predict function : yhat.glm will be used ( default ) #> -> predicted values : numerical, min = 0.0795294 , mean = 0.302 , max = 0.9859411 #> -> residual function : difference between y and yhat ( default ) @@ -239,8 +244,11 @@

    Examp

    Contents

    diff --git a/docs/reference/calculate_variable_profile.html b/docs/reference/calculate_variable_profile.html index 6188ac04..0e506924 100644 --- a/docs/reference/calculate_variable_profile.html +++ b/docs/reference/calculate_variable_profile.html @@ -36,13 +36,14 @@ + + - @@ -81,7 +82,7 @@
    part of the DrWhy.AI developed by the MI^2 DataLab - 0.5.2 + 1.0

    @@ -142,7 +143,9 @@

    Internal Function for Individual Variable Profiles

    +

    This function calculates individual variable profiles (ceteris paribus profiles), i.e. series of predictions from a model calculated for observations with altered single coordinate.

    +
    calculate_variable_profile(
    @@ -161,7 +164,7 @@ 

    Internal Function for Individual Variable Profiles

    predict_function = predict, ... )
    - +

    Arguments

    @@ -186,28 +189,34 @@

    Arg

    other parameters that will be passed to the predict_function

    - +

    Value

    a data frame with profiles for selected variables and selected observations

    +

    Details

    Note that calculate_variable_profile function is S3 generic. If you want to work on non standard data sources (like H2O ddf, external databases) you should overload it.

    +

    References

    Explanatory Model Analysis. Explore, Explain and Examine Predictive Models. https://pbiecek.github.io/ema

    +
    diff --git a/docs/reference/calculate_variable_split.html b/docs/reference/calculate_variable_split.html index e0dee3af..dc75e9b0 100644 --- a/docs/reference/calculate_variable_split.html +++ b/docs/reference/calculate_variable_split.html @@ -36,16 +36,17 @@ + + - @@ -84,7 +85,7 @@
    part of the DrWhy.AI developed by the MI^2 DataLab - 0.5.2 + 1.0
    @@ -145,17 +146,19 @@

    Internal Function for Split Points for Selected Variables

    +

    This function calculates candidate splits for each selected variable. For numerical variables splits are calculated as percentiles (in general uniform quantiles of the length grid_points). For all other variables splits are calculated as unique values.

    +
    calculate_variable_split(data, variables = colnames(data), grid_points = 101)
     
     # S3 method for default
     calculate_variable_split(data, variables = colnames(data), grid_points = 101)
    - +

    Arguments

    @@ -172,24 +175,28 @@

    Arg

    number of points used for response path

    - +

    Value

    A named list with splits for selected variables

    +

    Details

    Note that calculate_variable_split function is S3 generic. If you want to work on non standard data sources (like H2O ddf, external databases) you should overload it.

    +
    diff --git a/docs/reference/ceteris_paribus-1.png b/docs/reference/ceteris_paribus-1.png index 32a429a7..121ff2de 100644 Binary files a/docs/reference/ceteris_paribus-1.png and b/docs/reference/ceteris_paribus-1.png differ diff --git a/docs/reference/ceteris_paribus-2.png b/docs/reference/ceteris_paribus-2.png index d68c3704..a174098b 100644 Binary files a/docs/reference/ceteris_paribus-2.png and b/docs/reference/ceteris_paribus-2.png differ diff --git a/docs/reference/ceteris_paribus.html b/docs/reference/ceteris_paribus.html index 36df4638..af2af015 100644 --- a/docs/reference/ceteris_paribus.html +++ b/docs/reference/ceteris_paribus.html @@ -36,16 +36,17 @@ + + - @@ -84,7 +85,7 @@
    part of the DrWhy.AI developed by the MI^2 DataLab - 0.5.2 + 1.0
    @@ -145,10 +146,12 @@

    Ceteris Paribus Profiles aka Individual Variable Profiles

    +

    This explainer works for individual observations. For each observation it calculates Ceteris Paribus Profiles for selected variables. Such profiles can be used to hypothesize about model results if selected variable is changed. For this reason it is also called 'What-If Profiles'.

    +
    ceteris_paribus(x, ...)
    @@ -177,7 +180,7 @@ 

    Ceteris Paribus Profiles aka Individual Variable Profiles

    label = class(x)[1], ... )
    - +

    Arguments

    @@ -227,16 +230,19 @@

    Arg

    name of the model. By default it's extracted from the class attribute of the model

    - +

    Value

    an object of the class ceteris_paribus_explainer.

    +

    Details

    Find more details in Ceteris Paribus Chapter.

    +

    References

    Explanatory Model Analysis. Explore, Explain and Examine Predictive Models. https://pbiecek.github.io/ema

    +

    Examples

    library("DALEX") @@ -331,9 +337,13 @@

    Examp

    Contents

    diff --git a/docs/reference/ceteris_paribus_2d-1.png b/docs/reference/ceteris_paribus_2d-1.png index 447a636b..f0d0f959 100644 Binary files a/docs/reference/ceteris_paribus_2d-1.png and b/docs/reference/ceteris_paribus_2d-1.png differ diff --git a/docs/reference/ceteris_paribus_2d-2.png b/docs/reference/ceteris_paribus_2d-2.png index 74dfd72a..55ad8149 100644 Binary files a/docs/reference/ceteris_paribus_2d-2.png and b/docs/reference/ceteris_paribus_2d-2.png differ diff --git a/docs/reference/ceteris_paribus_2d.html b/docs/reference/ceteris_paribus_2d.html index 295a407b..f71117ae 100644 --- a/docs/reference/ceteris_paribus_2d.html +++ b/docs/reference/ceteris_paribus_2d.html @@ -36,14 +36,15 @@ + + - @@ -82,7 +83,7 @@
    part of the DrWhy.AI developed by the MI^2 DataLab - 0.5.2 + 1.0
    @@ -143,12 +144,14 @@

    Ceteris Paribus 2D Plot

    +

    This function calculates ceteris paribus profiles for grid of values spanned by two variables. It may be useful to identify or present interactions between two variables.

    +
    ceteris_paribus_2d(explainer, observation, grid_points = 101, variables = NULL)
    - +

    Arguments

    @@ -169,10 +172,11 @@

    Arg

    if specified, then only these variables will be explained

    - +

    Value

    an object of the class ceteris_paribus_2d_explainer.

    +

    Examples

    library("DALEX") @@ -187,11 +191,11 @@

    Examp #> -> model label : lm ( default ) #> -> data : 2207 rows 7 cols #> -> target variable : 2207 values -#> -> model_info : package stats , ver. 3.6.1 , task regression ( default ) +#> -> model_info : package stats , ver. 3.6.0 , task regression ( default ) #> -> predict function : yhat.glm will be used ( default ) #> -> predicted values : numerical, min = 0.1707237 , mean = 0.3221568 , max = 0.9983551 #> -> residual function : difference between y and yhat ( default ) -#> -> residuals : numerical, min = -0.9519492 , mean = 4.78827e-11 , max = 0.8167072 +#> -> residuals : numerical, min = -0.9519492 , mean = 4.788274e-11 , max = 0.8167072 #> A new explainer has been created!

    cp_rf <- ceteris_paribus_2d(explain_titanic_glm, titanic_imputed[1,], variables = c("age", "fare", "sibsp")) @@ -239,7 +243,9 @@

    Examp

    Contents

    diff --git a/docs/reference/cluster_profiles-1.png b/docs/reference/cluster_profiles-1.png index 0f2be5fc..85e40f23 100644 Binary files a/docs/reference/cluster_profiles-1.png and b/docs/reference/cluster_profiles-1.png differ diff --git a/docs/reference/cluster_profiles-2.png b/docs/reference/cluster_profiles-2.png index 1b2ac122..8352e90b 100644 Binary files a/docs/reference/cluster_profiles-2.png and b/docs/reference/cluster_profiles-2.png differ diff --git a/docs/reference/cluster_profiles-3.png b/docs/reference/cluster_profiles-3.png index b9d8e104..af034aa2 100644 Binary files a/docs/reference/cluster_profiles-3.png and b/docs/reference/cluster_profiles-3.png differ diff --git a/docs/reference/cluster_profiles.html b/docs/reference/cluster_profiles.html index 728717c7..5afe50eb 100644 --- a/docs/reference/cluster_profiles.html +++ b/docs/reference/cluster_profiles.html @@ -36,14 +36,15 @@ + + - @@ -82,7 +83,7 @@
    part of the DrWhy.AI developed by the MI^2 DataLab - 0.5.2 + 1.0
    @@ -143,8 +144,10 @@

    Cluster Ceteris Paribus Profiles

    +

    This function calculates aggregates of ceteris paribus profiles based on hierarchical clustering.

    +
    cluster_profiles(
    @@ -156,7 +159,7 @@ 

    Cluster Ceteris Paribus Profiles

    k = 3, variables = NULL )
    - +

    Arguments

    @@ -190,16 +193,19 @@

    Arg

    if not NULL then only variables will be presented

    - +

    Value

    an object of the class aggregated_profiles_explainer

    +

    Details

    Find more details in the Clustering Profiles Chapter.

    +

    References

    Explanatory Model Analysis. Explore, Explain and Examine Predictive Models. https://pbiecek.github.io/ema

    +

    Examples

    library("DALEX") @@ -214,11 +220,11 @@

    Examp #> -> model label : lm ( default ) #> -> data : 2207 rows 7 cols #> -> target variable : 2207 values -#> -> model_info : package stats , ver. 3.6.1 , task regression ( default ) +#> -> model_info : package stats , ver. 3.6.0 , task regression ( default ) #> -> predict function : yhat.glm will be used ( default ) #> -> predicted values : numerical, min = 0.1490412 , mean = 0.3221568 , max = 0.9878987 #> -> residual function : difference between y and yhat ( default ) -#> -> residuals : numerical, min = -0.8898433 , mean = 4.198546e-13 , max = 0.8448637 +#> -> residuals : numerical, min = -0.8898433 , mean = 4.198219e-13 , max = 0.8448637 #> A new explainer has been created!

    cp_rf <- ceteris_paribus(explain_titanic_glm, selected_passangers) clust_rf <- cluster_profiles(cp_rf, k = 3, variables = "age") @@ -316,9 +322,13 @@

    Examp

    Contents

    diff --git a/docs/reference/conditional_dependence-1.png b/docs/reference/conditional_dependence-1.png new file mode 100644 index 00000000..8945e837 Binary files /dev/null and b/docs/reference/conditional_dependence-1.png differ diff --git a/docs/reference/conditional_dependence-2.png b/docs/reference/conditional_dependence-2.png new file mode 100644 index 00000000..18e89b1e Binary files /dev/null and b/docs/reference/conditional_dependence-2.png differ diff --git a/docs/reference/conditional_dependence.html b/docs/reference/conditional_dependence.html new file mode 100644 index 00000000..4525d8b4 --- /dev/null +++ b/docs/reference/conditional_dependence.html @@ -0,0 +1,321 @@ + + + + + + + + +Conditional Dependence Profiles — conditional_dependence • ingredients + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + + +
    + +
    +
    + + +
    + +

    Conditional Dependence Profiles (aka Local Profiles) average locally Ceteris Paribus Profiles. +Function 'conditional_dependence' calls 'ceteris_paribus' and then 'aggregate_profiles'.

    + +
    + +
    conditional_dependence(x, ...)
    +
    +# S3 method for explainer
    +conditional_dependence(
    +  x,
    +  variables = NULL,
    +  N = 500,
    +  variable_splits = NULL,
    +  grid_points = 101,
    +  ...,
    +  variable_type = "numerical"
    +)
    +
    +# S3 method for default
    +conditional_dependence(
    +  x,
    +  data,
    +  predict_function = predict,
    +  label = class(x)[1],
    +  variables = NULL,
    +  N = 500,
    +  variable_splits = NULL,
    +  grid_points = 101,
    +  ...,
    +  variable_type = "numerical"
    +)
    +
    +# S3 method for ceteris_paribus_explainer
    +conditional_dependence(x, ..., variables = NULL)
    +
    +local_dependency(x, ...)
    +
    +conditional_dependency(x, ...)
    + +

    Arguments

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    x

    an explainer created with function DALEX::explain(), an object of the class ceteris_paribus_explainer +or a model to be explained.

    ...

    other parameters

    variables

    names of variables for which profiles shall be calculated. +Will be passed to calculate_variable_split. If NULL then all variables from the validation data will be used.

    N

    number of observations used for calculation of partial dependence profiles. By default 500.

    variable_splits

    named list of splits for variables, in most cases created with calculate_variable_split. +If NULL then it will be calculated based on validation data available in the explainer.

    grid_points

    number of points for profile. Will be passed to calculate_variable_split.

    variable_type

    a character. If numerical then only numerical variables will be calculated. +If categorical then only categorical variables will be calculated.

    data

    validation dataset, will be extracted from x if it's an explainer +NOTE: It is best when target variable is not present in the data

    predict_function

    predict function, will be extracted from x if it's an explainer

    label

    name of the model. By default it's extracted from the class attribute of the model

    + +

    Value

    + +

    an object of the class aggregated_profiles_explainer

    + +

    Details

    + +

    Find more details in the Accumulated Local Dependence Chapter.

    + +

    References

    + +

    Explanatory Model Analysis. Explore, Explain and Examine Predictive Models. https://pbiecek.github.io/ema

    + + +

    Examples

    +
    library("DALEX") + +model_titanic_glm <- glm(survived ~ gender + age + fare, + data = titanic_imputed, family = "binomial") + +explain_titanic_glm <- explain(model_titanic_glm, + data = titanic_imputed[,-8], + y = titanic_imputed[,8], + verbose = FALSE) + +cdp_glm <- conditional_dependence(explain_titanic_glm, + N = 150, variables = c("age", "fare")) +head(cdp_glm)
    #> Top profiles : +#> _vname_ _label_ _x_ _yhat_ _ids_ +#> age.lm.0.1666666667 age lm 0.1666667 0.3488203 0 +#> age.lm.2 age lm 2.0000000 0.3452057 0 +#> age.lm.4 age lm 4.0000000 0.3412918 0 +#> age.lm.7 age lm 7.0000000 0.3354948 0 +#> age.lm.9 age lm 9.0000000 0.3316884 0 +#> age.lm.13 age lm 13.0000000 0.3242332 0
    plot(cdp_glm)
    +# \donttest{ +library("randomForest") + +model_titanic_rf <- randomForest(survived ~., data = titanic_imputed)
    #> Warning: The response has five or fewer unique values. Are you sure you want to do regression?
    +explain_titanic_rf <- explain(model_titanic_rf, + data = titanic_imputed[,-8], + y = titanic_imputed[,8], + verbose = FALSE) + +cdp_rf <- conditional_dependence(explain_titanic_rf, N = 200, variable_type = "numerical") +plot(cdp_rf)
    +cdp_rf <- conditional_dependence(explain_titanic_rf, N = 200, variable_type = "categorical") +plotD3(cdp_rf, label_margin = 80, scale_plot = TRUE) +# }
    +
    + +
    + + +
    +
    +

    + Developed by Przemyslaw Biecek, Hubert Baniecki, Adam Izdebski. + Site built by pkgdown. +

    +
    + +
    +
    + + + + + + + + diff --git a/docs/reference/describe-1.png b/docs/reference/describe-1.png index 749feeb6..1f7db316 100644 Binary files a/docs/reference/describe-1.png and b/docs/reference/describe-1.png differ diff --git a/docs/reference/describe-2.png b/docs/reference/describe-2.png index c68297c2..f3276ca9 100644 Binary files a/docs/reference/describe-2.png and b/docs/reference/describe-2.png differ diff --git a/docs/reference/describe.html b/docs/reference/describe.html index ab718bc6..3a74b10c 100644 --- a/docs/reference/describe.html +++ b/docs/reference/describe.html @@ -6,7 +6,7 @@ -Natural language description of feature importance explainer — describe.partial_dependency_explainer • ingredients +Natural language description of feature importance explainer — describe.partial_dependence_explainer • ingredients @@ -35,16 +35,17 @@ - + + + - @@ -83,7 +84,7 @@
    part of the DrWhy.AI developed by the MI^2 DataLab - 0.5.2 + 1.0
    @@ -144,12 +145,14 @@

    Natural language description of feature importance explainer

    +

    Generic function describe generates a natural language description of ceteris_paribus(), aggregated_profiles() and feature_importance() explanations which enhances their interpretability.

    +
    -
    # S3 method for partial_dependency_explainer
    +    
    # S3 method for partial_dependence_explainer
     describe(
       x,
       nonsignificance_treshold = 0.15,
    @@ -175,7 +178,7 @@ 

    Natural language description of feature importance explainer

    # S3 method for feature_importance_explainer describe(x, nonsignificance_treshold = 0.15, ...)
    - +

    Arguments

    @@ -208,7 +211,7 @@

    Arg

    label for model's prediction

    - +

    Details

    Function describe.ceteris_paribus() generates a natural language description of @@ -230,6 +233,7 @@

    Details have significant dropout difference from the full model, depending on nonsignificance_treshold. The description prints the three most important variables for the model's prediction. The current design of DALEX explainer does not allow for displaying variables values.

    +

    Examples

    library("DALEX") @@ -294,7 +298,7 @@

    Examp #> -> model label : lm ( default ) #> -> data : 1000 rows 5 cols #> -> target variable : 1000 values -#> -> model_info : package stats , ver. 3.6.1 , task regression ( default ) +#> -> model_info : package stats , ver. 3.6.0 , task regression ( default ) #> -> predict function : yhat.lm will be used ( default ) #> -> predicted values : numerical, min = 1781.848 , mean = 3487.019 , max = 6176.032 #> -> residual function : difference between y and yhat ( default ) @@ -302,15 +306,17 @@

    Examp #> A new explainer has been created!

    fi_lm <- feature_importance(explainer_lm, loss_function = loss_root_mean_square) -plot(fi_lm)
    describe(fi_lm)
    #> The number of important variables for lm's prediction is 43 out of 75. -#> Variables _baseline_, _baseline_, _baseline_ have the highest importantance.
    +plot(fi_lm)
    describe(fi_lm)
    #> The number of important variables for lm's prediction is 3 out of 5. +#> Variables district, surface, floor have the highest importantance.
    @@ -143,8 +144,10 @@

    Feature Importance

    +

    This function calculates permutation based feature importance. For this reason it is also called the Variable Dropout Plot.

    +
    feature_importance(x, ...)
    @@ -177,7 +180,7 @@ 

    Feature Importance

    variables = NULL, variable_groups = NULL )
    - +

    Arguments

    @@ -236,16 +239,19 @@

    Arg

    predict function, will be extracted from x if it's an explainer

    - +

    Value

    an object of the class feature_importance

    +

    Details

    Find more details in the Feature Importance Chapter.

    +

    References

    Explanatory Model Analysis. Explore, Explain and Examine Predictive Models. https://pbiecek.github.io/ema

    +

    Examples

    library("DALEX") @@ -259,11 +265,11 @@

    Examp #> -> model label : lm ( default ) #> -> data : 2207 rows 7 cols #> -> target variable : 2207 values -#> -> model_info : package stats , ver. 3.6.1 , task regression ( default ) +#> -> model_info : package stats , ver. 3.6.0 , task regression ( default ) #> -> predict function : yhat.glm will be used ( default ) #> -> predicted values : numerical, min = 0.1490412 , mean = 0.3221568 , max = 0.9878987 #> -> residual function : difference between y and yhat ( default ) -#> -> residuals : numerical, min = -0.8898433 , mean = 4.198546e-13 , max = 0.8448637 +#> -> residuals : numerical, min = -0.8898433 , mean = 4.198219e-13 , max = 0.8448637 #> A new explainer has been created!

    fi_glm <- feature_importance(explain_titanic_glm) plot(fi_glm)
    @@ -334,11 +340,11 @@

    Examp #> -> model label : lm ( default ) #> -> data : 7847 rows 6 cols #> -> target variable : 7847 values -#> -> model_info : package stats , ver. 3.6.1 , task regression ( default ) +#> -> model_info : package stats , ver. 3.6.0 , task regression ( default ) #> -> predict function : yhat.glm will be used ( default ) #> -> predicted values : numerical, min = 0.00861694 , mean = 0.3638333 , max = 0.7822214 #> -> residual function : difference between y and yhat ( default ) -#> -> residuals : numerical, min = -0.7755901 , mean = -1.293796e-13 , max = 0.9820537 +#> -> residuals : numerical, min = -0.7755901 , mean = -1.294707e-13 , max = 0.9820537 #> A new explainer has been created!

    fi_glm <- feature_importance(explainer_glm, type = "raw", loss_function = loss_root_mean_square) head(fi_glm)
    #> variable mean_dropout_loss label @@ -364,25 +370,29 @@

    Examp #> -> predict function : yhat.default will be used ( default ) #> -> predicted values : numerical, min = 1.687903e-06 , mean = 0.363713 , max = 0.9996712 #> -> residual function : difference between y and yhat ( default ) -#> -> residuals : numerical, min = -0.9885727 , mean = 0.0001203494 , max = 0.9970635 +#> -> residuals : numerical, min = -0.9885727 , mean = 0.0001203497 , max = 0.9970635 #> A new explainer has been created!

    fi_xgb <- feature_importance(explainer_xgb, type = "raw") head(fi_xgb)
    #> variable mean_dropout_loss label -#> 1 _full_model_ 98.58735 xgboost -#> 2 gendermale 98.58735 xgboost -#> 3 evaluation 116.50755 xgboost -#> 4 genderfemale 149.72728 xgboost -#> 5 age 159.28469 xgboost -#> 6 salary 167.93442 xgboost
    plot(fi_xgb, fi_glm)
    # } +#> 1 _full_model_ 98.39138 xgboost +#> 2 gendermale 98.39138 xgboost +#> 3 evaluation 115.96322 xgboost +#> 4 genderfemale 149.93447 xgboost +#> 5 age 159.41428 xgboost +#> 6 salary 168.88640 xgboost
    plot(fi_xgb, fi_glm)
    # }
    @@ -162,21 +162,21 @@

    partial_dependency()

    +

    partial_dependence() partial_dependency()

    -

    Partial Dependency Profiles

    +

    Partial Dependence Profiles

    -

    accumulated_dependency()

    +

    accumulated_dependence() accumulated_dependency()

    Accumulated Local Effects Profiles aka ALEPlots

    -

    conditional_dependency() local_dependency()

    +

    conditional_dependence() local_dependency() conditional_dependency()

    -

    Conditional Dependency Profiles

    +

    Conditional Dependence Profiles

    diff --git a/docs/reference/partial_dependence-1.png b/docs/reference/partial_dependence-1.png new file mode 100644 index 00000000..d11505d9 Binary files /dev/null and b/docs/reference/partial_dependence-1.png differ diff --git a/docs/reference/partial_dependence-2.png b/docs/reference/partial_dependence-2.png new file mode 100644 index 00000000..589103fb Binary files /dev/null and b/docs/reference/partial_dependence-2.png differ diff --git a/docs/reference/partial_dependence.html b/docs/reference/partial_dependence.html new file mode 100644 index 00000000..a991b39a --- /dev/null +++ b/docs/reference/partial_dependence.html @@ -0,0 +1,320 @@ + + + + + + + + +Partial Dependence Profiles — partial_dependence • ingredients + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + + +
    + +
    +
    + + +
    + +

    Partial Dependence Profiles are averages from Ceteris Paribus Profiles. +Function partial_dependence calls ceteris_paribus and then aggregate_profiles.

    + +
    + +
    partial_dependence(x, ...)
    +
    +# S3 method for explainer
    +partial_dependence(
    +  x,
    +  variables = NULL,
    +  N = 500,
    +  variable_splits = NULL,
    +  grid_points = 101,
    +  ...,
    +  variable_type = "numerical"
    +)
    +
    +# S3 method for default
    +partial_dependence(
    +  x,
    +  data,
    +  predict_function = predict,
    +  label = class(x)[1],
    +  variables = NULL,
    +  grid_points = 101,
    +  variable_splits = NULL,
    +  N = 500,
    +  ...,
    +  variable_type = "numerical"
    +)
    +
    +# S3 method for ceteris_paribus_explainer
    +partial_dependence(x, ..., variables = NULL)
    +
    +partial_dependency(x, ...)
    + +

    Arguments

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    x

    an explainer created with function DALEX::explain(), an object of the class ceteris_paribus_explainer +or a model to be explained.

    ...

    other parameters

    variables

    names of variables for which profiles shall be calculated. +Will be passed to calculate_variable_split. +If NULL then all variables from the validation data will be used.

    N

    number of observations used for calculation of partial dependence profiles. By default 500.

    variable_splits

    named list of splits for variables, in most cases created with calculate_variable_split. +If NULL then it will be calculated based on validation data available in the explainer.

    grid_points

    number of points for profile. Will be passed to calculate_variable_split.

    variable_type

    a character. If numerical then only numerical variables will be calculated. +If categorical then only categorical variables will be calculated.

    data

    validation dataset, will be extracted from x if it's an explainer +NOTE: It is best when target variable is not present in the data

    predict_function

    predict function, will be extracted from x if it's an explainer

    label

    name of the model. By default it's extracted from the class attribute of the model

    + +

    Value

    + +

    an object of the class aggregated_profiles_explainer

    + +

    Details

    + +

    Find more details in the Partial Dependence Profiles Chapter.

    + +

    References

    + +

    Explanatory Model Analysis. Explore, Explain and Examine Predictive Models. https://pbiecek.github.io/ema

    + + +

    Examples

    +
    library("DALEX") + +model_titanic_glm <- glm(survived ~ gender + age + fare, + data = titanic_imputed, family = "binomial") + +explain_titanic_glm <- explain(model_titanic_glm, + data = titanic_imputed[,-8], + y = titanic_imputed[,8], + verbose = FALSE) + +pdp_glm <- partial_dependence(explain_titanic_glm, + N = 50, variables = c("age", "fare")) +head(pdp_glm)
    #> Top profiles : +#> _vname_ _label_ _x_ _yhat_ _ids_ +#> 1 fare lm 0.0000000 0.2848034 0 +#> 2 age lm 0.1666667 0.3506775 0 +#> 3 age lm 2.0000000 0.3481410 0 +#> 4 age lm 4.0000000 0.3453864 0 +#> 5 fare lm 6.1793080 0.2922441 0 +#> 6 age lm 7.0000000 0.3412793 0
    plot(pdp_glm)
    +# \donttest{ +library("randomForest") + +model_titanic_rf <- randomForest(survived ~., data = titanic_imputed)
    #> Warning: The response has five or fewer unique values. Are you sure you want to do regression?
    +explain_titanic_rf <- explain(model_titanic_rf, + data = titanic_imputed[,-8], + y = titanic_imputed[,8], + verbose = FALSE) + +pdp_rf <- partial_dependence(explain_titanic_rf, variable_type = "numerical") +plot(pdp_rf)
    +pdp_rf <- partial_dependence(explain_titanic_rf, variable_type = "categorical") +plotD3(pdp_rf, label_margin = 80, scale_plot = TRUE) +# }
    +
    + +
    + + +
    +
    +

    + Developed by Przemyslaw Biecek, Hubert Baniecki, Adam Izdebski. + Site built by pkgdown. +

    +
    + +
    +
    + + + + + + + + diff --git a/docs/reference/plot.aggregated_profiles_explainer-1.png b/docs/reference/plot.aggregated_profiles_explainer-1.png index 883f9e02..7eaa99c4 100644 Binary files a/docs/reference/plot.aggregated_profiles_explainer-1.png and b/docs/reference/plot.aggregated_profiles_explainer-1.png differ diff --git a/docs/reference/plot.aggregated_profiles_explainer-2.png b/docs/reference/plot.aggregated_profiles_explainer-2.png index 37a79932..16fc2dad 100644 Binary files a/docs/reference/plot.aggregated_profiles_explainer-2.png and b/docs/reference/plot.aggregated_profiles_explainer-2.png differ diff --git a/docs/reference/plot.aggregated_profiles_explainer-3.png b/docs/reference/plot.aggregated_profiles_explainer-3.png index 64e54ffe..cf72a4ec 100644 Binary files a/docs/reference/plot.aggregated_profiles_explainer-3.png and b/docs/reference/plot.aggregated_profiles_explainer-3.png differ diff --git a/docs/reference/plot.aggregated_profiles_explainer-4.png b/docs/reference/plot.aggregated_profiles_explainer-4.png index 9746e0dc..1daabf1a 100644 Binary files a/docs/reference/plot.aggregated_profiles_explainer-4.png and b/docs/reference/plot.aggregated_profiles_explainer-4.png differ diff --git a/docs/reference/plot.aggregated_profiles_explainer.html b/docs/reference/plot.aggregated_profiles_explainer.html index ccc4b002..0c8b687b 100644 --- a/docs/reference/plot.aggregated_profiles_explainer.html +++ b/docs/reference/plot.aggregated_profiles_explainer.html @@ -36,15 +36,16 @@ - + - @@ -83,7 +84,7 @@
    part of the DrWhy.AI developed by the MI^2 DataLab - 0.5.2 + 1.0
    @@ -144,9 +145,11 @@

    Plots Aggregated Profiles

    -

    Function plot.aggregated_profiles_explainer plots partial dependency plot or accumulated effect plot. + +

    Function plot.aggregated_profiles_explainer plots a partial dependence plot or an accumulated effect plot. It works in a similar way to plot.ceteris_paribus, but instead of individual profiles it shows average profiles for each variable listed in the variables vector.

    +
    # S3 method for aggregated_profiles_explainer
    @@ -159,7 +162,7 @@ 

    Plots Aggregated Profiles

    facet_ncol = NULL, variables = NULL )
    - +

    Arguments

    @@ -192,13 +195,15 @@

    Arg

    if not NULL then only variables will be presented

    - +

    Value

    a ggplot2 object

    +

    References

    Explanatory Model Analysis. Explore, Explain and Examine Predictive Models. https://pbiecek.github.io/ema

    +

    Examples

    library("DALEX") @@ -211,11 +216,11 @@

    Examp y = titanic_imputed[,8], verbose = FALSE) -pdp_rf_p <- partial_dependency(explain_titanic_glm, N = 50) +pdp_rf_p <- partial_dependence(explain_titanic_glm, N = 50) pdp_rf_p$`_label_` <- "RF_partial" -pdp_rf_l <- conditional_dependency(explain_titanic_glm, N = 50) +pdp_rf_l <- conditional_dependence(explain_titanic_glm, N = 50) pdp_rf_l$`_label_` <- "RF_local" -pdp_rf_a<- accumulated_dependency(explain_titanic_glm, N = 50) +pdp_rf_a<- accumulated_dependence(explain_titanic_glm, N = 50) pdp_rf_a$`_label_` <- "RF_accumulated" head(pdp_rf_p)

    #> Top profiles : #> _vname_ _label_ _x_ _yhat_ _ids_ @@ -296,8 +301,11 @@

    Examp

    Contents

    diff --git a/docs/reference/plot.ceteris_paribus_2d_explainer-1.png b/docs/reference/plot.ceteris_paribus_2d_explainer-1.png index f23c9fe7..316e98c5 100644 Binary files a/docs/reference/plot.ceteris_paribus_2d_explainer-1.png and b/docs/reference/plot.ceteris_paribus_2d_explainer-1.png differ diff --git a/docs/reference/plot.ceteris_paribus_2d_explainer-2.png b/docs/reference/plot.ceteris_paribus_2d_explainer-2.png index 90045644..7ec1d1ea 100644 Binary files a/docs/reference/plot.ceteris_paribus_2d_explainer-2.png and b/docs/reference/plot.ceteris_paribus_2d_explainer-2.png differ diff --git a/docs/reference/plot.ceteris_paribus_2d_explainer-3.png b/docs/reference/plot.ceteris_paribus_2d_explainer-3.png index 2576f8f4..811494e3 100644 Binary files a/docs/reference/plot.ceteris_paribus_2d_explainer-3.png and b/docs/reference/plot.ceteris_paribus_2d_explainer-3.png differ diff --git a/docs/reference/plot.ceteris_paribus_2d_explainer-4.png b/docs/reference/plot.ceteris_paribus_2d_explainer-4.png index 809d87e8..77b09461 100644 Binary files a/docs/reference/plot.ceteris_paribus_2d_explainer-4.png and b/docs/reference/plot.ceteris_paribus_2d_explainer-4.png differ diff --git a/docs/reference/plot.ceteris_paribus_2d_explainer-5.png b/docs/reference/plot.ceteris_paribus_2d_explainer-5.png index 7c5d702c..5f2ae7fa 100644 Binary files a/docs/reference/plot.ceteris_paribus_2d_explainer-5.png and b/docs/reference/plot.ceteris_paribus_2d_explainer-5.png differ diff --git a/docs/reference/plot.ceteris_paribus_2d_explainer.html b/docs/reference/plot.ceteris_paribus_2d_explainer.html index f54281b9..9e0396c4 100644 --- a/docs/reference/plot.ceteris_paribus_2d_explainer.html +++ b/docs/reference/plot.ceteris_paribus_2d_explainer.html @@ -36,13 +36,14 @@ + + - @@ -81,7 +82,7 @@
    part of the DrWhy.AI developed by the MI^2 DataLab - 0.5.2 + 1.0
    @@ -142,7 +143,9 @@

    Plot Ceteris Paribus 2D Explanations

    +

    This function plots What-If Plots for a single prediction / observation.

    +
    # S3 method for ceteris_paribus_2d_explainer
    @@ -157,7 +160,7 @@ 

    Plot Ceteris Paribus 2D Explanations

    pch = "+", size = 6 )
    - +

    Arguments

    @@ -198,13 +201,15 @@

    Arg

    numeric, size of individual datapoints

    - +

    Value

    a ggplot2 object

    +

    References

    Explanatory Model Analysis. Explore, Explain and Examine Predictive Models. https://pbiecek.github.io/ema

    +

    Examples

    library("DALEX") @@ -267,8 +272,11 @@

    Examp

    Contents

    diff --git a/docs/reference/plot.ceteris_paribus_explainer-1.png b/docs/reference/plot.ceteris_paribus_explainer-1.png index 4604ee7f..f52f0054 100644 Binary files a/docs/reference/plot.ceteris_paribus_explainer-1.png and b/docs/reference/plot.ceteris_paribus_explainer-1.png differ diff --git a/docs/reference/plot.ceteris_paribus_explainer-2.png b/docs/reference/plot.ceteris_paribus_explainer-2.png index ac574ef4..2de9d8a5 100644 Binary files a/docs/reference/plot.ceteris_paribus_explainer-2.png and b/docs/reference/plot.ceteris_paribus_explainer-2.png differ diff --git a/docs/reference/plot.ceteris_paribus_explainer-3.png b/docs/reference/plot.ceteris_paribus_explainer-3.png index e6317636..dcc058cf 100644 Binary files a/docs/reference/plot.ceteris_paribus_explainer-3.png and b/docs/reference/plot.ceteris_paribus_explainer-3.png differ diff --git a/docs/reference/plot.ceteris_paribus_explainer-4.png b/docs/reference/plot.ceteris_paribus_explainer-4.png index e55a6968..74d30858 100644 Binary files a/docs/reference/plot.ceteris_paribus_explainer-4.png and b/docs/reference/plot.ceteris_paribus_explainer-4.png differ diff --git a/docs/reference/plot.ceteris_paribus_explainer-5.png b/docs/reference/plot.ceteris_paribus_explainer-5.png index 59cc69d3..3043e691 100644 Binary files a/docs/reference/plot.ceteris_paribus_explainer-5.png and b/docs/reference/plot.ceteris_paribus_explainer-5.png differ diff --git a/docs/reference/plot.ceteris_paribus_explainer-6.png b/docs/reference/plot.ceteris_paribus_explainer-6.png index 194c2b71..b102442c 100644 Binary files a/docs/reference/plot.ceteris_paribus_explainer-6.png and b/docs/reference/plot.ceteris_paribus_explainer-6.png differ diff --git a/docs/reference/plot.ceteris_paribus_explainer.html b/docs/reference/plot.ceteris_paribus_explainer.html index f8041b5a..f302c8e2 100644 --- a/docs/reference/plot.ceteris_paribus_explainer.html +++ b/docs/reference/plot.ceteris_paribus_explainer.html @@ -36,15 +36,16 @@ + + - @@ -83,7 
+84,7 @@
    part of the DrWhy.AI developed by the MI^2 DataLab - 0.5.2 + 1.0
    @@ -144,9 +145,11 @@

    Plots Ceteris Paribus Profiles

    +

    Function plot.ceteris_paribus_explainer plots Individual Variable Profiles for selected observations. Various parameters help to decide what should be plotted, profiles, aggregated profiles, points or rugs.

    Find more details in Ceteris Paribus Chapter.

    +
    # S3 method for ceteris_paribus_explainer
    @@ -160,7 +163,7 @@ 

    Plots Ceteris Paribus Profiles

    facet_ncol = NULL, variables = NULL )
    - +

    Arguments

    @@ -198,13 +201,15 @@

    Arg

    if not NULL then only variables will be presented

    - +

    Value

    a ggplot2 object

    +

    References

    Explanatory Model Analysis. Explore, Explain and Examine Predictive Models. https://pbiecek.github.io/ema

    +

    Examples

    library("DALEX") @@ -305,8 +310,11 @@

    Examp

    Contents

    diff --git a/docs/reference/plot.ceteris_paribus_oscillations-1.png b/docs/reference/plot.ceteris_paribus_oscillations-1.png index 51348513..47fd4280 100644 Binary files a/docs/reference/plot.ceteris_paribus_oscillations-1.png and b/docs/reference/plot.ceteris_paribus_oscillations-1.png differ diff --git a/docs/reference/plot.ceteris_paribus_oscillations-2.png b/docs/reference/plot.ceteris_paribus_oscillations-2.png index d87bcabb..4290b067 100644 Binary files a/docs/reference/plot.ceteris_paribus_oscillations-2.png and b/docs/reference/plot.ceteris_paribus_oscillations-2.png differ diff --git a/docs/reference/plot.ceteris_paribus_oscillations.html b/docs/reference/plot.ceteris_paribus_oscillations.html index c867d1ab..1fd2b1b7 100644 --- a/docs/reference/plot.ceteris_paribus_oscillations.html +++ b/docs/reference/plot.ceteris_paribus_oscillations.html @@ -36,14 +36,15 @@ + + - @@ -82,7 +83,7 @@
    part of the DrWhy.AI developed by the MI^2 DataLab - 0.5.2 + 1.0
    @@ -143,13 +144,15 @@

    Plot Ceteris Paribus Oscillations

    +

    This function plots local variable importance plots calculated as oscillations in the Ceteris Paribus Profiles.

    +
    # S3 method for ceteris_paribus_oscillations
     plot(x, ..., bar_width = 10)
    - +

    Arguments

    @@ -166,13 +169,15 @@

    Arg

    width of bars. By default 10

    - +

    Value

    a ggplot2 object

    +

    References

    Explanatory Model Analysis. Explore, Explain and Examine Predictive Models. https://pbiecek.github.io/ema

    +

    Examples

    library("DALEX") @@ -217,8 +222,11 @@

    Examp

    Contents

    diff --git a/docs/reference/plot.feature_importance_explainer-1.png b/docs/reference/plot.feature_importance_explainer-1.png index f4745bbb..527ade42 100644 Binary files a/docs/reference/plot.feature_importance_explainer-1.png and b/docs/reference/plot.feature_importance_explainer-1.png differ diff --git a/docs/reference/plot.feature_importance_explainer-2.png b/docs/reference/plot.feature_importance_explainer-2.png index de0ab9d6..db1f7a66 100644 Binary files a/docs/reference/plot.feature_importance_explainer-2.png and b/docs/reference/plot.feature_importance_explainer-2.png differ diff --git a/docs/reference/plot.feature_importance_explainer-3.png b/docs/reference/plot.feature_importance_explainer-3.png index daee5ccd..ceb93fab 100644 Binary files a/docs/reference/plot.feature_importance_explainer-3.png and b/docs/reference/plot.feature_importance_explainer-3.png differ diff --git a/docs/reference/plot.feature_importance_explainer-4.png b/docs/reference/plot.feature_importance_explainer-4.png index b747dd15..e8c795c2 100644 Binary files a/docs/reference/plot.feature_importance_explainer-4.png and b/docs/reference/plot.feature_importance_explainer-4.png differ diff --git a/docs/reference/plot.feature_importance_explainer-5.png b/docs/reference/plot.feature_importance_explainer-5.png index bb7d5788..cd6f56eb 100644 Binary files a/docs/reference/plot.feature_importance_explainer-5.png and b/docs/reference/plot.feature_importance_explainer-5.png differ diff --git a/docs/reference/plot.feature_importance_explainer.html b/docs/reference/plot.feature_importance_explainer.html index 4d80c927..aaef5b78 100644 --- a/docs/reference/plot.feature_importance_explainer.html +++ b/docs/reference/plot.feature_importance_explainer.html @@ -36,6 +36,7 @@ + + - @@ -87,7 +88,7 @@
    part of the DrWhy.AI developed by the MI^2 DataLab - 0.5.2 + 1.0
    @@ -148,6 +149,7 @@

    Plots Feature Importance

    +

    This function plots variable importance calculated as changes in the loss function after variable drops. It uses output from feature_importance function that corresponds to permutation based measure of variable importance. @@ -155,6 +157,7 @@

    Plots Feature Importance

    The order depends on the average drop out loss. In different panels variable contributions may not look like sorted if variable importance is different in different models.

    +
    # S3 method for feature_importance_explainer
    @@ -166,7 +169,7 @@ 

    Plots Feature Importance

    bar_width = 10, desc_sorting = TRUE )
    - +

    Arguments

    @@ -196,16 +199,19 @@

    Arg

    logical. Should the bars be sorted descending? By default TRUE

    - +

    Value

    a ggplot2 object

    +

    Details

    Find more details in the Feature Importance Chapter.

    +

    References

    Explanatory Model Analysis. Explore, Explain and Examine Predictive Models. https://pbiecek.github.io/ema

    +

    Examples

    library("DALEX") @@ -219,11 +225,11 @@

    Examp #> -> model label : lm ( default ) #> -> data : 2207 rows 7 cols #> -> target variable : 2207 values -#> -> model_info : package stats , ver. 3.6.1 , task regression ( default ) +#> -> model_info : package stats , ver. 3.6.0 , task regression ( default ) #> -> predict function : yhat.glm will be used ( default ) #> -> predicted values : numerical, min = 0.1490412 , mean = 0.3221568 , max = 0.9878987 #> -> residual function : difference between y and yhat ( default ) -#> -> residuals : numerical, min = -0.8898433 , mean = 4.198546e-13 , max = 0.8448637 +#> -> residuals : numerical, min = -0.8898433 , mean = 4.198219e-13 , max = 0.8448637 #> A new explainer has been created!

    fi_rf <- feature_importance(explain_titanic_glm) plot(fi_rf)
    @@ -264,11 +270,11 @@

    Examp #> -> model label : lm ( default ) #> -> data : 7847 rows 6 cols #> -> target variable : 7847 values -#> -> model_info : package stats , ver. 3.6.1 , task regression ( default ) +#> -> model_info : package stats , ver. 3.6.0 , task regression ( default ) #> -> predict function : yhat.glm will be used ( default ) #> -> predicted values : numerical, min = 0.00861694 , mean = 0.3638333 , max = 0.7822214 #> -> residual function : difference between y and yhat ( default ) -#> -> residuals : numerical, min = -0.7755901 , mean = -1.293796e-13 , max = 0.9820537 +#> -> residuals : numerical, min = -0.7755901 , mean = -1.294707e-13 , max = 0.9820537 #> A new explainer has been created!

    fi_glm <- feature_importance(explainer_glm, type = "raw", loss_function = loss_root_mean_square) @@ -298,17 +304,17 @@

    Examp #> -> predict function : yhat.default will be used ( default ) #> -> predicted values : numerical, min = 1.687903e-06 , mean = 0.363713 , max = 0.9996712 #> -> residual function : difference between y and yhat ( default ) -#> -> residuals : numerical, min = -0.9885727 , mean = 0.0001203494 , max = 0.9970635 +#> -> residuals : numerical, min = -0.9885727 , mean = 0.0001203497 , max = 0.9970635 #> A new explainer has been created!

    fi_xgb <- feature_importance(explainer_xgb, type = "raw") head(fi_xgb)
    #> variable mean_dropout_loss label -#> 1 _full_model_ 98.22955 xgboost -#> 2 gendermale 98.22955 xgboost -#> 3 evaluation 114.79861 xgboost -#> 4 genderfemale 153.09930 xgboost -#> 5 age 158.15857 xgboost -#> 6 salary 171.38984 xgboost
    plot(fi_glm, fi_xgb, bar_width = 5)
    # } +#> 1 _full_model_ 98.46621 xgboost +#> 2 gendermale 98.46621 xgboost +#> 3 evaluation 114.56402 xgboost +#> 4 genderfemale 155.01631 xgboost +#> 5 age 156.90054 xgboost +#> 6 salary 170.13976 xgboost
    plot(fi_glm, fi_xgb, bar_width = 5)
    # }
    @@ -316,9 +322,13 @@

    Examp

    Contents

    diff --git a/docs/reference/plotD3_aggregated_profiles.html b/docs/reference/plotD3_aggregated_profiles.html index 3b4e8eb5..2febb91b 100644 --- a/docs/reference/plotD3_aggregated_profiles.html +++ b/docs/reference/plotD3_aggregated_profiles.html @@ -36,16 +36,17 @@ + + - @@ -84,7 +85,7 @@
    part of the DrWhy.AI developed by the MI^2 DataLab - 0.5.2 + 1.0
    @@ -145,10 +146,12 @@

    Plots Aggregated Ceteris Paribus Profiles in D3 with r2d3 Package.

    +

    Function plotD3.aggregated_profiles_explainer plots an aggregate of ceteris paribus profiles. It works in a similar way to plotD3.ceteris_paribus_explainer but, instead of individual profiles, shows average profiles for each variable listed in the variables vector.

    Find more details in Ceteris Paribus Chapter.

    +
    # S3 method for aggregated_profiles_explainer
    @@ -164,7 +167,7 @@ 

    Plots Aggregated Ceteris Paribus Profiles in D3 with r2d3 Package.

    chart_title = "Aggregated Profiles", label_margin = 60 )
    - +

    Arguments

    @@ -209,13 +212,15 @@

    Arg

    a numeric. Set width of label margins in categorical type

    - +

    Value

    a r2d3 object.

    +

    References

    Explanatory Model Analysis. Explore, Explain and Examine Predictive Models. https://pbiecek.github.io/ema

    +

    Examples

    library("DALEX") @@ -260,8 +265,11 @@

    Examp

    Contents

    diff --git a/docs/reference/plotD3_ceteris_paribus.html b/docs/reference/plotD3_ceteris_paribus.html index 7335d4ed..e8a434f2 100644 --- a/docs/reference/plotD3_ceteris_paribus.html +++ b/docs/reference/plotD3_ceteris_paribus.html @@ -36,16 +36,17 @@ + + - @@ -84,7 +85,7 @@
    part of the DrWhy.AI developed by the MI^2 DataLab - 0.5.2 + 1.0
    @@ -145,10 +146,12 @@

    Plots Ceteris Paribus Profiles in D3 with r2d3 Package.

    +

    Function plotD3.ceteris_paribus_explainer plots Individual Variable Profiles for selected observations. It uses output from ceteris_paribus function. Various parameters help to decide what should be plotted, profiles, aggregated profiles, points or rugs.

    Find more details in Ceteris Paribus Chapter.

    +
    plotD3(x, ...)
    @@ -169,7 +172,7 @@ 

    Plots Ceteris Paribus Profiles in D3 with r2d3 Package.

    show_observations = TRUE, show_rugs = TRUE )
    - +

    Arguments

    @@ -227,10 +230,11 @@

    Arg

    a logical. Adds rugs layer to a plot. By default it's TRUE

    - +

    Value

    a r2d3 object.

    +

    Examples

    library("DALEX") @@ -266,7 +270,9 @@

    Examp

    Contents

    diff --git a/docs/reference/plotD3_feature_importance.html b/docs/reference/plotD3_feature_importance.html index 6026b5b8..c3fe42c6 100644 --- a/docs/reference/plotD3_feature_importance.html +++ b/docs/reference/plotD3_feature_importance.html @@ -36,16 +36,17 @@ + + - @@ -84,7 +85,7 @@
    part of the DrWhy.AI developed by the MI^2 DataLab - 0.5.2 + 1.0
    @@ -145,10 +146,12 @@

    Plot Feature Importance Objects in D3 with r2d3 Package.

    +

    Function plotD3.feature_importance_explainer plots dropouts for variables used in the model. It uses output from feature_importance function that corresponds to permutation based measure of feature importance. Variables are sorted in the same order in all panels. The order depends on the average drop out loss. In different panels variable contributions may not look like sorted if variable importance is different in different models.

    +
    # S3 method for feature_importance_explainer
    @@ -163,7 +166,7 @@ 

    Plot Feature Importance Objects in D3 with r2d3 Package.

    margin = 0.15, chart_title = "Feature importance" )
    - +

    Arguments

    @@ -206,10 +209,11 @@

    Arg

    a character. Set custom title

    - +

    Value

    a r2d3 object.

    +

    Examples

    library("DALEX") @@ -230,7 +234,7 @@

    Examp #> 5 surface 614.5519 lm #> 6 district 989.8451 lm

    plotD3(fi_lm) -# \dontrun{ +if (FALSE) { library("randomForest") rf_model <- randomForest(m2.price~., data = apartments) @@ -242,26 +246,23 @@

    Examp fi_rf <- feature_importance(explainer_rf, loss_function = loss_root_mean_square) -head(fi_rf)

    #> variable mean_dropout_loss label -#> 1 _full_model_ 198.5890 rf -#> 2 construction.year 366.4297 rf -#> 3 no.rooms 371.6181 rf -#> 4 floor 412.9942 rf -#> 5 surface 456.1096 rf -#> 6 district 830.3744 rf
    plotD3(fi_lm, fi_rf) +head(fi_rf) +plotD3(fi_lm, fi_rf) plotD3(fi_lm, fi_rf, split = "feature") plotD3(fi_lm, fi_rf, max_vars = 3, bar_width = 16, scale_height = TRUE) plotD3(fi_lm, fi_rf, max_vars = 3, bar_width = 16, split = "feature", scale_height = TRUE) plotD3(fi_lm, margin = 0.2) -# }
    +}
    @@ -142,12 +143,14 @@

    Prints Aggregated Profiles

    +

    Prints Aggregated Profiles

    +
    # S3 method for aggregated_profiles_explainer
     print(x, ...)
    - +

    Arguments

    @@ -160,7 +163,7 @@

    Arg

    other arguments that will be passed to head()

    - +

    Examples

    library("DALEX") @@ -174,11 +177,11 @@

    Examp #> -> model label : lm ( default ) #> -> data : 2207 rows 7 cols #> -> target variable : 2207 values -#> -> model_info : package stats , ver. 3.6.1 , task regression ( default ) +#> -> model_info : package stats , ver. 3.6.0 , task regression ( default ) #> -> predict function : yhat.glm will be used ( default ) #> -> predicted values : numerical, min = 0.1490412 , mean = 0.3221568 , max = 0.9878987 #> -> residual function : difference between y and yhat ( default ) -#> -> residuals : numerical, min = -0.8898433 , mean = 4.198546e-13 , max = 0.8448637 +#> -> residuals : numerical, min = -0.8898433 , mean = 4.198219e-13 , max = 0.8448637 #> A new explainer has been created!

    selected_passangers <- select_sample(titanic_imputed, n = 100) cp_rf <- ceteris_paribus(explain_titanic_glm, selected_passangers) @@ -282,6 +285,7 @@

    Examp

    Contents

    diff --git a/docs/reference/print.ceteris_paribus_explainer.html b/docs/reference/print.ceteris_paribus_explainer.html index b8874138..d8dcf381 100644 --- a/docs/reference/print.ceteris_paribus_explainer.html +++ b/docs/reference/print.ceteris_paribus_explainer.html @@ -36,13 +36,14 @@ + + - @@ -81,7 +82,7 @@
    part of the DrWhy.AI developed by the MI^2 DataLab - 0.5.2 + 1.0
    @@ -142,12 +143,14 @@

    Prints Individual Variable Explainer Summary

    +

    Prints Individual Variable Explainer Summary

    +
    # S3 method for ceteris_paribus_explainer
     print(x, ...)
    - +

    Arguments

    @@ -160,7 +163,7 @@

    Arg

    other arguments that will be passed to head()

    - +

    Examples

    library("DALEX") @@ -221,6 +224,7 @@

    Examp

    Contents

    diff --git a/docs/reference/print.feature_importance_explainer.html b/docs/reference/print.feature_importance_explainer.html index 227c3c4c..e82baa9f 100644 --- a/docs/reference/print.feature_importance_explainer.html +++ b/docs/reference/print.feature_importance_explainer.html @@ -36,13 +36,14 @@ + + - @@ -81,7 +82,7 @@
    part of the DrWhy.AI developed by the MI^2 DataLab - 0.5.2 + 1.0
    @@ -142,12 +143,14 @@

    Print Generic for Feature Importance Object

    +

    Print Generic for Feature Importance Object

    +
    # S3 method for feature_importance_explainer
     print(x, ...)
    - +

    Arguments

    @@ -160,13 +163,15 @@

    Arg

    other parameters.

    - +

    Value

    a data frame.

    +

    References

    Explanatory Model Analysis. Explore, Explain and Examine Predictive Models. https://pbiecek.github.io/ema

    +

    Examples

    library("DALEX") @@ -181,11 +186,11 @@

    Examp #> -> model label : lm ( default ) #> -> data : 2207 rows 7 cols #> -> target variable : 2207 values -#> -> model_info : package stats , ver. 3.6.1 , task regression ( default ) +#> -> model_info : package stats , ver. 3.6.0 , task regression ( default ) #> -> predict function : yhat.glm will be used ( default ) #> -> predicted values : numerical, min = 0.1490412 , mean = 0.3221568 , max = 0.9878987 #> -> residual function : difference between y and yhat ( default ) -#> -> residuals : numerical, min = -0.8898433 , mean = 4.198546e-13 , max = 0.8448637 +#> -> residuals : numerical, min = -0.8898433 , mean = 4.198219e-13 , max = 0.8448637 #> A new explainer has been created!

    fi_glm <- feature_importance(explain_titanic_glm) @@ -206,8 +211,11 @@

    Examp

    Contents

    diff --git a/docs/reference/select_neighbours.html b/docs/reference/select_neighbours.html index 11e4dca2..e613a9ef 100644 --- a/docs/reference/select_neighbours.html +++ b/docs/reference/select_neighbours.html @@ -36,14 +36,15 @@ + + - @@ -82,7 +83,7 @@
    part of the DrWhy.AI developed by the MI^2 DataLab - 0.5.2 + 1.0
    @@ -143,8 +144,10 @@

    Select Subset of Rows Closest to a Specified Observation

    +

    Function select_neighbours selects subset of rows from data set. This is useful if data is large and we need just a sample to calculate profiles.

    +
    select_neighbours(
    @@ -155,7 +158,7 @@ 

    Select Subset of Rows Closest to a Specified Observation

    n = 20, frac = NULL )
    - +

    Arguments

    @@ -186,15 +189,17 @@

    Arg Either n or frac need to be specified.

    - +

    Value

    a data frame with selected rows

    +

    Details

    Note that select_neighbours() function is S3 generic. If you want to work on non standard data sources (like H2O ddf, external databases) you should overload it.

    +

    Examples

    library("DALEX") @@ -220,8 +225,11 @@

    Examp

    Contents

    diff --git a/docs/reference/select_sample.html b/docs/reference/select_sample.html index 62f25836..8599c1ec 100644 --- a/docs/reference/select_sample.html +++ b/docs/reference/select_sample.html @@ -36,14 +36,15 @@ + + - @@ -82,7 +83,7 @@
    part of the DrWhy.AI developed by the MI^2 DataLab - 0.5.2 + 1.0
    @@ -143,12 +144,14 @@

    Select Subset of Rows

    +

    Function select_sample selects subset of rows from data set. This is useful if data is large and we need just a sample to calculate profiles.

    +
    select_sample(data, n = 100, seed = 1313)
    - +

    Arguments

    @@ -165,15 +168,17 @@

    Arg

    seed for random number generator.

    - +

    Value

    a data frame with selected rows

    +

    Details

    Note that select_sample() function is S3 generic. If you want to work on non standard data sources (like H2O ddf, external databases) you should overload it.

    +

    Examples

    library("DALEX") @@ -192,8 +197,11 @@

    Examp

    Contents

    diff --git a/docs/reference/show_aggregated_profiles-1.png b/docs/reference/show_aggregated_profiles-1.png index 84ab7f19..a7461620 100644 Binary files a/docs/reference/show_aggregated_profiles-1.png and b/docs/reference/show_aggregated_profiles-1.png differ diff --git a/docs/reference/show_aggregated_profiles-2.png b/docs/reference/show_aggregated_profiles-2.png index cfd96a7a..cf2c7ea3 100644 Binary files a/docs/reference/show_aggregated_profiles-2.png and b/docs/reference/show_aggregated_profiles-2.png differ diff --git a/docs/reference/show_aggregated_profiles.html b/docs/reference/show_aggregated_profiles.html index ebe2320a..98e1110f 100644 --- a/docs/reference/show_aggregated_profiles.html +++ b/docs/reference/show_aggregated_profiles.html @@ -36,14 +36,15 @@ + + - @@ -82,7 +83,7 @@
    part of the DrWhy.AI developed by the MI^2 DataLab - 0.5.2 + 1.0
    @@ -143,8 +144,10 @@

    Adds a Layer with Aggregated Profiles

    +

    Function show_aggregated_profiles adds a layer to a plot created with plot.ceteris_paribus_explainer.

    +
    show_aggregated_profiles(
    @@ -155,7 +158,7 @@ 

    Adds a Layer with Aggregated Profiles

    color = "#371ea3", variables = NULL )
    - +

    Arguments

    @@ -184,10 +187,11 @@

    Arg

    if not NULL then only variables will be presented

    - +

    Value

    a ggplot2 layer

    +

    Examples

    library("DALEX") @@ -203,11 +207,11 @@

    Examp #> -> model label : lm ( default ) #> -> data : 2207 rows 7 cols #> -> target variable : 2207 values -#> -> model_info : package stats , ver. 3.6.1 , task regression ( default ) +#> -> model_info : package stats , ver. 3.6.0 , task regression ( default ) #> -> predict function : yhat.glm will be used ( default ) #> -> predicted values : numerical, min = 0.1490412 , mean = 0.3221568 , max = 0.9878987 #> -> residual function : difference between y and yhat ( default ) -#> -> residuals : numerical, min = -0.8898433 , mean = 4.198546e-13 , max = 0.8448637 +#> -> residuals : numerical, min = -0.8898433 , mean = 4.198219e-13 , max = 0.8448637 #> A new explainer has been created!

    cp_rf <- ceteris_paribus(explain_titanic_glm, selected_passangers) @@ -278,7 +282,9 @@

    Examp

    Contents

    diff --git a/docs/reference/show_observations-1.png b/docs/reference/show_observations-1.png index 749545fb..e532ab79 100644 Binary files a/docs/reference/show_observations-1.png and b/docs/reference/show_observations-1.png differ diff --git a/docs/reference/show_observations.html b/docs/reference/show_observations.html index 8ba7209b..1c58180d 100644 --- a/docs/reference/show_observations.html +++ b/docs/reference/show_observations.html @@ -36,15 +36,16 @@ + + - @@ -83,7 +84,7 @@
    part of the DrWhy.AI developed by the MI^2 DataLab - 0.5.2 + 1.0
    @@ -144,9 +145,11 @@

    Adds a Layer with Observations to a Profile Plot

    +

    Function show_observations adds a layer to a plot created with plot.ceteris_paribus_explainer for selected observations. Various parameters help to decide what should be plotted, profiles, aggregated profiles, points or rugs.

    +
    show_observations(
    @@ -158,7 +161,7 @@ 

    Adds a Layer with Observations to a Profile Plot

    variable_type = "numerical", variables = NULL )
    - +

    Arguments

    @@ -192,10 +195,11 @@

    Arg

    if not NULL then only variables will be presented

    - +

    Value

    a ggplot2 layer

    +

    Examples

    library("DALEX") @@ -252,7 +256,9 @@

    Examp

    Contents

    diff --git a/docs/reference/show_profiles-1.png b/docs/reference/show_profiles-1.png index 68f2dffb..f340fe2e 100644 Binary files a/docs/reference/show_profiles-1.png and b/docs/reference/show_profiles-1.png differ diff --git a/docs/reference/show_profiles-2.png b/docs/reference/show_profiles-2.png index 7a09e189..a2e8b187 100644 Binary files a/docs/reference/show_profiles-2.png and b/docs/reference/show_profiles-2.png differ diff --git a/docs/reference/show_profiles.html b/docs/reference/show_profiles.html index 9b73639c..719ef6fd 100644 --- a/docs/reference/show_profiles.html +++ b/docs/reference/show_profiles.html @@ -36,14 +36,15 @@ + + - @@ -82,7 +83,7 @@
    part of the DrWhy.AI developed by the MI^2 DataLab - 0.5.2 + 1.0
    @@ -143,8 +144,10 @@

    Adds a Layer with Profiles

    +

    Function show_profiles adds a layer to a plot created with plot.ceteris_paribus_explainer.

    +
    show_profiles(
    @@ -155,7 +158,7 @@ 

    Adds a Layer with Profiles

    color = "#371ea3", variables = NULL )
    - +

    Arguments

    @@ -184,10 +187,11 @@

    Arg

    if not NULL then only variables will be presented

    - +

    Value

    a ggplot2 layer

    +

    Examples

    library("DALEX") @@ -271,7 +275,9 @@

    Examp

    Contents

    diff --git a/docs/reference/show_residuals-1.png b/docs/reference/show_residuals-1.png index 57c76f00..1f47745c 100644 Binary files a/docs/reference/show_residuals-1.png and b/docs/reference/show_residuals-1.png differ diff --git a/docs/reference/show_residuals-2.png b/docs/reference/show_residuals-2.png index 8df717a7..bcf95537 100644 Binary files a/docs/reference/show_residuals-2.png and b/docs/reference/show_residuals-2.png differ diff --git a/docs/reference/show_residuals.html b/docs/reference/show_residuals.html index ab408d89..f0d64c3b 100644 --- a/docs/reference/show_residuals.html +++ b/docs/reference/show_residuals.html @@ -36,15 +36,16 @@ + + - @@ -83,7 +84,7 @@
    part of the DrWhy.AI developed by the MI^2 DataLab - 0.5.2 + 1.0
    @@ -144,9 +145,11 @@

    Adds a Layer with Residuals to a Profile Plot

    +

    Function show_residuals adds a layer to a plot created with plot.ceteris_paribus_explainer for selected observations. Note that the y argument has to be specified in the ceteris_paribus function.

    +
    show_residuals(
    @@ -154,10 +157,10 @@ 

    Adds a Layer with Residuals to a Profile Plot

    ..., size = 0.75, alpha = 1, - color = c(`TRUE` = "#371ea3", `FALSE` = "#f05a71"), + color = c(`TRUE` = "#8bdcbe", `FALSE` = "#f05a71"), variables = NULL )
    - +

    Arguments

    @@ -187,10 +190,11 @@

    Arg

    if not NULL then only variables will be presented

    - +

    Value

    a ggplot2 layer

    +

    Examples

    library("DALEX") @@ -250,7 +254,9 @@

    Examp

    Contents

    diff --git a/docs/reference/show_rugs-1.png b/docs/reference/show_rugs-1.png index 4154de15..f922d944 100644 Binary files a/docs/reference/show_rugs-1.png and b/docs/reference/show_rugs-1.png differ diff --git a/docs/reference/show_rugs.html b/docs/reference/show_rugs.html index 0970fa7f..cecfecf0 100644 --- a/docs/reference/show_rugs.html +++ b/docs/reference/show_rugs.html @@ -36,15 +36,16 @@ + + - @@ -83,7 +84,7 @@
    part of the DrWhy.AI developed by the MI^2 DataLab - 0.5.2 + 1.0
    @@ -144,9 +145,11 @@

    Adds a Layer with Rugs to a Profile Plot

    +

    Function show_rugs adds a layer to a plot created with plot.ceteris_paribus_explainer for selected observations. Various parameters help to decide what should be plotted, profiles, aggregated profiles, points or rugs.

    +
    show_rugs(
    @@ -159,7 +162,7 @@ 

    Adds a Layer with Rugs to a Profile Plot

    sides = "b", variables = NULL )
    - +

    Arguments

    @@ -197,10 +200,11 @@

    Arg

    if not NULL then only variables will be presented

    - +

    Value

    a ggplot2 layer

    +

    Examples

    library("DALEX") @@ -254,7 +258,9 @@

    Examp

    Contents

    diff --git a/man/accumulated_dependency.Rd b/man/accumulated_dependence.Rd similarity index 81% rename from man/accumulated_dependency.Rd rename to man/accumulated_dependence.Rd index 6c2a3ef6..857c7d04 100644 --- a/man/accumulated_dependency.Rd +++ b/man/accumulated_dependence.Rd @@ -1,15 +1,16 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/accumulated_dependency.R -\name{accumulated_dependency} +% Please edit documentation in R/accumulated_dependence.R +\name{accumulated_dependence} +\alias{accumulated_dependence} +\alias{accumulated_dependence.explainer} +\alias{accumulated_dependence.default} +\alias{accumulated_dependence.ceteris_paribus_explainer} \alias{accumulated_dependency} -\alias{accumulated_dependency.explainer} -\alias{accumulated_dependency.default} -\alias{accumulated_dependency.ceteris_paribus_explainer} \title{Accumulated Local Effects Profiles aka ALEPlots} \usage{ -accumulated_dependency(x, ...) +accumulated_dependence(x, ...) -\method{accumulated_dependency}{explainer}( +\method{accumulated_dependence}{explainer}( x, variables = NULL, N = 500, @@ -19,7 +20,7 @@ accumulated_dependency(x, ...) variable_type = "numerical" ) -\method{accumulated_dependency}{default}( +\method{accumulated_dependence}{default}( x, data, predict_function = predict, @@ -32,7 +33,9 @@ accumulated_dependency(x, ...) variable_type = "numerical" ) -\method{accumulated_dependency}{ceteris_paribus_explainer}(x, ..., variables = NULL) +\method{accumulated_dependence}{ceteris_paribus_explainer}(x, ..., variables = NULL) + +accumulated_dependency(x, ...) } \arguments{ \item{x}{an explainer created with function \code{DALEX::explain()}, an object of the class \code{ceteris_paribus_explainer} @@ -44,7 +47,7 @@ or a model to be explained.} Will be passed to \code{\link{calculate_variable_split}}. 
If \code{NULL} then all variables from the validation data will be used.} -\item{N}{number of observations used for calculation of partial dependency profiles. +\item{N}{number of observations used for calculation of partial dependence profiles. By default, 500 observations will be chosen randomly.} \item{variable_splits}{named list of splits for variables, in most cases created with \code{\link{calculate_variable_split}}. @@ -67,10 +70,10 @@ an object of the class \code{aggregated_profiles_explainer} } \description{ Accumulated Local Effects Profiles accumulate local changes in Ceteris Paribus Profiles. -Function \code{\link{accumulated_dependency}} calls \code{\link{ceteris_paribus}} and then \code{\link{aggregate_profiles}}. +Function \code{\link{accumulated_dependence}} calls \code{\link{ceteris_paribus}} and then \code{\link{aggregate_profiles}}. } \details{ -Find more detailes in the \href{https://pbiecek.github.io/ema/accumulatedLocalProfiles.html}{Accumulated Local Dependency Chapter}. +Find more detailes in the \href{https://pbiecek.github.io/ema/accumulatedLocalProfiles.html}{Accumulated Local Dependence Chapter}. 
} \examples{ library("DALEX") @@ -83,7 +86,7 @@ explain_titanic_glm <- explain(model_titanic_glm, y = titanic_imputed[,8], verbose = FALSE) -adp_glm <- accumulated_dependency(explain_titanic_glm, +adp_glm <- accumulated_dependence(explain_titanic_glm, N = 150, variables = c("age", "fare")) head(adp_glm) plot(adp_glm) @@ -98,10 +101,10 @@ explain_titanic_rf <- explain(model_titanic_rf, y = titanic_imputed[,8], verbose = FALSE) -adp_rf <- accumulated_dependency(explain_titanic_rf, N = 200, variable_type = "numerical") +adp_rf <- accumulated_dependence(explain_titanic_rf, N = 200, variable_type = "numerical") plot(adp_rf) -adp_rf <- accumulated_dependency(explain_titanic_rf, N = 200, variable_type = "categorical") +adp_rf <- accumulated_dependence(explain_titanic_rf, N = 200, variable_type = "categorical") plotD3(adp_rf, label_margin = 80, scale_plot = TRUE) } diff --git a/man/aggregate_profiles.Rd b/man/aggregate_profiles.Rd index b9b96862..becb59de 100644 --- a/man/aggregate_profiles.Rd +++ b/man/aggregate_profiles.Rd @@ -39,9 +39,9 @@ an object of the class \code{aggregated_profiles_explainer} } \description{ The function \code{aggregate_profiles()} calculates an aggregate of ceteris paribus profiles. -It can be: Partial Dependency Profile (average across Ceteris Paribus Profiles), -Conditional Dependency Profile (local weighted average across Ceteris Paribus Profiles) or -Accumulated Local Dependency Profile (cummulated average local changes in Ceteris Paribus Profiles). +It can be: Partial Dependence Profile (average across Ceteris Paribus Profiles), +Conditional Dependence Profile (local weighted average across Ceteris Paribus Profiles) or +Accumulated Local Dependence Profile (cummulated average local changes in Ceteris Paribus Profiles). 
} \examples{ library("DALEX") diff --git a/man/conditional_dependency.Rd b/man/conditional_dependence.Rd similarity index 77% rename from man/conditional_dependency.Rd rename to man/conditional_dependence.Rd index 6d577ed5..913938ef 100644 --- a/man/conditional_dependency.Rd +++ b/man/conditional_dependence.Rd @@ -1,16 +1,17 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/conditional_dependency.R -\name{conditional_dependency} -\alias{conditional_dependency} -\alias{conditional_dependency.explainer} -\alias{conditional_dependency.default} -\alias{conditional_dependency.ceteris_paribus_explainer} +% Please edit documentation in R/conditional_dependence.R +\name{conditional_dependence} +\alias{conditional_dependence} +\alias{conditional_dependence.explainer} +\alias{conditional_dependence.default} +\alias{conditional_dependence.ceteris_paribus_explainer} \alias{local_dependency} -\title{Conditional Dependency Profiles} +\alias{conditional_dependency} +\title{Conditional Dependence Profiles} \usage{ -conditional_dependency(x, ...) +conditional_dependence(x, ...) -\method{conditional_dependency}{explainer}( +\method{conditional_dependence}{explainer}( x, variables = NULL, N = 500, @@ -20,7 +21,7 @@ conditional_dependency(x, ...) variable_type = "numerical" ) -\method{conditional_dependency}{default}( +\method{conditional_dependence}{default}( x, data, predict_function = predict, @@ -33,9 +34,11 @@ conditional_dependency(x, ...) variable_type = "numerical" ) -\method{conditional_dependency}{ceteris_paribus_explainer}(x, ..., variables = NULL) +\method{conditional_dependence}{ceteris_paribus_explainer}(x, ..., variables = NULL) local_dependency(x, ...) + +conditional_dependency(x, ...) 
} \arguments{ \item{x}{an explainer created with function \code{DALEX::explain()}, an object of the class \code{ceteris_paribus_explainer} @@ -46,7 +49,7 @@ or a model to be explained.} \item{variables}{names of variables for which profiles shall be calculated. Will be passed to \code{\link{calculate_variable_split}}. If \code{NULL} then all variables from the validation data will be used.} -\item{N}{number of observations used for calculation of partial dependency profiles. By default 500.} +\item{N}{number of observations used for calculation of partial dependence profiles. By default 500.} \item{variable_splits}{named list of splits for variables, in most cases created with \code{\link{calculate_variable_split}}. If \code{NULL} then it will be calculated based on validation data avaliable in the \code{explainer}.} @@ -67,11 +70,11 @@ NOTE: It is best when target variable is not present in the \code{data}} an object of the class \code{aggregated_profile_explainer} } \description{ -Conditional Dependency Profiles (aka Local Profiles) average localy Ceteris Paribus Profiles. -Function 'conditional_dependency' calls 'ceteris_paribus' and then 'aggregate_profiles'. +Conditional Dependence Profiles (aka Local Profiles) average localy Ceteris Paribus Profiles. +Function 'conditional_dependence' calls 'ceteris_paribus' and then 'aggregate_profiles'. } \details{ -Find more detailes in the \href{https://pbiecek.github.io/ema/accumulatedLocalProfiles.html}{Accumulated Local Dependency Chapter}. +Find more detailes in the \href{https://pbiecek.github.io/ema/accumulatedLocalProfiles.html}{Accumulated Local Dependence Chapter}. 
} \examples{ library("DALEX") @@ -84,7 +87,7 @@ explain_titanic_glm <- explain(model_titanic_glm, y = titanic_imputed[,8], verbose = FALSE) -cdp_glm <- conditional_dependency(explain_titanic_glm, +cdp_glm <- conditional_dependence(explain_titanic_glm, N = 150, variables = c("age", "fare")) head(cdp_glm) plot(cdp_glm) @@ -99,10 +102,10 @@ explain_titanic_rf <- explain(model_titanic_rf, y = titanic_imputed[,8], verbose = FALSE) -cdp_rf <- conditional_dependency(explain_titanic_rf, N = 200, variable_type = "numerical") +cdp_rf <- conditional_dependence(explain_titanic_rf, N = 200, variable_type = "numerical") plot(cdp_rf) -cdp_rf <- conditional_dependency(explain_titanic_rf, N = 200, variable_type = "categorical") +cdp_rf <- conditional_dependence(explain_titanic_rf, N = 200, variable_type = "categorical") plotD3(cdp_rf, label_margin = 80, scale_plot = TRUE) } diff --git a/man/describe.Rd b/man/describe.Rd index bc400835..5abf0141 100644 --- a/man/describe.Rd +++ b/man/describe.Rd @@ -1,14 +1,14 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/describe_aggregated_profiles.R, % R/describe_ceteris_paribus.R, R/describe_feature_importance.R -\name{describe.partial_dependency_explainer} -\alias{describe.partial_dependency_explainer} +\name{describe.partial_dependence_explainer} +\alias{describe.partial_dependence_explainer} \alias{describe} \alias{describe.ceteris_paribus_explainer} \alias{describe.feature_importance_explainer} \title{Natural language description of feature importance explainer} \usage{ -\method{describe}{partial_dependency_explainer}( +\method{describe}{partial_dependence_explainer}( x, nonsignificance_treshold = 0.15, ..., diff --git a/man/partial_dependency.Rd b/man/partial_dependence.Rd similarity index 79% rename from man/partial_dependency.Rd rename to man/partial_dependence.Rd index df6359aa..f1d96a4f 100644 --- a/man/partial_dependency.Rd +++ b/man/partial_dependence.Rd @@ -1,15 +1,16 @@ % Generated by roxygen2: do 
not edit by hand -% Please edit documentation in R/partial_dependency.R -\name{partial_dependency} +% Please edit documentation in R/partial_dependence.R +\name{partial_dependence} +\alias{partial_dependence} +\alias{partial_dependence.explainer} +\alias{partial_dependence.default} +\alias{partial_dependence.ceteris_paribus_explainer} \alias{partial_dependency} -\alias{partial_dependency.explainer} -\alias{partial_dependency.default} -\alias{partial_dependency.ceteris_paribus_explainer} -\title{Partial Dependency Profiles} +\title{Partial Dependence Profiles} \usage{ -partial_dependency(x, ...) +partial_dependence(x, ...) -\method{partial_dependency}{explainer}( +\method{partial_dependence}{explainer}( x, variables = NULL, N = 500, @@ -19,7 +20,7 @@ partial_dependency(x, ...) variable_type = "numerical" ) -\method{partial_dependency}{default}( +\method{partial_dependence}{default}( x, data, predict_function = predict, @@ -32,7 +33,9 @@ partial_dependency(x, ...) variable_type = "numerical" ) -\method{partial_dependency}{ceteris_paribus_explainer}(x, ..., variables = NULL) +\method{partial_dependence}{ceteris_paribus_explainer}(x, ..., variables = NULL) + +partial_dependency(x, ...) } \arguments{ \item{x}{an explainer created with function \code{DALEX::explain()}, an object of the class \code{ceteris_paribus_explainer} or @@ -44,7 +47,7 @@ or a model to be explained.} Will be passed to \code{\link{calculate_variable_split}}. If \code{NULL} then all variables from the validation data will be used.} -\item{N}{number of observations used for calculation of partial dependency profiles. By default 500.} +\item{N}{number of observations used for calculation of partial dependence profiles. By default 500.} \item{variable_splits}{named list of splits for variables, in most cases created with \code{\link{calculate_variable_split}}. 
If \code{NULL} then it will be calculated based on validation data avaliable in the \code{explainer}.} @@ -65,8 +68,8 @@ NOTE: It is best when target variable is not present in the \code{data}} an object of the class \code{aggregated_profiles_explainer} } \description{ -Partial Dependency Profiles are averages from Ceteris Paribus Profiles. -Function \code{partial_dependency} calls \code{ceteris_paribus} and then \code{aggregate_profiles}. +Partial Dependence Profiles are averages from Ceteris Paribus Profiles. +Function \code{partial_dependence} calls \code{ceteris_paribus} and then \code{aggregate_profiles}. } \details{ Find more detailes in the \href{https://pbiecek.github.io/ema/partialDependenceProfiles.html}{Partial Dependence Profiles Chapter}. @@ -82,7 +85,7 @@ explain_titanic_glm <- explain(model_titanic_glm, y = titanic_imputed[,8], verbose = FALSE) -pdp_glm <- partial_dependency(explain_titanic_glm, +pdp_glm <- partial_dependence(explain_titanic_glm, N = 50, variables = c("age", "fare")) head(pdp_glm) plot(pdp_glm) @@ -97,10 +100,10 @@ explain_titanic_rf <- explain(model_titanic_rf, y = titanic_imputed[,8], verbose = FALSE) -pdp_rf <- partial_dependency(explain_titanic_rf, variable_type = "numerical") +pdp_rf <- partial_dependence(explain_titanic_rf, variable_type = "numerical") plot(pdp_rf) -pdp_rf <- partial_dependency(explain_titanic_rf, variable_type = "categorical") +pdp_rf <- partial_dependence(explain_titanic_rf, variable_type = "categorical") plotD3(pdp_rf, label_margin = 80, scale_plot = TRUE) } diff --git a/man/plot.aggregated_profiles_explainer.Rd b/man/plot.aggregated_profiles_explainer.Rd index ae828e3b..b3618e21 100644 --- a/man/plot.aggregated_profiles_explainer.Rd +++ b/man/plot.aggregated_profiles_explainer.Rd @@ -33,7 +33,7 @@ a \code{ggplot2} object } \description{ -Function \code{plot.aggregated_profiles_explainer} plots partial dependency plot or accumulated effect plot. 
+Function \code{plot.aggregated_profiles_explainer} plots partial dependence plot or accumulated effect plot. It works in a similar way to \code{plot.ceteris_paribus}, but instead of individual profiles show average profiles for each variable listed in the \code{variables} vector. } @@ -48,11 +48,11 @@ explain_titanic_glm <- explain(model_titanic_glm, y = titanic_imputed[,8], verbose = FALSE) -pdp_rf_p <- partial_dependency(explain_titanic_glm, N = 50) +pdp_rf_p <- partial_dependence(explain_titanic_glm, N = 50) pdp_rf_p$`_label_` <- "RF_partial" -pdp_rf_l <- conditional_dependency(explain_titanic_glm, N = 50) +pdp_rf_l <- conditional_dependence(explain_titanic_glm, N = 50) pdp_rf_l$`_label_` <- "RF_local" -pdp_rf_a<- accumulated_dependency(explain_titanic_glm, N = 50) +pdp_rf_a<- accumulated_dependence(explain_titanic_glm, N = 50) pdp_rf_a$`_label_` <- "RF_accumulated" head(pdp_rf_p) plot(pdp_rf_p, pdp_rf_l, pdp_rf_a, color = "_label_") diff --git a/tests/testthat/test_aggregated_profiles.R b/tests/testthat/test_aggregated_profiles.R index d29c8214..5ed5f5be 100644 --- a/tests/testthat/test_aggregated_profiles.R +++ b/tests/testthat/test_aggregated_profiles.R @@ -28,11 +28,11 @@ test_that("plot aggregate_profiles",{ expect_true("gg" %in% class(pl1)) - pdp_rf_p <- partial_dependency(explainer_rf, variables = "age") + pdp_rf_p <- partial_dependence(explainer_rf, variables = "age") pdp_rf_p$`_label_` <- "RF_partial" - pdp_rf_c <- conditional_dependency(explainer_rf, variables = "age") + pdp_rf_c <- conditional_dependence(explainer_rf, variables = "age") pdp_rf_c$`_label_` <- "RF_conditional" - pdp_rf_a <- accumulated_dependency(explainer_rf, variables = "age") + pdp_rf_a <- accumulated_dependence(explainer_rf, variables = "age") pdp_rf_a$`_label_` <- "RF_accumulated" pl2 <- plot(pdp_rf_p, pdp_rf_c, pdp_rf_a, color = "_label_") @@ -42,7 +42,7 @@ test_that("plot aggregate_profiles",{ }) -test_that("plot partial_dependency",{ +test_that("plot partial_dependence",{ 
library("DALEX") library("randomForest") titanic <- na.omit(titanic) @@ -56,7 +56,7 @@ test_that("plot partial_dependency",{ selected_passangers <- select_sample(titanic, n = 100) cp_rf <- ceteris_paribus(explain_titanic_rf, selected_passangers) - res <- partial_dependency(explain_titanic_rf, N=50, variables = "gender", variable_type = "categorical") + res <- partial_dependence(explain_titanic_rf, N=50, variables = "gender", variable_type = "categorical") expect_true("aggregated_profiles_explainer" %in% class(res)) }) diff --git a/vignettes/vignette_describe.Rmd b/vignettes/vignette_describe.Rmd index d1c85373..d614cf0a 100644 --- a/vignettes/vignette_describe.Rmd +++ b/vignettes/vignette_describe.Rmd @@ -114,9 +114,9 @@ plot(cp_rf, variables = perturbed_variable_continuous) describe(cp_rf, variables = perturbed_variable_continuous) ``` -Ceteris Paribus profiles are described only for a single observation. If we want to access the influence of more than one observation, we need to describe dependency profiles. +Ceteris Paribus profiles are described only for a single observation. If we want to access the influence of more than one observation, we need to describe dependence profiles. -## Partial Dependency Profiles +## Partial Dependence Profiles ```{r} pdp <- aggregate_profiles(cp_rf, type = "partial") diff --git a/vignettes/vignette_simulated.Rmd b/vignettes/vignette_simulated.Rmd index f8e81c2d..a757416e 100644 --- a/vignettes/vignette_simulated.Rmd +++ b/vignettes/vignette_simulated.Rmd @@ -74,33 +74,33 @@ plot(cp_model) + ``` -# Dependency profiles +# Dependence profiles -Lets try Partial Dependency profiles, Conditional Dependency profiles and Accumulated Local profiles. For the last two we can try different smoothing factors +Lets try Partial Dependence profiles, Conditional Dependence profiles and Accumulated Local profiles. 
For the last two we can try different smoothing factors ```{r} -pd_model <- partial_dependency(explain_the_model, variables = c("x1", "x2")) +pd_model <- partial_dependence(explain_the_model, variables = c("x1", "x2")) pd_model$`_label_` = "PDP" -cd_model <- conditional_dependency(explain_the_model, variables = c("x1", "x2")) +cd_model <- conditional_dependence(explain_the_model, variables = c("x1", "x2")) cd_model$`_label_` = "CDP 0.25" -ad_model <- accumulated_dependency(explain_the_model, variables = c("x1", "x2")) +ad_model <- accumulated_dependence(explain_the_model, variables = c("x1", "x2")) ad_model$`_label_` = "ALE 0.25" plot(ad_model, cd_model, pd_model) + ggtitle("Feature effects - PDP, CDP, ALE") -cd_model_1 <- conditional_dependency(explain_the_model, variables = c("x1", "x2"), span = 0.1) +cd_model_1 <- conditional_dependence(explain_the_model, variables = c("x1", "x2"), span = 0.1) cd_model_1$`_label_` = "CDP 0.1" -cd_model_5 <- conditional_dependency(explain_the_model, variables = c("x1", "x2"), span = 0.5) +cd_model_5 <- conditional_dependence(explain_the_model, variables = c("x1", "x2"), span = 0.5) cd_model_5$`_label_` = "CDP 0.5" -ad_model_1 <- accumulated_dependency(explain_the_model, variables = c("x1", "x2"), span = 0.5) +ad_model_1 <- accumulated_dependence(explain_the_model, variables = c("x1", "x2"), span = 0.5) ad_model_1$`_label_` = "ALE 0.1" -ad_model_5 <- accumulated_dependency(explain_the_model, variables = c("x1", "x2"), span = 0.5) +ad_model_5 <- accumulated_dependence(explain_the_model, variables = c("x1", "x2"), span = 0.5) ad_model_5$`_label_` = "ALE 0.5" plot(ad_model, cd_model, pd_model, cd_model_1, cd_model_5, ad_model_1, ad_model_5) + @@ -108,7 +108,7 @@ plot(ad_model, cd_model, pd_model, cd_model_1, cd_model_5, ad_model_1, ad_model_ ``` -# Dependency profiles in groups +# Dependence profiles in groups And now, let's see how the grouping factor works @@ -119,14 +119,14 @@ df$x3 <- factor(sign(df$x2)) explain_the_model$data = 
df # PDP in groups -pd_model_groups <- partial_dependency(explain_the_model, +pd_model_groups <- partial_dependence(explain_the_model, variables = c("x1", "x2"), groups = "x3") plot(pd_model_groups) + - ggtitle("Partial Dependency") + ggtitle("Partial Dependence") # ALE in groups -ad_model_groups <- accumulated_dependency(explain_the_model, +ad_model_groups <- accumulated_dependence(explain_the_model, variables = c("x1", "x2"), groups = "x3") plot(ad_model_groups) + @@ -134,11 +134,11 @@ plot(ad_model_groups) + # CDP in groups -cd_model_groups <- conditional_dependency(explain_the_model, +cd_model_groups <- conditional_dependence(explain_the_model, variables = c("x1", "x2"), groups = "x3") plot(cd_model_groups) + - ggtitle("Conditional Dependency") + ggtitle("Conditional Dependence") ``` diff --git a/vignettes/vignette_titanic.Rmd b/vignettes/vignette_titanic.Rmd index 4c5b1c68..4aa57169 100644 --- a/vignettes/vignette_titanic.Rmd +++ b/vignettes/vignette_titanic.Rmd @@ -69,31 +69,31 @@ plot(fi_rf) As we see the most important feature is `gender`. Next three importnat features are `class`, `age` and `fare`. Let's see the link between model response and these features. -Such univariate relation can be calculated with `partial_dependency()`. +Such univariate relation can be calculated with `partial_dependence()`. ## age Kids 5 years old and younger have much higher survival probability. 
-### Partial Dependency Profiles +### Partial Dependence Profiles ```{r} -pp_age <- partial_dependency(explain_titanic_rf, variables = c("age", "fare")) +pp_age <- partial_dependence(explain_titanic_rf, variables = c("age", "fare")) head(pp_age) plot(pp_age) ``` -### Conditional Dependency Profiles +### Conditional Dependence Profiles ```{r} -cp_age <- conditional_dependency(explain_titanic_rf, variables = c("age", "fare")) +cp_age <- conditional_dependence(explain_titanic_rf, variables = c("age", "fare")) plot(cp_age) ``` ### Accumulated Local Effect Profiles ```{r} -ap_age <- accumulated_dependency(explain_titanic_rf, variables = c("age", "fare")) +ap_age <- accumulated_dependence(explain_titanic_rf, variables = c("age", "fare")) plot(ap_age) ```