Commit 1de9e7f (1 parent: f2739c5)
Showing 2 changed files with 95 additions and 51 deletions.
@@ -1,51 +1,44 @@
---
title: 'iml: An R package for Interpretable Machine Learning'
tags:
  - R
  - machine learning
  - interpretability
authors:
  - name: Christoph Molnar
    orcid: 0000-0003-2331-868X
    affiliation: 1
affiliations:
  - name: Department of Statistics, LMU Munich
    index: 1
date: 8 May 2018
bibliography: paper.bib
---

# Summary
Machine learning models are good at making predictions, but are usually not interpretable.
A variety of methods exist to make such models interpretable.

``iml`` is an R package for making machine learning models interpretable. It
offers model-agnostic methods that work on any type of supervised machine
learning model for tabular data. Implemented methods include:
- Partial dependence plots [@pdp]
- Individual conditional expectation [@ice]
- Feature importance [@importance]
- Global surrogate models [@global]
- Local surrogate models [@local]
- Shapley value [@shapley]
- Interaction effects [@interaction]
``iml`` was designed to provide a class-based and user-friendly way to
use all the listed methods in a similar way. Internally, they all use
the same parent class and share a common framework for the computation.
The goal of ``iml`` is to provide a unified interface for all these methods
so that they can all be used in one place. Many of the methods are already
implemented in other packages ([@pdp-pkg], [@ice-pkg], [@lime-pkg], [@imp-pkg]),
but ``iml`` is the first to offer all of them within the same interface
and computational framework.
The unified interface greatly simplifies the analysis and interpretation
of supervised black box machine learning models.

# Acknowledgements

This work is funded by the Bavarian State Ministry of Science and the Arts in the framework of the Centre Digitisation.Bavaria (ZD.B).

# References
@Manual{R,
  title = {R: A Language and Environment for Statistical Computing},
  author = {{R Core Team}},
  organization = {R Foundation for Statistical Computing},
  address = {Vienna, Austria},
  year = {2016},
  url = {https://www.R-project.org/},
}

@article{Goldstein2013,
  author = {Goldstein, Alex and Kapelner, Adam and Bleich, Justin and Pitkin, Emil},
  title = {{Peeking Inside the Black Box: Visualizing Statistical Learning with Plots of Individual Conditional Expectation}},
  year = {2013},
  pages = {1--22},
  doi = {10.1080/10618600.2014.907095},
  archivePrefix = {arXiv},
  eprint = {1309.6392},
  url = {http://arxiv.org/abs/1309.6392},
}

@article{Friedman1999,
  author = {Friedman, Jerome H.},
  title = {{Greedy Function Approximation: A Gradient Boosting Machine}},
  journal = {The Annals of Statistics},
  volume = {29},
  number = {5},
  pages = {1189--1232},
  year = {2001},
  doi = {10.1214/aos/1013203451},
}
@@ -0,0 +1,51 @@
---
title: 'iml: An R package for Interpretable Machine Learning'
tags:
  - R
  - machine learning
  - interpretability
authors:
  - name: Christoph Molnar
    orcid: 0000-0003-2331-868X
    affiliation: 1
affiliations:
  - name: Department of Statistics, LMU Munich
    index: 1
date: 8 May 2018
bibliography: paper.bib
---

# Summary
Machine learning models are good at making predictions, but are usually not interpretable.
A variety of methods exist to make such models interpretable.

``iml`` is an R package [@R] for making machine learning models interpretable. It
offers model-agnostic methods that work on any type of supervised machine
learning model for tabular data. Implemented methods include:
- Partial dependence plots [@Friedman1999]
- Individual conditional expectation [@Goldstein2013]
- Feature importance [@importance]
- Global surrogate models [@global]
- Local surrogate models [@local]
- Shapley value [@shapley]
- Interaction effects [@interaction]
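For illustration, the following is a minimal sketch of how the listed
methods can be used. It assumes the current CRAN API of ``iml``
(``Predictor``, ``FeatureEffect``, ``Shapley``), which may differ from the
version in this commit, and fits a random forest on the Boston housing data
purely as an example model:

```r
library("iml")
library("randomForest")

# Fit any supervised model; iml is model-agnostic.
data("Boston", package = "MASS")
rf <- randomForest(medv ~ ., data = Boston, ntree = 50)

# Wrap model and data in a Predictor object, the shared entry point
# for all interpretation methods.
X <- Boston[, names(Boston) != "medv"]
predictor <- Predictor$new(rf, data = X, y = Boston$medv)

# Partial dependence plot with individual conditional expectation curves.
effect <- FeatureEffect$new(predictor, feature = "lstat", method = "pdp+ice")
plot(effect)

# Shapley values explaining the prediction for a single observation.
shapley <- Shapley$new(predictor, x.interest = X[1, ])
plot(shapley)
```

Because every method accepts the same ``Predictor`` object, swapping in a
different underlying model only changes the first two steps.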
``iml`` was designed to provide a class-based and user-friendly way to
use all the listed methods in a similar way. Internally, they all use
the same parent class and share a common framework for the computation.
The goal of ``iml`` is to provide a unified interface for all these methods
so that they can all be used in one place. Many of the methods are already
implemented in other packages ([@pdp-pkg], [@ice-pkg], [@lime-pkg], [@imp-pkg]),
but ``iml`` is the first to offer all of them within the same interface
and computational framework.
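This shared framework can be sketched concretely: each method is a class
constructed from the same ``Predictor`` object, exposing its output through
a ``results`` field and a ``plot()`` method. Again, the class names below
reflect the current CRAN API and are an assumption for this commit's version:

```r
# Same Predictor object as above; each method is just another class.
imp <- FeatureImp$new(predictor, loss = "mae")      # permutation feature importance
tree <- TreeSurrogate$new(predictor, maxdepth = 2)  # global surrogate model
inter <- Interaction$new(predictor)                 # interaction strengths (H-statistic)

# All method objects expose their computed values the same way.
head(imp$results)
plot(inter)
```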
The unified interface greatly simplifies the analysis and interpretation
of supervised black box machine learning models.

# Acknowledgements

This work is funded by the Bavarian State Ministry of Science and the Arts in the framework of the Centre Digitisation.Bavaria (ZD.B).

# References