diff --git a/.Rbuildignore b/.Rbuildignore index 03e533dde..1901147da 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -2,6 +2,7 @@ ^\.Rproj\.user$ README.* README_files +cran-comments.md framework.jpg .travis.yml Makefile diff --git a/.gitignore b/.gitignore index a93b71222..2bfa84b9b 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,6 @@ *.swp iml.Rcheck ..Rcheck +README.html +notes.md + diff --git a/DESCRIPTION b/DESCRIPTION index 76e00a9b3..7eaddc939 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,15 +1,13 @@ Package: iml Type: Package -Title: Methods for interpretable machine learning -Version: 0.1 -Date: 2017-11-30 -Author: Christoph Molnar +Title: Interpretable Machine Learning +Version: 0.2 +Date: 2018-03-04 Authors@R: c(person(given = "Christoph", family = "Molnar", role = c("aut", "cre"), email = "christoph.molnar@gmail.com")) Maintainer: Christoph Molnar -Description: Model-agnostic interpretability methods for machine learning models. - The iml package provides you with tools to analyse and explain the behaviour and prediction of black box machine learning models. - Implemented tools are: partial dependence plots, local models (LIME), feature importance measure and many more. +Description: The iml package provides model-agnostic interpretability methods to + analyse and explain the behaviour and predictions of any black box machine learning model. Imports: R6, checkmate, dplyr, @@ -18,8 +16,7 @@ Imports: R6, partykit, glmnet, Metrics, - data.table, - mlr + data.table Suggests: randomForest, gower, testthat, @@ -27,7 +24,7 @@ Suggests: randomForest, MASS, caret, e1071, - lime + lime, + mlr License: MIT + file LICENSE -Roxygen: list(wrap = FALSE) RoxygenNote: 6.0.1 diff --git a/LICENSE b/LICENSE index 63b4b681c..3027f8345 100644 --- a/LICENSE +++ b/LICENSE @@ -1,21 +1,2 @@ -MIT License - -Copyright (c) [year] [fullname] - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE.
\ No newline at end of file +YEAR: 2018 +COPYRIGHT HOLDER: Christoph Molnar \ No newline at end of file diff --git a/NAMESPACE b/NAMESPACE index 0974b6bf2..4a4117b55 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,14 +1,15 @@ # Generated by roxygen2: do not edit by hand -S3method(plot,Importance) -S3method(predict,LIME) +S3method(plot,FeatureImp) +S3method(predict,LocalModel) S3method(predict,TreeSurrogate) -export(feature.imp) -export(ice) -export(lime) -export(pdp) -export(shapley) -export(tree.surrogate) +export(FeatureImp) +export(Ice) +export(LocalModel) +export(PartialDependence) +export(Predictor) +export(Shapley) +export(TreeSurrogate) import(Metrics) import(R6) import(checkmate) @@ -21,9 +22,7 @@ importFrom(dplyr,left_join) importFrom(dplyr,one_of) importFrom(dplyr,summarise) importFrom(glmnet,glmnet) -importFrom(mlr,getPredictionProbabilities) -importFrom(mlr,getPredictionResponse) -importFrom(mlr,getTaskType) +importFrom(stats,model.matrix) importFrom(stats,predict) importFrom(tidyr,gather) importFrom(utils,methods) diff --git a/NEWS.md b/NEWS.md new file mode 100644 index 000000000..3a6efe7f0 --- /dev/null +++ b/NEWS.md @@ -0,0 +1,11 @@ +# iml 0.2 +* The API has been reworked: + * User directly interacts with R6 classes (`pdp()` is now `PartialDependence$new()`). + * User has to wrap the machine learning model with `Predictor$new()`. + * New data points in `Shapley` and `LocalModel` can be set with `$explain()`. + * `Lime` has been renamed to `LocalModel`. +* Plots have been improved. +* Documentation has been improved. + +# iml 0.1 +Initial release \ No newline at end of file diff --git a/R/DataSampler.R b/R/Data.R similarity index 54% rename from R/DataSampler.R rename to R/Data.R index 4bd6fb26c..13c6fd7e2 100644 --- a/R/DataSampler.R +++ b/R/Data.R @@ -1,4 +1,4 @@ -DataSampler = R6::R6Class('DataSampler', +Data = R6::R6Class("Data", public = list( X = NULL, y = NULL, @@ -8,36 +8,44 @@ DataSampler = R6::R6Class('DataSampler', n.features = NULL, prob = NULL, sample = function(n=100, replace = TRUE, prob = NULL, get.y=FALSE) { - if(is.null(prob) & !is.null(self$prob)) { + if (is.null(prob) & !is.null(self$prob)) { prob = self$prob } indices = sample.int(private$nrows, size = n, replace = replace, prob = prob) - if(get.y){ + if (get.y) { cbind(self$X[indices,], self$y[indices,]) } else { self$X[indices, ] } }, - get.x = function(...){ + get.x = function(...) { self$X }, - get.xy = function(...){ + get.xy = function(...) 
{ cbind(self$X, self$y) }, - print = function(){ + print = function() { cat("Sampling from data.frame with", nrow(self$X), "rows and", ncol(self$X), "columns.") }, - initialize = function(X, y = NULL, prob = NULL){ + initialize = function(X, y = NULL, prob = NULL) { assertDataFrame(X, all.missing = FALSE) - assert_named(X) - assertDataFrame(y, all.missing = FALSE, null.ok = TRUE) + assertNamed(X) self$X = X - if(!missing(y)){ + if (length(y) == 1 & is.character(y)) { + assert_true(y %in% names(X)) + self$y = X[y] + self$y.names = y + self$X = self$X[setdiff(colnames(X), self$y.names)] + } else if (inherits(y, "data.frame")) { + assertDataFrame(y, all.missing = FALSE, null.ok = TRUE, nrows = nrow(X)) self$y = y - self$y.names = names(y) - assert_true(nrow(X)==nrow(y)) - } + self$y.names = colnames(self$y) + } else if (is.vector(y) | is.factor(y)) { + assert_vector(y, any.missing = FALSE, null.ok = TRUE, len = nrow(X)) + self$y = data.frame(..y = y) + self$y.names = colnames(self$y) + } self$prob = prob self$feature.types = get.feature.type(unlist(lapply(X, class))) self$feature.names = colnames(X) diff --git a/R/Experiment.R b/R/Experiment.R deleted file mode 100644 index 90931d0c6..000000000 --- a/R/Experiment.R +++ /dev/null @@ -1,130 +0,0 @@ -Experiment = R6::R6Class("Experiment", - public = list( - plot = function(...){ - private$plot.data = private$generate.plot(...) - if(!is.null(private$plot.data)) {return(private$plot.data)} else {warning('no plot data generated')} - }, - initialize = function(predictor, sampler){ - checkmate::assert_class(predictor, 'Prediction') - checkmate::assert_class(sampler, 'DataSampler') - private$predictor = predictor - private$predict = predictor$predict - private$sampler = sampler - private$get.data = private$sampler$get.x - }, - data = function(){ - private$results - }, - print = function(){ - cat("Interpretation method: ", class(self)[1], "\n") - private$print.parameters() - cat("\n\nAnalysed model: \n") - private$predictor$print() - cat("\n\nAnalysed data:\n") - print(private$sampler) - cat("\n\nHead of results:\n") - if(private$finished){ - print(head(self$data())) - } - }, - run = function(force = FALSE, ...){ - if(force) private$flush() - if(!private$finished){ - # DESIGN experiment - private$X.sample = private$get.data() - private$X.design = private$intervene() - # EXECUTE experiment - predict.results = private$predict(private$X.design) - private$multi.class = ifelse(ncol(predict.results) > 1, TRUE, FALSE) - private$Q.results = private$Q(predict.results) - # AGGREGATE measurements - private$results = private$aggregate() - private$finished = TRUE - } - self - } - ), - private = list( - # The sampling object for sampling from X - sampler = NULL, - # Wrapper for sampler - get.data = NULL, - # The sampled data - X.sample = NULL, - # The intervention on the sample - intervene = function(){private$X.sample}, - # The design matrix after intervention - X.design = NULL, - # The predictor - predictor = NULL, - # The wrapper for the predictor - predict = NULL, - # The quantity of interest from black box model prediction - Q = function(x){x}, - Q.results = NULL, - # Flag if the prediction is multi.class (more than one column) - multi.class = NULL, - # Weights for the aggregation step - weight.samples = function(){1}, - # The aggregation function for the results - aggregate = function(){cbind(private$X.design, private$Q.results)}, - # The aggregated results of the experiment - results = NULL, - # Flag if the experiment is finished - finished = FALSE, - # 
Removes experiment results as preparation for running experiment again - flush = function(){ - private$X.sample = NULL - private$X.design = NULL - private$Q.results = NULL - private$results = NULL - private$finished = FALSE - }, - # The data need for plotting of results - plot.data = NULL, - # Function to generate the plot - generate.plot = function(){NULL}, - # Feature names of X - feature.names = NULL, - print.parameters = function(){} - ) -) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/R/FeatureImp.R b/R/FeatureImp.R new file mode 100644 index 000000000..6187748d9 --- /dev/null +++ b/R/FeatureImp.R @@ -0,0 +1,232 @@ +#' Feature importance +#' +#' @description +#' \code{FeatureImp} computes feature importances for prediction models. +#' The importance is measured as the factor by which the model's prediction error increases when the feature is shuffled. +#' +#' @format \code{\link{R6Class}} object. +#' @name FeatureImp +#' +#' @section Usage: +#' \preformatted{ +#' imp = FeatureImp$new(predictor, loss, method = "shuffle", run = TRUE) +#' +#' plot(imp) +#' imp$results +#' print(imp) +#' } +#' +#' @section Arguments: +#' +#' For FeatureImp$new(): +#' \describe{ +#' \item{predictor}{Object of type \code{Predictor}. See \link{Predictor}.} +#' \item{run}{logical. Should the Interpretation method be run?} +#' \item{loss}{The loss function. A string (e.g. "ce" for classification or "mse") or a function. See Details for allowed losses.} +#' \item{method}{Either "shuffle" or "cartesian". See Details.} +#' } +#' +#' @section Details: +#' Read the Interpretable Machine Learning book to learn in detail about feature importance: +#' \url{https://christophm.github.io/interpretable-ml-book/feature-importance.html} +#' +#' Two permutation schemes are implemented: +#' \itemize{ +#' \item shuffle: A simple shuffling of the feature values, yielding n perturbed instances per feature (fast) +#' \item cartesian: Matching every instance with the feature value of all other instances, yielding n x (n-1) perturbed instances per feature (very slow) +#' } +#' +#' The loss function can be either specified via a string, or by handing a function to \code{FeatureImp()}. +#' If you want to use your own loss function it should have this signature: function(actual, predicted). +#' Using the string is a shortcut to using loss functions from the \code{Metrics} package. +#' Only use functions that return a single performance value, not a vector. +#' Allowed losses are: "ce", "f1", "logLoss", "mae", "mse", "rmse", "mape", "mdae", +#' "msle", "percent_bias", "rae", "rmse", "rmsle", "rse", "rrse", "smape" +#' See \code{library(help = "Metrics")} to get a list of functions. +#' +#' +#' @section Fields: +#' \describe{ +#' \item{original.error}{The loss of the model before perturbing features.} +#' \item{predictor}{The prediction model that was analysed.} +#' \item{results}{data.frame with the results of the feature importance computation.} +#' } +#' +#' @section Methods: +#' \describe{ +#' \item{loss(actual,predicted)}{The loss function. Can also be applied to data: \code{object$loss(actual, predicted)}} +#' \item{plot()}{method to plot the feature importances. See \link{plot.FeatureImp}} +#' \item{\code{run()}}{[internal] method to run the interpretability method. 
Use \code{obj$run(force = TRUE)} to force a rerun.} +#' \item{\code{clone()}}{[internal] method to clone the R6 object.} +#' \item{\code{initialize()}}{[internal] method to initialize the R6 object.} +#' } +#' +#' @references +#' Fisher, A., Rudin, C., and Dominici, F. (2018). Model Class Reliance: Variable Importance Measures for any Machine Learning Model Class, from the "Rashomon" Perspective. Retrieved from http://arxiv.org/abs/1801.01489 +#' +#' @import Metrics +#' @examples +#' if (require("rpart")) { +#' # We train a tree on the Boston dataset: +#' data("Boston", package = "MASS") +#' tree = rpart(medv ~ ., data = Boston) +#' y = Boston$medv +#' X = Boston[-which(names(Boston) == "medv")] +#' mod = Predictor$new(tree, data = X, y = y) +#' +#' # Compute feature importances as the performance drop in mean absolute error +#' imp = FeatureImp$new(mod, loss = "mae") +#' +#' # Plot the results directly +#' plot(imp) +#' +#' +#' # Since the result is a ggplot object, you can extend it: +#' if (require("ggplot2")) { +#' plot(imp) + theme_bw() +#' # If you want to do your own thing, just extract the data: +#' imp.dat = imp$results +#' head(imp.dat) +#' ggplot(imp.dat, aes(x = feature, y = importance)) + geom_point() + +#' theme_bw() +#' } +#' +#' # FeatureImp also works with multiclass classification. +#' # In this case, the importance measurement regards all classes +#' tree = rpart(Species ~ ., data= iris) +#' X = iris[-which(names(iris) == "Species")] +#' y = iris$Species +#' predict.fun = function(object, newdata) predict(object, newdata, type = "prob") +#' mod = Predictor$new(tree, data = X, y = y, predict.fun) +#' +#' # For some models we have to specify additional arguments for the predict function +#' imp = FeatureImp$new(mod, loss = "ce") +#' plot(imp) +#' +#' # For multiclass classification models, you can choose to only compute performance for one class. 
+#' # Make sure to adapt y +#' mod = Predictor$new(tree, data = X, y = y == "virginica", +#' predict.fun = predict.fun, class = "virginica") +#' imp = FeatureImp$new(mod, loss = "ce") +#' plot(imp) +#' } NULL + +#' @export + +FeatureImp = R6::R6Class("FeatureImp", + inherit = InterpretationMethod, + public = list( + loss = NULL, + original.error = NULL, + initialize = function(predictor, loss, method = "shuffle", run = TRUE) { + assert_choice(method, c("shuffle", "cartesian")) + + if (!inherits(loss, "function")) { + ## Only allow metrics from Metrics package + allowedLosses = c("ce", "f1", "logLoss", "mae", "mse", "rmse", "mape", "mdae", + "msle", "percent_bias", "rae", "rmsle", "rse", "rrse", "smape") + checkmate::assert_choice(loss, allowedLosses) + private$loss.string = loss + loss = getFromNamespace(loss, "Metrics") + } else { + private$loss.string = head(loss) + } + if (is.null(predictor$data$y)) { + stop("Please call Predictor$new() with the y target vector.") + } + super$initialize(predictor = predictor) + self$loss = private$set.loss(loss) + private$method = method + private$getData = private$sampler$get.xy + actual = private$sampler$y[[1]] + predicted = private$q(self$predictor$predict(private$sampler$X))[[1]] + # Ensuring that levels are the same + self$original.error = loss(actual, predicted) + if(run) self$run() + } + ), + private = list( + method = NULL, + # for printing + loss.string = NULL, + shuffleFeature = function(feature.name, method) { + if (method == "shuffle") { + X.inter = private$dataSample + X.inter[feature.name] = X.inter[sample(1:nrow(private$dataSample)), feature.name] + } else if (method == "cartesian") { + n = nrow(private$dataSample) + row.indices = rep(1:n, times = n) + replace.indices = rep(1:n, each = n) + # Indices of instances to keep. Removes those where an instance is matched with its own value + keep.indices = row.indices != replace.indices + X.inter = private$dataSample[row.indices, ] + X.inter[feature.name] = X.inter[replace.indices, feature.name] + X.inter = X.inter[keep.indices,] + } else { + stop(sprintf("%s method not implemented", method)) + } + X.inter$..feature = feature.name + X.inter + }, + q = function(pred) probs.to.labels(pred), + intervene = function() { + X.inter.list = lapply(private$sampler$feature.names, + function(i) private$shuffleFeature(i, method = private$method)) + data.frame(data.table::rbindlist(X.inter.list)) + }, + aggregate = function() { + y = private$dataDesign[private$sampler$y.names] + y.hat = private$qResults + # For classification we work with the class labels instead of probs + result = data.frame(feature = private$dataDesign$..feature, actual = y[[1]], + predicted = y.hat[[1]]) + + result.grouped = group_by_(result, "feature") + result = summarise(result.grouped, original.error = self$original.error, permutationError = self$loss(actual, predicted), + importance = permutationError / self$original.error) + result = result[order(result$importance, decreasing = TRUE),] + result + }, + generatePlot = function(sort = TRUE, ...)
{ + res = self$results + if (sort) { + res$feature = factor(res$feature, levels = res$feature[order(res$importance)]) + } + ggplot(res, aes(y = feature, x = importance)) + geom_point()+ + geom_segment(aes(y = feature, yend = feature, x=1, xend = importance)) + + scale_x_continuous("Feature Importance") + + scale_y_discrete("Feature") + }, + set.loss = function(loss) { + self$loss = loss + }, + printParameters = function() { + cat("error function:", private$loss.string) + } + ) +) + + +#' Plot Feature Importance +#' +#' plot.FeatureImp() plots the feature importance results of a FeatureImp object. +#' +#' For examples see \link{FeatureImp} +#' @param x A FeatureImp R6 object +#' @param sort logical. Should the features be sorted in descending order? Defaults to TRUE. +#' @param ... Further arguments for the object's plot function +#' @return ggplot2 plot object +#' @export +#' @importFrom dplyr group_by_ +#' @seealso +#' \link{FeatureImp} +plot.FeatureImp = function(x, sort = TRUE, ...) { + x$plot(sort = sort, ...) +} + + + + + + diff --git a/R/Ice.R b/R/Ice.R new file mode 100644 index 000000000..bf3df93b2 --- /dev/null +++ b/R/Ice.R @@ -0,0 +1,215 @@ +#' Individual conditional expectations (Ice) +#' +#' \code{Ice} fits and plots individual conditional expectation curves for prediction models. +#' +#' @format \code{\link{R6Class}} object. +#' @name Ice +#' +#' @section Usage: +#' \preformatted{ +#' ice = Ice$new(predictor, feature, grid.size = 20, center.at = NULL, run = TRUE) +#' +#' plot(ice) +#' ice$results +#' print(ice) +#' ice$set.feature(2) +#' ice$center(1) +#' } +#' +#' @section Arguments: +#' +#' For Ice$new(): +#' \describe{ +#' \item{predictor}{Object of type \code{Predictor}. See \link{Predictor}.} +#' \item{feature}{The feature name or index for which to compute the individual conditional expectations.} +#' \item{grid.size}{The size of the grid for evaluating the predictions.} +#' \item{center.at}{The value for the centering of the plot. Numeric for numeric features, and the level name for factors.} +#' \item{run}{logical. Should the Interpretation method be run?} +#' } +#' +#' +#' @section Details: +#' The individual conditional expectation curves show how the prediction for each instance changes +#' when we vary a single feature. +#' +#' To learn more about individual conditional expectation, +#' read the Interpretable Machine Learning book: https://christophm.github.io/interpretable-ml-book/ice.html +#' +#' +#' @section Fields: +#' \describe{ +#' \item{feature.index}{The index of the features for which the individual conditional expectations were computed.} +#' \item{feature.name}{The names of the features for which the individual conditional expectations were computed.} +#' \item{feature.type}{The detected types of the features, either "categorical" or "numerical".} +#' \item{center.at}{The value for the centering of the plot. Numeric for numeric features, and the level name for factors.} +#' \item{grid.size}{The size of the grid.} +#' \item{predictor}{The prediction model that was analysed.} +#' \item{results}{data.frame with the grid of the feature of interest and the predicted \eqn{\hat{y}}.} +#' } +#' +#' @section Methods: +#' \describe{ +#' \item{center()}{method to set the value at which the ice computations are centered. See examples.} +#' \item{set.feature()}{method to set the feature (index) for which to compute individual conditional expectations. See examples for usage.} +#' \item{plot()}{method to plot the individual conditional expectations.
See \link{plot.Ice}.} +#' \item{\code{clone()}}{[internal] method to clone the R6 object.} +#' \item{\code{initialize()}}{[internal] method to initialize the R6 object.} +#' } +#' @references +#' Goldstein, A., Kapelner, A., Bleich, J., and Pitkin, E. (2013). Peeking Inside the Black Box: +#' Visualizing Statistical Learning with Plots of Individual Conditional Expectation, 1-22. https://doi.org/10.1080/10618600.2014.907095 +#' @seealso +#' \link{PartialDependence} for partial dependence plots (aggregated ice plots) +#' @importFrom dplyr left_join + +#' @examples +#' # We train a random forest on the Boston dataset: +#' if (require("randomForest")) { +#' +#' data("Boston", package = "MASS") +#' rf = randomForest(medv ~ ., data = Boston, ntree = 50) +#' mod = Predictor$new(rf, data = Boston) +#' +#' # Compute the individual conditional expectations for the first feature +#' ice = Ice$new(mod, feature = "crim") +#' +#' # Plot the results directly +#' plot(ice) +#' +#' # You can center the Ice plot +#' ice$center(0) +#' plot(ice) +#' +#' # Ice plots can be centered at initialization +#' ice = Ice$new(mod, feature = "crim", center = 75) +#' plot(ice) +#' +#' # Centering can also be removed +#' ice$center(NULL) +#' plot(ice) +#' +#' # Since the result is a ggplot object, you can extend it: +#' if (require("ggplot2")) { +#' plot(ice) + theme_bw() +#' +#' +#' # If you want to do your own thing, just extract the data: +#' iceData = ice$results +#' head(iceData) +#' ggplot(iceData) + +#' geom_line(aes(x = crim, y = y.hat, group = ..individual, color = factor(..individual))) + +#' scale_color_discrete(guide = "none") +#' } +#' # You can reuse the ice object for other features: +#' ice$set.feature("lstat") +#' plot(ice) +#' +#' # Ice also works with multiclass classification +#' rf = randomForest(Species ~ ., data= iris, ntree=50) +#' predict.fun = function(obj, newdata) predict(obj, newdata, type = "prob") +#' mod = Predictor$new(rf, data = iris, predict.fun = predict.fun) +#' +#' # For some models we have to specify additional arguments for the predict function +#' plot(Ice$new(mod, feature = "Sepal.Length")) +#' +#' # For multiclass classification models, you can choose to only show one class: +#' mod = Predictor$new(rf, data = iris, predict.fun = predict.fun, class = "virginica") +#' plot(Ice$new(mod, feature = "Sepal.Length")) +#' +#' # Ice plots can be centered: +#' plot(Ice$new(mod, feature = "Sepal.Length", center = 1)) +#' } +#' @export +NULL + +#' @export + +Ice = R6::R6Class("Ice", + inherit = PartialDependence, + public = list( + initialize = function(predictor, feature, grid.size = 20, center.at = NULL, run = TRUE) { + checkmate::assert_number(center.at, null.ok = TRUE) + private$anchor.value = center.at + super$initialize(predictor = predictor, feature=feature, run = run, + grid.size = grid.size) + }, + center = function(center.at) { + private$anchor.value = center.at + private$flush() + self$run() + } + ), + private = list( + generatePlot = function() { + p = ggplot(self$results, mapping = aes_string(x = names(self$results)[1], + y = "y.hat", group = "..individual")) + if (self$feature.type == "numerical") p = p + geom_line() + else if (self$feature.type == "categorical") { + p = p + geom_line(alpha = 0.2) + } + if (private$multiClass) { + p = p + facet_wrap("..class.name") + } + p + scale_y_continuous(expression(hat(y))) + }, + intervene = function() { + dataDesign = super$intervene() + if (!is.null(private$anchor.value)) { + dataDesign.anchor = private$dataSample + 
dataDesign.anchor[self$feature.index] = private$anchor.value + private$dataDesign.ids = c(private$dataDesign.ids, 1:nrow(private$dataSample)) + dataDesign = rbind(dataDesign, dataDesign.anchor) + } + dataDesign + }, + aggregate = function() { + X.id = private$dataDesign.ids + X.results = private$dataDesign[self$feature.index] + X.results$..individual = X.id + if (private$multiClass) { + y.hat.names = colnames(private$qResults) + X.results = cbind(X.results, private$qResults) + X.results = gather(X.results, key = "..class.name", value = "y.hat", one_of(y.hat.names)) + } else { + X.results["y.hat"]= private$qResults + X.results["..class.name"] = 1 + } + + if (!is.null(private$anchor.value)) { + X.aggregated.anchor = X.results[X.results[self$feature.name] == private$anchor.value, c("y.hat", "..individual", "..class.name")] + names(X.aggregated.anchor) = c("anchor.yhat", "..individual", "..class.name") + X.results = left_join(X.results, X.aggregated.anchor, by = c("..individual", "..class.name")) + X.results$y.hat = X.results$y.hat - X.results$anchor.yhat + X.results$anchor.yhat = NULL + } + + # Remove class name column again if single output + if (!private$multiClass) { + X.results$..class.name = NULL + } + + X.results + }, + anchor.value = NULL + ), + active = list( + center.at = function(x) { + if(!missing(x)) warning("Please use $center() to change the value.") + return(private$anchor.value) + } + ) +) + +#' Plot ICE (Individual Conditional Expectation) +#' +#' plot.Ice() plots the individual conditional expectation results from an Ice object. +#' +#' For examples see \link{Ice} +#' @param x An Ice R6 object +#' @return ggplot2 plot object +#' @seealso +#' \link{Ice} +plot.Ice = function(x) { + x$plot() +} + diff --git a/R/InterpretationMethod.R b/R/InterpretationMethod.R new file mode 100644 index 000000000..838bbd7ed --- /dev/null +++ b/R/InterpretationMethod.R @@ -0,0 +1,89 @@ +InterpretationMethod = R6::R6Class("InterpretationMethod", + public = list( + # The aggregated results of the experiment + results = NULL, + # The prediction model + predictor = NULL, + plot = function(...) { + private$plotData = private$generatePlot(...) + if (!is.null(private$plotData)) { + return(private$plotData) + } else { + warning("call run() first!") + } + }, + initialize = function(predictor) { + checkmate::assert_class(predictor, "Predictor") + self$predictor = predictor + private$sampler = predictor$data + private$getData = private$sampler$get.x + }, + print = function() { + cat("Interpretation method: ", class(self)[1], "\n") + private$printParameters() + cat("\n\nAnalysed predictor: \n") + self$predictor$print() + cat("\n\nAnalysed data:\n") + print(private$sampler) + cat("\n\nHead of results:\n") + if (private$finished) { + print(head(self$results)) + } + }, + run = function(force = FALSE, ...)
{ + if (force) private$flush() + if (!private$finished) { + # DESIGN experiment + private$dataSample = private$getData() + private$dataDesign = private$intervene() + # EXECUTE experiment + private$predictResults = self$predictor$predict(private$dataDesign) + private$multiClass = ifelse(ncol(private$predictResults) > 1, TRUE, FALSE) + private$qResults = private$q(private$predictResults) + # AGGREGATE measurements + self$results = data.frame(private$aggregate()) + private$finished = TRUE + } + self + } + ), + private = list( + # The sampling object for sampling from X + sampler = NULL, + # Wrapper for sampler + getData = NULL, + # The sampled data + dataSample = NULL, + # The intervention on the sample + intervene = function() private$dataSample, + # The design matrix after intervention + dataDesign = NULL, + # The quantity of interest from black box model prediction + predictResults = NULL, + q = function(x) x, + qResults = NULL, + # Flag if the prediction is multiClass (more than one column) + multiClass = NULL, + # Weights for the aggregation step + weightSamples = function() 1, + # The aggregation function for the results + aggregate = function() cbind(private$dataDesign, private$qResults), + # Flag if the experiment is finished + finished = FALSE, + # Removes experiment results as preparation for running experiment again + flush = function() { + private$dataSample = NULL + private$dataDesign = NULL + private$qResults = NULL + self$results = NULL + private$finished = FALSE + }, + # The data needed for plotting the results + plotData = NULL, + # Function to generate the plot + generatePlot = function() NULL, + # Feature names of X + feature.names = NULL, + printParameters = function() {} + ) +) diff --git a/R/LocalModel.R b/R/LocalModel.R new file mode 100644 index 000000000..0de62ee9b --- /dev/null +++ b/R/LocalModel.R @@ -0,0 +1,244 @@ +#' LocalModel +#' +#' \code{LocalModel} fits locally weighted linear regression models (logistic regression for classification) to explain single predictions of a prediction model. +#' +#' +#' @format \code{\link{R6Class}} object. +#' @name LocalModel +#' @section Usage: +#' \preformatted{ +#' lime = LocalModel$new(predictor, x.interest = NULL, k = 3, run = TRUE) +#' +#' plot(lime) +#' predict(lime, newdata) +#' lime$results +#' lime$explain(x.interest) +#' print(lime) +#' } +#' +#' @section Arguments: +#' +#' For LocalModel$new(): +#' \describe{ +#' \item{predictor}{Object of type \code{Predictor}. See \link{Predictor}.} +#' \item{x.interest}{data.frame with a single row for the instance to be explained.} +#' \item{k}{the (maximum) number of features to be used for the surrogate model.} +#' \item{run}{logical. Should the Interpretation method be run?} +#' } +#' +#' @section Details: +#' A weighted glm is fitted with the machine learning model prediction as target. +#' Data points are weighted by their proximity to the instance to be explained, using the gower proximity measure. +#' L1-regularisation is used to make the results sparse. +#' The resulting model can be seen as a surrogate for the machine learning model, which is only valid for that one point. +#' Categorical features are binarized, depending on the category of the instance to be explained: 1 if the category is the same, 0 otherwise.
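A minimal sketch of this binarization, assuming a factor feature; binarize() is a hypothetical helper, not the package's internal recoding function:

binarize = function(x, x.interest) as.numeric(x == x.interest)
binarize(iris$Species[1:5], x.interest = iris$Species[1])
# [1] 1 1 1 1 1   (all five rows share the category of x.interest)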
+#' To learn more about local models, read the Interpretable Machine Learning book: https://christophm.github.io/interpretable-ml-book/lime.html +#' +#' The approach is similar to LIME, but has the following differences: +#' \itemize{ +#' \item Distance measure: Uses gower proximity (= 1 - gower distance) instead of a kernel based on the Euclidean distance. This has the advantage of a meaningful neighbourhood and no kernel width to tune (a short sketch of this weighting follows the references below). +#' \item Sampling: Uses the original data instead of sampling from normal distributions. +#' This has the advantage of following the original data distribution. +#' \item Visualisation: Plots effects instead of betas. Both are the same for binary features, but are different for numerical features. +#' For numerical features, plotting the betas makes no sense, +#' because a negative beta might still increase the prediction when the feature value is also negative. +#' } +#' +#' @section Fields: +#' \describe{ +#' \item{best.fit.index}{The index of the best glmnet fit.} +#' \item{k}{The number of features as set by the user.} +#' \item{model}{The glmnet object.} +#' \item{predictor}{The prediction model that was analysed.} +#' \item{results}{data.frame with the feature names (\code{feature}) and contributions to the prediction} +#' \item{x.interest}{The data.frame with the instance to be explained. See Examples for usage.} +#' } +#' +#' @section Methods: +#' \describe{ +#' \item{explain(x.interest)}{method to set a new data point to explain.} +#' \item{plot()}{method to plot the LocalModel feature effects. See \link{plot.LocalModel}} +#' \item{predict()}{method to predict new data with the local model. See also \link{predict.LocalModel}} +#' \item{\code{run()}}{[internal] method to run the interpretability method. Use \code{obj$run(force = TRUE)} to force a rerun.} +#' \item{\code{clone()}}{[internal] method to clone the R6 object.} +#' \item{\code{initialize()}}{[internal] method to initialize the R6 object.} +#' } +#' +#' +#' @references +#' Ribeiro, M. T., Singh, S., & Guestrin, C. (2016). "Why Should I Trust You?": Explaining the Predictions of Any Classifier. Retrieved from http://arxiv.org/abs/1602.04938 +#' +#' Gower, J. C. (1971), "A general coefficient of similarity and some of its properties". Biometrics, 27, 623--637.
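A minimal sketch of the gower-proximity weighting described above, assuming the gower package is installed; X and x.interest stand in for the sampled data and the instance to be explained:

library(gower)
X = iris[-which(names(iris) == "Species")]
x.interest = X[1, ]
# proximity = 1 - gower distance: rows similar to x.interest get weights
# close to 1, distant rows get weights close to 0
w = 1 - gower_dist(X, x.interest)
summary(w)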
+#' +#' @seealso +#' \code{\link{plot.LocalModel}} and \code{\link{predict.LocalModel}} +#' +#' \code{\link{Shapley}} can also be used to explain single predictions +#' +#' \code{\link[lime]{lime}}, the original implementation +#' @export +#' @importFrom glmnet glmnet +#' @examples +#' if (require("randomForest")) { +#' # First we fit a machine learning model on the Boston housing data +#' data("Boston", package = "MASS") +#' X = Boston[-which(names(Boston) == "medv")] +#' rf = randomForest(medv ~ ., data = Boston, ntree = 50) +#' mod = Predictor$new(rf, data = X) +#' +#' # Explain the first instance of the dataset with the LocalModel method: +#' x.interest = X[1,] +#' lemon = LocalModel$new(mod, x.interest = x.interest, k = 2) +#' lemon +#' +#' # Look at the results in a table +#' lemon$results +#' # Or as a plot +#' plot(lemon) +#' +#' # Reuse the object with a new instance to explain +#' lemon$x.interest +#' lemon$explain(X[2,]) +#' lemon$x.interest +#' plot(lemon) +#' +#' # LocalModel also works with multiclass classification +#' rf = randomForest(Species ~ ., data= iris, ntree=50) +#' X = iris[-which(names(iris) == 'Species')] +#' predict.fun = function(object, newdata) predict(object, newdata, type = "prob") +#' mod = Predictor$new(rf, data = X, predict.fun = predict.fun, class = "setosa") +#' +#' # Then we explain the first instance of the dataset with the LocalModel method: +#' lemon = LocalModel$new(mod, x.interest = X[1,], k = 2) +#' lemon$results +#' plot(lemon) +#' } +NULL + + +#' @export + +LocalModel = R6::R6Class("LocalModel", + inherit = InterpretationMethod, + public = list( + x.interest = NULL, + k = NULL, + model = NULL, + best.fit.index = NULL, + predict = function(newdata = NULL, ...) { + if (is.null(newdata)) newdata = self$x.interest + X.recode = recode(newdata, self$x.interest) + if (private$multiClass) { + prediction = predict(self$model, newx=as.matrix(X.recode), type = "response") + colnames(prediction) = colnames(private$predictResults) + } else { + prediction = predict(self$model, newx=as.matrix(X.recode)) + } + if (private$multiClass) { + data.frame(prediction[,,self$best.fit.index]) + } else { + pred = prediction[,self$best.fit.index, drop=FALSE] + colnames(pred) = NULL + data.frame(prediction = pred) + } + }, + explain = function(x.interest) { + self$x.interest = x.interest + private$flush() + self$run() + }, + initialize = function(predictor, x.interest = NULL, k = 3, run = TRUE) { + checkmate::assert_number(k, lower = 1, upper = predictor$data$n.features) + checkmate::assert_data_frame(x.interest, null.ok = TRUE) + if (!require("glmnet")) { + stop("Please install glmnet.") + } + super$initialize(predictor = predictor) + self$k = k + if (!is.null(x.interest)) { + self$x.interest = x.interest + } + if (run & !is.null(x.interest)) self$run() + } + ), + private = list( + q = function(pred) probs.to.labels(pred), + best.index = NULL, + aggregate = function() { + X.recode = recode(private$dataDesign, self$x.interest) + x.recoded = recode(self$x.interest, self$x.interest) + fam = ifelse(private$multiClass, "multinomial", "gaussian") + y = unlist(private$qResults[1]) + self$model = glmnet(x = as.matrix(X.recode), y = y, + family = fam, w = private$weightSamples(), + intercept = TRUE, standardize = TRUE, type.multinomial = "grouped") + res = self$model + ## It can happen, that no n.vars matching k occurs + if (any(res$df == self$k)) { + best.index = max(which(res$df == self$k)) + } else { + best.index = max(which(res$df < self$k)) + warning("Had to choose a smaller 
k") + } + self$best.fit.index = best.index + if (private$multiClass) { + class.results = lapply(res$beta, extract.glmnet.effects, + best.index = best.index, x.recoded = x.recoded, x.original = self$x.interest) + res = data.table::rbindlist(class.results) + res$..class = rep(names(class.results), each = ncol(X.recode)) + } else { + res = extract.glmnet.effects(res$beta, best.index, x.recoded, self$x.interest) + } + res[res$beta != 0, ] + }, + intervene = function() private$dataSample, + generatePlot = function() { + p = ggplot(self$results) + + geom_col(aes(y = effect, x=feature.value)) + coord_flip() + if (private$multiClass) p = p + facet_wrap("..class") + p + }, + weightSamples = function() { + require("gower") + 1 - gower_dist(private$dataDesign, self$x.interest) + } + ) +) + + + +#' Predict LocalModel +#' +#' Predict the response for newdata with the LocalModel model. +#' +#' For examples see \link{LocalModel} +#' @param object A LocalModel R6 object +#' @param newdata A data.frame for which to predict +#' @param ... Further arguments for the objects predict function +#' @return A data.frame with the predicted outcome. +#' @seealso +#' \link{LocalModel} +#' @importFrom stats predict +#' @export +predict.LocalModel = function(object, newdata = NULL, ...) { + object$predict(newdata = newdata, ...) +} + +#' Plot Local Model +#' +#' plot.LocalModel() plots the feature effects of a LocalModel object. +#' +#' For examples see \link{LocalModel} +#' @param object A LocalModel R6 object +#' @return ggplot2 plot object +#' @seealso +#' \link{LocalModel} +plot.LocalModel = function(object) { + object$plot() +} + + + + + + diff --git a/R/PartialDependence.R b/R/PartialDependence.R new file mode 100644 index 000000000..f593aea04 --- /dev/null +++ b/R/PartialDependence.R @@ -0,0 +1,277 @@ +#' Partial Dependence Plot +#' +#' \code{PartialDependence} computes and plots partial dependence functions of prediction models. +#' +#' @format \code{\link{R6Class}} object. +#' @name PartialDependence +#' +#' @section Usage: +#' \preformatted{ +#' pdp = PartialDependence$new(predictor, feature, grid.size = 20, run = TRUE) +#' +#' plot(pdp) +#' pdp$results +#' print(pdp) +#' pdp$set.feature(2) +#' } +#' +#' @section Arguments: +#' +#' For PartialDependence$new(): +#' \describe{ +#' \item{predictor}{Object of type \code{Predictor}. See \link{Predictor}.} +#' \item{feature}{The feature name or index for which to compute the partial dependencies. +#' Either a single number or vector of two numbers.} +#' \item{grid.size}{The size of the grid for evaluating the predictions} +#' \item{run}{logical. Should the Interpretation method be run?} +#' } +#' +#' @section Details: +#' The partial dependence plot calculates and plots the dependence of f(X) on a single or two features. 
+#' To learn more about partial dependence plots, read the Interpretable Machine Learning book: +#' https://christophm.github.io/interpretable-ml-book/pdp.html +#' +#' +#' @section Fields: +#' \describe{ +#' \item{feature.index}{The index of the features for which the partial dependence was computed.} +#' \item{feature.name}{The names of the features for which the partial dependence was computed.} +#' \item{feature.type}{The detected types of the features, either "categorical" or "numerical".} +#' \item{grid.size}{The size of the grid.} +#' \item{n.features}{The number of features (either 1 or 2)} +#' \item{predictor}{The prediction model that was analysed.} +#' \item{results}{data.frame with the grid of the feature of interest and the predicted \eqn{\hat{y}}. +#' Can be used for creating custom partial dependence plots.} +#' } +#' +#' @section Methods: +#' \describe{ +#' \item{set.feature}{method to get/set feature(s) (by index) for which to compute the partial dependence. See examples for usage.} +#' \item{plot()}{method to plot the partial dependence function. See \link{plot.PartialDependence}} +#' \item{\code{run()}}{[internal] method to run the interpretability method. Use \code{obj$run(force = TRUE)} to force a rerun.} +#' \item{\code{clone()}}{[internal] method to clone the R6 object.} +#' \item{\code{initialize()}}{[internal] method to initialize the R6 object.} +#' } +#' @seealso +#' \link{plot.PartialDependence} +#' +#' \code{\link{Ice}} for individual conditional expectation plots. +#' +#' +#' +#' @references +#' Friedman, J.H. 2001. "Greedy Function Approximation: A Gradient Boosting Machine." Annals of Statistics 29: 1189-1232. +#' +#' @importFrom tidyr gather +#' @importFrom dplyr one_of group_by group_by_at summarise +#' @import ggplot2 +#' @export +#' @examples +#' # We train a random forest on the Boston dataset: +#' if (require("randomForest")) { +#' data("Boston", package = "MASS") +#' rf = randomForest(medv ~ ., data = Boston, ntree = 50) +#' mod = Predictor$new(rf, data = Boston) +#' +#' # Compute the partial dependence for the first feature +#' pdp.obj = PartialDependence$new(mod, feature = "crim") +#' +#' # Plot the results directly +#' plot(pdp.obj) +#' +#' # Since the result is a ggplot object, you can extend it: +#' if (require("ggplot2")) { +#' plot(pdp.obj) + theme_bw() +#' } +#' +#' # If you want to do your own thing, just extract the data: +#' pdp.dat = pdp.obj$results +#' head(pdp.dat) +#' +#' # You can reuse the pdp object for other features: +#' pdp.obj$set.feature("lstat") +#' plot(pdp.obj) +#' +#' # Partial dependence plots support up to two features: +#' pdp.obj = PartialDependence$new(mod, feature = c("crim", "lstat")) +#' plot(pdp.obj) +#' +#' # Partial dependence plots also work with multiclass classification +#' rf = randomForest(Species ~ ., data = iris, ntree=50) +#' predict.fun = function(object, newdata) predict(object, newdata, type = "prob") +#' mod = Predictor$new(rf, data = iris, predict.fun = predict.fun) +#' +#' # For some models we have to specify additional arguments for the predict function +#' plot(PartialDependence$new(mod, feature = "Sepal.Length")) +#' +#' # Partial dependence plots support up to two features: +#' pdp.obj = PartialDependence$new(mod, feature = c("Sepal.Length", "Petal.Length")) +#' pdp.obj$plot() +#' +#' # For multiclass classification models, you can choose to only show one class: +#' mod = Predictor$new(rf, data = iris, predict.fun = predict.fun, class = 1) +#' plot(PartialDependence$new(mod, feature = "Sepal.Length")) +#' } NULL + +
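For orientation, a minimal sketch of the estimator that the class below implements for a single numeric feature; manual.pdp() is a hypothetical helper, not part of the package:

manual.pdp = function(predictor, X, feature, grid.size = 20) {
  # evenly spaced grid over the observed range of the feature
  grid = seq(min(X[[feature]]), max(X[[feature]]), length.out = grid.size)
  y.hat = vapply(grid, function(g) {
    X.design = X
    X.design[[feature]] = g                  # force the feature to the grid value in all rows
    mean(predictor$predict(X.design)[[1]])   # average the resulting predictions
  }, numeric(1))
  data.frame(grid = grid, y.hat = y.hat)
}
# e.g. head(manual.pdp(mod, Boston, "crim")) should roughly match pdp.obj$results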
+#' @export + +PartialDependence = R6::R6Class("PartialDependence", + inherit = InterpretationMethod, + public = list( + grid.size = NULL, + feature.index = NULL, + feature.name = NULL, + n.features = NULL, + feature.type= NULL, + initialize = function(predictor, feature, grid.size = 20, run = TRUE) { + feature = private$sanitizeFeature(feature, predictor$data$feature.names) + checkmate::assert_numeric(feature, lower = 1, upper = predictor$data$n.features, min.len = 1, max.len = 2) + checkmate::assert_numeric(grid.size, min.len = 1, max.len = length(feature)) + if (length(feature) == 2) checkmate::assert_false(feature[1] == feature[2]) + super$initialize(predictor) + private$setFeatureFromIndex(feature) + private$set.grid.size(grid.size) + private$grid.size.original = grid.size + if(run) self$run() + }, + set.feature = function(feature) { + feature = private$sanitizeFeature(feature, self$predictor$data$feature.names) + private$flush() + private$setFeatureFromIndex(feature) + private$set.grid.size(private$grid.size.original) + self$run() + } + ), + private = list( + aggregate = function() { + results = private$dataDesign[self$feature.index] + if (private$multiClass) { + y.hat.names = colnames(private$qResults) + results = cbind(results, private$qResults) + results = gather(results, key = "..class.name", value = "y.hat", one_of(y.hat.names)) + } else { + results["y.hat"]= private$qResults + results["..class.name"] = self$predictor$class + } + results.grouped = group_by_at(results, self$feature.name) + if ("..class.name" %in% colnames(results)) { + results.grouped = group_by(results.grouped, ..class.name, add = TRUE) + } + results = data.frame(summarise(results.grouped, y.hat = mean(y.hat))) + results + }, + intervene = function() { + grid = get.1D.grid(private$dataSample[[self$feature.index[1]]], self$feature.type[1], self$grid.size[1]) + + private$dataDesign.ids = rep(1:nrow(private$dataSample), times = length(grid)) + dataDesign = private$dataSample[private$dataDesign.ids,] + dataDesign[self$feature.index[1]] = rep(grid, each = nrow(private$dataSample)) + + if (self$n.features == 2) { + grid2 = get.1D.grid(private$dataSample[[self$feature.index[2]]], self$feature.type[2], self$grid.size[2]) + private$dataDesign.ids = rep(private$dataDesign.ids, times = length(grid2)) + dataDesign2 = dataDesign[rep(1:nrow(dataDesign), times = length(grid2)), ] + dataDesign2[self$feature.index[2]] = rep(grid2, each = nrow(dataDesign)) + return(dataDesign2) + } + dataDesign + }, + dataDesign.ids = NULL, + grid.size.original = NULL, + setFeatureFromIndex = function(feature.index) { + self$feature.index = feature.index + self$n.features = length(feature.index) + self$feature.type = private$sampler$feature.types[feature.index] + self$feature.name = private$sampler$feature.names[feature.index] + }, + printParameters = function() { + cat("features:", paste(sprintf("%s[%s]", self$feature.name, self$feature.type), collapse = ", ")) + cat("\ngrid size:", paste(self$grid.size, collapse = "x")) + }, + generatePlot = function() { + if (self$n.features == 1) { + p = ggplot(self$results, mapping = aes_string(x = self$feature.name,"y.hat")) + + scale_y_continuous(expression(hat(y))) + if (self$feature.type == "numerical") { + p = p + geom_path() + } else if (self$feature.type == "categorical") { + p = p + geom_point() + } + + } else if (self$n.features == 2) { + if (all(self$feature.type %in% "numerical") | all(self$feature.type %in% "categorical")) { + p = ggplot(self$results) + + geom_tile(aes_string(x = 
self$feature.name[1], + y = self$feature.name[2], + fill = "y.hat")) + + scale_fill_continuous(expression(hat(y))) + } else { + categorical.feature = self$feature.name[self$feature.type=="categorical"] + numerical.feature = setdiff(self$feature.name, categorical.feature) + p = ggplot(self$results) + + geom_line(aes_string(x = numerical.feature, y = "y.hat", + group = categorical.feature, color = categorical.feature)) + + scale_y_continuous(expression(hat(y))) + } + } + if (private$multiClass) { + p = p + facet_wrap("..class.name") + } + p + }, + set.grid.size = function(size) { + self$grid.size = numeric(length=self$n.features) + names(self$grid.size) = private$sampler$feature.names[self$feature.index] + private$set.grid.size.single(size[1], 1) + if (self$n.features > 1) { + if (length(size) == 1) { + # If user only provided 1D grid size + private$set.grid.size.single(size[1], 2) + } else { + # If user provided 2D grid.size + private$set.grid.size.single(size[2], 2) + } + } + }, + set.grid.size.single = function(size, feature.number) { + self$grid.size[feature.number] = ifelse(self$feature.type[feature.number] == "numerical", + size, length(unique(private$sampler$get.x()[[self$feature.index[feature.number]]]))) + }, + sanitizeFeature = function(feature, feature.names) { + if (is.character(feature)) { + feature.char = feature + stopifnot(all(feature %in% feature.names)) + feature = which(feature.char[1] == feature.names) + if (length(feature.char) == 2) { + feature = c(feature, which(feature.char[2] == feature.names)) + } + } + feature + } + ) +) + + +#' Plot Partial Dependence +#' +#' plot.PartialDependence() plots the results of a PartialDependence object. +#' +#' For examples see \link{PartialDependence} +#' @param x A PartialDependence R6 object +#' @return ggplot2 plot object +#' @seealso +#' \link{PartialDependence} +plot.PartialDependence = function(x) { + x$plot() +} + + + + + + + + diff --git a/R/Prediction.R b/R/Prediction.R deleted file mode 100644 index e69bfbcb7..000000000 --- a/R/Prediction.R +++ /dev/null @@ -1,186 +0,0 @@ -#' Get a prediction object -#' -#' @param object function, mlr WrappedObject, S3 class with predict function, or caret train object -#' @param class In case of classification, class specifies the class for which to predict the probability. -#' By default the first class in the prediction (first column) is chosen.
-#' @param predict.args named list with arguments passed down to the prediction model -#' @importFrom mlr getTaskType getPredictionProbabilities getPredictionResponse -#' @return object of type Prediction -prediction.model = function(object, class = NULL, predict.args = NULL){ - assert_vector(class, len=1, null.ok=TRUE) - if(inherits(object, "function")) { - Prediction.f$new(object = object, class = class) - } else if(inherits(object, "WrappedModel")){ - Prediction.mlr$new(object = object, class = class) - } else if(inherits(object, 'train')){ - Prediction.caret$new(object = object, class = class) - } else if(has.predict(object)) { - Prediction.S3$new(object = object, class = class, predict.args = predict.args) - } else { - stop(sprintf('Object of type [%s] not supported', paste(class(object), collapse = ", "))) - } -} - - - -Prediction = R6::R6Class("Prediction", - public = list( - predict = function(newdata){ - newdata = data.frame(newdata) - prediction = private$predict.function(newdata) - if(is.null(self$prediction.colnames)) private$extract.prediction.colnames(prediction) - prediction = data.frame(prediction) - if(self$type == 'classification' & !is.null(self$class)){ - prediction = prediction[,self$class, drop=FALSE] - } - colnames(prediction) = self$prediction.colnames - rownames(prediction) = NULL - data.frame(prediction) - }, - class = NULL, - prediction.colnames = NULL, - type = NULL, - print = function(){ - cat('Prediction type:', self$type, '\n') - if(self$type == 'classification') cat('Classes: ', paste(self$prediction.colnames, collapse = ', ')) - }, - initialize = function(object, class=NULL){ - # if object has predict function, but not from caret or mlr, then - # it is difficult to know if it's classification or regression model - # if class was set, then at least we can make a guess - self$class = class - if(!is.null(class)) self$type = 'classification' - private$object = object - private$infer.type() - } - ), - private = list( - object = NULL, - extract.prediction.colnames = function(prediction){ - if(!inherits(prediction, 'data.frame')){ - self$prediction.colnames = "..prediction" - } - self$prediction.colnames = colnames(prediction) - if(is.null(self$prediction.colnames)){ - self$prediction.colnames = "..prediction" - } - if(!is.null(self$class)){ - self$prediction.colnames = self$prediction.colnames[self$class] - } - } - ) -) - - -Prediction.mlr = R6::R6Class("Prediction.mlr", - inherit = Prediction, - public = list(), - private = list( - # Automatically set task type and class name according to output - infer.type = function(){ - tsk = mlr::getTaskType(private$object) - if(tsk == 'classif'){ - self$type = 'classification' - } else if(tsk == 'regr'){ - self$type = 'regression' - } else { - stop(sprintf('mlr task type <%s> not supported', tsk)) - } - }, - predict.function = function(x){ - pred = predict(private$object, newdata = x) - if(self$type == 'classification') { - getPredictionProbabilities(pred, cl = private$object$task.desc$class.levels) - } else { - getPredictionResponse(pred) - } - } - ) -) - - - - -Prediction.f = R6::R6Class("Prediction.f", - inherit = Prediction, - public = list(), - private = list( - ## can't infer the type before first prediction - infer.type = function(){}, - infer.f.type = function(pred){ - assert_true(any(class(pred) %in% c('integer', 'numeric', 'data.frame', 'matrix'))) - if(inherits(pred, c('data.frame', 'matrix')) && dim(pred)[2] > 1) { - self$type = 'classification' - - } else { - self$type = 'regression' - } - }, - 
predict.function = function(x){ - pred = private$object(x) - if(is.null(self$type)) private$infer.f.type(pred) - pred - } - ) -) - - -Prediction.caret = R6::R6Class("Prediction.caret", - inherit = Prediction, - public = list(), - private = list( - infer.type = function(){ - mtype = private$object$modelType - if(mtype == 'Regression'){ - self$type = 'regression' - } else if (mtype == 'Classification'){ - self$type = 'classification' - } else { - stop(sprintf('caret model type %s not supported.', mtype)) - } - }, - predict.function = function(x){ - if(self$type == 'classification'){ - predict(private$object, newdata = x, type = 'prob') - } else if(self$type == 'regression'){ - predict(private$object, newdata = x) - } else { - stop(sprintf('private$type of %s not allowed.', private$type)) - } - } - ) -) - - -Prediction.S3 = R6::R6Class("Prediction.S3", - inherit = Prediction.f, - public = list( - initialize = function(predict.args=NULL, ...){ - super$initialize(...) - private$predict.args = predict.args - } - ), - private = list( - predict.args = NULL, - infer.type = function(){}, - predict.function = function(x){ - predict.args = c(list(object = private$object, newdata = x), private$predict.args) - pred = do.call(predict, predict.args) - ## TODO: warning instead of error and reshape output to conform numeric data.frame type - if(private$is.label.output(pred)) stop("Output seems to be class instead of probabilities. Please use the predict.args argument.") - if(is.null(self$type)) private$infer.f.type(pred) - pred - }, - is.label.output = function(pred){ - if(inherits(pred, c('character', 'factor'))) return(TRUE) - if(inherits(pred, c('data.frame', 'matrix')) && inherits(pred[,1], 'character')){ - return(TRUE) - } - FALSE - } - ) -) - - - - diff --git a/R/Predictor.R b/R/Predictor.R new file mode 100644 index 000000000..77ef7a538 --- /dev/null +++ b/R/Predictor.R @@ -0,0 +1,131 @@ +#' Predictor object +#' +#' A \code{Predictor} object holds any machine learning model (mlr, caret, randomForest, ...) and the data to be used for analysing the model. +#' The interpretation methods in the iml package need the machine learning model to be wrapped in a \code{Predictor} object. +#' +#' @format \code{\link{R6Class}} object. +#' @name Predictor +#' @section Usage: +#' \preformatted{ +#' model = Predictor$new(model = NULL, data, y = NULL, class=NULL, +#' predict.fun = function(object, newdata) predict(object, newdata)) +#' } +#' +#' @section Arguments: +#' \describe{ +#' \item{model}{The machine learning model. Recommended are models from mlr and caret. +#' Other machine learning models with an S3 predict function work as well, but are less robust (e.g. randomForest).} +#' \item{data}{The data to be used for analysing the prediction model.} +#' \item{y}{The target vector or (preferably) the name of the target column in the \code{data} argument.} +#' \item{class}{The class column to be returned in case of multiclass output.} +#' \item{predict.fun}{The function to predict newdata. Only needed if \code{model} is not a model from the mlr or caret package.} +#' } +#' +#' @section Details: +#' A Predictor object is a container for the prediction model and the data. +#' This ensures that the machine learning model can be analysed robustly. +#' +#' Note: In case of classification, the model should return probabilities.
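As an illustration of this probability requirement, a hedged sketch with an e1071 SVM (e1071 is only suggested, not imported): the wrapped predict.fun returns the class-probability matrix instead of the predicted labels.

library(e1071)
svm.mod = svm(Species ~ ., data = iris, probability = TRUE)
predict.fun = function(object, newdata) {
  # predict() returns labels; the probabilities sit in an attribute
  attr(predict(object, newdata, probability = TRUE), "probabilities")
}
mod = Predictor$new(svm.mod, data = iris, y = "Species", predict.fun = predict.fun)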
+#' +#' +#' @section Fields: +#' \describe{ +#' \item{class}{The class column to be returned.} +#' \item{data}{data object with the data for the model interpretation.} +#' \item{prediction.colnames}{The column names of the predictions.} +#' \item{task}{The inferred prediction task: "classification" or "regression".} +#' } +#' +#' @section Methods: +#' \describe{ +#' \item{predict(newdata)}{method to predict new data with the machine learning model.} +#' \item{\code{clone()}}{[internal] method to clone the R6 object.} +#' \item{\code{initialize()}}{[internal] method to initialize the R6 object.} +#' } +#' +#' @export +#' @examples +#' if (require("mlr")) { +#' task = makeClassifTask(data = iris, target = "Species") +#' learner = makeLearner("classif.rpart", minsplit = 7, predict.type = "prob") +#' mod.mlr = train(learner, task) +#' mod = Predictor$new(mod.mlr, data = iris) +#' mod$predict(iris[1:5,]) +#' +#' mod = Predictor$new(mod.mlr, data = iris, class = "setosa") +#' mod$predict(iris[1:5,]) +#' } +#' +#' if (require("randomForest")) { +#' rf = randomForest(Species ~ ., data = iris, ntree = 20) +#' +#' # We need this for the randomForest +#' predict.fun = function(obj, newdata) { +#' predict(obj, newdata = newdata, type = "prob") +#' } +#' +#' mod = Predictor$new(rf, data = iris, predict.fun = predict.fun) +#' mod$predict(iris[50:55,]) +#' +#' # Feature importance needs the target vector, which needs to be supplied: +#' mod = Predictor$new(rf, data = iris, y = "Species", predict.fun = predict.fun) +#' } NULL + +#' @export + +Predictor = R6::R6Class("Predictor", + public = list( + data = NULL, + model = NULL, + predict = function(newdata) { + checkmate::assert_data_frame(newdata) + prediction = self$prediction.function(newdata) + if (!private$predictionChecked) { + checkPrediction(prediction, newdata) + private$predictionChecked = TRUE + } + # If S3 or function, we can only infer the task + # once we see the predictions + if (is.null(self$task)) { + self$task = inferTaskFromPrediction(prediction) + } + if (!is.null(self$class) & ncol(prediction) > 1) { + prediction = prediction[,self$class, drop=FALSE] + } + rownames(prediction) = NULL + data.frame(prediction) + }, + class = NULL, + prediction.colnames = NULL, + prediction.function = NULL, + task = NULL, + print = function() { + cat("Prediction task:", self$task, "\n") + if (self$task == "classification") { + cat("Classes: ", paste(self$prediction.colnames, collapse = ", ")) + } + }, + initialize = function(model = NULL, data, predict.fun = NULL, y = NULL, class=NULL) { + if(is.null(model) & is.null(predict.fun)) { + stop("Provide a model, a predict.fun or both!") + } + self$data = Data$new(data, y = y) + self$class = class + self$model = model + self$task = inferTaskFromModel(model) + self$prediction.function = createPredictionFunction(model, self$task, predict.fun) + } + ), + private = list( + predictionChecked = FALSE + ) +) + + + + + + + + diff --git a/R/Shapley.R b/R/Shapley.R new file mode 100644 index 000000000..e715d2930 --- /dev/null +++ b/R/Shapley.R @@ -0,0 +1,207 @@ +#' Prediction explanations with game theory +#' +#' \code{Shapley} computes feature contributions for single predictions with the Shapley value, an approach from cooperative game theory. +#' The feature values of an instance cooperate to achieve the prediction. +#' The Shapley value fairly distributes the difference between the instance's prediction and the dataset's average prediction among the features. +#' +#' @format \code{\link{R6Class}} object.
+#' @name Shapley
+#' @section Usage:
+#' \preformatted{
+#' shapley = Shapley$new(predictor, x.interest = NULL, sample.size = 100, run = TRUE)
+#'
+#' plot(shapley)
+#' shapley$results
+#' print(shapley)
+#' shapley$explain(x.interest)
+#' }
+#'
+#' @section Arguments:
+#' For Shapley$new():
+#' \describe{
+#' \item{predictor}{object of type \code{Predictor}. See \link{Predictor}.}
+#' \item{x.interest}{data.frame with a single row for the instance to be explained.}
+#' \item{sample.size}{The number of Monte Carlo samples for estimating the Shapley value.}
+#' \item{run}{logical. Should the interpretation method be run?}
+#' }
+#'
+#' @section Details:
+#' For more details on the algorithm see https://christophm.github.io/interpretable-ml-book/shapley.html
+#'
+#' @section Fields:
+#' \describe{
+#' \item{predictor}{The prediction model that was analysed.}
+#' \item{results}{data.frame with the Shapley values (phi) per feature.}
+#' \item{sample.size}{The number of times coalitions/marginals are sampled from data X. The higher, the more accurate the explanations become.}
+#' \item{x.interest}{data.frame with a single row for the instance to be explained.}
+#' \item{y.hat.interest}{predicted value for the instance of interest}
+#' \item{y.hat.average}{average predicted value for data \code{X}}
+#' }
+#'
+#' @section Methods:
+#' \describe{
+#' \item{explain(x.interest)}{method to set a new data point to explain.}
+#' \item{plot()}{method to plot the Shapley value. See \link{plot.Shapley}}
+#' \item{\code{run()}}{[internal] method to run the interpretability method. Use \code{obj$run(force = TRUE)} to force a rerun.}
+#' \item{\code{clone()}}{[internal] method to clone the R6 object.}
+#' \item{\code{initialize()}}{[internal] method to initialize the R6 object.}
+#' }
+#'
+#'
+#' @references
+#' Strumbelj, E., Kononenko, I. (2014). Explaining prediction models and individual predictions with feature contributions. Knowledge and Information Systems, 41(3), 647-665.
https://doi.org/10.1007/s10115-013-0679-x
+#' @seealso
+#' A different way to explain predictions: \link{LocalModel}
+#'
+#' @examples
+#' if (require("randomForest")) {
+#' # First we fit a machine learning model on the Boston housing data
+#' data("Boston", package = "MASS")
+#' rf = randomForest(medv ~ ., data = Boston, ntree = 50)
+#' X = Boston[-which(names(Boston) == "medv")]
+#' mod = Predictor$new(rf, data = X)
+#'
+#' # Then we explain the first instance of the dataset with the Shapley method:
+#' x.interest = X[1,]
+#' shapley = Shapley$new(mod, x.interest = x.interest)
+#' shapley
+#'
+#' # Look at the results in a table
+#' shapley$results
+#' # Or as a plot
+#' plot(shapley)
+#'
+#' # Explain another instance
+#' shapley$explain(X[2,])
+#' plot(shapley)
+#'
+#' # Shapley() also works with multiclass classification
+#' rf = randomForest(Species ~ ., data = iris, ntree = 50)
+#' X = iris[-which(names(iris) == "Species")]
+#' predict.fun = function(object, newdata) predict(object, newdata, type = "prob")
+#' mod = Predictor$new(rf, data = X, predict.fun = predict.fun)
+#'
+#' # Then we explain the first instance of the dataset with the Shapley() method:
+#' shapley = Shapley$new(mod, x.interest = X[1,])
+#' shapley$results
+#' plot(shapley)
+#'
+#' # You can also focus on one class
+#' mod = Predictor$new(rf, data = X, predict.fun = predict.fun, class = "setosa")
+#' shapley = Shapley$new(mod, x.interest = X[1,])
+#' shapley$results
+#' plot(shapley)
+#' }
+NULL
+
+#' @export
+
+Shapley = R6::R6Class("Shapley",
+ inherit = InterpretationMethod,
+ public = list(
+ x.interest = NULL,
+ y.hat.interest = NULL,
+ y.hat.average = NULL,
+ sample.size = NULL,
+ explain = function(x.interest) {
+ private$flush()
+ private$set.x.interest(x.interest)
+ self$run()
+ },
+ initialize = function(predictor, x.interest = NULL, sample.size = 100, run = TRUE) {
+ checkmate::assert_data_frame(x.interest, null.ok = TRUE)
+ super$initialize(predictor = predictor)
+ self$sample.size = sample.size
+ if (!is.null(x.interest)) {
+ private$set.x.interest(x.interest)
+ }
+ private$getData = function(...) private$sampler$sample(n = self$sample.size, ...)
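+ # getData defines how the background data for the Monte Carlo estimate is
+ # drawn: each run samples sample.size rows from the data in the Predictor.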
+ if (run & !is.null(x.interest)) self$run()
+ }
+ ),
+ private = list(
+ aggregate = function() {
+ y.hat.with.k = private$qResults[1:(nrow(private$qResults)/2), , drop = FALSE]
+ y.hat.without.k = private$qResults[(nrow(private$qResults)/2 + 1):nrow(private$qResults), , drop = FALSE]
+ y.hat.diff = y.hat.with.k - y.hat.without.k
+ cnames = colnames(y.hat.diff)
+ y.hat.diff = cbind(data.frame(feature = rep(colnames(private$dataDesign), times = self$sample.size)),
+ y.hat.diff)
+ y.hat.diff = gather(y.hat.diff, key = "class", "value", one_of(cnames))
+ y.hat.diff.grouped = group_by(y.hat.diff, feature, class)
+ y.hat.diff = summarise(y.hat.diff.grouped, phi = mean(value), phi.var = var(value))
+ if (!private$multiClass) y.hat.diff$class = NULL
+ y.hat.diff$feature.value = sprintf("%s=%s", colnames(self$x.interest), self$x.interest)
+ y.hat.diff
+ },
+ intervene = function() {
+ # The intervention
+ runs = lapply(1:self$sample.size, function(m) {
+ # randomly order features
+ new.feature.order = sample(1:private$sampler$n.features)
+ # randomly choose sample instance from X
+ sample.instance.shuffled = private$dataSample[sample(1:nrow(private$dataSample), 1), new.feature.order]
+ x.interest.shuffled = self$x.interest[new.feature.order]
+
+ featurewise = lapply(1:private$sampler$n.features, function(k) {
+ k.at.index = which(new.feature.order == k)
+ instance.with.k = x.interest.shuffled
+ if (k.at.index < ncol(self$x.interest)) {
+ instance.with.k[(k.at.index + 1):ncol(instance.with.k)] =
+ sample.instance.shuffled[(k.at.index + 1):ncol(instance.with.k)]
+ }
+ instance.without.k = instance.with.k
+ instance.without.k[k.at.index] = sample.instance.shuffled[k.at.index]
+ cbind(instance.with.k[private$sampler$feature.names], instance.without.k[private$sampler$feature.names])
+ })
+ data.table::rbindlist(featurewise)
+
+ })
+ runs = data.table::rbindlist(runs)
+ dat.with.k = data.frame(runs[,1:(ncol(runs)/2)])
+ dat.without.k = data.frame(runs[,(ncol(runs)/2 + 1):ncol(runs)])
+
+ rbind(dat.with.k, dat.without.k)
+ },
+ set.x.interest = function(x.interest) {
+ self$x.interest = x.interest
+ self$y.hat.interest = self$predictor$predict(x.interest)[1,]
+ self$y.hat.average = colMeans(self$predictor$predict(private$sampler$get.x()))
+ },
+ generatePlot = function(sort = TRUE, ...) {
+ res = self$results
+ if (sort & !private$multiClass) {
+ res$feature.value = factor(res$feature.value, levels = res$feature.value[order(res$phi)])
+ }
+ p = ggplot(res) +
+ geom_col(aes(y = phi, x = feature.value)) + coord_flip()
+ if (private$multiClass) p = p + facet_wrap("class")
+ p
+ },
+ printParameters = function() {
+ cat(sprintf("Predicted value: %f, Average prediction: %f (diff = %f)",
+ self$y.hat.interest, self$y.hat.average, self$y.hat.interest - self$y.hat.average))
+ }
+ )
+)
+
+#' Plot Shapley
+#'
+#' plot.Shapley() plots the Shapley values: the contributions of feature values to the prediction.
+#'
+#' For examples see \link{Shapley}
+#' @param object A Shapley R6 object
+#' @param sort logical. Should the feature values be sorted by Shapley value? Ignored for multiclass output.
+#' @return ggplot2 plot object
+#' @seealso
+#' \link{Shapley}
+plot.Shapley = function(object, sort = TRUE) {
+ object$plot(sort = sort)
+}
+
+
+
+
diff --git a/R/TreeSurrogate.R b/R/TreeSurrogate.R
new file mode 100644
index 000000000..6d68debf7
--- /dev/null
+++ b/R/TreeSurrogate.R
@@ -0,0 +1,236 @@
+#' Decision tree surrogate model
+#'
+#' \code{TreeSurrogate} fits a decision tree on the predictions of a prediction model.
+#'
+#' @format \code{\link{R6Class}} object.
+#' @name TreeSurrogate
+#' @section Usage:
+#' \preformatted{
+#' tree = TreeSurrogate$new(predictor, maxdepth = 2, tree.args = NULL, run = TRUE)
+#'
+#' plot(tree)
+#' predict(tree, newdata)
+#' tree$results
+#' print(tree)
+#' }
+#'
+#' @section Arguments:
+#'
+#' For TreeSurrogate$new():
+#' \describe{
+#' \item{predictor}{Object of type \code{Predictor}. See \link{Predictor}.}
+#' \item{maxdepth}{The maximum depth of the tree. Default is 2.}
+#' \item{run}{logical. Should the interpretation method be run?}
+#' \item{tree.args}{A list with further arguments for \code{ctree}.}
+#' }
+#'
+#' @section Details:
+#' A conditional inference tree is fitted on the predicted \eqn{\hat{y}} from the machine learning model and the data.
+#' The \code{ctree} function from the \code{partykit} package is used to fit the tree.
+#' By default a tree of maximum depth of 2 is fitted to improve interpretability.
+#'
+#' @section Fields:
+#' \describe{
+#' \item{maxdepth}{the maximal tree depth.}
+#' \item{predictor}{The prediction model that was analysed.}
+#' \item{results}{data.frame with the sampled feature X together with the leaf node information (columns ..node and ..path)
+#' and the predicted \eqn{\hat{y}} for tree and machine learning model (columns starting with ..y.hat).}
+#' \item{tree}{the fitted tree of class \code{party}. See also \link[partykit]{ctree}.}
+#' }
+#'
+#' @section Methods:
+#' \describe{
+#' \item{plot()}{method to plot the leaf nodes of the surrogate decision tree. See \link{plot.TreeSurrogate}.}
+#' \item{predict()}{method to predict new data with the tree. See also \link{predict.TreeSurrogate}}
+#' \item{\code{run()}}{[internal] method to run the interpretability method.
Use \code{obj$run(force = TRUE)} to force a rerun.} +#' \item{\code{clone()}}{[internal] method to clone the R6 object.} +#' \item{\code{initialize()}}{[internal] method to initialize the R6 object.} +#' } +#' +#' @examples +#' if (require("randomForest")) { +#' # Fit a Random Forest on the Boston housing data set +#' data("Boston", package = "MASS") +#' rf = randomForest(medv ~ ., data = Boston, ntree = 50) +#' # Create a model object +#' mod = Predictor$new(rf, data = Boston[-which(names(Boston) == "medv")]) +#' +#' # Fit a decision tree as a surrogate for the whole random forest +#' dt = TreeSurrogate$new(mod) +#' +#' # Plot the resulting leaf nodes +#' plot(dt) +#' +#' # Use the tree to predict new data +#' predict(dt, Boston[1:10,]) +#' +#' # Extract the results +#' dat = dt$results +#' head(dat) +#' +#' +#' # It also works for classification +#' rf = randomForest(Species ~ ., data = iris, ntree = 50) +#' X = iris[-which(names(iris) == "Species")] +#' predict.fun = function(object, newdata) predict(object, newdata, type = "prob") +#' mod = Predictor$new(rf, data = X, predict.fun = predict.fun) +#' +#' # Fit a decision tree as a surrogate for the whole random forest +#' dt = TreeSurrogate$new(mod, maxdepth=2) +#' +#' # Plot the resulting leaf nodes +#' plot(dt) +#' +#' # If you want to visualise the tree directly: +#' plot(dt$tree) +#' +#' # Use the tree to predict new data +#' set.seed(42) +#' iris.sample = X[sample(1:nrow(X), 10),] +#' predict(dt, iris.sample) +#' predict(dt, iris.sample, type = "class") +#' +#' # Extract the dataset +#' dat = dt$results +#' head(dat) +#' } +#' @seealso +#' \link{predict.TreeSurrogate} +#' \link{plot.TreeSurrogate} +#' +#' For the tree implementation +#' \link[partykit]{ctree} +#' @export +#' @import partykit +NULL + +#' @export +TreeSurrogate = R6::R6Class("TreeSurrogate", + inherit = InterpretationMethod, + public = list( + # The fitted tree + tree = NULL, + # Maximal depth as set by the user + maxdepth = NULL, + initialize = function(predictor, maxdepth = 2, tree.args = NULL, run = TRUE) { + super$initialize(predictor) + private$tree.args = tree.args + self$maxdepth = maxdepth + if(run) self$run() + }, + predict = function(newdata, type = "prob", ...) { + assert_choice(type, c("prob", "class")) + res = data.frame(predict(self$tree, newdata = newdata, type = "response", ...)) + if (private$multiClass) { + if (type == "class") { + res = data.frame(..class = colnames(res)[apply(res, 1, which.max)]) + } + } else { + res = data.frame(..y.hat = predict(self$tree, newdata = newdata, ...)) + } + res + } + ), + private = list( + tree.args = NULL, + # Only relevant in multiClass case + tree.predict.colnames = NULL, + # Only relevant in multiClass case + object.predict.colnames = NULL, + + intervene = function() private$dataSample, + aggregate = function() { + y.hat = private$qResults + if (private$multiClass) { + classes = colnames(y.hat) + form = formula(sprintf("%s ~ .", paste(classes, collapse = "+"))) + } else { + y.hat = unlist(y.hat[1]) + form = y.hat ~ . 
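+ # Single-output case: the tree is fit on the first (and only) prediction
+ # column as a univariate response.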
+ }
+ dat = cbind(y.hat, private$dataDesign)
+ tree.args = c(list(formula = form, data = dat, maxdepth = self$maxdepth), private$tree.args)
+ self$tree = do.call(partykit::ctree, tree.args)
+ result = data.frame(..node = predict(self$tree, type = "node"),
+ ..path = pathpred(self$tree))
+ if (private$multiClass) {
+ outcome = private$qResults
+ colnames(outcome) = paste("..y.hat.", colnames(outcome), sep="")
+ private$object.predict.colnames = colnames(outcome)
+
+ # result = gather(result, key = "..class", value = "..y.hat", one_of(cnames))
+ ..y.hat.tree = self$predict(private$dataDesign, type = "prob")
+ colnames(..y.hat.tree) = paste("..y.hat.tree.", colnames(..y.hat.tree), sep="")
+ private$tree.predict.colnames = colnames(..y.hat.tree)
+
+ #..y.hat.tree = gather(..y.hat.tree, "..class.tree", "..y.hat.tree")
+ result = cbind(result, outcome, ..y.hat.tree)
+ } else {
+ result$..y.hat = private$qResults[[1]]
+ result$..y.hat.tree = self$predict(private$dataDesign)[[1]]
+ }
+ design = private$dataDesign
+ rownames(design) = NULL
+ cbind(design, result)
+ },
+ generatePlot = function() {
+ p = ggplot(self$results) +
+ geom_boxplot(aes(y = ..y.hat, x = "")) +
+ scale_x_discrete("") +
+ facet_wrap("..path") +
+ scale_y_continuous(expression(hat(y)))
+ if (private$multiClass) {
+ plotData = self$results
+ # max class for model
+ plotData$..class = private$object.predict.colnames[apply(plotData[private$object.predict.colnames], 1, which.max)]
+ plotData$..class = gsub("..y.hat.", "", plotData$..class)
+ plotData = plotData[setdiff(names(plotData), private$object.predict.colnames)]
+ p = ggplot(plotData) +
+ geom_bar(aes(x = ..class)) +
+ facet_wrap("..path")
+ }
+ p
+ }
+ )
+)
+
+
+
+#' Predict Tree Surrogate
+#'
+#' Predict the response for newdata of a TreeSurrogate object.
+#'
+#' This function makes the TreeSurrogate object call
+#' its internal object$predict() method.
+#' For examples see \link{TreeSurrogate}
+#' @param object The surrogate tree. A TreeSurrogate R6 object
+#' @param newdata A data.frame for which to predict
+#' @param type Either "prob" or "class". Ignored if the surrogate tree does regression.
+#' @param ... Further arguments for \code{predict_party}
+#' @return A data.frame with the predicted outcome.
+#' In case of regression it is the predicted \eqn{\hat{y}}.
+#' In case of classification it is either the class probabilities (for type "prob") or the class label (type "class")
+#' @seealso
+#' \link{TreeSurrogate}
+#' @importFrom stats predict
+#' @export
+predict.TreeSurrogate = function(object, newdata, type = "prob", ...) {
+ object$predict(newdata = newdata, type, ...)
+}
+
+
+#' Plot Tree Surrogate
+#'
+#' Plot the leaf nodes of a TreeSurrogate object.
+#' Each plot facet is one leaf node and visualises the distribution of the \eqn{\hat{y}}
+#' from the machine learning model.
+#'
+#' For examples see \link{TreeSurrogate}
+#' @param object A TreeSurrogate R6 object
+#' @return ggplot2 plot object
+#' @seealso
+#' \link{TreeSurrogate}
+plot.TreeSurrogate = function(object) {
+ object$plot()
+}
+
diff --git a/R/createPredictionFunction.R b/R/createPredictionFunction.R
new file mode 100644
index 000000000..0595a40f5
--- /dev/null
+++ b/R/createPredictionFunction.R
@@ -0,0 +1,72 @@
+createPredictionFunction = function(model, task, predict.fun = NULL){
+ UseMethod("createPredictionFunction")
+}
+
+createPredictionFunction.WrappedModel = function(model, task, predict.fun = NULL){
+ if (!requireNamespace("mlr")) {
+ stop("Please install the mlr package.")
+ } + if (task == "classification") { + function(newdata){ + pred = predict(model, newdata = newdata) + if (model$learner$predict.type == "response") { + pred = mlr::getPredictionResponse(pred) + data.frame(model.matrix(~prediction-1, data.frame(prediction = pred), sep = ":")) + } else { + mlr::getPredictionProbabilities(pred, cl = model$task.desc$class.levels) + } + } + } else if (task == "regression") { + function(newdata){ + pred = predict(model, newdata = newdata) + data.frame(..prediction = mlr::getPredictionResponse(pred)) + } + } else { + stop(sprintf("Task type '%s' not supported", task)) + } +} + +createPredictionFunction.train = function(model, task, predict.fun = NULL){ + if (task == "classification") { + function(newdata) { + predict(model, newdata = newdata, type = "prob") + } + } else if (task == "regression") { + function(newdata) { + data.frame(..prediction = predict(model, newdata = newdata)) + } + } else { + stop(sprintf("task of type %s not allowed.", task)) + } +} + + +createPredictionFunction.NULL = function(model, task, predict.fun = NULL) { + function(newdata) { + pred = predict.fun(newdata = newdata) + if (is.label.output(pred)) { + warning("Output seems to be class instead of probabilities. + Automatically transformed to 0 and 1 probabilities. + Please use the predict.fun argument.") + pred = data.frame(model.matrix(~prediction-1, data.frame(prediction = pred), sep = ":")) + } + data.frame(pred) + } +} + +#' @importFrom stats model.matrix +createPredictionFunction.default = function(model, task, predict.fun = NULL){ + if (is.null(predict.fun)) { + predict.fun = function(object, newdata) predict(object, newdata) + } + function(newdata) { + pred = do.call(predict.fun, list(model, newdata = newdata)) + if (is.label.output(pred)) { + warning("Output seems to be class instead of probabilities. + Automatically transformed to 0 and 1 probabilities. + Please use the predict.fun argument.") + pred = data.frame(model.matrix(~prediction-1, data.frame(prediction = pred), sep = ":")) + } + data.frame(pred) + } +} \ No newline at end of file diff --git a/R/feature.imp.R b/R/feature.imp.R deleted file mode 100644 index fd9670175..000000000 --- a/R/feature.imp.R +++ /dev/null @@ -1,200 +0,0 @@ -#' Feature importance -#' -#' @description -#' feature.imp() computes feature importances for machine learning models. -#' The importance of a feature is the factor by which the model's prediction error increases when the feature is shuffled. -#' -#' @details -#' Read the Interpretable Machine Learning book to learn more about feature importance: -#' \url{https://christophm.github.io/interpretable-ml-book/permutation-feature-importance.html} -#' -#' Two permutation schemes are implemented: -#' \itemize{ -#' \item shuffle: A simple shuffling of the feature values, yielding n perturbed instances per feature (faster) -#' \item cartesian: Matching every instance with the feature value of all other instances, yielding n x (n-1) perturbed instances per feature (slow) -#' } -#' The loss function can be either specified via a string, or by handing a function to \code{feature.imp()}. -#' Using the string is a shortcut to using loss functions from the \code{Metrics} package. -#' See \code{library(help = "Metrics")} to get a list of functions. -#' Only use functions that return a single performance value, not a vector. -#' You can also provide a function directly. It has to take the actual value as its first argument and the prediction as its second. -#' -#' -#' @param loss The loss function. 
A string (e.g. "ce" for classification or "mse") or a function. See Details. -#' @param method Either 'shuffle' or 'cartesian'. See Details. -#' @param y The vector or data.frame with the actual target values associated with X. -#' @return -#' An Importance object (R6). Its methods and variables can be accessed with the \code{$}-operator: -#' \item{error.original}{The loss of the model before perturbing features.} -#' \item{loss}{The loss function. Can also be applied to data: \code{object$loss(actual, predicted)}} -#' \item{data()}{method to extract the results of the feature importance computation. -#' Returns a data.frame with importance and permutation error measurements per feature.} -#' \item{plot()}{method to plot the feature importances. See \link{plot.Importance}} -#' @template args_internal_methods -#' -#' @references -#' Fisher, A., Rudin, C., and Dominici, F. (2018). Model Class Reliance: Variable Importance Measures for any Machine Learning Model Class, from the "Rashomon" Perspective. Retrieved from http://arxiv.org/abs/1801.01489 -#' @export -#' @import Metrics -#' @template args_experiment_wrap -#' @examples -#' # We train a tree on the Boston dataset: -#' if(require("rpart")){ -#' data("Boston", package = "MASS") -#' mod = rpart(medv ~ ., data = Boston) -#' -#' # Compute the individual conditional expectations for the first feature -#' X = Boston[-which(names(Boston) == 'medv')] -#' y = Boston$medv -#' -#' # Compute feature importances as the performance drop in mean absolute error -#' imp = feature.imp(mod, X, y, loss = 'mae') -#' -#' # Plot the results directly -#' plot(imp) -#' -#' -#' # Since the result is a ggplot object, you can extend it: -#' library("ggplot2") -#' plot(imp) + theme_bw() -#' -#' # If you want to do your own thing, just extract the data: -#' imp.dat = imp$data() -#' head(imp.dat) -#' ggplot(imp.dat, aes(x = ..feature, y = importance)) + geom_point() + -#' theme_bw() -#' -#' # feature.imp() also works with multiclass classification. -#' # In this case, the importance measurement regards all classes -#' mod = rpart(Species ~ ., data= iris) -#' X = iris[-which(names(iris) == 'Species')] -#' y = iris$Species -#' # For some models we have to specify additional arguments for the predict function -#' imp = feature.imp(mod, X, y, loss = 'ce', predict.args = list(type = 'prob')) -#' plot(imp) -#' # Here we encounter the special case that the machine learning model perfectly predicts -#' # The importance becomes infinite -#' imp$data() -#' -#' # For multiclass classification models, you can choose to only compute performance for one class. -#' # Make sure to adapt y -#' imp = feature.imp(mod, X, y == 'virginica', class = 3, loss = 'ce', -#' predict.args = list(type = 'prob')) -#' plot(imp) -#' } -feature.imp = function(object, X, y, class=NULL, loss, method = 'shuffle', ...){ - assert_vector(y, any.missing = FALSE) - - samp = DataSampler$new(X, y = data.frame(y = y)) - pred = prediction.model(object, class = class,...) - - Importance$new(predictor = pred, sampler = samp, loss=loss, method=method)$run() -} - - -#' Feature importance plot -#' -#' plot.Importance() plots the feature importance results of an Importance object. -#' -#' For examples see \link{feature.imp} -#' @param x The feature importance. An Importance R6 object -#' @param sort logical. Should the features be sorted in descending order? Defaults to TRUE. -#' @param ... 
Further arguments for the objects plot function -#' @return ggplot2 plot object -#' @export -#' @importFrom dplyr group_by_ -#' @seealso -#' \link{feature.imp} -plot.Importance = function(x, sort = TRUE, ...){ - x$plot(sort = sort, ...) -} - - -Importance = R6::R6Class('Importance', - inherit = Experiment, - public = list( - loss = NULL, - error.original = NULL, - initialize = function(predictor, sampler, loss, method){ - if(!inherits(loss, 'function')){ - ## Only allow metrics from Metrics package - private$loss.string = loss - loss = getFromNamespace(loss, "Metrics") - } else { - private$loss.string = head(loss) - } - checkmate::assert_choice(method, c('shuffle', 'cartesian')) - super$initialize(predictor = predictor, sampler = sampler) - self$loss = private$set.loss(loss) - private$method = method - private$get.data = private$sampler$get.xy - self$error.original = loss(private$sampler$y[[1]], private$Q(private$predict(private$sampler$X))[[1]]) - } - ), - private = list( - method = NULL, - # for printing - loss.string = NULL, - shuffle.feature = function(feature.name, method){ - if(method == 'shuffle'){ - X.inter = private$X.sample - X.inter[feature.name] = X.inter[sample(1:nrow(private$X.sample)), feature.name] - } else if(method == 'cartesian'){ - n = nrow(private$X.sample) - row.indices = rep(1:n, times = n) - replace.indices = rep(1:n, each = n) - # Indices of instances to keep. Removes those where instance matched with own value - keep.indices = as.logical(as.vector(1 - diag(n))) - X.inter = private$X.sample[row.indices, ] - X.inter[feature.name] = X.inter[replace.indices, feature.name] - X.inter = X.inter[keep.indices,] - } else { - stop(sprintf('%s method not implemented')) - } - X.inter$..feature = feature.name - X.inter - }, - Q = function(pred) probs.to.labels(pred), - intervene = function(){ - X.inter.list = lapply(private$sampler$feature.names, function(i) private$shuffle.feature(i, method = private$method)) - data.frame(data.table::rbindlist(X.inter.list)) - }, - aggregate = function(){ - y = private$X.design[private$sampler$y.names] - y.hat = private$Q.results - # For classification we work with the class labels instead of probs - result = data.frame(..feature = private$X.design$..feature, ..actual = y[[1]], ..predicted = y.hat[[1]]) - - result.grouped = group_by_(result, "..feature") - result = summarise(result.grouped, error = self$loss(..actual, ..predicted), - importance = error / self$error.original) - result = result[order(result$importance, decreasing = TRUE),] - result - }, - generate.plot = function(sort = TRUE, ...){ - res = private$results - if(sort){ - res$..feature = factor(res$..feature, levels = res$..feature[order(res$importance)]) - } - ggplot(res, aes(y = ..feature, x = importance)) + geom_point()+ - geom_segment(aes(y = ..feature, yend = ..feature, x=1, xend = importance)) + - scale_x_continuous("Feature Importance") + - scale_y_discrete("Feature") - }, - set.loss = function(loss){ - self$loss = loss - }, - print.parameters = function(){ - cat('error function:', private$loss.string) - } - ) -) - - - - - - - - - diff --git a/R/ice.r b/R/ice.r deleted file mode 100644 index d5586b5a3..000000000 --- a/R/ice.r +++ /dev/null @@ -1,200 +0,0 @@ -#' Individual conditional expectations (ICE) -#' -#' @description -#' Fits and plots individual conditional expectation function on an arbitrary machine learning model -#' -#' @details -#' Machine learning model try to learn the relationship \eqn{y = f(X)}. 
We can't visualize -#' the learned \eqn{\hat{f}} directly for an individual, high-dimensional point \eqn{x_i}. -#' -#' But we can take one of the input features of an observation and change its value. -#' We try out a grid of different values and observe the predicted outcome. -#' This gives us the predicted \eqn{\hat{y}} as a function of feature \eqn{X_j}, which we can plot as a line. -#' The \code{ice} method repeats this for all the observations in the dataset and plots all the lines in the same plot. -#' -#' Mathematically, we split up the learned function into its parts: -#' \deqn{f(x_i) = f_1(x_{i,1}) + \ldots + f_p(x_{i,p}) + f_{1, 2}(x_{i,1}, x_{i,2}) + \ldots + f_{p-1, p}(x_{i,p-1}, x_{p}) + \ldots + f_{1\ldots p}(x_{i,1\ldots X_p})}, -#' -#' And we can isolate the individual conditional expectation of \eqn{y} on a single \eqn{X_j}: \eqn{f_j(X_j)} and plot it. -#' -#' Partial dependence plots (\link{pdp}) are the averaged lines of ice curves. -#' The returned object can be plotted is a \code{ggplot} -#' object. This means it can be plotted directly or be extended using ggplots \code{+} operator. -#' To learn more about partial dependence plot, read the Interpretable Machine Learning book: https://christophm.github.io/interpretable-ml-book/ice.html -#' -#' @param feature The index of the feature of interest. -#' @template arg_grid.size -#' @param center.at The value for the centering of the plot. Numeric for numeric features, and the level name for factors. -#' @return An individual conditional expectation object -#' @template args_experiment_wrap -#' @return -#' An ICE object (R6). Its methods and variables can be accessed with the \code{$}-operator: -#' \item{feature.name}{The feature name for which the partial dependence was computed.} -#' \item{feature.type}{The detected type of the feature, either "categorical" or "numerical".} -#' \item{feature.index}{The index of the feature for which the individual conditional expectations weree computed.} -#' \item{center.at}{The features value(s) at which the ice computations are centered.} -#' \item{grid.size}{The size of the grid.} -#' \item{sample.size}{The number of instances sampled from data X.} -#' \item{center}{method to get/set the feature value at which the ice computation should be centered. See examples for usage.} -#' \item{feature}{method to get/set the feature (index) for which to compute ice. See examples for usage.} -#' \item{data()}{method to extract the results of the partial dependence plot. -#' Returns a data.frame with the grid of feature of interest and the predicted \eqn{\hat{y}}. -#' Can be used for creating custom partial dependence plots.} -#' \item{plot()}{method to plot the partial dependence function. See \link{plot.PDP}} -#' @references -#' Goldstein, A., Kapelner, A., Bleich, J., and Pitkin, E. (2013). Peeking Inside the Black Box: -#' Visualizing Statistical Learning with Plots of Individual Conditional Expectation, 1-22. 
https://doi.org/10.1080/10618600.2014.907095 -#' @seealso -#' \link{pdp} for partial dependence plots (aggregated ice plots) -#' -#' @examples -#' # We train a random forest on the Boston dataset: -#' if(require("randomForest")){ -#' -#' data("Boston", package = "MASS") -#' mod = randomForest(medv ~ ., data = Boston, ntree = 50) -#' -#' # Compute the individual conditional expectations for the first feature -#' ice.obj = ice(mod, Boston, feature = 1) -#' -#' # Plot the results directly -#' plot(ice.obj) -#' -#' # You can center the ICE plot -#' ice.obj$center.at = 0 -#' plot(ice.obj) -#' -#' # ICE plots can be centered at initialization -#' ice.obj = ice(mod, Boston, feature = 1, center=75) -#' plot(ice.obj) -#' -#' # Centering can also be removed -#' ice.obj$center.at = NULL -#' plot(ice.obj) -#' -#' # Since the result is a ggplot object, you can extend it: -#' library("ggplot2") -#' plot(ice.obj) + theme_bw() -#' -#' # If you want to do your own thing, just extract the data: -#' ice.dat = ice.obj$data() -#' head(ice.dat) -#' ggplot(ice.dat) + -#' geom_line(aes(x = crim, y = y.hat, group = ..individual, color = factor(..individual))) + -#' scale_color_discrete(guide = "none") -#' -#' # You can reuse the ice object for other features: -#' ice.obj$feature = 2 -#' plot(ice.obj) -#' -#' # ICE also works with multiclass classification -#' library("randomForest") -#' mod = randomForest(Species ~ ., data= iris, ntree=50) -#' -#' # For some models we have to specify additional arguments for the predict function -#' plot(ice(mod, iris, feature = 1, predict.args = list(type = 'prob'))) -#' -#' # For multiclass classification models, you can choose to only show one class: -#' plot(ice(mod, iris, feature = 1, class = 1, predict.args = list(type = 'prob'))) -#' -#' # ICE plots can be centered: -#' plot(ice(mod, iris, feature = 1, center = 1, predict.args = list(type = 'prob'))) -#' } -#' @importFrom dplyr left_join -#' @export -ice = function(object, X, feature, grid.size=10, center.at = NULL, class=NULL, ...){ - samp = DataSampler$new(X) - pred = prediction.model(object, class = class, ...) - obj = ICE$new(predictor = pred, sampler = samp, anchor.value = center.at, feature = feature, grid.size = grid.size) - obj$run() - obj -} - - -#' Individual conditional expectation plots -#' -#' plot.ICE() plots individiual expectation curves for each observation for one feature. -#' -#' For examples see \link{ice} -#' @param x The individual conditional expectation curves. An ICE R6 object -#' @param ... Further arguments for the objects plot function -#' @return ggplot2 plot object -#' @seealso -#' \link{ice} -plot.ICE = function(x, ...){ - x$plot() -} - - -ICE = R6::R6Class('ICE', - inherit = PDP, - public = list( - initialize = function(feature, anchor.value = NULL, ...){ - checkmate::assert_number(anchor.value, null.ok = TRUE) - private$anchor.value = anchor.value - assert_count(feature) - super$initialize(feature=feature, ...) 
- }
- ),
- private = list(
- generate.plot = function(){
- p = ggplot(private$results, mapping = aes_string(x = names(private$results)[1], y = 'y.hat', group = '..individual'))
- if(self$feature.type == 'numerical') p = p + geom_line()
- else if (self$feature.type == 'categorical') p = p + geom_line(alpha = 0.2) + geom_point()
-
- if(private$multi.class){
- p + facet_wrap("..class.name")
- } else {
- p
- }
- },
- intervene = function(){
- X.design = super$intervene()
- if(!is.null(private$anchor.value)) {
- X.design.anchor = private$X.sample
- X.design.anchor[self$feature.index] = private$anchor.value
- private$X.design.ids = c(private$X.design.ids, 1:nrow(private$X.sample))
- X.design = rbind(X.design, X.design.anchor)
- }
- X.design
- },
- aggregate = function(){
- X.id = private$X.design.ids
- X.results = private$X.design[self$feature.index]
- X.results$..individual = X.id
- if(private$multi.class){
- y.hat.names = colnames(private$Q.results)
- X.results = cbind(X.results, private$Q.results)
- X.results = gather(X.results, key = "..class.name", value = "y.hat", one_of(y.hat.names))
- } else {
- X.results['y.hat']= private$Q.results
- X.results['..class.name'] = 1
- }
-
- if(!is.null(private$anchor.value)){
- X.aggregated.anchor = X.results[X.results[self$feature.names] == private$anchor.value, c('y.hat', '..individual', '..class.name')]
- names(X.aggregated.anchor) = c('anchor.yhat', '..individual', '..class.name')
- X.results = left_join(X.results, X.aggregated.anchor, by = c('..individual', '..class.name'))
- X.results$y.hat = X.results$y.hat - X.results$anchor.yhat
- X.results$anchor.yhat = NULL
- }
-
- # Remove class name column again if single output
- if(!private$multi.class){
- X.results$..class.name = NULL
- }
-
- X.results
- },
- anchor.value = NULL
- ),
- active = list(
- center.at = function(anchor.value){
- if(missing(anchor.value)) {return(private$anchor.value)}
- private$anchor.value = anchor.value
- private$flush()
- self$run()
- }
- )
-)
-
diff --git a/R/iml.R b/R/iml.R
index c4e0a8678..f93775234 100644
--- a/R/iml.R
+++ b/R/iml.R
@@ -2,7 +2,6 @@
 #'
 #' The iml package provides tools to analyse machine learning models and predictions.
 #'
-#' TODO
 #'
 #' @seealso
 #' \href{https://christophm.github.io/interpretable-ml-book/agnostic}{Book on Interpretable Machine Learning}
diff --git a/R/inferTask.R b/R/inferTask.R
new file mode 100644
index 000000000..c09aa9937
--- /dev/null
+++ b/R/inferTask.R
@@ -0,0 +1,58 @@
+inferTaskFromModel = function(model){
+ UseMethod("inferTaskFromModel")
+}
+
+
+inferTaskFromModel.NULL = function(model){
+ "unknown"
+}
+
+inferTaskFromModel.WrappedModel = function(model){
+ if (!requireNamespace("mlr")) {
+ stop("Please install the mlr package.")
+ }
+ tsk = mlr::getTaskType(model)
+ if (tsk == "classif") {
+ if (model$learner$predict.type != "prob") {
+ warning("Output seems to be class instead of probabilities.
+ Automatically transformed to 0 and 1 probabilities.
+ You might want to set predict.type = 'prob' for Learner!") + } + return("classification") + } else if (tsk == "regr") { + return("regression") + } else { + stop(sprintf("mlr task type <%s> not supported", tsk)) + } +} + + +inferTaskFromModel.train = function(model) { + mtype = model$modelType + if (mtype == "Regression") { + return("regression") + } else if (mtype == "Classification") { + return("classification") + } else { + stop(sprintf("caret model type %s not supported.", mtype)) + } +} + +inferTaskFromModel.default = function(model){ + "unknown" +} + + + +inferTaskFromPrediction = function(prediction){ + assert_true(any(class(prediction) %in% + c("integer", "numeric", "data.frame", "matrix", "factor", "character"))) + if (inherits(prediction, c("data.frame", "matrix")) && dim(prediction)[2] > 1) { + "classification" + } else if (inherits(prediction, c("factor", "character"))) { + "classification" + } else { + "regression" + } +} \ No newline at end of file diff --git a/R/lime.r b/R/lime.r deleted file mode 100644 index b432092f6..000000000 --- a/R/lime.r +++ /dev/null @@ -1,213 +0,0 @@ -#' LIME -#' -#' @description -#' \code{lime()} fits a locally weighted linear regression model (logistic for classification) to explain a single machine learning prediction. -#' -#' @details -#' Data points are sampled and weighted by their proximity to the instance to be explained. -#' A weighted glm is fitted with the machine learning model prediction as target. -#' L1-regularisation is used to make the results sparse. -#' The resulting model can be seen as a surrogate for the machine learning model, which is only valid for that one point. -#' Categorical features are binarized, depending on the category of the instance to be explained: 1 if the category is the same, 0 otherwise. -#' -#' Differences to the original LIME implementation: -#' \itemize{ -#' \item Distance measure: Uses gower proximity (= 1 - gower distance) instead of a kernel based on the Euclidean distance. Has the advantage to have a meaningful neighbourhood and no kernel width to tune. -#' \item Sampling: Sample from X instead of from normal distributions. -#' Has the advantage to follow the original data distribution. -#' \item Visualisation: Plots effects instead of betas. Is the same for binary features, but makes a difference for numerical features. -#' For numerical features, plotting the betas makes no sense, -#' because a negative beta might still increase the prediction when the feature value is also negative. -#' } -#' To learn more about local models, read the Interpretable Machine Learning book: https://christophm.github.io/interpretable-ml-book/lime.html -#' -#' @references -#' Ribeiro, M. T., Singh, S., & Guestrin, C. (2016). "Why Should I Trust You?": Explaining the Predictions of Any Classifier. Retrieved from http://arxiv.org/abs/1602.04938 -#' -#' @seealso -#' \code{\link{plot.LIME}} and \code{\link{predict.LIME}} -#' -#' \code{\link{shapley}} can also be used to explain single predictions -#' -#' \code{\link[lime]{lime}}, the original implementation -#' @export -#' @importFrom glmnet glmnet -#' @template args_experiment_wrap -#' @template arg_sample.size -#' @template args_x.interest -#' @param k the (maximum) number of features to be used for the surrogate model -#' @return -#' A LIME object (R6). Its methods and variables can be accessed with the \code{$}-operator: -#' \item{sample.size}{The number of samples from data X. 
The higher the more accurate the explanations become.} -#' \item{model}{the glmnet object.} -#' \item{best.fit.index}{the index of the best glmnet fit} -#' \item{k}{The number of features as set by the user.} -#' \item{x.interest}{method to get/set the instance. See examples for usage.} -#' \item{data()}{method to extract the results of the local feature effects -#' Returns a data.frame with the feature names (\code{feature}) and contributions to the prediction} -#' \item{plot()}{method to plot the LIME feature effects. See \link{plot.LIME}} -#' \item{predict()}{method to predict new data with the local model See also \link{predict.LIME}} -#' @template args_internal_methods -#' @examples -#' # First we fit a machine learning model on the Boston housing data -#' library("randomForest") -#' data("Boston", package = "MASS") -#' mod = randomForest(medv ~ ., data = Boston, ntree = 50) -#' X = Boston[-which(names(Boston) == "medv")] -#' -#' # Then we explain the first instance of the dataset with the lime() method: -#' x.interest = X[1,] -#' lemon = lime(mod, X, x.interest = x.interest, k = 2) -#' lemon -#' -#' # Look at the results in a table -#' lemon$data() -#' # Or as a plot -#' plot(lemon) -#' -#' # Reuse the object with a new instance to explain -#' lemon$x.interest = X[2,] -#' plot(lemon) -#' -#' # lime() also works with multiclass classification -#' library("randomForest") -#' mod = randomForest(Species ~ ., data= iris, ntree=50) -#' X = iris[-which(names(iris) == 'Species')] -#' -#' # Then we explain the first instance of the dataset with the lime() method: -#' lemon = lime(mod, X, x.interest = X[1,], predict.args = list(type='prob'), k = 3) -#' lemon$data() -#' plot(lemon) -#' -#'# You can also focus on one class -#' lemon = lime(mod, X, x.interest = X[1,], class = 2, predict.args = list(type='prob'), k = 2) -#' lemon$data() -#' plot(lemon) -#' -lime = function(object, X, sample.size=100, k = 3, x.interest, class = NULL, ...){ - samp = DataSampler$new(X) - pred = prediction.model(object, class = class, ...) - LIME$new(predictor = pred, sampler = samp, sample.size=sample.size, k = k, x.interest = x.interest)$run() -} - - - -#' LIME prediction -#' -#' Predict the response for newdata with the LIME model. -#' -#' This function makes the LIME object call -#' its iternal object$predict() method. -#' For examples see \link{lime} -#' @param object A LIME R6 object -#' @param newdata A data.frame for which to predict -#' @param ... Further arguments for the objects predict function -#' @return A data.frame with the predicted outcome. -#' @seealso -#' \link{lime} -#' @importFrom stats predict -#' @export -predict.LIME = function(object, newdata = NULL, ...){ - object$predict(newdata = newdata, ...) -} - -#' LIME plot -#' -#' plot.LIME() plots the feature effects of the LIME model. 
-#' -#' For examples see \link{lime} -#' @param object A LIME R6 object -#' @return ggplot2 plot object -#' @seealso -#' \link{lime} -plot.LIME = function(object){ - object$plot() -} - - -LIME = R6::R6Class('LIME', - inherit = Experiment, - public = list( - x = NULL, - k = NULL, - model = NULL, - best.fit.index = NULL, - sample.size = NULL, - predict = function(newdata = NULL, ...){ - if(is.null(newdata)) newdata = self$x - X.recode = recode(newdata, self$x) - prediction = predict(self$model, newx=as.matrix(X.recode)) - if(private$multi.class){ - data.frame(prediction[,,self$best.fit.index]) - } else { - pred = prediction[,self$best.fit.index, drop=FALSE] - colnames(pred) = NULL - data.frame(prediction = pred) - } - }, - initialize = function(predictor, sampler, sample.size, k, x.interest, class, ...){ - checkmate::assert_number(k, lower = 1, upper = sampler$n.features) - checkmate::assert_data_frame(x.interest) - if(!require('glmnet')){stop('Please install glmnet.')} - super$initialize(predictor = predictor, sampler = sampler) - self$sample.size = sample.size - self$k = k - self$x = x.interest - private$get.data = function(...) private$sampler$sample(n = self$sample.size, ...) - } - ), - private = list( - Q = function(pred) probs.to.labels(pred), - best.index = NULL, - aggregate = function(){ - X.recode = recode(private$X.design, self$x) - x.scaled = recode(self$x, self$x) - fam = ifelse(private$multi.class, 'multinomial', 'gaussian') - self$model = glmnet(x = as.matrix(X.recode), y = unlist(private$Q.results[1]), - family = fam, w = private$weight.samples(), - intercept = TRUE, standardize = TRUE, type.multinomial = "grouped") - res = self$model - ## It can happen, that no n.vars matching k occurs - if(any(res$df == self$k)){ - best.index = max(which(res$df == self$k)) - } else { - best.index = max(which(res$df < self$k)) - warning("Had to choose a smaller k") - } - self$best.fit.index = best.index - if(private$multi.class){ - class.results = lapply(res$beta, extract.glmnet.effects, - best.index = best.index, x.scaled = x.scaled, x.original = self$x) - res = data.table::rbindlist(class.results) - res$..class = rep(names(class.results), each = ncol(X.recode)) - } else { - res = extract.glmnet.effects(res$beta, best.index, x.scaled, self$x) - } - res[res$beta != 0, ] - }, - intervene = function(){private$X.sample}, - generate.plot = function(){ - p = ggplot(private$results) + geom_point(aes(y = feature.value, x = effect)) - if(private$multi.class) p = p + facet_wrap("..class") - p - }, - weight.samples = function(){ - require('gower') - 1 - gower_dist(private$X.design, self$x) - } - ), - active = list( - x.interest = function(x.interest){ - self$x = x.interest - private$flush() - self$run() - } - ) -) - - - - - - - diff --git a/R/list.rules.party.R b/R/list_rules_party.R similarity index 97% rename from R/list.rules.party.R rename to R/list_rules_party.R index 622250052..b33e50342 100644 --- a/R/list.rules.party.R +++ b/R/list_rules_party.R @@ -1,7 +1,5 @@ - # Copied from internal partykit function -list.rules.party = function (x, i = NULL, ...) -{ +list.rules.party = function (x, i = NULL, ...){ if (is.null(i)) i <- nodeids(x, terminal = TRUE) if (length(i) > 1) { diff --git a/R/pdp.r b/R/pdp.r deleted file mode 100644 index f29b2f90b..000000000 --- a/R/pdp.r +++ /dev/null @@ -1,257 +0,0 @@ -#' Partial Dependence -#' -#' @description -#' \code{pdp()} computes partial dependence functions of prediction models. 
-#' -#' @details -#' Machine learning model try to learn the relationship \eqn{y = f(X)}. We can't visualize -#' the learned \eqn{\hat{f}} directly for high-dimensional X. -#' But we can split it into parts: -#' \deqn{f(X) = f_1(X_1) + \ldots + f_p(X_p) + f_{1, 2}(X_1, X_2) + \ldots + f_{p-1, p}(X_{p-1}, X_p) + \ldots + f_{1\ldots p}(X_1\ldots X_p)}, -#' -#' And we can isolate the partial dependence of \eqn{y} on a single \eqn{X_j}: \eqn{f_j(X_j)} and plot it. -#' We can even do this for higher dimensions, but a maximum of 2 features makes sense: \eqn{f_j(X_j) + f_k(X_k) + f_{jk}(X_{jk})} -#' -#' The partial dependence for a feature \eqn{X_j} is estimated by spanning a grid over the feature space. -#' For each value of the grid, we replace in the whole dataset the \eqn{X_j}-value with the grid value, -#' predict the outcomes \eqn{\hat{y}} with the machine learning models and average the predictions. -#' This generate one point of the partial dependence curve. After doing this for the whole grid, -#' the outcome is a curve (or 2D plane), that then can be plotted. -#' -#' To learn more about partial dependence plot, read the Interpretable Machine Learning book: https://christophm.github.io/interpretable-ml-book/pdp.html -#' -#' -#' @seealso -#' \link{plot.PDP} -#' -#' \code{\link{ice}} for individual conditional expectation plots. -#' -#' -#' @template args_experiment_wrap -#' @param feature The feature index for which to compute the partial dependencies. -#' Either a single number or vector of two numbers -#' -#' @references -#' Friedman, J.H. 2001. "Greedy Function Approximation: A Gradient Boosting Machine." Annals of Statistics 29: 1189-1232. -#' @return -#' A PDP object (R6). Its methods and variables can be accessed with the \code{$}-operator: -#' \item{feature}{The feature names for which the partial dependence was computed.} -#' \item{feature.type}{The detected types of the features, either "categorical" or "numerical".} -#' \item{feature.index}{The index of the features for which the partial dependence was computed.} -#' \item{grid.size}{The size of the grid.} -#' \item{sample.size}{The number of instances sampled from data X.} -#' \item{feature(index)}{method to get/set feature(s) (by index) fpr which to compute pdp. See examples for usage.} -#' \item{data()}{method to extract the results of the partial dependence plot. -#' Returns a data.frame with the grid of feature of interest and the predicted \eqn{\hat{y}}. -#' Can be used for creating custom partial dependence plots.} -#' \item{plot()}{method to plot the partial dependence function. 
See \link{plot.PDP}} -#' @template args_internal_methods -#' @template arg_grid.size -#' -#' @importFrom tidyr gather -#' @importFrom dplyr one_of group_by group_by_at summarise -#' @import ggplot2 -#' @export -#' @examples -#' -#' # We train a random forest on the Boston dataset: -#' library("randomForest") -#' data("Boston", package = "MASS") -#' mod = randomForest(medv ~ ., data = Boston, ntree = 50) -#' -#' # Compute the partial dependence for the first feature -#' pdp.obj = pdp(mod, Boston, feature = 1) -#' -#' # Plot the results directly -#' plot(pdp.obj) -#' -#' -#' # Since the result is a ggplot object, you can extend it: -#' library("ggplot2") -#' plot(pdp.obj) + theme_bw() -#' -#' # If you want to do your own thing, just extract the data: -#' pdp.dat = pdp.obj$data() -#' head(pdp.dat) -#' -#' # You can reuse the pdp object for other features: -#' pdp.obj$feature = 2 -#' plot(pdp.obj) -#' -#' # Partial dependence plots support up to two features: -#' pdp.obj = pdp(mod, Boston, feature = c(1,2)) -#' -#' # Partial dependence plots also works with multiclass classification -#' library("randomForest") -#' mod = randomForest(Species ~ ., data= iris, ntree=50) -#' -#' # For some models we have to specify additional arguments for the predict function -#' plot(pdp(mod, iris, feature = 1, predict.args = list(type = 'prob'))) -#' -#' # For multiclass classification models, you can choose to only show one class: -#' plot(pdp(mod, iris, feature = 1, class = 1, predict.args = list(type = 'prob'))) -#' -#' # Partial dependence plots support up to two features: -#' pdp.obj = pdp(mod, iris, feature = c(1,3), predict.args = list(type = 'prob')) -#' pdp.obj$plot() -#' -pdp = function(object, X, feature, grid.size = 10, class=NULL, ...){ - samp = DataSampler$new(X) - pred = prediction.model(object, class = class, ...) - - PDP$new(predictor = pred, sampler = samp, feature = feature, grid.size = grid.size)$run() -} - - -#' Partial dependence plot -#' -#' plot.PDP() plots a line for a single feature and a tile plot for 2 features. -#' -#' For examples see \link{pdp} -#' @param x The partial dependence. A PDP R6 object -#' @param ... Further arguments for the objects plot function -#' @return ggplot2 plot object -#' @seealso -#' \link{pdp} -plot.PDP = function(x, ...){ - x$plot(x, ...) -} - -# TODO: Allow empty grid size, where grid points are drawn from X. 
-PDP = R6::R6Class('partial dependence plot', - inherit = Experiment, - public = list( - grid.size = NULL, - feature.index = NULL, - feature.names = NULL, - n.features = NULL, - feature.type= NULL, - initialize = function(predictor, sampler, feature, grid.size){ - checkmate::assert_numeric(feature, lower = 1, upper = sampler$n.features, min.len = 1, max.len = 2) - checkmate::assert_numeric(grid.size, min.len = 1, max.len = length(feature)) - if(length(feature) == 2) checkmate::assert_false(feature[1] == feature[2]) - super$initialize(predictor, sampler) - private$set.feature(feature) - private$set.grid.size(grid.size) - private$grid.size.original = grid.size - } - ), - private = list( - aggregate = function(){ - results = private$X.design[self$feature.index] - - if(private$multi.class){ - y.hat.names = colnames(private$Q.results) - results = cbind(results, private$Q.results) - results = gather(results, key = "..class.name", value = "y.hat", one_of(y.hat.names)) - } else { - results['y.hat']= private$Q.results - results['..class.name'] = private$predictor$class - } - results.grouped = group_by_at(results, self$feature.names) - if('..class.name' %in% colnames(results)) results.grouped = group_by(results.grouped, ..class.name, add = TRUE) - results = data.frame(summarise(results.grouped, y.hat = mean(y.hat))) - results - }, - intervene = function(){ - grid = get.1D.grid(private$X.sample[[self$feature.index[1]]], self$feature.type[1], self$grid.size[1]) - - private$X.design.ids = rep(1:nrow(private$X.sample), times = length(grid)) - X.design = private$X.sample[private$X.design.ids,] - X.design[self$feature.index[1]] = rep(grid, each = nrow(private$X.sample)) - - if(self$n.features == 2) { - grid2 = get.1D.grid(private$X.sample[[self$feature.index[2]]], self$feature.type[2], self$grid.size[2]) - private$X.design.ids = rep(private$X.design.ids, times = length(grid2)) - X.design2 = X.design[rep(1:nrow(X.design), times = length(grid2)), ] - X.design2[self$feature.index[2]] = rep(grid2, each = nrow(X.design)) - return(X.design2) - } - X.design - }, - X.design.ids = NULL, - grid.size.original = NULL, - set.feature = function(feature.index){ - self$feature.index = feature.index - self$n.features = length(feature.index) - self$feature.type = private$sampler$feature.types[feature.index] - self$feature.names = private$sampler$feature.names[feature.index] - }, - print.parameters = function(){ - cat('features:', paste(sprintf("%s[%s]", self$feature.names, self$feature.type), collapse = ", ")) - cat('\ngrid size:', paste(self$grid.size, collapse = "x")) - }, - generate.plot = function(){ - if(self$n.features == 1){ - p = ggplot(private$results, mapping = aes_string(x = self$feature.names,"y.hat")) - if(self$feature.type == 'numerical') p = p + geom_path() - else if (self$feature.type == 'categorical') p = p + geom_point() - } else if (self$n.features == 2){ - if(all(self$feature.type %in% 'numerical') | all(self$feature.type %in% 'categorical')) { - p = ggplot(private$results) + - geom_tile(aes_string(x = self$feature.names[1], - y = self$feature.names[2], - fill = "y.hat")) - } else { - categorical.feature = self$feature.names[self$feature.type=='categorical'] - numerical.feature = setdiff(self$feature.names, categorical.feature) - p = ggplot(private$results) + - geom_line(aes_string(x = numerical.feature, y = "y.hat", - group = categorical.feature, color = categorical.feature)) - } - } - if(private$multi.class){ - p = p + facet_wrap("..class.name") - } - p - }, - set.grid.size = function(size){ - 
self$grid.size = numeric(length=self$n.features) - names(self$grid.size) = private$sampler$feature.names[self$feature.index] - private$set.grid.size.single(size[1], 1) - if(self$n.features > 1) { - if(length(size) == 1) { - # If user only provided 1D grid size - private$set.grid.size.single(size[1], 2) - } else { - # If user provided 2D grid.size - private$set.grid.size.single(size[2], 2) - } - } - }, - set.grid.size.single = function(size, feature.number){ - self$grid.size[feature.number] = ifelse(self$feature.type[feature.number] == 'numerical', - size, length(unique(private$sampler$get.x()[[self$feature.index[feature.number]]]))) - } - ), - active = list( - feature = function(feature.index){ - private$flush() - private$set.feature(feature.index) - private$set.grid.size(private$grid.size.original) - self$run() - } - ) -) - - -## TODO:document -## TODO: Write test - -get.1D.grid = function(feature, feature.type, grid.size){ - checkmate::assert_vector(feature) - if(feature.type == 'numerical'){ - grid = seq(from = min(feature), - to = max(feature), - length.out = grid.size) - } else if(feature.type == 'categorical') { - grid = unique(feature) - } -} - - - - - - - diff --git a/R/shapley.r b/R/shapley.r deleted file mode 100644 index 1c1f46174..000000000 --- a/R/shapley.r +++ /dev/null @@ -1,173 +0,0 @@ -#' Explain predictions -#' -#' @description -#' shapley() computes feature contributions for single predictions with the Shapley value, an approach from cooperative game theory approach. -#' The features values of an instance cooperate to achieve the prediction. -#' shapley() fairly distributes the difference of the instance's prediction and the datasets average prediction among the features. -#' A features contribution can be negative. -#' -#' @details -#' See TODO: BOOK REFERENCE -#' -#' @seealso -#' A different way to explain predictions: \link{lime} -#' @template args_experiment_wrap -#' @template args_x.interest -#' @param sample.size Number of samples to be drawn to estimate the Shapley value. The higher the more accurate the estimations. -#' @return -#' A Shapley object (R6). Its methods and variables can be accessed with the \code{$}-operator: -#' \item{sample.size}{The number of times coalitions/marginals are sampled from data X. The higher the more accurate the explanations become.} -#' \item{x.interest}{data.frame with the instance of interest} -#' \item{y.hat.interest}{predicted value for instance of interest} -#' \item{y.hat.averate}{average predicted value for data \code{X}} -#' \item{x}{method to get/set the instance. See examples for usage.} -#' \item{data()}{method to extract the results of the shapley estimations. -#' Returns a data.frame with the feature names (\code{feature}) and contributions to the prediction (\code{phi})} -#' \item{plot()}{method to plot the Shapley value. See \link{plot.Shapley}} -#' @template args_internal_methods -#' @references -#' Strumbelj, E., Kononenko, I. (2014). Explaining prediction models and individual predictions with feature contributions. Knowledge and Information Systems, 41(3), 647-665. 
https://doi.org/10.1007/s10115-013-0679-x -#' @seealso -#' \link{lime} -#' @export -#' @examples -#' # First we fit a machine learning model on the Boston housing data -#' library("randomForest") -#' data("Boston", package = "MASS") -#' mod = randomForest(medv ~ ., data = Boston, ntree = 50) -#' X = Boston[-which(names(Boston) == "medv")] -#' -#' # Then we explain the first instance of the dataset with the shapley() method: -#' x.interest = X[1,] -#' shap = shapley(mod, X, x.interest = x.interest) -#' shap -#' -#' # Look at the results in a table -#' shap$data() -#' # Or as a plot -#' plot(shap) -#' -#' # shapley() also works with multiclass classification -#' library("randomForest") -#' mod = randomForest(Species ~ ., data= iris, ntree=50) -#' X = iris[-which(names(iris) == 'Species')] -#' -#' # Then we explain the first instance of the dataset with the shapley() method: -#' shap = shapley(mod, X, x.interest = X[1,], predict.args = list(type='prob')) -#' shap$data() -#' plot(shap) -#' -#'# You can also focus on one class -#' shap = shapley(mod, X, x.interest = X[1,], class = 2, predict.args = list(type='prob')) -#' shap$data() -#' plot(shap) -#' -shapley = function(object, X, x.interest, sample.size=100, class=NULL, ...){ - samp = DataSampler$new(X) - pred = prediction.model(object, class = class, ...) - - Shapley$new(predictor = pred, sampler = samp, x.interest=x.interest, sample.size=sample.size)$run() -} - - -#' Shapley plot -#' -#' plot.Shapley() plots the Shapley values - the contributions of feature values to the prediction. -#' -#' For examples see \link{shapley} -#' @param object A Shapley R6 object -#' @return ggplot2 plot object -#' @seealso -#' \link{shapley} -plot.Shapley = function(object){ - object$plot() -} - -Shapley = R6::R6Class('Shapley', - inherit = Experiment, - public = list( - x.interest = NULL, - y.hat.interest = NULL, - y.hat.average = NULL, - sample.size = NULL, - initialize = function(predictor, sampler, x.interest, sample.size){ - checkmate::assert_data_frame(x.interest) - super$initialize(predictor = predictor, sampler = sampler) - self$sample.size = sample.size - private$set.x.interest(x.interest) - private$get.data = function(...) private$sampler$sample(n = self$sample.size, ...) 
- } - ), - private = list( - aggregate = function(){ - y.hat.with.k = private$Q.results[1:(nrow(private$Q.results)/2), , drop = FALSE] - y.hat.without.k = private$Q.results[(nrow(private$Q.results)/2 + 1):nrow(private$Q.results), , drop = FALSE] - y.hat.diff = y.hat.with.k - y.hat.without.k - cnames = colnames(y.hat.diff) - y.hat.diff = cbind(data.frame(feature = rep(colnames(private$X.design), times = self$sample.size)), - y.hat.diff) - y.hat.diff = gather(y.hat.diff, key = "class", "value", one_of(cnames)) - y.hat.diff.grouped = group_by(y.hat.diff, feature, class) - y.hat.diff = summarise(y.hat.diff.grouped, phi = mean(value), phi.var = var(value)) - if(!private$multi.class) y.hat.diff$class = NULL - y.hat.diff - }, - intervene = function(){ - # The intervention - runs = lapply(1:self$sample.size, function(m){ - # randomly order features - new.feature.order = sample(1:private$sampler$n.features) - # randomly choose sample instance from X - sample.instance.shuffled = private$X.sample[sample(1:nrow(private$X.sample), 1), new.feature.order] - x.interest.shuffled = self$x.interest[new.feature.order] - - featurewise = lapply(1:private$sampler$n.features, function(k){ - k.at.index = which(new.feature.order == k) - instance.with.k = x.interest.shuffled - if(k.at.index < ncol(self$x.interest)){ - instance.with.k[(k.at.index + 1):ncol(instance.with.k)] = - sample.instance.shuffled[(k.at.index + 1):ncol(instance.with.k)] - } - instance.without.k = instance.with.k - instance.without.k[k.at.index] = sample.instance.shuffled[k.at.index] - cbind(instance.with.k[private$sampler$feature.names], instance.without.k[private$sampler$feature.names]) - }) - data.table::rbindlist(featurewise) - - }) - runs = data.table::rbindlist(runs) - dat.with.k = data.frame(runs[,1:(ncol(runs)/2)]) - dat.without.k = data.frame(runs[,(ncol(runs)/2 + 1):ncol(runs)]) - - rbind(dat.with.k, dat.without.k) - }, - set.x.interest = function(x.interest){ - self$x.interest = x.interest - self$y.hat.interest = private$predict(x.interest)[1,] - self$y.hat.average = colMeans(private$predict(private$sampler$get.x())) - }, - generate.plot = function(){ - p = ggplot(private$results) + geom_point(aes(y = feature, x = phi)) - if(private$multi.class) p = p + facet_wrap("class") - p - }, - print.parameters = function(){ - cat(sprintf('Predicted value: %f, Average prediction: %f (diff = %f)', - self$y.hat.interest, self$y.hat.average, self$y.hat.interest - self$y.hat.average)) - } - ), - active = list( - x = function(x.interest){ - private$flush() - private$set.x.interest(x.interest) - self$run() - self - } - ) -) - - - - - - diff --git a/R/tree-surrogate.r b/R/tree-surrogate.r deleted file mode 100644 index 331de6e72..000000000 --- a/R/tree-surrogate.r +++ /dev/null @@ -1,237 +0,0 @@ -#' Decision tree surrogate model -#' -#' @description -#' tree.surrogate() fits a decision tree on the predictions of a machine learning model to make it interpretable. -#' -#' @details -#' A conditional inference tree is fitted on the predicted \eqn{\hat{y}} from the machine learning model and the data \eqn{X}. -#' The \code{partykit} package and function are used to fit the tree. -#' By default a tree of maximum depth of 2 is fitted to improve interpretability. -#' @template arg_sample.size -#' @template args_experiment_wrap -#' @param maxdepth The maximum depth of the tree. Default is 2. -#' @param tree.args A list with further arguments for \code{ctree} -#' -#' @return -#' A TreeSurrogate object (R6). 
Its methods and variables can be accessed with the \code{$}-operator: -#' \item{tree}{the fitted tree of class \code{party}. See also \link[partykit]{ctree}.} -#' \item{maxdepth}{the maximal tree depth set by the user.} -#' \item{data()}{method to extract the results of the tree. -#' Returns the sampled feature X together with the leaf node information (columns ..node and ..path) -#' and the predicted \eqn{\hat{y}} for tree and machine learning model (columns starting with ..y.hat).} -#' \item{plot()}{method to plot the leaf nodes of the surrogate decision tree. See \link{plot.TreeSurrogate}} -#' \item{predict()}{method to predict new data with the tree. See also \link{predict.TreeSurrogate}} -#' @template args_internal_methods -#' -#' @examples -#' # Fit a Random Forest on the Boston housing data set -#' library("randomForest") -#' data("Boston", package = "MASS") -#' mod = randomForest(medv ~ ., data = Boston, ntree = 50) -#' -#' # Fit a decision tree as a surrogate for the whole random forest -#' dt = tree.surrogate(mod, Boston[-which(names(Boston) == 'medv')], 200) -#' -#' # Plot the resulting leaf nodes -#' plot(dt) -#' -#' # Use the tree to predict new data -#' predict(dt, Boston[1:10,]) -#' -#' # Extract the results -#' dat = dt$data() -#' head(dat) -#' -#' -#' # It also works for classification -#' mod = randomForest(Species ~ ., data = iris, ntree = 50) -#' -#' # Fit a decision tree as a surrogate for the whole random forest -#' X = iris[-which(names(iris) == 'Species')] -#' dt = tree.surrogate(mod, X, 200, predict.args = list(type = 'prob'), maxdepth=2, class=3) -#' -#' # Plot the resulting leaf nodes -#' plot(dt) -#' -#' # If you want to visualise the tree directly: -#' plot(dt$tree) -#' -#' # Use the tree to predict new data -#' set.seed(42) -#' iris.sample = X[sample(1:nrow(X), 10),] -#' predict(dt, iris.sample) -#' predict(dt, iris.sample, type = 'class') -#' -#' # Extract the dataset -#' dat = dt$data() -#' head(dat) -#' @seealso -#' \link{predict.TreeSurrogate} -#' \link{plot.TreeSurrogate} -#' -#' For the tree implementation -#' \link[partykit]{ctree} -#' @export -#' @import partykit -tree.surrogate = function(object, X, sample.size=100, class = NULL, maxdepth = 2, tree.args = NULL, ...){ - samp = DataSampler$new(X) - pred = prediction.model(object, class = class, ...) - - TreeSurrogate$new(predictor = pred, sampler = samp, sample.size = sample.size, maxdepth=maxdepth, tree.args = tree.args)$run() -} - -#' Surrogate tree prediction -#' -#' Predict the response for newdata with the surrogate tree -#' -#' This function makes the TreeSurrogate object call -#' its iternal object$predict() method. -#' For examples see \link{tree.surrogate} -#' @param object The surrogate tree. A TreeSurrogate R6 object -#' @param newdata A data.frame for which to predict -#' @param type Either "prob" or "class". Ignored if the surrogate tree does regression. -#' @param ... Further argumets for \code{predict_party} -#' @return A data.frame with the predicted outcome. -#' In case of regression it is the predicted \eqn{\hat{y}}. -#' In case of classification it is either the class probabilities *(for type "prob") or the class label (type "class") -#' @seealso -#' \link{tree.surrogate} -#' @importFrom stats predict -#' @export -predict.TreeSurrogate = function(object, newdata, type = "prob", ...){ - object$predict(newdata = newdata, type, ...) 
-} - - -#' Surrogate tree visualisation -#' -#' Predict the response for newdata with the surrogate tree -#' Each plot facet is one leaf node and visualises the distribution of the \eqn{\hat{y}} -#' from the machine learning model. -#' This makes the TreeSurrogate object call -#' its iternal object$plot() method. -#' -#' For examples see \link{tree.surrogate} -#' @param object The surrogate tree. A TreeSurrogate R6 object -#' @return ggplot2 plot object -#' @seealso -#' \link{tree.surrogate} -plot.TreeSurrogate = function(object){ - object$plot() -} - -## Craven, M. W., & Shavlik, J. W. (1996). -## Extracting tree-structured representations of trained neural networks. -## Advances in Neural Information Processing Systems, 8, 24–30. -## Retrieved from citeseer.ist.psu.edu/craven96extracting.html -TreeSurrogate = R6::R6Class('TreeSurrogate', - inherit = Experiment, - public = list( - # The fitted tree - tree = NULL, - # Maximal depth as set by the user - maxdepth = NULL, - sample.size = NULL, - initialize = function(predictor, sampler, sample.size, maxdepth, tree.args){ - super$initialize(predictor, sampler) - self$sample.size = sample.size - private$tree.args = tree.args - self$maxdepth = maxdepth - private$get.data = function(...) private$sampler$sample(n = self$sample.size, ...) - }, - predict = function(newdata, type = 'prob', ...){ - assert_choice(type, c('prob', 'class')) - res = data.frame(predict(self$tree, newdata = newdata, type = 'response', ...)) - if(private$multi.class){ - if(type == 'class') { - res = data.frame(..class = colnames(res)[apply(res, 1, which.max)]) - } - } else { - res = data.frame(..y.hat = predict(self$tree, newdata = newdata, ...)) - } - res - } - ), - private = list( - tree.args = NULL, - # Only relevant in multi.class case - tree.predict.colnames = NULL, - # Only relevant in multi.class case - object.predict.colnames = NULL, - - intervene = function(){private$X.sample}, - aggregate = function(){ - y.hat = private$Q.results - if(private$multi.class){ - classes = colnames(y.hat) - form = formula(sprintf("%s ~ .", paste(classes, collapse = "+"))) - } else { - y.hat = unlist(y.hat[1]) - form = y.hat ~ . 
- } - dat = cbind(y.hat, private$X.design) - tree.args = c(list(formula = form, data = dat, maxdepth = self$maxdepth), private$tree.args) - self$tree = do.call(partykit::ctree, tree.args) - result = data.frame(..node = predict(self$tree, type = 'node'), - ..path = pathpred(self$tree)) - if(private$multi.class){ - outcome = private$Q.results - colnames(outcome) = paste('..y.hat:', colnames(outcome), sep='') - private$object.predict.colnames = colnames(outcome) - - # result = gather(result, key = "..class", value = "..y.hat", one_of(cnames)) - ..y.hat.tree = self$predict(private$X.design, type = 'prob') - colnames(..y.hat.tree) = paste('..y.hat.tree:', colnames(..y.hat.tree), sep='') - private$tree.predict.colnames = colnames(..y.hat.tree) - - #..y.hat.tree = gather(..y.hat.tree, '..class.tree', '..y.hat.tree') - result = cbind(result, outcome, ..y.hat.tree) - } else { - result$..y.hat = private$Q.results[[1]] - result$..y.hat.tree = self$predict(private$X.design)[[1]] - } - design = private$X.design - rownames(design) = NULL - cbind(design, result) - }, - generate.plot = function(){ - p = ggplot(private$results) + - geom_boxplot(aes(y = ..y.hat, x = "")) + - scale_x_discrete('') + - facet_wrap("..path") - if(private$multi.class){ - plot.data = private$results - # max class for model - plot.data$..class = private$object.predict.colnames[apply(plot.data[private$object.predict.colnames], 1, which.max)] - plot.data$..class = gsub('..y.hat:', '', plot.data$..class) - plot.data = plot.data[setdiff(names(plot.data), private$object.predict.colnames)] - # dataset from wide to long - #plot.data$..class.tree = private$tree.predict.colnames[apply(plot.data[private$tree.predict.colnames], 1, which.max)] - #plot.data$..class.tree = gsub('..y.hat.tree:', '', plot.data$..class.tree) - #plot.data = plot.data[setdiff(names(plot.data), private$tree.predict.colnames)] - p = ggplot(plot.data) + - geom_bar(aes(x = ..class)) + - facet_wrap("..path") - } - p - } - - ) -) - - -# Return the paths of a ctree for each training data point -pathpred <- function(object, ...) 
-{ - ## coerce to "party" object if necessary - if(!inherits(object, "party")) object = partykit::as.party(object) - - ## get rules for each node - rls = list.rules.party(object) - - ## get predicted node and select corresponding rule - rules = rls[as.character(predict(object, type = "node", ...))] - rules = gsub("&", "&\n", rules) - - return(rules) -} diff --git a/R/util.R b/R/util.R deleted file mode 100644 index f01a47258..000000000 --- a/R/util.R +++ /dev/null @@ -1,85 +0,0 @@ -get.feature.type = function(feature.class){ - assertCharacter(feature.class) - - feature.types = c( - 'numeric'='numerical', - 'integer'='numerical', - 'character'='categorical', - 'factor'='categorical', - 'ordered'='categorical' - ) - - stopifnot(all(feature.class %in% names(feature.types))) - feature.types[feature.class] -} - - - -# returns TRUE if object has predict function -#' @importFrom utils methods -has.predict = function(object){ - classes = class(object) - any(unlist(lapply(classes, function(x){ - 'predict' %in% attr(methods(class = x), 'info')$generic - }))) -} - -#test = randomForest(Species ~ ., data = iris) - - -# Turn class probabilities into class labels -probs.to.labels = function(prediction){ - checkmate::assert_data_frame(prediction) - if(ncol(prediction) > 1){ - prediction = colnames(prediction)[apply(prediction, 1, which.max)] - data.frame(..class = prediction) - } else { - prediction - } -} - -#' Extract glmnet effects -#' @param betas glmnet$beta -#' @param best.index index k -#' @param x.scaled the scaled version of x -#' @param x.original the original x -#' Assuming that the first row is the x.interest -extract.glmnet.effects = function(betas, best.index, x.scaled, x.original){ - checkmate::assert_data_frame(x.scaled, nrows=1) - checkmate::assert_data_frame(x.original, nrows=1) - res = data.frame(beta = betas[, best.index]) - res$x.scaled = unlist(x.scaled[1,]) - res$effect = res$beta * res$x.scaled - res$x.original = unlist(lapply(x.original[1,], as.character)) - res$feature = colnames(x.scaled) - res$feature.value = sprintf('%s=%s', colnames(x.original), res$x.original) - res -} - - - - -# binarizes categorical variables: TRUE if same level as x.interest, else FALSE -# TODO: Keep level names for factors -# used in lime -recode = function(dat, x.interest){ - checkmate::assert_data_frame(dat) - checkmate::assert_data_frame(x.interest, nrows = 1, ncols = ncol(dat)) - checkmate::assert_true(all(colnames(dat) == colnames(x.interest))) - - types = unlist(lapply(dat, function(feature) get.feature.type(class(feature)))) - new.colnames = colnames(dat) - cat.index = types == 'categorical' - x.cat = unlist(lapply(x.interest[cat.index], as.character)) - new.colnames[cat.index] = sprintf('%s=%s', colnames(dat)[cat.index], x.cat) - - dat2 = data.frame(lapply(1:ncol(dat), function(x){ - if(types[x] == 'categorical'){ - 1 * (dat[[x]] == x.interest[[x]]) - } else { - dat[[x]] - } - })) - colnames(dat2) = new.colnames - dat2 -} \ No newline at end of file diff --git a/R/utils.R b/R/utils.R new file mode 100644 index 000000000..abc7f4980 --- /dev/null +++ b/R/utils.R @@ -0,0 +1,158 @@ +get.feature.type = function(feature.class) { + assertCharacter(feature.class) + + feature.types = c( + 'numeric'='numerical', + 'integer'='numerical', + 'character'='categorical', + 'factor'='categorical', + 'ordered'='categorical' + ) + + stopifnot(all(feature.class %in% names(feature.types))) + feature.types[feature.class] +} + + + +# returns TRUE if object has predict function +#' @importFrom utils methods 
+has.predict = function(object) { + classes = class(object) + any(unlist(lapply(classes, function(x) { + 'predict' %in% attr(methods(class = x), 'info')$generic + }))) +} + + +# Turn class probabilities into class labels +probs.to.labels = function(prediction, levels = NULL) { + checkmate::assert_data_frame(prediction) + if (ncol(prediction) > 1) { + prediction = colnames(prediction)[apply(prediction, 1, which.max)] + data.frame(..class = prediction) + } else { + prediction + } +} + +# Extract glmnet effects +# @param betas glmnet$beta +# @param best.index index k +# @param x.recoded the recoded version of x +# @param x.original the original x +# Assuming that the first row is the x.interest +extract.glmnet.effects = function(betas, best.index, x.recoded, x.original) { + checkmate::assert_data_frame(x.recoded, nrows=1) + checkmate::assert_data_frame(x.original, nrows=1) + res = data.frame(beta = betas[, best.index]) + res$x.recoded = unlist(x.recoded[1,]) + res$effect = res$beta * res$x.recoded + res$x.original = unlist(lapply(x.original[1,], as.character)) + res$feature = colnames(x.recoded) + res$feature.value = sprintf('%s=%s', colnames(x.original), res$x.original) + res +} + + + + +# binarizes categorical variables: TRUE if same level as x.interest, else FALSE +# used in lime +recode = function(dat, x.interest) { + checkmate::assert_data_frame(dat) + checkmate::assert_data_frame(x.interest, nrows = 1, ncols = ncol(dat)) + checkmate::assert_true(all(colnames(dat) == colnames(x.interest))) + + types = unlist(lapply(dat, function(feature) get.feature.type(class(feature)))) + new.colnames = colnames(dat) + cat.index = types == 'categorical' + x.cat = unlist(lapply(x.interest[cat.index], as.character)) + new.colnames[cat.index] = sprintf('%s=%s', colnames(dat)[cat.index], x.cat) + + dat2 = data.frame(lapply(1:ncol(dat), function(x) { + if (types[x] == 'categorical') { + 1 * (dat[[x]] == x.interest[[x]]) + } else { + dat[[x]] + } + })) + colnames(dat2) = new.colnames + dat2 +} + +# Return the paths of a ctree for each training data point +pathpred = function(object, ...) 
{
+  ## coerce to "party" object if necessary
+  if (!inherits(object, "party")) object = partykit::as.party(object)
+
+  ## get rules for each node
+  rls = list.rules.party(object)
+
+  ## get predicted node and select corresponding rule
+  rules = rls[as.character(predict(object, type = "node", ...))]
+  rules = gsub("&", "&\n", rules)
+
+  return(rules)
+}
+
+
+is.label.output = function(pred) {
+  if (inherits(pred, c("character", "factor"))) return(TRUE)
+  if (inherits(pred, c("data.frame", "matrix")) &&
+    inherits(pred[,1], "character") && ncol(pred) == 1) {
+    return(TRUE)
+  }
+  FALSE
+}
+
+
+get.1D.grid = function(feature, feature.type, grid.size) {
+  checkmate::assert_vector(feature, all.missing = FALSE, min.len = 2)
+  checkmate::assert_choice(feature.type, c("numerical", "categorical"))
+  checkmate::assert_numeric(grid.size)
+
+  if (feature.type == "numerical") {
+    # remove NaN, NA and Inf values
+    feature = feature[is.finite(feature)]
+    if (length(feature) == 0) stop("Feature does not contain any finite values.")
+
+    grid = seq(from = min(feature),
+      to = max(feature),
+      length.out = grid.size)
+  } else if (feature.type == "categorical") {
+    grid = unique(feature)
+  }
+  grid
+}
+
+checkPrediction = function(prediction, data) {
+  checkmate::assert_data_frame(data)
+  checkmate::assert_data_frame(prediction, nrows = nrow(data), any.missing = FALSE,
+    types = c("numeric", "integerish"))
+}
+
+# Currently not used
+sanitizePrediction = function(prediction) {
+  if (inherits(prediction, c("numeric", "integer"))) {
+    prediction = data.frame(..prediction = prediction, fix.empty.names = FALSE)
+  } else if (inherits(prediction, "matrix")) {
+    cnames = colnames(prediction)
+    # convert to a data.frame first, then fix the column names on the
+    # data.frame, so that a data.frame (not the input matrix) is returned
+    prediction = data.frame(prediction)
+    if (is.null(cnames)) {
+      if (ncol(prediction) == 1) {
+        colnames(prediction) = "..prediction"
+      } else {
+        colnames(prediction) = paste("..prediction", 1:ncol(prediction), sep = ".")
+      }
+    } else {
+      colnames(prediction) = cnames
+    }
+  } else {
+    prediction = data.frame(prediction)
+  }
+  prediction
+}
+
+
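A quick aside before the README changes: this is how the new `get.1D.grid()` helper above behaves for the two feature types. A minimal sketch, assuming the package code is loaded (e.g. via `devtools::load_all()`); the printed results follow from the function body shown above:

```r
# Numerical features: an equidistant grid from min to max; NA/NaN/Inf are dropped first.
get.1D.grid(c(1.5, NA, 3, 10), "numerical", grid.size = 4)
#  1.500000  4.333333  7.166667 10.000000

# Categorical features: the unique values; grid.size is ignored in this branch.
get.1D.grid(c("a", "b", "a"), "categorical", grid.size = 4)
# "a" "b"
```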
diff --git a/README.Rmd b/README.Rmd
index f67178184..ae3dda830 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -13,11 +13,13 @@ output: github_document
   - Partial dependence plots
   - Individual conditional expectation plots (ICE)
   - Tree surrogate
-  - LIME: Local Interpretable Model-agnostic Explanations
+  - LocalModel: Local Interpretable Model-agnostic Explanations
   - Shapley value for explaining single predictions

Read more about the methods in the [Interpretable Machine Learning book](https://christophm.github.io/interpretable-ml-book/agnostic.html)
+
+
```{r global_options, include=FALSE}
library(knitr)
opts_chunk$set(echo=TRUE, warning=FALSE, message=FALSE)
@@ -26,46 +28,50 @@ set.seed(42)
# Installation
The package can be installed directly from github with devtools:
-```{r, results = 'hide', eval = FALSE}
+```{r, results = "hide", eval = FALSE}
# install.packages("devtools")
-devtools::install_github('christophM/iml')
+devtools::install_github("christophM/iml")
```
+# News
+Changes to the package are documented in the [NEWS file](https://github.com/christophM/iml/blob/master/NEWS.md) shipped with the package.
# Examples
First we train a randomForest to predict the Boston median housing value
```{r}
-library('iml')
+library("iml")

-library('randomForest')
+library("randomForest")
data("Boston", package = "MASS")
-mod = randomForest(medv ~ ., data = Boston, ntree = 50)
+rf = randomForest(medv ~ ., data = Boston, ntree = 50)
+X = Boston[which(names(Boston) != "medv")]
+model = Predictor$new(rf, data = X, y = Boston$medv)
```

#### What were the most important features? (Permutation feature importance / Model reliance)
```{r}
-imp = feature.imp(mod, Boston, y = Boston$medv, loss = 'mae')
+imp = FeatureImp$new(model, loss = "mae")
plot(imp)
-imp$data()
+imp$results
```

### Let's build a single tree from the randomForest predictions! (Tree surrogate)
```{r}
-tree = tree.surrogate(mod, Boston[which(names(Boston) != 'medv')], maxdepth = 2)
+tree = TreeSurrogate$new(model, maxdepth = 2)
plot(tree)
```

### How does lstat influence the prediction on average? (Partial dependence plot)
```{r}
-pdp.obj = pdp(mod, Boston, feature = 13)
+pdp.obj = PartialDependence$new(model, feature = "lstat")
plot(pdp.obj)
```

### How does lstat influence the individual predictions? (ICE)
```{r}
-ice.curves = ice(mod, Boston[1:100,], feature = 13)
+ice.curves = Ice$new(model, feature = "lstat")
plot(ice.curves)
```

### Explain a single prediction with a local linear model. (LIME)
```{r}
-x = Boston[1,]
-lime.explain = lime(mod, Boston, x.interest = x)
-lime.explain$data()
+lime.explain = LocalModel$new(model, x.interest = X[1,])
+lime.explain$results
plot(lime.explain)
```

### Explain a single prediction with game theory. (Shapley)
```{r}
-x = Boston[1,]
-shapley.explain = shapley(mod, Boston, x.interest = x)
-shapley.explain$data()
+shapley.explain = Shapley$new(model, x.interest = X[1, ])
+shapley.explain$results
plot(shapley.explain)
```
diff --git a/README.md b/README.md
index 6c03d498b..7cbb96c35 100644
--- a/README.md
+++ b/README.md
@@ -12,7 +12,7 @@ Currently implemented:
 - Partial dependence plots
 - Individual conditional expectation plots (ICE)
 - Tree surrogate
-- LIME: Local Interpretable Model-agnostic Explanations
+- LocalModel: Local Interpretable Model-agnostic Explanations
 - Shapley value for explaining single predictions

Read more about the methods in the [Interpretable Machine Learning book](https://christophm.github.io/interpretable-ml-book/agnostic.html)
@@ -24,7 +24,7 @@ The package can be installed directly from github with devtools:
``` r
# install.packages("devtools")
-devtools::install_github('christophM/iml')
+devtools::install_github("christophM/iml")
```

Examples
========
First we train a randomForest to predict the Boston median housing value

``` r
-library('iml')
+library("iml")

-library('randomForest')
+library("randomForest")
data("Boston", package = "MASS")
-mod = randomForest(medv ~ ., data = Boston, ntree = 50)
+rf = randomForest(medv ~ ., data = Boston, ntree = 50)
+X = Boston[which(names(Boston) != "medv")]
+model = Predictor$new(rf, data = X, y = Boston$medv)
```

#### What were the most important features?
(Permutation feature importance / Model reliance)

``` r
-imp = feature.imp(mod, Boston, y = Boston$medv, loss = 'mae')
+imp = FeatureImp$new(model, loss = "mae")
plot(imp)
```

![](README_files/figure-markdown_github/unnamed-chunk-3-1.png)

``` r
-imp$data()
+imp$results
```

- ## # A tibble: 14 x 3
- ## ..feature error importance
- ##
- ## 1 age 1.28 1.30
- ## 2 black 1.24 1.25
- ## 3 chas 1.01 1.03
- ## 4 crim 1.65 1.67
- ## 5 dis 1.66 1.68
- ## 6 indus 1.47 1.49
- ## 7 lstat 4.23 4.29
- ## 8 medv 0.988 1.00
- ## 9 nox 1.64 1.66
- ## 10 ptratio 1.58 1.60
- ## 11 rad 1.11 1.12
- ## 12 rm 3.71 3.76
- ## 13 tax 1.24 1.26
- ## 14 zn 1.09 1.11
-
-### Let's build a single tree from the randomForest predictions! (Tree surrogate)
+ ## feature original.error permutationError importance
+ ## 1 lstat 1.004678 4.235347 4.215628
+ ## 2 rm 1.004678 3.273892 3.258649
+ ## 3 crim 1.004678 1.731467 1.723405
+ ## 4 ptratio 1.004678 1.729005 1.720955
+ ## 5 nox 1.004678 1.661731 1.653994
+ ## 6 dis 1.004678 1.653906 1.646205
+ ## 7 indus 1.004678 1.608825 1.601335
+ ## 8 age 1.004678 1.378753 1.372334
+ ## 9 tax 1.004678 1.360375 1.354041
+ ## 10 black 1.004678 1.246488 1.240685
+ ## 11 rad 1.004678 1.161532 1.156124
+ ## 12 zn 1.004678 1.061880 1.056936
+ ## 13 chas 1.004678 1.034714 1.029897
+
+### Let's build a single tree from the randomForest predictions! (Tree surrogate)

``` r
-tree = tree.surrogate(mod, Boston[which(names(Boston) != 'medv')], maxdepth = 2)
+tree = TreeSurrogate$new(model, maxdepth = 2)
plot(tree)
```

![](README_files/figure-markdown_github/unnamed-chunk-4-1.png)

### How does lstat influence the prediction on average? (Partial dependence plot)

``` r
-pdp.obj = pdp(mod, Boston, feature = 13)
+pdp.obj = PartialDependence$new(model, feature = "lstat")
plot(pdp.obj)
```

![](README_files/figure-markdown_github/unnamed-chunk-5-1.png)

### How does lstat influence the individual predictions? (ICE)

``` r
-ice.curves = ice(mod, Boston[1:100,], feature = 13)
+ice.curves = Ice$new(model, feature = "lstat")
plot(ice.curves)
```

![](README_files/figure-markdown_github/unnamed-chunk-6-1.png)

### Explain a single prediction with a local linear model. (LIME)

``` r
-x = Boston[1,]
-lime.explain = lime(mod, Boston, x.interest = x)
-lime.explain$data()
+lime.explain = LocalModel$new(model, x.interest = X[1,])
+lime.explain$results
```

- ## beta x.scaled effect x.original feature feature.value
- ## rm 1.03669473 6.575 6.8162678 6.575 rm rm=6.575
- ## lstat -0.05867633 4.980 -0.2922081 4.98 lstat lstat=4.98
- ## medv 0.72759814 24.000 17.4623553 24 medv medv=24
+ ## beta x.recoded effect x.original feature feature.value
+ ## rm 4.2445268 6.575 27.907764 6.575 rm rm=6.575
+ ## ptratio -0.5224666 15.300 -7.993738 15.3 ptratio ptratio=15.3
+ ## lstat -0.4287899 4.980 -2.135374 4.98 lstat lstat=4.98

``` r
plot(lime.explain)
```

![](README_files/figure-markdown_github/unnamed-chunk-7-1.png)

### Explain a single prediction with game theory. (Shapley)

``` r
-x = Boston[1,]
-shapley.explain = shapley(mod, Boston, x.interest = x)
-shapley.explain$data()
+shapley.explain = Shapley$new(model, x.interest = X[1, ])
+shapley.explain$results
```

- ## # A tibble: 14 x 3
- ## # Groups: feature [?]
- ## feature phi phi.var - ## - ## 1 age -0.00607 0.0969 - ## 2 black 0.0974 0.102 - ## 3 chas -0.00898 0.00539 - ## 4 crim -0.298 1.86 - ## 5 dis -0.147 1.40 - ## 6 indus 0.634 0.724 - ## 7 lstat 3.12 12.7 - ## 8 medv 0 0 - ## 9 nox -0.206 0.467 - ## 10 ptratio 0.553 0.698 - ## 11 rad -0.156 0.0683 - ## 12 rm 0.706 4.05 - ## 13 tax -0.111 0.378 - ## 14 zn 0.201 0.115 + ## feature phi phi.var feature.value + ## 1 age -0.0850019791 0.333944689 crim=0.00632 + ## 2 black -0.0008046753 0.267453217 zn=18 + ## 3 chas -0.0240226667 0.009907358 indus=2.31 + ## 4 crim -0.0723403620 1.236298269 chas=0 + ## 5 dis -0.1753825842 1.089173160 nox=0.538 + ## 6 indus 0.7402716277 1.556442963 rm=6.575 + ## 7 lstat 3.7001500745 19.387675029 age=65.2 + ## 8 nox -0.1974995303 0.727310327 dis=4.09 + ## 9 ptratio 0.7285225750 1.220922649 rad=1 + ## 10 rad -0.4722923333 0.407054214 tax=296 + ## 11 rm -0.0752455000 7.788278183 ptratio=15.3 + ## 12 tax -0.1853100000 0.393279176 black=396.9 + ## 13 zn -0.0950710000 0.042044428 lstat=4.98 ``` r plot(shapley.explain) diff --git a/README_files/figure-markdown_github-ascii_identifiers/unnamed-chunk-3-1.png b/README_files/figure-markdown_github-ascii_identifiers/unnamed-chunk-3-1.png new file mode 100644 index 000000000..f048d473b Binary files /dev/null and b/README_files/figure-markdown_github-ascii_identifiers/unnamed-chunk-3-1.png differ diff --git a/README_files/figure-markdown_github-ascii_identifiers/unnamed-chunk-4-1.png b/README_files/figure-markdown_github-ascii_identifiers/unnamed-chunk-4-1.png new file mode 100644 index 000000000..4b1d6a7cf Binary files /dev/null and b/README_files/figure-markdown_github-ascii_identifiers/unnamed-chunk-4-1.png differ diff --git a/README_files/figure-markdown_github-ascii_identifiers/unnamed-chunk-5-1.png b/README_files/figure-markdown_github-ascii_identifiers/unnamed-chunk-5-1.png new file mode 100644 index 000000000..2a95e1a2e Binary files /dev/null and b/README_files/figure-markdown_github-ascii_identifiers/unnamed-chunk-5-1.png differ diff --git a/README_files/figure-markdown_github-ascii_identifiers/unnamed-chunk-6-1.png b/README_files/figure-markdown_github-ascii_identifiers/unnamed-chunk-6-1.png new file mode 100644 index 000000000..22ff79a3a Binary files /dev/null and b/README_files/figure-markdown_github-ascii_identifiers/unnamed-chunk-6-1.png differ diff --git a/README_files/figure-markdown_github-ascii_identifiers/unnamed-chunk-7-1.png b/README_files/figure-markdown_github-ascii_identifiers/unnamed-chunk-7-1.png new file mode 100644 index 000000000..21cafa8e6 Binary files /dev/null and b/README_files/figure-markdown_github-ascii_identifiers/unnamed-chunk-7-1.png differ diff --git a/README_files/figure-markdown_github-ascii_identifiers/unnamed-chunk-8-1.png b/README_files/figure-markdown_github-ascii_identifiers/unnamed-chunk-8-1.png new file mode 100644 index 000000000..4098e0d66 Binary files /dev/null and b/README_files/figure-markdown_github-ascii_identifiers/unnamed-chunk-8-1.png differ diff --git a/README_files/figure-markdown_github/unnamed-chunk-3-1.png b/README_files/figure-markdown_github/unnamed-chunk-3-1.png index 880f36e26..fe7db6682 100644 Binary files a/README_files/figure-markdown_github/unnamed-chunk-3-1.png and b/README_files/figure-markdown_github/unnamed-chunk-3-1.png differ diff --git a/README_files/figure-markdown_github/unnamed-chunk-4-1.png b/README_files/figure-markdown_github/unnamed-chunk-4-1.png index 6c48f1767..dcb056bfb 100644 Binary files 
a/README_files/figure-markdown_github/unnamed-chunk-4-1.png and b/README_files/figure-markdown_github/unnamed-chunk-4-1.png differ diff --git a/README_files/figure-markdown_github/unnamed-chunk-5-1.png b/README_files/figure-markdown_github/unnamed-chunk-5-1.png index 9f40a4a73..f69d088a5 100644 Binary files a/README_files/figure-markdown_github/unnamed-chunk-5-1.png and b/README_files/figure-markdown_github/unnamed-chunk-5-1.png differ diff --git a/README_files/figure-markdown_github/unnamed-chunk-6-1.png b/README_files/figure-markdown_github/unnamed-chunk-6-1.png index 754af40fb..2860c0d96 100644 Binary files a/README_files/figure-markdown_github/unnamed-chunk-6-1.png and b/README_files/figure-markdown_github/unnamed-chunk-6-1.png differ diff --git a/README_files/figure-markdown_github/unnamed-chunk-7-1.png b/README_files/figure-markdown_github/unnamed-chunk-7-1.png index 2051d4764..d2b21cbaf 100644 Binary files a/README_files/figure-markdown_github/unnamed-chunk-7-1.png and b/README_files/figure-markdown_github/unnamed-chunk-7-1.png differ diff --git a/README_files/figure-markdown_github/unnamed-chunk-8-1.png b/README_files/figure-markdown_github/unnamed-chunk-8-1.png index ada2fa20e..d66049a3a 100644 Binary files a/README_files/figure-markdown_github/unnamed-chunk-8-1.png and b/README_files/figure-markdown_github/unnamed-chunk-8-1.png differ diff --git a/cran-comments.md b/cran-comments.md new file mode 100644 index 000000000..4dc4b0c3a --- /dev/null +++ b/cran-comments.md @@ -0,0 +1,20 @@ +## Test environments +* local Debian 9.3 stretch, R 3.3.3 +* local Mac OS High Sierra 10.13.3 (17D102) +* win-builder (devel and release) + +## R CMD check results +There were no ERRORs or WARNINGs. + +There was 1 NOTE: + + * NOTE +Maintainer: 'Christoph Molnar ' + +New submission + +Possibly mis-spelled words in DESCRIPTION: + Interpretable (3:8) + iml (9:18) + interpretability (9:63) + diff --git a/inst/framework.R b/inst/framework.R deleted file mode 100644 index 359715c89..000000000 --- a/inst/framework.R +++ /dev/null @@ -1,134 +0,0 @@ -library('mlr') -library('dplyr') -library('ggplot2') -library('digest') -library('iml') -library('caret') - - -X = iris[-which(names(iris) == 'Species')] -y = iris$Species - -## Generate the task -task = makeClassifTask(data = iris, target = "Species") - -## Generate the learner -lrn = makeLearner("classif.randomForest", predict.type = 'prob') - -## Train the learner -#mod = train(lrn, task) -mod = randomForest::randomForest(Species ~ ., data= iris) - -mod <- caret::train(Species ~ ., data = iris, method = "knn",trControl = caret::trainControl(method = "cv")) - - -## PDP -pdp.obj = pdp(object = mod, X=X, feature = c(1, 3), grid.size = 50) -pdp.obj -pdp.obj$data() - -plot(pdp.obj) + theme_bw() - - -plot(pdp.obj) + scale_fill_continuous(low='white', high='red') - - -pdp.obj$feature = c(3,4) -pdp.obj$plot() - -pdp.obj$feature = 1 -pdp.obj$plot() - - -pdp(mod, X, feature = 1, class = 2)$plot() - -## ICE -ice(object = mod, X=X, feature = 2, predict.args = list(type='prob'))$plot() - -ice1 = ice(mod, X=X, feature = 2, predict.args = list(type='prob')) -plot(ice1) - -ice1$data() -ice1$feature = 3 -ice1$plot() - -## ICE centered -ice1 = ice(mod, X=X, feature = 1, center.at = 4) -ice1$plot() - - -ice1 = ice(mod, X=X, feature = 1, center.at = 4, class = 2) -plot(ice1) -ice1$center.at = 4 -plot(ice1) - - - -## LIME - -i = 121 -x.interest = X[i,] - -lime(mod, X, 1000, x.interest=x.interest, predict.args = list(type = 'prob'), class = 1) - -lime1 = lime(mod, X, 1000, 
x.interest=x.interest, predict.args = list(type = 'prob')) -dat = lime1$data() -plot(lime1) -lime1$x <- X[i+1,] -lime1 - - -lime1 = lime(mod, X, 1000, x.interest=x.interest, predict.args = list(type = 'prob'), class = 2) -lime1 -plot(lime1) - -## Shapley -shapley(mod, X, x.interest, 100, predict.args = list(type = 'prob'), class = 3) -shapley1 = shapley(mod, X, x.interest, 100, class=NULL, predict.args = list(type = 'prob')) -shapley1$x = X[i+2,] -plot(shapley1) -shapley1 - -## Permutation feature importance -feature.imp(mod, X, y=1*(y=='virginica'), predict.args = list(type = 'prob'), class = 3, loss = 'mae')$data() -pimp = feature.imp(mod, X, y = y, loss = 'ce') -pimp$data() - -pimp = feature.imp(mod, X, y = y, loss = 'ce', method = 'cartesian') -pimp$data() -plot(pimp) - -feature.imp(mod, X, y=y, predict.args = list(type = 'prob'), loss = 'mae') - -library("randomForest") -data("Boston", package = "MASS") -mod = randomForest(medv ~ ., data = Boston, ntree = 50) -pimportance = feature.imp(mod, Boston[which(names(Boston) != 'medv')], y = Boston$medv, loss = 'mae', method = 'cartesian') - -pimp$data() -pimp$plot() - - -## tree surrogate model, centered -library("randomForest") -data("Boston", package = "MASS") -mod = randomForest(medv ~ ., data = Boston, ntree = 50) - -tree = tree.surrogate(mod, Boston[which(names(Boston) != 'medv')], 100, maxdepth = 4) - -mod = randomForest(Species ~ ., data = iris, ntree = 50) - -tree = tree.surrogate(mod, iris[which(names(iris) != 'Species')], 100, - maxdepth = 2, predict.args = list(type = 'prob')) - -plot(tree) - -print(tree) -tree$data() - -# get.tree.data -# Returns: data.frame with nodes -dat = X -# alternative: tree$data()$where, but works only for training data -tree.str = tree$data() - diff --git a/man/FeatureImp.Rd b/man/FeatureImp.Rd new file mode 100644 index 000000000..3eab748e2 --- /dev/null +++ b/man/FeatureImp.Rd @@ -0,0 +1,122 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/FeatureImp.R +\name{FeatureImp} +\alias{FeatureImp} +\title{Feature importance} +\format{\code{\link{R6Class}} object.} +\description{ +\code{FeatureImp} computes feature importances for prediction models. +The importance is measured as the factor by which the model's prediction error increases when the feature is shuffled. +} +\section{Usage}{ + +\preformatted{ +imp = FeatureImp$new(predictor, loss, method = "shuffle", run = TRUE) + +plot(imp) +imp$results +print(imp) +} +} + +\section{Arguments}{ + + +For FeatureImp$new(): +\describe{ +\item{predictor}{Object of type \code{Predictor}. See \link{Predictor}.} +\item{run}{logical. Should the Interpretation method be run?} +\item{loss}{The loss function. A string (e.g. "ce" for classification or "mse") or a function. See Details for allowed losses.} +\item{method}{Either "shuffle" or "cartesian". See Details.} +} +} + +\section{Details}{ + +Read the Interpretable Machine Learning book to learn in detail about feature importance: +\url{https://christophm.github.io/interpretable-ml-book/feature-importance.html} + +Two permutation schemes are implemented: +\itemize{ +\item shuffle: A simple shuffling of the feature values, yielding n perturbed instances per feature (fast) +\item cartesian: Matching every instance with the feature value of all other instances, yielding n x (n-1) perturbed instances per feature (very slow) +} + +The loss function can be either specified via a string, or by handing a function to \code{FeatureImp()}. 
+If you want to use your own loss function, it should have the signature function(actual, predicted).
+Using the string is a shortcut to using loss functions from the \code{Metrics} package.
+Only use functions that return a single performance value, not a vector.
+Allowed losses are: "ce", "f1", "logLoss", "mae", "mse", "rmse", "mape", "mdae",
+"msle", "percent_bias", "rae", "rmsle", "rse", "rrse", "smape".
+See \code{library(help = "Metrics")} to get a list of functions.
+}
+
+\section{Fields}{
+
+\describe{
+\item{original.error}{The loss of the model before perturbing features.}
+\item{predictor}{The prediction model that was analysed.}
+\item{results}{data.frame with the results of the feature importance computation.}
+}
+}
+
+\section{Methods}{
+
+\describe{
+\item{loss(actual,predicted)}{The loss function. Can also be applied to data: \code{object$loss(actual, predicted)}}
+\item{plot()}{method to plot the feature importances. See \link{plot.FeatureImp}}
+\item{\code{run()}}{[internal] method to run the interpretability method. Use \code{obj$run(force = TRUE)} to force a rerun.}
+\item{\code{clone()}}{[internal] method to clone the R6 object.}
+\item{\code{initialize()}}{[internal] method to initialize the R6 object.}
+}
+}
+
+\examples{
+if (require("rpart")) {
+# We train a tree on the Boston dataset:
+data("Boston", package = "MASS")
+tree = rpart(medv ~ ., data = Boston)
+y = Boston$medv
+X = Boston[-which(names(Boston) == "medv")]
+mod = Predictor$new(tree, data = X, y = y)
+
+# Compute feature importances as the performance drop in mean absolute error
+imp = FeatureImp$new(mod, loss = "mae")
+
+# Plot the results directly
+plot(imp)
+
+
+# Since the result is a ggplot object, you can extend it:
+if (require("ggplot2")) {
+  plot(imp) + theme_bw()
+  # If you want to do your own thing, just extract the data:
+  imp.dat = imp$results
+  head(imp.dat)
+  ggplot(imp.dat, aes(x = feature, y = importance)) + geom_point() +
+    theme_bw()
+}
+
+# FeatureImp also works with multiclass classification.
+# In this case, the importance measurement regards all classes
+tree = rpart(Species ~ ., data = iris)
+X = iris[-which(names(iris) == "Species")]
+y = iris$Species
+predict.fun = function(object, newdata) predict(object, newdata, type = "prob")
+mod = Predictor$new(tree, data = X, y = y, predict.fun = predict.fun)
+
+# For some models we have to specify additional arguments for the predict function
+imp = FeatureImp$new(mod, loss = "ce")
+plot(imp)
+
+# For multiclass classification models, you can choose to only compute performance for one class.
+# Make sure to adapt y
+mod = Predictor$new(tree, data = X, y = y == "virginica",
+  predict.fun = predict.fun, class = "virginica")
+imp = FeatureImp$new(mod, loss = "ce")
+plot(imp)
+}
+}
+\references{
+Fisher, A., Rudin, C., and Dominici, F. (2018). Model Class Reliance: Variable Importance Measures for any Machine Learning Model Class, from the "Rashomon" Perspective. Retrieved from http://arxiv.org/abs/1801.01489
+}
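The Details section above says a custom loss only needs the signature `function(actual, predicted)` and a single return value. A minimal sketch, reusing the Boston rpart Predictor `mod` from the start of the examples; the helper name `medae` is made up here:

```r
# Median absolute error as a user-supplied loss; signature function(actual, predicted),
# returning one number, as the docs above require. (Hypothetical helper name.)
medae = function(actual, predicted) median(abs(actual - predicted))

# `mod` is the Predictor built on the Boston rpart model in the examples above.
imp.medae = FeatureImp$new(mod, loss = medae)
imp.medae$results
```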
diff --git a/man/Ice.Rd b/man/Ice.Rd
new file mode 100644
index 000000000..53e1772e0
--- /dev/null
+++ b/man/Ice.Rd
@@ -0,0 +1,133 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/Ice.R
+\name{Ice}
+\alias{Ice}
+\title{Individual conditional expectations (Ice)}
+\format{\code{\link{R6Class}} object.}
+\description{
+\code{Ice} fits and plots individual conditional expectation curves for prediction models.
+}
+\section{Usage}{
+
+\preformatted{
+ice = Ice$new(predictor, feature, grid.size = 20, center.at = NULL, run = TRUE)
+
+plot(ice)
+ice$results
+print(ice)
+ice$set.feature(2)
+ice$center(1)
+}
+}
+
+\section{Arguments}{
+
+
+For Ice$new():
+\describe{
+\item{predictor}{Object of type \code{Predictor}. See \link{Predictor}.}
+\item{feature}{The feature name or index for which to compute the individual conditional expectations.}
+\item{grid.size}{The size of the grid for evaluating the predictions.}
+\item{center.at}{The value for the centering of the plot. Numeric for numeric features, and the level name for factors.}
+\item{run}{logical. Should the Interpretation method be run?}
+}
+}
+
+\section{Details}{
+
+The individual conditional expectation curves show how the prediction for each instance changes
+when we vary a single feature.
+
+To learn more about individual conditional expectation,
+read the Interpretable Machine Learning book: https://christophm.github.io/interpretable-ml-book/ice.html
+}
+
+\section{Fields}{
+
+\describe{
+\item{feature.index}{The index of the features for which the individual conditional expectations were computed.}
+\item{feature.name}{The names of the features for which the individual conditional expectations were computed.}
+\item{feature.type}{The detected types of the features, either "categorical" or "numerical".}
+\item{center.at}{The value for the centering of the plot. Numeric for numeric features, and the level name for factors.}
+\item{grid.size}{The size of the grid.}
+\item{predictor}{The prediction model that was analysed.}
+\item{results}{data.frame with the grid of the feature of interest and the predicted \eqn{\hat{y}}.}
+}
+}
+
+\section{Methods}{
+
+\describe{
+\item{center()}{method to set the value at which the ice computations are centered. See examples.}
+\item{set.feature()}{method to set the feature (index) for which to compute individual conditional expectations. See examples for usage.}
+\item{plot()}{method to plot the individual conditional expectations. See \link{plot.Ice}.}
+\item{\code{clone()}}{[internal] method to clone the R6 object.}
+\item{\code{initialize()}}{[internal] method to initialize the R6 object.}
+}
+}
+
+\examples{
+# We train a random forest on the Boston dataset:
+if (require("randomForest")) {
+
+data("Boston", package = "MASS")
+rf = randomForest(medv ~ ., data = Boston, ntree = 50)
+mod = Predictor$new(rf, data = Boston)
+
+# Compute the individual conditional expectations for the first feature
+ice = Ice$new(mod, feature = "crim")
+
+# Plot the results directly
+plot(ice)
+
+# You can center the Ice plot
+ice$center(0)
+plot(ice)
+
+# Ice plots can be centered at initialization
+ice = Ice$new(mod, feature = "crim", center.at = 75)
+plot(ice)
+
+# Centering can also be removed
+ice$center(NULL)
+plot(ice)
+
+# Since the result is a ggplot object, you can extend it:
+if (require("ggplot2")) {
+plot(ice) + theme_bw()
+
+
+# If you want to do your own thing, just extract the data:
+iceData = ice$results
+head(iceData)
+ggplot(iceData) +
+geom_line(aes(x = crim, y = y.hat, group = ..individual, color = factor(..individual))) +
+scale_color_discrete(guide = "none")
+}
+# You can reuse the ice object for other features:
+ice$set.feature("lstat")
+plot(ice)
+
+# Ice also works with multiclass classification
+rf = randomForest(Species ~ ., data = iris, ntree = 50)
+predict.fun = function(obj, newdata) predict(obj, newdata, type = "prob")
+mod = Predictor$new(rf, data = iris, predict.fun = predict.fun)
+
+# For some models we have to specify additional arguments for the predict function
+plot(Ice$new(mod, feature = "Sepal.Length"))
+
+# For multiclass classification models, you can choose to only show one class:
+mod = Predictor$new(rf, data = iris, predict.fun = predict.fun, class = "virginica")
+plot(Ice$new(mod, feature = "Sepal.Length"))
+
+# Ice plots can be centered:
+plot(Ice$new(mod, feature = "Sepal.Length", center.at = 1))
+}
+}
+\references{
+Goldstein, A., Kapelner, A., Bleich, J., and Pitkin, E. (2013). Peeking Inside the Black Box:
+Visualizing Statistical Learning with Plots of Individual Conditional Expectation, 1-22. https://doi.org/10.1080/10618600.2014.907095
+}
+\seealso{
+\link{PartialDependence} for partial dependence plots (aggregated ice plots)
+}
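To make the Details section concrete: an (uncentered) ICE curve is just the model re-predicted for one instance while a single feature sweeps over a grid. A hand-rolled sketch, assuming the Boston `rf` from the examples above:

```r
# One ICE curve by hand for the first instance, varying `crim` only.
x = Boston[1, ]
grid = seq(min(Boston$crim), max(Boston$crim), length.out = 20)
curve = sapply(grid, function(v) {
  x$crim = v                 # overwrite the feature of interest (local copy of x)
  predict(rf, newdata = x)   # all other feature values stay fixed
})
plot(grid, curve, type = "l", xlab = "crim", ylab = "predicted medv")
```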
diff --git a/man/LocalModel.Rd b/man/LocalModel.Rd
new file mode 100644
index 000000000..f71eb7222
--- /dev/null
+++ b/man/LocalModel.Rd
@@ -0,0 +1,126 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/LocalModel.R
+\name{LocalModel}
+\alias{LocalModel}
+\title{LocalModel}
+\format{\code{\link{R6Class}} object.}
+\description{
+\code{LocalModel} fits locally weighted linear regression models (logistic regression for classification) to explain single predictions of a prediction model.
+}
+\section{Usage}{
+
+\preformatted{
+lime = LocalModel$new(predictor, x.interest = NULL, k = 3, run = TRUE)
+
+plot(lime)
+predict(lime, newdata)
+lime$results
+lime$explain(x.interest)
+print(lime)
+}
+}
+
+\section{Arguments}{
+
+
+For LocalModel$new():
+\describe{
+\item{predictor}{Object of type \code{Predictor}. See \link{Predictor}.}
+\item{x.interest}{data.frame with a single row for the instance to be explained.}
+\item{k}{the (maximum) number of features to be used for the surrogate model.}
+\item{run}{logical. Should the Interpretation method be run?}
+}
+}
+
+\section{Details}{
+
+A weighted glm is fitted with the machine learning model prediction as target.
+Data points are weighted by their proximity to the instance to be explained, using the gower proximity measure.
+L1-regularisation is used to make the results sparse.
+The resulting model can be seen as a surrogate for the machine learning model, which is only valid for that one point.
+Categorical features are binarized, depending on the category of the instance to be explained: 1 if the category is the same, 0 otherwise.
+To learn more about local models, read the Interpretable Machine Learning book: https://christophm.github.io/interpretable-ml-book/lime.html
+
+The approach is similar to LIME, but has the following differences:
+\itemize{
+\item Distance measure: Uses gower proximity (= 1 - gower distance) instead of a kernel based on the Euclidean distance. This has the advantage of a meaningful neighbourhood and no kernel width to tune.
+\item Sampling: Uses the original data instead of sampling from normal distributions.
+This has the advantage of following the original data distribution.
+\item Visualisation: Plots effects instead of betas. Both are the same for binary features, but are different for numerical features.
+For numerical features, plotting the betas makes no sense,
+because a negative beta might still increase the prediction when the feature value is also negative.
+}
+}
+
+\section{Fields}{
+
+\describe{
+\item{best.fit.index}{The index of the best glmnet fit.}
+\item{k}{The number of features as set by the user.}
+\item{model}{The glmnet object.}
+\item{predictor}{The prediction model that was analysed.}
+\item{results}{data.frame with the feature names (\code{feature}) and contributions to the prediction}
+\item{x.interest}{The data.frame with the instance to be explained. See Examples for usage.}
+}
+}
+
+\section{Methods}{
+
+\describe{
+\item{explain(x.interest)}{method to set a new data point to explain.}
+\item{plot()}{method to plot the LocalModel feature effects. See \link{plot.LocalModel}}
+\item{predict()}{method to predict new data with the local model. See also \link{predict.LocalModel}}
+\item{\code{run()}}{[internal] method to run the interpretability method. Use \code{obj$run(force = TRUE)} to force a rerun.}
+\item{\code{clone()}}{[internal] method to clone the R6 object.}
+\item{\code{initialize()}}{[internal] method to initialize the R6 object.}
+}
+}
+
+\examples{
+if (require("randomForest")) {
+# First we fit a machine learning model on the Boston housing data
+data("Boston", package = "MASS")
+X = Boston[-which(names(Boston) == "medv")]
+rf = randomForest(medv ~ ., data = Boston, ntree = 50)
+mod = Predictor$new(rf, data = X)
+
+# Explain the first instance of the dataset with the LocalModel method:
+x.interest = X[1,]
+lemon = LocalModel$new(mod, x.interest = x.interest, k = 2)
+lemon
+
+# Look at the results in a table
+lemon$results
+# Or as a plot
+plot(lemon)
+
+# Reuse the object with a new instance to explain
+lemon$x.interest
+lemon$explain(X[2,])
+lemon$x.interest
+plot(lemon)
+
+# LocalModel also works with multiclass classification
+rf = randomForest(Species ~ ., data = iris, ntree = 50)
+X = iris[-which(names(iris) == "Species")]
+predict.fun = function(object, newdata) predict(object, newdata, type = "prob")
+mod = Predictor$new(rf, data = X, predict.fun = predict.fun, class = "setosa")
+
+# Then we explain the first instance of the dataset with the LocalModel method:
+lemon = LocalModel$new(mod, x.interest = X[1,], k = 2)
+lemon$results
+plot(lemon)
+}
+}
+\references{
+Ribeiro, M. T., Singh, S., & Guestrin, C. (2016). "Why Should I Trust You?": Explaining the Predictions of Any Classifier. Retrieved from http://arxiv.org/abs/1602.04938
+
+Gower, J. C. (1971), "A general coefficient of similarity and some of its properties". Biometrics, 27, 623--637.
+}
+\seealso{
+\code{\link{plot.LocalModel}} and \code{\link{predict.LocalModel}}
+
+\code{\link{Shapley}} can also be used to explain single predictions
+
+\code{\link[lime]{lime}}, the original implementation
+}
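The binarization of categorical features described in the Details above is what the `recode()` helper in `R/utils.R` (shown earlier in this diff) implements. A small illustration; the printed output is what the function body implies under R's default printing:

```r
# Categorical columns become 1 if the row matches x.interest's category, else 0;
# numerical columns pass through unchanged.
dat = data.frame(color = c("red", "blue", "red"), x = c(1, 2, 3),
  stringsAsFactors = FALSE)
x.interest = data.frame(color = "red", x = 2, stringsAsFactors = FALSE)
recode(dat, x.interest)
#   color=red x
# 1         1 1
# 2         0 2
# 3         1 3
```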
diff --git a/man/PartialDependence.Rd b/man/PartialDependence.Rd
new file mode 100644
index 000000000..014318311
--- /dev/null
+++ b/man/PartialDependence.Rd
@@ -0,0 +1,121 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/PartialDependence.R
+\name{PartialDependence}
+\alias{PartialDependence}
+\title{Partial Dependence Plot}
+\format{\code{\link{R6Class}} object.}
+\description{
+\code{PartialDependence} computes and plots partial dependence functions of prediction models.
+}
+\section{Usage}{
+
+\preformatted{
+pdp = PartialDependence$new(predictor, feature, grid.size = 20, run = TRUE)
+
+plot(pdp)
+pdp$results
+print(pdp)
+pdp$set.feature(2)
+}
+}
+
+\section{Arguments}{
+
+
+For PartialDependence$new():
+\describe{
+\item{predictor}{Object of type \code{Predictor}. See \link{Predictor}.}
+\item{feature}{The feature name or index for which to compute the partial dependencies.
+Either a single number or vector of two numbers.}
+\item{grid.size}{The size of the grid for evaluating the predictions.}
+\item{run}{logical. Should the Interpretation method be run?}
+}
+}
+
+\section{Details}{
+
+The partial dependence plot calculates and plots the dependence of f(X) on a single or two features.
+To learn more about partial dependence plots, read the Interpretable Machine Learning book:
+https://christophm.github.io/interpretable-ml-book/pdp.html
+}
+
+\section{Fields}{
+
+\describe{
+\item{feature.index}{The index of the features for which the partial dependence was computed.}
+\item{feature.name}{The names of the features for which the partial dependence was computed.}
+\item{feature.type}{The detected types of the features, either "categorical" or "numerical".}
+\item{grid.size}{The size of the grid.}
+\item{n.features}{The number of features (either 1 or 2)}
+\item{predictor}{The prediction model that was analysed.}
+\item{results}{data.frame with the grid of the feature of interest and the predicted \eqn{\hat{y}}.
+Can be used for creating custom partial dependence plots.}
+}
+}
+
+\section{Methods}{
+
+\describe{
+\item{set.feature}{method to get/set feature(s) (by index) for which to compute the partial dependence. See examples for usage.}
+\item{plot()}{method to plot the partial dependence function. See \link{plot.PartialDependence}}
+\item{\code{run()}}{[internal] method to run the interpretability method. Use \code{obj$run(force = TRUE)} to force a rerun.}
+\item{\code{clone()}}{[internal] method to clone the R6 object.}
+\item{\code{initialize()}}{[internal] method to initialize the R6 object.}
+}
+}
+
+\examples{
+# We train a random forest on the Boston dataset:
+if (require("randomForest")) {
+data("Boston", package = "MASS")
+rf = randomForest(medv ~ ., data = Boston, ntree = 50)
+mod = Predictor$new(rf, data = Boston)
+
+# Compute the partial dependence for the first feature
+pdp.obj = PartialDependence$new(mod, feature = "crim")
+
+# Plot the results directly
+plot(pdp.obj)
+
+# Since the result is a ggplot object, you can extend it:
+if (require("ggplot2")) {
+  plot(pdp.obj) + theme_bw()
+}
+
+# If you want to do your own thing, just extract the data:
+pdp.dat = pdp.obj$results
+head(pdp.dat)
+
+# You can reuse the pdp object for other features:
+pdp.obj$set.feature("lstat")
+plot(pdp.obj)
+
+# Partial dependence plots support up to two features:
+pdp.obj = PartialDependence$new(mod, feature = c("crim", "lstat"))
+plot(pdp.obj)
+
+# Partial dependence plots also work with multiclass classification
+rf = randomForest(Species ~ ., data = iris, ntree = 50)
+predict.fun = function(object, newdata) predict(object, newdata, type = "prob")
+mod = Predictor$new(rf, data = iris, predict.fun = predict.fun)
+
+# For some models we have to specify additional arguments for the predict function
+plot(PartialDependence$new(mod, feature = "Sepal.Length"))
+
+# Partial dependence plots support up to two features:
+pdp.obj = PartialDependence$new(mod, feature = c("Sepal.Length", "Petal.Length"))
+pdp.obj$plot()
+
+# For multiclass classification models, you can choose to only show one class:
+mod = Predictor$new(rf, data = iris, predict.fun = predict.fun, class = 1)
+plot(PartialDependence$new(mod, feature = "Sepal.Length"))
+}
+}
+\references{
+Friedman, J.H. 2001. "Greedy Function Approximation: A Gradient Boosting Machine." Annals of Statistics 29: 1189-1232.
+}
+\seealso{
+\link{plot.PartialDependence}
+
+\code{\link{Ice}} for individual conditional expectation plots.
+}
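The estimator behind PartialDependence is easy to state by hand: for each grid value, impute it for every instance and average the predictions. A sketch, assuming the Boston `rf` from the examples above:

```r
# Partial dependence of predicted medv on lstat, computed manually.
grid = seq(min(Boston$lstat), max(Boston$lstat), length.out = 20)
pd = sapply(grid, function(v) {
  X.tmp = Boston
  X.tmp$lstat = v                     # set lstat to the grid value for all rows
  mean(predict(rf, newdata = X.tmp))  # average over the data distribution
})
plot(grid, pd, type = "l", xlab = "lstat", ylab = "average predicted medv")
```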
"Why Should I Trust You?": Explaining the Predictions of Any Classifier. Retrieved from http://arxiv.org/abs/1602.04938 + +Gower, J. C. (1971), "A general coefficient of similarity and some of its properties". Biometrics, 27, 623--637. +} +\seealso{ +\code{\link{plot.LocalModel}} and \code{\link{predict.LocalModel}} + +\code{\link{Shapley}} can also be used to explain single predictions + +\code{\link[lime]{lime}}, the original implementation +} diff --git a/man/PartialDependence.Rd b/man/PartialDependence.Rd new file mode 100644 index 000000000..014318311 --- /dev/null +++ b/man/PartialDependence.Rd @@ -0,0 +1,121 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PartialDependence.R +\name{PartialDependence} +\alias{PartialDependence} +\title{Partial Dependence Plot} +\format{\code{\link{R6Class}} object.} +\description{ +\code{PartialDependence} computes and plots partial dependence functions of prediction models. +} +\section{Usage}{ + +\preformatted{ +pdp = PartialDependence$new(predictor, feature, grid.size = 20, run = TRUE) + +plot(pdp) +pdp$results +print(pdp) +pdp$set.feature(2) +} +} + +\section{Arguments}{ + + +For PartialDependence$new(): +\describe{ +\item{predictor}{Object of type \code{Predictor}. See \link{Predictor}.} +\item{feature}{The feature name or index for which to compute the partial dependencies. +Either a single number or vector of two numbers.} +\item{grid.size}{The size of the grid for evaluating the predictions} +\item{run}{logical. Should the Interpretation method be run?} +} +} + +\section{Details}{ + +The partial dependence plot calculates and plots the dependence of f(X) on a single or two features. +To learn more about partial dependence plot, read the Interpretable Machine Learning book: +https://christophm.github.io/interpretable-ml-book/pdp.html +} + +\section{Fields}{ + +\describe{ +\item{feature.index}{The index of the features for which the partial dependence was computed.} +\item{feature.name}{The names of the features for which the partial dependence was computed.} +\item{feature.type}{The detected types of the features, either "categorical" or "numerical".} +\item{grid.size}{The size of the grid.} +\item{n.features}{The number of features (either 1 or 2)} +\item{predictor}{The prediction model that was analysed.} +\item{results}{data.frame with the grid of feature of interest and the predicted \eqn{\hat{y}}. +Can be used for creating custom partial dependence plots.} +} +} + +\section{Methods}{ + +\describe{ +\item{set.feature}{method to get/set feature(s) (by index) fpr which to compute pdp. See examples for usage.} +\item{plot()}{method to plot the partial dependence function. See \link{plot.PartialDependence}} +\item{\code{run()}}{[internal] method to run the interpretability method. 
diff --git a/man/Shapley.Rd b/man/Shapley.Rd
new file mode 100644
index 000000000..e45d6e5d2
--- /dev/null
+++ b/man/Shapley.Rd
@@ -0,0 +1,110 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/Shapley.R
+\name{Shapley}
+\alias{Shapley}
+\title{Prediction explanations with game theory}
+\format{\code{\link{R6Class}} object.}
+\description{
+\code{Shapley} computes feature contributions for single predictions with the Shapley value, an approach from cooperative game theory.
+The feature values of an instance cooperate to achieve the prediction.
+The Shapley value fairly distributes the difference of the instance's prediction and the dataset's average prediction among the features.
+}
+\section{Usage}{
+
+\preformatted{
+shapley = Shapley$new(predictor, x.interest = NULL, sample.size = 100, run = TRUE)
+
+plot(shapley)
+shapley$results
+print(shapley)
+shapley$explain(x.interest)
+}
+}
+
+\section{Arguments}{
+
+For Shapley$new():
+\describe{
+\item{predictor}{Object of type \code{Predictor}. See \link{Predictor}.}
+\item{x.interest}{data.frame with a single row for the instance to be explained.}
+\item{sample.size}{The number of Monte Carlo samples for estimating the Shapley value.}
+\item{run}{logical. Should the Interpretation method be run?}
+}
+}
+
+\section{Details}{
+
+For more details on the algorithm see https://christophm.github.io/interpretable-ml-book/shapley.html
+}
+
+\section{Fields}{
+
+\describe{
+\item{predictor}{The prediction model that was analysed.}
+\item{results}{data.frame with the Shapley values (phi) per feature.}
+\item{sample.size}{The number of times coalitions/marginals are sampled from data X. The higher this number, the more accurate the explanations become.}
+\item{x.interest}{data.frame with a single row for the instance to be explained.}
+\item{y.hat.interest}{predicted value for instance of interest}
+\item{y.hat.average}{average predicted value for data \code{X}}
+}
+}
+
+\section{Methods}{
+
+\describe{
+\item{explain(x.interest)}{method to set a new data point to explain.}
+\item{plot()}{method to plot the Shapley value. See \link{plot.Shapley}}
+\item{\code{run()}}{[internal] method to run the interpretability method. Use \code{obj$run(force = TRUE)} to force a rerun.}
+\item{\code{clone()}}{[internal] method to clone the R6 object.}
+\item{\code{initialize()}}{[internal] method to initialize the R6 object.}
+}
+}
+
+\examples{
+if (require("randomForest")) {
+# First we fit a machine learning model on the Boston housing data
+data("Boston", package = "MASS")
+rf = randomForest(medv ~ ., data = Boston, ntree = 50)
+X = Boston[-which(names(Boston) == "medv")]
+mod = Predictor$new(rf, data = X)
+
+# Then we explain the first instance of the dataset with the Shapley method:
+x.interest = X[1,]
+shapley = Shapley$new(mod, x.interest = x.interest)
+shapley
+
+# Look at the results in a table
+shapley$results
+# Or as a plot
+plot(shapley)
+
+# Explain another instance
+shapley$explain(X[2,])
+plot(shapley)
+
+# Shapley() also works with multiclass classification
+rf = randomForest(Species ~ ., data = iris, ntree = 50)
+X = iris[-which(names(iris) == "Species")]
+predict.fun = function(object, newdata) predict(object, newdata, type = "prob")
+mod = Predictor$new(rf, data = X, predict.fun = predict.fun)
+
+# Then we explain the first instance of the dataset with the Shapley() method:
+shapley = Shapley$new(mod, x.interest = X[1,])
+shapley$results
+plot(shapley)
+
+# You can also focus on one class
+mod = Predictor$new(rf, data = X, predict.fun = predict.fun, class = "setosa")
+shapley = Shapley$new(mod, x.interest = X[1,])
+shapley$results
+plot(shapley)
+}
+}
+\references{
+Strumbelj, E., Kononenko, I. (2014). Explaining prediction models and individual predictions with feature contributions. Knowledge and Information Systems, 41(3), 647-665. https://doi.org/10.1007/s10115-013-0679-x
+}
+\seealso{
+A different way to explain predictions: \link{LocalModel}
+}
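A property worth knowing: the estimated contributions should sum, up to Monte Carlo error, to the difference between the instance's prediction and the average prediction, which the fields documented above let you check. A sketch, assuming the regression `mod` and `X` from the beginning of the examples:

```r
# Empirical check of the Shapley efficiency property.
shap = Shapley$new(mod, x.interest = X[1, ])
sum(shap$results$phi)                     # summed feature contributions
shap$y.hat.interest - shap$y.hat.average  # prediction minus average prediction
```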
diff --git a/man/TreeSurrogate.Rd b/man/TreeSurrogate.Rd
new file mode 100644
index 000000000..066817b42
--- /dev/null
+++ b/man/TreeSurrogate.Rd
@@ -0,0 +1,117 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/TreeSurrogate.R
+\name{TreeSurrogate}
+\alias{TreeSurrogate}
+\title{Decision tree surrogate model}
+\format{\code{\link{R6Class}} object.}
+\description{
+\code{TreeSurrogate} fits a decision tree on the predictions of a prediction model.
+}
+\section{Usage}{
+
+\preformatted{
+tree = TreeSurrogate$new(predictor, maxdepth = 2, tree.args = NULL, run = TRUE)
+
+plot(tree)
+predict(tree, newdata)
+tree$results
+print(tree)
+}
+}
+
+\section{Arguments}{
+
+
+For TreeSurrogate$new():
+\describe{
+\item{predictor}{Object of type \code{Predictor}. See \link{Predictor}.}
+\item{maxdepth}{The maximum depth of the tree. Default is 2.}
+\item{run}{logical. Should the Interpretation method be run?}
+\item{tree.args}{A list with further arguments for \code{ctree}.}
+}
+}
+
+\section{Details}{
+
+A conditional inference tree is fitted on the predicted \eqn{\hat{y}} from the machine learning model and the data.
+The \code{partykit} package and its \code{ctree} function are used to fit the tree.
+By default a tree of maximum depth of 2 is fitted to improve interpretability.
+}
+
+\section{Fields}{
+
+\describe{
+\item{maxdepth}{the maximal tree depth.}
+\item{predictor}{The prediction model that was analysed.}
+\item{results}{data.frame with the sampled features X together with the leaf node information (columns ..node and ..path)
+and the predicted \eqn{\hat{y}} for tree and machine learning model (columns starting with ..y.hat).}
+\item{tree}{the fitted tree of class \code{party}. See also \link[partykit]{ctree}.}
+}
+}
+
+\section{Methods}{
+
+\describe{
+\item{plot()}{method to plot the leaf nodes of the surrogate decision tree. See \link{plot.TreeSurrogate}.}
+\item{predict()}{method to predict new data with the tree. See also \link{predict.TreeSurrogate}}
+\item{\code{run()}}{[internal] method to run the interpretability method. Use \code{obj$run(force = TRUE)} to force a rerun.}
+\item{\code{clone()}}{[internal] method to clone the R6 object.}
+\item{\code{initialize()}}{[internal] method to initialize the R6 object.}
+}
+}
+
+\examples{
+if (require("randomForest")) {
+# Fit a Random Forest on the Boston housing data set
+data("Boston", package = "MASS")
+rf = randomForest(medv ~ ., data = Boston, ntree = 50)
+# Create a model object
+mod = Predictor$new(rf, data = Boston[-which(names(Boston) == "medv")])
+
+# Fit a decision tree as a surrogate for the whole random forest
+dt = TreeSurrogate$new(mod)
+
+# Plot the resulting leaf nodes
+plot(dt)
+
+# Use the tree to predict new data
+predict(dt, Boston[1:10,])
+
+# Extract the results
+dat = dt$results
+head(dat)
+
+
+# It also works for classification
+rf = randomForest(Species ~ ., data = iris, ntree = 50)
+X = iris[-which(names(iris) == "Species")]
+predict.fun = function(object, newdata) predict(object, newdata, type = "prob")
+mod = Predictor$new(rf, data = X, predict.fun = predict.fun)
+
+# Fit a decision tree as a surrogate for the whole random forest
+dt = TreeSurrogate$new(mod, maxdepth = 2)
+
+# Plot the resulting leaf nodes
+plot(dt)
+
+# If you want to visualise the tree directly:
+plot(dt$tree)
+
+# Use the tree to predict new data
+set.seed(42)
+iris.sample = X[sample(1:nrow(X), 10),]
+predict(dt, iris.sample)
+predict(dt, iris.sample, type = "class")
+
+# Extract the dataset
+dat = dt$results
+head(dat)
+}
+}
+\seealso{
+\link{predict.TreeSurrogate}
+\link{plot.TreeSurrogate}
+
+For the tree implementation
+\link[partykit]{ctree}
+}
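The `..y.hat` columns in `results` make it easy to quantify how faithfully the surrogate mimics the black box. A sketch, assuming the regression surrogate `dt` from the first part of the examples above:

```r
# Fidelity of the surrogate: agreement between model and tree predictions.
dat = dt$results
cor(dat$..y.hat, dat$..y.hat.tree)^2  # R^2-style agreement on the sampled data
```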
+}
+}
diff --git a/man/Shapley.Rd b/man/Shapley.Rd
new file mode 100644
index 000000000..e45d6e5d2
--- /dev/null
+++ b/man/Shapley.Rd
@@ -0,0 +1,110 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/Shapley.R
+\name{Shapley}
+\alias{Shapley}
+\title{Prediction explanations with game theory}
+\format{\code{\link{R6Class}} object.}
+\description{
+\code{Shapley} computes feature contributions for single predictions with the Shapley value, an approach from cooperative game theory.
+The feature values of an instance cooperate to achieve the prediction.
+The Shapley value fairly distributes the difference between the instance's prediction and the dataset's average prediction among the features.
+}
+\section{Usage}{
+
+\preformatted{
+shapley = Shapley$new(predictor, x.interest = NULL, sample.size = 100, run = TRUE)
+
+plot(shapley)
+shapley$results
+print(shapley)
+shapley$explain(x.interest)
+}
+}
+
+\section{Arguments}{
+
+For Shapley$new():
+\describe{
+\item{predictor}{object of type \code{Predictor}. See \link{Predictor}.}
+\item{x.interest}{data.frame with a single row for the instance to be explained.}
+\item{sample.size}{The number of Monte Carlo samples for estimating the Shapley value.}
+\item{run}{logical. Should the interpretation method be run?}
+}
+}
+
+\section{Details}{
+
+For more details on the algorithm see https://christophm.github.io/interpretable-ml-book/shapley.html
+}
+
+\section{Fields}{
+
+\describe{
+\item{predictor}{The prediction model that was analysed.}
+\item{results}{data.frame with the Shapley values (phi) per feature.}
+\item{sample.size}{The number of times coalitions/marginals are sampled from data X.
+The higher the number, the more accurate the explanations become.}
+\item{x.interest}{data.frame with a single row for the instance to be explained.}
+\item{y.hat.interest}{predicted value for the instance of interest}
+\item{y.hat.average}{average predicted value for data \code{X}}
+}
+}
+
+\section{Methods}{
+
+\describe{
+\item{explain(x.interest)}{method to set a new data point to explain.}
+\item{plot()}{method to plot the Shapley value. See \link{plot.Shapley}}
+\item{\code{run()}}{[internal] method to run the interpretability method. Use \code{obj$run(force = TRUE)} to force a rerun.}
+\item{\code{clone()}}{[internal] method to clone the R6 object.}
+\item{\code{initialize()}}{[internal] method to initialize the R6 object.}
+}
+}
+
+\examples{
+if (require("randomForest")) {
+# First we fit a machine learning model on the Boston housing data
+data("Boston", package = "MASS")
+rf = randomForest(medv ~ ., data = Boston, ntree = 50)
+X = Boston[-which(names(Boston) == "medv")]
+mod = Predictor$new(rf, data = X)
+
+# Then we explain the first instance of the dataset with the Shapley method:
+x.interest = X[1,]
+shapley = Shapley$new(mod, x.interest = x.interest)
+shapley
+
+# Look at the results in a table
+shapley$results
+# Or as a plot
+plot(shapley)
+
+# Explain another instance
+shapley$explain(X[2,])
+plot(shapley)
+
+# Shapley() also works with multiclass classification
+rf = randomForest(Species ~ ., data = iris, ntree = 50)
+X = iris[-which(names(iris) == "Species")]
+predict.fun = function(object, newdata) predict(object, newdata, type = "prob")
+mod = Predictor$new(rf, data = X, predict.fun = predict.fun)
+
+# Then we explain the first instance of the dataset with the Shapley() method:
+shapley = Shapley$new(mod, x.interest = X[1,])
+shapley$results
+plot(shapley)
+
+# You can also focus on one class
+mod = Predictor$new(rf, data = X, predict.fun = predict.fun, class = "setosa")
+shapley = Shapley$new(mod, x.interest = X[1,])
+shapley$results
+plot(shapley)
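+
+# Raising sample.size buys more accurate Shapley estimates at the
+# cost of longer computation:
+shapley = Shapley$new(mod, x.interest = X[1,], sample.size = 200)
+plot(shapley)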
+}
+}
+\references{
+Strumbelj, E., Kononenko, I. (2014). Explaining prediction models and individual predictions with feature contributions. Knowledge and Information Systems, 41(3), 647-665. https://doi.org/10.1007/s10115-013-0679-x
+}
+\seealso{
+\link{plot.Shapley}
+
+A different way to explain predictions: \link{LocalModel}
+}
diff --git a/man/TreeSurrogate.Rd b/man/TreeSurrogate.Rd
new file mode 100644
index 000000000..066817b42
--- /dev/null
+++ b/man/TreeSurrogate.Rd
@@ -0,0 +1,117 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/TreeSurrogate.R
+\name{TreeSurrogate}
+\alias{TreeSurrogate}
+\title{Decision tree surrogate model}
+\format{\code{\link{R6Class}} object.}
+\description{
+\code{TreeSurrogate} fits a decision tree on the predictions of a prediction model.
+}
+\section{Usage}{
+
+\preformatted{
+tree = TreeSurrogate$new(predictor, maxdepth = 2, tree.args = NULL, run = TRUE)
+
+plot(tree)
+predict(tree, newdata)
+tree$results
+print(tree)
+}
+}
+
+\section{Arguments}{
+
+
+For TreeSurrogate$new():
+\describe{
+\item{predictor}{Object of type \code{Predictor}. See \link{Predictor}.}
+\item{maxdepth}{The maximum depth of the tree. Default is 2.}
+\item{run}{logical. Should the interpretation method be run?}
+\item{tree.args}{A list with further arguments for \code{ctree}.}
+}
+}
+
+\section{Details}{
+
+A conditional inference tree is fitted on the predicted \eqn{\hat{y}} from the machine learning model and the data.
+The \code{ctree} function from the \code{partykit} package is used to fit the tree.
+By default, a tree with a maximum depth of 2 is fitted to improve interpretability.
+}
+
+\section{Fields}{
+
+\describe{
+\item{maxdepth}{the maximal tree depth.}
+\item{predictor}{The prediction model that was analysed.}
+\item{results}{data.frame with sampled feature X together with the leaf node information (columns ..node and ..path)
+and the predicted \eqn{\hat{y}} for tree and machine learning model (columns starting with ..y.hat).}
+\item{tree}{the fitted tree of class \code{party}. See also \link[partykit]{ctree}.}
+}
+}
+
+\section{Methods}{
+
+\describe{
+\item{plot()}{method to plot the leaf nodes of the surrogate decision tree. See \link{plot.TreeSurrogate}.}
+\item{predict()}{method to predict new data with the tree. See also \link{predict.TreeSurrogate}}
+\item{\code{run()}}{[internal] method to run the interpretability method. Use \code{obj$run(force = TRUE)} to force a rerun.}
+\item{\code{clone()}}{[internal] method to clone the R6 object.}
+\item{\code{initialize()}}{[internal] method to initialize the R6 object.}
+}
+}
+
+\examples{
+if (require("randomForest")) {
+# Fit a Random Forest on the Boston housing data set
+data("Boston", package = "MASS")
+rf = randomForest(medv ~ ., data = Boston, ntree = 50)
+# Create a model object
+mod = Predictor$new(rf, data = Boston[-which(names(Boston) == "medv")])
+
+# Fit a decision tree as a surrogate for the whole random forest
+dt = TreeSurrogate$new(mod)
+
+# Plot the resulting leaf nodes
+plot(dt)
+
+# Use the tree to predict new data
+predict(dt, Boston[1:10,])
+
+# Extract the results
+dat = dt$results
+head(dat)
+
+
+# It also works for classification
+rf = randomForest(Species ~ ., data = iris, ntree = 50)
+X = iris[-which(names(iris) == "Species")]
+predict.fun = function(object, newdata) predict(object, newdata, type = "prob")
+mod = Predictor$new(rf, data = X, predict.fun = predict.fun)
+
+# Fit a decision tree as a surrogate for the whole random forest
+dt = TreeSurrogate$new(mod, maxdepth = 2)
+
+# Plot the resulting leaf nodes
+plot(dt)
+
+# If you want to visualise the tree directly:
+plot(dt$tree)
+
+# Use the tree to predict new data
+set.seed(42)
+iris.sample = X[sample(1:nrow(X), 10),]
+predict(dt, iris.sample)
+predict(dt, iris.sample, type = "class")
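+
+# To check how faithfully the tree approximates the random forest,
+# compare both predictions via the columns starting with ..y.hat:
+head(dt$results[, grep("y.hat", names(dt$results))])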
+
+# Extract the dataset
+dat = dt$results
+head(dat)
+}
+}
+\seealso{
+\link{predict.TreeSurrogate}
+\link{plot.TreeSurrogate}
+
+For the tree implementation
+\link[partykit]{ctree}
+}
diff --git a/man/extract.glmnet.effects.Rd b/man/extract.glmnet.effects.Rd
deleted file mode 100644
index b434f8e7d..000000000
--- a/man/extract.glmnet.effects.Rd
+++ /dev/null
@@ -1,21 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/util.R
-\name{extract.glmnet.effects}
-\alias{extract.glmnet.effects}
-\title{Extract glmnet effects}
-\usage{
-extract.glmnet.effects(betas, best.index, x.scaled, x.original)
-}
-\arguments{
-\item{betas}{glmnet$beta}
-
-\item{best.index}{index k}
-
-\item{x.scaled}{the scaled version of x}
-
-\item{x.original}{the original x
-Assuming that the first row is the x.interest}
-}
-\description{
-Extract glmnet effects
-}
diff --git a/man/feature.imp.Rd b/man/feature.imp.Rd
deleted file mode 100644
index 87c8529e9..000000000
--- a/man/feature.imp.Rd
+++ /dev/null
@@ -1,108 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/feature.imp.R
-\name{feature.imp}
-\alias{feature.imp}
-\title{Feature importance}
-\usage{
-feature.imp(object, X, y, class = NULL, loss, method = "shuffle", ...)
-}
-\arguments{
-\item{object}{The machine learning model. Different types are allowed.
-Recommended are mlr WrappedModel and caret train objects. The \code{object} can also be
-a function that predicts the outcome given features or anything with an S3 predict function,
-like an object from class \code{lm}.}
-
-\item{X}{data.frame with the data for the prediction model}
-
-\item{y}{The vector or data.frame with the actual target values associated with X.}
-
-\item{class}{In case of classification, class specifies the class for which to predict the probability.
-By default the multiclass classification is done.}
-
-\item{loss}{The loss function. A string (e.g. "ce" for classification or "mse") or a function. See Details.}
-
-\item{method}{Either 'shuffle' or 'cartesian'. See Details.}
-
-\item{...}{Further arguments for the prediction method.}
-}
-\value{
-An Importance object (R6). Its methods and variables can be accessed with the \code{$}-operator:
-\item{error.original}{The loss of the model before perturbing features.}
-\item{loss}{The loss function. Can also be applied to data: \code{object$loss(actual, predicted)}}
-\item{data()}{method to extract the results of the feature importance computation.
-Returns a data.frame with importance and permutation error measurements per feature.}
-\item{plot()}{method to plot the feature importances. See \link{plot.Importance}}
-
-\item{\code{run()}}{[internal] method to run the interpretability method. Use \code{obj$run(force = TRUE)} to force a rerun.}
-General R6 methods
-\item{\code{clone()}}{[internal] method to clone the R6 object.}
-\item{\code{initialize()}}{[internal] method to initialize the R6 object.}
-}
-\description{
-feature.imp() computes feature importances for machine learning models.
-The importance of a feature is the factor by which the model's prediction error increases when the feature is shuffled.
-}
-\details{
-Read the Interpretable Machine Learning book to learn more about feature importance:
-\url{https://christophm.github.io/interpretable-ml-book/permutation-feature-importance.html}
-
-Two permutation schemes are implemented:
-\itemize{
-\item shuffle: A simple shuffling of the feature values, yielding n perturbed instances per feature (faster)
-\item cartesian: Matching every instance with the feature value of all other instances, yielding n x (n-1) perturbed instances per feature (slow)
-}
-The loss function can be either specified via a string, or by handing a function to \code{feature.imp()}.
-Using the string is a shortcut to using loss functions from the \code{Metrics} package.
-See \code{library(help = "Metrics")} to get a list of functions.
-Only use functions that return a single performance value, not a vector.
-You can also provide a function directly. It has to take the actual value as its first argument and the prediction as its second.
-} -\examples{ -# We train a tree on the Boston dataset: -if(require("rpart")){ -data("Boston", package = "MASS") -mod = rpart(medv ~ ., data = Boston) - -# Compute the individual conditional expectations for the first feature -X = Boston[-which(names(Boston) == 'medv')] -y = Boston$medv - -# Compute feature importances as the performance drop in mean absolute error -imp = feature.imp(mod, X, y, loss = 'mae') - -# Plot the results directly -plot(imp) - - -# Since the result is a ggplot object, you can extend it: -library("ggplot2") -plot(imp) + theme_bw() - -# If you want to do your own thing, just extract the data: -imp.dat = imp$data() -head(imp.dat) -ggplot(imp.dat, aes(x = ..feature, y = importance)) + geom_point() + -theme_bw() - -# feature.imp() also works with multiclass classification. -# In this case, the importance measurement regards all classes -mod = rpart(Species ~ ., data= iris) -X = iris[-which(names(iris) == 'Species')] -y = iris$Species -# For some models we have to specify additional arguments for the predict function -imp = feature.imp(mod, X, y, loss = 'ce', predict.args = list(type = 'prob')) -plot(imp) -# Here we encounter the special case that the machine learning model perfectly predicts -# The importance becomes infinite -imp$data() - -# For multiclass classification models, you can choose to only compute performance for one class. -# Make sure to adapt y -imp = feature.imp(mod, X, y == 'virginica', class = 3, loss = 'ce', - predict.args = list(type = 'prob')) -plot(imp) -} -} -\references{ -Fisher, A., Rudin, C., and Dominici, F. (2018). Model Class Reliance: Variable Importance Measures for any Machine Learning Model Class, from the "Rashomon" Perspective. Retrieved from http://arxiv.org/abs/1801.01489 -} diff --git a/man/ice.Rd b/man/ice.Rd deleted file mode 100644 index ca3075844..000000000 --- a/man/ice.Rd +++ /dev/null @@ -1,128 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/ice.r -\name{ice} -\alias{ice} -\title{Individual conditional expectations (ICE)} -\usage{ -ice(object, X, feature, grid.size = 10, center.at = NULL, class = NULL, - ...) -} -\arguments{ -\item{object}{The machine learning model. Different types are allowed. -Recommended are mlr WrappedModel and caret train objects. The \code{object} can also be -a function that predicts the outcome given features or anything with an S3 predict function, -like an object from class \code{lm}.} - -\item{X}{data.frame with the data for the prediction model} - -\item{feature}{The index of the feature of interest.} - -\item{grid.size}{The size of the grid for evaluating the predictions} - -\item{center.at}{The value for the centering of the plot. Numeric for numeric features, and the level name for factors.} - -\item{class}{In case of classification, class specifies the class for which to predict the probability. -By default the multiclass classification is done.} - -\item{...}{Further arguments for the prediction method.} -} -\value{ -An individual conditional expectation object - -An ICE object (R6). 
Its methods and variables can be accessed with the \code{$}-operator: -\item{feature.name}{The feature name for which the partial dependence was computed.} -\item{feature.type}{The detected type of the feature, either "categorical" or "numerical".} -\item{feature.index}{The index of the feature for which the individual conditional expectations weree computed.} -\item{center.at}{The features value(s) at which the ice computations are centered.} -\item{grid.size}{The size of the grid.} -\item{sample.size}{The number of instances sampled from data X.} -\item{center}{method to get/set the feature value at which the ice computation should be centered. See examples for usage.} -\item{feature}{method to get/set the feature (index) for which to compute ice. See examples for usage.} -\item{data()}{method to extract the results of the partial dependence plot. -Returns a data.frame with the grid of feature of interest and the predicted \eqn{\hat{y}}. -Can be used for creating custom partial dependence plots.} -\item{plot()}{method to plot the partial dependence function. See \link{plot.PDP}} -} -\description{ -Fits and plots individual conditional expectation function on an arbitrary machine learning model -} -\details{ -Machine learning model try to learn the relationship \eqn{y = f(X)}. We can't visualize -the learned \eqn{\hat{f}} directly for an individual, high-dimensional point \eqn{x_i}. - -But we can take one of the input features of an observation and change its value. -We try out a grid of different values and observe the predicted outcome. -This gives us the predicted \eqn{\hat{y}} as a function of feature \eqn{X_j}, which we can plot as a line. -The \code{ice} method repeats this for all the observations in the dataset and plots all the lines in the same plot. - -Mathematically, we split up the learned function into its parts: -\deqn{f(x_i) = f_1(x_{i,1}) + \ldots + f_p(x_{i,p}) + f_{1, 2}(x_{i,1}, x_{i,2}) + \ldots + f_{p-1, p}(x_{i,p-1}, x_{p}) + \ldots + f_{1\ldots p}(x_{i,1\ldots X_p})}, - -And we can isolate the individual conditional expectation of \eqn{y} on a single \eqn{X_j}: \eqn{f_j(X_j)} and plot it. - -Partial dependence plots (\link{pdp}) are the averaged lines of ice curves. - The returned object can be plotted is a \code{ggplot} -object. This means it can be plotted directly or be extended using ggplots \code{+} operator. 
-To learn more about partial dependence plot, read the Interpretable Machine Learning book: https://christophm.github.io/interpretable-ml-book/ice.html -} -\examples{ -# We train a random forest on the Boston dataset: -if(require("randomForest")){ - -data("Boston", package = "MASS") -mod = randomForest(medv ~ ., data = Boston, ntree = 50) - -# Compute the individual conditional expectations for the first feature -ice.obj = ice(mod, Boston, feature = 1) - -# Plot the results directly -plot(ice.obj) - -# You can center the ICE plot -ice.obj$center.at = 0 -plot(ice.obj) - -# ICE plots can be centered at initialization -ice.obj = ice(mod, Boston, feature = 1, center=75) -plot(ice.obj) - -# Centering can also be removed -ice.obj$center.at = NULL -plot(ice.obj) - -# Since the result is a ggplot object, you can extend it: -library("ggplot2") -plot(ice.obj) + theme_bw() - -# If you want to do your own thing, just extract the data: -ice.dat = ice.obj$data() -head(ice.dat) -ggplot(ice.dat) + -geom_line(aes(x = crim, y = y.hat, group = ..individual, color = factor(..individual))) + -scale_color_discrete(guide = "none") - -# You can reuse the ice object for other features: -ice.obj$feature = 2 -plot(ice.obj) - -# ICE also works with multiclass classification -library("randomForest") -mod = randomForest(Species ~ ., data= iris, ntree=50) - -# For some models we have to specify additional arguments for the predict function -plot(ice(mod, iris, feature = 1, predict.args = list(type = 'prob'))) - -# For multiclass classification models, you can choose to only show one class: -plot(ice(mod, iris, feature = 1, class = 1, predict.args = list(type = 'prob'))) - -# ICE plots can be centered: -plot(ice(mod, iris, feature = 1, center = 1, predict.args = list(type = 'prob'))) -} -} -\references{ -Goldstein, A., Kapelner, A., Bleich, J., and Pitkin, E. (2013). Peeking Inside the Black Box: -Visualizing Statistical Learning with Plots of Individual Conditional Expectation, 1-22. https://doi.org/10.1080/10618600.2014.907095 -} -\seealso{ -\link{pdp} for partial dependence plots (aggregated ice plots) -} diff --git a/man/iml-package.Rd b/man/iml-package.Rd index 8ad89dfd5..b5002a348 100644 --- a/man/iml-package.Rd +++ b/man/iml-package.Rd @@ -8,9 +8,6 @@ \description{ The iml package provides tools to analyse machine learning models and predictions. } -\details{ -TODO -} \seealso{ \href{https://christophm.github.io/interpretable-ml-book/agnostic}{Book on Interpretable Machine Learning} } diff --git a/man/lime.Rd b/man/lime.Rd deleted file mode 100644 index e23f244be..000000000 --- a/man/lime.Rd +++ /dev/null @@ -1,112 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/lime.r -\name{lime} -\alias{lime} -\title{LIME} -\usage{ -lime(object, X, sample.size = 100, k = 3, x.interest, class = NULL, ...) -} -\arguments{ -\item{object}{The machine learning model. Different types are allowed. -Recommended are mlr WrappedModel and caret train objects. 
The \code{object} can also be -a function that predicts the outcome given features or anything with an S3 predict function, -like an object from class \code{lm}.} - -\item{X}{data.frame with the data for the prediction model} - -\item{sample.size}{The number of instances to be sampled from X.} - -\item{k}{the (maximum) number of features to be used for the surrogate model} - -\item{x.interest}{data.frame with a single row for the instance to be explained.} - -\item{class}{In case of classification, class specifies the class for which to predict the probability. -By default the multiclass classification is done.} - -\item{...}{Further arguments for the prediction method.} -} -\value{ -A LIME object (R6). Its methods and variables can be accessed with the \code{$}-operator: -\item{sample.size}{The number of samples from data X. The higher the more accurate the explanations become.} -\item{model}{the glmnet object.} -\item{best.fit.index}{the index of the best glmnet fit} -\item{k}{The number of features as set by the user.} -\item{x.interest}{method to get/set the instance. See examples for usage.} -\item{data()}{method to extract the results of the local feature effects -Returns a data.frame with the feature names (\code{feature}) and contributions to the prediction} -\item{plot()}{method to plot the LIME feature effects. See \link{plot.LIME}} -\item{predict()}{method to predict new data with the local model See also \link{predict.LIME}} - -\item{\code{run()}}{[internal] method to run the interpretability method. Use \code{obj$run(force = TRUE)} to force a rerun.} -General R6 methods -\item{\code{clone()}}{[internal] method to clone the R6 object.} -\item{\code{initialize()}}{[internal] method to initialize the R6 object.} -} -\description{ -\code{lime()} fits a locally weighted linear regression model (logistic for classification) to explain a single machine learning prediction. -} -\details{ -Data points are sampled and weighted by their proximity to the instance to be explained. -A weighted glm is fitted with the machine learning model prediction as target. -L1-regularisation is used to make the results sparse. -The resulting model can be seen as a surrogate for the machine learning model, which is only valid for that one point. -Categorical features are binarized, depending on the category of the instance to be explained: 1 if the category is the same, 0 otherwise. - -Differences to the original LIME implementation: -\itemize{ -\item Distance measure: Uses gower proximity (= 1 - gower distance) instead of a kernel based on the Euclidean distance. Has the advantage to have a meaningful neighbourhood and no kernel width to tune. -\item Sampling: Sample from X instead of from normal distributions. -Has the advantage to follow the original data distribution. -\item Visualisation: Plots effects instead of betas. Is the same for binary features, but makes a difference for numerical features. -For numerical features, plotting the betas makes no sense, -because a negative beta might still increase the prediction when the feature value is also negative. 
-} -To learn more about local models, read the Interpretable Machine Learning book: https://christophm.github.io/interpretable-ml-book/lime.html -} -\examples{ -# First we fit a machine learning model on the Boston housing data -library("randomForest") -data("Boston", package = "MASS") -mod = randomForest(medv ~ ., data = Boston, ntree = 50) -X = Boston[-which(names(Boston) == "medv")] - -# Then we explain the first instance of the dataset with the lime() method: -x.interest = X[1,] -lemon = lime(mod, X, x.interest = x.interest, k = 2) -lemon - -# Look at the results in a table -lemon$data() -# Or as a plot -plot(lemon) - -# Reuse the object with a new instance to explain -lemon$x.interest = X[2,] -plot(lemon) - -# lime() also works with multiclass classification -library("randomForest") -mod = randomForest(Species ~ ., data= iris, ntree=50) -X = iris[-which(names(iris) == 'Species')] - -# Then we explain the first instance of the dataset with the lime() method: -lemon = lime(mod, X, x.interest = X[1,], predict.args = list(type='prob'), k = 3) -lemon$data() -plot(lemon) - -# You can also focus on one class -lemon = lime(mod, X, x.interest = X[1,], class = 2, predict.args = list(type='prob'), k = 2) -lemon$data() -plot(lemon) - -} -\references{ -Ribeiro, M. T., Singh, S., & Guestrin, C. (2016). "Why Should I Trust You?": Explaining the Predictions of Any Classifier. Retrieved from http://arxiv.org/abs/1602.04938 -} -\seealso{ -\code{\link{plot.LIME}} and \code{\link{predict.LIME}} - -\code{\link{shapley}} can also be used to explain single predictions - -\code{\link[lime]{lime}}, the original implementation -} diff --git a/man/pdp.Rd b/man/pdp.Rd deleted file mode 100644 index 263935a3d..000000000 --- a/man/pdp.Rd +++ /dev/null @@ -1,116 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pdp.r -\name{pdp} -\alias{pdp} -\title{Partial Dependence} -\usage{ -pdp(object, X, feature, grid.size = 10, class = NULL, ...) -} -\arguments{ -\item{object}{The machine learning model. Different types are allowed. -Recommended are mlr WrappedModel and caret train objects. The \code{object} can also be -a function that predicts the outcome given features or anything with an S3 predict function, -like an object from class \code{lm}.} - -\item{X}{data.frame with the data for the prediction model} - -\item{feature}{The feature index for which to compute the partial dependencies. -Either a single number or vector of two numbers} - -\item{grid.size}{The size of the grid for evaluating the predictions} - -\item{class}{In case of classification, class specifies the class for which to predict the probability. -By default the multiclass classification is done.} - -\item{...}{Further arguments for the prediction method.} -} -\value{ -A PDP object (R6). Its methods and variables can be accessed with the \code{$}-operator: -\item{feature}{The feature names for which the partial dependence was computed.} -\item{feature.type}{The detected types of the features, either "categorical" or "numerical".} -\item{feature.index}{The index of the features for which the partial dependence was computed.} -\item{grid.size}{The size of the grid.} -\item{sample.size}{The number of instances sampled from data X.} -\item{feature(index)}{method to get/set feature(s) (by index) fpr which to compute pdp. See examples for usage.} -\item{data()}{method to extract the results of the partial dependence plot. -Returns a data.frame with the grid of feature of interest and the predicted \eqn{\hat{y}}. 
-Can be used for creating custom partial dependence plots.} -\item{plot()}{method to plot the partial dependence function. See \link{plot.PDP}} - -\item{\code{run()}}{[internal] method to run the interpretability method. Use \code{obj$run(force = TRUE)} to force a rerun.} -General R6 methods -\item{\code{clone()}}{[internal] method to clone the R6 object.} -\item{\code{initialize()}}{[internal] method to initialize the R6 object.} -} -\description{ -\code{pdp()} computes partial dependence functions of prediction models. -} -\details{ -Machine learning model try to learn the relationship \eqn{y = f(X)}. We can't visualize -the learned \eqn{\hat{f}} directly for high-dimensional X. -But we can split it into parts: -\deqn{f(X) = f_1(X_1) + \ldots + f_p(X_p) + f_{1, 2}(X_1, X_2) + \ldots + f_{p-1, p}(X_{p-1}, X_p) + \ldots + f_{1\ldots p}(X_1\ldots X_p)}, - -And we can isolate the partial dependence of \eqn{y} on a single \eqn{X_j}: \eqn{f_j(X_j)} and plot it. -We can even do this for higher dimensions, but a maximum of 2 features makes sense: \eqn{f_j(X_j) + f_k(X_k) + f_{jk}(X_{jk})} - -The partial dependence for a feature \eqn{X_j} is estimated by spanning a grid over the feature space. -For each value of the grid, we replace in the whole dataset the \eqn{X_j}-value with the grid value, -predict the outcomes \eqn{\hat{y}} with the machine learning models and average the predictions. -This generate one point of the partial dependence curve. After doing this for the whole grid, -the outcome is a curve (or 2D plane), that then can be plotted. - -To learn more about partial dependence plot, read the Interpretable Machine Learning book: https://christophm.github.io/interpretable-ml-book/pdp.html -} -\examples{ - -# We train a random forest on the Boston dataset: -library("randomForest") -data("Boston", package = "MASS") -mod = randomForest(medv ~ ., data = Boston, ntree = 50) - -# Compute the partial dependence for the first feature -pdp.obj = pdp(mod, Boston, feature = 1) - -# Plot the results directly -plot(pdp.obj) - - -# Since the result is a ggplot object, you can extend it: -library("ggplot2") -plot(pdp.obj) + theme_bw() - -# If you want to do your own thing, just extract the data: -pdp.dat = pdp.obj$data() -head(pdp.dat) - -# You can reuse the pdp object for other features: -pdp.obj$feature = 2 -plot(pdp.obj) - -# Partial dependence plots support up to two features: -pdp.obj = pdp(mod, Boston, feature = c(1,2)) - -# Partial dependence plots also works with multiclass classification -library("randomForest") -mod = randomForest(Species ~ ., data= iris, ntree=50) - -# For some models we have to specify additional arguments for the predict function -plot(pdp(mod, iris, feature = 1, predict.args = list(type = 'prob'))) - -# For multiclass classification models, you can choose to only show one class: -plot(pdp(mod, iris, feature = 1, class = 1, predict.args = list(type = 'prob'))) - -# Partial dependence plots support up to two features: -pdp.obj = pdp(mod, iris, feature = c(1,3), predict.args = list(type = 'prob')) -pdp.obj$plot() - -} -\references{ -Friedman, J.H. 2001. "Greedy Function Approximation: A Gradient Boosting Machine." Annals of Statistics 29: 1189-1232. -} -\seealso{ -\link{plot.PDP} - -\code{\link{ice}} for individual conditional expectation plots. 
-}
diff --git a/man/plot.FeatureImp.Rd b/man/plot.FeatureImp.Rd
new file mode 100644
index 000000000..66a67dfe7
--- /dev/null
+++ b/man/plot.FeatureImp.Rd
@@ -0,0 +1,27 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/FeatureImp.R
+\name{plot.FeatureImp}
+\alias{plot.FeatureImp}
+\title{Plot Feature Importance}
+\usage{
+\method{plot}{FeatureImp}(x, sort = TRUE, ...)
+}
+\arguments{
+\item{x}{A FeatureImp R6 object}
+
+\item{sort}{logical. Should the features be sorted in descending order? Defaults to TRUE.}
+
+\item{...}{Further arguments for the object's plot function}
+}
+\value{
+ggplot2 plot object
+}
+\description{
+plot.FeatureImp() plots the feature importance results of a FeatureImp object.
+}
+\details{
+For examples see \link{FeatureImp}
+}
+\seealso{
+\link{FeatureImp}
+}
diff --git a/man/plot.ICE.Rd b/man/plot.ICE.Rd
deleted file mode 100644
index 2564fead7..000000000
--- a/man/plot.ICE.Rd
+++ /dev/null
@@ -1,25 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/ice.r
-\name{plot.ICE}
-\alias{plot.ICE}
-\title{Individual conditional expectation plots}
-\usage{
-\method{plot}{ICE}(x, ...)
-}
-\arguments{
-\item{x}{The individual conditional expectation curves. An ICE R6 object}
-
-\item{...}{Further arguments for the objects plot function}
-}
-\value{
-ggplot2 plot object
-}
-\description{
-plot.ICE() plots individiual expectation curves for each observation for one feature.
-}
-\details{
-For examples see \link{ice}
-}
-\seealso{
-\link{ice}
-}
diff --git a/man/plot.Ice.Rd b/man/plot.Ice.Rd
new file mode 100644
index 000000000..2cdf07aa8
--- /dev/null
+++ b/man/plot.Ice.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/Ice.R
+\name{plot.Ice}
+\alias{plot.Ice}
+\title{Plot ICE (Individual Conditional Expectation)}
+\usage{
+\method{plot}{Ice}(x)
+}
+\arguments{
+\item{x}{An Ice R6 object}
+}
+\value{
+ggplot2 plot object
+}
+\description{
+plot.Ice() plots the individual conditional expectation results from an Ice object.
+}
+\details{
+For examples see \link{Ice}
+}
+\seealso{
+\link{Ice}
+}
diff --git a/man/plot.Importance.Rd b/man/plot.Importance.Rd
deleted file mode 100644
index c0be69906..000000000
--- a/man/plot.Importance.Rd
+++ /dev/null
@@ -1,27 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/feature.imp.R
-\name{plot.Importance}
-\alias{plot.Importance}
-\title{Feature importance plot}
-\usage{
-\method{plot}{Importance}(x, sort = TRUE, ...)
-}
-\arguments{
-\item{x}{The feature importance. An Importance R6 object}
-
-\item{sort}{logical. Should the features be sorted in descending order? Defaults to TRUE.}
-
-\item{...}{Further arguments for the objects plot function}
-}
-\value{
-ggplot2 plot object
-}
-\description{
-plot.Importance() plots the feature importance results of an Importance object.
-}
-\details{
-For examples see \link{feature.imp}
-}
-\seealso{
-\link{feature.imp}
-}
diff --git a/man/plot.LIME.Rd b/man/plot.LIME.Rd
deleted file mode 100644
index 41b5cd9ee..000000000
--- a/man/plot.LIME.Rd
+++ /dev/null
@@ -1,23 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/lime.r
-\name{plot.LIME}
-\alias{plot.LIME}
-\title{LIME plot}
-\usage{
-\method{plot}{LIME}(object)
-}
-\arguments{
-\item{object}{A LIME R6 object}
-}
-\value{
-ggplot2 plot object
-}
-\description{
-plot.LIME() plots the feature effects of the LIME model.
-} -\details{ -For examples see \link{lime} -} -\seealso{ -\link{lime} -} diff --git a/man/plot.LocalModel.Rd b/man/plot.LocalModel.Rd new file mode 100644 index 000000000..4e96c97c4 --- /dev/null +++ b/man/plot.LocalModel.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/LocalModel.R +\name{plot.LocalModel} +\alias{plot.LocalModel} +\title{Plot Local Model} +\usage{ +\method{plot}{LocalModel}(object) +} +\arguments{ +\item{object}{A LocalModel R6 object} +} +\value{ +ggplot2 plot object +} +\description{ +plot.LocalModel() plots the feature effects of a LocalModel object. +} +\details{ +For examples see \link{LocalModel} +} +\seealso{ +\link{LocalModel} +} diff --git a/man/plot.PDP.Rd b/man/plot.PDP.Rd deleted file mode 100644 index 3311f2745..000000000 --- a/man/plot.PDP.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pdp.r -\name{plot.PDP} -\alias{plot.PDP} -\title{Partial dependence plot} -\usage{ -\method{plot}{PDP}(x, ...) -} -\arguments{ -\item{x}{The partial dependence. A PDP R6 object} - -\item{...}{Further arguments for the objects plot function} -} -\value{ -ggplot2 plot object -} -\description{ -plot.PDP() plots a line for a single feature and a tile plot for 2 features. -} -\details{ -For examples see \link{pdp} -} -\seealso{ -\link{pdp} -} diff --git a/man/plot.PartialDependence.Rd b/man/plot.PartialDependence.Rd new file mode 100644 index 000000000..086e8f875 --- /dev/null +++ b/man/plot.PartialDependence.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PartialDependence.R +\name{plot.PartialDependence} +\alias{plot.PartialDependence} +\title{Plot Partial Dependence} +\usage{ +\method{plot}{PartialDependence}(x) +} +\arguments{ +\item{x}{A PartialDependence R6 object} +} +\value{ +ggplot2 plot object +} +\description{ +plot.PartialDependence() plots the results of a PartialDependence object. +} +\details{ +For examples see \link{PartialDependence} +} +\seealso{ +\link{PartialDependence} +} diff --git a/man/plot.Shapley.Rd b/man/plot.Shapley.Rd index e5306c89d..e92d1fb02 100644 --- a/man/plot.Shapley.Rd +++ b/man/plot.Shapley.Rd @@ -1,13 +1,15 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/shapley.r +% Please edit documentation in R/Shapley.R \name{plot.Shapley} \alias{plot.Shapley} -\title{Shapley plot} +\title{Plot Shapley} \usage{ -\method{plot}{Shapley}(object) +\method{plot}{Shapley}(object, sort = TRUE) } \arguments{ \item{object}{A Shapley R6 object} + +\item{sort}{logical. Should the feature values be sorted by Shapley value? Ignored for multi.class output.} } \value{ ggplot2 plot object @@ -16,8 +18,8 @@ ggplot2 plot object plot.Shapley() plots the Shapley values - the contributions of feature values to the prediction. 
}
\details{
-For examples see \link{shapley}
+For examples see \link{Shapley}
}
\seealso{
-\link{shapley}
+\link{Shapley}
}
diff --git a/man/plot.TreeSurrogate.Rd b/man/plot.TreeSurrogate.Rd
index 6256af10b..0b030a795 100644
--- a/man/plot.TreeSurrogate.Rd
+++ b/man/plot.TreeSurrogate.Rd
@@ -1,27 +1,25 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/tree-surrogate.r
+% Please edit documentation in R/TreeSurrogate.R
 \name{plot.TreeSurrogate}
 \alias{plot.TreeSurrogate}
-\title{Surrogate tree visualisation}
+\title{Plot Tree Surrogate}
 \usage{
 \method{plot}{TreeSurrogate}(object)
 }
 \arguments{
-\item{object}{The surrogate tree. A TreeSurrogate R6 object}
+\item{object}{A TreeSurrogate R6 object}
 }
 \value{
 ggplot2 plot object
 }
 \description{
-Predict the response for newdata with the surrogate tree
+Plot the leaf nodes of a TreeSurrogate object.
 Each plot facet is one leaf node and visualises the distribution of the \eqn{\hat{y}}
-from the machine learning model.
-This makes the TreeSurrogate object call
-its iternal object$plot() method.
+from the machine learning model.
 }
 \details{
-For examples see \link{tree.surrogate}
+For examples see \link{TreeSurrogate}
}
\seealso{
-\link{tree.surrogate}
+\link{TreeSurrogate}
}
diff --git a/man/predict.LIME.Rd b/man/predict.LIME.Rd
deleted file mode 100644
index fbaf9e632..000000000
--- a/man/predict.LIME.Rd
+++ /dev/null
@@ -1,29 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/lime.r
-\name{predict.LIME}
-\alias{predict.LIME}
-\title{LIME prediction}
-\usage{
-\method{predict}{LIME}(object, newdata = NULL, ...)
-}
-\arguments{
-\item{object}{A LIME R6 object}
-
-\item{newdata}{A data.frame for which to predict}
-
-\item{...}{Further arguments for the objects predict function}
-}
-\value{
-A data.frame with the predicted outcome.
-}
-\description{
-Predict the response for newdata with the LIME model.
-}
-\details{
-This function makes the LIME object call
-its iternal object$predict() method.
-For examples see \link{lime}
-}
-\seealso{
-\link{lime}
-}
diff --git a/man/predict.LocalModel.Rd b/man/predict.LocalModel.Rd
new file mode 100644
index 000000000..8ae2432ff
--- /dev/null
+++ b/man/predict.LocalModel.Rd
@@ -0,0 +1,27 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/LocalModel.R
+\name{predict.LocalModel}
+\alias{predict.LocalModel}
+\title{Predict LocalModel}
+\usage{
+\method{predict}{LocalModel}(object, newdata = NULL, ...)
+}
+\arguments{
+\item{object}{A LocalModel R6 object}
+
+\item{newdata}{A data.frame for which to predict}
+
+\item{...}{Further arguments for the object's predict function}
+}
+\value{
+A data.frame with the predicted outcome.
+}
+\description{
+Predict the response for newdata with the LocalModel.
+}
+\details{
+For examples see \link{LocalModel}
+}
+\seealso{
+\link{LocalModel}
+}
diff --git a/man/predict.TreeSurrogate.Rd b/man/predict.TreeSurrogate.Rd
index f2b6d9280..18977a087 100644
--- a/man/predict.TreeSurrogate.Rd
+++ b/man/predict.TreeSurrogate.Rd
@@ -1,8 +1,8 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/tree-surrogate.r
+% Please edit documentation in R/TreeSurrogate.R
 \name{predict.TreeSurrogate}
 \alias{predict.TreeSurrogate}
-\title{Surrogate tree prediction}
+\title{Predict Tree Surrogate}
 \usage{
 \method{predict}{TreeSurrogate}(object, newdata, type = "prob", ...)
 }
@@ -18,16 +18,16 @@
 \value{
 A data.frame with the predicted outcome.
In case of regression it is the predicted \eqn{\hat{y}}.
-In case of classification it is either the class probabilities *(for type "prob") or the class label (type "class")
+In case of classification it is either the class probabilities (for type "prob") or the class label (type "class").
}
\description{
-Predict the response for newdata with the surrogate tree
+Predict the response for newdata of a TreeSurrogate object.
}
\details{
This function makes the TreeSurrogate object call
its internal object$predict() method.
-For examples see \link{tree.surrogate}
+For examples see \link{TreeSurrogate}
}
\seealso{
-\link{tree.surrogate}
+\link{TreeSurrogate}
}
diff --git a/man/prediction.model.Rd b/man/prediction.model.Rd
deleted file mode 100644
index 97fd9dd50..000000000
--- a/man/prediction.model.Rd
+++ /dev/null
@@ -1,22 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/Prediction.R
-\name{prediction.model}
-\alias{prediction.model}
-\title{Get a prediction object}
-\usage{
-prediction.model(object, class = NULL, predict.args = NULL)
-}
-\arguments{
-\item{object}{function, mlr WrappedObject, S3 class with predict function, or caret train object}
-
-\item{class}{In case of classification, class specifies the class for which to predict the probability.
-By default the first class in the prediction (first column) is chosen.}
-
-\item{predict.args}{named list with arguments passed down to the prediction model}
-}
-\value{
-object of type Prediction
-}
-\description{
-Get a prediction object
-}
diff --git a/man/shapley.Rd b/man/shapley.Rd
deleted file mode 100644
index 2b686da3c..000000000
--- a/man/shapley.Rd
+++ /dev/null
@@ -1,91 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/shapley.r
-\name{shapley}
-\alias{shapley}
-\title{Explain predictions}
-\usage{
-shapley(object, X, x.interest, sample.size = 100, class = NULL, ...)
-}
-\arguments{
-\item{object}{The machine learning model. Different types are allowed.
-Recommended are mlr WrappedModel and caret train objects. The \code{object} can also be
-a function that predicts the outcome given features or anything with an S3 predict function,
-like an object from class \code{lm}.}
-
-\item{X}{data.frame with the data for the prediction model}
-
-\item{x.interest}{data.frame with a single row for the instance to be explained.}
-
-\item{sample.size}{Number of samples to be drawn to estimate the Shapley value. The higher the more accurate the estimations.}
-
-\item{class}{In case of classification, class specifies the class for which to predict the probability.
-By default the multiclass classification is done.}
-
-\item{...}{Further arguments for the prediction method.}
-}
-\value{
-A Shapley object (R6). Its methods and variables can be accessed with the \code{$}-operator:
-\item{sample.size}{The number of times coalitions/marginals are sampled from data X. The higher the more accurate the explanations become.}
-\item{x.interest}{data.frame with the instance of interest}
-\item{y.hat.interest}{predicted value for instance of interest}
-\item{y.hat.averate}{average predicted value for data \code{X}}
-\item{x}{method to get/set the instance. See examples for usage.}
-\item{data()}{method to extract the results of the shapley estimations.
-Returns a data.frame with the feature names (\code{feature}) and contributions to the prediction (\code{phi})}
-\item{plot()}{method to plot the Shapley value.
See \link{plot.Shapley}} - -\item{\code{run()}}{[internal] method to run the interpretability method. Use \code{obj$run(force = TRUE)} to force a rerun.} -General R6 methods -\item{\code{clone()}}{[internal] method to clone the R6 object.} -\item{\code{initialize()}}{[internal] method to initialize the R6 object.} -} -\description{ -shapley() computes feature contributions for single predictions with the Shapley value, an approach from cooperative game theory approach. -The features values of an instance cooperate to achieve the prediction. -shapley() fairly distributes the difference of the instance's prediction and the datasets average prediction among the features. -A features contribution can be negative. -} -\details{ -See TODO: BOOK REFERENCE -} -\examples{ -# First we fit a machine learning model on the Boston housing data -library("randomForest") -data("Boston", package = "MASS") -mod = randomForest(medv ~ ., data = Boston, ntree = 50) -X = Boston[-which(names(Boston) == "medv")] - -# Then we explain the first instance of the dataset with the shapley() method: -x.interest = X[1,] -shap = shapley(mod, X, x.interest = x.interest) -shap - -# Look at the results in a table -shap$data() -# Or as a plot -plot(shap) - -# shapley() also works with multiclass classification -library("randomForest") -mod = randomForest(Species ~ ., data= iris, ntree=50) -X = iris[-which(names(iris) == 'Species')] - -# Then we explain the first instance of the dataset with the shapley() method: -shap = shapley(mod, X, x.interest = X[1,], predict.args = list(type='prob')) -shap$data() -plot(shap) - -# You can also focus on one class -shap = shapley(mod, X, x.interest = X[1,], class = 2, predict.args = list(type='prob')) -shap$data() -plot(shap) - -} -\references{ -Strumbelj, E., Kononenko, I. (2014). Explaining prediction models and individual predictions with feature contributions. Knowledge and Information Systems, 41(3), 647-665. https://doi.org/10.1007/s10115-013-0679-x -} -\seealso{ -A different way to explain predictions: \link{lime} - -\link{lime} -} diff --git a/man/tree.surrogate.Rd b/man/tree.surrogate.Rd deleted file mode 100644 index 32807d793..000000000 --- a/man/tree.surrogate.Rd +++ /dev/null @@ -1,101 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/tree-surrogate.r -\name{tree.surrogate} -\alias{tree.surrogate} -\title{Decision tree surrogate model} -\usage{ -tree.surrogate(object, X, sample.size = 100, class = NULL, maxdepth = 2, - tree.args = NULL, ...) -} -\arguments{ -\item{object}{The machine learning model. Different types are allowed. -Recommended are mlr WrappedModel and caret train objects. The \code{object} can also be -a function that predicts the outcome given features or anything with an S3 predict function, -like an object from class \code{lm}.} - -\item{X}{data.frame with the data for the prediction model} - -\item{sample.size}{The number of instances to be sampled from X.} - -\item{class}{In case of classification, class specifies the class for which to predict the probability. -By default the multiclass classification is done.} - -\item{maxdepth}{The maximum depth of the tree. Default is 2.} - -\item{tree.args}{A list with further arguments for \code{ctree}} - -\item{...}{Further arguments for the prediction method.} -} -\value{ -A TreeSurrogate object (R6). Its methods and variables can be accessed with the \code{$}-operator: -\item{tree}{the fitted tree of class \code{party}. 
See also \link[partykit]{ctree}.} -\item{maxdepth}{the maximal tree depth set by the user.} -\item{data()}{method to extract the results of the tree. -Returns the sampled feature X together with the leaf node information (columns ..node and ..path) -and the predicted \eqn{\hat{y}} for tree and machine learning model (columns starting with ..y.hat).} -\item{plot()}{method to plot the leaf nodes of the surrogate decision tree. See \link{plot.TreeSurrogate}} -\item{predict()}{method to predict new data with the tree. See also \link{predict.TreeSurrogate}} - -\item{\code{run()}}{[internal] method to run the interpretability method. Use \code{obj$run(force = TRUE)} to force a rerun.} -General R6 methods -\item{\code{clone()}}{[internal] method to clone the R6 object.} -\item{\code{initialize()}}{[internal] method to initialize the R6 object.} -} -\description{ -tree.surrogate() fits a decision tree on the predictions of a machine learning model to make it interpretable. -} -\details{ -A conditional inference tree is fitted on the predicted \eqn{\hat{y}} from the machine learning model and the data \eqn{X}. -The \code{partykit} package and function are used to fit the tree. -By default a tree of maximum depth of 2 is fitted to improve interpretability. -} -\examples{ -# Fit a Random Forest on the Boston housing data set -library("randomForest") -data("Boston", package = "MASS") -mod = randomForest(medv ~ ., data = Boston, ntree = 50) - -# Fit a decision tree as a surrogate for the whole random forest -dt = tree.surrogate(mod, Boston[-which(names(Boston) == 'medv')], 200) - -# Plot the resulting leaf nodes -plot(dt) - -# Use the tree to predict new data -predict(dt, Boston[1:10,]) - -# Extract the results -dat = dt$data() -head(dat) - - -# It also works for classification -mod = randomForest(Species ~ ., data = iris, ntree = 50) - -# Fit a decision tree as a surrogate for the whole random forest -X = iris[-which(names(iris) == 'Species')] -dt = tree.surrogate(mod, X, 200, predict.args = list(type = 'prob'), maxdepth=2, class=3) - -# Plot the resulting leaf nodes -plot(dt) - -# If you want to visualise the tree directly: -plot(dt$tree) - -# Use the tree to predict new data -set.seed(42) -iris.sample = X[sample(1:nrow(X), 10),] -predict(dt, iris.sample) -predict(dt, iris.sample, type = 'class') - -# Extract the dataset -dat = dt$data() -head(dat) -} -\seealso{ -\link{predict.TreeSurrogate} -\link{plot.TreeSurrogate} - -For the tree implementation -\link[partykit]{ctree} -} diff --git a/notes.md b/notes.md deleted file mode 100644 index d9195702f..000000000 --- a/notes.md +++ /dev/null @@ -1,192 +0,0 @@ ---- -title: "Interpretability Framework for Machine Learning models" -output: html_document ---- - - -# Content of framework -Modular approach of creating model-agnostic interpretations of machine learning models, like feature importances, feature effects and surrogate models. -The framework covers permutation feature importance, partial dependence plots, LIME, etc. -The framework works for any method that analysis input / output pairs of a function $\hat{f}(X)$ - -Goal: R package and later JSS paper - -![](framework.jpg) - -# Possible paper titles: -Machine learning interpretability as an intervention framework. -An intervention-based framework for model-agnostic interpretability methods. - - -# Core class: "Experiment" -Claim: Any model-agnostic measure can be computed using an "Experiment" framework. -An experiment is initialised by the machine learning model $\hat{f}$ and data $X$. 
-Following steps are executed: - -1. DESIGN: Create an experimental design - 1. SAMPLE data based on $X$. - 1. Do an INTERVENTION on the data. ($X \rightarrow X^*$) - 1. Choose QUANTITY of interest Q (often $\hat{y}$ itself) -1. EXECUTE: Predict $\hat{y}^*$ from $X^*$ with $\hat{f}$ and compute $Q$. ($X^* \rightarrow \hat{y}^*$ and $\hat{y}^* \rightarrow Q$) -1. ANALYSE: Aggregate the quantity of interest Q. ($X^*, Q \rightarrow \text{summary}$) -1. PRESENT: Visualise or print analysis. - -Optional in ANALYSE step: Map features to interpretable features. - - -## Examples - -### Partial dependence plots (for one feature) -1. DESIGN - 1. sample from $X$ - 1. intervention: replicate $X$ $length(grid)$ times,replace $x_j$ with value from grid in each replica - 1. Q is simply $\hat{y}$ -1. EXECUTE: Predict -1. ANALYSE: Average Q by unique $x_j$ value -1. PRESENT: Plot curve - - -### LIME -1. DESIGN - 1. sample $X^*$ from $X$ (in the weird mean, std. way) - 1. no intervention - 1. Q is simply $\hat{y}$ -1. EXECUTE: Predict -1. ANALYSE: Estimated weighted Lasso on $\hat{y}^* \sim X^*$ -1. PRESENT: Visualise $\mathbf{\beta}$ - - -### Shapley for causal effect attribution for one instance -1. DESIGN - 1. sample $X^*$ from $X$ - 1. INTERVENTION: Recombine samples and instance of interest - 1. Q is difference in prediction with and without feature $Q = \hat{y} - \hat{y}_{X_{-j}}$ -1. EXECUTE: Predict -1. ANALYSE: Average Q per feature -1. PRESENT: Visualise Q per feature - - -### Permutation feature importance (for one feature) -1. DESIGN - 1. sample $X^*$ from $X$ - 1. INTERVENTION: Shuffle $x_j$ - 1. Q is difference in performance -1. EXECUTE: Predict -1. ANALYSE: Average Q -1. PRESENT: Visualise Q - -### Global surrogate models -1. DESIGN - 1. sample $X^*$ from $X$ - 1. INTERVENTION: None - 1. $Q = \hat{y}$ -1. EXECUTE: Predict -1. ANALYSE: Fit interpretable model, e.g. decision tree -1. PRESENT: Visualise or print interpretable model - - -# Core class: Interpretation() - -An interpretation object can hold multiple experiments. -The common elements of the experiments are the model and the data sampler. -The interpreter can memoise the predictions of the black box model to improve computational speed. - - - -# Notes and remarks - -- Focus is on tabular data -- Output is often $E(\hat{y} | X_{something})$. Basically always working with expected values (E(.)), but sometimes this collapses to single point, for example in ICE -- Checkout if you can use some bound to calculate the number of samples you need. See "Algorithmic Transparency viaQuantitative Input Influence", chapter A. Computing Power Indices. They use epsilon-delta approximation and Hoeffding bound. https://en.wikipedia.org/wiki/Hoeffding%27s_inequality. If you can squeeze the quantity of interest between interval $[0,1]$ you can use the Hoeffding bound. Maybe the settings has to be that the quantity of interest is always between $[0,1]$. Should be doable by rescaling (e.g. $y_{star} = \frac{y_{hat} - min(\hat{y})}{max(\hat{y}) - min(\hat{y})}$ -- The interventions require to have some marginal distribution, which almost makes it a bayesian approach. Maybe you could frame it as Bayesian approach. Intervention = using prior of one feature, which might be estimated from the marginal distribution of a feature for example. Posterior distribution has no meaning though, right? 
-- Confidence intervals can be introduced by all measurements by repeating the procedure framework, which ultimately yields bootstrapped results for the quantity of interest, depending on your generate.data.fun. -- The outcome of the interpretability method is, so far, always one number (or plot) per feature. Maybe this is useful for formulating and simplyfying the framework. -- All methods work with marginal and or conditional expectations. ICE: conditional. PDP: marginal. Difference also to sensitivity and uncerntainty analysis approaches: They often are not defined with distribution of input variables in mind and use grid/random/pseudo-random input. -- PDPs can be seen from the functional Anova view: $f_{x_u}(x) = \int_{x_{-u}}(f(x) - f(\emptyset))dx_{-u} = \int_{x_{-u}}(f(x))dx_{-u} - \mu$, where $\mu = \bar{f}(x)$ (the mean prediction over the data). PDPs are the same, but without the $\mu$ constant. This is another argument that the framework aggregation function does something like Q(b efore after intervention) - Q(after intervention). -- When you want to evaluate the f(x) and get one number, you have to decide for each feature, if you want to condition on it (= fix to value) or if you want to marginalise it (= average over distribution for the feature). -- Iml framework is using Monte Carlo A lot -- There are 3 types of interpretability methods, and a method can have more than one type: - - Feature importance: Ranks features. Example permutation feature importance. Makes use of interventions. Aggregation usually single number per feature. - - Feature effects: Summarises how feature contribute to prediction. Example: PDp, Shapley. Makes use of interventions, aggregation either conditional on feature grid or one number per feature. - - Surrogate models: Fit an interpretable model on black box. Example: LIME(, Shap?). Makes no use of intervention. Makes use of weights. Aggregation is a model fit. -- common among all methods: the resulting aggregation is always per feature. only outlier: surrogate models, when model is not a linear combiniation of features (for example a tree). -- Within the framework, the features change between different modes: - - conditioned feature: The f() is conditioned on certain values of the feature - - marginalised feature: f() is marginalised over the feature. Usually done via Monte carlo - - interventioned feature: The feature receives an intervention before it goes into f(). Then usually in a conditioned way. - - Examples: - - ICE: Condition on all features of instance of interest, excluding the feature of interest. Intervene on feature of interest. - - PDP: Marginalise all features, excluding feature of interest. Intervene and condition on feature of interest. - - Shapley: In each loop: Condition on "players in the room" (=intervention?), marginalise over players not in the room. Then exclude feature of interest from players in the room and do the same. Calculate the difference - - LIME: Doesnt really fit these steps. - - Permutation feature importance: Marginalise over all features after intervening on feature of interest. - Feature importance: Quantity of interest is either performance or variance of model -- Feature effects: Quantity of interest is $\hat(y)$ -- Surrogate models: Quantity of interest is: g, where g is from class of interpretable models and fidel to f? 
- - -# Value of package and paper -- High abstraction view of interpretability methods -- Unification of methods -- Explore new things that arises naturally from modular system, like explaining clusters against dataset, or instance against cluster, or instance against archetype (=single instance as well). -- R package -- Extend "Algorithmic Transparency viaQuantitative Input Influence" by allowing numerical features, showing how it is the same as ICE / PermImp / ..., discovering more use cases (cluster vs complete data etc.) -- By showing the similarity between the algos and they all go through the same predict(someX), results can be memoised and reused. The framework will make it more efficient when you want to have multiple iml analyses. - -# TODOs: -- Read DeepLift paper and decide if it fits into the framework or if it even should. -- List interesting quantities of interest (probability for class, probability for own class, numerical prediction, Performance, Group differences (e.g. men and women). -- Demonstrate how to switch between local and global explanations within the framework -- Demonstrate how to differentiate between feature effects and feature importance within the framework. - - - -## Modules -See also implementation in sensitivity package and paper: Algorithmic Transparency viaQuantitative Input Influence - -- Quantity of interest, e.g. performance, prediction, probability for certain class -- Difference measure for comparing quantity of interest before and after intervention -- *Instance selector*: Which instances should be explained? Ranging from a single instance to cluster (or even all instances, e.g. pdp) -- *Feature selector*: Which features should be integrated. For LIME, shap, shapley: all. For pdp, it is only one or two features at a time. This point could be omitted as well or at least not be the biggest point. Feature selector step could be omitted in the overview, since it is not important and does not really allow to find new things that arise from the modular system. -- *Quantity of interest* What shall be compared? DPD, ICE, LIME, Shapley, Shap: Predicted value. Feature Importance: Performance. Sobol/Shapley for var: Variance -- *Reference/background/intervention sampler*: Function that returns n samples. Against which data (instances) should the prediction of selected instance be compared to? For shapley or pdp the function draws samples from the training data. For LIME it draw from the data points that are generated each feature independently with mean and variance sampling. This sampling can also be seen as doing an intervention on this variable. For ICE it is not clear, whether the single lines are generated from the background sampler or a selected X.interest: Those are different point of views. If you want ICE for specific datapoint, no data generator needed. If you are only interested in the distribution of lines, you would just use the data generator. -- *Weighting*: How the background data should be weighted. In LIME it is with the kernels. In shapley it is 1 for each sampled background. In shap paper it is weighted by the shap kernel. For better overview, this step could be integrated into the background sampler strategy. Disadvantage: If seperated, it is clearer in LIME that samples can be reused for each explanation. -- *Intervention* (dpd: create multiple datasets from trainding dat, each time shuffle one variable.) 
-It is a function g(x, x') that maps from the Cartesian product into the feature space: $X \times X \rightarrow X$. -One x comes from the reference distribution P_X, the other from some other distribution (the marginal distribution for permutation feature importance; a normal distribution with one feature's mean and variance for LIME), from a grid (PDP, ICE), or just from one instance. The intervention breaks the association between features. For ICE and PDP: force a grid. For permutation importance: force a shuffle of one variable. -- *Get predictions* (always the same, I guess? Only more complicated if you have some kind of adaptive sampling procedure). Or, more generally: measure some quantity of interest, which is likely a function of the prediction. In the case of permutation feature importance it is the performance you measure. -- *Fitting process / statistic on the collected data* (LIME: linear model, PDP: average by x-grid, Shapley: average, SHAP: linear model). Apart from LIME, many follow the formula $\hat{y}_{\text{instance of interest}} - \hat{y}_{\text{background}}$. -- *Attribution method*: How changes in the quantity of interest are attributed to features (in case multiple features are possibly involved). For Shapley, SHAP: Shapley value attribution. For LIME it means fitting a linear model. For ICE, PDP and permutation feature importance it is not necessary, because only one variable was in focus at a time. -- *Repeat and overlay*: How often should the previous steps be repeated and overlaid? Examples: ICE (multiple runs with different instances), PDP (multiple runs with different features), aLIME (multiple runs with different instances). - -## How to do comparisons using this framework -- What do you want to compare (e.g. a single instance) against what (e.g. the complete data distribution)? -- What do you want to measure (e.g. the prediction or the performance)? -- How do you want to compare the measures (e.g. as a difference)? -- How do you want to attribute comparison differences (e.g. Shapley values or a simple comparison, as in ICE or PDP)? -- How do you want to aggregate the comparisons across instances? -- How do you want to aggregate the comparisons across features? - -## Similar papers - -### "Algorithmic Transparency via Quantitative Input Influence" -- Follows exactly this framework. It actually is kind of this framework, but not written down properly and without showing that it is all the same scheme. -- They don't compare it with PDP, ICE, permutation importance and so on. -- They don't show what it extends to (except, of course, for the things they use in their own transparency report). -- They only talk about categorical features; I will generalise it towards numerical features. Also ordinal? -- They write: "A thorough exploration of other points in this design space remains an important direction for future work." See also the next chapter: new things that arise from the modular view. So I am basically doing what they suggest. -- Their notation is poor. I will do it better, with the usual notation from the statistics field. -- What's weird in their paper: they use Shapley values only for their "Set QII", which involves the influence of multiple variables, but not for a single variable.
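-As announced in the Modules section, here is a hedged R sketch that walks through the modules for permutation feature importance; f, X, y and loss are assumed inputs, and permutation.importance is a made-up name for illustration, not the package API:

```r
# Hypothetical decomposition of permutation feature importance into the
# framework modules. Assumed inputs: f(newdata) returns a numeric
# prediction vector, X is a data.frame, y the target, loss(actual, predicted).
permutation.importance = function(f, X, y, loss, n.repetitions = 10) {
  q.before = loss(y, f(X))  # quantity of interest before the intervention
  sapply(names(X), function(feature) {
    q.after = replicate(n.repetitions, {
      X.int = X
      # intervention: shuffle one variable, breaking its association
      # with the other features and with y
      X.int[[feature]] = sample(X.int[[feature]])
      loss(y, f(X.int))     # quantity of interest after the intervention
    })
    # difference measure comparing before and after the intervention;
    # repeat-and-overlay is the mean over the repetitions
    mean(q.after) - q.before
  })
}

mse = function(actual, predicted) mean((actual - predicted)^2)
# Example: permutation.importance(function(d) d$a + d$b, X, y, mse)
```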
- -## New things arising from the modular view -- explaining clusters against the dataset -- explaining an instance against a cluster -- explaining an instance against another instance, e.g. an archetype (= a single instance as well) -- relative feature importance, by choosing a single instance and comparing its feature importance against the background -- feature importance within a cluster compared to the whole dataset -- weighted feature importance measures -- PDPs are essentially a special case of the Shapley value: we only check the contribution of one variable and do so multiple times to get a curve over the feature range. -- permutation feature importance for the whole dataset with Shapley values: value = performance with all variables minus performance with only the mean prediction. Attribute the performance gains with the Shapley value, by perturbing. - - What does not work: Choosing all instances in the instance selector for Shapley should yield an estimate of zero?? diff --git a/tests/testthat.R b/tests/testthat.R index 50f921b97..58d9d3581 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -1,4 +1,5 @@ library(testthat) +library(checkmate) library(iml) test_check("iml") diff --git a/tests/testthat/helper-predictor.R b/tests/testthat/helper-predictor.R new file mode 100644 index 000000000..3a9d779d0 --- /dev/null +++ b/tests/testthat/helper-predictor.R @@ -0,0 +1,21 @@ + +f = function(newdata, multi = FALSE) { + pred = unlist(newdata[1] + newdata[2] + 100 * (newdata[3] == "a")) / (155) + dat = data.frame(pred = pred) + if (multi) dat$pred2 = 1 - dat$pred + dat +} + +X = data.frame(a = c(1, 2, 3, 4, 5), + b = c(10, 20, 30, 40, 50), + c = factor(c("a", "b", "c", "a", "b")), + d = factor(c("A", "A", "B", "B", "B"))) + +set.seed(12) +y = f(X) + rnorm(nrow(X)) +y2 = factor(ifelse(X$b + X$a < 20, "pred", "pred2")) + +predictor1 = Predictor$new(data = X, y = y, predict.fun = f) +predict.fun = function(obj, newdata) obj(newdata, multi = TRUE) +predictor2 = Predictor$new(f, data = X, y = y2, predict.fun = predict.fun) +predictor3 = Predictor$new(f, data = X, predict.fun = predict.fun, class = 2) diff --git a/tests/testthat/test-data-sampler.R b/tests/testthat/test-Data.R similarity index 53% rename from tests/testthat/test-data-sampler.R rename to tests/testthat/test-Data.R index 4ae12f733..8f5801bdb 100644 --- a/tests/testthat/test-data-sampler.R +++ b/tests/testthat/test-Data.R @@ -1,28 +1,28 @@ -context('DataSampler') -ds = DataSampler$new(iris) +context("Data") +ds = Data$new(iris) sample1 = ds$sample(3) n = 100 set.seed(42) sample1.weighted = ds$sample(n, prob = c(2,1, rep(0, times = nrow(iris)- 2))) -ds.weighted = DataSampler$new(iris, prob = c(2,1, rep(0, times = nrow(iris)- 2))) +ds.weighted = Data$new(iris, prob = c(2,1, rep(0, times = nrow(iris)- 2))) set.seed(42) sample2 = ds.weighted$sample(n) -test_that('samplings works',{ - expect_class(sample1, 'data.frame') +test_that("sampling works",{ + expect_class(sample1, "data.frame") expect_equal(colnames(sample1), colnames(iris)) expect_equal(ds$get.x(), iris) expect_equal(ds$feature.names, colnames(iris)) - expect_equal(ds$feature.types, c(Sepal.Length='numerical', - Sepal.Width = 'numerical', Petal.Length = 'numerical', Petal.Width = 'numerical', - Species = 'categorical')) + expect_equal(ds$feature.types, c(Sepal.Length="numerical", + Sepal.Width = "numerical", Petal.Length = "numerical", Petal.Width = "numerical", + Species = "categorical")) expect_equal(ds$n.features, 5) }) -test_that('weighted sampling works', { +test_that("weighted sampling works", { expect_equal(nrow(unique(sample1.weighted)), 2) expect_equal(nrow(unique(sample2)), 2) expect_equal(sample1.weighted, sample2) diff --git a/tests/testthat/test-FeatureImp.R b/tests/testthat/test-FeatureImp.R new file mode 100644 index 000000000..d04dc1fc0 --- /dev/null +++ b/tests/testthat/test-FeatureImp.R @@ -0,0 +1,90 @@ +context("FeatureImp()") + 
+#set.seed(42) + +expectedColnames = c("feature", "original.error", "permutationError", "importance") + +test_that("FeatureImp works for single output", { + + var.imp = FeatureImp$new(predictor1, loss = "mse") + dat = var.imp$results + expect_class(dat, "data.frame") + expect_equal(colnames(dat), expectedColnames) + expect_equal(nrow(dat), ncol(X)) + p = plot(var.imp) + expect_s3_class(p, c("gg", "ggplot")) + p + + + var.imp = FeatureImp$new(predictor1, loss = "mse", method = "cartesian") + dat = var.imp$results + # Making sure the result is sorted by decreasing importance + expect_class(dat, "data.frame") + expect_equal(dat$importance, dat[order(dat$importance, decreasing = TRUE),]$importance) + expect_equal(colnames(dat), expectedColnames) + expect_equal(nrow(dat), ncol(X)) + p = plot(var.imp) + expect_s3_class(p, c("gg", "ggplot")) + p + + X.exact = data.frame(x1 = c(1,2,3), x2 = c(9,4,2)) + y.exact = c(2,3,4) + f.exact = Predictor$new(predict.fun = function(newdata) newdata[[1]], data = X.exact, y = y.exact) + # creates a problem on win builder + # model.error = Metrics::mse(y.exact, f.exact$predict(X.exact)) + model.error = 1 + cart.indices = c(1,1,2,2,3,3) + cartesian.error = Metrics::mse(y.exact[cart.indices], c(2,3,1,3,1,2)) + + var.imp = FeatureImp$new(f.exact, loss = "mse", method = "cartesian") + dat = var.imp$results + expect_class(dat, "data.frame") + expect_equal(dat$importance, c(cartesian.error, 1)) + expect_equal(colnames(dat), expectedColnames) + expect_equal(model.error, var.imp$original.error) + expect_equal(nrow(dat), ncol(X.exact)) + p = plot(var.imp) + expect_s3_class(p, c("gg", "ggplot")) + p + + p = plot(var.imp, sort = FALSE) + expect_s3_class(p, c("gg", "ggplot")) + p + + p = var.imp$plot() + expect_s3_class(p, c("gg", "ggplot")) + p + +}) + +test_that("FeatureImp works for single output and function as loss", { + + var.imp = FeatureImp$new(predictor1, loss = Metrics::mse) + dat = var.imp$results + expect_class(dat, "data.frame") + # Making sure the result is sorted by decreasing importance + expect_equal(dat$importance, dat[order(dat$importance, decreasing = TRUE),]$importance) + expect_equal(colnames(dat), expectedColnames) + expect_equal(nrow(dat), ncol(X)) + p = plot(var.imp) + expect_s3_class(p, c("gg", "ggplot")) + p + +}) + +test_that("FeatureImp works for multiple output",{ + var.imp = FeatureImp$new(predictor2, loss = "ce") + dat = var.imp$results + expect_class(dat, "data.frame") + expect_equal(colnames(dat), expectedColnames) + expect_equal(nrow(dat), ncol(X)) + p = plot(var.imp) + expect_s3_class(p, c("gg", "ggplot")) + p +}) + + +test_that("FeatureImp fails without target vector",{ + predictor2 = Predictor$new(f, data = X, predict.fun = predict.fun) + expect_error(FeatureImp$new(predictor2, loss = "ce")) +}) \ No newline at end of file diff --git a/tests/testthat/test-ice.R b/tests/testthat/test-Ice.R similarity index 54% rename from tests/testthat/test-ice.R rename to tests/testthat/test-Ice.R index a75a8f4a5..43b3cbbde 100644 --- a/tests/testthat/test-ice.R +++ b/tests/testthat/test-Ice.R @@ -1,23 +1,11 @@ -context('ice()') +context("Ice()") - -f = function(x, multi = FALSE){ - pred = unlist(x[1] + x[2] + 100 * (x[3] == 'a')) - dat = data.frame(pred = pred) - if(multi) dat$pred2 = 0.2 * dat$pred + 7 - dat -} -X = data.frame(a = c(1, 2, 3, 4, 5), - b = c(10, 20, 30, 40, 50), - c = factor(c("a", "b", "c", "a", "b")), - d = factor(c("A", "A", "B", "B", "B"))) - - -test_that('ice works for single output and single feature',{ +test_that("Ice works for 
single output and single feature", { grid.size = 10 - ice.obj = ice(f, X, feature = 1, grid.size = grid.size) - dat = ice.obj$data() + ice.obj = Ice$new(predictor1, feature = 1, grid.size = grid.size) + dat = ice.obj$results + expect_class(dat, "data.frame") expect_equal(colnames(dat), c("a", "..individual", "y.hat")) expect_equal(nrow(dat), grid.size * nrow(X)) expect_equal(nrow(unique(dat)), grid.size * nrow(X)) @@ -29,11 +17,12 @@ test_that('ice works for single output and single feature',{ }) -test_that('ice works for multiple output',{ +test_that("Ice works for multiple output", { grid.size = 10 - ice.obj = ice(function(x){f(x, multi = TRUE)}, X, feature = c(1), grid.size = grid.size) - dat = ice.obj$data() + ice.obj = Ice$new(predictor2, feature = "a", grid.size = grid.size) + dat = ice.obj$results + expect_class(dat, "data.frame") expect_equal(colnames(dat), c("a", "..individual", "..class.name", "y.hat")) expect_equal(nrow(dat), grid.size * nrow(X)*2) expect_equal(nrow(unique(dat)), grid.size * nrow(X) * 2) @@ -47,24 +36,31 @@ test_that('ice works for multiple output',{ }) -test_that('centered ice works for multiple output',{ +test_that("centered Ice works for multiple output", { grid.size = 10 - ice.obj = ice(function(x){f(x, multi = TRUE)}, X, feature = c(1), grid.size = grid.size, center = 10) - dat = ice.obj$data() + ice.obj = Ice$new(predictor2, feature = "a", grid.size = grid.size, center = 10) + dat = ice.obj$results + expect_equal(ice.obj$center.at, 10) + expect_class(dat, "data.frame") expect_equal(colnames(dat), c("a", "..individual","..class.name", "y.hat")) expect_equal(nrow(dat), (grid.size + 1) * nrow(X) * 2) expect_equal(nrow(unique(dat)), (grid.size + 1) * nrow(X) * 2) expect_equal(max(dat$a), 10) expect_equal(min(dat$a), 1) - expect_equal(max(dat$y.hat), 0) p = plot(ice.obj) expect_s3_class(p, c("gg", "ggplot")) p - ice.obj$center.at = -1 - dat = ice.obj$data() + ice.obj$center(-1) + expect_equal(ice.obj$center.at, -1) + + expect_warning({ice.obj$center.at = 10}) + expect_equal(ice.obj$center.at, -1) + + dat = ice.obj$results + expect_class(dat, "data.frame") expect_equal(max(dat$a), 5) expect_equal(min(dat$a), -1) diff --git a/tests/testthat/test-LocalModel.R b/tests/testthat/test-LocalModel.R new file mode 100644 index 000000000..9f20880bc --- /dev/null +++ b/tests/testthat/test-LocalModel.R @@ -0,0 +1,55 @@ +context("LocalModel()") + +set.seed(12) +expected.colnames = c("beta", "x.recoded", "effect", "x.original", "feature", "feature.value") + +test_that("LocalModel works for single output and single feature", { + + x.interest = X[2,] + k = 2 + set.seed(42) + LocalModel1 = LocalModel$new(predictor1, x.interest=x.interest, k = k) + dat = LocalModel1$results + expect_class(dat, "data.frame") + expect_equal(colnames(dat), expected.colnames) + expect_lte(nrow(dat), k) + p = plot(LocalModel1) + expect_s3_class(p, c("gg", "ggplot")) + p + + x.interest2 = X[4,] + LocalModel1$explain(x.interest2) + dat = LocalModel1$results + expect_equal(colnames(dat), expected.colnames) + expect_lte(nrow(dat), k) + + pred = predict(LocalModel1, newdata = X[3:4,]) + expect_data_frame(pred, nrows = 2) + expect_equal(colnames(pred), "prediction") +}) + +test_that("LocalModel works for multiple output", { + + library('rpart') + clf = rpart(Species ~ ., data = iris) + mod = Predictor$new(clf, data = iris) + x.interest = iris[1,] + k = 1 + set.seed(42) + # rbind X a few times to eliminate glm warning + LocalModel1 = LocalModel$new(mod, x.interest, k = k) + dat = LocalModel1$results + 
expect_equal(colnames(dat), c(expected.colnames, "..class")) + expect_lte(nrow(dat), k * 3) + pred2 = predict(LocalModel1, iris[c(2,3),]) + expect_class(dat, "data.frame") + expect_data_frame(pred2, nrows=2) + expect_equal(colnames(pred2), c("setosa", "versicolor", "virginica")) + + p = plot(LocalModel1) + expect_s3_class(p, c("gg", "ggplot")) + p +}) + + + diff --git a/tests/testthat/test-pdp.R b/tests/testthat/test-PartialDependence.R similarity index 60% rename from tests/testthat/test-pdp.R rename to tests/testthat/test-PartialDependence.R index e48779c3c..ec5309c07 100644 --- a/tests/testthat/test-pdp.R +++ b/tests/testthat/test-PartialDependence.R @@ -1,23 +1,11 @@ -context('pdp()') +context("PartialDependence()") - -f = function(x, multi = FALSE){ - pred = unlist(x[1] + x[2] + 100 * (x[3] == 'a')) - dat = data.frame(pred = pred) - if(multi) dat$pred2 = 0.2 * dat$pred + 7 - dat -} -X = data.frame(a = c(1, 2, 3, 4, 5), - b = c(10, 20, 30, 40, 50), - c = factor(c("a", "b", "c", "a", "b")), - d = factor(c("A", "A", "B", "B", "B"))) - - -test_that('pdp works for single output and single feature',{ +test_that("PartialDependence works for single output and single feature", { grid.size = 10 - pdp.obj = pdp(f, X, feature = 1, grid.size = grid.size) - dat = pdp.obj$data() + pdp.obj = PartialDependence$new(predictor1, feature = 1, grid.size = grid.size) + dat = pdp.obj$results + expect_class(dat, "data.frame") expect_equal(colnames(dat), c("a", "y.hat")) expect_equal(nrow(dat), grid.size) expect_equal(nrow(unique(dat)), grid.size) @@ -28,9 +16,14 @@ test_that('pdp works for single output and single feature',{ p - - pdp.obj$feature = 2 - dat = pdp.obj$data() + expect_equal(pdp.obj$feature.name, "a") + pdp.obj$set.feature(3) + expect_equal(pdp.obj$feature.name, "c") + pdp.obj$set.feature("b") + expect_equal(pdp.obj$feature.name, "b") + + dat = pdp.obj$results + expect_class(dat, "data.frame") expect_equal(colnames(dat), c("b", "y.hat")) expect_equal(nrow(dat), grid.size) expect_equal(nrow(unique(dat)), grid.size) @@ -38,12 +31,13 @@ test_that('pdp works for single output and single feature',{ expect_equal(min(dat$b), 10) }) -test_that('pdp works for single output and 2 features, 2D grid.size',{ +test_that("PartialDependence works for single output and 2 features, 2D grid.size", { ## two numerical features with 2 grid.sizes grid.size = c(10,2) - pdp.obj = pdp(f, X, feature = c(1,2), grid.size = grid.size) - dat = pdp.obj$data() + pdp.obj = PartialDependence$new(predictor1, feature = c("a", "b"), grid.size = grid.size) + dat = pdp.obj$results + expect_class(dat, "data.frame") expect_equal(colnames(dat), c("a", "b", "y.hat")) expect_equal(nrow(dat), grid.size[1] * grid.size[2]) expect_equal(nrow(unique(dat)), grid.size[1] * grid.size[2]) @@ -56,13 +50,14 @@ test_that('pdp works for single output and 2 features, 2D grid.size',{ p }) -test_that('pdp works for single output and 2 numerical features, 1D grid.size',{ +test_that("PartialDependence works for single output and 2 numerical features, 1D grid.size", { ## Two numerical with same grid.size grid.size = 10 - pdp.obj = pdp(f, X, feature = c(1,2), grid.size = grid.size) - dat = pdp.obj$data() + pdp.obj = PartialDependence$new(predictor1, feature = c(1,2), grid.size = grid.size) + dat = pdp.obj$results + expect_class(dat, "data.frame") expect_equal(colnames(dat), c("a", "b", "y.hat")) expect_equal(nrow(dat), grid.size * grid.size) expect_equal(nrow(unique(dat)), grid.size * grid.size) @@ -74,13 +69,14 @@ test_that('pdp works for single output and 2 
numerical features, 1D grid.size',{ p }) - -test_that('pdp works for single output and numerical + categorical feature',{ + +test_that("PartialDependence works for single output and numerical + categorical feature", { ## One feature categorical grid.size = 11 - pdp.obj = pdp(f, X, feature = c(1,3), grid.size = grid.size) - dat = pdp.obj$data() + pdp.obj = PartialDependence$new(predictor1, feature = c(1,3), grid.size = grid.size) + dat = pdp.obj$results + expect_class(dat, "data.frame") expect_equal(colnames(dat), c("a", "c", "y.hat")) expect_equal(nrow(dat), grid.size[1] * length(unique(X[,3]))) expect_equal(nrow(unique(dat)), grid.size * length(unique(X[,3]))) @@ -92,8 +88,9 @@ test_that('pdp works for single output and numerical + categorical feature',{ ## One feature categorical grid.size = c(7,9) - pdp.obj = pdp(f, X, feature = c(3,2), grid.size = grid.size) - dat = pdp.obj$data() + pdp.obj = PartialDependence$new(predictor1, feature = c(3,2), grid.size = grid.size) + dat = pdp.obj$results + expect_class(dat, "data.frame") expect_equal(colnames(dat), c("c", "b", "y.hat")) expect_equal(nrow(dat), grid.size[2] * length(unique(X[,3]))) expect_equal(nrow(unique(dat)), grid.size[2] * length(unique(X[,3]))) @@ -105,11 +102,12 @@ test_that('pdp works for single output and numerical + categorical feature',{ }) -test_that('pdp works for multiple output',{ +test_that("PartialDependence works for multiple output", { grid.size = 10 - pdp.obj = pdp(function(x){f(x, multi=TRUE)}, X, feature = c(1), grid.size = grid.size) - dat = pdp.obj$data() + pdp.obj = PartialDependence$new(predictor2, feature = "a", grid.size = grid.size) + dat = pdp.obj$results + expect_class(dat, "data.frame") expect_equal(colnames(dat), c("a", "..class.name", "y.hat")) expect_equal(nrow(dat), grid.size * 2) expect_equal(nrow(unique(dat)), grid.size * 2) @@ -120,4 +118,4 @@ test_that('pdp works for multiple output',{ expect_s3_class(p, c("gg", "ggplot")) p -}) \ No newline at end of file + }) diff --git a/tests/testthat/test-Predictor.R b/tests/testthat/test-Predictor.R new file mode 100644 index 000000000..760b7c7e3 --- /dev/null +++ b/tests/testthat/test-Predictor.R @@ -0,0 +1,126 @@ +context("Predictor") + + +library("mlr") +library("randomForest") +library("caret") + + +## mlr +task = mlr::makeClassifTask(data = iris, target = "Species") +lrn = mlr::makeLearner("classif.randomForest", predict.type = "prob") +mod.mlr = mlr::train(lrn, task) +predictor.mlr = Predictor$new(mod.mlr, data = iris) + +# S3 predict +mod.S3 = mod.mlr$learner.model +predict.fun = function(object, newdata) predict(object, newdata, type = "prob") +predictor.S3 = Predictor$new(mod.S3, data = iris, predict.fun = predict.fun) + +# caret +mod.caret = caret::train(Species ~ ., data = iris, method = "knn", + trControl = caret::trainControl(method = "cv")) +predictor.caret = Predictor$new(mod.caret, data = iris) + +# function +mod.f = function(newdata) { + predict(mod.caret, newdata = newdata, type = "prob") +} +predictor.f = Predictor$new(predict.fun = mod.f, data = iris) +iris.test = iris[c(2,20, 100, 150), c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")] +prediction.f = predictor.f$predict(iris.test) + + +test_that("equivalence", { + expect_equivalent(prediction.f, predictor.caret$predict(iris.test)) + expect_equivalent(predictor.mlr$predict(iris.test), predictor.S3$predict(iris.test)) + +}) + +test_that("f works", { + expect_equal(colnames(prediction.f), c("setosa", "versicolor", "virginica")) + expect_s3_class(prediction.f, "data.frame") 
+ predictor.f.1 = Predictor$new(predict.fun = mod.f, class = 1, data = iris) + expect_equal(prediction.f[,1], predictor.f.1$predict(iris.test)$setosa) +}) + + +# Test single class predictions + + + +## mlr +predictor.mlr = Predictor$new(mod.mlr, class = 2, data = iris) +# S3 predict +predictor.S3 = Predictor$new(mod.S3, class = 2, predict.fun = predict.fun, data = iris) +# caret +predictor.caret = Predictor$new(mod.caret, class = 2, data = iris) +# function +predictor.f = Predictor$new(predict.fun = mod.f, class = 2, data = iris) +prediction.f = predictor.f$predict(iris.test) +test_that("equivalence",{ + expect_equivalent(prediction.f, predictor.caret$predict(iris.test)) + expect_equivalent(predictor.mlr$predict(iris.test), predictor.S3$predict(iris.test)) +}) + +test_that("Missing predict.type gives warning", { + task = mlr::makeClassifTask(data = iris, target = "Species") + lrn = mlr::makeLearner("classif.randomForest") + mod.mlr = mlr::train(lrn, task) + expect_warning(Predictor$new(mod.mlr, data = iris)) +}) + + + + +# Test numeric predictions + +data(Boston, package="MASS") +## mlr +task = mlr::makeRegrTask(data = Boston, target = "medv") +lrn = mlr::makeLearner("regr.randomForest") +mod.mlr = mlr::train(lrn, task) +predictor.mlr = Predictor$new(mod.mlr, data = Boston) + +# S3 predict +mod.S3 = mod.mlr$learner.model +predictor.S3 = Predictor$new(mod.S3, data = Boston) + +# caret +mod.caret = caret::train(medv ~ ., data = Boston, method = "knn", + trControl = caret::trainControl(method = "cv")) + predictor.caret = Predictor$new(mod.caret, data = Boston) + +# function +mod.f = function(newdata) { + predict(mod.caret, newdata = newdata) +} +predictor.f = Predictor$new(predict.fun = mod.f, data = Boston) +boston.test = Boston[c(1,2,3,4), ] +prediction.f = predictor.f$predict(boston.test) + + +test_that("equivalence", { + expect_equivalent(prediction.f, predictor.caret$predict(boston.test)) + expect_equivalent(predictor.mlr$predict(boston.test), predictor.S3$predict(boston.test)) +}) + +test_that("f works", { + expect_equal(colnames(prediction.f), c("pred")) + expect_class(prediction.f, "data.frame") +}) + + +predictor.mlr = Predictor$new(mod.mlr, class = 2, data = iris, y = iris$Species) +predictor.mlr2 = Predictor$new(mod.mlr, class = 2, data = iris, y = "Species") + + +test_that("Giving y", { + expect_equal(predictor.mlr$data$y, data.frame(..y = iris$Species)) + expect_equal(predictor.mlr2$data$y, data.frame(Species = iris$Species)) + expect_false("Species" %in% colnames(predictor.mlr2$data$X)) +}) + + + + diff --git a/tests/testthat/test-Shapley.R b/tests/testthat/test-Shapley.R new file mode 100644 index 000000000..388cce15c --- /dev/null +++ b/tests/testthat/test-Shapley.R @@ -0,0 +1,44 @@ +context("Shapley()") + +expected_colnames = c("feature", "phi", "phi.var", "feature.value") + +test_that("Shapley works for single output and single feature",{ + + x.interest = X[1,] + + set.seed(42) + shap = Shapley$new(predictor1, x.interest, sample.size = 400) + dat = shap$results + expect_class(dat, "data.frame") + expect_equal(colnames(dat), expected_colnames) + expect_equal(nrow(dat), ncol(X)) + expect_equal(sum(dat$phi), unlist(f(x.interest) - mean(f(X)$pred), use.names = FALSE), tolerance = 0.02) + p = plot(shap) + expect_s3_class(p, c("gg", "ggplot")) + p + + x.interest2 = X[2,] + shap$explain(x.interest2) + dat = shap$results + expect_class(dat, "data.frame") + expect_equal(colnames(dat), expected_colnames) + expect_equal(nrow(dat), ncol(X)) + expect_equal(sum(dat$phi), 
unlist(f(x.interest2) - mean(f(X)$pred), use.names = FALSE), tolerance = 0.02) + + shap = Shapley$new(predictor1, sample.size = 400) + shap$explain(x.interest) + +}) + +test_that("Shapley works for multiple output",{ + x.interest = X[1,] + set.seed(42) + shap = Shapley$new(predictor2, x.interest, sample.size = 400) + dat = shap$results + expect_class(dat, "data.frame") + expect_equal(colnames(dat), c("feature", "class", "phi", "phi.var", "feature.value")) + expect_equal(sum(dat$phi[dat$class == "pred"]), unlist(f(x.interest) - mean(f(X)$pred), use.names = FALSE), tolerance = 0.02) + p = plot(shap) + expect_s3_class(p, c("gg", "ggplot")) + p +}) \ No newline at end of file diff --git a/tests/testthat/test-TreeSurrogate.R b/tests/testthat/test-TreeSurrogate.R new file mode 100644 index 000000000..2eb5e1c63 --- /dev/null +++ b/tests/testthat/test-TreeSurrogate.R @@ -0,0 +1,48 @@ +context("TreeSurrogate") + +test_that("TreeSurrogate works for single output and single feature", { + tree = TreeSurrogate$new(predictor1) + dat = tree$results + expect_class(dat, "data.frame") + expect_equal(colnames(dat), c(colnames(X), "..node", "..path", "..y.hat", "..y.hat.tree")) + expect_equal(nrow(dat), nrow(X)) + p = plot(tree) + expect_s3_class(p, c("gg", "ggplot")) + expect_s3_class(tree$predict(X), c("data.frame")) + p + +}) + +test_that("TreeSurrogate works for multiple output", { + tree = TreeSurrogate$new(predictor2) + dat = tree$results + expect_class(dat, "data.frame") + expect_equal(colnames(dat), c(colnames(X), "..node", "..path", "..y.hat.pred", "..y.hat.pred2", "..y.hat.tree.pred", "..y.hat.tree.pred2")) + expect_equal(nrow(dat), nrow(X)) + p = plot(tree) + expect_s3_class(p, c("gg", "ggplot")) + expect_s3_class(tree$predict(X), c("data.frame")) + p + actual.prediction = predict(tree, X) + expect_equal(colnames(actual.prediction), c("pred", "pred2")) + expect_s3_class(actual.prediction, "data.frame") + + actual.classification = predict(tree, X[1:4,], type = "class") + expect_equal(colnames(actual.classification), "..class") + +}) + +test_that("TreeSurrogate works for multiple output with selected class", { + tree = TreeSurrogate$new(predictor3) + dat = tree$results + expect_class(dat, "data.frame") + expect_equal(colnames(dat), c(colnames(X), "..node", "..path", "..y.hat", "..y.hat.tree")) + expect_equal(nrow(dat), nrow(X)) + p = plot(tree) + expect_s3_class(p, c("gg", "ggplot")) + expect_s3_class(tree$predict(X), c("data.frame")) + p + pred = predict(tree, newdata = X[1:10, ]) + expect_equal(dim(pred), c(10, 1)) + expect_s3_class(pred, "data.frame") +}) diff --git a/tests/testthat/test-createPredictionFunction.R b/tests/testthat/test-createPredictionFunction.R new file mode 100644 index 000000000..420c63bcc --- /dev/null +++ b/tests/testthat/test-createPredictionFunction.R @@ -0,0 +1,109 @@ +context("createPredictionFunction") + + +library("mlr") +library("randomForest") +library("caret") + + +## mlr +task = mlr::makeClassifTask(data = iris, target = "Species") +lrn = mlr::makeLearner("classif.randomForest", predict.type = "prob") +mod.mlr = mlr::train(lrn, task) +predictor.mlr = createPredictionFunction(mod.mlr, "classification") + +lrn.response = mlr::makeLearner("classif.randomForest", predict.type = "response") +mod.mlr.response = mlr::train(lrn.response, task) +predictor.mlr.response = createPredictionFunction(mod.mlr.response, "classification") + + +# S3 predict +mod.S3 = mod.mlr$learner.model +predict.fun = function(object, newdata) predict(object, newdata, type = "prob") +predictor.S3 = 
createPredictionFunction(mod.S3, predict.fun = predict.fun) + +# caret +mod.caret = caret::train(Species ~ ., data = iris, method = "knn", + trControl = caret::trainControl(method = "cv")) +predictor.caret = createPredictionFunction(mod.caret, task = "classification") + +# function +mod.f = function(newdata) { + predict(mod.caret, newdata = newdata, type = "prob") +} +predictor.f = createPredictionFunction(NULL, predict.fun = mod.f, task = "classification") +iris.test = iris[c(2,20, 100, 150), c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")] +prediction.f = predictor.f(iris.test) + +test_that("output shape", { + checkmate::expect_data_frame(prediction.f, ncols = 3) + checkmate::expect_data_frame(predictor.caret(iris.test), ncols = 3) + checkmate::expect_data_frame(predictor.S3(iris.test), ncols = 3) + checkmate::expect_data_frame(predictor.mlr(iris.test), ncols = 3) + checkmate::expect_data_frame(predictor.mlr.response(iris.test), ncols = 3) +}) + + +test_that("equivalence", { + expect_equivalent(prediction.f, predictor.caret(iris.test)) + expect_equivalent(predictor.mlr(iris.test), data.frame(predictor.S3(iris.test))) +}) + +test_that("f works", { + expect_equal(colnames(prediction.f), c("setosa", "versicolor", "virginica")) + expect_s3_class(prediction.f, "data.frame") + predictor.f.1 = createPredictionFunction(NULL, predict.fun = mod.f, task = "classification") + expect_equal(prediction.f[,1], predictor.f.1(iris.test)$setosa) +}) + + +predictor.S3.2 = createPredictionFunction(mod.S3, predict.fun = NULL) + +test_that("output is label", { + expect_warning({pred = predictor.S3.2(iris)}) + checkmate::expect_data_frame(pred, nrows = nrow(iris), ncols = 3) +}) + + +# Test numeric predictions + +data(Boston, package="MASS") +## mlr +task = mlr::makeRegrTask(data = Boston, target = "medv") +lrn = mlr::makeLearner("regr.randomForest") +mod.mlr = mlr::train(lrn, task) +predictor.mlr = createPredictionFunction(mod.mlr, task = "regression") + +# S3 predict +mod.S3 = mod.mlr$learner.model +predictor.S3 = createPredictionFunction(mod.S3, task = "regression") + +# caret +mod.caret = caret::train(medv ~ ., data = Boston, method = "knn", + trControl = caret::trainControl(method = "cv")) +predictor.caret = createPredictionFunction(mod.caret, task = "regression") + +# function +mod.f = function(newdata) { + predict(mod.caret, newdata = newdata) +} +predictor.f = createPredictionFunction(NULL, predict.fun = mod.f, task = "regression") +boston.test = Boston[c(1,2,3,4), ] +prediction.f = predictor.f(boston.test) + +test_that("output shape", { + checkmate::expect_data_frame(prediction.f, ncols = 1) + checkmate::expect_data_frame(predictor.caret(boston.test), ncols = 1) + checkmate::expect_data_frame(predictor.S3(boston.test), ncols = 1) + checkmate::expect_data_frame(predictor.mlr(boston.test), ncols = 1) + +}) + + +test_that("equivalence", { + expect_equivalent(prediction.f, predictor.caret(boston.test)) + expect_equivalent(predictor.mlr(boston.test), predictor.S3(boston.test)) +}) + + + diff --git a/tests/testthat/test-experiment.R b/tests/testthat/test-experiment.R deleted file mode 100644 index 58eacfde6..000000000 --- a/tests/testthat/test-experiment.R +++ /dev/null @@ -1,22 +0,0 @@ -context('Experiment') - - -test_that('Experiments work',{ - - - f = function(x){ - unlist(x[1] + x[2]) - } - X = data.frame(a = c(1,2,3), b = c(2,3,4)) - ds = DataSampler$new(X) - pred = Prediction.f$new(f) - e = Experiment$new(pred, ds) - set.seed(1) - dat = e$run()$data() - - set.seed(2) - dat3 = 
e$run(force=TRUE)$data() - -}) - - \ No newline at end of file diff --git a/tests/testthat/test-feature.imp.r b/tests/testthat/test-feature.imp.r deleted file mode 100644 index 2e3ddd60c..000000000 --- a/tests/testthat/test-feature.imp.r +++ /dev/null @@ -1,90 +0,0 @@ -context('feature.imp()') - - -f = function(x, multi = FALSE){ - pred = unlist(x[1] + x[2] + 100 * (x[3] == 'a')) - dat = data.frame(pred = pred) - if(multi) dat$pred2 = 0.2 * dat$pred + 30 - dat -} -X = data.frame(a = c(1, 2, 3, 4, 5), - b = c(10, 20, 30, 40, 50), - c = factor(c("a", "b", "c", "a", "b")), - d = factor(c("A", "A", "B", "B", "B"))) -f2 = function(x){f(x, TRUE)} -set.seed(42) -y = f(X) + rnorm(nrow(X)) -y2 = factor(ifelse(X$b + X$a < 20, 'pred', 'pred2')) - -test_that('feature.imp works for single output',{ - - var.imp = feature.imp(f, X, y = y, loss = 'mse') - dat = var.imp$data() - expect_equal(colnames(dat), c("..feature", "error", "importance")) - expect_equal(nrow(dat), ncol(X)) - p = plot(var.imp) - expect_s3_class(p, c("gg", "ggplot")) - p - - # Making sure the result is sorted by decreasing importance - expect_equal(dat$importance, dat[order(dat$importance, decreasing = TRUE),]$importance) - - var.imp = feature.imp(f, X, y = y, loss = 'mse', method = 'cartesian') - dat = var.imp$data() - expect_equal(colnames(dat), c("..feature", "error", "importance")) - expect_equal(nrow(dat), ncol(X)) - p = plot(var.imp) - expect_s3_class(p, c("gg", "ggplot")) - p - - X.exact = data.frame(x1 = c(1,2,3), x2 = c(9,4,2)) - f.exact = function(x){x[[1]]} - y.exact = c(2,3,4) - model.error = Metrics::mse(y.exact, f.exact(X.exact)) - cart.indices = c(1,1,2,2,3,3) - cartesian.error = Metrics::mse(y.exact[cart.indices], c(2,3,1,3,1,2)) - - var.imp = feature.imp(f.exact, X.exact, y = y.exact, loss = 'mse', method = 'cartesian') - dat = var.imp$data() - expect_equal(dat$importance, c(cartesian.error, 1)) - expect_equal(colnames(dat), c("..feature", "error", "importance")) - expect_equal(model.error, var.imp$error.original) - expect_equal(nrow(dat), ncol(X.exact)) - p = plot(var.imp) - expect_s3_class(p, c("gg", "ggplot")) - p - - p = plot(var.imp, sort = FALSE) - expect_s3_class(p, c("gg", "ggplot")) - p - - p = var.imp$plot() - expect_s3_class(p, c("gg", "ggplot")) - p - -}) - -test_that('feature.imp works for single output and function as loss',{ - - var.imp = feature.imp(f, X, y = y, loss = Metrics::mse) - dat = var.imp$data() - # Making sure the result is sorted by decreasing importance - expect_equal(dat$importance, dat[order(dat$importance, decreasing = TRUE),]$importance) - expect_equal(colnames(dat), c("..feature", "error", "importance")) - expect_equal(nrow(dat), ncol(X)) - p = plot(var.imp) - expect_s3_class(p, c("gg", "ggplot")) - p - -}) - -test_that('feature.imp works for multiple output',{ - - var.imp = feature.imp(f2, X, y = y2, loss = 'ce') - dat = var.imp$data() - expect_equal(colnames(dat), c("..feature", "error", "importance")) - expect_equal(nrow(dat), ncol(X)) - p = plot(var.imp) - expect_s3_class(p, c("gg", "ggplot")) - p -}) \ No newline at end of file diff --git a/tests/testthat/test-inferTask.R b/tests/testthat/test-inferTask.R new file mode 100644 index 000000000..cac854bac --- /dev/null +++ b/tests/testthat/test-inferTask.R @@ -0,0 +1,56 @@ +context("inferTask") + +rf = randomForest(Species ~ ., data = iris, ntree = 1) +f = function(x) predict(rf, newdata = x) + + +test_that("classification",{ + ## use linear discriminant analysis to classify iris data + task = makeClassifTask(data = iris, target = 
"Species") + learner = makeLearner("classif.lda", method = "mle") + mod = mlr::train(learner, task) + expect_warning({tsk = inferTaskFromModel(mod)}) + expect_equal(tsk, "classification") + + TrainData <- iris[,1:4] + TrainClasses <- iris[,5] + + knnFit1 <- train(TrainData, TrainClasses, + method = "knn", + preProcess = c("center", "scale"), + tuneLength = 2, + trControl = trainControl(method = "cv")) + + expect_equal(inferTaskFromModel(knnFit1), "classification") + + t1 = inferTaskFromPrediction(predict(rf, newdata = iris)) + t2 = inferTaskFromPrediction(predict(rf, newdata = iris, type = "prob")) + + expect_equal(t1, "classification") + expect_equal(t2, "classification") +}) + +test_that("regression",{ + + ## use linear discriminant analysis to classify iris data + task = makeRegrTask(data = cars, target = "dist") + learner = makeLearner("regr.lm") + mod = mlr::train(learner, task) + expect_equal(inferTaskFromModel(mod), "regression") + + + lmFit <- train(dist ~ ., + data = cars, + method = "lm") + + expect_equal(inferTaskFromModel(lmFit), "regression") + pred = predict(lmFit) + expect_equal(inferTaskFromPrediction(pred), "regression") +}) + + +test_that("unknown",{ + expect_equal(inferTaskFromModel(rf), "unknown") + expect_equal(inferTaskFromModel(f), "unknown") +}) + diff --git a/tests/testthat/test-lime.r b/tests/testthat/test-lime.r deleted file mode 100644 index 8e0b4c137..000000000 --- a/tests/testthat/test-lime.r +++ /dev/null @@ -1,69 +0,0 @@ -context('lime()') - - -f = function(x, multi = FALSE){ - pred = unlist(5 + 20 * x[1] - 10 * x[2] + 100 * (x[3] == 'a')) - dat = data.frame(pred = pred) - if(multi) dat$pred2 = 1 - dat$pred - dat -} - -f2 = function(x) f(x, multi = TRUE) -set.seed(123) -n = 100 -X = data.frame( - x1 = rnorm(n), - x2 = runif(n), - x3 = sample(c('a', 'b','c'), size = n, replace=TRUE), - x4 = sample(c('A', 'B', 'C'), size = n, replace=TRUE)) - -y = f(X)[[1]] - -expected.colnames = c("beta", "x.scaled", "effect", "x.original", "feature", "feature.value") - -test_that('lime works for single output and single feature',{ - - x.interest = X[2,] - k = 2 - set.seed(42) - lime1 = lime(f, X, x.interest=x.interest, sample.size = 100, k = k) - dat = lime1$data() - expect_equal(colnames(dat), expected.colnames) - expect_true("x3=a" %in% dat$feature) - expect_equal(nrow(dat), k) - p = plot(lime1) - expect_s3_class(p, c("gg", "ggplot")) - p - - x.interest2 = X[4,] - lime1$x = x.interest2 - dat = lime1$data() - expect_equal(colnames(dat), expected.colnames) - expect_equal(nrow(dat), k) - - pred = predict(lime1, newdata = X[3:4,]) - expect_data_frame(pred, nrows = 2) - expect_equal(colnames(pred), 'prediction') -}) - -test_that('lime works for multiple output',{ - - x.interest = X[1,] - - k = 3 - set.seed(42) - lime1 = lime(f2, X, x.interest, sample.size = 400, k = k) - dat = lime1$data() - expect_equal(colnames(dat), c(expected.colnames, '..class')) - expect_equal(nrow(dat), k * 2) - pred2 = predict(lime1, X[c(2,3),]) - expect_data_frame(pred2, nrows=2) - expect_equal(colnames(pred2), c('pred', 'pred2')) - - p = plot(lime1) - expect_s3_class(p, c("gg", "ggplot")) - p -}) - - - diff --git a/tests/testthat/test-prediction.R b/tests/testthat/test-prediction.R deleted file mode 100644 index f245000d7..000000000 --- a/tests/testthat/test-prediction.R +++ /dev/null @@ -1,114 +0,0 @@ -context('Prediction') - - -library('mlr') -library('randomForest') -library('caret') - - -## mlr -task = mlr::makeClassifTask(data = iris, target = "Species") -lrn = 
mlr::makeLearner("classif.randomForest", predict.type = 'prob') -mod.mlr = mlr::train(lrn, task) -predictor.mlr = prediction.model(mod.mlr) - -# S3 predict -mod.S3 = mod.mlr$learner.model -predictor.S3 = prediction.model(mod.S3, predict.args = list(type='prob')) - -# caret -mod.caret = caret::train(Species ~ ., data = iris, method = "knn", - trControl = caret::trainControl(method = "cv")) -predictor.caret = prediction.model(mod.caret) - -# function -mod.f = function(X){ - predict(mod.caret, newdata = X, type = 'prob') -} -predictor.f = prediction.model(mod.f) -iris.test = iris[c(2,20, 100, 150), c('Sepal.Length', 'Sepal.Width', 'Petal.Length', 'Petal.Width')] -prediction.f = predictor.f$predict(iris.test) - - -test_that('equivalence',{ - expect_equivalent(prediction.f, predictor.caret$predict(iris.test)) - expect_equivalent(predictor.mlr$predict(iris.test), predictor.S3$predict(iris.test)) - -}) - -test_that('f works', { - expect_equal(colnames(prediction.f), c('setosa', 'versicolor', 'virginica')) - expect_s3_class(prediction.f, 'data.frame') - predictor.f.1 = prediction.model(mod.f, class = 1) - expect_equal(prediction.f[,1], predictor.f.1$predict(iris.test)$setosa) -}) - - -# Test single class predictions - - - -## mlr -predictor.mlr = prediction.model(mod.mlr, class = 2) -# S3 predict -predictor.S3 = prediction.model(mod.S3, class = 2, predict.args = list(type='prob')) -# caret -predictor.caret = prediction.model(mod.caret, class = 2) -# function -predictor.f = prediction.model(mod.f, class = 2) -prediction.f = predictor.f$predict(iris.test) -test_that('equivalence',{ - expect_equivalent(prediction.f, predictor.caret$predict(iris.test)) - expect_equivalent(predictor.mlr$predict(iris.test), predictor.S3$predict(iris.test)) -}) - -test_that('f works', { - expect_equal(colnames(prediction.f), c('versicolor')) - expect_class(prediction.f, 'data.frame') -}) - - - - -# Test numeric predictions - -data(Boston, package='MASS') -## mlr -task = mlr::makeRegrTask(data = Boston, target = "medv") -lrn = mlr::makeLearner("regr.randomForest") -mod.mlr = mlr::train(lrn, task) -predictor.mlr = prediction.model(mod.mlr) - -# S3 predict -mod.S3 = mod.mlr$learner.model -predictor.S3 = prediction.model(mod.S3) - -# caret -mod.caret = caret::train(medv ~ ., data = Boston, method = "knn", - trControl = caret::trainControl(method = "cv")) - predictor.caret = prediction.model(mod.caret) - -# function -mod.f = function(X){ - predict(mod.caret, newdata = X) -} -predictor.f = prediction.model(mod.f) -boston.test = Boston[c(1,2,3,4), ] -prediction.f = predictor.f$predict(boston.test) - - - - -test_that('equivalence',{ - expect_equivalent(prediction.f, predictor.caret$predict(boston.test)) - expect_equivalent(predictor.mlr$predict(boston.test), predictor.S3$predict(boston.test)) -}) - -test_that('f works', { - expect_equal(colnames(prediction.f), c('..prediction')) - expect_class(prediction.f, 'data.frame') -}) - - - - diff --git a/tests/testthat/test-shapley.r b/tests/testthat/test-shapley.r deleted file mode 100644 index 085e13ac7..000000000 --- a/tests/testthat/test-shapley.r +++ /dev/null @@ -1,53 +0,0 @@ -context('shapley()') - - -f = function(x, multi = FALSE){ - pred = unlist(x[1] + x[2] + 100 * (x[3] == 'a')) / (155) - dat = data.frame(pred = pred) - if(multi) dat$pred2 = 1 - dat$pred - dat -} - -f2 = function(x) f(x, multi = TRUE) -X = data.frame(a = c(1, 2, 3, 4, 5), - b = c(10, 20, 30, 40, 50), - c = factor(c("a", "b", "c", "a", "b")), - d = factor(c("A", "A", "B", "B", "B"))) - - -test_that('shapley 
works for single output and single feature',{ - - x.interest = X[1,] - - set.seed(42) - shap = shapley(f, X, x.interest, sample.size = 400) - dat = shap$data() - expect_equal(colnames(dat), c("feature", "phi", 'phi.var')) - expect_equal(nrow(dat), ncol(X)) - expect_equal(sum(dat$phi), unlist(f(x.interest) - mean(f(X)$pred), use.names = FALSE), tolerance = 0.02) - p = plot(shap) - expect_s3_class(p, c("gg", "ggplot")) - p - - x.interest2 = X[2,] - shap$x = x.interest2 - dat = shap$data() - expect_equal(colnames(dat), c("feature", "phi", 'phi.var')) - expect_equal(nrow(dat), ncol(X)) - expect_equal(sum(dat$phi), unlist(f(x.interest2) - mean(f(X)$pred), use.names = FALSE), tolerance = 0.02) - -}) - -test_that('shapley works for multiple output',{ - - x.interest = X[1,] - - set.seed(42) - shap = shapley(f2, X, x.interest, sample.size = 400) - dat = shap$data() - expect_equal(colnames(dat), c("feature", "class", "phi", 'phi.var')) - expect_equal(sum(dat$phi[dat$class == 'pred']), unlist(f(x.interest) - mean(f(X)$pred), use.names = FALSE), tolerance = 0.02) - p = plot(shap) - expect_s3_class(p, c("gg", "ggplot")) - p -}) \ No newline at end of file diff --git a/tests/testthat/test-tree.surrogate.r b/tests/testthat/test-tree.surrogate.r deleted file mode 100644 index e67d2abb4..000000000 --- a/tests/testthat/test-tree.surrogate.r +++ /dev/null @@ -1,64 +0,0 @@ -context('tree.surrogate()') - - -f = function(x, multi = FALSE){ - pred = unlist(x[1] + x[2] + 100 * (x[3] == 'a')) / (155) - dat = data.frame(pred = pred) - if(multi) dat$pred2 = 1 - dat$pred - dat -} - -f2 = function(x) f(x, multi = TRUE) -X = data.frame(a = c(1, 2, 3, 4, 5), - b = c(10, 20, 30, 40, 50), - c = factor(c("a", "b", "c", "a", "b")), - d = factor(c("A", "A", "B", "B", "B"))) - - -test_that('tree.surrogate works for single output and single feature',{ - - sample.size = 100 - tree = tree.surrogate(f, X, sample.size = sample.size) - dat = tree$data() - expect_equal(colnames(dat), c(colnames(X), "..node", "..path", "..y.hat", "..y.hat.tree")) - expect_equal(nrow(dat), sample.size) - p = plot(tree) - expect_s3_class(p, c("gg", "ggplot")) - expect_s3_class(tree$predict(X), c('data.frame')) - p - -}) - -test_that('tree.surrogate works for multiple output',{ - sample.size = 50 - tree = tree.surrogate(f2, X, sample.size = sample.size) - dat = tree$data() - expect_equal(colnames(dat), c(colnames(X), "..node", "..path", "..y.hat:pred", "..y.hat:pred2", "..y.hat.tree:pred", "..y.hat.tree:pred2")) - expect_equal(nrow(dat), sample.size) - p = plot(tree) - expect_s3_class(p, c("gg", "ggplot")) - expect_s3_class(tree$predict(X), c('data.frame')) - p - actual.prediction = predict(tree, X) - expect_equal(colnames(actual.prediction), c('pred', 'pred2')) - expect_s3_class(actual.prediction, 'data.frame') - - actual.classification = predict(tree, X[1:4,], type = 'class') - expect_equal(colnames(actual.classification), '..class') - -}) - -test_that('tree.surrogate works for multiple output with selected class',{ - sample.size = 50 - tree = tree.surrogate(f2, X, sample.size = sample.size, class = 1) - dat = tree$data() - expect_equal(colnames(dat), c(colnames(X), "..node", "..path", "..y.hat", "..y.hat.tree")) - expect_equal(nrow(dat), sample.size ) - p = plot(tree) - expect_s3_class(p, c("gg", "ggplot")) - expect_s3_class(tree$predict(X), c('data.frame')) - p - pred = predict(tree, newdata = X[1:10, ]) - expect_equal(dim(pred), c(10, 1)) - expect_s3_class(pred, 'data.frame') -}) diff --git a/tests/testthat/test-utils.R 
b/tests/testthat/test-utils.R index cbbac48e7..43eea1098 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -1,24 +1,22 @@ -context('utility functions') +context("utility functions") -test_that('get.feature.type function works', { - - f1 = c('a', 'b') - expect_equal(get.feature.type(class(f1)), c('character'='categorical')) +test_that("get.feature.type", { + f1 = c("a", "b") + expect_equal(get.feature.type(class(f1)), c("character"="categorical")) expect_error(get.feature.type(data.frame(f1))) - expect_equal(get.feature.type(class(factor(f1))), c('factor'='categorical')) + expect_equal(get.feature.type(class(factor(f1))), c("factor"="categorical")) f2 = 1:10 expect_equal(get.feature.type(c(class(f1), class(f2))), - c('character'='categorical', 'integer'='numerical')) + c("character"="categorical", "integer"="numerical")) }) -test_that('probs.to.labels',{ - +test_that("probs.to.labels",{ probs = data.frame(a = c(0.1, 0.2, 0.7), b = c(0.9, 0.8, 0.3), c = c(0,1,0)) - labels = data.frame(..class = c('b', 'c', 'a')) + labels = data.frame(..class = c("b", "c", "a")) expect_equal(probs.to.labels(probs), labels) pred = data.frame(prediction = 1:10) expect_equal(probs.to.labels(pred), pred) @@ -27,8 +25,8 @@ test_that('probs.to.labels',{ -test_that('recode',{ - X = data.frame(x1=1:10, x2=letters[c(1,1,2,2,2,2,2,2,1,1)], x3=rep(0, 10), x4=factor('c')) +test_that("recode",{ + X = data.frame(x1=1:10, x2=letters[c(1,1,2,2,2,2,2,2,1,1)], x3=rep(0, 10), x4=factor("c")) X.recode = recode(X, X[1,]) expect_equal(dim(X), dim(X.recode)) expect_equal(rownames(X), rownames(X.recode)) @@ -36,5 +34,26 @@ test_that('recode',{ expect_equal(X.recode$x3, X$x3) expect_equal(X.recode$x2, c(1,1,0,0,0,0,0,0,1,1)) expect_equal(X.recode$x4, rep(1,10)) - expect_equal(colnames(X.recode), c('x1', 'x2=a', 'x3', 'x4=c')) + expect_equal(colnames(X.recode), c("x1", "x2=a", "x3", "x4=c")) +}) + + +test_that("get.1D.grid", { + expect_equal(get.1D.grid(1:10, "numerical", 4), c(1, 4, 7, 10)) + expect_equal(get.1D.grid(1:10, "categorical", 1), 1:10) + expect_equal(get.1D.grid(letters, "categorical", 4), letters) + expect_error(get.1D.grid(letters, "numerical", 4)) + expect_equal(get.1D.grid(1:10, "numerical", 2), c(1, 10)) + expect_equal(get.1D.grid(c(NA, 1:10), "numerical", 2), c(1,10)) + expect_equal(get.1D.grid(c(NaN, 1:10), "numerical", 2), c(1,10)) + expect_equal(get.1D.grid(c(Inf, 1:10), "numerical", 2), c(1,10)) + expect_equal(get.1D.grid(c(-Inf, 1:10), "numerical", 2), c(1,10)) +}) + +test_that("is.label.output", { + expect_equal(is.label.output(letters), TRUE) + expect_equal(is.label.output(as.factor(letters)), TRUE) + expect_equal(is.label.output(1:10), FALSE) + expect_equal(is.label.output(c(NA, 1:10)), FALSE) + expect_equal(is.label.output(iris), FALSE) })