
Improve Survival stuff #1833

Merged: 19 commits, Jul 11, 2017
Changes from 8 commits
1 change: 1 addition & 0 deletions DESCRIPTION
@@ -150,6 +150,7 @@ Suggests:
smoof,
sparseLDA,
stepPlr,
survAUC,
SwarmSVM,
svglite,
testthat,
6 changes: 3 additions & 3 deletions NAMESPACE
@@ -288,7 +288,6 @@ S3method(makeRLearner,surv.gamboost)
S3method(makeRLearner,surv.gbm)
S3method(makeRLearner,surv.glmboost)
S3method(makeRLearner,surv.glmnet)
S3method(makeRLearner,surv.penalized)
S3method(makeRLearner,surv.randomForestSRC)
S3method(makeRLearner,surv.ranger)
S3method(makeRLearner,surv.rpart)
@@ -484,7 +483,6 @@ S3method(predictLearner,surv.gamboost)
S3method(predictLearner,surv.gbm)
S3method(predictLearner,surv.glmboost)
S3method(predictLearner,surv.glmnet)
S3method(predictLearner,surv.penalized)
S3method(predictLearner,surv.randomForestSRC)
S3method(predictLearner,surv.ranger)
S3method(predictLearner,surv.rpart)
@@ -735,7 +733,6 @@ S3method(trainLearner,surv.gamboost)
S3method(trainLearner,surv.gbm)
S3method(trainLearner,surv.glmboost)
S3method(trainLearner,surv.glmnet)
S3method(trainLearner,surv.penalized)
S3method(trainLearner,surv.randomForestSRC)
S3method(trainLearner,surv.ranger)
S3method(trainLearner,surv.rpart)
@@ -762,6 +759,7 @@ export(calculateConfusionMatrix)
export(calculateROCMeasures)
export(capLargeValues)
export(cindex)
export(cindex.uno)
export(configureMlr)
export(convertBMRToRankMatrix)
export(convertMLBenchObjToTask)
@@ -877,6 +875,7 @@ export(helpLearner)
export(helpLearnerParam)
export(holdout)
export(hout)
export(iauc.uno)
export(impute)
export(imputeConstant)
export(imputeHist)
@@ -1090,6 +1089,7 @@ export(setHyperPars)
export(setHyperPars2)
export(setId)
export(setLearnerId)
export(setMeasurePars)
export(setPredictThreshold)
export(setPredictType)
export(setThreshold)
22 changes: 3 additions & 19 deletions R/Measure.R
@@ -43,7 +43,7 @@
#' \item{req.task}{Is task object required in calculation? Usually not the case}
#' \item{req.model}{Is model object required in calculation? Usually not the case.}
#' \item{req.feats}{Are feature values required in calculation? Usually not the case.}
#' \item{req.prob}{Are predicted probabilites required in calculation? Usually not the case, example would be AUC.}
#' \item{req.prob}{Are predicted probabilities required in calculation? Usually not the case, example would be AUC.}
#' }
#' Default is \code{character(0)}.
#' @param fun [\code{function(task, model, pred, feats, extra.args)}]\cr
@@ -63,6 +63,7 @@
#' }
#' @param extra.args [\code{list}]\cr
#' List of extra arguments which will always be passed to \code{fun}.
#' Can be changed after construction via \code{\link{setMeasurePars}}.
#' Default is empty list.
#' @param aggr [\code{\link{Aggregation}}]\cr
#' Aggregation function, which is used to aggregate the values measured
@@ -156,24 +157,6 @@ getDefaultMeasure = function(x) {
)
}


#' Set aggregation function of measure.
#'
#' Set how this measure will be aggregated after resampling.
#' To see possible aggregation functions: \code{\link{aggregations}}.
#'
#' @param measure [\code{\link{Measure}}]\cr
#' Performance measure.
#' @template arg_aggr
#' @return [\code{\link{Measure}}] with changed aggregation behaviour.
#' @export
setAggregation = function(measure, aggr) {
assertClass(measure, classes = "Measure")
assertClass(aggr, classes = "Aggregation")
measure$aggr = aggr
return(measure)
}

#' @export
print.Measure = function(x, ...) {
catf("Name: %s", x$name)
@@ -182,5 +165,6 @@ print.Measure = function(x, ...) {
catf("Minimize: %s", x$minimize)
catf("Best: %g; Worst: %g", x$best, x$worst)
catf("Aggregated by: %s", x$aggr$id)
catf("Arguments: %s", listToShortString(x$extra.args))
catf("Note: %s", x$note)
}
43 changes: 43 additions & 0 deletions R/Measure_operators.R
@@ -0,0 +1,43 @@
#' @title Set parameters of performance measures
#'
#' @description
#' Sets hyperparameters of measures.
#'
#' @param measure [\code{\link{Measure}}]\cr
#' Performance measure.
#' @param ... [any]\cr
#' Named (hyper)parameters with new settings. Alternatively these can be passed
#' using the \code{par.vals} argument.
#' @param par.vals [\code{list}]\cr
#' Optional list of named (hyper)parameter settings. The arguments in
#' \code{...} take precedence over values in this list.
#' @template ret_measure
#' @family performance
#' @export
setMeasurePars = function(measure, ..., par.vals = list()) {
args = list(...)
assertClass(measure, classes = "Measure")
assertList(args, names = "unique", .var.name = "parameter settings")
assertList(par.vals, names = "unique", .var.name = "parameter settings")
measure$extra.args = insert(measure$extra.args, insert(par.vals, args))
measure
}
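
A short usage sketch (hedged: it relies on the cindex.uno measure and its max.time parameter, both added further down in this PR):

m = setMeasurePars(cindex.uno, max.time = 100)
# arguments in ... take precedence over par.vals, so this also yields max.time = 100:
m = setMeasurePars(cindex.uno, max.time = 100, par.vals = list(max.time = 50))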

#' @title Set aggregation function of measure.
#'
#' @description
#' Set how this measure will be aggregated after resampling.
#' To see possible aggregation functions: \code{\link{aggregations}}.
#'
#' @param measure [\code{\link{Measure}}]\cr
#' Performance measure.
#' @template arg_aggr
#' @return [\code{\link{Measure}}] with changed aggregation behaviour.
#' @family performance
#' @export
setAggregation = function(measure, aggr) {
assertClass(measure, classes = "Measure")
assertClass(aggr, classes = "Aggregation")
measure$aggr = aggr
return(measure)
}
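
For illustration, a minimal sketch using one of mlr's built-in aggregations (see aggregations):

# aggregate the C-index by its standard deviation across resampling
# iterations instead of the default mean:
m = setAggregation(cindex, test.sd)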
3 changes: 2 additions & 1 deletion R/RLearner_surv_cforest.R
@@ -53,7 +53,8 @@ trainLearner.surv.cforest = function(.learner, .task, .subset,

#' @export
predictLearner.surv.cforest = function(.learner, .model, .newdata, ...) {
predict(.model$learner.model, newdata = .newdata, ...)
# cforest returns median survival times; multiply by -1 so that high values correspond to high risk
-1 * predict(.model$learner.model, newdata = .newdata, type = "response", ...)
Member: Could you add a test for this please?

Member Author: There is a test in this PR which detects if the predictions are reversed/inverted.

}
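
The referenced test is not shown in this hunk; a minimal testthat-style sketch of such a direction check (illustrative only, assuming mlr's bundled survival task lung.task; not the PR's actual test code):

mod = train(makeLearner("surv.cforest"), lung.task)
p = predict(mod, lung.task)
risk = getPredictionResponse(p)
truth = getPredictionTruth(p)  # a survival::Surv object
# among uncensored cases, higher predicted risk should pair with shorter
# observed survival time, i.e. a negative rank correlation:
obs = truth[, "status"] == 1
expect_true(cor(risk[obs], truth[obs, "time"], method = "spearman") < 0)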

#' @export
16 changes: 3 additions & 13 deletions R/RLearner_surv_coxph.R
@@ -27,23 +27,13 @@ trainLearner.surv.coxph = function(.learner, .task, .subset, .weights = NULL, .
f = getTaskFormula(.task)
data = getTaskData(.task, subset = .subset)
if (is.null(.weights)) {
mod = survival::coxph(formula = f, data = data, ...)
survival::coxph(formula = f, data = data, ...)
} else {
mod = survival::coxph(formula = f, data = data, weights = .weights, ...)
survival::coxph(formula = f, data = data, weights = .weights, ...)
}
#if (.learner$predict.type == "prob")
# mod = attachTrainingInfo(mod, list(surv.range = range(getTaskTargets(.task)[, 1L])))
mod
}

#' @export
predictLearner.surv.coxph = function(.learner, .model, .newdata, ...) {
if (.learner$predict.type == "response") {
predict(.model$learner.model, newdata = .newdata, type = "lp", ...)
}
# else if (.learner$predict.type == "prob") {
# surv.range = getTrainingInfo(.model$learner.model)$surv.range
# times = seq(from = surv.range[1L], to = surv.range[2L], length.out = 1000)
# t(summary(survival::survfit(.model$learner.model, newdata = .newdata, se.fit = FALSE, conf.int = FALSE), times = times)$surv)
# }
predict(.model$learner.model, newdata = .newdata, type = "lp", ...)
}
5 changes: 1 addition & 4 deletions R/RLearner_surv_gamboost.R
@@ -52,8 +52,5 @@ trainLearner.surv.gamboost = function(.learner, .task, .subset, .weights = NULL,

#' @export
predictLearner.surv.gamboost = function(.learner, .model, .newdata, ...) {
if (.learner$predict.type == "response")
predict(.model$learner.model, newdata = .newdata, type = "link")
else
stop("Unknown predict type")
predict(.model$learner.model, newdata = .newdata, type = "link")
Member: Why is the if no longer necessary here (and below)?

Member Author: Survival learners currently do not support multiple predict types. There was an attempt to support survival probabilities, but it was never implemented. The calling function checks the predict type against the learner's properties, so this branch is dead code.

}
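
To make the "dead code" point concrete, the upstream guard works roughly like this sketch (names illustrative; this is not mlr's literal implementation):

# non-default predict types must be backed by a matching learner property:
if (learner$predict.type != "response" &&
    learner$predict.type %nin% getLearnerProperties(learner))
  stopf("Learner '%s' does not support predict type '%s'!",
    learner$id, learner$predict.type)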
5 changes: 1 addition & 4 deletions R/RLearner_surv_glmboost.R
@@ -66,8 +66,5 @@ predictLearner.surv.glmboost = function(.learner, .model, .newdata, use.formula,
info = getTrainingInfo(.model)
.newdata = as.matrix(fixDataForLearner(.newdata, info))
}
if (.learner$predict.type == "response")
predict(.model$learner.model, newdata = .newdata, type = "link")
else
stop("Unknown predict type")
predict(.model$learner.model, newdata = .newdata, type = "link")
}
40 changes: 0 additions & 40 deletions R/RLearner_surv_penalized.R

This file was deleted.

6 changes: 1 addition & 5 deletions R/RLearner_surv_rpart.R
@@ -39,11 +39,7 @@ trainLearner.surv.rpart = function(.learner, .task, .subset, .weights = NULL, ..

#' @export
predictLearner.surv.rpart = function(.learner, .model, .newdata, ...) {
if (.learner$predict.type == "response") {
predict(.model$learner.model, newdata = .newdata, type = "vector", ...)
} else {
stop("Unsupported predict type")
}
predict(.model$learner.model, newdata = .newdata, type = "vector", ...)
}

#' @export
63 changes: 56 additions & 7 deletions R/measures.R
@@ -17,6 +17,9 @@
#' For clustering measures, we compact the predicted cluster IDs such that they form a continuous series
#' starting with 1. If this is not the case, some of the measures will generate warnings.
#'
#' Some measures have parameters. Their defaults are set in the constructor \code{\link{makeMeasure}} and can be
#' overwritten using \code{\link{setMeasurePars}}.
#'
#' @param truth [\code{factor}]\cr
#' Vector of the true class.
#' @param response [\code{factor}]\cr
@@ -1337,19 +1340,65 @@ measureMultilabelTPR = function(truth, response) {
#' @format none
cindex = makeMeasure(id = "cindex", minimize = FALSE, best = 1, worst = 0,
properties = c("surv", "req.pred", "req.truth"),
name = "Concordance index",
name = "Harrell's Concordance index",
note = "Fraction of all pairs of subjects whose predicted survival times are correctly ordered among all subjects that can actually be ordered. In other words, it is the probability of concordance between the predicted and the observed survival.",
fun = function(task, model, pred, feats, extra.args) {
requirePackages("Hmisc", default.method = "load")
resp = pred$data$response
if (anyMissing(resp))
requirePackages("_Hmisc")
y = getPredictionResponse(pred)
if (anyMissing(y))
return(NA_real_)
# FIXME: we need to convert to the correct survival type
s = Surv(pred$data$truth.time, pred$data$truth.event)
Hmisc::rcorr.cens(-1 * resp, s)[["C Index"]]
s = getPredictionTruth(pred)
Hmisc::rcorr.cens(-1 * y, s)[["C Index"]]
}
)
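
A hand-checkable illustration of the measure above (assuming Hmisc and survival are installed; the data are made up):

library(survival)
time  = c(1, 3, 5, 7)
event = c(1, 1, 1, 1)  # no censoring: all 6 pairs are comparable
risk  = c(4, 3, 2, 1)  # highest risk fails first, a perfect ranking
# every pair is concordant, so the C index is exactly 1:
Hmisc::rcorr.cens(-1 * risk, Surv(time, event))[["C Index"]]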

#' @export cindex.uno
#' @rdname measures
#' @format none
#' @references
#' H. Uno et al.
#' \emph{On the C-statistics for Evaluating Overall Adequacy of Risk Prediction Procedures with Censored Survival Data}
#' Statistics in medicine. 2011;30(10):1105-1117. \url{http://dx.doi.org/10.1002/sim.4154}.
cindex.uno = makeMeasure(id = "cindex.uno", minimize = FALSE, best = 1, worst = 0,
properties = c("surv", "req.pred", "req.truth", "req.model"),
name = "Uno's Concordance index",
note = "Fraction of all pairs of subjects whose predicted survival times are correctly ordered among all subjects that can actually be ordered. In other words, it is the probability of concordance between the predicted and the observed survival. Corrected by weighting with IPCW as suggested by Uno. Implemented in survAUC::UnoC.",
fun = function(task, model, pred, feats, extra.args) {
requirePackages("_survAUC")
y = getPredictionResponse(pred)
if (anyMissing(y))
return(NA_real_)
surv.train = getTaskTargets(task, recode.target = "rcens")[model$subset]
max.time = assertNumber(extra.args$max.time, null.ok = TRUE) %??% max(getTaskTargets(task)[, 1L])
survAUC::UnoC(Surv.rsp = surv.train, Surv.rsp.new = getPredictionTruth(pred), time = max.time, lpnew = y)
},
extra.args = list(max.time = NULL)
)

#' @export iauc.uno
#' @rdname measures
#' @format none
#' @references
#' H. Uno et al.
#' \emph{Evaluating Prediction Rules for T-Year Survivors with Censored Regression Models}
#' Journal of the American Statistical Association 102, no. 478 (2007): 527-37. \url{http://www.jstor.org/stable/27639883}.
iauc.uno = makeMeasure(id = "iauc.uno", minimize = FALSE, best = 1, worst = 0,
properties = c("surv", "req.pred", "req.truth", "req.model", "req.task"),
name = "Uno's estimator of cumulative AUC for right censored time-to-event data",
note = "To set an upper time limit, set argument max.time (defaults to max time in complete task). Implemented in survAUC::AUC.uno.",
fun = function(task, model, pred, feats, extra.args) {
requirePackages("_survAUC")
max.time = assertNumber(extra.args$max.time, null.ok = TRUE) %??% max(getTaskTargets(task)[, 1L])
times = seq(from = 0, to = max.time, length.out = extra.args$resolution)
surv.train = getTaskTargets(task, recode.target = "rcens")[model$subset]
y = getPredictionResponse(pred)
if (anyMissing(y))
return(NA_real_)
survAUC::AUC.uno(Surv.rsp = surv.train, Surv.rsp.new = getPredictionTruth(pred), times = times, lpnew = y)$iauc
},
extra.args = list(max.time = NULL, resolution = 1000)
)
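
A resampling sketch for the new measures (hedged: assumes mlr's bundled survival task lung.task; exact values will vary):

lrn = makeLearner("surv.coxph")
rdesc = makeResampleDesc("CV", iters = 3)
ms = list(cindex, cindex.uno, setMeasurePars(iauc.uno, resolution = 100))
r = resample(lrn, lung.task, rdesc, measures = ms)
r$aggr  # aggregated cindex, cindex.uno and iauc.uno values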

Member: Could you add hand-constructed tests for the new measures please?

Member Author: What do you mean by hand-constructed? Calculating these measures without a package would require a few hundred LOC.

Member: For most of the other measures the tests are along the lines of: 5 incorrect predictions, 10 correct predictions, therefore an error rate of 33%; check that the implemented measure returns that number. The point is to verify the number is correct for specific cases (and these can be constructed, i.e. you know what the answer should be).

Member Author: It's complicated. I've added a small test which checks that perfect predictions lead to (nearly) perfect performance when there is no censoring. For all other cases, I'd need an external package, because you cannot compute this by hand in a reasonable time frame. I guess we have to rely on the package authors of survAUC for correctness.

@PhilippPro Do you have any ideas how to test those measures?

Member: No, not really. Of course one can construct simple cases without censoring that can be calculated by hand, but with censoring we have to use the complicated formulas from Uno's paper (https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3079915/), which do not look very simple at first glance.
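
Along the lines described above, a small no-censoring sanity check against survAUC directly might look like this (illustrative, not the PR's actual test code):

library(survival)
library(survAUC)
time  = 1:20
event = rep(1, 20)  # no censoring
s = Surv(time, event)
lp = -time          # perfect risk ranking: earliest deaths get the highest risk
# with train == test data and a perfect ranking, Uno's C should be (close to) 1:
UnoC(Surv.rsp = s, Surv.rsp.new = s, lpnew = lp, time = max(time))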

###############################################################################
### cost-sensitive ###
###############################################################################
Expand Down
3 changes: 2 additions & 1 deletion man/ConfusionMatrix.Rd
3 changes: 2 additions & 1 deletion man/calculateConfusionMatrix.Rd
3 changes: 2 additions & 1 deletion man/calculateROCMeasures.Rd
3 changes: 2 additions & 1 deletion man/estimateRelativeOverfitting.Rd
3 changes: 2 additions & 1 deletion man/makeCostMeasure.Rd
4 changes: 3 additions & 1 deletion man/makeCustomResampledMeasure.Rd

(Generated man/*.Rd files are not rendered by default.)