From 068fc2d82f4c7f4ab05793aeae8a97840fb51f56 Mon Sep 17 00:00:00 2001 From: Michel Lang Date: Fri, 9 Jun 2017 13:42:11 +0200 Subject: [PATCH 01/15] ... --- DESCRIPTION | 2 + NAMESPACE | 6 +-- R/Measure.R | 22 ++--------- R/Measure_operators.R | 43 +++++++++++++++++++++ R/RLearner_surv_cforest.R | 3 +- R/RLearner_surv_coxph.R | 16 ++------ R/RLearner_surv_gamboost.R | 5 +-- R/RLearner_surv_glmboost.R | 5 +-- R/RLearner_surv_penalized.R | 40 -------------------- R/RLearner_surv_rpart.R | 6 +-- R/measures.R | 55 +++++++++++++++++++++++---- man/ConfusionMatrix.Rd | 3 +- man/calculateConfusionMatrix.Rd | 3 +- man/calculateROCMeasures.Rd | 3 +- man/estimateRelativeOverfitting.Rd | 3 +- man/makeCostMeasure.Rd | 3 +- man/makeCustomResampledMeasure.Rd | 4 +- man/makeMeasure.Rd | 7 +++- man/measures.Rd | 13 ++++++- man/performance.Rd | 4 +- man/setAggregation.Rd | 12 +++++- man/setMeasurePars.Rd | 36 ++++++++++++++++++ tests/testthat/helper_objects.R | 7 ++++ tests/testthat/test_base_measures.R | 56 ++++++++++++++++++++++++---- tests/testthat/test_surv_measures.R | 25 +++++++++++++ tests/testthat/test_surv_penalized.R | 26 ------------- 26 files changed, 267 insertions(+), 141 deletions(-) create mode 100644 R/Measure_operators.R delete mode 100644 R/RLearner_surv_penalized.R create mode 100644 man/setMeasurePars.Rd create mode 100644 tests/testthat/test_surv_measures.R delete mode 100644 tests/testthat/test_surv_penalized.R diff --git a/DESCRIPTION b/DESCRIPTION index 4a35d48c9e..25d5dd9d02 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -150,9 +150,11 @@ Suggests: smoof, sparseLDA, stepPlr, + survAUC, SwarmSVM, svglite, testthat, + timeROC, tgp, TH.data, xgboost (>= 0.6-2), diff --git a/NAMESPACE b/NAMESPACE index 7520cedf69..3bda352e68 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -289,7 +289,6 @@ S3method(makeRLearner,surv.gamboost) S3method(makeRLearner,surv.gbm) S3method(makeRLearner,surv.glmboost) S3method(makeRLearner,surv.glmnet) -S3method(makeRLearner,surv.penalized) S3method(makeRLearner,surv.randomForestSRC) S3method(makeRLearner,surv.ranger) S3method(makeRLearner,surv.rpart) @@ -486,7 +485,6 @@ S3method(predictLearner,surv.gamboost) S3method(predictLearner,surv.gbm) S3method(predictLearner,surv.glmboost) S3method(predictLearner,surv.glmnet) -S3method(predictLearner,surv.penalized) S3method(predictLearner,surv.randomForestSRC) S3method(predictLearner,surv.ranger) S3method(predictLearner,surv.rpart) @@ -738,7 +736,6 @@ S3method(trainLearner,surv.gamboost) S3method(trainLearner,surv.gbm) S3method(trainLearner,surv.glmboost) S3method(trainLearner,surv.glmnet) -S3method(trainLearner,surv.penalized) S3method(trainLearner,surv.randomForestSRC) S3method(trainLearner,surv.ranger) S3method(trainLearner,surv.rpart) @@ -765,6 +762,7 @@ export(calculateConfusionMatrix) export(calculateROCMeasures) export(capLargeValues) export(cindex) +export(cindex.uno) export(configureMlr) export(convertBMRToRankMatrix) export(convertMLBenchObjToTask) @@ -880,6 +878,7 @@ export(helpLearner) export(helpLearnerParam) export(holdout) export(hout) +export(iauc.uno) export(impute) export(imputeConstant) export(imputeHist) @@ -1093,6 +1092,7 @@ export(setHyperPars) export(setHyperPars2) export(setId) export(setLearnerId) +export(setMeasurePars) export(setPredictThreshold) export(setPredictType) export(setThreshold) diff --git a/R/Measure.R b/R/Measure.R index c1cd930bad..5fc0f05a07 100644 --- a/R/Measure.R +++ b/R/Measure.R @@ -43,7 +43,7 @@ #' \item{req.task}{Is task object required in calculation? Usually not the case} #' \item{req.model}{Is model object required in calculation? Usually not the case.} #' \item{req.feats}{Are feature values required in calculation? Usually not the case.} -#' \item{req.prob}{Are predicted probabilites required in calculation? Usually not the case, example would be AUC.} +#' \item{req.prob}{Are predicted probabilities required in calculation? Usually not the case, example would be AUC.} #' } #' Default is \code{character(0)}. #' @param fun [\code{function(task, model, pred, feats, extra.args)}]\cr @@ -63,6 +63,7 @@ #' } #' @param extra.args [\code{list}]\cr #' List of extra arguments which will always be passed to \code{fun}. +#' Can be changed after construction via \code{\link{setMeasurePars}}<`3`>. #' Default is empty list. #' @param aggr [\code{\link{Aggregation}}]\cr #' Aggregation funtion, which is used to aggregate the values measured @@ -156,24 +157,6 @@ getDefaultMeasure = function(x) { ) } - -#' Set aggregation function of measure. -#' -#' Set how this measure will be aggregated after resampling. -#' To see possible aggregation functions: \code{\link{aggregations}}. -#' -#' @param measure [\code{\link{Measure}}]\cr -#' Performance measure. -#' @template arg_aggr -#' @return [\code{\link{Measure}}] with changed aggregation behaviour. -#' @export -setAggregation = function(measure, aggr) { - assertClass(measure, classes = "Measure") - assertClass(aggr, classes = "Aggregation") - measure$aggr = aggr - return(measure) -} - #' @export print.Measure = function(x, ...) { catf("Name: %s", x$name) @@ -182,5 +165,6 @@ print.Measure = function(x, ...) { catf("Minimize: %s", x$minimize) catf("Best: %g; Worst: %g", x$best, x$worst) catf("Aggregated by: %s", x$aggr$id) + catf("Arguments: %s", listToShortString(x$extra.args)) catf("Note: %s", x$note) } diff --git a/R/Measure_operators.R b/R/Measure_operators.R new file mode 100644 index 0000000000..380dcc24eb --- /dev/null +++ b/R/Measure_operators.R @@ -0,0 +1,43 @@ +#' @title Set parameters of performance measures +#' +#' @description +#' Sets hyperparameters of measures. +#' +#' @param measure [\code{\link{Measure}}]\cr +#' Performance measure. +#' @param ... [any]\cr +#' Named (hyper)parameters with new settings. Alternatively these can be passed +#' using the \code{par.vals} argument. +#' @param par.vals [\code{list}]\cr +#' Optional list of named (hyper)parameter settings. The arguments in +#' \code{...} take precedence over values in this list. +#' @template ret_measure +#' @family performance +#' @export +setMeasurePars = function(measure, ..., par.vals = list()) { + args = list(...) + assertClass(measure, classes = "Measure") + assertList(args, names = "unique", .var.name = "parameter settings") + assertList(par.vals, names = "unique", .var.name = "parameter settings") + measure$extra.args = insert(measure$extra.args, insert(par.vals, args)) + measure +} + +#' @title Set aggregation function of measure. +#' +#' @description +#' Set how this measure will be aggregated after resampling. +#' To see possible aggregation functions: \code{\link{aggregations}}. +#' +#' @param measure [\code{\link{Measure}}]\cr +#' Performance measure. +#' @template arg_aggr +#' @return [\code{\link{Measure}}] with changed aggregation behaviour. +#' @family performance +#' @export +setAggregation = function(measure, aggr) { + assertClass(measure, classes = "Measure") + assertClass(aggr, classes = "Aggregation") + measure$aggr = aggr + return(measure) +} diff --git a/R/RLearner_surv_cforest.R b/R/RLearner_surv_cforest.R index f833512612..65ae59567f 100644 --- a/R/RLearner_surv_cforest.R +++ b/R/RLearner_surv_cforest.R @@ -53,7 +53,8 @@ trainLearner.surv.cforest = function(.learner, .task, .subset, #' @export predictLearner.surv.cforest = function(.learner, .model, .newdata, ...) { - predict(.model$learner.model, newdata = .newdata, ...) + # cforest returns median survival times; multiply by -1 so that high values correspond to high risk + -1 * predict(.model$learner.model, newdata = .newdata, type = "response", ...) } #' @export diff --git a/R/RLearner_surv_coxph.R b/R/RLearner_surv_coxph.R index 94bb45f47d..fea4bb79cf 100644 --- a/R/RLearner_surv_coxph.R +++ b/R/RLearner_surv_coxph.R @@ -27,23 +27,13 @@ trainLearner.surv.coxph = function(.learner, .task, .subset, .weights = NULL, . f = getTaskFormula(.task) data = getTaskData(.task, subset = .subset) if (is.null(.weights)) { - mod = survival::coxph(formula = f, data = data, ...) + survival::coxph(formula = f, data = data, ...) } else { - mod = survival::coxph(formula = f, data = data, weights = .weights, ...) + survival::coxph(formula = f, data = data, weights = .weights, ...) } - #if (.learner$predict.type == "prob") - # mod = attachTrainingInfo(mod, list(surv.range = range(getTaskTargets(.task)[, 1L]))) - mod } #' @export predictLearner.surv.coxph = function(.learner, .model, .newdata, ...) { - if (.learner$predict.type == "response") { - predict(.model$learner.model, newdata = .newdata, type = "lp", ...) - } - # else if (.learner$predict.type == "prob") { - # surv.range = getTrainingInfo(.model$learner.model)$surv.range - # times = seq(from = surv.range[1L], to = surv.range[2L], length.out = 1000) - # t(summary(survival::survfit(.model$learner.model, newdata = .newdata, se.fit = FALSE, conf.int = FALSE), times = times)$surv) - # } + predict(.model$learner.model, newdata = .newdata, type = "lp", ...) } diff --git a/R/RLearner_surv_gamboost.R b/R/RLearner_surv_gamboost.R index 5f8d004a71..85a77d6dba 100644 --- a/R/RLearner_surv_gamboost.R +++ b/R/RLearner_surv_gamboost.R @@ -52,8 +52,5 @@ trainLearner.surv.gamboost = function(.learner, .task, .subset, .weights = NULL, #' @export predictLearner.surv.gamboost = function(.learner, .model, .newdata, ...) { - if (.learner$predict.type == "response") - predict(.model$learner.model, newdata = .newdata, type = "link") - else - stop("Unknown predict type") + predict(.model$learner.model, newdata = .newdata, type = "link") } diff --git a/R/RLearner_surv_glmboost.R b/R/RLearner_surv_glmboost.R index d8b1ad9ff1..3e8e8f7d66 100644 --- a/R/RLearner_surv_glmboost.R +++ b/R/RLearner_surv_glmboost.R @@ -66,8 +66,5 @@ predictLearner.surv.glmboost = function(.learner, .model, .newdata, use.formula, info = getTrainingInfo(.model) .newdata = as.matrix(fixDataForLearner(.newdata, info)) } - if (.learner$predict.type == "response") - predict(.model$learner.model, newdata = .newdata, type = "link") - else - stop("Unknown predict type") + predict(.model$learner.model, newdata = .newdata, type = "link") } diff --git a/R/RLearner_surv_penalized.R b/R/RLearner_surv_penalized.R deleted file mode 100644 index ed1783bd30..0000000000 --- a/R/RLearner_surv_penalized.R +++ /dev/null @@ -1,40 +0,0 @@ -#' @export -makeRLearner.surv.penalized = function() { - makeRLearnerSurv( - cl = "surv.penalized", - package = "!penalized", - par.set = makeParamSet( - makeNumericLearnerParam(id = "lambda1", default = 0, lower = 0), - makeNumericLearnerParam(id = "lambda2", default = 0, lower = 0), - makeLogicalLearnerParam(id = "fusedl", default = FALSE), - makeUntypedLearnerParam(id = "unpenalized", tunable = FALSE), - makeLogicalVectorLearnerParam(id = "positive", default = FALSE), - makeNumericVectorLearnerParam(id = "startbeta"), - makeNumericVectorLearnerParam(id = "startgamma"), - # untyped here because one can also pass "Park" to steps - makeUntypedLearnerParam(id = "steps", default = 1L, tunable = FALSE), - makeNumericLearnerParam(id = "epsilon", lower = 0, default = 1e-10), - makeIntegerLearnerParam(id = "maxiter", lower = 1L), - makeLogicalLearnerParam(id = "standardize", default = FALSE), - makeLogicalLearnerParam(id = "trace", default = TRUE, tunable = FALSE) - ), - par.vals = list(), - properties = c("numerics", "factors", "ordered", "rcens"), - name = "Penalized Cox Regression", - short.name = "penalized", - note = "trace=FALSE was set by default to disable logging output.", - callees = "penalized" - ) -} - -#' @export -trainLearner.surv.penalized = function(.learner, .task, .subset, .weights = NULL, ...) { - f = getTaskFormula(.task) - penalized::penalized(f, data = getTaskData(.task, subset = .subset), model = "cox", ...) -} - -#' @export -predictLearner.surv.penalized = function(.learner, .model, .newdata, ...) { - # Note: this is a rather ugly hack but should work according to Jelle - penalized::survival(penalized::predict(.model$learner.model, data = .newdata), Inf) -} diff --git a/R/RLearner_surv_rpart.R b/R/RLearner_surv_rpart.R index 14eeafe040..03a36d4235 100644 --- a/R/RLearner_surv_rpart.R +++ b/R/RLearner_surv_rpart.R @@ -39,11 +39,7 @@ trainLearner.surv.rpart = function(.learner, .task, .subset, .weights = NULL, .. #' @export predictLearner.surv.rpart = function(.learner, .model, .newdata, ...) { - if (.learner$predict.type == "response") { - predict(.model$learner.model, newdata = .newdata, type = "vector", ...) - } else { - stop("Unsupported predict type") - } + predict(.model$learner.model, newdata = .newdata, type = "vector", ...) } #' @export diff --git a/R/measures.R b/R/measures.R index e7ce586ee8..f470d0d657 100644 --- a/R/measures.R +++ b/R/measures.R @@ -17,6 +17,9 @@ #' For clustering measures, we compact the predicted cluster IDs such that they form a continuous series #' starting with 1. If this is not the case, some of the measures will generate warnings. #' +#' Some measure have parameters. Their defaults are set in the constructor \code{\link{makeMeasure}} and can be +#' overwritten using \code{\link{setMeasurePars}}. +#' #' @param truth [\code{factor}]\cr #' Vector of the true class. #' @param response [\code{factor}]\cr @@ -1337,19 +1340,57 @@ measureMultilabelTPR = function(truth, response) { #' @format none cindex = makeMeasure(id = "cindex", minimize = FALSE, best = 1, worst = 0, properties = c("surv", "req.pred", "req.truth"), - name = "Concordance index", + name = "Harrell's Concordance index", note = "Fraction of all pairs of subjects whose predicted survival times are correctly ordered among all subjects that can actually be ordered. In other words, it is the probability of concordance between the predicted and the observed survival.", fun = function(task, model, pred, feats, extra.args) { - requirePackages("Hmisc", default.method = "load") - resp = pred$data$response - if (anyMissing(resp)) + requirePackages("_Hmisc") + y = getPredictionResponse(pred) + if (anyMissing(y)) return(NA_real_) - # FIXME: we need to convert to he correct survival type - s = Surv(pred$data$truth.time, pred$data$truth.event) - Hmisc::rcorr.cens(-1 * resp, s)[["C Index"]] + s = getPredictionTruth(pred) + Hmisc::rcorr.cens(-1 * y, s)[["C Index"]] } ) +#' @export cindex.uno +#' @rdname measures +#' @format none +cindex.uno = makeMeasure(id = "cindex.uno", minimize = FALSE, best = 1, worst = 0, + properties = c("surv", "req.pred", "req.truth", "req.model"), + name = "Uno's Concordance index", + note = "Fraction of all pairs of subjects whose predicted survival times are correctly ordered among all subjects that can actually be ordered. In other words, it is the probability of concordance between the predicted and the observed survival. Corrected by weighting with IPCW as suggested by Uno.", + fun = function(task, model, pred, feats, extra.args) { + requirePackages("_survAUC") + y = getPredictionResponse(pred) + if (anyMissing(y)) + return(NA_real_) + surv.train = getTaskTargets(task, recode.target = "rcens")[model$subset] + max.time = assertNumber(extra.args$max.time, null.ok = TRUE) %??% max(getTaskTargets(task)[, 1L]) + survAUC::UnoC(Surv.rsp = surv.train, Surv.rsp.new = getPredictionTruth(pred), time = max.time, lpnew = y) + }, + extra.args = list(max.time = NULL) +) + +#' @export iauc.uno +#' @rdname measures +#' @format none +iauc.uno = makeMeasure(id = "iauc.uno", minimize = FALSE, best = 1, worst = 0, + properties = c("surv", "req.pred", "req.truth", "req.model", "req.task"), + name = "Uno's estimator of cumulative AUC for right censored time-to-event data", + note = "To set an upper time limit, set argument max.time (defaults to max time in complete task).", + fun = function(task, model, pred, feats, extra.args) { + requirePackages("_survAUC") + max.time = assertNumber(extra.args$max.time, null.ok = TRUE) %??% max(getTaskTargets(task)[, 1L]) + times = seq(from = 0, to = max.time, length.out = extra.args$resolution) + surv.train = getTaskTargets(task, recode.target = "rcens")[model$subset] + y = getPredictionResponse(pred) + if (anyMissing(y)) + return(NA_real_) + survAUC::AUC.uno(Surv.rsp = surv.train, Surv.rsp.new = getPredictionTruth(pred), times = times, lpnew = y)$iauc + }, + extra.args = list(max.time = NULL, resolution = 1000) +) + ############################################################################### ### cost-sensitive ### ############################################################################### diff --git a/man/ConfusionMatrix.Rd b/man/ConfusionMatrix.Rd index bb8f8615de..eae1430602 100644 --- a/man/ConfusionMatrix.Rd +++ b/man/ConfusionMatrix.Rd @@ -25,5 +25,6 @@ Other performance: \code{\link{calculateConfusionMatrix}}, \code{\link{makeCostMeasure}}, \code{\link{makeCustomResampledMeasure}}, \code{\link{makeMeasure}}, \code{\link{measures}}, - \code{\link{performance}} + \code{\link{performance}}, \code{\link{setAggregation}}, + \code{\link{setMeasurePars}} } diff --git a/man/calculateConfusionMatrix.Rd b/man/calculateConfusionMatrix.Rd index 9c1c86cc84..836b73e734 100644 --- a/man/calculateConfusionMatrix.Rd +++ b/man/calculateConfusionMatrix.Rd @@ -83,5 +83,6 @@ Other performance: \code{\link{ConfusionMatrix}}, \code{\link{makeCostMeasure}}, \code{\link{makeCustomResampledMeasure}}, \code{\link{makeMeasure}}, \code{\link{measures}}, - \code{\link{performance}} + \code{\link{performance}}, \code{\link{setAggregation}}, + \code{\link{setMeasurePars}} } diff --git a/man/calculateROCMeasures.Rd b/man/calculateROCMeasures.Rd index bfb9bf1759..4a8e492bd0 100644 --- a/man/calculateROCMeasures.Rd +++ b/man/calculateROCMeasures.Rd @@ -77,5 +77,6 @@ Other performance: \code{\link{ConfusionMatrix}}, \code{\link{makeCostMeasure}}, \code{\link{makeCustomResampledMeasure}}, \code{\link{makeMeasure}}, \code{\link{measures}}, - \code{\link{performance}} + \code{\link{performance}}, \code{\link{setAggregation}}, + \code{\link{setMeasurePars}} } diff --git a/man/estimateRelativeOverfitting.Rd b/man/estimateRelativeOverfitting.Rd index e6ac6fcc42..950a67e71b 100644 --- a/man/estimateRelativeOverfitting.Rd +++ b/man/estimateRelativeOverfitting.Rd @@ -51,5 +51,6 @@ Other performance: \code{\link{ConfusionMatrix}}, \code{\link{makeCostMeasure}}, \code{\link{makeCustomResampledMeasure}}, \code{\link{makeMeasure}}, \code{\link{measures}}, - \code{\link{performance}} + \code{\link{performance}}, \code{\link{setAggregation}}, + \code{\link{setMeasurePars}} } diff --git a/man/makeCostMeasure.Rd b/man/makeCostMeasure.Rd index 1a46b16310..80d1ba5852 100644 --- a/man/makeCostMeasure.Rd +++ b/man/makeCostMeasure.Rd @@ -54,5 +54,6 @@ Other performance: \code{\link{ConfusionMatrix}}, \code{\link{estimateRelativeOverfitting}}, \code{\link{makeCustomResampledMeasure}}, \code{\link{makeMeasure}}, \code{\link{measures}}, - \code{\link{performance}} + \code{\link{performance}}, \code{\link{setAggregation}}, + \code{\link{setMeasurePars}} } diff --git a/man/makeCustomResampledMeasure.Rd b/man/makeCustomResampledMeasure.Rd index 55d3df3511..cb5477d264 100644 --- a/man/makeCustomResampledMeasure.Rd +++ b/man/makeCustomResampledMeasure.Rd @@ -77,5 +77,7 @@ Other performance: \code{\link{ConfusionMatrix}}, \code{\link{calculateROCMeasures}}, \code{\link{estimateRelativeOverfitting}}, \code{\link{makeCostMeasure}}, \code{\link{makeMeasure}}, - \code{\link{measures}}, \code{\link{performance}} + \code{\link{measures}}, \code{\link{performance}}, + \code{\link{setAggregation}}, + \code{\link{setMeasurePars}} } diff --git a/man/makeMeasure.Rd b/man/makeMeasure.Rd index 1943a65104..0ede4c85a1 100644 --- a/man/makeMeasure.Rd +++ b/man/makeMeasure.Rd @@ -32,7 +32,7 @@ Set of measure properties. Some standard property names include: \item{req.task}{Is task object required in calculation? Usually not the case} \item{req.model}{Is model object required in calculation? Usually not the case.} \item{req.feats}{Are feature values required in calculation? Usually not the case.} - \item{req.prob}{Are predicted probabilites required in calculation? Usually not the case, example would be AUC.} + \item{req.prob}{Are predicted probabilities required in calculation? Usually not the case, example would be AUC.} } Default is \code{character(0)}.} @@ -54,6 +54,7 @@ object \code{pred}. \item{extra.args}{[\code{list}]\cr List of extra arguments which will always be passed to \code{fun}. +Can be changed after construction via \code{\link{setMeasurePars}}<`3`>. Default is empty list.} \item{aggr}{[\code{\link{Aggregation}}]\cr @@ -113,5 +114,7 @@ Other performance: \code{\link{ConfusionMatrix}}, \code{\link{estimateRelativeOverfitting}}, \code{\link{makeCostMeasure}}, \code{\link{makeCustomResampledMeasure}}, - \code{\link{measures}}, \code{\link{performance}} + \code{\link{measures}}, \code{\link{performance}}, + \code{\link{setAggregation}}, + \code{\link{setMeasurePars}} } diff --git a/man/measures.Rd b/man/measures.Rd index 17ea9d1f05..f632757694 100644 --- a/man/measures.Rd +++ b/man/measures.Rd @@ -116,6 +116,8 @@ \alias{multilabel.tpr} \alias{measureMultilabelTPR} \alias{cindex} +\alias{cindex.uno} +\alias{iauc.uno} \alias{meancosts} \alias{mcp} \alias{db} @@ -352,6 +354,10 @@ measureMultilabelTPR(truth, response) cindex +cindex.uno + +iauc.uno + meancosts mcp @@ -399,6 +405,9 @@ Most measures can directly be accessed via the function named after the scheme m For clustering measures, we compact the predicted cluster IDs such that they form a continuous series starting with 1. If this is not the case, some of the measures will generate warnings. + +Some measure have parameters. Their defaults are set in the constructor \code{\link{makeMeasure}} and can be +overwritten using \code{\link{setMeasurePars}}. } \references{ He, H. & Garcia, E. A. (2009) @@ -412,6 +421,8 @@ Other performance: \code{\link{ConfusionMatrix}}, \code{\link{estimateRelativeOverfitting}}, \code{\link{makeCostMeasure}}, \code{\link{makeCustomResampledMeasure}}, - \code{\link{makeMeasure}}, \code{\link{performance}} + \code{\link{makeMeasure}}, \code{\link{performance}}, + \code{\link{setAggregation}}, + \code{\link{setMeasurePars}} } \keyword{datasets} diff --git a/man/performance.Rd b/man/performance.Rd index fff34860cf..8425549d42 100644 --- a/man/performance.Rd +++ b/man/performance.Rd @@ -52,5 +52,7 @@ Other performance: \code{\link{ConfusionMatrix}}, \code{\link{estimateRelativeOverfitting}}, \code{\link{makeCostMeasure}}, \code{\link{makeCustomResampledMeasure}}, - \code{\link{makeMeasure}}, \code{\link{measures}} + \code{\link{makeMeasure}}, \code{\link{measures}}, + \code{\link{setAggregation}}, + \code{\link{setMeasurePars}} } diff --git a/man/setAggregation.Rd b/man/setAggregation.Rd index 97d479901b..653794b366 100644 --- a/man/setAggregation.Rd +++ b/man/setAggregation.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Measure.R +% Please edit documentation in R/Measure_operators.R \name{setAggregation} \alias{setAggregation} \title{Set aggregation function of measure.} @@ -20,3 +20,13 @@ Aggregation function.} Set how this measure will be aggregated after resampling. To see possible aggregation functions: \code{\link{aggregations}}. } +\seealso{ +Other performance: \code{\link{ConfusionMatrix}}, + \code{\link{calculateConfusionMatrix}}, + \code{\link{calculateROCMeasures}}, + \code{\link{estimateRelativeOverfitting}}, + \code{\link{makeCostMeasure}}, + \code{\link{makeCustomResampledMeasure}}, + \code{\link{makeMeasure}}, \code{\link{measures}}, + \code{\link{performance}}, \code{\link{setMeasurePars}} +} diff --git a/man/setMeasurePars.Rd b/man/setMeasurePars.Rd new file mode 100644 index 0000000000..5dc2e54b57 --- /dev/null +++ b/man/setMeasurePars.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Measure_operators.R +\name{setMeasurePars} +\alias{setMeasurePars} +\title{Set parameters of performance measures} +\usage{ +setMeasurePars(measure, ..., par.vals = list()) +} +\arguments{ +\item{measure}{[\code{\link{Measure}}]\cr +Performance measure.} + +\item{...}{[any]\cr +Named (hyper)parameters with new settings. Alternatively these can be passed +using the \code{par.vals} argument.} + +\item{par.vals}{[\code{list}]\cr +Optional list of named (hyper)parameter settings. The arguments in +\code{...} take precedence over values in this list.} +} +\value{ +[\code{\link{Measure}}]. +} +\description{ +Sets hyperparameters of measures. +} +\seealso{ +Other performance: \code{\link{ConfusionMatrix}}, + \code{\link{calculateConfusionMatrix}}, + \code{\link{calculateROCMeasures}}, + \code{\link{estimateRelativeOverfitting}}, + \code{\link{makeCostMeasure}}, + \code{\link{makeCustomResampledMeasure}}, + \code{\link{makeMeasure}}, \code{\link{measures}}, + \code{\link{performance}}, \code{\link{setAggregation}} +} diff --git a/tests/testthat/helper_objects.R b/tests/testthat/helper_objects.R index c2db493401..70f206b769 100644 --- a/tests/testthat/helper_objects.R +++ b/tests/testthat/helper_objects.R @@ -92,6 +92,13 @@ getSurvData = function(n = 100, p = 10) { cens.time = rexp(n, rate = 1 / 10) status = ifelse(real.time <= cens.time, TRUE, FALSE) obs.time = ifelse(real.time <= cens.time, real.time, cens.time) + 1 + + # mark large outliers in survival as censored + q = quantile(obs.time, .90) + i = which(obs.time > q) + obs.time[i] = q + cens.time[i] = FALSE + return(cbind(data.frame(time = obs.time, status = status), x)) } surv.df = getSurvData() diff --git a/tests/testthat/test_base_measures.R b/tests/testthat/test_base_measures.R index 57dbd349c7..b8441125a6 100644 --- a/tests/testthat/test_base_measures.R +++ b/tests/testthat/test_base_measures.R @@ -16,6 +16,7 @@ test_that("measures", { mod = train(lrn, task = ct, subset = binaryclass.train.inds) pred = predict(mod, task = ct, subset = binaryclass.test.inds) perf = performance(pred, measures = ms) + expect_numeric(perf, any.missing = FALSE, len = length(ms)) rdesc = makeResampleDesc("Holdout", split = 0.2) r = resample(lrn, ct, rdesc, measures = ms) @@ -32,7 +33,7 @@ test_that("measures", { mod = train(lrn, task = regr.task, subset = regr.train.inds) pred = predict(mod, task = regr.task, subset = regr.test.inds) perf = performance(pred, measures = ms, model = mod) - expect_is(perf, "numeric") + expect_numeric(perf, any.missing = FALSE, len = length(ms)) # Test multiclass auc lrn = makeLearner("classif.randomForest", predict.type = "prob") @@ -40,15 +41,32 @@ test_that("measures", { pred = predict(mod, task = multiclass.task, subset = multiclass.test.inds) perf = performance(pred, measures = list(multiclass.aunu, multiclass.aunp, multiclass.au1u, multiclass.au1p)) - expect_is(perf, "numeric") + expect_numeric(perf, any.missing = FALSE) # test survival measure - ms = list(cindex) - lrn = makeLearner("surv.coxph") - mod = train(lrn, task = surv.task, subset = surv.train.inds) - pred = predict(mod, task = surv.task, subset = surv.test.inds) - perf = performance(pred, measures = ms) - expect_is(perf, "numeric") + ms = list(cindex, cindex.uno, iauc.uno) + learners = c("surv.rpart", "surv.coxph") + + for (lrn in learners) { + mod = suppressWarnings(train(lrn, task = surv.task, subset = surv.train.inds)) + pred = predict(mod, task = surv.task, subset = surv.test.inds) + perf = performance(pred, model = mod, task = surv.task, measures = ms) + Map(function(measure, perf) { + r = range(measure$worst, measure$best) + expect_number(perf, lower = r[1], upper = r[2], label = measure$id) + }, measure = ms, perf = perf) + } + + task = lung.task + rin = makeResampleInstance("Holdout", task = task) + for (lrn in learners) { + res = resample(lrn, task, resampling = rin, measures = ms)$aggr + expect_numeric(res, any.missing = FALSE) + Map(function(measure) { + r = range(measure$worst, measure$best) + expect_number(res[[sprintf("%s.test.mean", measure$id)]], lower = r[1], upper = r[2], label = measure$id) + }, measure = ms) + } }) test_that("classif measures do not produce integer overflow", { @@ -898,3 +916,25 @@ test_that("measures MCC denominator 0 (#1736)", { res = measureMCC(c(TRUE, TRUE, TRUE), c(TRUE, TRUE, TRUE), TRUE, FALSE) expect_equal(res, 0) }) + +test_that("setMeasurePars", { + mm = mmce + expect_list(mm$extra.args, len = 0L, names = "named") + mm = setMeasurePars(mm, foo = 1, bar = 2) + expect_list(mm$extra.args, len = 2L, names = "named") + expect_equal(mm$extra.args, list(foo = 1, bar = 2)) + expect_list(mmce$extra.args, len = 0L, names = "named") # mmce is untouched? + + mm = setMeasurePars(mmce, foo = 1, bar = 2, par.vals = list(foobar = 99)) + expect_equal(mm$extra.args, list(foobar = 99, foo = 1, bar = 2)) + + # re-setting parameters to NULL + mm = setMeasurePars(mmce, foo = 1, bar = 2) + expect_list(mm$extra.args, len = 2L, names = "named") + mm = setMeasurePars(mm, foo = NULL, bar = 2) + expect_equal(mm$extra.args, list(foo = NULL, bar = 2)) + + # precedence of ... over par.vals + mm = setMeasurePars(mmce, foo = 1, par.vals = list(foo = 2)) + expect_equal(mm$extra.args, list(foo = 1)) +}) diff --git a/tests/testthat/test_surv_measures.R b/tests/testthat/test_surv_measures.R new file mode 100644 index 0000000000..4ce0d6254d --- /dev/null +++ b/tests/testthat/test_surv_measures.R @@ -0,0 +1,25 @@ +context("survival measures") + +test_that("survival measures do not do stupid things", { + set.seed(1) + n = 100 + time = sort(rexp(n, 0.1)) + 1 + status = sample(0:1, n, replace = TRUE, prob = c(1, 10)) + data = data.frame(time = time, status = status, x1 = time + rnorm(n, sd = 0.1), x2 = runif(n)) + task = makeSurvTask(id = "dummy", data = data, target = c("time", "status")) + + ms = list(cindex, cindex.uno, iauc.uno) + learners = c("surv.coxph", "surv.CoxBoost", "surv.rpart", "surv.ranger", "surv.cvglmnet", "surv.glmnet", "surv.gamboost", "surv.glmboost", "surv.randomForestSRC", "surv.cforest") + learners = lapply(learners, makeLearner) + + for (lrn in learners) { + res = suppressWarnings(resample(lrn, task, resampling = hout, measures = ms, models = FALSE, keep.pred = FALSE)) + aggr = res$aggr + for (measure in ms) { + r = range(measure$worst, measure$best) + x = aggr[[sprintf("%s.test.mean", measure$id)]] + expect_number(x, lower = r[1], upper = r[2], label = sprintf("%s/%s", lrn$id, measure$id)) + expect_true(abs(x - measure$worst) > abs(x - measure$best), label = sprintf("%s/%s", lrn$id, measure$id)) + } + } +}) diff --git a/tests/testthat/test_surv_penalized.R b/tests/testthat/test_surv_penalized.R deleted file mode 100644 index 21c8246cdd..0000000000 --- a/tests/testthat/test_surv_penalized.R +++ /dev/null @@ -1,26 +0,0 @@ -context("surv_penalized") - -test_that("surv_penalized", { - requirePackages("survival", default.method = "load") - requirePackages("penalized", default.method = "load") - parset.list = list( - list(maxiter = 100), - list(lambda1 = 2, lambda2 = 1), - list(lambda1 = 1, lambda2 = 1), - list(fusedl = TRUE, lambda1 = 2, lambda2 = 1, maxiter = 5L), - list(fusedl = TRUE, lambda1 = 1, lambda2 = 1, maxiter = 10L) - ) - - old.predicts.list = list() - - for (i in seq_along(parset.list)) { - pars = c(list(response = surv.formula, data = surv.train, - model = "cox"), parset.list[[i]]) - set.seed(getOption("mlr.debug.seed")) - m = do.call(penalized::penalized, pars) - p = penalized::survival(penalized::predict(m, data = surv.test), Inf) - old.predicts.list[[i]] = p - } - testSimpleParsets("surv.penalized", surv.df, surv.target, - surv.train.inds, old.predicts.list, parset.list) -}) From 8be0fd34bc3cb930ee8e6ff0c056aaac1f6406ca Mon Sep 17 00:00:00 2001 From: Michel Lang Date: Fri, 9 Jun 2017 13:44:34 +0200 Subject: [PATCH 02/15] removed timeROC from Suggests --- DESCRIPTION | 1 - 1 file changed, 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 25d5dd9d02..4945fc9200 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -154,7 +154,6 @@ Suggests: SwarmSVM, svglite, testthat, - timeROC, tgp, TH.data, xgboost (>= 0.6-2), From 49f286773c9a14cf3991a184be51d7ebe4ab654f Mon Sep 17 00:00:00 2001 From: Michel Lang Date: Fri, 9 Jun 2017 13:58:58 +0200 Subject: [PATCH 03/15] improve test --- tests/testthat/test_surv_measures.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/testthat/test_surv_measures.R b/tests/testthat/test_surv_measures.R index 4ce0d6254d..33097a3f4e 100644 --- a/tests/testthat/test_surv_measures.R +++ b/tests/testthat/test_surv_measures.R @@ -19,7 +19,8 @@ test_that("survival measures do not do stupid things", { r = range(measure$worst, measure$best) x = aggr[[sprintf("%s.test.mean", measure$id)]] expect_number(x, lower = r[1], upper = r[2], label = sprintf("%s/%s", lrn$id, measure$id)) - expect_true(abs(x - measure$worst) > abs(x - measure$best), label = sprintf("%s/%s", lrn$id, measure$id)) + if (!anyInfinte(r)) + expect_true(abs(x - measure$worst) >= abs(x - measure$best), label = sprintf("%s/%s", lrn$id, measure$id)) } } }) From 4396ecfde4b6c0e3129f627cc8d8590058166c85 Mon Sep 17 00:00:00 2001 From: Michel Lang Date: Fri, 9 Jun 2017 14:08:11 +0200 Subject: [PATCH 04/15] fixed test --- tests/testthat/test_surv_measures.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test_surv_measures.R b/tests/testthat/test_surv_measures.R index 33097a3f4e..bd52fdf257 100644 --- a/tests/testthat/test_surv_measures.R +++ b/tests/testthat/test_surv_measures.R @@ -9,7 +9,7 @@ test_that("survival measures do not do stupid things", { task = makeSurvTask(id = "dummy", data = data, target = c("time", "status")) ms = list(cindex, cindex.uno, iauc.uno) - learners = c("surv.coxph", "surv.CoxBoost", "surv.rpart", "surv.ranger", "surv.cvglmnet", "surv.glmnet", "surv.gamboost", "surv.glmboost", "surv.randomForestSRC", "surv.cforest") + learners = listLearners("surv", warn.missing.packages = FALSE)$class learners = lapply(learners, makeLearner) for (lrn in learners) { @@ -19,7 +19,7 @@ test_that("survival measures do not do stupid things", { r = range(measure$worst, measure$best) x = aggr[[sprintf("%s.test.mean", measure$id)]] expect_number(x, lower = r[1], upper = r[2], label = sprintf("%s/%s", lrn$id, measure$id)) - if (!anyInfinte(r)) + if (!anyInfinite(r)) expect_true(abs(x - measure$worst) >= abs(x - measure$best), label = sprintf("%s/%s", lrn$id, measure$id)) } } From fea0216c40ee8ed14e2b848e5db39f219a0e7711 Mon Sep 17 00:00:00 2001 From: Michel Lang Date: Mon, 12 Jun 2017 09:58:12 +0200 Subject: [PATCH 05/15] added reference for uno --- R/measures.R | 4 ++++ man/measures.Rd | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/R/measures.R b/R/measures.R index f470d0d657..3264882d00 100644 --- a/R/measures.R +++ b/R/measures.R @@ -1355,6 +1355,10 @@ cindex = makeMeasure(id = "cindex", minimize = FALSE, best = 1, worst = 0, #' @export cindex.uno #' @rdname measures #' @format none +#' @references +#' Uno H, Cai T, Pencina MJ, D'Agostino RB, Wei LJ. +#' \emph{On the C-statistics for Evaluating Overall Adequacy of Risk Prediction Procedures with Censored Survival Data} +#' Statistics in medicine. 2011;30(10):1105-1117. \url{http://dx.doi.org/10.1002/sim.4154}. cindex.uno = makeMeasure(id = "cindex.uno", minimize = FALSE, best = 1, worst = 0, properties = c("surv", "req.pred", "req.truth", "req.model"), name = "Uno's Concordance index", diff --git a/man/measures.Rd b/man/measures.Rd index f632757694..19847ecc08 100644 --- a/man/measures.Rd +++ b/man/measures.Rd @@ -413,6 +413,10 @@ overwritten using \code{\link{setMeasurePars}}. He, H. & Garcia, E. A. (2009) \emph{Learning from Imbalanced Data.} IEEE Transactions on Knowledge and Data Engineering, vol. 21, no. 9. pp. 1263-1284. + +Uno H, Cai T, Pencina MJ, D'Agostino RB, Wei LJ. +\emph{On the C-statistics for Evaluating Overall Adequacy of Risk Prediction Procedures with Censored Survival Data} +Statistics in medicine. 2011;30(10):1105-1117. \url{http://dx.doi.org/10.1002/sim.4154}. } \seealso{ Other performance: \code{\link{ConfusionMatrix}}, From 154dd02ac59a58968ff475bd28c8370ae6072564 Mon Sep 17 00:00:00 2001 From: Michel Lang Date: Mon, 12 Jun 2017 10:01:47 +0200 Subject: [PATCH 06/15] ref for iauc.uno --- R/measures.R | 10 +++++++--- man/measures.Rd | 6 +++++- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/R/measures.R b/R/measures.R index 3264882d00..a6a25e9cbb 100644 --- a/R/measures.R +++ b/R/measures.R @@ -1356,13 +1356,13 @@ cindex = makeMeasure(id = "cindex", minimize = FALSE, best = 1, worst = 0, #' @rdname measures #' @format none #' @references -#' Uno H, Cai T, Pencina MJ, D'Agostino RB, Wei LJ. +#' H. Uno et al. #' \emph{On the C-statistics for Evaluating Overall Adequacy of Risk Prediction Procedures with Censored Survival Data} #' Statistics in medicine. 2011;30(10):1105-1117. \url{http://dx.doi.org/10.1002/sim.4154}. cindex.uno = makeMeasure(id = "cindex.uno", minimize = FALSE, best = 1, worst = 0, properties = c("surv", "req.pred", "req.truth", "req.model"), name = "Uno's Concordance index", - note = "Fraction of all pairs of subjects whose predicted survival times are correctly ordered among all subjects that can actually be ordered. In other words, it is the probability of concordance between the predicted and the observed survival. Corrected by weighting with IPCW as suggested by Uno.", + note = "Fraction of all pairs of subjects whose predicted survival times are correctly ordered among all subjects that can actually be ordered. In other words, it is the probability of concordance between the predicted and the observed survival. Corrected by weighting with IPCW as suggested by Uno. Implemented in survAUC::UnoC.", fun = function(task, model, pred, feats, extra.args) { requirePackages("_survAUC") y = getPredictionResponse(pred) @@ -1378,10 +1378,14 @@ cindex.uno = makeMeasure(id = "cindex.uno", minimize = FALSE, best = 1, worst = #' @export iauc.uno #' @rdname measures #' @format none +#' @references +#' H. Uno et al. +#' \emph{Evaluating Prediction Rules for T-Year Survivors with Censored Regression Models} +#' Journal of the American Statistical Association 102, no. 478 (2007): 527-37. \url{http://www.jstor.org/stable/27639883}. iauc.uno = makeMeasure(id = "iauc.uno", minimize = FALSE, best = 1, worst = 0, properties = c("surv", "req.pred", "req.truth", "req.model", "req.task"), name = "Uno's estimator of cumulative AUC for right censored time-to-event data", - note = "To set an upper time limit, set argument max.time (defaults to max time in complete task).", + note = "To set an upper time limit, set argument max.time (defaults to max time in complete task). Implemented in survAUC::AUC.uno.", fun = function(task, model, pred, feats, extra.args) { requirePackages("_survAUC") max.time = assertNumber(extra.args$max.time, null.ok = TRUE) %??% max(getTaskTargets(task)[, 1L]) diff --git a/man/measures.Rd b/man/measures.Rd index 19847ecc08..f458026b8d 100644 --- a/man/measures.Rd +++ b/man/measures.Rd @@ -414,9 +414,13 @@ He, H. & Garcia, E. A. (2009) \emph{Learning from Imbalanced Data.} IEEE Transactions on Knowledge and Data Engineering, vol. 21, no. 9. pp. 1263-1284. -Uno H, Cai T, Pencina MJ, D'Agostino RB, Wei LJ. +H. Uno et al. \emph{On the C-statistics for Evaluating Overall Adequacy of Risk Prediction Procedures with Censored Survival Data} Statistics in medicine. 2011;30(10):1105-1117. \url{http://dx.doi.org/10.1002/sim.4154}. + +H. Uno et al. +\emph{Evaluating Prediction Rules for T-Year Survivors with Censored Regression Models} +Journal of the American Statistical Association 102, no. 478 (2007): 527-37. \url{http://www.jstor.org/stable/27639883}. } \seealso{ Other performance: \code{\link{ConfusionMatrix}}, From b28d209892c94f80fe303ce4efeec6ec3467c35d Mon Sep 17 00:00:00 2001 From: Michel Lang Date: Mon, 12 Jun 2017 10:19:38 +0200 Subject: [PATCH 07/15] added an additional test --- tests/testthat/test_surv_measures.R | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/testthat/test_surv_measures.R b/tests/testthat/test_surv_measures.R index bd52fdf257..f221921628 100644 --- a/tests/testthat/test_surv_measures.R +++ b/tests/testthat/test_surv_measures.R @@ -24,3 +24,20 @@ test_that("survival measures do not do stupid things", { } } }) + +test_that("setting measure pars works", { + mod = train("surv.rpart", wpbc.task) + pred = predict(mod, wpbc.task) + + measures = list(setMeasurePars(cindex.uno, max.time = 50), cindex.uno) + perf = performance(pred = pred, task = wpbc.task, model = mod, measures = measures) + expect_true(perf[1] < perf[2]) + + measures = list(setMeasurePars(iauc.uno, max.time = 50), iauc.uno) + perf = performance(pred = pred, task = wpbc.task, model = mod, measures = measures) + expect_true(perf[1] < perf[2]) + + measures = list(setMeasurePars(iauc.uno, resolution = 10), iauc.uno) + perf = performance(pred = pred, task = wpbc.task, model = mod, measures = measures) + expect_string(all.equal(perf[1], perf[2])) +}) From d3da8b163e29e2667d8f7f874c26c241d8ea796e Mon Sep 17 00:00:00 2001 From: PhilippPro Date: Mon, 12 Jun 2017 15:55:47 +0200 Subject: [PATCH 08/15] Make the help file of performance better. Maybe also add an example for survival in the example section? --- R/performance.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/performance.R b/R/performance.R index 61147b3243..cfe1a493d7 100644 --- a/R/performance.R +++ b/R/performance.R @@ -5,9 +5,9 @@ #' @template arg_pred #' @template arg_measures #' @param task [\code{\link{Task}}]\cr -#' Learning task, might be requested by performance measure, usually not needed except for clustering. +#' Learning task, might be requested by performance measure, usually not needed except for clustering or survival. #' @param model [\code{\link{WrappedModel}}]\cr -#' Model built on training data, might be requested by performance measure, usually not needed. +#' Model built on training data, might be requested by performance measure, usually not needed except for survival. #' @param feats [\code{data.frame}]\cr #' Features of predicted data, usually not needed except for clustering. #' If the prediction was generated from a \code{task}, you can also pass this instead and the features From d6ed18141babfb4ddee3e3a1230d251a79a83125 Mon Sep 17 00:00:00 2001 From: Michel Lang Date: Tue, 13 Jun 2017 10:53:31 +0200 Subject: [PATCH 09/15] added test for surv measures --- tests/testthat/test_surv_measures.R | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/testthat/test_surv_measures.R b/tests/testthat/test_surv_measures.R index f221921628..24867742c5 100644 --- a/tests/testthat/test_surv_measures.R +++ b/tests/testthat/test_surv_measures.R @@ -41,3 +41,17 @@ test_that("setting measure pars works", { perf = performance(pred = pred, task = wpbc.task, model = mod, measures = measures) expect_string(all.equal(perf[1], perf[2])) }) + +test_that("hand constructed tests", { + n = 100 + time = sort(rexp(n, 0.1)) + 1 + data = data.frame(time = time, status = 1, x1 = order(time)) + task = makeSurvTask(id = "dummy", data = data, target = c("time", "status")) + mod = suppressWarnings(train("surv.coxph", task)) + + pred = predict(mod, task) + expect_numeric(-getPredictionResponse(pred), sorted = TRUE, any.missing = FALSE) # perfect predictor + + perf = performance(pred = pred, model = mod, task = task, measures = list(cindex, cindex.uno, iauc.uno)) + expect_equal(unname(perf), c(1, 1, 0.99)) +}) From 00631d3791969f6937c3f3c34d87b9f1e9c6cb28 Mon Sep 17 00:00:00 2001 From: Michel Lang Date: Wed, 14 Jun 2017 11:01:59 +0200 Subject: [PATCH 10/15] dropped support for lcens and icens --- R/Filter.R | 2 +- R/Prediction_operators.R | 3 +- R/RLearner_surv_CoxBoost.R | 4 +- R/RLearner_surv_cforest.R | 2 +- R/RLearner_surv_coxph.R | 2 +- R/RLearner_surv_cv.CoxBoost.R | 4 +- R/RLearner_surv_cvglmnet.R | 4 +- R/RLearner_surv_gamboost.R | 6 +- R/RLearner_surv_gbm.R | 2 +- R/RLearner_surv_glmboost.R | 8 +-- R/RLearner_surv_glmnet.R | 4 +- R/RLearner_surv_randomForestSRC.R | 2 +- R/RLearner_surv_ranger.R | 2 +- R/RLearner_surv_rpart.R | 3 +- R/SurvTask.R | 71 ++++++------------- R/TaskDesc.R | 4 -- R/Task_operators.R | 45 ++---------- R/checkLearnerBeforeTrain.R | 5 -- R/measures.R | 4 +- R/zzz.R | 2 +- man/Task.Rd | 11 +-- man/TaskDesc.Rd | 4 -- man/performance.Rd | 4 +- tests/testthat/test_base_Learner_properties.R | 3 +- tests/testthat/test_base_getTaskData.R | 2 +- tests/testthat/test_surv_cforest.R | 5 +- 26 files changed, 61 insertions(+), 147 deletions(-) diff --git a/R/Filter.R b/R/Filter.R index bc8f716a60..883f209406 100644 --- a/R/Filter.R +++ b/R/Filter.R @@ -129,7 +129,7 @@ makeFilter( supported.features = c("numerics", "ordered"), fun = function(task, nselect, ...) { if (inherits(task, "SurvTask")) { - data = getTaskData(task, target.extra = TRUE, recode.target = "rcens") + data = getTaskData(task, target.extra = TRUE, recode.target = "surv") data = cbind(..surv = data$target, data$data) target.ind = 1L } else { diff --git a/R/Prediction_operators.R b/R/Prediction_operators.R index 5775527b2f..2a771b7ddb 100644 --- a/R/Prediction_operators.R +++ b/R/Prediction_operators.R @@ -158,8 +158,7 @@ getPredictionTruth.PredictionCluster = function(pred) { #' @export getPredictionTruth.PredictionSurv = function(pred) { - lookup = setNames(c("left", "right", "interval2"), c("lcens", "rcens", "icens")) - Surv(pred$data$truth.time, pred$data$truth.event, type = lookup[pred$task.desc$censoring]) + Surv(pred$data$truth.time, pred$data$truth.event, type = "right") } #' @export diff --git a/R/RLearner_surv_CoxBoost.R b/R/RLearner_surv_CoxBoost.R index 9227280621..b0fa892bc4 100644 --- a/R/RLearner_surv_CoxBoost.R +++ b/R/RLearner_surv_CoxBoost.R @@ -14,7 +14,7 @@ makeRLearner.surv.CoxBoost = function() { makeLogicalLearnerParam(id = "trace", default = FALSE, tunable = FALSE) ), par.vals = list(return.score = FALSE), - properties = c("numerics", "factors", "ordered", "weights", "rcens"), + properties = c("numerics", "factors", "ordered", "weights"), name = "Cox Proportional Hazards Model with Componentwise Likelihood based Boosting", short.name = "coxboost", note = "Factors automatically get converted to dummy columns, ordered factors to integer.", @@ -24,7 +24,7 @@ makeRLearner.surv.CoxBoost = function() { #' @export trainLearner.surv.CoxBoost = function(.learner, .task, .subset, .weights = NULL, penalty = NULL, unpen.index = NULL, ...) { - data = getTaskData(.task, subset = .subset, target.extra = TRUE, recode.target = "rcens") + data = getTaskData(.task, subset = .subset, target.extra = TRUE, recode.target = "surv") info = getFixDataInfo(data$data, factors.to.dummies = TRUE, ordered.to.int = TRUE) data$data = as.matrix(fixDataForLearner(data$data, info)) diff --git a/R/RLearner_surv_cforest.R b/R/RLearner_surv_cforest.R index 65ae59567f..887283b31c 100644 --- a/R/RLearner_surv_cforest.R +++ b/R/RLearner_surv_cforest.R @@ -22,7 +22,7 @@ makeRLearner.surv.cforest = function() { makeIntegerLearnerParam(id = "maxdepth", lower = 0L, default = 0L), makeLogicalLearnerParam(id = "savesplitstats", default = FALSE, tunable = FALSE) ), - properties = c("factors", "numerics", "ordered", "weights", "rcens", "missings", "featimp"), + properties = c("factors", "numerics", "ordered", "weights", "missings", "featimp"), par.vals = list(), name = "Random Forest based on Conditional Inference Trees", short.name = "crf", diff --git a/R/RLearner_surv_coxph.R b/R/RLearner_surv_coxph.R index fea4bb79cf..51bf9d3f76 100644 --- a/R/RLearner_surv_coxph.R +++ b/R/RLearner_surv_coxph.R @@ -15,7 +15,7 @@ makeRLearner.surv.coxph = function() { makeLogicalLearnerParam(id = "x", default = FALSE, tunable = FALSE), makeLogicalLearnerParam(id = "y", default = TRUE, tunable = FALSE) ), - properties = c("numerics", "factors", "weights", "rcens"), + properties = c("numerics", "factors", "weights"), name = "Cox Proportional Hazard Model", short.name = "coxph", callees = c("coxph", "coxph.control") diff --git a/R/RLearner_surv_cv.CoxBoost.R b/R/RLearner_surv_cv.CoxBoost.R index a86a6e13fe..66de066b45 100644 --- a/R/RLearner_surv_cv.CoxBoost.R +++ b/R/RLearner_surv_cv.CoxBoost.R @@ -17,7 +17,7 @@ makeRLearner.surv.cv.CoxBoost = function() { makeNumericLearnerParam(id = "stepsize.factor", default = 1, lower = 0), makeLogicalLearnerParam(id = "trace", default = FALSE, tunable = FALSE) ), - properties = c("numerics", "factors", "weights", "rcens"), + properties = c("numerics", "factors", "weights"), name = "Cox Proportional Hazards Model with Componentwise Likelihood based Boosting, tuned for the optimal number of boosting steps", short.name = "cv.CoxBoost", note = "Factors automatically get converted to dummy columns, ordered factors to integer.", @@ -27,7 +27,7 @@ makeRLearner.surv.cv.CoxBoost = function() { #' @export trainLearner.surv.cv.CoxBoost = function(.learner, .task, .subset, .weights = NULL, penalty = NULL, unpen.index = NULL, ...) { - data = getTaskData(.task, subset = .subset, target.extra = TRUE, recode.target = "rcens") + data = getTaskData(.task, subset = .subset, target.extra = TRUE, recode.target = "surv") info = getFixDataInfo(data$data, factors.to.dummies = TRUE, ordered.to.int = TRUE) if (is.null(penalty)) diff --git a/R/RLearner_surv_cvglmnet.R b/R/RLearner_surv_cvglmnet.R index 2825d62899..7b6354f04c 100644 --- a/R/RLearner_surv_cvglmnet.R +++ b/R/RLearner_surv_cvglmnet.R @@ -32,7 +32,7 @@ makeRLearner.surv.cvglmnet = function() { makeIntegerLearnerParam(id = "mxit", default = 100, lower = 1), makeLogicalLearnerParam(id = "factory", default = FALSE) ), - properties = c("numerics", "factors", "ordered", "weights", "rcens"), + properties = c("numerics", "factors", "ordered", "weights"), name = "GLM with Regularization (Cross Validated Lambda)", short.name = "cvglmnet", note = "Factors automatically get converted to dummy columns, ordered factors to integer.", @@ -42,7 +42,7 @@ makeRLearner.surv.cvglmnet = function() { #' @export trainLearner.surv.cvglmnet = function(.learner, .task, .subset, .weights = NULL, ...) { - d = getTaskData(.task, subset = .subset, target.extra = TRUE, recode.target = "rcens") + d = getTaskData(.task, subset = .subset, target.extra = TRUE, recode.target = "surv") info = getFixDataInfo(d$data, factors.to.dummies = TRUE, ordered.to.int = TRUE) args = c(list(x = as.matrix(fixDataForLearner(d$data, info)), y = d$target, family = "cox", parallel = FALSE), list(...)) rm(d) diff --git a/R/RLearner_surv_gamboost.R b/R/RLearner_surv_gamboost.R index 85a77d6dba..5172a55020 100644 --- a/R/RLearner_surv_gamboost.R +++ b/R/RLearner_surv_gamboost.R @@ -20,7 +20,7 @@ makeRLearner.surv.gamboost = function() { par.vals = list( family = "CoxPH" ), - properties = c("numerics", "factors", "ordered", "weights", "rcens"), + properties = c("numerics", "factors", "ordered", "weights"), name = "Gradient boosting with smooth components", short.name = "gamboost", note = "`family` has been set to `CoxPH()` by default.", @@ -42,11 +42,11 @@ trainLearner.surv.gamboost = function(.learner, .task, .subset, .weights = NULL, ) f = getTaskFormula(.task) - data = getTaskData(.task, subset = .subset, recode.target = "rcens") + data = getTaskData(.task, subset = .subset, recode.target = "surv") if (is.null(.weights)) { model = mboost::gamboost(f, data = data, control = ctrl, family = family, ...) } else { - model = mboost::gamboost(f, data = getTaskData(.task, subset = .subset, recode.target = "rcens"), control = ctrl, weights = .weights, family = family, ...) + model = mboost::gamboost(f, data = getTaskData(.task, subset = .subset, recode.target = "surv"), control = ctrl, weights = .weights, family = family, ...) } } diff --git a/R/RLearner_surv_gbm.R b/R/RLearner_surv_gbm.R index 9e83a52da8..41d9fb4c24 100644 --- a/R/RLearner_surv_gbm.R +++ b/R/RLearner_surv_gbm.R @@ -15,7 +15,7 @@ makeRLearner.surv.gbm = function() { makeLogicalLearnerParam(id = "keep.data", default = TRUE, tunable = FALSE), makeLogicalLearnerParam(id = "verbose", default = FALSE, tunable = FALSE) ), - properties = c("missings", "numerics", "factors", "rcens", "prob", "weights", "featimp"), + properties = c("missings", "numerics", "factors", "weights", "featimp"), par.vals = list(keep.data = FALSE), name = "Gradient Boosting Machine", short.name = "gbm", diff --git a/R/RLearner_surv_glmboost.R b/R/RLearner_surv_glmboost.R index 3e8e8f7d66..e73ae537da 100644 --- a/R/RLearner_surv_glmboost.R +++ b/R/RLearner_surv_glmboost.R @@ -20,7 +20,7 @@ makeRLearner.surv.glmboost = function() { family = "CoxPH", use.formula = TRUE ), - properties = c("numerics", "factors", "ordered", "weights", "rcens"), + properties = c("numerics", "factors", "ordered", "weights"), name = "Gradient Boosting with Componentwise Linear Models", short.name = "glmboost", note = "`family` has been set to `CoxPH()` by default.", @@ -42,12 +42,12 @@ trainLearner.surv.glmboost = function(.learner, .task, .subset, .weights = NULL, if (use.formula) { f = getTaskFormula(.task) model = if (is.null(.weights)) { - mboost::glmboost(f, data = getTaskData(.task, subset = .subset, recode.target = "rcens"), control = ctrl, family = family, ...) + mboost::glmboost(f, data = getTaskData(.task, subset = .subset, recode.target = "surv"), control = ctrl, family = family, ...) } else { - mboost::glmboost(f, data = getTaskData(.task, subset = .subset, recode.target = "rcens"), control = ctrl, weights = .weights, family = family, ...) + mboost::glmboost(f, data = getTaskData(.task, subset = .subset, recode.target = "surv"), control = ctrl, weights = .weights, family = family, ...) } } else { - data = getTaskData(.task, subset = .subset, target.extra = TRUE, recode.target = "rcens") + data = getTaskData(.task, subset = .subset, target.extra = TRUE, recode.target = "surv") info = getFixDataInfo(data$data, factors.to.dummies = TRUE, ordered.to.int = TRUE) data$data = as.matrix(fixDataForLearner(data$data, info)) model = if (is.null(.weights)) { diff --git a/R/RLearner_surv_glmnet.R b/R/RLearner_surv_glmnet.R index 0fbaa39b50..34b0b8700b 100644 --- a/R/RLearner_surv_glmnet.R +++ b/R/RLearner_surv_glmnet.R @@ -30,7 +30,7 @@ makeRLearner.surv.glmnet = function() { makeNumericLearnerParam(id = "prec", default = 1e-10), makeIntegerLearnerParam(id = "mxit", default = 100, lower = 1) ), - properties = c("numerics", "factors", "ordered", "weights", "rcens"), + properties = c("numerics", "factors", "ordered", "weights"), par.vals = list(s = 0.01), name = "GLM with Regularization", short.name = "glmnet", @@ -47,7 +47,7 @@ makeRLearner.surv.glmnet = function() { #' @export trainLearner.surv.glmnet = function(.learner, .task, .subset, .weights = NULL, ...) { - d = getTaskData(.task, subset = .subset, target.extra = TRUE, recode.target = "rcens") + d = getTaskData(.task, subset = .subset, target.extra = TRUE, recode.target = "surv") info = getFixDataInfo(d$data, factors.to.dummies = TRUE, ordered.to.int = TRUE) args = c(list(x = as.matrix(fixDataForLearner(d$data, info)), y = d$target, family = "cox"), list(...)) rm(d) diff --git a/R/RLearner_surv_randomForestSRC.R b/R/RLearner_surv_randomForestSRC.R index fb3ef961d0..0e6eb885c2 100644 --- a/R/RLearner_surv_randomForestSRC.R +++ b/R/RLearner_surv_randomForestSRC.R @@ -42,7 +42,7 @@ makeRLearner.surv.randomForestSRC = function() { makeLogicalLearnerParam(id = "tree.err", default = FALSE, tunable = FALSE) ), par.vals = list(na.action = "na.impute"), - properties = c("missings", "numerics", "factors", "ordered", "rcens", "weights", "oobpreds", "featimp"), + properties = c("missings", "numerics", "factors", "ordered", "weights", "oobpreds", "featimp"), name = "Random Forest", short.name = "rfsrc", note = '`na.action` has been set to `"na.impute"` by default to allow missing data support.', diff --git a/R/RLearner_surv_ranger.R b/R/RLearner_surv_ranger.R index 00f8dd3f3f..609ff48431 100644 --- a/R/RLearner_surv_ranger.R +++ b/R/RLearner_surv_ranger.R @@ -27,7 +27,7 @@ makeRLearner.surv.ranger = function() { makeLogicalLearnerParam(id = "keep.inbag", default = FALSE, tunable = FALSE) ), par.vals = list(num.threads = 1L, verbose = FALSE, respect.unordered.factors = TRUE), - properties = c("numerics", "factors", "ordered", "rcens", "featimp"), + properties = c("numerics", "factors", "ordered", "featimp"), name = "Random Forests", short.name = "ranger", note = "By default, internal parallelization is switched off (`num.threads = 1`), `verbose` output is disabled, `respect.unordered.factors` is set to `TRUE`. All settings are changeable.", diff --git a/R/RLearner_surv_rpart.R b/R/RLearner_surv_rpart.R index 03a36d4235..cf56f5c4a1 100644 --- a/R/RLearner_surv_rpart.R +++ b/R/RLearner_surv_rpart.R @@ -17,8 +17,7 @@ makeRLearner.surv.rpart = function() { makeUntypedLearnerParam(id = "parms") ), par.vals = list(xval = 0L), - properties = c("rcens", "missings", "numerics", "factors", "ordered", - "weights", "featimp"), + properties = c("missings", "numerics", "factors", "ordered", "weights", "featimp"), name = "Survival Tree", short.name = "rpart", note = "`xval` has been set to `0` by default for speed.", diff --git a/R/SurvTask.R b/R/SurvTask.R index cd1f2393cc..403d21df2e 100644 --- a/R/SurvTask.R +++ b/R/SurvTask.R @@ -1,74 +1,49 @@ #' @rdname Task -#' @param censoring [\code{character(1)}]\cr -#' Censoring type. Allowed choices are \dQuote{rcens} for right censored data (default), -#' \dQuote{lcens} for left censored and \dQuote{icens} for interval censored data using -#' the \dQuote{interval2} format. -#' See \code{\link[survival]{Surv}} for details. #' @export -makeSurvTask = function(id = deparse(substitute(data)), data, target, censoring = "rcens", weights = NULL, blocking = NULL, fixup.data = "warn", check.data = TRUE) { +makeSurvTask = function(id = deparse(substitute(data)), data, target, weights = NULL, blocking = NULL, fixup.data = "warn", check.data = TRUE) { assertString(id) assertDataFrame(data) assertCharacter(target, any.missing = FALSE, len = 2L) - assertChoice(censoring, choices = c("rcens", "lcens", "icens")) assertChoice(fixup.data, choices = c("no", "quiet", "warn")) assertFlag(check.data) if (fixup.data != "no") { - if (censoring %in% c("lcens", "rcens")) { - time = data[[target[1L]]] - event = data[[target[2L]]] - - if (is.integer(time)) { - data[[target[1L]]] = as.double(time) - } - - if (is.numeric(event)) { - if (testIntegerish(event) && all(as.integer(event) %in% c(0L, 1L))) - data[[target[2L]]] = (as.integer(event) == 1L) - } else if (is.factor(event)) { - lvls = levels(event) - if (length(lvls) == 2L) { - if (all(lvls %in% c("TRUE", "FALSE"))) { - data[[target[2L]]] = (event == "TRUE") - } else if (all(lvls %in% c("0", "1"))) { - data[[target[2L]]] = (as.character(event) == "1") - } + time = data[[target[1L]]] + event = data[[target[2L]]] + + if (is.integer(time)) + data[[target[1L]]] = as.double(time) + + if (is.numeric(event)) { + if (testIntegerish(event) && all(as.integer(event) %in% c(0L, 1L))) + data[[target[2L]]] = (as.integer(event) == 1L) + } else if (is.factor(event)) { + lvls = levels(event) + if (length(lvls) == 2L) { + if (all(lvls %in% c("TRUE", "FALSE"))) { + data[[target[2L]]] = (event == "TRUE") + } else if (all(lvls %in% c("0", "1"))) { + data[[target[2L]]] = (as.character(event) == "1") } } - } else { # icens - time1 = data[[target[1L]]] - time2 = data[[target[2L]]] - - if (is.integer(time1)) - data[[target[1L]]] = as.numeric(time1) - if (is.integer(time2)) - data[[target[2L]]] = as.numeric(time2) } } task = makeSupervisedTask("regr", data, target, weights, blocking, fixup.data = fixup.data, check.data = check.data) if (check.data) { - if (censoring %in% c("lcens", "rcens")) { - time = data[[target[1L]]] - event = data[[target[2L]]] - assertNumeric(time, lower = 0, finite = TRUE, any.missing = FALSE, .var.name = "target column time") - assertLogical(event, any.missing = FALSE, .var.name = "target column event") - } else { # icens - time1 = data[[target[1L]]] - time2 = data[[target[2L]]] - assertNumeric(time1, any.missing = TRUE, finite = FALSE, .var.name = "target column time1") - assertNumeric(time2, any.missing = TRUE, finite = FALSE, .var.name = "target column time2") - } + time = data[[target[1L]]] + event = data[[target[2L]]] + assertNumeric(time, lower = 0, finite = TRUE, any.missing = FALSE, .var.name = "target column time") + assertLogical(event, any.missing = FALSE, .var.name = "target column event") } - task$task.desc = makeSurvTaskDesc(id, data, target, weights, blocking, censoring) + task$task.desc = makeSurvTaskDesc(id, data, target, weights, blocking) addClasses(task, "SurvTask") } -makeSurvTaskDesc = function(id, data, target, weights, blocking, censoring) { +makeSurvTaskDesc = function(id, data, target, weights, blocking) { td = makeTaskDescInternal("surv", id, data, target, weights, blocking) - td$censoring = censoring addClasses(td, c("SurvTaskDesc", "SupervisedTaskDesc")) } diff --git a/R/TaskDesc.R b/R/TaskDesc.R index 0c5211fcdd..940908a9d5 100644 --- a/R/TaskDesc.R +++ b/R/TaskDesc.R @@ -29,10 +29,6 @@ #' Only present for \dQuote{classif}, NA for multiclass.} #' \item{negative [\code{character(1)}]}{Negative class label for binary classification. #' Only present for \dQuote{classif}, NA for multiclass.} -#' \item{censoring [\code{character(1)}]}{Censoring type for survival analysis. -#' Only present for \dQuote{surv}, one of \dQuote{rcens} for right censored data, -#' \dQuote{lcens} for left censored data, and \dQuote{icens} for interval censored -#' data.} #' } #' @name TaskDesc #' @rdname TaskDesc diff --git a/R/Task_operators.R b/R/Task_operators.R index 53147a3f24..860f1ea50e 100644 --- a/R/Task_operators.R +++ b/R/Task_operators.R @@ -180,8 +180,7 @@ getTaskFormula = function(x, target = getTaskTargetNames(x), explicit.features = td = getTaskDesc(x) type = td$type if (type == "surv") { - lookup = setNames(c("left", "right", "interval2"), c("lcens", "rcens", "icens")) - target = sprintf("Surv(%s, %s, type = \"%s\")", target[1L], target[2L], lookup[td$censoring]) + target = sprintf("Surv(%s, %s, type = \"right\")", target[1L], target[2L]) } else if (type == "multilabel") { target = collapse(target, "+") } else if (type == "costsens") { @@ -334,49 +333,13 @@ recodeY = function(y, type, td) { return(as.numeric(y == td$positive)) if (type == "-1+1") return(as.numeric(2L * (y == td$positive) - 1L)) - if (type %in% c("lcens", "rcens", "icens")) - return(recodeSurvivalTimes(y, from = td$censoring, to = type)) + if (type == "surv") + return(Surv(y[, 1L], y[, 2L], type = "right")) if (type == "multilabel.factor") return(lapply(y, function(x) factor(x, levels = c("TRUE", "FALSE")))) stopf("Unknown value for 'type': %s", type) } -recodeSurvivalTimes = function(y, from, to) { - is.neg.infinite = function(x) is.infinite(x) & x < 0 - is.pos.infinite = function(x) is.infinite(x) & x > 0 - lookup = setNames(c("left", "right", "interval2"), c("lcens", "rcens", "icens")) - - if (from == to) - return(Surv(y[, 1L], y[, 2L], type = lookup[to])) - if (setequal(c(from, to), c("lcens", "rcens"))) - stop("Converting left censored to right censored data (or vice versa) is not possible") - - switch(from, - rcens = { - time1 = y[, 1L] - time2 = ifelse(y[, 2L], y[, 1L], Inf) - }, - lcens = { - time1 = ifelse(y[, 2L], y[, 1L], -Inf) - time2 = y[, 1L] - }, - icens = { - if (to == "lcens") { - if (!all(is.neg.infinite(y[, 1L] | y[, 1L] == y[, 2L]))) - stop("Could not convert interval2 survival data to left censored data") - time1 = y[, 2L] - time2 = is.infinite(y[, 1L]) - } else { - if (!all(is.pos.infinite(y[, 2L] | y[, 2L] == y[, 1L]))) - stop("Could not convert interval2 survival data to right censored data") - time1 = y[, 1L] - time2 = is.infinite(y[, 2L]) - } - } - ) - Surv(time1, time2, type = lookup[to]) -} - #' @title Extract costs in task. #' #' @description @@ -449,7 +412,7 @@ changeData = function(task, data, costs, weights) { "classif" = makeClassifTaskDesc(td$id, data, td$target, task$weights, task$blocking, td$positive), "regr" = makeRegrTaskDesc(td$id, data, td$target, task$weights, task$blocking), "cluster" = makeClusterTaskDesc(td$id, data, task$weights, task$blocking), - "surv" = makeSurvTaskDesc(td$id, data, td$target, task$weights, task$blocking, td$censoring), + "surv" = makeSurvTaskDesc(td$id, data, td$target, task$weights, task$blocking), "costsens" = makeCostSensTaskDesc(td$id, data, td$target, task$blocking, costs), "multilabel" = makeMultilabelTaskDesc(td$id, data, td$target, td$weights, task$blocking) ) diff --git a/R/checkLearnerBeforeTrain.R b/R/checkLearnerBeforeTrain.R index 65c36f0aec..72101c8a3b 100644 --- a/R/checkLearnerBeforeTrain.R +++ b/R/checkLearnerBeforeTrain.R @@ -50,11 +50,6 @@ checkLearnerBeforeTrain = function(task, learner, weights) { if (!hasLearnerProperties(learner, "multiclass")) stopf("Task '%s' is a multiclass-problem, but learner '%s' does not support that!", td$id, learner$id) } - } else if (td$type == "surv") { - if (!hasLearnerProperties(learner, td$censoring)) - stopf("Task '%s' is %s censored, but learner '%s' does not support that!", td$id, td$censoring, learner$id) } invisible(NULL) } - - diff --git a/R/measures.R b/R/measures.R index 234646cf10..e76e8739ca 100644 --- a/R/measures.R +++ b/R/measures.R @@ -1384,7 +1384,7 @@ cindex.uno = makeMeasure(id = "cindex.uno", minimize = FALSE, best = 1, worst = y = getPredictionResponse(pred) if (anyMissing(y)) return(NA_real_) - surv.train = getTaskTargets(task, recode.target = "rcens")[model$subset] + surv.train = getTaskTargets(task, recode.target = "surv")[model$subset] max.time = assertNumber(extra.args$max.time, null.ok = TRUE) %??% max(getTaskTargets(task)[, 1L]) survAUC::UnoC(Surv.rsp = surv.train, Surv.rsp.new = getPredictionTruth(pred), time = max.time, lpnew = y) }, @@ -1406,7 +1406,7 @@ iauc.uno = makeMeasure(id = "iauc.uno", minimize = FALSE, best = 1, worst = 0, requirePackages("_survAUC") max.time = assertNumber(extra.args$max.time, null.ok = TRUE) %??% max(getTaskTargets(task)[, 1L]) times = seq(from = 0, to = max.time, length.out = extra.args$resolution) - surv.train = getTaskTargets(task, recode.target = "rcens")[model$subset] + surv.train = getTaskTargets(task, recode.target = "surv")[model$subset] y = getPredictionResponse(pred) if (anyMissing(y)) return(NA_real_) diff --git a/R/zzz.R b/R/zzz.R index 9e5f2825cc..8ccd6979b5 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -28,7 +28,7 @@ mlr$learner.properties = list( multilabel = c("numerics", "factors", "ordered", "missings", "weights", "prob", "oneclass", "twoclass", "multiclass"), regr = c("numerics", "factors", "ordered", "missings", "weights", "se", "featimp", "oobpreds"), cluster = c("numerics", "factors", "ordered", "missings", "weights", "prob"), - surv = c("numerics", "factors", "ordered", "missings", "weights", "prob", "lcens", "rcens", "icens", "featimp", "oobpreds"), + surv = c("numerics", "factors", "ordered", "missings", "weights", "prob", "featimp", "oobpreds"), costsens = c("numerics", "factors", "ordered", "missings", "weights", "prob", "twoclass", "multiclass") ) mlr$learner.properties$any = unique(unlist(mlr$learner.properties)) diff --git a/man/Task.Rd b/man/Task.Rd index 2945244d40..085a09ca68 100644 --- a/man/Task.Rd +++ b/man/Task.Rd @@ -35,9 +35,8 @@ makeMultilabelTask(id = deparse(substitute(data)), data, target, makeRegrTask(id = deparse(substitute(data)), data, target, weights = NULL, blocking = NULL, fixup.data = "warn", check.data = TRUE) -makeSurvTask(id = deparse(substitute(data)), data, target, - censoring = "rcens", weights = NULL, blocking = NULL, - fixup.data = "warn", check.data = TRUE) +makeSurvTask(id = deparse(substitute(data)), data, target, weights = NULL, + blocking = NULL, fixup.data = "warn", check.data = TRUE) } \arguments{ \item{id}{[\code{character(1)}]\cr @@ -92,12 +91,6 @@ The columns correspond to classes and their names are the class labels (if unnamed we use y1 to yk as labels). Each entry (i,j) of the matrix specifies the cost of predicting class j for observation i.} - -\item{censoring}{[\code{character(1)}]\cr -Censoring type. Allowed choices are \dQuote{rcens} for right censored data (default), -\dQuote{lcens} for left censored and \dQuote{icens} for interval censored data using -the \dQuote{interval2} format. -See \code{\link[survival]{Surv}} for details.} } \value{ [\code{\link{Task}}]. diff --git a/man/TaskDesc.Rd b/man/TaskDesc.Rd index be8f926575..7c8b29c83d 100644 --- a/man/TaskDesc.Rd +++ b/man/TaskDesc.Rd @@ -34,9 +34,5 @@ Object members: Only present for \dQuote{classif}, NA for multiclass.} \item{negative [\code{character(1)}]}{Negative class label for binary classification. Only present for \dQuote{classif}, NA for multiclass.} -\item{censoring [\code{character(1)}]}{Censoring type for survival analysis. - Only present for \dQuote{surv}, one of \dQuote{rcens} for right censored data, - \dQuote{lcens} for left censored data, and \dQuote{icens} for interval censored - data.} } } diff --git a/man/performance.Rd b/man/performance.Rd index 8425549d42..4cb6368d45 100644 --- a/man/performance.Rd +++ b/man/performance.Rd @@ -15,10 +15,10 @@ Performance measure(s) to evaluate. Default is the default measure for the task, see here \code{\link{getDefaultMeasure}}.} \item{task}{[\code{\link{Task}}]\cr -Learning task, might be requested by performance measure, usually not needed except for clustering.} +Learning task, might be requested by performance measure, usually not needed except for clustering or survival.} \item{model}{[\code{\link{WrappedModel}}]\cr -Model built on training data, might be requested by performance measure, usually not needed.} +Model built on training data, might be requested by performance measure, usually not needed except for survival.} \item{feats}{[\code{data.frame}]\cr Features of predicted data, usually not needed except for clustering. diff --git a/tests/testthat/test_base_Learner_properties.R b/tests/testthat/test_base_Learner_properties.R index 3bb69bfe9b..07e6ad74bc 100644 --- a/tests/testthat/test_base_Learner_properties.R +++ b/tests/testthat/test_base_Learner_properties.R @@ -19,8 +19,7 @@ test_that("listLearnerProperties", { classif = c("numerics", "factors", "ordered", "missings", "weights", "prob", "oneclass", "twoclass", "multiclass", "class.weights", "featimp", "oobpreds") expect_equal(listLearnerProperties("classif"), classif) - surv = c("numerics", "factors", "ordered", "missings", "weights", "prob", "lcens", "rcens", - "icens", "featimp", "oobpreds") + surv = c("numerics", "factors", "ordered", "missings", "weights", "prob", "featimp", "oobpreds") expect_equal(listLearnerProperties("surv"), surv) cluster = c("numerics", "factors", "ordered", "missings", "weights", "prob") expect_equal(listLearnerProperties("cluster"), cluster) diff --git a/tests/testthat/test_base_getTaskData.R b/tests/testthat/test_base_getTaskData.R index 4966294fef..561bfad565 100644 --- a/tests/testthat/test_base_getTaskData.R +++ b/tests/testthat/test_base_getTaskData.R @@ -61,7 +61,7 @@ test_that("getTaskData survival", { expect_equal(names(x$target), surv.target) expect_true(setequal(names(x$data), setdiff(names(surv.df), surv.target))) - x = getTaskData(surv.task, target.extra = TRUE, recode.target = "rcens") + x = getTaskData(surv.task, target.extra = TRUE, recode.target = "surv") expect_true(survival::is.Surv(x$target)) expect_equal(dim(x$target), c(nrow(surv.df), 2L)) }) diff --git a/tests/testthat/test_surv_cforest.R b/tests/testthat/test_surv_cforest.R index 59e5fa47f5..444a9d208b 100644 --- a/tests/testthat/test_surv_cforest.R +++ b/tests/testthat/test_surv_cforest.R @@ -22,11 +22,10 @@ test_that("surv_cforest", { pars = c(pars, parset) set.seed(getOption("mlr.debug.seed")) m = do.call(party::cforest, pars) - old.predicts.list[[i]] = predict(m, newdata = surv.test) + old.predicts.list[[i]] = -1 * predict(m, newdata = surv.test) } - testSimpleParsets("surv.cforest", surv.df, surv.target, surv.train.inds, - old.predicts.list, parset.list2) + testSimpleParsets("surv.cforest", surv.df, surv.target, surv.train.inds, old.predicts.list, parset.list2) # issue 556 parset.list3 = list( From 41df983af898bfd3bb65de55c7e2ff5e64e24eda Mon Sep 17 00:00:00 2001 From: mb706 Date: Tue, 20 Jun 2017 16:12:19 +0200 Subject: [PATCH 11/15] Fix 1857 fixup.data (#1858) * Check that fixup.data is used * Use fixup.data and check.data in cluster, multilabel * Make make[Un]SupervisedTask parameters non-optional --- R/ClusterTask.R | 2 +- R/MultilabelTask.R | 2 +- R/SupervisedTask.R | 2 +- R/UnsupervisedTask.R | 2 +- tests/testthat/test_base_SupervisedTask.R | 6 ++++++ tests/testthat/test_base_UnsupervisedTask.R | 7 +++++++ 6 files changed, 17 insertions(+), 4 deletions(-) diff --git a/R/ClusterTask.R b/R/ClusterTask.R index ffe5983441..4ace9e968e 100644 --- a/R/ClusterTask.R +++ b/R/ClusterTask.R @@ -6,7 +6,7 @@ makeClusterTask = function(id = deparse(substitute(data)), data, weights = NULL, assertChoice(fixup.data, choices = c("no", "quiet", "warn")) assertFlag(check.data) - task = makeUnsupervisedTask("cluster", data, weights, blocking) + task = makeUnsupervisedTask("cluster", data, weights, blocking, fixup.data, check.data) task$task.desc = makeClusterTaskDesc(id, data, weights, blocking) addClasses(task, "ClusterTask") } diff --git a/R/MultilabelTask.R b/R/MultilabelTask.R index 25be3fd1b6..7da5907f3d 100644 --- a/R/MultilabelTask.R +++ b/R/MultilabelTask.R @@ -8,7 +8,7 @@ makeMultilabelTask = function(id = deparse(substitute(data)), data, target, weig assertChoice(fixup.data, choices = c("no", "quiet", "warn")) assertFlag(check.data) - task = makeSupervisedTask("multilabel", data, target, weights, blocking) + task = makeSupervisedTask("multilabel", data, target, weights, blocking, fixup.data, check.data) # currently we dont do any fixup here if (check.data) { for (cn in target) diff --git a/R/SupervisedTask.R b/R/SupervisedTask.R index 19a71fad43..a1a0a73ca8 100644 --- a/R/SupervisedTask.R +++ b/R/SupervisedTask.R @@ -1,4 +1,4 @@ -makeSupervisedTask = function(type, data, target, weights = NULL, blocking = NULL, fixup.data = "warn", check.data = TRUE) { +makeSupervisedTask = function(type, data, target, weights, blocking, fixup.data, check.data) { task = makeTask(type = type, data = data, weights = weights, blocking = blocking, fixup.data = fixup.data, check.data = check.data) if (check.data) { diff --git a/R/UnsupervisedTask.R b/R/UnsupervisedTask.R index 47a9f9db9e..88b4e01a1a 100644 --- a/R/UnsupervisedTask.R +++ b/R/UnsupervisedTask.R @@ -1,4 +1,4 @@ -makeUnsupervisedTask = function(type, data, weights = NULL, blocking = NULL, fixup.data = "warn", check.data = TRUE) { +makeUnsupervisedTask = function(type, data, weights, blocking, fixup.data, check.data) { task = makeTask(type, data, weights, blocking, fixup.data = fixup.data, check.data = check.data) # we can't use getTaskData to access the tasks's data here because we then # want to access the description object which is not existing yet diff --git a/tests/testthat/test_base_SupervisedTask.R b/tests/testthat/test_base_SupervisedTask.R index 7110e646bf..7c4800ef2a 100644 --- a/tests/testthat/test_base_SupervisedTask.R +++ b/tests/testthat/test_base_SupervisedTask.R @@ -56,6 +56,12 @@ test_that("SupervisedTask dropping of levels works", { "Empty factor levels") e = getTaskData(task) expect_true(setequal(levels(e$Species), levs1)) + + expect_warning(makeMultilabelTask("multilabel", multilabel.df[1:10, ], target = c("y1", "y2"), fixup.data = "warn"), + "Empty factor levels") + + expect_warning(makeMultilabelTask("multilabel", multilabel.df[1:10, ], target = c("y1", "y2"), fixup.data = "quiet"), NA) + }) test_that("SupervisedTask does not drop positive class", { diff --git a/tests/testthat/test_base_UnsupervisedTask.R b/tests/testthat/test_base_UnsupervisedTask.R index c49921505d..b19492c4f5 100644 --- a/tests/testthat/test_base_UnsupervisedTask.R +++ b/tests/testthat/test_base_UnsupervisedTask.R @@ -25,4 +25,11 @@ test_that("UnsupervisedTask", { expect_true(getTaskDesc(ct1)$has.blocking) ct2 = subsetTask(ct1) expect_true(getTaskDesc(ct2)$has.blocking) + + # check 'fixup data' works + + expect_warning(makeClusterTask("cluster", iris[1:10, ], fixup.data = "warn"), "Empty factor levels") + + expect_warning(makeClusterTask("cluster", iris[1:10, ], fixup.data = "quiet"), NA) + }) From 4b407640a9d98e6a2c822c764f31078d7e6203c1 Mon Sep 17 00:00:00 2001 From: Vadim Khotilovich Date: Thu, 22 Jun 2017 13:00:21 -0500 Subject: [PATCH 12/15] xgboost: expose watchlist and callbacks (#1859) * xgboost: expose watchlist and callbacks; remove silent from params; set default lambda=1; add tweedie_variance_power param * disable TODO linter --- R/RLearner_classif_xgboost.R | 29 ++++++++++++++++++++--------- R/RLearner_regr_xgboost.R | 30 +++++++++++++++++++++--------- tests/testthat/helper_lint.R | 2 +- tests/testthat/test_regr_xgboost.R | 6 +++--- 4 files changed, 45 insertions(+), 22 deletions(-) diff --git a/R/RLearner_classif_xgboost.R b/R/RLearner_classif_xgboost.R index cae749f1ac..47230116c2 100644 --- a/R/RLearner_classif_xgboost.R +++ b/R/RLearner_classif_xgboost.R @@ -7,7 +7,7 @@ makeRLearner.classif.xgboost = function() { # we pass all of what goes in 'params' directly to ... of xgboost # makeUntypedLearnerParam(id = "params", default = list()), makeDiscreteLearnerParam(id = "booster", default = "gbtree", values = c("gbtree", "gblinear", "dart")), - makeIntegerLearnerParam(id = "silent", default = 0L, tunable = FALSE), + makeUntypedLearnerParam(id = "watchlist", default = NULL, tunable = FALSE), makeNumericLearnerParam(id = "eta", default = 0.3, lower = 0, upper = 1), makeNumericLearnerParam(id = "gamma", default = 0, lower = 0), makeIntegerLearnerParam(id = "max_depth", default = 6L, lower = 1L), @@ -16,7 +16,7 @@ makeRLearner.classif.xgboost = function() { makeNumericLearnerParam(id = "colsample_bytree", default = 1, lower = 0, upper = 1), makeNumericLearnerParam(id = "colsample_bylevel", default = 1, lower = 0, upper = 1), makeIntegerLearnerParam(id = "num_parallel_tree", default = 1L, lower = 1L), - makeNumericLearnerParam(id = "lambda", default = 0, lower = 0), + makeNumericLearnerParam(id = "lambda", default = 1, lower = 0), makeNumericLearnerParam(id = "lambda_bias", default = 0, lower = 0), makeNumericLearnerParam(id = "alpha", default = 0, lower = 0), makeUntypedLearnerParam(id = "objective", default = "binary:logistic", tunable = FALSE), @@ -26,6 +26,7 @@ makeRLearner.classif.xgboost = function() { makeNumericLearnerParam(id = "missing", default = NULL, tunable = FALSE, when = "both", special.vals = list(NA, NA_real_, NULL)), makeIntegerVectorLearnerParam(id = "monotone_constraints", default = 0, lower = -1, upper = 1), + makeNumericLearnerParam(id = "tweedie_variance_power", lower = 1, upper = 2, default = 1.5, requires = quote(objective == "reg:tweedie")), makeIntegerLearnerParam(id = "nthread", lower = 1L, tunable = FALSE), makeIntegerLearnerParam(id = "nrounds", default = 1L, lower = 1L), # FIXME nrounds seems to have no default in xgboost(), if it has 1, par.vals is redundant @@ -38,7 +39,14 @@ makeRLearner.classif.xgboost = function() { makeDiscreteLearnerParam(id = "sample_type", default = "uniform", values = c("uniform", "weighted"), requires = quote(booster == "dart")), makeDiscreteLearnerParam(id = "normalize_type", default = "tree", values = c("tree", "forest"), requires = quote(booster == "dart")), makeNumericLearnerParam(id = "rate_drop", default = 0, lower = 0, upper = 1, requires = quote(booster == "dart")), - makeNumericLearnerParam(id = "skip_drop", default = 0, lower = 0, upper = 1, requires = quote(booster == "dart")) + makeNumericLearnerParam(id = "skip_drop", default = 0, lower = 0, upper = 1, requires = quote(booster == "dart")), + # TODO: uncomment the following after the next CRAN update, and set max_depth's lower = 0L + #makeLogicalLearnerParam(id = "one_drop", default = FALSE, requires = quote(booster == "dart")), + #makeDiscreteLearnerParam(id = "tree_method", default = "exact", values = c("exact", "hist"), requires = quote(booster != "gblinear")), + #makeDiscreteLearnerParam(id = "grow_policy", default = "depthwise", values = c("depthwise", "lossguide"), requires = quote(tree_method == "hist")), + #makeIntegerLearnerParam(id = "max_leaves", default = 0L, lower = 0L, requires = quote(grow_policy == "lossguide")), + #makeIntegerLearnerParam(id = "max_bin", default = 256L, lower = 2L, requires = quote(tree_method == "hist")), + makeUntypedLearnerParam(id = "callbacks", default = list(), tunable = FALSE) ), par.vals = list(nrounds = 1L, verbose = 0L), properties = c("twoclass", "multiclass", "numerics", "prob", "weights", "missings", "featimp"), @@ -54,8 +62,6 @@ trainLearner.classif.xgboost = function(.learner, .task, .subset, .weights = NUL td = getTaskDesc(.task) parlist = list(...) - parlist$data = data.matrix(getTaskData(.task, .subset, target.extra = TRUE)$data) - parlist$label = match(as.character(getTaskData(.task, .subset, target.extra = TRUE)$target), td$class.levels) - 1 nc = length(td$class.levels) if (is.null(parlist$objective)) @@ -68,10 +74,17 @@ trainLearner.classif.xgboost = function(.learner, .task, .subset, .weights = NUL if (parlist$objective %in% c("multi:softprob", "multi:softmax")) parlist$num_class = nc + task.data = getTaskData(.task, .subset, target.extra = TRUE) + label = match(as.character(task.data$target), td$class.levels) - 1 + parlist$data = xgboost::xgb.DMatrix(data = data.matrix(task.data$data), label = label) + if (!is.null(.weights)) - parlist$data = xgboost::xgb.DMatrix(data = parlist$data, label = parlist$label, weight = .weights) + xgboost::setinfo(parlist$data, "weight", .weights) + + if (is.null(parlist$watchlist)) + parlist$watchlist = list(train = parlist$data) - do.call(xgboost::xgboost, parlist) + do.call(xgboost::xgb.train, parlist) } #' @export @@ -131,5 +144,3 @@ getFeatureImportanceLearner.classif.xgboost = function(.learner, .model, ...) { fiv = imp$Gain setNames(fiv, imp$Feature) } - - diff --git a/R/RLearner_regr_xgboost.R b/R/RLearner_regr_xgboost.R index cb7ea7f726..2c843840fc 100644 --- a/R/RLearner_regr_xgboost.R +++ b/R/RLearner_regr_xgboost.R @@ -7,7 +7,7 @@ makeRLearner.regr.xgboost = function() { # we pass all of what goes in 'params' directly to ... of xgboost #makeUntypedLearnerParam(id = "params", default = list()), makeDiscreteLearnerParam(id = "booster", default = "gbtree", values = c("gbtree", "gblinear", "dart")), - makeIntegerLearnerParam(id = "silent", default = 0L, tunable = FALSE), + makeUntypedLearnerParam(id = "watchlist", default = NULL, tunable = FALSE), makeNumericLearnerParam(id = "eta", default = 0.3, lower = 0, upper = 1), makeNumericLearnerParam(id = "gamma", default = 0, lower = 0), makeIntegerLearnerParam(id = "max_depth", default = 6L, lower = 1L), @@ -16,16 +16,17 @@ makeRLearner.regr.xgboost = function() { makeNumericLearnerParam(id = "colsample_bytree", default = 1, lower = 0, upper = 1), makeNumericLearnerParam(id = "colsample_bylevel", default = 1, lower = 0, upper = 1), makeIntegerLearnerParam(id = "num_parallel_tree", default = 1L, lower = 1L), - makeNumericLearnerParam(id = "lambda", default = 0, lower = 0), + makeNumericLearnerParam(id = "lambda", default = 1, lower = 0), makeNumericLearnerParam(id = "lambda_bias", default = 0, lower = 0), makeNumericLearnerParam(id = "alpha", default = 0, lower = 0), makeUntypedLearnerParam(id = "objective", default = "reg:linear", tunable = FALSE), makeUntypedLearnerParam(id = "eval_metric", default = "rmse", tunable = FALSE), makeNumericLearnerParam(id = "base_score", default = 0.5, tunable = FALSE), - + makeNumericLearnerParam(id = "max_delta_step", lower = 0, default = 0), makeNumericLearnerParam(id = "missing", default = NULL, tunable = FALSE, when = "both", special.vals = list(NA, NA_real_, NULL)), makeIntegerVectorLearnerParam(id = "monotone_constraints", default = 0, lower = -1, upper = 1), + makeNumericLearnerParam(id = "tweedie_variance_power", lower = 1, upper = 2, default = 1.5, requires = quote(objective == "reg:tweedie")), makeIntegerLearnerParam(id = "nthread", lower = 1L, tunable = FALSE), makeIntegerLearnerParam(id = "nrounds", default = 1L, lower = 1L), # FIXME nrounds seems to have no default in xgboost(), if it has 1, par.vals is redundant @@ -35,9 +36,17 @@ makeRLearner.regr.xgboost = function() { requires = quote(verbose == 1L)), makeIntegerLearnerParam(id = "early_stopping_rounds", default = NULL, lower = 1L, special.vals = list(NULL), tunable = FALSE), makeLogicalLearnerParam(id = "maximize", default = NULL, special.vals = list(NULL), tunable = FALSE), + makeDiscreteLearnerParam(id = "sample_type", default = "uniform", values = c("uniform", "weighted"), requires = quote(booster == "dart")), makeDiscreteLearnerParam(id = "normalize_type", default = "tree", values = c("tree", "forest"), requires = quote(booster == "dart")), makeNumericLearnerParam(id = "rate_drop", default = 0, lower = 0, upper = 1, requires = quote(booster == "dart")), - makeNumericLearnerParam(id = "skip_drop", default = 0, lower = 0, upper = 1, requires = quote(booster == "dart")) + makeNumericLearnerParam(id = "skip_drop", default = 0, lower = 0, upper = 1, requires = quote(booster == "dart")), + # TODO: uncomment the following after the next CRAN update, and set max_depth's lower = 0L + #makeLogicalLearnerParam(id = "one_drop", default = FALSE, requires = quote(booster == "dart")), + #makeDiscreteLearnerParam(id = "tree_method", default = "exact", values = c("exact", "hist"), requires = quote(booster != "gblinear")), + #makeDiscreteLearnerParam(id = "grow_policy", default = "depthwise", values = c("depthwise", "lossguide"), requires = quote(tree_method == "hist")), + #makeIntegerLearnerParam(id = "max_leaves", default = 0L, lower = 0L, requires = quote(grow_policy == "lossguide")), + #makeIntegerLearnerParam(id = "max_bin", default = 256L, lower = 2L, requires = quote(tree_method == "hist")), + makeUntypedLearnerParam(id = "callbacks", default = list(), tunable = FALSE) ), par.vals = list(nrounds = 1L, verbose = 0L), properties = c("numerics", "weights", "featimp", "missings"), @@ -52,16 +61,19 @@ makeRLearner.regr.xgboost = function() { trainLearner.regr.xgboost = function(.learner, .task, .subset, .weights = NULL, ...) { parlist = list(...) - parlist$label = getTaskData(.task, .subset, target.extra = TRUE)$target - parlist$data = data.matrix(getTaskData(.task, .subset, target.extra = TRUE)$data) - if (is.null(parlist$objective)) parlist$objective = "reg:linear" + task.data = getTaskData(.task, .subset, target.extra = TRUE) + parlist$data = xgboost::xgb.DMatrix(data = data.matrix(task.data$data), label = task.data$target) + if (!is.null(.weights)) - parlist$data = xgboost::xgb.DMatrix(data = parlist$data, label = parlist$label, weight = .weights) + xgboost::setinfo(parlist$data, "weight", .weights) + + if (is.null(parlist$watchlist)) + parlist$watchlist = list(train = parlist$data) - do.call(xgboost::xgboost, parlist) + do.call(xgboost::xgb.train, parlist) } #' @export diff --git a/tests/testthat/helper_lint.R b/tests/testthat/helper_lint.R index 9a1824d444..46e0a8847a 100644 --- a/tests/testthat/helper_lint.R +++ b/tests/testthat/helper_lint.R @@ -266,7 +266,7 @@ if (isLintrVersionOk() && require("lintr", quietly = TRUE) && require("rex", qui seq = lintr::seq_linter, unneeded.concatenation = lintr::unneeded_concatenation_linter, trailing.whitespace = lintr::trailing_whitespace_linter, - todo.comment = lintr::todo_comment_linter(todo = "todo"), # is case-insensitive + #todo.comment = lintr::todo_comment_linter(todo = "todo"), # is case-insensitive spaces.inside = lintr::spaces_inside_linter, infix.spaces = infix.spaces.linter, object.naming = object.naming.linter) diff --git a/tests/testthat/test_regr_xgboost.R b/tests/testthat/test_regr_xgboost.R index 81bb36171e..aa5bf715a4 100644 --- a/tests/testthat/test_regr_xgboost.R +++ b/tests/testthat/test_regr_xgboost.R @@ -31,9 +31,9 @@ test_that("regr_xgboost", { }) test_that("xgboost works with different 'missing' arg vals", { - lrn = makeLearner("classif.xgboost", missing = NA_real_) - lrn = makeLearner("classif.xgboost", missing = NA) - lrn = makeLearner("classif.xgboost", missing = NULL) + lrn = makeLearner("regr.xgboost", missing = NA_real_) + lrn = makeLearner("regr.xgboost", missing = NA) + lrn = makeLearner("regr.xgboost", missing = NULL) }) From a21c2d698461c2a69653485d5ec692a03ed2ecdf Mon Sep 17 00:00:00 2001 From: Jakob Richter Date: Mon, 26 Jun 2017 15:14:44 +0300 Subject: [PATCH 13/15] Revert "BSD LICENSE file (#1772)" (#1867) This reverts commit 17d7eac68433b5e37bc4c118d1a9056c5e4cc497. --- LICENSE | 27 ++------------------------- 1 file changed, 2 insertions(+), 25 deletions(-) diff --git a/LICENSE b/LICENSE index c5c1603657..ac84d81f14 100644 --- a/LICENSE +++ b/LICENSE @@ -1,25 +1,2 @@ -BSD 2-Clause License - -Copyright (c) 2013-2017, Bernd Bischl -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +YEAR: 2013-2017 +COPYRIGHT HOLDER: Bernd Bischl From b162ba9e73c4b3e6976684278f68dccbeb3ca060 Mon Sep 17 00:00:00 2001 From: Troy James Palanca Date: Mon, 10 Jul 2017 06:08:55 +0700 Subject: [PATCH 14/15] Modify `makeBaseWrapper` to unlock wrapped feature selection and tuning and to prevent errors in wrapped feature selection and imputation (#1871) * Allows nested feature selection and tuning * Only restrict tune wrappers from being wrapped * Add tests for nested nested resampling positive case and negative case for wrapping tuning wrapper * Fix lintrbot issues --- R/BaseWrapper.R | 5 ++- tests/testthat/test_base_BaseWrapper.R | 47 ++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 3 deletions(-) diff --git a/R/BaseWrapper.R b/R/BaseWrapper.R index cd0bc5de69..9cb96d903c 100644 --- a/R/BaseWrapper.R +++ b/R/BaseWrapper.R @@ -1,8 +1,7 @@ makeBaseWrapper = function(id, type, next.learner, package = character(0L), par.set = makeParamSet(), par.vals = list(), learner.subclass, model.subclass) { - - if (inherits(next.learner, "OptWrapper")) - stop("Cannot wrap an optimization wrapper with something else!") + if (inherits(next.learner, "OptWrapper") && is.element("TuneWrapper", learner.subclass)) + stop("Cannot wrap a tuning wrapper around another optimization wrapper!") ns = intersect(names(par.set$pars), names(next.learner$par.set$pars)) if (length(ns) > 0L) stopf("Hyperparameter names in wrapper clash with base learner names: %s", collapse(ns)) diff --git a/tests/testthat/test_base_BaseWrapper.R b/tests/testthat/test_base_BaseWrapper.R index 27a460d3d9..6434236d42 100644 --- a/tests/testthat/test_base_BaseWrapper.R +++ b/tests/testthat/test_base_BaseWrapper.R @@ -22,3 +22,50 @@ test_that("BaseWrapper", { lrn2.rm = removeHyperPars(lrn2, names(getHyperPars(lrn2))) expect_equal(length(getHyperPars(lrn2.rm)), 0) }) + +test_that("Joint model performance estimation, tuning, and model performance", { + lrn = makeLearner("classif.ksvm", predict.type = "prob") + lrn2 = makeTuneWrapper( + learner = lrn, + par.set = makeParamSet( + makeDiscreteParam("C", values = 2 ^ (-2:2)), + makeDiscreteParam("sigma", values = 2 ^ (-2:2)) + ), + measures = list(auc, acc), + control = makeTuneControlRandom(maxit = 3L), + resampling = makeResampleDesc(method = "Holdout") + ) + lrn3 = makeFeatSelWrapper( + learner = lrn2, + measures = list(auc, acc), + control = makeFeatSelControlRandom(maxit = 3L), + resampling = makeResampleDesc(method = "Holdout") + ) + bmrk = benchmark(lrn3, pid.task, makeResampleDesc(method = "Holdout")) + expect_is(bmrk, "BenchmarkResult") +}) + +test_that("Error when wrapping tune wrapper around another optimization wrapper", { + expect_error({ + lrn = makeLearner("classif.ksvm", predict.type = "prob") + lrn2 = makeFeatSelWrapper( + learner = lrn, + measures = list(auc, acc), + control = makeFeatSelControlRandom(maxit = 3L), + resampling = makeResampleDesc(method = "Holdout") + ) + lrn3 = makeTuneWrapper( + learner = lrn2, + par.set = makeParamSet( + makeDiscreteParam("C", values = 2 ^ (-2:2)), + makeDiscreteParam("sigma", values = 2 ^ (-2:2)) + ), + measures = list(auc, acc), + control = makeTuneControlRandom(maxit = 3L), + resampling = makeResampleDesc(method = "Holdout") + ) + bmrk = benchmark(lrn3, pid.task) + }, "Cannot wrap a tuning wrapper around another optimization wrapper!") +}) + + From 99ad7900596ef37f394b05e487dabea78082fe28 Mon Sep 17 00:00:00 2001 From: Lars Kotthoff Date: Sun, 9 Jul 2017 16:10:38 -0700 Subject: [PATCH 15/15] NEWS for #1871 --- NEWS.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/NEWS.md b/NEWS.md index 0b8274bc43..32c5681413 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,9 @@ # mlr 2.12: +## general +* relaxed the way wrappers can be nested -- the only explicitly forbidden + combination is to wrap a tuning wrapper around another optimization wrapper + ## functions - general * generatePartialDependenceData: added parameter "range" to allow to specify the range of values for the partial dependencies