diff --git a/DESCRIPTION b/DESCRIPTION index 01aa5a7..7fa574c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -7,7 +7,7 @@ Authors@R: Description: Extends the 'mlr3' ecosystem to time series forecasting. License: LGPL-3 Depends: - mlr3 (>= 0.22.1), + mlr3 (>= 0.22.1.9000), R (>= 3.3.0) Imports: backports, @@ -21,6 +21,8 @@ Suggests: testthat (>= 3.2.0), tsbox, withr (>= 3.0.0) +Remotes: + mlr-org/mlr3 Config/testthat/edition: 3 Encoding: UTF-8 LazyData: true @@ -30,6 +32,8 @@ Collate: 'DataBackendTimeSeries.R' 'ForecastLearner.R' 'zzz.R' + 'LearnerARIMA.R' + 'LearnerFcst.R' 'MeasureDirectional.R' 'ResamplingForecastCV.R' 'ResamplingForecastHoldout.R' diff --git a/NAMESPACE b/NAMESPACE index ad55d82..a4378f0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,6 +5,7 @@ S3method(as_task_fcst,TaskFcst) S3method(as_task_fcst,data.frame) export(DataBackendTimeSeries) export(ForecastLearner) +export(LearnerFcstARIMA) export(ResamplingForecastCV) export(ResamplingForecastHoldout) export(TaskFcst) diff --git a/R/ForecastLearner.R b/R/ForecastLearner.R index 1c1b83e..e12ea52 100644 --- a/R/ForecastLearner.R +++ b/R/ForecastLearner.R @@ -30,7 +30,7 @@ ForecastLearner = R6::R6Class("ForecastLearner", super$initialize( id = learner$id, - task_type = learner$task_type, + task_type = "regr", param_set = learner$param_set, predict_types = learner$predict_types, feature_types = learner$feature_types, @@ -38,110 +38,41 @@ ForecastLearner = R6::R6Class("ForecastLearner", packages = c("mlr3forecast", learner$packages), man = learner$man ) - }, - - #' @description - #' Uses the information stored during `$train()` in `$state` to create a new [Prediction] - #' for a set of observations of the provided `task`. - #' - #' @param task ([Task]). - #' - #' @param row_ids (`integer()`)\cr - #' Vector of test indices as subset of `task$row_ids`. For a simple split - #' into training and test set, see [partition()]. - #' - #' @return [Prediction]. - predict = function(task, row_ids = NULL) { - task = assert_task(as_task(task)) - row_ids = assert_integerish(row_ids, - lower = 1L, any.missing = FALSE, coerce = TRUE, null.ok = TRUE - ) - - # 1. direct learner$predict(): entire task + row_ids or `NULL` for entire task prediction - # 2. resampling: test task and `NULL` row_ids, task$row_ids are from entire task - # 3. glrn$predict(): test task and `NULL` row_ids, task$row_ids are from train task - # 4. glrn$predict_newdata(): test task and `NULL` row_ids, task$row_ids are 1:n, i.e. not from entire task - # NB: this will need some special handling, how do I know if its called by glrn? - # check for glrn$predict_newdata() case - has_row_ids = !is.null(row_ids) - row_ids = row_ids %??% task$row_ids - row_ids = sort(row_ids) - if (!has_row_ids && - nrow(fintersect(task$data(), private$.task$data())) == 0 && - all(task$row_ids %in% private$.task$row_ids)) { - row_ids = seq_along(row_ids) + tail(private$.task$row_ids, 1L) - } - if (is.null(task$key) && !all(diff(row_ids) == 1L)) { - stopf("Row ids must be consecutive") - } - private$.predict_recursive(task, row_ids) - }, - - #' @description - #' Uses the model fitted during `$train()` to create a new [Prediction] based on the forecast horizon `n`. - #' - #' @param task ([Task]). - #' @param n (`integer(1)`). - #' @param newdata (any object supported by [as_data_backend()])\cr - #' New data to predict on. - #' All data formats convertible by [as_data_backend()] are supported, e.g. - #' `data.frame()` or [DataBackend]. 
- #' If a [DataBackend] is provided as `newdata`, the row ids are preserved, - #' otherwise they are set to to the sequence `1:nrow(newdata)`. - #' - #' @return [Prediction]. - predict_newdata = function(newdata, task) { - task = assert_task(as_task(task)) - assert_learnable(task, self) - private$.predict_newdata_recursive(task, newdata) } ), private = list( .task = NULL, + .max_index = NULL, .train = function(task) { + private$.max_index = max(task$data(cols = task$col_roles$order)[[1L]]) private$.task = task$clone() target = task$target_names dt = private$.lag_transform(task$data(), target) new_task = as_task_regr(dt, target = target) - learner = self$learner$clone(deep = TRUE)$train(new_task) structure(list(learner = learner), class = c("forecast_learner_model", "list")) }, .predict = function(task) { - self$predict(task) + private$.predict_recursive(task) }, - .lag_transform = function(dt, target) { - lag = self$lag - nms = sprintf("%s_lag_%s", target, lag) - dt = copy(dt) - key = private$.task$key - if (is.null(key)) { - dt[, (nms) := shift(.SD, n = lag, type = "lag"), .SDcols = target] - } else { - setorderv(dt, c(key)) - dt[, (nms) := shift(.SD, n = lag, type = "lag"), by = key, .SDcols = target] - } - dt - }, - - .predict_recursive = function(task, row_ids) { - # join the training task with the prediction task for lag transformation - # in normal predict we get the entire task, in resampling we only get the subset - # TODO: check why `Task$data_formats` warning is thrown - if (suppressWarnings(isTRUE(all.equal(private$.task, task)))) { - dt = task$data() + .predict_recursive = function(task) { + target = private$.task$target_names + if (private$.is_newdata(task)) { + row_ids = private$.task$nrow + seq_len(task$nrow) + dt = rbind(private$.task$data(), task$data(), fill = TRUE) } else { - dt = rbind(private$.task$data(), task$data()) + row_ids = task$row_ids + dt = private$.task$data() } - target = private$.task$target_names # one model for all steps preds = map(row_ids, function(i) { new_x = private$.lag_transform(dt, target)[i] pred = self$model$learner$predict_newdata(new_x) + # set is faster with DT dt[i, (target) := pred$response] pred }) @@ -150,27 +81,31 @@ ForecastLearner = R6::R6Class("ForecastLearner", preds }, - .predict_newdata_recursive = function(task, newdata) { - dt = task$data() - target = task$target_names - # create a new rows for the new prediction - dt = rbind(dt, newdata, fill = TRUE) - row_ids = task$nrow + seq_len(nrow(newdata)) - # one model for all steps - preds = map(row_ids, function(i) { - new_x = private$.lag_transform(dt, target)[i] - pred = self$model$learner$predict_newdata(new_x) - dt[i, (target) := pred$response] - pred - }) - preds = do.call(c, preds) - preds$data$row_ids = seq_len(nrow(newdata)) - preds - }, - .predict_direct = function(dt, n) { # one model for each step .NotYetImplemented() + }, + + .lag_transform = function(dt, target) { + lag = self$lag + nms = sprintf("%s_lag_%s", target, lag) + dt = copy(dt) + key = private$.task$key + if (is.null(key)) { + dt[, (nms) := shift(.SD, n = lag, type = "lag"), .SDcols = target] + } else { + dt[, (nms) := shift(.SD, n = lag, type = "lag"), by = key, .SDcols = target] + } + dt + }, + + .is_newdata = function(task) { + order_cols = task$col_roles$order + tab = task$backend$data(rows = task$row_ids, cols = order_cols) + if (nrow(tab) == 0L) { + return(TRUE) + } + !any(private$.max_index %in% tab[[1L]]) } ) ) diff --git a/R/LearnerARIMA.R b/R/LearnerARIMA.R new file mode 100644 index 0000000..f4c1bc5 
--- /dev/null +++ b/R/LearnerARIMA.R @@ -0,0 +1,95 @@ +#' @title ARIMA +#' +#' @name mlr_learners_fcst.arima +#' +#' @description +#' ... +#' +#' @templateVar id fcst.arima +#' @template learner +#' +#' @references +#' ... +#' +#' @export +#' @template seealso_learner +LearnerFcstARIMA = R6Class("LearnerFcstARIMA", + inherit = LearnerRegr, + public = list( + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. + initialize = function() { + + ps = ps( + order = p_uty(default = c(0, 0, 0), tags = "train"), + seasonal = p_uty(default = c(0, 0, 0), tags = "train"), + include.mean = p_lgl(default = TRUE, tags = "train"), + include.drift = p_lgl(default = FALSE, tags = "train"), + biasadj = p_lgl(default = FALSE, tags = "train"), + method = p_fct(c("CSS-ML", "ML", "CSS"), default = "CSS-ML", tags = "train") + ) + + super$initialize( + id = "fcst.arima", + param_set = ps, + feature_types = c("logical", "integer", "numeric"), + packages = c("mlr3learners", "forecast"), + label = "ARIMA", + man = "mlr3forecast::mlr_learners_fcst.arima" + ) + } + ), + + private = list( + .max_index = NULL, + + .train = function(task) { + if (length(task$col_roles$order) == 0L) { + stopf("%s learner requires an ordered task.", self$id) + } + private$.max_index = max(task$data(cols = task$col_roles$order)[[1L]]) + pv = self$param_set$get_values(tags = "train") + if ("weights" %in% task$properties) { + pv = insert_named(pv, list(weights = task$weights$weight)) + } + if (length(task$feature_names) > 0) { + xreg = as.matrix(task$data(cols = task$feature_names)) + invoke(forecast::Arima, + y = task$data(rows = task$row_ids, cols = task$target_names), + xreg = xreg, + .args = pv + ) + } else { + invoke(forecast::Arima, + y = task$data(rows = task$row_ids, cols = task$target_names), + .args = pv) + } + }, + + .predict = function(task) { + pv = self$param_set$get_values(tags = "predict") + if (private$.is_newdata(task)) { + if (length(task$feature_names) > 0) { + newdata = as.matrix(task$data(cols = task$feature_names)) + prediction = invoke(forecast::forecast, self$model, xreg = newdata) + } else { + prediction = invoke(forecast::forecast, self$model, h = length(task$row_ids)) + } + list(response = prediction$mean) + } else { + prediction = stats::fitted(self$model)[task$row_ids] + list(response = prediction) + } + }, + + .is_newdata = function(task) { + order_cols = task$col_roles$order + idx = task$backend$data(rows = task$row_ids, cols = order_cols)[[1L]] + !any(private$.max_index %in% idx) + } + ) +) + +#' @include zzz.R +register_learner("fcst.arima", LearnerFcstARIMA) diff --git a/R/LearnerFcst.R b/R/LearnerFcst.R new file mode 100644 index 0000000..53b83e6 --- /dev/null +++ b/R/LearnerFcst.R @@ -0,0 +1,23 @@ +#' @title Forecast Learner +#' +LearnerFcst = R6Class("LearnerFcst", + inherit = Learner, + public = list( + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class.
+ initialize = function(id, param_set = ps(), predict_types = "response", feature_types = character(), properties = character(), data_formats, packages = character(), label = NA_character_, man = NA_character_) { + super$initialize( + id = id, + task_type = "fcst", + param_set = param_set, + feature_types = feature_types, + predict_types = predict_types, + properties = properties, + data_formats, + packages = packages, + label = label, + man = man + ) + } + ) +) diff --git a/R/ResamplingForecastCV.R b/R/ResamplingForecastCV.R index 0940ba0..e3af152 100644 --- a/R/ResamplingForecastCV.R +++ b/R/ResamplingForecastCV.R @@ -83,8 +83,11 @@ ResamplingForecastCV = R6Class("ResamplingForecastCV", private = list( .sample = function(ids, ...) { pars = self$param_set$get_values() + window_size = pars$window_size + horizon = pars$horizon + ids = sort(ids) - train_end = ids[ids <= (max(ids) - pars$horizon) & ids >= pars$window_size] + train_end = ids[ids <= (max(ids) - horizon) & ids >= window_size] train_end = seq.int( from = train_end[length(train_end)], by = -pars$step_size, @@ -93,14 +96,44 @@ ResamplingForecastCV = R6Class("ResamplingForecastCV", if (!pars$fixed_window) { train_ids = map(train_end, function(x) ids[1L]:x) } else { - train_ids = map(train_end, function(x) (x - pars$window_size + 1L):x) + train_ids = map(train_end, function(x) (x - window_size + 1L):x) } - test_ids = map(train_ids, function(x) (x[length(x)] + 1L):(x[length(x)] + pars$horizon)) + test_ids = map(train_ids, function(x) (x[length(x)] + 1L):(x[length(x)] + horizon)) list(train = train_ids, test = test_ids) }, .sample_new = function(ids, task, ...) { .NotYetImplemented() + + pars = self$param_set$get_values() + horizon = pars$horizon + window_size = pars$window_size + step_size = pars$step_size + folds = pars$folds + fixed_window = pars$fixed_window + + order_cols = task$col_roles$order + key_cols = task$key + has_key = length(key_cols) > 0L + + tab = task$backend$data( + rows = ids, + cols = c(task$backend$primary_key, order_cols, key_cols) + ) + + if (has_key) { + setnames(tab, c("row_id", "order", "key")) + setorderv(tab, c("key", "order")) + } else { + setnames(tab, c("row_id", "order")) + setorderv(tab, "order") + } + + if (!has_key) { + } else { + + } + }, .get_train = function(i) { diff --git a/R/TaskFcst.R b/R/TaskFcst.R index d56bead..d282ff7 100644 --- a/R/TaskFcst.R +++ b/R/TaskFcst.R @@ -48,14 +48,6 @@ TaskFcst = R6::R6Class("TaskFcst", extra_args = extra_args ) self$key = key - }, - - #' @description - #' True response for specified `row_ids`. Format depends on the task type. - #' Defaults to all rows with role "use". - #' @return `numeric()`. 
- truth = function(rows = NULL) { - super$truth(rows)[[1L]] } ) ) diff --git a/R/zzz.R b/R/zzz.R index 6eb2b18..1675d9f 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -33,16 +33,16 @@ register_mlr3 = function() { mlr_reflections$task_types = mlr_reflections$task_types[!"fcst"] mlr_reflections$task_types = setkeyv(rbind(mlr_reflections$task_types, rowwise_table( ~type, ~package, ~task, ~learner, ~prediction, ~prediction_data, ~measure, - "fcst", "mlr3forecast", "TaskFcst", "LearnerRegr", "PredictionFcst", "PredictionDataFcst", "MeasureRegr" # nolint + "fcst", "mlr3forecast", "TaskFcst", "LearnerFcst", "PredictionFcst", "PredictionDataFcst", "MeasureFcst" # nolint ), fill = TRUE), "type") mlr_reflections$learner_predict_types$fcst = mlr_reflections$learner_predict_types$regr - # mlr_reflections$task_col_roles$fcst = union( - # mlr_reflections$task_col_roles$regr, mlr3forecast_col_roles - # ) + mlr_reflections$learner_properties$fcst = mlr_reflections$learner_properties$regr + mlr_reflections$task_col_roles$fcst = mlr_reflections$task_col_roles$regr mlr_reflections$task_feature_types = named_union( mlr_reflections$task_feature_types, mlr3forecast_feature_types ) mlr_reflections$task_properties$fcst = c("univariate", "multivariate") + mlr_reflections$measure_properties$fcst = mlr_reflections$measure_properties$regr # add resamplings mlr_resamplings = utils::getFromNamespace("mlr_resamplings", ns = "mlr3") diff --git a/README.Rmd b/README.Rmd index b249e9d..29d9e9f 100644 --- a/README.Rmd +++ b/README.Rmd @@ -53,23 +53,22 @@ library(mlr3learners) task = tsk("airpassengers") task$select(setdiff(task$feature_names, "date")) -measure = msr("regr.rmse") flrn = ForecastLearner$new(lrn("regr.ranger"), 1:12)$train(task) newdata = data.frame(passengers = rep(NA_real_, 3L)) prediction = flrn$predict_newdata(newdata, task) prediction prediction = flrn$predict(task, 142:144) prediction -prediction$score(measure) +prediction$score(msr("regr.rmse")) flrn = ForecastLearner$new(lrn("regr.ranger"), 1:12) resampling = rsmp("forecast_holdout", ratio = 0.9) rr = resample(task, flrn, resampling) -rr$aggregate(measure) +rr$aggregate(msr("regr.rmse")) resampling = rsmp("forecast_cv") rr = resample(task, flrn, resampling) -rr$aggregate(measure) +rr$aggregate(msr("regr.rmse")) ``` ### Multivariate @@ -87,7 +86,7 @@ graph = ppl("convert_types", "Date", "POSIXct") %>>% new_task = graph$train(task)[[1L]] flrn = ForecastLearner$new(lrn("regr.ranger"), 1:12)$train(new_task) prediction = flrn$predict(new_task, 142:144) -prediction$score(measure) +prediction$score(msr("regr.rmse")) row_ids = new_task$nrow - 0:2 flrn$predict_newdata(new_task$data(rows = row_ids), new_task) @@ -96,25 +95,29 @@ flrn$predict_newdata(newdata, new_task) resampling = rsmp("forecast_holdout", ratio = 0.9) rr = resample(new_task, flrn, resampling) -rr$aggregate(measure) +rr$aggregate(msr("regr.rmse")) resampling = rsmp("forecast_cv") rr = resample(new_task, flrn, resampling) -rr$aggregate(measure) +rr$aggregate(msr("regr.rmse")) ``` ### mlr3pipelines integration ```{r} +graph = ppl("convert_types", "Date", "POSIXct") %>>% + po("datefeatures", + param_vals = list(is_day = FALSE, hour = FALSE, minute = FALSE, second = FALSE) + ) flrn = ForecastLearner$new(lrn("regr.ranger"), 1:12) glrn = as_learner(graph %>>% flrn)$train(task) prediction = glrn$predict(task, 142:144) -prediction$score(measure) +prediction$score(msr("regr.rmse")) ``` ### Example: Forecasting electricity demand -```{r} +```{r, eval = FALSE} library(mlr3learners) library(mlr3pipelines) @@ -148,6 
+151,7 @@ newdata = data.frame( temperature = 26, holiday = c(TRUE, rep(FALSE, 13L)) ) +# NOTE: glrn$predict_newdata() throws an incorrect dimension error prediction = glrn$predict_newdata(newdata, task) prediction ``` @@ -179,12 +183,12 @@ task = graph$train(task)[[1L]] flrn = ForecastLearner$new(lrn("regr.ranger"), 1:3)$train(task) prediction = flrn$predict(task, 4460:4464) -prediction$score(measure) +prediction$score(msr("regr.rmse")) flrn = ForecastLearner$new(lrn("regr.ranger"), 1:3) resampling = rsmp("forecast_holdout", ratio = 0.9) rr = resample(task, flrn, resampling) -rr$aggregate(measure) +rr$aggregate(msr("regr.rmse")) ``` ### Example: Global vs Local Forecasting @@ -217,7 +221,7 @@ tab = task$backend$data( setnames(tab, c("row_id", "year")) row_ids = tab[year >= 2015, row_id] prediction = flrn$predict(task, row_ids) -prediction$score(measure) +prediction$score(msr("regr.rmse")) # global forecasting task = tsibbledata::aus_livestock |> @@ -235,7 +239,7 @@ tab = task$backend$data( setnames(tab, c("row_id", "year", "state")) row_ids = tab[year >= 2015 & state == "Western Australia", row_id] prediction = flrn$predict(task, row_ids) -prediction$score(measure) +prediction$score(msr("regr.rmse")) ``` ### Example: generate new data @@ -278,3 +282,14 @@ task = tsk("airpassengers") newdata = generate_newdata(task, 12L, "month") newdata ``` + +### Example: WIP + +```{r, eval = FALSE} +task = tsk("airpassengers") +task$select(setdiff(task$feature_names, "date")) +learner = LearnerFcstARIMA$new()$train(task) +newdata = generate_newdata(task, 12L, "month") +learner$predict(task, 140:144) +learner$predict_newdata(newdata, task) +``` diff --git a/README.md b/README.md index f637839..a12ad79 100644 --- a/README.md +++ b/README.md @@ -38,39 +38,38 @@ library(mlr3learners) task = tsk("airpassengers") task$select(setdiff(task$feature_names, "date")) -measure = msr("regr.rmse") flrn = ForecastLearner$new(lrn("regr.ranger"), 1:12)$train(task) newdata = data.frame(passengers = rep(NA_real_, 3L)) prediction = flrn$predict_newdata(newdata, task) prediction #> for 3 observations: #> row_ids truth response -#> 1 NA 437.1837 -#> 2 NA 438.4461 -#> 3 NA 459.7955 +#> 1 NA 434.3284 +#> 2 NA 434.2805 +#> 3 NA 454.7944 prediction = flrn$predict(task, 142:144) prediction #> for 3 observations: #> row_ids truth response -#> 1 461 459.4802 -#> 2 390 410.1595 -#> 3 432 431.6638 -prediction$score(measure) +#> 1 461 455.1578 +#> 2 390 410.7651 +#> 3 432 430.3026 +prediction$score(msr("regr.rmse")) #> regr.rmse -#> 11.67375 +#> 12.49271 flrn = ForecastLearner$new(lrn("regr.ranger"), 1:12) resampling = rsmp("forecast_holdout", ratio = 0.9) rr = resample(task, flrn, resampling) -rr$aggregate(measure) +rr$aggregate(msr("regr.rmse")) #> regr.rmse -#> 47.67586 +#> 45.60276 resampling = rsmp("forecast_cv") rr = resample(task, flrn, resampling) -rr$aggregate(measure) +rr$aggregate(msr("regr.rmse")) #> regr.rmse -#> 24.16082 +#> 24.09385 ``` ### Multivariate @@ -88,47 +87,51 @@ graph = ppl("convert_types", "Date", "POSIXct") %>>% new_task = graph$train(task)[[1L]] flrn = ForecastLearner$new(lrn("regr.ranger"), 1:12)$train(new_task) prediction = flrn$predict(new_task, 142:144) -prediction$score(measure) +prediction$score(msr("regr.rmse")) #> regr.rmse -#> 13.4413 +#> 15.44297 row_ids = new_task$nrow - 0:2 flrn$predict_newdata(new_task$data(rows = row_ids), new_task) #> for 3 observations: #> row_ids truth response -#> 1 432 434.2353 -#> 2 390 431.9932 -#> 3 461 456.3106 +#> 1 432 432.9556 +#> 2 390 436.0987 +#> 3 461 457.6307 
newdata = new_task$data(rows = row_ids, cols = new_task$feature_names) flrn$predict_newdata(newdata, new_task) #> for 3 observations: #> row_ids truth response -#> 1 NA 434.2353 -#> 2 NA 431.9932 -#> 3 NA 456.3106 +#> 1 NA 432.9556 +#> 2 NA 436.0987 +#> 3 NA 457.6307 resampling = rsmp("forecast_holdout", ratio = 0.9) rr = resample(new_task, flrn, resampling) -rr$aggregate(measure) +rr$aggregate(msr("regr.rmse")) #> regr.rmse -#> 48.93365 +#> 49.6094 resampling = rsmp("forecast_cv") rr = resample(new_task, flrn, resampling) -rr$aggregate(measure) +rr$aggregate(msr("regr.rmse")) #> regr.rmse -#> 25.04447 +#> 28.2292 ``` ### mlr3pipelines integration ``` r +graph = ppl("convert_types", "Date", "POSIXct") %>>% + po("datefeatures", + param_vals = list(is_day = FALSE, hour = FALSE, minute = FALSE, second = FALSE) + ) flrn = ForecastLearner$new(lrn("regr.ranger"), 1:12) glrn = as_learner(graph %>>% flrn)$train(task) prediction = glrn$predict(task, 142:144) -prediction$score(measure) +prediction$score(msr("regr.rmse")) #> regr.rmse -#> 13.36275 +#> 13.19144 ``` ### Example: Forecasting electricity demand @@ -167,17 +170,9 @@ newdata = data.frame( temperature = 26, holiday = c(TRUE, rep(FALSE, 13L)) ) +# NOTE: glrn$predict_newdata() throws an incorrect dimension error prediction = glrn$predict_newdata(newdata, task) prediction -#> for 14 observations: -#> row_ids truth response -#> 1 NA 186.8375 -#> 2 NA 191.9585 -#> 3 NA 184.0513 -#> --- --- --- -#> 12 NA 215.1711 -#> 13 NA 218.9118 -#> 14 NA 219.7826 ``` ### Global Forecasting @@ -207,16 +202,16 @@ task = graph$train(task)[[1L]] flrn = ForecastLearner$new(lrn("regr.ranger"), 1:3)$train(task) prediction = flrn$predict(task, 4460:4464) -prediction$score(measure) +prediction$score(msr("regr.rmse")) #> regr.rmse -#> 21451.45 +#> 22616.99 flrn = ForecastLearner$new(lrn("regr.ranger"), 1:3) resampling = rsmp("forecast_holdout", ratio = 0.9) rr = resample(task, flrn, resampling) -rr$aggregate(measure) +rr$aggregate(msr("regr.rmse")) #> regr.rmse -#> 77934.44 +#> 91193.86 ``` ### Example: Global vs Local Forecasting @@ -249,9 +244,9 @@ tab = task$backend$data( setnames(tab, c("row_id", "year")) row_ids = tab[year >= 2015, row_id] prediction = flrn$predict(task, row_ids) -prediction$score(measure) +prediction$score(msr("regr.rmse")) #> regr.rmse -#> 33069.46 +#> 33173.5 # global forecasting task = tsibbledata::aus_livestock |> @@ -269,9 +264,9 @@ tab = task$backend$data( setnames(tab, c("row_id", "year", "state")) row_ids = tab[year >= 2015 & state == "Western Australia", row_id] prediction = flrn$predict(task, row_ids) -prediction$score(measure) +prediction$score(msr("regr.rmse")) #> regr.rmse -#> 31124.47 +#> 31728.69 ``` ### Example: generate new data @@ -327,3 +322,14 @@ newdata #> 11 1961-11-01 NA #> 12 1961-12-01 NA ``` + +### Example: WIP + +``` r +task = tsk("airpassengers") +task$select(setdiff(task$feature_names, "date")) +learner = LearnerFcstARIMA$new()$train(task) +newdata = generate_newdata(task, 12L, "month") +learner$predict(task, 140:144) +learner$predict_newdata(newdata, task) +``` diff --git a/man-roxygen/learner.R b/man-roxygen/learner.R new file mode 100644 index 0000000..1fbfb4c --- /dev/null +++ b/man-roxygen/learner.R @@ -0,0 +1,14 @@ +#' @section Dictionary: +#' This [Learner] can be instantiated via the [dictionary][mlr3misc::Dictionary] [mlr_learners] or with the associated sugar function [lrn()]: +#' ``` +#' mlr_learners$get("<%= id %>") +#' lrn("<%= id %>") +#' ``` +#' +#' @section Meta Information: +#' `r 
mlr3misc::rd_info(mlr3::lrn("<%= id %>"))` +#' @md +#' +#' @section Parameters: +#' `r mlr3misc::rd_info(mlr3::lrn("<%= id %>")$param_set)` +#' @md diff --git a/man-roxygen/seealso_learner.R b/man-roxygen/seealso_learner.R new file mode 100644 index 0000000..929079e --- /dev/null +++ b/man-roxygen/seealso_learner.R @@ -0,0 +1,17 @@ +#' @seealso +#' +#' * Chapter in the [mlr3book](https://mlr3book.mlr-org.com/): +#' \url{https://mlr3book.mlr-org.com/chapters/chapter2/data_and_basic_modeling.html#sec-learners} +#' * Package \CRANpkg{mlr3learners} for a solid collection of essential learners. +#' * Package [mlr3extralearners](https://github.com/mlr-org/mlr3extralearners) for more learners. +#' * [Dictionary][mlr3misc::Dictionary] of [Learners][Learner]: [mlr_learners] +#' * `as.data.table(mlr_learners)` for a table of available [Learners][Learner] in the running session (depending on the loaded packages). +#' * \CRANpkg{mlr3pipelines} to combine learners with pre- and postprocessing steps. +#' * Package \CRANpkg{mlr3viz} for some generic visualizations. +#' * Extension packages for additional task types: +#' * \CRANpkg{mlr3proba} for probabilistic supervised regression and survival analysis. +#' * \CRANpkg{mlr3cluster} for unsupervised clustering. +#' * \CRANpkg{mlr3tuning} for tuning of hyperparameters, \CRANpkg{mlr3tuningspaces} +#' for established default tuning spaces. +#' +#' @family Learner diff --git a/man/ForecastLearner.Rd b/man/ForecastLearner.Rd index 1b59c51..920288f 100644 --- a/man/ForecastLearner.Rd +++ b/man/ForecastLearner.Rd @@ -17,8 +17,11 @@ Forecast Learner \item{\code{learner}}{(\link{Learner})\cr The learner} -\item{\code{lag}}{(\code{integer(1)})\cr +\item{\code{lag}}{(\code{integer()})\cr The lag} + +\item{\code{trafo}}{(\link{Graph})\cr +The task transformation} } \if{html}{\out{}} } @@ -26,7 +29,6 @@ The lag} \subsection{Public methods}{ \itemize{ \item \href{#method-ForecastLearner-new}{\code{ForecastLearner$new()}} -\item \href{#method-ForecastLearner-predict}{\code{ForecastLearner$predict()}} \item \href{#method-ForecastLearner-predict_newdata}{\code{ForecastLearner$predict_newdata()}} \item \href{#method-ForecastLearner-clone}{\code{ForecastLearner$clone()}} } @@ -38,6 +40,7 @@ The lag}
  • mlr3::Learner$encapsulate()
  • mlr3::Learner$format()
  • mlr3::Learner$help()
+  • mlr3::Learner$predict()
  • mlr3::Learner$print()
  • mlr3::Learner$reset()
  • mlr3::Learner$train()
  • @@ -50,7 +53,7 @@ The lag} \subsection{Method \code{new()}}{ Creates a new instance of this \link[R6:R6Class]{R6} class. \subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{ForecastLearner$new(learner, lag)}\if{html}{\out{
    }} +\if{html}{\out{
    }}\preformatted{ForecastLearner$new(learner, lag, trafo = NULL)}\if{html}{\out{
    }} } \subsection{Arguments}{ @@ -60,64 +63,21 @@ Creates a new instance of this \link[R6:R6Class]{R6} class. \item{\code{lag}}{(\code{integer(1)})\cr} -\item{\code{task}}{(\link{Task})\cr} -} -\if{html}{\out{}} -} -} -\if{html}{\out{
    }} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-ForecastLearner-predict}{}}} -\subsection{Method \code{predict()}}{ -Uses the information stored during \verb{$train()} in \verb{$state} to create a new \link{Prediction} -for a set of observations of the provided \code{task}. -\subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{ForecastLearner$predict(task, row_ids = NULL)}\if{html}{\out{
    }} -} - -\subsection{Arguments}{ -\if{html}{\out{
    }} -\describe{ -\item{\code{task}}{(\link{Task}).} +\item{\code{trafo}}{(\link{Graph})\cr} -\item{\code{row_ids}}{(\code{integer()})\cr -Vector of test indices as subset of \code{task$row_ids}. For a simple split -into training and test set, see \code{\link[=partition]{partition()}}.} +\item{\code{task}}{(\link{Task})\cr} } \if{html}{\out{
    }} } -\subsection{Returns}{ -\link{Prediction}. -} } \if{html}{\out{
    }} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-ForecastLearner-predict_newdata}{}}} \subsection{Method \code{predict_newdata()}}{ -Uses the model fitted during \verb{$train()} to create a new \link{Prediction} based on the forecast horizon \code{n}. \subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{ForecastLearner$predict_newdata(newdata, task)}\if{html}{\out{
    }} +\if{html}{\out{
    }}\preformatted{ForecastLearner$predict_newdata(newdata, task = NULL)}\if{html}{\out{
    }} } -\subsection{Arguments}{ -\if{html}{\out{
    }} -\describe{ -\item{\code{newdata}}{(any object supported by \code{\link[=as_data_backend]{as_data_backend()}})\cr -New data to predict on. -All data formats convertible by \code{\link[=as_data_backend]{as_data_backend()}} are supported, e.g. -\code{data.frame()} or \link{DataBackend}. -If a \link{DataBackend} is provided as \code{newdata}, the row ids are preserved, -otherwise they are set to to the sequence \code{1:nrow(newdata)}.} - -\item{\code{task}}{(\link{Task}).} - -\item{\code{n}}{(\code{integer(1)}).} -} -\if{html}{\out{
    }} -} -\subsection{Returns}{ -\link{Prediction}. -} } \if{html}{\out{
    }} \if{html}{\out{}} diff --git a/man/LearnerFcst.Rd b/man/LearnerFcst.Rd new file mode 100644 index 0000000..bfd751a --- /dev/null +++ b/man/LearnerFcst.Rd @@ -0,0 +1,73 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/LearnerFcst.R +\name{LearnerFcst} +\alias{LearnerFcst} +\title{Forecast Learner} +\description{ +Forecast Learner + +Forecast Learner +} +\section{Super class}{ +\code{\link[mlr3:Learner]{mlr3::Learner}} -> \code{LearnerFcst} +} +\section{Methods}{ +\subsection{Public methods}{ +\itemize{ +\item \href{#method-LearnerFcst-new}{\code{LearnerFcst$new()}} +\item \href{#method-LearnerFcst-clone}{\code{LearnerFcst$clone()}} +} +} +\if{html}{\out{ +
    Inherited methods + +
    +}} +\if{html}{\out{
    }} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-LearnerFcst-new}{}}} +\subsection{Method \code{new()}}{ +Creates a new instance of this \link[R6:R6Class]{R6} class. +\subsection{Usage}{ +\if{html}{\out{
    }}\preformatted{LearnerFcst$new( + id, + param_set = ps(), + predict_types = "response", + feature_types = character(), + properties = character(), + data_formats, + packages = character(), + label = NA_character_, + man = NA_character_ +)}\if{html}{\out{
    }} +} + +} +\if{html}{\out{
    }} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-LearnerFcst-clone}{}}} +\subsection{Method \code{clone()}}{ +The objects of this class are cloneable with this method. +\subsection{Usage}{ +\if{html}{\out{
    }}\preformatted{LearnerFcst$clone(deep = FALSE)}\if{html}{\out{
    }} +} + +\subsection{Arguments}{ +\if{html}{\out{
    }} +\describe{ +\item{\code{deep}}{Whether to make a deep clone.} +} +\if{html}{\out{
    }} +} +} +} diff --git a/man/TaskFcst.Rd b/man/TaskFcst.Rd index 11c0a19..520e1b7 100644 --- a/man/TaskFcst.Rd +++ b/man/TaskFcst.Rd @@ -58,7 +58,6 @@ Key of data.} \subsection{Public methods}{ \itemize{ \item \href{#method-TaskFcst-new}{\code{TaskFcst$new()}} -\item \href{#method-TaskFcst-truth}{\code{TaskFcst$truth()}} \item \href{#method-TaskFcst-clone}{\code{TaskFcst$clone()}} } } @@ -84,6 +83,7 @@ Key of data.}
  • mlr3::Task$set_col_roles()
  • mlr3::Task$set_levels()
  • mlr3::Task$set_row_roles()
+  • mlr3::TaskRegr$truth()
  • }} @@ -130,28 +130,6 @@ via \code{\link[=convert_task]{convert_task()}}.} } } \if{html}{\out{
    }} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-TaskFcst-truth}{}}} -\subsection{Method \code{truth()}}{ -True response for specified \code{row_ids}. Format depends on the task type. -Defaults to all rows with role "use". -\subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{TaskFcst$truth(rows = NULL)}\if{html}{\out{
    }} -} - -\subsection{Arguments}{ -\if{html}{\out{
    }} -\describe{ -\item{\code{rows}}{(positive \code{integer()})\cr -Vector or row indices.} -} -\if{html}{\out{
    }} -} -\subsection{Returns}{ -\code{numeric()}. -} -} -\if{html}{\out{
    }} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-TaskFcst-clone}{}}} \subsection{Method \code{clone()}}{ diff --git a/man/mlr_learners_fcst.arima.Rd b/man/mlr_learners_fcst.arima.Rd new file mode 100644 index 0000000..bb3d9ed --- /dev/null +++ b/man/mlr_learners_fcst.arima.Rd @@ -0,0 +1,116 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/LearnerARIMA.R +\name{mlr_learners_fcst.arima} +\alias{mlr_learners_fcst.arima} +\alias{LearnerFcstARIMA} +\title{ARIMA} +\description{ +... +} +\section{Dictionary}{ + +This \link{Learner} can be instantiated via the \link[mlr3misc:Dictionary]{dictionary} \link{mlr_learners} or with the associated sugar function \code{\link[=lrn]{lrn()}}: + +\if{html}{\out{
    }}\preformatted{mlr_learners$get("fcst.arima") +lrn("fcst.arima") +}\if{html}{\out{
    }} +} + +\section{Meta Information}{ + +\itemize{ +\item Task type: \dQuote{regr} +\item Predict Types: \dQuote{response} +\item Feature Types: \dQuote{logical}, \dQuote{integer}, \dQuote{numeric} +\item Required Packages: \CRANpkg{mlr3}, \CRANpkg{mlr3learners}, \CRANpkg{forecast} +} +} + +\section{Parameters}{ +\tabular{llll}{ + Id \tab Type \tab Default \tab Levels \cr + order \tab untyped \tab c(0, 0, 0) \tab \cr + seasonal \tab untyped \tab c(0, 0, 0) \tab \cr + include.mean \tab logical \tab TRUE \tab TRUE, FALSE \cr + include.drift \tab logical \tab FALSE \tab TRUE, FALSE \cr + biasadj \tab logical \tab FALSE \tab TRUE, FALSE \cr + method \tab character \tab CSS-ML \tab CSS-ML, ML, CSS \cr +} +} + +\references{ +... +} +\seealso{ +\itemize{ +\item Chapter in the \href{https://mlr3book.mlr-org.com/}{mlr3book}: +\url{https://mlr3book.mlr-org.com/chapters/chapter2/data_and_basic_modeling.html#sec-learners} +\item Package \CRANpkg{mlr3learners} for a solid collection of essential learners. +\item Package \href{https://github.com/mlr-org/mlr3extralearners}{mlr3extralearners} for more learners. +\item \link[mlr3misc:Dictionary]{Dictionary} of \link[=Learner]{Learners}: \link{mlr_learners} +\item \code{as.data.table(mlr_learners)} for a table of available \link[=Learner]{Learners} in the running session (depending on the loaded packages). +\item \CRANpkg{mlr3pipelines} to combine learners with pre- and postprocessing steps. +\item Package \CRANpkg{mlr3viz} for some generic visualizations. +\item Extension packages for additional task types: +\itemize{ +\item \CRANpkg{mlr3proba} for probabilistic supervised regression and survival analysis. +\item \CRANpkg{mlr3cluster} for unsupervised clustering. +} +\item \CRANpkg{mlr3tuning} for tuning of hyperparameters, \CRANpkg{mlr3tuningspaces} +for established default tuning spaces. +} +} +\concept{Learner} +\section{Super classes}{ +\code{\link[mlr3:Learner]{mlr3::Learner}} -> \code{\link[mlr3:LearnerRegr]{mlr3::LearnerRegr}} -> \code{LearnerFcstARIMA} +} +\section{Methods}{ +\subsection{Public methods}{ +\itemize{ +\item \href{#method-LearnerFcstARIMA-new}{\code{LearnerFcstARIMA$new()}} +\item \href{#method-LearnerFcstARIMA-clone}{\code{LearnerFcstARIMA$clone()}} +} +} +\if{html}{\out{ +
    Inherited methods + +
    +}} +\if{html}{\out{
    }} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-LearnerFcstARIMA-new}{}}} +\subsection{Method \code{new()}}{ +Creates a new instance of this \link[R6:R6Class]{R6} class. +\subsection{Usage}{ +\if{html}{\out{
    }}\preformatted{LearnerFcstARIMA$new()}\if{html}{\out{
    }} +} + +} +\if{html}{\out{
    }} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-LearnerFcstARIMA-clone}{}}} +\subsection{Method \code{clone()}}{ +The objects of this class are cloneable with this method. +\subsection{Usage}{ +\if{html}{\out{
    }}\preformatted{LearnerFcstARIMA$clone(deep = FALSE)}\if{html}{\out{
    }} +} + +\subsection{Arguments}{ +\if{html}{\out{
    }} +\describe{ +\item{\code{deep}}{Whether to make a deep clone.} +} +\if{html}{\out{
    }} +} +} +}