ARIMA - Add support for missing observations and padding (rapidsai#4058)

This PR allows support for missing observations and padding at the start for variable-length batch. Example: ![missing_obs_0](https://user-images.githubusercontent.com/17441062/125832072-1ff903c9-088e-4d77-9b17-be365890d982.png) Note: I had to change ARIMA tests because I used a different method than statsmodels (which is used as a reference in tests) to compute the initial parameter estimation. They cut all missing observations for their initial least-square estimation, and I decided to fill them with naive replacements instead, so I keep the temporal relationships in the data and have a much better initial estimate and often a better fit in the end, according to some MASE measurements I made. So I updated the integration test to use the MASE and pass if we are approximately the same _or better_ than statsmodels. Authors: - Louis Sugy (https://github.com/Nyrio) Approvers: - Robert Maynard (https://github.com/robertmaynard) - Tamas Bela Feher (https://github.com/tfeher) - Dante Gama Dessavre (https://github.com/dantegd) - Ray Douglass (https://github.com/raydouglass) URL: rapidsai#4058
vimarsh6739 · Sep 22, 2021 · 816b782 · 816b782
1 parent dabad13
commit 816b782
Show file tree

Hide file tree

Showing 22 changed files with 1,566 additions and 396 deletions.
diff --git a/ci/docs/build.sh b/ci/docs/build.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2021, NVIDIA CORPORATION.
 #################################
 # cuML Docs build script for CI #
 #################################

diff --git a/ci/gpu/test-notebooks.sh b/ci/gpu/test-notebooks.sh
@@ -1,4 +1,5 @@
 #!/bin/bash
+# Copyright (c) 2020-2021, NVIDIA CORPORATION.
 
 NOTEBOOKS_DIR="$WORKSPACE/notebooks"
 NBTEST="$WORKSPACE/ci/utils/nbtest.sh"

diff --git a/ci/utils/nbtest.sh b/ci/utils/nbtest.sh
@@ -1,4 +1,5 @@
 #!/bin/bash
+# Copyright (c) 2020-2021, NVIDIA CORPORATION.
 
 MAGIC_OVERRIDE_CODE="
 def my_run_line_magic(*args, **kwargs):

diff --git a/cpp/bench/sg/arima_loglikelihood.cu b/cpp/bench/sg/arima_loglikelihood.cu
@@ -44,7 +44,6 @@ class ArimaLoglikelihood : public TsFixtureRandom<DataT> {
       order(p.order),
       param(0, rmm::cuda_stream_default),
       loglike(0, rmm::cuda_stream_default),
-      residual(0, rmm::cuda_stream_default),
       temp_mem(0, rmm::cuda_stream_default)
   {
   }
@@ -85,7 +84,6 @@ class ArimaLoglikelihood : public TsFixtureRandom<DataT> {
                       order,
                       param.data(),
                       loglike.data(),
-                      residual.data(),
                       true,
                       false);
     });
@@ -101,9 +99,8 @@ class ArimaLoglikelihood : public TsFixtureRandom<DataT> {
     // Buffer for the model parameters
     param.resize(order.complexity() * this->params.batch_size, stream);
 
-    // Buffers for the log-likelihood and residuals
+    // Buffers for the log-likelihood
     loglike.resize(this->params.batch_size, stream);
-    residual.resize(this->params.batch_size * this->params.n_obs, stream);
 
     // Temporary memory
     size_t temp_buf_size =
@@ -117,7 +114,6 @@ class ArimaLoglikelihood : public TsFixtureRandom<DataT> {
   ARIMAOrder order;
   rmm::device_uvector<DataT> param;
   rmm::device_uvector<DataT> loglike;
-  rmm::device_uvector<DataT> residual;
   rmm::device_uvector<char> temp_mem;
 };
 

diff --git a/cpp/include/cuml/tsa/arima_common.h b/cpp/include/cuml/tsa/arima_common.h
@@ -200,9 +200,9 @@ struct ARIMAMemory {
   T *params_mu, *params_ar, *params_ma, *params_sar, *params_sma, *params_sigma2, *Tparams_mu,
     *Tparams_ar, *Tparams_ma, *Tparams_sar, *Tparams_sma, *Tparams_sigma2, *d_params, *d_Tparams,
     *Z_dense, *R_dense, *T_dense, *RQR_dense, *RQ_dense, *P_dense, *alpha_dense, *ImT_dense,
-    *ImT_inv_dense, *v_tmp_dense, *m_tmp_dense, *K_dense, *TP_dense, *vs, *y_diff, *loglike,
-    *loglike_base, *loglike_pert, *x_pert, *F_buffer, *sumLogF_buffer, *sigma2_buffer,
-    *I_m_AxA_dense, *I_m_AxA_inv_dense, *Ts_dense, *RQRs_dense, *Ps_dense;
+    *ImT_inv_dense, *v_tmp_dense, *m_tmp_dense, *K_dense, *TP_dense, *pred, *y_diff, *loglike,
+    *loglike_base, *loglike_pert, *x_pert, *sigma2_buffer, *I_m_AxA_dense, *I_m_AxA_inv_dense,
+    *Ts_dense, *RQRs_dense, *Ps_dense;
   T **Z_batches, **R_batches, **T_batches, **RQR_batches, **RQ_batches, **P_batches,
     **alpha_batches, **ImT_batches, **ImT_inv_batches, **v_tmp_batches, **m_tmp_batches,
     **K_batches, **TP_batches, **I_m_AxA_batches, **I_m_AxA_inv_batches, **Ts_batches,
@@ -279,11 +279,9 @@ struct ARIMAMemory {
     append_buffer<assign>(K_batches, batch_size);
     append_buffer<assign>(TP_dense, rd * rd * batch_size);
     append_buffer<assign>(TP_batches, batch_size);
-    append_buffer<assign>(F_buffer, n_obs * batch_size);
-    append_buffer<assign>(sumLogF_buffer, batch_size);
     append_buffer<assign>(sigma2_buffer, batch_size);
 
-    append_buffer<assign>(vs, n_obs * batch_size);
+    append_buffer<assign>(pred, n_obs * batch_size);
     append_buffer<assign>(y_diff, n_obs * batch_size);
     append_buffer<assign>(loglike, batch_size);
     append_buffer<assign>(loglike_base, batch_size);

diff --git a/cpp/include/cuml/tsa/batched_arima.hpp b/cpp/include/cuml/tsa/batched_arima.hpp
@@ -56,6 +56,15 @@ void unpack(raft::handle_t& handle,
             int batch_size,
             const double* param_vec);
 
+/**
+ * Detect missing observations in a time series
+ *
+ * @param[in]  handle     cuML handle
+ * @param[in]  d_y        Time series
+ * @param[in]  n_elem     Total number of elements in the dataset
+ */
+bool detect_missing(raft::handle_t& handle, const double* d_y, int n_elem);
+
 /**
  * Compute the differenced series (seasonal and/or non-seasonal differences)
  *
@@ -87,9 +96,6 @@ void batched_diff(raft::handle_t& handle,
  * @param[in]  d_params     Parameters to evaluate grouped by series:
  *                          [mu0, ar.., ma.., mu1, ..] (device)
  * @param[out] loglike      Log-Likelihood of the model per series
- * @param[out] d_vs         The residual between model and original signal.
- *                          shape = (n_obs-d-s*D, batch_size) (device)
- *                          Note: no output when using CSS estimation
  * @param[in]  trans        Run `jones_transform` on params.
  * @param[in]  host_loglike Whether loglike is a host pointer
  * @param[in]  method       Whether to use sum-of-squares or Kalman filter
@@ -110,7 +116,6 @@ void batched_loglike(raft::handle_t& handle,
                      const ARIMAOrder& order,
                      const double* d_params,
                      double* loglike,
-                     double* d_vs,
                      bool trans           = true,
                      bool host_loglike    = true,
                      LoglikeMethod method = MLE,
@@ -137,9 +142,6 @@ void batched_loglike(raft::handle_t& handle,
  * @param[in]  order        ARIMA hyper-parameters
  * @param[in]  params       ARIMA parameters (device)
  * @param[out] loglike      Log-Likelihood of the model per series
- * @param[out] d_vs         The residual between model and original signal.
- *                          shape = (n_obs-d-s*D, batch_size) (device)
- *                          Note: no output when using CSS estimation
  * @param[in]  trans        Run `jones_transform` on params.
  * @param[in]  host_loglike Whether loglike is a host pointer
  * @param[in]  method       Whether to use sum-of-squares or Kalman filter
@@ -160,7 +162,6 @@ void batched_loglike(raft::handle_t& handle,
                      const ARIMAOrder& order,
                      const ARIMAParams<double>& params,
                      double* loglike,
-                     double* d_vs,
                      bool trans           = true,
                      bool host_loglike    = true,
                      LoglikeMethod method = MLE,
@@ -277,12 +278,14 @@ void information_criterion(raft::handle_t& handle,
  * @param[in]  n_obs       Number of samples per time series
  *                         (all series must be identical)
  * @param[in]  order       ARIMA hyper-parameters
+ * @param[in]  missing     Are there missing observations?
  */
 void estimate_x0(raft::handle_t& handle,
                  ARIMAParams<double>& params,
                  const double* d_y,
                  int batch_size,
                  int n_obs,
-                 const ARIMAOrder& order);
+                 const ARIMAOrder& order,
+                 bool missing);
 
 }  // namespace ML
diff --git a/cpp/include/cuml/tsa/batched_kalman.hpp b/cpp/include/cuml/tsa/batched_kalman.hpp
@@ -37,8 +37,7 @@ namespace ML {
  * @param[in]  order         ARIMA hyper-parameters
  * @param[in]  batch_size    Number of series making up the batch
  * @param[out] d_loglike     Resulting log-likelihood (per series) (device)
- * @param[out] d_vs          Residual between the prediction and the
- *                           original series.
+ * @param[out] d_pred        Predictions
  *                           shape=(nobs-d-s*D, batch_size) (device)
  * @param[in]  fc_steps      Number of steps to forecast
  * @param[in]  d_fc          Array to store the forecast
@@ -55,7 +54,7 @@ void batched_kalman_filter(raft::handle_t& handle,
                            const ARIMAOrder& order,
                            int batch_size,
                            double* d_loglike,
-                           double* d_vs,
+                           double* d_pred,
                            int fc_steps    = 0,
                            double* d_fc    = nullptr,
                            double level    = 0,

diff --git a/cpp/src/arima/batched_arima.cu b/cpp/src/arima/batched_arima.cu
@@ -23,6 +23,7 @@
 #include <thrust/fill.h>
 #include <thrust/for_each.h>
 #include <thrust/iterator/counting_iterator.h>
+#include <thrust/logical.h>
 
 #include <cuml/tsa/batched_arima.hpp>
 #include <cuml/tsa/batched_kalman.hpp>
@@ -36,6 +37,7 @@
 #include <raft/linalg/matrix_vector_op.cuh>
 #include <rmm/device_uvector.hpp>
 #include <timeSeries/arima_helpers.cuh>
+#include <timeSeries/fillna.cuh>
 
 namespace ML {
 
@@ -71,6 +73,20 @@ void batched_diff(raft::handle_t& handle,
     d_y_diff, d_y, batch_size, n_obs, order.d, order.D, order.s, stream);
 }
 
+template <typename T>
+struct is_missing {
+  typedef T argument_type;
+  typedef T result_type;
+
+  __thrust_exec_check_disable__ __device__ const T operator()(const T& x) const { return isnan(x); }
+};  // end is_missing
+
+bool detect_missing(raft::handle_t& handle, const double* d_y, int n_elem)
+{
+  return thrust::any_of(
+    thrust::cuda::par.on(handle.get_stream()), d_y, d_y + n_elem, is_missing<double>());
+}
+
 void predict(raft::handle_t& handle,
              const ARIMAMemory<double>& arima_mem,
              const double* d_y,
@@ -108,7 +124,7 @@ void predict(raft::handle_t& handle,
     d_y_kf   = d_y;
   }
 
-  double* d_vs = arima_mem.vs;
+  double* d_pred = arima_mem.pred;
 
   // Create temporary array for the forecasts
   int num_steps = std::max(end - n_obs, 0);
@@ -126,7 +142,6 @@ void predict(raft::handle_t& handle,
                   order_after_prep,
                   params,
                   loglike.data(),
-                  d_vs,
                   false,
                   true,
                   MLE,
@@ -144,22 +159,33 @@ void predict(raft::handle_t& handle,
   // In-sample prediction
   //
 
-  int res_offset = diff ? order.d + order.s * order.D : 0;
-  int p_start    = std::max(start, res_offset);
-  int p_end      = std::min(n_obs, end);
-
   // The prediction loop starts by filling undefined predictions with NaN,
   // then computes the predictions from the observations and residuals
   if (start < n_obs) {
+    int res_offset = diff ? order.d + order.s * order.D : 0;
+    int p_start    = std::max(start, res_offset);
+    int p_end      = std::min(n_obs, end);
+    int dD         = diff ? order.d + order.D : 0;
+    int period1    = order.d ? 1 : order.s;
+    int period2    = order.d == 2 ? 1 : order.s;
+
     thrust::for_each(
       thrust::cuda::par.on(stream), counting, counting + batch_size, [=] __device__(int bid) {
         d_y_p[0] = 0.0;
         for (int i = 0; i < res_offset - start; i++) {
           d_y_p[bid * predict_ld + i] = nan("");
         }
         for (int i = p_start; i < p_end; i++) {
-          d_y_p[bid * predict_ld + i - start] =
-            d_y[bid * n_obs + i] - d_vs[bid * n_obs_kf + i - res_offset];
+          if (dD == 0) {
+            d_y_p[bid * predict_ld + i - start] = d_pred[bid * n_obs + i];
+          } else if (dD == 1) {
+            d_y_p[bid * predict_ld + i - start] =
+              d_y[bid * n_obs + i - period1] + d_pred[bid * n_obs_kf + i - res_offset];
+          } else {
+            d_y_p[bid * predict_ld + i - start] =
+              d_y[bid * n_obs + i - period1] + d_y[bid * n_obs + i - period2] -
+              d_y[bid * n_obs + i - period1 - period2] + d_pred[bid * n_obs_kf + i - res_offset];
+          }
         }
       });
   }
@@ -343,7 +369,6 @@ void batched_loglike(raft::handle_t& handle,
                      const ARIMAOrder& order,
                      const ARIMAParams<double>& params,
                      double* loglike,
-                     double* d_vs,
                      bool trans,
                      bool host_loglike,
                      LoglikeMethod method,
@@ -358,6 +383,8 @@ void batched_loglike(raft::handle_t& handle,
 
   auto stream = handle.get_stream();
 
+  double* d_pred = arima_mem.pred;
+
   ARIMAParams<double> Tparams = {arima_mem.Tparams_mu,
                                  arima_mem.Tparams_ar,
                                  arima_mem.Tparams_ma,
@@ -396,7 +423,7 @@ void batched_loglike(raft::handle_t& handle,
                           order,
                           batch_size,
                           d_loglike,
-                          d_vs,
+                          d_pred,
                           fc_steps,
                           d_fc,
                           level,
@@ -419,7 +446,6 @@ void batched_loglike(raft::handle_t& handle,
                      const ARIMAOrder& order,
                      const double* d_params,
                      double* loglike,
-                     double* d_vs,
                      bool trans,
                      bool host_loglike,
                      LoglikeMethod method,
@@ -452,7 +478,6 @@ void batched_loglike(raft::handle_t& handle,
                   order,
                   params,
                   loglike,
-                  d_vs,
                   trans,
                   host_loglike,
                   method,
@@ -488,7 +513,6 @@ void batched_loglike_grad(raft::handle_t& handle,
   double* d_x_pert = arima_mem.x_pert;
   raft::copy(d_x_pert, d_x, N * batch_size, stream);
 
-  double* d_vs      = arima_mem.vs;
   double* d_ll_base = arima_mem.loglike_base;
   double* d_ll_pert = arima_mem.loglike_pert;
 
@@ -501,7 +525,6 @@ void batched_loglike_grad(raft::handle_t& handle,
                   order,
                   d_x,
                   d_ll_base,
-                  d_vs,
                   trans,
                   false,
                   method,
@@ -523,7 +546,6 @@ void batched_loglike_grad(raft::handle_t& handle,
                     order,
                     d_x_pert,
                     d_ll_pert,
-                    d_vs,
                     trans,
                     false,
                     method,
@@ -558,11 +580,9 @@ void information_criterion(raft::handle_t& handle,
   ML::PUSH_RANGE(__func__);
   auto stream = handle.get_stream();
 
-  double* d_vs = arima_mem.vs;
-
   /* Compute log-likelihood in d_ic */
   batched_loglike(
-    handle, arima_mem, d_y, batch_size, n_obs, order, params, d_ic, d_vs, false, false, MLE);
+    handle, arima_mem, d_y, batch_size, n_obs, order, params, d_ic, false, false, MLE);
 
   /* Compute information criterion from log-likelihood and base term */
   MLCommon::Metrics::Batched::information_criterion(
@@ -835,18 +855,33 @@ void estimate_x0(raft::handle_t& handle,
                  const double* d_y,
                  int batch_size,
                  int n_obs,
-                 const ARIMAOrder& order)
+                 const ARIMAOrder& order,
+                 bool missing)
 {
   ML::PUSH_RANGE(__func__);
   const auto& handle_impl = handle;
   auto stream             = handle_impl.get_stream();
   auto cublas_handle      = handle_impl.get_cublas_handle();
 
+  // Least squares can't deal with missing values: create copy with naive
+  // replacements for missing values
+  const double* d_y_no_missing;
+  rmm::device_uvector<double> y_no_missing(0, stream);
+  if (missing) {
+    y_no_missing.resize(n_obs * batch_size, stream);
+    d_y_no_missing = y_no_missing.data();
+
+    raft::copy(y_no_missing.data(), d_y, n_obs * batch_size, stream);
+    MLCommon::TimeSeries::fillna(y_no_missing.data(), batch_size, n_obs, stream);
+  } else {
+    d_y_no_missing = d_y;
+  }
+
   // Difference if necessary, copy otherwise
   MLCommon::LinAlg::Batched::Matrix<double> bm_yd(
     n_obs - order.d - order.s * order.D, 1, batch_size, cublas_handle, stream, false);
   MLCommon::TimeSeries::prepare_data(
-    bm_yd.raw_data(), d_y, batch_size, n_obs, order.d, order.D, order.s, stream);
+    bm_yd.raw_data(), d_y_no_missing, batch_size, n_obs, order.d, order.D, order.s, stream);
 
   // Do the computation of the initial parameters
   _start_params(handle, params, bm_yd, order);