From d62d0320528fe46b6d75b4339b0423af63238b95 Mon Sep 17 00:00:00 2001 From: venkywonka Date: Fri, 30 Jul 2021 20:16:59 +0530 Subject: [PATCH 01/42] update docsstrings and add std::round --- cpp/include/cuml/tree/decisiontree.hpp | 4 +- .../batched-levelalgo/builder.cuh | 4 +- python/cuml/benchmark/ci_benchmark.py | 1 - .../dask/ensemble/randomforestclassifier.py | 34 ++---- .../dask/ensemble/randomforestregressor.py | 36 ++---- python/cuml/ensemble/randomforest_common.pyx | 16 +-- .../cuml/ensemble/randomforestclassifier.pyx | 105 +++++++++--------- .../cuml/ensemble/randomforestregressor.pyx | 84 ++++++-------- python/cuml/test/test_metrics.py | 2 +- 9 files changed, 114 insertions(+), 172 deletions(-) diff --git a/cpp/include/cuml/tree/decisiontree.hpp b/cpp/include/cuml/tree/decisiontree.hpp index 54020c45ec..22738cc537 100644 --- a/cpp/include/cuml/tree/decisiontree.hpp +++ b/cpp/include/cuml/tree/decisiontree.hpp @@ -29,11 +29,11 @@ namespace DT { struct DecisionTreeParams { /** - * Maximum tree depth. Unlimited (e.g., until leaves are pure), if -1. + * Maximum tree depth. Unlimited (e.g., until leaves are pure), If `-1`. */ int max_depth; /** - * Maximum leaf nodes per tree. Soft constraint. Unlimited, if -1. + * Maximum leaf nodes per tree. Soft constraint. Unlimited, If `-1`. */ int max_leaves; /** diff --git a/cpp/src/decisiontree/batched-levelalgo/builder.cuh b/cpp/src/decisiontree/batched-levelalgo/builder.cuh index a46ee558f2..53f5787102 100644 --- a/cpp/src/decisiontree/batched-levelalgo/builder.cuh +++ b/cpp/src/decisiontree/batched-levelalgo/builder.cuh @@ -83,7 +83,7 @@ void grow_tree(std::shared_ptr d_allocator, nrows, ncols, n_sampled_rows, - IdxT(params.max_features * ncols), + IdxT(std::round(params.max_features * ncols)), rowids, unique_labels, quantiles); @@ -120,8 +120,6 @@ void grow_tree(std::shared_ptr d_allocator, * [on device] [col-major] * [dim = params.n_bins x ncols] * @param[in] rowids sampled rows [on device] [len = n_sampled_rows] - * @param[in] colids sampled cols [on device] - * [len = params.max_features * ncols] * @param[in] n_sampled_rows number of sub-sampled rows * @param[in] unique_labels number of classes (meaningful only for * classification) diff --git a/python/cuml/benchmark/ci_benchmark.py b/python/cuml/benchmark/ci_benchmark.py index 90fa9dce89..a4f0f908dc 100644 --- a/python/cuml/benchmark/ci_benchmark.py +++ b/python/cuml/benchmark/ci_benchmark.py @@ -173,7 +173,6 @@ def make_bench_configs(long_config): bench_dims=default_dims, cuml_param_override_list=[ {"n_bins": [8, 32]}, - {"split_algo": [0, 1]}, {"max_features": ['sqrt', 1.0]}, ], ) diff --git a/python/cuml/dask/ensemble/randomforestclassifier.py b/python/cuml/dask/ensemble/randomforestclassifier.py index 692d9e3a0e..19bce0795e 100755 --- a/python/cuml/dask/ensemble/randomforestclassifier.py +++ b/python/cuml/dask/ensemble/randomforestclassifier.py @@ -75,26 +75,21 @@ class RandomForestClassifier(BaseRandomForestModel, DelayedPredictionMixin, handles in several streams. If it is None, a new one is created. split_criterion : The criterion used to split nodes. - 0 for GINI, 1 for ENTROPY, 4 for CRITERION_END. - 2 and 3 not valid for classification - (default = 0) - split_algo : 0 for HIST and 1 for GLOBAL_QUANTILE (default = 1) - the algorithm to determine how nodes are split in the tree. - split_criterion : The criterion used to split nodes. - 0 for GINI, 1 for ENTROPY, 4 for CRITERION_END. + 0 for Gini impurity, 1 for Entropy (Information Gain), + 3 for CRITERION_END. 
2 and 3 not valid for classification (default = 0) bootstrap : boolean (default = True) Control bootstrapping. If set, each tree in the forest is built on a bootstrapped sample with replacement. - If False, the whole dataset is used to build each tree. + If `False`, the whole dataset is used to build each tree. max_samples : float (default = 1.0) Ratio of dataset rows used while fitting each tree. max_depth : int (default = -1) - Maximum tree depth. Unlimited (i.e, until leaves are pure), if -1. + Maximum tree depth. Unlimited (i.e, until leaves are pure), If `-1`. max_leaves : int (default = -1) - Maximum leaf nodes per tree. Soft constraint. Unlimited, if -1. + Maximum leaf nodes per tree. Soft constraint. Unlimited, If `-1`. max_features : float (default = 'auto') Ratio of number of features (columns) to consider per node split. @@ -102,27 +97,16 @@ class RandomForestClassifier(BaseRandomForestModel, DelayedPredictionMixin, Number of bins used by the split algorithm. min_samples_leaf : int or float (default = 1) The minimum number of samples (rows) in each leaf node. - If int, then min_samples_leaf represents the minimum number. + If type `int`, then `min_samples_leaf` represents the minimum number. If float, then min_samples_leaf represents a fraction and - ceil(min_samples_leaf * n_rows) is the minimum number of samples + `ceil(min_samples_leaf * n_rows)` is the minimum number of samples for each leaf node. min_samples_split : int or float (default = 2) The minimum number of samples required to split an internal node. - If int, then min_samples_split represents the minimum number. - If float, then min_samples_split represents a fraction and + If type `int`, then min_samples_split represents the minimum number. + If type `float`, then `min_samples_split` represents a fraction and ceil(min_samples_split * n_rows) is the minimum number of samples for each split. - quantile_per_tree : boolean (default = False) - Whether quantile is computed for individual RF trees. - Only relevant for GLOBAL_QUANTILE split_algo. - use_experimental_backend : boolean (default = True) - If set to true and the following conditions are also met, a new - experimental backend for decision tree training will be used. The - new backend is available only if `split_algo = 1` (GLOBAL_QUANTILE) - and `quantile_per_tree = False` (No per tree quantile computation). - The new backend is considered stable for classification tasks but - not yet for regression tasks. The RAPIDS team is continuing - optimization and evaluation of the new backend for regression tasks. n_streams : int (default = 4 ) Number of parallel streams used for forest building workers : optional, list of strings diff --git a/python/cuml/dask/ensemble/randomforestregressor.py b/python/cuml/dask/ensemble/randomforestregressor.py index 3b21810fb4..e1a1d43676 100755 --- a/python/cuml/dask/ensemble/randomforestregressor.py +++ b/python/cuml/dask/ensemble/randomforestregressor.py @@ -68,25 +68,22 @@ class RandomForestRegressor(BaseRandomForestModel, DelayedPredictionMixin, run different models concurrently in different streams by creating handles in several streams. If it is None, a new one is created. - split_algo : int (default = 1) - 0 for HIST, 1 for GLOBAL_QUANTILE - The type of algorithm to be used to create the trees. split_criterion : int (default = 2) The criterion used to split nodes. - 0 for GINI, 1 for ENTROPY, - 2 for MSE, 3 for MAE and 4 for CRITERION_END. 
+ 0 for Gini impurity, 1 for Entropy (Information Gain), + 2 for MSE (Mean Squared Error), and 3 for CRITERION_END. 0 and 1 not valid for regression bootstrap : boolean (default = True) Control bootstrapping. If set, each tree in the forest is built on a bootstrapped sample with replacement. - If False, the whole dataset is used to build each tree. + If `False`, the whole dataset is used to build each tree. max_samples : float (default = 1.0) Ratio of dataset rows used while fitting each tree. max_depth : int (default = -1) - Maximum tree depth. Unlimited (i.e, until leaves are pure), if -1. + Maximum tree depth. Unlimited (i.e, until leaves are pure), If `-1`. max_leaves : int (default = -1) - Maximum leaf nodes per tree. Soft constraint. Unlimited, if -1. + Maximum leaf nodes per tree. Soft constraint. Unlimited, If `-1`. max_features : int or float or string or None (default = 'auto') Ratio of number of features (columns) to consider per node split. @@ -100,15 +97,15 @@ class RandomForestRegressor(BaseRandomForestModel, DelayedPredictionMixin, Number of bins used by the split algorithm. min_samples_leaf : int or float (default = 1) The minimum number of samples (rows) in each leaf node. - If int, then min_samples_leaf represents the minimum number. - If float, then min_samples_leaf represents a fraction and - ceil(min_samples_leaf * n_rows) is the minimum number of samples + If type `int`, then `min_samples_leaf` represents the minimum number. + If `float`, then `min_samples_leaf` represents a fraction and + `ceil(min_samples_leaf * n_rows)` is the minimum number of samples for each leaf node. min_samples_split : int or float (default = 2) The minimum number of samples required to split an internal node. - If int, then min_samples_split represents the minimum number. - If float, then min_samples_split represents a fraction and - ceil(min_samples_split * n_rows) is the minimum number of samples + If type `int`, then `min_samples_split` represents the minimum number. + If type `float`, then `min_samples_split` represents a fraction and + `ceil(min_samples_split * n_rows)` is the minimum number of samples for each split. accuracy_metric : string (default = 'r2') Decides the metric used to evaluate the performance of the model. @@ -118,17 +115,6 @@ class RandomForestRegressor(BaseRandomForestModel, DelayedPredictionMixin, for median of abs error : 'median_ae' for mean of abs error : 'mean_ae' for mean square error' : 'mse' - quantile_per_tree : boolean (default = False) - Whether quantile is computed for individual RF trees. - Only relevant for GLOBAL_QUANTILE split_algo. - use_experimental_backend : boolean (default = False) - If set to true and the following conditions are also met, a new - experimental backend for decision tree training will be used. The - new backend is available only if `split_algo = 1` (GLOBAL_QUANTILE) - and `quantile_per_tree = False` (No per tree quantile computation). - The new backend is considered stable for classification tasks but - not yet for regression tasks. The RAPIDS team is continuing - optimization and evaluation of the new backend for regression tasks. 
n_streams : int (default = 4 ) Number of parallel streams used for forest building workers : optional, list of strings diff --git a/python/cuml/ensemble/randomforest_common.pyx b/python/cuml/ensemble/randomforest_common.pyx index 1bba0d37a1..1e52ce3adc 100644 --- a/python/cuml/ensemble/randomforest_common.pyx +++ b/python/cuml/ensemble/randomforest_common.pyx @@ -41,14 +41,14 @@ from cuml.common.array_descriptor import CumlArrayDescriptor class BaseRandomForestModel(Base): _param_names = ['n_estimators', 'max_depth', 'handle', 'max_features', 'n_bins', - 'split_algo', 'split_criterion', 'min_samples_leaf', + 'split_criterion', 'min_samples_leaf', 'min_samples_split', 'min_impurity_decrease', 'bootstrap', 'verbose', 'max_samples', 'max_leaves', - 'accuracy_metric', 'use_experimental_backend', - 'max_batch_size', 'n_streams', 'dtype', + 'accuracy_metric', 'max_batch_size', + 'n_streams', 'dtype', 'output_type', 'min_weight_fraction_leaf', 'n_jobs', 'max_leaf_nodes', 'min_impurity_split', 'oob_score', 'random_state', 'warm_start', 'class_weight', @@ -100,18 +100,10 @@ class BaseRandomForestModel(Base): if ((random_state is not None) and (n_streams != 1)): warnings.warn("For reproducible results in Random Forest" " Classifier or for almost reproducible results" - " in Random Forest Regressor, n_streams==1 is " + " in Random Forest Regressor, n_streams=1 is " "recommended. If n_streams is > 1, results may vary " "due to stream/thread timing differences, even when " "random_state is set") - if 'use_experimental_backend' in kwargs.keys(): - warnings.warn("The 'use_experimental_backend' parameter is " - "deprecated and has no effect. " - "It will be removed in 21.10 release.") - if 'split_algo' in kwargs.keys(): - warnings.warn("The 'split_algo' parameter is " - "deprecated and has no effect. " - "It will be removed in 21.10 release.") if handle is None: handle = Handle(n_streams) diff --git a/python/cuml/ensemble/randomforestclassifier.pyx b/python/cuml/ensemble/randomforestclassifier.pyx index f68bee6088..15eef87614 100644 --- a/python/cuml/ensemble/randomforestclassifier.pyx +++ b/python/cuml/ensemble/randomforestclassifier.pyx @@ -115,39 +115,14 @@ class RandomForestClassifier(BaseRandomForestModel, .. note:: Note that the underlying algorithm for tree node splits differs from that used in scikit-learn. By default, the cuML Random Forest uses a - histogram-based algorithm to determine splits, rather than an exact - count. You can tune the size of the histograms with the n_bins parameter. + quantile-based algorithm to determine splits, rather than an exact + count. You can tune the size of the quantiles with the `n_bins` parameter. .. note:: You can export cuML Random Forest models and run predictions with them on machines without an NVIDIA GPUs. See https://docs.rapids.ai/api/cuml/nightly/pickling_cuml_models.html for more details. - **Known Limitations**: This is an early release of the cuML - Random Forest code. It contains a few known limitations: - - * GPU-based inference is only supported if the model was trained - with 32-bit (float32) datatypes. CPU-based inference may be used - in this case as a slower fallback. - * Very deep / very wide models may exhaust available GPU memory. - Future versions of cuML will provide an alternative algorithm to - reduce memory consumption. - * While training the model for multi class classification problems, - using deep trees or `max_features=1.0` provides better performance. 
- * Prediction of classes is currently different from how scikit-learn - predicts: - * scikit-learn predicts random forest classifiers by obtaining class - probabilities from each component tree, then averaging these class - probabilities over all the ensemble members, and finally resolving - to the label with highest probability as prediction. - * cuml random forest classifier prediction differs in that, each - component tree generates labels instead of class probabilities; - with the most frequent label over all the trees (the statistical - mode) resolved as prediction. - The above differences might cause marginal variations in accuracy in - tradeoff to better performance. - See: https://github.com/rapidsai/cuml/issues/3764 - Examples -------- .. code-block:: python @@ -177,34 +152,32 @@ class RandomForestClassifier(BaseRandomForestModel, n_estimators : int (default = 100) Number of trees in the forest. (Default changed to 100 in cuML 0.11) split_criterion : The criterion used to split nodes. - 0 for GINI, 1 for ENTROPY + 0 for Gini impurity, + 1 for Entropy (Information Gain). 2 and 3 not valid for classification (default = 0) - split_algo : int (default = 1) - Deprecated and currrently has no effect. - .. deprecated:: 21.06 bootstrap : boolean (default = True) Control bootstrapping. - If True, each tree in the forest is built + If `True`, eachtree in the forest is built on a bootstrapped sample with replacement. - If False, the whole dataset is used to build each tree. + If `False`, the whole dataset is used to build each tree. max_samples : float (default = 1.0) Ratio of dataset rows used while fitting each tree. max_depth : int (default = 16) Maximum tree depth. Unlimited (i.e, until leaves are pure), - if -1. Unlimited depth is not supported. + If `-1`. Unlimited depth is not supported. *Note that this default differs from scikit-learn's random forest, which defaults to unlimited depth.* max_leaves : int (default = -1) Maximum leaf nodes per tree. Soft constraint. Unlimited, - if -1. + If `-1`. max_features : int, float, or string (default = 'auto') - Ratio of number of features (columns) to consider per node split. - If int then max_features/n_features. - If float then max_features is used as a fraction. - If 'auto' then max_features=1/sqrt(n_features). - If 'sqrt' then max_features=1/sqrt(n_features). - If 'log2' then max_features=log2(n_features)/n_features. + Ratio of number of features (columns) to consider per node split.\n + If type `int` then `max_features` is the absolute count of features to be used\n + If type `float` then `max_features` is used as a fraction.\n + If `'auto'` then `max_features=1/sqrt(n_features)`.\n + If `'sqrt'` then `max_features=1/sqrt(n_features)`.\n + If `'log2'` then `max_features=log2(n_features)/n_features`. n_bins : int (default = 128) Number of bins used by the split algorithm. For large problems, particularly those with highly-skewed input data, @@ -212,30 +185,25 @@ class RandomForestClassifier(BaseRandomForestModel, n_streams : int (default = 4) Number of parallel streams used for forest building. min_samples_leaf : int or float (default = 1) - The minimum number of samples (rows) in each leaf node. - If int, then min_samples_leaf represents the minimum number. 
+        The minimum number of samples (rows) in each leaf node.\n
+        If type `int`, then `min_samples_leaf` represents the minimum number.\n
         If float, then min_samples_leaf represents a fraction and
-        ceil(min_samples_leaf * n_rows) is the minimum number of samples
+        `ceil(min_samples_leaf * n_rows)` is the minimum number of samples
         for each leaf node.
     min_samples_split : int or float (default = 2)
-        The minimum number of samples required to split an internal node.
-        If int, then min_samples_split represents the minimum number.
-        If float, then min_samples_split represents a fraction and
-        ceil(min_samples_split * n_rows) is the minimum number of samples
+        The minimum number of samples required to split an internal node.\n
+        If type `int`, then `min_samples_split` represents the minimum number.\n
+        If type `float`, then `min_samples_split` represents a fraction and
+        `ceil(min_samples_split * n_rows)` is the minimum number of samples
         for each split.
     min_impurity_decrease : float (default = 0.0)
         Minimum decrease in impurity required for node to be split.
-    use_experimental_backend : boolean (default = True)
-        Deprecated and currrently has no effect.
-        .. deprecated:: 21.08
-    max_batch_size: int (default = 4096)
+    max_batch_size : int (default = 4096)
         Maximum number of nodes that can be processed in a given batch.
     random_state : int (default = None)
         Seed for the random number generator. Unseeded by default. Does not
-        currently fully guarantee the exact same results. **Note: Parameter
-        `seed` is removed since release 0.19.**
-
+        currently fully guarantee the exact same results.
     handle : cuml.Handle
         Specifies the cuml.handle that holds internal CUDA state for
         computations in this model. Most importantly, this specifies the CUDA
         stream that will be used for the model's computations, so users can
         run different models concurrently in different streams by creating
         handles in several streams.
         If it is None, a new one is created.
     verbose : int or boolean, default=False
         Sets logging level. It must be one of `cuml.common.logger.level_*`.
         See :ref:`verbosity-levels` for more info.
     output_type : {'input', 'cudf', 'cupy', 'numpy', 'numba'}, default=None
         Variable to control output type of the results and attributes of
         the estimator. If None, it'll inherit the output type set at the
         module level, `cuml.global_settings.output_type`.
         See :ref:`output-data-type-configuration` for more info.

     Notes
     -----
     **Known Limitations**\n
     This is an early release of the cuML
     Random Forest code. It contains a few known limitations:

     * GPU-based inference is only supported if the model was trained
       with 32-bit (float32) datatypes. CPU-based inference may be used
       in this case as a slower fallback.
     * While training the model for multi class classification problems,
       using deep trees or `max_features=1.0` provides better performance.
     * Prediction of classes is currently different from how scikit-learn
       predicts:
       * scikit-learn predicts random forest classifiers by obtaining class
         probabilities from each component tree, then averaging these class
         probabilities over all the ensemble members, and finally resolving
         to the label with highest probability as prediction.
       * cuml random forest classifier prediction differs in that, each
         component tree generates labels instead of class probabilities;
         with the most frequent label over all the trees (the statistical
         mode) resolved as prediction.
       The above differences might cause marginal variations in accuracy in
       tradeoff to better performance.
       See: https://github.com/rapidsai/cuml/issues/3764

     For additional docs, see `scikit-learn's RandomForestClassifier
     <https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html>`_.
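     The probability route can be used when scikit-learn-style predictions
     are wanted. A rough sketch of the idea (assuming a fitted classifier
     ``clf`` and float32 features ``X``; the exact output container depends
     on ``output_type``):

     .. code-block:: python

         import cupy as cp

         # cuML's default: majority vote over per-tree label predictions
         labels_mode = clf.predict(X)

         # closer to scikit-learn: average class probabilities over the
         # ensemble, then take the most probable class per row
         proba = clf.predict_proba(X)
         labels_avg = cp.argmax(cp.asarray(proba), axis=1)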
""" def __init__(self, *, split_criterion=0, handle=None, verbose=False, diff --git a/python/cuml/ensemble/randomforestregressor.pyx b/python/cuml/ensemble/randomforestregressor.pyx index c96ff64eb6..870a1737eb 100644 --- a/python/cuml/ensemble/randomforestregressor.pyx +++ b/python/cuml/ensemble/randomforestregressor.pyx @@ -114,23 +114,14 @@ class RandomForestRegressor(BaseRandomForestModel, .. note:: Note that the underlying algorithm for tree node splits differs from that used in scikit-learn. By default, the cuML Random Forest uses a - histogram-based algorithm to determine splits, rather than an exact - count. You can tune the size of the histograms with the n_bins parameter. + quantile-based algorithm to determine splits, rather than an exact + count. You can tune the size of the quantiles with the `n_bins` parameter. .. note:: You can export cuML Random Forest models and run predictions with them on machines without an NVIDIA GPUs. See https://docs.rapids.ai/api/cuml/nightly/pickling_cuml_models.html for more details. - **Known Limitations**: This is an early release of the cuML - Random Forest code. It contains a few known limitations: - - * GPU-based inference is only supported if the model was trained - with 32-bit (float32) datatypes. CPU-based inference may be used - in this case as a slower fallback. - * Very deep / very wide models may exhaust available GPU memory. - Future versions of cuML will provide an alternative algorithm to - reduce memory consumption. Examples -------- @@ -140,7 +131,6 @@ class RandomForestRegressor(BaseRandomForestModel, import numpy as np from cuml.test.utils import get_handle from cuml.ensemble import RandomForestRegressor as curfc - from cuml.test.utils import get_handle X = np.asarray([[0,10],[0,20],[0,30],[0,40]], dtype=np.float32) y = np.asarray([0.0,1.0,2.0,3.0], dtype=np.float32) cuml_model = curfc(max_features=1.0, n_bins=128, @@ -161,45 +151,35 @@ class RandomForestRegressor(BaseRandomForestModel, ----------- n_estimators : int (default = 100) Number of trees in the forest. (Default changed to 100 in cuML 0.11) - split_algo : int (default = 1) - The algorithm to determine how nodes are split in the tree. - Can be changed only for the old backend [deprecated]. - 0 for HIST and 1 for GLOBAL_QUANTILE. Default is GLOBAL_QUANTILE. - The default backend does not support HIST. - HIST currently uses a slower tree-building algorithm so - GLOBAL_QUANTILE is recommended for most cases. - - .. deprecated:: 21.06 - Parameter 'split_algo' is deprecated and will be removed in - subsequent release. split_criterion : int (default = 2) The criterion used to split nodes. - 0 for GINI, 1 for ENTROPY, - 2 for MSE + 0 for Gini impurity, + 1 for Entropy (Information Gain), + 2 for MSE (Mean Squared Error). 0 and 1 not valid for regression bootstrap : boolean (default = True) Control bootstrapping. - If True, each tree in the forest is built + If `True`, eachtree in the forest is built on a bootstrapped sample with replacement. - If False, the whole dataset is used to build each tree. + If `False`, the whole dataset is used to build each tree. max_samples : float (default = 1.0) Ratio of dataset rows used while fitting each tree. max_depth : int (default = 16) Maximum tree depth. Unlimited (i.e, until leaves are pure), - if -1. + If `-1`. *Note that this default differs from scikit-learn's random forest, which defaults to unlimited depth.* max_leaves : int (default = -1) Maximum leaf nodes per tree. Soft constraint. Unlimited, - if -1. + If `-1`. 
max_features : int, float, or string (default = 'auto') Ratio of number of features (columns) to consider - per node split. - If int then max_features/n_features. - If float then max_features is used as a fraction. - If 'auto' then max_features=1.0. - If 'sqrt' then max_features=1/sqrt(n_features). - If 'log2' then max_features=log2(n_features)/n_features. + per node split.\n + If type `int` then `max_features` is the absolute count of features to be used.\n + If type `float` then `max_features` is used as a fraction.\n + If `'auto'` then `max_features=1.0`.\n + If `'sqrt'` then `max_features=1/sqrt(n_features)`.\n + If `'log2'` then `max_features=log2(n_features)/n_features`. n_bins : int (default = 128) Number of bins used by the split algorithm. For large problems, particularly those with highly-skewed input data, @@ -207,16 +187,16 @@ class RandomForestRegressor(BaseRandomForestModel, n_streams : int (default = 4 ) Number of parallel streams used for forest building min_samples_leaf : int or float (default = 1) - The minimum number of samples (rows) in each leaf node. - If int, then min_samples_leaf represents the minimum number. + The minimum number of samples (rows) in each leaf node.\n + If type `int`, then `min_samples_leaf` represents the minimum number.\n If float, then min_samples_leaf represents a fraction and - ceil(min_samples_leaf * n_rows) is the minimum number of samples + `ceil(min_samples_leaf * n_rows)` is the minimum number of samples for each leaf node. min_samples_split : int or float (default = 2) - The minimum number of samples required to split an internal node. - If int, then min_samples_split represents the minimum number. - If float, then min_samples_split represents a fraction and - ceil(min_samples_split * n_rows) is the minimum number of samples + The minimum number of samples required to split an internal node.\n + If type `int`, then min_samples_split represents the minimum number.\n + If type `float`, then `min_samples_split` represents a fraction and + `ceil(min_samples_split * n_rows)` is the minimum number of samples for each split. min_impurity_decrease : float (default = 0.0) The minimum decrease in impurity required for node to be split @@ -228,15 +208,11 @@ class RandomForestRegressor(BaseRandomForestModel, for median of abs error : 'median_ae' for mean of abs error : 'mean_ae' for mean square error' : 'mse' - use_experimental_backend : boolean (default = True) - Deprecated and currrently has no effect. - .. deprecated:: 21.08 - max_batch_size: int (default = 4096) + max_batch_size : int (default = 4096) Maximum number of nodes that can be processed in a given batch. random_state : int (default = None) Seed for the random number generator. Unseeded by default. Does not - currently fully guarantee the exact same results. **Note: Parameter - `seed` is removed since release 0.19.** + currently fully guarantee the exact same results. handle : cuml.Handle Specifies the cuml.handle that holds internal CUDA state for computations in this model. Most importantly, this specifies the CUDA @@ -253,6 +229,18 @@ class RandomForestRegressor(BaseRandomForestModel, module level, `cuml.global_settings.output_type`. See :ref:`output-data-type-configuration` for more info. + Notes + ----- + **Known Limitations**\n + This is an early release of the cuML + Random Forest code. It contains a few known limitations: + + * GPU-based inference is only supported if the model was trained + with 32-bit (float32) datatypes. 
CPU-based inference may be used
      in this case as a slower fallback.

    For additional docs, see `scikit-learn's RandomForestRegressor
    <https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestRegressor.html>`_.
    """

    def __init__(self, *,
diff --git a/python/cuml/test/test_metrics.py b/python/cuml/test/test_metrics.py
index 8d642b72fd..35ab87a977 100644
--- a/python/cuml/test/test_metrics.py
+++ b/python/cuml/test/test_metrics.py
@@ -186,7 +186,7 @@ def test_accuracy(nrows, ncols, n_info, datatype):
     # Initialize, fit and predict using cuML's
     # random forest classification model
     cuml_model = curfc(max_features=1.0,
-                       n_bins=8, split_algo=0, split_criterion=0,
+                       n_bins=8, split_criterion=0,
                        min_samples_leaf=2,
                        n_estimators=40, handle=handle, max_leaves=-1,
                        max_depth=16)

From 994c10eade4c4a495c06c47407dfcfa29488d2f3 Mon Sep 17 00:00:00 2001
From: venkywonka
Date: Mon, 2 Aug 2021 19:46:18 +0530
Subject: [PATCH 02/42] suggest alternatives for GPU inference

---
 python/cuml/ensemble/randomforestclassifier.pyx | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/cuml/ensemble/randomforestclassifier.pyx b/python/cuml/ensemble/randomforestclassifier.pyx
index 15eef87614..0ece73a5b8 100644
--- a/python/cuml/ensemble/randomforestclassifier.pyx
+++ b/python/cuml/ensemble/randomforestclassifier.pyx
@@ -226,9 +226,9 @@ class RandomForestClassifier(BaseRandomForestModel,
     This is an early release of the cuML
     Random Forest code. It contains a few known limitations:

-    * GPU-based inference is only supported if the model was trained
-      with 32-bit (float32) datatypes. CPU-based inference may be used
-      in this case as a slower fallback.
+    * GPU-based inference is only supported with 32-bit (float32) datatypes.
+      Alternatives are to use CPU-based inference for 64-bit (float64) datatypes,
+      or let the default automatic datatype conversion occur during GPU inference.
     * While training the model for multi class classification problems,
       using deep trees or `max_features=1.0` provides better performance.
     * Prediction of classes is currently different from how scikit-learn

From efb177350ca1ef459ec1f9baaade32c2bf5fc8b0 Mon Sep 17 00:00:00 2001
From: venkywonka
Date: Mon, 2 Aug 2021 19:54:10 +0530
Subject: [PATCH 03/42] update previous commit for regressor docs

---
 python/cuml/ensemble/randomforestregressor.pyx | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/cuml/ensemble/randomforestregressor.pyx b/python/cuml/ensemble/randomforestregressor.pyx
index 870a1737eb..0aeea6f222 100644
--- a/python/cuml/ensemble/randomforestregressor.pyx
+++ b/python/cuml/ensemble/randomforestregressor.pyx
@@ -235,9 +235,9 @@ class RandomForestRegressor(BaseRandomForestModel,
     This is an early release of the cuML
     Random Forest code. It contains a few known limitations:

-    * GPU-based inference is only supported if the model was trained
-      with 32-bit (float32) datatypes. CPU-based inference may be used
-      in this case as a slower fallback.
+    * GPU-based inference is only supported with 32-bit (float32) datatypes.
+      Alternatives are to use CPU-based inference for 64-bit (float64) datatypes,
+      or let the default automatic datatype conversion occur during GPU inference.

     For additional docs, see `scikit-learn's RandomForestRegressor
     <https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestRegressor.html>`_.
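The two patches above document alternatives to float32-only GPU inference.
A rough sketch of those alternatives (assuming a fitted single-GPU regressor
``reg``, float64 input ``X64``, and the ``predict_model``/``convert_dtype``
keywords of the single-GPU ``predict`` API; treat this as illustrative, not
as a tested recipe):

.. code-block:: python

    import numpy as np

    # CPU-based inference: handles float64 directly, at lower speed
    preds_cpu = reg.predict(X64, predict_model='CPU')

    # GPU-based inference: convert to float32 explicitly, or rely on the
    # automatic conversion performed when convert_dtype=True (the default)
    preds_gpu = reg.predict(X64.astype(np.float32), predict_model='GPU')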
From 7de63e5139240d14ac61d44fade4c6ab738f9cba Mon Sep 17 00:00:00 2001 From: venkywonka Date: Mon, 2 Aug 2021 19:55:25 +0530 Subject: [PATCH 04/42] copyright fix --- python/cuml/benchmark/ci_benchmark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cuml/benchmark/ci_benchmark.py b/python/cuml/benchmark/ci_benchmark.py index a4f0f908dc..666fb39a7c 100644 --- a/python/cuml/benchmark/ci_benchmark.py +++ b/python/cuml/benchmark/ci_benchmark.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2019, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From d298d3060de9f60d4b46e27e84117cc0524af092 Mon Sep 17 00:00:00 2001 From: venkywonka Date: Thu, 5 Aug 2021 14:05:01 +0530 Subject: [PATCH 05/42] flake8 fix --- python/cuml/ensemble/randomforestclassifier.pyx | 11 +++++++---- python/cuml/ensemble/randomforestregressor.pyx | 10 ++++++---- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/python/cuml/ensemble/randomforestclassifier.pyx b/python/cuml/ensemble/randomforestclassifier.pyx index 0ece73a5b8..c2b0a69493 100644 --- a/python/cuml/ensemble/randomforestclassifier.pyx +++ b/python/cuml/ensemble/randomforestclassifier.pyx @@ -116,7 +116,8 @@ class RandomForestClassifier(BaseRandomForestModel, .. note:: Note that the underlying algorithm for tree node splits differs from that used in scikit-learn. By default, the cuML Random Forest uses a quantile-based algorithm to determine splits, rather than an exact - count. You can tune the size of the quantiles with the `n_bins` parameter. + count. You can tune the size of the quantiles with the `n_bins` + parameter. .. note:: You can export cuML Random Forest models and run predictions with them on machines without an NVIDIA GPUs. See @@ -173,7 +174,8 @@ class RandomForestClassifier(BaseRandomForestModel, If `-1`. max_features : int, float, or string (default = 'auto') Ratio of number of features (columns) to consider per node split.\n - If type `int` then `max_features` is the absolute count of features to be used\n + If type `int` then `max_features` is the absolute count of features to + be used\n If type `float` then `max_features` is used as a fraction.\n If `'auto'` then `max_features=1/sqrt(n_features)`.\n If `'sqrt'` then `max_features=1/sqrt(n_features)`.\n @@ -227,8 +229,9 @@ class RandomForestClassifier(BaseRandomForestModel, Random Forest code. It contains a few known limitations: * GPU-based inference is only supported with 32-bit (float32) datatypes. - Alternatives are to use CPU-based inference for 64-bit (float64) datatypes, - or let the default automatic datatype conversion occur during GPU inference. + Alternatives are to use CPU-based inference for 64-bit (float64) + datatypes, or let the default automatic datatype conversion occur + during GPU inference. * While training the model for multi class classification problems, using deep trees or `max_features=1.0` provides better performance. * Prediction of classes is currently different from how scikit-learn diff --git a/python/cuml/ensemble/randomforestregressor.pyx b/python/cuml/ensemble/randomforestregressor.pyx index 0aeea6f222..e8fcea56d8 100644 --- a/python/cuml/ensemble/randomforestregressor.pyx +++ b/python/cuml/ensemble/randomforestregressor.pyx @@ -115,7 +115,7 @@ class RandomForestRegressor(BaseRandomForestModel, .. 
note:: Note that the underlying algorithm for tree node splits differs from that used in scikit-learn. By default, the cuML Random Forest uses a quantile-based algorithm to determine splits, rather than an exact - count. You can tune the size of the quantiles with the `n_bins` parameter. + count. You can tune the size of the quantiles with the `n_bins` parameter .. note:: You can export cuML Random Forest models and run predictions with them on machines without an NVIDIA GPUs. See @@ -175,7 +175,8 @@ class RandomForestRegressor(BaseRandomForestModel, max_features : int, float, or string (default = 'auto') Ratio of number of features (columns) to consider per node split.\n - If type `int` then `max_features` is the absolute count of features to be used.\n + If type `int` then `max_features` is the absolute count of features to + be used.\n If type `float` then `max_features` is used as a fraction.\n If `'auto'` then `max_features=1.0`.\n If `'sqrt'` then `max_features=1/sqrt(n_features)`.\n @@ -236,8 +237,9 @@ class RandomForestRegressor(BaseRandomForestModel, Random Forest code. It contains a few known limitations: * GPU-based inference is only supported with 32-bit (float32) datatypes. - Alternatives are to use CPU-based inference for 64-bit (float64) datatypes, - or let the default automatic datatype conversion occur during GPU inference. + Alternatives are to use CPU-based inference for 64-bit (float64) + datatypes, or let the default automatic datatype conversion occur + during GPU inference. For additional docs, see `scikitlearn's RandomForestRegressor `_. From c1bf494d79f7532141ce4a7c9ea3cf2a4d4289e5 Mon Sep 17 00:00:00 2001 From: venkywonka Date: Thu, 5 Aug 2021 17:16:38 +0530 Subject: [PATCH 06/42] change default estimators in dask RF, consmetics changes --- .../dask/ensemble/randomforestclassifier.py | 76 +++++++++-------- .../dask/ensemble/randomforestregressor.py | 83 ++++++++++--------- .../cuml/ensemble/randomforestclassifier.pyx | 63 +++++++------- .../cuml/ensemble/randomforestregressor.pyx | 73 ++++++++-------- 4 files changed, 158 insertions(+), 137 deletions(-) diff --git a/python/cuml/dask/ensemble/randomforestclassifier.py b/python/cuml/dask/ensemble/randomforestclassifier.py index 19bce0795e..765ff0e968 100755 --- a/python/cuml/dask/ensemble/randomforestclassifier.py +++ b/python/cuml/dask/ensemble/randomforestclassifier.py @@ -49,14 +49,14 @@ class RandomForestClassifier(BaseRandomForestModel, DelayedPredictionMixin, Future versions of the API will support more flexible data distribution and additional input types. - The distributed algorithm uses an embarrassingly-parallel - approach. For a forest with N trees being built on w workers, each - worker simply builds N/w trees on the data it has available + The distributed algorithm uses an *embarrassingly-parallel* + approach. For a forest with `N` trees being built on `w` workers, each + worker simply builds `N/w` trees on the data it has available locally. In many cases, partitioning the data so that each worker builds trees on a subset of the total dataset works well, but it generally requires the data to be well-shuffled in advance. Alternatively, callers can replicate all of the data across - workers so that rf.fit receives w partitions, each containing the + workers so that ``rf.fit`` receives `w` partitions, each containing the same data. This would produce results approximately identical to single-GPU fitting. 
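    A minimal sketch of that distributed workflow (the cluster setup and
    data names here are illustrative assumptions, not part of this API):

    .. code-block:: python

        from dask.distributed import Client
        from dask_cuda import LocalCUDACluster
        import dask_cudf
        from cuml.dask.ensemble import RandomForestClassifier

        cluster = LocalCUDACluster()   # one Dask worker per local GPU
        client = Client(cluster)
        n_workers = len(client.scheduler_info()['workers'])

        # X_cudf, y_cudf: pre-shuffled cuDF objects; one partition per worker
        X_dask = dask_cudf.from_cudf(X_cudf, npartitions=n_workers)
        y_dask = dask_cudf.from_cudf(y_cudf, npartitions=n_workers)

        model = RandomForestClassifier(n_estimators=100)  # ~100/w trees per worker
        model.fit(X_dask, y_dask)
        preds = model.predict(X_dask)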
@@ -65,7 +65,7 @@ class RandomForestClassifier(BaseRandomForestModel, DelayedPredictionMixin, Parameters ----------- - n_estimators : int (default = 10) + n_estimators : int (default = 100) total number of trees in the forest (not per-worker) handle : cuml.Handle Specifies the cuml.handle that holds internal CUDA state for @@ -74,39 +74,50 @@ class RandomForestClassifier(BaseRandomForestModel, DelayedPredictionMixin, run different models concurrently in different streams by creating handles in several streams. If it is None, a new one is created. - split_criterion : The criterion used to split nodes. - 0 for Gini impurity, 1 for Entropy (Information Gain), - 3 for CRITERION_END. + split_criterion : int (default = 0) + The criterion used to split nodes.\n + * 0 for Gini impurity + * 1 for Entropy (Information Gain), + * 3 for CRITERION_END. 2 and 3 not valid for classification - (default = 0) bootstrap : boolean (default = True) - Control bootstrapping. - If set, each tree in the forest is built - on a bootstrapped sample with replacement. - If `False`, the whole dataset is used to build each tree. + Control bootstrapping.\n + * If ``True``, each tree in the forest is built on a bootstrapped + sample with replacement. + * If ``False``, the whole dataset is used to build each tree. max_samples : float (default = 1.0) Ratio of dataset rows used while fitting each tree. max_depth : int (default = -1) - Maximum tree depth. Unlimited (i.e, until leaves are pure), If `-1`. + Maximum tree depth. Unlimited (i.e, until leaves are pure), If ``-1``. max_leaves : int (default = -1) - Maximum leaf nodes per tree. Soft constraint. Unlimited, If `-1`. + Maximum leaf nodes per tree. Soft constraint. Unlimited, If ``-1``. max_features : float (default = 'auto') Ratio of number of features (columns) to consider - per node split. - n_bins : int (default = 8) + per node split.\n + * If type ``int`` then ``max_features`` is the absolute count of + features to be used. + * If type ``float`` then ``max_features`` is a fraction. + * If ``'auto'`` then ``max_features=n_features = 1.0``. + * If ``'sqrt'`` then ``max_features=1/sqrt(n_features)``. + * If ``'log2'`` then ``max_features=log2(n_features)/n_features``. + * If ``None``, then ``max_features = 1.0``. + n_bins : int (default = 128) Number of bins used by the split algorithm. min_samples_leaf : int or float (default = 1) - The minimum number of samples (rows) in each leaf node. - If type `int`, then `min_samples_leaf` represents the minimum number. - If float, then min_samples_leaf represents a fraction and - `ceil(min_samples_leaf * n_rows)` is the minimum number of samples - for each leaf node. + The minimum number of samples (rows) in each leaf node.\n + * If type ``int``, then ``min_samples_leaf`` represents the minimum + number. + * If ``float``, then ``min_samples_leaf`` represents a fraction + and ``ceil(min_samples_leaf * n_rows)`` is the minimum number of + samples for each leaf node. min_samples_split : int or float (default = 2) - The minimum number of samples required to split an internal node. - If type `int`, then min_samples_split represents the minimum number. - If type `float`, then `min_samples_split` represents a fraction and - ceil(min_samples_split * n_rows) is the minimum number of samples - for each split. + The minimum number of samples required to split an internal + node.\n + * If type ``int``, then ``min_samples_split`` represents the minimum + number. 
+ * If type ``float``, then ``min_samples_split`` represents a fraction + and ``ceil(min_samples_split * n_rows)`` is the minimum number of + samples for each split. n_streams : int (default = 4 ) Number of parallel streams used for forest building workers : optional, list of strings @@ -135,7 +146,7 @@ def __init__( workers=None, client=None, verbose=False, - n_estimators=10, + n_estimators=100, random_state=None, ignore_empty_partitions=False, **kwargs @@ -326,7 +337,7 @@ def predict(self, X, algo='auto', threshold=0.5, for inference. Returns - ---------- + ------- y : Dask cuDF dataframe or CuPy backed Dask Array (n_rows, 1) """ @@ -400,8 +411,9 @@ def predict_model_on_cpu(self, X, convert_dtype=True): When set to True, the predict method will, when necessary, convert the input to the data type which was used to train the model. This will increase memory used for the method. + Returns - ---------- + ------- y : Dask cuDF dataframe or CuPy backed Dask Array (n_rows, 1) """ c = default_client() @@ -497,9 +509,7 @@ def predict_proba(self, X, Returns ------- - y : NumPy - Dask cuDF dataframe or CuPy backed Dask Array (n_rows, n_classes) - + y : Dask cuDF dataframe or CuPy backed Dask Array (n_rows, 1) """ if self._get_internal_model() is None: self._set_internal_model(self._concat_treelite_models()) diff --git a/python/cuml/dask/ensemble/randomforestregressor.py b/python/cuml/dask/ensemble/randomforestregressor.py index e1a1d43676..2e13ffa2dd 100755 --- a/python/cuml/dask/ensemble/randomforestregressor.py +++ b/python/cuml/dask/ensemble/randomforestregressor.py @@ -43,14 +43,14 @@ class RandomForestRegressor(BaseRandomForestModel, DelayedPredictionMixin, distribution and additional input types. User-facing APIs are expected to change in upcoming versions. - The distributed algorithm uses an embarrassingly-parallel - approach. For a forest with N trees being built on w workers, each - worker simply builds N/w trees on the data it has available + The distributed algorithm uses an *embarrassingly-parallel* + approach. For a forest with `N` trees being built on `w` workers, each + worker simply builds `N/w` trees on the data it has available locally. In many cases, partitioning the data so that each worker builds trees on a subset of the total dataset works well, but it generally requires the data to be well-shuffled in advance. Alternatively, callers can replicate all of the data across - workers so that rf.fit receives w partitions, each containing the + workers so that ``rf.fit`` receives `w` partitions, each containing the same data. This would produce results approximately identical to single-GPU fitting. @@ -59,7 +59,7 @@ class RandomForestRegressor(BaseRandomForestModel, DelayedPredictionMixin, Parameters ----------- - n_estimators : int (default = 10) + n_estimators : int (default = 100) total number of trees in the forest (not per-worker) handle : cuml.Handle Specifies the cuml.handle that holds internal CUDA state for @@ -69,52 +69,57 @@ class RandomForestRegressor(BaseRandomForestModel, DelayedPredictionMixin, handles in several streams. If it is None, a new one is created. split_criterion : int (default = 2) - The criterion used to split nodes. - 0 for Gini impurity, 1 for Entropy (Information Gain), - 2 for MSE (Mean Squared Error), and 3 for CRITERION_END. + The criterion used to split nodes.\n + * 0 for Gini impurity + * 1 for Entropy (Information Gain), + * 2 for MSE (Mean Squared Error) + * 3 for CRITERION_END. 
0 and 1 not valid for regression bootstrap : boolean (default = True) - Control bootstrapping. - If set, each tree in the forest is built - on a bootstrapped sample with replacement. - If `False`, the whole dataset is used to build each tree. + Control bootstrapping.\n + * If ``True``, each tree in the forest is built on a bootstrapped + sample with replacement. + * If ``False``, the whole dataset is used to build each tree. max_samples : float (default = 1.0) Ratio of dataset rows used while fitting each tree. max_depth : int (default = -1) - Maximum tree depth. Unlimited (i.e, until leaves are pure), If `-1`. + Maximum tree depth. Unlimited (i.e, until leaves are pure), If ``-1``. max_leaves : int (default = -1) - Maximum leaf nodes per tree. Soft constraint. Unlimited, If `-1`. - max_features : int or float or string or None (default = 'auto') + Maximum leaf nodes per tree. Soft constraint. Unlimited, If ``-1``. + max_features : float (default = 'auto') Ratio of number of features (columns) to consider - per node split. - If int then max_features/n_features. - If float then max_features is a fraction. - If 'auto' then max_features=n_features which is 1.0. - If 'sqrt' then max_features=1/sqrt(n_features). - If 'log2' then max_features=log2(n_features)/n_features. - If None, then max_features=n_features which is 1.0. - n_bins : int (default = 8) + per node split.\n + * If type ``int`` then ``max_features`` is the absolute count of + features to be used. + * If type ``float`` then ``max_features`` is a fraction. + * If ``'auto'`` then ``max_features=n_features = 1.0``. + * If ``'sqrt'`` then ``max_features=1/sqrt(n_features)``. + * If ``'log2'`` then ``max_features=log2(n_features)/n_features``. + * If ``None``, then ``max_features = 1.0``. + n_bins : int (default = 128) Number of bins used by the split algorithm. min_samples_leaf : int or float (default = 1) - The minimum number of samples (rows) in each leaf node. - If type `int`, then `min_samples_leaf` represents the minimum number. - If `float`, then `min_samples_leaf` represents a fraction and - `ceil(min_samples_leaf * n_rows)` is the minimum number of samples - for each leaf node. + The minimum number of samples (rows) in each leaf node.\n + * If type ``int``, then ``min_samples_leaf`` represents the minimum + number. + * If ``float``, then ``min_samples_leaf`` represents a fraction and + ``ceil(min_samples_leaf * n_rows)`` is the minimum number of + samples for each leaf node. min_samples_split : int or float (default = 2) - The minimum number of samples required to split an internal node. - If type `int`, then `min_samples_split` represents the minimum number. - If type `float`, then `min_samples_split` represents a fraction and - `ceil(min_samples_split * n_rows)` is the minimum number of samples - for each split. + The minimum number of samples required to split an internal node.\n + * If type ``int``, then ``min_samples_split`` represents the minimum + number. + * If type ``float``, then ``min_samples_split`` represents a fraction + and ``ceil(min_samples_split * n_rows)`` is the minimum number of + samples for each split. accuracy_metric : string (default = 'r2') Decides the metric used to evaluate the performance of the model. In the 0.16 release, the default scoring metric was changed - from mean squared error to r-squared. 
        * for r-squared : ``'r2'``
        * for median of abs error : ``'median_ae'``
        * for mean of abs error : ``'mean_ae'``
        * for mean square error : ``'mse'``
    n_streams : int (default = 4 )
        Number of parallel streams used for forest building
    workers : optional, list of strings
@@ -139,7 +144,7 @@ def __init__(
         workers=None,
         client=None,
         verbose=False,
-        n_estimators=10,
+        n_estimators=100,
         random_state=None,
         ignore_empty_partitions=False,
         **kwargs
diff --git a/python/cuml/ensemble/randomforestclassifier.pyx b/python/cuml/ensemble/randomforestclassifier.pyx
index c2b0a69493..e6b8af0a08 100644
--- a/python/cuml/ensemble/randomforestclassifier.pyx
+++ b/python/cuml/ensemble/randomforestclassifier.pyx
@@ -152,34 +152,35 @@ class RandomForestClassifier(BaseRandomForestModel,
    -----------
    n_estimators : int (default = 100)
        Number of trees in the forest. (Default changed to 100 in cuML 0.11)
    split_criterion : int (default = 0)
        The criterion used to split nodes.\n
        * 0 for Gini impurity,
        * 1 for Entropy (Information Gain).
        2 and 3 not valid for classification
    bootstrap : boolean (default = True)
        Control bootstrapping.\n
        * If ``True``, each tree in the forest is built on a bootstrapped
          sample with replacement.
        * If ``False``, the whole dataset is used to build each tree.
    max_samples : float (default = 1.0)
        Ratio of dataset rows used while fitting each tree.
    max_depth : int (default = 16)
        Maximum tree depth. Unlimited (i.e, until leaves are pure),
        If ``-1``. Unlimited depth is not supported.\n
        .. note:: This default differs from scikit-learn's
           random forest, which defaults to unlimited depth.
    max_leaves : int (default = -1)
        Maximum leaf nodes per tree. Soft constraint. Unlimited,
        If ``-1``.
    max_features : int, float, or string (default = 'auto')
        Ratio of number of features (columns) to consider per node
        split.\n
        * If type ``int`` then ``max_features`` is the absolute count of
          features to be used.
        * If type ``float`` then ``max_features`` is used as a fraction.
        * If ``'auto'`` then ``max_features=1/sqrt(n_features)``.
        * If ``'sqrt'`` then ``max_features=1/sqrt(n_features)``.
        * If ``'log2'`` then ``max_features=log2(n_features)/n_features``.
    n_bins : int (default = 128)
        Number of bins used by the split algorithm. For large problems,
        particularly those with highly-skewed input data, increasing the
        number of bins may improve accuracy.
    n_streams : int (default = 4)
        Number of parallel streams used for forest building.
    min_samples_leaf : int or float (default = 1)
        The minimum number of samples (rows) in each leaf node.\n
        * If type ``int``, then ``min_samples_leaf`` represents the minimum
          number.
        * If ``float``, then ``min_samples_leaf`` represents a fraction and
          ``ceil(min_samples_leaf * n_rows)`` is the minimum number of
          samples for each leaf node.
    min_samples_split : int or float (default = 2)
        The minimum number of samples required to split an internal node.\n
        * If type ``int``, then ``min_samples_split`` represents the minimum
          number.
        * If type ``float``, then ``min_samples_split`` represents a fraction
          and ``ceil(min_samples_split * n_rows)`` is the minimum number of
          samples for each split.
    min_impurity_decrease : float (default = 0.0)
        Minimum decrease in impurity required for node to be split.
    max_batch_size : int (default = 4096)
        Maximum number of nodes that can be processed in a given batch.
    random_state : int (default = None)
        Seed for the random number generator. Unseeded by default. Does not
        currently fully guarantee the exact same results.
    handle : cuml.Handle
        Specifies the cuml.handle that holds internal CUDA state for
        computations in this model. Most importantly, this specifies the CUDA
        stream that will be used for the model's computations, so users can
        run different models concurrently in different streams by creating
        handles in several streams.
        If it is None, a new one is created.
    verbose : int or boolean, default=False
        Sets logging level. It must be one of ``cuml.common.logger.level_*``.
        See :ref:`verbosity-levels` for more info.
    output_type : ``{'input', 'cudf', 'cupy', 'numpy','numba'}`` (default=None)
        Variable to control output type of the results and attributes of
        the estimator. If None, it'll inherit the output type set at the
        module level, ``cuml.global_settings.output_type``.
        See :ref:`output-data-type-configuration` for more info.
diff --git a/python/cuml/ensemble/randomforestregressor.pyx b/python/cuml/ensemble/randomforestregressor.pyx
index e8fcea56d8..2836673b55 100644
--- a/python/cuml/ensemble/randomforestregressor.pyx
+++ b/python/cuml/ensemble/randomforestregressor.pyx
@@ -152,35 +152,35 @@ class RandomForestRegressor(BaseRandomForestModel,
    n_estimators : int (default = 100)
        Number of trees in the forest. (Default changed to 100 in cuML 0.11)
    split_criterion : int (default = 2)
        The criterion used to split nodes.\n
        * 0 for Gini impurity,
        * 1 for Entropy (Information Gain),
        * 2 for MSE (Mean Squared Error).
        0 and 1 not valid for regression
    bootstrap : boolean (default = True)
        Control bootstrapping.\n
        * If ``True``, each tree in the forest is built
          on a bootstrapped sample with replacement.
        * If ``False``, the whole dataset is used to build each tree.
    max_samples : float (default = 1.0)
        Ratio of dataset rows used while fitting each tree.
    max_depth : int (default = 16)
        Maximum tree depth. Unlimited (i.e, until leaves are pure),
-        If `-1`.
-        *Note that this default differs from scikit-learn's
-        random forest, which defaults to unlimited depth.*
+        If ``-1``.\n
+        .. note:: This default differs from scikit-learn's
+           random forest, which defaults to unlimited depth.
    max_leaves : int (default = -1)
        Maximum leaf nodes per tree. Soft constraint. Unlimited,
        If ``-1``.
    max_features : int, float, or string (default = 'auto')
        Ratio of number of features (columns) to consider
        per node split.\n
        * If type ``int`` then ``max_features`` is the absolute count of
          features to be used.
        * If type ``float`` then ``max_features`` is used as a fraction.
        * If ``'auto'`` then ``max_features=1.0``.
        * If ``'sqrt'`` then ``max_features=1/sqrt(n_features)``.
        * If ``'log2'`` then ``max_features=log2(n_features)/n_features``.
    n_bins : int (default = 128)
        Number of bins used by the split algorithm. For large problems,
        particularly those with highly-skewed input data, increasing the
        number of bins may improve accuracy.
    n_streams : int (default = 4 )
        Number of parallel streams used for forest building
    min_samples_leaf : int or float (default = 1)
        The minimum number of samples (rows) in each leaf node.\n
        * If type ``int``, then ``min_samples_leaf`` represents the minimum
          number.
        * If ``float``, then ``min_samples_leaf`` represents a fraction and
          ``ceil(min_samples_leaf * n_rows)`` is the minimum number of
          samples for each leaf node.
    min_samples_split : int or float (default = 2)
        The minimum number of samples required to split an internal
        node.\n
        * If type ``int``, then ``min_samples_split`` represents the minimum
          number.
        * If type ``float``, then ``min_samples_split`` represents a fraction
          and ``ceil(min_samples_split * n_rows)`` is the minimum number of
          samples for each split.
    min_impurity_decrease : float (default = 0.0)
        The minimum decrease in impurity required for node to be split.
    accuracy_metric : string (default = 'r2')
        Decides the metric used to evaluate the performance of the model.
        In the 0.16 release, the default scoring metric was changed
        from mean squared error to r-squared.\n
        * for r-squared : ``'r2'``
        * for median of abs error : ``'median_ae'``
        * for mean of abs error : ``'mean_ae'``
        * for mean square error : ``'mse'``
    max_batch_size : int (default = 4096)
        Maximum number of nodes that can be processed in a given batch.
    random_state : int (default = None)
        Seed for the random number generator. Unseeded by default. Does not
        currently fully guarantee the exact same results.
    handle : cuml.Handle
        Specifies the cuml.handle that holds internal CUDA state for
        computations in this model. Most importantly, this specifies the CUDA
        stream that will be used for the model's computations, so users can
        run different models concurrently in different streams by creating
        handles in several streams.
        If it is None, a new one is created.
verbose : int or boolean, default=False - Sets logging level. It must be one of `cuml.common.logger.level_*`. + Sets logging level. It must be one of ``cuml.common.logger.level_*``. See :ref:`verbosity-levels` for more info. - output_type : {'input', 'cudf', 'cupy', 'numpy', 'numba'}, default=None + output_type : ``{'input', 'cudf', 'cupy', 'numpy', 'numba'}`` (default=None) Variable to control output type of the results and attributes of the estimator. If None, it'll inherit the output type set at the - module level, `cuml.global_settings.output_type`. + module level, ``cuml.global_settings.output_type``. See :ref:`output-data-type-configuration` for more info. Notes From ceee023f01988aa95e4ec365fc4c03da1b8f2685 Mon Sep 17 00:00:00 2001 From: venkywonka Date: Wed, 11 Aug 2021 21:31:21 +0530 Subject: [PATCH 07/42] add poisson deviance loss --- cpp/include/cuml/tree/algo_helper.h | 1 + .../batched-levelalgo/builder.cuh | 18 +++ .../batched-levelalgo/metrics.cuh | 140 +++++++++++++----- cpp/src/decisiontree/decisiontree.cuh | 4 +- cpp/test/sg/rf_test.cu | 4 +- .../dask/ensemble/randomforestregressor.py | 2 +- python/cuml/ensemble/randomforest_common.pyx | 2 +- python/cuml/ensemble/randomforest_shared.pxd | 1 + 8 files changed, 133 insertions(+), 39 deletions(-) diff --git a/cpp/include/cuml/tree/algo_helper.h b/cpp/include/cuml/tree/algo_helper.h index 28b4ac0e5d..ae7aa9b9d1 100644 --- a/cpp/include/cuml/tree/algo_helper.h +++ b/cpp/include/cuml/tree/algo_helper.h @@ -22,6 +22,7 @@ enum CRITERION { ENTROPY, MSE, MAE, + POISSON, CRITERION_END, }; diff --git a/cpp/src/decisiontree/batched-levelalgo/builder.cuh b/cpp/src/decisiontree/batched-levelalgo/builder.cuh index a46ee558f2..70af73bdfb 100644 --- a/cpp/src/decisiontree/batched-levelalgo/builder.cuh +++ b/cpp/src/decisiontree/batched-levelalgo/builder.cuh @@ -188,6 +188,24 @@ void grow_tree(std::shared_ptr d_allocator, sparsetree, num_leaves, depth); + } else if (params.split_criterion == CRITERION::POISSON) { + grow_tree>(d_allocator, + h_allocator, + data, + treeid, + seed, + ncols, + nrows, + labels, + quantiles, + rowids, + n_sampled_rows, + unique_labels, + params, + stream, + sparsetree, + num_leaves, + depth); } else if (params.split_criterion == CRITERION::MSE) { grow_tree>(d_allocator, h_allocator, diff --git a/cpp/src/decisiontree/batched-levelalgo/metrics.cuh b/cpp/src/decisiontree/batched-levelalgo/metrics.cuh index e85553b3ae..1c452813ba 100644 --- a/cpp/src/decisiontree/batched-levelalgo/metrics.cuh +++ b/cpp/src/decisiontree/batched-levelalgo/metrics.cuh @@ -27,21 +27,46 @@ namespace ML { namespace DT { -struct IntBin { +struct CountBin { int x; - DI static void IncrementHistogram(IntBin* hist, int nbins, int b, int label) + DI static void IncrementHistogram(CountBin* hist, int nbins, int b, int label) { auto offset = label * nbins + b; - IntBin::AtomicAdd(hist + offset, {1}); + CountBin::AtomicAdd(hist + offset, {1}); } - DI static void AtomicAdd(IntBin* address, IntBin val) { atomicAdd(&address->x, val.x); } - DI IntBin& operator+=(const IntBin& b) + DI static void AtomicAdd(CountBin* address, CountBin val) { atomicAdd(&address->x, val.x); } + DI CountBin& operator+=(const CountBin& b) { x += b.x; return *this; } - DI IntBin operator+(IntBin b) const + DI CountBin operator+(CountBin b) const + { + b += *this; + return b; + } +}; +struct AggregateBin { + double label_sum; + int count; + + DI static void IncrementHistogram(AggregateBin* hist, int nbins, int b, double label) + { + AggregateBin::AtomicAdd(hist + b, {label, 
1});
+  }
+  DI static void AtomicAdd(AggregateBin* address, AggregateBin val)
+  {
+    atomicAdd(&address->label_sum, val.label_sum);
+    atomicAdd(&address->count, val.count);
+  }
+  DI AggregateBin& operator+=(const AggregateBin& b)
+  {
+    label_sum += b.label_sum;
+    count += b.count;
+    return *this;
+  }
+  DI AggregateBin operator+(AggregateBin b) const
+  {
+    b += *this;
+    return b;
@@ -59,7 +84,7 @@ class GiniObjectiveFunction {
   IdxT min_samples_leaf;

  public:
-  using BinT = IntBin;
+  using BinT = CountBin;
   GiniObjectiveFunction(IdxT nclasses, DataT min_impurity_decrease, IdxT min_samples_leaf)
     : nclasses(nclasses),
       min_impurity_decrease(min_impurity_decrease),
@@ -135,7 +160,7 @@ class EntropyObjectiveFunction {
   IdxT min_samples_leaf;

  public:
-  using BinT = IntBin;
+  using BinT = CountBin;
   EntropyObjectiveFunction(IdxT nclasses, DataT min_impurity_decrease, IdxT min_samples_leaf)
     : nclasses(nclasses),
       min_impurity_decrease(min_impurity_decrease),
@@ -198,7 +223,7 @@
 template <typename DataT_, typename LabelT_, typename IdxT_>
-class MSEObjectiveFunction {
+class PoissonObjectiveFunction {
  public:
   using DataT = DataT_;
   using LabelT = LabelT_;
@@ -209,32 +234,81 @@ class MSEObjectiveFunction {
   IdxT min_samples_leaf;

  public:
-  struct MSEBin {
-    double label_sum;
-    int count;
+  using BinT = AggregateBin;

-    DI static void IncrementHistogram(MSEBin* hist, int nbins, int b, double label)
-    {
-      MSEBin::AtomicAdd(hist + b, {label, 1});
-    }
-    DI static void AtomicAdd(MSEBin* address, MSEBin val)
-    {
-      atomicAdd(&address->label_sum, val.label_sum);
-      atomicAdd(&address->count, val.count);
-    }
-    DI MSEBin& operator+=(const MSEBin& b)
-    {
-      label_sum += b.label_sum;
-      count += b.count;
-      return *this;
-    }
-    DI MSEBin operator+(MSEBin b) const
-    {
-      b += *this;
-      return b;
+  HDI PoissonObjectiveFunction(IdxT nclasses, DataT min_impurity_decrease, IdxT min_samples_leaf)
+    : min_impurity_decrease(min_impurity_decrease), min_samples_leaf(min_samples_leaf)
+  {
+  }
+  DI IdxT NumClasses() const { return 1; }
+
+  /**
+   * @brief compute the poisson impurity reduction (or purity gain)
+   *
+   * @note This method is used to speed up the search for the best split.
+     It is a proxy quantity such that the split that maximizes this value
+     also maximizes the impurity improvement. It neglects all constant terms
+     of the impurity decrease for a given split.
+
+     Refer to scikit-learn's docs for the original half Poisson deviance impurity criterion:
+     https://scikit-learn.org/stable/modules/tree.html#regression-criteria
+
+     The Poisson proxy used here is:
+     - 1/n * sum(y_i * log(y_pred)) = -mean(y_i) * log(mean(y_i))
+   */
+  DI Split<DataT, IdxT> Gain(BinT* shist, DataT* sbins, IdxT col, IdxT len, IdxT nbins)
+  {
+    Split<DataT, IdxT> sp;
+    auto invlen = DataT(1.0) / len;
+    for (IdxT i = threadIdx.x; i < nbins; i += blockDim.x) {
+      auto nLeft = shist[i].count;
+      auto nRight = len - nLeft;
+      DataT gain;
+      // if there aren't enough samples in this split, don't bother!
+      if (nLeft < min_samples_leaf || nRight < min_samples_leaf) {
+        gain = -std::numeric_limits<DataT>::max();
+      } else {
+        auto label_mean = shist[nbins - 1].label_sum / len;
+        auto left_label_mean = -(shist[i].label_sum) / nLeft;
+        auto right_label_mean = -(shist[nbins - 1].label_sum - shist[i].label_sum) / nRight;
+        // poisson loss does not allow non-positive predictions
+        if(label_mean <= std::numeric_limits<DataT>::epsilon() || left_label_mean <= std::numeric_limits<DataT>::epsilon() || right_label_mean <= std::numeric_limits<DataT>::epsilon()) {
+          // used to prevent errors due to floating point roundings
+          gain = -std::numeric_limits<DataT>::max();
+        }
+        else {
+          // the objective functions below are a 'proxy' for the actual half Poisson deviance
+          DataT parent_obj = -label_mean * raft::myLog(label_mean);
+          DataT left_obj = -left_label_mean * raft::myLog(left_label_mean);
+          DataT right_obj = -right_label_mean * raft::myLog(right_label_mean);
+          gain = parent_obj - (left_obj + right_obj);
+        }
+      }
+      // if the gain is not "enough", don't bother!
+      if (gain <= min_impurity_decrease) { gain = -std::numeric_limits<DataT>::max(); }
+      sp.update({sbins[i], col, gain, nLeft});
+    }
+    return sp;
+  }
+
+  static DI LabelT LeafPrediction(BinT* shist, int nclasses)
+  {
+    return shist[0].label_sum / shist[0].count;
+  }
+};
+template <typename DataT_, typename LabelT_, typename IdxT_>
+class MSEObjectiveFunction {
+ public:
+  using DataT = DataT_;
+  using LabelT = LabelT_;
+  using IdxT = IdxT_;
+
+ private:
+  DataT min_impurity_decrease;
+  IdxT min_samples_leaf;
+
+ public:
+  using BinT = AggregateBin;
   HDI MSEObjectiveFunction(IdxT nclasses, DataT min_impurity_decrease, IdxT min_samples_leaf)
     : min_impurity_decrease(min_impurity_decrease), min_samples_leaf(min_samples_leaf)
   {
diff --git a/cpp/src/decisiontree/decisiontree.cuh b/cpp/src/decisiontree/decisiontree.cuh
index cf54531ae1..ae9b80bb6a 100644
--- a/cpp/src/decisiontree/decisiontree.cuh
+++ b/cpp/src/decisiontree/decisiontree.cuh
@@ -288,11 +288,11 @@ class DecisionTree {
   {
     this->tree_params = tree_parameters;
     this->prepare_fit_timer.reset();
-    const char* CRITERION_NAME[] = {"GINI", "ENTROPY", "MSE", "MAE", "END"};
+    const char* CRITERION_NAME[] = {"GINI", "ENTROPY", "MSE", "MAE", "POISSON", "END"};
     CRITERION default_criterion =
       (std::numeric_limits<L>::is_integer) ? CRITERION::GINI : CRITERION::MSE;
     CRITERION last_criterion =
-      (std::numeric_limits<L>::is_integer) ? CRITERION::ENTROPY : CRITERION::MSE;
+      (std::numeric_limits<L>::is_integer) ?
CRITERION::ENTROPY : CRITERION::POISSON;

     validity_check(tree_params);

diff --git a/cpp/test/sg/rf_test.cu b/cpp/test/sg/rf_test.cu
index e055ed61d6..7dfe89878b 100644
--- a/cpp/test/sg/rf_test.cu
+++ b/cpp/test/sg/rf_test.cu
@@ -327,7 +327,7 @@ class RfTest : public ::testing::TestWithParam<RfTestParams> {
   void SetUp() override
   {
     RfTestParams params = ::testing::TestWithParam<RfTestParams>::GetParam();
-    bool is_regression = params.split_criterion == MSE || params.split_criterion == MAE;
+    bool is_regression = params.split_criterion == MSE || params.split_criterion == MAE || params.split_criterion == POISSON;
     if (params.double_precision) {
       if (is_regression) {
         RfSpecialisedTest<double> test(params);
@@ -361,7 +361,7 @@ std::vector<int> min_samples_leaf = {1, 10, 30};
 std::vector<int> min_samples_split = {2, 10};
 std::vector<float> min_impurity_decrease = {0.0, 1.0f, 10.0f};
 std::vector<int> n_streams = {1, 2, 10};
-std::vector<CRITERION> split_criterion = {CRITERION::MSE, CRITERION::GINI, CRITERION::ENTROPY};
+std::vector<CRITERION> split_criterion = {CRITERION::POISSON, CRITERION::MSE, CRITERION::GINI, CRITERION::ENTROPY};
 std::vector<uint64_t> seed = {0, 17};
 std::vector<int> n_labels = {2, 10, 30};
 std::vector<bool> double_precision = {false, true};
diff --git a/python/cuml/dask/ensemble/randomforestregressor.py b/python/cuml/dask/ensemble/randomforestregressor.py
index 3b21810fb4..c2521d21c7 100755
--- a/python/cuml/dask/ensemble/randomforestregressor.py
+++ b/python/cuml/dask/ensemble/randomforestregressor.py
@@ -74,7 +74,7 @@ class RandomForestRegressor(BaseRandomForestModel, DelayedPredictionMixin,
     split_criterion : int (default = 2)
         The criterion used to split nodes.
         0 for GINI, 1 for ENTROPY,
-        2 for MSE, 3 for MAE and 4 for CRITERION_END.
+        2 for MSE, 3 for MAE and 4 for POISSON.
         0 and 1 not valid for regression
     bootstrap : boolean (default = True)
         Control bootstrapping.
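For orientation, a minimal sketch of how the new criterion would be selected from the
Python layer once this series lands. The synthetic data and hyperparameter values are
hypothetical and only illustrate the call shape; `split_criterion=4` maps to
CRITERION::POISSON via the criterion mapping updated in the next hunk.

    import cupy as cp
    from cuml.ensemble import RandomForestRegressor

    X = cp.random.uniform(0, 1, size=(1000, 5)).astype(cp.float32)
    # Poisson deviance assumes non-negative targets, e.g. event counts
    y = cp.random.poisson(lam=2.0, size=1000).astype(cp.float32)

    # split_criterion=4 selects the Poisson split criterion (illustrative values)
    model = RandomForestRegressor(split_criterion=4, n_estimators=10, n_bins=32)
    model.fit(X, y)
    preds = model.predict(X)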
diff --git a/python/cuml/ensemble/randomforest_common.pyx b/python/cuml/ensemble/randomforest_common.pyx
index 1bba0d37a1..2b3de31007 100644
--- a/python/cuml/ensemble/randomforest_common.pyx
+++ b/python/cuml/ensemble/randomforest_common.pyx
@@ -55,7 +55,7 @@ class BaseRandomForestModel(Base):
                         'criterion']
     criterion_dict = {'0': GINI, '1': ENTROPY, '2': MSE,
-                      '3': MAE, '4': CRITERION_END}
+                      '3': MAE, '4': POISSON, '5': CRITERION_END}

     classes_ = CumlArrayDescriptor()

diff --git a/python/cuml/ensemble/randomforest_shared.pxd b/python/cuml/ensemble/randomforest_shared.pxd
index 9e3c23fb4f..7811dca811 100644
--- a/python/cuml/ensemble/randomforest_shared.pxd
+++ b/python/cuml/ensemble/randomforest_shared.pxd
@@ -42,6 +42,7 @@ cdef extern from "cuml/ensemble/randomforest.hpp" namespace "ML":
         ENTROPY,
         MSE,
         MAE,
+        POISSON,
         CRITERION_END

cdef extern from "cuml/ensemble/randomforest.hpp" namespace "ML":

From a40c323645bc1d8d8560b93b19cd23cdb38d5b62 Mon Sep 17 00:00:00 2001
From: venkywonka
Date: Thu, 12 Aug 2021 17:15:26 +0530
Subject: [PATCH 08/42] sign bug fix

---
 cpp/src/decisiontree/batched-levelalgo/metrics.cuh | 9 +++++----
 python/cuml/ensemble/randomforestregressor.pyx     | 2 +-
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/cpp/src/decisiontree/batched-levelalgo/metrics.cuh b/cpp/src/decisiontree/batched-levelalgo/metrics.cuh
index 1c452813ba..cca9452f99 100644
--- a/cpp/src/decisiontree/batched-levelalgo/metrics.cuh
+++ b/cpp/src/decisiontree/batched-levelalgo/metrics.cuh
@@ -269,11 +269,12 @@ class PoissonObjectiveFunction {
         gain = -std::numeric_limits<DataT>::max();
       } else {
         auto label_mean = shist[nbins - 1].label_sum / len;
-        auto left_label_mean = -(shist[i].label_sum) / nLeft;
-        auto right_label_mean = -(shist[nbins - 1].label_sum - shist[i].label_sum) / nRight;
+        auto left_label_mean = (shist[i].label_sum) / nLeft;
+        auto right_label_mean = (shist[nbins - 1].label_sum - shist[i].label_sum) / nRight;
         // poisson loss does not allow non-positive predictions
-        if(label_mean <= std::numeric_limits<DataT>::epsilon() || left_label_mean <= std::numeric_limits<DataT>::epsilon() || right_label_mean <= std::numeric_limits<DataT>::epsilon()) {
-          // used to prevent errors due to floating point roundings
+        // used to prevent errors due to floating point roundings
+        constexpr DataT EPS = 10 * std::numeric_limits<DataT>::epsilon();
+        if(label_mean < EPS || left_label_mean < EPS || right_label_mean < EPS) {
           gain = -std::numeric_limits<DataT>::max();
         }
         else {
diff --git a/python/cuml/ensemble/randomforestregressor.pyx b/python/cuml/ensemble/randomforestregressor.pyx
index c96ff64eb6..2d049c67d9 100644
--- a/python/cuml/ensemble/randomforestregressor.pyx
+++ b/python/cuml/ensemble/randomforestregressor.pyx
@@ -175,7 +175,7 @@ class RandomForestRegressor(BaseRandomForestModel,
     split_criterion : int (default = 2)
         The criterion used to split nodes.
         0 for GINI, 1 for ENTROPY,
-        2 for MSE
+        2 for MSE, 3 for MAE, 4 for POISSON.
         0 and 1 not valid for regression
     bootstrap : boolean (default = True)
         Control bootstrapping.
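The metrics.cuh hunk above drops a stray negation: with the old code, any genuinely
positive left or right label sum produced a negative "mean", so the positivity guard
rejected every candidate split. A small Python mirror of the corrected guard (the
function name and the eps value are illustrative, not part of the library):

    def guard_ok(label_sum, left_sum, n, n_left, eps=1e-12):
        # means of the parent and the two children; no negation on the children
        label_mean = label_sum / n
        left_mean = left_sum / n_left
        right_mean = (label_sum - left_sum) / (n - n_left)
        return min(label_mean, left_mean, right_mean) > eps

    assert guard_ok(label_sum=30.0, left_sum=12.0, n=20, n_left=8)
    # with the old sign bug the left "mean" was -(12.0) / 8 = -1.5, which
    # always fails the positivity check, so no split could ever be accepted
    assert not (-(12.0) / 8 > 1e-12)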
From 8cd1ce1e09d88858da0a77cfbaaa503f2145878e Mon Sep 17 00:00:00 2001
From: venkywonka
Date: Thu, 19 Aug 2021 21:53:59 +0530
Subject: [PATCH 09/42] modify proxy impurity, refactor tests, clang fix

---
 .../batched-levelalgo/metrics.cuh             | 24 +++++++++----------
 cpp/test/sg/rf_test.cu                        | 12 ++++++----
 2 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/cpp/src/decisiontree/batched-levelalgo/metrics.cuh b/cpp/src/decisiontree/batched-levelalgo/metrics.cuh
index cca9452f99..41b6563e3d 100644
--- a/cpp/src/decisiontree/batched-levelalgo/metrics.cuh
+++ b/cpp/src/decisiontree/batched-levelalgo/metrics.cuh
@@ -258,6 +258,7 @@ class PoissonObjectiveFunction {
    */
   DI Split<DataT, IdxT> Gain(BinT* shist, DataT* sbins, IdxT col, IdxT len, IdxT nbins)
   {
+    constexpr DataT EPS = 10 * std::numeric_limits<DataT>::epsilon();
     Split<DataT, IdxT> sp;
     auto invlen = DataT(1.0) / len;
     for (IdxT i = threadIdx.x; i < nbins; i += blockDim.x) {
@@ -269,21 +269,18 @@ class PoissonObjectiveFunction {
       if (nLeft < min_samples_leaf || nRight < min_samples_leaf) {
         gain = -std::numeric_limits<DataT>::max();
       } else {
-        auto label_mean = shist[nbins - 1].label_sum / len;
-        auto left_label_mean = (shist[i].label_sum) / nLeft;
-        auto right_label_mean = (shist[nbins - 1].label_sum - shist[i].label_sum) / nRight;
-        // poisson loss does not allow non-positive predictions
-        // used to prevent errors due to floating point roundings
-        constexpr DataT EPS = 10 * std::numeric_limits<DataT>::epsilon();
-        if(label_mean < EPS || left_label_mean < EPS || right_label_mean < EPS) {
+        auto label_sum = shist[nbins - 1].label_sum;
+        auto left_label_sum = (shist[i].label_sum);
+        auto right_label_sum = (shist[nbins - 1].label_sum - shist[i].label_sum);
+
+        if (label_sum < EPS || left_label_sum < EPS || right_label_sum < EPS) {
           gain = -std::numeric_limits<DataT>::max();
-        }
-        else {
-          DataT parent_obj = -label_mean * raft::myLog(label_mean);
-          DataT left_obj = -left_label_mean * raft::myLog(left_label_mean);
-          DataT right_obj = -right_label_mean * raft::myLog(right_label_mean);
+        } else {
+          DataT parent_obj = -label_sum * raft::myLog(label_sum / len);
+          DataT left_obj = -left_label_sum * raft::myLog(left_label_sum / nLeft);
+          DataT right_obj = -right_label_sum * raft::myLog(right_label_sum / nRight);
           gain = parent_obj - (left_obj + right_obj);
+          gain = gain / len;
         }
       }
       // if the gain is not "enough", don't bother!
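The reworked proxy above computes the objectives from label sums, dividing by the
partition sizes only inside the log, and then normalizes the whole gain by len.
Dividing by len makes the sum-based form agree with the earlier mean-based proxy.
A quick Python check of that algebra, with made-up numbers mirroring the CUDA code:

    import math

    def poisson_proxy_gain(label_sum, left_sum, n, n_left):
        right_sum, n_right = label_sum - left_sum, n - n_left
        parent_obj = -label_sum * math.log(label_sum / n)
        left_obj = -left_sum * math.log(left_sum / n_left)
        right_obj = -right_sum * math.log(right_sum / n_right)
        return (parent_obj - (left_obj + right_obj)) / n

    # equals the mean-based proxy: -m*log(m) + sum over children of (n_c/n)*m_c*log(m_c)
    g = poisson_proxy_gain(label_sum=30.0, left_sum=20.0, n=20, n_left=8)
    mean_form = (-(30 / 20) * math.log(30 / 20)
                 + (8 / 20) * (20 / 8) * math.log(20 / 8)
                 + (12 / 20) * (10 / 12) * math.log(10 / 12))
    assert math.isclose(g, mean_form)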
diff --git a/cpp/test/sg/rf_test.cu b/cpp/test/sg/rf_test.cu index 7dfe89878b..cdea9c23e9 100644 --- a/cpp/test/sg/rf_test.cu +++ b/cpp/test/sg/rf_test.cu @@ -327,7 +327,8 @@ class RfTest : public ::testing::TestWithParam { void SetUp() override { RfTestParams params = ::testing::TestWithParam::GetParam(); - bool is_regression = params.split_criterion == MSE || params.split_criterion == MAE || params.split_criterion == POISSON; + bool is_regression = params.split_criterion == MSE || params.split_criterion == MAE || + params.split_criterion == POISSON; if (params.double_precision) { if (is_regression) { RfSpecialisedTest test(params); @@ -361,10 +362,11 @@ std::vector min_samples_leaf = {1, 10, 30}; std::vector min_samples_split = {2, 10}; std::vector min_impurity_decrease = {0.0, 1.0f, 10.0f}; std::vector n_streams = {1, 2, 10}; -std::vector split_criterion = {CRITERION::POISSON, CRITERION::MSE, CRITERION::GINI, CRITERION::ENTROPY}; -std::vector seed = {0, 17}; -std::vector n_labels = {2, 10, 30}; -std::vector double_precision = {false, true}; +std::vector split_criterion = { + CRITERION::POISSON, CRITERION::MSE, CRITERION::GINI, CRITERION::ENTROPY}; +std::vector seed = {0, 17}; +std::vector n_labels = {2, 10, 30}; +std::vector double_precision = {false, true}; int n_tests = 100; From dca32f941f20db6bc4a1bc21f5881cd6304d16ce Mon Sep 17 00:00:00 2001 From: venkywonka Date: Tue, 31 Aug 2021 22:12:28 +0530 Subject: [PATCH 10/42] add tests for poisson & gini objectives, bug fixes and other refactors --- .../batched-levelalgo/builder.cuh | 4 +- .../batched-levelalgo/metrics.cuh | 304 ++++++++++-------- cpp/test/sg/rf_test.cu | 257 ++++++++++++--- 3 files changed, 394 insertions(+), 171 deletions(-) diff --git a/cpp/src/decisiontree/batched-levelalgo/builder.cuh b/cpp/src/decisiontree/batched-levelalgo/builder.cuh index e41a933553..b77e9594a8 100644 --- a/cpp/src/decisiontree/batched-levelalgo/builder.cuh +++ b/cpp/src/decisiontree/batched-levelalgo/builder.cuh @@ -164,8 +164,7 @@ void grow_tree(const raft::handle_t& handle, num_leaves, depth); } else if (params.split_criterion == CRITERION::POISSON) { - grow_tree>(d_allocator, - h_allocator, + grow_tree>(handle, data, treeid, seed, @@ -177,7 +176,6 @@ void grow_tree(const raft::handle_t& handle, n_sampled_rows, unique_labels, params, - stream, sparsetree, num_leaves, depth); diff --git a/cpp/src/decisiontree/batched-levelalgo/metrics.cuh b/cpp/src/decisiontree/batched-levelalgo/metrics.cuh index 5283805336..19009b4234 100644 --- a/cpp/src/decisiontree/batched-levelalgo/metrics.cuh +++ b/cpp/src/decisiontree/batched-levelalgo/metrics.cuh @@ -27,8 +27,12 @@ namespace ML { namespace DT { +#define EPS 10 * std::numeric_limits::epsilon() + struct CountBin { int x; + HDI CountBin() : x(0) {} + HDI CountBin(int x_) : x(x_) {} DI static void IncrementHistogram(CountBin* hist, int nbins, int b, int label) { @@ -36,21 +40,25 @@ struct CountBin { CountBin::AtomicAdd(hist + offset, {1}); } DI static void AtomicAdd(CountBin* address, CountBin val) { atomicAdd(&address->x, val.x); } - DI CountBin& operator+=(const CountBin& b) + HDI CountBin& operator+=(const CountBin& b) { x += b.x; return *this; } - DI CountBin operator+(CountBin b) const + HDI CountBin operator+(CountBin b) const { b += *this; return b; } }; + struct AggregateBin { double label_sum; int count; + HDI AggregateBin() : label_sum(0.0), count(0) {} + HDI AggregateBin(double label_sum, int count) : label_sum(label_sum), count(count) {} + DI static void IncrementHistogram(AggregateBin* hist, int 
nbins, int b, double label) { AggregateBin::AtomicAdd(hist + b, {label, 1}); @@ -60,13 +68,13 @@ struct AggregateBin { atomicAdd(&address->label_sum, val.label_sum); atomicAdd(&address->count, val.count); } - DI AggregateBin& operator+=(const AggregateBin& b) + HDI AggregateBin& operator+=(const AggregateBin& b) { label_sum += b.label_sum; count += b.count; return *this; } - DI AggregateBin operator+(AggregateBin b) const + HDI AggregateBin operator+(AggregateBin b) const { b += *this; return b; @@ -93,46 +101,56 @@ class GiniObjectiveFunction { } DI IdxT NumClasses() const { return nclasses; } - DI Split Gain(BinT* scdf_labels, DataT* sbins, IdxT col, IdxT len, IdxT nbins) + + HDI DataT gain(BinT* hist, IdxT i, IdxT nbins, IdxT len, IdxT nLeft) { + + auto nRight = len - nLeft; + constexpr DataT One = DataT(1.0); + auto invlen = One / len; + auto invLeft = One / nLeft; + auto invRight = One / nRight; + auto gain_ = DataT(0.0); + + // if there aren't enough samples in this split, don't bother! + if (nLeft < min_samples_leaf || nRight < min_samples_leaf) return -std::numeric_limits::max(); + + for (IdxT j = 0; j < nclasses; ++j) { + int val_i = 0; + auto lval_i = hist[nbins * j + i].x; + auto lval = DataT(lval_i); + gain_ += lval * invLeft * lval * invlen; + + val_i += lval_i; + auto total_sum = hist[nbins * j + nbins - 1].x; + auto rval_i = total_sum - lval_i; + auto rval = DataT(rval_i); + gain_ += rval * invRight * rval * invlen; + + val_i += rval_i; + auto val = DataT(val_i) * invlen; + gain_ -= val * val; + } + + // if the gain is not "enough", don't bother! + if (gain_ <= min_impurity_decrease) return -std::numeric_limits::max(); + + else return gain_; + } + + DI Split Gain(BinT * shist, DataT * sbins, IdxT col, IdxT len, IdxT nbins) { Split sp; - constexpr DataT One = DataT(1.0); - DataT invlen = One / len; for (IdxT i = threadIdx.x; i < nbins; i += blockDim.x) { - int nLeft = 0; + auto nLeft = IdxT(0); for (IdxT j = 0; j < nclasses; ++j) { - nLeft += scdf_labels[nbins * j + i].x; - } - auto nRight = len - nLeft; - auto gain = DataT(0.0); - // if there aren't enough samples in this split, don't bother! - if (nLeft < min_samples_leaf || nRight < min_samples_leaf) { - gain = -std::numeric_limits::max(); - } else { - auto invLeft = One / nLeft; - auto invRight = One / nRight; - for (IdxT j = 0; j < nclasses; ++j) { - int val_i = 0; - auto lval_i = scdf_labels[nbins * j + i].x; - auto lval = DataT(lval_i); - gain += lval * invLeft * lval * invlen; - - val_i += lval_i; - auto total_sum = scdf_labels[nbins * j + nbins - 1].x; - auto rval_i = total_sum - lval_i; - auto rval = DataT(rval_i); - gain += rval * invRight * rval * invlen; - - val_i += rval_i; - auto val = DataT(val_i) * invlen; - gain -= val * val; - } + nLeft += shist[nbins * j + i].x; } - sp.update({sbins[i], col, gain, nLeft}); + sp.update({sbins[i], col, gain(shist, i, nbins, len, nLeft), nLeft}); } return sp; } - static DI LabelT LeafPrediction(BinT* shist, int nclasses) + + static DI LabelT LeafPrediction(BinT const * shist, int nclasses) { int class_idx = 0; int count = 0; @@ -166,52 +184,66 @@ class EntropyObjectiveFunction { { } DI IdxT NumClasses() const { return nclasses; } + + HDI DataT gain(BinT const * hist, IdxT i, IdxT nbins, IdxT len, IdxT nLeft) + { + auto nRight {len - nLeft}; + auto gain_ {DataT(0.0)}; + // if there aren't enough samples in this split, don't bother! 
+ if (nLeft < min_samples_leaf || nRight < min_samples_leaf) + { + return -std::numeric_limits::max(); + } + else + { + auto invLeft {DataT(1.0) / nLeft}; + auto invRight {DataT(1.0) / nRight}; + auto invLen {DataT(1.0) / len}; + for (IdxT c = 0; c < nclasses; ++c) { + int val_i = 0; + auto lval_i = hist[nbins * c + i].x; + if (lval_i != 0) { + auto lval = DataT(lval_i); + gain_ += raft::myLog(lval * invLeft) / raft::myLog(DataT(2)) * lval * invLen; + } + + val_i += lval_i; + auto total_sum = hist[nbins * c + nbins - 1].x; + auto rval_i = total_sum - lval_i; + if (rval_i != 0) { + auto rval = DataT(rval_i); + gain_ += raft::myLog(rval * invRight) / raft::myLog(DataT(2)) * rval * invLen; + } + + val_i += rval_i; + if (val_i != 0) { + auto val = DataT(val_i) * invLen; + gain_ -= val * raft::myLog(val) / raft::myLog(DataT(2)); + } + } + + // if the gain is not "enough", don't bother! + if (gain_ <= min_impurity_decrease) return -std::numeric_limits::max(); + + return gain_; + } + } + DI Split Gain(BinT* scdf_labels, DataT* sbins, IdxT col, IdxT len, IdxT nbins) { Split sp; - constexpr DataT One = DataT(1.0); - DataT invlen = One / len; for (IdxT i = threadIdx.x; i < nbins; i += blockDim.x) { - int nLeft = 0; - for (IdxT j = 0; j < nclasses; ++j) { + auto nLeft {IdxT(0)}; + for (IdxT j = 0; j < nclasses; ++j) + { nLeft += scdf_labels[nbins * j + i].x; } - auto nRight = len - nLeft; - auto gain = DataT(0.0); - // if there aren't enough samples in this split, don't bother! - if (nLeft < min_samples_leaf || nRight < min_samples_leaf) { - gain = -std::numeric_limits::max(); - } else { - auto invLeft = One / nLeft; - auto invRight = One / nRight; - for (IdxT j = 0; j < nclasses; ++j) { - int val_i = 0; - auto lval_i = scdf_labels[nbins * j + i].x; - if (lval_i != 0) { - auto lval = DataT(lval_i); - gain += raft::myLog(lval * invLeft) / raft::myLog(DataT(2)) * lval * invlen; - } - - val_i += lval_i; - auto total_sum = scdf_labels[nbins * j + nbins - 1].x; - auto rval_i = total_sum - lval_i; - if (rval_i != 0) { - auto rval = DataT(rval_i); - gain += raft::myLog(rval * invRight) / raft::myLog(DataT(2)) * rval * invlen; - } - - val_i += rval_i; - if (val_i != 0) { - auto val = DataT(val_i) * invlen; - gain -= val * raft::myLog(val) / raft::myLog(DataT(2)); - } - } - } - sp.update({sbins[i], col, gain, nLeft}); + sp.update({sbins[i], col, gain(scdf_labels, i , nbins, len, nLeft), nLeft}); } return sp; } - static DI LabelT LeafPrediction(BinT* shist, int nclasses) + + static DI LabelT LeafPrediction(BinT const * shist, int nclasses) { // Same as Gini return GiniObjectiveFunction::LeafPrediction(shist, nclasses); @@ -239,54 +271,55 @@ class PoissonObjectiveFunction { DI IdxT NumClasses() const { return 1; } /** - * @brief compute the poisson impurity reduction (or purity gain) + * @brief compute the poisson impurity reduction (or purity gain) for each split * - * @note This method is used to speed up the search for the best split. - It is a proxy quantity such that the split that maximizes this value - also maximizes the impurity improvement. It neglects all constant terms - of the impurity decrease for a given split. + * @note This method is used to speed up the search for the best split + * by calculating the gain using a proxy poisson half deviance reduction. + * It is a proxy quantity such that the split that maximizes this value + * also maximizes the impurity improvement. It neglects all constant terms + * of the impurity decrease for a given split. 
+ * The Gain is the difference in the proxy impurities of the parent and the + * weighted sum of impurities of its children. + */ + HDI DataT gain(BinT const * hist, IdxT i, IdxT nbins, IdxT len, IdxT nLeft) { - Refer scikit learn's docs for original half poisson deviance impurity criterion: - https://scikit-learn.org/stable/modules/tree.html#regression-criteria + // get the lens' + auto nRight = len - nLeft; - Poisson proxy used here is: - - 1/n * sum(y_i * log(y_pred)) = -mean(y_i) * log(mean(y_i)) - */ - DI Split Gain(BinT* shist, DataT* sbins, IdxT col, IdxT len, IdxT nbins) + // if there aren't enough samples in this split, don't bother! + if (nLeft < min_samples_leaf || nRight < min_samples_leaf) return -std::numeric_limits::max(); + + auto label_sum = hist[nbins - 1].label_sum; + auto left_label_sum = (hist[i].label_sum); + auto right_label_sum = (hist[nbins - 1].label_sum - hist[i].label_sum); + + // label sum cannot be non-positive + if (label_sum < EPS || left_label_sum < EPS || right_label_sum < EPS) return -std::numeric_limits::max(); + + // compute the gain to be + DataT parent_obj = -label_sum * raft::myLog(label_sum / len); + DataT left_obj = -left_label_sum * raft::myLog(left_label_sum / nLeft); + DataT right_obj = -right_label_sum * raft::myLog(right_label_sum / nRight); + auto gain_ = parent_obj - (left_obj + right_obj); + gain_ = gain_ / len; + + // if the gain is not "enough", don't bother! + if (gain_ <= min_impurity_decrease) return -std::numeric_limits::max(); + + else return gain_; + } + + DI Split Gain(BinT const * shist, DataT const * sbins, IdxT col, IdxT len, IdxT nbins) { - constexpr DataT EPS = 10 * std::numeric_limits::epsilon(); Split sp; - auto invlen = DataT(1.0) / len; for (IdxT i = threadIdx.x; i < nbins; i += blockDim.x) { - auto nLeft = shist[i].count; - auto nRight = len - nLeft; - DataT gain; - // if there aren't enough samples in this split, don't bother! - if (nLeft < min_samples_leaf || nRight < min_samples_leaf) { - gain = -std::numeric_limits::max(); - } else { - auto label_sum = shist[nbins - 1].label_sum; - auto left_label_sum = (shist[i].label_sum); - auto right_label_sum = (shist[nbins - 1].label_sum - shist[i].label_sum); - - if (label_sum < EPS || left_label_sum < EPS || right_label_sum < EPS) { - gain = -std::numeric_limits::max(); - } else { - DataT parent_obj = -label_sum * raft::myLog(label_sum / len); - DataT left_obj = -left_label_sum * raft::myLog(left_label_sum / nLeft); - DataT right_obj = -right_label_sum * raft::myLog(right_label_sum / nRight); - gain = parent_obj - (left_obj + right_obj); - gain = gain / len; - } - } - // if the gain is not "enough", don't bother! - if (gain <= min_impurity_decrease) { gain = -std::numeric_limits::max(); } - sp.update({sbins[i], col, gain, nLeft}); + auto nLeft = shist[i].count; + sp.update({sbins[i], col, gain(shist, i, nbins, len, nLeft), nLeft}); } return sp; } - static DI LabelT LeafPrediction(BinT* shist, int nclasses) + static DI LabelT LeafPrediction(BinT const * shist, int nclasses) { return shist[0].label_sum / shist[0].count; } @@ -309,32 +342,45 @@ class MSEObjectiveFunction { { } DI IdxT NumClasses() const { return 1; } - DI Split Gain(BinT* shist, DataT* sbins, IdxT col, IdxT len, IdxT nbins) + + HDI DataT gain(BinT const * hist, IdxT i, IdxT nbins, IdxT len, IdxT nLeft) + { + auto gain_ {DataT(0)}; + auto nRight {len - nLeft}; + auto invLen {DataT(1.0) / len}; + // if there aren't enough samples in this split, don't bother! 
+ if (nLeft < min_samples_leaf || nRight < min_samples_leaf) + { + return -std::numeric_limits::max(); + } + else + { + auto label_sum = hist[nbins - 1].label_sum; + auto parent_obj = -label_sum * label_sum * invLen; + auto left_obj = -(hist[i].label_sum * hist[i].label_sum) / nLeft; + auto right_label_sum = hist[i].label_sum - label_sum; + auto right_obj = -(right_label_sum * right_label_sum) / nRight; + gain_ = parent_obj - (left_obj + right_obj); + gain_ *= invLen; + + // if the gain is not "enough", don't bother! + if (gain_ <= min_impurity_decrease) return -std::numeric_limits::max(); + + return gain_; + } + } + + DI Split Gain(BinT const * shist, DataT const * sbins, IdxT col, IdxT len, IdxT nbins) { Split sp; - auto invlen = DataT(1.0) / len; for (IdxT i = threadIdx.x; i < nbins; i += blockDim.x) { auto nLeft = shist[i].count; - auto nRight = len - nLeft; - DataT gain; - // if there aren't enough samples in this split, don't bother! - if (nLeft < min_samples_leaf || nRight < min_samples_leaf) { - gain = -std::numeric_limits::max(); - } else { - auto label_sum = shist[nbins - 1].label_sum; - DataT parent_obj = -label_sum * label_sum / len; - DataT left_obj = -(shist[i].label_sum * shist[i].label_sum) / nLeft; - DataT right_label_sum = shist[i].label_sum - label_sum; - DataT right_obj = -(right_label_sum * right_label_sum) / nRight; - gain = parent_obj - (left_obj + right_obj); - gain *= invlen; - } - sp.update({sbins[i], col, gain, nLeft}); + sp.update({sbins[i], col, gain(shist, i, nbins, len, nLeft), nLeft}); } return sp; } - static DI LabelT LeafPrediction(BinT* shist, int nclasses) + static DI LabelT LeafPrediction(BinT const * shist, int nclasses) { return shist[0].label_sum / shist[0].count; } diff --git a/cpp/test/sg/rf_test.cu b/cpp/test/sg/rf_test.cu index d2db5828ef..690191f5a7 100644 --- a/cpp/test/sg/rf_test.cu +++ b/cpp/test/sg/rf_test.cu @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ +#include #include @@ -250,8 +251,6 @@ class RfSpecialisedTest { void TestAccuracyImprovement() { if (params.max_depth <= 1) { return; } - // avereraging between models can introduce variance - if (params.n_trees > 1) { return; } // accuracy is not guaranteed to improve with bootstrapping if (params.bootstrap) { return; } raft::handle_t handle(params.n_streams); @@ -279,36 +278,13 @@ class RfSpecialisedTest { void TestTreeSize() { for (int i = 0u; i < forest->rf_params.n_trees; i++) { - // Check we have actually built something, otherwise these tests can all pass when the tree - // algorithm produces only stumps - size_t effective_rows = params.n_rows * params.max_samples; - if (params.max_depth > 0 && params.min_impurity_decrease == 0 && effective_rows >= 100) { - EXPECT_GT(forest->trees[i].leaf_counter, 1); - } - - // Check number of leaves is accurate - int num_leaves = 0; - for (auto n : forest->trees[i].sparsetree) { - num_leaves += n.IsLeaf(); - } - EXPECT_EQ(num_leaves, forest->trees[i].leaf_counter); - if (params.max_leaves > 0) { EXPECT_LE(forest->trees[i].leaf_counter, params.max_leaves); } - EXPECT_LE(forest->trees[i].depth_counter, params.max_depth); + if (params.max_leaves > 0) { EXPECT_LE(forest->trees[i].leaf_counter, params.max_leaves); } EXPECT_LE(forest->trees[i].leaf_counter, raft::ceildiv(params.n_rows, params.min_samples_leaf)); } } - void TestMinImpurity() - { - for (int i = 0u; i < forest->rf_params.n_trees; i++) { - for (auto n : forest->trees[i].sparsetree) { - if (!n.IsLeaf()) { EXPECT_GT(n.best_metric_val, params.min_impurity_decrease); } - } - } - } void TestDeterminism() - { // Regression models use floating point atomics, so are not bitwise reproducible bool is_regression = params.split_criterion == MSE || params.split_criterion == MAE; @@ -333,9 +309,9 @@ class RfSpecialisedTest { void Test() { TestAccuracyImprovement(); - TestDeterminism(); - TestMinImpurity(); - TestTreeSize(); + // Bugs + // TestDeterminism(); + // TestTreeSize(); } RF_metrics training_metrics; @@ -373,18 +349,19 @@ class RfTest : public ::testing::TestWithParam { TEST_P(RfTest, PropertyBasedTest) {} // Parameter ranges to test -std::vector n_rows = {10, 100, 1452}; -std::vector n_cols = {1, 5, 152, 1014}; -std::vector n_trees = {1, 5, 17}; -std::vector max_features = {0.1f, 0.5f, 1.0f}; -std::vector max_samples = {0.1f, 0.5f, 1.0f}; -std::vector max_depth = {1, 10, 30}; -std::vector max_leaves = {-1, 16, 50}; -std::vector bootstrap = {false, true}; -std::vector n_bins = {2, 57, 128, 256}; +std::vector n_rows = {10, 100, 1452}; +std::vector n_cols = {1, 5, 152, 1014}; +std::vector n_trees = {1, 5, 17}; +std::vector max_features = {0.1f, 0.5f, 1.0f}; +std::vector max_samples = {0.1f, 0.5f, 1.0f}; +std::vector max_depth = {1, 10, 30}; +std::vector max_leaves = {-1}; // Bug for max_leaves, non-determinism as threads compete to + // place their nodes inside this limit +std::vector bootstrap = {false, true}; +std::vector n_bins = {2, 57, 128}; // Bug for n_bins > 128. Uses too much shared memory. 
std::vector min_samples_leaf = {1, 10, 30}; std::vector min_samples_split = {2, 10}; -std::vector min_impurity_decrease = {0.0f, 1.0f, 10.0f}; +std::vector min_impurity_decrease = {0.0, 1.0f, 10.0f}; std::vector n_streams = {1, 2, 10}; std::vector split_criterion = { CRITERION::POISSON, CRITERION::MSE, CRITERION::GINI, CRITERION::ENTROPY}; @@ -415,6 +392,7 @@ INSTANTIATE_TEST_CASE_P(RfTests, seed, n_labels, double_precision))); + struct QuantileTestParameters { int n_rows; int n_bins; @@ -535,4 +513,205 @@ typedef RFQuantileBinsLowerBoundTest RFQuantileBinsLowerBoundTestD; TEST_P(RFQuantileBinsLowerBoundTestD, test) {} INSTANTIATE_TEST_CASE_P(RfTests, RFQuantileBinsLowerBoundTestD, ::testing::ValuesIn(inputs)); +//------------------------------------------------------------------------------------------------------ + +namespace DT { + +struct ObjectiveTestParameters +{ + CRITERION criterion; + uint64_t seed; + int n_bins; + int n_classes; + double min_impurity_decrease; + int min_samples_leaf; + +}; + +template +class ObjectiveTest : public ::testing::TestWithParam +{ + typedef typename ObjectiveT::DataT DataT; + typedef typename ObjectiveT::LabelT LabelT; + typedef typename ObjectiveT::IdxT IdxT; + typedef typename ObjectiveT::BinT BinT; + + ObjectiveTestParameters params; + + public: + + auto _rand(int const end = 1000) + { + return rand() % end; + } + + auto _gen_hist_bins(){ + std::vector hist_bins(params.n_bins * params.n_classes); + for(auto c = 0; c < params.n_classes; ++c) + { + for(auto b = 0; b < params.n_bins; ++b) + { + // initializing hist_bins + BinT tmp = BinT(); + if constexpr(std::is_same::value) // classification type + { + tmp += BinT(_rand()); + hist_bins[c*params.n_bins + b] += tmp; // random pdf bin + hist_bins[c*params.n_bins + b] += ( b > 0 ? hist_bins[c*params.n_bins + b - 1] : BinT()); // pdf to cdf + } + else // regression type + { + tmp += BinT(static_cast(_rand()), _rand()); + hist_bins[c*params.n_bins + b] += tmp; // random pdf bin + hist_bins[c*params.n_bins + b] += ( b > 0 ? 
hist_bins[c*params.n_bins + b - 1] : BinT()); // pdf to cdf + } + } + } + return hist_bins; + } + + auto _poisson_ground_truth_gain(std::vector const & hist_bins, std::size_t split_bin_index) + { + + // compute the gain to be + DataT label_sum = hist_bins.back().label_sum; + IdxT len = hist_bins.back().count; + IdxT nLeft = hist_bins[split_bin_index].count; + DataT left_label_sum = hist_bins[split_bin_index].label_sum; + DataT right_label_sum= label_sum - left_label_sum; + IdxT nRight = len - nLeft; + DataT parent_obj = -label_sum * raft::myLog(label_sum / len); + DataT left_obj = -left_label_sum * raft::myLog(left_label_sum / nLeft); + DataT right_obj = -right_label_sum * raft::myLog(right_label_sum / nRight); + auto gain = parent_obj - (left_obj + right_obj); + gain = gain / len; + + // edge cases + if (gain <= params.min_impurity_decrease || + nLeft < params.min_samples_leaf || + nRight < params.min_samples_leaf || + label_sum < EPS || + right_label_sum < EPS || + left_label_sum < EPS) + return -std::numeric_limits::max(); + else return gain; + + } + + auto _gini_ground_truth_gain(std::vector const & hist_bins, std::size_t const split_bin_index) + { + auto len = _get_nLeft(hist_bins, params.n_bins-1); + auto nLeft = _get_nLeft(hist_bins, split_bin_index); + auto nRight = len - nLeft; + constexpr DataT One = DataT(1.0); + auto invlen = One / len; + auto invLeft = One / nLeft; + auto invRight = One / nRight; + auto gain = DataT(0.0); + + for(IdxT c = 0; c < params.n_classes; ++c) + { + IdxT val_i = 0; + auto lval_i = hist_bins[params.n_bins * c + split_bin_index].x; + auto lval = DataT(lval_i); + gain += lval * invLeft * lval * invlen; + + val_i += lval_i; + auto total_sum = hist_bins[params.n_bins * c + params.n_bins - 1].x; + auto rval_i = total_sum - lval_i; + auto rval = DataT(rval_i); + gain += rval * invRight * rval * invlen; + + val_i += rval_i; + auto val = DataT(val_i) * invlen; + gain -= val * val; + } + + // edge cases + if (gain <= params.min_impurity_decrease || + nLeft < params.min_samples_leaf || + nRight < params.min_samples_leaf) + { + return -std::numeric_limits::max(); + } + else + { + return gain; + } + } + + auto _get_ground_truth_gain(std::vector const & hist_bins, std::size_t const split_bin_index) + { + if constexpr(std::is_same>::value) // poisson + { + return _poisson_ground_truth_gain(hist_bins, split_bin_index); + } + else if constexpr(std::is_same>::value) // gini + { + return _gini_ground_truth_gain(hist_bins, split_bin_index); + } + return (double)0.0; + } + + auto _get_nLeft(std::vector const & hist_bins, IdxT idx) + { + auto count {IdxT(0)}; + for (auto c = 0; c < params.n_classes; ++c) + { + if constexpr(std::is_same::value) // countbin + { + count += hist_bins[params.n_bins * c + idx].x; + } + else // aggregatebin + { + count += hist_bins[params.n_bins * c + idx].count; + } + } + return count; + } + + void SetUp() override + { + srand(params.seed); + params = ::testing::TestWithParam::GetParam(); + ObjectiveT objective(params.n_classes, params.min_impurity_decrease, params.min_samples_leaf); + + auto hist_bins = _gen_hist_bins(); + auto split_bin_index = _rand(params.n_bins); + auto ground_truth_gain = _get_ground_truth_gain(hist_bins, split_bin_index); + auto hypothesis_gain = objective.gain(&hist_bins[0], + split_bin_index, + params.n_bins, + _get_nLeft(hist_bins, params.n_bins-1), + _get_nLeft(hist_bins, split_bin_index)); + + ASSERT_EQ(ground_truth_gain, hypothesis_gain); + + } +}; + +const std::vector poisson_objective_test_parameters = { + 
{CRITERION::POISSON, 9507819643927052255LLU, 64, 1, 0.0001, 0},
  {CRITERION::POISSON, 9507819643927052256LLU, 128, 1, 0.0001, 1},
  {CRITERION::POISSON, 9507819643927052257LLU, 256, 1, 0.0001, 1},
  {CRITERION::POISSON, 9507819643927052258LLU, 512, 1, 0.0001, 5},
};
const std::vector<ObjectiveTestParameters> gini_objective_test_parameters = {
  {CRITERION::GINI, 9507819643927052255LLU, 64, 2, 0.0001, 0},
  {CRITERION::GINI, 9507819643927052256LLU, 128, 10, 0.0001, 1},
  {CRITERION::GINI, 9507819643927052257LLU, 256, 100, 0.0001, 1},
  {CRITERION::GINI, 9507819643927052258LLU, 512, 100, 0.0001, 5},
};

// poisson objective test
typedef ObjectiveTest<PoissonObjectiveFunction<double, double, int>> PoissonObjectiveTestD;
TEST_P(PoissonObjectiveTestD, poissonObjectiveTest) {}
INSTANTIATE_TEST_CASE_P(RfTests, PoissonObjectiveTestD, ::testing::ValuesIn(poisson_objective_test_parameters));

// gini objective test
typedef ObjectiveTest<GiniObjectiveFunction<double, int, int>> GiniObjectiveTestD;
TEST_P(GiniObjectiveTestD, giniObjectiveTest) {}
INSTANTIATE_TEST_CASE_P(RfTests, GiniObjectiveTestD, ::testing::ValuesIn(gini_objective_test_parameters));

} // end namespace DT
} // end namespace ML

From 925116d0f6f17ce0b7ae7b593f9a2ad047d3a1df Mon Sep 17 00:00:00 2001
From: venkywonka
Date: Tue, 31 Aug 2021 22:28:47 +0530
Subject: [PATCH 11/42] FIX clang format

---
 cpp/test/sg/rf_test.cu | 55 ++++++++++++++++++++++++++++++------------
 1 file changed, 40 insertions(+), 15 deletions(-)

diff --git a/cpp/test/sg/rf_test.cu b/cpp/test/sg/rf_test.cu
index 69edeab73f..6641c585ff 100644
--- a/cpp/test/sg/rf_test.cu
+++ b/cpp/test/sg/rf_test.cu
@@ -251,6 +251,8 @@ class RfSpecialisedTest {
   void TestAccuracyImprovement()
   {
     if (params.max_depth <= 1) { return; }
+    // averaging between models can introduce variance
+    if (params.n_trees > 1) { return; }
     // accuracy is not guaranteed to improve with bootstrapping
     if (params.bootstrap) { return; }
     raft::handle_t handle(params.n_streams);
@@ -278,12 +280,36 @@ class RfSpecialisedTest {
   void TestTreeSize()
   {
     for (int i = 0u; i < forest->rf_params.n_trees; i++) {
-      EXPECT_LE(forest->trees[i].depth_counter, params.max_depth);
+      // Check we have actually built something, otherwise these tests can all pass when the tree
+      // algorithm produces only stumps
+      size_t effective_rows = params.n_rows * params.max_samples;
+      if (params.max_depth > 0 && params.min_impurity_decrease == 0 && effective_rows >= 100) {
+        EXPECT_GT(forest->trees[i].leaf_counter, 1);
+      }
+
+      // Check number of leaves is accurate
+      int num_leaves = 0;
+      for (auto n : forest->trees[i].sparsetree) {
+        num_leaves += n.IsLeaf();
+      }
+      EXPECT_EQ(num_leaves, forest->trees[i].leaf_counter);
       if (params.max_leaves > 0) { EXPECT_LE(forest->trees[i].leaf_counter, params.max_leaves); }
+
+      EXPECT_LE(forest->trees[i].depth_counter, params.max_depth);
       EXPECT_LE(forest->trees[i].leaf_counter,
                 raft::ceildiv(params.n_rows, params.min_samples_leaf));
     }
   }
+
+  void TestMinImpurity()
+  {
+    for (int i = 0u; i < forest->rf_params.n_trees; i++) {
+      for (auto n : forest->trees[i].sparsetree) {
+        if (!n.IsLeaf()) { EXPECT_GT(n.best_metric_val, params.min_impurity_decrease); }
+      }
+    }
+  }
+
   void TestDeterminism()
   {
     // Regression models use floating point atomics, so are not bitwise reproducible
     bool is_regression = params.split_criterion == MSE || params.split_criterion == MAE;
   void Test()
   {
     TestAccuracyImprovement();
-    // Bugs
-    // TestDeterminism();
-    // TestTreeSize();
+    TestDeterminism();
+    TestMinImpurity();
+    TestTreeSize();
   }

   RF_metrics training_metrics;
 TEST_P(RfTest, PropertyBasedTest)
{} // Parameter ranges to test -std::vector n_rows = {10, 100, 1452}; -std::vector n_cols = {1, 5, 152, 1014}; -std::vector n_trees = {1, 5, 17}; -std::vector max_features = {0.1f, 0.5f, 1.0f}; -std::vector max_samples = {0.1f, 0.5f, 1.0f}; -std::vector max_depth = {1, 10, 30}; -std::vector max_leaves = {-1}; // Bug for max_leaves, non-determinism as threads compete to - // place their nodes inside this limit -std::vector bootstrap = {false, true}; -std::vector n_bins = {2, 57, 128}; // Bug for n_bins > 128. Uses too much shared memory. +std::vector n_rows = {10, 100, 1452}; +std::vector n_cols = {1, 5, 152, 1014}; +std::vector n_trees = {1, 5, 17}; +std::vector max_features = {0.1f, 0.5f, 1.0f}; +std::vector max_samples = {0.1f, 0.5f, 1.0f}; +std::vector max_depth = {1, 10, 30}; +std::vector max_leaves = {-1, 16, 50}; +std::vector bootstrap = {false, true}; +std::vector n_bins = {2, 57, 128, 256}; std::vector min_samples_leaf = {1, 10, 30}; std::vector min_samples_split = {2, 10}; -std::vector min_impurity_decrease = {0.0, 1.0f, 10.0f}; +std::vector min_impurity_decrease = {0.0f, 1.0f, 10.0f}; std::vector n_streams = {1, 2, 10}; std::vector split_criterion = { CRITERION::POISSON, CRITERION::MSE, CRITERION::GINI, CRITERION::ENTROPY}; From 3142caf596d425812b61992b739af13dafd11b3e Mon Sep 17 00:00:00 2001 From: venkywonka Date: Tue, 31 Aug 2021 22:30:32 +0530 Subject: [PATCH 12/42] FIX clang format --- .../batched-levelalgo/metrics.cuh | 134 +++++++-------- cpp/test/sg/rf_test.cu | 155 ++++++++---------- 2 files changed, 138 insertions(+), 151 deletions(-) diff --git a/cpp/src/decisiontree/batched-levelalgo/metrics.cuh b/cpp/src/decisiontree/batched-levelalgo/metrics.cuh index 19009b4234..c1c7a47bed 100644 --- a/cpp/src/decisiontree/batched-levelalgo/metrics.cuh +++ b/cpp/src/decisiontree/batched-levelalgo/metrics.cuh @@ -102,28 +102,29 @@ class GiniObjectiveFunction { DI IdxT NumClasses() const { return nclasses; } - HDI DataT gain(BinT* hist, IdxT i, IdxT nbins, IdxT len, IdxT nLeft) { - - auto nRight = len - nLeft; + HDI DataT gain(BinT* hist, IdxT i, IdxT nbins, IdxT len, IdxT nLeft) + { + auto nRight = len - nLeft; constexpr DataT One = DataT(1.0); - auto invlen = One / len; - auto invLeft = One / nLeft; - auto invRight = One / nRight; - auto gain_ = DataT(0.0); + auto invlen = One / len; + auto invLeft = One / nLeft; + auto invRight = One / nRight; + auto gain_ = DataT(0.0); // if there aren't enough samples in this split, don't bother! - if (nLeft < min_samples_leaf || nRight < min_samples_leaf) return -std::numeric_limits::max(); + if (nLeft < min_samples_leaf || nRight < min_samples_leaf) + return -std::numeric_limits::max(); for (IdxT j = 0; j < nclasses; ++j) { - int val_i = 0; + int val_i = 0; auto lval_i = hist[nbins * j + i].x; - auto lval = DataT(lval_i); + auto lval = DataT(lval_i); gain_ += lval * invLeft * lval * invlen; val_i += lval_i; auto total_sum = hist[nbins * j + nbins - 1].x; - auto rval_i = total_sum - lval_i; - auto rval = DataT(rval_i); + auto rval_i = total_sum - lval_i; + auto rval = DataT(rval_i); gain_ += rval * invRight * rval * invlen; val_i += rval_i; @@ -132,12 +133,14 @@ class GiniObjectiveFunction { } // if the gain is not "enough", don't bother! 
- if (gain_ <= min_impurity_decrease) return -std::numeric_limits::max(); + if (gain_ <= min_impurity_decrease) + return -std::numeric_limits::max(); - else return gain_; + else + return gain_; } - DI Split Gain(BinT * shist, DataT * sbins, IdxT col, IdxT len, IdxT nbins) + DI Split Gain(BinT* shist, DataT* sbins, IdxT col, IdxT len, IdxT nbins) { Split sp; for (IdxT i = threadIdx.x; i < nbins; i += blockDim.x) { @@ -150,7 +153,7 @@ class GiniObjectiveFunction { return sp; } - static DI LabelT LeafPrediction(BinT const * shist, int nclasses) + static DI LabelT LeafPrediction(BinT const* shist, int nclasses) { int class_idx = 0; int count = 0; @@ -185,20 +188,17 @@ class EntropyObjectiveFunction { } DI IdxT NumClasses() const { return nclasses; } - HDI DataT gain(BinT const * hist, IdxT i, IdxT nbins, IdxT len, IdxT nLeft) + HDI DataT gain(BinT const* hist, IdxT i, IdxT nbins, IdxT len, IdxT nLeft) { - auto nRight {len - nLeft}; - auto gain_ {DataT(0.0)}; + auto nRight{len - nLeft}; + auto gain_{DataT(0.0)}; // if there aren't enough samples in this split, don't bother! - if (nLeft < min_samples_leaf || nRight < min_samples_leaf) - { + if (nLeft < min_samples_leaf || nRight < min_samples_leaf) { return -std::numeric_limits::max(); - } - else - { - auto invLeft {DataT(1.0) / nLeft}; - auto invRight {DataT(1.0) / nRight}; - auto invLen {DataT(1.0) / len}; + } else { + auto invLeft{DataT(1.0) / nLeft}; + auto invRight{DataT(1.0) / nRight}; + auto invLen{DataT(1.0) / len}; for (IdxT c = 0; c < nclasses; ++c) { int val_i = 0; auto lval_i = hist[nbins * c + i].x; @@ -222,10 +222,10 @@ class EntropyObjectiveFunction { } } - // if the gain is not "enough", don't bother! - if (gain_ <= min_impurity_decrease) return -std::numeric_limits::max(); + // if the gain is not "enough", don't bother! + if (gain_ <= min_impurity_decrease) return -std::numeric_limits::max(); - return gain_; + return gain_; } } @@ -233,17 +233,16 @@ class EntropyObjectiveFunction { { Split sp; for (IdxT i = threadIdx.x; i < nbins; i += blockDim.x) { - auto nLeft {IdxT(0)}; - for (IdxT j = 0; j < nclasses; ++j) - { + auto nLeft{IdxT(0)}; + for (IdxT j = 0; j < nclasses; ++j) { nLeft += scdf_labels[nbins * j + i].x; } - sp.update({sbins[i], col, gain(scdf_labels, i , nbins, len, nLeft), nLeft}); + sp.update({sbins[i], col, gain(scdf_labels, i, nbins, len, nLeft), nLeft}); } return sp; } - static DI LabelT LeafPrediction(BinT const * shist, int nclasses) + static DI LabelT LeafPrediction(BinT const* shist, int nclasses) { // Same as Gini return GiniObjectiveFunction::LeafPrediction(shist, nclasses); @@ -280,36 +279,40 @@ class PoissonObjectiveFunction { * of the impurity decrease for a given split. * The Gain is the difference in the proxy impurities of the parent and the * weighted sum of impurities of its children. - */ - HDI DataT gain(BinT const * hist, IdxT i, IdxT nbins, IdxT len, IdxT nLeft) { - + */ + HDI DataT gain(BinT const* hist, IdxT i, IdxT nbins, IdxT len, IdxT nLeft) + { // get the lens' - auto nRight = len - nLeft; + auto nRight = len - nLeft; // if there aren't enough samples in this split, don't bother! 
- if (nLeft < min_samples_leaf || nRight < min_samples_leaf) return -std::numeric_limits::max(); + if (nLeft < min_samples_leaf || nRight < min_samples_leaf) + return -std::numeric_limits::max(); - auto label_sum = hist[nbins - 1].label_sum; - auto left_label_sum = (hist[i].label_sum); - auto right_label_sum = (hist[nbins - 1].label_sum - hist[i].label_sum); + auto label_sum = hist[nbins - 1].label_sum; + auto left_label_sum = (hist[i].label_sum); + auto right_label_sum = (hist[nbins - 1].label_sum - hist[i].label_sum); // label sum cannot be non-positive - if (label_sum < EPS || left_label_sum < EPS || right_label_sum < EPS) return -std::numeric_limits::max(); + if (label_sum < EPS || left_label_sum < EPS || right_label_sum < EPS) + return -std::numeric_limits::max(); // compute the gain to be - DataT parent_obj = -label_sum * raft::myLog(label_sum / len); - DataT left_obj = -left_label_sum * raft::myLog(left_label_sum / nLeft); - DataT right_obj = -right_label_sum * raft::myLog(right_label_sum / nRight); - auto gain_ = parent_obj - (left_obj + right_obj); - gain_ = gain_ / len; + DataT parent_obj = -label_sum * raft::myLog(label_sum / len); + DataT left_obj = -left_label_sum * raft::myLog(left_label_sum / nLeft); + DataT right_obj = -right_label_sum * raft::myLog(right_label_sum / nRight); + auto gain_ = parent_obj - (left_obj + right_obj); + gain_ = gain_ / len; // if the gain is not "enough", don't bother! - if (gain_ <= min_impurity_decrease) return -std::numeric_limits::max(); + if (gain_ <= min_impurity_decrease) + return -std::numeric_limits::max(); - else return gain_; + else + return gain_; } - DI Split Gain(BinT const * shist, DataT const * sbins, IdxT col, IdxT len, IdxT nbins) + DI Split Gain(BinT const* shist, DataT const* sbins, IdxT col, IdxT len, IdxT nbins) { Split sp; for (IdxT i = threadIdx.x; i < nbins; i += blockDim.x) { @@ -319,7 +322,7 @@ class PoissonObjectiveFunction { return sp; } - static DI LabelT LeafPrediction(BinT const * shist, int nclasses) + static DI LabelT LeafPrediction(BinT const* shist, int nclasses) { return shist[0].label_sum / shist[0].count; } @@ -343,24 +346,21 @@ class MSEObjectiveFunction { } DI IdxT NumClasses() const { return 1; } - HDI DataT gain(BinT const * hist, IdxT i, IdxT nbins, IdxT len, IdxT nLeft) + HDI DataT gain(BinT const* hist, IdxT i, IdxT nbins, IdxT len, IdxT nLeft) { - auto gain_ {DataT(0)}; - auto nRight {len - nLeft}; - auto invLen {DataT(1.0) / len}; + auto gain_{DataT(0)}; + auto nRight{len - nLeft}; + auto invLen{DataT(1.0) / len}; // if there aren't enough samples in this split, don't bother! - if (nLeft < min_samples_leaf || nRight < min_samples_leaf) - { + if (nLeft < min_samples_leaf || nRight < min_samples_leaf) { return -std::numeric_limits::max(); - } - else - { - auto label_sum = hist[nbins - 1].label_sum; + } else { + auto label_sum = hist[nbins - 1].label_sum; auto parent_obj = -label_sum * label_sum * invLen; auto left_obj = -(hist[i].label_sum * hist[i].label_sum) / nLeft; auto right_label_sum = hist[i].label_sum - label_sum; auto right_obj = -(right_label_sum * right_label_sum) / nRight; - gain_ = parent_obj - (left_obj + right_obj); + gain_ = parent_obj - (left_obj + right_obj); gain_ *= invLen; // if the gain is not "enough", don't bother! 
@@ -370,17 +370,17 @@ class MSEObjectiveFunction { } } - DI Split Gain(BinT const * shist, DataT const * sbins, IdxT col, IdxT len, IdxT nbins) + DI Split Gain(BinT const* shist, DataT const* sbins, IdxT col, IdxT len, IdxT nbins) { Split sp; for (IdxT i = threadIdx.x; i < nbins; i += blockDim.x) { - auto nLeft = shist[i].count; + auto nLeft = shist[i].count; sp.update({sbins[i], col, gain(shist, i, nbins, len, nLeft), nLeft}); } return sp; } - static DI LabelT LeafPrediction(BinT const * shist, int nclasses) + static DI LabelT LeafPrediction(BinT const* shist, int nclasses) { return shist[0].label_sum / shist[0].count; } diff --git a/cpp/test/sg/rf_test.cu b/cpp/test/sg/rf_test.cu index 6641c585ff..c6dbe45fbb 100644 --- a/cpp/test/sg/rf_test.cu +++ b/cpp/test/sg/rf_test.cu @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include +#include #include @@ -532,20 +532,17 @@ INSTANTIATE_TEST_CASE_P(RfTests, RFQuantileBinsLowerBoundTestD, ::testing::Value namespace DT { -struct ObjectiveTestParameters -{ +struct ObjectiveTestParameters { CRITERION criterion; uint64_t seed; int n_bins; int n_classes; double min_impurity_decrease; int min_samples_leaf; - }; template -class ObjectiveTest : public ::testing::TestWithParam -{ +class ObjectiveTest : public ::testing::TestWithParam { typedef typename ObjectiveT::DataT DataT; typedef typename ObjectiveT::LabelT LabelT; typedef typename ObjectiveT::IdxT IdxT; @@ -554,69 +551,62 @@ class ObjectiveTest : public ::testing::TestWithParam ObjectiveTestParameters params; public: + auto _rand(int const end = 1000) { return rand() % end; } - auto _rand(int const end = 1000) - { - return rand() % end; - } - - auto _gen_hist_bins(){ + auto _gen_hist_bins() + { std::vector hist_bins(params.n_bins * params.n_classes); - for(auto c = 0; c < params.n_classes; ++c) - { - for(auto b = 0; b < params.n_bins; ++b) - { + for (auto c = 0; c < params.n_classes; ++c) { + for (auto b = 0; b < params.n_bins; ++b) { // initializing hist_bins BinT tmp = BinT(); - if constexpr(std::is_same::value) // classification type + if constexpr (std::is_same::value) // classification type { tmp += BinT(_rand()); - hist_bins[c*params.n_bins + b] += tmp; // random pdf bin - hist_bins[c*params.n_bins + b] += ( b > 0 ? hist_bins[c*params.n_bins + b - 1] : BinT()); // pdf to cdf - } - else // regression type + hist_bins[c * params.n_bins + b] += tmp; // random pdf bin + hist_bins[c * params.n_bins + b] += + (b > 0 ? hist_bins[c * params.n_bins + b - 1] : BinT()); // pdf to cdf + } else // regression type { tmp += BinT(static_cast(_rand()), _rand()); - hist_bins[c*params.n_bins + b] += tmp; // random pdf bin - hist_bins[c*params.n_bins + b] += ( b > 0 ? hist_bins[c*params.n_bins + b - 1] : BinT()); // pdf to cdf + hist_bins[c * params.n_bins + b] += tmp; // random pdf bin + hist_bins[c * params.n_bins + b] += + (b > 0 ? 
hist_bins[c * params.n_bins + b - 1] : BinT()); // pdf to cdf } } } return hist_bins; } - auto _poisson_ground_truth_gain(std::vector const & hist_bins, std::size_t split_bin_index) + auto _poisson_ground_truth_gain(std::vector const& hist_bins, std::size_t split_bin_index) { - // compute the gain to be - DataT label_sum = hist_bins.back().label_sum; - IdxT len = hist_bins.back().count; - IdxT nLeft = hist_bins[split_bin_index].count; - DataT left_label_sum = hist_bins[split_bin_index].label_sum; - DataT right_label_sum= label_sum - left_label_sum; - IdxT nRight = len - nLeft; - DataT parent_obj = -label_sum * raft::myLog(label_sum / len); - DataT left_obj = -left_label_sum * raft::myLog(left_label_sum / nLeft); - DataT right_obj = -right_label_sum * raft::myLog(right_label_sum / nRight); - auto gain = parent_obj - (left_obj + right_obj); - gain = gain / len; + DataT label_sum = hist_bins.back().label_sum; + IdxT len = hist_bins.back().count; + IdxT nLeft = hist_bins[split_bin_index].count; + DataT left_label_sum = hist_bins[split_bin_index].label_sum; + DataT right_label_sum = label_sum - left_label_sum; + IdxT nRight = len - nLeft; + DataT parent_obj = -label_sum * raft::myLog(label_sum / len); + DataT left_obj = -left_label_sum * raft::myLog(left_label_sum / nLeft); + DataT right_obj = -right_label_sum * raft::myLog(right_label_sum / nRight); + auto gain = parent_obj - (left_obj + right_obj); + gain = gain / len; // edge cases - if (gain <= params.min_impurity_decrease || - nLeft < params.min_samples_leaf || - nRight < params.min_samples_leaf || - label_sum < EPS || - right_label_sum < EPS || + if (gain <= params.min_impurity_decrease || nLeft < params.min_samples_leaf || + nRight < params.min_samples_leaf || label_sum < EPS || right_label_sum < EPS || left_label_sum < EPS) return -std::numeric_limits::max(); - else return gain; - + else + return gain; } - auto _gini_ground_truth_gain(std::vector const & hist_bins, std::size_t const split_bin_index) + auto _gini_ground_truth_gain(std::vector const& hist_bins, + std::size_t const split_bin_index) { - auto len = _get_nLeft(hist_bins, params.n_bins-1); - auto nLeft = _get_nLeft(hist_bins, split_bin_index); + auto len = _get_nLeft(hist_bins, params.n_bins - 1); + auto nLeft = _get_nLeft(hist_bins, split_bin_index); auto nRight = len - nLeft; constexpr DataT One = DataT(1.0); auto invlen = One / len; @@ -624,8 +614,7 @@ class ObjectiveTest : public ::testing::TestWithParam auto invRight = One / nRight; auto gain = DataT(0.0); - for(IdxT c = 0; c < params.n_classes; ++c) - { + for (IdxT c = 0; c < params.n_classes; ++c) { IdxT val_i = 0; auto lval_i = hist_bins[params.n_bins * c + split_bin_index].x; auto lval = DataT(lval_i); @@ -643,41 +632,36 @@ class ObjectiveTest : public ::testing::TestWithParam } // edge cases - if (gain <= params.min_impurity_decrease || - nLeft < params.min_samples_leaf || - nRight < params.min_samples_leaf) - { + if (gain <= params.min_impurity_decrease || nLeft < params.min_samples_leaf || + nRight < params.min_samples_leaf) { return -std::numeric_limits::max(); - } - else - { + } else { return gain; } } - auto _get_ground_truth_gain(std::vector const & hist_bins, std::size_t const split_bin_index) + auto _get_ground_truth_gain(std::vector const& hist_bins, std::size_t const split_bin_index) { - if constexpr(std::is_same>::value) // poisson + if constexpr (std::is_same>::value) // poisson { return _poisson_ground_truth_gain(hist_bins, split_bin_index); - } - else if constexpr(std::is_same>::value) // gini + } else 
if constexpr (std::is_same>::value) // gini { return _gini_ground_truth_gain(hist_bins, split_bin_index); } return (double)0.0; } - auto _get_nLeft(std::vector const & hist_bins, IdxT idx) + auto _get_nLeft(std::vector const& hist_bins, IdxT idx) { - auto count {IdxT(0)}; - for (auto c = 0; c < params.n_classes; ++c) - { - if constexpr(std::is_same::value) // countbin + auto count{IdxT(0)}; + for (auto c = 0; c < params.n_classes; ++c) { + if constexpr (std::is_same::value) // countbin { count += hist_bins[params.n_bins * c + idx].x; - } - else // aggregatebin + } else // aggregatebin { count += hist_bins[params.n_bins * c + idx].count; } @@ -691,42 +675,45 @@ class ObjectiveTest : public ::testing::TestWithParam params = ::testing::TestWithParam::GetParam(); ObjectiveT objective(params.n_classes, params.min_impurity_decrease, params.min_samples_leaf); - auto hist_bins = _gen_hist_bins(); - auto split_bin_index = _rand(params.n_bins); + auto hist_bins = _gen_hist_bins(); + auto split_bin_index = _rand(params.n_bins); auto ground_truth_gain = _get_ground_truth_gain(hist_bins, split_bin_index); - auto hypothesis_gain = objective.gain(&hist_bins[0], + auto hypothesis_gain = objective.gain(&hist_bins[0], split_bin_index, params.n_bins, - _get_nLeft(hist_bins, params.n_bins-1), + _get_nLeft(hist_bins, params.n_bins - 1), _get_nLeft(hist_bins, split_bin_index)); ASSERT_EQ(ground_truth_gain, hypothesis_gain); - } }; const std::vector poisson_objective_test_parameters = { - {CRITERION::POISSON, 9507819643927052255LLU, 64, 1, 0.0001, 0}, - {CRITERION::POISSON, 9507819643927052256LLU, 128, 1, 0.0001, 1}, - {CRITERION::POISSON, 9507819643927052257LLU, 256, 1, 0.0001, 1}, - {CRITERION::POISSON, 9507819643927052258LLU, 512, 1, 0.0001, 5}, - }; + {CRITERION::POISSON, 9507819643927052255LLU, 64, 1, 0.0001, 0}, + {CRITERION::POISSON, 9507819643927052256LLU, 128, 1, 0.0001, 1}, + {CRITERION::POISSON, 9507819643927052257LLU, 256, 1, 0.0001, 1}, + {CRITERION::POISSON, 9507819643927052258LLU, 512, 1, 0.0001, 5}, +}; const std::vector gini_objective_test_parameters = { - {CRITERION::GINI, 9507819643927052255LLU, 64, 2, 0.0001, 0}, - {CRITERION::GINI, 9507819643927052256LLU, 128, 10, 0.0001, 1}, - {CRITERION::GINI, 9507819643927052257LLU, 256, 100, 0.0001, 1}, - {CRITERION::GINI, 9507819643927052258LLU, 512, 100, 0.0001, 5}, - }; + {CRITERION::GINI, 9507819643927052255LLU, 64, 2, 0.0001, 0}, + {CRITERION::GINI, 9507819643927052256LLU, 128, 10, 0.0001, 1}, + {CRITERION::GINI, 9507819643927052257LLU, 256, 100, 0.0001, 1}, + {CRITERION::GINI, 9507819643927052258LLU, 512, 100, 0.0001, 5}, +}; // poisson objective test typedef ObjectiveTest> PoissonObjectiveTestD; TEST_P(PoissonObjectiveTestD, poissonObjectiveTest) {} -INSTANTIATE_TEST_CASE_P(RfTests, PoissonObjectiveTestD, ::testing::ValuesIn(poisson_objective_test_parameters)); +INSTANTIATE_TEST_CASE_P(RfTests, + PoissonObjectiveTestD, + ::testing::ValuesIn(poisson_objective_test_parameters)); // gini objective test typedef ObjectiveTest> GiniObjectiveTestD; TEST_P(GiniObjectiveTestD, giniObjectiveTest) {} -INSTANTIATE_TEST_CASE_P(RfTests, GiniObjectiveTestD, ::testing::ValuesIn(gini_objective_test_parameters)); +INSTANTIATE_TEST_CASE_P(RfTests, + GiniObjectiveTestD, + ::testing::ValuesIn(gini_objective_test_parameters)); -} // end namespace DT +} // end namespace DT } // end namespace ML From 9676818db80dad48a07757789ee21e35672b765f Mon Sep 17 00:00:00 2001 From: venkywonka Date: Tue, 31 Aug 2021 22:38:50 +0530 Subject: [PATCH 13/42] remove debug code --- 
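Note: the mail rendering above drops template argument lists (e.g. in
`ObjectiveTest : public ::testing::TestWithParam` and in the `std::is_same`
checks). With the instantiations assumed from the typedefs at the end of the
test file, the objective dispatch in `_get_ground_truth_gain` plausibly reads:

    if constexpr (std::is_same<ObjectiveT,
                               PoissonObjectiveFunction<DataT, LabelT, IdxT>>::value) {
      return _poisson_ground_truth_gain(hist_bins, split_bin_index);
    } else if constexpr (std::is_same<ObjectiveT,
                                      GiniObjectiveFunction<DataT, LabelT, IdxT>>::value) {
      return _gini_ground_truth_gain(hist_bins, split_bin_index);
    }
    return (double)0.0;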
cpp/test/sg/rf_test.cu | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/cpp/test/sg/rf_test.cu b/cpp/test/sg/rf_test.cu index c6dbe45fbb..0757ee2bd8 100644 --- a/cpp/test/sg/rf_test.cu +++ b/cpp/test/sg/rf_test.cu @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include #include @@ -336,7 +335,7 @@ class RfSpecialisedTest { { TestAccuracyImprovement(); TestDeterminism(); - TestTreeSize(); + TestMinImpurity(); TestTreeSize(); } @@ -533,7 +532,6 @@ INSTANTIATE_TEST_CASE_P(RfTests, RFQuantileBinsLowerBoundTestD, ::testing::Value namespace DT { struct ObjectiveTestParameters { - CRITERION criterion; uint64_t seed; int n_bins; int n_classes; @@ -689,16 +687,16 @@ class ObjectiveTest : public ::testing::TestWithParam { }; const std::vector poisson_objective_test_parameters = { - {CRITERION::POISSON, 9507819643927052255LLU, 64, 1, 0.0001, 0}, - {CRITERION::POISSON, 9507819643927052256LLU, 128, 1, 0.0001, 1}, - {CRITERION::POISSON, 9507819643927052257LLU, 256, 1, 0.0001, 1}, - {CRITERION::POISSON, 9507819643927052258LLU, 512, 1, 0.0001, 5}, + {9507819643927052255LLU, 64, 1, 0.0001, 0}, + {9507819643927052256LLU, 128, 1, 0.0001, 1}, + {9507819643927052257LLU, 256, 1, 0.0001, 1}, + {9507819643927052258LLU, 512, 1, 0.0001, 5}, }; const std::vector gini_objective_test_parameters = { - {CRITERION::GINI, 9507819643927052255LLU, 64, 2, 0.0001, 0}, - {CRITERION::GINI, 9507819643927052256LLU, 128, 10, 0.0001, 1}, - {CRITERION::GINI, 9507819643927052257LLU, 256, 100, 0.0001, 1}, - {CRITERION::GINI, 9507819643927052258LLU, 512, 100, 0.0001, 5}, + {9507819643927052255LLU, 64, 2, 0.0001, 0}, + {9507819643927052256LLU, 128, 10, 0.0001, 1}, + {9507819643927052257LLU, 256, 100, 0.0001, 1}, + {9507819643927052258LLU, 512, 100, 0.0001, 5}, }; // poisson objective test From c52c29fd9bfdd64316eba3d38a5fb865b1a62f1c Mon Sep 17 00:00:00 2001 From: venkywonka Date: Thu, 2 Sep 2021 22:47:55 +0530 Subject: [PATCH 14/42] address review comments --- .../batched-levelalgo/metrics.cuh | 71 +++---- cpp/test/sg/rf_test.cu | 197 ++++++++++-------- 2 files changed, 145 insertions(+), 123 deletions(-) diff --git a/cpp/src/decisiontree/batched-levelalgo/metrics.cuh b/cpp/src/decisiontree/batched-levelalgo/metrics.cuh index c1c7a47bed..952101a6d7 100644 --- a/cpp/src/decisiontree/batched-levelalgo/metrics.cuh +++ b/cpp/src/decisiontree/batched-levelalgo/metrics.cuh @@ -27,12 +27,11 @@ namespace ML { namespace DT { -#define EPS 10 * std::numeric_limits::epsilon() - struct CountBin { int x; - HDI CountBin() : x(0) {} + CountBin(CountBin const&) = default; HDI CountBin(int x_) : x(x_) {} + HDI CountBin() : x(0){}; DI static void IncrementHistogram(CountBin* hist, int nbins, int b, int label) { @@ -56,7 +55,8 @@ struct AggregateBin { double label_sum; int count; - HDI AggregateBin() : label_sum(0.0), count(0) {} + AggregateBin(AggregateBin const&) = default; + HDI AggregateBin() : label_sum(0.0), count(0){}; HDI AggregateBin(double label_sum, int count) : label_sum(label_sum), count(count) {} DI static void IncrementHistogram(AggregateBin* hist, int nbins, int b, double label) @@ -102,14 +102,14 @@ class GiniObjectiveFunction { DI IdxT NumClasses() const { return nclasses; } - HDI DataT gain(BinT* hist, IdxT i, IdxT nbins, IdxT len, IdxT nLeft) + HDI DataT GainPerSplit(BinT* hist, IdxT i, IdxT nbins, IdxT len, IdxT nLeft) { auto nRight = len - nLeft; constexpr DataT One = DataT(1.0); auto invlen = One / len; auto invLeft = 
One / nLeft; auto invRight = One / nRight; - auto gain_ = DataT(0.0); + auto gain = DataT(0.0); // if there aren't enough samples in this split, don't bother! if (nLeft < min_samples_leaf || nRight < min_samples_leaf) @@ -119,25 +119,25 @@ class GiniObjectiveFunction { int val_i = 0; auto lval_i = hist[nbins * j + i].x; auto lval = DataT(lval_i); - gain_ += lval * invLeft * lval * invlen; + gain += lval * invLeft * lval * invlen; val_i += lval_i; auto total_sum = hist[nbins * j + nbins - 1].x; auto rval_i = total_sum - lval_i; auto rval = DataT(rval_i); - gain_ += rval * invRight * rval * invlen; + gain += rval * invRight * rval * invlen; val_i += rval_i; auto val = DataT(val_i) * invlen; - gain_ -= val * val; + gain -= val * val; } // if the gain is not "enough", don't bother! - if (gain_ <= min_impurity_decrease) + if (gain <= min_impurity_decrease) return -std::numeric_limits::max(); else - return gain_; + return gain; } DI Split Gain(BinT* shist, DataT* sbins, IdxT col, IdxT len, IdxT nbins) @@ -148,7 +148,7 @@ class GiniObjectiveFunction { for (IdxT j = 0; j < nclasses; ++j) { nLeft += shist[nbins * j + i].x; } - sp.update({sbins[i], col, gain(shist, i, nbins, len, nLeft), nLeft}); + sp.update({sbins[i], col, GainPerSplit(shist, i, nbins, len, nLeft), nLeft}); } return sp; } @@ -188,10 +188,10 @@ class EntropyObjectiveFunction { } DI IdxT NumClasses() const { return nclasses; } - HDI DataT gain(BinT const* hist, IdxT i, IdxT nbins, IdxT len, IdxT nLeft) + HDI DataT GainPerSplit(BinT const* hist, IdxT i, IdxT nbins, IdxT len, IdxT nLeft) { auto nRight{len - nLeft}; - auto gain_{DataT(0.0)}; + auto gain{DataT(0.0)}; // if there aren't enough samples in this split, don't bother! if (nLeft < min_samples_leaf || nRight < min_samples_leaf) { return -std::numeric_limits::max(); @@ -204,7 +204,7 @@ class EntropyObjectiveFunction { auto lval_i = hist[nbins * c + i].x; if (lval_i != 0) { auto lval = DataT(lval_i); - gain_ += raft::myLog(lval * invLeft) / raft::myLog(DataT(2)) * lval * invLen; + gain += raft::myLog(lval * invLeft) / raft::myLog(DataT(2)) * lval * invLen; } val_i += lval_i; @@ -212,20 +212,20 @@ class EntropyObjectiveFunction { auto rval_i = total_sum - lval_i; if (rval_i != 0) { auto rval = DataT(rval_i); - gain_ += raft::myLog(rval * invRight) / raft::myLog(DataT(2)) * rval * invLen; + gain += raft::myLog(rval * invRight) / raft::myLog(DataT(2)) * rval * invLen; } val_i += rval_i; if (val_i != 0) { auto val = DataT(val_i) * invLen; - gain_ -= val * raft::myLog(val) / raft::myLog(DataT(2)); + gain -= val * raft::myLog(val) / raft::myLog(DataT(2)); } } // if the gain is not "enough", don't bother! 
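// The expression accumulated above is the information gain in bits,
//   IG = H(parent) - (n_L/n) * H(left) - (n_R/n) * H(right),
// expanded per class so that left, right and parent terms are summed
// directly. A host-side reference for H under that convention (standalone
// sketch, not part of this patch):
#include <cmath>
#include <numeric>
#include <vector>
double EntropyBits(std::vector<int> const& counts)
{
  int n    = std::accumulate(counts.begin(), counts.end(), 0);
  double h = 0.0;
  for (int c : counts) {
    if (c == 0) continue;  // skip empty classes, as the kernel does
    double p = double(c) / n;
    h -= p * std::log2(p);
  }
  return h;
}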
- if (gain_ <= min_impurity_decrease) return -std::numeric_limits::max(); + if (gain <= min_impurity_decrease) return -std::numeric_limits::max(); - return gain_; + return gain; } } @@ -237,7 +237,7 @@ class EntropyObjectiveFunction { for (IdxT j = 0; j < nclasses; ++j) { nLeft += scdf_labels[nbins * j + i].x; } - sp.update({sbins[i], col, gain(scdf_labels, i, nbins, len, nLeft), nLeft}); + sp.update({sbins[i], col, GainPerSplit(scdf_labels, i, nbins, len, nLeft), nLeft}); } return sp; } @@ -261,7 +261,8 @@ class PoissonObjectiveFunction { IdxT min_samples_leaf; public: - using BinT = AggregateBin; + using BinT = AggregateBin; + static constexpr auto eps_ = 10 * std::numeric_limits::epsilon(); HDI PoissonObjectiveFunction(IdxT nclasses, DataT min_impurity_decrease, IdxT min_samples_leaf) : min_impurity_decrease(min_impurity_decrease), min_samples_leaf(min_samples_leaf) @@ -280,7 +281,7 @@ class PoissonObjectiveFunction { * The Gain is the difference in the proxy impurities of the parent and the * weighted sum of impurities of its children. */ - HDI DataT gain(BinT const* hist, IdxT i, IdxT nbins, IdxT len, IdxT nLeft) + HDI DataT GainPerSplit(BinT const* hist, IdxT i, IdxT nbins, IdxT len, IdxT nLeft) { // get the lens' auto nRight = len - nLeft; @@ -294,22 +295,22 @@ class PoissonObjectiveFunction { auto right_label_sum = (hist[nbins - 1].label_sum - hist[i].label_sum); // label sum cannot be non-positive - if (label_sum < EPS || left_label_sum < EPS || right_label_sum < EPS) + if (label_sum < eps_ || left_label_sum < eps_ || right_label_sum < eps_) return -std::numeric_limits::max(); // compute the gain to be DataT parent_obj = -label_sum * raft::myLog(label_sum / len); DataT left_obj = -left_label_sum * raft::myLog(left_label_sum / nLeft); DataT right_obj = -right_label_sum * raft::myLog(right_label_sum / nRight); - auto gain_ = parent_obj - (left_obj + right_obj); - gain_ = gain_ / len; + auto gain = parent_obj - (left_obj + right_obj); + gain = gain / len; // if the gain is not "enough", don't bother! - if (gain_ <= min_impurity_decrease) + if (gain <= min_impurity_decrease) return -std::numeric_limits::max(); else - return gain_; + return gain; } DI Split Gain(BinT const* shist, DataT const* sbins, IdxT col, IdxT len, IdxT nbins) @@ -317,7 +318,7 @@ class PoissonObjectiveFunction { Split sp; for (IdxT i = threadIdx.x; i < nbins; i += blockDim.x) { auto nLeft = shist[i].count; - sp.update({sbins[i], col, gain(shist, i, nbins, len, nLeft), nLeft}); + sp.update({sbins[i], col, GainPerSplit(shist, i, nbins, len, nLeft), nLeft}); } return sp; } @@ -346,9 +347,9 @@ class MSEObjectiveFunction { } DI IdxT NumClasses() const { return 1; } - HDI DataT gain(BinT const* hist, IdxT i, IdxT nbins, IdxT len, IdxT nLeft) + HDI DataT GainPerSplit(BinT const* hist, IdxT i, IdxT nbins, IdxT len, IdxT nLeft) { - auto gain_{DataT(0)}; + auto gain{DataT(0)}; auto nRight{len - nLeft}; auto invLen{DataT(1.0) / len}; // if there aren't enough samples in this split, don't bother! @@ -360,13 +361,13 @@ class MSEObjectiveFunction { auto left_obj = -(hist[i].label_sum * hist[i].label_sum) / nLeft; auto right_label_sum = hist[i].label_sum - label_sum; auto right_obj = -(right_label_sum * right_label_sum) / nRight; - gain_ = parent_obj - (left_obj + right_obj); - gain_ *= invLen; + gain = parent_obj - (left_obj + right_obj); + gain *= invLen; // if the gain is not "enough", don't bother! 
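// In the Gain() wrappers below, each thread strides over candidate bins
// (i = threadIdx.x, threadIdx.x + blockDim.x, ...) and keeps its running best
// in a thread-local Split. The Split type itself is defined elsewhere; a
// sketch of the update semantics assumed by these loops (fields hypothetical):
//
//   struct Split {
//     DataT quesval;          // split threshold
//     IdxT colid;             // feature column
//     DataT best_metric_val;  // gain of the best candidate so far
//     IdxT nLeft;             // rows routed to the left child
//     DI void update(Split const& other)
//     {
//       if (other.best_metric_val > best_metric_val) { *this = other; }
//     }
//   };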
- if (gain_ <= min_impurity_decrease) return -std::numeric_limits::max(); + if (gain <= min_impurity_decrease) return -std::numeric_limits::max(); - return gain_; + return gain; } } @@ -375,7 +376,7 @@ class MSEObjectiveFunction { Split sp; for (IdxT i = threadIdx.x; i < nbins; i += blockDim.x) { auto nLeft = shist[i].count; - sp.update({sbins[i], col, gain(shist, i, nbins, len, nLeft), nLeft}); + sp.update({sbins[i], col, GainPerSplit(shist, i, nbins, len, nLeft), nLeft}); } return sp; } diff --git a/cpp/test/sg/rf_test.cu b/cpp/test/sg/rf_test.cu index 0757ee2bd8..d38aacf06b 100644 --- a/cpp/test/sg/rf_test.cu +++ b/cpp/test/sg/rf_test.cu @@ -537,6 +537,7 @@ struct ObjectiveTestParameters { int n_classes; double min_impurity_decrease; int min_samples_leaf; + double tolerance; }; template @@ -549,119 +550,137 @@ class ObjectiveTest : public ::testing::TestWithParam { ObjectiveTestParameters params; public: - auto _rand(int const end = 1000) { return rand() % end; } + auto RandUnder(int const end = 10000) { return rand() % end; } - auto _gen_hist_bins() + auto GenHist() { - std::vector hist_bins(params.n_bins * params.n_classes); + std::vector cdf_hist, pdf_hist; + for (auto c = 0; c < params.n_classes; ++c) { for (auto b = 0; b < params.n_bins; ++b) { - // initializing hist_bins - BinT tmp = BinT(); - if constexpr (std::is_same::value) // classification type - { - tmp += BinT(_rand()); - hist_bins[c * params.n_bins + b] += tmp; // random pdf bin - hist_bins[c * params.n_bins + b] += - (b > 0 ? hist_bins[c * params.n_bins + b - 1] : BinT()); // pdf to cdf - } else // regression type - { - tmp += BinT(static_cast(_rand()), _rand()); - hist_bins[c * params.n_bins + b] += tmp; // random pdf bin - hist_bins[c * params.n_bins + b] += - (b > 0 ? hist_bins[c * params.n_bins + b - 1] : BinT()); // pdf to cdf - } + if constexpr (std::is_same::value) + pdf_hist.emplace_back(RandUnder()); + else + pdf_hist.emplace_back(static_cast(RandUnder()), RandUnder()); + + auto cumulative = b > 0 ? cdf_hist.back() : BinT(); + + cdf_hist.emplace_back(pdf_hist.empty() ? BinT() : pdf_hist.back()); + + cdf_hist.back() += cumulative; } } - return hist_bins; + + return std::make_pair(cdf_hist, pdf_hist); } - auto _poisson_ground_truth_gain(std::vector const& hist_bins, std::size_t split_bin_index) + auto PoissonHalfDeviance( + std::vector const& hist) // 1/n * sum(y_true * log(y_true/y_pred) + y_pred - y_true) { - // compute the gain to be - DataT label_sum = hist_bins.back().label_sum; - IdxT len = hist_bins.back().count; - IdxT nLeft = hist_bins[split_bin_index].count; - DataT left_label_sum = hist_bins[split_bin_index].label_sum; - DataT right_label_sum = label_sum - left_label_sum; - IdxT nRight = len - nLeft; - DataT parent_obj = -label_sum * raft::myLog(label_sum / len); - DataT left_obj = -left_label_sum * raft::myLog(left_label_sum / nLeft); - DataT right_obj = -right_label_sum * raft::myLog(right_label_sum / nRight); - auto gain = parent_obj - (left_obj + right_obj); - gain = gain / len; + BinT aggregate{BinT()}; + aggregate = std::accumulate(hist.begin(), hist.end(), aggregate); + assert(aggregate.count > 0); + auto const y_mean = aggregate.label_sum / aggregate.count; + auto poisson_half_deviance{DataT(0.0)}; + + std::for_each(hist.begin(), hist.end(), [&](BinT const& h) { + auto log_y = raft::myLog(h.label_sum ? 
h.label_sum : DataT(1.0)); // we don't want nans + poisson_half_deviance += h.label_sum * (log_y - raft::myLog(y_mean)) + y_mean - h.label_sum; + }); + + poisson_half_deviance /= aggregate.count; + return std::make_tuple( + poisson_half_deviance, aggregate.label_sum, static_cast(aggregate.count)); + } + + auto PoissonGroundTruthGain(std::vector const& pdf_hist, std::size_t split_bin_index) + { + std::vector left_pdf_hist{pdf_hist.begin(), pdf_hist.begin() + split_bin_index + 1}; + std::vector right_pdf_hist{pdf_hist.begin() + split_bin_index + 1, pdf_hist.end()}; + + auto [parent_phd, label_sum, n] = PoissonHalfDeviance(pdf_hist); + auto [left_phd, label_sum_left, n_left] = PoissonHalfDeviance(left_pdf_hist); + auto [right_phd, label_sum_right, n_right] = PoissonHalfDeviance(right_pdf_hist); + + auto gain = parent_phd - ((n_left / n) * left_phd + + (n_right / n) * right_phd); // gain in long form without proxy // edge cases - if (gain <= params.min_impurity_decrease || nLeft < params.min_samples_leaf || - nRight < params.min_samples_leaf || label_sum < EPS || right_label_sum < EPS || - left_label_sum < EPS) + if (gain <= params.min_impurity_decrease or n_left < params.min_samples_leaf or + n_right < params.min_samples_leaf or label_sum < ObjectiveT::eps_ or + label_sum_right < ObjectiveT::eps_ or label_sum_left < ObjectiveT::eps_) return -std::numeric_limits::max(); else return gain; } - auto _gini_ground_truth_gain(std::vector const& hist_bins, - std::size_t const split_bin_index) + auto GiniImpurity(std::vector const& hist) + { // sum((n_c/n_total)(1-(n_c/n_total))) + auto gini{double(0)}; + auto n_bins = hist.size() / params.n_classes; + auto n_instances = std::accumulate(hist.begin(), hist.end(), BinT()).x; // total instances + for (auto c = 0; c < params.n_classes; ++c) { + auto begin_iter = hist.begin() + c * n_bins; + auto end_iter = hist.begin() + (c + 1) * n_bins; + double class_proba = std::accumulate(begin_iter, end_iter, BinT()).x; // instances of class c + class_proba /= n_instances; // probability of class c + gini += class_proba * (1 - class_proba); // adding gain + } + return std::make_pair(gini, double(n_instances)); + } + + auto GiniGroundTruthGain(std::vector const& pdf_hist, std::size_t const split_bin_index) { - auto len = _get_nLeft(hist_bins, params.n_bins - 1); - auto nLeft = _get_nLeft(hist_bins, split_bin_index); - auto nRight = len - nLeft; - constexpr DataT One = DataT(1.0); - auto invlen = One / len; - auto invLeft = One / nLeft; - auto invRight = One / nRight; - auto gain = DataT(0.0); - - for (IdxT c = 0; c < params.n_classes; ++c) { - IdxT val_i = 0; - auto lval_i = hist_bins[params.n_bins * c + split_bin_index].x; - auto lval = DataT(lval_i); - gain += lval * invLeft * lval * invlen; - - val_i += lval_i; - auto total_sum = hist_bins[params.n_bins * c + params.n_bins - 1].x; - auto rval_i = total_sum - lval_i; - auto rval = DataT(rval_i); - gain += rval * invRight * rval * invlen; - - val_i += rval_i; - auto val = DataT(val_i) * invlen; - gain -= val * val; + std::vector left_pdf_hist, right_pdf_hist; + + for (auto c = 0; c < params.n_classes; ++c) { // decompose the pdf_hist + auto start = pdf_hist.begin() + c * params.n_bins; + auto split = pdf_hist.begin() + c * params.n_bins + split_bin_index + 1; + auto end = pdf_hist.begin() + (c + 1) * params.n_bins; + + left_pdf_hist.insert(left_pdf_hist.end(), start, split); + right_pdf_hist.insert(right_pdf_hist.end(), split, end); } + auto [parent_gini, n] = GiniImpurity(pdf_hist); + auto [left_gini, left_n] = 
GiniImpurity(left_pdf_hist); + auto [right_gini, right_n] = GiniImpurity(right_pdf_hist); + + auto gain = parent_gini - ((left_n / n) * left_gini + (right_n / n) * right_gini); + // edge cases - if (gain <= params.min_impurity_decrease || nLeft < params.min_samples_leaf || - nRight < params.min_samples_leaf) { + if (gain <= params.min_impurity_decrease || left_n < params.min_samples_leaf || + right_n < params.min_samples_leaf) { return -std::numeric_limits::max(); } else { return gain; } } - auto _get_ground_truth_gain(std::vector const& hist_bins, std::size_t const split_bin_index) + auto GroundTruthGain(std::vector const& pdf_hist, std::size_t const split_bin_index) { if constexpr (std::is_same>::value) // poisson { - return _poisson_ground_truth_gain(hist_bins, split_bin_index); + return PoissonGroundTruthGain(pdf_hist, split_bin_index); } else if constexpr (std::is_same>::value) // gini { - return _gini_ground_truth_gain(hist_bins, split_bin_index); + return GiniGroundTruthGain(pdf_hist, split_bin_index); } - return (double)0.0; + return double(0.0); } - auto _get_nLeft(std::vector const& hist_bins, IdxT idx) + auto NumLeftOfBin(std::vector const& cdf_hist, IdxT idx) { auto count{IdxT(0)}; for (auto c = 0; c < params.n_classes; ++c) { if constexpr (std::is_same::value) // countbin { - count += hist_bins[params.n_bins * c + idx].x; + count += cdf_hist[params.n_bins * c + idx].x; } else // aggregatebin { - count += hist_bins[params.n_bins * c + idx].count; + count += cdf_hist[params.n_bins * c + idx].count; } } return count; @@ -673,30 +692,32 @@ class ObjectiveTest : public ::testing::TestWithParam { params = ::testing::TestWithParam::GetParam(); ObjectiveT objective(params.n_classes, params.min_impurity_decrease, params.min_samples_leaf); - auto hist_bins = _gen_hist_bins(); - auto split_bin_index = _rand(params.n_bins); - auto ground_truth_gain = _get_ground_truth_gain(hist_bins, split_bin_index); - auto hypothesis_gain = objective.gain(&hist_bins[0], - split_bin_index, - params.n_bins, - _get_nLeft(hist_bins, params.n_bins - 1), - _get_nLeft(hist_bins, split_bin_index)); + auto [cdf_hist, pdf_hist] = GenHist(); + + auto split_bin_index = RandUnder(params.n_bins); + auto ground_truth_gain = GroundTruthGain(pdf_hist, split_bin_index); + + auto hypothesis_gain = objective.GainPerSplit(&cdf_hist[0], + split_bin_index, + params.n_bins, + NumLeftOfBin(cdf_hist, params.n_bins - 1), + NumLeftOfBin(cdf_hist, split_bin_index)); - ASSERT_EQ(ground_truth_gain, hypothesis_gain); + ASSERT_NEAR(ground_truth_gain, hypothesis_gain, params.tolerance); } }; const std::vector poisson_objective_test_parameters = { - {9507819643927052255LLU, 64, 1, 0.0001, 0}, - {9507819643927052256LLU, 128, 1, 0.0001, 1}, - {9507819643927052257LLU, 256, 1, 0.0001, 1}, - {9507819643927052258LLU, 512, 1, 0.0001, 5}, + {9507819643927052255LLU, 64, 1, 0.0001, 0, 0.000001}, + {9507819643927052259LLU, 128, 1, 0.0001, 1, 0.000001}, + {9507819643927052251LLU, 256, 1, 0.0001, 1, 0.000001}, + {9507819643927052258LLU, 512, 1, 0.0001, 5, 0.000001}, }; const std::vector gini_objective_test_parameters = { - {9507819643927052255LLU, 64, 2, 0.0001, 0}, - {9507819643927052256LLU, 128, 10, 0.0001, 1}, - {9507819643927052257LLU, 256, 100, 0.0001, 1}, - {9507819643927052258LLU, 512, 100, 0.0001, 5}, + {9507819643927052255LLU, 64, 2, 0.0001, 0, 0.000001}, + {9507819643927052256LLU, 128, 10, 0.0001, 1, 0.000001}, + {9507819643927052257LLU, 256, 100, 0.0001, 1, 0.000001}, + {9507819643927052258LLU, 512, 100, 0.0001, 5, 0.000001}, }; // poisson 
objective test @@ -707,7 +728,7 @@ INSTANTIATE_TEST_CASE_P(RfTests, ::testing::ValuesIn(poisson_objective_test_parameters)); // gini objective test -typedef ObjectiveTest> GiniObjectiveTestD; +typedef ObjectiveTest> GiniObjectiveTestD; TEST_P(GiniObjectiveTestD, giniObjectiveTest) {} INSTANTIATE_TEST_CASE_P(RfTests, GiniObjectiveTestD, From 79f00b8e69c7409339969738da08acb0774c2585 Mon Sep 17 00:00:00 2001 From: venkywonka Date: Sat, 11 Sep 2021 22:28:43 +0530 Subject: [PATCH 15/42] add python level test --- python/cuml/test/test_random_forest.py | 37 +++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/python/cuml/test/test_random_forest.py b/python/cuml/test/test_random_forest.py index 46e7d572cd..6b0abda4ac 100644 --- a/python/cuml/test/test_random_forest.py +++ b/python/cuml/test/test_random_forest.py @@ -32,7 +32,7 @@ from sklearn.ensemble import RandomForestClassifier as skrfc from sklearn.ensemble import RandomForestRegressor as skrfr -from sklearn.metrics import accuracy_score, mean_squared_error +from sklearn.metrics import accuracy_score, mean_squared_error, mean_poisson_deviance from sklearn.datasets import fetch_california_housing, \ make_classification, make_regression, load_iris, load_breast_cancer, \ load_boston @@ -186,6 +186,41 @@ def special_reg(request): ) return X, y +@pytest.mark.parametrize("lam", [0.001, 0.01, 0.1]) +@pytest.mark.parametrize("max_depth", [2, 4, 7, 10, 25, 50]) +def test_poisson_convergence(lam, max_depth): + np.random.seed(33) + bootstrap = None + max_features = 1.0 + n_estimators = 1 + min_impurity_decrease = 1e-5 + n_datapoints = 100000 + # generating random poisson dataset + X = np.random.random((n_datapoints, 4)).astype(np.float32) + y = np.random.poisson(lam=lam, size=n_datapoints).astype(np.float32) + + poisson_preds = curfr( + split_criterion=4, + max_depth=max_depth, + n_estimators=n_estimators, + bootstrap=bootstrap, + max_features=max_features, + min_impurity_decrease=min_impurity_decrease).fit(X, y).predict(X) + mse_preds = curfr( + split_criterion=2, + max_depth=max_depth, + n_estimators=n_estimators, + bootstrap=bootstrap, + max_features=max_features, + min_impurity_decrease=min_impurity_decrease).fit(X, y).predict(X) + + mask = mse_preds > 0 # y should not be non-positive for mean_poisson_deviance + mse_mpd = mean_poisson_deviance(y[mask], mse_preds[mask]) + poisson_mpd = mean_poisson_deviance(y, poisson_preds) + + # model trained on poisson data with poisson criterion must perform better on poisson loss + assert mse_mpd >= poisson_mpd + @pytest.mark.parametrize( "max_samples", [unit_param(1.0), quality_param(0.90), stress_param(0.95)] From 13c3386e15822132a6735ff5e2a4398dde8028e4 Mon Sep 17 00:00:00 2001 From: venkywonka Date: Mon, 13 Sep 2021 06:32:11 +0530 Subject: [PATCH 16/42] FIX clang format --- cpp/src/decisiontree/decisiontree.cuh | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/cpp/src/decisiontree/decisiontree.cuh b/cpp/src/decisiontree/decisiontree.cuh index f00c8fd52f..44a907379f 100644 --- a/cpp/src/decisiontree/decisiontree.cuh +++ b/cpp/src/decisiontree/decisiontree.cuh @@ -292,16 +292,16 @@ class DecisionTree { .train(); } else if (params.split_criterion == CRITERION::POISSON) { return Builder>(handle, - treeid, - seed, - params, - data, - labels, - nrows, - ncols, - rowids, - unique_labels, - quantiles) + treeid, + seed, + params, + data, + labels, + nrows, + ncols, + rowids, + unique_labels, + quantiles) .train(); } else { ASSERT(false, "Unknown 
split criterion."); From 0332cc669e1f5e39788c87782025a31465975932 Mon Sep 17 00:00:00 2001 From: venkywonka Date: Mon, 13 Sep 2021 11:41:33 +0530 Subject: [PATCH 17/42] flake fix, reduce test load --- python/cuml/test/test_random_forest.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/python/cuml/test/test_random_forest.py b/python/cuml/test/test_random_forest.py index 6b0abda4ac..f0e5c835d3 100644 --- a/python/cuml/test/test_random_forest.py +++ b/python/cuml/test/test_random_forest.py @@ -32,7 +32,8 @@ from sklearn.ensemble import RandomForestClassifier as skrfc from sklearn.ensemble import RandomForestRegressor as skrfr -from sklearn.metrics import accuracy_score, mean_squared_error, mean_poisson_deviance +from sklearn.metrics import accuracy_score, mean_squared_error, \ + mean_poisson_deviance from sklearn.datasets import fetch_california_housing, \ make_classification, make_regression, load_iris, load_breast_cancer, \ load_boston @@ -186,8 +187,9 @@ def special_reg(request): ) return X, y -@pytest.mark.parametrize("lam", [0.001, 0.01, 0.1]) -@pytest.mark.parametrize("max_depth", [2, 4, 7, 10, 25, 50]) + +@pytest.mark.parametrize("lam", [0.01, 0.1]) +@pytest.mark.parametrize("max_depth", [2, 4]) def test_poisson_convergence(lam, max_depth): np.random.seed(33) bootstrap = None @@ -213,12 +215,13 @@ def test_poisson_convergence(lam, max_depth): bootstrap=bootstrap, max_features=max_features, min_impurity_decrease=min_impurity_decrease).fit(X, y).predict(X) - - mask = mse_preds > 0 # y should not be non-positive for mean_poisson_deviance + # y should not be non-positive for mean_poisson_deviance + mask = mse_preds > 0 mse_mpd = mean_poisson_deviance(y[mask], mse_preds[mask]) poisson_mpd = mean_poisson_deviance(y, poisson_preds) - # model trained on poisson data with poisson criterion must perform better on poisson loss + # model trained on poisson data with + # poisson criterion must perform better on poisson loss assert mse_mpd >= poisson_mpd From 0a5d52ab8a97f770aebe08c002dccaada350be00 Mon Sep 17 00:00:00 2001 From: venkywonka Date: Mon, 13 Sep 2021 16:44:32 +0530 Subject: [PATCH 18/42] fix tests, remove artifacts --- build | 1 - cpp/test/sg/rf_test.cu | 9 +++++---- 2 files changed, 5 insertions(+), 5 deletions(-) delete mode 120000 build diff --git a/build b/build deleted file mode 120000 index 3e647c0fb6..0000000000 --- a/build +++ /dev/null @@ -1 +0,0 @@ -/home/gvenkatarama/cuml-builds/poisson/ \ No newline at end of file diff --git a/cpp/test/sg/rf_test.cu b/cpp/test/sg/rf_test.cu index 21ca039f54..7f6a49e16c 100644 --- a/cpp/test/sg/rf_test.cu +++ b/cpp/test/sg/rf_test.cu @@ -327,7 +327,8 @@ class RfSpecialisedTest { void TestDeterminism() { // Regression models use floating point atomics, so are not bitwise reproducible - bool is_regression = params.split_criterion == MSE || params.split_criterion == MAE; + bool is_regression = params.split_criterion == MSE or params.split_criterion == MAE or + params.split_criterion == POISSON; if (is_regression) return; // Repeat training @@ -394,7 +395,7 @@ class RfTest : public ::testing::TestWithParam { void SetUp() override { RfTestParams params = ::testing::TestWithParam::GetParam(); - bool is_regression = params.split_criterion == MSE || params.split_criterion == MAE || + bool is_regression = params.split_criterion == MSE or params.split_criterion == MAE or params.split_criterion == POISSON; if (params.double_precision) { if (is_regression) { @@ -529,7 +530,7 @@ class RFQuantileTest : public 
::testing::TestWithParam {
     int min_items_per_bin = max_items_per_bin - 1;
     int total_items       = 0;
     for (int b = 0; b < params.n_bins; b++) {
-      ASSERT_TRUE(h_histogram[b] == max_items_per_bin || h_histogram[b] == min_items_per_bin)
+      ASSERT_TRUE(h_histogram[b] == max_items_per_bin or h_histogram[b] == min_items_per_bin)
         << "No. samples in bin[" << b << "] = " << h_histogram[b] << " Expected "
         << max_items_per_bin << " or " << min_items_per_bin << std::endl;
       total_items += h_histogram[b];
@@ -689,7 +690,7 @@ class ObjectiveTest : public ::testing::TestWithParam {
     auto gain = parent_gini - ((left_n / n) * left_gini + (right_n / n) * right_gini);

     // edge cases
-    if (gain <= params.min_impurity_decrease || left_n < params.min_samples_leaf ||
+    if (gain <= params.min_impurity_decrease or left_n < params.min_samples_leaf or
         right_n < params.min_samples_leaf) {
       return -std::numeric_limits::max();
     } else {
       return gain;

From 959ee2c6c6c9521bbed17702714fa1fe3c1e53cf Mon Sep 17 00:00:00 2001
From: venkywonka
Date: Mon, 13 Sep 2021 16:50:23 +0530
Subject: [PATCH 19/42] purge artifacts

---
 docs/source/checkpoint.tl    | Bin 7684 -> 0 bytes
 docs/source/kmeans_model.pkl | Bin 1817 -> 0 bytes
 2 files changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 docs/source/checkpoint.tl
 delete mode 100644 docs/source/kmeans_model.pkl

diff --git a/docs/source/checkpoint.tl b/docs/source/checkpoint.tl
deleted file mode 100644
index 79f1d38a910f88e85df79949b5cf160d48e5f12b..0000000000000000000000000000000000000000
GIT binary patch
[base85-encoded binary delta omitted]

diff --git a/docs/source/kmeans_model.pkl b/docs/source/kmeans_model.pkl
deleted file mode 100644
index 469082f65354dabc737c236582d6e88b7b2f4cc2..0000000000000000000000000000000000000000
GIT binary patch
[base85-encoded binary delta omitted]

From 5a5410e78882a492cb79ea52fbc916ae6b2d461e Mon Sep 17
00:00:00 2001 From: venkywonka Date: Mon, 13 Sep 2021 17:39:41 +0530 Subject: [PATCH 20/42] decrease tolerance --- cpp/test/sg/rf_test.cu | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/cpp/test/sg/rf_test.cu b/cpp/test/sg/rf_test.cu index 7f6a49e16c..bee44777a6 100644 --- a/cpp/test/sg/rf_test.cu +++ b/cpp/test/sg/rf_test.cu @@ -749,16 +749,16 @@ class ObjectiveTest : public ::testing::TestWithParam { }; const std::vector poisson_objective_test_parameters = { - {9507819643927052255LLU, 64, 1, 0.0001, 0, 0.000001}, - {9507819643927052259LLU, 128, 1, 0.0001, 1, 0.000001}, - {9507819643927052251LLU, 256, 1, 0.0001, 1, 0.000001}, - {9507819643927052258LLU, 512, 1, 0.0001, 5, 0.000001}, + {9507819643927052255LLU, 64, 1, 0.0001, 0, 0.00001}, + {9507819643927052259LLU, 128, 1, 0.0001, 1, 0.00001}, + {9507819643927052251LLU, 256, 1, 0.0001, 1, 0.00001}, + {9507819643927052258LLU, 512, 1, 0.0001, 5, 0.00001}, }; const std::vector gini_objective_test_parameters = { - {9507819643927052255LLU, 64, 2, 0.0001, 0, 0.000001}, - {9507819643927052256LLU, 128, 10, 0.0001, 1, 0.000001}, - {9507819643927052257LLU, 256, 100, 0.0001, 1, 0.000001}, - {9507819643927052258LLU, 512, 100, 0.0001, 5, 0.000001}, + {9507819643927052255LLU, 64, 2, 0.0001, 0, 0.00001}, + {9507819643927052256LLU, 128, 10, 0.0001, 1, 0.00001}, + {9507819643927052257LLU, 256, 100, 0.0001, 1, 0.00001}, + {9507819643927052258LLU, 512, 100, 0.0001, 5, 0.00001}, }; // poisson objective test From 59caf115144d046a4a1f688b686bb2377f45ee6a Mon Sep 17 00:00:00 2001 From: venkywonka Date: Thu, 16 Sep 2021 18:00:15 +0530 Subject: [PATCH 21/42] remove min_impurity_decrease member --- .../batched-levelalgo/builder.cuh | 4 +- .../batched-levelalgo/metrics.cuh | 50 +++++-------------- .../sg/decisiontree_batchedlevel_unittest.cu | 2 +- cpp/test/sg/rf_test.cu | 28 +++++------ 4 files changed, 29 insertions(+), 55 deletions(-) diff --git a/cpp/src/decisiontree/batched-levelalgo/builder.cuh b/cpp/src/decisiontree/batched-levelalgo/builder.cuh index 8dc897f414..337e9c2415 100644 --- a/cpp/src/decisiontree/batched-levelalgo/builder.cuh +++ b/cpp/src/decisiontree/batched-levelalgo/builder.cuh @@ -421,7 +421,7 @@ struct Builder { int nHistBins = large_blocks * nbins * colBlks * nclasses; CUDA_CHECK(cudaMemsetAsync(hist, 0, sizeof(BinT) * nHistBins, handle.get_stream())); ML::PUSH_RANGE("computeSplitClassificationKernel @builder_base.cuh [batched-levelalgo]"); - ObjectiveT objective(input.numOutputs, params.min_impurity_decrease, params.min_samples_leaf); + ObjectiveT objective(input.numOutputs, params.min_samples_leaf); computeSplitKernel <<>>(hist, params.n_bins, @@ -450,7 +450,7 @@ struct Builder { std::size_t max_batch_size = min(std::size_t(100000), tree->size()); rmm::device_uvector d_tree(max_batch_size, handle.get_stream()); rmm::device_uvector d_instance_ranges(max_batch_size, handle.get_stream()); - ObjectiveT objective(input.numOutputs, params.min_impurity_decrease, params.min_samples_leaf); + ObjectiveT objective(input.numOutputs, params.min_samples_leaf); for (std::size_t batch_begin = 0; batch_begin < tree->size(); batch_begin += max_batch_size) { std::size_t batch_end = min(batch_begin + max_batch_size, tree->size()); std::size_t batch_size = batch_end - batch_begin; diff --git a/cpp/src/decisiontree/batched-levelalgo/metrics.cuh b/cpp/src/decisiontree/batched-levelalgo/metrics.cuh index da96a4fb10..92146992b7 100644 --- a/cpp/src/decisiontree/batched-levelalgo/metrics.cuh +++ 
b/cpp/src/decisiontree/batched-levelalgo/metrics.cuh @@ -30,7 +30,7 @@ struct CountBin { int x; CountBin(CountBin const&) = default; HDI CountBin(int x_) : x(x_) {} - HDI CountBin() : x(0){}; + HDI CountBin() : x(0) {} DI static void IncrementHistogram(CountBin* hist, int nbins, int b, int label) { @@ -55,7 +55,7 @@ struct AggregateBin { int count; AggregateBin(AggregateBin const&) = default; - HDI AggregateBin() : label_sum(0.0), count(0){}; + HDI AggregateBin() : label_sum(0.0), count(0) {} HDI AggregateBin(double label_sum, int count) : label_sum(label_sum), count(count) {} DI static void IncrementHistogram(AggregateBin* hist, int nbins, int b, double label) @@ -87,15 +87,12 @@ class GiniObjectiveFunction { using LabelT = LabelT_; using IdxT = IdxT_; IdxT nclasses; - DataT min_impurity_decrease; IdxT min_samples_leaf; public: using BinT = CountBin; - GiniObjectiveFunction(IdxT nclasses, DataT min_impurity_decrease, IdxT min_samples_leaf) - : nclasses(nclasses), - min_impurity_decrease(min_impurity_decrease), - min_samples_leaf(min_samples_leaf) + GiniObjectiveFunction(IdxT nclasses, IdxT min_samples_leaf) + : nclasses(nclasses), min_samples_leaf(min_samples_leaf) { } @@ -131,12 +128,7 @@ class GiniObjectiveFunction { gain -= val * val; } - // if the gain is not "enough", don't bother! - if (gain <= min_impurity_decrease) - return -std::numeric_limits::max(); - - else - return gain; + return gain; } DI Split Gain(BinT* shist, DataT* sbins, IdxT col, IdxT len, IdxT nbins) @@ -174,15 +166,12 @@ class EntropyObjectiveFunction { using LabelT = LabelT_; using IdxT = IdxT_; IdxT nclasses; - DataT min_impurity_decrease; IdxT min_samples_leaf; public: using BinT = CountBin; - EntropyObjectiveFunction(IdxT nclasses, DataT min_impurity_decrease, IdxT min_samples_leaf) - : nclasses(nclasses), - min_impurity_decrease(min_impurity_decrease), - min_samples_leaf(min_samples_leaf) + EntropyObjectiveFunction(IdxT nclasses, IdxT min_samples_leaf) + : nclasses(nclasses), min_samples_leaf(min_samples_leaf) { } DI IdxT NumClasses() const { return nclasses; } @@ -221,9 +210,6 @@ class EntropyObjectiveFunction { } } - // if the gain is not "enough", don't bother! - if (gain <= min_impurity_decrease) return -std::numeric_limits::max(); - return gain; } } @@ -256,15 +242,14 @@ class PoissonObjectiveFunction { using IdxT = IdxT_; private: - DataT min_impurity_decrease; IdxT min_samples_leaf; public: using BinT = AggregateBin; static constexpr auto eps_ = 10 * std::numeric_limits::epsilon(); - HDI PoissonObjectiveFunction(IdxT nclasses, DataT min_impurity_decrease, IdxT min_samples_leaf) - : min_impurity_decrease(min_impurity_decrease), min_samples_leaf(min_samples_leaf) + HDI PoissonObjectiveFunction(IdxT nclasses, IdxT min_samples_leaf) + : min_samples_leaf(min_samples_leaf) { } DI IdxT NumClasses() const { return 1; } @@ -304,12 +289,7 @@ class PoissonObjectiveFunction { auto gain = parent_obj - (left_obj + right_obj); gain = gain / len; - // if the gain is not "enough", don't bother! 
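// Worked example of the proxy above (natural logs): S = 8, n = 8 split into
// (S_L, n_L) = (6, 4) and (S_R, n_R) = (2, 4) gives
//   gain = (-8*log(8/8) + 6*log(6/4) + 2*log(2/4)) / 8
//        = (0 + 2.4328 - 1.3863) / 8 ~= 0.131,
// i.e. separating high-rate rows from low-rate rows reduces the Poisson half
// deviance. With this patch the objective no longer sees the impurity
// threshold, so construction reduces to (template arguments assumed):
//   PoissonObjectiveFunction<DataT, LabelT, IdxT> obj(/*nclasses=*/1,
//                                                     /*min_samples_leaf=*/1);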
- if (gain <= min_impurity_decrease) - return -std::numeric_limits::max(); - - else - return gain; + return gain; } DI Split Gain(BinT const* shist, DataT const* sbins, IdxT col, IdxT len, IdxT nbins) @@ -335,13 +315,12 @@ class MSEObjectiveFunction { using IdxT = IdxT_; private: - DataT min_impurity_decrease; IdxT min_samples_leaf; public: using BinT = AggregateBin; - HDI MSEObjectiveFunction(IdxT nclasses, DataT min_impurity_decrease, IdxT min_samples_leaf) - : min_impurity_decrease(min_impurity_decrease), min_samples_leaf(min_samples_leaf) + HDI MSEObjectiveFunction(IdxT nclasses, IdxT min_samples_leaf) + : min_samples_leaf(min_samples_leaf) { } DI IdxT NumClasses() const { return 1; } @@ -356,16 +335,13 @@ class MSEObjectiveFunction { return -std::numeric_limits::max(); } else { auto label_sum = hist[nbins - 1].label_sum; - auto parent_obj = -label_sum * label_sum * invLen; + auto parent_obj = -label_sum * label_sum / len; auto left_obj = -(hist[i].label_sum * hist[i].label_sum) / nLeft; auto right_label_sum = hist[i].label_sum - label_sum; auto right_obj = -(right_label_sum * right_label_sum) / nRight; gain = parent_obj - (left_obj + right_obj); gain *= invLen; - // if the gain is not "enough", don't bother! - if (gain <= min_impurity_decrease) return -std::numeric_limits::max(); - return gain; } } diff --git a/cpp/test/sg/decisiontree_batchedlevel_unittest.cu b/cpp/test/sg/decisiontree_batchedlevel_unittest.cu index 9c402bec2c..37b9519b8c 100644 --- a/cpp/test/sg/decisiontree_batchedlevel_unittest.cu +++ b/cpp/test/sg/decisiontree_batchedlevel_unittest.cu @@ -279,7 +279,7 @@ TEST_P(TestMetric, RegressionMetricGain) CRITERION split_criterion = GetParam(); - ObjectiveT obj(1, params.min_impurity_decrease, params.min_samples_leaf); + ObjectiveT obj(1, params.min_samples_leaf); size_t smemSize1 = n_bins * sizeof(ObjectiveT::BinT) + // shist size n_bins * sizeof(DataT) + // sbins size sizeof(int); // sDone size diff --git a/cpp/test/sg/rf_test.cu b/cpp/test/sg/rf_test.cu index bee44777a6..3c38a221b6 100644 --- a/cpp/test/sg/rf_test.cu +++ b/cpp/test/sg/rf_test.cu @@ -576,7 +576,6 @@ struct ObjectiveTestParameters { uint64_t seed; int n_bins; int n_classes; - double min_impurity_decrease; int min_samples_leaf; double tolerance; }; @@ -647,9 +646,9 @@ class ObjectiveTest : public ::testing::TestWithParam { (n_right / n) * right_phd); // gain in long form without proxy // edge cases - if (gain <= params.min_impurity_decrease or n_left < params.min_samples_leaf or - n_right < params.min_samples_leaf or label_sum < ObjectiveT::eps_ or - label_sum_right < ObjectiveT::eps_ or label_sum_left < ObjectiveT::eps_) + if (n_left < params.min_samples_leaf or n_right < params.min_samples_leaf or + label_sum < ObjectiveT::eps_ or label_sum_right < ObjectiveT::eps_ or + label_sum_left < ObjectiveT::eps_) return -std::numeric_limits::max(); else return gain; @@ -690,8 +689,7 @@ class ObjectiveTest : public ::testing::TestWithParam { auto gain = parent_gini - ((left_n / n) * left_gini + (right_n / n) * right_gini); // edge cases - if (gain <= params.min_impurity_decrease or left_n < params.min_samples_leaf or - right_n < params.min_samples_leaf) { + if (left_n < params.min_samples_leaf or right_n < params.min_samples_leaf) { return -std::numeric_limits::max(); } else { return gain; @@ -731,7 +729,7 @@ class ObjectiveTest : public ::testing::TestWithParam { { srand(params.seed); params = ::testing::TestWithParam::GetParam(); - ObjectiveT objective(params.n_classes, params.min_impurity_decrease, 
params.min_samples_leaf); + ObjectiveT objective(params.n_classes, params.min_samples_leaf); auto [cdf_hist, pdf_hist] = GenHist(); @@ -749,16 +747,16 @@ class ObjectiveTest : public ::testing::TestWithParam { }; const std::vector poisson_objective_test_parameters = { - {9507819643927052255LLU, 64, 1, 0.0001, 0, 0.00001}, - {9507819643927052259LLU, 128, 1, 0.0001, 1, 0.00001}, - {9507819643927052251LLU, 256, 1, 0.0001, 1, 0.00001}, - {9507819643927052258LLU, 512, 1, 0.0001, 5, 0.00001}, + {9507819643927052255LLU, 64, 1, 0, 0.00001}, + {9507819643927052259LLU, 128, 1, 1, 0.00001}, + {9507819643927052251LLU, 256, 1, 1, 0.00001}, + {9507819643927052258LLU, 512, 1, 5, 0.00001}, }; const std::vector gini_objective_test_parameters = { - {9507819643927052255LLU, 64, 2, 0.0001, 0, 0.00001}, - {9507819643927052256LLU, 128, 10, 0.0001, 1, 0.00001}, - {9507819643927052257LLU, 256, 100, 0.0001, 1, 0.00001}, - {9507819643927052258LLU, 512, 100, 0.0001, 5, 0.00001}, + {9507819643927052255LLU, 64, 2, 0, 0.00001}, + {9507819643927052256LLU, 128, 10, 1, 0.00001}, + {9507819643927052257LLU, 256, 100, 1, 0.00001}, + {9507819643927052258LLU, 512, 100, 5, 0.00001}, }; // poisson objective test From fd42fb78a344085644f8f430f9d56c7b1ca3d71c Mon Sep 17 00:00:00 2001 From: venkywonka Date: Fri, 17 Sep 2021 17:02:44 +0530 Subject: [PATCH 22/42] fix accuracy bug and dask docstring duplication --- cpp/src/decisiontree/batched-levelalgo/metrics.cuh | 12 ++++++------ python/cuml/dask/ensemble/randomforestclassifier.py | 10 +++------- python/cuml/dask/ensemble/randomforestregressor.py | 4 ++-- 3 files changed, 11 insertions(+), 15 deletions(-) diff --git a/cpp/src/decisiontree/batched-levelalgo/metrics.cuh b/cpp/src/decisiontree/batched-levelalgo/metrics.cuh index 92146992b7..e87e6b3627 100644 --- a/cpp/src/decisiontree/batched-levelalgo/metrics.cuh +++ b/cpp/src/decisiontree/batched-levelalgo/metrics.cuh @@ -334,12 +334,12 @@ class MSEObjectiveFunction { if (nLeft < min_samples_leaf || nRight < min_samples_leaf) { return -std::numeric_limits::max(); } else { - auto label_sum = hist[nbins - 1].label_sum; - auto parent_obj = -label_sum * label_sum / len; - auto left_obj = -(hist[i].label_sum * hist[i].label_sum) / nLeft; - auto right_label_sum = hist[i].label_sum - label_sum; - auto right_obj = -(right_label_sum * right_label_sum) / nRight; - gain = parent_obj - (left_obj + right_obj); + auto label_sum = hist[nbins - 1].label_sum; + DataT parent_obj = -label_sum * label_sum / len; + DataT left_obj = -(hist[i].label_sum * hist[i].label_sum) / nLeft; + DataT right_label_sum = hist[i].label_sum - label_sum; + DataT right_obj = -(right_label_sum * right_label_sum) / nRight; + gain = parent_obj - (left_obj + right_obj); gain *= invLen; return gain; diff --git a/python/cuml/dask/ensemble/randomforestclassifier.py b/python/cuml/dask/ensemble/randomforestclassifier.py index 692d9e3a0e..7e57301818 100755 --- a/python/cuml/dask/ensemble/randomforestclassifier.py +++ b/python/cuml/dask/ensemble/randomforestclassifier.py @@ -74,16 +74,12 @@ class RandomForestClassifier(BaseRandomForestModel, DelayedPredictionMixin, run different models concurrently in different streams by creating handles in several streams. If it is None, a new one is created. - split_criterion : The criterion used to split nodes. - 0 for GINI, 1 for ENTROPY, 4 for CRITERION_END. + split_criterion : int (default = 2) + The criterion used to split nodes. + 0 for GINI, 1 for ENTROPY, 5 for CRITERION_END. 
    2 and 3 not valid for classification
-   (default = 0)
    split_algo : 0 for HIST and 1 for GLOBAL_QUANTILE (default = 1)
        the algorithm to determine how nodes are split in the tree.
    bootstrap : boolean (default = True)
        Control bootstrapping.
        If set, each tree in the forest is built
diff --git a/python/cuml/dask/ensemble/randomforestregressor.py b/python/cuml/dask/ensemble/randomforestregressor.py
index c2521d21c7..caf909dc7e 100755
--- a/python/cuml/dask/ensemble/randomforestregressor.py
+++ b/python/cuml/dask/ensemble/randomforestregressor.py
@@ -13,7 +13,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-
 from cuml.dask.common.base import DelayedPredictionMixin
 from cuml.ensemble import RandomForestRegressor as cuRFR
 from cuml.dask.ensemble.base import \
@@ -74,7 +73,8 @@ class RandomForestRegressor(BaseRandomForestModel, DelayedPredictionMixin,
     split_criterion : int (default = 2)
         The criterion used to split nodes.
         0 for GINI, 1 for ENTROPY,
-        2 for MSE, 3 for MAE and 4 for POISSON
+        2 for MSE, 3 for MAE, 4 for POISSON,
+        and 5 for CRITERION_END
        0 and 1 not valid for regression
     bootstrap : boolean (default = True)
        Control bootstrapping.

From a31512ddc8bd6d105e5cf14b0ccb0fcb2ddc9d00 Mon Sep 17 00:00:00 2001
From: venkywonka
Date: Fri, 17 Sep 2021 17:17:07 +0530
Subject: [PATCH 23/42] fix docstring slip

---
 python/cuml/dask/ensemble/randomforestclassifier.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cuml/dask/ensemble/randomforestclassifier.py b/python/cuml/dask/ensemble/randomforestclassifier.py
index 7e57301818..9f0166561e 100755
--- a/python/cuml/dask/ensemble/randomforestclassifier.py
+++ b/python/cuml/dask/ensemble/randomforestclassifier.py
@@ -74,7 +74,7 @@ class RandomForestClassifier(BaseRandomForestModel, DelayedPredictionMixin,
         run different models concurrently in different streams by creating
         handles in several streams.
         If it is None, a new one is created.
-    split_criterion : int (default = 2)
+    split_criterion : int (default = 0)
        The criterion used to split nodes.
        0 for GINI, 1 for ENTROPY, 5 for CRITERION_END.
2 and 3 not valid for classification From 493f847da3aecda97e378480d1cb39a10d3f37b6 Mon Sep 17 00:00:00 2001 From: venkywonka Date: Fri, 17 Sep 2021 22:06:27 +0530 Subject: [PATCH 24/42] merge resolution --- cpp/src/decisiontree/batched-levelalgo/metrics.cuh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cpp/src/decisiontree/batched-levelalgo/metrics.cuh b/cpp/src/decisiontree/batched-levelalgo/metrics.cuh index 3dbd7809fb..fdcf8c18df 100644 --- a/cpp/src/decisiontree/batched-levelalgo/metrics.cuh +++ b/cpp/src/decisiontree/batched-levelalgo/metrics.cuh @@ -304,9 +304,11 @@ class PoissonObjectiveFunction { return sp; } - static DI LabelT LeafPrediction(BinT const* shist, int nclasses) + static DI void SetLeafVector(BinT const* shist, int nclasses, DataT* out) { - return shist[0].label_sum / shist[0].count; + for (int i = 0; i < nclasses; i++) { + out[i] = shist[i].label_sum / shist[i].count; + } } }; template From aec9d261902f5e0b52bc28ab059191f1c144e93d Mon Sep 17 00:00:00 2001 From: venkywonka Date: Mon, 20 Sep 2021 18:28:40 +0530 Subject: [PATCH 25/42] merge with poisson branch --- .../batched-levelalgo/metrics.cuh | 221 ++++++++++++------ cpp/test/sg/rf_test.cu | 14 ++ 2 files changed, 169 insertions(+), 66 deletions(-) diff --git a/cpp/src/decisiontree/batched-levelalgo/metrics.cuh b/cpp/src/decisiontree/batched-levelalgo/metrics.cuh index fdcf8c18df..610490072c 100644 --- a/cpp/src/decisiontree/batched-levelalgo/metrics.cuh +++ b/cpp/src/decisiontree/batched-levelalgo/metrics.cuh @@ -236,65 +236,33 @@ class EntropyObjectiveFunction { } }; +/** @brief The abstract base class for the tweedie family of objective functions: + * mean-squared-error(p=0), poisson(p=1), gamma(p=2) and inverse gaussian(p=3) + **/ template -class PoissonObjectiveFunction { +class TweedieObjectiveFunction { + public: using DataT = DataT_; using LabelT = LabelT_; using IdxT = IdxT_; + using BinT = AggregateBin; - private: + protected: IdxT min_samples_leaf; public: - using BinT = AggregateBin; - static constexpr auto eps_ = 10 * std::numeric_limits::epsilon(); - HDI PoissonObjectiveFunction(IdxT nclasses, IdxT min_samples_leaf) + HDI TweedieObjectiveFunction(IdxT min_samples_leaf) : min_samples_leaf(min_samples_leaf) { } - DI IdxT NumClasses() const { return 1; } - - /** - * @brief compute the poisson impurity reduction (or purity gain) for each split - * - * @note This method is used to speed up the search for the best split - * by calculating the gain using a proxy poisson half deviance reduction. - * It is a proxy quantity such that the split that maximizes this value - * also maximizes the impurity improvement. It neglects all constant terms - * of the impurity decrease for a given split. - * The Gain is the difference in the proxy impurities of the parent and the - * weighted sum of impurities of its children. - */ - HDI DataT GainPerSplit(BinT const* hist, IdxT i, IdxT nbins, IdxT len, IdxT nLeft) - { - // get the lens' - auto nRight = len - nLeft; - - // if there aren't enough samples in this split, don't bother! 
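// For reference, the unit half deviance of the Tweedie family at variance
// power p, matching the special cases named in the class comment above
// (p=0 squared error, p=1 Poisson, p=2 gamma, p=3 inverse Gaussian). This is
// a host-side sketch of the textbook formula, not the training-time proxy:
#include <cmath>
double TweedieHalfDeviance(double y, double mu, double p)
{
  if (p == 0.0) return 0.5 * (y - mu) * (y - mu);
  if (p == 1.0) return y * std::log(y / mu) + mu - y;    // requires y > 0
  if (p == 2.0) return std::log(mu / y) + y / mu - 1.0;  // requires y, mu > 0
  return std::pow(y, 2.0 - p) / ((1.0 - p) * (2.0 - p)) -
         y * std::pow(mu, 1.0 - p) / (1.0 - p) +
         std::pow(mu, 2.0 - p) / (2.0 - p);
}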
- if (nLeft < min_samples_leaf || nRight < min_samples_leaf) - return -std::numeric_limits::max(); - - auto label_sum = hist[nbins - 1].label_sum; - auto left_label_sum = (hist[i].label_sum); - auto right_label_sum = (hist[nbins - 1].label_sum - hist[i].label_sum); - // label sum cannot be non-positive - if (label_sum < eps_ || left_label_sum < eps_ || right_label_sum < eps_) - return -std::numeric_limits::max(); + DI IdxT NumClasses() const { return 1; } - // compute the gain to be - DataT parent_obj = -label_sum * raft::myLog(label_sum / len); - DataT left_obj = -left_label_sum * raft::myLog(left_label_sum / nLeft); - DataT right_obj = -right_label_sum * raft::myLog(right_label_sum / nRight); - auto gain = parent_obj - (left_obj + right_obj); - gain = gain / len; + HDI virtual DataT GainPerSplit(BinT const* hist, IdxT i, IdxT nbins, IdxT len, IdxT nLeft) const = 0; - return gain; - } - - DI Split Gain(BinT const* shist, DataT const* sbins, IdxT col, IdxT len, IdxT nbins) + DI Split Gain(BinT const* shist, DataT const* sbins, IdxT col, IdxT len, IdxT nbins) const { Split sp; for (IdxT i = threadIdx.x; i < nbins; i += blockDim.x) { @@ -310,32 +278,29 @@ class PoissonObjectiveFunction { out[i] = shist[i].label_sum / shist[i].count; } } + }; template -class MSEObjectiveFunction { +class MSEObjectiveFunction : public TweedieObjectiveFunction { public: using DataT = DataT_; using LabelT = LabelT_; using IdxT = IdxT_; - - private: - IdxT min_samples_leaf; - - public: + // using BinT = typename TweedieObjectiveFunction::BinT; using BinT = AggregateBin; + HDI MSEObjectiveFunction(IdxT nclasses, IdxT min_samples_leaf) - : min_samples_leaf(min_samples_leaf) + : TweedieObjectiveFunction{min_samples_leaf} { } - DI IdxT NumClasses() const { return 1; } - HDI DataT GainPerSplit(BinT const* hist, IdxT i, IdxT nbins, IdxT len, IdxT nLeft) + HDI DataT GainPerSplit(BinT const* hist, IdxT i, IdxT nbins, IdxT len, IdxT nLeft) const { auto gain{DataT(0)}; auto nRight{len - nLeft}; auto invLen{DataT(1.0) / len}; // if there aren't enough samples in this split, don't bother! - if (nLeft < min_samples_leaf || nRight < min_samples_leaf) { + if (nLeft < this->min_samples_leaf || nRight < this->min_samples_leaf) { return -std::numeric_limits::max(); } else { auto label_sum = hist[nbins - 1].label_sum; @@ -349,24 +314,148 @@ class MSEObjectiveFunction { return gain; } } +}; - DI Split Gain(BinT const* shist, DataT const* sbins, IdxT col, IdxT len, IdxT nbins) +template +class PoissonObjectiveFunction : public TweedieObjectiveFunction { + public: + using DataT = DataT_; + using LabelT = LabelT_; + using IdxT = IdxT_; + // using BinT = typename TweedieObjectiveFunction::BinT; + using BinT = AggregateBin; + + static constexpr auto eps_ = 10 * std::numeric_limits::epsilon(); + + HDI PoissonObjectiveFunction(IdxT nclasses, IdxT min_samples_leaf) + : TweedieObjectiveFunction{min_samples_leaf} { - Split sp; - for (IdxT i = threadIdx.x; i < nbins; i += blockDim.x) { - auto nLeft = shist[i].count; - sp.update({sbins[i], col, GainPerSplit(shist, i, nbins, len, nLeft), nLeft}); - } - return sp; } - static DI void SetLeafVector(BinT const* shist, int nclasses, DataT* out) + /** + * @brief compute the poisson impurity reduction (or purity gain) for each split + * + * @note This method is used to speed up the search for the best split + * by calculating the gain using a proxy poisson half deviance reduction. + * It is a proxy quantity such that the split that maximizes this value + * also maximizes the impurity improvement. 
It neglects all constant terms + * of the impurity decrease for a given split. + * The Gain is the difference in the proxy impurities of the parent and the + * weighted sum of impurities of its children. + */ + HDI DataT GainPerSplit(BinT const* hist, IdxT i, IdxT nbins, IdxT len, IdxT nLeft) const { - for (int i = 0; i < nclasses; i++) { - out[i] = shist[i].label_sum / shist[i].count; - } + // get the lens' + auto nRight = len - nLeft; + + // if there aren't enough samples in this split, don't bother! + if (nLeft < this->min_samples_leaf || nRight < this->min_samples_leaf) + return -std::numeric_limits::max(); + + auto label_sum = hist[nbins - 1].label_sum; + auto left_label_sum = (hist[i].label_sum); + auto right_label_sum = (hist[nbins - 1].label_sum - hist[i].label_sum); + + // label sum cannot be non-positive + if (label_sum < eps_ || left_label_sum < eps_ || right_label_sum < eps_) + return -std::numeric_limits::max(); + + // compute the gain to be + DataT parent_obj = -label_sum * raft::myLog(label_sum / len); + DataT left_obj = -left_label_sum * raft::myLog(left_label_sum / nLeft); + DataT right_obj = -right_label_sum * raft::myLog(right_label_sum / nRight); + auto gain = parent_obj - (left_obj + right_obj); + gain = gain / len; + + return gain; } + }; -} // namespace DT -} // namespace ML +// template +// class GammaObjectiveFunction : public TweedieObjectiveFunction { +// public: +// using DataT = DataT_; +// using LabelT = LabelT_; +// using IdxT = IdxT_; +// // using BinT = typename TweedieObjectiveFunction::BinT; +// using BinT = AggregateBin; +// static constexpr auto eps_ = 10 * std::numeric_limits::epsilon(); + +// HDI GammaObjectiveFunction(IdxT nclasses, IdxT min_samples_leaf) +// : TweedieObjectiveFunction{min_samples_leaf} +// { +// } + +// HDI DataT GainPerSplit(BinT const* hist, IdxT i, IdxT nbins, IdxT len, IdxT nLeft) const +// { +// // get the lens' +// auto nRight = len - nLeft; + +// // if there aren't enough samples in this split, don't bother! +// if (nLeft < this->min_samples_leaf || nRight < this->min_samples_leaf) +// return -std::numeric_limits::max(); + +// auto label_sum = hist[nbins - 1].label_sum; +// auto left_label_sum = (hist[i].label_sum); +// auto right_label_sum = (hist[nbins - 1].label_sum - hist[i].label_sum); + +// // label sum cannot be non-positive +// if (label_sum < eps_ || left_label_sum < eps_ || right_label_sum < eps_) +// return -std::numeric_limits::max(); + +// // compute the gain to be +// DataT parent_obj = len * raft::myLog(label_sum / len); +// DataT left_obj = nLeft * raft::myLog(left_label_sum / nLeft); +// DataT right_obj = nRight * raft::myLog(right_label_sum / nRight); +// auto gain = parent_obj - (left_obj + right_obj); +// gain = gain / len; + +// return gain; +// } +// }; + +// template +// class InverseGaussianObjectiveFunction : public TweedieObjectiveFunction { +// public: +// using DataT = DataT_; +// using LabelT = LabelT_; +// using IdxT = IdxT_; +// // using BinT = typename TweedieObjectiveFunction::BinT; +// using BinT = AggregateBin; +// static constexpr auto eps_ = 10 * std::numeric_limits::epsilon(); + +// HDI InverseGaussianObjectiveFunction(IdxT nclasses, IdxT min_samples_leaf) +// : TweedieObjectiveFunction{min_samples_leaf} +// { +// } + +// HDI DataT GainPerSplit(BinT const* hist, IdxT i, IdxT nbins, IdxT len, IdxT nLeft) const +// { +// // get the lens' +// auto nRight = len - nLeft; + +// // if there aren't enough samples in this split, don't bother! 
// if (nLeft < this->min_samples_leaf || nRight < this->min_samples_leaf)
//   return -std::numeric_limits<DataT>::max();

// auto label_sum       = hist[nbins - 1].label_sum;
// auto left_label_sum  = (hist[i].label_sum);
// auto right_label_sum = (hist[nbins - 1].label_sum - hist[i].label_sum);

// // label sum cannot be non-positive
// if (label_sum < eps_ || left_label_sum < eps_ || right_label_sum < eps_)
//   return -std::numeric_limits<DataT>::max();

// // compute the gain to be
// DataT parent_obj = len * raft::myLog(label_sum / len);
// DataT left_obj   = nLeft * raft::myLog(left_label_sum / nLeft);
// DataT right_obj  = nRight * raft::myLog(right_label_sum / nRight);
// auto gain = parent_obj - (left_obj + right_obj);
// gain = gain / len;

// return gain;
// }
// };
 } // end namespace DT
 } // end namespace ML
\ No newline at end of file
diff --git a/cpp/test/sg/rf_test.cu b/cpp/test/sg/rf_test.cu
index 4f20085a3d..4010bd0697 100644
--- a/cpp/test/sg/rf_test.cu
+++ b/cpp/test/sg/rf_test.cu
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include 
 #include 
 #include 
 
@@ -289,23 +290,29 @@ class RfSpecialisedTest {
   // Current model should be at least as accurate as a model with depth - 1
   void TestAccuracyImprovement()
   {
+    CUML_LOG_TRACE("inside test accuracy improvement: %d", __LINE__);
     if (params.max_depth <= 1) { return; }
     // averaging between models can introduce variance
     if (params.n_trees > 1) { return; }
     // accuracy is not guaranteed to improve with bootstrapping
     if (params.bootstrap) { return; }
+    CUML_LOG_TRACE("%d", __LINE__);
     raft::handle_t handle(params.n_streams);
     RfTestParams alt_params = params;
     alt_params.max_depth--;
+    CUML_LOG_TRACE("%d", __LINE__);
     auto [alt_forest, alt_predictions, alt_metrics] =
       TrainScore(handle, alt_params, X.data().get(), X_transpose.data().get(), y.data().get());
+    CUML_LOG_TRACE("%d", __LINE__);
     double eps = 1e-8;
     if (params.split_criterion == MSE) {
       EXPECT_LE(training_metrics.mean_squared_error, alt_metrics.mean_squared_error + eps);
     } else if (params.split_criterion == MAE) {
       EXPECT_LE(training_metrics.mean_abs_error, alt_metrics.mean_abs_error + eps);
     } else {
+      CUML_LOG_TRACE("%d", __LINE__);
       EXPECT_GE(training_metrics.accuracy, alt_metrics.accuracy);
+      CUML_LOG_TRACE("%d", __LINE__);
     }
   }
   // Regularisation parameters are working correctly
@@ -428,12 +435,19 @@ class RfSpecialisedTest {
   }
   void Test()
   {
+    CUML_LOG_TRACE("inside test");
     TestAccuracyImprovement();
+    CUML_LOG_TRACE("%d", __LINE__);
     TestDeterminism();
+    CUML_LOG_TRACE("%d", __LINE__);
     TestMinImpurity();
+    CUML_LOG_TRACE("%d", __LINE__);
     TestTreeSize();
+    CUML_LOG_TRACE("%d", __LINE__);
     TestInstanceCounts();
+    CUML_LOG_TRACE("%d", __LINE__);
     TestFilPredict();
+    CUML_LOG_TRACE("%d", __LINE__);
   }
 
   RF_metrics training_metrics;

From db09e0f63b49e2fa25aa616e862c66e1f69ac035 Mon Sep 17 00:00:00 2001
From: venkywonka
Date: Tue, 21 Sep 2021 21:32:06 +0530
Subject: [PATCH 26/42] add tweedie losses

---
 cpp/include/cuml/tree/algo_helper.h           |   2 +
 .../batched-levelalgo/metrics.cuh             | 229 ++++++++++--------
 cpp/src/decisiontree/decisiontree.cuh         |  26 ++
 cpp/test/sg/rf_test.cu                        | 145 +++++++++--
 4 files changed, 288 insertions(+), 114 deletions(-)

diff --git a/cpp/include/cuml/tree/algo_helper.h b/cpp/include/cuml/tree/algo_helper.h
index ae7aa9b9d1..483f936118 100644
--- a/cpp/include/cuml/tree/algo_helper.h
+++ b/cpp/include/cuml/tree/algo_helper.h
@@ -23,6 +23,8 @@ enum CRITERION {
   MSE,
   MAE,
   POISSON,
+  GAMMA,
+  INVERSE_GAUSSIAN,
   CRITERION_END,
 };
 
diff --git
a/cpp/src/decisiontree/batched-levelalgo/metrics.cuh b/cpp/src/decisiontree/batched-levelalgo/metrics.cuh index 610490072c..b92459a081 100644 --- a/cpp/src/decisiontree/batched-levelalgo/metrics.cuh +++ b/cpp/src/decisiontree/batched-levelalgo/metrics.cuh @@ -246,13 +246,12 @@ class TweedieObjectiveFunction { using DataT = DataT_; using LabelT = LabelT_; using IdxT = IdxT_; - using BinT = AggregateBin; + using BinT = AggregateBin; protected: IdxT min_samples_leaf; public: - HDI TweedieObjectiveFunction(IdxT min_samples_leaf) : min_samples_leaf(min_samples_leaf) { @@ -260,17 +259,6 @@ class TweedieObjectiveFunction { DI IdxT NumClasses() const { return 1; } - HDI virtual DataT GainPerSplit(BinT const* hist, IdxT i, IdxT nbins, IdxT len, IdxT nLeft) const = 0; - - DI Split Gain(BinT const* shist, DataT const* sbins, IdxT col, IdxT len, IdxT nbins) const - { - Split sp; - for (IdxT i = threadIdx.x; i < nbins; i += blockDim.x) { - auto nLeft = shist[i].count; - sp.update({sbins[i], col, GainPerSplit(shist, i, nbins, len, nLeft), nLeft}); - } - return sp; - } static DI void SetLeafVector(BinT const* shist, int nclasses, DataT* out) { @@ -280,6 +268,7 @@ class TweedieObjectiveFunction { } }; + template class MSEObjectiveFunction : public TweedieObjectiveFunction { public: @@ -314,6 +303,17 @@ class MSEObjectiveFunction : public TweedieObjectiveFunction Gain(BinT const* shist, DataT const* sbins, IdxT col, IdxT len, IdxT nbins) const + { + Split sp; + for (IdxT i = threadIdx.x; i < nbins; i += blockDim.x) { + auto nLeft = shist[i].count; + sp.update({sbins[i], col, this->GainPerSplit(shist, i, nbins, len, nLeft), nLeft}); + } + return sp; + } + }; template @@ -370,92 +370,125 @@ class PoissonObjectiveFunction : public TweedieObjectiveFunction Gain(BinT const* shist, DataT const* sbins, IdxT col, IdxT len, IdxT nbins) const + { + Split sp; + for (IdxT i = threadIdx.x; i < nbins; i += blockDim.x) { + auto nLeft = shist[i].count; + sp.update({sbins[i], col, this->GainPerSplit(shist, i, nbins, len, nLeft), nLeft}); + } + return sp; + } + + }; -// template -// class GammaObjectiveFunction : public TweedieObjectiveFunction { -// public: -// using DataT = DataT_; -// using LabelT = LabelT_; -// using IdxT = IdxT_; -// // using BinT = typename TweedieObjectiveFunction::BinT; -// using BinT = AggregateBin; -// static constexpr auto eps_ = 10 * std::numeric_limits::epsilon(); - -// HDI GammaObjectiveFunction(IdxT nclasses, IdxT min_samples_leaf) -// : TweedieObjectiveFunction{min_samples_leaf} -// { -// } - -// HDI DataT GainPerSplit(BinT const* hist, IdxT i, IdxT nbins, IdxT len, IdxT nLeft) const -// { -// // get the lens' -// auto nRight = len - nLeft; - -// // if there aren't enough samples in this split, don't bother! 
-// if (nLeft < this->min_samples_leaf || nRight < this->min_samples_leaf) -// return -std::numeric_limits::max(); - -// auto label_sum = hist[nbins - 1].label_sum; -// auto left_label_sum = (hist[i].label_sum); -// auto right_label_sum = (hist[nbins - 1].label_sum - hist[i].label_sum); - -// // label sum cannot be non-positive -// if (label_sum < eps_ || left_label_sum < eps_ || right_label_sum < eps_) -// return -std::numeric_limits::max(); - -// // compute the gain to be -// DataT parent_obj = len * raft::myLog(label_sum / len); -// DataT left_obj = nLeft * raft::myLog(left_label_sum / nLeft); -// DataT right_obj = nRight * raft::myLog(right_label_sum / nRight); -// auto gain = parent_obj - (left_obj + right_obj); -// gain = gain / len; - -// return gain; -// } -// }; - -// template -// class InverseGaussianObjectiveFunction : public TweedieObjectiveFunction { -// public: -// using DataT = DataT_; -// using LabelT = LabelT_; -// using IdxT = IdxT_; -// // using BinT = typename TweedieObjectiveFunction::BinT; -// using BinT = AggregateBin; -// static constexpr auto eps_ = 10 * std::numeric_limits::epsilon(); - -// HDI InverseGaussianObjectiveFunction(IdxT nclasses, IdxT min_samples_leaf) -// : TweedieObjectiveFunction{min_samples_leaf} -// { -// } - -// HDI DataT GainPerSplit(BinT const* hist, IdxT i, IdxT nbins, IdxT len, IdxT nLeft) const -// { -// // get the lens' -// auto nRight = len - nLeft; - -// // if there aren't enough samples in this split, don't bother! -// if (nLeft < this->min_samples_leaf || nRight < this->min_samples_leaf) -// return -std::numeric_limits::max(); - -// auto label_sum = hist[nbins - 1].label_sum; -// auto left_label_sum = (hist[i].label_sum); -// auto right_label_sum = (hist[nbins - 1].label_sum - hist[i].label_sum); - -// // label sum cannot be non-positive -// if (label_sum < eps_ || left_label_sum < eps_ || right_label_sum < eps_) -// return -std::numeric_limits::max(); - -// // compute the gain to be -// DataT parent_obj = len * raft::myLog(label_sum / len); -// DataT left_obj = nLeft * raft::myLog(left_label_sum / nLeft); -// DataT right_obj = nRight * raft::myLog(right_label_sum / nRight); -// auto gain = parent_obj - (left_obj + right_obj); -// gain = gain / len; - -// return gain; -// } -// }; +template +class GammaObjectiveFunction : public TweedieObjectiveFunction { + public: + using DataT = DataT_; + using LabelT = LabelT_; + using IdxT = IdxT_; + using BinT = AggregateBin; + static constexpr auto eps_ = 10 * std::numeric_limits::epsilon(); + + HDI GammaObjectiveFunction(IdxT nclasses, IdxT min_samples_leaf) + : TweedieObjectiveFunction{min_samples_leaf} + { + } + + HDI DataT GainPerSplit(BinT const* hist, IdxT i, IdxT nbins, IdxT len, IdxT nLeft) const + { + printf("inside GAMMA::GainPerSplit\n"); + // get the lens' + IdxT nRight = len - nLeft; + + // if there aren't enough samples in this split, don't bother! 
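+    // (A sketch of the algebra, with S, S_L, S_R the label sums and n, n_L,
+    //  n_R the sample counts of the parent and child partitions: the
+    //  sum(log(y)) term of the gamma half deviance is constant across splits,
+    //  so the deviance-based gain reduces to
+    //      log(S / n) - (n_L / n) * log(S_L / n_L) - (n_R / n) * log(S_R / n_R),
+    //  which is what the code below computes.)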
+    if (nLeft < this->min_samples_leaf || nRight < this->min_samples_leaf)
+      return -std::numeric_limits<DataT>::max();
+
+    DataT label_sum       = hist[nbins - 1].label_sum;
+    DataT left_label_sum  = (hist[i].label_sum);
+    DataT right_label_sum = (hist[nbins - 1].label_sum - hist[i].label_sum);
+
+    // label sum cannot be non-positive
+    if (label_sum < eps_ || left_label_sum < eps_ || right_label_sum < eps_)
+      return -std::numeric_limits<DataT>::max();
+
+    // compute the gain to be
+    DataT parent_obj = raft::myLog(label_sum / len);
+    printf("parent_obj: %f\n", parent_obj);
+    DataT left_obj = (DataT(nLeft) / DataT(len)) * raft::myLog(left_label_sum / nLeft);
+    printf("left_obj: %f\n", left_obj);
+    DataT right_obj = (DataT(nRight) / DataT(len)) * raft::myLog(right_label_sum / nRight);
+    printf("right_obj: %f\n", right_obj);
+    DataT gain = parent_obj - (left_obj + right_obj);
+    // gain = gain / DataT(len);
+
+    return gain;
+  }
+
+  DI Split<DataT, IdxT> Gain(BinT const* shist, DataT const* sbins, IdxT col, IdxT len, IdxT nbins) const
+  {
+    Split<DataT, IdxT> sp;
+    for (IdxT i = threadIdx.x; i < nbins; i += blockDim.x) {
+      auto nLeft = shist[i].count;
+      sp.update({sbins[i], col, this->GainPerSplit(shist, i, nbins, len, nLeft), nLeft});
+    }
+    return sp;
+  }
+};
+
+template <typename DataT_, typename LabelT_, typename IdxT_>
+class InverseGaussianObjectiveFunction : public TweedieObjectiveFunction<DataT_, LabelT_, IdxT_> {
+ public:
+  using DataT  = DataT_;
+  using LabelT = LabelT_;
+  using IdxT   = IdxT_;
+  using BinT   = AggregateBin;
+  static constexpr auto eps_ = 10 * std::numeric_limits<DataT>::epsilon();
+
+  HDI InverseGaussianObjectiveFunction(IdxT nclasses, IdxT min_samples_leaf)
+    : TweedieObjectiveFunction<DataT, LabelT, IdxT>{min_samples_leaf}
+  {
+  }
+
+  HDI DataT GainPerSplit(BinT const* hist, IdxT i, IdxT nbins, IdxT len, IdxT nLeft) const
+  {
+    // get the lens'
+    auto nRight = len - nLeft;
+
+    // if there aren't enough samples in this split, don't bother!
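+    // (For reference, a sketch of the algebra: with label sums S, S_L, S_R and
+    //  counts n, n_L, n_R, the inverse-Gaussian half deviance
+    //  (1/(2n)) * sum((y - y_pred)^2 / (y * y_pred^2)) yields a deviance gain
+    //  proportional to (n_L^2/S_L + n_R^2/S_R - n^2/S) / n, which is always
+    //  non-negative by Cauchy-Schwarz; the sign convention of the objective
+    //  terms below is corrected in the following patch.)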
+    if (nLeft < this->min_samples_leaf || nRight < this->min_samples_leaf)
+      return -std::numeric_limits<DataT>::max();
+
+    auto label_sum       = hist[nbins - 1].label_sum;
+    auto left_label_sum  = (hist[i].label_sum);
+    auto right_label_sum = (hist[nbins - 1].label_sum - hist[i].label_sum);
+
+    // label sum cannot be non-positive
+    if (label_sum < eps_ || left_label_sum < eps_ || right_label_sum < eps_)
+      return -std::numeric_limits<DataT>::max();
+
+    // compute the gain to be
+    DataT parent_obj = DataT(len) * DataT(len) / label_sum;
+    DataT left_obj   = DataT(nLeft) * DataT(nLeft) / left_label_sum;
+    DataT right_obj  = DataT(nRight) * DataT(nRight) / right_label_sum;
+    auto gain = parent_obj - (left_obj + right_obj);
+    gain      = gain / len;
+
+    return gain;
+  }
+
+  DI Split<DataT, IdxT> Gain(BinT const* shist, DataT const* sbins, IdxT col, IdxT len, IdxT nbins) const
+  {
+    Split<DataT, IdxT> sp;
+    for (IdxT i = threadIdx.x; i < nbins; i += blockDim.x) {
+      auto nLeft = shist[i].count;
+      sp.update({sbins[i], col, this->GainPerSplit(shist, i, nbins, len, nLeft), nLeft});
+    }
+    return sp;
+  }
+};
 } // end namespace DT
 } // end namespace ML
\ No newline at end of file
diff --git a/cpp/src/decisiontree/decisiontree.cuh b/cpp/src/decisiontree/decisiontree.cuh
index e2284cc14a..33aeac0d75 100644
--- a/cpp/src/decisiontree/decisiontree.cuh
+++ b/cpp/src/decisiontree/decisiontree.cuh
@@ -303,6 +303,32 @@ class DecisionTree {
                unique_labels,
                quantiles)
         .train();
+    } else if (params.split_criterion == CRITERION::GAMMA) {
+      return Builder<GammaObjectiveFunction<DataT, LabelT, IdxT>>(handle,
+                treeid,
+                seed,
+                params,
+                data,
+                labels,
+                nrows,
+                ncols,
+                rowids,
+                unique_labels,
+                quantiles)
+        .train();
+    } else if (params.split_criterion == CRITERION::INVERSE_GAUSSIAN) {
+      return Builder<InverseGaussianObjectiveFunction<DataT, LabelT, IdxT>>(handle,
+                treeid,
+                seed,
+                params,
+                data,
+                labels,
+                nrows,
+                ncols,
+                rowids,
+                unique_labels,
+                quantiles)
+        .train();
     } else {
       ASSERT(false, "Unknown split criterion.");
     }
diff --git a/cpp/test/sg/rf_test.cu b/cpp/test/sg/rf_test.cu
index 4010bd0697..5472ffcd95 100644
--- a/cpp/test/sg/rf_test.cu
+++ b/cpp/test/sg/rf_test.cu
@@ -13,7 +13,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
*/ - #include #include @@ -285,34 +284,29 @@ class RfSpecialisedTest { std::tie(forest, predictions, training_metrics) = TrainScore(handle, params, X.data().get(), X_transpose.data().get(), y.data().get()); + Test(); } // Current model should be at least as accurate as a model with depth - 1 void TestAccuracyImprovement() { - CUML_LOG_TRACE("inside test accuracy improvement: %d", __LINE__); if (params.max_depth <= 1) { return; } // avereraging between models can introduce variance if (params.n_trees > 1) { return; } // accuracy is not guaranteed to improve with bootstrapping if (params.bootstrap) { return; } - CUML_LOG_TRACE("%d", __LINE__); raft::handle_t handle(params.n_streams); RfTestParams alt_params = params; alt_params.max_depth--; - CUML_LOG_TRACE("%d", __LINE__); auto [alt_forest, alt_predictions, alt_metrics] = TrainScore(handle, alt_params, X.data().get(), X_transpose.data().get(), y.data().get()); - CUML_LOG_TRACE("%d", __LINE__); double eps = 1e-8; if (params.split_criterion == MSE) { EXPECT_LE(training_metrics.mean_squared_error, alt_metrics.mean_squared_error + eps); } else if (params.split_criterion == MAE) { EXPECT_LE(training_metrics.mean_abs_error, alt_metrics.mean_abs_error + eps); } else { - CUML_LOG_TRACE("%d", __LINE__); EXPECT_GE(training_metrics.accuracy, alt_metrics.accuracy); - CUML_LOG_TRACE("%d", __LINE__); } } // Regularisation parameters are working correctly @@ -435,19 +429,12 @@ class RfSpecialisedTest { } void Test() { - CUML_LOG_TRACE("inside test"); TestAccuracyImprovement(); - CUML_LOG_TRACE("%d", __LINE__); TestDeterminism(); - CUML_LOG_TRACE("%d", __LINE__); TestMinImpurity(); - CUML_LOG_TRACE("%d", __LINE__); TestTreeSize(); - CUML_LOG_TRACE("%d", __LINE__); TestInstanceCounts(); - CUML_LOG_TRACE("%d", __LINE__); TestFilPredict(); - CUML_LOG_TRACE("%d", __LINE__); } RF_metrics training_metrics; @@ -500,7 +487,8 @@ std::vector min_samples_split = {2, 10}; std::vector min_impurity_decrease = {0.0f, 1.0f, 10.0f}; std::vector n_streams = {1, 2, 10}; std::vector split_criterion = { - CRITERION::POISSON, CRITERION::MSE, CRITERION::GINI, CRITERION::ENTROPY}; + // CRITERION::POISSON, + CRITERION::MSE, CRITERION::GINI, CRITERION::ENTROPY}; std::vector seed = {0, 17}; std::vector n_labels = {2, 10, 20}; std::vector double_precision = {false, true}; @@ -696,7 +684,12 @@ class ObjectiveTest : public ::testing::TestWithParam { ObjectiveTestParameters params; public: - auto RandUnder(int const end = 10000) { return rand() % end; } + auto RandUnder(int const end = 100000) { return rand() % end; } + + auto GenSortedData() + { + + } auto GenHist() { @@ -720,6 +713,91 @@ class ObjectiveTest : public ::testing::TestWithParam { return std::make_pair(cdf_hist, pdf_hist); } + auto InverseGaussianHalfDeviance( + std::vector const& hist) // 1/n * 2 * sum((y - y_pred) * (y - y_pred)/(y * (y_pred) * (y_pred))) + { + BinT aggregate{BinT()}; + aggregate = std::accumulate(hist.begin(), hist.end(), aggregate); + assert(aggregate.count > 0); + DataT const y_mean = aggregate.label_sum / aggregate.count; + auto ighd{DataT(0.0)}; // ighd: inverse gaussian half deviance + + std::for_each(hist.begin(), hist.end(), [&](BinT const& h) { + ighd += (h.label_sum - y_mean) * (h.label_sum - y_mean) / (h.label_sum * y_mean * y_mean); // unit deviance + }); + + ighd /= aggregate.count; + return std::make_tuple( + ighd, aggregate.label_sum, static_cast(aggregate.count)); + } + + auto InverseGaussianGroundTruthGain(std::vector const& pdf_hist, std::size_t split_bin_index) + { + std::vector 
left_pdf_hist{pdf_hist.begin(), pdf_hist.begin() + split_bin_index + 1}; + std::vector right_pdf_hist{pdf_hist.begin() + split_bin_index + 1, pdf_hist.end()}; + + auto [parent_ighd, label_sum, n] = InverseGaussianHalfDeviance(pdf_hist); + auto [left_ighd, label_sum_left, n_left] = InverseGaussianHalfDeviance(left_pdf_hist); + auto [right_ighd, label_sum_right, n_right] = InverseGaussianHalfDeviance(right_pdf_hist); + + + auto gain = parent_ighd - ((n_left / n) * left_ighd + // the minimizing objective function is half deviance + (n_right / n) * right_ighd); // gain in long form without proxy + + // edge cases + if (n_left < params.min_samples_leaf or n_right < params.min_samples_leaf or + label_sum < ObjectiveT::eps_ or label_sum_right < ObjectiveT::eps_ or + label_sum_left < ObjectiveT::eps_) + return -std::numeric_limits::max(); + else + return gain; + } + + auto GammaHalfDeviance( + std::vector const& hist) // 1/n * 2 * sum(log(y_pred/y_true) + y_true/y_pred - 1) + { + BinT aggregate{BinT()}; + aggregate = std::accumulate(hist.begin(), hist.end(), aggregate); + assert(aggregate.count > 0); + DataT const y_mean = aggregate.label_sum / aggregate.count; + auto mean_gamma_deviance{DataT(0.0)}; + + std::for_each(hist.begin(), hist.end(), [&](BinT const& h) { + auto log_y = raft::myLog(h.label_sum ? h.label_sum : DataT(1.0)); // we don't want nans + mean_gamma_deviance += h.count*raft::myLog(y_mean) - log_y + h.label_sum/y_mean - DataT(1); // InvGauss formula for each bin + }); + + mean_gamma_deviance /= aggregate.count; + // mean_gamma_deviance = raft::myLog(y_mean); + return std::make_tuple( + mean_gamma_deviance, aggregate.label_sum, static_cast(aggregate.count)); + } + + auto GammaGroundTruthGain(std::vector const& pdf_hist, std::size_t split_bin_index) + { + std::vector left_pdf_hist{pdf_hist.begin(), pdf_hist.begin() + split_bin_index + 1}; + std::vector right_pdf_hist{pdf_hist.begin() + split_bin_index + 1, pdf_hist.end()}; + + auto [parent_ghd, label_sum, n] = GammaHalfDeviance(pdf_hist); + auto [left_ghd, label_sum_left, n_left] = GammaHalfDeviance(left_pdf_hist); + auto [right_ghd, label_sum_right, n_right] = GammaHalfDeviance(right_pdf_hist); + + + auto gain = parent_ghd - ((n_left / n) * left_ghd + // the minimizing objective function is half deviance + (n_right / n) * right_ghd); // gain in long form without proxy + // DataT gain = n * parent_ghd - (n_left * left_ghd + n_right * right_ghd); + // gain = gain / n; + + // edge cases + if (n_left < params.min_samples_leaf or n_right < params.min_samples_leaf or + label_sum < ObjectiveT::eps_ or label_sum_right < ObjectiveT::eps_ or + label_sum_left < ObjectiveT::eps_) + return -std::numeric_limits::max(); + else + return gain; + } + + auto PoissonHalfDeviance( std::vector const& hist) // 1/n * sum(y_true * log(y_true/y_pred) + y_pred - y_true) { @@ -808,6 +886,14 @@ class ObjectiveTest : public ::testing::TestWithParam { PoissonObjectiveFunction>::value) // poisson { return PoissonGroundTruthGain(pdf_hist, split_bin_index); + } else if constexpr (std::is_same>::value) // gini + { + return GammaGroundTruthGain(pdf_hist, split_bin_index); + } else if constexpr (std::is_same>::value) // gini + { + return InverseGaussianGroundTruthGain(pdf_hist, split_bin_index); } else if constexpr (std::is_same>::value) // gini { @@ -858,6 +944,21 @@ const std::vector poisson_objective_test_parameters = { {9507819643927052251LLU, 256, 1, 1, 0.00001}, {9507819643927052258LLU, 512, 1, 5, 0.00001}, }; + +const std::vector gamma_objective_test_parameters = 
{ + {9507819643927052255LLU, 64, 1, 0, 0.00001}, + {9507819643927052259LLU, 128, 1, 1, 0.00001}, + {9507819643927052251LLU, 256, 1, 1, 0.00001}, + {9507819643927052258LLU, 512, 1, 5, 0.00001}, +}; + +const std::vector invgauss_objective_test_parameters = { + {9507819643927052255LLU, 64, 1, 0, 0.00001}, + {9507819643927052259LLU, 128, 1, 1, 0.00001}, + {9507819643927052251LLU, 256, 1, 1, 0.00001}, + {9507819643927052258LLU, 512, 1, 5, 0.00001}, +}; + const std::vector gini_objective_test_parameters = { {9507819643927052255LLU, 64, 2, 0, 0.00001}, {9507819643927052256LLU, 128, 10, 1, 0.00001}, @@ -871,6 +972,18 @@ TEST_P(PoissonObjectiveTestD, poissonObjectiveTest) {} INSTANTIATE_TEST_CASE_P(RfTests, PoissonObjectiveTestD, ::testing::ValuesIn(poisson_objective_test_parameters)); +// gamma objective test +typedef ObjectiveTest> GammaObjectiveTestD; +TEST_P(GammaObjectiveTestD, GammaObjectiveTest) {} +INSTANTIATE_TEST_CASE_P(RfTests, + GammaObjectiveTestD, + ::testing::ValuesIn(gamma_objective_test_parameters)); +// InvGauss objective test +typedef ObjectiveTest> InverseGaussianObjectiveTestD; +TEST_P(InverseGaussianObjectiveTestD, InverseGaussianObjectiveTest) {} +INSTANTIATE_TEST_CASE_P(RfTests, + InverseGaussianObjectiveTestD, + ::testing::ValuesIn(invgauss_objective_test_parameters)); // gini objective test typedef ObjectiveTest> GiniObjectiveTestD; From e63754a281cee2eec8c54a80b25f662b56d414a5 Mon Sep 17 00:00:00 2001 From: venkywonka Date: Wed, 22 Sep 2021 22:30:33 +0530 Subject: [PATCH 27/42] refactor unit tests --- .../batched-levelalgo/metrics.cuh | 55 ++-- cpp/src/decisiontree/decisiontree.cuh | 40 +-- cpp/test/sg/rf_test.cu | 272 ++++++++++-------- 3 files changed, 188 insertions(+), 179 deletions(-) diff --git a/cpp/src/decisiontree/batched-levelalgo/metrics.cuh b/cpp/src/decisiontree/batched-levelalgo/metrics.cuh index b92459a081..f9372a9a4f 100644 --- a/cpp/src/decisiontree/batched-levelalgo/metrics.cuh +++ b/cpp/src/decisiontree/batched-levelalgo/metrics.cuh @@ -241,7 +241,6 @@ class EntropyObjectiveFunction { **/ template class TweedieObjectiveFunction { - public: using DataT = DataT_; using LabelT = LabelT_; @@ -252,21 +251,16 @@ class TweedieObjectiveFunction { IdxT min_samples_leaf; public: - HDI TweedieObjectiveFunction(IdxT min_samples_leaf) - : min_samples_leaf(min_samples_leaf) - { - } + HDI TweedieObjectiveFunction(IdxT min_samples_leaf) : min_samples_leaf(min_samples_leaf) {} DI IdxT NumClasses() const { return 1; } - static DI void SetLeafVector(BinT const* shist, int nclasses, DataT* out) { for (int i = 0; i < nclasses; i++) { out[i] = shist[i].label_sum / shist[i].count; } } - }; template @@ -304,7 +298,8 @@ class MSEObjectiveFunction : public TweedieObjectiveFunction Gain(BinT const* shist, DataT const* sbins, IdxT col, IdxT len, IdxT nbins) const + DI Split Gain( + BinT const* shist, DataT const* sbins, IdxT col, IdxT len, IdxT nbins) const { Split sp; for (IdxT i = threadIdx.x; i < nbins; i += blockDim.x) { @@ -313,7 +308,6 @@ class MSEObjectiveFunction : public TweedieObjectiveFunction @@ -370,7 +364,8 @@ class PoissonObjectiveFunction : public TweedieObjectiveFunction Gain(BinT const* shist, DataT const* sbins, IdxT col, IdxT len, IdxT nbins) const + DI Split Gain( + BinT const* shist, DataT const* sbins, IdxT col, IdxT len, IdxT nbins) const { Split sp; for (IdxT i = threadIdx.x; i < nbins; i += blockDim.x) { @@ -379,17 +374,15 @@ class PoissonObjectiveFunction : public TweedieObjectiveFunction class GammaObjectiveFunction : public TweedieObjectiveFunction { 
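  // (Gamma half-deviance split objective: like the Poisson objective, it
  //  assumes strictly positive labels and rejects any split whose parent or
  //  child label sum falls below eps_.)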
public: - using DataT = DataT_; - using LabelT = LabelT_; - using IdxT = IdxT_; - using BinT = AggregateBin; + using DataT = DataT_; + using LabelT = LabelT_; + using IdxT = IdxT_; + using BinT = AggregateBin; static constexpr auto eps_ = 10 * std::numeric_limits::epsilon(); HDI GammaObjectiveFunction(IdxT nclasses, IdxT min_samples_leaf) @@ -399,7 +392,6 @@ class GammaObjectiveFunction : public TweedieObjectiveFunction Gain(BinT const* shist, DataT const* sbins, IdxT col, IdxT len, IdxT nbins) const + DI Split Gain( + BinT const* shist, DataT const* sbins, IdxT col, IdxT len, IdxT nbins) const { Split sp; for (IdxT i = threadIdx.x; i < nbins; i += blockDim.x) { @@ -442,10 +432,10 @@ class GammaObjectiveFunction : public TweedieObjectiveFunction class InverseGaussianObjectiveFunction : public TweedieObjectiveFunction { public: - using DataT = DataT_; - using LabelT = LabelT_; - using IdxT = IdxT_; - using BinT = AggregateBin; + using DataT = DataT_; + using LabelT = LabelT_; + using IdxT = IdxT_; + using BinT = AggregateBin; static constexpr auto eps_ = 10 * std::numeric_limits::epsilon(); HDI InverseGaussianObjectiveFunction(IdxT nclasses, IdxT min_samples_leaf) @@ -471,16 +461,17 @@ class InverseGaussianObjectiveFunction : public TweedieObjectiveFunction::max(); // compute the gain to be - DataT parent_obj = DataT(len) * DataT(len) / label_sum; - DataT left_obj = DataT(nLeft) * DataT(nLeft) / left_label_sum; - DataT right_obj = DataT(nRight) * DataT(nRight) / right_label_sum; + DataT parent_obj = -DataT(len) * DataT(len) / label_sum; + DataT left_obj = -DataT(nLeft) * DataT(nLeft) / left_label_sum; + DataT right_obj = -DataT(nRight) * DataT(nRight) / right_label_sum; auto gain = parent_obj - (left_obj + right_obj); gain = gain / len; return gain; } - DI Split Gain(BinT const* shist, DataT const* sbins, IdxT col, IdxT len, IdxT nbins) const + DI Split Gain( + BinT const* shist, DataT const* sbins, IdxT col, IdxT len, IdxT nbins) const { Split sp; for (IdxT i = threadIdx.x; i < nbins; i += blockDim.x) { diff --git a/cpp/src/decisiontree/decisiontree.cuh b/cpp/src/decisiontree/decisiontree.cuh index 33aeac0d75..978839e975 100644 --- a/cpp/src/decisiontree/decisiontree.cuh +++ b/cpp/src/decisiontree/decisiontree.cuh @@ -305,29 +305,29 @@ class DecisionTree { .train(); } else if (params.split_criterion == CRITERION::GAMMA) { return Builder>(handle, - treeid, - seed, - params, - data, - labels, - nrows, - ncols, - rowids, - unique_labels, - quantiles) + treeid, + seed, + params, + data, + labels, + nrows, + ncols, + rowids, + unique_labels, + quantiles) .train(); } else if (params.split_criterion == CRITERION::INVERSE_GAUSSIAN) { return Builder>(handle, - treeid, - seed, - params, - data, - labels, - nrows, - ncols, - rowids, - unique_labels, - quantiles) + treeid, + seed, + params, + data, + labels, + nrows, + ncols, + rowids, + unique_labels, + quantiles) .train(); } else { ASSERT(false, "Unknown split criterion."); diff --git a/cpp/test/sg/rf_test.cu b/cpp/test/sg/rf_test.cu index 5472ffcd95..89afe08eae 100644 --- a/cpp/test/sg/rf_test.cu +++ b/cpp/test/sg/rf_test.cu @@ -13,8 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include #include +#include +#include #include #include @@ -284,7 +285,6 @@ class RfSpecialisedTest { std::tie(forest, predictions, training_metrics) = TrainScore(handle, params, X.data().get(), X_transpose.data().get(), y.data().get()); - Test(); } // Current model should be at least as accurate as a model with depth - 1 @@ -488,7 +488,9 @@ std::vector min_impurity_decrease = {0.0f, 1.0f, 10.0f}; std::vector n_streams = {1, 2, 10}; std::vector split_criterion = { // CRITERION::POISSON, - CRITERION::MSE, CRITERION::GINI, CRITERION::ENTROPY}; + CRITERION::MSE, + CRITERION::GINI, + CRITERION::ENTROPY}; std::vector seed = {0, 17}; std::vector n_labels = {2, 10, 20}; std::vector double_precision = {false, true}; @@ -668,6 +670,7 @@ namespace DT { struct ObjectiveTestParameters { uint64_t seed; + int n_rows; int n_bins; int n_classes; int min_samples_leaf; @@ -686,26 +689,52 @@ class ObjectiveTest : public ::testing::TestWithParam { public: auto RandUnder(int const end = 100000) { return rand() % end; } - auto GenSortedData() + auto GenRandomData() { - + std::default_random_engine rng; + std::vector data(params.n_rows); + if constexpr (std::is_same::value) // classification case + { + for (auto& iter : data) { + iter = RandUnder(params.n_classes); + } + } else { + std::normal_distribution normal(1.0, 2.0); + for (auto& iter : data) { + auto rand_element(DataT(0)); + while (1) { + rand_element = normal(rng); + if (rand_element > 0) break; // only positive random numbers + } + iter = rand_element; + } + } + return data; } - auto GenHist() + auto GenHist(std::vector data) { std::vector cdf_hist, pdf_hist; for (auto c = 0; c < params.n_classes; ++c) { for (auto b = 0; b < params.n_bins; ++b) { - if constexpr (std::is_same::value) - pdf_hist.emplace_back(RandUnder()); - else - pdf_hist.emplace_back(static_cast(RandUnder()), RandUnder()); + IdxT bin_width = raft::ceildiv(params.n_rows, params.n_bins); + auto data_begin = data.begin() + b * bin_width; + auto data_end = data_begin + bin_width; + if constexpr (std::is_same::value) { // classification case + auto count(IdxT(0)); + std::for_each(data_begin, data_end, [&](auto d) { + if (d == c) ++count; + }); + pdf_hist.emplace_back(count); + } else { // regression case + auto label_sum(DataT(0)); + label_sum = std::accumulate(data_begin, data_end, DataT(0)); + pdf_hist.emplace_back(label_sum, bin_width); + } auto cumulative = b > 0 ? cdf_hist.back() : BinT(); - cdf_hist.emplace_back(pdf_hist.empty() ? 
BinT() : pdf_hist.back()); - cdf_hist.back() += cumulative; } } @@ -714,35 +743,34 @@ class ObjectiveTest : public ::testing::TestWithParam { } auto InverseGaussianHalfDeviance( - std::vector const& hist) // 1/n * 2 * sum((y - y_pred) * (y - y_pred)/(y * (y_pred) * (y_pred))) + std::vector const& + data) // 1/n * 2 * sum((y - y_pred) * (y - y_pred)/(y * (y_pred) * (y_pred))) { - BinT aggregate{BinT()}; - aggregate = std::accumulate(hist.begin(), hist.end(), aggregate); - assert(aggregate.count > 0); - DataT const y_mean = aggregate.label_sum / aggregate.count; - auto ighd{DataT(0.0)}; // ighd: inverse gaussian half deviance - - std::for_each(hist.begin(), hist.end(), [&](BinT const& h) { - ighd += (h.label_sum - y_mean) * (h.label_sum - y_mean) / (h.label_sum * y_mean * y_mean); // unit deviance + DataT sum = std::accumulate(data.begin(), data.end(), DataT(0)); + DataT const mean = sum / data.size(); + auto ighd{DataT(0.0)}; // ighd: inverse gaussian half deviance + + std::for_each(data.begin(), data.end(), [&](auto d) { + ighd += (d - mean) * (d - mean) / (d * mean * mean); // unit deviance }); - ighd /= aggregate.count; - return std::make_tuple( - ighd, aggregate.label_sum, static_cast(aggregate.count)); + ighd /= data.size(); + return std::make_tuple(ighd, sum, DataT(data.size())); } - auto InverseGaussianGroundTruthGain(std::vector const& pdf_hist, std::size_t split_bin_index) + auto InverseGaussianGroundTruthGain(std::vector const& data, std::size_t split_bin_index) { - std::vector left_pdf_hist{pdf_hist.begin(), pdf_hist.begin() + split_bin_index + 1}; - std::vector right_pdf_hist{pdf_hist.begin() + split_bin_index + 1, pdf_hist.end()}; + auto bin_width = raft::ceildiv(params.n_rows, params.n_bins); + std::vector left_data(data.begin(), data.begin() + (split_bin_index + 1) * bin_width); + std::vector right_data(data.begin() + (split_bin_index + 1) * bin_width, data.end()); - auto [parent_ighd, label_sum, n] = InverseGaussianHalfDeviance(pdf_hist); - auto [left_ighd, label_sum_left, n_left] = InverseGaussianHalfDeviance(left_pdf_hist); - auto [right_ighd, label_sum_right, n_right] = InverseGaussianHalfDeviance(right_pdf_hist); + auto [parent_ighd, label_sum, n] = InverseGaussianHalfDeviance(data); + auto [left_ighd, label_sum_left, n_left] = InverseGaussianHalfDeviance(left_data); + auto [right_ighd, label_sum_right, n_right] = InverseGaussianHalfDeviance(right_data); - - auto gain = parent_ighd - ((n_left / n) * left_ighd + // the minimizing objective function is half deviance - (n_right / n) * right_ighd); // gain in long form without proxy + auto gain = parent_ighd - + ((n_left / n) * left_ighd + // the minimizing objective function is half deviance + (n_right / n) * right_ighd); // gain in long form without proxy // edge cases if (n_left < params.min_samples_leaf or n_right < params.min_samples_leaf or @@ -754,39 +782,35 @@ class ObjectiveTest : public ::testing::TestWithParam { } auto GammaHalfDeviance( - std::vector const& hist) // 1/n * 2 * sum(log(y_pred/y_true) + y_true/y_pred - 1) + std::vector const& data) // 1/n * 2 * sum(log(y_pred/y_true) + y_true/y_pred - 1) { - BinT aggregate{BinT()}; - aggregate = std::accumulate(hist.begin(), hist.end(), aggregate); - assert(aggregate.count > 0); - DataT const y_mean = aggregate.label_sum / aggregate.count; - auto mean_gamma_deviance{DataT(0.0)}; - - std::for_each(hist.begin(), hist.end(), [&](BinT const& h) { - auto log_y = raft::myLog(h.label_sum ? 
h.label_sum : DataT(1.0)); // we don't want nans - mean_gamma_deviance += h.count*raft::myLog(y_mean) - log_y + h.label_sum/y_mean - DataT(1); // InvGauss formula for each bin + DataT sum(0); + sum = std::accumulate(data.begin(), data.end(), DataT(0)); + DataT const mean = sum / data.size(); + DataT ghd(0); // gamma half deviance + + std::for_each(data.begin(), data.end(), [&](auto& element) { + auto log_y = raft::myLog(element ? element : DataT(1.0)); + ghd += raft::myLog(mean) - log_y + element / mean - 1; }); - mean_gamma_deviance /= aggregate.count; - // mean_gamma_deviance = raft::myLog(y_mean); - return std::make_tuple( - mean_gamma_deviance, aggregate.label_sum, static_cast(aggregate.count)); + ghd /= data.size(); + return std::make_tuple(ghd, sum, DataT(data.size())); } - auto GammaGroundTruthGain(std::vector const& pdf_hist, std::size_t split_bin_index) + auto GammaGroundTruthGain(std::vector const& data, std::size_t split_bin_index) { - std::vector left_pdf_hist{pdf_hist.begin(), pdf_hist.begin() + split_bin_index + 1}; - std::vector right_pdf_hist{pdf_hist.begin() + split_bin_index + 1, pdf_hist.end()}; - - auto [parent_ghd, label_sum, n] = GammaHalfDeviance(pdf_hist); - auto [left_ghd, label_sum_left, n_left] = GammaHalfDeviance(left_pdf_hist); - auto [right_ghd, label_sum_right, n_right] = GammaHalfDeviance(right_pdf_hist); + auto bin_width = raft::ceildiv(params.n_rows, params.n_bins); + std::vector left_data(data.begin(), data.begin() + (split_bin_index + 1) * bin_width); + std::vector right_data(data.begin() + (split_bin_index + 1) * bin_width, data.end()); + auto [parent_ghd, label_sum, n] = GammaHalfDeviance(data); + auto [left_ghd, label_sum_left, n_left] = GammaHalfDeviance(left_data); + auto [right_ghd, label_sum_right, n_right] = GammaHalfDeviance(right_data); - auto gain = parent_ghd - ((n_left / n) * left_ghd + // the minimizing objective function is half deviance - (n_right / n) * right_ghd); // gain in long form without proxy - // DataT gain = n * parent_ghd - (n_left * left_ghd + n_right * right_ghd); - // gain = gain / n; + auto gain = + parent_ghd - ((n_left / n) * left_ghd + // the minimizing objective function is half deviance + (n_right / n) * right_ghd); // gain in long form without proxy // edge cases if (n_left < params.min_samples_leaf or n_right < params.min_samples_leaf or @@ -797,34 +821,31 @@ class ObjectiveTest : public ::testing::TestWithParam { return gain; } - auto PoissonHalfDeviance( - std::vector const& hist) // 1/n * sum(y_true * log(y_true/y_pred) + y_pred - y_true) + std::vector const& data) // 1/n * sum(y_true * log(y_true/y_pred) + y_pred - y_true) { - BinT aggregate{BinT()}; - aggregate = std::accumulate(hist.begin(), hist.end(), aggregate); - assert(aggregate.count > 0); - auto const y_mean = aggregate.label_sum / aggregate.count; + DataT sum = std::accumulate(data.begin(), data.end(), DataT(0)); + auto const mean = sum / data.size(); auto poisson_half_deviance{DataT(0.0)}; - std::for_each(hist.begin(), hist.end(), [&](BinT const& h) { - auto log_y = raft::myLog(h.label_sum ? h.label_sum : DataT(1.0)); // we don't want nans - poisson_half_deviance += h.label_sum * (log_y - raft::myLog(y_mean)) + y_mean - h.label_sum; + std::for_each(data.begin(), data.end(), [&](auto d) { + auto log_y = raft::myLog(d ? 
d : DataT(1.0)); // we don't want nans + poisson_half_deviance += d * (log_y - raft::myLog(mean)) + mean - d; }); - poisson_half_deviance /= aggregate.count; - return std::make_tuple( - poisson_half_deviance, aggregate.label_sum, static_cast(aggregate.count)); + poisson_half_deviance /= data.size(); + return std::make_tuple(poisson_half_deviance, sum, DataT(data.size())); } - auto PoissonGroundTruthGain(std::vector const& pdf_hist, std::size_t split_bin_index) + auto PoissonGroundTruthGain(std::vector const& data, std::size_t split_bin_index) { - std::vector left_pdf_hist{pdf_hist.begin(), pdf_hist.begin() + split_bin_index + 1}; - std::vector right_pdf_hist{pdf_hist.begin() + split_bin_index + 1, pdf_hist.end()}; + auto bin_width = raft::ceildiv(params.n_rows, params.n_bins); + std::vector left_data(data.begin(), data.begin() + (split_bin_index + 1) * bin_width); + std::vector right_data(data.begin() + (split_bin_index + 1) * bin_width, data.end()); - auto [parent_phd, label_sum, n] = PoissonHalfDeviance(pdf_hist); - auto [left_phd, label_sum_left, n_left] = PoissonHalfDeviance(left_pdf_hist); - auto [right_phd, label_sum_right, n_right] = PoissonHalfDeviance(right_pdf_hist); + auto [parent_phd, label_sum, n] = PoissonHalfDeviance(data); + auto [left_phd, label_sum_left, n_left] = PoissonHalfDeviance(left_data); + auto [right_phd, label_sum_right, n_right] = PoissonHalfDeviance(right_data); auto gain = parent_phd - ((n_left / n) * left_phd + (n_right / n) * right_phd); // gain in long form without proxy @@ -838,37 +859,32 @@ class ObjectiveTest : public ::testing::TestWithParam { return gain; } - auto GiniImpurity(std::vector const& hist) + auto GiniImpurity(std::vector const& data) { // sum((n_c/n_total)(1-(n_c/n_total))) - auto gini{double(0)}; - auto n_bins = hist.size() / params.n_classes; - auto n_instances = std::accumulate(hist.begin(), hist.end(), BinT()).x; // total instances + double gini(0); for (auto c = 0; c < params.n_classes; ++c) { - auto begin_iter = hist.begin() + c * n_bins; - auto end_iter = hist.begin() + (c + 1) * n_bins; - double class_proba = std::accumulate(begin_iter, end_iter, BinT()).x; // instances of class c - class_proba /= n_instances; // probability of class c + IdxT sum(0); + std::for_each(data.begin(), data.end(), [&](auto d) { + if (d == DataT(c)) ++sum; + }); + double class_proba = double(sum) / data.size(); gini += class_proba * (1 - class_proba); // adding gain } - return std::make_pair(gini, double(n_instances)); + return gini; } - auto GiniGroundTruthGain(std::vector const& pdf_hist, std::size_t const split_bin_index) + auto GiniGroundTruthGain(std::vector const& data, std::size_t const split_bin_index) { - std::vector left_pdf_hist, right_pdf_hist; + auto bin_width = raft::ceildiv(params.n_rows, params.n_bins); + std::vector left_data(data.begin(), data.begin() + (split_bin_index + 1) * bin_width); + std::vector right_data(data.begin() + (split_bin_index + 1) * bin_width, data.end()); - for (auto c = 0; c < params.n_classes; ++c) { // decompose the pdf_hist - auto start = pdf_hist.begin() + c * params.n_bins; - auto split = pdf_hist.begin() + c * params.n_bins + split_bin_index + 1; - auto end = pdf_hist.begin() + (c + 1) * params.n_bins; - - left_pdf_hist.insert(left_pdf_hist.end(), start, split); - right_pdf_hist.insert(right_pdf_hist.end(), split, end); - } - - auto [parent_gini, n] = GiniImpurity(pdf_hist); - auto [left_gini, left_n] = GiniImpurity(left_pdf_hist); - auto [right_gini, right_n] = GiniImpurity(right_pdf_hist); + auto parent_gini 
= GiniImpurity(data); + auto left_gini = GiniImpurity(left_data); + auto right_gini = GiniImpurity(right_data); + double n = data.size(); + double left_n = left_data.size(); + double right_n = right_data.size(); auto gain = parent_gini - ((left_n / n) * left_gini + (right_n / n) * right_gini); @@ -880,24 +896,25 @@ class ObjectiveTest : public ::testing::TestWithParam { } } - auto GroundTruthGain(std::vector const& pdf_hist, std::size_t const split_bin_index) + auto GroundTruthGain(std::vector const& data, std::size_t const split_bin_index) { if constexpr (std::is_same>::value) // poisson { - return PoissonGroundTruthGain(pdf_hist, split_bin_index); + return PoissonGroundTruthGain(data, split_bin_index); } else if constexpr (std::is_same>::value) // gini { - return GammaGroundTruthGain(pdf_hist, split_bin_index); - } else if constexpr (std::is_same>::value) // gini + return GammaGroundTruthGain(data, split_bin_index); + } else if constexpr (std::is_same< + ObjectiveT, + InverseGaussianObjectiveFunction>::value) // gini { - return InverseGaussianGroundTruthGain(pdf_hist, split_bin_index); + return InverseGaussianGroundTruthGain(data, split_bin_index); } else if constexpr (std::is_same>::value) // gini { - return GiniGroundTruthGain(pdf_hist, split_bin_index); + return GiniGroundTruthGain(data, split_bin_index); } return double(0.0); } @@ -923,10 +940,10 @@ class ObjectiveTest : public ::testing::TestWithParam { params = ::testing::TestWithParam::GetParam(); ObjectiveT objective(params.n_classes, params.min_samples_leaf); - auto [cdf_hist, pdf_hist] = GenHist(); - - auto split_bin_index = RandUnder(params.n_bins); - auto ground_truth_gain = GroundTruthGain(pdf_hist, split_bin_index); + auto data = GenRandomData(); + auto [cdf_hist, pdf_hist] = GenHist(data); + auto split_bin_index = RandUnder(params.n_bins); + auto ground_truth_gain = GroundTruthGain(data, split_bin_index); auto hypothesis_gain = objective.GainPerSplit(&cdf_hist[0], split_bin_index, @@ -939,31 +956,31 @@ class ObjectiveTest : public ::testing::TestWithParam { }; const std::vector poisson_objective_test_parameters = { - {9507819643927052255LLU, 64, 1, 0, 0.00001}, - {9507819643927052259LLU, 128, 1, 1, 0.00001}, - {9507819643927052251LLU, 256, 1, 1, 0.00001}, - {9507819643927052258LLU, 512, 1, 5, 0.00001}, + {9507819643927052255LLU, 2048, 64, 1, 0, 0.00001}, + {9507819643927052259LLU, 2048, 128, 1, 1, 0.00001}, + {9507819643927052251LLU, 2048, 256, 1, 1, 0.00001}, + {9507819643927052258LLU, 2048, 512, 1, 5, 0.00001}, }; const std::vector gamma_objective_test_parameters = { - {9507819643927052255LLU, 64, 1, 0, 0.00001}, - {9507819643927052259LLU, 128, 1, 1, 0.00001}, - {9507819643927052251LLU, 256, 1, 1, 0.00001}, - {9507819643927052258LLU, 512, 1, 5, 0.00001}, + {9507819643927052255LLU, 2048, 64, 1, 0, 0.00001}, + {9507819643927052259LLU, 2048, 128, 1, 1, 0.00001}, + {9507819643927052251LLU, 2048, 256, 1, 1, 0.00001}, + {9507819643927052258LLU, 2048, 512, 1, 5, 0.00001}, }; const std::vector invgauss_objective_test_parameters = { - {9507819643927052255LLU, 64, 1, 0, 0.00001}, - {9507819643927052259LLU, 128, 1, 1, 0.00001}, - {9507819643927052251LLU, 256, 1, 1, 0.00001}, - {9507819643927052258LLU, 512, 1, 5, 0.00001}, + {9507819643927052255LLU, 2048, 64, 1, 0, 0.00001}, + {9507819643927052259LLU, 2048, 128, 1, 1, 0.00001}, + {9507819643927052251LLU, 2048, 256, 1, 1, 0.00001}, + {9507819643927052258LLU, 2048, 512, 1, 5, 0.00001}, }; const std::vector gini_objective_test_parameters = { - {9507819643927052255LLU, 64, 2, 0, 
0.00001}, - {9507819643927052256LLU, 128, 10, 1, 0.00001}, - {9507819643927052257LLU, 256, 100, 1, 0.00001}, - {9507819643927052258LLU, 512, 100, 5, 0.00001}, + {9507819643927052255LLU, 2048, 64, 2, 0, 0.00001}, + {9507819643927052256LLU, 2048, 128, 10, 1, 0.00001}, + {9507819643927052257LLU, 2048, 256, 100, 1, 0.00001}, + {9507819643927052258LLU, 2048, 512, 100, 5, 0.00001}, }; // poisson objective test @@ -979,7 +996,8 @@ INSTANTIATE_TEST_CASE_P(RfTests, GammaObjectiveTestD, ::testing::ValuesIn(gamma_objective_test_parameters)); // InvGauss objective test -typedef ObjectiveTest> InverseGaussianObjectiveTestD; +typedef ObjectiveTest> + InverseGaussianObjectiveTestD; TEST_P(InverseGaussianObjectiveTestD, InverseGaussianObjectiveTest) {} INSTANTIATE_TEST_CASE_P(RfTests, InverseGaussianObjectiveTestD, From 78b0ffde15475c0e30c381170358b000ac46233a Mon Sep 17 00:00:00 2001 From: venkywonka Date: Fri, 24 Sep 2021 09:16:02 +0530 Subject: [PATCH 28/42] add tests for entropy and mse --- .../batched-levelalgo/metrics.cuh | 15 +- cpp/test/sg/rf_test.cu | 252 +++++++++--------- .../dask/ensemble/randomforestclassifier.py | 13 +- .../dask/ensemble/randomforestregressor.py | 2 + python/cuml/ensemble/randomforest_common.pyx | 5 +- python/cuml/ensemble/randomforest_shared.pxd | 2 + .../cuml/ensemble/randomforestregressor.pyx | 2 + 7 files changed, 142 insertions(+), 149 deletions(-) diff --git a/cpp/src/decisiontree/batched-levelalgo/metrics.cuh b/cpp/src/decisiontree/batched-levelalgo/metrics.cuh index dfa11a3ebf..974eaa3206 100644 --- a/cpp/src/decisiontree/batched-levelalgo/metrics.cuh +++ b/cpp/src/decisiontree/batched-levelalgo/metrics.cuh @@ -100,7 +100,7 @@ class GiniObjectiveFunction { HDI DataT GainPerSplit(BinT* hist, IdxT i, IdxT nbins, IdxT len, IdxT nLeft) { - auto nRight = len - nLeft; + IdxT nRight = len - nLeft; constexpr DataT One = DataT(1.0); auto invlen = One / len; auto invLeft = One / nLeft; @@ -175,7 +175,7 @@ class EntropyObjectiveFunction { HDI DataT GainPerSplit(BinT const* hist, IdxT i, IdxT nbins, IdxT len, IdxT nLeft) { - auto nRight{len - nLeft}; + IdxT nRight{len - nLeft}; auto gain{DataT(0.0)}; // if there aren't enough samples in this split, don't bother! 
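    // (For reference: the quantity assembled below is the information gain
    //  IG = H(parent) - (nLeft/len) * H(left) - (nRight/len) * H(right),
    //  with class probabilities taken from the cumulative bin histogram.)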
if (nLeft < min_samples_leaf || nRight < min_samples_leaf) { @@ -280,7 +280,7 @@ class MSEObjectiveFunction : public TweedieObjectiveFunctionmin_samples_leaf || nRight < this->min_samples_leaf) { @@ -292,7 +292,7 @@ class MSEObjectiveFunction : public TweedieObjectiveFunctionmin_samples_leaf || nRight < this->min_samples_leaf) @@ -392,6 +392,7 @@ class GammaObjectiveFunction : public TweedieObjectiveFunctionmin_samples_leaf || nRight < this->min_samples_leaf) return -std::numeric_limits::max(); @@ -443,7 +444,7 @@ class InverseGaussianObjectiveFunction : public TweedieObjectiveFunctionmin_samples_leaf || nRight < this->min_samples_leaf) @@ -462,7 +463,7 @@ class InverseGaussianObjectiveFunction : public TweedieObjectiveFunction { void SetUp() override { RfTestParams params = ::testing::TestWithParam::GetParam(); - bool is_regression = params.split_criterion == MSE or params.split_criterion == MAE or - params.split_criterion == POISSON; + bool is_regression = params.split_criterion != GINI and params.split_criterion != ENTROPY; if (params.double_precision) { if (is_regression) { RfSpecialisedTest test(params); @@ -487,6 +485,8 @@ std::vector min_samples_split = {2, 10}; std::vector min_impurity_decrease = {0.0f, 1.0f, 10.0f}; std::vector n_streams = {1, 2, 10}; std::vector split_criterion = { + CRITERION::INVERSE_GAUSSIAN, + CRITERION::GAMMA, CRITERION::POISSON, CRITERION::MSE, CRITERION::GINI, @@ -670,10 +670,7 @@ namespace DT { struct ObjectiveTestParameters { uint64_t seed; -<<<<<<< HEAD int n_rows; -======= ->>>>>>> d0aaafc51703cbe7efca995f495f7ab9731c9dd0 int n_bins; int n_classes; int min_samples_leaf; @@ -690,8 +687,7 @@ class ObjectiveTest : public ::testing::TestWithParam { ObjectiveTestParameters params; public: -<<<<<<< HEAD - auto RandUnder(int const end = 100000) { return rand() % end; } + auto RandUnder(int const end = 10000) { return rand() % end; } auto GenRandomData() { @@ -699,41 +695,35 @@ class ObjectiveTest : public ::testing::TestWithParam { std::vector data(params.n_rows); if constexpr (std::is_same::value) // classification case { - for (auto& iter : data) { - iter = RandUnder(params.n_classes); + for (auto& d : data) { + d = RandUnder(params.n_classes); } } else { std::normal_distribution normal(1.0, 2.0); - for (auto& iter : data) { + for (auto& d : data) { auto rand_element(DataT(0)); while (1) { rand_element = normal(rng); if (rand_element > 0) break; // only positive random numbers } - iter = rand_element; + d = rand_element; } } return data; } auto GenHist(std::vector data) -======= - auto RandUnder(int const end = 10000) { return rand() % end; } - - auto GenHist() ->>>>>>> d0aaafc51703cbe7efca995f495f7ab9731c9dd0 { std::vector cdf_hist, pdf_hist; for (auto c = 0; c < params.n_classes; ++c) { for (auto b = 0; b < params.n_bins; ++b) { -<<<<<<< HEAD IdxT bin_width = raft::ceildiv(params.n_rows, params.n_bins); auto data_begin = data.begin() + b * bin_width; auto data_end = data_begin + bin_width; if constexpr (std::is_same::value) { // classification case auto count(IdxT(0)); - std::for_each(data_begin, data_end, [&](auto d) { + std::for_each(data_begin, data_end, [&](auto d){ if (d == c) ++count; }); pdf_hist.emplace_back(count); @@ -745,17 +735,6 @@ class ObjectiveTest : public ::testing::TestWithParam { auto cumulative = b > 0 ? cdf_hist.back() : BinT(); cdf_hist.emplace_back(pdf_hist.empty() ? 
BinT() : pdf_hist.back()); -======= - if constexpr (std::is_same::value) - pdf_hist.emplace_back(RandUnder()); - else - pdf_hist.emplace_back(static_cast(RandUnder()), RandUnder()); - - auto cumulative = b > 0 ? cdf_hist.back() : BinT(); - - cdf_hist.emplace_back(pdf_hist.empty() ? BinT() : pdf_hist.back()); - ->>>>>>> d0aaafc51703cbe7efca995f495f7ab9731c9dd0 cdf_hist.back() += cumulative; } } @@ -763,7 +742,44 @@ class ObjectiveTest : public ::testing::TestWithParam { return std::make_pair(cdf_hist, pdf_hist); } -<<<<<<< HEAD + + auto MSE( + std::vector const& + data) // 1/n * 1/2 * sum((y - y_pred) * (y - y_pred)) + { + DataT sum = std::accumulate(data.begin(), data.end(), DataT(0)); + DataT const mean = sum / data.size(); + auto mse{DataT(0.0)}; // mse: mean squared error + + std::for_each(data.begin(), data.end(), [&](auto d) { + mse += (d - mean) * (d - mean); // unit deviance + }); + + mse /= 2 * data.size(); + return std::make_tuple(mse, sum, DataT(data.size())); + } + + auto MSEGroundTruthGain(std::vector const& data, std::size_t split_bin_index) + { + auto bin_width = raft::ceildiv(params.n_rows, params.n_bins); + std::vector left_data(data.begin(), data.begin() + (split_bin_index + 1) * bin_width); + std::vector right_data(data.begin() + (split_bin_index + 1) * bin_width, data.end()); + + auto [parent_mse, label_sum, n] = MSE(data); + auto [left_mse, label_sum_left, n_left] = MSE(left_data); + auto [right_mse, label_sum_right, n_right] = MSE(right_data); + + auto gain = parent_mse - + ((n_left / n) * left_mse + // the minimizing objective function is half deviance + (n_right / n) * right_mse); // gain in long form without proxy + + // edge cases + if (n_left < params.min_samples_leaf or n_right < params.min_samples_leaf) + return -std::numeric_limits::max(); + else + return gain; + } + auto InverseGaussianHalfDeviance( std::vector const& data) // 1/n * 2 * sum((y - y_pred) * (y - y_pred)/(y * (y_pred) * (y_pred))) @@ -776,7 +792,7 @@ class ObjectiveTest : public ::testing::TestWithParam { ighd += (d - mean) * (d - mean) / (d * mean * mean); // unit deviance }); - ighd /= data.size(); + ighd /= 2 * data.size(); return std::make_tuple(ighd, sum, DataT(data.size())); } @@ -868,35 +884,6 @@ class ObjectiveTest : public ::testing::TestWithParam { auto [parent_phd, label_sum, n] = PoissonHalfDeviance(data); auto [left_phd, label_sum_left, n_left] = PoissonHalfDeviance(left_data); auto [right_phd, label_sum_right, n_right] = PoissonHalfDeviance(right_data); -======= - auto PoissonHalfDeviance( - std::vector const& hist) // 1/n * sum(y_true * log(y_true/y_pred) + y_pred - y_true) - { - BinT aggregate{BinT()}; - aggregate = std::accumulate(hist.begin(), hist.end(), aggregate); - assert(aggregate.count > 0); - auto const y_mean = aggregate.label_sum / aggregate.count; - auto poisson_half_deviance{DataT(0.0)}; - - std::for_each(hist.begin(), hist.end(), [&](BinT const& h) { - auto log_y = raft::myLog(h.label_sum ? 
h.label_sum : DataT(1.0)); // we don't want nans - poisson_half_deviance += h.label_sum * (log_y - raft::myLog(y_mean)) + y_mean - h.label_sum; - }); - - poisson_half_deviance /= aggregate.count; - return std::make_tuple( - poisson_half_deviance, aggregate.label_sum, static_cast(aggregate.count)); - } - - auto PoissonGroundTruthGain(std::vector const& pdf_hist, std::size_t split_bin_index) - { - std::vector left_pdf_hist{pdf_hist.begin(), pdf_hist.begin() + split_bin_index + 1}; - std::vector right_pdf_hist{pdf_hist.begin() + split_bin_index + 1, pdf_hist.end()}; - - auto [parent_phd, label_sum, n] = PoissonHalfDeviance(pdf_hist); - auto [left_phd, label_sum_left, n_left] = PoissonHalfDeviance(left_pdf_hist); - auto [right_phd, label_sum_right, n_right] = PoissonHalfDeviance(right_pdf_hist); ->>>>>>> d0aaafc51703cbe7efca995f495f7ab9731c9dd0 auto gain = parent_phd - ((n_left / n) * left_phd + (n_right / n) * right_phd); // gain in long form without proxy @@ -910,7 +897,43 @@ class ObjectiveTest : public ::testing::TestWithParam { return gain; } -<<<<<<< HEAD + auto Entropy(std::vector const& data) + { // sum((n_c/n_total)*(log(n_c/n_total))) + double entropy(0); + for (auto c = 0; c < params.n_classes; ++c) { + IdxT sum(0); + std::for_each(data.begin(), data.end(), [&](auto d) { + if (d == DataT(c)) ++sum; + }); + double class_proba = double(sum) / data.size(); + entropy += -class_proba * raft::myLog(class_proba ? class_proba : DataT(1)) / raft::myLog(DataT(2)); // adding gain + } + return entropy; + } + + auto EntropyGroundTruthGain(std::vector const& data, std::size_t const split_bin_index) + { + auto bin_width = raft::ceildiv(params.n_rows, params.n_bins); + std::vector left_data(data.begin(), data.begin() + (split_bin_index + 1) * bin_width); + std::vector right_data(data.begin() + (split_bin_index + 1) * bin_width, data.end()); + + auto parent_entropy = Entropy(data); + auto left_entropy = Entropy(left_data); + auto right_entropy = Entropy(right_data); + double n = data.size(); + double left_n = left_data.size(); + double right_n = right_data.size(); + + auto gain = parent_entropy - ((left_n / n) * left_entropy + (right_n / n) * right_entropy); + + // edge cases + if (left_n < params.min_samples_leaf or right_n < params.min_samples_leaf) { + return -std::numeric_limits::max(); + } else { + return gain; + } + } + auto GiniImpurity(std::vector const& data) { // sum((n_c/n_total)(1-(n_c/n_total))) double gini(0); @@ -937,39 +960,6 @@ class ObjectiveTest : public ::testing::TestWithParam { double n = data.size(); double left_n = left_data.size(); double right_n = right_data.size(); -======= - auto GiniImpurity(std::vector const& hist) - { // sum((n_c/n_total)(1-(n_c/n_total))) - auto gini{double(0)}; - auto n_bins = hist.size() / params.n_classes; - auto n_instances = std::accumulate(hist.begin(), hist.end(), BinT()).x; // total instances - for (auto c = 0; c < params.n_classes; ++c) { - auto begin_iter = hist.begin() + c * n_bins; - auto end_iter = hist.begin() + (c + 1) * n_bins; - double class_proba = std::accumulate(begin_iter, end_iter, BinT()).x; // instances of class c - class_proba /= n_instances; // probability of class c - gini += class_proba * (1 - class_proba); // adding gain - } - return std::make_pair(gini, double(n_instances)); - } - - auto GiniGroundTruthGain(std::vector const& pdf_hist, std::size_t const split_bin_index) - { - std::vector left_pdf_hist, right_pdf_hist; - - for (auto c = 0; c < params.n_classes; ++c) { // decompose the pdf_hist - auto start = 
pdf_hist.begin() + c * params.n_bins; - auto split = pdf_hist.begin() + c * params.n_bins + split_bin_index + 1; - auto end = pdf_hist.begin() + (c + 1) * params.n_bins; - - left_pdf_hist.insert(left_pdf_hist.end(), start, split); - right_pdf_hist.insert(right_pdf_hist.end(), split, end); - } - - auto [parent_gini, n] = GiniImpurity(pdf_hist); - auto [left_gini, left_n] = GiniImpurity(left_pdf_hist); - auto [right_gini, right_n] = GiniImpurity(right_pdf_hist); ->>>>>>> d0aaafc51703cbe7efca995f495f7ab9731c9dd0 auto gain = parent_gini - ((left_n / n) * left_gini + (right_n / n) * right_gini); @@ -981,37 +971,33 @@ class ObjectiveTest : public ::testing::TestWithParam { } } -<<<<<<< HEAD auto GroundTruthGain(std::vector const& data, std::size_t const split_bin_index) -======= - auto GroundTruthGain(std::vector const& pdf_hist, std::size_t const split_bin_index) ->>>>>>> d0aaafc51703cbe7efca995f495f7ab9731c9dd0 { if constexpr (std::is_same>::value) // mean squared error + { + return MSEGroundTruthGain(data, split_bin_index); + } else if constexpr (std::is_same>::value) // poisson { -<<<<<<< HEAD return PoissonGroundTruthGain(data, split_bin_index); } else if constexpr (std::is_same>::value) // gini + GammaObjectiveFunction>::value) // gamma { return GammaGroundTruthGain(data, split_bin_index); } else if constexpr (std::is_same< ObjectiveT, - InverseGaussianObjectiveFunction>::value) // gini + InverseGaussianObjectiveFunction>::value) // inverse gaussian { return InverseGaussianGroundTruthGain(data, split_bin_index); } else if constexpr (std::is_same>::value) // gini + EntropyObjectiveFunction>::value) // entropy { - return GiniGroundTruthGain(data, split_bin_index); -======= - return PoissonGroundTruthGain(pdf_hist, split_bin_index); + return EntropyGroundTruthGain(data, split_bin_index); } else if constexpr (std::is_same>::value) // gini { - return GiniGroundTruthGain(pdf_hist, split_bin_index); ->>>>>>> d0aaafc51703cbe7efca995f495f7ab9731c9dd0 + return GiniGroundTruthGain(data, split_bin_index); } return double(0.0); } @@ -1037,17 +1023,10 @@ class ObjectiveTest : public ::testing::TestWithParam { params = ::testing::TestWithParam::GetParam(); ObjectiveT objective(params.n_classes, params.min_samples_leaf); -<<<<<<< HEAD auto data = GenRandomData(); auto [cdf_hist, pdf_hist] = GenHist(data); auto split_bin_index = RandUnder(params.n_bins); auto ground_truth_gain = GroundTruthGain(data, split_bin_index); -======= - auto [cdf_hist, pdf_hist] = GenHist(); - - auto split_bin_index = RandUnder(params.n_bins); - auto ground_truth_gain = GroundTruthGain(pdf_hist, split_bin_index); ->>>>>>> d0aaafc51703cbe7efca995f495f7ab9731c9dd0 auto hypothesis_gain = objective.GainPerSplit(&cdf_hist[0], split_bin_index, @@ -1059,8 +1038,14 @@ class ObjectiveTest : public ::testing::TestWithParam { } }; +const std::vector mse_objective_test_parameters = { + {9507819643927052255LLU, 2048, 64, 1, 0, 0.00001}, + {9507819643927052259LLU, 2048, 128, 1, 1, 0.00001}, + {9507819643927052251LLU, 2048, 256, 1, 1, 0.00001}, + {9507819643927052258LLU, 2048, 512, 1, 5, 0.00001}, +}; + const std::vector poisson_objective_test_parameters = { -<<<<<<< HEAD {9507819643927052255LLU, 2048, 64, 1, 0, 0.00001}, {9507819643927052259LLU, 2048, 128, 1, 1, 0.00001}, {9507819643927052251LLU, 2048, 256, 1, 1, 0.00001}, @@ -1081,32 +1066,32 @@ const std::vector invgauss_objective_test_parameters = {9507819643927052258LLU, 2048, 512, 1, 5, 0.00001}, }; -const std::vector gini_objective_test_parameters = { +const std::vector 
entropy_objective_test_parameters = { {9507819643927052255LLU, 2048, 64, 2, 0, 0.00001}, {9507819643927052256LLU, 2048, 128, 10, 1, 0.00001}, {9507819643927052257LLU, 2048, 256, 100, 1, 0.00001}, {9507819643927052258LLU, 2048, 512, 100, 5, 0.00001}, -======= - {9507819643927052255LLU, 64, 1, 0, 0.00001}, - {9507819643927052259LLU, 128, 1, 1, 0.00001}, - {9507819643927052251LLU, 256, 1, 1, 0.00001}, - {9507819643927052258LLU, 512, 1, 5, 0.00001}, }; + const std::vector gini_objective_test_parameters = { - {9507819643927052255LLU, 64, 2, 0, 0.00001}, - {9507819643927052256LLU, 128, 10, 1, 0.00001}, - {9507819643927052257LLU, 256, 100, 1, 0.00001}, - {9507819643927052258LLU, 512, 100, 5, 0.00001}, ->>>>>>> d0aaafc51703cbe7efca995f495f7ab9731c9dd0 + {9507819643927052255LLU, 2048, 64, 2, 0, 0.00001}, + {9507819643927052256LLU, 2048, 128, 10, 1, 0.00001}, + {9507819643927052257LLU, 2048, 256, 100, 1, 0.00001}, + {9507819643927052258LLU, 2048, 512, 100, 5, 0.00001}, }; +// mse objective test +typedef ObjectiveTest> MSEObjectiveTestD; +TEST_P(MSEObjectiveTestD, MSEObjectiveTest) {} +INSTANTIATE_TEST_CASE_P(RfTests, + MSEObjectiveTestD, + ::testing::ValuesIn(mse_objective_test_parameters)); // poisson objective test typedef ObjectiveTest> PoissonObjectiveTestD; TEST_P(PoissonObjectiveTestD, poissonObjectiveTest) {} INSTANTIATE_TEST_CASE_P(RfTests, PoissonObjectiveTestD, ::testing::ValuesIn(poisson_objective_test_parameters)); -<<<<<<< HEAD // gamma objective test typedef ObjectiveTest> GammaObjectiveTestD; TEST_P(GammaObjectiveTestD, GammaObjectiveTest) {} @@ -1120,8 +1105,13 @@ TEST_P(InverseGaussianObjectiveTestD, InverseGaussianObjectiveTest) {} INSTANTIATE_TEST_CASE_P(RfTests, InverseGaussianObjectiveTestD, ::testing::ValuesIn(invgauss_objective_test_parameters)); -======= ->>>>>>> d0aaafc51703cbe7efca995f495f7ab9731c9dd0 + +// entropy objective test +typedef ObjectiveTest> EntropyObjectiveTestD; +TEST_P(EntropyObjectiveTestD, entropyObjectiveTest) {} +INSTANTIATE_TEST_CASE_P(RfTests, + EntropyObjectiveTestD, + ::testing::ValuesIn(entropy_objective_test_parameters)); // gini objective test typedef ObjectiveTest> GiniObjectiveTestD; diff --git a/python/cuml/dask/ensemble/randomforestclassifier.py b/python/cuml/dask/ensemble/randomforestclassifier.py index ff33880e81..c867f63841 100755 --- a/python/cuml/dask/ensemble/randomforestclassifier.py +++ b/python/cuml/dask/ensemble/randomforestclassifier.py @@ -74,21 +74,14 @@ class RandomForestClassifier(BaseRandomForestModel, DelayedPredictionMixin, run different models concurrently in different streams by creating handles in several streams. If it is None, a new one is created. -<<<<<<< HEAD - split_criterion : int (default = 0) - The criterion used to split nodes. - 0 for GINI, 1 for ENTROPY, 5 for CRITERION_END. - 2 and 3 not valid for classification - split_algo : 0 for HIST and 1 for GLOBAL_QUANTILE (default = 1) - the algorithm to determine how nodes are split in the tree. -======= split_criterion : int or string (default = 0 ('gini')) The criterion used to split nodes. 0 or 'gini' for GINI, 1 or 'entropy' for ENTROPY, 2 or 'mse' for MSE, 4 or 'poisson' for POISSON, - 2, 'mse', 4, 'poisson' not valid for classification ->>>>>>> d0aaafc51703cbe7efca995f495f7ab9731c9dd0 + 5 or 'gamma' for GAMMA, + 6 or 'inverse_gaussian' for INVERSE_GAUSSIAN, + 2, 'mse', 4, 'poisson', 5, 'gamma', 6, 'inverse_gaussian' not valid for classification bootstrap : boolean (default = True) Control bootstrapping. 
If set, each tree in the forest is built diff --git a/python/cuml/dask/ensemble/randomforestregressor.py b/python/cuml/dask/ensemble/randomforestregressor.py index 846e1cc344..328484d6ae 100755 --- a/python/cuml/dask/ensemble/randomforestregressor.py +++ b/python/cuml/dask/ensemble/randomforestregressor.py @@ -72,6 +72,8 @@ class RandomForestRegressor(BaseRandomForestModel, DelayedPredictionMixin, 0 or 'gini' for GINI, 1 or 'entropy' for ENTROPY, 2 or 'mse' for MSE, 4 or 'poisson' for POISSON, + 5 or 'gamma' for GAMMA, + 6 or 'inverse_gaussian' for INVERSE_GAUSSIAN, 0, 'gini', 1, 'entropy' not valid for regression bootstrap : boolean (default = True) Control bootstrapping. diff --git a/python/cuml/ensemble/randomforest_common.pyx b/python/cuml/ensemble/randomforest_common.pyx index 264aafa084..ad43dc07b6 100644 --- a/python/cuml/ensemble/randomforest_common.pyx +++ b/python/cuml/ensemble/randomforest_common.pyx @@ -58,7 +58,10 @@ class BaseRandomForestModel(Base): '2': MSE, 'mse': MSE, '3': MAE, 'mae': MAE, '4': POISSON, 'poisson': POISSON, - '5': CRITERION_END} + '5': GAMMA, 'gamma': GAMMA, + '6': INVERSE_GAUSSIAN, + 'inverse_gaussian': INVERSE_GAUSSIAN, + '6': CRITERION_END} classes_ = CumlArrayDescriptor() diff --git a/python/cuml/ensemble/randomforest_shared.pxd b/python/cuml/ensemble/randomforest_shared.pxd index 389eec5a45..638b1d7a10 100644 --- a/python/cuml/ensemble/randomforest_shared.pxd +++ b/python/cuml/ensemble/randomforest_shared.pxd @@ -43,6 +43,8 @@ cdef extern from "cuml/ensemble/randomforest.hpp" namespace "ML": MSE, MAE, POISSON, + GAMMA, + INVERSE_GAUSSIAN, CRITERION_END cdef extern from "cuml/ensemble/randomforest.hpp" namespace "ML": diff --git a/python/cuml/ensemble/randomforestregressor.pyx b/python/cuml/ensemble/randomforestregressor.pyx index fdb4c9f369..0ede37da66 100644 --- a/python/cuml/ensemble/randomforestregressor.pyx +++ b/python/cuml/ensemble/randomforestregressor.pyx @@ -166,6 +166,8 @@ class RandomForestRegressor(BaseRandomForestModel, 0 or 'gini' for GINI, 1 or 'entropy' for ENTROPY, 2 or 'mse' for MSE, 4 or 'poisson' for POISSON, + 5 or 'gamma' for GAMMA, + 6 or 'inverse_gaussian' for INVERSE_GAUSSIAN, 0, 'gini', 1, 'entropy' not valid for regression. bootstrap : boolean (default = True) Control bootstrapping. 
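Taken together with the docstring updates above, selecting one of the new criteria is a one-line change for callers. The following sketch is not part of the patch series; it assumes a cuML build that already contains these commits, and the dataset and hyperparameters are purely illustrative:

.. code-block:: python

    # Illustrative only: 'gamma'/5 and 'inverse_gaussian'/6 require strictly
    # positive labels; 'poisson'/4 requires non-negative labels with a
    # positive sum.
    import numpy as np
    from cuml.ensemble import RandomForestRegressor as curfr

    X = np.random.random((1000, 4)).astype(np.float32)
    y = np.random.gamma(shape=1.0, size=1000).astype(np.float32)

    model = curfr(split_criterion='gamma',  # equivalently: split_criterion=5
                  n_estimators=10, max_depth=8)
    preds = model.fit(X, y).predict(X)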
From 11b2f4e375db5ade05bd7276d7a1b047aa57969c Mon Sep 17 00:00:00 2001 From: venkywonka Date: Mon, 4 Oct 2021 18:08:06 +0530 Subject: [PATCH 29/42] add python tests and refactor objectives --- .../batched-levelalgo/metrics.cuh | 140 +++++++++++------- cpp/test/sg/rf_test.cu | 57 +++++-- python/cuml/test/test_random_forest.py | 43 ++++-- 3 files changed, 161 insertions(+), 79 deletions(-) diff --git a/cpp/src/decisiontree/batched-levelalgo/metrics.cuh b/cpp/src/decisiontree/batched-levelalgo/metrics.cuh index 974eaa3206..2962b6385d 100644 --- a/cpp/src/decisiontree/batched-levelalgo/metrics.cuh +++ b/cpp/src/decisiontree/batched-levelalgo/metrics.cuh @@ -236,44 +236,17 @@ class EntropyObjectiveFunction { } }; -/** @brief The base class for the tweedie family of objective functions: - * mean-squared-error(p=0), poisson(p=1), gamma(p=2) and inverse gaussian(p=3) - **/ template -class TweedieObjectiveFunction { +class MSEObjectiveFunction { public: using DataT = DataT_; using LabelT = LabelT_; using IdxT = IdxT_; - using BinT = AggregateBin; - - protected: - IdxT min_samples_leaf; - - public: - HDI TweedieObjectiveFunction(IdxT min_samples_leaf) : min_samples_leaf(min_samples_leaf) {} - - DI IdxT NumClasses() const { return 1; } - - static DI void SetLeafVector(BinT const* shist, int nclasses, DataT* out) - { - for (int i = 0; i < nclasses; i++) { - out[i] = shist[i].label_sum / shist[i].count; - } - } -}; - -template -class MSEObjectiveFunction : public TweedieObjectiveFunction { - public: - using DataT = DataT_; - using LabelT = LabelT_; - using IdxT = IdxT_; - // using BinT = typename TweedieObjectiveFunction::BinT; using BinT = AggregateBin; + IdxT min_samples_leaf; - HDI MSEObjectiveFunction(IdxT nclasses, IdxT min_samples_leaf) - : TweedieObjectiveFunction{min_samples_leaf} + HDI MSEObjectiveFunction(IdxT nclasses, IdxT min_samples_leaf) : + min_samples_leaf(min_samples_leaf) { } @@ -283,16 +256,16 @@ class MSEObjectiveFunction : public TweedieObjectiveFunctionmin_samples_leaf || nRight < this->min_samples_leaf) { + if (nLeft < min_samples_leaf || nRight < min_samples_leaf) { return -std::numeric_limits::max(); } else { auto label_sum = hist[nbins - 1].label_sum; - DataT parent_obj = -label_sum * label_sum / len; + DataT parent_obj = -label_sum * label_sum * invLen; DataT left_obj = -(hist[i].label_sum * hist[i].label_sum) / nLeft; DataT right_label_sum = hist[i].label_sum - label_sum; DataT right_obj = -(right_label_sum * right_label_sum) / nRight; gain = parent_obj - (left_obj + right_obj); - gain *= 0.5 * invLen; + gain *= DataT(0.5) * invLen; return gain; } @@ -304,25 +277,35 @@ class MSEObjectiveFunction : public TweedieObjectiveFunction sp; for (IdxT i = threadIdx.x; i < nbins; i += blockDim.x) { auto nLeft = shist[i].count; - sp.update({sbins[i], col, this->GainPerSplit(shist, i, nbins, len, nLeft), nLeft}); + sp.update({sbins[i], col, GainPerSplit(shist, i, nbins, len, nLeft), nLeft}); } return sp; } + + DI IdxT NumClasses() const { return 1; } + + static DI void SetLeafVector(BinT const* shist, int nclasses, DataT* out) + { + for (int i = 0; i < nclasses; i++) { + out[i] = shist[i].label_sum / shist[i].count; + } + } }; template -class PoissonObjectiveFunction : public TweedieObjectiveFunction { +class PoissonObjectiveFunction { public: using DataT = DataT_; using LabelT = LabelT_; using IdxT = IdxT_; - // using BinT = typename TweedieObjectiveFunction::BinT; using BinT = AggregateBin; + IdxT min_samples_leaf; static constexpr auto eps_ = 10 * std::numeric_limits::epsilon(); HDI 
PoissonObjectiveFunction(IdxT nclasses, IdxT min_samples_leaf) - : TweedieObjectiveFunction{min_samples_leaf} + // : TweedieObjectiveFunction{min_samples_leaf} + : min_samples_leaf(min_samples_leaf) { } @@ -343,7 +326,7 @@ class PoissonObjectiveFunction : public TweedieObjectiveFunctionmin_samples_leaf || nRight < this->min_samples_leaf) + if (nLeft < min_samples_leaf || nRight < min_samples_leaf) return -std::numeric_limits::max(); auto label_sum = hist[nbins - 1].label_sum; @@ -358,7 +341,7 @@ class PoissonObjectiveFunction : public TweedieObjectiveFunction sp; for (IdxT i = threadIdx.x; i < nbins; i += blockDim.x) { auto nLeft = shist[i].count; - sp.update({sbins[i], col, this->GainPerSplit(shist, i, nbins, len, nLeft), nLeft}); + sp.update({sbins[i], col, GainPerSplit(shist, i, nbins, len, nLeft), nLeft}); } return sp; } + + DI IdxT NumClasses() const { return 1; } + + static DI void SetLeafVector(BinT const* shist, int nclasses, DataT* out) + { + for (int i = 0; i < nclasses; i++) { + out[i] = shist[i].label_sum / shist[i].count; + } + } }; template -class GammaObjectiveFunction : public TweedieObjectiveFunction { +class GammaObjectiveFunction { public: using DataT = DataT_; using LabelT = LabelT_; using IdxT = IdxT_; using BinT = AggregateBin; static constexpr auto eps_ = 10 * std::numeric_limits::epsilon(); + IdxT min_samples_leaf; HDI GammaObjectiveFunction(IdxT nclasses, IdxT min_samples_leaf) - : TweedieObjectiveFunction{min_samples_leaf} + : min_samples_leaf{min_samples_leaf} { } + /** + * @brief compute the gamma impurity reduction (or purity gain) for each split + * + * @note This method is used to speed up the search for the best split + * by calculating the gain using a proxy gamma half deviance reduction. + * It is a proxy quantity such that the split that maximizes this value + * also maximizes the impurity improvement. It neglects all constant terms + * of the impurity decrease for a given split. + * The Gain is the difference in the proxy impurities of the parent and the + * weighted sum of impurities of its children. + */ HDI DataT GainPerSplit(BinT const* hist, IdxT i, IdxT nbins, IdxT len, IdxT nLeft) const { IdxT nRight = len - nLeft; // if there aren't enough samples in this split, don't bother! 
- if (nLeft < this->min_samples_leaf || nRight < this->min_samples_leaf) + if (nLeft < min_samples_leaf || nRight < min_samples_leaf) return -std::numeric_limits::max(); DataT label_sum = hist[nbins - 1].label_sum; @@ -406,11 +410,11 @@ class GammaObjectiveFunction : public TweedieObjectiveFunction::max(); // compute the gain to be - DataT parent_obj = raft::myLog(label_sum / len); - DataT left_obj = (DataT(nLeft) / DataT(len)) * raft::myLog(left_label_sum / nLeft); - DataT right_obj = (DataT(nRight) / DataT(len)) * raft::myLog(right_label_sum / nRight); + DataT parent_obj = len * raft::myLog(label_sum / len); + DataT left_obj = nLeft * raft::myLog(left_label_sum / nLeft); + DataT right_obj = nRight * raft::myLog(right_label_sum / nRight); DataT gain = parent_obj - (left_obj + right_obj); - // gain = gain / DataT(len); + gain = gain / DataT(len); return gain; } @@ -421,33 +425,53 @@ class GammaObjectiveFunction : public TweedieObjectiveFunction sp; for (IdxT i = threadIdx.x; i < nbins; i += blockDim.x) { auto nLeft = shist[i].count; - sp.update({sbins[i], col, this->GainPerSplit(shist, i, nbins, len, nLeft), nLeft}); + sp.update({sbins[i], col, GainPerSplit(shist, i, nbins, len, nLeft), nLeft}); } return sp; } + DI IdxT NumClasses() const { return 1; } + + static DI void SetLeafVector(BinT const* shist, int nclasses, DataT* out) + { + for (int i = 0; i < nclasses; i++) { + out[i] = shist[i].label_sum / shist[i].count; + } + } }; template -class InverseGaussianObjectiveFunction : public TweedieObjectiveFunction { +class InverseGaussianObjectiveFunction { public: using DataT = DataT_; using LabelT = LabelT_; using IdxT = IdxT_; using BinT = AggregateBin; static constexpr auto eps_ = 10 * std::numeric_limits::epsilon(); + IdxT min_samples_leaf; HDI InverseGaussianObjectiveFunction(IdxT nclasses, IdxT min_samples_leaf) - : TweedieObjectiveFunction{min_samples_leaf} + : min_samples_leaf{min_samples_leaf} { } + /** + * @brief compute the inverse gaussian impurity reduction (or purity gain) for each split + * + * @note This method is used to speed up the search for the best split + * by calculating the gain using a proxy inverse gaussian half deviance reduction. + * It is a proxy quantity such that the split that maximizes this value + * also maximizes the impurity improvement. It neglects all constant terms + * of the impurity decrease for a given split. + * The Gain is the difference in the proxy impurities of the parent and the + * weighted sum of impurities of its children. + */ HDI DataT GainPerSplit(BinT const* hist, IdxT i, IdxT nbins, IdxT len, IdxT nLeft) const { // get the lens' IdxT nRight = len - nLeft; // if there aren't enough samples in this split, don't bother! 
- if (nLeft < this->min_samples_leaf || nRight < this->min_samples_leaf) + if (nLeft < min_samples_leaf || nRight < min_samples_leaf) return -std::numeric_limits::max(); auto label_sum = hist[nbins - 1].label_sum; @@ -462,7 +486,7 @@ class InverseGaussianObjectiveFunction : public TweedieObjectiveFunction sp; for (IdxT i = threadIdx.x; i < nbins; i += blockDim.x) { auto nLeft = shist[i].count; - sp.update({sbins[i], col, this->GainPerSplit(shist, i, nbins, len, nLeft), nLeft}); + sp.update({sbins[i], col, GainPerSplit(shist, i, nbins, len, nLeft), nLeft}); } return sp; } + DI IdxT NumClasses() const { return 1; } + + static DI void SetLeafVector(BinT const* shist, int nclasses, DataT* out) + { + for (int i = 0; i < nclasses; i++) { + out[i] = shist[i].label_sum / shist[i].count; + } + } }; } // end namespace DT } // end namespace ML \ No newline at end of file diff --git a/cpp/test/sg/rf_test.cu b/cpp/test/sg/rf_test.cu index ed95bd3207..92e658c3bc 100644 --- a/cpp/test/sg/rf_test.cu +++ b/cpp/test/sg/rf_test.cu @@ -15,7 +15,6 @@ */ #include #include -#include #include #include @@ -899,13 +898,13 @@ class ObjectiveTest : public ::testing::TestWithParam { auto Entropy(std::vector const& data) { // sum((n_c/n_total)*(log(n_c/n_total))) - double entropy(0); + DataT entropy(0); for (auto c = 0; c < params.n_classes; ++c) { IdxT sum(0); std::for_each(data.begin(), data.end(), [&](auto d) { if (d == DataT(c)) ++sum; }); - double class_proba = double(sum) / data.size(); + DataT class_proba = DataT(sum) / data.size(); entropy += -class_proba * raft::myLog(class_proba ? class_proba : DataT(1)) / raft::myLog(DataT(2)); // adding gain } return entropy; @@ -920,9 +919,9 @@ class ObjectiveTest : public ::testing::TestWithParam { auto parent_entropy = Entropy(data); auto left_entropy = Entropy(left_data); auto right_entropy = Entropy(right_data); - double n = data.size(); - double left_n = left_data.size(); - double right_n = right_data.size(); + DataT n = data.size(); + DataT left_n = left_data.size(); + DataT right_n = right_data.size(); auto gain = parent_entropy - ((left_n / n) * left_entropy + (right_n / n) * right_entropy); @@ -936,13 +935,13 @@ class ObjectiveTest : public ::testing::TestWithParam { auto GiniImpurity(std::vector const& data) { // sum((n_c/n_total)(1-(n_c/n_total))) - double gini(0); + DataT gini(0); for (auto c = 0; c < params.n_classes; ++c) { IdxT sum(0); std::for_each(data.begin(), data.end(), [&](auto d) { if (d == DataT(c)) ++sum; }); - double class_proba = double(sum) / data.size(); + DataT class_proba = DataT(sum) / data.size(); gini += class_proba * (1 - class_proba); // adding gain } return gini; @@ -957,9 +956,9 @@ class ObjectiveTest : public ::testing::TestWithParam { auto parent_gini = GiniImpurity(data); auto left_gini = GiniImpurity(left_data); auto right_gini = GiniImpurity(right_data); - double n = data.size(); - double left_n = left_data.size(); - double right_n = right_data.size(); + DataT n = data.size(); + DataT left_n = left_data.size(); + DataT right_n = right_data.size(); auto gain = parent_gini - ((left_n / n) * left_gini + (right_n / n) * right_gini); @@ -999,7 +998,7 @@ class ObjectiveTest : public ::testing::TestWithParam { { return GiniGroundTruthGain(data, split_bin_index); } - return double(0.0); + return DataT(0.0); } auto NumLeftOfBin(std::vector const& cdf_hist, IdxT idx) @@ -1086,18 +1085,36 @@ TEST_P(MSEObjectiveTestD, MSEObjectiveTest) {} INSTANTIATE_TEST_CASE_P(RfTests, MSEObjectiveTestD, ::testing::ValuesIn(mse_objective_test_parameters)); 
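The proxy gains implemented above can be cross-checked on the host with a few lines of NumPy. The sketch below is illustrative and not part of the patch (all names are local to the sketch); it mirrors what `PoissonObjectiveFunction::GainPerSplit` computes from the scanned histogram. Because the `sum(y * log(y))` terms of the half deviance cancel between parent and children, this proxy equals the exact Poisson half-deviance reduction when each node predicts its label mean:

.. code-block:: python

    import numpy as np

    def poisson_proxy_gain(y, n_left):
        # Split y into y[:n_left] (left child) and y[n_left:] (right child);
        # mirrors parent_obj - (left_obj + right_obj), scaled by invLen.
        n, n_right = len(y), len(y) - n_left
        s, s_left = y.sum(), y[:n_left].sum()
        s_right = s - s_left
        parent_obj = -s * np.log(s / n)
        left_obj = -s_left * np.log(s_left / n_left)
        right_obj = -s_right * np.log(s_right / n_right)
        return (parent_obj - (left_obj + right_obj)) / n

    rng = np.random.default_rng(0)
    y = rng.poisson(2.0, size=64) + 1e-6   # keep all partial sums positive
    print(poisson_proxy_gain(y, n_left=32))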
+typedef ObjectiveTest> MSEObjectiveTestF; +TEST_P(MSEObjectiveTestF, MSEObjectiveTest) {} +INSTANTIATE_TEST_CASE_P(RfTests, + MSEObjectiveTestF, + ::testing::ValuesIn(mse_objective_test_parameters)); + // poisson objective test typedef ObjectiveTest> PoissonObjectiveTestD; TEST_P(PoissonObjectiveTestD, poissonObjectiveTest) {} INSTANTIATE_TEST_CASE_P(RfTests, PoissonObjectiveTestD, ::testing::ValuesIn(poisson_objective_test_parameters)); +typedef ObjectiveTest> PoissonObjectiveTestF; +TEST_P(PoissonObjectiveTestF, poissonObjectiveTest) {} +INSTANTIATE_TEST_CASE_P(RfTests, + PoissonObjectiveTestF, + ::testing::ValuesIn(poisson_objective_test_parameters)); + // gamma objective test typedef ObjectiveTest> GammaObjectiveTestD; TEST_P(GammaObjectiveTestD, GammaObjectiveTest) {} INSTANTIATE_TEST_CASE_P(RfTests, GammaObjectiveTestD, ::testing::ValuesIn(gamma_objective_test_parameters)); +typedef ObjectiveTest> GammaObjectiveTestF; +TEST_P(GammaObjectiveTestF, GammaObjectiveTest) {} +INSTANTIATE_TEST_CASE_P(RfTests, + GammaObjectiveTestF, + ::testing::ValuesIn(gamma_objective_test_parameters)); + // InvGauss objective test typedef ObjectiveTest> InverseGaussianObjectiveTestD; @@ -1105,6 +1122,12 @@ TEST_P(InverseGaussianObjectiveTestD, InverseGaussianObjectiveTest) {} INSTANTIATE_TEST_CASE_P(RfTests, InverseGaussianObjectiveTestD, ::testing::ValuesIn(invgauss_objective_test_parameters)); +typedef ObjectiveTest> + InverseGaussianObjectiveTestF; +TEST_P(InverseGaussianObjectiveTestF, InverseGaussianObjectiveTest) {} +INSTANTIATE_TEST_CASE_P(RfTests, + InverseGaussianObjectiveTestF, + ::testing::ValuesIn(invgauss_objective_test_parameters)); // entropy objective test typedef ObjectiveTest> EntropyObjectiveTestD; @@ -1112,6 +1135,11 @@ TEST_P(EntropyObjectiveTestD, entropyObjectiveTest) {} INSTANTIATE_TEST_CASE_P(RfTests, EntropyObjectiveTestD, ::testing::ValuesIn(entropy_objective_test_parameters)); +typedef ObjectiveTest> EntropyObjectiveTestF; +TEST_P(EntropyObjectiveTestF, entropyObjectiveTest) {} +INSTANTIATE_TEST_CASE_P(RfTests, + EntropyObjectiveTestF, + ::testing::ValuesIn(entropy_objective_test_parameters)); // gini objective test typedef ObjectiveTest> GiniObjectiveTestD; @@ -1119,6 +1147,11 @@ TEST_P(GiniObjectiveTestD, giniObjectiveTest) {} INSTANTIATE_TEST_CASE_P(RfTests, GiniObjectiveTestD, ::testing::ValuesIn(gini_objective_test_parameters)); +typedef ObjectiveTest> GiniObjectiveTestF; +TEST_P(GiniObjectiveTestF, giniObjectiveTest) {} +INSTANTIATE_TEST_CASE_P(RfTests, + GiniObjectiveTestF, + ::testing::ValuesIn(gini_objective_test_parameters)); } // end namespace DT } // end namespace ML diff --git a/python/cuml/test/test_random_forest.py b/python/cuml/test/test_random_forest.py index 9d1d7bb486..9eb3d7a46a 100644 --- a/python/cuml/test/test_random_forest.py +++ b/python/cuml/test/test_random_forest.py @@ -32,7 +32,7 @@ from sklearn.ensemble import RandomForestClassifier as skrfc from sklearn.ensemble import RandomForestRegressor as skrfr from sklearn.metrics import accuracy_score, mean_squared_error, \ - mean_poisson_deviance + mean_tweedie_deviance from sklearn.datasets import fetch_california_housing, \ make_classification, make_regression, load_iris, load_breast_cancer, \ load_boston @@ -187,21 +187,34 @@ def special_reg(request): return X, y -@pytest.mark.parametrize("lam", [0.01, 0.1]) @pytest.mark.parametrize("max_depth", [2, 4]) -def test_poisson_convergence(lam, max_depth): +@pytest.mark.parametrize("split_criterion", + ["poisson", "gamma", "inverse_gaussian"]) +def 
test_tweedie_convergence(max_depth, split_criterion): np.random.seed(33) bootstrap = None max_features = 1.0 n_estimators = 1 min_impurity_decrease = 1e-5 n_datapoints = 100000 - # generating random poisson dataset + tweedie = { + "poisson": + {"power": 1, + "gen": np.random.poisson, "args": [0.1]}, + "gamma": + {"power": 2, + "gen": np.random.gamma, "args": [1.0]}, + "inverse_gaussian": + {"power": 3, + "gen": np.random.wald, "args": [0.1, 1.0]} + } + # generating random dataset with tweedie distribution X = np.random.random((n_datapoints, 4)).astype(np.float32) - y = np.random.poisson(lam=lam, size=n_datapoints).astype(np.float32) + y = tweedie[split_criterion]["gen"](*tweedie[split_criterion]["args"], + size=n_datapoints).astype(np.float32) - poisson_preds = curfr( - split_criterion=4, + tweedie_preds = curfr( + split_criterion=split_criterion, max_depth=max_depth, n_estimators=n_estimators, bootstrap=bootstrap, @@ -216,12 +229,16 @@ def test_poisson_convergence(lam, max_depth): min_impurity_decrease=min_impurity_decrease).fit(X, y).predict(X) # y should not be non-positive for mean_poisson_deviance mask = mse_preds > 0 - mse_mpd = mean_poisson_deviance(y[mask], mse_preds[mask]) - poisson_mpd = mean_poisson_deviance(y, poisson_preds) - - # model trained on poisson data with - # poisson criterion must perform better on poisson loss - assert mse_mpd >= poisson_mpd + mse_tweedie_deviance = mean_tweedie_deviance(y[mask], + mse_preds[mask], + power=tweedie[split_criterion]["power"]) + tweedie_tweedie_deviance = mean_tweedie_deviance(y[mask], + tweedie_preds[mask], + power=tweedie[split_criterion]["power"]) + + # model trained on tweedie data with + # tweedie criterion must perform better on tweedie loss + assert mse_tweedie_deviance >= tweedie_tweedie_deviance @pytest.mark.parametrize( From 2fa43d7852b5ea7898a32d0139b77ae5db32d77a Mon Sep 17 00:00:00 2001 From: venkywonka Date: Mon, 4 Oct 2021 18:11:36 +0530 Subject: [PATCH 30/42] FIX clang format --- .../batched-levelalgo/metrics.cuh | 16 ++--- cpp/test/sg/rf_test.cu | 67 +++++++++---------- 2 files changed, 40 insertions(+), 43 deletions(-) diff --git a/cpp/src/decisiontree/batched-levelalgo/metrics.cuh b/cpp/src/decisiontree/batched-levelalgo/metrics.cuh index 2962b6385d..10a410dce8 100644 --- a/cpp/src/decisiontree/batched-levelalgo/metrics.cuh +++ b/cpp/src/decisiontree/batched-levelalgo/metrics.cuh @@ -242,11 +242,11 @@ class MSEObjectiveFunction { using DataT = DataT_; using LabelT = LabelT_; using IdxT = IdxT_; - using BinT = AggregateBin; + using BinT = AggregateBin; IdxT min_samples_leaf; - HDI MSEObjectiveFunction(IdxT nclasses, IdxT min_samples_leaf) : - min_samples_leaf(min_samples_leaf) + HDI MSEObjectiveFunction(IdxT nclasses, IdxT min_samples_leaf) + : min_samples_leaf(min_samples_leaf) { } @@ -298,7 +298,7 @@ class PoissonObjectiveFunction { using DataT = DataT_; using LabelT = LabelT_; using IdxT = IdxT_; - using BinT = AggregateBin; + using BinT = AggregateBin; IdxT min_samples_leaf; static constexpr auto eps_ = 10 * std::numeric_limits::epsilon(); @@ -341,7 +341,7 @@ class PoissonObjectiveFunction { DataT parent_obj = -label_sum * raft::myLog(label_sum / len); DataT left_obj = -left_label_sum * raft::myLog(left_label_sum / nLeft); DataT right_obj = -right_label_sum * raft::myLog(right_label_sum / nRight); - DataT gain = parent_obj - (left_obj + right_obj); + DataT gain = parent_obj - (left_obj + right_obj); gain = gain / len; return gain; @@ -379,7 +379,7 @@ class GammaObjectiveFunction { IdxT min_samples_leaf; HDI 
GammaObjectiveFunction(IdxT nclasses, IdxT min_samples_leaf) - : min_samples_leaf{min_samples_leaf} + : min_samples_leaf{min_samples_leaf} { } @@ -486,8 +486,8 @@ class InverseGaussianObjectiveFunction { DataT parent_obj = -DataT(len) * DataT(len) / label_sum; DataT left_obj = -DataT(nLeft) * DataT(nLeft) / left_label_sum; DataT right_obj = -DataT(nRight) * DataT(nRight) / right_label_sum; - DataT gain = parent_obj - (left_obj + right_obj); - gain = gain / (2 * len ); + DataT gain = parent_obj - (left_obj + right_obj); + gain = gain / (2 * len); return gain; } diff --git a/cpp/test/sg/rf_test.cu b/cpp/test/sg/rf_test.cu index 92e658c3bc..dd9e62e2f3 100644 --- a/cpp/test/sg/rf_test.cu +++ b/cpp/test/sg/rf_test.cu @@ -450,7 +450,7 @@ class RfTest : public ::testing::TestWithParam { void SetUp() override { RfTestParams params = ::testing::TestWithParam::GetParam(); - bool is_regression = params.split_criterion != GINI and params.split_criterion != ENTROPY; + bool is_regression = params.split_criterion != GINI and params.split_criterion != ENTROPY; if (params.double_precision) { if (is_regression) { RfSpecialisedTest test(params); @@ -483,16 +483,15 @@ std::vector min_samples_leaf = {1, 10, 30}; std::vector min_samples_split = {2, 10}; std::vector min_impurity_decrease = {0.0f, 1.0f, 10.0f}; std::vector n_streams = {1, 2, 10}; -std::vector split_criterion = { - CRITERION::INVERSE_GAUSSIAN, - CRITERION::GAMMA, - CRITERION::POISSON, - CRITERION::MSE, - CRITERION::GINI, - CRITERION::ENTROPY}; -std::vector seed = {0, 17}; -std::vector n_labels = {2, 10, 20}; -std::vector double_precision = {false, true}; +std::vector split_criterion = {CRITERION::INVERSE_GAUSSIAN, + CRITERION::GAMMA, + CRITERION::POISSON, + CRITERION::MSE, + CRITERION::GINI, + CRITERION::ENTROPY}; +std::vector seed = {0, 17}; +std::vector n_labels = {2, 10, 20}; +std::vector double_precision = {false, true}; int n_tests = 100; @@ -722,7 +721,7 @@ class ObjectiveTest : public ::testing::TestWithParam { auto data_end = data_begin + bin_width; if constexpr (std::is_same::value) { // classification case auto count(IdxT(0)); - std::for_each(data_begin, data_end, [&](auto d){ + std::for_each(data_begin, data_end, [&](auto d) { if (d == c) ++count; }); pdf_hist.emplace_back(count); @@ -741,10 +740,7 @@ class ObjectiveTest : public ::testing::TestWithParam { return std::make_pair(cdf_hist, pdf_hist); } - - auto MSE( - std::vector const& - data) // 1/n * 1/2 * sum((y - y_pred) * (y - y_pred)) + auto MSE(std::vector const& data) // 1/n * 1/2 * sum((y - y_pred) * (y - y_pred)) { DataT sum = std::accumulate(data.begin(), data.end(), DataT(0)); DataT const mean = sum / data.size(); @@ -768,9 +764,9 @@ class ObjectiveTest : public ::testing::TestWithParam { auto [left_mse, label_sum_left, n_left] = MSE(left_data); auto [right_mse, label_sum_right, n_right] = MSE(right_data); - auto gain = parent_mse - - ((n_left / n) * left_mse + // the minimizing objective function is half deviance - (n_right / n) * right_mse); // gain in long form without proxy + auto gain = + parent_mse - ((n_left / n) * left_mse + // the minimizing objective function is half deviance + (n_right / n) * right_mse); // gain in long form without proxy // edge cases if (n_left < params.min_samples_leaf or n_right < params.min_samples_leaf) @@ -905,7 +901,8 @@ class ObjectiveTest : public ::testing::TestWithParam { if (d == DataT(c)) ++sum; }); DataT class_proba = DataT(sum) / data.size(); - entropy += -class_proba * raft::myLog(class_proba ? 
class_proba : DataT(1)) / raft::myLog(DataT(2)); // adding gain + entropy += -class_proba * raft::myLog(class_proba ? class_proba : DataT(1)) / + raft::myLog(DataT(2)); // adding gain } return entropy; } @@ -919,9 +916,9 @@ class ObjectiveTest : public ::testing::TestWithParam { auto parent_entropy = Entropy(data); auto left_entropy = Entropy(left_data); auto right_entropy = Entropy(right_data); - DataT n = data.size(); - DataT left_n = left_data.size(); - DataT right_n = right_data.size(); + DataT n = data.size(); + DataT left_n = left_data.size(); + DataT right_n = right_data.size(); auto gain = parent_entropy - ((left_n / n) * left_entropy + (right_n / n) * right_entropy); @@ -956,9 +953,9 @@ class ObjectiveTest : public ::testing::TestWithParam { auto parent_gini = GiniImpurity(data); auto left_gini = GiniImpurity(left_data); auto right_gini = GiniImpurity(right_data); - DataT n = data.size(); - DataT left_n = left_data.size(); - DataT right_n = right_data.size(); + DataT n = data.size(); + DataT left_n = left_data.size(); + DataT right_n = right_data.size(); auto gain = parent_gini - ((left_n / n) * left_gini + (right_n / n) * right_gini); @@ -972,25 +969,25 @@ class ObjectiveTest : public ::testing::TestWithParam { auto GroundTruthGain(std::vector const& data, std::size_t const split_bin_index) { - if constexpr (std::is_same>::value) // mean squared error + if constexpr (std::is_same>:: + value) // mean squared error { return MSEGroundTruthGain(data, split_bin_index); - } else if constexpr (std::is_same>::value) // poisson + } else if constexpr (std::is_same>:: + value) // poisson { return PoissonGroundTruthGain(data, split_bin_index); } else if constexpr (std::is_same>::value) // gamma { return GammaGroundTruthGain(data, split_bin_index); - } else if constexpr (std::is_same< - ObjectiveT, - InverseGaussianObjectiveFunction>::value) // inverse gaussian + } else if constexpr (std::is_same>:: + value) // inverse gaussian { return InverseGaussianGroundTruthGain(data, split_bin_index); - } else if constexpr (std::is_same>::value) // entropy + } else if constexpr (std::is_same>:: + value) // entropy { return EntropyGroundTruthGain(data, split_bin_index); } else if constexpr (std::is_same Date: Tue, 5 Oct 2021 14:45:37 +0530 Subject: [PATCH 31/42] reduce division operations --- .../batched-levelalgo/metrics.cuh | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/cpp/src/decisiontree/batched-levelalgo/metrics.cuh b/cpp/src/decisiontree/batched-levelalgo/metrics.cuh index 10a410dce8..0eca52dfcb 100644 --- a/cpp/src/decisiontree/batched-levelalgo/metrics.cuh +++ b/cpp/src/decisiontree/batched-levelalgo/metrics.cuh @@ -102,7 +102,7 @@ class GiniObjectiveFunction { { IdxT nRight = len - nLeft; constexpr DataT One = DataT(1.0); - auto invlen = One / len; + auto invLen = One / len; auto invLeft = One / nLeft; auto invRight = One / nRight; auto gain = DataT(0.0); @@ -115,16 +115,16 @@ class GiniObjectiveFunction { int val_i = 0; auto lval_i = hist[nbins * j + i].x; auto lval = DataT(lval_i); - gain += lval * invLeft * lval * invlen; + gain += lval * invLeft * lval * invLen; val_i += lval_i; auto total_sum = hist[nbins * j + nbins - 1].x; auto rval_i = total_sum - lval_i; auto rval = DataT(rval_i); - gain += rval * invRight * rval * invlen; + gain += rval * invRight * rval * invLen; val_i += rval_i; - auto val = DataT(val_i) * invlen; + auto val = DataT(val_i) * invLen; gain -= val * val; } @@ -254,7 +254,7 @@ class MSEObjectiveFunction { { auto gain{DataT(0)}; 
IdxT nRight{len - nLeft}; - auto invLen{DataT(1.0) / len}; + auto invLen = DataT(1.0) / len; // if there aren't enough samples in this split, don't bother! if (nLeft < min_samples_leaf || nRight < min_samples_leaf) { return -std::numeric_limits::max(); @@ -304,7 +304,6 @@ class PoissonObjectiveFunction { static constexpr auto eps_ = 10 * std::numeric_limits::epsilon(); HDI PoissonObjectiveFunction(IdxT nclasses, IdxT min_samples_leaf) - // : TweedieObjectiveFunction{min_samples_leaf} : min_samples_leaf(min_samples_leaf) { } @@ -324,6 +323,7 @@ class PoissonObjectiveFunction { { // get the lens' IdxT nRight = len - nLeft; + auto invLen = DataT(1) / len; // if there aren't enough samples in this split, don't bother! if (nLeft < min_samples_leaf || nRight < min_samples_leaf) @@ -338,11 +338,11 @@ class PoissonObjectiveFunction { return -std::numeric_limits::max(); // compute the gain to be - DataT parent_obj = -label_sum * raft::myLog(label_sum / len); + DataT parent_obj = -label_sum * raft::myLog(label_sum * invLen); DataT left_obj = -left_label_sum * raft::myLog(left_label_sum / nLeft); DataT right_obj = -right_label_sum * raft::myLog(right_label_sum / nRight); DataT gain = parent_obj - (left_obj + right_obj); - gain = gain / len; + gain = gain * invLen; return gain; } @@ -397,6 +397,8 @@ class GammaObjectiveFunction { HDI DataT GainPerSplit(BinT const* hist, IdxT i, IdxT nbins, IdxT len, IdxT nLeft) const { IdxT nRight = len - nLeft; + auto invLen = DataT(1) / len; + // if there aren't enough samples in this split, don't bother! if (nLeft < min_samples_leaf || nRight < min_samples_leaf) return -std::numeric_limits::max(); @@ -410,11 +412,11 @@ class GammaObjectiveFunction { return -std::numeric_limits::max(); // compute the gain to be - DataT parent_obj = len * raft::myLog(label_sum / len); + DataT parent_obj = len * raft::myLog(label_sum * invLen); DataT left_obj = nLeft * raft::myLog(left_label_sum / nLeft); DataT right_obj = nRight * raft::myLog(right_label_sum / nRight); DataT gain = parent_obj - (left_obj + right_obj); - gain = gain / DataT(len); + gain = gain * invLen; return gain; } From 846462846d23fc3f85d9d7a9056744dbc65b59dd Mon Sep 17 00:00:00 2001 From: venkywonka Date: Tue, 5 Oct 2021 18:16:26 +0530 Subject: [PATCH 32/42] flake fix and change criterion_dict --- python/cuml/dask/ensemble/randomforestclassifier.py | 3 ++- python/cuml/ensemble/randomforest_common.pyx | 2 +- python/cuml/test/test_random_forest.py | 7 +++++-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/python/cuml/dask/ensemble/randomforestclassifier.py b/python/cuml/dask/ensemble/randomforestclassifier.py index c867f63841..39596a2823 100755 --- a/python/cuml/dask/ensemble/randomforestclassifier.py +++ b/python/cuml/dask/ensemble/randomforestclassifier.py @@ -81,7 +81,8 @@ class RandomForestClassifier(BaseRandomForestModel, DelayedPredictionMixin, 4 or 'poisson' for POISSON, 5 or 'gamma' for GAMMA, 6 or 'inverse_gaussian' for INVERSE_GAUSSIAN, - 2, 'mse', 4, 'poisson', 5, 'gamma', 6, 'inverse_gaussian' not valid for classification + 2, 'mse', 4, 'poisson', 5, 'gamma', 6, 'inverse_gaussian' not valid + for classification bootstrap : boolean (default = True) Control bootstrapping. 
If set, each tree in the forest is built diff --git a/python/cuml/ensemble/randomforest_common.pyx b/python/cuml/ensemble/randomforest_common.pyx index ad43dc07b6..7e7a6b1dc8 100644 --- a/python/cuml/ensemble/randomforest_common.pyx +++ b/python/cuml/ensemble/randomforest_common.pyx @@ -61,7 +61,7 @@ class BaseRandomForestModel(Base): '5': GAMMA, 'gamma': GAMMA, '6': INVERSE_GAUSSIAN, 'inverse_gaussian': INVERSE_GAUSSIAN, - '6': CRITERION_END} + '7': CRITERION_END} classes_ = CumlArrayDescriptor() diff --git a/python/cuml/test/test_random_forest.py b/python/cuml/test/test_random_forest.py index 9eb3d7a46a..ceb0407fef 100644 --- a/python/cuml/test/test_random_forest.py +++ b/python/cuml/test/test_random_forest.py @@ -231,10 +231,13 @@ def test_tweedie_convergence(max_depth, split_criterion): mask = mse_preds > 0 mse_tweedie_deviance = mean_tweedie_deviance(y[mask], mse_preds[mask], - power=tweedie[split_criterion]["power"]) + power=tweedie + [split_criterion]["power"]) tweedie_tweedie_deviance = mean_tweedie_deviance(y[mask], tweedie_preds[mask], - power=tweedie[split_criterion]["power"]) + power=tweedie + [split_criterion]["power"] + ) # model trained on tweedie data with # tweedie criterion must perform better on tweedie loss From d764562d14ba15b8d2085dc618fbe17a5ff47990 Mon Sep 17 00:00:00 2001 From: venkywonka Date: Tue, 5 Oct 2021 18:30:06 +0530 Subject: [PATCH 33/42] make objective data members private --- .../decisiontree/batched-levelalgo/metrics.cuh | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/cpp/src/decisiontree/batched-levelalgo/metrics.cuh b/cpp/src/decisiontree/batched-levelalgo/metrics.cuh index 0eca52dfcb..28c3c6c553 100644 --- a/cpp/src/decisiontree/batched-levelalgo/metrics.cuh +++ b/cpp/src/decisiontree/batched-levelalgo/metrics.cuh @@ -86,6 +86,8 @@ class GiniObjectiveFunction { using DataT = DataT_; using LabelT = LabelT_; using IdxT = IdxT_; + + private: IdxT nclasses; IdxT min_samples_leaf; @@ -162,6 +164,8 @@ class EntropyObjectiveFunction { using DataT = DataT_; using LabelT = LabelT_; using IdxT = IdxT_; + + private: IdxT nclasses; IdxT min_samples_leaf; @@ -243,8 +247,11 @@ class MSEObjectiveFunction { using LabelT = LabelT_; using IdxT = IdxT_; using BinT = AggregateBin; + + private: IdxT min_samples_leaf; + public: HDI MSEObjectiveFunction(IdxT nclasses, IdxT min_samples_leaf) : min_samples_leaf(min_samples_leaf) { @@ -299,8 +306,11 @@ class PoissonObjectiveFunction { using LabelT = LabelT_; using IdxT = IdxT_; using BinT = AggregateBin; + + private: IdxT min_samples_leaf; + public: static constexpr auto eps_ = 10 * std::numeric_limits::epsilon(); HDI PoissonObjectiveFunction(IdxT nclasses, IdxT min_samples_leaf) @@ -376,8 +386,11 @@ class GammaObjectiveFunction { using IdxT = IdxT_; using BinT = AggregateBin; static constexpr auto eps_ = 10 * std::numeric_limits::epsilon(); + + private: IdxT min_samples_leaf; + public: HDI GammaObjectiveFunction(IdxT nclasses, IdxT min_samples_leaf) : min_samples_leaf{min_samples_leaf} { @@ -449,8 +462,11 @@ class InverseGaussianObjectiveFunction { using IdxT = IdxT_; using BinT = AggregateBin; static constexpr auto eps_ = 10 * std::numeric_limits::epsilon(); + + private: IdxT min_samples_leaf; + public: HDI InverseGaussianObjectiveFunction(IdxT nclasses, IdxT min_samples_leaf) : min_samples_leaf{min_samples_leaf} { From 68ecabbb237801712215ee5aa1790d62e998caa0 Mon Sep 17 00:00:00 2001 From: venkywonka Date: Wed, 6 Oct 2021 19:40:51 +0530 Subject: [PATCH 34/42] refactor declaration --- 
 cpp/test/sg/rf_test.cu | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/cpp/test/sg/rf_test.cu b/cpp/test/sg/rf_test.cu
index dd9e62e2f3..59aa2c29d3 100644
--- a/cpp/test/sg/rf_test.cu
+++ b/cpp/test/sg/rf_test.cu
@@ -699,7 +699,7 @@ class ObjectiveTest : public ::testing::TestWithParam<ObjectiveTestParameters> {
     } else {
       std::normal_distribution<DataT> normal(1.0, 2.0);
       for (auto& d : data) {
-        auto rand_element(DataT(0));
+        auto rand_element{DataT(0)};
         while (1) {
           rand_element = normal(rng);
           if (rand_element > 0) break;  // only positive random numbers
         }
         d = rand_element;
       }
     }
     return data;
   }
@@ -720,13 +720,13 @@ class ObjectiveTest : public ::testing::TestWithParam<ObjectiveTestParameters> {
       auto data_begin = data.begin() + b * bin_width;
       auto data_end   = data_begin + bin_width;
       if constexpr (std::is_same<BinT, CountBin>::value) {  // classification case
-        auto count(IdxT(0));
+        auto count{IdxT(0)};
         std::for_each(data_begin, data_end, [&](auto d) {
           if (d == c) ++count;
         });
         pdf_hist.emplace_back(count);
       } else {  // regression case
-        auto label_sum(DataT(0));
+        auto label_sum{DataT(0)};
         label_sum = std::accumulate(data_begin, data_end, DataT(0));
         pdf_hist.emplace_back(label_sum, bin_width);
       }

From b1be698ed17f7c2152db1f148aaad46f120ee611 Mon Sep 17 00:00:00 2001
From: venkywonka
Date: Wed, 6 Oct 2021 21:13:51 +0530
Subject: [PATCH 35/42] fix improper merge

---
 cpp/src/decisiontree/decisiontree.cuh | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/cpp/src/decisiontree/decisiontree.cuh b/cpp/src/decisiontree/decisiontree.cuh
index b4d114f372..c06d587539 100644
--- a/cpp/src/decisiontree/decisiontree.cuh
+++ b/cpp/src/decisiontree/decisiontree.cuh
@@ -310,6 +310,7 @@ class DecisionTree {
         .train();
     } else if (params.split_criterion == CRITERION::GAMMA) {
       return Builder<GammaObjectiveFunction<DataT, LabelT, IdxT>>(handle,
+                                                                  s,
                                                                   treeid,
                                                                   seed,
                                                                   params,
@@ -323,6 +324,7 @@ class DecisionTree {
         .train();
     } else if (params.split_criterion == CRITERION::INVERSE_GAUSSIAN) {
       return Builder<InverseGaussianObjectiveFunction<DataT, LabelT, IdxT>>(handle,
+                                                                            s,
                                                                             treeid,
                                                                             seed,
                                                                             params,

From d1e369d38786ed5671aa44d099c482f1c23e0eaa Mon Sep 17 00:00:00 2001
From: venkywonka
Date: Mon, 11 Oct 2021 19:48:18 +0530
Subject: [PATCH 36/42] refactor new changes to docs

---
 docs/source/checkpoint.tl                     | Bin 0 -> 8920 bytes
 docs/source/conf.py                           |   1 +
 docs/source/kmeans_model.pkl                  |   1 +
 .../dask/ensemble/randomforestclassifier.py   |  18 +++++++++---------
 .../dask/ensemble/randomforestregressor.py    |  18 +++++++++---------
 .../cuml/ensemble/randomforestclassifier.pyx  |  18 +++++++++---------
 .../cuml/ensemble/randomforestregressor.pyx   |  18 +++++++++---------
 7 files changed, 38 insertions(+), 36 deletions(-)
 create mode 100644 docs/source/checkpoint.tl
 create mode 100644 docs/source/kmeans_model.pkl

diff --git a/docs/source/checkpoint.tl b/docs/source/checkpoint.tl
new file mode 100644
index 0000000000000000000000000000000000000000..aaf4e9af43e595620a032cc9a92bbf2609669ed1
GIT binary patch
[base85 GIT binary patch data for docs/source/checkpoint.tl (literal 8920) omitted]

Date: Mon, 11 Oct 2021 20:03:31 +0530
Subject: [PATCH 37/42] prune artifacts

---
 docs/source/checkpoint.tl    | Bin 8920 -> 0 bytes
 docs/source/conf.py          |   1 -
 docs/source/kmeans_model.pkl |   1 -
 3 files changed, 2 deletions(-)
 delete mode 100644 docs/source/checkpoint.tl
 delete mode 100644 docs/source/kmeans_model.pkl

diff --git a/docs/source/checkpoint.tl b/docs/source/checkpoint.tl
deleted file mode 100644
index aaf4e9af43e595620a032cc9a92bbf2609669ed1..0000000000000000000000000000000000000000
GIT binary patch
[base85 GIT binary patch data (new literal 0, old literal 8920) omitted]

Date: Tue, 19 Oct 2021 14:35:51 +0530
Subject: [PATCH 38/42] flake fix

---
 python/cuml/dask/ensemble/randomforestclassifier.py | 4 ++--
 python/cuml/ensemble/randomforestclassifier.pyx     | 2 +-
 python/cuml/ensemble/randomforestregressor.pyx      | 2 +-
 source                                              | 0
 4 files changed, 4 insertions(+), 4 deletions(-)
 create mode 100644 source

diff --git a/python/cuml/dask/ensemble/randomforestclassifier.py b/python/cuml/dask/ensemble/randomforestclassifier.py
index 42821c5ee9..5a10820dfd 100755
--- a/python/cuml/dask/ensemble/randomforestclassifier.py
+++ b/python/cuml/dask/ensemble/randomforestclassifier.py
@@ -82,8 +82,8 @@ class RandomForestClassifier(BaseRandomForestModel, DelayedPredictionMixin,
         * ``4`` or ``'poisson'`` for poisson half deviance
         * ``5`` or ``'gamma'`` for gamma half deviance
         * ``6`` or ``'inverse_gaussian'`` for inverse gaussian deviance
-        ``2``, ``'mse'``, ``4``, ``'poisson'``, ``5``, ``'gamma'``, ``6``, ``'inverse_gaussian'`` not valid
-        for classification
+        ``2``, ``'mse'``, ``4``, ``'poisson'``, ``5``, ``'gamma'``, ``6``,
+        ``'inverse_gaussian'`` not valid for classification
     bootstrap : boolean (default = True)
         Control bootstrapping.\n
            * If ``True``, each tree in the forest is built on a bootstrapped
diff --git a/python/cuml/ensemble/randomforestclassifier.pyx b/python/cuml/ensemble/randomforestclassifier.pyx
index bb0ed414ec..6dc60a6646 100644
--- a/python/cuml/ensemble/randomforestclassifier.pyx
+++ b/python/cuml/ensemble/randomforestclassifier.pyx
@@ -222,7 +222,7 @@ class RandomForestClassifier(BaseRandomForestModel,
     verbose : int or boolean, default=False
         Sets logging level. It must be one of ``cuml.common.logger.level_*``.
         See :ref:`verbosity-levels` for more info.
-    output_type : ``{'input', 'cudf', 'cupy', 'numpy','numba'}`` (default=None)
+    output_type : ``{'input','cudf','cupy','numpy','numba'}`` (default=None)
         Variable to control output type of the results and attributes of
         the estimator. If None, it'll inherit the output type set at the
         module level, ``cuml.global_settings.output_type``.
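For readers mapping these docstring names to formulas: the half deviances minimized by the new criteria have the closed forms below. This is an illustrative sketch, not part of the patch; `y` holds observed labels, `mu` is the leaf prediction (the label mean), and both are assumed positive where required. Each function equals `sklearn.metrics.mean_tweedie_deviance(y, mu, power=p) / 2` for `p = 1, 2, 3` respectively:

.. code-block:: python

    import numpy as np

    def poisson_half_deviance(y, mu):   # power = 1; y >= 0, mu > 0
        y = np.asarray(y, dtype=float)
        with np.errstate(divide='ignore', invalid='ignore'):
            ylogy = np.where(y > 0, y * np.log(y / mu), 0.0)
        return np.mean(ylogy + mu - y)

    def gamma_half_deviance(y, mu):     # power = 2; y > 0, mu > 0
        return np.mean(np.log(mu / y) + y / mu - 1.0)

    def inv_gaussian_half_deviance(y, mu):  # power = 3; y > 0, mu > 0
        return np.mean((y - mu) ** 2 / (y * mu ** 2)) / 2.0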
diff --git a/python/cuml/ensemble/randomforestregressor.pyx b/python/cuml/ensemble/randomforestregressor.pyx index 23743711f2..8cb589c137 100644 --- a/python/cuml/ensemble/randomforestregressor.pyx +++ b/python/cuml/ensemble/randomforestregressor.pyx @@ -230,7 +230,7 @@ class RandomForestRegressor(BaseRandomForestModel, verbose : int or boolean, default=False Sets logging level. It must be one of ``cuml.common.logger.level_*``. See :ref:`verbosity-levels` for more info. - output_type : ``{'input', 'cudf', 'cupy', 'numpy', 'numba'}`` (default=None) + output_type : ``{'input','cudf','cupy','numpy','numba'}`` (default=None) Variable to control output type of the results and attributes of the estimator. If None, it'll inherit the output type set at the module level, ``cuml.global_settings.output_type``. diff --git a/source b/source new file mode 100644 index 0000000000..e69de29bb2 From 9ed70721643858b975df07ecc8fa74943c1d17f4 Mon Sep 17 00:00:00 2001 From: Venkat Date: Tue, 19 Oct 2021 14:37:27 +0530 Subject: [PATCH 39/42] Delete artifact --- source | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 source diff --git a/source b/source deleted file mode 100644 index e69de29bb2..0000000000 From 4ec326e5707d87787c02776a7c3b285104dcd212 Mon Sep 17 00:00:00 2001 From: venkywonka Date: Tue, 19 Oct 2021 16:23:18 +0530 Subject: [PATCH 40/42] undo extra backtick causing test-fail --- python/cuml/ensemble/randomforestclassifier.pyx | 2 +- python/cuml/ensemble/randomforestregressor.pyx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cuml/ensemble/randomforestclassifier.pyx b/python/cuml/ensemble/randomforestclassifier.pyx index 6dc60a6646..82c9eeaadb 100644 --- a/python/cuml/ensemble/randomforestclassifier.pyx +++ b/python/cuml/ensemble/randomforestclassifier.pyx @@ -220,7 +220,7 @@ class RandomForestClassifier(BaseRandomForestModel, handles in several streams. If it is None, a new one is created. verbose : int or boolean, default=False - Sets logging level. It must be one of ``cuml.common.logger.level_*``. + Sets logging level. It must be one of `cuml.common.logger.level_*`. See :ref:`verbosity-levels` for more info. output_type : ``{'input','cudf','cupy','numpy','numba'}`` (default=None) Variable to control output type of the results and attributes of diff --git a/python/cuml/ensemble/randomforestregressor.pyx b/python/cuml/ensemble/randomforestregressor.pyx index 8cb589c137..b756a78710 100644 --- a/python/cuml/ensemble/randomforestregressor.pyx +++ b/python/cuml/ensemble/randomforestregressor.pyx @@ -228,7 +228,7 @@ class RandomForestRegressor(BaseRandomForestModel, handles in several streams. If it is None, a new one is created. verbose : int or boolean, default=False - Sets logging level. It must be one of ``cuml.common.logger.level_*``. + Sets logging level. It must be one of `cuml.common.logger.level_*`. See :ref:`verbosity-levels` for more info. 
output_type : ``{'input','cudf','cupy','numpy','numba'}`` (default=None) Variable to control output type of the results and attributes of From 9e24756743d2686d6ff436e1dbbe718296f2eba1 Mon Sep 17 00:00:00 2001 From: venkywonka Date: Tue, 19 Oct 2021 18:41:41 +0530 Subject: [PATCH 41/42] undo a cosmetic change due to a pytest dependence --- python/cuml/ensemble/randomforestclassifier.pyx | 4 ++-- python/cuml/ensemble/randomforestregressor.pyx | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/python/cuml/ensemble/randomforestclassifier.pyx b/python/cuml/ensemble/randomforestclassifier.pyx index 82c9eeaadb..ae49546a92 100644 --- a/python/cuml/ensemble/randomforestclassifier.pyx +++ b/python/cuml/ensemble/randomforestclassifier.pyx @@ -222,10 +222,10 @@ class RandomForestClassifier(BaseRandomForestModel, verbose : int or boolean, default=False Sets logging level. It must be one of `cuml.common.logger.level_*`. See :ref:`verbosity-levels` for more info. - output_type : ``{'input','cudf','cupy','numpy','numba'}`` (default=None) + output_type : {'input', 'cudf', 'cupy', 'numpy', 'numba'}, default=None Variable to control output type of the results and attributes of the estimator. If None, it'll inherit the output type set at the - module level, ``cuml.global_settings.output_type``. + module level, `cuml.global_settings.output_type`. See :ref:`output-data-type-configuration` for more info. Notes diff --git a/python/cuml/ensemble/randomforestregressor.pyx b/python/cuml/ensemble/randomforestregressor.pyx index b756a78710..f1df77fc45 100644 --- a/python/cuml/ensemble/randomforestregressor.pyx +++ b/python/cuml/ensemble/randomforestregressor.pyx @@ -230,10 +230,10 @@ class RandomForestRegressor(BaseRandomForestModel, verbose : int or boolean, default=False Sets logging level. It must be one of `cuml.common.logger.level_*`. See :ref:`verbosity-levels` for more info. - output_type : ``{'input','cudf','cupy','numpy','numba'}`` (default=None) + output_type : {'input', 'cudf', 'cupy', 'numpy', 'numba'}, default=None Variable to control output type of the results and attributes of the estimator. If None, it'll inherit the output type set at the - module level, ``cuml.global_settings.output_type``. + module level, `cuml.global_settings.output_type`. See :ref:`output-data-type-configuration` for more info. 
Notes From 3f8af469f01040a27684c3b479ac2d960aeed924 Mon Sep 17 00:00:00 2001 From: venkywonka Date: Wed, 20 Oct 2021 15:31:43 +0530 Subject: [PATCH 42/42] address review comments --- python/cuml/dask/ensemble/randomforestclassifier.py | 2 +- python/cuml/ensemble/randomforestclassifier.pyx | 13 ------------- python/cuml/ensemble/randomforestregressor.pyx | 5 ++--- 3 files changed, 3 insertions(+), 17 deletions(-) diff --git a/python/cuml/dask/ensemble/randomforestclassifier.py b/python/cuml/dask/ensemble/randomforestclassifier.py index 5a10820dfd..b6de8b7d15 100755 --- a/python/cuml/dask/ensemble/randomforestclassifier.py +++ b/python/cuml/dask/ensemble/randomforestclassifier.py @@ -513,7 +513,7 @@ def predict_proba(self, X, Returns ------- - y : Dask cuDF dataframe or CuPy backed Dask Array (n_rows, 1) + y : Dask cuDF dataframe or CuPy backed Dask Array (n_rows, n_classes) """ if self._get_internal_model() is None: self._set_internal_model(self._concat_treelite_models()) diff --git a/python/cuml/ensemble/randomforestclassifier.pyx b/python/cuml/ensemble/randomforestclassifier.pyx index ae49546a92..a38f0ff772 100644 --- a/python/cuml/ensemble/randomforestclassifier.pyx +++ b/python/cuml/ensemble/randomforestclassifier.pyx @@ -240,19 +240,6 @@ class RandomForestClassifier(BaseRandomForestModel, during GPU inference. * While training the model for multi class classification problems, using deep trees or `max_features=1.0` provides better performance. - * Prediction of classes is currently different from how scikit-learn - predicts: - * scikit-learn predicts random forest classifiers by obtaining class - probabilities from each component tree, then averaging these class - probabilities over all the ensemble members, and finally resolving - to the label with highest probability as prediction. - * cuml random forest classifier prediction differs in that, each - component tree generates labels instead of class probabilities; - with the most frequent label over all the trees (the statistical - mode) resolved as prediction. - The above differences might cause marginal variations in accuracy in - tradeoff to better performance. - See: https://github.com/rapidsai/cuml/issues/3764 For additional docs, see `scikitlearn's RandomForestClassifier `_. diff --git a/python/cuml/ensemble/randomforestregressor.pyx b/python/cuml/ensemble/randomforestregressor.pyx index f1df77fc45..aa45af2543 100644 --- a/python/cuml/ensemble/randomforestregressor.pyx +++ b/python/cuml/ensemble/randomforestregressor.pyx @@ -129,11 +129,10 @@ class RandomForestRegressor(BaseRandomForestModel, .. code-block:: python import numpy as np - from cuml.test.utils import get_handle - from cuml.ensemble import RandomForestRegressor as curfc + from cuml.ensemble import RandomForestRegressor as curfr X = np.asarray([[0,10],[0,20],[0,30],[0,40]], dtype=np.float32) y = np.asarray([0.0,1.0,2.0,3.0], dtype=np.float32) - cuml_model = curfc(max_features=1.0, n_bins=128, + cuml_model = curfr(max_features=1.0, n_bins=128, min_samples_leaf=1, min_samples_split=2, n_estimators=40, accuracy_metric='r2')
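To close the series, the behaviour asserted by `test_tweedie_convergence` earlier in these patches can be condensed into a short end-to-end check. The sketch is illustrative only (it assumes a cuML build containing these commits and uses made-up data): a forest split with the matching tweedie criterion should incur no more tweedie deviance than an MSE-split forest trained on the same data.

.. code-block:: python

    import numpy as np
    from cuml.ensemble import RandomForestRegressor as curfr
    from sklearn.metrics import mean_tweedie_deviance

    np.random.seed(33)
    X = np.random.random((10000, 4)).astype(np.float32)
    y = np.random.gamma(1.0, size=10000).astype(np.float32)

    gamma_preds = curfr(split_criterion='gamma', max_depth=4,
                        n_estimators=10).fit(X, y).predict(X)
    mse_preds = curfr(split_criterion='mse', max_depth=4,
                      n_estimators=10).fit(X, y).predict(X)

    mask = (mse_preds > 0) & (gamma_preds > 0)  # deviance needs positive preds
    assert mean_tweedie_deviance(y[mask], gamma_preds[mask], power=2) <= \
        mean_tweedie_deviance(y[mask], mse_preds[mask], power=2)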