diff --git a/python/cuml/benchmark/algorithms.py b/python/cuml/benchmark/algorithms.py index ae477e099c..0168de32d9 100644 --- a/python/cuml/benchmark/algorithms.py +++ b/python/cuml/benchmark/algorithms.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2019-2021, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -26,6 +26,8 @@ import cuml.metrics import cuml.decomposition import cuml.naive_bayes +from cuml.dask import neighbors, cluster, manifold, \ + decomposition, linear_model # noqa: F401 from cuml.common.import_utils import has_umap import numpy as np import tempfile @@ -37,14 +39,17 @@ from cuml.benchmark.bench_helper_funcs import ( fit, - fit_kneighbors, - fit_transform, + transform, predict, + fit_transform, + fit_predict, + fit_kneighbors, _build_cpu_skl_classifier, _build_fil_skl_classifier, _build_fil_classifier, _build_treelite_classifier, _treelite_fil_accuracy_score, + _build_mnmg_umap ) import treelite import treelite_runtime @@ -122,13 +127,13 @@ def __init__( def __str__(self): return "AlgoPair:%s" % (self.name) - def run_cpu(self, data, **override_args): + def run_cpu(self, data, bench_args={}, **override_setup_args): """Runs the cpu-based algorithm's fit method on specified data""" if self.cpu_class is None: raise ValueError("No CPU implementation for %s" % self.name) all_args = {**self.shared_args, **self.cpu_args} - all_args = {**all_args, **override_args} + all_args = {**all_args, **override_setup_args} if "cpu_setup_result" not in all_args: cpu_obj = self.cpu_class(**all_args) @@ -137,16 +142,16 @@ def run_cpu(self, data, **override_args): if self.cpu_data_prep_hook: data = self.cpu_data_prep_hook(data) if self.accepts_labels: - self.bench_func(cpu_obj, data[0], data[1]) + self.bench_func(cpu_obj, data[0], data[1], **bench_args) else: - self.bench_func(cpu_obj, data[0]) + self.bench_func(cpu_obj, data[0], **bench_args) return cpu_obj - def run_cuml(self, data, **override_args): + def run_cuml(self, data, bench_args={}, **override_setup_args): """Runs the cuml-based algorithm's fit method on specified data""" all_args = {**self.shared_args, **self.cuml_args} - all_args = {**all_args, **override_args} + all_args = {**all_args, **override_setup_args} if "cuml_setup_result" not in all_args: cuml_obj = self.cuml_class(**all_args) @@ -155,35 +160,35 @@ def run_cuml(self, data, **override_args): if self.cuml_data_prep_hook: data = self.cuml_data_prep_hook(data) if self.accepts_labels: - self.bench_func(cuml_obj, data[0], data[1]) + self.bench_func(cuml_obj, data[0], data[1], **bench_args) else: - self.bench_func(cuml_obj, data[0]) + self.bench_func(cuml_obj, data[0], **bench_args) return cuml_obj def setup_cpu(self, data, **override_args): + all_args = {**self.shared_args, **self.cpu_args} + all_args = {**all_args, **override_args} if self.setup_cpu_func is not None: - all_args = {**self.shared_args, **self.cpu_args} - all_args = {**all_args, **override_args} return { "cpu_setup_result": self.setup_cpu_func( self.cpu_class, data, all_args, self.tmpdir ) } else: - return {} + return all_args def setup_cuml(self, data, **override_args): + all_args = {**self.shared_args, **self.cuml_args} + all_args = {**all_args, **override_args} if self.setup_cuml_func is not None: - all_args = {**self.shared_args, **self.cuml_args} - all_args = {**all_args, **override_args} return { "cuml_setup_result": self.setup_cuml_func( self.cuml_class, data, 
all_args, self.tmpdir ) } else: - return {} + return all_args def _labels_to_int_hook(data): @@ -228,7 +233,6 @@ def all_algorithms(): cuml.random_projection.GaussianRandomProjection, shared_args=dict(n_components=10), name="GaussianRandomProjection", - bench_func=fit_transform, accepts_labels=False, ), AlgorithmPair( @@ -236,7 +240,6 @@ def all_algorithms(): cuml.random_projection.SparseRandomProjection, shared_args=dict(n_components=10), name="SparseRandomProjection", - bench_func=fit_transform, accepts_labels=False, ), AlgorithmPair( @@ -434,7 +437,7 @@ def all_algorithms(): cuml.manifold.UMAP, shared_args=dict(n_neighbors=5, n_epochs=500), name="UMAP-Unsupervised", - accepts_labels=True, + accepts_labels=False, accuracy_function=cuml.metrics.trustworthiness, ), AlgorithmPair( @@ -556,6 +559,139 @@ def all_algorithms(): name="SparseCSRPolynomialFeatures", accepts_labels=False, bench_func=fit_transform + ), + + AlgorithmPair( + None, + cuml.dask.neighbors.KNeighborsClassifier, + shared_args={}, + cuml_args={}, + name="MNMG.KNeighborsClassifier", + bench_func=fit_predict, + accepts_labels=True, + accuracy_function=cuml.metrics.accuracy_score + ), + + AlgorithmPair( + None, + cuml.dask.cluster.KMeans, + shared_args=dict(n_clusters=8, max_iter=300, n_init=1), + cpu_args=dict(init="k-means++"), + cuml_args=dict(init="scalable-k-means++"), + name="MNMG.KMeans", + bench_func=fit_predict, + accepts_labels=False, + accuracy_function=metrics.homogeneity_score, + ), + + AlgorithmPair( + None, + cuml.dask.cluster.DBSCAN, + shared_args=dict(eps=3, min_samples=2), + cpu_args=dict(algorithm="brute"), + name="MNMG.DBSCAN", + bench_func=fit_predict, + accepts_labels=False, + ), + + AlgorithmPair( + None, + cuml.dask.manifold.UMAP, + shared_args=dict(n_neighbors=5, n_epochs=500), + name="MNMG.UMAP-Unsupervised", + bench_func=transform, + setup_cuml_func=_build_mnmg_umap, + accepts_labels=False, + accuracy_function=cuml.metrics.trustworthiness, + ), + + AlgorithmPair( + None, + cuml.dask.manifold.UMAP, + shared_args=dict(n_neighbors=5, n_epochs=500), + name="MNMG.UMAP-Supervised", + bench_func=transform, + setup_cuml_func=_build_mnmg_umap, + accepts_labels=True, + accuracy_function=cuml.metrics.trustworthiness, + ), + + AlgorithmPair( + None, + cuml.dask.neighbors.NearestNeighbors, + shared_args=dict(n_neighbors=1024), + cpu_args=dict(algorithm="brute", n_jobs=-1), + cuml_args={}, + name="MNMG.NearestNeighbors", + accepts_labels=False, + bench_func=fit_kneighbors, + ), + + AlgorithmPair( + None, + cuml.dask.decomposition.TruncatedSVD, + shared_args=dict(n_components=10), + name="MNMG.tSVD", + accepts_labels=False, + ), + + AlgorithmPair( + None, + cuml.dask.decomposition.PCA, + shared_args=dict(n_components=10), + name="MNMG.PCA", + accepts_labels=False, + ), + + AlgorithmPair( + None, + cuml.dask.linear_model.LinearRegression, + shared_args={}, + name="MNMG.LinearRegression", + bench_func=fit_predict, + accepts_labels=True, + accuracy_function=metrics.r2_score, + ), + + AlgorithmPair( + None, + cuml.dask.linear_model.Lasso, + shared_args={}, + name="MNMG.Lasso", + bench_func=fit_predict, + accepts_labels=True, + accuracy_function=metrics.r2_score, + ), + + AlgorithmPair( + None, + cuml.dask.linear_model.ElasticNet, + shared_args={"alpha": 0.1, "l1_ratio": 0.5}, + name="MNMG.ElasticNet", + bench_func=fit_predict, + accepts_labels=True, + accuracy_function=metrics.r2_score, + ), + + AlgorithmPair( + None, + cuml.dask.linear_model.Ridge, + shared_args={}, + name="MNMG.Ridge", + bench_func=fit_predict, + 
accepts_labels=True, + accuracy_function=metrics.r2_score, + ), + + AlgorithmPair( + None, + cuml.dask.neighbors.KNeighborsRegressor, + shared_args={}, + cuml_args={}, + name="MNMG.KNeighborsRegressor", + bench_func=fit_predict, + accepts_labels=True, + accuracy_function=cuml.metrics.r2_score ) ] diff --git a/python/cuml/benchmark/automated/__init__.py b/python/cuml/benchmark/automated/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/python/cuml/benchmark/automated/bench_classification.py b/python/cuml/benchmark/automated/bench_classification.py new file mode 100644 index 0000000000..2d34d01c67 --- /dev/null +++ b/python/cuml/benchmark/automated/bench_classification.py @@ -0,0 +1,68 @@ +# +# Copyright (c) 2021-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import pytest +from .utils.utils import _benchmark_algo, fixture_generation_helper +from .utils.utils import bench_step # noqa: F401 +from .. import datagen + +# +# Core tests +# + + +@pytest.fixture(**fixture_generation_helper({ + 'n_samples': [1000, 10000], + 'n_features': [5, 500] + })) +def classification(request): + data = datagen.gen_data( + 'classification', + 'cupy', + n_samples=request.param['n_samples'], + n_features=request.param['n_features'] + ) + return data, { + 'dataset_type': 'classification', + **request.param + } + + +def bench_logistic_regression(gpubenchmark, bench_step, # noqa: F811 + classification): + _benchmark_algo(gpubenchmark, 'LogisticRegression', + bench_step, classification) + + +def bench_mbsgcclf(gpubenchmark, bench_step, classification): # noqa: F811 + _benchmark_algo(gpubenchmark, 'MBSGDClassifier', + bench_step, classification) + + +def bench_knnclassifier(gpubenchmark, bench_step, # noqa: F811 + classification): + _benchmark_algo(gpubenchmark, 'KNeighborsClassifier', + bench_step, classification) + + +def bench_svc_linear(gpubenchmark, bench_step, classification): # noqa: F811 + _benchmark_algo(gpubenchmark, 'SVC-Linear', + bench_step, classification) + + +def bench_svc_rbf(gpubenchmark, bench_step, classification): # noqa: F811 + _benchmark_algo(gpubenchmark, 'SVC-RBF', + bench_step, classification) diff --git a/python/cuml/benchmark/automated/bench_dimensionality_reduction.py b/python/cuml/benchmark/automated/bench_dimensionality_reduction.py new file mode 100644 index 0000000000..e7aefedec2 --- /dev/null +++ b/python/cuml/benchmark/automated/bench_dimensionality_reduction.py @@ -0,0 +1,98 @@ +# +# Copyright (c) 2021-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +import pytest +from .utils.utils import _benchmark_algo, fixture_generation_helper +from .utils.utils import bench_step # noqa: F401 +from .. import datagen + +# +# Core tests +# + + +@pytest.fixture(**fixture_generation_helper({ + 'n_samples': [1000, 10000], + 'n_features': [5, 500] + })) +def blobs1(request): + data = datagen.gen_data( + 'blobs', + 'cupy', + n_samples=request.param['n_samples'], + n_features=request.param['n_features'] + ) + return data, { + 'dataset_type': 'blobs', + **request.param + } + + +@pytest.fixture(scope='session') +def blobs2(request): + dataset_kwargs = { + 'dataset_type': 'blobs', + 'n_samples': 10000, + 'n_features': 100 + } + dataset = datagen.gen_data( + dataset_kwargs['dataset_type'], + 'cupy', + n_samples=dataset_kwargs['n_samples'], + n_features=dataset_kwargs['n_features'] + ) + return dataset, dataset_kwargs + + +@pytest.fixture(scope='session') +def blobs3(request): + dataset_kwargs = { + 'dataset_type': 'blobs', + 'n_samples': 50000, + 'n_features': 100 + } + dataset = datagen.gen_data( + dataset_kwargs['dataset_type'], + 'cupy', + n_samples=dataset_kwargs['n_samples'], + n_features=dataset_kwargs['n_features'] + ) + return dataset, dataset_kwargs + + +def bench_kmeans(gpubenchmark, bench_step, blobs1): # noqa: F811 + _benchmark_algo(gpubenchmark, 'KMeans', bench_step, blobs1) + + +@pytest.mark.parametrize('algo_name', ['DBSCAN', + 'UMAP-Unsupervised', + 'UMAP-Supervised', + 'NearestNeighbors', + 'TSNE']) +def bench_with_blobs(gpubenchmark, algo_name, bench_step, # noqa: F811 + blobs2): + # Lump together a bunch of simple blobs-based tests + _benchmark_algo(gpubenchmark, algo_name, bench_step, blobs2) + + +@pytest.mark.parametrize('n_components', [2, 10, 50]) +@pytest.mark.parametrize('algo_name', ['tSVD', + 'PCA']) +def bench_dimensionality_reduction(gpubenchmark, algo_name, + bench_step, blobs3, # noqa: F811 + n_components): + _benchmark_algo(gpubenchmark, algo_name, bench_step, blobs3, + setup_kwargs={'n_components': n_components}) diff --git a/python/cuml/benchmark/automated/bench_preprocessing.py b/python/cuml/benchmark/automated/bench_preprocessing.py new file mode 100644 index 0000000000..7a8400296c --- /dev/null +++ b/python/cuml/benchmark/automated/bench_preprocessing.py @@ -0,0 +1,55 @@ +# +# Copyright (c) 2021-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import pytest +from .utils.utils import _benchmark_algo +from .utils.utils import bench_step # noqa: F401 +from .. 
import datagen + +# +# Core tests +# + + +@pytest.fixture(scope='session') +def regression(request): + dataset_kwargs = { + 'dataset_type': 'regression', + 'n_samples': 10000, + 'n_features': 100 + } + dataset = datagen.gen_data( + dataset_kwargs['dataset_type'], + 'cupy', + n_samples=dataset_kwargs['n_samples'], + n_features=dataset_kwargs['n_features'] + ) + return dataset, dataset_kwargs + + +def bench_standardscaler(gpubenchmark, bench_step, regression): # noqa: F811 + _benchmark_algo(gpubenchmark, 'StandardScaler', + bench_step, regression) + + +def bench_maxabsscaler(gpubenchmark, bench_step, regression): # noqa: F811 + _benchmark_algo(gpubenchmark, 'MaxAbsScaler', + bench_step, regression) + + +def bench_normalizer(gpubenchmark, bench_step, regression): # noqa: F811 + _benchmark_algo(gpubenchmark, 'Normalizer', + bench_step, regression) diff --git a/python/cuml/benchmark/automated/bench_random_forest.py b/python/cuml/benchmark/automated/bench_random_forest.py new file mode 100644 index 0000000000..02650c1fae --- /dev/null +++ b/python/cuml/benchmark/automated/bench_random_forest.py @@ -0,0 +1,75 @@ +# +# Copyright (c) 2021-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import pytest +from .utils.utils import _benchmark_algo, fixture_generation_helper +from .utils.utils import bench_step # noqa: F401 +from .. import datagen + +# +# Core tests +# + + +@pytest.fixture(**fixture_generation_helper({ + 'n_samples': [1000, 10000], + 'n_features': [5, 500] + })) +def classification(request): + data = datagen.gen_data( + 'classification', + 'cupy', + n_samples=request.param['n_samples'], + n_features=request.param['n_features'] + ) + return data, { + 'dataset_type': 'classification', + **request.param + } + + +@pytest.fixture(**fixture_generation_helper({ + 'n_samples': [1000, 10000], + 'n_features': [5, 500] + })) +def regression(request): + data = datagen.gen_data( + 'regression', + 'cupy', + n_samples=request.param['n_samples'], + n_features=request.param['n_features'] + ) + return data, { + 'dataset_type': 'regression', + **request.param + } + + +""" +def bench_fil(gpubenchmark, bench_step, classification): + _benchmark_algo(gpubenchmark, 'FIL', + bench_step, classification) +""" + + +def bench_rfc(gpubenchmark, bench_step, classification): # noqa: F811 + _benchmark_algo(gpubenchmark, 'RandomForestClassifier', + bench_step, classification) + + +def bench_rfr(gpubenchmark, bench_step, regression): # noqa: F811 + _benchmark_algo(gpubenchmark, 'RandomForestRegressor', + bench_step, regression) diff --git a/python/cuml/benchmark/automated/bench_regression.py b/python/cuml/benchmark/automated/bench_regression.py new file mode 100644 index 0000000000..0d4ae91a71 --- /dev/null +++ b/python/cuml/benchmark/automated/bench_regression.py @@ -0,0 +1,94 @@ +# +# Copyright (c) 2021-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import pytest +from .utils.utils import _benchmark_algo, fixture_generation_helper +from .utils.utils import bench_step # noqa: F401 +from .. import datagen + +# +# Core tests +# + + +@pytest.fixture(**fixture_generation_helper({ + 'n_samples': [1000, 10000], + 'n_features': [5, 400] + })) +def regression1(request): + data = datagen.gen_data( + 'regression', + 'cupy', + n_samples=request.param['n_samples'], + n_features=request.param['n_features'] + ) + return data, { + 'dataset_type': 'regression', + **request.param + } + + +@pytest.fixture(**fixture_generation_helper({ + 'n_samples': [500, 4000], + 'n_features': [5, 400] + })) +def regression2(request): + data = datagen.gen_data( + 'regression', + 'cupy', + n_samples=request.param['n_samples'], + n_features=request.param['n_features'] + ) + return data, { + 'dataset_type': 'regression', + **request.param + } + + +def bench_linear_regression(gpubenchmark, bench_step, # noqa: F811 + regression1): + _benchmark_algo(gpubenchmark, 'LinearRegression', + bench_step, regression1) + + +def bench_lasso(gpubenchmark, bench_step, regression1): # noqa: F811 + _benchmark_algo(gpubenchmark, 'Lasso', + bench_step, regression1) + + +def bench_elastic(gpubenchmark, bench_step, regression1): # noqa: F811 + _benchmark_algo(gpubenchmark, 'ElasticNet', + bench_step, regression1) + + +def bench_ridge(gpubenchmark, bench_step, regression1): # noqa: F811 + _benchmark_algo(gpubenchmark, 'Ridge', + bench_step, regression1) + + +def bench_knnregressor(gpubenchmark, bench_step, regression1): # noqa: F811 + _benchmark_algo(gpubenchmark, 'KNeighborsRegressor', + bench_step, regression1) + + +def bench_svr_rbf(gpubenchmark, bench_step, regression1): # noqa: F811 + _benchmark_algo(gpubenchmark, 'SVR-RBF', + bench_step, regression1) + + +def bench_svr_linear(gpubenchmark, bench_step, regression2): # noqa: F811 + _benchmark_algo(gpubenchmark, 'SVR-Linear', + bench_step, regression2) diff --git a/python/cuml/benchmark/automated/dask/__init__.py b/python/cuml/benchmark/automated/dask/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/python/cuml/benchmark/automated/dask/bench_mnmg_classification.py b/python/cuml/benchmark/automated/dask/bench_mnmg_classification.py new file mode 100644 index 0000000000..ce1ea2347d --- /dev/null +++ b/python/cuml/benchmark/automated/dask/bench_mnmg_classification.py @@ -0,0 +1,44 @@ +# +# Copyright (c) 2021-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import pytest +from ..utils.utils import _benchmark_algo, fixture_generation_helper +from ..utils.utils import bench_step # noqa: F401 +from ... import datagen + +# +# Core tests +# + + +@pytest.fixture(**fixture_generation_helper({ + 'n_samples': [1000, 10000], + 'n_features': [5, 500] + })) +def classification(request): + data = datagen.gen_data( + 'classification', + 'cudf', + n_samples=request.param['n_samples'], + n_features=request.param['n_features'] + ) + return data, None + + +def bench_mnmg_knnclassifier(gpubenchmark, bench_step, # noqa: F811 + classification, client): + _benchmark_algo(gpubenchmark, 'MNMG.KNeighborsClassifier', + bench_step, classification, client=client) diff --git a/python/cuml/benchmark/automated/dask/bench_mnmg_dimensionality_reduction.py b/python/cuml/benchmark/automated/dask/bench_mnmg_dimensionality_reduction.py new file mode 100644 index 0000000000..938afe2937 --- /dev/null +++ b/python/cuml/benchmark/automated/dask/bench_mnmg_dimensionality_reduction.py @@ -0,0 +1,106 @@ +# +# Copyright (c) 2021-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import pytest +from ..utils.utils import _benchmark_algo, fixture_generation_helper +from ..utils.utils import bench_step # noqa: F401 +from ... 
import datagen + +# +# Core tests +# + + +@pytest.fixture(**fixture_generation_helper({ + 'n_samples': [1000, 10000], + 'n_features': [5, 500] + })) +def blobs1(request): + data = datagen.gen_data( + 'classification', + 'cupy', + n_samples=request.param['n_samples'], + n_features=request.param['n_features'] + ) + return data, None + + +@pytest.fixture(scope='session') +def blobs2(request): + dataset_kwargs = { + 'dataset_type': 'blobs', + 'n_samples': 10000, + 'n_features': 100 + } + dataset = datagen.gen_data( + dataset_kwargs['dataset_type'], + 'cupy', + n_samples=dataset_kwargs['n_samples'], + n_features=dataset_kwargs['n_features'] + ) + return dataset, dataset_kwargs + + +@pytest.fixture(scope='session') +def blobs3(request): + dataset_kwargs = { + 'dataset_type': 'blobs', + 'n_samples': 50000, + 'n_features': 100 + } + dataset = datagen.gen_data( + dataset_kwargs['dataset_type'], + 'cupy', + n_samples=dataset_kwargs['n_samples'], + n_features=dataset_kwargs['n_features'] + ) + return dataset, dataset_kwargs + + +def bench_mnmg_kmeans(gpubenchmark, bench_step, blobs1, client): # noqa: F811 + _benchmark_algo(gpubenchmark, 'MNMG.KMeans', + bench_step, blobs1, client=client) + + +def bench_mnmg_dbscan(gpubenchmark, bench_step, blobs2, client): # noqa: F811 + _benchmark_algo(gpubenchmark, 'MNMG.DBSCAN', + bench_step, blobs2, client=client) + + +def bench_mnmg_nearest_neighbors(gpubenchmark, bench_step, # noqa: F811 + blobs2, client): + _benchmark_algo(gpubenchmark, 'MNMG.NearestNeighbors', + bench_step, blobs2, client=client) + + +@pytest.mark.parametrize('algo_name', ['MNMG.UMAP-Unsupervised', + 'MNMG.UMAP-Supervised']) +def bench_mnmg_umap(gpubenchmark, algo_name, bench_step, # noqa: F811 + blobs2, client): + _benchmark_algo(gpubenchmark, algo_name, + bench_step, blobs2, client=client) + + +@pytest.mark.parametrize('algo_name', ['MNMG.tSVD', + 'MNMG.PCA']) +@pytest.mark.parametrize('n_components', [2, 10, 50]) +def bench_mnmg_dimensionality_reduction(gpubenchmark, algo_name, + bench_step, blobs3, # noqa: F811 + client, n_components): + _benchmark_algo(gpubenchmark, algo_name, + bench_step, blobs3, + setup_kwargs={'n_components': n_components}, + client=client) diff --git a/python/cuml/benchmark/automated/dask/bench_mnmg_regression.py b/python/cuml/benchmark/automated/dask/bench_mnmg_regression.py new file mode 100644 index 0000000000..6929b75807 --- /dev/null +++ b/python/cuml/benchmark/automated/dask/bench_mnmg_regression.py @@ -0,0 +1,68 @@ +# +# Copyright (c) 2021-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import pytest +from ..utils.utils import _benchmark_algo, fixture_generation_helper +from ..utils.utils import bench_step # noqa: F401 +from ... 
import datagen + +# +# Core tests +# + + +@pytest.fixture(**fixture_generation_helper({ + 'n_samples': [1000, 10000], + 'n_features': [5, 500] + })) +def regression(request): + data = datagen.gen_data( + 'regression', + 'cupy', + n_samples=request.param['n_samples'], + n_features=request.param['n_features'] + ) + return data, None + + +def bench_linear_regression(gpubenchmark, bench_step, # noqa: F811 + regression, client): + _benchmark_algo(gpubenchmark, 'MNMG.LinearRegression', + bench_step, regression, client=client) + + +def bench_mnmg_lasso(gpubenchmark, bench_step, # noqa: F811 + regression, client): + _benchmark_algo(gpubenchmark, 'MNMG.Lasso', + bench_step, regression, client=client) + + +def bench_mnmg_elastic(gpubenchmark, bench_step, # noqa: F811 + regression, client): + _benchmark_algo(gpubenchmark, 'MNMG.ElasticNet', + bench_step, regression, client=client) + + +def bench_mnmg_ridge(gpubenchmark, bench_step, # noqa: F811 + regression, client): + _benchmark_algo(gpubenchmark, 'MNMG.Ridge', + bench_step, regression, client=client) + + +def bench_mnmg_knnregressor(gpubenchmark, bench_step, # noqa: F811 + regression, client): + _benchmark_algo(gpubenchmark, 'MNMG.KNeighborsRegressor', + bench_step, regression, client=client) diff --git a/python/cuml/benchmark/automated/dask/conftest.py b/python/cuml/benchmark/automated/dask/conftest.py new file mode 100644 index 0000000000..f38e5045af --- /dev/null +++ b/python/cuml/benchmark/automated/dask/conftest.py @@ -0,0 +1,63 @@ +# +# Copyright (c) 2021-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import pytest + +from dask_cuda import initialize +from dask_cuda import LocalCUDACluster +from dask.distributed import Client + +enable_tcp_over_ucx = True +enable_nvlink = False +enable_infiniband = False + + +@pytest.fixture(scope="module") +def cluster(): + + cluster = LocalCUDACluster(protocol="tcp", scheduler_port=0) + yield cluster + cluster.close() + + +@pytest.fixture(scope="function") +def client(cluster): + + client = Client(cluster) + yield client + client.close() + + +@pytest.fixture(scope="module") +def ucx_cluster(): + initialize.initialize(create_cuda_context=True, + enable_tcp_over_ucx=enable_tcp_over_ucx, + enable_nvlink=enable_nvlink, + enable_infiniband=enable_infiniband) + cluster = LocalCUDACluster(protocol="ucx", + enable_tcp_over_ucx=enable_tcp_over_ucx, + enable_nvlink=enable_nvlink, + enable_infiniband=enable_infiniband) + yield cluster + cluster.close() + + +@pytest.fixture(scope="function") +def ucx_client(ucx_cluster): + + client = Client(ucx_cluster) + yield client + client.close() diff --git a/python/cuml/benchmark/automated/pytest.ini b/python/cuml/benchmark/automated/pytest.ini new file mode 100644 index 0000000000..068fac6e15 --- /dev/null +++ b/python/cuml/benchmark/automated/pytest.ini @@ -0,0 +1,28 @@ +[pytest] +addopts = + --benchmark-warmup=on + --benchmark-warmup-iterations=1 + --benchmark-min-rounds=3 + --benchmark-columns="min, max, mean, stddev, outliers, gpu_mem, rounds" + +markers = + managedmem_on: RMM managed memory enabled + managedmem_off: RMM managed memory disabled + poolallocator_on: RMM pool allocator enabled + poolallocator_off: RMM pool allocator disabled + ETL: benchmarks for ETL steps + small: small datasets + tiny: tiny datasets + ML: benchmarks for ML steps + +python_classes = + Bench* + Test* + +python_files = + bench_* + test_* + +python_functions = + bench_* + test_* diff --git a/python/cuml/benchmark/automated/utils/__init__.py b/python/cuml/benchmark/automated/utils/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/python/cuml/benchmark/automated/utils/auto_nvtx_bench.py b/python/cuml/benchmark/automated/utils/auto_nvtx_bench.py new file mode 100644 index 0000000000..95d827230f --- /dev/null +++ b/python/cuml/benchmark/automated/utils/auto_nvtx_bench.py @@ -0,0 +1,136 @@ +# +# Copyright (c) 2021-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import argparse +import json +from cuml.benchmark import datagen, algorithms +from cuml.benchmark.automated.utils.utils import setup_bench + +parser = argparse.ArgumentParser( + prog='launch-benchmark', + description=r''' + Command-line cuML benchmark runner. 
+ + Examples: + python run_benchmarks.py \ + --algo_name LinearRegression \ + --dataset_type regression + ''', + formatter_class=argparse.RawTextHelpFormatter, +) +parser.add_argument( + '--algo_name', + type=str, + default='', + help='Algorithm name', +) +parser.add_argument( + '--dataset_type', + type=str, + default='', + help='Dataset type', +) +parser.add_argument( + '--n_samples', + type=int, + default=10000, + help='Number of samples', +) +parser.add_argument( + '--n_features', + type=int, + default=100, + help='Number of features', +) +parser.add_argument( + '--dataset_format', + type=str, + default='cupy', + help='Dataset format', +) +parser.add_argument( + '--data_kwargs', + type=json.loads, + default={}, + help='Data generation options', +) +parser.add_argument( + '--setup_kwargs', + type=json.loads, + default={}, + help='Algorithm setup options', +) +parser.add_argument( + '--training_kwargs', + type=json.loads, + default={}, + help='Algorithm training options', +) +parser.add_argument( + '--inference_kwargs', + type=json.loads, + default={}, + help='Algorithm inference options', +) +parser.add_argument( + '--json', + type=str, + default='', + help='JSON file containing benchmark parameters', +) +args = parser.parse_args() + + +def parse_json(args): + with open(args.json) as json_file: + params = json.load(json_file) + + # Overwriting + if 'algo_name' in params: + args.algo_name = params['algo_name'] + if 'dataset_type' in params: + args.dataset_type = params['dataset_type'] + if 'n_samples' in params: + args.n_samples = params['n_samples'] + if 'n_features' in params: + args.n_features = params['n_features'] + if 'dataset_format' in params: + args.dataset_format = params['dataset_format'] + if 'data_kwargs' in params: + args.data_kwargs = params['data_kwargs'] + if 'setup_kwargs' in params: + args.setup_kwargs = params['setup_kwargs'] + if 'training_kwargs' in params: + args.training_kwargs = params['training_kwargs'] + if 'inference_kwargs' in params: + args.inference_kwargs = params['inference_kwargs'] + + +if len(args.json): + parse_json(args) + +dataset = datagen.gen_data( + args.dataset_type, + args.dataset_format, + n_samples=args.n_samples, + n_features=args.n_features, + **args.data_kwargs +) + +algo = algorithms.algorithm_by_name(args.algo_name) +cuml_setup = setup_bench('cuml', algo, 'inference', dataset, + args.setup_kwargs, args.training_kwargs) +algo.run_cuml(dataset, bench_args=args.inference_kwargs, **cuml_setup) diff --git a/python/cuml/benchmark/automated/utils/utils.py b/python/cuml/benchmark/automated/utils/utils.py new file mode 100644 index 0000000000..fdbe72b6d4 --- /dev/null +++ b/python/cuml/benchmark/automated/utils/utils.py @@ -0,0 +1,321 @@ +# +# Copyright (c) 2021-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +try: + from rapids_pytest_benchmark import setFixtureParamNames +except ImportError: + print("\n\nWARNING: rapids_pytest_benchmark is not installed, " + "falling back to pytest_benchmark fixtures.\n") + + # if rapids_pytest_benchmark is not available, just perform time-only + # benchmarking and replace the util functions with no-ops + import pytest_benchmark + gpubenchmark = pytest_benchmark.plugin.benchmark + + def setFixtureParamNames(*args, **kwargs): + pass + +import os +import json +import time +import itertools as it +import warnings +import numpy as np +import cupy as cp +import cudf + +import pytest +from cuml.benchmark import datagen, algorithms +from cuml.benchmark.nvtx_benchmark import Profiler +import dask.array as da +import dask.dataframe as df +from copy import copy + +from cuml.benchmark.bench_helper_funcs import pass_func, fit, predict, \ + transform, kneighbors, \ + fit_predict, fit_transform, \ + fit_kneighbors + + +def distribute(client, data): + if data is not None: + n_rows = data.shape[0] + n_workers = len(client.scheduler_info()['workers']) + if isinstance(data, (np.ndarray, cp.ndarray)): + dask_array = da.from_array(x=data, + chunks={0: n_rows // n_workers, 1: -1}) + return dask_array + elif isinstance(data, (cudf.DataFrame, cudf.Series)): + dask_df = df.from_pandas(data, + chunksize=n_rows // n_workers) + return dask_df + else: + raise ValueError('Could not distribute data') + + +def nvtx_profiling(algo_name, data_kwargs, setup_kwargs, + training_kwargs, inference_kwargs): + dataset_type = data_kwargs['dataset_type'] + n_samples = data_kwargs['n_samples'] + n_features = data_kwargs['n_features'] + dataset_format = (data_kwargs['dataset_format'] if 'dataset_format' + in data_kwargs else 'cupy') + + data_kwargs_edited = copy(data_kwargs) + for param in ['dataset_type', 'n_samples', 'n_features', + 'dataset_format']: + data_kwargs_edited.pop(param, None) + + path = os.path.dirname(os.path.realpath(__file__)) + command = """ + python {path}/auto_nvtx_bench.py + --algo_name {algo_name} + --dataset_type {dataset_type} + --n_samples {n_samples} + --n_features {n_features} + --dataset_format {dataset_format} + --data_kwargs {data_kwargs} + --setup_kwargs {setup_kwargs} + --training_kwargs {training_kwargs} + --inference_kwargs {inference_kwargs} + """.format(path=path, + algo_name=algo_name, + dataset_type=dataset_type, + n_samples=n_samples, + n_features=n_features, + dataset_format=dataset_format, + data_kwargs=json.dumps(data_kwargs_edited, + separators=(',', ':')), + setup_kwargs=json.dumps(setup_kwargs, + separators=(',', ':')), + training_kwargs=json.dumps(training_kwargs, + separators=(',', ':')), + inference_kwargs=json.dumps(inference_kwargs, + separators=(',', ':'))) + command = command.replace('\n', '').replace('\t', ' ') + command = ' '.join(command.split()) + + print('\n\n' + '\033[96m' + '=x'*48) + print('=x'*20 + ' NVTX BENCHMARK ' + '=x'*20) + + profiler = Profiler() + profiler.profile(command) + + print('=x'*48) + print('=x'*48 + '\033[0m' + '\n') + + +def cpu_bench(algo, bench_step, dataset, inference_args, cpu_setup): + if algo.cpu_class is None: + return + + t = time.process_time() + if bench_step == 'training': + algo.run_cpu(dataset, **cpu_setup) + elif bench_step == 'inference': + algo.run_cpu(dataset, **inference_args, **cpu_setup) + elapsed_time = time.process_time() - t + + print('\n' + '\033[33m' + '=x'*20 + ' CPU BENCHMARK ' + '=x'*20) + print(algo.name + ' : ' + str(algo.cpu_class)) + print('\tbench_function: ' + str(algo.bench_func)) + 
print('\truntime: ' + str(elapsed_time)) + print('=x'*48 + '\033[0m' + '\n') + + +def setup_bench(platform, algo, bench_step, dataset, + setup_kwargs, training_kwargs): + """ + Set up the AlgorithmPair and the model so they are ready for benchmarking + + Parameters + ---------- + platform : + Either 'cpu' or 'cuml' + algo : + AlgorithmPair instance, as defined in the algorithms.py file + bench_step : + Either 'training' or 'inference', describes the algorithm/model + step to be benchmarked + dataset : + Dataset used for the benchmark + setup_kwargs : + Algorithm/model setup kwargs + training_kwargs : + Algorithm/model training kwargs + """ + + # Generate the model + if platform == 'cuml': + setup = algo.setup_cuml(dataset, **setup_kwargs) + elif platform == 'cpu': + setup = algo.setup_cpu(dataset, **setup_kwargs) + + # Set the bench_func to perform training + if bench_step == 'training': + if hasattr(algo.cuml_class, 'fit'): + algo.bench_func = fit + # Model cannot be trained (special construction) + elif algo.setup_cuml_func: + pytest.skip('Model cannot be trained (special construction)') + else: + raise ValueError('Training function not found') + # Train the model and then set the bench_func to perform inference + elif bench_step == 'inference': + if hasattr(algo.cuml_class, 'fit'): + algo.bench_func = fit + # Model cannot be trained (special construction) + elif algo.setup_cuml_func: + algo.bench_func = pass_func + else: + raise ValueError('Training function not found') + + if platform == 'cuml': + setup['cuml_setup_result'] = \ + algo.run_cuml(dataset, bench_args=training_kwargs, **setup) + elif platform == 'cpu': + setup['cpu_setup_result'] = \ + algo.run_cpu(dataset, bench_args=training_kwargs, **setup) + + if hasattr(algo.cuml_class, 'predict'): + algo.bench_func = predict + elif hasattr(algo.cuml_class, 'transform'): + algo.bench_func = transform + elif hasattr(algo.cuml_class, 'kneighbors'): + algo.bench_func = kneighbors + elif any(hasattr(algo.cuml_class, attr) for attr in + ['fit_predict', 'fit_transform', 'fit_kneighbors']): + warnings.warn('Inference cannot be done separately, ' + 'doing both training and inference') + if hasattr(algo.cuml_class, 'fit_predict'): + algo.bench_func = fit_predict + elif hasattr(algo.cuml_class, 'fit_transform'): + algo.bench_func = fit_transform + elif hasattr(algo.cuml_class, 'fit_kneighbors'): + algo.bench_func = fit_kneighbors + else: + raise ValueError('Inference function not found') + else: + raise ValueError('bench_step should be either training or inference') + return setup + + +def _benchmark_algo( + benchmarker, + algo_name, + bench_step, + dataset, + setup_kwargs={}, + training_kwargs={}, + inference_kwargs={}, + client=None +): + """ + Benchmark utility + + Parameters + ---------- + benchmarker : + Pytest benchmark fixture used to enclose the code + that should be benchmarked + algo_name : + Algorithm/model name, can be found in the algorithms.py file + bench_step : + Either 'training' or 'inference', describes the algorithm/model + step to be benchmarked + dataset : + Tuple with the data and a dictionary that describes how it was built. + The dictionary can be later used during the NVTX benchmark. 
+ setup_kwargs : + Algorithm/model setup kwargs + training_kwargs : + Algorithm/model training kwargs + inference_kwargs : + Algorithm/model inference kwargs + client : + Dask client used in MNMG settings + """ + + # Get data and dict describing how it was built + dataset, data_kwargs = dataset + + # The presence of a Dask client signifies MNMG mode + MNMG_mode = client is not None + + # Distribute data in MNMG settings + if MNMG_mode: + # Add the client to the setup kwargs used by model instantiation + setup_kwargs['client'] = client + # Exception: data is scattered by the MNMG DBSCAN model itself + if algo_name != 'MNMG.DBSCAN': + # Distribute data + dataset = [distribute(client, d) for d in dataset] + + # Search AlgorithmPair instance by name + algo = algorithms.algorithm_by_name(algo_name) + # Set up the AlgorithmPair and the model to be ready for benchmark on GPU + cuml_setup = setup_bench('cuml', algo, bench_step, dataset, + setup_kwargs, training_kwargs) + + # Pytest benchmark + if bench_step == 'training': + benchmarker(algo.run_cuml, dataset, bench_args=training_kwargs, + **cuml_setup) + elif bench_step == 'inference': + benchmarker(algo.run_cuml, dataset, bench_args=inference_kwargs, + **cuml_setup) + + # CPU benchmark and NVTX benchmark (only in SG mode) + if not MNMG_mode: + # Check that the cuML model has a CPU equivalent + if algo.cpu_class: + # Convert dataset to a NumPy array + cpu_dataset = datagen._convert_to_numpy(dataset) + # Set up the AlgorithmPair and the model + # to be ready for benchmark on CPU + cpu_setup = setup_bench('cpu', algo, bench_step, cpu_dataset, + setup_kwargs, training_kwargs) + # CPU benchmark + cpu_bench(algo, bench_step, cpu_dataset, inference_kwargs, + cpu_setup) + + # NVTX benchmark performs both the training and inference at once + # but only when bench_step == 'inference' + if bench_step == 'inference': + # NVTX benchmark + nvtx_profiling(algo_name, data_kwargs, setup_kwargs, + training_kwargs, inference_kwargs) + + +def fixture_generation_helper(params): + param_names = sorted(params) + param_combis = list(it.product(*(params[param_name] + for param_name in param_names))) + ids = ['-'.join(map(str, param_combi)) for param_combi in param_combis] + param_combis = [dict(zip(param_names, param_combi)) + for param_combi in param_combis] + return { + 'scope': 'session', + 'params': param_combis, + 'ids': ids + } + + +@pytest.fixture(scope='session', + params=['training', 'inference'], + ids=['training', 'inference']) +def bench_step(request): + return request.param diff --git a/python/cuml/benchmark/bench_helper_funcs.py b/python/cuml/benchmark/bench_helper_funcs.py index 865b8e538e..bdf2b6936f 100644 --- a/python/cuml/benchmark/bench_helper_funcs.py +++ b/python/cuml/benchmark/bench_helper_funcs.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2019-2021, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -23,23 +23,68 @@ import cudf from numba import cuda from cuml.benchmark import datagen +from cuml.manifold import UMAP + + +def call(m, func_name, X, y=None): + def unwrap_and_get_args(func): + if hasattr(func, '__wrapped__'): + return unwrap_and_get_args(func.__wrapped__) + else: + return func.__code__.co_varnames + + if not hasattr(m, func_name): + raise ValueError('Model does not have function ' + func_name) + func = getattr(m, func_name) + argnames = unwrap_and_get_args(func) + if y is not None and 'y' in argnames: + func(X, y=y) + else: + func(X) -def fit_kneighbors(m, x): - m.fit(x) - m.kneighbors(x) +def pass_func(m, x, y=None): + pass def fit(m, x, y=None): - m.fit(x) if y is None else m.fit(x, y) + call(m, 'fit', x, y) + + +def predict(m, x, y=None): + call(m, 'predict', x) + + +def transform(m, x, y=None): + call(m, 'transform', x) -def fit_transform(m, x): - m.fit_transform(x) +def kneighbors(m, x, y=None): + call(m, 'kneighbors', x) -def predict(m, x): - m.predict(x) +def fit_predict(m, x, y=None): + if hasattr(m, 'predict'): + fit(m, x, y) + predict(m, x) + else: + call(m, 'fit_predict', x, y) + + +def fit_transform(m, x, y=None): + if hasattr(m, 'transform'): + fit(m, x, y) + transform(m, x) + else: + call(m, 'fit_transform', x, y) + + +def fit_kneighbors(m, x, y=None): + if hasattr(m, 'kneighbors'): + fit(m, x, y) + kneighbors(m, x) + else: + call(m, 'fit_kneighbors', x, y) def _training_data_to_numpy(X, y): @@ -182,3 +227,20 @@ def _treelite_fil_accuracy_score(y_true, y_pred): y_pred_binary = input_utils.convert_dtype(y_pred1 > 0.5, np.int32) return cuml.metrics.accuracy_score(y_true1, y_pred_binary) + + +def _build_mnmg_umap(m, data, args, tmpdir): + client = args['client'] + del args['client'] + local_model = UMAP(**args) + + if isinstance(data, (tuple, list)): + local_data = [x.compute() for x in data if x is not None] + if len(local_data) == 2: + X, y = local_data + local_model.fit(X, y) + else: + X = local_data + local_model.fit(X) + + return m(client=client, model=local_model, **args) diff --git a/python/cuml/benchmark/datagen.py b/python/cuml/benchmark/datagen.py index b9d5e5ffa6..5a6f20cdfc 100644 --- a/python/cuml/benchmark/datagen.py +++ b/python/cuml/benchmark/datagen.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2021, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -37,8 +37,9 @@ import cudf import gzip import functools -import numpy as np import os +import numpy as np +import cupy as cp import pandas as pd import cuml.datasets @@ -58,39 +59,37 @@ def _gen_data_regression(n_samples, n_features, random_state=42, n_samples = int(1e6) if n_features == 0: n_features = 100 + X_arr, y_arr = cuml.datasets.make_regression( - n_samples=n_samples, n_features=n_features, random_state=random_state, - dtype=dtype) - return cudf.DataFrame(X_arr), cudf.Series(y_arr) + n_samples=n_samples, n_features=n_features, + random_state=random_state, dtype=dtype) + return X_arr, y_arr -def _gen_data_blobs(n_samples, n_features, random_state=42, dtype=np.float32, - centers=None): + +def _gen_data_blobs(n_samples, n_features, random_state=42, centers=None, + dtype=np.float32): """Wrapper for sklearn make_blobs""" if n_samples == 0: n_samples = int(1e6) if n_features == 0: n_samples = 100 + X_arr, y_arr = cuml.datasets.make_blobs( n_samples=n_samples, n_features=n_features, centers=centers, random_state=random_state, dtype=dtype) - return ( - cudf.DataFrame(X_arr), - cudf.Series(y_arr), - ) + + return X_arr, y_arr -def _gen_data_zeros(n_samples, n_features, random_state=42, dtype=np.float32): +def _gen_data_zeros(n_samples, n_features, dtype=np.float32): """Dummy generator for use in testing - returns all 0s""" - return ( - cudf.DataFrame(np.zeros((n_samples, n_features), dtype=dtype)), - cudf.Series(np.zeros(n_samples, dtype=dtype)), - ) + return cp.zeros((n_samples, n_features), dtype=dtype), \ + cp.zeros(n_samples, dtype=dtype) -def _gen_data_classification( - n_samples, n_features, random_state=42, dtype=np.float32, n_classes=2 -): +def _gen_data_classification(n_samples, n_features, random_state=42, + n_classes=2, dtype=np.float32): """Wrapper for sklearn make_blobs""" if n_samples == 0: n_samples = int(1e6) @@ -101,14 +100,10 @@ def _gen_data_classification( n_samples=n_samples, n_features=n_features, n_classes=n_classes, random_state=random_state, dtype=dtype) - return ( - cudf.DataFrame(X_arr), - cudf.Series(y_arr), - ) + return X_arr, y_arr -def _gen_data_higgs(n_samples=None, n_features=None, random_state=42, - dtype=np.float32): +def _gen_data_higgs(n_samples=None, n_features=None, dtype=np.float32): """Wrapper returning Higgs in Pandas format""" X_df, y_df = load_higgs() if n_samples == 0: @@ -125,7 +120,8 @@ def _gen_data_higgs(n_samples=None, n_features=None, random_state=42, "Higgs dataset has only %d rows, cannot support %d" % (X_df.shape[0], n_samples) ) - return X_df.iloc[:n_samples, :n_features], y_df.iloc[:n_samples] + return X_df.iloc[:n_samples, :n_features].astype(dtype), \ + y_df.iloc[:n_samples].astype(dtype) def _download_and_cache(url, compressed_filepath, decompressed_filepath): @@ -173,6 +169,8 @@ def _convert_to_numpy(data): return tuple([_convert_to_numpy(d) for d in data]) elif isinstance(data, np.ndarray): return data + elif isinstance(data, cp.ndarray): + return cp.asnumpy(data) elif isinstance(data, cudf.DataFrame): return data.to_numpy() elif isinstance(data, cudf.Series): @@ -183,6 +181,26 @@ def _convert_to_numpy(data): raise Exception("Unsupported type %s" % str(type(data))) +def _convert_to_cupy(data): + """Returns tuple data with all elements converted to cupy ndarrays""" + if data is None: + return None + elif isinstance(data, tuple): + return tuple([_convert_to_cupy(d) for d in data]) + elif isinstance(data, np.ndarray): + return cp.asarray(data) + elif isinstance(data, cp.ndarray): + return data + elif isinstance(data, 
cudf.DataFrame): + return data.values + elif isinstance(data, cudf.Series): + return data.values + elif isinstance(data, (pd.DataFrame, pd.Series)): + return cp.asarray(data.to_numpy()) + else: + raise Exception("Unsupported type %s" % str(type(data))) + + def _convert_to_cudf(data): if data is None: return None @@ -194,6 +212,18 @@ def _convert_to_cudf(data): return cudf.DataFrame.from_pandas(data) elif isinstance(data, pd.Series): return cudf.Series.from_pandas(data) + elif isinstance(data, np.ndarray): + data = np.squeeze(data) + if data.ndim == 1: + return cudf.Series(data) + else: + return cudf.DataFrame(data) + elif isinstance(data, cp.ndarray): + data = np.squeeze(cp.asnumpy(data)) + if data.ndim == 1: + return cudf.Series(data) + else: + return cudf.DataFrame(data) else: raise Exception("Unsupported type %s" % str(type(data))) @@ -207,6 +237,18 @@ def _convert_to_pandas(data): return data elif isinstance(data, (cudf.DataFrame, cudf.Series)): return data.to_pandas() + elif isinstance(data, np.ndarray): + data = np.squeeze(data) + if data.ndim == 1: + return pd.Series(data) + else: + return pd.DataFrame(data) + elif isinstance(data, cp.ndarray): + data = np.squeeze(cp.asnumpy(data)) + if data.ndim == 1: + return pd.Series(data) + else: + return pd.DataFrame(data) else: raise Exception("Unsupported type %s" % str(type(data))) @@ -285,6 +327,7 @@ def _convert_to_scipy_sparse_csc(data): } _data_converters = { 'numpy': _convert_to_numpy, + 'cupy': _convert_to_cupy, 'cudf': _convert_to_cudf, 'pandas': _convert_to_pandas, 'gpuarray': _convert_to_gpuarray, @@ -304,9 +347,7 @@ def gen_data( dataset_format, n_samples=0, n_features=0, - random_state=42, test_fraction=0.0, - dtype=np.float32, **kwargs ): """Returns a tuple of data from the specified generator. @@ -335,16 +376,17 @@ def gen_data( data = _data_generators[dataset_name]( int(n_samples / (1 - test_fraction)), n_features, - random_state, - dtype, **kwargs ) if test_fraction != 0.0: if n_samples == 0: n_samples = int(data[0].shape[0] * (1 - test_fraction)) + random_state_dict = ({'random_state': kwargs['random_state']} + if 'random_state' in kwargs else {}) X_train, X_test, y_train, y_test = tuple( sklearn.model_selection.train_test_split( - *data, train_size=n_samples, random_state=random_state + *data, train_size=n_samples, + **random_state_dict ) ) data = (X_train, y_train, X_test, y_test) diff --git a/python/cuml/benchmark/nvtx_benchmark.py b/python/cuml/benchmark/nvtx_benchmark.py index 83ecdf3df8..8a2b7338fd 100644 --- a/python/cuml/benchmark/nvtx_benchmark.py +++ b/python/cuml/benchmark/nvtx_benchmark.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2021, NVIDIA CORPORATION. +# Copyright (c) 2021-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -187,5 +187,6 @@ def profile(self, command): self._display_results(results) -profiler = Profiler() -profiler.profile(sys.argv[1]) +if __name__ == "__main__": + profiler = Profiler() + profiler.profile(sys.argv[1]) diff --git a/python/cuml/pytest_benchmarks/test_bench.py b/python/cuml/pytest_benchmarks/test_bench.py deleted file mode 100644 index 0cd18f1a15..0000000000 --- a/python/cuml/pytest_benchmarks/test_bench.py +++ /dev/null @@ -1,84 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Demo integration of benchmarking to pytest interface -Requires pytest-benchmark, which is not currently installed by default. -""" - -from cuml.benchmark import datagen, algorithms -from cuml.common.import_utils import has_pytest_benchmark -import pytest - - -# -# Testing utilities -# -def _benchmark_algo( - benchmark, - name, - dataset_name, - n_samples=10000, - n_features=100, - input_type='numpy', - data_kwargs={}, - algo_args={}, -): - """Simplest benchmark wrapper to time algorithm 'name' on dataset - 'dataset_name'""" - algo = algorithms.algorithm_by_name(name) - data = datagen.gen_data( - dataset_name, - input_type, - n_samples=n_samples, - n_features=n_features, - **data_kwargs - ) - - def _benchmark_inner(): - algo.run_cuml(data, **algo_args) - - benchmark(_benchmark_inner) - - -# -# Core tests -# -@pytest.mark.skipif(not has_pytest_benchmark(), - reason='pytest-benchmark missing') -@pytest.mark.parametrize('n_rows', [1000, 10000]) -@pytest.mark.parametrize('n_features', [5, 500]) -def test_kmeans(benchmark, n_rows, n_features): - _benchmark_algo(benchmark, 'KMeans', 'blobs', n_rows, n_features) - - -@pytest.mark.skipif(not has_pytest_benchmark(), - reason='pytest-benchmark missing') -@pytest.mark.parametrize('algo_name', ['DBSCAN', 'UMAP-Supervised', - 'NearestNeighbors']) -def test_with_blobs(benchmark, algo_name): - # Lump together a bunch of simple blobs-based tests - _benchmark_algo(benchmark, algo_name, 'blobs', 10000, 100) - - -@pytest.mark.skipif(not has_pytest_benchmark(), - reason='pytest-benchmark missing') -@pytest.mark.parametrize('n_components', [2, 10, 50]) -def test_pca(benchmark, n_components): - _benchmark_algo( - benchmark, - 'PCA', - 'blobs', - 50000, - 100, - algo_args=dict(n_components=n_components), - ) diff --git a/python/cuml/test/test_benchmark.py b/python/cuml/test/test_benchmark.py index 3e1fbe1d85..efc376a55a 100644 --- a/python/cuml/test/test_benchmark.py +++ b/python/cuml/test/test_benchmark.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2021, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -28,6 +28,8 @@ import time +from cuml.benchmark.bench_helper_funcs import fit, fit_predict + @pytest.mark.parametrize('dataset', ['blobs', 'regression', 'classification']) def test_data_generators(dataset): @@ -104,6 +106,7 @@ def __init__(self): FastMockAlgo, shared_args={}, name="Mock", + bench_func=fit_predict, accuracy_function=metrics.accuracy_score, ) @@ -128,6 +131,7 @@ def fit(self, X, y): CountingAlgo, CountingAlgo, shared_args={}, + bench_func=fit, name="Counting", ) @@ -157,6 +161,7 @@ def predict(self, X): MockAlgo, shared_args={}, name="Mock", + bench_func=fit_predict, accuracy_function=metrics.accuracy_score, )