diff --git a/ci/docs/build.sh b/ci/docs/build.sh
index a71cc790ff..b006abae75 100644
--- a/ci/docs/build.sh
+++ b/ci/docs/build.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2020-2021, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 #################################
 # cuML Docs build script for CI #
 #################################
@@ -51,6 +51,7 @@ gpuci_logger "Build Doxygen docs"
 gpuci_logger "Build Sphinx docs"
 cd "$PROJECT_WORKSPACE/docs"
 make html
+RETVAL=$?
 
 #Commit to Website
 cd "$DOCS_WORKSPACE"
@@ -65,3 +66,5 @@ done
 
 mv "$PROJECT_WORKSPACE/cpp/build/html/"* "$DOCS_WORKSPACE/api/libcuml/$BRANCH_VERSION"
 mv "$PROJECT_WORKSPACE/docs/build/html/"* "$DOCS_WORKSPACE/api/cuml/$BRANCH_VERSION"
+
+exit $RETVAL
diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh
index 4bfc154c8b..f89e2c80b2 100755
--- a/ci/gpu/build.sh
+++ b/ci/gpu/build.sh
@@ -284,6 +284,11 @@ else
     unset LIBCUML_BUILD_DIR
     $WORKSPACE/build.sh cppdocs -v
 
+    if [ "$CUDA_REL" != "11.0" ]; then
+        gpuci_logger "Building python docs"
+        $WORKSPACE/build.sh pydocs
+    fi
+
 fi
 
 if [ -n "${CODECOV_TOKEN}" ]; then
diff --git a/docs/Makefile b/docs/Makefile
index 2d7149631d..122d7ecee9 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -2,7 +2,7 @@
 #
 
 # You can set these variables from the command line.
-SPHINXOPTS    =
+SPHINXOPTS    = "-W"
 SPHINXBUILD   = sphinx-build
 SPHINXPROJ    = cuML
 SOURCEDIR     = source
diff --git a/python/cuml/cluster/agglomerative.pyx b/python/cuml/cluster/agglomerative.pyx
index 7f6fa21d79..cdd7f59b3e 100644
--- a/python/cuml/cluster/agglomerative.pyx
+++ b/python/cuml/cluster/agglomerative.pyx
@@ -106,21 +106,24 @@ class AgglomerativeClustering(Base, ClusterMixin, CMajorInputTagMixin):
         Which linkage criterion to use. The linkage criterion determines
         which distance to use between sets of observations. The algorithm
         will merge the pairs of clusters that minimize this criterion.
-        - 'single' uses the minimum of the distances between all
-          observations of the two sets.
+
+         * 'single' uses the minimum of the distances between all
+           observations of the two sets.
+
     n_neighbors : int (default = 15)
         The number of neighbors to compute when connectivity = "knn"
     connectivity : {"pairwise", "knn"}, (default = "knn")
         The type of connectivity matrix to compute.
-        - 'pairwise' will compute the entire fully-connected graph of
-          pairwise distances between each set of points. This is the
-          fastest to compute and can be very fast for smaller datasets
-          but requires O(n^2) space.
-        - 'knn' will sparsify the fully-connected connectivity matrix to
-          save memory and enable much larger inputs. "n_neighbors" will
-          control the amount of memory used and the graph will be connected
-          automatically in the event "n_neighbors" was not large enough
-          to connect it.
+         * 'pairwise' will compute the entire fully-connected graph of
+           pairwise distances between each set of points. This is the
+           fastest to compute and can be very fast for smaller datasets
+           but requires O(n^2) space.
+         * 'knn' will sparsify the fully-connected connectivity matrix to
+           save memory and enable much larger inputs. "n_neighbors" will
+           control the amount of memory used and the graph will be connected
+           automatically in the event "n_neighbors" was not large enough
+           to connect it.
+
     output_type : {'input', 'cudf', 'cupy', 'numpy', 'numba'}, default=None
         Variable to control output type of the results and attributes of
         the estimator. If None, it'll inherit the output type set at the
diff --git a/python/cuml/cluster/hdbscan.pyx b/python/cuml/cluster/hdbscan.pyx
index 506f340c3f..a254abce8b 100644
--- a/python/cuml/cluster/hdbscan.pyx
+++ b/python/cuml/cluster/hdbscan.pyx
@@ -1,4 +1,3 @@
-#
 # Copyright (c) 2021-2022, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -293,7 +292,6 @@ class HDBSCAN(Base, ClusterMixin, CMajorInputTagMixin):
 
     alpha : float, optional (default=1.0)
         A distance scaling parameter as used in robust single linkage.
-        See [2]_ for more information.
 
     verbose : int or boolean, default=False
         Sets logging level. It must be one of `cuml.common.logger.level_*`.
@@ -311,7 +309,7 @@ class HDBSCAN(Base, ClusterMixin, CMajorInputTagMixin):
 
     cluster_selection_epsilon : float, optional (default=0.0)
         A distance threshold. Clusters below this value will be merged.
-        See [3]_ for more information. Note that this should not be used
+        Note that this should not be used
         if we want to predict the cluster labels for new points in future
         (e.g. using approximate_predict), as the approximate_predict function
         is not aware of this argument.
@@ -342,6 +340,7 @@ class HDBSCAN(Base, ClusterMixin, CMajorInputTagMixin):
         to find the most persistent clusters. Alternatively you can instead
         select the clusters at the leaves of the tree -- this provides the
         most fine grained and homogeneous clusters. Options are:
+
             * ``eom``
             * ``leaf``
 
@@ -351,17 +350,17 @@ class HDBSCAN(Base, ClusterMixin, CMajorInputTagMixin):
         the case that you feel this is a valid result for your dataset.
 
     gen_min_span_tree : bool, optional (default=False)
-        Whether to populate the minimum_spanning_tree_ member for
+        Whether to populate the `minimum_spanning_tree_` member for
         utilizing plotting tools. This requires the `hdbscan` CPU Python
         package to be installed.
 
     gen_condensed_tree : bool, optional (default=False)
-        Whether to populate the condensed_tree_ member for
+        Whether to populate the `condensed_tree_` member for
         utilizing plotting tools. This requires the `hdbscan` CPU
         Python package to be installed.
 
     gen_single_linkage_tree_ : bool, optinal (default=False)
-        Whether to populate the single_linkage_tree_ member for
+        Whether to populate the `single_linkage_tree_` member for
         utilizing plotting tools. This requires the `hdbscan` CPU
         Python package t be installed.
 
diff --git a/python/cuml/dask/cluster/kmeans.py b/python/cuml/dask/cluster/kmeans.py
index 1326b35a6b..39463bfbd6 100644
--- a/python/cuml/dask/cluster/kmeans.py
+++ b/python/cuml/dask/cluster/kmeans.py
@@ -141,12 +141,14 @@ def fit(self, X, sample_weight=None):
         X : Dask cuDF DataFrame or CuPy backed Dask Array
         Training data to cluster.
 
-        sample_weight : Dask cuDF DataFrame or CuPy backed Dask Array
-                        shape = (n_samples,), default=None # noqa
+        sample_weight : Dask cuDF DataFrame or CuPy backed Dask Array \
+                shape = (n_samples,), default=None
+
             The weights for each observation in X. If None, all observations
             are assigned equal weight.
             Acceptable formats: cuDF DataFrame, NumPy ndarray, Numba device
             ndarray, cuda array interface compliant array like CuPy
+
         """
 
         sample_weight = self._check_normalize_sample_weight(sample_weight)
diff --git a/python/cuml/dask/ensemble/randomforestclassifier.py b/python/cuml/dask/ensemble/randomforestclassifier.py
index abecd72dec..febd8bf318 100755
--- a/python/cuml/dask/ensemble/randomforestclassifier.py
+++ b/python/cuml/dask/ensemble/randomforestclassifier.py
@@ -82,6 +82,7 @@ class RandomForestClassifier(BaseRandomForestModel, DelayedPredictionMixin,
          * ``4`` or ``'poisson'`` for poisson half deviance
          * ``5`` or ``'gamma'`` for gamma half deviance
          * ``6`` or ``'inverse_gaussian'`` for inverse gaussian deviance
+
         ``2``, ``'mse'``, ``4``, ``'poisson'``, ``5``, ``'gamma'``, ``6``,
         ``'inverse_gaussian'`` not valid for classification
     bootstrap : boolean (default = True)
@@ -105,6 +106,7 @@ class RandomForestClassifier(BaseRandomForestModel, DelayedPredictionMixin,
          * If ``'sqrt'`` then ``max_features=1/sqrt(n_features)``.
          * If ``'log2'`` then ``max_features=log2(n_features)/n_features``.
          * If ``None``, then ``max_features = 1.0``.
+
     n_bins : int (default = 128)
         Maximum number of bins used by the split algorithm per feature.
     min_samples_leaf : int or float (default = 1)
@@ -114,6 +116,7 @@ class RandomForestClassifier(BaseRandomForestModel, DelayedPredictionMixin,
          * If ``float``, then ``min_samples_leaf`` represents a fraction
            and ``ceil(min_samples_leaf * n_rows)`` is the minimum number of
            samples for each leaf node.
+
     min_samples_split : int or float (default = 2)
         The minimum number of samples required to split an internal
         node.\n
@@ -122,6 +125,7 @@ class RandomForestClassifier(BaseRandomForestModel, DelayedPredictionMixin,
          * If type ``float``, then ``min_samples_split`` represents a fraction
            and ``ceil(min_samples_split * n_rows)`` is the minimum number of
            samples for each split.
+
     n_streams : int (default = 4 )
         Number of parallel streams used for forest building
     workers : optional, list of strings
diff --git a/python/cuml/dask/ensemble/randomforestregressor.py b/python/cuml/dask/ensemble/randomforestregressor.py
index 607f2775d0..6b5c36d4fd 100755
--- a/python/cuml/dask/ensemble/randomforestregressor.py
+++ b/python/cuml/dask/ensemble/randomforestregressor.py
@@ -75,6 +75,7 @@ class RandomForestRegressor(BaseRandomForestModel, DelayedPredictionMixin,
          * ``4`` or ``'poisson'`` for poisson half deviance
          * ``5`` or ``'gamma'`` for gamma half deviance
          * ``6`` or ``'inverse_gaussian'`` for inverse gaussian deviance
+
         ``0``, ``'gini'``, ``1``, ``'entropy'`` not valid for regression
     bootstrap : boolean (default = True)
         Control bootstrapping.\n
diff --git a/python/cuml/ensemble/randomforestclassifier.pyx b/python/cuml/ensemble/randomforestclassifier.pyx
index e174e18a35..f100cccf3a 100644
--- a/python/cuml/ensemble/randomforestclassifier.pyx
+++ b/python/cuml/ensemble/randomforestclassifier.pyx
@@ -159,6 +159,7 @@ class RandomForestClassifier(BaseRandomForestModel,
          * ``4`` or ``'poisson'`` for poisson half deviance
          * ``5`` or ``'gamma'`` for gamma half deviance
          * ``6`` or ``'inverse_gaussian'`` for inverse gaussian deviance
+
         only ``0``/``'gini'`` and ``1``/``'entropy'`` valid for classification
     bootstrap : boolean (default = True)
         Control bootstrapping.\n
diff --git a/python/cuml/ensemble/randomforestregressor.pyx b/python/cuml/ensemble/randomforestregressor.pyx
index a951b8482f..31124e427a 100644
--- a/python/cuml/ensemble/randomforestregressor.pyx
+++ b/python/cuml/ensemble/randomforestregressor.pyx
@@ -158,6 +158,7 @@ class RandomForestRegressor(BaseRandomForestModel,
          * ``4`` or ``'poisson'`` for poisson half deviance
          * ``5`` or ``'gamma'`` for gamma half deviance
          * ``6`` or ``'inverse_gaussian'`` for inverse gaussian deviance
+
         ``0``, ``'gini'``, ``1`` and ``'entropy'`` not valid for regression.
     bootstrap : boolean (default = True)
         Control bootstrapping.\n
diff --git a/python/cuml/feature_extraction/_tfidf_vectorizer.py b/python/cuml/feature_extraction/_tfidf_vectorizer.py
index fbeeba7fc2..319523dafe 100644
--- a/python/cuml/feature_extraction/_tfidf_vectorizer.py
+++ b/python/cuml/feature_extraction/_tfidf_vectorizer.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2021, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -260,6 +260,7 @@ def transform(self, raw_documents):
     def get_feature_names(self):
         """
         Array mapping from feature integer indices to feature name.
+
         Returns
         -------
         feature_names : Series
diff --git a/python/cuml/fil/fil.pyx b/python/cuml/fil/fil.pyx
index 205261c076..c4e1a5a909 100644
--- a/python/cuml/fil/fil.pyx
+++ b/python/cuml/fil/fil.pyx
@@ -578,7 +578,6 @@ class ForestInference(Base,
 
         Parameters
         ----------
-    {}
         preds : gpuarray or cudf.Series, shape = (n_samples,)
            Optional 'out' location to store inference results
 
@@ -607,7 +606,6 @@ class ForestInference(Base,
 
         Parameters
         ----------
-    {}
         preds : gpuarray or cudf.Series, shape = (n_samples,2)
            Binary probability output
            Optional 'out' location to store inference results
diff --git a/python/cuml/metrics/pairwise_distances.pyx b/python/cuml/metrics/pairwise_distances.pyx
index 4e7a8d1cc3..99162b0e60 100644
--- a/python/cuml/metrics/pairwise_distances.pyx
+++ b/python/cuml/metrics/pairwise_distances.pyx
@@ -341,6 +341,7 @@ def sparse_pairwise_distances(X, Y=None, metric="euclidean", handle=None,
         See the documentation for scipy.spatial.distance for details on these
         metrics.
     - ['inner_product', 'hellinger']
+
     Parameters
     ----------
     X : array-like (device or host) of shape (n_samples_x, n_features)
diff --git a/python/cuml/metrics/pairwise_kernels.py b/python/cuml/metrics/pairwise_kernels.py
index 5416ce36ce..9a1eb7ae7c 100644
--- a/python/cuml/metrics/pairwise_kernels.py
+++ b/python/cuml/metrics/pairwise_kernels.py
@@ -202,9 +202,8 @@ def pairwise_kernels(X, Y=None, metric="linear", *,
     array.
     If Y is given (default is None), then the returned matrix is the pairwise
     kernel between the arrays from both X and Y.
-    Valid values for metric are:
-        ['additive_chi2', 'chi2', 'linear', 'poly', 'polynomial', 'rbf',
-        'laplacian', 'sigmoid', 'cosine']
+    Valid values for metric are: ['additive_chi2', 'chi2', 'linear', 'poly',
+    'polynomial', 'rbf', 'laplacian', 'sigmoid', 'cosine']
 
     Parameters
     ----------
diff --git a/python/cuml/naive_bayes/naive_bayes.py b/python/cuml/naive_bayes/naive_bayes.py
index 616d254eff..4632bd96ea 100644
--- a/python/cuml/naive_bayes/naive_bayes.py
+++ b/python/cuml/naive_bayes/naive_bayes.py
@@ -1524,6 +1524,7 @@ def _check_X(self, X):
 
     def fit(self, X, y, sample_weight=None) -> "CategoricalNB":
         """Fit Naive Bayes classifier according to X, y
+
         Parameters
         ----------
         X : array-like of shape (n_samples, n_features)
@@ -1539,6 +1540,7 @@ def fit(self, X, y, sample_weight=None) -> "CategoricalNB":
         sample_weight : array-like of shape (n_samples), default=None
             Weights applied to individual samples (1. for unweighted).
             Currently sample weight is ignored.
+
         Returns
         -------
         self : object
@@ -1556,6 +1558,7 @@ def partial_fit(self, X, y, classes=None,
         This method has some performance overhead hence it is better to call
         partial_fit on chunks of data that are as large as possible
         (as long as fitting in the memory budget) to hide the overhead.
+
         Parameters
         ----------
         X : array-like of shape (n_samples, n_features)
@@ -1575,6 +1578,7 @@ def partial_fit(self, X, y, classes=None,
         sample_weight : array-like of shape (n_samples), default=None
             Weights applied to individual samples (1. for unweighted).
             Currently sample weight is ignored.
+
         Returns
         -------
         self : object
diff --git a/python/cuml/preprocessing/TargetEncoder.py b/python/cuml/preprocessing/TargetEncoder.py
index 767836a244..55a4325696 100644
--- a/python/cuml/preprocessing/TargetEncoder.py
+++ b/python/cuml/preprocessing/TargetEncoder.py
@@ -49,7 +49,7 @@ class TargetEncoder:
         'continuous': consecutive samples are grouped into one folds.
         'interleaved': samples are assign to each fold in a round robin way.
         'customize': customize splitting by providing a `fold_ids` array
-                     in `fit()` or `fit_transform()` functions.
+        in `fit()` or `fit_transform()` functions.
     output_type: {'cupy', 'numpy', 'auto'}, default = 'auto'
         The data type of output. If 'auto', it matches input data.
     stat: {'mean','var'}, default = 'mean'