[REVIEW] Decorator to generate docstrings with autodetection of parameters #2635

Merged: 36 commits, Aug 19, 2020

Changes from 5 commits

Commits (36)
83b3cd7
FEA New version of docstring decorator with autodetection of parameters
dantegd Aug 3, 2020
848eb68
FEA Change all linear_models to use decorator
dantegd Aug 4, 2020
4b1532b
FEA Add capability of decorator to skip generating parameters header
dantegd Aug 4, 2020
5991996
FEA Change cluster estimators to use decorator
dantegd Aug 4, 2020
6446a37
FEA Add new insert_to_docstring decorator and multiple updates
dantegd Aug 5, 2020
97e74c7
Merge remote-tracking branch 'original/branch-0.15' into 015-fea-doc-…
dantegd Aug 5, 2020
b362e68
FEA More improvements and more classes migrated
dantegd Aug 5, 2020
9874510
FIX Undo temporary changes
dantegd Aug 5, 2020
6ec313f
DOC Added entry to changelog
dantegd Aug 6, 2020
534884f
FIX Small fixes for test_fit, for some reason it was not detecting mu…
dantegd Aug 6, 2020
05d67c6
Merge remote-tracking branch 'original/branch-0.15' into 015-fea-doc-…
dantegd Aug 10, 2020
416e22c
Merge branch-0.15 into 015-fea-doc-deco2
dantegd Aug 12, 2020
de5d5e4
FIX Multiple fixes and improvements from PR feedback
dantegd Aug 12, 2020
6ea94b5
DOC Added further clarification to decorator docstring
dantegd Aug 12, 2020
3f4ca19
Update python/cuml/common/doc_utils.py
dantegd Aug 12, 2020
4aa01dd
Update python/cuml/common/doc_utils.py
dantegd Aug 12, 2020
eedb9f5
Update python/cuml/common/doc_utils.py
dantegd Aug 12, 2020
656a4a6
Update python/cuml/common/doc_utils.py
dantegd Aug 12, 2020
7a7b381
Update python/cuml/naive_bayes/naive_bayes.py
dantegd Aug 12, 2020
648d756
Update python/cuml/common/base.pyx
dantegd Aug 12, 2020
153a532
Update python/cuml/common/base.pyx
dantegd Aug 12, 2020
5264b45
Update python/cuml/naive_bayes/naive_bayes.py
dantegd Aug 12, 2020
2aaf10a
Merge remote-tracking branch 'original/branch-0.15' into 015-fea-doc-…
dantegd Aug 14, 2020
f8cfc61
FIX Incorporate multiple fixes from review to remove sphinx warnings
dantegd Aug 19, 2020
46f1c04
Merge branch-0.15 into 015-fea-doc-deco2
dantegd Aug 19, 2020
c9c73de
Merge remote into local
dantegd Aug 19, 2020
86ee107
ENH Add env variable and autodetection of ipython to see when to buil…
dantegd Aug 19, 2020
b815082
ENH Add pickled models to gitignore and autogenerate check to insert …
dantegd Aug 19, 2020
a788a91
FIX PEP8 fixes
dantegd Aug 19, 2020
e705acb
FIX DOC Fix example of KMeans
dantegd Aug 19, 2020
e3a605f
FIX Remove check for generating docstrings based on feedback
dantegd Aug 19, 2020
75e228a
Merge branch-0.15 into 015-fea-doc-deco2
dantegd Aug 19, 2020
02e4c08
FIX remove accidental line break
dantegd Aug 19, 2020
5b4af21
FIX Remove stray variable in init
dantegd Aug 19, 2020
2202aa4
FIX Remove stray variable in conf.py
dantegd Aug 19, 2020
0e11828
ENH Add prepend parameters decorator option
dantegd Aug 19, 2020
6 changes: 6 additions & 0 deletions docs/source/api.rst
@@ -201,6 +201,12 @@ Mini Batch SGD Regressor
.. autoclass:: cuml.MBSGDRegressor
    :members:

+Multinomial Naive Bayes
+-----------------------
+
+.. autoclass:: cuml.MultinomialNB
+    :members:
+
Stochastic Gradient Descent
---------------------------

1 change: 0 additions & 1 deletion docs/source/index.rst
@@ -25,7 +25,6 @@ Support for Windows is possible in the near future.

   cuml_intro.rst
   cuml_blogs.rst
-   estimator_intro.ipynb


Indices and tables
2 changes: 2 additions & 0 deletions python/cuml/__init__.py
@@ -50,6 +50,8 @@
from cuml.metrics.cluster.adjustedrandindex import adjusted_rand_score
from cuml.metrics.regression import r2_score

+from cuml.naive_bayes.naive_bayes import MultinomialNB

from cuml.neighbors.nearest_neighbors import NearestNeighbors

from cuml.preprocessing.LabelEncoder import LabelEncoder
29 changes: 14 additions & 15 deletions python/cuml/cluster/dbscan.pyx
@@ -30,6 +30,7 @@ from libc.stdlib cimport calloc, malloc, free

from cuml.common.array import CumlArray
from cuml.common.base import Base
+from cuml.common.doc_utils import generate_docstring
from cuml.common.handle cimport cumlHandle
from cuml.common import input_to_cuml_array

@@ -204,19 +205,17 @@ class DBSCAN(Base):
        if self.max_mbytes_per_batch is None:
            self.max_mbytes_per_batch = 0

+    @generate_docstring(skip_parameters_heading=True)
Contributor

This may be difficult, but I noticed in the output that the order of the parameters no longer matches the order of the function signature. This looks a bit ugly in the final output. However, matching the order may be difficult to accomplish given the way it's currently implemented; it would require some significant parsing.

Member Author

I want to avoid that parsing to keep the decorator fast, as I mentioned above; having the parameters out of order might be a minor sacrifice to keep this as fast as possible. One of the trickier parts has been balancing the capabilities of the decorator against not impacting the import time of the module.

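For readers following this thread, here is a minimal sketch (an editor's illustration, not the doc_utils.py implementation introduced by this PR) of the autodetection idea being discussed: inspect the wrapped function's signature once at import time, look up canned numpydoc text for recognized parameter names, and splice it into the existing docstring. The names _PARAM_DOCS and autodoc_params are hypothetical; the real decorator also handles return values, prepend/append placement, and exhibits the parameter-ordering caveat raised above, which this sketch does not reproduce.

import inspect

# Canned numpydoc blocks for parameters shared across many estimators.
# The wording below is illustrative, not cuML's actual tables.
_PARAM_DOCS = {
    "X": "    X : array-like (device or host), shape = (n_samples, n_features)\n"
         "        Dense matrix. Acceptable formats: cuDF DataFrame, NumPy ndarray,\n"
         "        Numba device ndarray, CUDA array interface compliant array (e.g. CuPy).\n",
    "sample_weight": "    sample_weight : array-like (device or host), shape = (n_samples,), default=None\n"
                     "        Weights for each observation in X; None means equal weights.\n",
    "convert_dtype": "    convert_dtype : bool, optional (default = False)\n"
                     "        When True, convert the input to the dtype used to train the model.\n",
}

def autodoc_params(skip_parameters_heading=False):
    """Append standard parameter docs for recognized parameter names."""
    def decorator(func):
        names = inspect.signature(func).parameters
        blocks = [_PARAM_DOCS[n] for n in names if n in _PARAM_DOCS]
        if blocks:
            doc = func.__doc__ or ""
            if not skip_parameters_heading:
                doc += "\n    Parameters\n    ----------\n"
            # Mutate __doc__ and return the original function: no wrapper object,
            # so the only cost is a one-time signature inspection at import.
            func.__doc__ = doc + "".join(blocks)
        return func
    return decorator

@autodoc_params(skip_parameters_heading=True)
def fit(self, X, sample_weight=None):
    """
    Fit the model (toy example).

    Parameters
    ----------
    """

print(fit.__doc__)  # the X and sample_weight entries are appended automatically
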
    def fit(self, X, out_dtype="int32"):
        """
        Perform DBSCAN clustering from features.

        Parameters
        ----------
-        X : array-like (device or host) shape = (n_samples, n_features)
-            Dense matrix (floats or doubles) of shape (n_samples, n_features).
-            Acceptable formats: cuDF DataFrame, NumPy ndarray, Numba device
-            ndarray, cuda array interface compliant array like CuPy
        out_dtype: dtype Determines the precision of the output labels array.
            default: "int32". Valid values are { "int32", np.int32,
-            "int64", np.int64}. When the number of samples exceed
+            "int64", np.int64}.

        """
        self._set_n_features_in(X)
        self._set_output_type(X)
@@ -321,21 +320,21 @@ class DBSCAN(Base):

        return self

+    @generate_docstring(skip_parameters_heading=True,
+                        return_values={'name': 'preds',
+                                       'type': 'dense',
+                                       'description': 'Cluster labels',
+                                       'shape': '(n_samples, 1)'})
    def fit_predict(self, X, out_dtype="int32"):
        """
-        Performs clustering on input_gdf and returns cluster labels.
+        Performs clustering on X and returns cluster labels.

        Parameters
        ----------
-        X : array-like (device or host) shape = (n_samples, n_features)
-            Dense matrix (floats or doubles) of shape (n_samples, n_features)
-            Acceptable formats: cuDF DataFrame, NumPy ndarray, Numba device
-            ndarray, cuda array interface compliant array like CuPy

-        Returns
-        -------
-        y : cuDF Series, shape (n_samples)
-            cluster labels
+        out_dtype: dtype Determines the precision of the output labels array.
+            default: "int32". Valid values are { "int32", np.int32,
+            "int64", np.int64}.

        """
        self.fit(X, out_dtype)
        return self.labels_
92 changes: 23 additions & 69 deletions python/cuml/cluster/kmeans.pyx
@@ -31,6 +31,7 @@ from libc.stdlib cimport calloc, malloc, free

from cuml.common.array import CumlArray
from cuml.common.base import Base
+from cuml.common.doc_utils import generate_docstring
from cuml.common.handle cimport cumlHandle
from cuml.common import input_to_cuml_array
from cuml.cluster.kmeans_utils cimport *
@@ -306,21 +307,11 @@ class KMeans(Base):
        params.n_init = <int>self.n_init
        self._params = params

+    @generate_docstring()
    def fit(self, X, sample_weight=None):
        """
        Compute k-means clustering with X.

-        Parameters
-        ----------
-        X : array-like (device or host) shape = (n_samples, n_features)
-            Dense matrix (floats or doubles) of shape (n_samples, n_features).
-            Acceptable formats: cuDF DataFrame, NumPy ndarray, Numba device
-            ndarray, cuda array interface compliant array like CuPy
-
-        sample_weight : array-like (device or host) shape = (n_samples,), default=None # noqa
-            The weights for each observation in X. If None, all observations
-            are assigned equal weight.

        """
        self._set_n_features_in(X)
        self._set_output_type(X)
@@ -407,21 +398,14 @@ class KMeans(Base):
        del(sample_weight_m)
        return self

+    @generate_docstring(return_values={'name': 'preds',
+                                       'type': 'dense',
+                                       'description': 'Cluster indexes',
+                                       'shape': '(n_samples, 1)'})
    def fit_predict(self, X, sample_weight=None):
        """
        Compute cluster centers and predict cluster index for each sample.

-        Parameters
-        ----------
-        X : array-like (device or host) shape = (n_samples, n_features)
-            Dense matrix (floats or doubles) of shape (n_samples, n_features).
-            Acceptable formats: cuDF DataFrame, NumPy ndarray, Numba device
-            ndarray, cuda array interface compliant array like CuPy
-
-        sample_weight : array-like (device or host) shape = (n_samples,), default=None # noqa
-            The weights for each observation in X. If None, all observations
-            are assigned equal weight.

        """
        return self.fit(X, sample_weight=sample_weight).labels_

@@ -522,44 +506,28 @@ class KMeans(Base):
        del(sample_weight_m)
        return self._labels_.to_output(out_type), inertia

+    @generate_docstring(return_values={'name': 'preds',
+                                       'type': 'dense',
+                                       'description': 'Cluster indexes',
+                                       'shape': '(n_samples, 1)'})
    def predict(self, X, convert_dtype=False, sample_weight=None):
        """
        Predict the closest cluster each sample in X belongs to.

-        Parameters
-        ----------
-        X : array-like (device or host) shape = (n_samples, n_features)
-            Dense matrix (floats or doubles) of shape (n_samples, n_features).
-            Acceptable formats: cuDF DataFrame, NumPy ndarray, Numba device
-            ndarray, cuda array interface compliant array like CuPy
-
-        Returns
-        -------
-        labels : array
-            Which cluster each datapoint belongs to.
        """

        labels, _ = self._predict_labels_inertia(X,
                                                 convert_dtype=convert_dtype)
        return labels

+    @generate_docstring(return_values={'name': 'X_new',
+                                       'type': 'dense',
+                                       'description': 'Transformed data',
+                                       'shape': '(n_samples, n_clusters)'})
    def transform(self, X, convert_dtype=False):
        """
        Transform X to a cluster-distance space.

-        Parameters
-        ----------
-        X : array-like (device or host) shape = (n_samples, n_features)
-            Dense matrix (floats or doubles) of shape (n_samples, n_features).
-            Acceptable formats: cuDF DataFrame, NumPy ndarray, Numba device
-            ndarray, cuda array interface compliant array like CuPy
-
-        convert_dtype : bool, optional (default = False)
-            When set to True, the transform method will, when necessary,
-            convert the input to the data type which was used to train the
-            model. This will increase memory used for the method.
-

        """

        out_type = self._get_output_type(X)
@@ -615,41 +583,27 @@ class KMeans(Base):
        del(X_m)
        return preds.to_output(out_type)

+    @generate_docstring(return_values={'name': 'score',
+                                       'type': 'float',
+                                       'description': 'Opposite of the value \
+                                                       of X on the K-means \
+                                                       objective.'})
    def score(self, X):
        """
        Opposite of the value of X on the K-means objective.

-        Parameters
-        ----------
-        X : array-like (device or host) shape = (n_samples, n_features)
-            Dense matrix (floats or doubles) of shape (n_samples, n_features).
-            Acceptable formats: cuDF DataFrame, NumPy ndarray, Numba device
-            ndarray, cuda array interface compliant array like CuPy
-
-        Returns
-        -------
-        score: float
-            Opposite of the value of X on the K-means objective.
        """

        return -1 * self._predict_labels_inertia(X)[1]

+    @generate_docstring(return_values={'name': 'X_new',
+                                       'type': 'dense',
+                                       'description': 'Transformed data',
+                                       'shape': '(n_samples, n_clusters)'})
    def fit_transform(self, X, convert_dtype=False):
        """
        Compute clustering and transform X to cluster-distance space.

-        Parameters
-        ----------
-        X : array-like (device or host) shape = (n_samples, n_features)
-            Dense matrix (floats or doubles) of shape (n_samples, n_features).
-            Acceptable formats: cuDF DataFrame, NumPy ndarray, Numba device
-            ndarray, cuda array interface compliant array like CuPy
-
-        convert_dtype : bool, optional (default = False)
-            When set to True, the fit_transform method will automatically
-            convert the input to the data type which was used to train the
-            model. This will increase memory used for the method.

        """
        return self.fit(X).transform(X, convert_dtype=convert_dtype)

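A note on the return_values dictionaries used throughout the kmeans.pyx diff above: the sketch below shows one plausible way such a dict could be rendered into a numpydoc Returns section. render_returns is an illustrative name and the formatting is an assumption, not the behavior of cuML's doc_utils module; only the dict keys ('name', 'type', 'description', 'shape') come from the decorator calls in this PR.

def render_returns(return_values):
    """Render a return_values dict (or list of dicts) as a numpydoc Returns block."""
    if isinstance(return_values, dict):
        return_values = [return_values]
    lines = ["    Returns", "    -------"]
    for rv in return_values:
        type_str = rv.get("type", "")
        if type_str == "dense":
            # Assumed meaning: 'dense' outputs mirror the input container
            # (cuDF, CuPy, NumPy, ...), so only the shape is spelled out here.
            type_str = "array-like (device), shape = " + rv.get("shape", "")
        lines.append("    {} : {}".format(rv["name"], type_str))
        lines.append("        {}".format(rv.get("description", "")))
    return "\n".join(lines) + "\n"

# Example with the dict passed to the KMeans.fit_predict decorator above:
print(render_returns({'name': 'preds',
                      'type': 'dense',
                      'description': 'Cluster indexes',
                      'shape': '(n_samples, 1)'}))
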
40 changes: 13 additions & 27 deletions python/cuml/common/base.pyx
@@ -29,6 +29,7 @@ import inspect
from cudf.core import Series as cuSeries
from cudf.core import DataFrame as cuDataFrame
from cuml.common.array import CumlArray
+from cuml.common.doc_utils import generate_docstring
from cupy import ndarray as cupyArray
from numba.cuda import devicearray as numbaArray
from numpy import ndarray as numpyArray
@@ -331,26 +332,16 @@ class RegressorMixin:

_estimator_type = "regressor"

@generate_docstring(return_values={'name': 'score',
'type': 'float',
'description': 'R^2 of self.predict(X) \
wrt. y.'})
    def score(self, X, y, **kwargs):
-        """Scoring function for regression estimators
+        """
+        Scoring function for regression estimators

        Returns the coefficient of determination R^2 of the prediction.

-        Parameters
-        ----------
-        X : array-like (device or host) shape = (n_samples, n_features)
-            Test samples on which we predict
-            Acceptable formats: cuDF DataFrame, NumPy ndarray, Numba device
-            ndarray, cuda array interface compliant array like CuPy
-        y : array-like (device or host) shape = (n_samples, n_features)
-            Ground truth values for predict(X)
-            Acceptable formats: cuDF DataFrame, NumPy ndarray, Numba device
-            ndarray, cuda array interface compliant array like CuPy
-
-        Returns
-        -------
-        score : float
-            R^2 of self.predict(X) wrt. y.
        """
        from cuml.metrics.regression import r2_score

@@ -368,21 +359,16 @@ class ClassifierMixin:

_estimator_type = "classifier"

@generate_docstring(return_values={'name': 'score',
'type': 'float',
'description': 'Accuracy of \
self.predict(X) wrt. y \
(fraction where y == \
pred_y)'})
    def score(self, X, y, **kwargs):
        """
        Scoring function for classifier estimators based on mean accuracy.

-        Parameters
-        ----------
-        X : [cudf.DataFrame]
-            Test samples on which we predict
-        y : [cudf.Series, device array, or numpy array]
-            Ground truth values for predict(X)
-
-        Returns
-        -------
-        score : float
-            Accuracy of self.predict(X) wrt. y (fraction where y == pred_y)
        """
        from cuml.metrics.accuracy import accuracy_score
        from cuml.common import input_to_dev_array