
[REVIEW] Cleanup sphinx doc warnings for 0.15 #2649

Merged
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -132,6 +132,7 @@
- PR #2616: Small test code fix for pandas dtype tests
- PR #2625: Update Estimator notebook to resolve errors
- PR #2634: singlegpu build option fixes
- PR #2649: Cleanup sphinx doc warnings for 0.15

# cuML 0.14.0 (03 Jun 2020)

23 changes: 23 additions & 0 deletions docs/source/_static/references.css
@@ -0,0 +1,23 @@

/* Fix references to not look like parameters */
dl.citation > dt.label {
display: unset !important;
float: left !important;
border: unset !important;
background: unset !important;
padding: unset !important;
margin: unset !important;
font-size: unset !important;
line-height: unset !important;
padding-right: 0.5rem !important;
}

/* Add opening bracket */
dl.citation > dt.label > span::before {
content: "[";
}

/* Add closing bracket */
dl.citation > dt.label > span::after {
content: "]";
}
3 changes: 3 additions & 0 deletions docs/source/conf.py
@@ -187,4 +187,7 @@


def setup(app):
app.add_css_file('copybutton.css')
app.add_css_file('params.css')
app.add_css_file('references.css')

2 changes: 1 addition & 1 deletion python/cuml/cluster/dbscan.pyx
@@ -93,7 +93,7 @@ class DBSCAN(Base):
neighbours.

Examples
---------
--------

.. code-block:: python

26 changes: 13 additions & 13 deletions python/cuml/common/array.py
@@ -31,16 +31,16 @@ class CumlArray(Buffer):
"""
Array represents an abstracted array allocation. It can be instantiated by
itself, creating an rmm.DeviceBuffer underneath, or can be instantiated by
__cuda_array_interface__ or __array_interface__ compliant arrays, in which
case it'll keep a reference to that data underneath. Also can be created
from a pointer, specifying the characteristics of the array, in that case
the owner of the data referred to by the pointer should be specified
explicitly.
``__cuda_array_interface__`` or ``__array_interface__`` compliant arrays,
in which case it'll keep a reference to that data underneath. Also can be
created from a pointer, specifying the characteristics of the array, in
that case the owner of the data referred to by the pointer should be
specified explicitly.

Parameters
----------

data : rmm.DeviceBuffer, cudf.Buffer, array_like, int, bytes, bytearray or
data : rmm.DeviceBuffer, cudf.Buffer, array_like, int, bytes, bytearray or\
memoryview
An array-like object or integer representing a
device or host pointer to pre-allocated memory.
@@ -71,7 +71,7 @@ class CumlArray(Buffer):
strides : tuple of ints
Strides of the data
__cuda_array_interface__ : dictionary
__cuda_array_interface__ to interop with other libraries.
``__cuda_array_interface__`` to interop with other libraries.

Object Methods
--------------
@@ -204,12 +204,12 @@ def to_output(self, output_type='cupy', output_dtype=None):
----------
output_type : string
Format to convert the array to. Acceptable formats are:
'cupy' - to cupy array
'numpy' - to numpy (host) array
'numba' - to numba device array
'dataframe' - to cuDF DataFrame
'series' - to cuDF Series
'cudf' - to cuDF Series if array is single dimensional, to
- 'cupy' - to cupy array
- 'numpy' - to numpy (host) array
- 'numba' - to numba device array
- 'dataframe' - to cuDF DataFrame
- 'series' - to cuDF Series
- 'cudf' - to cuDF Series if array is single dimensional, to \
DataFrame otherwise
output_dtype : string, optional
Optionally cast the array to a specified dtype, creating
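
A minimal usage sketch of the `to_output` conversions listed above (hypothetical values; assumes cuml 0.15 with CuPy on a CUDA-capable machine):

```python
import cupy as cp
from cuml.common.array import CumlArray

# Wrap an existing device array (via __cuda_array_interface__)
arr = CumlArray(cp.arange(10, dtype=cp.float32))

host = arr.to_output('numpy')   # copy back to a host numpy.ndarray
dev = arr.to_output('cupy')     # cupy.ndarray on the device
ser = arr.to_output('cudf')     # 1-D input, so this returns a cuDF Series
```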
4 changes: 2 additions & 2 deletions python/cuml/common/kernel_utils.py
@@ -66,8 +66,8 @@ def cuda_kernel_factory(nvrtc_kernel_str, dtypes, kernel_name=None):
included in the kernel string. These will be added by this function and
the function name will be made unique, based on the given dtypes.

Example
-------
Examples
--------

The following kernel string with dtypes = [float, double, int]

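
A hedged usage sketch of the factory; the kernel body and names below are illustrative, the dtype spelling is an assumption, and the sketch assumes the returned object launches like a `cupy.RawKernel`:

```python
import numpy as np
import cupy as cp
from cuml.common.kernel_utils import cuda_kernel_factory

# {0} and {1} are filled in from `dtypes`, and the function name is made
# unique per dtype combination, as described in the docstring above.
kernel_body = r'''
({0} *x, {1} *out, int n) {
    int tid = blockIdx.x * blockDim.x + threadIdx.x;
    if (tid < n) out[tid] = x[tid] * 2;
}
'''
double_it = cuda_kernel_factory(kernel_body, (np.float32, np.float32),
                                kernel_name='double_it')

x = cp.arange(128, dtype=cp.float32)
out = cp.empty_like(x)
double_it((1,), (128,), (x, out, np.int32(x.size)))
```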
2 changes: 0 additions & 2 deletions python/cuml/common/logger.pyx
@@ -97,7 +97,6 @@ def set_level(level):

.. code-block:: python


# regular usage of setting a logging level for all subsequent logs
# in this case, it will enable all logs up to and including `info()`
logger.set_level(logger.level_info)
@@ -147,7 +146,6 @@ def set_pattern(pattern):

.. code-block:: python


# regular usage of setting a logging pattern for all subsequent logs
logger.set_pattern("--> [%H-%M-%S] %v")

4 changes: 2 additions & 2 deletions python/cuml/common/memory_utils.py
@@ -76,13 +76,13 @@ def rmm_cupy_ary(cupy_fn, *args, **kwargs):
Keyword named arguments to pass to the CuPy function


Note: this function should be used if the result of cupy_fn creates
.. note:: this function should be used if the result of cupy_fn creates
a new array. Functions to create a new CuPy array by reference to
existing device array (through __cuda_array_interface__) can be used
directly.

Examples
---------
--------

.. code-block:: python

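
A short illustration of the note above, mirroring the usage this helper documents (assumes cuml, CuPy and RMM are installed):

```python
import cupy as cp
from cuml.common.memory_utils import rmm_cupy_ary

# cp.arange allocates a brand-new array, so routing it through this helper
# makes the allocation come from RMM; wrapping an existing device array
# (via __cuda_array_interface__) would not need it.
a = rmm_cupy_ary(cp.arange, 10, dtype=cp.float32)
```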
4 changes: 2 additions & 2 deletions python/cuml/common/opg_data_utils_mg.pyx
@@ -114,7 +114,7 @@ def build_rank_size_pair(parts_to_sizes, rank):
parts_to_sizes: array of tuples in the format: [(rank,size)]
rank: rank to be mapped

Returns:
Returns
--------
ptr: vector pointer of the RankSizePair*
"""
@@ -162,7 +162,7 @@ def build_part_descriptor(m, n, rank_size_t, rank):
building the part descriptor
rank: rank to be mapped

Returns:
Returns
--------
ptr: PartDescriptor object
"""
3 changes: 2 additions & 1 deletion python/cuml/common/sparsefuncs.py
@@ -150,7 +150,8 @@ def _insert_zeros(ary, zero_indices):
Create a new array of length ``len(ary) + len(zero_indices)``, where
zero_indices indicates the indexes of zeros in the new array and ary is
used to fill the rest.

Example:
Examples
--------
_insert_zeros([1, 2, 3], [1, 3]) => [1, 0, 2, 0, 3]
"""
if len(zero_indices) == 0:
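
For reference, a hedged re-implementation sketch of the documented behavior (not the code in this file; CuPy-based and illustrative only):

```python
import cupy as cp

def _insert_zeros_sketch(ary, zero_indices):
    # Room for every original element plus one zero per requested index
    ary = cp.asarray(ary)
    out = cp.zeros(len(ary) + len(zero_indices), dtype=ary.dtype)
    keep = cp.ones(out.size, dtype=bool)
    keep[cp.asarray(zero_indices, dtype=cp.int64)] = False  # zeros stay here
    out[keep] = ary  # fill the remaining slots from ary, in order
    return out

# _insert_zeros_sketch([1, 2, 3], [1, 3]) -> array([1, 0, 2, 0, 3])
```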
2 changes: 1 addition & 1 deletion python/cuml/dask/cluster/kmeans.py
@@ -60,7 +60,7 @@ class KMeans(BaseEstimator, DelayedPredictionMixin, DelayedTransformMixin):
random_state : int (default = 1)
If you want results to be the same when you restart Python,
select a state.
init : {'scalable-kmeans++', 'k-means||' , 'random' or an ndarray}
init : {'scalable-kmeans++', 'k-means||' , 'random' or an ndarray} \
(default = 'scalable-k-means++')
'scalable-k-means++' or 'k-means||': Uses fast and stable scalable
kmeans++ initialization.
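
A minimal, hypothetical construction sketch using the documented `init` options (assumes a `dask_cuda` cluster; data loading omitted):

```python
from dask.distributed import Client
from dask_cuda import LocalCUDACluster
from cuml.dask.cluster import KMeans

cluster = LocalCUDACluster()
client = Client(cluster)

# 'scalable-k-means++' (alias 'k-means||') is the default initialization
km = KMeans(n_clusters=4, init='scalable-k-means++', random_state=1)
# km.fit(X) with X a dask array or dask-cuDF DataFrame partitioned across GPUs
```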
51 changes: 27 additions & 24 deletions python/cuml/dask/datasets/classification.py
@@ -41,16 +41,18 @@ def make_classification(n_samples=100, n_features=20, n_informative=2,
class_sep=1.0, hypercube=True, shift=0.0, scale=1.0,
shuffle=True, random_state=None, order='F',
dtype='float32', n_parts=None, client=None):
"""Generate a random n-class classification problem.
"""
Generate a random n-class classification problem.

This initially creates clusters of points normally distributed (std=1)
about vertices of an ``n_informative``-dimensional hypercube with sides of
length ``2*class_sep`` and assigns an equal number of clusters to each
about vertices of an `n_informative`-dimensional hypercube with sides of
length ``2 * class_sep`` and assigns an equal number of clusters to each
class. It introduces interdependence between these features and adds
various types of further noise to the data.

Without shuffling, ``X`` horizontally stacks features in the following
order: the primary ``n_informative`` features, followed by ``n_redundant``
linear combinations of the informative features, followed by ``n_repeated``
order: the primary `n_informative` features, followed by `n_redundant`
linear combinations of the informative features, followed by `n_repeated`
duplicates, drawn randomly with replacement from the informative and
redundant features. The remaining features are filled with random noise.
Thus, without shuffling, all useful features are contained in the columns
@@ -99,15 +101,15 @@ def make_classification(n_samples=100, n_features=20, n_informative=2,
n_samples : int, optional (default=100)
The number of samples.
n_features : int, optional (default=20)
The total number of features. These comprise ``n_informative``
informative features, ``n_redundant`` redundant features,
``n_repeated`` duplicated features and
The total number of features. These comprise `n_informative`
informative features, `n_redundant` redundant features,
`n_repeated` duplicated features and
``n_features-n_informative-n_redundant-n_repeated`` useless features
drawn at random.
n_informative : int, optional (default=2)
The number of informative features. Each class is composed of a number
of gaussian clusters each located around the vertices of a hypercube
in a subspace of dimension ``n_informative``. For each cluster,
in a subspace of dimension `n_informative`. For each cluster,
informative features are drawn independently from N(0, 1) and then
randomly linearly combined within each cluster in order to add
covariance. The clusters are then placed on the vertices of the
@@ -122,13 +124,13 @@
The number of classes (or labels) of the classification problem.
n_clusters_per_class : int, optional (default=2)
The number of clusters per class.
weights : array-like of shape (n_classes,) or (n_classes - 1,),\
(default=None)
weights : array-like of shape ``(n_classes,)`` or ``(n_classes - 1,)``, \
(default=None)
The proportions of samples assigned to each class. If None, then
classes are balanced. Note that if ``len(weights) == n_classes - 1``,
then the last class weight is automatically inferred.
More than ``n_samples`` samples may be returned if the sum of
``weights`` exceeds 1.
More than `n_samples` samples may be returned if the sum of
`weights` exceeds 1.
flip_y : float, optional (default=0.01)
The fraction of samples whose class is assigned randomly. Larger
values introduce noise in the labels and make the classification
@@ -171,17 +173,18 @@ def make_classification(n_samples=100, n_features=20, n_informative=2,
-----
How we extended the dask MNMG version from the single GPU version:

1. We generate centroids of shape (n_centroids, n_informative)
2. We generate an informative covariance of shape
(n_centroids, n_informative, n_informative)
3. We generate a redundant covariance of shape
(n_informative, n_redundant)
4. We generate the indices for the repeated features
We pass along the references to the futures of the above arrays
with each part to the single GPU
`cuml.datasets.classification.make_classification` so that each
part (and worker) has access to the correct values to generate
data from the same covariances
1. We generate centroids of shape ``(n_centroids, n_informative)``
2. We generate an informative covariance of shape \
``(n_centroids, n_informative, n_informative)``
3. We generate a redundant covariance of shape \
``(n_informative, n_redundant)``
4. We generate the indices for the repeated features \
We pass along the references to the futures of the above arrays \
with each part to the single GPU \
`cuml.datasets.classification.make_classification` so that each \
part (and worker) has access to the correct values to generate \
data from the same covariances

"""

client = get_client(client=client)
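
A hedged end-to-end sketch of calling the generator (assumes a `dask_cuda` cluster and that the function is importable from `cuml.dask.datasets`):

```python
from dask.distributed import Client
from dask_cuda import LocalCUDACluster
from cuml.dask.datasets import make_classification

cluster = LocalCUDACluster()
client = Client(cluster)

# Returns Dask-CuPy arrays, split into n_parts partitions across the workers
X, y = make_classification(n_samples=10000, n_features=20, n_informative=5,
                           n_classes=3, n_parts=2, client=client)
```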
39 changes: 22 additions & 17 deletions python/cuml/dask/datasets/regression.py
@@ -223,7 +223,7 @@ def make_low_rank_matrix(n_samples=100, n_features=100,
tail_strength : float between 0.0 and 1.0, optional (default=0.5)
The relative importance of the fat noisy tail of the singular values
profile.
random_state : int, CuPy RandomState instance, Dask RandomState instance
random_state : int, CuPy RandomState instance, Dask RandomState instance \
or None (default)
Determines random number generation for dataset creation. Pass an int
for reproducible output across multiple function calls.
@@ -236,6 +236,7 @@
-------
X : Dask-CuPy array of shape [n_samples, n_features]
The matrix.

"""

rs = _create_rs_generator(random_state)
@@ -276,7 +277,9 @@ def make_regression(n_samples=100, n_features=100, n_informative=10,
random_state=None, n_parts=1, n_samples_per_part=None,
order='F', dtype='float32', client=None,
use_full_low_rank=True):
"""Generate a random regression problem.
"""
Generate a random regression problem.

The input set can either be well conditioned (by default) or have a low
rank-fat tail singular profile.

@@ -305,9 +308,11 @@ def make_regression(n_samples=100, n_features=100, n_informative=10,
of the input data by linear combinations. Using this kind of
singular spectrum in the input allows the generator to reproduce
the correlations often observed in practice.

if None:
The input set is well conditioned, centered and gaussian with
unit variance.

tail_strength : float between 0.0 and 1.0, optional (default=0.5)
The relative importance of the fat noisy tail of the singular values
profile if "effective_rank" is not None.
@@ -317,7 +322,7 @@
Shuffle the samples and the features.
coef : boolean, optional (default=False)
If True, the coefficients of the underlying linear model are returned.
random_state : int, CuPy RandomState instance, Dask RandomState instance
random_state : int, CuPy RandomState instance, Dask RandomState instance \
or None (default)
Determines random number generation for dataset creation. Pass an int
for reproducible output across multiple function calls.
@@ -339,26 +344,26 @@
The input samples.
y : Dask-CuPy array of shape [n_samples] or [n_samples, n_targets]
The output values.
coef : Dask-CuPy array of shape [n_features]
coef : Dask-CuPy array of shape [n_features] \
or [n_features, n_targets], optional
The coefficient of the underlying linear model. It is returned only if
coef is True.

Notes
-----
- Known Performance Limitations:
1. When `effective_rank` is set and `use_full_low_rank` is True,
we cannot generate order `F` by construction, and an explicit
transpose is performed on each part. This may cause memory to spike
(other parameters make order `F` by construction)
2. When `n_targets > 1` and `order = 'F'` as above, we have to
explicity transpose the `y` array. If `coef = True`, then we also
explicity transpose the `ground_truth` array
3. When `shuffle = True` and `order = F`, there are memory spikes to
shuffle the `F` order arrays

- NOTE: If out-of-memory errors are encountered in any of the above \
configurations, try increasing the `n_parts` parameter.
Known Performance Limitations:
1. When `effective_rank` is set and `use_full_low_rank` is True, \
we cannot generate order `F` by construction, and an explicit \
transpose is performed on each part. This may cause memory to spike \
(other parameters make order `F` by construction)
2. When `n_targets > 1` and `order = 'F'` as above, we have to \
explicitly transpose the `y` array. If `coef = True`, then we also \
explicitly transpose the `ground_truth` array
3. When `shuffle = True` and `order = F`, there are memory spikes to \
shuffle the `F` order arrays

.. note:: If out-of-memory errors are encountered in any of the above
configurations, try increasing the `n_parts` parameter.
"""

client = get_client(client=client)
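
Similarly, a hedged sketch for the regression generator; per the note above, increase `n_parts` if out-of-memory errors occur (assumes a `dask_cuda` cluster):

```python
from dask.distributed import Client
from dask_cuda import LocalCUDACluster
from cuml.dask.datasets import make_regression

cluster = LocalCUDACluster()
client = Client(cluster)

# No effective_rank here, so order='F' is produced by construction and the
# transpose-related memory spikes listed above are avoided
X, y = make_regression(n_samples=10000, n_features=50, n_informative=10,
                       n_parts=2, order='F', client=client)
```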
6 changes: 3 additions & 3 deletions python/cuml/dask/decomposition/pca.py
@@ -37,7 +37,7 @@ class PCA(BaseDecomposition,
then selects the top K eigenvectors.

Examples
---------
--------

.. code-block:: python

@@ -92,8 +92,8 @@ class PCA(BaseDecomposition,
1 0.011454
2 -0.008182

Note: Everytime this code is run, the output will be different because
"make_blobs" function generates random matrices.
.. note:: Every time this code is run, the output will be different because
the "make_blobs" function generates random matrices.

Parameters
----------