From 164ddbbc1db1f1812c346dae71127557fc537925 Mon Sep 17 00:00:00 2001 From: Rory Mitchell Date: Thu, 17 Feb 2022 02:34:44 -0800 Subject: [PATCH 01/12] Sphinx warnings as errors --- docs/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/Makefile b/docs/Makefile index 2d7149631d..122d7ecee9 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -2,7 +2,7 @@ # # You can set these variables from the command line. -SPHINXOPTS = +SPHINXOPTS = "-W" SPHINXBUILD = sphinx-build SPHINXPROJ = cuML SOURCEDIR = source From fd10c27805a9d9e9d620159aac110b9229b2c662 Mon Sep 17 00:00:00 2001 From: Rory Mitchell Date: Thu, 17 Feb 2022 02:55:26 -0800 Subject: [PATCH 02/12] Check exit code on CI --- ci/docs/build.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ci/docs/build.sh b/ci/docs/build.sh index a71cc790ff..2fb07a6882 100644 --- a/ci/docs/build.sh +++ b/ci/docs/build.sh @@ -51,6 +51,7 @@ gpuci_logger "Build Doxygen docs" gpuci_logger "Build Sphinx docs" cd "$PROJECT_WORKSPACE/docs" make html +RETVAL=$? #Commit to Website cd "$DOCS_WORKSPACE" @@ -65,3 +66,5 @@ done mv "$PROJECT_WORKSPACE/cpp/build/html/"* "$DOCS_WORKSPACE/api/libcuml/$BRANCH_VERSION" mv "$PROJECT_WORKSPACE/docs/build/html/"* "$DOCS_WORKSPACE/api/cuml/$BRANCH_VERSION" + +exit $RETVAL \ No newline at end of file From 80b34af0d989b385e287dc954435af07f10720c5 Mon Sep 17 00:00:00 2001 From: Rory Mitchell Date: Thu, 17 Feb 2022 04:22:56 -0800 Subject: [PATCH 03/12] Fix sphinx doc warnings --- python/cuml/cluster/agglomerative.pyx | 25 +++++++++++-------- python/cuml/cluster/hdbscan.pyx | 10 ++++---- python/cuml/dask/cluster/kmeans.py | 6 +++-- .../dask/ensemble/randomforestclassifier.py | 4 +++ .../dask/ensemble/randomforestregressor.py | 1 + .../cuml/ensemble/randomforestclassifier.pyx | 1 + .../cuml/ensemble/randomforestregressor.pyx | 1 + .../feature_extraction/_tfidf_vectorizer.py | 1 + python/cuml/fil/fil.pyx | 2 -- python/cuml/metrics/pairwise_distances.pyx | 1 + python/cuml/naive_bayes/naive_bayes.py | 4 +++ python/cuml/preprocessing/TargetEncoder.py | 2 +- 12 files changed, 37 insertions(+), 21 deletions(-) diff --git a/python/cuml/cluster/agglomerative.pyx b/python/cuml/cluster/agglomerative.pyx index ea07a92650..10371c5b6d 100644 --- a/python/cuml/cluster/agglomerative.pyx +++ b/python/cuml/cluster/agglomerative.pyx @@ -106,21 +106,24 @@ class AgglomerativeClustering(Base, ClusterMixin, CMajorInputTagMixin): Which linkage criterion to use. The linkage criterion determines which distance to use between sets of observations. The algorithm will merge the pairs of clusters that minimize this criterion. - - 'single' uses the minimum of the distances between all - observations of the two sets. + + * 'single' uses the minimum of the distances between all + observations of the two sets. + n_neighbors : int (default = 15) The number of neighbors to compute when connectivity = "knn" connectivity : {"pairwise", "knn"}, (default = "knn") The type of connectivity matrix to compute. - - 'pairwise' will compute the entire fully-connected graph of - pairwise distances between each set of points. This is the - fastest to compute and can be very fast for smaller datasets - but requires O(n^2) space. - - 'knn' will sparsify the fully-connected connectivity matrix to - save memory and enable much larger inputs. "n_neighbors" will - control the amount of memory used and the graph will be connected - automatically in the event "n_neighbors" was not large enough - to connect it. 
+ * 'pairwise' will compute the entire fully-connected graph of + pairwise distances between each set of points. This is the + fastest to compute and can be very fast for smaller datasets + but requires O(n^2) space. + * 'knn' will sparsify the fully-connected connectivity matrix to + save memory and enable much larger inputs. "n_neighbors" will + control the amount of memory used and the graph will be connected + automatically in the event "n_neighbors" was not large enough + to connect it. + output_type : {'input', 'cudf', 'cupy', 'numpy', 'numba'}, default=None Variable to control output type of the results and attributes of the estimator. If None, it'll inherit the output type set at the diff --git a/python/cuml/cluster/hdbscan.pyx b/python/cuml/cluster/hdbscan.pyx index 395e1f5e94..aa847736ee 100644 --- a/python/cuml/cluster/hdbscan.pyx +++ b/python/cuml/cluster/hdbscan.pyx @@ -291,7 +291,6 @@ class HDBSCAN(Base, ClusterMixin, CMajorInputTagMixin): alpha : float, optional (default=1.0) A distance scaling parameter as used in robust single linkage. - See [2]_ for more information. verbose : int or boolean, default=False Sets logging level. It must be one of `cuml.common.logger.level_*`. @@ -309,7 +308,7 @@ class HDBSCAN(Base, ClusterMixin, CMajorInputTagMixin): cluster_selection_epsilon : float, optional (default=0.0) A distance threshold. Clusters below this value will be merged. - See [3]_ for more information. Note that this should not be used + Note that this should not be used if we want to predict the cluster labels for new points in future (e.g. using approximate_predict), as the approximate_predict function is not aware of this argument. @@ -340,6 +339,7 @@ class HDBSCAN(Base, ClusterMixin, CMajorInputTagMixin): to find the most persistent clusters. Alternatively you can instead select the clusters at the leaves of the tree -- this provides the most fine grained and homogeneous clusters. Options are: + * ``eom`` * ``leaf`` @@ -349,17 +349,17 @@ class HDBSCAN(Base, ClusterMixin, CMajorInputTagMixin): the case that you feel this is a valid result for your dataset. gen_min_span_tree : bool, optional (default=False) - Whether to populate the minimum_spanning_tree_ member for + Whether to populate the `minimum_spanning_tree_` member for utilizing plotting tools. This requires the `hdbscan` CPU Python package to be installed. gen_condensed_tree : bool, optional (default=False) - Whether to populate the condensed_tree_ member for + Whether to populate the `condensed_tree_` member for utilizing plotting tools. This requires the `hdbscan` CPU Python package to be installed. gen_single_linkage_tree_ : bool, optinal (default=False) - Whether to populate the single_linkage_tree_ member for + Whether to populate the `single_linkage_tree_` member for utilizing plotting tools. This requires the `hdbscan` CPU Python package t be installed. diff --git a/python/cuml/dask/cluster/kmeans.py b/python/cuml/dask/cluster/kmeans.py index 690d24b6da..70ad32113c 100644 --- a/python/cuml/dask/cluster/kmeans.py +++ b/python/cuml/dask/cluster/kmeans.py @@ -141,12 +141,14 @@ def fit(self, X, sample_weight=None): X : Dask cuDF DataFrame or CuPy backed Dask Array Training data to cluster. - sample_weight : Dask cuDF DataFrame or CuPy backed Dask Array - shape = (n_samples,), default=None # noqa + sample_weight : Dask cuDF DataFrame or CuPy backed Dask Array \ + shape = (n_samples,), default=None # noqa + The weights for each observation in X. If None, all observations are assigned equal weight. 
Acceptable formats: cuDF DataFrame, NumPy ndarray, Numba device ndarray, cuda array interface compliant array like CuPy + """ sample_weight = self._check_normalize_sample_weight(sample_weight) diff --git a/python/cuml/dask/ensemble/randomforestclassifier.py b/python/cuml/dask/ensemble/randomforestclassifier.py index abecd72dec..febd8bf318 100755 --- a/python/cuml/dask/ensemble/randomforestclassifier.py +++ b/python/cuml/dask/ensemble/randomforestclassifier.py @@ -82,6 +82,7 @@ class RandomForestClassifier(BaseRandomForestModel, DelayedPredictionMixin, * ``4`` or ``'poisson'`` for poisson half deviance * ``5`` or ``'gamma'`` for gamma half deviance * ``6`` or ``'inverse_gaussian'`` for inverse gaussian deviance + ``2``, ``'mse'``, ``4``, ``'poisson'``, ``5``, ``'gamma'``, ``6``, ``'inverse_gaussian'`` not valid for classification bootstrap : boolean (default = True) @@ -105,6 +106,7 @@ class RandomForestClassifier(BaseRandomForestModel, DelayedPredictionMixin, * If ``'sqrt'`` then ``max_features=1/sqrt(n_features)``. * If ``'log2'`` then ``max_features=log2(n_features)/n_features``. * If ``None``, then ``max_features = 1.0``. + n_bins : int (default = 128) Maximum number of bins used by the split algorithm per feature. min_samples_leaf : int or float (default = 1) @@ -114,6 +116,7 @@ class RandomForestClassifier(BaseRandomForestModel, DelayedPredictionMixin, * If ``float``, then ``min_samples_leaf`` represents a fraction and ``ceil(min_samples_leaf * n_rows)`` is the minimum number of samples for each leaf node. + min_samples_split : int or float (default = 2) The minimum number of samples required to split an internal node.\n @@ -122,6 +125,7 @@ class RandomForestClassifier(BaseRandomForestModel, DelayedPredictionMixin, * If type ``float``, then ``min_samples_split`` represents a fraction and ``ceil(min_samples_split * n_rows)`` is the minimum number of samples for each split. 
+ n_streams : int (default = 4 ) Number of parallel streams used for forest building workers : optional, list of strings diff --git a/python/cuml/dask/ensemble/randomforestregressor.py b/python/cuml/dask/ensemble/randomforestregressor.py index 607f2775d0..6b5c36d4fd 100755 --- a/python/cuml/dask/ensemble/randomforestregressor.py +++ b/python/cuml/dask/ensemble/randomforestregressor.py @@ -75,6 +75,7 @@ class RandomForestRegressor(BaseRandomForestModel, DelayedPredictionMixin, * ``4`` or ``'poisson'`` for poisson half deviance * ``5`` or ``'gamma'`` for gamma half deviance * ``6`` or ``'inverse_gaussian'`` for inverse gaussian deviance + ``0``, ``'gini'``, ``1``, ``'entropy'`` not valid for regression bootstrap : boolean (default = True) Control bootstrapping.\n diff --git a/python/cuml/ensemble/randomforestclassifier.pyx b/python/cuml/ensemble/randomforestclassifier.pyx index 9a24be2995..92de0da07a 100644 --- a/python/cuml/ensemble/randomforestclassifier.pyx +++ b/python/cuml/ensemble/randomforestclassifier.pyx @@ -159,6 +159,7 @@ class RandomForestClassifier(BaseRandomForestModel, * ``4`` or ``'poisson'`` for poisson half deviance * ``5`` or ``'gamma'`` for gamma half deviance * ``6`` or ``'inverse_gaussian'`` for inverse gaussian deviance + only ``0``/``'gini'`` and ``1``/``'entropy'`` valid for classification bootstrap : boolean (default = True) Control bootstrapping.\n diff --git a/python/cuml/ensemble/randomforestregressor.pyx b/python/cuml/ensemble/randomforestregressor.pyx index 078d33b49c..023600f984 100644 --- a/python/cuml/ensemble/randomforestregressor.pyx +++ b/python/cuml/ensemble/randomforestregressor.pyx @@ -158,6 +158,7 @@ class RandomForestRegressor(BaseRandomForestModel, * ``4`` or ``'poisson'`` for poisson half deviance * ``5`` or ``'gamma'`` for gamma half deviance * ``6`` or ``'inverse_gaussian'`` for inverse gaussian deviance + ``0``, ``'gini'``, ``1`` and ``'entropy'`` not valid for regression. bootstrap : boolean (default = True) Control bootstrapping.\n diff --git a/python/cuml/feature_extraction/_tfidf_vectorizer.py b/python/cuml/feature_extraction/_tfidf_vectorizer.py index fbeeba7fc2..a745d0f717 100644 --- a/python/cuml/feature_extraction/_tfidf_vectorizer.py +++ b/python/cuml/feature_extraction/_tfidf_vectorizer.py @@ -260,6 +260,7 @@ def transform(self, raw_documents): def get_feature_names(self): """ Array mapping from feature integer indices to feature name. + Returns ------- feature_names : Series diff --git a/python/cuml/fil/fil.pyx b/python/cuml/fil/fil.pyx index d0e6bcad8d..f4040ecacc 100644 --- a/python/cuml/fil/fil.pyx +++ b/python/cuml/fil/fil.pyx @@ -578,7 +578,6 @@ class ForestInference(Base, Parameters ---------- - {} preds : gpuarray or cudf.Series, shape = (n_samples,) Optional 'out' location to store inference results @@ -607,7 +606,6 @@ class ForestInference(Base, Parameters ---------- - {} preds : gpuarray or cudf.Series, shape = (n_samples,2) Binary probability output Optional 'out' location to store inference results diff --git a/python/cuml/metrics/pairwise_distances.pyx b/python/cuml/metrics/pairwise_distances.pyx index 3eb4f916c3..8bcd1dac04 100644 --- a/python/cuml/metrics/pairwise_distances.pyx +++ b/python/cuml/metrics/pairwise_distances.pyx @@ -341,6 +341,7 @@ def sparse_pairwise_distances(X, Y=None, metric="euclidean", handle=None, See the documentation for scipy.spatial.distance for details on these metrics. 
- ['inner_product', 'hellinger'] + Parameters ---------- X : array-like (device or host) of shape (n_samples_x, n_features) diff --git a/python/cuml/naive_bayes/naive_bayes.py b/python/cuml/naive_bayes/naive_bayes.py index 00b9785b44..b591278d60 100644 --- a/python/cuml/naive_bayes/naive_bayes.py +++ b/python/cuml/naive_bayes/naive_bayes.py @@ -1378,6 +1378,7 @@ def _check_X(self, X): def fit(self, X, y, sample_weight=None) -> "CategoricalNB": """Fit Naive Bayes classifier according to X, y + Parameters ---------- X : array-like of shape (n_samples, n_features) @@ -1393,6 +1394,7 @@ def fit(self, X, y, sample_weight=None) -> "CategoricalNB": sample_weight : array-like of shape (n_samples), default=None Weights applied to individual samples (1. for unweighted). Currently sample weight is ignored. + Returns ------- self : object @@ -1410,6 +1412,7 @@ def partial_fit(self, X, y, classes=None, This method has some performance overhead hence it is better to call partial_fit on chunks of data that are as large as possible (as long as fitting in the memory budget) to hide the overhead. + Parameters ---------- X : array-like of shape (n_samples, n_features) @@ -1429,6 +1432,7 @@ def partial_fit(self, X, y, classes=None, sample_weight : array-like of shape (n_samples), default=None Weights applied to individual samples (1. for unweighted). Currently sample weight is ignored. + Returns ------- self : object diff --git a/python/cuml/preprocessing/TargetEncoder.py b/python/cuml/preprocessing/TargetEncoder.py index 07e63b75ff..755f1b7a59 100644 --- a/python/cuml/preprocessing/TargetEncoder.py +++ b/python/cuml/preprocessing/TargetEncoder.py @@ -49,7 +49,7 @@ class TargetEncoder: 'continuous': consecutive samples are grouped into one folds. 'interleaved': samples are assign to each fold in a round robin way. 'customize': customize splitting by providing a `fold_ids` array - in `fit()` or `fit_transform()` functions. + in `fit()` or `fit_transform()` functions. output_type: {'cupy', 'numpy', 'auto'}, default = 'auto' The data type of output. If 'auto', it matches input data. From 1477bc069087f1ed5b8c283b105b9da9493de29d Mon Sep 17 00:00:00 2001 From: Rory Mitchell Date: Fri, 18 Feb 2022 05:09:59 -0800 Subject: [PATCH 04/12] Add python docs build step --- ci/gpu/build.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 683a48c5ff..f79c74d3f4 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -269,6 +269,9 @@ else unset LIBCUML_BUILD_DIR $WORKSPACE/build.sh cppdocs -v + gpuci_logger "Building python docs" + $WORKSPACE/build.sh pydocs + fi if [ -n "${CODECOV_TOKEN}" ]; then From 29e10bf819745c9b261669fca9bb354abdd42eb7 Mon Sep 17 00:00:00 2001 From: Rory Mitchell Date: Fri, 18 Feb 2022 05:10:20 -0800 Subject: [PATCH 05/12] Revert "Fix sphinx doc warnings" This reverts commit 80b34af0d989b385e287dc954435af07f10720c5. 
--- python/cuml/cluster/agglomerative.pyx | 25 ++++++++----------- python/cuml/cluster/hdbscan.pyx | 10 ++++---- python/cuml/dask/cluster/kmeans.py | 6 ++--- .../dask/ensemble/randomforestclassifier.py | 4 --- .../dask/ensemble/randomforestregressor.py | 1 - .../cuml/ensemble/randomforestclassifier.pyx | 1 - .../cuml/ensemble/randomforestregressor.pyx | 1 - .../feature_extraction/_tfidf_vectorizer.py | 1 - python/cuml/fil/fil.pyx | 2 ++ python/cuml/metrics/pairwise_distances.pyx | 1 - python/cuml/naive_bayes/naive_bayes.py | 4 --- python/cuml/preprocessing/TargetEncoder.py | 2 +- 12 files changed, 21 insertions(+), 37 deletions(-) diff --git a/python/cuml/cluster/agglomerative.pyx b/python/cuml/cluster/agglomerative.pyx index 10371c5b6d..ea07a92650 100644 --- a/python/cuml/cluster/agglomerative.pyx +++ b/python/cuml/cluster/agglomerative.pyx @@ -106,24 +106,21 @@ class AgglomerativeClustering(Base, ClusterMixin, CMajorInputTagMixin): Which linkage criterion to use. The linkage criterion determines which distance to use between sets of observations. The algorithm will merge the pairs of clusters that minimize this criterion. - - * 'single' uses the minimum of the distances between all - observations of the two sets. - + - 'single' uses the minimum of the distances between all + observations of the two sets. n_neighbors : int (default = 15) The number of neighbors to compute when connectivity = "knn" connectivity : {"pairwise", "knn"}, (default = "knn") The type of connectivity matrix to compute. - * 'pairwise' will compute the entire fully-connected graph of - pairwise distances between each set of points. This is the - fastest to compute and can be very fast for smaller datasets - but requires O(n^2) space. - * 'knn' will sparsify the fully-connected connectivity matrix to - save memory and enable much larger inputs. "n_neighbors" will - control the amount of memory used and the graph will be connected - automatically in the event "n_neighbors" was not large enough - to connect it. - + - 'pairwise' will compute the entire fully-connected graph of + pairwise distances between each set of points. This is the + fastest to compute and can be very fast for smaller datasets + but requires O(n^2) space. + - 'knn' will sparsify the fully-connected connectivity matrix to + save memory and enable much larger inputs. "n_neighbors" will + control the amount of memory used and the graph will be connected + automatically in the event "n_neighbors" was not large enough + to connect it. output_type : {'input', 'cudf', 'cupy', 'numpy', 'numba'}, default=None Variable to control output type of the results and attributes of the estimator. If None, it'll inherit the output type set at the diff --git a/python/cuml/cluster/hdbscan.pyx b/python/cuml/cluster/hdbscan.pyx index aa847736ee..395e1f5e94 100644 --- a/python/cuml/cluster/hdbscan.pyx +++ b/python/cuml/cluster/hdbscan.pyx @@ -291,6 +291,7 @@ class HDBSCAN(Base, ClusterMixin, CMajorInputTagMixin): alpha : float, optional (default=1.0) A distance scaling parameter as used in robust single linkage. + See [2]_ for more information. verbose : int or boolean, default=False Sets logging level. It must be one of `cuml.common.logger.level_*`. @@ -308,7 +309,7 @@ class HDBSCAN(Base, ClusterMixin, CMajorInputTagMixin): cluster_selection_epsilon : float, optional (default=0.0) A distance threshold. Clusters below this value will be merged. - Note that this should not be used + See [3]_ for more information. 
Note that this should not be used if we want to predict the cluster labels for new points in future (e.g. using approximate_predict), as the approximate_predict function is not aware of this argument. @@ -339,7 +340,6 @@ class HDBSCAN(Base, ClusterMixin, CMajorInputTagMixin): to find the most persistent clusters. Alternatively you can instead select the clusters at the leaves of the tree -- this provides the most fine grained and homogeneous clusters. Options are: - * ``eom`` * ``leaf`` @@ -349,17 +349,17 @@ class HDBSCAN(Base, ClusterMixin, CMajorInputTagMixin): the case that you feel this is a valid result for your dataset. gen_min_span_tree : bool, optional (default=False) - Whether to populate the `minimum_spanning_tree_` member for + Whether to populate the minimum_spanning_tree_ member for utilizing plotting tools. This requires the `hdbscan` CPU Python package to be installed. gen_condensed_tree : bool, optional (default=False) - Whether to populate the `condensed_tree_` member for + Whether to populate the condensed_tree_ member for utilizing plotting tools. This requires the `hdbscan` CPU Python package to be installed. gen_single_linkage_tree_ : bool, optinal (default=False) - Whether to populate the `single_linkage_tree_` member for + Whether to populate the single_linkage_tree_ member for utilizing plotting tools. This requires the `hdbscan` CPU Python package t be installed. diff --git a/python/cuml/dask/cluster/kmeans.py b/python/cuml/dask/cluster/kmeans.py index 70ad32113c..690d24b6da 100644 --- a/python/cuml/dask/cluster/kmeans.py +++ b/python/cuml/dask/cluster/kmeans.py @@ -141,14 +141,12 @@ def fit(self, X, sample_weight=None): X : Dask cuDF DataFrame or CuPy backed Dask Array Training data to cluster. - sample_weight : Dask cuDF DataFrame or CuPy backed Dask Array \ - shape = (n_samples,), default=None # noqa - + sample_weight : Dask cuDF DataFrame or CuPy backed Dask Array + shape = (n_samples,), default=None # noqa The weights for each observation in X. If None, all observations are assigned equal weight. Acceptable formats: cuDF DataFrame, NumPy ndarray, Numba device ndarray, cuda array interface compliant array like CuPy - """ sample_weight = self._check_normalize_sample_weight(sample_weight) diff --git a/python/cuml/dask/ensemble/randomforestclassifier.py b/python/cuml/dask/ensemble/randomforestclassifier.py index febd8bf318..abecd72dec 100755 --- a/python/cuml/dask/ensemble/randomforestclassifier.py +++ b/python/cuml/dask/ensemble/randomforestclassifier.py @@ -82,7 +82,6 @@ class RandomForestClassifier(BaseRandomForestModel, DelayedPredictionMixin, * ``4`` or ``'poisson'`` for poisson half deviance * ``5`` or ``'gamma'`` for gamma half deviance * ``6`` or ``'inverse_gaussian'`` for inverse gaussian deviance - ``2``, ``'mse'``, ``4``, ``'poisson'``, ``5``, ``'gamma'``, ``6``, ``'inverse_gaussian'`` not valid for classification bootstrap : boolean (default = True) @@ -106,7 +105,6 @@ class RandomForestClassifier(BaseRandomForestModel, DelayedPredictionMixin, * If ``'sqrt'`` then ``max_features=1/sqrt(n_features)``. * If ``'log2'`` then ``max_features=log2(n_features)/n_features``. * If ``None``, then ``max_features = 1.0``. - n_bins : int (default = 128) Maximum number of bins used by the split algorithm per feature. 
min_samples_leaf : int or float (default = 1) @@ -116,7 +114,6 @@ class RandomForestClassifier(BaseRandomForestModel, DelayedPredictionMixin, * If ``float``, then ``min_samples_leaf`` represents a fraction and ``ceil(min_samples_leaf * n_rows)`` is the minimum number of samples for each leaf node. - min_samples_split : int or float (default = 2) The minimum number of samples required to split an internal node.\n @@ -125,7 +122,6 @@ class RandomForestClassifier(BaseRandomForestModel, DelayedPredictionMixin, * If type ``float``, then ``min_samples_split`` represents a fraction and ``ceil(min_samples_split * n_rows)`` is the minimum number of samples for each split. - n_streams : int (default = 4 ) Number of parallel streams used for forest building workers : optional, list of strings diff --git a/python/cuml/dask/ensemble/randomforestregressor.py b/python/cuml/dask/ensemble/randomforestregressor.py index 6b5c36d4fd..607f2775d0 100755 --- a/python/cuml/dask/ensemble/randomforestregressor.py +++ b/python/cuml/dask/ensemble/randomforestregressor.py @@ -75,7 +75,6 @@ class RandomForestRegressor(BaseRandomForestModel, DelayedPredictionMixin, * ``4`` or ``'poisson'`` for poisson half deviance * ``5`` or ``'gamma'`` for gamma half deviance * ``6`` or ``'inverse_gaussian'`` for inverse gaussian deviance - ``0``, ``'gini'``, ``1``, ``'entropy'`` not valid for regression bootstrap : boolean (default = True) Control bootstrapping.\n diff --git a/python/cuml/ensemble/randomforestclassifier.pyx b/python/cuml/ensemble/randomforestclassifier.pyx index 92de0da07a..9a24be2995 100644 --- a/python/cuml/ensemble/randomforestclassifier.pyx +++ b/python/cuml/ensemble/randomforestclassifier.pyx @@ -159,7 +159,6 @@ class RandomForestClassifier(BaseRandomForestModel, * ``4`` or ``'poisson'`` for poisson half deviance * ``5`` or ``'gamma'`` for gamma half deviance * ``6`` or ``'inverse_gaussian'`` for inverse gaussian deviance - only ``0``/``'gini'`` and ``1``/``'entropy'`` valid for classification bootstrap : boolean (default = True) Control bootstrapping.\n diff --git a/python/cuml/ensemble/randomforestregressor.pyx b/python/cuml/ensemble/randomforestregressor.pyx index 023600f984..078d33b49c 100644 --- a/python/cuml/ensemble/randomforestregressor.pyx +++ b/python/cuml/ensemble/randomforestregressor.pyx @@ -158,7 +158,6 @@ class RandomForestRegressor(BaseRandomForestModel, * ``4`` or ``'poisson'`` for poisson half deviance * ``5`` or ``'gamma'`` for gamma half deviance * ``6`` or ``'inverse_gaussian'`` for inverse gaussian deviance - ``0``, ``'gini'``, ``1`` and ``'entropy'`` not valid for regression. bootstrap : boolean (default = True) Control bootstrapping.\n diff --git a/python/cuml/feature_extraction/_tfidf_vectorizer.py b/python/cuml/feature_extraction/_tfidf_vectorizer.py index a745d0f717..fbeeba7fc2 100644 --- a/python/cuml/feature_extraction/_tfidf_vectorizer.py +++ b/python/cuml/feature_extraction/_tfidf_vectorizer.py @@ -260,7 +260,6 @@ def transform(self, raw_documents): def get_feature_names(self): """ Array mapping from feature integer indices to feature name. 
- Returns ------- feature_names : Series diff --git a/python/cuml/fil/fil.pyx b/python/cuml/fil/fil.pyx index f4040ecacc..d0e6bcad8d 100644 --- a/python/cuml/fil/fil.pyx +++ b/python/cuml/fil/fil.pyx @@ -578,6 +578,7 @@ class ForestInference(Base, Parameters ---------- + {} preds : gpuarray or cudf.Series, shape = (n_samples,) Optional 'out' location to store inference results @@ -606,6 +607,7 @@ class ForestInference(Base, Parameters ---------- + {} preds : gpuarray or cudf.Series, shape = (n_samples,2) Binary probability output Optional 'out' location to store inference results diff --git a/python/cuml/metrics/pairwise_distances.pyx b/python/cuml/metrics/pairwise_distances.pyx index 8bcd1dac04..3eb4f916c3 100644 --- a/python/cuml/metrics/pairwise_distances.pyx +++ b/python/cuml/metrics/pairwise_distances.pyx @@ -341,7 +341,6 @@ def sparse_pairwise_distances(X, Y=None, metric="euclidean", handle=None, See the documentation for scipy.spatial.distance for details on these metrics. - ['inner_product', 'hellinger'] - Parameters ---------- X : array-like (device or host) of shape (n_samples_x, n_features) diff --git a/python/cuml/naive_bayes/naive_bayes.py b/python/cuml/naive_bayes/naive_bayes.py index b591278d60..00b9785b44 100644 --- a/python/cuml/naive_bayes/naive_bayes.py +++ b/python/cuml/naive_bayes/naive_bayes.py @@ -1378,7 +1378,6 @@ def _check_X(self, X): def fit(self, X, y, sample_weight=None) -> "CategoricalNB": """Fit Naive Bayes classifier according to X, y - Parameters ---------- X : array-like of shape (n_samples, n_features) @@ -1394,7 +1393,6 @@ def fit(self, X, y, sample_weight=None) -> "CategoricalNB": sample_weight : array-like of shape (n_samples), default=None Weights applied to individual samples (1. for unweighted). Currently sample weight is ignored. - Returns ------- self : object @@ -1412,7 +1410,6 @@ def partial_fit(self, X, y, classes=None, This method has some performance overhead hence it is better to call partial_fit on chunks of data that are as large as possible (as long as fitting in the memory budget) to hide the overhead. - Parameters ---------- X : array-like of shape (n_samples, n_features) @@ -1432,7 +1429,6 @@ def partial_fit(self, X, y, classes=None, sample_weight : array-like of shape (n_samples), default=None Weights applied to individual samples (1. for unweighted). Currently sample weight is ignored. - Returns ------- self : object diff --git a/python/cuml/preprocessing/TargetEncoder.py b/python/cuml/preprocessing/TargetEncoder.py index 755f1b7a59..07e63b75ff 100644 --- a/python/cuml/preprocessing/TargetEncoder.py +++ b/python/cuml/preprocessing/TargetEncoder.py @@ -49,7 +49,7 @@ class TargetEncoder: 'continuous': consecutive samples are grouped into one folds. 'interleaved': samples are assign to each fold in a round robin way. 'customize': customize splitting by providing a `fold_ids` array - in `fit()` or `fit_transform()` functions. + in `fit()` or `fit_transform()` functions. output_type: {'cupy', 'numpy', 'auto'}, default = 'auto' The data type of output. If 'auto', it matches input data. From 9b2bfd82c75264039b408d7c42367a9c816b6db1 Mon Sep 17 00:00:00 2001 From: Rory Mitchell Date: Fri, 18 Feb 2022 05:35:55 -0800 Subject: [PATCH 06/12] Copyright --- ci/docs/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/docs/build.sh b/ci/docs/build.sh index 2fb07a6882..b006abae75 100644 --- a/ci/docs/build.sh +++ b/ci/docs/build.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2020-2021, NVIDIA CORPORATION. 
+# Copyright (c) 2020-2022, NVIDIA CORPORATION. ################################# # cuML Docs build script for CI # ################################# From faead481336dd8a593f9543ab5ffc5a95d8612a5 Mon Sep 17 00:00:00 2001 From: Rory Mitchell Date: Fri, 18 Feb 2022 08:39:29 -0800 Subject: [PATCH 07/12] Revert "Revert "Fix sphinx doc warnings"" This reverts commit 29e10bf819745c9b261669fca9bb354abdd42eb7. --- python/cuml/cluster/agglomerative.pyx | 25 +++++++++++-------- python/cuml/cluster/hdbscan.pyx | 10 ++++---- python/cuml/dask/cluster/kmeans.py | 6 +++-- .../dask/ensemble/randomforestclassifier.py | 4 +++ .../dask/ensemble/randomforestregressor.py | 1 + .../cuml/ensemble/randomforestclassifier.pyx | 1 + .../cuml/ensemble/randomforestregressor.pyx | 1 + .../feature_extraction/_tfidf_vectorizer.py | 1 + python/cuml/fil/fil.pyx | 2 -- python/cuml/metrics/pairwise_distances.pyx | 1 + python/cuml/naive_bayes/naive_bayes.py | 4 +++ python/cuml/preprocessing/TargetEncoder.py | 2 +- 12 files changed, 37 insertions(+), 21 deletions(-) diff --git a/python/cuml/cluster/agglomerative.pyx b/python/cuml/cluster/agglomerative.pyx index ea07a92650..10371c5b6d 100644 --- a/python/cuml/cluster/agglomerative.pyx +++ b/python/cuml/cluster/agglomerative.pyx @@ -106,21 +106,24 @@ class AgglomerativeClustering(Base, ClusterMixin, CMajorInputTagMixin): Which linkage criterion to use. The linkage criterion determines which distance to use between sets of observations. The algorithm will merge the pairs of clusters that minimize this criterion. - - 'single' uses the minimum of the distances between all - observations of the two sets. + + * 'single' uses the minimum of the distances between all + observations of the two sets. + n_neighbors : int (default = 15) The number of neighbors to compute when connectivity = "knn" connectivity : {"pairwise", "knn"}, (default = "knn") The type of connectivity matrix to compute. - - 'pairwise' will compute the entire fully-connected graph of - pairwise distances between each set of points. This is the - fastest to compute and can be very fast for smaller datasets - but requires O(n^2) space. - - 'knn' will sparsify the fully-connected connectivity matrix to - save memory and enable much larger inputs. "n_neighbors" will - control the amount of memory used and the graph will be connected - automatically in the event "n_neighbors" was not large enough - to connect it. + * 'pairwise' will compute the entire fully-connected graph of + pairwise distances between each set of points. This is the + fastest to compute and can be very fast for smaller datasets + but requires O(n^2) space. + * 'knn' will sparsify the fully-connected connectivity matrix to + save memory and enable much larger inputs. "n_neighbors" will + control the amount of memory used and the graph will be connected + automatically in the event "n_neighbors" was not large enough + to connect it. + output_type : {'input', 'cudf', 'cupy', 'numpy', 'numba'}, default=None Variable to control output type of the results and attributes of the estimator. If None, it'll inherit the output type set at the diff --git a/python/cuml/cluster/hdbscan.pyx b/python/cuml/cluster/hdbscan.pyx index 395e1f5e94..aa847736ee 100644 --- a/python/cuml/cluster/hdbscan.pyx +++ b/python/cuml/cluster/hdbscan.pyx @@ -291,7 +291,6 @@ class HDBSCAN(Base, ClusterMixin, CMajorInputTagMixin): alpha : float, optional (default=1.0) A distance scaling parameter as used in robust single linkage. - See [2]_ for more information. 
verbose : int or boolean, default=False Sets logging level. It must be one of `cuml.common.logger.level_*`. @@ -309,7 +308,7 @@ class HDBSCAN(Base, ClusterMixin, CMajorInputTagMixin): cluster_selection_epsilon : float, optional (default=0.0) A distance threshold. Clusters below this value will be merged. - See [3]_ for more information. Note that this should not be used + Note that this should not be used if we want to predict the cluster labels for new points in future (e.g. using approximate_predict), as the approximate_predict function is not aware of this argument. @@ -340,6 +339,7 @@ class HDBSCAN(Base, ClusterMixin, CMajorInputTagMixin): to find the most persistent clusters. Alternatively you can instead select the clusters at the leaves of the tree -- this provides the most fine grained and homogeneous clusters. Options are: + * ``eom`` * ``leaf`` @@ -349,17 +349,17 @@ class HDBSCAN(Base, ClusterMixin, CMajorInputTagMixin): the case that you feel this is a valid result for your dataset. gen_min_span_tree : bool, optional (default=False) - Whether to populate the minimum_spanning_tree_ member for + Whether to populate the `minimum_spanning_tree_` member for utilizing plotting tools. This requires the `hdbscan` CPU Python package to be installed. gen_condensed_tree : bool, optional (default=False) - Whether to populate the condensed_tree_ member for + Whether to populate the `condensed_tree_` member for utilizing plotting tools. This requires the `hdbscan` CPU Python package to be installed. gen_single_linkage_tree_ : bool, optinal (default=False) - Whether to populate the single_linkage_tree_ member for + Whether to populate the `single_linkage_tree_` member for utilizing plotting tools. This requires the `hdbscan` CPU Python package t be installed. diff --git a/python/cuml/dask/cluster/kmeans.py b/python/cuml/dask/cluster/kmeans.py index 690d24b6da..70ad32113c 100644 --- a/python/cuml/dask/cluster/kmeans.py +++ b/python/cuml/dask/cluster/kmeans.py @@ -141,12 +141,14 @@ def fit(self, X, sample_weight=None): X : Dask cuDF DataFrame or CuPy backed Dask Array Training data to cluster. - sample_weight : Dask cuDF DataFrame or CuPy backed Dask Array - shape = (n_samples,), default=None # noqa + sample_weight : Dask cuDF DataFrame or CuPy backed Dask Array \ + shape = (n_samples,), default=None # noqa + The weights for each observation in X. If None, all observations are assigned equal weight. Acceptable formats: cuDF DataFrame, NumPy ndarray, Numba device ndarray, cuda array interface compliant array like CuPy + """ sample_weight = self._check_normalize_sample_weight(sample_weight) diff --git a/python/cuml/dask/ensemble/randomforestclassifier.py b/python/cuml/dask/ensemble/randomforestclassifier.py index abecd72dec..febd8bf318 100755 --- a/python/cuml/dask/ensemble/randomforestclassifier.py +++ b/python/cuml/dask/ensemble/randomforestclassifier.py @@ -82,6 +82,7 @@ class RandomForestClassifier(BaseRandomForestModel, DelayedPredictionMixin, * ``4`` or ``'poisson'`` for poisson half deviance * ``5`` or ``'gamma'`` for gamma half deviance * ``6`` or ``'inverse_gaussian'`` for inverse gaussian deviance + ``2``, ``'mse'``, ``4``, ``'poisson'``, ``5``, ``'gamma'``, ``6``, ``'inverse_gaussian'`` not valid for classification bootstrap : boolean (default = True) @@ -105,6 +106,7 @@ class RandomForestClassifier(BaseRandomForestModel, DelayedPredictionMixin, * If ``'sqrt'`` then ``max_features=1/sqrt(n_features)``. * If ``'log2'`` then ``max_features=log2(n_features)/n_features``. 
* If ``None``, then ``max_features = 1.0``. + n_bins : int (default = 128) Maximum number of bins used by the split algorithm per feature. min_samples_leaf : int or float (default = 1) @@ -114,6 +116,7 @@ class RandomForestClassifier(BaseRandomForestModel, DelayedPredictionMixin, * If ``float``, then ``min_samples_leaf`` represents a fraction and ``ceil(min_samples_leaf * n_rows)`` is the minimum number of samples for each leaf node. + min_samples_split : int or float (default = 2) The minimum number of samples required to split an internal node.\n @@ -122,6 +125,7 @@ class RandomForestClassifier(BaseRandomForestModel, DelayedPredictionMixin, * If type ``float``, then ``min_samples_split`` represents a fraction and ``ceil(min_samples_split * n_rows)`` is the minimum number of samples for each split. + n_streams : int (default = 4 ) Number of parallel streams used for forest building workers : optional, list of strings diff --git a/python/cuml/dask/ensemble/randomforestregressor.py b/python/cuml/dask/ensemble/randomforestregressor.py index 607f2775d0..6b5c36d4fd 100755 --- a/python/cuml/dask/ensemble/randomforestregressor.py +++ b/python/cuml/dask/ensemble/randomforestregressor.py @@ -75,6 +75,7 @@ class RandomForestRegressor(BaseRandomForestModel, DelayedPredictionMixin, * ``4`` or ``'poisson'`` for poisson half deviance * ``5`` or ``'gamma'`` for gamma half deviance * ``6`` or ``'inverse_gaussian'`` for inverse gaussian deviance + ``0``, ``'gini'``, ``1``, ``'entropy'`` not valid for regression bootstrap : boolean (default = True) Control bootstrapping.\n diff --git a/python/cuml/ensemble/randomforestclassifier.pyx b/python/cuml/ensemble/randomforestclassifier.pyx index 9a24be2995..92de0da07a 100644 --- a/python/cuml/ensemble/randomforestclassifier.pyx +++ b/python/cuml/ensemble/randomforestclassifier.pyx @@ -159,6 +159,7 @@ class RandomForestClassifier(BaseRandomForestModel, * ``4`` or ``'poisson'`` for poisson half deviance * ``5`` or ``'gamma'`` for gamma half deviance * ``6`` or ``'inverse_gaussian'`` for inverse gaussian deviance + only ``0``/``'gini'`` and ``1``/``'entropy'`` valid for classification bootstrap : boolean (default = True) Control bootstrapping.\n diff --git a/python/cuml/ensemble/randomforestregressor.pyx b/python/cuml/ensemble/randomforestregressor.pyx index 078d33b49c..023600f984 100644 --- a/python/cuml/ensemble/randomforestregressor.pyx +++ b/python/cuml/ensemble/randomforestregressor.pyx @@ -158,6 +158,7 @@ class RandomForestRegressor(BaseRandomForestModel, * ``4`` or ``'poisson'`` for poisson half deviance * ``5`` or ``'gamma'`` for gamma half deviance * ``6`` or ``'inverse_gaussian'`` for inverse gaussian deviance + ``0``, ``'gini'``, ``1`` and ``'entropy'`` not valid for regression. bootstrap : boolean (default = True) Control bootstrapping.\n diff --git a/python/cuml/feature_extraction/_tfidf_vectorizer.py b/python/cuml/feature_extraction/_tfidf_vectorizer.py index fbeeba7fc2..a745d0f717 100644 --- a/python/cuml/feature_extraction/_tfidf_vectorizer.py +++ b/python/cuml/feature_extraction/_tfidf_vectorizer.py @@ -260,6 +260,7 @@ def transform(self, raw_documents): def get_feature_names(self): """ Array mapping from feature integer indices to feature name. 
+ Returns ------- feature_names : Series diff --git a/python/cuml/fil/fil.pyx b/python/cuml/fil/fil.pyx index d0e6bcad8d..f4040ecacc 100644 --- a/python/cuml/fil/fil.pyx +++ b/python/cuml/fil/fil.pyx @@ -578,7 +578,6 @@ class ForestInference(Base, Parameters ---------- - {} preds : gpuarray or cudf.Series, shape = (n_samples,) Optional 'out' location to store inference results @@ -607,7 +606,6 @@ class ForestInference(Base, Parameters ---------- - {} preds : gpuarray or cudf.Series, shape = (n_samples,2) Binary probability output Optional 'out' location to store inference results diff --git a/python/cuml/metrics/pairwise_distances.pyx b/python/cuml/metrics/pairwise_distances.pyx index 3eb4f916c3..8bcd1dac04 100644 --- a/python/cuml/metrics/pairwise_distances.pyx +++ b/python/cuml/metrics/pairwise_distances.pyx @@ -341,6 +341,7 @@ def sparse_pairwise_distances(X, Y=None, metric="euclidean", handle=None, See the documentation for scipy.spatial.distance for details on these metrics. - ['inner_product', 'hellinger'] + Parameters ---------- X : array-like (device or host) of shape (n_samples_x, n_features) diff --git a/python/cuml/naive_bayes/naive_bayes.py b/python/cuml/naive_bayes/naive_bayes.py index 00b9785b44..b591278d60 100644 --- a/python/cuml/naive_bayes/naive_bayes.py +++ b/python/cuml/naive_bayes/naive_bayes.py @@ -1378,6 +1378,7 @@ def _check_X(self, X): def fit(self, X, y, sample_weight=None) -> "CategoricalNB": """Fit Naive Bayes classifier according to X, y + Parameters ---------- X : array-like of shape (n_samples, n_features) @@ -1393,6 +1394,7 @@ def fit(self, X, y, sample_weight=None) -> "CategoricalNB": sample_weight : array-like of shape (n_samples), default=None Weights applied to individual samples (1. for unweighted). Currently sample weight is ignored. + Returns ------- self : object @@ -1410,6 +1412,7 @@ def partial_fit(self, X, y, classes=None, This method has some performance overhead hence it is better to call partial_fit on chunks of data that are as large as possible (as long as fitting in the memory budget) to hide the overhead. + Parameters ---------- X : array-like of shape (n_samples, n_features) @@ -1429,6 +1432,7 @@ def partial_fit(self, X, y, classes=None, sample_weight : array-like of shape (n_samples), default=None Weights applied to individual samples (1. for unweighted). Currently sample weight is ignored. + Returns ------- self : object diff --git a/python/cuml/preprocessing/TargetEncoder.py b/python/cuml/preprocessing/TargetEncoder.py index 07e63b75ff..755f1b7a59 100644 --- a/python/cuml/preprocessing/TargetEncoder.py +++ b/python/cuml/preprocessing/TargetEncoder.py @@ -49,7 +49,7 @@ class TargetEncoder: 'continuous': consecutive samples are grouped into one folds. 'interleaved': samples are assign to each fold in a round robin way. 'customize': customize splitting by providing a `fold_ids` array - in `fit()` or `fit_transform()` functions. + in `fit()` or `fit_transform()` functions. output_type: {'cupy', 'numpy', 'auto'}, default = 'auto' The data type of output. If 'auto', it matches input data. 
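A minimal sketch of the exit-status pattern that PATCH 02 adds to ci/docs/build.sh, shown here on its own for clarity. The publish_docs function is a hypothetical stand-in for the real cd/mkdir/mv publishing steps; everything else mirrors the patch: the Sphinx exit code is saved immediately after make html (any later command would overwrite $?) and re-raised at the end, so the job fails whenever the warnings-as-errors build from PATCH 01 fails.

    #!/bin/bash
    # Sketch of the exit-status pattern from ci/docs/build.sh (PATCH 02), not the full script.

    publish_docs() {
        # hypothetical placeholder for the cd/mkdir/mv steps that copy the html output
        echo "publishing docs regardless of build status"
    }

    cd docs || exit 1  # the CI script cd's into $PROJECT_WORKSPACE/docs before building
    make html          # strict Sphinx build: SPHINXOPTS="-W" (PATCH 01) promotes warnings to errors
    RETVAL=$?          # capture immediately; the publishing commands below would overwrite $?
    publish_docs
    exit $RETVAL       # propagate the saved Sphinx status as the script's exit code
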
From 199cf347b1de5be4b1e93a0c39c8c684ce27ea58 Mon Sep 17 00:00:00 2001 From: Rory Mitchell Date: Fri, 18 Feb 2022 10:22:24 -0800 Subject: [PATCH 08/12] Copyright --- python/cuml/cluster/agglomerative.pyx | 2 +- python/cuml/cluster/hdbscan.pyx | 2 +- python/cuml/dask/cluster/kmeans.py | 2 +- python/cuml/feature_extraction/_tfidf_vectorizer.py | 2 +- python/cuml/fil/fil.pyx | 2 +- python/cuml/metrics/pairwise_distances.pyx | 2 +- python/cuml/naive_bayes/naive_bayes.py | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/python/cuml/cluster/agglomerative.pyx b/python/cuml/cluster/agglomerative.pyx index 10371c5b6d..a3c6fd9183 100644 --- a/python/cuml/cluster/agglomerative.pyx +++ b/python/cuml/cluster/agglomerative.pyx @@ -1,5 +1,5 @@ # -# Copyright (c) 2019-2021, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/python/cuml/cluster/hdbscan.pyx b/python/cuml/cluster/hdbscan.pyx index aa847736ee..b2b94816d8 100644 --- a/python/cuml/cluster/hdbscan.pyx +++ b/python/cuml/cluster/hdbscan.pyx @@ -1,5 +1,5 @@ # -# Copyright (c) 2021, NVIDIA CORPORATION. +# Copyright (c) 2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/python/cuml/dask/cluster/kmeans.py b/python/cuml/dask/cluster/kmeans.py index 70ad32113c..c2e85725aa 100644 --- a/python/cuml/dask/cluster/kmeans.py +++ b/python/cuml/dask/cluster/kmeans.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2021, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/python/cuml/feature_extraction/_tfidf_vectorizer.py b/python/cuml/feature_extraction/_tfidf_vectorizer.py index a745d0f717..319523dafe 100644 --- a/python/cuml/feature_extraction/_tfidf_vectorizer.py +++ b/python/cuml/feature_extraction/_tfidf_vectorizer.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/python/cuml/fil/fil.pyx b/python/cuml/fil/fil.pyx index f4040ecacc..ba5cab67b7 100644 --- a/python/cuml/fil/fil.pyx +++ b/python/cuml/fil/fil.pyx @@ -1,5 +1,5 @@ # -# Copyright (c) 2019-2021, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/python/cuml/metrics/pairwise_distances.pyx b/python/cuml/metrics/pairwise_distances.pyx index 8bcd1dac04..8346f9b562 100644 --- a/python/cuml/metrics/pairwise_distances.pyx +++ b/python/cuml/metrics/pairwise_distances.pyx @@ -1,5 +1,5 @@ # -# Copyright (c) 2020-2021, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/python/cuml/naive_bayes/naive_bayes.py b/python/cuml/naive_bayes/naive_bayes.py index b591278d60..708b349f19 100644 --- a/python/cuml/naive_bayes/naive_bayes.py +++ b/python/cuml/naive_bayes/naive_bayes.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2020-2021, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From dac023805a3f6b1b349109e899e0d7e84a3bf203 Mon Sep 17 00:00:00 2001 From: Rory Mitchell Date: Wed, 9 Mar 2022 11:29:46 +0100 Subject: [PATCH 09/12] Update ci/gpu/build.sh Co-authored-by: Dante Gama Dessavre --- ci/gpu/build.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 8c3ada6f5f..14fe6cf17f 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -284,7 +284,8 @@ else unset LIBCUML_BUILD_DIR $WORKSPACE/build.sh cppdocs -v - gpuci_logger "Building python docs" + if [ "$CUDA_REL" == "11.0" ];then + gpuci_logger "Building python docs" $WORKSPACE/build.sh pydocs fi From 5a1533f2bb0700e5c8c1141a3eb6f1fa5311adfe Mon Sep 17 00:00:00 2001 From: Rory Mitchell Date: Thu, 10 Mar 2022 02:19:39 -0800 Subject: [PATCH 10/12] Fix build file --- ci/gpu/build.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 14fe6cf17f..4e2dc4cab5 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -284,9 +284,10 @@ else unset LIBCUML_BUILD_DIR $WORKSPACE/build.sh cppdocs -v - if [ "$CUDA_REL" == "11.0" ];then + if [ "$CUDA_REL" == "11.0" ]; then gpuci_logger "Building python docs" - $WORKSPACE/build.sh pydocs + $WORKSPACE/build.sh pydocs + fi fi From d6bf563b9f97e3327f03a8c2cf5499009a8a1373 Mon Sep 17 00:00:00 2001 From: Rory Mitchell Date: Mon, 14 Mar 2022 18:14:07 +0100 Subject: [PATCH 11/12] Update ci/gpu/build.sh Co-authored-by: Dante Gama Dessavre --- ci/gpu/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 4e2dc4cab5..2b05445b1a 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -284,7 +284,7 @@ else unset LIBCUML_BUILD_DIR $WORKSPACE/build.sh cppdocs -v - if [ "$CUDA_REL" == "11.0" ]; then + if [ "$CUDA_REL" != "11.0" ]; then gpuci_logger "Building python docs" $WORKSPACE/build.sh pydocs fi From 65e83519013282ab8e4133b7eb252b484793d396 Mon Sep 17 00:00:00 2001 From: Rory Mitchell Date: Wed, 16 Mar 2022 04:32:36 -0700 Subject: [PATCH 12/12] Fix pairwise_kernels docstring --- python/cuml/metrics/pairwise_kernels.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/python/cuml/metrics/pairwise_kernels.py b/python/cuml/metrics/pairwise_kernels.py index 5416ce36ce..9a1eb7ae7c 100644 --- a/python/cuml/metrics/pairwise_kernels.py +++ b/python/cuml/metrics/pairwise_kernels.py @@ -202,9 +202,8 @@ def pairwise_kernels(X, Y=None, metric="linear", *, array. If Y is given (default is None), then the returned matrix is the pairwise kernel between the arrays from both X and Y. - Valid values for metric are: - ['additive_chi2', 'chi2', 'linear', 'poly', 'polynomial', 'rbf', - 'laplacian', 'sigmoid', 'cosine'] + Valid values for metric are: ['additive_chi2', 'chi2', 'linear', 'poly', + 'polynomial', 'rbf', 'laplacian', 'sigmoid', 'cosine'] Parameters ----------
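
Taken together, the series can be exercised locally with something like the sketch below. It assumes a cuML checkout whose build.sh exposes the cppdocs and pydocs targets invoked by ci/gpu/build.sh (PATCH 04); the CUDA_REL default here is only illustrative, and the CUDA gate mirrors its final form from PATCH 11. Because docs/Makefile now sets SPHINXOPTS to "-W" (PATCH 01), any Sphinx warning makes the pydocs step, and therefore the CI job, fail.

    #!/bin/bash
    # Rough local equivalent of the docs steps ci/gpu/build.sh runs after this series.
    WORKSPACE=$(pwd)                   # assumption: run from the root of a cuML checkout
    CUDA_REL=${CUDA_REL:-11.5}         # assumption: CI derives this from the installed CUDA toolkit

    "$WORKSPACE/build.sh" cppdocs -v   # Doxygen docs (pre-existing step)

    if [ "$CUDA_REL" != "11.0" ]; then # gate in its final form from PATCH 11
        echo "Building python docs"
        "$WORKSPACE/build.sh" pydocs   # Sphinx docs; fails on any warning because of -W
    fi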