From fb2aca347c12cf929b8e52bc3a444edf51320b1f Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Tue, 17 Nov 2020 17:08:13 -0600 Subject: [PATCH 1/4] FIX Access to attributes of individual NB objects in dask NB --- python/cuml/dask/naive_bayes/naive_bayes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cuml/dask/naive_bayes/naive_bayes.py b/python/cuml/dask/naive_bayes/naive_bayes.py index 5a749be30a..6dc9bd10a5 100644 --- a/python/cuml/dask/naive_bayes/naive_bayes.py +++ b/python/cuml/dask/naive_bayes/naive_bayes.py @@ -136,8 +136,8 @@ def _merge_counts_to_model(models): modela = first(models) for model in models[1:]: - modela._feature_count_ += model._feature_count_ - modela._class_count_ += model._class_count_ + modela.feature_count_ += model.feature_count_ + modela.class_count_ += model.class_count_ return modela @staticmethod From 1eecb0239ef5fc41c1bfe44e544ed231221dd2b7 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Tue, 17 Nov 2020 17:10:27 -0600 Subject: [PATCH 2/4] DOC Added entry to changelog --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1f8bbc6f76..6b83b27a67 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -50,9 +50,10 @@ - PR #3086: Reverting FIL Notebook Testing - PR #3114: Fixed a typo in SVC's predict_proba AttributeError - PR #3117: Fix two crashes in experimental RF backend -- PR #3119: Fix memset args for benchmark +- PR #3119: Fix memset args for benchmark - PR #3130: Return Python string from `dump_as_json()` of RF - PR #3136: Fix stochastic gradient descent example +- PR #3152: Fix access to attributes of individual NB objects in dask NB # cuML 0.16.0 (Date TBD) From dbfe0684c63e478fa0418234455f7f3963fe9cd3 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Wed, 18 Nov 2020 11:34:12 -0600 Subject: [PATCH 3/4] ENH Add pytest --- python/cuml/test/dask/test_naive_bayes.py | 27 +++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/python/cuml/test/dask/test_naive_bayes.py b/python/cuml/test/dask/test_naive_bayes.py index d00b9b9d7e..62cf819ce8 100644 --- a/python/cuml/test/dask/test_naive_bayes.py +++ b/python/cuml/test/dask/test_naive_bayes.py @@ -14,13 +14,15 @@ # limitations under the License. # - -from cuml.test.dask.utils import load_text_corpus - -from sklearn.metrics import accuracy_score +import cupy as cp +import dask.array +import numpy as np from cuml.dask.naive_bayes import MultinomialNB from cuml.naive_bayes.naive_bayes import MultinomialNB as SGNB +from cuml.test.dask.utils import load_text_corpus +from cupy.sparse import csr_matrix as cp_csr_matrix +from sklearn.metrics import accuracy_score def test_basic_fit_predict(client): @@ -74,3 +76,20 @@ def test_score(client): y_local = y.compute() assert(accuracy_score(y_hat_local.get(), y_local) == score) + + +def test_model_multiple_chunks(client): + # tests naive_bayes with n_chunks being greater than one, related to issue + # https://github.com/rapidsai/cuml/issues/3150 + X = cp.array([[0, 0, 0, 1], [1, 0, 0, 1], [1, 0, 0, 0]]) + + X = dask.array.from_array(X, chunks=((1, 1, 1), -1)).astype(cp.int32) + y = dask.array.from_array([1, 0, 0], asarray=False, + fancy=False, chunks=(1)).astype(cp.int32) + + model = MultinomialNB() + model.fit(X, y) + + # this test is a code coverage test, it is too small to be a numeric test, + # but we call score here to exercise the whole model. + assert(0 <= model.score(X, y) <= 1) From d9f876d9f4dc8a4b5de40b91db7e59a7e523294c Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Wed, 18 Nov 2020 11:38:29 -0600 Subject: [PATCH 4/4] FIX PEP8 fixes --- python/cuml/test/dask/test_naive_bayes.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/cuml/test/dask/test_naive_bayes.py b/python/cuml/test/dask/test_naive_bayes.py index 62cf819ce8..4064e0f060 100644 --- a/python/cuml/test/dask/test_naive_bayes.py +++ b/python/cuml/test/dask/test_naive_bayes.py @@ -16,12 +16,10 @@ import cupy as cp import dask.array -import numpy as np from cuml.dask.naive_bayes import MultinomialNB from cuml.naive_bayes.naive_bayes import MultinomialNB as SGNB from cuml.test.dask.utils import load_text_corpus -from cupy.sparse import csr_matrix as cp_csr_matrix from sklearn.metrics import accuracy_score