From 107874626ddfeb0ddceeb3b6adb2f73f74fa67ca Mon Sep 17 00:00:00 2001
From: Dante Gama Dessavre <dante.gamadessavre@gmail.com>
Date: Fri, 20 Nov 2020 16:56:54 -0600
Subject: [PATCH] [REVIEW] Fix access to attributes of individual NB objects in
 dask NB (#3152)

* FIX Access to attributes of individual NB objects in dask NB

* DOC Added entry to changelog

* ENH Add pytest

* FIX PEP8 fixes

Co-authored-by: John Zedlewski <904524+JohnZed@users.noreply.github.com>
---
 CHANGELOG.md                                |  1 +
 python/cuml/dask/naive_bayes/naive_bayes.py |  4 ++--
 python/cuml/test/dask/test_naive_bayes.py   | 25 +++++++++++++++++----
 3 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e430dd4a08..bacb261246 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -63,6 +63,7 @@
 - PR #3130: Return Python string from `dump_as_json()` of RF
 - PR #3132: Add `min_samples_split` + Rename `min_rows_per_node` -> `min_samples_leaf`
 - PR #3136: Fix stochastic gradient descent example
+- PR #3152: Fix access to attributes of individual NB objects in dask NB
 - PR #3156: Force local conda artifact install
 - PR #3162: Removing accidentally checked in debug file
 
diff --git a/python/cuml/dask/naive_bayes/naive_bayes.py b/python/cuml/dask/naive_bayes/naive_bayes.py
index 5a749be30a..6dc9bd10a5 100644
--- a/python/cuml/dask/naive_bayes/naive_bayes.py
+++ b/python/cuml/dask/naive_bayes/naive_bayes.py
@@ -136,8 +136,8 @@ def _merge_counts_to_model(models):
         modela = first(models)
 
         for model in models[1:]:
-            modela._feature_count_ += model._feature_count_
-            modela._class_count_ += model._class_count_
+            modela.feature_count_ += model.feature_count_
+            modela.class_count_ += model.class_count_
         return modela
 
     @staticmethod
diff --git a/python/cuml/test/dask/test_naive_bayes.py b/python/cuml/test/dask/test_naive_bayes.py
index d00b9b9d7e..4064e0f060 100644
--- a/python/cuml/test/dask/test_naive_bayes.py
+++ b/python/cuml/test/dask/test_naive_bayes.py
@@ -14,13 +14,13 @@
 # limitations under the License.
 #
 
-
-from cuml.test.dask.utils import load_text_corpus
-
-from sklearn.metrics import accuracy_score
+import cupy as cp
+import dask.array
 
 from cuml.dask.naive_bayes import MultinomialNB
 from cuml.naive_bayes.naive_bayes import MultinomialNB as SGNB
+from cuml.test.dask.utils import load_text_corpus
+from sklearn.metrics import accuracy_score
 
 
 def test_basic_fit_predict(client):
@@ -74,3 +74,20 @@ def test_score(client):
     y_local = y.compute()
 
     assert(accuracy_score(y_hat_local.get(), y_local) == score)
+
+
+def test_model_multiple_chunks(client):
+    # Tests naive_bayes with input split into more than one chunk; see issue
+    # https://github.com/rapidsai/cuml/issues/3150
+    X = cp.array([[0, 0, 0, 1], [1, 0, 0, 1], [1, 0, 0, 0]])
+
+    X = dask.array.from_array(X, chunks=((1, 1, 1), -1)).astype(cp.int32)
+    y = dask.array.from_array([1, 0, 0], asarray=False,
+                              fancy=False, chunks=(1)).astype(cp.int32)
+
+    model = MultinomialNB()
+    model.fit(X, y)
+
+    # This is a code-coverage test: the data is too small for a numeric check,
+    # but we call score here to exercise the whole model.
+    assert(0 <= model.score(X, y) <= 1)