Skip to content

Commit

Permalink
[REVIEW] Fix access to attributes of individual NB objects in dask NB (
Browse files Browse the repository at this point in the history
…#3152)

* FIX Access to attributes of individual NB objects in dask NB

* DOC Added entry to changelog

* ENH Add pytest

* FIX PEP8 fixes

Co-authored-by: John Zedlewski <[email protected]>
  • Loading branch information
dantegd and JohnZed authored Nov 20, 2020
1 parent 2877fb9 commit 1078746
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 6 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
- PR #3130: Return Python string from `dump_as_json()` of RF
- PR #3132: Add `min_samples_split` + Rename `min_rows_per_node` -> `min_samples_leaf`
- PR #3136: Fix stochastic gradient descent example
- PR #3152: Fix access to attributes of individual NB objects in dask NB
- PR #3156: Force local conda artifact install
- PR #3162: Removing accidentally checked in debug file

Expand Down
4 changes: 2 additions & 2 deletions python/cuml/dask/naive_bayes/naive_bayes.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,8 @@ def _merge_counts_to_model(models):
modela = first(models)

for model in models[1:]:
-            modela._feature_count_ += model._feature_count_
-            modela._class_count_ += model._class_count_
+            modela.feature_count_ += model.feature_count_
+            modela.class_count_ += model.class_count_
return modela

@staticmethod
Expand Down
25 changes: 21 additions & 4 deletions python/cuml/test/dask/test_naive_bayes.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,13 @@
# limitations under the License.
#


from cuml.test.dask.utils import load_text_corpus

from sklearn.metrics import accuracy_score
import cupy as cp
import dask.array

from cuml.dask.naive_bayes import MultinomialNB
from cuml.naive_bayes.naive_bayes import MultinomialNB as SGNB
from cuml.test.dask.utils import load_text_corpus
from sklearn.metrics import accuracy_score


def test_basic_fit_predict(client):
Expand Down Expand Up @@ -74,3 +74,20 @@ def test_score(client):
y_local = y.compute()

assert(accuracy_score(y_hat_local.get(), y_local) == score)


def test_model_multiple_chunks(client):
    """Smoke-test the dask MultinomialNB when the input spans several chunks.

    Related to https://github.com/rapidsai/cuml/issues/3150.

    The dataset is too small for a meaningful numeric check, so this is a
    code-coverage test: ``score`` is called only to exercise the whole model,
    and the result is checked to lie within [0, 1].
    """
    rows = cp.array([[0, 0, 0, 1], [1, 0, 0, 1], [1, 0, 0, 0]])

    # Split the three rows into three separate chunks so that more than one
    # chunk is involved in the fit.
    X = dask.array.from_array(rows, chunks=((1, 1, 1), -1))
    X = X.astype(cp.int32)

    labels = dask.array.from_array(
        [1, 0, 0], asarray=False, fancy=False, chunks=1
    )
    y = labels.astype(cp.int32)

    nb = MultinomialNB()
    nb.fit(X, y)

    accuracy = nb.score(X, y)
    assert 0 <= accuracy <= 1

0 comments on commit 1078746

Please sign in to comment.