Skip to content

Commit

Permalink
[REVIEW] Correcting labels meta dtype for `cuml.dask.make_classification` (#2940)
Browse files Browse the repository at this point in the history

* correct meta

* adding a test

* changelog

* using np.int64 explicitly
  • Loading branch information
divyegala authored Oct 9, 2020
1 parent d6ff833 commit bcc5bc3
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 4 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@
- PR #2932: Marking KBinsDiscretizer pytests as xfail
- PR #2925: Fixing Owner Bug When Slicing CumlArray Objects
- PR #2931: Fix notebook error handling in gpuCI
- PR #2940: Correcting labels meta dtype for `cuml.dask.make_classification`


# cuML 0.15.0 (Date TBD)
Expand Down
2 changes: 1 addition & 1 deletion python/cuml/dask/datasets/classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ def make_classification(n_samples=100, n_features=20, n_informative=2,
for idx, f in enumerate(parts)]

X_dela = _create_delayed(X_parts, dtype, worker_rows, n_features)
y_dela = _create_delayed(y_parts, dtype, worker_rows)
y_dela = _create_delayed(y_parts, np.int64, worker_rows)

X = da.concatenate(X_dela)
y = da.concatenate(y_dela)
Expand Down
2 changes: 1 addition & 1 deletion python/cuml/datasets/classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ def make_classification(n_samples=100, n_features=20, n_informative=2,
# Initialize X and y
X = generator.randn(n_samples * n_features, dtype=dtype)
X = X.reshape((n_samples, n_features), order=order)
y = cp.zeros(n_samples, dtype=np.int)
y = cp.zeros(n_samples, dtype=np.int64)

# Build the polytope whose vertices become cluster centroids
if _centroids is None:
Expand Down
9 changes: 7 additions & 2 deletions python/cuml/test/dask/test_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,23 +180,28 @@ def test_make_regression(n_samples, n_features, n_informative,
@pytest.mark.parametrize('random_state', [None, 1234])
@pytest.mark.parametrize('n_parts', [2, 23])
@pytest.mark.parametrize('order', ['C', 'F'])
@pytest.mark.parametrize('dtype', ['float32', 'float64'])
def test_make_classification(n_samples, n_features, hypercube, n_classes,
n_clusters_per_class, n_informative,
random_state, n_parts, order, client):
random_state, n_parts, order, dtype,
client):
from cuml.dask.datasets.classification import make_classification

X, y = make_classification(n_samples=n_samples, n_features=n_features,
n_classes=n_classes, hypercube=hypercube,
n_clusters_per_class=n_clusters_per_class,
n_informative=n_informative,
random_state=random_state, n_parts=n_parts,
order=order)
order=order, dtype=dtype)
assert(len(X.chunks[0])) == n_parts
assert(len(X.chunks[1])) == 1

assert X.shape == (n_samples, n_features)
assert y.shape == (n_samples, )

assert X.dtype == dtype
assert y.dtype == np.int64

assert len(X.chunks[0]) == n_parts
assert len(y.chunks[0]) == n_parts

Expand Down

0 comments on commit bcc5bc3

Please sign in to comment.