Skip to content

Commit

Permalink
Ensure make_classification respects output type (#3415)
Browse files Browse the repository at this point in the history
Switch to api_return_generic decorator in order to get correct output type from make_classification
Provide tests of global_output_type compliance for all dataset generators

Authors:
  - William Hicks (@wphicks)

Approvers:
  - John Zedlewski (@JohnZed)
  - Corey J. Nolet (@cjnolet)

URL: #3415
  • Loading branch information
wphicks authored Feb 2, 2021
1 parent fa2371a commit a3c62b1
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 32 deletions.
4 changes: 3 additions & 1 deletion python/cuml/datasets/classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def _generate_hypercube(samples, dimensions, rng):
return out


@cuml.internals.api_return_any()
@cuml.internals.api_return_generic()
def make_classification(n_samples=100, n_features=20, n_informative=2,
n_redundant=2, n_repeated=0, n_classes=2,
n_clusters_per_class=2, weights=None, flip_y=0.01,
Expand Down Expand Up @@ -205,6 +205,8 @@ def make_classification(n_samples=100, n_features=20, n_informative=2,
selection benchmark", 2003.
"""
cuml.internals.set_api_output_type("cupy")

generator = _create_rs_generator(random_state)
np_seed = int(generator.randint(n_samples, size=1))
np.random.seed(np_seed)
Expand Down
69 changes: 69 additions & 0 deletions python/cuml/test/test_dataset_generator_types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#
# Copyright (c) 2021, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import cudf
import cupy as cp
import numba
import numpy as np
import pytest

import cuml
from cuml.datasets import (
make_arima,
make_blobs,
make_classification,
make_regression
)


# Each entry pairs an output-type setting with the types expected for the
# values a generator returns, listed in return order.
TEST_OUTPUT_TYPES = (
    # Default is cupy if None is used
    (None, (cp.ndarray, cp.ndarray)),
    ('numpy', (np.ndarray, np.ndarray)),
    ('cupy', (cp.ndarray, cp.ndarray)),
    (
        'numba',
        (
            numba.cuda.devicearray.DeviceNDArrayBase,
            numba.cuda.devicearray.DeviceNDArrayBase,
        ),
    ),
    ('cudf', (cudf.DataFrame, cudf.Series)),
)

# Dataset generators that test_xy_output_type is parametrized over.
GENERATORS = (
    make_blobs,
    make_classification,
    make_regression,
)


@pytest.mark.parametrize('generator', GENERATORS)
@pytest.mark.parametrize(
    'output_str,output_types', TEST_OUTPUT_TYPES
)
def test_xy_output_type(generator, output_str, output_types):
    """Check that a dataset generator honors the global output type.

    Parameters
    ----------
    generator : callable
        One of the entries of ``GENERATORS``; called with ``n_samples=10``
        and ``random_state=0``.
    output_str : str or None
        Value passed to ``cuml.using_output_type``.
    output_types : tuple of type
        Expected type of each value returned by ``generator``, in order.
    """
    # Set the output type and ensure data of that type is generated
    with cuml.using_output_type(output_str):
        outputs = generator(n_samples=10, random_state=0)

    # zip() stops at the shorter input, so check the lengths first; otherwise
    # a generator returning too few values would pass without every expected
    # output being verified.
    assert len(outputs) == len(output_types)

    # Use distinct loop names so the generator's result is not shadowed.
    for output, expected_type in zip(outputs, output_types):
        assert isinstance(output, expected_type)


@pytest.mark.parametrize(
    'output_str,output_types', TEST_OUTPUT_TYPES
)
def test_time_series_label_output_type(output_str, output_types):
    """Check that ``make_arima`` honors the global output type.

    Element ``0`` of the ``make_arima`` result is compared against the
    second entry of ``output_types``.

    Parameters
    ----------
    output_str : str or None
        Value passed to ``cuml.using_output_type``.
    output_types : tuple of type
        Expected types for the given setting; only index 1 is used here.
    """
    # Generate and index the data while the requested output type is active
    with cuml.using_output_type(output_str):
        generated = make_arima(n_obs=10, random_state=0)
        first = generated[0]

    expected_type = output_types[1]
    assert isinstance(first, expected_type)
32 changes: 1 addition & 31 deletions python/cuml/test/test_make_blobs.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2019, NVIDIA CORPORATION.
# Copyright (c) 2019-2021, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -16,9 +16,6 @@
import cuml
import pytest
import cupy as cp
import cudf
import numpy as np
import numba.cuda

# Testing parameters for scalar parameter tests

Expand Down Expand Up @@ -96,30 +93,3 @@ def test_make_blobs_scalar_parameters(dtype, n_samples, n_features, centers,
elif centers <= n_samples:
assert cp.unique(labels).shape == (centers,), \
"unexpected number of clusters"


# Maps an output-type setting to the expected result type. A tuple value
# gives separate expected types for X and y, in that order.
test_output_types = {
    # Default is cupy if None is used
    None: cp.ndarray,
    'numpy': np.ndarray,
    'cupy': cp.ndarray,
    'numba': numba.cuda.devicearray.DeviceNDArrayBase,
    'cudf': (cudf.DataFrame, cudf.Series),
}


@pytest.mark.parametrize("input_type", test_output_types.keys())
def test_output_type(input_type: str):
    """Check that ``cuml.make_blobs`` honors the global output type.

    Parameters
    ----------
    input_type : str
        Key of ``test_output_types``; passed to ``cuml.using_output_type``.
    """
    # Normalize the expectation to one type for X and one for y: a tuple
    # entry supplies them separately, any other entry applies to both.
    expected = test_output_types[input_type]
    if isinstance(expected, tuple):
        x_type, y_type = expected[0], expected[1]
    else:
        x_type = y_type = expected

    # Set the output type and ensure it is respected by the function
    with cuml.using_output_type(input_type):
        X, y = cuml.make_blobs(
            n_samples=10,
            centers=3,
            n_features=2,
            random_state=0,
        )

    assert isinstance(X, x_type)
    assert isinstance(y, y_type)

0 comments on commit a3c62b1

Please sign in to comment.