Skip to content

Commit

Permalink
Merge pull request #2209 from canonizer/bug-fil-bench
Browse files Browse the repository at this point in the history
[REVIEW] FIL benchmark now works again with gpuarray-c input type
  • Loading branch information
JohnZed authored Jul 13, 2020
2 parents 2168761 + f71b1c0 commit 2b7f38a
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 12 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@
- PR #2494: Set QN regularization strength consistent with scikit-learn
- PR #2486: Fix cupy input to kmeans init
- PR #2497: Changes to accommodate cuDF unsigned categorical changes
- PR #2209: Fix FIL benchmark for gpuarray-c input
- PR #2507: Import `treelite.sklearn`
- PR #2521: Fixing invalid smem calculation in KNeighborsClassifier
- PR #2515: Increase tolerance for LogisticRegression test
Expand Down
20 changes: 8 additions & 12 deletions python/cuml/benchmark/bench_helper_funcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,15 +174,11 @@ def _build_treelite_classifier(m, data, args, tmpdir):

def _treelite_fil_accuracy_score(y_true, y_pred):
    """Function to get correct accuracy for FIL (returns class index)

    The predictions are thresholded at 0.5 and converted to int32 class
    labels before being scored against ``y_true``.

    Parameters
    ----------
    y_true
        Ground-truth labels. A CUDA device array is copied to host first;
        any other type is handed to ``cuml.metrics.accuracy_score`` as-is.
    y_pred
        FIL output to threshold. A CUDA device array is copied to host
        first; any other type is used as-is.

    Returns
    -------
    The value returned by ``cuml.metrics.accuracy_score``.
    """
    # Convert the inputs if necessary: device arrays (e.g. the 'gpuarray-c'
    # benchmark input type) are moved to host so that both the ``> 0.5``
    # comparison and the metric operate on host data. Both arguments are
    # treated the same way, so no per-type branching is needed.
    y_pred_host = (y_pred.copy_to_host() if
                   cuda.devicearray.is_cuda_ndarray(y_pred) else y_pred)
    y_true_host = (y_true.copy_to_host() if
                   cuda.devicearray.is_cuda_ndarray(y_true) else y_true)

    y_pred_binary = input_utils.convert_dtype(y_pred_host > 0.5, np.int32)
    return cuml.metrics.accuracy_score(y_true_host, y_pred_binary)
16 changes: 16 additions & 0 deletions python/cuml/test/test_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,22 @@ def test_real_algos_runner(algo_name):
assert results["cuml_acc"] is not None


# Test FIL with several input types
@pytest.mark.parametrize(
    'input_type', ['numpy', 'cudf', 'gpuarray', 'gpuarray-c'])
def test_fil_input_types(input_type):
    """Run the FIL accuracy comparison once per input type.

    Passes when the first result row carries a non-None ``cuml_acc``.
    """
    fil_pair = algorithms.algorithm_by_name('FIL')

    # Mark as an expected failure when has_xgboost() reports False.
    if not has_xgboost():
        pytest.xfail()

    comparison = AccuracyComparisonRunner(
        [20],
        [5],
        dataset_name='classification',
        test_fraction=0.5,
        input_type=input_type,
    )
    # Only the cuML side is exercised; the CPU run is disabled.
    all_results = comparison.run(fil_pair, run_cpu=False)
    assert all_results[0]["cuml_acc"] is not None


@pytest.mark.parametrize('input_type', ['numpy', 'cudf', 'pandas', 'gpuarray'])
def test_training_data_to_numpy(input_type):
X, y, *_ = datagen.gen_data(
Expand Down

0 comments on commit 2b7f38a

Please sign in to comment.