From deb39db24daaa91de9843c1a804404063fbe591d Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Tue, 15 Mar 2022 12:21:29 -0500 Subject: [PATCH] Fix error in `cudf.to_numeric` when a `bool` input is passed (#10431) Fixes: #10049 This PR fixes an incorrect error that was displayed when `cudf.to_numeric` is called with an input of `bool` dtype. Technically, `bool` is already the smallest `dtype` in `numpy`, so the call is a no-op, since `to_numeric` is aimed at downcasting inputs. Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/10431 --- python/cudf/cudf/core/tools/numeric.py | 22 +++++++++++----------- python/cudf/cudf/tests/test_numerical.py | 18 +++++++++++++++++- 2 files changed, 28 insertions(+), 12 deletions(-) diff --git a/python/cudf/cudf/core/tools/numeric.py b/python/cudf/cudf/core/tools/numeric.py index bd1b505c57f..d589b68e7b2 100644 --- a/python/cudf/cudf/core/tools/numeric.py +++ b/python/cudf/cudf/core/tools/numeric.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2020, NVIDIA CORPORATION. +# Copyright (c) 2018-2022, NVIDIA CORPORATION. 
import warnings @@ -144,16 +144,16 @@ def to_numeric(arg, errors="raise", downcast=None): col = col.as_numerical_column("d") if downcast: - downcast_type_map = { - "integer": list(np.typecodes["Integer"]), - "signed": list(np.typecodes["Integer"]), - "unsigned": list(np.typecodes["UnsignedInteger"]), - } - float_types = list(np.typecodes["Float"]) - idx = float_types.index(cudf.dtype(np.float32).char) - downcast_type_map["float"] = float_types[idx:] - - type_set = downcast_type_map[downcast] + if downcast == "float": + # we support only float32 & float64 + type_set = [ + cudf.dtype(np.float32).char, + cudf.dtype(np.float64).char, + ] + elif downcast in ("integer", "signed"): + type_set = list(np.typecodes["Integer"]) + elif downcast == "unsigned": + type_set = list(np.typecodes["UnsignedInteger"]) for t in type_set: downcast_dtype = cudf.dtype(t) diff --git a/python/cudf/cudf/tests/test_numerical.py b/python/cudf/cudf/tests/test_numerical.py index cf329afa8ab..21b179caa38 100644 --- a/python/cudf/cudf/tests/test_numerical.py +++ b/python/cudf/cudf/tests/test_numerical.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. +# Copyright (c) 2021-2022, NVIDIA CORPORATION. import numpy as np import pandas as pd @@ -397,3 +397,19 @@ def test_series_construction_with_nulls(dtype, input_obj): expect = pd.Series(np_data, dtype=np_dtypes_to_pandas_dtypes[dtype]) got = cudf.Series(np_data, dtype=dtype).to_pandas(nullable=True) assert_eq(expect, got) + + +@pytest.mark.parametrize( + "data", [[True, False, True]], +) +@pytest.mark.parametrize( + "downcast", ["signed", "integer", "unsigned", "float"] +) +def test_series_to_numeric_bool(data, downcast): + ps = pd.Series(data) + gs = cudf.from_pandas(ps) + + expect = pd.to_numeric(ps, downcast=downcast) + got = cudf.to_numeric(gs, downcast=downcast) + + assert_eq(expect, got)