From 08c64a554665e5485fb8af933de6b3ffc28da25b Mon Sep 17 00:00:00 2001
From: galipremsagar
Date: Mon, 14 Mar 2022 13:57:27 -0700
Subject: [PATCH 1/2] fix bool dtype issue

---
 python/cudf/cudf/core/tools/numeric.py   | 24 +++++++++++++-----------
 python/cudf/cudf/tests/test_numerical.py | 18 +++++++++++++++++-
 2 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/python/cudf/cudf/core/tools/numeric.py b/python/cudf/cudf/core/tools/numeric.py
index bd1b505c57f..2b20677a86e 100644
--- a/python/cudf/cudf/core/tools/numeric.py
+++ b/python/cudf/cudf/core/tools/numeric.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2018-2020, NVIDIA CORPORATION.
+# Copyright (c) 2018-2022, NVIDIA CORPORATION.
 
 import warnings
 
@@ -20,6 +20,17 @@
 from cudf.core.column import as_column
 from cudf.utils.dtypes import can_convert_to_column
 
+DOWNCAST_TYPE_MAP = {
+    "integer": list(np.typecodes["Integer"]),
+    "signed": list(np.typecodes["Integer"]),
+    "unsigned": list(np.typecodes["UnsignedInteger"]),
+}
+float_types = list(np.typecodes["Float"])
+# we only support float32 & float64
+min_idx = float_types.index(cudf.dtype(np.float32).char)
+max_idx = float_types.index(cudf.dtype(np.float64).char) + 1
+DOWNCAST_TYPE_MAP["float"] = float_types[min_idx:max_idx]
+
 
 def to_numeric(arg, errors="raise", downcast=None):
     """
@@ -144,16 +155,7 @@ def to_numeric(arg, errors="raise", downcast=None):
         col = col.as_numerical_column("d")
 
     if downcast:
-        downcast_type_map = {
-            "integer": list(np.typecodes["Integer"]),
-            "signed": list(np.typecodes["Integer"]),
-            "unsigned": list(np.typecodes["UnsignedInteger"]),
-        }
-        float_types = list(np.typecodes["Float"])
-        idx = float_types.index(cudf.dtype(np.float32).char)
-        downcast_type_map["float"] = float_types[idx:]
-
-        type_set = downcast_type_map[downcast]
+        type_set = DOWNCAST_TYPE_MAP[downcast]
 
         for t in type_set:
             downcast_dtype = cudf.dtype(t)
diff --git a/python/cudf/cudf/tests/test_numerical.py b/python/cudf/cudf/tests/test_numerical.py
index cf329afa8ab..21b179caa38 100644
--- a/python/cudf/cudf/tests/test_numerical.py
+++ b/python/cudf/cudf/tests/test_numerical.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021, NVIDIA CORPORATION.
+# Copyright (c) 2021-2022, NVIDIA CORPORATION.
 
 import numpy as np
 import pandas as pd
@@ -397,3 +397,19 @@ def test_series_construction_with_nulls(dtype, input_obj):
     expect = pd.Series(np_data, dtype=np_dtypes_to_pandas_dtypes[dtype])
     got = cudf.Series(np_data, dtype=dtype).to_pandas(nullable=True)
     assert_eq(expect, got)
+
+
+@pytest.mark.parametrize(
+    "data", [[True, False, True]],
+)
+@pytest.mark.parametrize(
+    "downcast", ["signed", "integer", "unsigned", "float"]
+)
+def test_series_to_numeric_bool(data, downcast):
+    ps = pd.Series(data)
+    gs = cudf.from_pandas(ps)
+
+    expect = pd.to_numeric(ps, downcast=downcast)
+    got = cudf.to_numeric(gs, downcast=downcast)
+
+    assert_eq(expect, got)

From c4d8503877a87a78c035a7fe8eabedd0e61766d1 Mon Sep 17 00:00:00 2001
From: galipremsagar
Date: Tue, 15 Mar 2022 07:35:54 -0700
Subject: [PATCH 2/2] refactor

---
 python/cudf/cudf/core/tools/numeric.py | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/python/cudf/cudf/core/tools/numeric.py b/python/cudf/cudf/core/tools/numeric.py
index 2b20677a86e..d589b68e7b2 100644
--- a/python/cudf/cudf/core/tools/numeric.py
+++ b/python/cudf/cudf/core/tools/numeric.py
@@ -20,17 +20,6 @@
 from cudf.core.column import as_column
 from cudf.utils.dtypes import can_convert_to_column
 
-DOWNCAST_TYPE_MAP = {
-    "integer": list(np.typecodes["Integer"]),
-    "signed": list(np.typecodes["Integer"]),
-    "unsigned": list(np.typecodes["UnsignedInteger"]),
-}
-float_types = list(np.typecodes["Float"])
-# we only support float32 & float64
-min_idx = float_types.index(cudf.dtype(np.float32).char)
-max_idx = float_types.index(cudf.dtype(np.float64).char) + 1
-DOWNCAST_TYPE_MAP["float"] = float_types[min_idx:max_idx]
-
 
 def to_numeric(arg, errors="raise", downcast=None):
     """
@@ -155,7 +144,16 @@ def to_numeric(arg, errors="raise", downcast=None):
         col = col.as_numerical_column("d")
 
     if downcast:
-        type_set = DOWNCAST_TYPE_MAP[downcast]
+        if downcast == "float":
+            # we support only float32 & float64
+            type_set = [
+                cudf.dtype(np.float32).char,
+                cudf.dtype(np.float64).char,
+            ]
+        elif downcast in ("integer", "signed"):
+            type_set = list(np.typecodes["Integer"])
+        elif downcast == "unsigned":
+            type_set = list(np.typecodes["UnsignedInteger"])
 
         for t in type_set:
             downcast_dtype = cudf.dtype(t)