diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 296fd6a41b0..8d2c49e74c1 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -2036,6 +2036,8 @@ def as_column( new_dtype = "str" col = col.astype(new_dtype) + elif dtype is not None: + col = col.astype(dtype) return col @@ -2112,6 +2114,15 @@ def as_column( arbitrary, nan_as_null=nan_as_null, dtype=dtype, length=length ) elif arbitrary.dtype.kind == "O": + if len(arbitrary) == 0: + # TODO: Can remove once empty constructor default becomes + # object instead of float. + return as_column( + pa.array([], type=pa.string()), + nan_as_null=nan_as_null, + dtype=dtype, + length=length, + ) if isinstance(arbitrary, pd.arrays.PandasArray): # infer_dtype does not handle PandasArray arbitrary = np.array(arbitrary, dtype=object) @@ -2140,15 +2151,9 @@ def as_column( arbitrary, from_pandas=True, ) - if isinstance(pyarrow_array.type, pa.Decimal128Type): - pyarrow_type = cudf.Decimal128Dtype.from_arrow( - pyarrow_array.type - ) - else: - pyarrow_type = arbitrary.dtype data = as_column( pyarrow_array, - dtype=pyarrow_type, + dtype=dtype, nan_as_null=nan_as_null, length=length, ) @@ -2280,7 +2285,7 @@ def as_column( if dtype is not None: data = data.astype(dtype) elif arb_dtype.kind in ("O", "U"): - data = as_column(pa.array(arbitrary), dtype=arbitrary.dtype) + data = as_column(pa.array(arbitrary), dtype=dtype) # There is no cast operation available for pa.Array from int to # str, Hence instead of handling in pa.Array block, we # will have to type-cast here. diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py index 39da34fa89c..b8233e38f1b 100644 --- a/python/cudf/cudf/tests/test_series.py +++ b/python/cudf/cudf/tests/test_series.py @@ -2640,6 +2640,26 @@ def test_astype_pandas_nullable_pandas_compat(dtype, klass, kind): ser.astype(kind(dtype)) +@pytest.mark.parametrize("klass", [cudf.Series, cudf.Index]) +@pytest.mark.parametrize( + "data", + [ + pa.array([1, None], type=pa.int64()), + pa.chunked_array([[1, None]], type=pa.int64()), + ], +) +def test_from_arrow_array_dtype(klass, data): + obj = klass(data, dtype="int8") + assert obj.dtype == np.dtype("int8") + + +@pytest.mark.parametrize("klass", [cudf.Series, cudf.Index]) +def test_from_pandas_object_dtype_passed_dtype(klass): + result = klass(pd.Series([True, False], dtype=object), dtype="int8") + expected = klass(pa.array([1, 0], type=pa.int8())) + assert_eq(result, expected) + + def test_series_where_mixed_bool_dtype(): s = cudf.Series([True, False, True]) with pytest.raises(TypeError):