diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index a0cf38c6f51..d9b54008e85 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -1095,17 +1095,22 @@ def as_categorical_column(self, dtype: Dtype) -> Self: raise ValueError("dtype must be CategoricalDtype") if not isinstance(self.categories, type(dtype.categories._column)): - # If both categories are of different Column types, - # return a column full of Nulls. - codes = cast( - cudf.core.column.numerical.NumericalColumn, - column.as_column( - _DEFAULT_CATEGORICAL_VALUE, - length=self.size, - dtype=self.codes.dtype, - ), - ) - codes = as_unsigned_codes(len(dtype.categories), codes) + if isinstance( + self.categories.dtype, cudf.StructDtype + ) and isinstance(dtype.categories.dtype, cudf.IntervalDtype): + codes = self.codes + else: + # Otherwise if both categories are of different Column types, + # return a column full of nulls. 
+ codes = cast( + cudf.core.column.numerical.NumericalColumn, + column.as_column( + _DEFAULT_CATEGORICAL_VALUE, + length=self.size, + dtype=self.codes.dtype, + ), + ) + codes = as_unsigned_codes(len(dtype.categories), codes) return type(self)( data=self.data, # type: ignore[arg-type] size=self.size, diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index cc07af0f669..cb40c50651f 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -2024,18 +2024,26 @@ def as_column( if isinstance(arbitrary.dtype, pd.DatetimeTZDtype): new_tz = get_compatible_timezone(arbitrary.dtype) arbitrary = arbitrary.astype(new_tz) - if isinstance(arbitrary.dtype, pd.CategoricalDtype) and isinstance( - arbitrary.dtype.categories.dtype, pd.DatetimeTZDtype - ): - new_tz = get_compatible_timezone( - arbitrary.dtype.categories.dtype - ) - new_cats = arbitrary.dtype.categories.astype(new_tz) - new_dtype = pd.CategoricalDtype( - categories=new_cats, ordered=arbitrary.dtype.ordered - ) - arbitrary = arbitrary.astype(new_dtype) - + if isinstance(arbitrary.dtype, pd.CategoricalDtype): + if isinstance( + arbitrary.dtype.categories.dtype, pd.DatetimeTZDtype + ): + new_tz = get_compatible_timezone( + arbitrary.dtype.categories.dtype + ) + new_cats = arbitrary.dtype.categories.astype(new_tz) + new_dtype = pd.CategoricalDtype( + categories=new_cats, ordered=arbitrary.dtype.ordered + ) + arbitrary = arbitrary.astype(new_dtype) + elif ( + isinstance( + arbitrary.dtype.categories.dtype, pd.IntervalDtype + ) + and dtype is None + ): + # Conversion to arrow converts IntervalDtype to StructDtype + dtype = cudf.CategoricalDtype.from_pandas(arbitrary.dtype) return as_column( pa.array(arbitrary, from_pandas=True), nan_as_null=nan_as_null, diff --git a/python/cudf/cudf/tests/test_categorical.py b/python/cudf/cudf/tests/test_categorical.py index db24fdd2a29..8e1dba858c3 100644 --- a/python/cudf/cudf/tests/test_categorical.py +++ 
b/python/cudf/cudf/tests/test_categorical.py
@@ -950,3 +950,14 @@ def test_index_set_categories(ordered):
     expected = pd_ci.set_categories([1, 2, 3, 4], ordered=ordered)
     result = cudf_ci.set_categories([1, 2, 3, 4], ordered=ordered)
     assert_eq(result, expected)
+
+
+def test_categorical_interval_pandas_roundtrip():
+    """Interval-backed categoricals survive a cudf <-> pandas round trip."""
+    # cudf -> pandas -> cudf
+    gpu_series = cudf.Series(cudf.interval_range(0, 5)).astype("category")
+    assert_eq(cudf.Series.from_pandas(gpu_series.to_pandas()), gpu_series)
+
+    # pandas -> cudf -> pandas
+    host_series = pd.Series(pd.interval_range(0, 5)).astype("category")
+    assert_eq(cudf.Series.from_pandas(host_series).to_pandas(), host_series)