diff --git a/python/cudf/cudf/tests/test_json.py b/python/cudf/cudf/tests/test_json.py index ec980adc334..12ea74bd7a7 100644 --- a/python/cudf/cudf/tests/test_json.py +++ b/python/cudf/cudf/tests/test_json.py @@ -13,7 +13,7 @@ import pytest import cudf -from cudf.core._compat import PANDAS_GE_200, PANDAS_GE_210 +from cudf.core._compat import PANDAS_GE_200, PANDAS_GE_210, PANDAS_GE_220 from cudf.testing._utils import ( DATETIME_TYPES, NUMERIC_TYPES, @@ -1179,7 +1179,13 @@ def test_chunked_nested_json_reader(self, tag, data, chunk_size): def test_order_nested_json_reader(self, tag, data): expected = pd.read_json(StringIO(data), lines=True) + if PANDAS_GE_220: + # TODO: Remove after https://github.com/pandas-dev/pandas/issues/57429 + # is fixed + expected = expected.reset_index(drop=True) target = cudf.read_json(StringIO(data), lines=True) + # Using pyarrow instead of assert_eq because pandas + # doesn't handle nested values comparisons correctly if tag == "dtype_mismatch": with pytest.raises(AssertionError): # pandas parses integer values in float representation diff --git a/python/cudf/cudf/tests/test_orc.py b/python/cudf/cudf/tests/test_orc.py index cf2fd29d41e..80fc815dd76 100644 --- a/python/cudf/cudf/tests/test_orc.py +++ b/python/cudf/cudf/tests/test_orc.py @@ -13,6 +13,7 @@ import pytest import cudf +from cudf.core._compat import PANDAS_GE_220 from cudf.io.orc import ORCWriter from cudf.testing import assert_frame_equal from cudf.testing._utils import ( @@ -130,16 +131,21 @@ def test_orc_reader_filepath_or_buffer(path_or_buf, src): def test_orc_reader_trailing_nulls(datadir): path = datadir / "TestOrcFile.nulls-at-end-snappy.orc" + expect = pd.read_orc(path) + got = cudf.read_orc(path) + if PANDAS_GE_220: + check_categorical = True + else: + check_categorical = False + expect = expect.fillna(0) + got = got.fillna(0) - expect = pd.read_orc(path).fillna(0) - got = cudf.read_orc(path).fillna(0) - - # PANDAS uses NaN to represent invalid data, which forces float dtype - # For comparison, we can replace NaN with 0 and cast to the cuDF dtype - for col in expect.columns: - expect[col] = expect[col].astype(got[col].dtype) + # PANDAS uses NaN to represent invalid data, which forces float dtype + # For comparison, we can replace NaN with 0 and cast to the cuDF dtype + for col in expect.columns: + expect[col] = expect[col].astype(got[col].dtype) - assert_eq(expect, got, check_categorical=False) + assert_eq(expect, got, check_categorical=check_categorical) @pytest.mark.parametrize("use_index", [False, True])