rapidsai · rapids-bot · Feb 21, 2024 · Feb 15, 2024 · Feb 20, 2024 · Feb 20, 2024
@@ -13,7 +13,7 @@
 import pytest
 
 import cudf
-from cudf.core._compat import PANDAS_GE_200, PANDAS_GE_210
+from cudf.core._compat import PANDAS_GE_200, PANDAS_GE_210, PANDAS_GE_220
 from cudf.testing._utils import (
     DATETIME_TYPES,
     NUMERIC_TYPES,
@@ -1179,7 +1179,13 @@ def test_chunked_nested_json_reader(self, tag, data, chunk_size):
 
     def test_order_nested_json_reader(self, tag, data):
         expected = pd.read_json(StringIO(data), lines=True)
+        if PANDAS_GE_220:
+            # TODO: Drop this index reset once
+            # https://github.com/pandas-dev/pandas/issues/57429 is fixed
+            expected = expected.reset_index(drop=True)
         target = cudf.read_json(StringIO(data), lines=True)
+        # Compare via pyarrow instead of assert_eq because pandas
+        # doesn't handle nested value comparisons correctly
         if tag == "dtype_mismatch":
             with pytest.raises(AssertionError):
                 # pandas parses integer values in float representation

@@ -13,6 +13,7 @@
 import pytest
 
 import cudf
+from cudf.core._compat import PANDAS_GE_220
 from cudf.io.orc import ORCWriter
 from cudf.testing import assert_frame_equal
 from cudf.testing._utils import (
@@ -130,16 +131,21 @@ def test_orc_reader_filepath_or_buffer(path_or_buf, src):
 
 def test_orc_reader_trailing_nulls(datadir):
     path = datadir / "TestOrcFile.nulls-at-end-snappy.orc"
+    expect = pd.read_orc(path)
+    got = cudf.read_orc(path)
+    if PANDAS_GE_220:
+        check_categorical = True
+    else:
+        check_categorical = False
+        expect = expect.fillna(0)
+        got = got.fillna(0)
 
-    expect = pd.read_orc(path).fillna(0)
-    got = cudf.read_orc(path).fillna(0)
-
-    # PANDAS uses NaN to represent invalid data, which forces float dtype
-    # For comparison, we can replace NaN with 0 and cast to the cuDF dtype
-    for col in expect.columns:
-        expect[col] = expect[col].astype(got[col].dtype)
+        # pandas uses NaN to represent invalid data, which forces float dtype.
+        # For comparison, replace NaN with 0 and cast to the cuDF dtype.
+        for col in expect.columns:
+            expect[col] = expect[col].astype(got[col].dtype)
 
-    assert_eq(expect, got, check_categorical=False)
+    assert_eq(expect, got, check_categorical=check_categorical)
 
 
 @pytest.mark.parametrize("use_index", [False, True])