rapidsai · rapids-bot · Feb 21, 2024 · Feb 15, 2024 · Feb 20, 2024 · Feb 20, 2024
@@ -1177,20 +1177,15 @@ def test_chunked_nested_json_reader(self, tag, data, chunk_size):
         df = cudf.concat(chunks, ignore_index=True)
         assert expected.to_arrow().equals(df.to_arrow())
 
-    def test_order_nested_json_reader(self, tag, data):
+    def test_order_nested_json_reader(self, request, tag, data):
         expected = pd.read_json(StringIO(data), lines=True)
         target = cudf.read_json(StringIO(data), lines=True)
-        if tag == "dtype_mismatch":
-            with pytest.raises(AssertionError):
-                # pandas parses integer values in float representation
-                # as integer
-                assert pa.Table.from_pandas(expected).equals(target.to_arrow())
-        elif tag == "missing":
-            with pytest.raises(AssertionError):
-                # pandas inferences integer with nulls as float64
-                assert pa.Table.from_pandas(expected).equals(target.to_arrow())
-        else:
-            assert pa.Table.from_pandas(expected).equals(target.to_arrow())
+        request.applymarker(
+            pytest.mark.xfail(
+                tag == "dtype_mismatch", reason="int vs float mismatch"
+            )
+        )
+        assert_eq(expected, target)
 
 
 def test_json_round_trip_gzip():

@@ -13,6 +13,7 @@
 import pytest
 
 import cudf
+from cudf.core._compat import PANDAS_GE_220
 from cudf.io.orc import ORCWriter
 from cudf.testing import assert_frame_equal
 from cudf.testing._utils import (
@@ -130,16 +131,21 @@ def test_orc_reader_filepath_or_buffer(path_or_buf, src):
 
 def test_orc_reader_trailing_nulls(datadir):
     path = datadir / "TestOrcFile.nulls-at-end-snappy.orc"
+    expect = pd.read_orc(path)
+    got = cudf.read_orc(path)
+    if PANDAS_GE_220:
+        check_categorical = True
+    else:
+        check_categorical = False
+        expect = expect.fillna(0)
+        got = got.fillna(0)
 
-    expect = pd.read_orc(path).fillna(0)
-    got = cudf.read_orc(path).fillna(0)
-
-    # PANDAS uses NaN to represent invalid data, which forces float dtype
-    # For comparison, we can replace NaN with 0 and cast to the cuDF dtype
-    for col in expect.columns:
-        expect[col] = expect[col].astype(got[col].dtype)
+        # pandas represents nulls as NaN, which forces a float dtype.
+        # For comparison, replace NaN with 0 and cast to the cuDF dtype.
+        for col in expect.columns:
+            expect[col] = expect[col].astype(got[col].dtype)
 
-    assert_eq(expect, got, check_categorical=False)
+    assert_eq(expect, got, check_categorical=check_categorical)
 
 
 @pytest.mark.parametrize("use_index", [False, True])