rapidsai · rapids-bot · Mar 11, 2022 · Mar 11, 2022 · Mar 11, 2022 · Mar 11, 2022
@@ -415,7 +415,7 @@ def read_parquet(
     # (There is a good chance this was not the intention)
     if engine != "cudf":
         warnings.warn(
-            "Using CPU via PyArrow to read Parquet dataset."
+            "Using CPU via PyArrow to read Parquet dataset. "
             "This option is both inefficient and unstable!"
         )
         if filters is not None:

@@ -891,7 +891,7 @@ def test_parquet_reader_list_table(tmpdir):
     expect.to_parquet(fname)
     assert os.path.exists(fname)
     got = cudf.read_parquet(fname)
-    assert_eq(expect, got, check_dtype=False)
+    assert pa.Table.from_pandas(expect).equals(got.to_arrow())
 
 
 def int_gen(first_val, i):
@@ -1051,7 +1051,7 @@ def test_parquet_reader_list_large_mixed(tmpdir):
     expect.to_parquet(fname)
     assert os.path.exists(fname)
     got = cudf.read_parquet(fname)
-    assert_eq(expect, got, check_dtype=False)
+    assert pa.Table.from_pandas(expect).equals(got.to_arrow())
 
 
 def test_parquet_reader_list_large_multi_rowgroup(tmpdir):
@@ -1121,7 +1121,10 @@ def test_parquet_reader_list_skiprows(skip, tmpdir):
 
     expect = src.iloc[skip:]
     got = cudf.read_parquet(fname, skiprows=skip)
-    assert_eq(expect, got, check_dtype=False)
+    if expect.empty:
+        assert_eq(expect, got)
+    else:
+        assert pa.Table.from_pandas(expect).equals(got.to_arrow())
 
 
 @pytest.mark.parametrize("skip", [0, 1, 5, 10])
@@ -1145,7 +1148,7 @@ def test_parquet_reader_list_num_rows(skip, tmpdir):
     rows_to_read = min(3, (num_rows - skip) - 5)
     expect = src.iloc[skip:].head(rows_to_read)
     got = cudf.read_parquet(fname, skiprows=skip, num_rows=rows_to_read)
-    assert_eq(expect, got, check_dtype=False)
+    assert pa.Table.from_pandas(expect).equals(got.to_arrow())
 
 
 def struct_gen(gen, skip_rows, num_rows, include_validity=False):
@@ -2005,7 +2008,15 @@ def test_parquet_nullable_boolean(tmpdir, engine):
     expected_gdf = cudf.DataFrame({"a": [True, False, None, True, False]})
 
     pdf.to_parquet(pandas_path)
-    actual_gdf = cudf.read_parquet(pandas_path, engine=engine)
+    if engine == "pyarrow":
+        with pytest.warns(
+            UserWarning,
+            match="Using CPU via PyArrow to read Parquet dataset. This option "
+            "is both inefficient and unstable!",
+        ):
+            actual_gdf = cudf.read_parquet(pandas_path, engine=engine)
+    else:
+        actual_gdf = cudf.read_parquet(pandas_path, engine=engine)
 
     assert_eq(actual_gdf, expected_gdf)
 
@@ -2079,7 +2090,15 @@ def test_parquet_allnull_str(tmpdir, engine):
     )
 
     pdf.to_parquet(pandas_path)
-    actual_gdf = cudf.read_parquet(pandas_path, engine=engine)
+    if engine == "pyarrow":
+        with pytest.warns(
+            UserWarning,
+            match="Using CPU via PyArrow to read Parquet dataset. This option "
+            "is both inefficient and unstable!",
+        ):
+            actual_gdf = cudf.read_parquet(pandas_path, engine=engine)
+    else:
+        actual_gdf = cudf.read_parquet(pandas_path, engine=engine)
 
     assert_eq(actual_gdf, expected_gdf)