Skip to content

Commit

Permalink
First pass of pd.read_orc changes in tests (#12103)
Browse files Browse the repository at this point in the history
This PR changes calls going via `pyarrow` and then `to_pandas` to directly call `pd.read_orc`. However, since `pd.read_orc` was added in pandas 1.0, we will need to version the call to this function. This PR does that.

Partially contributes to #11540

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Vukasin Milovanovic (https://github.com/vuule)
  - Lawrence Mitchell (https://github.com/wence-)

URL: #12103
  • Loading branch information
galipremsagar authored Nov 10, 2022
1 parent 4497ed6 commit 8ca2bd9
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 141 deletions.
1 change: 0 additions & 1 deletion python/cudf/cudf/core/_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from packaging import version

PANDAS_VERSION = version.parse(pd.__version__)
PANDAS_GE_100 = PANDAS_VERSION >= version.parse("1.0")
PANDAS_GE_110 = PANDAS_VERSION >= version.parse("1.1")
PANDAS_GE_120 = PANDAS_VERSION >= version.parse("1.2")
PANDAS_LE_122 = PANDAS_VERSION <= version.parse("1.2.2")
Expand Down
4 changes: 1 addition & 3 deletions python/cudf/cudf/tests/test_gcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@

import numpy as np
import pandas as pd
import pyarrow as pa
import pyarrow.orc
import pytest

import cudf
Expand Down Expand Up @@ -71,5 +69,5 @@ def mock_open(*args, **kwargs):
monkeypatch.setattr(gcsfs.core.GCSFileSystem, "open", mock_open)
gdf.to_orc(f"gcs://{gcs_fname}")

got = pa.orc.ORCFile(local_filepath).read().to_pandas()
got = pd.read_orc(local_filepath)
assert_eq(pdf, got)
5 changes: 2 additions & 3 deletions python/cudf/cudf/tests/test_hdfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import pandas as pd
import pyarrow as pa
import pytest
from pyarrow import orc

import cudf
from cudf.testing._utils import assert_eq
Expand Down Expand Up @@ -212,7 +211,7 @@ def test_read_orc(datadir, hdfs, test_url):
hd_fpath = f"hdfs://{basedir}/file.orc"

got = cudf.read_orc(hd_fpath)
expect = orc.ORCFile(buffer).read().to_pandas()
expect = pd.read_orc(buffer)
assert_eq(expect, got)


Expand All @@ -232,7 +231,7 @@ def test_write_orc(pdf, hdfs, test_url):

assert hdfs.exists(f"{basedir}/test_orc_writer.orc")
with hdfs.open(f"{basedir}/test_orc_writer.orc", mode="rb") as f:
got = orc.ORCFile(f).read().to_pandas()
got = pd.read_orc(f)

assert_eq(pdf, got)

Expand Down
Loading

0 comments on commit 8ca2bd9

Please sign in to comment.