Skip to content

Commit

Permalink
Enable testing cudf.pandas unit tests for all minor versions of pan…
Browse files Browse the repository at this point in the history
…das (#16595)

Fixes: #16537

This PR enables running the `cudf.pandas` unit tests against every minor version of pandas 2 allowed by the declared version range.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Bradley Dice (https://github.com/bdice)

URL: #16595
  • Loading branch information
galipremsagar authored Aug 23, 2024
1 parent 83f68c9 commit 91f304e
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 4 deletions.
24 changes: 24 additions & 0 deletions ci/cudf_pandas_scripts/fetch_pandas_versions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

import requests
from packaging.version import Version
from packaging.specifiers import SpecifierSet
import argparse

def get_pandas_versions(pandas_range):
    """Return the pandas minor versions on PyPI that satisfy ``pandas_range``.

    Parameters
    ----------
    pandas_range : str
        A version specifier set, optionally prefixed with the package name,
        e.g. ``"pandas>=2.0,<2.2.3dev0"``.

    Returns
    -------
    list of str
        Unique ``"major.minor"`` strings, sorted in ascending version order.

    Raises
    ------
    requests.HTTPError
        If PyPI answers with an error status code.
    """
    url = "https://pypi.org/pypi/pandas/json"
    # Bound the wait so a stalled connection cannot hang the CI job.
    response = requests.get(url, timeout=30)
    # Fail loudly on an HTTP error rather than while parsing an error body.
    response.raise_for_status()
    data = response.json()
    versions = [Version(v) for v in data["releases"]]

    # Remove the literal package-name prefix. str.lstrip("pandas") would strip
    # any leading run of the characters p/a/n/d/s, not the word itself.
    prefix = "pandas"
    spec_text = pandas_range
    if spec_text.startswith(prefix):
        spec_text = spec_text[len(prefix):]
    specifier = SpecifierSet(spec_text)

    # Collapse patch releases into their "major.minor" series.
    matching_minors = {
        f"{v.major}.{v.minor}" for v in versions if v in specifier
    }
    return sorted(matching_minors, key=Version)

if __name__ == "__main__":
    # Command-line entry point: print the matching pandas minor versions as a
    # single comma-separated line on stdout.
    parser = argparse.ArgumentParser(
        description="List pandas minor versions that satisfy a version range."
    )
    parser.add_argument(
        "pandas_range",
        type=str,
        help=(
            "Version range to match, optionally prefixed with the package "
            "name, e.g. 'pandas>=2.0,<2.2.3dev0'."
        ),
    )
    args = parser.parse_args()

    versions = get_pandas_versions(args.pandas_range)
    print(",".join(versions))
36 changes: 32 additions & 4 deletions ci/cudf_pandas_scripts/run_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,20 @@ RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"}
RAPIDS_COVERAGE_DIR=${RAPIDS_COVERAGE_DIR:-"${PWD}/coverage-results"}
mkdir -p "${RAPIDS_TESTS_DIR}" "${RAPIDS_COVERAGE_DIR}"

# File that is searched for the pandas version constraint.
DEPENDENCIES_PATH="dependencies.yaml"
# NOTE(review): package_name is not referenced in the visible part of this
# script; the grep pattern below hard-codes "pandas" instead.
package_name="pandas"

# Extract the pandas requirement string (for example "pandas>=2.0,<2.2.3dev0")
# from the dependencies file. -o prints only the matched text and -P enables
# the Perl-style \d escapes used in the pattern.
pandas_version_constraint=$(grep -oP "pandas>=\d+\.\d+,\<\d+\.\d+\.\d+dev\d+" $DEPENDENCIES_PATH)

# Print the usage text for this script to stdout; $0 expands to the name the
# script was invoked with.
function display_usage {
echo "Usage: $0 [--no-cudf]"
echo "Usage: $0 [--no-cudf] [pandas-version]"
}

# Default value for the --no-cudf option
no_cudf=false
PANDAS_VERSION=""

# Parse command-line arguments
while [[ $# -gt 0 ]]; do
Expand All @@ -25,9 +32,14 @@ while [[ $# -gt 0 ]]; do
shift
;;
*)
echo "Error: Unknown option $1"
display_usage
exit 1
if [[ -z "$PANDAS_VERSION" ]]; then
PANDAS_VERSION=$1
shift
else
echo "Error: Unknown option $1"
display_usage
exit 1
fi
;;
esac
done
Expand All @@ -53,3 +65,19 @@ python -m pytest -p cudf.pandas \
--cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cudf-pandas-coverage.xml" \
--cov-report=term \
./python/cudf/cudf_pandas_tests/

# Fail early when no constraint was extracted: passed as a quoted (empty)
# argument it would be parsed as an empty specifier set, which places no
# restriction on the pandas releases that are selected.
if [[ -z "${pandas_version_constraint}" ]]; then
    echo "Error: no pandas version constraint found in ${DEPENDENCIES_PATH}" >&2
    exit 1
fi

# Resolve the constraint to a comma-separated list of "major.minor" versions.
# The argument is quoted so it always reaches the script as a single word.
output=$(python ci/cudf_pandas_scripts/fetch_pandas_versions.py "${pandas_version_constraint}")

# Convert the comma-separated list into an array
IFS=',' read -r -a versions <<< "$output"

# Re-run the cudf.pandas test suite once per resolved pandas minor version.
for version in "${versions[@]}"; do
    echo "Installing pandas version: ${version}"
    # NOTE(review): "pandas==X.Y" selects the X.Y / X.Y.0 release only; use
    # "pandas==X.Y.*" if the latest patch release of each series is intended.
    python -m pip install "pandas==${version}"
    # NOTE(review): every iteration writes the same XML report path, so the
    # report from an earlier run is overwritten - confirm this is intended.
    python -m pytest -p cudf.pandas \
        --cov-config=./python/cudf/.coveragerc \
        --cov=cudf \
        --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cudf-pandas-coverage.xml" \
        --cov-report=term \
        ./python/cudf/cudf_pandas_tests/
done
18 changes: 18 additions & 0 deletions python/cudf/cudf_pandas_tests/test_cudf_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@
get_calendar,
)

from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION

# Accelerated pandas has the real pandas and cudf modules as attributes
pd = xpd._fsproxy_slow
cudf = xpd._fsproxy_fast
Expand Down Expand Up @@ -607,6 +609,10 @@ def test_array_function_series_fallback(series):
tm.assert_equal(expect, got)


@pytest.mark.xfail(
PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
reason="Fails in older versions of pandas",
)
def test_timedeltaproperties(series):
psr, sr = series
psr, sr = psr.astype("timedelta64[ns]"), sr.astype("timedelta64[ns]")
Expand Down Expand Up @@ -666,6 +672,10 @@ def test_maintain_container_subclasses(multiindex):
assert isinstance(got, xpd.core.indexes.frozen.FrozenList)


@pytest.mark.xfail(
PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
reason="Fails in older versions of pandas due to unsupported boxcar window type",
)
def test_rolling_win_type():
pdf = pd.DataFrame(range(5))
df = xpd.DataFrame(range(5))
Expand Down Expand Up @@ -1281,6 +1291,10 @@ def max_times_two(self):
assert s.max_times_two() == 6


@pytest.mark.xfail(
PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
reason="DatetimeArray.__floordiv__ missing in pandas-2.0.0",
)
def test_floordiv_array_vs_df():
xarray = xpd.Series([1, 2, 3], dtype="datetime64[ns]").array
parray = pd.Series([1, 2, 3], dtype="datetime64[ns]").array
Expand Down Expand Up @@ -1552,6 +1566,10 @@ def test_numpy_cupy_flatiter(series):
assert type(arr.flat._fsproxy_slow) == np.flatiter


@pytest.mark.xfail(
PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
reason="pyarrow_numpy storage type was not supported in pandas-2.0.0",
)
def test_arrow_string_arrays():
cu_s = xpd.Series(["a", "b", "c"])
pd_s = pd.Series(["a", "b", "c"])
Expand Down
8 changes: 8 additions & 0 deletions python/cudf/cudf_pandas_tests/test_profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import os
import subprocess

import pytest

from cudf.pandas import LOADED, Profiler

if not LOADED:
Expand All @@ -13,7 +15,13 @@
import numpy as np
import pandas as pd

from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION


@pytest.mark.skipif(
PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
reason="function names change across versions of pandas, so making sure it only runs on latest version of pandas",
)
def test_profiler():
np.random.seed(42)
with Profiler() as profiler:
Expand Down

0 comments on commit 91f304e

Please sign in to comment.