Skip to content

Commit

Permalink
Update minimum pandas and numpy pinnings (#12887)
Browse files Browse the repository at this point in the history
This PR:

- [x] Increments the minimum pinning for the `pandas` version from `1.0` to `1.3`.
- [x] Sets the minimum pinning for `numpy` to `>=1.21`.
- [x] Fixes ARM conda environment creation by removing the `pandoc` version constraint.

Resolves #12785.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Ray Douglass (https://github.com/raydouglass)
  - Lawrence Mitchell (https://github.com/wence-)

URL: #12887
  • Loading branch information
galipremsagar authored Mar 10, 2023
1 parent e591f68 commit e4557cb
Show file tree
Hide file tree
Showing 28 changed files with 131 additions and 399 deletions.
6 changes: 3 additions & 3 deletions conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,13 @@ dependencies:
- ninja
- notebook
- numba>=0.56.2
- numpy
- numpy>=1.21
- numpydoc
- nvcc_linux-64=11.8
- nvtx>=0.2.1
- packaging
- pandas>=1.0,<1.6.0dev0
- pandoc<=2.0.0
- pandas>=1.3,<1.6.0dev0
- pandoc
- pip
- pre-commit
- protobuf>=4.21.6,<4.22
Expand Down
4 changes: 2 additions & 2 deletions conda/recipes/cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,10 @@ requirements:
- protobuf >=4.21.6,<4.22
- python
- typing_extensions
- pandas >=1.0,<1.6.0dev0
- pandas >=1.3,<1.6.0dev0
- cupy >=9.5.0,<12.0.0a0
- numba >=0.56.2
- numpy
- numpy >=1.21
- {{ pin_compatible('pyarrow', max_pin='x.x.x') }}
- libcudf {{ version }}
- fastavro >=0.22.0
Expand Down
6 changes: 3 additions & 3 deletions dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ dependencies:
- myst-nb
- nbsphinx
- numpydoc
- pandoc<=2.0.0 # We should check and fix all "<=" pinnings
- pandoc
- pydata-sphinx-theme
- sphinx
- sphinx-autobuild
Expand Down Expand Up @@ -254,10 +254,10 @@ dependencies:
- distributed>=2023.1.1
- fsspec>=0.6.0
- numba>=0.56.2
- numpy
- numpy>=1.21
- nvtx>=0.2.1
- packaging
- pandas>=1.0,<1.6.0dev0
- pandas>=1.3,<1.6.0dev0
- python-confluent-kafka=1.7.0
- streamz
- typing_extensions
Expand Down
4 changes: 0 additions & 4 deletions python/cudf/cudf/core/_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,6 @@
from packaging import version

PANDAS_VERSION = version.parse(pd.__version__)
PANDAS_GE_110 = PANDAS_VERSION >= version.parse("1.1")
PANDAS_GE_120 = PANDAS_VERSION >= version.parse("1.2")
PANDAS_LE_122 = PANDAS_VERSION <= version.parse("1.2.2")
PANDAS_GE_130 = PANDAS_VERSION >= version.parse("1.3.0")
PANDAS_GE_133 = PANDAS_VERSION >= version.parse("1.3.3")
PANDAS_GE_134 = PANDAS_VERSION >= version.parse("1.3.4")
PANDAS_LT_140 = PANDAS_VERSION < version.parse("1.4.0")
Expand Down
6 changes: 1 addition & 5 deletions python/cudf/cudf/core/column/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,12 @@
ScalarLike,
)
from cudf.api.types import is_datetime64_dtype, is_scalar, is_timedelta64_dtype
from cudf.core._compat import PANDAS_GE_120
from cudf.core.buffer import Buffer, cuda_array_interface_wrapper
from cudf.core.column import ColumnBase, as_column, column, string
from cudf.core.column.timedelta import _unit_to_nanoseconds_conversion
from cudf.utils.utils import _fillna_natwise

if PANDAS_GE_120:
_guess_datetime_format = pd.core.tools.datetimes.guess_datetime_format
else:
_guess_datetime_format = pd.core.tools.datetimes._guess_datetime_format
_guess_datetime_format = pd.core.tools.datetimes.guess_datetime_format

# nanoseconds per time_unit
_dtype_to_format_conversion = {
Expand Down
12 changes: 3 additions & 9 deletions python/cudf/cudf/core/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

import cudf
from cudf._typing import Dtype
from cudf.core._compat import PANDAS_GE_130, PANDAS_GE_150
from cudf.core._compat import PANDAS_GE_150
from cudf.core.abc import Serializable
from cudf.core.buffer import Buffer
from cudf.utils.docutils import doc_apply
Expand Down Expand Up @@ -875,16 +875,10 @@ def to_arrow(self):

@classmethod
def from_pandas(cls, pd_dtype: pd.IntervalDtype) -> "IntervalDtype":
if PANDAS_GE_130:
return cls(subtype=pd_dtype.subtype, closed=pd_dtype.closed)
else:
return cls(subtype=pd_dtype.subtype)
return cls(subtype=pd_dtype.subtype, closed=pd_dtype.closed)

def to_pandas(self) -> pd.IntervalDtype:
if PANDAS_GE_130:
return pd.IntervalDtype(subtype=self.subtype, closed=self.closed)
else:
return pd.IntervalDtype(subtype=self.subtype)
return pd.IntervalDtype(subtype=self.subtype, closed=self.closed)

def __eq__(self, other):
if isinstance(other, str):
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/core/multiindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from cudf._typing import DataFrameOrSeries
from cudf.api.types import is_integer, is_list_like, is_object_dtype
from cudf.core import column
from cudf.core._compat import PANDAS_GE_120, PANDAS_GE_150
from cudf.core._compat import PANDAS_GE_150
from cudf.core.frame import Frame
from cudf.core.index import (
BaseIndex,
Expand Down Expand Up @@ -495,7 +495,7 @@ def __repr__(self):
)
)

if PANDAS_GE_120 and not PANDAS_GE_150:
if not PANDAS_GE_150:
# Need this whole `if` block,
# this is a workaround for the following issue:
# https://github.com/pandas-dev/pandas/issues/39984
Expand Down
36 changes: 12 additions & 24 deletions python/cudf/cudf/testing/testing.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2022, NVIDIA CORPORATION.
# Copyright (c) 2020-2023, NVIDIA CORPORATION.

from __future__ import annotations

Expand All @@ -19,7 +19,6 @@
is_string_dtype,
is_struct_dtype,
)
from cudf.core._compat import PANDAS_GE_110
from cudf.core.missing import NA


Expand Down Expand Up @@ -699,28 +698,17 @@ def assert_frame_equal(
obj=f"{obj}.index",
)

if PANDAS_GE_110:
pd.testing.assert_index_equal(
left._data.to_pandas_index(),
right._data.to_pandas_index(),
exact=check_column_type,
check_names=check_names,
check_exact=check_exact,
check_categorical=check_categorical,
rtol=rtol,
atol=atol,
obj=f"{obj}.columns",
)
else:
pd.testing.assert_index_equal(
left._data.to_pandas_index(),
right._data.to_pandas_index(),
exact=check_column_type,
check_names=check_names,
check_exact=check_exact,
check_categorical=check_categorical,
obj=f"{obj}.columns",
)
pd.testing.assert_index_equal(
left._data.to_pandas_index(),
right._data.to_pandas_index(),
exact=check_column_type,
check_names=check_names,
check_exact=check_exact,
check_categorical=check_categorical,
rtol=rtol,
atol=atol,
obj=f"{obj}.columns",
)

for col in left._column_names:
assert_column_equal(
Expand Down
3 changes: 1 addition & 2 deletions python/cudf/cudf/tests/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import pytest

import cudf
from cudf.core._compat import PANDAS_GE_110, PANDAS_GE_134
from cudf.core._compat import PANDAS_GE_134
from cudf.testing._utils import (
NUMERIC_TYPES,
assert_eq,
Expand Down Expand Up @@ -81,7 +81,6 @@ def test_categorical_basic():
assert_eq(cat.codes, cudf_cat.codes.to_numpy())


@pytest.mark.skipif(not PANDAS_GE_110, reason="requires pandas>=1.1.0")
def test_categorical_integer():
cat = pd.Categorical(["a", "_", "_", "c", "a"], categories=["a", "b", "c"])
pdsr = pd.Series(cat)
Expand Down
25 changes: 3 additions & 22 deletions python/cudf/cudf/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,7 @@
from packaging import version

import cudf
from cudf.core._compat import (
PANDAS_GE_110,
PANDAS_GE_120,
PANDAS_GE_134,
PANDAS_GE_150,
PANDAS_LT_140,
)
from cudf.core._compat import PANDAS_GE_134, PANDAS_GE_150, PANDAS_LT_140
from cudf.core.buffer.spill_manager import get_global_manager
from cudf.core.column import column
from cudf.testing import _utils as utils
Expand Down Expand Up @@ -3227,10 +3221,6 @@ def test_dataframe_reindex_fill_value(

@pytest.mark.parametrize("copy", [True, False])
def test_dataframe_reindex_change_dtype(copy):
if PANDAS_GE_110:
kwargs = {"check_freq": False}
else:
kwargs = {}
index = pd.date_range("12/29/2009", periods=10, freq="D")
columns = ["a", "b", "c", "d", "e"]
gdf = cudf.datasets.randomdata(
Expand All @@ -3242,7 +3232,7 @@ def test_dataframe_reindex_change_dtype(copy):
assert_eq(
pdf.reindex(index=index, columns=columns, copy=True),
gdf.reindex(index=index, columns=columns, copy=copy),
**kwargs,
check_freq=False,
)


Expand Down Expand Up @@ -4632,10 +4622,6 @@ def test_isin_dataframe(data, values):
else:
try:
expected = pdf.isin(values)
except ValueError as e:
if str(e) == "Lengths must match." and not PANDAS_GE_110:
# https://github.com/pandas-dev/pandas/issues/34256
return
except TypeError as e:
# Can't do isin with different categories
if str(e) == (
Expand Down Expand Up @@ -5302,12 +5288,7 @@ def test_rowwise_ops_datetime_dtypes_pdbug(data):
expected = pdf.max(axis=1, skipna=False)
got = gdf.max(axis=1, skipna=False)

if PANDAS_GE_120:
assert_eq(got, expected)
else:
# PANDAS BUG: https://github.com/pandas-dev/pandas/issues/36907
with pytest.raises(AssertionError, match="numpy array are different"):
assert_eq(got, expected)
assert_eq(got, expected)


@pytest.mark.parametrize(
Expand Down
8 changes: 2 additions & 6 deletions python/cudf/cudf/tests/test_dtypes.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
# Copyright (c) 2020-2022, NVIDIA CORPORATION.
# Copyright (c) 2020-2023, NVIDIA CORPORATION.

import numpy as np
import pandas as pd
import pyarrow as pa
import pytest

import cudf
from cudf.core._compat import PANDAS_GE_130, PANDAS_GE_150
from cudf.core._compat import PANDAS_GE_150
from cudf.core.column import ColumnBase
from cudf.core.dtypes import (
CategoricalDtype,
Expand Down Expand Up @@ -187,10 +187,6 @@ def test_interval_dtype_pyarrow_round_trip(subtype, closed):
assert expect.equals(got)


@pytest.mark.skipif(
not PANDAS_GE_130,
reason="pandas<1.3.0 doesn't have a closed argument for IntervalDtype",
)
def test_interval_dtype_from_pandas(subtype, closed):
expect = cudf.IntervalDtype(subtype, closed=closed)
pd_type = pd.IntervalDtype(subtype, closed=closed)
Expand Down
13 changes: 2 additions & 11 deletions python/cudf/cudf/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,7 @@

import cudf
from cudf import DataFrame, Series
from cudf.core._compat import (
PANDAS_GE_110,
PANDAS_GE_130,
PANDAS_GE_150,
PANDAS_LT_140,
)
from cudf.core._compat import PANDAS_GE_150, PANDAS_LT_140
from cudf.core.udf.groupby_typing import SUPPORTED_GROUPBY_NUMPY_TYPES
from cudf.testing._utils import (
DATETIME_TYPES,
Expand Down Expand Up @@ -573,7 +568,7 @@ def test_groupby_2keys_agg(nelem, func):
# "func", ["min", "max", "idxmin", "idxmax", "count", "sum"],
)
@pytest.mark.xfail(
condition=PANDAS_GE_130 and PANDAS_LT_140,
condition=PANDAS_LT_140,
reason="https://github.com/pandas-dev/pandas/issues/43209",
)
def test_groupby_agg_decimal(num_groups, nelem_per_group, func):
Expand Down Expand Up @@ -1507,9 +1502,6 @@ def test_groupby_median(agg, by):

@pytest.mark.parametrize("agg", [lambda x: x.nunique(), "nunique"])
@pytest.mark.parametrize("by", ["a", ["a", "b"], ["a", "c"]])
@pytest.mark.xfail(
condition=not PANDAS_GE_110, reason="pandas >= 1.1 required"
)
def test_groupby_nunique(agg, by):
pdf = pd.DataFrame(
{"a": [1, 1, 1, 2, 3], "b": [1, 2, 2, 2, 1], "c": [1, 2, None, 4, 5]}
Expand Down Expand Up @@ -1545,7 +1537,6 @@ def test_groupby_nth(n, by):


@pytest.mark.xfail(
condition=PANDAS_GE_130,
reason="https://github.com/pandas-dev/pandas/issues/43209",
)
def test_raise_data_error():
Expand Down
33 changes: 1 addition & 32 deletions python/cudf/cudf/tests/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import pytest

import cudf
from cudf.core._compat import PANDAS_GE_110, PANDAS_GE_133, PANDAS_GE_200
from cudf.core._compat import PANDAS_GE_133, PANDAS_GE_200
from cudf.core.index import (
CategoricalIndex,
DatetimeIndex,
Expand Down Expand Up @@ -811,17 +811,6 @@ def test_index_difference(data, other, sort):
gd_data = cudf.core.index.as_index(data)
gd_other = cudf.core.index.as_index(other)

if (
gd_data.dtype.kind == "f"
and gd_other.dtype.kind != "f"
or (gd_data.dtype.kind != "f" and gd_other.dtype.kind == "f")
):
pytest.mark.xfail(
condition=not PANDAS_GE_110,
reason="Bug in Pandas: "
"https://github.com/pandas-dev/pandas/issues/35217",
)

expected = pd_data.difference(pd_other, sort=sort)
actual = gd_data.difference(gd_other, sort=sort)
assert_eq(expected, actual)
Expand Down Expand Up @@ -880,15 +869,6 @@ def test_index_equals(data, other):
gd_data = cudf.core.index.as_index(data)
gd_other = cudf.core.index.as_index(other)

if (
gd_data.dtype.kind == "f" or gd_other.dtype.kind == "f"
) and cudf.utils.dtypes.is_mixed_with_object_dtype(gd_data, gd_other):
pytest.mark.xfail(
condition=not PANDAS_GE_110,
reason="Bug in Pandas: "
"https://github.com/pandas-dev/pandas/issues/35217",
)

expected = pd_data.equals(pd_other)
actual = gd_data.equals(gd_other)
assert_eq(expected, actual)
Expand Down Expand Up @@ -935,17 +915,6 @@ def test_index_categories_equal(data, other):
gd_data = cudf.core.index.as_index(data).astype("category")
gd_other = cudf.core.index.as_index(other)

if (
gd_data.dtype.kind == "f"
and gd_other.dtype.kind != "f"
or (gd_data.dtype.kind != "f" and gd_other.dtype.kind == "f")
):
pytest.mark.xfail(
condition=not PANDAS_GE_110,
reason="Bug in Pandas: "
"https://github.com/pandas-dev/pandas/issues/35217",
)

expected = pd_data.equals(pd_other)
actual = gd_data.equals(gd_other)
assert_eq(expected, actual)
Expand Down
Loading

0 comments on commit e4557cb

Please sign in to comment.