rapidsai · rapids-bot · May 9, 2022 · Apr 4, 2022 · Apr 4, 2022 · Apr 4, 2022
diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh
@@ -93,6 +93,7 @@ gpuci_mamba_retry install -y \
 # gpuci_mamba_retry install -y "your-pkg=1.0.0"
 
 
+
 gpuci_logger "Check compiler versions"
 python --version
 $CC --version

@@ -17,7 +17,7 @@ dependencies:
   - python>=3.7,<3.9
   - numba>=0.54
   - numpy
-  - pandas>=1.0,<1.4.0dev0
+  - pandas>=1.0,<1.5.0dev0
   - pyarrow=7.0.0=*cuda
   - fastavro>=0.22.9
   - python-snappy>=0.6.0

@@ -1,6 +1,7 @@
 # Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 import pickle
+import warnings
 
 import pandas as pd
 
@@ -608,10 +609,20 @@ def shift(Column input, int offset, object fill_value=None):
     cdef DeviceScalar fill
 
     if isinstance(fill_value, DeviceScalar):
+        fill_value_type = fill_value.dtype
         fill = fill_value
     else:
+        fill_value_type = type(fill_value)
         fill = as_device_scalar(fill_value, input.dtype)
 
+    if not cudf.utils.dtypes._can_cast(input.dtype, fill_value_type):
+        warnings.warn(
+            f"Passing {fill_value_type} to shift is deprecated and will "
+            f"raise in a future version"
+            f", pass a {input.dtype} scalar instead.",
+            FutureWarning,
+        )
+
     cdef column_view c_input = input.view()
     cdef int32_t c_offset = offset
     cdef const scalar* c_fill_value = fill.get_raw_ptr()

@@ -9,5 +9,6 @@
 PANDAS_GE_120 = PANDAS_VERSION >= version.parse("1.2")
 PANDAS_LE_122 = PANDAS_VERSION <= version.parse("1.2.2")
 PANDAS_GE_130 = PANDAS_VERSION >= version.parse("1.3.0")
+PANDAS_GE_133 = PANDAS_VERSION >= version.parse("1.3.3")
 PANDAS_GE_134 = PANDAS_VERSION >= version.parse("1.3.4")
 PANDAS_LT_140 = PANDAS_VERSION < version.parse("1.4.0")
@@ -1227,7 +1227,7 @@ def fillna(
                 fill_value = column.as_column(fill_value, nan_as_null=False)
                 if isinstance(fill_value, CategoricalColumn):
                     if self.dtype != fill_value.dtype:
-                        raise ValueError(
+                        raise TypeError(
                             "Cannot set a Categorical with another, "
                             "without identical categories"
                         )

@@ -790,9 +790,8 @@ def _init_from_series_list(self, data, columns, index):
                         data.extend([o for o in initial_data])
                 else:
                     raise ValueError(
-                        f"Shape of passed values is "
-                        f"{(data_length, len(data[0]))}, "
-                        f"indices imply {(index_length, len(data[0]))}"
+                        f"Length of values ({data_length}) does "
+                        f"not match length of index ({index_length})"
                     )
 
             final_index = as_index(index)

@@ -2586,7 +2586,7 @@ def _reduce(self, *args, **kwargs):
     def min(
         self,
         axis=None,
-        skipna=None,
+        skipna=True,
         level=None,
         numeric_only=None,
         **kwargs,
@@ -2637,7 +2637,7 @@ def min(
     def max(
         self,
         axis=None,
-        skipna=None,
+        skipna=True,
         level=None,
         numeric_only=None,
         **kwargs,
@@ -2688,7 +2688,7 @@ def max(
     def sum(
         self,
         axis=None,
-        skipna=None,
+        skipna=True,
         dtype=None,
         level=None,
         numeric_only=None,
@@ -2747,7 +2747,7 @@ def sum(
     def product(
         self,
         axis=None,
-        skipna=None,
+        skipna=True,
         dtype=None,
         level=None,
         numeric_only=None,
@@ -2810,7 +2810,7 @@ def product(
 
     @_cudf_nvtx_annotate
     def mean(
-        self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs
+        self, axis=None, skipna=True, level=None, numeric_only=None, **kwargs
     ):
         """
         Return the mean of the values for the requested axis.
@@ -2857,7 +2857,7 @@ def mean(
     def std(
         self,
         axis=None,
-        skipna=None,
+        skipna=True,
         level=None,
         ddof=1,
         numeric_only=None,
@@ -2914,7 +2914,7 @@ def std(
     def var(
         self,
         axis=None,
-        skipna=None,
+        skipna=True,
         level=None,
         ddof=1,
         numeric_only=None,
@@ -2968,12 +2968,12 @@ def var(
 
     @_cudf_nvtx_annotate
     def kurtosis(
-        self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs
+        self, axis=None, skipna=True, level=None, numeric_only=None, **kwargs
     ):
         """
         Return Fisher's unbiased kurtosis of a sample.
 
-        Kurtosis obtained using Fisher’s definition of
+        Kurtosis obtained using Fisher's definition of
         kurtosis (kurtosis of normal == 0.0). Normalized by N-1.
 
         Parameters
@@ -3025,7 +3025,7 @@ def kurtosis(
     # Alias for kurtosis.
     @copy_docstring(kurtosis)
     def kurt(
-        self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs
+        self, axis=None, skipna=True, level=None, numeric_only=None, **kwargs
     ):
         return self.kurtosis(
             axis=axis,
@@ -3037,7 +3037,7 @@ def kurt(
 
     @_cudf_nvtx_annotate
     def skew(
-        self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs
+        self, axis=None, skipna=True, level=None, numeric_only=None, **kwargs
     ):
         """
         Return unbiased Fisher-Pearson skew of a sample.
@@ -3199,7 +3199,7 @@ def sum_of_squares(self, dtype=None):
 
     @_cudf_nvtx_annotate
     def median(
-        self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs
+        self, axis=None, skipna=True, level=None, numeric_only=None, **kwargs
     ):
         """
         Return the median of the values for the requested axis.

@@ -722,6 +722,36 @@ def _intersection(self, other, sort=False):
 
         return new_index
 
+    def sort_values(
+        self,
+        return_indexer=False,
+        ascending=True,
+        na_position="last",
+        key=None,
+    ):
+        """Return this index in sorted order, optionally with the indexer.
+
+        Only a reversal is ever applied, chosen from the sign of ``step``;
+        ``na_position`` is validated but otherwise unused.
+        """
+        if key is not None:
+            raise NotImplementedError("key parameter is not yet implemented.")
+        if na_position not in {"first", "last"}:
+            raise ValueError(f"invalid na_position: {na_position}")
+
+        sorted_index = self
+        indexer = RangeIndex(range(len(self)))
+
+        # Reverse only when the step direction opposes the requested order.
+        needs_reversal = self.step < 0 if ascending else self.step > 0
+        if needs_reversal:
+            sorted_index = self[::-1]
+            indexer = indexer[::-1]
+
+        if return_indexer:
+            return sorted_index, indexer
+        return sorted_index
+
     @_cudf_nvtx_annotate
     def _gather(self, gather_map, nullify=False, check_bounds=True):
         gather_map = cudf.core.column.as_column(gather_map)

@@ -43,7 +43,7 @@ def _align_objs(objs, how="outer", sort=None):
 
     if not_matching_index:
         if not all(o.index.is_unique for o in objs):
-            raise ValueError("cannot reindex from a duplicate axis")
+            raise ValueError("cannot reindex on an axis with duplicate labels")
 
         index = objs[0].index
         name = index.name

@@ -406,10 +406,12 @@ def __init__(
             else:
                 index = as_index(data.index)
         elif isinstance(data, pd.Index):
-            name = data.name
+            if name is None:
+                name = data.name
             data = data.values
         elif isinstance(data, BaseIndex):
-            name = data.name
+            if name is None:
+                name = data.name
             data = data._values
             if dtype is not None:
                 data = data.astype(dtype)
@@ -805,8 +807,9 @@ def reset_index(self, level=None, drop=False, name=None, inplace=False):
             return cudf.core.dataframe.DataFrame._from_data(data, index)
         # For ``name`` behavior, see:
         # https://github.com/pandas-dev/pandas/issues/44575
+        # ``name`` has to be ignored when ``drop=True``
         return self._mimic_inplace(
-            Series._from_data(data, index, name if inplace else None),
+            Series._from_data(data, index, self.name),
             inplace=inplace,
         )
 

@@ -90,6 +90,8 @@ def test_ufunc_index(ufunc):
         if fname in ("power", "float_power"):
             if (got - expect).abs().max() == 1:
                 pytest.xfail("https://github.com/rapidsai/cudf/issues/10178")
+        elif fname in ("bitwise_and", "bitwise_or", "bitwise_xor"):
+            pytest.xfail("https://github.com/pandas-dev/pandas/issues/46769")
         raise
 
 

@@ -9,6 +9,7 @@
 
 import cudf as gd
 from cudf.api.types import is_categorical_dtype
+from cudf.core._compat import PANDAS_LT_140
 from cudf.core.dtypes import Decimal32Dtype, Decimal64Dtype, Decimal128Dtype
 from cudf.testing._utils import assert_eq, assert_exceptions_equal
 
@@ -341,8 +342,8 @@ def test_pandas_concat_compatibility_axis1():
     got = gd.concat([d1, d2, d3, d4, d5], axis=1)
 
     assert_eq(
-        got,
-        expect,
+        got.sort_index(),
+        expect.sort_index(),
         check_index_type=True,
     )
 
@@ -659,9 +660,12 @@ def test_concat_dataframe_with_multiindex(df1, df2):
     actual = gd.concat([gdf1, gdf2], axis=1)
     expected = pd.concat([pdf1, pdf2], axis=1)
 
+    # Sort both results by index before comparing, because the
+    # row ordering is not deterministic when pandas concatenates
+    # objects that have a MultiIndex.
     assert_eq(
-        expected,
-        actual,
+        expected.sort_index(),
+        actual.sort_index(),
         check_index_type=True,
     )
 
@@ -798,18 +802,8 @@ def test_concat_join_axis_1(objs, ignore_index, sort, join, axis):
         ignore_index=ignore_index,
         axis=axis,
     )
-    # TODO: Remove special handling below
-    # after following bug from pandas is fixed:
-    # https://github.com/pandas-dev/pandas/issues/43584
-    assert_eq(
-        expected,
-        actual,
-        check_index_type=False
-        if sort
-        and isinstance(expected.index, pd.Int64Index)
-        and isinstance(actual.index, gd.RangeIndex)
-        else True,
-    )
+
+    assert_eq(expected, actual, check_index_type=True)
 
 
 @pytest.mark.parametrize("ignore_index", [True, False])
@@ -875,18 +869,8 @@ def test_concat_join_one_df(ignore_index, sort, join, axis):
     actual = gd.concat(
         [gdf1], sort=sort, join=join, ignore_index=ignore_index, axis=axis
     )
-    # TODO: Remove special handling below
-    # after following bug from pandas is fixed:
-    # https://github.com/pandas-dev/pandas/issues/43584
-    assert_eq(
-        expected,
-        actual,
-        check_index_type=False
-        if sort
-        and isinstance(expected.index, pd.Int64Index)
-        and isinstance(actual.index, gd.RangeIndex)
-        else True,
-    )
+
+    assert_eq(expected, actual, check_index_type=True)
 
 
 @pytest.mark.parametrize(
@@ -910,6 +894,10 @@ def test_concat_join_one_df(ignore_index, sort, join, axis):
 @pytest.mark.parametrize("sort", [True, False])
 @pytest.mark.parametrize("join", ["inner", "outer"])
 @pytest.mark.parametrize("axis", [0, 1])
+@pytest.mark.xfail(
+    condition=PANDAS_LT_140,
+    reason="https://github.com/pandas-dev/pandas/issues/43584",
+)
 def test_concat_join_no_overlapping_columns(
     pdf1, pdf2, ignore_index, sort, join, axis
 ):
@@ -931,19 +919,7 @@ def test_concat_join_no_overlapping_columns(
         axis=axis,
     )
 
-    # TODO: Remove special handling below
-    # after following bug from pandas is fixed:
-    # https://github.com/pandas-dev/pandas/issues/43584
-    assert_eq(
-        expected,
-        actual,
-        check_index_type=False
-        if sort
-        and axis == 1
-        and isinstance(expected.index, pd.Int64Index)
-        and isinstance(actual.index, gd.RangeIndex)
-        else True,
-    )
+    assert_eq(expected, actual, check_index_type=True)
 
 
 @pytest.mark.parametrize("ignore_index", [False, True])
@@ -1097,7 +1073,7 @@ def test_concat_join_no_overlapping_columns_empty_df_basic(
     )
     # TODO: change `check_index_type` to `True`
     # after following bug from pandas is fixed:
-    # https://github.com/pandas-dev/pandas/issues/43584
+    # https://github.com/pandas-dev/pandas/issues/46675
     assert_eq(expected, actual, check_index_type=False)
 
 
@@ -1133,15 +1109,11 @@ def test_concat_join_series(ignore_index, sort, join, axis):
 
     # TODO: Remove special handling below
     # after following bug from pandas is fixed:
-    # https://github.com/pandas-dev/pandas/issues/43584
+    # https://github.com/pandas-dev/pandas/issues/46675
     assert_eq(
         expected,
         actual,
-        check_index_type=False
-        if sort
-        and isinstance(expected.index, pd.Int64Index)
-        and isinstance(actual.index, gd.RangeIndex)
-        else True,
+        check_index_type=False if axis == 1 and join == "outer" else True,
     )