
Fix warnings in test_stats.py #12293

Merged
merged 12 commits on Dec 5, 2022
8 changes: 3 additions & 5 deletions python/cudf/cudf/core/column/column.py
@@ -1188,15 +1188,13 @@ def _process_for_reduction(

result_col = self

# TODO: If and when pandas decides to validate that `min_count` >= 0 we
# should insert comparable behavior.
# https://github.com/pandas-dev/pandas/issues/50022
if min_count > 0:
valid_count = len(result_col) - result_col.null_count
if valid_count < min_count:
return cudf.utils.dtypes._get_nan_for_dtype(self.dtype)
elif min_count < 0:
warnings.warn(
f"min_count value cannot be negative({min_count}), will "
f"default to 0."
)
return result_col

def _reduction_result_dtype(self, reduction_op: str) -> Dtype:
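As a quick illustration of the `min_count` path kept above (a hypothetical snippet, not part of this diff, assuming the public `cudf.Series` reduction API): when fewer valid values remain than `min_count`, the reduction returns NaN.

```python
import cudf

s = cudf.Series([1, None, 3])

print(s.sum(min_count=2))  # 4 -- two valid values, which satisfies min_count
print(s.sum(min_count=3))  # nan -- only two valid values, fewer than min_count
```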
62 changes: 43 additions & 19 deletions python/cudf/cudf/core/dataframe.py
@@ -5861,7 +5861,49 @@ def _reduce(
for col in source._data.names
]
except AttributeError:
raise TypeError(f"Not all column dtypes support op {op}")
numeric_ops = (
"mean",
"min",
"max",
"sum",
"product",
"prod",
"std",
"var",
"kurtosis",
"kurt",
"skew",
)

if numeric_only is None and op in numeric_ops:
warnings.warn(
f"The default value of numeric_only in DataFrame.{op} "
"is deprecated. In a future version, it will default "
"to False. In addition, specifying "
"'numeric_only=None' is deprecated. Select only valid "
"columns or specify the value of numeric_only to "
"silence this warning.",
FutureWarning,
)
numeric_cols = (
name
for name in self._data.names
if is_numeric_dtype(self._data[name])
)
source = self._get_columns_by_label(numeric_cols)
if source.empty:
return Series(index=cudf.StringIndex([]))
try:
result = [
getattr(source._data[col], op)(**kwargs)
for col in source._data.names
]
except AttributeError:
raise TypeError(
f"Not all column dtypes support op {op}"
)
else:
raise

return Series._from_data(
{None: result}, as_index(source._data.names)
@@ -5984,24 +6026,6 @@ def mode(self, axis=0, numeric_only=False, dropna=True):

return df

@_cudf_nvtx_annotate
def kurtosis(
self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs
):
obj = self.select_dtypes(include=[np.number, np.bool_])
return super(DataFrame, obj).kurtosis(
axis, skipna, level, numeric_only, **kwargs
)

@_cudf_nvtx_annotate
def skew(
self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs
):
obj = self.select_dtypes(include=[np.number, np.bool_])
return super(DataFrame, obj).skew(
axis, skipna, level, numeric_only, **kwargs
)

@_cudf_nvtx_annotate
def all(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs):
obj = self.select_dtypes(include="bool") if bool_only else self
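A hedged sketch of what the new `numeric_only` handling in `_reduce` above means in practice (hypothetical snippet, not part of this diff): mixed-dtype reductions now emit a `FutureWarning` and operate on the numeric columns only, instead of raising `TypeError`.

```python
import cudf
import pytest

gdf = cudf.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})

# The default numeric_only=None now warns and drops the non-numeric column.
with pytest.warns(FutureWarning):
    result = gdf.mean()

print(result)  # mean of column "a" only
```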
13 changes: 13 additions & 0 deletions python/cudf/cudf/testing/_utils.py
@@ -387,3 +387,16 @@ def _create_pandas_series(data=None, index=None, dtype=None, *args, **kwargs):
"left_dtype,right_dtype",
list(itertools.combinations_with_replacement(NUMERIC_TYPES, 2)),
)


@contextmanager
def expect_warning_if(condition, warning=FutureWarning, *args, **kwargs):
"""Catch a warning using pytest.warns if the expect_warning is True.

All arguments are forwarded to pytest.warns if expect_warning is True.
"""
if condition:
with pytest.warns(warning, *args, **kwargs):
yield
else:
yield
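Roughly how the new helper is meant to be used (a hypothetical sketch, not taken from this diff): the body runs either way, and the `pytest.warns` check only kicks in when the condition holds, so parametrized tests don't need two branches.

```python
import warnings

# Condition is True: pytest.warns wraps the body and fails if nothing warns.
with expect_warning_if(True, UserWarning):
    warnings.warn("expected", UserWarning)

# Condition is False: the body runs unwrapped and no warning is required.
with expect_warning_if(False, UserWarning):
    pass
```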
16 changes: 12 additions & 4 deletions python/cudf/cudf/tests/test_dataframe.py
@@ -9541,8 +9541,12 @@ def test_mean_timeseries():

assert_eq(expected, actual)

with pytest.raises(TypeError):
gdf.mean()
with pytest.warns(FutureWarning):
expected = pdf.mean()
with pytest.warns(FutureWarning):
actual = gdf.mean()

assert_eq(expected, actual)


@pytest.mark.parametrize(
@@ -9564,8 +9568,12 @@ def test_std_different_dtypes(data):

assert_eq(expected, actual)

with pytest.raises(TypeError):
gdf.std()
with pytest.warns(FutureWarning):
expected = pdf.std()
with pytest.warns(FutureWarning):
actual = gdf.std()

assert_eq(expected, actual)


@pytest.mark.parametrize(
56 changes: 48 additions & 8 deletions python/cudf/cudf/tests/test_stats.py
@@ -13,6 +13,7 @@
_create_pandas_series,
assert_eq,
assert_exceptions_equal,
expect_warning_if,
)

params_dtypes = [np.int32, np.uint32, np.float32, np.float64]
@@ -399,7 +400,13 @@ def test_cov1d(data1, data2):
ps2 = gs2.to_pandas()

got = gs1.cov(gs2)
expected = ps1.cov(ps2)
ps1_align, ps2_align = ps1.align(ps2, join="inner")
with expect_warning_if(
(len(ps1_align.dropna()) == 1 and len(ps2_align.dropna()) > 0)
or (len(ps2_align.dropna()) == 1 and len(ps1_align.dropna()) > 0),
RuntimeWarning,
):
expected = ps1.cov(ps2)
np.testing.assert_approx_equal(got, expected, significant=8)


@@ -442,7 +449,34 @@ def test_corr1d(data1, data2, method):
ps2 = gs2.to_pandas()

got = gs1.corr(gs2, method)
expected = ps1.corr(ps2, method)

ps1_align, ps2_align = ps1.align(ps2, join="inner")

is_singular = (
len(ps1_align.dropna()) == 1 and len(ps2_align.dropna()) > 0
) or (len(ps2_align.dropna()) == 1 and len(ps1_align.dropna()) > 0)
is_identical = (
len(ps1_align.dropna().unique()) == 1 and len(ps2_align.dropna()) > 0
) or (
len(ps2_align.dropna().unique()) == 1 and len(ps1_align.dropna()) > 0
)

# Pearson correlation leads to division by 0 when either sample size is 1.
# Spearman allows size-1 samples, but warns if all values in a sample are
# identical, since the covariance is zero and so the correlation
# coefficient is not defined.
cond = (is_singular and method == "pearson") or (
is_identical and not is_singular and method == "spearman"
)
if method == "spearman":
import scipy.stats

expected_warning = scipy.stats.ConstantInputWarning
elif method == "pearson":
expected_warning = RuntimeWarning

with expect_warning_if(cond, expected_warning):
expected = ps1.corr(ps2, method)
np.testing.assert_approx_equal(got, expected, significant=8)
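For context on the two warning classes the test distinguishes, here is a hypothetical illustration (not part of this diff); exact behavior depends on the installed pandas and scipy versions.

```python
import pandas as pd

# A single overlapping observation: Pearson divides by a zero standard
# deviation, which surfaces as a RuntimeWarning and a nan result.
pd.Series([1.0]).corr(pd.Series([2.0]), method="pearson")

# A constant sample: Spearman's rank covariance is zero, so scipy emits
# ConstantInputWarning and the coefficient is nan.
pd.Series([3.0, 3.0, 3.0]).corr(pd.Series([1.0, 2.0, 3.0]), method="spearman")
```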


@@ -567,14 +601,18 @@ def test_kurtosis_df(data, null_flag):
data.iloc[[0, 2]] = None
pdata.iloc[[0, 2]] = None

got = data.kurtosis()
with pytest.warns(FutureWarning):
got = data.kurtosis()
got = got if np.isscalar(got) else got.to_numpy()
expected = pdata.kurtosis()
with pytest.warns(FutureWarning):
expected = pdata.kurtosis()
np.testing.assert_array_almost_equal(got, expected)

got = data.kurt()
with pytest.warns(FutureWarning):
got = data.kurt()
got = got if np.isscalar(got) else got.to_numpy()
expected = pdata.kurt()
with pytest.warns(FutureWarning):
expected = pdata.kurt()
np.testing.assert_array_almost_equal(got, expected)

got = data.kurt(numeric_only=True)
@@ -599,8 +637,10 @@ def test_skew_df(data, null_flag):
data.iloc[[0, 2]] = None
pdata.iloc[[0, 2]] = None

got = data.skew()
expected = pdata.skew()
with pytest.warns(FutureWarning):
got = data.skew()
with pytest.warns(FutureWarning):
expected = pdata.skew()
got = got if np.isscalar(got) else got.to_numpy()
np.testing.assert_array_almost_equal(got, expected)
