Match pandas functionality for Series numeric_only

rapidsai · Apr 14, 2022 · 9ca5d12 · 9ca5d12
1 parent 7dd7936
commit 9ca5d12
Show file tree

Hide file tree

Showing 2 changed files with 75 additions and 12 deletions.
diff --git a/python/cudf/cudf/core/single_column_frame.py b/python/cudf/cudf/core/single_column_frame.py
@@ -51,9 +51,9 @@ def _reduce(
         if level is not None:
             raise NotImplementedError("level parameter is not implemented yet")
 
-        if numeric_only not in (None, True):
+        if numeric_only:
             raise NotImplementedError(
-                "numeric_only parameter is not implemented yet"
+                f"Series.{op} does not implement numeric_only"
             )
         try:
             return getattr(self._column, op)(**kwargs)

diff --git a/python/cudf/cudf/tests/test_stats.py b/python/cudf/cudf/tests/test_stats.py
@@ -239,13 +239,10 @@ def test_misc_quantiles(data, q):
         cudf.Series([1.1032, 2.32, 43.4, 13, -312.0], index=[0, 4, 3, 19, 6]),
         cudf.Series([]),
         cudf.Series([-3]),
-        randomdata(
-            nrows=1000, dtypes={"a": float, "b": int, "c": float, "d": str}
-        ),
     ],
 )
 @pytest.mark.parametrize("null_flag", [False, True])
-def test_kurtosis(data, null_flag):
+def test_kurtosis_series(data, null_flag):
     pdata = data.to_pandas()
 
     if null_flag and len(data) > 2:
@@ -262,8 +259,13 @@ def test_kurtosis(data, null_flag):
     expected = pdata.kurt()
     np.testing.assert_array_almost_equal(got, expected)
 
+    got = data.kurt(numeric_only=False)
+    got = got if np.isscalar(got) else got.to_numpy()
+    expected = pdata.kurt(numeric_only=False)
+    np.testing.assert_array_almost_equal(got, expected)
+
     with pytest.raises(NotImplementedError):
-        data.kurt(numeric_only=False)
+        data.kurt(numeric_only=True)
 
 
 @pytest.mark.parametrize(
@@ -280,13 +282,10 @@ def test_kurtosis(data, null_flag):
         cudf.Series([1.1032, 2.32, 43.4, 13, -312.0], index=[0, 4, 3, 19, 6]),
         cudf.Series([]),
         cudf.Series([-3]),
-        randomdata(
-            nrows=1000, dtypes={"a": float, "b": int, "c": float, "d": str}
-        ),
     ],
 )
 @pytest.mark.parametrize("null_flag", [False, True])
-def test_skew(data, null_flag):
+def test_skew_series(data, null_flag):
     pdata = data.to_pandas()
 
     if null_flag and len(data) > 2:
@@ -298,8 +297,13 @@ def test_skew(data, null_flag):
     got = got if np.isscalar(got) else got.to_numpy()
     np.testing.assert_array_almost_equal(got, expected)
 
+    got = data.skew(numeric_only=False)
+    expected = pdata.skew(numeric_only=False)
+    got = got if np.isscalar(got) else got.to_numpy()
+    np.testing.assert_array_almost_equal(got, expected)
+
     with pytest.raises(NotImplementedError):
-        data.skew(numeric_only=False)
+        data.skew(numeric_only=True)
 
 
 @pytest.mark.parametrize("dtype", params_dtypes)
@@ -541,3 +545,62 @@ def test_cov_corr_invalid_dtypes(gsr):
         rfunc_args_and_kwargs=([gsr],),
         compare_error_message=False,
     )
+
+
+@pytest.mark.parametrize(
+    "data",
+    [
+        randomdata(
+            nrows=1000, dtypes={"a": float, "b": int, "c": float, "d": str}
+        ),
+    ],
+)
+@pytest.mark.parametrize("null_flag", [False, True])
+def test_kurtosis_df(data, null_flag):
+    pdata = data.to_pandas()
+
+    if null_flag and len(data) > 2:
+        data.iloc[[0, 2]] = None
+        pdata.iloc[[0, 2]] = None
+
+    got = data.kurtosis()
+    got = got if np.isscalar(got) else got.to_numpy()
+    expected = pdata.kurtosis()
+    np.testing.assert_array_almost_equal(got, expected)
+
+    got = data.kurt()
+    got = got if np.isscalar(got) else got.to_numpy()
+    expected = pdata.kurt()
+    np.testing.assert_array_almost_equal(got, expected)
+
+    got = data.kurt(numeric_only=True)
+    got = got if np.isscalar(got) else got.to_numpy()
+    expected = pdata.kurt(numeric_only=True)
+    np.testing.assert_array_almost_equal(got, expected)
+
+
+@pytest.mark.parametrize(
+    "data",
+    [
+        randomdata(
+            nrows=1000, dtypes={"a": float, "b": int, "c": float, "d": str}
+        ),
+    ],
+)
+@pytest.mark.parametrize("null_flag", [False, True])
+def test_skew_df(data, null_flag):
+    pdata = data.to_pandas()
+
+    if null_flag and len(data) > 2:
+        data.iloc[[0, 2]] = None
+        pdata.iloc[[0, 2]] = None
+
+    got = data.skew()
+    expected = pdata.skew()
+    got = got if np.isscalar(got) else got.to_numpy()
+    np.testing.assert_array_almost_equal(got, expected)
+
+    got = data.skew(numeric_only=True)
+    expected = pdata.skew(numeric_only=True)
+    got = got if np.isscalar(got) else got.to_numpy()
+    np.testing.assert_array_almost_equal(got, expected)