Skip to content

Commit

Permalink
Update now that .sum() is supported
Browse files (browse the repository at this point in the history)
  • Loading branch information
jbrockmendel committed Nov 1, 2024
1 parent f3c44cb commit 0871326
Show file tree
Hide file tree
Showing 7 changed files with 12 additions and 35 deletions.
1 change: 0 additions & 1 deletion pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -2304,7 +2304,6 @@ def _groupby_op(
):
if isinstance(self.dtype, StringDtype):
if how in [
- "sum",
"prod",
"mean",
"median",
Expand Down
1 change: 0 additions & 1 deletion pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2609,7 +2609,6 @@ def _groupby_op(
if isinstance(self.dtype, StringDtype):
# StringArray
if op.how in [
- "sum",
"prod",
"mean",
"median",
Expand Down
2 changes: 2 additions & 0 deletions pandas/tests/extension/base/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
is_bool_dtype,
is_numeric_dtype,
is_object_dtype,
+ is_string_dtype,
)

import pandas as pd
Expand Down Expand Up @@ -150,6 +151,7 @@ def test_in_numeric_groupby(self, data_for_grouping):
is_numeric_dtype(dtype)
or is_bool_dtype(dtype)
or dtype.name == "decimal"
+ or is_string_dtype(dtype)
or is_object_dtype(dtype)
or dtype.kind == "m" # in particular duration[*][pyarrow]
):
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/groupby/aggregate/test_cython.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,11 +148,11 @@ def test_cython_agg_return_dict():

def test_cython_fail_agg():
dr = bdate_range("1/1/2000", periods=50)
- ts = Series(["A", "B", "C", "D", "E"] * 10, index=dr)
+ ts = Series(["A", "B", "C", "D", "E"] * 10, dtype=object, index=dr)

grouped = ts.groupby(lambda x: x.month)
summed = grouped.sum()
- expected = grouped.agg(np.sum)
+ expected = grouped.agg(np.sum).astype(object)
tm.assert_series_equal(summed, expected)


Expand Down
4 changes: 1 addition & 3 deletions pandas/tests/groupby/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -709,8 +709,6 @@ def test_omit_nuisance_agg(df, agg_function, numeric_only, using_infer_string):
grouped = df.groupby("A")

no_drop_nuisance = ("var", "std", "sem", "mean", "prod", "median")
- if using_infer_string:
- no_drop_nuisance += ("sum",)
if agg_function in no_drop_nuisance and not numeric_only:
# Added numeric_only as part of GH#46560; these do not drop nuisance
# columns when numeric_only is False
Expand Down Expand Up @@ -1814,7 +1812,7 @@ def get_categorical_invalid_expected():

if op in ["prod", "sum", "skew"]:
# ops that require more than just ordered-ness
- if is_dt64 or is_cat or is_per or is_str:
+ if is_dt64 or is_cat or is_per or (is_str and op != "sum"):
# GH#41291
# datetime64 -> prod and sum are invalid
if is_dt64:
Expand Down
26 changes: 7 additions & 19 deletions pandas/tests/groupby/test_raises.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,6 @@ def test_groupby_raises_string(

if using_infer_string:
if groupby_func in [
- "sum",
"prod",
"mean",
"median",
Expand Down Expand Up @@ -213,13 +212,7 @@ def test_groupby_raises_string(
elif groupby_func in ["cummin", "cummax"]:
msg = msg.replace("object", "str")
elif groupby_func == "corrwith":
- if df["d"].dtype.storage == "pyarrow":
- msg = (
- "ArrowStringArrayNumpySemantics' with dtype str does not "
- "support operation 'mean'"
- )
- else:
- msg = "Cannot perform reduction 'mean' with string dtype"
+ msg = "Cannot perform reduction 'mean' with string dtype"

if groupby_func == "fillna":
kind = "Series" if groupby_series else "DataFrame"
Expand Down Expand Up @@ -273,17 +266,12 @@ def test_groupby_raises_string_np(
}[groupby_func_np]

if using_infer_string:
- klass = TypeError
- if df["d"].dtype.storage == "python":
- msg = (
- f"Cannot perform reduction '{groupby_func_np.__name__}' "
- "with string dtype"
- )
- else:
- msg = (
- "'ArrowStringArrayNumpySemantics' with dtype str does not "
- f"support operation '{groupby_func_np.__name__}'"
- )
+ if groupby_func_np is np.mean:
+ klass = TypeError
+ msg = (
+ f"Cannot perform reduction '{groupby_func_np.__name__}' "
+ "with string dtype"
+ )

_call_and_check(klass, msg, how, gb, groupby_func_np, ())

Expand Down
9 changes: 0 additions & 9 deletions pandas/tests/resample/test_resample_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -912,11 +912,6 @@ def test_frame_downsample_method(
msg = expected_data
with pytest.raises(klass, match=msg):
_ = func(**kwargs)
- elif method == "sum" and using_infer_string and numeric_only is not True:
- klass = TypeError
- msg = f"dtype 'str' does not support operation '{method}'"
- with pytest.raises(klass, match=msg):
- _ = func(**kwargs)
else:
result = func(**kwargs)
expected = DataFrame(expected_data, index=expected_index)
Expand Down Expand Up @@ -968,10 +963,6 @@ def test_series_downsample_method(
msg = "dtype 'str' does not support operation 'prod'"
with pytest.raises(TypeError, match=msg):
func(**kwargs)
- elif method == "sum" and using_infer_string and numeric_only is not True:
- msg = "dtype 'str' does not support operation 'sum'"
- with pytest.raises(TypeError, match=msg):
- func(**kwargs)

else:
result = func(**kwargs)
Expand Down

0 comments on commit 0871326

Please sign in to comment.