diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index e08b78b98e642..7c42bb5a727ba 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -2304,7 +2304,6 @@ def _groupby_op(
     ):
         if isinstance(self.dtype, StringDtype):
             if how in [
-                "sum",
                 "prod",
                 "mean",
                 "median",
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index 6cf9cca341794..4835d808f2433 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -2609,7 +2609,6 @@ def _groupby_op(
         if isinstance(self.dtype, StringDtype):
             # StringArray
             if op.how in [
-                "sum",
                 "prod",
                 "mean",
                 "median",
diff --git a/pandas/tests/extension/base/groupby.py b/pandas/tests/extension/base/groupby.py
index c1480f54163e0..bab8566a06dc2 100644
--- a/pandas/tests/extension/base/groupby.py
+++ b/pandas/tests/extension/base/groupby.py
@@ -6,6 +6,7 @@
     is_bool_dtype,
     is_numeric_dtype,
     is_object_dtype,
+    is_string_dtype,
 )
 
 import pandas as pd
@@ -150,6 +151,7 @@ def test_in_numeric_groupby(self, data_for_grouping):
             is_numeric_dtype(dtype)
             or is_bool_dtype(dtype)
             or dtype.name == "decimal"
+            or is_string_dtype(dtype)
             or is_object_dtype(dtype)
             or dtype.kind == "m"  # in particular duration[*][pyarrow]
         ):
diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py
index d28eb227314c7..b937e7dcc8136 100644
--- a/pandas/tests/groupby/aggregate/test_cython.py
+++ b/pandas/tests/groupby/aggregate/test_cython.py
@@ -148,11 +148,11 @@ def test_cython_agg_return_dict():
 
 def test_cython_fail_agg():
     dr = bdate_range("1/1/2000", periods=50)
-    ts = Series(["A", "B", "C", "D", "E"] * 10, index=dr)
+    ts = Series(["A", "B", "C", "D", "E"] * 10, dtype=object, index=dr)
 
     grouped = ts.groupby(lambda x: x.month)
     summed = grouped.sum()
-    expected = grouped.agg(np.sum)
+    expected = grouped.agg(np.sum).astype(object)
     tm.assert_series_equal(summed, expected)
 
 
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index a434391983c01..ac7f305880878 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -709,8 +709,6 @@ def test_omit_nuisance_agg(df, agg_function, numeric_only, using_infer_string):
     grouped = df.groupby("A")
 
     no_drop_nuisance = ("var", "std", "sem", "mean", "prod", "median")
-    if using_infer_string:
-        no_drop_nuisance += ("sum",)
     if agg_function in no_drop_nuisance and not numeric_only:
         # Added numeric_only as part of GH#46560; these do not drop nuisance
         # columns when numeric_only is False
@@ -1814,7 +1812,7 @@ def get_categorical_invalid_expected():
 
     if op in ["prod", "sum", "skew"]:
         # ops that require more than just ordered-ness
-        if is_dt64 or is_cat or is_per or is_str:
+        if is_dt64 or is_cat or is_per or (is_str and op != "sum"):
             # GH#41291
             # datetime64 -> prod and sum are invalid
             if is_dt64:
diff --git a/pandas/tests/groupby/test_raises.py b/pandas/tests/groupby/test_raises.py
index f3768ee3433b5..e915011875c60 100644
--- a/pandas/tests/groupby/test_raises.py
+++ b/pandas/tests/groupby/test_raises.py
@@ -182,7 +182,6 @@ def test_groupby_raises_string(
 
     if using_infer_string:
         if groupby_func in [
-            "sum",
             "prod",
             "mean",
             "median",
@@ -213,13 +212,7 @@ def test_groupby_raises_string(
         elif groupby_func in ["cummin", "cummax"]:
             msg = msg.replace("object", "str")
         elif groupby_func == "corrwith":
-            if df["d"].dtype.storage == "pyarrow":
-                msg = (
-                    "ArrowStringArrayNumpySemantics' with dtype str does not "
-                    "support operation 'mean'"
-                )
-            else:
-                msg = "Cannot perform reduction 'mean' with string dtype"
+            msg = "Cannot perform reduction 'mean' with string dtype"
 
     if groupby_func == "fillna":
         kind = "Series" if groupby_series else "DataFrame"
@@ -273,17 +266,12 @@ def test_groupby_raises_string_np(
     }[groupby_func_np]
 
     if using_infer_string:
-        klass = TypeError
-        if df["d"].dtype.storage == "python":
-            msg = (
-                f"Cannot perform reduction '{groupby_func_np.__name__}' "
-                "with string dtype"
-            )
-        else:
-            msg = (
-                "'ArrowStringArrayNumpySemantics' with dtype str does not "
-                f"support operation '{groupby_func_np.__name__}'"
-            )
+        if groupby_func_np is np.mean:
+            klass = TypeError
+        msg = (
+            f"Cannot perform reduction '{groupby_func_np.__name__}' "
+            "with string dtype"
+        )
 
     _call_and_check(klass, msg, how, gb, groupby_func_np, ())
 
diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py
index 940c9e6700ea2..b7b80b5e427ff 100644
--- a/pandas/tests/resample/test_resample_api.py
+++ b/pandas/tests/resample/test_resample_api.py
@@ -912,11 +912,6 @@ def test_frame_downsample_method(
             msg = expected_data
         with pytest.raises(klass, match=msg):
             _ = func(**kwargs)
-    elif method == "sum" and using_infer_string and numeric_only is not True:
-        klass = TypeError
-        msg = f"dtype 'str' does not support operation '{method}'"
-        with pytest.raises(klass, match=msg):
-            _ = func(**kwargs)
     else:
         result = func(**kwargs)
         expected = DataFrame(expected_data, index=expected_index)
@@ -968,10 +963,6 @@ def test_series_downsample_method(
             msg = "dtype 'str' does not support operation 'prod'"
         with pytest.raises(TypeError, match=msg):
             func(**kwargs)
-    elif method == "sum" and using_infer_string and numeric_only is not True:
-        msg = "dtype 'str' does not support operation 'sum'"
-        with pytest.raises(TypeError, match=msg):
-            func(**kwargs)
 
     else:
         result = func(**kwargs)