
Commit

update
topper-123 committed May 23, 2020
1 parent 44abe87 commit 9bcb6a8
Showing 9 changed files with 33 additions and 27 deletions.
1 change: 0 additions & 1 deletion doc/source/whatsnew/v1.1.0.rst
@@ -236,7 +236,6 @@ Other enhancements
- :meth:`DataFrame.sample` will now also allow array-like and BitGenerator objects to be passed to ``random_state`` as seeds (:issue:`32503`)
- :meth:`MultiIndex.union` will now raise a `RuntimeWarning` if the objects inside are unsortable; pass `sort=False` to suppress this warning (:issue:`33015`)
- :class:`Series.dt` and :class:`DatetimeIndex` now have an `isocalendar` method that returns a :class:`DataFrame` with year, week, and day calculated according to the ISO 8601 calendar (:issue:`33206`).
- :meth:`Series.combine` has gained a ``dtype`` argument. If supplied, the combined series will get that dtype (:issue:`33465`)
- The :meth:`DataFrame.to_feather` method now supports additional keyword
arguments (e.g. to set the compression) that are added in pyarrow 0.17
(:issue:`33422`).
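
Two of the enhancements listed above can be sketched roughly as follows (illustrative only; the file path and compression value are placeholders, and writing feather files requires pyarrow 0.17 or newer):

    import pandas as pd

    ser = pd.Series(pd.date_range("2020-01-01", periods=3))
    # isocalendar() returns a DataFrame with ISO 8601 year, week and day columns
    iso = ser.dt.isocalendar()

    df = pd.DataFrame({"a": [1, 2, 3]})
    # extra keyword arguments are forwarded to pyarrow's feather writer
    df.to_feather("data.feather", compression="zstd")
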
7 changes: 6 additions & 1 deletion pandas/core/arrays/datetimelike.py
@@ -27,6 +27,7 @@
is_datetime64tz_dtype,
is_datetime_or_timedelta_dtype,
is_dtype_equal,
is_extension_array_dtype,
is_float_dtype,
is_integer_dtype,
is_list_like,
@@ -619,7 +620,11 @@ def astype(self, dtype, copy=True):
if is_object_dtype(dtype):
return self._box_values(self.asi8.ravel()).reshape(self.shape)
elif is_string_dtype(dtype) and not is_categorical_dtype(dtype):
return self._format_native_types()
if is_extension_array_dtype(dtype):
arr_cls = dtype.construct_array_type()
return arr_cls._from_sequence(self, dtype=dtype)
else:
return self._format_native_types()
elif is_integer_dtype(dtype):
# we deliberately ignore int32 vs. int64 here.
# See https://github.com/pandas-dev/pandas/issues/24381 for more.
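
A minimal sketch of what the new branch does, assuming this change is applied: when the target is an extension string dtype, the datetime-like values are handed to that dtype's array class via ``_from_sequence`` instead of being returned as a plain array of formatted strings.

    import pandas as pd

    dta = pd.array(pd.date_range("2020-01-01", periods=2))  # DatetimeArray

    # extension target dtype: built through dtype.construct_array_type()._from_sequence
    as_string = dta.astype("string")

    # a plain str target still goes through _format_native_types
    as_str = dta.astype(str)
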
2 changes: 1 addition & 1 deletion pandas/core/arrays/integer.py
@@ -1,5 +1,5 @@
import numbers
from typing import TYPE_CHECKING, List, Optional, Dict, Tuple, Type, Union
from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Type, Union
import warnings

import numpy as np
1 change: 0 additions & 1 deletion pandas/core/arrays/period.py
@@ -564,7 +564,6 @@ def _format_native_types(self, na_rep="NaT", date_format=None, **kwargs):
actually format my specific types
"""
values = self.astype(object)

if date_format:
formatter = lambda dt: dt.strftime(date_format)
else:
13 changes: 6 additions & 7 deletions pandas/core/arrays/string_.py
@@ -104,11 +104,6 @@ class StringArray(PandasArray):
.. versionadded:: 1.0.0
.. versionchanged:: 1.1.0
``StringArray`` allows non-string input values, but will always convert the
values to strings. (Before pandas 1.1 non-string values were not allowed).
.. warning::
StringArray is considered experimental. The implementation and
@@ -157,9 +152,13 @@ class StringArray(PandasArray):
['This is', 'some text', <NA>, 'data.']
Length: 4, dtype: string
Like ``object`` dtype arrays instantiated with ``dtype="str"``, ``StringArray``
allows non-string values but will always convert the values to strings.
Unlike arrays instantiated with ``dtype="object"``, ``StringArray``
will convert the values to strings.
>>> pd.array(['1', 1], dtype="object")
<PandasArray>
['1', 1]
Length: 2, dtype: object
>>> pd.array(['1', 1], dtype="string")
<StringArray>
['1', '1']
13 changes: 4 additions & 9 deletions pandas/core/series.py
@@ -2695,11 +2695,6 @@ def combine(self, other, func, fill_value=None, dtype=None) -> "Series":
The value to assume when an index is missing from
one Series or the other. The default specifies to use the
appropriate NaN value for the underlying dtype of the Series.
dtype : str, numpy.dtype, or ExtensionDtype, optional
Data type for the output Series. If not specified, this will be
inferred from the combined data.
.. versionadded:: 1.1.0
Returns
-------
@@ -2770,13 +2765,13 @@ def combine(self, other, func, fill_value=None, dtype=None) -> "Series":
new_values = [func(lv, other) for lv in self._values]
new_name = self.name

if dtype is not None:
return self._constructor(
new_values, index=new_index, name=new_name, dtype=dtype
)
if is_categorical_dtype(self.dtype):
pass
elif is_extension_array_dtype(self.dtype):
# Everything can be converted to strings, but we may not want to convert
if self.dtype == "string" and lib.infer_dtype(new_values) != "string":
return self._constructor(new_values, index=new_index, name=new_name)

# TODO: can we do this for only SparseDtype?
# The function can return something of any type, so check
# if the type is compatible with the calling EA.
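
With the ``dtype`` argument gone, a rough sketch of the behavior the string-dtype check above targets (illustrative only; result dtypes are inferred rather than forced):

    import pandas as pd

    s1 = pd.Series(["a", "c"], dtype="string")
    s2 = pd.Series(["b", "b"], dtype="string")

    # The combining function returns booleans, so the values can no longer be
    # kept as "string"; combine falls back to the plain constructor and the
    # result dtype is inferred from the combined values.
    result = s1.combine(s2, lambda x, y: x <= y)
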
5 changes: 3 additions & 2 deletions pandas/tests/extension/base/casting.py
@@ -33,12 +33,13 @@ def test_tolist(self, data):

def test_astype_str(self, data):
result = pd.Series(data[:5]).astype(str)
expected = pd.Series(data[:5].astype(str))
expected = pd.Series([str(x) for x in data[:5]], dtype=str)
self.assert_series_equal(result, expected)

def test_astype_string(self, data):
# GH-33465
result = pd.Series(data[:5]).astype("string")
expected = pd.Series(data[:5].astype("string"))
expected = pd.Series([str(x) for x in data[:5]], dtype="string")
self.assert_series_equal(result, expected)

def test_to_numpy(self, data):
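
For a concrete extension array, the updated expectations amount to roughly the following (a sketch; the nullable Int64 values stand in for the fixture data used by the real tests):

    import pandas as pd

    data = pd.array([1, 2, 3, 4, 5], dtype="Int64")

    # astype(str) should match an element-wise str() conversion ...
    result_str = pd.Series(data).astype(str)
    expected_str = pd.Series([str(x) for x in data], dtype=str)

    # ... and astype("string") the same values in the nullable string dtype (GH-33465)
    result_string = pd.Series(data).astype("string")
    expected_string = pd.Series([str(x) for x in data], dtype="string")
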
9 changes: 4 additions & 5 deletions pandas/tests/extension/base/methods.py
@@ -188,16 +188,15 @@ def test_combine_le(self, data_repeated):
orig_data1, orig_data2 = data_repeated(2)
s1 = pd.Series(orig_data1)
s2 = pd.Series(orig_data2)
result = s1.combine(s2, lambda x1, x2: x1 <= x2, dtype="boolean")
result = s1.combine(s2, lambda x1, x2: x1 <= x2)
expected = pd.Series(
[a <= b for (a, b) in zip(list(orig_data1), list(orig_data2))],
dtype="boolean",
[a <= b for (a, b) in zip(list(orig_data1), list(orig_data2))]
)
self.assert_series_equal(result, expected)

val = s1.iloc[0]
result = s1.combine(val, lambda x1, x2: x1 <= x2, dtype="boolean")
expected = pd.Series([a <= val for a in list(orig_data1)], dtype="boolean")
result = s1.combine(val, lambda x1, x2: x1 <= x2)
expected = pd.Series([a <= val for a in list(orig_data1)])
self.assert_series_equal(result, expected)

def test_combine_add(self, data_repeated):
9 changes: 9 additions & 0 deletions pandas/tests/extension/test_sparse.py
@@ -343,6 +343,15 @@ def test_astype_object_frame(self, all_data):
# comp = result.dtypes.equals(df.dtypes)
# assert not comp.any()

@pytest.mark.xfail(raises=AssertionError, reason="no sparse str dtype")
def test_astype_str(self, data):
# Sparse arrays don't support str dtype
super().test_astype_str(data)

@pytest.mark.xfail(raises=AssertionError, reason="no sparse StringDtype")
def test_astype_string(self, data):
super().test_astype_string(data)


class TestArithmeticOps(BaseSparseTests, base.BaseArithmeticOpsTests):
series_scalar_exc = None
