
Commit

update
topper-123 committed May 23, 2020
1 parent 44abe87 commit 9bcb6a8
Showing 9 changed files with 33 additions and 27 deletions.
1 change: 0 additions & 1 deletion doc/source/whatsnew/v1.1.0.rst
@@ -236,7 +236,6 @@ Other enhancements
- :meth:`DataFrame.sample` will now also allow array-like and BitGenerator objects to be passed to ``random_state`` as seeds (:issue:`32503`)
- :meth:`MultiIndex.union` will now raise a `RuntimeWarning` if the objects inside are unsortable; pass `sort=False` to suppress this warning (:issue:`33015`)
- :class:`Series.dt` and :class:`DatetimeIndex` now have an `isocalendar` method that returns a :class:`DataFrame` with year, week, and day calculated according to the ISO 8601 calendar (:issue:`33206`).
- :meth:`Series.combine` has gained a ``dtype`` argument. If supplied, the combined series will get that dtype (:issue:`33465`)
- The :meth:`DataFrame.to_feather` method now supports additional keyword
arguments (e.g. to set the compression) that are added in pyarrow 0.17
(:issue:`33422`).
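
Two of the enhancements listed above can be sketched roughly as follows (illustrative only; the file path and compression value are placeholders, and writing feather files requires pyarrow 0.17 or newer):

    import pandas as pd

    ser = pd.Series(pd.date_range("2020-01-01", periods=3))
    # isocalendar() returns a DataFrame with ISO 8601 year, week and day columns
    iso = ser.dt.isocalendar()

    df = pd.DataFrame({"a": [1, 2, 3]})
    # extra keyword arguments are forwarded to pyarrow's feather writer
    df.to_feather("data.feather", compression="zstd")
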
7 changes: 6 additions & 1 deletion pandas/core/arrays/datetimelike.py
@@ -27,6 +27,7 @@
is_datetime64tz_dtype,
is_datetime_or_timedelta_dtype,
is_dtype_equal,
is_extension_array_dtype,
is_float_dtype,
is_integer_dtype,
is_list_like,
@@ -619,7 +620,11 @@ def astype(self, dtype, copy=True):
if is_object_dtype(dtype):
return self._box_values(self.asi8.ravel()).reshape(self.shape)
elif is_string_dtype(dtype) and not is_categorical_dtype(dtype):
return self._format_native_types()
if is_extension_array_dtype(dtype):
arr_cls = dtype.construct_array_type()
return arr_cls._from_sequence(self, dtype=dtype)
else:
return self._format_native_types()
elif is_integer_dtype(dtype):
# we deliberately ignore int32 vs. int64 here.
# See https://github.com/pandas-dev/pandas/issues/24381 for more.
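
A minimal sketch of what the new branch does, assuming this change is applied: when the target is an extension string dtype, the datetime-like values are handed to that dtype's array class via ``_from_sequence`` instead of being returned as a plain array of formatted strings.

    import pandas as pd

    dta = pd.array(pd.date_range("2020-01-01", periods=2))  # DatetimeArray

    # extension target dtype: built through dtype.construct_array_type()._from_sequence
    as_string = dta.astype("string")

    # a plain str target still goes through _format_native_types
    as_str = dta.astype(str)
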
2 changes: 1 addition & 1 deletion pandas/core/arrays/integer.py
@@ -1,5 +1,5 @@
import numbers
from typing import TYPE_CHECKING, List, Optional, Dict, Tuple, Type, Union
from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Type, Union
import warnings

import numpy as np
1 change: 0 additions & 1 deletion pandas/core/arrays/period.py
@@ -564,7 +564,6 @@ def _format_native_types(self, na_rep="NaT", date_format=None, **kwargs):
actually format my specific types
"""
values = self.astype(object)

if date_format:
formatter = lambda dt: dt.strftime(date_format)
else:
13 changes: 6 additions & 7 deletions pandas/core/arrays/string_.py
@@ -104,11 +104,6 @@ class StringArray(PandasArray):
.. versionadded:: 1.0.0
.. versionchanged:: 1.1.0
``StringArray`` allows non-string input values, but will always convert the
values to strings. (Before pandas 1.1 non-string values were not allowed).
.. warning::
StringArray is considered experimental. The implementation and
@@ -157,9 +152,13 @@ class StringArray(PandasArray):
['This is', 'some text', <NA>, 'data.']
Length: 4, dtype: string
Like ``object`` dtype arrays instantiated with ``dtype="str"``, ``StringArray``
allows non-string values but will always convert the values to strings.
Unlike arrays instantiated with ``dtype="object"``, ``StringArray``
will convert the values to strings.
>>> pd.array(['1', 1], dtype="object")
<PandasArray>
['1', 1]
Length: 2, dtype: object
>>> pd.array(['1', 1], dtype="string")
<StringArray>
['1', '1']
13 changes: 4 additions & 9 deletions pandas/core/series.py
@@ -2695,11 +2695,6 @@ def combine(self, other, func, fill_value=None, dtype=None) -> "Series":
The value to assume when an index is missing from
one Series or the other. The default specifies to use the
appropriate NaN value for the underlying dtype of the Series.
dtype : str, numpy.dtype, or ExtensionDtype, optional
Data type for the output Series. If not specified, this will be
inferred from the combined data.
.. versionadded:: 1.1.0
Returns
-------
@@ -2770,13 +2765,13 @@ def combine(self, other, func, fill_value=None, dtype=None) -> "Series":
new_values = [func(lv, other) for lv in self._values]
new_name = self.name

if dtype is not None:
return self._constructor(
new_values, index=new_index, name=new_name, dtype=dtype
)
if is_categorical_dtype(self.dtype):
pass
elif is_extension_array_dtype(self.dtype):
# Everything can be converted to strings, but we may not want to convert
if self.dtype == "string" and lib.infer_dtype(new_values) != "string":
return self._constructor(new_values, index=new_index, name=new_name)

# TODO: can we do this for only SparseDtype?
# The function can return something of any type, so check
# if the type is compatible with the calling EA.
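
With the ``dtype`` argument gone, a rough sketch of the behavior the string-dtype check above targets (illustrative only; result dtypes are inferred rather than forced):

    import pandas as pd

    s1 = pd.Series(["a", "c"], dtype="string")
    s2 = pd.Series(["b", "b"], dtype="string")

    # The combining function returns booleans, so the values can no longer be
    # kept as "string"; combine falls back to the plain constructor and the
    # result dtype is inferred from the combined values.
    result = s1.combine(s2, lambda x, y: x <= y)
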
5 changes: 3 additions & 2 deletions pandas/tests/extension/base/casting.py
@@ -33,12 +33,13 @@ def test_tolist(self, data):

def test_astype_str(self, data):
result = pd.Series(data[:5]).astype(str)
expected = pd.Series(data[:5].astype(str))
expected = pd.Series([str(x) for x in data[:5]], dtype=str)
self.assert_series_equal(result, expected)

def test_astype_string(self, data):
# GH-33465
result = pd.Series(data[:5]).astype("string")
expected = pd.Series(data[:5].astype("string"))
expected = pd.Series([str(x) for x in data[:5]], dtype="string")
self.assert_series_equal(result, expected)

def test_to_numpy(self, data):
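
For a concrete extension array, the updated expectations amount to roughly the following (a sketch; the nullable Int64 values stand in for the fixture data used by the real tests):

    import pandas as pd

    data = pd.array([1, 2, 3, 4, 5], dtype="Int64")

    # astype(str) should match an element-wise str() conversion ...
    result_str = pd.Series(data).astype(str)
    expected_str = pd.Series([str(x) for x in data], dtype=str)

    # ... and astype("string") the same values in the nullable string dtype (GH-33465)
    result_string = pd.Series(data).astype("string")
    expected_string = pd.Series([str(x) for x in data], dtype="string")
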
9 changes: 4 additions & 5 deletions pandas/tests/extension/base/methods.py
@@ -188,16 +188,15 @@ def test_combine_le(self, data_repeated):
orig_data1, orig_data2 = data_repeated(2)
s1 = pd.Series(orig_data1)
s2 = pd.Series(orig_data2)
result = s1.combine(s2, lambda x1, x2: x1 <= x2, dtype="boolean")
result = s1.combine(s2, lambda x1, x2: x1 <= x2)
expected = pd.Series(
[a <= b for (a, b) in zip(list(orig_data1), list(orig_data2))],
dtype="boolean",
[a <= b for (a, b) in zip(list(orig_data1), list(orig_data2))]
)
self.assert_series_equal(result, expected)

val = s1.iloc[0]
result = s1.combine(val, lambda x1, x2: x1 <= x2, dtype="boolean")
expected = pd.Series([a <= val for a in list(orig_data1)], dtype="boolean")
result = s1.combine(val, lambda x1, x2: x1 <= x2)
expected = pd.Series([a <= val for a in list(orig_data1)])
self.assert_series_equal(result, expected)

def test_combine_add(self, data_repeated):
9 changes: 9 additions & 0 deletions pandas/tests/extension/test_sparse.py
@@ -343,6 +343,15 @@ def test_astype_object_frame(self, all_data):
# comp = result.dtypes.equals(df.dtypes)
# assert not comp.any()

@pytest.mark.xfail(raises=AssertionError, reason="no sparse str dtype")
def test_astype_str(self, data):
# Sparse arrays don't support str dtype
super().test_astype_str(data)

@pytest.mark.xfail(raises=AssertionError, reason="no sparse StringDtype")
def test_astype_string(self, data):
super().test_astype_string(data)


class TestArithmeticOps(BaseSparseTests, base.BaseArithmeticOpsTests):
series_scalar_exc = None
