diff --git a/ci/code_checks.sh b/ci/code_checks.sh index da878d3343233..d30785d675788 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -288,26 +288,17 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then RET=$(($RET + $?)) ; echo $MSG "DONE" MSG='Doctests interval classes' ; echo $MSG - pytest -q --doctest-modules \ - pandas/core/indexes/interval.py \ - pandas/core/arrays/interval.py + pytest -q --doctest-modules pandas/core/indexes/interval.py RET=$(($RET + $?)) ; echo $MSG "DONE" MSG='Doctests arrays'; echo $MSG - pytest -q --doctest-modules \ - pandas/core/arrays/string_.py \ - pandas/core/arrays/integer.py \ - pandas/core/arrays/boolean.py + pytest -q --doctest-modules pandas/core/arrays/ RET=$(($RET + $?)) ; echo $MSG "DONE" MSG='Doctests dtypes'; echo $MSG pytest -q --doctest-modules pandas/core/dtypes/ RET=$(($RET + $?)) ; echo $MSG "DONE" - MSG='Doctests arrays/boolean.py' ; echo $MSG - pytest -q --doctest-modules pandas/core/arrays/boolean.py - RET=$(($RET + $?)) ; echo $MSG "DONE" - MSG='Doctests base.py' ; echo $MSG pytest -q --doctest-modules pandas/core/base.py RET=$(($RET + $?)) ; echo $MSG "DONE" diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index af897e86a14d4..6cb597ba75852 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -1161,7 +1161,7 @@ def _create_method(cls, op, coerce_to_dtype=True): -------- Given an ExtensionArray subclass called MyExtensionArray, use - >>> __add__ = cls._create_method(operator.add) + __add__ = cls._create_method(operator.add) in the class definition of MyExtensionArray to create the operator for addition, that will be based on the operator implementation diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index edc138574830d..f283b6fd3b4b3 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1598,19 +1598,19 @@ def sort_values(self, inplace=False, ascending=True, na_position="last"): >>> c = 
pd.Categorical([np.nan, 2, 2, np.nan, 5]) >>> c - [NaN, 2.0, 2.0, NaN, 5.0] + [NaN, 2, 2, NaN, 5] Categories (2, int64): [2, 5] >>> c.sort_values() - [2.0, 2.0, 5.0, NaN, NaN] + [2, 2, 5, NaN, NaN] Categories (2, int64): [2, 5] >>> c.sort_values(ascending=False) - [5.0, 2.0, 2.0, NaN, NaN] + [5, 2, 2, NaN, NaN] Categories (2, int64): [2, 5] >>> c.sort_values(na_position='first') - [NaN, NaN, 2.0, 2.0, 5.0] + [NaN, NaN, 2, 2, 5] Categories (2, int64): [2, 5] >>> c.sort_values(ascending=False, na_position='first') - [NaN, NaN, 5.0, 2.0, 2.0] + [NaN, NaN, 5, 2, 2] Categories (2, int64): [2, 5] """ inplace = validate_bool_kwarg(inplace, "inplace") @@ -1835,7 +1835,7 @@ def take(self, indexer, allow_fill: bool = False, fill_value=None): >>> cat.take([0, -1, -1], allow_fill=True, fill_value='a') [a, a, a] - Categories (3, object): [a, b] + Categories (2, object): [a, b] Specifying a fill value that's not in ``self.categories`` will raise a ``TypeError``. @@ -2231,33 +2231,32 @@ def unique(self): ------- unique values : ``Categorical`` + See Also + -------- + pandas.unique + CategoricalIndex.unique + Series.unique + Examples -------- An unordered Categorical will return categories in the order of appearance. - >>> pd.Categorical(list('baabc')) + >>> pd.Categorical(list("baabc")).unique() [b, a, c] Categories (3, object): [b, a, c] - >>> pd.Categorical(list('baabc'), categories=list('abc')) + >>> pd.Categorical(list("baabc"), categories=list("abc")).unique() [b, a, c] Categories (3, object): [b, a, c] An ordered Categorical preserves the category ordering. - >>> pd.Categorical(list('baabc'), - ... categories=list('abc'), - ... ordered=True) + >>> pd.Categorical( + ... list("baabc"), categories=list("abc"), ordered=True + ... 
).unique() [b, a, c] Categories (3, object): [a < b < c] - - See Also - -------- - unique - CategoricalIndex.unique - Series.unique - """ # unlike np.unique, unique1d does not sort unique_codes = unique1d(self.codes) @@ -2438,7 +2437,7 @@ def replace(self, to_replace, value, inplace: bool = False): -------- >>> s = pd.Categorical([1, 2, 1, 3]) >>> s.replace(1, 3) - [3, 3, 2, 3] + [3, 2, 3, 3] Categories (2, int64): [2, 3] """ inplace = validate_bool_kwarg(inplace, "inplace") @@ -2506,16 +2505,100 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): Examples -------- + >>> s = pd.Series(list("abbccc")).astype("category") + >>> s + 0 a + 1 b + 2 b + 3 c + 4 c + 5 c + dtype: category + Categories (3, object): [a, b, c] + >>> s.cat.categories - >>> s.cat.categories = list('abc') - >>> s.cat.rename_categories(list('cab')) - >>> s.cat.reorder_categories(list('cab')) - >>> s.cat.add_categories(['d','e']) - >>> s.cat.remove_categories(['d']) - >>> s.cat.remove_unused_categories() - >>> s.cat.set_categories(list('abcde')) + Index(['a', 'b', 'c'], dtype='object') + + >>> s.cat.rename_categories(list("cba")) + 0 c + 1 b + 2 b + 3 a + 4 a + 5 a + dtype: category + Categories (3, object): [c, b, a] + + >>> s.cat.reorder_categories(list("cba")) + 0 a + 1 b + 2 b + 3 c + 4 c + 5 c + dtype: category + Categories (3, object): [c, b, a] + + >>> s.cat.add_categories(["d", "e"]) + 0 a + 1 b + 2 b + 3 c + 4 c + 5 c + dtype: category + Categories (5, object): [a, b, c, d, e] + + >>> s.cat.remove_categories(["a", "c"]) + 0 NaN + 1 b + 2 b + 3 NaN + 4 NaN + 5 NaN + dtype: category + Categories (1, object): [b] + + >>> s1 = s.cat.add_categories(["d", "e"]) + >>> s1.cat.remove_unused_categories() + 0 a + 1 b + 2 b + 3 c + 4 c + 5 c + dtype: category + Categories (3, object): [a, b, c] + + >>> s.cat.set_categories(list("abcde")) + 0 a + 1 b + 2 b + 3 c + 4 c + 5 c + dtype: category + Categories (5, object): [a, b, c, d, e] + >>> s.cat.as_ordered() + 0 a + 1 b + 2 
b + 3 c + 4 c + 5 c + dtype: category + Categories (3, object): [a < b < c] + >>> s.cat.as_unordered() + 0 a + 1 b + 2 b + 3 c + 4 c + 5 c + dtype: category + Categories (3, object): [a, b, c] """ def __init__(self, data): @@ -2603,7 +2686,7 @@ def _recode_for_categories(codes: np.ndarray, old_categories, new_categories): >>> new_cat = pd.Index(['a', 'b']) >>> codes = np.array([0, 1, 1, 2]) >>> _recode_for_categories(codes, old_cat, new_cat) - array([ 1, 0, 0, -1]) + array([ 1, 0, 0, -1], dtype=int8) """ if len(old_categories) == 0: # All null anyway, so just retain the nulls diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index a153b4e06157b..c0bbbebac7c33 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -181,7 +181,7 @@ def _unbox_scalar(self, value: Union[Period, Timestamp, Timedelta, NaTType]) -> Examples -------- - >>> self._unbox_scalar(Timedelta('10s')) # DOCTEST: +SKIP + >>> self._unbox_scalar(Timedelta("10s")) # doctest: +SKIP 10000000000 """ raise AbstractMethodError(self) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index e2a13df069ae2..e6a17491e9378 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -922,9 +922,10 @@ def tz_localize(self, tz, ambiguous="raise", nonexistent="raise"): ... '2018-10-28 02:36:00', ... 
'2018-10-28 03:46:00'])) >>> s.dt.tz_localize('CET', ambiguous=np.array([True, True, False])) - 0 2015-03-29 03:00:00+02:00 - 1 2015-03-29 03:30:00+02:00 - dtype: datetime64[ns, Europe/Warsaw] + 0 2018-10-28 01:20:00+02:00 + 1 2018-10-28 02:36:00+02:00 + 2 2018-10-28 03:46:00+01:00 + dtype: datetime64[ns, CET] If the DST transition causes nonexistent times, you can shift these dates forward or backwards with a timedelta object or `'shift_forward'` @@ -935,15 +936,17 @@ def tz_localize(self, tz, ambiguous="raise", nonexistent="raise"): >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_forward') 0 2015-03-29 03:00:00+02:00 1 2015-03-29 03:30:00+02:00 - dtype: datetime64[ns, 'Europe/Warsaw'] + dtype: datetime64[ns, Europe/Warsaw] + >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_backward') 0 2015-03-29 01:59:59.999999999+01:00 1 2015-03-29 03:30:00+02:00 - dtype: datetime64[ns, 'Europe/Warsaw'] + dtype: datetime64[ns, Europe/Warsaw] + >>> s.dt.tz_localize('Europe/Warsaw', nonexistent=pd.Timedelta('1H')) 0 2015-03-29 03:30:00+02:00 1 2015-03-29 03:30:00+02:00 - dtype: datetime64[ns, 'Europe/Warsaw'] + dtype: datetime64[ns, Europe/Warsaw] """ nonexistent_options = ("raise", "NaT", "shift_forward", "shift_backward") if nonexistent not in nonexistent_options and not isinstance( @@ -1604,9 +1607,9 @@ def date(self): DatetimeIndex(['2012-12-31', '2013-12-31', '2014-12-31'], dtype='datetime64[ns]', freq='A-DEC') >>> idx.is_leap_year - array([ True, False, False], dtype=bool) + array([ True, False, False]) - >>> dates = pd.Series(idx) + >>> dates_series = pd.Series(idx) >>> dates_series 0 2012-12-31 1 2013-12-31 diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index cf6c16d4cad5d..d23d26d870f75 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -94,7 +94,7 @@ def to_numpy( >>> a = pd.array([True, False, pd.NA], dtype="boolean") >>> a.to_numpy() - array([True, False, NA], dtype=object) + array([True, False, 
<NA>], dtype=object) When no missing values are present, an equivalent dtype can be used. @@ -110,7 +110,7 @@ def to_numpy( >>> a = pd.array([True, False, pd.NA], dtype="boolean") >>> a <BooleanArray> - [True, False, NA] + [True, False, <NA>] Length: 3, dtype: boolean >>> a.to_numpy(dtype="bool") diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index be9cc53d33d6f..d9bd567f88845 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -818,6 +818,7 @@ def period_array( Integers that look like years are handled >>> period_array([2000, 2001, 2002], freq='D') + <PeriodArray> ['2000-01-01', '2001-01-01', '2002-01-01'] Length: 3, dtype: period[D] diff --git a/pandas/core/arrays/sparse/accessor.py b/pandas/core/arrays/sparse/accessor.py index 787407060c7f1..8a30d2b954b55 100644 --- a/pandas/core/arrays/sparse/accessor.py +++ b/pandas/core/arrays/sparse/accessor.py @@ -67,24 +67,25 @@ def from_coo(cls, A, dense_index=False): Examples -------- >>> from scipy import sparse - >>> A = sparse.coo_matrix(([3.0, 1.0, 2.0], ([1, 0, 0], [0, 2, 3])), - shape=(3, 4)) + + >>> A = sparse.coo_matrix( + ... ([3.0, 1.0, 2.0], ([1, 0, 0], [0, 2, 3])), shape=(3, 4) + ... 
) >>> A <3x4 sparse matrix of type '<class 'numpy.float64'>' - with 3 stored elements in COOrdinate format> + with 3 stored elements in COOrdinate format> + >>> A.todense() - matrix([[ 0., 0., 1., 2.], - [ 3., 0., 0., 0.], - [ 0., 0., 0., 0.]]) + matrix([[0., 0., 1., 2.], + [3., 0., 0., 0.], + [0., 0., 0., 0.]]) + >>> ss = pd.Series.sparse.from_coo(A) >>> ss - 0 2 1 - 3 2 - 1 0 3 - dtype: float64 - BlockIndex - Block locations: array([0], dtype=int32) - Block lengths: array([3], dtype=int32) + 0 2 1.0 + 3 2.0 + 1 0 3.0 + dtype: Sparse[float64, nan] """ from pandas.core.arrays.sparse.scipy_sparse import _coo_to_sparse_series from pandas import Series @@ -119,24 +120,49 @@ def to_coo(self, row_levels=(0,), column_levels=(1,), sort_labels=False): Examples -------- >>> s = pd.Series([3.0, np.nan, 1.0, 3.0, np.nan, np.nan]) - >>> s.index = pd.MultiIndex.from_tuples([(1, 2, 'a', 0), - (1, 2, 'a', 1), - (1, 1, 'b', 0), - (1, 1, 'b', 1), - (2, 1, 'b', 0), - (2, 1, 'b', 1)], - names=['A', 'B', 'C', 'D']) + >>> s.index = pd.MultiIndex.from_tuples( + ... [ + ... (1, 2, "a", 0), + ... (1, 2, "a", 1), + ... (1, 1, "b", 0), + ... (1, 1, "b", 1), + ... (2, 1, "b", 0), + ... (2, 1, "b", 1) + ... ], + ... names=["A", "B", "C", "D"], + ... ) + >>> s + A B C D + 1 2 a 0 3.0 + 1 NaN + 1 b 0 1.0 + 1 3.0 + 2 1 b 0 NaN + 1 NaN + dtype: float64 + >>> ss = s.astype("Sparse") - >>> A, rows, columns = ss.sparse.to_coo(row_levels=['A', 'B'], - ... column_levels=['C', 'D'], - ... sort_labels=True) + >>> ss + A B C D + 1 2 a 0 3.0 + 1 NaN + 1 b 0 1.0 + 1 3.0 + 2 1 b 0 NaN + 1 NaN + dtype: Sparse[float64, nan] + + >>> A, rows, columns = ss.sparse.to_coo( + ... row_levels=["A", "B"], column_levels=["C", "D"], sort_labels=True + ... 
) >>> A <3x4 sparse matrix of type '<class 'numpy.float64'>' - with 3 stored elements in COOrdinate format> + with 3 stored elements in COOrdinate format> >>> A.todense() - matrix([[ 0., 0., 1., 3.], - [ 3., 0., 0., 0.], - [ 0., 0., 0., 0.]]) + matrix([[0., 0., 1., 3.], + [3., 0., 0., 0.], + [0., 0., 0., 0.]]) + >>> rows [(1, 1), (1, 2), (2, 1)] >>> columns diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 8c09aa9176f31..a98875ace09aa 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -1048,7 +1048,7 @@ def astype(self, dtype=None, copy=True): Examples -------- - >>> arr = SparseArray([0, 0, 1, 2]) + >>> arr = pd.arrays.SparseArray([0, 0, 1, 2]) >>> arr [0, 0, 1, 2] Fill: 0 @@ -1066,8 +1066,8 @@ def astype(self, dtype=None, copy=True): >>> arr.astype(np.dtype('float64')) ... # doctest: +NORMALIZE_WHITESPACE - [0, 0, 1.0, 2.0] - Fill: 0 + [0.0, 0.0, 1.0, 2.0] + Fill: 0.0 IntIndex Indices: array([2, 3], dtype=int32) @@ -1107,19 +1107,19 @@ def map(self, mapper): Examples -------- >>> arr = pd.arrays.SparseArray([0, 1, 2]) - >>> arr.apply(lambda x: x + 10) + >>> arr.map(lambda x: x + 10) [10, 11, 12] Fill: 10 IntIndex Indices: array([1, 2], dtype=int32) - >>> arr.apply({0: 10, 1: 11, 2: 12}) + >>> arr.map({0: 10, 1: 11, 2: 12}) [10, 11, 12] Fill: 10 IntIndex Indices: array([1, 2], dtype=int32) - >>> arr.apply(pd.Series([10, 11, 12], index=[0, 1, 2])) + >>> arr.map(pd.Series([10, 11, 12], index=[0, 1, 2])) [10, 11, 12] Fill: 10 IntIndex diff --git a/pandas/core/arrays/sparse/dtype.py b/pandas/core/arrays/sparse/dtype.py index 135514e334920..afa11586fda04 100644 --- a/pandas/core/arrays/sparse/dtype.py +++ b/pandas/core/arrays/sparse/dtype.py @@ -347,7 +347,7 @@ def _subtype_with_str(self): dtype('O') >>> dtype._subtype_with_str - str + <class 'str'> """ if isinstance(self.fill_value, str): return type(self.fill_value)