From 3b0824e92e9932588f5d0e58e1b0aa59df2e76fa Mon Sep 17 00:00:00 2001 From: shriyakalakata <87483933+shriyakalakata@users.noreply.github.com> Date: Mon, 22 Apr 2024 12:29:47 -0400 Subject: [PATCH 01/22] DOC: Fix SA01 errors for Index.hasnans, Index.map, Index.nbytes (#58343) * Shorten sentence length * Remove Series.nbytes from ci/code_checks.sh * Update see also method names * Update see also method names * Update see also methods for Index.map * Update method descriptions --- ci/code_checks.sh | 4 ---- pandas/core/base.py | 5 +++++ pandas/core/indexes/base.py | 10 ++++++++++ 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index cabc25b5e0ba5d..4debc2eb914493 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -156,18 +156,15 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.Index.get_indexer_non_unique PR07,SA01" \ -i "pandas.Index.get_loc PR07,RT03,SA01" \ -i "pandas.Index.get_slice_bound PR07" \ - -i "pandas.Index.hasnans SA01" \ -i "pandas.Index.identical PR01,SA01" \ -i "pandas.Index.inferred_type SA01" \ -i "pandas.Index.insert PR07,RT03,SA01" \ -i "pandas.Index.intersection PR07,RT03,SA01" \ -i "pandas.Index.item SA01" \ -i "pandas.Index.join PR07,RT03,SA01" \ - -i "pandas.Index.map SA01" \ -i "pandas.Index.memory_usage RT03" \ -i "pandas.Index.name SA01" \ -i "pandas.Index.names GL08" \ - -i "pandas.Index.nbytes SA01" \ -i "pandas.Index.nunique RT03" \ -i "pandas.Index.putmask PR01,RT03" \ -i "pandas.Index.ravel PR01,RT03" \ @@ -344,7 +341,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.Series.mod PR07" \ -i "pandas.Series.mode SA01" \ -i "pandas.Series.mul PR07" \ - -i "pandas.Series.nbytes SA01" \ -i "pandas.Series.ne PR07,SA01" \ -i "pandas.Series.nunique RT03" \ -i "pandas.Series.pad PR01,SA01" \ diff --git a/pandas/core/base.py b/pandas/core/base.py index 9b1251a4ef5d8c..424f0609dd485c 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -419,6 +419,11 @@ def nbytes(self) -> int: """ Return the number of bytes in the underlying data. + See Also + -------- + Series.ndim : Number of dimensions of the underlying data. + Series.size : Return the number of elements in the underlying data. + Examples -------- For Series: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 8ede401f37184c..d1d1c5ea3171f9 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2423,6 +2423,12 @@ def hasnans(self) -> bool: ------- bool + See Also + -------- + Index.isna : Detect missing values. + Index.dropna : Return Index without NA/NaN values. + Index.fillna : Fill NA/NaN values with the specified value. + Examples -------- >>> s = pd.Series([1, 2, 3], index=["a", "b", None]) @@ -6067,6 +6073,10 @@ def map(self, mapper, na_action: Literal["ignore"] | None = None): If the function returns a tuple with more than one element a MultiIndex will be returned. + See Also + -------- + Index.where : Replace values where the condition is False. + Examples -------- >>> idx = pd.Index([1, 2, 3]) From 2768a22d3b6bb70029f406968bc366faf2c7267f Mon Sep 17 00:00:00 2001 From: gboeker <68177766+gboeker@users.noreply.github.com> Date: Mon, 22 Apr 2024 12:31:05 -0400 Subject: [PATCH 02/22] DOC: Enforce Numpy Docstring Validation for DatetimeIndex (#58353) * fix line too long * add return for snap * undo return * fix docstring issues for DatetimeIndex.quarter * remove pandas.Series.dt.quarter from codechecks * fix docstring issues for DatetimeIndex.round * fix docstring issues for DatetimeIndex.time * fix docstring issues for DatetimeIndex.timetz * add see also for timetz * fix code check errors * delete round from code_checks * fix code check errors --- ci/code_checks.sh | 20 ++++---------------- pandas/core/arrays/datetimelike.py | 5 +++++ pandas/core/arrays/datetimes.py | 20 ++++++++++++++++++++ pandas/core/indexes/datetimes.py | 7 +++++++ 4 files changed, 36 insertions(+), 16 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 4debc2eb914493..443fa4b4005d30 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -107,22 +107,16 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.DataFrame.to_markdown SA01" \ -i "pandas.DataFrame.to_parquet RT03" \ -i "pandas.DataFrame.var PR01,RT03,SA01" \ - -i "pandas.DatetimeIndex.ceil SA01" \ -i "pandas.DatetimeIndex.date SA01" \ -i "pandas.DatetimeIndex.day_of_year SA01" \ -i "pandas.DatetimeIndex.dayofyear SA01" \ - -i "pandas.DatetimeIndex.floor SA01" \ -i "pandas.DatetimeIndex.freqstr SA01" \ -i "pandas.DatetimeIndex.indexer_at_time PR01,RT03" \ -i "pandas.DatetimeIndex.indexer_between_time RT03" \ -i "pandas.DatetimeIndex.inferred_freq SA01" \ -i "pandas.DatetimeIndex.is_leap_year SA01" \ - -i "pandas.DatetimeIndex.quarter SA01" \ - -i "pandas.DatetimeIndex.round SA01" \ - -i "pandas.DatetimeIndex.snap PR01,RT03,SA01" \ + -i "pandas.DatetimeIndex.snap PR01,RT03" \ -i "pandas.DatetimeIndex.std PR01,RT03" \ - -i "pandas.DatetimeIndex.time SA01" \ - -i "pandas.DatetimeIndex.timetz SA01" \ -i "pandas.DatetimeIndex.to_period RT03" \ -i "pandas.DatetimeIndex.to_pydatetime RT03,SA01" \ -i "pandas.DatetimeIndex.tz SA01" \ @@ -286,7 +280,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.Series.div PR07" \ -i "pandas.Series.droplevel SA01" \ -i "pandas.Series.dt.as_unit PR01,PR02" \ - -i "pandas.Series.dt.ceil PR01,PR02,SA01" \ + -i "pandas.Series.dt.ceil PR01,PR02" \ -i "pandas.Series.dt.components SA01" \ -i "pandas.Series.dt.date SA01" \ -i "pandas.Series.dt.day_name PR01,PR02" \ @@ -295,20 +289,17 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.Series.dt.days SA01" \ -i "pandas.Series.dt.days_in_month SA01" \ -i "pandas.Series.dt.daysinmonth SA01" \ - -i "pandas.Series.dt.floor PR01,PR02,SA01" \ + -i "pandas.Series.dt.floor PR01,PR02" \ -i "pandas.Series.dt.freq GL08" \ -i "pandas.Series.dt.is_leap_year SA01" \ -i "pandas.Series.dt.microseconds SA01" \ -i "pandas.Series.dt.month_name PR01,PR02" \ -i "pandas.Series.dt.nanoseconds SA01" \ -i "pandas.Series.dt.normalize PR01" \ - -i "pandas.Series.dt.quarter SA01" \ -i "pandas.Series.dt.qyear GL08" \ - -i "pandas.Series.dt.round PR01,PR02,SA01" \ + -i "pandas.Series.dt.round PR01,PR02" \ -i "pandas.Series.dt.seconds SA01" \ -i "pandas.Series.dt.strftime PR01,PR02" \ - -i "pandas.Series.dt.time SA01" \ - -i "pandas.Series.dt.timetz SA01" \ -i "pandas.Series.dt.to_period PR01,PR02,RT03" \ -i "pandas.Series.dt.total_seconds PR01" \ -i "pandas.Series.dt.tz SA01" \ @@ -428,14 +419,11 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.Timedelta.total_seconds SA01" \ -i "pandas.Timedelta.view SA01" \ -i "pandas.TimedeltaIndex.as_unit RT03,SA01" \ - -i "pandas.TimedeltaIndex.ceil SA01" \ -i "pandas.TimedeltaIndex.components SA01" \ -i "pandas.TimedeltaIndex.days SA01" \ - -i "pandas.TimedeltaIndex.floor SA01" \ -i "pandas.TimedeltaIndex.inferred_freq SA01" \ -i "pandas.TimedeltaIndex.microseconds SA01" \ -i "pandas.TimedeltaIndex.nanoseconds SA01" \ - -i "pandas.TimedeltaIndex.round SA01" \ -i "pandas.TimedeltaIndex.seconds SA01" \ -i "pandas.TimedeltaIndex.to_pytimedelta RT03,SA01" \ -i "pandas.Timestamp PR07,SA01" \ diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 8ada9d88e08bc0..974289160b1458 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1825,6 +1825,11 @@ def strftime(self, date_format: str) -> npt.NDArray[np.object_]: ------ ValueError if the `freq` cannot be converted. + See Also + -------- + DatetimeIndex.floor : Perform floor operation on the data to the specified `freq`. + DatetimeIndex.snap : Snap time stamps to nearest occurring frequency. + Notes ----- If the timestamps have a timezone, {op}ing will take place relative to the diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 7704c99141fc20..fb9f047d432a12 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1391,6 +1391,14 @@ def time(self) -> npt.NDArray[np.object_]: The time part of the Timestamps. + See Also + -------- + DatetimeIndex.timetz : Returns numpy array of :class:`datetime.time` + objects with timezones. The time part of the Timestamps. + DatetimeIndex.date : Returns numpy array of python :class:`datetime.date` + objects. Namely, the date part of Timestamps without time and timezone + information. + Examples -------- For Series: @@ -1428,6 +1436,12 @@ def timetz(self) -> npt.NDArray[np.object_]: The time part of the Timestamps. + See Also + -------- + DatetimeIndex.time : Returns numpy array of :class:`datetime.time` objects. + The time part of the Timestamps. + DatetimeIndex.tz : Return the timezone. + Examples -------- For Series: @@ -1836,6 +1850,12 @@ def isocalendar(self) -> DataFrame: """ The quarter of the date. + See Also + -------- + DatetimeIndex.snap : Snap time stamps to nearest occurring frequency. + DatetimeIndex.time : Returns numpy array of datetime.time objects. + The time part of the Timestamps. + Examples -------- For Series: diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index cefdc14145d1f7..7122de745e13b7 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -455,6 +455,13 @@ def snap(self, freq: Frequency = "S") -> DatetimeIndex: ------- DatetimeIndex + See Also + -------- + DatetimeIndex.round : Perform round operation on the data to the + specified `freq`. + DatetimeIndex.floor : Perform floor operation on the data to the + specified `freq`. + Examples -------- >>> idx = pd.DatetimeIndex( From 09c7201d6db4be265aafda8feb7577c02145f2eb Mon Sep 17 00:00:00 2001 From: Tuhin Sharma Date: Mon, 22 Apr 2024 22:34:44 +0530 Subject: [PATCH 03/22] DOC: Enforce Numpy Docstring Validation for pandas.HDFStore.info (#58368) * DOC: add return description and see also section to pandas.HDFStore.info * DOC: add 2 df in the examples for pandas.HDFStore.info * DOC: remove pandas.HDFStore.info --- ci/code_checks.sh | 1 - pandas/io/pytables.py | 15 ++++++++++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 443fa4b4005d30..fdcbcbe31c47fc 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -125,7 +125,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.DatetimeTZDtype.tz SA01" \ -i "pandas.DatetimeTZDtype.unit SA01" \ -i "pandas.Grouper PR02" \ - -i "pandas.HDFStore.info RT03,SA01" \ -i "pandas.HDFStore.keys SA01" \ -i "pandas.HDFStore.put PR01,SA01" \ -i "pandas.HDFStore.select SA01" \ diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index d7fc71d037f2df..89c6ac9a583823 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1688,17 +1688,26 @@ def info(self) -> str: Returns ------- str + A String containing the python pandas class name, filepath to the HDF5 + file and all the object keys along with their respective dataframe shapes. + + See Also + -------- + HDFStore.get_storer : Returns the storer object for a key. Examples -------- - >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) + >>> df1 = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) + >>> df2 = pd.DataFrame([[5, 6], [7, 8]], columns=["C", "D"]) >>> store = pd.HDFStore("store.h5", "w") # doctest: +SKIP - >>> store.put("data", df) # doctest: +SKIP + >>> store.put("data1", df1) # doctest: +SKIP + >>> store.put("data2", df2) # doctest: +SKIP >>> print(store.info()) # doctest: +SKIP >>> store.close() # doctest: +SKIP File path: store.h5 - /data frame (shape->[2,2]) + /data1 frame (shape->[2,2]) + /data2 frame (shape->[2,2]) """ path = pprint_thing(self._path) output = f"{type(self)}\nFile path: {path}\n" From 5db3196e8a5779a2548ba5f48ed8f4ebfb2cf31b Mon Sep 17 00:00:00 2001 From: gboeker <68177766+gboeker@users.noreply.github.com> Date: Mon, 22 Apr 2024 13:16:42 -0400 Subject: [PATCH 04/22] DOC: Fix SA01 Docstring Errors for DataFrame (#58364) * DataFrame.__iter__ fix SA01 * add See Also for DataFrame.column * DataFrame.droplevel SA01 fixed * remove pandas.Series.droplevel from code_checks.sh --- ci/code_checks.sh | 4 ---- pandas/core/frame.py | 4 ++++ pandas/core/generic.py | 11 +++++++++++ 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index fdcbcbe31c47fc..d2ba06902096e0 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -80,10 +80,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.CategoricalIndex.codes SA01" \ -i "pandas.CategoricalIndex.ordered SA01" \ -i "pandas.DataFrame.__dataframe__ SA01" \ - -i "pandas.DataFrame.__iter__ SA01" \ -i "pandas.DataFrame.at_time PR01" \ - -i "pandas.DataFrame.columns SA01" \ - -i "pandas.DataFrame.droplevel SA01" \ -i "pandas.DataFrame.hist RT03" \ -i "pandas.DataFrame.infer_objects RT03" \ -i "pandas.DataFrame.kurt RT03,SA01" \ @@ -277,7 +274,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.Series.cat.reorder_categories PR01,PR02" \ -i "pandas.Series.cat.set_categories PR01,PR02" \ -i "pandas.Series.div PR07" \ - -i "pandas.Series.droplevel SA01" \ -i "pandas.Series.dt.as_unit PR01,PR02" \ -i "pandas.Series.dt.ceil PR01,PR02" \ -i "pandas.Series.dt.components SA01" \ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 0185ca82416176..50dc514e7181f6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -12893,6 +12893,10 @@ def isin_(x): """ The column labels of the DataFrame. + See Also + -------- + DataFrame.index: The index (row labels) of the DataFrame. + Examples -------- >>> df = pd.DataFrame({'A': [1, 2], 'B': [3, 4]}) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index dbe20066424841..a7f155ec93524d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -783,6 +783,12 @@ def droplevel(self, level: IndexLabel, axis: Axis = 0) -> Self: {klass} {klass} with requested index / column level(s) removed. + See Also + -------- + DataFrame.replace : Replace values given in `to_replace` with `value`. + DataFrame.pivot : Return reshaped DataFrame organized by given + index / column values. + Examples -------- >>> df = ( @@ -1862,6 +1868,11 @@ def __iter__(self) -> Iterator: iterator Info axis as iterator. + See Also + -------- + DataFrame.items : Iterate over (column name, Series) pairs. + DataFrame.itertuples : Iterate over DataFrame rows as namedtuples. + Examples -------- >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) From f1297fae4561c1cdf1c0eab1ec6fa2247ef73f07 Mon Sep 17 00:00:00 2001 From: William Andrea <22385371+wjandrea@users.noreply.github.com> Date: Mon, 22 Apr 2024 13:30:27 -0400 Subject: [PATCH 05/22] More idiomatic example code in BaseIndexer (#58356) No need to loop when NumPy supports range and array addition --- pandas/core/indexers/objects.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pandas/core/indexers/objects.py b/pandas/core/indexers/objects.py index 2e6bcda520aba0..d108f840a1b4fc 100644 --- a/pandas/core/indexers/objects.py +++ b/pandas/core/indexers/objects.py @@ -53,11 +53,8 @@ class BaseIndexer: >>> from pandas.api.indexers import BaseIndexer >>> class CustomIndexer(BaseIndexer): ... def get_window_bounds(self, num_values, min_periods, center, closed, step): - ... start = np.empty(num_values, dtype=np.int64) - ... end = np.empty(num_values, dtype=np.int64) - ... for i in range(num_values): - ... start[i] = i - ... end[i] = i + self.window_size + ... start = np.arange(num_values, dtype=np.int64) + ... end = np.arange(num_values, dtype=np.int64) + self.window_size ... return start, end >>> df = pd.DataFrame({"values": range(5)}) >>> indexer = CustomIndexer(window_size=2) From e714aca6f2ed594c95a9681dac3d4858f23552a2 Mon Sep 17 00:00:00 2001 From: KeiOshima Date: Mon, 22 Apr 2024 13:43:48 -0400 Subject: [PATCH 06/22] DOC: fixing SA01 errors for Index: name, dtype, and equals (#58355) * DOC: fixing SA01 errors for Index: name, dtype, and equals * fixing Blank line contains whitespace error --- ci/code_checks.sh | 3 --- pandas/core/indexes/base.py | 15 +++++++++++++++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index d2ba06902096e0..d595162fd84e92 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -136,10 +136,8 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.Index.drop_duplicates RT03" \ -i "pandas.Index.droplevel RT03,SA01" \ -i "pandas.Index.dropna RT03,SA01" \ - -i "pandas.Index.dtype SA01" \ -i "pandas.Index.duplicated RT03" \ -i "pandas.Index.empty GL08" \ - -i "pandas.Index.equals SA01" \ -i "pandas.Index.fillna RT03" \ -i "pandas.Index.get_indexer PR07,SA01" \ -i "pandas.Index.get_indexer_for PR01,SA01" \ @@ -153,7 +151,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.Index.item SA01" \ -i "pandas.Index.join PR07,RT03,SA01" \ -i "pandas.Index.memory_usage RT03" \ - -i "pandas.Index.name SA01" \ -i "pandas.Index.names GL08" \ -i "pandas.Index.nunique RT03" \ -i "pandas.Index.putmask PR01,RT03" \ diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index d1d1c5ea3171f9..424126132656ca 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -976,6 +976,10 @@ def dtype(self) -> DtypeObj: """ Return the dtype object of the underlying data. + See Also + -------- + Index.inferred_type: Return a string of the type inferred from the values. + Examples -------- >>> idx = pd.Index([1, 2, 3]) @@ -1638,6 +1642,11 @@ def name(self) -> Hashable: """ Return Index or MultiIndex name. + See Also + -------- + Index.set_names: Able to set new names partially and by level. + Index.rename: Able to set new names partially and by level. + Examples -------- >>> idx = pd.Index([1, 2, 3], name="x") @@ -5181,6 +5190,12 @@ def equals(self, other: Any) -> bool: True if "other" is an Index and it has the same elements and order as the calling index; False otherwise. + See Also + -------- + Index.identical: Checks that object attributes and types are also equal. + Index.has_duplicates: Check if the Index has duplicate values. + Index.is_unique: Return if the index has unique values. + Examples -------- >>> idx1 = pd.Index([1, 2, 3]) From 22e524799de6189e93e5d4f1907f3e6ea282a28a Mon Sep 17 00:00:00 2001 From: Nrezhang <102526155+Nrezhang@users.noreply.github.com> Date: Mon, 22 Apr 2024 13:45:18 -0400 Subject: [PATCH 07/22] DOC: Fix SA01 errors for pandas.Index.astype (#58352) * pandas.Index.astype * check fixes * series to index --- ci/code_checks.sh | 1 - pandas/core/indexes/base.py | 6 ++++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index d595162fd84e92..f03ea658660316 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -129,7 +129,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.Index PR07" \ -i "pandas.Index.T SA01" \ -i "pandas.Index.append PR07,RT03,SA01" \ - -i "pandas.Index.astype SA01" \ -i "pandas.Index.copy PR07,SA01" \ -i "pandas.Index.difference PR07,RT03,SA01" \ -i "pandas.Index.drop PR07,SA01" \ diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 424126132656ca..63facb61ed4984 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1060,6 +1060,12 @@ def astype(self, dtype, copy: bool = True): Index Index with values cast to specified dtype. + See Also + -------- + Index.dtype: Return the dtype object of the underlying data. + Index.dtypes: Return the dtype object of the underlying data. + Index.convert_dtypes: Convert columns to the best possible dtypes. + Examples -------- >>> idx = pd.Index([1, 2, 3]) From 3461db5656b2ea2b90368f521c63fbcccb48d68d Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 22 Apr 2024 07:56:29 -1000 Subject: [PATCH 08/22] CLN: Use more memoryviews (#58330) * Add memoryviews in reshape.pyx * Use more const memoryviews --- pandas/_libs/lib.pyx | 6 +++--- pandas/_libs/reshape.pyx | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 7aa1cb715521eb..24afbe3a07bf12 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -477,7 +477,7 @@ def has_infs(const floating[:] arr) -> bool: @cython.boundscheck(False) @cython.wraparound(False) -def has_only_ints_or_nan(floating[:] arr) -> bool: +def has_only_ints_or_nan(const floating[:] arr) -> bool: cdef: floating val intp_t i @@ -631,7 +631,7 @@ ctypedef fused int6432_t: @cython.wraparound(False) @cython.boundscheck(False) -def is_range_indexer(ndarray[int6432_t, ndim=1] left, Py_ssize_t n) -> bool: +def is_range_indexer(const int6432_t[:] left, Py_ssize_t n) -> bool: """ Perform an element by element comparison on 1-d integer arrays, meant for indexer comparisons @@ -652,7 +652,7 @@ def is_range_indexer(ndarray[int6432_t, ndim=1] left, Py_ssize_t n) -> bool: @cython.wraparound(False) @cython.boundscheck(False) -def is_sequence_range(ndarray[int6432_t, ndim=1] sequence, int64_t step) -> bool: +def is_sequence_range(const int6432_t[:] sequence, int64_t step) -> bool: """ Check if sequence is equivalent to a range with the specified step. """ diff --git a/pandas/_libs/reshape.pyx b/pandas/_libs/reshape.pyx index 21d1405328da66..28ea06739e0c8d 100644 --- a/pandas/_libs/reshape.pyx +++ b/pandas/_libs/reshape.pyx @@ -19,7 +19,7 @@ from pandas._libs.lib cimport c_is_list_like @cython.wraparound(False) @cython.boundscheck(False) -def unstack(numeric_object_t[:, :] values, const uint8_t[:] mask, +def unstack(const numeric_object_t[:, :] values, const uint8_t[:] mask, Py_ssize_t stride, Py_ssize_t length, Py_ssize_t width, numeric_object_t[:, :] new_values, uint8_t[:, :] new_mask) -> None: """ @@ -80,7 +80,7 @@ def unstack(numeric_object_t[:, :] values, const uint8_t[:] mask, @cython.wraparound(False) @cython.boundscheck(False) -def explode(ndarray[object] values): +def explode(object[:] values): """ transform array list-likes to long form preserve non-list entries From 454e2e1d9d7b118953ecfb4edc6f9fe7f5cb07b8 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 22 Apr 2024 08:01:24 -1000 Subject: [PATCH 09/22] CLN: Use generators when objects are re-iterated over in core/internals (#58319) * Make _split generator * More iterators * Remove typing --- pandas/core/frame.py | 4 +-- pandas/core/internals/blocks.py | 23 ++++++------- pandas/core/internals/managers.py | 41 +++++++++--------------- pandas/tests/internals/test_internals.py | 2 +- 4 files changed, 29 insertions(+), 41 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 50dc514e7181f6..567fcb1ef7c05c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -12925,12 +12925,12 @@ def _to_dict_of_blocks(self): Return a dict of dtype -> Constructor Types that each is a homogeneous dtype. - Internal ONLY - only works for BlockManager + Internal ONLY. """ mgr = self._mgr return { k: self._constructor_from_mgr(v, axes=v.axes).__finalize__(self) - for k, v in mgr.to_dict().items() + for k, v in mgr.to_iter_dict() } @property diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 7be1d5d95ffdf2..1b72c164f79453 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -118,6 +118,7 @@ if TYPE_CHECKING: from collections.abc import ( + Generator, Iterable, Sequence, ) @@ -385,20 +386,18 @@ def _split_op_result(self, result: ArrayLike) -> list[Block]: return [nb] @final - def _split(self) -> list[Block]: + def _split(self) -> Generator[Block, None, None]: """ Split a block into a list of single-column blocks. """ assert self.ndim == 2 - new_blocks = [] for i, ref_loc in enumerate(self._mgr_locs): vals = self.values[slice(i, i + 1)] bp = BlockPlacement(ref_loc) nb = type(self)(vals, placement=bp, ndim=2, refs=self.refs) - new_blocks.append(nb) - return new_blocks + yield nb @final def split_and_operate(self, func, *args, **kwargs) -> list[Block]: @@ -537,7 +536,9 @@ def convert_dtypes( rbs = [] for blk in blks: # Determine dtype column by column - sub_blks = [blk] if blk.ndim == 1 or self.shape[0] == 1 else blk._split() + sub_blks = ( + [blk] if blk.ndim == 1 or self.shape[0] == 1 else list(blk._split()) + ) dtypes = [ convert_dtypes( b.values, @@ -1190,8 +1191,7 @@ def putmask(self, mask, new) -> list[Block]: is_array = isinstance(new, np.ndarray) res_blocks = [] - nbs = self._split() - for i, nb in enumerate(nbs): + for i, nb in enumerate(self._split()): n = new if is_array: # we have a different value per-column @@ -1255,8 +1255,7 @@ def where(self, other, cond) -> list[Block]: is_array = isinstance(other, (np.ndarray, ExtensionArray)) res_blocks = [] - nbs = self._split() - for i, nb in enumerate(nbs): + for i, nb in enumerate(self._split()): oth = other if is_array: # we have a different value per-column @@ -1698,8 +1697,7 @@ def where(self, other, cond) -> list[Block]: is_array = isinstance(orig_other, (np.ndarray, ExtensionArray)) res_blocks = [] - nbs = self._split() - for i, nb in enumerate(nbs): + for i, nb in enumerate(self._split()): n = orig_other if is_array: # we have a different value per-column @@ -1760,8 +1758,7 @@ def putmask(self, mask, new) -> list[Block]: is_array = isinstance(orig_new, (np.ndarray, ExtensionArray)) res_blocks = [] - nbs = self._split() - for i, nb in enumerate(nbs): + for i, nb in enumerate(self._split()): n = orig_new if is_array: # we have a different value per-column diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 8fda9cd23b508a..7c1bcbec1d3f23 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -92,6 +92,8 @@ ) if TYPE_CHECKING: + from collections.abc import Generator + from pandas._typing import ( ArrayLike, AxisInt, @@ -645,8 +647,7 @@ def get_bool_data(self) -> Self: new_blocks.append(blk) elif blk.is_object: - nbs = blk._split() - new_blocks.extend(nb for nb in nbs if nb.is_bool) + new_blocks.extend(nb for nb in blk._split() if nb.is_bool) return self._combine(new_blocks) @@ -1525,7 +1526,9 @@ def _insert_update_mgr_locs(self, loc) -> None: When inserting a new Block at location 'loc', we increment all of the mgr_locs of blocks above that by one. """ - for blkno, count in _fast_count_smallints(self.blknos[loc:]): + # Faster version of set(arr) for sequences of small numbers + blknos = np.bincount(self.blknos[loc:]).nonzero()[0] + for blkno in blknos: # .620 this way, .326 of which is in increment_above blk = self.blocks[blkno] blk._mgr_locs = blk._mgr_locs.increment_above(loc) @@ -1597,7 +1600,7 @@ def grouped_reduce(self, func: Callable) -> Self: nrows = 0 else: nrows = result_blocks[0].values.shape[-1] - index = Index(range(nrows)) + index = default_index(nrows) return type(self).from_blocks(result_blocks, [self.axes[0], index]) @@ -1735,21 +1738,18 @@ def unstack(self, unstacker, fill_value) -> BlockManager: bm = BlockManager(new_blocks, [new_columns, new_index], verify_integrity=False) return bm - def to_dict(self) -> dict[str, Self]: + def to_iter_dict(self) -> Generator[tuple[str, Self], None, None]: """ - Return a dict of str(dtype) -> BlockManager + Yield a tuple of (str(dtype), BlockManager) Returns ------- - values : a dict of dtype -> BlockManager + values : a tuple of (str(dtype), BlockManager) """ - - bd: dict[str, list[Block]] = {} - for b in self.blocks: - bd.setdefault(str(b.dtype), []).append(b) - - # TODO(EA2D): the combine will be unnecessary with 2D EAs - return {dtype: self._combine(blocks) for dtype, blocks in bd.items()} + key = lambda block: str(block.dtype) + for dtype, blocks in itertools.groupby(sorted(self.blocks, key=key), key=key): + # TODO(EA2D): the combine will be unnecessary with 2D EAs + yield dtype, self._combine(list(blocks)) def as_array( self, @@ -2330,7 +2330,7 @@ def _grouping_func(tup: tuple[int, ArrayLike]) -> tuple[int, DtypeObj]: def _form_blocks(arrays: list[ArrayLike], consolidate: bool, refs: list) -> list[Block]: - tuples = list(enumerate(arrays)) + tuples = enumerate(arrays) if not consolidate: return _tuples_to_blocks_no_consolidate(tuples, refs) @@ -2351,7 +2351,7 @@ def _form_blocks(arrays: list[ArrayLike], consolidate: bool, refs: list) -> list if issubclass(dtype.type, (str, bytes)): dtype = np.dtype(object) - values, placement = _stack_arrays(list(tup_block), dtype) + values, placement = _stack_arrays(tup_block, dtype) if is_dtlike: values = ensure_wrapped_if_datetimelike(values) blk = block_type(values, placement=BlockPlacement(placement), ndim=2) @@ -2450,15 +2450,6 @@ def _merge_blocks( return blocks, False -def _fast_count_smallints(arr: npt.NDArray[np.intp]): - """Faster version of set(arr) for sequences of small numbers.""" - counts = np.bincount(arr) - nz = counts.nonzero()[0] - # Note: list(zip(...) outperforms list(np.c_[nz, counts[nz]]) here, - # in one benchmark by a factor of 11 - return zip(nz, counts[nz]) - - def _preprocess_slice_or_indexer( slice_or_indexer: slice | np.ndarray, length: int, allow_fill: bool ): diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 92addeb29252a4..43bcf84f901b1c 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -347,7 +347,7 @@ def test_split(self): # GH#37799 values = np.random.default_rng(2).standard_normal((3, 4)) blk = new_block(values, placement=BlockPlacement([3, 1, 6]), ndim=2) - result = blk._split() + result = list(blk._split()) # check that we get views, not copies values[:] = -9999 From cf953dac795e49a530df33d1f1c012bd7346a555 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dominik=20Smr=C5=BE?= Date: Mon, 22 Apr 2024 20:55:37 +0200 Subject: [PATCH 10/22] Allow `tan` to be used in `df.eval`. (#58334) * Allow `tan` to be used in `df.eval`. * Whatsnew: Link issue for fixing `tan` in `eval`. --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/computation/ops.py | 1 + 2 files changed, 2 insertions(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index c817e09b3b3608..7823f74b7a153c 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -458,6 +458,7 @@ Other - Bug in :class:`DataFrame` when passing a ``dict`` with a NA scalar and ``columns`` that would always return ``np.nan`` (:issue:`57205`) - Bug in :func:`unique` on :class:`Index` not always returning :class:`Index` (:issue:`57043`) - Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which caused an exception when using NumPy attributes via ``@`` notation, e.g., ``df.eval("@np.floor(a)")``. (:issue:`58041`) +- Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which did not allow to use ``tan`` function. (:issue:`55091`) - Bug in :meth:`DataFrame.sort_index` when passing ``axis="columns"`` and ``ignore_index=True`` and ``ascending=False`` not returning a :class:`RangeIndex` columns (:issue:`57293`) - Bug in :meth:`DataFrame.transform` that was returning the wrong order unless the index was monotonically increasing. (:issue:`57069`) - Bug in :meth:`DataFrame.where` where using a non-bool type array in the function would return a ``ValueError`` instead of a ``TypeError`` (:issue:`56330`) diff --git a/pandas/core/computation/ops.py b/pandas/core/computation/ops.py index 7d8e23abf43b6b..b7a1cb173f6599 100644 --- a/pandas/core/computation/ops.py +++ b/pandas/core/computation/ops.py @@ -45,6 +45,7 @@ _unary_math_ops = ( "sin", "cos", + "tan", "exp", "log", "expm1", From 281d4a8d62b2397225822b3a4f0ba4c4df6cff07 Mon Sep 17 00:00:00 2001 From: Tuhin Sharma Date: Tue, 23 Apr 2024 00:26:22 +0530 Subject: [PATCH 11/22] DOC: Enforce Numpy Docstring Validation for pandas.HDFStore.keys (#58371) * DOC: add SA01 to HDFStore.keys * DOC: remove HDFStore.keys * DOC: fix typo in See Also for HDFStore.keys --- ci/code_checks.sh | 1 - pandas/io/pytables.py | 6 ++++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index f03ea658660316..17316c80f86ba8 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -122,7 +122,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.DatetimeTZDtype.tz SA01" \ -i "pandas.DatetimeTZDtype.unit SA01" \ -i "pandas.Grouper PR02" \ - -i "pandas.HDFStore.keys SA01" \ -i "pandas.HDFStore.put PR01,SA01" \ -i "pandas.HDFStore.select SA01" \ -i "pandas.HDFStore.walk SA01" \ diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 89c6ac9a583823..5c04342b9eb559 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -656,6 +656,12 @@ def keys(self, include: str = "pandas") -> list[str]: ------ raises ValueError if kind has an illegal value + See Also + -------- + HDFStore.info : Prints detailed information on the store. + HDFStore.get_node : Returns the node with the key. + HDFStore.get_storer : Returns the storer object for a key. + Examples -------- >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) From 19c4769f7d793d715b008675a4f94b2e5570b025 Mon Sep 17 00:00:00 2001 From: KeiOshima Date: Mon, 22 Apr 2024 14:56:51 -0400 Subject: [PATCH 12/22] Doc: Fixing SA01 error for DataFrame: pop and columns (#58359) Doc: Fixinf SA01 error for DataFrame: pop and columns --- ci/code_checks.sh | 1 - pandas/core/frame.py | 6 ++++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 17316c80f86ba8..a7a4bcf165f2a5 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -90,7 +90,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.DataFrame.median RT03,SA01" \ -i "pandas.DataFrame.min RT03" \ -i "pandas.DataFrame.plot PR02,SA01" \ - -i "pandas.DataFrame.pop SA01" \ -i "pandas.DataFrame.prod RT03" \ -i "pandas.DataFrame.product RT03" \ -i "pandas.DataFrame.reorder_levels SA01" \ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 567fcb1ef7c05c..3bcf41893b6c8e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5535,6 +5535,11 @@ def pop(self, item: Hashable) -> Series: Series Series representing the item that is dropped. + See Also + -------- + DataFrame.drop: Drop specified labels from rows or columns. + DataFrame.drop_duplicates: Return DataFrame with duplicate rows removed. + Examples -------- >>> df = pd.DataFrame( @@ -12896,6 +12901,7 @@ def isin_(x): See Also -------- DataFrame.index: The index (row labels) of the DataFrame. + DataFrame.axes: Return a list representing the axes of the DataFrame. Examples -------- From 963ce7a594b4346d70a2b39a6fc81af0bb463809 Mon Sep 17 00:00:00 2001 From: Tuhin Sharma Date: Tue, 23 Apr 2024 01:52:57 +0530 Subject: [PATCH 13/22] DOC: Enforce Numpy Docstring Validation for pandas.HDFStore.select (#58374) * DOC: add SA01 to HDFStore.select * DOC: remove HDFStore.select --- ci/code_checks.sh | 1 - pandas/io/pytables.py | 6 ++++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index a7a4bcf165f2a5..599d4d65b9101b 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -122,7 +122,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.DatetimeTZDtype.unit SA01" \ -i "pandas.Grouper PR02" \ -i "pandas.HDFStore.put PR01,SA01" \ - -i "pandas.HDFStore.select SA01" \ -i "pandas.HDFStore.walk SA01" \ -i "pandas.Index PR07" \ -i "pandas.Index.T SA01" \ diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 5c04342b9eb559..0af5c753977bdd 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -859,6 +859,12 @@ def select( object Retrieved object from file. + See Also + -------- + HDFStore.select_as_coordinates : Returns the selection as an index. + HDFStore.select_column : Returns a single column from the table. + HDFStore.select_as_multiple : Retrieves pandas objects from multiple tables. + Examples -------- >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) From 0cafd1007640b9c6f3542eddd10ffffbaee49c88 Mon Sep 17 00:00:00 2001 From: KeiOshima Date: Mon, 22 Apr 2024 18:11:44 -0400 Subject: [PATCH 14/22] DOC: Fixing SA01 issues for DatetimeIndex: date and tz (#58377) * DOC: Fixing SA01 issues for DatetimeIndex: date and tz * fixing: XPECTED TO FAIL, BUT NOT FAILING error --- ci/code_checks.sh | 4 ---- pandas/core/arrays/datetimes.py | 15 +++++++++++++++ 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 599d4d65b9101b..801fe7eccd1ed4 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -103,7 +103,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.DataFrame.to_markdown SA01" \ -i "pandas.DataFrame.to_parquet RT03" \ -i "pandas.DataFrame.var PR01,RT03,SA01" \ - -i "pandas.DatetimeIndex.date SA01" \ -i "pandas.DatetimeIndex.day_of_year SA01" \ -i "pandas.DatetimeIndex.dayofyear SA01" \ -i "pandas.DatetimeIndex.freqstr SA01" \ @@ -115,7 +114,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.DatetimeIndex.std PR01,RT03" \ -i "pandas.DatetimeIndex.to_period RT03" \ -i "pandas.DatetimeIndex.to_pydatetime RT03,SA01" \ - -i "pandas.DatetimeIndex.tz SA01" \ -i "pandas.DatetimeIndex.tz_convert RT03" \ -i "pandas.DatetimeTZDtype SA01" \ -i "pandas.DatetimeTZDtype.tz SA01" \ @@ -270,7 +268,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.Series.dt.as_unit PR01,PR02" \ -i "pandas.Series.dt.ceil PR01,PR02" \ -i "pandas.Series.dt.components SA01" \ - -i "pandas.Series.dt.date SA01" \ -i "pandas.Series.dt.day_name PR01,PR02" \ -i "pandas.Series.dt.day_of_year SA01" \ -i "pandas.Series.dt.dayofyear SA01" \ @@ -290,7 +287,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.Series.dt.strftime PR01,PR02" \ -i "pandas.Series.dt.to_period PR01,PR02,RT03" \ -i "pandas.Series.dt.total_seconds PR01" \ - -i "pandas.Series.dt.tz SA01" \ -i "pandas.Series.dt.tz_convert PR01,PR02,RT03" \ -i "pandas.Series.dt.tz_localize PR01,PR02" \ -i "pandas.Series.dt.unit GL08" \ diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index fb9f047d432a12..203308b4f0deed 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -593,6 +593,13 @@ def tz(self) -> tzinfo | None: datetime.tzinfo, pytz.tzinfo.BaseTZInfo, dateutil.tz.tz.tzfile, or None Returns None when the array is tz-naive. + See Also + -------- + DatetimeIndex.tz_localize : Localize tz-naive DatetimeIndex to a + given time zone, or remove timezone from a tz-aware DatetimeIndex. + DatetimeIndex.tz_convert : Convert tz-aware DatetimeIndex from + one time zone to another. + Examples -------- For Series: @@ -1476,6 +1483,14 @@ def date(self) -> npt.NDArray[np.object_]: Namely, the date part of Timestamps without time and timezone information. + See Also + -------- + DatetimeIndex.time : Returns numpy array of :class:`datetime.time` objects. + The time part of the Timestamps. + DatetimeIndex.year : The year of the datetime. + DatetimeIndex.month : The month as January=1, December=12. + DatetimeIndex.day : The day of the datetime. + Examples -------- For Series: From bfe5be01fef4eaecf4ab033e74139b0a3cac4a39 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 22 Apr 2024 15:32:37 -1000 Subject: [PATCH 15/22] REF: Defer creating Index._engine until needed (#58370) --- pandas/core/frame.py | 3 +-- pandas/core/indexes/base.py | 3 ++- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3bcf41893b6c8e..4d89272013a524 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4012,7 +4012,6 @@ def _get_value(self, index, col, takeable: bool = False) -> Scalar: return series._values[index] series = self._get_item(col) - engine = self.index._engine if not isinstance(self.index, MultiIndex): # CategoricalIndex: Trying to use the engine fastpath may give incorrect @@ -4023,7 +4022,7 @@ def _get_value(self, index, col, takeable: bool = False) -> Scalar: # For MultiIndex going through engine effectively restricts us to # same-length tuples; see test_get_set_value_no_partial_indexing - loc = engine.get_loc(index) + loc = self.index._engine.get_loc(index) return series._values[loc] def isetitem(self, loc, value) -> None: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 63facb61ed4984..d2129c54fabc4c 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -832,7 +832,8 @@ def _reset_identity(self) -> None: @final def _cleanup(self) -> None: - self._engine.clear_mapping() + if "_engine" in self._cache: + self._engine.clear_mapping() @cache_readonly def _engine( From ec1dff9ff3289ab2a456d293e232cffcd4abb90d Mon Sep 17 00:00:00 2001 From: shriyakalakata <87483933+shriyakalakata@users.noreply.github.com> Date: Tue, 23 Apr 2024 12:51:09 -0400 Subject: [PATCH 16/22] Add mailing list link (#58358) * Add mailing list link * Update mailing list link --- doc/source/development/community.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/development/community.rst b/doc/source/development/community.rst index ccf7be8e477482..ab8294b8f135a6 100644 --- a/doc/source/development/community.rst +++ b/doc/source/development/community.rst @@ -100,6 +100,8 @@ The pandas mailing list `pandas-dev@python.org `_. + .. _community.slack: Community slack From 903cd53911a3e1dd79b51c28db9cfbed95fb4fc1 Mon Sep 17 00:00:00 2001 From: KeiOshima Date: Tue, 23 Apr 2024 12:58:49 -0400 Subject: [PATCH 17/22] DOC: fixinf SA01 issue for DataFrame.to_feather (#58378) --- ci/code_checks.sh | 1 - pandas/core/frame.py | 10 ++++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 801fe7eccd1ed4..cf21ae92496acd 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -99,7 +99,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.DataFrame.std PR01,RT03,SA01" \ -i "pandas.DataFrame.sum RT03" \ -i "pandas.DataFrame.swaplevel SA01" \ - -i "pandas.DataFrame.to_feather SA01" \ -i "pandas.DataFrame.to_markdown SA01" \ -i "pandas.DataFrame.to_parquet RT03" \ -i "pandas.DataFrame.var PR01,RT03,SA01" \ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4d89272013a524..e8a0e37b70145b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2685,6 +2685,16 @@ def to_feather(self, path: FilePath | WriteBuffer[bytes], **kwargs) -> None: This includes the `compression`, `compression_level`, `chunksize` and `version` keywords. + See Also + -------- + DataFrame.to_parquet : Write a DataFrame to the binary parquet format. + DataFrame.to_excel : Write object to an Excel sheet. + DataFrame.to_sql : Write to a sql table. + DataFrame.to_csv : Write a csv file. + DataFrame.to_json : Convert the object to a JSON string. + DataFrame.to_html : Render a DataFrame as an HTML table. + DataFrame.to_string : Convert DataFrame to a string. + Notes ----- This function writes the dataframe as a `feather file From ff2727147d367b5b81659931e9804733711e8f6c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 23 Apr 2024 10:04:00 -0700 Subject: [PATCH 18/22] BUG: setitem with mixed-resolution dt64s (#56419) * BUG: setitem with mixed-resolution dt64s * Move whatsnew to 3.0 * de-xfail * improve exception message --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/arrays/datetimes.py | 2 +- pandas/core/arrays/timedeltas.py | 2 +- pandas/core/indexes/datetimes.py | 2 ++ pandas/core/internals/blocks.py | 17 ++++++++-- pandas/tests/series/indexing/test_setitem.py | 33 ++++++++++++++++++++ pandas/tests/series/methods/test_clip.py | 28 ++++++++++++++--- pandas/tests/series/methods/test_fillna.py | 14 ++------- 8 files changed, 79 insertions(+), 20 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 7823f74b7a153c..4213cc8e6cfcf4 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -360,6 +360,7 @@ Datetimelike - Bug in :func:`date_range` where the last valid timestamp would sometimes not be produced (:issue:`56134`) - Bug in :func:`date_range` where using a negative frequency value would not include all points between the start and end values (:issue:`56382`) - Bug in :func:`tseries.api.guess_datetime_format` would fail to infer time format when "%Y" == "%H%M" (:issue:`57452`) +- Bug in setting scalar values with mismatched resolution into arrays with non-nanosecond ``datetime64``, ``timedelta64`` or :class:`DatetimeTZDtype` incorrectly truncating those scalars (:issue:`56410`) Timedelta ^^^^^^^^^ diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 203308b4f0deed..be087e19ce7b65 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -539,7 +539,7 @@ def _unbox_scalar(self, value) -> np.datetime64: if value is NaT: return np.datetime64(value._value, self.unit) else: - return value.as_unit(self.unit).asm8 + return value.as_unit(self.unit, round_ok=False).asm8 def _scalar_from_string(self, value) -> Timestamp | NaTType: return Timestamp(value, tz=self.tz) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 6eb4d234b349d7..ff43f971611362 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -322,7 +322,7 @@ def _unbox_scalar(self, value) -> np.timedelta64: if value is NaT: return np.timedelta64(value._value, self.unit) else: - return value.as_unit(self.unit).asm8 + return value.as_unit(self.unit, round_ok=False).asm8 def _scalar_from_string(self, value) -> Timedelta | NaTType: return Timedelta(value) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 7122de745e13b7..6d5f32774f485f 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -515,6 +515,8 @@ def _parsed_string_to_bounds( freq = OFFSET_TO_PERIOD_FREQSTR.get(reso.attr_abbrev, reso.attr_abbrev) per = Period(parsed, freq=freq) start, end = per.start_time, per.end_time + start = start.as_unit(self.unit) + end = end.as_unit(self.unit) # GH 24076 # If an incoming date string contained a UTC offset, need to localize diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 1b72c164f79453..28d3292a1c65bb 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -38,7 +38,10 @@ Shape, npt, ) -from pandas.errors import AbstractMethodError +from pandas.errors import ( + AbstractMethodError, + OutOfBoundsDatetime, +) from pandas.util._decorators import cache_readonly from pandas.util._exceptions import find_stack_level from pandas.util._validators import validate_bool_kwarg @@ -478,7 +481,17 @@ def coerce_to_target_dtype(self, other, warn_on_upcast: bool = False) -> Block: f"{self.values.dtype}. Please report a bug at " "https://github.com/pandas-dev/pandas/issues." ) - return self.astype(new_dtype) + try: + return self.astype(new_dtype) + except OutOfBoundsDatetime as err: + # e.g. GH#56419 if self.dtype is a low-resolution dt64 and we try to + # upcast to a higher-resolution dt64, we may have entries that are + # out of bounds for the higher resolution. + # Re-raise with a more informative message. + raise OutOfBoundsDatetime( + f"Incompatible (high-resolution) value for dtype='{self.dtype}'. " + "Explicitly cast before operating." + ) from err @final def convert(self) -> list[Block]: diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 99535f273075c1..7a2a4892f61fb5 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -1467,6 +1467,39 @@ def test_slice_key(self, obj, key, expected, warn, val, indexer_sli, is_inplace) raise AssertionError("xfail not relevant for this test.") +@pytest.mark.parametrize( + "exp_dtype", + [ + "M8[ms]", + "M8[ms, UTC]", + "m8[ms]", + ], +) +class TestCoercionDatetime64HigherReso(CoercionTest): + @pytest.fixture + def obj(self, exp_dtype): + idx = date_range("2011-01-01", freq="D", periods=4, unit="s") + if exp_dtype == "m8[ms]": + idx = idx - Timestamp("1970-01-01") + assert idx.dtype == "m8[s]" + elif exp_dtype == "M8[ms, UTC]": + idx = idx.tz_localize("UTC") + return Series(idx) + + @pytest.fixture + def val(self, exp_dtype): + ts = Timestamp("2011-01-02 03:04:05.678").as_unit("ms") + if exp_dtype == "m8[ms]": + return ts - Timestamp("1970-01-01") + elif exp_dtype == "M8[ms, UTC]": + return ts.tz_localize("UTC") + return ts + + @pytest.fixture + def warn(self): + return FutureWarning + + @pytest.mark.parametrize( "val,exp_dtype,warn", [ diff --git a/pandas/tests/series/methods/test_clip.py b/pandas/tests/series/methods/test_clip.py index 75b4050c18afeb..8ed422fc118dc5 100644 --- a/pandas/tests/series/methods/test_clip.py +++ b/pandas/tests/series/methods/test_clip.py @@ -3,6 +3,8 @@ import numpy as np import pytest +from pandas.errors import OutOfBoundsDatetime + import pandas as pd from pandas import ( Series, @@ -131,12 +133,30 @@ def test_clip_with_datetimes(self): ) tm.assert_series_equal(result, expected) - @pytest.mark.parametrize("dtype", [object, "M8[us]"]) - def test_clip_with_timestamps_and_oob_datetimes(self, dtype): + def test_clip_with_timestamps_and_oob_datetimes_object(self): # GH-42794 - ser = Series([datetime(1, 1, 1), datetime(9999, 9, 9)], dtype=dtype) + ser = Series([datetime(1, 1, 1), datetime(9999, 9, 9)], dtype=object) result = ser.clip(lower=Timestamp.min, upper=Timestamp.max) - expected = Series([Timestamp.min, Timestamp.max], dtype=dtype) + expected = Series([Timestamp.min, Timestamp.max], dtype=object) + + tm.assert_series_equal(result, expected) + + def test_clip_with_timestamps_and_oob_datetimes_non_nano(self): + # GH#56410 + dtype = "M8[us]" + ser = Series([datetime(1, 1, 1), datetime(9999, 9, 9)], dtype=dtype) + + msg = ( + r"Incompatible \(high-resolution\) value for dtype='datetime64\[us\]'. " + "Explicitly cast before operating" + ) + with pytest.raises(OutOfBoundsDatetime, match=msg): + ser.clip(lower=Timestamp.min, upper=Timestamp.max) + + lower = Timestamp.min.as_unit("us") + upper = Timestamp.max.as_unit("us") + result = ser.clip(lower=lower, upper=upper) + expected = Series([lower, upper], dtype=dtype) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/methods/test_fillna.py b/pandas/tests/series/methods/test_fillna.py index 0965d36e4827d1..592dba253532dd 100644 --- a/pandas/tests/series/methods/test_fillna.py +++ b/pandas/tests/series/methods/test_fillna.py @@ -308,12 +308,7 @@ def test_datetime64_fillna(self): "scalar", [ False, - pytest.param( - True, - marks=pytest.mark.xfail( - reason="GH#56410 scalar case not yet addressed" - ), - ), + True, ], ) @pytest.mark.parametrize("tz", [None, "UTC"]) @@ -342,12 +337,7 @@ def test_datetime64_fillna_mismatched_reso_no_rounding(self, tz, scalar): "scalar", [ False, - pytest.param( - True, - marks=pytest.mark.xfail( - reason="GH#56410 scalar case not yet addressed" - ), - ), + True, ], ) def test_timedelta64_fillna_mismatched_reso_no_rounding(self, scalar): From 191a56c32578be7ae7d231108abbe4ce1c4378e9 Mon Sep 17 00:00:00 2001 From: Tuhin Sharma Date: Tue, 23 Apr 2024 22:50:15 +0530 Subject: [PATCH 19/22] DOC: Enforce Numpy Docstring Validation for pandas.HDFStore.put (#58384) * DOC: add SA01 and PR01 to HDFStore.put * DOC: remove SA01 and PR01 of HDFStore.put --- ci/code_checks.sh | 1 - pandas/io/pytables.py | 20 ++++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index cf21ae92496acd..5993fabfc9d6ce 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -118,7 +118,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.DatetimeTZDtype.tz SA01" \ -i "pandas.DatetimeTZDtype.unit SA01" \ -i "pandas.Grouper PR02" \ - -i "pandas.HDFStore.put PR01,SA01" \ -i "pandas.HDFStore.walk SA01" \ -i "pandas.Index PR07" \ -i "pandas.Index.T SA01" \ diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 0af5c753977bdd..75e9b779e5094f 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1144,12 +1144,27 @@ def put( Write DataFrame index as a column. append : bool, default False This will force Table format, append the input data to the existing. + complib : default None + This parameter is currently not accepted. + complevel : int, 0-9, default None + Specifies a compression level for data. + A value of 0 or None disables compression. + min_itemsize : int, dict, or None + Dict of columns that specify minimum str sizes. + nan_rep : str + Str to use as str nan representation. data_columns : list of columns or True, default None List of columns to create as data columns, or True to use all columns. See `here `__. encoding : str, default None Provide an encoding for strings. + errors : str, default 'strict' + The error handling scheme to use for encoding errors. + The default is 'strict' meaning that encoding errors raise a + UnicodeEncodeError. Other possible values are 'ignore', 'replace' and + 'xmlcharrefreplace' as well as any other name registered with + codecs.register_error that can handle UnicodeEncodeErrors. track_times : bool, default True Parameter is propagated to 'create_table' method of 'PyTables'. If set to False it enables to have the same h5 files (same hashes) @@ -1157,6 +1172,11 @@ def put( dropna : bool, default False, optional Remove missing values. + See Also + -------- + HDFStore.info : Prints detailed information on the store. + HDFStore.get_storer : Returns the storer object for a key. + Examples -------- >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) From bd9c09b4331f890fc9fb4698deaf2d168060941b Mon Sep 17 00:00:00 2001 From: Abdulaziz Aloqeely <52792999+Aloqeely@users.noreply.github.com> Date: Tue, 23 Apr 2024 20:21:58 +0300 Subject: [PATCH 20/22] DEPR: to_pytimedelta return Index[object] (#58383) * DEPR: to_pytimedelta return Index[object] * ignore doctest warning --------- Co-authored-by: Abdulaziz Aloqeely <52792999+DAzVise@users.noreply.github.com> --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/conftest.py | 1 + pandas/core/indexes/accessors.py | 20 +++++++++++++++++++ pandas/tests/extension/test_arrow.py | 8 ++++++-- .../series/accessors/test_cat_accessor.py | 3 +++ .../series/accessors/test_dt_accessor.py | 4 +++- 6 files changed, 34 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 4213cc8e6cfcf4..02e4aba6674083 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -199,6 +199,7 @@ Other Deprecations - Deprecated allowing non-keyword arguments in :meth:`DataFrame.all`, :meth:`DataFrame.min`, :meth:`DataFrame.max`, :meth:`DataFrame.sum`, :meth:`DataFrame.prod`, :meth:`DataFrame.mean`, :meth:`DataFrame.median`, :meth:`DataFrame.sem`, :meth:`DataFrame.var`, :meth:`DataFrame.std`, :meth:`DataFrame.skew`, :meth:`DataFrame.kurt`, :meth:`Series.all`, :meth:`Series.min`, :meth:`Series.max`, :meth:`Series.sum`, :meth:`Series.prod`, :meth:`Series.mean`, :meth:`Series.median`, :meth:`Series.sem`, :meth:`Series.var`, :meth:`Series.std`, :meth:`Series.skew`, and :meth:`Series.kurt`. (:issue:`57087`) - Deprecated allowing non-keyword arguments in :meth:`Series.to_markdown` except ``buf``. (:issue:`57280`) - Deprecated allowing non-keyword arguments in :meth:`Series.to_string` except ``buf``. (:issue:`57280`) +- Deprecated behavior of :meth:`Series.dt.to_pytimedelta`, in a future version this will return a :class:`Series` containing python ``datetime.timedelta`` objects instead of an ``ndarray`` of timedelta; this matches the behavior of other :meth:`Series.dt` properties. (:issue:`57463`) - Deprecated using ``epoch`` date format in :meth:`DataFrame.to_json` and :meth:`Series.to_json`, use ``iso`` instead. (:issue:`57063`) - diff --git a/pandas/conftest.py b/pandas/conftest.py index 34489bb70575af..21100178262c88 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -157,6 +157,7 @@ def pytest_collection_modifyitems(items, config) -> None: ("SeriesGroupBy.fillna", "SeriesGroupBy.fillna is deprecated"), ("SeriesGroupBy.idxmin", "The behavior of Series.idxmin"), ("SeriesGroupBy.idxmax", "The behavior of Series.idxmax"), + ("to_pytimedelta", "The behavior of TimedeltaProperties.to_pytimedelta"), # Docstring divides by zero to show behavior difference ("missing.mask_zero_div_zero", "divide by zero encountered"), ( diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index 2bb234e174563c..3dcd1fedc8d641 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -9,10 +9,12 @@ NoReturn, cast, ) +import warnings import numpy as np from pandas._libs import lib +from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import ( is_integer_dtype, @@ -210,6 +212,15 @@ def _delegate_method(self, name: str, *args, **kwargs): return result def to_pytimedelta(self): + # GH 57463 + warnings.warn( + f"The behavior of {type(self).__name__}.to_pytimedelta is deprecated, " + "in a future version this will return a Series containing python " + "datetime.timedelta objects instead of an ndarray. To retain the " + "old behavior, call `np.array` on the result", + FutureWarning, + stacklevel=find_stack_level(), + ) return cast(ArrowExtensionArray, self._parent.array)._dt_to_pytimedelta() def to_pydatetime(self) -> Series: @@ -462,6 +473,15 @@ def to_pytimedelta(self) -> np.ndarray: datetime.timedelta(days=2), datetime.timedelta(days=3), datetime.timedelta(days=4)], dtype=object) """ + # GH 57463 + warnings.warn( + f"The behavior of {type(self).__name__}.to_pytimedelta is deprecated, " + "in a future version this will return a Series containing python " + "datetime.timedelta objects instead of an ndarray. To retain the " + "old behavior, call `np.array` on the result", + FutureWarning, + stacklevel=find_stack_level(), + ) return self._get_values().to_pytimedelta() @property diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 9b2251d0b7d4a1..79440b55dd5dd6 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -2861,12 +2861,16 @@ def test_dt_to_pytimedelta(): data = [timedelta(1, 2, 3), timedelta(1, 2, 4)] ser = pd.Series(data, dtype=ArrowDtype(pa.duration("ns"))) - result = ser.dt.to_pytimedelta() + msg = "The behavior of ArrowTemporalProperties.to_pytimedelta is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = ser.dt.to_pytimedelta() expected = np.array(data, dtype=object) tm.assert_numpy_array_equal(result, expected) assert all(type(res) is timedelta for res in result) - expected = ser.astype("timedelta64[ns]").dt.to_pytimedelta() + msg = "The behavior of TimedeltaProperties.to_pytimedelta is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + expected = ser.astype("timedelta64[ns]").dt.to_pytimedelta() tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/series/accessors/test_cat_accessor.py b/pandas/tests/series/accessors/test_cat_accessor.py index ca2768efd5c680..ce8ea27ea1fa23 100644 --- a/pandas/tests/series/accessors/test_cat_accessor.py +++ b/pandas/tests/series/accessors/test_cat_accessor.py @@ -200,6 +200,9 @@ def test_dt_accessor_api_for_categorical(self, idx): if func == "to_period" and getattr(idx, "tz", None) is not None: # dropping TZ warn_cls.append(UserWarning) + elif func == "to_pytimedelta": + # GH 57463 + warn_cls.append(FutureWarning) if warn_cls: warn_cls = tuple(warn_cls) else: diff --git a/pandas/tests/series/accessors/test_dt_accessor.py b/pandas/tests/series/accessors/test_dt_accessor.py index 5f0057ac50b472..8c60f7beb317da 100644 --- a/pandas/tests/series/accessors/test_dt_accessor.py +++ b/pandas/tests/series/accessors/test_dt_accessor.py @@ -192,7 +192,9 @@ def test_dt_namespace_accessor_timedelta(self): assert isinstance(result, DataFrame) tm.assert_index_equal(result.index, ser.index) - result = ser.dt.to_pytimedelta() + msg = "The behavior of TimedeltaProperties.to_pytimedelta is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = ser.dt.to_pytimedelta() assert isinstance(result, np.ndarray) assert result.dtype == object From 9b7d09d69e252e6afff4d991728713a541e03045 Mon Sep 17 00:00:00 2001 From: Abdulaziz Aloqeely <52792999+Aloqeely@users.noreply.github.com> Date: Tue, 23 Apr 2024 20:23:16 +0300 Subject: [PATCH 21/22] TST: No longer produce test_stata.dta file after running test suite (#58381) Use tmp_path fixture Co-authored-by: Abdulaziz Aloqeely <52792999+DAzVise@users.noreply.github.com> --- pandas/tests/io/test_stata.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 43c62237c6786b..2650f351e22037 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -1962,7 +1962,7 @@ def test_writer_118_exceptions(self, temp_file): "dtype_backend", ["numpy_nullable", pytest.param("pyarrow", marks=td.skip_if_no("pyarrow"))], ) - def test_read_write_ea_dtypes(self, dtype_backend, temp_file): + def test_read_write_ea_dtypes(self, dtype_backend, temp_file, tmp_path): df = DataFrame( { "a": [1, 2, None], @@ -1974,7 +1974,8 @@ def test_read_write_ea_dtypes(self, dtype_backend, temp_file): index=pd.Index([0, 1, 2], name="index"), ) df = df.convert_dtypes(dtype_backend=dtype_backend) - df.to_stata("test_stata.dta", version=118) + stata_path = tmp_path / "test_stata.dta" + df.to_stata(stata_path, version=118) df.to_stata(temp_file) written_and_read_again = self.read_dta(temp_file) From 23dd1f12aea8bfd503ea86ce1850de817cf0fe43 Mon Sep 17 00:00:00 2001 From: Nrezhang <102526155+Nrezhang@users.noreply.github.com> Date: Tue, 23 Apr 2024 13:25:32 -0400 Subject: [PATCH 22/22] #58324 (#58379) --- doc/source/user_guide/style.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/user_guide/style.ipynb b/doc/source/user_guide/style.ipynb index f831723f449310..43da43a983429a 100644 --- a/doc/source/user_guide/style.ipynb +++ b/doc/source/user_guide/style.ipynb @@ -1908,7 +1908,7 @@ "- Provide an API that is pleasing to use interactively and is \"good enough\" for many tasks\n", "- Provide the foundations for dedicated libraries to build on\n", "\n", - "If you build a great library on top of this, let us know and we'll [link](https://pandas.pydata.org/pandas-docs/stable/ecosystem.html) to it.\n", + "If you build a great library on top of this, let us know and we'll [link](https://pandas.pydata.org/community/ecosystem.html) to it.\n", "\n", "### Subclassing\n", "\n",