Pandas 2.2.0 (handley-lab#359)

* fix boxplot closures (maybe_color_bp changed scope)
* _args_adjust no longer a thing (still need to deal with hists)
* remove convert_period from _get_xticks
* add fig to PlanePlot2D._make_plot()
* range is now self._bin_range, and convert data to numeric first (not quite happy with the latter).
* update pandas requirement
* bump version
* fix MultiIndex.format deprecation
* address warnings in plot.py (no idea why these specific ones are the problem???)
* Revert "address warnings in plot.py (no idea why these specific ones are the problem???)" This reverts commit 1f9857b.
* fix chained assignment warnings
* don't include groups in chains.apply in remove_burn_in
* replace grouper with _grouper
* more chained assignment warning fixes
* fix Index.format deprecation
* bump version to 2.7.1
* remove unused exception handling from HistPlot._calculate_bins
* remember loc to fix covariance coverage (cov cov lol)
* remove pandas 1.5-specific documentation fix
* use sphinx-autodoc-typehints to fix PlotAccessor documentation
* bump version to 2.7.4
* add test for `range=None` in `hist_plot_1d` as this is no longer covered after changes to defaults in pandas
* consistent diagonal
* try whether `auto-update-conda` makes conda CI work better
* make conda install `pandas~=2.2.0`
* Lukas' tidy suggestion for Hist1dPlot._calculate_bins()
* remove unused import
* I can only apologise, I'd forgotten to reinstall the pre-commit hooks after cloning a fresh repo

---------

Co-authored-by: lukashergt <[email protected]>
AdamOrmondroyd · Mar 16, 2024 · cb60b9a · cb60b9a
1 parent 54170d8
commit cb60b9a
Show file tree

Hide file tree

Showing 14 changed files with 63 additions and 70 deletions.
diff --git a/.github/workflows/CI.yaml b/.github/workflows/CI.yaml
@@ -113,7 +113,7 @@ jobs:
         run: |
           conda config --append channels conda-forge
           conda install pytest pytest-cov
-          conda install scipy numpy 'matplotlib>=3.6.1,<3.9.0' 'pandas>=2.0.0,<2.2.0'
+          conda install scipy numpy 'matplotlib>=3.6.1,<3.9.0' 'pandas~=2.2.0'
 
       - name: Test with pytest
         shell: bash -l {0}

diff --git a/README.rst b/README.rst
@@ -2,7 +2,7 @@
 anesthetic: nested sampling post-processing
 ===========================================
 :Authors: Will Handley and Lukas Hergt
-:Version: 2.8.1
+:Version: 2.8.2
 :Homepage: https://github.com/handley-lab/anesthetic
 :Documentation: http://anesthetic.readthedocs.io/
 

diff --git a/anesthetic/_format.py b/anesthetic/_format.py
@@ -12,8 +12,7 @@ class _DataFrameFormatter(DataFrameFormatter):
     def _get_formatted_column_labels(self, frame):
         columns = frame.columns
         if isinstance(columns, MultiIndex):
-            fmt_columns = columns.format(sparsify=False, adjoin=False)
-            fmt_columns = list(zip(*fmt_columns))
+            fmt_columns = [tuple(str(c) for c in column) for column in columns]
             dtypes = self.frame.dtypes._values
 
             # if we have a Float level, they don't use leading space at all
@@ -37,7 +36,7 @@ def space_format(x, y):
             str_columns = [list(x) for x in zip(*str_columns)]
             str_columns = [_make_fixed_width(x) for x in str_columns]
         else:
-            fmt_columns = columns.format()
+            fmt_columns = [str(x) for x in columns]
             dtypes = self.frame.dtypes
             need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes)))
             str_columns = [

diff --git a/anesthetic/_version.py b/anesthetic/_version.py
@@ -1 +1 @@
-__version__ = '2.8.1'
+__version__ = '2.8.2'
diff --git a/anesthetic/plot.py b/anesthetic/plot.py
@@ -247,12 +247,12 @@ def _position_frame(index, columns, lower, diagonal, upper):
             for i, x in enumerate(columns):
                 if all_params.index(x) < all_params.index(y):
                     if lower:
-                        position[x][y] = -1
+                        position.loc[y, x] = -1
                 elif all_params.index(x) > all_params.index(y):
                     if upper:
-                        position[x][y] = +1
+                        position.loc[y, x] = +1
                 elif diagonal:
-                    position[x][y] = 0
+                    position.loc[y, x] = 0
         return position
 
     @classmethod
@@ -277,15 +277,15 @@ def _axes_frame(cls, position, fig, gridspec_kw=None, subplot_spec=None):
                                          hspace=hspace, wspace=wspace,
                                          subplot_spec=subplot_spec,
                                          **gridspec_kw)
-        axes[:][:] = None
+        axes.loc[:, :] = None
         for j, y in enumerate(axes.index[::-1]):
             for i, x in enumerate(axes.columns):
                 if position[x][y] is not None:
                     sx = list(axes[x].dropna())
                     sx = sx[0] if sx else None
                     sy = list(axes.T[y].dropna())
                     sy = sy[0] if sy else None
-                    axes[x][y] = fig.add_subplot(
+                    axes.loc[y, x] = fig.add_subplot(
                         gs[axes.index.size - 1 - j, i], sharex=sx, sharey=sy
                     )
                     if position[x][y] == 0:

diff --git a/anesthetic/plotting/_matplotlib/boxplot.py b/anesthetic/plotting/_matplotlib/boxplot.py
@@ -1,5 +1,6 @@
 import pandas.plotting._matplotlib.boxplot
-from pandas.plotting._matplotlib.boxplot import BoxPlot as _BoxPlot
+from pandas.plotting._matplotlib.boxplot import (BoxPlot as _BoxPlot,
+                                                 maybe_color_bp)
 from anesthetic.plotting._matplotlib.core import _WeightedMPLPlot, _get_weights
 from anesthetic.utils import quantile
 from pandas.core.dtypes.missing import remove_na_arraylike
@@ -56,9 +57,9 @@ def boxplot(data, *args, **kwds):
 
     def create_plot_group():
         fontsize = None  # pragma: no cover
-        maybe_color_bp = None  # pragma: no cover
         return_type = None  # pragma: no cover
         rot = None  # pragma: no cover
+        colors = None  # pragma: no cover
 
         def plot_group(keys, values, ax, **kwds):  # pragma: no cover
             # GH 45465: xlabel/ylabel need to be popped out before plotting
@@ -96,7 +97,7 @@ def plot_group(keys, values, ax, **kwds):  # pragma: no cover
                 ax.set_xticklabels(keys, rotation=rot)
             else:
                 ax.set_yticklabels(keys, rotation=rot)
-            maybe_color_bp(bp, **kwds)
+            maybe_color_bp(bp, color_tup=colors, **kwds)
 
             # Return axes in multiplot case, maybe revisit later # 985
             if return_type == "dict":

diff --git a/anesthetic/plotting/_matplotlib/core.py b/anesthetic/plotting/_matplotlib/core.py
@@ -52,11 +52,11 @@ def _get_index_name(self):
         else:
             return super()._get_index_name()
 
-    def _get_xticks(self, convert_period: bool = False):
+    def _get_xticks(self):
         if isinstance(self.data, _WeightedObject):
             return self.data.drop_weights().index._mpl_repr()
         else:
-            return super()._get_xticks(convert_period)
+            return super()._get_xticks()
 
 
 def _compress_weights(kwargs, data):
@@ -82,7 +82,7 @@ def __init__(self, data, x, y, s=None, c=None, **kwargs) -> None:
 
 class _PlanePlot2d(PlanePlot):
 
-    def _make_plot(self):
+    def _make_plot(self, fig):
         if self.colormap is not None:
             self.kwds['cmap'] = plt.get_cmap(self.colormap)
         colors = self._get_colors()
@@ -109,9 +109,6 @@ def _make_plot(self):
                             f"supported by {self._kind}")
         self._plot(ax, x.values, y.values, **kwds)
 
-    def _args_adjust(self):
-        pass
-
 
 class ScatterPlot2d(_CompressedMPLPlot, _PlanePlot2d):
     # noqa: disable=D101

diff --git a/anesthetic/plotting/_matplotlib/hist.py b/anesthetic/plotting/_matplotlib/hist.py
@@ -22,60 +22,56 @@
     hist_plot_1d,
     quantile_plot_interval,
 )
-from anesthetic.utils import quantile, histogram_bin_edges
+from anesthetic.utils import histogram_bin_edges
 
 
 class HistPlot(_WeightedMPLPlot, _HistPlot):
 
     # noqa: disable=D101
-    def _args_adjust(self) -> None:
+    def _adjust_bins(self, bins) -> None:
         if (
-                hasattr(self, 'bins') and
-                isinstance(self.bins, str) and
-                self.bins in ['fd', 'scott', 'sqrt']
+                isinstance(bins, str) and
+                bins in ['fd', 'scott', 'sqrt']
         ):
-            self.bins = self._calculate_bins(self.data)
-        super()._args_adjust()
+            bins = self._calculate_bins(self.data, bins)
+        return super()._adjust_bins(bins)
 
     # noqa: disable=D101
-    def _calculate_bins(self, data):
+    def _calculate_bins(self, data, bins):
         if self.logx:
             data = np.log10(data)
-            if 'range' in self.kwds and self.kwds['range'] is not None:
-                xmin, xmax = self.kwds['range']
-                self.kwds['range'] = (np.log10(xmin), np.log10(xmax))
+            if self._bin_range is not None:
+                xmin, xmax = self._bin_range
+                self._bin_range = (np.log10(xmin), np.log10(xmax))
         nd_values = data.infer_objects(copy=False)._get_numeric_data()
         values = np.ravel(nd_values)
-        weights = self.kwds.get("weights", None)
+        weights = self.weights
         if weights is not None:
-            try:
-                weights = np.broadcast_to(weights[:, None], nd_values.shape)
-            except ValueError:
-                pass
+            weights = np.broadcast_to(weights[:, None], nd_values.shape)
             weights = np.ravel(weights)
             weights = weights[~isna(values)]
 
         values = values[~isna(values)]
 
-        if isinstance(self.bins, str) and self.bins in ['fd', 'scott', 'sqrt']:
+        if isinstance(bins, str) and bins in ['fd', 'scott', 'sqrt']:
             bins = histogram_bin_edges(
                 values,
                 weights=weights,
-                bins=self.bins,
+                bins=bins,
                 beta=self.kwds.pop('beta', 'equal'),
-                range=self.kwds.get('range', None)
+                range=self._bin_range
             )
         else:
             bins = np.histogram_bin_edges(
                 values,
                 weights=weights,
-                bins=self.bins,
-                range=self.kwds.get('range', None)
+                bins=bins,
+                range=self._bin_range
             )
         if self.logx:
             bins = 10**bins
-            if 'range' in self.kwds and self.kwds['range'] is not None:
-                self.kwds['range'] = (xmin, xmax)
+            if self._bin_range is not None:
+                self._bin_range = (xmin, xmax)
         return bins
 
     def _get_colors(self, num_colors=None, color_kwds='color'):
@@ -180,18 +176,13 @@ def __init__(
     ) -> None:
         super().__init__(data, bins=bins, bottom=bottom, **kwargs)
 
-    def _calculate_bins(self, data):
-        if 'range' not in self.kwds or self.kwds['range'] is None:
+    def _calculate_bins(self, data, bins):
+        if self._bin_range is None:
             q = self.kwds.get('q', 5)
             q = quantile_plot_interval(q=q)
-            weights = self.kwds.get('weights', None)
-            xmin = quantile(data, q[0], weights)
-            xmax = quantile(data, q[-1], weights)
-            self.kwds['range'] = (xmin, xmax)
-            bins = super()._calculate_bins(data)
-            self.kwds.pop('range')
-        else:
-            bins = super()._calculate_bins(data)
+            xmin, xmax = data.quantile(q).to_numpy().ravel()
+            self._bin_range = (xmin, xmax)
+        bins = super()._calculate_bins(data, bins)
         return bins
 
     @classmethod

diff --git a/anesthetic/samples.py b/anesthetic/samples.py
@@ -518,7 +518,8 @@ def remove_burn_in(self, burn_in, reset_index=False, inplace=False):
             nsamples = chains.count().iloc[:, 0].to_numpy()
             ndrop = ndrop * nsamples
         ndrop = ndrop.astype(int)
-        data = self.drop(chains.apply(lambda g: g.head(ndrop[g.name-1])).index,
+        data = self.drop(chains.apply(lambda g: g.head(ndrop[g.name-1]),
+                                      include_groups=False).index,
                          inplace=inplace)
         if reset_index:
             data = data.reset_index(drop=True, inplace=inplace)

diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py
@@ -44,8 +44,7 @@ def read_csv(filename, *args, **kwargs):
 class WeightedGroupBy(GroupBy):
     """Weighted version of ``pandas.core.groupby.GroupBy``."""
 
-    grouper: ops.BaseGrouper
-    """:meta private:"""
+    _grouper: ops.BaseGrouper
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
@@ -119,10 +118,10 @@ def _gotitem(self, key, ndim: int, subset=None):  # pragma: no cover
                 subset = self.obj
             return WeightedDataFrameGroupBy(
                 subset,
-                self.grouper,
+                self._grouper,
                 axis=self.axis,
                 level=self.level,
-                grouper=self.grouper,
+                grouper=self._grouper,
                 exclusions=self.exclusions,
                 selection=key,
                 as_index=self.as_index,
@@ -138,7 +137,7 @@ def _gotitem(self, key, ndim: int, subset=None):  # pragma: no cover
             return WeightedSeriesGroupBy(
                 subset,
                 level=self.level,
-                grouper=self.grouper,
+                grouper=self._grouper,
                 selection=key,
                 sort=self.sort,
                 group_keys=self.group_keys,

diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -58,6 +58,7 @@ def get_version(short=False):
     'sphinx.ext.githubpages',
     'sphinx.ext.imgconverter',
     'sphinx_copybutton',
+    'sphinx_autodoc_typehints',
     'matplotlib.sphinxext.plot_directive',
     'numpydoc',
 ]
@@ -98,8 +99,6 @@ def get_version(short=False):
 
 autosummary_generate = True
 
-nitpick_ignore = [('py:obj', 'pandas.core.groupby.SeriesGroupBy.sample')] # not currently included in pandas 1.5, but will in future
-
 # -- Options for autosectionlabel------------------------------------------
 autosectionlabel_prefix_document = True
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -34,7 +34,7 @@ requires-python = ">=3.8"
 dependencies = [
     "scipy",
     "numpy",
-    "pandas>=2.0.0,<2.2.0",
+    "pandas~=2.2.0",
     "matplotlib>=3.6.1,<3.9.0",
 ]
 classifiers = [
@@ -60,7 +60,8 @@ classifiers = [
 "JOSS paper" = "https://joss.theoj.org/papers/10.21105/joss.01414"
 
 [project.optional-dependencies]
-docs = ["sphinx>=4.2.0", "sphinx_rtd_theme>=1.2.2", "sphinx-copybutton", "numpydoc"]
+docs = ["sphinx>=4.2.0", "sphinx_rtd_theme>=1.2.2", "sphinx-copybutton",
+        "sphinx-autodoc-typehints", "numpydoc"]
 test = ["pytest", "pytest-cov", "flake8", "pydocstyle", "packaging", "pre-commit"]
 ultranest = ["h5py"]
 astropy = ["astropy"]

diff --git a/tests/test_plot.py b/tests/test_plot.py
@@ -831,6 +831,11 @@ def test_logscale_hist_kwargs(b):
     assert amax < 0.5
     assert edges[0] < 1e-3
     assert edges[-1] > 1e3
+    h, edges, _ = hist_plot_1d(ax, data, bins=b, range=None)
+    amax = abs(np.log10(edges[np.argmax(h)]))
+    assert amax < 0.5
+    assert edges[0] < 1e-3
+    assert edges[-1] > 1e3
     h, edges, _ = hist_plot_1d(ax, data, bins=b, range=(1e-3, 1e3))
     amax = abs(np.log10(edges[np.argmax(h)]))
     assert amax < 0.5

diff --git a/tests/test_weighted_pandas.py b/tests/test_weighted_pandas.py
@@ -409,7 +409,7 @@ def test_WeightedDataFrame_compress(frame):
 
 
 def test_WeightedDataFrame_nan(frame):
-    frame['A'][0] = np.nan
+    frame.loc[0, 'A'] = np.nan
     assert ~frame.mean().isna().any()
     assert ~frame.mean(axis=1).isna().any()
     assert_array_equal(frame.mean(skipna=False).isna(), [True] + [False]*5)
@@ -422,7 +422,7 @@ def test_WeightedDataFrame_nan(frame):
     assert_array_equal(frame.std(axis=1, skipna=False).isna()[0:6],
                        [True, False, False, False, False, False])
 
-    frame['B'][2] = np.nan
+    frame.loc[2, 'B'] = np.nan
     assert ~frame.mean().isna().any()
     assert_array_equal(frame.mean(skipna=False).isna(),
                        [True, True] + [False]*4)
@@ -435,10 +435,10 @@ def test_WeightedDataFrame_nan(frame):
     assert_array_equal(frame.std(axis=1, skipna=False).isna()[0:6],
                        [True, False, True, False, False, False])
 
-    frame['C'][4] = np.nan
-    frame['D'][5] = np.nan
-    frame['E'][6] = np.nan
-    frame['F'][7] = np.nan
+    frame.loc[4, 'C'] = np.nan
+    frame.loc[5, 'D'] = np.nan
+    frame.loc[6, 'E'] = np.nan
+    frame.loc[7, 'F'] = np.nan
     assert ~frame.mean().isna().any()
     assert frame.mean(skipna=False).isna().all()
     assert_array_equal(frame.mean(axis=1, skipna=False).isna()[0:6],
@@ -493,7 +493,7 @@ def test_WeightedSeries_cov(frame):
     assert_allclose(frame.A.cov(frame.A), 1./12, atol=1e-2)
     assert_allclose(frame.A.cov(frame.B), 0, atol=1e-2)
 
-    frame['A'][0] = np.nan
+    frame.loc[0, 'A'] = np.nan
     assert_allclose(frame.A.cov(frame.A), 1./12, atol=1e-2)
     assert_allclose(frame.A.cov(frame.B), 0, atol=1e-2)