Skip to content

Commit

Permalink
Pandas 2.2.0 (handley-lab#359)
Browse files Browse the repository at this point in the history
* fix boxplot closures (maybe_color_bp changed scope)

* _args_adjust no longer a thing (still need to deal with hists)

* remove convert_period from _get_xticks

* add fig to PlanePlot2D._make_plot()

* range is now self._bin_range, and convert data to numeric first (not quite happy with the latter).

* update pandas requirement

* bump version

* fix MultiIndex.format deprecation

* address warnings in plot.py (no idea why these specific ones are the problem???)

* Revert "address warnings in plot.py (no idea why these specific ones are the problem???)"

This reverts commit 1f9857b.

* fix chained assignment warnings

* don't include groups in chains.apply in remove_burn_in

* replace grouper with _grouper

* more chained assignment warning fixes

* fix Index.format deprecation

* bump version to 2.7.1

* remove unused exception handling from HistPlot._calculate_bins

* remember loc to fix covariance coverage (cov cov lol)

* remove pandas 1.5-specific documentation fix

* use sphinx-autodoc-typehints to fix PlotAccessor documentation

* bump version to 2.7.4

* add test for `range=None` in `hist_plot_1d` as this is no longer covered after changes to defaults in pandas

* consistent diagonal

* check whether `auto-update-conda` makes conda CI work better

* make conda install `pandas~=2.2.0`

* Lukas' tidy suggestion for Hist1dPlot._calculate_bins()

* remove unused import

* I can only apologise, I'd forgotten to reinstall the pre-commit hooks after cloning a fresh repo

---------

Co-authored-by: lukashergt <[email protected]>
  • Loading branch information
AdamOrmondroyd and lukashergt authored Mar 16, 2024
1 parent 54170d8 commit cb60b9a
Show file tree
Hide file tree
Showing 14 changed files with 63 additions and 70 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/CI.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ jobs:
run: |
conda config --append channels conda-forge
conda install pytest pytest-cov
conda install scipy numpy 'matplotlib>=3.6.1,<3.9.0' 'pandas>=2.0.0,<2.2.0'
conda install scipy numpy 'matplotlib>=3.6.1,<3.9.0' 'pandas~=2.2.0'
- name: Test with pytest
shell: bash -l {0}
Expand Down
2 changes: 1 addition & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
anesthetic: nested sampling post-processing
===========================================
:Authors: Will Handley and Lukas Hergt
:Version: 2.8.1
:Version: 2.8.2
:Homepage: https://github.com/handley-lab/anesthetic
:Documentation: http://anesthetic.readthedocs.io/

Expand Down
5 changes: 2 additions & 3 deletions anesthetic/_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@ class _DataFrameFormatter(DataFrameFormatter):
def _get_formatted_column_labels(self, frame):
columns = frame.columns
if isinstance(columns, MultiIndex):
fmt_columns = columns.format(sparsify=False, adjoin=False)
fmt_columns = list(zip(*fmt_columns))
fmt_columns = [tuple(str(c) for c in column) for column in columns]
dtypes = self.frame.dtypes._values

# if we have a Float level, they don't use leading space at all
Expand All @@ -37,7 +36,7 @@ def space_format(x, y):
str_columns = [list(x) for x in zip(*str_columns)]
str_columns = [_make_fixed_width(x) for x in str_columns]
else:
fmt_columns = columns.format()
fmt_columns = [str(x) for x in columns]
dtypes = self.frame.dtypes
need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes)))
str_columns = [
Expand Down
2 changes: 1 addition & 1 deletion anesthetic/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '2.8.1'
__version__ = '2.8.2'
10 changes: 5 additions & 5 deletions anesthetic/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,12 +247,12 @@ def _position_frame(index, columns, lower, diagonal, upper):
for i, x in enumerate(columns):
if all_params.index(x) < all_params.index(y):
if lower:
position[x][y] = -1
position.loc[y, x] = -1
elif all_params.index(x) > all_params.index(y):
if upper:
position[x][y] = +1
position.loc[y, x] = +1
elif diagonal:
position[x][y] = 0
position.loc[y, x] = 0
return position

@classmethod
Expand All @@ -277,15 +277,15 @@ def _axes_frame(cls, position, fig, gridspec_kw=None, subplot_spec=None):
hspace=hspace, wspace=wspace,
subplot_spec=subplot_spec,
**gridspec_kw)
axes[:][:] = None
axes.loc[:, :] = None
for j, y in enumerate(axes.index[::-1]):
for i, x in enumerate(axes.columns):
if position[x][y] is not None:
sx = list(axes[x].dropna())
sx = sx[0] if sx else None
sy = list(axes.T[y].dropna())
sy = sy[0] if sy else None
axes[x][y] = fig.add_subplot(
axes.loc[y, x] = fig.add_subplot(
gs[axes.index.size - 1 - j, i], sharex=sx, sharey=sy
)
if position[x][y] == 0:
Expand Down
7 changes: 4 additions & 3 deletions anesthetic/plotting/_matplotlib/boxplot.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pandas.plotting._matplotlib.boxplot
from pandas.plotting._matplotlib.boxplot import BoxPlot as _BoxPlot
from pandas.plotting._matplotlib.boxplot import (BoxPlot as _BoxPlot,
maybe_color_bp)
from anesthetic.plotting._matplotlib.core import _WeightedMPLPlot, _get_weights
from anesthetic.utils import quantile
from pandas.core.dtypes.missing import remove_na_arraylike
Expand Down Expand Up @@ -56,9 +57,9 @@ def boxplot(data, *args, **kwds):

def create_plot_group():
fontsize = None # pragma: no cover
maybe_color_bp = None # pragma: no cover
return_type = None # pragma: no cover
rot = None # pragma: no cover
colors = None # pragma: no cover

def plot_group(keys, values, ax, **kwds): # pragma: no cover
# GH 45465: xlabel/ylabel need to be popped out before plotting
Expand Down Expand Up @@ -96,7 +97,7 @@ def plot_group(keys, values, ax, **kwds): # pragma: no cover
ax.set_xticklabels(keys, rotation=rot)
else:
ax.set_yticklabels(keys, rotation=rot)
maybe_color_bp(bp, **kwds)
maybe_color_bp(bp, color_tup=colors, **kwds)

# Return axes in multiplot case, maybe revisit later # 985
if return_type == "dict":
Expand Down
9 changes: 3 additions & 6 deletions anesthetic/plotting/_matplotlib/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,11 @@ def _get_index_name(self):
else:
return super()._get_index_name()

def _get_xticks(self, convert_period: bool = False):
def _get_xticks(self):
if isinstance(self.data, _WeightedObject):
return self.data.drop_weights().index._mpl_repr()
else:
return super()._get_xticks(convert_period)
return super()._get_xticks()


def _compress_weights(kwargs, data):
Expand All @@ -82,7 +82,7 @@ def __init__(self, data, x, y, s=None, c=None, **kwargs) -> None:

class _PlanePlot2d(PlanePlot):

def _make_plot(self):
def _make_plot(self, fig):
if self.colormap is not None:
self.kwds['cmap'] = plt.get_cmap(self.colormap)
colors = self._get_colors()
Expand All @@ -109,9 +109,6 @@ def _make_plot(self):
f"supported by {self._kind}")
self._plot(ax, x.values, y.values, **kwds)

def _args_adjust(self):
pass


class ScatterPlot2d(_CompressedMPLPlot, _PlanePlot2d):
# noqa: disable=D101
Expand Down
57 changes: 24 additions & 33 deletions anesthetic/plotting/_matplotlib/hist.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,60 +22,56 @@
hist_plot_1d,
quantile_plot_interval,
)
from anesthetic.utils import quantile, histogram_bin_edges
from anesthetic.utils import histogram_bin_edges


class HistPlot(_WeightedMPLPlot, _HistPlot):

# noqa: disable=D101
def _args_adjust(self) -> None:
def _adjust_bins(self, bins) -> None:
if (
hasattr(self, 'bins') and
isinstance(self.bins, str) and
self.bins in ['fd', 'scott', 'sqrt']
isinstance(bins, str) and
bins in ['fd', 'scott', 'sqrt']
):
self.bins = self._calculate_bins(self.data)
super()._args_adjust()
bins = self._calculate_bins(self.data, bins)
return super()._adjust_bins(bins)

# noqa: disable=D101
def _calculate_bins(self, data):
def _calculate_bins(self, data, bins):
if self.logx:
data = np.log10(data)
if 'range' in self.kwds and self.kwds['range'] is not None:
xmin, xmax = self.kwds['range']
self.kwds['range'] = (np.log10(xmin), np.log10(xmax))
if self._bin_range is not None:
xmin, xmax = self._bin_range
self._bin_range = (np.log10(xmin), np.log10(xmax))
nd_values = data.infer_objects(copy=False)._get_numeric_data()
values = np.ravel(nd_values)
weights = self.kwds.get("weights", None)
weights = self.weights
if weights is not None:
try:
weights = np.broadcast_to(weights[:, None], nd_values.shape)
except ValueError:
pass
weights = np.broadcast_to(weights[:, None], nd_values.shape)
weights = np.ravel(weights)
weights = weights[~isna(values)]

values = values[~isna(values)]

if isinstance(self.bins, str) and self.bins in ['fd', 'scott', 'sqrt']:
if isinstance(bins, str) and bins in ['fd', 'scott', 'sqrt']:
bins = histogram_bin_edges(
values,
weights=weights,
bins=self.bins,
bins=bins,
beta=self.kwds.pop('beta', 'equal'),
range=self.kwds.get('range', None)
range=self._bin_range
)
else:
bins = np.histogram_bin_edges(
values,
weights=weights,
bins=self.bins,
range=self.kwds.get('range', None)
bins=bins,
range=self._bin_range
)
if self.logx:
bins = 10**bins
if 'range' in self.kwds and self.kwds['range'] is not None:
self.kwds['range'] = (xmin, xmax)
if self._bin_range is not None:
self._bin_range = (xmin, xmax)
return bins

def _get_colors(self, num_colors=None, color_kwds='color'):
Expand Down Expand Up @@ -180,18 +176,13 @@ def __init__(
) -> None:
super().__init__(data, bins=bins, bottom=bottom, **kwargs)

def _calculate_bins(self, data):
if 'range' not in self.kwds or self.kwds['range'] is None:
def _calculate_bins(self, data, bins):
if self._bin_range is None:
q = self.kwds.get('q', 5)
q = quantile_plot_interval(q=q)
weights = self.kwds.get('weights', None)
xmin = quantile(data, q[0], weights)
xmax = quantile(data, q[-1], weights)
self.kwds['range'] = (xmin, xmax)
bins = super()._calculate_bins(data)
self.kwds.pop('range')
else:
bins = super()._calculate_bins(data)
xmin, xmax = data.quantile(q).to_numpy().ravel()
self._bin_range = (xmin, xmax)
bins = super()._calculate_bins(data, bins)
return bins

@classmethod
Expand Down
3 changes: 2 additions & 1 deletion anesthetic/samples.py
Original file line number Diff line number Diff line change
Expand Up @@ -518,7 +518,8 @@ def remove_burn_in(self, burn_in, reset_index=False, inplace=False):
nsamples = chains.count().iloc[:, 0].to_numpy()
ndrop = ndrop * nsamples
ndrop = ndrop.astype(int)
data = self.drop(chains.apply(lambda g: g.head(ndrop[g.name-1])).index,
data = self.drop(chains.apply(lambda g: g.head(ndrop[g.name-1]),
include_groups=False).index,
inplace=inplace)
if reset_index:
data = data.reset_index(drop=True, inplace=inplace)
Expand Down
9 changes: 4 additions & 5 deletions anesthetic/weighted_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,7 @@ def read_csv(filename, *args, **kwargs):
class WeightedGroupBy(GroupBy):
"""Weighted version of ``pandas.core.groupby.GroupBy``."""

grouper: ops.BaseGrouper
""":meta private:"""
_grouper: ops.BaseGrouper

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
Expand Down Expand Up @@ -119,10 +118,10 @@ def _gotitem(self, key, ndim: int, subset=None): # pragma: no cover
subset = self.obj
return WeightedDataFrameGroupBy(
subset,
self.grouper,
self._grouper,
axis=self.axis,
level=self.level,
grouper=self.grouper,
grouper=self._grouper,
exclusions=self.exclusions,
selection=key,
as_index=self.as_index,
Expand All @@ -138,7 +137,7 @@ def _gotitem(self, key, ndim: int, subset=None): # pragma: no cover
return WeightedSeriesGroupBy(
subset,
level=self.level,
grouper=self.grouper,
grouper=self._grouper,
selection=key,
sort=self.sort,
group_keys=self.group_keys,
Expand Down
3 changes: 1 addition & 2 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ def get_version(short=False):
'sphinx.ext.githubpages',
'sphinx.ext.imgconverter',
'sphinx_copybutton',
'sphinx_autodoc_typehints',
'matplotlib.sphinxext.plot_directive',
'numpydoc',
]
Expand Down Expand Up @@ -98,8 +99,6 @@ def get_version(short=False):

autosummary_generate = True

nitpick_ignore = [('py:obj', 'pandas.core.groupby.SeriesGroupBy.sample')] # not currently included in pandas 1.5, but will in future

# -- Options for autosectionlabel------------------------------------------
autosectionlabel_prefix_document = True

Expand Down
5 changes: 3 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ requires-python = ">=3.8"
dependencies = [
"scipy",
"numpy",
"pandas>=2.0.0,<2.2.0",
"pandas~=2.2.0",
"matplotlib>=3.6.1,<3.9.0",
]
classifiers = [
Expand All @@ -60,7 +60,8 @@ classifiers = [
"JOSS paper" = "https://joss.theoj.org/papers/10.21105/joss.01414"

[project.optional-dependencies]
docs = ["sphinx>=4.2.0", "sphinx_rtd_theme>=1.2.2", "sphinx-copybutton", "numpydoc"]
docs = ["sphinx>=4.2.0", "sphinx_rtd_theme>=1.2.2", "sphinx-copybutton",
"sphinx-autodoc-typehints", "numpydoc"]
test = ["pytest", "pytest-cov", "flake8", "pydocstyle", "packaging", "pre-commit"]
ultranest = ["h5py"]
astropy = ["astropy"]
Expand Down
5 changes: 5 additions & 0 deletions tests/test_plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -831,6 +831,11 @@ def test_logscale_hist_kwargs(b):
assert amax < 0.5
assert edges[0] < 1e-3
assert edges[-1] > 1e3
h, edges, _ = hist_plot_1d(ax, data, bins=b, range=None)
amax = abs(np.log10(edges[np.argmax(h)]))
assert amax < 0.5
assert edges[0] < 1e-3
assert edges[-1] > 1e3
h, edges, _ = hist_plot_1d(ax, data, bins=b, range=(1e-3, 1e3))
amax = abs(np.log10(edges[np.argmax(h)]))
assert amax < 0.5
Expand Down
14 changes: 7 additions & 7 deletions tests/test_weighted_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,7 @@ def test_WeightedDataFrame_compress(frame):


def test_WeightedDataFrame_nan(frame):
frame['A'][0] = np.nan
frame.loc[0, 'A'] = np.nan
assert ~frame.mean().isna().any()
assert ~frame.mean(axis=1).isna().any()
assert_array_equal(frame.mean(skipna=False).isna(), [True] + [False]*5)
Expand All @@ -422,7 +422,7 @@ def test_WeightedDataFrame_nan(frame):
assert_array_equal(frame.std(axis=1, skipna=False).isna()[0:6],
[True, False, False, False, False, False])

frame['B'][2] = np.nan
frame.loc[2, 'B'] = np.nan
assert ~frame.mean().isna().any()
assert_array_equal(frame.mean(skipna=False).isna(),
[True, True] + [False]*4)
Expand All @@ -435,10 +435,10 @@ def test_WeightedDataFrame_nan(frame):
assert_array_equal(frame.std(axis=1, skipna=False).isna()[0:6],
[True, False, True, False, False, False])

frame['C'][4] = np.nan
frame['D'][5] = np.nan
frame['E'][6] = np.nan
frame['F'][7] = np.nan
frame.loc[4, 'C'] = np.nan
frame.loc[5, 'D'] = np.nan
frame.loc[6, 'E'] = np.nan
frame.loc[7, 'F'] = np.nan
assert ~frame.mean().isna().any()
assert frame.mean(skipna=False).isna().all()
assert_array_equal(frame.mean(axis=1, skipna=False).isna()[0:6],
Expand Down Expand Up @@ -493,7 +493,7 @@ def test_WeightedSeries_cov(frame):
assert_allclose(frame.A.cov(frame.A), 1./12, atol=1e-2)
assert_allclose(frame.A.cov(frame.B), 0, atol=1e-2)

frame['A'][0] = np.nan
frame.loc[0, 'A'] = np.nan
assert_allclose(frame.A.cov(frame.A), 1./12, atol=1e-2)
assert_allclose(frame.A.cov(frame.B), 0, atol=1e-2)

Expand Down

0 comments on commit cb60b9a

Please sign in to comment.