Skip to content

Commit

Permalink
HDF5 read/write functionality (#303)
Browse files Browse the repository at this point in the history
* Added example hdf5 reader/writer

* HDF5 functionality and tests

* Bumped version number

* Updated pyproject.toml

* Removed typing for python3.8

* Lint corrections

* Windows corrections

* Fixed problems with pandas documentation

* Actually now making read_hdf documentation

* Removed uncovered AttributeError

* Updated the documentation

* bumped version

* Version bump

* Import read_hdf

* Updated docs for new location

* Moved hdf functionality to anesthetic.read

* Updated documentation

* Bumped version

* moved circular import back out again

* Removed inheritance from anesthetic

* fix hdf5 docstring thing

* Adjusted pandas.hdf to anesthetic.read_hdf

* Added back in class

---------

Co-authored-by: lukashergt <[email protected]>
  • Loading branch information
williamjameshandley and lukashergt authored Jun 29, 2023
1 parent 61bed43 commit 59af500
Show file tree
Hide file tree
Showing 12 changed files with 173 additions and 22 deletions.
2 changes: 1 addition & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
anesthetic: nested sampling post-processing
===========================================
:Authors: Will Handley and Lukas Hergt
:Version: 2.0.0-beta.43
:Version: 2.0.0-beta.44
:Homepage: https://github.com/handley-lab/anesthetic
:Documentation: http://anesthetic.readthedocs.io/

Expand Down
2 changes: 2 additions & 0 deletions anesthetic/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import anesthetic.samples
import anesthetic.plot
import anesthetic.read.chain
import anesthetic.read.hdf

import pandas
import pandas.plotting._core
Expand Down Expand Up @@ -47,4 +48,5 @@ def wrapper(backend=None):
make_2d_axes = anesthetic.plot.make_2d_axes
make_1d_axes = anesthetic.plot.make_1d_axes

read_hdf = anesthetic.read.hdf.read_hdf
read_chains = anesthetic.read.chain.read_chains
2 changes: 1 addition & 1 deletion anesthetic/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '2.0.0b43'
__version__ = '2.0.0b44'
67 changes: 67 additions & 0 deletions anesthetic/read/hdf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
"""Anesthetic overwrites for pandas hdf functionality."""
from pandas import HDFStore as _HDFStore
from pandas.io.pytables import to_hdf as _to_hdf, read_hdf as _read_hdf
from anesthetic.utils import adjust_docstrings
from anesthetic.samples import NestedSamples, MCMCSamples, Samples


class HDFStore(_HDFStore):  # noqa: D101
    # A pandas HDFStore that records, next to every object written with
    # ``put``, the name of its anesthetic class and the values of the
    # attributes listed in its ``_metadata``, and uses that record in ``get``
    # and ``select`` to hand back an object of the same class.

    # Maps the class name recorded in the file to the class used to rebuild
    # the object on read.
    anesthetic_types = {x.__name__: x
                        for x in [NestedSamples, MCMCSamples, Samples]}

    def _as_anesthetic(self, key, value):
        """Convert ``value``, read from ``key``, back to its anesthetic type.

        Parameters
        ----------
        key : str
            Key the value was read from; its storer attributes are consulted.
        value : object
            Object returned by the pandas ``get``/``select`` implementation.

        Returns
        -------
        object
            ``value`` wrapped in the recorded anesthetic class with its
            recorded metadata attributes restored, or ``value`` unchanged
            when the key carries no recognised ``anesthetic_type`` attribute.
        """
        attrs = self.get_storer(key).attrs
        type_name = getattr(attrs, 'anesthetic_type', None)
        anesthetic_type = self.anesthetic_types.get(type_name)
        if anesthetic_type is None:
            # Nothing (or an unknown class name) was recorded for this key,
            # e.g. it was not written through ``put`` of this class or it
            # holds a plain pandas object: return what pandas read instead of
            # failing with AttributeError/KeyError.
            return value
        value = anesthetic_type(value)
        metadata = getattr(attrs, '_metadata', {})
        value._metadata = list(metadata)
        for k, v in metadata.items():
            setattr(value, k, v)
        return value

    def get(self, key, *args, **kwargs):  # noqa: D102
        # Same call signature as the parent ``get``; only the return value is
        # post-processed.
        return self._as_anesthetic(key, super().get(key, *args, **kwargs))

    def put(self, key, value, *args, **kwargs):  # noqa: D102
        # Imported at call time: the anesthetic package itself imports this
        # module, so a module-level import would be circular.
        from anesthetic import __version__
        super().put(key, value, *args, **kwargs)
        attrs = self.get_storer(key).attrs
        # Record everything ``_as_anesthetic`` needs to rebuild the object.
        attrs._metadata = {k: getattr(value, k) for k in value._metadata}
        attrs.anesthetic_type = type(value).__name__
        attrs.anesthetic_version = __version__

    def select(self, key, *args, **kwargs):  # noqa: D102
        # Same call signature as the parent ``select``; only the return value
        # is post-processed.
        return self._as_anesthetic(key, super().select(key, *args, **kwargs))


def to_hdf(path_or_buf, key, value, mode="a", complevel=None, complib=None,
           *args, **kwargs):  # noqa: D103
    # No docstring on purpose: the module assigns the pandas ``to_hdf``
    # docstring to this function after its definition.
    #
    # The write goes through the HDFStore subclass defined in this module.
    # The ``with`` block closes the store (and its file handle) once the
    # write has finished, also when the write raises.
    with HDFStore(path_or_buf, mode=mode, complevel=complevel,
                  complib=complib) as store:
        # Shadow ``__fspath__`` on this instance so that it returns the store
        # itself.
        # NOTE(review): presumably this stops pandas from converting the
        # store back into a path string and opening its own pandas.HDFStore
        # -- confirm against pandas.io.pytables.to_hdf.
        store.__fspath__ = lambda: store
        return _to_hdf(store, key, value, *args, **kwargs)


def read_hdf(path_or_buf, *args, **kwargs):  # noqa: D103
    # No docstring on purpose: the module assigns the pandas ``read_hdf``
    # docstring to this function after its definition.
    #
    # Reading goes through the HDFStore subclass defined in this module. The
    # ``with`` block closes the store (and its file handle) once the data has
    # been read, also when reading raises.
    # NOTE(review): the store is opened with HDFStore's default mode, not a
    # read-only one; presumably that differs from pandas.read_hdf -- confirm
    # before relying on this for read-only or missing files.
    with HDFStore(path_or_buf) as store:
        return _read_hdf(store, *args, **kwargs)


# The two wrappers reuse the docstrings of the pandas functions they delegate
# to.
to_hdf.__doc__ = _to_hdf.__doc__
read_hdf.__doc__ = _read_hdf.__doc__
# Rewrite the borrowed read_hdf text with regular-expression substitutions:
# point ``read_hdf`` at the anesthetic function, qualify ``DataFrame`` with
# the pandas namespace, and replace the :func:/:class: roles below by plain
# backticks.
# NOTE(review): presumably the role replacements are there so the references
# resolve in the anesthetic Sphinx build -- confirm.
adjust_docstrings(read_hdf, 'read_hdf', 'anesthetic.read_hdf')
adjust_docstrings(read_hdf, 'DataFrame', 'pandas.DataFrame')
adjust_docstrings(read_hdf, ':func:`open`', '`open`')
adjust_docstrings(read_hdf, ':class:`pandas.HDFStore`', '`pandas.HDFStore`')
13 changes: 13 additions & 0 deletions anesthetic/samples.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from pandas.core.accessor import CachedAccessor
from anesthetic.plot import (make_1d_axes, make_2d_axes,
AxesSeries, AxesDataFrame)
from anesthetic.utils import adjust_docstrings
import anesthetic.weighted_pandas
from anesthetic.plotting import PlotAccessor
anesthetic.weighted_pandas._WeightedObject.plot =\
Expand Down Expand Up @@ -483,6 +484,11 @@ def tex(self):
"tex = samples.get_label(label) # anesthetic 2.0"
)

def to_hdf(self, path_or_buf, key, *args, **kwargs):  # noqa: D102
    # Deferred import: anesthetic.read.hdf itself imports from this module,
    # so it can only be resolved once both modules have been loaded.
    from anesthetic.read import hdf as _hdf
    # Hand this object over as the value to be written; everything else is
    # forwarded untouched.
    return _hdf.to_hdf(path_or_buf, key, self, *args, **kwargs)


class MCMCSamples(Samples):
"""Storage and plotting tools for MCMC samples.
Expand Down Expand Up @@ -1338,3 +1344,10 @@ def merge_samples_weighted(samples, weights=None, label=None):
new_samples.label = label

return new_samples


# Post-process the docstring reported for Samples.to_hdf. The order matters:
# each pair of calls first strips an existing ``pd.``/``pandas.`` qualifier
# and then prefixes every occurrence with ``pandas.``, so names that were
# already qualified are not prefixed twice.
adjust_docstrings(Samples.to_hdf, r'(pd|pandas)\.DataFrame', 'DataFrame')
adjust_docstrings(Samples.to_hdf, 'DataFrame', 'pandas.DataFrame')
adjust_docstrings(Samples.to_hdf, r'(pd|pandas)\.read_hdf', 'read_hdf')
adjust_docstrings(Samples.to_hdf, 'read_hdf', 'pandas.read_hdf')
# Replace the :func: role by plain backticks.
adjust_docstrings(Samples.to_hdf, ':func:`open`', '`open`')
13 changes: 13 additions & 0 deletions anesthetic/testing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
"""Anesthetic testing utilities."""
import pandas.testing
import numpy.testing


def assert_frame_equal(left, right, *args, check_metadata=True, **kwargs):
    """Assert frames are equal, including metadata.

    Parameters
    ----------
    left, right : pandas.DataFrame
        Frames to compare.
    *args, **kwargs
        Forwarded unchanged to :func:`pandas.testing.assert_frame_equal`.
    check_metadata : bool, default True
        Keyword-only. If True, additionally require that every attribute
        named in ``left._metadata`` has equal values on both frames. The
        ``_metadata`` lists themselves are always compared.

    Raises
    ------
    AssertionError
        If the frames, their ``_metadata`` lists, or (when requested) the
        metadata values differ.
    """
    pandas.testing.assert_frame_equal(left, right, *args, **kwargs)
    numpy.testing.assert_array_equal(left._metadata, right._metadata)
    if not check_metadata:
        return
    for key in left._metadata:
        left_value = getattr(left, key)
        right_value = getattr(right, key)
        # Raise explicitly rather than use a bare ``assert``: the check then
        # survives ``python -O`` and the failure names the offending
        # attribute.
        if not left_value == right_value:
            raise AssertionError(
                f"metadata attribute {key!r} differs: "
                f"{left_value!r} != {right_value!r}"
            )
20 changes: 13 additions & 7 deletions anesthetic/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -582,7 +582,7 @@ def temporary_seed(seed):
np.random.set_state(state)


def adjust_docstrings(cls, pattern, repl, *args, **kwargs):
def adjust_docstrings(obj, pattern, repl, *args, **kwargs):
"""Adjust the docstrings of a class using regular expressions.
After the first argument, the remaining arguments are identical to re.sub.
Expand All @@ -598,11 +598,17 @@ class to adjust
repl : str
replacement string
"""
for key, val in cls.__dict__.items():
doc = inspect.getdoc(val)
if inspect.isclass(obj):
for key, val in obj.__dict__.items():
doc = inspect.getdoc(val)
if doc is not None:
newdoc = re.sub(pattern, repl, doc, *args, **kwargs)
try:
obj.__dict__[key].__doc__ = newdoc
except AttributeError:
pass
else:
doc = inspect.getdoc(obj)
if doc is not None:
newdoc = re.sub(pattern, repl, doc, *args, **kwargs)
try:
cls.__dict__[key].__doc__ = newdoc
except AttributeError:
pass
obj.__doc__ = newdoc
8 changes: 8 additions & 0 deletions docs/source/anesthetic.read.rst
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,14 @@ anesthetic.read.getdist module
:show-inheritance:


anesthetic.read.hdf module
--------------------------

.. automodule:: anesthetic.read.hdf
:members:
:undoc-members:


anesthetic.read.multinest module
--------------------------------

Expand Down
9 changes: 9 additions & 0 deletions docs/source/anesthetic.rst
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,15 @@ anesthetic.scripts module
:show-inheritance:


anesthetic.testing module
~~~~~~~~~~~~~~~~~~~~~~~~~

.. automodule:: anesthetic.testing
:members:
:undoc-members:
:show-inheritance:


anesthetic.utils module
~~~~~~~~~~~~~~~~~~~~~~~

Expand Down
24 changes: 14 additions & 10 deletions docs/source/reading_writing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -75,18 +75,17 @@ or ``parquet`` files for reading and writing.
:meth:`pandas.DataFrame.to_csv` for the various options of saving the data
(e.g. choosing the delimiter etc.).

* ``samples.to_parquet("filename.parquet")``: When reading and writing speed is
an issue, we recommend using the ``parquet`` file format, which should be
* ``samples.to_hdf("filename.h5", "samples")``: When reading and writing speed
is an issue, we recommend using the ``hdf5`` file format, which should be
faster than ``to_csv`` while still capable of handling the
:class:`pandas.MultiIndex` format. Check out
:meth:`pandas.DataFrame.to_parquet` for more information.
:class:`pandas.MultiIndex` format.


Loading ``NestedSamples`` or ``MCMCSamples``
============================================

When loading in previously saved samples, make sure to use the appropriate
class: ``Samples``, ``MCMCSamples``, or ``NestedSamples``.
When loading in previously saved samples from csv, make sure to use the
appropriate class: ``Samples``, ``MCMCSamples``, or ``NestedSamples``.

* ``read_csv``:

Expand All @@ -96,13 +95,18 @@ class: ``Samples``, ``MCMCSamples``, or ``NestedSamples``.
from anesthetic import Samples # or MCMCSamples, or NestedSamples
samples = Samples(read_csv("filename.csv"))

* ``read_parquet``:
When loading previously saved samples from hdf5, make sure to use the
``anesthetic.read_hdf`` function and not the ``pandas.read_hdf`` version. If
you forget to do this, the samples will be read in as a plain ``DataFrame``,
with a consequent loss of functionality.


* ``read_hdf``:

::
from pandas import read_parquet
from anesthetic import Samples # or MCMCSamples, or NestedSamples
samples = Samples(read_parquet("filename.parquet"))
from anesthetic import read_hdf
samples = read_hdf("filename.h5", "samples")


Converting to GetDist
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ test = ["pytest", "pytest-cov", "flake8", "pydocstyle", "packaging", "pre-commit
astropy = ["astropy"]
fastkde = ["fastkde"]
getdist = ["getdist"]
all = ["astropy", "fastkde", "getdist"]
hdf5 = ["tables"]
all = ["astropy", "fastkde", "getdist", "tables"]

[project.scripts]
anesthetic = "anesthetic.scripts:gui"
Expand Down
32 changes: 30 additions & 2 deletions tests/test_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,15 @@
import pytest
import numpy as np
from numpy.testing import assert_array_equal, assert_array_almost_equal
from pandas.testing import assert_frame_equal
from anesthetic.testing import assert_frame_equal
from anesthetic import MCMCSamples, NestedSamples
from anesthetic import read_chains
from anesthetic.read.polychord import read_polychord
from anesthetic.read.getdist import read_getdist
from anesthetic.read.cobaya import read_cobaya
from anesthetic.read.multinest import read_multinest
import pandas._testing as tm
from anesthetic.read.hdf import HDFStore, read_hdf
try:
import getdist
except ImportError:
Expand Down Expand Up @@ -232,4 +233,31 @@ def test_read_fail():
def test_regex_escape():
mcmc_1 = read_chains('./tests/example_data/gd_single+X')
mcmc_2 = read_chains('./tests/example_data/gd_single')
assert_frame_equal(mcmc_1, mcmc_2)
assert_frame_equal(mcmc_1, mcmc_2, check_metadata=False)


@pytest.mark.parametrize('root', ['pc', 'gd'])
def test_hdf5(tmp_path, root):
    # Round-trip samples through HDF5 by every supported route and check
    # that data, metadata and class all survive.

    # Skip when PyTables is not installed. Testing availability directly is
    # more reliable than inspecting ``sys.modules``, which only reflects
    # whether something has already imported the package.
    pytest.importorskip('tables')

    samples = read_chains('./tests/example_data/' + root)
    # Write into pytest's per-test temporary directory so that no file is
    # left in the working directory and the parametrised cases (and repeated
    # runs) do not share state through a common file.
    filename = str(tmp_path / 'test_hdf5.h5')
    key = "samples"

    # Route 1: item assignment / item access on the store.
    with HDFStore(filename) as store:
        store[key] = samples

    with HDFStore(filename) as store:
        assert_frame_equal(samples, store[key])
        assert type(store[key]) is type(samples)

    # Route 2: the ``to_hdf`` method of the samples.
    samples.to_hdf(filename, key)

    with HDFStore(filename) as store:
        assert_frame_equal(samples, store[key])
        assert type(store[key]) is type(samples)

    # Route 3: the module-level ``read_hdf`` function.
    samples_ = read_hdf(filename, key)
    assert_frame_equal(samples_, samples)
    assert type(samples_) is type(samples)

0 comments on commit 59af500

Please sign in to comment.