Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HDF5 read/write functionality #303

Merged
merged 27 commits into from
Jun 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
3d8abb4
Added example hdf5 reader/writer
williamjameshandley Jun 21, 2023
f20f0fb
HDF5 functionality and tests
williamjameshandley Jun 21, 2023
89ec30e
Bumped version number
williamjameshandley Jun 21, 2023
247a5fa
Updated pyproject.toml
williamjameshandley Jun 21, 2023
309d4ad
Removed typing for python3.8
williamjameshandley Jun 21, 2023
d6c90af
Lint corrections
williamjameshandley Jun 21, 2023
09b4d01
Windows corrections
williamjameshandley Jun 21, 2023
0d051a8
Fixed problems with pandas documentation
williamjameshandley Jun 21, 2023
40efc04
Actually now making read_hdf documentation
williamjameshandley Jun 21, 2023
1f31c0d
Removed uncovered AttributeError
williamjameshandley Jun 23, 2023
bed2ad6
Updated the documentation
williamjameshandley Jun 28, 2023
df96bd8
Merge branch 'master' into hdf5_io
williamjameshandley Jun 29, 2023
a473b94
bumped version
williamjameshandley Jun 29, 2023
0a37c47
Merge branch 'master' into hdf5_io
williamjameshandley Jun 29, 2023
bb2ef05
Version bump
williamjameshandley Jun 29, 2023
9426d0b
Import read_hdf
williamjameshandley Jun 29, 2023
db6bc25
Updated docs for new location
williamjameshandley Jun 29, 2023
f432f69
Moved hdf functionality to anesthetic.read
williamjameshandley Jun 29, 2023
8747ccf
Updated documentation
williamjameshandley Jun 29, 2023
8643387
Merge branch 'master' into hdf5_io
williamjameshandley Jun 29, 2023
aa86347
Bumped version
williamjameshandley Jun 29, 2023
61e1f80
moved circular import back out again
williamjameshandley Jun 29, 2023
59008e6
Removed inheritance from anesthetic
williamjameshandley Jun 29, 2023
03e7ee4
fix hdf5 docstring thing
lukashergt Jun 29, 2023
c0acee4
Adjusted pandas.hdf to anesthetic.read_hdf
williamjameshandley Jun 29, 2023
ac6aea0
Merge branch 'hdf5_io' of github.com:handley-lab/anesthetic into hdf5_io
williamjameshandley Jun 29, 2023
3c8fd73
Added back in class
williamjameshandley Jun 29, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
anesthetic: nested sampling post-processing
===========================================
:Authors: Will Handley and Lukas Hergt
:Version: 2.0.0-beta.43
:Version: 2.0.0-beta.44
:Homepage: https://github.com/handley-lab/anesthetic
:Documentation: http://anesthetic.readthedocs.io/

Expand Down
2 changes: 2 additions & 0 deletions anesthetic/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import anesthetic.samples
import anesthetic.plot
import anesthetic.read.chain
import anesthetic.read.hdf

import pandas
import pandas.plotting._core
Expand Down Expand Up @@ -47,4 +48,5 @@ def wrapper(backend=None):
make_2d_axes = anesthetic.plot.make_2d_axes
make_1d_axes = anesthetic.plot.make_1d_axes

read_hdf = anesthetic.read.hdf.read_hdf
read_chains = anesthetic.read.chain.read_chains
2 changes: 1 addition & 1 deletion anesthetic/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '2.0.0b43'
__version__ = '2.0.0b44'
67 changes: 67 additions & 0 deletions anesthetic/read/hdf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
"""Anesthetic overwrites for pandas hdf functionality."""
from pandas import HDFStore as _HDFStore
from pandas.io.pytables import to_hdf as _to_hdf, read_hdf as _read_hdf
from anesthetic.utils import adjust_docstrings
from anesthetic.samples import NestedSamples, MCMCSamples, Samples


class HDFStore(_HDFStore):
    """pandas ``HDFStore`` that round-trips anesthetic sample classes.

    When an object is written, the name of its class, the values of the
    attributes listed in its ``_metadata`` and the anesthetic version are
    recorded as attributes on the storer of the written node. When a node
    is read back, those attributes are used to rebuild an object of the
    recorded anesthetic class with its metadata restored.
    """

    # Maps the class name recorded in the file back to the class itself.
    anesthetic_types = {x.__name__: x
                        for x in [NestedSamples, MCMCSamples, Samples]}

    def _read_as_anesthetic(self, reader, key, *args, **kwargs):
        """Read ``key`` via ``reader`` and rebuild the anesthetic object.

        Shared implementation of :meth:`get` and :meth:`select`, which
        differ only in the parent-class method used to read the data.

        Parameters
        ----------
        reader : callable
            Bound parent-class method (``get`` or ``select``), called as
            ``reader(key, *args, **kwargs)``.
        key : str
            Identifier of the node to read.

        Returns
        -------
        Instance of the class named by the node's ``anesthetic_type``
        attribute, with the recorded metadata attributes set on it.
        """
        # The recorded class is resolved before any data is read, so a node
        # lacking the anesthetic attributes (or naming an unknown class)
        # fails before the read is attempted.
        storer = self.get_storer(key)
        anesthetic_type = self.anesthetic_types[storer.attrs.anesthetic_type]
        value = anesthetic_type(reader(key, *args, **kwargs))
        # Fetch the stored mapping once and reuse it for names and values.
        metadata = storer.attrs._metadata
        value._metadata = list(metadata)
        for name, attr in metadata.items():
            setattr(value, name, attr)
        return value

    def get(self, key, *args, **kwargs):
        """Retrieve the object stored under ``key`` as its anesthetic type.

        Extra arguments are forwarded to the parent-class ``get``.
        """
        return self._read_as_anesthetic(super().get, key, *args, **kwargs)

    def put(self, key, value, *args, **kwargs):
        """Store ``value`` under ``key`` and record its anesthetic metadata.

        Extra arguments are forwarded to the parent-class ``put``. After
        writing, the storer attributes ``_metadata`` (a dict of the
        attributes named in ``value._metadata``), ``anesthetic_type`` (the
        class name of ``value``) and ``anesthetic_version`` are set.
        """
        # Imported at call time: the anesthetic package itself imports this
        # module, so a module-level import would be circular.
        from anesthetic import __version__
        super().put(key, value, *args, **kwargs)
        storer = self.get_storer(key)
        storer.attrs._metadata = {
            name: getattr(value, name) for name in value._metadata
        }
        storer.attrs.anesthetic_type = type(value).__name__
        storer.attrs.anesthetic_version = __version__

    def select(self, key, *args, **kwargs):
        """Select from the object under ``key`` as its anesthetic type.

        Extra arguments are forwarded to the parent-class ``select``.
        """
        return self._read_as_anesthetic(super().select, key, *args, **kwargs)


def to_hdf(path_or_buf, key, value, mode="a", complevel=None, complib=None,
           *args, **kwargs):  # noqa: D103
    # No inline docstring: ``to_hdf.__doc__`` is assigned from the pandas
    # function at module level.
    #
    # Open the anesthetic-aware store ourselves and hand it to pandas in
    # place of the path. Using it as a context manager guarantees the file
    # is closed once the write has finished (or failed), rather than leaving
    # the handle open until the store is garbage collected.
    with HDFStore(path_or_buf, mode=mode, complevel=complevel,
                  complib=complib) as store:
        # NOTE(review): presumably this makes pandas' path normalisation
        # hand the store back unchanged instead of rejecting it -- confirm
        # against the supported pandas versions.
        store.__fspath__ = lambda: store
        return _to_hdf(store, key, value, *args, **kwargs)


def read_hdf(path_or_buf, *args, **kwargs):  # noqa: D103
    # No inline docstring: ``read_hdf.__doc__`` is assigned from the pandas
    # function at module level.
    #
    # pandas is given an already-open anesthetic HDFStore instead of the
    # raw path; presumably so that the read goes through this module's
    # HDFStore overrides -- verify against pandas' read_hdf.
    # NOTE(review): the store opened here is not explicitly closed.
    return _read_hdf(HDFStore(path_or_buf), *args, **kwargs)


# The wrappers above carry no docstrings of their own; borrow pandas' ones.
to_hdf.__doc__ = _to_hdf.__doc__
read_hdf.__doc__ = _read_hdf.__doc__
# Rewrite the borrowed read_hdf docstring in place: point mentions of
# ``read_hdf`` at the anesthetic function and qualify ``DataFrame``.
# NOTE(review): these patterns do not first strip an existing ``pd.`` or
# ``pandas.`` prefix -- confirm the result on the pandas docstring.
adjust_docstrings(read_hdf, 'read_hdf', 'anesthetic.read_hdf')
adjust_docstrings(read_hdf, 'DataFrame', 'pandas.DataFrame')
# Replace the ``:func:``/``:class:`` roles with plain backticked text
# (presumably to avoid unresolved cross-references in the docs build).
adjust_docstrings(read_hdf, ':func:`open`', '`open`')
adjust_docstrings(read_hdf, ':class:`pandas.HDFStore`', '`pandas.HDFStore`')
13 changes: 13 additions & 0 deletions anesthetic/samples.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from pandas.core.accessor import CachedAccessor
from anesthetic.plot import (make_1d_axes, make_2d_axes,
AxesSeries, AxesDataFrame)
from anesthetic.utils import adjust_docstrings
import anesthetic.weighted_pandas
from anesthetic.plotting import PlotAccessor
anesthetic.weighted_pandas._WeightedObject.plot =\
Expand Down Expand Up @@ -483,6 +484,11 @@ def tex(self):
"tex = samples.get_label(label) # anesthetic 2.0"
)

def to_hdf(self, path_or_buf, key, *args, **kwargs):  # noqa: D102
    # Imported at call time rather than at module level, because
    # anesthetic.read.hdf itself imports the sample classes from this
    # module.
    from anesthetic.read.hdf import to_hdf as _write_hdf
    # Delegate to the anesthetic writer, passing this object as the value.
    return _write_hdf(path_or_buf, key, self, *args, **kwargs)


class MCMCSamples(Samples):
"""Storage and plotting tools for MCMC samples.
Expand Down Expand Up @@ -1338,3 +1344,10 @@ def merge_samples_weighted(samples, weights=None, label=None):
new_samples.label = label

return new_samples


# Samples.to_hdf defines no docstring of its own, so these calls rewrite
# whichever docstring inspect.getdoc finds for it (presumably the inherited
# pandas one -- TODO confirm).
# Normalise in two steps: first strip any ``pd.``/``pandas.`` prefix, then
# add ``pandas.`` uniformly, so already-qualified names are not doubled.
adjust_docstrings(Samples.to_hdf, r'(pd|pandas)\.DataFrame', 'DataFrame')
adjust_docstrings(Samples.to_hdf, 'DataFrame', 'pandas.DataFrame')
adjust_docstrings(Samples.to_hdf, r'(pd|pandas)\.read_hdf', 'read_hdf')
adjust_docstrings(Samples.to_hdf, 'read_hdf', 'pandas.read_hdf')
# Replace the ``:func:`` role with plain backticked text.
adjust_docstrings(Samples.to_hdf, ':func:`open`', '`open`')
13 changes: 13 additions & 0 deletions anesthetic/testing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
"""Anesthetic testing utilities."""
import pandas.testing
import numpy.testing


def assert_frame_equal(left, right, *args, check_metadata=True, **kwargs):
    """Assert frames are equal, including metadata.

    The frame contents are compared with
    ``pandas.testing.assert_frame_equal``, to which ``*args`` and
    ``**kwargs`` are forwarded. The lists of metadata attribute names
    (``_metadata``) of the two frames are always required to match.

    Parameters
    ----------
    left, right : pandas.DataFrame
        Frames to compare.
    check_metadata : bool, default=True
        Whether to also compare the value of every attribute named in
        ``left._metadata``. Values are compared with
        ``numpy.testing.assert_equal``, so array-valued metadata is
        compared element-wise.

    Raises
    ------
    AssertionError
        If the frames, the metadata names, or (when ``check_metadata`` is
        true) any metadata value differ.
    """
    pandas.testing.assert_frame_equal(left, right, *args, **kwargs)
    numpy.testing.assert_array_equal(left._metadata, right._metadata)
    if check_metadata:
        for key in left._metadata:
            # An explicit numpy assertion rather than a bare ``assert``: it
            # is not stripped under ``python -O``, reports which attribute
            # differs, and copes with array-valued metadata.
            numpy.testing.assert_equal(
                getattr(left, key), getattr(right, key),
                err_msg=f"metadata attribute {key!r} differs",
            )
20 changes: 13 additions & 7 deletions anesthetic/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -582,7 +582,7 @@ def temporary_seed(seed):
np.random.set_state(state)


def adjust_docstrings(cls, pattern, repl, *args, **kwargs):
def adjust_docstrings(obj, pattern, repl, *args, **kwargs):
"""Adjust the docstrings of a class using regular expressions.
After the first argument, the remaining arguments are identical to re.sub.
Expand All @@ -598,11 +598,17 @@ class to adjust
repl : str
replacement string
"""
for key, val in cls.__dict__.items():
doc = inspect.getdoc(val)
if inspect.isclass(obj):
for key, val in obj.__dict__.items():
doc = inspect.getdoc(val)
if doc is not None:
newdoc = re.sub(pattern, repl, doc, *args, **kwargs)
try:
obj.__dict__[key].__doc__ = newdoc
except AttributeError:
pass
else:
doc = inspect.getdoc(obj)
if doc is not None:
newdoc = re.sub(pattern, repl, doc, *args, **kwargs)
try:
cls.__dict__[key].__doc__ = newdoc
except AttributeError:
pass
obj.__doc__ = newdoc
8 changes: 8 additions & 0 deletions docs/source/anesthetic.read.rst
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,14 @@ anesthetic.read.getdist module
:show-inheritance:


anesthetic.read.hdf module
--------------------------

.. automodule:: anesthetic.read.hdf
:members:
:undoc-members:


anesthetic.read.multinest module
--------------------------------

Expand Down
9 changes: 9 additions & 0 deletions docs/source/anesthetic.rst
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,15 @@ anesthetic.scripts module
:show-inheritance:


anesthetic.testing module
~~~~~~~~~~~~~~~~~~~~~~~~~

.. automodule:: anesthetic.testing
:members:
:undoc-members:
:show-inheritance:


anesthetic.utils module
~~~~~~~~~~~~~~~~~~~~~~~

Expand Down
24 changes: 14 additions & 10 deletions docs/source/reading_writing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -75,18 +75,17 @@ or ``parquet`` files for reading and writing.
:meth:`pandas.DataFrame.to_csv` for the various options of saving the data
(e.g. choosing the delimiter etc.).

* ``samples.to_parquet("filename.parquet")``: When reading and writing speed is
an issue, we recommend using the ``parquet`` file format, which should be
* ``samples.to_hdf("filename.h5", "samples")``: When reading and writing speed
is an issue, we recommend using the ``hdf5`` file format, which should be
faster than ``to_csv`` while still capable of handling the
:class:`pandas.MultiIndex` format. Check out
:meth:`pandas.DataFrame.to_parquet` for more information.
:class:`pandas.MultiIndex` format.


Loading ``NestedSamples`` or ``MCMCSamples``
============================================

When loading in previously saved samples, make sure to use the appropriate
class: ``Samples``, ``MCMCSamples``, or ``NestedSamples``.
When loading in previously saved samples from csv, make sure to use the
appropriate class: ``Samples``, ``MCMCSamples``, or ``NestedSamples``.

* ``read_csv``:

Expand All @@ -96,13 +95,18 @@ class: ``Samples``, ``MCMCSamples``, or ``NestedSamples``.
from anesthetic import Samples # or MCMCSamples, or NestedSamples
samples = Samples(read_csv("filename.csv"))

* ``read_parquet``:
When loading in previously saved samples from hdf5, make sure to import the
``anesthetic.read_hdf`` function, and not the ``pandas.read_hdf`` version. If
you forget to do this, the samples will be read in as a ``DataFrame``, with a
consequent loss of functionality.


* ``read_hdf``:

::
from pandas import read_parquet
from anesthetic import Samples # or MCMCSamples, or NestedSamples
samples = Samples(read_parquet("filename.parquet"))
from anesthetic import read_hdf
samples = read_hdf("filename.h5", "samples")


Converting to GetDist
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ test = ["pytest", "pytest-cov", "flake8", "pydocstyle", "packaging", "pre-commit
astropy = ["astropy"]
fastkde = ["fastkde"]
getdist = ["getdist"]
all = ["astropy", "fastkde", "getdist"]
hdf5 = ["tables"]
all = ["astropy", "fastkde", "getdist", "tables"]

[project.scripts]
anesthetic = "anesthetic.scripts:gui"
Expand Down
32 changes: 30 additions & 2 deletions tests/test_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,15 @@
import pytest
import numpy as np
from numpy.testing import assert_array_equal, assert_array_almost_equal
from pandas.testing import assert_frame_equal
from anesthetic.testing import assert_frame_equal
from anesthetic import MCMCSamples, NestedSamples
from anesthetic import read_chains
from anesthetic.read.polychord import read_polychord
from anesthetic.read.getdist import read_getdist
from anesthetic.read.cobaya import read_cobaya
from anesthetic.read.multinest import read_multinest
import pandas._testing as tm
from anesthetic.read.hdf import HDFStore, read_hdf
try:
import getdist
except ImportError:
Expand Down Expand Up @@ -232,4 +233,31 @@ def test_read_fail():
def test_regex_escape():
mcmc_1 = read_chains('./tests/example_data/gd_single+X')
mcmc_2 = read_chains('./tests/example_data/gd_single')
assert_frame_equal(mcmc_1, mcmc_2)
assert_frame_equal(mcmc_1, mcmc_2, check_metadata=False)


@pytest.mark.parametrize('root', ['pc', 'gd'])
@pytest.mark.xfail('tables' not in sys.modules,
                   raises=ImportError,
                   reason="requires tables package")
def test_hdf5(tmp_path, root):
    """Round-trip samples through HDF5 via every supported entry point."""
    samples = read_chains('./tests/example_data/' + root)
    # Write inside pytest's per-test temporary directory so that no
    # ``test_hdf5.h5`` is left behind in the working directory and the
    # parametrised runs do not share a file.
    filename = str(tmp_path / 'test_hdf5.h5')
    key = "samples"

    # Item assignment / lookup on the store.
    with HDFStore(filename) as store:
        store[key] = samples

    with HDFStore(filename) as store:
        assert_frame_equal(samples, store[key])
        assert type(store[key]) is type(samples)

    # The ``to_hdf`` method on the samples object.
    samples.to_hdf(filename, key)

    with HDFStore(filename) as store:
        assert_frame_equal(samples, store[key])
        assert type(store[key]) is type(samples)

    # The module-level ``read_hdf`` function.
    samples_ = read_hdf(filename, key)
    assert_frame_equal(samples_, samples)
    assert type(samples_) is type(samples)