-
Notifications
You must be signed in to change notification settings - Fork 16
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
HDF5 read/write functionality #303
Changes from 15 commits
3d8abb4
f20f0fb
89ec30e
247a5fa
309d4ad
d6c90af
09b4d01
0d051a8
40efc04
1f31c0d
bed2ad6
df96bd8
a473b94
0a37c47
bb2ef05
9426d0b
db6bc25
f432f69
8747ccf
8643387
aa86347
61e1f80
59008e6
03e7ee4
c0acee4
ac6aea0
3c8fd73
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
__version__ = '2.0.0b42' | ||
__version__ = '2.0.0b43' |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
"""Anesthetic overwrites for pandas hdf functionality.""" | ||
from pandas import HDFStore as _HDFStore | ||
from pandas.io.pytables import to_hdf as _to_hdf, read_hdf as _read_hdf | ||
from anesthetic import __version__ | ||
from anesthetic.samples import NestedSamples, MCMCSamples, Samples | ||
from anesthetic.utils import adjust_docstrings | ||
|
||
|
||
class HDFStore(_HDFStore):  # noqa: D101
    # No class/method docstrings on purpose (see the ``noqa`` markers): the
    # overridden methods keep presenting the documentation of the pandas
    # methods they wrap.
    #
    # ``put`` records, next to the stored data, the class name of the value,
    # the attributes listed in its ``_metadata`` and the anesthetic version.
    # ``get`` and ``select`` use that record to rebuild the original class.

    # Lookup table: recorded class name -> class used to rebuild the object.
    anesthetic_types = {x.__name__: x
                        for x in [NestedSamples, MCMCSamples, Samples]}

    def _restore(self, key, value):
        """Rebuild the object recorded under ``key`` from the raw ``value``.

        ``value`` is returned unchanged when the node carries no
        ``anesthetic_type`` attribute, or when the recorded name is not one
        of ``anesthetic_types`` (e.g. a plain ``DataFrame`` was stored).
        """
        attrs = self.get_storer(key).attrs
        type_name = getattr(attrs, 'anesthetic_type', None)
        anesthetic_type = self.anesthetic_types.get(type_name)
        if anesthetic_type is None:
            return value
        value = anesthetic_type(value)
        metadata = getattr(attrs, '_metadata', {})
        # Restore the list of metadata names first, then each stored value.
        value._metadata = list(metadata)
        for k, v in metadata.items():
            setattr(value, k, v)
        return value

    def get(self, key, *args, **kwargs):  # noqa: D102
        return self._restore(key, super().get(key, *args, **kwargs))

    def put(self, key, value, *args, **kwargs):  # noqa: D102
        super().put(key, value, *args, **kwargs)
        # The node only exists once the data has been written, so the
        # bookkeeping attributes are attached afterwards.
        storer = self.get_storer(key)
        storer.attrs._metadata = {
            k: getattr(value, k)
            for k in value._metadata
            }
        storer.attrs.anesthetic_type = type(value).__name__
        storer.attrs.anesthetic_version = __version__

    def select(self, key, *args, **kwargs):  # noqa: D102
        return self._restore(key, super().select(key, *args, **kwargs))
|
||
|
||
def to_hdf(path_or_buf, key, value, mode="a", complevel=None, complib=None,
           *args, **kwargs):  # noqa: D103
    # No docstring here: ``to_hdf.__doc__`` is assigned from the pandas
    # function at module level.
    #
    # The store is opened here (rather than by pandas) so that the anesthetic
    # ``HDFStore.put`` is the one doing the write, and it is closed again on
    # exit, including when the write raises.
    with HDFStore(path_or_buf, mode=mode, complevel=complevel,
                  complib=complib) as store:
        # Shadow ``__fspath__`` on the instance so the store is handed on
        # as-is instead of being converted back into a path.
        store.__fspath__ = lambda: store
        # ``mode``/``complevel``/``complib`` are forwarded explicitly so that
        # any extra positional arguments stay aligned with the parameters
        # that follow them in the pandas signature.
        return _to_hdf(store, key, value, mode, complevel, complib,
                       *args, **kwargs)
|
||
|
||
def read_hdf(path_or_buf, *args, **kwargs):  # noqa: D103
    # No docstring here: ``read_hdf.__doc__`` is assigned from the pandas
    # function at module level.
    #
    # NOTE(review): the store is opened with ``HDFStore``'s default mode, not
    # with a ``mode`` passed by the caller -- confirm this is intended.
    store = HDFStore(path_or_buf)
    # A lazy read (``iterator``/``chunksize`` given by keyword) returns an
    # object that still needs the open file, so the store is left open then.
    lazy = (bool(kwargs.get('iterator'))
            or kwargs.get('chunksize') is not None)
    try:
        result = _read_hdf(store, *args, **kwargs)
    except Exception:
        store.close()
        raise
    if not lazy:
        store.close()
    return result
|
||
|
||
# The wrappers reuse the documentation of the pandas functions they wrap.
to_hdf.__doc__ = _to_hdf.__doc__
read_hdf.__doc__ = _read_hdf.__doc__

# (old, new) pairs passed to ``adjust_docstrings`` for ``read_hdf``, in order.
# Presumably each call rewrites ``old`` to ``new`` in the docstring -- confirm
# against ``anesthetic.utils.adjust_docstrings``.
for _old, _new in (
        ('read_hdf', 'pandas.read_hdf'),
        ('DataFrame', 'pandas.DataFrame'),
        (':func:`open`', '`open`'),
        (':class:`pandas.HDFStore`', '`pandas.HDFStore`'),
):
    adjust_docstrings(read_hdf, _old, _new)
del _old, _new
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does this file belong in the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. no -- in the same way that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agreed. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
"""Anesthetic testing utilities.""" | ||
import pandas.testing | ||
import numpy.testing | ||
|
||
|
||
def assert_frame_equal(left, right, *args, **kwargs):
    """Assert frames are equal, including metadata.

    Parameters
    ----------
    left, right : pandas.DataFrame or subclass
        Frames to compare. Both must expose a ``_metadata`` list of
        attribute names.
    *args, **kwargs
        Forwarded to :func:`pandas.testing.assert_frame_equal`.
    check_metadata : bool, default True
        Keyword only. If True, the value of every attribute named in
        ``left._metadata`` is compared as well. The lists of metadata names
        themselves are compared in either case.

    Raises
    ------
    AssertionError
        If the frames, the metadata names or (when requested) the metadata
        values differ.
    """
    check_metadata = kwargs.pop('check_metadata', True)
    pandas.testing.assert_frame_equal(left, right, *args, **kwargs)
    numpy.testing.assert_array_equal(left._metadata, right._metadata)
    if check_metadata:
        for key in left._metadata:
            # ``assert_equal`` copes with array-valued and nested metadata,
            # for which a bare ``assert a == b`` raises ValueError, and it is
            # not stripped when Python runs with ``-O``.
            numpy.testing.assert_equal(
                getattr(left, key), getattr(right, key),
                err_msg="metadata attribute {!r} differs".format(key))
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -75,18 +75,17 @@ or ``parquet`` files for reading and writing. | |
:meth:`pandas.DataFrame.to_csv` for the various options of saving the data | ||
(e.g. choosing the delimiter etc.). | ||
|
||
* ``samples.to_parquet("filename.parquet")``: When reading and writing speed is | ||
an issue, we recommend using the ``parquet`` file format, which should be | ||
* ``samples.to_hdf("filename.h5", "samples")``: When reading and writing speed | ||
is an issue, we recommend using the ``hdf5`` file format, which should be | ||
faster than ``to_csv`` while still capable of handling the | ||
:class:`pandas.MultiIndex` format. Check out | ||
:meth:`pandas.DataFrame.to_parquet` for more information. | ||
:class:`pandas.MultiIndex` format. | ||
|
||
|
||
Loading ``NestedSamples`` or ``MCMCSamples`` | ||
============================================ | ||
|
||
When loading in previously saved samples, make sure to use the appropriate | ||
class: ``Samples``, ``MCMCSamples``, or ``NestedSamples``. | ||
When loading in previously saved samples from csv, make sure to use the | ||
appropriate class: ``Samples``, ``MCMCSamples``, or ``NestedSamples``. | ||
|
||
* ``read_csv``: | ||
|
||
|
@@ -96,13 +95,18 @@ class: ``Samples``, ``MCMCSamples``, or ``NestedSamples``. | |
from anesthetic import Samples # or MCMCSamples, or NestedSamples | ||
samples = Samples(read_csv("filename.csv")) | ||
|
||
* ``read_parquet``: | ||
When loading in previously saved samples from hdf5, make sure to import the | ||
``anesthetic.io.read_hdf`` function, and not the ``pandas.read_hdf`` version. If | ||
you forget to do this, the samples will be read in as a ``DataFrame``, with a | ||
consequent loss of functionality. | ||
|
||
|
||
* ``read_hdf``: | ||
|
||
:: | ||
|
||
from pandas import read_parquet | ||
from anesthetic import Samples # or MCMCSamples, or NestedSamples | ||
samples = Samples(read_parquet("filename.parquet")) | ||
from anesthetic.io import read_hdf | ||
samples = read_hdf("filename.h5", "samples") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In the interest of being similar to both from anesthetic import read_hdf There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done in 9426d0b |
||
|
||
|
||
Converting to GetDist | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this file belongs in the `read` folder...?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done in f432f69
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
although now since we have `to_hdf` and `from_hdf`, it should really be the `io` folder rather than the `read` folder...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we want to make that change? That turns this into a 2.0.0 milestone, but that is what we are aiming for anyhow, right?