-
-
Notifications
You must be signed in to change notification settings - Fork 409
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[TEP014] Added HDFWriter class + Unit Tests #744
Changes from 2 commits
c321b41
6f651d8
38b6bfd
e0bccaf
31fcc90
5b8de44
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,125 @@ | ||
import os | ||
|
||
import numpy as np | ||
import pandas as pd | ||
import pandas.util.testing as pdt | ||
import pytest | ||
from astropy import units as u | ||
from astropy.tests.helper import assert_quantity_allclose | ||
from numpy.testing import assert_almost_equal, assert_array_almost_equal | ||
|
||
from tardis.io.util import HDFWriter | ||
|
||
|
||
#Test Cases | ||
|
||
#DataFrame | ||
#None | ||
#Numpy Arrays | ||
#Strings | ||
#Numeric Values | ||
#Pandas Series Object | ||
#MultiIndex Object | ||
#Quantity Objects with - Numeric Values, Numpy Arrays, DataFrame, Pandas Series, None objects | ||
|
||
class MockHDF(HDFWriter, object):
    """Smallest possible HDFWriter subclass: it persists one attribute."""

    # Attribute names that HDFWriter.get_properties() collects.
    hdf_properties = ['property']
    class_properties = {}

    def __init__(self, property):
        # The value under test; it may be a scalar, array, pandas object,
        # Quantity or None depending on the test case.
        self.property = property
|
||
simple_objects = [1.5, 'random_string', 4.2e7]


@pytest.mark.parametrize("attr", simple_objects)
def test_simple_write(tmpdir, attr):
    """Write a scalar and read it back from the ``scalars`` entry."""
    hdf_file = str(tmpdir.mkdir('data').join('test.hdf'))
    writer = MockHDF(attr)
    writer.to_hdf(hdf_file, path='test')
    stored_scalars = pd.read_hdf(hdf_file, key='/test/mock_hdf/scalars')
    assert writer.property == stored_scalars['property']
|
||
mock_df = pd.DataFrame({
    'one': pd.Series([1., 2., 3.], index=['a', 'b', 'c']),
    'two': pd.Series([1., 2., 3., 4.], index=['a', 'b', 'c', 'd']),
})
complex_objects = [
    np.array([4.0e14, 2, 2e14, 27.5]),
    pd.Series([1., 2., 3.]),
    mock_df,
]


@pytest.mark.parametrize("attr", complex_objects)
def test_complex_obj_write(tmpdir, attr):
    """Write an array-like object and compare it with the stored values."""
    hdf_file = str(tmpdir.mkdir('data').join('test.hdf'))
    writer = MockHDF(attr)
    writer.to_hdf(hdf_file, path='test')
    stored = pd.read_hdf(hdf_file, key='/test/mock_hdf/property')
    assert_array_almost_equal(writer.property, stored.values)
|
||
arrays = [
    ['L1', 'L1', 'L2', 'L2', 'L3', 'L3', 'L4', 'L4'],
    ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two'],
]
# Pair the two level lists element-wise into (first, second) tuples.
tuples = list(zip(arrays[0], arrays[1]))
mock_multiIndex = pd.MultiIndex.from_tuples(tuples, names=['first', 'second'])


def test_MultiIndex_write(tmpdir):
    """Write a MultiIndex and rebuild it from what was stored."""
    hdf_file = str(tmpdir.mkdir('data').join('test.hdf'))
    writer = MockHDF(mock_multiIndex)
    writer.to_hdf(hdf_file, path='test')
    stored = pd.read_hdf(hdf_file, key='/test/mock_hdf/property')
    # Turn the stored object back into index tuples before comparing.
    stored_tuples = stored.unstack().values
    rebuilt = pd.MultiIndex.from_tuples(
        stored_tuples, names=['first', 'second'])
    pdt.assert_almost_equal(writer.property, rebuilt)
|
||
#Test Quantity Objects | ||
|
||
quantity_objects = [np.array([4.0e14, 2, 2e14, 27.5]), mock_df]


@pytest.mark.parametrize("attr", quantity_objects)
def test_quantity_objects_write(tmpdir, attr):
    """Write an array-like Quantity and compare its CGS value with the file."""
    hdf_file = str(tmpdir.mkdir('data').join('test.hdf'))
    writer = MockHDF(u.Quantity(attr, 'g/cm**3'))
    writer.to_hdf(hdf_file, path='test')
    stored = pd.read_hdf(hdf_file, key='/test/mock_hdf/property')
    cgs_values = writer.property.cgs.value
    assert_array_almost_equal(cgs_values, stored)
|
||
scalar_quantity_objects = [1.5, 4.2e7]


@pytest.mark.parametrize("attr", scalar_quantity_objects)
def test_scalar_quantity_objects_write(tmpdir, attr):
    """Write a scalar Quantity and compare its CGS value with ``scalars``."""
    hdf_file = str(tmpdir.mkdir('data').join('test.hdf'))
    writer = MockHDF(u.Quantity(attr, 'g/cm**3'))
    writer.to_hdf(hdf_file, path='test')
    stored_scalars = pd.read_hdf(hdf_file, key='/test/mock_hdf/scalars/')
    cgs_value = writer.property.cgs.value
    assert_array_almost_equal(cgs_value, stored_scalars['property'])
|
||
def test_none_write(tmpdir):
    """Write ``None`` and map the stored ``'none'`` string back to ``None``."""
    hdf_file = str(tmpdir.mkdir('data').join('test.hdf'))
    writer = MockHDF(None)
    writer.to_hdf(hdf_file, path='test')
    stored = pd.read_hdf(hdf_file, key='/test/mock_hdf/scalars/')['property']
    # Translate the string read from the file back before comparing.
    restored = None if stored == 'none' else stored
    assert writer.property == restored
|
||
# Test class_properties parameter (like homologous_density is a class | ||
# instance/object inside Model class) | ||
|
||
class MockClass(HDFWriter, object):
    """HDFWriter subclass holding a plain value and a nested MockHDF."""

    # Attribute names that HDFWriter.get_properties() collects.
    hdf_properties = ['property', 'nested_object']
    # Declares which class the nested attribute is an instance of.
    class_properties = {'nested_object': MockHDF}

    def __init__(self, property, nested_object):
        self.nested_object = nested_object
        self.property = property
|
||
@pytest.mark.parametrize("attr", quantity_objects)
def test_objects_write(tmpdir, attr):
    """Write an object that nests another writer and check both levels."""
    hdf_file = str(tmpdir.mkdir('data').join('test.hdf'))
    inner = MockHDF(np.array([4.0e14, 2, 2e14, 27.5]))
    outer = MockClass(u.Quantity(attr, 'g/cm**3'), inner)
    outer.to_hdf(hdf_file, path='test')

    # The outer object's own property.
    stored_outer = pd.read_hdf(hdf_file, key='/test/mock_class/property')
    assert_array_almost_equal(outer.property.cgs.value, stored_outer)

    # The nested object's property, read from below the outer group.
    stored_inner = pd.read_hdf(
        hdf_file, key='/test/mock_class/nested_object/property')
    assert_array_almost_equal(outer.nested_object.property, stored_inner)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
#Utility functions for the IO part of TARDIS | ||
|
||
import os | ||
import re | ||
import pandas as pd | ||
import numpy as np | ||
import collections | ||
|
@@ -166,6 +167,106 @@ def check_equality(item1, item2): | |
return True | ||
|
||
|
||
class HDFWriter(object):
    """
    Mixin that writes selected attributes of an object to an HDF store.

    Subclasses list the names of the attributes to persist in
    ``hdf_properties``. They may define ``hdf_name`` to choose the group
    name; otherwise the snake_case form of the class name is used.
    """

    @staticmethod
    def to_hdf_util(path_or_buf, path, elements, complevel=9, complib='blosc'):
        """
        A function to uniformly store TARDIS data
        to an HDF file.

        Scalars will be stored in a Series under path/scalars
        1D arrays will be stored under path/property_name as distinct Series
        2D arrays will be stored under path/property_name as distinct DataFrames

        Units will be stored as their CGS value

        Parameters
        ----------
        path_or_buf:
            Path or buffer to the HDF store
        path: str
            Path inside the HDF store to store the `elements`
        elements: dict
            A dict of property names and their values to be
            stored.
        complevel: int
            Compression level used when the store is opened to merge
            scalars
        complib: str
            Compression library used when the store is opened to merge
            scalars

        Returns
        -------
        None

        """
        scalars = {}
        # ``items`` exists on both Python 2 and 3; ``iteritems`` is
        # Python 2 only.
        for key, value in elements.items():
            if value is None:
                # None is stored as the string 'none'.
                value = 'none'
            if hasattr(value, 'cgs'):
                value = value.cgs.value
            if np.isscalar(value):
                scalars[key] = value
            elif hasattr(value, 'shape'):
                hdf_key = os.path.join(path, key)
                if value.ndim == 1:
                    # This try,except block is only for model.plasma.levels
                    try:
                        pd.Series(value).to_hdf(path_or_buf, key=hdf_key)
                    except NotImplementedError:
                        pd.DataFrame(value).to_hdf(path_or_buf, key=hdf_key)
                else:
                    pd.DataFrame(value).to_hdf(path_or_buf, key=hdf_key)
            else:
                # Objects that provide their own ``to_hdf`` write
                # themselves below ``path``; anything else is wrapped in a
                # one-row DataFrame.
                try:
                    value.to_hdf(path_or_buf, path, name=key)
                except AttributeError:
                    data = pd.DataFrame([value])
                    data.to_hdf(path_or_buf, key=os.path.join(path, key))

        if scalars:
            scalars_series = pd.Series(scalars)

            # Unfortunately, with to_hdf we cannot append, so merge beforehand
            scalars_path = os.path.join(path, 'scalars')
            with pd.HDFStore(path_or_buf, complevel=complevel,
                             complib=complib) as store:
                if scalars_path in store:
                    # ``pd.concat`` replaces ``Series.append``, which was
                    # removed in pandas 2.0.
                    scalars_series = pd.concat(
                        [store[scalars_path], scalars_series])
            scalars_series.to_hdf(path_or_buf, key=scalars_path)

    def get_properties(self):
        """
        Collect the attributes named in ``self.hdf_properties``.

        Returns
        -------
        dict
            Maps each property name to the current attribute value.
        """
        data = {name: getattr(self, name) for name in self.hdf_properties}
        return data

    @staticmethod
    def convert_to_snake_case(s):
        """
        Convert a CamelCase string to snake_case.

        Parameters
        ----------
        s: str
            String to convert, e.g. a class name

        Returns
        -------
        str
            Lower-case string with underscores between words, e.g.
            'MockHDF' -> 'mock_hdf'
        """
        # Pass 1: split before a capitalised word (an upper-case letter
        # followed by lower-case letters), e.g. 'HTTPResponse' becomes
        # 'HTTP_Response'.
        s1 = re.sub(r'(.)([A-Z][a-z]+)', r'\1_\2', s)
        # Pass 2: split between a lower-case letter or digit and an
        # upper-case letter. This catches acronyms that pass 1 misses
        # because they have no lower-case tail, e.g. 'MockHDF' becomes
        # 'Mock_HDF'.
        return re.sub(r'([a-z0-9])([A-Z])', r'\1_\2', s1).lower()

    @staticmethod
    def convert_to_camel_case(s):
        """
        Backward-compatible alias of `convert_to_snake_case`.

        Despite its name, this returns snake_case; the old name is kept
        so existing callers keep working.
        """
        return HDFWriter.convert_to_snake_case(s)

    def to_hdf(self, file_path, path='', name=None):
        """
        Write the properties of this object to an HDF store.

        Parameters
        ----------
        file_path: str
            Path or buffer to the HDF store
        path: str
            Path inside the HDF store to store the `elements`
        name: str
            Group inside the HDF store to which the `elements` need to be saved

        Returns
        -------
        None

        """
        if name is None:
            try:
                name = self.hdf_name
            except AttributeError:
                # No explicit name: derive it from the class name.
                name = self.convert_to_snake_case(self.__class__.__name__)

        data = self.get_properties()
        buff_path = os.path.join(path, name)
        self.to_hdf_util(file_path, buff_path, data)
|
||
#Deprecated | ||
def to_hdf(path_or_buf, path, elements, complevel=9, complib='blosc'): | ||
""" | ||
A function to uniformly store TARDIS data | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this is a very complicated way to create these arrays. I'd suggest you use `np.array(...)` and then simply transpose it.