Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[TEP014] Added HDFWriter class + Unit Tests #744

Merged
merged 6 commits into from
Jun 20, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
135 changes: 135 additions & 0 deletions tardis/io/tests/test_HDFWriter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
import os

import numpy as np
import pandas as pd
import pandas.util.testing as pdt
import pytest
from astropy import units as u
from astropy.tests.helper import assert_quantity_allclose
from numpy.testing import assert_almost_equal, assert_array_almost_equal

from tardis.io.util import HDFWriter


#Test Cases

#DataFrame
#None
#Numpy Arrays
#Strings
#Numeric Values
#Pandas Series Object
#MultiIndex Object
#Quantity Objects with - Numeric Values, Numpy Arrays, DataFrame, Pandas Series, None objects

class MockHDF(HDFWriter, object):
    """Minimal HDFWriter fixture: serializes a single attribute.

    The attribute is deliberately named ``property`` (declared in
    ``hdf_properties``) so the stored key in the HDF file is predictable.
    """

    hdf_properties = ['property']
    class_properties = {}

    def __init__(self, property):
        # NOTE: the parameter shadows the `property` builtin; kept so the
        # stored attribute name matches hdf_properties.
        self.property = property

simple_objects = [1.5, 'random_string', 4.2e7]


@pytest.mark.parametrize("attr", simple_objects)
def test_simple_write(tmpdir, attr):
    """Round-trip a scalar/string attribute through to_hdf.

    Scalars end up in the ``scalars`` Series under the object's path.
    """
    hdf_file = str(tmpdir.mkdir('data').join('test.hdf'))
    mock = MockHDF(attr)
    mock.to_hdf(hdf_file, path='test')
    stored = pd.read_hdf(hdf_file, key='/test/mock_hdf/scalars')['property']
    assert mock.property == stored

mock_df = pd.DataFrame({'one': pd.Series([1., 2., 3.],
                                         index=['a', 'b', 'c']),
                        'two': pd.Series([1., 2., 3., 4.],
                                         index=['a', 'b', 'c', 'd'])})
complex_objects = [np.array([4.0e14, 2, 2e14, 27.5]),
                   pd.Series([1., 2., 3.]),
                   mock_df]


@pytest.mark.parametrize("attr", complex_objects)
def test_complex_obj_write(tmpdir, attr):
    """Round-trip ndarray/Series/DataFrame attributes and compare values."""
    hdf_file = str(tmpdir.mkdir('data').join('test.hdf'))
    mock = MockHDF(attr)
    mock.to_hdf(hdf_file, path='test')
    stored = pd.read_hdf(hdf_file, key='/test/mock_hdf/property').values
    assert_array_almost_equal(mock.property, stored)

arr = np.array([['L1', 'L1', 'L2', 'L2', 'L3', 'L3', 'L4', 'L4'],
                ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']])
mock_multiIndex = pd.MultiIndex.from_arrays(arr.transpose())


def test_MultiIndex_write(tmpdir):
    """A MultiIndex is stored as a frame; rebuild the index and compare."""
    hdf_file = str(tmpdir.mkdir('data').join('test.hdf'))
    mock = MockHDF(mock_multiIndex)
    mock.to_hdf(hdf_file, path='test')
    stored = pd.read_hdf(hdf_file, key='/test/mock_hdf/property')
    rebuilt = pd.MultiIndex.from_tuples(stored.unstack().values)
    pdt.assert_almost_equal(mock.property, rebuilt)

#Test Quantity Objects

quantity_objects = [np.array([4.0e14, 2, 2e14, 27.5]), mock_df]


@pytest.mark.parametrize("attr", quantity_objects)
def test_quantity_objects_write(tmpdir, attr):
    """Quantities are persisted as their CGS values; compare the raw store."""
    hdf_file = str(tmpdir.mkdir('data').join('test.hdf'))
    quantity = u.Quantity(attr, 'g/cm**3')
    mock = MockHDF(quantity)
    mock.to_hdf(hdf_file, path='test')
    stored = pd.read_hdf(hdf_file, key='/test/mock_hdf/property')
    assert_array_almost_equal(mock.property.cgs.value, stored)

scalar_quantity_objects = [1.5, 4.2e7]


@pytest.mark.parametrize("attr", scalar_quantity_objects)
def test_scalar_quantity_objects_write(tmpdir, attr):
    """Scalar quantities land in the scalars Series as plain CGS numbers."""
    hdf_file = str(tmpdir.mkdir('data').join('test.hdf'))
    quantity = u.Quantity(attr, 'g/cm**3')
    mock = MockHDF(quantity)
    mock.to_hdf(hdf_file, path='test')
    stored = pd.read_hdf(hdf_file, key='/test/mock_hdf/scalars/')['property']
    assert_array_almost_equal(mock.property.cgs.value, stored)

def test_none_write(tmpdir):
    """None is stored as the sentinel string 'none'; map it back on read."""
    hdf_file = str(tmpdir.mkdir('data').join('test.hdf'))
    mock = MockHDF(None)
    mock.to_hdf(hdf_file, path='test')
    stored = pd.read_hdf(hdf_file, key='/test/mock_hdf/scalars/')['property']
    # The writer encodes None as the string 'none'; decode before comparing.
    assert mock.property == (None if stored == 'none' else stored)

# Test class_properties parameter (like homologous_density is a class
# instance/object inside Model class)

class MockClass(HDFWriter, object):
    """Fixture holding a nested HDFWriter object.

    Mirrors the real layout where e.g. ``homologous_density`` is an object
    inside ``Model``; exercises the ``class_properties`` machinery.
    """

    hdf_properties = ['property', 'nested_object']
    class_properties = {'nested_object': MockHDF}

    def __init__(self, property, nested_object):
        # `property` shadows the builtin; name kept to match hdf_properties.
        self.property = property
        self.nested_object = nested_object

@pytest.mark.parametrize("attr", quantity_objects)
def test_objects_write(tmpdir, attr):
    """Nested HDFWriter objects serialize under their own sub-path."""
    hdf_file = str(tmpdir.mkdir('data').join('test.hdf'))
    inner = MockHDF(np.array([4.0e14, 2, 2e14, 27.5]))
    quantity = u.Quantity(attr, 'g/cm**3')
    mock = MockClass(quantity, inner)
    mock.to_hdf(hdf_file, path='test')

    stored_property = pd.read_hdf(hdf_file, key='/test/mock_class/property')
    assert_array_almost_equal(mock.property.cgs.value, stored_property)

    stored_nested = pd.read_hdf(
        hdf_file, key='/test/mock_class/nested_object/property')
    assert_array_almost_equal(mock.nested_object.property, stored_nested)


def test_snake_case():
    """convert_to_snake_case handles CamelCase, acronyms, and no-ops."""
    cases = [
        ("HomologousDensity", "homologous_density"),
        ("TARDISSpectrum", "tardis_spectrum"),
        ("BasePlasma", "base_plasma"),
        ("LTEPlasma", "lte_plasma"),
        ("MonteCarloRunner", "monte_carlo_runner"),
        # Already-snake-case input must pass through unchanged.
        ("homologous_density", "homologous_density"),
    ]
    for camel, expected in cases:
        assert MockHDF.convert_to_snake_case(camel) == expected
101 changes: 101 additions & 0 deletions tardis/io/util.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#Utility functions for the IO part of TARDIS

import os
import re
import pandas as pd
import numpy as np
import collections
Expand Down Expand Up @@ -166,6 +167,106 @@ def check_equality(item1, item2):
return True


class HDFWriter(object):
    """Mixin providing uniform HDF5 serialization for TARDIS objects.

    Subclasses declare the attribute names to persist in ``hdf_properties``
    and any nested serializable objects in ``class_properties``, then call
    :meth:`to_hdf`.
    """

    @staticmethod
    def to_hdf_util(path_or_buf, path, elements, complevel=9, complib='blosc'):
        """
        A function to uniformly store TARDIS data
        to an HDF file.

        Scalars will be stored in a Series under path/scalars
        1D arrays will be stored under path/property_name as distinct Series
        2D arrays will be stored under path/property_name as distinct DataFrames

        Units will be stored as their CGS value

        Parameters
        ----------
        path_or_buf:
            Path or buffer to the HDF store
        path: str
            Path inside the HDF store to store the `elements`
        elements: dict
            A dict of property names and their values to be
            stored.
        complevel: int
            Compression level for the scalars store (default 9)
        complib: str
            Compression library for the scalars store (default 'blosc')

        Returns
        -------

        """
        scalars = {}
        # items() instead of the Python-2-only iteritems() so this runs
        # under both Python 2 and Python 3.
        for key, value in elements.items():
            if value is None:
                # None cannot be stored directly; use a string sentinel.
                value = 'none'
            if hasattr(value, 'cgs'):
                # Astropy quantities are stored as their CGS magnitudes.
                value = value.cgs.value
            if np.isscalar(value):
                scalars[key] = value
            elif hasattr(value, 'shape'):
                if value.ndim == 1:
                    # This try,except block is only for model.plasma.levels
                    try:
                        pd.Series(value).to_hdf(path_or_buf,
                                                os.path.join(path, key))
                    except NotImplementedError:
                        pd.DataFrame(value).to_hdf(path_or_buf,
                                                   os.path.join(path, key))
                else:
                    pd.DataFrame(value).to_hdf(
                        path_or_buf, os.path.join(path, key))
            else:
                # Objects that implement to_hdf (nested HDFWriter instances)
                # serialize themselves; anything else is wrapped in a frame.
                try:
                    value.to_hdf(path_or_buf, path, name=key)
                except AttributeError:
                    data = pd.DataFrame([value])
                    data.to_hdf(path_or_buf, os.path.join(path, key))

        if scalars:
            scalars_series = pd.Series(scalars)

            # Unfortunately, with to_hdf we cannot append, so merge beforehand
            scalars_path = os.path.join(path, 'scalars')
            with pd.HDFStore(path_or_buf,
                             complevel=complevel, complib=complib) as store:
                if scalars_path in store:
                    scalars_series = store[scalars_path].append(scalars_series)
            scalars_series.to_hdf(path_or_buf, scalars_path)

    def get_properties(self):
        """Return a dict mapping each name in ``hdf_properties`` to its
        current attribute value on this instance."""
        data = {name: getattr(self, name) for name in self.hdf_properties}
        return data

    @staticmethod
    def convert_to_snake_case(s):
        """Convert a CamelCase identifier `s` to snake_case.

        Runs of capitals (acronyms like 'TARDIS' or 'LTE') are kept together,
        splitting only before the final capital of the run.
        """
        s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', s)
        return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower()

    def to_hdf(self, file_path, path='', name=None):
        """
        Store this object's ``hdf_properties`` under ``path/name``.

        Parameters
        ----------
        file_path: str
            Path or buffer to the HDF store
        path: str
            Path inside the HDF store to store the `elements`
        name: str
            Group inside the HDF store to which the `elements` need to be
            saved; defaults to ``self.hdf_name`` if present, otherwise the
            snake_case form of the class name.

        Returns
        -------

        """
        if name is None:
            try:
                name = self.hdf_name
            except AttributeError:
                name = self.convert_to_snake_case(self.__class__.__name__)

        data = self.get_properties()
        buff_path = os.path.join(path, name)
        self.to_hdf_util(file_path, buff_path, data)

#Deprecated
def to_hdf(path_or_buf, path, elements, complevel=9, complib='blosc'):
"""
A function to uniformly store TARDIS data
Expand Down