Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[TEP014] Added HDFWriter class + Unit Tests #744

Merged
merged 6 commits into from
Jun 20, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
135 changes: 135 additions & 0 deletions tardis/io/tests/test_HDFWriter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
import os

import numpy as np
import pandas as pd
import pandas.util.testing as pdt
import pytest
from astropy import units as u
from astropy.tests.helper import assert_quantity_allclose
from numpy.testing import assert_almost_equal, assert_array_almost_equal

from tardis.io.util import HDFWriter


#Test Cases

#DataFrame
#None
#Numpy Arrays
#Strings
#Numeric Values
#Pandas Series Object
#MultiIndex Object
#Quantity Objects with - Numeric Values, Numpy Arrays, DataFrame, Pandas Series, None objects

class MockHDF(HDFWriter, object):
    """Minimal HDFWriter fixture: serializes a single attribute.

    The attribute is deliberately named ``property`` (declared in
    ``hdf_properties``) so the stored key in the HDF file is predictable.
    """

    hdf_properties = ['property']
    class_properties = {}

    def __init__(self, property):
        # NOTE: the parameter shadows the `property` builtin; kept so the
        # stored attribute name matches hdf_properties.
        self.property = property

simple_objects = [1.5, 'random_string', 4.2e7]


@pytest.mark.parametrize("attr", simple_objects)
def test_simple_write(tmpdir, attr):
    """Round-trip a scalar/string attribute through to_hdf.

    Scalars end up in the ``scalars`` Series under the object's path.
    """
    hdf_file = str(tmpdir.mkdir('data').join('test.hdf'))
    mock = MockHDF(attr)
    mock.to_hdf(hdf_file, path='test')
    stored = pd.read_hdf(hdf_file, key='/test/mock_hdf/scalars')['property']
    assert mock.property == stored

mock_df = pd.DataFrame({'one': pd.Series([1., 2., 3.],
                                         index=['a', 'b', 'c']),
                        'two': pd.Series([1., 2., 3., 4.],
                                         index=['a', 'b', 'c', 'd'])})
complex_objects = [np.array([4.0e14, 2, 2e14, 27.5]),
                   pd.Series([1., 2., 3.]),
                   mock_df]


@pytest.mark.parametrize("attr", complex_objects)
def test_complex_obj_write(tmpdir, attr):
    """Round-trip ndarray/Series/DataFrame attributes and compare values."""
    hdf_file = str(tmpdir.mkdir('data').join('test.hdf'))
    mock = MockHDF(attr)
    mock.to_hdf(hdf_file, path='test')
    stored = pd.read_hdf(hdf_file, key='/test/mock_hdf/property').values
    assert_array_almost_equal(mock.property, stored)

arr = np.array([['L1', 'L1', 'L2', 'L2', 'L3', 'L3', 'L4', 'L4'],
                ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']])
mock_multiIndex = pd.MultiIndex.from_arrays(arr.transpose())


def test_MultiIndex_write(tmpdir):
    """A MultiIndex is stored as a frame; rebuild the index and compare."""
    hdf_file = str(tmpdir.mkdir('data').join('test.hdf'))
    mock = MockHDF(mock_multiIndex)
    mock.to_hdf(hdf_file, path='test')
    stored = pd.read_hdf(hdf_file, key='/test/mock_hdf/property')
    rebuilt = pd.MultiIndex.from_tuples(stored.unstack().values)
    pdt.assert_almost_equal(mock.property, rebuilt)

#Test Quantity Objects

quantity_objects = [np.array([4.0e14, 2, 2e14, 27.5]), mock_df]


@pytest.mark.parametrize("attr", quantity_objects)
def test_quantity_objects_write(tmpdir, attr):
    """Quantities are persisted as their CGS values; compare the raw store."""
    hdf_file = str(tmpdir.mkdir('data').join('test.hdf'))
    quantity = u.Quantity(attr, 'g/cm**3')
    mock = MockHDF(quantity)
    mock.to_hdf(hdf_file, path='test')
    stored = pd.read_hdf(hdf_file, key='/test/mock_hdf/property')
    assert_array_almost_equal(mock.property.cgs.value, stored)

scalar_quantity_objects = [1.5, 4.2e7]


@pytest.mark.parametrize("attr", scalar_quantity_objects)
def test_scalar_quantity_objects_write(tmpdir, attr):
    """Scalar quantities land in the scalars Series as plain CGS numbers."""
    hdf_file = str(tmpdir.mkdir('data').join('test.hdf'))
    quantity = u.Quantity(attr, 'g/cm**3')
    mock = MockHDF(quantity)
    mock.to_hdf(hdf_file, path='test')
    stored = pd.read_hdf(hdf_file, key='/test/mock_hdf/scalars/')['property']
    assert_array_almost_equal(mock.property.cgs.value, stored)

def test_none_write(tmpdir):
    """None is stored as the sentinel string 'none'; map it back on read."""
    hdf_file = str(tmpdir.mkdir('data').join('test.hdf'))
    mock = MockHDF(None)
    mock.to_hdf(hdf_file, path='test')
    stored = pd.read_hdf(hdf_file, key='/test/mock_hdf/scalars/')['property']
    # The writer encodes None as the string 'none'; decode before comparing.
    assert mock.property == (None if stored == 'none' else stored)

# Test class_properties parameter (like homologous_density is a class
# instance/object inside Model class)

class MockClass(HDFWriter, object):
    """Fixture holding a nested HDFWriter object.

    Mirrors the real layout where e.g. ``homologous_density`` is an object
    inside ``Model``; exercises the ``class_properties`` machinery.
    """

    hdf_properties = ['property', 'nested_object']
    class_properties = {'nested_object': MockHDF}

    def __init__(self, property, nested_object):
        # `property` shadows the builtin; name kept to match hdf_properties.
        self.property = property
        self.nested_object = nested_object

@pytest.mark.parametrize("attr", quantity_objects)
def test_objects_write(tmpdir, attr):
    """Nested HDFWriter objects serialize under their own sub-path."""
    hdf_file = str(tmpdir.mkdir('data').join('test.hdf'))
    inner = MockHDF(np.array([4.0e14, 2, 2e14, 27.5]))
    quantity = u.Quantity(attr, 'g/cm**3')
    mock = MockClass(quantity, inner)
    mock.to_hdf(hdf_file, path='test')

    stored_property = pd.read_hdf(hdf_file, key='/test/mock_class/property')
    assert_array_almost_equal(mock.property.cgs.value, stored_property)

    stored_nested = pd.read_hdf(
        hdf_file, key='/test/mock_class/nested_object/property')
    assert_array_almost_equal(mock.nested_object.property, stored_nested)


def test_snake_case():
    """convert_to_snake_case handles CamelCase, acronyms, and no-ops."""
    cases = [
        ("HomologousDensity", "homologous_density"),
        ("TARDISSpectrum", "tardis_spectrum"),
        ("BasePlasma", "base_plasma"),
        ("LTEPlasma", "lte_plasma"),
        ("MonteCarloRunner", "monte_carlo_runner"),
        # Already-snake-case input must pass through unchanged.
        ("homologous_density", "homologous_density"),
    ]
    for camel, expected in cases:
        assert MockHDF.convert_to_snake_case(camel) == expected
101 changes: 101 additions & 0 deletions tardis/io/util.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#Utility functions for the IO part of TARDIS

import os
import re
import pandas as pd
import numpy as np
import collections
Expand Down Expand Up @@ -166,6 +167,106 @@ def check_equality(item1, item2):
return True


class HDFWriter(object):
    """Mixin providing uniform HDF5 serialization for TARDIS objects.

    Subclasses declare the attribute names to persist in ``hdf_properties``
    and any nested serializable objects in ``class_properties``, then call
    :meth:`to_hdf`.
    """

    @staticmethod
    def to_hdf_util(path_or_buf, path, elements, complevel=9, complib='blosc'):
        """
        A function to uniformly store TARDIS data
        to an HDF file.

        Scalars will be stored in a Series under path/scalars
        1D arrays will be stored under path/property_name as distinct Series
        2D arrays will be stored under path/property_name as distinct DataFrames

        Units will be stored as their CGS value

        Parameters
        ----------
        path_or_buf:
            Path or buffer to the HDF store
        path: str
            Path inside the HDF store to store the `elements`
        elements: dict
            A dict of property names and their values to be
            stored.
        complevel: int
            Compression level for the scalars store (default 9)
        complib: str
            Compression library for the scalars store (default 'blosc')

        Returns
        -------

        """
        scalars = {}
        # items() instead of the Python-2-only iteritems() so this runs
        # under both Python 2 and Python 3.
        for key, value in elements.items():
            if value is None:
                # None cannot be stored directly; use a string sentinel.
                value = 'none'
            if hasattr(value, 'cgs'):
                # Astropy quantities are stored as their CGS magnitudes.
                value = value.cgs.value
            if np.isscalar(value):
                scalars[key] = value
            elif hasattr(value, 'shape'):
                if value.ndim == 1:
                    # This try,except block is only for model.plasma.levels
                    try:
                        pd.Series(value).to_hdf(path_or_buf,
                                                os.path.join(path, key))
                    except NotImplementedError:
                        pd.DataFrame(value).to_hdf(path_or_buf,
                                                   os.path.join(path, key))
                else:
                    pd.DataFrame(value).to_hdf(
                        path_or_buf, os.path.join(path, key))
            else:
                # Objects that implement to_hdf (nested HDFWriter instances)
                # serialize themselves; anything else is wrapped in a frame.
                try:
                    value.to_hdf(path_or_buf, path, name=key)
                except AttributeError:
                    data = pd.DataFrame([value])
                    data.to_hdf(path_or_buf, os.path.join(path, key))

        if scalars:
            scalars_series = pd.Series(scalars)

            # Unfortunately, with to_hdf we cannot append, so merge beforehand
            scalars_path = os.path.join(path, 'scalars')
            with pd.HDFStore(path_or_buf,
                             complevel=complevel, complib=complib) as store:
                if scalars_path in store:
                    scalars_series = store[scalars_path].append(scalars_series)
            scalars_series.to_hdf(path_or_buf, scalars_path)

    def get_properties(self):
        """Return a dict mapping each name in ``hdf_properties`` to its
        current attribute value on this instance."""
        data = {name: getattr(self, name) for name in self.hdf_properties}
        return data

    @staticmethod
    def convert_to_snake_case(s):
        """Convert a CamelCase identifier `s` to snake_case.

        Runs of capitals (acronyms like 'TARDIS' or 'LTE') are kept together,
        splitting only before the final capital of the run.
        """
        s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', s)
        return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower()

    def to_hdf(self, file_path, path='', name=None):
        """
        Store this object's ``hdf_properties`` under ``path/name``.

        Parameters
        ----------
        file_path: str
            Path or buffer to the HDF store
        path: str
            Path inside the HDF store to store the `elements`
        name: str
            Group inside the HDF store to which the `elements` need to be
            saved; defaults to ``self.hdf_name`` if present, otherwise the
            snake_case form of the class name.

        Returns
        -------

        """
        if name is None:
            try:
                name = self.hdf_name
            except AttributeError:
                name = self.convert_to_snake_case(self.__class__.__name__)

        data = self.get_properties()
        buff_path = os.path.join(path, name)
        self.to_hdf_util(file_path, buff_path, data)

#Deprecated
def to_hdf(path_or_buf, path, elements, complevel=9, complib='blosc'):
"""
A function to uniformly store TARDIS data
Expand Down