From c321b416540160cc3fad2a021258d7f3baad6eb7 Mon Sep 17 00:00:00 2001 From: Vaibhav Gupta Date: Wed, 14 Jun 2017 16:42:53 +0530 Subject: [PATCH 1/6] Added HDFWriter class in io/util.py --- tardis/io/util.py | 101 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) diff --git a/tardis/io/util.py b/tardis/io/util.py index 9582b12ce32..b2af53c8ba9 100644 --- a/tardis/io/util.py +++ b/tardis/io/util.py @@ -1,6 +1,7 @@ #Utility functions for the IO part of TARDIS import os +import re import pandas as pd import numpy as np import collections @@ -166,6 +167,106 @@ def check_equality(item1, item2): return True +class HDFWriter(object): + + @staticmethod + def to_hdf_util(path_or_buf, path, elements, complevel=9, complib='blosc'): + """ + A function to uniformly store TARDIS data + to an HDF file. + + Scalars will be stored in a Series under path/scalars + 1D arrays will be stored under path/property_name as distinct Series + 2D arrays will be stored under path/property_name as distinct DataFrames + + Units will be stored as their CGS value + + Parameters + ---------- + path_or_buf: + Path or buffer to the HDF store + path: str + Path inside the HDF store to store the `elements` + elements: dict + A dict of property names and their values to be + stored. + + Returns + ------- + + """ + scalars = {} + for key, value in elements.iteritems(): + if value is None: + value = 'none' + if hasattr(value, 'cgs'): + value = value.cgs.value + if np.isscalar(value): + scalars[key] = value + elif hasattr(value, 'shape'): + if value.ndim == 1: + # This try,except block is only for model.plasma.levels + try: + pd.Series(value).to_hdf(path_or_buf, + os.path.join(path, key)) + except NotImplementedError: + pd.DataFrame(value).to_hdf(path_or_buf, + os.path.join(path, key)) + else: + pd.DataFrame(value).to_hdf( + path_or_buf, os.path.join(path, key)) + else: + try: + value.to_hdf(path_or_buf, path, name=key) + except AttributeError: + data = pd.DataFrame([value]) + data.to_hdf(path_or_buf, os.path.join(path, key)) + + if scalars: + scalars_series = pd.Series(scalars) + + # Unfortunately, with to_hdf we cannot append, so merge beforehand + scalars_path = os.path.join(path, 'scalars') + with pd.HDFStore(path_or_buf, complevel=complevel, complib=complib) as store: + if scalars_path in store: + scalars_series = store[scalars_path].append(scalars_series) + scalars_series.to_hdf(path_or_buf, os.path.join(path, 'scalars')) + + def get_properties(self): + data = {name: getattr(self, name) for name in self.hdf_properties} + return data + + @staticmethod + def convert_to_camel_case(s): + s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', s) + return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower() + + def to_hdf(self, file_path, path='', name=None): + """ + Parameters + ---------- + file_path: str + Path or buffer to the HDF store + path: str + Path inside the HDF store to store the `elements` + name: str + Group inside the HDF store to which the `elements` need to be saved + + Returns + ------- + + """ + if name is None: + try: + name = self.hdf_name + except AttributeError: + name = self.convert_to_camel_case(self.__class__.__name__) + + data = self.get_properties() + buff_path = os.path.join(path, name) + self.to_hdf_util(file_path, buff_path, data) + +#Deprecated def to_hdf(path_or_buf, path, elements, complevel=9, complib='blosc'): """ A function to uniformly store TARDIS data From 6f651d80d6c806b53a2ace384e5b8f0533c66878 Mon Sep 17 00:00:00 2001 From: Vaibhav Gupta Date: Wed, 14 Jun 2017 17:49:29 +0530 Subject: [PATCH 2/6] Added unit tests for HDFWriter class --- tardis/io/tests/test_HDFWriter.py | 125 ++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 tardis/io/tests/test_HDFWriter.py diff --git a/tardis/io/tests/test_HDFWriter.py b/tardis/io/tests/test_HDFWriter.py new file mode 100644 index 00000000000..87516518712 --- /dev/null +++ b/tardis/io/tests/test_HDFWriter.py @@ -0,0 +1,125 @@ +import os + +import numpy as np +import pandas as pd +import pandas.util.testing as pdt +import pytest +from astropy import units as u +from astropy.tests.helper import assert_quantity_allclose +from numpy.testing import assert_almost_equal, assert_array_almost_equal + +from tardis.io.util import HDFWriter + + +#Test Cases + +#DataFrame +#None +#Numpy Arrays +#Strings +#Numeric Values +#Pandas Series Object +#MultiIndex Object +#Quantity Objects with - Numeric Values, Numpy Arrays, DataFrame, Pandas Series, None objects + +class MockHDF(HDFWriter, object): + hdf_properties = ['property'] + class_properties = {} + + def __init__(self, property): + self.property = property + +simple_objects = [1.5, 'random_string', 4.2e7] + +@pytest.mark.parametrize("attr", simple_objects) +def test_simple_write(tmpdir, attr): + fname = str(tmpdir.mkdir('data').join('test.hdf')) + actual = MockHDF(attr) + actual.to_hdf(fname, path='test') + expected = pd.read_hdf(fname, key='/test/mock_hdf/scalars')['property'] + assert actual.property == expected + +mock_df = pd.DataFrame({'one': pd.Series([1., 2., 3.], index=['a', 'b', 'c']), + 'two': pd.Series([1., 2., 3., 4.], index=['a', 'b', 'c', 'd'])}) +complex_objects = [np.array([4.0e14, 2, 2e14, 27.5]), + pd.Series([1., 2., 3.]), mock_df] + +@pytest.mark.parametrize("attr", complex_objects) +def test_complex_obj_write(tmpdir, attr): + fname = str(tmpdir.mkdir('data').join('test.hdf')) + actual = MockHDF(attr) + actual.to_hdf(fname, path='test') + expected = pd.read_hdf(fname, key='/test/mock_hdf/property').values + assert_array_almost_equal(actual.property, expected) + +arrays = [['L1', 'L1', 'L2', 'L2', 'L3', 'L3', 'L4', 'L4'], + ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] +tuples = list(zip(*arrays)) +mock_multiIndex = pd.MultiIndex.from_tuples(tuples, names=['first', 'second']) + +def test_MultiIndex_write(tmpdir): + fname = str(tmpdir.mkdir('data').join('test.hdf')) + actual = MockHDF(mock_multiIndex) + actual.to_hdf(fname, path='test') + expected = pd.read_hdf(fname, key='/test/mock_hdf/property') + expected = pd.MultiIndex.from_tuples( + expected.unstack().values, names=['first', 'second']) + pdt.assert_almost_equal(actual.property, expected) + +#Test Quantity Objects + +quantity_objects = [np.array([4.0e14, 2, 2e14, 27.5]), mock_df] + +@pytest.mark.parametrize("attr", quantity_objects) +def test_quantity_objects_write(tmpdir, attr): + fname = str(tmpdir.mkdir('data').join('test.hdf')) + attr_quantity = u.Quantity(attr, 'g/cm**3') + actual = MockHDF(attr_quantity) + actual.to_hdf(fname, path='test') + expected = pd.read_hdf(fname, key='/test/mock_hdf/property') + assert_array_almost_equal(actual.property.cgs.value, expected) + +scalar_quantity_objects = [1.5, 4.2e7] + +@pytest.mark.parametrize("attr", scalar_quantity_objects) +def test_scalar_quantity_objects_write(tmpdir, attr): + fname = str(tmpdir.mkdir('data').join('test.hdf')) + attr_quantity = u.Quantity(attr, 'g/cm**3') + actual = MockHDF(attr_quantity) + actual.to_hdf(fname, path='test') + expected = pd.read_hdf(fname, key='/test/mock_hdf/scalars/')['property'] + assert_array_almost_equal(actual.property.cgs.value, expected) + +def test_none_write(tmpdir): + fname = str(tmpdir.mkdir('data').join('test.hdf')) + actual = MockHDF(None) + actual.to_hdf(fname, path='test') + expected = pd.read_hdf(fname, key='/test/mock_hdf/scalars/')['property'] + if expected == 'none': + expected = None + assert actual.property == expected + +# Test class_properties parameter (like homologous_density is a class +# instance/object inside Model class) + +class MockClass(HDFWriter, object): + hdf_properties = ['property', 'nested_object'] + class_properties = {'nested_object': MockHDF} + + def __init__(self, property, nested_object): + self.property = property + self.nested_object = nested_object + +@pytest.mark.parametrize("attr", quantity_objects) +def test_objects_write(tmpdir, attr): + fname = str(tmpdir.mkdir('data').join('test.hdf')) + nested_object = MockHDF(np.array([4.0e14, 2, 2e14, 27.5])) + attr_quantity = u.Quantity(attr, 'g/cm**3') + actual = MockClass(attr_quantity, nested_object) + actual.to_hdf(fname, path='test') + expected_property = pd.read_hdf(fname, key='/test/mock_class/property') + assert_array_almost_equal(actual.property.cgs.value, expected_property) + nested_property = pd.read_hdf( + fname, key='/test/mock_class/nested_object/property') + assert_array_almost_equal( + actual.nested_object.property, nested_property) From 38b6bfd02605505041e349f32dc3f62431313431 Mon Sep 17 00:00:00 2001 From: Vaibhav Gupta Date: Wed, 14 Jun 2017 19:24:31 +0530 Subject: [PATCH 3/6] Correction in function name from camel_case to snake_case --- tardis/io/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tardis/io/util.py b/tardis/io/util.py index b2af53c8ba9..3bccf38984c 100644 --- a/tardis/io/util.py +++ b/tardis/io/util.py @@ -237,7 +237,7 @@ def get_properties(self): return data @staticmethod - def convert_to_camel_case(s): + def convert_to_snake_case(s): s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', s) return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower() From e0bccaf5e3fc40e4e69f5f3e57ac3f560cc3506e Mon Sep 17 00:00:00 2001 From: Vaibhav Gupta Date: Wed, 14 Jun 2017 19:29:27 +0530 Subject: [PATCH 4/6] Fix name of convert case function in to_hdf --- tardis/io/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tardis/io/util.py b/tardis/io/util.py index 3bccf38984c..fb3ce0ffce2 100644 --- a/tardis/io/util.py +++ b/tardis/io/util.py @@ -260,7 +260,7 @@ def to_hdf(self, file_path, path='', name=None): try: name = self.hdf_name except AttributeError: - name = self.convert_to_camel_case(self.__class__.__name__) + name = self.convert_to_snake_case(self.__class__.__name__) data = self.get_properties() buff_path = os.path.join(path, name) From 31fcc90f026ed5a118a8c6b389c4941a176c6526 Mon Sep 17 00:00:00 2001 From: Vaibhav Gupta Date: Thu, 15 Jun 2017 08:30:34 +0530 Subject: [PATCH 5/6] Added tests for convert_to_snake_case function --- tardis/io/tests/test_HDFWriter.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tardis/io/tests/test_HDFWriter.py b/tardis/io/tests/test_HDFWriter.py index 87516518712..4b9512c1181 100644 --- a/tardis/io/tests/test_HDFWriter.py +++ b/tardis/io/tests/test_HDFWriter.py @@ -123,3 +123,16 @@ def test_objects_write(tmpdir, attr): fname, key='/test/mock_class/nested_object/property') assert_array_almost_equal( actual.nested_object.property, nested_property) + + +def test_snake_case(): + obj = MockHDF(None) + assert obj.convert_to_snake_case( + "HomologousDensity") == "homologous_density" + assert obj.convert_to_snake_case("TARDISSpectrum") == "tardis_spectrum" + assert obj.convert_to_snake_case("BasePlasma") == "base_plasma" + assert obj.convert_to_snake_case("LTEPlasma") == "lte_plasma" + assert obj.convert_to_snake_case( + "MonteCarloRunner") == "monte_carlo_runner" + assert obj.convert_to_snake_case( + "homologous_density") == "homologous_density" From 5b8de4481784ef1393a4a3f97a4ca7225f7422ce Mon Sep 17 00:00:00 2001 From: Vaibhav Gupta Date: Fri, 16 Jun 2017 12:20:09 +0530 Subject: [PATCH 6/6] Create MultiIndex object through np.array() in HDFWriter tests --- tardis/io/tests/test_HDFWriter.py | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/tardis/io/tests/test_HDFWriter.py b/tardis/io/tests/test_HDFWriter.py index 4b9512c1181..a9031c79fe7 100644 --- a/tardis/io/tests/test_HDFWriter.py +++ b/tardis/io/tests/test_HDFWriter.py @@ -52,18 +52,16 @@ def test_complex_obj_write(tmpdir, attr): expected = pd.read_hdf(fname, key='/test/mock_hdf/property').values assert_array_almost_equal(actual.property, expected) -arrays = [['L1', 'L1', 'L2', 'L2', 'L3', 'L3', 'L4', 'L4'], - ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] -tuples = list(zip(*arrays)) -mock_multiIndex = pd.MultiIndex.from_tuples(tuples, names=['first', 'second']) +arr = np.array([['L1', 'L1', 'L2', 'L2', 'L3', 'L3', 'L4', 'L4'], + ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]) +mock_multiIndex = pd.MultiIndex.from_arrays(arr.transpose()) def test_MultiIndex_write(tmpdir): fname = str(tmpdir.mkdir('data').join('test.hdf')) actual = MockHDF(mock_multiIndex) actual.to_hdf(fname, path='test') expected = pd.read_hdf(fname, key='/test/mock_hdf/property') - expected = pd.MultiIndex.from_tuples( - expected.unstack().values, names=['first', 'second']) + expected = pd.MultiIndex.from_tuples(expected.unstack().values) pdt.assert_almost_equal(actual.property, expected) #Test Quantity Objects @@ -126,13 +124,12 @@ def test_objects_write(tmpdir, attr): def test_snake_case(): - obj = MockHDF(None) - assert obj.convert_to_snake_case( + assert MockHDF.convert_to_snake_case( "HomologousDensity") == "homologous_density" - assert obj.convert_to_snake_case("TARDISSpectrum") == "tardis_spectrum" - assert obj.convert_to_snake_case("BasePlasma") == "base_plasma" - assert obj.convert_to_snake_case("LTEPlasma") == "lte_plasma" - assert obj.convert_to_snake_case( + assert MockHDF.convert_to_snake_case("TARDISSpectrum") == "tardis_spectrum" + assert MockHDF.convert_to_snake_case("BasePlasma") == "base_plasma" + assert MockHDF.convert_to_snake_case("LTEPlasma") == "lte_plasma" + assert MockHDF.convert_to_snake_case( "MonteCarloRunner") == "monte_carlo_runner" - assert obj.convert_to_snake_case( + assert MockHDF.convert_to_snake_case( "homologous_density") == "homologous_density"