From 66a4714ccc9519709b4484c3c17af1eab22e162e Mon Sep 17 00:00:00 2001 From: Artemy Kolchinsky Date: Wed, 15 Oct 2014 14:39:51 -0400 Subject: [PATCH] Fix to allow sparse dataframes to have nan column labels Support for nan columns Fix --- pandas/sparse/frame.py | 18 +++++++++--------- pandas/sparse/tests/test_sparse.py | 5 +++++ 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py index bd34c7e5f02b2..acf10a623be32 100644 --- a/pandas/sparse/frame.py +++ b/pandas/sparse/frame.py @@ -101,7 +101,7 @@ def __init__(self, data=None, index=None, columns=None, mgr = self._init_mgr( data, axes=dict(index=index, columns=columns), dtype=dtype, copy=copy) elif data is None: - data = {} + data = DataFrame() if index is None: index = Index([]) @@ -116,7 +116,7 @@ def __init__(self, data=None, index=None, columns=None, index=index, kind=self._default_kind, fill_value=self._default_fill_value) - mgr = dict_to_manager(data, columns, index) + mgr = df_to_manager(data, columns, index) if dtype is not None: mgr = mgr.astype(dtype) @@ -156,7 +156,7 @@ def _init_dict(self, data, index, columns, dtype=None): kind=self._default_kind, fill_value=self._default_fill_value, copy=True) - sdict = {} + sdict = DataFrame() for k, v in compat.iteritems(data): if isinstance(v, Series): # Force alignment, no copy necessary @@ -182,7 +182,7 @@ def _init_dict(self, data, index, columns, dtype=None): if c not in sdict: sdict[c] = sp_maker(nan_vec) - return dict_to_manager(sdict, columns, index) + return df_to_manager(sdict, columns, index) def _init_matrix(self, data, index, columns, dtype=None): data = _prep_ndarray(data, copy=False) @@ -229,12 +229,12 @@ def _unpickle_sparse_frame_compat(self, state): else: index = idx - series_dict = {} + series_dict = DataFrame() for col, (sp_index, sp_values) in compat.iteritems(series): series_dict[col] = SparseSeries(sp_values, sparse_index=sp_index, fill_value=fv) - self._data = dict_to_manager(series_dict, columns, index) + self._data = df_to_manager(series_dict, columns, index) self._default_fill_value = fv self._default_kind = kind @@ -738,13 +738,13 @@ def applymap(self, func): """ return self.apply(lambda x: lmap(func, x)) -def dict_to_manager(sdict, columns, index): - """ create and return the block manager from a dict of series, columns, index """ +def df_to_manager(sdf, columns, index): + """ create and return the block manager from a dataframe of series, columns, index """ # from BlockManager perspective axes = [_ensure_index(columns), _ensure_index(index)] - return create_block_manager_from_arrays([sdict[c] for c in columns], columns, axes) + return create_block_manager_from_arrays([sdf[c] for c in columns], columns, axes) def stack_sparse_frame(frame): diff --git a/pandas/sparse/tests/test_sparse.py b/pandas/sparse/tests/test_sparse.py index 9197a4fc22b9c..cd9f4cd936e82 100644 --- a/pandas/sparse/tests/test_sparse.py +++ b/pandas/sparse/tests/test_sparse.py @@ -1527,6 +1527,11 @@ def test_as_blocks(self): self.assertEqual(list(df_blocks.keys()), ['float64']) assert_frame_equal(df_blocks['float64'], df) + def test_nan_columnname(self): + nan_colname = DataFrame(Series(1.0,index=[0]),columns=[nan]) + nan_colname_sparse = nan_colname.to_sparse() + assert(np.isnan(nan_colname_sparse.columns[0])) + def _dense_series_compare(s, f): result = f(s)