Fix to allow sparse dataframes to have nan column labels

Squashed commits: "Support for nan columns"; "Fix"; "Trigger Travis CI"; "jreback fixes"; "Release note update"
pandas-dev · Apr 9, 2015 · 7879205 · 7879205
1 parent 5dff7df
commit 7879205
Show file tree

Hide file tree

Showing 3 changed files with 18 additions and 9 deletions.
diff --git a/doc/source/whatsnew/v0.16.1.txt b/doc/source/whatsnew/v0.16.1.txt
@@ -98,3 +98,6 @@ Bug Fixes
 - Bug in ``read_csv`` and ``read_table`` when using ``skip_rows`` parameter if blank lines are present. (:issue:`9832`)
 
 - Bug in ``read_csv()`` interprets ``index_col=True`` as ``1`` (:issue:`9798`)
+
+- Bug in which ``SparseDataFrame`` could not take ``nan`` as a column name (:issue:`8822`)
+
diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py
@@ -100,7 +100,7 @@ def __init__(self, data=None, index=None, columns=None,
             mgr = self._init_mgr(
                 data, axes=dict(index=index, columns=columns), dtype=dtype, copy=copy)
         elif data is None:
-            data = {}
+            data = DataFrame()
 
             if index is None:
                 index = Index([])
@@ -115,7 +115,7 @@ def __init__(self, data=None, index=None, columns=None,
                                           index=index,
                                           kind=self._default_kind,
                                           fill_value=self._default_fill_value)
-            mgr = dict_to_manager(data, columns, index)
+            mgr = df_to_manager(data, columns, index)
             if dtype is not None:
                 mgr = mgr.astype(dtype)
 
@@ -155,7 +155,7 @@ def _init_dict(self, data, index, columns, dtype=None):
                                          kind=self._default_kind,
                                          fill_value=self._default_fill_value,
                                          copy=True)
-        sdict = {}
+        sdict = DataFrame()
         for k, v in compat.iteritems(data):
             if isinstance(v, Series):
                 # Force alignment, no copy necessary
@@ -181,7 +181,7 @@ def _init_dict(self, data, index, columns, dtype=None):
             if c not in sdict:
                 sdict[c] = sp_maker(nan_vec)
 
-        return dict_to_manager(sdict, columns, index)
+        return df_to_manager(sdict, columns, index)
 
     def _init_matrix(self, data, index, columns, dtype=None):
         data = _prep_ndarray(data, copy=False)
@@ -228,12 +228,12 @@ def _unpickle_sparse_frame_compat(self, state):
         else:
             index = idx
 
-        series_dict = {}
+        series_dict = DataFrame()
         for col, (sp_index, sp_values) in compat.iteritems(series):
             series_dict[col] = SparseSeries(sp_values, sparse_index=sp_index,
                                             fill_value=fv)
 
-        self._data = dict_to_manager(series_dict, columns, index)
+        self._data = df_to_manager(series_dict, columns, index)
         self._default_fill_value = fv
         self._default_kind = kind
 
@@ -737,13 +737,13 @@ def applymap(self, func):
         """
         return self.apply(lambda x: lmap(func, x))
 
-def dict_to_manager(sdict, columns, index):
-    """ create and return the block manager from a dict of series, columns, index """
+def df_to_manager(sdf, columns, index):
+    """ create and return the block manager from a dataframe of series, columns, index """
 
     # from BlockManager perspective
     axes = [_ensure_index(columns), _ensure_index(index)]
 
-    return create_block_manager_from_arrays([sdict[c] for c in columns], columns, axes)
+    return create_block_manager_from_arrays([sdf[c] for c in columns], columns, axes)
 
 
 def stack_sparse_frame(frame):

diff --git a/pandas/sparse/tests/test_sparse.py b/pandas/sparse/tests/test_sparse.py
@@ -1663,6 +1663,12 @@ def test_as_blocks(self):
         self.assertEqual(list(df_blocks.keys()), ['float64'])
         assert_frame_equal(df_blocks['float64'], df)
 
+    def test_nan_columnname(self):
+        # GH 8822
+        nan_colname = DataFrame(Series(1.0,index=[0]),columns=[nan])
+        nan_colname_sparse = nan_colname.to_sparse()
+        self.assertTrue(np.isnan(nan_colname_sparse.columns[0]))
+
 
 def _dense_series_compare(s, f):
     result = f(s)
Original file line number	Diff line number	Diff line change
Expand Up		@@ -98,3 +98,6 @@ Bug Fixes
		- Bug in ``read_csv`` and ``read_table`` when using ``skip_rows`` parameter if blank lines are present. (:issue:`9832`)

		- Bug in ``read_csv()`` interprets ``index_col=True`` as ``1`` (:issue:`9798`)

		- Bug in which ``SparseDataFrame`` could not take ``nan`` as a column name (:issue:`8822`)