diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index b143f855bb73f..4c518ff4f87dd 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -259,7 +259,7 @@ Indexing - Fixes bug where indexing with ``np.inf`` caused an ``OverflowError`` to be raised (:issue:`16957`) - Bug in reindexing on an empty ``CategoricalIndex`` (:issue:`16770`) - Fixes ``DataFrame.loc`` for setting with alignment and tz-aware ``DatetimeIndex`` (:issue:`16889`) - + I/O ^^^ @@ -284,7 +284,9 @@ Groupby/Resample/Rolling Sparse ^^^^^^ + - Bug in ``SparseSeries`` raises ``AttributeError`` when a dictionary is passed in as data (:issue:`16905`) +- Bug in :func:`SparseDataFrame.fillna` not filling all NaNs when frame was instantiated from SciPy sparse matrix (:issue:`16112`) Reshaping diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py index 5c1cf8c773501..42fc5189eebd8 100644 --- a/pandas/core/sparse/array.py +++ b/pandas/core/sparse/array.py @@ -595,14 +595,11 @@ def fillna(self, value, downcast=None): if issubclass(self.dtype.type, np.floating): value = float(value) - if self._null_fill_value: - return self._simple_new(self.sp_values, self.sp_index, - fill_value=value) - else: - new_values = self.sp_values.copy() - new_values[isnull(new_values)] = value - return self._simple_new(new_values, self.sp_index, - fill_value=self.fill_value) + new_values = np.where(isnull(self.sp_values), value, self.sp_values) + fill_value = value if self._null_fill_value else self.fill_value + + return self._simple_new(new_values, self.sp_index, + fill_value=fill_value) def sum(self, axis=0, *args, **kwargs): """ diff --git a/pandas/tests/sparse/test_frame.py b/pandas/tests/sparse/test_frame.py index 336b8f30716cd..d9cb69d56528c 100644 --- a/pandas/tests/sparse/test_frame.py +++ b/pandas/tests/sparse/test_frame.py @@ -1271,6 +1271,41 @@ def test_from_scipy_correct_ordering(spmatrix): tm.assert_frame_equal(sdf.to_dense(), expected.to_dense()) +def test_from_scipy_fillna(spmatrix): + # GH 16112 + tm.skip_if_no_package('scipy') + + arr = np.eye(3) + arr[1:, 0] = np.nan + + try: + spm = spmatrix(arr) + assert spm.dtype == arr.dtype + except (TypeError, AssertionError): + # If conversion to sparse fails for this spmatrix type and arr.dtype, + # then the combination is not currently supported in NumPy, so we + # can just skip testing it thoroughly + return + + sdf = pd.SparseDataFrame(spm).fillna(-1.0) + + # Returning frame should fill all nan values with -1.0 + expected = pd.SparseDataFrame({ + 0: pd.SparseSeries([1., -1, -1]), + 1: pd.SparseSeries([np.nan, 1, np.nan]), + 2: pd.SparseSeries([np.nan, np.nan, 1]), + }, default_fill_value=-1) + + # fill_value is expected to be what .fillna() above was called with + # We don't use -1 as initial fill_value in expected SparseSeries + # construction because this way we obtain "compressed" SparseArrays, + # avoiding having to construct them ourselves + for col in expected: + expected[col].fill_value = -1 + + tm.assert_sp_frame_equal(sdf, expected) + + class TestSparseDataFrameArithmetic(object): def test_numeric_op_scalar(self):