From 9f4c4e59fc7ffffeb5bd252ead2eba6693107c57 Mon Sep 17 00:00:00 2001
From: keitakurita
Date: Sat, 29 Apr 2017 14:31:41 +0900
Subject: [PATCH] BUG: Fixed GH16112 except for dia_format matrix

---
Reviewer notes (this section is not part of the commit message or the diff):
  * array.py: fillna now copies sp_values and overwrites its NaN entries with
    `value` before branching, so the `_null_fill_value` branch no longer
    returns the unfilled `self.sp_values`.
  * test_frame.py: the new test compares bsr_matrix input against a separate
    expectation and deliberately asserts nothing for dia_matrix input.

 pandas/core/sparse/array.py       |  7 +++---
 pandas/tests/sparse/test_frame.py | 42 +++++++++++++++++++++++++++++++
 2 files changed, 46 insertions(+), 3 deletions(-)

diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py
index ef3600266c0370..35de45a2b936c4 100644
--- a/pandas/core/sparse/array.py
+++ b/pandas/core/sparse/array.py
@@ -595,12 +595,13 @@ def fillna(self, value, downcast=None):
         if issubclass(self.dtype.type, np.floating):
             value = float(value)
 
+        new_values = self.sp_values.copy()
+        new_values[isnull(new_values)] = value
+
         if self._null_fill_value:
-            return self._simple_new(self.sp_values, self.sp_index,
+            return self._simple_new(new_values, self.sp_index,
                                     fill_value=value)
         else:
-            new_values = self.sp_values.copy()
-            new_values[isnull(new_values)] = value
             return self._simple_new(new_values, self.sp_index,
                                     fill_value=self.fill_value)
 
diff --git a/pandas/tests/sparse/test_frame.py b/pandas/tests/sparse/test_frame.py
index 6b54dca8e93d51..4d923fbb9246c4 100644
--- a/pandas/tests/sparse/test_frame.py
+++ b/pandas/tests/sparse/test_frame.py
@@ -1245,6 +1245,48 @@ def test_from_to_scipy_object(spmatrix, fill_value):
     assert sdf.to_coo().dtype == res_dtype
 
 
+def test_from_scipy_object_fillna(spmatrix):
+    columns = list('cd')
+    index = list('ab')
+    tm.skip_if_no_package('scipy', max_version='0.19.0')
+
+    # Explicitly convert one zero to np.nan
+    arr = np.eye(2)
+    arr[1, 0] = np.nan
+    try:
+        spm = spmatrix(arr)
+        assert spm.dtype == arr.dtype
+    except (TypeError, AssertionError):
+        # If conversion to sparse fails for this spmatrix type and arr.dtype,
+        # then the combination is not currently supported in NumPy, so we
+        # can just skip testing it thoroughly
+        return
+
+    sdf = pd.SparseDataFrame(spm, index=index, columns=columns).fillna(-1.0)
+
+    # Returning frame should fill all nan values with -1.0
+    expected = pd.SparseDataFrame({"c": {"a": 1.0, "b": np.nan},
+                                   "d": {"a": np.nan, "b": 1.0}}).fillna(-1.0)
+    expected_bsr = pd.SparseDataFrame({"c": {"a": 1.0, "b": np.nan},
+                                       "d": {"a": 0.0, "b": 1.0}}).fillna(-1.0)
+
+    from scipy.sparse.bsr import bsr_matrix
+    from scipy.sparse.dia import dia_matrix
+    if spmatrix == bsr_matrix:
+        # A SparseDataFrame from a bsr matrix does not fill 0s
+        # Therefore, only the explicit nan value needs to be filled with -1
+        tm.assert_frame_equal(sdf.to_dense(), expected_bsr.to_dense())
+    elif spmatrix == dia_matrix:
+        # the dia matrix has a bug of a different nature,
+        # so is currently passed in this test suite
+        pass
+    else:
+        # The internal representations can differ.
+        # This test is here to ensure that all nan values are filled,
+        # regardless of origin.
+        tm.assert_frame_equal(sdf.to_dense(), expected.to_dense())
+
+
 class TestSparseDataFrameArithmetic(tm.TestCase):
 
     def test_numeric_op_scalar(self):