From f9ba6fed36d79485174fd12d07afdca9ed786471 Mon Sep 17 00:00:00 2001 From: Alan Velasco Date: Sun, 8 Oct 2017 12:04:56 -0500 Subject: [PATCH] ERR: Raise ValueError when setting scalars in a dataframe with no index ( #16823) (#16968) --- doc/source/whatsnew/v0.21.0.txt | 2 ++ pandas/core/frame.py | 12 ++++++++---- pandas/core/reshape/pivot.py | 3 +++ pandas/tests/frame/test_indexing.py | 5 +++++ pandas/tests/indexing/test_loc.py | 11 +++++------ pandas/tests/indexing/test_partial.py | 20 ++++++-------------- pandas/tests/reshape/test_pivot.py | 3 ++- 7 files changed, 31 insertions(+), 25 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index b8b06ee0fe94e..1e9c402dac73e 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -706,6 +706,8 @@ Other API Changes - Restricted DateOffset keyword arguments. Previously, ``DateOffset`` subclasses allowed arbitrary keyword arguments which could lead to unexpected behavior. Now, only valid arguments will be accepted. (:issue:`17176`). - Pandas no longer registers matplotlib converters on import. The converters will be registered and used when the first plot is draw (:issue:`17710`) +- Setting on a column with a scalar value and 0-len index now raises a ``ValueError`` (:issue:`16823`) + .. 
_whatsnew_0210.deprecations: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 142ccf1f034bc..d907492759dbd 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2531,13 +2531,17 @@ def _ensure_valid_index(self, value): passed value """ # GH5632, make sure that we are a Series convertible - if not len(self.index) and is_list_like(value): + if not len(self.index): + if not is_list_like(value): + # GH16823, Raise an error due to loss of information + raise ValueError('If using all scalar values, you must pass' + ' an index') try: value = Series(value) except: - raise ValueError('Cannot set a frame with no defined index ' - 'and a value that cannot be converted to a ' - 'Series') + raise ValueError('Cannot set a frame with no defined ' + 'index and a value that cannot be ' + 'converted to a Series') self._data = self._data.reindex_axis(value.index.copy(), axis=1, fill_value=np.nan) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index d19de6030d473..38c28af4d6ecb 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -454,6 +454,9 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None, from pandas import DataFrame df = DataFrame(data, index=common_idx) + if not len(df): + return DataFrame(index=common_idx) + if values is None: df['__dummy__'] = 0 kwargs = {'aggfunc': len, 'fill_value': 0} diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index d00f56830a6fa..1a16e4ef48b64 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -721,6 +721,11 @@ def test_setitem_empty_frame_with_boolean(self): df[df > df2] = 47 assert_frame_equal(df, df2) + def test_setitem_scalars_no_index(self): + # GH16823 + df = DataFrame() + pytest.raises(ValueError, df.__setitem__, 'foo', 1) + def test_getitem_empty_frame_with_boolean(self): # Test for issue #11859 diff --git a/pandas/tests/indexing/test_loc.py 
b/pandas/tests/indexing/test_loc.py index c6f38aeba9e87..bf3a840aced8c 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -423,15 +423,14 @@ def test_loc_setitem_consistency(self): def test_loc_setitem_consistency_empty(self): # empty (essentially noops) - expected = DataFrame(columns=['x', 'y']) - expected['x'] = expected['x'].astype(np.int64) + # GH16823 df = DataFrame(columns=['x', 'y']) - df.loc[:, 'x'] = 1 - tm.assert_frame_equal(df, expected) + with tm.assert_raises_regex(ValueError, 'If using all scalar values'): + df.loc[:, 'x'] = 1 df = DataFrame(columns=['x', 'y']) - df['x'] = 1 - tm.assert_frame_equal(df, expected) + with tm.assert_raises_regex(ValueError, 'If using all scalar values'): + df['x'] = 1 def test_loc_setitem_consistency_slice_column_len(self): # .loc[:,column] setting with slice == len of the column diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 41ddfe934a131..16f325393649f 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -575,24 +575,16 @@ def f(): def test_partial_set_empty_frame_row(self): # GH5720, GH5744 # don't create rows when empty - expected = DataFrame(columns=['A', 'B', 'New'], - index=pd.Index([], dtype='int64')) - expected['A'] = expected['A'].astype('int64') - expected['B'] = expected['B'].astype('float64') - expected['New'] = expected['New'].astype('float64') - df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]}) y = df[df.A > 5] - y['New'] = np.nan - tm.assert_frame_equal(y, expected) - # tm.assert_frame_equal(y,expected) + # GH16823 + # Setting a column with a scalar and no index should raise + with tm.assert_raises_regex(ValueError, 'If using all scalar values'): + y['New'] = np.nan - expected = DataFrame(columns=['a', 'b', 'c c', 'd']) - expected['d'] = expected['d'].astype('int64') df = DataFrame(columns=['a', 'b', 'c c']) - df['d'] = 3 - tm.assert_frame_equal(df, expected) - 
tm.assert_series_equal(df['c c'], Series(name='c c', dtype=object)) + with tm.assert_raises_regex(ValueError, 'If using all scalar values'): + df['d'] = 3 # reindex columns is ok df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]}) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 07d3052c16756..4126bb1de84d7 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1226,7 +1226,8 @@ def test_crosstab_no_overlap(self): s2 = pd.Series([4, 5, 6], index=[4, 5, 6]) actual = crosstab(s1, s2) - expected = pd.DataFrame() + expected = pd.DataFrame( + index=pd.Index([], dtype='int64')).astype('int64') tm.assert_frame_equal(actual, expected)