diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py
index bb7eba496e34a..c4b7005775536 100644
--- a/pandas/core/reshape.py
+++ b/pandas/core/reshape.py
@@ -1095,8 +1095,7 @@ def _get_dummies_1d(data, prefix, prefix_sep='_', dummy_na=False,
     cat = Categorical.from_array(Series(data), ordered=True)
     levels = cat.categories
 
-    # if all NaN
-    if not dummy_na and len(levels) == 0:
+    def get_empty_Frame(data, sparse):
         if isinstance(data, Series):
             index = data.index
         else:
@@ -1106,11 +1105,19 @@ def _get_dummies_1d(data, prefix, prefix_sep='_', dummy_na=False,
         else:
             return SparseDataFrame(index=index)
 
+    # if all NaN
+    if not dummy_na and len(levels) == 0:
+        return get_empty_Frame(data, sparse)
+
     codes = cat.codes.copy()
     if dummy_na:
         codes[codes == -1] = len(cat.categories)
         levels = np.append(cat.categories, np.nan)
 
+    # if dummy_na, we just fake a nan level. drop_first will drop it again
+    if drop_first and len(levels) == 1:
+        return get_empty_Frame(data, sparse)
+
     number_of_cols = len(levels)
 
     if prefix is not None:
diff --git a/pandas/tests/test_reshape.py b/pandas/tests/test_reshape.py
index b0dd578ae2bf7..671c345898ec2 100644
--- a/pandas/tests/test_reshape.py
+++ b/pandas/tests/test_reshape.py
@@ -432,13 +432,28 @@ def test_basic_drop_first(self):
         assert_frame_equal(result, expected)
 
         expected.index = list('ABC')
-        result = get_dummies(s_series_index, sparse=self.sparse, drop_first=True)
+        result = get_dummies(s_series_index, sparse=self.sparse,
+                             drop_first=True)
         assert_frame_equal(result, expected)
 
-    # Test the case that categorical variable only has one level.
     def test_basic_drop_first_one_level(self):
-        result = get_dummies(list('aaa'), sparse=self.sparse, drop_first=True)
-        self.assertEqual(result.empty, True)
+        # Test the case that categorical variable only has one level.
+        s_list = list('aaa')
+        s_series = Series(s_list)
+        s_series_index = Series(s_list, list('ABC'))
+
+        expected = DataFrame(index=np.arange(3))
+
+        result = get_dummies(s_list, sparse=self.sparse, drop_first=True)
+        assert_frame_equal(result, expected)
+
+        result = get_dummies(s_series, sparse=self.sparse, drop_first=True)
+        assert_frame_equal(result, expected)
+
+        expected = DataFrame(index=list('ABC'))
+        result = get_dummies(s_series_index, sparse=self.sparse,
+                             drop_first=True)
+        assert_frame_equal(result, expected)
 
     def test_basic_drop_first_NA(self):
         # Test NA hadling together with drop_first
@@ -449,7 +464,6 @@ def test_basic_drop_first_NA(self):
                                2: 0.0}})
         assert_frame_equal(res, exp)
 
-        # Sparse dataframes do not allow nan labelled columns, see #GH8822
         res_na = get_dummies(s_NA, dummy_na=True, sparse=self.sparse,
                              drop_first=True)
         exp_na = DataFrame({'b': {0: 0.0,
@@ -463,7 +477,8 @@ def test_basic_drop_first_NA(self):
 
         res_just_na = get_dummies([nan], dummy_na=True, sparse=self.sparse,
                                   drop_first=True)
-        tm.assert_numpy_array_equal(res_just_na.empty, True)
+        exp_just_na = DataFrame(index=np.arange(1))
+        assert_frame_equal(res_just_na, exp_just_na)
 
     def test_dataframe_dummies_drop_first(self):
         df = self.df[['A', 'B']]