Skip to content

Commit

Permalink
Compare with empty DataFrame, not just check empty
Browse files Browse the repository at this point in the history
  • Loading branch information
BranYang committed Feb 2, 2016
1 parent 0d99c2a commit 0528c57
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 8 deletions.
11 changes: 9 additions & 2 deletions pandas/core/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -1095,8 +1095,7 @@ def _get_dummies_1d(data, prefix, prefix_sep='_', dummy_na=False,
cat = Categorical.from_array(Series(data), ordered=True)
levels = cat.categories

# if all NaN
if not dummy_na and len(levels) == 0:
def get_empty_Frame(data, sparse):
if isinstance(data, Series):
index = data.index
else:
Expand All @@ -1106,11 +1105,19 @@ def _get_dummies_1d(data, prefix, prefix_sep='_', dummy_na=False,
else:
return SparseDataFrame(index=index)

# if all NaN
if not dummy_na and len(levels) == 0:
return get_empty_Frame(data, sparse)

codes = cat.codes.copy()
if dummy_na:
codes[codes == -1] = len(cat.categories)
levels = np.append(cat.categories, np.nan)

# A single level (one real category, or only the fake NaN level appended for dummy_na) is removed by drop_first, leaving no columns
if drop_first and len(levels) == 1:
return get_empty_Frame(data, sparse)

number_of_cols = len(levels)

if prefix is not None:
Expand Down
27 changes: 21 additions & 6 deletions pandas/tests/test_reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -432,13 +432,28 @@ def test_basic_drop_first(self):
assert_frame_equal(result, expected)

expected.index = list('ABC')
result = get_dummies(s_series_index, sparse=self.sparse, drop_first=True)
result = get_dummies(s_series_index, sparse=self.sparse,
drop_first=True)
assert_frame_equal(result, expected)

# Test the case that categorical variable only has one level.
def test_basic_drop_first_one_level(self):
result = get_dummies(list('aaa'), sparse=self.sparse, drop_first=True)
self.assertEqual(result.empty, True)
# Test the case that categorical variable only has one level.
s_list = list('aaa')
s_series = Series(s_list)
s_series_index = Series(s_list, list('ABC'))

expected = DataFrame(index=np.arange(3))

result = get_dummies(s_list, sparse=self.sparse, drop_first=True)
assert_frame_equal(result, expected)

result = get_dummies(s_series, sparse=self.sparse, drop_first=True)
assert_frame_equal(result, expected)

expected = DataFrame(index=list('ABC'))
result = get_dummies(s_series_index, sparse=self.sparse,
drop_first=True)
assert_frame_equal(result, expected)

def test_basic_drop_first_NA(self):
# Test NA handling together with drop_first
Expand All @@ -449,7 +464,6 @@ def test_basic_drop_first_NA(self):
2: 0.0}})
assert_frame_equal(res, exp)

# Sparse dataframes do not allow nan labelled columns, see #GH8822
res_na = get_dummies(s_NA, dummy_na=True, sparse=self.sparse,
drop_first=True)
exp_na = DataFrame({'b': {0: 0.0,
Expand All @@ -463,7 +477,8 @@ def test_basic_drop_first_NA(self):

res_just_na = get_dummies([nan], dummy_na=True, sparse=self.sparse,
drop_first=True)
tm.assert_numpy_array_equal(res_just_na.empty, True)
exp_just_na = DataFrame(index=np.arange(1))
assert_frame_equal(res_just_na, exp_just_na)

def test_dataframe_dummies_drop_first(self):
df = self.df[['A', 'B']]
Expand Down

0 comments on commit 0528c57

Please sign in to comment.