Patch summary (GH 13759):
  * pandas/tools/tests/test_concat.py: adds test_union_categoricals_nan and
    test_union_categoricals_empty, covering union_categoricals on inputs that
    contain NaN/NaT values and on empty categoricals.
  * pandas/types/concat.py: union_categoricals now maps codes through
    take_1d(indexer, c.codes, fill_value=-1) when a categorical has
    categories, and passes c.codes through unchanged when it has none.

diff --git a/pandas/tools/tests/test_concat.py b/pandas/tools/tests/test_concat.py
index 568cf63c02e30..13c6b72ade27b 100644
--- a/pandas/tools/tests/test_concat.py
+++ b/pandas/tools/tests/test_concat.py
@@ -889,6 +889,59 @@ def test_union_categorical(self):
         with tm.assertRaises(ValueError):
             union_categoricals([])
 
+    def test_union_categoricals_nan(self):
+        # GH 13759
+        res = union_categoricals([pd.Categorical([1, 2, np.nan]),
+                                  pd.Categorical([3, 2, np.nan])])
+        exp = Categorical([1, 2, np.nan, 3, 2, np.nan])
+        tm.assert_categorical_equal(res, exp)
+
+        res = union_categoricals([pd.Categorical(['A', 'B']),
+                                  pd.Categorical(['B', 'B', np.nan])])
+        exp = Categorical(['A', 'B', 'B', 'B', np.nan])
+        tm.assert_categorical_equal(res, exp)
+
+        val1 = [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-03-01'),
+                pd.NaT]
+        val2 = [pd.NaT, pd.Timestamp('2011-01-01'),
+                pd.Timestamp('2011-02-01')]
+
+        res = union_categoricals([pd.Categorical(val1), pd.Categorical(val2)])
+        exp = Categorical(val1 + val2,
+                          categories=[pd.Timestamp('2011-01-01'),
+                                      pd.Timestamp('2011-03-01'),
+                                      pd.Timestamp('2011-02-01')])
+        tm.assert_categorical_equal(res, exp)
+
+        # all NaN
+        res = union_categoricals([pd.Categorical([np.nan, np.nan]),
+                                  pd.Categorical(['X'])])
+        exp = Categorical([np.nan, np.nan, 'X'])
+        tm.assert_categorical_equal(res, exp)
+
+        res = union_categoricals([pd.Categorical([np.nan, np.nan]),
+                                  pd.Categorical([np.nan, np.nan])])
+        exp = Categorical([np.nan, np.nan, np.nan, np.nan])
+        tm.assert_categorical_equal(res, exp)
+
+    def test_union_categoricals_empty(self):
+        # GH 13759
+        res = union_categoricals([pd.Categorical([]),
+                                  pd.Categorical([])])
+        exp = Categorical([])
+        tm.assert_categorical_equal(res, exp)
+
+        res = union_categoricals([pd.Categorical([]),
+                                  pd.Categorical([1.0])])
+        exp = Categorical([1.0])
+        tm.assert_categorical_equal(res, exp)
+
+        # to make dtype equal
+        nanc = pd.Categorical(np.array([np.nan], dtype=np.float64))
+        res = union_categoricals([nanc,
+                                  pd.Categorical([])])
+        tm.assert_categorical_equal(res, nanc)
+
     def test_concat_bug_1719(self):
         ts1 = tm.makeTimeSeries()
         ts2 = tm.makeTimeSeries()[::2]
diff --git a/pandas/types/concat.py b/pandas/types/concat.py
index 3b30531fb30ac..c8af0ec62db86 100644
--- a/pandas/types/concat.py
+++ b/pandas/types/concat.py
@@ -6,6 +6,7 @@
 import pandas.tslib as tslib
 from pandas import compat
 from pandas.compat import map
+from pandas.core.algorithms import take_1d
 from .common import (is_categorical_dtype,
                      is_sparse,
                      is_datetimetz,
@@ -254,10 +255,15 @@ def union_categoricals(to_union):
 
     new_codes = []
     for c in to_union:
-        indexer = categories.get_indexer(c.categories)
-        new_codes.append(indexer.take(c.codes))
-    codes = np.concatenate(new_codes)
-    return Categorical(codes, categories=categories, ordered=False,
+        if len(c.categories) > 0:
+            indexer = categories.get_indexer(c.categories)
+            new_codes.append(take_1d(indexer, c.codes, fill_value=-1))
+        else:
+            # must be all NaN
+            new_codes.append(c.codes)
+
+    new_codes = np.concatenate(new_codes)
+    return Categorical(new_codes, categories=categories, ordered=False,
                        fastpath=True)
 
 