diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 0dc9995746ede4..d34f3ae0cf2379 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -445,6 +445,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more. - Removed previously deprecated "order" argument from :func:`factorize` (:issue:`19751`) - Removed previously deprecated "v" argument from :meth:`FrozenNDarray.searchsorted`, use "value" instead (:issue:`22672`) - :func:`read_stata` and :meth:`DataFrame.to_stata` no longer supports the "encoding" argument (:issue:`21400`) +- In :func:`concat` the default value for ``sort`` has been changed from ``None`` to ``False`` (:issue:`20613`) - Removed previously deprecated "raise_conflict" argument from :meth:`DataFrame.update`, use "errors" instead (:issue:`23585`) - Removed previously deprecated keyword "n" from :meth:`DatetimeIndex.shift`, :meth:`TimedeltaIndex.shift`, :meth:`PeriodIndex.shift`, use "periods" instead (:issue:`22458`) - diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index f650a62bc5b745..c3de1321404b47 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -1,6 +1,5 @@ import textwrap from typing import List, Set -import warnings from pandas._libs import NaT, lib @@ -211,12 +210,6 @@ def conv(i): index = indexes[0] for other in indexes[1:]: if not index.equals(other): - - if sort is None: - # TODO: remove once pd.concat sort default changes - warnings.warn(_sort_msg, FutureWarning, stacklevel=8) - sort = True - return _unique_indices(indexes) name = get_consensus_names(indexes)[0] diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index c2322ae626cfd9..853a638bdb277f 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -37,7 +37,7 @@ def concat( levels=None, names=None, verify_integrity: bool = False, - sort=None, + sort: bool = False, copy: bool = True, ): """ @@ -82,18 +82,16 @@ def concat( verify_integrity : bool, default False Check whether the new concatenated axis contains duplicates. This can be very expensive relative to the actual data concatenation. - sort : bool, default None + sort : bool, default False Sort non-concatenation axis if it is not already aligned when `join` - is 'outer'. The current default of sorting is deprecated and will - change to not-sorting in a future version of pandas. - - Explicitly pass ``sort=True`` to silence the warning and sort. - Explicitly pass ``sort=False`` to silence the warning and not sort. - + is 'outer'. This has no effect when ``join='inner'``, which already preserves the order of the non-concatenation axis. .. versionadded:: 0.23.0 + .. versionchanged:: 1.0.0 + + Changed to not sort by default. copy : bool, default True If False, do not copy data unnecessarily. diff --git a/pandas/tests/frame/test_join.py b/pandas/tests/frame/test_join.py index 220968d4b3d292..a0cbc1456afa49 100644 --- a/pandas/tests/frame/test_join.py +++ b/pandas/tests/frame/test_join.py @@ -195,7 +195,7 @@ def test_join_left_sequence_non_unique_index(): tm.assert_frame_equal(joined, expected) -@pytest.mark.parametrize("sort_kw", [True, False, None]) +@pytest.mark.parametrize("sort_kw", [True, False]) def test_suppress_future_warning_with_sort_kw(sort_kw): a = DataFrame({"col1": [1, 2]}, index=["c", "a"]) @@ -213,12 +213,6 @@ def test_suppress_future_warning_with_sort_kw(sort_kw): if sort_kw is False: expected = expected.reindex(index=["c", "a", "b"]) - if sort_kw is None: - # only warn if not explicitly specified - ctx = tm.assert_produces_warning(FutureWarning, check_stacklevel=False) - else: - ctx = tm.assert_produces_warning(None, check_stacklevel=False) - - with ctx: + with tm.assert_produces_warning(None, check_stacklevel=False): result = a.join([b, c], how="outer", sort=sort_kw) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 795bbabdfad50b..667fe689861be1 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -37,16 +37,6 @@ def sort(request): return request.param -@pytest.fixture(params=[True, False, None]) -def sort_with_none(request): - """Boolean sort keyword for concat and DataFrame.append. - - Includes the default of None - """ - # TODO: Replace with sort once keyword changes. - return request.param - - class TestConcatAppendCommon: """ Test common dtype coercion rules between concat and append. @@ -775,15 +765,13 @@ def test_concat_join_axes_deprecated(self, axis): ) expected = pd.concat([one, two], axis=1, sort=False).reindex(index=two.index) - with tm.assert_produces_warning(expected_warning=FutureWarning): - result = pd.concat([one, two], axis=1, sort=False, join_axes=[two.index]) + result = pd.concat([one, two], axis=1, sort=False, join_axes=[two.index]) tm.assert_frame_equal(result, expected) expected = pd.concat([one, two], axis=0, sort=False).reindex( columns=two.columns ) - with tm.assert_produces_warning(expected_warning=FutureWarning): - result = pd.concat([one, two], axis=0, sort=False, join_axes=[two.columns]) + result = pd.concat([one, two], axis=0, sort=False, join_axes=[two.columns]) tm.assert_frame_equal(result, expected) @@ -875,27 +863,19 @@ def test_append_records(self): tm.assert_frame_equal(result, expected) # rewrite sort fixture, since we also want to test default of None - def test_append_sorts(self, sort_with_none): + def test_append_sorts(self, sort): df1 = pd.DataFrame({"a": [1, 2], "b": [1, 2]}, columns=["b", "a"]) df2 = pd.DataFrame({"a": [1, 2], "c": [3, 4]}, index=[2, 3]) - if sort_with_none is None: - # only warn if not explicitly specified - # don't check stacklevel since its set for concat, and append - # has an extra stack. - ctx = tm.assert_produces_warning(FutureWarning, check_stacklevel=False) - else: - ctx = tm.assert_produces_warning(None) - - with ctx: - result = df1.append(df2, sort=sort_with_none) + with tm.assert_produces_warning(None): + result = df1.append(df2, sort=sort) # for None / True expected = pd.DataFrame( {"b": [1, 2, None, None], "a": [1, 2, 1, 2], "c": [None, None, 3, 4]}, columns=["a", "b", "c"], ) - if sort_with_none is False: + if sort is False: expected = expected[["b", "a", "c"]] tm.assert_frame_equal(result, expected) @@ -2629,7 +2609,7 @@ def test_concat_empty_and_non_empty_series_regression(): tm.assert_series_equal(result, expected) -def test_concat_sorts_columns(sort_with_none): +def test_concat_sorts_columns(sort): # GH-4588 df1 = pd.DataFrame({"a": [1, 2], "b": [1, 2]}, columns=["b", "a"]) df2 = pd.DataFrame({"a": [3, 4], "c": [5, 6]}) @@ -2640,22 +2620,16 @@ def test_concat_sorts_columns(sort_with_none): columns=["a", "b", "c"], ) - if sort_with_none is False: + if sort is False: expected = expected[["b", "a", "c"]] - if sort_with_none is None: - # only warn if not explicitly specified - ctx = tm.assert_produces_warning(FutureWarning) - else: - ctx = tm.assert_produces_warning(None) - # default - with ctx: - result = pd.concat([df1, df2], ignore_index=True, sort=sort_with_none) + with tm.assert_produces_warning(None): + result = pd.concat([df1, df2], ignore_index=True, sort=sort) tm.assert_frame_equal(result, expected) -def test_concat_sorts_index(sort_with_none): +def test_concat_sorts_index(sort): df1 = pd.DataFrame({"a": [1, 2, 3]}, index=["c", "a", "b"]) df2 = pd.DataFrame({"b": [1, 2]}, index=["a", "b"]) @@ -2663,22 +2637,16 @@ def test_concat_sorts_index(sort_with_none): expected = pd.DataFrame( {"a": [2, 3, 1], "b": [1, 2, None]}, index=["a", "b", "c"], columns=["a", "b"] ) - if sort_with_none is False: + if sort is False: expected = expected.loc[["c", "a", "b"]] - if sort_with_none is None: - # only warn if not explicitly specified - ctx = tm.assert_produces_warning(FutureWarning) - else: - ctx = tm.assert_produces_warning(None) - # Warn and sort by default - with ctx: - result = pd.concat([df1, df2], axis=1, sort=sort_with_none) + with tm.assert_produces_warning(None): + result = pd.concat([df1, df2], axis=1, sort=sort) tm.assert_frame_equal(result, expected) -def test_concat_inner_sort(sort_with_none): +def test_concat_inner_sort(sort): # https://github.com/pandas-dev/pandas/pull/20613 df1 = pd.DataFrame({"a": [1, 2], "b": [1, 2], "c": [1, 2]}, columns=["b", "a", "c"]) df2 = pd.DataFrame({"a": [1, 2], "b": [3, 4]}, index=[3, 4]) @@ -2686,12 +2654,10 @@ def test_concat_inner_sort(sort_with_none): with tm.assert_produces_warning(None): # unset sort should *not* warn for inner join # since that never sorted - result = pd.concat( - [df1, df2], sort=sort_with_none, join="inner", ignore_index=True - ) + result = pd.concat([df1, df2], sort=sort, join="inner", ignore_index=True) expected = pd.DataFrame({"b": [1, 2, 3, 4], "a": [1, 2, 1, 2]}, columns=["b", "a"]) - if sort_with_none is True: + if sort is True: expected = expected[["a", "b"]] tm.assert_frame_equal(result, expected)