Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEPR: change pd.concat sort=None to sort=False #29786

Merged
merged 2 commits into from
Nov 25, 2019
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more.
- Removed previously deprecated "order" argument from :func:`factorize` (:issue:`19751`)
- Removed previously deprecated "v" argument from :meth:`FrozenNDarray.searchsorted`, use "value" instead (:issue:`22672`)
- :func:`read_stata` and :meth:`DataFrame.to_stata` no longer support the "encoding" argument (:issue:`21400`)
- In :func:`concat` the default value for ``sort`` has been changed from ``None`` to ``False`` (:issue:`20613`)
- Removed previously deprecated "raise_conflict" argument from :meth:`DataFrame.update`, use "errors" instead (:issue:`23585`)
- Removed previously deprecated keyword "n" from :meth:`DatetimeIndex.shift`, :meth:`TimedeltaIndex.shift`, :meth:`PeriodIndex.shift`, use "periods" instead (:issue:`22458`)
-
Expand Down
7 changes: 0 additions & 7 deletions pandas/core/indexes/api.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import textwrap
from typing import List, Set
import warnings

from pandas._libs import NaT, lib

Expand Down Expand Up @@ -211,12 +210,6 @@ def conv(i):
index = indexes[0]
for other in indexes[1:]:
if not index.equals(other):

if sort is None:
# TODO: remove once pd.concat sort default changes
warnings.warn(_sort_msg, FutureWarning, stacklevel=8)
sort = True

return _unique_indices(indexes)

name = get_consensus_names(indexes)[0]
Expand Down
12 changes: 4 additions & 8 deletions pandas/core/reshape/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def concat(
levels=None,
names=None,
verify_integrity: bool = False,
sort=None,
sort: bool = False,
copy: bool = True,
):
"""
Expand Down Expand Up @@ -82,18 +82,14 @@ def concat(
verify_integrity : bool, default False
Check whether the new concatenated axis contains duplicates. This can
be very expensive relative to the actual data concatenation.
sort : bool, default None
sort : bool, default False
Sort non-concatenation axis if it is not already aligned when `join`
is 'outer'. The current default of sorting is deprecated and will
change to not-sorting in a future version of pandas.

Explicitly pass ``sort=True`` to silence the warning and sort.
Explicitly pass ``sort=False`` to silence the warning and not sort.

is 'outer'.
This has no effect when ``join='inner'``, which already preserves
the order of the non-concatenation axis.

.. versionadded:: 0.23.0
.. versionchanged:: 1.0.0
jbrockmendel marked this conversation as resolved.
Show resolved Hide resolved

copy : bool, default True
If False, do not copy data unnecessarily.
Expand Down
10 changes: 2 additions & 8 deletions pandas/tests/frame/test_join.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ def test_join_left_sequence_non_unique_index():
tm.assert_frame_equal(joined, expected)


@pytest.mark.parametrize("sort_kw", [True, False, None])
@pytest.mark.parametrize("sort_kw", [True, False])
def test_suppress_future_warning_with_sort_kw(sort_kw):
a = DataFrame({"col1": [1, 2]}, index=["c", "a"])

Expand All @@ -213,12 +213,6 @@ def test_suppress_future_warning_with_sort_kw(sort_kw):
if sort_kw is False:
expected = expected.reindex(index=["c", "a", "b"])

if sort_kw is None:
# only warn if not explicitly specified
ctx = tm.assert_produces_warning(FutureWarning, check_stacklevel=False)
else:
ctx = tm.assert_produces_warning(None, check_stacklevel=False)

with ctx:
with tm.assert_produces_warning(None, check_stacklevel=False):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We fail on warnings now, right? I think we could probably get rid of this altogether.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we fail on warnings for all builds, or just the one?

result = a.join([b, c], how="outer", sort=sort_kw)
tm.assert_frame_equal(result, expected)
68 changes: 17 additions & 51 deletions pandas/tests/reshape/test_concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,16 +37,6 @@ def sort(request):
return request.param


@pytest.fixture(params=[True, False, None])
def sort_with_none(request):
"""Boolean sort keyword for concat and DataFrame.append.

Includes the default of None
"""
# TODO: Replace with sort once keyword changes.
return request.param


class TestConcatAppendCommon:
"""
Test common dtype coercion rules between concat and append.
Expand Down Expand Up @@ -775,15 +765,13 @@ def test_concat_join_axes_deprecated(self, axis):
)

expected = pd.concat([one, two], axis=1, sort=False).reindex(index=two.index)
with tm.assert_produces_warning(expected_warning=FutureWarning):
result = pd.concat([one, two], axis=1, sort=False, join_axes=[two.index])
result = pd.concat([one, two], axis=1, sort=False, join_axes=[two.index])
tm.assert_frame_equal(result, expected)

expected = pd.concat([one, two], axis=0, sort=False).reindex(
columns=two.columns
)
with tm.assert_produces_warning(expected_warning=FutureWarning):
result = pd.concat([one, two], axis=0, sort=False, join_axes=[two.columns])
result = pd.concat([one, two], axis=0, sort=False, join_axes=[two.columns])
tm.assert_frame_equal(result, expected)


Expand Down Expand Up @@ -875,27 +863,19 @@ def test_append_records(self):
tm.assert_frame_equal(result, expected)

# rewrite sort fixture, since we also want to test default of None
def test_append_sorts(self, sort_with_none):
def test_append_sorts(self, sort):
df1 = pd.DataFrame({"a": [1, 2], "b": [1, 2]}, columns=["b", "a"])
df2 = pd.DataFrame({"a": [1, 2], "c": [3, 4]}, index=[2, 3])

if sort_with_none is None:
# only warn if not explicitly specified
# don't check stacklevel since its set for concat, and append
# has an extra stack.
ctx = tm.assert_produces_warning(FutureWarning, check_stacklevel=False)
else:
ctx = tm.assert_produces_warning(None)

with ctx:
result = df1.append(df2, sort=sort_with_none)
with tm.assert_produces_warning(None):
result = df1.append(df2, sort=sort)

# for None / True
expected = pd.DataFrame(
{"b": [1, 2, None, None], "a": [1, 2, 1, 2], "c": [None, None, 3, 4]},
columns=["a", "b", "c"],
)
if sort_with_none is False:
if sort is False:
expected = expected[["b", "a", "c"]]
tm.assert_frame_equal(result, expected)

Expand Down Expand Up @@ -2629,7 +2609,7 @@ def test_concat_empty_and_non_empty_series_regression():
tm.assert_series_equal(result, expected)


def test_concat_sorts_columns(sort_with_none):
def test_concat_sorts_columns(sort):
# GH-4588
df1 = pd.DataFrame({"a": [1, 2], "b": [1, 2]}, columns=["b", "a"])
df2 = pd.DataFrame({"a": [3, 4], "c": [5, 6]})
Expand All @@ -2640,58 +2620,44 @@ def test_concat_sorts_columns(sort_with_none):
columns=["a", "b", "c"],
)

if sort_with_none is False:
if sort is False:
expected = expected[["b", "a", "c"]]

if sort_with_none is None:
# only warn if not explicitly specified
ctx = tm.assert_produces_warning(FutureWarning)
else:
ctx = tm.assert_produces_warning(None)

# default
with ctx:
result = pd.concat([df1, df2], ignore_index=True, sort=sort_with_none)
with tm.assert_produces_warning(None):
result = pd.concat([df1, df2], ignore_index=True, sort=sort)
tm.assert_frame_equal(result, expected)


def test_concat_sorts_index(sort_with_none):
def test_concat_sorts_index(sort):
df1 = pd.DataFrame({"a": [1, 2, 3]}, index=["c", "a", "b"])
df2 = pd.DataFrame({"b": [1, 2]}, index=["a", "b"])

# For True/None
expected = pd.DataFrame(
{"a": [2, 3, 1], "b": [1, 2, None]}, index=["a", "b", "c"], columns=["a", "b"]
)
if sort_with_none is False:
if sort is False:
expected = expected.loc[["c", "a", "b"]]

if sort_with_none is None:
# only warn if not explicitly specified
ctx = tm.assert_produces_warning(FutureWarning)
else:
ctx = tm.assert_produces_warning(None)

# Warn and sort by default
with ctx:
result = pd.concat([df1, df2], axis=1, sort=sort_with_none)
with tm.assert_produces_warning(None):
result = pd.concat([df1, df2], axis=1, sort=sort)
tm.assert_frame_equal(result, expected)


def test_concat_inner_sort(sort_with_none):
def test_concat_inner_sort(sort):
# https://github.com/pandas-dev/pandas/pull/20613
df1 = pd.DataFrame({"a": [1, 2], "b": [1, 2], "c": [1, 2]}, columns=["b", "a", "c"])
df2 = pd.DataFrame({"a": [1, 2], "b": [3, 4]}, index=[3, 4])

with tm.assert_produces_warning(None):
# unset sort should *not* warn for inner join
# since that never sorted
result = pd.concat(
[df1, df2], sort=sort_with_none, join="inner", ignore_index=True
)
result = pd.concat([df1, df2], sort=sort, join="inner", ignore_index=True)

expected = pd.DataFrame({"b": [1, 2, 3, 4], "a": [1, 2, 1, 2]}, columns=["b", "a"])
if sort_with_none is True:
if sort is True:
expected = expected[["a", "b"]]
tm.assert_frame_equal(result, expected)

Expand Down