Skip to content

Commit

Permalink
Fix DataFrame.sort_index when the index is a MultiIndex (#14621)
Browse files Browse the repository at this point in the history
This PR fixes sorting of a MultiIndex by removing a hard-coded na_position value that was derived from the ascending flag and therefore effectively ignored the na_position parameter passed by the user.

On pandas_2.0_feature_branch:

= 501 failed, 101106 passed, 2071 skipped, 786 xfailed, 312 xpassed, 20 errors in 1234.91s (0:20:34) =
This PR:

= 405 failed, 101034 passed, 2071 skipped, 954 xfailed, 312 xpassed, 20 errors in 1124.69s (0:18:44) =
  • Loading branch information
galipremsagar authored Dec 13, 2023
1 parent 5f3ecd6 commit 72221b3
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 11 deletions.
2 changes: 0 additions & 2 deletions python/cudf/cudf/core/indexed_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1611,8 +1611,6 @@ def sort_index(
idx = self.index
if isinstance(idx, MultiIndex):
if level is not None:
# Pandas doesn't handle na_position in case of MultiIndex.
na_position = "first" if ascending is True else "last"
if not is_list_like(level):
level = [level]
by = list(map(idx._get_level_label, level))
Expand Down
19 changes: 10 additions & 9 deletions python/cudf/cudf/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -3492,8 +3492,16 @@ def test_dataframe_sort_index(
@pytest.mark.parametrize("inplace", [True, False])
@pytest.mark.parametrize("na_position", ["first", "last"])
def test_dataframe_mulitindex_sort_index(
axis, level, ascending, inplace, ignore_index, na_position
request, axis, level, ascending, inplace, ignore_index, na_position
):
request.applymarker(
pytest.mark.xfail(
condition=axis in (1, "columns")
and ignore_index
and not (level is None and not ascending),
reason="https://github.com/pandas-dev/pandas/issues/56478",
)
)
pdf = pd.DataFrame(
{
"b": [1.0, 3.0, np.nan],
Expand All @@ -3505,17 +3513,14 @@ def test_dataframe_mulitindex_sort_index(
).set_index(["b", "a", 1])
gdf = cudf.DataFrame.from_pandas(pdf)

# ignore_index is supported in v.1.0

expected = pdf.sort_index(
axis=axis,
level=level,
ascending=ascending,
inplace=inplace,
na_position=na_position,
ignore_index=ignore_index,
)
if ignore_index is True:
expected = expected
got = gdf.sort_index(
axis=axis,
level=level,
Expand All @@ -3526,12 +3531,8 @@ def test_dataframe_mulitindex_sort_index(
)

if inplace is True:
if ignore_index is True:
pdf = pdf.reset_index(drop=True)
assert_eq(pdf, gdf)
else:
if ignore_index is True:
expected = expected.reset_index(drop=True)
assert_eq(expected, got)


Expand Down

0 comments on commit 72221b3

Please sign in to comment.