-
-
Notifications
You must be signed in to change notification settings - Fork 17.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
BUG: DataFrame.merge(suffixes=) does not respect None #24819
Changes from 8 commits
82c52a4
dd605e0
af7f9ad
4d5e1a9
3f65bf1
ce7e4b8
90ca9cd
e995a04
9c3dfbd
441e9a5
71729b2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1971,16 +1971,19 @@ def items_overlap_with_suffix(left, lsuffix, right, rsuffix): | |
raise ValueError('columns overlap but no suffix specified: ' | ||
'{rename}'.format(rename=to_rename)) | ||
|
||
def lrenamer(x): | ||
if x in to_rename: | ||
return '{x}{lsuffix}'.format(x=x, lsuffix=lsuffix) | ||
return x | ||
def renamer(x, suffix): | ||
"""Rename the left and right indices. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you make a proper docstring (Parameters / Returns)? |
||
|
||
def rrenamer(x): | ||
if x in to_rename: | ||
return '{x}{rsuffix}'.format(x=x, rsuffix=rsuffix) | ||
If there is overlap, and suffix is not None, add | ||
suffix, otherwise, leave it as-is. | ||
""" | ||
if x in to_rename and suffix is not None: | ||
return '{x}{suffix}'.format(x=x, suffix=suffix) | ||
return x | ||
|
||
lrenamer = partial(renamer, suffix=lsuffix) | ||
rrenamer = partial(renamer, suffix=rsuffix) | ||
|
||
return (_transform_index(left, lrenamer), | ||
_transform_index(right, rrenamer)) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -159,9 +159,13 @@ def merge_ordered(left, right, on=None, | |
left DataFrame | ||
fill_method : {'ffill', None}, default None | ||
Interpolation method for data | ||
suffixes : 2-length sequence (tuple, list, ...) | ||
Suffix to apply to overlapping column names in the left and right | ||
side, respectively | ||
suffixes : Sequence, default is ("_x", "_y") | ||
A length-2 sequence where each element is optionally a string | ||
indicating the suffix to add to overlapping column names in | ||
`left` and `right` respectively. Pass a value of `None` instead | ||
of a string to indicate that the column name from `left` or | ||
`right` should be left as-is, with no suffix. At least one of the | ||
values must not be None. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you add a versionchanged 0.25.0 here? |
||
how : {'left', 'right', 'outer', 'inner'}, default 'outer' | ||
* left: use only keys from left frame (SQL: left outer join) | ||
* right: use only keys from right frame (SQL: right outer join) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1526,3 +1526,50 @@ def test_merge_series(on, left_on, right_on, left_index, right_index, nm): | |
with pytest.raises(ValueError, match=msg): | ||
result = pd.merge(a, b, on=on, left_on=left_on, right_on=right_on, | ||
left_index=left_index, right_index=right_index) | ||
|
||
|
||
@pytest.mark.parametrize("col1, col2, kwargs, expected_cols", [ | ||
(0, 0, dict(suffixes=("", "_dup")), ["0", "0_dup"]), | ||
(0, 0, dict(suffixes=(None, "_dup")), [0, "0_dup"]), | ||
(0, 0, dict(suffixes=("_x", "_y")), ["0_x", "0_y"]), | ||
("a", 0, dict(suffixes=(None, "_y")), ["a", 0]), | ||
(0.0, 0.0, dict(suffixes=("_x", None)), ["0.0_x", 0.0]), | ||
("b", "b", dict(suffixes=(None, "_y")), ["b", "b_y"]), | ||
("a", "a", dict(suffixes=("_x", None)), ["a_x", "a"]), | ||
("a", "b", dict(suffixes=("_x", None)), ["a", "b"]), | ||
("a", "a", dict(suffixes=[None, "_x"]), ["a", "a_x"]), | ||
(0, 0, dict(suffixes=["_a", None]), ["0_a", 0]), | ||
("a", "a", dict(), ["a_x", "a_y"]), | ||
(0, 0, dict(), ["0_x", "0_y"]) | ||
]) | ||
def test_merge_suffix(col1, col2, kwargs, expected_cols): | ||
# issue: 24782 | ||
a = pd.DataFrame({col1: [1, 2, 3]}) | ||
jschendel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
b = pd.DataFrame({col2: [4, 5, 6]}) | ||
|
||
expected = pd.DataFrame([[1, 4], [2, 5], [3, 6]], | ||
columns=expected_cols) | ||
|
||
result = a.merge(b, left_index=True, right_index=True, **kwargs) | ||
tm.assert_frame_equal(result, expected) | ||
|
||
result = pd.merge(a, b, left_index=True, right_index=True, **kwargs) | ||
tm.assert_frame_equal(result, expected) | ||
|
||
|
||
@pytest.mark.parametrize("col1, col2, suffixes", [ | ||
("a", "a", [None, None]), | ||
("a", "a", (None, None)), | ||
("a", "a", ("", None)), | ||
(0, 0, [None, None]), | ||
(0, 0, (None, "")) | ||
]) | ||
def test_merge_error(col1, col2, suffixes): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. test_merge_suffix_error |
||
# issue: 24782 | ||
a = pd.DataFrame({col1: [1, 2, 3]}) | ||
b = pd.DataFrame({col2: [3, 4, 5]}) | ||
|
||
# TODO: might reconsider current raise behaviour, see issue 24782 | ||
msg = "columns overlap but no suffix specified" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you add here a comment like There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks, added! |
||
with pytest.raises(ValueError, match=msg): | ||
pd.merge(a, b, left_index=True, right_index=True, suffixes=suffixes) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you be a bit more clear on what the previous symptom was, instead of 'doesn't work correctly'?
double backticks on None
no space after the colon
(:issue:`24782`)