Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: Index with right_index=True or left_index=True merge #34468

Closed
wants to merge 10 commits into from
15 changes: 6 additions & 9 deletions pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -438,8 +438,8 @@ def merge_asof(
>>> pd.merge_asof(left, right, left_index=True, right_index=True)
left_val right_val
1 a 1
5 b 3
10 c 7
3 b 3
7 c 7

Here is a real-world time-series example

Expand Down Expand Up @@ -906,23 +906,21 @@ def _get_join_info(self):
if self.right_index:
if len(self.left) > 0:
join_index = self._create_join_index(
self.left.index,
self.right.index,
left_indexer,
self.left.index,
right_indexer,
how="right",
how="left",
)
else:
join_index = self.right.index.take(right_indexer)
left_indexer = np.array([-1] * len(join_index))
elif self.left_index:
if len(self.right) > 0:
join_index = self._create_join_index(
self.right.index,
self.left.index,
right_indexer,
self.right.index,
left_indexer,
how="left",
how="right",
)
else:
join_index = self.left.index.take(left_indexer)
Expand All @@ -939,7 +937,6 @@ def _create_join_index(
index: Index,
other_index: Index,
indexer,
other_indexer,
how: str = "left",
):
"""
Expand Down
36 changes: 31 additions & 5 deletions pandas/tests/reshape/merge/test_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
DataFrame,
DatetimeIndex,
Float64Index,
Index,
Int64Index,
IntervalIndex,
MultiIndex,
Expand Down Expand Up @@ -360,7 +361,9 @@ def test_handle_join_key_pass_array(self):

key = np.array([0, 1, 1, 2, 2, 3], dtype=np.int64)
merged = merge(left, right, left_index=True, right_on=key, how="outer")
tm.assert_series_equal(merged["key_0"], Series(key, name="key_0"))
tm.assert_series_equal(
merged["key_0"], Series(key, name="key_0", index=[0, 1, 1, 2, 2, np.nan])
)

def test_no_overlap_more_informative_error(self):
dt = datetime.now()
Expand Down Expand Up @@ -435,7 +438,7 @@ def test_merge_left_empty_right_empty(self, join_type, kwarg):

exp_in = DataFrame(
columns=["a", "b", "c", "x", "y", "z"],
index=pd.Index([], dtype=object),
index=Index([], dtype=object),
dtype=object,
)

Expand Down Expand Up @@ -471,7 +474,10 @@ def check1(exp, kwarg):
def check2(exp, kwarg):
    # Right-merge the `left` and `right` frames taken from the surrounding
    # scope, forwarding `kwarg` as the join-key options, and require the
    # outcome to match `exp` exactly.
    tm.assert_frame_equal(pd.merge(left, right, how="right", **kwarg), exp)

def check3(exp, kwarg, index):
    # Outer-merge the `left` and `right` frames taken from the surrounding
    # scope, forwarding `kwarg` as the join-key options.
    merged = pd.merge(left, right, how="outer", **kwarg)
    # NOTE: `exp` is modified in place -- its index is overwritten with
    # `index` before the comparison, so the caller's frame is changed too.
    exp.index = index
    tm.assert_frame_equal(merged, exp)

for kwarg in [
Expand All @@ -481,6 +487,13 @@ def check2(exp, kwarg):
check1(exp_in, kwarg)
check2(exp_out, kwarg)

check3(exp_out, dict(left_index=True, right_index=True), exp_out.index)
check3(
exp_out.copy(),
dict(left_index=True, right_on="x"),
Index([np.nan, np.nan, np.nan]),
)

kwarg = dict(left_on="a", right_index=True)
check1(exp_in, kwarg)
exp_out["a"] = [0, 1, 2]
Expand Down Expand Up @@ -1311,7 +1324,7 @@ def test_merge_on_index_with_more_values(self, how, index, expected_index):
],
columns=["a", "key", "b"],
)
expected.set_index(expected_index, inplace=True)
expected.set_index(df2.index, inplace=True)
tm.assert_frame_equal(result, expected)

def test_merge_right_index_right(self):
Expand All @@ -1324,7 +1337,7 @@ def test_merge_right_index_right(self):
expected = DataFrame(
{"a": [1, 2, 3, None], "key": [0, 1, 1, 2], "b": [1, 2, 2, 3]},
columns=["a", "key", "b"],
index=[0, 1, 2, np.nan],
index=[0, 1, 1, 2],
)
result = left.merge(right, left_on="key", right_index=True, how="right")
tm.assert_frame_equal(result, expected)
Expand Down Expand Up @@ -1357,7 +1370,7 @@ def test_merge_take_missing_values_from_index_of_other_dtype(self):
"key": Categorical(["a", "a", "b", "c"]),
"b": [1, 1, 2, 3],
},
index=[0, 1, 2, np.nan],
index=Categorical(["a", "a", "b", "c"]),
)
expected = expected.reindex(columns=["a", "key", "b"])
tm.assert_frame_equal(result, expected)
Expand Down Expand Up @@ -2349,3 +2362,16 @@ def test_merge_join_cols_error_reporting_on_and_index(func, kwargs):
)
with pytest.raises(MergeError, match=msg):
getattr(pd, func)(left, right, on="a", **kwargs)


@pytest.mark.parametrize(
    ("index", "how", "values"),
    [
        (Index(["a", "b"]), "left", ["a", "b"]),
        (Index([0, 1]), "right", ["a", "c"]),
    ],
)
def test_left_index_true_left_and_righ_join_target_index(index, how, values):
    """Check the result index of a ``left_index=True`` / ``right_on`` merge.

    A column-less frame indexed by ``["a", "b"]`` is merged against a frame
    whose ``x`` column holds ``["a", "c"]``.  Each parametrized case supplies
    the join type (``how``) together with the ``index`` and ``x`` ``values``
    the merged frame is expected to carry.
    """
    left_frame = DataFrame(index=["a", "b"])
    right_frame = DataFrame({"x": ["a", "c"]})
    expected = DataFrame({"x": values}, index=index)

    merged = merge(left_frame, right_frame, left_index=True, right_on="x", how=how)
    tm.assert_frame_equal(merged, expected)
5 changes: 4 additions & 1 deletion pandas/tests/reshape/merge/test_merge_index_as_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,5 +185,8 @@ def test_join_indexes_and_columns_on(df1, df2, left_index, join_type):
result = left_df.join(
right_df, on=["outer", "inner"], how=join_type, lsuffix="_x", rsuffix="_y"
)

if join_type == "right" and left_index == "inner":
result.index = result.index.droplevel("outer")
if join_type == "outer" and left_index == "inner":
result.index = result.index.droplevel(0)
tm.assert_frame_equal(result, expected, check_like=True)