From b50df637c4923434559edb53690065440fe7a19a Mon Sep 17 00:00:00 2001 From: Ian Date: Fri, 2 Jun 2017 15:53:20 -0400 Subject: [PATCH 1/7] default to no sort on join --- pandas/core/indexes/period.py | 6 ++++-- pandas/tests/indexes/period/test_period.py | 6 ++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 9d1a49e13c804..0915462d4d421 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -912,14 +912,16 @@ def insert(self, loc, item): self[loc:].asi8)) return self._shallow_copy(idx) - def join(self, other, how='left', level=None, return_indexers=False): + def join(self, other, how='left', level=None, return_indexers=False, + sort=False): """ See Index.join """ self._assert_can_do_setop(other) result = Int64Index.join(self, other, how=how, level=level, - return_indexers=return_indexers) + return_indexers=return_indexers, + sort=sort) if return_indexers: result, lidx, ridx = result diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 6f73e7c15e4d9..291ca317f8fae 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -773,3 +773,9 @@ def test_map(self): result = index.map(lambda x: x.ordinal) exp = Index([x.ordinal for x in index]) tm.assert_index_equal(result, exp) + + @pytest.mark.parametrize('how', ['outer', 'inner', 'left', 'right']) + def test_join_self(self, how): + index = period_range('1/1/2000', periods=10) + joined = index.join(index, how=how) + assert index is joined From 0a13bdb7d08d1925099519962ba5cf1832fb626e Mon Sep 17 00:00:00 2001 From: Ian Date: Thu, 8 Jun 2017 15:07:12 -0400 Subject: [PATCH 2/7] add join to test to common and fix breaks for timedeltas and non-unique categorical indices. --- pandas/core/indexes/base.py | 2 +- pandas/core/indexes/timedeltas.py | 6 ++++-- pandas/tests/indexes/common.py | 13 ++++++++++++- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 8a4878d9cfbcf..cefb080a3ee78 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3126,7 +3126,7 @@ def _join_non_unique(self, other, how='left', return_indexers=False): left_idx = _ensure_platform_int(left_idx) right_idx = _ensure_platform_int(right_idx) - join_index = self.values.take(left_idx) + join_index = np.asarray(self.values.take(left_idx)) mask = left_idx == -1 np.putmask(join_index, mask, other._values.take(right_idx)) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index c025c74625972..faec813df3993 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -516,7 +516,8 @@ def union(self, other): result.freq = to_offset(result.inferred_freq) return result - def join(self, other, how='left', level=None, return_indexers=False): + def join(self, other, how='left', level=None, return_indexers=False, + sort=False): """ See Index.join """ @@ -527,7 +528,8 @@ def join(self, other, how='left', level=None, return_indexers=False): pass return Index.join(self, other, how=how, level=level, - return_indexers=return_indexers) + return_indexers=return_indexers, + sort=sort) def _wrap_joined_index(self, joined, other): name = self.name if self.name == other.name else None diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index a6177104d6273..c95735004ad65 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -905,7 +905,7 @@ def test_fillna(self): def test_nulls(self): # this is really a smoke test for the methods - # as these are adequantely tested for function elsewhere + # as these are adequately tested for function elsewhere for name, index in self.indices.items(): if len(index) == 0: @@ -933,3 +933,14 @@ def test_empty(self): index = self.create_index() assert not index.empty assert index[:0].empty + + @pytest.mark.parametrize('how', ['outer', 'inner', 'left', 'right']) + def test_join_self(self, how): + index = self.create_index() + joined = index.join(index, how=how) + + if index.is_unique: + assert index is joined + + else: + assert isinstance(joined, type(index)) From 24b83f8a562ecfab6d1ca97b89e4e8f51fcbb027 Mon Sep 17 00:00:00 2001 From: ian erb Date: Sun, 2 Jul 2017 20:49:40 -0400 Subject: [PATCH 3/7] explicitly test only unique indices --- pandas/tests/indexes/common.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index c95735004ad65..e7ca435dca1c1 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -935,12 +935,8 @@ def test_empty(self): assert index[:0].empty @pytest.mark.parametrize('how', ['outer', 'inner', 'left', 'right']) - def test_join_self(self, how): + def test_join_self_unique(self, how): index = self.create_index() - joined = index.join(index, how=how) - if index.is_unique: - assert index is joined - - else: - assert isinstance(joined, type(index)) + joined = index.join(index, how=how) + assert (index == joined).all() From 93ca23c09c820e6d7188733000ae995677a736ba Mon Sep 17 00:00:00 2001 From: Ian Date: Thu, 6 Jul 2017 09:08:38 -0400 Subject: [PATCH 4/7] edits for whatsnew doc --- doc/source/whatsnew/v0.20.3.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt index ce7e8be16d8e2..2cf995b64aff1 100644 --- a/doc/source/whatsnew/v0.20.3.txt +++ b/doc/source/whatsnew/v0.20.3.txt @@ -82,7 +82,7 @@ Sparse Reshaping ^^^^^^^^^ - +- PeriodIndex / TimedeltaIndex.join was missing the sort= kwarg (:issue: `16541`) - Bug in joining on a ``MultiIndex`` with a ``category`` dtype for a level (:issue:`16627`). - Bug in :func:`merge` when merging/joining with multiple categorical columns (:issue:`16767`) From e45711f33e06d80afd7dca409e3fc68c5a562e1b Mon Sep 17 00:00:00 2001 From: Ian Date: Thu, 6 Jul 2017 20:09:05 -0400 Subject: [PATCH 5/7] backticks in whatsnew doc --- doc/source/whatsnew/v0.20.3.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt index 2cf995b64aff1..8744bc4f70026 100644 --- a/doc/source/whatsnew/v0.20.3.txt +++ b/doc/source/whatsnew/v0.20.3.txt @@ -82,7 +82,7 @@ Sparse Reshaping ^^^^^^^^^ -- PeriodIndex / TimedeltaIndex.join was missing the sort= kwarg (:issue: `16541`) +- ``PeriodIndex`` / ``TimedeltaIndex.join`` was missing the ``sort=`` kwarg (:issue:`16541`) - Bug in joining on a ``MultiIndex`` with a ``category`` dtype for a level (:issue:`16627`). - Bug in :func:`merge` when merging/joining with multiple categorical columns (:issue:`16767`) From 25030fbd5415134ec51868f8527a713905e9f604 Mon Sep 17 00:00:00 2001 From: Ian Date: Thu, 6 Jul 2017 22:38:54 -0400 Subject: [PATCH 6/7] add specific join of two period-index dataframes --- pandas/tests/frame/test_join.py | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_join.py b/pandas/tests/frame/test_join.py index 21807cb42aa6e..150ede3fb89bf 100644 --- a/pandas/tests/frame/test_join.py +++ b/pandas/tests/frame/test_join.py @@ -3,10 +3,17 @@ import pytest import numpy as np -from pandas import DataFrame, Index +from pandas import DataFrame, Index, PeriodIndex from pandas.tests.frame.common import TestData import pandas.util.testing as tm +@pytest.fixture +def frame_with_period_index(): + return DataFrame( + data=np.arange(20).reshape(4,5), + columns=list('abcde'), + index=PeriodIndex(start='2000', freq='A', periods=4)) + @pytest.fixture def frame(): @@ -139,3 +146,22 @@ def test_join_overlap(frame): # column order not necessarily sorted tm.assert_frame_equal(joined, expected.loc[:, joined.columns]) + + +def test_join_period_index(frame_with_period_index): + other = frame_with_period_index.rename( + columns=lambda x: '{key}{key}'.format(key=x)) + + joined_values = np.concatenate( + [frame_with_period_index.values] * 2, axis=1) + + joined_cols = frame_with_period_index.columns.append(other.columns) + + joined = frame_with_period_index.join(other) + expected = DataFrame( + data=joined_values, + columns=joined_cols, + index=frame_with_period_index.index) + + tm.assert_frame_equal(joined, expected) + From 2aea2b54ae09a9e8fcbdee904c3ed261617e762c Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 7 Jul 2017 06:28:56 -0400 Subject: [PATCH 7/7] fix linting --- pandas/tests/frame/test_join.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/test_join.py b/pandas/tests/frame/test_join.py index 150ede3fb89bf..afecba2026dd7 100644 --- a/pandas/tests/frame/test_join.py +++ b/pandas/tests/frame/test_join.py @@ -7,10 +7,11 @@ from pandas.tests.frame.common import TestData import pandas.util.testing as tm + @pytest.fixture def frame_with_period_index(): return DataFrame( - data=np.arange(20).reshape(4,5), + data=np.arange(20).reshape(4, 5), columns=list('abcde'), index=PeriodIndex(start='2000', freq='A', periods=4)) @@ -164,4 +165,3 @@ def test_join_period_index(frame_with_period_index): index=frame_with_period_index.index) tm.assert_frame_equal(joined, expected) -