From 91477b6a5211b9e1dcb3c843444f9cc1b27b3602 Mon Sep 17 00:00:00 2001
From: Pietro Battiston
Date: Mon, 28 Aug 2017 23:55:45 +0200
Subject: [PATCH] BUG: make order of index from pd.concat deterministic

closes #17344
---
 doc/source/whatsnew/v0.21.0.txt     |  1 +
 pandas/core/common.py               | 14 ++++++++++++++
 pandas/core/indexes/api.py          |  9 ++-------
 pandas/tests/reshape/test_concat.py |  9 +++++++++
 4 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index fcadd26156b1d4..3049b233c6c2da 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -405,6 +405,7 @@ Reshaping
 - Bug in :func:`crosstab` where passing two ``Series`` with the same name raised a ``KeyError`` (:issue:`13279`)
 - :func:`Series.argmin`, :func:`Series.argmax`, and their counterparts on ``DataFrame`` and groupby objects work correctly with floating point data that contains infinite values (:issue:`13595`).
 - Bug in :func:`unique` where checking a tuple of strings raised a ``TypeError`` (:issue:`17108`)
+- Bug in :func:`concat` which would not respect the order of the index along the common dimension (:issue:`17344`)
 
 Numeric
 ^^^^^^^
diff --git a/pandas/core/common.py b/pandas/core/common.py
index 44cb36b8a32076..515a4010961205 100644
--- a/pandas/core/common.py
+++ b/pandas/core/common.py
@@ -629,3 +629,17 @@ def _random_state(state=None):
     else:
         raise ValueError("random_state must be an integer, a numpy "
                          "RandomState, or None")
+
+
+def _get_distinct_objs(objs):
+    """
+    Return a list with distinct elements of "objs" (different ids).
+    Preserves order.
+    """
+    ids = set()
+    res = []
+    for obj in objs:
+        if not id(obj) in ids:
+            ids.add(id(obj))
+            res.append(obj)
+    return res
diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py
index db73a6878258ad..323d50166e7b6f 100644
--- a/pandas/core/indexes/api.py
+++ b/pandas/core/indexes/api.py
@@ -23,8 +23,7 @@
            'PeriodIndex', 'DatetimeIndex',
            '_new_Index', 'NaT',
            '_ensure_index', '_get_na_value', '_get_combined_index',
-           '_get_objs_combined_axis',
-           '_get_distinct_indexes', '_union_indexes',
+           '_get_objs_combined_axis', '_union_indexes',
            '_get_consensus_names',
            '_all_indexes_same']
 
@@ -41,7 +40,7 @@ def _get_objs_combined_axis(objs, intersect=False, axis=0):
 
 def _get_combined_index(indexes, intersect=False):
     # TODO: handle index names!
-    indexes = _get_distinct_indexes(indexes)
+    indexes = com._get_distinct_objs(indexes)
     if len(indexes) == 0:
         return Index([])
     if len(indexes) == 1:
@@ -55,10 +54,6 @@ def _get_combined_index(indexes, intersect=False):
     return _ensure_index(union)
 
 
-def _get_distinct_indexes(indexes):
-    return list(dict((id(x), x) for x in indexes).values())
-
-
 def _union_indexes(indexes):
     if len(indexes) == 0:
         raise AssertionError('Must have at least 1 Index to union')
diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py
index 52cd18126859a1..61c68f856ab2ae 100644
--- a/pandas/tests/reshape/test_concat.py
+++ b/pandas/tests/reshape/test_concat.py
@@ -1944,6 +1944,15 @@ def test_concat_categoricalindex(self):
                            index=exp_idx)
         tm.assert_frame_equal(result, exp)
 
+    def test_concat_order(self):
+        # GH 17344
+        dfs = [pd.DataFrame(index=range(3), columns=['a', 1, None])]
+        dfs += [pd.DataFrame(index=range(3), columns=[None, 1, 'a'])
+                for i in range(100)]
+        result = pd.concat(dfs).columns
+        expected = dfs[0].columns
+        tm.assert_index_equal(result, expected)
+
 
 @pytest.mark.parametrize('pdt', [pd.Series, pd.DataFrame, pd.Panel])
 @pytest.mark.parametrize('dt', np.sctypes['float'])