From 7dc4edb2e2e945ea028d1e923732b30ac2bed92c Mon Sep 17 00:00:00 2001
From: gfyoung <gfyoung17+GitHub@gmail.com>
Date: Sat, 3 Nov 2018 06:56:52 -0700
Subject: [PATCH] ENH: Add FrozenList.union and .difference (#23394)

Re-attempt of gh-15506.

Closes gh-15475.
---
 doc/source/groupby.rst              | 10 +++++++
 doc/source/whatsnew/v0.24.0.txt     |  5 ++--
 pandas/core/indexes/frozen.py       | 42 +++++++++++++++++++++++++----
 pandas/tests/indexes/test_frozen.py | 23 +++++++++++++---
 4 files changed, 69 insertions(+), 11 deletions(-)

diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst
index fecc336049a40b..0a896bac0f2d72 100644
--- a/doc/source/groupby.rst
+++ b/doc/source/groupby.rst
@@ -125,6 +125,16 @@ We could naturally group by either the ``A`` or ``B`` columns, or both:
    grouped = df.groupby('A')
    grouped = df.groupby(['A', 'B'])
 
+.. versionadded:: 0.24
+
+If we also have a MultiIndex on columns ``A`` and ``B``, we can group by all
+but the specified columns
+
+.. ipython:: python
+
+   df2 = df.set_index(['A', 'B'])
+   grouped = df2.groupby(level=df2.index.names.difference(['B'])
+
 These will split the DataFrame on its index (rows). We could also split by the
 columns:
 
diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index 64cc098ccaa94d..b19003e1c12846 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -13,10 +13,9 @@ v0.24.0 (Month XX, 2018)
 New features
 ~~~~~~~~~~~~
 - :func:`merge` now directly allows merge between objects of type ``DataFrame`` and named ``Series``, without the need to convert the ``Series`` object into a ``DataFrame`` beforehand (:issue:`21220`)
-
-
 - ``ExcelWriter`` now accepts ``mode`` as a keyword argument, enabling append to existing workbooks when using the ``openpyxl`` engine (:issue:`3441`)
-
+- ``FrozenList`` has gained the ``.union()`` and ``.difference()`` methods. This functionality greatly simplifies groupby's that rely on explicitly excluding certain columns. See :ref:`Splitting an object into groups
+<groupby.split>` for more information (:issue:`15475`, :issue:`15506`)
 - :func:`DataFrame.to_parquet` now accepts ``index`` as an argument, allowing
 the user to override the engine's default behavior to include or omit the
 dataframe's indexes from the resulting Parquet file. (:issue:`20768`)
diff --git a/pandas/core/indexes/frozen.py b/pandas/core/indexes/frozen.py
index 4f782e22c2370d..3ac4a2bf31a7e2 100644
--- a/pandas/core/indexes/frozen.py
+++ b/pandas/core/indexes/frozen.py
@@ -23,15 +23,47 @@ class FrozenList(PandasObject, list):
     because it's technically non-hashable, will be used
     for lookups, appropriately, etc.
     """
-    # Sidenote: This has to be of type list, otherwise it messes up PyTables
-    #           typechecks
+    # Side note: This has to be of type list. Otherwise,
+    #            it messes up PyTables type checks.
 
-    def __add__(self, other):
+    def union(self, other):
+        """
+        Returns a FrozenList with other concatenated to the end of self.
+
+        Parameters
+        ----------
+        other : array-like
+            The array-like whose elements we are concatenating.
+
+        Returns
+        -------
+        diff : FrozenList
+            The collection difference between self and other.
+        """
         if isinstance(other, tuple):
             other = list(other)
-        return self.__class__(super(FrozenList, self).__add__(other))
+        return type(self)(super(FrozenList, self).__add__(other))
+
+    def difference(self, other):
+        """
+        Returns a FrozenList with elements from other removed from self.
+
+        Parameters
+        ----------
+        other : array-like
+            The array-like whose elements we are removing self.
+
+        Returns
+        -------
+        diff : FrozenList
+            The collection difference between self and other.
+        """
+        other = set(other)
+        temp = [x for x in self if x not in other]
+        return type(self)(temp)
 
-    __iadd__ = __add__
+    # TODO: Consider deprecating these in favor of `union` (xref gh-15506)
+    __add__ = __iadd__ = union
 
     # Python 2 compat
     def __getslice__(self, i, j):
diff --git a/pandas/tests/indexes/test_frozen.py b/pandas/tests/indexes/test_frozen.py
index e62329dec98463..db9f875b77b8a9 100644
--- a/pandas/tests/indexes/test_frozen.py
+++ b/pandas/tests/indexes/test_frozen.py
@@ -11,7 +11,7 @@ class TestFrozenList(CheckImmutable, CheckStringMixin):
     mutable_methods = ('extend', 'pop', 'remove', 'insert')
     unicode_container = FrozenList([u("\u05d0"), u("\u05d1"), "c"])
 
-    def setup_method(self, method):
+    def setup_method(self, _):
         self.lst = [1, 2, 3, 4, 5]
         self.container = FrozenList(self.lst)
         self.klass = FrozenList
@@ -25,13 +25,30 @@ def test_add(self):
         expected = FrozenList([1, 2, 3] + self.lst)
         self.check_result(result, expected)
 
-    def test_inplace(self):
+    def test_iadd(self):
         q = r = self.container
+
         q += [5]
         self.check_result(q, self.lst + [5])
-        # other shouldn't be mutated
+
+        # Other shouldn't be mutated.
         self.check_result(r, self.lst)
 
+    def test_union(self):
+        result = self.container.union((1, 2, 3))
+        expected = FrozenList(self.lst + [1, 2, 3])
+        self.check_result(result, expected)
+
+    def test_difference(self):
+        result = self.container.difference([2])
+        expected = FrozenList([1, 3, 4, 5])
+        self.check_result(result, expected)
+
+    def test_difference_dupe(self):
+        result = FrozenList([1, 2, 3, 2]).difference([2])
+        expected = FrozenList([1, 3])
+        self.check_result(result, expected)
+
 
 class TestFrozenNDArray(CheckImmutable, CheckStringMixin):
     mutable_methods = ('put', 'itemset', 'fill')