pandas-dev · jorisvandenbossche · Aug 20, 2018 · Aug 14, 2018 · Aug 14, 2018 · Aug 15, 2018
diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
@@ -442,6 +442,7 @@ ExtensionType Changes
 - ``ExtensionArray`` has gained the abstract methods ``.dropna()`` (:issue:`21185`)
 - ``ExtensionDtype`` has gained the ability to instantiate from string dtypes, e.g. ``decimal`` would instantiate a registered ``DecimalDtype``; furthermore
   the ``ExtensionDtype`` has gained the method ``construct_array_type`` (:issue:`21185`)
+- Added ``ExtensionDtype._is_numeric`` for controlling whether an extension dtype is considered numeric (:issue:`22290`).
 - The ``ExtensionArray`` constructor, ``_from_sequence`` now take the keyword arg ``copy=False`` (:issue:`21185`)
 - Bug in :meth:`Series.get` for ``Series`` using ``ExtensionArray`` and integer index (:issue:`21257`)
 - :meth:`Series.combine()` works correctly with :class:`~pandas.api.extensions.ExtensionArray` inside of :class:`Series` (:issue:`20825`)

diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
@@ -45,6 +45,10 @@ def is_signed_integer(self):
     def is_unsigned_integer(self):
         return self.kind == 'u'
 
+    @property
+    def _is_numeric(self):
+        return True
+
     @cache_readonly
     def numpy_dtype(self):
         """ Return an instance of our numpy dtype """

diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py
@@ -94,6 +94,18 @@ def is_dtype(cls, dtype):
         except TypeError:
             return False
 
+    @property
+    def _is_numeric(self):
+        # type: () -> bool
+        """
+        Whether columns with this dtype should be considered numeric.
+
+        By default ExtensionDtypes are assumed to be non-numeric.
+        They'll be excluded from operations that exclude non-numeric
+        columns, like groupby reductions.
+        """
+        return False
+
 
 class ExtensionDtype(_DtypeOpsMixin):
     """A custom data type, to be paired with an ExtensionArray.
@@ -109,6 +121,11 @@ class ExtensionDtype(_DtypeOpsMixin):
     * name
     * construct_from_string
 
+    The following attributes influence the behavior of the dtype in
+    pandas operations
+
+    * _is_numeric
+
     Optionally one can override construct_array_type for construction
     with the name of this dtype via the Registry
 

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
@@ -669,7 +669,9 @@ def _astype(self, dtype, copy=False, errors='raise', values=None,
             newb = self.copy() if copy else self
 
         if newb.is_numeric and self.is_numeric:
-            if newb.shape != self.shape:
+            # use values.shape, rather than newb.shape, as newb.shape
+            # may be incorrect for ExtensionBlocks.
+            if values.shape != self.shape:
 # Maybe infer ndim from placement 
 if ndim is None: 
     if len(placement) != 1: 
         ndim = 1 
     else: 
         ndim = 2 
 # Maybe infer ndim from placement 
 if ndim is None: 
     if len(placement) != 1: 
         ndim = 1 
     else: 
         ndim = 2 
                 raise TypeError(
                     "cannot set astype for copy = [{copy}] for dtype "
                     "({dtype} [{itemsize}]) with smaller itemsize than "
@@ -1947,6 +1949,10 @@ def is_view(self):
         """Extension arrays are never treated as views."""
         return False
 
+    @property
+    def is_numeric(self):
+        return self.values.dtype._is_numeric
+
     def setitem(self, indexer, value, mgr=None):
         """Set the value inplace, returning a same-typed block.
 

diff --git a/pandas/tests/extension/base/groupby.py b/pandas/tests/extension/base/groupby.py
@@ -67,3 +67,16 @@ def test_groupby_extension_apply(self, data_for_grouping, op):
         df.groupby("B").A.apply(op)
         df.groupby("A").apply(op)
         df.groupby("A").B.apply(op)
+
+    def test_in_numeric_groupby(self, data_for_grouping):
+        df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4],
+                           "B": data_for_grouping,
+                           "C": [1, 1, 1, 1, 1, 1, 1, 1]})
+        result = df.groupby("A").sum().columns
+
+        if data_for_grouping.dtype._is_numeric:
+            expected = pd.Index(['B', 'C'])
+        else:
+            expected = pd.Index(['C'])
+
+        tm.assert_index_equal(result, expected)
diff --git a/pandas/tests/extension/base/interface.py b/pandas/tests/extension/base/interface.py
@@ -67,3 +67,7 @@ def test_no_values_attribute(self, data):
         # code, disallowing this for now until solved
         assert not hasattr(data, 'values')
         assert not hasattr(data, '_values')
+
+    def test_is_numeric_honored(self, data):
+        result = pd.Series(data)
+        assert result._data.blocks[0].is_numeric is data.dtype._is_numeric
diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py
@@ -33,6 +33,10 @@ def construct_from_string(cls, string):
             raise TypeError("Cannot construct a '{}' from "
                             "'{}'".format(cls, string))
 
+    @property
+    def _is_numeric(self):
+        return True
+
 
 class DecimalArray(ExtensionArray, ExtensionScalarOpsMixin):
     dtype = DecimalDtype()

diff --git a/pandas/tests/extension/integer/test_integer.py b/pandas/tests/extension/integer/test_integer.py
@@ -697,6 +697,22 @@ def test_cross_type_arithmetic():
     tm.assert_series_equal(result, expected)
 
 
+def test_groupby_mean_included():
+    df = pd.DataFrame({
+        "A": ['a', 'b', 'b'],
+        "B": [1, None, 3],
+        "C": IntegerArray([1, None, 3], dtype='Int64'),
+    })
+
+    result = df.groupby("A").sum()
+    # TODO(#22346): preserve Int64 dtype
+    expected = pd.DataFrame({
+        "B": np.array([1.0, 3.0]),
+        "C": np.array([1, 3], dtype="int64")
+    }, index=pd.Index(['a', 'b'], name='A'))
+    tm.assert_frame_equal(result, expected)
+
+
 # TODO(jreback) - these need testing / are broken
 
 # shift