From 3464e95064ad1c1d4ac9d37e3d381215165a8ffe Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 17 Apr 2020 09:24:35 +0200
Subject: [PATCH 01/13] initial find_common_type/_get_common_type + tests for
 IntegerDtype

---
 pandas/core/arrays/integer.py              | 13 +++++++++--
 pandas/core/dtypes/base.py                 | 27 ++++++++++++++++++++++
 pandas/core/dtypes/cast.py                 |  7 +++++-
 pandas/core/dtypes/concat.py               | 24 ++++++++++++++++---
 pandas/tests/arrays/integer/test_concat.py | 26 +++++++++++++++++++++
 5 files changed, 91 insertions(+), 6 deletions(-)
 create mode 100644 pandas/tests/arrays/integer/test_concat.py

diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
index 5605b3fbc5dfa..1f434f0d18db0 100644
--- a/pandas/core/arrays/integer.py
+++ b/pandas/core/arrays/integer.py
@@ -1,11 +1,11 @@
 import numbers
-from typing import TYPE_CHECKING, Tuple, Type, Union
+from typing import TYPE_CHECKING, List, Optional, Tuple, Type, Union
 import warnings
 
 import numpy as np
 
 from pandas._libs import lib, missing as libmissing
-from pandas._typing import ArrayLike
+from pandas._typing import ArrayLike, DtypeObj
 from pandas.compat import set_function_name
 from pandas.util._decorators import cache_readonly
 
@@ -95,6 +95,15 @@ def construct_array_type(cls) -> Type["IntegerArray"]:
         """
         return IntegerArray
 
+    def _get_common_type(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
+        # for now only handle other integer types
+        if not all(isinstance(t, _IntegerDtype) for t in dtypes):
+            return None
+        np_dtype = np.find_common_type([t.numpy_dtype for t in dtypes], [])
+        if np.issubdtype(np_dtype, np.integer):
+            return _dtypes[str(np_dtype)]
+        return None
+
     def __from_arrow__(
         self, array: Union["pyarrow.Array", "pyarrow.ChunkedArray"]
     ) -> "IntegerArray":
diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py
index a4f0ccc2016c0..a168b97b529e4 100644
--- a/pandas/core/dtypes/base.py
+++ b/pandas/core/dtypes/base.py
@@ -6,6 +6,7 @@
 
 import numpy as np
 
+from pandas._typing import DtypeObj
 from pandas.errors import AbstractMethodError
 
 from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries
@@ -322,3 +323,29 @@ def _is_boolean(self) -> bool:
         bool
         """
         return False
+
+    def _get_common_type(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
+        """
+        Return the common dtype, if one exists.
+
+        Used in `find_common_type` implementation. This is for example used
+        to determine the resulting dtype in a concat operation.
+
+        If no common dtype exists, return None. If all dtypes in the list
+        will return None, then the common dtype will be "object" dtype.
+
+        Parameters
+        ----------
+        dtypes : list of dtypes
+            The dtypes for which to determine a common dtype. This is a list
+            of np.dtype or ExtensionDtype instances.
+
+        Returns
+        -------
+        Common dtype (np.dtype or ExtensionDtype) or None
+        """
+        if len(set(dtypes)) == 1:
+            # only itself
+            return self
+        else:
+            return None
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 7dda6850ba4f7..b9264f9697bb3 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -1474,7 +1474,12 @@ def find_common_type(types):
         return first
 
     if any(isinstance(t, ExtensionDtype) for t in types):
-        return np.object
+        for t in types:
+            if isinstance(t, ExtensionDtype):
+                res = t._get_common_type(types)
+                if res is not None:
+                    return res
+        return np.dtype("object")
 
     # take lowest unit
     if all(is_datetime64_dtype(t) for t in types):
diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
index 257c4fe3c6d30..a474790475d73 100644
--- a/pandas/core/dtypes/concat.py
+++ b/pandas/core/dtypes/concat.py
@@ -4,6 +4,7 @@
 
 import numpy as np
 
+from pandas.core.dtypes.cast import find_common_type
 from pandas.core.dtypes.common import (
     is_bool_dtype,
     is_categorical_dtype,
@@ -17,6 +18,9 @@
 )
 from pandas.core.dtypes.generic import ABCCategoricalIndex, ABCRangeIndex, ABCSeries
 
+from pandas.core.arrays import ExtensionArray
+from pandas.core.construction import array
+
 
 def get_dtype_kinds(l):
     """
@@ -99,9 +103,23 @@ def is_nonempty(x) -> bool:
     single_dtype = len({x.dtype for x in to_concat}) == 1
     any_ea = any(is_extension_array_dtype(x.dtype) for x in to_concat)
 
-    if any_ea and single_dtype and axis == 0:
-        cls = type(to_concat[0])
-        return cls._concat_same_type(to_concat)
+    if any_ea and axis == 0:
+        if not single_dtype:
+            target_dtype = find_common_type([x.dtype for x in to_concat])
+
+            def cast(arr, dtype):
+                if is_extension_array_dtype(dtype):
+                    if isinstance(arr, np.ndarray):
+                        return array(arr, dtype=dtype, copy=False)
+                return arr.astype(dtype, copy=False)
+
+            to_concat = [cast(arr, target_dtype) for arr in to_concat]
+
+        if isinstance(to_concat[0], ExtensionArray):
+            cls = type(to_concat[0])
+            return cls._concat_same_type(to_concat)
+        else:
+            np.concatenate(to_concat)
 
     elif "category" in typs:
         # this must be prior to concat_datetime,
diff --git a/pandas/tests/arrays/integer/test_concat.py b/pandas/tests/arrays/integer/test_concat.py
new file mode 100644
index 0000000000000..3ace35700bd3e
--- /dev/null
+++ b/pandas/tests/arrays/integer/test_concat.py
@@ -0,0 +1,26 @@
+import pytest
+
+import pandas as pd
+import pandas._testing as tm
+
+
+@pytest.mark.parametrize(
+    "to_concat_dtypes, result_dtype",
+    [
+        (["Int64", "Int64"], "Int64"),
+        (["UInt64", "UInt64"], "UInt64"),
+        (["Int8", "Int8"], "Int8"),
+        (["Int8", "Int16"], "Int16"),
+        (["UInt8", "Int8"], "Int16"),
+        (["Int32", "UInt32"], "Int64"),
+        # this still gives object (awaiting float extension dtype)
+        (["Int64", "UInt64"], "object"),
+    ],
+)
+def test_concat_series(to_concat_dtypes, result_dtype):
+
+    result = pd.concat([pd.Series([1, 2, pd.NA], dtype=t) for t in to_concat_dtypes])
+    expected = pd.concat([pd.Series([1, 2, pd.NA], dtype=object)] * 2).astype(
+        result_dtype
+    )
+    tm.assert_series_equal(result, expected)

From b1d9d682393061560fd218b54aeb6fc82bfd573a Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 17 Apr 2020 11:40:26 +0200
Subject: [PATCH 02/13] handle categoricals

---
 pandas/core/arrays/categorical.py   |  4 +-
 pandas/core/dtypes/base.py          |  4 ++
 pandas/core/dtypes/concat.py        | 66 ++++++-----------------------
 pandas/core/dtypes/dtypes.py        | 19 ++++++++-
 pandas/core/internals/concat.py     |  5 ++-
 pandas/tests/reshape/test_concat.py |  6 +--
 6 files changed, 45 insertions(+), 59 deletions(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index b3fb3459891e0..5ba670fc87762 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -2349,9 +2349,9 @@ def _can_hold_na(self):
 
     @classmethod
     def _concat_same_type(self, to_concat):
-        from pandas.core.dtypes.concat import concat_categorical
+        from pandas.core.dtypes.concat import union_categoricals
 
-        return concat_categorical(to_concat)
+        return union_categoricals(to_concat)
 
     def isin(self, values):
         """
diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py
index a168b97b529e4..d7fb5a45152c6 100644
--- a/pandas/core/dtypes/base.py
+++ b/pandas/core/dtypes/base.py
@@ -344,6 +344,10 @@ def _get_common_type(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
         -------
         Common dtype (np.dtype or ExtensionDtype) or None
         """
+        # QUESTIONS:
+        # - do we guarantee that `dtypes` is already deduplicated? (list of uniques)
+        # - do we call this method if `len(dtypes) == 1`, or does this method
+        #   need to handle that case
         if len(set(dtypes)) == 1:
             # only itself
             return self
diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
index a474790475d73..35ee1064d1189 100644
--- a/pandas/core/dtypes/concat.py
+++ b/pandas/core/dtypes/concat.py
@@ -108,8 +108,21 @@ def is_nonempty(x) -> bool:
             target_dtype = find_common_type([x.dtype for x in to_concat])
 
             def cast(arr, dtype):
+                if (
+                    is_categorical_dtype(arr.dtype)
+                    and isinstance(dtype, np.dtype)
+                    and np.issubdtype(dtype, np.integer)
+                ):
+                    # problem case: categorical of int -> gives int as result dtype,
+                    # but categorical can contain NAs -> fall back to object dtype
+                    try:
+                        return arr.astype(dtype, copy=False)
+                    except ValueError:
+                        return arr.astype(object, copy=False)
+
                 if is_extension_array_dtype(dtype):
                     if isinstance(arr, np.ndarray):
+                        # numpy's astype cannot handle ExtensionDtypes
                         return array(arr, dtype=dtype, copy=False)
                 return arr.astype(dtype, copy=False)
 
@@ -119,12 +132,7 @@ def cast(arr, dtype):
             cls = type(to_concat[0])
             return cls._concat_same_type(to_concat)
         else:
-            np.concatenate(to_concat)
-
-    elif "category" in typs:
-        # this must be prior to concat_datetime,
-        # to support Categorical + datetime-like
-        return concat_categorical(to_concat, axis=axis)
+            return np.concatenate(to_concat)
 
     elif _contains_datetime or "timedelta" in typs or _contains_period:
         return concat_datetime(to_concat, axis=axis, typs=typs)
@@ -154,52 +162,6 @@ def cast(arr, dtype):
     return np.concatenate(to_concat, axis=axis)
 
 
-def concat_categorical(to_concat, axis: int = 0):
-    """
-    Concatenate an object/categorical array of arrays, each of which is a
-    single dtype
-
-    Parameters
-    ----------
-    to_concat : array of arrays
-    axis : int
-        Axis to provide concatenation in the current implementation this is
-        always 0, e.g. we only have 1D categoricals
-
-    Returns
-    -------
-    Categorical
-        A single array, preserving the combined dtypes
-    """
-    # we could have object blocks and categoricals here
-    # if we only have a single categoricals then combine everything
-    # else its a non-compat categorical
-    categoricals = [x for x in to_concat if is_categorical_dtype(x.dtype)]
-
-    # validate the categories
-    if len(categoricals) != len(to_concat):
-        pass
-    else:
-        # when all categories are identical
-        first = to_concat[0]
-        if all(first.is_dtype_equal(other) for other in to_concat[1:]):
-            return union_categoricals(categoricals)
-
-    # extract the categoricals & coerce to object if needed
-    to_concat = [
-        x._internal_get_values()
-        if is_categorical_dtype(x.dtype)
-        else np.asarray(x).ravel()
-        if not is_datetime64tz_dtype(x)
-        else np.asarray(x.astype(object))
-        for x in to_concat
-    ]
-    result = concat_compat(to_concat)
-    if axis == 1:
-        result = result.reshape(1, len(result))
-    return result
-
-
 def union_categoricals(
     to_union, sort_categories: bool = False, ignore_order: bool = False
 ):
diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
index 8fe2b3c60d6d0..ede82039e427b 100644
--- a/pandas/core/dtypes/dtypes.py
+++ b/pandas/core/dtypes/dtypes.py
@@ -21,7 +21,7 @@
 
 from pandas._libs.interval import Interval
 from pandas._libs.tslibs import NaT, Period, Timestamp, timezones
-from pandas._typing import Ordered
+from pandas._typing import DtypeObj, Ordered
 
 from pandas.core.dtypes.base import ExtensionDtype
 from pandas.core.dtypes.generic import ABCCategoricalIndex, ABCDateOffset, ABCIndexClass
@@ -640,6 +640,23 @@ def _is_boolean(self) -> bool:
 
         return is_bool_dtype(self.categories)
 
+    def _get_common_type(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
+
+        # check if we have all categorical dtype with identical categories
+        if all(isinstance(x, CategoricalDtype) for x in dtypes):
+            first = dtypes[0]
+            if all(first == other for other in dtypes[1:]):
+                return first
+
+        # extract the categories' dtype
+        non_cat_dtypes = [
+            x.categories.dtype if isinstance(x, CategoricalDtype) else x for x in dtypes
+        ]
+        # TODO should categorical always give an answer?
+        from pandas.core.dtypes.cast import find_common_type
+
+        return find_common_type(non_cat_dtypes)
+
 
 @register_extension_dtype
 class DatetimeTZDtype(PandasExtensionDtype):
diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py
index c8f4ec14545c7..18d9caff5c7c2 100644
--- a/pandas/core/internals/concat.py
+++ b/pandas/core/internals/concat.py
@@ -24,6 +24,7 @@
 from pandas.core.dtypes.missing import isna
 
 import pandas.core.algorithms as algos
+from pandas.core.arrays import ExtensionArray
 from pandas.core.internals.blocks import make_block
 from pandas.core.internals.managers import BlockManager
 
@@ -65,13 +66,15 @@ def concatenate_block_managers(
             blk = join_units[0].block
             vals = [ju.block.values for ju in join_units]
 
-            if not blk.is_extension or blk.is_datetimetz or blk.is_categorical:
+            if not blk.is_extension or blk.is_datetimetz:
                 # datetimetz and categorical can have the same type but multiple
                 #  dtypes, concatting does not necessarily preserve dtype
                 values = concat_compat(vals, axis=blk.ndim - 1)
             else:
                 # TODO(EA2D): special-casing not needed with 2D EAs
                 values = concat_compat(vals)
+                if not isinstance(values, ExtensionArray):
+                    values = values.reshape(1, len(values))
 
             b = make_block(values, placement=placement, ndim=blk.ndim)
         else:
diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py
index bccae2c4c2772..22f15db03a772 100644
--- a/pandas/tests/reshape/test_concat.py
+++ b/pandas/tests/reshape/test_concat.py
@@ -610,11 +610,11 @@ def test_concat_categorical_3elem_coercion(self):
         s2 = pd.Series([2, 1, 2], dtype="category")
         s3 = pd.Series([1, 2, 1, 2, np.nan])
 
-        exp = pd.Series([1, 2, np.nan, 2, 1, 2, 1, 2, 1, 2, np.nan], dtype="object")
+        exp = pd.Series([1, 2, np.nan, 2, 1, 2, 1, 2, 1, 2, np.nan], dtype="float")
         tm.assert_series_equal(pd.concat([s1, s2, s3], ignore_index=True), exp)
         tm.assert_series_equal(s1.append([s2, s3], ignore_index=True), exp)
 
-        exp = pd.Series([1, 2, 1, 2, np.nan, 1, 2, np.nan, 2, 1, 2], dtype="object")
+        exp = pd.Series([1, 2, 1, 2, np.nan, 1, 2, np.nan, 2, 1, 2], dtype="float")
         tm.assert_series_equal(pd.concat([s3, s1, s2], ignore_index=True), exp)
         tm.assert_series_equal(s3.append([s1, s2], ignore_index=True), exp)
 
@@ -698,7 +698,7 @@ def test_concat_categorical_coercion_nan(self):
         s1 = pd.Series([1, np.nan], dtype="category")
         s2 = pd.Series([np.nan, np.nan])
 
-        exp = pd.Series([1, np.nan, np.nan, np.nan], dtype="object")
+        exp = pd.Series([1, np.nan, np.nan, np.nan], dtype="float")
         tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
         tm.assert_series_equal(s1.append(s2, ignore_index=True), exp)
 

From bb398e78df218e16704806cd28da883203908c30 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 17 Apr 2020 14:12:08 +0200
Subject: [PATCH 03/13] handle sparse

---
 pandas/core/arrays/sparse/array.py       | 22 +------
 pandas/core/arrays/sparse/dtype.py       | 26 +++++++-
 pandas/core/dtypes/base.py               |  1 +
 pandas/core/dtypes/concat.py             | 83 ++++++++----------------
 pandas/tests/internals/test_internals.py |  2 +-
 5 files changed, 54 insertions(+), 80 deletions(-)

diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py
index 620e157ee54ec..ff564ffb5f4a2 100644
--- a/pandas/core/arrays/sparse/array.py
+++ b/pandas/core/arrays/sparse/array.py
@@ -952,27 +952,7 @@ def copy(self):
 
     @classmethod
     def _concat_same_type(cls, to_concat):
-        fill_values = [x.fill_value for x in to_concat]
-
-        fill_value = fill_values[0]
-
-        # np.nan isn't a singleton, so we may end up with multiple
-        # NaNs here, so we ignore tha all NA case too.
-        if not (len(set(fill_values)) == 1 or isna(fill_values).all()):
-            warnings.warn(
-                "Concatenating sparse arrays with multiple fill "
-                f"values: '{fill_values}'. Picking the first and "
-                "converting the rest.",
-                PerformanceWarning,
-                stacklevel=6,
-            )
-            keep = to_concat[0]
-            to_concat2 = [keep]
-
-            for arr in to_concat[1:]:
-                to_concat2.append(cls(np.asarray(arr), fill_value=fill_value))
-
-            to_concat = to_concat2
+        fill_value = to_concat[0].fill_value
 
         values = []
         length = 0
diff --git a/pandas/core/arrays/sparse/dtype.py b/pandas/core/arrays/sparse/dtype.py
index afa11586fda04..24f356b4160ab 100644
--- a/pandas/core/arrays/sparse/dtype.py
+++ b/pandas/core/arrays/sparse/dtype.py
@@ -1,11 +1,13 @@
 """Sparse Dtype"""
 
 import re
-from typing import TYPE_CHECKING, Any, Tuple, Type
+from typing import TYPE_CHECKING, Any, List, Optional, Tuple, Type
+import warnings
 
 import numpy as np
 
-from pandas._typing import Dtype
+from pandas._typing import Dtype, DtypeObj
+from pandas.errors import PerformanceWarning
 
 from pandas.core.dtypes.base import ExtensionDtype
 from pandas.core.dtypes.cast import astype_nansafe
@@ -352,3 +354,23 @@ def _subtype_with_str(self):
         if isinstance(self.fill_value, str):
             return type(self.fill_value)
         return self.subtype
+
+    def _get_common_type(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
+
+        fill_values = [x.fill_value for x in dtypes if isinstance(x, SparseDtype)]
+        fill_value = fill_values[0]
+
+        # np.nan isn't a singleton, so we may end up with multiple
+        # NaNs here, so we ignore tha all NA case too.
+        if not (len(set(fill_values)) == 1 or isna(fill_values).all()):
+            warnings.warn(
+                "Concatenating sparse arrays with multiple fill "
+                f"values: '{fill_values}'. Picking the first and "
+                "converting the rest.",
+                PerformanceWarning,
+                stacklevel=6,
+            )
+
+        # TODO also handle non-numpy other dtypes
+        np_dtypes = [x.subtype if isinstance(x, SparseDtype) else x for x in dtypes]
+        return SparseDtype(np.find_common_type(np_dtypes, []), fill_value=fill_value)
diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py
index d7fb5a45152c6..e85c5ae4d9804 100644
--- a/pandas/core/dtypes/base.py
+++ b/pandas/core/dtypes/base.py
@@ -348,6 +348,7 @@ def _get_common_type(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
         # - do we guarantee that `dtypes` is already deduplicated? (list of uniques)
         # - do we call this method if `len(dtypes) == 1`, or does this method
         #   need to handle that case
+        # - does this method need to handle "non-fully-initialized" dtypes?
         if len(set(dtypes)) == 1:
             # only itself
             return self
diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
index 35ee1064d1189..08c98d15b557d 100644
--- a/pandas/core/dtypes/concat.py
+++ b/pandas/core/dtypes/concat.py
@@ -4,6 +4,8 @@
 
 import numpy as np
 
+from pandas._typing import ArrayLike, DtypeObj
+
 from pandas.core.dtypes.cast import find_common_type
 from pandas.core.dtypes.common import (
     is_bool_dtype,
@@ -62,6 +64,30 @@ def get_dtype_kinds(l):
     return typs
 
 
+def _cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike:
+    """
+    Helper function for `arr.astype(common_type)` but handling all special
+    cases.
+    """
+    if (
+        is_categorical_dtype(arr.dtype)
+        and isinstance(dtype, np.dtype)
+        and np.issubdtype(dtype, np.integer)
+    ):
+        # problem case: categorical of int -> gives int as result dtype,
+        # but categorical can contain NAs -> fall back to object dtype
+        try:
+            return arr.astype(dtype, copy=False)
+        except ValueError:
+            return arr.astype(object, copy=False)
+
+    if is_extension_array_dtype(dtype):
+        if isinstance(arr, np.ndarray):
+            # numpy's astype cannot handle ExtensionDtypes
+            return array(arr, dtype=dtype, copy=False)
+    return arr.astype(dtype, copy=False)
+
+
 def concat_compat(to_concat, axis: int = 0):
     """
     provide concatenation of an array of arrays each of which is a single
@@ -106,27 +132,7 @@ def is_nonempty(x) -> bool:
     if any_ea and axis == 0:
         if not single_dtype:
             target_dtype = find_common_type([x.dtype for x in to_concat])
-
-            def cast(arr, dtype):
-                if (
-                    is_categorical_dtype(arr.dtype)
-                    and isinstance(dtype, np.dtype)
-                    and np.issubdtype(dtype, np.integer)
-                ):
-                    # problem case: categorical of int -> gives int as result dtype,
-                    # but categorical can contain NAs -> fall back to object dtype
-                    try:
-                        return arr.astype(dtype, copy=False)
-                    except ValueError:
-                        return arr.astype(object, copy=False)
-
-                if is_extension_array_dtype(dtype):
-                    if isinstance(arr, np.ndarray):
-                        # numpy's astype cannot handle ExtensionDtypes
-                        return array(arr, dtype=dtype, copy=False)
-                return arr.astype(dtype, copy=False)
-
-            to_concat = [cast(arr, target_dtype) for arr in to_concat]
+            to_concat = [_cast_to_common_type(arr, target_dtype) for arr in to_concat]
 
         if isinstance(to_concat[0], ExtensionArray):
             cls = type(to_concat[0])
@@ -137,10 +143,6 @@ def cast(arr, dtype):
     elif _contains_datetime or "timedelta" in typs or _contains_period:
         return concat_datetime(to_concat, axis=axis, typs=typs)
 
-    # these are mandated to handle empties as well
-    elif "sparse" in typs:
-        return _concat_sparse(to_concat, axis=axis, typs=typs)
-
     elif any_ea and axis == 1:
         to_concat = [np.atleast_2d(x.astype("object")) for x in to_concat]
         return np.concatenate(to_concat, axis=axis)
@@ -394,34 +396,3 @@ def _wrap_datetimelike(arr):
     if isinstance(arr, np.ndarray) and arr.dtype.kind in ["m", "M"]:
         arr = pd_array(arr)
     return arr
-
-
-def _concat_sparse(to_concat, axis=0, typs=None):
-    """
-    provide concatenation of an sparse/dense array of arrays each of which is a
-    single dtype
-
-    Parameters
-    ----------
-    to_concat : array of arrays
-    axis : axis to provide concatenation
-    typs : set of to_concat dtypes
-
-    Returns
-    -------
-    a single array, preserving the combined dtypes
-    """
-    from pandas.core.arrays import SparseArray
-
-    fill_values = [x.fill_value for x in to_concat if isinstance(x, SparseArray)]
-    fill_value = fill_values[0]
-
-    # TODO: Fix join unit generation so we aren't passed this.
-    to_concat = [
-        x
-        if isinstance(x, SparseArray)
-        else SparseArray(x.squeeze(), fill_value=fill_value)
-        for x in to_concat
-    ]
-
-    return SparseArray._concat_same_type(to_concat)
diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py
index f1d4c865a0ced..7ebf5e5ecefcd 100644
--- a/pandas/tests/internals/test_internals.py
+++ b/pandas/tests/internals/test_internals.py
@@ -584,7 +584,7 @@ def test_interleave_dtype(self, mgr_string, dtype):
         mgr = create_mgr("a: complex")
         assert mgr.as_array().dtype == "complex"
         mgr = create_mgr("a: f8; b: category")
-        assert mgr.as_array().dtype == "object"
+        assert mgr.as_array().dtype == "f8"
         mgr = create_mgr("a: M8[ns]; b: category")
         assert mgr.as_array().dtype == "object"
         mgr = create_mgr("a: M8[ns]; b: bool")

From 83fdc9136704e39055c0483ddbd7754ac7a2de23 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 17 Apr 2020 14:26:58 +0200
Subject: [PATCH 04/13] handle non-initialized CategoricalDtype in
 find_common_type

---
 pandas/core/dtypes/dtypes.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
index ede82039e427b..dc311f0128b2c 100644
--- a/pandas/core/dtypes/dtypes.py
+++ b/pandas/core/dtypes/dtypes.py
@@ -641,13 +641,22 @@ def _is_boolean(self) -> bool:
         return is_bool_dtype(self.categories)
 
     def _get_common_type(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
-
         # check if we have all categorical dtype with identical categories
         if all(isinstance(x, CategoricalDtype) for x in dtypes):
             first = dtypes[0]
             if all(first == other for other in dtypes[1:]):
                 return first
 
+        # special case non-initialized categorical
+        # TODO we should figure out the expected return value in general
+        non_init_cats = [
+            isinstance(x, CategoricalDtype) and x.categories is None for x in dtypes
+        ]
+        if all(non_init_cats):
+            return self
+        elif any(non_init_cats):
+            return None
+
         # extract the categories' dtype
         non_cat_dtypes = [
             x.categories.dtype if isinstance(x, CategoricalDtype) else x for x in dtypes

From 7f2ac2ae43789caf15afd3d77c8e8bca95324031 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 17 Apr 2020 15:51:15 +0200
Subject: [PATCH 05/13] handle datetimelike special case

---
 pandas/core/dtypes/concat.py | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
index 08c98d15b557d..0b8d2efbfd298 100644
--- a/pandas/core/dtypes/concat.py
+++ b/pandas/core/dtypes/concat.py
@@ -81,6 +81,16 @@ def _cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike:
         except ValueError:
             return arr.astype(object, copy=False)
 
+    if (
+        isinstance(arr, np.ndarray)
+        and arr.dtype.kind in ["m", "M"]
+        and dtype is np.dtype("object")
+    ):
+        # wrap datetime-likes in EA to ensure astype(object) gives Timestamp/Timedelta
+        # this can happen when concat_compat is called directly on arrays (when arrays
+        # are not coming from Index/Series._values), eg in BlockManager.quantile
+        arr = array(arr)
+
     if is_extension_array_dtype(dtype):
         if isinstance(arr, np.ndarray):
             # numpy's astype cannot handle ExtensionDtypes
@@ -123,7 +133,6 @@ def is_nonempty(x) -> bool:
 
     typs = get_dtype_kinds(to_concat)
     _contains_datetime = any(typ.startswith("datetime") for typ in typs)
-    _contains_period = any(typ.startswith("period") for typ in typs)
 
     all_empty = not len(non_empties)
     single_dtype = len({x.dtype for x in to_concat}) == 1
@@ -140,7 +149,7 @@ def is_nonempty(x) -> bool:
         else:
             return np.concatenate(to_concat)
 
-    elif _contains_datetime or "timedelta" in typs or _contains_period:
+    elif _contains_datetime or "timedelta" in typs:
         return concat_datetime(to_concat, axis=axis, typs=typs)
 
     elif any_ea and axis == 1:

From d0f90def968f752ced92cbd7fd8103f8627906b9 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 17 Apr 2020 19:47:28 +0200
Subject: [PATCH 06/13] update docstring and comment

---
 pandas/core/dtypes/base.py      | 5 +++--
 pandas/core/dtypes/concat.py    | 2 +-
 pandas/core/internals/concat.py | 4 +---
 3 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py
index e85c5ae4d9804..867662f450e8d 100644
--- a/pandas/core/dtypes/base.py
+++ b/pandas/core/dtypes/base.py
@@ -331,8 +331,9 @@ def _get_common_type(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
         Used in `find_common_type` implementation. This is for example used
         to determine the resulting dtype in a concat operation.
 
-        If no common dtype exists, return None. If all dtypes in the list
-        will return None, then the common dtype will be "object" dtype.
+        If no common dtype exists, return None (which gives the other dtypes
+        the chance to determine a common dtype). If all dtypes in the list
+        return None, then the common dtype will be "object" dtype.
 
         Parameters
         ----------
diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
index 0b8d2efbfd298..82b2795582ff1 100644
--- a/pandas/core/dtypes/concat.py
+++ b/pandas/core/dtypes/concat.py
@@ -66,7 +66,7 @@ def get_dtype_kinds(l):
 
 def _cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike:
     """
-    Helper function for `arr.astype(common_type)` but handling all special
+    Helper function for `arr.astype(common_dtype)` but handling all special
     cases.
     """
     if (
diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py
index 18d9caff5c7c2..df139915d1593 100644
--- a/pandas/core/internals/concat.py
+++ b/pandas/core/internals/concat.py
@@ -66,9 +66,7 @@ def concatenate_block_managers(
             blk = join_units[0].block
             vals = [ju.block.values for ju in join_units]
 
-            if not blk.is_extension or blk.is_datetimetz:
-                # datetimetz and categorical can have the same type but multiple
-                #  dtypes, concatting does not necessarily preserve dtype
+            if not blk.is_extension:
                 values = concat_compat(vals, axis=blk.ndim - 1)
             else:
                 # TODO(EA2D): special-casing not needed with 2D EAs

From 2d5fcb0a8cfe26c89eb6e25db2d4b195a1be427a Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 17 Apr 2020 19:50:05 +0200
Subject: [PATCH 07/13] ignore mypy

---
 pandas/core/arrays/integer.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
index 1f434f0d18db0..44fcec0d68c91 100644
--- a/pandas/core/arrays/integer.py
+++ b/pandas/core/arrays/integer.py
@@ -99,7 +99,9 @@ def _get_common_type(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
         # for now only handle other integer types
         if not all(isinstance(t, _IntegerDtype) for t in dtypes):
             return None
-        np_dtype = np.find_common_type([t.numpy_dtype for t in dtypes], [])
+        np_dtype = np.find_common_type(
+            [t.numpy_dtype for t in dtypes], []  # type: ignore
+        )
         if np.issubdtype(np_dtype, np.integer):
             return _dtypes[str(np_dtype)]
         return None

From fc98b652e48e73d7bb6526618469a1550ccbbfcf Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 1 May 2020 15:07:14 +0200
Subject: [PATCH 08/13] common_type -> common_dtype

---
 pandas/core/arrays/integer.py      | 2 +-
 pandas/core/arrays/sparse/dtype.py | 2 +-
 pandas/core/dtypes/base.py         | 2 +-
 pandas/core/dtypes/cast.py         | 2 +-
 pandas/core/dtypes/dtypes.py       | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
index 72f98e413c280..743267534bfaa 100644
--- a/pandas/core/arrays/integer.py
+++ b/pandas/core/arrays/integer.py
@@ -96,7 +96,7 @@ def construct_array_type(cls) -> Type["IntegerArray"]:
         """
         return IntegerArray
 
-    def _get_common_type(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
+    def _get_common_dtype(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
         # for now only handle other integer types
         if not all(isinstance(t, _IntegerDtype) for t in dtypes):
             return None
diff --git a/pandas/core/arrays/sparse/dtype.py b/pandas/core/arrays/sparse/dtype.py
index 24f356b4160ab..156a90f6ce600 100644
--- a/pandas/core/arrays/sparse/dtype.py
+++ b/pandas/core/arrays/sparse/dtype.py
@@ -355,7 +355,7 @@ def _subtype_with_str(self):
             return type(self.fill_value)
         return self.subtype
 
-    def _get_common_type(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
+    def _get_common_dtype(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
 
         fill_values = [x.fill_value for x in dtypes if isinstance(x, SparseDtype)]
         fill_value = fill_values[0]
diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py
index 867662f450e8d..1c1a9dffd0288 100644
--- a/pandas/core/dtypes/base.py
+++ b/pandas/core/dtypes/base.py
@@ -324,7 +324,7 @@ def _is_boolean(self) -> bool:
         """
         return False
 
-    def _get_common_type(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
+    def _get_common_dtype(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
         """
         Return the common dtype, if one exists.
 
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 2eef3ad3ed7c2..7203187e630d6 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -1453,7 +1453,7 @@ def find_common_type(types):
     if any(isinstance(t, ExtensionDtype) for t in types):
         for t in types:
             if isinstance(t, ExtensionDtype):
-                res = t._get_common_type(types)
+                res = t._get_common_dtype(types)
                 if res is not None:
                     return res
         return np.dtype("object")
diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
index dc311f0128b2c..ceed7e29e4a35 100644
--- a/pandas/core/dtypes/dtypes.py
+++ b/pandas/core/dtypes/dtypes.py
@@ -640,7 +640,7 @@ def _is_boolean(self) -> bool:
 
         return is_bool_dtype(self.categories)
 
-    def _get_common_type(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
+    def _get_common_dtype(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
         # check if we have all categorical dtype with identical categories
         if all(isinstance(x, CategoricalDtype) for x in dtypes):
             first = dtypes[0]

From b0725917f6b4a5da6f42b83128019ae1a2377791 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 1 May 2020 15:18:39 +0200
Subject: [PATCH 09/13] ensure deduplicated list of dtypes is passed

---
 pandas/core/dtypes/base.py | 5 -----
 pandas/core/dtypes/cast.py | 3 +++
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py
index 1c1a9dffd0288..0f15b0fe03bb1 100644
--- a/pandas/core/dtypes/base.py
+++ b/pandas/core/dtypes/base.py
@@ -345,11 +345,6 @@ def _get_common_dtype(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
         -------
         Common dtype (np.dtype or ExtensionDtype) or None
         """
-        # QUESTIONS:
-        # - do we guarantee that `dtypes` is already deduplicated? (list of uniques)
-        # - do we call this method if `len(dtypes) == 1`, or does this method
-        #   need to handle that case
-        # - does this method need to handle "non-fully-initialized" dtypes?
         if len(set(dtypes)) == 1:
             # only itself
             return self
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 7203187e630d6..977993a5a2cea 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -1450,6 +1450,9 @@ def find_common_type(types):
     if all(is_dtype_equal(first, t) for t in types[1:]):
         return first
 
+    # get unique types (dict.fromkeys is used as order-preserving set())
+    types = list(dict.fromkeys(types).keys())
+
     if any(isinstance(t, ExtensionDtype) for t in types):
         for t in types:
             if isinstance(t, ExtensionDtype):

From 91c984a55e8974a257473d3694a0e982a5564cb7 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 1 May 2020 15:24:45 +0200
Subject: [PATCH 10/13] add very basic base extension test

---
 pandas/tests/extension/base/dtype.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/pandas/tests/extension/base/dtype.py b/pandas/tests/extension/base/dtype.py
index ee4e199fbfe45..65e32d716a4db 100644
--- a/pandas/tests/extension/base/dtype.py
+++ b/pandas/tests/extension/base/dtype.py
@@ -112,3 +112,10 @@ def test_construct_from_string_wrong_type_raises(self, dtype):
             match="'construct_from_string' expects a string, got <class 'int'>",
         ):
             type(dtype).construct_from_string(0)
+
+    def test_get_common_dtype(self, dtype):
+        # in practice we will not typically call this with a 1-length list
+        # (we shortcut to just use that dtype as the common dtype), but it is
+        # still good practice to check that this case works (and it is the
+        # only case we can test in general)
+        assert dtype._get_common_dtype([dtype]) == dtype

From 2a2b9d58667c8cded0bba92dacf74f01f43e69f2 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 1 May 2020 15:35:54 +0200
Subject: [PATCH 11/13] document API change

---
 doc/source/whatsnew/v1.1.0.rst | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 7ad7e8f5a27b0..e4ef752a33635 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -241,6 +241,9 @@ Backwards incompatible API changes
 - :meth:`DataFrame.at` and :meth:`Series.at` will raise a ``TypeError`` instead of a ``ValueError`` if an incompatible key is passed, and ``KeyError`` if a missing key is passed, matching the behavior of ``.loc[]`` (:issue:`31722`)
 - Passing an integer dtype other than ``int64`` to ``np.array(period_index, dtype=...)`` will now raise ``TypeError`` instead of incorrectly using ``int64`` (:issue:`32255`)
 - Passing an invalid ``fill_value`` to :meth:`Categorical.take` raises a ``ValueError`` instead of ``TypeError`` (:issue:`33660`)
+- Combining a ``Categorical`` that has integer categories and contains missing values
+  with a float dtype column in operations such as :func:`concat` or :meth:`~DataFrame.append`
+  will now result in a float column instead of an object dtyped column (:issue:`33607`)
 
 ``MultiIndex.get_indexer`` interprets `method` argument differently
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

From 8893165c688c1d6881e0f6455ca4aa10ccb64eac Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 1 May 2020 15:43:07 +0200
Subject: [PATCH 12/13] update EA interface docs

---
 pandas/core/arrays/base.py | 7 ++++++-
 pandas/core/dtypes/base.py | 6 ++++--
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index 7447d593a7ff0..bd903d9b1fae3 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -1004,7 +1004,7 @@ def _concat_same_type(
         cls, to_concat: Sequence["ExtensionArray"]
     ) -> "ExtensionArray":
         """
-        Concatenate multiple array.
+        Concatenate multiple arrays of this dtype.
 
         Parameters
         ----------
@@ -1014,6 +1014,11 @@ def _concat_same_type(
         -------
         ExtensionArray
         """
+        # Implementer note: this method will only be called with a sequence of
+        # ExtensionArrays of this class and with the same dtype as self. This
+        # should allow "easy" concatenation (no upcasting needed), and result
+        # in a new ExtensionArray of the same dtype.
+        # Note: this strict behaviour is only guaranteed starting with pandas 1.1
         raise AbstractMethodError(cls)
 
     # The _can_hold_na attribute is set to True so that pandas internals
diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py
index 0f15b0fe03bb1..2d81dd4d884a3 100644
--- a/pandas/core/dtypes/base.py
+++ b/pandas/core/dtypes/base.py
@@ -34,11 +34,12 @@ class ExtensionDtype:
     * type
     * name
 
-    The following attributes influence the behavior of the dtype in
+    The following attributes and methods influence the behavior of the dtype in
     pandas operations
 
     * _is_numeric
     * _is_boolean
+    * _get_common_dtype
 
     Optionally one can override construct_array_type for construction
     with the name of this dtype via the Registry. See
@@ -333,7 +334,8 @@ def _get_common_dtype(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
 
         If no common dtype exists, return None (which gives the other dtypes
         the chance to determine a common dtype). If all dtypes in the list
-        return None, then the common dtype will be "object" dtype.
+        return None, then the common dtype will be "object" dtype (this means
+        there is never a need to return "object" dtype from this method itself).
 
         Parameters
         ----------

From e19e3ef93d95a5d96f47812371d47c36af814245 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 1 May 2020 15:46:23 +0200
Subject: [PATCH 13/13] add type annotation on find_common_type

---
 pandas/core/dtypes/cast.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 977993a5a2cea..ad307fd99ec9c 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -3,7 +3,7 @@
 """
 
 from datetime import date, datetime, timedelta
-from typing import TYPE_CHECKING, Any, Optional, Tuple, Type
+from typing import TYPE_CHECKING, Any, List, Optional, Tuple, Type
 
 import numpy as np
 
@@ -1423,7 +1423,7 @@ def maybe_cast_to_datetime(value, dtype, errors: str = "raise"):
     return value
 
 
-def find_common_type(types):
+def find_common_type(types: List[DtypeObj]) -> DtypeObj:
     """
     Find a common data type among the given dtypes.