Skip to content

Commit

Permalink
Bug in concat casting all na levels to float (#44902)
Browse files Browse the repository at this point in the history
  • Loading branch information
phofl authored Dec 16, 2021
1 parent 01268aa commit ccc1a25
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 1 deletion.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -809,6 +809,7 @@ Reshaping
- Bug in :func:`crosstab` would fail when inputs are lists or tuples (:issue:`44076`)
- Bug in :meth:`DataFrame.append` failing to retain ``index.name`` when appending a list of :class:`Series` objects (:issue:`44109`)
- Fixed metadata propagation in :meth:`DataFrame.apply` method, consequently fixing the same issue for :meth:`DataFrame.transform`, :meth:`DataFrame.nunique` and :meth:`DataFrame.mode` (:issue:`28283`)
- Bug in :func:`concat` casting levels of :class:`MultiIndex` to float if they only consist of missing values (:issue:`44900`)
- Bug in :meth:`DataFrame.stack` with ``ExtensionDtype`` columns incorrectly raising (:issue:`43561`)
- Bug in :meth:`Series.unstack` with object doing unwanted type inference on resulting columns (:issue:`44595`)
- Bug in :class:`MultiIndex` failing join operations with overlapping ``IntervalIndex`` levels (:issue:`44096`)
Expand Down
9 changes: 8 additions & 1 deletion pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,7 +422,14 @@ def __init__(
# We remove null values here, then below will re-insert
# them, grep "full_codes"
arr_list = [values[idx] for idx in np.where(~null_mask)[0]]
arr = sanitize_array(arr_list, None)

# GH#44900 Do not cast to float if we have only missing values
if arr_list or arr.dtype == "object":
sanitize_dtype = None
else:
sanitize_dtype = arr.dtype

arr = sanitize_array(arr_list, None, dtype=sanitize_dtype)
values = arr

if dtype.categories is None:
Expand Down
14 changes: 14 additions & 0 deletions pandas/tests/arrays/categorical/test_missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,3 +197,17 @@ def test_compare_categorical_with_missing(self, a1, a2, categories):
result = Series(a1, dtype=cat_type) == Series(a2, dtype=cat_type)
expected = Series(a1) == Series(a2)
tm.assert_series_equal(result, expected)

@pytest.mark.parametrize(
    "na_value, dtype",
    [
        (pd.NaT, "datetime64[ns]"),
        (None, "float64"),
        (np.nan, "float64"),
        (pd.NA, "float64"),
    ],
)
def test_categorical_only_missing_values_no_cast(self, na_value, dtype):
    """Check the categories dtype of a Categorical built only from missing values.

    A two-element ``Categorical`` is constructed from ``na_value`` repeated,
    and its ``categories`` must equal an empty ``Index`` of the parametrized
    ``dtype``.
    """
    # GH#44900
    only_missing = Categorical([na_value, na_value])
    expected_categories = Index([], dtype=dtype)
    tm.assert_index_equal(only_missing.categories, expected_categories)
13 changes: 13 additions & 0 deletions pandas/tests/reshape/concat/test_datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -528,3 +528,16 @@ def test_concat_timedelta64_block():
result = concat([df, df])
tm.assert_frame_equal(result.iloc[:10], df)
tm.assert_frame_equal(result.iloc[10:], df)


def test_concat_multiindex_datetime_nat():
    """Concatenate along columns two frames whose MultiIndex second level is all ``NaT``.

    The left frame has one row and the right frame has two; the result is
    compared against a frame indexed by the two-row MultiIndex, with ``NaN``
    in column ``"a"`` for the row that only the right frame has.
    """
    # GH#44900
    all_rows = [(1, pd.NaT), (2, pd.NaT)]
    first_row_only = all_rows[:1]

    frames = [
        DataFrame({"a": 1}, index=MultiIndex.from_tuples(first_row_only)),
        DataFrame({"b": 2}, index=MultiIndex.from_tuples(all_rows)),
    ]
    result = concat(frames, axis="columns")

    expected_index = MultiIndex.from_tuples(all_rows)
    expected = DataFrame({"a": [1.0, np.nan], "b": 2}, index=expected_index)
    tm.assert_frame_equal(result, expected)

0 comments on commit ccc1a25

Please sign in to comment.