diff --git a/doc/source/whatsnew/v0.24.2.rst b/doc/source/whatsnew/v0.24.2.rst index b0f287cf0b9f65..5ae777ca68ebac 100644 --- a/doc/source/whatsnew/v0.24.2.rst +++ b/doc/source/whatsnew/v0.24.2.rst @@ -24,6 +24,8 @@ Fixed Regressions - Fixed issue in ``DataFrame`` construction with passing a mixed list of mixed types could segfault. (:issue:`25075`) - Fixed regression in :meth:`DataFrame.apply` causing ``RecursionError`` when ``dict``-like classes were passed as argument. (:issue:`25196`) +- Fixed regression in :meth:`DataFrame.duplicated`, where an empty DataFrame did not return a boolean-dtyped Series. (:issue:`25184`) + .. _whatsnew_0242.enhancements: Enhancements diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 4032dc20b2e198..1055514cd0e09d 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -68,6 +68,8 @@ Performance Improvements Bug Fixes ~~~~~~~~~ +- + Categorical ^^^^^^^^^^^ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e89aeb29f16250..5c28259e0cb637 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4619,7 +4619,7 @@ def duplicated(self, subset=None, keep='first'): from pandas._libs.hashtable import duplicated_int64, _SIZE_HINT_LIMIT if self.empty: - return Series() + return Series(dtype=bool) def f(vals): labels, shape = algorithms.factorize( diff --git a/pandas/tests/frame/test_duplicates.py b/pandas/tests/frame/test_duplicates.py index f61dbbdb989e4b..3396670fb5879b 100644 --- a/pandas/tests/frame/test_duplicates.py +++ b/pandas/tests/frame/test_duplicates.py @@ -182,6 +182,17 @@ def test_drop_duplicates(): assert df.duplicated(keep=keep).sum() == 0 +def test_duplicated_on_empty_frame(): + # GH 25184 + + df = DataFrame(columns=['a', 'b']) + dupes = df.duplicated('a') + + result = df[dupes] + expected = df.copy() + tm.assert_frame_equal(result, expected) + + def test_drop_duplicates_with_duplicate_column_names(): # GH17836 df = DataFrame([