diff --git a/doc/source/whatsnew/v0.24.2.rst b/doc/source/whatsnew/v0.24.2.rst index 73df504c89d5b..abe899b8af5a6 100644 --- a/doc/source/whatsnew/v0.24.2.rst +++ b/doc/source/whatsnew/v0.24.2.rst @@ -24,6 +24,8 @@ Fixed Regressions - Fixed issue in ``DataFrame`` construction with passing a mixed list of mixed types could segfault. (:issue:`25075`) +- Fixed regression in :meth:`DataFrame.duplicated()`, where an empty ``DataFrame`` did not return a boolean-dtyped ``Series``. (:issue:`25184`) + .. _whatsnew_0242.enhancements: Enhancements diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 4032dc20b2e19..1055514cd0e09 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -68,6 +68,8 @@ Performance Improvements Bug Fixes ~~~~~~~~~ +- + Categorical ^^^^^^^^^^^ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 19da8ba5c547d..bc521e931e5ae 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4636,7 +4636,7 @@ def duplicated(self, subset=None, keep='first'): from pandas._libs.hashtable import duplicated_int64, _SIZE_HINT_LIMIT if self.empty: - return Series() + return Series(dtype=bool) def f(vals): labels, shape = algorithms.factorize( diff --git a/pandas/tests/frame/test_duplicates.py b/pandas/tests/frame/test_duplicates.py index f61dbbdb989e4..3396670fb5879 100644 --- a/pandas/tests/frame/test_duplicates.py +++ b/pandas/tests/frame/test_duplicates.py @@ -182,6 +182,17 @@ def test_drop_duplicates(): assert df.duplicated(keep=keep).sum() == 0 +def test_duplicated_on_empty_frame(): + # GH 25184 + + df = DataFrame(columns=['a', 'b']) + dupes = df.duplicated('a') + + result = df[dupes] + expected = df.copy() + tm.assert_frame_equal(result, expected) + + def test_drop_duplicates_with_duplicate_column_names(): # GH17836 df = DataFrame([