From ca5b1df94b81bd75e869a5c76e2eafc82f2759d7 Mon Sep 17 00:00:00 2001 From: Sterling Paramore Date: Mon, 11 Feb 2019 04:52:38 -0800 Subject: [PATCH] BUG: Duplicated returns boolean dataframe (#25234) --- doc/source/whatsnew/v0.24.2.rst | 2 ++ doc/source/whatsnew/v0.25.0.rst | 2 ++ pandas/core/frame.py | 2 +- pandas/tests/frame/test_duplicates.py | 11 +++++++++++ 4 files changed, 16 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.2.rst b/doc/source/whatsnew/v0.24.2.rst index b0f287cf0b9f6..5ae777ca68eba 100644 --- a/doc/source/whatsnew/v0.24.2.rst +++ b/doc/source/whatsnew/v0.24.2.rst @@ -24,6 +24,8 @@ Fixed Regressions - Fixed issue in ``DataFrame`` construction with passing a mixed list of mixed types could segfault. (:issue:`25075`) - Fixed regression in :meth:`DataFrame.apply` causing ``RecursionError`` when ``dict``-like classes were passed as argument. (:issue:`25196`) +- Fixed regression in :meth:`DataFrame.duplicated()`, where an empty DataFrame did not return a boolean-dtyped Series. (:issue:`25184`) + .. 
_whatsnew_0242.enhancements: Enhancements diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 4032dc20b2e19..1055514cd0e09 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -68,6 +68,8 @@ Performance Improvements Bug Fixes ~~~~~~~~~ +- + Categorical ^^^^^^^^^^^ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e89aeb29f1625..5c28259e0cb63 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4619,7 +4619,7 @@ def duplicated(self, subset=None, keep='first'): from pandas._libs.hashtable import duplicated_int64, _SIZE_HINT_LIMIT if self.empty: - return Series() + return Series(dtype=bool) def f(vals): labels, shape = algorithms.factorize( diff --git a/pandas/tests/frame/test_duplicates.py b/pandas/tests/frame/test_duplicates.py index f61dbbdb989e4..3396670fb5879 100644 --- a/pandas/tests/frame/test_duplicates.py +++ b/pandas/tests/frame/test_duplicates.py @@ -182,6 +182,17 @@ def test_drop_duplicates(): assert df.duplicated(keep=keep).sum() == 0 +def test_duplicated_on_empty_frame(): + # GH 25184 + + df = DataFrame(columns=['a', 'b']) + dupes = df.duplicated('a') + + result = df[dupes] + expected = df.copy() + tm.assert_frame_equal(result, expected) + + def test_drop_duplicates_with_duplicate_column_names(): # GH17836 df = DataFrame([