From 80b5bc6e796e9390b11ddb230eb101fcee6e2bd8 Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Fri, 3 Nov 2023 21:46:32 -0400 Subject: [PATCH 1/4] Reorder tests in maybe_downcast_numeric The comment `# if we have any nulls, then we are done` is not consistent with the test `if isna(arr).any()` because `arr` is constructed only from the first element (`r[0]`) not the full ravel'd list of values. Moreover, calling `np.array()` on some random type can have surprising consequences. So instead, do the early-out test as intended, just using `r[0]` without going through `np.array()`. Then test other things about `r[0]`. Only then should we test all the values (and if we have any nulls, then we are done). See #55824 Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pandas/core/dtypes/cast.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 716d1a78f93c5..322f14aaabbbd 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -357,18 +357,20 @@ def trans(x): # if we don't have any elements, just astype it return trans(result).astype(dtype) - # do a test on the first element, if it fails then we are done r = result.ravel() - arr = np.array([r[0]]) - if isna(arr).any(): - # if we have any nulls, then we are done + if isna(r[0]): + # do a test on the first element, if it fails then we are done return result elif not isinstance(r[0], (np.integer, np.floating, int, float, bool)): # a comparable, e.g. 
a Decimal may slip in here return result + if isna(r).any(): + # if we have any nulls, then we are done + return result + if ( issubclass(result.dtype.type, (np.object_, np.number)) and notna(result).all() From 4594c5cebb60e8750b2be2499bceb1b31f00a41d Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Tue, 7 Nov 2023 06:07:47 -0500 Subject: [PATCH 2/4] Simplify/optimize tests for downcasting If the first element of `result` is an array, ravel that to get the element we will test. Otherwise use it as is. We only need to check whether `result` is all non-null once. Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pandas/core/dtypes/cast.py | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 322f14aaabbbd..4e54ff2fd757c 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -357,20 +357,13 @@ def trans(x): # if we don't have any elements, just astype it return trans(result).astype(dtype) - r = result.ravel() - - if isna(r[0]): - # do a test on the first element, if it fails then we are done - return result - - elif not isinstance(r[0], (np.integer, np.floating, int, float, bool)): + element = result[0] + if isinstance(element, np.ndarray): + element = element.ravel()[0] + if not isinstance(element, (np.integer, np.floating, int, float, bool)): # a comparable, e.g. a Decimal may slip in here return result - if isna(r).any(): - # if we have any nulls, then we are done - return result - if ( issubclass(result.dtype.type, (np.object_, np.number)) and notna(result).all() From 8c6bfe14e654224619c8dec9db26736d99da9606 Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Tue, 7 Nov 2023 07:43:48 -0500 Subject: [PATCH 3/4] Update cast.py Don't use deprecated array indexing on ExtensionArrays. We now need to use `iloc`. 
Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pandas/core/dtypes/cast.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 4e54ff2fd757c..5d9f2a7a675f8 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -357,7 +357,10 @@ def trans(x): # if we don't have any elements, just astype it return trans(result).astype(dtype) - element = result[0] + if isinstance(result, np.ndarray): + element = result[0] + else: + element = result.iloc[0] if isinstance(element, np.ndarray): element = element.ravel()[0] if not isinstance(element, (np.integer, np.floating, int, float, bool)): From 450315eb8030f7bd7b2f1aae143fe8ec3a72602a Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Fri, 10 Nov 2023 03:47:32 +1300 Subject: [PATCH 4/4] Eliminate need to call `ravel` When processing a multidimensional `ndarray`, we can get the first element by calling `result.item(0)` and completely avoid the copying needed by `ravel` to get the first element that way. We can also eliminate an additional conditional check. Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pandas/core/dtypes/cast.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 5d9f2a7a675f8..b9d4153626891 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -358,11 +358,9 @@ def trans(x): return trans(result).astype(dtype) if isinstance(result, np.ndarray): - element = result[0] + element = result.item(0) else: element = result.iloc[0] - if isinstance(element, np.ndarray): - element = element.ravel()[0] if not isinstance(element, (np.integer, np.floating, int, float, bool)): # a comparable, e.g. a Decimal may slip in here return result