-
-
Notifications
You must be signed in to change notification settings - Fork 17.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
ENH: support downcasting of nullable EAs in pd.to_numeric #38746
Changes from 19 commits
a594847
a1bb9fc
dbba7b4
ddb71cb
323cfdc
7b4180e
337589a
4e8761a
e2b4cbb
23c4ae6
e140b7a
f583a10
a6cb152
d707a61
6b2c39f
fda4ba1
1a23118
1015b07
0279be9
56747da
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,6 +7,7 @@ | |
ensure_object, | ||
is_datetime_or_timedelta_dtype, | ||
is_decimal, | ||
is_integer_dtype, | ||
is_number, | ||
is_numeric_dtype, | ||
is_scalar, | ||
|
@@ -15,6 +16,7 @@ | |
from pandas.core.dtypes.generic import ABCIndex, ABCSeries | ||
|
||
import pandas as pd | ||
from pandas.core.arrays.numeric import NumericArray | ||
|
||
|
||
def to_numeric(arg, errors="raise", downcast=None): | ||
|
@@ -108,6 +110,21 @@ def to_numeric(arg, errors="raise", downcast=None): | |
2 2.0 | ||
3 -3.0 | ||
dtype: float64 | ||
|
||
Downcasting of nullable integer and floating dtypes is supported: | ||
arw2019 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
>>> s = pd.Series([1, 2, 3], dtype="Int64") | ||
>>> pd.to_numeric(s, downcast="integer") | ||
0 1 | ||
1 2 | ||
2 3 | ||
dtype: Int8 | ||
>>> s = pd.Series([1.0, 2.1, 3.0], dtype="Float64") | ||
>>> pd.to_numeric(s, downcast="float") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. we may want to also accept There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't think we should do that, since those are not actually dtypes, but rather values to a |
||
0 1.0 | ||
1 2.1 | ||
2 3.0 | ||
dtype: Float32 | ||
""" | ||
if downcast not in (None, "integer", "signed", "unsigned", "float"): | ||
raise ValueError("invalid downcasting method provided") | ||
|
@@ -142,6 +159,14 @@ def to_numeric(arg, errors="raise", downcast=None): | |
else: | ||
values = arg | ||
|
||
# GH33013: for IntegerArray & FloatingArray extract non-null values for casting | ||
# save mask to reconstruct the full array after casting | ||
if isinstance(values, NumericArray): | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
mask = values._mask | ||
values = values._data[~mask] | ||
else: | ||
mask = None | ||
|
||
values_dtype = getattr(values, "dtype", None) | ||
if is_numeric_dtype(values_dtype): | ||
pass | ||
|
@@ -188,6 +213,16 @@ def to_numeric(arg, errors="raise", downcast=None): | |
if values.dtype == dtype: | ||
break | ||
|
||
# GH33013: for IntegerArray & FloatingArray need to reconstruct masked array | ||
if mask is not None: | ||
data = np.zeros(mask.shape, dtype=values.dtype) | ||
data[~mask] = values | ||
|
||
from pandas.core.arrays import FloatingArray, IntegerArray | ||
arw2019 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
klass = IntegerArray if is_integer_dtype(data.dtype) else FloatingArray | ||
values = klass(data, mask) | ||
|
||
if is_series: | ||
return arg._constructor(values, index=arg.index, name=arg.name) | ||
elif is_index: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Docstring updated.