Skip to content

Commit

Permalink
REF: implement sanitize_masked_array (#38398)
Browse files (browse the repository at this point in the history)
  • Loading branch information
jbrockmendel authored Dec 11, 2020
1 parent 5a7514c commit e47dbdf
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 23 deletions.
22 changes: 15 additions & 7 deletions pandas/core/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,20 @@ def ensure_wrapped_if_datetimelike(arr):
return arr


def sanitize_masked_array(data: ma.MaskedArray) -> np.ndarray:
    """
    Return a copy of a numpy MaskedArray with its masked entries filled in.

    If nothing is masked, a plain copy of ``data`` is returned. Otherwise
    ``data`` is passed through ``maybe_upcast`` (with ``copy=True``), the
    mask of the result is softened, and the masked positions are overwritten
    with the fill value that ``maybe_upcast`` returned.
    """
    masked_positions = ma.getmaskarray(data)

    # Nothing masked: a copy is all that is required.
    if not masked_positions.any():
        return data.copy()

    upcast, fill_value = maybe_upcast(data, copy=True)
    upcast.soften_mask()  # set hardmask False if it was True
    upcast[masked_positions] = fill_value
    return upcast


def sanitize_array(
data,
index: Optional[Index],
Expand All @@ -432,13 +446,7 @@ def sanitize_array(
"""

if isinstance(data, ma.MaskedArray):
mask = ma.getmaskarray(data)
if mask.any():
data, fill_value = maybe_upcast(data, copy=True)
data.soften_mask() # set hardmask False if it was True
data[mask] = fill_value
else:
data = data.copy()
data = sanitize_masked_array(data)

# extract ndarray or ExtensionArray, ensure we have no PandasArray
data = extract_array(data, extract_numpy=True)
Expand Down
11 changes: 2 additions & 9 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,6 @@
maybe_convert_platform,
maybe_downcast_to_dtype,
maybe_infer_to_datetimelike,
maybe_upcast,
validate_numeric_casting,
)
from pandas.core.dtypes.common import (
Expand Down Expand Up @@ -126,7 +125,7 @@
from pandas.core.arraylike import OpsMixin
from pandas.core.arrays import Categorical, ExtensionArray
from pandas.core.arrays.sparse import SparseFrameAccessor
from pandas.core.construction import extract_array
from pandas.core.construction import extract_array, sanitize_masked_array
from pandas.core.generic import NDFrame, _shared_docs
from pandas.core.indexes import base as ibase
from pandas.core.indexes.api import (
Expand Down Expand Up @@ -535,13 +534,7 @@ def __init__(

# a masked array
else:
mask = ma.getmaskarray(data)
if mask.any():
data, fill_value = maybe_upcast(data, copy=True)
data.soften_mask() # set hardmask False if it was True
data[mask] = fill_value
else:
data = data.copy()
data = sanitize_masked_array(data)
mgr = init_ndarray(data, index, columns, dtype=dtype, copy=copy)

elif isinstance(data, (np.ndarray, Series, Index)):
Expand Down
15 changes: 8 additions & 7 deletions pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@
)

if TYPE_CHECKING:
from numpy.ma.mrecords import MaskedRecords

from pandas import Series

# ---------------------------------------------------------------------
Expand Down Expand Up @@ -96,13 +98,12 @@ def arrays_to_mgr(


def masked_rec_array_to_mgr(
data, index, columns, dtype: Optional[DtypeObj], copy: bool
data: "MaskedRecords", index, columns, dtype: Optional[DtypeObj], copy: bool
):
"""
Extract from a masked rec array and create the manager.
"""
# essentially process a record array then fill it
fill_value = data.fill_value
fdata = ma.getdata(data)
if index is None:
index = get_names_from_index(fdata)
Expand All @@ -116,11 +117,11 @@ def masked_rec_array_to_mgr(

# fill if needed
new_arrays = []
for fv, arr, col in zip(fill_value, arrays, arr_columns):
# TODO: numpy docs suggest fv must be scalar, but could it be
# non-scalar for object dtype?
assert lib.is_scalar(fv), fv
mask = ma.getmaskarray(data[col])
for col in arr_columns:
arr = data[col]
fv = arr.fill_value

mask = ma.getmaskarray(arr)
if mask.any():
arr, fv = maybe_upcast(arr, fill_value=fv, copy=True)
arr[mask] = fv
Expand Down

0 comments on commit e47dbdf

Please sign in to comment.