Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

REF: move __from_arrow__ to common base class for numeric masked arrays #38411

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 3 additions & 34 deletions pandas/core/arrays/floating.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import numbers
from typing import TYPE_CHECKING, List, Optional, Tuple, Type, Union
from typing import List, Optional, Tuple, Type
import warnings

import numpy as np
Expand Down Expand Up @@ -27,13 +27,10 @@
from pandas.core.tools.numeric import to_numeric

from .masked import BaseMaskedDtype
from .numeric import NumericArray
from .numeric import NumericArray, NumericDtype

if TYPE_CHECKING:
import pyarrow


class FloatingDtype(BaseMaskedDtype):
class FloatingDtype(NumericDtype):
"""
An ExtensionDtype to hold a single size of floating dtype.

Expand Down Expand Up @@ -72,34 +69,6 @@ def _get_common_dtype(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
return FLOAT_STR_TO_DTYPE[str(np_dtype)]
return None

def __from_arrow__(
    self, array: Union["pyarrow.Array", "pyarrow.ChunkedArray"]
) -> "FloatingArray":
    """
    Construct FloatingArray from pyarrow Array/ChunkedArray.

    Parameters
    ----------
    array : pyarrow.Array or pyarrow.ChunkedArray
        Arrow data to convert. It is cast to the pyarrow type that
        corresponds to ``self.type`` when its own type differs.

    Returns
    -------
    FloatingArray
        A single array holding the values of every chunk, in order. An
        input without any chunk yields an empty FloatingArray.
    """
    import pyarrow

    from pandas.core.arrays._arrow_utils import pyarrow_array_to_numpy_and_mask

    pyarrow_type = pyarrow.from_numpy_dtype(self.type)
    if not array.type.equals(pyarrow_type):
        array = array.cast(pyarrow_type)

    # A plain Array is handled as one chunk so both input kinds share a path.
    chunks = [array] if isinstance(array, pyarrow.Array) else array.chunks

    results = []
    for arr in chunks:
        data, mask = pyarrow_array_to_numpy_and_mask(arr, dtype=self.type)
        results.append(FloatingArray(data.copy(), ~mask, copy=False))

    if not results:
        # A ChunkedArray may hold zero chunks; concatenating an empty list
        # is not possible, so build the empty result directly.
        return FloatingArray(
            np.array([], dtype=self.type), np.array([], dtype=bool), copy=False
        )
    if len(results) == 1:
        # avoid additional copy in _concat_same_type
        return results[0]
    return FloatingArray._concat_same_type(results)


def coerce_to_array(
values, dtype=None, mask=None, copy: bool = False
Expand Down
37 changes: 3 additions & 34 deletions pandas/core/arrays/integer.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import numbers
from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Type, Union
from typing import Dict, List, Optional, Tuple, Type
import warnings

import numpy as np
Expand Down Expand Up @@ -27,13 +27,10 @@
from pandas.core.tools.numeric import to_numeric

from .masked import BaseMaskedArray, BaseMaskedDtype
from .numeric import NumericArray
from .numeric import NumericArray, NumericDtype

if TYPE_CHECKING:
import pyarrow


class _IntegerDtype(BaseMaskedDtype):
class _IntegerDtype(NumericDtype):
"""
An ExtensionDtype to hold a single size & kind of integer dtype.

Expand Down Expand Up @@ -92,34 +89,6 @@ def _get_common_dtype(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
return FLOAT_STR_TO_DTYPE[str(np_dtype)]
return None

def __from_arrow__(
    self, array: Union["pyarrow.Array", "pyarrow.ChunkedArray"]
) -> "IntegerArray":
    """
    Construct IntegerArray from pyarrow Array/ChunkedArray.

    Parameters
    ----------
    array : pyarrow.Array or pyarrow.ChunkedArray
        Arrow data to convert. It is cast to the pyarrow type that
        corresponds to ``self.type`` when its own type differs.

    Returns
    -------
    IntegerArray
        A single array holding the values of every chunk, in order. An
        input without any chunk yields an empty IntegerArray.
    """
    import pyarrow

    from pandas.core.arrays._arrow_utils import pyarrow_array_to_numpy_and_mask

    pyarrow_type = pyarrow.from_numpy_dtype(self.type)
    if not array.type.equals(pyarrow_type):
        array = array.cast(pyarrow_type)

    # A plain Array is handled as one chunk so both input kinds share a path.
    chunks = [array] if isinstance(array, pyarrow.Array) else array.chunks

    results = []
    for arr in chunks:
        data, mask = pyarrow_array_to_numpy_and_mask(arr, dtype=self.type)
        results.append(IntegerArray(data.copy(), ~mask, copy=False))

    if not results:
        # A ChunkedArray may hold zero chunks; concatenating an empty list
        # is not possible, so build the empty result directly.
        return IntegerArray(
            np.array([], dtype=self.type), np.array([], dtype=bool), copy=False
        )
    if len(results) == 1:
        # avoid additional copy in _concat_same_type
        return results[0]
    return IntegerArray._concat_same_type(results)


def integer_array(values, dtype=None, copy: bool = False) -> "IntegerArray":
"""
Expand Down
42 changes: 41 additions & 1 deletion pandas/core/arrays/numeric.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import datetime
from typing import TYPE_CHECKING, Union

import numpy as np

Expand All @@ -13,7 +14,46 @@
is_list_like,
)

from .masked import BaseMaskedArray
from .masked import BaseMaskedArray, BaseMaskedDtype

if TYPE_CHECKING:
import pyarrow


# Base dtype that carries the pyarrow conversion shared by the numeric masked
# dtypes, so that subclasses do not each need their own __from_arrow__.
class NumericDtype(BaseMaskedDtype):
def __from_arrow__(
self, array: Union["pyarrow.Array", "pyarrow.ChunkedArray"]
) -> BaseMaskedArray:
"""
Construct IntegerArray/FloatingArray from pyarrow Array/ChunkedArray.

The concrete array class is taken from ``self.construct_array_type()``.

Parameters
----------
array : pyarrow.Array or pyarrow.ChunkedArray
    Arrow data to convert. It is cast to the pyarrow type derived from
    ``self.type`` when its own type differs.

Returns
-------
BaseMaskedArray
    The single converted chunk when there is exactly one, otherwise the
    result of ``_concat_same_type`` over all converted chunks.
"""
import pyarrow

from pandas.core.arrays._arrow_utils import pyarrow_array_to_numpy_and_mask

# Resolve the array class once; it is used for every chunk and for the concat.
array_class = self.construct_array_type()

# Cast only when the incoming arrow type differs from the one for self.type.
pyarrow_type = pyarrow.from_numpy_dtype(self.type)
if not array.type.equals(pyarrow_type):
array = array.cast(pyarrow_type)

# Normalise both accepted inputs to a list of chunks for the loop below.
if isinstance(array, pyarrow.Array):
chunks = [array]
else:
# pyarrow.ChunkedArray
chunks = array.chunks

results = []
for arr in chunks:
data, mask = pyarrow_array_to_numpy_and_mask(arr, dtype=self.type)
# The helper's mask is inverted before being passed on — presumably it marks
# valid entries while the masked array expects missing ones; TODO confirm.
# NOTE(review): data is copied explicitly and copy=False is passed, so the
# array class is not asked to copy a second time.
num_arr = array_class(data.copy(), ~mask, copy=False)
results.append(num_arr)

if len(results) == 1:
# avoid additional copy in _concat_same_type
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

looks like this is the only change, otherwise this is pretty much copy/paste?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

indeed (that's the second commit, the first one is mainly copy/paste)

return results[0]
else:
return array_class._concat_same_type(results)


class NumericArray(BaseMaskedArray):
Expand Down