Skip to content

Commit

Permalink
REF: move __from_arrow__ to common base class for numeric masked arrays…
Browse files Browse the repository at this point in the history
  • Loading branch information
jorisvandenbossche authored and luckyvs1 committed Jan 20, 2021
1 parent cede451 commit 25e41a6
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 69 deletions.
37 changes: 3 additions & 34 deletions pandas/core/arrays/floating.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import numbers
from typing import TYPE_CHECKING, List, Optional, Tuple, Type, Union
from typing import List, Optional, Tuple, Type
import warnings

import numpy as np
Expand Down Expand Up @@ -27,13 +27,10 @@
from pandas.core.tools.numeric import to_numeric

from .masked import BaseMaskedDtype
from .numeric import NumericArray
from .numeric import NumericArray, NumericDtype

if TYPE_CHECKING:
import pyarrow


class FloatingDtype(BaseMaskedDtype):
class FloatingDtype(NumericDtype):
"""
An ExtensionDtype to hold a single size of floating dtype.
Expand Down Expand Up @@ -72,34 +69,6 @@ def _get_common_dtype(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
return FLOAT_STR_TO_DTYPE[str(np_dtype)]
return None

def __from_arrow__(
    self, array: Union["pyarrow.Array", "pyarrow.ChunkedArray"]
) -> "FloatingArray":
    """
    Construct FloatingArray from pyarrow Array/ChunkedArray.

    Parameters
    ----------
    array : pyarrow.Array or pyarrow.ChunkedArray
        Input data. It is cast to the pyarrow type derived from
        ``self.type`` when its own type differs.

    Returns
    -------
    FloatingArray
        A single array covering every chunk of the input; an empty
        FloatingArray when the input has no chunks at all.
    """
    import pyarrow

    from pandas.core.arrays._arrow_utils import pyarrow_array_to_numpy_and_mask

    pyarrow_type = pyarrow.from_numpy_dtype(self.type)
    if not array.type.equals(pyarrow_type):
        array = array.cast(pyarrow_type)

    if isinstance(array, pyarrow.Array):
        chunks = [array]
    else:
        # pyarrow.ChunkedArray
        chunks = array.chunks

    results = []
    for arr in chunks:
        data, mask = pyarrow_array_to_numpy_and_mask(arr, dtype=self.type)
        # The helper's mask is inverted before being handed to the array;
        # presumably it flags valid entries rather than missing ones.
        float_arr = FloatingArray(data.copy(), ~mask, copy=False)
        results.append(float_arr)

    if not results:
        # A ChunkedArray may contain zero chunks. Build the empty result
        # directly instead of handing an empty list to _concat_same_type,
        # which has nothing to concatenate in that case.
        return FloatingArray(
            np.array([], dtype=self.type),
            np.array([], dtype=np.bool_),
            copy=False,
        )
    if len(results) == 1:
        # avoid additional copy in _concat_same_type
        return results[0]
    return FloatingArray._concat_same_type(results)


def coerce_to_array(
values, dtype=None, mask=None, copy: bool = False
Expand Down
37 changes: 3 additions & 34 deletions pandas/core/arrays/integer.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import numbers
from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Type, Union
from typing import Dict, List, Optional, Tuple, Type
import warnings

import numpy as np
Expand Down Expand Up @@ -27,13 +27,10 @@
from pandas.core.tools.numeric import to_numeric

from .masked import BaseMaskedArray, BaseMaskedDtype
from .numeric import NumericArray
from .numeric import NumericArray, NumericDtype

if TYPE_CHECKING:
import pyarrow


class _IntegerDtype(BaseMaskedDtype):
class _IntegerDtype(NumericDtype):
"""
An ExtensionDtype to hold a single size & kind of integer dtype.
Expand Down Expand Up @@ -92,34 +89,6 @@ def _get_common_dtype(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
return FLOAT_STR_TO_DTYPE[str(np_dtype)]
return None

def __from_arrow__(
    self, array: Union["pyarrow.Array", "pyarrow.ChunkedArray"]
) -> "IntegerArray":
    """
    Construct IntegerArray from pyarrow Array/ChunkedArray.

    Parameters
    ----------
    array : pyarrow.Array or pyarrow.ChunkedArray
        Input data. It is cast to the pyarrow type derived from
        ``self.type`` when its own type differs.

    Returns
    -------
    IntegerArray
        A single array covering every chunk of the input; an empty
        IntegerArray when the input has no chunks at all.
    """
    import pyarrow

    from pandas.core.arrays._arrow_utils import pyarrow_array_to_numpy_and_mask

    pyarrow_type = pyarrow.from_numpy_dtype(self.type)
    if not array.type.equals(pyarrow_type):
        array = array.cast(pyarrow_type)

    if isinstance(array, pyarrow.Array):
        chunks = [array]
    else:
        # pyarrow.ChunkedArray
        chunks = array.chunks

    results = []
    for arr in chunks:
        data, mask = pyarrow_array_to_numpy_and_mask(arr, dtype=self.type)
        # The helper's mask is inverted before being handed to the array;
        # presumably it flags valid entries rather than missing ones.
        int_arr = IntegerArray(data.copy(), ~mask, copy=False)
        results.append(int_arr)

    if not results:
        # A ChunkedArray may contain zero chunks. Build the empty result
        # directly instead of handing an empty list to _concat_same_type,
        # which has nothing to concatenate in that case.
        return IntegerArray(
            np.array([], dtype=self.type),
            np.array([], dtype=np.bool_),
            copy=False,
        )
    if len(results) == 1:
        # avoid additional copy in _concat_same_type
        return results[0]
    return IntegerArray._concat_same_type(results)


def safe_cast(values, dtype, copy: bool):
"""
Expand Down
42 changes: 41 additions & 1 deletion pandas/core/arrays/numeric.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import datetime
from typing import TYPE_CHECKING, Union

import numpy as np

Expand All @@ -13,7 +14,46 @@
is_list_like,
)

from .masked import BaseMaskedArray
from .masked import BaseMaskedArray, BaseMaskedDtype

if TYPE_CHECKING:
import pyarrow


class NumericDtype(BaseMaskedDtype):
    """
    Common base class for the numeric masked dtypes.

    Holds the pyarrow conversion shared by the integer and floating dtypes;
    the concrete array class is looked up through ``construct_array_type()``.
    """

    def __from_arrow__(
        self, array: Union["pyarrow.Array", "pyarrow.ChunkedArray"]
    ) -> BaseMaskedArray:
        """
        Construct IntegerArray/FloatingArray from pyarrow Array/ChunkedArray.

        Parameters
        ----------
        array : pyarrow.Array or pyarrow.ChunkedArray
            Input data. It is cast to the pyarrow type derived from
            ``self.type`` when its own type differs.

        Returns
        -------
        BaseMaskedArray
            An instance of ``self.construct_array_type()`` covering every
            chunk of the input; an empty instance when the input has no
            chunks at all.
        """
        import pyarrow

        from pandas.core.arrays._arrow_utils import pyarrow_array_to_numpy_and_mask

        array_class = self.construct_array_type()

        pyarrow_type = pyarrow.from_numpy_dtype(self.type)
        if not array.type.equals(pyarrow_type):
            array = array.cast(pyarrow_type)

        if isinstance(array, pyarrow.Array):
            chunks = [array]
        else:
            # pyarrow.ChunkedArray
            chunks = array.chunks

        results = []
        for arr in chunks:
            data, mask = pyarrow_array_to_numpy_and_mask(arr, dtype=self.type)
            # The helper's mask is inverted before being handed to the array;
            # presumably it flags valid entries rather than missing ones.
            num_arr = array_class(data.copy(), ~mask, copy=False)
            results.append(num_arr)

        if not results:
            # A ChunkedArray may contain zero chunks. Build the empty result
            # directly instead of handing an empty list to
            # _concat_same_type, which has nothing to concatenate then.
            return array_class(
                np.array([], dtype=self.type),
                np.array([], dtype=np.bool_),
                copy=False,
            )
        if len(results) == 1:
            # avoid additional copy in _concat_same_type
            return results[0]
        return array_class._concat_same_type(results)


class NumericArray(BaseMaskedArray):
Expand Down

0 comments on commit 25e41a6

Please sign in to comment.