get tests working
andrewgsavage committed Aug 4, 2024
1 parent 0e3a269 commit f3d8f58
Showing 3 changed files with 52 additions and 29 deletions.
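The changes below give PintType a per-array "subdtype", so a pint dtype string can name both the units and the pandas dtype backing the magnitudes, with the default backing dtype moving to the nullable "Float64". A minimal usage sketch (illustrative, not part of the commit), assuming the two-bracket dtype string parses the way the updated tests below use it:

# Illustrative sketch (not part of the diff): a pint dtype string can now carry a
# subdtype for the backing magnitudes in a second bracket.
import pandas as pd
import pint_pandas  # noqa: F401  (registers the "pint[...]" extension dtype)

s = pd.Series([1, 2, 3], dtype="pint[degC][Int64]")   # degC units, nullable Int64 magnitudes
t = pd.Series([1.0, 2.0, 3.0], dtype="pint[W]")       # no subdtype given; default is now "Float64"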
65 changes: 44 additions & 21 deletions pint_pandas/pint_array.py
@@ -19,7 +19,13 @@
register_series_accessor,
)
from pandas.api.indexers import check_array_indexer
from pandas.api.types import is_integer, is_list_like, is_object_dtype, is_string_dtype
from pandas.api.types import (
is_integer,
is_list_like,
is_object_dtype,
is_string_dtype,
infer_dtype,
)
from pandas.compat import set_function_name
from pandas.core import nanops # type: ignore
from pint import Quantity as _Quantity
@@ -30,7 +36,7 @@
# quantify/dequantify
NO_UNIT = "No Unit"
SUBDTYPES = True
DEFAULT_SUBDTYPE = "float"
DEFAULT_SUBDTYPE = "Float64"

pandas_version = version("pandas")
pandas_version_info = tuple(
@@ -165,15 +171,14 @@ def na_value(self):
return self.ureg.Quantity(np.nan, self.units)

def __hash__(self):
# make myself hashable
return hash(str(self))

def __eq__(self, other):
try:
other = PintType(other)
except (ValueError, errors.UndefinedUnitError):
return False
return self.units == other.units
return self.units == other.units and self.subdtype == other.subdtype

@classmethod
def is_dtype(cls, dtype):
@@ -301,10 +306,17 @@ class PintArray(ExtensionArray, ExtensionScalarOpsMixin):
_HANDLED_TYPES = (np.ndarray, numbers.Number, _Quantity)

def __init__(self, values, dtype=None, copy=False):
# infer dtype from values if not given
# infer subdtype from values if not given in dtype
if isinstance(dtype, str) and dtype.count("[") == 1:
_dtype = PintType(dtype)
values = pd.array(values, copy=copy)
dtype = PintType(units=_dtype.units, subdtype=values.dtype)
# infer units and subdtype from values if no dtype given
if dtype is None:
if isinstance(values, _Quantity):
dtype = values.units
units = values.units
values = pd.array(values, copy=copy)
dtype = PintType(units=units, subdtype=values.dtype)
elif isinstance(values, PintArray):
dtype = values._dtype

@@ -629,7 +641,7 @@ def _concat_same_type(cls, to_concat):
converted_values = a.quantity.to(output_units).magnitude
data.append(np.atleast_1d(converted_values))

return cls(np.concatenate(data), output_units)
return cls(np.concatenate(data), to_concat[0].dtype)

@classmethod
def _from_sequence(cls, scalars, dtype=None, copy=False):
@@ -652,25 +664,32 @@ def _from_sequence(cls, scalars, dtype=None, copy=False):
if dtype is None:
if not isinstance(master_scalar, _Quantity):
raise ValueError("No dtype specified and not a sequence of quantities")
dtype = PintType(master_scalar.units)
units = master_scalar.units
subdtype = None
else:
units = dtype.units
subdtype = dtype.subdtype

# convert scalars to output unit
if isinstance(master_scalar, _Quantity):
scalars = [
(item.to(dtype.units).magnitude if hasattr(item, "to") else item)
(item.to(units).magnitude if hasattr(item, "to") else item)
for item in scalars
]
return cls(scalars, dtype=dtype, copy=copy)

values = pd.array(scalars, dtype=subdtype)
return cls(
values, dtype=PintType(units=units, subdtype=values.dtype), copy=copy
)

@classmethod
def _from_sequence_of_strings(cls, scalars, dtype=None, copy=False):
if not dtype:
dtype = PintType.construct_from_quantity_string(scalars[0])
return cls._from_sequence([dtype.ureg.Quantity(x) for x in scalars])
return cls._from_sequence([dtype.ureg.Quantity(x) for x in scalars], dtype)

@classmethod
def _from_factorized(cls, values, original):
from pandas.api.types import infer_dtype

if infer_dtype(values) != "object":
values = pd.array(values, copy=False)
return cls(values, dtype=original.dtype)
@@ -811,8 +830,6 @@ def _create_method(cls, op, coerce_to_dtype=True):

def _binop(self, other):
def validate_length(obj1, obj2):
# validates length
# CHANGED: do not convert to listlike (why should we? pint.Quantity is perfecty able to handle that...)
try:
if len(obj1) != len(obj2):
raise ValueError("Lengths must match")
@@ -870,10 +887,17 @@ def _create_comparison_method(cls, op):
return cls._create_method(op, coerce_to_dtype=False)

@classmethod
def from_1darray_quantity(cls, quantity):
def from_1darray_quantity(cls, quantity, subdtype=None):
if not is_list_like(quantity.magnitude):
raise TypeError("quantity's magnitude is not list like")
return cls(quantity.magnitude, quantity.units)
if isinstance(quantity.magnitude, ExtensionArray):
subdtype = quantity.magnitude.dtype
mag = quantity.magnitude
else:
mag = pd.array(quantity.magnitude)
subdtype = mag.dtype

return cls(mag, PintType(quantity.units, subdtype))

def __array__(self, dtype=None, copy=False):
if dtype is None or is_object_dtype(dtype):
@@ -970,14 +994,13 @@ def map(self, mapper, na_action=None):

arr = map_array(self, mapper, na_action)

master_scalar = None
try:
master_scalar = next(i for i in arr if hasattr(i, "units"))
next(i for i in arr if hasattr(i, "units"))
except StopIteration:
# JSON mapper formatting Qs as str don't create PintArrays
# ...and that's OK. Caller will get array of values
return arr
return PintArray._from_sequence(arr, PintType(master_scalar.units))
return PintArray._from_sequence(arr)

def _reduce(self, name, *, skipna: bool = True, keepdims: bool = False, **kwds):
"""
@@ -1058,7 +1081,7 @@ def _accumulate(self, name: str, *, skipna: bool = True, **kwds):
except NotImplementedError:
result = functions[name](self.numpy_data, **kwds)

return self._from_sequence(result, self.units)
return self._from_sequence(result, self.dtype)


PintArray._add_arithmetic_ops()
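Taken together, these hunks route magnitudes through pd.array in the construction paths (__init__, _from_sequence, from_1darray_quantity) and store the resulting pandas dtype as the subdtype on the PintType. A hedged sketch of what that implies for _from_sequence (illustrative, not part of the commit):

# Illustrative sketch (not part of the diff): subdtype inference in _from_sequence.
import pint
from pint_pandas import PintArray

ureg = pint.get_application_registry()
Q_ = ureg.Quantity

arr = PintArray._from_sequence([Q_(1, "m"), Q_(2, "m")])
# Under the changes above, the magnitudes go through pd.array, so arr.dtype should
# carry both the units ("meter") and the inferred nullable subdtype (e.g. Int64).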
10 changes: 5 additions & 5 deletions pint_pandas/testsuite/test_issues.py
@@ -41,7 +41,7 @@ def test_force_ndarray_like(self):

result = pd.concat([a, b], axis=1)
expected = pd.DataFrame(
{0: PintArray(q_a_), 1: PintArray(q_b)}, dtype="pint[degC]"
{0: PintArray(q_a_), 1: PintArray(q_b)}, dtype="pint[degC][Int64]"
)
tm.assert_equal(result, expected)

@@ -64,7 +64,7 @@ def test_offset_concat(self):

result = pd.concat([a, b], axis=1)
expected = pd.DataFrame(
{0: PintArray(q_a_), 1: PintArray(q_b)}, dtype="pint[degC]"
{0: PintArray(q_a_), 1: PintArray(q_b)}, dtype="pint[degC][Int64]"
)
tm.assert_equal(result, expected)

@@ -213,7 +213,7 @@ def test_dequantify_duplicate_col_names(self):
"column_names": [None, "unit"],
},
orient="tight",
dtype="float64",
dtype="Float64",
)
result = df.iloc[:, 1:].pint.dequantify()
tm.assert_frame_equal(expected, result)
@@ -227,7 +227,7 @@ def test_dequantify_duplicate_col_names(self):
"column_names": [None, "unit"],
},
orient="tight",
dtype="float64",
dtype="Float64",
)
result = df.pint.dequantify()
tm.assert_frame_equal(expected, result)
@@ -253,7 +253,7 @@ def test_roundtrip(self):
"power": pd.Series([1.0, 2.0, 3.0], dtype="pint[W]"),
"torque": pd.Series([4.0, 5.0, 6.0], dtype="pint[N*m]"),
"fruits": pd.Series(["apple", "pear", "kiwi"]),
"float_numbers": pd.Series([1.0, 2.0, 3.0], dtype="float64"),
"float_numbers": pd.Series([1.0, 2.0, 3.0], dtype="Float64"),
"int_numbers": pd.Series([1.0, 2.0, 3.0], dtype="int"),
}
)
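The test updates above switch the expected magnitude dtype from NumPy float64 to the nullable Float64 dtype. A short round-trip sketch of the behaviour they exercise (illustrative, not part of the commit):

# Illustrative sketch (not part of the diff): dequantify/quantify round-trip.
import pandas as pd
import pint_pandas  # noqa: F401

df = pd.DataFrame({"power": pd.Series([1.0, 2.0, 3.0], dtype="pint[W]")})
plain = df.pint.dequantify()          # units move into a column level; magnitudes
                                      # are expected to come back as "Float64"
restored = plain.pint.quantify(level=-1)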
6 changes: 3 additions & 3 deletions pint_pandas/testsuite/test_pandas_interface.py
@@ -108,7 +108,7 @@ def test_dequantify(self):
2: 2.0,
3: 3.0,
},
dtype="float64",
dtype="Float64",
),
}
)
@@ -174,7 +174,7 @@ class TestDataFrameAccessor(object):
def test_index_maintained(self):
test_csv = join(dirname(__file__), "pandas_test.csv")

df = pd.read_csv(test_csv, header=[0, 1])
df = pd.read_csv(test_csv, header=[0, 1], dtype="Float64")
df.columns = pd.MultiIndex.from_arrays(
[
["Holden", "Holden", "Holden", "Ford", "Ford", "Ford"],
@@ -425,5 +425,5 @@ def test_mismatched_dimensions(self):
def test_numpy_data(self):
foo = PintArray([1, 2, 3], dtype="pint[m]")
result = foo.numpy_data
expected = np.array([1, 2, 3], dtype="float64")
expected = np.array([1, 2, 3], dtype="int64")
np.testing.assert_array_equal(result, expected, strict=True)
