get tests working
andrewgsavage committed Aug 4, 2024
1 parent 0e3a269 commit f3d8f58
Showing 3 changed files with 52 additions and 29 deletions.
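The changes below give PintType a per-array "subdtype", so a pint dtype string can name both the units and the pandas dtype backing the magnitudes, with the default backing dtype moving to the nullable "Float64". A minimal usage sketch (illustrative, not part of the commit), assuming the two-bracket dtype string parses the way the updated tests below use it:

# Illustrative sketch (not part of the diff): a pint dtype string can now carry a
# subdtype for the backing magnitudes in a second bracket.
import pandas as pd
import pint_pandas  # noqa: F401  (registers the "pint[...]" extension dtype)

s = pd.Series([1, 2, 3], dtype="pint[degC][Int64]")   # degC units, nullable Int64 magnitudes
t = pd.Series([1.0, 2.0, 3.0], dtype="pint[W]")       # no subdtype given; default is now "Float64"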
65 changes: 44 additions & 21 deletions pint_pandas/pint_array.py
@@ -19,7 +19,13 @@
register_series_accessor,
)
from pandas.api.indexers import check_array_indexer
from pandas.api.types import is_integer, is_list_like, is_object_dtype, is_string_dtype
from pandas.api.types import (
is_integer,
is_list_like,
is_object_dtype,
is_string_dtype,
infer_dtype,
)
from pandas.compat import set_function_name
from pandas.core import nanops # type: ignore
from pint import Quantity as _Quantity
@@ -30,7 +36,7 @@
# quantify/dequantify
NO_UNIT = "No Unit"
SUBDTYPES = True
DEFAULT_SUBDTYPE = "float"
DEFAULT_SUBDTYPE = "Float64"

pandas_version = version("pandas")
pandas_version_info = tuple(
@@ -165,15 +171,14 @@ def na_value(self):
return self.ureg.Quantity(np.nan, self.units)

def __hash__(self):
# make myself hashable
return hash(str(self))

def __eq__(self, other):
try:
other = PintType(other)
except (ValueError, errors.UndefinedUnitError):
return False
return self.units == other.units
return self.units == other.units and self.subdtype == other.subdtype

@classmethod
def is_dtype(cls, dtype):
@@ -301,10 +306,17 @@ class PintArray(ExtensionArray, ExtensionScalarOpsMixin):
_HANDLED_TYPES = (np.ndarray, numbers.Number, _Quantity)

def __init__(self, values, dtype=None, copy=False):
# infer dtype from values if not given
# infer subdtype from values if not given in dtype
if isinstance(dtype, str) and dtype.count("[") == 1:
_dtype = PintType(dtype)
values = pd.array(values, copy=copy)
dtype = PintType(units=_dtype.units, subdtype=values.dtype)
# infer units and subdtype from values if no dtype given
if dtype is None:
if isinstance(values, _Quantity):
dtype = values.units
units = values.units
values = pd.array(values, copy=copy)
dtype = PintType(units=units, subdtype=values.dtype)
elif isinstance(values, PintArray):
dtype = values._dtype

@@ -629,7 +641,7 @@ def _concat_same_type(cls, to_concat):
converted_values = a.quantity.to(output_units).magnitude
data.append(np.atleast_1d(converted_values))

return cls(np.concatenate(data), output_units)
return cls(np.concatenate(data), to_concat[0].dtype)

@classmethod
def _from_sequence(cls, scalars, dtype=None, copy=False):
@@ -652,25 +664,32 @@ def _from_sequence(cls, scalars, dtype=None, copy=False):
if dtype is None:
if not isinstance(master_scalar, _Quantity):
raise ValueError("No dtype specified and not a sequence of quantities")
dtype = PintType(master_scalar.units)
units = master_scalar.units
subdtype = None
else:
units = dtype.units
subdtype = dtype.subdtype

# convert scalars to output unit
if isinstance(master_scalar, _Quantity):
scalars = [
(item.to(dtype.units).magnitude if hasattr(item, "to") else item)
(item.to(units).magnitude if hasattr(item, "to") else item)
for item in scalars
]
return cls(scalars, dtype=dtype, copy=copy)

values = pd.array(scalars, dtype=subdtype)
return cls(
values, dtype=PintType(units=units, subdtype=values.dtype), copy=copy
)

@classmethod
def _from_sequence_of_strings(cls, scalars, dtype=None, copy=False):
if not dtype:
dtype = PintType.construct_from_quantity_string(scalars[0])
return cls._from_sequence([dtype.ureg.Quantity(x) for x in scalars])
return cls._from_sequence([dtype.ureg.Quantity(x) for x in scalars], dtype)

@classmethod
def _from_factorized(cls, values, original):
from pandas.api.types import infer_dtype

if infer_dtype(values) != "object":
values = pd.array(values, copy=False)
return cls(values, dtype=original.dtype)
@@ -811,8 +830,6 @@ def _create_method(cls, op, coerce_to_dtype=True):

def _binop(self, other):
def validate_length(obj1, obj2):
# validates length
# CHANGED: do not convert to listlike (why should we? pint.Quantity is perfecty able to handle that...)
try:
if len(obj1) != len(obj2):
raise ValueError("Lengths must match")
@@ -870,10 +887,17 @@ def _create_comparison_method(cls, op):
return cls._create_method(op, coerce_to_dtype=False)

@classmethod
def from_1darray_quantity(cls, quantity):
def from_1darray_quantity(cls, quantity, subdtype=None):
if not is_list_like(quantity.magnitude):
raise TypeError("quantity's magnitude is not list like")
return cls(quantity.magnitude, quantity.units)
if isinstance(quantity.magnitude, ExtensionArray):
subdtype = quantity.magnitude.dtype
mag = quantity.magnitude
else:
mag = pd.array(quantity.magnitude)
subdtype = mag.dtype

return cls(mag, PintType(quantity.units, subdtype))

def __array__(self, dtype=None, copy=False):
if dtype is None or is_object_dtype(dtype):
@@ -970,14 +994,13 @@ def map(self, mapper, na_action=None):

arr = map_array(self, mapper, na_action)

master_scalar = None
try:
master_scalar = next(i for i in arr if hasattr(i, "units"))
next(i for i in arr if hasattr(i, "units"))
except StopIteration:
# JSON mapper formatting Qs as str don't create PintArrays
# ...and that's OK. Caller will get array of values
return arr
return PintArray._from_sequence(arr, PintType(master_scalar.units))
return PintArray._from_sequence(arr)

def _reduce(self, name, *, skipna: bool = True, keepdims: bool = False, **kwds):
"""
@@ -1058,7 +1081,7 @@ def _accumulate(self, name: str, *, skipna: bool = True, **kwds):
except NotImplementedError:
result = functions[name](self.numpy_data, **kwds)

return self._from_sequence(result, self.units)
return self._from_sequence(result, self.dtype)


PintArray._add_arithmetic_ops()
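Taken together, these hunks route magnitudes through pd.array in the construction paths (__init__, _from_sequence, from_1darray_quantity) and store the resulting pandas dtype as the subdtype on the PintType. A hedged sketch of what that implies for _from_sequence (illustrative, not part of the commit):

# Illustrative sketch (not part of the diff): subdtype inference in _from_sequence.
import pint
from pint_pandas import PintArray

ureg = pint.get_application_registry()
Q_ = ureg.Quantity

arr = PintArray._from_sequence([Q_(1, "m"), Q_(2, "m")])
# Under the changes above, the magnitudes go through pd.array, so arr.dtype should
# carry both the units ("meter") and the inferred nullable subdtype (e.g. Int64).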
10 changes: 5 additions & 5 deletions pint_pandas/testsuite/test_issues.py
@@ -41,7 +41,7 @@ def test_force_ndarray_like(self):

result = pd.concat([a, b], axis=1)
expected = pd.DataFrame(
{0: PintArray(q_a_), 1: PintArray(q_b)}, dtype="pint[degC]"
{0: PintArray(q_a_), 1: PintArray(q_b)}, dtype="pint[degC][Int64]"
)
tm.assert_equal(result, expected)

@@ -64,7 +64,7 @@ def test_offset_concat(self):

result = pd.concat([a, b], axis=1)
expected = pd.DataFrame(
{0: PintArray(q_a_), 1: PintArray(q_b)}, dtype="pint[degC]"
{0: PintArray(q_a_), 1: PintArray(q_b)}, dtype="pint[degC][Int64]"
)
tm.assert_equal(result, expected)

@@ -213,7 +213,7 @@ def test_dequantify_duplicate_col_names(self):
"column_names": [None, "unit"],
},
orient="tight",
dtype="float64",
dtype="Float64",
)
result = df.iloc[:, 1:].pint.dequantify()
tm.assert_frame_equal(expected, result)
@@ -227,7 +227,7 @@ def test_dequantify_duplicate_col_names(self):
"column_names": [None, "unit"],
},
orient="tight",
dtype="float64",
dtype="Float64",
)
result = df.pint.dequantify()
tm.assert_frame_equal(expected, result)
@@ -253,7 +253,7 @@ def test_roundtrip(self):
"power": pd.Series([1.0, 2.0, 3.0], dtype="pint[W]"),
"torque": pd.Series([4.0, 5.0, 6.0], dtype="pint[N*m]"),
"fruits": pd.Series(["apple", "pear", "kiwi"]),
"float_numbers": pd.Series([1.0, 2.0, 3.0], dtype="float64"),
"float_numbers": pd.Series([1.0, 2.0, 3.0], dtype="Float64"),
"int_numbers": pd.Series([1.0, 2.0, 3.0], dtype="int"),
}
)
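The test updates above switch the expected magnitude dtype from NumPy float64 to the nullable Float64 dtype. A short round-trip sketch of the behaviour they exercise (illustrative, not part of the commit):

# Illustrative sketch (not part of the diff): dequantify/quantify round-trip.
import pandas as pd
import pint_pandas  # noqa: F401

df = pd.DataFrame({"power": pd.Series([1.0, 2.0, 3.0], dtype="pint[W]")})
plain = df.pint.dequantify()          # units move into a column level; magnitudes
                                      # are expected to come back as "Float64"
restored = plain.pint.quantify(level=-1)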
6 changes: 3 additions & 3 deletions pint_pandas/testsuite/test_pandas_interface.py
@@ -108,7 +108,7 @@ def test_dequantify(self):
2: 2.0,
3: 3.0,
},
dtype="float64",
dtype="Float64",
),
}
)
@@ -174,7 +174,7 @@ class TestDataFrameAccessor(object):
def test_index_maintained(self):
test_csv = join(dirname(__file__), "pandas_test.csv")

df = pd.read_csv(test_csv, header=[0, 1])
df = pd.read_csv(test_csv, header=[0, 1], dtype="Float64")
df.columns = pd.MultiIndex.from_arrays(
[
["Holden", "Holden", "Holden", "Ford", "Ford", "Ford"],
@@ -425,5 +425,5 @@ def test_mismatched_dimensions(self):
def test_numpy_data(self):
foo = PintArray([1, 2, 3], dtype="pint[m]")
result = foo.numpy_data
expected = np.array([1, 2, 3], dtype="float64")
expected = np.array([1, 2, 3], dtype="int64")
np.testing.assert_array_equal(result, expected, strict=True)
