Add support for UFloat in PintArray (#139) #140

Status: Open. Wants to merge 58 commits into base: master.

Commits (58):
b5954fd
Add support for UFloat in PintArray (#139)
MichaelTiemannOSC Oct 15, 2022
0ad1cf9
Fix failures and errors found by test_pandas_extensions test suite.
MichaelTiemannOSC Oct 19, 2022
52ab185
Preserve incoming np.array when promoting float to ufloat in PintArray
MichaelTiemannOSC Oct 21, 2022
f3cdcad
Fix logic to detect heterogeneous arrays of Ufloats and floats.
MichaelTiemannOSC Oct 25, 2022
3ffb617
Add support for UFloat in PintArray (#139)
MichaelTiemannOSC Oct 15, 2022
2f89897
Fix failures and errors found by test_pandas_extensions test suite.
MichaelTiemannOSC Oct 19, 2022
dce2668
Preserve incoming np.array when promoting float to ufloat in PintArray
MichaelTiemannOSC Oct 21, 2022
d4ca9f0
Fix logic to detect heterogeneous arrays of Ufloats and floats.
MichaelTiemannOSC Oct 25, 2022
c375aeb
Merge branch 'ducks-unlimited' of https://github.com/MichaelTiemannOS…
MichaelTiemannOSC Nov 3, 2022
9fffcc5
Update pint_array.py
MichaelTiemannOSC Jan 2, 2023
8b06708
Update pint_array.py
MichaelTiemannOSC Jan 3, 2023
c5b7926
Update pint_array.py
MichaelTiemannOSC Jan 3, 2023
232857c
Merge branch 'master' into ducks-unlimited
MichaelTiemannOSC Jun 26, 2023
0b0e4d4
Fix and blacken merge
MichaelTiemannOSC Jun 26, 2023
959570f
Fix ruff complaints in testsuite
MichaelTiemannOSC Jun 26, 2023
5270a46
Fix numerous regressions in test_pandas_extensiontests
MichaelTiemannOSC Jun 28, 2023
6ddf204
Update pint_array.py
MichaelTiemannOSC Jun 28, 2023
e1d367c
Update to us pd.NA instead of np.nan / _ufloat_nan
MichaelTiemannOSC Jul 2, 2023
dbf5ad1
Update pint_array.py
MichaelTiemannOSC Jul 2, 2023
3c6eff4
Progress: 2608 pass, 97 skip, 84 xfail, 6 xpass
MichaelTiemannOSC Jul 5, 2023
a0625f8
Make ruff and black happy
MichaelTiemannOSC Jul 5, 2023
94d3524
Make ruff happy (na_frame fixture import vs F811)
MichaelTiemannOSC Jul 5, 2023
a6c4040
Make black happy
MichaelTiemannOSC Jul 5, 2023
1506df2
Make black happy
MichaelTiemannOSC Jul 5, 2023
772636b
Fix DataFrame reduction for upcoming Pandas
MichaelTiemannOSC Jul 23, 2023
b759adb
Make black happy...
MichaelTiemannOSC Jul 23, 2023
bfb4a99
Update pint_array.py
MichaelTiemannOSC Jul 24, 2023
9d169f1
Switch to np.nan as NaN value
MichaelTiemannOSC Jul 28, 2023
289c604
Updated to Pandas 2.1.0.dev0+1401.gb0bfd0effd
MichaelTiemannOSC Aug 6, 2023
602a804
Keep up with pandas21_compat changes
MichaelTiemannOSC Aug 12, 2023
866bf7a
Merge branch 'master' into ducks-unlimited
MichaelTiemannOSC Aug 12, 2023
9f723f9
Merge remote-tracking branch 'upstream/master' into ducks-unlimited
MichaelTiemannOSC Aug 14, 2023
f0c7e64
Cleanups after merge
MichaelTiemannOSC Aug 15, 2023
5b39a3e
Update test_issues.py
MichaelTiemannOSC Aug 15, 2023
8ed4c1d
Add uncertainties to CI/CD
MichaelTiemannOSC Aug 15, 2023
2cb50f4
Update CI/CD to anticipate, not install or test uncertainties
MichaelTiemannOSC Aug 15, 2023
8c4bf7d
Update CHANGES
MichaelTiemannOSC Aug 15, 2023
37c6f6d
Merge branch 'master' into ducks-unlimited
MichaelTiemannOSC Sep 8, 2023
fc2814a
Merge branch 'master' into ducks-unlimited
MichaelTiemannOSC Sep 15, 2023
e365cbc
Test with Pint-0.23rc0 and uncertainties in ci/cd
MichaelTiemannOSC Sep 18, 2023
108cb71
2nd attempt integrating uncertainties and CI/CD
MichaelTiemannOSC Sep 18, 2023
a5758e7
Use `include` to handle uncertainties testing
MichaelTiemannOSC Sep 19, 2023
82442ab
Only test `uncertainties` in ci.yml
MichaelTiemannOSC Sep 19, 2023
5d351fc
Update ci.yml
MichaelTiemannOSC Sep 19, 2023
c4e3a06
Update ci.yml
MichaelTiemannOSC Sep 19, 2023
8d5feb9
Update ci.yml
MichaelTiemannOSC Sep 19, 2023
bfe9a77
Update ci.yml
MichaelTiemannOSC Sep 19, 2023
25adf35
Update ci.yml
MichaelTiemannOSC Sep 19, 2023
afc3eb4
Update ci.yml
MichaelTiemannOSC Sep 19, 2023
e281dfc
Update ci.yml
MichaelTiemannOSC Sep 19, 2023
a208163
Update ci.yml
MichaelTiemannOSC Sep 19, 2023
296bbdc
Update ci.yml
MichaelTiemannOSC Sep 19, 2023
585f38d
Update ci.yml
MichaelTiemannOSC Sep 19, 2023
75d4c56
Update ci.yml
MichaelTiemannOSC Sep 19, 2023
6988212
Update ci.yml
MichaelTiemannOSC Sep 19, 2023
764b609
Update ci.yml
MichaelTiemannOSC Sep 19, 2023
9ed23c1
Update ci-*.yml files
MichaelTiemannOSC Sep 19, 2023
b609001
Merge branch 'master' into ducks-unlimited
MichaelTiemannOSC Nov 14, 2023
15 changes: 13 additions & 2 deletions .github/workflows/ci.yml
@@ -8,8 +8,15 @@ jobs:
matrix:
python-version: [3.9, "3.10", "3.11"]
numpy: ["numpy>=1.20.3,<2.0.0"]
pandas: ["pandas==2.0.2", "pandas==2.1.0rc0" ]
pint: ["pint>=0.21.1", "pint==0.22"]
pandas: ["pandas==2.0.2", "pandas>=2.1.0" ]
pint: ["pint>=0.21.1,<0.22", "pint==0.22", "pint>=0.23rc0"]
uncertainties: [""]
include:
- python-version: 3.9
numpy: "numpy>=1.20.3,<2.0.0"
pandas: "pandas>=2.1.0"
pint: "pint==0.23rc0"
uncertainties: "uncertainties==3.1.7"

runs-on: ubuntu-latest

@@ -57,6 +64,10 @@ jobs:
if: ${{ matrix.pandas != null }}
run: pip install "${{matrix.pandas}}"

- name: Install uncertainties
if: ${{ matrix.uncertainties != null }}
run: pip install "${{matrix.uncertainties}}"

- name: Run Tests
run: |
pytest $TEST_OPTS
3 changes: 2 additions & 1 deletion CHANGES
@@ -4,10 +4,10 @@ pint-pandas Changelog
0.6 (unreleased)
----------------

- Support for uncertainties as magnitudes in PintArrays. #140
- Fix dequantify duplicate column failure #202
- Fix astype issue #196


0.5 (2023-09-07)
----------------

@@ -50,6 +50,7 @@ pint-pandas Changelog
- Tests reorganised #131
- Shortened form of dimensionless unit now in dtype, eg 'pint[]' #151
- Fixed bug preventing PintArrays with offset units being printed. #150
- Allow UFloat as type of magnitude supported in PintArray. #139

0.2 (2021-03-23)
----------------
113 changes: 107 additions & 6 deletions pint_pandas/pint_array.py
@@ -26,6 +26,16 @@
# Magic 'unit' flagging columns with no unit support, used in
# quantify/dequantify
NO_UNIT = "No Unit"
from pint.compat import HAS_UNCERTAINTIES

# from pint.facets.plain.quantity import PlainQuantity as _Quantity
# from pint.facets.plain.unit import PlainUnit as _Unit

if HAS_UNCERTAINTIES:
from uncertainties import ufloat, UFloat
from uncertainties import unumpy as unp

_ufloat_nan = ufloat(np.nan, 0)

pandas_version = version("pandas")
pandas_version_info = tuple(
@@ -330,6 +340,36 @@ def __setitem__(self, key, value):
key = check_array_indexer(self, key)
# Filter out invalid values for our array type(s)
try:
if HAS_UNCERTAINTIES and is_object_dtype(self._data):
from pandas.api.types import is_scalar, is_numeric_dtype

def value_to_ufloat(value):
if pd.isna(value) or isinstance(value, UFloat):
return value
if is_numeric_dtype(type(value)):
return ufloat(value, 0)
raise ValueError

try:
any_ufloats = next(
True for i in self._data if isinstance(i, UFloat)
)
if any_ufloats:
if is_scalar(key):
if is_list_like(value):
# cannot do many:1 setitem
raise ValueError
# 1:1 setitem
value = value_to_ufloat(value)
elif is_list_like(value):
# many:many setitem
value = [value_to_ufloat(v) for v in value]
else:
# broadcast 1:many
value = value_to_ufloat(value)
except StopIteration:
# If array is full of nothingness, we can put anything inside it
pass
self._data[key] = value
except IndexError as e:
msg = "Mask is wrong length. {}".format(e)
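The `value_to_ufloat` helper in the `__setitem__` hunk above promotes plain numbers to uncertain numbers with zero uncertainty. A runnable sketch of that promotion rule, using a hypothetical `FakeUFloat` stand-in so it does not require the `uncertainties` package:

```python
import math

class FakeUFloat:
    """Hypothetical stand-in for uncertainties.UFloat (illustration only)."""
    def __init__(self, nominal_value, std_dev):
        self.nominal_value = nominal_value
        self.std_dev = std_dev

def value_to_ufloat(value):
    # Mirrors the rule above: missing values and existing UFloats pass
    # through unchanged; bare numbers gain a zero uncertainty; anything
    # else is rejected.
    if value is None or (isinstance(value, float) and math.isnan(value)):
        return value
    if isinstance(value, FakeUFloat):
        return value
    if isinstance(value, (int, float)):
        return FakeUFloat(value, 0.0)
    raise ValueError(f"cannot store {value!r} in a UFloat-backed array")

promoted = value_to_ufloat(3.5)
print(promoted.nominal_value, promoted.std_dev)  # 3.5 0.0
```

This keeps an object-dtype array homogeneous once any element is a UFloat, which is the condition the `any_ufloats` scan detects.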
@@ -381,6 +421,14 @@ def isna(self):
-------
missing : np.array
"""
if HAS_UNCERTAINTIES:
# GH https://github.com/lebigot/uncertainties/issues/164
if len(self._data) == 0:
# True or False doesn't matter--we just need the value for the type
return np.full((0), True)
# NumpyEADtype('object') doesn't know about UFloats...
if is_object_dtype(self._data.dtype):
return np.array([pd.isna(x) or unp.isnan(x) for x in self._data])
return self._data.isna()

def astype(self, dtype, copy=True):
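The `isna` hunk above checks both `pd.isna(x)` and `unp.isnan(x)` because a UFloat whose nominal value is NaN is not recognised by `pd.isna` alone. A sketch of that two-test detection, with a hypothetical stand-in class so it runs without `uncertainties`:

```python
import math
import numpy as np
import pandas as pd

class FakeUFloat:
    """Hypothetical stand-in for uncertainties.UFloat."""
    def __init__(self, nominal_value, std_dev):
        self.nominal_value = nominal_value
        self.std_dev = std_dev

def is_missing(x):
    # pd.isna does not see inside an uncertain number, so the nominal
    # value must be checked separately (unp.isnan in the real code;
    # math.isnan here).
    if isinstance(x, FakeUFloat):
        return math.isnan(x.nominal_value)
    return bool(pd.isna(x))

data = np.array([1.5, FakeUFloat(np.nan, 0.0), None], dtype=object)
print([is_missing(x) for x in data])  # [False, True, True]
```

Without the second check, the NaN-valued UFloat in the middle would be reported as present.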
@@ -542,6 +590,9 @@ def _from_sequence(cls, scalars, dtype=None, copy=False):
(item.to(dtype.units).magnitude if hasattr(item, "to") else item)
for item in scalars
]
# When creating empty arrays, make them large enough to hold UFloats in case we need to do so later
if HAS_UNCERTAINTIES and len(scalars) == 0:
return cls([_ufloat_nan], dtype=dtype, copy=copy)[1:]
return cls(scalars, dtype=dtype, copy=copy)

@classmethod
@@ -565,9 +616,37 @@ def _values_for_factorize(self):
# provided dtype. This may be revisited in the future, see GH#48476.
arr = self._data
if arr.dtype.kind == "O":
if (
HAS_UNCERTAINTIES
and arr.size > 0
and unp.isnan(arr[~pd.isna(arr)]).any()
):
# Canonicalize uncertain NaNs and pd.NA to np.nan
arr = np.array(
[np.nan if pd.isna(x) or unp.isnan(x) else x for x in arr]
)
return np.array(arr, copy=False), self.dtype.na_value
return arr._values_for_factorize()

def _values_for_argsort(self) -> np.ndarray:
"""
Return values for sorting.
Returns
-------
ndarray
The transformed values should maintain the ordering between values
within the array.
"""
# In this case we want to return just the magnitude array stripped of units
# Must replace uncertain NaNs with np.nan
if HAS_UNCERTAINTIES:
arr = self._data[~pd.isna(self._data)]
if arr.size > 0 and unp.isnan(arr).any():
return np.array(
[np.nan if pd.isna(x) or unp.isnan(x) else x for x in self._data]
)
return self._data
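Both `_values_for_factorize` and `_values_for_argsort` above canonicalize every flavour of missing value to `np.nan` before handing the data to NumPy. A plain-NumPy sketch of why that matters for sorting:

```python
import numpy as np
import pandas as pd

# Mixed missing-value representations in an object array...
raw = np.array([2.0, pd.NA, 1.0, np.nan], dtype=object)

# ...are collapsed to a single sentinel (np.nan) in a float array, so
# NumPy's argsort sees one uniform missing value and pushes it to the end.
canon = np.array([np.nan if pd.isna(x) else float(x) for x in raw])
order = np.argsort(canon)
print(canon[order])
```

Sorting the raw object array directly would fail, since `pd.NA` does not compare against floats.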

def value_counts(self, dropna=True):
"""
Returns a Series containing counts of each category.
@@ -592,16 +671,27 @@ def value_counts(self, dropna=True):

# compute counts on the data with no nans
data = self._data
nafilt = pd.isna(data)
na_value = pd.NA # NA value for index, not data, so not quantified
if HAS_UNCERTAINTIES:
nafilt = np.array([pd.isna(x) or unp.isnan(x) for x in data])
else:
nafilt = pd.isna(data)
na_value_for_index = pd.NA
data = data[~nafilt]
index = list(set(data))
if HAS_UNCERTAINTIES and data.dtype.kind == "O":
# This is a work-around for unhashable UFloats
unique_data = []
for item in data:
if item not in unique_data:
unique_data.append(item)
index = list(unique_data)
else:
index = list(set(data))

data_list = data.tolist()
array = [data_list.count(item) for item in index]

if not dropna:
index.append(na_value)
index.append(na_value_for_index)
array.append(nafilt.sum())

return Series(np.asarray(array), index=index)
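The `value_counts` hunk above replaces `set(data)` with a list-membership scan because UFloats are unhashable. A self-contained sketch of that workaround, using a hypothetical unhashable class:

```python
class Unhashable:
    """Models an object that, like a UFloat, defines __eq__ but cannot be
    placed in a set (hypothetical illustration)."""
    __hash__ = None  # explicitly unhashable

    def __init__(self, v):
        self.v = v

    def __eq__(self, other):
        return isinstance(other, Unhashable) and self.v == other.v

# Uniqueness is tracked with a list and `in` (an O(n^2) scan) because
# set() would raise TypeError on unhashable items.
data = [Unhashable(1), Unhashable(2), Unhashable(1)]
index = []
for item in data:
    if item not in index:
        index.append(item)
counts = [data.count(item) for item in index]
print(counts)  # [2, 1]
```

The quadratic scan is the cost of supporting unhashable magnitudes; the `unique()` hunk below the same diff uses the identical trick.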
@@ -613,10 +703,21 @@ def unique(self):
-------
uniques : PintArray
"""
from pandas import unique

data = self._data
return self._from_sequence(unique(data), dtype=self.dtype)
na_value = self.dtype.na_value
if HAS_UNCERTAINTIES and data.dtype.kind == "O":
# This is a work-around for unhashable UFloats
unique_data = []
for item in data:
if item is pd.NA or unp.isnan(item):
item = na_value
if item not in unique_data:
unique_data.append(item)
return self._from_sequence(
pd.array(unique_data, dtype=data.dtype), dtype=self.dtype
)
return self._from_sequence(data.unique(), dtype=self.dtype)

def __contains__(self, item) -> bool:
if not isinstance(item, _Quantity):
57 changes: 54 additions & 3 deletions pint_pandas/testsuite/test_issues.py
@@ -9,6 +9,28 @@
from pandas.tests.extension.base.base import BaseExtensionTests
from pint.testsuite import helpers

try:
import uncertainties.unumpy as unp
from uncertainties import ufloat
from uncertainties.core import AffineScalarFunc # noqa: F401

def AffineScalarFunc__hash__(self):
if not self._linear_part.expanded():
self._linear_part.expand()
combo = tuple(iter(self._linear_part.linear_combo.items()))
if len(combo) > 1 or combo[0][1] != 1.0:
return hash(combo)
# The unique value that comes from a unique variable (which it also hashes to)
return id(combo[0][0])

AffineScalarFunc.__hash__ = AffineScalarFunc__hash__

_ufloat_nan = ufloat(np.nan, 0)
HAS_UNCERTAINTIES = True
except ImportError:
unp = np
HAS_UNCERTAINTIES = False
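The test-suite prologue above monkey-patches `AffineScalarFunc.__hash__` so UFloats can be hashed consistently with their equality. A minimal sketch of the same idea on a hypothetical class:

```python
class Measurement:
    """Hypothetical class with value equality; defining __eq__ alone sets
    __hash__ to None, making instances unhashable (as with UFloat)."""
    def __init__(self, value):
        self.value = value

    def __eq__(self, other):
        return isinstance(other, Measurement) and self.value == other.value

def _measurement_hash(self):
    # The patch: hash consistently with __eq__ so equal instances collapse
    # when placed in sets or used as dict keys.
    return hash(self.value)

Measurement.__hash__ = _measurement_hash  # patch after class definition

unique = {Measurement(1.0), Measurement(1.0), Measurement(2.0)}
print(len(unique))  # 2
```

The real patch is subtler (it hashes the expanded linear combination, and falls back to the identity of a lone underlying variable), but the contract is the same: objects that compare equal must hash equal.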

from pint_pandas import PintArray, PintType
from pint_pandas.pint_array import pandas_version_info

@@ -52,12 +74,16 @@ def test_force_ndarray_like(self):
pint.set_application_registry(prev_appreg)


@pytest.mark.skipif(
not HAS_UNCERTAINTIES,
reason="this test depends entirely on HAS_UNCERTAINTIES being True",
)
class TestIssue21(BaseExtensionTests):
@pytest.mark.filterwarnings("ignore::RuntimeWarning")
def test_offset_concat(self):
q_a = ureg.Quantity(np.arange(5), ureg.Unit("degC"))
q_b = ureg.Quantity(np.arange(6), ureg.Unit("degC"))
q_a_ = np.append(q_a, np.nan)
q_a = ureg.Quantity(np.arange(5) + ufloat(0, 0), ureg.Unit("degC"))
q_b = ureg.Quantity(np.arange(6) + ufloat(0, 0), ureg.Unit("degC"))
q_a_ = np.append(q_a, ureg.Quantity(np.nan, ureg.Unit("degC")))

a = pd.Series(PintArray(q_a))
b = pd.Series(PintArray(q_b))
@@ -171,6 +197,31 @@ def test_issue_127():
assert a == b


@pytest.mark.skipif(
not HAS_UNCERTAINTIES,
reason="this test depends entirely on HAS_UNCERTAINTIES being True",
)
def test_issue_139():
q1 = 1.234
q2 = 5.678
q_nan = np.nan

u1 = ufloat(1, 0)
u2 = ufloat(3, 0)
u_nan = ufloat(np.nan, 0.0)
u_plus_or_minus_nan = ufloat(0.0, np.nan)
u_nan_plus_or_minus_nan = ufloat(np.nan, np.nan)

a_m = PintArray(
[q1, u1, q2, u2, q_nan, u_nan, u_plus_or_minus_nan, u_nan_plus_or_minus_nan],
ureg.m,
)
a_cm = a_m.astype("pint[cm]")
assert np.all(a_m[0:4] == a_cm[0:4])
for x, y in zip(a_m[4:], a_cm[4:]):
assert unp.isnan(x) == unp.isnan(y)
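The shape of the check in `test_issue_139` can be seen in plain NumPy: unit conversion scales finite magnitudes and must leave every NaN variant as NaN.

```python
import numpy as np

# Metres to centimetres: multiply magnitudes by 100; NaN stays NaN.
a_m = np.array([1.234, 5.678, np.nan])
a_cm = a_m * 100.0

assert np.allclose(a_cm[:2], [123.4, 567.8])
assert np.isnan(a_m[2]) == np.isnan(a_cm[2])
```

The actual test additionally exercises UFloats whose nominal value, uncertainty, or both are NaN, which plain floats cannot represent.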


class TestIssue174(BaseExtensionTests):
def test_sum(self):
if pandas_version_info < (2, 1):