Change UInt64Index._na_value from 0 to np.nan

pandas-dev · Nov 21, 2017 · 1bffe76
1 parent 509e03c
commit 1bffe76
Show file tree

Hide file tree

Showing 5 changed files with 55 additions and 37 deletions.
diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt
@@ -108,7 +108,7 @@ Bug Fixes
 Conversion
 ^^^^^^^^^^
 
--
+- Bug in :class:`Index` constructor with ``dtype='uint64'`` where int-like floats were not coerced to :class:`UInt64Index` (:issue:`18400`)
 -
 -
 

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -251,7 +251,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,
                             # then coerce to integer.
                             try:
                                 return cls._try_convert_to_int_index(
-                                    data, copy, name)
+                                    data, copy, name, dtype)
                             except ValueError:
                                 pass
 
@@ -307,7 +307,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,
                 if inferred == 'integer':
                     try:
                         return cls._try_convert_to_int_index(
-                            subarr, copy, name)
+                            subarr, copy, name, dtype)
                     except ValueError:
                         pass
 
@@ -664,7 +664,7 @@ def ravel(self, order='C'):
 
     # construction helpers
     @classmethod
-    def _try_convert_to_int_index(cls, data, copy, name):
+    def _try_convert_to_int_index(cls, data, copy, name, dtype):
         """
         Attempt to convert an array of data into an integer index.
 
@@ -685,12 +685,13 @@ def _try_convert_to_int_index(cls, data, copy, name):
         """
 
         from .numeric import Int64Index, UInt64Index
-        try:
-            res = data.astype('i8', copy=False)
-            if (res == data).all():
-                return Int64Index(res, copy=copy, name=name)
-        except (OverflowError, TypeError, ValueError):
-            pass
+        if not is_unsigned_integer_dtype(dtype):
+            try:
+                res = data.astype('i8', copy=False)
+                if (res == data).all():
+                    return Int64Index(res, copy=copy, name=name)
+            except (OverflowError, TypeError, ValueError):
+                pass
 
         # Conversion to int64 failed (possibly due to
         # overflow), so let's try now with uint64.

diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py
@@ -216,7 +216,6 @@ class UInt64Index(NumericIndex):
     _inner_indexer = libjoin.inner_join_indexer_uint64
     _outer_indexer = libjoin.outer_join_indexer_uint64
     _can_hold_na = False
-    _na_value = 0
     _engine_type = libindex.UInt64Engine
     _default_dtype = np.uint64
 

diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py
@@ -14,7 +14,7 @@
 import numpy as np
 
 from pandas import (period_range, date_range, Series,
-                    DataFrame, Float64Index, Int64Index,
+                    DataFrame, Float64Index, Int64Index, UInt64Index,
                     CategoricalIndex, DatetimeIndex, TimedeltaIndex,
                     PeriodIndex, isna)
 from pandas.core.index import _get_combined_index, _ensure_index_from_sequences
@@ -201,6 +201,25 @@ def __array__(self, dtype=None):
             result = pd.Index(ArrayLike(array))
             tm.assert_index_equal(result, expected)
 
+    def test_constructor_int_dtype_float(self):
+        # GH 18400
+        data = [0., 1., 2., 3.]
+
+        expected = Int64Index([0, 1, 2, 3])
+        result = Index(data, dtype='int64')
+        tm.assert_index_equal(result, expected)
+
+        expected = UInt64Index([0, 1, 2, 3])
+        result = Index(data, dtype='uint64')
+        tm.assert_index_equal(result, expected)
+
+        # fall back to Float64Index
+        data = [0.0, 1.1, 2.2, 3.3]
+        expected = Float64Index(data)
+        for dtype in ('int64', 'uint64'):
+            result = Index(data, dtype=dtype)
+            tm.assert_index_equal(result, expected)
+
     def test_constructor_int_dtype_nan(self):
         # see gh-15187
         data = [np.nan]

diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py
@@ -658,6 +658,30 @@ def test_ufunc_coercions(self):
         exp = Float64Index([0.5, 1., 1.5, 2., 2.5], name='x')
         tm.assert_index_equal(result, exp)
 
+    def test_where(self):
+        i = self.create_index()
+        result = i.where(notna(i))
+        expected = i
+        tm.assert_index_equal(result, expected)
+
+        _nan = i._na_value
+        cond = [False] + [True] * len(i[1:])
+        expected = Float64Index([_nan] + i[1:].tolist())
+        result = i.where(cond)
+        tm.assert_index_equal(result, expected)
+
+    def test_where_array_like(self):
+        i = self.create_index()
+
+        _nan = i._na_value
+        cond = [False] + [True] * (len(i) - 1)
+        klasses = [list, tuple, np.array, pd.Series]
+        expected = Float64Index([_nan] + i[1:].tolist())
+
+        for klass in klasses:
+            result = i.where(klass(cond))
+            tm.assert_index_equal(result, expected)
+
 
 class TestInt64Index(NumericInt):
     _dtype = 'int64'
@@ -726,31 +750,6 @@ def test_coerce_list(self):
         arr = Index([1, 2, 3, 4], dtype=object)
         assert isinstance(arr, Index)
 
-    def test_where(self):
-        i = self.create_index()
-        result = i.where(notna(i))
-        expected = i
-        tm.assert_index_equal(result, expected)
-
-        _nan = i._na_value
-        cond = [False] + [True] * len(i[1:])
-        expected = pd.Index([_nan] + i[1:].tolist())
-
-        result = i.where(cond)
-        tm.assert_index_equal(result, expected)
-
-    def test_where_array_like(self):
-        i = self.create_index()
-
-        _nan = i._na_value
-        cond = [False] + [True] * (len(i) - 1)
-        klasses = [list, tuple, np.array, pd.Series]
-        expected = pd.Index([_nan] + i[1:].tolist())
-
-        for klass in klasses:
-            result = i.where(klass(cond))
-            tm.assert_index_equal(result, expected)
-
     def test_get_indexer(self):
         target = Int64Index(np.arange(10))
         indexer = self.index.get_indexer(target)
-Original file line number
+Diff line change
@@ Expand Up / @@ -108,7 +108,7 @@ Bug Fixes @@
     Conversion
     ^^^^^^^^^^
-    -
+    - Bug in :class:`Index` constructor with ``dtype='uint64'`` where int-like floats were not coerced to :class:`UInt64Index` (:issue:`18400`)
     -
     -
@@ Expand Down @@