Skip to content

Commit

Permalink
API/BUG: Raise when int-dtype coercions fail
Browse files Browse the repository at this point in the history
Related to the Index and Series constructors.

Closes pandas-devgh-15832.
  • Loading branch information
gfyoung committed Jun 13, 2018
1 parent ab668b0 commit 0387ae1
Show file tree
Hide file tree
Showing 8 changed files with 133 additions and 20 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ Datetimelike API Changes
Other API Changes
^^^^^^^^^^^^^^^^^

- Series and Index constructors now raise when the data is incompatible with the specified dtype (:issue:`15832`)
-
-
-
Expand Down
62 changes: 62 additions & 0 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
is_dtype_equal,
is_float_dtype, is_complex_dtype,
is_integer_dtype,
is_unsigned_integer_dtype,
is_datetime_or_timedelta_dtype,
is_bool_dtype, is_scalar,
is_string_dtype, _string_dtypes,
Expand Down Expand Up @@ -1269,3 +1270,64 @@ def construct_1d_ndarray_preserving_na(values, dtype=None, copy=False):
subarr = subarr2

return subarr


def maybe_cast_to_integer_array(arr, dtype, copy=False):
"""
Takes any dtype and returns the casted version, raising for when data is
incompatible with integer/unsigned integer dtypes.
.. versionadded:: 0.24.0
Parameters
----------
arr : ndarray
The array to cast.
dtype : str, np.dtype
The integer dtype to cast the array to.
copy: boolean, default False
Whether to make a copy of the array before returning.
Returns
-------
int_arr : ndarray
An array of integer or unsigned integer dtype
Raises
------
OverflowError : the dtype is incompatible with the data
ValueError : loss of precision has occurred during casting
Examples
--------
If you try to coerce negative values to unsigned integers, it raises:
>>> Series([-1], dtype="uint64")
Traceback (most recent call last):
...
OverflowError: Trying to coerce negative values to unsigned integers
Also, if you try to coerce float values to integers, it raises:
>>> Series([1, 2, 3.5], dtype="int64")
Traceback (most recent call last):
...
ValueError: Trying to coerce float values to integers
"""

try:
casted = arr.astype(dtype, copy=copy)
except OverflowError:
raise OverflowError("The elements provided in the data cannot all be "
"casted to the dtype {dtype}".format(dtype=dtype))

if np.array_equal(arr, casted):
return casted

if is_unsigned_integer_dtype(dtype) and (arr < 0).any():
raise OverflowError("Trying to coerce negative values "
"to unsigned integers")

if is_integer_dtype(dtype) and (is_float_dtype(arr) or
is_object_dtype(arr)):
raise ValueError("Trying to coerce float values to integers")
17 changes: 8 additions & 9 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
ABCPeriodIndex, ABCTimedeltaIndex,
ABCDateOffset)
from pandas.core.dtypes.missing import isna, array_equivalent
from pandas.core.dtypes.cast import maybe_cast_to_integer_array
from pandas.core.dtypes.common import (
_ensure_int64,
_ensure_object,
Expand Down Expand Up @@ -309,19 +310,16 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,
if is_integer_dtype(dtype):
inferred = lib.infer_dtype(data)
if inferred == 'integer':
try:
data = np.array(data, copy=copy, dtype=dtype)
except OverflowError:
# gh-15823: a more user-friendly error message
raise OverflowError(
"the elements provided in the data cannot "
"all be casted to the dtype {dtype}"
.format(dtype=dtype))
data = maybe_cast_to_integer_array(data, dtype,
copy=copy)
elif inferred in ['floating', 'mixed-integer-float']:
if isna(data).any():
raise ValueError('cannot convert float '
'NaN to integer')

if inferred == "mixed-integer-float":
maybe_cast_to_integer_array(data, dtype)

# If we are actually all equal to integers,
# then coerce to integer.
try:
Expand Down Expand Up @@ -350,7 +348,8 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,

except (TypeError, ValueError) as e:
msg = str(e)
if 'cannot convert float' in msg:
if ("cannot convert float" in msg or
"Trying to coerce float values to integer" in msg):
raise

# maybe coerce to a sub-class
Expand Down
6 changes: 5 additions & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@
maybe_cast_to_datetime, maybe_castable,
construct_1d_arraylike_from_scalar,
construct_1d_ndarray_preserving_na,
construct_1d_object_array_from_listlike)
construct_1d_object_array_from_listlike,
maybe_cast_to_integer_array)
from pandas.core.dtypes.missing import (
isna,
notna,
Expand Down Expand Up @@ -4067,6 +4068,9 @@ def _try_cast(arr, take_fast_path):
return arr

try:
if is_float_dtype(dtype) or is_integer_dtype(dtype):
subarr = maybe_cast_to_integer_array(np.asarray(arr), dtype)

subarr = maybe_cast_to_datetime(arr, dtype)
# Take care in creating object arrays (but iterators are not
# supported):
Expand Down
10 changes: 5 additions & 5 deletions pandas/tests/generic/test_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,11 +199,11 @@ def test_downcast(self):
self._compare(result, expected)

def test_constructor_compound_dtypes(self):
# GH 5191
# compound dtypes should raise not-implementederror
# see gh-5191
# Compound dtypes should raise NotImplementedError.

def f(dtype):
return self._construct(shape=3, dtype=dtype)
return self._construct(shape=3, value=1, dtype=dtype)

pytest.raises(NotImplementedError, f, [("A", "datetime64[h]"),
("B", "str"),
Expand Down Expand Up @@ -534,14 +534,14 @@ def test_truncate_out_of_bounds(self):

# small
shape = [int(2e3)] + ([1] * (self._ndim - 1))
small = self._construct(shape, dtype='int8')
small = self._construct(shape, dtype='int8', value=1)
self._compare(small.truncate(), small)
self._compare(small.truncate(before=0, after=3e3), small)
self._compare(small.truncate(before=-1, after=2e3), small)

# big
shape = [int(2e6)] + ([1] * (self._ndim - 1))
big = self._construct(shape, dtype='int8')
big = self._construct(shape, dtype='int8', value=1)
self._compare(big.truncate(), big)
self._compare(big.truncate(before=0, after=3e6), big)
self._compare(big.truncate(before=-1, after=2e6), big)
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,7 @@ def test_constructor_nonhashable_name(self, indices):

def test_constructor_overflow_int64(self):
# see gh-15832
msg = ("the elements provided in the data cannot "
msg = ("The elements provided in the data cannot "
"all be casted to the dtype int64")
with tm.assert_raises_regex(OverflowError, msg):
Index([np.iinfo(np.uint64).max - 1], dtype="int64")
Expand Down
24 changes: 24 additions & 0 deletions pandas/tests/indexes/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,20 @@ def test_astype(self):
i = Float64Index([0, 1.1, np.NAN])
pytest.raises(ValueError, lambda: i.astype(dtype))

@pytest.mark.parametrize("int_dtype", ["uint8", "uint16", "uint32",
"uint64", "int32", "int64",
"int16", "int8"])
@pytest.mark.parametrize("float_dtype", ["float16", "float32", "float64"])
def test_type_coercion(self, int_dtype, float_dtype):

# see gh-15832
msg = "Trying to coerce float values to integers"
with tm.assert_raises_regex(ValueError, msg):
Index([1, 2, 3.5], dtype=int_dtype)

i = Index([1, 2, 3.5], dtype=float_dtype)
tm.assert_index_equal(i, Index([1, 2, 3.5]))

def test_equals_numeric(self):

i = Float64Index([1.0, 2.0])
Expand Down Expand Up @@ -862,6 +876,16 @@ def test_constructor_corner(self):
with tm.assert_raises_regex(TypeError, 'casting'):
Int64Index(arr_with_floats)

@pytest.mark.parametrize("uint_dtype", ["uint8", "uint16",
"uint32", "uint64"])
def test_constructor_coercion_signed_to_unsigned(self, uint_dtype):

# see gh-15832
msg = "Trying to coerce negative values to unsigned integers"

with tm.assert_raises_regex(OverflowError, msg):
Index([-1], dtype=uint_dtype)

def test_coerce_list(self):
# coerce things
arr = Index([1, 2, 3, 4])
Expand Down
31 changes: 27 additions & 4 deletions pandas/tests/series/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -542,12 +542,35 @@ def test_constructor_pass_nan_nat(self):
tm.assert_series_equal(Series(np.array([np.nan, pd.NaT])), exp)

def test_constructor_cast(self):
pytest.raises(ValueError, Series, ['a', 'b', 'c'], dtype=float)
msg = "could not convert string to float"
with tm.assert_raises_regex(ValueError, msg):
Series(["a", "b", "c"], dtype=float)

@pytest.mark.parametrize("uint_dtype", ["uint8", "uint16",
"uint32", "uint64"])
def test_constructor_unsigned_dtype_overflow(self, uint_dtype):
# see gh-15832
msg = 'Trying to coerce negative values to unsigned integers'
with tm.assert_raises_regex(OverflowError, msg):
Series([-1], dtype=uint_dtype)

@pytest.mark.parametrize("int_dtype", ["uint8", "uint16", "uint32",
"uint64", "int32", "int64",
"int16", "int8"])
@pytest.mark.parametrize("float_dtype", ["float16", "float32", "float64"])
def test_constructor_coerce_float_fail(self, int_dtype, float_dtype):
# see gh-15832
msg = "Trying to coerce float values to integers"
with tm.assert_raises_regex(ValueError, msg):
Series([1, 2, 3.5], dtype=int_dtype)

s = Series([1, 2, 3.5], dtype=float_dtype)
expected = Series([1, 2, 3.5]).astype(float_dtype)
assert_series_equal(s, expected)

def test_constructor_dtype_nocast(self):
# 1572
def test_constructor_dtype_no_cast(self):
# see gh-1572
s = Series([1, 2, 3])

s2 = Series(s, dtype=np.int64)

s2[1] = 5
Expand Down

0 comments on commit 0387ae1

Please sign in to comment.