Skip to content

Commit

Permalink
API/BUG: Raise when int-dtype coercions fail
Browse files Browse the repository at this point in the history
* Related to the Index and Series constructors.

Closes gh-15832.

* Add integer dtype fixtures to conftest.py

Can be used for subsequent refactoring.
  • Loading branch information
gfyoung committed Jun 15, 2018
1 parent bf1c3dc commit b85efa7
Show file tree
Hide file tree
Showing 10 changed files with 194 additions and 21 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ Other API Changes
^^^^^^^^^^^^^^^^^

- :class:`DatetimeIndex` now accepts :class:`Int64Index` arguments as epoch timestamps (:issue:`20997`)
- ``Series`` and ``Index`` constructors now raise when the data is incompatible with a passed ``dtype=`` (:issue:`15832`)
-
-

Expand Down
63 changes: 63 additions & 0 deletions pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,3 +170,66 @@ def string_dtype(request):
* 'U'
"""
return request.param


@pytest.fixture(params=["float32", "float64"])
def float_dtype(request):
    """
    Fixture that runs the requesting test once per float dtype name.

    * float32
    * float64
    """
    # pytest exposes the currently selected entry of ``params`` here.
    dtype_name = request.param
    return dtype_name


# Integer dtype names used as ``params`` for the integer dtype fixtures below.
UNSIGNED_INT_DTYPES = ["uint8", "uint16", "uint32", "uint64"]
SIGNED_INT_DTYPES = ["int8", "int16", "int32", "int64"]
# Combined list: all unsigned names first, followed by all signed names.
ALL_INT_DTYPES = UNSIGNED_INT_DTYPES + SIGNED_INT_DTYPES


@pytest.fixture(params=SIGNED_INT_DTYPES)
def sint_dtype(request):
    """
    Fixture that runs the requesting test once per signed integer dtype name.

    * int8
    * int16
    * int32
    * int64
    """
    # pytest exposes the currently selected entry of ``params`` here.
    dtype_name = request.param
    return dtype_name


@pytest.fixture(params=UNSIGNED_INT_DTYPES)
def uint_dtype(request):
    """
    Fixture that runs the requesting test once per unsigned integer dtype
    name.

    * uint8
    * uint16
    * uint32
    * uint64
    """
    # pytest exposes the currently selected entry of ``params`` here.
    dtype_name = request.param
    return dtype_name


@pytest.fixture(params=ALL_INT_DTYPES)
def any_int_dtype(request):
    """
    Fixture that runs the requesting test once per integer dtype name,
    unsigned names first and signed names after (the order of
    ``ALL_INT_DTYPES``).

    * uint8
    * uint16
    * uint32
    * uint64
    * int8
    * int16
    * int32
    * int64
    """
    # pytest exposes the currently selected entry of ``params`` here.
    dtype_name = request.param
    return dtype_name
62 changes: 62 additions & 0 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
is_dtype_equal,
is_float_dtype, is_complex_dtype,
is_integer_dtype,
is_unsigned_integer_dtype,
is_datetime_or_timedelta_dtype,
is_bool_dtype, is_scalar,
is_string_dtype, _string_dtypes,
Expand Down Expand Up @@ -1269,3 +1270,64 @@ def construct_1d_ndarray_preserving_na(values, dtype=None, copy=False):
subarr = subarr2

return subarr


def maybe_cast_to_integer_array(arr, dtype, copy=False):
    """
    Cast ``arr`` to ``dtype``, raising when the data is incompatible with
    integer/unsigned integer dtypes.

    .. versionadded:: 0.24.0

    Parameters
    ----------
    arr : ndarray
        The array to cast.
    dtype : str, np.dtype
        The integer dtype to cast the array to.
    copy : boolean, default False
        Whether to make a copy of the array before returning.

    Returns
    -------
    int_arr : ndarray
        ``arr`` cast to ``dtype``. An array is always returned when no
        exception is raised, including for lossy casts that are not covered
        by the checks below (e.g. wrap-around between signed integer widths,
        or rounding between float widths).

    Raises
    ------
    OverflowError : the dtype is incompatible with the data
    ValueError : loss of precision has occurred during casting

    Examples
    --------
    If you try to coerce negative values to unsigned integers, it raises:

    >>> Series([-1], dtype="uint64")
    Traceback (most recent call last):
        ...
    OverflowError: Trying to coerce negative values to unsigned integers

    Also, if you try to coerce float values to integers, it raises:

    >>> Series([1, 2, 3.5], dtype="int64")
    Traceback (most recent call last):
        ...
    ValueError: Trying to coerce float values to integers
    """

    try:
        casted = arr.astype(dtype, copy=copy)
    except OverflowError:
        # Re-raise with a message that names the requested dtype.
        raise OverflowError("The elements provided in the data cannot all be "
                            "casted to the dtype {dtype}".format(dtype=dtype))

    # Lossless cast: nothing further to validate.
    if np.array_equal(arr, casted):
        return casted

    # The values changed during the cast; work out whether that is an error.
    if is_unsigned_integer_dtype(dtype) and (arr < 0).any():
        raise OverflowError("Trying to coerce negative values "
                            "to unsigned integers")

    if is_integer_dtype(dtype) and (is_float_dtype(arr) or
                                    is_object_dtype(arr)):
        raise ValueError("Trying to coerce float values to integers")

    # Previously this path fell off the end and implicitly returned None,
    # so a caller that assigns the result would silently end up with None
    # instead of an array. Return the casted array, as documented.
    return casted
17 changes: 8 additions & 9 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
ABCPeriodIndex, ABCTimedeltaIndex,
ABCDateOffset)
from pandas.core.dtypes.missing import isna, array_equivalent
from pandas.core.dtypes.cast import maybe_cast_to_integer_array
from pandas.core.dtypes.common import (
_ensure_int64,
_ensure_object,
Expand Down Expand Up @@ -311,19 +312,16 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,
if is_integer_dtype(dtype):
inferred = lib.infer_dtype(data)
if inferred == 'integer':
try:
data = np.array(data, copy=copy, dtype=dtype)
except OverflowError:
# gh-15823: a more user-friendly error message
raise OverflowError(
"the elements provided in the data cannot "
"all be casted to the dtype {dtype}"
.format(dtype=dtype))
data = maybe_cast_to_integer_array(data, dtype,
copy=copy)
elif inferred in ['floating', 'mixed-integer-float']:
if isna(data).any():
raise ValueError('cannot convert float '
'NaN to integer')

if inferred == "mixed-integer-float":
maybe_cast_to_integer_array(data, dtype)

# If we are actually all equal to integers,
# then coerce to integer.
try:
Expand Down Expand Up @@ -352,7 +350,8 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,

except (TypeError, ValueError) as e:
msg = str(e)
if 'cannot convert float' in msg:
if ("cannot convert float" in msg or
"Trying to coerce float values to integer" in msg):
raise

# maybe coerce to a sub-class
Expand Down
6 changes: 5 additions & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@
maybe_cast_to_datetime, maybe_castable,
construct_1d_arraylike_from_scalar,
construct_1d_ndarray_preserving_na,
construct_1d_object_array_from_listlike)
construct_1d_object_array_from_listlike,
maybe_cast_to_integer_array)
from pandas.core.dtypes.missing import (
isna,
notna,
Expand Down Expand Up @@ -4067,6 +4068,9 @@ def _try_cast(arr, take_fast_path):
return arr

try:
if is_float_dtype(dtype) or is_integer_dtype(dtype):
subarr = maybe_cast_to_integer_array(np.asarray(arr), dtype)

subarr = maybe_cast_to_datetime(arr, dtype)
# Take care in creating object arrays (but iterators are not
# supported):
Expand Down
10 changes: 5 additions & 5 deletions pandas/tests/generic/test_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,11 +199,11 @@ def test_downcast(self):
self._compare(result, expected)

def test_constructor_compound_dtypes(self):
# GH 5191
# compound dtypes should raise not-implementederror
# see gh-5191
# Compound dtypes should raise NotImplementedError.

def f(dtype):
return self._construct(shape=3, dtype=dtype)
return self._construct(shape=3, value=1, dtype=dtype)

pytest.raises(NotImplementedError, f, [("A", "datetime64[h]"),
("B", "str"),
Expand Down Expand Up @@ -534,14 +534,14 @@ def test_truncate_out_of_bounds(self):

# small
shape = [int(2e3)] + ([1] * (self._ndim - 1))
small = self._construct(shape, dtype='int8')
small = self._construct(shape, dtype='int8', value=1)
self._compare(small.truncate(), small)
self._compare(small.truncate(before=0, after=3e3), small)
self._compare(small.truncate(before=-1, after=2e3), small)

# big
shape = [int(2e6)] + ([1] * (self._ndim - 1))
big = self._construct(shape, dtype='int8')
big = self._construct(shape, dtype='int8', value=1)
self._compare(big.truncate(), big)
self._compare(big.truncate(before=0, after=3e6), big)
self._compare(big.truncate(before=-1, after=2e6), big)
Expand Down
8 changes: 7 additions & 1 deletion pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -483,11 +483,17 @@ def test_constructor_nonhashable_name(self, indices):

def test_constructor_overflow_int64(self):
# see gh-15832
msg = ("the elements provided in the data cannot "
msg = ("The elements provided in the data cannot "
"all be casted to the dtype int64")
with tm.assert_raises_regex(OverflowError, msg):
Index([np.iinfo(np.uint64).max - 1], dtype="int64")

@pytest.mark.xfail(reason="see gh-21311: Index doesn't enforce "
                          "dtype argument")
def test_constructor_cast(self):
    # The explanation must be passed as ``reason=``: a positional string is
    # interpreted by pytest as a condition expression to evaluate, not as
    # the human-readable reason for the expected failure.
    msg = "could not convert string to float"
    with tm.assert_raises_regex(ValueError, msg):
        Index(["a", "b", "c"], dtype=float)

def test_view_with_args(self):

restricted = ['unicodeIndex', 'strIndex', 'catIndex', 'boolIndex',
Expand Down
20 changes: 20 additions & 0 deletions pandas/tests/indexes/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,18 @@ def test_astype(self):
i = Float64Index([0, 1.1, np.NAN])
pytest.raises(ValueError, lambda: i.astype(dtype))

def test_type_coercion_fail(self, any_int_dtype):
    # see gh-15832
    # Data holding a non-integral float must raise when an integer dtype
    # is requested.
    expected_msg = "Trying to coerce float values to integers"
    data = [1, 2, 3.5]

    with tm.assert_raises_regex(ValueError, expected_msg):
        Index(data, dtype=any_int_dtype)

def test_type_coercion_valid(self, float_dtype):
    # There is no Float32Index, so whichever float dtype is requested
    # the result is compared against a plain Float64Index.
    values = [1, 2, 3.5]
    result = Index(values, dtype=float_dtype)
    expected = Index(values)

    tm.assert_index_equal(result, expected)

def test_equals_numeric(self):

i = Float64Index([1.0, 2.0])
Expand Down Expand Up @@ -862,6 +874,14 @@ def test_constructor_corner(self):
with tm.assert_raises_regex(TypeError, 'casting'):
Int64Index(arr_with_floats)

def test_constructor_coercion_signed_to_unsigned(self, uint_dtype):
    # see gh-15832
    # A negative value must raise when an unsigned dtype is requested.
    expected_msg = "Trying to coerce negative values to unsigned integers"
    data = [-1]

    with tm.assert_raises_regex(OverflowError, expected_msg):
        Index(data, dtype=uint_dtype)

def test_coerce_list(self):
# coerce things
arr = Index([1, 2, 3, 4])
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/io/test_pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -2047,7 +2047,7 @@ def test_table_values_dtypes_roundtrip(self):
assert df1.dtypes[0] == 'float32'

# check with mixed dtypes
df1 = DataFrame(dict((c, Series(np.random.randn(5), dtype=c))
df1 = DataFrame(dict((c, Series(np.random.randint(5), dtype=c))
for c in ['float32', 'float64', 'int32',
'int64', 'int16', 'int8']))
df1['string'] = 'foo'
Expand Down
26 changes: 22 additions & 4 deletions pandas/tests/series/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -542,12 +542,30 @@ def test_constructor_pass_nan_nat(self):
tm.assert_series_equal(Series(np.array([np.nan, pd.NaT])), exp)

def test_constructor_cast(self):
pytest.raises(ValueError, Series, ['a', 'b', 'c'], dtype=float)
msg = "could not convert string to float"
with tm.assert_raises_regex(ValueError, msg):
Series(["a", "b", "c"], dtype=float)

def test_constructor_unsigned_dtype_overflow(self, uint_dtype):
    # see gh-15832
    # A negative value must raise when an unsigned dtype is requested.
    expected_msg = 'Trying to coerce negative values to unsigned integers'
    data = [-1]

    with tm.assert_raises_regex(OverflowError, expected_msg):
        Series(data, dtype=uint_dtype)

def test_constructor_coerce_float_fail(self, any_int_dtype):
    # see gh-15832
    # Data holding a non-integral float must raise when an integer dtype
    # is requested.
    expected_msg = "Trying to coerce float values to integers"
    data = [1, 2, 3.5]

    with tm.assert_raises_regex(ValueError, expected_msg):
        Series(data, dtype=any_int_dtype)

def test_constructor_coerce_float_valid(self, float_dtype):
    # Constructing with a float dtype must match constructing with the
    # default dtype and then casting via ``astype``.
    result = Series([1, 2, 3.5], dtype=float_dtype)
    default_built = Series([1, 2, 3.5])
    expected = default_built.astype(float_dtype)

    assert_series_equal(result, expected)

def test_constructor_dtype_nocast(self):
# 1572
def test_constructor_dtype_no_cast(self):
# see gh-1572
s = Series([1, 2, 3])

s2 = Series(s, dtype=np.int64)

s2[1] = 5
Expand Down

0 comments on commit b85efa7

Please sign in to comment.