From f4e1127ee8f5c30e9197af48072d73befb7d3b60 Mon Sep 17 00:00:00 2001 From: Kendall Masse Date: Tue, 12 Mar 2019 16:45:23 -0400 Subject: [PATCH] BUG: Fix error in replace with strings that are large numbers (#25616) (#25644) (cherry picked from commit 12fd316de829b994d6e3d1fc14c59d8e8bf34500) --- doc/source/whatsnew/v0.24.2.rst | 2 ++ pandas/core/internals/blocks.py | 4 ++-- pandas/tests/series/test_replace.py | 14 ++++++++++++++ 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.24.2.rst b/doc/source/whatsnew/v0.24.2.rst index c07959c758780..5b5c9c78d10da 100644 --- a/doc/source/whatsnew/v0.24.2.rst +++ b/doc/source/whatsnew/v0.24.2.rst @@ -33,6 +33,7 @@ Fixed Regressions - Fixed regression in :class:`Categorical`, where constructing it from a categorical ``Series`` and an explicit ``categories=`` that differed from that in the ``Series`` created an invalid object which could trigger segfaults. (:issue:`25318`) - Fixed regression in :func:`to_timedelta` losing precision when converting floating data to ``Timedelta`` data (:issue:`25077`). - Fixed pip installing from source into an environment without NumPy (:issue:`25193`) +- Fixed regression in :meth:`DataFrame.replace` where large strings of numbers would be coerced into ``int64``, causing an ``OverflowError`` (:issue:`25616`) - Fixed regression in :func:`factorize` when passing a custom ``na_sentinel`` value with ``sort=True`` (:issue:`25409`). - Fixed regression in :meth:`DataFrame.to_csv` writing duplicate line endings with gzip compress (:issue:`25311`) @@ -90,6 +91,7 @@ A total of 25 people contributed patches to this release. People with a "+" by t * Joris Van den Bossche * Josh * Justin Zheng +* Kendall Masse * Matthew Roeschke * Max Bolingbroke + * rbenes + diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index df764aa4ba666..7fe033350fe5b 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1092,7 +1092,7 @@ def coerce_to_target_dtype(self, other): try: return self.astype(dtype) - except (ValueError, TypeError): + except (ValueError, TypeError, OverflowError): pass return self.astype(object) @@ -3272,7 +3272,7 @@ def _putmask_smart(v, m, n): nv = v.copy() nv[m] = nn_at return nv - except (ValueError, IndexError, TypeError): + except (ValueError, IndexError, TypeError, OverflowError): pass n = np.asarray(n) diff --git a/pandas/tests/series/test_replace.py b/pandas/tests/series/test_replace.py index 40b28047080da..2e7b746f6c9f2 100644 --- a/pandas/tests/series/test_replace.py +++ b/pandas/tests/series/test_replace.py @@ -280,3 +280,17 @@ def test_replace_mixed_types_with_string(self): result = s.replace([2, '4'], np.nan) expected = pd.Series([1, np.nan, 3, np.nan, 4, 5]) tm.assert_series_equal(expected, result) + + def test_replace_with_no_overflowerror(self): + # GH 25616 + # casts to object without Exception from OverflowError + s = pd.Series([0, 1, 2, 3, 4]) + result = s.replace([3], ['100000000000000000000']) + expected = pd.Series([0, 1, 2, '100000000000000000000', 4]) + tm.assert_series_equal(result, expected) + + s = pd.Series([0, '100000000000000000000', + '100000000000000000001']) + result = s.replace(['100000000000000000000'], [1]) + expected = pd.Series([0, 1, '100000000000000000001']) + tm.assert_series_equal(result, expected)