From aae7097952d13088d0bc3625ea93a7d0fb967954 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 5 Dec 2019 21:42:41 -0800 Subject: [PATCH 1/3] DEPR: remove box usages --- pandas/core/tools/datetimes.py | 110 ++++++------------- pandas/tests/indexes/datetimes/test_tools.py | 36 ------ 2 files changed, 31 insertions(+), 115 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 3dfafd04dff0a..9b0f898bffe7d 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -41,16 +41,11 @@ from pandas.core import algorithms from pandas.core.algorithms import unique -# --------------------------------------------------------------------- -# types used in annotations - -ArrayConvertible = Union[list, tuple, ArrayLike, ABCSeries] - -# --------------------------------------------------------------------- # --------------------------------------------------------------------- # types used in annotations +ArrayConvertible = Union[list, tuple, ArrayLike, ABCSeries] Scalar = Union[int, float, str] DatetimeScalar = TypeVar("DatetimeScalar", Scalar, datetime) DatetimeScalarOrArrayConvertible = Union[ @@ -154,22 +149,22 @@ def _maybe_cache(arg, format, cache, convert_listlike): unique_dates = unique(arg) if len(unique_dates) < len(arg): - cache_dates = convert_listlike(unique_dates, True, format) + cache_dates = convert_listlike(unique_dates, format) cache_array = Series(cache_dates, index=unique_dates) return cache_array -def _box_as_indexlike( +def _wrap_as_indexlike( dt_array: ArrayLike, utc: Optional[bool] = None, name: Optional[str] = None ) -> Union[ABCIndex, ABCDatetimeIndex]: """ - Properly boxes the ndarray of datetimes to DatetimeIndex + Properly wraps the ndarray of datetimes to DatetimeIndex if it is possible or to generic Index instead Parameters ---------- dt_array: 1-d array - array of datetimes to be boxed + Array of datetimes to be wrapped in an Index. 
tz : object None or 'utc' name : string, default None @@ -189,40 +184,33 @@ def _box_as_indexlike( return Index(dt_array, name=name) -def _convert_and_box_cache( +def _convert_and_cache( arg: DatetimeScalarOrArrayConvertible, cache_array: ABCSeries, - box: bool, name: Optional[str] = None, -) -> Union[ABCIndex, np.ndarray]: +) -> ABCIndexClass: """ - Convert array of dates with a cache and box the result + Convert array of dates with a cache and wrap the result in an Index. Parameters ---------- arg : integer, float, string, datetime, list, tuple, 1-d array, Series cache_array : Series Cache of converted, unique dates - box : boolean - True boxes result as an Index-like, False returns an ndarray name : string, default None Name for a DatetimeIndex Returns ------- - result : datetime of converted dates - - Index-like if box=True - - ndarray if box=False + result : Index-like of converted dates """ from pandas import Series result = Series(arg).map(cache_array) - if box: - return _box_as_indexlike(result, utc=None, name=name) - return result.values + return _wrap_as_indexlike(result, utc=None, name=name) -def _return_parsed_timezone_results(result, timezones, box, tz, name): +def _return_parsed_timezone_results(result, timezones, tz, name): """ Return results from array_strptime if a %z or %Z directive was passed. 
@@ -232,8 +220,6 @@ def _return_parsed_timezone_results(result, timezones, box, tz, name): int64 date representations of the dates timezones : ndarray pytz timezone objects - box : boolean - True boxes result as an Index-like, False returns an ndarray tz : object None or pytz timezone object name : string, default None @@ -241,11 +227,7 @@ def _return_parsed_timezone_results(result, timezones, box, tz, name): Returns ------- - tz_result : ndarray of parsed dates with timezone - Returns: - - - Index-like if box=True - - ndarray of Timestamps if box=False + tz_result : Index-like of parsed dates with timezone """ if tz is not None: raise ValueError( @@ -256,16 +238,13 @@ def _return_parsed_timezone_results(result, timezones, box, tz, name): tz_results = np.array( [Timestamp(res).tz_localize(zone) for res, zone in zip(result, timezones)] ) - if box: - from pandas import Index + from pandas import Index - return Index(tz_results, name=name) - return tz_results + return Index(tz_results, name=name) def _convert_listlike_datetimes( arg, - box, format, name=None, tz=None, @@ -305,11 +284,7 @@ def _convert_listlike_datetimes( Returns ------- - ndarray of parsed dates - Returns: - - - Index-like if box=True - - ndarray of Timestamps if box=False + Index-like of parsed dates """ from pandas import DatetimeIndex from pandas.core.arrays import DatetimeArray @@ -330,7 +305,7 @@ def _convert_listlike_datetimes( return arg elif is_datetime64_ns_dtype(arg): - if box and not isinstance(arg, (DatetimeArray, DatetimeIndex)): + if not isinstance(arg, (DatetimeArray, DatetimeIndex)): try: return DatetimeIndex(arg, tz=tz, name=name) except ValueError: @@ -346,7 +321,7 @@ def _convert_listlike_datetimes( raise ValueError("cannot specify both format and unit") arg = getattr(arg, "values", arg) result, tz_parsed = tslib.array_with_unit_to_datetime(arg, unit, errors=errors) - if box: + if True: if errors == "ignore": from pandas import Index @@ -416,7 +391,7 @@ def 
_convert_listlike_datetimes( ) if "%Z" in format or "%z" in format: return _return_parsed_timezone_results( - result, timezones, box, tz, name + result, timezones, tz, name ) except tslibs.OutOfBoundsDatetime: if errors == "raise": @@ -463,20 +438,12 @@ def _convert_listlike_datetimes( ) if tz_parsed is not None: - if box: - # We can take a shortcut since the datetime64 numpy array - # is in UTC - return DatetimeIndex._simple_new(result, name=name, tz=tz_parsed) - else: - # Convert the datetime64 numpy array to an numpy array - # of datetime objects - result = [Timestamp(ts, tz=tz_parsed).to_pydatetime() for ts in result] - return np.array(result, dtype=object) + # We can take a shortcut since the datetime64 numpy array + # is in UTC + return DatetimeIndex._simple_new(result, name=name, tz=tz_parsed) - if box: - utc = tz == "utc" - return _box_as_indexlike(result, utc=utc, name=name) - return result + utc = tz == "utc" + return _wrap_as_indexlike(result, utc=utc, name=name) def _adjust_to_origin(arg, origin, unit): @@ -603,15 +570,6 @@ def to_datetime( utc : bool, default None Return UTC DatetimeIndex if True (converting any tz-aware datetime.datetime objects as well). - box : bool, default True - - If True returns a DatetimeIndex or Index-like object - - If False returns ndarray of values. - - .. deprecated:: 0.25.0 - Use :meth:`Series.to_numpy` or :meth:`Timestamp.to_datetime64` - instead to get an ndarray of values or numpy.datetime64, - respectively. - format : str, default None The strftime to parse time, eg "%d/%m/%Y", note that "%f" will parse all the way up to nanoseconds. 
@@ -764,25 +722,25 @@ def to_datetime( if not cache_array.empty: result = arg.map(cache_array) else: - values = convert_listlike(arg._values, True, format) + values = convert_listlike(arg._values, format) result = arg._constructor(values, index=arg.index, name=arg.name) elif isinstance(arg, (ABCDataFrame, abc.MutableMapping)): - result = _assemble_from_unit_mappings(arg, errors, box, tz) + result = _assemble_from_unit_mappings(arg, errors, tz) elif isinstance(arg, ABCIndexClass): cache_array = _maybe_cache(arg, format, cache, convert_listlike) if not cache_array.empty: - result = _convert_and_box_cache(arg, cache_array, box, name=arg.name) + result = _convert_and_cache(arg, cache_array, name=arg.name) else: convert_listlike = partial(convert_listlike, name=arg.name) - result = convert_listlike(arg, box, format) + result = convert_listlike(arg, format) elif is_list_like(arg): cache_array = _maybe_cache(arg, format, cache, convert_listlike) if not cache_array.empty: - result = _convert_and_box_cache(arg, cache_array, box) + result = _convert_and_cache(arg, cache_array) else: - result = convert_listlike(arg, box, format) + result = convert_listlike(arg, format) else: - result = convert_listlike(np.array([arg]), box, format)[0] + result = convert_listlike(np.array([arg]), format)[0] return result @@ -813,7 +771,7 @@ def to_datetime( } -def _assemble_from_unit_mappings(arg, errors, box, tz): +def _assemble_from_unit_mappings(arg, errors, tz): """ assemble the unit specified fields from the arg (DataFrame) Return a Series for actual parsing @@ -826,10 +784,6 @@ def _assemble_from_unit_mappings(arg, errors, box, tz): - If 'raise', then invalid parsing will raise an exception - If 'coerce', then invalid parsing will be set as NaT - If 'ignore', then invalid parsing will return the input - box : boolean - - - If True, return a DatetimeIndex - - If False, return an array tz : None or 'utc' Returns @@ -904,8 +858,6 @@ def coerce(values): "cannot assemble the datetimes 
[{value}]: " "{error}".format(value=value, error=e) ) - if not box: - return values.values return values diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 4e5d624eba844..ded559f16ad5d 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -921,22 +921,6 @@ def test_iso_8601_strings_with_same_offset(self): result = DatetimeIndex([ts_str] * 2) tm.assert_index_equal(result, expected) - def test_iso_8601_strings_same_offset_no_box(self): - # GH 22446 - data = ["2018-01-04 09:01:00+09:00", "2018-01-04 09:02:00+09:00"] - - with tm.assert_produces_warning(FutureWarning): - result = pd.to_datetime(data, box=False) - - expected = np.array( - [ - datetime(2018, 1, 4, 9, 1, tzinfo=pytz.FixedOffset(540)), - datetime(2018, 1, 4, 9, 2, tzinfo=pytz.FixedOffset(540)), - ], - dtype=object, - ) - tm.assert_numpy_array_equal(result, expected) - def test_iso_8601_strings_with_different_offsets(self): # GH 17697, 11736 ts_strings = ["2015-11-18 15:30:00+05:30", "2015-11-18 16:30:00+06:30", NaT] @@ -1024,16 +1008,6 @@ def test_timestamp_utc_true(self, ts, expected): result = to_datetime(ts, utc=True) assert result == expected - def test_to_datetime_box_deprecated(self): - expected = np.datetime64("2018-09-09") - - # Deprecated - see GH24416 - with tm.assert_produces_warning(FutureWarning): - pd.to_datetime(expected, box=False) - - result = pd.to_datetime(expected).to_datetime64() - assert result == expected - @pytest.mark.parametrize("dt_str", ["00010101", "13000101", "30000101", "99990101"]) def test_to_datetime_with_format_out_of_bounds(self, dt_str): # GH 9107 @@ -1345,16 +1319,6 @@ def test_dataframe_dtypes(self, cache): with pytest.raises(ValueError): to_datetime(df, cache=cache) - def test_dataframe_box_false(self): - # GH 23760 - df = pd.DataFrame({"year": [2015, 2016], "month": [2, 3], "day": [4, 5]}) - - with tm.assert_produces_warning(FutureWarning): - result = 
pd.to_datetime(df, box=False) - - expected = np.array(["2015-02-04", "2016-03-05"], dtype="datetime64[ns]") - tm.assert_numpy_array_equal(result, expected) - def test_dataframe_utc_true(self): # GH 23760 df = pd.DataFrame({"year": [2015, 2016], "month": [2, 3], "day": [4, 5]}) From ef222e9d07b5b6208c6d097e2086d7f3ee2c827b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 6 Dec 2019 08:11:03 -0800 Subject: [PATCH 2/3] remove box arg, revert renames --- pandas/core/tools/datetimes.py | 55 +++++++++++++++------------------- 1 file changed, 24 insertions(+), 31 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 9b0f898bffe7d..e9e5959454807 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -14,7 +14,6 @@ parse_time_string, ) from pandas._libs.tslibs.strptime import array_strptime -from pandas.util._decorators import deprecate_kwarg from pandas.core.dtypes.common import ( ensure_object, @@ -41,7 +40,6 @@ from pandas.core import algorithms from pandas.core.algorithms import unique - # --------------------------------------------------------------------- # types used in annotations @@ -154,11 +152,11 @@ def _maybe_cache(arg, format, cache, convert_listlike): return cache_array -def _wrap_as_indexlike( +def _box_as_indexlike( dt_array: ArrayLike, utc: Optional[bool] = None, name: Optional[str] = None ) -> Union[ABCIndex, ABCDatetimeIndex]: """ - Properly wraps the ndarray of datetimes to DatetimeIndex + Properly boxes the ndarray of datetimes to DatetimeIndex if it is possible or to generic Index instead Parameters @@ -184,7 +182,7 @@ def _wrap_as_indexlike( return Index(dt_array, name=name) -def _convert_and_cache( +def _convert_and_box_cache( arg: DatetimeScalarOrArrayConvertible, cache_array: ABCSeries, name: Optional[str] = None, @@ -207,7 +205,7 @@ def _convert_and_cache( from pandas import Series result = Series(arg).map(cache_array) - return _wrap_as_indexlike(result, utc=None, 
name=name) + return _box_as_indexlike(result, utc=None, name=name) def _return_parsed_timezone_results(result, timezones, tz, name): @@ -263,8 +261,6 @@ def _convert_listlike_datetimes( ---------- arg : list, tuple, ndarray, Series, Index date to be parced - box : boolean - True boxes result as an Index-like, False returns an ndarray name : object None or string for the Index name tz : object @@ -321,26 +317,25 @@ def _convert_listlike_datetimes( raise ValueError("cannot specify both format and unit") arg = getattr(arg, "values", arg) result, tz_parsed = tslib.array_with_unit_to_datetime(arg, unit, errors=errors) - if True: - if errors == "ignore": - from pandas import Index + if errors == "ignore": + from pandas import Index - result = Index(result, name=name) + result = Index(result, name=name) + else: + result = DatetimeIndex(result, name=name) + # GH 23758: We may still need to localize the result with tz + # GH 25546: Apply tz_parsed first (from arg), then tz (from caller) + # result will be naive but in UTC + try: + result = result.tz_localize("UTC").tz_convert(tz_parsed) + except AttributeError: + # Regular Index from 'ignore' path + return result + if tz is not None: + if result.tz is None: + result = result.tz_localize(tz) else: - result = DatetimeIndex(result, name=name) - # GH 23758: We may still need to localize the result with tz - # GH 25546: Apply tz_parsed first (from arg), then tz (from caller) - # result will be naive but in UTC - try: - result = result.tz_localize("UTC").tz_convert(tz_parsed) - except AttributeError: - # Regular Index from 'ignore' path - return result - if tz is not None: - if result.tz is None: - result = result.tz_localize(tz) - else: - result = result.tz_convert(tz) + result = result.tz_convert(tz) return result elif getattr(arg, "ndim", 1) > 1: raise TypeError( @@ -443,7 +438,7 @@ def _convert_listlike_datetimes( return DatetimeIndex._simple_new(result, name=name, tz=tz_parsed) utc = tz == "utc" - return 
_wrap_as_indexlike(result, utc=utc, name=name) + return _box_as_indexlike(result, utc=utc, name=name) def _adjust_to_origin(arg, origin, unit): @@ -525,14 +520,12 @@ def _adjust_to_origin(arg, origin, unit): return arg -@deprecate_kwarg(old_arg_name="box", new_arg_name=None) def to_datetime( arg, errors="raise", dayfirst=False, yearfirst=False, utc=None, - box=True, format=None, exact=True, unit=None, @@ -729,14 +722,14 @@ def to_datetime( elif isinstance(arg, ABCIndexClass): cache_array = _maybe_cache(arg, format, cache, convert_listlike) if not cache_array.empty: - result = _convert_and_cache(arg, cache_array, name=arg.name) + result = _convert_and_box_cache(arg, cache_array, name=arg.name) else: convert_listlike = partial(convert_listlike, name=arg.name) result = convert_listlike(arg, format) elif is_list_like(arg): cache_array = _maybe_cache(arg, format, cache, convert_listlike) if not cache_array.empty: - result = _convert_and_cache(arg, cache_array) + result = _convert_and_box_cache(arg, cache_array) else: result = convert_listlike(arg, format) else: From 9051e2adee17b6f576833e7862adff5c6f42de3b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 6 Dec 2019 08:13:34 -0800 Subject: [PATCH 3/3] whatsnew --- doc/source/whatsnew/v1.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index a646c4aa03687..29139a0a14991 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -538,6 +538,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more. - Removed the previously deprecated :meth:`Series.compound` and :meth:`DataFrame.compound` (:issue:`26405`) - Changed the the default value of `inplace` in :meth:`DataFrame.set_index` and :meth:`Series.set_axis`. 
It now defaults to ``False`` (:issue:`27600`) - Removed the previously deprecated :attr:`Series.cat.categorical`, :attr:`Series.cat.index`, :attr:`Series.cat.name` (:issue:`24751`) +- :func:`to_datetime` no longer accepts the ``box`` argument; it always returns a :class:`DatetimeIndex`, :class:`Index`, :class:`Series`, or :class:`DataFrame` (:issue:`24486`) - Removed the previously deprecated ``time_rule`` keyword from (non-public) :func:`offsets.generate_range`, which has been moved to :func:`core.arrays._ranges.generate_range` (:issue:`24157`) - :meth:`DataFrame.loc` or :meth:`Series.loc` with listlike indexers and missing labels will no longer reindex (:issue:`17295`) - :meth:`DataFrame.to_excel` and :meth:`Series.to_excel` with non-existent columns will no longer reindex (:issue:`17295`)