Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEPR: box arg in to_datetime #30111

Merged
merged 4 commits into from
Dec 6, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -538,6 +538,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more.
- Removed the previously deprecated :meth:`Series.compound` and :meth:`DataFrame.compound` (:issue:`26405`)
- Changed the default value of `inplace` in :meth:`DataFrame.set_index` and :meth:`Series.set_axis`. It now defaults to ``False`` (:issue:`27600`)
- Removed the previously deprecated :attr:`Series.cat.categorical`, :attr:`Series.cat.index`, :attr:`Series.cat.name` (:issue:`24751`)
- :func:`to_datetime` no longer accepts the ``box`` argument and always returns a :class:`DatetimeIndex` or :class:`Index`, :class:`Series`, or :class:`DataFrame` (:issue:`24486`)
- Removed the previously deprecated ``time_rule`` keyword from (non-public) :func:`offsets.generate_range`, which has been moved to :func:`core.arrays._ranges.generate_range` (:issue:`24157`)
- :meth:`DataFrame.loc` or :meth:`Series.loc` with listlike indexers and missing labels will no longer reindex (:issue:`17295`)
- :meth:`DataFrame.to_excel` and :meth:`Series.to_excel` with non-existent columns will no longer reindex (:issue:`17295`)
Expand Down
141 changes: 43 additions & 98 deletions pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
parse_time_string,
)
from pandas._libs.tslibs.strptime import array_strptime
from pandas.util._decorators import deprecate_kwarg

from pandas.core.dtypes.common import (
ensure_object,
Expand Down Expand Up @@ -45,12 +44,6 @@
# types used in annotations

ArrayConvertible = Union[list, tuple, ArrayLike, ABCSeries]

# ---------------------------------------------------------------------

# ---------------------------------------------------------------------
# types used in annotations

Scalar = Union[int, float, str]
DatetimeScalar = TypeVar("DatetimeScalar", Scalar, datetime)
DatetimeScalarOrArrayConvertible = Union[
Expand Down Expand Up @@ -154,7 +147,7 @@ def _maybe_cache(arg, format, cache, convert_listlike):

unique_dates = unique(arg)
if len(unique_dates) < len(arg):
cache_dates = convert_listlike(unique_dates, True, format)
cache_dates = convert_listlike(unique_dates, format)
cache_array = Series(cache_dates, index=unique_dates)
return cache_array

Expand All @@ -169,7 +162,7 @@ def _box_as_indexlike(
Parameters
----------
dt_array: 1-d array
array of datetimes to be boxed
Array of datetimes to be wrapped in an Index.
tz : object
None or 'utc'
name : string, default None
Expand All @@ -192,37 +185,30 @@ def _box_as_indexlike(
def _convert_and_box_cache(
arg: DatetimeScalarOrArrayConvertible,
cache_array: ABCSeries,
box: bool,
name: Optional[str] = None,
) -> Union[ABCIndex, np.ndarray]:
) -> ABCIndexClass:
"""
Convert array of dates with a cache and box the result
Convert array of dates with a cache and wrap the result in an Index.
Parameters
----------
arg : integer, float, string, datetime, list, tuple, 1-d array, Series
cache_array : Series
Cache of converted, unique dates
box : boolean
True boxes result as an Index-like, False returns an ndarray
name : string, default None
Name for a DatetimeIndex
Returns
-------
result : datetime of converted dates
- Index-like if box=True
- ndarray if box=False
result : Index-like of converted dates
"""
from pandas import Series

result = Series(arg).map(cache_array)
if box:
return _box_as_indexlike(result, utc=None, name=name)
return result.values
return _box_as_indexlike(result, utc=None, name=name)


def _return_parsed_timezone_results(result, timezones, box, tz, name):
def _return_parsed_timezone_results(result, timezones, tz, name):
"""
Return results from array_strptime if a %z or %Z directive was passed.
Expand All @@ -232,20 +218,14 @@ def _return_parsed_timezone_results(result, timezones, box, tz, name):
int64 date representations of the dates
timezones : ndarray
pytz timezone objects
box : boolean
True boxes result as an Index-like, False returns an ndarray
tz : object
None or pytz timezone object
name : string, default None
Name for a DatetimeIndex
Returns
-------
tz_result : ndarray of parsed dates with timezone
Returns:
- Index-like if box=True
- ndarray of Timestamps if box=False
tz_result : Index-like of parsed dates with timezone
"""
if tz is not None:
raise ValueError(
Expand All @@ -256,16 +236,13 @@ def _return_parsed_timezone_results(result, timezones, box, tz, name):
tz_results = np.array(
[Timestamp(res).tz_localize(zone) for res, zone in zip(result, timezones)]
)
if box:
from pandas import Index
from pandas import Index

return Index(tz_results, name=name)
return tz_results
return Index(tz_results, name=name)


def _convert_listlike_datetimes(
arg,
box,
format,
name=None,
tz=None,
Expand All @@ -284,8 +261,6 @@ def _convert_listlike_datetimes(
----------
arg : list, tuple, ndarray, Series, Index
date to be parsed
box : boolean
True boxes result as an Index-like, False returns an ndarray
name : object
None or string for the Index name
tz : object
Expand All @@ -305,11 +280,7 @@ def _convert_listlike_datetimes(
Returns
-------
ndarray of parsed dates
Returns:
- Index-like if box=True
- ndarray of Timestamps if box=False
Index-like of parsed dates
"""
from pandas import DatetimeIndex
from pandas.core.arrays import DatetimeArray
Expand All @@ -330,7 +301,7 @@ def _convert_listlike_datetimes(
return arg

elif is_datetime64_ns_dtype(arg):
if box and not isinstance(arg, (DatetimeArray, DatetimeIndex)):
if not isinstance(arg, (DatetimeArray, DatetimeIndex)):
try:
return DatetimeIndex(arg, tz=tz, name=name)
except ValueError:
Expand All @@ -346,26 +317,25 @@ def _convert_listlike_datetimes(
raise ValueError("cannot specify both format and unit")
arg = getattr(arg, "values", arg)
result, tz_parsed = tslib.array_with_unit_to_datetime(arg, unit, errors=errors)
if box:
if errors == "ignore":
from pandas import Index
if errors == "ignore":
from pandas import Index

result = Index(result, name=name)
result = Index(result, name=name)
else:
result = DatetimeIndex(result, name=name)
# GH 23758: We may still need to localize the result with tz
# GH 25546: Apply tz_parsed first (from arg), then tz (from caller)
# result will be naive but in UTC
try:
result = result.tz_localize("UTC").tz_convert(tz_parsed)
except AttributeError:
# Regular Index from 'ignore' path
return result
if tz is not None:
if result.tz is None:
result = result.tz_localize(tz)
else:
result = DatetimeIndex(result, name=name)
# GH 23758: We may still need to localize the result with tz
# GH 25546: Apply tz_parsed first (from arg), then tz (from caller)
# result will be naive but in UTC
try:
result = result.tz_localize("UTC").tz_convert(tz_parsed)
except AttributeError:
# Regular Index from 'ignore' path
return result
if tz is not None:
if result.tz is None:
result = result.tz_localize(tz)
else:
result = result.tz_convert(tz)
result = result.tz_convert(tz)
return result
elif getattr(arg, "ndim", 1) > 1:
raise TypeError(
Expand Down Expand Up @@ -416,7 +386,7 @@ def _convert_listlike_datetimes(
)
if "%Z" in format or "%z" in format:
return _return_parsed_timezone_results(
result, timezones, box, tz, name
result, timezones, tz, name
)
except tslibs.OutOfBoundsDatetime:
if errors == "raise":
Expand Down Expand Up @@ -463,20 +433,12 @@ def _convert_listlike_datetimes(
)

if tz_parsed is not None:
if box:
# We can take a shortcut since the datetime64 numpy array
# is in UTC
return DatetimeIndex._simple_new(result, name=name, tz=tz_parsed)
else:
# Convert the datetime64 numpy array to an numpy array
# of datetime objects
result = [Timestamp(ts, tz=tz_parsed).to_pydatetime() for ts in result]
return np.array(result, dtype=object)
# We can take a shortcut since the datetime64 numpy array
# is in UTC
return DatetimeIndex._simple_new(result, name=name, tz=tz_parsed)

if box:
utc = tz == "utc"
return _box_as_indexlike(result, utc=utc, name=name)
return result
utc = tz == "utc"
return _box_as_indexlike(result, utc=utc, name=name)


def _adjust_to_origin(arg, origin, unit):
Expand Down Expand Up @@ -558,14 +520,12 @@ def _adjust_to_origin(arg, origin, unit):
return arg


@deprecate_kwarg(old_arg_name="box", new_arg_name=None)
def to_datetime(
arg,
errors="raise",
dayfirst=False,
yearfirst=False,
utc=None,
box=True,
format=None,
exact=True,
unit=None,
Expand Down Expand Up @@ -603,15 +563,6 @@ def to_datetime(
utc : bool, default None
Return UTC DatetimeIndex if True (converting any tz-aware
datetime.datetime objects as well).
box : bool, default True
- If True returns a DatetimeIndex or Index-like object
- If False returns ndarray of values.
.. deprecated:: 0.25.0
Use :meth:`Series.to_numpy` or :meth:`Timestamp.to_datetime64`
instead to get an ndarray of values or numpy.datetime64,
respectively.
format : str, default None
The strftime to parse time, eg "%d/%m/%Y", note that "%f" will parse
all the way up to nanoseconds.
Expand Down Expand Up @@ -764,25 +715,25 @@ def to_datetime(
if not cache_array.empty:
result = arg.map(cache_array)
else:
values = convert_listlike(arg._values, True, format)
values = convert_listlike(arg._values, format)
result = arg._constructor(values, index=arg.index, name=arg.name)
elif isinstance(arg, (ABCDataFrame, abc.MutableMapping)):
result = _assemble_from_unit_mappings(arg, errors, box, tz)
result = _assemble_from_unit_mappings(arg, errors, tz)
elif isinstance(arg, ABCIndexClass):
cache_array = _maybe_cache(arg, format, cache, convert_listlike)
if not cache_array.empty:
result = _convert_and_box_cache(arg, cache_array, box, name=arg.name)
result = _convert_and_box_cache(arg, cache_array, name=arg.name)
else:
convert_listlike = partial(convert_listlike, name=arg.name)
result = convert_listlike(arg, box, format)
result = convert_listlike(arg, format)
elif is_list_like(arg):
cache_array = _maybe_cache(arg, format, cache, convert_listlike)
if not cache_array.empty:
result = _convert_and_box_cache(arg, cache_array, box)
result = _convert_and_box_cache(arg, cache_array)
else:
result = convert_listlike(arg, box, format)
result = convert_listlike(arg, format)
else:
result = convert_listlike(np.array([arg]), box, format)[0]
result = convert_listlike(np.array([arg]), format)[0]

return result

Expand Down Expand Up @@ -813,7 +764,7 @@ def to_datetime(
}


def _assemble_from_unit_mappings(arg, errors, box, tz):
def _assemble_from_unit_mappings(arg, errors, tz):
"""
assemble the unit specified fields from the arg (DataFrame)
Return a Series for actual parsing
Expand All @@ -826,10 +777,6 @@ def _assemble_from_unit_mappings(arg, errors, box, tz):
- If 'raise', then invalid parsing will raise an exception
- If 'coerce', then invalid parsing will be set as NaT
- If 'ignore', then invalid parsing will return the input
box : boolean
- If True, return a DatetimeIndex
- If False, return an array
tz : None or 'utc'
Returns
Expand Down Expand Up @@ -904,8 +851,6 @@ def coerce(values):
"cannot assemble the datetimes [{value}]: "
"{error}".format(value=value, error=e)
)
if not box:
return values.values
return values


Expand Down
36 changes: 0 additions & 36 deletions pandas/tests/indexes/datetimes/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -921,22 +921,6 @@ def test_iso_8601_strings_with_same_offset(self):
result = DatetimeIndex([ts_str] * 2)
tm.assert_index_equal(result, expected)

def test_iso_8601_strings_same_offset_no_box(self):
# GH 22446
data = ["2018-01-04 09:01:00+09:00", "2018-01-04 09:02:00+09:00"]

with tm.assert_produces_warning(FutureWarning):
result = pd.to_datetime(data, box=False)

expected = np.array(
[
datetime(2018, 1, 4, 9, 1, tzinfo=pytz.FixedOffset(540)),
datetime(2018, 1, 4, 9, 2, tzinfo=pytz.FixedOffset(540)),
],
dtype=object,
)
tm.assert_numpy_array_equal(result, expected)

def test_iso_8601_strings_with_different_offsets(self):
# GH 17697, 11736
ts_strings = ["2015-11-18 15:30:00+05:30", "2015-11-18 16:30:00+06:30", NaT]
Expand Down Expand Up @@ -1024,16 +1008,6 @@ def test_timestamp_utc_true(self, ts, expected):
result = to_datetime(ts, utc=True)
assert result == expected

def test_to_datetime_box_deprecated(self):
expected = np.datetime64("2018-09-09")

# Deprecated - see GH24416
with tm.assert_produces_warning(FutureWarning):
pd.to_datetime(expected, box=False)

result = pd.to_datetime(expected).to_datetime64()
assert result == expected

@pytest.mark.parametrize("dt_str", ["00010101", "13000101", "30000101", "99990101"])
def test_to_datetime_with_format_out_of_bounds(self, dt_str):
# GH 9107
Expand Down Expand Up @@ -1345,16 +1319,6 @@ def test_dataframe_dtypes(self, cache):
with pytest.raises(ValueError):
to_datetime(df, cache=cache)

def test_dataframe_box_false(self):
# GH 23760
df = pd.DataFrame({"year": [2015, 2016], "month": [2, 3], "day": [4, 5]})

with tm.assert_produces_warning(FutureWarning):
result = pd.to_datetime(df, box=False)

expected = np.array(["2015-02-04", "2016-03-05"], dtype="datetime64[ns]")
tm.assert_numpy_array_equal(result, expected)

def test_dataframe_utc_true(self):
# GH 23760
df = pd.DataFrame({"year": [2015, 2016], "month": [2, 3], "day": [4, 5]})
Expand Down