Skip to content

Commit

Permalink
BUG: Retain timezone dtype with cut and qcut
Browse files Browse the repository at this point in the history
Add additional test
  • Loading branch information
mroeschke committed Feb 25, 2018
1 parent e97be6f commit f180969
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 4 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -906,6 +906,7 @@ Reshaping
- :func:`Series.rename` now accepts ``axis`` as a kwarg (:issue:`18589`)
- Comparisons between :class:`Series` and :class:`Index` would return a ``Series`` with an incorrect name, ignoring the ``Index``'s name attribute (:issue:`19582`)
- Bug in :func:`qcut` where datetime and timedelta data with ``NaT`` present raised a ``ValueError`` (:issue:`19768`)
- Bug in :func:`cut` and :func:`qcut` where timezone information was dropped (:issue:`19872`)

Other
^^^^^
Expand Down
17 changes: 14 additions & 3 deletions pandas/core/reshape/tile.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
is_categorical_dtype,
is_datetime64_dtype,
is_timedelta64_dtype,
is_datetime64tz_dtype,
_ensure_int64)

import pandas.core.algorithms as algos
Expand Down Expand Up @@ -284,7 +285,9 @@ def _coerce_to_type(x):
"""
dtype = None

if is_timedelta64_dtype(x):
if is_datetime64tz_dtype(x):
dtype = x.dtype
elif is_timedelta64_dtype(x):
x = to_timedelta(x)
dtype = np.timedelta64
elif is_datetime64_dtype(x):
Expand Down Expand Up @@ -333,7 +336,11 @@ def _format_labels(bins, precision, right=True,

closed = 'right' if right else 'left'

if is_datetime64_dtype(dtype):
if is_datetime64tz_dtype(dtype):
from functools import partial
formatter = partial(Timestamp, tz=dtype.tz)
adjust = lambda x: x - Timedelta('1ns')
elif is_datetime64_dtype(dtype):
formatter = Timestamp
adjust = lambda x: x - Timedelta('1ns')
elif is_timedelta64_dtype(dtype):
Expand Down Expand Up @@ -372,7 +379,11 @@ def _preprocess_for_cut(x):
series_index = x.index
name = x.name

x = np.asarray(x)
ndim = getattr(x, 'ndim', None)
if ndim is None:
x = np.asarray(x)
if x.ndim != 1:
raise ValueError("Input array must be 1 dimensional")

return x_is_series, series_index, name, x

Expand Down
36 changes: 35 additions & 1 deletion pandas/tests/reshape/test_tile.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import numpy as np
from pandas.compat import zip

from pandas import (Series, isna, to_datetime, DatetimeIndex,
from pandas import (DataFrame, Series, isna, to_datetime, DatetimeIndex,
Timestamp, Interval, IntervalIndex, Categorical,
cut, qcut, date_range, NaT, TimedeltaIndex)
from pandas.tseries.offsets import Nano, Day
Expand Down Expand Up @@ -104,6 +104,12 @@ def test_cut_corner(self):

pytest.raises(ValueError, cut, [1, 2, 3], 0.5)

# Inputs that are not 1-dimensional: a scalar, a 2x2 ndarray and a 2x2 DataFrame.
@pytest.mark.parametrize('arg', [2, np.eye(2), DataFrame(np.eye(2))])
# Each input is exercised against both binning functions.
@pytest.mark.parametrize('cut_func', [cut, qcut])
def test_cut_not_1d_arg(self, arg, cut_func):
# Asking for 2 bins on any of these inputs must raise ValueError.
# NOTE(review): presumably this is the "Input array must be 1 dimensional"
# error from _preprocess_for_cut; the message itself is not asserted here.
with pytest.raises(ValueError):
cut_func(arg, 2)

def test_cut_out_of_range_more(self):
# #1511
s = Series([0, -1, 0, 1, -3], name='x')
Expand Down Expand Up @@ -488,6 +494,34 @@ def test_datetime_cut(self):
result, bins = cut(data, 3, retbins=True)
tm.assert_series_equal(Series(result), expected)

def test_datetimetz_cut(self):
# GH 19872
# Checks that cut and qcut on tz-aware datetimes yield intervals whose
# endpoints are Timestamps carrying the same timezone as the input.
tz = 'US/Eastern'
# Three consecutive daily timestamps starting 2013-01-01, localized to tz.
s = Series(date_range('20130101', periods=3, tz=tz))
# Split into 3 bins; each input value is expected in its own interval.
result = cut(s, 3)
# The first expected left edge (2012-12-31 23:57:07.2) lies before the
# earliest input value; the other edges are 16 hours apart.
# NOTE(review): CDT is presumably an alias for CategoricalDtype imported
# elsewhere in this test module -- confirm.
expected = (
Series(IntervalIndex([
Interval(Timestamp('2012-12-31 23:57:07.200000', tz=tz),
Timestamp('2013-01-01 16:00:00', tz=tz)),
Interval(Timestamp('2013-01-01 16:00:00', tz=tz),
Timestamp('2013-01-02 08:00:00', tz=tz)),
Interval(Timestamp('2013-01-02 08:00:00', tz=tz),
Timestamp('2013-01-03 00:00:00', tz=tz))]))
.astype(CDT(ordered=True)))
tm.assert_series_equal(result, expected)

# Same input through qcut with 3 quantile bins.
result = qcut(s, 3)
# Here the first expected left edge is one nanosecond before the earliest
# input value (2012-12-31 23:59:59.999999999); the remaining edges match
# the cut case above.
expected = (
Series(IntervalIndex([
Interval(Timestamp('2012-12-31 23:59:59.999999999', tz=tz),
Timestamp('2013-01-01 16:00:00', tz=tz)),
Interval(Timestamp('2013-01-01 16:00:00', tz=tz),
Timestamp('2013-01-02 08:00:00', tz=tz)),
Interval(Timestamp('2013-01-02 08:00:00', tz=tz),
Timestamp('2013-01-03 00:00:00', tz=tz))]))
.astype(CDT(ordered=True)))
tm.assert_series_equal(result, expected)

def test_datetime_bin(self):
data = [np.datetime64('2012-12-13'), np.datetime64('2012-12-15')]
bin_data = ['2012-12-12', '2012-12-14', '2012-12-16']
Expand Down

0 comments on commit f180969

Please sign in to comment.